summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 13:00:47 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 13:00:47 +0000
commit2cb7e0aaedad73b076ea18c6900b0e86c5760d79 (patch)
treeda68ca54bb79f4080079bf0828acda937593a4e1 /src
parentInitial commit. (diff)
downloadsystemd-upstream.tar.xz
systemd-upstream.zip
Adding upstream version 247.3.upstream/247.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src')
-rw-r--r--src/ac-power/ac-power.c90
-rw-r--r--src/activate/activate.c521
-rw-r--r--src/analyze/analyze-condition.c109
-rw-r--r--src/analyze/analyze-condition.h6
-rw-r--r--src/analyze/analyze-security.c2220
-rw-r--r--src/analyze/analyze-security.h12
-rw-r--r--src/analyze/analyze-verify.c283
-rw-r--r--src/analyze/analyze-verify.h10
-rw-r--r--src/analyze/analyze.c2461
-rw-r--r--src/analyze/meson.build11
-rw-r--r--src/analyze/test-verify.c19
-rw-r--r--src/ask-password/ask-password.c189
-rw-r--r--src/backlight/backlight.c511
-rw-r--r--src/basic/MurmurHash2.c91
-rw-r--r--src/basic/MurmurHash2.h31
-rw-r--r--src/basic/af-list.c40
-rw-r--r--src/basic/af-list.h25
-rw-r--r--src/basic/af-to-name.awk9
-rw-r--r--src/basic/alloc-util.c112
-rw-r--r--src/basic/alloc-util.h174
-rw-r--r--src/basic/architecture.c178
-rw-r--r--src/basic/architecture.h237
-rw-r--r--src/basic/arphrd-list.c25
-rw-r--r--src/basic/arphrd-list.h5
-rw-r--r--src/basic/arphrd-to-name.awk12
-rw-r--r--src/basic/async.c108
-rw-r--r--src/basic/async.h13
-rw-r--r--src/basic/audit-util.c89
-rw-r--r--src/basic/audit-util.h17
-rw-r--r--src/basic/blockdev-util.c253
-rw-r--r--src/basic/blockdev-util.h24
-rw-r--r--src/basic/btrfs-util.c2021
-rw-r--r--src/basic/btrfs-util.h129
-rw-r--r--src/basic/build.h168
-rw-r--r--src/basic/bus-label.c81
-rw-r--r--src/basic/bus-label.h14
-rw-r--r--src/basic/cap-list.c122
-rw-r--r--src/basic/cap-list.h11
-rw-r--r--src/basic/cap-to-name.awk9
-rw-r--r--src/basic/capability-util.c605
-rw-r--r--src/basic/capability-util.h78
-rw-r--r--src/basic/cgroup-util.c2189
-rw-r--r--src/basic/cgroup-util.h290
-rw-r--r--src/basic/chattr-util.c96
-rw-r--r--src/basic/chattr-util.h39
-rw-r--r--src/basic/conf-files.c320
-rw-r--r--src/basic/conf-files.h23
-rw-r--r--src/basic/copy.c1237
-rw-r--r--src/basic/copy.h67
-rw-r--r--src/basic/def.h67
-rw-r--r--src/basic/device-nodes.c63
-rw-r--r--src/basic/device-nodes.h16
-rw-r--r--src/basic/dirent-util.c71
-rw-r--r--src/basic/dirent-util.h36
-rw-r--r--src/basic/dlfcn-util.c40
-rw-r--r--src/basic/dlfcn-util.h10
-rw-r--r--src/basic/efivars.c407
-rw-r--r--src/basic/efivars.h80
-rw-r--r--src/basic/env-file.c567
-rw-r--r--src/basic/env-file.h17
-rw-r--r--src/basic/env-util.c756
-rw-r--r--src/basic/env-util.h57
-rw-r--r--src/basic/errno-list.c37
-rw-r--r--src/basic/errno-list.h15
-rw-r--r--src/basic/errno-to-name.awk9
-rw-r--r--src/basic/errno-util.h119
-rw-r--r--src/basic/escape.c549
-rw-r--r--src/basic/escape.h67
-rw-r--r--src/basic/ether-addr-util.c126
-rw-r--r--src/basic/ether-addr-util.h52
-rw-r--r--src/basic/extract-word.c290
-rw-r--r--src/basic/extract-word.h18
-rw-r--r--src/basic/fd-util.c1067
-rw-r--r--src/basic/fd-util.h108
-rw-r--r--src/basic/fileio.c1320
-rw-r--r--src/basic/fileio.h123
-rw-r--r--src/basic/format-util.c81
-rw-r--r--src/basic/format-util.h89
-rw-r--r--src/basic/fs-util.c1615
-rw-r--r--src/basic/fs-util.h134
-rw-r--r--src/basic/gcrypt-util.c49
-rw-r--r--src/basic/gcrypt-util.h34
-rwxr-xr-xsrc/basic/generate-af-list.sh6
-rwxr-xr-xsrc/basic/generate-arphrd-list.sh6
-rwxr-xr-xsrc/basic/generate-cap-list.sh6
-rwxr-xr-xsrc/basic/generate-errno-list.sh5
-rw-r--r--src/basic/glob-util.c73
-rw-r--r--src/basic/glob-util.h21
-rw-r--r--src/basic/gunicode.c111
-rw-r--r--src/basic/gunicode.h30
-rw-r--r--src/basic/hash-funcs.c107
-rw-r--r--src/basic/hash-funcs.h110
-rw-r--r--src/basic/hashmap.c2028
-rw-r--r--src/basic/hashmap.h449
-rw-r--r--src/basic/hexdecoct.c861
-rw-r--r--src/basic/hexdecoct.h44
-rw-r--r--src/basic/hostname-util.c329
-rw-r--r--src/basic/hostname-util.h29
-rw-r--r--src/basic/in-addr-util.c784
-rw-r--r--src/basic/in-addr-util.h89
-rw-r--r--src/basic/io-util.c335
-rw-r--r--src/basic/io-util.h92
-rw-r--r--src/basic/ioprio.h56
-rw-r--r--src/basic/kbd-util.c109
-rw-r--r--src/basic/kbd-util.h20
-rw-r--r--src/basic/khash.c321
-rw-r--r--src/basic/khash.h37
-rw-r--r--src/basic/label.c106
-rw-r--r--src/basic/label.h23
-rw-r--r--src/basic/limits-util.c154
-rw-r--r--src/basic/limits-util.h10
-rw-r--r--src/basic/linux/README6
-rw-r--r--src/basic/linux/btrfs.h991
-rw-r--r--src/basic/linux/btrfs_tree.h979
-rw-r--r--src/basic/linux/can/netlink.h144
-rw-r--r--src/basic/linux/can/vxcan.h13
-rw-r--r--src/basic/linux/fib_rules.h90
-rw-r--r--src/basic/linux/fou.h48
-rw-r--r--src/basic/linux/hdlc/ioctl.h94
-rw-r--r--src/basic/linux/if.h297
-rw-r--r--src/basic/linux/if_addr.h72
-rw-r--r--src/basic/linux/if_arp.h164
-rw-r--r--src/basic/linux/if_bonding.h165
-rw-r--r--src/basic/linux/if_bridge.h575
-rw-r--r--src/basic/linux/if_ether.h172
-rw-r--r--src/basic/linux/if_link.h1079
-rw-r--r--src/basic/linux/if_macsec.h192
-rw-r--r--src/basic/linux/if_tun.h114
-rw-r--r--src/basic/linux/if_tunnel.h183
-rw-r--r--src/basic/linux/in.h311
-rw-r--r--src/basic/linux/in6.h301
-rw-r--r--src/basic/linux/ipv6_route.h64
-rw-r--r--src/basic/linux/l2tp.h201
-rw-r--r--src/basic/linux/libc-compat.h267
-rw-r--r--src/basic/linux/loadavg.h48
-rw-r--r--src/basic/linux/netdevice.h66
-rw-r--r--src/basic/linux/netlink.h355
-rw-r--r--src/basic/linux/nexthop.h59
-rw-r--r--src/basic/linux/pkt_sched.h1265
-rw-r--r--src/basic/linux/rtnetlink.h787
-rwxr-xr-xsrc/basic/linux/update.sh9
-rw-r--r--src/basic/linux/wireguard.h196
-rw-r--r--src/basic/list.h186
-rw-r--r--src/basic/locale-util.c457
-rw-r--r--src/basic/locale-util.h89
-rw-r--r--src/basic/log.c1491
-rw-r--r--src/basic/log.h350
-rw-r--r--src/basic/login-util.c12
-rw-r--r--src/basic/login-util.h11
-rw-r--r--src/basic/macro.h657
-rw-r--r--src/basic/memfd-util.c154
-rw-r--r--src/basic/memfd-util.h18
-rw-r--r--src/basic/memory-util.c59
-rw-r--r--src/basic/memory-util.h103
-rw-r--r--src/basic/mempool.c100
-rw-r--r--src/basic/mempool.h31
-rw-r--r--src/basic/meson.build350
-rw-r--r--src/basic/missing_audit.h24
-rw-r--r--src/basic/missing_capability.h39
-rw-r--r--src/basic/missing_drm.h10
-rw-r--r--src/basic/missing_fcntl.h60
-rw-r--r--src/basic/missing_fs.h67
-rw-r--r--src/basic/missing_input.h45
-rw-r--r--src/basic/missing_keyctl.h78
-rw-r--r--src/basic/missing_loop.h15
-rw-r--r--src/basic/missing_magic.h39
-rw-r--r--src/basic/missing_mman.h12
-rw-r--r--src/basic/missing_network.h36
-rw-r--r--src/basic/missing_prctl.h14
-rw-r--r--src/basic/missing_random.h20
-rw-r--r--src/basic/missing_resource.h11
-rw-r--r--src/basic/missing_sched.h21
-rw-r--r--src/basic/missing_securebits.h17
-rw-r--r--src/basic/missing_socket.h73
-rw-r--r--src/basic/missing_stat.h135
-rw-r--r--src/basic/missing_stdlib.h13
-rw-r--r--src/basic/missing_syscall.h812
-rw-r--r--src/basic/missing_timerfd.h8
-rw-r--r--src/basic/missing_type.h12
-rw-r--r--src/basic/missing_xfs.h42
-rw-r--r--src/basic/mkdir-label.c59
-rw-r--r--src/basic/mkdir.c241
-rw-r--r--src/basic/mkdir.h30
-rw-r--r--src/basic/mountpoint-util.c511
-rw-r--r--src/basic/mountpoint-util.h25
-rw-r--r--src/basic/namespace-util.c185
-rw-r--r--src/basic/namespace-util.h11
-rw-r--r--src/basic/nss-util.h215
-rw-r--r--src/basic/nulstr-util.c17
-rw-r--r--src/basic/nulstr-util.h13
-rw-r--r--src/basic/ordered-set.c101
-rw-r--r--src/basic/ordered-set.h76
-rw-r--r--src/basic/parse-util.c906
-rw-r--r--src/basic/parse-util.h151
-rw-r--r--src/basic/path-lookup.c869
-rw-r--r--src/basic/path-lookup.h74
-rw-r--r--src/basic/path-util.c1138
-rw-r--r--src/basic/path-util.h188
-rw-r--r--src/basic/prioq.c300
-rw-r--r--src/basic/prioq.h32
-rw-r--r--src/basic/proc-cmdline.c376
-rw-r--r--src/basic/proc-cmdline.h39
-rw-r--r--src/basic/process-util.c1651
-rw-r--r--src/basic/process-util.h201
-rw-r--r--src/basic/procfs-util.c268
-rw-r--r--src/basic/procfs-util.h17
-rw-r--r--src/basic/pthread-util.h16
-rw-r--r--src/basic/quota-util.c41
-rw-r--r--src/basic/quota-util.h19
-rw-r--r--src/basic/random-util.c485
-rw-r--r--src/basic/random-util.h42
-rw-r--r--src/basic/ratelimit.c38
-rw-r--r--src/basic/ratelimit.h24
-rw-r--r--src/basic/raw-clone.h79
-rw-r--r--src/basic/raw-reboot.h14
-rw-r--r--src/basic/replace-var.c93
-rw-r--r--src/basic/replace-var.h4
-rw-r--r--src/basic/rlimit-util.c409
-rw-r--r--src/basic/rlimit-util.h25
-rw-r--r--src/basic/rm-rf.c265
-rw-r--r--src/basic/rm-rf.h34
-rw-r--r--src/basic/selinux-util.c674
-rw-r--r--src/basic/selinux-util.h53
-rw-r--r--src/basic/set.h154
-rw-r--r--src/basic/sigbus.c139
-rw-r--r--src/basic/sigbus.h7
-rw-r--r--src/basic/signal-util.c295
-rw-r--r--src/basic/signal-util.h45
-rw-r--r--src/basic/siphash24.c202
-rw-r--r--src/basic/siphash24.h53
-rw-r--r--src/basic/smack-util.c288
-rw-r--r--src/basic/smack-util.h46
-rw-r--r--src/basic/socket-label.c133
-rw-r--r--src/basic/socket-util.c1388
-rw-r--r--src/basic/socket-util.h271
-rw-r--r--src/basic/sort-util.c29
-rw-r--r--src/basic/sort-util.h70
-rw-r--r--src/basic/sparse-endian.h90
-rw-r--r--src/basic/special.h114
-rw-r--r--src/basic/stat-util.c472
-rw-r--r--src/basic/stat-util.h115
-rw-r--r--src/basic/static-destruct.h58
-rw-r--r--src/basic/stdio-util.h64
-rw-r--r--src/basic/strbuf.c183
-rw-r--r--src/basic/strbuf.h39
-rw-r--r--src/basic/string-table.c15
-rw-r--r--src/basic/string-table.h111
-rw-r--r--src/basic/string-util.c1136
-rw-r--r--src/basic/string-util.h279
-rw-r--r--src/basic/strv.c995
-rw-r--r--src/basic/strv.h240
-rw-r--r--src/basic/strxcpyx.c116
-rw-r--r--src/basic/strxcpyx.h14
-rw-r--r--src/basic/syslog-util.c131
-rw-r--r--src/basic/syslog-util.h16
-rw-r--r--src/basic/terminal-util.c1386
-rw-r--r--src/basic/terminal-util.h210
-rw-r--r--src/basic/time-util.c1612
-rw-r--r--src/basic/time-util.h201
-rw-r--r--src/basic/tmpfile-util.c338
-rw-r--r--src/basic/tmpfile-util.h19
-rw-r--r--src/basic/umask-util.h26
-rw-r--r--src/basic/unaligned.h99
-rw-r--r--src/basic/unit-def.c289
-rw-r--r--src/basic/unit-def.h306
-rw-r--r--src/basic/unit-name.c796
-rw-r--r--src/basic/unit-name.h64
-rw-r--r--src/basic/user-util.c1074
-rw-r--r--src/basic/user-util.h111
-rw-r--r--src/basic/utf8.c585
-rw-r--r--src/basic/utf8.h57
-rw-r--r--src/basic/util.c273
-rw-r--r--src/basic/util.h68
-rw-r--r--src/basic/virt.c705
-rw-r--r--src/basic/virt.h63
-rw-r--r--src/basic/xattr-util.c266
-rw-r--r--src/basic/xattr-util.h27
-rw-r--r--src/binfmt/binfmt.c238
-rw-r--r--src/boot/bless-boot-generator.c71
-rw-r--r--src/boot/bless-boot.c526
-rw-r--r--src/boot/boot-check-no-failures.c114
-rw-r--r--src/boot/bootctl.c1851
-rw-r--r--src/boot/efi/boot.c2539
-rw-r--r--src/boot/efi/console.c227
-rw-r--r--src/boot/efi/console.h24
-rw-r--r--src/boot/efi/crc32.c143
-rw-r--r--src/boot/efi/crc32.h8
-rw-r--r--src/boot/efi/disk.c35
-rw-r--r--src/boot/efi/disk.h4
-rw-r--r--src/boot/efi/graphics.c77
-rw-r--r--src/boot/efi/graphics.h8
-rw-r--r--src/boot/efi/linux.c74
-rw-r--r--src/boot/efi/linux.h87
-rw-r--r--src/boot/efi/loader-features.h14
-rw-r--r--src/boot/efi/measure.c316
-rw-r--r--src/boot/efi/measure.h4
-rw-r--r--src/boot/efi/meson.build260
-rw-r--r--src/boot/efi/missing_efi.h55
-rwxr-xr-xsrc/boot/efi/no-undefined-symbols.sh7
-rw-r--r--src/boot/efi/pe.c170
-rw-r--r--src/boot/efi/pe.h7
-rw-r--r--src/boot/efi/random-seed.c328
-rw-r--r--src/boot/efi/random-seed.h14
-rw-r--r--src/boot/efi/sha256.c277
-rw-r--r--src/boot/efi/sha256.h28
-rw-r--r--src/boot/efi/shim.c210
-rw-r--r--src/boot/efi/shim.h16
-rw-r--r--src/boot/efi/splash.c305
-rw-r--r--src/boot/efi/splash.h4
-rw-r--r--src/boot/efi/stub.c134
-rw-r--r--src/boot/efi/util.c358
-rw-r--r--src/boot/efi/util.h70
-rw-r--r--src/busctl/busctl-introspect.c730
-rw-r--r--src/busctl/busctl-introspect.h14
-rw-r--r--src/busctl/busctl.c2607
-rw-r--r--src/cgls/cgls.c300
-rw-r--r--src/cgroups-agent/cgroups-agent.c47
-rw-r--r--src/cgtop/cgtop.c1109
-rw-r--r--src/core/all-units.h15
-rw-r--r--src/core/apparmor-setup.c100
-rw-r--r--src/core/apparmor-setup.h4
-rw-r--r--src/core/audit-fd.c63
-rw-r--r--src/core/audit-fd.h5
-rw-r--r--src/core/automount.c1135
-rw-r--r--src/core/automount.h44
-rw-r--r--src/core/bpf-devices.c529
-rw-r--r--src/core/bpf-devices.h21
-rw-r--r--src/core/bpf-firewall.c911
-rw-r--r--src/core/bpf-firewall.h23
-rw-r--r--src/core/cgroup.c3778
-rw-r--r--src/core/cgroup.h296
-rw-r--r--src/core/core-varlink.c482
-rw-r--r--src/core/core-varlink.h12
-rw-r--r--src/core/dbus-automount.c64
-rw-r--r--src/core/dbus-automount.h11
-rw-r--r--src/core/dbus-cgroup.c1718
-rw-r--r--src/core/dbus-cgroup.h14
-rw-r--r--src/core/dbus-device.c11
-rw-r--r--src/core/dbus-device.h6
-rw-r--r--src/core/dbus-execute.c3459
-rw-r--r--src/core/dbus-execute.h33
-rw-r--r--src/core/dbus-job.c378
-rw-r--r--src/core/dbus-job.h20
-rw-r--r--src/core/dbus-kill.c83
-rw-r--r--src/core/dbus-kill.h12
-rw-r--r--src/core/dbus-manager.c3317
-rw-r--r--src/core/dbus-manager.h17
-rw-r--r--src/core/dbus-mount.c155
-rw-r--r--src/core/dbus-mount.h12
-rw-r--r--src/core/dbus-path.c159
-rw-r--r--src/core/dbus-path.h11
-rw-r--r--src/core/dbus-scope.c259
-rw-r--r--src/core/dbus-scope.h19
-rw-r--r--src/core/dbus-service.c462
-rw-r--r--src/core/dbus-service.h12
-rw-r--r--src/core/dbus-slice.c34
-rw-r--r--src/core/dbus-slice.h12
-rw-r--r--src/core/dbus-socket.c485
-rw-r--r--src/core/dbus-socket.h12
-rw-r--r--src/core/dbus-swap.c76
-rw-r--r--src/core/dbus-swap.h16
-rw-r--r--src/core/dbus-target.c9
-rw-r--r--src/core/dbus-target.h6
-rw-r--r--src/core/dbus-timer.c382
-rw-r--r--src/core/dbus-timer.h11
-rw-r--r--src/core/dbus-unit.c2475
-rw-r--r--src/core/dbus-unit.h41
-rw-r--r--src/core/dbus-util.c155
-rw-r--r--src/core/dbus-util.h250
-rw-r--r--src/core/dbus.c1250
-rw-r--r--src/core/dbus.h36
-rw-r--r--src/core/device.c1121
-rw-r--r--src/core/device.h43
-rw-r--r--src/core/dynamic-user.c825
-rw-r--r--src/core/dynamic-user.h40
-rw-r--r--src/core/efi-random.c96
-rw-r--r--src/core/efi-random.h4
-rw-r--r--src/core/emergency-action.c171
-rw-r--r--src/core/emergency-action.h34
-rw-r--r--src/core/execute.c6512
-rw-r--r--src/core/execute.h472
-rw-r--r--src/core/generator-setup.c58
-rw-r--r--src/core/generator-setup.h8
-rw-r--r--src/core/hostname-setup.c63
-rw-r--r--src/core/hostname-setup.h4
-rw-r--r--src/core/ima-setup.c92
-rw-r--r--src/core/ima-setup.h9
-rw-r--r--src/core/ip-address-access.c208
-rw-r--r--src/core/ip-address-access.h25
-rw-r--r--src/core/job.c1698
-rw-r--r--src/core/job.h243
-rw-r--r--src/core/kill.c57
-rw-r--r--src/core/kill.h56
-rw-r--r--src/core/killall.c283
-rw-r--r--src/core/killall.h6
-rw-r--r--src/core/kmod-setup.c127
-rw-r--r--src/core/kmod-setup.h4
-rw-r--r--src/core/load-dropin.c125
-rw-r--r--src/core/load-dropin.h20
-rw-r--r--src/core/load-fragment-gperf-nulstr.awk14
-rw-r--r--src/core/load-fragment-gperf.gperf.m4509
-rw-r--r--src/core/load-fragment.c5851
-rw-r--r--src/core/load-fragment.h143
-rw-r--r--src/core/locale-setup.c96
-rw-r--r--src/core/locale-setup.h4
-rw-r--r--src/core/loopback-setup.c213
-rw-r--r--src/core/loopback-setup.h4
-rw-r--r--src/core/machine-id-setup.c282
-rw-r--r--src/core/machine-id-setup.h7
-rw-r--r--src/core/macros.systemd.in165
-rw-r--r--src/core/main.c2935
-rw-r--r--src/core/manager.c4975
-rw-r--r--src/core/manager.h570
-rw-r--r--src/core/meson.build226
-rw-r--r--src/core/mount-setup.c561
-rw-r--r--src/core/mount-setup.h12
-rw-r--r--src/core/mount.c2204
-rw-r--r--src/core/mount.h102
-rw-r--r--src/core/namespace.c2384
-rw-r--r--src/core/namespace.h176
-rw-r--r--src/core/org.freedesktop.systemd1.conf404
-rw-r--r--src/core/org.freedesktop.systemd1.policy.in73
-rw-r--r--src/core/org.freedesktop.systemd1.service13
-rw-r--r--src/core/path.c846
-rw-r--r--src/core/path.h76
-rw-r--r--src/core/scope.c700
-rw-r--r--src/core/scope.h45
-rw-r--r--src/core/selinux-access.c292
-rw-r--r--src/core/selinux-access.h14
-rw-r--r--src/core/selinux-setup.c110
-rw-r--r--src/core/selinux-setup.h6
-rw-r--r--src/core/service.c4612
-rw-r--r--src/core/service.h246
-rw-r--r--src/core/show-status.c128
-rw-r--r--src/core/show-status.h43
-rw-r--r--src/core/slice.c475
-rw-r--r--src/core/slice.h18
-rw-r--r--src/core/smack-setup.c396
-rw-r--r--src/core/smack-setup.h10
-rw-r--r--src/core/socket.c3533
-rw-r--r--src/core/socket.h198
-rw-r--r--src/core/swap.c1694
-rw-r--r--src/core/swap.h99
-rw-r--r--src/core/system.conf.in71
-rw-r--r--src/core/systemd.pc.in101
-rw-r--r--src/core/target.c219
-rw-r--r--src/core/target.h16
-rw-r--r--src/core/timer.c963
-rw-r--r--src/core/timer.h79
-rw-r--r--src/core/transaction.c1201
-rw-r--r--src/core/transaction.h35
-rw-r--r--src/core/triggers.systemd.in143
-rw-r--r--src/core/unit-printf.c269
-rw-r--r--src/core/unit-printf.h7
-rw-r--r--src/core/unit.c6368
-rw-r--r--src/core/unit.h944
-rw-r--r--src/core/user.conf.in47
-rw-r--r--src/coredump/coredump-vacuum.c248
-rw-r--r--src/coredump/coredump-vacuum.h7
-rw-r--r--src/coredump/coredump.c1330
-rw-r--r--src/coredump/coredump.conf21
-rw-r--r--src/coredump/coredumpctl.c1114
-rw-r--r--src/coredump/meson.build28
-rw-r--r--src/coredump/stacktrace.c198
-rw-r--r--src/coredump/stacktrace.h4
-rw-r--r--src/coredump/test-coredump-vacuum.c13
-rw-r--r--src/cryptsetup/cryptsetup-generator.c917
-rw-r--r--src/cryptsetup/cryptsetup-keyfile.c110
-rw-r--r--src/cryptsetup/cryptsetup-keyfile.h13
-rw-r--r--src/cryptsetup/cryptsetup-pkcs11.c144
-rw-r--r--src/cryptsetup/cryptsetup-pkcs11.h41
-rw-r--r--src/cryptsetup/cryptsetup.c1058
-rw-r--r--src/debug-generator/debug-generator.c198
-rw-r--r--src/delta/delta.c689
-rw-r--r--src/detect-virt/detect-virt.c176
-rw-r--r--src/dissect/dissect.c776
-rw-r--r--src/environment-d-generator/environment-d-generator.c99
-rw-r--r--src/escape/escape.c259
-rw-r--r--src/firstboot/firstboot.c1341
-rw-r--r--src/fsck/fsck.c429
-rw-r--r--src/fstab-generator/fstab-generator.c964
-rw-r--r--src/fuzz/fuzz-bus-label.c18
-rw-r--r--src/fuzz/fuzz-bus-message.c45
-rw-r--r--src/fuzz/fuzz-calendarspec.c24
-rw-r--r--src/fuzz/fuzz-catalog.c26
-rw-r--r--src/fuzz/fuzz-compress.c80
-rw-r--r--src/fuzz/fuzz-dhcp-server.c56
-rw-r--r--src/fuzz/fuzz-dhcp-server.options2
-rw-r--r--src/fuzz/fuzz-dhcp6-client.c62
-rw-r--r--src/fuzz/fuzz-dhcp6-client.options2
-rw-r--r--src/fuzz/fuzz-dns-packet.c25
-rw-r--r--src/fuzz/fuzz-dns-packet.options2
-rw-r--r--src/fuzz/fuzz-env-file.c32
-rw-r--r--src/fuzz/fuzz-env-file.options2
-rw-r--r--src/fuzz/fuzz-fido-id-desc.dict6
-rw-r--r--src/fuzz/fuzz-hostname-util.c27
-rw-r--r--src/fuzz/fuzz-journal-remote.c78
-rw-r--r--src/fuzz/fuzz-journal-remote.options2
-rw-r--r--src/fuzz/fuzz-journald-audit.c15
-rw-r--r--src/fuzz/fuzz-journald-kmsg.c18
-rw-r--r--src/fuzz/fuzz-journald-native-fd.c47
-rw-r--r--src/fuzz/fuzz-journald-native.c10
-rw-r--r--src/fuzz/fuzz-journald-stream.c37
-rw-r--r--src/fuzz/fuzz-journald-stream.options2
-rw-r--r--src/fuzz/fuzz-journald-syslog.c10
-rw-r--r--src/fuzz/fuzz-journald.c46
-rw-r--r--src/fuzz/fuzz-journald.h12
-rw-r--r--src/fuzz/fuzz-json.c31
-rw-r--r--src/fuzz/fuzz-lldp.c43
-rw-r--r--src/fuzz/fuzz-lldp.options2
-rw-r--r--src/fuzz/fuzz-main.c45
-rw-r--r--src/fuzz/fuzz-ndisc-rs.c61
-rw-r--r--src/fuzz/fuzz-ndisc-rs.options2
-rw-r--r--src/fuzz/fuzz-nspawn-oci.c27
-rw-r--r--src/fuzz/fuzz-nspawn-oci.options2
-rw-r--r--src/fuzz/fuzz-nspawn-settings.c27
-rw-r--r--src/fuzz/fuzz-nspawn-settings.options2
-rw-r--r--src/fuzz/fuzz-time-util.c28
-rw-r--r--src/fuzz/fuzz-udev-database.c26
-rw-r--r--src/fuzz/fuzz-udev-rule-parse-value.c31
-rw-r--r--src/fuzz/fuzz-udev-rules.c36
-rw-r--r--src/fuzz/fuzz-udev-rules.options2
-rw-r--r--src/fuzz/fuzz-unit-file.c88
-rw-r--r--src/fuzz/fuzz-unit-file.options2
-rw-r--r--src/fuzz/fuzz-varlink.c131
-rw-r--r--src/fuzz/fuzz-xdg-desktop.c36
-rw-r--r--src/fuzz/fuzz.h8
-rw-r--r--src/fuzz/meson.build159
-rw-r--r--src/getty-generator/getty-generator.c204
-rw-r--r--src/gpt-auto-generator/gpt-auto-generator.c843
-rw-r--r--src/hibernate-resume/hibernate-resume-generator.c142
-rw-r--r--src/hibernate-resume/hibernate-resume.c63
-rw-r--r--src/home/home-util.c135
-rw-r--r--src/home/home-util.h23
-rw-r--r--src/home/homectl-fido2.c534
-rw-r--r--src/home/homectl-fido2.h10
-rw-r--r--src/home/homectl-pkcs11.c477
-rw-r--r--src/home/homectl-pkcs11.h11
-rw-r--r--src/home/homectl-recovery-key.c199
-rw-r--r--src/home/homectl-recovery-key.h6
-rw-r--r--src/home/homectl.c3381
-rw-r--r--src/home/homed-bus.c66
-rw-r--r--src/home/homed-bus.h10
-rw-r--r--src/home/homed-conf.c54
-rw-r--r--src/home/homed-conf.h12
-rw-r--r--src/home/homed-gperf.gperf21
-rw-r--r--src/home/homed-home-bus.c953
-rw-r--r--src/home/homed-home-bus.h34
-rw-r--r--src/home/homed-home.c2836
-rw-r--r--src/home/homed-home.h171
-rw-r--r--src/home/homed-manager-bus.c899
-rw-r--r--src/home/homed-manager-bus.h6
-rw-r--r--src/home/homed-manager.c1742
-rw-r--r--src/home/homed-manager.h70
-rw-r--r--src/home/homed-operation.c76
-rw-r--r--src/home/homed-operation.h63
-rw-r--r--src/home/homed-varlink.c366
-rw-r--r--src/home/homed-varlink.h8
-rw-r--r--src/home/homed.c51
-rw-r--r--src/home/homed.conf16
-rw-r--r--src/home/homework-cifs.c213
-rw-r--r--src/home/homework-cifs.h11
-rw-r--r--src/home/homework-directory.c242
-rw-r--r--src/home/homework-directory.h10
-rw-r--r--src/home/homework-fido2.c197
-rw-r--r--src/home/homework-fido2.h6
-rw-r--r--src/home/homework-fscrypt.c643
-rw-r--r--src/home/homework-fscrypt.h10
-rw-r--r--src/home/homework-luks.c3087
-rw-r--r--src/home/homework-luks.h46
-rw-r--r--src/home/homework-mount.c96
-rw-r--r--src/home/homework-mount.h8
-rw-r--r--src/home/homework-pkcs11.c104
-rw-r--r--src/home/homework-pkcs11.h21
-rw-r--r--src/home/homework-quota.c124
-rw-r--r--src/home/homework-quota.h8
-rw-r--r--src/home/homework.c1747
-rw-r--r--src/home/homework.h70
-rw-r--r--src/home/meson.build122
-rw-r--r--src/home/modhex.c74
-rw-r--r--src/home/modhex.h14
-rw-r--r--src/home/org.freedesktop.home1.conf193
-rw-r--r--src/home/org.freedesktop.home1.policy72
-rw-r--r--src/home/org.freedesktop.home1.service7
-rw-r--r--src/home/pam_systemd_home.c1070
-rw-r--r--src/home/pam_systemd_home.sym12
-rw-r--r--src/home/test-modhex.c51
-rw-r--r--src/home/user-record-pwquality.c91
-rw-r--r--src/home/user-record-pwquality.h7
-rw-r--r--src/home/user-record-sign.c176
-rw-r--r--src/home/user-record-sign.h19
-rw-r--r--src/home/user-record-util.c1366
-rw-r--r--src/home/user-record-util.h61
-rw-r--r--src/hostname/hostnamectl.c451
-rw-r--r--src/hostname/hostnamed.c1011
-rw-r--r--src/hostname/meson.build10
-rw-r--r--src/hostname/org.freedesktop.hostname1.conf29
-rw-r--r--src/hostname/org.freedesktop.hostname1.policy60
-rw-r--r--src/hostname/org.freedesktop.hostname1.service12
-rw-r--r--src/hwdb/hwdb.c135
-rw-r--r--src/id128/id128.c261
-rw-r--r--src/import/curl-util.c380
-rw-r--r--src/import/curl-util.h39
-rw-r--r--src/import/export-raw.c331
-rw-r--r--src/import/export-raw.h18
-rw-r--r--src/import/export-tar.c330
-rw-r--r--src/import/export-tar.h18
-rw-r--r--src/import/export.c297
-rw-r--r--src/import/import-common.c299
-rw-r--r--src/import/import-common.h12
-rw-r--r--src/import/import-compress.c466
-rw-r--r--src/import/import-compress.h47
-rw-r--r--src/import/import-fs.c327
-rw-r--r--src/import/import-pubring.gpgbin0 -> 9551 bytes
-rw-r--r--src/import/import-raw.c429
-rw-r--r--src/import/import-raw.h18
-rw-r--r--src/import/import-tar.c365
-rw-r--r--src/import/import-tar.h18
-rw-r--r--src/import/import.c321
-rw-r--r--src/import/importd.c1397
-rw-r--r--src/import/meson.build79
-rw-r--r--src/import/org.freedesktop.import1.conf84
-rw-r--r--src/import/org.freedesktop.import1.policy51
-rw-r--r--src/import/org.freedesktop.import1.service14
-rw-r--r--src/import/pull-common.c526
-rw-r--r--src/import/pull-common.h18
-rw-r--r--src/import/pull-job.c639
-rw-r--r--src/import/pull-job.h93
-rw-r--r--src/import/pull-raw.c741
-rw-r--r--src/import/pull-raw.h18
-rw-r--r--src/import/pull-tar.c559
-rw-r--r--src/import/pull-tar.h18
-rw-r--r--src/import/pull.c332
-rw-r--r--src/import/qcow2-util.c334
-rw-r--r--src/import/qcow2-util.h5
-rw-r--r--src/import/test-qcow2.c39
-rw-r--r--src/initctl/initctl.c360
-rw-r--r--src/journal-remote/browse.html547
-rw-r--r--src/journal-remote/journal-gatewayd.c1036
-rw-r--r--src/journal-remote/journal-remote-main.c1177
-rw-r--r--src/journal-remote/journal-remote-parse.c88
-rw-r--r--src/journal-remote/journal-remote-parse.h20
-rw-r--r--src/journal-remote/journal-remote-write.c106
-rw-r--r--src/journal-remote/journal-remote-write.h40
-rw-r--r--src/journal-remote/journal-remote.c536
-rw-r--r--src/journal-remote/journal-remote.conf.in19
-rw-r--r--src/journal-remote/journal-remote.h65
-rw-r--r--src/journal-remote/journal-upload-journal.c414
-rw-r--r--src/journal-remote/journal-upload.c904
-rw-r--r--src/journal-remote/journal-upload.conf.in18
-rw-r--r--src/journal-remote/journal-upload.h74
-rwxr-xr-xsrc/journal-remote/log-generator.py78
-rw-r--r--src/journal-remote/meson.build77
-rw-r--r--src/journal-remote/microhttpd-util.c310
-rw-r--r--src/journal-remote/microhttpd-util.h84
-rw-r--r--src/journal/audit-type.c6
-rw-r--r--src/journal/audit-type.h22
-rw-r--r--src/journal/audit_type-to-name.awk9
-rw-r--r--src/journal/cat.c169
-rw-r--r--src/journal/catalog.c742
-rw-r--r--src/journal/catalog.h19
-rw-r--r--src/journal/compress.c1061
-rw-r--r--src/journal/compress.h91
-rw-r--r--src/journal/fsprg.c378
-rw-r--r--src/journal/fsprg.h62
-rwxr-xr-xsrc/journal/generate-audit_type-list.sh15
-rw-r--r--src/journal/journal-authenticate.c536
-rw-r--r--src/journal/journal-authenticate.h23
-rw-r--r--src/journal/journal-def.h252
-rw-r--r--src/journal/journal-file.c4162
-rw-r--r--src/journal/journal-file.h276
-rw-r--r--src/journal/journal-internal.h138
-rw-r--r--src/journal/journal-send.c569
-rw-r--r--src/journal/journal-vacuum.c321
-rw-r--r--src/journal/journal-vacuum.h9
-rw-r--r--src/journal/journal-verify.c1327
-rw-r--r--src/journal/journal-verify.h6
-rw-r--r--src/journal/journalctl.c2830
-rw-r--r--src/journal/journald-audit.c555
-rw-r--r--src/journal/journald-audit.h11
-rw-r--r--src/journal/journald-console.c103
-rw-r--r--src/journal/journald-console.h6
-rw-r--r--src/journal/journald-context.c792
-rw-r--r--src/journal/journald-context.h101
-rw-r--r--src/journal/journald-gperf.gperf52
-rw-r--r--src/journal/journald-kmsg.c454
-rw-r--r--src/journal/journald-kmsg.h13
-rw-r--r--src/journal/journald-native.c505
-rw-r--r--src/journal/journald-native.h23
-rw-r--r--src/journal/journald-rate-limit.c254
-rw-r--r--src/journal/journald-rate-limit.h10
-rw-r--r--src/journal/journald-server.c2619
-rw-r--r--src/journal/journald-server.h225
-rw-r--r--src/journal/journald-stream.c963
-rw-r--r--src/journal/journald-stream.h15
-rw-r--r--src/journal/journald-syslog.c527
-rw-r--r--src/journal/journald-syslog.h15
-rw-r--r--src/journal/journald-wall.c54
-rw-r--r--src/journal/journald-wall.h8
-rw-r--r--src/journal/journald.c131
-rw-r--r--src/journal/journald.conf44
-rw-r--r--src/journal/lookup3.c1006
-rw-r--r--src/journal/lookup3.h23
-rw-r--r--src/journal/meson.build133
-rw-r--r--src/journal/mmap-cache.c669
-rw-r--r--src/journal/mmap-cache.h34
-rw-r--r--src/journal/pcre2-dlopen.c57
-rw-r--r--src/journal/pcre2-dlopen.h18
-rw-r--r--src/journal/sd-journal.c3274
-rw-r--r--src/journal/test-audit-type.c26
-rw-r--r--src/journal/test-catalog.c235
-rw-r--r--src/journal/test-compress-benchmark.c179
-rw-r--r--src/journal/test-compress.c372
-rw-r--r--src/journal/test-journal-config.c53
-rw-r--r--src/journal/test-journal-enum.c37
-rw-r--r--src/journal/test-journal-flush.c66
-rw-r--r--src/journal/test-journal-init.c50
-rw-r--r--src/journal/test-journal-interleaving.c296
-rw-r--r--src/journal/test-journal-match.c62
-rw-r--r--src/journal/test-journal-send.c103
-rw-r--r--src/journal/test-journal-stream.c191
-rw-r--r--src/journal/test-journal-syslog.c59
-rw-r--r--src/journal/test-journal-verify.c136
-rw-r--r--src/journal/test-journal.c259
-rw-r--r--src/journal/test-mmap-cache.c66
-rw-r--r--src/kernel-install/00-entry-directory.install32
-rw-r--r--src/kernel-install/50-depmod.install27
-rw-r--r--src/kernel-install/90-loaderentry.install124
-rwxr-xr-xsrc/kernel-install/kernel-install181
-rw-r--r--src/kernel-install/meson.build21
-rw-r--r--src/libsystemd-network/arp-util.c138
-rw-r--r--src/libsystemd-network/arp-util.h18
-rw-r--r--src/libsystemd-network/dhcp-client-internal.h4
-rw-r--r--src/libsystemd-network/dhcp-identifier.c215
-rw-r--r--src/libsystemd-network/dhcp-identifier.h62
-rw-r--r--src/libsystemd-network/dhcp-internal.h69
-rw-r--r--src/libsystemd-network/dhcp-lease-internal.h84
-rw-r--r--src/libsystemd-network/dhcp-network.c244
-rw-r--r--src/libsystemd-network/dhcp-option.c358
-rw-r--r--src/libsystemd-network/dhcp-packet.c171
-rw-r--r--src/libsystemd-network/dhcp-protocol.h99
-rw-r--r--src/libsystemd-network/dhcp-server-internal.h98
-rw-r--r--src/libsystemd-network/dhcp6-internal.h123
-rw-r--r--src/libsystemd-network/dhcp6-lease-internal.h63
-rw-r--r--src/libsystemd-network/dhcp6-network.c74
-rw-r--r--src/libsystemd-network/dhcp6-option.c799
-rw-r--r--src/libsystemd-network/dhcp6-protocol.h120
-rw-r--r--src/libsystemd-network/icmp6-util.c210
-rw-r--r--src/libsystemd-network/icmp6-util.h24
-rw-r--r--src/libsystemd-network/lldp-internal.h39
-rw-r--r--src/libsystemd-network/lldp-neighbor.c792
-rw-r--r--src/libsystemd-network/lldp-neighbor.h92
-rw-r--r--src/libsystemd-network/lldp-network.c78
-rw-r--r--src/libsystemd-network/lldp-network.h6
-rw-r--r--src/libsystemd-network/meson.build51
-rw-r--r--src/libsystemd-network/ndisc-internal.h44
-rw-r--r--src/libsystemd-network/ndisc-router.c750
-rw-r--r--src/libsystemd-network/ndisc-router.h48
-rw-r--r--src/libsystemd-network/network-internal.c886
-rw-r--r--src/libsystemd-network/network-internal.h73
-rw-r--r--src/libsystemd-network/radv-internal.h129
-rw-r--r--src/libsystemd-network/sd-dhcp-client.c2267
-rw-r--r--src/libsystemd-network/sd-dhcp-lease.c1399
-rw-r--r--src/libsystemd-network/sd-dhcp-server.c1222
-rw-r--r--src/libsystemd-network/sd-dhcp6-client.c1848
-rw-r--r--src/libsystemd-network/sd-dhcp6-lease.c433
-rw-r--r--src/libsystemd-network/sd-ipv4acd.c501
-rw-r--r--src/libsystemd-network/sd-ipv4ll.c324
-rw-r--r--src/libsystemd-network/sd-lldp.c498
-rw-r--r--src/libsystemd-network/sd-ndisc.c389
-rw-r--r--src/libsystemd-network/sd-radv.c953
-rw-r--r--src/libsystemd-network/test-acd.c96
-rw-r--r--src/libsystemd-network/test-dhcp-client.c570
-rw-r--r--src/libsystemd-network/test-dhcp-option.c370
-rw-r--r--src/libsystemd-network/test-dhcp-server.c245
-rw-r--r--src/libsystemd-network/test-dhcp6-client.c1004
-rw-r--r--src/libsystemd-network/test-ipv4ll-manual.c110
-rw-r--r--src/libsystemd-network/test-ipv4ll.c205
-rw-r--r--src/libsystemd-network/test-lldp.c378
-rw-r--r--src/libsystemd-network/test-ndisc-ra.c370
-rw-r--r--src/libsystemd-network/test-ndisc-rs.c418
-rw-r--r--src/libsystemd-network/test-sd-dhcp-lease.c93
-rw-r--r--src/libsystemd/disable-mempool.c5
-rw-r--r--src/libsystemd/libsystemd.pc.in20
-rw-r--r--src/libsystemd/libsystemd.sym738
-rw-r--r--src/libsystemd/meson.build115
-rw-r--r--src/libsystemd/sd-bus/GVARIANT-SERIALIZATION105
-rw-r--r--src/libsystemd/sd-bus/bus-common-errors.c142
-rw-r--r--src/libsystemd/sd-bus/bus-common-errors.h123
-rw-r--r--src/libsystemd/sd-bus/bus-container.c100
-rw-r--r--src/libsystemd/sd-bus/bus-container.h6
-rw-r--r--src/libsystemd/sd-bus/bus-control.c942
-rw-r--r--src/libsystemd/sd-bus/bus-control.h9
-rw-r--r--src/libsystemd/sd-bus/bus-convenience.c788
-rw-r--r--src/libsystemd/sd-bus/bus-creds.c1340
-rw-r--r--src/libsystemd/sd-bus/bus-creds.h72
-rw-r--r--src/libsystemd/sd-bus/bus-dump.c592
-rw-r--r--src/libsystemd/sd-bus/bus-dump.h12
-rw-r--r--src/libsystemd/sd-bus/bus-error.c615
-rw-r--r--src/libsystemd/sd-bus/bus-error.h48
-rw-r--r--src/libsystemd/sd-bus/bus-gvariant.c299
-rw-r--r--src/libsystemd/sd-bus/bus-gvariant.h12
-rw-r--r--src/libsystemd/sd-bus/bus-internal.c338
-rw-r--r--src/libsystemd/sd-bus/bus-internal.h416
-rw-r--r--src/libsystemd/sd-bus/bus-introspect.c285
-rw-r--r--src/libsystemd/sd-bus/bus-introspect.h26
-rw-r--r--src/libsystemd/sd-bus/bus-kernel.c50
-rw-r--r--src/libsystemd/sd-bus/bus-kernel.h24
-rw-r--r--src/libsystemd/sd-bus/bus-match.c1100
-rw-r--r--src/libsystemd/sd-bus/bus-match.h80
-rw-r--r--src/libsystemd/sd-bus/bus-message.c5973
-rw-r--r--src/libsystemd/sd-bus/bus-message.h227
-rw-r--r--src/libsystemd/sd-bus/bus-objects.c3031
-rw-r--r--src/libsystemd/sd-bus/bus-objects.h20
-rw-r--r--src/libsystemd/sd-bus/bus-protocol.h105
-rw-r--r--src/libsystemd/sd-bus/bus-signature.c148
-rw-r--r--src/libsystemd/sd-bus/bus-signature.h10
-rw-r--r--src/libsystemd/sd-bus/bus-slot.c311
-rw-r--r--src/libsystemd/sd-bus/bus-slot.h10
-rw-r--r--src/libsystemd/sd-bus/bus-socket.c1348
-rw-r--r--src/libsystemd/sd-bus/bus-socket.h20
-rw-r--r--src/libsystemd/sd-bus/bus-track.c493
-rw-r--r--src/libsystemd/sd-bus/bus-track.h5
-rw-r--r--src/libsystemd/sd-bus/bus-type.c162
-rw-r--r--src/libsystemd/sd-bus/bus-type.h16
-rw-r--r--src/libsystemd/sd-bus/sd-bus.c4252
-rw-r--r--src/libsystemd/sd-bus/test-bus-address.c70
-rw-r--r--src/libsystemd/sd-bus/test-bus-benchmark.c324
-rw-r--r--src/libsystemd/sd-bus/test-bus-chat.c547
-rw-r--r--src/libsystemd/sd-bus/test-bus-cleanup.c79
-rw-r--r--src/libsystemd/sd-bus/test-bus-creds.c35
-rw-r--r--src/libsystemd/sd-bus/test-bus-error.c299
-rw-r--r--src/libsystemd/sd-bus/test-bus-gvariant.c221
-rw-r--r--src/libsystemd/sd-bus/test-bus-introspect.c35
-rw-r--r--src/libsystemd/sd-bus/test-bus-marshal.c422
-rw-r--r--src/libsystemd/sd-bus/test-bus-match.c148
-rw-r--r--src/libsystemd/sd-bus/test-bus-objects.c536
-rw-r--r--src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c56
-rw-r--r--src/libsystemd/sd-bus/test-bus-server.c198
-rw-r--r--src/libsystemd/sd-bus/test-bus-signature.c147
-rw-r--r--src/libsystemd/sd-bus/test-bus-track.c108
l---------src/libsystemd/sd-bus/test-bus-vtable-cc.cc1
-rw-r--r--src/libsystemd/sd-bus/test-bus-vtable.c76
-rw-r--r--src/libsystemd/sd-bus/test-bus-watch-bind.c224
-rw-r--r--src/libsystemd/sd-bus/test-vtable-data.h132
-rw-r--r--src/libsystemd/sd-daemon/sd-daemon.c679
-rw-r--r--src/libsystemd/sd-device/device-enumerator-private.h18
-rw-r--r--src/libsystemd/sd-device/device-enumerator.c963
-rw-r--r--src/libsystemd/sd-device/device-internal.h114
-rw-r--r--src/libsystemd/sd-device/device-monitor-private.h20
-rw-r--r--src/libsystemd/sd-device/device-monitor.c772
-rw-r--r--src/libsystemd/sd-device/device-private.c1017
-rw-r--r--src/libsystemd/sd-device/device-private.h78
-rw-r--r--src/libsystemd/sd-device/device-util.h64
-rw-r--r--src/libsystemd/sd-device/sd-device.c1996
-rw-r--r--src/libsystemd/sd-device/test-sd-device-monitor.c218
-rw-r--r--src/libsystemd/sd-device/test-sd-device-thread.c39
-rw-r--r--src/libsystemd/sd-device/test-sd-device.c172
-rw-r--r--src/libsystemd/sd-device/test-udev-device-thread.c36
-rw-r--r--src/libsystemd/sd-event/event-source.h213
-rw-r--r--src/libsystemd/sd-event/event-util.c99
-rw-r--r--src/libsystemd/sd-event/event-util.h13
-rw-r--r--src/libsystemd/sd-event/sd-event.c4010
-rw-r--r--src/libsystemd/sd-event/test-event.c607
-rw-r--r--src/libsystemd/sd-hwdb/hwdb-internal.h65
-rw-r--r--src/libsystemd/sd-hwdb/hwdb-util.c668
-rw-r--r--src/libsystemd/sd-hwdb/hwdb-util.h10
-rw-r--r--src/libsystemd/sd-hwdb/sd-hwdb.c466
-rw-r--r--src/libsystemd/sd-id128/id128-util.c212
-rw-r--r--src/libsystemd/sd-id128/id128-util.h38
-rw-r--r--src/libsystemd/sd-id128/sd-id128.c324
-rw-r--r--src/libsystemd/sd-login/sd-login.c1051
-rw-r--r--src/libsystemd/sd-login/test-login.c305
-rw-r--r--src/libsystemd/sd-netlink/generic-netlink.c176
-rw-r--r--src/libsystemd/sd-netlink/generic-netlink.h6
-rw-r--r--src/libsystemd/sd-netlink/netlink-internal.h149
-rw-r--r--src/libsystemd/sd-netlink/netlink-message.c1320
-rw-r--r--src/libsystemd/sd-netlink/netlink-slot.c202
-rw-r--r--src/libsystemd/sd-netlink/netlink-slot.h14
-rw-r--r--src/libsystemd/sd-netlink/netlink-socket.c444
-rw-r--r--src/libsystemd/sd-netlink/netlink-types.c1488
-rw-r--r--src/libsystemd/sd-netlink/netlink-types.h119
-rw-r--r--src/libsystemd/sd-netlink/netlink-util.c447
-rw-r--r--src/libsystemd/sd-netlink/netlink-util.h114
-rw-r--r--src/libsystemd/sd-netlink/rtnl-message.c1144
-rw-r--r--src/libsystemd/sd-netlink/sd-netlink.c921
-rw-r--r--src/libsystemd/sd-netlink/test-netlink.c642
-rw-r--r--src/libsystemd/sd-network/network-util.c105
-rw-r--r--src/libsystemd/sd-network/network-util.h60
-rw-r--r--src/libsystemd/sd-network/sd-network.c448
-rw-r--r--src/libsystemd/sd-path/sd-path.c691
-rw-r--r--src/libsystemd/sd-resolve/resolve-private.h39
-rw-r--r--src/libsystemd/sd-resolve/sd-resolve.c1301
-rw-r--r--src/libsystemd/sd-resolve/test-resolve.c107
-rw-r--r--src/libsystemd/sd-utf8/sd-utf8.c18
-rw-r--r--src/libudev/libudev-device-internal.h10
-rw-r--r--src/libudev/libudev-device.c905
-rw-r--r--src/libudev/libudev-enumerate.c458
-rw-r--r--src/libudev/libudev-hwdb.c123
-rw-r--r--src/libudev/libudev-list-internal.h16
-rw-r--r--src/libudev/libudev-list.c241
-rw-r--r--src/libudev/libudev-monitor.c305
-rw-r--r--src/libudev/libudev-queue.c236
-rw-r--r--src/libudev/libudev-util.c214
-rw-r--r--src/libudev/libudev-util.h24
-rw-r--r--src/libudev/libudev.c154
-rw-r--r--src/libudev/libudev.h191
-rw-r--r--src/libudev/libudev.pc.in19
-rw-r--r--src/libudev/libudev.sym126
-rw-r--r--src/libudev/meson.build29
-rw-r--r--src/locale/kbd-model-map68
-rw-r--r--src/locale/keymap-util.c782
-rw-r--r--src/locale/keymap-util.h44
-rw-r--r--src/locale/language-fallback-map13
-rw-r--r--src/locale/localectl.c519
-rw-r--r--src/locale/localed.c823
-rw-r--r--src/locale/meson.build38
-rw-r--r--src/locale/org.freedesktop.locale1.conf29
-rw-r--r--src/locale/org.freedesktop.locale1.policy42
-rw-r--r--src/locale/org.freedesktop.locale1.service14
-rw-r--r--src/locale/test-keymap-util.c203
-rw-r--r--src/login/70-power-switch.rules15
-rw-r--r--src/login/70-uaccess.rules.m488
-rw-r--r--src/login/71-seat.rules.in82
-rw-r--r--src/login/73-seat-late.rules.m420
-rw-r--r--src/login/inhibit.c333
-rw-r--r--src/login/loginctl.c1488
-rw-r--r--src/login/logind-acl.c256
-rw-r--r--src/login/logind-acl.h34
-rw-r--r--src/login/logind-action.c186
-rw-r--r--src/login/logind-action.h36
-rw-r--r--src/login/logind-brightness.c252
-rw-r--r--src/login/logind-brightness.h9
-rw-r--r--src/login/logind-button.c380
-rw-r--r--src/login/logind-button.h26
-rw-r--r--src/login/logind-core.c844
-rw-r--r--src/login/logind-dbus.c4223
-rw-r--r--src/login/logind-dbus.h34
-rw-r--r--src/login/logind-device.c106
-rw-r--r--src/login/logind-device.h25
-rw-r--r--src/login/logind-gperf.gperf47
-rw-r--r--src/login/logind-inhibit.c537
-rw-r--r--src/login/logind-inhibit.h73
-rw-r--r--src/login/logind-seat-dbus.c487
-rw-r--r--src/login/logind-seat-dbus.h16
-rw-r--r--src/login/logind-seat.c664
-rw-r--r--src/login/logind-seat.h75
-rw-r--r--src/login/logind-session-dbus.c972
-rw-r--r--src/login/logind-session-dbus.h23
-rw-r--r--src/login/logind-session-device.c518
-rw-r--r--src/login/logind-session-device.h39
-rw-r--r--src/login/logind-session.c1461
-rw-r--r--src/login/logind-session.h178
-rw-r--r--src/login/logind-user-dbus.c434
-rw-r--r--src/login/logind-user-dbus.h16
-rw-r--r--src/login/logind-user.c956
-rw-r--r--src/login/logind-user.h75
-rw-r--r--src/login/logind-utmp.c169
-rw-r--r--src/login/logind.c1202
-rw-r--r--src/login/logind.conf.in41
-rw-r--r--src/login/logind.h181
-rw-r--r--src/login/meson.build124
-rw-r--r--src/login/org.freedesktop.login1.conf324
-rw-r--r--src/login/org.freedesktop.login1.policy415
-rw-r--r--src/login/org.freedesktop.login1.service14
-rw-r--r--src/login/pam_systemd.c1024
-rw-r--r--src/login/pam_systemd.sym8
-rw-r--r--src/login/sysfs-show.c165
-rw-r--r--src/login/sysfs-show.h8
-rw-r--r--src/login/systemd-user.m420
-rw-r--r--src/login/test-inhibit.c96
-rw-r--r--src/login/test-login-shared.c22
-rw-r--r--src/login/test-login-tables.c17
-rw-r--r--src/login/user-runtime-dir.c213
-rw-r--r--src/machine-id-setup/machine-id-setup-main.c142
-rw-r--r--src/machine/image-dbus.c505
-rw-r--r--src/machine/image-dbus.h19
-rw-r--r--src/machine/machine-dbus.c1563
-rw-r--r--src/machine/machine-dbus.h27
-rw-r--r--src/machine/machine.c907
-rw-r--r--src/machine/machine.h102
-rw-r--r--src/machine/machinectl.c2897
-rw-r--r--src/machine/machined-core.c104
-rw-r--r--src/machine/machined-dbus.c1617
-rw-r--r--src/machine/machined-varlink.c423
-rw-r--r--src/machine/machined-varlink.h7
-rw-r--r--src/machine/machined.c367
-rw-r--r--src/machine/machined.h63
-rw-r--r--src/machine/meson.build44
-rw-r--r--src/machine/operation.c138
-rw-r--r--src/machine/operation.h31
-rw-r--r--src/machine/org.freedesktop.machine1.conf242
-rw-r--r--src/machine/org.freedesktop.machine1.policy104
-rw-r--r--src/machine/org.freedesktop.machine1.service14
-rw-r--r--src/machine/test-machine-tables.c12
-rw-r--r--src/modules-load/modules-load.c227
-rw-r--r--src/mount/mount-tool.c1539
-rw-r--r--src/network/fuzz-netdev-parser.c25
-rw-r--r--src/network/fuzz-network-parser.c28
-rw-r--r--src/network/fuzz-network-parser.options2
-rw-r--r--src/network/generator/main.c206
-rw-r--r--src/network/generator/network-generator.c1233
-rw-r--r--src/network/generator/network-generator.h108
-rw-r--r--src/network/generator/test-network-generator.c438
-rw-r--r--src/network/meson.build303
-rw-r--r--src/network/netdev/bareudp.c138
-rw-r--r--src/network/netdev/bareudp.h34
-rw-r--r--src/network/netdev/bond.c527
-rw-r--r--src/network/netdev/bond.h62
-rw-r--r--src/network/netdev/bridge.c368
-rw-r--r--src/network/netdev/bridge.h47
-rw-r--r--src/network/netdev/dummy.c10
-rw-r--r--src/network/netdev/dummy.h11
-rw-r--r--src/network/netdev/fou-tunnel.c279
-rw-r--r--src/network/netdev/fou-tunnel.h42
-rw-r--r--src/network/netdev/geneve.c356
-rw-r--r--src/network/netdev/geneve.h52
-rw-r--r--src/network/netdev/ifb.c11
-rw-r--r--src/network/netdev/ifb.h13
-rw-r--r--src/network/netdev/ipvlan.c91
-rw-r--r--src/network/netdev/ipvlan.h25
-rw-r--r--src/network/netdev/l2tp-tunnel.c728
-rw-r--r--src/network/netdev/l2tp-tunnel.h78
-rw-r--r--src/network/netdev/macsec.c1252
-rw-r--r--src/network/netdev/macsec.h87
-rw-r--r--src/network/netdev/macvlan.c105
-rw-r--r--src/network/netdev/macvlan.h22
-rw-r--r--src/network/netdev/netdev-gperf.gperf232
-rw-r--r--src/network/netdev/netdev.c868
-rw-r--r--src/network/netdev/netdev.h244
-rw-r--r--src/network/netdev/netdevsim.c10
-rw-r--r--src/network/netdev/netdevsim.h13
-rw-r--r--src/network/netdev/nlmon.c22
-rw-r--r--src/network/netdev/nlmon.h14
-rw-r--r--src/network/netdev/tunnel.c903
-rw-r--r--src/network/netdev/tunnel.h92
-rw-r--r--src/network/netdev/tuntap.c164
-rw-r--r--src/network/netdev/tuntap.h21
-rw-r--r--src/network/netdev/vcan.c10
-rw-r--r--src/network/netdev/vcan.h17
-rw-r--r--src/network/netdev/veth.c95
-rw-r--r--src/network/netdev/veth.h16
-rw-r--r--src/network/netdev/vlan.c92
-rw-r--r--src/network/netdev/vlan.h20
-rw-r--r--src/network/netdev/vrf.c32
-rw-r--r--src/network/netdev/vrf.h15
-rw-r--r--src/network/netdev/vxcan.c74
-rw-r--r--src/network/netdev/vxcan.h16
-rw-r--r--src/network/netdev/vxlan.c390
-rw-r--r--src/network/netdev/vxlan.h74
-rw-r--r--src/network/netdev/wireguard.c946
-rw-r--r--src/network/netdev/wireguard.h70
-rw-r--r--src/network/netdev/xfrm.c33
-rw-r--r--src/network/netdev/xfrm.h14
-rw-r--r--src/network/networkctl.c2830
-rw-r--r--src/network/networkd-address-label.c242
-rw-r--r--src/network/networkd-address-label.h29
-rw-r--r--src/network/networkd-address-pool.c190
-rw-r--r--src/network/networkd-address-pool.h17
-rw-r--r--src/network/networkd-address.c1922
-rw-r--r--src/network/networkd-address.h93
-rw-r--r--src/network/networkd-brvlan.c283
-rw-r--r--src/network/networkd-brvlan.h19
-rw-r--r--src/network/networkd-can.c315
-rw-r--r--src/network/networkd-can.h10
-rw-r--r--src/network/networkd-conf.c191
-rw-r--r--src/network/networkd-conf.h17
-rw-r--r--src/network/networkd-dhcp-common.c935
-rw-r--r--src/network/networkd-dhcp-common.h72
-rw-r--r--src/network/networkd-dhcp-server-bus.c110
-rw-r--r--src/network/networkd-dhcp-server-bus.h9
-rw-r--r--src/network/networkd-dhcp-server.c439
-rw-r--r--src/network/networkd-dhcp-server.h12
-rw-r--r--src/network/networkd-dhcp4.c1761
-rw-r--r--src/network/networkd-dhcp4.h30
-rw-r--r--src/network/networkd-dhcp6.c1719
-rw-r--r--src/network/networkd-dhcp6.h45
-rw-r--r--src/network/networkd-fdb.c409
-rw-r--r--src/network/networkd-fdb.h52
-rw-r--r--src/network/networkd-gperf.gperf25
-rw-r--r--src/network/networkd-ipv4ll.c313
-rw-r--r--src/network/networkd-ipv4ll.h15
-rw-r--r--src/network/networkd-ipv6-proxy-ndp.c164
-rw-r--r--src/network/networkd-ipv6-proxy-ndp.h10
-rw-r--r--src/network/networkd-link-bus.c816
-rw-r--r--src/network/networkd-link-bus.h36
-rw-r--r--src/network/networkd-link.c3263
-rw-r--r--src/network/networkd-link.h249
-rw-r--r--src/network/networkd-lldp-rx.c205
-rw-r--r--src/network/networkd-lldp-rx.h23
-rw-r--r--src/network/networkd-lldp-tx.c493
-rw-r--r--src/network/networkd-lldp-tx.h23
-rw-r--r--src/network/networkd-manager-bus.c274
-rw-r--r--src/network/networkd-manager-bus.h10
-rw-r--r--src/network/networkd-manager.c1254
-rw-r--r--src/network/networkd-manager.h97
-rw-r--r--src/network/networkd-mdb.c365
-rw-r--r--src/network/networkd-mdb.h29
-rw-r--r--src/network/networkd-ndisc.c1516
-rw-r--r--src/network/networkd-ndisc.h85
-rw-r--r--src/network/networkd-neighbor.c725
-rw-r--r--src/network/networkd-neighbor.h45
-rw-r--r--src/network/networkd-network-bus.c137
-rw-r--r--src/network/networkd-network-bus.h11
-rw-r--r--src/network/networkd-network-gperf.gperf482
-rw-r--r--src/network/networkd-network.c1238
-rw-r--r--src/network/networkd-network.h340
-rw-r--r--src/network/networkd-nexthop.c534
-rw-r--r--src/network/networkd-nexthop.h41
-rw-r--r--src/network/networkd-radv.c999
-rw-r--r--src/network/networkd-radv.h70
-rw-r--r--src/network/networkd-route.c2537
-rw-r--r--src/network/networkd-route.h102
-rw-r--r--src/network/networkd-routing-policy-rule.c1810
-rw-r--r--src/network/networkd-routing-policy-rule.h73
-rw-r--r--src/network/networkd-speed-meter.c113
-rw-r--r--src/network/networkd-speed-meter.h12
-rw-r--r--src/network/networkd-sriov.c532
-rw-r--r--src/network/networkd-sriov.h46
-rw-r--r--src/network/networkd-sysctl.c288
-rw-r--r--src/network/networkd-sysctl.h25
-rw-r--r--src/network/networkd-util.c165
-rw-r--r--src/network/networkd-util.h83
-rw-r--r--src/network/networkd-wifi.c62
-rw-r--r--src/network/networkd-wifi.h8
-rw-r--r--src/network/networkd.c110
-rw-r--r--src/network/networkd.conf21
-rw-r--r--src/network/org.freedesktop.network1.conf27
-rw-r--r--src/network/org.freedesktop.network1.policy186
-rw-r--r--src/network/org.freedesktop.network1.service14
-rw-r--r--src/network/systemd-networkd.pkla4
-rw-r--r--src/network/systemd-networkd.rules10
-rw-r--r--src/network/tc/cake.c163
-rw-r--r--src/network/tc/cake.h20
-rw-r--r--src/network/tc/codel.c255
-rw-r--r--src/network/tc/codel.h24
-rw-r--r--src/network/tc/drr.c109
-rw-r--r--src/network/tc/drr.h23
-rw-r--r--src/network/tc/ets.c344
-rw-r--r--src/network/tc/ets.h25
-rw-r--r--src/network/tc/fifo.c187
-rw-r--r--src/network/tc/fifo.h25
-rw-r--r--src/network/tc/fq-codel.c355
-rw-r--r--src/network/tc/fq-codel.h28
-rw-r--r--src/network/tc/fq-pie.c103
-rw-r--r--src/network/tc/fq-pie.h17
-rw-r--r--src/network/tc/fq.c420
-rw-r--r--src/network/tc/fq.h29
-rw-r--r--src/network/tc/gred.c196
-rw-r--r--src/network/tc/gred.h20
-rw-r--r--src/network/tc/hhf.c98
-rw-r--r--src/network/tc/hhf.h17
-rw-r--r--src/network/tc/htb.c489
-rw-r--r--src/network/tc/htb.h39
-rw-r--r--src/network/tc/netem.c236
-rw-r--r--src/network/tc/netem.h25
-rw-r--r--src/network/tc/pie.c97
-rw-r--r--src/network/tc/pie.h17
-rw-r--r--src/network/tc/qdisc.c381
-rw-r--r--src/network/tc/qdisc.h107
-rw-r--r--src/network/tc/qfq.c178
-rw-r--r--src/network/tc/qfq.h26
-rw-r--r--src/network/tc/sfb.c108
-rw-r--r--src/network/tc/sfb.h17
-rw-r--r--src/network/tc/sfq.c91
-rw-r--r--src/network/tc/sfq.h18
-rw-r--r--src/network/tc/tbf.c346
-rw-r--r--src/network/tc/tbf.h26
-rw-r--r--src/network/tc/tc-util.c132
-rw-r--r--src/network/tc/tc-util.h14
-rw-r--r--src/network/tc/tc.c81
-rw-r--r--src/network/tc/tc.h32
-rw-r--r--src/network/tc/tclass.c289
-rw-r--r--src/network/tc/tclass.h71
-rw-r--r--src/network/tc/teql.c91
-rw-r--r--src/network/tc/teql.h16
-rw-r--r--src/network/test-network-tables.c49
-rw-r--r--src/network/test-network.c258
-rw-r--r--src/network/test-networkd-conf.c260
-rw-r--r--src/network/test-routing-policy-rule.c90
-rw-r--r--src/network/wait-online/link.c153
-rw-r--r--src/network/wait-online/link.h30
-rw-r--r--src/network/wait-online/manager.c369
-rw-r--r--src/network/wait-online/manager.h42
-rw-r--r--src/network/wait-online/wait-online.c224
-rw-r--r--src/notify/notify.c281
-rw-r--r--src/nspawn/meson.build60
-rw-r--r--src/nspawn/nspawn-cgroup.c605
-rw-r--r--src/nspawn/nspawn-cgroup.h14
-rw-r--r--src/nspawn/nspawn-creds.c25
-rw-r--r--src/nspawn/nspawn-creds.h12
-rw-r--r--src/nspawn/nspawn-def.h9
-rw-r--r--src/nspawn/nspawn-expose-ports.c231
-rw-r--r--src/nspawn/nspawn-expose-ports.h26
-rw-r--r--src/nspawn/nspawn-gperf.gperf77
-rw-r--r--src/nspawn/nspawn-mount.c1312
-rw-r--r--src/nspawn/nspawn-mount.h69
-rw-r--r--src/nspawn/nspawn-network.c765
-rw-r--r--src/nspawn/nspawn-network.h23
-rw-r--r--src/nspawn/nspawn-oci.c2258
-rw-r--r--src/nspawn/nspawn-oci.h6
-rw-r--r--src/nspawn/nspawn-patch-uid.c485
-rw-r--r--src/nspawn/nspawn-patch-uid.h7
-rw-r--r--src/nspawn/nspawn-register.c361
-rw-r--r--src/nspawn/nspawn-register.h14
-rw-r--r--src/nspawn/nspawn-seccomp.c260
-rw-r--r--src/nspawn/nspawn-seccomp.h6
-rw-r--r--src/nspawn/nspawn-settings.c868
-rw-r--r--src/nspawn/nspawn-settings.h266
-rw-r--r--src/nspawn/nspawn-setuid.c246
-rw-r--r--src/nspawn/nspawn-setuid.h5
-rw-r--r--src/nspawn/nspawn-stub-pid1.c200
-rw-r--r--src/nspawn/nspawn-stub-pid1.h6
-rw-r--r--src/nspawn/nspawn.c5561
-rw-r--r--src/nspawn/test-nspawn-tables.c11
-rw-r--r--src/nspawn/test-patch-uid.c43
-rw-r--r--src/nss-myhostname/nss-myhostname.c500
-rw-r--r--src/nss-myhostname/nss-myhostname.sym19
-rw-r--r--src/nss-mymachines/nss-mymachines.c429
-rw-r--r--src/nss-mymachines/nss-mymachines.sym21
-rw-r--r--src/nss-resolve/nss-resolve.c664
-rw-r--r--src/nss-resolve/nss-resolve.sym19
-rw-r--r--src/nss-systemd/nss-systemd.c640
-rw-r--r--src/nss-systemd/nss-systemd.h13
-rw-r--r--src/nss-systemd/nss-systemd.sym28
-rw-r--r--src/nss-systemd/userdb-glue.c328
-rw-r--r--src/nss-systemd/userdb-glue.h20
-rw-r--r--src/oom/meson.build36
-rw-r--r--src/oom/oomctl.c138
-rw-r--r--src/oom/oomd-manager-bus.c47
-rw-r--r--src/oom/oomd-manager-bus.h8
-rw-r--r--src/oom/oomd-manager.c546
-rw-r--r--src/oom/oomd-manager.h60
-rw-r--r--src/oom/oomd-util.c451
-rw-r--r--src/oom/oomd-util.h112
-rw-r--r--src/oom/oomd.c178
-rw-r--r--src/oom/oomd.conf16
-rw-r--r--src/oom/org.freedesktop.oom1.conf47
-rw-r--r--src/oom/org.freedesktop.oom1.service14
-rw-r--r--src/oom/test-oomd-util.c346
-rw-r--r--src/partition/growfs.c261
-rw-r--r--src/partition/makefs.c71
-rw-r--r--src/partition/meson.build7
-rw-r--r--src/partition/repart.c4114
-rwxr-xr-xsrc/partition/test-repart.sh213
-rw-r--r--src/path/path.c220
-rw-r--r--src/portable/meson.build29
-rw-r--r--src/portable/org.freedesktop.portable1.conf117
-rw-r--r--src/portable/org.freedesktop.portable1.policy43
-rw-r--r--src/portable/org.freedesktop.portable1.service7
-rw-r--r--src/portable/portable.c1426
-rw-r--r--src/portable/portable.h74
-rw-r--r--src/portable/portablectl.c1126
-rw-r--r--src/portable/portabled-bus.c402
-rw-r--r--src/portable/portabled-bus.h10
-rw-r--r--src/portable/portabled-image-bus.c740
-rw-r--r--src/portable/portabled-image-bus.h41
-rw-r--r--src/portable/portabled-image.c103
-rw-r--r--src/portable/portabled-image.h12
-rw-r--r--src/portable/portabled-operation.c128
-rw-r--r--src/portable/portabled-operation.h29
-rw-r--r--src/portable/portabled.c169
-rw-r--r--src/portable/portabled.h25
-rw-r--r--src/portable/profile/default/service.conf32
-rw-r--r--src/portable/profile/nonetwork/service.conf32
-rw-r--r--src/portable/profile/strict/service.conf31
-rw-r--r--src/portable/profile/trusted/service.conf7
-rw-r--r--src/pstore/meson.build10
-rw-r--r--src/pstore/pstore.c408
-rw-r--r--src/pstore/pstore.conf16
-rw-r--r--src/quotacheck/quotacheck.c102
-rw-r--r--src/random-seed/random-seed.c301
-rw-r--r--src/rc-local-generator/rc-local-generator.c71
-rw-r--r--src/remount-fs/remount-fs.c152
-rw-r--r--src/reply-password/reply-password.c81
-rw-r--r--src/resolve/RFCs60
-rw-r--r--src/resolve/dns-type.c316
-rw-r--r--src/resolve/dns-type.h144
-rw-r--r--src/resolve/dns_type-to-name.awk11
-rwxr-xr-xsrc/resolve/generate-dns_type-gperf.py24
-rw-r--r--src/resolve/generate-dns_type-list.sed1
-rw-r--r--src/resolve/meson.build235
-rw-r--r--src/resolve/org.freedesktop.resolve1.conf27
-rw-r--r--src/resolve/org.freedesktop.resolve1.policy142
-rw-r--r--src/resolve/org.freedesktop.resolve1.service14
-rw-r--r--src/resolve/resolv.conf19
-rw-r--r--src/resolve/resolvconf-compat.c275
-rw-r--r--src/resolve/resolvconf-compat.h4
-rw-r--r--src/resolve/resolvectl.c3322
-rw-r--r--src/resolve/resolvectl.h29
-rw-r--r--src/resolve/resolved-bus.c2236
-rw-r--r--src/resolve/resolved-bus.h15
-rw-r--r--src/resolve/resolved-conf.c516
-rw-r--r--src/resolve/resolved-conf.h22
-rw-r--r--src/resolve/resolved-def.h34
-rw-r--r--src/resolve/resolved-dns-answer.c771
-rw-r--r--src/resolve/resolved-dns-answer.h129
-rw-r--r--src/resolve/resolved-dns-cache.c1119
-rw-r--r--src/resolve/resolved-dns-cache.h35
-rw-r--r--src/resolve/resolved-dns-dnssec.c2263
-rw-r--r--src/resolve/resolved-dns-dnssec.h81
-rw-r--r--src/resolve/resolved-dns-packet.c2410
-rw-r--r--src/resolve/resolved-dns-packet.h302
-rw-r--r--src/resolve/resolved-dns-query.c1041
-rw-r--r--src/resolve/resolved-dns-query.h130
-rw-r--r--src/resolve/resolved-dns-question.c447
-rw-r--r--src/resolve/resolved-dns-question.h56
-rw-r--r--src/resolve/resolved-dns-rr.c1824
-rw-r--r--src/resolve/resolved-dns-rr.h341
-rw-r--r--src/resolve/resolved-dns-scope.c1436
-rw-r--r--src/resolve/resolved-dns-scope.h113
-rw-r--r--src/resolve/resolved-dns-search-domain.c196
-rw-r--r--src/resolve/resolved-dns-search-domain.h56
-rw-r--r--src/resolve/resolved-dns-server.c953
-rw-r--r--src/resolve/resolved-dns-server.h164
-rw-r--r--src/resolve/resolved-dns-stream.c590
-rw-r--r--src/resolve/resolved-dns-stream.h107
-rw-r--r--src/resolve/resolved-dns-stub.c776
-rw-r--r--src/resolve/resolved-dns-stub.h41
-rw-r--r--src/resolve/resolved-dns-synthesize.c450
-rw-r--r--src/resolve/resolved-dns-synthesize.h12
-rw-r--r--src/resolve/resolved-dns-transaction.c3258
-rw-r--r--src/resolve/resolved-dns-transaction.h181
-rw-r--r--src/resolve/resolved-dns-trust-anchor.c770
-rw-r--r--src/resolve/resolved-dns-trust-anchor.h25
-rw-r--r--src/resolve/resolved-dns-zone.c696
-rw-r--r--src/resolve/resolved-dns-zone.h69
-rw-r--r--src/resolve/resolved-dnssd-bus.c135
-rw-r--r--src/resolve/resolved-dnssd-bus.h11
-rw-r--r--src/resolve/resolved-dnssd-gperf.gperf24
-rw-r--r--src/resolve/resolved-dnssd.c368
-rw-r--r--src/resolve/resolved-dnssd.h61
-rw-r--r--src/resolve/resolved-dnstls-gnutls.c241
-rw-r--r--src/resolve/resolved-dnstls-gnutls.h24
-rw-r--r--src/resolve/resolved-dnstls-openssl.c411
-rw-r--r--src/resolve/resolved-dnstls-openssl.h25
-rw-r--r--src/resolve/resolved-dnstls.h37
-rw-r--r--src/resolve/resolved-etc-hosts.c384
-rw-r--r--src/resolve/resolved-etc-hosts.h25
-rw-r--r--src/resolve/resolved-gperf.gperf32
-rw-r--r--src/resolve/resolved-link-bus.c851
-rw-r--r--src/resolve/resolved-link-bus.h22
-rw-r--r--src/resolve/resolved-link.c1432
-rw-r--r--src/resolve/resolved-link.h113
-rw-r--r--src/resolve/resolved-llmnr.c450
-rw-r--r--src/resolve/resolved-llmnr.h14
-rw-r--r--src/resolve/resolved-manager.c1551
-rw-r--r--src/resolve/resolved-manager.h195
-rw-r--r--src/resolve/resolved-mdns.c482
-rw-r--r--src/resolve/resolved-mdns.h13
-rw-r--r--src/resolve/resolved-resolv-conf.c429
-rw-r--r--src/resolve/resolved-resolv-conf.h23
-rw-r--r--src/resolve/resolved-varlink.c544
-rw-r--r--src/resolve/resolved-varlink.h7
-rw-r--r--src/resolve/resolved.c99
-rw-r--r--src/resolve/resolved.conf.in30
-rw-r--r--src/resolve/test-dns-packet.c120
-rw-r--r--src/resolve/test-dnssec-complex.c223
-rw-r--r--src/resolve/test-dnssec.c623
-rw-r--r--src/resolve/test-resolve-tables.c54
-rw-r--r--src/resolve/test-resolved-etc-hosts.c150
-rw-r--r--src/resolve/test-resolved-packet.c32
-rw-r--r--src/rfkill/rfkill.c377
-rw-r--r--src/run-generator/run-generator.c139
-rw-r--r--src/run/run.c1777
-rw-r--r--src/shared/acl-util.c437
-rw-r--r--src/shared/acl-util.h40
-rw-r--r--src/shared/acpi-fpdt.c147
-rw-r--r--src/shared/acpi-fpdt.h6
-rw-r--r--src/shared/apparmor-util.c22
-rw-r--r--src/shared/apparmor-util.h6
-rw-r--r--src/shared/ask-password-api.c1050
-rw-r--r--src/shared/ask-password-api.h21
-rw-r--r--src/shared/barrier.c398
-rw-r--r--src/shared/barrier.h74
-rw-r--r--src/shared/base-filesystem.c124
-rw-r--r--src/shared/base-filesystem.h6
-rw-r--r--src/shared/binfmt-util.c35
-rw-r--r--src/shared/binfmt-util.h4
-rw-r--r--src/shared/bitmap.c212
-rw-r--r--src/shared/bitmap.h37
-rw-r--r--src/shared/blkid-util.h10
-rw-r--r--src/shared/bond-util.c73
-rw-r--r--src/shared/bond-util.h106
-rw-r--r--src/shared/boot-timestamps.c46
-rw-r--r--src/shared/boot-timestamps.h6
-rw-r--r--src/shared/bootspec.c1432
-rw-r--r--src/shared/bootspec.h86
-rw-r--r--src/shared/bpf-program.c256
-rw-r--r--src/shared/bpf-program.h43
-rw-r--r--src/shared/bridge-util.c13
-rw-r--r--src/shared/bridge-util.h20
-rw-r--r--src/shared/bus-get-properties.c167
-rw-r--r--src/shared/bus-get-properties.h102
-rw-r--r--src/shared/bus-locator.c213
-rw-r--r--src/shared/bus-locator.h34
-rw-r--r--src/shared/bus-log-control-api.c115
-rw-r--r--src/shared/bus-log-control-api.h19
-rw-r--r--src/shared/bus-map-properties.c246
-rw-r--r--src/shared/bus-map-properties.h25
-rw-r--r--src/shared/bus-message-util.c182
-rw-r--r--src/shared/bus-message-util.h18
-rw-r--r--src/shared/bus-object.c177
-rw-r--r--src/shared/bus-object.h34
-rw-r--r--src/shared/bus-polkit.c415
-rw-r--r--src/shared/bus-polkit.h11
-rw-r--r--src/shared/bus-print-properties.c462
-rw-r--r--src/shared/bus-print-properties.h16
-rw-r--r--src/shared/bus-unit-procs.c407
-rw-r--r--src/shared/bus-unit-procs.h8
-rw-r--r--src/shared/bus-unit-util.c2432
-rw-r--r--src/shared/bus-unit-util.h32
-rw-r--r--src/shared/bus-util.c577
-rw-r--r--src/shared/bus-util.h77
-rw-r--r--src/shared/bus-wait-for-jobs.c331
-rw-r--r--src/shared/bus-wait-for-jobs.h16
-rw-r--r--src/shared/bus-wait-for-units.c429
-rw-r--r--src/shared/bus-wait-for-units.h35
-rw-r--r--src/shared/calendarspec.c1405
-rw-r--r--src/shared/calendarspec.h46
-rw-r--r--src/shared/cgroup-setup.c841
-rw-r--r--src/shared/cgroup-setup.h34
-rw-r--r--src/shared/cgroup-show.c400
-rw-r--r--src/shared/cgroup-show.h24
-rw-r--r--src/shared/chown-recursive.c178
-rw-r--r--src/shared/chown-recursive.h8
-rw-r--r--src/shared/clean-ipc.c454
-rw-r--r--src/shared/clean-ipc.h17
-rw-r--r--src/shared/clock-util.c167
-rw-r--r--src/shared/clock-util.h11
-rw-r--r--src/shared/condition.c973
-rw-r--r--src/shared/condition.h106
-rw-r--r--src/shared/conf-parser.c1247
-rw-r--r--src/shared/conf-parser.h303
-rw-r--r--src/shared/coredump-util.c74
-rw-r--r--src/shared/coredump-util.h29
-rw-r--r--src/shared/cpu-set-util.c295
-rw-r--r--src/shared/cpu-set-util.h52
-rw-r--r--src/shared/cryptsetup-util.c111
-rw-r--r--src/shared/cryptsetup-util.h44
-rw-r--r--src/shared/daemon-util.h22
-rw-r--r--src/shared/dev-setup.c120
-rw-r--r--src/shared/dev-setup.h8
-rw-r--r--src/shared/dissect-image.c2557
-rw-r--r--src/shared/dissect-image.h163
-rw-r--r--src/shared/dm-util.c45
-rw-r--r--src/shared/dm-util.h4
-rw-r--r--src/shared/dns-domain.c1414
-rw-r--r--src/shared/dns-domain.h115
-rw-r--r--src/shared/dropin.c279
-rw-r--r--src/shared/dropin.h26
-rw-r--r--src/shared/efi-loader.c806
-rw-r--r--src/shared/efi-loader.h97
-rw-r--r--src/shared/enable-mempool.c5
-rw-r--r--src/shared/env-file-label.c21
-rw-r--r--src/shared/env-file-label.h8
-rw-r--r--src/shared/ethtool-util.c1149
-rw-r--r--src/shared/ethtool-util.h129
-rw-r--r--src/shared/exec-util.c446
-rw-r--r--src/shared/exec-util.h47
-rw-r--r--src/shared/exit-status.c178
-rw-r--r--src/shared/exit-status.h111
-rw-r--r--src/shared/fdset.c252
-rw-r--r--src/shared/fdset.h44
-rw-r--r--src/shared/fileio-label.c37
-rw-r--r--src/shared/fileio-label.h15
-rw-r--r--src/shared/firewall-util.c350
-rw-r--r--src/shared/firewall-util.h65
-rw-r--r--src/shared/format-table.c2549
-rw-r--r--src/shared/format-table.h139
-rw-r--r--src/shared/fsck-util.h14
-rw-r--r--src/shared/fstab-util.c297
-rw-r--r--src/shared/fstab-util.h38
-rwxr-xr-xsrc/shared/generate-ip-protocol-list.sh6
-rwxr-xr-xsrc/shared/generate-syscall-list.py14
-rw-r--r--src/shared/generator.c631
-rw-r--r--src/shared/generator.h84
-rw-r--r--src/shared/geneve-util.c12
-rw-r--r--src/shared/geneve-util.h17
-rw-r--r--src/shared/gpt.c97
-rw-r--r--src/shared/gpt.h128
-rw-r--r--src/shared/group-record.c348
-rw-r--r--src/shared/group-record.h46
-rw-r--r--src/shared/id128-print.c76
-rw-r--r--src/shared/id128-print.h19
-rw-r--r--src/shared/idn-util.c91
-rw-r--r--src/shared/idn-util.h32
-rw-r--r--src/shared/ima-util.c15
-rw-r--r--src/shared/ima-util.h6
-rw-r--r--src/shared/import-util.c179
-rw-r--r--src/shared/import-util.h27
-rw-r--r--src/shared/initreq.h74
-rw-r--r--src/shared/install-printf.c127
-rw-r--r--src/shared/install-printf.h6
-rw-r--r--src/shared/install.c3479
-rw-r--r--src/shared/install.h213
-rw-r--r--src/shared/ip-protocol-list.c67
-rw-r--r--src/shared/ip-protocol-list.h6
-rw-r--r--src/shared/ip-protocol-to-name.awk9
-rw-r--r--src/shared/ipvlan-util.c22
-rw-r--r--src/shared/ipvlan-util.h29
-rw-r--r--src/shared/journal-importer.c483
-rw-r--r--src/shared/journal-importer.h60
-rw-r--r--src/shared/journal-util.c139
-rw-r--r--src/shared/journal-util.h10
-rw-r--r--src/shared/json-internal.h76
-rw-r--r--src/shared/json.c4410
-rw-r--r--src/shared/json.h356
-rw-r--r--src/shared/libcrypt-util.c212
-rw-r--r--src/shared/libcrypt-util.h13
-rw-r--r--src/shared/libmount-util.h47
-rw-r--r--src/shared/libshared.sym3
-rw-r--r--src/shared/linux/README8
-rw-r--r--src/shared/linux/auto_dev-ioctl.h220
-rw-r--r--src/shared/linux/bpf.h3057
-rw-r--r--src/shared/linux/bpf_common.h57
-rw-r--r--src/shared/linux/bpf_insn.h225
-rw-r--r--src/shared/linux/dm-ioctl.h363
-rw-r--r--src/shared/linux/ethtool.h2021
-rw-r--r--src/shared/linux/nl80211.h6554
-rw-r--r--src/shared/local-addresses.c315
-rw-r--r--src/shared/local-addresses.h17
-rw-r--r--src/shared/lockfile-util.c137
-rw-r--r--src/shared/lockfile-util.h14
-rw-r--r--src/shared/log-link.h39
-rw-r--r--src/shared/logs-show.c1672
-rw-r--r--src/shared/logs-show.h64
-rw-r--r--src/shared/loop-util.c722
-rw-r--r--src/shared/loop-util.h29
-rw-r--r--src/shared/machine-image.c1274
-rw-r--r--src/shared/machine-image.h112
-rw-r--r--src/shared/machine-pool.c45
-rw-r--r--src/shared/machine-pool.h8
-rw-r--r--src/shared/macvlan-util.c15
-rw-r--r--src/shared/macvlan-util.h17
-rw-r--r--src/shared/main-func.h40
-rw-r--r--src/shared/meson.build399
-rw-r--r--src/shared/mkfs-util.c135
-rw-r--r--src/shared/mkfs-util.h10
-rw-r--r--src/shared/module-util.c72
-rw-r--r--src/shared/module-util.h12
-rw-r--r--src/shared/mount-util.c744
-rw-r--r--src/shared/mount-util.h99
-rw-r--r--src/shared/netif-naming-scheme.c69
-rw-r--r--src/shared/netif-naming-scheme.h57
-rw-r--r--src/shared/nscd-flush.c151
-rw-r--r--src/shared/nscd-flush.h4
-rw-r--r--src/shared/nsflags.c71
-rw-r--r--src/shared/nsflags.h29
-rw-r--r--src/shared/numa-util.c192
-rw-r--r--src/shared/numa-util.h35
-rw-r--r--src/shared/offline-passwd.c164
-rw-r--r--src/shared/offline-passwd.h9
-rw-r--r--src/shared/openssl-util.h12
-rw-r--r--src/shared/os-util.c149
-rw-r--r--src/shared/os-util.h13
-rw-r--r--src/shared/output-mode.c42
-rw-r--r--src/shared/output-mode.h49
-rw-r--r--src/shared/pager.c331
-rw-r--r--src/shared/pager.h17
-rw-r--r--src/shared/pam-util.c83
-rw-r--r--src/shared/pam-util.h15
-rw-r--r--src/shared/pe-header.h61
-rw-r--r--src/shared/pkcs11-util.c932
-rw-r--r--src/shared/pkcs11-util.h47
-rw-r--r--src/shared/pretty-print.c325
-rw-r--r--src/shared/pretty-print.h19
-rw-r--r--src/shared/psi-util.c118
-rw-r--r--src/shared/psi-util.h30
-rw-r--r--src/shared/ptyfwd.c681
-rw-r--r--src/shared/ptyfwd.h42
-rw-r--r--src/shared/pwquality-util.c191
-rw-r--r--src/shared/pwquality-util.h41
-rw-r--r--src/shared/qrcode-util.c107
-rw-r--r--src/shared/qrcode-util.h13
-rw-r--r--src/shared/reboot-util.c109
-rw-r--r--src/shared/reboot-util.h15
-rw-r--r--src/shared/resize-fs.c121
-rw-r--r--src/shared/resize-fs.h15
-rw-r--r--src/shared/resolve-util.c52
-rw-r--r--src/shared/resolve-util.h92
-rw-r--r--src/shared/seccomp-util.c2140
-rw-r--r--src/shared/seccomp-util.h142
-rw-r--r--src/shared/securebits-util.c66
-rw-r--r--src/shared/securebits-util.h18
-rw-r--r--src/shared/serialize.c215
-rw-r--r--src/shared/serialize.h27
-rw-r--r--src/shared/service-util.c87
-rw-r--r--src/shared/service-util.h10
-rw-r--r--src/shared/sleep-config.c703
-rw-r--r--src/shared/sleep-config.h59
-rw-r--r--src/shared/socket-netlink.c493
-rw-r--r--src/shared/socket-netlink.h50
-rw-r--r--src/shared/spawn-ask-password-agent.c61
-rw-r--r--src/shared/spawn-ask-password-agent.h11
-rw-r--r--src/shared/spawn-polkit-agent.c98
-rw-r--r--src/shared/spawn-polkit-agent.h11
-rw-r--r--src/shared/specifier.c358
-rw-r--r--src/shared/specifier.h91
-rw-r--r--src/shared/switch-root.c127
-rw-r--r--src/shared/switch-root.h6
-rw-r--r--src/shared/syscall-names.text598
-rw-r--r--src/shared/sysctl-util.c129
-rw-r--r--src/shared/sysctl-util.h30
-rw-r--r--src/shared/test-tables.h44
-rw-r--r--src/shared/tests.c343
-rw-r--r--src/shared/tests.h45
-rw-r--r--src/shared/tmpfile-util-label.c26
-rw-r--r--src/shared/tmpfile-util-label.h10
-rw-r--r--src/shared/tomoyo-util.c15
-rw-r--r--src/shared/tomoyo-util.h6
-rw-r--r--src/shared/udev-util.c371
-rw-r--r--src/shared/udev-util.h36
-rw-r--r--src/shared/uid-range.c180
-rw-r--r--src/shared/uid-range.h15
-rw-r--r--src/shared/unit-file.c601
-rw-r--r--src/shared/unit-file.h61
-rw-r--r--src/shared/user-record-nss.c531
-rw-r--r--src/shared/user-record-nss.h24
-rw-r--r--src/shared/user-record-show.c584
-rw-r--r--src/shared/user-record-show.h10
-rw-r--r--src/shared/user-record.c2272
-rw-r--r--src/shared/user-record.h444
-rw-r--r--src/shared/userdb.c1249
-rw-r--r--src/shared/userdb.h41
-rw-r--r--src/shared/utmp-wtmp.c409
-rw-r--r--src/shared/utmp-wtmp.h67
-rw-r--r--src/shared/varlink.c2502
-rw-r--r--src/shared/varlink.h174
-rw-r--r--src/shared/verbs.c112
-rw-r--r--src/shared/verbs.h23
-rw-r--r--src/shared/vlan-util.c100
-rw-r--r--src/shared/vlan-util.h21
-rw-r--r--src/shared/volatile-util.c46
-rw-r--r--src/shared/volatile-util.h16
-rw-r--r--src/shared/watchdog.c191
-rw-r--r--src/shared/watchdog.h17
-rw-r--r--src/shared/web-util.c53
-rw-r--r--src/shared/web-util.h12
-rw-r--r--src/shared/wifi-util.c128
-rw-r--r--src/shared/wifi-util.h11
-rw-r--r--src/shared/xml.c237
-rw-r--r--src/shared/xml.h14
-rw-r--r--src/shutdown/meson.build5
-rw-r--r--src/shutdown/shutdown.c625
-rw-r--r--src/shutdown/umount.c843
-rw-r--r--src/shutdown/umount.h32
-rw-r--r--src/sleep/sleep.c393
-rw-r--r--src/sleep/sleep.conf25
-rw-r--r--src/socket-proxy/socket-proxyd.c722
-rw-r--r--src/stdio-bridge/stdio-bridge.c259
-rw-r--r--src/sulogin-shell/sulogin-shell.c124
-rw-r--r--src/sysctl/sysctl.c432
-rw-r--r--src/system-update-generator/system-update-generator.c72
-rw-r--r--src/systemctl/systemctl-add-dependency.c88
-rw-r--r--src/systemctl/systemctl-add-dependency.h4
-rw-r--r--src/systemctl/systemctl-cancel-job.c43
-rw-r--r--src/systemctl/systemctl-cancel-job.h4
-rw-r--r--src/systemctl/systemctl-clean-or-freeze.c101
-rw-r--r--src/systemctl/systemctl-clean-or-freeze.h4
-rw-r--r--src/systemctl/systemctl-compat-halt.c202
-rw-r--r--src/systemctl/systemctl-compat-halt.h6
-rw-r--r--src/systemctl/systemctl-compat-runlevel.c82
-rw-r--r--src/systemctl/systemctl-compat-runlevel.h6
-rw-r--r--src/systemctl/systemctl-compat-shutdown.c143
-rw-r--r--src/systemctl/systemctl-compat-shutdown.h4
-rw-r--r--src/systemctl/systemctl-compat-telinit.c152
-rw-r--r--src/systemctl/systemctl-compat-telinit.h6
-rw-r--r--src/systemctl/systemctl-daemon-reload.c63
-rw-r--r--src/systemctl/systemctl-daemon-reload.h4
-rw-r--r--src/systemctl/systemctl-edit.c588
-rw-r--r--src/systemctl/systemctl-edit.h5
-rw-r--r--src/systemctl/systemctl-enable.c284
-rw-r--r--src/systemctl/systemctl-enable.h4
-rw-r--r--src/systemctl/systemctl-is-active.c62
-rw-r--r--src/systemctl/systemctl-is-active.h5
-rw-r--r--src/systemctl/systemctl-is-enabled.c138
-rw-r--r--src/systemctl/systemctl-is-enabled.h4
-rw-r--r--src/systemctl/systemctl-is-system-running.c84
-rw-r--r--src/systemctl/systemctl-is-system-running.h4
-rw-r--r--src/systemctl/systemctl-kill.c50
-rw-r--r--src/systemctl/systemctl-kill.h4
-rw-r--r--src/systemctl/systemctl-list-dependencies.c174
-rw-r--r--src/systemctl/systemctl-list-dependencies.h4
-rw-r--r--src/systemctl/systemctl-list-jobs.c176
-rw-r--r--src/systemctl/systemctl-list-jobs.h4
-rw-r--r--src/systemctl/systemctl-list-machines.c246
-rw-r--r--src/systemctl/systemctl-list-machines.h24
-rw-r--r--src/systemctl/systemctl-list-unit-files.c275
-rw-r--r--src/systemctl/systemctl-list-unit-files.h4
-rw-r--r--src/systemctl/systemctl-list-units.c771
-rw-r--r--src/systemctl/systemctl-list-units.h8
-rw-r--r--src/systemctl/systemctl-log-setting.c144
-rw-r--r--src/systemctl/systemctl-log-setting.h5
-rw-r--r--src/systemctl/systemctl-logind.c380
-rw-r--r--src/systemctl/systemctl-logind.h18
-rw-r--r--src/systemctl/systemctl-preset-all.c61
-rw-r--r--src/systemctl/systemctl-preset-all.h4
-rw-r--r--src/systemctl/systemctl-reset-failed.c41
-rw-r--r--src/systemctl/systemctl-reset-failed.h4
-rw-r--r--src/systemctl/systemctl-service-watchdogs.c43
-rw-r--r--src/systemctl/systemctl-service-watchdogs.h4
-rw-r--r--src/systemctl/systemctl-set-default.c158
-rw-r--r--src/systemctl/systemctl-set-default.h5
-rw-r--r--src/systemctl/systemctl-set-environment.c184
-rw-r--r--src/systemctl/systemctl-set-environment.h6
-rw-r--r--src/systemctl/systemctl-set-property.c56
-rw-r--r--src/systemctl/systemctl-set-property.h4
-rw-r--r--src/systemctl/systemctl-show.c2135
-rw-r--r--src/systemctl/systemctl-show.h4
-rw-r--r--src/systemctl/systemctl-start-special.c248
-rw-r--r--src/systemctl/systemctl-start-special.h5
-rw-r--r--src/systemctl/systemctl-start-unit.c368
-rw-r--r--src/systemctl/systemctl-start-unit.h16
-rw-r--r--src/systemctl/systemctl-switch-root.c77
-rw-r--r--src/systemctl/systemctl-switch-root.h4
-rw-r--r--src/systemctl/systemctl-sysv-compat.c271
-rw-r--r--src/systemctl/systemctl-sysv-compat.h35
-rw-r--r--src/systemctl/systemctl-trivial-method.c44
-rw-r--r--src/systemctl/systemctl-trivial-method.h4
-rw-r--r--src/systemctl/systemctl-util.c936
-rw-r--r--src/systemctl/systemctl-util.h58
-rw-r--r--src/systemctl/systemctl.c1128
-rw-r--r--src/systemctl/systemctl.h92
-rwxr-xr-xsrc/systemctl/systemd-sysv-install.SKELETON49
-rw-r--r--src/systemd/_sd-common.h102
-rw-r--r--src/systemd/meson.build84
-rw-r--r--src/systemd/sd-bus-protocol.h105
-rw-r--r--src/systemd/sd-bus-vtable.h311
-rw-r--r--src/systemd/sd-bus.h532
-rw-r--r--src/systemd/sd-daemon.h333
-rw-r--r--src/systemd/sd-device.h128
-rw-r--r--src/systemd/sd-dhcp-client.h222
-rw-r--r--src/systemd/sd-dhcp-lease.h82
-rw-r--r--src/systemd/sd-dhcp-option.h38
-rw-r--r--src/systemd/sd-dhcp-server.h86
-rw-r--r--src/systemd/sd-dhcp6-client.h180
-rw-r--r--src/systemd/sd-dhcp6-lease.h55
-rw-r--r--src/systemd/sd-dhcp6-option.h37
-rw-r--r--src/systemd/sd-event.h173
-rw-r--r--src/systemd/sd-hwdb.h45
-rw-r--r--src/systemd/sd-id128.h124
-rw-r--r--src/systemd/sd-ipv4acd.h58
-rw-r--r--src/systemd/sd-ipv4ll.h60
-rw-r--r--src/systemd/sd-journal.h177
-rw-r--r--src/systemd/sd-lldp.h194
-rw-r--r--src/systemd/sd-login.h246
-rw-r--r--src/systemd/sd-messages.h185
-rw-r--r--src/systemd/sd-ndisc.h131
-rw-r--r--src/systemd/sd-netlink.h247
-rw-r--r--src/systemd/sd-network.h203
-rw-r--r--src/systemd/sd-path.h121
-rw-r--r--src/systemd/sd-radv.h101
-rw-r--r--src/systemd/sd-resolve.h124
-rw-r--r--src/systemd/sd-utf8.h29
-rw-r--r--src/sysusers/sysusers.c2038
-rw-r--r--src/sysv-generator/sysv-generator.c950
-rwxr-xr-xsrc/test/generate-sym-test.py30
-rw-r--r--src/test/meson.build1194
-rw-r--r--src/test/test-acl-util.c76
-rw-r--r--src/test/test-af-list.c33
-rw-r--r--src/test/test-alloc-util.c164
-rw-r--r--src/test/test-architecture.c52
-rw-r--r--src/test/test-arphrd-list.c29
-rw-r--r--src/test/test-ask-password-api.c26
-rw-r--r--src/test/test-async.c39
-rw-r--r--src/test/test-barrier.c464
-rw-r--r--src/test/test-bitmap.c116
-rw-r--r--src/test/test-boot-timestamps.c92
-rw-r--r--src/test/test-bpf-devices.c306
-rw-r--r--src/test/test-bpf-firewall.c201
-rw-r--r--src/test/test-btrfs.c181
-rw-r--r--src/test/test-bus-util.c55
-rw-r--r--src/test/test-calendarspec.c252
-rw-r--r--src/test/test-cap-list.c128
-rw-r--r--src/test/test-capability.c280
-rw-r--r--src/test/test-cgroup-cpu.c38
-rw-r--r--src/test/test-cgroup-mask.c168
-rw-r--r--src/test/test-cgroup-setup.c73
-rw-r--r--src/test/test-cgroup-unit-default.c146
-rw-r--r--src/test/test-cgroup-util.c455
-rw-r--r--src/test/test-cgroup.c137
-rw-r--r--src/test/test-chase-symlinks.c115
-rw-r--r--src/test/test-chown-rec.c161
-rw-r--r--src/test/test-clock.c78
-rw-r--r--src/test/test-condition.c869
-rw-r--r--src/test/test-conf-files.c157
-rw-r--r--src/test/test-conf-parser.c411
-rw-r--r--src/test/test-copy.c323
-rw-r--r--src/test/test-coredump-util.c78
-rw-r--r--src/test/test-cpu-set-util.c290
-rw-r--r--src/test/test-daemon.c57
-rw-r--r--src/test/test-date.c110
-rw-r--r--src/test/test-dev-setup.c63
-rw-r--r--src/test/test-device-nodes.c40
-rw-r--r--src/test/test-dlopen.c15
-rw-r--r--src/test/test-dns-domain.c830
-rw-r--r--src/test/test-ellipsize.c127
-rw-r--r--src/test/test-emergency-action.c51
-rw-r--r--src/test/test-engine.c158
-rw-r--r--src/test/test-env-file.c188
-rw-r--r--src/test/test-env-util.c355
-rw-r--r--src/test/test-escape.c196
-rw-r--r--src/test/test-exec-util.c473
-rw-r--r--src/test/test-execute.c966
-rw-r--r--src/test/test-exit-status.c52
-rw-r--r--src/test/test-extract-word.c626
-rw-r--r--src/test/test-fd-util.c431
-rw-r--r--src/test/test-fdset.c203
-rw-r--r--src/test/test-fileio.c951
-rw-r--r--src/test/test-firewall-util.c42
-rw-r--r--src/test/test-format-table.c514
-rw-r--r--src/test/test-format-util.c39
-rw-r--r--src/test/test-fs-util.c857
-rw-r--r--src/test/test-fstab-util.c167
-rw-r--r--src/test/test-gcrypt-util.c32
-rw-r--r--src/test/test-glob-util.c102
-rw-r--r--src/test/test-hash.c76
-rw-r--r--src/test/test-hashmap-ordered.awk11
-rw-r--r--src/test/test-hashmap-plain.c1098
-rw-r--r--src/test/test-hashmap.c186
-rw-r--r--src/test/test-hexdecoct.c355
-rw-r--r--src/test/test-hostname-util.c172
-rw-r--r--src/test/test-hostname.c14
-rw-r--r--src/test/test-id128.c161
-rw-r--r--src/test/test-in-addr-util.c345
-rw-r--r--src/test/test-install-root.c1266
-rw-r--r--src/test/test-install.c272
-rw-r--r--src/test/test-io-util.c52
-rw-r--r--src/test/test-ip-protocol-list.c64
-rw-r--r--src/test/test-ipcrm.c29
-rw-r--r--src/test/test-job-type.c81
-rw-r--r--src/test/test-journal-importer.c78
-rw-r--r--src/test/test-json.c578
-rw-r--r--src/test/test-libcrypt-util.c102
-rw-r--r--src/test/test-libmount.c115
-rw-r--r--src/test/test-libudev.c584
-rw-r--r--src/test/test-list.c254
-rw-r--r--src/test/test-load-fragment.c855
-rw-r--r--src/test/test-local-addresses.c44
-rw-r--r--src/test/test-locale-util.c131
-rw-r--r--src/test/test-log.c94
-rw-r--r--src/test/test-loop-block.c250
-rw-r--r--src/test/test-loopback.c20
-rw-r--r--src/test/test-mount-util.c70
-rw-r--r--src/test/test-mountpoint-util.c316
-rw-r--r--src/test/test-namespace.c223
-rw-r--r--src/test/test-netlink-manual.c127
-rw-r--r--src/test/test-ns.c106
-rw-r--r--src/test/test-nscd-flush.c20
-rw-r--r--src/test/test-nss.c536
-rw-r--r--src/test/test-offline-passwd.c85
-rw-r--r--src/test/test-ordered-set.c137
-rw-r--r--src/test/test-os-util.c21
-rw-r--r--src/test/test-parse-util.c998
-rw-r--r--src/test/test-path-lookup.c133
-rw-r--r--src/test/test-path-util.c733
-rw-r--r--src/test/test-path.c412
-rw-r--r--src/test/test-pretty-print.c43
-rw-r--r--src/test/test-prioq.c128
-rw-r--r--src/test/test-proc-cmdline.c269
-rw-r--r--src/test/test-process-util.c720
-rw-r--r--src/test/test-procfs-util.c53
-rw-r--r--src/test/test-psi-util.c80
-rw-r--r--src/test/test-qrcode-util.c23
-rw-r--r--src/test/test-random-util.c68
-rw-r--r--src/test/test-ratelimit.c29
-rw-r--r--src/test/test-replace-var.c27
-rw-r--r--src/test/test-rlimit-util.c134
-rw-r--r--src/test/test-rm-rf.c74
-rw-r--r--src/test/test-sched-prio.c81
-rw-r--r--src/test/test-sd-hwdb.c77
-rw-r--r--src/test/test-sd-path.c69
-rw-r--r--src/test/test-seccomp.c1096
-rw-r--r--src/test/test-selinux.c105
-rw-r--r--src/test/test-serialize.c208
-rw-r--r--src/test/test-set-disable-mempool.c53
-rw-r--r--src/test/test-set.c243
-rw-r--r--src/test/test-sigbus.c61
-rw-r--r--src/test/test-signal-util.c150
-rw-r--r--src/test/test-siphash24.c107
-rw-r--r--src/test/test-sizeof.c93
-rw-r--r--src/test/test-sleep.c132
-rw-r--r--src/test/test-socket-netlink.c405
-rw-r--r--src/test/test-socket-util.c524
-rw-r--r--src/test/test-specifier.c82
-rw-r--r--src/test/test-stat-util.c167
-rw-r--r--src/test/test-static-destruct.c34
-rw-r--r--src/test/test-strbuf.c75
-rw-r--r--src/test/test-string-util.c928
-rw-r--r--src/test/test-strip-tab-ansi.c72
-rw-r--r--src/test/test-strv.c1059
-rw-r--r--src/test/test-strxcpyx.c109
-rw-r--r--src/test/test-sysctl-util.c44
-rwxr-xr-xsrc/test/test-systemd-tmpfiles.py143
-rw-r--r--src/test/test-tables.c128
-rw-r--r--src/test/test-terminal-util.c166
-rw-r--r--src/test/test-time-util.c557
-rw-r--r--src/test/test-tmpfiles.c66
-rw-r--r--src/test/test-udev-util.c202
-rw-r--r--src/test/test-udev.c139
-rw-r--r--src/test/test-uid-range.c74
-rw-r--r--src/test/test-umask-util.c41
-rw-r--r--src/test/test-umount.c74
-rw-r--r--src/test/test-unaligned.c172
-rw-r--r--src/test/test-unit-file.c102
-rw-r--r--src/test/test-unit-name.c907
-rw-r--r--src/test/test-user-record.c104
-rw-r--r--src/test/test-user-util.c515
-rw-r--r--src/test/test-utf8.c253
-rw-r--r--src/test/test-util.c527
-rw-r--r--src/test/test-varlink.c239
-rw-r--r--src/test/test-verbs.c63
-rw-r--r--src/test/test-watch-pid.c90
-rw-r--r--src/test/test-watchdog.c40
-rw-r--r--src/test/test-web-util.c24
-rw-r--r--src/test/test-xattr-util.c89
-rw-r--r--src/test/test-xdg-autostart.c93
-rw-r--r--src/test/test-xml.c66
-rw-r--r--src/time-wait-sync/time-wait-sync.c244
-rw-r--r--src/timedate/meson.build10
-rw-r--r--src/timedate/org.freedesktop.timedate1.conf29
-rw-r--r--src/timedate/org.freedesktop.timedate1.policy62
-rw-r--r--src/timedate/org.freedesktop.timedate1.service14
-rw-r--r--src/timedate/timedatectl.c1068
-rw-r--r--src/timedate/timedated.c1161
-rw-r--r--src/timesync/80-systemd-timesync.list1
-rw-r--r--src/timesync/meson.build55
-rw-r--r--src/timesync/org.freedesktop.timesync1.conf42
-rw-r--r--src/timesync/org.freedesktop.timesync1.service14
-rw-r--r--src/timesync/test-timesync.c34
-rw-r--r--src/timesync/timesyncd-bus.c207
-rw-r--r--src/timesync/timesyncd-bus.h6
-rw-r--r--src/timesync/timesyncd-conf.c128
-rw-r--r--src/timesync/timesyncd-conf.h14
-rw-r--r--src/timesync/timesyncd-gperf.gperf25
-rw-r--r--src/timesync/timesyncd-manager.c1118
-rw-r--r--src/timesync/timesyncd-manager.h112
-rw-r--r--src/timesync/timesyncd-ntp-message.h45
-rw-r--r--src/timesync/timesyncd-server.c130
-rw-r--r--src/timesync/timesyncd-server.h47
-rw-r--r--src/timesync/timesyncd.c178
-rw-r--r--src/timesync/timesyncd.conf.in19
-rw-r--r--src/tmpfiles/meson.build7
-rw-r--r--src/tmpfiles/tmpfiles.c3495
-rw-r--r--src/tty-ask-password-agent/tty-ask-password-agent.c715
-rw-r--r--src/udev/.vimrc4
-rw-r--r--src/udev/ata_id/ata_id.c651
-rw-r--r--src/udev/cdrom_id/cdrom_id.c1018
-rw-r--r--src/udev/fido_id/fido_id.c96
-rw-r--r--src/udev/fido_id/fido_id_desc.c92
-rw-r--r--src/udev/fido_id/fido_id_desc.h8
-rw-r--r--src/udev/fido_id/fuzz-fido-id-desc.c23
-rw-r--r--src/udev/fido_id/test-fido-id-desc.c85
-rwxr-xr-xsrc/udev/generate-keyboard-keys-gperf.sh18
-rwxr-xr-xsrc/udev/generate-keyboard-keys-list.sh7
-rw-r--r--src/udev/meson.build225
-rw-r--r--src/udev/mtd_probe/mtd_probe.c59
-rw-r--r--src/udev/mtd_probe/mtd_probe.h52
-rw-r--r--src/udev/mtd_probe/probe_smartmedia.c97
-rw-r--r--src/udev/net/fuzz-link-parser.c28
-rw-r--r--src/udev/net/fuzz-link-parser.options2
-rw-r--r--src/udev/net/link-config-gperf.gperf68
-rw-r--r--src/udev/net/link-config.c711
-rw-r--r--src/udev/net/link-config.h98
-rw-r--r--src/udev/scsi_id/README4
-rw-r--r--src/udev/scsi_id/scsi.h100
-rw-r--r--src/udev/scsi_id/scsi_id.c595
-rw-r--r--src/udev/scsi_id/scsi_id.h63
-rw-r--r--src/udev/scsi_id/scsi_serial.c893
-rw-r--r--src/udev/udev-builtin-blkid.c317
-rw-r--r--src/udev/udev-builtin-btrfs.c40
-rw-r--r--src/udev/udev-builtin-hwdb.c221
-rw-r--r--src/udev/udev-builtin-input_id.c395
-rw-r--r--src/udev/udev-builtin-keyboard.c254
-rw-r--r--src/udev/udev-builtin-kmod.c76
-rw-r--r--src/udev/udev-builtin-net_id.c961
-rw-r--r--src/udev/udev-builtin-net_setup_link.c88
-rw-r--r--src/udev/udev-builtin-path_id.c732
-rw-r--r--src/udev/udev-builtin-uaccess.c80
-rw-r--r--src/udev/udev-builtin-usb_id.c462
-rw-r--r--src/udev/udev-builtin.c145
-rw-r--r--src/udev/udev-builtin.h71
-rw-r--r--src/udev/udev-ctrl.c393
-rw-r--r--src/udev/udev-ctrl.h79
-rw-r--r--src/udev/udev-event.c1087
-rw-r--r--src/udev/udev-event.h69
-rw-r--r--src/udev/udev-node.c528
-rw-r--r--src/udev/udev-node.h15
-rw-r--r--src/udev/udev-rules.c2393
-rw-r--r--src/udev/udev-rules.h30
-rw-r--r--src/udev/udev-watch.c174
-rw-r--r--src/udev/udev-watch.h10
-rw-r--r--src/udev/udev.conf11
-rw-r--r--src/udev/udev.pc.in6
-rw-r--r--src/udev/udevadm-control.c186
-rw-r--r--src/udev/udevadm-hwdb.c101
-rw-r--r--src/udev/udevadm-info.c519
-rw-r--r--src/udev/udevadm-monitor.c254
-rw-r--r--src/udev/udevadm-settle.c226
-rw-r--r--src/udev/udevadm-test-builtin.c97
-rw-r--r--src/udev/udevadm-test.c162
-rw-r--r--src/udev/udevadm-trigger.c393
-rw-r--r--src/udev/udevadm-util.c95
-rw-r--r--src/udev/udevadm-util.h6
-rw-r--r--src/udev/udevadm.c134
-rw-r--r--src/udev/udevadm.h22
-rw-r--r--src/udev/udevd.c1960
-rw-r--r--src/udev/udevd.h4
-rw-r--r--src/udev/v4l_id/v4l_id.c93
-rw-r--r--src/update-done/update-done.c59
-rw-r--r--src/update-utmp/update-utmp.c251
-rw-r--r--src/user-sessions/user-sessions.c45
-rw-r--r--src/userdb/meson.build15
-rw-r--r--src/userdb/userdbctl.c789
-rw-r--r--src/userdb/userdbd-manager.c301
-rw-r--r--src/userdb/userdbd-manager.h34
-rw-r--r--src/userdb/userdbd.c56
-rw-r--r--src/userdb/userwork.c775
-rw-r--r--src/vconsole/90-vconsole.rules.in12
-rw-r--r--src/vconsole/meson.build10
-rw-r--r--src/vconsole/vconsole-setup.c489
-rw-r--r--src/veritysetup/veritysetup-generator.c230
-rw-r--r--src/veritysetup/veritysetup.c139
-rw-r--r--src/version/version.h.in8
-rw-r--r--src/volatile-root/volatile-root.c197
-rw-r--r--src/xdg-autostart-generator/xdg-autostart-condition.c46
-rw-r--r--src/xdg-autostart-generator/xdg-autostart-generator.c115
-rw-r--r--src/xdg-autostart-generator/xdg-autostart-service.c660
-rw-r--r--src/xdg-autostart-generator/xdg-autostart-service.h37
2030 files changed, 644331 insertions, 0 deletions
diff --git a/src/ac-power/ac-power.c b/src/ac-power/ac-power.c
new file mode 100644
index 0000000..9fabdb9
--- /dev/null
+++ b/src/ac-power/ac-power.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "main-func.h"
+#include "util.h"
+
+static bool arg_verbose = false;
+
+static void help(void) {
+ printf("%s\n\n"
+ "Report whether we are connected to an external power source.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -v --verbose Show state as text\n"
+ , program_invocation_short_name);
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hv", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case 'v':
+ arg_verbose = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s takes no arguments.",
+ program_invocation_short_name);
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ /* This is mostly intended to be used for scripts which want
+ * to detect whether AC power is plugged in or not. */
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = on_ac_power();
+ if (r < 0)
+ return log_error_errno(r, "Failed to read AC status: %m");
+
+ if (arg_verbose)
+ puts(yes_no(r));
+
+ return r == 0;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/activate/activate.c b/src/activate/activate.c
new file mode 100644
index 0000000..1f7a249
--- /dev/null
+++ b/src/activate/activate.c
@@ -0,0 +1,521 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <sys/epoll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-netlink.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static char **arg_listen = NULL;
+static bool arg_accept = false;
+static int arg_socket_type = SOCK_STREAM;
+static char **arg_args = NULL;
+static char **arg_setenv = NULL;
+static char **arg_fdnames = NULL;
+static bool arg_inetd = false;
+
+static int add_epoll(int epoll_fd, int fd) {
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.fd = fd,
+ };
+
+ assert(epoll_fd >= 0);
+ assert(fd >= 0);
+
+ if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
+ return log_error_errno(errno, "Failed to add event on epoll fd:%d for fd:%d: %m", epoll_fd, fd);
+
+ return 0;
+}
+
+static int open_sockets(int *epoll_fd, bool accept) {
+ char **address;
+ int n, fd, r, count = 0;
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
+ if (n > 0) {
+ log_info("Received %i descriptors via the environment.", n);
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+ r = fd_cloexec(fd, arg_accept);
+ if (r < 0)
+ return r;
+
+ count++;
+ }
+ }
+
+ /* Close logging and all other descriptors */
+ if (arg_listen) {
+ _cleanup_free_ int *except = NULL;
+ int i;
+
+ except = new(int, n);
+ if (!except)
+ return log_oom();
+
+ for (i = 0; i < n; i++)
+ except[i] = SD_LISTEN_FDS_START + i;
+
+ log_close();
+ r = close_all_fds(except, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close all file descriptors: %m");
+ }
+
+ /** Note: we leak some fd's on error here. I doesn't matter
+ * much, since the program will exit immediately anyway, but
+ * would be a pain to fix.
+ */
+
+ STRV_FOREACH(address, arg_listen) {
+ fd = make_socket_fd(LOG_DEBUG, *address, arg_socket_type, (arg_accept * SOCK_CLOEXEC));
+ if (fd < 0) {
+ log_open();
+ return log_error_errno(fd, "Failed to open '%s': %m", *address);
+ }
+
+ assert(fd == SD_LISTEN_FDS_START + count);
+ count++;
+ }
+
+ if (arg_listen)
+ log_open();
+
+ *epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (*epoll_fd < 0)
+ return log_error_errno(errno, "Failed to create epoll object: %m");
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + count; fd++) {
+ _cleanup_free_ char *name = NULL;
+
+ getsockname_pretty(fd, &name);
+ log_info("Listening on %s as %i.", strna(name), fd);
+
+ r = add_epoll(*epoll_fd, fd);
+ if (r < 0)
+ return r;
+ }
+
+ return count;
+}
+
+static int exec_process(const char *name, char **argv, char **env, int start_fd, size_t n_fds) {
+ _cleanup_strv_free_ char **envp = NULL;
+ _cleanup_free_ char *joined = NULL;
+ size_t n_env = 0, length;
+ const char *tocopy;
+ char **s;
+ int r;
+
+ if (arg_inetd && n_fds != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--inetd only supported for single file descriptors.");
+
+ length = strv_length(arg_setenv);
+
+ /* PATH, TERM, HOME, USER, LISTEN_FDS, LISTEN_PID, LISTEN_FDNAMES, NULL */
+ envp = new0(char *, length + 8);
+ if (!envp)
+ return log_oom();
+
+ STRV_FOREACH(s, arg_setenv) {
+
+ if (strchr(*s, '=')) {
+ char *k;
+
+ k = strdup(*s);
+ if (!k)
+ return log_oom();
+
+ envp[n_env++] = k;
+ } else {
+ _cleanup_free_ char *p;
+ const char *n;
+
+ p = strjoin(*s, "=");
+ if (!p)
+ return log_oom();
+
+ n = strv_find_prefix(env, p);
+ if (!n)
+ continue;
+
+ envp[n_env] = strdup(n);
+ if (!envp[n_env])
+ return log_oom();
+
+ n_env++;
+ }
+ }
+
+ FOREACH_STRING(tocopy, "TERM=", "PATH=", "USER=", "HOME=") {
+ const char *n;
+
+ n = strv_find_prefix(env, tocopy);
+ if (!n)
+ continue;
+
+ envp[n_env] = strdup(n);
+ if (!envp[n_env])
+ return log_oom();
+
+ n_env++;
+ }
+
+ if (arg_inetd) {
+ assert(n_fds == 1);
+
+ r = rearrange_stdio(start_fd, start_fd, STDERR_FILENO); /* invalidates start_fd on success + error */
+ if (r < 0)
+ return log_error_errno(r, "Failed to move fd to stdin+stdout: %m");
+
+ } else {
+ if (start_fd != SD_LISTEN_FDS_START) {
+ assert(n_fds == 1);
+
+ if (dup2(start_fd, SD_LISTEN_FDS_START) < 0)
+ return log_error_errno(errno, "Failed to dup connection: %m");
+
+ safe_close(start_fd);
+ }
+
+ if (asprintf((char **) (envp + n_env++), "LISTEN_FDS=%zu", n_fds) < 0)
+ return log_oom();
+
+ if (asprintf((char **) (envp + n_env++), "LISTEN_PID=" PID_FMT, getpid_cached()) < 0)
+ return log_oom();
+
+ if (arg_fdnames) {
+ _cleanup_free_ char *names = NULL;
+ size_t len;
+ char *e;
+
+ len = strv_length(arg_fdnames);
+ if (len == 1)
+ for (size_t i = 1; i < n_fds; i++) {
+ r = strv_extend(&arg_fdnames, arg_fdnames[0]);
+ if (r < 0)
+ return log_oom();
+ }
+ else if (len != n_fds)
+ log_warning("The number of fd names is different than number of fds: %zu vs %zu", len, n_fds);
+
+ names = strv_join(arg_fdnames, ":");
+ if (!names)
+ return log_oom();
+
+ e = strjoin("LISTEN_FDNAMES=", names);
+ if (!e)
+ return log_oom();
+
+ envp[n_env++] = e;
+ }
+ }
+
+ joined = strv_join(argv, " ");
+ if (!joined)
+ return log_oom();
+
+ log_info("Execing %s (%s)", name, joined);
+ execvpe(name, argv, envp);
+
+ return log_error_errno(errno, "Failed to execp %s (%s): %m", name, joined);
+}
+
+static int fork_and_exec_process(const char *child, char **argv, char **env, int fd) {
+ _cleanup_free_ char *joined = NULL;
+ pid_t child_pid;
+ int r;
+
+ joined = strv_join(argv, " ");
+ if (!joined)
+ return log_oom();
+
+ r = safe_fork("(activate)",
+ FORK_RESET_SIGNALS | FORK_DEATHSIG | FORK_RLIMIT_NOFILE_SAFE | FORK_LOG,
+ &child_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* In the child */
+ exec_process(child, argv, env, fd, 1);
+ _exit(EXIT_FAILURE);
+ }
+
+ log_info("Spawned %s (%s) as PID " PID_FMT ".", child, joined, child_pid);
+ return 0;
+}
+
+static int do_accept(const char *name, char **argv, char **envp, int fd) {
+ _cleanup_free_ char *local = NULL, *peer = NULL;
+ _cleanup_close_ int fd_accepted = -1;
+
+ fd_accepted = accept4(fd, NULL, NULL, 0);
+ if (fd_accepted < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return 0;
+
+ return log_error_errno(errno, "Failed to accept connection on fd:%d: %m", fd);
+ }
+
+ (void) getsockname_pretty(fd_accepted, &local);
+ (void) getpeername_pretty(fd_accepted, true, &peer);
+ log_info("Connection from %s to %s", strna(peer), strna(local));
+
+ return fork_and_exec_process(name, argv, envp, fd_accepted);
+}
+
+/* SIGCHLD handler. */
+static void sigchld_hdl(int sig) {
+ PROTECT_ERRNO;
+
+ for (;;) {
+ siginfo_t si;
+ int r;
+
+ si.si_pid = 0;
+ r = waitid(P_ALL, 0, &si, WEXITED | WNOHANG);
+ if (r < 0) {
+ if (errno != ECHILD)
+ log_error_errno(errno, "Failed to reap children: %m");
+ return;
+ }
+ if (si.si_pid == 0)
+ return;
+
+ log_info("Child %d died with code %d", si.si_pid, si.si_status);
+ }
+}
+
+static int install_chld_handler(void) {
+ static const struct sigaction act = {
+ .sa_flags = SA_NOCLDSTOP | SA_RESTART,
+ .sa_handler = sigchld_hdl,
+ };
+
+ if (sigaction(SIGCHLD, &act, 0) < 0)
+ return log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-socket-activate", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n"
+ "\n%sListen on sockets and launch child on connection.%s\n"
+ "\nOptions:\n"
+ " -h --help Show this help and exit\n"
+ " --version Print version string and exit\n"
+ " -l --listen=ADDR Listen for raw connections at ADDR\n"
+ " -d --datagram Listen on datagram instead of stream socket\n"
+ " --seqpacket Listen on SOCK_SEQPACKET instead of stream socket\n"
+ " -a --accept Spawn separate child for each connection\n"
+ " -E --setenv=NAME[=VALUE] Pass an environment variable to children\n"
+ " --fdname=NAME[:NAME...] Specify names for file descriptors\n"
+ " --inetd Enable inetd file descriptor passing protocol\n"
+ "\nNote: file descriptors from sd_listen_fds() will be passed through.\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FDNAME,
+ ARG_SEQPACKET,
+ ARG_INETD,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "datagram", no_argument, NULL, 'd' },
+ { "seqpacket", no_argument, NULL, ARG_SEQPACKET },
+ { "listen", required_argument, NULL, 'l' },
+ { "accept", no_argument, NULL, 'a' },
+ { "setenv", required_argument, NULL, 'E' },
+ { "environment", required_argument, NULL, 'E' }, /* legacy alias */
+ { "fdname", required_argument, NULL, ARG_FDNAME },
+ { "inetd", no_argument, NULL, ARG_INETD },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+hl:aE:d", options, NULL)) >= 0)
+ switch (c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'l':
+ r = strv_extend(&arg_listen, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case 'd':
+ if (arg_socket_type == SOCK_SEQPACKET)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--datagram may not be combined with --seqpacket.");
+
+ arg_socket_type = SOCK_DGRAM;
+ break;
+
+ case ARG_SEQPACKET:
+ if (arg_socket_type == SOCK_DGRAM)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--seqpacket may not be combined with --datagram.");
+
+ arg_socket_type = SOCK_SEQPACKET;
+ break;
+
+ case 'a':
+ arg_accept = true;
+ break;
+
+ case 'E':
+ r = strv_extend(&arg_setenv, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_FDNAME: {
+ _cleanup_strv_free_ char **names;
+ char **s;
+
+ names = strv_split(optarg, ":");
+ if (!names)
+ return log_oom();
+
+ STRV_FOREACH(s, names)
+ if (!fdname_is_valid(*s)) {
+ _cleanup_free_ char *esc;
+
+ esc = cescape(*s);
+ log_warning("File descriptor name \"%s\" is not valid.", esc);
+ }
+
+ /* Empty optargs means one empty name */
+ r = strv_extend_strv(&arg_fdnames,
+ strv_isempty(names) ? STRV_MAKE("") : names,
+ false);
+ if (r < 0)
+ return log_error_errno(r, "strv_extend_strv: %m");
+ break;
+ }
+
+ case ARG_INETD:
+ arg_inetd = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind == argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: command to execute is missing.",
+ program_invocation_short_name);
+
+ if (arg_socket_type == SOCK_DGRAM && arg_accept)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Datagram sockets do not accept connections. "
+ "The --datagram and --accept options may not be combined.");
+
+ arg_args = argv + optind;
+
+ return 1 /* work to do */;
+}
+
+int main(int argc, char **argv, char **envp) {
+ int r, n;
+ int epoll_fd = -1;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+
+ r = install_chld_handler();
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ n = open_sockets(&epoll_fd, arg_accept);
+ if (n < 0)
+ return EXIT_FAILURE;
+ if (n == 0) {
+ log_error("No sockets to listen on specified or passed in.");
+ return EXIT_FAILURE;
+ }
+
+ for (;;) {
+ struct epoll_event event;
+
+ if (epoll_wait(epoll_fd, &event, 1, -1) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ log_error_errno(errno, "epoll_wait() failed: %m");
+ return EXIT_FAILURE;
+ }
+
+ log_info("Communication attempt on fd %i.", event.data.fd);
+ if (arg_accept) {
+ r = do_accept(argv[optind], argv + optind, envp, event.data.fd);
+ if (r < 0)
+ return EXIT_FAILURE;
+ } else
+ break;
+ }
+
+ exec_process(argv[optind], argv + optind, envp, SD_LISTEN_FDS_START, (size_t) n);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/analyze/analyze-condition.c b/src/analyze/analyze-condition.c
new file mode 100644
index 0000000..241c188
--- /dev/null
+++ b/src/analyze/analyze-condition.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+
+#include "analyze-condition.h"
+#include "condition.h"
+#include "conf-parser.h"
+#include "load-fragment.h"
+#include "service.h"
+
+static int parse_condition(Unit *u, const char *line) {
+ assert(u);
+ assert(line);
+
+ for (ConditionType t = 0; t < _CONDITION_TYPE_MAX; t++) {
+ ConfigParserCallback callback;
+ Condition **target;
+ const char *p, *name;
+
+ name = condition_type_to_string(t);
+ p = startswith(line, name);
+ if (p)
+ target = &u->conditions;
+ else {
+ name = assert_type_to_string(t);
+ p = startswith(line, name);
+ if (!p)
+ continue;
+
+ target = &u->asserts;
+ }
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '=')
+ continue;
+ p++;
+
+ p += strspn(p, WHITESPACE);
+
+ if (condition_takes_path(t))
+ callback = config_parse_unit_condition_path;
+ else
+ callback = config_parse_unit_condition_string;
+
+ return callback(NULL, "(cmdline)", 0, NULL, 0, name, t, p, target, u);
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot parse \"%s\".", line);
+}
+
+_printf_(7, 8)
+static int log_helper(void *userdata, int level, int error, const char *file, int line, const char *func, const char *format, ...) {
+ Unit *u = userdata;
+ va_list ap;
+ int r;
+
+ assert(u);
+
+ /* "upgrade" debug messages */
+ level = MIN(LOG_INFO, level);
+
+ va_start(ap, format);
+ r = log_object_internalv(level, error, file, line, func,
+ NULL,
+ u->id,
+ NULL,
+ NULL,
+ format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int verify_conditions(char **lines, UnitFileScope scope) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *u;
+ char **line;
+ int r, q = 1;
+
+ r = manager_new(scope, MANAGER_TEST_RUN_MINIMAL, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize manager: %m");
+
+ log_debug("Starting manager...");
+ r = manager_startup(m, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = unit_new_for_name(m, sizeof(Service), "test.service", &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create test.service: %m");
+
+ STRV_FOREACH(line, lines) {
+ r = parse_condition(u, *line);
+ if (r < 0)
+ return r;
+ }
+
+ r = condition_test_list(u->asserts, environ, assert_type_to_string, log_helper, u);
+ if (u->asserts)
+ log_notice("Asserts %s.", r > 0 ? "succeeded" : "failed");
+
+ q = condition_test_list(u->conditions, environ, condition_type_to_string, log_helper, u);
+ if (u->conditions)
+ log_notice("Conditions %s.", q > 0 ? "succeeded" : "failed");
+
+ return r > 0 && q > 0 ? 0 : -EIO;
+}
diff --git a/src/analyze/analyze-condition.h b/src/analyze/analyze-condition.h
new file mode 100644
index 0000000..7b52669
--- /dev/null
+++ b/src/analyze/analyze-condition.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "install.h"
+
+int verify_conditions(char **lines, UnitFileScope scope);
diff --git a/src/analyze/analyze-security.c b/src/analyze/analyze-security.c
new file mode 100644
index 0000000..8d94fbc
--- /dev/null
+++ b/src/analyze/analyze-security.c
@@ -0,0 +1,2220 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/utsname.h>
+
+#include "analyze-security.h"
+#include "bus-error.h"
+#include "bus-map-properties.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "env-util.h"
+#include "format-table.h"
+#include "in-addr-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "missing_capability.h"
+#include "missing_sched.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#if HAVE_SECCOMP
+# include "seccomp-util.h"
+#endif
+#include "set.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "unit-name.h"
+
+struct security_info {
+ char *id;
+ char *type;
+ char *load_state;
+ char *fragment_path;
+ bool default_dependencies;
+
+ uint64_t ambient_capabilities;
+ uint64_t capability_bounding_set;
+
+ char *user;
+ char **supplementary_groups;
+ bool dynamic_user;
+
+ bool ip_address_deny_all;
+ bool ip_address_allow_localhost;
+ bool ip_address_allow_other;
+
+ bool ip_filters_custom_ingress;
+ bool ip_filters_custom_egress;
+
+ char *keyring_mode;
+ char *protect_proc;
+ char *proc_subset;
+ bool lock_personality;
+ bool memory_deny_write_execute;
+ bool no_new_privileges;
+ char *notify_access;
+ bool protect_hostname;
+
+ bool private_devices;
+ bool private_mounts;
+ bool private_network;
+ bool private_tmp;
+ bool private_users;
+
+ bool protect_control_groups;
+ bool protect_kernel_modules;
+ bool protect_kernel_tunables;
+ bool protect_kernel_logs;
+ bool protect_clock;
+
+ char *protect_home;
+ char *protect_system;
+
+ bool remove_ipc;
+
+ bool restrict_address_family_inet;
+ bool restrict_address_family_unix;
+ bool restrict_address_family_netlink;
+ bool restrict_address_family_packet;
+ bool restrict_address_family_other;
+
+ uint64_t restrict_namespaces;
+ bool restrict_realtime;
+ bool restrict_suid_sgid;
+
+ char *root_directory;
+ char *root_image;
+
+ bool delegate;
+ char *device_policy;
+ bool device_allow_non_empty;
+
+ char **system_call_architectures;
+
+ bool system_call_filter_allow_list;
+ Set *system_call_filter;
+
+ uint32_t _umask;
+};
+
+struct security_assessor {
+ const char *id;
+ const char *description_good;
+ const char *description_bad;
+ const char *description_na;
+ const char *url;
+ uint64_t weight;
+ uint64_t range;
+ int (*assess)(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description);
+ size_t offset;
+ uint64_t parameter;
+ bool default_dependencies_only;
+};
+
+static void security_info_free(struct security_info *i) {
+ if (!i)
+ return;
+
+ free(i->id);
+ free(i->type);
+ free(i->load_state);
+ free(i->fragment_path);
+
+ free(i->user);
+
+ free(i->protect_home);
+ free(i->protect_system);
+
+ free(i->root_directory);
+ free(i->root_image);
+
+ free(i->keyring_mode);
+ free(i->protect_proc);
+ free(i->proc_subset);
+ free(i->notify_access);
+
+ free(i->device_policy);
+
+ strv_free(i->supplementary_groups);
+ strv_free(i->system_call_architectures);
+
+ set_free(i->system_call_filter);
+}
+
+static bool security_info_runs_privileged(const struct security_info *i) {
+ assert(i);
+
+ if (STRPTR_IN_SET(i->user, "0", "root"))
+ return true;
+
+ if (i->dynamic_user)
+ return false;
+
+ return isempty(i->user);
+}
+
+static int assess_bool(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ const bool *b = data;
+
+ assert(b);
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = a->parameter ? *b : !*b;
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_user(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ _cleanup_free_ char *d = NULL;
+ uint64_t b;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (streq_ptr(info->user, NOBODY_USER_NAME)) {
+ d = strdup("Service runs under as '" NOBODY_USER_NAME "' user, which should not be used for services");
+ b = 9;
+ } else if (info->dynamic_user && !STR_IN_SET(info->user, "0", "root")) {
+ d = strdup("Service runs under a transient non-root user identity");
+ b = 0;
+ } else if (info->user && !STR_IN_SET(info->user, "0", "root", "")) {
+ d = strdup("Service runs under a static non-root user identity");
+ b = 0;
+ } else {
+ *ret_badness = 10;
+ *ret_description = NULL;
+ return 0;
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = TAKE_PTR(d);
+
+ return 0;
+}
+
+static int assess_protect_home(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ const char *description;
+ uint64_t badness;
+ char *copy;
+ int r;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ badness = 10;
+ description = "Service has full access to home directories";
+
+ r = parse_boolean(info->protect_home);
+ if (r < 0) {
+ if (streq_ptr(info->protect_home, "read-only")) {
+ badness = 5;
+ description = "Service has read-only access to home directories";
+ } else if (streq_ptr(info->protect_home, "tmpfs")) {
+ badness = 1;
+ description = "Service has access to fake empty home directories";
+ }
+ } else if (r > 0) {
+ badness = 0;
+ description = "Service has no access to home directories";
+ }
+
+ copy = strdup(description);
+ if (!copy)
+ return log_oom();
+
+ *ret_badness = badness;
+ *ret_description = copy;
+
+ return 0;
+}
+
+static int assess_protect_system(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ const char *description;
+ uint64_t badness;
+ char *copy;
+ int r;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ badness = 10;
+ description = "Service has full access to the OS file hierarchy";
+
+ r = parse_boolean(info->protect_system);
+ if (r < 0) {
+ if (streq_ptr(info->protect_system, "full")) {
+ badness = 3;
+ description = "Service has very limited write access to the OS file hierarchy";
+ } else if (streq_ptr(info->protect_system, "strict")) {
+ badness = 0;
+ description = "Service has strict read-only access to the OS file hierarchy";
+ }
+ } else if (r > 0) {
+ badness = 5;
+ description = "Service has limited write access to the OS file hierarchy";
+ }
+
+ copy = strdup(description);
+ if (!copy)
+ return log_oom();
+
+ *ret_badness = badness;
+ *ret_description = copy;
+
+ return 0;
+}
+
+static int assess_root_directory(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness =
+ empty_or_root(info->root_directory) &&
+ empty_or_root(info->root_image);
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_capability_bounding_set(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = !!(info->capability_bounding_set & a->parameter);
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_umask(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ char *copy = NULL;
+ const char *d;
+ uint64_t b;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (!FLAGS_SET(info->_umask, 0002)) {
+ d = "Files created by service are world-writable by default";
+ b = 10;
+ } else if (!FLAGS_SET(info->_umask, 0004)) {
+ d = "Files created by service are world-readable by default";
+ b = 5;
+ } else if (!FLAGS_SET(info->_umask, 0020)) {
+ d = "Files created by service are group-writable by default";
+ b = 2;
+ } else if (!FLAGS_SET(info->_umask, 0040)) {
+ d = "Files created by service are group-readable by default";
+ b = 1;
+ } else {
+ d = "Files created by service are accessible only by service's own user by default";
+ b = 0;
+ }
+
+ copy = strdup(d);
+ if (!copy)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = copy;
+
+ return 0;
+}
+
+static int assess_keyring_mode(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = !streq_ptr(info->keyring_mode, "private");
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_protect_proc(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (streq_ptr(info->protect_proc, "noaccess"))
+ *ret_badness = 1;
+ else if (STRPTR_IN_SET(info->protect_proc, "invisible", "ptraceable"))
+ *ret_badness = 0;
+ else
+ *ret_badness = 3;
+
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_proc_subset(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = !streq_ptr(info->proc_subset, "pid");
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_notify_access(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = streq_ptr(info->notify_access, "all");
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_remove_ipc(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (security_info_runs_privileged(info))
+ *ret_badness = UINT64_MAX;
+ else
+ *ret_badness = !info->remove_ipc;
+
+ *ret_description = NULL;
+ return 0;
+}
+
+static int assess_supplementary_groups(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (security_info_runs_privileged(info))
+ *ret_badness = UINT64_MAX;
+ else
+ *ret_badness = !strv_isempty(info->supplementary_groups);
+
+ *ret_description = NULL;
+ return 0;
+}
+
+static int assess_restrict_namespaces(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = !!(info->restrict_namespaces & a->parameter);
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static int assess_system_call_architectures(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ char *d;
+ uint64_t b;
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (strv_isempty(info->system_call_architectures)) {
+ b = 10;
+ d = strdup("Service may execute system calls with all ABIs");
+ } else if (strv_equal(info->system_call_architectures, STRV_MAKE("native"))) {
+ b = 0;
+ d = strdup("Service may execute system calls only with native ABI");
+ } else {
+ b = 8;
+ d = strdup("Service may execute system calls with multiple ABIs");
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = d;
+
+ return 0;
+}
+
+#if HAVE_SECCOMP
+
+static bool syscall_names_in_filter(Set *s, bool allow_list, const SyscallFilterSet *f, const char **ret_offending_syscall) {
+ const char *syscall;
+
+ NULSTR_FOREACH(syscall, f->value) {
+ int id;
+
+ if (syscall[0] == '@') {
+ const SyscallFilterSet *g;
+
+ assert_se(g = syscall_filter_set_find(syscall));
+ if (syscall_names_in_filter(s, allow_list, g, ret_offending_syscall))
+ return true; /* bad! */
+
+ continue;
+ }
+
+ /* Let's see if the system call actually exists on this platform, before complaining */
+ id = seccomp_syscall_resolve_name(syscall);
+ if (id < 0)
+ continue;
+
+ if (set_contains(s, syscall) == allow_list) {
+ log_debug("Offending syscall filter item: %s", syscall);
+ if (ret_offending_syscall)
+ *ret_offending_syscall = syscall;
+ return true; /* bad! */
+ }
+ }
+
+ *ret_offending_syscall = NULL;
+ return false;
+}
+
+static int assess_system_call_filter(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(a);
+ assert(info);
+ assert(ret_badness);
+ assert(ret_description);
+
+ assert(a->parameter < _SYSCALL_FILTER_SET_MAX);
+ const SyscallFilterSet *f = syscall_filter_sets + a->parameter;
+
+ char *d = NULL;
+ uint64_t b;
+
+ if (!info->system_call_filter_allow_list && set_isempty(info->system_call_filter)) {
+ d = strdup("Service does not filter system calls");
+ b = 10;
+ } else {
+ bool bad;
+ const char *offender = NULL;
+
+ log_debug("Analyzing system call filter, checking against: %s", f->name);
+ bad = syscall_names_in_filter(info->system_call_filter, info->system_call_filter_allow_list, f, &offender);
+ log_debug("Result: %s", bad ? "bad" : "good");
+
+ if (info->system_call_filter_allow_list) {
+ if (bad) {
+ (void) asprintf(&d, "System call allow list defined for service, and %s is included "
+ "(e.g. %s is allowed)",
+ f->name, offender);
+ b = 9;
+ } else {
+ (void) asprintf(&d, "System call allow list defined for service, and %s is not included",
+ f->name);
+ b = 0;
+ }
+ } else {
+ if (bad) {
+ (void) asprintf(&d, "System call deny list defined for service, and %s is not included "
+ "(e.g. %s is allowed)",
+ f->name, offender);
+ b = 10;
+ } else {
+ (void) asprintf(&d, "System call deny list defined for service, and %s is included",
+ f->name);
+ b = 0;
+ }
+ }
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = d;
+
+ return 0;
+}
+
+#endif
+
+static int assess_ip_address_allow(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ char *d = NULL;
+ uint64_t b;
+
+ assert(info);
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (info->ip_filters_custom_ingress || info->ip_filters_custom_egress) {
+ d = strdup("Service defines custom ingress/egress IP filters with BPF programs");
+ b = 0;
+ } else if (!info->ip_address_deny_all) {
+ d = strdup("Service does not define an IP address allow list");
+ b = 10;
+ } else if (info->ip_address_allow_other) {
+ d = strdup("Service defines IP address allow list with non-localhost entries");
+ b = 5;
+ } else if (info->ip_address_allow_localhost) {
+ d = strdup("Service defines IP address allow list with only localhost entries");
+ b = 2;
+ } else {
+ d = strdup("Service blocks all IP address ranges");
+ b = 0;
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = d;
+
+ return 0;
+}
+
+static int assess_device_allow(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ char *d = NULL;
+ uint64_t b;
+
+ assert(info);
+ assert(ret_badness);
+ assert(ret_description);
+
+ if (STRPTR_IN_SET(info->device_policy, "strict", "closed")) {
+
+ if (info->device_allow_non_empty) {
+ d = strdup("Service has a device ACL with some special devices");
+ b = 5;
+ } else {
+ d = strdup("Service has a minimal device ACL");
+ b = 0;
+ }
+ } else {
+ d = strdup("Service has no device ACL");
+ b = 10;
+ }
+
+ if (!d)
+ return log_oom();
+
+ *ret_badness = b;
+ *ret_description = d;
+
+ return 0;
+}
+
+static int assess_ambient_capabilities(
+ const struct security_assessor *a,
+ const struct security_info *info,
+ const void *data,
+ uint64_t *ret_badness,
+ char **ret_description) {
+
+ assert(ret_badness);
+ assert(ret_description);
+
+ *ret_badness = info->ambient_capabilities != 0;
+ *ret_description = NULL;
+
+ return 0;
+}
+
+static const struct security_assessor security_assessor_table[] = {
+ {
+ .id = "User=/DynamicUser=",
+ .description_bad = "Service runs as root user",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#User=",
+ .weight = 2000,
+ .range = 10,
+ .assess = assess_user,
+ },
+ {
+ .id = "SupplementaryGroups=",
+ .description_good = "Service has no supplementary groups",
+ .description_bad = "Service runs with supplementary groups",
+ .description_na = "Service runs as root, option does not matter",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SupplementaryGroups=",
+ .weight = 200,
+ .range = 1,
+ .assess = assess_supplementary_groups,
+ },
+ {
+ .id = "PrivateDevices=",
+ .description_good = "Service has no access to hardware devices",
+ .description_bad = "Service potentially has access to hardware devices",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateDevices=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_devices),
+ },
+ {
+ .id = "PrivateMounts=",
+ .description_good = "Service cannot install system mounts",
+ .description_bad = "Service may install system mounts",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateMounts=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_mounts),
+ },
+ {
+ .id = "PrivateNetwork=",
+ .description_good = "Service has no access to the host's network",
+ .description_bad = "Service has access to the host's network",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateNetwork=",
+ .weight = 2500,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_network),
+ },
+ {
+ .id = "PrivateTmp=",
+ .description_good = "Service has no access to other software's temporary files",
+ .description_bad = "Service has access to other software's temporary files",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateTmp=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_tmp),
+ .default_dependencies_only = true,
+ },
+ {
+ .id = "PrivateUsers=",
+ .description_good = "Service does not have access to other users",
+ .description_bad = "Service has access to other users",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#PrivateUsers=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, private_users),
+ },
+ {
+ .id = "ProtectControlGroups=",
+ .description_good = "Service cannot modify the control group file system",
+ .description_bad = "Service may modify the control group file system",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectControlGroups=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_control_groups),
+ },
+ {
+ .id = "ProtectKernelModules=",
+ .description_good = "Service cannot load or read kernel modules",
+ .description_bad = "Service may load or read kernel modules",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectKernelModules=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_kernel_modules),
+ },
+ {
+ .id = "ProtectKernelTunables=",
+ .description_good = "Service cannot alter kernel tunables (/proc/sys, …)",
+ .description_bad = "Service may alter kernel tunables",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectKernelTunables=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_kernel_tunables),
+ },
+ {
+ .id = "ProtectKernelLogs=",
+ .description_good = "Service cannot read from or write to the kernel log ring buffer",
+ .description_bad = "Service may read from or write to the kernel log ring buffer",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectKernelLogs=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_kernel_logs),
+ },
+ {
+ .id = "ProtectClock=",
+ .description_good = "Service cannot write to the hardware clock or system clock",
+ .description_bad = "Service may write to the hardware clock or system clock",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectClock=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_clock),
+ },
+ {
+ .id = "ProtectHome=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectHome=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_protect_home,
+ .default_dependencies_only = true,
+ },
+ {
+ .id = "ProtectHostname=",
+ .description_good = "Service cannot change system host/domainname",
+ .description_bad = "Service may change system host/domainname",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectHostname=",
+ .weight = 50,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, protect_hostname),
+ },
+ {
+ .id = "ProtectSystem=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectSystem=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_protect_system,
+ .default_dependencies_only = true,
+ },
+ {
+ .id = "RootDirectory=/RootImage=",
+ .description_good = "Service has its own root directory/image",
+ .description_bad = "Service runs within the host's root directory",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RootDirectory=",
+ .weight = 200,
+ .range = 1,
+ .assess = assess_root_directory,
+ .default_dependencies_only = true,
+ },
+ {
+ .id = "LockPersonality=",
+ .description_good = "Service cannot change ABI personality",
+ .description_bad = "Service may change ABI personality",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#LockPersonality=",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, lock_personality),
+ },
+ {
+ .id = "MemoryDenyWriteExecute=",
+ .description_good = "Service cannot create writable executable memory mappings",
+ .description_bad = "Service may create writable executable memory mappings",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#MemoryDenyWriteExecute=",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, memory_deny_write_execute),
+ },
+ {
+ .id = "NoNewPrivileges=",
+ .description_good = "Service processes cannot acquire new privileges",
+ .description_bad = "Service processes may acquire new privileges",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#NoNewPrivileges=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, no_new_privileges),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_ADMIN",
+ .description_good = "Service has no administrator privileges",
+ .description_bad = "Service has administrator privileges",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = UINT64_C(1) << CAP_SYS_ADMIN,
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SET(UID|GID|PCAP)",
+ .description_good = "Service cannot change UID/GID identities/capabilities",
+ .description_bad = "Service may change UID/GID identities/capabilities",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SETUID)|
+ (UINT64_C(1) << CAP_SETGID)|
+ (UINT64_C(1) << CAP_SETPCAP),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_PTRACE",
+ .description_good = "Service has no ptrace() debugging abilities",
+ .description_bad = "Service has ptrace() debugging abilities",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_PTRACE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_TIME",
+ .description_good = "Service processes cannot change the system clock",
+ .description_bad = "Service processes may change the system clock",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = UINT64_C(1) << CAP_SYS_TIME,
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_NET_ADMIN",
+ .description_good = "Service has no network configuration privileges",
+ .description_bad = "Service has network configuration privileges",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_NET_ADMIN),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_RAWIO",
+ .description_good = "Service has no raw I/O access",
+ .description_bad = "Service has raw I/O access",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_RAWIO),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_MODULE",
+ .description_good = "Service cannot load kernel modules",
+ .description_bad = "Service may load kernel modules",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_MODULE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_AUDIT_*",
+ .description_good = "Service has no audit subsystem access",
+ .description_bad = "Service has audit subsystem access",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_AUDIT_CONTROL) |
+ (UINT64_C(1) << CAP_AUDIT_READ) |
+ (UINT64_C(1) << CAP_AUDIT_WRITE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYSLOG",
+ .description_good = "Service has no access to kernel logging",
+ .description_bad = "Service has access to kernel logging",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYSLOG),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_(NICE|RESOURCE)",
+ .description_good = "Service has no privileges to change resource use parameters",
+ .description_bad = "Service has privileges to change resource use parameters",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_NICE) |
+ (UINT64_C(1) << CAP_SYS_RESOURCE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_MKNOD",
+ .description_good = "Service cannot create device nodes",
+ .description_bad = "Service may create device nodes",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_MKNOD),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_(CHOWN|FSETID|SETFCAP)",
+ .description_good = "Service cannot change file ownership/access mode/capabilities",
+ .description_bad = "Service may change file ownership/access mode/capabilities unrestricted",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_CHOWN) |
+ (UINT64_C(1) << CAP_FSETID) |
+ (UINT64_C(1) << CAP_SETFCAP),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_(DAC_*|FOWNER|IPC_OWNER)",
+ .description_good = "Service cannot override UNIX file/IPC permission checks",
+ .description_bad = "Service may override UNIX file/IPC permission checks",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_DAC_OVERRIDE) |
+ (UINT64_C(1) << CAP_DAC_READ_SEARCH) |
+ (UINT64_C(1) << CAP_FOWNER) |
+ (UINT64_C(1) << CAP_IPC_OWNER),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_KILL",
+ .description_good = "Service cannot send UNIX signals to arbitrary processes",
+ .description_bad = "Service may send UNIX signals to arbitrary processes",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_KILL),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_NET_(BIND_SERVICE|BROADCAST|RAW)",
+ .description_good = "Service has no elevated networking privileges",
+ .description_bad = "Service has elevated networking privileges",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_NET_BIND_SERVICE) |
+ (UINT64_C(1) << CAP_NET_BROADCAST) |
+ (UINT64_C(1) << CAP_NET_RAW),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_BOOT",
+ .description_good = "Service cannot issue reboot()",
+ .description_bad = "Service may issue reboot()",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_BOOT),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_MAC_*",
+ .description_good = "Service cannot adjust SMACK MAC",
+ .description_bad = "Service may adjust SMACK MAC",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_MAC_ADMIN)|
+ (UINT64_C(1) << CAP_MAC_OVERRIDE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_LINUX_IMMUTABLE",
+ .description_good = "Service cannot mark files immutable",
+ .description_bad = "Service may mark files immutable",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 75,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_LINUX_IMMUTABLE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_IPC_LOCK",
+ .description_good = "Service cannot lock memory into RAM",
+ .description_bad = "Service may lock memory into RAM",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 50,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_IPC_LOCK),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_CHROOT",
+ .description_good = "Service cannot issue chroot()",
+ .description_bad = "Service may issue chroot()",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 50,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_CHROOT),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_BLOCK_SUSPEND",
+ .description_good = "Service cannot establish wake locks",
+ .description_bad = "Service may establish wake locks",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_BLOCK_SUSPEND),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_WAKE_ALARM",
+ .description_good = "Service cannot program timers that wake up the system",
+ .description_bad = "Service may program timers that wake up the system",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_WAKE_ALARM),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_LEASE",
+ .description_good = "Service cannot create file leases",
+ .description_bad = "Service may create file leases",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_LEASE),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_TTY_CONFIG",
+ .description_good = "Service cannot issue vhangup()",
+ .description_bad = "Service may issue vhangup()",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_TTY_CONFIG),
+ },
+ {
+ .id = "CapabilityBoundingSet=~CAP_SYS_PACCT",
+ .description_good = "Service cannot use acct()",
+ .description_bad = "Service may use acct()",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#CapabilityBoundingSet=",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_capability_bounding_set,
+ .parameter = (UINT64_C(1) << CAP_SYS_PACCT),
+ },
+ {
+ .id = "UMask=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#UMask=",
+ .weight = 100,
+ .range = 10,
+ .assess = assess_umask,
+ },
+ {
+ .id = "KeyringMode=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#KeyringMode=",
+ .description_good = "Service doesn't share key material with other services",
+ .description_bad = "Service shares key material with other service",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_keyring_mode,
+ },
+ {
+ .id = "ProtectProc=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectProc=",
+ .description_good = "Service has restricted access to process tree (/proc hidepid=)",
+ .description_bad = "Service has full access to process tree (/proc hidepid=)",
+ .weight = 1000,
+ .range = 3,
+ .assess = assess_protect_proc,
+ },
+ {
+ .id = "ProcSubset=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProcSubset=",
+ .description_good = "Service has no access to non-process /proc files (/proc subset=)",
+ .description_bad = "Service has full access to non-process /proc files (/proc subset=)",
+ .weight = 10,
+ .range = 1,
+ .assess = assess_proc_subset,
+ },
+ {
+ .id = "NotifyAccess=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#NotifyAccess=",
+ .description_good = "Service child processes cannot alter service state",
+ .description_bad = "Service child processes may alter service state",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_notify_access,
+ },
+ {
+ .id = "RemoveIPC=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RemoveIPC=",
+ .description_good = "Service user cannot leave SysV IPC objects around",
+ .description_bad = "Service user may leave SysV IPC objects around",
+ .description_na = "Service runs as root, option does not apply",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_remove_ipc,
+ .offset = offsetof(struct security_info, remove_ipc),
+ },
+ {
+ .id = "Delegate=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Delegate=",
+ .description_good = "Service does not maintain its own delegated control group subtree",
+ .description_bad = "Service maintains its own delegated control group subtree",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, delegate),
+ .parameter = true, /* invert! */
+ },
+ {
+ .id = "RestrictRealtime=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictRealtime=",
+ .description_good = "Service realtime scheduling access is restricted",
+ .description_bad = "Service may acquire realtime scheduling",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_realtime),
+ },
+ {
+ .id = "RestrictSUIDSGID=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictSUIDSGID=",
+ .description_good = "SUID/SGID file creation by service is restricted",
+ .description_bad = "Service may create SUID/SGID files",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_suid_sgid),
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWUSER",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create user namespaces",
+ .description_bad = "Service may create user namespaces",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWUSER,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWNS",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create file system namespaces",
+ .description_bad = "Service may create file system namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWNS,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWIPC",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create IPC namespaces",
+ .description_bad = "Service may create IPC namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWIPC,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWPID",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create process namespaces",
+ .description_bad = "Service may create process namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWPID,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWCGROUP",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create cgroup namespaces",
+ .description_bad = "Service may create cgroup namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWCGROUP,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWNET",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create network namespaces",
+ .description_bad = "Service may create network namespaces",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWNET,
+ },
+ {
+ .id = "RestrictNamespaces=~CLONE_NEWUTS",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=",
+ .description_good = "Service cannot create hostname namespaces",
+ .description_bad = "Service may create hostname namespaces",
+ .weight = 100,
+ .range = 1,
+ .assess = assess_restrict_namespaces,
+ .parameter = CLONE_NEWUTS,
+ },
+ {
+ .id = "RestrictAddressFamilies=~AF_(INET|INET6)",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate Internet sockets",
+ .description_bad = "Service may allocate Internet sockets",
+ .weight = 1500,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_inet),
+ },
+ {
+ .id = "RestrictAddressFamilies=~AF_UNIX",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate local sockets",
+ .description_bad = "Service may allocate local sockets",
+ .weight = 25,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_unix),
+ },
+ {
+ .id = "RestrictAddressFamilies=~AF_NETLINK",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate netlink sockets",
+ .description_bad = "Service may allocate netlink sockets",
+ .weight = 200,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_netlink),
+ },
+ {
+ .id = "RestrictAddressFamilies=~AF_PACKET",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate packet sockets",
+ .description_bad = "Service may allocate packet sockets",
+ .weight = 1000,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_packet),
+ },
+ {
+ .id = "RestrictAddressFamilies=~…",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictAddressFamilies=",
+ .description_good = "Service cannot allocate exotic sockets",
+ .description_bad = "Service may allocate exotic sockets",
+ .weight = 1250,
+ .range = 1,
+ .assess = assess_bool,
+ .offset = offsetof(struct security_info, restrict_address_family_other),
+ },
+ {
+ .id = "SystemCallArchitectures=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallArchitectures=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_architectures,
+ },
+#if HAVE_SECCOMP
+ {
+ .id = "SystemCallFilter=~@swap",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_SWAP,
+ },
+ {
+ .id = "SystemCallFilter=~@obsolete",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 250,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_OBSOLETE,
+ },
+ {
+ .id = "SystemCallFilter=~@clock",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_CLOCK,
+ },
+ {
+ .id = "SystemCallFilter=~@cpu-emulation",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 250,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_CPU_EMULATION,
+ },
+ {
+ .id = "SystemCallFilter=~@debug",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_DEBUG,
+ },
+ {
+ .id = "SystemCallFilter=~@mount",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_MOUNT,
+ },
+ {
+ .id = "SystemCallFilter=~@module",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_MODULE,
+ },
+ {
+ .id = "SystemCallFilter=~@raw-io",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_RAW_IO,
+ },
+ {
+ .id = "SystemCallFilter=~@reboot",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_REBOOT,
+ },
+ {
+ .id = "SystemCallFilter=~@privileged",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 700,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_PRIVILEGED,
+ },
+ {
+ .id = "SystemCallFilter=~@resources",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#SystemCallFilter=",
+ .weight = 700,
+ .range = 10,
+ .assess = assess_system_call_filter,
+ .parameter = SYSCALL_FILTER_SET_RESOURCES,
+ },
+#endif
+ {
+ .id = "IPAddressDeny=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#IPAddressDeny=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_ip_address_allow,
+ },
+ {
+ .id = "DeviceAllow=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#DeviceAllow=",
+ .weight = 1000,
+ .range = 10,
+ .assess = assess_device_allow,
+ },
+ {
+ .id = "AmbientCapabilities=",
+ .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#AmbientCapabilities=",
+ .description_good = "Service process does not receive ambient capabilities",
+ .description_bad = "Service process receives ambient capabilities",
+ .weight = 500,
+ .range = 1,
+ .assess = assess_ambient_capabilities,
+ },
+};
+
+static int assess(const struct security_info *info, Table *overview_table, AnalyzeSecurityFlags flags) {
+ static const struct {
+ uint64_t exposure;
+ const char *name;
+ const char *color;
+ SpecialGlyph smiley;
+ } badness_table[] = {
+ { 100, "DANGEROUS", ANSI_HIGHLIGHT_RED, SPECIAL_GLYPH_DEPRESSED_SMILEY },
+ { 90, "UNSAFE", ANSI_HIGHLIGHT_RED, SPECIAL_GLYPH_UNHAPPY_SMILEY },
+ { 75, "EXPOSED", ANSI_HIGHLIGHT_YELLOW, SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY },
+ { 50, "MEDIUM", NULL, SPECIAL_GLYPH_NEUTRAL_SMILEY },
+ { 10, "OK", ANSI_HIGHLIGHT_GREEN, SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY },
+ { 1, "SAFE", ANSI_HIGHLIGHT_GREEN, SPECIAL_GLYPH_HAPPY_SMILEY },
+ { 0, "PERFECT", ANSI_HIGHLIGHT_GREEN, SPECIAL_GLYPH_ECSTATIC_SMILEY },
+ };
+
+ uint64_t badness_sum = 0, weight_sum = 0, exposure;
+ _cleanup_(table_unrefp) Table *details_table = NULL;
+ size_t i;
+ int r;
+
+ if (!FLAGS_SET(flags, ANALYZE_SECURITY_SHORT)) {
+ details_table = table_new(" ", "name", "description", "weight", "badness", "range", "exposure");
+ if (!details_table)
+ return log_oom();
+
+ (void) table_set_sort(details_table, (size_t) 3, (size_t) 1, (size_t) -1);
+ (void) table_set_reverse(details_table, 3, true);
+
+ if (getenv_bool("SYSTEMD_ANALYZE_DEBUG") <= 0)
+ (void) table_set_display(details_table, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 6, (size_t) -1);
+ }
+
+ for (i = 0; i < ELEMENTSOF(security_assessor_table); i++) {
+ const struct security_assessor *a = security_assessor_table + i;
+ _cleanup_free_ char *d = NULL;
+ uint64_t badness;
+ void *data;
+
+ data = (uint8_t *) info + a->offset;
+
+ if (a->default_dependencies_only && !info->default_dependencies) {
+ badness = UINT64_MAX;
+ d = strdup("Service runs in special boot phase, option does not apply");
+ if (!d)
+ return log_oom();
+ } else {
+ r = a->assess(a, info, data, &badness, &d);
+ if (r < 0)
+ return r;
+ }
+
+ assert(a->range > 0);
+
+ if (badness != UINT64_MAX) {
+ assert(badness <= a->range);
+
+ badness_sum += DIV_ROUND_UP(badness * a->weight, a->range);
+ weight_sum += a->weight;
+ }
+
+ if (details_table) {
+ const char *checkmark, *description, *color = NULL;
+
+ if (badness == UINT64_MAX) {
+ checkmark = " ";
+ description = a->description_na;
+ color = NULL;
+ } else if (badness == a->range) {
+ checkmark = special_glyph(SPECIAL_GLYPH_CROSS_MARK);
+ description = a->description_bad;
+ color = ansi_highlight_red();
+ } else if (badness == 0) {
+ checkmark = special_glyph(SPECIAL_GLYPH_CHECK_MARK);
+ description = a->description_good;
+ color = ansi_highlight_green();
+ } else {
+ checkmark = special_glyph(SPECIAL_GLYPH_CROSS_MARK);
+ description = NULL;
+ color = ansi_highlight_red();
+ }
+
+ if (d)
+ description = d;
+
+ r = table_add_many(details_table,
+ TABLE_STRING, checkmark,
+ TABLE_SET_MINIMUM_WIDTH, 1,
+ TABLE_SET_MAXIMUM_WIDTH, 1,
+ TABLE_SET_ELLIPSIZE_PERCENT, 0,
+ TABLE_SET_COLOR, color,
+ TABLE_STRING, a->id, TABLE_SET_URL, a->url,
+ TABLE_STRING, description,
+ TABLE_UINT64, a->weight, TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_UINT64, badness, TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_UINT64, a->range, TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_EMPTY, TABLE_SET_ALIGN_PERCENT, 100);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ }
+
+ assert(weight_sum > 0);
+
+ if (details_table) {
+ size_t row;
+
+ for (row = 1; row < table_get_rows(details_table); row++) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1 + DECIMAL_STR_MAX(uint64_t) + 1];
+ const uint64_t *weight, *badness, *range;
+ TableCell *cell;
+ uint64_t x;
+
+ assert_se(weight = table_get_at(details_table, row, 3));
+ assert_se(badness = table_get_at(details_table, row, 4));
+ assert_se(range = table_get_at(details_table, row, 5));
+
+ if (*badness == UINT64_MAX || *badness == 0)
+ continue;
+
+ assert_se(cell = table_get_cell(details_table, row, 6));
+
+ x = DIV_ROUND_UP(DIV_ROUND_UP(*badness * *weight * 100U, *range), weight_sum);
+ xsprintf(buf, "%" PRIu64 ".%" PRIu64, x / 10, x % 10);
+
+ r = table_update(details_table, cell, TABLE_STRING, buf);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update cell in table: %m");
+ }
+
+ r = table_print(details_table, stdout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to output table: %m");
+ }
+
+ exposure = DIV_ROUND_UP(badness_sum * 100U, weight_sum);
+
+ for (i = 0; i < ELEMENTSOF(badness_table); i++)
+ if (exposure >= badness_table[i].exposure)
+ break;
+
+ assert(i < ELEMENTSOF(badness_table));
+
+ if (details_table) {
+ _cleanup_free_ char *clickable = NULL;
+ const char *name;
+
+ /* If we shall output the details table, also print the brief summary underneath */
+
+ if (info->fragment_path) {
+ r = terminal_urlify_path(info->fragment_path, info->id, &clickable);
+ if (r < 0)
+ return log_oom();
+
+ name = clickable;
+ } else
+ name = info->id;
+
+ printf("\n%s %sOverall exposure level for %s%s: %s%" PRIu64 ".%" PRIu64 " %s%s %s\n",
+ special_glyph(SPECIAL_GLYPH_ARROW),
+ ansi_highlight(),
+ name,
+ ansi_normal(),
+ colors_enabled() ? strempty(badness_table[i].color) : "",
+ exposure / 10, exposure % 10,
+ badness_table[i].name,
+ ansi_normal(),
+ special_glyph(badness_table[i].smiley));
+ }
+
+ fflush(stdout);
+
+ if (overview_table) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1 + DECIMAL_STR_MAX(uint64_t) + 1];
+ _cleanup_free_ char *url = NULL;
+
+ if (info->fragment_path) {
+ r = file_url_from_path(info->fragment_path, &url);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate URL from path: %m");
+ }
+
+ xsprintf(buf, "%" PRIu64 ".%" PRIu64, exposure / 10, exposure % 10);
+
+ r = table_add_many(overview_table,
+ TABLE_STRING, info->id,
+ TABLE_SET_URL, url,
+ TABLE_STRING, buf,
+ TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_STRING, badness_table[i].name,
+ TABLE_SET_COLOR, strempty(badness_table[i].color),
+ TABLE_STRING, special_glyph(badness_table[i].smiley));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ return 0;
+}
+
+static int property_read_restrict_address_families(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ int allow_list, r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "b", &allow_list);
+ if (r < 0)
+ return r;
+
+ info->restrict_address_family_inet =
+ info->restrict_address_family_unix =
+ info->restrict_address_family_netlink =
+ info->restrict_address_family_packet =
+ info->restrict_address_family_other = allow_list;
+
+ r = sd_bus_message_enter_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name;
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (STR_IN_SET(name, "AF_INET", "AF_INET6"))
+ info->restrict_address_family_inet = !allow_list;
+ else if (streq(name, "AF_UNIX"))
+ info->restrict_address_family_unix = !allow_list;
+ else if (streq(name, "AF_NETLINK"))
+ info->restrict_address_family_netlink = !allow_list;
+ else if (streq(name, "AF_PACKET"))
+ info->restrict_address_family_packet = !allow_list;
+ else
+ info->restrict_address_family_other = !allow_list;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int property_read_system_call_filter(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ int allow_list, r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "b", &allow_list);
+ if (r < 0)
+ return r;
+
+ info->system_call_filter_allow_list = allow_list;
+
+ r = sd_bus_message_enter_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name;
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = set_put_strdup(&info->system_call_filter, name);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int property_read_ip_address_allow(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ bool deny_ipv4 = false, deny_ipv6 = false;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'a', "(iayu)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const void *data;
+ size_t size;
+ int32_t family;
+ uint32_t prefixlen;
+
+ r = sd_bus_message_enter_container(m, 'r', "iayu");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(m, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(m, 'y', &data, &size);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "u", &prefixlen);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (streq(member, "IPAddressAllow")) {
+ union in_addr_union u;
+
+ if (family == AF_INET && size == 4 && prefixlen == 8)
+ memcpy(&u.in, data, size);
+ else if (family == AF_INET6 && size == 16 && prefixlen == 128)
+ memcpy(&u.in6, data, size);
+ else {
+ info->ip_address_allow_other = true;
+ continue;
+ }
+
+ if (in_addr_is_localhost(family, &u))
+ info->ip_address_allow_localhost = true;
+ else
+ info->ip_address_allow_other = true;
+ } else {
+ assert(streq(member, "IPAddressDeny"));
+
+ if (family == AF_INET && size == 4 && prefixlen == 0)
+ deny_ipv4 = true;
+ else if (family == AF_INET6 && size == 16 && prefixlen == 0)
+ deny_ipv6 = true;
+ }
+ }
+
+ info->ip_address_deny_all = deny_ipv4 && deny_ipv6;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int property_read_ip_filters(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ _cleanup_(strv_freep) char **l = NULL;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_read_strv(m, &l);
+ if (r < 0)
+ return r;
+
+ if (streq(member, "IPIngressFilterPath"))
+ info->ip_filters_custom_ingress = !strv_isempty(l);
+ else if (streq(member, "IPEgressFilterPath"))
+ info->ip_filters_custom_ingress = !strv_isempty(l);
+
+ return 0;
+}
+
+static int property_read_device_allow(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ struct security_info *info = userdata;
+ size_t n = 0;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name, *policy;
+
+ r = sd_bus_message_read(m, "(ss)", &name, &policy);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ n++;
+ }
+
+ info->device_allow_non_empty = n > 0;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int acquire_security_info(sd_bus *bus, const char *name, struct security_info *info, AnalyzeSecurityFlags flags) {
+
+ static const struct bus_properties_map security_map[] = {
+ { "AmbientCapabilities", "t", NULL, offsetof(struct security_info, ambient_capabilities) },
+ { "CapabilityBoundingSet", "t", NULL, offsetof(struct security_info, capability_bounding_set) },
+ { "DefaultDependencies", "b", NULL, offsetof(struct security_info, default_dependencies) },
+ { "Delegate", "b", NULL, offsetof(struct security_info, delegate) },
+ { "DeviceAllow", "a(ss)", property_read_device_allow, 0 },
+ { "DevicePolicy", "s", NULL, offsetof(struct security_info, device_policy) },
+ { "DynamicUser", "b", NULL, offsetof(struct security_info, dynamic_user) },
+ { "FragmentPath", "s", NULL, offsetof(struct security_info, fragment_path) },
+ { "IPAddressAllow", "a(iayu)", property_read_ip_address_allow, 0 },
+ { "IPAddressDeny", "a(iayu)", property_read_ip_address_allow, 0 },
+ { "IPIngressFilterPath", "as", property_read_ip_filters, 0 },
+ { "IPEgressFilterPath", "as", property_read_ip_filters, 0 },
+ { "Id", "s", NULL, offsetof(struct security_info, id) },
+ { "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) },
+ { "ProtectProc", "s", NULL, offsetof(struct security_info, protect_proc) },
+ { "ProcSubset", "s", NULL, offsetof(struct security_info, proc_subset) },
+ { "LoadState", "s", NULL, offsetof(struct security_info, load_state) },
+ { "LockPersonality", "b", NULL, offsetof(struct security_info, lock_personality) },
+ { "MemoryDenyWriteExecute", "b", NULL, offsetof(struct security_info, memory_deny_write_execute) },
+ { "NoNewPrivileges", "b", NULL, offsetof(struct security_info, no_new_privileges) },
+ { "NotifyAccess", "s", NULL, offsetof(struct security_info, notify_access) },
+ { "PrivateDevices", "b", NULL, offsetof(struct security_info, private_devices) },
+ { "PrivateMounts", "b", NULL, offsetof(struct security_info, private_mounts) },
+ { "PrivateNetwork", "b", NULL, offsetof(struct security_info, private_network) },
+ { "PrivateTmp", "b", NULL, offsetof(struct security_info, private_tmp) },
+ { "PrivateUsers", "b", NULL, offsetof(struct security_info, private_users) },
+ { "ProtectControlGroups", "b", NULL, offsetof(struct security_info, protect_control_groups) },
+ { "ProtectHome", "s", NULL, offsetof(struct security_info, protect_home) },
+ { "ProtectHostname", "b", NULL, offsetof(struct security_info, protect_hostname) },
+ { "ProtectKernelModules", "b", NULL, offsetof(struct security_info, protect_kernel_modules) },
+ { "ProtectKernelTunables", "b", NULL, offsetof(struct security_info, protect_kernel_tunables) },
+ { "ProtectKernelLogs", "b", NULL, offsetof(struct security_info, protect_kernel_logs) },
+ { "ProtectClock", "b", NULL, offsetof(struct security_info, protect_clock) },
+ { "ProtectSystem", "s", NULL, offsetof(struct security_info, protect_system) },
+ { "RemoveIPC", "b", NULL, offsetof(struct security_info, remove_ipc) },
+ { "RestrictAddressFamilies", "(bas)", property_read_restrict_address_families, 0 },
+ { "RestrictNamespaces", "t", NULL, offsetof(struct security_info, restrict_namespaces) },
+ { "RestrictRealtime", "b", NULL, offsetof(struct security_info, restrict_realtime) },
+ { "RestrictSUIDSGID", "b", NULL, offsetof(struct security_info, restrict_suid_sgid) },
+ { "RootDirectory", "s", NULL, offsetof(struct security_info, root_directory) },
+ { "RootImage", "s", NULL, offsetof(struct security_info, root_image) },
+ { "SupplementaryGroups", "as", NULL, offsetof(struct security_info, supplementary_groups) },
+ { "SystemCallArchitectures", "as", NULL, offsetof(struct security_info, system_call_architectures) },
+ { "SystemCallFilter", "(as)", property_read_system_call_filter, 0 },
+ { "Type", "s", NULL, offsetof(struct security_info, type) },
+ { "UMask", "u", NULL, offsetof(struct security_info, _umask) },
+ { "User", "s", NULL, offsetof(struct security_info, user) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ /* Note: this mangles *info on failure! */
+
+ assert(bus);
+ assert(name);
+ assert(info);
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return log_oom();
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ security_map,
+ BUS_MAP_STRDUP | BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ NULL,
+ info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit properties: %s", bus_error_message(&error, r));
+
+ if (!streq_ptr(info->load_state, "loaded")) {
+
+ if (FLAGS_SET(flags, ANALYZE_SECURITY_ONLY_LOADED))
+ return -EMEDIUMTYPE;
+
+ if (streq_ptr(info->load_state, "not-found"))
+ log_error("Unit %s not found, cannot analyze.", name);
+ else if (streq_ptr(info->load_state, "masked"))
+ log_error("Unit %s is masked, cannot analyze.", name);
+ else
+ log_error("Unit %s not loaded properly, cannot analyze.", name);
+
+ return -EINVAL;
+ }
+
+ if (FLAGS_SET(flags, ANALYZE_SECURITY_ONLY_LONG_RUNNING) && streq_ptr(info->type, "oneshot"))
+ return -EMEDIUMTYPE;
+
+ if (info->private_devices ||
+ info->private_tmp ||
+ info->protect_control_groups ||
+ info->protect_kernel_tunables ||
+ info->protect_kernel_modules ||
+ !streq_ptr(info->protect_home, "no") ||
+ !streq_ptr(info->protect_system, "no") ||
+ info->root_image)
+ info->private_mounts = true;
+
+ if (info->protect_kernel_modules)
+ info->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
+
+ if (info->protect_kernel_logs)
+ info->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYSLOG);
+
+ if (info->protect_clock)
+ info->capability_bounding_set &= ~((UINT64_C(1) << CAP_SYS_TIME) |
+ (UINT64_C(1) << CAP_WAKE_ALARM));
+
+ if (info->private_devices)
+ info->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) |
+ (UINT64_C(1) << CAP_SYS_RAWIO));
+
+ return 0;
+}
+
+static int analyze_security_one(sd_bus *bus, const char *name, Table *overview_table, AnalyzeSecurityFlags flags) {
+ _cleanup_(security_info_free) struct security_info info = {
+ .default_dependencies = true,
+ .capability_bounding_set = UINT64_MAX,
+ .restrict_namespaces = UINT64_MAX,
+ ._umask = 0002,
+ };
+ int r;
+
+ assert(bus);
+ assert(name);
+
+ r = acquire_security_info(bus, name, &info, flags);
+ if (r == -EMEDIUMTYPE) /* Ignore this one because not loaded or Type is oneshot */
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = assess(&info, overview_table, flags);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int analyze_security(sd_bus *bus, char **units, AnalyzeSecurityFlags flags) {
+ _cleanup_(table_unrefp) Table *overview_table = NULL;
+ int ret = 0, r;
+
+ assert(bus);
+
+ if (strv_length(units) != 1) {
+ overview_table = table_new("unit", "exposure", "predicate", "happy");
+ if (!overview_table)
+ return log_oom();
+ }
+
+ if (strv_isempty(units)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ size_t allocated = 0, n = 0;
+ char **i;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnits",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ UnitInfo info;
+ char *copy = NULL;
+
+ r = bus_parse_unit_info(reply, &info);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (!endswith(info.id, ".service"))
+ continue;
+
+ if (!GREEDY_REALLOC(list, allocated, n + 2))
+ return log_oom();
+
+ copy = strdup(info.id);
+ if (!copy)
+ return log_oom();
+
+ list[n++] = copy;
+ list[n] = NULL;
+ }
+
+ strv_sort(list);
+
+ flags |= ANALYZE_SECURITY_SHORT|ANALYZE_SECURITY_ONLY_LOADED|ANALYZE_SECURITY_ONLY_LONG_RUNNING;
+
+ STRV_FOREACH(i, list) {
+ r = analyze_security_one(bus, *i, overview_table, flags);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ } else {
+ char **i;
+
+ STRV_FOREACH(i, units) {
+ _cleanup_free_ char *mangled = NULL, *instance = NULL;
+ const char *name;
+
+ if (!FLAGS_SET(flags, ANALYZE_SECURITY_SHORT) && i != units) {
+ putc('\n', stdout);
+ fflush(stdout);
+ }
+
+ r = unit_name_mangle(*i, 0, &mangled);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name '%s': %m", *i);
+
+ if (!endswith(mangled, ".service"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unit %s is not a service unit, refusing.",
+ *i);
+
+ if (unit_name_is_valid(mangled, UNIT_NAME_TEMPLATE)) {
+ r = unit_name_replace_instance(mangled, "test-instance", &instance);
+ if (r < 0)
+ return log_oom();
+
+ name = instance;
+ } else
+ name = mangled;
+
+ r = analyze_security_one(bus, name, overview_table, flags);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+ }
+
+ if (overview_table) {
+ if (!FLAGS_SET(flags, ANALYZE_SECURITY_SHORT)) {
+ putc('\n', stdout);
+ fflush(stdout);
+ }
+
+ r = table_print(overview_table, stdout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to output table: %m");
+ }
+
+ return ret;
+}
diff --git a/src/analyze/analyze-security.h b/src/analyze/analyze-security.h
new file mode 100644
index 0000000..e8de39f
--- /dev/null
+++ b/src/analyze/analyze-security.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef enum AnalyzeSecurityFlags {
+ ANALYZE_SECURITY_SHORT = 1 << 0,
+ ANALYZE_SECURITY_ONLY_LOADED = 1 << 1,
+ ANALYZE_SECURITY_ONLY_LONG_RUNNING = 1 << 2,
+} AnalyzeSecurityFlags;
+
+int analyze_security(sd_bus *bus, char **units, AnalyzeSecurityFlags flags);
diff --git a/src/analyze/analyze-verify.c b/src/analyze/analyze-verify.c
new file mode 100644
index 0000000..a9c8917
--- /dev/null
+++ b/src/analyze/analyze-verify.c
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "all-units.h"
+#include "analyze-verify.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "log.h"
+#include "manager.h"
+#include "pager.h"
+#include "path-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+static int prepare_filename(const char *filename, char **ret) {
+ int r;
+ const char *name;
+ _cleanup_free_ char *abspath = NULL;
+ _cleanup_free_ char *dir = NULL;
+ _cleanup_free_ char *with_instance = NULL;
+ char *c;
+
+ assert(filename);
+ assert(ret);
+
+ r = path_make_absolute_cwd(filename, &abspath);
+ if (r < 0)
+ return r;
+
+ name = basename(abspath);
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ r = unit_name_replace_instance(name, "i", &with_instance);
+ if (r < 0)
+ return r;
+ }
+
+ dir = dirname_malloc(abspath);
+ if (!dir)
+ return -ENOMEM;
+
+ c = path_join(dir, with_instance ?: name);
+ if (!c)
+ return -ENOMEM;
+
+ *ret = c;
+ return 0;
+}
+
+static int generate_path(char **var, char **filenames) {
+ const char *old;
+ char **filename;
+
+ _cleanup_strv_free_ char **ans = NULL;
+ int r;
+
+ STRV_FOREACH(filename, filenames) {
+ char *t;
+
+ t = dirname_malloc(*filename);
+ if (!t)
+ return -ENOMEM;
+
+ r = strv_consume(&ans, t);
+ if (r < 0)
+ return r;
+ }
+
+ assert_se(strv_uniq(ans));
+
+ /* First, prepend our directories. Second, if some path was specified, use that, and
+ * otherwise use the defaults. Any duplicates will be filtered out in path-lookup.c.
+ * Treat explicit empty path to mean that nothing should be appended.
+ */
+ old = getenv("SYSTEMD_UNIT_PATH");
+ if (!streq_ptr(old, "")) {
+ if (!old)
+ old = ":";
+
+ r = strv_extend(&ans, old);
+ if (r < 0)
+ return r;
+ }
+
+ *var = strv_join(ans, ":");
+ if (!*var)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int verify_socket(Unit *u) {
+ Unit *service;
+ int r;
+
+ assert(u);
+
+ if (u->type != UNIT_SOCKET)
+ return 0;
+
+ r = socket_load_service_unit(SOCKET(u), -1, &service);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "service unit for the socket cannot be loaded: %m");
+
+ if (service->load_state != UNIT_LOADED)
+ return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOENT),
+ "service %s not loaded, socket cannot be started.", service->id);
+
+ log_unit_debug(u, "using service unit %s.", service->id);
+ return 0;
+}
+
+int verify_executable(Unit *u, const ExecCommand *exec) {
+ if (!exec)
+ return 0;
+
+ if (exec->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ return 0;
+
+ if (access(exec->path, X_OK) < 0)
+ return log_unit_error_errno(u, errno, "Command %s is not executable: %m", exec->path);
+
+ return 0;
+}
+
+static int verify_executables(Unit *u) {
+ ExecCommand *exec;
+ int r = 0, k;
+ unsigned i;
+
+ assert(u);
+
+ exec = u->type == UNIT_SOCKET ? SOCKET(u)->control_command :
+ u->type == UNIT_MOUNT ? MOUNT(u)->control_command :
+ u->type == UNIT_SWAP ? SWAP(u)->control_command : NULL;
+ k = verify_executable(u, exec);
+ if (k < 0 && r == 0)
+ r = k;
+
+ if (u->type == UNIT_SERVICE)
+ for (i = 0; i < ELEMENTSOF(SERVICE(u)->exec_command); i++) {
+ k = verify_executable(u, SERVICE(u)->exec_command[i]);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ if (u->type == UNIT_SOCKET)
+ for (i = 0; i < ELEMENTSOF(SOCKET(u)->exec_command); i++) {
+ k = verify_executable(u, SOCKET(u)->exec_command[i]);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int verify_documentation(Unit *u, bool check_man) {
+ char **p;
+ int r = 0, k;
+
+ STRV_FOREACH(p, u->documentation) {
+ log_unit_debug(u, "Found documentation item: %s", *p);
+
+ if (check_man && startswith(*p, "man:")) {
+ k = show_man_page(*p + 4, true);
+ if (k != 0) {
+ if (k < 0)
+ log_unit_error_errno(u, k, "Can't show %s: %m", *p + 4);
+ else {
+ log_unit_error(u, "Command 'man %s' failed with code %d", *p + 4, k);
+ k = -ENOEXEC;
+ }
+ if (r == 0)
+ r = k;
+ }
+ }
+ }
+
+ /* Check remote URLs? */
+
+ return r;
+}
+
+static int verify_unit(Unit *u, bool check_man) {
+ _cleanup_(sd_bus_error_free) sd_bus_error err = SD_BUS_ERROR_NULL;
+ int r, k;
+
+ assert(u);
+
+ if (DEBUG_LOGGING)
+ unit_dump(u, stdout, "\t");
+
+ log_unit_debug(u, "Creating %s/start job", u->id);
+ r = manager_add_job(u->manager, JOB_START, u, JOB_REPLACE, NULL, &err, NULL);
+ if (r < 0)
+ log_unit_error_errno(u, r, "Failed to create %s/start: %s", u->id, bus_error_message(&err, r));
+
+ k = verify_socket(u);
+ if (k < 0 && r == 0)
+ r = k;
+
+ k = verify_executables(u);
+ if (k < 0 && r == 0)
+ r = k;
+
+ k = verify_documentation(u, check_man);
+ if (k < 0 && r == 0)
+ r = k;
+
+ return r;
+}
+
+int verify_units(char **filenames, UnitFileScope scope, bool check_man, bool run_generators) {
+ const ManagerTestRunFlags flags =
+ MANAGER_TEST_RUN_BASIC |
+ MANAGER_TEST_RUN_ENV_GENERATORS |
+ run_generators * MANAGER_TEST_RUN_GENERATORS;
+
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *units[strv_length(filenames)];
+ _cleanup_free_ char *var = NULL;
+ int r, k, i, count = 0;
+ char **filename;
+
+ if (strv_isempty(filenames))
+ return 0;
+
+ /* set the path */
+ r = generate_path(&var, filenames);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit load path: %m");
+
+ assert_se(set_unit_path(var) >= 0);
+
+ r = manager_new(scope, flags, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize manager: %m");
+
+ log_debug("Starting manager...");
+
+ r = manager_startup(m, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ manager_clear_jobs(m);
+
+ log_debug("Loading remaining units from the command line...");
+
+ STRV_FOREACH(filename, filenames) {
+ _cleanup_free_ char *prepared = NULL;
+
+ log_debug("Handling %s...", *filename);
+
+ k = prepare_filename(*filename, &prepared);
+ if (k < 0) {
+ log_error_errno(k, "Failed to prepare filename %s: %m", *filename);
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ k = manager_load_startable_unit_or_warn(m, NULL, prepared, &units[count]);
+ if (k < 0) {
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ count++;
+ }
+
+ for (i = 0; i < count; i++) {
+ k = verify_unit(units[i], check_man);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/analyze/analyze-verify.h b/src/analyze/analyze-verify.h
new file mode 100644
index 0000000..43bfbcb
--- /dev/null
+++ b/src/analyze/analyze-verify.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "execute.h"
+#include "path-lookup.h"
+
+int verify_executable(Unit *u, const ExecCommand *exec);
+int verify_units(char **filenames, UnitFileScope scope, bool check_man, bool run_generators);
diff --git a/src/analyze/analyze.c b/src/analyze/analyze.c
new file mode 100644
index 0000000..9920f2a
--- /dev/null
+++ b/src/analyze/analyze.c
@@ -0,0 +1,2461 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Simon Peeters
+***/
+
+#include <getopt.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "analyze-condition.h"
+#include "analyze-security.h"
+#include "analyze-verify.h"
+#include "build.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "bus-unit-util.h"
+#include "calendarspec.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "conf-files.h"
+#include "copy.h"
+#include "def.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "glob-util.h"
+#include "hashmap.h"
+#include "locale-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "nulstr-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#if HAVE_SECCOMP
+# include "seccomp-util.h"
+#endif
+#include "sort-util.h"
+#include "special.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "unit-name.h"
+#include "util.h"
+#include "verbs.h"
+
+#define SCALE_X (0.1 / 1000.0) /* pixels per us */
+#define SCALE_Y (20.0)
+
+#define svg(...) printf(__VA_ARGS__)
+
+#define svg_bar(class, x1, x2, y) \
+ svg(" <rect class=\"%s\" x=\"%.03f\" y=\"%.03f\" width=\"%.03f\" height=\"%.03f\" />\n", \
+ (class), \
+ SCALE_X * (x1), SCALE_Y * (y), \
+ SCALE_X * ((x2) - (x1)), SCALE_Y - 1.0)
+
+#define svg_text(b, x, y, format, ...) \
+ do { \
+ svg(" <text class=\"%s\" x=\"%.03f\" y=\"%.03f\">", (b) ? "left" : "right", SCALE_X * (x) + (b ? 5.0 : -5.0), SCALE_Y * (y) + 14.0); \
+ svg(format, ## __VA_ARGS__); \
+ svg("</text>\n"); \
+ } while (false)
+
+static enum dot {
+ DEP_ALL,
+ DEP_ORDER,
+ DEP_REQUIRE
+} arg_dot = DEP_ALL;
+static char **arg_dot_from_patterns = NULL;
+static char **arg_dot_to_patterns = NULL;
+static usec_t arg_fuzz = 0;
+static PagerFlags arg_pager_flags = 0;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static UnitFileScope arg_scope = UNIT_FILE_SYSTEM;
+static bool arg_man = true;
+static bool arg_generators = false;
+static const char *arg_root = NULL;
+static unsigned arg_iterations = 1;
+static usec_t arg_base_time = USEC_INFINITY;
+
+STATIC_DESTRUCTOR_REGISTER(arg_dot_from_patterns, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_dot_to_patterns, strv_freep);
+
+struct boot_times {
+ usec_t firmware_time;
+ usec_t loader_time;
+ usec_t kernel_time;
+ usec_t kernel_done_time;
+ usec_t initrd_time;
+ usec_t userspace_time;
+ usec_t finish_time;
+ usec_t security_start_time;
+ usec_t security_finish_time;
+ usec_t generators_start_time;
+ usec_t generators_finish_time;
+ usec_t unitsload_start_time;
+ usec_t unitsload_finish_time;
+ usec_t initrd_security_start_time;
+ usec_t initrd_security_finish_time;
+ usec_t initrd_generators_start_time;
+ usec_t initrd_generators_finish_time;
+ usec_t initrd_unitsload_start_time;
+ usec_t initrd_unitsload_finish_time;
+
+ /*
+ * If we're analyzing the user instance, all timestamps will be offset
+ * by its own start-up timestamp, which may be arbitrarily big.
+ * With "plot", this causes arbitrarily wide output SVG files which almost
+ * completely consist of empty space. Thus we cancel out this offset.
+ *
+ * This offset is subtracted from times above by acquire_boot_times(),
+ * but it still needs to be subtracted from unit-specific timestamps
+ * (so it is stored here for reference).
+ */
+ usec_t reverse_offset;
+};
+
+struct unit_times {
+ bool has_data;
+ char *name;
+ usec_t activating;
+ usec_t activated;
+ usec_t deactivated;
+ usec_t deactivating;
+ usec_t time;
+};
+
+struct host_info {
+ char *hostname;
+ char *kernel_name;
+ char *kernel_release;
+ char *kernel_version;
+ char *os_pretty_name;
+ char *virtualization;
+ char *architecture;
+};
+
+static int acquire_bus(sd_bus **bus, bool *use_full_bus) {
+ bool user = arg_scope != UNIT_FILE_SYSTEM;
+ int r;
+
+ if (use_full_bus && *use_full_bus) {
+ r = bus_connect_transport(arg_transport, arg_host, user, bus);
+ if (IN_SET(r, 0, -EHOSTDOWN))
+ return r;
+
+ *use_full_bus = false;
+ }
+
+ return bus_connect_transport_systemd(arg_transport, arg_host, user, bus);
+}
+
+static int bus_get_uint64_property(sd_bus *bus, const char *path, const char *interface, const char *property, uint64_t *val) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(property);
+ assert(val);
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ interface,
+ property,
+ &error,
+ 't', val);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse reply: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int bus_get_unit_property_strv(sd_bus *bus, const char *path, const char *property, char ***strv) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(property);
+ assert(strv);
+
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ property,
+ &error,
+ strv);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit property %s: %s", property, bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int compare_unit_start(const struct unit_times *a, const struct unit_times *b) {
+ return CMP(a->activating, b->activating);
+}
+
+static void unit_times_free(struct unit_times *t) {
+ struct unit_times *p;
+
+ for (p = t; p->has_data; p++)
+ free(p->name);
+ free(t);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct unit_times *, unit_times_free);
+
+static void subtract_timestamp(usec_t *a, usec_t b) {
+ assert(a);
+
+ if (*a > 0) {
+ assert(*a >= b);
+ *a -= b;
+ }
+}
+
+static int acquire_boot_times(sd_bus *bus, struct boot_times **bt) {
+ static const struct bus_properties_map property_map[] = {
+ { "FirmwareTimestampMonotonic", "t", NULL, offsetof(struct boot_times, firmware_time) },
+ { "LoaderTimestampMonotonic", "t", NULL, offsetof(struct boot_times, loader_time) },
+ { "KernelTimestamp", "t", NULL, offsetof(struct boot_times, kernel_time) },
+ { "InitRDTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_time) },
+ { "UserspaceTimestampMonotonic", "t", NULL, offsetof(struct boot_times, userspace_time) },
+ { "FinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, finish_time) },
+ { "SecurityStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, security_start_time) },
+ { "SecurityFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, security_finish_time) },
+ { "GeneratorsStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, generators_start_time) },
+ { "GeneratorsFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, generators_finish_time) },
+ { "UnitsLoadStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, unitsload_start_time) },
+ { "UnitsLoadFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, unitsload_finish_time) },
+ { "InitRDSecurityStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_security_start_time) },
+ { "InitRDSecurityFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_security_finish_time) },
+ { "InitRDGeneratorsStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_generators_start_time) },
+ { "InitRDGeneratorsFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_generators_finish_time) },
+ { "InitRDUnitsLoadStartTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_unitsload_start_time) },
+ { "InitRDUnitsLoadFinishTimestampMonotonic", "t", NULL, offsetof(struct boot_times, initrd_unitsload_finish_time) },
+ {},
+ };
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ static struct boot_times times;
+ static bool cached = false;
+ int r;
+
+ if (cached)
+ goto finish;
+
+ assert_cc(sizeof(usec_t) == sizeof(uint64_t));
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ property_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ &times);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get timestamp properties: %s", bus_error_message(&error, r));
+
+ if (times.finish_time <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINPROGRESS),
+ "Bootup is not yet finished (org.freedesktop.systemd1.Manager.FinishTimestampMonotonic=%"PRIu64").\n"
+ "Please try again later.\n"
+ "Hint: Use 'systemctl%s list-jobs' to see active jobs",
+ times.finish_time,
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user");
+
+ if (arg_scope == UNIT_FILE_SYSTEM && times.security_start_time > 0) {
+ /* security_start_time is set when systemd is not running under container environment. */
+ if (times.initrd_time > 0)
+ times.kernel_done_time = times.initrd_time;
+ else
+ times.kernel_done_time = times.userspace_time;
+ } else {
+ /*
+ * User-instance-specific or container-system-specific timestamps processing
+ * (see comment to reverse_offset in struct boot_times).
+ */
+ times.reverse_offset = times.userspace_time;
+
+ times.firmware_time = times.loader_time = times.kernel_time = times.initrd_time =
+ times.userspace_time = times.security_start_time = times.security_finish_time = 0;
+
+ subtract_timestamp(&times.finish_time, times.reverse_offset);
+
+ subtract_timestamp(&times.generators_start_time, times.reverse_offset);
+ subtract_timestamp(&times.generators_finish_time, times.reverse_offset);
+
+ subtract_timestamp(&times.unitsload_start_time, times.reverse_offset);
+ subtract_timestamp(&times.unitsload_finish_time, times.reverse_offset);
+ }
+
+ cached = true;
+
+finish:
+ *bt = &times;
+ return 0;
+}
+
+static void free_host_info(struct host_info *hi) {
+ if (!hi)
+ return;
+
+ free(hi->hostname);
+ free(hi->kernel_name);
+ free(hi->kernel_release);
+ free(hi->kernel_version);
+ free(hi->os_pretty_name);
+ free(hi->virtualization);
+ free(hi->architecture);
+ free(hi);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct host_info *, free_host_info);
+
+static int acquire_time_data(sd_bus *bus, struct unit_times **out) {
+ static const struct bus_properties_map property_map[] = {
+ { "InactiveExitTimestampMonotonic", "t", NULL, offsetof(struct unit_times, activating) },
+ { "ActiveEnterTimestampMonotonic", "t", NULL, offsetof(struct unit_times, activated) },
+ { "ActiveExitTimestampMonotonic", "t", NULL, offsetof(struct unit_times, deactivating) },
+ { "InactiveEnterTimestampMonotonic", "t", NULL, offsetof(struct unit_times, deactivated) },
+ {},
+ };
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(unit_times_freep) struct unit_times *unit_times = NULL;
+ struct boot_times *boot_times = NULL;
+ size_t allocated = 0, c = 0;
+ UnitInfo u;
+ int r;
+
+ r = acquire_boot_times(bus, &boot_times);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "ListUnits", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = bus_parse_unit_info(reply, &u)) > 0) {
+ struct unit_times *t;
+
+ if (!GREEDY_REALLOC(unit_times, allocated, c + 2))
+ return log_oom();
+
+ unit_times[c + 1].has_data = false;
+ t = &unit_times[c];
+ t->name = NULL;
+
+ assert_cc(sizeof(usec_t) == sizeof(uint64_t));
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ u.unit_path,
+ property_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get timestamp properties of unit %s: %s",
+ u.id, bus_error_message(&error, r));
+
+ subtract_timestamp(&t->activating, boot_times->reverse_offset);
+ subtract_timestamp(&t->activated, boot_times->reverse_offset);
+ subtract_timestamp(&t->deactivating, boot_times->reverse_offset);
+ subtract_timestamp(&t->deactivated, boot_times->reverse_offset);
+
+ if (t->activated >= t->activating)
+ t->time = t->activated - t->activating;
+ else if (t->deactivated >= t->activating)
+ t->time = t->deactivated - t->activating;
+ else
+ t->time = 0;
+
+ if (t->activating == 0)
+ continue;
+
+ t->name = strdup(u.id);
+ if (!t->name)
+ return log_oom();
+
+ t->has_data = true;
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ *out = TAKE_PTR(unit_times);
+ return c;
+}
+
+static int acquire_host_info(sd_bus *bus, struct host_info **hi) {
+ static const struct bus_properties_map hostname_map[] = {
+ { "Hostname", "s", NULL, offsetof(struct host_info, hostname) },
+ { "KernelName", "s", NULL, offsetof(struct host_info, kernel_name) },
+ { "KernelRelease", "s", NULL, offsetof(struct host_info, kernel_release) },
+ { "KernelVersion", "s", NULL, offsetof(struct host_info, kernel_version) },
+ { "OperatingSystemPrettyName", "s", NULL, offsetof(struct host_info, os_pretty_name) },
+ {}
+ };
+
+ static const struct bus_properties_map manager_map[] = {
+ { "Virtualization", "s", NULL, offsetof(struct host_info, virtualization) },
+ { "Architecture", "s", NULL, offsetof(struct host_info, architecture) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *system_bus = NULL;
+ _cleanup_(free_host_infop) struct host_info *host;
+ int r;
+
+ host = new0(struct host_info, 1);
+ if (!host)
+ return log_oom();
+
+ if (arg_scope != UNIT_FILE_SYSTEM) {
+ r = bus_connect_transport(arg_transport, arg_host, false, &system_bus);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to connect to system bus, ignoring: %m");
+ goto manager;
+ }
+ }
+
+ r = bus_map_all_properties(
+ system_bus ?: bus,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ hostname_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ host);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get host information from systemd-hostnamed, ignoring: %s",
+ bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ }
+
+manager:
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ manager_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get host information from systemd: %s",
+ bus_error_message(&error, r));
+
+ *hi = TAKE_PTR(host);
+ return 0;
+}
+
+static int pretty_boot_time(sd_bus *bus, char **_buf) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ struct boot_times *t;
+ static char buf[4096];
+ size_t size;
+ char *ptr;
+ int r;
+ usec_t activated_time = USEC_INFINITY;
+ _cleanup_free_ char *path = NULL, *unit_id = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = acquire_boot_times(bus, &t);
+ if (r < 0)
+ return r;
+
+ path = unit_dbus_path_from_name(SPECIAL_DEFAULT_TARGET);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "Id",
+ &error,
+ &unit_id);
+ if (r < 0) {
+ log_error_errno(r, "default.target doesn't seem to exist: %s", bus_error_message(&error, r));
+ unit_id = NULL;
+ }
+
+ r = bus_get_uint64_property(bus, path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveEnterTimestampMonotonic",
+ &activated_time);
+ if (r < 0) {
+ log_info_errno(r, "Could not get time to reach default.target, ignoring: %m");
+ activated_time = USEC_INFINITY;
+ }
+
+ ptr = buf;
+ size = sizeof(buf);
+
+ size = strpcpyf(&ptr, size, "Startup finished in ");
+ if (t->firmware_time > 0)
+ size = strpcpyf(&ptr, size, "%s (firmware) + ", format_timespan(ts, sizeof(ts), t->firmware_time - t->loader_time, USEC_PER_MSEC));
+ if (t->loader_time > 0)
+ size = strpcpyf(&ptr, size, "%s (loader) + ", format_timespan(ts, sizeof(ts), t->loader_time, USEC_PER_MSEC));
+ if (t->kernel_done_time > 0)
+ size = strpcpyf(&ptr, size, "%s (kernel) + ", format_timespan(ts, sizeof(ts), t->kernel_done_time, USEC_PER_MSEC));
+ if (t->initrd_time > 0)
+ size = strpcpyf(&ptr, size, "%s (initrd) + ", format_timespan(ts, sizeof(ts), t->userspace_time - t->initrd_time, USEC_PER_MSEC));
+
+ size = strpcpyf(&ptr, size, "%s (userspace) ", format_timespan(ts, sizeof(ts), t->finish_time - t->userspace_time, USEC_PER_MSEC));
+ if (t->kernel_done_time > 0)
+ strpcpyf(&ptr, size, "= %s ", format_timespan(ts, sizeof(ts), t->firmware_time + t->finish_time, USEC_PER_MSEC));
+
+ if (unit_id && timestamp_is_set(activated_time)) {
+ usec_t base = t->userspace_time > 0 ? t->userspace_time : t->reverse_offset;
+
+ size = strpcpyf(&ptr, size, "\n%s reached after %s in userspace", unit_id,
+ format_timespan(ts, sizeof(ts), activated_time - base, USEC_PER_MSEC));
+ } else if (unit_id && activated_time == 0)
+ size = strpcpyf(&ptr, size, "\n%s was never reached", unit_id);
+ else if (unit_id && activated_time == USEC_INFINITY)
+ size = strpcpyf(&ptr, size, "\nCould not get time to reach %s.", unit_id);
+ else if (!unit_id)
+ size = strpcpyf(&ptr, size, "\ncould not find default.target");
+
+ ptr = strdup(buf);
+ if (!ptr)
+ return log_oom();
+
+ *_buf = ptr;
+ return 0;
+}
+
+static void svg_graph_box(double height, double begin, double end) {
+ long long i;
+
+ /* outside box, fill */
+ svg("<rect class=\"box\" x=\"0\" y=\"0\" width=\"%.03f\" height=\"%.03f\" />\n",
+ SCALE_X * (end - begin),
+ SCALE_Y * height);
+
+ for (i = ((long long) (begin / 100000)) * 100000; i <= end; i += 100000) {
+ /* lines for each second */
+ if (i % 5000000 == 0)
+ svg(" <line class=\"sec5\" x1=\"%.03f\" y1=\"0\" x2=\"%.03f\" y2=\"%.03f\" />\n"
+ " <text class=\"sec\" x=\"%.03f\" y=\"%.03f\" >%.01fs</text>\n",
+ SCALE_X * i,
+ SCALE_X * i,
+ SCALE_Y * height,
+ SCALE_X * i,
+ -5.0,
+ 0.000001 * i);
+ else if (i % 1000000 == 0)
+ svg(" <line class=\"sec1\" x1=\"%.03f\" y1=\"0\" x2=\"%.03f\" y2=\"%.03f\" />\n"
+ " <text class=\"sec\" x=\"%.03f\" y=\"%.03f\" >%.01fs</text>\n",
+ SCALE_X * i,
+ SCALE_X * i,
+ SCALE_Y * height,
+ SCALE_X * i,
+ -5.0,
+ 0.000001 * i);
+ else
+ svg(" <line class=\"sec01\" x1=\"%.03f\" y1=\"0\" x2=\"%.03f\" y2=\"%.03f\" />\n",
+ SCALE_X * i,
+ SCALE_X * i,
+ SCALE_Y * height);
+ }
+}
+
+static int plot_unit_times(struct unit_times *u, double width, int y) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ bool b;
+
+ if (!u->name)
+ return 0;
+
+ svg_bar("activating", u->activating, u->activated, y);
+ svg_bar("active", u->activated, u->deactivating, y);
+ svg_bar("deactivating", u->deactivating, u->deactivated, y);
+
+ /* place the text on the left if we have passed the half of the svg width */
+ b = u->activating * SCALE_X < width / 2;
+ if (u->time)
+ svg_text(b, u->activating, y, "%s (%s)",
+ u->name, format_timespan(ts, sizeof(ts), u->time, USEC_PER_MSEC));
+ else
+ svg_text(b, u->activating, y, "%s", u->name);
+
+ return 1;
+}
+
+static int analyze_plot(int argc, char *argv[], void *userdata) {
+ _cleanup_(free_host_infop) struct host_info *host = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(unit_times_freep) struct unit_times *times = NULL;
+ _cleanup_free_ char *pretty_times = NULL;
+ bool use_full_bus = arg_scope == UNIT_FILE_SYSTEM;
+ struct boot_times *boot;
+ struct unit_times *u;
+ int n, m = 1, y = 0, r;
+ double width;
+
+ r = acquire_bus(&bus, &use_full_bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ n = acquire_boot_times(bus, &boot);
+ if (n < 0)
+ return n;
+
+ n = pretty_boot_time(bus, &pretty_times);
+ if (n < 0)
+ return n;
+
+ if (use_full_bus || arg_scope != UNIT_FILE_SYSTEM) {
+ n = acquire_host_info(bus, &host);
+ if (n < 0)
+ return n;
+ }
+
+ n = acquire_time_data(bus, &times);
+ if (n <= 0)
+ return n;
+
+ typesafe_qsort(times, n, compare_unit_start);
+
+ width = SCALE_X * (boot->firmware_time + boot->finish_time);
+ if (width < 800.0)
+ width = 800.0;
+
+ if (boot->firmware_time > boot->loader_time)
+ m++;
+ if (boot->loader_time > 0) {
+ m++;
+ if (width < 1000.0)
+ width = 1000.0;
+ }
+ if (boot->initrd_time > 0)
+ m++;
+ if (boot->kernel_done_time > 0)
+ m++;
+
+ for (u = times; u->has_data; u++) {
+ double text_start, text_width;
+
+ if (u->activating > boot->finish_time) {
+ u->name = mfree(u->name);
+ continue;
+ }
+
+ /* If the text cannot fit on the left side then
+ * increase the svg width so it fits on the right.
+ * TODO: calculate the text width more accurately */
+ text_width = 8.0 * strlen(u->name);
+ text_start = (boot->firmware_time + u->activating) * SCALE_X;
+ if (text_width > text_start && text_width + text_start > width)
+ width = text_width + text_start;
+
+ if (u->deactivated > u->activating &&
+ u->deactivated <= boot->finish_time &&
+ u->activated == 0 && u->deactivating == 0)
+ u->activated = u->deactivating = u->deactivated;
+ if (u->activated < u->activating || u->activated > boot->finish_time)
+ u->activated = boot->finish_time;
+ if (u->deactivating < u->activated || u->deactivating > boot->finish_time)
+ u->deactivating = boot->finish_time;
+ if (u->deactivated < u->deactivating || u->deactivated > boot->finish_time)
+ u->deactivated = boot->finish_time;
+ m++;
+ }
+
+ svg("<?xml version=\"1.0\" standalone=\"no\"?>\n"
+ "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" "
+ "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n");
+
+ svg("<svg width=\"%.0fpx\" height=\"%.0fpx\" version=\"1.1\" "
+ "xmlns=\"http://www.w3.org/2000/svg\">\n\n",
+ 80.0 + width, 150.0 + (m * SCALE_Y) +
+ 5 * SCALE_Y /* legend */);
+
+ /* write some basic info as a comment, including some help */
+ svg("<!-- This file is a systemd-analyze SVG file. It is best rendered in a -->\n"
+ "<!-- browser such as Chrome, Chromium or Firefox. Other applications -->\n"
+ "<!-- that render these files properly but much slower are ImageMagick, -->\n"
+ "<!-- gimp, inkscape, etc. To display the files on your system, just -->\n"
+ "<!-- point your browser to this file. -->\n\n"
+ "<!-- This plot was generated by systemd-analyze version %-16.16s -->\n\n", GIT_VERSION);
+
+ /* style sheet */
+ svg("<defs>\n <style type=\"text/css\">\n <![CDATA[\n"
+ " rect { stroke-width: 1; stroke-opacity: 0; }\n"
+ " rect.background { fill: rgb(255,255,255); }\n"
+ " rect.activating { fill: rgb(255,0,0); fill-opacity: 0.7; }\n"
+ " rect.active { fill: rgb(200,150,150); fill-opacity: 0.7; }\n"
+ " rect.deactivating { fill: rgb(150,100,100); fill-opacity: 0.7; }\n"
+ " rect.kernel { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.initrd { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.firmware { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.loader { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.userspace { fill: rgb(150,150,150); fill-opacity: 0.7; }\n"
+ " rect.security { fill: rgb(144,238,144); fill-opacity: 0.7; }\n"
+ " rect.generators { fill: rgb(102,204,255); fill-opacity: 0.7; }\n"
+ " rect.unitsload { fill: rgb( 82,184,255); fill-opacity: 0.7; }\n"
+ " rect.box { fill: rgb(240,240,240); stroke: rgb(192,192,192); }\n"
+ " line { stroke: rgb(64,64,64); stroke-width: 1; }\n"
+ "// line.sec1 { }\n"
+ " line.sec5 { stroke-width: 2; }\n"
+ " line.sec01 { stroke: rgb(224,224,224); stroke-width: 1; }\n"
+ " text { font-family: Verdana, Helvetica; font-size: 14px; }\n"
+ " text.left { font-family: Verdana, Helvetica; font-size: 14px; text-anchor: start; }\n"
+ " text.right { font-family: Verdana, Helvetica; font-size: 14px; text-anchor: end; }\n"
+ " text.sec { font-size: 10px; }\n"
+ " ]]>\n </style>\n</defs>\n\n");
+
+ svg("<rect class=\"background\" width=\"100%%\" height=\"100%%\" />\n");
+ svg("<text x=\"20\" y=\"50\">%s</text>", pretty_times);
+ if (host)
+ svg("<text x=\"20\" y=\"30\">%s %s (%s %s %s) %s %s</text>",
+ isempty(host->os_pretty_name) ? "Linux" : host->os_pretty_name,
+ strempty(host->hostname),
+ strempty(host->kernel_name),
+ strempty(host->kernel_release),
+ strempty(host->kernel_version),
+ strempty(host->architecture),
+ strempty(host->virtualization));
+
+ svg("<g transform=\"translate(%.3f,100)\">\n", 20.0 + (SCALE_X * boot->firmware_time));
+ svg_graph_box(m, -(double) boot->firmware_time, boot->finish_time);
+
+ if (boot->firmware_time > 0) {
+ svg_bar("firmware", -(double) boot->firmware_time, -(double) boot->loader_time, y);
+ svg_text(true, -(double) boot->firmware_time, y, "firmware");
+ y++;
+ }
+ if (boot->loader_time > 0) {
+ svg_bar("loader", -(double) boot->loader_time, 0, y);
+ svg_text(true, -(double) boot->loader_time, y, "loader");
+ y++;
+ }
+ if (boot->kernel_done_time > 0) {
+ svg_bar("kernel", 0, boot->kernel_done_time, y);
+ svg_text(true, 0, y, "kernel");
+ y++;
+ }
+ if (boot->initrd_time > 0) {
+ svg_bar("initrd", boot->initrd_time, boot->userspace_time, y);
+ if (boot->initrd_security_start_time < boot->initrd_security_finish_time)
+ svg_bar("security", boot->initrd_security_start_time, boot->initrd_security_finish_time, y);
+ if (boot->initrd_generators_start_time < boot->initrd_generators_finish_time)
+ svg_bar("generators", boot->initrd_generators_start_time, boot->initrd_generators_finish_time, y);
+ if (boot->initrd_unitsload_start_time < boot->initrd_unitsload_finish_time)
+ svg_bar("unitsload", boot->initrd_unitsload_start_time, boot->initrd_unitsload_finish_time, y);
+ svg_text(true, boot->initrd_time, y, "initrd");
+ y++;
+ }
+
+ for (u = times; u->has_data; u++) {
+ if (u->activating >= boot->userspace_time)
+ break;
+
+ y += plot_unit_times(u, width, y);
+ }
+
+ svg_bar("active", boot->userspace_time, boot->finish_time, y);
+ if (boot->security_start_time > 0)
+ svg_bar("security", boot->security_start_time, boot->security_finish_time, y);
+ svg_bar("generators", boot->generators_start_time, boot->generators_finish_time, y);
+ svg_bar("unitsload", boot->unitsload_start_time, boot->unitsload_finish_time, y);
+ svg_text(true, boot->userspace_time, y, "systemd");
+ y++;
+
+ for (; u->has_data; u++)
+ y += plot_unit_times(u, width, y);
+
+ svg("</g>\n");
+
+ /* Legend */
+ svg("<g transform=\"translate(20,100)\">\n");
+ y++;
+ svg_bar("activating", 0, 300000, y);
+ svg_text(true, 400000, y, "Activating");
+ y++;
+ svg_bar("active", 0, 300000, y);
+ svg_text(true, 400000, y, "Active");
+ y++;
+ svg_bar("deactivating", 0, 300000, y);
+ svg_text(true, 400000, y, "Deactivating");
+ y++;
+ if (boot->security_start_time > 0) {
+ svg_bar("security", 0, 300000, y);
+ svg_text(true, 400000, y, "Setting up security module");
+ y++;
+ }
+ svg_bar("generators", 0, 300000, y);
+ svg_text(true, 400000, y, "Generators");
+ y++;
+ svg_bar("unitsload", 0, 300000, y);
+ svg_text(true, 400000, y, "Loading unit files");
+ y++;
+
+ svg("</g>\n\n");
+
+ svg("</svg>\n");
+
+ return 0;
+}
+
+static int list_dependencies_print(
+ const char *name,
+ unsigned level,
+ unsigned branches,
+ bool last,
+ struct unit_times *times,
+ struct boot_times *boot) {
+
+ unsigned i;
+ char ts[FORMAT_TIMESPAN_MAX], ts2[FORMAT_TIMESPAN_MAX];
+
+ for (i = level; i != 0; i--)
+ printf("%s", special_glyph(branches & (1 << (i-1)) ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE));
+
+ printf("%s", special_glyph(last ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH));
+
+ if (times) {
+ if (times->time > 0)
+ printf("%s%s @%s +%s%s", ansi_highlight_red(), name,
+ format_timespan(ts, sizeof(ts), times->activating - boot->userspace_time, USEC_PER_MSEC),
+ format_timespan(ts2, sizeof(ts2), times->time, USEC_PER_MSEC), ansi_normal());
+ else if (times->activated > boot->userspace_time)
+ printf("%s @%s", name, format_timespan(ts, sizeof(ts), times->activated - boot->userspace_time, USEC_PER_MSEC));
+ else
+ printf("%s", name);
+ } else
+ printf("%s", name);
+ printf("\n");
+
+ return 0;
+}
+
+static int list_dependencies_get_dependencies(sd_bus *bus, const char *name, char ***deps) {
+ _cleanup_free_ char *path = NULL;
+
+ assert(bus);
+ assert(name);
+ assert(deps);
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return -ENOMEM;
+
+ return bus_get_unit_property_strv(bus, path, "After", deps);
+}
+
+static Hashmap *unit_times_hashmap;
+
+static int list_dependencies_compare(char *const *a, char *const *b) {
+ usec_t usa = 0, usb = 0;
+ struct unit_times *times;
+
+ times = hashmap_get(unit_times_hashmap, *a);
+ if (times)
+ usa = times->activated;
+ times = hashmap_get(unit_times_hashmap, *b);
+ if (times)
+ usb = times->activated;
+
+ return CMP(usb, usa);
+}
+
+static bool times_in_range(const struct unit_times *times, const struct boot_times *boot) {
+ return times && times->activated > 0 && times->activated <= boot->finish_time;
+}
+
+static int list_dependencies_one(sd_bus *bus, const char *name, unsigned level, char ***units, unsigned branches) {
+ _cleanup_strv_free_ char **deps = NULL;
+ char **c;
+ int r;
+ usec_t service_longest = 0;
+ int to_print = 0;
+ struct unit_times *times;
+ struct boot_times *boot;
+
+ if (strv_extend(units, name))
+ return log_oom();
+
+ r = list_dependencies_get_dependencies(bus, name, &deps);
+ if (r < 0)
+ return r;
+
+ typesafe_qsort(deps, strv_length(deps), list_dependencies_compare);
+
+ r = acquire_boot_times(bus, &boot);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(c, deps) {
+ times = hashmap_get(unit_times_hashmap, *c);
+ if (times_in_range(times, boot) && times->activated >= service_longest)
+ service_longest = times->activated;
+ }
+
+ if (service_longest == 0)
+ return r;
+
+ STRV_FOREACH(c, deps) {
+ times = hashmap_get(unit_times_hashmap, *c);
+ if (times_in_range(times, boot) && service_longest - times->activated <= arg_fuzz)
+ to_print++;
+ }
+
+ if (!to_print)
+ return r;
+
+ STRV_FOREACH(c, deps) {
+ times = hashmap_get(unit_times_hashmap, *c);
+ if (!times_in_range(times, boot) || service_longest - times->activated > arg_fuzz)
+ continue;
+
+ to_print--;
+
+ r = list_dependencies_print(*c, level, branches, to_print == 0, times, boot);
+ if (r < 0)
+ return r;
+
+ if (strv_contains(*units, *c)) {
+ r = list_dependencies_print("...", level + 1, (branches << 1) | (to_print ? 1 : 0),
+ true, NULL, boot);
+ if (r < 0)
+ return r;
+ continue;
+ }
+
+ r = list_dependencies_one(bus, *c, level + 1, units, (branches << 1) | (to_print ? 1 : 0));
+ if (r < 0)
+ return r;
+
+ if (to_print == 0)
+ break;
+ }
+ return 0;
+}
+
+static int list_dependencies(sd_bus *bus, const char *name) {
+ _cleanup_strv_free_ char **units = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+ struct unit_times *times;
+ int r;
+ const char *id;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ struct boot_times *boot;
+
+ assert(bus);
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "Id",
+ &error,
+ &reply,
+ "s");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get ID: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &id);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ times = hashmap_get(unit_times_hashmap, id);
+
+ r = acquire_boot_times(bus, &boot);
+ if (r < 0)
+ return r;
+
+ if (times) {
+ if (times->time)
+ printf("%s%s +%s%s\n", ansi_highlight_red(), id,
+ format_timespan(ts, sizeof(ts), times->time, USEC_PER_MSEC), ansi_normal());
+ else if (times->activated > boot->userspace_time)
+ printf("%s @%s\n", id, format_timespan(ts, sizeof(ts), times->activated - boot->userspace_time, USEC_PER_MSEC));
+ else
+ printf("%s\n", id);
+ }
+
+ return list_dependencies_one(bus, name, 0, &units, 0);
+}
+
+static int analyze_critical_chain(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(unit_times_freep) struct unit_times *times = NULL;
+ struct unit_times *u;
+ Hashmap *h;
+ int n, r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ n = acquire_time_data(bus, &times);
+ if (n <= 0)
+ return n;
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return log_oom();
+
+ for (u = times; u->has_data; u++) {
+ r = hashmap_put(h, u->name, u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add entry to hashmap: %m");
+ }
+ unit_times_hashmap = h;
+
+ (void) pager_open(arg_pager_flags);
+
+ puts("The time when unit became active or started is printed after the \"@\" character.\n"
+ "The time the unit took to start is printed after the \"+\" character.\n");
+
+ if (argc > 1) {
+ char **name;
+ STRV_FOREACH(name, strv_skip(argv, 1))
+ list_dependencies(bus, *name);
+ } else
+ list_dependencies(bus, SPECIAL_DEFAULT_TARGET);
+
+ h = hashmap_free(h);
+ return 0;
+}
+
+static int analyze_blame(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(unit_times_freep) struct unit_times *times = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ struct unit_times *u;
+ TableCell *cell;
+ int n, r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ n = acquire_time_data(bus, &times);
+ if (n <= 0)
+ return n;
+
+ table = table_new("time", "unit");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ r = table_set_ellipsize_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ r = table_set_align_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ r = table_set_ellipsize_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ r = table_set_sort(table, (size_t) 0, (size_t) SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ r = table_set_reverse(table, 0, true);
+ if (r < 0)
+ return r;
+
+ for (u = times; u->has_data; u++) {
+ if (u->time <= 0)
+ continue;
+
+ r = table_add_many(table,
+ TABLE_TIMESPAN_MSEC, u->time,
+ TABLE_STRING, u->name);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ return table_print(table, NULL);
+}
+
+static int analyze_time(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ r = pretty_boot_time(bus, &buf);
+ if (r < 0)
+ return r;
+
+ puts(buf);
+ return 0;
+}
+
+static int graph_one_property(
+ sd_bus *bus,
+ const UnitInfo *u,
+ const char *prop,
+ const char *color,
+ char *patterns[],
+ char *from_patterns[],
+ char *to_patterns[]) {
+
+ _cleanup_strv_free_ char **units = NULL;
+ char **unit;
+ int r;
+ bool match_patterns;
+
+ assert(u);
+ assert(prop);
+ assert(color);
+
+ match_patterns = strv_fnmatch(patterns, u->id);
+
+ if (!strv_isempty(from_patterns) && !match_patterns && !strv_fnmatch(from_patterns, u->id))
+ return 0;
+
+ r = bus_get_unit_property_strv(bus, u->unit_path, prop, &units);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(unit, units) {
+ bool match_patterns2;
+
+ match_patterns2 = strv_fnmatch(patterns, *unit);
+
+ if (!strv_isempty(to_patterns) && !match_patterns2 && !strv_fnmatch(to_patterns, *unit))
+ continue;
+
+ if (!strv_isempty(patterns) && !match_patterns && !match_patterns2)
+ continue;
+
+ printf("\t\"%s\"->\"%s\" [color=\"%s\"];\n", u->id, *unit, color);
+ }
+
+ return 0;
+}
+
+static int graph_one(sd_bus *bus, const UnitInfo *u, char *patterns[], char *from_patterns[], char *to_patterns[]) {
+ int r;
+
+ assert(bus);
+ assert(u);
+
+ if (IN_SET(arg_dot, DEP_ORDER, DEP_ALL)) {
+ r = graph_one_property(bus, u, "After", "green", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(arg_dot, DEP_REQUIRE, DEP_ALL)) {
+ r = graph_one_property(bus, u, "Requires", "black", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ r = graph_one_property(bus, u, "Requisite", "darkblue", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ r = graph_one_property(bus, u, "Wants", "grey66", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ r = graph_one_property(bus, u, "Conflicts", "red", patterns, from_patterns, to_patterns);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int expand_patterns(sd_bus *bus, char **patterns, char ***ret) {
+ _cleanup_strv_free_ char **expanded_patterns = NULL;
+ char **pattern;
+ int r;
+
+ STRV_FOREACH(pattern, patterns) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *unit = NULL, *unit_id = NULL;
+
+ if (strv_extend(&expanded_patterns, *pattern) < 0)
+ return log_oom();
+
+ if (string_is_glob(*pattern))
+ continue;
+
+ unit = unit_dbus_path_from_name(*pattern);
+ if (!unit)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ unit,
+ "org.freedesktop.systemd1.Unit",
+ "Id",
+ &error,
+ &unit_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get ID: %s", bus_error_message(&error, r));
+
+ if (!streq(*pattern, unit_id)) {
+ if (strv_extend(&expanded_patterns, unit_id) < 0)
+ return log_oom();
+ }
+ }
+
+ *ret = TAKE_PTR(expanded_patterns); /* do not free */
+
+ return 0;
+}
+
+static int dot(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **expanded_patterns = NULL;
+ _cleanup_strv_free_ char **expanded_from_patterns = NULL;
+ _cleanup_strv_free_ char **expanded_to_patterns = NULL;
+ int r;
+ UnitInfo u;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ r = expand_patterns(bus, strv_skip(argv, 1), &expanded_patterns);
+ if (r < 0)
+ return r;
+
+ r = expand_patterns(bus, arg_dot_from_patterns, &expanded_from_patterns);
+ if (r < 0)
+ return r;
+
+ r = expand_patterns(bus, arg_dot_to_patterns, &expanded_to_patterns);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "ListUnits", &error, &reply, "");
+ if (r < 0)
+ log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("digraph systemd {\n");
+
+ while ((r = bus_parse_unit_info(reply, &u)) > 0) {
+
+ r = graph_one(bus, &u, expanded_patterns, expanded_from_patterns, expanded_to_patterns);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("}\n");
+
+ log_info(" Color legend: black = Requires\n"
+ " dark blue = Requisite\n"
+ " dark grey = Wants\n"
+ " red = Conflicts\n"
+ " green = After\n");
+
+ if (on_tty())
+ log_notice("-- You probably want to process this output with graphviz' dot tool.\n"
+ "-- Try a shell pipeline like 'systemd-analyze dot | dot -Tsvg > systemd.svg'!\n");
+
+ return 0;
+}
+
+static int dump_fallback(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *text = NULL;
+ int r;
+
+ assert(bus);
+
+ r = bus_call_method(bus, bus_systemd_mgr, "Dump", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call Dump: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &text);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fputs(text, stdout);
+ return 0;
+}
+
+static int dump(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int fd = -1;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ (void) pager_open(arg_pager_flags);
+
+ if (!sd_bus_can_send(bus, SD_BUS_TYPE_UNIX_FD))
+ return dump_fallback(bus);
+
+ r = bus_call_method(bus, bus_systemd_mgr, "DumpByFileDescriptor", &error, &reply, NULL);
+ if (r < 0) {
+ /* fall back to Dump if DumpByFileDescriptor is not supported */
+ if (!IN_SET(r, -EACCES, -EBADR))
+ return log_error_errno(r, "Failed to issue method call DumpByFileDescriptor: %s",
+ bus_error_message(&error, r));
+
+ return dump_fallback(bus);
+ }
+
+ r = sd_bus_message_read(reply, "h", &fd);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fflush(stdout);
+ return copy_bytes(fd, STDOUT_FILENO, (uint64_t) -1, 0);
+}
+
+static int cat_config(int argc, char *argv[], void *userdata) {
+ char **arg, **list;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ list = strv_skip(argv, 1);
+ STRV_FOREACH(arg, list) {
+ const char *t = NULL;
+
+ if (arg != list)
+ print_separator();
+
+ if (path_is_absolute(*arg)) {
+ const char *dir;
+
+ NULSTR_FOREACH(dir, CONF_PATHS_NULSTR("")) {
+ t = path_startswith(*arg, dir);
+ if (t)
+ break;
+ }
+
+ if (!t)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path %s does not start with any known prefix.", *arg);
+ } else
+ t = *arg;
+
+ r = conf_files_cat(arg_root, t);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int set_log_level(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(argc == 2);
+ assert(argv);
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ r = bus_set_property(bus, bus_systemd_mgr, "LogLevel", &error, "s", argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int get_log_level(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *level = NULL;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ r = bus_get_property_string(bus, bus_systemd_mgr, "LogLevel", &error, &level);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get log level: %s", bus_error_message(&error, r));
+
+ puts(level);
+ return 0;
+}
+
+static int get_or_set_log_level(int argc, char *argv[], void *userdata) {
+ return (argc == 1) ? get_log_level(argc, argv, userdata) : set_log_level(argc, argv, userdata);
+}
+
+static int set_log_target(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(argc == 2);
+ assert(argv);
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ r = bus_set_property(bus, bus_systemd_mgr, "LogTarget", &error, "s", argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int get_log_target(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *target = NULL;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ r = bus_get_property_string(bus, bus_systemd_mgr, "LogTarget", &error, &target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get log target: %s", bus_error_message(&error, r));
+
+ puts(target);
+ return 0;
+}
+
+static int get_or_set_log_target(int argc, char *argv[], void *userdata) {
+ return (argc == 1) ? get_log_target(argc, argv, userdata) : set_log_target(argc, argv, userdata);
+}
+
+static bool strv_fnmatch_strv_or_empty(char* const* patterns, char **strv, int flags) {
+ char **s;
+ STRV_FOREACH(s, strv)
+ if (strv_fnmatch_or_empty(patterns, *s, flags))
+ return true;
+
+ return false;
+}
+
+static int do_unit_files(int argc, char *argv[], void *userdata) {
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ _cleanup_hashmap_free_ Hashmap *unit_ids = NULL;
+ _cleanup_hashmap_free_ Hashmap *unit_names = NULL;
+ char **patterns = strv_skip(argv, 1);
+ const char *k, *dst;
+ char **v;
+ int r;
+
+ r = lookup_paths_init(&lp, arg_scope, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "lookup_paths_init() failed: %m");
+
+ r = unit_file_build_name_map(&lp, NULL, &unit_ids, &unit_names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "unit_file_build_name_map() failed: %m");
+
+ HASHMAP_FOREACH_KEY(dst, k, unit_ids) {
+ if (!strv_fnmatch_or_empty(patterns, k, FNM_NOESCAPE) &&
+ !strv_fnmatch_or_empty(patterns, dst, FNM_NOESCAPE))
+ continue;
+
+ printf("ids: %s → %s\n", k, dst);
+ }
+
+ HASHMAP_FOREACH_KEY(v, k, unit_names) {
+ if (!strv_fnmatch_or_empty(patterns, k, FNM_NOESCAPE) &&
+ !strv_fnmatch_strv_or_empty(patterns, v, FNM_NOESCAPE))
+ continue;
+
+ _cleanup_free_ char *j = strv_join(v, ", ");
+ printf("aliases: %s ← %s\n", k, j);
+ }
+
+ return 0;
+}
+
+static int dump_unit_paths(int argc, char *argv[], void *userdata) {
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ int r;
+ char **p;
+
+ r = lookup_paths_init(&paths, arg_scope, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "lookup_paths_init() failed: %m");
+
+ STRV_FOREACH(p, paths.search_path)
+ puts(*p);
+
+ return 0;
+}
+
+static int dump_exit_status(int argc, char *argv[], void *userdata) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int r;
+
+ table = table_new("name", "status", "class");
+ if (!table)
+ return log_oom();
+
+ r = table_set_align_percent(table, table_get_cell(table, 0, 1), 100);
+ if (r < 0)
+ return log_error_errno(r, "Failed to right-align status: %m");
+
+ if (strv_isempty(strv_skip(argv, 1)))
+ for (size_t i = 0; i < ELEMENTSOF(exit_status_mappings); i++) {
+ if (!exit_status_mappings[i].name)
+ continue;
+
+ r = table_add_many(table,
+ TABLE_STRING, exit_status_mappings[i].name,
+ TABLE_INT, (int) i,
+ TABLE_STRING, exit_status_class(i));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ else
+ for (int i = 1; i < argc; i++) {
+ int status;
+
+ status = exit_status_from_string(argv[i]);
+ if (status < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid exit status \"%s\".", argv[i]);
+
+ assert(status >= 0 && (size_t) status < ELEMENTSOF(exit_status_mappings));
+ r = table_add_many(table,
+ TABLE_STRING, exit_status_mappings[status].name ?: "-",
+ TABLE_INT, status,
+ TABLE_STRING, exit_status_class(status) ?: "-");
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ return table_print(table, NULL);
+}
+
+static int dump_capabilities(int argc, char *argv[], void *userdata) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ unsigned last_cap;
+ int r;
+
+ table = table_new("name", "number");
+ if (!table)
+ return log_oom();
+
+ (void) table_set_align_percent(table, table_get_cell(table, 0, 1), 100);
+
+ /* Determine the maximum of the last cap known by the kernel and by us */
+ last_cap = MAX((unsigned) CAP_LAST_CAP, cap_last_cap());
+
+ if (strv_isempty(strv_skip(argv, 1)))
+ for (unsigned c = 0; c <= last_cap; c++) {
+ r = table_add_many(table,
+ TABLE_STRING, capability_to_name(c) ?: "cap_???",
+ TABLE_UINT, c);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ else {
+ for (int i = 1; i < argc; i++) {
+ int c;
+
+ c = capability_from_name(argv[i]);
+ if (c < 0 || (unsigned) c > last_cap)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Capability \"%s\" not known.", argv[i]);
+
+ r = table_add_many(table,
+ TABLE_STRING, capability_to_name(c) ?: "cap_???",
+ TABLE_UINT, (unsigned) c);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ (void) table_set_sort(table, (size_t) 1, (size_t) -1);
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ return table_print(table, NULL);
+}
+
+#if HAVE_SECCOMP
+
+static int load_kernel_syscalls(Set **ret) {
+ _cleanup_set_free_ Set *syscalls = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ /* Let's read the available system calls from the list of available tracing events. Slightly dirty,
+ * but good enough for analysis purposes. */
+
+ f = fopen("/sys/kernel/tracing/available_events", "re");
+ if (!f) {
+ /* We tried the non-debugfs mount point and that didn't work. If it wasn't mounted, maybe the
+ * old debugfs mount point works? */
+ f = fopen("/sys/kernel/debug/tracing/available_events", "re");
+ if (!f)
+ return log_full_errno(IN_SET(errno, EPERM, EACCES, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
+ "Can't read open tracefs' available_events file: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *e;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read system call list: %m");
+ if (r == 0)
+ break;
+
+ e = startswith(line, "syscalls:sys_enter_");
+ if (!e)
+ continue;
+
+ /* These are named differently inside the kernel than their external name for historical
+ * reasons. Let's hide them here. */
+ if (STR_IN_SET(e, "newuname", "newfstat", "newstat", "newlstat", "sysctl"))
+ continue;
+
+ r = set_put_strdup(&syscalls, e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add system call to list: %m");
+ }
+
+ *ret = TAKE_PTR(syscalls);
+ return 0;
+}
+
+static void syscall_set_remove(Set *s, const SyscallFilterSet *set) {
+ const char *syscall;
+
+ NULSTR_FOREACH(syscall, set->value) {
+ if (syscall[0] == '@')
+ continue;
+
+ free(set_remove(s, syscall));
+ }
+}
+
+static void dump_syscall_filter(const SyscallFilterSet *set) {
+ const char *syscall;
+
+ printf("%s%s%s\n"
+ " # %s\n",
+ ansi_highlight(),
+ set->name,
+ ansi_normal(),
+ set->help);
+
+ NULSTR_FOREACH(syscall, set->value)
+ printf(" %s%s%s\n", syscall[0] == '@' ? ansi_underline() : "", syscall, ansi_normal());
+}
+
+static int dump_syscall_filters(int argc, char *argv[], void *userdata) {
+ bool first = true;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (strv_isempty(strv_skip(argv, 1))) {
+ _cleanup_set_free_ Set *kernel = NULL, *known = NULL;
+ const char *sys;
+ int i, k;
+
+ NULSTR_FOREACH(sys, syscall_filter_sets[SYSCALL_FILTER_SET_KNOWN].value)
+ if (set_put_strdup(&known, sys) < 0)
+ return log_oom();
+
+ k = load_kernel_syscalls(&kernel);
+
+ for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
+ const SyscallFilterSet *set = syscall_filter_sets + i;
+ if (!first)
+ puts("");
+
+ dump_syscall_filter(set);
+ syscall_set_remove(kernel, set);
+ if (i != SYSCALL_FILTER_SET_KNOWN)
+ syscall_set_remove(known, set);
+ first = false;
+ }
+
+ if (!set_isempty(known)) {
+ _cleanup_free_ char **l = NULL;
+ char **syscall;
+
+ printf("\n"
+ "# %sUngrouped System Calls%s (known but not included in any of the groups except @known):\n",
+ ansi_highlight(), ansi_normal());
+
+ l = set_get_strv(known);
+ if (!l)
+ return log_oom();
+
+ strv_sort(l);
+
+ STRV_FOREACH(syscall, l)
+ printf("# %s\n", *syscall);
+ }
+
+ if (k < 0) {
+ fputc('\n', stdout);
+ fflush(stdout);
+ log_notice_errno(k, "# Not showing unlisted system calls, couldn't retrieve kernel system call list: %m");
+ } else if (!set_isempty(kernel)) {
+ _cleanup_free_ char **l = NULL;
+ char **syscall;
+
+ printf("\n"
+ "# %sUnlisted System Calls%s (supported by the local kernel, but not included in any of the groups listed above):\n",
+ ansi_highlight(), ansi_normal());
+
+ l = set_get_strv(kernel);
+ if (!l)
+ return log_oom();
+
+ strv_sort(l);
+
+ STRV_FOREACH(syscall, l)
+ printf("# %s\n", *syscall);
+ }
+ } else {
+ char **name;
+
+ STRV_FOREACH(name, strv_skip(argv, 1)) {
+ const SyscallFilterSet *set;
+
+ if (!first)
+ puts("");
+
+ set = syscall_filter_set_find(*name);
+ if (!set) {
+ /* make sure the error appears below normal output */
+ fflush(stdout);
+
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Filter set \"%s\" not found.", *name);
+ }
+
+ dump_syscall_filter(set);
+ first = false;
+ }
+ }
+
+ return 0;
+}
+
+#else
+static int dump_syscall_filters(int argc, char *argv[], void *userdata) {
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not compiled with syscall filters, sorry.");
+}
+#endif
+
+static void parsing_hint(const char *p, bool calendar, bool timestamp, bool timespan) {
+ if (calendar && calendar_spec_from_string(p, NULL) >= 0)
+ log_notice("Hint: this expression is a valid calendar specification. "
+ "Use 'systemd-analyze calendar \"%s\"' instead?", p);
+ if (timestamp && parse_timestamp(p, NULL) >= 0)
+ log_notice("Hint: this expression is a valid timestamp. "
+ "Use 'systemd-analyze timestamp \"%s\"' instead?", p);
+ if (timespan && parse_time(p, NULL, USEC_PER_SEC) >= 0)
+ log_notice("Hint: this expression is a valid timespan. "
+ "Use 'systemd-analyze timespan \"%s\"' instead?", p);
+}
+
+static int dump_timespan(int argc, char *argv[], void *userdata) {
+ char **input_timespan;
+
+ STRV_FOREACH(input_timespan, strv_skip(argv, 1)) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ usec_t output_usecs;
+ TableCell *cell;
+ int r;
+
+ r = parse_time(*input_timespan, &output_usecs, USEC_PER_SEC);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse time span '%s': %m", *input_timespan);
+ parsing_hint(*input_timespan, true, true, false);
+ return r;
+ }
+
+ table = table_new("name", "value");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ r = table_set_ellipsize_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ r = table_set_align_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ r = table_set_ellipsize_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ r = table_add_many(table,
+ TABLE_STRING, "Original:",
+ TABLE_STRING, *input_timespan);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%ss:", special_glyph(SPECIAL_GLYPH_MU));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_UINT64, output_usecs,
+ TABLE_STRING, "Human:",
+ TABLE_TIMESPAN, output_usecs,
+ TABLE_SET_COLOR, ansi_highlight());
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return r;
+
+ if (input_timespan[1])
+ putchar('\n');
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int test_timestamp_one(const char *p) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ TableCell *cell;
+ usec_t usec;
+ int r;
+
+ r = parse_timestamp(p, &usec);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse \"%s\": %m", p);
+ parsing_hint(p, true, false, true);
+ return r;
+ }
+
+ table = table_new("name", "value");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ r = table_set_ellipsize_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ r = table_set_align_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ r = table_set_ellipsize_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ r = table_add_many(table,
+ TABLE_STRING, "Original form:",
+ TABLE_STRING, p,
+ TABLE_STRING, "Normalized form:",
+ TABLE_TIMESTAMP, usec,
+ TABLE_SET_COLOR, ansi_highlight_blue());
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (!in_utc_timezone()) {
+ r = table_add_many(table,
+ TABLE_STRING, "(in UTC):",
+ TABLE_TIMESTAMP_UTC, usec);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_add_cell(table, NULL, TABLE_STRING, "UNIX seconds:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (usec % USEC_PER_SEC == 0)
+ r = table_add_cell_stringf(table, NULL, "@%"PRI_USEC,
+ usec / USEC_PER_SEC);
+ else
+ r = table_add_cell_stringf(table, NULL, "@%"PRI_USEC".%06"PRI_USEC"",
+ usec / USEC_PER_SEC,
+ usec % USEC_PER_SEC);
+ if (r < 0)
+ return r;
+
+ r = table_add_many(table,
+ TABLE_STRING, "From now:",
+ TABLE_TIMESTAMP_RELATIVE, usec);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ return table_print(table, NULL);
+}
+
+static int test_timestamp(int argc, char *argv[], void *userdata) {
+ int ret = 0, r;
+ char **p;
+
+ STRV_FOREACH(p, strv_skip(argv, 1)) {
+ r = test_timestamp_one(*p);
+ if (ret == 0 && r < 0)
+ ret = r;
+
+ if (*(p + 1))
+ putchar('\n');
+ }
+
+ return ret;
+}
+
+static int test_calendar_one(usec_t n, const char *p) {
+ _cleanup_(calendar_spec_freep) CalendarSpec *spec = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ _cleanup_free_ char *t = NULL;
+ TableCell *cell;
+ int r;
+
+ r = calendar_spec_from_string(p, &spec);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse calendar specification '%s': %m", p);
+ parsing_hint(p, false, true, true);
+ return r;
+ }
+
+ r = calendar_spec_to_string(spec, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format calendar specification '%s': %m", p);
+
+ table = table_new("name", "value");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ r = table_set_ellipsize_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ r = table_set_align_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ r = table_set_ellipsize_percent(table, cell, 100);
+ if (r < 0)
+ return r;
+
+ if (!streq(t, p)) {
+ r = table_add_many(table,
+ TABLE_STRING, "Original form:",
+ TABLE_STRING, p);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_add_many(table,
+ TABLE_STRING, "Normalized form:",
+ TABLE_STRING, t);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ for (unsigned i = 0; i < arg_iterations; i++) {
+ usec_t next;
+
+ r = calendar_spec_next_usec(spec, n, &next);
+ if (r == -ENOENT) {
+ if (i == 0) {
+ r = table_add_many(table,
+ TABLE_STRING, "Next elapse:",
+ TABLE_STRING, "never",
+ TABLE_SET_COLOR, ansi_highlight_yellow());
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ break;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine next elapse for '%s': %m", p);
+
+ if (i == 0) {
+ r = table_add_many(table,
+ TABLE_STRING, "Next elapse:",
+ TABLE_TIMESTAMP, next,
+ TABLE_SET_COLOR, ansi_highlight_blue());
+ if (r < 0)
+ return table_log_add_error(r);
+ } else {
+ int k = DECIMAL_STR_WIDTH(i + 1);
+
+ if (k < 8)
+ k = 8 - k;
+ else
+ k = 0;
+
+ r = table_add_cell_stringf(table, NULL, "Iter. #%u:", i+1);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_TIMESTAMP, next,
+ TABLE_SET_COLOR, ansi_highlight_blue());
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (!in_utc_timezone()) {
+ r = table_add_many(table,
+ TABLE_STRING, "(in UTC):",
+ TABLE_TIMESTAMP_UTC, next);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_add_many(table,
+ TABLE_STRING, "From now:",
+ TABLE_TIMESTAMP_RELATIVE, next);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ n = next;
+ }
+
+ return table_print(table, NULL);
+}
+
+static int test_calendar(int argc, char *argv[], void *userdata) {
+ int ret = 0, r;
+ char **p;
+ usec_t n;
+
+ if (arg_base_time != USEC_INFINITY)
+ n = arg_base_time;
+ else
+ n = now(CLOCK_REALTIME); /* We want to use the same "base" for all expressions */
+
+ STRV_FOREACH(p, strv_skip(argv, 1)) {
+ r = test_calendar_one(n, *p);
+ if (ret == 0 && r < 0)
+ ret = r;
+
+ if (*(p + 1))
+ putchar('\n');
+ }
+
+ return ret;
+}
+
+static int service_watchdogs(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int b, r;
+
+ assert(IN_SET(argc, 1, 2));
+ assert(argv);
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ if (argc == 1) {
+ /* get ServiceWatchdogs */
+ r = bus_get_property_trivial(bus, bus_systemd_mgr, "ServiceWatchdogs", &error, 'b', &b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get service-watchdog state: %s", bus_error_message(&error, r));
+
+ printf("%s\n", yes_no(!!b));
+
+ } else {
+ /* set ServiceWatchdogs */
+ b = parse_boolean(argv[1]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse service-watchdogs argument: %m");
+
+ r = bus_set_property(bus, bus_systemd_mgr, "ServiceWatchdogs", &error, "b", b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set service-watchdog state: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int do_condition(int argc, char *argv[], void *userdata) {
+ return verify_conditions(strv_skip(argv, 1), arg_scope);
+}
+
+static int do_verify(int argc, char *argv[], void *userdata) {
+ return verify_units(strv_skip(argv, 1), arg_scope, arg_man, arg_generators);
+}
+
+static int do_security(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = acquire_bus(&bus, NULL);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ (void) pager_open(arg_pager_flags);
+
+ return analyze_security(bus, strv_skip(argv, 1), 0);
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL, *dot_link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("systemd-analyze", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ /* Not using terminal_urlify_man() for this, since we don't want the "man page" text suffix in this case. */
+ r = terminal_urlify("man:dot(1)", "dot(1)", &dot_link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sProfile systemd, show unit dependencies, check unit files.%s\n"
+ "\nCommands:\n"
+ " [time] Print time required to boot the machine\n"
+ " blame Print list of running units ordered by time to init\n"
+ " critical-chain [UNIT...] Print a tree of the time critical chain of units\n"
+ " plot Output SVG graphic showing service initialization\n"
+ " dot [UNIT...] Output dependency graph in %s format\n"
+ " dump Output state serialization of service manager\n"
+ " cat-config Show configuration file and drop-ins\n"
+ " unit-files List files and symlinks for units\n"
+ " unit-paths List load directories for units\n"
+ " exit-status [STATUS...] List exit status definitions\n"
+ " capability [CAP...] List capability definitions\n"
+ " syscall-filter [NAME...] Print list of syscalls in seccomp filter\n"
+ " condition CONDITION... Evaluate conditions and asserts\n"
+ " verify FILE... Check unit files for correctness\n"
+ " calendar SPEC... Validate repetitive calendar time events\n"
+ " timestamp TIMESTAMP... Validate a timestamp\n"
+ " timespan SPAN... Validate a time span\n"
+ " security [UNIT...] Analyze security of unit\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --system Operate on system systemd instance\n"
+ " --user Operate on user systemd instance\n"
+ " --global Operate on global user configuration\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --order Show only order in the graph\n"
+ " --require Show only requirement in the graph\n"
+ " --from-pattern=GLOB Show only origins in the graph\n"
+ " --to-pattern=GLOB Show only destinations in the graph\n"
+ " --fuzz=SECONDS Also print services which finished SECONDS earlier\n"
+ " than the latest in the branch\n"
+ " --man[=BOOL] Do [not] check for existence of man pages\n"
+ " --generators[=BOOL] Do [not] run unit generators (requires privileges)\n"
+ " --iterations=N Show the specified number of iterations\n"
+ " --base-time=TIMESTAMP Calculate calendar times relative to specified time\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , dot_link
+ , link
+ );
+
+ /* When updating this list, including descriptions, apply changes to
+ * shell-completion/bash/systemd-analyze and shell-completion/zsh/_systemd-analyze too. */
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ORDER,
+ ARG_REQUIRE,
+ ARG_ROOT,
+ ARG_SYSTEM,
+ ARG_USER,
+ ARG_GLOBAL,
+ ARG_DOT_FROM_PATTERN,
+ ARG_DOT_TO_PATTERN,
+ ARG_FUZZ,
+ ARG_NO_PAGER,
+ ARG_MAN,
+ ARG_GENERATORS,
+ ARG_ITERATIONS,
+ ARG_BASE_TIME,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "order", no_argument, NULL, ARG_ORDER },
+ { "require", no_argument, NULL, ARG_REQUIRE },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "global", no_argument, NULL, ARG_GLOBAL },
+ { "from-pattern", required_argument, NULL, ARG_DOT_FROM_PATTERN },
+ { "to-pattern", required_argument, NULL, ARG_DOT_TO_PATTERN },
+ { "fuzz", required_argument, NULL, ARG_FUZZ },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "man", optional_argument, NULL, ARG_MAN },
+ { "generators", optional_argument, NULL, ARG_GENERATORS },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "iterations", required_argument, NULL, ARG_ITERATIONS },
+ { "base-time", required_argument, NULL, ARG_BASE_TIME },
+ {}
+ };
+
+ int r, c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ROOT:
+ arg_root = optarg;
+ break;
+
+ case ARG_SYSTEM:
+ arg_scope = UNIT_FILE_SYSTEM;
+ break;
+
+ case ARG_USER:
+ arg_scope = UNIT_FILE_USER;
+ break;
+
+ case ARG_GLOBAL:
+ arg_scope = UNIT_FILE_GLOBAL;
+ break;
+
+ case ARG_ORDER:
+ arg_dot = DEP_ORDER;
+ break;
+
+ case ARG_REQUIRE:
+ arg_dot = DEP_REQUIRE;
+ break;
+
+ case ARG_DOT_FROM_PATTERN:
+ if (strv_extend(&arg_dot_from_patterns, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_DOT_TO_PATTERN:
+ if (strv_extend(&arg_dot_to_patterns, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_FUZZ:
+ r = parse_sec(optarg, &arg_fuzz);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_MAN:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --man= argument.");
+
+ arg_man = r;
+ } else
+ arg_man = true;
+
+ break;
+
+ case ARG_GENERATORS:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --generators= argument.");
+
+ arg_generators = r;
+ } else
+ arg_generators = true;
+
+ break;
+
+ case ARG_ITERATIONS:
+ r = safe_atou(optarg, &arg_iterations);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse iterations: %s", optarg);
+
+ break;
+
+ case ARG_BASE_TIME:
+ r = parse_timestamp(optarg, &arg_base_time);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --base-time= parameter: %s", optarg);
+
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ if (arg_scope == UNIT_FILE_GLOBAL &&
+ !STR_IN_SET(argv[optind] ?: "time", "dot", "unit-paths", "verify"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --global only makes sense with verbs dot, unit-paths, verify.");
+
+ if (streq_ptr(argv[optind], "cat-config") && arg_scope == UNIT_FILE_USER)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --user is not supported for cat-config right now.");
+
+ if (arg_root && !streq_ptr(argv[optind], "cat-config"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --root is only supported for cat-config right now.");
+
+ return 1; /* work to do */
+}
+
+static int run(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "time", VERB_ANY, 1, VERB_DEFAULT, analyze_time },
+ { "blame", VERB_ANY, 1, 0, analyze_blame },
+ { "critical-chain", VERB_ANY, VERB_ANY, 0, analyze_critical_chain },
+ { "plot", VERB_ANY, 1, 0, analyze_plot },
+ { "dot", VERB_ANY, VERB_ANY, 0, dot },
+ /* The following seven verbs are deprecated */
+ { "log-level", VERB_ANY, 2, 0, get_or_set_log_level },
+ { "log-target", VERB_ANY, 2, 0, get_or_set_log_target },
+ { "set-log-level", 2, 2, 0, set_log_level },
+ { "get-log-level", VERB_ANY, 1, 0, get_log_level },
+ { "set-log-target", 2, 2, 0, set_log_target },
+ { "get-log-target", VERB_ANY, 1, 0, get_log_target },
+ { "service-watchdogs", VERB_ANY, 2, 0, service_watchdogs },
+ { "dump", VERB_ANY, 1, 0, dump },
+ { "cat-config", 2, VERB_ANY, 0, cat_config },
+ { "unit-files", VERB_ANY, VERB_ANY, 0, do_unit_files },
+ { "unit-paths", 1, 1, 0, dump_unit_paths },
+ { "exit-status", VERB_ANY, VERB_ANY, 0, dump_exit_status },
+ { "syscall-filter", VERB_ANY, VERB_ANY, 0, dump_syscall_filters },
+ { "capability", VERB_ANY, VERB_ANY, 0, dump_capabilities },
+ { "condition", 2, VERB_ANY, 0, do_condition },
+ { "verify", 2, VERB_ANY, 0, do_verify },
+ { "calendar", 2, VERB_ANY, 0, test_calendar },
+ { "timestamp", 2, VERB_ANY, 0, test_timestamp },
+ { "timespan", 2, VERB_ANY, 0, dump_timespan },
+ { "security", VERB_ANY, VERB_ANY, 0, do_security },
+ {}
+ };
+
+ int r;
+
+ setlocale(LC_ALL, "");
+ setlocale(LC_NUMERIC, "C"); /* we want to format/parse floats in C style */
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/analyze/meson.build b/src/analyze/meson.build
new file mode 100644
index 0000000..9e4d95b
--- /dev/null
+++ b/src/analyze/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_analyze_sources = files('''
+ analyze.c
+ analyze-condition.c
+ analyze-condition.h
+ analyze-verify.c
+ analyze-verify.h
+ analyze-security.c
+ analyze-security.h
+'''.split())
diff --git a/src/analyze/test-verify.c b/src/analyze/test-verify.c
new file mode 100644
index 0000000..12c3215
--- /dev/null
+++ b/src/analyze/test-verify.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include "analyze-verify.h"
+#include "tests.h"
+
+static void test_verify_nonexistent(void) {
+ /* Negative cases */
+ assert_se(verify_executable(NULL, &(ExecCommand) {.flags = EXEC_COMMAND_IGNORE_FAILURE, .path = (char*) "/non/existent"}) == 0);
+ assert_se(verify_executable(NULL, &(ExecCommand) {.path = (char*) "/non/existent"}) < 0);
+
+ /* Ordinary cases */
+ assert_se(verify_executable(NULL, &(ExecCommand) {.path = (char*) "/bin/echo"}) == 0);
+ assert_se(verify_executable(NULL, &(ExecCommand) {.flags = EXEC_COMMAND_IGNORE_FAILURE, .path = (char*) "/bin/echo"}) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_verify_nonexistent();
+}
diff --git a/src/ask-password/ask-password.c b/src/ask-password/ask-password.c
new file mode 100644
index 0000000..a24ee9a
--- /dev/null
+++ b/src/ask-password/ask-password.c
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#include "ask-password-api.h"
+#include "def.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "strv.h"
+
+static const char *arg_icon = NULL;
+static const char *arg_id = NULL;
+static const char *arg_keyname = NULL;
+static char *arg_message = NULL;
+static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
+static bool arg_multiple = false;
+static bool arg_no_output = false;
+static AskPasswordFlags arg_flags = ASK_PASSWORD_PUSH_CACHE;
+
+STATIC_DESTRUCTOR_REGISTER(arg_message, freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-ask-password", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] MESSAGE\n\n"
+ "Query the user for a system passphrase, via the TTY or an UI agent.\n\n"
+ " -h --help Show this help\n"
+ " --icon=NAME Icon name\n"
+ " --id=ID Query identifier (e.g. \"cryptsetup:/dev/sda5\")\n"
+ " --keyname=NAME Kernel key name for caching passwords (e.g. \"cryptsetup\")\n"
+ " --timeout=SEC Timeout in seconds\n"
+ " --echo Do not mask input (useful for usernames)\n"
+ " --no-tty Ask question via agent even on TTY\n"
+ " --accept-cached Accept cached passwords\n"
+ " --multiple List multiple passwords if available\n"
+ " --no-output Do not print password to standard output\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_ICON = 0x100,
+ ARG_TIMEOUT,
+ ARG_ECHO,
+ ARG_NO_TTY,
+ ARG_ACCEPT_CACHED,
+ ARG_MULTIPLE,
+ ARG_ID,
+ ARG_KEYNAME,
+ ARG_NO_OUTPUT,
+ ARG_VERSION,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "icon", required_argument, NULL, ARG_ICON },
+ { "timeout", required_argument, NULL, ARG_TIMEOUT },
+ { "echo", no_argument, NULL, ARG_ECHO },
+ { "no-tty", no_argument, NULL, ARG_NO_TTY },
+ { "accept-cached", no_argument, NULL, ARG_ACCEPT_CACHED },
+ { "multiple", no_argument, NULL, ARG_MULTIPLE },
+ { "id", required_argument, NULL, ARG_ID },
+ { "keyname", required_argument, NULL, ARG_KEYNAME },
+ { "no-output", no_argument, NULL, ARG_NO_OUTPUT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ICON:
+ arg_icon = optarg;
+ break;
+
+ case ARG_TIMEOUT:
+ if (parse_sec(optarg, &arg_timeout) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --timeout parameter %s",
+ optarg);
+ break;
+
+ case ARG_ECHO:
+ arg_flags |= ASK_PASSWORD_ECHO;
+ break;
+
+ case ARG_NO_TTY:
+ arg_flags |= ASK_PASSWORD_NO_TTY;
+ break;
+
+ case ARG_ACCEPT_CACHED:
+ arg_flags |= ASK_PASSWORD_ACCEPT_CACHED;
+ break;
+
+ case ARG_MULTIPLE:
+ arg_multiple = true;
+ break;
+
+ case ARG_ID:
+ arg_id = optarg;
+ break;
+
+ case ARG_KEYNAME:
+ arg_keyname = optarg;
+ break;
+
+ case ARG_NO_OUTPUT:
+ arg_no_output = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc > optind) {
+ arg_message = strv_join(argv + optind, " ");
+ if (!arg_message)
+ return log_oom();
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_strv_free_erase_ char **l = NULL;
+ usec_t timeout;
+ char **p;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_timeout > 0)
+ timeout = now(CLOCK_MONOTONIC) + arg_timeout;
+ else
+ timeout = 0;
+
+ r = ask_password_auto(arg_message, arg_icon, arg_id, arg_keyname, timeout, arg_flags, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query password: %m");
+
+ STRV_FOREACH(p, l) {
+ if (!arg_no_output)
+ puts(*p);
+
+ if (!arg_multiple)
+ break;
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/backlight/backlight.c b/src/backlight/backlight.c
new file mode 100644
index 0000000..d1b6a81
--- /dev/null
+++ b/src/backlight/backlight.c
@@ -0,0 +1,511 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fileio.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "terminal-util.h"
+#include "reboot-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-backlight", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s save [backlight|leds]:DEVICE\n"
+ "%s load [backlight|leds]:DEVICE\n"
+ "\n%sSave and restore backlight brightness at shutdown and boot.%s\n\n"
+ " save Save current brightness\n"
+ " load Set brightness to be the previously saved value\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int find_pci_or_platform_parent(sd_device *device, sd_device **ret) {
+ const char *subsystem, *sysname, *value;
+ sd_device *parent;
+ int r;
+
+ assert(device);
+ assert(ret);
+
+ r = sd_device_get_parent(device, &parent);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(parent, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(parent, &sysname);
+ if (r < 0)
+ return r;
+
+ if (streq(subsystem, "drm")) {
+ const char *c;
+
+ c = startswith(sysname, "card");
+ if (!c)
+ return -ENODATA;
+
+ c += strspn(c, DIGITS);
+ if (*c == '-') {
+ /* A connector DRM device, let's ignore all but LVDS and eDP! */
+ if (!STARTSWITH_SET(c, "-LVDS-", "-Embedded DisplayPort-"))
+ return -EOPNOTSUPP;
+ }
+
+ } else if (streq(subsystem, "pci") &&
+ sd_device_get_sysattr_value(parent, "class", &value) >= 0) {
+ unsigned long class;
+
+ r = safe_atolu(value, &class);
+ if (r < 0)
+ return log_warning_errno(r, "Cannot parse PCI class '%s' of device %s:%s: %m",
+ value, subsystem, sysname);
+
+ /* Graphics card */
+ if (class == 0x30000) {
+ *ret = parent;
+ return 0;
+ }
+
+ } else if (streq(subsystem, "platform")) {
+ *ret = parent;
+ return 0;
+ }
+
+ return find_pci_or_platform_parent(parent, ret);
+}
+
+static int same_device(sd_device *a, sd_device *b) {
+ const char *a_val, *b_val;
+ int r;
+
+ assert(a);
+ assert(b);
+
+ r = sd_device_get_subsystem(a, &a_val);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(b, &b_val);
+ if (r < 0)
+ return r;
+
+ if (!streq(a_val, b_val))
+ return false;
+
+ r = sd_device_get_sysname(a, &a_val);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(b, &b_val);
+ if (r < 0)
+ return r;
+
+ return streq(a_val, b_val);
+}
+
+static int validate_device(sd_device *device) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerate = NULL;
+ const char *v, *subsystem;
+ sd_device *parent, *other;
+ int r;
+
+ assert(device);
+
+ /* Verify whether we should actually care for a specific
+ * backlight device. For backlight devices there might be
+ * multiple ways to access the same control: "firmware"
+ * (i.e. ACPI), "platform" (i.e. via the machine's EC) and
+ * "raw" (via the graphics card). In general we should prefer
+ * "firmware" (i.e. ACPI) or "platform" access over "raw"
+ * access, in order not to confuse the BIOS/EC, and
+ * compatibility with possible low-level hotkey handling of
+ * screen brightness. The kernel will already make sure to
+ * expose only one of "firmware" and "platform" for the same
+ * device to userspace. However, we still need to make sure
+ * that we use "raw" only if no "firmware" or "platform"
+ * device for the same device exists. */
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return r;
+ if (!streq(subsystem, "backlight"))
+ return true;
+
+ r = sd_device_get_sysattr_value(device, "type", &v);
+ if (r < 0)
+ return r;
+ if (!streq(v, "raw"))
+ return true;
+
+ r = find_pci_or_platform_parent(device, &parent);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(parent, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_new(&enumerate);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(enumerate);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(enumerate, "backlight", true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(enumerate, other) {
+ const char *other_subsystem;
+ sd_device *other_parent;
+
+ if (same_device(device, other) > 0)
+ continue;
+
+ if (sd_device_get_sysattr_value(other, "type", &v) < 0 ||
+ !STR_IN_SET(v, "platform", "firmware"))
+ continue;
+
+ /* OK, so there's another backlight device, and it's a
+ * platform or firmware device, so, let's see if we
+ * can verify it belongs to the same device as ours. */
+ if (find_pci_or_platform_parent(other, &other_parent) < 0)
+ continue;
+
+ if (same_device(parent, other_parent)) {
+ const char *device_sysname = NULL, *other_sysname = NULL;
+
+ /* Both have the same PCI parent, that means we are out. */
+
+ (void) sd_device_get_sysname(device, &device_sysname);
+ (void) sd_device_get_sysname(other, &other_sysname);
+
+ log_debug("Skipping backlight device %s, since device %s is on same PCI device and takes precedence.",
+ device_sysname, other_sysname);
+ return false;
+ }
+
+ if (sd_device_get_subsystem(other_parent, &other_subsystem) < 0)
+ continue;
+
+ if (streq(other_subsystem, "platform") && streq(subsystem, "pci")) {
+ const char *device_sysname = NULL, *other_sysname = NULL;
+
+ /* The other is connected to the platform bus and we are a PCI device, that also means we are out. */
+
+ (void) sd_device_get_sysname(device, &device_sysname);
+ (void) sd_device_get_sysname(other, &other_sysname);
+
+ log_debug("Skipping backlight device %s, since device %s is a platform device and takes precedence.",
+ device_sysname, other_sysname);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int get_max_brightness(sd_device *device, unsigned *ret) {
+ const char *max_brightness_str;
+ unsigned max_brightness;
+ int r;
+
+ assert(device);
+ assert(ret);
+
+ r = sd_device_get_sysattr_value(device, "max_brightness", &max_brightness_str);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to read 'max_brightness' attribute: %m");
+
+ r = safe_atou(max_brightness_str, &max_brightness);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to parse 'max_brightness' \"%s\": %m", max_brightness_str);
+
+ if (max_brightness <= 0)
+ return log_device_warning_errno(device, SYNTHETIC_ERRNO(EINVAL), "Maximum brightness is 0, ignoring device.");
+
+ log_device_debug(device, "Maximum brightness is %u", max_brightness);
+ *ret = max_brightness;
+ return 0;
+}
+
+/* Some systems turn the backlight all the way off at the lowest levels.
+ * clamp_brightness clamps the saved brightness to at least 1 or 5% of
+ * max_brightness in case of 'backlight' subsystem. This avoids preserving
+ * an unreadably dim screen, which would otherwise force the user to
+ * disable state restoration. */
+static int clamp_brightness(sd_device *device, bool saved, unsigned max_brightness, unsigned *brightness) {
+ unsigned new_brightness, min_brightness;
+ const char *subsystem;
+ int r;
+
+ assert(device);
+ assert(brightness);
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to get device subsystem: %m");
+
+ if (streq(subsystem, "backlight"))
+ min_brightness = MAX(1U, max_brightness/20);
+ else
+ min_brightness = 0;
+
+ new_brightness = CLAMP(*brightness, min_brightness, max_brightness);
+ if (new_brightness != *brightness)
+ log_device_info(device, "%s brightness %u is %s to %u.",
+ saved ? "Saved" : "Current",
+ *brightness,
+ new_brightness > *brightness ?
+ "too low; increasing" : "too high; decreasing",
+ new_brightness);
+
+ *brightness = new_brightness;
+ return 0;
+}
+
+static bool shall_clamp(sd_device *d) {
+ const char *s;
+ int r;
+
+ assert(d);
+
+ r = sd_device_get_property_value(d, "ID_BACKLIGHT_CLAMP", &s);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_device_debug_errno(d, r, "Failed to get ID_BACKLIGHT_CLAMP property, ignoring: %m");
+ return true;
+ }
+
+ r = parse_boolean(s);
+ if (r < 0) {
+ log_device_debug_errno(d, r, "Failed to parse ID_BACKLIGHT_CLAMP property, ignoring: %m");
+ return true;
+ }
+
+ return r;
+}
+
+static int read_brightness(sd_device *device, unsigned max_brightness, unsigned *ret_brightness) {
+ const char *subsystem, *value;
+ unsigned brightness;
+ int r;
+
+ assert(device);
+ assert(ret_brightness);
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Failed to get subsystem: %m");
+
+ if (streq(subsystem, "backlight")) {
+ r = sd_device_get_sysattr_value(device, "actual_brightness", &value);
+ if (r == -ENOENT) {
+ log_device_debug_errno(device, r, "Failed to read 'actual_brightness' attribute, "
+ "fall back to use 'brightness' attribute: %m");
+ goto use_brightness;
+ }
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Failed to read 'actual_brightness' attribute: %m");
+
+ r = safe_atou(value, &brightness);
+ if (r < 0) {
+ log_device_debug_errno(device, r, "Failed to parse 'actual_brightness' attribute, "
+ "fall back to use 'brightness' attribute: %s", value);
+ goto use_brightness;
+ }
+
+ if (brightness > max_brightness) {
+ log_device_debug(device, "actual_brightness=%u is larger than max_brightness=%u, "
+ "fall back to use 'brightness' attribute", brightness, max_brightness);
+ goto use_brightness;
+ }
+
+ log_device_debug(device, "Current actual_brightness is %u", brightness);
+ *ret_brightness = brightness;
+ return 0;
+ }
+
+use_brightness:
+ r = sd_device_get_sysattr_value(device, "brightness", &value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Failed to read 'brightness' attribute: %m");
+
+ r = safe_atou(value, &brightness);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Failed to parse 'brightness' attribute: %s", value);
+
+ if (brightness > max_brightness)
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL),
+ "brightness=%u is larger than max_brightness=%u",
+ brightness, max_brightness);
+
+ log_device_debug(device, "Current brightness is %u", brightness);
+ *ret_brightness = brightness;
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _cleanup_free_ char *escaped_ss = NULL, *escaped_sysname = NULL, *escaped_path_id = NULL;
+ const char *sysname, *path_id, *ss, *saved;
+ unsigned max_brightness, brightness;
+ int r;
+
+ log_setup_service();
+
+ if (strv_contains(strv_skip(argv, 1), "--help"))
+ return help();
+
+ if (argc != 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program requires two arguments.");
+
+ umask(0022);
+
+ r = mkdir_p("/var/lib/systemd/backlight", 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create backlight directory /var/lib/systemd/backlight: %m");
+
+ sysname = strchr(argv[2], ':');
+ if (!sysname)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Requires a subsystem and sysname pair specifying a backlight device.");
+
+ ss = strndupa(argv[2], sysname - argv[2]);
+
+ sysname++;
+
+ if (!STR_IN_SET(ss, "backlight", "leds"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Not a backlight or LED device: '%s:%s'", ss, sysname);
+
+ r = sd_device_new_from_subsystem_sysname(&device, ss, sysname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get backlight or LED device '%s:%s': %m", ss, sysname);
+
+ /* If max_brightness is 0, then there is no actual backlight
+ * device. This happens on desktops with Asus mainboards
+ * that load the eeepc-wmi module. */
+ if (get_max_brightness(device, &max_brightness) < 0)
+ return 0;
+
+ escaped_ss = cescape(ss);
+ if (!escaped_ss)
+ return log_oom();
+
+ escaped_sysname = cescape(sysname);
+ if (!escaped_sysname)
+ return log_oom();
+
+ if (sd_device_get_property_value(device, "ID_PATH", &path_id) >= 0) {
+ escaped_path_id = cescape(path_id);
+ if (!escaped_path_id)
+ return log_oom();
+
+ saved = strjoina("/var/lib/systemd/backlight/", escaped_path_id, ":", escaped_ss, ":", escaped_sysname);
+ } else
+ saved = strjoina("/var/lib/systemd/backlight/", escaped_ss, ":", escaped_sysname);
+
+ /* If there are multiple conflicting backlight devices, then
+ * their probing at boot-time might happen in any order. This
+ * means the validity checking of the device then is not
+ * reliable, since it might not see other devices conflicting
+ * with a specific backlight. To deal with this, we will
+ * actively delete backlight state files at shutdown (where
+ * device probing should be complete), so that the validity
+ * check at boot time doesn't have to be reliable. */
+
+ if (streq(argv[1], "load")) {
+ _cleanup_free_ char *value = NULL;
+ bool clamp;
+
+ if (shall_restore_state() == 0)
+ return 0;
+
+ if (validate_device(device) == 0)
+ return 0;
+
+ clamp = shall_clamp(device);
+
+ r = read_one_line_file(saved, &value);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to read %s: %m", saved);
+ if (r > 0) {
+ r = safe_atou(value, &brightness);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse saved brightness '%s', removing %s.",
+ value, saved);
+ (void) unlink(saved);
+ } else {
+ log_debug("Using saved brightness %u.", brightness);
+ if (clamp)
+ (void) clamp_brightness(device, true, max_brightness, &brightness);
+
+ /* Do not fall back to read current brightness below. */
+ r = 1;
+ }
+ }
+ if (r <= 0) {
+ /* Fallback to clamping current brightness or exit early if clamping is not
+ * supported/enabled. */
+ if (!clamp)
+ return 0;
+
+ r = read_brightness(device, max_brightness, &brightness);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to read current brightness: %m");
+
+ (void) clamp_brightness(device, false, max_brightness, &brightness);
+ }
+
+ r = sd_device_set_sysattr_valuef(device, "brightness", "%u", brightness);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to write system 'brightness' attribute: %m");
+
+ } else if (streq(argv[1], "save")) {
+ if (validate_device(device) == 0) {
+ (void) unlink(saved);
+ return 0;
+ }
+
+ r = read_brightness(device, max_brightness, &brightness);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to read current brightness: %m");
+
+ r = write_string_filef(saved, WRITE_STRING_FILE_CREATE, "%u", brightness);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to write %s: %m", saved);
+
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown verb %s.", argv[1]);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/basic/MurmurHash2.c b/src/basic/MurmurHash2.c
new file mode 100644
index 0000000..43a89a0
--- /dev/null
+++ b/src/basic/MurmurHash2.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LicenseRef-murmurhash2-public-domain */
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+// machines.
+
+#include "MurmurHash2.h"
+
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
+{
+ // 'm' and 'r' are mixing constants generated offline.
+ // They're not really 'magic', they just happen to work well.
+
+ const uint32_t m = 0x5bd1e995;
+ const int r = 24;
+
+ // Initialize the hash to a 'random' value
+
+ uint32_t h = seed ^ len;
+
+ // Mix 4 bytes at a time into the hash
+
+ const unsigned char * data = (const unsigned char *)key;
+
+ while (len >= 4)
+ {
+ uint32_t k = *(uint32_t*)data;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+
+ data += 4;
+ len -= 4;
+ }
+
+ // Handle the last few bytes of the input array
+
+ switch(len)
+ {
+ case 3: h ^= data[2] << 16; /* fall through */
+ case 2: h ^= data[1] << 8; /* fall through */
+ case 1: h ^= data[0]; /* fall through */
+ h *= m;
+ };
+
+ // Do a few final mixes of the hash to ensure the last few
+ // bytes are well-incorporated.
+
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return h;
+}
diff --git a/src/basic/MurmurHash2.h b/src/basic/MurmurHash2.h
new file mode 100644
index 0000000..5758b86
--- /dev/null
+++ b/src/basic/MurmurHash2.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LicenseRef-murmurhash2-public-domain */
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#pragma once
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
diff --git a/src/basic/af-list.c b/src/basic/af-list.c
new file mode 100644
index 0000000..7e819d6
--- /dev/null
+++ b/src/basic/af-list.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "af-list.h"
+#include "macro.h"
+
+static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "af-from-name.h"
+#include "af-to-name.h"
+
+const char *af_to_name(int id) {
+
+ if (id <= 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(af_names))
+ return NULL;
+
+ return af_names[id];
+}
+
+int af_from_name(const char *name) {
+ const struct af_name *sc;
+
+ assert(name);
+
+ sc = lookup_af(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+int af_max(void) {
+ return ELEMENTSOF(af_names);
+}
diff --git a/src/basic/af-list.h b/src/basic/af-list.h
new file mode 100644
index 0000000..688ac63
--- /dev/null
+++ b/src/basic/af-list.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/socket.h>
+
+#include "string-util.h"
+
+const char *af_to_name(int id);
+int af_from_name(const char *name);
+
+static inline const char* af_to_name_short(int id) {
+ const char *f;
+
+ if (id == AF_UNSPEC)
+ return "*";
+
+ f = af_to_name(id);
+ if (!f)
+ return "unknown";
+
+ assert(startswith(f, "AF_"));
+ return f + 3;
+}
+
+int af_max(void);
diff --git a/src/basic/af-to-name.awk b/src/basic/af-to-name.awk
new file mode 100644
index 0000000..18d0a89
--- /dev/null
+++ b/src/basic/af-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const af_names[] = { "
+}
+!/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ {
+ printf " [%s] = \"%s\",\n", $1, $1
+}
+END{
+ print "};"
+}
diff --git a/src/basic/alloc-util.c b/src/basic/alloc-util.c
new file mode 100644
index 0000000..bad15cc
--- /dev/null
+++ b/src/basic/alloc-util.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <malloc.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "memory-util.h"
+
+void* memdup(const void *p, size_t l) {
+ void *ret;
+
+ assert(l == 0 || p);
+
+ ret = malloc(l ?: 1);
+ if (!ret)
+ return NULL;
+
+ memcpy(ret, p, l);
+ return ret;
+}
+
+void* memdup_suffix0(const void *p, size_t l) {
+ void *ret;
+
+ assert(l == 0 || p);
+
+ /* The same as memdup() but place a safety NUL byte after the allocated memory */
+
+ if (_unlikely_(l == SIZE_MAX)) /* prevent overflow */
+ return NULL;
+
+ ret = malloc(l + 1);
+ if (!ret)
+ return NULL;
+
+ *((uint8_t*) mempcpy(ret, p, l)) = 0;
+ return ret;
+}
+
+void* greedy_realloc(void **p, size_t *allocated, size_t need, size_t size) {
+ size_t a, newalloc;
+ void *q;
+
+ assert(p);
+ assert(allocated);
+
+ if (*allocated >= need)
+ return *p;
+
+ if (_unlikely_(need > SIZE_MAX/2)) /* Overflow check */
+ return NULL;
+
+ newalloc = need * 2;
+ if (size_multiply_overflow(newalloc, size))
+ return NULL;
+
+ a = newalloc * size;
+ if (a < 64) /* Allocate at least 64 bytes */
+ a = 64;
+
+ q = realloc(*p, a);
+ if (!q)
+ return NULL;
+
+ if (size > 0) {
+ size_t bn;
+
+ /* Adjust for the 64 byte minimum */
+ newalloc = a / size;
+
+ bn = malloc_usable_size(q) / size;
+ if (bn > newalloc) {
+ void *qq;
+
+ /* The actual size allocated is larger than what we asked for. Let's call realloc() again to
+ * take possession of the extra space. This should be cheap, since libc doesn't have to move
+ * the memory for this. */
+
+ qq = reallocarray(q, bn, size);
+ if (_likely_(qq)) {
+ *p = qq;
+ *allocated = bn;
+ return qq;
+ }
+ }
+ }
+
+ *p = q;
+ *allocated = newalloc;
+ return q;
+}
+
+void* greedy_realloc0(void **p, size_t *allocated, size_t need, size_t size) {
+ size_t prev;
+ uint8_t *q;
+
+ assert(p);
+ assert(allocated);
+
+ prev = *allocated;
+
+ q = greedy_realloc(p, allocated, need, size);
+ if (!q)
+ return NULL;
+
+ if (*allocated > prev)
+ memzero(q + prev * size, (*allocated - prev) * size);
+
+ return q;
+}
diff --git a/src/basic/alloc-util.h b/src/basic/alloc-util.h
new file mode 100644
index 0000000..f3e192d
--- /dev/null
+++ b/src/basic/alloc-util.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <alloca.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "macro.h"
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+# include <sanitizer/msan_interface.h>
+#endif
+
+typedef void (*free_func_t)(void *p);
+
+/* If for some reason more than 4M are allocated on the stack, let's abort immediately. It's better than
+ * proceeding and smashing the stack limits. Note that by default RLIMIT_STACK is 8M on Linux. */
+#define ALLOCA_MAX (4U*1024U*1024U)
+
+#define new(t, n) ((t*) malloc_multiply(sizeof(t), (n)))
+
+#define new0(t, n) ((t*) calloc((n) ?: 1, sizeof(t)))
+
+#define newa(t, n) \
+ ({ \
+ size_t _n_ = n; \
+ assert(!size_multiply_overflow(sizeof(t), _n_)); \
+ assert(sizeof(t)*_n_ <= ALLOCA_MAX); \
+ (t*) alloca((sizeof(t)*_n_) ?: 1); \
+ })
+
+#define newa0(t, n) \
+ ({ \
+ size_t _n_ = n; \
+ assert(!size_multiply_overflow(sizeof(t), _n_)); \
+ assert(sizeof(t)*_n_ <= ALLOCA_MAX); \
+ (t*) alloca0((sizeof(t)*_n_) ?: 1); \
+ })
+
+#define newdup(t, p, n) ((t*) memdup_multiply(p, sizeof(t), (n)))
+
+#define newdup_suffix0(t, p, n) ((t*) memdup_suffix0_multiply(p, sizeof(t), (n)))
+
+#define malloc0(n) (calloc(1, (n) ?: 1))
+
+static inline void *mfree(void *memory) {
+ free(memory);
+ return NULL;
+}
+
+#define free_and_replace(a, b) \
+ ({ \
+ free(a); \
+ (a) = (b); \
+ (b) = NULL; \
+ 0; \
+ })
+
+void* memdup(const void *p, size_t l) _alloc_(2);
+void* memdup_suffix0(const void *p, size_t l); /* We can't use _alloc_() here, since we return a buffer one byte larger than the specified size */
+
+#define memdupa(p, l) \
+ ({ \
+ void *_q_; \
+ size_t _l_ = l; \
+ assert(_l_ <= ALLOCA_MAX); \
+ _q_ = alloca(_l_ ?: 1); \
+ memcpy(_q_, p, _l_); \
+ })
+
+#define memdupa_suffix0(p, l) \
+ ({ \
+ void *_q_; \
+ size_t _l_ = l; \
+ assert(_l_ <= ALLOCA_MAX); \
+ _q_ = alloca(_l_ + 1); \
+ ((uint8_t*) _q_)[_l_] = 0; \
+ memcpy(_q_, p, _l_); \
+ })
+
+static inline void freep(void *p) {
+ free(*(void**) p);
+}
+
+#define _cleanup_free_ _cleanup_(freep)
+
+static inline bool size_multiply_overflow(size_t size, size_t need) {
+ return _unlikely_(need != 0 && size > (SIZE_MAX / need));
+}
+
+_malloc_ _alloc_(1, 2) static inline void *malloc_multiply(size_t size, size_t need) {
+ if (size_multiply_overflow(size, need))
+ return NULL;
+
+ return malloc(size * need ?: 1);
+}
+
+#if !HAVE_REALLOCARRAY
+_alloc_(2, 3) static inline void *reallocarray(void *p, size_t need, size_t size) {
+ if (size_multiply_overflow(size, need))
+ return NULL;
+
+ return realloc(p, size * need ?: 1);
+}
+#endif
+
+_alloc_(2, 3) static inline void *memdup_multiply(const void *p, size_t size, size_t need) {
+ if (size_multiply_overflow(size, need))
+ return NULL;
+
+ return memdup(p, size * need);
+}
+
+/* Note that we can't decorate this function with _alloc_() since the returned memory area is one byte larger
+ * than the product of its parameters. */
+static inline void *memdup_suffix0_multiply(const void *p, size_t size, size_t need) {
+ if (size_multiply_overflow(size, need))
+ return NULL;
+
+ return memdup_suffix0(p, size * need);
+}
+
+void* greedy_realloc(void **p, size_t *allocated, size_t need, size_t size);
+void* greedy_realloc0(void **p, size_t *allocated, size_t need, size_t size);
+
+#define GREEDY_REALLOC(array, allocated, need) \
+ greedy_realloc((void**) &(array), &(allocated), (need), sizeof((array)[0]))
+
+#define GREEDY_REALLOC0(array, allocated, need) \
+ greedy_realloc0((void**) &(array), &(allocated), (need), sizeof((array)[0]))
+
+#define alloca0(n) \
+ ({ \
+ char *_new_; \
+ size_t _len_ = n; \
+ assert(_len_ <= ALLOCA_MAX); \
+ _new_ = alloca(_len_ ?: 1); \
+ (void *) memset(_new_, 0, _len_); \
+ })
+
+/* It's not clear what alignment glibc/gcc alloca() guarantee, hence provide a guaranteed safe version */
+#define alloca_align(size, align) \
+ ({ \
+ void *_ptr_; \
+ size_t _mask_ = (align) - 1; \
+ size_t _size_ = size; \
+ assert(_size_ <= ALLOCA_MAX); \
+ _ptr_ = alloca((_size_ + _mask_) ?: 1); \
+ (void*)(((uintptr_t)_ptr_ + _mask_) & ~_mask_); \
+ })
+
+#define alloca0_align(size, align) \
+ ({ \
+ void *_new_; \
+ size_t _xsize_ = (size); \
+ _new_ = alloca_align(_xsize_, (align)); \
+ (void*)memset(_new_, 0, _xsize_); \
+ })
+
+/* Takes inspiration from Rust's Option::take() method: reads and returns a pointer, but at the same time
+ * resets it to NULL. See: https://doc.rust-lang.org/std/option/enum.Option.html#method.take */
+#define TAKE_PTR(ptr) \
+ ({ \
+ typeof(ptr) _ptr_ = (ptr); \
+ (ptr) = NULL; \
+ _ptr_; \
+ })
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+# define msan_unpoison(r, s) __msan_unpoison(r, s)
+#else
+# define msan_unpoison(r, s)
+#endif
diff --git a/src/basic/architecture.c b/src/basic/architecture.c
new file mode 100644
index 0000000..409632c
--- /dev/null
+++ b/src/basic/architecture.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/utsname.h>
+
+#include "architecture.h"
+#include "macro.h"
+#include "string-table.h"
+#include "string-util.h"
+
+int uname_architecture(void) {
+
+ /* Return a sanitized enum identifying the architecture we are
+ * running on. This is based on uname(), and the user may
+ * hence control what this returns by using
+ * personality(). This puts the user in control on systems
+ * that can run binaries of multiple architectures.
+ *
+ * We do not translate the string returned by uname()
+ * 1:1. Instead we try to clean it up and break down the
+ * confusion on x86 and arm in particular.
+ *
+ * We do not try to distinguish CPUs not CPU features, but
+ * actual architectures, i.e. that have genuinely different
+ * code. */
+
+ static const struct {
+ const char *machine;
+ int arch;
+ } arch_map[] = {
+#if defined(__x86_64__) || defined(__i386__)
+ { "x86_64", ARCHITECTURE_X86_64 },
+ { "i686", ARCHITECTURE_X86 },
+ { "i586", ARCHITECTURE_X86 },
+ { "i486", ARCHITECTURE_X86 },
+ { "i386", ARCHITECTURE_X86 },
+#elif defined(__powerpc__) || defined(__powerpc64__)
+ { "ppc64", ARCHITECTURE_PPC64 },
+ { "ppc64le", ARCHITECTURE_PPC64_LE },
+ { "ppc", ARCHITECTURE_PPC },
+ { "ppcle", ARCHITECTURE_PPC_LE },
+#elif defined(__ia64__)
+ { "ia64", ARCHITECTURE_IA64 },
+#elif defined(__hppa__) || defined(__hppa64__)
+ { "parisc64", ARCHITECTURE_PARISC64 },
+ { "parisc", ARCHITECTURE_PARISC },
+#elif defined(__s390__) || defined(__s390x__)
+ { "s390x", ARCHITECTURE_S390X },
+ { "s390", ARCHITECTURE_S390 },
+#elif defined(__sparc__)
+ { "sparc64", ARCHITECTURE_SPARC64 },
+ { "sparc", ARCHITECTURE_SPARC },
+#elif defined(__mips__) || defined(__mips64__)
+ { "mips64", ARCHITECTURE_MIPS64 },
+ { "mips", ARCHITECTURE_MIPS },
+#elif defined(__alpha__)
+ { "alpha" , ARCHITECTURE_ALPHA },
+#elif defined(__arm__) || defined(__aarch64__)
+ { "aarch64", ARCHITECTURE_ARM64 },
+ { "aarch64_be", ARCHITECTURE_ARM64_BE },
+ { "armv4l", ARCHITECTURE_ARM },
+ { "armv4b", ARCHITECTURE_ARM_BE },
+ { "armv4tl", ARCHITECTURE_ARM },
+ { "armv4tb", ARCHITECTURE_ARM_BE },
+ { "armv5tl", ARCHITECTURE_ARM },
+ { "armv5tb", ARCHITECTURE_ARM_BE },
+ { "armv5tel", ARCHITECTURE_ARM },
+ { "armv5teb" , ARCHITECTURE_ARM_BE },
+ { "armv5tejl", ARCHITECTURE_ARM },
+ { "armv5tejb", ARCHITECTURE_ARM_BE },
+ { "armv6l", ARCHITECTURE_ARM },
+ { "armv6b", ARCHITECTURE_ARM_BE },
+ { "armv7l", ARCHITECTURE_ARM },
+ { "armv7b", ARCHITECTURE_ARM_BE },
+ { "armv7ml", ARCHITECTURE_ARM },
+ { "armv7mb", ARCHITECTURE_ARM_BE },
+ { "armv4l", ARCHITECTURE_ARM },
+ { "armv4b", ARCHITECTURE_ARM_BE },
+ { "armv4tl", ARCHITECTURE_ARM },
+ { "armv4tb", ARCHITECTURE_ARM_BE },
+ { "armv5tl", ARCHITECTURE_ARM },
+ { "armv5tb", ARCHITECTURE_ARM_BE },
+ { "armv5tel", ARCHITECTURE_ARM },
+ { "armv5teb", ARCHITECTURE_ARM_BE },
+ { "armv5tejl", ARCHITECTURE_ARM },
+ { "armv5tejb", ARCHITECTURE_ARM_BE },
+ { "armv6l", ARCHITECTURE_ARM },
+ { "armv6b", ARCHITECTURE_ARM_BE },
+ { "armv7l", ARCHITECTURE_ARM },
+ { "armv7b", ARCHITECTURE_ARM_BE },
+ { "armv7ml", ARCHITECTURE_ARM },
+ { "armv7mb", ARCHITECTURE_ARM_BE },
+ { "armv8l", ARCHITECTURE_ARM },
+ { "armv8b", ARCHITECTURE_ARM_BE },
+#elif defined(__sh__) || defined(__sh64__)
+ { "sh5", ARCHITECTURE_SH64 },
+ { "sh2", ARCHITECTURE_SH },
+ { "sh2a", ARCHITECTURE_SH },
+ { "sh3", ARCHITECTURE_SH },
+ { "sh4", ARCHITECTURE_SH },
+ { "sh4a", ARCHITECTURE_SH },
+#elif defined(__m68k__)
+ { "m68k", ARCHITECTURE_M68K },
+#elif defined(__tilegx__)
+ { "tilegx", ARCHITECTURE_TILEGX },
+#elif defined(__cris__)
+ { "crisv32", ARCHITECTURE_CRIS },
+#elif defined(__nios2__)
+ { "nios2", ARCHITECTURE_NIOS2 },
+#elif defined(__riscv__) || defined(__riscv)
+ /* __riscv__ is obsolete, remove in 2018 */
+ { "riscv32", ARCHITECTURE_RISCV32 },
+ { "riscv64", ARCHITECTURE_RISCV64 },
+# if __SIZEOF_POINTER__ == 4
+ { "riscv", ARCHITECTURE_RISCV32 },
+# elif __SIZEOF_POINTER__ == 8
+ { "riscv", ARCHITECTURE_RISCV64 },
+# endif
+#elif defined(__arc__)
+ { "arc", ARCHITECTURE_ARC },
+ { "arceb", ARCHITECTURE_ARC_BE },
+#else
+#error "Please register your architecture here!"
+#endif
+ };
+
+ static int cached = _ARCHITECTURE_INVALID;
+ struct utsname u;
+ unsigned i;
+
+ if (cached != _ARCHITECTURE_INVALID)
+ return cached;
+
+ assert_se(uname(&u) >= 0);
+
+ for (i = 0; i < ELEMENTSOF(arch_map); i++)
+ if (streq(arch_map[i].machine, u.machine))
+ return cached = arch_map[i].arch;
+
+ assert_not_reached("Couldn't identify architecture. You need to patch systemd.");
+ return _ARCHITECTURE_INVALID;
+}
+
+static const char *const architecture_table[_ARCHITECTURE_MAX] = {
+ [ARCHITECTURE_X86] = "x86",
+ [ARCHITECTURE_X86_64] = "x86-64",
+ [ARCHITECTURE_PPC] = "ppc",
+ [ARCHITECTURE_PPC_LE] = "ppc-le",
+ [ARCHITECTURE_PPC64] = "ppc64",
+ [ARCHITECTURE_PPC64_LE] = "ppc64-le",
+ [ARCHITECTURE_IA64] = "ia64",
+ [ARCHITECTURE_PARISC] = "parisc",
+ [ARCHITECTURE_PARISC64] = "parisc64",
+ [ARCHITECTURE_S390] = "s390",
+ [ARCHITECTURE_S390X] = "s390x",
+ [ARCHITECTURE_SPARC] = "sparc",
+ [ARCHITECTURE_SPARC64] = "sparc64",
+ [ARCHITECTURE_MIPS] = "mips",
+ [ARCHITECTURE_MIPS_LE] = "mips-le",
+ [ARCHITECTURE_MIPS64] = "mips64",
+ [ARCHITECTURE_MIPS64_LE] = "mips64-le",
+ [ARCHITECTURE_ALPHA] = "alpha",
+ [ARCHITECTURE_ARM] = "arm",
+ [ARCHITECTURE_ARM_BE] = "arm-be",
+ [ARCHITECTURE_ARM64] = "arm64",
+ [ARCHITECTURE_ARM64_BE] = "arm64-be",
+ [ARCHITECTURE_SH] = "sh",
+ [ARCHITECTURE_SH64] = "sh64",
+ [ARCHITECTURE_M68K] = "m68k",
+ [ARCHITECTURE_TILEGX] = "tilegx",
+ [ARCHITECTURE_CRIS] = "cris",
+ [ARCHITECTURE_NIOS2] = "nios2",
+ [ARCHITECTURE_RISCV32] = "riscv32",
+ [ARCHITECTURE_RISCV64] = "riscv64",
+ [ARCHITECTURE_ARC] = "arc",
+ [ARCHITECTURE_ARC_BE] = "arc-be",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(architecture, int);
diff --git a/src/basic/architecture.h b/src/basic/architecture.h
new file mode 100644
index 0000000..1db625c
--- /dev/null
+++ b/src/basic/architecture.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <endian.h>
+
+#include "macro.h"
+#include "util.h"
+
+/* A cleaned up architecture definition. We don't want to get lost in
+ * processor features, models, generations or even ABIs. Hence we
+ * focus on general family, and distinguish word width and
+ * endianness. */
+
+enum {
+ ARCHITECTURE_X86 = 0,
+ ARCHITECTURE_X86_64,
+ ARCHITECTURE_PPC,
+ ARCHITECTURE_PPC_LE,
+ ARCHITECTURE_PPC64,
+ ARCHITECTURE_PPC64_LE,
+ ARCHITECTURE_IA64,
+ ARCHITECTURE_PARISC,
+ ARCHITECTURE_PARISC64,
+ ARCHITECTURE_S390,
+ ARCHITECTURE_S390X,
+ ARCHITECTURE_SPARC,
+ ARCHITECTURE_SPARC64,
+ ARCHITECTURE_MIPS,
+ ARCHITECTURE_MIPS_LE,
+ ARCHITECTURE_MIPS64,
+ ARCHITECTURE_MIPS64_LE,
+ ARCHITECTURE_ALPHA,
+ ARCHITECTURE_ARM,
+ ARCHITECTURE_ARM_BE,
+ ARCHITECTURE_ARM64,
+ ARCHITECTURE_ARM64_BE,
+ ARCHITECTURE_SH,
+ ARCHITECTURE_SH64,
+ ARCHITECTURE_M68K,
+ ARCHITECTURE_TILEGX,
+ ARCHITECTURE_CRIS,
+ ARCHITECTURE_NIOS2,
+ ARCHITECTURE_RISCV32,
+ ARCHITECTURE_RISCV64,
+ ARCHITECTURE_ARC,
+ ARCHITECTURE_ARC_BE,
+ _ARCHITECTURE_MAX,
+ _ARCHITECTURE_INVALID = -1
+};
+
+int uname_architecture(void);
+
+/*
+ * LIB_ARCH_TUPLE should resolve to the local library path
+ * architecture tuple systemd is built for, according to the Debian
+ * tuple list:
+ *
+ * https://wiki.debian.org/Multiarch/Tuples
+ *
+ * This is used in library search paths that should understand
+ * Debian's paths on all distributions.
+ */
+
+#if defined(__x86_64__)
+# define native_architecture() ARCHITECTURE_X86_64
+# if defined(__ILP32__)
+# define LIB_ARCH_TUPLE "x86_64-linux-gnux32"
+# else
+# define LIB_ARCH_TUPLE "x86_64-linux-gnu"
+# endif
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_X86
+#elif defined(__i386__)
+# define native_architecture() ARCHITECTURE_X86
+# define LIB_ARCH_TUPLE "i386-linux-gnu"
+#elif defined(__powerpc64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_PPC64
+# define LIB_ARCH_TUPLE "ppc64-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_PPC
+# else
+# define native_architecture() ARCHITECTURE_PPC64_LE
+# define LIB_ARCH_TUPLE "powerpc64le-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_PPC_LE
+# endif
+#elif defined(__powerpc__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_PPC
+# if defined(__NO_FPRS__)
+# define LIB_ARCH_TUPLE "powerpc-linux-gnuspe"
+# else
+# define LIB_ARCH_TUPLE "powerpc-linux-gnu"
+# endif
+# else
+# define native_architecture() ARCHITECTURE_PPC_LE
+# error "Missing LIB_ARCH_TUPLE for PPCLE"
+# endif
+#elif defined(__ia64__)
+# define native_architecture() ARCHITECTURE_IA64
+# define LIB_ARCH_TUPLE "ia64-linux-gnu"
+#elif defined(__hppa64__)
+# define native_architecture() ARCHITECTURE_PARISC64
+# error "Missing LIB_ARCH_TUPLE for HPPA64"
+#elif defined(__hppa__)
+# define native_architecture() ARCHITECTURE_PARISC
+# define LIB_ARCH_TUPLE "hppa‑linux‑gnu"
+#elif defined(__s390x__)
+# define native_architecture() ARCHITECTURE_S390X
+# define LIB_ARCH_TUPLE "s390x-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_S390
+#elif defined(__s390__)
+# define native_architecture() ARCHITECTURE_S390
+# define LIB_ARCH_TUPLE "s390-linux-gnu"
+#elif defined(__sparc__) && defined (__arch64__)
+# define native_architecture() ARCHITECTURE_SPARC64
+# define LIB_ARCH_TUPLE "sparc64-linux-gnu"
+#elif defined(__sparc__)
+# define native_architecture() ARCHITECTURE_SPARC
+# define LIB_ARCH_TUPLE "sparc-linux-gnu"
+#elif defined(__mips64) && defined(__LP64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS64
+# define LIB_ARCH_TUPLE "mips64-linux-gnuabi64"
+# else
+# define native_architecture() ARCHITECTURE_MIPS64_LE
+# define LIB_ARCH_TUPLE "mips64el-linux-gnuabi64"
+# endif
+#elif defined(__mips64)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS64
+# define LIB_ARCH_TUPLE "mips64-linux-gnuabin32"
+# else
+# define native_architecture() ARCHITECTURE_MIPS64_LE
+# define LIB_ARCH_TUPLE "mips64el-linux-gnuabin32"
+# endif
+#elif defined(__mips__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_MIPS
+# define LIB_ARCH_TUPLE "mips-linux-gnu"
+# else
+# define native_architecture() ARCHITECTURE_MIPS_LE
+# define LIB_ARCH_TUPLE "mipsel-linux-gnu"
+# endif
+#elif defined(__alpha__)
+# define native_architecture() ARCHITECTURE_ALPHA
+# define LIB_ARCH_TUPLE "alpha-linux-gnu"
+#elif defined(__aarch64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARM64_BE
+# define LIB_ARCH_TUPLE "aarch64_be-linux-gnu"
+# else
+# define native_architecture() ARCHITECTURE_ARM64
+# define LIB_ARCH_TUPLE "aarch64-linux-gnu"
+# define SECONDARY_ARCHITECTURE ARCHITECTURE_ARM
+# endif
+#elif defined(__arm__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARM_BE
+# if defined(__ARM_EABI__)
+# if defined(__ARM_PCS_VFP)
+# define LIB_ARCH_TUPLE "armeb-linux-gnueabihf"
+# else
+# define LIB_ARCH_TUPLE "armeb-linux-gnueabi"
+# endif
+# else
+# define LIB_ARCH_TUPLE "armeb-linux-gnu"
+# endif
+# else
+# define native_architecture() ARCHITECTURE_ARM
+# if defined(__ARM_EABI__)
+# if defined(__ARM_PCS_VFP)
+# define LIB_ARCH_TUPLE "arm-linux-gnueabihf"
+# else
+# define LIB_ARCH_TUPLE "arm-linux-gnueabi"
+# endif
+# else
+# define LIB_ARCH_TUPLE "arm-linux-gnu"
+# endif
+# endif
+#elif defined(__sh64__)
+# define native_architecture() ARCHITECTURE_SH64
+# error "Missing LIB_ARCH_TUPLE for SH64"
+#elif defined(__sh__)
+# define native_architecture() ARCHITECTURE_SH
+# if defined(__SH1__)
+# define LIB_ARCH_TUPLE "sh1-linux-gnu"
+# elif defined(__SH2__)
+# define LIB_ARCH_TUPLE "sh2-linux-gnu"
+# elif defined(__SH2A__)
+# define LIB_ARCH_TUPLE "sh2a-linux-gnu"
+# elif defined(__SH2E__)
+# define LIB_ARCH_TUPLE "sh2e-linux-gnu"
+# elif defined(__SH3__)
+# define LIB_ARCH_TUPLE "sh3-linux-gnu"
+# elif defined(__SH3E__)
+# define LIB_ARCH_TUPLE "sh3e-linux-gnu"
+# elif defined(__SH4__) && !defined(__SH4A__)
+# define LIB_ARCH_TUPLE "sh4-linux-gnu"
+# elif defined(__SH4A__)
+# define LIB_ARCH_TUPLE "sh4a-linux-gnu"
+# endif
+#elif defined(__m68k__)
+# define native_architecture() ARCHITECTURE_M68K
+# define LIB_ARCH_TUPLE "m68k-linux-gnu"
+#elif defined(__tilegx__)
+# define native_architecture() ARCHITECTURE_TILEGX
+# define LIB_ARCH_TUPLE "tilegx-linux-gnu"
+#elif defined(__cris__)
+# define native_architecture() ARCHITECTURE_CRIS
+# error "Missing LIB_ARCH_TUPLE for CRIS"
+#elif defined(__nios2__)
+# define native_architecture() ARCHITECTURE_NIOS2
+# define LIB_ARCH_TUPLE "nios2-linux-gnu"
+#elif defined(__riscv__) || defined(__riscv)
+ /* __riscv__ is obsolete, remove in 2018 */
+# if __SIZEOF_POINTER__ == 4
+# define native_architecture() ARCHITECTURE_RISCV32
+# define LIB_ARCH_TUPLE "riscv32-linux-gnu"
+# elif __SIZEOF_POINTER__ == 8
+# define native_architecture() ARCHITECTURE_RISCV64
+# define LIB_ARCH_TUPLE "riscv64-linux-gnu"
+# else
+# error "Unrecognized riscv architecture variant"
+# endif
+#elif defined(__arc__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define native_architecture() ARCHITECTURE_ARC_BE
+# define LIB_ARCH_TUPLE "arceb-linux"
+# else
+# define native_architecture() ARCHITECTURE_ARC
+# define LIB_ARCH_TUPLE "arc-linux"
+# endif
+#else
+# error "Please register your architecture here!"
+#endif
+
+const char *architecture_to_string(int a) _const_;
+int architecture_from_string(const char *s) _pure_;
diff --git a/src/basic/arphrd-list.c b/src/basic/arphrd-list.c
new file mode 100644
index 0000000..99048d2
--- /dev/null
+++ b/src/basic/arphrd-list.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <linux/if_arp.h>
+#include <string.h>
+
+#include "arphrd-list.h"
+#include "macro.h"
+
+static const struct arphrd_name* lookup_arphrd(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "arphrd-from-name.h"
+#include "arphrd-to-name.h"
+
+int arphrd_from_name(const char *name) {
+ const struct arphrd_name *sc;
+
+ assert(name);
+
+ sc = lookup_arphrd(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
diff --git a/src/basic/arphrd-list.h b/src/basic/arphrd-list.h
new file mode 100644
index 0000000..bc95b45
--- /dev/null
+++ b/src/basic/arphrd-list.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+const char *arphrd_to_name(int id);
+int arphrd_from_name(const char *name);
diff --git a/src/basic/arphrd-to-name.awk b/src/basic/arphrd-to-name.awk
new file mode 100644
index 0000000..db1c739
--- /dev/null
+++ b/src/basic/arphrd-to-name.awk
@@ -0,0 +1,12 @@
+BEGIN{
+ print "const char *arphrd_to_name(int id) {"
+ print " switch(id) {"
+}
+!/^HDLC$/ {
+ printf " case ARPHRD_%s: return \"%s\";\n", $1, $1
+}
+END{
+ print " default: return NULL;"
+ print " }"
+ print "}"
+}
diff --git a/src/basic/async.c b/src/basic/async.c
new file mode 100644
index 0000000..443cfa9
--- /dev/null
+++ b/src/basic/async.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#include "async.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "util.h"
+
+int asynchronous_job(void* (*func)(void *p), void *arg) {
+ sigset_t ss, saved_ss;
+ pthread_attr_t a;
+ pthread_t t;
+ int r, k;
+
+ /* It kinda sucks that we have to resort to threads to implement an asynchronous close(), but well, such is
+ * life. */
+
+ r = pthread_attr_init(&a);
+ if (r > 0)
+ return -r;
+
+ r = pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ assert_se(sigfillset(&ss) >= 0);
+
+ /* Block all signals before forking off the thread, so that the new thread is started with all signals
+ * blocked. This way the existence of the new thread won't affect signal handling in other threads. */
+
+ r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ r = pthread_create(&t, &a, func, arg);
+
+ k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
+
+ if (r > 0)
+ r = -r;
+ else if (k > 0)
+ r = -k;
+ else
+ r = 0;
+
+finish:
+ pthread_attr_destroy(&a);
+ return r;
+}
+
+int asynchronous_sync(pid_t *ret_pid) {
+ int r;
+
+ /* This forks off an invocation of fork() as a child process, in order to initiate synchronization to
+ * disk. Note that we implement this as helper process rather than thread as we don't want the sync() to hang our
+ * original process ever, and a thread would do that as the process can't exit with threads hanging in blocking
+ * syscalls. */
+
+ r = safe_fork("(sd-sync)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, ret_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child process */
+ (void) sync();
+ _exit(EXIT_SUCCESS);
+ }
+
+ return 0;
+}
+
+static void *close_thread(void *p) {
+ (void) pthread_setname_np(pthread_self(), "close");
+
+ assert_se(close_nointr(PTR_TO_FD(p)) != -EBADF);
+ return NULL;
+}
+
+int asynchronous_close(int fd) {
+ int r;
+
+ /* This is supposed to behave similar to safe_close(), but
+ * actually invoke close() asynchronously, so that it will
+ * never block. Ideally the kernel would have an API for this,
+ * but it doesn't, so we work around it, and hide this as a
+ * far away as we can. */
+
+ if (fd >= 0) {
+ PROTECT_ERRNO;
+
+ r = asynchronous_job(close_thread, FD_TO_PTR(fd));
+ if (r < 0)
+ assert_se(close_nointr(fd) != -EBADF);
+ }
+
+ return -1;
+}
diff --git a/src/basic/async.h b/src/basic/async.h
new file mode 100644
index 0000000..e0bbaa5
--- /dev/null
+++ b/src/basic/async.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "macro.h"
+
+int asynchronous_job(void* (*func)(void *p), void *arg);
+
+int asynchronous_sync(pid_t *ret_pid);
+int asynchronous_close(int fd);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(int, asynchronous_close);
diff --git a/src/basic/audit-util.c b/src/basic/audit-util.c
new file mode 100644
index 0000000..1bf88b1
--- /dev/null
+++ b/src/basic/audit-util.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <linux/netlink.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "user-util.h"
+
+int audit_session_from_pid(pid_t pid, uint32_t *id) {
+ _cleanup_free_ char *s = NULL;
+ const char *p;
+ uint32_t u;
+ int r;
+
+ assert(id);
+
+ /* We don't convert ENOENT to ESRCH here, since we can't
+ * really distinguish between "audit is not available in the
+ * kernel" and "the process does not exist", both which will
+ * result in ENOENT. */
+
+ p = procfs_file_alloca(pid, "sessionid");
+
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = safe_atou32(s, &u);
+ if (r < 0)
+ return r;
+
+ if (!audit_session_is_valid(u))
+ return -ENODATA;
+
+ *id = u;
+ return 0;
+}
+
+int audit_loginuid_from_pid(pid_t pid, uid_t *uid) {
+ _cleanup_free_ char *s = NULL;
+ const char *p;
+ uid_t u;
+ int r;
+
+ assert(uid);
+
+ p = procfs_file_alloca(pid, "loginuid");
+
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = parse_uid(s, &u);
+ if (r == -ENXIO) /* the UID was -1 */
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ *uid = u;
+ return 0;
+}
+
+bool use_audit(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0) {
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT);
+ if (fd < 0) {
+ cached_use = !IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT, EPERM);
+ if (!cached_use)
+ log_debug_errno(errno, "Won't talk to audit: %m");
+ } else {
+ cached_use = true;
+ safe_close(fd);
+ }
+ }
+
+ return cached_use;
+}
diff --git a/src/basic/audit-util.h b/src/basic/audit-util.h
new file mode 100644
index 0000000..aa21771
--- /dev/null
+++ b/src/basic/audit-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#define AUDIT_SESSION_INVALID ((uint32_t) -1)
+
+int audit_session_from_pid(pid_t pid, uint32_t *id);
+int audit_loginuid_from_pid(pid_t pid, uid_t *uid);
+
+bool use_audit(void);
+
+static inline bool audit_session_is_valid(uint32_t id) {
+ return id > 0 && id != AUDIT_SESSION_INVALID;
+}
diff --git a/src/basic/blockdev-util.c b/src/basic/blockdev-util.c
new file mode 100644
index 0000000..0f1e30c
--- /dev/null
+++ b/src/basic/blockdev-util.c
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/file.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "missing_magic.h"
+#include "parse-util.h"
+#include "stat-util.h"
+
+int block_get_whole_disk(dev_t d, dev_t *ret) {
+ char p[SYS_BLOCK_PATH_MAX("/partition")];
+ _cleanup_free_ char *s = NULL;
+ dev_t devt;
+ int r;
+
+ assert(ret);
+
+ if (major(d) == 0)
+ return -ENODEV;
+
+ /* If it has a queue this is good enough for us */
+ xsprintf_sys_block_path(p, "/queue", d);
+ if (access(p, F_OK) >= 0) {
+ *ret = d;
+ return 0;
+ }
+ if (errno != ENOENT)
+ return -errno;
+
+ /* If it is a partition find the originating device */
+ xsprintf_sys_block_path(p, "/partition", d);
+ if (access(p, F_OK) < 0)
+ return -errno;
+
+ /* Get parent dev_t */
+ xsprintf_sys_block_path(p, "/../dev", d);
+ r = read_one_line_file(p, &s);
+ if (r < 0)
+ return r;
+
+ r = parse_dev(s, &devt);
+ if (r < 0)
+ return r;
+
+ /* Only return this if it is really good enough for us. */
+ xsprintf_sys_block_path(p, "/queue", devt);
+ if (access(p, F_OK) < 0)
+ return -errno;
+
+ *ret = devt;
+ return 1;
+}
+
+int get_block_device(const char *path, dev_t *ret) {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ /* Gets the block device directly backing a file system. If the block device is encrypted, returns
+ * the device mapper block device. */
+
+ fd = open(path, O_NOFOLLOW|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st))
+ return -errno;
+
+ if (major(st.st_dev) != 0) {
+ *ret = st.st_dev;
+ return 1;
+ }
+
+ r = btrfs_get_block_device_fd(fd, ret);
+ if (r > 0)
+ return 1;
+ if (r != -ENOTTY) /* not btrfs */
+ return r;
+
+ *ret = 0;
+ return 0;
+}
+
+int block_get_originating(dev_t dt, dev_t *ret) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_free_ char *t = NULL;
+ char p[SYS_BLOCK_PATH_MAX("/slaves")];
+ struct dirent *de, *found = NULL;
+ const char *q;
+ dev_t devt;
+ int r;
+
+ /* For the specified block device tries to chase it through the layers, in case LUKS-style DM stacking is used,
+ * trying to find the next underlying layer. */
+
+ xsprintf_sys_block_path(p, "/slaves", dt);
+ d = opendir(p);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
+ continue;
+
+ if (found) {
+ _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL;
+
+ /* We found a device backed by multiple other devices. We don't really support automatic
+ * discovery on such setups, with the exception of dm-verity partitions. In this case there are
+ * two backing devices: the data partition and the hash partition. We are fine with such
+ * setups, however, only if both partitions are on the same physical device. Hence, let's
+ * verify this. */
+
+ u = path_join(p, de->d_name, "../dev");
+ if (!u)
+ return -ENOMEM;
+
+ v = path_join(p, found->d_name, "../dev");
+ if (!v)
+ return -ENOMEM;
+
+ r = read_one_line_file(u, &a);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read %s: %m", u);
+
+ r = read_one_line_file(v, &b);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read %s: %m", v);
+
+ /* Check if the parent device is the same. If not, then the two backing devices are on
+ * different physical devices, and we don't support that. */
+ if (!streq(a, b))
+ return -ENOTUNIQ;
+ }
+
+ found = de;
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ q = strjoina(p, "/", found->d_name, "/dev");
+
+ r = read_one_line_file(q, &t);
+ if (r < 0)
+ return r;
+
+ r = parse_dev(t, &devt);
+ if (r < 0)
+ return -EINVAL;
+
+ if (major(devt) == 0)
+ return -ENOENT;
+
+ *ret = devt;
+ return 1;
+}
+
+int get_block_device_harder(const char *path, dev_t *ret) {
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ /* Gets the backing block device for a file system, and handles LUKS encrypted file systems, looking for its
+ * immediate parent, if there is one. */
+
+ r = get_block_device(path, ret);
+ if (r <= 0)
+ return r;
+
+ r = block_get_originating(*ret, ret);
+ if (r < 0)
+ log_debug_errno(r, "Failed to chase block device '%s', ignoring: %m", path);
+
+ return 1;
+}
+
+int lock_whole_block_device(dev_t devt, int operation) {
+ _cleanup_free_ char *whole_node = NULL;
+ _cleanup_close_ int lock_fd = -1;
+ dev_t whole_devt;
+ int r;
+
+ /* Let's get a BSD file lock on the whole block device, as per: https://systemd.io/BLOCK_DEVICE_LOCKING */
+
+ r = block_get_whole_disk(devt, &whole_devt);
+ if (r < 0)
+ return r;
+
+ r = device_path_make_major_minor(S_IFBLK, whole_devt, &whole_node);
+ if (r < 0)
+ return r;
+
+ lock_fd = open(whole_node, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (lock_fd < 0)
+ return -errno;
+
+ if (flock(lock_fd, operation) < 0)
+ return -errno;
+
+ return TAKE_FD(lock_fd);
+}
+
+int blockdev_partscan_enabled(int fd) {
+ _cleanup_free_ char *p = NULL, *buf = NULL;
+ unsigned long long ull;
+ struct stat st;
+ int r;
+
+ /* Checks if partition scanning is correctly enabled on the block device */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ if (asprintf(&p, "/sys/dev/block/%u:%u/capability", major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &buf);
+ if (r == -ENOENT) /* If the capability file doesn't exist then we are most likely looking at a
+ * partition block device, not the whole block device. And that means we have no
+ * partition scanning on for it (we do for its parent, but not for the partition
+ * itself). */
+ return false;
+ if (r < 0)
+ return r;
+
+ r = safe_atollu_full(buf, 16, &ull);
+ if (r < 0)
+ return r;
+
+#ifndef GENHD_FL_NO_PART_SCAN
+#define GENHD_FL_NO_PART_SCAN (0x0200)
+#endif
+
+ return !FLAGS_SET(ull, GENHD_FL_NO_PART_SCAN);
+}
diff --git a/src/basic/blockdev-util.h b/src/basic/blockdev-util.h
new file mode 100644
index 0000000..10048ff
--- /dev/null
+++ b/src/basic/blockdev-util.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "macro.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+#define SYS_BLOCK_PATH_MAX(suffix) \
+ (STRLEN("/sys/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t) + strlen_ptr(suffix))
+#define xsprintf_sys_block_path(buf, suffix, devno) \
+ xsprintf(buf, "/sys/dev/block/%u:%u%s", major(devno), minor(devno), strempty(suffix))
+
+int block_get_whole_disk(dev_t d, dev_t *ret);
+int block_get_originating(dev_t d, dev_t *ret);
+
+int get_block_device(const char *path, dev_t *dev);
+
+int get_block_device_harder(const char *path, dev_t *dev);
+
+int lock_whole_block_device(dev_t devt, int operation);
+
+int blockdev_partscan_enabled(int fd);
diff --git a/src/basic/btrfs-util.c b/src/basic/btrfs-util.c
new file mode 100644
index 0000000..2634659
--- /dev/null
+++ b/src/basic/btrfs-util.c
@@ -0,0 +1,2021 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/btrfs_tree.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <linux/magic.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "device-nodes.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "smack-util.h"
+#include "sparse-endian.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "util.h"
+
+/* WARNING: Be careful with file system ioctls! When we get an fd, we
+ * need to make sure it either refers to only a regular file or
+ * directory, or that it is located on btrfs, before invoking any
+ * btrfs ioctls. The ioctl numbers are reused by some device drivers
+ * (such as DRM), and hence might have bad effects when invoked on
+ * device nodes (that reference drivers) rather than fds to normal
+ * files or directories. */
+
+static int validate_subvolume_name(const char *name) {
+
+ if (!filename_is_valid(name))
+ return -EINVAL;
+
+ if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
+ return -E2BIG;
+
+ return 0;
+}
+
+static int extract_subvolume_name(const char *path, const char **subvolume) {
+ const char *fn;
+ int r;
+
+ assert(path);
+ assert(subvolume);
+
+ fn = basename(path);
+
+ r = validate_subvolume_name(fn);
+ if (r < 0)
+ return r;
+
+ *subvolume = fn;
+ return 0;
+}
+
+int btrfs_is_filesystem(int fd) {
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ if (fstatfs(fd, &sfs) < 0)
+ return -errno;
+
+ return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
+}
+
+int btrfs_is_subvol_fd(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ /* On btrfs subvolumes always have the inode 256 */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return 0;
+
+ return btrfs_is_filesystem(fd);
+}
+
+int btrfs_is_subvol(const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_is_subvol_fd(fd);
+}
+
+int btrfs_subvol_make_fd(int fd, const char *subvolume) {
+ struct btrfs_ioctl_vol_args args = {};
+ _cleanup_close_ int real_fd = -1;
+ int r;
+
+ assert(subvolume);
+
+ r = validate_subvolume_name(subvolume);
+ if (r < 0)
+ return r;
+
+ r = fcntl(fd, F_GETFL);
+ if (r < 0)
+ return -errno;
+ if (FLAGS_SET(r, O_PATH)) {
+ /* An O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal with
+ * O_PATH. */
+
+ real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (real_fd < 0)
+ return real_fd;
+
+ fd = real_fd;
+ }
+
+ strncpy(args.name, subvolume, sizeof(args.name)-1);
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_subvol_make(const char *path) {
+ _cleanup_close_ int fd = -1;
+ const char *subvolume;
+ int r;
+
+ assert(path);
+
+ r = extract_subvolume_name(path, &subvolume);
+ if (r < 0)
+ return r;
+
+ fd = open_parent(path, O_CLOEXEC, 0);
+ if (fd < 0)
+ return fd;
+
+ return btrfs_subvol_make_fd(fd, subvolume);
+}
+
+int btrfs_subvol_make_fallback(const char *path, mode_t mode) {
+ mode_t old, combined;
+ int r;
+
+ assert(path);
+
+ /* Let's work like mkdir(), i.e. take the specified mode, and mask it with the current umask. */
+ old = umask(~mode);
+ combined = old | ~mode;
+ if (combined != ~mode)
+ umask(combined);
+ r = btrfs_subvol_make(path);
+ umask(old);
+
+ if (r >= 0)
+ return 1; /* subvol worked */
+ if (r != -ENOTTY)
+ return r;
+
+ if (mkdir(path, mode) < 0)
+ return -errno;
+
+ return 0; /* plain directory */
+}
+
+int btrfs_subvol_set_read_only_fd(int fd, bool b) {
+ uint64_t flags, nflags;
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return -EINVAL;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
+ return -errno;
+
+ nflags = UPDATE_FLAG(flags, BTRFS_SUBVOL_RDONLY, b);
+ if (flags == nflags)
+ return 0;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_subvol_set_read_only(const char *path, bool b) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_set_read_only_fd(fd, b);
+}
+
+int btrfs_subvol_get_read_only_fd(int fd) {
+ uint64_t flags;
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
+ return -EINVAL;
+
+ if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
+ return -errno;
+
+ return !!(flags & BTRFS_SUBVOL_RDONLY);
+}
+
+int btrfs_reflink(int infd, int outfd) {
+ int r;
+
+ assert(infd >= 0);
+ assert(outfd >= 0);
+
+ /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
+
+ r = fd_verify_regular(outfd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(outfd, BTRFS_IOC_CLONE, infd) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
+ struct btrfs_ioctl_clone_range_args args = {
+ .src_fd = infd,
+ .src_offset = in_offset,
+ .src_length = sz,
+ .dest_offset = out_offset,
+ };
+ int r;
+
+ assert(infd >= 0);
+ assert(outfd >= 0);
+ assert(sz > 0);
+
+ r = fd_verify_regular(outfd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_get_block_device_fd(int fd, dev_t *dev) {
+ struct btrfs_ioctl_fs_info_args fsi = {};
+ uint64_t id;
+ int r;
+
+ assert(fd >= 0);
+ assert(dev);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
+ return -errno;
+
+ /* We won't do this for btrfs RAID */
+ if (fsi.num_devices != 1) {
+ *dev = 0;
+ return 0;
+ }
+
+ for (id = 1; id <= fsi.max_id; id++) {
+ struct btrfs_ioctl_dev_info_args di = {
+ .devid = id,
+ };
+ struct stat st;
+
+ if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
+ if (errno == ENODEV)
+ continue;
+
+ return -errno;
+ }
+
+ /* For the root fs — when no initrd is involved — btrfs returns /dev/root on any kernels from
+ * the past few years. That sucks, as we have no API to determine the actual root then. let's
+ * return an recognizable error for this case, so that the caller can maybe print a nice
+ * message about this.
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=89721 */
+ if (path_equal((char*) di.path, "/dev/root"))
+ return -EUCLEAN;
+
+ if (stat((char*) di.path, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ if (major(st.st_rdev) == 0)
+ return -ENODEV;
+
+ *dev = st.st_rdev;
+ return 1;
+ }
+
+ return -ENODEV;
+}
+
+int btrfs_get_block_device(const char *path, dev_t *dev) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+ assert(dev);
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_get_block_device_fd(fd, dev);
+}
+
+int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
+ struct btrfs_ioctl_ino_lookup_args args = {
+ .objectid = BTRFS_FIRST_FREE_OBJECTID
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
+ return -errno;
+
+ *ret = args.treeid;
+ return 0;
+}
+
+int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
+ _cleanup_close_ int subvol_fd = -1;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (subvol_fd < 0)
+ return -errno;
+
+ return btrfs_subvol_get_id_fd(subvol_fd, ret);
+}
+
+static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
+ assert(args);
+
+ /* the objectid, type, offset together make up the btrfs key,
+ * which is considered a single 136byte integer when
+ * comparing. This call increases the counter by one, dealing
+ * with the overflow between the overflows */
+
+ if (args->key.min_offset < (uint64_t) -1) {
+ args->key.min_offset++;
+ return true;
+ }
+
+ if (args->key.min_type < (uint8_t) -1) {
+ args->key.min_type++;
+ args->key.min_offset = 0;
+ return true;
+ }
+
+ if (args->key.min_objectid < (uint64_t) -1) {
+ args->key.min_objectid++;
+ args->key.min_offset = 0;
+ args->key.min_type = 0;
+ return true;
+ }
+
+ return 0;
+}
+
+static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
+ assert(args);
+ assert(h);
+
+ args->key.min_objectid = h->objectid;
+ args->key.min_type = h->type;
+ args->key.min_offset = h->offset;
+}
+
+static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
+ int r;
+
+ assert(args);
+
+ /* Compare min and max */
+
+ r = CMP(args->key.min_objectid, args->key.max_objectid);
+ if (r != 0)
+ return r;
+
+ r = CMP(args->key.min_type, args->key.max_type);
+ if (r != 0)
+ return r;
+
+ return CMP(args->key.min_offset, args->key.max_offset);
+}
+
+#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) \
+ for ((i) = 0, \
+ (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
+ (i) < (args).key.nr_items; \
+ (i)++, \
+ (sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len))
+
+#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh) \
+ ((void*) ((uint8_t*) sh + sizeof(struct btrfs_ioctl_search_header)))
+
+int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of tree roots */
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ /* Look precisely for the subvolume items */
+ .key.min_type = BTRFS_ROOT_ITEM_KEY,
+ .key.max_type = BTRFS_ROOT_ITEM_KEY,
+
+ .key.min_offset = 0,
+ .key.max_offset = (uint64_t) -1,
+
+ /* No restrictions on the other components */
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ bool found = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_objectid = args.key.max_objectid = subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return -errno;
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ const struct btrfs_root_item *ri;
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->objectid != subvol_id)
+ continue;
+ if (sh->type != BTRFS_ROOT_ITEM_KEY)
+ continue;
+
+ /* Older versions of the struct lacked the otime setting */
+ if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
+ continue;
+
+ ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
+ (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
+
+ ret->subvol_id = subvol_id;
+ ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
+
+ assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
+ memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
+ memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
+
+ found = true;
+ goto finish;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+finish:
+ if (!found)
+ return -ENODATA;
+
+ return 0;
+}
+
+int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of quota items */
+ .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+ /* The object ID is always 0 */
+ .key.min_objectid = 0,
+ .key.max_objectid = 0,
+
+ /* Look precisely for the quota items */
+ .key.min_type = BTRFS_QGROUP_STATUS_KEY,
+ .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
+
+ /* No restrictions on the other components */
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ bool found_info = false, found_limit = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (qgroupid == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_offset = args.key.max_offset = qgroupid;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+ if (errno == ENOENT) /* quota tree is missing: quota disabled */
+ break;
+
+ return -errno;
+ }
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->objectid != 0)
+ continue;
+ if (sh->offset != qgroupid)
+ continue;
+
+ if (sh->type == BTRFS_QGROUP_INFO_KEY) {
+ const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ ret->referenced = le64toh(qii->rfer);
+ ret->exclusive = le64toh(qii->excl);
+
+ found_info = true;
+
+ } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
+ const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
+ ret->referenced_max = le64toh(qli->max_rfer);
+ else
+ ret->referenced_max = (uint64_t) -1;
+
+ if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+ ret->exclusive_max = le64toh(qli->max_excl);
+ else
+ ret->exclusive_max = (uint64_t) -1;
+
+ found_limit = true;
+ }
+
+ if (found_info && found_limit)
+ goto finish;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+finish:
+ if (!found_limit && !found_info)
+ return -ENODATA;
+
+ if (!found_info) {
+ ret->referenced = (uint64_t) -1;
+ ret->exclusive = (uint64_t) -1;
+ }
+
+ if (!found_limit) {
+ ret->referenced_max = (uint64_t) -1;
+ ret->exclusive_max = (uint64_t) -1;
+ }
+
+ return 0;
+}
+
+int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
+}
+
+int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
+ uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0;
+ _cleanup_free_ uint64_t *qgroups = NULL;
+ int r, n, i;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ /* This finds the "subtree" qgroup for a specific
+ * subvolume. This only works for subvolumes that have been
+ * prepared with btrfs_subvol_auto_qgroup_fd() with
+ * insert_intermediary_qgroup=true (or equivalent). For others
+ * it will return the leaf qgroup instead. The two cases may
+ * be distuingished via the return value, which is 1 in case
+ * an appropriate "subtree" qgroup was found, and 0
+ * otherwise. */
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ r = btrfs_qgroupid_split(subvol_id, &level, NULL);
+ if (r < 0)
+ return r;
+ if (level != 0) /* Input must be a leaf qgroup */
+ return -EINVAL;
+
+ n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
+ if (n < 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ uint64_t id;
+
+ r = btrfs_qgroupid_split(qgroups[i], &level, &id);
+ if (r < 0)
+ return r;
+
+ if (id != subvol_id)
+ continue;
+
+ if (lowest == (uint64_t) -1 || level < lowest) {
+ lowest_qgroupid = qgroups[i];
+ lowest = level;
+ }
+ }
+
+ if (lowest == (uint64_t) -1) {
+ /* No suitable higher-level qgroup found, let's return
+ * the leaf qgroup instead, and indicate that with the
+ * return value. */
+
+ *ret = subvol_id;
+ return 0;
+ }
+
+ *ret = lowest_qgroupid;
+ return 1;
+}
+
+int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
+ uint64_t qgroupid;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ /* This determines the quota data of the qgroup with the
+ * lowest level, that shares the id part with the specified
+ * subvolume. This is useful for determining the quota data
+ * for entire subvolume subtrees, as long as the subtrees have
+ * been set up with btrfs_qgroup_subvol_auto_fd() or in a
+ * compatible way */
+
+ r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
+ if (r < 0)
+ return r;
+
+ return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
+}
+
+int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
+}
+
+int btrfs_defrag_fd(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = fd_verify_regular(fd);
+ if (r < 0)
+ return r;
+
+ if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_defrag(const char *p) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_defrag_fd(fd);
+}
+
+int btrfs_quota_enable_fd(int fd, bool b) {
+ struct btrfs_ioctl_quota_ctl_args args = {
+ .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
+ };
+ int r;
+
+ assert(fd >= 0);
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_quota_enable(const char *path, bool b) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_quota_enable_fd(fd, b);
+}
+
+int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
+
+ struct btrfs_ioctl_qgroup_limit_args args = {
+ .lim.max_rfer = referenced_max,
+ .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
+ };
+ unsigned c;
+ int r;
+
+ assert(fd >= 0);
+
+ if (qgroupid == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.qgroupid = qgroupid;
+
+ for (c = 0;; c++) {
+ if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
+
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
+}
+
+int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
+ uint64_t qgroupid;
+ int r;
+
+ assert(fd >= 0);
+
+ r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
+ if (r < 0)
+ return r;
+
+ return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
+}
+
+int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
+}
+
+int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
+ assert(ret);
+
+ if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
+ return -EINVAL;
+
+ if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
+ return -EINVAL;
+
+ *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
+ return 0;
+}
+
+int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
+ assert(level || id);
+
+ if (level)
+ *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+
+ if (id)
+ *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
+
+ return 0;
+}
+
+static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
+
+ struct btrfs_ioctl_qgroup_create_args args = {
+ .create = b,
+ .qgroupid = qgroupid,
+ };
+ unsigned c;
+ int r;
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOTTY;
+
+ for (c = 0;; c++) {
+ if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
+
+ /* On old kernels if quota is not enabled, we get EINVAL. On newer kernels we get
+ * ENOTCONN. Let's always convert this to ENOTCONN to make this recognizable
+ * everywhere the same way. */
+
+ if (IN_SET(errno, EINVAL, ENOTCONN))
+ return -ENOTCONN;
+
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
+ return qgroup_create_or_destroy(fd, true, qgroupid);
+}
+
+int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
+ return qgroup_create_or_destroy(fd, false, qgroupid);
+}
+
+int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
+ _cleanup_free_ uint64_t *qgroups = NULL;
+ uint64_t subvol_id;
+ int i, n, r;
+
+ /* Destroys the specified qgroup, but unassigns it from all
+ * its parents first. Also, it recursively destroys all
+ * qgroups it is assigned to that have the same id part of the
+ * qgroupid as the specified group. */
+
+ r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
+ if (r < 0)
+ return r;
+
+ n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
+ if (n < 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ uint64_t id;
+
+ r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
+ if (r < 0)
+ return r;
+
+ r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
+ if (r < 0)
+ return r;
+
+ if (id != subvol_id)
+ continue;
+
+ /* The parent qgroupid shares the same id part with
+ * us? If so, destroy it too. */
+
+ (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
+ }
+
+ return btrfs_qgroup_destroy(fd, qgroupid);
+}
+
+int btrfs_quota_scan_start(int fd) {
+ struct btrfs_ioctl_quota_rescan_args args = {};
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_quota_scan_wait(int fd) {
+ assert(fd >= 0);
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int btrfs_quota_scan_ongoing(int fd) {
+ struct btrfs_ioctl_quota_rescan_args args = {};
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
+ return -errno;
+
+ return !!args.flags;
+}
+
+static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
+ struct btrfs_ioctl_qgroup_assign_args args = {
+ .assign = b,
+ .src = child,
+ .dst = parent,
+ };
+ unsigned c;
+ int r;
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOTTY;
+
+ for (c = 0;; c++) {
+ r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
+ if (r < 0) {
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ if (r == 0)
+ return 0;
+
+ /* If the return value is > 0, we need to request a rescan */
+
+ (void) btrfs_quota_scan_start(fd);
+ return 1;
+ }
+}
+
+int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
+ return qgroup_assign_or_unassign(fd, true, child, parent);
+}
+
+int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
+ return qgroup_assign_or_unassign(fd, false, child, parent);
+}
+
+static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
+ struct btrfs_ioctl_search_args args = {
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
+ .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
+
+ .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+ .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ struct btrfs_ioctl_vol_args vol_args = {};
+ _cleanup_close_ int subvol_fd = -1;
+ struct stat st;
+ bool made_writable = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(subvolume);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -EINVAL;
+
+ subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (subvol_fd < 0)
+ return -errno;
+
+ /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
+ * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
+ * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
+ * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
+ * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
+ * let's prefer ENOTTY over EPERM/EACCES though. */
+ r = btrfs_is_subvol_fd(subvol_fd);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Not a btrfs subvolume */
+ return -ENOTTY;
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ /* First, try to remove the subvolume. If it happens to be
+ * already empty, this will just work. */
+ strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
+ if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
+ (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
+ return 0;
+ }
+ if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
+ return -errno;
+
+ /* OK, the subvolume is not empty, let's look for child
+ * subvolumes, and remove them, first */
+
+ args.key.min_offset = args.key.max_offset = subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return -errno;
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+ _cleanup_free_ char *p = NULL;
+ const struct btrfs_root_ref *ref;
+
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+ continue;
+ if (sh->offset != subvol_id)
+ continue;
+
+ ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+
+ p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
+ if (!p)
+ return -ENOMEM;
+
+ struct btrfs_ioctl_ino_lookup_args ino_args = {
+ .treeid = subvol_id,
+ .objectid = htole64(ref->dirid),
+ };
+
+ if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
+ return -errno;
+
+ if (!made_writable) {
+ r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
+ if (r < 0)
+ return r;
+
+ made_writable = true;
+ }
+
+ if (isempty(ino_args.name))
+ /* Subvolume is in the top-level
+ * directory of the subvolume. */
+ r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
+ else {
+ _cleanup_close_ int child_fd = -1;
+
+ /* Subvolume is somewhere further down,
+ * hence we need to open the
+ * containing directory first */
+
+ child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (child_fd < 0)
+ return -errno;
+
+ r = subvol_remove_children(child_fd, p, sh->objectid, flags);
+ }
+ if (r < 0)
+ return r;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+ /* OK, the child subvolumes should all be gone now, let's try
+ * again to remove the subvolume */
+ if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
+ return -errno;
+
+ (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
+ return 0;
+}
+
+int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
+ _cleanup_close_ int fd = -1;
+ const char *subvolume;
+ int r;
+
+ assert(path);
+
+ r = extract_subvolume_name(path, &subvolume);
+ if (r < 0)
+ return r;
+
+ fd = open_parent(path, O_CLOEXEC, 0);
+ if (fd < 0)
+ return fd;
+
+ return subvol_remove_children(fd, subvolume, 0, flags);
+}
+
+int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
+ return subvol_remove_children(fd, subvolume, 0, flags);
+}
+
+int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of quota items */
+ .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+ /* The object ID is always 0 */
+ .key.min_objectid = 0,
+ .key.max_objectid = 0,
+
+ /* Look precisely for the quota items */
+ .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
+ .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
+
+ /* For our qgroup */
+ .key.min_offset = old_qgroupid,
+ .key.max_offset = old_qgroupid,
+
+ /* No restrictions on the other components */
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ int r;
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+ if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
+ break;
+
+ return -errno;
+ }
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+ const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+ struct btrfs_ioctl_qgroup_limit_args qargs;
+ unsigned c;
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->objectid != 0)
+ continue;
+ if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
+ continue;
+ if (sh->offset != old_qgroupid)
+ continue;
+
+ /* We found the entry, now copy things over. */
+
+ qargs = (struct btrfs_ioctl_qgroup_limit_args) {
+ .qgroupid = new_qgroupid,
+
+ .lim.max_rfer = le64toh(qli->max_rfer),
+ .lim.max_excl = le64toh(qli->max_excl),
+ .lim.rsv_rfer = le64toh(qli->rsv_rfer),
+ .lim.rsv_excl = le64toh(qli->rsv_excl),
+
+ .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
+ BTRFS_QGROUP_LIMIT_MAX_EXCL|
+ BTRFS_QGROUP_LIMIT_RSV_RFER|
+ BTRFS_QGROUP_LIMIT_RSV_EXCL),
+ };
+
+ for (c = 0;; c++) {
+ if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
+ if (errno == EBUSY && c < 10) {
+ (void) btrfs_quota_scan_wait(fd);
+ continue;
+ }
+ return -errno;
+ }
+
+ break;
+ }
+
+ return 1;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+ return 0;
+}
+
+static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
+ _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
+ bool copy_from_parent = false, insert_intermediary_qgroup = false;
+ int n_old_qgroups, n_old_parent_qgroups, r, i;
+ uint64_t old_parent_id;
+
+ assert(fd >= 0);
+
+ /* Copies a reduced form of quota information from the old to
+ * the new subvolume. */
+
+ n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
+ if (n_old_qgroups <= 0) /* Nothing to copy */
+ return n_old_qgroups;
+
+ r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
+ if (r == -ENXIO)
+ /* We have no parent, hence nothing to copy. */
+ n_old_parent_qgroups = 0;
+ else if (r < 0)
+ return r;
+ else {
+ n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
+ if (n_old_parent_qgroups < 0)
+ return n_old_parent_qgroups;
+ }
+
+ for (i = 0; i < n_old_qgroups; i++) {
+ uint64_t id;
+ int j;
+
+ r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
+ if (r < 0)
+ return r;
+
+ if (id == old_subvol_id) {
+ /* The old subvolume was member of a qgroup
+ * that had the same id, but a different level
+ * as it self. Let's set up something similar
+ * in the destination. */
+ insert_intermediary_qgroup = true;
+ break;
+ }
+
+ for (j = 0; j < n_old_parent_qgroups; j++)
+ if (old_parent_qgroups[j] == old_qgroups[i])
+ /* The old subvolume shared a common
+ * parent qgroup with its parent
+ * subvolume. Let's set up something
+ * similar in the destination. */
+ copy_from_parent = true;
+ }
+
+ if (!insert_intermediary_qgroup && !copy_from_parent)
+ return 0;
+
+ return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
+}
+
+static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
+ uint64_t old_subtree_qgroup, new_subtree_qgroup;
+ bool changed;
+ int r;
+
+ /* First copy the leaf limits */
+ r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
+ if (r < 0)
+ return r;
+ changed = r > 0;
+
+ /* Then, try to copy the subtree limits, if there are any. */
+ r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree_qgroup);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return changed;
+
+ r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree_qgroup);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return changed;
+
+ r = btrfs_qgroup_copy_limits(fd, old_subtree_qgroup, new_subtree_qgroup);
+ if (r != 0)
+ return r;
+
+ return changed;
+}
+
+static int subvol_snapshot_children(
+ int old_fd,
+ int new_fd,
+ const char *subvolume,
+ uint64_t old_subvol_id,
+ BtrfsSnapshotFlags flags) {
+
+ struct btrfs_ioctl_search_args args = {
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
+ .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
+
+ .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+ .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ struct btrfs_ioctl_vol_args_v2 vol_args = {
+ .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
+ .fd = old_fd,
+ };
+ _cleanup_close_ int subvolume_fd = -1;
+ uint64_t new_subvol_id;
+ int r;
+
+ assert(old_fd >= 0);
+ assert(new_fd >= 0);
+ assert(subvolume);
+
+ strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
+
+ if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
+ return -errno;
+
+ if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
+ !(flags & BTRFS_SNAPSHOT_QUOTA))
+ return 0;
+
+ if (old_subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
+ if (r < 0)
+ return r;
+
+ if (flags & BTRFS_SNAPSHOT_QUOTA)
+ (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
+
+ if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
+
+ if (flags & BTRFS_SNAPSHOT_QUOTA)
+ (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
+
+ return 0;
+ }
+
+ args.key.min_offset = args.key.max_offset = old_subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return -errno;
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+ _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
+ const struct btrfs_root_ref *ref;
+ _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
+
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+ continue;
+
+ /* Avoid finding the source subvolume a second
+ * time */
+ if (sh->offset != old_subvol_id)
+ continue;
+
+ /* Avoid running into loops if the new
+ * subvolume is below the old one. */
+ if (sh->objectid == new_subvol_id)
+ continue;
+
+ ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
+ p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
+ if (!p)
+ return -ENOMEM;
+
+ struct btrfs_ioctl_ino_lookup_args ino_args = {
+ .treeid = old_subvol_id,
+ .objectid = htole64(ref->dirid),
+ };
+
+ if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
+ return -errno;
+
+ c = path_join(ino_args.name, p);
+ if (!c)
+ return -ENOMEM;
+
+ old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (old_child_fd < 0)
+ return -errno;
+
+ np = path_join(subvolume, ino_args.name);
+ if (!np)
+ return -ENOMEM;
+
+ new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (new_child_fd < 0)
+ return -errno;
+
+ if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
+ /* If the snapshot is read-only we
+ * need to mark it writable
+ * temporarily, to put the subsnapshot
+ * into place. */
+
+ if (subvolume_fd < 0) {
+ subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (subvolume_fd < 0)
+ return -errno;
+ }
+
+ r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* When btrfs clones the subvolumes, child
+ * subvolumes appear as empty directories. Remove
+ * them, so that we can create a new snapshot
+ * in their place */
+ if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
+ int k = -errno;
+
+ if (flags & BTRFS_SNAPSHOT_READ_ONLY)
+ (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
+
+ return k;
+ }
+
+ r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
+
+ /* Restore the readonly flag */
+ if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
+ int k;
+
+ k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
+ if (r >= 0 && k < 0)
+ return k;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+ if (flags & BTRFS_SNAPSHOT_QUOTA)
+ (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
+
+ return 0;
+}
+
+int btrfs_subvol_snapshot_fd_full(
+ int old_fd,
+ const char *new_path,
+ BtrfsSnapshotFlags flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_close_ int new_fd = -1;
+ const char *subvolume;
+ int r;
+
+ assert(old_fd >= 0);
+ assert(new_path);
+
+ r = btrfs_is_subvol_fd(old_fd);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ bool plain_directory = false;
+
+ /* If the source isn't a proper subvolume, fail unless fallback is requested */
+ if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
+ return -EISDIR;
+
+ r = btrfs_subvol_make(new_path);
+ if (r == -ENOTTY && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
+ /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
+ if (mkdir(new_path, 0755) < 0)
+ return -errno;
+
+ plain_directory = true;
+ } else if (r < 0)
+ return r;
+
+ r = copy_directory_fd_full(
+ old_fd, new_path,
+ COPY_MERGE|COPY_REFLINK|COPY_SAME_MOUNT|COPY_HARDLINKS|(FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGINT) ? COPY_SIGINT : 0),
+ progress_path, progress_bytes, userdata);
+ if (r < 0)
+ goto fallback_fail;
+
+ if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
+
+ if (plain_directory) {
+ /* Plain directories have no recursive read-only flag, but something pretty close to
+ * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
+
+ if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
+ (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
+ } else {
+ r = btrfs_subvol_set_read_only(new_path, true);
+ if (r < 0)
+ goto fallback_fail;
+ }
+ }
+
+ return 0;
+
+ fallback_fail:
+ (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ return r;
+ }
+
+ r = extract_subvolume_name(new_path, &subvolume);
+ if (r < 0)
+ return r;
+
+ new_fd = open_parent(new_path, O_CLOEXEC, 0);
+ if (new_fd < 0)
+ return new_fd;
+
+ return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
+}
+
+int btrfs_subvol_snapshot_full(
+ const char *old_path,
+ const char *new_path,
+ BtrfsSnapshotFlags flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_close_ int old_fd = -1;
+
+ assert(old_path);
+ assert(new_path);
+
+ old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (old_fd < 0)
+ return -errno;
+
+ return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, progress_path, progress_bytes, userdata);
+}
+
+int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of quota items */
+ .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
+
+ /* Look precisely for the quota relation items */
+ .key.min_type = BTRFS_QGROUP_RELATION_KEY,
+ .key.max_type = BTRFS_QGROUP_RELATION_KEY,
+
+ /* No restrictions on the other components */
+ .key.min_offset = 0,
+ .key.max_offset = (uint64_t) -1,
+
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+
+ _cleanup_free_ uint64_t *items = NULL;
+ size_t n_items = 0, n_allocated = 0;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (qgroupid == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &qgroupid);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_objectid = args.key.max_objectid = qgroupid;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
+ if (errno == ENOENT) /* quota tree missing: quota is disabled */
+ break;
+
+ return -errno;
+ }
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ /* Make sure we start the next search at least from this entry */
+ btrfs_ioctl_search_args_set(&args, sh);
+
+ if (sh->type != BTRFS_QGROUP_RELATION_KEY)
+ continue;
+ if (sh->offset < sh->objectid)
+ continue;
+ if (sh->objectid != qgroupid)
+ continue;
+
+ if (!GREEDY_REALLOC(items, n_allocated, n_items+1))
+ return -ENOMEM;
+
+ items[n_items++] = sh->offset;
+ }
+
+ /* Increase search key by one, to read the next item, if we can. */
+ if (!btrfs_ioctl_search_args_inc(&args))
+ break;
+ }
+
+ if (n_items <= 0) {
+ *ret = NULL;
+ return 0;
+ }
+
+ *ret = TAKE_PTR(items);
+
+ return (int) n_items;
+}
+
+int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
+ _cleanup_free_ uint64_t *qgroups = NULL;
+ uint64_t parent_subvol;
+ bool changed = false;
+ int n = 0, r;
+
+ assert(fd >= 0);
+
+ /*
+ * Sets up the specified subvolume's qgroup automatically in
+ * one of two ways:
+ *
+ * If insert_intermediary_qgroup is false, the subvolume's
+ * leaf qgroup will be assigned to the same parent qgroups as
+ * the subvolume's parent subvolume.
+ *
+ * If insert_intermediary_qgroup is true a new intermediary
+ * higher-level qgroup is created, with a higher level number,
+ * but reusing the id of the subvolume. The level number is
+ * picked as one smaller than the lowest level qgroup the
+ * parent subvolume is a member of. If the parent subvolume's
+ * leaf qgroup is assigned to no higher-level qgroup a new
+ * qgroup of level 255 is created instead. Either way, the new
+ * qgroup is then assigned to the parent's higher-level
+ * qgroup, and the subvolume itself is assigned to it.
+ *
+ * If the subvolume is already assigned to a higher level
+ * qgroup, no operation is executed.
+ *
+ * Effectively this means: regardless if
+ * insert_intermediary_qgroup is true or not, after this
+ * function is invoked the subvolume will be accounted within
+ * the same qgroups as the parent. However, if it is true, it
+ * will also get its own higher-level qgroup, which may in
+ * turn be used by subvolumes created beneath this subvolume
+ * later on.
+ *
+ * This hence defines a simple default qgroup setup for
+ * subvolumes, as long as this function is invoked on each
+ * created subvolume: each subvolume is always accounting
+ * together with its immediate parents. Optionally, if
+ * insert_intermediary_qgroup is true, it will also get a
+ * qgroup that then includes all its own child subvolumes.
+ */
+
+ if (subvol_id == 0) {
+ r = btrfs_is_subvol_fd(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ }
+
+ n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
+ if (n < 0)
+ return n;
+ if (n > 0) /* already parent qgroups set up, let's bail */
+ return 0;
+
+ qgroups = mfree(qgroups);
+
+ r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
+ if (r == -ENXIO)
+ /* No parent, hence no qgroup memberships */
+ n = 0;
+ else if (r < 0)
+ return r;
+ else {
+ n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
+ if (n < 0)
+ return n;
+ }
+
+ if (insert_intermediary_qgroup) {
+ uint64_t lowest = 256, new_qgroupid;
+ bool created = false;
+ int i;
+
+ /* Determine the lowest qgroup that the parent
+ * subvolume is assigned to. */
+
+ for (i = 0; i < n; i++) {
+ uint64_t level;
+
+ r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
+ if (r < 0)
+ return r;
+
+ if (level < lowest)
+ lowest = level;
+ }
+
+ if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
+ return -EBUSY;
+
+ r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
+ if (r < 0)
+ return r;
+
+ /* Create the new intermediary group, unless it already exists */
+ r = btrfs_qgroup_create(fd, new_qgroupid);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ if (r >= 0)
+ changed = created = true;
+
+ for (i = 0; i < n; i++) {
+ r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
+ if (r < 0 && r != -EEXIST) {
+ if (created)
+ (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
+
+ return r;
+ }
+ if (r >= 0)
+ changed = true;
+ }
+
+ r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
+ if (r < 0 && r != -EEXIST) {
+ if (created)
+ (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
+ return r;
+ }
+ if (r >= 0)
+ changed = true;
+
+ } else {
+ int i;
+
+ /* Assign our subvolume to all the same qgroups as the parent */
+
+ for (i = 0; i < n; i++) {
+ r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ if (r >= 0)
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
+}
+
+int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
+
+ struct btrfs_ioctl_search_args args = {
+ /* Tree of tree roots */
+ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
+
+ /* Look precisely for the subvolume items */
+ .key.min_type = BTRFS_ROOT_BACKREF_KEY,
+ .key.max_type = BTRFS_ROOT_BACKREF_KEY,
+
+ /* No restrictions on the other components */
+ .key.min_offset = 0,
+ .key.max_offset = (uint64_t) -1,
+
+ .key.min_transid = 0,
+ .key.max_transid = (uint64_t) -1,
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (subvol_id == 0) {
+ r = btrfs_subvol_get_id_fd(fd, &subvol_id);
+ if (r < 0)
+ return r;
+ } else {
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ENOTTY;
+ }
+
+ args.key.min_objectid = args.key.max_objectid = subvol_id;
+
+ while (btrfs_ioctl_search_args_compare(&args) <= 0) {
+ const struct btrfs_ioctl_search_header *sh;
+ unsigned i;
+
+ args.key.nr_items = 256;
+ if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
+ return negative_errno();
+
+ if (args.key.nr_items <= 0)
+ break;
+
+ FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
+
+ if (sh->type != BTRFS_ROOT_BACKREF_KEY)
+ continue;
+ if (sh->objectid != subvol_id)
+ continue;
+
+ *ret = sh->offset;
+ return 0;
+ }
+ }
+
+ return -ENXIO;
+}
diff --git a/src/basic/btrfs-util.h b/src/basic/btrfs-util.h
new file mode 100644
index 0000000..c8b44f6
--- /dev/null
+++ b/src/basic/btrfs-util.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "copy.h"
+#include "time-util.h"
+
+typedef struct BtrfsSubvolInfo {
+ uint64_t subvol_id;
+ usec_t otime;
+
+ sd_id128_t uuid;
+ sd_id128_t parent_uuid;
+
+ bool read_only;
+} BtrfsSubvolInfo;
+
+typedef struct BtrfsQuotaInfo {
+ uint64_t referenced;
+ uint64_t exclusive;
+ uint64_t referenced_max;
+ uint64_t exclusive_max;
+} BtrfsQuotaInfo;
+
+typedef enum BtrfsSnapshotFlags {
+ BTRFS_SNAPSHOT_FALLBACK_COPY = 1 << 0, /* If the source isn't a subvolume, reflink everything */
+ BTRFS_SNAPSHOT_READ_ONLY = 1 << 1,
+ BTRFS_SNAPSHOT_RECURSIVE = 1 << 2,
+ BTRFS_SNAPSHOT_QUOTA = 1 << 3,
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY = 1 << 4, /* If the destination doesn't support subvolumes, reflink/copy instead */
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE = 1 << 5, /* When we can't create a subvolume, use the FS_IMMUTABLE attribute for indicating read-only */
+ BTRFS_SNAPSHOT_SIGINT = 1 << 6, /* Check for SIGINT regularly, and return EINTR if seen */
+} BtrfsSnapshotFlags;
+
+typedef enum BtrfsRemoveFlags {
+ BTRFS_REMOVE_RECURSIVE = 1 << 0,
+ BTRFS_REMOVE_QUOTA = 1 << 1,
+} BtrfsRemoveFlags;
+
+int btrfs_is_filesystem(int fd);
+
+int btrfs_is_subvol_fd(int fd);
+int btrfs_is_subvol(const char *path);
+
+int btrfs_reflink(int infd, int outfd);
+int btrfs_clone_range(int infd, uint64_t in_offset, int ofd, uint64_t out_offset, uint64_t sz);
+
+int btrfs_get_block_device_fd(int fd, dev_t *dev);
+int btrfs_get_block_device(const char *path, dev_t *dev);
+
+int btrfs_defrag_fd(int fd);
+int btrfs_defrag(const char *p);
+
+int btrfs_quota_enable_fd(int fd, bool b);
+int btrfs_quota_enable(const char *path, bool b);
+
+int btrfs_quota_scan_start(int fd);
+int btrfs_quota_scan_wait(int fd);
+int btrfs_quota_scan_ongoing(int fd);
+
+int btrfs_subvol_make(const char *path);
+int btrfs_subvol_make_fd(int fd, const char *subvolume);
+
+int btrfs_subvol_make_fallback(const char *path, mode_t);
+
+int btrfs_subvol_snapshot_fd_full(int old_fd, const char *new_path, BtrfsSnapshotFlags flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags) {
+ return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, NULL, NULL, NULL);
+}
+
+int btrfs_subvol_snapshot_full(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags) {
+ return btrfs_subvol_snapshot_full(old_path, new_path, flags, NULL, NULL, NULL);
+}
+
+int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags);
+int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags);
+
+int btrfs_subvol_set_read_only_fd(int fd, bool b);
+int btrfs_subvol_set_read_only(const char *path, bool b);
+int btrfs_subvol_get_read_only_fd(int fd);
+
+int btrfs_subvol_get_id(int fd, const char *subvolume, uint64_t *ret);
+int btrfs_subvol_get_id_fd(int fd, uint64_t *ret);
+int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret);
+
+int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *info);
+
+int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret);
+
+int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *quota);
+int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *quota);
+
+int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max);
+int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max);
+
+int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool new_qgroup);
+int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup);
+
+int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret);
+int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id);
+
+int btrfs_qgroup_create(int fd, uint64_t qgroupid);
+int btrfs_qgroup_destroy(int fd, uint64_t qgroupid);
+int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid);
+
+int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max);
+int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max);
+
+int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid);
+
+int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent);
+int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent);
+
+int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret);
+
+int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *quota);
+int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *quota);
+
+static inline int btrfs_log_dev_root(int level, int ret, const char *p) {
+ return log_full_errno(level, ret,
+ "File system behind %s is reported by btrfs to be backed by pseudo-device /dev/root, which is not a valid userspace accessible device node. "
+ "Cannot determine correct backing block device.", p);
+}
diff --git a/src/basic/build.h b/src/basic/build.h
new file mode 100644
index 0000000..4697639
--- /dev/null
+++ b/src/basic/build.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "version.h"
+
+#if HAVE_PAM
+#define _PAM_FEATURE_ "+PAM"
+#else
+#define _PAM_FEATURE_ "-PAM"
+#endif
+
+#if HAVE_AUDIT
+#define _AUDIT_FEATURE_ "+AUDIT"
+#else
+#define _AUDIT_FEATURE_ "-AUDIT"
+#endif
+
+#if HAVE_SELINUX
+#define _SELINUX_FEATURE_ "+SELINUX"
+#else
+#define _SELINUX_FEATURE_ "-SELINUX"
+#endif
+
+#if HAVE_APPARMOR
+#define _APPARMOR_FEATURE_ "+APPARMOR"
+#else
+#define _APPARMOR_FEATURE_ "-APPARMOR"
+#endif
+
+#if ENABLE_IMA
+#define _IMA_FEATURE_ "+IMA"
+#else
+#define _IMA_FEATURE_ "-IMA"
+#endif
+
+#if ENABLE_SMACK
+#define _SMACK_FEATURE_ "+SMACK"
+#else
+#define _SMACK_FEATURE_ "-SMACK"
+#endif
+
+#if HAVE_SYSV_COMPAT
+#define _SYSVINIT_FEATURE_ "+SYSVINIT"
+#else
+#define _SYSVINIT_FEATURE_ "-SYSVINIT"
+#endif
+
+#if ENABLE_UTMP
+#define _UTMP_FEATURE_ "+UTMP"
+#else
+#define _UTMP_FEATURE_ "-UTMP"
+#endif
+
+#if HAVE_LIBCRYPTSETUP
+#define _LIBCRYPTSETUP_FEATURE_ "+LIBCRYPTSETUP"
+#else
+#define _LIBCRYPTSETUP_FEATURE_ "-LIBCRYPTSETUP"
+#endif
+
+#if HAVE_GCRYPT
+#define _GCRYPT_FEATURE_ "+GCRYPT"
+#else
+#define _GCRYPT_FEATURE_ "-GCRYPT"
+#endif
+
+#if HAVE_GNUTLS
+#define _GNUTLS_FEATURE_ "+GNUTLS"
+#else
+#define _GNUTLS_FEATURE_ "-GNUTLS"
+#endif
+
+#if HAVE_ACL
+#define _ACL_FEATURE_ "+ACL"
+#else
+#define _ACL_FEATURE_ "-ACL"
+#endif
+
+#if HAVE_XZ
+#define _XZ_FEATURE_ "+XZ"
+#else
+#define _XZ_FEATURE_ "-XZ"
+#endif
+
+#if HAVE_LZ4
+#define _LZ4_FEATURE_ "+LZ4"
+#else
+#define _LZ4_FEATURE_ "-LZ4"
+#endif
+
+#if HAVE_ZSTD
+#define _ZSTD_FEATURE_ "+ZSTD"
+#else
+#define _ZSTD_FEATURE_ "-ZSTD"
+#endif
+
+#if HAVE_SECCOMP
+#define _SECCOMP_FEATURE_ "+SECCOMP"
+#else
+#define _SECCOMP_FEATURE_ "-SECCOMP"
+#endif
+
+#if HAVE_BLKID
+#define _BLKID_FEATURE_ "+BLKID"
+#else
+#define _BLKID_FEATURE_ "-BLKID"
+#endif
+
+#if HAVE_ELFUTILS
+#define _ELFUTILS_FEATURE_ "+ELFUTILS"
+#else
+#define _ELFUTILS_FEATURE_ "-ELFUTILS"
+#endif
+
+#if HAVE_KMOD
+#define _KMOD_FEATURE_ "+KMOD"
+#else
+#define _KMOD_FEATURE_ "-KMOD"
+#endif
+
+#if HAVE_LIBIDN2
+#define _IDN2_FEATURE_ "+IDN2"
+#else
+#define _IDN2_FEATURE_ "-IDN2"
+#endif
+
+#if HAVE_LIBIDN
+#define _IDN_FEATURE_ "+IDN"
+#else
+#define _IDN_FEATURE_ "-IDN"
+#endif
+
+#if HAVE_PCRE2
+#define _PCRE2_FEATURE_ "+PCRE2"
+#else
+#define _PCRE2_FEATURE_ "-PCRE2"
+#endif
+
+#define _CGROUP_HIERARCHY_ "default-hierarchy=" DEFAULT_HIERARCHY_NAME
+
+#define SYSTEMD_FEATURES \
+ _PAM_FEATURE_ " " \
+ _AUDIT_FEATURE_ " " \
+ _SELINUX_FEATURE_ " " \
+ _IMA_FEATURE_ " " \
+ _APPARMOR_FEATURE_ " " \
+ _SMACK_FEATURE_ " " \
+ _SYSVINIT_FEATURE_ " " \
+ _UTMP_FEATURE_ " " \
+ _LIBCRYPTSETUP_FEATURE_ " " \
+ _GCRYPT_FEATURE_ " " \
+ _GNUTLS_FEATURE_ " " \
+ _ACL_FEATURE_ " " \
+ _XZ_FEATURE_ " " \
+ _LZ4_FEATURE_ " " \
+ _ZSTD_FEATURE_ " " \
+ _SECCOMP_FEATURE_ " " \
+ _BLKID_FEATURE_ " " \
+ _ELFUTILS_FEATURE_ " " \
+ _KMOD_FEATURE_ " " \
+ _IDN2_FEATURE_ " " \
+ _IDN_FEATURE_ " " \
+ _PCRE2_FEATURE_ " " \
+ _CGROUP_HIERARCHY_
+
+enum {
+ BUILD_MODE_DEVELOPER,
+ BUILD_MODE_RELEASE,
+};
diff --git a/src/basic/bus-label.c b/src/basic/bus-label.c
new file mode 100644
index 0000000..cd6c58a
--- /dev/null
+++ b/src/basic/bus-label.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "hexdecoct.h"
+#include "macro.h"
+
+char *bus_label_escape(const char *s) {
+ char *r, *t;
+ const char *f;
+
+ assert_return(s, NULL);
+
+ /* Escapes all chars that D-Bus' object path cannot deal
+ * with. Can be reversed with bus_path_unescape(). We special
+ * case the empty string. */
+
+ if (*s == 0)
+ return strdup("_");
+
+ r = new(char, strlen(s)*3 + 1);
+ if (!r)
+ return NULL;
+
+ for (f = s, t = r; *f; f++) {
+
+ /* Escape everything that is not a-zA-Z0-9. We also
+ * escape 0-9 if it's the first character */
+
+ if (!(*f >= 'A' && *f <= 'Z') &&
+ !(*f >= 'a' && *f <= 'z') &&
+ !(f > s && *f >= '0' && *f <= '9')) {
+ *(t++) = '_';
+ *(t++) = hexchar(*f >> 4);
+ *(t++) = hexchar(*f);
+ } else
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ return r;
+}
+
+char *bus_label_unescape_n(const char *f, size_t l) {
+ char *r, *t;
+ size_t i;
+
+ assert_return(f, NULL);
+
+ /* Special case for the empty string */
+ if (l == 1 && *f == '_')
+ return strdup("");
+
+ r = new(char, l + 1);
+ if (!r)
+ return NULL;
+
+ for (i = 0, t = r; i < l; ++i) {
+ if (f[i] == '_') {
+ int a, b;
+
+ if (l - i < 3 ||
+ (a = unhexchar(f[i + 1])) < 0 ||
+ (b = unhexchar(f[i + 2])) < 0) {
+ /* Invalid escape code, let's take it literal then */
+ *(t++) = '_';
+ } else {
+ *(t++) = (char) ((a << 4) | b);
+ i += 2;
+ }
+ } else
+ *(t++) = f[i];
+ }
+
+ *t = 0;
+
+ return r;
+}
diff --git a/src/basic/bus-label.h b/src/basic/bus-label.h
new file mode 100644
index 0000000..446daba
--- /dev/null
+++ b/src/basic/bus-label.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "string-util.h"
+
+char *bus_label_escape(const char *s);
+char *bus_label_unescape_n(const char *f, size_t l);
+
+static inline char *bus_label_unescape(const char *f) {
+ return bus_label_unescape_n(f, strlen_ptr(f));
+}
diff --git a/src/basic/cap-list.c b/src/basic/cap-list.c
new file mode 100644
index 0000000..d295a63
--- /dev/null
+++ b/src/basic/cap-list.c
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "cap-list.h"
+#include "extract-word.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "util.h"
+
+static const struct capability_name* lookup_capability(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "cap-from-name.h"
+#include "cap-to-name.h"
+
+const char *capability_to_name(int id) {
+ if (id < 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(capability_names))
+ return NULL;
+
+ return capability_names[id];
+}
+
+int capability_from_name(const char *name) {
+ const struct capability_name *sc;
+ int r, i;
+
+ assert(name);
+
+ /* Try to parse numeric capability */
+ r = safe_atoi(name, &i);
+ if (r >= 0) {
+ if (i >= 0 && i < 64)
+ return i;
+ else
+ return -EINVAL;
+ }
+
+ /* Try to parse string capability */
+ sc = lookup_capability(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+/* This is the number of capability names we are *compiled* with.
+ * For the max capability number of the currently-running kernel,
+ * use cap_last_cap(). */
+int capability_list_length(void) {
+ return (int) ELEMENTSOF(capability_names);
+}
+
+int capability_set_to_string_alloc(uint64_t set, char **s) {
+ _cleanup_free_ char *str = NULL;
+ size_t allocated = 0, n = 0;
+
+ assert(s);
+
+ for (unsigned i = 0; i <= cap_last_cap(); i++)
+ if (set & (UINT64_C(1) << i)) {
+ const char *p;
+ char buf[2 + 16 + 1];
+ size_t add;
+
+ p = capability_to_name(i);
+ if (!p) {
+ xsprintf(buf, "0x%x", i);
+ p = buf;
+ }
+
+ add = strlen(p);
+
+ if (!GREEDY_REALLOC(str, allocated, n + add + 2))
+ return -ENOMEM;
+
+ strcpy(mempcpy(str + n, p, add), " ");
+ n += add + 1;
+ }
+
+ if (!GREEDY_REALLOC(str, allocated, n + 1))
+ return -ENOMEM;
+
+ str[n > 0 ? n - 1 : 0] = '\0'; /* truncate the last space, if it's there */
+
+ *s = TAKE_PTR(str);
+
+ return 0;
+}
+
+int capability_set_from_string(const char *s, uint64_t *set) {
+ uint64_t val = 0;
+
+ assert(set);
+
+ for (const char *p = s;;) {
+ _cleanup_free_ char *word = NULL;
+ int r;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return r;
+ if (r <= 0)
+ break;
+
+ r = capability_from_name(word);
+ if (r < 0)
+ continue;
+
+ val |= ((uint64_t) UINT64_C(1)) << (uint64_t) r;
+ }
+
+ *set = val;
+
+ return 0;
+}
diff --git a/src/basic/cap-list.h b/src/basic/cap-list.h
new file mode 100644
index 0000000..71235d6
--- /dev/null
+++ b/src/basic/cap-list.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+const char *capability_to_name(int id);
+int capability_from_name(const char *name);
+int capability_list_length(void);
+
+int capability_set_to_string_alloc(uint64_t set, char **s);
+int capability_set_from_string(const char *s, uint64_t *set);
diff --git a/src/basic/cap-to-name.awk b/src/basic/cap-to-name.awk
new file mode 100644
index 0000000..402a782
--- /dev/null
+++ b/src/basic/cap-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const capability_names[] = { "
+}
+{
+ printf " [%s] = \"%s\",\n", $1, tolower($1)
+}
+END{
+ print "};"
+}
diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c
new file mode 100644
index 0000000..c1520d9
--- /dev/null
+++ b/src/basic/capability-util.c
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "cap-list.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_prctl.h"
+#include "parse-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int have_effective_cap(int value) {
+ _cleanup_cap_free_ cap_t cap;
+ cap_flag_value_t fv;
+
+ cap = cap_get_proc();
+ if (!cap)
+ return -errno;
+
+ if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
+ return -errno;
+
+ return fv == CAP_SET;
+}
+
+unsigned cap_last_cap(void) {
+ static thread_local unsigned saved;
+ static thread_local bool valid = false;
+ _cleanup_free_ char *content = NULL;
+ unsigned long p = 0;
+ int r;
+
+ if (valid)
+ return saved;
+
+ /* available since linux-3.2 */
+ r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
+ if (r >= 0) {
+ r = safe_atolu(content, &p);
+ if (r >= 0) {
+
+ if (p > 63) /* Safety for the future: if one day the kernel learns more than 64 caps,
+ * then we are in trouble (since we, as much userspace and kernel space
+ * store capability masks in uint64_t types). Let's hence protect
+ * ourselves against that and always cap at 63 for now. */
+ p = 63;
+
+ saved = p;
+ valid = true;
+ return p;
+ }
+ }
+
+ /* fall back to syscall-probing for pre linux-3.2 */
+ p = MIN((unsigned long) CAP_LAST_CAP, 63U);
+
+ if (prctl(PR_CAPBSET_READ, p) < 0) {
+
+ /* Hmm, look downwards, until we find one that works */
+ for (p--; p > 0; p--)
+ if (prctl(PR_CAPBSET_READ, p) >= 0)
+ break;
+
+ } else {
+
+ /* Hmm, look upwards, until we find one that doesn't work */
+ for (; p < 63; p++)
+ if (prctl(PR_CAPBSET_READ, p+1) < 0)
+ break;
+ }
+
+ saved = p;
+ valid = true;
+
+ return p;
+}
+
+int capability_update_inherited_set(cap_t caps, uint64_t set) {
+ /* Add capabilities in the set to the inherited caps, drops capabilities not in the set.
+ * Do not apply them yet. */
+
+ for (unsigned i = 0; i <= cap_last_cap(); i++) {
+ cap_flag_value_t flag = set & (UINT64_C(1) << i) ? CAP_SET : CAP_CLEAR;
+ cap_value_t v;
+
+ v = (cap_value_t) i;
+
+ if (cap_set_flag(caps, CAP_INHERITABLE, 1, &v, flag) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int capability_ambient_set_apply(uint64_t set, bool also_inherit) {
+ _cleanup_cap_free_ cap_t caps = NULL;
+ int r;
+
+ /* Remove capabilities requested in ambient set, but not in the bounding set */
+ for (unsigned i = 0; i <= cap_last_cap(); i++) {
+ if (set == 0)
+ break;
+
+ if (FLAGS_SET(set, (UINT64_C(1) << i)) && prctl(PR_CAPBSET_READ, i) != 1) {
+ log_debug("Ambient capability %s requested but missing from bounding set,"
+ " suppressing automatically.", capability_to_name(i));
+ set &= ~(UINT64_C(1) << i);
+ }
+ }
+
+ /* Add the capabilities to the ambient set (an possibly also the inheritable set) */
+
+ /* Check that we can use PR_CAP_AMBIENT or quit early. */
+ if (!ambient_capabilities_supported())
+ return (set & all_capabilities()) == 0 ?
+ 0 : -EOPNOTSUPP; /* if actually no ambient caps are to be set, be silent,
+ * otherwise fail recognizably */
+
+ if (also_inherit) {
+ caps = cap_get_proc();
+ if (!caps)
+ return -errno;
+
+ r = capability_update_inherited_set(caps, set);
+ if (r < 0)
+ return -errno;
+
+ if (cap_set_proc(caps) < 0)
+ return -errno;
+ }
+
+ for (unsigned i = 0; i <= cap_last_cap(); i++) {
+
+ if (set & (UINT64_C(1) << i)) {
+
+ /* Add the capability to the ambient set. */
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0)
+ return -errno;
+ } else {
+
+ /* Drop the capability so we don't inherit capabilities we didn't ask for. */
+ r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i, 0, 0);
+ if (r < 0)
+ return -errno;
+
+ if (r)
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, i, 0, 0) < 0)
+ return -errno;
+
+ }
+ }
+
+ return 0;
+}
+
+int capability_gain_cap_setpcap(cap_t *ret_before_caps) {
+ _cleanup_cap_free_ cap_t caps = NULL;
+ cap_flag_value_t fv;
+ caps = cap_get_proc();
+ if (!caps)
+ return -errno;
+
+ if (cap_get_flag(caps, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0)
+ return -errno;
+
+ if (fv != CAP_SET) {
+ _cleanup_cap_free_ cap_t temp_cap = NULL;
+ static const cap_value_t v = CAP_SETPCAP;
+
+ temp_cap = cap_dup(caps);
+ if (!temp_cap)
+ return -errno;
+
+ if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0)
+ return -errno;
+
+ if (cap_set_proc(temp_cap) < 0)
+ log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m");
+
+ /* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all this just means
+ * we'll fail later, when we actually intend to drop some capabilities or try to set securebits. */
+ }
+ if (ret_before_caps)
+ /* Return the capabilities as they have been before setting CAP_SETPCAP */
+ *ret_before_caps = TAKE_PTR(caps);
+
+ return 0;
+}
+
+int capability_bounding_set_drop(uint64_t keep, bool right_now) {
+ _cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL;
+ int r;
+
+ /* If we are run as PID 1 we will lack CAP_SETPCAP by default
+ * in the effective set (yes, the kernel drops that when
+ * executing init!), so get it back temporarily so that we can
+ * call PR_CAPBSET_DROP. */
+
+ r = capability_gain_cap_setpcap(&before_cap);
+ if (r < 0)
+ return r;
+
+ after_cap = cap_dup(before_cap);
+ if (!after_cap)
+ return -errno;
+
+ for (unsigned i = 0; i <= cap_last_cap(); i++) {
+ cap_value_t v;
+
+ if ((keep & (UINT64_C(1) << i)))
+ continue;
+
+ /* Drop it from the bounding set */
+ if (prctl(PR_CAPBSET_DROP, i) < 0) {
+ r = -errno;
+
+ /* If dropping the capability failed, let's see if we didn't have it in the first place. If so,
+ * continue anyway, as dropping a capability we didn't have in the first place doesn't really
+ * matter anyway. */
+ if (prctl(PR_CAPBSET_READ, i) != 0)
+ goto finish;
+ }
+ v = (cap_value_t) i;
+
+ /* Also drop it from the inheritable set, so
+ * that anything we exec() loses the
+ * capability for good. */
+ if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* If we shall apply this right now drop it
+ * also from our own capability sets. */
+ if (right_now) {
+ if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
+ cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ if (cap_set_proc(after_cap) < 0) {
+ /* If there are no actual changes anyway then let's ignore this error. */
+ if (cap_compare(before_cap, after_cap) != 0)
+ r = -errno;
+ }
+
+ return r;
+}
+
+static int drop_from_file(const char *fn, uint64_t keep) {
+ _cleanup_free_ char *p = NULL;
+ uint64_t current, after;
+ uint32_t hi, lo;
+ int r, k;
+
+ r = read_one_line_file(fn, &p);
+ if (r < 0)
+ return r;
+
+ k = sscanf(p, "%" PRIu32 " %" PRIu32, &lo, &hi);
+ if (k != 2)
+ return -EIO;
+
+ current = (uint64_t) lo | ((uint64_t) hi << 32);
+ after = current & keep;
+
+ if (current == after)
+ return 0;
+
+ lo = after & UINT32_C(0xFFFFFFFF);
+ hi = (after >> 32) & UINT32_C(0xFFFFFFFF);
+
+ return write_string_filef(fn, 0, "%" PRIu32 " %" PRIu32, lo, hi);
+}
+
+int capability_bounding_set_drop_usermode(uint64_t keep) {
+ int r;
+
+ r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep);
+ if (r < 0)
+ return r;
+
+ r = drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
+ int r;
+
+ /* Unfortunately we cannot leave privilege dropping to PID 1 here, since we want to run as user but
+ * want to keep some capabilities. Since file capabilities have been introduced this cannot be done
+ * across exec() anymore, unless our binary has the capability configured in the file system, which
+ * we want to avoid. */
+
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "Failed to change group ID: %m");
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
+
+ /* Ensure we keep the permitted caps across the setresuid(). Note that we do this even if we actually
+ * don't want to keep any capabilities, since we want to be able to drop them from the bounding set
+ * too, and we can only do that if we have capabilities. */
+ if (prctl(PR_SET_KEEPCAPS, 1) < 0)
+ return log_error_errno(errno, "Failed to enable keep capabilities flag: %m");
+
+ if (setresuid(uid, uid, uid) < 0)
+ return log_error_errno(errno, "Failed to change user ID: %m");
+
+ if (prctl(PR_SET_KEEPCAPS, 0) < 0)
+ return log_error_errno(errno, "Failed to disable keep capabilities flag: %m");
+
+ /* Drop all caps from the bounding set (as well as the inheritable/permitted/effective sets), except
+ * the ones we want to keep */
+ r = capability_bounding_set_drop(keep_capabilities, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop capabilities: %m");
+
+ /* Now upgrade the permitted caps we still kept to effective caps */
+ if (keep_capabilities != 0) {
+ cap_value_t bits[u64log2(keep_capabilities) + 1];
+ _cleanup_cap_free_ cap_t d = NULL;
+ unsigned i, j = 0;
+
+ d = cap_init();
+ if (!d)
+ return log_oom();
+
+ for (i = 0; i < ELEMENTSOF(bits); i++)
+ if (keep_capabilities & (1ULL << i))
+ bits[j++] = i;
+
+ /* use enough bits */
+ assert(i == 64 || (keep_capabilities >> i) == 0);
+ /* don't use too many bits */
+ assert(keep_capabilities & (UINT64_C(1) << (i - 1)));
+
+ if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 ||
+ cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0)
+ return log_error_errno(errno, "Failed to enable capabilities bits: %m");
+
+ if (cap_set_proc(d) < 0)
+ return log_error_errno(errno, "Failed to increase capabilities: %m");
+ }
+
+ return 0;
+}
+
+int drop_capability(cap_value_t cv) {
+ _cleanup_cap_free_ cap_t tmp_cap = NULL;
+
+ tmp_cap = cap_get_proc();
+ if (!tmp_cap)
+ return -errno;
+
+ if ((cap_set_flag(tmp_cap, CAP_INHERITABLE, 1, &cv, CAP_CLEAR) < 0) ||
+ (cap_set_flag(tmp_cap, CAP_PERMITTED, 1, &cv, CAP_CLEAR) < 0) ||
+ (cap_set_flag(tmp_cap, CAP_EFFECTIVE, 1, &cv, CAP_CLEAR) < 0))
+ return -errno;
+
+ if (cap_set_proc(tmp_cap) < 0)
+ return -errno;
+
+ return 0;
+}
+
+bool ambient_capabilities_supported(void) {
+ static int cache = -1;
+
+ if (cache >= 0)
+ return cache;
+
+ /* If PR_CAP_AMBIENT returns something valid, or an unexpected error code we assume that ambient caps are
+ * available. */
+
+ cache = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_KILL, 0, 0) >= 0 ||
+ !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS);
+
+ return cache;
+}
+
+bool capability_quintet_mangle(CapabilityQuintet *q) {
+ uint64_t combined, drop = 0;
+ bool ambient_supported;
+
+ assert(q);
+
+ combined = q->effective | q->bounding | q->inheritable | q->permitted;
+
+ ambient_supported = q->ambient != (uint64_t) -1;
+ if (ambient_supported)
+ combined |= q->ambient;
+
+ for (unsigned i = 0; i <= cap_last_cap(); i++) {
+ unsigned long bit = UINT64_C(1) << i;
+ if (!FLAGS_SET(combined, bit))
+ continue;
+
+ if (prctl(PR_CAPBSET_READ, i) > 0)
+ continue;
+
+ drop |= bit;
+
+ log_debug("Not in the current bounding set: %s", capability_to_name(i));
+ }
+
+ q->effective &= ~drop;
+ q->bounding &= ~drop;
+ q->inheritable &= ~drop;
+ q->permitted &= ~drop;
+
+ if (ambient_supported)
+ q->ambient &= ~drop;
+
+ return drop != 0; /* Let the caller know we changed something */
+}
+
+int capability_quintet_enforce(const CapabilityQuintet *q) {
+ _cleanup_cap_free_ cap_t c = NULL, modified = NULL;
+ int r;
+
+ if (q->ambient != (uint64_t) -1) {
+ bool changed = false;
+
+ c = cap_get_proc();
+ if (!c)
+ return -errno;
+
+ /* In order to raise the ambient caps set we first need to raise the matching
+ * inheritable + permitted cap */
+ for (unsigned i = 0; i <= cap_last_cap(); i++) {
+ uint64_t m = UINT64_C(1) << i;
+ cap_value_t cv = (cap_value_t) i;
+ cap_flag_value_t old_value_inheritable, old_value_permitted;
+
+ if ((q->ambient & m) == 0)
+ continue;
+
+ if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0)
+ return -errno;
+ if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0)
+ return -errno;
+
+ if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET)
+ continue;
+
+ if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0)
+ return -errno;
+ if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
+ return -errno;
+
+ changed = true;
+ }
+
+ if (changed)
+ if (cap_set_proc(c) < 0)
+ return -errno;
+
+ r = capability_ambient_set_apply(q->ambient, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (q->inheritable != (uint64_t) -1 || q->permitted != (uint64_t) -1 || q->effective != (uint64_t) -1) {
+ bool changed = false;
+
+ if (!c) {
+ c = cap_get_proc();
+ if (!c)
+ return -errno;
+ }
+
+ for (unsigned i = 0; i <= cap_last_cap(); i++) {
+ uint64_t m = UINT64_C(1) << i;
+ cap_value_t cv = (cap_value_t) i;
+
+ if (q->inheritable != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0) {
+ if (errno == EINVAL) /* If the kernel knows more caps than this
+ * version of libcap, then this will return
+ * EINVAL. In that case, simply ignore it,
+ * pretend it doesn't exist. */
+ continue;
+
+ return -errno;
+ }
+
+ new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+
+ if (q->permitted != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0) {
+ if (errno == EINVAL)
+ continue;
+
+ return -errno;
+ }
+
+ new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+
+ if (q->effective != (uint64_t) -1) {
+ cap_flag_value_t old_value, new_value;
+
+ if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0) {
+ if (errno == EINVAL)
+ continue;
+
+ return -errno;
+ }
+
+ new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR;
+
+ if (old_value != new_value) {
+ changed = true;
+
+ if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0)
+ return -errno;
+ }
+ }
+ }
+
+ if (changed) {
+ /* In order to change the bounding caps, we need to keep CAP_SETPCAP for a bit
+ * longer. Let's add it to our list hence for now. */
+ if (q->bounding != (uint64_t) -1) {
+ cap_value_t cv = CAP_SETPCAP;
+
+ modified = cap_dup(c);
+ if (!modified)
+ return -ENOMEM;
+
+ if (cap_set_flag(modified, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
+ return -errno;
+ if (cap_set_flag(modified, CAP_EFFECTIVE, 1, &cv, CAP_SET) < 0)
+ return -errno;
+
+ if (cap_compare(modified, c) == 0) {
+ /* No change? then drop this nonsense again */
+ cap_free(modified);
+ modified = NULL;
+ }
+ }
+
+ /* Now, let's enforce the caps for the first time. Note that this is where we acquire
+ * caps in any of the sets we currently don't have. We have to do this before
+ * dropping the bounding caps below, since at that point we can never acquire new
+ * caps in inherited/permitted/effective anymore, but only lose them. */
+ if (cap_set_proc(modified ?: c) < 0)
+ return -errno;
+ }
+ }
+
+ if (q->bounding != (uint64_t) -1) {
+ r = capability_bounding_set_drop(q->bounding, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* If needed, let's now set the caps again, this time in the final version, which differs from what
+ * we have already set only in the CAP_SETPCAP bit, which we needed for dropping the bounding
+ * bits. This call only undoes bits and doesn't acquire any which means the bounding caps don't
+ * matter. */
+ if (modified)
+ if (cap_set_proc(c) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/basic/capability-util.h b/src/basic/capability-util.h
new file mode 100644
index 0000000..f5ce290
--- /dev/null
+++ b/src/basic/capability-util.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "missing_capability.h"
+#include "util.h"
+
+#define CAP_ALL (uint64_t) -1
+
+unsigned cap_last_cap(void);
+int have_effective_cap(int value);
+int capability_gain_cap_setpcap(cap_t *return_caps);
+int capability_bounding_set_drop(uint64_t keep, bool right_now);
+int capability_bounding_set_drop_usermode(uint64_t keep);
+
+int capability_ambient_set_apply(uint64_t set, bool also_inherit);
+int capability_update_inherited_set(cap_t caps, uint64_t ambient_set);
+
+int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities);
+
+int drop_capability(cap_value_t cv);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(cap_t, cap_free);
+#define _cleanup_cap_free_ _cleanup_(cap_freep)
+
+static inline void cap_free_charpp(char **p) {
+ if (*p)
+ cap_free(*p);
+}
+#define _cleanup_cap_free_charp_ _cleanup_(cap_free_charpp)
+
+static inline uint64_t all_capabilities(void) {
+ return UINT64_MAX >> (63 - cap_last_cap());
+}
+
+static inline bool cap_test_all(uint64_t caps) {
+ return FLAGS_SET(caps, all_capabilities());
+}
+
+bool ambient_capabilities_supported(void);
+
+/* Identical to linux/capability.h's CAP_TO_MASK(), but uses an unsigned 1U instead of a signed 1 for shifting left, in
+ * order to avoid complaints about shifting a signed int left by 31 bits, which would make it negative. */
+#define CAP_TO_MASK_CORRECTED(x) (1U << ((x) & 31U))
+
+typedef struct CapabilityQuintet {
+ /* Stores all five types of capabilities in one go. Note that we use (uint64_t) -1 for unset here. This hence
+ * needs to be updated as soon as Linux learns more than 63 caps. */
+ uint64_t effective;
+ uint64_t bounding;
+ uint64_t inheritable;
+ uint64_t permitted;
+ uint64_t ambient;
+} CapabilityQuintet;
+
+assert_cc(CAP_LAST_CAP < 64);
+
+#define CAPABILITY_QUINTET_NULL { (uint64_t) -1, (uint64_t) -1, (uint64_t) -1, (uint64_t) -1, (uint64_t) -1 }
+
+static inline bool capability_quintet_is_set(const CapabilityQuintet *q) {
+ return q->effective != (uint64_t) -1 ||
+ q->bounding != (uint64_t) -1 ||
+ q->inheritable != (uint64_t) -1 ||
+ q->permitted != (uint64_t) -1 ||
+ q->ambient != (uint64_t) -1;
+}
+
+/* Mangles the specified caps quintet taking the current bounding set into account:
+ * drops all caps from all five sets if our bounding set doesn't allow them.
+ * Returns true if the quintet was modified. */
+bool capability_quintet_mangle(CapabilityQuintet *q);
+
+int capability_quintet_enforce(const CapabilityQuintet *q);
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
new file mode 100644
index 0000000..f28bf18
--- /dev/null
+++ b/src/basic/cgroup-util.c
@@ -0,0 +1,2189 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <ftw.h>
+#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "login-util.h"
+#include "macro.h"
+#include "missing_magic.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+static int cg_enumerate_items(const char *controller, const char *path, FILE **_f, const char *item) {
+ _cleanup_free_ char *fs = NULL;
+ FILE *f;
+ int r;
+
+ assert(_f);
+
+ r = cg_get_path(controller, path, item, &fs);
+ if (r < 0)
+ return r;
+
+ f = fopen(fs, "re");
+ if (!f)
+ return -errno;
+
+ *_f = f;
+ return 0;
+}
+
+int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
+ return cg_enumerate_items(controller, path, _f, "cgroup.procs");
+}
+
+int cg_read_pid(FILE *f, pid_t *_pid) {
+ unsigned long ul;
+
+ /* Note that the cgroup.procs might contain duplicates! See
+ * cgroups.txt for details. */
+
+ assert(f);
+ assert(_pid);
+
+ errno = 0;
+ if (fscanf(f, "%lu", &ul) != 1) {
+
+ if (feof(f))
+ return 0;
+
+ return errno_or_else(EIO);
+ }
+
+ if (ul <= 0)
+ return -EIO;
+
+ *_pid = (pid_t) ul;
+ return 1;
+}
+
+int cg_read_event(
+ const char *controller,
+ const char *path,
+ const char *event,
+ char **ret) {
+
+ _cleanup_free_ char *events = NULL, *content = NULL;
+ int r;
+
+ r = cg_get_path(controller, path, "cgroup.events", &events);
+ if (r < 0)
+ return r;
+
+ r = read_full_file(events, &content, NULL);
+ if (r < 0)
+ return r;
+
+ for (const char *p = content;;) {
+ _cleanup_free_ char *line = NULL, *key = NULL, *val = NULL;
+ const char *q;
+
+ r = extract_first_word(&p, &line, "\n", 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOENT;
+
+ q = line;
+ r = extract_first_word(&q, &key, " ", 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ if (!streq(key, event))
+ continue;
+
+ val = strdup(q);
+ if (!val)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(val);
+ return 0;
+ }
+}
+
+bool cg_ns_supported(void) {
+ static thread_local int enabled = -1;
+
+ if (enabled >= 0)
+ return enabled;
+
+ if (access("/proc/self/ns/cgroup", F_OK) < 0) {
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
+ enabled = false;
+ } else
+ enabled = true;
+
+ return enabled;
+}
+
+bool cg_freezer_supported(void) {
+ static thread_local int supported = -1;
+
+ if (supported >= 0)
+ return supported;
+
+ supported = cg_all_unified() > 0 && access("/sys/fs/cgroup/init.scope/cgroup.freeze", F_OK) == 0;
+
+ return supported;
+}
+
+int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+ DIR *d;
+
+ assert(_d);
+
+ /* This is not recursive! */
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ d = opendir(fs);
+ if (!d)
+ return -errno;
+
+ *_d = d;
+ return 0;
+}
+
+int cg_read_subgroup(DIR *d, char **fn) {
+ struct dirent *de;
+
+ assert(d);
+ assert(fn);
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ char *b;
+
+ if (de->d_type != DT_DIR)
+ continue;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ b = strdup(de->d_name);
+ if (!b)
+ return -ENOMEM;
+
+ *fn = b;
+ return 1;
+ }
+
+ return 0;
+}
+
+int cg_rmdir(const char *controller, const char *path) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = cg_get_path(controller, path, NULL, &p);
+ if (r < 0)
+ return r;
+
+ r = rmdir(p);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ r = cg_hybrid_unified();
+ if (r <= 0)
+ return r;
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
+ }
+
+ return 0;
+}
+
+static int cg_kill_items(
+ const char *controller,
+ const char *path,
+ int sig,
+ CGroupFlags flags,
+ Set *s,
+ cg_kill_log_func_t log_kill,
+ void *userdata,
+ const char *item) {
+
+ _cleanup_set_free_ Set *allocated_set = NULL;
+ bool done = false;
+ int r, ret = 0, ret_log_kill = 0;
+ pid_t my_pid;
+
+ assert(sig >= 0);
+
+ /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
+ * SIGCONT on SIGKILL. */
+ if (IN_SET(sig, SIGCONT, SIGKILL))
+ flags &= ~CGROUP_SIGCONT;
+
+ /* This goes through the tasks list and kills them all. This
+ * is repeated until no further processes are added to the
+ * tasks list, to properly handle forking processes */
+
+ if (!s) {
+ s = allocated_set = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ my_pid = getpid_cached();
+
+ do {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0;
+ done = true;
+
+ r = cg_enumerate_items(controller, path, &f, item);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
+ continue;
+
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
+ continue;
+
+ if (log_kill)
+ ret_log_kill = log_kill(pid, sig, userdata);
+
+ /* If we haven't killed this process yet, kill
+ * it */
+ if (kill(pid, sig) < 0) {
+ if (ret >= 0 && errno != ESRCH)
+ ret = -errno;
+ } else {
+ if (flags & CGROUP_SIGCONT)
+ (void) kill(pid, SIGCONT);
+
+ if (ret == 0) {
+ if (log_kill)
+ ret = ret_log_kill;
+ else
+ ret = 1;
+ }
+ }
+
+ done = false;
+
+ r = set_put(s, PID_TO_PTR(pid));
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ }
+
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+
+ /* To avoid racing against processes which fork
+ * quicker than we can kill them we repeat this until
+ * no new pids need to be killed. */
+
+ } while (!done);
+
+ return ret;
+}
+
+int cg_kill(
+ const char *controller,
+ const char *path,
+ int sig,
+ CGroupFlags flags,
+ Set *s,
+ cg_kill_log_func_t log_kill,
+ void *userdata) {
+ int r;
+
+ r = cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.procs");
+ if (r < 0 || sig != SIGKILL)
+ return r;
+
+ /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
+ a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
+ (4340d175b898) and 4.14.138 (feb6b123b7dd). */
+ r = cg_unified_controller(controller);
+ if (r <= 0)
+ return r;
+
+ return cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.threads");
+}
+
+int cg_kill_recursive(
+ const char *controller,
+ const char *path,
+ int sig,
+ CGroupFlags flags,
+ Set *s,
+ cg_kill_log_func_t log_kill,
+ void *userdata) {
+
+ _cleanup_set_free_ Set *allocated_set = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, ret;
+ char *fn;
+
+ assert(path);
+ assert(sig >= 0);
+
+ if (!s) {
+ s = allocated_set = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
+
+ r = cg_enumerate_subgroups(controller, path, &d);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(empty_to_root(path), fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
+ if (r != 0 && ret >= 0)
+ ret = r;
+ }
+ if (ret >= 0 && r < 0)
+ ret = r;
+
+ if (flags & CGROUP_REMOVE) {
+ r = cg_rmdir(controller, path);
+ if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
+ return r;
+ }
+
+ return ret;
+}
+
+static const char *controller_to_dirname(const char *controller) {
+ const char *e;
+
+ assert(controller);
+
+ /* Converts a controller name to the directory name below
+ * /sys/fs/cgroup/ we want to mount it to. Effectively, this
+ * just cuts off the name= prefixed used for named
+ * hierarchies, if it is specified. */
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ if (cg_hybrid_unified() > 0)
+ controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
+ else
+ controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
+ }
+
+ e = startswith(controller, "name=");
+ if (e)
+ return e;
+
+ return controller;
+}
+
+static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
+ const char *dn;
+ char *t = NULL;
+
+ assert(fs);
+ assert(controller);
+
+ dn = controller_to_dirname(controller);
+
+ if (isempty(path) && isempty(suffix))
+ t = path_join("/sys/fs/cgroup", dn);
+ else if (isempty(path))
+ t = path_join("/sys/fs/cgroup", dn, suffix);
+ else if (isempty(suffix))
+ t = path_join("/sys/fs/cgroup", dn, path);
+ else
+ t = path_join("/sys/fs/cgroup", dn, path, suffix);
+ if (!t)
+ return -ENOMEM;
+
+ *fs = t;
+ return 0;
+}
+
+static int join_path_unified(const char *path, const char *suffix, char **fs) {
+ char *t;
+
+ assert(fs);
+
+ if (isempty(path) && isempty(suffix))
+ t = strdup("/sys/fs/cgroup");
+ else if (isempty(path))
+ t = path_join("/sys/fs/cgroup", suffix);
+ else if (isempty(suffix))
+ t = path_join("/sys/fs/cgroup", path);
+ else
+ t = path_join("/sys/fs/cgroup", path, suffix);
+ if (!t)
+ return -ENOMEM;
+
+ *fs = t;
+ return 0;
+}
+
+int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
+ int r;
+
+ assert(fs);
+
+ if (!controller) {
+ char *t;
+
+ /* If no controller is specified, we return the path
+ * *below* the controllers, without any prefix. */
+
+ if (!path && !suffix)
+ return -EINVAL;
+
+ if (!suffix)
+ t = strdup(path);
+ else if (!path)
+ t = strdup(suffix);
+ else
+ t = path_join(path, suffix);
+ if (!t)
+ return -ENOMEM;
+
+ *fs = path_simplify(t, false);
+ return 0;
+ }
+
+ if (!cg_controller_is_valid(controller))
+ return -EINVAL;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ r = join_path_unified(path, suffix, fs);
+ else
+ r = join_path_legacy(controller, path, suffix, fs);
+ if (r < 0)
+ return r;
+
+ path_simplify(*fs, false);
+ return 0;
+}
+
+static int controller_is_accessible(const char *controller) {
+ int r;
+
+ assert(controller);
+
+ /* Checks whether a specific controller is accessible,
+ * i.e. its hierarchy mounted. In the unified hierarchy all
+ * controllers are considered accessible, except for the named
+ * hierarchies */
+
+ if (!cg_controller_is_valid(controller))
+ return -EINVAL;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* We don't support named hierarchies if we are using
+ * the unified hierarchy. */
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ return 0;
+
+ if (startswith(controller, "name="))
+ return -EOPNOTSUPP;
+
+ } else {
+ const char *cc, *dn;
+
+ dn = controller_to_dirname(controller);
+ cc = strjoina("/sys/fs/cgroup/", dn);
+
+ if (laccess(cc, F_OK) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
+ int r;
+
+ assert(controller);
+ assert(fs);
+
+ /* Check if the specified controller is actually accessible */
+ r = controller_is_accessible(controller);
+ if (r < 0)
+ return r;
+
+ return cg_get_path(controller, path, suffix, fs);
+}
+
+int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ assert(path);
+ assert(name);
+ assert(value || size <= 0);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ if (setxattr(fs, name, value, size, flags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
+ _cleanup_free_ char *fs = NULL;
+ ssize_t n;
+ int r;
+
+ assert(path);
+ assert(name);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ n = getxattr(fs, name, value, size);
+ if (n < 0)
+ return -errno;
+
+ return (int) n;
+}
+
+int cg_get_xattr_malloc(const char *controller, const char *path, const char *name, char **ret) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ assert(path);
+ assert(name);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ r = getxattr_malloc(fs, name, ret, false);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+int cg_remove_xattr(const char *controller, const char *path, const char *name) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ assert(path);
+ assert(name);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ if (removexattr(fs, name) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *fs, *controller_str;
+ int unified, r;
+
+ assert(pid >= 0);
+ assert(ret_path);
+
+ if (controller) {
+ if (!cg_controller_is_valid(controller))
+ return -EINVAL;
+ } else
+ controller = SYSTEMD_CGROUP_CONTROLLER;
+
+ unified = cg_unified_controller(controller);
+ if (unified < 0)
+ return unified;
+ if (unified == 0) {
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
+ else
+ controller_str = controller;
+ }
+
+ fs = procfs_file_alloca(pid, "cgroup");
+ r = fopen_unlocked(fs, "re", &f);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *e;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENODATA;
+
+ if (unified) {
+ e = startswith(line, "0:");
+ if (!e)
+ continue;
+
+ e = strchr(e, ':');
+ if (!e)
+ continue;
+ } else {
+ char *l;
+
+ l = strchr(line, ':');
+ if (!l)
+ continue;
+
+ l++;
+ e = strchr(l, ':');
+ if (!e)
+ continue;
+ *e = 0;
+
+ r = string_contains_word(l, ",", controller_str);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+ }
+
+ char *path = strdup(e + 1);
+ if (!path)
+ return -ENOMEM;
+
+ /* Truncate suffix indicating the process is a zombie */
+ e = endswith(path, " (deleted)");
+ if (e)
+ *e = 0;
+
+ *ret_path = path;
+ return 0;
+ }
+}
+
+int cg_install_release_agent(const char *controller, const char *agent) {
+ _cleanup_free_ char *fs = NULL, *contents = NULL;
+ const char *sc;
+ int r;
+
+ assert(agent);
+
+ r = cg_unified_controller(controller);
+ if (r < 0)
+ return r;
+ if (r > 0) /* doesn't apply to unified hierarchy */
+ return -EOPNOTSUPP;
+
+ r = cg_get_path(controller, NULL, "release_agent", &fs);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(fs, &contents);
+ if (r < 0)
+ return r;
+
+ sc = strstrip(contents);
+ if (isempty(sc)) {
+ r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+ } else if (!path_equal(sc, agent))
+ return -EEXIST;
+
+ fs = mfree(fs);
+ r = cg_get_path(controller, NULL, "notify_on_release", &fs);
+ if (r < 0)
+ return r;
+
+ contents = mfree(contents);
+ r = read_one_line_file(fs, &contents);
+ if (r < 0)
+ return r;
+
+ sc = strstrip(contents);
+ if (streq(sc, "0")) {
+ r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ if (!streq(sc, "1"))
+ return -EIO;
+
+ return 0;
+}
+
+int cg_uninstall_release_agent(const char *controller) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ r = cg_unified_controller(controller);
+ if (r < 0)
+ return r;
+ if (r > 0) /* Doesn't apply to unified hierarchy */
+ return -EOPNOTSUPP;
+
+ r = cg_get_path(controller, NULL, "notify_on_release", &fs);
+ if (r < 0)
+ return r;
+
+ r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ fs = mfree(fs);
+
+ r = cg_get_path(controller, NULL, "release_agent", &fs);
+ if (r < 0)
+ return r;
+
+ r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int cg_is_empty(const char *controller, const char *path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid;
+ int r;
+
+ assert(path);
+
+ r = cg_enumerate_processes(controller, path, &f);
+ if (r == -ENOENT)
+ return true;
+ if (r < 0)
+ return r;
+
+ r = cg_read_pid(f, &pid);
+ if (r < 0)
+ return r;
+
+ return r == 0;
+}
+
+int cg_is_empty_recursive(const char *controller, const char *path) {
+ int r;
+
+ assert(path);
+
+ /* The root cgroup is always populated */
+ if (controller && empty_or_root(path))
+ return false;
+
+ r = cg_unified_controller(controller);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ _cleanup_free_ char *t = NULL;
+
+ /* On the unified hierarchy we can check empty state
+ * via the "populated" attribute of "cgroup.events". */
+
+ r = cg_read_event(controller, path, "populated", &t);
+ if (r == -ENOENT)
+ return true;
+ if (r < 0)
+ return r;
+
+ return streq(t, "0");
+ } else {
+ _cleanup_closedir_ DIR *d = NULL;
+ char *fn;
+
+ r = cg_is_empty(controller, path);
+ if (r <= 0)
+ return r;
+
+ r = cg_enumerate_subgroups(controller, path, &d);
+ if (r == -ENOENT)
+ return true;
+ if (r < 0)
+ return r;
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(path, fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_is_empty_recursive(controller, p);
+ if (r <= 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ return true;
+ }
+}
+
+int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
+ _cleanup_free_ char *controller = NULL, *path = NULL;
+
+ assert(spec);
+
+ if (*spec == '/') {
+ if (!path_is_normalized(spec))
+ return -EINVAL;
+
+ if (ret_path) {
+ path = strdup(spec);
+ if (!path)
+ return -ENOMEM;
+
+ path_simplify(path, false);
+ }
+
+ } else {
+ const char *e;
+
+ e = strchr(spec, ':');
+ if (e) {
+ controller = strndup(spec, e-spec);
+ if (!controller)
+ return -ENOMEM;
+ if (!cg_controller_is_valid(controller))
+ return -EINVAL;
+
+ if (!isempty(e + 1)) {
+ path = strdup(e+1);
+ if (!path)
+ return -ENOMEM;
+
+ if (!path_is_normalized(path) ||
+ !path_is_absolute(path))
+ return -EINVAL;
+
+ path_simplify(path, false);
+ }
+
+ } else {
+ if (!cg_controller_is_valid(spec))
+ return -EINVAL;
+
+ if (ret_controller) {
+ controller = strdup(spec);
+ if (!controller)
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (ret_controller)
+ *ret_controller = TAKE_PTR(controller);
+ if (ret_path)
+ *ret_path = TAKE_PTR(path);
+ return 0;
+}
+
+int cg_mangle_path(const char *path, char **result) {
+ _cleanup_free_ char *c = NULL, *p = NULL;
+ char *t;
+ int r;
+
+ assert(path);
+ assert(result);
+
+ /* First, check if it already is a filesystem path */
+ if (path_startswith(path, "/sys/fs/cgroup")) {
+
+ t = strdup(path);
+ if (!t)
+ return -ENOMEM;
+
+ *result = path_simplify(t, false);
+ return 0;
+ }
+
+ /* Otherwise, treat it as cg spec */
+ r = cg_split_spec(path, &c, &p);
+ if (r < 0)
+ return r;
+
+ return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
+}
+
+int cg_get_root_path(char **path) {
+ char *p, *e;
+ int r;
+
+ assert(path);
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
+ if (r < 0)
+ return r;
+
+ e = endswith(p, "/" SPECIAL_INIT_SCOPE);
+ if (!e)
+ e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
+ if (!e)
+ e = endswith(p, "/system"); /* even more legacy */
+ if (e)
+ *e = 0;
+
+ *path = p;
+ return 0;
+}
+
+int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
+ _cleanup_free_ char *rt = NULL;
+ char *p;
+ int r;
+
+ assert(cgroup);
+ assert(shifted);
+
+ if (!root) {
+ /* If the root was specified let's use that, otherwise
+ * let's determine it from PID 1 */
+
+ r = cg_get_root_path(&rt);
+ if (r < 0)
+ return r;
+
+ root = rt;
+ }
+
+ p = path_startswith(cgroup, root);
+ if (p && p > cgroup)
+ *shifted = p - 1;
+ else
+ *shifted = cgroup;
+
+ return 0;
+}
+
+int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
+ _cleanup_free_ char *raw = NULL;
+ const char *c;
+ int r;
+
+ assert(pid >= 0);
+ assert(cgroup);
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
+ if (r < 0)
+ return r;
+
+ r = cg_shift_path(raw, root, &c);
+ if (r < 0)
+ return r;
+
+ if (c == raw)
+ *cgroup = TAKE_PTR(raw);
+ else {
+ char *n;
+
+ n = strdup(c);
+ if (!n)
+ return -ENOMEM;
+
+ *cgroup = n;
+ }
+
+ return 0;
+}
+
+int cg_path_decode_unit(const char *cgroup, char **unit) {
+ char *c, *s;
+ size_t n;
+
+ assert(cgroup);
+ assert(unit);
+
+ n = strcspn(cgroup, "/");
+ if (n < 3)
+ return -ENXIO;
+
+ c = strndupa(cgroup, n);
+ c = cg_unescape(c);
+
+ if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ return -ENXIO;
+
+ s = strdup(c);
+ if (!s)
+ return -ENOMEM;
+
+ *unit = s;
+ return 0;
+}
+
+static bool valid_slice_name(const char *p, size_t n) {
+
+ if (!p)
+ return false;
+
+ if (n < STRLEN("x.slice"))
+ return false;
+
+ if (memcmp(p + n - 6, ".slice", 6) == 0) {
+ char buf[n+1], *c;
+
+ memcpy(buf, p, n);
+ buf[n] = 0;
+
+ c = cg_unescape(buf);
+
+ return unit_name_is_valid(c, UNIT_NAME_PLAIN);
+ }
+
+ return false;
+}
+
+static const char *skip_slices(const char *p) {
+ assert(p);
+
+ /* Skips over all slice assignments */
+
+ for (;;) {
+ size_t n;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (!valid_slice_name(p, n))
+ return p;
+
+ p += n;
+ }
+}
+
+int cg_path_get_unit(const char *path, char **ret) {
+ const char *e;
+ char *unit;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ e = skip_slices(path);
+
+ r = cg_path_decode_unit(e, &unit);
+ if (r < 0)
+ return r;
+
+ /* We skipped over the slices, don't accept any now */
+ if (endswith(unit, ".slice")) {
+ free(unit);
+ return -ENXIO;
+ }
+
+ *ret = unit;
+ return 0;
+}
+
+int cg_pid_get_unit(pid_t pid, char **unit) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(unit);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_unit(cgroup, unit);
+}
+
+/**
+ * Skip session-*.scope, but require it to be there.
+ */
+static const char *skip_session(const char *p) {
+ size_t n;
+
+ if (isempty(p))
+ return NULL;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (n < STRLEN("session-x.scope"))
+ return NULL;
+
+ if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
+ char buf[n - 8 - 6 + 1];
+
+ memcpy(buf, p + 8, n - 8 - 6);
+ buf[n - 8 - 6] = 0;
+
+ /* Note that session scopes never need unescaping,
+ * since they cannot conflict with the kernel's own
+ * names, hence we don't need to call cg_unescape()
+ * here. */
+
+ if (!session_id_valid(buf))
+ return false;
+
+ p += n;
+ p += strspn(p, "/");
+ return p;
+ }
+
+ return NULL;
+}
+
+/**
+ * Skip user@*.service, but require it to be there.
+ */
+static const char *skip_user_manager(const char *p) {
+ size_t n;
+
+ if (isempty(p))
+ return NULL;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (n < STRLEN("user@x.service"))
+ return NULL;
+
+ if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) {
+ char buf[n - 5 - 8 + 1];
+
+ memcpy(buf, p + 5, n - 5 - 8);
+ buf[n - 5 - 8] = 0;
+
+ /* Note that user manager services never need unescaping,
+ * since they cannot conflict with the kernel's own
+ * names, hence we don't need to call cg_unescape()
+ * here. */
+
+ if (parse_uid(buf, NULL) < 0)
+ return NULL;
+
+ p += n;
+ p += strspn(p, "/");
+
+ return p;
+ }
+
+ return NULL;
+}
+
+static const char *skip_user_prefix(const char *path) {
+ const char *e, *t;
+
+ assert(path);
+
+ /* Skip slices, if there are any */
+ e = skip_slices(path);
+
+ /* Skip the user manager, if it's in the path now... */
+ t = skip_user_manager(e);
+ if (t)
+ return t;
+
+ /* Alternatively skip the user session if it is in the path... */
+ return skip_session(e);
+}
+
+int cg_path_get_user_unit(const char *path, char **ret) {
+ const char *t;
+
+ assert(path);
+ assert(ret);
+
+ t = skip_user_prefix(path);
+ if (!t)
+ return -ENXIO;
+
+ /* And from here on it looks pretty much the same as for a system unit, hence let's use the same
+ * parser. */
+ return cg_path_get_unit(t, ret);
+}
+
+int cg_pid_get_user_unit(pid_t pid, char **unit) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(unit);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_user_unit(cgroup, unit);
+}
+
+int cg_path_get_machine_name(const char *path, char **machine) {
+ _cleanup_free_ char *u = NULL;
+ const char *sl;
+ int r;
+
+ r = cg_path_get_unit(path, &u);
+ if (r < 0)
+ return r;
+
+ sl = strjoina("/run/systemd/machines/unit:", u);
+ return readlink_malloc(sl, machine);
+}
+
+int cg_pid_get_machine_name(pid_t pid, char **machine) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(machine);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_machine_name(cgroup, machine);
+}
+
+int cg_path_get_session(const char *path, char **session) {
+ _cleanup_free_ char *unit = NULL;
+ char *start, *end;
+ int r;
+
+ assert(path);
+
+ r = cg_path_get_unit(path, &unit);
+ if (r < 0)
+ return r;
+
+ start = startswith(unit, "session-");
+ if (!start)
+ return -ENXIO;
+ end = endswith(start, ".scope");
+ if (!end)
+ return -ENXIO;
+
+ *end = 0;
+ if (!session_id_valid(start))
+ return -ENXIO;
+
+ if (session) {
+ char *rr;
+
+ rr = strdup(start);
+ if (!rr)
+ return -ENOMEM;
+
+ *session = rr;
+ }
+
+ return 0;
+}
+
+int cg_pid_get_session(pid_t pid, char **session) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_session(cgroup, session);
+}
+
+int cg_path_get_owner_uid(const char *path, uid_t *uid) {
+ _cleanup_free_ char *slice = NULL;
+ char *start, *end;
+ int r;
+
+ assert(path);
+
+ r = cg_path_get_slice(path, &slice);
+ if (r < 0)
+ return r;
+
+ start = startswith(slice, "user-");
+ if (!start)
+ return -ENXIO;
+ end = endswith(start, ".slice");
+ if (!end)
+ return -ENXIO;
+
+ *end = 0;
+ if (parse_uid(start, uid) < 0)
+ return -ENXIO;
+
+ return 0;
+}
+
+int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_owner_uid(cgroup, uid);
+}
+
+int cg_path_get_slice(const char *p, char **slice) {
+ const char *e = NULL;
+
+ assert(p);
+ assert(slice);
+
+ /* Finds the right-most slice unit from the beginning, but
+ * stops before we come to the first non-slice unit. */
+
+ for (;;) {
+ size_t n;
+
+ p += strspn(p, "/");
+
+ n = strcspn(p, "/");
+ if (!valid_slice_name(p, n)) {
+
+ if (!e) {
+ char *s;
+
+ s = strdup(SPECIAL_ROOT_SLICE);
+ if (!s)
+ return -ENOMEM;
+
+ *slice = s;
+ return 0;
+ }
+
+ return cg_path_decode_unit(e, slice);
+ }
+
+ e = p;
+ p += n;
+ }
+}
+
+int cg_pid_get_slice(pid_t pid, char **slice) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(slice);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_slice(cgroup, slice);
+}
+
+int cg_path_get_user_slice(const char *p, char **slice) {
+ const char *t;
+ assert(p);
+ assert(slice);
+
+ t = skip_user_prefix(p);
+ if (!t)
+ return -ENXIO;
+
+ /* And now it looks pretty much the same as for a system
+ * slice, so let's just use the same parser from here on. */
+ return cg_path_get_slice(t, slice);
+}
+
+int cg_pid_get_user_slice(pid_t pid, char **slice) {
+ _cleanup_free_ char *cgroup = NULL;
+ int r;
+
+ assert(slice);
+
+ r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_user_slice(cgroup, slice);
+}
+
+char *cg_escape(const char *p) {
+ bool need_prefix = false;
+
+ /* This implements very minimal escaping for names to be used
+ * as file names in the cgroup tree: any name which might
+ * conflict with a kernel name or is prefixed with '_' is
+ * prefixed with a '_'. That way, when reading cgroup names it
+ * is sufficient to remove a single prefixing underscore if
+ * there is one. */
+
+ /* The return value of this function (unlike cg_unescape())
+ * needs free()! */
+
+ if (IN_SET(p[0], 0, '_', '.') ||
+ STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
+ startswith(p, "cgroup."))
+ need_prefix = true;
+ else {
+ const char *dot;
+
+ dot = strrchr(p, '.');
+ if (dot) {
+ CGroupController c;
+ size_t l = dot - p;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ const char *n;
+
+ n = cgroup_controller_to_string(c);
+
+ if (l != strlen(n))
+ continue;
+
+ if (memcmp(p, n, l) != 0)
+ continue;
+
+ need_prefix = true;
+ break;
+ }
+ }
+ }
+
+ if (need_prefix)
+ return strjoin("_", p);
+
+ return strdup(p);
+}
+
+char *cg_unescape(const char *p) {
+ assert(p);
+
+ /* The return value of this function (unlike cg_escape())
+ * doesn't need free()! */
+
+ if (p[0] == '_')
+ return (char*) p+1;
+
+ return (char*) p;
+}
+
+#define CONTROLLER_VALID \
+ DIGITS LETTERS \
+ "_"
+
+bool cg_controller_is_valid(const char *p) {
+ const char *t, *s;
+
+ if (!p)
+ return false;
+
+ if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
+ return true;
+
+ s = startswith(p, "name=");
+ if (s)
+ p = s;
+
+ if (IN_SET(*p, 0, '_'))
+ return false;
+
+ for (t = p; *t; t++)
+ if (!strchr(CONTROLLER_VALID, *t))
+ return false;
+
+ if (t - p > FILENAME_MAX)
+ return false;
+
+ return true;
+}
+
+int cg_slice_to_path(const char *unit, char **ret) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
+ const char *dash;
+ int r;
+
+ assert(unit);
+ assert(ret);
+
+ if (streq(unit, SPECIAL_ROOT_SLICE)) {
+ char *x;
+
+ x = strdup("");
+ if (!x)
+ return -ENOMEM;
+ *ret = x;
+ return 0;
+ }
+
+ if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ if (!endswith(unit, ".slice"))
+ return -EINVAL;
+
+ r = unit_name_to_prefix(unit, &p);
+ if (r < 0)
+ return r;
+
+ dash = strchr(p, '-');
+
+ /* Don't allow initial dashes */
+ if (dash == p)
+ return -EINVAL;
+
+ while (dash) {
+ _cleanup_free_ char *escaped = NULL;
+ char n[dash - p + sizeof(".slice")];
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+ /* msan doesn't instrument stpncpy, so it thinks
+ * n is later used uninitialized:
+ * https://github.com/google/sanitizers/issues/926
+ */
+ zero(n);
+#endif
+
+ /* Don't allow trailing or double dashes */
+ if (IN_SET(dash[1], 0, '-'))
+ return -EINVAL;
+
+ strcpy(stpncpy(n, p, dash - p), ".slice");
+ if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ escaped = cg_escape(n);
+ if (!escaped)
+ return -ENOMEM;
+
+ if (!strextend(&s, escaped, "/", NULL))
+ return -ENOMEM;
+
+ dash = strchr(dash+1, '-');
+ }
+
+ e = cg_escape(unit);
+ if (!e)
+ return -ENOMEM;
+
+ if (!strextend(&s, e, NULL))
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = cg_get_path(controller, path, attribute, &p);
+ if (r < 0)
+ return r;
+
+ return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = cg_get_path(controller, path, attribute, &p);
+ if (r < 0)
+ return r;
+
+ return read_one_line_file(p, ret);
+}
+
+int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
+ _cleanup_free_ char *value = NULL;
+ uint64_t v;
+ int r;
+
+ assert(ret);
+
+ r = cg_get_attribute(controller, path, attribute, &value);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ if (streq(value, "max")) {
+ *ret = CGROUP_LIMIT_MAX;
+ return 0;
+ }
+
+ r = safe_atou64(value, &v);
+ if (r < 0)
+ return r;
+
+ *ret = v;
+ return 0;
+}
+
+int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) {
+ _cleanup_free_ char *value = NULL;
+ int r;
+
+ assert(ret);
+
+ r = cg_get_attribute(controller, path, attribute, &value);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ r = parse_boolean(value);
+ if (r < 0)
+ return r;
+
+ *ret = r;
+ return 0;
+}
+
+int cg_get_keyed_attribute_full(
+ const char *controller,
+ const char *path,
+ const char *attribute,
+ char **keys,
+ char **ret_values,
+ CGroupKeyMode mode) {
+
+ _cleanup_free_ char *filename = NULL, *contents = NULL;
+ const char *p;
+ size_t n, i, n_done = 0;
+ char **v;
+ int r;
+
+ /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
+ * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
+ * entries as 'keys'. On success each entry will be set to the value of the matching key.
+ *
+ * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode
+ * is set to GG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */
+
+ r = cg_get_path(controller, path, attribute, &filename);
+ if (r < 0)
+ return r;
+
+ r = read_full_file(filename, &contents, NULL);
+ if (r < 0)
+ return r;
+
+ n = strv_length(keys);
+ if (n == 0) /* No keys to retrieve? That's easy, we are done then */
+ return 0;
+
+ /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
+ v = newa0(char*, n);
+
+ for (p = contents; *p;) {
+ const char *w = NULL;
+
+ for (i = 0; i < n; i++)
+ if (!v[i]) {
+ w = first_word(p, keys[i]);
+ if (w)
+ break;
+ }
+
+ if (w) {
+ size_t l;
+
+ l = strcspn(w, NEWLINE);
+ v[i] = strndup(w, l);
+ if (!v[i]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n_done++;
+ if (n_done >= n)
+ goto done;
+
+ p = w + l;
+ } else
+ p += strcspn(p, NEWLINE);
+
+ p += strspn(p, NEWLINE);
+ }
+
+ if (mode & CG_KEY_MODE_GRACEFUL)
+ goto done;
+
+ r = -ENXIO;
+
+fail:
+ for (i = 0; i < n; i++)
+ free(v[i]);
+
+ return r;
+
+done:
+ memcpy(ret_values, v, sizeof(char*) * n);
+ if (mode & CG_KEY_MODE_GRACEFUL)
+ return n_done;
+
+ return 0;
+}
+
+int cg_mask_to_string(CGroupMask mask, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t n = 0, allocated = 0;
+ bool space = false;
+ CGroupController c;
+
+ assert(ret);
+
+ if (mask == 0) {
+ *ret = NULL;
+ return 0;
+ }
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ const char *k;
+ size_t l;
+
+ if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
+ continue;
+
+ k = cgroup_controller_to_string(c);
+ l = strlen(k);
+
+ if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
+ return -ENOMEM;
+
+ if (space)
+ s[n] = ' ';
+ memcpy(s + n + space, k, l);
+ n += space + l;
+
+ space = true;
+ }
+
+ assert(s);
+
+ s[n] = 0;
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int cg_mask_from_string(const char *value, CGroupMask *ret) {
+ CGroupMask m = 0;
+
+ assert(ret);
+ assert(value);
+
+ for (;;) {
+ _cleanup_free_ char *n = NULL;
+ CGroupController v;
+ int r;
+
+ r = extract_first_word(&value, &n, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ v = cgroup_controller_from_string(n);
+ if (v < 0)
+ continue;
+
+ m |= CGROUP_CONTROLLER_TO_MASK(v);
+ }
+
+ *ret = m;
+ return 0;
+}
+
+int cg_mask_supported(CGroupMask *ret) {
+ CGroupMask mask;
+ int r;
+
+ /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
+ * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
+ * pseudo-controllers. */
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
+
+ /* In the unified hierarchy we can read the supported and accessible controllers from
+ * the top-level cgroup attribute */
+
+ r = cg_get_root_path(&root);
+ if (r < 0)
+ return r;
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(path, &controllers);
+ if (r < 0)
+ return r;
+
+ r = cg_mask_from_string(controllers, &mask);
+ if (r < 0)
+ return r;
+
+ /* Mask controllers that are not supported in unified hierarchy. */
+ mask &= CGROUP_MASK_V2;
+
+ } else {
+ CGroupController c;
+
+ /* In the legacy hierarchy, we check which hierarchies are mounted. */
+
+ mask = 0;
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(CGROUP_MASK_V1, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ if (controller_is_accessible(n) >= 0)
+ mask |= bit;
+ }
+ }
+
+ *ret = mask;
+ return 0;
+}
+
+int cg_kernel_controllers(Set **ret) {
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(ret);
+
+ /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
+ * and controllers that aren't currently accessible (because not mounted). This does not include "name="
+ * pseudo-controllers. */
+
+ controllers = set_new(&string_hash_ops);
+ if (!controllers)
+ return -ENOMEM;
+
+ r = fopen_unlocked("/proc/cgroups", "re", &f);
+ if (r == -ENOENT) {
+ *ret = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ /* Ignore the header line */
+ (void) read_line(f, (size_t) -1, NULL);
+
+ for (;;) {
+ char *controller;
+ int enabled = 0;
+
+ errno = 0;
+ if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
+
+ if (feof(f))
+ break;
+
+ if (ferror(f))
+ return errno_or_else(EIO);
+
+ return -EBADMSG;
+ }
+
+ if (!enabled) {
+ free(controller);
+ continue;
+ }
+
+ if (!cg_controller_is_valid(controller)) {
+ free(controller);
+ return -EBADMSG;
+ }
+
+ r = set_consume(controllers, controller);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(controllers);
+
+ return 0;
+}
+
+/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
+ * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
+ * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on
+ * /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility
+ * with other tools.
+ *
+ * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep
+ * cgroup v2 process management but disable the compat dual layout, we return true on
+ * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
+ */
+static thread_local bool unified_systemd_v232;
+
+int cg_unified_cached(bool flush) {
+ static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
+
+ struct statfs fs;
+
+ /* Checks if we support the unified hierarchy. Returns an
+ * error when the cgroup hierarchies aren't mounted yet or we
+ * have any other trouble determining if the unified hierarchy
+ * is supported. */
+
+ if (flush)
+ unified_cache = CGROUP_UNIFIED_UNKNOWN;
+ else if (unified_cache >= CGROUP_UNIFIED_NONE)
+ return unified_cache;
+
+ if (statfs("/sys/fs/cgroup/", &fs) < 0)
+ return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
+
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
+ unified_cache = CGROUP_UNIFIED_ALL;
+ } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
+ if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
+ F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
+ unified_cache = CGROUP_UNIFIED_SYSTEMD;
+ unified_systemd_v232 = false;
+ } else {
+ if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
+ return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
+
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
+ unified_cache = CGROUP_UNIFIED_SYSTEMD;
+ unified_systemd_v232 = true;
+ } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
+ log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
+ unified_cache = CGROUP_UNIFIED_NONE;
+ } else {
+ log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
+ (unsigned long long) fs.f_type);
+ unified_cache = CGROUP_UNIFIED_NONE;
+ }
+ }
+ } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+ "No filesystem is currently mounted on /sys/fs/cgroup.");
+ } else
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+ "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
+ (unsigned long long)fs.f_type);
+
+ return unified_cache;
+}
+
+int cg_unified_controller(const char *controller) {
+ int r;
+
+ r = cg_unified_cached(false);
+ if (r < 0)
+ return r;
+
+ if (r == CGROUP_UNIFIED_NONE)
+ return false;
+
+ if (r >= CGROUP_UNIFIED_ALL)
+ return true;
+
+ return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
+}
+
+int cg_all_unified(void) {
+ int r;
+
+ r = cg_unified_cached(false);
+ if (r < 0)
+ return r;
+
+ return r >= CGROUP_UNIFIED_ALL;
+}
+
+int cg_hybrid_unified(void) {
+ int r;
+
+ r = cg_unified_cached(false);
+ if (r < 0)
+ return r;
+
+ return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
+}
+
+const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
+ [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
+ [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
+};
+
+static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
+ [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax",
+ [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax",
+ [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
+ [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
+
+bool is_cgroup_fs(const struct statfs *s) {
+ return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
+ is_fs_type(s, CGROUP2_SUPER_MAGIC);
+}
+
+bool fd_is_cgroup_fs(int fd) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_cgroup_fs(&s);
+}
+
+static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
+ [CGROUP_CONTROLLER_CPU] = "cpu",
+ [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
+ [CGROUP_CONTROLLER_CPUSET] = "cpuset",
+ [CGROUP_CONTROLLER_IO] = "io",
+ [CGROUP_CONTROLLER_BLKIO] = "blkio",
+ [CGROUP_CONTROLLER_MEMORY] = "memory",
+ [CGROUP_CONTROLLER_DEVICES] = "devices",
+ [CGROUP_CONTROLLER_PIDS] = "pids",
+ [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
+ [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
+
+CGroupMask get_cpu_accounting_mask(void) {
+ static CGroupMask needed_mask = (CGroupMask) -1;
+
+ /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
+ * provided externally from the CPU controller, which means we don't
+ * need to enable the CPU controller just to get metrics. This is good,
+ * because enabling the CPU controller comes at a minor performance
+ * hit, especially when it's propagated deep into large hierarchies.
+ * There's also no separate CPU accounting controller available within
+ * a unified hierarchy.
+ *
+ * This combination of factors results in the desired cgroup mask to
+ * enable for CPU accounting varying as follows:
+ *
+ * ╔═════════════════════╤═════════════════════╗
+ * ║ Linux ≥4.15 │ Linux <4.15 ║
+ * ╔═══════════════╬═════════════════════╪═════════════════════╣
+ * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║
+ * ╟───────────────╫─────────────────────┼─────────────────────╢
+ * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
+ * ╚═══════════════╩═════════════════════╧═════════════════════╝
+ *
+ * We check kernel version here instead of manually checking whether
+ * cpu.stat is present for every cgroup, as that check in itself would
+ * already be fairly expensive.
+ *
+ * Kernels where this patch has been backported will therefore have the
+ * CPU controller enabled unnecessarily. This is more expensive than
+ * necessary, but harmless. ☺️
+ */
+
+ if (needed_mask == (CGroupMask) -1) {
+ if (cg_all_unified()) {
+ struct utsname u;
+ assert_se(uname(&u) >= 0);
+
+ if (str_verscmp(u.release, "4.15") < 0)
+ needed_mask = CGROUP_MASK_CPU;
+ else
+ needed_mask = 0;
+ } else
+ needed_mask = CGROUP_MASK_CPUACCT;
+ }
+
+ return needed_mask;
+}
+
+bool cpu_accounting_is_cheap(void) {
+ return get_cpu_accounting_mask() == 0;
+}
+
+static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
+ [MANAGED_OOM_AUTO] = "auto",
+ [MANAGED_OOM_KILL] = "kill",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
new file mode 100644
index 0000000..bdc0d0d
--- /dev/null
+++ b/src/basic/cgroup-util.h
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+
+#include "def.h"
+#include "set.h"
+
+#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd"
+#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified"
+#define SYSTEMD_CGROUP_CONTROLLER "_systemd"
+
+/* An enum of well known cgroup controllers */
+typedef enum CGroupController {
+ /* Original cgroup controllers */
+ CGROUP_CONTROLLER_CPU,
+ CGROUP_CONTROLLER_CPUACCT, /* v1 only */
+ CGROUP_CONTROLLER_CPUSET, /* v2 only */
+ CGROUP_CONTROLLER_IO, /* v2 only */
+ CGROUP_CONTROLLER_BLKIO, /* v1 only */
+ CGROUP_CONTROLLER_MEMORY,
+ CGROUP_CONTROLLER_DEVICES, /* v1 only */
+ CGROUP_CONTROLLER_PIDS,
+
+ /* BPF-based pseudo-controllers, v2 only */
+ CGROUP_CONTROLLER_BPF_FIREWALL,
+ CGROUP_CONTROLLER_BPF_DEVICES,
+
+ _CGROUP_CONTROLLER_MAX,
+ _CGROUP_CONTROLLER_INVALID = -1,
+} CGroupController;
+
+#define CGROUP_CONTROLLER_TO_MASK(c) (1U << (c))
+
+/* A bit mask of well known cgroup controllers */
+typedef enum CGroupMask {
+ CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU),
+ CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT),
+ CGROUP_MASK_CPUSET = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUSET),
+ CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO),
+ CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO),
+ CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
+ CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
+ CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
+ CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
+ CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES),
+
+ /* All real cgroup v1 controllers */
+ CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS,
+
+ /* All real cgroup v2 controllers */
+ CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_CPUSET|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,
+
+ /* All cgroup v2 BPF pseudo-controllers */
+ CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES,
+
+ _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
+} CGroupMask;
+
+static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) {
+ /* We always mount "cpu" and "cpuacct" in the same hierarchy. Hence, when one bit is set also set the other */
+
+ if (mask & (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT))
+ mask |= (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT);
+
+ return mask;
+}
+
+CGroupMask get_cpu_accounting_mask(void);
+bool cpu_accounting_is_cheap(void);
+
+/* Special values for all weight knobs on unified hierarchy */
+#define CGROUP_WEIGHT_INVALID ((uint64_t) -1)
+#define CGROUP_WEIGHT_MIN UINT64_C(1)
+#define CGROUP_WEIGHT_MAX UINT64_C(10000)
+#define CGROUP_WEIGHT_DEFAULT UINT64_C(100)
+
+#define CGROUP_LIMIT_MIN UINT64_C(0)
+#define CGROUP_LIMIT_MAX ((uint64_t) -1)
+
+static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) {
+ return
+ x == CGROUP_WEIGHT_INVALID ||
+ (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX);
+}
+
+/* IO limits on unified hierarchy */
+typedef enum CGroupIOLimitType {
+ CGROUP_IO_RBPS_MAX,
+ CGROUP_IO_WBPS_MAX,
+ CGROUP_IO_RIOPS_MAX,
+ CGROUP_IO_WIOPS_MAX,
+
+ _CGROUP_IO_LIMIT_TYPE_MAX,
+ _CGROUP_IO_LIMIT_TYPE_INVALID = -1
+} CGroupIOLimitType;
+
+extern const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX];
+
+const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t) _const_;
+CGroupIOLimitType cgroup_io_limit_type_from_string(const char *s) _pure_;
+
+/* Special values for the cpu.shares attribute */
+#define CGROUP_CPU_SHARES_INVALID ((uint64_t) -1)
+#define CGROUP_CPU_SHARES_MIN UINT64_C(2)
+#define CGROUP_CPU_SHARES_MAX UINT64_C(262144)
+#define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024)
+
+static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) {
+ return
+ x == CGROUP_CPU_SHARES_INVALID ||
+ (x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX);
+}
+
+/* Special values for the blkio.weight attribute */
+#define CGROUP_BLKIO_WEIGHT_INVALID ((uint64_t) -1)
+#define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10)
+#define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000)
+#define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500)
+
+static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
+ return
+ x == CGROUP_BLKIO_WEIGHT_INVALID ||
+ (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX);
+}
+
+typedef enum CGroupUnified {
+ CGROUP_UNIFIED_UNKNOWN = -1,
+ CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */
+ CGROUP_UNIFIED_SYSTEMD = 1, /* Only systemd on unified */
+ CGROUP_UNIFIED_ALL = 2, /* Both systemd and controllers on unified */
+} CGroupUnified;
+
+/*
+ * General rules:
+ *
+ * We accept named hierarchies in the syntax "foo" and "name=foo".
+ *
+ * We expect that named hierarchies do not conflict in name with a
+ * kernel hierarchy, modulo the "name=" prefix.
+ *
+ * We always generate "normalized" controller names, i.e. without the
+ * "name=" prefix.
+ *
+ * We require absolute cgroup paths. When returning, we will always
+ * generate paths with multiple adjacent / removed.
+ */
+
+int cg_enumerate_processes(const char *controller, const char *path, FILE **_f);
+int cg_read_pid(FILE *f, pid_t *_pid);
+int cg_read_event(const char *controller, const char *path, const char *event,
+ char **val);
+
+int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d);
+int cg_read_subgroup(DIR *d, char **fn);
+
+typedef enum CGroupFlags {
+ CGROUP_SIGCONT = 1 << 0,
+ CGROUP_IGNORE_SELF = 1 << 1,
+ CGROUP_REMOVE = 1 << 2,
+} CGroupFlags;
+
+typedef int (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata);
+
+int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
+int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
+
+int cg_split_spec(const char *spec, char **ret_controller, char **ret_path);
+int cg_mangle_path(const char *path, char **result);
+
+int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs);
+int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs);
+
+int cg_pid_get_path(const char *controller, pid_t pid, char **path);
+
+int cg_rmdir(const char *controller, const char *path);
+
+typedef enum {
+ CG_KEY_MODE_GRACEFUL = 1 << 0,
+} CGroupKeyMode;
+
+int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
+int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
+int cg_get_keyed_attribute_full(const char *controller, const char *path, const char *attribute, char **keys, char **values, CGroupKeyMode mode);
+
+static inline int cg_get_keyed_attribute(
+ const char *controller,
+ const char *path,
+ const char *attribute,
+ char **keys,
+ char **ret_values) {
+ return cg_get_keyed_attribute_full(controller, path, attribute, keys, ret_values, 0);
+}
+
+static inline int cg_get_keyed_attribute_graceful(
+ const char *controller,
+ const char *path,
+ const char *attribute,
+ char **keys,
+ char **ret_values) {
+ return cg_get_keyed_attribute_full(controller, path, attribute, keys, ret_values, CG_KEY_MODE_GRACEFUL);
+}
+
+int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret);
+
+/* Does a parse_boolean() on the attribute contents and sets ret accordingly */
+int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret);
+
+int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid);
+
+int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags);
+int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size);
+int cg_get_xattr_malloc(const char *controller, const char *path, const char *name, char **ret);
+int cg_remove_xattr(const char *controller, const char *path, const char *name);
+
+int cg_install_release_agent(const char *controller, const char *agent);
+int cg_uninstall_release_agent(const char *controller);
+
+int cg_is_empty(const char *controller, const char *path);
+int cg_is_empty_recursive(const char *controller, const char *path);
+
+int cg_get_root_path(char **path);
+
+int cg_path_get_session(const char *path, char **session);
+int cg_path_get_owner_uid(const char *path, uid_t *uid);
+int cg_path_get_unit(const char *path, char **unit);
+int cg_path_get_user_unit(const char *path, char **unit);
+int cg_path_get_machine_name(const char *path, char **machine);
+int cg_path_get_slice(const char *path, char **slice);
+int cg_path_get_user_slice(const char *path, char **slice);
+
+int cg_shift_path(const char *cgroup, const char *cached_root, const char **shifted);
+int cg_pid_get_path_shifted(pid_t pid, const char *cached_root, char **cgroup);
+
+int cg_pid_get_session(pid_t pid, char **session);
+int cg_pid_get_owner_uid(pid_t pid, uid_t *uid);
+int cg_pid_get_unit(pid_t pid, char **unit);
+int cg_pid_get_user_unit(pid_t pid, char **unit);
+int cg_pid_get_machine_name(pid_t pid, char **machine);
+int cg_pid_get_slice(pid_t pid, char **slice);
+int cg_pid_get_user_slice(pid_t pid, char **slice);
+
+int cg_path_decode_unit(const char *cgroup, char **unit);
+
+char *cg_escape(const char *p);
+char *cg_unescape(const char *p) _pure_;
+
+bool cg_controller_is_valid(const char *p);
+
+int cg_slice_to_path(const char *unit, char **ret);
+
+typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata);
+
+int cg_mask_supported(CGroupMask *ret);
+int cg_mask_from_string(const char *s, CGroupMask *ret);
+int cg_mask_to_string(CGroupMask mask, char **ret);
+
+int cg_kernel_controllers(Set **controllers);
+
+bool cg_ns_supported(void);
+bool cg_freezer_supported(void);
+
+int cg_all_unified(void);
+int cg_hybrid_unified(void);
+int cg_unified_controller(const char *controller);
+int cg_unified_cached(bool flush);
+static inline int cg_unified(void) {
+ return cg_unified_cached(true);
+}
+
+const char* cgroup_controller_to_string(CGroupController c) _const_;
+CGroupController cgroup_controller_from_string(const char *s) _pure_;
+
+bool is_cgroup_fs(const struct statfs *s);
+bool fd_is_cgroup_fs(int fd);
+
+typedef enum ManagedOOMMode {
+ MANAGED_OOM_AUTO,
+ MANAGED_OOM_KILL,
+ _MANAGED_OOM_MODE_MAX,
+ _MANAGED_OOM_MODE_INVALID = -1,
+} ManagedOOMMode;
+
+const char* managed_oom_mode_to_string(ManagedOOMMode m) _const_;
+ManagedOOMMode managed_oom_mode_from_string(const char *s) _pure_;
diff --git a/src/basic/chattr-util.c b/src/basic/chattr-util.c
new file mode 100644
index 0000000..c724e17
--- /dev/null
+++ b/src/basic/chattr-util.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+
+#include "chattr-util.h"
+#include "fd-util.h"
+#include "macro.h"
+
+int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous) {
+ unsigned old_attr, new_attr;
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Explicitly check whether this is a regular file or
+ * directory. If it is anything else (such as a device node or
+ * fifo), then the ioctl will not hit the file systems but
+ * possibly drivers, where the ioctl might have different
+ * effects. Notably, DRM is using the same ioctl() number. */
+
+ if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode))
+ return -ENOTTY;
+
+ if (mask == 0 && !previous)
+ return 0;
+
+ if (ioctl(fd, FS_IOC_GETFLAGS, &old_attr) < 0)
+ return -errno;
+
+ new_attr = (old_attr & ~mask) | (value & mask);
+ if (new_attr == old_attr) {
+ if (previous)
+ *previous = old_attr;
+ return 0;
+ }
+
+ if (ioctl(fd, FS_IOC_SETFLAGS, &new_attr) < 0)
+ return -errno;
+
+ if (previous)
+ *previous = old_attr;
+
+ return 1;
+}
+
+int chattr_path(const char *p, unsigned value, unsigned mask, unsigned *previous) {
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+
+ if (mask == 0)
+ return 0;
+
+ fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return chattr_fd(fd, value, mask, previous);
+}
+
+int read_attr_fd(int fd, unsigned *ret) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode))
+ return -ENOTTY;
+
+ if (ioctl(fd, FS_IOC_GETFLAGS, ret) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int read_attr_path(const char *p, unsigned *ret) {
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+ assert(ret);
+
+ fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return read_attr_fd(fd, ret);
+}
diff --git a/src/basic/chattr-util.h b/src/basic/chattr-util.h
new file mode 100644
index 0000000..2fcdb64
--- /dev/null
+++ b/src/basic/chattr-util.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/fs.h>
+
+#include "missing_fs.h"
+
+/* The chattr() flags to apply when creating a new file *before* writing to it. In particular, flags such as
+ * FS_NOCOW_FL don't work if applied a-posteriori. All other flags are fine (or even necessary, think
+ * FS_IMMUTABLE_FL!) to apply after writing to the files. */
+#define CHATTR_EARLY_FL \
+ (FS_NOATIME_FL | \
+ FS_COMPR_FL | \
+ FS_NOCOW_FL | \
+ FS_NOCOMP_FL | \
+ FS_PROJINHERIT_FL)
+
+#define CHATTR_ALL_FL \
+ (FS_NOATIME_FL | \
+ FS_SYNC_FL | \
+ FS_DIRSYNC_FL | \
+ FS_APPEND_FL | \
+ FS_COMPR_FL | \
+ FS_NODUMP_FL | \
+ FS_EXTENT_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_JOURNAL_DATA_FL | \
+ FS_SECRM_FL | \
+ FS_UNRM_FL | \
+ FS_NOTAIL_FL | \
+ FS_TOPDIR_FL | \
+ FS_NOCOW_FL | \
+ FS_PROJINHERIT_FL)
+
+int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous);
+int chattr_path(const char *p, unsigned value, unsigned mask, unsigned *previous);
+
+int read_attr_fd(int fd, unsigned *ret);
+int read_attr_path(const char *p, unsigned *ret);
diff --git a/src/basic/conf-files.c b/src/basic/conf-files.c
new file mode 100644
index 0000000..f8c9976
--- /dev/null
+++ b/src/basic/conf-files.c
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "conf-files.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "set.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static int files_add(
+ Hashmap *h,
+ Set *masked,
+ const char *suffix,
+ const char *root,
+ unsigned flags,
+ const char *path) {
+
+ _cleanup_closedir_ DIR *dir = NULL;
+ const char *dirpath;
+ struct dirent *de;
+ int r;
+
+ assert(h);
+ assert((flags & CONF_FILES_FILTER_MASKED) == 0 || masked);
+ assert(path);
+
+ dirpath = prefix_roota(root, path);
+
+ dir = opendir(dirpath);
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_debug_errno(errno, "Failed to open directory '%s': %m", dirpath);
+ }
+
+ FOREACH_DIRENT(de, dir, return -errno) {
+ struct stat st;
+ char *p, *key;
+
+ /* Does this match the suffix? */
+ if (suffix && !endswith(de->d_name, suffix))
+ continue;
+
+ /* Has this file already been found in an earlier directory? */
+ if (hashmap_contains(h, de->d_name)) {
+ log_debug("Skipping overridden file '%s/%s'.", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Has this been masked in an earlier directory? */
+ if ((flags & CONF_FILES_FILTER_MASKED) && set_contains(masked, de->d_name)) {
+ log_debug("File '%s/%s' is masked by previous entry.", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Read file metadata if we shall validate the check for file masks, for node types or whether the node is marked executable. */
+ if (flags & (CONF_FILES_FILTER_MASKED|CONF_FILES_REGULAR|CONF_FILES_DIRECTORY|CONF_FILES_EXECUTABLE))
+ if (fstatat(dirfd(dir), de->d_name, &st, 0) < 0) {
+ log_debug_errno(errno, "Failed to stat '%s/%s', ignoring: %m", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Is this a masking entry? */
+ if ((flags & CONF_FILES_FILTER_MASKED))
+ if (null_or_empty(&st)) {
+ assert(masked);
+
+ /* Mark this one as masked */
+ r = set_put_strdup(&masked, de->d_name);
+ if (r < 0)
+ return r;
+
+ log_debug("File '%s/%s' is a mask.", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Does this node have the right type? */
+ if (flags & (CONF_FILES_REGULAR|CONF_FILES_DIRECTORY))
+ if (!((flags & CONF_FILES_DIRECTORY) && S_ISDIR(st.st_mode)) &&
+ !((flags & CONF_FILES_REGULAR) && S_ISREG(st.st_mode))) {
+ log_debug("Ignoring '%s/%s', as it is not a of the right type.", dirpath, de->d_name);
+ continue;
+ }
+
+ /* Does this node have the executable bit set? */
+ if (flags & CONF_FILES_EXECUTABLE)
+ /* As requested: check if the file is marked executable. Note that we don't check access(X_OK)
+ * here, as we care about whether the file is marked executable at all, and not whether it is
+ * executable for us, because if so, such errors are stuff we should log about. */
+
+ if ((st.st_mode & 0111) == 0) { /* not executable */
+ log_debug("Ignoring '%s/%s', as it is not marked executable.", dirpath, de->d_name);
+ continue;
+ }
+
+ if (flags & CONF_FILES_BASENAME) {
+ p = strdup(de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ key = p;
+ } else {
+ p = path_join(dirpath, de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ key = basename(p);
+ }
+
+ r = hashmap_put(h, key, p);
+ if (r < 0) {
+ free(p);
+ return log_debug_errno(r, "Failed to add item to hashmap: %m");
+ }
+
+ assert(r > 0);
+ }
+
+ return 0;
+}
+
+static int base_cmp(char * const *a, char * const *b) {
+ return strcmp(basename(*a), basename(*b));
+}
+
+static int conf_files_list_strv_internal(char ***strv, const char *suffix, const char *root, unsigned flags, char **dirs) {
+ _cleanup_hashmap_free_ Hashmap *fh = NULL;
+ _cleanup_set_free_free_ Set *masked = NULL;
+ char **files, **p;
+ int r;
+
+ assert(strv);
+
+ /* This alters the dirs string array */
+ if (!path_strv_resolve_uniq(dirs, root))
+ return -ENOMEM;
+
+ fh = hashmap_new(&path_hash_ops);
+ if (!fh)
+ return -ENOMEM;
+
+ if (flags & CONF_FILES_FILTER_MASKED) {
+ masked = set_new(&path_hash_ops);
+ if (!masked)
+ return -ENOMEM;
+ }
+
+ STRV_FOREACH(p, dirs) {
+ r = files_add(fh, masked, suffix, root, flags, *p);
+ if (r == -ENOMEM)
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to search for files in %s, ignoring: %m", *p);
+ }
+
+ files = hashmap_get_strv(fh);
+ if (!files)
+ return -ENOMEM;
+
+ typesafe_qsort(files, hashmap_size(fh), base_cmp);
+ *strv = files;
+
+ return 0;
+}
+
+int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path) {
+ /* Insert a path into strv, at the place honouring the usual sorting rules:
+ * - we first compare by the basename
+ * - and then we compare by dirname, allowing just one file with the given
+ * basename.
+ * This means that we will
+ * - add a new entry if basename(path) was not on the list,
+ * - do nothing if an entry with higher priority was already present,
+ * - do nothing if our new entry matches the existing entry,
+ * - replace the existing entry if our new entry has higher priority.
+ */
+ size_t i, n;
+ char *t;
+ int r;
+
+ n = strv_length(*strv);
+ for (i = 0; i < n; i++) {
+ int c;
+
+ c = base_cmp((char* const*) *strv + i, (char* const*) &path);
+ if (c == 0) {
+ char **dir;
+
+ /* Oh, there already is an entry with a matching name (the last component). */
+
+ STRV_FOREACH(dir, dirs) {
+ _cleanup_free_ char *rdir = NULL;
+ char *p1, *p2;
+
+ rdir = path_join(root, *dir);
+ if (!rdir)
+ return -ENOMEM;
+
+ p1 = path_startswith((*strv)[i], rdir);
+ if (p1)
+ /* Existing entry with higher priority
+ * or same priority, no need to do anything. */
+ return 0;
+
+ p2 = path_startswith(path, *dir);
+ if (p2) {
+ /* Our new entry has higher priority */
+
+ t = path_join(root, path);
+ if (!t)
+ return log_oom();
+
+ return free_and_replace((*strv)[i], t);
+ }
+ }
+
+ } else if (c > 0)
+ /* Following files have lower priority, let's go insert our
+ * new entry. */
+ break;
+
+ /* … we are not there yet, let's continue */
+ }
+
+ /* The new file has lower priority than all the existing entries */
+ t = path_join(root, path);
+ if (!t)
+ return -ENOMEM;
+
+ r = strv_insert(strv, i, t);
+ if (r < 0)
+ free(t);
+
+ return r;
+}
+
+int conf_files_list_strv(char ***strv, const char *suffix, const char *root, unsigned flags, const char* const* dirs) {
+ _cleanup_strv_free_ char **copy = NULL;
+
+ assert(strv);
+
+ copy = strv_copy((char**) dirs);
+ if (!copy)
+ return -ENOMEM;
+
+ return conf_files_list_strv_internal(strv, suffix, root, flags, copy);
+}
+
+int conf_files_list(char ***strv, const char *suffix, const char *root, unsigned flags, const char *dir) {
+ _cleanup_strv_free_ char **dirs = NULL;
+
+ assert(strv);
+
+ dirs = strv_new(dir);
+ if (!dirs)
+ return -ENOMEM;
+
+ return conf_files_list_strv_internal(strv, suffix, root, flags, dirs);
+}
+
+int conf_files_list_nulstr(char ***strv, const char *suffix, const char *root, unsigned flags, const char *dirs) {
+ _cleanup_strv_free_ char **d = NULL;
+
+ assert(strv);
+
+ d = strv_split_nulstr(dirs);
+ if (!d)
+ return -ENOMEM;
+
+ return conf_files_list_strv_internal(strv, suffix, root, flags, d);
+}
+
+int conf_files_list_with_replacement(
+ const char *root,
+ char **config_dirs,
+ const char *replacement,
+ char ***files,
+ char **replace_file) {
+
+ _cleanup_strv_free_ char **f = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(config_dirs);
+ assert(files);
+ assert(replace_file || !replacement);
+
+ r = conf_files_list_strv(&f, ".conf", root, 0, (const char* const*) config_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate config files: %m");
+
+ if (replacement) {
+ r = conf_files_insert(&f, root, config_dirs, replacement);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extend config file list: %m");
+
+ p = path_join(root, replacement);
+ if (!p)
+ return log_oom();
+ }
+
+ *files = TAKE_PTR(f);
+ if (replace_file)
+ *replace_file = TAKE_PTR(p);
+ return 0;
+}
diff --git a/src/basic/conf-files.h b/src/basic/conf-files.h
new file mode 100644
index 0000000..7774ed7
--- /dev/null
+++ b/src/basic/conf-files.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+enum {
+ CONF_FILES_EXECUTABLE = 1 << 0,
+ CONF_FILES_REGULAR = 1 << 1,
+ CONF_FILES_DIRECTORY = 1 << 2,
+ CONF_FILES_BASENAME = 1 << 3,
+ CONF_FILES_FILTER_MASKED = 1 << 4,
+};
+
+int conf_files_list(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dir);
+int conf_files_list_strv(char ***ret, const char *suffix, const char *root, unsigned flags, const char* const* dirs);
+int conf_files_list_nulstr(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dirs);
+int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path);
+int conf_files_list_with_replacement(
+ const char *root,
+ char **config_dirs,
+ const char *replacement,
+ char ***files,
+ char **replace_file);
diff --git a/src/basic/copy.c b/src/basic/copy.c
new file mode 100644
index 0000000..6a9c3a3
--- /dev/null
+++ b/src/basic/copy.c
@@ -0,0 +1,1237 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/sendfile.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "mountpoint-util.h"
+#include "nulstr-util.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+#define COPY_BUFFER_SIZE (16U*1024U)
+
+/* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
+ * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
+ * case of bind mount cycles and suchlike. */
+#define COPY_DEPTH_MAX 2048U
+
+static ssize_t try_copy_file_range(
+ int fd_in, loff_t *off_in,
+ int fd_out, loff_t *off_out,
+ size_t len,
+ unsigned flags) {
+
+ static int have = -1;
+ ssize_t r;
+
+ if (have == 0)
+ return -ENOSYS;
+
+ r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
+ if (have < 0)
+ have = r >= 0 || errno != ENOSYS;
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+enum {
+ FD_IS_NO_PIPE,
+ FD_IS_BLOCKING_PIPE,
+ FD_IS_NONBLOCKING_PIPE,
+};
+
+static int fd_is_nonblock_pipe(int fd) {
+ struct stat st;
+ int flags;
+
+ /* Checks whether the specified file descriptor refers to a pipe, and if so if O_NONBLOCK is set. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISFIFO(st.st_mode))
+ return FD_IS_NO_PIPE;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags < 0)
+ return -errno;
+
+ return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE;
+}
+
+static int sigint_pending(void) {
+ sigset_t ss;
+
+ assert_se(sigemptyset(&ss) >= 0);
+ assert_se(sigaddset(&ss, SIGINT) >= 0);
+
+ if (sigtimedwait(&ss, NULL, &(struct timespec) { 0, 0 }) < 0) {
+ if (errno == EAGAIN)
+ return false;
+
+ return -errno;
+ }
+
+ return true;
+}
+
+int copy_bytes_full(
+ int fdf, int fdt,
+ uint64_t max_bytes,
+ CopyFlags copy_flags,
+ void **ret_remains,
+ size_t *ret_remains_size,
+ copy_progress_bytes_t progress,
+ void *userdata) {
+
+ bool try_cfr = true, try_sendfile = true, try_splice = true;
+ int r, nonblock_pipe = -1;
+ size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
+ * of 'max_bytes', which may be specified as UINT64_MAX, in which no maximum is applied. Returns negative on
+ * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
+ * reason but we read but didn't yet write some data an ret_remains/ret_remains_size is not NULL, then it will
+ * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
+ * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
+ * these parameters if non-NULL are set to NULL. */
+
+ if (ret_remains)
+ *ret_remains = NULL;
+ if (ret_remains_size)
+ *ret_remains_size = 0;
+
+ /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
+ * source and destination first. */
+ if ((copy_flags & COPY_REFLINK)) {
+ off_t foffset;
+
+ foffset = lseek(fdf, 0, SEEK_CUR);
+ if (foffset >= 0) {
+ off_t toffset;
+
+ toffset = lseek(fdt, 0, SEEK_CUR);
+ if (toffset >= 0) {
+
+ if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
+ r = btrfs_reflink(fdf, fdt); /* full file reflink */
+ else
+ r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
+ if (r >= 0) {
+ off_t t;
+
+ /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
+ if (max_bytes == UINT64_MAX) {
+
+ /* We cloned to the end of the source file, let's position the read
+ * pointer there, and query it at the same time. */
+ t = lseek(fdf, 0, SEEK_END);
+ if (t < 0)
+ return -errno;
+ if (t < foffset)
+ return -ESPIPE;
+
+ /* Let's adjust the destination file write pointer by the same number
+ * of bytes. */
+ t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ return 0; /* we copied the whole thing, hence hit EOF, return 0 */
+ } else {
+ t = lseek(fdf, foffset + max_bytes, SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ t = lseek(fdt, toffset + max_bytes, SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
+ }
+ }
+ }
+ }
+ }
+
+ for (;;) {
+ ssize_t n;
+
+ if (max_bytes <= 0)
+ return 1; /* return > 0 if we hit the max_bytes limit */
+
+ if (FLAGS_SET(copy_flags, COPY_SIGINT)) {
+ r = sigint_pending();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -EINTR;
+ }
+
+ if (max_bytes != UINT64_MAX && m > max_bytes)
+ m = max_bytes;
+
+ /* First try copy_file_range(), unless we already tried */
+ if (try_cfr) {
+ n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
+ if (n < 0) {
+ if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
+ return n;
+
+ try_cfr = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* First try sendfile(), unless we already tried */
+ if (try_sendfile) {
+ n = sendfile(fdt, fdf, NULL, m);
+ if (n < 0) {
+ if (!IN_SET(errno, EINVAL, ENOSYS))
+ return -errno;
+
+ try_sendfile = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* Then try splice, unless we already tried. */
+ if (try_splice) {
+
+ /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
+ * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
+ * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and
+ * check if either of the specified fds are a pipe, and if so, let's pass the flag
+ * automatically, depending on O_NONBLOCK being set.
+ *
+ * Here's a twist though: when we use it to move data between two pipes of which one has
+ * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK
+ * behaviour. Hence in that case we can't use splice() and still guarantee systematic
+ * O_NONBLOCK behaviour, hence don't. */
+
+ if (nonblock_pipe < 0) {
+ int a, b;
+
+ /* Check if either of these fds is a pipe, and if so non-blocking or not */
+ a = fd_is_nonblock_pipe(fdf);
+ if (a < 0)
+ return a;
+
+ b = fd_is_nonblock_pipe(fdt);
+ if (b < 0)
+ return b;
+
+ if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
+ (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
+ (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
+
+ /* splice() only works if one of the fds is a pipe. If neither is, let's skip
+ * this step right-away. As mentioned above, if one of the two fds refers to a
+ * blocking pipe and the other to a non-blocking pipe, we can't use splice()
+ * either, hence don't try either. This hence means we can only use splice() if
+ * either only one of the two fds is a pipe, or if both are pipes with the same
+ * nonblocking flag setting. */
+
+ try_splice = false;
+ else
+ nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
+ }
+ }
+
+ if (try_splice) {
+ n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
+ if (n < 0) {
+ if (!IN_SET(errno, EINVAL, ENOSYS))
+ return -errno;
+
+ try_splice = false;
+ /* use fallback below */
+ } else if (n == 0) /* EOF */
+ break;
+ else
+ /* Success! */
+ goto next;
+ }
+
+ /* As a fallback just copy bits by hand */
+ {
+ uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
+ ssize_t z;
+
+ n = read(fdf, buf, sizeof buf);
+ if (n < 0)
+ return -errno;
+ if (n == 0) /* EOF */
+ break;
+
+ z = (size_t) n;
+ do {
+ ssize_t k;
+
+ k = write(fdt, p, z);
+ if (k < 0) {
+ r = -errno;
+
+ if (ret_remains) {
+ void *copy;
+
+ copy = memdup(p, z);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret_remains = copy;
+ }
+
+ if (ret_remains_size)
+ *ret_remains_size = z;
+
+ return r;
+ }
+
+ assert(k <= z);
+ z -= k;
+ p += k;
+ } while (z > 0);
+ }
+
+ next:
+ if (progress) {
+ r = progress(n, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (max_bytes != (uint64_t) -1) {
+ assert(max_bytes >= (uint64_t) n);
+ max_bytes -= n;
+ }
+
+ /* sendfile accepts at most SSIZE_MAX-offset bytes to copy,
+ * so reduce our maximum by the amount we already copied,
+ * but don't go below our copy buffer size, unless we are
+ * close the limit of bytes we are allowed to copy. */
+ m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
+ }
+
+ return 0; /* return 0 if we hit EOF earlier than the size limit */
+}
+
+static int fd_copy_symlink(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags) {
+
+ _cleanup_free_ char *target = NULL;
+ int r;
+
+ assert(from);
+ assert(st);
+ assert(to);
+
+ r = readlinkat_malloc(df, from, &target);
+ if (r < 0)
+ return r;
+
+ if (copy_flags & COPY_MAC_CREATE) {
+ r = mac_selinux_create_file_prepare_at(dt, to, S_IFLNK);
+ if (r < 0)
+ return r;
+ }
+ r = symlinkat(target, dt, to);
+ if (copy_flags & COPY_MAC_CREATE)
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return -errno;
+
+ if (fchownat(dt, to,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid,
+ AT_SYMLINK_NOFOLLOW) < 0)
+ return -errno;
+
+ return 0;
+}
+
+/* Encapsulates the database we store potential hardlink targets in */
+typedef struct HardlinkContext {
+ int dir_fd; /* An fd to the directory we use as lookup table. Never AT_FDCWD. Lazily created, when
+ * we add the first entry. */
+
+ /* These two fields are used to create the hardlink repository directory above — via
+ * mkdirat(parent_fd, subdir) — and are kept so that we can automatically remove the directory again
+ * when we are done. */
+ int parent_fd; /* Possibly AT_FDCWD */
+ char *subdir;
+} HardlinkContext;
+
+static int hardlink_context_setup(
+ HardlinkContext *c,
+ int dt,
+ const char *to,
+ CopyFlags copy_flags) {
+
+ _cleanup_close_ int dt_copy = -1;
+ int r;
+
+ assert(c);
+ assert(c->dir_fd < 0 && c->dir_fd != AT_FDCWD);
+ assert(c->parent_fd < 0);
+ assert(!c->subdir);
+
+ /* If hardlink recreation is requested we have to maintain a database of inodes that are potential
+ * hardlink sources. Given that generally disk sizes have to be assumed to be larger than what fits
+ * into physical RAM we cannot maintain that database in dynamic memory alone. Here we opt to
+ * maintain it on disk, to simplify things: inside the destination directory we'll maintain a
+ * temporary directory consisting of hardlinks of every inode we copied that might be subject of
+ * hardlinks. We can then use that as hardlink source later on. Yes, this means additional disk IO
+ * but thankfully Linux is optimized for this kind of thing. If this ever becomes a performance
+ * bottleneck we can certainly place an in-memory hash table in front of this, but for the beginning,
+ * let's keep things simple, and just use the disk as lookup table for inodes.
+ *
+ * Note that this should have zero performance impact as long as .n_link of all files copied remains
+ * <= 0, because in that case we will not actually allocate the hardlink inode lookup table directory
+ * on disk (we do so lazily, when the first candidate with .n_link > 1 is seen). This means, in the
+ * common case where hardlinks are not used at all or only for few files the fact that we store the
+ * table on disk shouldn't matter perfomance-wise. */
+
+ if (!FLAGS_SET(copy_flags, COPY_HARDLINKS))
+ return 0;
+
+ if (dt == AT_FDCWD)
+ dt_copy = AT_FDCWD;
+ else if (dt < 0)
+ return -EBADF;
+ else {
+ dt_copy = fcntl(dt, F_DUPFD_CLOEXEC, 3);
+ if (dt_copy < 0)
+ return -errno;
+ }
+
+ r = tempfn_random_child(to, "hardlink", &c->subdir);
+ if (r < 0)
+ return r;
+
+ c->parent_fd = TAKE_FD(dt_copy);
+
+ /* We don't actually create the directory we keep the table in here, that's done on-demand when the
+ * first entry is added, using hardlink_context_realize() below. */
+ return 1;
+}
+
+static int hardlink_context_realize(HardlinkContext *c) {
+ int r;
+
+ if (!c)
+ return 0;
+
+ if (c->dir_fd >= 0) /* Already realized */
+ return 1;
+
+ if (c->parent_fd < 0 && c->parent_fd != AT_FDCWD) /* Not configured */
+ return 0;
+
+ assert(c->subdir);
+
+ if (mkdirat(c->parent_fd, c->subdir, 0700) < 0)
+ return -errno;
+
+ c->dir_fd = openat(c->parent_fd, c->subdir, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (c->dir_fd < 0) {
+ r = -errno;
+ (void) unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR);
+ return r;
+ }
+
+ return 1;
+}
+
+static void hardlink_context_destroy(HardlinkContext *c) {
+ int r;
+
+ assert(c);
+
+ /* Automatically remove the hardlink lookup table directory again after we are done. This is used via
+ * _cleanup_() so that we really delete this, even on failure. */
+
+ if (c->dir_fd >= 0) {
+ r = rm_rf_children(TAKE_FD(c->dir_fd), REMOVE_PHYSICAL, NULL); /* consumes dir_fd in all cases, even on failure */
+ if (r < 0)
+ log_debug_errno(r, "Failed to remove hardlink store (%s) contents, ignoring: %m", c->subdir);
+
+ assert(c->parent_fd >= 0 || c->parent_fd == AT_FDCWD);
+ assert(c->subdir);
+
+ if (unlinkat(c->parent_fd, c->subdir, AT_REMOVEDIR) < 0)
+ log_debug_errno(errno, "Failed to remove hardlink store (%s) directory, ignoring: %m", c->subdir);
+ }
+
+ assert_cc(AT_FDCWD < 0);
+ c->parent_fd = safe_close(c->parent_fd);
+
+ c->subdir = mfree(c->subdir);
+}
+
+static int try_hardlink(
+ HardlinkContext *c,
+ const struct stat *st,
+ int dt,
+ const char *to) {
+
+ char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
+
+ assert(st);
+ assert(dt >= 0 || dt == AT_FDCWD);
+ assert(to);
+
+ if (!c) /* No temporary hardlink directory, don't bother */
+ return 0;
+
+ if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
+ return 0;
+
+ if (c->dir_fd < 0) /* not yet realized, hence empty */
+ return 0;
+
+ xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
+ if (linkat(c->dir_fd, dev_ino, dt, to, 0) < 0) {
+ if (errno != ENOENT) /* doesn't exist in store yet */
+ log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", dev_ino, to);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int memorize_hardlink(
+ HardlinkContext *c,
+ const struct stat *st,
+ int dt,
+ const char *to) {
+
+ char dev_ino[DECIMAL_STR_MAX(dev_t)*2 + DECIMAL_STR_MAX(uint64_t) + 4];
+ int r;
+
+ assert(st);
+ assert(dt >= 0 || dt == AT_FDCWD);
+ assert(to);
+
+ if (!c) /* No temporary hardlink directory, don't bother */
+ return 0;
+
+ if (st->st_nlink <= 1) /* Source not hardlinked, don't bother */
+ return 0;
+
+ r = hardlink_context_realize(c); /* Create the hardlink store lazily */
+ if (r < 0)
+ return r;
+
+ xsprintf(dev_ino, "%u:%u:%" PRIu64, major(st->st_dev), minor(st->st_dev), (uint64_t) st->st_ino);
+ if (linkat(dt, to, c->dir_fd, dev_ino, 0) < 0) {
+ log_debug_errno(errno, "Failed to hardlink %s to %s, ignoring: %m", to, dev_ino);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int fd_copy_regular(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags,
+ HardlinkContext *hardlink_context,
+ copy_progress_bytes_t progress,
+ void *userdata) {
+
+ _cleanup_close_ int fdf = -1, fdt = -1;
+ struct timespec ts[2];
+ int r, q;
+
+ assert(from);
+ assert(st);
+ assert(to);
+
+ r = try_hardlink(hardlink_context, st, dt, to);
+ if (r < 0)
+ return r;
+ if (r > 0) /* worked! */
+ return 0;
+
+ fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fdf < 0)
+ return -errno;
+
+ if (copy_flags & COPY_MAC_CREATE) {
+ r = mac_selinux_create_file_prepare_at(dt, to, S_IFREG);
+ if (r < 0)
+ return r;
+ }
+ fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
+ if (copy_flags & COPY_MAC_CREATE)
+ mac_selinux_create_file_clear();
+ if (fdt < 0)
+ return -errno;
+
+ r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress, userdata);
+ if (r < 0) {
+ (void) unlinkat(dt, to, 0);
+ return r;
+ }
+
+ if (fchown(fdt,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
+ r = -errno;
+
+ if (fchmod(fdt, st->st_mode & 07777) < 0)
+ r = -errno;
+
+ ts[0] = st->st_atim;
+ ts[1] = st->st_mtim;
+ (void) futimens(fdt, ts);
+ (void) copy_xattr(fdf, fdt);
+
+ q = close(fdt);
+ fdt = -1;
+
+ if (q < 0) {
+ r = -errno;
+ (void) unlinkat(dt, to, 0);
+ }
+
+ (void) memorize_hardlink(hardlink_context, st, dt, to);
+ return r;
+}
+
+static int fd_copy_fifo(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags,
+ HardlinkContext *hardlink_context) {
+ int r;
+
+ assert(from);
+ assert(st);
+ assert(to);
+
+ r = try_hardlink(hardlink_context, st, dt, to);
+ if (r < 0)
+ return r;
+ if (r > 0) /* worked! */
+ return 0;
+
+ if (copy_flags & COPY_MAC_CREATE) {
+ r = mac_selinux_create_file_prepare_at(dt, to, S_IFIFO);
+ if (r < 0)
+ return r;
+ }
+ r = mkfifoat(dt, to, st->st_mode & 07777);
+ if (copy_flags & COPY_MAC_CREATE)
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return -errno;
+
+ if (fchownat(dt, to,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid,
+ AT_SYMLINK_NOFOLLOW) < 0)
+ r = -errno;
+
+ if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
+ r = -errno;
+
+ (void) memorize_hardlink(hardlink_context, st, dt, to);
+ return r;
+}
+
+static int fd_copy_node(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags,
+ HardlinkContext *hardlink_context) {
+ int r;
+
+ assert(from);
+ assert(st);
+ assert(to);
+
+ r = try_hardlink(hardlink_context, st, dt, to);
+ if (r < 0)
+ return r;
+ if (r > 0) /* worked! */
+ return 0;
+
+ if (copy_flags & COPY_MAC_CREATE) {
+ r = mac_selinux_create_file_prepare_at(dt, to, st->st_mode & S_IFMT);
+ if (r < 0)
+ return r;
+ }
+ r = mknodat(dt, to, st->st_mode, st->st_rdev);
+ if (copy_flags & COPY_MAC_CREATE)
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return -errno;
+
+ if (fchownat(dt, to,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid,
+ AT_SYMLINK_NOFOLLOW) < 0)
+ r = -errno;
+
+ if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
+ r = -errno;
+
+ (void) memorize_hardlink(hardlink_context, st, dt, to);
+ return r;
+}
+
+static int fd_copy_directory(
+ int df,
+ const char *from,
+ const struct stat *st,
+ int dt,
+ const char *to,
+ dev_t original_device,
+ unsigned depth_left,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags,
+ HardlinkContext *hardlink_context,
+ const char *display_path,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_(hardlink_context_destroy) HardlinkContext our_hardlink_context = {
+ .dir_fd = -1,
+ .parent_fd = -1,
+ };
+
+ _cleanup_close_ int fdf = -1, fdt = -1;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ bool exists, created;
+ int r;
+
+ assert(st);
+ assert(to);
+
+ if (depth_left == 0)
+ return -ENAMETOOLONG;
+
+ if (from)
+ fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ else
+ fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
+ if (fdf < 0)
+ return -errno;
+
+ if (!hardlink_context) {
+ /* If recreating hardlinks is requested let's set up a context for that now. */
+ r = hardlink_context_setup(&our_hardlink_context, dt, to, copy_flags);
+ if (r < 0)
+ return r;
+ if (r > 0) /* It's enabled and allocated, let's now use the same context for all recursive
+ * invocations from here down */
+ hardlink_context = &our_hardlink_context;
+ }
+
+ d = take_fdopendir(&fdf);
+ if (!d)
+ return -errno;
+
+ exists = false;
+ if (copy_flags & COPY_MERGE_EMPTY) {
+ r = dir_is_empty_at(dt, to);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ else if (r == 1)
+ exists = true;
+ }
+
+ if (exists)
+ created = false;
+ else {
+ if (copy_flags & COPY_MAC_CREATE)
+ r = mkdirat_label(dt, to, st->st_mode & 07777);
+ else
+ r = mkdirat(dt, to, st->st_mode & 07777);
+ if (r >= 0)
+ created = true;
+ else if (errno == EEXIST && (copy_flags & COPY_MERGE))
+ created = false;
+ else
+ return -errno;
+ }
+
+ fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fdt < 0)
+ return -errno;
+
+ r = 0;
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ const char *child_display_path = NULL;
+ _cleanup_free_ char *dp = NULL;
+ struct stat buf;
+ int q;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (FLAGS_SET(copy_flags, COPY_SIGINT)) {
+ r = sigint_pending();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -EINTR;
+ }
+
+ if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
+ r = -errno;
+ continue;
+ }
+
+ if (progress_path) {
+ if (display_path)
+ child_display_path = dp = path_join(display_path, de->d_name);
+ else
+ child_display_path = de->d_name;
+
+ r = progress_path(child_display_path, &buf, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (S_ISDIR(buf.st_mode)) {
+ /*
+ * Don't descend into directories on other file systems, if this is requested. We do a simple
+ * .st_dev check here, which basically comes for free. Note that we do this check only on
+ * directories, not other kind of file system objects, for two reason:
+ *
+ * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
+ * propagates the .st_dev field of the file system a file originates from all the way up
+ * through the stack to stat(). It doesn't do that for directories however. This means that
+ * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
+ * confusion we hence avoid relying on this check for regular files.
+ *
+ * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
+ * where we really want to avoid descending down in all eternity. However the .st_dev check
+ * is usually not sufficient for this protection anyway, as bind mount cycles from the same
+ * file system onto itself can't be detected that way. (Note we also do a recursion depth
+ * check, which is probably the better protection in this regard, which is why
+ * COPY_SAME_MOUNT is optional).
+ */
+
+ if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
+ if (buf.st_dev != original_device)
+ continue;
+
+ r = fd_is_mount_point(dirfd(d), de->d_name, 0);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+ }
+
+ q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags, hardlink_context, child_display_path, progress_path, progress_bytes, userdata);
+ } else if (S_ISREG(buf.st_mode))
+ q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context, progress_bytes, userdata);
+ else if (S_ISLNK(buf.st_mode))
+ q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
+ else if (S_ISFIFO(buf.st_mode))
+ q = fd_copy_fifo(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context);
+ else if (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode) || S_ISSOCK(buf.st_mode))
+ q = fd_copy_node(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags, hardlink_context);
+ else
+ q = -EOPNOTSUPP;
+
+ if (q == -EINTR) /* Propagate SIGINT up instantly */
+ return q;
+ if (q == -EEXIST && (copy_flags & COPY_MERGE))
+ q = 0;
+ if (q < 0)
+ r = q;
+ }
+
+ if (created) {
+ struct timespec ut[2] = {
+ st->st_atim,
+ st->st_mtim
+ };
+
+ if (fchown(fdt,
+ uid_is_valid(override_uid) ? override_uid : st->st_uid,
+ gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
+ r = -errno;
+
+ if (fchmod(fdt, st->st_mode & 07777) < 0)
+ r = -errno;
+
+ (void) copy_xattr(dirfd(d), fdt);
+ (void) futimens(fdt, ut);
+ }
+
+ return r;
+}
+
+int copy_tree_at_full(
+ int fdf,
+ const char *from,
+ int fdt,
+ const char *to,
+ uid_t override_uid,
+ gid_t override_gid,
+ CopyFlags copy_flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ struct stat st;
+
+ assert(from);
+ assert(to);
+
+ if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
+ return -errno;
+
+ if (S_ISREG(st.st_mode))
+ return fd_copy_regular(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL, progress_bytes, userdata);
+ else if (S_ISDIR(st.st_mode))
+ return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
+ else if (S_ISLNK(st.st_mode))
+ return fd_copy_symlink(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
+ else if (S_ISFIFO(st.st_mode))
+ return fd_copy_fifo(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL);
+ else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode) || S_ISSOCK(st.st_mode))
+ return fd_copy_node(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags, NULL);
+ else
+ return -EOPNOTSUPP;
+}
+
+int copy_directory_fd_full(
+ int dirfd,
+ const char *to,
+ CopyFlags copy_flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ struct stat st;
+
+ assert(dirfd >= 0);
+ assert(to);
+
+ if (fstat(dirfd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -ENOTDIR;
+
+ return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
+}
+
+int copy_directory_full(
+ const char *from,
+ const char *to,
+ CopyFlags copy_flags,
+ copy_progress_path_t progress_path,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ struct stat st;
+
+ assert(from);
+ assert(to);
+
+ if (lstat(from, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -ENOTDIR;
+
+ return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags, NULL, NULL, progress_path, progress_bytes, userdata);
+}
+
+int copy_file_fd_full(
+ const char *from,
+ int fdt,
+ CopyFlags copy_flags,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_close_ int fdf = -1;
+ int r;
+
+ assert(from);
+ assert(fdt >= 0);
+
+ fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fdf < 0)
+ return -errno;
+
+ r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress_bytes, userdata);
+
+ (void) copy_times(fdf, fdt, copy_flags);
+ (void) copy_xattr(fdf, fdt);
+
+ return r;
+}
+
+int copy_file_full(
+ const char *from,
+ const char *to,
+ int flags,
+ mode_t mode,
+ unsigned chattr_flags,
+ unsigned chattr_mask,
+ CopyFlags copy_flags,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ int fdt = -1, r;
+
+ assert(from);
+ assert(to);
+
+ RUN_WITH_UMASK(0000) {
+ if (copy_flags & COPY_MAC_CREATE) {
+ r = mac_selinux_create_file_prepare(to, S_IFREG);
+ if (r < 0)
+ return r;
+ }
+ fdt = open(to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, mode);
+ if (copy_flags & COPY_MAC_CREATE)
+ mac_selinux_create_file_clear();
+ if (fdt < 0)
+ return -errno;
+ }
+
+ if (chattr_mask != 0)
+ (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
+
+ r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
+ if (r < 0) {
+ close(fdt);
+ (void) unlink(to);
+ return r;
+ }
+
+ if (chattr_mask != 0)
+ (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
+
+ if (close(fdt) < 0) {
+ unlink_noerrno(to);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int copy_file_atomic_full(
+ const char *from,
+ const char *to,
+ mode_t mode,
+ unsigned chattr_flags,
+ unsigned chattr_mask,
+ CopyFlags copy_flags,
+ copy_progress_bytes_t progress_bytes,
+ void *userdata) {
+
+ _cleanup_(unlink_and_freep) char *t = NULL;
+ _cleanup_close_ int fdt = -1;
+ int r;
+
+ assert(from);
+ assert(to);
+
+ /* We try to use O_TMPFILE here to create the file if we can. Note that this only works if COPY_REPLACE is not
+ * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
+ * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
+ * system right-away and unconditionally which we then can renameat() to the right name after we completed
+ * writing it. */
+
+ if (copy_flags & COPY_REPLACE) {
+ r = tempfn_random(to, NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (copy_flags & COPY_MAC_CREATE) {
+ r = mac_selinux_create_file_prepare(to, S_IFREG);
+ if (r < 0) {
+ t = mfree(t);
+ return r;
+ }
+ }
+ fdt = open(t, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
+ if (copy_flags & COPY_MAC_CREATE)
+ mac_selinux_create_file_clear();
+ if (fdt < 0) {
+ t = mfree(t);
+ return -errno;
+ }
+ } else {
+ if (copy_flags & COPY_MAC_CREATE) {
+ r = mac_selinux_create_file_prepare(to, S_IFREG);
+ if (r < 0)
+ return r;
+ }
+ fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t);
+ if (copy_flags & COPY_MAC_CREATE)
+ mac_selinux_create_file_clear();
+ if (fdt < 0)
+ return fdt;
+ }
+
+ if (chattr_mask != 0)
+ (void) chattr_fd(fdt, chattr_flags, chattr_mask & CHATTR_EARLY_FL, NULL);
+
+ r = copy_file_fd_full(from, fdt, copy_flags, progress_bytes, userdata);
+ if (r < 0)
+ return r;
+
+ if (fchmod(fdt, mode) < 0)
+ return -errno;
+
+ if (copy_flags & COPY_REPLACE) {
+ if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0)
+ return -errno;
+ } else {
+ r = link_tmpfile(fdt, t, to);
+ if (r < 0)
+ return r;
+ }
+
+ if (chattr_mask != 0)
+ (void) chattr_fd(fdt, chattr_flags, chattr_mask & ~CHATTR_EARLY_FL, NULL);
+
+ t = mfree(t);
+ return 0;
+}
+
+int copy_times(int fdf, int fdt, CopyFlags flags) {
+ struct timespec ut[2];
+ struct stat st;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ if (fstat(fdf, &st) < 0)
+ return -errno;
+
+ ut[0] = st.st_atim;
+ ut[1] = st.st_mtim;
+
+ if (futimens(fdt, ut) < 0)
+ return -errno;
+
+ if (FLAGS_SET(flags, COPY_CRTIME)) {
+ usec_t crtime;
+
+ if (fd_getcrtime(fdf, &crtime) >= 0)
+ (void) fd_setcrtime(fdt, crtime);
+ }
+
+ return 0;
+}
+
+int copy_access(int fdf, int fdt) {
+ struct stat st;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ if (fstat(fdf, &st) < 0)
+ return -errno;
+
+ if (fchmod(fdt, st.st_mode & 07777) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int copy_xattr(int fdf, int fdt) {
+ _cleanup_free_ char *names = NULL;
+ int ret = 0, r;
+ const char *p;
+
+ r = flistxattr_malloc(fdf, &names);
+ if (r < 0)
+ return r;
+
+ NULSTR_FOREACH(p, names) {
+ _cleanup_free_ char *value = NULL;
+
+ if (!startswith(p, "user."))
+ continue;
+
+ r = fgetxattr_malloc(fdf, p, &value);
+ if (r == -ENODATA)
+ continue; /* gone by now */
+ if (r < 0)
+ return r;
+
+ if (fsetxattr(fdt, p, value, r, 0) < 0)
+ ret = -errno;
+ }
+
+ return ret;
+}
diff --git a/src/basic/copy.h b/src/basic/copy.h
new file mode 100644
index 0000000..b583dff
--- /dev/null
+++ b/src/basic/copy.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+typedef enum CopyFlags {
+ COPY_REFLINK = 1 << 0, /* Try to reflink */
+ COPY_MERGE = 1 << 1, /* Merge existing trees with our new one to copy */
+ COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */
+ COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */
+ COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */
+ COPY_CRTIME = 1 << 5, /* Generate a user.crtime_usec xattr off the source crtime if there is one, on copying */
+ COPY_SIGINT = 1 << 6, /* Check for SIGINT regularly and return EINTR if seen (caller needs to block SIGINT) */
+ COPY_MAC_CREATE = 1 << 7, /* Create files with the correct MAC label (currently SELinux only) */
+ COPY_HARDLINKS = 1 << 8, /* Try to reproduce hard links */
+} CopyFlags;
+
+typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata);
+typedef int (*copy_progress_path_t)(const char *path, const struct stat *st, void *userdata);
+
+int copy_file_fd_full(const char *from, int to, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file_fd(const char *from, int to, CopyFlags copy_flags) {
+ return copy_file_fd_full(from, to, copy_flags, NULL, NULL);
+}
+
+int copy_file_full(const char *from, const char *to, int open_flags, mode_t mode, unsigned chattr_flags, unsigned chattr_mask, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file(const char *from, const char *to, int open_flags, mode_t mode, unsigned chattr_flags, unsigned chattr_mask, CopyFlags copy_flags) {
+ return copy_file_full(from, to, open_flags, mode, chattr_flags, chattr_mask, copy_flags, NULL, NULL);
+}
+
+int copy_file_atomic_full(const char *from, const char *to, mode_t mode, unsigned chattr_flags, unsigned chattr_mask, CopyFlags copy_flags, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_file_atomic(const char *from, const char *to, mode_t mode, unsigned chattr_flags, unsigned chattr_mask, CopyFlags copy_flags) {
+ return copy_file_atomic_full(from, to, mode, chattr_flags, chattr_mask, copy_flags, NULL, NULL);
+}
+
+int copy_tree_at_full(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_tree_at(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) {
+ return copy_tree_at_full(fdf, from, fdt, to, override_uid, override_gid, copy_flags, NULL, NULL, NULL);
+}
+static inline int copy_tree(const char *from, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) {
+ return copy_tree_at_full(AT_FDCWD, from, AT_FDCWD, to, override_uid, override_gid, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_directory_fd_full(int dirfd, const char *to, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_directory_fd(int dirfd, const char *to, CopyFlags copy_flags) {
+ return copy_directory_fd_full(dirfd, to, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_directory_full(const char *from, const char *to, CopyFlags copy_flags, copy_progress_path_t progress_path, copy_progress_bytes_t progress_bytes, void *userdata);
+static inline int copy_directory(const char *from, const char *to, CopyFlags copy_flags) {
+ return copy_directory_full(from, to, copy_flags, NULL, NULL, NULL);
+}
+
+int copy_bytes_full(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags, void **ret_remains, size_t *ret_remains_size, copy_progress_bytes_t progress, void *userdata);
+static inline int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags) {
+ return copy_bytes_full(fdf, fdt, max_bytes, copy_flags, NULL, NULL, NULL, NULL);
+}
+
+int copy_times(int fdf, int fdt, CopyFlags flags);
+int copy_access(int fdf, int fdt);
+int copy_xattr(int fdf, int fdt);
diff --git a/src/basic/def.h b/src/basic/def.h
new file mode 100644
index 0000000..2e60abb
--- /dev/null
+++ b/src/basic/def.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#define DEFAULT_TIMEOUT_USEC (90*USEC_PER_SEC)
+#define DEFAULT_RESTART_USEC (100*USEC_PER_MSEC)
+#define DEFAULT_CONFIRM_USEC (30*USEC_PER_SEC)
+
+#define DEFAULT_START_LIMIT_INTERVAL (10*USEC_PER_SEC)
+#define DEFAULT_START_LIMIT_BURST 5
+
+/* The default time after which exit-on-idle services exit. This
+ * should be kept lower than the watchdog timeout, because otherwise
+ * the watchdog pings will keep the loop busy. */
+#define DEFAULT_EXIT_USEC (30*USEC_PER_SEC)
+
+/* The default value for the net.unix.max_dgram_qlen sysctl */
+#define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL
+
+#define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT
+#define SIGNALS_IGNORE SIGPIPE
+
+#define NOTIFY_FD_MAX 768
+#define NOTIFY_BUFFER_MAX PIPE_BUF
+
+#if HAVE_SPLIT_USR
+# define _CONF_PATHS_SPLIT_USR_NULSTR(n) "/lib/" n "\0"
+# define _CONF_PATHS_SPLIT_USR(n) , "/lib/" n
+#else
+# define _CONF_PATHS_SPLIT_USR_NULSTR(n)
+# define _CONF_PATHS_SPLIT_USR(n)
+#endif
+
+/* Return a nulstr for a standard cascade of configuration paths,
+ * suitable to pass to conf_files_list_nulstr() or config_parse_many_nulstr()
+ * to implement drop-in directories for extending configuration
+ * files. */
+#define CONF_PATHS_NULSTR(n) \
+ "/etc/" n "\0" \
+ "/run/" n "\0" \
+ "/usr/local/lib/" n "\0" \
+ "/usr/lib/" n "\0" \
+ _CONF_PATHS_SPLIT_USR_NULSTR(n)
+
+#define CONF_PATHS_USR(n) \
+ "/etc/" n, \
+ "/run/" n, \
+ "/usr/local/lib/" n, \
+ "/usr/lib/" n
+
+#define CONF_PATHS(n) \
+ CONF_PATHS_USR(n) \
+ _CONF_PATHS_SPLIT_USR(n)
+
+#define CONF_PATHS_USR_STRV(n) \
+ STRV_MAKE(CONF_PATHS_USR(n))
+
+#define CONF_PATHS_STRV(n) \
+ STRV_MAKE(CONF_PATHS(n))
+
+#define HIGH_RLIMIT_MEMLOCK (1024ULL*1024ULL*64ULL)
+
+#define PLYMOUTH_SOCKET { \
+ .un.sun_family = AF_UNIX, \
+ .un.sun_path = "\0/org/freedesktop/plymouthd", \
+ }
+
+#define VARLINK_ADDR_PATH_MANAGED_OOM "/run/systemd/io.system.ManagedOOM"
diff --git a/src/basic/device-nodes.c b/src/basic/device-nodes.c
new file mode 100644
index 0000000..7eb9c35
--- /dev/null
+++ b/src/basic/device-nodes.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "device-nodes.h"
+#include "utf8.h"
+
+int allow_listed_char_for_devnode(char c, const char *white) {
+
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ||
+ strchr("#+-.:=@_", c) != NULL ||
+ (white != NULL && strchr(white, c) != NULL))
+ return 1;
+
+ return 0;
+}
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len) {
+ size_t i, j;
+
+ if (!str || !str_enc)
+ return -EINVAL;
+
+ for (i = 0, j = 0; str[i] != '\0'; i++) {
+ int seqlen;
+
+ seqlen = utf8_encoded_valid_unichar(str + i, (size_t) -1);
+ if (seqlen > 1) {
+
+ if (len-j < (size_t)seqlen)
+ return -EINVAL;
+
+ memcpy(&str_enc[j], &str[i], seqlen);
+ j += seqlen;
+ i += (seqlen-1);
+
+ } else if (str[i] == '\\' || !allow_listed_char_for_devnode(str[i], NULL)) {
+
+ if (len-j < 4)
+ return -EINVAL;
+
+ sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
+ j += 4;
+
+ } else {
+ if (len-j < 1)
+ return -EINVAL;
+
+ str_enc[j] = str[i];
+ j++;
+ }
+ }
+
+ if (len-j < 1)
+ return -EINVAL;
+
+ str_enc[j] = '\0';
+ return 0;
+}
diff --git a/src/basic/device-nodes.h b/src/basic/device-nodes.h
new file mode 100644
index 0000000..9e5c79f
--- /dev/null
+++ b/src/basic/device-nodes.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stddef.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "stdio-util.h"
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len);
+int allow_listed_char_for_devnode(char c, const char *additional);
+
+#define DEV_NUM_PATH_MAX \
+ (STRLEN("/dev/block/") + DECIMAL_STR_MAX(dev_t) + 1 + DECIMAL_STR_MAX(dev_t))
+#define xsprintf_dev_num_path(buf, type, devno) \
+ xsprintf(buf, "/dev/%s/%u:%u", type, major(devno), minor(devno))
diff --git a/src/basic/dirent-util.c b/src/basic/dirent-util.c
new file mode 100644
index 0000000..48aced7
--- /dev/null
+++ b/src/basic/dirent-util.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "dirent-util.h"
+#include "path-util.h"
+#include "string-util.h"
+
+int dirent_ensure_type(DIR *d, struct dirent *de) {
+ struct stat st;
+
+ assert(d);
+ assert(de);
+
+ if (de->d_type != DT_UNKNOWN)
+ return 0;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
+ return -errno;
+
+ de->d_type =
+ S_ISREG(st.st_mode) ? DT_REG :
+ S_ISDIR(st.st_mode) ? DT_DIR :
+ S_ISLNK(st.st_mode) ? DT_LNK :
+ S_ISFIFO(st.st_mode) ? DT_FIFO :
+ S_ISSOCK(st.st_mode) ? DT_SOCK :
+ S_ISCHR(st.st_mode) ? DT_CHR :
+ S_ISBLK(st.st_mode) ? DT_BLK :
+ DT_UNKNOWN;
+
+ return 0;
+}
+
+bool dirent_is_file(const struct dirent *de) {
+ assert(de);
+
+ if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ if (hidden_or_backup_file(de->d_name))
+ return false;
+
+ return true;
+}
+
+bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) {
+ assert(de);
+
+ if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ if (de->d_name[0] == '.')
+ return false;
+
+ if (!suffix)
+ return true;
+
+ return endswith(de->d_name, suffix);
+}
+
+struct dirent* readdir_no_dot(DIR *dirp) {
+ struct dirent* d;
+
+ for (;;) {
+ d = readdir(dirp);
+ if (d && dot_or_dot_dot(d->d_name))
+ continue;
+ return d;
+ }
+}
diff --git a/src/basic/dirent-util.h b/src/basic/dirent-util.h
new file mode 100644
index 0000000..07750c3
--- /dev/null
+++ b/src/basic/dirent-util.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#include "macro.h"
+#include "path-util.h"
+
+int dirent_ensure_type(DIR *d, struct dirent *de);
+
+bool dirent_is_file(const struct dirent *de) _pure_;
+bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) _pure_;
+
+struct dirent* readdir_no_dot(DIR *dirp);
+
+#define FOREACH_DIRENT(de, d, on_error) \
+ for (errno = 0, de = readdir(d);; errno = 0, de = readdir(d)) \
+ if (!de) { \
+ if (errno > 0) { \
+ on_error; \
+ } \
+ break; \
+ } else if (hidden_or_backup_file((de)->d_name)) \
+ continue; \
+ else
+
+#define FOREACH_DIRENT_ALL(de, d, on_error) \
+ for (errno = 0, de = readdir(d);; errno = 0, de = readdir(d)) \
+ if (!de) { \
+ if (errno > 0) { \
+ on_error; \
+ } \
+ break; \
+ } else
diff --git a/src/basic/dlfcn-util.c b/src/basic/dlfcn-util.c
new file mode 100644
index 0000000..2dbff0e
--- /dev/null
+++ b/src/basic/dlfcn-util.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dlfcn-util.h"
+
+int dlsym_many_and_warn(void *dl, int level, ...) {
+ va_list ap;
+ int r;
+
+ /* Tries to resolve a bunch of function symbols, and logs errors about the ones it cannot
+ * resolve. Note that this function possibly modifies the supplied function pointers if the whole
+ * operation fails */
+
+ va_start(ap, level);
+
+ for (;;) {
+ void (**fn)(void);
+ void (*tfn)(void);
+ const char *symbol;
+
+ fn = va_arg(ap, typeof(fn));
+ if (!fn)
+ break;
+
+ symbol = va_arg(ap, typeof(symbol));
+
+ tfn = (typeof(tfn)) dlsym(dl, symbol);
+ if (!tfn) {
+ r = log_full_errno(level,
+ SYNTHETIC_ERRNO(ELIBBAD),
+ "Can't find symbol %s: %s", symbol, dlerror());
+ va_end(ap);
+ return r;
+ }
+
+ *fn = tfn;
+ }
+
+ va_end(ap);
+ return 0;
+}
diff --git a/src/basic/dlfcn-util.h b/src/basic/dlfcn-util.h
new file mode 100644
index 0000000..2c94ed5
--- /dev/null
+++ b/src/basic/dlfcn-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <dlfcn.h>
+
+#include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(void*, dlclose);
+
+int dlsym_many_and_warn(void *dl, int level, ...);
diff --git a/src/basic/efivars.c b/src/basic/efivars.c
new file mode 100644
index 0000000..5aeddef
--- /dev/null
+++ b/src/basic/efivars.c
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/fs.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "macro.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "virt.h"
+
+#if ENABLE_EFI
+
+/* Reads from efivarfs sometimes fail with EINTR. Retry that many times. */
+#define EFI_N_RETRIES_NO_DELAY 20
+#define EFI_N_RETRIES_TOTAL 25
+#define EFI_RETRY_DELAY (50 * USEC_PER_MSEC)
+
+char* efi_variable_path(sd_id128_t vendor, const char *name) {
+ char *p;
+
+ if (asprintf(&p,
+ "/sys/firmware/efi/efivars/%s-" SD_ID128_UUID_FORMAT_STR,
+ name, SD_ID128_FORMAT_VAL(vendor)) < 0)
+ return NULL;
+
+ return p;
+}
+
+static char* efi_variable_cache_path(sd_id128_t vendor, const char *name) {
+ char *p;
+
+ if (asprintf(&p,
+ "/run/systemd/efivars/%s-" SD_ID128_UUID_FORMAT_STR,
+ name, SD_ID128_FORMAT_VAL(vendor)) < 0)
+ return NULL;
+
+ return p;
+}
+
+int efi_get_variable(
+ sd_id128_t vendor,
+ const char *name,
+ uint32_t *ret_attribute,
+ void **ret_value,
+ size_t *ret_size) {
+
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *p = NULL;
+ _cleanup_free_ void *buf = NULL;
+ struct stat st;
+ usec_t begin;
+ uint32_t a;
+ ssize_t n;
+
+ assert(name);
+
+ p = efi_variable_path(vendor, name);
+ if (!p)
+ return -ENOMEM;
+
+ if (!ret_value && !ret_size && !ret_attribute) {
+ /* If caller is not interested in anything, just check if the variable exists and is
+ * readable. */
+ if (access(p, R_OK) < 0)
+ return -errno;
+
+ return 0;
+ }
+
+ if (DEBUG_LOGGING) {
+ log_debug("Reading EFI variable %s.", p);
+ begin = now(CLOCK_MONOTONIC);
+ }
+
+ fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return log_debug_errno(errno, "open(\"%s\") failed: %m", p);
+
+ if (fstat(fd, &st) < 0)
+ return log_debug_errno(errno, "fstat(\"%s\") failed: %m", p);
+ if (st.st_size < 4)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "EFI variable %s is shorter than 4 bytes, refusing.", p);
+ if (st.st_size > 4*1024*1024 + 4)
+ return log_debug_errno(SYNTHETIC_ERRNO(E2BIG), "EFI variable %s is ridiculously large, refusing.", p);
+
+ if (ret_value || ret_attribute) {
+ /* The kernel ratelimits reads from the efivarfs because EFI is inefficient, and we'll
+ * occasionally fail with EINTR here. A slowdown is better than a failure for us, so
+ * retry a few times and eventually fail with -EBUSY.
+ *
+ * See https://github.com/torvalds/linux/blob/master/fs/efivarfs/file.c#L75
+ * and
+ * https://github.com/torvalds/linux/commit/bef3efbeb897b56867e271cdbc5f8adaacaeb9cd.
+ */
+ for (unsigned try = 0;; try++) {
+ n = read(fd, &a, sizeof(a));
+ if (n >= 0)
+ break;
+ log_debug_errno(errno, "Reading from \"%s\" failed: %m", p);
+ if (errno != EINTR)
+ return -errno;
+ if (try >= EFI_N_RETRIES_TOTAL)
+ return -EBUSY;
+
+ if (try >= EFI_N_RETRIES_NO_DELAY)
+ (void) usleep(EFI_RETRY_DELAY);
+ }
+
+ if (n != sizeof(a))
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Read %zi bytes from EFI variable %s, expected %zu.", n, p, sizeof(a));
+ }
+
+ if (ret_value) {
+ buf = malloc(st.st_size - 4 + 3);
+ if (!buf)
+ return -ENOMEM;
+
+ n = read(fd, buf, (size_t) st.st_size - 4);
+ if (n < 0)
+ return log_debug_errno(errno, "Failed to read value of EFI variable %s: %m", p);
+ assert(n <= st.st_size - 4);
+
+ /* Always NUL terminate (3 bytes, to properly protect UTF-16, even if truncated in the middle of a character) */
+ ((char*) buf)[n] = 0;
+ ((char*) buf)[n + 1] = 0;
+ ((char*) buf)[n + 2] = 0;
+ } else
+ /* Assume that the reported size is accurate */
+ n = st.st_size - 4;
+
+ if (DEBUG_LOGGING) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ usec_t end;
+
+ end = now(CLOCK_MONOTONIC);
+ if (end > begin + EFI_RETRY_DELAY)
+ log_debug("Detected slow EFI variable read access on " SD_ID128_FORMAT_STR "-%s: %s",
+ SD_ID128_FORMAT_VAL(vendor), name, format_timespan(ts, sizeof(ts), end - begin, 1));
+ }
+
+ /* Note that efivarfs interestingly doesn't require ftruncate() to update an existing EFI variable
+ * with a smaller value. */
+
+ if (ret_attribute)
+ *ret_attribute = a;
+
+ if (ret_value)
+ *ret_value = TAKE_PTR(buf);
+
+ if (ret_size)
+ *ret_size = n;
+
+ return 0;
+}
+
+int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p) {
+ _cleanup_free_ void *s = NULL;
+ size_t ss = 0;
+ int r;
+ char *x;
+
+ r = efi_get_variable(vendor, name, NULL, &s, &ss);
+ if (r < 0)
+ return r;
+
+ x = utf16_to_utf8(s, ss);
+ if (!x)
+ return -ENOMEM;
+
+ *p = x;
+ return 0;
+}
+
+int efi_set_variable(
+ sd_id128_t vendor,
+ const char *name,
+ const void *value,
+ size_t size) {
+
+ struct var {
+ uint32_t attr;
+ char buf[];
+ } _packed_ * _cleanup_free_ buf = NULL;
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ bool saved_flags_valid = false;
+ unsigned saved_flags;
+ int r;
+
+ assert(name);
+ assert(value || size == 0);
+
+ p = efi_variable_path(vendor, name);
+ if (!p)
+ return -ENOMEM;
+
+ /* Newer efivarfs protects variables that are not in an allow list with FS_IMMUTABLE_FL by default,
+ * to protect them for accidental removal and modification. We are not changing these variables
+ * accidentally however, hence let's unset the bit first. */
+
+ r = chattr_path(p, 0, FS_IMMUTABLE_FL, &saved_flags);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to drop FS_IMMUTABLE_FL flag from '%s', ignoring: %m", p);
+
+ saved_flags_valid = r >= 0;
+
+ if (size == 0) {
+ if (unlink(p) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ return 0;
+ }
+
+ fd = open(p, O_WRONLY|O_CREAT|O_NOCTTY|O_CLOEXEC, 0644);
+ if (fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ buf = malloc(sizeof(uint32_t) + size);
+ if (!buf) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ buf->attr = EFI_VARIABLE_NON_VOLATILE|EFI_VARIABLE_BOOTSERVICE_ACCESS|EFI_VARIABLE_RUNTIME_ACCESS;
+ memcpy(buf->buf, value, size);
+
+ r = loop_write(fd, buf, sizeof(uint32_t) + size, false);
+ if (r < 0)
+ goto finish;
+
+ /* For some reason efivarfs doesn't update mtime automatically. Let's do it manually then. This is
+ * useful for processes that cache EFI variables to detect when changes occurred. */
+ if (futimens(fd, (struct timespec[2]) {
+ { .tv_nsec = UTIME_NOW },
+ { .tv_nsec = UTIME_NOW }
+ }) < 0)
+ log_debug_errno(errno, "Failed to update mtime/atime on %s, ignoring: %m", p);
+
+ r = 0;
+
+finish:
+ if (saved_flags_valid) {
+ int q;
+
+ /* Restore the original flags field, just in case */
+ if (fd < 0)
+ q = chattr_path(p, saved_flags, FS_IMMUTABLE_FL, NULL);
+ else
+ q = chattr_fd(fd, saved_flags, FS_IMMUTABLE_FL, NULL);
+ if (q < 0)
+ log_debug_errno(q, "Failed to restore FS_IMMUTABLE_FL on '%s', ignoring: %m", p);
+ }
+
+ return r;
+}
+
+int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *v) {
+ _cleanup_free_ char16_t *u16 = NULL;
+
+ u16 = utf8_to_utf16(v, strlen(v));
+ if (!u16)
+ return -ENOMEM;
+
+ return efi_set_variable(vendor, name, u16, (char16_strlen(u16) + 1) * sizeof(char16_t));
+}
+
+bool is_efi_boot(void) {
+ static int cache = -1;
+
+ if (cache < 0) {
+ if (detect_container() > 0)
+ cache = false;
+ else
+ cache = access("/sys/firmware/efi/", F_OK) >= 0;
+ }
+
+ return cache;
+}
+
+static int read_flag(const char *varname) {
+ _cleanup_free_ void *v = NULL;
+ uint8_t b;
+ size_t s;
+ int r;
+
+ if (!is_efi_boot()) /* If this is not an EFI boot, assume the queried flags are zero */
+ return 0;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, varname, NULL, &v, &s);
+ if (r < 0)
+ return r;
+
+ if (s != 1)
+ return -EINVAL;
+
+ b = *(uint8_t *)v;
+ return !!b;
+}
+
+bool is_efi_secure_boot(void) {
+ static int cache = -1;
+
+ if (cache < 0)
+ cache = read_flag("SecureBoot");
+
+ return cache > 0;
+}
+
+bool is_efi_secure_boot_setup_mode(void) {
+ static int cache = -1;
+
+ if (cache < 0)
+ cache = read_flag("SetupMode");
+
+ return cache > 0;
+}
+
+int cache_efi_options_variable(void) {
+ _cleanup_free_ char *line = NULL, *cachepath = NULL;
+ int r;
+
+ /* In SecureBoot mode this is probably not what you want. As your cmdline is cryptographically signed
+ * like when using Type #2 EFI Unified Kernel Images (https://systemd.io/BOOT_LOADER_SPECIFICATION/)
+ * The user's intention is then that the cmdline should not be modified. You want to make sure that
+ * the system starts up as exactly specified in the signed artifact.
+ *
+ * (NB: For testing purposes, we still check the $SYSTEMD_EFI_OPTIONS env var before accessing this
+ * cache, even when in SecureBoot mode.) */
+ if (is_efi_secure_boot()) {
+ _cleanup_free_ char *k;
+
+ k = efi_variable_path(EFI_VENDOR_SYSTEMD, "SystemdOptions");
+ if (!k)
+ return -ENOMEM;
+
+ /* Let's be helpful with the returned error and check if the variable exists at all. If it
+ * does, let's return a recognizable error (EPERM), and if not ENODATA. */
+
+ if (access(k, F_OK) < 0)
+ return errno == ENOENT ? -ENODATA : -errno;
+
+ return -EPERM;
+ }
+
+ r = efi_get_variable_string(EFI_VENDOR_SYSTEMD, "SystemdOptions", &line);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ cachepath = efi_variable_cache_path(EFI_VENDOR_SYSTEMD, "SystemdOptions");
+ if (!cachepath)
+ return -ENOMEM;
+
+ return write_string_file(cachepath, line, WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_MKDIR_0755);
+}
+
+int systemd_efi_options_variable(char **line) {
+ const char *e;
+ _cleanup_free_ char *cachepath = NULL;
+ int r;
+
+ assert(line);
+
+ /* For testing purposes it is sometimes useful to be able to override this */
+ e = secure_getenv("SYSTEMD_EFI_OPTIONS");
+ if (e) {
+ char *m;
+
+ m = strdup(e);
+ if (!m)
+ return -ENOMEM;
+
+ *line = m;
+ return 0;
+ }
+
+ cachepath = efi_variable_cache_path(EFI_VENDOR_SYSTEMD, "SystemdOptions");
+ if (!cachepath)
+ return -ENOMEM;
+
+ r = read_one_line_file(cachepath, line);
+ if (r == -ENOENT)
+ return -ENODATA;
+ return r;
+}
+#endif
diff --git a/src/basic/efivars.h b/src/basic/efivars.h
new file mode 100644
index 0000000..d310dde
--- /dev/null
+++ b/src/basic/efivars.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if !ENABLE_EFI
+# include <errno.h>
+#endif
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "sd-id128.h"
+
+#include "efi/loader-features.h"
+#include "time-util.h"
+
+#define EFI_VENDOR_LOADER SD_ID128_MAKE(4a,67,b0,82,0a,4c,41,cf,b6,c7,44,0b,29,bb,8c,4f)
+#define EFI_VENDOR_GLOBAL SD_ID128_MAKE(8b,e4,df,61,93,ca,11,d2,aa,0d,00,e0,98,03,2b,8c)
+#define EFI_VENDOR_SYSTEMD SD_ID128_MAKE(8c,f2,64,4b,4b,0b,42,8f,93,87,6d,87,60,50,dc,67)
+#define EFI_VARIABLE_NON_VOLATILE 0x0000000000000001
+#define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x0000000000000002
+#define EFI_VARIABLE_RUNTIME_ACCESS 0x0000000000000004
+
+#if ENABLE_EFI
+
+char* efi_variable_path(sd_id128_t vendor, const char *name);
+int efi_get_variable(sd_id128_t vendor, const char *name, uint32_t *attribute, void **value, size_t *size);
+int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p);
+int efi_set_variable(sd_id128_t vendor, const char *name, const void *value, size_t size);
+int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *p);
+
+bool is_efi_boot(void);
+bool is_efi_secure_boot(void);
+bool is_efi_secure_boot_setup_mode(void);
+
+int cache_efi_options_variable(void);
+int systemd_efi_options_variable(char **line);
+
+#else
+
+static inline char* efi_variable_path(sd_id128_t vendor, const char *name) {
+ return NULL;
+}
+
+static inline int efi_get_variable(sd_id128_t vendor, const char *name, uint32_t *attribute, void **value, size_t *size) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_variable(sd_id128_t vendor, const char *name, const void *value, size_t size) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *p) {
+ return -EOPNOTSUPP;
+}
+
+static inline bool is_efi_boot(void) {
+ return false;
+}
+
+static inline bool is_efi_secure_boot(void) {
+ return false;
+}
+
+static inline bool is_efi_secure_boot_setup_mode(void) {
+ return false;
+}
+
+static inline int cache_efi_options_variable(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int systemd_efi_options_variable(char **line) {
+ return -ENODATA;
+}
+
+#endif
diff --git a/src/basic/env-file.c b/src/basic/env-file.c
new file mode 100644
index 0000000..99c3e3f
--- /dev/null
+++ b/src/basic/env-file.c
@@ -0,0 +1,567 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "utf8.h"
+
+static int parse_env_file_internal(
+ FILE *f,
+ const char *fname,
+ int (*push) (const char *filename, unsigned line,
+ const char *key, char *value, void *userdata, int *n_pushed),
+ void *userdata,
+ int *n_pushed) {
+
+ size_t key_alloc = 0, n_key = 0, value_alloc = 0, n_value = 0, last_value_whitespace = (size_t) -1, last_key_whitespace = (size_t) -1;
+ _cleanup_free_ char *contents = NULL, *key = NULL, *value = NULL;
+ unsigned line = 1;
+ char *p;
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ VALUE_ESCAPE,
+ SINGLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE_ESCAPE,
+ COMMENT,
+ COMMENT_ESCAPE
+ } state = PRE_KEY;
+
+ if (f)
+ r = read_full_stream(f, &contents, NULL);
+ else
+ r = read_full_file(fname, &contents, NULL);
+ if (r < 0)
+ return r;
+
+ for (p = contents; *p; p++) {
+ char c = *p;
+
+ switch (state) {
+
+ case PRE_KEY:
+ if (strchr(COMMENTS, c))
+ state = COMMENT;
+ else if (!strchr(WHITESPACE, c)) {
+ state = KEY;
+ last_key_whitespace = (size_t) -1;
+
+ if (!GREEDY_REALLOC(key, key_alloc, n_key+2))
+ return -ENOMEM;
+
+ key[n_key++] = c;
+ }
+ break;
+
+ case KEY:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+ n_key = 0;
+ } else if (c == '=') {
+ state = PRE_VALUE;
+ last_value_whitespace = (size_t) -1;
+ } else {
+ if (!strchr(WHITESPACE, c))
+ last_key_whitespace = (size_t) -1;
+ else if (last_key_whitespace == (size_t) -1)
+ last_key_whitespace = n_key;
+
+ if (!GREEDY_REALLOC(key, key_alloc, n_key+2))
+ return -ENOMEM;
+
+ key[n_key++] = c;
+ }
+
+ break;
+
+ case PRE_VALUE:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ n_key = 0;
+ value = NULL;
+ value_alloc = n_value = 0;
+
+ } else if (c == '\'')
+ state = SINGLE_QUOTE_VALUE;
+ else if (c == '"')
+ state = DOUBLE_QUOTE_VALUE;
+ else if (c == '\\')
+ state = VALUE_ESCAPE;
+ else if (!strchr(WHITESPACE, c)) {
+ state = VALUE;
+
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case VALUE:
+ if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ /* Chomp off trailing whitespace from value */
+ if (last_value_whitespace != (size_t) -1)
+ value[last_value_whitespace] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ n_key = 0;
+ value = NULL;
+ value_alloc = n_value = 0;
+
+ } else if (c == '\\') {
+ state = VALUE_ESCAPE;
+ last_value_whitespace = (size_t) -1;
+ } else {
+ if (!strchr(WHITESPACE, c))
+ last_value_whitespace = (size_t) -1;
+ else if (last_value_whitespace == (size_t) -1)
+ last_value_whitespace = n_value;
+
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case VALUE_ESCAPE:
+ state = VALUE;
+
+ if (!strchr(NEWLINE, c)) {
+ /* Escaped newlines we eat up entirely */
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+ break;
+
+ case SINGLE_QUOTE_VALUE:
+ if (c == '\'')
+ state = PRE_VALUE;
+ else {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case DOUBLE_QUOTE_VALUE:
+ if (c == '"')
+ state = PRE_VALUE;
+ else if (c == '\\')
+ state = DOUBLE_QUOTE_VALUE_ESCAPE;
+ else {
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+
+ value[n_value++] = c;
+ }
+
+ break;
+
+ case DOUBLE_QUOTE_VALUE_ESCAPE:
+ state = DOUBLE_QUOTE_VALUE;
+
+ if (strchr(SHELL_NEED_ESCAPE, c)) {
+ /* If this is a char that needs escaping, just unescape it. */
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+2))
+ return -ENOMEM;
+ value[n_value++] = c;
+ } else if (c != '\n') {
+ /* If other char than what needs escaping, keep the "\" in place, like the
+ * real shell does. */
+ if (!GREEDY_REALLOC(value, value_alloc, n_value+3))
+ return -ENOMEM;
+ value[n_value++] = '\\';
+ value[n_value++] = c;
+ }
+
+ /* Escaped newlines (aka "continuation lines") are eaten up entirely */
+ break;
+
+ case COMMENT:
+ if (c == '\\')
+ state = COMMENT_ESCAPE;
+ else if (strchr(NEWLINE, c)) {
+ state = PRE_KEY;
+ line++;
+ }
+ break;
+
+ case COMMENT_ESCAPE:
+ state = COMMENT;
+ break;
+ }
+ }
+
+ if (IN_SET(state,
+ PRE_VALUE,
+ VALUE,
+ VALUE_ESCAPE,
+ SINGLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE,
+ DOUBLE_QUOTE_VALUE_ESCAPE)) {
+
+ key[n_key] = 0;
+
+ if (value)
+ value[n_value] = 0;
+
+ if (state == VALUE)
+ if (last_value_whitespace != (size_t) -1)
+ value[last_value_whitespace] = 0;
+
+ /* strip trailing whitespace from key */
+ if (last_key_whitespace != (size_t) -1)
+ key[last_key_whitespace] = 0;
+
+ r = push(fname, line, key, value, userdata, n_pushed);
+ if (r < 0)
+ return r;
+
+ value = NULL;
+ }
+
+ return 0;
+}
+
+static int check_utf8ness_and_warn(
+ const char *filename, unsigned line,
+ const char *key, char *value) {
+
+ if (!utf8_is_valid(key)) {
+ _cleanup_free_ char *p = NULL;
+
+ p = utf8_escape_invalid(key);
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s:%u: invalid UTF-8 in key '%s', ignoring.",
+ strna(filename), line, p);
+ }
+
+ if (value && !utf8_is_valid(value)) {
+ _cleanup_free_ char *p = NULL;
+
+ p = utf8_escape_invalid(value);
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.",
+ strna(filename), line, key, p);
+ }
+
+ return 0;
+}
+
+static int parse_env_file_push(
+ const char *filename, unsigned line,
+ const char *key, char *value,
+ void *userdata,
+ int *n_pushed) {
+
+ const char *k;
+ va_list aq, *ap = userdata;
+ int r;
+
+ r = check_utf8ness_and_warn(filename, line, key, value);
+ if (r < 0)
+ return r;
+
+ va_copy(aq, *ap);
+
+ while ((k = va_arg(aq, const char *))) {
+ char **v;
+
+ v = va_arg(aq, char **);
+
+ if (streq(key, k)) {
+ va_end(aq);
+ free(*v);
+ *v = value;
+
+ if (n_pushed)
+ (*n_pushed)++;
+
+ return 1;
+ }
+ }
+
+ va_end(aq);
+ free(value);
+
+ return 0;
+}
+
+int parse_env_filev(
+ FILE *f,
+ const char *fname,
+ va_list ap) {
+
+ int r, n_pushed = 0;
+ va_list aq;
+
+ va_copy(aq, ap);
+ r = parse_env_file_internal(f, fname, parse_env_file_push, &aq, &n_pushed);
+ va_end(aq);
+ if (r < 0)
+ return r;
+
+ return n_pushed;
+}
+
+int parse_env_file_sentinel(
+ FILE *f,
+ const char *fname,
+ ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, fname);
+ r = parse_env_filev(f, fname, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static int load_env_file_push(
+ const char *filename, unsigned line,
+ const char *key, char *value,
+ void *userdata,
+ int *n_pushed) {
+ char ***m = userdata;
+ char *p;
+ int r;
+
+ r = check_utf8ness_and_warn(filename, line, key, value);
+ if (r < 0)
+ return r;
+
+ p = strjoin(key, "=", value);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_env_replace(m, p);
+ if (r < 0) {
+ free(p);
+ return r;
+ }
+
+ if (n_pushed)
+ (*n_pushed)++;
+
+ free(value);
+ return 0;
+}
+
+int load_env_file(FILE *f, const char *fname, char ***rl) {
+ char **m = NULL;
+ int r;
+
+ r = parse_env_file_internal(f, fname, load_env_file_push, &m, NULL);
+ if (r < 0) {
+ strv_free(m);
+ return r;
+ }
+
+ *rl = m;
+ return 0;
+}
+
+static int load_env_file_push_pairs(
+ const char *filename, unsigned line,
+ const char *key, char *value,
+ void *userdata,
+ int *n_pushed) {
+ char ***m = userdata;
+ int r;
+
+ r = check_utf8ness_and_warn(filename, line, key, value);
+ if (r < 0)
+ return r;
+
+ r = strv_extend(m, key);
+ if (r < 0)
+ return -ENOMEM;
+
+ if (!value) {
+ r = strv_extend(m, "");
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ r = strv_push(m, value);
+ if (r < 0)
+ return r;
+ }
+
+ if (n_pushed)
+ (*n_pushed)++;
+
+ return 0;
+}
+
+int load_env_file_pairs(FILE *f, const char *fname, char ***rl) {
+ char **m = NULL;
+ int r;
+
+ r = parse_env_file_internal(f, fname, load_env_file_push_pairs, &m, NULL);
+ if (r < 0) {
+ strv_free(m);
+ return r;
+ }
+
+ *rl = m;
+ return 0;
+}
+
+static int merge_env_file_push(
+ const char *filename, unsigned line,
+ const char *key, char *value,
+ void *userdata,
+ int *n_pushed) {
+
+ char ***env = userdata;
+ char *expanded_value;
+
+ assert(env);
+
+ if (!value) {
+ log_error("%s:%u: invalid syntax (around \"%s\"), ignoring.", strna(filename), line, key);
+ return 0;
+ }
+
+ if (!env_name_is_valid(key)) {
+ log_error("%s:%u: invalid variable name \"%s\", ignoring.", strna(filename), line, key);
+ free(value);
+ return 0;
+ }
+
+ expanded_value = replace_env(value, *env,
+ REPLACE_ENV_USE_ENVIRONMENT|
+ REPLACE_ENV_ALLOW_BRACELESS|
+ REPLACE_ENV_ALLOW_EXTENDED);
+ if (!expanded_value)
+ return -ENOMEM;
+
+ free_and_replace(value, expanded_value);
+
+ log_debug("%s:%u: setting %s=%s", filename, line, key, value);
+
+ return load_env_file_push(filename, line, key, value, env, n_pushed);
+}
+
+int merge_env_file(
+ char ***env,
+ FILE *f,
+ const char *fname) {
+
+ /* NOTE: this function supports braceful and braceless variable expansions,
+ * plus "extended" substitutions, unlike other exported parsing functions.
+ */
+
+ return parse_env_file_internal(f, fname, merge_env_file_push, env, NULL);
+}
+
+static void write_env_var(FILE *f, const char *v) {
+ const char *p;
+
+ p = strchr(v, '=');
+ if (!p) {
+ /* Fallback */
+ fputs_unlocked(v, f);
+ fputc_unlocked('\n', f);
+ return;
+ }
+
+ p++;
+ fwrite_unlocked(v, 1, p-v, f);
+
+ if (string_has_cc(p, NULL) || chars_intersect(p, WHITESPACE SHELL_NEED_QUOTES)) {
+ fputc_unlocked('"', f);
+
+ for (; *p; p++) {
+ if (strchr(SHELL_NEED_ESCAPE, *p))
+ fputc_unlocked('\\', f);
+
+ fputc_unlocked(*p, f);
+ }
+
+ fputc_unlocked('"', f);
+ } else
+ fputs_unlocked(p, f);
+
+ fputc_unlocked('\n', f);
+}
+
+int write_env_file(const char *fname, char **l) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ char **i;
+ int r;
+
+ assert(fname);
+
+ r = fopen_temporary(fname, &f, &p);
+ if (r < 0)
+ return r;
+
+ (void) fchmod_umask(fileno(f), 0644);
+
+ STRV_FOREACH(i, l)
+ write_env_var(f, *i);
+
+ r = fflush_and_check(f);
+ if (r >= 0) {
+ if (rename(p, fname) >= 0)
+ return 0;
+
+ r = -errno;
+ }
+
+ (void) unlink(p);
+ return r;
+}
diff --git a/src/basic/env-file.h b/src/basic/env-file.h
new file mode 100644
index 0000000..de47588
--- /dev/null
+++ b/src/basic/env-file.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+int parse_env_filev(FILE *f, const char *fname, va_list ap);
+int parse_env_file_sentinel(FILE *f, const char *fname, ...) _sentinel_;
+#define parse_env_file(f, fname, ...) parse_env_file_sentinel(f, fname, __VA_ARGS__, NULL)
+int load_env_file(FILE *f, const char *fname, char ***l);
+int load_env_file_pairs(FILE *f, const char *fname, char ***l);
+
+int merge_env_file(char ***env, FILE *f, const char *fname);
+
+int write_env_file(const char *fname, char **l);
diff --git a/src/basic/env-util.c b/src/basic/env-util.c
new file mode 100644
index 0000000..b2483af
--- /dev/null
+++ b/src/basic/env-util.c
@@ -0,0 +1,756 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+/* We follow bash for the character set. Different shells have different rules. */
+#define VALID_BASH_ENV_NAME_CHARS \
+ DIGITS LETTERS \
+ "_"
+
+static bool env_name_is_valid_n(const char *e, size_t n) {
+ const char *p;
+
+ if (!e)
+ return false;
+
+ if (n <= 0)
+ return false;
+
+ if (e[0] >= '0' && e[0] <= '9')
+ return false;
+
+ /* POSIX says the overall size of the environment block cannot
+ * be > ARG_MAX, an individual assignment hence cannot be
+ * either. Discounting the equal sign and trailing NUL this
+ * hence leaves ARG_MAX-2 as longest possible variable
+ * name. */
+ if (n > (size_t) sysconf(_SC_ARG_MAX) - 2)
+ return false;
+
+ for (p = e; p < e + n; p++)
+ if (!strchr(VALID_BASH_ENV_NAME_CHARS, *p))
+ return false;
+
+ return true;
+}
+
+bool env_name_is_valid(const char *e) {
+ return env_name_is_valid_n(e, strlen_ptr(e));
+}
+
+bool env_value_is_valid(const char *e) {
+ if (!e)
+ return false;
+
+ if (!utf8_is_valid(e))
+ return false;
+
+ /* Note that variable *values* may contain control characters, in particular NL, TAB, BS, DEL, ESC…
+ * When printing those variables with show-environment, we'll escape them. Make sure to print
+ * environment variables carefully! */
+
+ /* POSIX says the overall size of the environment block cannot be > ARG_MAX, an individual assignment
+ * hence cannot be either. Discounting the shortest possible variable name of length 1, the equal
+ * sign and trailing NUL this hence leaves ARG_MAX-3 as longest possible variable value. */
+ if (strlen(e) > sc_arg_max() - 3)
+ return false;
+
+ return true;
+}
+
+bool env_assignment_is_valid(const char *e) {
+ const char *eq;
+
+ eq = strchr(e, '=');
+ if (!eq)
+ return false;
+
+ if (!env_name_is_valid_n(e, eq - e))
+ return false;
+
+ if (!env_value_is_valid(eq + 1))
+ return false;
+
+ /* POSIX says the overall size of the environment block cannot be > ARG_MAX, hence the individual
+ * variable assignments cannot be either, but let's leave room for one trailing NUL byte. */
+ if (strlen(e) > sc_arg_max() - 1)
+ return false;
+
+ return true;
+}
+
+bool strv_env_is_valid(char **e) {
+ char **p, **q;
+
+ STRV_FOREACH(p, e) {
+ size_t k;
+
+ if (!env_assignment_is_valid(*p))
+ return false;
+
+ /* Check if there are duplicate assignments */
+ k = strcspn(*p, "=");
+ STRV_FOREACH(q, p + 1)
+ if (strneq(*p, *q, k) && (*q)[k] == '=')
+ return false;
+ }
+
+ return true;
+}
+
+bool strv_env_name_is_valid(char **l) {
+ char **p;
+
+ STRV_FOREACH(p, l) {
+ if (!env_name_is_valid(*p))
+ return false;
+
+ if (strv_contains(p + 1, *p))
+ return false;
+ }
+
+ return true;
+}
+
+bool strv_env_name_or_assignment_is_valid(char **l) {
+ char **p;
+
+ STRV_FOREACH(p, l) {
+ if (!env_assignment_is_valid(*p) && !env_name_is_valid(*p))
+ return false;
+
+ if (strv_contains(p + 1, *p))
+ return false;
+ }
+
+ return true;
+}
+
+static int env_append(char **r, char ***k, char **a) {
+ assert(r);
+ assert(k);
+ assert(*k >= r);
+
+ if (!a)
+ return 0;
+
+ /* Expects the following arguments: 'r' shall point to the beginning of an strv we are going to append to, 'k'
+ * to a pointer pointing to the NULL entry at the end of the same array. 'a' shall point to another strv.
+ *
+ * This call adds every entry of 'a' to 'r', either overriding an existing matching entry, or appending to it.
+ *
+ * This call assumes 'r' has enough pre-allocated space to grow by all of 'a''s items. */
+
+ for (; *a; a++) {
+ char **j, *c;
+ size_t n;
+
+ n = strcspn(*a, "=");
+ if ((*a)[n] == '=')
+ n++;
+
+ for (j = r; j < *k; j++)
+ if (strneq(*j, *a, n))
+ break;
+
+ c = strdup(*a);
+ if (!c)
+ return -ENOMEM;
+
+ if (j >= *k) { /* Append to the end? */
+ (*k)[0] = c;
+ (*k)[1] = NULL;
+ (*k)++;
+ } else
+ free_and_replace(*j, c); /* Override existing item */
+ }
+
+ return 0;
+}
+
+char **strv_env_merge(size_t n_lists, ...) {
+ _cleanup_strv_free_ char **ret = NULL;
+ size_t n = 0, i;
+ char **l, **k;
+ va_list ap;
+
+ /* Merges an arbitrary number of environment sets */
+
+ va_start(ap, n_lists);
+ for (i = 0; i < n_lists; i++) {
+ l = va_arg(ap, char**);
+ n += strv_length(l);
+ }
+ va_end(ap);
+
+ ret = new(char*, n+1);
+ if (!ret)
+ return NULL;
+
+ *ret = NULL;
+ k = ret;
+
+ va_start(ap, n_lists);
+ for (i = 0; i < n_lists; i++) {
+ l = va_arg(ap, char**);
+ if (env_append(ret, &k, l) < 0) {
+ va_end(ap);
+ return NULL;
+ }
+ }
+ va_end(ap);
+
+ return TAKE_PTR(ret);
+}
+
+static bool env_match(const char *t, const char *pattern) {
+ assert(t);
+ assert(pattern);
+
+ /* pattern a matches string a
+ * a matches a=
+ * a matches a=b
+ * a= matches a=
+ * a=b matches a=b
+ * a= does not match a
+ * a=b does not match a=
+ * a=b does not match a
+ * a=b does not match a=c */
+
+ if (streq(t, pattern))
+ return true;
+
+ if (!strchr(pattern, '=')) {
+ size_t l = strlen(pattern);
+
+ return strneq(t, pattern, l) && t[l] == '=';
+ }
+
+ return false;
+}
+
+static bool env_entry_has_name(const char *entry, const char *name) {
+ const char *t;
+
+ assert(entry);
+ assert(name);
+
+ t = startswith(entry, name);
+ if (!t)
+ return false;
+
+ return *t == '=';
+}
+
+char **strv_env_delete(char **x, size_t n_lists, ...) {
+ size_t n, i = 0;
+ char **k, **r;
+ va_list ap;
+
+ /* Deletes every entry from x that is mentioned in the other
+ * string lists */
+
+ n = strv_length(x);
+
+ r = new(char*, n+1);
+ if (!r)
+ return NULL;
+
+ STRV_FOREACH(k, x) {
+ size_t v;
+
+ va_start(ap, n_lists);
+ for (v = 0; v < n_lists; v++) {
+ char **l, **j;
+
+ l = va_arg(ap, char**);
+ STRV_FOREACH(j, l)
+ if (env_match(*k, *j))
+ goto skip;
+ }
+ va_end(ap);
+
+ r[i] = strdup(*k);
+ if (!r[i]) {
+ strv_free(r);
+ return NULL;
+ }
+
+ i++;
+ continue;
+
+ skip:
+ va_end(ap);
+ }
+
+ r[i] = NULL;
+
+ assert(i <= n);
+
+ return r;
+}
+
+char **strv_env_unset(char **l, const char *p) {
+
+ char **f, **t;
+
+ if (!l)
+ return NULL;
+
+ assert(p);
+
+ /* Drops every occurrence of the env var setting p in the
+ * string list. Edits in-place. */
+
+ for (f = t = l; *f; f++) {
+
+ if (env_match(*f, p)) {
+ free(*f);
+ continue;
+ }
+
+ *(t++) = *f;
+ }
+
+ *t = NULL;
+ return l;
+}
+
+char **strv_env_unset_many(char **l, ...) {
+ char **f, **t;
+
+ if (!l)
+ return NULL;
+
+ /* Like strv_env_unset() but applies many at once. Edits in-place. */
+
+ for (f = t = l; *f; f++) {
+ bool found = false;
+ const char *p;
+ va_list ap;
+
+ va_start(ap, l);
+
+ while ((p = va_arg(ap, const char*))) {
+ if (env_match(*f, p)) {
+ found = true;
+ break;
+ }
+ }
+
+ va_end(ap);
+
+ if (found) {
+ free(*f);
+ continue;
+ }
+
+ *(t++) = *f;
+ }
+
+ *t = NULL;
+ return l;
+}
+
+int strv_env_replace(char ***l, char *p) {
+ const char *t, *name;
+ char **f;
+ int r;
+
+ assert(p);
+
+ /* Replace first occurrence of the env var or add a new one in the string list. Drop other occurrences. Edits
+ * in-place. Does not copy p. p must be a valid key=value assignment.
+ */
+
+ t = strchr(p, '=');
+ if (!t)
+ return -EINVAL;
+
+ name = strndupa(p, t - p);
+
+ STRV_FOREACH(f, *l)
+ if (env_entry_has_name(*f, name)) {
+ free_and_replace(*f, p);
+ strv_env_unset(f + 1, *f);
+ return 0;
+ }
+
+ /* We didn't find a match, we need to append p or create a new strv */
+ r = strv_push(l, p);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+char **strv_env_set(char **x, const char *p) {
+ _cleanup_strv_free_ char **ret = NULL;
+ size_t n, m;
+ char **k;
+
+ /* Overrides the env var setting of p, returns a new copy */
+
+ n = strv_length(x);
+ m = n + 2;
+ if (m < n) /* overflow? */
+ return NULL;
+
+ ret = new(char*, m);
+ if (!ret)
+ return NULL;
+
+ *ret = NULL;
+ k = ret;
+
+ if (env_append(ret, &k, x) < 0)
+ return NULL;
+
+ if (env_append(ret, &k, STRV_MAKE(p)) < 0)
+ return NULL;
+
+ return TAKE_PTR(ret);
+}
+
+char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) {
+ char **i;
+
+ assert(name);
+
+ if (k <= 0)
+ return NULL;
+
+ STRV_FOREACH_BACKWARDS(i, l)
+ if (strneq(*i, name, k) &&
+ (*i)[k] == '=')
+ return *i + k + 1;
+
+ if (flags & REPLACE_ENV_USE_ENVIRONMENT) {
+ const char *t;
+
+ t = strndupa(name, k);
+ return getenv(t);
+ };
+
+ return NULL;
+}
+
+char *strv_env_get(char **l, const char *name) {
+ assert(name);
+
+ return strv_env_get_n(l, name, strlen(name), 0);
+}
+
+char **strv_env_clean_with_callback(char **e, void (*invalid_callback)(const char *p, void *userdata), void *userdata) {
+ char **p, **q;
+ int k = 0;
+
+ STRV_FOREACH(p, e) {
+ size_t n;
+ bool duplicate = false;
+
+ if (!env_assignment_is_valid(*p)) {
+ if (invalid_callback)
+ invalid_callback(*p, userdata);
+ free(*p);
+ continue;
+ }
+
+ n = strcspn(*p, "=");
+ STRV_FOREACH(q, p + 1)
+ if (strneq(*p, *q, n) && (*q)[n] == '=') {
+ duplicate = true;
+ break;
+ }
+
+ if (duplicate) {
+ free(*p);
+ continue;
+ }
+
+ e[k++] = *p;
+ }
+
+ if (e)
+ e[k] = NULL;
+
+ return e;
+}
+
+char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) {
+ enum {
+ WORD,
+ CURLY,
+ VARIABLE,
+ VARIABLE_RAW,
+ TEST,
+ DEFAULT_VALUE,
+ ALTERNATE_VALUE,
+ } state = WORD;
+
+ const char *e, *word = format, *test_value;
+ char *k;
+ _cleanup_free_ char *r = NULL;
+ size_t i, len;
+ int nest = 0;
+
+ assert(format);
+
+ for (e = format, i = 0; *e && i < n; e ++, i ++)
+ switch (state) {
+
+ case WORD:
+ if (*e == '$')
+ state = CURLY;
+ break;
+
+ case CURLY:
+ if (*e == '{') {
+ k = strnappend(r, word, e-word-1);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e-1;
+ state = VARIABLE;
+ nest++;
+ } else if (*e == '$') {
+ k = strnappend(r, word, e-word);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+
+ } else if (flags & REPLACE_ENV_ALLOW_BRACELESS && strchr(VALID_BASH_ENV_NAME_CHARS, *e)) {
+ k = strnappend(r, word, e-word-1);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e-1;
+ state = VARIABLE_RAW;
+
+ } else
+ state = WORD;
+ break;
+
+ case VARIABLE:
+ if (*e == '}') {
+ const char *t;
+
+ t = strv_env_get_n(env, word+2, e-word-2, flags);
+
+ k = strjoin(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+ } else if (*e == ':') {
+ if (!(flags & REPLACE_ENV_ALLOW_EXTENDED))
+ /* Treat this as unsupported syntax, i.e. do no replacement */
+ state = WORD;
+ else {
+ len = e-word-2;
+ state = TEST;
+ }
+ }
+ break;
+
+ case TEST:
+ if (*e == '-')
+ state = DEFAULT_VALUE;
+ else if (*e == '+')
+ state = ALTERNATE_VALUE;
+ else {
+ state = WORD;
+ break;
+ }
+
+ test_value = e+1;
+ break;
+
+ case DEFAULT_VALUE: /* fall through */
+ case ALTERNATE_VALUE:
+ assert(flags & REPLACE_ENV_ALLOW_EXTENDED);
+
+ if (*e == '{') {
+ nest++;
+ break;
+ }
+
+ if (*e != '}')
+ break;
+
+ nest--;
+ if (nest == 0) {
+ const char *t;
+ _cleanup_free_ char *v = NULL;
+
+ t = strv_env_get_n(env, word+2, len, flags);
+
+ if (t && state == ALTERNATE_VALUE)
+ t = v = replace_env_n(test_value, e-test_value, env, flags);
+ else if (!t && state == DEFAULT_VALUE)
+ t = v = replace_env_n(test_value, e-test_value, env, flags);
+
+ k = strjoin(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e+1;
+ state = WORD;
+ }
+ break;
+
+ case VARIABLE_RAW:
+ assert(flags & REPLACE_ENV_ALLOW_BRACELESS);
+
+ if (!strchr(VALID_BASH_ENV_NAME_CHARS, *e)) {
+ const char *t;
+
+ t = strv_env_get_n(env, word+1, e-word-1, flags);
+
+ k = strjoin(r, t);
+ if (!k)
+ return NULL;
+
+ free_and_replace(r, k);
+
+ word = e--;
+ i--;
+ state = WORD;
+ }
+ break;
+ }
+
+ if (state == VARIABLE_RAW) {
+ const char *t;
+
+ assert(flags & REPLACE_ENV_ALLOW_BRACELESS);
+
+ t = strv_env_get_n(env, word+1, e-word-1, flags);
+ return strjoin(r, t);
+ } else
+ return strnappend(r, word, e-word);
+}
+
+char **replace_env_argv(char **argv, char **env) {
+ char **ret, **i;
+ size_t k = 0, l = 0;
+
+ l = strv_length(argv);
+
+ ret = new(char*, l+1);
+ if (!ret)
+ return NULL;
+
+ STRV_FOREACH(i, argv) {
+
+ /* If $FOO appears as single word, replace it by the split up variable */
+ if ((*i)[0] == '$' && !IN_SET((*i)[1], '{', '$')) {
+ char *e;
+ char **w, **m = NULL;
+ size_t q;
+
+ e = strv_env_get(env, *i+1);
+ if (e) {
+ int r;
+
+ r = strv_split_full(&m, e, WHITESPACE, EXTRACT_RELAX|EXTRACT_UNQUOTE);
+ if (r < 0) {
+ ret[k] = NULL;
+ strv_free(ret);
+ return NULL;
+ }
+ } else
+ m = NULL;
+
+ q = strv_length(m);
+ l = l + q - 1;
+
+ w = reallocarray(ret, l + 1, sizeof(char *));
+ if (!w) {
+ ret[k] = NULL;
+ strv_free(ret);
+ strv_free(m);
+ return NULL;
+ }
+
+ ret = w;
+ if (m) {
+ memcpy(ret + k, m, q * sizeof(char*));
+ free(m);
+ }
+
+ k += q;
+ continue;
+ }
+
+ /* If ${FOO} appears as part of a word, replace it by the variable as-is */
+ ret[k] = replace_env(*i, env, 0);
+ if (!ret[k]) {
+ strv_free(ret);
+ return NULL;
+ }
+ k++;
+ }
+
+ ret[k] = NULL;
+ return ret;
+}
+
+int getenv_bool(const char *p) {
+ const char *e;
+
+ e = getenv(p);
+ if (!e)
+ return -ENXIO;
+
+ return parse_boolean(e);
+}
+
+int getenv_bool_secure(const char *p) {
+ const char *e;
+
+ e = secure_getenv(p);
+ if (!e)
+ return -ENXIO;
+
+ return parse_boolean(e);
+}
+
+int set_unset_env(const char *name, const char *value, bool overwrite) {
+ int r;
+
+ if (value)
+ r = setenv(name, value, overwrite);
+ else
+ r = unsetenv(name);
+ if (r < 0)
+ return -errno;
+ return 0;
+}
diff --git a/src/basic/env-util.h b/src/basic/env-util.h
new file mode 100644
index 0000000..6684b33
--- /dev/null
+++ b/src/basic/env-util.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "macro.h"
+#include "string.h"
+
+static inline size_t sc_arg_max(void) {
+ long l = sysconf(_SC_ARG_MAX);
+ assert(l > 0);
+ return (size_t) l;
+}
+
+bool env_name_is_valid(const char *e);
+bool env_value_is_valid(const char *e);
+bool env_assignment_is_valid(const char *e);
+
+enum {
+ REPLACE_ENV_USE_ENVIRONMENT = 1 << 0,
+ REPLACE_ENV_ALLOW_BRACELESS = 1 << 1,
+ REPLACE_ENV_ALLOW_EXTENDED = 1 << 2,
+};
+
+char *replace_env_n(const char *format, size_t n, char **env, unsigned flags);
+char **replace_env_argv(char **argv, char **env);
+
+static inline char *replace_env(const char *format, char **env, unsigned flags) {
+ return replace_env_n(format, strlen(format), env, flags);
+}
+
+bool strv_env_is_valid(char **e);
+#define strv_env_clean(l) strv_env_clean_with_callback(l, NULL, NULL)
+char **strv_env_clean_with_callback(char **l, void (*invalid_callback)(const char *p, void *userdata), void *userdata);
+
+bool strv_env_name_is_valid(char **l);
+bool strv_env_name_or_assignment_is_valid(char **l);
+
+char **strv_env_merge(size_t n_lists, ...);
+char **strv_env_delete(char **x, size_t n_lists, ...); /* New copy */
+
+char **strv_env_set(char **x, const char *p); /* New copy ... */
+char **strv_env_unset(char **l, const char *p); /* In place ... */
+char **strv_env_unset_many(char **l, ...) _sentinel_;
+int strv_env_replace(char ***l, char *p); /* In place ... */
+
+char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) _pure_;
+char *strv_env_get(char **x, const char *n) _pure_;
+
+int getenv_bool(const char *p);
+int getenv_bool_secure(const char *p);
+
+/* Like setenv, but calls unsetenv if value == NULL. */
+int set_unset_env(const char *name, const char *value, bool overwrite);
diff --git a/src/basic/errno-list.c b/src/basic/errno-list.c
new file mode 100644
index 0000000..2aeb38c
--- /dev/null
+++ b/src/basic/errno-list.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <string.h>
+
+#include "errno-list.h"
+#include "macro.h"
+
+static const struct errno_name* lookup_errno(register const char *str,
+ register GPERF_LEN_TYPE len);
+
+#include "errno-from-name.h"
+#include "errno-to-name.h"
+
+const char *errno_to_name(int id) {
+
+ if (id < 0)
+ id = -id;
+
+ if ((size_t) id >= ELEMENTSOF(errno_names))
+ return NULL;
+
+ return errno_names[id];
+}
+
+int errno_from_name(const char *name) {
+ const struct errno_name *sc;
+
+ assert(name);
+
+ sc = lookup_errno(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ assert(sc->id > 0);
+ return sc->id;
+}
diff --git a/src/basic/errno-list.h b/src/basic/errno-list.h
new file mode 100644
index 0000000..082b833
--- /dev/null
+++ b/src/basic/errno-list.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+/*
+ * MAX_ERRNO is defined as 4095 in linux/err.h
+ * We use the same value here.
+ */
+#define ERRNO_MAX 4095
+
+const char *errno_to_name(int id);
+int errno_from_name(const char *name);
+static inline bool errno_is_valid(int n) {
+ return n > 0 && n <= ERRNO_MAX;
+}
diff --git a/src/basic/errno-to-name.awk b/src/basic/errno-to-name.awk
new file mode 100644
index 0000000..0878aba
--- /dev/null
+++ b/src/basic/errno-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const errno_names[] = { "
+}
+!/EDEADLOCK/ && !/EWOULDBLOCK/ && !/ENOTSUP/ {
+ printf " [%s] = \"%s\",\n", $1, $1
+}
+END{
+ print "};"
+}
diff --git a/src/basic/errno-util.h b/src/basic/errno-util.h
new file mode 100644
index 0000000..5609820
--- /dev/null
+++ b/src/basic/errno-util.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "macro.h"
+
+static inline void _reset_errno_(int *saved_errno) {
+ if (*saved_errno < 0) /* Invalidated by UNPROTECT_ERRNO? */
+ return;
+
+ errno = *saved_errno;
+}
+
+#define PROTECT_ERRNO \
+ _cleanup_(_reset_errno_) _unused_ int _saved_errno_ = errno
+
+#define UNPROTECT_ERRNO \
+ do { \
+ errno = _saved_errno_; \
+ _saved_errno_ = -1; \
+ } while (false)
+
+static inline int negative_errno(void) {
+ /* This helper should be used to shut up gcc if you know 'errno' is
+ * negative. Instead of "return -errno;", use "return negative_errno();"
+ * It will suppress bogus gcc warnings in case it assumes 'errno' might
+ * be 0 and thus the caller's error-handling might not be triggered. */
+ assert_return(errno > 0, -EINVAL);
+ return -errno;
+}
+
+static inline const char *strerror_safe(int error) {
+ /* 'safe' here does NOT mean thread safety. */
+ return strerror(abs(error));
+}
+
+static inline int errno_or_else(int fallback) {
+ /* To be used when invoking library calls where errno handling is not defined clearly: we return
+ * errno if it is set, and the specified error otherwise. The idea is that the caller initializes
+ * errno to zero before doing an API call, and then uses this helper to retrieve a somewhat useful
+ * error code */
+ if (errno > 0)
+ return -errno;
+
+ return -abs(fallback);
+}
+
+/* Hint #1: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5.
+ *
+ * Hint #2: The kernel sends e.g., EHOSTUNREACH or ENONET to userspace in some ICMP error cases. See the
+ * icmp_err_convert[] in net/ipv4/icmp.c in the kernel sources.
+ *
+ * Hint #3: When asynchronous connect() on TCP fails because the host never acknowledges a single packet,
+ * kernel tells us that with ETIMEDOUT, see tcp(7). */
+static inline bool ERRNO_IS_DISCONNECT(int r) {
+ return IN_SET(abs(r),
+ ECONNABORTED,
+ ECONNREFUSED,
+ ECONNRESET,
+ EHOSTDOWN,
+ EHOSTUNREACH,
+ ENETDOWN,
+ ENETRESET,
+ ENETUNREACH,
+ ENONET,
+ ENOPROTOOPT,
+ ENOTCONN,
+ EPIPE,
+ EPROTO,
+ ESHUTDOWN,
+ ETIMEDOUT);
+}
+
+/* Transient errors we might get on accept() that we should ignore. As per error handling comment in
+ * the accept(2) man page. */
+static inline bool ERRNO_IS_ACCEPT_AGAIN(int r) {
+ return ERRNO_IS_DISCONNECT(r) ||
+ IN_SET(abs(r),
+ EAGAIN,
+ EINTR,
+ EOPNOTSUPP);
+}
+
+/* Resource exhaustion, could be our fault or general system trouble */
+static inline bool ERRNO_IS_RESOURCE(int r) {
+ return IN_SET(abs(r),
+ EMFILE,
+ ENFILE,
+ ENOMEM);
+}
+
+/* Seven different errors for "operation/system call/ioctl/socket feature not supported" */
+static inline bool ERRNO_IS_NOT_SUPPORTED(int r) {
+ return IN_SET(abs(r),
+ EOPNOTSUPP,
+ ENOTTY,
+ ENOSYS,
+ EAFNOSUPPORT,
+ EPFNOSUPPORT,
+ EPROTONOSUPPORT,
+ ESOCKTNOSUPPORT);
+}
+
+/* Two different errors for access problems */
+static inline bool ERRNO_IS_PRIVILEGE(int r) {
+ return IN_SET(abs(r),
+ EACCES,
+ EPERM);
+}
+
+/* Three difference errors for "not enough disk space" */
+static inline bool ERRNO_IS_DISK_SPACE(int r) {
+ return IN_SET(abs(r),
+ ENOSPC,
+ EDQUOT,
+ EFBIG);
+}
diff --git a/src/basic/escape.c b/src/basic/escape.c
new file mode 100644
index 0000000..31f3cda
--- /dev/null
+++ b/src/basic/escape.c
@@ -0,0 +1,549 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "utf8.h"
+
+int cescape_char(char c, char *buf) {
+ char *buf_old = buf;
+
+ /* Needs space for 4 characters in the buffer */
+
+ switch (c) {
+
+ case '\a':
+ *(buf++) = '\\';
+ *(buf++) = 'a';
+ break;
+ case '\b':
+ *(buf++) = '\\';
+ *(buf++) = 'b';
+ break;
+ case '\f':
+ *(buf++) = '\\';
+ *(buf++) = 'f';
+ break;
+ case '\n':
+ *(buf++) = '\\';
+ *(buf++) = 'n';
+ break;
+ case '\r':
+ *(buf++) = '\\';
+ *(buf++) = 'r';
+ break;
+ case '\t':
+ *(buf++) = '\\';
+ *(buf++) = 't';
+ break;
+ case '\v':
+ *(buf++) = '\\';
+ *(buf++) = 'v';
+ break;
+ case '\\':
+ *(buf++) = '\\';
+ *(buf++) = '\\';
+ break;
+ case '"':
+ *(buf++) = '\\';
+ *(buf++) = '"';
+ break;
+ case '\'':
+ *(buf++) = '\\';
+ *(buf++) = '\'';
+ break;
+
+ default:
+ /* For special chars we prefer octal over
+ * hexadecimal encoding, simply because glib's
+ * g_strescape() does the same */
+ if ((c < ' ') || (c >= 127)) {
+ *(buf++) = '\\';
+ *(buf++) = octchar((unsigned char) c >> 6);
+ *(buf++) = octchar((unsigned char) c >> 3);
+ *(buf++) = octchar((unsigned char) c);
+ } else
+ *(buf++) = c;
+ break;
+ }
+
+ return buf - buf_old;
+}
+
+char* cescape_length(const char *s, size_t n) {
+ const char *f;
+ char *r, *t;
+
+ assert(s || n == 0);
+
+ /* Does C style string escaping. May be reversed with
+ * cunescape(). */
+
+ r = new(char, n*4 + 1);
+ if (!r)
+ return NULL;
+
+ for (f = s, t = r; f < s + n; f++)
+ t += cescape_char(*f, t);
+
+ *t = 0;
+
+ return r;
+}
+
+char* cescape(const char *s) {
+ assert(s);
+
+ return cescape_length(s, strlen(s));
+}
+
+int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit, bool accept_nul) {
+ int r = 1;
+
+ assert(p);
+ assert(ret);
+
+ /* Unescapes C style. Returns the unescaped character in ret.
+ * Sets *eight_bit to true if the escaped sequence either fits in
+ * one byte in UTF-8 or is a non-unicode literal byte and should
+ * instead be copied directly.
+ */
+
+ if (length != (size_t) -1 && length < 1)
+ return -EINVAL;
+
+ switch (p[0]) {
+
+ case 'a':
+ *ret = '\a';
+ break;
+ case 'b':
+ *ret = '\b';
+ break;
+ case 'f':
+ *ret = '\f';
+ break;
+ case 'n':
+ *ret = '\n';
+ break;
+ case 'r':
+ *ret = '\r';
+ break;
+ case 't':
+ *ret = '\t';
+ break;
+ case 'v':
+ *ret = '\v';
+ break;
+ case '\\':
+ *ret = '\\';
+ break;
+ case '"':
+ *ret = '"';
+ break;
+ case '\'':
+ *ret = '\'';
+ break;
+
+ case 's':
+ /* This is an extension of the XDG syntax files */
+ *ret = ' ';
+ break;
+
+ case 'x': {
+ /* hexadecimal encoding */
+ int a, b;
+
+ if (length != (size_t) -1 && length < 3)
+ return -EINVAL;
+
+ a = unhexchar(p[1]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unhexchar(p[2]);
+ if (b < 0)
+ return -EINVAL;
+
+ /* Don't allow NUL bytes */
+ if (a == 0 && b == 0 && !accept_nul)
+ return -EINVAL;
+
+ *ret = (a << 4U) | b;
+ *eight_bit = true;
+ r = 3;
+ break;
+ }
+
+ case 'u': {
+ /* C++11 style 16bit unicode */
+
+ int a[4];
+ size_t i;
+ uint32_t c;
+
+ if (length != (size_t) -1 && length < 5)
+ return -EINVAL;
+
+ for (i = 0; i < 4; i++) {
+ a[i] = unhexchar(p[1 + i]);
+ if (a[i] < 0)
+ return a[i];
+ }
+
+ c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
+
+ /* Don't allow 0 chars */
+ if (c == 0 && !accept_nul)
+ return -EINVAL;
+
+ *ret = c;
+ r = 5;
+ break;
+ }
+
+ case 'U': {
+ /* C++11 style 32bit unicode */
+
+ int a[8];
+ size_t i;
+ char32_t c;
+
+ if (length != (size_t) -1 && length < 9)
+ return -EINVAL;
+
+ for (i = 0; i < 8; i++) {
+ a[i] = unhexchar(p[1 + i]);
+ if (a[i] < 0)
+ return a[i];
+ }
+
+ c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
+ ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] << 8U) | ((uint32_t) a[6] << 4U) | (uint32_t) a[7];
+
+ /* Don't allow 0 chars */
+ if (c == 0 && !accept_nul)
+ return -EINVAL;
+
+ /* Don't allow invalid code points */
+ if (!unichar_is_valid(c))
+ return -EINVAL;
+
+ *ret = c;
+ r = 9;
+ break;
+ }
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ /* octal encoding */
+ int a, b, c;
+ char32_t m;
+
+ if (length != (size_t) -1 && length < 3)
+ return -EINVAL;
+
+ a = unoctchar(p[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unoctchar(p[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unoctchar(p[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ /* don't allow NUL bytes */
+ if (a == 0 && b == 0 && c == 0 && !accept_nul)
+ return -EINVAL;
+
+ /* Don't allow bytes above 255 */
+ m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
+ if (m > 255)
+ return -EINVAL;
+
+ *ret = m;
+ *eight_bit = true;
+ r = 3;
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
+ char *r, *t;
+ const char *f;
+ size_t pl;
+
+ assert(s);
+ assert(ret);
+
+ /* Undoes C style string escaping, and optionally prefixes it. */
+
+ pl = strlen_ptr(prefix);
+
+ r = new(char, pl+length+1);
+ if (!r)
+ return -ENOMEM;
+
+ if (prefix)
+ memcpy(r, prefix, pl);
+
+ for (f = s, t = r + pl; f < s + length; f++) {
+ size_t remaining;
+ bool eight_bit = false;
+ char32_t u;
+ int k;
+
+ remaining = s + length - f;
+ assert(remaining > 0);
+
+ if (*f != '\\') {
+ /* A literal, copy verbatim */
+ *(t++) = *f;
+ continue;
+ }
+
+ if (remaining == 1) {
+ if (flags & UNESCAPE_RELAX) {
+ /* A trailing backslash, copy verbatim */
+ *(t++) = *f;
+ continue;
+ }
+
+ free(r);
+ return -EINVAL;
+ }
+
+ k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit, flags & UNESCAPE_ACCEPT_NUL);
+ if (k < 0) {
+ if (flags & UNESCAPE_RELAX) {
+ /* Invalid escape code, let's take it literal then */
+ *(t++) = '\\';
+ continue;
+ }
+
+ free(r);
+ return k;
+ }
+
+ f += k;
+ if (eight_bit)
+ /* One byte? Set directly as specified */
+ *(t++) = u;
+ else
+ /* Otherwise encode as multi-byte UTF-8 */
+ t += utf8_encode_unichar(t, u);
+ }
+
+ *t = 0;
+
+ *ret = r;
+ return t - r;
+}
+
+char* xescape_full(const char *s, const char *bad, size_t console_width, bool eight_bits) {
+ char *ans, *t, *prev, *prev2;
+ const char *f;
+
+ /* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be
+ * reversed with cunescape(). If eight_bits is true, characters >= 127 are let through unchanged.
+ * This corresponds to non-ASCII printable characters in pre-unicode encodings.
+ *
+ * If console_width is reached, output is truncated and "..." is appended. */
+
+ if (console_width == 0)
+ return strdup("");
+
+ ans = new(char, MIN(strlen(s), console_width) * 4 + 1);
+ if (!ans)
+ return NULL;
+
+ memset(ans, '_', MIN(strlen(s), console_width) * 4);
+ ans[MIN(strlen(s), console_width) * 4] = 0;
+
+ for (f = s, t = prev = prev2 = ans; ; f++) {
+ char *tmp_t = t;
+
+ if (!*f) {
+ *t = 0;
+ return ans;
+ }
+
+ if ((unsigned char) *f < ' ' || (!eight_bits && (unsigned char) *f >= 127) ||
+ *f == '\\' || strchr(bad, *f)) {
+ if ((size_t) (t - ans) + 4 > console_width)
+ break;
+
+ *(t++) = '\\';
+ *(t++) = 'x';
+ *(t++) = hexchar(*f >> 4);
+ *(t++) = hexchar(*f);
+ } else {
+ if ((size_t) (t - ans) + 1 > console_width)
+ break;
+
+ *(t++) = *f;
+ }
+
+ /* We might need to go back two cycles to fit three dots, so remember two positions */
+ prev2 = prev;
+ prev = tmp_t;
+ }
+
+ /* We can just write where we want, since chars are one-byte */
+ size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */
+ size_t off;
+ if (console_width - c >= (size_t) (t - ans))
+ off = (size_t) (t - ans);
+ else if (console_width - c >= (size_t) (prev - ans))
+ off = (size_t) (prev - ans);
+ else if (console_width - c >= (size_t) (prev2 - ans))
+ off = (size_t) (prev2 - ans);
+ else
+ off = console_width - c;
+ assert(off <= (size_t) (t - ans));
+
+ memcpy(ans + off, "...", c);
+ ans[off + c] = '\0';
+ return ans;
+}
+
+char* escape_non_printable_full(const char *str, size_t console_width, bool eight_bit) {
+ if (eight_bit)
+ return xescape_full(str, "", console_width, true);
+ else
+ return utf8_escape_non_printable_full(str, console_width);
+}
+
+char* octescape(const char *s, size_t len) {
+ char *r, *t;
+ const char *f;
+
+ /* Escapes all chars in bad, in addition to \ and " chars,
+ * in \nnn style escaping. */
+
+ r = new(char, len * 4 + 1);
+ if (!r)
+ return NULL;
+
+ for (f = s, t = r; f < s + len; f++) {
+
+ if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) {
+ *(t++) = '\\';
+ *(t++) = '0' + (*f >> 6);
+ *(t++) = '0' + ((*f >> 3) & 8);
+ *(t++) = '0' + (*f & 8);
+ } else
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ return r;
+
+}
+
+static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
+ assert(bad);
+
+ for (; *s; s++) {
+ if (escape_tab_nl && IN_SET(*s, '\n', '\t')) {
+ *(t++) = '\\';
+ *(t++) = *s == '\n' ? 'n' : 't';
+ continue;
+ }
+
+ if (*s == '\\' || strchr(bad, *s))
+ *(t++) = '\\';
+
+ *(t++) = *s;
+ }
+
+ return t;
+}
+
+char* shell_escape(const char *s, const char *bad) {
+ char *r, *t;
+
+ r = new(char, strlen(s)*2+1);
+ if (!r)
+ return NULL;
+
+ t = strcpy_backslash_escaped(r, s, bad, false);
+ *t = 0;
+
+ return r;
+}
+
+char* shell_maybe_quote(const char *s, EscapeStyle style) {
+ const char *p;
+ char *r, *t;
+
+ assert(s);
+
+ /* Encloses a string in quotes if necessary to make it OK as a shell
+ * string. Note that we treat benign UTF-8 characters as needing
+ * escaping too, but that should be OK. */
+
+ for (p = s; *p; p++)
+ if (*p <= ' ' ||
+ *p >= 127 ||
+ strchr(SHELL_NEED_QUOTES, *p))
+ break;
+
+ if (!*p)
+ return strdup(s);
+
+ r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
+ if (!r)
+ return NULL;
+
+ t = r;
+ switch (style) {
+ case ESCAPE_BACKSLASH:
+ case ESCAPE_BACKSLASH_ONELINE:
+ *(t++) = '"';
+ break;
+ case ESCAPE_POSIX:
+ *(t++) = '$';
+ *(t++) = '\'';
+ break;
+ default:
+ assert_not_reached("Bad EscapeStyle");
+ }
+
+ t = mempcpy(t, s, p - s);
+
+ if (IN_SET(style, ESCAPE_BACKSLASH, ESCAPE_BACKSLASH_ONELINE))
+ t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE,
+ style == ESCAPE_BACKSLASH_ONELINE);
+ else
+ t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
+
+ if (IN_SET(style, ESCAPE_BACKSLASH, ESCAPE_BACKSLASH_ONELINE))
+ *(t++) = '"';
+ else
+ *(t++) = '\'';
+ *t = 0;
+
+ return r;
+}
diff --git a/src/basic/escape.h b/src/basic/escape.h
new file mode 100644
index 0000000..691b6d8
--- /dev/null
+++ b/src/basic/escape.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <uchar.h>
+
+#include "string-util.h"
+#include "missing_type.h"
+
+/* What characters are special in the shell? */
+/* must be escaped outside and inside double-quotes */
+#define SHELL_NEED_ESCAPE "\"\\`$"
+
+/* Those that can be escaped or double-quoted.
+ *
+ * Strictly speaking, ! does not need to be escaped, except in interactive
+ * mode, but let's be extra nice to the user and quote ! in case this
+ * output is ever used in interactive mode. */
+#define SHELL_NEED_QUOTES SHELL_NEED_ESCAPE GLOB_CHARS "'()<>|&;!"
+
+/* Note that we assume control characters would need to be escaped too in
+ * addition to the "special" characters listed here, if they appear in the
+ * string. Current users disallow control characters. Also '"' shall not
+ * be escaped.
+ */
+#define SHELL_NEED_ESCAPE_POSIX "\\\'"
+
+typedef enum UnescapeFlags {
+ UNESCAPE_RELAX = 1 << 0,
+ UNESCAPE_ACCEPT_NUL = 1 << 1,
+} UnescapeFlags;
+
+typedef enum EscapeStyle {
+ ESCAPE_BACKSLASH = 1, /* Add shell quotes ("") so the shell will consider this a single
+ argument, possibly multiline. Tabs and newlines are not escaped. */
+ ESCAPE_BACKSLASH_ONELINE = 2, /* Similar to ESCAPE_BACKSLASH, but always produces a single-line
+ string instead. Shell escape sequences are produced for tabs and
+ newlines. */
+ ESCAPE_POSIX = 3, /* Similar to ESCAPE_BACKSLASH_ONELINE, but uses POSIX shell escape
+ * syntax (a string enclosed in $'') instead of plain quotes. */
+} EscapeStyle;
+
+char* cescape(const char *s);
+char* cescape_length(const char *s, size_t n);
+int cescape_char(char c, char *buf);
+
+int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret);
+static inline int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
+ return cunescape_length_with_prefix(s, length, NULL, flags, ret);
+}
+static inline int cunescape(const char *s, UnescapeFlags flags, char **ret) {
+ return cunescape_length(s, strlen(s), flags, ret);
+}
+int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit, bool accept_nul);
+
+char* xescape_full(const char *s, const char *bad, size_t console_width, bool eight_bits);
+static inline char* xescape(const char *s, const char *bad) {
+ return xescape_full(s, bad, SIZE_MAX, false);
+}
+char* octescape(const char *s, size_t len);
+char* escape_non_printable_full(const char *str, size_t console_width, bool eight_bit);
+
+char* shell_escape(const char *s, const char *bad);
+char* shell_maybe_quote(const char *s, EscapeStyle style);
diff --git a/src/basic/ether-addr-util.c b/src/basic/ether-addr-util.c
new file mode 100644
index 0000000..c8094b6
--- /dev/null
+++ b/src/basic/ether-addr-util.c
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "ether-addr-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+char* hw_addr_to_string(const hw_addr_data *addr, char buffer[HW_ADDR_TO_STRING_MAX]) {
+ assert(addr);
+ assert(buffer);
+ assert(addr->length <= HW_ADDR_MAX_SIZE);
+
+ for (size_t i = 0; i < addr->length; i++) {
+ sprintf(&buffer[3*i], "%02"PRIx8, addr->addr.bytes[i]);
+ if (i < addr->length - 1)
+ buffer[3*i + 2] = ':';
+ }
+
+ return buffer;
+}
+
+char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]) {
+ assert(addr);
+ assert(buffer);
+
+ /* Like ether_ntoa() but uses %02x instead of %x to print
+ * ethernet addresses, which makes them look less funny. Also,
+ * doesn't use a static buffer. */
+
+ sprintf(buffer, "%02x:%02x:%02x:%02x:%02x:%02x",
+ addr->ether_addr_octet[0],
+ addr->ether_addr_octet[1],
+ addr->ether_addr_octet[2],
+ addr->ether_addr_octet[3],
+ addr->ether_addr_octet[4],
+ addr->ether_addr_octet[5]);
+
+ return buffer;
+}
+
+int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b) {
+ return memcmp(a, b, ETH_ALEN);
+}
+
+static void ether_addr_hash_func(const struct ether_addr *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(struct ether_addr), state);
+}
+
+DEFINE_HASH_OPS(ether_addr_hash_ops, struct ether_addr, ether_addr_hash_func, ether_addr_compare);
+
+int ether_addr_from_string(const char *s, struct ether_addr *ret) {
+ size_t pos = 0, n, field;
+ char sep = '\0';
+ const char *hex = HEXDIGITS, *hexoff;
+ size_t x;
+ bool touched;
+
+#define parse_fields(v) \
+ for (field = 0; field < ELEMENTSOF(v); field++) { \
+ touched = false; \
+ for (n = 0; n < (2 * sizeof(v[0])); n++) { \
+ if (s[pos] == '\0') \
+ break; \
+ hexoff = strchr(hex, s[pos]); \
+ if (!hexoff) \
+ break; \
+ assert(hexoff >= hex); \
+ x = hexoff - hex; \
+ if (x >= 16) \
+ x -= 6; /* A-F */ \
+ assert(x < 16); \
+ touched = true; \
+ v[field] <<= 4; \
+ v[field] += x; \
+ pos++; \
+ } \
+ if (!touched) \
+ return -EINVAL; \
+ if (field < (ELEMENTSOF(v)-1)) { \
+ if (s[pos] != sep) \
+ return -EINVAL; \
+ else \
+ pos++; \
+ } \
+ }
+
+ assert(s);
+ assert(ret);
+
+ s += strspn(s, WHITESPACE);
+ sep = s[strspn(s, hex)];
+
+ if (sep == '.') {
+ uint16_t shorts[3] = { 0 };
+
+ parse_fields(shorts);
+
+ if (s[pos] != '\0')
+ return -EINVAL;
+
+ for (n = 0; n < ELEMENTSOF(shorts); n++) {
+ ret->ether_addr_octet[2*n] = ((shorts[n] & (uint16_t)0xff00) >> 8);
+ ret->ether_addr_octet[2*n + 1] = (shorts[n] & (uint16_t)0x00ff);
+ }
+
+ } else if (IN_SET(sep, ':', '-')) {
+ struct ether_addr out = ETHER_ADDR_NULL;
+
+ parse_fields(out.ether_addr_octet);
+
+ if (s[pos] != '\0')
+ return -EINVAL;
+
+ for (n = 0; n < ELEMENTSOF(out.ether_addr_octet); n++)
+ ret->ether_addr_octet[n] = out.ether_addr_octet[n];
+
+ } else
+ return -EINVAL;
+
+ return 0;
+}
diff --git a/src/basic/ether-addr-util.h b/src/basic/ether-addr-util.h
new file mode 100644
index 0000000..942ce55
--- /dev/null
+++ b/src/basic/ether-addr-util.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/if_infiniband.h>
+#include <net/ethernet.h>
+#include <stdbool.h>
+
+#include "hash-funcs.h"
+
+/* This is MAX_ADDR_LEN as defined in linux/netdevice.h, but net/if_arp.h
+ * defines a macro of the same name with a much lower size. */
+#define HW_ADDR_MAX_SIZE 32
+
+union hw_addr_union {
+ struct ether_addr ether;
+ uint8_t infiniband[INFINIBAND_ALEN];
+ uint8_t bytes[HW_ADDR_MAX_SIZE];
+};
+
+typedef struct hw_addr_data {
+ union hw_addr_union addr;
+ size_t length;
+} hw_addr_data;
+
+#define HW_ADDR_TO_STRING_MAX (3*HW_ADDR_MAX_SIZE)
+char* hw_addr_to_string(const hw_addr_data *addr, char buffer[HW_ADDR_TO_STRING_MAX]);
+
+/* Use only as function argument, never stand-alone! */
+#define HW_ADDR_TO_STR(hw_addr) hw_addr_to_string((hw_addr), (char[HW_ADDR_TO_STRING_MAX]){})
+
+#define HW_ADDR_NULL ((const hw_addr_data){})
+
+#define ETHER_ADDR_FORMAT_STR "%02X%02X%02X%02X%02X%02X"
+#define ETHER_ADDR_FORMAT_VAL(x) (x).ether_addr_octet[0], (x).ether_addr_octet[1], (x).ether_addr_octet[2], (x).ether_addr_octet[3], (x).ether_addr_octet[4], (x).ether_addr_octet[5]
+
+#define ETHER_ADDR_TO_STRING_MAX (3*6)
+char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]);
+
+int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b);
+static inline bool ether_addr_equal(const struct ether_addr *a, const struct ether_addr *b) {
+ return ether_addr_compare(a, b) == 0;
+}
+
+#define ETHER_ADDR_NULL ((const struct ether_addr){})
+
+static inline bool ether_addr_is_null(const struct ether_addr *addr) {
+ return ether_addr_equal(addr, &ETHER_ADDR_NULL);
+}
+
+int ether_addr_from_string(const char *s, struct ether_addr *ret);
+
+extern const struct hash_ops ether_addr_hash_ops;
diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c
new file mode 100644
index 0000000..76b3fe1
--- /dev/null
+++ b/src/basic/extract-word.c
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
+ _cleanup_free_ char *s = NULL;
+ size_t allocated = 0, sz = 0;
+ char c;
+ int r;
+
+ char quote = 0; /* 0 or ' or " */
+ bool backslash = false; /* whether we've just seen a backslash */
+
+ assert(p);
+ assert(ret);
+
+ /* Bail early if called after last value or with no input */
+ if (!*p)
+ goto finish;
+ c = **p;
+
+ if (!separators)
+ separators = WHITESPACE;
+
+ /* Parses the first word of a string, and returns it in
+ * *ret. Removes all quotes in the process. When parsing fails
+ * (because of an uneven number of quotes or similar), leaves
+ * the pointer *p at the first invalid character. */
+
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
+ if (!GREEDY_REALLOC(s, allocated, sz+1))
+ return -ENOMEM;
+
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ else if (strchr(separators, c)) {
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+ (*p)++;
+ goto finish_force_next;
+ }
+ } else {
+ /* We found a non-blank character, so we will always
+ * want to return a string (even if it is empty),
+ * allocate it here. */
+ if (!GREEDY_REALLOC(s, allocated, sz+1))
+ return -ENOMEM;
+ break;
+ }
+ }
+
+ for (;; (*p)++, c = **p) {
+ if (backslash) {
+ if (!GREEDY_REALLOC(s, allocated, sz+7))
+ return -ENOMEM;
+
+ if (c == 0) {
+ if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
+ (!quote || flags & EXTRACT_RELAX)) {
+ /* If we find an unquoted trailing backslash and we're in
+ * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
+ * output.
+ *
+ * Unbalanced quotes will only be allowed in EXTRACT_RELAX
+ * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
+ */
+ s[sz++] = '\\';
+ goto finish_force_terminate;
+ }
+ if (flags & EXTRACT_RELAX)
+ goto finish_force_terminate;
+ return -EINVAL;
+ }
+
+ if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) {
+ bool eight_bit = false;
+ char32_t u;
+
+ if ((flags & EXTRACT_CUNESCAPE) &&
+ (r = cunescape_one(*p, (size_t) -1, &u, &eight_bit, false)) >= 0) {
+ /* A valid escaped sequence */
+ assert(r >= 1);
+
+ (*p) += r - 1;
+
+ if (eight_bit)
+ s[sz++] = u;
+ else
+ sz += utf8_encode_unichar(s + sz, u);
+ } else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) &&
+ strchr(separators, **p))
+ /* An escaped separator char */
+ s[sz++] = c;
+ else if (flags & EXTRACT_CUNESCAPE_RELAX) {
+ s[sz++] = '\\';
+ s[sz++] = c;
+ } else
+ return -EINVAL;
+ } else
+ s[sz++] = c;
+
+ backslash = false;
+
+ } else if (quote) { /* inside either single or double quotes */
+ for (;; (*p)++, c = **p) {
+ if (c == 0) {
+ if (flags & EXTRACT_RELAX)
+ goto finish_force_terminate;
+ return -EINVAL;
+ } else if (c == quote) { /* found the end quote */
+ quote = 0;
+ break;
+ } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+ backslash = true;
+ break;
+ } else {
+ if (!GREEDY_REALLOC(s, allocated, sz+2))
+ return -ENOMEM;
+
+ s[sz++] = c;
+ }
+ }
+
+ } else {
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_UNQUOTE)) {
+ quote = c;
+ break;
+ } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+ backslash = true;
+ break;
+ } else if (strchr(separators, c)) {
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+ (*p)++;
+ goto finish_force_next;
+ }
+ /* Skip additional coalesced separators. */
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ if (!strchr(separators, c))
+ break;
+ }
+ goto finish;
+
+ } else {
+ if (!GREEDY_REALLOC(s, allocated, sz+2))
+ return -ENOMEM;
+
+ s[sz++] = c;
+ }
+ }
+ }
+ }
+
+finish_force_terminate:
+ *p = NULL;
+finish:
+ if (!s) {
+ *p = NULL;
+ *ret = NULL;
+ return 0;
+ }
+
+finish_force_next:
+ s[sz] = 0;
+ *ret = TAKE_PTR(s);
+
+ return 1;
+}
+
+int extract_first_word_and_warn(
+ const char **p,
+ char **ret,
+ const char *separators,
+ ExtractFlags flags,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *rvalue) {
+
+ /* Try to unquote it, if it fails, warn about it and try again
+ * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
+ * backslashes verbatim in invalid escape sequences. */
+
+ const char *save;
+ int r;
+
+ save = *p;
+ r = extract_first_word(p, ret, separators, flags);
+ if (r >= 0)
+ return r;
+
+ if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
+
+ /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
+ *p = save;
+ r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
+ if (r >= 0) {
+ /* It worked this time, hence it must have been an invalid escape sequence. */
+ log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
+ return r;
+ }
+
+ /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
+ if (r == -EINVAL)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
+ }
+
+ /* Can be any error, report it */
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
+}
+
+/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
+ * an object that undergoes default argument promotion as an argument to va_start).
+ * Let's make sure that ExtractFlags fits into an unsigned int. */
+assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
+
+int extract_many_words(const char **p, const char *separators, unsigned flags, ...) {
+ va_list ap;
+ char **l;
+ int n = 0, i, c, r;
+
+ /* Parses a number of words from a string, stripping any
+ * quotes if necessary. */
+
+ assert(p);
+
+ /* Count how many words are expected */
+ va_start(ap, flags);
+ for (;;) {
+ if (!va_arg(ap, char **))
+ break;
+ n++;
+ }
+ va_end(ap);
+
+ if (n <= 0)
+ return 0;
+
+ /* Read all words into a temporary array */
+ l = newa0(char*, n);
+ for (c = 0; c < n; c++) {
+
+ r = extract_first_word(p, &l[c], separators, flags);
+ if (r < 0) {
+ int j;
+
+ for (j = 0; j < c; j++)
+ free(l[j]);
+
+ return r;
+ }
+
+ if (r == 0)
+ break;
+ }
+
+ /* If we managed to parse all words, return them in the passed
+ * in parameters */
+ va_start(ap, flags);
+ for (i = 0; i < n; i++) {
+ char **v;
+
+ v = va_arg(ap, char **);
+ assert(v);
+
+ *v = l[i];
+ }
+ va_end(ap);
+
+ return c;
+}
diff --git a/src/basic/extract-word.h b/src/basic/extract-word.h
new file mode 100644
index 0000000..d1de32e
--- /dev/null
+++ b/src/basic/extract-word.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+typedef enum ExtractFlags {
+ EXTRACT_RELAX = 1 << 0,
+ EXTRACT_CUNESCAPE = 1 << 1,
+ EXTRACT_CUNESCAPE_RELAX = 1 << 2,
+ EXTRACT_UNESCAPE_SEPARATORS = 1 << 3,
+ EXTRACT_UNQUOTE = 1 << 4,
+ EXTRACT_DONT_COALESCE_SEPARATORS = 1 << 5,
+ EXTRACT_RETAIN_ESCAPE = 1 << 6,
+} ExtractFlags;
+
+int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags);
+int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue);
+int extract_many_words(const char **p, const char *separators, unsigned flags, ...) _sentinel_;
diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c
new file mode 100644
index 0000000..07a7b3a
--- /dev/null
+++ b/src/basic/fd-util.c
@@ -0,0 +1,1067 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing_fcntl.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+/* The maximum number of iterations in the loop to close descriptors in the fallback case
+ * when /proc/self/fd/ is inaccessible. */
+#define MAX_FD_LOOP_LIMIT (1024*1024)
+
+int close_nointr(int fd) {
+ assert(fd >= 0);
+
+ if (close(fd) >= 0)
+ return 0;
+
+ /*
+ * Just ignore EINTR; a retry loop is the wrong thing to do on
+ * Linux.
+ *
+ * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
+ * https://bugzilla.gnome.org/show_bug.cgi?id=682819
+ * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
+ * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
+ */
+ if (errno == EINTR)
+ return 0;
+
+ return -errno;
+}
+
+int safe_close(int fd) {
+
+ /*
+ * Like close_nointr() but cannot fail. Guarantees errno is
+ * unchanged. Is a NOP with negative fds passed, and returns
+ * -1, so that it can be used in this syntax:
+ *
+ * fd = safe_close(fd);
+ */
+
+ if (fd >= 0) {
+ PROTECT_ERRNO;
+
+ /* The kernel might return pretty much any error code
+ * via close(), but the fd will be closed anyway. The
+ * only condition we want to check for here is whether
+ * the fd was invalid at all... */
+
+ assert_se(close_nointr(fd) != -EBADF);
+ }
+
+ return -1;
+}
+
+void safe_close_pair(int p[static 2]) {
+ assert(p);
+
+ if (p[0] == p[1]) {
+ /* Special case pairs which use the same fd in both
+ * directions... */
+ p[0] = p[1] = safe_close(p[0]);
+ return;
+ }
+
+ p[0] = safe_close(p[0]);
+ p[1] = safe_close(p[1]);
+}
+
+void close_many(const int fds[], size_t n_fd) {
+ size_t i;
+
+ assert(fds || n_fd <= 0);
+
+ for (i = 0; i < n_fd; i++)
+ safe_close(fds[i]);
+}
+
+int fclose_nointr(FILE *f) {
+ assert(f);
+
+ /* Same as close_nointr(), but for fclose() */
+
+ errno = 0; /* Extra safety: if the FILE* object is not encapsulating an fd, it might not set errno
+ * correctly. Let's hence initialize it to zero first, so that we aren't confused by any
+ * prior errno here */
+ if (fclose(f) == 0)
+ return 0;
+
+ if (errno == EINTR)
+ return 0;
+
+ return errno_or_else(EIO);
+}
+
+FILE* safe_fclose(FILE *f) {
+
+ /* Same as safe_close(), but for fclose() */
+
+ if (f) {
+ PROTECT_ERRNO;
+
+ assert_se(fclose_nointr(f) != -EBADF);
+ }
+
+ return NULL;
+}
+
+DIR* safe_closedir(DIR *d) {
+
+ if (d) {
+ PROTECT_ERRNO;
+
+ assert_se(closedir(d) >= 0 || errno != EBADF);
+ }
+
+ return NULL;
+}
+
+int fd_nonblock(int fd, bool nonblock) {
+ int flags, nflags;
+
+ assert(fd >= 0);
+
+ flags = fcntl(fd, F_GETFL, 0);
+ if (flags < 0)
+ return -errno;
+
+ nflags = UPDATE_FLAG(flags, O_NONBLOCK, nonblock);
+ if (nflags == flags)
+ return 0;
+
+ if (fcntl(fd, F_SETFL, nflags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int fd_cloexec(int fd, bool cloexec) {
+ int flags, nflags;
+
+ assert(fd >= 0);
+
+ flags = fcntl(fd, F_GETFD, 0);
+ if (flags < 0)
+ return -errno;
+
+ nflags = UPDATE_FLAG(flags, FD_CLOEXEC, cloexec);
+ if (nflags == flags)
+ return 0;
+
+ if (fcntl(fd, F_SETFD, nflags) < 0)
+ return -errno;
+
+ return 0;
+}
+
+_pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) {
+ size_t i;
+
+ assert(n_fdset == 0 || fdset);
+
+ for (i = 0; i < n_fdset; i++)
+ if (fdset[i] == fd)
+ return true;
+
+ return false;
+}
+
+static int get_max_fd(void) {
+ struct rlimit rl;
+ rlim_t m;
+
+ /* Return the highest possible fd, based RLIMIT_NOFILE, but enforcing FD_SETSIZE-1 as lower boundary
+ * and INT_MAX as upper boundary. */
+
+ if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
+ return -errno;
+
+ m = MAX(rl.rlim_cur, rl.rlim_max);
+ if (m < FD_SETSIZE) /* Let's always cover at least 1024 fds */
+ return FD_SETSIZE-1;
+
+ if (m == RLIM_INFINITY || m > INT_MAX) /* Saturate on overflow. After all fds are "int", hence can
+ * never be above INT_MAX */
+ return INT_MAX;
+
+ return (int) (m - 1);
+}
+
+static int cmp_int(const int *a, const int *b) {
+ return CMP(*a, *b);
+}
+
+int close_all_fds(const int except[], size_t n_except) {
+ static bool have_close_range = true; /* Assume we live in the future */
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(n_except == 0 || except);
+
+ if (have_close_range) {
+ /* In the best case we have close_range() to close all fds between a start and an end fd,
+ * which we can use on the "inverted" exception array, i.e. all intervals between all
+ * adjacent pairs from the sorted exception array. This changes loop complexity from O(n)
+ * where n is number of open fds to O(m⋅log(m)) where m is the number of fds to keep
+ * open. Given that we assume n ≫ m that's preferable to us. */
+
+ if (n_except == 0) {
+ /* Close everything. Yay! */
+
+ if (close_range(3, -1, 0) >= 0)
+ return 1;
+
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+
+ have_close_range = false;
+ } else {
+ _cleanup_free_ int *sorted_malloc = NULL;
+ size_t n_sorted;
+ int *sorted;
+
+ assert(n_except < SIZE_MAX);
+ n_sorted = n_except + 1;
+
+ if (n_sorted > 64) /* Use heap for large numbers of fds, stack otherwise */
+ sorted = sorted_malloc = new(int, n_sorted);
+ else
+ sorted = newa(int, n_sorted);
+
+ if (sorted) {
+ int c = 0;
+
+ memcpy(sorted, except, n_except * sizeof(int));
+
+ /* Let's add fd 2 to the list of fds, to simplify the loop below, as this
+ * allows us to cover the head of the array the same way as the body */
+ sorted[n_sorted-1] = 2;
+
+ typesafe_qsort(sorted, n_sorted, cmp_int);
+
+ for (size_t i = 0; i < n_sorted-1; i++) {
+ int start, end;
+
+ start = MAX(sorted[i], 2); /* The first three fds shall always remain open */
+ end = MAX(sorted[i+1], 2);
+
+ assert(end >= start);
+
+ if (end - start <= 1)
+ continue;
+
+ /* Close everything between the start and end fds (both of which shall stay open) */
+ if (close_range(start + 1, end - 1, 0) < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+
+ have_close_range = false;
+ break;
+ }
+
+ c += end - start - 1;
+ }
+
+ if (have_close_range) {
+ /* The loop succeeded. Let's now close everything beyond the end */
+
+ if (sorted[n_sorted-1] >= INT_MAX) /* Dont let the addition below overflow */
+ return c;
+
+ if (close_range(sorted[n_sorted-1] + 1, -1, 0) >= 0)
+ return c + 1;
+
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+
+ have_close_range = false;
+ }
+ }
+ }
+
+ /* Fallback on OOM or if close_range() is not supported */
+ }
+
+ d = opendir("/proc/self/fd");
+ if (!d) {
+ int fd, max_fd;
+
+ /* When /proc isn't available (for example in chroots) the fallback is brute forcing through
+ * the fd table */
+
+ max_fd = get_max_fd();
+ if (max_fd < 0)
+ return max_fd;
+
+ /* Refuse to do the loop over more too many elements. It's better to fail immediately than to
+ * spin the CPU for a long time. */
+ if (max_fd > MAX_FD_LOOP_LIMIT)
+ return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
+ "/proc/self/fd is inaccessible. Refusing to loop over %d potential fds.",
+ max_fd);
+
+ for (fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) {
+ int q;
+
+ if (fd_in_set(fd, except, n_except))
+ continue;
+
+ q = close_nointr(fd);
+ if (q < 0 && q != -EBADF && r >= 0)
+ r = q;
+ }
+
+ return r;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int fd = -1, q;
+
+ if (safe_atoi(de->d_name, &fd) < 0)
+ /* Let's better ignore this, just in case */
+ continue;
+
+ if (fd < 3)
+ continue;
+
+ if (fd == dirfd(d))
+ continue;
+
+ if (fd_in_set(fd, except, n_except))
+ continue;
+
+ q = close_nointr(fd);
+ if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
+ r = q;
+ }
+
+ return r;
+}
+
+int same_fd(int a, int b) {
+ struct stat sta, stb;
+ pid_t pid;
+ int r, fa, fb;
+
+ assert(a >= 0);
+ assert(b >= 0);
+
+ /* Compares two file descriptors. Note that semantics are
+ * quite different depending on whether we have kcmp() or we
+ * don't. If we have kcmp() this will only return true for
+ * dup()ed file descriptors, but not otherwise. If we don't
+ * have kcmp() this will also return true for two fds of the same
+ * file, created by separate open() calls. Since we use this
+ * call mostly for filtering out duplicates in the fd store
+ * this difference hopefully doesn't matter too much. */
+
+ if (a == b)
+ return true;
+
+ /* Try to use kcmp() if we have it. */
+ pid = getpid_cached();
+ r = kcmp(pid, pid, KCMP_FILE, a, b);
+ if (r == 0)
+ return true;
+ if (r > 0)
+ return false;
+ if (!IN_SET(errno, ENOSYS, EACCES, EPERM))
+ return -errno;
+
+ /* We don't have kcmp(), use fstat() instead. */
+ if (fstat(a, &sta) < 0)
+ return -errno;
+
+ if (fstat(b, &stb) < 0)
+ return -errno;
+
+ if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
+ return false;
+
+ /* We consider all device fds different, since two device fds
+ * might refer to quite different device contexts even though
+ * they share the same inode and backing dev_t. */
+
+ if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
+ return false;
+
+ if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
+ return false;
+
+ /* The fds refer to the same inode on disk, let's also check
+ * if they have the same fd flags. This is useful to
+ * distinguish the read and write side of a pipe created with
+ * pipe(). */
+ fa = fcntl(a, F_GETFL);
+ if (fa < 0)
+ return -errno;
+
+ fb = fcntl(b, F_GETFL);
+ if (fb < 0)
+ return -errno;
+
+ return fa == fb;
+}
+
+void cmsg_close_all(struct msghdr *mh) {
+ struct cmsghdr *cmsg;
+
+ assert(mh);
+
+ CMSG_FOREACH(cmsg, mh)
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
+ close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
+}
+
+bool fdname_is_valid(const char *s) {
+ const char *p;
+
+ /* Validates a name for $LISTEN_FDNAMES. We basically allow
+ * everything ASCII that's not a control character. Also, as
+ * special exception the ":" character is not allowed, as we
+ * use that as field separator in $LISTEN_FDNAMES.
+ *
+ * Note that the empty string is explicitly allowed
+ * here. However, we limit the length of the names to 255
+ * characters. */
+
+ if (!s)
+ return false;
+
+ for (p = s; *p; p++) {
+ if (*p < ' ')
+ return false;
+ if (*p >= 127)
+ return false;
+ if (*p == ':')
+ return false;
+ }
+
+ return p - s < 256;
+}
+
+int fd_get_path(int fd, char **ret) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ int r;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ r = readlink_malloc(procfs_path, ret);
+ if (r == -ENOENT) {
+ /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's make
+ * things debuggable and distinguish the two. */
+
+ if (proc_mounted() == 0)
+ return -ENOSYS; /* /proc is not available or not set up properly, we're most likely in some chroot
+ * environment. */
+ return -EBADF; /* The directory exists, hence it's the fd that doesn't. */
+ }
+
+ return r;
+}
+
+int move_fd(int from, int to, int cloexec) {
+ int r;
+
+ /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
+ * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
+ * off, if it is > 0 it is turned on. */
+
+ if (from < 0)
+ return -EBADF;
+ if (to < 0)
+ return -EBADF;
+
+ if (from == to) {
+
+ if (cloexec >= 0) {
+ r = fd_cloexec(to, cloexec);
+ if (r < 0)
+ return r;
+ }
+
+ return to;
+ }
+
+ if (cloexec < 0) {
+ int fl;
+
+ fl = fcntl(from, F_GETFD, 0);
+ if (fl < 0)
+ return -errno;
+
+ cloexec = !!(fl & FD_CLOEXEC);
+ }
+
+ r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
+ if (r < 0)
+ return -errno;
+
+ assert(r == to);
+
+ safe_close(from);
+
+ return to;
+}
+
+int acquire_data_fd(const void *data, size_t size, unsigned flags) {
+
+ _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
+ char pattern[] = "/dev/shm/data-fd-XXXXXX";
+ _cleanup_close_ int fd = -1;
+ int isz = 0, r;
+ ssize_t n;
+ off_t f;
+
+ assert(data || size == 0);
+
+ /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
+ * complex than I wish it was. But here's why:
+ *
+ * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
+ * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
+ *
+ * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
+ * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
+ * clients can only bump their size to a system-wide limit, which might be quite low.
+ *
+ * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
+ * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
+ * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
+ *
+ * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
+ *
+ * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
+ * figure. */
+
+ if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
+ /* As a special case, return /dev/null if we have been called for an empty data block */
+ r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+ if ((flags & ACQUIRE_NO_MEMFD) == 0) {
+ fd = memfd_new("data-fd");
+ if (fd < 0)
+ goto try_pipe;
+
+ n = write(fd, data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ f = lseek(fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ r = memfd_set_sealed(fd);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(fd);
+ }
+
+try_pipe:
+ if ((flags & ACQUIRE_NO_PIPE) == 0) {
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ if ((size_t) isz < size) {
+ isz = (int) size;
+ if (isz < 0 || (size_t) isz != size)
+ return -E2BIG;
+
+ /* Try to bump the pipe size */
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
+
+ /* See if that worked */
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ if ((size_t) isz < size)
+ goto try_dev_shm;
+ }
+
+ n = write(pipefds[1], data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ (void) fd_nonblock(pipefds[0], false);
+
+ return TAKE_FD(pipefds[0]);
+ }
+
+try_dev_shm:
+ if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
+ fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
+ if (fd < 0)
+ goto try_dev_shm_without_o_tmpfile;
+
+ n = write(fd, data, size);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n != size)
+ return -EIO;
+
+ /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
+ return fd_reopen(fd, O_RDONLY|O_CLOEXEC);
+ }
+
+try_dev_shm_without_o_tmpfile:
+ if ((flags & ACQUIRE_NO_REGULAR) == 0) {
+ fd = mkostemp_safe(pattern);
+ if (fd < 0)
+ return fd;
+
+ n = write(fd, data, size);
+ if (n < 0) {
+ r = -errno;
+ goto unlink_and_return;
+ }
+ if ((size_t) n != size) {
+ r = -EIO;
+ goto unlink_and_return;
+ }
+
+ /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
+ r = open(pattern, O_RDONLY|O_CLOEXEC);
+ if (r < 0)
+ r = -errno;
+
+ unlink_and_return:
+ (void) unlink(pattern);
+ return r;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
+#define DATA_FD_MEMORY_LIMIT (64U*1024U)
+
+/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's large than that use /var/tmp instead. */
+#define DATA_FD_TMP_LIMIT (1024U*1024U)
+
+int fd_duplicate_data_fd(int fd) {
+
+ _cleanup_close_ int copy_fd = -1, tmp_fd = -1;
+ _cleanup_free_ void *remains = NULL;
+ size_t remains_size = 0;
+ const char *td;
+ struct stat st;
+ int r;
+
+ /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
+ * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
+ * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
+ * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
+ * /var/tmp. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* For now, let's only accept regular files, sockets, pipes and char devices */
+ if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
+ return -EBADFD;
+
+ /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
+ * that we use the reported regular file size only as a hint, given that there are plenty special files in
+ * /proc and /sys which report a zero file size but can be read from. */
+
+ if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) {
+
+ /* Try a memfd first */
+ copy_fd = memfd_new("data-fd");
+ if (copy_fd >= 0) {
+ off_t f;
+
+ r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
+ if (r < 0)
+ return r;
+
+ f = lseek(copy_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ if (r == 0) {
+ /* Did it fit into the limit? If so, we are done. */
+ r = memfd_set_sealed(copy_fd);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(copy_fd);
+ }
+
+ /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
+
+ } else {
+ _cleanup_(close_pairp) int pipefds[2] = { -1, -1 };
+ int isz;
+
+ /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
+ * then block indefinitely when we hit the pipe size limit */
+
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ /* Try to enlarge the pipe size if necessary */
+ if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {
+
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+ }
+
+ if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {
+
+ r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size, NULL, NULL);
+ if (r < 0 && r != -EAGAIN)
+ return r; /* If we get EAGAIN it could be because of the source or because of
+ * the destination fd, we can't know, as sendfile() and friends won't
+ * tell us. Hence, treat this as reason to fall back, just to be
+ * sure. */
+ if (r == 0) {
+ /* Everything fit in, yay! */
+ (void) fd_nonblock(pipefds[0], false);
+
+ return TAKE_FD(pipefds[0]);
+ }
+
+ /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
+ * when writing the new file we incorporate this first. */
+ copy_fd = TAKE_FD(pipefds[0]);
+ }
+ }
+ }
+
+ /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
+ if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) &&
+ (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
+ off_t f;
+
+ tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
+ * temporary file first. */
+
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
+ * failed copy operation, let's flush them out next. */
+
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ goto finish; /* Yay, it fit in */
+
+ /* It didn't fit in. Let's not forget to use what we already used */
+ f = lseek(tmp_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ CLOSE_AND_REPLACE(copy_fd, tmp_fd);
+
+ remains = mfree(remains);
+ remains_size = 0;
+ }
+
+ /* As last fallback use /var/tmp */
+ r = var_tmp_dir(&td);
+ if (r < 0)
+ return r;
+
+ tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, than copy this
+ * into the temporary file first. */
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* Then, copy in any read but not yet written bytes. */
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* Copy in the rest */
+ r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+
+finish:
+ /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
+ * file again */
+
+ return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC);
+}
+
+int fd_move_above_stdio(int fd) {
+ int flags, copy;
+ PROTECT_ERRNO;
+
+ /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
+ * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
+ * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
+ * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
+ * stdin/stdout/stderr of unrelated code.
+ *
+ * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
+ * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
+ * been closed before.
+ *
+ * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
+ * error we simply return the original file descriptor, and we do not touch errno. */
+
+ if (fd < 0 || fd > 2)
+ return fd;
+
+ flags = fcntl(fd, F_GETFD, 0);
+ if (flags < 0)
+ return fd;
+
+ if (flags & FD_CLOEXEC)
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ else
+ copy = fcntl(fd, F_DUPFD, 3);
+ if (copy < 0)
+ return fd;
+
+ assert(copy > 2);
+
+ (void) close(fd);
+ return copy;
+}
+
+int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
+
+ int fd[3] = { /* Put together an array of fds we work on */
+ original_input_fd,
+ original_output_fd,
+ original_error_fd
+ };
+
+ int r, i,
+ null_fd = -1, /* if we open /dev/null, we store the fd to it here */
+ copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
+ bool null_readable, null_writable;
+
+ /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
+ * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
+ * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
+ * on.
+ *
+ * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
+ * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
+ *
+ * Note that when this function fails stdin/stdout/stderr might remain half set up!
+ *
+ * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
+ * stdin/stdout/stderr). */
+
+ null_readable = original_input_fd < 0;
+ null_writable = original_output_fd < 0 || original_error_fd < 0;
+
+ /* First step, open /dev/null once, if we need it */
+ if (null_readable || null_writable) {
+
+ /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
+ null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
+ null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
+ if (null_fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* If this fd is in the 0…2 range, let's move it out of it */
+ if (null_fd < 3) {
+ int copy;
+
+ copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
+ if (copy < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ CLOSE_AND_REPLACE(null_fd, copy);
+ }
+ }
+
+ /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
+ for (i = 0; i < 3; i++) {
+
+ if (fd[i] < 0)
+ fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */
+ else if (fd[i] != i && fd[i] < 3) {
+ /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
+ copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
+ if (copy_fd[i] < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ fd[i] = copy_fd[i];
+ }
+ }
+
+ /* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we
+ * have freedom to move them around. If the fds already were at the right places then the specific fds are
+ * -1. Let's now move them to the right places. This is the point of no return. */
+ for (i = 0; i < 3; i++) {
+
+ if (fd[i] == i) {
+
+ /* fd is already in place, but let's make sure O_CLOEXEC is off */
+ r = fd_cloexec(i, false);
+ if (r < 0)
+ goto finish;
+
+ } else {
+ assert(fd[i] > 2);
+
+ if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
+ r = -errno;
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
+ * fd passed in multiple times. */
+ safe_close_above_stdio(original_input_fd);
+ if (original_output_fd != original_input_fd)
+ safe_close_above_stdio(original_output_fd);
+ if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
+ safe_close_above_stdio(original_error_fd);
+
+ /* Close the copies we moved > 2 */
+ for (i = 0; i < 3; i++)
+ safe_close(copy_fd[i]);
+
+ /* Close our null fd, if it's > 2 */
+ safe_close_above_stdio(null_fd);
+
+ return r;
+}
+
+int fd_reopen(int fd, int flags) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ int new_fd;
+
+ /* Reopens the specified fd with new flags. This is useful for convert an O_PATH fd into a regular one, or to
+ * turn O_RDWR fds into O_RDONLY fds.
+ *
+ * This doesn't work on sockets (since they cannot be open()ed, ever).
+ *
+ * This implicitly resets the file read index to 0. */
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ new_fd = open(procfs_path, flags);
+ if (new_fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (proc_mounted() == 0)
+ return -ENOSYS; /* if we have no /proc/, the concept is not implementable */
+
+ return -ENOENT;
+ }
+
+ return new_fd;
+}
+
+int read_nr_open(void) {
+ _cleanup_free_ char *nr_open = NULL;
+ int r;
+
+ /* Returns the kernel's current fd limit, either by reading it of /proc/sys if that works, or using the
+ * hard-coded default compiled-in value of current kernels (1M) if not. This call will never fail. */
+
+ r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m");
+ else {
+ int v;
+
+ r = safe_atoi(nr_open, &v);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", nr_open);
+ else
+ return v;
+ }
+
+ /* If we fail, fall back to the hard-coded kernel limit of 1024 * 1024. */
+ return 1024 * 1024;
+}
diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h
new file mode 100644
index 0000000..2162537
--- /dev/null
+++ b/src/basic/fd-util.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "macro.h"
+
+/* Make sure we can distinguish fd 0 and NULL */
+#define FD_TO_PTR(fd) INT_TO_PTR((fd)+1)
+#define PTR_TO_FD(p) (PTR_TO_INT(p)-1)
+
+int close_nointr(int fd);
+int safe_close(int fd);
+void safe_close_pair(int p[static 2]);
+
+static inline int safe_close_above_stdio(int fd) {
+ if (fd < 3) /* Don't close stdin/stdout/stderr, but still invalidate the fd by returning -1 */
+ return -1;
+
+ return safe_close(fd);
+}
+
+void close_many(const int fds[], size_t n_fd);
+
+int fclose_nointr(FILE *f);
+FILE* safe_fclose(FILE *f);
+DIR* safe_closedir(DIR *f);
+
+static inline void closep(int *fd) {
+ safe_close(*fd);
+}
+
+static inline void close_pairp(int (*p)[2]) {
+ safe_close_pair(*p);
+}
+
+static inline void fclosep(FILE **f) {
+ safe_fclose(*f);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, pclose);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DIR*, closedir);
+
+#define _cleanup_close_ _cleanup_(closep)
+#define _cleanup_fclose_ _cleanup_(fclosep)
+#define _cleanup_pclose_ _cleanup_(pclosep)
+#define _cleanup_closedir_ _cleanup_(closedirp)
+#define _cleanup_close_pair_ _cleanup_(close_pairp)
+
+int fd_nonblock(int fd, bool nonblock);
+int fd_cloexec(int fd, bool cloexec);
+
+int close_all_fds(const int except[], size_t n_except);
+
+int same_fd(int a, int b);
+
+void cmsg_close_all(struct msghdr *mh);
+
+bool fdname_is_valid(const char *s);
+
+int fd_get_path(int fd, char **ret);
+
+int move_fd(int from, int to, int cloexec);
+
+enum {
+ ACQUIRE_NO_DEV_NULL = 1 << 0,
+ ACQUIRE_NO_MEMFD = 1 << 1,
+ ACQUIRE_NO_PIPE = 1 << 2,
+ ACQUIRE_NO_TMPFILE = 1 << 3,
+ ACQUIRE_NO_REGULAR = 1 << 4,
+};
+
+int acquire_data_fd(const void *data, size_t size, unsigned flags);
+
+int fd_duplicate_data_fd(int fd);
+
+int fd_move_above_stdio(int fd);
+
+int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd);
+
+static inline int make_null_stdio(void) {
+ return rearrange_stdio(-1, -1, -1);
+}
+
+/* Like TAKE_PTR() but for file descriptors, resetting them to -1 */
+#define TAKE_FD(fd) \
+ ({ \
+ int _fd_ = (fd); \
+ (fd) = -1; \
+ _fd_; \
+ })
+
+/* Like free_and_replace(), but for file descriptors */
+#define CLOSE_AND_REPLACE(a, b) \
+ ({ \
+ int *_fdp_ = &(a); \
+ safe_close(*_fdp_); \
+ *_fdp_ = TAKE_FD(b); \
+ 0; \
+ })
+
+
+int fd_reopen(int fd, int flags);
+
+int read_nr_open(void);
diff --git a/src/basic/fileio.c b/src/basic/fileio.c
new file mode 100644
index 0000000..973756c
--- /dev/null
+++ b/src/basic/fileio.c
@@ -0,0 +1,1320 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+
+#define READ_FULL_BYTES_MAX (4U*1024U*1024U)
+
+int fopen_unlocked(const char *path, const char *options, FILE **ret) {
+ assert(ret);
+
+ FILE *f = fopen(path, options);
+ if (!f)
+ return -errno;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ *ret = f;
+ return 0;
+}
+
+int fdopen_unlocked(int fd, const char *options, FILE **ret) {
+ assert(ret);
+
+ FILE *f = fdopen(fd, options);
+ if (!f)
+ return -errno;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ *ret = f;
+ return 0;
+}
+
+int take_fdopen_unlocked(int *fd, const char *options, FILE **ret) {
+ int r;
+
+ assert(fd);
+
+ r = fdopen_unlocked(*fd, options, ret);
+ if (r < 0)
+ return r;
+
+ *fd = -1;
+
+ return 0;
+}
+
+FILE* take_fdopen(int *fd, const char *options) {
+ assert(fd);
+
+ FILE *f = fdopen(*fd, options);
+ if (!f)
+ return NULL;
+
+ *fd = -1;
+
+ return f;
+}
+
+DIR* take_fdopendir(int *dfd) {
+ assert(dfd);
+
+ DIR *d = fdopendir(*dfd);
+ if (!d)
+ return NULL;
+
+ *dfd = -1;
+
+ return d;
+}
+
+FILE* open_memstream_unlocked(char **ptr, size_t *sizeloc) {
+ FILE *f = open_memstream(ptr, sizeloc);
+ if (!f)
+ return NULL;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ return f;
+}
+
+FILE* fmemopen_unlocked(void *buf, size_t size, const char *mode) {
+ FILE *f = fmemopen(buf, size, mode);
+ if (!f)
+ return NULL;
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ return f;
+}
+
+int write_string_stream_ts(
+ FILE *f,
+ const char *line,
+ WriteStringFileFlags flags,
+ const struct timespec *ts) {
+
+ bool needs_nl;
+ int r, fd;
+
+ assert(f);
+ assert(line);
+
+ if (ferror(f))
+ return -EIO;
+
+ if (ts) {
+ /* If we shall set the timestamp we need the fd. But fmemopen() streams generally don't have
+ * an fd. Let's fail early in that case. */
+ fd = fileno(f);
+ if (fd < 0)
+ return -EBADF;
+ }
+
+ needs_nl = !(flags & WRITE_STRING_FILE_AVOID_NEWLINE) && !endswith(line, "\n");
+
+ if (needs_nl && (flags & WRITE_STRING_FILE_DISABLE_BUFFER)) {
+ /* If STDIO buffering was disabled, then let's append the newline character to the string itself, so
+ * that the write goes out in one go, instead of two */
+
+ line = strjoina(line, "\n");
+ needs_nl = false;
+ }
+
+ if (fputs(line, f) == EOF)
+ return -errno;
+
+ if (needs_nl)
+ if (fputc('\n', f) == EOF)
+ return -errno;
+
+ if (flags & WRITE_STRING_FILE_SYNC)
+ r = fflush_sync_and_check(f);
+ else
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ if (ts) {
+ const struct timespec twice[2] = {*ts, *ts};
+
+ if (futimens(fd, twice) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int write_string_file_atomic(
+ const char *fn,
+ const char *line,
+ WriteStringFileFlags flags,
+ const struct timespec *ts) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(fn);
+ assert(line);
+
+ /* Note that we'd really like to use O_TMPFILE here, but can't really, since we want replacement
+ * semantics here, and O_TMPFILE can't offer that. i.e. rename() replaces but linkat() doesn't. */
+
+ r = fopen_temporary(fn, &f, &p);
+ if (r < 0)
+ return r;
+
+ r = write_string_stream_ts(f, line, flags, ts);
+ if (r < 0)
+ goto fail;
+
+ r = fchmod_umask(fileno(f), FLAGS_SET(flags, WRITE_STRING_FILE_MODE_0600) ? 0600 : 0644);
+ if (r < 0)
+ goto fail;
+
+ if (rename(p, fn) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (FLAGS_SET(flags, WRITE_STRING_FILE_SYNC)) {
+ /* Sync the rename, too */
+ r = fsync_directory_of_file(fileno(f));
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(p);
+ return r;
+}
+
+int write_string_file_ts(
+ const char *fn,
+ const char *line,
+ WriteStringFileFlags flags,
+ const struct timespec *ts) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ int q, r, fd;
+
+ assert(fn);
+ assert(line);
+
+ /* We don't know how to verify whether the file contents was already on-disk. */
+ assert(!((flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE) && (flags & WRITE_STRING_FILE_SYNC)));
+
+ if (flags & WRITE_STRING_FILE_MKDIR_0755) {
+ r = mkdir_parents(fn, 0755);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & WRITE_STRING_FILE_ATOMIC) {
+ assert(flags & WRITE_STRING_FILE_CREATE);
+
+ r = write_string_file_atomic(fn, line, flags, ts);
+ if (r < 0)
+ goto fail;
+
+ return r;
+ } else
+ assert(!ts);
+
+ /* We manually build our own version of fopen(..., "we") that works without O_CREAT and with O_NOFOLLOW if needed. */
+ fd = open(fn, O_WRONLY|O_CLOEXEC|O_NOCTTY |
+ (FLAGS_SET(flags, WRITE_STRING_FILE_NOFOLLOW) ? O_NOFOLLOW : 0) |
+ (FLAGS_SET(flags, WRITE_STRING_FILE_CREATE) ? O_CREAT : 0) |
+ (FLAGS_SET(flags, WRITE_STRING_FILE_TRUNCATE) ? O_TRUNC : 0),
+ (FLAGS_SET(flags, WRITE_STRING_FILE_MODE_0600) ? 0600 : 0666));
+ if (fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = fdopen_unlocked(fd, "w", &f);
+ if (r < 0) {
+ safe_close(fd);
+ goto fail;
+ }
+
+ if (flags & WRITE_STRING_FILE_DISABLE_BUFFER)
+ setvbuf(f, NULL, _IONBF, 0);
+
+ r = write_string_stream_ts(f, line, flags, ts);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ if (!(flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE))
+ return r;
+
+ f = safe_fclose(f);
+
+ /* OK, the operation failed, but let's see if the right
+ * contents in place already. If so, eat up the error. */
+
+ q = verify_file(fn, line, !(flags & WRITE_STRING_FILE_AVOID_NEWLINE));
+ if (q <= 0)
+ return r;
+
+ return 0;
+}
+
+int write_string_filef(
+ const char *fn,
+ WriteStringFileFlags flags,
+ const char *format, ...) {
+
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return write_string_file(fn, p, flags);
+}
+
+int read_one_line_file(const char *fn, char **line) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(fn);
+ assert(line);
+
+ r = fopen_unlocked(fn, "re", &f);
+ if (r < 0)
+ return r;
+
+ return read_line(f, LONG_LINE_MAX, line);
+}
+
+int verify_file(const char *fn, const char *blob, bool accept_extra_nl) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *buf = NULL;
+ size_t l, k;
+ int r;
+
+ assert(fn);
+ assert(blob);
+
+ l = strlen(blob);
+
+ if (accept_extra_nl && endswith(blob, "\n"))
+ accept_extra_nl = false;
+
+ buf = malloc(l + accept_extra_nl + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ r = fopen_unlocked(fn, "re", &f);
+ if (r < 0)
+ return r;
+
+ /* We try to read one byte more than we need, so that we know whether we hit eof */
+ errno = 0;
+ k = fread(buf, 1, l + accept_extra_nl + 1, f);
+ if (ferror(f))
+ return errno_or_else(EIO);
+
+ if (k != l && k != l + accept_extra_nl)
+ return 0;
+ if (memcmp(buf, blob, l) != 0)
+ return 0;
+ if (k > l && buf[l] != '\n')
+ return 0;
+
+ return 1;
+}
+
+int read_full_virtual_file(const char *filename, char **ret_contents, size_t *ret_size) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ size_t n, size;
+ int n_retries;
+ char *p;
+
+ assert(ret_contents);
+
+ /* Virtual filesystems such as sysfs or procfs use kernfs, and kernfs can work
+ * with two sorts of virtual files. One sort uses "seq_file", and the results of
+ * the first read are buffered for the second read. The other sort uses "raw"
+ * reads which always go direct to the device. In the latter case, the content of
+ * the virtual file must be retrieved with a single read otherwise a second read
+ * might get the new value instead of finding EOF immediately. That's the reason
+ * why the usage of fread(3) is prohibited in this case as it always performs a
+ * second call to read(2) looking for EOF. See issue 13585. */
+
+ fd = open(filename, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ /* Start size for files in /proc which usually report a file size of 0. */
+ size = LINE_MAX / 2;
+
+ /* Limit the number of attempts to read the number of bytes returned by fstat(). */
+ n_retries = 3;
+
+ for (;;) {
+ if (n_retries <= 0)
+ return -EIO;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISREG(st.st_mode))
+ return -EBADF;
+
+ /* Be prepared for files from /proc which generally report a file size of 0. */
+ if (st.st_size > 0) {
+ size = st.st_size;
+ n_retries--;
+ } else
+ size = size * 2;
+
+ if (size > READ_FULL_BYTES_MAX)
+ return -E2BIG;
+
+ p = realloc(buf, size + 1);
+ if (!p)
+ return -ENOMEM;
+ buf = TAKE_PTR(p);
+
+ for (;;) {
+ ssize_t k;
+
+ /* Read one more byte so we can detect whether the content of the
+ * file has already changed or the guessed size for files from /proc
+ * wasn't large enough . */
+ k = read(fd, buf, size + 1);
+ if (k >= 0) {
+ n = k;
+ break;
+ }
+
+ if (errno != EINTR)
+ return -errno;
+ }
+
+ /* Consider a short read as EOF */
+ if (n <= size)
+ break;
+
+ /* Hmm... either we read too few bytes from /proc or less likely the content
+ * of the file might have been changed (and is now bigger) while we were
+ * processing, let's try again either with a bigger guessed size or the new
+ * file size. */
+
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ return -errno;
+ }
+
+ if (n < size) {
+ p = realloc(buf, n + 1);
+ if (!p)
+ return -ENOMEM;
+ buf = TAKE_PTR(p);
+ }
+
+ if (!ret_size) {
+ /* Safety check: if the caller doesn't want to know the size of what we
+ * just read it will rely on the trailing NUL byte. But if there's an
+ * embedded NUL byte, then we should refuse operation as otherwise
+ * there'd be ambiguity about what we just read. */
+
+ if (memchr(buf, 0, n))
+ return -EBADMSG;
+ } else
+ *ret_size = n;
+
+ buf[n] = 0;
+ *ret_contents = TAKE_PTR(buf);
+
+ return 0;
+}
+
+int read_full_stream_full(
+ FILE *f,
+ const char *filename,
+ ReadFullFileFlags flags,
+ char **ret_contents,
+ size_t *ret_size) {
+
+ _cleanup_free_ char *buf = NULL;
+ struct stat st;
+ size_t n, n_next, l;
+ int fd, r;
+
+ assert(f);
+ assert(ret_contents);
+ assert(!FLAGS_SET(flags, READ_FULL_FILE_UNBASE64 | READ_FULL_FILE_UNHEX));
+
+ n_next = LINE_MAX; /* Start size */
+
+ fd = fileno(f);
+ if (fd >= 0) { /* If the FILE* object is backed by an fd (as opposed to memory or such, see fmemopen()), let's
+ * optimize our buffering */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (S_ISREG(st.st_mode)) {
+
+ /* Safety check */
+ if (st.st_size > READ_FULL_BYTES_MAX)
+ return -E2BIG;
+
+ /* Start with the right file size. Note that we increase the size
+ * to read here by one, so that the first read attempt already
+ * makes us notice the EOF. */
+ if (st.st_size > 0)
+ n_next = st.st_size + 1;
+
+ if (flags & READ_FULL_FILE_WARN_WORLD_READABLE)
+ (void) warn_file_is_world_accessible(filename, &st, NULL, 0);
+ }
+ }
+
+ n = l = 0;
+ for (;;) {
+ char *t;
+ size_t k;
+
+ if (flags & READ_FULL_FILE_SECURE) {
+ t = malloc(n_next + 1);
+ if (!t) {
+ r = -ENOMEM;
+ goto finalize;
+ }
+ memcpy_safe(t, buf, n);
+ explicit_bzero_safe(buf, n);
+ buf = mfree(buf);
+ } else {
+ t = realloc(buf, n_next + 1);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ buf = t;
+ n = n_next;
+
+ errno = 0;
+ k = fread(buf + l, 1, n - l, f);
+
+ assert(k <= n - l);
+ l += k;
+
+ if (ferror(f)) {
+ r = errno_or_else(EIO);
+ goto finalize;
+ }
+ if (feof(f))
+ break;
+
+ assert(k > 0); /* we can't have read zero bytes because that would have been EOF */
+
+ /* Safety check */
+ if (n >= READ_FULL_BYTES_MAX) {
+ r = -E2BIG;
+ goto finalize;
+ }
+
+ n_next = MIN(n * 2, READ_FULL_BYTES_MAX);
+ }
+
+ if (flags & (READ_FULL_FILE_UNBASE64 | READ_FULL_FILE_UNHEX)) {
+ _cleanup_free_ void *decoded = NULL;
+ size_t decoded_size;
+
+ buf[l++] = 0;
+ if (flags & READ_FULL_FILE_UNBASE64)
+ r = unbase64mem_full(buf, l, flags & READ_FULL_FILE_SECURE, &decoded, &decoded_size);
+ else
+ r = unhexmem_full(buf, l, flags & READ_FULL_FILE_SECURE, &decoded, &decoded_size);
+ if (r < 0)
+ goto finalize;
+
+ if (flags & READ_FULL_FILE_SECURE)
+ explicit_bzero_safe(buf, n);
+ free_and_replace(buf, decoded);
+ n = l = decoded_size;
+ }
+
+ if (!ret_size) {
+ /* Safety check: if the caller doesn't want to know the size of what we just read it will rely on the
+ * trailing NUL byte. But if there's an embedded NUL byte, then we should refuse operation as otherwise
+ * there'd be ambiguity about what we just read. */
+
+ if (memchr(buf, 0, l)) {
+ r = -EBADMSG;
+ goto finalize;
+ }
+ }
+
+ buf[l] = 0;
+ *ret_contents = TAKE_PTR(buf);
+
+ if (ret_size)
+ *ret_size = l;
+
+ return 0;
+
+finalize:
+ if (flags & READ_FULL_FILE_SECURE)
+ explicit_bzero_safe(buf, n);
+
+ return r;
+}
+
+int read_full_file_full(
+ int dir_fd,
+ const char *filename,
+ ReadFullFileFlags flags,
+ const char *bind_name,
+ char **contents, size_t *size) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(filename);
+ assert(contents);
+
+ r = xfopenat(dir_fd, filename, "re", 0, &f);
+ if (r < 0) {
+ _cleanup_close_ int dfd = -1, sk = -1;
+ union sockaddr_union sa;
+
+ /* ENXIO is what Linux returns if we open a node that is an AF_UNIX socket */
+ if (r != -ENXIO)
+ return r;
+
+ /* If this is enabled, let's try to connect to it */
+ if (!FLAGS_SET(flags, READ_FULL_FILE_CONNECT_SOCKET))
+ return -ENXIO;
+
+ if (dir_fd == AT_FDCWD)
+ r = sockaddr_un_set_path(&sa.un, filename);
+ else {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+
+ /* If we shall operate relative to some directory, then let's use O_PATH first to
+ * open the socket inode, and then connect to it via /proc/self/fd/. We have to do
+ * this since there's not connectat() that takes a directory fd as first arg. */
+
+ dfd = openat(dir_fd, filename, O_PATH|O_CLOEXEC);
+ if (dfd < 0)
+ return -errno;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", dfd);
+ r = sockaddr_un_set_path(&sa.un, procfs_path);
+ }
+ if (r < 0)
+ return r;
+
+ sk = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ if (sk < 0)
+ return -errno;
+
+ if (bind_name) {
+ /* If the caller specified a socket name to bind to, do so before connecting. This is
+ * useful to communicate some minor, short meta-information token from the client to
+ * the server. */
+ union sockaddr_union bsa;
+
+ r = sockaddr_un_set_path(&bsa.un, bind_name);
+ if (r < 0)
+ return r;
+
+ if (bind(sk, &bsa.sa, r) < 0)
+ return r;
+ }
+
+ if (connect(sk, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+ return errno == ENOTSOCK ? -ENXIO : -errno; /* propagate original error if this is
+ * not a socket after all */
+
+ if (shutdown(sk, SHUT_WR) < 0)
+ return -errno;
+
+ f = fdopen(sk, "r");
+ if (!f)
+ return -errno;
+
+ TAKE_FD(sk);
+ }
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+ return read_full_stream_full(f, filename, flags, contents, size);
+}
+
+int executable_is_script(const char *path, char **interpreter) {
+ _cleanup_free_ char *line = NULL;
+ size_t len;
+ char *ans;
+ int r;
+
+ assert(path);
+
+ r = read_one_line_file(path, &line);
+ if (r == -ENOBUFS) /* First line overly long? if so, then it's not a script */
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (!startswith(line, "#!"))
+ return 0;
+
+ ans = strstrip(line + 2);
+ len = strcspn(ans, " \t");
+
+ if (len == 0)
+ return 0;
+
+ ans = strndup(ans, len);
+ if (!ans)
+ return -ENOMEM;
+
+ *interpreter = ans;
+ return 1;
+}
+
+/**
+ * Retrieve one field from a file like /proc/self/status. pattern
+ * should not include whitespace or the delimiter (':'). pattern matches only
+ * the beginning of a line. Whitespace before ':' is skipped. Whitespace and
+ * zeros after the ':' will be skipped. field must be freed afterwards.
+ * terminator specifies the terminating characters of the field value (not
+ * included in the value).
+ */
+int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field) {
+ _cleanup_free_ char *status = NULL;
+ char *t, *f;
+ size_t len;
+ int r;
+
+ assert(terminator);
+ assert(filename);
+ assert(pattern);
+ assert(field);
+
+ r = read_full_virtual_file(filename, &status, NULL);
+ if (r < 0)
+ return r;
+
+ t = status;
+
+ do {
+ bool pattern_ok;
+
+ do {
+ t = strstr(t, pattern);
+ if (!t)
+ return -ENOENT;
+
+ /* Check that pattern occurs in beginning of line. */
+ pattern_ok = (t == status || t[-1] == '\n');
+
+ t += strlen(pattern);
+
+ } while (!pattern_ok);
+
+ t += strspn(t, " \t");
+ if (!*t)
+ return -ENOENT;
+
+ } while (*t != ':');
+
+ t++;
+
+ if (*t) {
+ t += strspn(t, " \t");
+
+ /* Also skip zeros, because when this is used for
+ * capabilities, we don't want the zeros. This way the
+ * same capability set always maps to the same string,
+ * irrespective of the total capability set size. For
+ * other numbers it shouldn't matter. */
+ t += strspn(t, "0");
+ /* Back off one char if there's nothing but whitespace
+ and zeros */
+ if (!*t || isspace(*t))
+ t--;
+ }
+
+ len = strcspn(t, terminator);
+
+ f = strndup(t, len);
+ if (!f)
+ return -ENOMEM;
+
+ *field = f;
+ return 0;
+}
+
+DIR *xopendirat(int fd, const char *name, int flags) {
+ int nfd;
+ DIR *d;
+
+ assert(!(flags & O_CREAT));
+
+ nfd = openat(fd, name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|flags, 0);
+ if (nfd < 0)
+ return NULL;
+
+ d = fdopendir(nfd);
+ if (!d) {
+ safe_close(nfd);
+ return NULL;
+ }
+
+ return d;
+}
+
+static int mode_to_flags(const char *mode) {
+ const char *p;
+ int flags;
+
+ if ((p = startswith(mode, "r+")))
+ flags = O_RDWR;
+ else if ((p = startswith(mode, "r")))
+ flags = O_RDONLY;
+ else if ((p = startswith(mode, "w+")))
+ flags = O_RDWR|O_CREAT|O_TRUNC;
+ else if ((p = startswith(mode, "w")))
+ flags = O_WRONLY|O_CREAT|O_TRUNC;
+ else if ((p = startswith(mode, "a+")))
+ flags = O_RDWR|O_CREAT|O_APPEND;
+ else if ((p = startswith(mode, "a")))
+ flags = O_WRONLY|O_CREAT|O_APPEND;
+ else
+ return -EINVAL;
+
+ for (; *p != 0; p++) {
+
+ switch (*p) {
+
+ case 'e':
+ flags |= O_CLOEXEC;
+ break;
+
+ case 'x':
+ flags |= O_EXCL;
+ break;
+
+ case 'm':
+ /* ignore this here, fdopen() might care later though */
+ break;
+
+ case 'c': /* not sure what to do about this one */
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return flags;
+}
+
+int xfopenat(int dir_fd, const char *path, const char *mode, int flags, FILE **ret) {
+ FILE *f;
+
+ /* A combination of fopen() with openat() */
+
+ if (dir_fd == AT_FDCWD && flags == 0) {
+ f = fopen(path, mode);
+ if (!f)
+ return -errno;
+ } else {
+ int fd, mode_flags;
+
+ mode_flags = mode_to_flags(mode);
+ if (mode_flags < 0)
+ return mode_flags;
+
+ fd = openat(dir_fd, path, mode_flags | flags);
+ if (fd < 0)
+ return -errno;
+
+ f = fdopen(fd, mode);
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+ }
+
+ *ret = f;
+ return 0;
+}
+
+static int search_and_fopen_internal(const char *path, const char *mode, const char *root, char **search, FILE **_f) {
+ char **i;
+
+ assert(path);
+ assert(mode);
+ assert(_f);
+
+ if (!path_strv_resolve_uniq(search, root))
+ return -ENOMEM;
+
+ STRV_FOREACH(i, search) {
+ _cleanup_free_ char *p = NULL;
+ FILE *f;
+
+ p = path_join(root, *i, path);
+ if (!p)
+ return -ENOMEM;
+
+ f = fopen(p, mode);
+ if (f) {
+ *_f = f;
+ return 0;
+ }
+
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ return -ENOENT;
+}
+
+int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **_f) {
+ _cleanup_strv_free_ char **copy = NULL;
+
+ assert(path);
+ assert(mode);
+ assert(_f);
+
+ if (path_is_absolute(path)) {
+ FILE *f;
+
+ f = fopen(path, mode);
+ if (f) {
+ *_f = f;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ copy = strv_copy((char**) search);
+ if (!copy)
+ return -ENOMEM;
+
+ return search_and_fopen_internal(path, mode, root, copy, _f);
+}
+
+int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **_f) {
+ _cleanup_strv_free_ char **s = NULL;
+
+ if (path_is_absolute(path)) {
+ FILE *f;
+
+ f = fopen(path, mode);
+ if (f) {
+ *_f = f;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ s = strv_split_nulstr(search);
+ if (!s)
+ return -ENOMEM;
+
+ return search_and_fopen_internal(path, mode, root, s, _f);
+}
+
+int chase_symlinks_and_fopen_unlocked(
+ const char *path,
+ const char *root,
+ unsigned chase_flags,
+ const char *open_flags,
+ FILE **ret_file,
+ char **ret_path) {
+
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *final_path = NULL;
+ int mode_flags, r;
+ FILE *f;
+
+ assert(path);
+ assert(open_flags);
+ assert(ret_file);
+
+ mode_flags = mode_to_flags(open_flags);
+ if (mode_flags < 0)
+ return mode_flags;
+
+ fd = chase_symlinks_and_open(path, root, chase_flags, mode_flags, ret_path ? &final_path : NULL);
+ if (fd < 0)
+ return fd;
+
+ r = fdopen_unlocked(fd, open_flags, &f);
+ if (r < 0)
+ return r;
+ TAKE_FD(fd);
+
+ *ret_file = f;
+ if (ret_path)
+ *ret_path = TAKE_PTR(final_path);
+ return 0;
+}
+
+int fflush_and_check(FILE *f) {
+ assert(f);
+
+ errno = 0;
+ fflush(f);
+
+ if (ferror(f))
+ return errno_or_else(EIO);
+
+ return 0;
+}
+
+int fflush_sync_and_check(FILE *f) {
+ int r, fd;
+
+ assert(f);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ /* Not all file streams have an fd associated (think: fmemopen()), let's handle this gracefully and
+ * assume that in that case we need no explicit syncing */
+ fd = fileno(f);
+ if (fd < 0)
+ return 0;
+
+ if (fsync(fd) < 0)
+ return -errno;
+
+ r = fsync_directory_of_file(fd);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int write_timestamp_file_atomic(const char *fn, usec_t n) {
+ char ln[DECIMAL_STR_MAX(n)+2];
+
+ /* Creates a "timestamp" file, that contains nothing but a
+ * usec_t timestamp, formatted in ASCII. */
+
+ if (n <= 0 || n >= USEC_INFINITY)
+ return -ERANGE;
+
+ xsprintf(ln, USEC_FMT "\n", n);
+
+ return write_string_file(fn, ln, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+}
+
+int read_timestamp_file(const char *fn, usec_t *ret) {
+ _cleanup_free_ char *ln = NULL;
+ uint64_t t;
+ int r;
+
+ r = read_one_line_file(fn, &ln);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(ln, &t);
+ if (r < 0)
+ return r;
+
+ if (t <= 0 || t >= (uint64_t) USEC_INFINITY)
+ return -ERANGE;
+
+ *ret = (usec_t) t;
+ return 0;
+}
+
+int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space) {
+ int r;
+
+ assert(s);
+
+ /* Outputs the specified string with fputs(), but optionally prefixes it with a separator. The *space parameter
+ * when specified shall initially point to a boolean variable initialized to false. It is set to true after the
+ * first invocation. This call is supposed to be use in loops, where a separator shall be inserted between each
+ * element, but not before the first one. */
+
+ if (!f)
+ f = stdout;
+
+ if (space) {
+ if (!separator)
+ separator = " ";
+
+ if (*space) {
+ r = fputs(separator, f);
+ if (r < 0)
+ return r;
+ }
+
+ *space = true;
+ }
+
+ return fputs(s, f);
+}
+
+/* A bitmask of the EOL markers we know */
+typedef enum EndOfLineMarker {
+ EOL_NONE = 0,
+ EOL_ZERO = 1 << 0, /* \0 (aka NUL) */
+ EOL_TEN = 1 << 1, /* \n (aka NL, aka LF) */
+ EOL_THIRTEEN = 1 << 2, /* \r (aka CR) */
+} EndOfLineMarker;
+
+static EndOfLineMarker categorize_eol(char c, ReadLineFlags flags) {
+
+ if (!IN_SET(flags, READ_LINE_ONLY_NUL)) {
+ if (c == '\n')
+ return EOL_TEN;
+ if (c == '\r')
+ return EOL_THIRTEEN;
+ }
+
+ if (c == '\0')
+ return EOL_ZERO;
+
+ return EOL_NONE;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, funlockfile);
+
+int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret) {
+ size_t n = 0, allocated = 0, count = 0;
+ _cleanup_free_ char *buffer = NULL;
+ int r;
+
+ assert(f);
+
+ /* Something like a bounded version of getline().
+ *
+ * Considers EOF, \n, \r and \0 end of line delimiters (or combinations of these), and does not include these
+ * delimiters in the string returned. Specifically, recognizes the following combinations of markers as line
+ * endings:
+ *
+ * • \n (UNIX)
+ * • \r (old MacOS)
+ * • \0 (C strings)
+ * • \n\0
+ * • \r\0
+ * • \r\n (Windows)
+ * • \n\r
+ * • \r\n\0
+ * • \n\r\0
+ *
+ * Returns the number of bytes read from the files (i.e. including delimiters — this hence usually differs from
+ * the number of characters in the returned string). When EOF is hit, 0 is returned.
+ *
+ * The input parameter limit is the maximum numbers of characters in the returned string, i.e. excluding
+ * delimiters. If the limit is hit we fail and return -ENOBUFS.
+ *
+ * If a line shall be skipped ret may be initialized as NULL. */
+
+ if (ret) {
+ if (!GREEDY_REALLOC(buffer, allocated, 1))
+ return -ENOMEM;
+ }
+
+ {
+ _unused_ _cleanup_(funlockfilep) FILE *flocked = f;
+ EndOfLineMarker previous_eol = EOL_NONE;
+ flockfile(f);
+
+ for (;;) {
+ EndOfLineMarker eol;
+ char c;
+
+ if (n >= limit)
+ return -ENOBUFS;
+
+ if (count >= INT_MAX) /* We couldn't return the counter anymore as "int", hence refuse this */
+ return -ENOBUFS;
+
+ r = safe_fgetc(f, &c);
+ if (r < 0)
+ return r;
+ if (r == 0) /* EOF is definitely EOL */
+ break;
+
+ eol = categorize_eol(c, flags);
+
+ if (FLAGS_SET(previous_eol, EOL_ZERO) ||
+ (eol == EOL_NONE && previous_eol != EOL_NONE) ||
+ (eol != EOL_NONE && (previous_eol & eol) != 0)) {
+ /* Previous char was a NUL? This is not an EOL, but the previous char was? This type of
+ * EOL marker has been seen right before? In either of these three cases we are
+ * done. But first, let's put this character back in the queue. (Note that we have to
+ * cast this to (unsigned char) here as ungetc() expects a positive 'int', and if we
+ * are on an architecture where 'char' equals 'signed char' we need to ensure we don't
+ * pass a negative value here. That said, to complicate things further ungetc() is
+ * actually happy with most negative characters and implicitly casts them back to
+ * positive ones as needed, except for \xff (aka -1, aka EOF), which it refuses. What a
+ * godawful API!) */
+ assert_se(ungetc((unsigned char) c, f) != EOF);
+ break;
+ }
+
+ count++;
+
+ if (eol != EOL_NONE) {
+ /* If we are on a tty, we can't shouldn't wait for more input, because that
+ * generally means waiting for the user, interactively. In the case of a TTY
+ * we expect only \n as the single EOL marker, so we are in the lucky
+ * position that there is no need to wait. We check this condition last, to
+ * avoid isatty() check if not necessary. */
+
+ if ((flags & (READ_LINE_IS_A_TTY|READ_LINE_NOT_A_TTY)) == 0) {
+ int fd;
+
+ fd = fileno(f);
+ if (fd < 0) /* Maybe an fmemopen() stream? Handle this gracefully,
+ * and don't call isatty() on an invalid fd */
+ flags |= READ_LINE_NOT_A_TTY;
+ else
+ flags |= isatty(fd) ? READ_LINE_IS_A_TTY : READ_LINE_NOT_A_TTY;
+ }
+ if (FLAGS_SET(flags, READ_LINE_IS_A_TTY))
+ break;
+ }
+
+ if (eol != EOL_NONE) {
+ previous_eol |= eol;
+ continue;
+ }
+
+ if (ret) {
+ if (!GREEDY_REALLOC(buffer, allocated, n + 2))
+ return -ENOMEM;
+
+ buffer[n] = c;
+ }
+
+ n++;
+ }
+ }
+
+ if (ret) {
+ buffer[n] = 0;
+
+ *ret = TAKE_PTR(buffer);
+ }
+
+ return (int) count;
+}
+
+int safe_fgetc(FILE *f, char *ret) {
+ int k;
+
+ assert(f);
+
+ /* A safer version of plain fgetc(): let's propagate the error that happened while reading as such, and
+ * separate the EOF condition from the byte read, to avoid those confusion signed/unsigned issues fgetc()
+ * has. */
+
+ errno = 0;
+ k = fgetc(f);
+ if (k == EOF) {
+ if (ferror(f))
+ return errno_or_else(EIO);
+
+ if (ret)
+ *ret = 0;
+
+ return 0;
+ }
+
+ if (ret)
+ *ret = k;
+
+ return 1;
+}
+
+int warn_file_is_world_accessible(const char *filename, struct stat *st, const char *unit, unsigned line) {
+ struct stat _st;
+
+ if (!filename)
+ return 0;
+
+ if (!st) {
+ if (stat(filename, &_st) < 0)
+ return -errno;
+ st = &_st;
+ }
+
+ if ((st->st_mode & S_IRWXO) == 0)
+ return 0;
+
+ if (unit)
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "%s has %04o mode that is too permissive, please adjust the ownership and access mode.",
+ filename, st->st_mode & 07777);
+ else
+ log_warning("%s has %04o mode that is too permissive, please adjust the ownership and access mode.",
+ filename, st->st_mode & 07777);
+ return 0;
+}
+
+int sync_rights(int from, int to) {
+ struct stat st;
+
+ if (fstat(from, &st) < 0)
+ return -errno;
+
+ return fchmod_and_chown(to, st.st_mode & 07777, st.st_uid, st.st_gid);
+}
+
+int rename_and_apply_smack_floor_label(const char *from, const char *to) {
+ int r = 0;
+ if (rename(from, to) < 0)
+ return -errno;
+
+#ifdef SMACK_RUN_LABEL
+ r = mac_smack_apply(to, SMACK_ATTR_ACCESS, SMACK_FLOOR_LABEL);
+ if (r < 0)
+ return r;
+#endif
+ return r;
+}
diff --git a/src/basic/fileio.h b/src/basic/fileio.h
new file mode 100644
index 0000000..0886354
--- /dev/null
+++ b/src/basic/fileio.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/fcntl.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+#define LONG_LINE_MAX (1U*1024U*1024U)
+
+typedef enum {
+ WRITE_STRING_FILE_CREATE = 1 << 0,
+ WRITE_STRING_FILE_TRUNCATE = 1 << 1,
+ WRITE_STRING_FILE_ATOMIC = 1 << 2,
+ WRITE_STRING_FILE_AVOID_NEWLINE = 1 << 3,
+ WRITE_STRING_FILE_VERIFY_ON_FAILURE = 1 << 4,
+ WRITE_STRING_FILE_SYNC = 1 << 5,
+ WRITE_STRING_FILE_DISABLE_BUFFER = 1 << 6,
+ WRITE_STRING_FILE_NOFOLLOW = 1 << 7,
+ WRITE_STRING_FILE_MKDIR_0755 = 1 << 8,
+ WRITE_STRING_FILE_MODE_0600 = 1 << 9,
+
+ /* And before you wonder, why write_string_file_atomic_label_ts() is a separate function instead of just one
+ more flag here: it's about linking: we don't want to pull -lselinux into all users of write_string_file()
+ and friends. */
+
+} WriteStringFileFlags;
+
+typedef enum {
+ READ_FULL_FILE_SECURE = 1 << 0, /* erase any buffers we employ internally, after use */
+ READ_FULL_FILE_UNBASE64 = 1 << 1, /* base64 decode what we read */
+ READ_FULL_FILE_UNHEX = 1 << 2, /* hex decode what we read */
+ READ_FULL_FILE_WARN_WORLD_READABLE = 1 << 3, /* if regular file, log at LOG_WARNING level if access mode above 0700 */
+ READ_FULL_FILE_CONNECT_SOCKET = 1 << 4, /* if socket inode, connect to it and read off it */
+} ReadFullFileFlags;
+
+int fopen_unlocked(const char *path, const char *options, FILE **ret);
+int fdopen_unlocked(int fd, const char *options, FILE **ret);
+int take_fdopen_unlocked(int *fd, const char *options, FILE **ret);
+FILE* take_fdopen(int *fd, const char *options);
+DIR* take_fdopendir(int *dfd);
+FILE* open_memstream_unlocked(char **ptr, size_t *sizeloc);
+FILE* fmemopen_unlocked(void *buf, size_t size, const char *mode);
+
+int write_string_stream_ts(FILE *f, const char *line, WriteStringFileFlags flags, const struct timespec *ts);
+static inline int write_string_stream(FILE *f, const char *line, WriteStringFileFlags flags) {
+ return write_string_stream_ts(f, line, flags, NULL);
+}
+int write_string_file_ts(const char *fn, const char *line, WriteStringFileFlags flags, const struct timespec *ts);
+static inline int write_string_file(const char *fn, const char *line, WriteStringFileFlags flags) {
+ return write_string_file_ts(fn, line, flags, NULL);
+}
+
+int write_string_filef(const char *fn, WriteStringFileFlags flags, const char *format, ...) _printf_(3, 4);
+
+int read_one_line_file(const char *filename, char **line);
+int read_full_file_full(int dir_fd, const char *filename, ReadFullFileFlags flags, const char *bind_name, char **contents, size_t *size);
+static inline int read_full_file(const char *filename, char **contents, size_t *size) {
+ return read_full_file_full(AT_FDCWD, filename, 0, NULL, contents, size);
+}
+int read_full_virtual_file(const char *filename, char **ret_contents, size_t *ret_size);
+int read_full_stream_full(FILE *f, const char *filename, ReadFullFileFlags flags, char **contents, size_t *size);
+static inline int read_full_stream(FILE *f, char **contents, size_t *size) {
+ return read_full_stream_full(f, NULL, 0, contents, size);
+}
+
+int verify_file(const char *fn, const char *blob, bool accept_extra_nl);
+
+int executable_is_script(const char *path, char **interpreter);
+
+int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field);
+
+DIR *xopendirat(int dirfd, const char *name, int flags);
+int xfopenat(int dir_fd, const char *path, const char *mode, int flags, FILE **ret);
+
+int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **_f);
+int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **_f);
+
+int chase_symlinks_and_fopen_unlocked(
+ const char *path,
+ const char *root,
+ unsigned chase_flags,
+ const char *open_flags,
+ FILE **ret_file,
+ char **ret_path);
+
+int fflush_and_check(FILE *f);
+int fflush_sync_and_check(FILE *f);
+
+int write_timestamp_file_atomic(const char *fn, usec_t n);
+int read_timestamp_file(const char *fn, usec_t *ret);
+
+int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space);
+
+typedef enum ReadLineFlags {
+ READ_LINE_ONLY_NUL = 1 << 0,
+ READ_LINE_IS_A_TTY = 1 << 1,
+ READ_LINE_NOT_A_TTY = 1 << 2,
+} ReadLineFlags;
+
+int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret);
+
+static inline int read_line(FILE *f, size_t limit, char **ret) {
+ return read_line_full(f, limit, 0, ret);
+}
+
+static inline int read_nul_string(FILE *f, size_t limit, char **ret) {
+ return read_line_full(f, limit, READ_LINE_ONLY_NUL, ret);
+}
+
+int safe_fgetc(FILE *f, char *ret);
+
+int warn_file_is_world_accessible(const char *filename, struct stat *st, const char *unit, unsigned line);
+
+int sync_rights(int from, int to);
+
+int rename_and_apply_smack_floor_label(const char *temp_path, const char *dest_path);
diff --git a/src/basic/format-util.c b/src/basic/format-util.c
new file mode 100644
index 0000000..bf23037
--- /dev/null
+++ b/src/basic/format-util.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "format-util.h"
+#include "memory-util.h"
+#include "stdio-util.h"
+
+assert_cc(DECIMAL_STR_MAX(int) + 1 <= IF_NAMESIZE + 1);
+char *format_ifname_full(int ifindex, char buf[static IF_NAMESIZE + 1], FormatIfnameFlag flag) {
+ /* Buffer is always cleared */
+ memzero(buf, IF_NAMESIZE + 1);
+ if (if_indextoname(ifindex, buf))
+ return buf;
+
+ if (!FLAGS_SET(flag, FORMAT_IFNAME_IFINDEX))
+ return NULL;
+
+ if (FLAGS_SET(flag, FORMAT_IFNAME_IFINDEX_WITH_PERCENT))
+ snprintf(buf, IF_NAMESIZE + 1, "%%%d", ifindex);
+ else
+ snprintf(buf, IF_NAMESIZE + 1, "%d", ifindex);
+
+ return buf;
+}
+
+char *format_bytes_full(char *buf, size_t l, uint64_t t, FormatBytesFlag flag) {
+ typedef struct {
+ const char *suffix;
+ uint64_t factor;
+ } suffix_table;
+ static const suffix_table table_iec[] = {
+ { "E", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+ { "P", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+ { "T", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+ { "G", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) },
+ { "M", UINT64_C(1024)*UINT64_C(1024) },
+ { "K", UINT64_C(1024) },
+ }, table_si[] = {
+ { "E", UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000) },
+ { "P", UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000) },
+ { "T", UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000) },
+ { "G", UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000) },
+ { "M", UINT64_C(1000)*UINT64_C(1000) },
+ { "K", UINT64_C(1000) },
+ };
+ const suffix_table *table;
+ size_t n, i;
+
+ assert_cc(ELEMENTSOF(table_iec) == ELEMENTSOF(table_si));
+
+ if (t == (uint64_t) -1)
+ return NULL;
+
+ table = flag & FORMAT_BYTES_USE_IEC ? table_iec : table_si;
+ n = ELEMENTSOF(table_iec);
+
+ for (i = 0; i < n; i++)
+ if (t >= table[i].factor) {
+ if (flag & FORMAT_BYTES_BELOW_POINT) {
+ snprintf(buf, l,
+ "%" PRIu64 ".%" PRIu64 "%s",
+ t / table[i].factor,
+ i != n - 1 ?
+ (t / table[i + 1].factor * UINT64_C(10) / table[n - 1].factor) % UINT64_C(10):
+ (t * UINT64_C(10) / table[i].factor) % UINT64_C(10),
+ table[i].suffix);
+ } else
+ snprintf(buf, l,
+ "%" PRIu64 "%s",
+ t / table[i].factor,
+ table[i].suffix);
+
+ goto finish;
+ }
+
+ snprintf(buf, l, "%" PRIu64 "%s", t, flag & FORMAT_BYTES_TRAILING_B ? "B" : "");
+
+finish:
+ buf[l-1] = 0;
+ return buf;
+
+}
diff --git a/src/basic/format-util.h b/src/basic/format-util.h
new file mode 100644
index 0000000..b7e1876
--- /dev/null
+++ b/src/basic/format-util.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <net/if.h>
+#include <stdbool.h>
+
+#include "cgroup-util.h"
+#include "macro.h"
+
+assert_cc(sizeof(pid_t) == sizeof(int32_t));
+#define PID_PRI PRIi32
+#define PID_FMT "%" PID_PRI
+
+assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+#define UID_FMT "%" PRIu32
+
+assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+#define GID_FMT "%" PRIu32
+
+#if SIZEOF_TIME_T == 8
+# define PRI_TIME PRIi64
+#elif SIZEOF_TIME_T == 4
+# define PRI_TIME "li"
+#else
+# error Unknown time_t size
+#endif
+
+#if defined __x86_64__ && defined __ILP32__
+# define PRI_TIMEX PRIi64
+#else
+# define PRI_TIMEX "li"
+#endif
+
+#if SIZEOF_RLIM_T == 8
+# define RLIM_FMT "%" PRIu64
+#elif SIZEOF_RLIM_T == 4
+# define RLIM_FMT "%" PRIu32
+#else
+# error Unknown rlim_t size
+#endif
+
+#if SIZEOF_DEV_T == 8
+# define DEV_FMT "%" PRIu64
+#elif SIZEOF_DEV_T == 4
+# define DEV_FMT "%" PRIu32
+#else
+# error Unknown dev_t size
+#endif
+
+#if SIZEOF_INO_T == 8
+# define INO_FMT "%" PRIu64
+#elif SIZEOF_INO_T == 4
+# define INO_FMT "%" PRIu32
+#else
+# error Unknown ino_t size
+#endif
+
+typedef enum {
+ FORMAT_IFNAME_IFINDEX = 1 << 0,
+ FORMAT_IFNAME_IFINDEX_WITH_PERCENT = (1 << 1) | FORMAT_IFNAME_IFINDEX,
+} FormatIfnameFlag;
+
+char *format_ifname_full(int ifindex, char buf[static IF_NAMESIZE + 1], FormatIfnameFlag flag);
+static inline char *format_ifname(int ifindex, char buf[static IF_NAMESIZE + 1]) {
+ return format_ifname_full(ifindex, buf, 0);
+}
+
+typedef enum {
+ FORMAT_BYTES_USE_IEC = 1 << 0,
+ FORMAT_BYTES_BELOW_POINT = 1 << 1,
+ FORMAT_BYTES_TRAILING_B = 1 << 2,
+} FormatBytesFlag;
+
+#define FORMAT_BYTES_MAX 16U
+
+char *format_bytes_full(char *buf, size_t l, uint64_t t, FormatBytesFlag flag);
+
+static inline char *format_bytes(char *buf, size_t l, uint64_t t) {
+ return format_bytes_full(buf, l, t, FORMAT_BYTES_USE_IEC | FORMAT_BYTES_BELOW_POINT | FORMAT_BYTES_TRAILING_B);
+}
+
+static inline char *format_bytes_cgroup_protection(char *buf, size_t l, uint64_t t) {
+ if (t == CGROUP_LIMIT_MAX) {
+ (void) snprintf(buf, l, "%s", "infinity");
+ return buf;
+ }
+ return format_bytes(buf, l, t);
+}
diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c
new file mode 100644
index 0000000..6924f5d
--- /dev/null
+++ b/src/basic/fs-util.c
@@ -0,0 +1,1615 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <linux/falloc.h>
+#include <linux/magic.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_fcntl.h"
+#include "missing_fs.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int unlink_noerrno(const char *path) {
+ PROTECT_ERRNO;
+ int r;
+
+ r = unlink(path);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int rmdir_parents(const char *path, const char *stop) {
+ size_t l;
+ int r = 0;
+
+ assert(path);
+ assert(stop);
+
+ l = strlen(path);
+
+ /* Skip trailing slashes */
+ while (l > 0 && path[l-1] == '/')
+ l--;
+
+ while (l > 0) {
+ char *t;
+
+ /* Skip last component */
+ while (l > 0 && path[l-1] != '/')
+ l--;
+
+ /* Skip trailing slashes */
+ while (l > 0 && path[l-1] == '/')
+ l--;
+
+ if (l <= 0)
+ break;
+
+ t = strndup(path, l);
+ if (!t)
+ return -ENOMEM;
+
+ if (path_startswith(stop, t)) {
+ free(t);
+ return 0;
+ }
+
+ r = rmdir(t);
+ free(t);
+
+ if (r < 0)
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
+ int r;
+
+ /* Try the ideal approach first */
+ if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0)
+ return 0;
+
+ /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented,
+ * fall back to a different method. */
+ if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY))
+ return -errno;
+
+ /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems
+ * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we
+ * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */
+ if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) {
+
+ if (unlinkat(olddirfd, oldpath, 0) < 0) {
+ r = -errno; /* Backup errno before the following unlinkat() alters it */
+ (void) unlinkat(newdirfd, newpath, 0);
+ return r;
+ }
+
+ return 0;
+ }
+
+ if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM)) /* FAT returns EPERM on link()… */
+ return -errno;
+
+ /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fall back to the racy TOCTOU
+ * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */
+
+ if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
+ return -EEXIST;
+ if (errno != ENOENT)
+ return -errno;
+
+ if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int readlinkat_malloc(int fd, const char *p, char **ret) {
+ size_t l = FILENAME_MAX+1;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ for (;;) {
+ char *c;
+ ssize_t n;
+
+ c = new(char, l);
+ if (!c)
+ return -ENOMEM;
+
+ n = readlinkat(fd, p, c, l-1);
+ if (n < 0) {
+ r = -errno;
+ free(c);
+ return r;
+ }
+
+ if ((size_t) n < l-1) {
+ c[n] = 0;
+ *ret = c;
+ return 0;
+ }
+
+ free(c);
+ l *= 2;
+ }
+}
+
+int readlink_malloc(const char *p, char **ret) {
+ return readlinkat_malloc(AT_FDCWD, p, ret);
+}
+
+int readlink_value(const char *p, char **ret) {
+ _cleanup_free_ char *link = NULL;
+ char *value;
+ int r;
+
+ r = readlink_malloc(p, &link);
+ if (r < 0)
+ return r;
+
+ value = basename(link);
+ if (!value)
+ return -ENOENT;
+
+ value = strdup(value);
+ if (!value)
+ return -ENOMEM;
+
+ *ret = value;
+
+ return 0;
+}
+
+int readlink_and_make_absolute(const char *p, char **r) {
+ _cleanup_free_ char *target = NULL;
+ char *k;
+ int j;
+
+ assert(p);
+ assert(r);
+
+ j = readlink_malloc(p, &target);
+ if (j < 0)
+ return j;
+
+ k = file_in_same_dir(p, target);
+ if (!k)
+ return -ENOMEM;
+
+ *r = k;
+ return 0;
+}
+
+int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+
+ fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); /* Let's acquire an O_PATH fd, as precaution to change
+ * mode/owner on the same file */
+ if (fd < 0)
+ return -errno;
+
+ return fchmod_and_chown(fd, mode, uid, gid);
+}
+
+int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) {
+ bool do_chown, do_chmod;
+ struct stat st;
+ int r;
+
+ /* Change ownership and access mode of the specified fd. Tries to do so safely, ensuring that at no
+ * point in time the access mode is above the old access mode under the old ownership or the new
+ * access mode under the new ownership. Note: this call tries hard to leave the access mode
+ * unaffected if the uid/gid is changed, i.e. it undoes implicit suid/sgid dropping the kernel does
+ * on chown().
+ *
+ * This call is happy with O_PATH fds. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ do_chown =
+ (uid != UID_INVALID && st.st_uid != uid) ||
+ (gid != GID_INVALID && st.st_gid != gid);
+
+ do_chmod =
+ !S_ISLNK(st.st_mode) && /* chmod is not defined on symlinks */
+ ((mode != MODE_INVALID && ((st.st_mode ^ mode) & 07777) != 0) ||
+ do_chown); /* If we change ownership, make sure we reset the mode afterwards, since chown()
+ * modifies the access mode too */
+
+ if (mode == MODE_INVALID)
+ mode = st.st_mode; /* If we only shall do a chown(), save original mode, since chown() might break it. */
+ else if ((mode & S_IFMT) != 0 && ((mode ^ st.st_mode) & S_IFMT) != 0)
+ return -EINVAL; /* insist on the right file type if it was specified */
+
+ if (do_chown && do_chmod) {
+ mode_t minimal = st.st_mode & mode; /* the subset of the old and the new mask */
+
+ if (((minimal ^ st.st_mode) & 07777) != 0) {
+ r = fchmod_opath(fd, minimal & 07777);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (do_chown)
+ if (fchownat(fd, "", uid, gid, AT_EMPTY_PATH) < 0)
+ return -errno;
+
+ if (do_chmod) {
+ r = fchmod_opath(fd, mode & 07777);
+ if (r < 0)
+ return r;
+ }
+
+ return do_chown || do_chmod;
+}
+
+int fchmod_umask(int fd, mode_t m) {
+ mode_t u;
+ int r;
+
+ u = umask(0777);
+ r = fchmod(fd, m & (~u)) < 0 ? -errno : 0;
+ umask(u);
+
+ return r;
+}
+
+int fchmod_opath(int fd, mode_t m) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+
+ /* This function operates also on fd that might have been opened with
+ * O_PATH. Indeed fchmodat() doesn't have the AT_EMPTY_PATH flag like
+ * fchownat() does. */
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ if (chmod(procfs_path, m) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (proc_mounted() == 0)
+ return -ENOSYS; /* if we have no /proc/, the concept is not implementable */
+
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int futimens_opath(int fd, const struct timespec ts[2]) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+
+ /* Similar to fchmod_path() but for futimens() */
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ if (utimensat(AT_FDCWD, procfs_path, ts, 0) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (proc_mounted() == 0)
+ return -ENOSYS; /* if we have no /proc/, the concept is not implementable */
+
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int stat_warn_permissions(const char *path, const struct stat *st) {
+ assert(path);
+ assert(st);
+
+ /* Don't complain if we are reading something that is not a file, for example /dev/null */
+ if (!S_ISREG(st->st_mode))
+ return 0;
+
+ if (st->st_mode & 0111)
+ log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);
+
+ if (st->st_mode & 0002)
+ log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);
+
+ if (getpid_cached() == 1 && (st->st_mode & 0044) != 0044)
+ log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);
+
+ return 0;
+}
+
+int fd_warn_permissions(const char *path, int fd) {
+ struct stat st;
+
+ assert(path);
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return stat_warn_permissions(path, &st);
+}
+
+int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
+ char fdpath[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_close_ int fd = -1;
+ int r, ret = 0;
+
+ assert(path);
+
+ /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
+ * itself which is updated, not its target
+ *
+ * Returns the first error we encounter, but tries to apply as much as possible. */
+
+ if (parents)
+ (void) mkdir_parents(path, 0755);
+
+ /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
+ * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
+ * won't trigger any driver magic or so. */
+ fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
+ * here, and nothing else */
+ fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
+ if (fd < 0)
+ return -errno;
+ }
+
+ /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
+ * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
+ * something fchown(), fchmod(), futimensat() don't allow. */
+ xsprintf(fdpath, "/proc/self/fd/%i", fd);
+
+ ret = fchmod_and_chown(fd, mode, uid, gid);
+
+ if (stamp != USEC_INFINITY) {
+ struct timespec ts[2];
+
+ timespec_store(&ts[0], stamp);
+ ts[1] = ts[0];
+ r = utimensat(AT_FDCWD, fdpath, ts, 0);
+ } else
+ r = utimensat(AT_FDCWD, fdpath, NULL, 0);
+ if (r < 0 && ret >= 0)
+ return -errno;
+
+ return ret;
+}
+
+int touch(const char *path) {
+ return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+}
+
+int symlink_idempotent(const char *from, const char *to, bool make_relative) {
+ _cleanup_free_ char *relpath = NULL;
+ int r;
+
+ assert(from);
+ assert(to);
+
+ if (make_relative) {
+ _cleanup_free_ char *parent = NULL;
+
+ parent = dirname_malloc(to);
+ if (!parent)
+ return -ENOMEM;
+
+ r = path_make_relative(parent, from, &relpath);
+ if (r < 0)
+ return r;
+
+ from = relpath;
+ }
+
+ if (symlink(from, to) < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ if (errno != EEXIST)
+ return -errno;
+
+ r = readlink_malloc(to, &p);
+ if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
+ return -EEXIST;
+ if (r < 0) /* Any other error? In that case propagate it as is */
+ return r;
+
+ if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+int symlink_atomic(const char *from, const char *to) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(from);
+ assert(to);
+
+ r = tempfn_random(to, NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (symlink(from, t) < 0)
+ return -errno;
+
+ if (rename(t, to) < 0) {
+ unlink_noerrno(t);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(path);
+
+ r = tempfn_random(path, NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (mknod(t, mode, dev) < 0)
+ return -errno;
+
+ if (rename(t, path) < 0) {
+ unlink_noerrno(t);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int mkfifo_atomic(const char *path, mode_t mode) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(path);
+
+ r = tempfn_random(path, NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (mkfifo(t, mode) < 0)
+ return -errno;
+
+ if (rename(t, path) < 0) {
+ unlink_noerrno(t);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int mkfifoat_atomic(int dirfd, const char *path, mode_t mode) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(path);
+
+ if (path_is_absolute(path))
+ return mkfifo_atomic(path, mode);
+
+ /* We're only interested in the (random) filename. */
+ r = tempfn_random_child("", NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (mkfifoat(dirfd, t, mode) < 0)
+ return -errno;
+
+ if (renameat(dirfd, t, dirfd, path) < 0) {
+ unlink_noerrno(t);
+ return -errno;
+ }
+
+ return 0;
+}
+
+int get_files_in_directory(const char *path, char ***list) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ size_t bufsize = 0, n = 0;
+ _cleanup_strv_free_ char **l = NULL;
+
+ assert(path);
+
+ /* Returns all files in a directory in *list, and the number
+ * of files as return value. If list is NULL returns only the
+ * number. */
+
+ d = opendir(path);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ dirent_ensure_type(d, de);
+
+ if (!dirent_is_file(de))
+ continue;
+
+ if (list) {
+ /* one extra slot is needed for the terminating NULL */
+ if (!GREEDY_REALLOC(l, bufsize, n + 2))
+ return -ENOMEM;
+
+ l[n] = strdup(de->d_name);
+ if (!l[n])
+ return -ENOMEM;
+
+ l[++n] = NULL;
+ } else
+ n++;
+ }
+
+ if (list)
+ *list = TAKE_PTR(l);
+
+ return n;
+}
+
+static int getenv_tmp_dir(const char **ret_path) {
+ const char *n;
+ int r, ret = 0;
+
+ assert(ret_path);
+
+ /* We use the same order of environment variables python uses in tempfile.gettempdir():
+ * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
+ FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
+ const char *e;
+
+ e = secure_getenv(n);
+ if (!e)
+ continue;
+ if (!path_is_absolute(e)) {
+ r = -ENOTDIR;
+ goto next;
+ }
+ if (!path_is_normalized(e)) {
+ r = -EPERM;
+ goto next;
+ }
+
+ r = is_dir(e, true);
+ if (r < 0)
+ goto next;
+ if (r == 0) {
+ r = -ENOTDIR;
+ goto next;
+ }
+
+ *ret_path = e;
+ return 1;
+
+ next:
+ /* Remember first error, to make this more debuggable */
+ if (ret >= 0)
+ ret = r;
+ }
+
+ if (ret < 0)
+ return ret;
+
+ *ret_path = NULL;
+ return ret;
+}
+
+static int tmp_dir_internal(const char *def, const char **ret) {
+ const char *e;
+ int r, k;
+
+ assert(def);
+ assert(ret);
+
+ r = getenv_tmp_dir(&e);
+ if (r > 0) {
+ *ret = e;
+ return 0;
+ }
+
+ k = is_dir(def, true);
+ if (k == 0)
+ k = -ENOTDIR;
+ if (k < 0)
+ return r < 0 ? r : k;
+
+ *ret = def;
+ return 0;
+}
+
+int var_tmp_dir(const char **ret) {
+
+ /* Returns the location for "larger" temporary files, that is backed by physical storage if available, and thus
+ * even might survive a boot: /var/tmp. If $TMPDIR (or related environment variables) are set, its value is
+ * returned preferably however. Note that both this function and tmp_dir() below are affected by $TMPDIR,
+ * making it a variable that overrides all temporary file storage locations. */
+
+ return tmp_dir_internal("/var/tmp", ret);
+}
+
+int tmp_dir(const char **ret) {
+
+ /* Similar to var_tmp_dir() above, but returns the location for "smaller" temporary files, which is usually
+ * backed by an in-memory file system: /tmp. */
+
+ return tmp_dir_internal("/tmp", ret);
+}
+
+int unlink_or_warn(const char *filename) {
+ if (unlink(filename) < 0 && errno != ENOENT)
+ /* If the file doesn't exist and the fs simply was read-only (in which
+ * case unlink() returns EROFS even if the file doesn't exist), don't
+ * complain */
+ if (errno != EROFS || access(filename, F_OK) >= 0)
+ return log_error_errno(errno, "Failed to remove \"%s\": %m", filename);
+
+ return 0;
+}
+
+int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
+ char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ int wd;
+
+ /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
+ xsprintf(path, "/proc/self/fd/%i", what);
+
+ wd = inotify_add_watch(fd, path, mask);
+ if (wd < 0)
+ return -errno;
+
+ return wd;
+}
+
+int inotify_add_watch_and_warn(int fd, const char *pathname, uint32_t mask) {
+ int wd;
+
+ wd = inotify_add_watch(fd, pathname, mask);
+ if (wd < 0) {
+ if (errno == ENOSPC)
+ return log_error_errno(errno, "Failed to add a watch for %s: inotify watch limit reached", pathname);
+
+ return log_error_errno(errno, "Failed to add a watch for %s: %m", pathname);
+ }
+
+ return wd;
+}
+
+static bool unsafe_transition(const struct stat *a, const struct stat *b) {
+ /* Returns true if the transition from a to b is safe, i.e. that we never transition from unprivileged to
+ * privileged files or directories. Why bother? So that unprivileged code can't symlink to privileged files
+ * making us believe we read something safe even though it isn't safe in the specific context we open it in. */
+
+ if (a->st_uid == 0) /* Transitioning from privileged to unprivileged is always fine */
+ return false;
+
+ return a->st_uid != b->st_uid; /* Otherwise we need to stay within the same UID */
+}
+
+static int log_unsafe_transition(int a, int b, const char *path, unsigned flags) {
+ _cleanup_free_ char *n1 = NULL, *n2 = NULL;
+
+ if (!FLAGS_SET(flags, CHASE_WARN))
+ return -ENOLINK;
+
+ (void) fd_get_path(a, &n1);
+ (void) fd_get_path(b, &n2);
+
+ return log_warning_errno(SYNTHETIC_ERRNO(ENOLINK),
+ "Detected unsafe path transition %s %s %s during canonicalization of %s.",
+ strna(n1), special_glyph(SPECIAL_GLYPH_ARROW), strna(n2), path);
+}
+
+static int log_autofs_mount_point(int fd, const char *path, unsigned flags) {
+ _cleanup_free_ char *n1 = NULL;
+
+ if (!FLAGS_SET(flags, CHASE_WARN))
+ return -EREMOTE;
+
+ (void) fd_get_path(fd, &n1);
+
+ return log_warning_errno(SYNTHETIC_ERRNO(EREMOTE),
+ "Detected autofs mount point %s during canonicalization of %s.",
+ strna(n1), path);
+}
+
+int chase_symlinks(const char *path, const char *original_root, unsigned flags, char **ret_path, int *ret_fd) {
+ _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
+ _cleanup_close_ int fd = -1;
+ unsigned max_follow = CHASE_SYMLINKS_MAX; /* how many symlinks to follow before giving up and returning ELOOP */
+ struct stat previous_stat;
+ bool exists = true;
+ char *todo;
+ int r;
+
+ assert(path);
+
+ /* Either the file may be missing, or we return an fd to the final object, but both make no sense */
+ if ((flags & CHASE_NONEXISTENT) && ret_fd)
+ return -EINVAL;
+
+ if ((flags & CHASE_STEP) && ret_fd)
+ return -EINVAL;
+
+ if (isempty(path))
+ return -EINVAL;
+
+ /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
+ * symlinks relative to a root directory, instead of the root of the host.
+ *
+ * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following
+ * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is
+ * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first
+ * prefixed accordingly.
+ *
+ * Algorithmically this operates on two path buffers: "done" are the components of the path we already
+ * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
+ * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning
+ * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
+ * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
+ * to a minimum.
+ *
+ * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got
+ * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this
+ * function what to do when encountering a symlink with an absolute path as directory: prefix it by the
+ * specified path.
+ *
+ * There are five ways to invoke this function:
+ *
+ * 1. Without CHASE_STEP or ret_fd: in this case the path is resolved and the normalized path is
+ * returned in `ret_path`. The return value is < 0 on error. If CHASE_NONEXISTENT is also set, 0
+ * is returned if the file doesn't exist, > 0 otherwise. If CHASE_NONEXISTENT is not set, >= 0 is
+ * returned if the destination was found, -ENOENT if it wasn't.
+ *
+ * 2. With ret_fd: in this case the destination is opened after chasing it as O_PATH and this file
+ * descriptor is returned as return value. This is useful to open files relative to some root
+ * directory. Note that the returned O_PATH file descriptors must be converted into a regular one (using
+ * fd_reopen() or such) before it can be used for reading/writing. ret_fd may not be combined with
+ * CHASE_NONEXISTENT.
+ *
+ * 3. With CHASE_STEP: in this case only a single step of the normalization is executed, i.e. only the first
+ * symlink or ".." component of the path is resolved, and the resulting path is returned. This is useful if
+ * a caller wants to trace the path through the file system verbosely. Returns < 0 on error, > 0 if the
+ * path is fully normalized, and == 0 for each normalization step. This may be combined with
+ * CHASE_NONEXISTENT, in which case 1 is returned when a component is not found.
+ *
+ * 4. With CHASE_SAFE: in this case the path must not contain unsafe transitions, i.e. transitions from
+ * unprivileged to privileged files or directories. In such cases the return value is -ENOLINK. If
+ * CHASE_WARN is also set, a warning describing the unsafe transition is emitted.
+ *
+ * 5. With CHASE_NO_AUTOFS: in this case if an autofs mount point is encountered, path normalization
+ * is aborted and -EREMOTE is returned. If CHASE_WARN is also set, a warning showing the path of
+ * the mount point is emitted.
+ */
+
+ /* A root directory of "/" or "" is identical to none */
+ if (empty_or_root(original_root))
+ original_root = NULL;
+
+ if (!original_root && !ret_path && !(flags & (CHASE_NONEXISTENT|CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_STEP)) && ret_fd) {
+ /* Shortcut the ret_fd case if the caller isn't interested in the actual path and has no root set
+ * and doesn't care about any of the other special features we provide either. */
+ r = open(path, O_PATH|O_CLOEXEC|((flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0));
+ if (r < 0)
+ return -errno;
+
+ *ret_fd = r;
+ return 0;
+ }
+
+ if (original_root) {
+ r = path_make_absolute_cwd(original_root, &root);
+ if (r < 0)
+ return r;
+
+ /* Simplify the root directory, so that it has no duplicate slashes and nothing at the
+ * end. While we won't resolve the root path we still simplify it. Note that dropping the
+ * trailing slash should not change behaviour, since when opening it we specify O_DIRECTORY
+ * anyway. Moreover at the end of this function after processing everything we'll always turn
+ * the empty string back to "/". */
+ delete_trailing_chars(root, "/");
+ path_simplify(root, true);
+
+ if (flags & CHASE_PREFIX_ROOT) {
+ /* We don't support relative paths in combination with a root directory */
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ path = prefix_roota(root, path);
+ }
+ }
+
+ r = path_make_absolute_cwd(path, &buffer);
+ if (r < 0)
+ return r;
+
+ fd = open(root ?: "/", O_CLOEXEC|O_DIRECTORY|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd, &previous_stat) < 0)
+ return -errno;
+ }
+
+ if (root) {
+ _cleanup_free_ char *absolute = NULL;
+ const char *e;
+
+ /* If we are operating on a root directory, let's take the root directory as it is. */
+
+ e = path_startswith(buffer, root);
+ if (!e)
+ return log_full_errno(flags & CHASE_WARN ? LOG_WARNING : LOG_DEBUG,
+ SYNTHETIC_ERRNO(ECHRNG),
+ "Specified path '%s' is outside of specified root directory '%s', refusing to resolve.",
+ path, root);
+
+ done = strdup(root);
+ if (!done)
+ return -ENOMEM;
+
+ /* Make sure "todo" starts with a slash */
+ absolute = strjoin("/", e);
+ if (!absolute)
+ return -ENOMEM;
+
+ free_and_replace(buffer, absolute);
+ }
+
+ todo = buffer;
+ for (;;) {
+ _cleanup_free_ char *first = NULL;
+ _cleanup_close_ int child = -1;
+ struct stat st;
+ size_t n, m;
+
+ /* Determine length of first component in the path */
+ n = strspn(todo, "/"); /* The slashes */
+
+ if (n > 1) {
+ /* If we are looking at more than a single slash then skip all but one, so that when
+ * we are done with everything we have a normalized path with only single slashes
+ * separating the path components. */
+ todo += n - 1;
+ n = 1;
+ }
+
+ m = n + strcspn(todo + n, "/"); /* The entire length of the component */
+
+ /* Extract the first component. */
+ first = strndup(todo, m);
+ if (!first)
+ return -ENOMEM;
+
+ todo += m;
+
+ /* Empty? Then we reached the end. */
+ if (isempty(first))
+ break;
+
+ /* Just a single slash? Then we reached the end. */
+ if (path_equal(first, "/")) {
+ /* Preserve the trailing slash */
+
+ if (flags & CHASE_TRAIL_SLASH)
+ if (!strextend(&done, "/", NULL))
+ return -ENOMEM;
+
+ break;
+ }
+
+ /* Just a dot? Then let's eat this up. */
+ if (path_equal(first, "/."))
+ continue;
+
+ /* Two dots? Then chop off the last bit of what we already found out. */
+ if (path_equal(first, "/..")) {
+ _cleanup_free_ char *parent = NULL;
+ _cleanup_close_ int fd_parent = -1;
+
+ /* If we already are at the top, then going up will not change anything. This is in-line with
+ * how the kernel handles this. */
+ if (empty_or_root(done))
+ continue;
+
+ parent = dirname_malloc(done);
+ if (!parent)
+ return -ENOMEM;
+
+ /* Don't allow this to leave the root dir. */
+ if (root &&
+ path_startswith(done, root) &&
+ !path_startswith(parent, root))
+ continue;
+
+ free_and_replace(done, parent);
+
+ if (flags & CHASE_STEP)
+ goto chased_one;
+
+ fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd_parent < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd_parent, &st) < 0)
+ return -errno;
+
+ if (unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(fd, fd_parent, path, flags);
+
+ previous_stat = st;
+ }
+
+ safe_close(fd);
+ fd = TAKE_FD(fd_parent);
+
+ continue;
+ }
+
+ /* Otherwise let's see what this is. */
+ child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (child < 0) {
+
+ if (errno == ENOENT &&
+ (flags & CHASE_NONEXISTENT) &&
+ (isempty(todo) || path_is_normalized(todo))) {
+
+ /* If CHASE_NONEXISTENT is set, and the path does not exist, then that's OK, return
+ * what we got so far. But don't allow this if the remaining path contains "../ or "./"
+ * or something else weird. */
+
+ /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
+ if (streq_ptr(done, "/"))
+ *done = '\0';
+
+ if (!strextend(&done, first, todo, NULL))
+ return -ENOMEM;
+
+ exists = false;
+ break;
+ }
+
+ return -errno;
+ }
+
+ if (fstat(child, &st) < 0)
+ return -errno;
+ if ((flags & CHASE_SAFE) &&
+ unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(fd, child, path, flags);
+
+ previous_stat = st;
+
+ if ((flags & CHASE_NO_AUTOFS) &&
+ fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0)
+ return log_autofs_mount_point(child, path, flags);
+
+ if (S_ISLNK(st.st_mode) && !((flags & CHASE_NOFOLLOW) && isempty(todo))) {
+ char *joined;
+ _cleanup_free_ char *destination = NULL;
+
+ /* This is a symlink, in this case read the destination. But let's make sure we don't follow
+ * symlinks without bounds. */
+ if (--max_follow <= 0)
+ return -ELOOP;
+
+ r = readlinkat_malloc(fd, first + n, &destination);
+ if (r < 0)
+ return r;
+ if (isempty(destination))
+ return -EINVAL;
+
+ if (path_is_absolute(destination)) {
+
+ /* An absolute destination. Start the loop from the beginning, but use the root
+ * directory as base. */
+
+ safe_close(fd);
+ fd = open(root ?: "/", O_CLOEXEC|O_DIRECTORY|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ if (flags & CHASE_SAFE) {
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (unsafe_transition(&previous_stat, &st))
+ return log_unsafe_transition(child, fd, path, flags);
+
+ previous_stat = st;
+ }
+
+ free(done);
+
+ /* Note that we do not revalidate the root, we take it as is. */
+ if (isempty(root))
+ done = NULL;
+ else {
+ done = strdup(root);
+ if (!done)
+ return -ENOMEM;
+ }
+
+ /* Prefix what's left to do with what we just read, and start the loop again, but
+ * remain in the current directory. */
+ joined = path_join(destination, todo);
+ } else
+ joined = path_join("/", destination, todo);
+ if (!joined)
+ return -ENOMEM;
+
+ free(buffer);
+ todo = buffer = joined;
+
+ if (flags & CHASE_STEP)
+ goto chased_one;
+
+ continue;
+ }
+
+ /* If this is not a symlink, then let's just add the name we read to what we already verified. */
+ if (!done)
+ done = TAKE_PTR(first);
+ else {
+ /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
+ if (streq(done, "/"))
+ *done = '\0';
+
+ if (!strextend(&done, first, NULL))
+ return -ENOMEM;
+ }
+
+ /* And iterate again, but go one directory further down. */
+ safe_close(fd);
+ fd = TAKE_FD(child);
+ }
+
+ if (!done) {
+ /* Special case, turn the empty string into "/", to indicate the root directory. */
+ done = strdup("/");
+ if (!done)
+ return -ENOMEM;
+ }
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(done);
+
+ if (ret_fd) {
+ /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a
+ * proper fd by opening /proc/self/fd/xyz. */
+
+ assert(fd >= 0);
+ *ret_fd = TAKE_FD(fd);
+ }
+
+ if (flags & CHASE_STEP)
+ return 1;
+
+ return exists;
+
+chased_one:
+ if (ret_path) {
+ char *c;
+
+ c = strjoin(strempty(done), todo);
+ if (!c)
+ return -ENOMEM;
+
+ *ret_path = c;
+ }
+
+ return 0;
+}
+
+int chase_symlinks_and_open(
+ const char *path,
+ const char *root,
+ unsigned chase_flags,
+ int open_flags,
+ char **ret_path) {
+
+ _cleanup_close_ int path_fd = -1;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if (chase_flags & CHASE_NONEXISTENT)
+ return -EINVAL;
+
+ if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
+ /* Shortcut this call if none of the special features of this call are requested */
+ r = open(path, open_flags);
+ if (r < 0)
+ return -errno;
+
+ return r;
+ }
+
+ r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd);
+ if (r < 0)
+ return r;
+ assert(path_fd >= 0);
+
+ r = fd_reopen(path_fd, open_flags);
+ if (r < 0)
+ return r;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ return r;
+}
+
+int chase_symlinks_and_opendir(
+ const char *path,
+ const char *root,
+ unsigned chase_flags,
+ char **ret_path,
+ DIR **ret_dir) {
+
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_close_ int path_fd = -1;
+ _cleanup_free_ char *p = NULL;
+ DIR *d;
+ int r;
+
+ if (!ret_dir)
+ return -EINVAL;
+ if (chase_flags & CHASE_NONEXISTENT)
+ return -EINVAL;
+
+ if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
+ /* Shortcut this call if none of the special features of this call are requested */
+ d = opendir(path);
+ if (!d)
+ return -errno;
+
+ *ret_dir = d;
+ return 0;
+ }
+
+ r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd);
+ if (r < 0)
+ return r;
+ assert(path_fd >= 0);
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", path_fd);
+ d = opendir(procfs_path);
+ if (!d)
+ return -errno;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ *ret_dir = d;
+ return 0;
+}
+
+int chase_symlinks_and_stat(
+ const char *path,
+ const char *root,
+ unsigned chase_flags,
+ char **ret_path,
+ struct stat *ret_stat,
+ int *ret_fd) {
+
+ _cleanup_close_ int path_fd = -1;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(path);
+ assert(ret_stat);
+
+ if (chase_flags & CHASE_NONEXISTENT)
+ return -EINVAL;
+
+ if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
+ /* Shortcut this call if none of the special features of this call are requested */
+ if (stat(path, ret_stat) < 0)
+ return -errno;
+
+ return 1;
+ }
+
+ r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd);
+ if (r < 0)
+ return r;
+ assert(path_fd >= 0);
+
+ if (fstat(path_fd, ret_stat) < 0)
+ return -errno;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+ if (ret_fd)
+ *ret_fd = TAKE_FD(path_fd);
+
+ return 1;
+}
+
+int access_fd(int fd, int mode) {
+ char p[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
+
+ /* Like access() but operates on an already open fd */
+
+ xsprintf(p, "/proc/self/fd/%i", fd);
+ if (access(p, mode) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's
+ * make things debuggable and distinguish the two. */
+
+ if (proc_mounted() == 0)
+ return -ENOSYS; /* /proc is not available or not set up properly, we're most likely in some chroot
+ * environment. */
+
+ return -EBADF; /* The directory exists, hence it's the fd that doesn't. */
+ }
+
+ return 0;
+}
+
+void unlink_tempfilep(char (*p)[]) {
+ /* If the file is created with mkstemp(), it will (almost always)
+ * change the suffix. Treat this as a sign that the file was
+ * successfully created. We ignore both the rare case where the
+ * original suffix is used and unlink failures. */
+ if (!endswith(*p, ".XXXXXX"))
+ (void) unlink_noerrno(*p);
+}
+
+int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags) {
+ _cleanup_close_ int truncate_fd = -1;
+ struct stat st;
+ off_t l, bs;
+
+ assert((flags & ~(UNLINK_REMOVEDIR|UNLINK_ERASE)) == 0);
+
+ /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
+ * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
+ * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
+ * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
+ * returned to the free pool.
+ *
+ * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
+ * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
+ * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
+ * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
+ * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
+ * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
+ *
+ * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
+ * primary job – to delete the file – is accomplished. */
+
+ if (!FLAGS_SET(flags, UNLINK_REMOVEDIR)) {
+ truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
+ if (truncate_fd < 0) {
+
+ /* If this failed because the file doesn't exist propagate the error right-away. Also,
+ * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
+ * returned when this is a directory but we are not supposed to delete those, hence propagate
+ * the error right-away too. */
+ if (IN_SET(errno, ENOENT, EISDIR))
+ return -errno;
+
+ if (errno != ELOOP) /* don't complain if this is a symlink */
+ log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
+ }
+ }
+
+ if (unlinkat(fd, name, FLAGS_SET(flags, UNLINK_REMOVEDIR) ? AT_REMOVEDIR : 0) < 0)
+ return -errno;
+
+ if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
+ return 0;
+
+ if (fstat(truncate_fd, &st) < 0) {
+ log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
+ return 0;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ return 0;
+
+ if (FLAGS_SET(flags, UNLINK_ERASE) && st.st_size > 0 && st.st_nlink == 0) {
+ uint64_t left = st.st_size;
+ char buffer[64 * 1024];
+
+ /* If erasing is requested, let's overwrite the file with random data once before deleting
+ * it. This isn't going to give you shred(1) semantics, but hopefully should be good enough
+ * for stuff backed by tmpfs at least.
+ *
+ * Note that we only erase like this if the link count of the file is zero. If it is higher it
+ * is still linked by someone else and we'll leave it to them to remove it securely
+ * eventually! */
+
+ random_bytes(buffer, sizeof(buffer));
+
+ while (left > 0) {
+ ssize_t n;
+
+ n = write(truncate_fd, buffer, MIN(sizeof(buffer), left));
+ if (n < 0) {
+ log_debug_errno(errno, "Failed to erase data in file '%s', ignoring.", name);
+ break;
+ }
+
+ assert(left >= (size_t) n);
+ left -= n;
+ }
+
+ /* Let's refresh metadata */
+ if (fstat(truncate_fd, &st) < 0) {
+ log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
+ return 0;
+ }
+ }
+
+ /* Don't dallocate if there's nothing to deallocate or if the file is linked elsewhere */
+ if (st.st_blocks == 0 || st.st_nlink > 0)
+ return 0;
+
+ /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
+ * punch-hole/truncate this to release the disk space. */
+
+ bs = MAX(st.st_blksize, 512);
+ l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */
+
+ if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
+ return 0; /* Successfully punched a hole! 😊 */
+
+ /* Fall back to truncation */
+ if (ftruncate(truncate_fd, 0) < 0) {
+ log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
+ return 0;
+ }
+
+ return 0;
+}
+
+int fsync_directory_of_file(int fd) {
+ _cleanup_free_ char *path = NULL;
+ _cleanup_close_ int dfd = -1;
+ int r;
+
+ r = fd_verify_regular(fd);
+ if (r < 0)
+ return r;
+
+ r = fd_get_path(fd, &path);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to query /proc/self/fd/%d%s: %m",
+ fd,
+ r == -ENOSYS ? ", ignoring" : "");
+
+ if (r == -ENOSYS)
+ /* If /proc is not available, we're most likely running in some
+ * chroot environment, and syncing the directory is not very
+ * important in that case. Let's just silently do nothing. */
+ return 0;
+
+ return r;
+ }
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ dfd = open_parent(path, O_CLOEXEC, 0);
+ if (dfd < 0)
+ return dfd;
+
+ if (fsync(dfd) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int fsync_full(int fd) {
+ int r, q;
+
+ /* Sync both the file and the directory */
+
+ r = fsync(fd) < 0 ? -errno : 0;
+ q = fsync_directory_of_file(fd);
+
+ return r < 0 ? r : q;
+}
+
+int fsync_path_at(int at_fd, const char *path) {
+ _cleanup_close_ int opened_fd = -1;
+ int fd;
+
+ if (isempty(path)) {
+ if (at_fd == AT_FDCWD) {
+ opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (opened_fd < 0)
+ return -errno;
+
+ fd = opened_fd;
+ } else
+ fd = at_fd;
+ } else {
+
+ opened_fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC);
+ if (opened_fd < 0)
+ return -errno;
+
+ fd = opened_fd;
+ }
+
+ if (fsync(fd) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int syncfs_path(int atfd, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+
+ fd = openat(atfd, path, O_CLOEXEC|O_RDONLY|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ if (syncfs(fd) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int open_parent(const char *path, int flags, mode_t mode) {
+ _cleanup_free_ char *parent = NULL;
+ int fd;
+
+ if (isempty(path))
+ return -EINVAL;
+ if (path_equal(path, "/")) /* requesting the parent of the root dir is fishy, let's prohibit that */
+ return -EINVAL;
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an
+ * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */
+
+ if (FLAGS_SET(flags, O_PATH))
+ flags |= O_DIRECTORY;
+ else if (!FLAGS_SET(flags, O_TMPFILE))
+ flags |= O_DIRECTORY|O_RDONLY;
+
+ fd = open(parent, flags, mode);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+static int blockdev_is_encrypted(const char *sysfs_path, unsigned depth_left) {
+ _cleanup_free_ char *p = NULL, *uuids = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, found_encrypted = false;
+
+ assert(sysfs_path);
+
+ if (depth_left == 0)
+ return -EINVAL;
+
+ p = path_join(sysfs_path, "dm/uuid");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &uuids);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return r;
+
+ /* The DM device's uuid attribute is prefixed with "CRYPT-" if this is a dm-crypt device. */
+ if (startswith(uuids, "CRYPT-"))
+ return true;
+ }
+
+ /* Not a dm-crypt device itself. But maybe it is on top of one? Follow the links in the "slaves/"
+ * subdir. */
+
+ p = mfree(p);
+ p = path_join(sysfs_path, "slaves");
+ if (!p)
+ return -ENOMEM;
+
+ d = opendir(p);
+ if (!d) {
+ if (errno == ENOENT) /* Doesn't have underlying devices */
+ return false;
+
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *q = NULL;
+ struct dirent *de;
+
+ errno = 0;
+ de = readdir_no_dot(d);
+ if (!de) {
+ if (errno != 0)
+ return -errno;
+
+ break; /* No more underlying devices */
+ }
+
+ q = path_join(p, de->d_name);
+ if (!q)
+ return -ENOMEM;
+
+ r = blockdev_is_encrypted(q, depth_left - 1);
+ if (r < 0)
+ return r;
+ if (r == 0) /* we found one that is not encrypted? then propagate that immediately */
+ return false;
+
+ found_encrypted = true;
+ }
+
+ return found_encrypted;
+}
+
+int path_is_encrypted(const char *path) {
+ char p[SYS_BLOCK_PATH_MAX(NULL)];
+ dev_t devt;
+ int r;
+
+ r = get_block_device(path, &devt);
+ if (r < 0)
+ return r;
+ if (r == 0) /* doesn't have a block device */
+ return false;
+
+ xsprintf_sys_block_path(p, NULL, devt);
+
+ return blockdev_is_encrypted(p, 10 /* safety net: maximum recursion depth */);
+}
diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h
new file mode 100644
index 0000000..5dc8853
--- /dev/null
+++ b/src/basic/fs-util.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/inotify.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "errno-util.h"
+#include "time-util.h"
+
+#define MODE_INVALID ((mode_t) -1)
+
+/* The following macros add 1 when converting things, since 0 is a valid mode, while the pointer
+ * NULL is special */
+#define PTR_TO_MODE(p) ((mode_t) ((uintptr_t) (p)-1))
+#define MODE_TO_PTR(u) ((void *) ((uintptr_t) (u)+1))
+
+int unlink_noerrno(const char *path);
+
+int rmdir_parents(const char *path, const char *stop);
+
+int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath);
+
+int readlinkat_malloc(int fd, const char *p, char **ret);
+int readlink_malloc(const char *p, char **r);
+int readlink_value(const char *p, char **ret);
+int readlink_and_make_absolute(const char *p, char **r);
+
+int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid);
+int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid);
+
+int fchmod_umask(int fd, mode_t mode);
+int fchmod_opath(int fd, mode_t m);
+
+int futimens_opath(int fd, const struct timespec ts[2]);
+
+int fd_warn_permissions(const char *path, int fd);
+int stat_warn_permissions(const char *path, const struct stat *st);
+
+#define laccess(path, mode) faccessat(AT_FDCWD, (path), (mode), AT_SYMLINK_NOFOLLOW)
+
+int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode);
+int touch(const char *path);
+
+int symlink_idempotent(const char *from, const char *to, bool make_relative);
+
+int symlink_atomic(const char *from, const char *to);
+int mknod_atomic(const char *path, mode_t mode, dev_t dev);
+int mkfifo_atomic(const char *path, mode_t mode);
+int mkfifoat_atomic(int dir_fd, const char *path, mode_t mode);
+
+int get_files_in_directory(const char *path, char ***list);
+
+int tmp_dir(const char **ret);
+int var_tmp_dir(const char **ret);
+
+int unlink_or_warn(const char *filename);
+
+#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX + 1)
+
+#define FOREACH_INOTIFY_EVENT(e, buffer, sz) \
+ for ((e) = &buffer.ev; \
+ (uint8_t*) (e) < (uint8_t*) (buffer.raw) + (sz); \
+ (e) = (struct inotify_event*) ((uint8_t*) (e) + sizeof(struct inotify_event) + (e)->len))
+
+union inotify_event_buffer {
+ struct inotify_event ev;
+ uint8_t raw[INOTIFY_EVENT_MAX];
+};
+
+int inotify_add_watch_fd(int fd, int what, uint32_t mask);
+int inotify_add_watch_and_warn(int fd, const char *pathname, uint32_t mask);
+
+enum {
+ CHASE_PREFIX_ROOT = 1 << 0, /* The specified path will be prefixed by the specified root before beginning the iteration */
+ CHASE_NONEXISTENT = 1 << 1, /* It's OK if the path doesn't actually exist. */
+ CHASE_NO_AUTOFS = 1 << 2, /* Return -EREMOTE if autofs mount point found */
+ CHASE_SAFE = 1 << 3, /* Return -EPERM if we ever traverse from unprivileged to privileged files or directories */
+ CHASE_TRAIL_SLASH = 1 << 4, /* Any trailing slash will be preserved */
+ CHASE_STEP = 1 << 5, /* Just execute a single step of the normalization */
+ CHASE_NOFOLLOW = 1 << 6, /* Do not follow the path's right-most component. With ret_fd, when the path's
+ * right-most component refers to symlink, return O_PATH fd of the symlink. */
+ CHASE_WARN = 1 << 7, /* Emit an appropriate warning when an error is encountered */
+};
+
+/* How many iterations to execute before returning -ELOOP */
+#define CHASE_SYMLINKS_MAX 32
+
+int chase_symlinks(const char *path_with_prefix, const char *root, unsigned flags, char **ret_path, int *ret_fd);
+
+int chase_symlinks_and_open(const char *path, const char *root, unsigned chase_flags, int open_flags, char **ret_path);
+int chase_symlinks_and_opendir(const char *path, const char *root, unsigned chase_flags, char **ret_path, DIR **ret_dir);
+int chase_symlinks_and_stat(const char *path, const char *root, unsigned chase_flags, char **ret_path, struct stat *ret_stat, int *ret_fd);
+
+/* Useful for usage with _cleanup_(), removes a directory and frees the pointer */
+static inline void rmdir_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) rmdir(p);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rmdir_and_free);
+
+static inline void unlink_and_free(char *p) {
+ (void) unlink_noerrno(p);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free);
+
+int access_fd(int fd, int mode);
+
+void unlink_tempfilep(char (*p)[]);
+
+typedef enum UnlinkDeallocateFlags {
+ UNLINK_REMOVEDIR = 1 << 0,
+ UNLINK_ERASE = 1 << 1,
+} UnlinkDeallocateFlags;
+
+int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags);
+
+int fsync_directory_of_file(int fd);
+int fsync_full(int fd);
+int fsync_path_at(int at_fd, const char *path);
+
+int syncfs_path(int atfd, const char *path);
+
+int open_parent(const char *path, int flags, mode_t mode);
+
+int path_is_encrypted(const char *path);
diff --git a/src/basic/gcrypt-util.c b/src/basic/gcrypt-util.c
new file mode 100644
index 0000000..bf0d210
--- /dev/null
+++ b/src/basic/gcrypt-util.c
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_GCRYPT
+
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+
+void initialize_libgcrypt(bool secmem) {
+ if (gcry_control(GCRYCTL_INITIALIZATION_FINISHED_P))
+ return;
+
+ assert_se(gcry_check_version("1.4.5"));
+
+ /* Turn off "secmem". Clients which wish to make use of this
+ * feature should initialize the library manually */
+ if (!secmem)
+ gcry_control(GCRYCTL_DISABLE_SECMEM);
+ gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
+}
+
+int string_hashsum(const char *s, size_t len, int md_algorithm, char **out) {
+ _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL;
+ size_t hash_size;
+ void *hash;
+ char *enc;
+
+ initialize_libgcrypt(false);
+
+ hash_size = gcry_md_get_algo_dlen(md_algorithm);
+ assert(hash_size > 0);
+
+ gcry_md_open(&md, md_algorithm, 0);
+ if (!md)
+ return -EIO;
+
+ gcry_md_write(md, s, len);
+
+ hash = gcry_md_read(md, 0);
+ if (!hash)
+ return -EIO;
+
+ enc = hexmem(hash, hash_size);
+ if (!enc)
+ return -ENOMEM;
+
+ *out = enc;
+ return 0;
+}
+#endif
diff --git a/src/basic/gcrypt-util.h b/src/basic/gcrypt-util.h
new file mode 100644
index 0000000..c07b36c
--- /dev/null
+++ b/src/basic/gcrypt-util.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+
+#include "macro.h"
+
+void initialize_libgcrypt(bool secmem);
+int string_hashsum(const char *s, size_t len, int md_algorithm, char **out);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(gcry_md_hd_t, gcry_md_close);
+#endif
+
+static inline int string_hashsum_sha224(const char *s, size_t len, char **out) {
+#if HAVE_GCRYPT
+ return string_hashsum(s, len, GCRY_MD_SHA224, out);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static inline int string_hashsum_sha256(const char *s, size_t len, char **out) {
+#if HAVE_GCRYPT
+ return string_hashsum(s, len, GCRY_MD_SHA256, out);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
diff --git a/src/basic/generate-af-list.sh b/src/basic/generate-af-list.sh
new file mode 100755
index 0000000..6987877
--- /dev/null
+++ b/src/basic/generate-af-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -E -dM -include sys/socket.h -include "$2" -include "$3" - </dev/null | \
+ grep -Ev 'AF_UNSPEC|AF_MAX' | \
+ awk '/^#define[ \t]+AF_[^ \t]+[ \t]+[AP]F_[^ \t]/ { print $2; }'
diff --git a/src/basic/generate-arphrd-list.sh b/src/basic/generate-arphrd-list.sh
new file mode 100755
index 0000000..93cef47
--- /dev/null
+++ b/src/basic/generate-arphrd-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include linux/if_arp.h -include "$2" - </dev/null | \
+ awk '/^#define[ \t]+ARPHRD_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \
+ sed -e 's/ARPHRD_//'
diff --git a/src/basic/generate-cap-list.sh b/src/basic/generate-cap-list.sh
new file mode 100755
index 0000000..b814fa8
--- /dev/null
+++ b/src/basic/generate-cap-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include linux/capability.h -include "$2" -include "$3" - </dev/null | \
+ awk '/^#define[ \t]+CAP_[A-Z_]+[ \t]+/ { print $2; }' | \
+ grep -v CAP_LAST_CAP
diff --git a/src/basic/generate-errno-list.sh b/src/basic/generate-errno-list.sh
new file mode 100755
index 0000000..a4bb4d2
--- /dev/null
+++ b/src/basic/generate-errno-list.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include errno.h - </dev/null | \
+ awk '/^#define[ \t]+E[^ _]+[ \t]+/ { print $2; }'
diff --git a/src/basic/glob-util.c b/src/basic/glob-util.c
new file mode 100644
index 0000000..bc0278e
--- /dev/null
+++ b/src/basic/glob-util.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "dirent-util.h"
+#include "errno-util.h"
+#include "glob-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "strv.h"
+
+static void closedir_wrapper(void* v) {
+ (void) closedir(v);
+}
+
+int safe_glob(const char *path, int flags, glob_t *pglob) {
+ int k;
+
+ /* We want to set GLOB_ALTDIRFUNC ourselves, don't allow it to be set. */
+ assert(!(flags & GLOB_ALTDIRFUNC));
+
+ if (!pglob->gl_closedir)
+ pglob->gl_closedir = closedir_wrapper;
+ if (!pglob->gl_readdir)
+ pglob->gl_readdir = (struct dirent *(*)(void *)) readdir_no_dot;
+ if (!pglob->gl_opendir)
+ pglob->gl_opendir = (void *(*)(const char *)) opendir;
+ if (!pglob->gl_lstat)
+ pglob->gl_lstat = lstat;
+ if (!pglob->gl_stat)
+ pglob->gl_stat = stat;
+
+ errno = 0;
+ k = glob(path, flags | GLOB_ALTDIRFUNC, NULL, pglob);
+ if (k == GLOB_NOMATCH)
+ return -ENOENT;
+ if (k == GLOB_NOSPACE)
+ return -ENOMEM;
+ if (k != 0)
+ return errno_or_else(EIO);
+ if (strv_isempty(pglob->gl_pathv))
+ return -ENOENT;
+
+ return 0;
+}
+
+int glob_exists(const char *path) {
+ _cleanup_globfree_ glob_t g = {};
+ int k;
+
+ assert(path);
+
+ k = safe_glob(path, GLOB_NOSORT|GLOB_BRACE, &g);
+ if (k == -ENOENT)
+ return false;
+ if (k < 0)
+ return k;
+ return true;
+}
+
+int glob_extend(char ***strv, const char *path, int flags) {
+ _cleanup_globfree_ glob_t g = {};
+ int k;
+
+ k = safe_glob(path, GLOB_NOSORT|GLOB_BRACE|flags, &g);
+ if (k < 0)
+ return k;
+
+ return strv_extend_strv(strv, g.gl_pathv, false);
+}
diff --git a/src/basic/glob-util.h b/src/basic/glob-util.h
new file mode 100644
index 0000000..d2f8718
--- /dev/null
+++ b/src/basic/glob-util.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <glob.h>
+#include <stdbool.h>
+
+#include "macro.h"
+#include "string-util.h"
+
+/* Note: this function modifies pglob to set various functions. */
+int safe_glob(const char *path, int flags, glob_t *pglob);
+
+int glob_exists(const char *path);
+int glob_extend(char ***strv, const char *path, int flags);
+
+#define _cleanup_globfree_ _cleanup_(globfree)
+
+_pure_ static inline bool string_is_glob(const char *p) {
+ /* Check if a string contains any glob patterns. */
+ return !!strpbrk(p, GLOB_CHARS);
+}
diff --git a/src/basic/gunicode.c b/src/basic/gunicode.c
new file mode 100644
index 0000000..36beb95
--- /dev/null
+++ b/src/basic/gunicode.c
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/* gunicode.c - Unicode manipulation functions
+ *
+ * Copyright (C) 1999, 2000 Tom Tromey
+ * Copyright © 2000, 2005 Red Hat, Inc.
+ */
+
+#include "gunicode.h"
+
+#define unichar uint32_t
+
+/**
+ * g_utf8_prev_char:
+ * @p: a pointer to a position within a UTF-8 encoded string
+ *
+ * Finds the previous UTF-8 character in the string before @p.
+ *
+ * @p does not have to be at the beginning of a UTF-8 character. No check
+ * is made to see if the character found is actually valid other than
+ * it starts with an appropriate byte. If @p might be the first
+ * character of the string, you must use g_utf8_find_prev_char() instead.
+ *
+ * Return value: a pointer to the found character.
+ **/
+char *
+utf8_prev_char (const char *p)
+{
+ for (;;)
+ {
+ p--;
+ if ((*p & 0xc0) != 0x80)
+ return (char *)p;
+ }
+}
+
+struct Interval
+{
+ unichar start, end;
+};
+
+static int
+interval_compare (const void *key, const void *elt)
+{
+ unichar c = (unichar) (long) (key);
+ struct Interval *interval = (struct Interval *)elt;
+
+ if (c < interval->start)
+ return -1;
+ if (c > interval->end)
+ return +1;
+
+ return 0;
+}
+
+/*
+ * NOTE:
+ *
+ * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are
+ * generated from the Unicode Character Database's file
+ * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py
+ * in this way:
+ *
+ * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
+ *
+ * Last update for Unicode 6.0.
+ */
+
+/**
+ * g_unichar_iswide:
+ * @c: a Unicode character
+ *
+ * Determines if a character is typically rendered in a double-width
+ * cell.
+ *
+ * Return value: %TRUE if the character is wide
+ **/
+bool
+unichar_iswide (unichar c)
+{
+ /* See NOTE earlier for how to update this table. */
+ static const struct Interval wide[] = {
+ {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
+ {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
+ {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
+ {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
+ {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
+ {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
+ {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
+ {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A},
+ {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
+ {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */
+ {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
+ };
+
+ if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0],
+ interval_compare))
+ return true;
+
+ return false;
+}
+
+const char utf8_skip_data[256] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
+};
diff --git a/src/basic/gunicode.h b/src/basic/gunicode.h
new file mode 100644
index 0000000..6b71839
--- /dev/null
+++ b/src/basic/gunicode.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/* gunicode.h - Unicode manipulation functions
+ *
+ * Copyright (C) 1999, 2000 Tom Tromey
+ * Copyright © 2000, 2005 Red Hat, Inc.
+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+char *utf8_prev_char (const char *p);
+
+extern const char utf8_skip_data[256];
+
+/**
+ * g_utf8_next_char:
+ * @p: Pointer to the start of a valid UTF-8 character
+ *
+ * Skips to the next character in a UTF-8 string. The string must be
+ * valid; this macro is as fast as possible, and has no error-checking.
+ * You would use this macro to iterate over a string character by
+ * character. The macro returns the start of the next UTF-8 character.
+ * Before using this macro, use g_utf8_validate() to validate strings
+ * that may contain invalid UTF-8.
+ */
+#define utf8_next_char(p) (char *)((p) + utf8_skip_data[*(const unsigned char *)(p)])
+
+bool unichar_iswide (uint32_t c);
diff --git a/src/basic/hash-funcs.c b/src/basic/hash-funcs.c
new file mode 100644
index 0000000..e033de1
--- /dev/null
+++ b/src/basic/hash-funcs.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <string.h>
+
+#include "hash-funcs.h"
+#include "path-util.h"
+
+void string_hash_func(const char *p, struct siphash *state) {
+ siphash24_compress(p, strlen(p) + 1, state);
+}
+
+DEFINE_HASH_OPS(string_hash_ops, char, string_hash_func, string_compare_func);
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(string_hash_ops_free,
+ char, string_hash_func, string_compare_func, free);
+DEFINE_HASH_OPS_FULL(string_hash_ops_free_free,
+ char, string_hash_func, string_compare_func, free,
+ char, free);
+
+void path_hash_func(const char *q, struct siphash *state) {
+ size_t n;
+
+ assert(q);
+ assert(state);
+
+ /* Calculates a hash for a path in a way this duplicate inner slashes don't make a differences, and also
+ * whether there's a trailing slash or not. This fits well with the semantics of path_compare(), which does
+ * similar checks and also doesn't care for trailing slashes. Note that relative and absolute paths (i.e. those
+ * which begin in a slash or not) will hash differently though. */
+
+ n = strspn(q, "/");
+ if (n > 0) { /* Eat up initial slashes, and add one "/" to the hash for all of them */
+ siphash24_compress(q, 1, state);
+ q += n;
+ }
+
+ for (;;) {
+ /* Determine length of next component */
+ n = strcspn(q, "/");
+ if (n == 0) /* Reached the end? */
+ break;
+
+ /* Add this component to the hash and skip over it */
+ siphash24_compress(q, n, state);
+ q += n;
+
+ /* How many slashes follow this component? */
+ n = strspn(q, "/");
+ if (q[n] == 0) /* Is this a trailing slash? If so, we are at the end, and don't care about the slashes anymore */
+ break;
+
+ /* We are not add the end yet. Hash exactly one slash for all of the ones we just encountered. */
+ siphash24_compress(q, 1, state);
+ q += n;
+ }
+}
+
+DEFINE_HASH_OPS(path_hash_ops, char, path_hash_func, path_compare);
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(path_hash_ops_free,
+ char, path_hash_func, path_compare, free);
+
+void trivial_hash_func(const void *p, struct siphash *state) {
+ siphash24_compress(&p, sizeof(p), state);
+}
+
+int trivial_compare_func(const void *a, const void *b) {
+ return CMP(a, b);
+}
+
+const struct hash_ops trivial_hash_ops = {
+ .hash = trivial_hash_func,
+ .compare = trivial_compare_func,
+};
+
+const struct hash_ops trivial_hash_ops_free = {
+ .hash = trivial_hash_func,
+ .compare = trivial_compare_func,
+ .free_key = free,
+};
+
+const struct hash_ops trivial_hash_ops_free_free = {
+ .hash = trivial_hash_func,
+ .compare = trivial_compare_func,
+ .free_key = free,
+ .free_value = free,
+};
+
+void uint64_hash_func(const uint64_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(uint64_t), state);
+}
+
+int uint64_compare_func(const uint64_t *a, const uint64_t *b) {
+ return CMP(*a, *b);
+}
+
+DEFINE_HASH_OPS(uint64_hash_ops, uint64_t, uint64_hash_func, uint64_compare_func);
+
+#if SIZEOF_DEV_T != 8
+void devt_hash_func(const dev_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(dev_t), state);
+}
+
+int devt_compare_func(const dev_t *a, const dev_t *b) {
+ return CMP(*a, *b);
+}
+
+DEFINE_HASH_OPS(devt_hash_ops, dev_t, devt_hash_func, devt_compare_func);
+#endif
diff --git a/src/basic/hash-funcs.h b/src/basic/hash-funcs.h
new file mode 100644
index 0000000..5672df1
--- /dev/null
+++ b/src/basic/hash-funcs.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "siphash24.h"
+
+typedef void (*hash_func_t)(const void *p, struct siphash *state);
+typedef int (*compare_func_t)(const void *a, const void *b);
+
+struct hash_ops {
+ hash_func_t hash;
+ compare_func_t compare;
+ free_func_t free_key;
+ free_func_t free_value;
+};
+
+#define _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, free_key_func, free_value_func, scope) \
+ _unused_ static void (* UNIQ_T(static_hash_wrapper, uq))(const type *, struct siphash *) = hash_func; \
+ _unused_ static int (* UNIQ_T(static_compare_wrapper, uq))(const type *, const type *) = compare_func; \
+ scope const struct hash_ops name = { \
+ .hash = (hash_func_t) hash_func, \
+ .compare = (compare_func_t) compare_func, \
+ .free_key = free_key_func, \
+ .free_value = free_value_func, \
+ }
+
+#define _DEFINE_FREE_FUNC(uq, type, wrapper_name, func) \
+ /* Type-safe free function */ \
+ static void UNIQ_T(wrapper_name, uq)(void *a) { \
+ type *_a = a; \
+ func(_a); \
+ }
+
+#define _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(uq, name, type, hash_func, compare_func, free_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type, static_free_wrapper, free_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ UNIQ_T(static_free_wrapper, uq), NULL, scope)
+
+#define _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(uq, name, type, hash_func, compare_func, type_value, free_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type_value, static_free_wrapper, free_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ NULL, UNIQ_T(static_free_wrapper, uq), scope)
+
+#define _DEFINE_HASH_OPS_FULL(uq, name, type, hash_func, compare_func, free_key_func, type_value, free_value_func, scope) \
+ _DEFINE_FREE_FUNC(uq, type, static_free_key_wrapper, free_key_func); \
+ _DEFINE_FREE_FUNC(uq, type_value, static_free_value_wrapper, free_value_func); \
+ _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \
+ UNIQ_T(static_free_key_wrapper, uq), \
+ UNIQ_T(static_free_value_wrapper, uq), scope)
+
+#define DEFINE_HASH_OPS(name, type, hash_func, compare_func) \
+ _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL,)
+
+#define DEFINE_PRIVATE_HASH_OPS(name, type, hash_func, compare_func) \
+ _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL, static)
+
+#define DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \
+ _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \
+ _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func, static)
+
+#define DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \
+ _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \
+ _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func, static)
+
+#define DEFINE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \
+ _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func,)
+
+#define DEFINE_PRIVATE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \
+ _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func, static)
+
+void string_hash_func(const char *p, struct siphash *state);
+#define string_compare_func strcmp
+extern const struct hash_ops string_hash_ops;
+extern const struct hash_ops string_hash_ops_free;
+extern const struct hash_ops string_hash_ops_free_free;
+
+void path_hash_func(const char *p, struct siphash *state);
+extern const struct hash_ops path_hash_ops;
+extern const struct hash_ops path_hash_ops_free;
+
+/* This will compare the passed pointers directly, and will not dereference them. This is hence not useful for strings
+ * or suchlike. */
+void trivial_hash_func(const void *p, struct siphash *state);
+int trivial_compare_func(const void *a, const void *b) _const_;
+extern const struct hash_ops trivial_hash_ops;
+extern const struct hash_ops trivial_hash_ops_free;
+extern const struct hash_ops trivial_hash_ops_free_free;
+
+/* 32bit values we can always just embed in the pointer itself, but in order to support 32bit archs we need store 64bit
+ * values indirectly, since they don't fit in a pointer. */
+void uint64_hash_func(const uint64_t *p, struct siphash *state);
+int uint64_compare_func(const uint64_t *a, const uint64_t *b) _pure_;
+extern const struct hash_ops uint64_hash_ops;
+
+/* On some archs dev_t is 32bit, and on others 64bit. And sometimes it's 64bit on 32bit archs, and sometimes 32bit on
+ * 64bit archs. Yuck! */
+#if SIZEOF_DEV_T != 8
+void devt_hash_func(const dev_t *p, struct siphash *state) _pure_;
+int devt_compare_func(const dev_t *a, const dev_t *b) _pure_;
+extern const struct hash_ops devt_hash_ops;
+#else
+#define devt_hash_func uint64_hash_func
+#define devt_compare_func uint64_compare_func
+#define devt_hash_ops uint64_hash_ops
+#endif
diff --git a/src/basic/hashmap.c b/src/basic/hashmap.c
new file mode 100644
index 0000000..cdc6847
--- /dev/null
+++ b/src/basic/hashmap.c
@@ -0,0 +1,2028 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "mempool.h"
+#include "missing_syscall.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "set.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "strv.h"
+
+#if ENABLE_DEBUG_HASHMAP
+#include "list.h"
+#endif
+
+/*
+ * Implementation of hashmaps.
+ * Addressing: open
+ * - uses less RAM compared to closed addressing (chaining), because
+ * our entries are small (especially in Sets, which tend to contain
+ * the majority of entries in systemd).
+ * Collision resolution: Robin Hood
+ * - tends to equalize displacement of entries from their optimal buckets.
+ * Probe sequence: linear
+ * - though theoretically worse than random probing/uniform hashing/double
+ * hashing, it is good for cache locality.
+ *
+ * References:
+ * Celis, P. 1986. Robin Hood Hashing.
+ * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada, Canada.
+ * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
+ * - The results are derived for random probing. Suggests deletion with
+ * tombstones and two mean-centered search methods. None of that works
+ * well for linear probing.
+ *
+ * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
+ * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
+ * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
+ * http://www.math.uu.se/~svante/papers/sj157.pdf
+ * - Applies to Robin Hood with linear probing. Contains remarks on
+ * the unsuitability of mean-centered search with linear probing.
+ *
+ * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
+ * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
+ * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
+ * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
+ * in a successful search), and Janson writes about displacement. C = d + 1.
+ *
+ * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
+ * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
+ * - Explanation of backward shift deletion with pictures.
+ *
+ * Khuong, P. 2013. The Other Robin Hood Hashing.
+ * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
+ * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
+ */
+
+/*
+ * XXX Ideas for improvement:
+ * For unordered hashmaps, randomize iteration order, similarly to Perl:
+ * http://blog.booking.com/hardening-perls-hash-function.html
+ */
+
+/* INV_KEEP_FREE = 1 / (1 - max_load_factor)
+ * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */
+#define INV_KEEP_FREE 5U
+
+/* Fields common to entries of all hashmap/set types */
+struct hashmap_base_entry {
+ const void *key;
+};
+
+/* Entry types for specific hashmap/set types
+ * hashmap_base_entry must be at the beginning of each entry struct. */
+
+struct plain_hashmap_entry {
+ struct hashmap_base_entry b;
+ void *value;
+};
+
+struct ordered_hashmap_entry {
+ struct plain_hashmap_entry p;
+ unsigned iterate_next, iterate_previous;
+};
+
+struct set_entry {
+ struct hashmap_base_entry b;
+};
+
+/* In several functions it is advantageous to have the hash table extended
+ * virtually by a couple of additional buckets. We reserve special index values
+ * for these "swap" buckets. */
+#define _IDX_SWAP_BEGIN (UINT_MAX - 3)
+#define IDX_PUT (_IDX_SWAP_BEGIN + 0)
+#define IDX_TMP (_IDX_SWAP_BEGIN + 1)
+#define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2)
+
+#define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */
+#define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */
+
+assert_cc(IDX_FIRST == _IDX_SWAP_END);
+assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
+
+/* Storage space for the "swap" buckets.
+ * All entry types can fit into a ordered_hashmap_entry. */
+struct swap_entries {
+ struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
+};
+
+/* Distance from Initial Bucket */
+typedef uint8_t dib_raw_t;
+#define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */
+#define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */
+#define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */
+#define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */
+
+#define DIB_FREE UINT_MAX
+
+#if ENABLE_DEBUG_HASHMAP
+struct hashmap_debug_info {
+ LIST_FIELDS(struct hashmap_debug_info, debug_list);
+ unsigned max_entries; /* high watermark of n_entries */
+
+ /* who allocated this hashmap */
+ int line;
+ const char *file;
+ const char *func;
+
+ /* fields to detect modification while iterating */
+ unsigned put_count; /* counts puts into the hashmap */
+ unsigned rem_count; /* counts removals from hashmap */
+ unsigned last_rem_idx; /* remembers last removal index */
+};
+
+/* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
+static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
+static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+enum HashmapType {
+ HASHMAP_TYPE_PLAIN,
+ HASHMAP_TYPE_ORDERED,
+ HASHMAP_TYPE_SET,
+ _HASHMAP_TYPE_MAX
+};
+
+struct _packed_ indirect_storage {
+ void *storage; /* where buckets and DIBs are stored */
+ uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */
+
+ unsigned n_entries; /* number of stored entries */
+ unsigned n_buckets; /* number of buckets */
+
+ unsigned idx_lowest_entry; /* Index below which all buckets are free.
+ Makes "while(hashmap_steal_first())" loops
+ O(n) instead of O(n^2) for unordered hashmaps. */
+ uint8_t _pad[3]; /* padding for the whole HashmapBase */
+ /* The bitfields in HashmapBase complete the alignment of the whole thing. */
+};
+
+struct direct_storage {
+ /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
+ * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
+ * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
+ uint8_t storage[sizeof(struct indirect_storage)];
+};
+
+#define DIRECT_BUCKETS(entry_t) \
+ (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
+
+/* We should be able to store at least one entry directly. */
+assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);
+
+/* We have 3 bits for n_direct_entries. */
+assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
+
+/* Hashmaps with directly stored entries all use this shared hash key.
+ * It's no big deal if the key is guessed, because there can be only
+ * a handful of directly stored entries in a hashmap. When a hashmap
+ * outgrows direct storage, it gets its own key for indirect storage. */
+static uint8_t shared_hash_key[HASH_KEY_SIZE];
+
+/* Fields that all hashmap/set types must have */
+struct HashmapBase {
+ const struct hash_ops *hash_ops; /* hash and compare ops to use */
+
+ union _packed_ {
+ struct indirect_storage indirect; /* if has_indirect */
+ struct direct_storage direct; /* if !has_indirect */
+ };
+
+ enum HashmapType type:2; /* HASHMAP_TYPE_* */
+ bool has_indirect:1; /* whether indirect storage is used */
+ unsigned n_direct_entries:3; /* Number of entries in direct storage.
+ * Only valid if !has_indirect. */
+ bool from_pool:1; /* whether was allocated from mempool */
+ bool dirty:1; /* whether dirtied since last iterated_cache_get() */
+ bool cached:1; /* whether this hashmap is being cached */
+
+#if ENABLE_DEBUG_HASHMAP
+ struct hashmap_debug_info debug;
+#endif
+};
+
+/* Specific hash types
+ * HashmapBase must be at the beginning of each hashmap struct. */
+
+struct Hashmap {
+ struct HashmapBase b;
+};
+
+struct OrderedHashmap {
+ struct HashmapBase b;
+ unsigned iterate_list_head, iterate_list_tail;
+};
+
+struct Set {
+ struct HashmapBase b;
+};
+
+typedef struct CacheMem {
+ const void **ptr;
+ size_t n_populated, n_allocated;
+ bool active:1;
+} CacheMem;
+
+struct IteratedCache {
+ HashmapBase *hashmap;
+ CacheMem keys, values;
+};
+
+DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8);
+DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
+/* No need for a separate Set pool */
+assert_cc(sizeof(Hashmap) == sizeof(Set));
+
+struct hashmap_type_info {
+ size_t head_size;
+ size_t entry_size;
+ struct mempool *mempool;
+ unsigned n_direct_buckets;
+};
+
+static _used_ const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
+ [HASHMAP_TYPE_PLAIN] = {
+ .head_size = sizeof(Hashmap),
+ .entry_size = sizeof(struct plain_hashmap_entry),
+ .mempool = &hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
+ },
+ [HASHMAP_TYPE_ORDERED] = {
+ .head_size = sizeof(OrderedHashmap),
+ .entry_size = sizeof(struct ordered_hashmap_entry),
+ .mempool = &ordered_hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
+ },
+ [HASHMAP_TYPE_SET] = {
+ .head_size = sizeof(Set),
+ .entry_size = sizeof(struct set_entry),
+ .mempool = &hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
+ },
+};
+
+#if VALGRIND
+_destructor_ static void cleanup_pools(void) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ /* Be nice to valgrind */
+
+ /* The pool is only allocated by the main thread, but the memory can
+ * be passed to other threads. Let's clean up if we are the main thread
+ * and no other threads are live. */
+ /* We build our own is_main_thread() here, which doesn't use C11
+ * TLS based caching of the result. That's because valgrind apparently
+ * doesn't like malloc() (which C11 TLS internally uses) to be called
+ * from a GCC destructors. */
+ if (getpid() != gettid())
+ return;
+
+ r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t);
+ if (r < 0 || !streq(t, "1"))
+ return;
+
+ mempool_drop(&hashmap_pool);
+ mempool_drop(&ordered_hashmap_pool);
+}
+#endif
+
+static unsigned n_buckets(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.n_buckets
+ : hashmap_type_info[h->type].n_direct_buckets;
+}
+
+static unsigned n_entries(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.n_entries
+ : h->n_direct_entries;
+}
+
+static void n_entries_inc(HashmapBase *h) {
+ if (h->has_indirect)
+ h->indirect.n_entries++;
+ else
+ h->n_direct_entries++;
+}
+
+static void n_entries_dec(HashmapBase *h) {
+ if (h->has_indirect)
+ h->indirect.n_entries--;
+ else
+ h->n_direct_entries--;
+}
+
+static void* storage_ptr(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.storage
+ : h->direct.storage;
+}
+
+static uint8_t* hash_key(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.hash_key
+ : shared_hash_key;
+}
+
+static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
+ struct siphash state;
+ uint64_t hash;
+
+ siphash24_init(&state, hash_key(h));
+
+ h->hash_ops->hash(p, &state);
+
+ hash = siphash24_finalize(&state);
+
+ return (unsigned) (hash % n_buckets(h));
+}
+#define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
+
+static void base_set_dirty(HashmapBase *h) {
+ h->dirty = true;
+}
+#define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h))
+
+static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
+ static uint8_t current[HASH_KEY_SIZE];
+ static bool current_initialized = false;
+
+ /* Returns a hash function key to use. In order to keep things
+ * fast we will not generate a new key each time we allocate a
+ * new hash table. Instead, we'll just reuse the most recently
+ * generated one, except if we never generated one or when we
+ * are rehashing an entire hash table because we reached a
+ * fill level */
+
+ if (!current_initialized || !reuse_is_ok) {
+ random_bytes(current, sizeof(current));
+ current_initialized = true;
+ }
+
+ memcpy(hash_key, current, sizeof(current));
+}
+
+static struct hashmap_base_entry* bucket_at(HashmapBase *h, unsigned idx) {
+ return (struct hashmap_base_entry*)
+ ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size);
+}
+
+static struct plain_hashmap_entry* plain_bucket_at(Hashmap *h, unsigned idx) {
+ return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct ordered_hashmap_entry* ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
+ return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
+ return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct ordered_hashmap_entry* bucket_at_swap(struct swap_entries *swap, unsigned idx) {
+ return &swap->e[idx - _IDX_SWAP_BEGIN];
+}
+
+/* Returns a pointer to the bucket at index idx.
+ * Understands real indexes and swap indexes, hence "_virtual". */
+static struct hashmap_base_entry* bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
+ unsigned idx) {
+ if (idx < _IDX_SWAP_BEGIN)
+ return bucket_at(h, idx);
+
+ if (idx < _IDX_SWAP_END)
+ return &bucket_at_swap(swap, idx)->p.b;
+
+ assert_not_reached("Invalid index");
+}
+
+static dib_raw_t* dib_raw_ptr(HashmapBase *h) {
+ return (dib_raw_t*)
+ ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h));
+}
+
+static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
+ return idx >= from ? idx - from
+ : n_buckets(h) + idx - from;
+}
+
+static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
+ unsigned initial_bucket;
+
+ if (raw_dib == DIB_RAW_FREE)
+ return DIB_FREE;
+
+ if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
+ return raw_dib;
+
+ /*
+ * Having an overflow DIB value is very unlikely. The hash function
+ * would have to be bad. For example, in a table of size 2^24 filled
+ * to load factor 0.9 the maximum observed DIB is only about 60.
+ * In theory (assuming I used Maxima correctly), for an infinite size
+ * hash table with load factor 0.8 the probability of a given entry
+ * having DIB > 40 is 1.9e-8.
+ * This returns the correct DIB value by recomputing the hash value in
+ * the unlikely case. XXX Hitting this case could be a hint to rehash.
+ */
+ initial_bucket = bucket_hash(h, bucket_at(h, idx)->key);
+ return bucket_distance(h, idx, initial_bucket);
+}
+
+static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
+ dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE;
+}
+
+static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
+ dib_raw_t *dibs;
+
+ dibs = dib_raw_ptr(h);
+
+ for ( ; idx < n_buckets(h); idx++)
+ if (dibs[idx] != DIB_RAW_FREE)
+ return idx;
+
+ return IDX_NIL;
+}
+
+static void bucket_mark_free(HashmapBase *h, unsigned idx) {
+ memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size);
+ bucket_set_dib(h, idx, DIB_FREE);
+}
+
+static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
+ unsigned from, unsigned to) {
+ struct hashmap_base_entry *e_from, *e_to;
+
+ assert(from != to);
+
+ e_from = bucket_at_virtual(h, swap, from);
+ e_to = bucket_at_virtual(h, swap, to);
+
+ memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ struct ordered_hashmap_entry *le, *le_to;
+
+ le_to = (struct ordered_hashmap_entry*) e_to;
+
+ if (le_to->iterate_next != IDX_NIL) {
+ le = (struct ordered_hashmap_entry*)
+ bucket_at_virtual(h, swap, le_to->iterate_next);
+ le->iterate_previous = to;
+ }
+
+ if (le_to->iterate_previous != IDX_NIL) {
+ le = (struct ordered_hashmap_entry*)
+ bucket_at_virtual(h, swap, le_to->iterate_previous);
+ le->iterate_next = to;
+ }
+
+ if (lh->iterate_list_head == from)
+ lh->iterate_list_head = to;
+ if (lh->iterate_list_tail == from)
+ lh->iterate_list_tail = to;
+ }
+}
+
+static unsigned next_idx(HashmapBase *h, unsigned idx) {
+ return (idx + 1U) % n_buckets(h);
+}
+
+static unsigned prev_idx(HashmapBase *h, unsigned idx) {
+ return (n_buckets(h) + idx - 1U) % n_buckets(h);
+}
+
+static void* entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
+ switch (h->type) {
+
+ case HASHMAP_TYPE_PLAIN:
+ case HASHMAP_TYPE_ORDERED:
+ return ((struct plain_hashmap_entry*)e)->value;
+
+ case HASHMAP_TYPE_SET:
+ return (void*) e->key;
+
+ default:
+ assert_not_reached("Unknown hashmap type");
+ }
+}
+
+static void base_remove_entry(HashmapBase *h, unsigned idx) {
+ unsigned left, right, prev, dib;
+ dib_raw_t raw_dib, *dibs;
+
+ dibs = dib_raw_ptr(h);
+ assert(dibs[idx] != DIB_RAW_FREE);
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.rem_count++;
+ h->debug.last_rem_idx = idx;
+#endif
+
+ left = idx;
+ /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
+ for (right = next_idx(h, left); ; right = next_idx(h, right)) {
+ raw_dib = dibs[right];
+ if (IN_SET(raw_dib, 0, DIB_RAW_FREE))
+ break;
+
+ /* The buckets are not supposed to be all occupied and with DIB > 0.
+ * That would mean we could make everyone better off by shifting them
+ * backward. This scenario is impossible. */
+ assert(left != right);
+ }
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);
+
+ if (le->iterate_next != IDX_NIL)
+ ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
+ else
+ lh->iterate_list_tail = le->iterate_previous;
+
+ if (le->iterate_previous != IDX_NIL)
+ ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
+ else
+ lh->iterate_list_head = le->iterate_next;
+ }
+
+ /* Now shift all buckets in the interval (left, right) one step backwards */
+ for (prev = left, left = next_idx(h, left); left != right;
+ prev = left, left = next_idx(h, left)) {
+ dib = bucket_calculate_dib(h, left, dibs[left]);
+ assert(dib != 0);
+ bucket_move_entry(h, NULL, left, prev);
+ bucket_set_dib(h, prev, dib - 1);
+ }
+
+ bucket_mark_free(h, prev);
+ n_entries_dec(h);
+ base_set_dirty(h);
+}
+#define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
+
+static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
+ struct ordered_hashmap_entry *e;
+ unsigned idx;
+
+ assert(h);
+ assert(i);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST) {
+ idx = h->iterate_list_head;
+ e = ordered_bucket_at(h, idx);
+ } else {
+ idx = i->idx;
+ e = ordered_bucket_at(h, idx);
+ /*
+ * We allow removing the current entry while iterating, but removal may cause
+ * a backward shift. The next entry may thus move one bucket to the left.
+ * To detect when it happens, we remember the key pointer of the entry we were
+ * going to iterate next. If it does not match, there was a backward shift.
+ */
+ if (e->p.b.key != i->next_key) {
+ idx = prev_idx(HASHMAP_BASE(h), idx);
+ e = ordered_bucket_at(h, idx);
+ }
+ assert(e->p.b.key == i->next_key);
+ }
+
+#if ENABLE_DEBUG_HASHMAP
+ i->prev_idx = idx;
+#endif
+
+ if (e->iterate_next != IDX_NIL) {
+ struct ordered_hashmap_entry *n;
+ i->idx = e->iterate_next;
+ n = ordered_bucket_at(h, i->idx);
+ i->next_key = n->p.b.key;
+ } else
+ i->idx = IDX_NIL;
+
+ return idx;
+
+at_end:
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+}
+
+static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
+ unsigned idx;
+
+ assert(h);
+ assert(i);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST) {
+ /* fast forward to the first occupied bucket */
+ if (h->has_indirect) {
+ i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
+ h->indirect.idx_lowest_entry = i->idx;
+ } else
+ i->idx = skip_free_buckets(h, 0);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+ } else {
+ struct hashmap_base_entry *e;
+
+ assert(i->idx > 0);
+
+ e = bucket_at(h, i->idx);
+ /*
+ * We allow removing the current entry while iterating, but removal may cause
+ * a backward shift. The next entry may thus move one bucket to the left.
+ * To detect when it happens, we remember the key pointer of the entry we were
+ * going to iterate next. If it does not match, there was a backward shift.
+ */
+ if (e->key != i->next_key)
+ e = bucket_at(h, --i->idx);
+
+ assert(e->key == i->next_key);
+ }
+
+ idx = i->idx;
+#if ENABLE_DEBUG_HASHMAP
+ i->prev_idx = idx;
+#endif
+
+ i->idx = skip_free_buckets(h, i->idx + 1);
+ if (i->idx != IDX_NIL)
+ i->next_key = bucket_at(h, i->idx)->key;
+ else
+ i->idx = IDX_NIL;
+
+ return idx;
+
+at_end:
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+}
+
+static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
+ if (!h) {
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+ }
+
+#if ENABLE_DEBUG_HASHMAP
+ if (i->idx == IDX_FIRST) {
+ i->put_count = h->debug.put_count;
+ i->rem_count = h->debug.rem_count;
+ } else {
+ /* While iterating, must not add any new entries */
+ assert(i->put_count == h->debug.put_count);
+ /* ... or remove entries other than the current one */
+ assert(i->rem_count == h->debug.rem_count ||
+ (i->rem_count == h->debug.rem_count - 1 &&
+ i->prev_idx == h->debug.last_rem_idx));
+ /* Reset our removals counter */
+ i->rem_count = h->debug.rem_count;
+ }
+#endif
+
+ return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
+ : hashmap_iterate_in_internal_order(h, i);
+}
+
+bool _hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) {
+ struct hashmap_base_entry *e;
+ void *data;
+ unsigned idx;
+
+ idx = hashmap_iterate_entry(h, i);
+ if (idx == IDX_NIL) {
+ if (value)
+ *value = NULL;
+ if (key)
+ *key = NULL;
+
+ return false;
+ }
+
+ e = bucket_at(h, idx);
+ data = entry_value(h, e);
+ if (value)
+ *value = data;
+ if (key)
+ *key = e->key;
+
+ return true;
+}
+
+#define HASHMAP_FOREACH_IDX(idx, h, i) \
+ for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
+ (idx != IDX_NIL); \
+ (idx) = hashmap_iterate_entry((h), &(i)))
+
+IteratedCache* _hashmap_iterated_cache_new(HashmapBase *h) {
+ IteratedCache *cache;
+
+ assert(h);
+ assert(!h->cached);
+
+ if (h->cached)
+ return NULL;
+
+ cache = new0(IteratedCache, 1);
+ if (!cache)
+ return NULL;
+
+ cache->hashmap = h;
+ h->cached = true;
+
+ return cache;
+}
+
+static void reset_direct_storage(HashmapBase *h) {
+ const struct hashmap_type_info *hi = &hashmap_type_info[h->type];
+ void *p;
+
+ assert(!h->has_indirect);
+
+ p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets);
+ memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets);
+}
+
+static void shared_hash_key_initialize(void) {
+ random_bytes(shared_hash_key, sizeof(shared_hash_key));
+}
+
+static struct HashmapBase* hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
+ HashmapBase *h;
+ const struct hashmap_type_info *hi = &hashmap_type_info[type];
+ bool up;
+
+ up = mempool_enabled();
+
+ h = up ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
+ if (!h)
+ return NULL;
+
+ h->type = type;
+ h->from_pool = up;
+ h->hash_ops = hash_ops ?: &trivial_hash_ops;
+
+ if (type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*)h;
+ lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
+ }
+
+ reset_direct_storage(h);
+
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+ assert_se(pthread_once(&once, shared_hash_key_initialize) == 0);
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.func = func;
+ h->debug.file = file;
+ h->debug.line = line;
+ assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
+ LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
+ assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
+#endif
+
+ return h;
+}
+
+Hashmap *_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
+}
+
+OrderedHashmap *_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
+}
+
+Set *_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
+}
+
+static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
+ enum HashmapType type HASHMAP_DEBUG_PARAMS) {
+ HashmapBase *q;
+
+ assert(h);
+
+ if (*h)
+ return 0;
+
+ q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
+ if (!q)
+ return -ENOMEM;
+
+ *h = q;
+ return 1;
+}
+
+int _hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
+}
+
+int _ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
+}
+
+int _set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
+}
+
+int _ordered_hashmap_ensure_put(OrderedHashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS) {
+ int r;
+
+ r = _ordered_hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS);
+ if (r < 0)
+ return r;
+
+ return ordered_hashmap_put(*h, key, value);
+}
+
+static void hashmap_free_no_clear(HashmapBase *h) {
+ assert(!h->has_indirect);
+ assert(h->n_direct_entries == 0);
+
+#if ENABLE_DEBUG_HASHMAP
+ assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0);
+ LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
+ assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0);
+#endif
+
+ if (h->from_pool) {
+ /* Ensure that the object didn't get migrated between threads. */
+ assert_se(is_main_thread());
+ mempool_free_tile(hashmap_type_info[h->type].mempool, h);
+ } else
+ free(h);
+}
+
+HashmapBase* _hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
+ if (h) {
+ _hashmap_clear(h, default_free_key, default_free_value);
+ hashmap_free_no_clear(h);
+ }
+
+ return NULL;
+}
+
+void _hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) {
+ free_func_t free_key, free_value;
+ if (!h)
+ return;
+
+ free_key = h->hash_ops->free_key ?: default_free_key;
+ free_value = h->hash_ops->free_value ?: default_free_value;
+
+ if (free_key || free_value) {
+
+ /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the
+ * hash table, and only then call the destructor functions. If these destructors then try to unregister
+ * themselves from our hash table a second time, the entry is already gone. */
+
+ while (_hashmap_size(h) > 0) {
+ void *k = NULL;
+ void *v;
+
+ v = _hashmap_first_key_and_value(h, true, &k);
+
+ if (free_key)
+ free_key(k);
+
+ if (free_value)
+ free_value(v);
+ }
+ }
+
+ if (h->has_indirect) {
+ free(h->indirect.storage);
+ h->has_indirect = false;
+ }
+
+ h->n_direct_entries = 0;
+ reset_direct_storage(h);
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
+ }
+
+ base_set_dirty(h);
+}
+
+static int resize_buckets(HashmapBase *h, unsigned entries_add);
+
+/*
+ * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
+ * Performs Robin Hood swaps as it goes. The entry to put must be placed
+ * by the caller into swap slot IDX_PUT.
+ * If used for in-place resizing, may leave a displaced entry in swap slot
+ * IDX_PUT. Caller must rehash it next.
+ * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
+ * false otherwise.
+ */
+static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
+ struct swap_entries *swap) {
+ dib_raw_t raw_dib, *dibs;
+ unsigned dib, distance;
+
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.put_count++;
+#endif
+
+ dibs = dib_raw_ptr(h);
+
+ for (distance = 0; ; distance++) {
+ raw_dib = dibs[idx];
+ if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) {
+ if (raw_dib == DIB_RAW_REHASH)
+ bucket_move_entry(h, swap, idx, IDX_TMP);
+
+ if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
+ h->indirect.idx_lowest_entry = idx;
+
+ bucket_set_dib(h, idx, distance);
+ bucket_move_entry(h, swap, IDX_PUT, idx);
+ if (raw_dib == DIB_RAW_REHASH) {
+ bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
+ return true;
+ }
+
+ return false;
+ }
+
+ dib = bucket_calculate_dib(h, idx, raw_dib);
+
+ if (dib < distance) {
+ /* Found a wealthier entry. Go Robin Hood! */
+ bucket_set_dib(h, idx, distance);
+
+ /* swap the entries */
+ bucket_move_entry(h, swap, idx, IDX_TMP);
+ bucket_move_entry(h, swap, IDX_PUT, idx);
+ bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
+
+ distance = dib;
+ }
+
+ idx = next_idx(h, idx);
+ }
+}
+
+/*
+ * Puts an entry into a hashmap, boldly - no check whether key already exists.
+ * The caller must place the entry (only its key and value, not link indexes)
+ * in swap slot IDX_PUT.
+ * Caller must ensure: the key does not exist yet in the hashmap.
+ * that resize is not needed if !may_resize.
+ * Returns: 1 if entry was put successfully.
+ * -ENOMEM if may_resize==true and resize failed with -ENOMEM.
+ * Cannot return -ENOMEM if !may_resize.
+ */
+static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
+ struct swap_entries *swap, bool may_resize) {
+ struct ordered_hashmap_entry *new_entry;
+ int r;
+
+ assert(idx < n_buckets(h));
+
+ new_entry = bucket_at_swap(swap, IDX_PUT);
+
+ if (may_resize) {
+ r = resize_buckets(h, 1);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ idx = bucket_hash(h, new_entry->p.b.key);
+ }
+ assert(n_entries(h) < n_buckets(h));
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+
+ new_entry->iterate_next = IDX_NIL;
+ new_entry->iterate_previous = lh->iterate_list_tail;
+
+ if (lh->iterate_list_tail != IDX_NIL) {
+ struct ordered_hashmap_entry *old_tail;
+
+ old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
+ assert(old_tail->iterate_next == IDX_NIL);
+ old_tail->iterate_next = IDX_PUT;
+ }
+
+ lh->iterate_list_tail = IDX_PUT;
+ if (lh->iterate_list_head == IDX_NIL)
+ lh->iterate_list_head = IDX_PUT;
+ }
+
+ assert_se(hashmap_put_robin_hood(h, idx, swap) == false);
+
+ n_entries_inc(h);
+#if ENABLE_DEBUG_HASHMAP
+ h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
+#endif
+
+ base_set_dirty(h);
+
+ return 1;
+}
+#define hashmap_put_boldly(h, idx, swap, may_resize) \
+ hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
+
+/*
+ * Returns 0 if resize is not needed.
+ * 1 if successfully resized.
+ * -ENOMEM on allocation failure.
+ */
+static int resize_buckets(HashmapBase *h, unsigned entries_add) {
+ struct swap_entries swap;
+ void *new_storage;
+ dib_raw_t *old_dibs, *new_dibs;
+ const struct hashmap_type_info *hi;
+ unsigned idx, optimal_idx;
+ unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
+ uint8_t new_shift;
+ bool rehash_next;
+
+ assert(h);
+
+ hi = &hashmap_type_info[h->type];
+ new_n_entries = n_entries(h) + entries_add;
+
+ /* overflow? */
+ if (_unlikely_(new_n_entries < entries_add))
+ return -ENOMEM;
+
+ /* For direct storage we allow 100% load, because it's tiny. */
+ if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
+ return 0;
+
+ /*
+ * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
+ * From it follows: m = n + n/(INV_KEEP_FREE - 1)
+ */
+ new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
+ /* overflow? */
+ if (_unlikely_(new_n_buckets < new_n_entries))
+ return -ENOMEM;
+
+ if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
+ return -ENOMEM;
+
+ old_n_buckets = n_buckets(h);
+
+ if (_likely_(new_n_buckets <= old_n_buckets))
+ return 0;
+
+ new_shift = log2u_round_up(MAX(
+ new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
+ 2 * sizeof(struct direct_storage)));
+
+ /* Realloc storage (buckets and DIB array). */
+ new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
+ 1U << new_shift);
+ if (!new_storage)
+ return -ENOMEM;
+
+ /* Must upgrade direct to indirect storage. */
+ if (!h->has_indirect) {
+ memcpy(new_storage, h->direct.storage,
+ old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
+ h->indirect.n_entries = h->n_direct_entries;
+ h->indirect.idx_lowest_entry = 0;
+ h->n_direct_entries = 0;
+ }
+
+ /* Get a new hash key. If we've just upgraded to indirect storage,
+ * allow reusing a previously generated key. It's still a different key
+ * from the shared one that we used for direct storage. */
+ get_hash_key(h->indirect.hash_key, !h->has_indirect);
+
+ h->has_indirect = true;
+ h->indirect.storage = new_storage;
+ h->indirect.n_buckets = (1U << new_shift) /
+ (hi->entry_size + sizeof(dib_raw_t));
+
+ old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets);
+ new_dibs = dib_raw_ptr(h);
+
+ /*
+ * Move the DIB array to the new place, replacing valid DIB values with
+ * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
+ * Note: Overlap is not possible, because we have at least doubled the
+ * number of buckets and dib_raw_t is smaller than any entry type.
+ */
+ for (idx = 0; idx < old_n_buckets; idx++) {
+ assert(old_dibs[idx] != DIB_RAW_REHASH);
+ new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
+ : DIB_RAW_REHASH;
+ }
+
+ /* Zero the area of newly added entries (including the old DIB area) */
+ memzero(bucket_at(h, old_n_buckets),
+ (n_buckets(h) - old_n_buckets) * hi->entry_size);
+
+ /* The upper half of the new DIB array needs initialization */
+ memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
+ (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
+
+ /* Rehash entries that need it */
+ n_rehashed = 0;
+ for (idx = 0; idx < old_n_buckets; idx++) {
+ if (new_dibs[idx] != DIB_RAW_REHASH)
+ continue;
+
+ optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
+
+ /*
+ * Not much to do if by luck the entry hashes to its current
+ * location. Just set its DIB.
+ */
+ if (optimal_idx == idx) {
+ new_dibs[idx] = 0;
+ n_rehashed++;
+ continue;
+ }
+
+ new_dibs[idx] = DIB_RAW_FREE;
+ bucket_move_entry(h, &swap, idx, IDX_PUT);
+ /* bucket_move_entry does not clear the source */
+ memzero(bucket_at(h, idx), hi->entry_size);
+
+ do {
+ /*
+ * Find the new bucket for the current entry. This may make
+ * another entry homeless and load it into IDX_PUT.
+ */
+ rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
+ n_rehashed++;
+
+ /* Did the current entry displace another one? */
+ if (rehash_next)
+ optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
+ } while (rehash_next);
+ }
+
+ assert(n_rehashed == n_entries(h));
+
+ return 1;
+}
+
+/*
+ * Finds an entry with a matching key
+ * Returns: index of the found entry, or IDX_NIL if not found.
+ */
+static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned dib, distance;
+ dib_raw_t *dibs = dib_raw_ptr(h);
+
+ assert(idx < n_buckets(h));
+
+ for (distance = 0; ; distance++) {
+ if (dibs[idx] == DIB_RAW_FREE)
+ return IDX_NIL;
+
+ dib = bucket_calculate_dib(h, idx, dibs[idx]);
+
+ if (dib < distance)
+ return IDX_NIL;
+ if (dib == distance) {
+ e = bucket_at(h, idx);
+ if (h->hash_ops->compare(e->key, key) == 0)
+ return idx;
+ }
+
+ idx = next_idx(h, idx);
+ }
+}
+#define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
+
+int hashmap_put(Hashmap *h, const void *key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx != IDX_NIL) {
+ e = plain_bucket_at(h, idx);
+ if (e->value == value)
+ return 0;
+ return -EEXIST;
+ }
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = key;
+ e->value = value;
+ return hashmap_put_boldly(h, hash, &swap, true);
+}
+
+int set_put(Set *s, const void *key) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+
+ assert(s);
+
+ hash = bucket_hash(s, key);
+ idx = bucket_scan(s, hash, key);
+ if (idx != IDX_NIL)
+ return 0;
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ e->key = key;
+ return hashmap_put_boldly(s, hash, &swap, true);
+}
+
+int _set_ensure_put(Set **s, const struct hash_ops *hash_ops, const void *key HASHMAP_DEBUG_PARAMS) {
+ int r;
+
+ r = _set_ensure_allocated(s, hash_ops HASHMAP_DEBUG_PASS_ARGS);
+ if (r < 0)
+ return r;
+
+ return set_put(*s, key);
+}
+
+int _set_ensure_consume(Set **s, const struct hash_ops *hash_ops, void *key HASHMAP_DEBUG_PARAMS) {
+ int r;
+
+ r = _set_ensure_put(s, hash_ops, key HASHMAP_DEBUG_PASS_ARGS);
+ if (r <= 0) {
+ if (hash_ops && hash_ops->free_key)
+ hash_ops->free_key(key);
+ else
+ free(key);
+ }
+
+ return r;
+}
+
+int hashmap_replace(Hashmap *h, const void *key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx != IDX_NIL) {
+ e = plain_bucket_at(h, idx);
+#if ENABLE_DEBUG_HASHMAP
+ /* Although the key is equal, the key pointer may have changed,
+ * and this would break our assumption for iterating. So count
+ * this operation as incompatible with iteration. */
+ if (e->b.key != key) {
+ h->b.debug.put_count++;
+ h->b.debug.rem_count++;
+ h->b.debug.last_rem_idx = idx;
+ }
+#endif
+ e->b.key = key;
+ e->value = value;
+ hashmap_set_dirty(h);
+
+ return 0;
+ }
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = key;
+ e->value = value;
+ return hashmap_put_boldly(h, hash, &swap, true);
+}
+
+int hashmap_update(Hashmap *h, const void *key, void *value) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ e = plain_bucket_at(h, idx);
+ e->value = value;
+ hashmap_set_dirty(h);
+
+ return 0;
+}
+
+void* _hashmap_get(HashmapBase *h, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ return entry_value(h, e);
+}
+
+void* hashmap_get2(Hashmap *h, const void *key, void **key2) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = plain_bucket_at(h, idx);
+ if (key2)
+ *key2 = (void*) e->b.key;
+
+ return e->value;
+}
+
+bool _hashmap_contains(HashmapBase *h, const void *key) {
+ unsigned hash;
+
+ if (!h)
+ return false;
+
+ hash = bucket_hash(h, key);
+ return bucket_scan(h, hash, key) != IDX_NIL;
+}
+
+void* _hashmap_remove(HashmapBase *h, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+ void *data;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ data = entry_value(h, e);
+ remove_entry(h, idx);
+
+ return data;
+}
+
+void* hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+ void *data;
+
+ if (!h) {
+ if (rkey)
+ *rkey = NULL;
+ return NULL;
+ }
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL) {
+ if (rkey)
+ *rkey = NULL;
+ return NULL;
+ }
+
+ e = plain_bucket_at(h, idx);
+ data = e->value;
+ if (rkey)
+ *rkey = (void*) e->b.key;
+
+ remove_entry(h, idx);
+
+ return data;
+}
+
+int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned old_hash, new_hash, idx;
+
+ if (!h)
+ return -ENOENT;
+
+ old_hash = bucket_hash(h, old_key);
+ idx = bucket_scan(h, old_hash, old_key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ new_hash = bucket_hash(h, new_key);
+ if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
+ return -EEXIST;
+
+ remove_entry(h, idx);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = new_key;
+ e->value = value;
+ assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e;
+ unsigned old_hash, new_hash, idx;
+
+ if (!s)
+ return -ENOENT;
+
+ old_hash = bucket_hash(s, old_key);
+ idx = bucket_scan(s, old_hash, old_key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ new_hash = bucket_hash(s, new_key);
+ if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
+ return -EEXIST;
+
+ remove_entry(s, idx);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ e->key = new_key;
+ assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned old_hash, new_hash, idx_old, idx_new;
+
+ if (!h)
+ return -ENOENT;
+
+ old_hash = bucket_hash(h, old_key);
+ idx_old = bucket_scan(h, old_hash, old_key);
+ if (idx_old == IDX_NIL)
+ return -ENOENT;
+
+ old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;
+
+ new_hash = bucket_hash(h, new_key);
+ idx_new = bucket_scan(h, new_hash, new_key);
+ if (idx_new != IDX_NIL)
+ if (idx_old != idx_new) {
+ remove_entry(h, idx_new);
+ /* Compensate for a possible backward shift. */
+ if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
+ idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
+ assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
+ }
+
+ remove_entry(h, idx_old);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = new_key;
+ e->value = value;
+ assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+void* _hashmap_remove_value(HashmapBase *h, const void *key, void *value) {
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ if (entry_value(h, e) != value)
+ return NULL;
+
+ remove_entry(h, idx);
+
+ return value;
+}
+
+static unsigned find_first_entry(HashmapBase *h) {
+ Iterator i = ITERATOR_FIRST;
+
+ if (!h || !n_entries(h))
+ return IDX_NIL;
+
+ return hashmap_iterate_entry(h, &i);
+}
+
+void* _hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) {
+ struct hashmap_base_entry *e;
+ void *key, *data;
+ unsigned idx;
+
+ idx = find_first_entry(h);
+ if (idx == IDX_NIL) {
+ if (ret_key)
+ *ret_key = NULL;
+ return NULL;
+ }
+
+ e = bucket_at(h, idx);
+ key = (void*) e->key;
+ data = entry_value(h, e);
+
+ if (remove)
+ remove_entry(h, idx);
+
+ if (ret_key)
+ *ret_key = key;
+
+ return data;
+}
+
+unsigned _hashmap_size(HashmapBase *h) {
+ if (!h)
+ return 0;
+
+ return n_entries(h);
+}
+
+unsigned _hashmap_buckets(HashmapBase *h) {
+ if (!h)
+ return 0;
+
+ return n_buckets(h);
+}
+
+int _hashmap_merge(Hashmap *h, Hashmap *other) {
+ Iterator i;
+ unsigned idx;
+
+ assert(h);
+
+ HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
+ struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
+ int r;
+
+ r = hashmap_put(h, pe->b.key, pe->value);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ return 0;
+}
+
+int set_merge(Set *s, Set *other) {
+ Iterator i;
+ unsigned idx;
+
+ assert(s);
+
+ HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
+ struct set_entry *se = set_bucket_at(other, idx);
+ int r;
+
+ r = set_put(s, se->b.key);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int _hashmap_reserve(HashmapBase *h, unsigned entries_add) {
+ int r;
+
+ assert(h);
+
+ r = resize_buckets(h, entries_add);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/*
+ * The same as hashmap_merge(), but every new item from other is moved to h.
+ * Keys already in h are skipped and stay in other.
+ * Returns: 0 on success.
+ * -ENOMEM on alloc failure, in which case no move has been done.
+ */
+int _hashmap_move(HashmapBase *h, HashmapBase *other) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e, *n;
+ Iterator i;
+ unsigned idx;
+ int r;
+
+ assert(h);
+
+ if (!other)
+ return 0;
+
+ assert(other->type == h->type);
+
+ /*
+ * This reserves buckets for the worst case, where none of other's
+ * entries are yet present in h. This is preferable to risking
+ * an allocation failure in the middle of the moving and having to
+ * rollback or return a partial result.
+ */
+ r = resize_buckets(h, n_entries(other));
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_IDX(idx, other, i) {
+ unsigned h_hash;
+
+ e = bucket_at(other, idx);
+ h_hash = bucket_hash(h, e->key);
+ if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
+ continue;
+
+ n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ n->key = e->key;
+ if (h->type != HASHMAP_TYPE_SET)
+ ((struct plain_hashmap_entry*) n)->value =
+ ((struct plain_hashmap_entry*) e)->value;
+ assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
+
+ remove_entry(other, idx);
+ }
+
+ return 0;
+}
+
+int _hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
+ struct swap_entries swap;
+ unsigned h_hash, other_hash, idx;
+ struct hashmap_base_entry *e, *n;
+ int r;
+
+ assert(h);
+
+ h_hash = bucket_hash(h, key);
+ if (bucket_scan(h, h_hash, key) != IDX_NIL)
+ return -EEXIST;
+
+ if (!other)
+ return -ENOENT;
+
+ assert(other->type == h->type);
+
+ other_hash = bucket_hash(other, key);
+ idx = bucket_scan(other, other_hash, key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ e = bucket_at(other, idx);
+
+ n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ n->key = e->key;
+ if (h->type != HASHMAP_TYPE_SET)
+ ((struct plain_hashmap_entry*) n)->value =
+ ((struct plain_hashmap_entry*) e)->value;
+ r = hashmap_put_boldly(h, h_hash, &swap, true);
+ if (r < 0)
+ return r;
+
+ remove_entry(other, idx);
+ return 0;
+}
+
+HashmapBase* _hashmap_copy(HashmapBase *h HASHMAP_DEBUG_PARAMS) {
+ HashmapBase *copy;
+ int r;
+
+ assert(h);
+
+ copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_PASS_ARGS);
+ if (!copy)
+ return NULL;
+
+ switch (h->type) {
+ case HASHMAP_TYPE_PLAIN:
+ case HASHMAP_TYPE_ORDERED:
+ r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
+ break;
+ case HASHMAP_TYPE_SET:
+ r = set_merge((Set*)copy, (Set*)h);
+ break;
+ default:
+ assert_not_reached("Unknown hashmap type");
+ }
+
+ if (r < 0)
+ return _hashmap_free(copy, false, false);
+
+ return copy;
+}
+
+char** _hashmap_get_strv(HashmapBase *h) {
+ char **sv;
+ Iterator i;
+ unsigned idx, n;
+
+ sv = new(char*, n_entries(h)+1);
+ if (!sv)
+ return NULL;
+
+ n = 0;
+ HASHMAP_FOREACH_IDX(idx, h, i)
+ sv[n++] = entry_value(h, bucket_at(h, idx));
+ sv[n] = NULL;
+
+ return sv;
+}
+
+void* ordered_hashmap_next(OrderedHashmap *h, const void *key) {
+ struct ordered_hashmap_entry *e;
+ unsigned hash, idx;
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = ordered_bucket_at(h, idx);
+ if (e->iterate_next == IDX_NIL)
+ return NULL;
+ return ordered_bucket_at(h, e->iterate_next)->p.value;
+}
+
+int set_consume(Set *s, void *value) {
+ int r;
+
+ assert(s);
+ assert(value);
+
+ r = set_put(s, value);
+ if (r <= 0)
+ free(value);
+
+ return r;
+}
+
+int _hashmap_put_strdup_full(Hashmap **h, const struct hash_ops *hash_ops, const char *k, const char *v HASHMAP_DEBUG_PARAMS) {
+ int r;
+
+ r = _hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS);
+ if (r < 0)
+ return r;
+
+ _cleanup_free_ char *kdup = NULL, *vdup = NULL;
+
+ kdup = strdup(k);
+ if (!kdup)
+ return -ENOMEM;
+
+ if (v) {
+ vdup = strdup(v);
+ if (!vdup)
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(*h, kdup, vdup);
+ if (r < 0) {
+ if (r == -EEXIST && streq_ptr(v, hashmap_get(*h, kdup)))
+ return 0;
+ return r;
+ }
+
+ /* 0 with non-null vdup would mean vdup is already in the hashmap, which cannot be */
+ assert(vdup == NULL || r > 0);
+ if (r > 0)
+ kdup = vdup = NULL;
+
+ return r;
+}
+
+int _set_put_strdup_full(Set **s, const struct hash_ops *hash_ops, const char *p HASHMAP_DEBUG_PARAMS) {
+ char *c;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ r = _set_ensure_allocated(s, hash_ops HASHMAP_DEBUG_PASS_ARGS);
+ if (r < 0)
+ return r;
+
+ if (set_contains(*s, (char*) p))
+ return 0;
+
+ c = strdup(p);
+ if (!c)
+ return -ENOMEM;
+
+ return set_consume(*s, c);
+}
+
+int _set_put_strdupv_full(Set **s, const struct hash_ops *hash_ops, char **l HASHMAP_DEBUG_PARAMS) {
+ int n = 0, r;
+ char **i;
+
+ assert(s);
+
+ STRV_FOREACH(i, l) {
+ r = _set_put_strdup_full(s, hash_ops, *i HASHMAP_DEBUG_PASS_ARGS);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ return n;
+}
+
+int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) {
+ const char *p = v;
+ int r;
+
+ assert(s);
+ assert(v);
+
+ for (;;) {
+ char *word;
+
+ r = extract_first_word(&p, &word, separators, flags);
+ if (r <= 0)
+ return r;
+
+ r = set_consume(s, word);
+ if (r < 0)
+ return r;
+ }
+}
+
+/* expand the cachemem if needed, return true if newly (re)activated. */
+static int cachemem_maintain(CacheMem *mem, unsigned size) {
+ assert(mem);
+
+ if (!GREEDY_REALLOC(mem->ptr, mem->n_allocated, size)) {
+ if (size > 0)
+ return -ENOMEM;
+ }
+
+ if (!mem->active) {
+ mem->active = true;
+ return true;
+ }
+
+ return false;
+}
+
+int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) {
+ bool sync_keys = false, sync_values = false;
+ unsigned size;
+ int r;
+
+ assert(cache);
+ assert(cache->hashmap);
+
+ size = n_entries(cache->hashmap);
+
+ if (res_keys) {
+ r = cachemem_maintain(&cache->keys, size);
+ if (r < 0)
+ return r;
+
+ sync_keys = r;
+ } else
+ cache->keys.active = false;
+
+ if (res_values) {
+ r = cachemem_maintain(&cache->values, size);
+ if (r < 0)
+ return r;
+
+ sync_values = r;
+ } else
+ cache->values.active = false;
+
+ if (cache->hashmap->dirty) {
+ if (cache->keys.active)
+ sync_keys = true;
+ if (cache->values.active)
+ sync_values = true;
+
+ cache->hashmap->dirty = false;
+ }
+
+ if (sync_keys || sync_values) {
+ unsigned i, idx;
+ Iterator iter;
+
+ i = 0;
+ HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) {
+ struct hashmap_base_entry *e;
+
+ e = bucket_at(cache->hashmap, idx);
+
+ if (sync_keys)
+ cache->keys.ptr[i] = e->key;
+ if (sync_values)
+ cache->values.ptr[i] = entry_value(cache->hashmap, e);
+ i++;
+ }
+ }
+
+ if (res_keys)
+ *res_keys = cache->keys.ptr;
+ if (res_values)
+ *res_values = cache->values.ptr;
+ if (res_n_entries)
+ *res_n_entries = size;
+
+ return 0;
+}
+
+IteratedCache* iterated_cache_free(IteratedCache *cache) {
+ if (cache) {
+ free(cache->keys.ptr);
+ free(cache->values.ptr);
+ }
+
+ return mfree(cache);
+}
+
+int set_strjoin(Set *s, const char *separator, bool wrap_with_separator, char **ret) {
+ size_t separator_len, allocated = 0, len = 0;
+ _cleanup_free_ char *str = NULL;
+ const char *value;
+ bool first;
+
+ assert(ret);
+
+ if (set_isempty(s)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ separator_len = strlen_ptr(separator);
+
+ if (separator_len == 0)
+ wrap_with_separator = false;
+
+ first = !wrap_with_separator;
+
+ SET_FOREACH(value, s) {
+ size_t l = strlen_ptr(value);
+
+ if (l == 0)
+ continue;
+
+ if (!GREEDY_REALLOC(str, allocated, len + l + (first ? 0 : separator_len) + (wrap_with_separator ? separator_len : 0) + 1))
+ return -ENOMEM;
+
+ if (separator_len > 0 && !first) {
+ memcpy(str + len, separator, separator_len);
+ len += separator_len;
+ }
+
+ memcpy(str + len, value, l);
+ len += l;
+ first = false;
+ }
+
+ if (wrap_with_separator) {
+ memcpy(str + len, separator, separator_len);
+ len += separator_len;
+ }
+
+ str[len] = '\0';
+
+ *ret = TAKE_PTR(str);
+ return 0;
+}
diff --git a/src/basic/hashmap.h b/src/basic/hashmap.h
new file mode 100644
index 0000000..e994483
--- /dev/null
+++ b/src/basic/hashmap.h
@@ -0,0 +1,449 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "hash-funcs.h"
+#include "macro.h"
+#include "util.h"
+
+/*
+ * A hash table implementation. As a minor optimization a NULL hashmap object
+ * will be treated as empty hashmap for all read operations. That way it is not
+ * necessary to instantiate an object for each Hashmap use.
+ *
+ * If ENABLE_DEBUG_HASHMAP is defined (by configuring with -Ddebug-extra=hashmap),
+ * the implementation will:
+ * - store extra data for debugging and statistics (see tools/gdb-sd_dump_hashmaps.py)
+ * - perform extra checks for invalid use of iterators
+ */
+
+#define HASH_KEY_SIZE 16
+
+typedef void* (*hashmap_destroy_t)(void *p);
+
+/* The base type for all hashmap and set types. Many functions in the implementation take (HashmapBase*)
+ * parameters and are run-time polymorphic, though the API is not meant to be polymorphic (do not call
+ * underscore-prefixed functions directly). */
+typedef struct HashmapBase HashmapBase;
+
+/* Specific hashmap/set types */
+typedef struct Hashmap Hashmap; /* Maps keys to values */
+typedef struct OrderedHashmap OrderedHashmap; /* Like Hashmap, but also remembers entry insertion order */
+typedef struct Set Set; /* Stores just keys */
+
+typedef struct IteratedCache IteratedCache; /* Caches the iterated order of one of the above */
+
+/* Ideally the Iterator would be an opaque struct, but it is instantiated
+ * by hashmap users, so the definition has to be here. Do not use its fields
+ * directly. */
+typedef struct {
+ unsigned idx; /* index of an entry to be iterated next */
+ const void *next_key; /* expected value of that entry's key pointer */
+#if ENABLE_DEBUG_HASHMAP
+ unsigned put_count; /* hashmap's put_count recorded at start of iteration */
+ unsigned rem_count; /* hashmap's rem_count in previous iteration */
+ unsigned prev_idx; /* idx in previous iteration */
+#endif
+} Iterator;
+
+#define _IDX_ITERATOR_FIRST (UINT_MAX - 1)
+#define ITERATOR_FIRST ((Iterator) { .idx = _IDX_ITERATOR_FIRST, .next_key = NULL })
+
+/* Macros for type checking */
+#define PTR_COMPATIBLE_WITH_HASHMAP_BASE(h) \
+ (__builtin_types_compatible_p(typeof(h), HashmapBase*) || \
+ __builtin_types_compatible_p(typeof(h), Hashmap*) || \
+ __builtin_types_compatible_p(typeof(h), OrderedHashmap*) || \
+ __builtin_types_compatible_p(typeof(h), Set*))
+
+#define PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h) \
+ (__builtin_types_compatible_p(typeof(h), Hashmap*) || \
+ __builtin_types_compatible_p(typeof(h), OrderedHashmap*)) \
+
+#define HASHMAP_BASE(h) \
+ __builtin_choose_expr(PTR_COMPATIBLE_WITH_HASHMAP_BASE(h), \
+ (HashmapBase*)(h), \
+ (void)0)
+
+#define PLAIN_HASHMAP(h) \
+ __builtin_choose_expr(PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h), \
+ (Hashmap*)(h), \
+ (void)0)
+
+#if ENABLE_DEBUG_HASHMAP
+# define HASHMAP_DEBUG_PARAMS , const char *func, const char *file, int line
+# define HASHMAP_DEBUG_SRC_ARGS , __func__, PROJECT_FILE, __LINE__
+# define HASHMAP_DEBUG_PASS_ARGS , func, file, line
+#else
+# define HASHMAP_DEBUG_PARAMS
+# define HASHMAP_DEBUG_SRC_ARGS
+# define HASHMAP_DEBUG_PASS_ARGS
+#endif
+
+Hashmap* _hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+OrderedHashmap* _ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define hashmap_new(ops) _hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS)
+#define ordered_hashmap_new(ops) _ordered_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS)
+
+#define hashmap_free_and_replace(a, b) \
+ ({ \
+ hashmap_free(a); \
+ (a) = (b); \
+ (b) = NULL; \
+ 0; \
+ })
+
+HashmapBase* _hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value);
+static inline Hashmap* hashmap_free(Hashmap *h) {
+ return (void*) _hashmap_free(HASHMAP_BASE(h), NULL, NULL);
+}
+static inline OrderedHashmap* ordered_hashmap_free(OrderedHashmap *h) {
+ return (void*) _hashmap_free(HASHMAP_BASE(h), NULL, NULL);
+}
+
+static inline Hashmap* hashmap_free_free(Hashmap *h) {
+ return (void*) _hashmap_free(HASHMAP_BASE(h), NULL, free);
+}
+static inline OrderedHashmap* ordered_hashmap_free_free(OrderedHashmap *h) {
+ return (void*) _hashmap_free(HASHMAP_BASE(h), NULL, free);
+}
+
+static inline Hashmap* hashmap_free_free_key(Hashmap *h) {
+ return (void*) _hashmap_free(HASHMAP_BASE(h), free, NULL);
+}
+static inline OrderedHashmap* ordered_hashmap_free_free_key(OrderedHashmap *h) {
+ return (void*) _hashmap_free(HASHMAP_BASE(h), free, NULL);
+}
+
+static inline Hashmap* hashmap_free_free_free(Hashmap *h) {
+ return (void*) _hashmap_free(HASHMAP_BASE(h), free, free);
+}
+static inline OrderedHashmap* ordered_hashmap_free_free_free(OrderedHashmap *h) {
+ return (void*) _hashmap_free(HASHMAP_BASE(h), free, free);
+}
+
+IteratedCache* iterated_cache_free(IteratedCache *cache);
+int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries);
+
+HashmapBase* _hashmap_copy(HashmapBase *h HASHMAP_DEBUG_PARAMS);
+#define hashmap_copy(h) ((Hashmap*) _hashmap_copy(HASHMAP_BASE(h) HASHMAP_DEBUG_SRC_ARGS))
+#define ordered_hashmap_copy(h) ((OrderedHashmap*) _hashmap_copy(HASHMAP_BASE(h) HASHMAP_DEBUG_SRC_ARGS))
+
+int _hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+int _ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define hashmap_ensure_allocated(h, ops) _hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+#define ordered_hashmap_ensure_allocated(h, ops) _ordered_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+
+int _ordered_hashmap_ensure_put(OrderedHashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS);
+#define ordered_hashmap_ensure_put(s, ops, key, value) _ordered_hashmap_ensure_put(s, ops, key, value HASHMAP_DEBUG_SRC_ARGS)
+
+IteratedCache* _hashmap_iterated_cache_new(HashmapBase *h);
+static inline IteratedCache* hashmap_iterated_cache_new(Hashmap *h) {
+ return (IteratedCache*) _hashmap_iterated_cache_new(HASHMAP_BASE(h));
+}
+static inline IteratedCache* ordered_hashmap_iterated_cache_new(OrderedHashmap *h) {
+ return (IteratedCache*) _hashmap_iterated_cache_new(HASHMAP_BASE(h));
+}
+
+int hashmap_put(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_put(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_put(PLAIN_HASHMAP(h), key, value);
+}
+
+int _hashmap_put_strdup_full(Hashmap **h, const struct hash_ops *hash_ops, const char *k, const char *v HASHMAP_DEBUG_PARAMS);
+#define hashmap_put_strdup_full(h, hash_ops, k, v) _hashmap_put_strdup_full(h, hash_ops, k, v HASHMAP_DEBUG_SRC_ARGS)
+#define hashmap_put_strdup(h, k, v) hashmap_put_strdup_full(h, &string_hash_ops_free_free, k, v)
+
+int hashmap_update(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_update(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_update(PLAIN_HASHMAP(h), key, value);
+}
+
+int hashmap_replace(Hashmap *h, const void *key, void *value);
+static inline int ordered_hashmap_replace(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_replace(PLAIN_HASHMAP(h), key, value);
+}
+
+void* _hashmap_get(HashmapBase *h, const void *key);
+static inline void *hashmap_get(Hashmap *h, const void *key) {
+ return _hashmap_get(HASHMAP_BASE(h), key);
+}
+static inline void *ordered_hashmap_get(OrderedHashmap *h, const void *key) {
+ return _hashmap_get(HASHMAP_BASE(h), key);
+}
+
+void* hashmap_get2(Hashmap *h, const void *key, void **rkey);
+static inline void *ordered_hashmap_get2(OrderedHashmap *h, const void *key, void **rkey) {
+ return hashmap_get2(PLAIN_HASHMAP(h), key, rkey);
+}
+
+bool _hashmap_contains(HashmapBase *h, const void *key);
+static inline bool hashmap_contains(Hashmap *h, const void *key) {
+ return _hashmap_contains(HASHMAP_BASE(h), key);
+}
+static inline bool ordered_hashmap_contains(OrderedHashmap *h, const void *key) {
+ return _hashmap_contains(HASHMAP_BASE(h), key);
+}
+
+void* _hashmap_remove(HashmapBase *h, const void *key);
+static inline void *hashmap_remove(Hashmap *h, const void *key) {
+ return _hashmap_remove(HASHMAP_BASE(h), key);
+}
+static inline void *ordered_hashmap_remove(OrderedHashmap *h, const void *key) {
+ return _hashmap_remove(HASHMAP_BASE(h), key);
+}
+
+void* hashmap_remove2(Hashmap *h, const void *key, void **rkey);
+static inline void *ordered_hashmap_remove2(OrderedHashmap *h, const void *key, void **rkey) {
+ return hashmap_remove2(PLAIN_HASHMAP(h), key, rkey);
+}
+
+void* _hashmap_remove_value(HashmapBase *h, const void *key, void *value);
+static inline void *hashmap_remove_value(Hashmap *h, const void *key, void *value) {
+ return _hashmap_remove_value(HASHMAP_BASE(h), key, value);
+}
+
+static inline void* ordered_hashmap_remove_value(OrderedHashmap *h, const void *key, void *value) {
+ return hashmap_remove_value(PLAIN_HASHMAP(h), key, value);
+}
+
+int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value);
+static inline int ordered_hashmap_remove_and_put(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) {
+ return hashmap_remove_and_put(PLAIN_HASHMAP(h), old_key, new_key, value);
+}
+
+int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value);
+static inline int ordered_hashmap_remove_and_replace(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) {
+ return hashmap_remove_and_replace(PLAIN_HASHMAP(h), old_key, new_key, value);
+}
+
+/* Since merging data from a OrderedHashmap into a Hashmap or vice-versa
+ * should just work, allow this by having looser type-checking here. */
+int _hashmap_merge(Hashmap *h, Hashmap *other);
+#define hashmap_merge(h, other) _hashmap_merge(PLAIN_HASHMAP(h), PLAIN_HASHMAP(other))
+#define ordered_hashmap_merge(h, other) hashmap_merge(h, other)
+
+int _hashmap_reserve(HashmapBase *h, unsigned entries_add);
+static inline int hashmap_reserve(Hashmap *h, unsigned entries_add) {
+ return _hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+static inline int ordered_hashmap_reserve(OrderedHashmap *h, unsigned entries_add) {
+ return _hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+
+int _hashmap_move(HashmapBase *h, HashmapBase *other);
+/* Unlike hashmap_merge, hashmap_move does not allow mixing the types. */
+static inline int hashmap_move(Hashmap *h, Hashmap *other) {
+ return _hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other));
+}
+static inline int ordered_hashmap_move(OrderedHashmap *h, OrderedHashmap *other) {
+ return _hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other));
+}
+
+int _hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key);
+static inline int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key) {
+ return _hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key);
+}
+static inline int ordered_hashmap_move_one(OrderedHashmap *h, OrderedHashmap *other, const void *key) {
+ return _hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key);
+}
+
+unsigned _hashmap_size(HashmapBase *h) _pure_;
+static inline unsigned hashmap_size(Hashmap *h) {
+ return _hashmap_size(HASHMAP_BASE(h));
+}
+static inline unsigned ordered_hashmap_size(OrderedHashmap *h) {
+ return _hashmap_size(HASHMAP_BASE(h));
+}
+
+static inline bool hashmap_isempty(Hashmap *h) {
+ return hashmap_size(h) == 0;
+}
+static inline bool ordered_hashmap_isempty(OrderedHashmap *h) {
+ return ordered_hashmap_size(h) == 0;
+}
+
+unsigned _hashmap_buckets(HashmapBase *h) _pure_;
+static inline unsigned hashmap_buckets(Hashmap *h) {
+ return _hashmap_buckets(HASHMAP_BASE(h));
+}
+static inline unsigned ordered_hashmap_buckets(OrderedHashmap *h) {
+ return _hashmap_buckets(HASHMAP_BASE(h));
+}
+
+bool _hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key);
+static inline bool hashmap_iterate(Hashmap *h, Iterator *i, void **value, const void **key) {
+ return _hashmap_iterate(HASHMAP_BASE(h), i, value, key);
+}
+static inline bool ordered_hashmap_iterate(OrderedHashmap *h, Iterator *i, void **value, const void **key) {
+ return _hashmap_iterate(HASHMAP_BASE(h), i, value, key);
+}
+
+void _hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value);
+static inline void hashmap_clear(Hashmap *h) {
+ _hashmap_clear(HASHMAP_BASE(h), NULL, NULL);
+}
+static inline void ordered_hashmap_clear(OrderedHashmap *h) {
+ _hashmap_clear(HASHMAP_BASE(h), NULL, NULL);
+}
+
+static inline void hashmap_clear_free(Hashmap *h) {
+ _hashmap_clear(HASHMAP_BASE(h), NULL, free);
+}
+static inline void ordered_hashmap_clear_free(OrderedHashmap *h) {
+ _hashmap_clear(HASHMAP_BASE(h), NULL, free);
+}
+
+static inline void hashmap_clear_free_key(Hashmap *h) {
+ _hashmap_clear(HASHMAP_BASE(h), free, NULL);
+}
+static inline void ordered_hashmap_clear_free_key(OrderedHashmap *h) {
+ _hashmap_clear(HASHMAP_BASE(h), free, NULL);
+}
+
+static inline void hashmap_clear_free_free(Hashmap *h) {
+ _hashmap_clear(HASHMAP_BASE(h), free, free);
+}
+static inline void ordered_hashmap_clear_free_free(OrderedHashmap *h) {
+ _hashmap_clear(HASHMAP_BASE(h), free, free);
+}
+
+/*
+ * Note about all *_first*() functions
+ *
+ * For plain Hashmaps and Sets the order of entries is undefined.
+ * The functions find whatever entry is first in the implementation
+ * internal order.
+ *
+ * Only for OrderedHashmaps the order is well defined and finding
+ * the first entry is O(1).
+ */
+
+void *_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key);
+static inline void *hashmap_steal_first_key_and_value(Hashmap *h, void **ret) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret);
+}
+static inline void *ordered_hashmap_steal_first_key_and_value(OrderedHashmap *h, void **ret) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret);
+}
+static inline void *hashmap_first_key_and_value(Hashmap *h, void **ret) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret);
+}
+static inline void *ordered_hashmap_first_key_and_value(OrderedHashmap *h, void **ret) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret);
+}
+
+static inline void *hashmap_steal_first(Hashmap *h) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL);
+}
+static inline void *ordered_hashmap_steal_first(OrderedHashmap *h) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL);
+}
+static inline void *hashmap_first(Hashmap *h) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL);
+}
+static inline void *ordered_hashmap_first(OrderedHashmap *h) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL);
+}
+
+static inline void *_hashmap_first_key(HashmapBase *h, bool remove) {
+ void *key = NULL;
+
+ (void) _hashmap_first_key_and_value(HASHMAP_BASE(h), remove, &key);
+ return key;
+}
+static inline void *hashmap_steal_first_key(Hashmap *h) {
+ return _hashmap_first_key(HASHMAP_BASE(h), true);
+}
+static inline void *ordered_hashmap_steal_first_key(OrderedHashmap *h) {
+ return _hashmap_first_key(HASHMAP_BASE(h), true);
+}
+static inline void *hashmap_first_key(Hashmap *h) {
+ return _hashmap_first_key(HASHMAP_BASE(h), false);
+}
+static inline void *ordered_hashmap_first_key(OrderedHashmap *h) {
+ return _hashmap_first_key(HASHMAP_BASE(h), false);
+}
+
+#define hashmap_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = hashmap_steal_first(_s))) \
+ _f(_item); \
+ })
+#define hashmap_free_with_destructor(_s, _f) \
+ ({ \
+ hashmap_clear_with_destructor(_s, _f); \
+ hashmap_free(_s); \
+ })
+#define ordered_hashmap_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = ordered_hashmap_steal_first(_s))) \
+ _f(_item); \
+ })
+#define ordered_hashmap_free_with_destructor(_s, _f) \
+ ({ \
+ ordered_hashmap_clear_with_destructor(_s, _f); \
+ ordered_hashmap_free(_s); \
+ })
+
+/* no hashmap_next */
+void* ordered_hashmap_next(OrderedHashmap *h, const void *key);
+
+char** _hashmap_get_strv(HashmapBase *h);
+static inline char** hashmap_get_strv(Hashmap *h) {
+ return _hashmap_get_strv(HASHMAP_BASE(h));
+}
+static inline char** ordered_hashmap_get_strv(OrderedHashmap *h) {
+ return _hashmap_get_strv(HASHMAP_BASE(h));
+}
+
+/*
+ * Hashmaps are iterated in unpredictable order.
+ * OrderedHashmaps are an exception to this. They are iterated in the order
+ * the entries were inserted.
+ * It is safe to remove the current entry.
+ */
+#define _HASHMAP_FOREACH(e, h, i) \
+ for (Iterator i = ITERATOR_FIRST; hashmap_iterate((h), &i, (void**)&(e), NULL); )
+#define HASHMAP_FOREACH(e, h) \
+ _HASHMAP_FOREACH(e, h, UNIQ_T(i, UNIQ))
+
+#define _ORDERED_HASHMAP_FOREACH(e, h, i) \
+ for (Iterator i = ITERATOR_FIRST; ordered_hashmap_iterate((h), &i, (void**)&(e), NULL); )
+#define ORDERED_HASHMAP_FOREACH(e, h) \
+ _ORDERED_HASHMAP_FOREACH(e, h, UNIQ_T(i, UNIQ))
+
+#define _HASHMAP_FOREACH_KEY(e, k, h, i) \
+ for (Iterator i = ITERATOR_FIRST; hashmap_iterate((h), &i, (void**)&(e), (const void**) &(k)); )
+#define HASHMAP_FOREACH_KEY(e, k, h) \
+ _HASHMAP_FOREACH_KEY(e, k, h, UNIQ_T(i, UNIQ))
+
+#define _ORDERED_HASHMAP_FOREACH_KEY(e, k, h, i) \
+ for (Iterator i = ITERATOR_FIRST; ordered_hashmap_iterate((h), &i, (void**)&(e), (const void**) &(k)); )
+#define ORDERED_HASHMAP_FOREACH_KEY(e, k, h) \
+ _ORDERED_HASHMAP_FOREACH_KEY(e, k, h, UNIQ_T(i, UNIQ))
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free_key);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_key);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_free);
+
+#define _cleanup_hashmap_free_ _cleanup_(hashmap_freep)
+#define _cleanup_hashmap_free_free_ _cleanup_(hashmap_free_freep)
+#define _cleanup_hashmap_free_free_free_ _cleanup_(hashmap_free_free_freep)
+#define _cleanup_ordered_hashmap_free_ _cleanup_(ordered_hashmap_freep)
+#define _cleanup_ordered_hashmap_free_free_ _cleanup_(ordered_hashmap_free_freep)
+#define _cleanup_ordered_hashmap_free_free_free_ _cleanup_(ordered_hashmap_free_free_freep)
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(IteratedCache*, iterated_cache_free);
+
+#define _cleanup_iterated_cache_free_ _cleanup_(iterated_cache_freep)
diff --git a/src/basic/hexdecoct.c b/src/basic/hexdecoct.c
new file mode 100644
index 0000000..da60202
--- /dev/null
+++ b/src/basic/hexdecoct.c
@@ -0,0 +1,861 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "string-util.h"
+
+char octchar(int x) {
+ return '0' + (x & 7);
+}
+
+int unoctchar(char c) {
+
+ if (c >= '0' && c <= '7')
+ return c - '0';
+
+ return -EINVAL;
+}
+
+char decchar(int x) {
+ return '0' + (x % 10);
+}
+
+int undecchar(char c) {
+
+ if (c >= '0' && c <= '9')
+ return c - '0';
+
+ return -EINVAL;
+}
+
+char hexchar(int x) {
+ static const char table[16] = "0123456789abcdef";
+
+ return table[x & 15];
+}
+
+int unhexchar(char c) {
+
+ if (c >= '0' && c <= '9')
+ return c - '0';
+
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+
+ if (c >= 'A' && c <= 'F')
+ return c - 'A' + 10;
+
+ return -EINVAL;
+}
+
+char *hexmem(const void *p, size_t l) {
+ const uint8_t *x;
+ char *r, *z;
+
+ z = r = new(char, l * 2 + 1);
+ if (!r)
+ return NULL;
+
+ for (x = p; x < (const uint8_t*) p + l; x++) {
+ *(z++) = hexchar(*x >> 4);
+ *(z++) = hexchar(*x & 15);
+ }
+
+ *z = 0;
+ return r;
+}
+
+static int unhex_next(const char **p, size_t *l) {
+ int r;
+
+ assert(p);
+ assert(l);
+
+ /* Find the next non-whitespace character, and decode it. We
+ * greedily skip all preceding and all following whitespace. */
+
+ for (;;) {
+ if (*l == 0)
+ return -EPIPE;
+
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip leading whitespace */
+ (*p)++, (*l)--;
+ }
+
+ r = unhexchar(**p);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ (*p)++, (*l)--;
+
+ if (*l == 0 || !strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip following whitespace */
+ }
+
+ return r;
+}
+
+int unhexmem_full(const char *p, size_t l, bool secure, void **ret, size_t *ret_len) {
+ _cleanup_free_ uint8_t *buf = NULL;
+ size_t buf_size;
+ const char *x;
+ uint8_t *z;
+ int r;
+
+ assert(ret);
+ assert(ret_len);
+ assert(p || l == 0);
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* Note that the calculation of memory size is an upper boundary, as we ignore whitespace while decoding */
+ buf_size = (l + 1) / 2 + 1;
+ buf = malloc(buf_size);
+ if (!buf)
+ return -ENOMEM;
+
+ for (x = p, z = buf;;) {
+ int a, b;
+
+ a = unhex_next(&x, &l);
+ if (a == -EPIPE) /* End of string */
+ break;
+ if (a < 0) {
+ r = a;
+ goto on_failure;
+ }
+
+ b = unhex_next(&x, &l);
+ if (b < 0) {
+ r = b;
+ goto on_failure;
+ }
+
+ *(z++) = (uint8_t) a << 4 | (uint8_t) b;
+ }
+
+ *z = 0;
+
+ *ret_len = (size_t) (z - buf);
+ *ret = TAKE_PTR(buf);
+
+ return 0;
+
+on_failure:
+ if (secure)
+ explicit_bzero_safe(buf, buf_size);
+
+ return r;
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-6
+ * Notice that base32hex differs from base32 in the alphabet it uses.
+ * The distinction is that the base32hex representation preserves the
+ * order of the underlying data when compared as bytestrings, this is
+ * useful when representing NSEC3 hashes, as one can then verify the
+ * order of hashes directly from their representation. */
+char base32hexchar(int x) {
+ static const char table[32] = "0123456789"
+ "ABCDEFGHIJKLMNOPQRSTUV";
+
+ return table[x & 31];
+}
+
+int unbase32hexchar(char c) {
+ unsigned offset;
+
+ if (c >= '0' && c <= '9')
+ return c - '0';
+
+ offset = '9' - '0' + 1;
+
+ if (c >= 'A' && c <= 'V')
+ return c - 'A' + offset;
+
+ return -EINVAL;
+}
+
+char *base32hexmem(const void *p, size_t l, bool padding) {
+ char *r, *z;
+ const uint8_t *x;
+ size_t len;
+
+ assert(p || l == 0);
+
+ if (padding)
+ /* five input bytes makes eight output bytes, padding is added so we must round up */
+ len = 8 * (l + 4) / 5;
+ else {
+ /* same, but round down as there is no padding */
+ len = 8 * l / 5;
+
+ switch (l % 5) {
+ case 4:
+ len += 7;
+ break;
+ case 3:
+ len += 5;
+ break;
+ case 2:
+ len += 4;
+ break;
+ case 1:
+ len += 2;
+ break;
+ }
+ }
+
+ z = r = malloc(len + 1);
+ if (!r)
+ return NULL;
+
+ for (x = p; x < (const uint8_t*) p + (l / 5) * 5; x += 5) {
+ /* x[0] == XXXXXXXX; x[1] == YYYYYYYY; x[2] == ZZZZZZZZ
+ * x[3] == QQQQQQQQ; x[4] == WWWWWWWW */
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */
+ *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */
+ *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */
+ *(z++) = base32hexchar((x[2] & 15) << 1 | x[3] >> 7); /* 000ZZZZQ */
+ *(z++) = base32hexchar((x[3] & 127) >> 2); /* 000QQQQQ */
+ *(z++) = base32hexchar((x[3] & 3) << 3 | x[4] >> 5); /* 000QQWWW */
+ *(z++) = base32hexchar((x[4] & 31)); /* 000WWWWW */
+ }
+
+ switch (l % 5) {
+ case 4:
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */
+ *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */
+ *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */
+ *(z++) = base32hexchar((x[2] & 15) << 1 | x[3] >> 7); /* 000ZZZZQ */
+ *(z++) = base32hexchar((x[3] & 127) >> 2); /* 000QQQQQ */
+ *(z++) = base32hexchar((x[3] & 3) << 3); /* 000QQ000 */
+ if (padding)
+ *(z++) = '=';
+
+ break;
+
+ case 3:
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */
+ *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */
+ *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */
+ *(z++) = base32hexchar((x[2] & 15) << 1); /* 000ZZZZ0 */
+ if (padding) {
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ }
+
+ break;
+
+ case 2:
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */
+ *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */
+ *(z++) = base32hexchar((x[1] & 1) << 4); /* 000Y0000 */
+ if (padding) {
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ }
+
+ break;
+
+ case 1:
+ *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */
+ *(z++) = base32hexchar((x[0] & 7) << 2); /* 000XXX00 */
+ if (padding) {
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ *(z++) = '=';
+ }
+
+ break;
+ }
+
+ *z = 0;
+ return r;
+}
+
+int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *_len) {
+ _cleanup_free_ uint8_t *r = NULL;
+ int a, b, c, d, e, f, g, h;
+ uint8_t *z;
+ const char *x;
+ size_t len;
+ unsigned pad = 0;
+
+ assert(p || l == 0);
+ assert(mem);
+ assert(_len);
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* padding ensures any base32hex input has input divisible by 8 */
+ if (padding && l % 8 != 0)
+ return -EINVAL;
+
+ if (padding) {
+ /* strip the padding */
+ while (l > 0 && p[l - 1] == '=' && pad < 7) {
+ pad++;
+ l--;
+ }
+ }
+
+ /* a group of eight input bytes needs five output bytes, in case of
+ * padding we need to add some extra bytes */
+ len = (l / 8) * 5;
+
+ switch (l % 8) {
+ case 7:
+ len += 4;
+ break;
+ case 5:
+ len += 3;
+ break;
+ case 4:
+ len += 2;
+ break;
+ case 2:
+ len += 1;
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ z = r = malloc(len + 1);
+ if (!r)
+ return -ENOMEM;
+
+ for (x = p; x < p + (l / 8) * 8; x += 8) {
+ /* a == 000XXXXX; b == 000YYYYY; c == 000ZZZZZ; d == 000WWWWW
+ * e == 000SSSSS; f == 000QQQQQ; g == 000VVVVV; h == 000RRRRR */
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unbase32hexchar(x[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ d = unbase32hexchar(x[3]);
+ if (d < 0)
+ return -EINVAL;
+
+ e = unbase32hexchar(x[4]);
+ if (e < 0)
+ return -EINVAL;
+
+ f = unbase32hexchar(x[5]);
+ if (f < 0)
+ return -EINVAL;
+
+ g = unbase32hexchar(x[6]);
+ if (g < 0)
+ return -EINVAL;
+
+ h = unbase32hexchar(x[7]);
+ if (h < 0)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+ *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */
+ *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */
+ *(z++) = (uint8_t) e << 7 | (uint8_t) f << 2 | (uint8_t) g >> 3; /* SQQQQQVV */
+ *(z++) = (uint8_t) g << 5 | (uint8_t) h; /* VVVRRRRR */
+ }
+
+ switch (l % 8) {
+ case 7:
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unbase32hexchar(x[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ d = unbase32hexchar(x[3]);
+ if (d < 0)
+ return -EINVAL;
+
+ e = unbase32hexchar(x[4]);
+ if (e < 0)
+ return -EINVAL;
+
+ f = unbase32hexchar(x[5]);
+ if (f < 0)
+ return -EINVAL;
+
+ g = unbase32hexchar(x[6]);
+ if (g < 0)
+ return -EINVAL;
+
+ /* g == 000VV000 */
+ if (g & 7)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+ *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */
+ *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */
+ *(z++) = (uint8_t) e << 7 | (uint8_t) f << 2 | (uint8_t) g >> 3; /* SQQQQQVV */
+
+ break;
+ case 5:
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unbase32hexchar(x[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ d = unbase32hexchar(x[3]);
+ if (d < 0)
+ return -EINVAL;
+
+ e = unbase32hexchar(x[4]);
+ if (e < 0)
+ return -EINVAL;
+
+ /* e == 000SSSS0 */
+ if (e & 1)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+ *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */
+ *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */
+
+ break;
+ case 4:
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ c = unbase32hexchar(x[2]);
+ if (c < 0)
+ return -EINVAL;
+
+ d = unbase32hexchar(x[3]);
+ if (d < 0)
+ return -EINVAL;
+
+ /* d == 000W0000 */
+ if (d & 15)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+ *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */
+
+ break;
+ case 2:
+ a = unbase32hexchar(x[0]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unbase32hexchar(x[1]);
+ if (b < 0)
+ return -EINVAL;
+
+ /* b == 000YYY00 */
+ if (b & 3)
+ return -EINVAL;
+
+ *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */
+
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *z = 0;
+
+ *mem = TAKE_PTR(r);
+ *_len = len;
+
+ return 0;
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-4 */
+char base64char(int x) {
+ static const char table[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789+/";
+ return table[x & 63];
+}
+
+int unbase64char(char c) {
+ unsigned offset;
+
+ if (c >= 'A' && c <= 'Z')
+ return c - 'A';
+
+ offset = 'Z' - 'A' + 1;
+
+ if (c >= 'a' && c <= 'z')
+ return c - 'a' + offset;
+
+ offset += 'z' - 'a' + 1;
+
+ if (c >= '0' && c <= '9')
+ return c - '0' + offset;
+
+ offset += '9' - '0' + 1;
+
+ if (c == '+')
+ return offset;
+
+ offset++;
+
+ if (c == '/')
+ return offset;
+
+ return -EINVAL;
+}
+
+ssize_t base64mem(const void *p, size_t l, char **out) {
+ char *r, *z;
+ const uint8_t *x;
+
+ assert(p || l == 0);
+ assert(out);
+
+ /* three input bytes makes four output bytes, padding is added so we must round up */
+ z = r = malloc(4 * (l + 2) / 3 + 1);
+ if (!r)
+ return -ENOMEM;
+
+ for (x = p; x < (const uint8_t*) p + (l / 3) * 3; x += 3) {
+ /* x[0] == XXXXXXXX; x[1] == YYYYYYYY; x[2] == ZZZZZZZZ */
+ *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */
+ *(z++) = base64char((x[0] & 3) << 4 | x[1] >> 4); /* 00XXYYYY */
+ *(z++) = base64char((x[1] & 15) << 2 | x[2] >> 6); /* 00YYYYZZ */
+ *(z++) = base64char(x[2] & 63); /* 00ZZZZZZ */
+ }
+
+ switch (l % 3) {
+ case 2:
+ *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */
+ *(z++) = base64char((x[0] & 3) << 4 | x[1] >> 4); /* 00XXYYYY */
+ *(z++) = base64char((x[1] & 15) << 2); /* 00YYYY00 */
+ *(z++) = '=';
+
+ break;
+ case 1:
+ *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */
+ *(z++) = base64char((x[0] & 3) << 4); /* 00XX0000 */
+ *(z++) = '=';
+ *(z++) = '=';
+
+ break;
+ }
+
+ *z = 0;
+ *out = r;
+ return z - r;
+}
+
+static int base64_append_width(
+ char **prefix, int plen,
+ char sep, int indent,
+ const void *p, size_t l,
+ int width) {
+
+ _cleanup_free_ char *x = NULL;
+ char *t, *s;
+ ssize_t len, avail, line, lines;
+
+ len = base64mem(p, l, &x);
+ if (len <= 0)
+ return len;
+
+ lines = DIV_ROUND_UP(len, width);
+
+ if ((size_t) plen >= SSIZE_MAX - 1 - 1 ||
+ lines > (SSIZE_MAX - plen - 1 - 1) / (indent + width + 1))
+ return -ENOMEM;
+
+ t = realloc(*prefix, (ssize_t) plen + 1 + 1 + (indent + width + 1) * lines);
+ if (!t)
+ return -ENOMEM;
+
+ t[plen] = sep;
+
+ for (line = 0, s = t + plen + 1, avail = len; line < lines; line++) {
+ int act = MIN(width, avail);
+
+ if (line > 0 || sep == '\n') {
+ memset(s, ' ', indent);
+ s += indent;
+ }
+
+ memcpy(s, x + width * line, act);
+ s += act;
+ *(s++) = line < lines - 1 ? '\n' : '\0';
+ avail -= act;
+ }
+ assert(avail == 0);
+
+ *prefix = t;
+ return 0;
+}
+
+int base64_append(
+ char **prefix, int plen,
+ const void *p, size_t l,
+ int indent, int width) {
+
+ if (plen > width / 2 || plen + indent > width)
+ /* leave indent on the left, keep last column free */
+ return base64_append_width(prefix, plen, '\n', indent, p, l, width - indent - 1);
+ else
+ /* leave plen on the left, keep last column free */
+ return base64_append_width(prefix, plen, ' ', plen + 1, p, l, width - plen - 1);
+}
+
+static int unbase64_next(const char **p, size_t *l) {
+ int ret;
+
+ assert(p);
+ assert(l);
+
+ /* Find the next non-whitespace character, and decode it. If we find padding, we return it as INT_MAX. We
+ * greedily skip all preceding and all following whitespace. */
+
+ for (;;) {
+ if (*l == 0)
+ return -EPIPE;
+
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip leading whitespace */
+ (*p)++, (*l)--;
+ }
+
+ if (**p == '=')
+ ret = INT_MAX; /* return padding as INT_MAX */
+ else {
+ ret = unbase64char(**p);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (;;) {
+ (*p)++, (*l)--;
+
+ if (*l == 0)
+ break;
+ if (!strchr(WHITESPACE, **p))
+ break;
+
+ /* Skip following whitespace */
+ }
+
+ return ret;
+}
+
+int unbase64mem_full(const char *p, size_t l, bool secure, void **ret, size_t *ret_size) {
+ _cleanup_free_ uint8_t *buf = NULL;
+ const char *x;
+ uint8_t *z;
+ size_t len;
+ int r;
+
+ assert(p || l == 0);
+ assert(ret);
+ assert(ret_size);
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* A group of four input bytes needs three output bytes, in case of padding we need to add two or three extra
+ * bytes. Note that this calculation is an upper boundary, as we ignore whitespace while decoding */
+ len = (l / 4) * 3 + (l % 4 != 0 ? (l % 4) - 1 : 0);
+
+ buf = malloc(len + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ for (x = p, z = buf;;) {
+ int a, b, c, d; /* a == 00XXXXXX; b == 00YYYYYY; c == 00ZZZZZZ; d == 00WWWWWW */
+
+ a = unbase64_next(&x, &l);
+ if (a == -EPIPE) /* End of string */
+ break;
+ if (a < 0) {
+ r = a;
+ goto on_failure;
+ }
+ if (a == INT_MAX) { /* Padding is not allowed at the beginning of a 4ch block */
+ r = -EINVAL;
+ goto on_failure;
+ }
+
+ b = unbase64_next(&x, &l);
+ if (b < 0) {
+ r = b;
+ goto on_failure;
+ }
+ if (b == INT_MAX) { /* Padding is not allowed at the second character of a 4ch block either */
+ r = -EINVAL;
+ goto on_failure;
+ }
+
+ c = unbase64_next(&x, &l);
+ if (c < 0) {
+ r = c;
+ goto on_failure;
+ }
+
+ d = unbase64_next(&x, &l);
+ if (d < 0) {
+ r = d;
+ goto on_failure;
+ }
+
+ if (c == INT_MAX) { /* Padding at the third character */
+
+ if (d != INT_MAX) { /* If the third character is padding, the fourth must be too */
+ r = -EINVAL;
+ goto on_failure;
+ }
+
+ /* b == 00YY0000 */
+ if (b & 15) {
+ r = -EINVAL;
+ goto on_failure;
+ }
+
+ if (l > 0) { /* Trailing rubbish? */
+ r = -ENAMETOOLONG;
+ goto on_failure;
+ }
+
+ *(z++) = (uint8_t) a << 2 | (uint8_t) (b >> 4); /* XXXXXXYY */
+ break;
+ }
+
+ if (d == INT_MAX) {
+ /* c == 00ZZZZ00 */
+ if (c & 3) {
+ r = -EINVAL;
+ goto on_failure;
+ }
+
+ if (l > 0) { /* Trailing rubbish? */
+ r = -ENAMETOOLONG;
+ goto on_failure;
+ }
+
+ *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */
+ *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */
+ break;
+ }
+
+ *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */
+ *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */
+ *(z++) = (uint8_t) c << 6 | (uint8_t) d; /* ZZWWWWWW */
+ }
+
+ *z = 0;
+
+ *ret_size = (size_t) (z - buf);
+ *ret = TAKE_PTR(buf);
+
+ return 0;
+
+on_failure:
+ if (secure)
+ explicit_bzero_safe(buf, len);
+
+ return r;
+}
+
+void hexdump(FILE *f, const void *p, size_t s) {
+ const uint8_t *b = p;
+ unsigned n = 0;
+
+ assert(b || s == 0);
+
+ if (!f)
+ f = stdout;
+
+ while (s > 0) {
+ size_t i;
+
+ fprintf(f, "%04x ", n);
+
+ for (i = 0; i < 16; i++) {
+
+ if (i >= s)
+ fputs(" ", f);
+ else
+ fprintf(f, "%02x ", b[i]);
+
+ if (i == 7)
+ fputc(' ', f);
+ }
+
+ fputc(' ', f);
+
+ for (i = 0; i < 16; i++) {
+
+ if (i >= s)
+ fputc(' ', f);
+ else
+ fputc(isprint(b[i]) ? (char) b[i] : '.', f);
+ }
+
+ fputc('\n', f);
+
+ if (s < 16)
+ break;
+
+ n += 16;
+ b += 16;
+ s -= 16;
+ }
+}
diff --git a/src/basic/hexdecoct.h b/src/basic/hexdecoct.h
new file mode 100644
index 0000000..7e2a689
--- /dev/null
+++ b/src/basic/hexdecoct.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+char octchar(int x) _const_;
+int unoctchar(char c) _const_;
+
+char decchar(int x) _const_;
+int undecchar(char c) _const_;
+
+char hexchar(int x) _const_;
+int unhexchar(char c) _const_;
+
+char *hexmem(const void *p, size_t l);
+int unhexmem_full(const char *p, size_t l, bool secure, void **mem, size_t *len);
+static inline int unhexmem(const char *p, size_t l, void **mem, size_t *len) {
+ return unhexmem_full(p, l, false, mem, len);
+}
+
+char base32hexchar(int x) _const_;
+int unbase32hexchar(char c) _const_;
+
+char base64char(int x) _const_;
+int unbase64char(char c) _const_;
+
+char *base32hexmem(const void *p, size_t l, bool padding);
+int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *len);
+
+ssize_t base64mem(const void *p, size_t l, char **out);
+int base64_append(char **prefix, int plen,
+ const void *p, size_t l,
+ int margin, int width);
+int unbase64mem_full(const char *p, size_t l, bool secure, void **mem, size_t *len);
+static inline int unbase64mem(const char *p, size_t l, void **mem, size_t *len) {
+ return unbase64mem_full(p, l, false, mem, len);
+}
+
+void hexdump(FILE *f, const void *p, size_t s);
diff --git a/src/basic/hostname-util.c b/src/basic/hostname-util.c
new file mode 100644
index 0000000..09e49cc
--- /dev/null
+++ b/src/basic/hostname-util.c
@@ -0,0 +1,329 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+
+bool hostname_is_set(void) {
+ struct utsname u;
+
+ assert_se(uname(&u) >= 0);
+
+ if (isempty(u.nodename))
+ return false;
+
+ /* This is the built-in kernel default hostname */
+ if (streq(u.nodename, "(none)"))
+ return false;
+
+ return true;
+}
+
+char* gethostname_malloc(void) {
+ struct utsname u;
+ const char *s;
+
+ /* This call tries to return something useful, either the actual hostname
+ * or it makes something up. The only reason it might fail is OOM.
+ * It might even return "localhost" if that's set. */
+
+ assert_se(uname(&u) >= 0);
+
+ s = u.nodename;
+ if (isempty(s) || streq(s, "(none)"))
+ s = FALLBACK_HOSTNAME;
+
+ return strdup(s);
+}
+
+char* gethostname_short_malloc(void) {
+ struct utsname u;
+ const char *s;
+
+ /* Like above, but kills the FQDN part if present. */
+
+ assert_se(uname(&u) >= 0);
+
+ s = u.nodename;
+ if (isempty(s) || streq(s, "(none)") || s[0] == '.') {
+ s = FALLBACK_HOSTNAME;
+ assert(s[0] != '.');
+ }
+
+ return strndup(s, strcspn(s, "."));
+}
+
+int gethostname_strict(char **ret) {
+ struct utsname u;
+ char *k;
+
+ /* This call will rather fail than make up a name. It will not return "localhost" either. */
+
+ assert_se(uname(&u) >= 0);
+
+ if (isempty(u.nodename))
+ return -ENXIO;
+
+ if (streq(u.nodename, "(none)"))
+ return -ENXIO;
+
+ if (is_localhost(u.nodename))
+ return -ENXIO;
+
+ k = strdup(u.nodename);
+ if (!k)
+ return -ENOMEM;
+
+ *ret = k;
+ return 0;
+}
+
+bool valid_ldh_char(char c) {
+ return
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ c == '-';
+}
+
+/**
+ * Check if s looks like a valid hostname or FQDN. This does not do
+ * full DNS validation, but only checks if the name is composed of
+ * allowed characters and the length is not above the maximum allowed
+ * by Linux (c.f. dns_name_is_valid()). Trailing dot is allowed if
+ * allow_trailing_dot is true and at least two components are present
+ * in the name. Note that due to the restricted charset and length
+ * this call is substantially more conservative than
+ * dns_name_is_valid().
+ */
+bool hostname_is_valid(const char *s, bool allow_trailing_dot) {
+ unsigned n_dots = 0;
+ const char *p;
+ bool dot, hyphen;
+
+ if (isempty(s))
+ return false;
+
+ /* Doesn't accept empty hostnames, hostnames with
+ * leading dots, and hostnames with multiple dots in a
+ * sequence. Also ensures that the length stays below
+ * HOST_NAME_MAX. */
+
+ for (p = s, dot = hyphen = true; *p; p++)
+ if (*p == '.') {
+ if (dot || hyphen)
+ return false;
+
+ dot = true;
+ hyphen = false;
+ n_dots++;
+
+ } else if (*p == '-') {
+ if (dot)
+ return false;
+
+ dot = false;
+ hyphen = true;
+
+ } else {
+ if (!valid_ldh_char(*p))
+ return false;
+
+ dot = false;
+ hyphen = false;
+ }
+
+ if (dot && (n_dots < 2 || !allow_trailing_dot))
+ return false;
+ if (hyphen)
+ return false;
+
+ if (p-s > HOST_NAME_MAX) /* Note that HOST_NAME_MAX is 64 on
+ * Linux, but DNS allows domain names
+ * up to 255 characters */
+ return false;
+
+ return true;
+}
+
+char* hostname_cleanup(char *s) {
+ char *p, *d;
+ bool dot, hyphen;
+
+ assert(s);
+
+ for (p = s, d = s, dot = hyphen = true; *p && d - s < HOST_NAME_MAX; p++)
+ if (*p == '.') {
+ if (dot || hyphen)
+ continue;
+
+ *(d++) = '.';
+ dot = true;
+ hyphen = false;
+
+ } else if (*p == '-') {
+ if (dot)
+ continue;
+
+ *(d++) = '-';
+ dot = false;
+ hyphen = true;
+
+ } else if (valid_ldh_char(*p)) {
+ *(d++) = *p;
+ dot = false;
+ hyphen = false;
+ }
+
+ if (d > s && IN_SET(d[-1], '-', '.'))
+ /* The dot can occur at most once, but we might have multiple
+ * hyphens, hence the loop */
+ d--;
+ *d = 0;
+
+ return s;
+}
+
+bool is_localhost(const char *hostname) {
+ assert(hostname);
+
+ /* This tries to identify local host and domain names
+ * described in RFC6761 plus the redhatism of localdomain */
+
+ return STRCASE_IN_SET(
+ hostname,
+ "localhost",
+ "localhost.",
+ "localhost.localdomain",
+ "localhost.localdomain.") ||
+ endswith_no_case(hostname, ".localhost") ||
+ endswith_no_case(hostname, ".localhost.") ||
+ endswith_no_case(hostname, ".localhost.localdomain") ||
+ endswith_no_case(hostname, ".localhost.localdomain.");
+}
+
+bool is_gateway_hostname(const char *hostname) {
+ assert(hostname);
+
+ /* This tries to identify the valid syntaxes for the our
+ * synthetic "gateway" host. */
+
+ return
+ strcaseeq(hostname, "_gateway") || strcaseeq(hostname, "_gateway.")
+#if ENABLE_COMPAT_GATEWAY_HOSTNAME
+ || strcaseeq(hostname, "gateway") || strcaseeq(hostname, "gateway.")
+#endif
+ ;
+}
+
+int sethostname_idempotent(const char *s) {
+ char buf[HOST_NAME_MAX + 1] = {};
+
+ assert(s);
+
+ if (gethostname(buf, sizeof(buf)) < 0)
+ return -errno;
+
+ if (streq(buf, s))
+ return 0;
+
+ if (sethostname(s, strlen(s)) < 0)
+ return -errno;
+
+ return 1;
+}
+
+int shorten_overlong(const char *s, char **ret) {
+ char *h, *p;
+
+ /* Shorten an overlong name to HOST_NAME_MAX or to the first dot,
+ * whatever comes earlier. */
+
+ assert(s);
+
+ h = strdup(s);
+ if (!h)
+ return -ENOMEM;
+
+ if (hostname_is_valid(h, false)) {
+ *ret = h;
+ return 0;
+ }
+
+ p = strchr(h, '.');
+ if (p)
+ *p = 0;
+
+ strshorten(h, HOST_NAME_MAX);
+
+ if (!hostname_is_valid(h, false)) {
+ free(h);
+ return -EDOM;
+ }
+
+ *ret = h;
+ return 1;
+}
+
+int read_etc_hostname_stream(FILE *f, char **ret) {
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0) /* EOF without any hostname? the file is empty, let's treat that exactly like no file at all: ENOENT */
+ return -ENOENT;
+
+ p = strstrip(line);
+
+ /* File may have empty lines or comments, ignore them */
+ if (!IN_SET(*p, '\0', '#')) {
+ char *copy;
+
+ hostname_cleanup(p); /* normalize the hostname */
+
+ if (!hostname_is_valid(p, true)) /* check that the hostname we return is valid */
+ return -EBADMSG;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+ }
+ }
+}
+
+int read_etc_hostname(const char *path, char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert(ret);
+
+ if (!path)
+ path = "/etc/hostname";
+
+ f = fopen(path, "re");
+ if (!f)
+ return -errno;
+
+ return read_etc_hostname_stream(f, ret);
+
+}
diff --git a/src/basic/hostname-util.h b/src/basic/hostname-util.h
new file mode 100644
index 0000000..c1e47a2
--- /dev/null
+++ b/src/basic/hostname-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+bool hostname_is_set(void);
+
+char* gethostname_malloc(void);
+char* gethostname_short_malloc(void);
+int gethostname_strict(char **ret);
+
+bool valid_ldh_char(char c) _const_;
+bool hostname_is_valid(const char *s, bool allow_trailing_dot) _pure_;
+char* hostname_cleanup(char *s);
+
+#define machine_name_is_valid(s) hostname_is_valid(s, false)
+
+bool is_localhost(const char *hostname);
+bool is_gateway_hostname(const char *hostname);
+
+int sethostname_idempotent(const char *s);
+
+int shorten_overlong(const char *s, char **ret);
+
+int read_etc_hostname_stream(FILE *f, char **ret);
+int read_etc_hostname(const char *path, char **ret);
diff --git a/src/basic/in-addr-util.c b/src/basic/in-addr-util.c
new file mode 100644
index 0000000..a4f13b6
--- /dev/null
+++ b/src/basic/in-addr-util.c
@@ -0,0 +1,784 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <endian.h>
+#include <errno.h>
+#include <net/if.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "util.h"
+
+bool in4_addr_is_null(const struct in_addr *a) {
+ assert(a);
+
+ return a->s_addr == 0;
+}
+
+int in_addr_is_null(int family, const union in_addr_union *u) {
+ assert(u);
+
+ if (family == AF_INET)
+ return in4_addr_is_null(&u->in);
+
+ if (family == AF_INET6)
+ return IN6_IS_ADDR_UNSPECIFIED(&u->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+bool in4_addr_is_link_local(const struct in_addr *a) {
+ assert(a);
+
+ return (be32toh(a->s_addr) & UINT32_C(0xFFFF0000)) == (UINT32_C(169) << 24 | UINT32_C(254) << 16);
+}
+
+int in_addr_is_link_local(int family, const union in_addr_union *u) {
+ assert(u);
+
+ if (family == AF_INET)
+ return in4_addr_is_link_local(&u->in);
+
+ if (family == AF_INET6)
+ return IN6_IS_ADDR_LINKLOCAL(&u->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+bool in6_addr_is_link_local_all_nodes(const struct in6_addr *a) {
+ assert(a);
+
+ /* ff02::1 */
+ return be32toh(a->s6_addr32[0]) == UINT32_C(0xff020000) &&
+ a->s6_addr32[1] == 0 &&
+ a->s6_addr32[2] == 0 &&
+ be32toh(a->s6_addr32[3]) == UINT32_C(0x00000001);
+}
+
+int in_addr_is_multicast(int family, const union in_addr_union *u) {
+ assert(u);
+
+ if (family == AF_INET)
+ return IN_MULTICAST(be32toh(u->in.s_addr));
+
+ if (family == AF_INET6)
+ return IN6_IS_ADDR_MULTICAST(&u->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+bool in4_addr_is_local_multicast(const struct in_addr *a) {
+ assert(a);
+
+ return (be32toh(a->s_addr) & UINT32_C(0xffffff00)) == UINT32_C(0xe0000000);
+}
+
+bool in4_addr_is_localhost(const struct in_addr *a) {
+ assert(a);
+
+ /* All of 127.x.x.x is localhost. */
+ return (be32toh(a->s_addr) & UINT32_C(0xFF000000)) == UINT32_C(127) << 24;
+}
+
+bool in4_addr_is_non_local(const struct in_addr *a) {
+ /* Whether the address is not null and not localhost.
+ *
+ * As such, it is suitable to configure as DNS/NTP server from DHCP. */
+ return !in4_addr_is_null(a) &&
+ !in4_addr_is_localhost(a);
+}
+
+int in_addr_is_localhost(int family, const union in_addr_union *u) {
+ assert(u);
+
+ if (family == AF_INET)
+ return in4_addr_is_localhost(&u->in);
+
+ if (family == AF_INET6)
+ return IN6_IS_ADDR_LOOPBACK(&u->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+bool in4_addr_equal(const struct in_addr *a, const struct in_addr *b) {
+ assert(a);
+ assert(b);
+
+ return a->s_addr == b->s_addr;
+}
+
+int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b) {
+ assert(a);
+ assert(b);
+
+ if (family == AF_INET)
+ return in4_addr_equal(&a->in, &b->in);
+
+ if (family == AF_INET6)
+ return IN6_ARE_ADDR_EQUAL(&a->in6, &b->in6);
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_prefix_intersect(
+ int family,
+ const union in_addr_union *a,
+ unsigned aprefixlen,
+ const union in_addr_union *b,
+ unsigned bprefixlen) {
+
+ unsigned m;
+
+ assert(a);
+ assert(b);
+
+ /* Checks whether there are any addresses that are in both
+ * networks */
+
+ m = MIN(aprefixlen, bprefixlen);
+
+ if (family == AF_INET) {
+ uint32_t x, nm;
+
+ x = be32toh(a->in.s_addr ^ b->in.s_addr);
+ nm = (m == 0) ? 0 : 0xFFFFFFFFUL << (32 - m);
+
+ return (x & nm) == 0;
+ }
+
+ if (family == AF_INET6) {
+ unsigned i;
+
+ if (m > 128)
+ m = 128;
+
+ for (i = 0; i < 16; i++) {
+ uint8_t x, nm;
+
+ x = a->in6.s6_addr[i] ^ b->in6.s6_addr[i];
+
+ if (m < 8)
+ nm = 0xFF << (8 - m);
+ else
+ nm = 0xFF;
+
+ if ((x & nm) != 0)
+ return 0;
+
+ if (m > 8)
+ m -= 8;
+ else
+ m = 0;
+ }
+
+ return 1;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen) {
+ assert(u);
+
+ /* Increases the network part of an address by one. Returns
+ * positive if that succeeds, or -ERANGE if this overflows. */
+
+ return in_addr_prefix_nth(family, u, prefixlen, 1);
+}
+
+/*
+ * Calculates the nth prefix of size prefixlen starting from the address denoted by u.
+ *
+ * On success 1 will be returned and the calculated prefix will be available in
+ * u. In the case nth == 0 the input will be left unchanged and 1 will be returned.
+ * In case the calculation cannot be performed (invalid prefix length,
+ * overflows would occur) -ERANGE is returned. If the address family given isn't
+ * supported -EAFNOSUPPORT will be returned.
+ *
+ *
+ * Examples:
+ * - in_addr_prefix_nth(AF_INET, 192.168.0.0, 24, 2), returns 1, writes 192.168.2.0 to u
+ * - in_addr_prefix_nth(AF_INET, 192.168.0.0, 24, 0), returns 1, no data written
+ * - in_addr_prefix_nth(AF_INET, 255.255.255.0, 24, 1), returns -ERANGE, no data written
+ * - in_addr_prefix_nth(AF_INET, 255.255.255.0, 0, 1), returns -ERANGE, no data written
+ * - in_addr_prefix_nth(AF_INET6, 2001:db8, 64, 0xff00) returns 1, writes 2001:0db8:0000:ff00:: to u
+ */
+int in_addr_prefix_nth(int family, union in_addr_union *u, unsigned prefixlen, uint64_t nth) {
+ assert(u);
+
+ if (prefixlen <= 0)
+ return -ERANGE;
+
+ if (nth == 0)
+ return 1;
+
+ if (family == AF_INET) {
+ uint32_t c, n, t;
+ if (prefixlen > 32)
+ prefixlen = 32;
+
+ c = be32toh(u->in.s_addr);
+
+ t = nth << (32 - prefixlen);
+
+ /* Check for wrap */
+ if (c > UINT32_MAX - t)
+ return -ERANGE;
+
+ n = c + t;
+
+ n &= UINT32_C(0xFFFFFFFF) << (32 - prefixlen);
+ u->in.s_addr = htobe32(n);
+ return 1;
+ }
+
+ if (family == AF_INET6) {
+ struct in6_addr result = {};
+ uint8_t overflow = 0;
+ uint64_t delta; /* this assumes that we only ever have to up to 1<<64 subnets */
+ unsigned start_byte = (prefixlen - 1) / 8;
+
+ if (prefixlen > 128)
+ prefixlen = 128;
+
+ /* First calculate what we have to add */
+ delta = nth << ((128 - prefixlen) % 8);
+
+ for (unsigned i = 16; i > 0; i--) {
+ unsigned j = i - 1;
+ unsigned d = 0;
+
+ if (j <= start_byte) {
+ int16_t t;
+
+ d = delta & 0xFF;
+ delta >>= 8;
+
+ t = u->in6.s6_addr[j] + d + overflow;
+ overflow = t > UINT8_MAX ? t - UINT8_MAX : 0;
+
+ result.s6_addr[j] = (uint8_t)t;
+ } else
+ result.s6_addr[j] = u->in6.s6_addr[j];
+ }
+
+ if (overflow || delta != 0)
+ return -ERANGE;
+
+ u->in6 = result;
+ return 1;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_random_prefix(
+ int family,
+ union in_addr_union *u,
+ unsigned prefixlen_fixed_part,
+ unsigned prefixlen) {
+
+ assert(u);
+
+ /* Random network part of an address by one. */
+
+ if (prefixlen <= 0)
+ return 0;
+
+ if (family == AF_INET) {
+ uint32_t c, n;
+
+ if (prefixlen_fixed_part > 32)
+ prefixlen_fixed_part = 32;
+ if (prefixlen > 32)
+ prefixlen = 32;
+ if (prefixlen_fixed_part >= prefixlen)
+ return -EINVAL;
+
+ c = be32toh(u->in.s_addr);
+ c &= ((UINT32_C(1) << prefixlen_fixed_part) - 1) << (32 - prefixlen_fixed_part);
+
+ random_bytes(&n, sizeof(n));
+ n &= ((UINT32_C(1) << (prefixlen - prefixlen_fixed_part)) - 1) << (32 - prefixlen);
+
+ u->in.s_addr = htobe32(n | c);
+ return 1;
+ }
+
+ if (family == AF_INET6) {
+ struct in6_addr n;
+ unsigned i, j;
+
+ if (prefixlen_fixed_part > 128)
+ prefixlen_fixed_part = 128;
+ if (prefixlen > 128)
+ prefixlen = 128;
+ if (prefixlen_fixed_part >= prefixlen)
+ return -EINVAL;
+
+ random_bytes(&n, sizeof(n));
+
+ for (i = 0; i < 16; i++) {
+ uint8_t mask_fixed_part = 0, mask = 0;
+
+ if (i < (prefixlen_fixed_part + 7) / 8) {
+ if (i < prefixlen_fixed_part / 8)
+ mask_fixed_part = 0xffu;
+ else {
+ j = prefixlen_fixed_part % 8;
+ mask_fixed_part = ((UINT8_C(1) << (j + 1)) - 1) << (8 - j);
+ }
+ }
+
+ if (i < (prefixlen + 7) / 8) {
+ if (i < prefixlen / 8)
+ mask = 0xffu ^ mask_fixed_part;
+ else {
+ j = prefixlen % 8;
+ mask = (((UINT8_C(1) << (j + 1)) - 1) << (8 - j)) ^ mask_fixed_part;
+ }
+ }
+
+ u->in6.s6_addr[i] &= mask_fixed_part;
+ u->in6.s6_addr[i] |= n.s6_addr[i] & mask;
+ }
+
+ return 1;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_to_string(int family, const union in_addr_union *u, char **ret) {
+ _cleanup_free_ char *x = NULL;
+ size_t l;
+
+ assert(u);
+ assert(ret);
+
+ if (family == AF_INET)
+ l = INET_ADDRSTRLEN;
+ else if (family == AF_INET6)
+ l = INET6_ADDRSTRLEN;
+ else
+ return -EAFNOSUPPORT;
+
+ x = new(char, l);
+ if (!x)
+ return -ENOMEM;
+
+ errno = 0;
+ if (!inet_ntop(family, u, x, l))
+ return errno_or_else(EINVAL);
+
+ *ret = TAKE_PTR(x);
+ return 0;
+}
+
+int in_addr_prefix_to_string(int family, const union in_addr_union *u, unsigned prefixlen, char **ret) {
+ _cleanup_free_ char *x = NULL;
+ char *p;
+ size_t l;
+
+ assert(u);
+ assert(ret);
+
+ if (family == AF_INET)
+ l = INET_ADDRSTRLEN + 3;
+ else if (family == AF_INET6)
+ l = INET6_ADDRSTRLEN + 4;
+ else
+ return -EAFNOSUPPORT;
+
+ if (prefixlen > FAMILY_ADDRESS_SIZE(family) * 8)
+ return -EINVAL;
+
+ x = new(char, l);
+ if (!x)
+ return -ENOMEM;
+
+ errno = 0;
+ if (!inet_ntop(family, u, x, l))
+ return errno_or_else(EINVAL);
+
+ p = x + strlen(x);
+ l -= strlen(x);
+ (void) strpcpyf(&p, l, "/%u", prefixlen);
+
+ *ret = TAKE_PTR(x);
+ return 0;
+}
+
+int in_addr_port_ifindex_name_to_string(int family, const union in_addr_union *u, uint16_t port, int ifindex, const char *server_name, char **ret) {
+ _cleanup_free_ char *ip_str = NULL, *x = NULL;
+ int r;
+
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ assert(u);
+ assert(ret);
+
+ /* Much like in_addr_to_string(), but optionally appends the zone interface index to the address, to properly
+ * handle IPv6 link-local addresses. */
+
+ r = in_addr_to_string(family, u, &ip_str);
+ if (r < 0)
+ return r;
+
+ if (family == AF_INET6) {
+ r = in_addr_is_link_local(family, u);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ ifindex = 0;
+ } else
+ ifindex = 0; /* For IPv4 address, ifindex is always ignored. */
+
+ if (port == 0 && ifindex == 0 && isempty(server_name)) {
+ *ret = TAKE_PTR(ip_str);
+ return 0;
+ }
+
+ const char *separator = isempty(server_name) ? "" : "#";
+ server_name = strempty(server_name);
+
+ if (port > 0) {
+ if (family == AF_INET6) {
+ if (ifindex > 0)
+ r = asprintf(&x, "[%s]:%"PRIu16"%%%i%s%s", ip_str, port, ifindex, separator, server_name);
+ else
+ r = asprintf(&x, "[%s]:%"PRIu16"%s%s", ip_str, port, separator, server_name);
+ } else
+ r = asprintf(&x, "%s:%"PRIu16"%s%s", ip_str, port, separator, server_name);
+ } else {
+ if (ifindex > 0)
+ r = asprintf(&x, "%s%%%i%s%s", ip_str, ifindex, separator, server_name);
+ else {
+ x = strjoin(ip_str, separator, server_name);
+ r = x ? 0 : -ENOMEM;
+ }
+ }
+ if (r < 0)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(x);
+ return 0;
+}
+
+int in_addr_from_string(int family, const char *s, union in_addr_union *ret) {
+ union in_addr_union buffer;
+ assert(s);
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ errno = 0;
+ if (inet_pton(family, s, ret ?: &buffer) <= 0)
+ return errno_or_else(EINVAL);
+
+ return 0;
+}
+
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) {
+ int r;
+
+ assert(s);
+
+ r = in_addr_from_string(AF_INET, s, ret);
+ if (r >= 0) {
+ if (ret_family)
+ *ret_family = AF_INET;
+ return 0;
+ }
+
+ r = in_addr_from_string(AF_INET6, s, ret);
+ if (r >= 0) {
+ if (ret_family)
+ *ret_family = AF_INET6;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) {
+ assert(addr);
+
+ return 32U - u32ctz(be32toh(addr->s_addr));
+}
+
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
+ assert(addr);
+ assert(prefixlen <= 32);
+
+ /* Shifting beyond 32 is not defined, handle this specially. */
+ if (prefixlen == 0)
+ addr->s_addr = 0;
+ else
+ addr->s_addr = htobe32((0xffffffff << (32 - prefixlen)) & 0xffffffff);
+
+ return addr;
+}
+
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
+ uint8_t msb_octet = *(uint8_t*) addr;
+
+ /* addr may not be aligned, so make sure we only access it byte-wise */
+
+ assert(addr);
+ assert(prefixlen);
+
+ if (msb_octet < 128)
+ /* class A, leading bits: 0 */
+ *prefixlen = 8;
+ else if (msb_octet < 192)
+ /* class B, leading bits 10 */
+ *prefixlen = 16;
+ else if (msb_octet < 224)
+ /* class C, leading bits 110 */
+ *prefixlen = 24;
+ else
+ /* class D or E, no default prefixlen */
+ return -ERANGE;
+
+ return 0;
+}
+
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
+ unsigned char prefixlen;
+ int r;
+
+ assert(addr);
+ assert(mask);
+
+ r = in4_addr_default_prefixlen(addr, &prefixlen);
+ if (r < 0)
+ return r;
+
+ in4_addr_prefixlen_to_netmask(mask, prefixlen);
+ return 0;
+}
+
+int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen) {
+ assert(addr);
+
+ if (family == AF_INET) {
+ struct in_addr mask;
+
+ if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen))
+ return -EINVAL;
+
+ addr->in.s_addr &= mask.s_addr;
+ return 0;
+ }
+
+ if (family == AF_INET6) {
+ unsigned i;
+
+ for (i = 0; i < 16; i++) {
+ uint8_t mask;
+
+ if (prefixlen >= 8) {
+ mask = 0xFF;
+ prefixlen -= 8;
+ } else {
+ mask = 0xFF << (8 - prefixlen);
+ prefixlen = 0;
+ }
+
+ addr->in6.s6_addr[i] &= mask;
+ }
+
+ return 0;
+ }
+
+ return -EAFNOSUPPORT;
+}
+
+int in_addr_prefix_covers(int family,
+ const union in_addr_union *prefix,
+ unsigned char prefixlen,
+ const union in_addr_union *address) {
+
+ union in_addr_union masked_prefix, masked_address;
+ int r;
+
+ assert(prefix);
+ assert(address);
+
+ masked_prefix = *prefix;
+ r = in_addr_mask(family, &masked_prefix, prefixlen);
+ if (r < 0)
+ return r;
+
+ masked_address = *address;
+ r = in_addr_mask(family, &masked_address, prefixlen);
+ if (r < 0)
+ return r;
+
+ return in_addr_equal(family, &masked_prefix, &masked_address);
+}
+
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) {
+ uint8_t u;
+ int r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ r = safe_atou8(p, &u);
+ if (r < 0)
+ return r;
+
+ if (u > FAMILY_ADDRESS_SIZE(family) * 8)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+int in_addr_prefix_from_string(
+ const char *p,
+ int family,
+ union in_addr_union *ret_prefix,
+ unsigned char *ret_prefixlen) {
+
+ _cleanup_free_ char *str = NULL;
+ union in_addr_union buffer;
+ const char *e, *l;
+ unsigned char k;
+ int r;
+
+ assert(p);
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ e = strchr(p, '/');
+ if (e) {
+ str = strndup(p, e - p);
+ if (!str)
+ return -ENOMEM;
+
+ l = str;
+ } else
+ l = p;
+
+ r = in_addr_from_string(family, l, &buffer);
+ if (r < 0)
+ return r;
+
+ if (e) {
+ r = in_addr_parse_prefixlen(family, e+1, &k);
+ if (r < 0)
+ return r;
+ } else
+ k = FAMILY_ADDRESS_SIZE(family) * 8;
+
+ if (ret_prefix)
+ *ret_prefix = buffer;
+ if (ret_prefixlen)
+ *ret_prefixlen = k;
+
+ return 0;
+}
+
+int in_addr_prefix_from_string_auto_internal(
+ const char *p,
+ InAddrPrefixLenMode mode,
+ int *ret_family,
+ union in_addr_union *ret_prefix,
+ unsigned char *ret_prefixlen) {
+
+ _cleanup_free_ char *str = NULL;
+ union in_addr_union buffer;
+ const char *e, *l;
+ unsigned char k;
+ int family, r;
+
+ assert(p);
+
+ e = strchr(p, '/');
+ if (e) {
+ str = strndup(p, e - p);
+ if (!str)
+ return -ENOMEM;
+
+ l = str;
+ } else
+ l = p;
+
+ r = in_addr_from_string_auto(l, &family, &buffer);
+ if (r < 0)
+ return r;
+
+ if (e) {
+ r = in_addr_parse_prefixlen(family, e+1, &k);
+ if (r < 0)
+ return r;
+ } else
+ switch (mode) {
+ case PREFIXLEN_FULL:
+ k = FAMILY_ADDRESS_SIZE(family) * 8;
+ break;
+ case PREFIXLEN_REFUSE:
+ return -ENOANO; /* To distinguish this error from others. */
+ case PREFIXLEN_LEGACY:
+ if (family == AF_INET) {
+ r = in4_addr_default_prefixlen(&buffer.in, &k);
+ if (r < 0)
+ return r;
+ } else
+ k = 0;
+ break;
+ default:
+ assert_not_reached("Invalid prefixlen mode");
+ }
+
+ if (ret_family)
+ *ret_family = family;
+ if (ret_prefix)
+ *ret_prefix = buffer;
+ if (ret_prefixlen)
+ *ret_prefixlen = k;
+
+ return 0;
+
+}
+
+static void in_addr_data_hash_func(const struct in_addr_data *a, struct siphash *state) {
+ siphash24_compress(&a->family, sizeof(a->family), state);
+ siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
+}
+
+static int in_addr_data_compare_func(const struct in_addr_data *x, const struct in_addr_data *y) {
+ int r;
+
+ r = CMP(x->family, y->family);
+ if (r != 0)
+ return r;
+
+ return memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family));
+}
+
+DEFINE_HASH_OPS(in_addr_data_hash_ops, struct in_addr_data, in_addr_data_hash_func, in_addr_data_compare_func);
+
+void in6_addr_hash_func(const struct in6_addr *addr, struct siphash *state) {
+ assert(addr);
+
+ siphash24_compress(addr, sizeof(*addr), state);
+}
+
+int in6_addr_compare_func(const struct in6_addr *a, const struct in6_addr *b) {
+ return memcmp(a, b, sizeof(*a));
+}
+
+DEFINE_HASH_OPS(in6_addr_hash_ops, struct in6_addr, in6_addr_hash_func, in6_addr_compare_func);
diff --git a/src/basic/in-addr-util.h b/src/basic/in-addr-util.h
new file mode 100644
index 0000000..24308b7
--- /dev/null
+++ b/src/basic/in-addr-util.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <stddef.h>
+#include <sys/socket.h>
+
+#include "hash-funcs.h"
+#include "macro.h"
+#include "util.h"
+
+union in_addr_union {
+ struct in_addr in;
+ struct in6_addr in6;
+ uint8_t bytes[CONST_MAX(sizeof(struct in_addr), sizeof(struct in6_addr))];
+};
+
+struct in_addr_data {
+ int family;
+ union in_addr_union address;
+};
+
+bool in4_addr_is_null(const struct in_addr *a);
+int in_addr_is_null(int family, const union in_addr_union *u);
+
+int in_addr_is_multicast(int family, const union in_addr_union *u);
+
+bool in4_addr_is_link_local(const struct in_addr *a);
+int in_addr_is_link_local(int family, const union in_addr_union *u);
+bool in6_addr_is_link_local_all_nodes(const struct in6_addr *a);
+
+bool in4_addr_is_localhost(const struct in_addr *a);
+int in_addr_is_localhost(int family, const union in_addr_union *u);
+
+bool in4_addr_is_local_multicast(const struct in_addr *a);
+bool in4_addr_is_non_local(const struct in_addr *a);
+
+bool in4_addr_equal(const struct in_addr *a, const struct in_addr *b);
+int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b);
+int in_addr_prefix_intersect(int family, const union in_addr_union *a, unsigned aprefixlen, const union in_addr_union *b, unsigned bprefixlen);
+int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen);
+int in_addr_prefix_nth(int family, union in_addr_union *u, unsigned prefixlen, uint64_t nth);
+int in_addr_random_prefix(int family, union in_addr_union *u, unsigned prefixlen_fixed_part, unsigned prefixlen);
+int in_addr_to_string(int family, const union in_addr_union *u, char **ret);
+int in_addr_prefix_to_string(int family, const union in_addr_union *u, unsigned prefixlen, char **ret);
+int in_addr_port_ifindex_name_to_string(int family, const union in_addr_union *u, uint16_t port, int ifindex, const char *server_name, char **ret);
+static inline int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret) {
+ return in_addr_port_ifindex_name_to_string(family, u, 0, ifindex, NULL, ret);
+}
+static inline int in_addr_port_to_string(int family, const union in_addr_union *u, uint16_t port, char **ret) {
+ return in_addr_port_ifindex_name_to_string(family, u, port, 0, NULL, ret);
+}
+int in_addr_from_string(int family, const char *s, union in_addr_union *ret);
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret);
+
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr);
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
+int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen);
+int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address);
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret);
+int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+
+typedef enum InAddrPrefixLenMode {
+ PREFIXLEN_FULL, /* Default to prefixlen of address size, 32 for IPv4 or 128 for IPv6, if not specified. */
+ PREFIXLEN_REFUSE, /* Fail with -ENOANO if prefixlen is not specified. */
+ PREFIXLEN_LEGACY, /* Default to legacy default prefixlen calculation from address if not specified. */
+} InAddrPrefixLenMode;
+
+int in_addr_prefix_from_string_auto_internal(const char *p, InAddrPrefixLenMode mode, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+static inline int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen) {
+ return in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_FULL, ret_family, ret_prefix, ret_prefixlen);
+}
+
+static inline size_t FAMILY_ADDRESS_SIZE(int family) {
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ return family == AF_INET6 ? 16 : 4;
+}
+
+/* Workaround for clang, explicitly specify the maximum-size element here.
+ * See also oss-fuzz#11344. */
+#define IN_ADDR_NULL ((union in_addr_union) { .in6 = {} })
+
+void in6_addr_hash_func(const struct in6_addr *addr, struct siphash *state);
+int in6_addr_compare_func(const struct in6_addr *a, const struct in6_addr *b);
+
+extern const struct hash_ops in_addr_data_hash_ops;
+extern const struct hash_ops in6_addr_hash_ops;
diff --git a/src/basic/io-util.c b/src/basic/io-util.c
new file mode 100644
index 0000000..4d74052
--- /dev/null
+++ b/src/basic/io-util.c
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <limits.h>
+#include <poll.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "io-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+int flush_fd(int fd) {
+ int count = 0;
+
+ /* Read from the specified file descriptor, until POLLIN is not set anymore, throwing away everything
+ * read. Note that some file descriptors (notable IP sockets) will trigger POLLIN even when no data can be read
+ * (due to IP packet checksum mismatches), hence this function is only safe to be non-blocking if the fd used
+ * was set to non-blocking too. */
+
+ for (;;) {
+ char buf[LINE_MAX];
+ ssize_t l;
+ int r;
+
+ r = fd_wait_for_event(fd, POLLIN, 0);
+ if (r < 0) {
+ if (r == -EINTR)
+ continue;
+
+ return r;
+ }
+ if (r == 0)
+ return count;
+
+ l = read(fd, buf, sizeof(buf));
+ if (l < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN)
+ return count;
+
+ return -errno;
+ } else if (l == 0)
+ return count;
+
+ count += (int) l;
+ }
+}
+
+ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) {
+ uint8_t *p = buf;
+ ssize_t n = 0;
+
+ assert(fd >= 0);
+ assert(buf);
+
+ /* If called with nbytes == 0, let's call read() at least
+ * once, to validate the operation */
+
+ if (nbytes > (size_t) SSIZE_MAX)
+ return -EINVAL;
+
+ do {
+ ssize_t k;
+
+ k = read(fd, p, nbytes);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN && do_poll) {
+
+ /* We knowingly ignore any return value here,
+ * and expect that any error/EOF is reported
+ * via read() */
+
+ (void) fd_wait_for_event(fd, POLLIN, USEC_INFINITY);
+ continue;
+ }
+
+ return n > 0 ? n : -errno;
+ }
+
+ if (k == 0)
+ return n;
+
+ assert((size_t) k <= nbytes);
+
+ p += k;
+ nbytes -= k;
+ n += k;
+ } while (nbytes > 0);
+
+ return n;
+}
+
+int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll) {
+ ssize_t n;
+
+ n = loop_read(fd, buf, nbytes, do_poll);
+ if (n < 0)
+ return (int) n;
+ if ((size_t) n != nbytes)
+ return -EIO;
+
+ return 0;
+}
+
+int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) {
+ const uint8_t *p = buf;
+
+ assert(fd >= 0);
+ assert(buf);
+
+ if (_unlikely_(nbytes > (size_t) SSIZE_MAX))
+ return -EINVAL;
+
+ do {
+ ssize_t k;
+
+ k = write(fd, p, nbytes);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN && do_poll) {
+ /* We knowingly ignore any return value here,
+ * and expect that any error/EOF is reported
+ * via write() */
+
+ (void) fd_wait_for_event(fd, POLLOUT, USEC_INFINITY);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ if (_unlikely_(nbytes > 0 && k == 0)) /* Can't really happen */
+ return -EIO;
+
+ assert((size_t) k <= nbytes);
+
+ p += k;
+ nbytes -= k;
+ } while (nbytes > 0);
+
+ return 0;
+}
+
+int pipe_eof(int fd) {
+ int r;
+
+ r = fd_wait_for_event(fd, POLLIN, 0);
+ if (r <= 0)
+ return r;
+
+ return !!(r & POLLHUP);
+}
+
+int fd_wait_for_event(int fd, int event, usec_t t) {
+
+ struct pollfd pollfd = {
+ .fd = fd,
+ .events = event,
+ };
+
+ struct timespec ts;
+ int r;
+
+ r = ppoll(&pollfd, 1, t == USEC_INFINITY ? NULL : timespec_store(&ts, t), NULL);
+ if (r < 0)
+ return -errno;
+ if (r == 0)
+ return 0;
+
+ if (pollfd.revents & POLLNVAL)
+ return -EBADF;
+
+ return pollfd.revents;
+}
+
+static size_t nul_length(const uint8_t *p, size_t sz) {
+ size_t n = 0;
+
+ while (sz > 0) {
+ if (*p != 0)
+ break;
+
+ n++;
+ p++;
+ sz--;
+ }
+
+ return n;
+}
+
+ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length) {
+ const uint8_t *q, *w, *e;
+ ssize_t l;
+
+ q = w = p;
+ e = q + sz;
+ while (q < e) {
+ size_t n;
+
+ n = nul_length(q, e - q);
+
+ /* If there are more than the specified run length of
+ * NUL bytes, or if this is the beginning or the end
+ * of the buffer, then seek instead of write */
+ if ((n > run_length) ||
+ (n > 0 && q == p) ||
+ (n > 0 && q + n >= e)) {
+ if (q > w) {
+ l = write(fd, w, q - w);
+ if (l < 0)
+ return -errno;
+ if (l != q -w)
+ return -EIO;
+ }
+
+ if (lseek(fd, n, SEEK_CUR) == (off_t) -1)
+ return -errno;
+
+ q += n;
+ w = q;
+ } else if (n > 0)
+ q += n;
+ else
+ q++;
+ }
+
+ if (q > w) {
+ l = write(fd, w, q - w);
+ if (l < 0)
+ return -errno;
+ if (l != q - w)
+ return -EIO;
+ }
+
+ return q - (const uint8_t*) p;
+}
+
+char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value) {
+ char *x;
+
+ x = strjoin(field, value);
+ if (x)
+ iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x);
+ return x;
+}
+
+char* set_iovec_string_field_free(struct iovec *iovec, size_t *n_iovec, const char *field, char *value) {
+ char *x;
+
+ x = set_iovec_string_field(iovec, n_iovec, field, value);
+ free(value);
+ return x;
+}
+
+struct iovec_wrapper *iovw_new(void) {
+ return malloc0(sizeof(struct iovec_wrapper));
+}
+
+void iovw_free_contents(struct iovec_wrapper *iovw, bool free_vectors) {
+ if (free_vectors)
+ for (size_t i = 0; i < iovw->count; i++)
+ free(iovw->iovec[i].iov_base);
+
+ iovw->iovec = mfree(iovw->iovec);
+ iovw->count = 0;
+ iovw->size_bytes = 0;
+}
+
+struct iovec_wrapper *iovw_free_free(struct iovec_wrapper *iovw) {
+ iovw_free_contents(iovw, true);
+
+ return mfree(iovw);
+}
+
+struct iovec_wrapper *iovw_free(struct iovec_wrapper *iovw) {
+ iovw_free_contents(iovw, false);
+
+ return mfree(iovw);
+}
+
+int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len) {
+ if (iovw->count >= IOV_MAX)
+ return -E2BIG;
+
+ if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1))
+ return -ENOMEM;
+
+ iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len);
+ return 0;
+}
+
+int iovw_put_string_field(struct iovec_wrapper *iovw, const char *field, const char *value) {
+ _cleanup_free_ char *x = NULL;
+ int r;
+
+ x = strjoin(field, value);
+ if (!x)
+ return -ENOMEM;
+
+ r = iovw_put(iovw, x, strlen(x));
+ if (r >= 0)
+ TAKE_PTR(x);
+
+ return r;
+}
+
+int iovw_put_string_field_free(struct iovec_wrapper *iovw, const char *field, char *value) {
+ _cleanup_free_ _unused_ char *free_ptr = value;
+
+ return iovw_put_string_field(iovw, field, value);
+}
+
+void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new) {
+ size_t i;
+
+ for (i = 0; i < iovw->count; i++)
+ iovw->iovec[i].iov_base = (char *)iovw->iovec[i].iov_base - old + new;
+}
+
+size_t iovw_size(struct iovec_wrapper *iovw) {
+ size_t n = 0, i;
+
+ for (i = 0; i < iovw->count; i++)
+ n += iovw->iovec[i].iov_len;
+
+ return n;
+}
diff --git a/src/basic/io-util.h b/src/basic/io-util.h
new file mode 100644
index 0000000..d817714
--- /dev/null
+++ b/src/basic/io-util.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+int flush_fd(int fd);
+
+ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll);
+int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll);
+int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll);
+
+int pipe_eof(int fd);
+
+int fd_wait_for_event(int fd, int event, usec_t timeout);
+
+ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length);
+
+static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, size_t n) {
+ size_t j, r = 0;
+
+ for (j = 0; j < n; j++)
+ r += i[j].iov_len;
+
+ return r;
+}
+
+static inline size_t IOVEC_INCREMENT(struct iovec *i, size_t n, size_t k) {
+ size_t j;
+
+ for (j = 0; j < n; j++) {
+ size_t sub;
+
+ if (_unlikely_(k <= 0))
+ break;
+
+ sub = MIN(i[j].iov_len, k);
+ i[j].iov_len -= sub;
+ i[j].iov_base = (uint8_t*) i[j].iov_base + sub;
+ k -= sub;
+ }
+
+ return k;
+}
+
+static inline bool FILE_SIZE_VALID(uint64_t l) {
+ /* ftruncate() and friends take an unsigned file size, but actually cannot deal with file sizes larger than
+ * 2^63 since the kernel internally handles it as signed value. This call allows checking for this early. */
+
+ return (l >> 63) == 0;
+}
+
+static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) {
+
+ /* Same as above, but allows one extra value: -1 as indication for infinity. */
+
+ if (l == (uint64_t) -1)
+ return true;
+
+ return FILE_SIZE_VALID(l);
+
+}
+
+#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) }
+#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len)
+#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) string, strlen(string))
+#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string)
+
+char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value);
+char* set_iovec_string_field_free(struct iovec *iovec, size_t *n_iovec, const char *field, char *value);
+
+struct iovec_wrapper {
+ struct iovec *iovec;
+ size_t count;
+ size_t size_bytes;
+};
+
+struct iovec_wrapper *iovw_new(void);
+struct iovec_wrapper *iovw_free(struct iovec_wrapper *iovw);
+struct iovec_wrapper *iovw_free_free(struct iovec_wrapper *iovw);
+void iovw_free_contents(struct iovec_wrapper *iovw, bool free_vectors);
+int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len);
+int iovw_put_string_field(struct iovec_wrapper *iovw, const char *field, const char *value);
+int iovw_put_string_field_free(struct iovec_wrapper *iovw, const char *field, char *value);
+void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new);
+size_t iovw_size(struct iovec_wrapper *iovw);
diff --git a/src/basic/ioprio.h b/src/basic/ioprio.h
new file mode 100644
index 0000000..3fb168d
--- /dev/null
+++ b/src/basic/ioprio.h
@@ -0,0 +1,56 @@
+#ifndef IOPRIO_H
+#define IOPRIO_H
+
+/* This is minimal version of Linux' linux/ioprio.h header file, which
+ * is licensed GPL2 */
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/*
+ * Gives us 8 prio classes with 13-bits of data for each class
+ */
+#define IOPRIO_BITS 16
+#define IOPRIO_N_CLASSES 8
+#define IOPRIO_CLASS_SHIFT 13
+#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
+#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
+#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
+
+#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+
+/*
+ * These are the io priority groups as implemented by CFQ. RT is the realtime
+ * class, it always gets premium service. BE is the best-effort scheduling
+ * class, the default for any process. IDLE is the idle scheduling class, it
+ * is only served when no one else is using the disk.
+ */
+enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+};
+
+/*
+ * 8 best effort priority levels are supported
+ */
+#define IOPRIO_BE_NR (8)
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+static inline int ioprio_set(int which, int who, int ioprio) {
+ return syscall(__NR_ioprio_set, which, who, ioprio);
+}
+
+static inline int ioprio_get(int which, int who) {
+ return syscall(__NR_ioprio_get, which, who);
+}
+
+#endif
diff --git a/src/basic/kbd-util.c b/src/basic/kbd-util.c
new file mode 100644
index 0000000..267803e
--- /dev/null
+++ b/src/basic/kbd-util.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ftw.h>
+
+#include "kbd-util.h"
+#include "log.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+static thread_local Set *keymaps = NULL;
+
+static int nftw_cb(
+ const char *fpath,
+ const struct stat *sb,
+ int tflag,
+ struct FTW *ftwbuf) {
+
+ _cleanup_free_ char *p = NULL;
+ char *e;
+ int r;
+
+ if (tflag != FTW_F)
+ return 0;
+
+ if (!endswith(fpath, ".map") &&
+ !endswith(fpath, ".map.gz"))
+ return 0;
+
+ p = strdup(basename(fpath));
+ if (!p)
+ return FTW_STOP;
+
+ e = endswith(p, ".map");
+ if (e)
+ *e = 0;
+
+ e = endswith(p, ".map.gz");
+ if (e)
+ *e = 0;
+
+ if (!keymap_is_valid(p))
+ return 0;
+
+ r = set_consume(keymaps, TAKE_PTR(p));
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ return 0;
+}
+
+int get_keymaps(char ***ret) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *dir;
+ int r;
+
+ keymaps = set_new(&string_hash_ops);
+ if (!keymaps)
+ return -ENOMEM;
+
+ NULSTR_FOREACH(dir, KBD_KEYMAP_DIRS) {
+ r = nftw(dir, nftw_cb, 20, FTW_PHYS|FTW_ACTIONRETVAL);
+
+ if (r == FTW_STOP)
+ log_debug("Directory not found %s", dir);
+ else if (r < 0)
+ log_debug_errno(r, "Can't add keymap: %m");
+ }
+
+ l = set_get_strv(keymaps);
+ if (!l) {
+ set_free_free(keymaps);
+ return -ENOMEM;
+ }
+
+ set_free(keymaps);
+
+ if (strv_isempty(l))
+ return -ENOENT;
+
+ strv_sort(l);
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+}
+
+bool keymap_is_valid(const char *name) {
+
+ if (isempty(name))
+ return false;
+
+ if (strlen(name) >= 128)
+ return false;
+
+ if (!utf8_is_valid(name))
+ return false;
+
+ if (!filename_is_valid(name))
+ return false;
+
+ if (!string_is_safe(name))
+ return false;
+
+ return true;
+}
diff --git a/src/basic/kbd-util.h b/src/basic/kbd-util.h
new file mode 100644
index 0000000..6714aeb
--- /dev/null
+++ b/src/basic/kbd-util.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#if HAVE_SPLIT_USR
+#define KBD_KEYMAP_DIRS \
+ "/usr/share/keymaps/\0" \
+ "/usr/share/kbd/keymaps/\0" \
+ "/usr/lib/kbd/keymaps/\0" \
+ "/lib/kbd/keymaps/\0"
+#else
+#define KBD_KEYMAP_DIRS \
+ "/usr/share/keymaps/\0" \
+ "/usr/share/kbd/keymaps/\0" \
+ "/usr/lib/kbd/keymaps/\0"
+#endif
+
+int get_keymaps(char ***l);
+bool keymap_is_valid(const char *name);
diff --git a/src/basic/khash.c b/src/basic/khash.c
new file mode 100644
index 0000000..6a4d1dd
--- /dev/null
+++ b/src/basic/khash.c
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/if_alg.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "khash.h"
+#include "macro.h"
+#include "missing_socket.h"
+#include "string-util.h"
+#include "util.h"
+
+/* On current kernels the maximum digest (according to "grep digestsize /proc/crypto | sort -u") is actually 32, but
+ * let's add some extra room, the few wasted bytes don't really matter... */
+#define LONGEST_DIGEST 128
+
+struct khash {
+ int fd;
+ char *algorithm;
+ uint8_t digest[LONGEST_DIGEST+1];
+ size_t digest_size;
+ bool digest_valid;
+};
+
+int khash_supported(void) {
+ static const union {
+ struct sockaddr sa;
+ struct sockaddr_alg alg;
+ } sa = {
+ .alg.salg_family = AF_ALG,
+ .alg.salg_type = "hash",
+ .alg.salg_name = "sha256", /* a very common algorithm */
+ };
+
+ static int cached = -1;
+
+ if (cached < 0) {
+ _cleanup_close_ int fd1 = -1, fd2 = -1;
+ uint8_t buf[LONGEST_DIGEST+1];
+
+ fd1 = socket(AF_ALG, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+ if (fd1 < 0) {
+ /* The kernel returns EAFNOSUPPORT if AF_ALG is not supported at all */
+ if (IN_SET(errno, EAFNOSUPPORT, EOPNOTSUPP))
+ return (cached = false);
+
+ return -errno;
+ }
+
+ if (bind(fd1, &sa.sa, sizeof(sa)) < 0) {
+ /* The kernel returns ENOENT if the selected algorithm is not supported at all. We use a check
+ * for SHA256 as a proxy for whether the whole API is supported at all. After all it's one of
+ * the most common hash functions, and if it isn't supported, that's ample indication that
+ * something is really off. */
+
+ if (IN_SET(errno, ENOENT, EOPNOTSUPP))
+ return (cached = false);
+
+ return -errno;
+ }
+
+ fd2 = accept4(fd1, NULL, 0, SOCK_CLOEXEC);
+ if (fd2 < 0) {
+ if (errno == EOPNOTSUPP)
+ return (cached = false);
+
+ return -errno;
+ }
+
+ if (recv(fd2, buf, sizeof(buf), 0) < 0) {
+ /* On some kernels we get ENOKEY for non-keyed hash functions (such as sha256), let's refuse
+ * using the API in those cases, since the kernel is
+ * broken. https://github.com/systemd/systemd/issues/8278 */
+
+ if (IN_SET(errno, ENOKEY, EOPNOTSUPP))
+ return (cached = false);
+ }
+
+ cached = true;
+ }
+
+ return cached;
+}
+
+int khash_new_with_key(khash **ret, const char *algorithm, const void *key, size_t key_size) {
+ union {
+ struct sockaddr sa;
+ struct sockaddr_alg alg;
+ } sa = {
+ .alg.salg_family = AF_ALG,
+ .alg.salg_type = "hash",
+ };
+
+ _cleanup_(khash_unrefp) khash *h = NULL;
+ _cleanup_close_ int fd = -1;
+ int supported;
+ ssize_t n;
+
+ assert(ret);
+ assert(key || key_size == 0);
+
+ /* Filter out an empty algorithm early, as we do not support an algorithm by that name. */
+ if (isempty(algorithm))
+ return -EINVAL;
+
+ /* Overly long hash algorithm names we definitely do not support */
+ if (strlen(algorithm) >= sizeof(sa.alg.salg_name))
+ return -EOPNOTSUPP;
+
+ supported = khash_supported();
+ if (supported < 0)
+ return supported;
+ if (supported == 0)
+ return -EOPNOTSUPP;
+
+ fd = socket(AF_ALG, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ strcpy((char*) sa.alg.salg_name, algorithm);
+ if (bind(fd, &sa.sa, sizeof(sa)) < 0) {
+ if (errno == ENOENT)
+ return -EOPNOTSUPP;
+ return -errno;
+ }
+
+ if (key) {
+ if (setsockopt(fd, SOL_ALG, ALG_SET_KEY, key, key_size) < 0)
+ return -errno;
+ }
+
+ h = new0(khash, 1);
+ if (!h)
+ return -ENOMEM;
+
+ h->fd = accept4(fd, NULL, 0, SOCK_CLOEXEC);
+ if (h->fd < 0)
+ return -errno;
+
+ h->algorithm = strdup(algorithm);
+ if (!h->algorithm)
+ return -ENOMEM;
+
+ /* Temporary fix for rc kernel bug: https://bugzilla.redhat.com/show_bug.cgi?id=1395896 */
+ (void) send(h->fd, NULL, 0, 0);
+
+ /* Figure out the digest size */
+ n = recv(h->fd, h->digest, sizeof(h->digest), 0);
+ if (n < 0)
+ return -errno;
+ if (n >= LONGEST_DIGEST) /* longer than what we expected? If so, we don't support this */
+ return -EOPNOTSUPP;
+
+ h->digest_size = (size_t) n;
+ h->digest_valid = true;
+
+ /* Temporary fix for rc kernel bug: https://bugzilla.redhat.com/show_bug.cgi?id=1395896 */
+ (void) send(h->fd, NULL, 0, 0);
+
+ *ret = TAKE_PTR(h);
+
+ return 0;
+}
+
+int khash_new(khash **ret, const char *algorithm) {
+ return khash_new_with_key(ret, algorithm, NULL, 0);
+}
+
+khash* khash_unref(khash *h) {
+ if (!h)
+ return NULL;
+
+ safe_close(h->fd);
+ free(h->algorithm);
+ return mfree(h);
+}
+
+int khash_dup(khash *h, khash **ret) {
+ _cleanup_(khash_unrefp) khash *copy = NULL;
+
+ assert(h);
+ assert(ret);
+
+ copy = newdup(khash, h, 1);
+ if (!copy)
+ return -ENOMEM;
+
+ copy->fd = -1;
+ copy->algorithm = strdup(h->algorithm);
+ if (!copy->algorithm)
+ return -ENOMEM;
+
+ copy->fd = accept4(h->fd, NULL, 0, SOCK_CLOEXEC);
+ if (copy->fd < 0)
+ return -errno;
+
+ *ret = TAKE_PTR(copy);
+
+ return 0;
+}
+
+const char *khash_get_algorithm(khash *h) {
+ assert(h);
+
+ return h->algorithm;
+}
+
+size_t khash_get_size(khash *h) {
+ assert(h);
+
+ return h->digest_size;
+}
+
+int khash_reset(khash *h) {
+ ssize_t n;
+
+ assert(h);
+
+ n = send(h->fd, NULL, 0, 0);
+ if (n < 0)
+ return -errno;
+
+ h->digest_valid = false;
+
+ return 0;
+}
+
+int khash_put(khash *h, const void *buffer, size_t size) {
+ ssize_t n;
+
+ assert(h);
+ assert(buffer || size == 0);
+
+ if (size <= 0)
+ return 0;
+
+ n = send(h->fd, buffer, size, MSG_MORE);
+ if (n < 0)
+ return -errno;
+
+ h->digest_valid = false;
+
+ return 0;
+}
+
+int khash_put_iovec(khash *h, const struct iovec *iovec, size_t n) {
+ struct msghdr mh = {
+ .msg_iov = (struct iovec*) iovec,
+ .msg_iovlen = n,
+ };
+ ssize_t k;
+
+ assert(h);
+ assert(iovec || n == 0);
+
+ if (n <= 0)
+ return 0;
+
+ k = sendmsg(h->fd, &mh, MSG_MORE);
+ if (k < 0)
+ return -errno;
+
+ h->digest_valid = false;
+
+ return 0;
+}
+
+static int retrieve_digest(khash *h) {
+ ssize_t n;
+
+ assert(h);
+
+ if (h->digest_valid)
+ return 0;
+
+ n = recv(h->fd, h->digest, h->digest_size, 0);
+ if (n < 0)
+ return n;
+ if ((size_t) n != h->digest_size) /* digest size changed? */
+ return -EIO;
+
+ h->digest_valid = true;
+
+ return 0;
+}
+
+int khash_digest_data(khash *h, const void **ret) {
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ r = retrieve_digest(h);
+ if (r < 0)
+ return r;
+
+ *ret = h->digest;
+ return 0;
+}
+
+int khash_digest_string(khash *h, char **ret) {
+ int r;
+ char *p;
+
+ assert(h);
+ assert(ret);
+
+ r = retrieve_digest(h);
+ if (r < 0)
+ return r;
+
+ p = hexmem(h->digest, h->digest_size);
+ if (!p)
+ return -ENOMEM;
+
+ *ret = p;
+ return 0;
+}
diff --git a/src/basic/khash.h b/src/basic/khash.h
new file mode 100644
index 0000000..a343d30
--- /dev/null
+++ b/src/basic/khash.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include "macro.h"
+
+typedef struct khash khash;
+
+int khash_supported(void);
+
+/* For plain hash functions. Hash functions commonly supported on today's kernels are: crc32c, crct10dif, crc32,
+ * sha224, sha256, sha512, sha384, sha1, md5, md4, sha3-224, sha3-256, sha3-384, sha3-512, and more. */
+int khash_new(khash **ret, const char *algorithm);
+
+/* For keyed hash functions. Hash functions commonly supported on today's kernels are: hmac(sha256), cmac(aes),
+ * cmac(des3_ede), hmac(sha3-512), hmac(sha3-384), hmac(sha3-256), hmac(sha3-224), hmac(rmd160), hmac(rmd128),
+ * hmac(sha224), hmac(sha512), hmac(sha384), hmac(sha1), hmac(md5), and more. */
+int khash_new_with_key(khash **ret, const char *algorithm, const void *key, size_t key_size);
+
+int khash_dup(khash *h, khash **ret);
+khash* khash_unref(khash *h);
+
+const char *khash_get_algorithm(khash *h);
+size_t khash_get_size(khash *h);
+
+int khash_reset(khash *h);
+
+int khash_put(khash *h, const void *buffer, size_t size);
+int khash_put_iovec(khash *h, const struct iovec *iovec, size_t n);
+
+int khash_digest_data(khash *h, const void **ret);
+int khash_digest_string(khash *h, char **ret);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(khash*, khash_unref);
diff --git a/src/basic/label.c b/src/basic/label.c
new file mode 100644
index 0000000..1fc492f
--- /dev/null
+++ b/src/basic/label.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "btrfs-util.h"
+#include "fs-util.h"
+#include "label.h"
+#include "macro.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+
+int label_fix_container(const char *path, const char *inside_path, LabelFixFlags flags) {
+ int r, q;
+
+ r = mac_selinux_fix_container(path, inside_path, flags);
+ q = mac_smack_fix_container(path, inside_path, flags);
+
+ if (r < 0)
+ return r;
+ if (q < 0)
+ return q;
+
+ return 0;
+}
+
+int symlink_label(const char *old_path, const char *new_path) {
+ int r;
+
+ assert(old_path);
+ assert(new_path);
+
+ r = mac_selinux_create_file_prepare(new_path, S_IFLNK);
+ if (r < 0)
+ return r;
+
+ if (symlink(old_path, new_path) < 0)
+ r = -errno;
+
+ mac_selinux_create_file_clear();
+
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix(new_path, 0);
+}
+
+int symlink_atomic_label(const char *from, const char *to) {
+ int r;
+
+ assert(from);
+ assert(to);
+
+ r = mac_selinux_create_file_prepare(to, S_IFLNK);
+ if (r < 0)
+ return r;
+
+ if (symlink_atomic(from, to) < 0)
+ r = -errno;
+
+ mac_selinux_create_file_clear();
+
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix(to, 0);
+}
+
+int mknod_label(const char *pathname, mode_t mode, dev_t dev) {
+ int r;
+
+ assert(pathname);
+
+ r = mac_selinux_create_file_prepare(pathname, mode);
+ if (r < 0)
+ return r;
+
+ if (mknod(pathname, mode, dev) < 0)
+ r = -errno;
+
+ mac_selinux_create_file_clear();
+
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix(pathname, 0);
+}
+
+int btrfs_subvol_make_label(const char *path) {
+ int r;
+
+ assert(path);
+
+ r = mac_selinux_create_file_prepare(path, S_IFDIR);
+ if (r < 0)
+ return r;
+
+ r = btrfs_subvol_make(path);
+ mac_selinux_create_file_clear();
+
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix(path, 0);
+}
diff --git a/src/basic/label.h b/src/basic/label.h
new file mode 100644
index 0000000..b5118d9
--- /dev/null
+++ b/src/basic/label.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+typedef enum LabelFixFlags {
+ LABEL_IGNORE_ENOENT = 1 << 0,
+ LABEL_IGNORE_EROFS = 1 << 1,
+} LabelFixFlags;
+
+int label_fix_container(const char *path, const char *inside_path, LabelFixFlags flags);
+static inline int label_fix(const char *path, LabelFixFlags flags) {
+ return label_fix_container(path, path, flags);
+}
+
+int mkdir_label(const char *path, mode_t mode);
+int mkdirat_label(int dirfd, const char *path, mode_t mode);
+int symlink_label(const char *old_path, const char *new_path);
+int symlink_atomic_label(const char *from, const char *to);
+int mknod_label(const char *pathname, mode_t mode, dev_t dev);
+
+int btrfs_subvol_make_label(const char *path);
diff --git a/src/basic/limits-util.c b/src/basic/limits-util.c
new file mode 100644
index 0000000..259c311
--- /dev/null
+++ b/src/basic/limits-util.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "limits-util.h"
+#include "memory-util.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "string-util.h"
+
+uint64_t physical_memory(void) {
+ _cleanup_free_ char *root = NULL, *value = NULL;
+ uint64_t mem, lim;
+ size_t ps;
+ long sc;
+ int r;
+
+ /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
+ * memory.
+ *
+ * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
+ * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
+
+ sc = sysconf(_SC_PHYS_PAGES);
+ assert(sc > 0);
+
+ ps = page_size();
+ mem = (uint64_t) sc * (uint64_t) ps;
+
+ r = cg_get_root_path(&root);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+
+ r = cg_all_unified();
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+ if (r > 0) {
+ r = cg_get_attribute("memory", root, "memory.max", &value);
+ if (r == -ENOENT) /* Field does not exist on the system's top-level cgroup, hence don't
+ * complain. (Note that it might exist on our own root though, if we live
+ * in a cgroup namespace, hence check anyway instead of not even
+ * trying.) */
+ return mem;
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+
+ if (streq(value, "max"))
+ return mem;
+ } else {
+ r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
+ return mem;
+ }
+ }
+
+ r = safe_atou64(value, &lim);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
+ return mem;
+ }
+ if (lim == UINT64_MAX)
+ return mem;
+
+ /* Make sure the limit is a multiple of our own page size */
+ lim /= ps;
+ lim *= ps;
+
+ return MIN(mem, lim);
+}
+
+uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
+ uint64_t p, m, ps, r;
+
+ assert(max > 0);
+
+ /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
+ * the result is a multiple of the page size (rounds down). */
+
+ ps = page_size();
+ assert(ps > 0);
+
+ p = physical_memory() / ps;
+ assert(p > 0);
+
+ m = p * v;
+ if (m / p != v)
+ return UINT64_MAX;
+
+ m /= max;
+
+ r = m * ps;
+ if (r / ps != m)
+ return UINT64_MAX;
+
+ return r;
+}
+
+uint64_t system_tasks_max(void) {
+ uint64_t a = TASKS_MAX, b = TASKS_MAX;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
+ * limit:
+ *
+ * a) the maximum tasks value the kernel allows on this architecture
+ * b) the cgroups pids_max attribute for the system
+ * c) the kernel's configured maximum PID value
+ *
+ * And then pick the smallest of the three */
+
+ r = procfs_tasks_get_limit(&a);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
+
+ r = cg_get_root_path(&root);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
+ else {
+ r = cg_get_attribute_as_uint64("pids", root, "pids.max", &b);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
+ }
+
+ return MIN3(TASKS_MAX,
+ a <= 0 ? TASKS_MAX : a,
+ b <= 0 ? TASKS_MAX : b);
+}
+
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
+ uint64_t t, m;
+
+ assert(max > 0);
+
+ /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
+ * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
+
+ t = system_tasks_max();
+ assert(t > 0);
+
+ m = t * v;
+ if (m / t != v) /* overflow? */
+ return UINT64_MAX;
+
+ return m / max;
+}
diff --git a/src/basic/limits-util.h b/src/basic/limits-util.h
new file mode 100644
index 0000000..d267fcf
--- /dev/null
+++ b/src/basic/limits-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+uint64_t physical_memory(void);
+uint64_t physical_memory_scale(uint64_t v, uint64_t max);
+
+uint64_t system_tasks_max(void);
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
diff --git a/src/basic/linux/README b/src/basic/linux/README
new file mode 100644
index 0000000..2bb70fd
--- /dev/null
+++ b/src/basic/linux/README
@@ -0,0 +1,6 @@
+The files in this directory are copied from current kernel master
+(b06ed1e7a2fa9b636f368a9e97c3c8877623f8b2) or WireGuard master
+(8416093498ac2c754536dad4757c5d86c9ba8809), and the following
+modifications are applied:
+- btrfs.h: drop '__user' attributes
+- if.h: drop '#include <linux/compiler.h>' and '__user' attributes
diff --git a/src/basic/linux/btrfs.h b/src/basic/linux/btrfs.h
new file mode 100644
index 0000000..d22f197
--- /dev/null
+++ b/src/basic/linux/btrfs.h
@@ -0,0 +1,991 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef _UAPI_LINUX_BTRFS_H
+#define _UAPI_LINUX_BTRFS_H
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define BTRFS_IOCTL_MAGIC 0x94
+#define BTRFS_VOL_NAME_MAX 255
+#define BTRFS_LABEL_SIZE 256
+
+/* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
+struct btrfs_ioctl_vol_args {
+ __s64 fd;
+ char name[BTRFS_PATH_NAME_MAX + 1];
+};
+
+#define BTRFS_DEVICE_PATH_NAME_MAX 1024
+#define BTRFS_SUBVOL_NAME_MAX 4039
+
+#ifndef __KERNEL__
+/* Deprecated since 5.7 */
+# define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
+#endif
+#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
+#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
+
+#define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3)
+
+#define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4)
+
+#define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED \
+ (BTRFS_SUBVOL_RDONLY | \
+ BTRFS_SUBVOL_QGROUP_INHERIT | \
+ BTRFS_DEVICE_SPEC_BY_ID | \
+ BTRFS_SUBVOL_SPEC_BY_ID)
+
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_UUID_SIZE 16
+#define BTRFS_UUID_UNPARSED_SIZE 37
+
+/*
+ * flags definition for qgroup limits
+ *
+ * Used by:
+ * struct btrfs_qgroup_limit.flags
+ * struct btrfs_qgroup_limit_item.flags
+ */
+#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
+
+struct btrfs_qgroup_limit {
+ __u64 flags;
+ __u64 max_rfer;
+ __u64 max_excl;
+ __u64 rsv_rfer;
+ __u64 rsv_excl;
+};
+
+/*
+ * flags definition for qgroup inheritance
+ *
+ * Used by:
+ * struct btrfs_qgroup_inherit.flags
+ */
+#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
+
+struct btrfs_qgroup_inherit {
+ __u64 flags;
+ __u64 num_qgroups;
+ __u64 num_ref_copies;
+ __u64 num_excl_copies;
+ struct btrfs_qgroup_limit lim;
+ __u64 qgroups[0];
+};
+
+struct btrfs_ioctl_qgroup_limit_args {
+ __u64 qgroupid;
+ struct btrfs_qgroup_limit lim;
+};
+
+/*
+ * Arguments for specification of subvolumes or devices, supporting by-name or
+ * by-id and flags
+ *
+ * The set of supported flags depends on the ioctl
+ *
+ * BTRFS_SUBVOL_RDONLY is also provided/consumed by the following ioctls:
+ * - BTRFS_IOC_SUBVOL_GETFLAGS
+ * - BTRFS_IOC_SUBVOL_SETFLAGS
+ */
+
+/* Supported flags for BTRFS_IOC_RM_DEV_V2 */
+#define BTRFS_DEVICE_REMOVE_ARGS_MASK \
+ (BTRFS_DEVICE_SPEC_BY_ID)
+
+/* Supported flags for BTRFS_IOC_SNAP_CREATE_V2 and BTRFS_IOC_SUBVOL_CREATE_V2 */
+#define BTRFS_SUBVOL_CREATE_ARGS_MASK \
+ (BTRFS_SUBVOL_RDONLY | \
+ BTRFS_SUBVOL_QGROUP_INHERIT)
+
+/* Supported flags for BTRFS_IOC_SNAP_DESTROY_V2 */
+#define BTRFS_SUBVOL_DELETE_ARGS_MASK \
+ (BTRFS_SUBVOL_SPEC_BY_ID)
+
+struct btrfs_ioctl_vol_args_v2 {
+ __s64 fd;
+ __u64 transid;
+ __u64 flags;
+ union {
+ struct {
+ __u64 size;
+ struct btrfs_qgroup_inherit *qgroup_inherit;
+ };
+ __u64 unused[4];
+ };
+ union {
+ char name[BTRFS_SUBVOL_NAME_MAX + 1];
+ __u64 devid;
+ __u64 subvolid;
+ };
+};
+
+/*
+ * structure to report errors and progress to userspace, either as a
+ * result of a finished scrub, a canceled scrub or a progress inquiry
+ */
+struct btrfs_scrub_progress {
+ __u64 data_extents_scrubbed; /* # of data extents scrubbed */
+ __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */
+ __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */
+ __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */
+ __u64 read_errors; /* # of read errors encountered (EIO) */
+ __u64 csum_errors; /* # of failed csum checks */
+ __u64 verify_errors; /* # of occurences, where the metadata
+ * of a tree block did not match the
+ * expected values, like generation or
+ * logical */
+ __u64 no_csum; /* # of 4k data block for which no csum
+ * is present, probably the result of
+ * data written with nodatasum */
+ __u64 csum_discards; /* # of csum for which no data was found
+ * in the extent tree. */
+ __u64 super_errors; /* # of bad super blocks encountered */
+ __u64 malloc_errors; /* # of internal kmalloc errors. These
+ * will likely cause an incomplete
+ * scrub */
+ __u64 uncorrectable_errors; /* # of errors where either no intact
+ * copy was found or the writeback
+ * failed */
+ __u64 corrected_errors; /* # of errors corrected */
+ __u64 last_physical; /* last physical address scrubbed. In
+ * case a scrub was aborted, this can
+ * be used to restart the scrub */
+ __u64 unverified_errors; /* # of occurences where a read for a
+ * full (64k) bio failed, but the re-
+ * check succeeded for each 4k piece.
+ * Intermittent error. */
+};
+
+#define BTRFS_SCRUB_READONLY 1
+struct btrfs_ioctl_scrub_args {
+ __u64 devid; /* in */
+ __u64 start; /* in */
+ __u64 end; /* in */
+ __u64 flags; /* in */
+ struct btrfs_scrub_progress progress; /* out */
+ /* pad to 1k */
+ __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
+};
+
+#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0
+#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1
+struct btrfs_ioctl_dev_replace_start_params {
+ __u64 srcdevid; /* in, if 0, use srcdev_name instead */
+ __u64 cont_reading_from_srcdev_mode; /* in, see #define
+ * above */
+ __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
+ __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
+};
+
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4
+struct btrfs_ioctl_dev_replace_status_params {
+ __u64 replace_state; /* out, see #define above */
+ __u64 progress_1000; /* out, 0 <= x <= 1000 */
+ __u64 time_started; /* out, seconds since 1-Jan-1970 */
+ __u64 time_stopped; /* out, seconds since 1-Jan-1970 */
+ __u64 num_write_errors; /* out */
+ __u64 num_uncorrectable_read_errors; /* out */
+};
+
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS 3
+struct btrfs_ioctl_dev_replace_args {
+ __u64 cmd; /* in */
+ __u64 result; /* out */
+
+ union {
+ struct btrfs_ioctl_dev_replace_start_params start;
+ struct btrfs_ioctl_dev_replace_status_params status;
+ }; /* in/out */
+
+ __u64 spare[64];
+};
+
+struct btrfs_ioctl_dev_info_args {
+ __u64 devid; /* in/out */
+ __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
+ __u64 bytes_used; /* out */
+ __u64 total_bytes; /* out */
+ __u64 unused[379]; /* pad to 4k */
+ __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
+};
+
+/*
+ * Retrieve information about the filesystem
+ */
+
+/* Request information about checksum type and size */
+#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0)
+
+/* Request information about filesystem generation */
+#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1)
+/* Request information about filesystem metadata UUID */
+#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2)
+
+struct btrfs_ioctl_fs_info_args {
+ __u64 max_id; /* out */
+ __u64 num_devices; /* out */
+ __u8 fsid[BTRFS_FSID_SIZE]; /* out */
+ __u32 nodesize; /* out */
+ __u32 sectorsize; /* out */
+ __u32 clone_alignment; /* out */
+ /* See BTRFS_FS_INFO_FLAG_* */
+ __u16 csum_type; /* out */
+ __u16 csum_size; /* out */
+ __u64 flags; /* in/out */
+ __u64 generation; /* out */
+ __u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */
+ __u8 reserved[944]; /* pad to 1k */
+};
+
+/*
+ * feature flags
+ *
+ * Used by:
+ * struct btrfs_ioctl_feature_flags
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
+/*
+ * Older kernels (< 4.9) on big-endian systems produced broken free space tree
+ * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions
+ * < 4.7.3). If this bit is clear, then the free space tree cannot be trusted.
+ * btrfs-progs can also intentionally clear this bit to ask the kernel to
+ * rebuild the free space tree, however this might not work on older kernels
+ * that do not know about this bit. If not sure, clear the cache manually on
+ * first mount when booting older kernel versions.
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
+
+#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4)
+
+/*
+ * older kernels tried to do bigger metadata blocks, but the
+ * code was pretty buggy. Lets not let them try anymore.
+ */
+#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
+
+#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
+#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
+#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
+#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
+#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10)
+#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
+
+struct btrfs_ioctl_feature_flags {
+ __u64 compat_flags;
+ __u64 compat_ro_flags;
+ __u64 incompat_flags;
+};
+
+/* balance control ioctl modes */
+#define BTRFS_BALANCE_CTL_PAUSE 1
+#define BTRFS_BALANCE_CTL_CANCEL 2
+
+/*
+ * this is packed, because it should be exactly the same as its disk
+ * byte order counterpart (struct btrfs_disk_balance_args)
+ */
+struct btrfs_balance_args {
+ __u64 profiles;
+ union {
+ __u64 usage;
+ struct {
+ __u32 usage_min;
+ __u32 usage_max;
+ };
+ };
+ __u64 devid;
+ __u64 pstart;
+ __u64 pend;
+ __u64 vstart;
+ __u64 vend;
+
+ __u64 target;
+
+ __u64 flags;
+
+ /*
+ * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
+ * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum
+ * and maximum
+ */
+ union {
+ __u64 limit; /* limit number of processed chunks */
+ struct {
+ __u32 limit_min;
+ __u32 limit_max;
+ };
+ };
+
+ /*
+ * Process chunks that cross stripes_min..stripes_max devices,
+ * BTRFS_BALANCE_ARGS_STRIPES_RANGE
+ */
+ __u32 stripes_min;
+ __u32 stripes_max;
+
+ __u64 unused[6];
+} __attribute__ ((__packed__));
+
+/* report balance progress to userspace */
+struct btrfs_balance_progress {
+ __u64 expected; /* estimated # of chunks that will be
+ * relocated to fulfill the request */
+ __u64 considered; /* # of chunks we have considered so far */
+ __u64 completed; /* # of chunks relocated so far */
+};
+
+/*
+ * flags definition for balance
+ *
+ * Restriper's general type filter
+ *
+ * Used by:
+ * btrfs_ioctl_balance_args.flags
+ * btrfs_balance_control.flags (internal)
+ */
+#define BTRFS_BALANCE_DATA (1ULL << 0)
+#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
+#define BTRFS_BALANCE_METADATA (1ULL << 2)
+
+#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
+ BTRFS_BALANCE_SYSTEM | \
+ BTRFS_BALANCE_METADATA)
+
+#define BTRFS_BALANCE_FORCE (1ULL << 3)
+#define BTRFS_BALANCE_RESUME (1ULL << 4)
+
+/*
+ * flags definitions for per-type balance args
+ *
+ * Balance filters
+ *
+ * Used by:
+ * struct btrfs_balance_args
+ */
+#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
+#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
+#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
+#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
+#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
+#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
+#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6)
+#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
+#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10)
+
+#define BTRFS_BALANCE_ARGS_MASK \
+ (BTRFS_BALANCE_ARGS_PROFILES | \
+ BTRFS_BALANCE_ARGS_USAGE | \
+ BTRFS_BALANCE_ARGS_DEVID | \
+ BTRFS_BALANCE_ARGS_DRANGE | \
+ BTRFS_BALANCE_ARGS_VRANGE | \
+ BTRFS_BALANCE_ARGS_LIMIT | \
+ BTRFS_BALANCE_ARGS_LIMIT_RANGE | \
+ BTRFS_BALANCE_ARGS_STRIPES_RANGE | \
+ BTRFS_BALANCE_ARGS_USAGE_RANGE)
+
+/*
+ * Profile changing flags. When SOFT is set we won't relocate chunk if
+ * it already has the target profile (even though it may be
+ * half-filled).
+ */
+#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
+#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
+
+
+/*
+ * flags definition for balance state
+ *
+ * Used by:
+ * struct btrfs_ioctl_balance_args.state
+ */
+#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
+#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
+#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
+
+struct btrfs_ioctl_balance_args {
+ __u64 flags; /* in/out */
+ __u64 state; /* out */
+
+ struct btrfs_balance_args data; /* in/out */
+ struct btrfs_balance_args meta; /* in/out */
+ struct btrfs_balance_args sys; /* in/out */
+
+ struct btrfs_balance_progress stat; /* out */
+
+ __u64 unused[72]; /* pad to 1k */
+};
+
+#define BTRFS_INO_LOOKUP_PATH_MAX 4080
+struct btrfs_ioctl_ino_lookup_args {
+ __u64 treeid;
+ __u64 objectid;
+ char name[BTRFS_INO_LOOKUP_PATH_MAX];
+};
+
+#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
+struct btrfs_ioctl_ino_lookup_user_args {
+ /* in, inode number containing the subvolume of 'subvolid' */
+ __u64 dirid;
+ /* in */
+ __u64 treeid;
+ /* out, name of the subvolume of 'treeid' */
+ char name[BTRFS_VOL_NAME_MAX + 1];
+ /*
+ * out, constructed path from the directory with which the ioctl is
+ * called to dirid
+ */
+ char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
+};
+
+/* Search criteria for the btrfs SEARCH ioctl family. */
+struct btrfs_ioctl_search_key {
+ /*
+ * The tree we're searching in. 1 is the tree of tree roots, 2 is the
+ * extent tree, etc...
+ *
+ * A special tree_id value of 0 will cause a search in the subvolume
+ * tree that the inode which is passed to the ioctl is part of.
+ */
+ __u64 tree_id; /* in */
+
+ /*
+ * When doing a tree search, we're actually taking a slice from a
+ * linear search space of 136-bit keys.
+ *
+ * A full 136-bit tree key is composed as:
+ * (objectid << 72) + (type << 64) + offset
+ *
+ * The individual min and max values for objectid, type and offset
+ * define the min_key and max_key values for the search range. All
+ * metadata items with a key in the interval [min_key, max_key] will be
+ * returned.
+ *
+ * Additionally, we can filter the items returned on transaction id of
+ * the metadata block they're stored in by specifying a transid range.
+ * Be aware that this transaction id only denotes when the metadata
+ * page that currently contains the item got written the last time as
+ * result of a COW operation. The number does not have any meaning
+ * related to the transaction in which an individual item that is being
+ * returned was created or changed.
+ */
+ __u64 min_objectid; /* in */
+ __u64 max_objectid; /* in */
+ __u64 min_offset; /* in */
+ __u64 max_offset; /* in */
+ __u64 min_transid; /* in */
+ __u64 max_transid; /* in */
+ __u32 min_type; /* in */
+ __u32 max_type; /* in */
+
+ /*
+ * input: The maximum amount of results desired.
+ * output: The actual amount of items returned, restricted by any of:
+ * - reaching the upper bound of the search range
+ * - reaching the input nr_items amount of items
+ * - completely filling the supplied memory buffer
+ */
+ __u32 nr_items; /* in/out */
+
+ /* align to 64 bits */
+ __u32 unused;
+
+ /* some extra for later */
+ __u64 unused1;
+ __u64 unused2;
+ __u64 unused3;
+ __u64 unused4;
+};
+
+struct btrfs_ioctl_search_header {
+ __u64 transid;
+ __u64 objectid;
+ __u64 offset;
+ __u32 type;
+ __u32 len;
+};
+
+#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
+/*
+ * the buf is an array of search headers where
+ * each header is followed by the actual item
+ * the type field is expanded to 32 bits for alignment
+ */
+struct btrfs_ioctl_search_args {
+ struct btrfs_ioctl_search_key key;
+ char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
+};
+
+struct btrfs_ioctl_search_args_v2 {
+ struct btrfs_ioctl_search_key key; /* in/out - search parameters */
+ __u64 buf_size; /* in - size of buffer
+ * out - on EOVERFLOW: needed size
+ * to store item */
+ __u64 buf[0]; /* out - found items */
+};
+
+struct btrfs_ioctl_clone_range_args {
+ __s64 src_fd;
+ __u64 src_offset, src_length;
+ __u64 dest_offset;
+};
+
+/*
+ * flags definition for the defrag range ioctl
+ *
+ * Used by:
+ * struct btrfs_ioctl_defrag_range_args.flags
+ */
+#define BTRFS_DEFRAG_RANGE_COMPRESS 1
+#define BTRFS_DEFRAG_RANGE_START_IO 2
+struct btrfs_ioctl_defrag_range_args {
+ /* start of the defrag operation */
+ __u64 start;
+
+ /* number of bytes to defrag, use (u64)-1 to say all */
+ __u64 len;
+
+ /*
+ * flags for the operation, which can include turning
+ * on compression for this one defrag
+ */
+ __u64 flags;
+
+ /*
+ * any extent bigger than this will be considered
+ * already defragged. Use 0 to take the kernel default
+ * Use 1 to say every single extent must be rewritten
+ */
+ __u32 extent_thresh;
+
+ /*
+ * which compression method to use if turning on compression
+ * for this defrag operation. If unspecified, zlib will
+ * be used
+ */
+ __u32 compress_type;
+
+ /* spare for later */
+ __u32 unused[4];
+};
+
+
+#define BTRFS_SAME_DATA_DIFFERS 1
+/* For extent-same ioctl */
+struct btrfs_ioctl_same_extent_info {
+ __s64 fd; /* in - destination file */
+ __u64 logical_offset; /* in - start of extent in destination */
+ __u64 bytes_deduped; /* out - total # of bytes we were able
+ * to dedupe from this file */
+ /* status of this dedupe operation:
+ * 0 if dedup succeeds
+ * < 0 for error
+ * == BTRFS_SAME_DATA_DIFFERS if data differs
+ */
+ __s32 status; /* out - see above description */
+ __u32 reserved;
+};
+
+struct btrfs_ioctl_same_args {
+ __u64 logical_offset; /* in - start of extent in source */
+ __u64 length; /* in - length of extent */
+ __u16 dest_count; /* in - total elements in info array */
+ __u16 reserved1;
+ __u32 reserved2;
+ struct btrfs_ioctl_same_extent_info info[0];
+};
+
+struct btrfs_ioctl_space_info {
+ __u64 flags;
+ __u64 total_bytes;
+ __u64 used_bytes;
+};
+
+struct btrfs_ioctl_space_args {
+ __u64 space_slots;
+ __u64 total_spaces;
+ struct btrfs_ioctl_space_info spaces[0];
+};
+
+struct btrfs_data_container {
+ __u32 bytes_left; /* out -- bytes not needed to deliver output */
+ __u32 bytes_missing; /* out -- additional bytes needed for result */
+ __u32 elem_cnt; /* out */
+ __u32 elem_missed; /* out */
+ __u64 val[0]; /* out */
+};
+
+struct btrfs_ioctl_ino_path_args {
+ __u64 inum; /* in */
+ __u64 size; /* in */
+ __u64 reserved[4];
+ /* struct btrfs_data_container *fspath; out */
+ __u64 fspath; /* out */
+};
+
+struct btrfs_ioctl_logical_ino_args {
+ __u64 logical; /* in */
+ __u64 size; /* in */
+ __u64 reserved[3]; /* must be 0 for now */
+ __u64 flags; /* in, v2 only */
+ /* struct btrfs_data_container *inodes; out */
+ __u64 inodes;
+};
+/* Return every ref to the extent, not just those containing logical block.
+ * Requires logical == extent bytenr. */
+#define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET (1ULL << 0)
+
+enum btrfs_dev_stat_values {
+ /* disk I/O failure stats */
+ BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
+ BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
+ BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
+
+ /* stats for indirect indications for I/O failures */
+ BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
+ * contents is illegal: this is an
+ * indication that the block was damaged
+ * during read or write, or written to
+ * wrong location or read from wrong
+ * location */
+ BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
+ * been written */
+
+ BTRFS_DEV_STAT_VALUES_MAX
+};
+
+/* Reset statistics after reading; needs SYS_ADMIN capability */
+#define BTRFS_DEV_STATS_RESET (1ULL << 0)
+
+struct btrfs_ioctl_get_dev_stats {
+ __u64 devid; /* in */
+ __u64 nr_items; /* in/out */
+ __u64 flags; /* in/out */
+
+ /* out values: */
+ __u64 values[BTRFS_DEV_STAT_VALUES_MAX];
+
+ /*
+ * This pads the struct to 1032 bytes. It was originally meant to pad to
+ * 1024 bytes, but when adding the flags field, the padding calculation
+ * was not adjusted.
+ */
+ __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX];
+};
+
+#define BTRFS_QUOTA_CTL_ENABLE 1
+#define BTRFS_QUOTA_CTL_DISABLE 2
+#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3
+struct btrfs_ioctl_quota_ctl_args {
+ __u64 cmd;
+ __u64 status;
+};
+
+struct btrfs_ioctl_quota_rescan_args {
+ __u64 flags;
+ __u64 progress;
+ __u64 reserved[6];
+};
+
+struct btrfs_ioctl_qgroup_assign_args {
+ __u64 assign;
+ __u64 src;
+ __u64 dst;
+};
+
+struct btrfs_ioctl_qgroup_create_args {
+ __u64 create;
+ __u64 qgroupid;
+};
+struct btrfs_ioctl_timespec {
+ __u64 sec;
+ __u32 nsec;
+};
+
+struct btrfs_ioctl_received_subvol_args {
+ char uuid[BTRFS_UUID_SIZE]; /* in */
+ __u64 stransid; /* in */
+ __u64 rtransid; /* out */
+ struct btrfs_ioctl_timespec stime; /* in */
+ struct btrfs_ioctl_timespec rtime; /* out */
+ __u64 flags; /* in */
+ __u64 reserved[16]; /* in */
+};
+
+/*
+ * Caller doesn't want file data in the send stream, even if the
+ * search of clone sources doesn't find an extent. UPDATE_EXTENT
+ * commands will be sent instead of WRITE commands.
+ */
+#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1
+
+/*
+ * Do not add the leading stream header. Used when multiple snapshots
+ * are sent back to back.
+ */
+#define BTRFS_SEND_FLAG_OMIT_STREAM_HEADER 0x2
+
+/*
+ * Omit the command at the end of the stream that indicated the end
+ * of the stream. This option is used when multiple snapshots are
+ * sent back to back.
+ */
+#define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4
+
+#define BTRFS_SEND_FLAG_MASK \
+ (BTRFS_SEND_FLAG_NO_FILE_DATA | \
+ BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
+ BTRFS_SEND_FLAG_OMIT_END_CMD)
+
+struct btrfs_ioctl_send_args {
+ __s64 send_fd; /* in */
+ __u64 clone_sources_count; /* in */
+ __u64 *clone_sources; /* in */
+ __u64 parent_root; /* in */
+ __u64 flags; /* in */
+ __u64 reserved[4]; /* in */
+};
+
+/*
+ * Information about a fs tree root.
+ *
+ * All items are filled by the ioctl
+ */
+struct btrfs_ioctl_get_subvol_info_args {
+ /* Id of this subvolume */
+ __u64 treeid;
+
+ /* Name of this subvolume, used to get the real name at mount point */
+ char name[BTRFS_VOL_NAME_MAX + 1];
+
+ /*
+ * Id of the subvolume which contains this subvolume.
+ * Zero for top-level subvolume or a deleted subvolume.
+ */
+ __u64 parent_id;
+
+ /*
+ * Inode number of the directory which contains this subvolume.
+ * Zero for top-level subvolume or a deleted subvolume
+ */
+ __u64 dirid;
+
+ /* Latest transaction id of this subvolume */
+ __u64 generation;
+
+ /* Flags of this subvolume */
+ __u64 flags;
+
+ /* UUID of this subvolume */
+ __u8 uuid[BTRFS_UUID_SIZE];
+
+ /*
+ * UUID of the subvolume of which this subvolume is a snapshot.
+ * All zero for a non-snapshot subvolume.
+ */
+ __u8 parent_uuid[BTRFS_UUID_SIZE];
+
+ /*
+ * UUID of the subvolume from which this subvolume was received.
+ * All zero for non-received subvolume.
+ */
+ __u8 received_uuid[BTRFS_UUID_SIZE];
+
+ /* Transaction id indicating when change/create/send/receive happened */
+ __u64 ctransid;
+ __u64 otransid;
+ __u64 stransid;
+ __u64 rtransid;
+ /* Time corresponding to c/o/s/rtransid */
+ struct btrfs_ioctl_timespec ctime;
+ struct btrfs_ioctl_timespec otime;
+ struct btrfs_ioctl_timespec stime;
+ struct btrfs_ioctl_timespec rtime;
+
+ /* Must be zero */
+ __u64 reserved[8];
+};
+
+#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
+struct btrfs_ioctl_get_subvol_rootref_args {
+ /* in/out, minimum id of rootref's treeid to be searched */
+ __u64 min_treeid;
+
+ /* out */
+ struct {
+ __u64 treeid;
+ __u64 dirid;
+ } rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
+
+ /* out, number of found items */
+ __u8 num_items;
+ __u8 align[7];
+};
+
+/* Error codes as returned by the kernel */
+enum btrfs_err_code {
+ BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
+ BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_TGT_REPLACE,
+ BTRFS_ERROR_DEV_MISSING_NOT_FOUND,
+ BTRFS_ERROR_DEV_ONLY_WRITABLE,
+ BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS,
+ BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
+};
+
+#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_FORGET_DEV _IOW(BTRFS_IOCTL_MAGIC, 5, \
+ struct btrfs_ioctl_vol_args)
+/* trans start and trans end are dangerous, and only for
+ * use by applications that know how to avoid the
+ * resulting deadlocks
+ */
+#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
+#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
+#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
+
+#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
+#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
+ struct btrfs_ioctl_vol_args)
+
+#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
+ struct btrfs_ioctl_clone_range_args)
+
+#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
+ struct btrfs_ioctl_defrag_range_args)
+#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+ struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+ struct btrfs_ioctl_search_args_v2)
+#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
+ struct btrfs_ioctl_ino_lookup_args)
+#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
+#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
+ struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+ struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
+ struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
+#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
+ struct btrfs_ioctl_scrub_args)
+#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
+#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
+ struct btrfs_ioctl_scrub_args)
+#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
+ struct btrfs_ioctl_dev_info_args)
+#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
+ struct btrfs_ioctl_fs_info_args)
+#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
+ struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
+#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
+ struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
+ struct btrfs_ioctl_ino_path_args)
+#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
+ struct btrfs_ioctl_logical_ino_args)
+#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
+ struct btrfs_ioctl_received_subvol_args)
+#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
+#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
+ struct btrfs_ioctl_quota_ctl_args)
+#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
+ struct btrfs_ioctl_qgroup_assign_args)
+#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
+ struct btrfs_ioctl_qgroup_create_args)
+#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
+ struct btrfs_ioctl_qgroup_limit_args)
+#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
+ struct btrfs_ioctl_quota_rescan_args)
+#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
+ struct btrfs_ioctl_quota_rescan_args)
+#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46)
+#define BTRFS_IOC_GET_FSLABEL FS_IOC_GETFSLABEL
+#define BTRFS_IOC_SET_FSLABEL FS_IOC_SETFSLABEL
+#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
+ struct btrfs_ioctl_get_dev_stats)
+#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
+ struct btrfs_ioctl_dev_replace_args)
+#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
+ struct btrfs_ioctl_same_args)
+#define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
+ struct btrfs_ioctl_feature_flags)
+#define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \
+ struct btrfs_ioctl_feature_flags[2])
+#define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
+ struct btrfs_ioctl_feature_flags[3])
+#define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
+ struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
+ struct btrfs_ioctl_logical_ino_args)
+#define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \
+ struct btrfs_ioctl_get_subvol_info_args)
+#define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \
+ struct btrfs_ioctl_get_subvol_rootref_args)
+#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
+ struct btrfs_ioctl_ino_lookup_user_args)
+#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
+ struct btrfs_ioctl_vol_args_v2)
+
+#endif /* _UAPI_LINUX_BTRFS_H */
diff --git a/src/basic/linux/btrfs_tree.h b/src/basic/linux/btrfs_tree.h
new file mode 100644
index 0000000..9ba64ca
--- /dev/null
+++ b/src/basic/linux/btrfs_tree.h
@@ -0,0 +1,979 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _BTRFS_CTREE_H_
+#define _BTRFS_CTREE_H_
+
+#include <linux/btrfs.h>
+#include <linux/types.h>
+
+/*
+ * This header contains the structure definitions and constants used
+ * by file system objects that can be retrieved using
+ * the BTRFS_IOC_SEARCH_TREE ioctl. That means basically anything that
+ * is needed to describe a leaf node's key or item contents.
+ */
+
+/* holds pointers to all of the tree roots */
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/* stores information about which extents are in use, and reference counts */
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/*
+ * chunk tree stores translations from logical -> physical block numbering
+ * the super block points to the chunk tree
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
+
+/*
+ * stores information about which areas of a given device are in use.
+ * one per device. The tree of tree roots points to the device tree
+ */
+#define BTRFS_DEV_TREE_OBJECTID 4ULL
+
+/* one per subvolume, storing files and directories */
+#define BTRFS_FS_TREE_OBJECTID 5ULL
+
+/* directory objectid inside the root tree */
+#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+
+/* holds checksums of all the data extents */
+#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+
+/* holds quota configuration and tracking */
+#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+
+/* for storing items that use the BTRFS_UUID_KEY* types */
+#define BTRFS_UUID_TREE_OBJECTID 9ULL
+
+/* tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
+/* device stats in the device tree */
+#define BTRFS_DEV_STATS_OBJECTID 0ULL
+
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
+/* orhpan objectid for tracking unlinked/truncated files */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* does write ahead logging to speed up fsyncs */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* for space balancing */
+#define BTRFS_TREE_RELOC_OBJECTID -8ULL
+#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+
+/*
+ * extent checksums all have this objectid
+ * this allows them to share the logging tree
+ * for fsyncs
+ */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
+/*
+ * The inode number assigned to the special inode for storing
+ * free ino cache
+ */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
+/* dummy objectid represents multiple objectids */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/*
+ * All files have objectids in this range.
+ */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+/*
+ * the device items go into the chunk tree. The key is in the form
+ * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
+#define BTRFS_DEV_REPLACE_DEVID 0ULL
+
+/*
+ * inode items have the data typically returned from stat and store other
+ * info about object characteristics. There is one for every file and dir in
+ * the FS
+ */
+#define BTRFS_INODE_ITEM_KEY 1
+#define BTRFS_INODE_REF_KEY 12
+#define BTRFS_INODE_EXTREF_KEY 13
+#define BTRFS_XATTR_ITEM_KEY 24
+#define BTRFS_ORPHAN_ITEM_KEY 48
+/* reserve 2-15 close to the inode for later flexibility */
+
+/*
+ * dir items are the name -> inode pointers in a directory. There is one
+ * for every name in a directory.
+ */
+#define BTRFS_DIR_LOG_ITEM_KEY 60
+#define BTRFS_DIR_LOG_INDEX_KEY 72
+#define BTRFS_DIR_ITEM_KEY 84
+#define BTRFS_DIR_INDEX_KEY 96
+/*
+ * extent data is for file data
+ */
+#define BTRFS_EXTENT_DATA_KEY 108
+
+/*
+ * extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
+ */
+#define BTRFS_EXTENT_CSUM_KEY 128
+
+/*
+ * root items point to tree roots. They are typically in the root
+ * tree used by the super block to find all the other trees
+ */
+#define BTRFS_ROOT_ITEM_KEY 132
+
+/*
+ * root backrefs tie subvols and snapshots to the directory entries that
+ * reference them
+ */
+#define BTRFS_ROOT_BACKREF_KEY 144
+
+/*
+ * root refs make a fast index for listing all of the snapshots and
+ * subvolumes referenced by a given root. They point directly to the
+ * directory item in the root that references the subvol
+ */
+#define BTRFS_ROOT_REF_KEY 156
+
+/*
+ * extent items are in the extent map tree. These record which blocks
+ * are used, and how many references there are to each block
+ */
+#define BTRFS_EXTENT_ITEM_KEY 168
+
+/*
+ * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
+ * the length, so we save the level in key->offset instead of the length.
+ */
+#define BTRFS_METADATA_ITEM_KEY 169
+
+#define BTRFS_TREE_BLOCK_REF_KEY 176
+
+#define BTRFS_EXTENT_DATA_REF_KEY 178
+
+#define BTRFS_EXTENT_REF_V0_KEY 180
+
+#define BTRFS_SHARED_BLOCK_REF_KEY 182
+
+#define BTRFS_SHARED_DATA_REF_KEY 184
+
+/*
+ * block groups give us hints into the extent allocation trees. Which
+ * blocks are free etc etc
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
+
+/*
+ * Every block group is represented in the free space tree by a free space info
+ * item, which stores some accounting information. It is keyed on
+ * (block_group_start, FREE_SPACE_INFO, block_group_length).
+ */
+#define BTRFS_FREE_SPACE_INFO_KEY 198
+
+/*
+ * A free space extent tracks an extent of space that is free in a block group.
+ * It is keyed on (start, FREE_SPACE_EXTENT, length).
+ */
+#define BTRFS_FREE_SPACE_EXTENT_KEY 199
+
+/*
+ * When a block group becomes very fragmented, we convert it to use bitmaps
+ * instead of extents. A free space bitmap is keyed on
+ * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
+ * (length / sectorsize) bits.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_KEY 200
+
+#define BTRFS_DEV_EXTENT_KEY 204
+#define BTRFS_DEV_ITEM_KEY 216
+#define BTRFS_CHUNK_ITEM_KEY 228
+
+/*
+ * Records the overall state of the qgroups.
+ * There's only one instance of this key present,
+ * (0, BTRFS_QGROUP_STATUS_KEY, 0)
+ */
+#define BTRFS_QGROUP_STATUS_KEY 240
+/*
+ * Records the currently used space of the qgroup.
+ * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_INFO_KEY 242
+/*
+ * Contains the user configured limits for the qgroup.
+ * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_LIMIT_KEY 244
+/*
+ * Records the child-parent relationship of qgroups. For
+ * each relation, 2 keys are present:
+ * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
+ * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
+ */
+#define BTRFS_QGROUP_RELATION_KEY 246
+
+/*
+ * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
+ */
+#define BTRFS_BALANCE_ITEM_KEY 248
+
+/*
+ * The key type for tree items that are stored persistently, but do not need to
+ * exist for extended period of time. The items can exist in any tree.
+ *
+ * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - balance status item
+ * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
+ */
+#define BTRFS_TEMPORARY_ITEM_KEY 248
+
+/*
+ * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
+ */
+#define BTRFS_DEV_STATS_KEY 249
+
+/*
+ * The key type for tree items that are stored persistently and usually exist
+ * for a long period, eg. filesystem lifetime. The item kinds can be status
+ * information, stats or preference values. The item can exist in any tree.
+ *
+ * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - device statistics, store IO stats in the device tree, one key for all
+ * stats
+ * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
+ */
+#define BTRFS_PERSISTENT_ITEM_KEY 249
+
+/*
+ * Persistantly stores the device replace state in the device tree.
+ * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
+ */
+#define BTRFS_DEV_REPLACE_KEY 250
+
+/*
+ * Stores items that allow to quickly map UUIDs to something else.
+ * These items are part of the filesystem UUID tree.
+ * The key is built like this:
+ * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
+ */
+#if BTRFS_UUID_SIZE != 16
+#error "UUID items require BTRFS_UUID_SIZE == 16!"
+#endif
+#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
+#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
+ * received subvols */
+
+/*
+ * string items are for debugging. They just store a short string of
+ * data in the FS
+ */
+#define BTRFS_STRING_ITEM_KEY 253
+
+
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+
+/* csum types */
+enum btrfs_csum_type {
+ BTRFS_CSUM_TYPE_CRC32 = 0,
+ BTRFS_CSUM_TYPE_XXHASH = 1,
+ BTRFS_CSUM_TYPE_SHA256 = 2,
+ BTRFS_CSUM_TYPE_BLAKE2 = 3,
+};
+
+/*
+ * flags definitions for directory entry item type
+ *
+ * Used by:
+ * struct btrfs_dir_item.type
+ *
+ * Values 0..7 must match common file type values in fs_types.h.
+ */
+#define BTRFS_FT_UNKNOWN 0
+#define BTRFS_FT_REG_FILE 1
+#define BTRFS_FT_DIR 2
+#define BTRFS_FT_CHRDEV 3
+#define BTRFS_FT_BLKDEV 4
+#define BTRFS_FT_FIFO 5
+#define BTRFS_FT_SOCK 6
+#define BTRFS_FT_SYMLINK 7
+#define BTRFS_FT_XATTR 8
+#define BTRFS_FT_MAX 9
+
+/*
+ * The key defines the order in the tree, and so it also defines (optimal)
+ * block layout.
+ *
+ * objectid corresponds to the inode number.
+ *
+ * type tells us things about the object, and is a kind of stream selector.
+ * so for a given inode, keys with type of 1 might refer to the inode data,
+ * type of 2 may point to file data in the btree and type == 3 may point to
+ * extents.
+ *
+ * offset is the starting byte offset for this key in the stream.
+ *
+ * btrfs_disk_key is in disk byte order. struct btrfs_key is always
+ * in cpu native order. Otherwise they are identical and their sizes
+ * should be the same (ie both packed)
+ */
+struct btrfs_disk_key {
+ __le64 objectid;
+ __u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+ __u64 objectid;
+ __u8 type;
+ __u64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_dev_item {
+ /* the internal btrfs device id */
+ __le64 devid;
+
+ /* size of the device */
+ __le64 total_bytes;
+
+ /* bytes used */
+ __le64 bytes_used;
+
+ /* optimal io alignment for this device */
+ __le32 io_align;
+
+ /* optimal io width for this device */
+ __le32 io_width;
+
+ /* minimal io size for this device */
+ __le32 sector_size;
+
+ /* type and info about this device */
+ __le64 type;
+
+ /* expected generation for this device */
+ __le64 generation;
+
+ /*
+ * starting byte of this partition on the device,
+ * to allow for stripe alignment in the future
+ */
+ __le64 start_offset;
+
+ /* grouping information for allocation decisions */
+ __le32 dev_group;
+
+ /* seek speed 0-100 where 100 is fastest */
+ __u8 seek_speed;
+
+ /* bandwidth 0-100 where 100 is fastest */
+ __u8 bandwidth;
+
+ /* btrfs generated uuid for this device */
+ __u8 uuid[BTRFS_UUID_SIZE];
+
+ /* uuid of FS who owns this device */
+ __u8 fsid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+ __le64 devid;
+ __le64 offset;
+ __u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+ /* size of this chunk in bytes */
+ __le64 length;
+
+ /* objectid of the root referencing this chunk */
+ __le64 owner;
+
+ __le64 stripe_len;
+ __le64 type;
+
+ /* optimal io alignment for this chunk */
+ __le32 io_align;
+
+ /* optimal io width for this chunk */
+ __le32 io_width;
+
+ /* minimal io size for this chunk */
+ __le32 sector_size;
+
+ /* 2^16 stripes is quite a lot, a second limit is the size of a single
+ * item in the btree
+ */
+ __le16 num_stripes;
+
+ /* sub stripes only matter for raid10 */
+ __le16 sub_stripes;
+ struct btrfs_stripe stripe;
+ /* additional stripes go here */
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_EXTENT 1
+#define BTRFS_FREE_SPACE_BITMAP 2
+
+struct btrfs_free_space_entry {
+ __le64 offset;
+ __le64 bytes;
+ __u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+ struct btrfs_disk_key location;
+ __le64 generation;
+ __le64 num_entries;
+ __le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
+#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
+#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
+
+/* Super block flags */
+/* Errors detected */
+#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
+
+#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
+#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
+#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
+
+
+/*
+ * items in the extent btree are used to record the objectid of the
+ * owner of the block and the number of references
+ */
+
+struct btrfs_extent_item {
+ __le64 refs;
+ __le64 generation;
+ __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_item_v0 {
+ __le32 refs;
+} __attribute__ ((__packed__));
+
+
+#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
+#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
+
+/* following flags only apply to tree blocks */
+
+/* use full backrefs for extent pointers in the block */
+#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
+
+/*
+ * this flag is only used internally by scrub and may be changed at any time
+ * it is only declared here to avoid collisions
+ */
+#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48)
+
+struct btrfs_tree_block_info {
+ struct btrfs_disk_key key;
+ __u8 level;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_data_ref {
+ __le64 root;
+ __le64 objectid;
+ __le64 offset;
+ __le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_shared_data_ref {
+ __le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_inline_ref {
+ __u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+/* dev extents record free space on individual devices. The owner
+ * field points back to the chunk allocation mapping tree that allocated
+ * the extent. The chunk tree uuid field is a way to double check the owner
+ */
+struct btrfs_dev_extent {
+ __le64 chunk_tree;
+ __le64 chunk_objectid;
+ __le64 chunk_offset;
+ __le64 length;
+ __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_ref {
+ __le64 index;
+ __le16 name_len;
+ /* name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_extref {
+ __le64 parent_objectid;
+ __le64 index;
+ __le16 name_len;
+ __u8 name[0];
+ /* name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+ __le64 sec;
+ __le32 nsec;
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_item {
+ /* nfs style generation number */
+ __le64 generation;
+ /* transid that last touched this inode */
+ __le64 transid;
+ __le64 size;
+ __le64 nbytes;
+ __le64 block_group;
+ __le32 nlink;
+ __le32 uid;
+ __le32 gid;
+ __le32 mode;
+ __le64 rdev;
+ __le64 flags;
+
+ /* modification sequence number for NFS */
+ __le64 sequence;
+
+ /*
+ * a little future expansion, for more than this we can
+ * just grow the inode item and version it
+ */
+ __le64 reserved[4];
+ struct btrfs_timespec atime;
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec mtime;
+ struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_log_item {
+ __le64 end;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_item {
+ struct btrfs_disk_key location;
+ __le64 transid;
+ __le16 data_len;
+ __le16 name_len;
+ __u8 type;
+} __attribute__ ((__packed__));
+
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
+/*
+ * Internal in-memory flag that a subvolume has been marked for deletion but
+ * still visible as a directory
+ */
+#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48)
+
+struct btrfs_root_item {
+ struct btrfs_inode_item inode;
+ __le64 generation;
+ __le64 root_dirid;
+ __le64 bytenr;
+ __le64 byte_limit;
+ __le64 bytes_used;
+ __le64 last_snapshot;
+ __le64 flags;
+ __le32 refs;
+ struct btrfs_disk_key drop_progress;
+ __u8 drop_level;
+ __u8 level;
+
+ /*
+ * The following fields appear after subvol_uuids+subvol_times
+ * were introduced.
+ */
+
+ /*
+ * This generation number is used to test if the new fields are valid
+ * and up to date while reading the root item. Every time the root item
+ * is written out, the "generation" field is copied into this field. If
+ * anyone ever mounted the fs with an older kernel, we will have
+ * mismatching generation values here and thus must invalidate the
+ * new fields. See btrfs_update_root and btrfs_find_last_root for
+ * details.
+ * the offset of generation_v2 is also used as the start for the memset
+ * when invalidating the fields.
+ */
+ __le64 generation_v2;
+ __u8 uuid[BTRFS_UUID_SIZE];
+ __u8 parent_uuid[BTRFS_UUID_SIZE];
+ __u8 received_uuid[BTRFS_UUID_SIZE];
+ __le64 ctransid; /* updated when an inode changes */
+ __le64 otransid; /* trans when created */
+ __le64 stransid; /* trans when sent. non-zero for received subvol */
+ __le64 rtransid; /* trans when received. non-zero for received subvol */
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec otime;
+ struct btrfs_timespec stime;
+ struct btrfs_timespec rtime;
+ __le64 reserved[8]; /* for future */
+} __attribute__ ((__packed__));
+
+/*
+ * this is used for both forward and backward root refs
+ */
+struct btrfs_root_ref {
+ __le64 dirid;
+ __le64 sequence;
+ __le16 name_len;
+} __attribute__ ((__packed__));
+
+struct btrfs_disk_balance_args {
+ /*
+ * profiles to operate on, single is denoted by
+ * BTRFS_AVAIL_ALLOC_BIT_SINGLE
+ */
+ __le64 profiles;
+
+ /*
+ * usage filter
+ * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
+ * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
+ */
+ union {
+ __le64 usage;
+ struct {
+ __le32 usage_min;
+ __le32 usage_max;
+ };
+ };
+
+ /* devid filter */
+ __le64 devid;
+
+ /* devid subset filter [pstart..pend) */
+ __le64 pstart;
+ __le64 pend;
+
+ /* btrfs virtual address space subset filter [vstart..vend) */
+ __le64 vstart;
+ __le64 vend;
+
+ /*
+ * profile to convert to, single is denoted by
+ * BTRFS_AVAIL_ALLOC_BIT_SINGLE
+ */
+ __le64 target;
+
+ /* BTRFS_BALANCE_ARGS_* */
+ __le64 flags;
+
+ /*
+ * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
+ * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum
+ * and maximum
+ */
+ union {
+ __le64 limit;
+ struct {
+ __le32 limit_min;
+ __le32 limit_max;
+ };
+ };
+
+ /*
+ * Process chunks that cross stripes_min..stripes_max devices,
+ * BTRFS_BALANCE_ARGS_STRIPES_RANGE
+ */
+ __le32 stripes_min;
+ __le32 stripes_max;
+
+ __le64 unused[6];
+} __attribute__ ((__packed__));
+
+/*
+ * store balance parameters to disk so that balance can be properly
+ * resumed after crash or unmount
+ */
+struct btrfs_balance_item {
+ /* BTRFS_BALANCE_* */
+ __le64 flags;
+
+ struct btrfs_disk_balance_args data;
+ struct btrfs_disk_balance_args meta;
+ struct btrfs_disk_balance_args sys;
+
+ __le64 unused[4];
+} __attribute__ ((__packed__));
+
+enum {
+ BTRFS_FILE_EXTENT_INLINE = 0,
+ BTRFS_FILE_EXTENT_REG = 1,
+ BTRFS_FILE_EXTENT_PREALLOC = 2,
+ BTRFS_NR_FILE_EXTENT_TYPES = 3,
+};
+
+struct btrfs_file_extent_item {
+ /*
+ * transaction id that created this extent
+ */
+ __le64 generation;
+ /*
+ * max number of bytes to hold this extent in ram
+ * when we split a compressed extent we can't know how big
+ * each of the resulting pieces will be. So, this is
+ * an upper limit on the size of the extent in ram instead of
+ * an exact limit.
+ */
+ __le64 ram_bytes;
+
+ /*
+ * 32 bits for the various ways we might encode the data,
+ * including compression and encryption. If any of these
+ * are set to something a given disk format doesn't understand
+ * it is treated like an incompat flag for reading and writing,
+ * but not for stat.
+ */
+ __u8 compression;
+ __u8 encryption;
+ __le16 other_encoding; /* spare for later use */
+
+ /* are we inline data or a real extent? */
+ __u8 type;
+
+ /*
+ * disk space consumed by the extent, checksum blocks are included
+ * in these numbers
+ *
+ * At this offset in the structure, the inline extent data start.
+ */
+ __le64 disk_bytenr;
+ __le64 disk_num_bytes;
+ /*
+ * the logical offset in file blocks (no csums)
+ * this extent record is for. This allows a file extent to point
+ * into the middle of an existing extent on disk, sharing it
+ * between two snapshots (useful if some bytes in the middle of the
+ * extent have changed
+ */
+ __le64 offset;
+ /*
+ * the logical number of file blocks (no csums included). This
+ * always reflects the size uncompressed and without encoding.
+ */
+ __le64 num_bytes;
+
+} __attribute__ ((__packed__));
+
+struct btrfs_csum_item {
+ __u8 csum;
+} __attribute__ ((__packed__));
+
+struct btrfs_dev_stats_item {
+ /*
+ * grow this item struct at the end for future enhancements and keep
+ * the existing values unchanged
+ */
+ __le64 values[BTRFS_DEV_STAT_VALUES_MAX];
+} __attribute__ ((__packed__));
+
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID 1
+
+struct btrfs_dev_replace_item {
+ /*
+ * grow this item struct at the end for future enhancements and keep
+ * the existing values unchanged
+ */
+ __le64 src_devid;
+ __le64 cursor_left;
+ __le64 cursor_right;
+ __le64 cont_reading_from_srcdev_mode;
+
+ __le64 replace_state;
+ __le64 time_started;
+ __le64 time_stopped;
+ __le64 num_write_errors;
+ __le64 num_uncorrectable_read_errors;
+} __attribute__ ((__packed__));
+
+/* different types of block groups (and chunks) */
+#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
+#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
+#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
+#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
+#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
+#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
+#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9)
+#define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10)
+#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
+ BTRFS_SPACE_INFO_GLOBAL_RSV)
+
+enum btrfs_raid_types {
+ BTRFS_RAID_RAID10,
+ BTRFS_RAID_RAID1,
+ BTRFS_RAID_DUP,
+ BTRFS_RAID_RAID0,
+ BTRFS_RAID_SINGLE,
+ BTRFS_RAID_RAID5,
+ BTRFS_RAID_RAID6,
+ BTRFS_RAID_RAID1C3,
+ BTRFS_RAID_RAID1C4,
+ BTRFS_NR_RAID_TYPES
+};
+
+#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
+ BTRFS_BLOCK_GROUP_SYSTEM | \
+ BTRFS_BLOCK_GROUP_METADATA)
+
+#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
+ BTRFS_BLOCK_GROUP_RAID1 | \
+ BTRFS_BLOCK_GROUP_RAID1C3 | \
+ BTRFS_BLOCK_GROUP_RAID1C4 | \
+ BTRFS_BLOCK_GROUP_RAID5 | \
+ BTRFS_BLOCK_GROUP_RAID6 | \
+ BTRFS_BLOCK_GROUP_DUP | \
+ BTRFS_BLOCK_GROUP_RAID10)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
+ BTRFS_BLOCK_GROUP_RAID6)
+
+#define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1 | \
+ BTRFS_BLOCK_GROUP_RAID1C3 | \
+ BTRFS_BLOCK_GROUP_RAID1C4)
+
+/*
+ * We need a bit for restriper to be able to tell when chunks of type
+ * SINGLE are available. This "extended" profile format is used in
+ * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
+ * (on-disk). The corresponding on-disk bit in chunk.type is reserved
+ * to avoid remappings between two formats in future.
+ */
+#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
+
+/*
+ * A fake block group type that is used to communicate global block reserve
+ * size to userspace via the SPACE_INFO ioctl.
+ */
+#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49)
+
+#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
+ BTRFS_AVAIL_ALLOC_BIT_SINGLE)
+
+static inline __u64 chunk_to_extended(__u64 flags)
+{
+ if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
+ flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+ return flags;
+}
+static inline __u64 extended_to_chunk(__u64 flags)
+{
+ return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+}
+
+struct btrfs_block_group_item {
+ __le64 used;
+ __le64 chunk_objectid;
+ __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_info {
+ __le32 extent_count;
+ __le32 flags;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+
+#define BTRFS_QGROUP_LEVEL_SHIFT 48
+static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
+{
+ return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT);
+}
+
+/*
+ * is subvolume quota turned on?
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
+/*
+ * RESCAN is set during the initialization phase
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1)
+/*
+ * Some qgroup entries are known to be out of date,
+ * either because the configuration has changed in a way that
+ * makes a rescan necessary, or because the fs has been mounted
+ * with a non-qgroup-aware version.
+ * Turning qouta off and on again makes it inconsistent, too.
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
+
+#define BTRFS_QGROUP_STATUS_VERSION 1
+
+struct btrfs_qgroup_status_item {
+ __le64 version;
+ /*
+ * the generation is updated during every commit. As older
+ * versions of btrfs are not aware of qgroups, it will be
+ * possible to detect inconsistencies by checking the
+ * generation on mount time
+ */
+ __le64 generation;
+
+ /* flag definitions see above */
+ __le64 flags;
+
+ /*
+ * only used during scanning to record the progress
+ * of the scan. It contains a logical address
+ */
+ __le64 rescan;
+} __attribute__ ((__packed__));
+
+struct btrfs_qgroup_info_item {
+ __le64 generation;
+ __le64 rfer;
+ __le64 rfer_cmpr;
+ __le64 excl;
+ __le64 excl_cmpr;
+} __attribute__ ((__packed__));
+
+struct btrfs_qgroup_limit_item {
+ /*
+ * only updated when any of the other values change
+ */
+ __le64 flags;
+ __le64 max_rfer;
+ __le64 max_excl;
+ __le64 rsv_rfer;
+ __le64 rsv_excl;
+} __attribute__ ((__packed__));
+
+#endif /* _BTRFS_CTREE_H_ */
diff --git a/src/basic/linux/can/netlink.h b/src/basic/linux/can/netlink.h
new file mode 100644
index 0000000..6f598b7
--- /dev/null
+++ b/src/basic/linux/can/netlink.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * linux/can/netlink.h
+ *
+ * Definitions for the CAN netlink interface
+ *
+ * Copyright (c) 2009 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_CAN_NETLINK_H
+#define _UAPI_CAN_NETLINK_H
+
+#include <linux/types.h>
+
+/*
+ * CAN bit-timing parameters
+ *
+ * For further information, please read chapter "8 BIT TIMING
+ * REQUIREMENTS" of the "Bosch CAN Specification version 2.0"
+ * at http://www.semiconductors.bosch.de/pdf/can2spec.pdf.
+ */
+struct can_bittiming {
+ __u32 bitrate; /* Bit-rate in bits/second */
+ __u32 sample_point; /* Sample point in one-tenth of a percent */
+ __u32 tq; /* Time quanta (TQ) in nanoseconds */
+ __u32 prop_seg; /* Propagation segment in TQs */
+ __u32 phase_seg1; /* Phase buffer segment 1 in TQs */
+ __u32 phase_seg2; /* Phase buffer segment 2 in TQs */
+ __u32 sjw; /* Synchronisation jump width in TQs */
+ __u32 brp; /* Bit-rate prescaler */
+};
+
+/*
+ * CAN hardware-dependent bit-timing constant
+ *
+ * Used for calculating and checking bit-timing parameters
+ */
+struct can_bittiming_const {
+ char name[16]; /* Name of the CAN controller hardware */
+ __u32 tseg1_min; /* Time segment 1 = prop_seg + phase_seg1 */
+ __u32 tseg1_max;
+ __u32 tseg2_min; /* Time segment 2 = phase_seg2 */
+ __u32 tseg2_max;
+ __u32 sjw_max; /* Synchronisation jump width */
+ __u32 brp_min; /* Bit-rate prescaler */
+ __u32 brp_max;
+ __u32 brp_inc;
+};
+
+/*
+ * CAN clock parameters
+ */
+struct can_clock {
+ __u32 freq; /* CAN system clock frequency in Hz */
+};
+
+/*
+ * CAN operational and error states
+ */
+enum can_state {
+ CAN_STATE_ERROR_ACTIVE = 0, /* RX/TX error count < 96 */
+ CAN_STATE_ERROR_WARNING, /* RX/TX error count < 128 */
+ CAN_STATE_ERROR_PASSIVE, /* RX/TX error count < 256 */
+ CAN_STATE_BUS_OFF, /* RX/TX error count >= 256 */
+ CAN_STATE_STOPPED, /* Device is stopped */
+ CAN_STATE_SLEEPING, /* Device is sleeping */
+ CAN_STATE_MAX
+};
+
+/*
+ * CAN bus error counters
+ */
+struct can_berr_counter {
+ __u16 txerr;
+ __u16 rxerr;
+};
+
+/*
+ * CAN controller mode
+ */
+struct can_ctrlmode {
+ __u32 mask;
+ __u32 flags;
+};
+
+#define CAN_CTRLMODE_LOOPBACK 0x01 /* Loopback mode */
+#define CAN_CTRLMODE_LISTENONLY 0x02 /* Listen-only mode */
+#define CAN_CTRLMODE_3_SAMPLES 0x04 /* Triple sampling mode */
+#define CAN_CTRLMODE_ONE_SHOT 0x08 /* One-Shot mode */
+#define CAN_CTRLMODE_BERR_REPORTING 0x10 /* Bus-error reporting */
+#define CAN_CTRLMODE_FD 0x20 /* CAN FD mode */
+#define CAN_CTRLMODE_PRESUME_ACK 0x40 /* Ignore missing CAN ACKs */
+#define CAN_CTRLMODE_FD_NON_ISO 0x80 /* CAN FD in non-ISO mode */
+
+/*
+ * CAN device statistics
+ */
+struct can_device_stats {
+ __u32 bus_error; /* Bus errors */
+ __u32 error_warning; /* Changes to error warning state */
+ __u32 error_passive; /* Changes to error passive state */
+ __u32 bus_off; /* Changes to bus off state */
+ __u32 arbitration_lost; /* Arbitration lost errors */
+ __u32 restarts; /* CAN controller re-starts */
+};
+
+/*
+ * CAN netlink interface
+ */
+enum {
+ IFLA_CAN_UNSPEC,
+ IFLA_CAN_BITTIMING,
+ IFLA_CAN_BITTIMING_CONST,
+ IFLA_CAN_CLOCK,
+ IFLA_CAN_STATE,
+ IFLA_CAN_CTRLMODE,
+ IFLA_CAN_RESTART_MS,
+ IFLA_CAN_RESTART,
+ IFLA_CAN_BERR_COUNTER,
+ IFLA_CAN_DATA_BITTIMING,
+ IFLA_CAN_DATA_BITTIMING_CONST,
+ IFLA_CAN_TERMINATION,
+ IFLA_CAN_TERMINATION_CONST,
+ IFLA_CAN_BITRATE_CONST,
+ IFLA_CAN_DATA_BITRATE_CONST,
+ IFLA_CAN_BITRATE_MAX,
+ __IFLA_CAN_MAX
+};
+
+#define IFLA_CAN_MAX (__IFLA_CAN_MAX - 1)
+
+/* u16 termination range: 1..65535 Ohms */
+#define CAN_TERMINATION_DISABLED 0
+
+#endif /* !_UAPI_CAN_NETLINK_H */
diff --git a/src/basic/linux/can/vxcan.h b/src/basic/linux/can/vxcan.h
new file mode 100644
index 0000000..4fa9d87
--- /dev/null
+++ b/src/basic/linux/can/vxcan.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+#ifndef _UAPI_CAN_VXCAN_H
+#define _UAPI_CAN_VXCAN_H
+
+enum {
+ VXCAN_INFO_UNSPEC,
+ VXCAN_INFO_PEER,
+
+ __VXCAN_INFO_MAX
+#define VXCAN_INFO_MAX (__VXCAN_INFO_MAX - 1)
+};
+
+#endif
diff --git a/src/basic/linux/fib_rules.h b/src/basic/linux/fib_rules.h
new file mode 100644
index 0000000..232df14
--- /dev/null
+++ b/src/basic/linux/fib_rules.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_FIB_RULES_H
+#define __LINUX_FIB_RULES_H
+
+#include <linux/types.h>
+#include <linux/rtnetlink.h>
+
+/* rule is permanent, and cannot be deleted */
+#define FIB_RULE_PERMANENT 0x00000001
+#define FIB_RULE_INVERT 0x00000002
+#define FIB_RULE_UNRESOLVED 0x00000004
+#define FIB_RULE_IIF_DETACHED 0x00000008
+#define FIB_RULE_DEV_DETACHED FIB_RULE_IIF_DETACHED
+#define FIB_RULE_OIF_DETACHED 0x00000010
+
+/* try to find source address in routing lookups */
+#define FIB_RULE_FIND_SADDR 0x00010000
+
+struct fib_rule_hdr {
+ __u8 family;
+ __u8 dst_len;
+ __u8 src_len;
+ __u8 tos;
+
+ __u8 table;
+ __u8 res1; /* reserved */
+ __u8 res2; /* reserved */
+ __u8 action;
+
+ __u32 flags;
+};
+
+struct fib_rule_uid_range {
+ __u32 start;
+ __u32 end;
+};
+
+struct fib_rule_port_range {
+ __u16 start;
+ __u16 end;
+};
+
+enum {
+ FRA_UNSPEC,
+ FRA_DST, /* destination address */
+ FRA_SRC, /* source address */
+ FRA_IIFNAME, /* interface name */
+#define FRA_IFNAME FRA_IIFNAME
+ FRA_GOTO, /* target to jump to (FR_ACT_GOTO) */
+ FRA_UNUSED2,
+ FRA_PRIORITY, /* priority/preference */
+ FRA_UNUSED3,
+ FRA_UNUSED4,
+ FRA_UNUSED5,
+ FRA_FWMARK, /* mark */
+ FRA_FLOW, /* flow/class id */
+ FRA_TUN_ID,
+ FRA_SUPPRESS_IFGROUP,
+ FRA_SUPPRESS_PREFIXLEN,
+ FRA_TABLE, /* Extended table id */
+ FRA_FWMASK, /* mask for netfilter mark */
+ FRA_OIFNAME,
+ FRA_PAD,
+ FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
+ FRA_UID_RANGE, /* UID range */
+ FRA_PROTOCOL, /* Originator of the rule */
+ FRA_IP_PROTO, /* ip proto */
+ FRA_SPORT_RANGE, /* sport */
+ FRA_DPORT_RANGE, /* dport */
+ __FRA_MAX
+};
+
+#define FRA_MAX (__FRA_MAX - 1)
+
+enum {
+ FR_ACT_UNSPEC,
+ FR_ACT_TO_TBL, /* Pass to fixed table */
+ FR_ACT_GOTO, /* Jump to another rule */
+ FR_ACT_NOP, /* No operation */
+ FR_ACT_RES3,
+ FR_ACT_RES4,
+ FR_ACT_BLACKHOLE, /* Drop without notification */
+ FR_ACT_UNREACHABLE, /* Drop with ENETUNREACH */
+ FR_ACT_PROHIBIT, /* Drop with EACCES */
+ __FR_ACT_MAX,
+};
+
+#define FR_ACT_MAX (__FR_ACT_MAX - 1)
+
+#endif
diff --git a/src/basic/linux/fou.h b/src/basic/linux/fou.h
new file mode 100644
index 0000000..87c2c9f
--- /dev/null
+++ b/src/basic/linux/fou.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* fou.h - FOU Interface */
+
+#ifndef _UAPI_LINUX_FOU_H
+#define _UAPI_LINUX_FOU_H
+
+/* NETLINK_GENERIC related info
+ */
+#define FOU_GENL_NAME "fou"
+#define FOU_GENL_VERSION 0x1
+
+enum {
+ FOU_ATTR_UNSPEC,
+ FOU_ATTR_PORT, /* u16 */
+ FOU_ATTR_AF, /* u8 */
+ FOU_ATTR_IPPROTO, /* u8 */
+ FOU_ATTR_TYPE, /* u8 */
+ FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */
+ FOU_ATTR_LOCAL_V4, /* u32 */
+ FOU_ATTR_LOCAL_V6, /* in6_addr */
+ FOU_ATTR_PEER_V4, /* u32 */
+ FOU_ATTR_PEER_V6, /* in6_addr */
+ FOU_ATTR_PEER_PORT, /* u16 */
+ FOU_ATTR_IFINDEX, /* s32 */
+
+ __FOU_ATTR_MAX,
+};
+
+#define FOU_ATTR_MAX (__FOU_ATTR_MAX - 1)
+
+enum {
+ FOU_CMD_UNSPEC,
+ FOU_CMD_ADD,
+ FOU_CMD_DEL,
+ FOU_CMD_GET,
+
+ __FOU_CMD_MAX,
+};
+
+enum {
+ FOU_ENCAP_UNSPEC,
+ FOU_ENCAP_DIRECT,
+ FOU_ENCAP_GUE,
+};
+
+#define FOU_CMD_MAX (__FOU_CMD_MAX - 1)
+
+#endif /* _UAPI_LINUX_FOU_H */
diff --git a/src/basic/linux/hdlc/ioctl.h b/src/basic/linux/hdlc/ioctl.h
new file mode 100644
index 0000000..b06341a
--- /dev/null
+++ b/src/basic/linux/hdlc/ioctl.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __HDLC_IOCTL_H__
+#define __HDLC_IOCTL_H__
+
+
+#define GENERIC_HDLC_VERSION 4 /* For synchronization with sethdlc utility */
+
+#define CLOCK_DEFAULT 0 /* Default setting */
+#define CLOCK_EXT 1 /* External TX and RX clock - DTE */
+#define CLOCK_INT 2 /* Internal TX and RX clock - DCE */
+#define CLOCK_TXINT 3 /* Internal TX and external RX clock */
+#define CLOCK_TXFROMRX 4 /* TX clock derived from external RX clock */
+
+
+#define ENCODING_DEFAULT 0 /* Default setting */
+#define ENCODING_NRZ 1
+#define ENCODING_NRZI 2
+#define ENCODING_FM_MARK 3
+#define ENCODING_FM_SPACE 4
+#define ENCODING_MANCHESTER 5
+
+
+#define PARITY_DEFAULT 0 /* Default setting */
+#define PARITY_NONE 1 /* No parity */
+#define PARITY_CRC16_PR0 2 /* CRC16, initial value 0x0000 */
+#define PARITY_CRC16_PR1 3 /* CRC16, initial value 0xFFFF */
+#define PARITY_CRC16_PR0_CCITT 4 /* CRC16, initial 0x0000, ITU-T version */
+#define PARITY_CRC16_PR1_CCITT 5 /* CRC16, initial 0xFFFF, ITU-T version */
+#define PARITY_CRC32_PR0_CCITT 6 /* CRC32, initial value 0x00000000 */
+#define PARITY_CRC32_PR1_CCITT 7 /* CRC32, initial value 0xFFFFFFFF */
+
+#define LMI_DEFAULT 0 /* Default setting */
+#define LMI_NONE 1 /* No LMI, all PVCs are static */
+#define LMI_ANSI 2 /* ANSI Annex D */
+#define LMI_CCITT 3 /* ITU-T Annex A */
+#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+ unsigned int clock_rate; /* bits per second */
+ unsigned int clock_type; /* internal, external, TX-internal etc. */
+ unsigned short loopback;
+} sync_serial_settings; /* V.35, V.24, X.21 */
+
+typedef struct {
+ unsigned int clock_rate; /* bits per second */
+ unsigned int clock_type; /* internal, external, TX-internal etc. */
+ unsigned short loopback;
+ unsigned int slot_map;
+} te1_settings; /* T1, E1 */
+
+typedef struct {
+ unsigned short encoding;
+ unsigned short parity;
+} raw_hdlc_proto;
+
+typedef struct {
+ unsigned int t391;
+ unsigned int t392;
+ unsigned int n391;
+ unsigned int n392;
+ unsigned int n393;
+ unsigned short lmi;
+ unsigned short dce; /* 1 for DCE (network side) operation */
+} fr_proto;
+
+typedef struct {
+ unsigned int dlci;
+} fr_proto_pvc; /* for creating/deleting FR PVCs */
+
+typedef struct {
+ unsigned int dlci;
+ char master[IFNAMSIZ]; /* Name of master FRAD device */
+}fr_proto_pvc_info; /* for returning PVC information only */
+
+typedef struct {
+ unsigned int interval;
+ unsigned int timeout;
+} cisco_proto;
+
+typedef struct {
+ unsigned short dce; /* 1 for DCE (network side) operation */
+ unsigned int modulo; /* modulo (8 = basic / 128 = extended) */
+ unsigned int window; /* frame window size */
+ unsigned int t1; /* timeout t1 */
+ unsigned int t2; /* timeout t2 */
+ unsigned int n2; /* frame retry counter */
+} x25_hdlc_proto;
+
+/* PPP doesn't need any info now - supply length = 0 to ioctl */
+
+#endif /* __ASSEMBLY__ */
+#endif /* __HDLC_IOCTL_H__ */
diff --git a/src/basic/linux/if.h b/src/basic/linux/if.h
new file mode 100644
index 0000000..e79f5c8
--- /dev/null
+++ b/src/basic/linux/if.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Global definitions for the INET interface module.
+ *
+ * Version: @(#)if.h 1.0.2 04/18/93
+ *
+ * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1982-1988
+ * Ross Biro
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IF_H
+#define _LINUX_IF_H
+
+#include <linux/libc-compat.h> /* for compatibility with glibc */
+#include <linux/types.h> /* for "__kernel_caddr_t" et al */
+#include <linux/socket.h> /* for "struct sockaddr" et al */
+
+#ifndef __KERNEL__
+#include <sys/socket.h> /* for struct sockaddr. */
+#endif
+
+#if __UAPI_DEF_IF_IFNAMSIZ
+#define IFNAMSIZ 16
+#endif /* __UAPI_DEF_IF_IFNAMSIZ */
+#define IFALIASZ 256
+#define ALTIFNAMSIZ 128
+#include <linux/hdlc/ioctl.h>
+
+/* For glibc compatibility. An empty enum does not compile. */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || \
+ __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0
+/**
+ * enum net_device_flags - &struct net_device flags
+ *
+ * These are the &struct net_device flags, they can be set by drivers, the
+ * kernel and some can be triggered by userspace. Userspace can query and
+ * set these flags using userspace utilities but there is also a sysfs
+ * entry available for all dev flags which can be queried and set. These flags
+ * are shared for all types of net_devices. The sysfs entries are available
+ * via /sys/class/net/<dev>/flags. Flags which can be toggled through sysfs
+ * are annotated below, note that only a few flags can be toggled and some
+ * other flags are always preserved from the original net_device flags
+ * even if you try to set them via sysfs. Flags which are always preserved
+ * are kept under the flag grouping @IFF_VOLATILE. Flags which are volatile
+ * are annotated below as such.
+ *
+ * You should have a pretty good reason to be extending these flags.
+ *
+ * @IFF_UP: interface is up. Can be toggled through sysfs.
+ * @IFF_BROADCAST: broadcast address valid. Volatile.
+ * @IFF_DEBUG: turn on debugging. Can be toggled through sysfs.
+ * @IFF_LOOPBACK: is a loopback net. Volatile.
+ * @IFF_POINTOPOINT: interface is has p-p link. Volatile.
+ * @IFF_NOTRAILERS: avoid use of trailers. Can be toggled through sysfs.
+ * Volatile.
+ * @IFF_RUNNING: interface RFC2863 OPER_UP. Volatile.
+ * @IFF_NOARP: no ARP protocol. Can be toggled through sysfs. Volatile.
+ * @IFF_PROMISC: receive all packets. Can be toggled through sysfs.
+ * @IFF_ALLMULTI: receive all multicast packets. Can be toggled through
+ * sysfs.
+ * @IFF_MASTER: master of a load balancer. Volatile.
+ * @IFF_SLAVE: slave of a load balancer. Volatile.
+ * @IFF_MULTICAST: Supports multicast. Can be toggled through sysfs.
+ * @IFF_PORTSEL: can set media type. Can be toggled through sysfs.
+ * @IFF_AUTOMEDIA: auto media select active. Can be toggled through sysfs.
+ * @IFF_DYNAMIC: dialup device with changing addresses. Can be toggled
+ * through sysfs.
+ * @IFF_LOWER_UP: driver signals L1 up. Volatile.
+ * @IFF_DORMANT: driver signals dormant. Volatile.
+ * @IFF_ECHO: echo sent packets. Volatile.
+ */
+enum net_device_flags {
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS
+ IFF_UP = 1<<0, /* sysfs */
+ IFF_BROADCAST = 1<<1, /* volatile */
+ IFF_DEBUG = 1<<2, /* sysfs */
+ IFF_LOOPBACK = 1<<3, /* volatile */
+ IFF_POINTOPOINT = 1<<4, /* volatile */
+ IFF_NOTRAILERS = 1<<5, /* sysfs */
+ IFF_RUNNING = 1<<6, /* volatile */
+ IFF_NOARP = 1<<7, /* sysfs */
+ IFF_PROMISC = 1<<8, /* sysfs */
+ IFF_ALLMULTI = 1<<9, /* sysfs */
+ IFF_MASTER = 1<<10, /* volatile */
+ IFF_SLAVE = 1<<11, /* volatile */
+ IFF_MULTICAST = 1<<12, /* sysfs */
+ IFF_PORTSEL = 1<<13, /* sysfs */
+ IFF_AUTOMEDIA = 1<<14, /* sysfs */
+ IFF_DYNAMIC = 1<<15, /* sysfs */
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
+ IFF_LOWER_UP = 1<<16, /* volatile */
+ IFF_DORMANT = 1<<17, /* volatile */
+ IFF_ECHO = 1<<18, /* volatile */
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
+};
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */
+
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS
+#define IFF_UP IFF_UP
+#define IFF_BROADCAST IFF_BROADCAST
+#define IFF_DEBUG IFF_DEBUG
+#define IFF_LOOPBACK IFF_LOOPBACK
+#define IFF_POINTOPOINT IFF_POINTOPOINT
+#define IFF_NOTRAILERS IFF_NOTRAILERS
+#define IFF_RUNNING IFF_RUNNING
+#define IFF_NOARP IFF_NOARP
+#define IFF_PROMISC IFF_PROMISC
+#define IFF_ALLMULTI IFF_ALLMULTI
+#define IFF_MASTER IFF_MASTER
+#define IFF_SLAVE IFF_SLAVE
+#define IFF_MULTICAST IFF_MULTICAST
+#define IFF_PORTSEL IFF_PORTSEL
+#define IFF_AUTOMEDIA IFF_AUTOMEDIA
+#define IFF_DYNAMIC IFF_DYNAMIC
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */
+
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
+#define IFF_LOWER_UP IFF_LOWER_UP
+#define IFF_DORMANT IFF_DORMANT
+#define IFF_ECHO IFF_ECHO
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
+
+#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
+ IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
+
+#define IF_GET_IFACE 0x0001 /* for querying only */
+#define IF_GET_PROTO 0x0002
+
+/* For definitions see hdlc.h */
+#define IF_IFACE_V35 0x1000 /* V.35 serial interface */
+#define IF_IFACE_V24 0x1001 /* V.24 serial interface */
+#define IF_IFACE_X21 0x1002 /* X.21 serial interface */
+#define IF_IFACE_T1 0x1003 /* T1 telco serial interface */
+#define IF_IFACE_E1 0x1004 /* E1 telco serial interface */
+#define IF_IFACE_SYNC_SERIAL 0x1005 /* can't be set by software */
+#define IF_IFACE_X21D 0x1006 /* X.21 Dual Clocking (FarSite) */
+
+/* For definitions see hdlc.h */
+#define IF_PROTO_HDLC 0x2000 /* raw HDLC protocol */
+#define IF_PROTO_PPP 0x2001 /* PPP protocol */
+#define IF_PROTO_CISCO 0x2002 /* Cisco HDLC protocol */
+#define IF_PROTO_FR 0x2003 /* Frame Relay protocol */
+#define IF_PROTO_FR_ADD_PVC 0x2004 /* Create FR PVC */
+#define IF_PROTO_FR_DEL_PVC 0x2005 /* Delete FR PVC */
+#define IF_PROTO_X25 0x2006 /* X.25 */
+#define IF_PROTO_HDLC_ETH 0x2007 /* raw HDLC, Ethernet emulation */
+#define IF_PROTO_FR_ADD_ETH_PVC 0x2008 /* Create FR Ethernet-bridged PVC */
+#define IF_PROTO_FR_DEL_ETH_PVC 0x2009 /* Delete FR Ethernet-bridged PVC */
+#define IF_PROTO_FR_PVC 0x200A /* for reading PVC status */
+#define IF_PROTO_FR_ETH_PVC 0x200B
+#define IF_PROTO_RAW 0x200C /* RAW Socket */
+
+/* RFC 2863 operational status */
+enum {
+ IF_OPER_UNKNOWN,
+ IF_OPER_NOTPRESENT,
+ IF_OPER_DOWN,
+ IF_OPER_LOWERLAYERDOWN,
+ IF_OPER_TESTING,
+ IF_OPER_DORMANT,
+ IF_OPER_UP,
+};
+
+/* link modes */
+enum {
+ IF_LINK_MODE_DEFAULT,
+ IF_LINK_MODE_DORMANT, /* limit upward transition to dormant */
+ IF_LINK_MODE_TESTING, /* limit upward transition to testing */
+};
+
+/*
+ * Device mapping structure. I'd just gone off and designed a
+ * beautiful scheme using only loadable modules with arguments
+ * for driver options and along come the PCMCIA people 8)
+ *
+ * Ah well. The get() side of this is good for WDSETUP, and it'll
+ * be handy for debugging things. The set side is fine for now and
+ * being very small might be worth keeping for clean configuration.
+ */
+
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFMAP
+struct ifmap {
+ unsigned long mem_start;
+ unsigned long mem_end;
+ unsigned short base_addr;
+ unsigned char irq;
+ unsigned char dma;
+ unsigned char port;
+ /* 3 bytes spare */
+};
+#endif /* __UAPI_DEF_IF_IFMAP */
+
+struct if_settings {
+ unsigned int type; /* Type of physical device or protocol */
+ unsigned int size; /* Size of the data allocated by the caller */
+ union {
+ /* {atm/eth/dsl}_settings anyone ? */
+ raw_hdlc_proto *raw_hdlc;
+ cisco_proto *cisco;
+ fr_proto *fr;
+ fr_proto_pvc *fr_pvc;
+ fr_proto_pvc_info *fr_pvc_info;
+ x25_hdlc_proto *x25;
+
+ /* interface settings */
+ sync_serial_settings *sync;
+ te1_settings *te1;
+ } ifs_ifsu;
+};
+
+/*
+ * Interface request structure used for socket
+ * ioctl's. All interface ioctl's must have parameter
+ * definitions which begin with ifr_name. The
+ * remainder may be interface specific.
+ */
+
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFREQ
+struct ifreq {
+#define IFHWADDRLEN 6
+ union
+ {
+ char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
+ } ifr_ifrn;
+
+ union {
+ struct sockaddr ifru_addr;
+ struct sockaddr ifru_dstaddr;
+ struct sockaddr ifru_broadaddr;
+ struct sockaddr ifru_netmask;
+ struct sockaddr ifru_hwaddr;
+ short ifru_flags;
+ int ifru_ivalue;
+ int ifru_mtu;
+ struct ifmap ifru_map;
+ char ifru_slave[IFNAMSIZ]; /* Just fits the size */
+ char ifru_newname[IFNAMSIZ];
+ void * ifru_data;
+ struct if_settings ifru_settings;
+ } ifr_ifru;
+};
+#endif /* __UAPI_DEF_IF_IFREQ */
+
+#define ifr_name ifr_ifrn.ifrn_name /* interface name */
+#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */
+#define ifr_addr ifr_ifru.ifru_addr /* address */
+#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-p lnk */
+#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */
+#define ifr_netmask ifr_ifru.ifru_netmask /* interface net mask */
+#define ifr_flags ifr_ifru.ifru_flags /* flags */
+#define ifr_metric ifr_ifru.ifru_ivalue /* metric */
+#define ifr_mtu ifr_ifru.ifru_mtu /* mtu */
+#define ifr_map ifr_ifru.ifru_map /* device map */
+#define ifr_slave ifr_ifru.ifru_slave /* slave device */
+#define ifr_data ifr_ifru.ifru_data /* for use by interface */
+#define ifr_ifindex ifr_ifru.ifru_ivalue /* interface index */
+#define ifr_bandwidth ifr_ifru.ifru_ivalue /* link bandwidth */
+#define ifr_qlen ifr_ifru.ifru_ivalue /* Queue length */
+#define ifr_newname ifr_ifru.ifru_newname /* New name */
+#define ifr_settings ifr_ifru.ifru_settings /* Device/proto settings*/
+
+/*
+ * Structure used in SIOCGIFCONF request.
+ * Used to retrieve interface configuration
+ * for machine (useful for programs which
+ * must know all networks accessible).
+ */
+
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFCONF
+struct ifconf {
+ int ifc_len; /* size of buffer */
+ union {
+ char *ifcu_buf;
+ struct ifreq *ifcu_req;
+ } ifc_ifcu;
+};
+#endif /* __UAPI_DEF_IF_IFCONF */
+
+#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */
+#define ifc_req ifc_ifcu.ifcu_req /* array of structures */
+
+#endif /* _LINUX_IF_H */
diff --git a/src/basic/linux/if_addr.h b/src/basic/linux/if_addr.h
new file mode 100644
index 0000000..dfcf3ce
--- /dev/null
+++ b/src/basic/linux/if_addr.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_IF_ADDR_H
+#define __LINUX_IF_ADDR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ifaddrmsg {
+ __u8 ifa_family;
+ __u8 ifa_prefixlen; /* The prefix length */
+ __u8 ifa_flags; /* Flags */
+ __u8 ifa_scope; /* Address scope */
+ __u32 ifa_index; /* Link index */
+};
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ *
+ * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags.
+ * If present, the value from struct ifaddrmsg will be ignored.
+ */
+enum {
+ IFA_UNSPEC,
+ IFA_ADDRESS,
+ IFA_LOCAL,
+ IFA_LABEL,
+ IFA_BROADCAST,
+ IFA_ANYCAST,
+ IFA_CACHEINFO,
+ IFA_MULTICAST,
+ IFA_FLAGS,
+ IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */
+ IFA_TARGET_NETNSID,
+ __IFA_MAX,
+};
+
+#define IFA_MAX (__IFA_MAX - 1)
+
+/* ifa_flags */
+#define IFA_F_SECONDARY 0x01
+#define IFA_F_TEMPORARY IFA_F_SECONDARY
+
+#define IFA_F_NODAD 0x02
+#define IFA_F_OPTIMISTIC 0x04
+#define IFA_F_DADFAILED 0x08
+#define IFA_F_HOMEADDRESS 0x10
+#define IFA_F_DEPRECATED 0x20
+#define IFA_F_TENTATIVE 0x40
+#define IFA_F_PERMANENT 0x80
+#define IFA_F_MANAGETEMPADDR 0x100
+#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
+
+struct ifa_cacheinfo {
+ __u32 ifa_prefered;
+ __u32 ifa_valid;
+ __u32 cstamp; /* created timestamp, hundredths of seconds */
+ __u32 tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
+#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
+#endif
+
+#endif
diff --git a/src/basic/linux/if_arp.h b/src/basic/linux/if_arp.h
new file mode 100644
index 0000000..c3cc5a9
--- /dev/null
+++ b/src/basic/linux/if_arp.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Global definitions for the ARP (RFC 826) protocol.
+ *
+ * Version: @(#)if_arp.h 1.0.1 04/16/93
+ *
+ * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1986-1988
+ * Portions taken from the KA9Q/NOS (v2.00m PA0GRI) source.
+ * Ross Biro
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Florian La Roche,
+ * Jonathan Layes <layes@loran.com>
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> ARPHRD_HWX25
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_LINUX_IF_ARP_H
+#define _UAPI_LINUX_IF_ARP_H
+
+#include <linux/netdevice.h>
+
+/* ARP protocol HARDWARE identifiers. */
+#define ARPHRD_NETROM 0 /* from KA9Q: NET/ROM pseudo */
+#define ARPHRD_ETHER 1 /* Ethernet 10Mbps */
+#define ARPHRD_EETHER 2 /* Experimental Ethernet */
+#define ARPHRD_AX25 3 /* AX.25 Level 2 */
+#define ARPHRD_PRONET 4 /* PROnet token ring */
+#define ARPHRD_CHAOS 5 /* Chaosnet */
+#define ARPHRD_IEEE802 6 /* IEEE 802.2 Ethernet/TR/TB */
+#define ARPHRD_ARCNET 7 /* ARCnet */
+#define ARPHRD_APPLETLK 8 /* APPLEtalk */
+#define ARPHRD_DLCI 15 /* Frame Relay DLCI */
+#define ARPHRD_ATM 19 /* ATM */
+#define ARPHRD_METRICOM 23 /* Metricom STRIP (new IANA id) */
+#define ARPHRD_IEEE1394 24 /* IEEE 1394 IPv4 - RFC 2734 */
+#define ARPHRD_EUI64 27 /* EUI-64 */
+#define ARPHRD_INFINIBAND 32 /* InfiniBand */
+
+/* Dummy types for non ARP hardware */
+#define ARPHRD_SLIP 256
+#define ARPHRD_CSLIP 257
+#define ARPHRD_SLIP6 258
+#define ARPHRD_CSLIP6 259
+#define ARPHRD_RSRVD 260 /* Notional KISS type */
+#define ARPHRD_ADAPT 264
+#define ARPHRD_ROSE 270
+#define ARPHRD_X25 271 /* CCITT X.25 */
+#define ARPHRD_HWX25 272 /* Boards with X.25 in firmware */
+#define ARPHRD_CAN 280 /* Controller Area Network */
+#define ARPHRD_PPP 512
+#define ARPHRD_CISCO 513 /* Cisco HDLC */
+#define ARPHRD_HDLC ARPHRD_CISCO
+#define ARPHRD_LAPB 516 /* LAPB */
+#define ARPHRD_DDCMP 517 /* Digital's DDCMP protocol */
+#define ARPHRD_RAWHDLC 518 /* Raw HDLC */
+#define ARPHRD_RAWIP 519 /* Raw IP */
+
+#define ARPHRD_TUNNEL 768 /* IPIP tunnel */
+#define ARPHRD_TUNNEL6 769 /* IP6IP6 tunnel */
+#define ARPHRD_FRAD 770 /* Frame Relay Access Device */
+#define ARPHRD_SKIP 771 /* SKIP vif */
+#define ARPHRD_LOOPBACK 772 /* Loopback device */
+#define ARPHRD_LOCALTLK 773 /* Localtalk device */
+#define ARPHRD_FDDI 774 /* Fiber Distributed Data Interface */
+#define ARPHRD_BIF 775 /* AP1000 BIF */
+#define ARPHRD_SIT 776 /* sit0 device - IPv6-in-IPv4 */
+#define ARPHRD_IPDDP 777 /* IP over DDP tunneller */
+#define ARPHRD_IPGRE 778 /* GRE over IP */
+#define ARPHRD_PIMREG 779 /* PIMSM register interface */
+#define ARPHRD_HIPPI 780 /* High Performance Parallel Interface */
+#define ARPHRD_ASH 781 /* Nexus 64Mbps Ash */
+#define ARPHRD_ECONET 782 /* Acorn Econet */
+#define ARPHRD_IRDA 783 /* Linux-IrDA */
+/* ARP works differently on different FC media .. so */
+#define ARPHRD_FCPP 784 /* Point to point fibrechannel */
+#define ARPHRD_FCAL 785 /* Fibrechannel arbitrated loop */
+#define ARPHRD_FCPL 786 /* Fibrechannel public loop */
+#define ARPHRD_FCFABRIC 787 /* Fibrechannel fabric */
+ /* 787->799 reserved for fibrechannel media types */
+#define ARPHRD_IEEE802_TR 800 /* Magic type ident for TR */
+#define ARPHRD_IEEE80211 801 /* IEEE 802.11 */
+#define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */
+#define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */
+#define ARPHRD_IEEE802154 804
+#define ARPHRD_IEEE802154_MONITOR 805 /* IEEE 802.15.4 network monitor */
+
+#define ARPHRD_PHONET 820 /* PhoNet media type */
+#define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */
+#define ARPHRD_CAIF 822 /* CAIF media type */
+#define ARPHRD_IP6GRE 823 /* GRE over IPv6 */
+#define ARPHRD_NETLINK 824 /* Netlink header */
+#define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */
+#define ARPHRD_VSOCKMON 826 /* Vsock monitor header */
+
+#define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */
+#define ARPHRD_NONE 0xFFFE /* zero header length */
+
+/* ARP protocol opcodes. */
+#define ARPOP_REQUEST 1 /* ARP request */
+#define ARPOP_REPLY 2 /* ARP reply */
+#define ARPOP_RREQUEST 3 /* RARP request */
+#define ARPOP_RREPLY 4 /* RARP reply */
+#define ARPOP_InREQUEST 8 /* InARP request */
+#define ARPOP_InREPLY 9 /* InARP reply */
+#define ARPOP_NAK 10 /* (ATM)ARP NAK */
+
+
+/* ARP ioctl request. */
+struct arpreq {
+ struct sockaddr arp_pa; /* protocol address */
+ struct sockaddr arp_ha; /* hardware address */
+ int arp_flags; /* flags */
+ struct sockaddr arp_netmask; /* netmask (only for proxy arps) */
+ char arp_dev[IFNAMSIZ];
+};
+
+struct arpreq_old {
+ struct sockaddr arp_pa; /* protocol address */
+ struct sockaddr arp_ha; /* hardware address */
+ int arp_flags; /* flags */
+ struct sockaddr arp_netmask; /* netmask (only for proxy arps) */
+};
+
+/* ARP Flag values. */
+#define ATF_COM 0x02 /* completed entry (ha valid) */
+#define ATF_PERM 0x04 /* permanent entry */
+#define ATF_PUBL 0x08 /* publish entry */
+#define ATF_USETRAILERS 0x10 /* has requested trailers */
+#define ATF_NETMASK 0x20 /* want to use a netmask (only
+ for proxy entries) */
+#define ATF_DONTPUB 0x40 /* don't answer this addresses */
+
+/*
+ * This structure defines an ethernet arp header.
+ */
+
+struct arphdr {
+ __be16 ar_hrd; /* format of hardware address */
+ __be16 ar_pro; /* format of protocol address */
+ unsigned char ar_hln; /* length of hardware address */
+ unsigned char ar_pln; /* length of protocol address */
+ __be16 ar_op; /* ARP opcode (command) */
+
+#if 0
+ /*
+ * Ethernet looks like this : This bit is variable sized however...
+ */
+ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */
+ unsigned char ar_sip[4]; /* sender IP address */
+ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */
+ unsigned char ar_tip[4]; /* target IP address */
+#endif
+
+};
+
+
+#endif /* _UAPI_LINUX_IF_ARP_H */
diff --git a/src/basic/linux/if_bonding.h b/src/basic/linux/if_bonding.h
new file mode 100644
index 0000000..45f3750
--- /dev/null
+++ b/src/basic/linux/if_bonding.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
+/*
+ * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'.
+ *
+ *
+ * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes
+ * NCM: Network and Communications Management, Inc.
+ *
+ * BUT, I'm the one who modified it for ethernet, so:
+ * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU Public License, incorporated herein by reference.
+ *
+ * 2003/03/18 - Amir Noam <amir.noam at intel dot com>
+ * - Added support for getting slave's speed and duplex via ethtool.
+ * Needed for 802.3ad and other future modes.
+ *
+ * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
+ * Shmulik Hen <shmulik.hen at intel dot com>
+ * - Enable support of modes that need to use the unique mac address of
+ * each slave.
+ *
+ * 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
+ * Amir Noam <amir.noam at intel dot com>
+ * - Moved driver's private data types to bonding.h
+ *
+ * 2003/03/18 - Amir Noam <amir.noam at intel dot com>,
+ * Tsippy Mendelson <tsippy.mendelson at intel dot com> and
+ * Shmulik Hen <shmulik.hen at intel dot com>
+ * - Added support for IEEE 802.3ad Dynamic link aggregation mode.
+ *
+ * 2003/05/01 - Amir Noam <amir.noam at intel dot com>
+ * - Added ABI version control to restore compatibility between
+ * new/old ifenslave and new/old bonding.
+ *
+ * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com>
+ * - Code cleanup and style changes
+ *
+ * 2005/05/05 - Jason Gabler <jygabler at lbl dot gov>
+ * - added definitions for various XOR hashing policies
+ */
+
+#ifndef _LINUX_IF_BONDING_H
+#define _LINUX_IF_BONDING_H
+
+#include <linux/if.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+/* userland - kernel ABI version (2003/05/08) */
+#define BOND_ABI_VERSION 2
+
+/*
+ * We can remove these ioctl definitions in 2.5. People should use the
+ * SIOC*** versions of them instead
+ */
+#define BOND_ENSLAVE_OLD (SIOCDEVPRIVATE)
+#define BOND_RELEASE_OLD (SIOCDEVPRIVATE + 1)
+#define BOND_SETHWADDR_OLD (SIOCDEVPRIVATE + 2)
+#define BOND_SLAVE_INFO_QUERY_OLD (SIOCDEVPRIVATE + 11)
+#define BOND_INFO_QUERY_OLD (SIOCDEVPRIVATE + 12)
+#define BOND_CHANGE_ACTIVE_OLD (SIOCDEVPRIVATE + 13)
+
+#define BOND_CHECK_MII_STATUS (SIOCGMIIPHY)
+
+#define BOND_MODE_ROUNDROBIN 0
+#define BOND_MODE_ACTIVEBACKUP 1
+#define BOND_MODE_XOR 2
+#define BOND_MODE_BROADCAST 3
+#define BOND_MODE_8023AD 4
+#define BOND_MODE_TLB 5
+#define BOND_MODE_ALB 6 /* TLB + RLB (receive load balancing) */
+
+/* each slave's link has 4 states */
+#define BOND_LINK_UP 0 /* link is up and running */
+#define BOND_LINK_FAIL 1 /* link has just gone down */
+#define BOND_LINK_DOWN 2 /* link has been down for too long time */
+#define BOND_LINK_BACK 3 /* link is going back */
+
+/* each slave has several states */
+#define BOND_STATE_ACTIVE 0 /* link is active */
+#define BOND_STATE_BACKUP 1 /* link is backup */
+
+#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */
+
+#define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */
+
+#define BOND_DEFAULT_RESEND_IGMP 1 /* Default number of IGMP membership reports */
+
+/* hashing types */
+#define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */
+#define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */
+#define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */
+#define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */
+#define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */
+
+/* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */
+#define LACP_STATE_LACP_ACTIVITY 0x1
+#define LACP_STATE_LACP_TIMEOUT 0x2
+#define LACP_STATE_AGGREGATION 0x4
+#define LACP_STATE_SYNCHRONIZATION 0x8
+#define LACP_STATE_COLLECTING 0x10
+#define LACP_STATE_DISTRIBUTING 0x20
+#define LACP_STATE_DEFAULTED 0x40
+#define LACP_STATE_EXPIRED 0x80
+
+typedef struct ifbond {
+ __s32 bond_mode;
+ __s32 num_slaves;
+ __s32 miimon;
+} ifbond;
+
+typedef struct ifslave {
+ __s32 slave_id; /* Used as an IN param to the BOND_SLAVE_INFO_QUERY ioctl */
+ char slave_name[IFNAMSIZ];
+ __s8 link;
+ __s8 state;
+ __u32 link_failure_count;
+} ifslave;
+
+struct ad_info {
+ __u16 aggregator_id;
+ __u16 ports;
+ __u16 actor_key;
+ __u16 partner_key;
+ __u8 partner_system[ETH_ALEN];
+};
+
+/* Embedded inside LINK_XSTATS_TYPE_BOND */
+enum {
+ BOND_XSTATS_UNSPEC,
+ BOND_XSTATS_3AD,
+ __BOND_XSTATS_MAX
+};
+#define BOND_XSTATS_MAX (__BOND_XSTATS_MAX - 1)
+
+/* Embedded inside BOND_XSTATS_3AD */
+enum {
+ BOND_3AD_STAT_LACPDU_RX,
+ BOND_3AD_STAT_LACPDU_TX,
+ BOND_3AD_STAT_LACPDU_UNKNOWN_RX,
+ BOND_3AD_STAT_LACPDU_ILLEGAL_RX,
+ BOND_3AD_STAT_MARKER_RX,
+ BOND_3AD_STAT_MARKER_TX,
+ BOND_3AD_STAT_MARKER_RESP_RX,
+ BOND_3AD_STAT_MARKER_RESP_TX,
+ BOND_3AD_STAT_MARKER_UNKNOWN_RX,
+ BOND_3AD_STAT_PAD,
+ __BOND_3AD_STAT_MAX
+};
+#define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1)
+
+#endif /* _LINUX_IF_BONDING_H */
+
+/*
+ * Local variables:
+ * version-control: t
+ * kept-new-versions: 5
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
+
diff --git a/src/basic/linux/if_bridge.h b/src/basic/linux/if_bridge.h
new file mode 100644
index 0000000..c1227ae
--- /dev/null
+++ b/src/basic/linux/if_bridge.h
@@ -0,0 +1,575 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Linux ethernet bridge
+ *
+ * Authors:
+ * Lennert Buytenhek <buytenh@gnu.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_IF_BRIDGE_H
+#define _UAPI_LINUX_IF_BRIDGE_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+
+#define SYSFS_BRIDGE_ATTR "bridge"
+#define SYSFS_BRIDGE_FDB "brforward"
+#define SYSFS_BRIDGE_PORT_SUBDIR "brif"
+#define SYSFS_BRIDGE_PORT_ATTR "brport"
+#define SYSFS_BRIDGE_PORT_LINK "bridge"
+
+#define BRCTL_VERSION 1
+
+#define BRCTL_GET_VERSION 0
+#define BRCTL_GET_BRIDGES 1
+#define BRCTL_ADD_BRIDGE 2
+#define BRCTL_DEL_BRIDGE 3
+#define BRCTL_ADD_IF 4
+#define BRCTL_DEL_IF 5
+#define BRCTL_GET_BRIDGE_INFO 6
+#define BRCTL_GET_PORT_LIST 7
+#define BRCTL_SET_BRIDGE_FORWARD_DELAY 8
+#define BRCTL_SET_BRIDGE_HELLO_TIME 9
+#define BRCTL_SET_BRIDGE_MAX_AGE 10
+#define BRCTL_SET_AGEING_TIME 11
+#define BRCTL_SET_GC_INTERVAL 12
+#define BRCTL_GET_PORT_INFO 13
+#define BRCTL_SET_BRIDGE_STP_STATE 14
+#define BRCTL_SET_BRIDGE_PRIORITY 15
+#define BRCTL_SET_PORT_PRIORITY 16
+#define BRCTL_SET_PATH_COST 17
+#define BRCTL_GET_FDB_ENTRIES 18
+
+#define BR_STATE_DISABLED 0
+#define BR_STATE_LISTENING 1
+#define BR_STATE_LEARNING 2
+#define BR_STATE_FORWARDING 3
+#define BR_STATE_BLOCKING 4
+
+struct __bridge_info {
+ __u64 designated_root;
+ __u64 bridge_id;
+ __u32 root_path_cost;
+ __u32 max_age;
+ __u32 hello_time;
+ __u32 forward_delay;
+ __u32 bridge_max_age;
+ __u32 bridge_hello_time;
+ __u32 bridge_forward_delay;
+ __u8 topology_change;
+ __u8 topology_change_detected;
+ __u8 root_port;
+ __u8 stp_enabled;
+ __u32 ageing_time;
+ __u32 gc_interval;
+ __u32 hello_timer_value;
+ __u32 tcn_timer_value;
+ __u32 topology_change_timer_value;
+ __u32 gc_timer_value;
+};
+
+struct __port_info {
+ __u64 designated_root;
+ __u64 designated_bridge;
+ __u16 port_id;
+ __u16 designated_port;
+ __u32 path_cost;
+ __u32 designated_cost;
+ __u8 state;
+ __u8 top_change_ack;
+ __u8 config_pending;
+ __u8 unused0;
+ __u32 message_age_timer_value;
+ __u32 forward_delay_timer_value;
+ __u32 hold_timer_value;
+};
+
+struct __fdb_entry {
+ __u8 mac_addr[ETH_ALEN];
+ __u8 port_no;
+ __u8 is_local;
+ __u32 ageing_timer_value;
+ __u8 port_hi;
+ __u8 pad0;
+ __u16 unused;
+};
+
+/* Bridge Flags */
+#define BRIDGE_FLAGS_MASTER 1 /* Bridge command to/from master */
+#define BRIDGE_FLAGS_SELF 2 /* Bridge command to/from lowerdev */
+
+#define BRIDGE_MODE_VEB 0 /* Default loopback mode */
+#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */
+#define BRIDGE_MODE_UNDEF 0xFFFF /* mode undefined */
+
+/* Bridge management nested attributes
+ * [IFLA_AF_SPEC] = {
+ * [IFLA_BRIDGE_FLAGS]
+ * [IFLA_BRIDGE_MODE]
+ * [IFLA_BRIDGE_VLAN_INFO]
+ * }
+ */
+enum {
+ IFLA_BRIDGE_FLAGS,
+ IFLA_BRIDGE_MODE,
+ IFLA_BRIDGE_VLAN_INFO,
+ IFLA_BRIDGE_VLAN_TUNNEL_INFO,
+ IFLA_BRIDGE_MRP,
+ __IFLA_BRIDGE_MAX,
+};
+#define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
+
+#define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */
+#define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */
+#define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */
+#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */
+#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */
+#define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */
+#define BRIDGE_VLAN_INFO_ONLY_OPTS (1<<6) /* Skip create/delete/flags */
+
+struct bridge_vlan_info {
+ __u16 flags;
+ __u16 vid;
+};
+
+enum {
+ IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC,
+ IFLA_BRIDGE_VLAN_TUNNEL_ID,
+ IFLA_BRIDGE_VLAN_TUNNEL_VID,
+ IFLA_BRIDGE_VLAN_TUNNEL_FLAGS,
+ __IFLA_BRIDGE_VLAN_TUNNEL_MAX,
+};
+
+#define IFLA_BRIDGE_VLAN_TUNNEL_MAX (__IFLA_BRIDGE_VLAN_TUNNEL_MAX - 1)
+
+struct bridge_vlan_xstats {
+ __u64 rx_bytes;
+ __u64 rx_packets;
+ __u64 tx_bytes;
+ __u64 tx_packets;
+ __u16 vid;
+ __u16 flags;
+ __u32 pad2;
+};
+
+enum {
+ IFLA_BRIDGE_MRP_UNSPEC,
+ IFLA_BRIDGE_MRP_INSTANCE,
+ IFLA_BRIDGE_MRP_PORT_STATE,
+ IFLA_BRIDGE_MRP_PORT_ROLE,
+ IFLA_BRIDGE_MRP_RING_STATE,
+ IFLA_BRIDGE_MRP_RING_ROLE,
+ IFLA_BRIDGE_MRP_START_TEST,
+ IFLA_BRIDGE_MRP_INFO,
+ IFLA_BRIDGE_MRP_IN_ROLE,
+ IFLA_BRIDGE_MRP_IN_STATE,
+ IFLA_BRIDGE_MRP_START_IN_TEST,
+ __IFLA_BRIDGE_MRP_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_INSTANCE_UNSPEC,
+ IFLA_BRIDGE_MRP_INSTANCE_RING_ID,
+ IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX,
+ IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX,
+ IFLA_BRIDGE_MRP_INSTANCE_PRIO,
+ __IFLA_BRIDGE_MRP_INSTANCE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_INSTANCE_MAX (__IFLA_BRIDGE_MRP_INSTANCE_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC,
+ IFLA_BRIDGE_MRP_PORT_STATE_STATE,
+ __IFLA_BRIDGE_MRP_PORT_STATE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_PORT_STATE_MAX (__IFLA_BRIDGE_MRP_PORT_STATE_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC,
+ IFLA_BRIDGE_MRP_PORT_ROLE_ROLE,
+ __IFLA_BRIDGE_MRP_PORT_ROLE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_PORT_ROLE_MAX (__IFLA_BRIDGE_MRP_PORT_ROLE_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_RING_STATE_UNSPEC,
+ IFLA_BRIDGE_MRP_RING_STATE_RING_ID,
+ IFLA_BRIDGE_MRP_RING_STATE_STATE,
+ __IFLA_BRIDGE_MRP_RING_STATE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_RING_STATE_MAX (__IFLA_BRIDGE_MRP_RING_STATE_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC,
+ IFLA_BRIDGE_MRP_RING_ROLE_RING_ID,
+ IFLA_BRIDGE_MRP_RING_ROLE_ROLE,
+ __IFLA_BRIDGE_MRP_RING_ROLE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_RING_ROLE_MAX (__IFLA_BRIDGE_MRP_RING_ROLE_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_START_TEST_UNSPEC,
+ IFLA_BRIDGE_MRP_START_TEST_RING_ID,
+ IFLA_BRIDGE_MRP_START_TEST_INTERVAL,
+ IFLA_BRIDGE_MRP_START_TEST_MAX_MISS,
+ IFLA_BRIDGE_MRP_START_TEST_PERIOD,
+ IFLA_BRIDGE_MRP_START_TEST_MONITOR,
+ __IFLA_BRIDGE_MRP_START_TEST_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_INFO_UNSPEC,
+ IFLA_BRIDGE_MRP_INFO_RING_ID,
+ IFLA_BRIDGE_MRP_INFO_P_IFINDEX,
+ IFLA_BRIDGE_MRP_INFO_S_IFINDEX,
+ IFLA_BRIDGE_MRP_INFO_PRIO,
+ IFLA_BRIDGE_MRP_INFO_RING_STATE,
+ IFLA_BRIDGE_MRP_INFO_RING_ROLE,
+ IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL,
+ IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS,
+ IFLA_BRIDGE_MRP_INFO_TEST_MONITOR,
+ IFLA_BRIDGE_MRP_INFO_I_IFINDEX,
+ IFLA_BRIDGE_MRP_INFO_IN_STATE,
+ IFLA_BRIDGE_MRP_INFO_IN_ROLE,
+ IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL,
+ IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS,
+ __IFLA_BRIDGE_MRP_INFO_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_INFO_MAX (__IFLA_BRIDGE_MRP_INFO_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_IN_STATE_UNSPEC,
+ IFLA_BRIDGE_MRP_IN_STATE_IN_ID,
+ IFLA_BRIDGE_MRP_IN_STATE_STATE,
+ __IFLA_BRIDGE_MRP_IN_STATE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_IN_STATE_MAX (__IFLA_BRIDGE_MRP_IN_STATE_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC,
+ IFLA_BRIDGE_MRP_IN_ROLE_RING_ID,
+ IFLA_BRIDGE_MRP_IN_ROLE_IN_ID,
+ IFLA_BRIDGE_MRP_IN_ROLE_ROLE,
+ IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX,
+ __IFLA_BRIDGE_MRP_IN_ROLE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_IN_ROLE_MAX (__IFLA_BRIDGE_MRP_IN_ROLE_MAX - 1)
+
+enum {
+ IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC,
+ IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID,
+ IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL,
+ IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS,
+ IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD,
+ __IFLA_BRIDGE_MRP_START_IN_TEST_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_START_IN_TEST_MAX (__IFLA_BRIDGE_MRP_START_IN_TEST_MAX - 1)
+
+struct br_mrp_instance {
+ __u32 ring_id;
+ __u32 p_ifindex;
+ __u32 s_ifindex;
+ __u16 prio;
+};
+
+struct br_mrp_ring_state {
+ __u32 ring_id;
+ __u32 ring_state;
+};
+
+struct br_mrp_ring_role {
+ __u32 ring_id;
+ __u32 ring_role;
+};
+
+struct br_mrp_start_test {
+ __u32 ring_id;
+ __u32 interval;
+ __u32 max_miss;
+ __u32 period;
+ __u32 monitor;
+};
+
+struct br_mrp_in_state {
+ __u32 in_state;
+ __u16 in_id;
+};
+
+struct br_mrp_in_role {
+ __u32 ring_id;
+ __u32 in_role;
+ __u32 i_ifindex;
+ __u16 in_id;
+};
+
+struct br_mrp_start_in_test {
+ __u32 interval;
+ __u32 max_miss;
+ __u32 period;
+ __u16 in_id;
+};
+
+struct bridge_stp_xstats {
+ __u64 transition_blk;
+ __u64 transition_fwd;
+ __u64 rx_bpdu;
+ __u64 tx_bpdu;
+ __u64 rx_tcn;
+ __u64 tx_tcn;
+};
+
+/* Bridge vlan RTM header */
+struct br_vlan_msg {
+ __u8 family;
+ __u8 reserved1;
+ __u16 reserved2;
+ __u32 ifindex;
+};
+
+enum {
+ BRIDGE_VLANDB_DUMP_UNSPEC,
+ BRIDGE_VLANDB_DUMP_FLAGS,
+ __BRIDGE_VLANDB_DUMP_MAX,
+};
+#define BRIDGE_VLANDB_DUMP_MAX (__BRIDGE_VLANDB_DUMP_MAX - 1)
+
+/* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */
+#define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */
+
+/* Bridge vlan RTM attributes
+ * [BRIDGE_VLANDB_ENTRY] = {
+ * [BRIDGE_VLANDB_ENTRY_INFO]
+ * ...
+ * }
+ */
+enum {
+ BRIDGE_VLANDB_UNSPEC,
+ BRIDGE_VLANDB_ENTRY,
+ __BRIDGE_VLANDB_MAX,
+};
+#define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1)
+
+enum {
+ BRIDGE_VLANDB_ENTRY_UNSPEC,
+ BRIDGE_VLANDB_ENTRY_INFO,
+ BRIDGE_VLANDB_ENTRY_RANGE,
+ BRIDGE_VLANDB_ENTRY_STATE,
+ BRIDGE_VLANDB_ENTRY_TUNNEL_INFO,
+ BRIDGE_VLANDB_ENTRY_STATS,
+ __BRIDGE_VLANDB_ENTRY_MAX,
+};
+#define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)
+
+/* [BRIDGE_VLANDB_ENTRY] = {
+ * [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = {
+ * [BRIDGE_VLANDB_TINFO_ID]
+ * ...
+ * }
+ * }
+ */
+enum {
+ BRIDGE_VLANDB_TINFO_UNSPEC,
+ BRIDGE_VLANDB_TINFO_ID,
+ BRIDGE_VLANDB_TINFO_CMD,
+ __BRIDGE_VLANDB_TINFO_MAX,
+};
+#define BRIDGE_VLANDB_TINFO_MAX (__BRIDGE_VLANDB_TINFO_MAX - 1)
+
+/* [BRIDGE_VLANDB_ENTRY] = {
+ * [BRIDGE_VLANDB_ENTRY_STATS] = {
+ * [BRIDGE_VLANDB_STATS_RX_BYTES]
+ * ...
+ * }
+ * ...
+ * }
+ */
+enum {
+ BRIDGE_VLANDB_STATS_UNSPEC,
+ BRIDGE_VLANDB_STATS_RX_BYTES,
+ BRIDGE_VLANDB_STATS_RX_PACKETS,
+ BRIDGE_VLANDB_STATS_TX_BYTES,
+ BRIDGE_VLANDB_STATS_TX_PACKETS,
+ BRIDGE_VLANDB_STATS_PAD,
+ __BRIDGE_VLANDB_STATS_MAX,
+};
+#define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1)
+
+/* Bridge multicast database attributes
+ * [MDBA_MDB] = {
+ * [MDBA_MDB_ENTRY] = {
+ * [MDBA_MDB_ENTRY_INFO] {
+ * struct br_mdb_entry
+ * [MDBA_MDB_EATTR attributes]
+ * }
+ * }
+ * }
+ * [MDBA_ROUTER] = {
+ * [MDBA_ROUTER_PORT] = {
+ * u32 ifindex
+ * [MDBA_ROUTER_PATTR attributes]
+ * }
+ * }
+ */
+enum {
+ MDBA_UNSPEC,
+ MDBA_MDB,
+ MDBA_ROUTER,
+ __MDBA_MAX,
+};
+#define MDBA_MAX (__MDBA_MAX - 1)
+
+enum {
+ MDBA_MDB_UNSPEC,
+ MDBA_MDB_ENTRY,
+ __MDBA_MDB_MAX,
+};
+#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1)
+
+enum {
+ MDBA_MDB_ENTRY_UNSPEC,
+ MDBA_MDB_ENTRY_INFO,
+ __MDBA_MDB_ENTRY_MAX,
+};
+#define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1)
+
+/* per mdb entry additional attributes */
+enum {
+ MDBA_MDB_EATTR_UNSPEC,
+ MDBA_MDB_EATTR_TIMER,
+ __MDBA_MDB_EATTR_MAX
+};
+#define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
+
+/* multicast router types */
+enum {
+ MDB_RTR_TYPE_DISABLED,
+ MDB_RTR_TYPE_TEMP_QUERY,
+ MDB_RTR_TYPE_PERM,
+ MDB_RTR_TYPE_TEMP
+};
+
+enum {
+ MDBA_ROUTER_UNSPEC,
+ MDBA_ROUTER_PORT,
+ __MDBA_ROUTER_MAX,
+};
+#define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1)
+
+/* router port attributes */
+enum {
+ MDBA_ROUTER_PATTR_UNSPEC,
+ MDBA_ROUTER_PATTR_TIMER,
+ MDBA_ROUTER_PATTR_TYPE,
+ __MDBA_ROUTER_PATTR_MAX
+};
+#define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1)
+
+struct br_port_msg {
+ __u8 family;
+ __u32 ifindex;
+};
+
+struct br_mdb_entry {
+ __u32 ifindex;
+#define MDB_TEMPORARY 0
+#define MDB_PERMANENT 1
+ __u8 state;
+#define MDB_FLAGS_OFFLOAD (1 << 0)
+#define MDB_FLAGS_FAST_LEAVE (1 << 1)
+ __u8 flags;
+ __u16 vid;
+ struct {
+ union {
+ __be32 ip4;
+ struct in6_addr ip6;
+ } u;
+ __be16 proto;
+ } addr;
+};
+
+enum {
+ MDBA_SET_ENTRY_UNSPEC,
+ MDBA_SET_ENTRY,
+ __MDBA_SET_ENTRY_MAX,
+};
+#define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1)
+
+/* Embedded inside LINK_XSTATS_TYPE_BRIDGE */
+enum {
+ BRIDGE_XSTATS_UNSPEC,
+ BRIDGE_XSTATS_VLAN,
+ BRIDGE_XSTATS_MCAST,
+ BRIDGE_XSTATS_PAD,
+ BRIDGE_XSTATS_STP,
+ __BRIDGE_XSTATS_MAX
+};
+#define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1)
+
+enum {
+ BR_MCAST_DIR_RX,
+ BR_MCAST_DIR_TX,
+ BR_MCAST_DIR_SIZE
+};
+
+/* IGMP/MLD statistics */
+struct br_mcast_stats {
+ __u64 igmp_v1queries[BR_MCAST_DIR_SIZE];
+ __u64 igmp_v2queries[BR_MCAST_DIR_SIZE];
+ __u64 igmp_v3queries[BR_MCAST_DIR_SIZE];
+ __u64 igmp_leaves[BR_MCAST_DIR_SIZE];
+ __u64 igmp_v1reports[BR_MCAST_DIR_SIZE];
+ __u64 igmp_v2reports[BR_MCAST_DIR_SIZE];
+ __u64 igmp_v3reports[BR_MCAST_DIR_SIZE];
+ __u64 igmp_parse_errors;
+
+ __u64 mld_v1queries[BR_MCAST_DIR_SIZE];
+ __u64 mld_v2queries[BR_MCAST_DIR_SIZE];
+ __u64 mld_leaves[BR_MCAST_DIR_SIZE];
+ __u64 mld_v1reports[BR_MCAST_DIR_SIZE];
+ __u64 mld_v2reports[BR_MCAST_DIR_SIZE];
+ __u64 mld_parse_errors;
+
+ __u64 mcast_bytes[BR_MCAST_DIR_SIZE];
+ __u64 mcast_packets[BR_MCAST_DIR_SIZE];
+};
+
+/* bridge boolean options
+ * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets
+ *
+ * IMPORTANT: if adding a new option do not forget to handle
+ * it in br_boolopt_toggle/get and bridge sysfs
+ */
+enum br_boolopt_id {
+ BR_BOOLOPT_NO_LL_LEARN,
+ BR_BOOLOPT_MAX
+};
+
+/* struct br_boolopt_multi - change multiple bridge boolean options
+ *
+ * @optval: new option values (bit per option)
+ * @optmask: options to change (bit per option)
+ */
+struct br_boolopt_multi {
+ __u32 optval;
+ __u32 optmask;
+};
+#endif /* _UAPI_LINUX_IF_BRIDGE_H */
diff --git a/src/basic/linux/if_ether.h b/src/basic/linux/if_ether.h
new file mode 100644
index 0000000..d6de2b1
--- /dev/null
+++ b/src/basic/linux/if_ether.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Global definitions for the Ethernet IEEE 802.3 interface.
+ *
+ * Version: @(#)if_ether.h 1.0.1a 02/08/94
+ *
+ * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Donald Becker, <becker@super.org>
+ * Alan Cox, <alan@lxorguk.ukuu.org.uk>
+ * Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_IF_ETHER_H
+#define _UAPI_LINUX_IF_ETHER_H
+
+#include <linux/types.h>
+
+/*
+ * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
+ * and FCS/CRC (frame check sequence).
+ */
+
+#define ETH_ALEN 6 /* Octets in one ethernet addr */
+#define ETH_TLEN 2 /* Octets in ethernet type field */
+#define ETH_HLEN 14 /* Total octets in header. */
+#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */
+#define ETH_DATA_LEN 1500 /* Max. octets in payload */
+#define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */
+#define ETH_FCS_LEN 4 /* Octets in the FCS */
+
+#define ETH_MIN_MTU 68 /* Min IPv4 MTU per RFC791 */
+#define ETH_MAX_MTU 0xFFFFU /* 65535, same as IP_MAX_MTU */
+
+/*
+ * These are the defined Ethernet Protocol ID's.
+ */
+
+#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */
+#define ETH_P_PUP 0x0200 /* Xerox PUP packet */
+#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */
+#define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */
+#define ETH_P_ERSPAN2 0x22EB /* ERSPAN version 2 (type III) */
+#define ETH_P_IP 0x0800 /* Internet Protocol packet */
+#define ETH_P_X25 0x0805 /* CCITT X.25 */
+#define ETH_P_ARP 0x0806 /* Address Resolution packet */
+#define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */
+#define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */
+#define ETH_P_BATMAN 0x4305 /* B.A.T.M.A.N.-Advanced packet [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_DEC 0x6000 /* DEC Assigned proto */
+#define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */
+#define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */
+#define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */
+#define ETH_P_LAT 0x6004 /* DEC LAT */
+#define ETH_P_DIAG 0x6005 /* DEC Diagnostics */
+#define ETH_P_CUST 0x6006 /* DEC Customer use */
+#define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */
+#define ETH_P_TEB 0x6558 /* Trans Ether Bridging */
+#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */
+#define ETH_P_ATALK 0x809B /* Appletalk DDP */
+#define ETH_P_AARP 0x80F3 /* Appletalk AARP */
+#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
+#define ETH_P_ERSPAN 0x88BE /* ERSPAN type II */
+#define ETH_P_IPX 0x8137 /* IPX over DIX */
+#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
+#define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */
+#define ETH_P_SLOW 0x8809 /* Slow Protocol. See 802.3ad 43B */
+#define ETH_P_WCCP 0x883E /* Web-cache coordination protocol
+ * defined in draft-wilson-wrec-wccp-v2-00.txt */
+#define ETH_P_MPLS_UC 0x8847 /* MPLS Unicast traffic */
+#define ETH_P_MPLS_MC 0x8848 /* MPLS Multicast traffic */
+#define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */
+#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */
+#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */
+#define ETH_P_LINK_CTL 0x886c /* HPNA, wlan link local tunnel */
+#define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport
+ * over Ethernet
+ */
+#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */
+#define ETH_P_AOE 0x88A2 /* ATA over Ethernet */
+#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
+#define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */
+#define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */
+#define ETH_P_TIPC 0x88CA /* TIPC */
+#define ETH_P_LLDP 0x88CC /* Link Layer Discovery Protocol */
+#define ETH_P_MRP 0x88E3 /* Media Redundancy Protocol */
+#define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */
+#define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */
+#define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */
+#define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */
+#define ETH_P_NCSI 0x88F8 /* NCSI protocol */
+#define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */
+#define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */
+#define ETH_P_IBOE 0x8915 /* Infiniband over Ethernet */
+#define ETH_P_TDLS 0x890D /* TDLS */
+#define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */
+#define ETH_P_80221 0x8917 /* IEEE 802.21 Media Independent Handover Protocol */
+#define ETH_P_HSR 0x892F /* IEC 62439-3 HSRv1 */
+#define ETH_P_NSH 0x894F /* Network Service Header */
+#define ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */
+#define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */
+#define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
+
+#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value
+ * then the frame is Ethernet II. Else it is 802.3 */
+
+/*
+ * Non DIX types. Won't clash for 1500 types.
+ */
+
+#define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */
+#define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */
+#define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */
+#define ETH_P_802_2 0x0004 /* 802.2 frames */
+#define ETH_P_SNAP 0x0005 /* Internal only */
+#define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */
+#define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/
+#define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */
+#define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */
+#define ETH_P_CAN 0x000C /* CAN: Controller Area Network */
+#define ETH_P_CANFD 0x000D /* CANFD: CAN flexible data rate*/
+#define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/
+#define ETH_P_TR_802_2 0x0011 /* 802.2 frames */
+#define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */
+#define ETH_P_CONTROL 0x0016 /* Card specific control frames */
+#define ETH_P_IRDA 0x0017 /* Linux-IrDA */
+#define ETH_P_ECONET 0x0018 /* Acorn Econet */
+#define ETH_P_HDLC 0x0019 /* HDLC frames */
+#define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */
+#define ETH_P_DSA 0x001B /* Distributed Switch Arch. */
+#define ETH_P_TRAILER 0x001C /* Trailer switch tagging */
+#define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */
+#define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */
+#define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */
+#define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */
+#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and
+ * aggregation protocol
+ */
+
+/*
+ * This is an Ethernet frame header.
+ */
+
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR 1
+#endif
+
+#if __UAPI_DEF_ETHHDR
+struct ethhdr {
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+ unsigned char h_source[ETH_ALEN]; /* source ether addr */
+ __be16 h_proto; /* packet type ID field */
+} __attribute__((packed));
+#endif
+
+
+#endif /* _UAPI_LINUX_IF_ETHER_H */
diff --git a/src/basic/linux/if_link.h b/src/basic/linux/if_link.h
new file mode 100644
index 0000000..7fba4de
--- /dev/null
+++ b/src/basic/linux/if_link.h
@@ -0,0 +1,1079 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_IF_LINK_H
+#define _UAPI_LINUX_IF_LINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+/* This struct should be in sync with struct rtnl_link_stats64 */
+struct rtnl_link_stats {
+ __u32 rx_packets; /* total packets received */
+ __u32 tx_packets; /* total packets transmitted */
+ __u32 rx_bytes; /* total bytes received */
+ __u32 tx_bytes; /* total bytes transmitted */
+ __u32 rx_errors; /* bad packets received */
+ __u32 tx_errors; /* packet transmit problems */
+ __u32 rx_dropped; /* no space in linux buffers */
+ __u32 tx_dropped; /* no space available in linux */
+ __u32 multicast; /* multicast packets received */
+ __u32 collisions;
+
+ /* detailed rx_errors: */
+ __u32 rx_length_errors;
+ __u32 rx_over_errors; /* receiver ring buff overflow */
+ __u32 rx_crc_errors; /* recved pkt with crc error */
+ __u32 rx_frame_errors; /* recv'd frame alignment error */
+ __u32 rx_fifo_errors; /* recv'r fifo overrun */
+ __u32 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u32 tx_aborted_errors;
+ __u32 tx_carrier_errors;
+ __u32 tx_fifo_errors;
+ __u32 tx_heartbeat_errors;
+ __u32 tx_window_errors;
+
+ /* for cslip etc */
+ __u32 rx_compressed;
+ __u32 tx_compressed;
+
+ __u32 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The main device statistics structure */
+struct rtnl_link_stats64 {
+ __u64 rx_packets; /* total packets received */
+ __u64 tx_packets; /* total packets transmitted */
+ __u64 rx_bytes; /* total bytes received */
+ __u64 tx_bytes; /* total bytes transmitted */
+ __u64 rx_errors; /* bad packets received */
+ __u64 tx_errors; /* packet transmit problems */
+ __u64 rx_dropped; /* no space in linux buffers */
+ __u64 tx_dropped; /* no space available in linux */
+ __u64 multicast; /* multicast packets received */
+ __u64 collisions;
+
+ /* detailed rx_errors: */
+ __u64 rx_length_errors;
+ __u64 rx_over_errors; /* receiver ring buff overflow */
+ __u64 rx_crc_errors; /* recved pkt with crc error */
+ __u64 rx_frame_errors; /* recv'd frame alignment error */
+ __u64 rx_fifo_errors; /* recv'r fifo overrun */
+ __u64 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u64 tx_aborted_errors;
+ __u64 tx_carrier_errors;
+ __u64 tx_fifo_errors;
+ __u64 tx_heartbeat_errors;
+ __u64 tx_window_errors;
+
+ /* for cslip etc */
+ __u64 rx_compressed;
+ __u64 tx_compressed;
+
+ __u64 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap {
+ __u64 mem_start;
+ __u64 mem_end;
+ __u64 base_addr;
+ __u16 irq;
+ __u8 dma;
+ __u8 port;
+};
+
+/*
+ * IFLA_AF_SPEC
+ * Contains nested attributes for address family specific attributes.
+ * Each address family may create a attribute with the address family
+ * number as type and create its own attribute structure in it.
+ *
+ * Example:
+ * [IFLA_AF_SPEC] = {
+ * [AF_INET] = {
+ * [IFLA_INET_CONF] = ...,
+ * },
+ * [AF_INET6] = {
+ * [IFLA_INET6_FLAGS] = ...,
+ * [IFLA_INET6_CONF] = ...,
+ * }
+ * }
+ */
+
+enum {
+ IFLA_UNSPEC,
+ IFLA_ADDRESS,
+ IFLA_BROADCAST,
+ IFLA_IFNAME,
+ IFLA_MTU,
+ IFLA_LINK,
+ IFLA_QDISC,
+ IFLA_STATS,
+ IFLA_COST,
+#define IFLA_COST IFLA_COST
+ IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+ IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+ IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+ IFLA_PROTINFO, /* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+ IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+ IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+ IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+ IFLA_OPERSTATE,
+ IFLA_LINKMODE,
+ IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
+ IFLA_NET_NS_PID,
+ IFLA_IFALIAS,
+ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
+ IFLA_VFINFO_LIST,
+ IFLA_STATS64,
+ IFLA_VF_PORTS,
+ IFLA_PORT_SELF,
+ IFLA_AF_SPEC,
+ IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
+ IFLA_EXT_MASK, /* Extended info mask, VFs, etc */
+ IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */
+#define IFLA_PROMISCUITY IFLA_PROMISCUITY
+ IFLA_NUM_TX_QUEUES,
+ IFLA_NUM_RX_QUEUES,
+ IFLA_CARRIER,
+ IFLA_PHYS_PORT_ID,
+ IFLA_CARRIER_CHANGES,
+ IFLA_PHYS_SWITCH_ID,
+ IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
+ IFLA_PROTO_DOWN,
+ IFLA_GSO_MAX_SEGS,
+ IFLA_GSO_MAX_SIZE,
+ IFLA_PAD,
+ IFLA_XDP,
+ IFLA_EVENT,
+ IFLA_NEW_NETNSID,
+ IFLA_IF_NETNSID,
+ IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */
+ IFLA_CARRIER_UP_COUNT,
+ IFLA_CARRIER_DOWN_COUNT,
+ IFLA_NEW_IFINDEX,
+ IFLA_MIN_MTU,
+ IFLA_MAX_MTU,
+ IFLA_PROP_LIST,
+ IFLA_ALT_IFNAME, /* Alternative ifname */
+ IFLA_PERM_ADDRESS,
+ IFLA_PROTO_DOWN_REASON,
+ __IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+enum {
+ IFLA_PROTO_DOWN_REASON_UNSPEC,
+ IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */
+ IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */
+
+ __IFLA_PROTO_DOWN_REASON_CNT,
+ IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1
+};
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
+#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
+#endif
+
+enum {
+ IFLA_INET_UNSPEC,
+ IFLA_INET_CONF,
+ __IFLA_INET_MAX,
+};
+
+#define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
+
+/* ifi_flags.
+
+ IFF_* flags.
+
+ The only change is:
+ IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+ more not changeable by user. They describe link media
+ characteristics and set by device driver.
+
+ Comments:
+ - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+ - If neither of these three flags are set;
+ the interface is NBMA.
+
+ - IFF_MULTICAST does not mean anything special:
+ multicasts can be used on all not-NBMA links.
+ IFF_MULTICAST means that this media uses special encapsulation
+ for multicast frames. Apparently, all IFF_POINTOPOINT and
+ IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+ For usual devices it is equal ifi_index.
+ If it is a "virtual interface" (f.e. tunnel), ifi_link
+ can point to real physical interface (f.e. for bandwidth calculations),
+ or maybe 0, what means, that real media is unknown (usual
+ for IPIP tunnels, when route to endpoint is allowed to change)
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum {
+ IFLA_INET6_UNSPEC,
+ IFLA_INET6_FLAGS, /* link flags */
+ IFLA_INET6_CONF, /* sysctl parameters */
+ IFLA_INET6_STATS, /* statistics */
+ IFLA_INET6_MCAST, /* MC things. What of them? */
+ IFLA_INET6_CACHEINFO, /* time values and max reasm size */
+ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */
+ IFLA_INET6_TOKEN, /* device token */
+ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+ __IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1)
+
+enum in6_addr_gen_mode {
+ IN6_ADDR_GEN_MODE_EUI64,
+ IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM,
+};
+
+/* Bridge section */
+
+enum {
+ IFLA_BR_UNSPEC,
+ IFLA_BR_FORWARD_DELAY,
+ IFLA_BR_HELLO_TIME,
+ IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
+ IFLA_BR_VLAN_FILTERING,
+ IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
+ IFLA_BR_PAD,
+ IFLA_BR_VLAN_STATS_ENABLED,
+ IFLA_BR_MCAST_STATS_ENABLED,
+ IFLA_BR_MCAST_IGMP_VERSION,
+ IFLA_BR_MCAST_MLD_VERSION,
+ IFLA_BR_VLAN_STATS_PER_PORT,
+ IFLA_BR_MULTI_BOOLOPT,
+ __IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+
+struct ifla_bridge_id {
+ __u8 prio[2];
+ __u8 addr[6]; /* ETH_ALEN */
+};
+
+enum {
+ BRIDGE_MODE_UNSPEC,
+ BRIDGE_MODE_HAIRPIN,
+};
+
+enum {
+ IFLA_BRPORT_UNSPEC,
+ IFLA_BRPORT_STATE, /* Spanning tree state */
+ IFLA_BRPORT_PRIORITY, /* " priority */
+ IFLA_BRPORT_COST, /* " cost */
+ IFLA_BRPORT_MODE, /* mode (hairpin) */
+ IFLA_BRPORT_GUARD, /* bpdu guard */
+ IFLA_BRPORT_PROTECT, /* root port protection */
+ IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */
+ IFLA_BRPORT_LEARNING, /* mac learning */
+ IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
+ IFLA_BRPORT_PROXYARP, /* proxy ARP */
+ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
+ IFLA_BRPORT_ROOT_ID, /* designated root */
+ IFLA_BRPORT_BRIDGE_ID, /* designated bridge */
+ IFLA_BRPORT_DESIGNATED_PORT,
+ IFLA_BRPORT_DESIGNATED_COST,
+ IFLA_BRPORT_ID,
+ IFLA_BRPORT_NO,
+ IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+ IFLA_BRPORT_CONFIG_PENDING,
+ IFLA_BRPORT_MESSAGE_AGE_TIMER,
+ IFLA_BRPORT_FORWARD_DELAY_TIMER,
+ IFLA_BRPORT_HOLD_TIMER,
+ IFLA_BRPORT_FLUSH,
+ IFLA_BRPORT_MULTICAST_ROUTER,
+ IFLA_BRPORT_PAD,
+ IFLA_BRPORT_MCAST_FLOOD,
+ IFLA_BRPORT_MCAST_TO_UCAST,
+ IFLA_BRPORT_VLAN_TUNNEL,
+ IFLA_BRPORT_BCAST_FLOOD,
+ IFLA_BRPORT_GROUP_FWD_MASK,
+ IFLA_BRPORT_NEIGH_SUPPRESS,
+ IFLA_BRPORT_ISOLATED,
+ IFLA_BRPORT_BACKUP_PORT,
+ IFLA_BRPORT_MRP_RING_OPEN,
+ IFLA_BRPORT_MRP_IN_OPEN,
+ __IFLA_BRPORT_MAX
+};
+#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
+
+struct ifla_cacheinfo {
+ __u32 max_reasm_len;
+ __u32 tstamp; /* ipv6InterfaceTable updated timestamp */
+ __u32 reachable_time;
+ __u32 retrans_time;
+};
+
+enum {
+ IFLA_INFO_UNSPEC,
+ IFLA_INFO_KIND,
+ IFLA_INFO_DATA,
+ IFLA_INFO_XSTATS,
+ IFLA_INFO_SLAVE_KIND,
+ IFLA_INFO_SLAVE_DATA,
+ __IFLA_INFO_MAX,
+};
+
+#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1)
+
+/* VLAN section */
+
+enum {
+ IFLA_VLAN_UNSPEC,
+ IFLA_VLAN_ID,
+ IFLA_VLAN_FLAGS,
+ IFLA_VLAN_EGRESS_QOS,
+ IFLA_VLAN_INGRESS_QOS,
+ IFLA_VLAN_PROTOCOL,
+ __IFLA_VLAN_MAX,
+};
+
+#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1)
+
+struct ifla_vlan_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+enum {
+ IFLA_VLAN_QOS_UNSPEC,
+ IFLA_VLAN_QOS_MAPPING,
+ __IFLA_VLAN_QOS_MAX
+};
+
+#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1)
+
+struct ifla_vlan_qos_mapping {
+ __u32 from;
+ __u32 to;
+};
+
+/* MACVLAN section */
+enum {
+ IFLA_MACVLAN_UNSPEC,
+ IFLA_MACVLAN_MODE,
+ IFLA_MACVLAN_FLAGS,
+ IFLA_MACVLAN_MACADDR_MODE,
+ IFLA_MACVLAN_MACADDR,
+ IFLA_MACVLAN_MACADDR_DATA,
+ IFLA_MACVLAN_MACADDR_COUNT,
+ __IFLA_MACVLAN_MAX,
+};
+
+#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1)
+
+enum macvlan_mode {
+ MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
+ MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */
+ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */
+ MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
+ MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+ MACVLAN_MACADDR_ADD,
+ MACVLAN_MACADDR_DEL,
+ MACVLAN_MACADDR_FLUSH,
+ MACVLAN_MACADDR_SET,
+};
+
+#define MACVLAN_FLAG_NOPROMISC 1
+
+/* VRF section */
+enum {
+ IFLA_VRF_UNSPEC,
+ IFLA_VRF_TABLE,
+ __IFLA_VRF_MAX
+};
+
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+
+enum {
+ IFLA_VRF_PORT_UNSPEC,
+ IFLA_VRF_PORT_TABLE,
+ __IFLA_VRF_PORT_MAX
+};
+
+#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1)
+
+/* MACSEC section */
+enum {
+ IFLA_MACSEC_UNSPEC,
+ IFLA_MACSEC_SCI,
+ IFLA_MACSEC_PORT,
+ IFLA_MACSEC_ICV_LEN,
+ IFLA_MACSEC_CIPHER_SUITE,
+ IFLA_MACSEC_WINDOW,
+ IFLA_MACSEC_ENCODING_SA,
+ IFLA_MACSEC_ENCRYPT,
+ IFLA_MACSEC_PROTECT,
+ IFLA_MACSEC_INC_SCI,
+ IFLA_MACSEC_ES,
+ IFLA_MACSEC_SCB,
+ IFLA_MACSEC_REPLAY_PROTECT,
+ IFLA_MACSEC_VALIDATION,
+ IFLA_MACSEC_PAD,
+ IFLA_MACSEC_OFFLOAD,
+ __IFLA_MACSEC_MAX,
+};
+
+#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
+
+/* XFRM section */
+enum {
+ IFLA_XFRM_UNSPEC,
+ IFLA_XFRM_LINK,
+ IFLA_XFRM_IF_ID,
+ __IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
+enum macsec_validation_type {
+ MACSEC_VALIDATE_DISABLED = 0,
+ MACSEC_VALIDATE_CHECK = 1,
+ MACSEC_VALIDATE_STRICT = 2,
+ __MACSEC_VALIDATE_END,
+ MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1,
+};
+
+enum macsec_offload {
+ MACSEC_OFFLOAD_OFF = 0,
+ MACSEC_OFFLOAD_PHY = 1,
+ MACSEC_OFFLOAD_MAC = 2,
+ __MACSEC_OFFLOAD_END,
+ MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1,
+};
+
+/* IPVLAN section */
+enum {
+ IFLA_IPVLAN_UNSPEC,
+ IFLA_IPVLAN_MODE,
+ IFLA_IPVLAN_FLAGS,
+ __IFLA_IPVLAN_MAX
+};
+
+#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
+
+enum ipvlan_mode {
+ IPVLAN_MODE_L2 = 0,
+ IPVLAN_MODE_L3,
+ IPVLAN_MODE_L3S,
+ IPVLAN_MODE_MAX
+};
+
+#define IPVLAN_F_PRIVATE 0x01
+#define IPVLAN_F_VEPA 0x02
+
+/* VXLAN section */
+enum {
+ IFLA_VXLAN_UNSPEC,
+ IFLA_VXLAN_ID,
+ IFLA_VXLAN_GROUP, /* group or remote address */
+ IFLA_VXLAN_LINK,
+ IFLA_VXLAN_LOCAL,
+ IFLA_VXLAN_TTL,
+ IFLA_VXLAN_TOS,
+ IFLA_VXLAN_LEARNING,
+ IFLA_VXLAN_AGEING,
+ IFLA_VXLAN_LIMIT,
+ IFLA_VXLAN_PORT_RANGE, /* source port */
+ IFLA_VXLAN_PROXY,
+ IFLA_VXLAN_RSC,
+ IFLA_VXLAN_L2MISS,
+ IFLA_VXLAN_L3MISS,
+ IFLA_VXLAN_PORT, /* destination port */
+ IFLA_VXLAN_GROUP6,
+ IFLA_VXLAN_LOCAL6,
+ IFLA_VXLAN_UDP_CSUM,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+ IFLA_VXLAN_REMCSUM_TX,
+ IFLA_VXLAN_REMCSUM_RX,
+ IFLA_VXLAN_GBP,
+ IFLA_VXLAN_REMCSUM_NOPARTIAL,
+ IFLA_VXLAN_COLLECT_METADATA,
+ IFLA_VXLAN_LABEL,
+ IFLA_VXLAN_GPE,
+ IFLA_VXLAN_TTL_INHERIT,
+ IFLA_VXLAN_DF,
+ __IFLA_VXLAN_MAX
+};
+#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
+
+struct ifla_vxlan_port_range {
+ __be16 low;
+ __be16 high;
+};
+
+enum ifla_vxlan_df {
+ VXLAN_DF_UNSET = 0,
+ VXLAN_DF_SET,
+ VXLAN_DF_INHERIT,
+ __VXLAN_DF_END,
+ VXLAN_DF_MAX = __VXLAN_DF_END - 1,
+};
+
+/* GENEVE section */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID,
+ IFLA_GENEVE_REMOTE,
+ IFLA_GENEVE_TTL,
+ IFLA_GENEVE_TOS,
+ IFLA_GENEVE_PORT, /* destination port */
+ IFLA_GENEVE_COLLECT_METADATA,
+ IFLA_GENEVE_REMOTE6,
+ IFLA_GENEVE_UDP_CSUM,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ IFLA_GENEVE_LABEL,
+ IFLA_GENEVE_TTL_INHERIT,
+ IFLA_GENEVE_DF,
+ __IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+
+enum ifla_geneve_df {
+ GENEVE_DF_UNSET = 0,
+ GENEVE_DF_SET,
+ GENEVE_DF_INHERIT,
+ __GENEVE_DF_END,
+ GENEVE_DF_MAX = __GENEVE_DF_END - 1,
+};
+
+/* Bareudp section */
+enum {
+ IFLA_BAREUDP_UNSPEC,
+ IFLA_BAREUDP_PORT,
+ IFLA_BAREUDP_ETHERTYPE,
+ IFLA_BAREUDP_SRCPORT_MIN,
+ IFLA_BAREUDP_MULTIPROTO_MODE,
+ __IFLA_BAREUDP_MAX
+};
+
+#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1)
+
+/* PPP section */
+enum {
+ IFLA_PPP_UNSPEC,
+ IFLA_PPP_DEV_FD,
+ __IFLA_PPP_MAX
+};
+#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1)
+
+/* GTP section */
+
+enum ifla_gtp_role {
+ GTP_ROLE_GGSN = 0,
+ GTP_ROLE_SGSN,
+};
+
+enum {
+ IFLA_GTP_UNSPEC,
+ IFLA_GTP_FD0,
+ IFLA_GTP_FD1,
+ IFLA_GTP_PDP_HASHSIZE,
+ IFLA_GTP_ROLE,
+ __IFLA_GTP_MAX,
+};
+#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
+
+/* Bonding section */
+
+enum {
+ IFLA_BOND_UNSPEC,
+ IFLA_BOND_MODE,
+ IFLA_BOND_ACTIVE_SLAVE,
+ IFLA_BOND_MIIMON,
+ IFLA_BOND_UPDELAY,
+ IFLA_BOND_DOWNDELAY,
+ IFLA_BOND_USE_CARRIER,
+ IFLA_BOND_ARP_INTERVAL,
+ IFLA_BOND_ARP_IP_TARGET,
+ IFLA_BOND_ARP_VALIDATE,
+ IFLA_BOND_ARP_ALL_TARGETS,
+ IFLA_BOND_PRIMARY,
+ IFLA_BOND_PRIMARY_RESELECT,
+ IFLA_BOND_FAIL_OVER_MAC,
+ IFLA_BOND_XMIT_HASH_POLICY,
+ IFLA_BOND_RESEND_IGMP,
+ IFLA_BOND_NUM_PEER_NOTIF,
+ IFLA_BOND_ALL_SLAVES_ACTIVE,
+ IFLA_BOND_MIN_LINKS,
+ IFLA_BOND_LP_INTERVAL,
+ IFLA_BOND_PACKETS_PER_SLAVE,
+ IFLA_BOND_AD_LACP_RATE,
+ IFLA_BOND_AD_SELECT,
+ IFLA_BOND_AD_INFO,
+ IFLA_BOND_AD_ACTOR_SYS_PRIO,
+ IFLA_BOND_AD_USER_PORT_KEY,
+ IFLA_BOND_AD_ACTOR_SYSTEM,
+ IFLA_BOND_TLB_DYNAMIC_LB,
+ IFLA_BOND_PEER_NOTIF_DELAY,
+ __IFLA_BOND_MAX,
+};
+
+#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1)
+
+enum {
+ IFLA_BOND_AD_INFO_UNSPEC,
+ IFLA_BOND_AD_INFO_AGGREGATOR,
+ IFLA_BOND_AD_INFO_NUM_PORTS,
+ IFLA_BOND_AD_INFO_ACTOR_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_MAC,
+ __IFLA_BOND_AD_INFO_MAX,
+};
+
+#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1)
+
+enum {
+ IFLA_BOND_SLAVE_UNSPEC,
+ IFLA_BOND_SLAVE_STATE,
+ IFLA_BOND_SLAVE_MII_STATUS,
+ IFLA_BOND_SLAVE_LINK_FAILURE_COUNT,
+ IFLA_BOND_SLAVE_PERM_HWADDR,
+ IFLA_BOND_SLAVE_QUEUE_ID,
+ IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ __IFLA_BOND_SLAVE_MAX,
+};
+
+#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1)
+
+/* SR-IOV virtual function management section */
+
+enum {
+ IFLA_VF_INFO_UNSPEC,
+ IFLA_VF_INFO,
+ __IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+ IFLA_VF_UNSPEC,
+ IFLA_VF_MAC, /* Hardware queue specific attributes */
+ IFLA_VF_VLAN, /* VLAN ID and QoS */
+ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */
+ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */
+ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */
+ IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */
+ IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query
+ * on/off switch
+ */
+ IFLA_VF_STATS, /* network device statistics */
+ IFLA_VF_TRUST, /* Trust VF */
+ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */
+ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */
+ IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */
+ IFLA_VF_BROADCAST, /* VF broadcast */
+ __IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
+struct ifla_vf_mac {
+ __u32 vf;
+ __u8 mac[32]; /* MAX_ADDR_LEN */
+};
+
+struct ifla_vf_broadcast {
+ __u8 broadcast[32];
+};
+
+struct ifla_vf_vlan {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+};
+
+enum {
+ IFLA_VF_VLAN_INFO_UNSPEC,
+ IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */
+ __IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+ __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
+struct ifla_vf_tx_rate {
+ __u32 vf;
+ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
+};
+
+struct ifla_vf_rate {
+ __u32 vf;
+ __u32 min_tx_rate; /* Min Bandwidth in Mbps */
+ __u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
+struct ifla_vf_spoofchk {
+ __u32 vf;
+ __u32 setting;
+};
+
+struct ifla_vf_guid {
+ __u32 vf;
+ __u64 guid;
+};
+
+enum {
+ IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */
+ IFLA_VF_LINK_STATE_ENABLE, /* link always up */
+ IFLA_VF_LINK_STATE_DISABLE, /* link always down */
+ __IFLA_VF_LINK_STATE_MAX,
+};
+
+struct ifla_vf_link_state {
+ __u32 vf;
+ __u32 link_state;
+};
+
+struct ifla_vf_rss_query_en {
+ __u32 vf;
+ __u32 setting;
+};
+
+enum {
+ IFLA_VF_STATS_RX_PACKETS,
+ IFLA_VF_STATS_TX_PACKETS,
+ IFLA_VF_STATS_RX_BYTES,
+ IFLA_VF_STATS_TX_BYTES,
+ IFLA_VF_STATS_BROADCAST,
+ IFLA_VF_STATS_MULTICAST,
+ IFLA_VF_STATS_PAD,
+ IFLA_VF_STATS_RX_DROPPED,
+ IFLA_VF_STATS_TX_DROPPED,
+ __IFLA_VF_STATS_MAX,
+};
+
+#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
+
+struct ifla_vf_trust {
+ __u32 vf;
+ __u32 setting;
+};
+
+/* VF ports management section
+ *
+ * Nested layout of set/get msg is:
+ *
+ * [IFLA_NUM_VF]
+ * [IFLA_VF_PORTS]
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * ...
+ * [IFLA_PORT_SELF]
+ * [IFLA_PORT_*], ...
+ */
+
+enum {
+ IFLA_VF_PORT_UNSPEC,
+ IFLA_VF_PORT, /* nest */
+ __IFLA_VF_PORT_MAX,
+};
+
+#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
+
+enum {
+ IFLA_PORT_UNSPEC,
+ IFLA_PORT_VF, /* __u32 */
+ IFLA_PORT_PROFILE, /* string */
+ IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */
+ IFLA_PORT_INSTANCE_UUID, /* binary UUID */
+ IFLA_PORT_HOST_UUID, /* binary UUID */
+ IFLA_PORT_REQUEST, /* __u8 */
+ IFLA_PORT_RESPONSE, /* __u16, output only */
+ __IFLA_PORT_MAX,
+};
+
+#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
+
+#define PORT_PROFILE_MAX 40
+#define PORT_UUID_MAX 16
+#define PORT_SELF_VF -1
+
+enum {
+ PORT_REQUEST_PREASSOCIATE = 0,
+ PORT_REQUEST_PREASSOCIATE_RR,
+ PORT_REQUEST_ASSOCIATE,
+ PORT_REQUEST_DISASSOCIATE,
+};
+
+enum {
+ PORT_VDP_RESPONSE_SUCCESS = 0,
+ PORT_VDP_RESPONSE_INVALID_FORMAT,
+ PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_VDP_RESPONSE_UNUSED_VTID,
+ PORT_VDP_RESPONSE_VTID_VIOLATION,
+ PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION,
+ PORT_VDP_RESPONSE_OUT_OF_SYNC,
+ /* 0x08-0xFF reserved for future VDP use */
+ PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
+ PORT_PROFILE_RESPONSE_INPROGRESS,
+ PORT_PROFILE_RESPONSE_INVALID,
+ PORT_PROFILE_RESPONSE_BADSTATE,
+ PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_PROFILE_RESPONSE_ERROR,
+};
+
+struct ifla_port_vsi {
+ __u8 vsi_mgr_id;
+ __u8 vsi_type_id[3];
+ __u8 vsi_type_version;
+ __u8 pad[3];
+};
+
+
+/* IPoIB section */
+
+enum {
+ IFLA_IPOIB_UNSPEC,
+ IFLA_IPOIB_PKEY,
+ IFLA_IPOIB_MODE,
+ IFLA_IPOIB_UMCAST,
+ __IFLA_IPOIB_MAX
+};
+
+enum {
+ IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */
+ IPOIB_MODE_CONNECTED = 1, /* using connected QPs */
+};
+
+#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
+
+
+/* HSR/PRP section, both uses same interface */
+
+/* Different redundancy protocols for hsr device */
+enum {
+ HSR_PROTOCOL_HSR,
+ HSR_PROTOCOL_PRP,
+ HSR_PROTOCOL_MAX,
+};
+
+enum {
+ IFLA_HSR_UNSPEC,
+ IFLA_HSR_SLAVE1,
+ IFLA_HSR_SLAVE2,
+ IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */
+ IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */
+ IFLA_HSR_SEQ_NR,
+ IFLA_HSR_VERSION, /* HSR version */
+ IFLA_HSR_PROTOCOL, /* Indicate different protocol than
+ * HSR. For example PRP.
+ */
+ __IFLA_HSR_MAX,
+};
+
+#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
+
+/* STATS section */
+
+struct if_stats_msg {
+ __u8 family;
+ __u8 pad1;
+ __u16 pad2;
+ __u32 ifindex;
+ __u32 filter_mask;
+};
+
+/* A stats attribute can be netdev specific or a global stat.
+ * For netdev stats, lets use the prefix IFLA_STATS_LINK_*
+ */
+enum {
+ IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */
+ IFLA_STATS_LINK_64,
+ IFLA_STATS_LINK_XSTATS,
+ IFLA_STATS_LINK_XSTATS_SLAVE,
+ IFLA_STATS_LINK_OFFLOAD_XSTATS,
+ IFLA_STATS_AF_SPEC,
+ __IFLA_STATS_MAX,
+};
+
+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
+
+#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1))
+
+/* These are embedded into IFLA_STATS_LINK_XSTATS:
+ * [IFLA_STATS_LINK_XSTATS]
+ * -> [LINK_XSTATS_TYPE_xxx]
+ * -> [rtnl link type specific attributes]
+ */
+enum {
+ LINK_XSTATS_TYPE_UNSPEC,
+ LINK_XSTATS_TYPE_BRIDGE,
+ LINK_XSTATS_TYPE_BOND,
+ __LINK_XSTATS_TYPE_MAX
+};
+#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
+
+/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
+enum {
+ IFLA_OFFLOAD_XSTATS_UNSPEC,
+ IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+ __IFLA_OFFLOAD_XSTATS_MAX
+};
+#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+
+/* XDP section */
+
+#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
+#define XDP_FLAGS_SKB_MODE (1U << 1)
+#define XDP_FLAGS_DRV_MODE (1U << 2)
+#define XDP_FLAGS_HW_MODE (1U << 3)
+#define XDP_FLAGS_REPLACE (1U << 4)
+#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
+ XDP_FLAGS_DRV_MODE | \
+ XDP_FLAGS_HW_MODE)
+#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
+ XDP_FLAGS_MODES | XDP_FLAGS_REPLACE)
+
+/* These are stored into IFLA_XDP_ATTACHED on dump. */
+enum {
+ XDP_ATTACHED_NONE = 0,
+ XDP_ATTACHED_DRV,
+ XDP_ATTACHED_SKB,
+ XDP_ATTACHED_HW,
+ XDP_ATTACHED_MULTI,
+};
+
+enum {
+ IFLA_XDP_UNSPEC,
+ IFLA_XDP_FD,
+ IFLA_XDP_ATTACHED,
+ IFLA_XDP_FLAGS,
+ IFLA_XDP_PROG_ID,
+ IFLA_XDP_DRV_PROG_ID,
+ IFLA_XDP_SKB_PROG_ID,
+ IFLA_XDP_HW_PROG_ID,
+ IFLA_XDP_EXPECTED_FD,
+ __IFLA_XDP_MAX,
+};
+
+#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
+
+enum {
+ IFLA_EVENT_NONE,
+ IFLA_EVENT_REBOOT, /* internal reset / reboot */
+ IFLA_EVENT_FEATURES, /* change in offload features */
+ IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */
+ IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */
+ IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */
+ IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
+};
+
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+
+enum {
+ IFLA_RMNET_UNSPEC,
+ IFLA_RMNET_MUX_ID,
+ IFLA_RMNET_FLAGS,
+ __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/src/basic/linux/if_macsec.h b/src/basic/linux/if_macsec.h
new file mode 100644
index 0000000..3af2aa0
--- /dev/null
+++ b/src/basic/linux/if_macsec.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * include/uapi/linux/if_macsec.h - MACsec device
+ *
+ * Copyright (c) 2015 Sabrina Dubroca <sd@queasysnail.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _UAPI_MACSEC_H
+#define _UAPI_MACSEC_H
+
+#include <linux/types.h>
+
+#define MACSEC_GENL_NAME "macsec"
+#define MACSEC_GENL_VERSION 1
+
+#define MACSEC_MAX_KEY_LEN 128
+
+#define MACSEC_KEYID_LEN 16
+
+/* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */
+#define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL
+#define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL
+#define MACSEC_CIPHER_ID_GCM_AES_XPN_128 0x0080C20001000003ULL
+#define MACSEC_CIPHER_ID_GCM_AES_XPN_256 0x0080C20001000004ULL
+
+/* deprecated cipher ID for GCM-AES-128 */
+#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL
+#define MACSEC_DEFAULT_CIPHER_ALT MACSEC_CIPHER_ID_GCM_AES_128
+
+#define MACSEC_MIN_ICV_LEN 8
+#define MACSEC_MAX_ICV_LEN 32
+/* upper limit for ICV length as recommended by IEEE802.1AE-2006 */
+#define MACSEC_STD_ICV_LEN 16
+
+enum macsec_attrs {
+ MACSEC_ATTR_UNSPEC,
+ MACSEC_ATTR_IFINDEX, /* u32, ifindex of the MACsec netdevice */
+ MACSEC_ATTR_RXSC_CONFIG, /* config, nested macsec_rxsc_attrs */
+ MACSEC_ATTR_SA_CONFIG, /* config, nested macsec_sa_attrs */
+ MACSEC_ATTR_SECY, /* dump, nested macsec_secy_attrs */
+ MACSEC_ATTR_TXSA_LIST, /* dump, nested, macsec_sa_attrs for each TXSA */
+ MACSEC_ATTR_RXSC_LIST, /* dump, nested, macsec_rxsc_attrs for each RXSC */
+ MACSEC_ATTR_TXSC_STATS, /* dump, nested, macsec_txsc_stats_attr */
+ MACSEC_ATTR_SECY_STATS, /* dump, nested, macsec_secy_stats_attr */
+ MACSEC_ATTR_OFFLOAD, /* config, nested, macsec_offload_attrs */
+ __MACSEC_ATTR_END,
+ NUM_MACSEC_ATTR = __MACSEC_ATTR_END,
+ MACSEC_ATTR_MAX = __MACSEC_ATTR_END - 1,
+};
+
+enum macsec_secy_attrs {
+ MACSEC_SECY_ATTR_UNSPEC,
+ MACSEC_SECY_ATTR_SCI,
+ MACSEC_SECY_ATTR_ENCODING_SA,
+ MACSEC_SECY_ATTR_WINDOW,
+ MACSEC_SECY_ATTR_CIPHER_SUITE,
+ MACSEC_SECY_ATTR_ICV_LEN,
+ MACSEC_SECY_ATTR_PROTECT,
+ MACSEC_SECY_ATTR_REPLAY,
+ MACSEC_SECY_ATTR_OPER,
+ MACSEC_SECY_ATTR_VALIDATE,
+ MACSEC_SECY_ATTR_ENCRYPT,
+ MACSEC_SECY_ATTR_INC_SCI,
+ MACSEC_SECY_ATTR_ES,
+ MACSEC_SECY_ATTR_SCB,
+ MACSEC_SECY_ATTR_PAD,
+ __MACSEC_SECY_ATTR_END,
+ NUM_MACSEC_SECY_ATTR = __MACSEC_SECY_ATTR_END,
+ MACSEC_SECY_ATTR_MAX = __MACSEC_SECY_ATTR_END - 1,
+};
+
+enum macsec_rxsc_attrs {
+ MACSEC_RXSC_ATTR_UNSPEC,
+ MACSEC_RXSC_ATTR_SCI, /* config/dump, u64 */
+ MACSEC_RXSC_ATTR_ACTIVE, /* config/dump, u8 0..1 */
+ MACSEC_RXSC_ATTR_SA_LIST, /* dump, nested */
+ MACSEC_RXSC_ATTR_STATS, /* dump, nested, macsec_rxsc_stats_attr */
+ MACSEC_RXSC_ATTR_PAD,
+ __MACSEC_RXSC_ATTR_END,
+ NUM_MACSEC_RXSC_ATTR = __MACSEC_RXSC_ATTR_END,
+ MACSEC_RXSC_ATTR_MAX = __MACSEC_RXSC_ATTR_END - 1,
+};
+
+enum macsec_sa_attrs {
+ MACSEC_SA_ATTR_UNSPEC,
+ MACSEC_SA_ATTR_AN, /* config/dump, u8 0..3 */
+ MACSEC_SA_ATTR_ACTIVE, /* config/dump, u8 0..1 */
+ MACSEC_SA_ATTR_PN, /* config/dump, u32/u64 (u64 if XPN) */
+ MACSEC_SA_ATTR_KEY, /* config, data */
+ MACSEC_SA_ATTR_KEYID, /* config/dump, 128-bit */
+ MACSEC_SA_ATTR_STATS, /* dump, nested, macsec_sa_stats_attr */
+ MACSEC_SA_ATTR_PAD,
+ MACSEC_SA_ATTR_SSCI, /* config/dump, u32 - XPN only */
+ MACSEC_SA_ATTR_SALT, /* config, 96-bit - XPN only */
+ __MACSEC_SA_ATTR_END,
+ NUM_MACSEC_SA_ATTR = __MACSEC_SA_ATTR_END,
+ MACSEC_SA_ATTR_MAX = __MACSEC_SA_ATTR_END - 1,
+};
+
+enum macsec_offload_attrs {
+ MACSEC_OFFLOAD_ATTR_UNSPEC,
+ MACSEC_OFFLOAD_ATTR_TYPE, /* config/dump, u8 0..2 */
+ MACSEC_OFFLOAD_ATTR_PAD,
+ __MACSEC_OFFLOAD_ATTR_END,
+ NUM_MACSEC_OFFLOAD_ATTR = __MACSEC_OFFLOAD_ATTR_END,
+ MACSEC_OFFLOAD_ATTR_MAX = __MACSEC_OFFLOAD_ATTR_END - 1,
+};
+
+enum macsec_nl_commands {
+ MACSEC_CMD_GET_TXSC,
+ MACSEC_CMD_ADD_RXSC,
+ MACSEC_CMD_DEL_RXSC,
+ MACSEC_CMD_UPD_RXSC,
+ MACSEC_CMD_ADD_TXSA,
+ MACSEC_CMD_DEL_TXSA,
+ MACSEC_CMD_UPD_TXSA,
+ MACSEC_CMD_ADD_RXSA,
+ MACSEC_CMD_DEL_RXSA,
+ MACSEC_CMD_UPD_RXSA,
+ MACSEC_CMD_UPD_OFFLOAD,
+};
+
+/* u64 per-RXSC stats */
+enum macsec_rxsc_stats_attr {
+ MACSEC_RXSC_STATS_ATTR_UNSPEC,
+ MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED,
+ MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED,
+ MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED,
+ MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED,
+ MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK,
+ MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID,
+ MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE,
+ MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID,
+ MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA,
+ MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA,
+ MACSEC_RXSC_STATS_ATTR_PAD,
+ __MACSEC_RXSC_STATS_ATTR_END,
+ NUM_MACSEC_RXSC_STATS_ATTR = __MACSEC_RXSC_STATS_ATTR_END,
+ MACSEC_RXSC_STATS_ATTR_MAX = __MACSEC_RXSC_STATS_ATTR_END - 1,
+};
+
+/* u32 per-{RX,TX}SA stats */
+enum macsec_sa_stats_attr {
+ MACSEC_SA_STATS_ATTR_UNSPEC,
+ MACSEC_SA_STATS_ATTR_IN_PKTS_OK,
+ MACSEC_SA_STATS_ATTR_IN_PKTS_INVALID,
+ MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_VALID,
+ MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_USING_SA,
+ MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA,
+ MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED,
+ MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED,
+ __MACSEC_SA_STATS_ATTR_END,
+ NUM_MACSEC_SA_STATS_ATTR = __MACSEC_SA_STATS_ATTR_END,
+ MACSEC_SA_STATS_ATTR_MAX = __MACSEC_SA_STATS_ATTR_END - 1,
+};
+
+/* u64 per-TXSC stats */
+enum macsec_txsc_stats_attr {
+ MACSEC_TXSC_STATS_ATTR_UNSPEC,
+ MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED,
+ MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED,
+ MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED,
+ MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED,
+ MACSEC_TXSC_STATS_ATTR_PAD,
+ __MACSEC_TXSC_STATS_ATTR_END,
+ NUM_MACSEC_TXSC_STATS_ATTR = __MACSEC_TXSC_STATS_ATTR_END,
+ MACSEC_TXSC_STATS_ATTR_MAX = __MACSEC_TXSC_STATS_ATTR_END - 1,
+};
+
+/* u64 per-SecY stats */
+enum macsec_secy_stats_attr {
+ MACSEC_SECY_STATS_ATTR_UNSPEC,
+ MACSEC_SECY_STATS_ATTR_OUT_PKTS_UNTAGGED,
+ MACSEC_SECY_STATS_ATTR_IN_PKTS_UNTAGGED,
+ MACSEC_SECY_STATS_ATTR_OUT_PKTS_TOO_LONG,
+ MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_TAG,
+ MACSEC_SECY_STATS_ATTR_IN_PKTS_BAD_TAG,
+ MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI,
+ MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI,
+ MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN,
+ MACSEC_SECY_STATS_ATTR_PAD,
+ __MACSEC_SECY_STATS_ATTR_END,
+ NUM_MACSEC_SECY_STATS_ATTR = __MACSEC_SECY_STATS_ATTR_END,
+ MACSEC_SECY_STATS_ATTR_MAX = __MACSEC_SECY_STATS_ATTR_END - 1,
+};
+
+#endif /* _UAPI_MACSEC_H */
diff --git a/src/basic/linux/if_tun.h b/src/basic/linux/if_tun.h
new file mode 100644
index 0000000..454ae31
--- /dev/null
+++ b/src/basic/linux/if_tun.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Universal TUN/TAP device driver.
+ * Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI__IF_TUN_H
+#define _UAPI__IF_TUN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/filter.h>
+
+/* Read queue size */
+#define TUN_READQ_SIZE 500
+/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */
+#define TUN_TUN_DEV IFF_TUN
+#define TUN_TAP_DEV IFF_TAP
+#define TUN_TYPE_MASK 0x000f
+
+/* Ioctl defines */
+#define TUNSETNOCSUM _IOW('T', 200, int)
+#define TUNSETDEBUG _IOW('T', 201, int)
+#define TUNSETIFF _IOW('T', 202, int)
+#define TUNSETPERSIST _IOW('T', 203, int)
+#define TUNSETOWNER _IOW('T', 204, int)
+#define TUNSETLINK _IOW('T', 205, int)
+#define TUNSETGROUP _IOW('T', 206, int)
+#define TUNGETFEATURES _IOR('T', 207, unsigned int)
+#define TUNSETOFFLOAD _IOW('T', 208, unsigned int)
+#define TUNSETTXFILTER _IOW('T', 209, unsigned int)
+#define TUNGETIFF _IOR('T', 210, unsigned int)
+#define TUNGETSNDBUF _IOR('T', 211, int)
+#define TUNSETSNDBUF _IOW('T', 212, int)
+#define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog)
+#define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
+#define TUNGETVNETHDRSZ _IOR('T', 215, int)
+#define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNSETQUEUE _IOW('T', 217, int)
+#define TUNSETIFINDEX _IOW('T', 218, unsigned int)
+#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNGETVNETLE _IOR('T', 221, int)
+/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on
+ * little-endian hosts. Not all kernel configurations support them, but all
+ * configurations that support SET also support GET.
+ */
+#define TUNSETVNETBE _IOW('T', 222, int)
+#define TUNGETVNETBE _IOR('T', 223, int)
+#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
+#define TUNSETFILTEREBPF _IOR('T', 225, int)
+#define TUNSETCARRIER _IOW('T', 226, int)
+#define TUNGETDEVNETNS _IO('T', 227)
+
+/* TUNSETIFF ifr flags */
+#define IFF_TUN 0x0001
+#define IFF_TAP 0x0002
+#define IFF_NAPI 0x0010
+#define IFF_NAPI_FRAGS 0x0020
+#define IFF_NO_PI 0x1000
+/* This flag has no real effect */
+#define IFF_ONE_QUEUE 0x2000
+#define IFF_VNET_HDR 0x4000
+#define IFF_TUN_EXCL 0x8000
+#define IFF_MULTI_QUEUE 0x0100
+#define IFF_ATTACH_QUEUE 0x0200
+#define IFF_DETACH_QUEUE 0x0400
+/* read-only flag */
+#define IFF_PERSIST 0x0800
+#define IFF_NOFILTER 0x1000
+
+/* Socket options */
+#define TUN_TX_TIMESTAMP 1
+
+/* Features for GSO (TUNSETOFFLOAD). */
+#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */
+#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */
+#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */
+#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */
+#define TUN_F_UFO 0x10 /* I can handle UFO packets */
+
+/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
+#define TUN_PKT_STRIP 0x0001
+struct tun_pi {
+ __u16 flags;
+ __be16 proto;
+};
+
+/*
+ * Filter spec (used for SETXXFILTER ioctls)
+ * This stuff is applicable only to the TAP (Ethernet) devices.
+ * If the count is zero the filter is disabled and the driver accepts
+ * all packets (promisc mode).
+ * If the filter is enabled in order to accept broadcast packets
+ * broadcast addr must be explicitly included in the addr list.
+ */
+#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */
+struct tun_filter {
+ __u16 flags; /* TUN_FLT_ flags see above */
+ __u16 count; /* Number of addresses */
+ __u8 addr[0][ETH_ALEN];
+};
+
+#endif /* _UAPI__IF_TUN_H */
diff --git a/src/basic/linux/if_tunnel.h b/src/basic/linux/if_tunnel.h
new file mode 100644
index 0000000..7d91055
--- /dev/null
+++ b/src/basic/linux/if_tunnel.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_IF_TUNNEL_H_
+#define _UAPI_IF_TUNNEL_H_
+
+#include <linux/types.h>
+#include <linux/if.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
+#include <asm/byteorder.h>
+
+
+#define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0)
+#define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1)
+#define SIOCDELTUNNEL (SIOCDEVPRIVATE + 2)
+#define SIOCCHGTUNNEL (SIOCDEVPRIVATE + 3)
+#define SIOCGETPRL (SIOCDEVPRIVATE + 4)
+#define SIOCADDPRL (SIOCDEVPRIVATE + 5)
+#define SIOCDELPRL (SIOCDEVPRIVATE + 6)
+#define SIOCCHGPRL (SIOCDEVPRIVATE + 7)
+#define SIOCGET6RD (SIOCDEVPRIVATE + 8)
+#define SIOCADD6RD (SIOCDEVPRIVATE + 9)
+#define SIOCDEL6RD (SIOCDEVPRIVATE + 10)
+#define SIOCCHG6RD (SIOCDEVPRIVATE + 11)
+
+#define GRE_CSUM __cpu_to_be16(0x8000)
+#define GRE_ROUTING __cpu_to_be16(0x4000)
+#define GRE_KEY __cpu_to_be16(0x2000)
+#define GRE_SEQ __cpu_to_be16(0x1000)
+#define GRE_STRICT __cpu_to_be16(0x0800)
+#define GRE_REC __cpu_to_be16(0x0700)
+#define GRE_ACK __cpu_to_be16(0x0080)
+#define GRE_FLAGS __cpu_to_be16(0x0078)
+#define GRE_VERSION __cpu_to_be16(0x0007)
+
+#define GRE_IS_CSUM(f) ((f) & GRE_CSUM)
+#define GRE_IS_ROUTING(f) ((f) & GRE_ROUTING)
+#define GRE_IS_KEY(f) ((f) & GRE_KEY)
+#define GRE_IS_SEQ(f) ((f) & GRE_SEQ)
+#define GRE_IS_STRICT(f) ((f) & GRE_STRICT)
+#define GRE_IS_REC(f) ((f) & GRE_REC)
+#define GRE_IS_ACK(f) ((f) & GRE_ACK)
+
+#define GRE_VERSION_0 __cpu_to_be16(0x0000)
+#define GRE_VERSION_1 __cpu_to_be16(0x0001)
+#define GRE_PROTO_PPP __cpu_to_be16(0x880b)
+#define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff)
+
+struct ip_tunnel_parm {
+ char name[IFNAMSIZ];
+ int link;
+ __be16 i_flags;
+ __be16 o_flags;
+ __be32 i_key;
+ __be32 o_key;
+ struct iphdr iph;
+};
+
+enum {
+ IFLA_IPTUN_UNSPEC,
+ IFLA_IPTUN_LINK,
+ IFLA_IPTUN_LOCAL,
+ IFLA_IPTUN_REMOTE,
+ IFLA_IPTUN_TTL,
+ IFLA_IPTUN_TOS,
+ IFLA_IPTUN_ENCAP_LIMIT,
+ IFLA_IPTUN_FLOWINFO,
+ IFLA_IPTUN_FLAGS,
+ IFLA_IPTUN_PROTO,
+ IFLA_IPTUN_PMTUDISC,
+ IFLA_IPTUN_6RD_PREFIX,
+ IFLA_IPTUN_6RD_RELAY_PREFIX,
+ IFLA_IPTUN_6RD_PREFIXLEN,
+ IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+ IFLA_IPTUN_ENCAP_TYPE,
+ IFLA_IPTUN_ENCAP_FLAGS,
+ IFLA_IPTUN_ENCAP_SPORT,
+ IFLA_IPTUN_ENCAP_DPORT,
+ IFLA_IPTUN_COLLECT_METADATA,
+ IFLA_IPTUN_FWMARK,
+ __IFLA_IPTUN_MAX,
+};
+#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
+
+enum tunnel_encap_types {
+ TUNNEL_ENCAP_NONE,
+ TUNNEL_ENCAP_FOU,
+ TUNNEL_ENCAP_GUE,
+ TUNNEL_ENCAP_MPLS,
+};
+
+#define TUNNEL_ENCAP_FLAG_CSUM (1<<0)
+#define TUNNEL_ENCAP_FLAG_CSUM6 (1<<1)
+#define TUNNEL_ENCAP_FLAG_REMCSUM (1<<2)
+
+/* SIT-mode i_flags */
+#define SIT_ISATAP 0x0001
+
+struct ip_tunnel_prl {
+ __be32 addr;
+ __u16 flags;
+ __u16 __reserved;
+ __u32 datalen;
+ __u32 __reserved2;
+ /* data follows */
+};
+
+/* PRL flags */
+#define PRL_DEFAULT 0x0001
+
+struct ip_tunnel_6rd {
+ struct in6_addr prefix;
+ __be32 relay_prefix;
+ __u16 prefixlen;
+ __u16 relay_prefixlen;
+};
+
+enum {
+ IFLA_GRE_UNSPEC,
+ IFLA_GRE_LINK,
+ IFLA_GRE_IFLAGS,
+ IFLA_GRE_OFLAGS,
+ IFLA_GRE_IKEY,
+ IFLA_GRE_OKEY,
+ IFLA_GRE_LOCAL,
+ IFLA_GRE_REMOTE,
+ IFLA_GRE_TTL,
+ IFLA_GRE_TOS,
+ IFLA_GRE_PMTUDISC,
+ IFLA_GRE_ENCAP_LIMIT,
+ IFLA_GRE_FLOWINFO,
+ IFLA_GRE_FLAGS,
+ IFLA_GRE_ENCAP_TYPE,
+ IFLA_GRE_ENCAP_FLAGS,
+ IFLA_GRE_ENCAP_SPORT,
+ IFLA_GRE_ENCAP_DPORT,
+ IFLA_GRE_COLLECT_METADATA,
+ IFLA_GRE_IGNORE_DF,
+ IFLA_GRE_FWMARK,
+ IFLA_GRE_ERSPAN_INDEX,
+ IFLA_GRE_ERSPAN_VER,
+ IFLA_GRE_ERSPAN_DIR,
+ IFLA_GRE_ERSPAN_HWID,
+ __IFLA_GRE_MAX,
+};
+
+#define IFLA_GRE_MAX (__IFLA_GRE_MAX - 1)
+
+/* VTI-mode i_flags */
+#define VTI_ISVTI ((__force __be16)0x0001)
+
+enum {
+ IFLA_VTI_UNSPEC,
+ IFLA_VTI_LINK,
+ IFLA_VTI_IKEY,
+ IFLA_VTI_OKEY,
+ IFLA_VTI_LOCAL,
+ IFLA_VTI_REMOTE,
+ IFLA_VTI_FWMARK,
+ __IFLA_VTI_MAX,
+};
+
+#define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1)
+
+#define TUNNEL_CSUM __cpu_to_be16(0x01)
+#define TUNNEL_ROUTING __cpu_to_be16(0x02)
+#define TUNNEL_KEY __cpu_to_be16(0x04)
+#define TUNNEL_SEQ __cpu_to_be16(0x08)
+#define TUNNEL_STRICT __cpu_to_be16(0x10)
+#define TUNNEL_REC __cpu_to_be16(0x20)
+#define TUNNEL_VERSION __cpu_to_be16(0x40)
+#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
+#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
+#define TUNNEL_OAM __cpu_to_be16(0x0200)
+#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
+#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
+#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
+#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
+#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000)
+
+#define TUNNEL_OPTIONS_PRESENT \
+ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
+
+#endif /* _UAPI_IF_TUNNEL_H_ */
diff --git a/src/basic/linux/in.h b/src/basic/linux/in.h
new file mode 100644
index 0000000..7d66876
--- /dev/null
+++ b/src/basic/linux/in.h
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions of the Internet Protocol.
+ *
+ * Version: @(#)in.h 1.0.1 04/21/93
+ *
+ * Authors: Original taken from the GNU Project <netinet/in.h> file.
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_LINUX_IN_H
+#define _UAPI_LINUX_IN_H
+
+#include <linux/types.h>
+#include <linux/libc-compat.h>
+#include <linux/socket.h>
+
+#if __UAPI_DEF_IN_IPPROTO
+/* Standard well-defined IP protocols. */
+enum {
+ IPPROTO_IP = 0, /* Dummy protocol for TCP */
+#define IPPROTO_IP IPPROTO_IP
+ IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
+#define IPPROTO_ICMP IPPROTO_ICMP
+ IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
+#define IPPROTO_IGMP IPPROTO_IGMP
+ IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
+#define IPPROTO_IPIP IPPROTO_IPIP
+ IPPROTO_TCP = 6, /* Transmission Control Protocol */
+#define IPPROTO_TCP IPPROTO_TCP
+ IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
+#define IPPROTO_EGP IPPROTO_EGP
+ IPPROTO_PUP = 12, /* PUP protocol */
+#define IPPROTO_PUP IPPROTO_PUP
+ IPPROTO_UDP = 17, /* User Datagram Protocol */
+#define IPPROTO_UDP IPPROTO_UDP
+ IPPROTO_IDP = 22, /* XNS IDP protocol */
+#define IPPROTO_IDP IPPROTO_IDP
+ IPPROTO_TP = 29, /* SO Transport Protocol Class 4 */
+#define IPPROTO_TP IPPROTO_TP
+ IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
+#define IPPROTO_DCCP IPPROTO_DCCP
+ IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
+#define IPPROTO_IPV6 IPPROTO_IPV6
+ IPPROTO_RSVP = 46, /* RSVP Protocol */
+#define IPPROTO_RSVP IPPROTO_RSVP
+ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
+#define IPPROTO_GRE IPPROTO_GRE
+ IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
+#define IPPROTO_ESP IPPROTO_ESP
+ IPPROTO_AH = 51, /* Authentication Header protocol */
+#define IPPROTO_AH IPPROTO_AH
+ IPPROTO_MTP = 92, /* Multicast Transport Protocol */
+#define IPPROTO_MTP IPPROTO_MTP
+ IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
+#define IPPROTO_BEETPH IPPROTO_BEETPH
+ IPPROTO_ENCAP = 98, /* Encapsulation Header */
+#define IPPROTO_ENCAP IPPROTO_ENCAP
+ IPPROTO_PIM = 103, /* Protocol Independent Multicast */
+#define IPPROTO_PIM IPPROTO_PIM
+ IPPROTO_COMP = 108, /* Compression Header Protocol */
+#define IPPROTO_COMP IPPROTO_COMP
+ IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */
+#define IPPROTO_SCTP IPPROTO_SCTP
+ IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
+#define IPPROTO_UDPLITE IPPROTO_UDPLITE
+ IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
+#define IPPROTO_MPLS IPPROTO_MPLS
+ IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
+#define IPPROTO_ETHERNET IPPROTO_ETHERNET
+ IPPROTO_RAW = 255, /* Raw IP packets */
+#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_MPTCP = 262, /* Multipath TCP connection */
+#define IPPROTO_MPTCP IPPROTO_MPTCP
+ IPPROTO_MAX
+};
+#endif
+
+#if __UAPI_DEF_IN_ADDR
+/* Internet address. */
+struct in_addr {
+ __be32 s_addr;
+};
+#endif
+
+#define IP_TOS 1
+#define IP_TTL 2
+#define IP_HDRINCL 3
+#define IP_OPTIONS 4
+#define IP_ROUTER_ALERT 5
+#define IP_RECVOPTS 6
+#define IP_RETOPTS 7
+#define IP_PKTINFO 8
+#define IP_PKTOPTIONS 9
+#define IP_MTU_DISCOVER 10
+#define IP_RECVERR 11
+#define IP_RECVTTL 12
+#define IP_RECVTOS 13
+#define IP_MTU 14
+#define IP_FREEBIND 15
+#define IP_IPSEC_POLICY 16
+#define IP_XFRM_POLICY 17
+#define IP_PASSSEC 18
+#define IP_TRANSPARENT 19
+
+/* BSD compatibility */
+#define IP_RECVRETOPTS IP_RETOPTS
+
+/* TProxy original addresses */
+#define IP_ORIGDSTADDR 20
+#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR
+
+#define IP_MINTTL 21
+#define IP_NODEFRAG 22
+#define IP_CHECKSUM 23
+#define IP_BIND_ADDRESS_NO_PORT 24
+#define IP_RECVFRAGSIZE 25
+#define IP_RECVERR_RFC4884 26
+
+/* IP_MTU_DISCOVER values */
+#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
+#define IP_PMTUDISC_WANT 1 /* Use per route hints */
+#define IP_PMTUDISC_DO 2 /* Always DF */
+#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
+/* Always use interface mtu (ignores dst pmtu) but don't set DF flag.
+ * Also incoming ICMP frag_needed notifications will be ignored on
+ * this socket to prevent accepting spoofed ones.
+ */
+#define IP_PMTUDISC_INTERFACE 4
+/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
+ * fragmented if they exeed the interface mtu
+ */
+#define IP_PMTUDISC_OMIT 5
+
+#define IP_MULTICAST_IF 32
+#define IP_MULTICAST_TTL 33
+#define IP_MULTICAST_LOOP 34
+#define IP_ADD_MEMBERSHIP 35
+#define IP_DROP_MEMBERSHIP 36
+#define IP_UNBLOCK_SOURCE 37
+#define IP_BLOCK_SOURCE 38
+#define IP_ADD_SOURCE_MEMBERSHIP 39
+#define IP_DROP_SOURCE_MEMBERSHIP 40
+#define IP_MSFILTER 41
+#define MCAST_JOIN_GROUP 42
+#define MCAST_BLOCK_SOURCE 43
+#define MCAST_UNBLOCK_SOURCE 44
+#define MCAST_LEAVE_GROUP 45
+#define MCAST_JOIN_SOURCE_GROUP 46
+#define MCAST_LEAVE_SOURCE_GROUP 47
+#define MCAST_MSFILTER 48
+#define IP_MULTICAST_ALL 49
+#define IP_UNICAST_IF 50
+
+#define MCAST_EXCLUDE 0
+#define MCAST_INCLUDE 1
+
+/* These need to appear somewhere around here */
+#define IP_DEFAULT_MULTICAST_TTL 1
+#define IP_DEFAULT_MULTICAST_LOOP 1
+
+/* Request struct for multicast socket ops */
+
+#if __UAPI_DEF_IP_MREQ
+struct ip_mreq {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_interface; /* local IP address of interface */
+};
+
+struct ip_mreqn {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_address; /* local IP address of interface */
+ int imr_ifindex; /* Interface index */
+};
+
+struct ip_mreq_source {
+ __be32 imr_multiaddr;
+ __be32 imr_interface;
+ __be32 imr_sourceaddr;
+};
+
+struct ip_msfilter {
+ __be32 imsf_multiaddr;
+ __be32 imsf_interface;
+ __u32 imsf_fmode;
+ __u32 imsf_numsrc;
+ __be32 imsf_slist[1];
+};
+
+#define IP_MSFILTER_SIZE(numsrc) \
+ (sizeof(struct ip_msfilter) - sizeof(__u32) \
+ + (numsrc) * sizeof(__u32))
+
+struct group_req {
+ __u32 gr_interface; /* interface index */
+ struct __kernel_sockaddr_storage gr_group; /* group address */
+};
+
+struct group_source_req {
+ __u32 gsr_interface; /* interface index */
+ struct __kernel_sockaddr_storage gsr_group; /* group address */
+ struct __kernel_sockaddr_storage gsr_source; /* source address */
+};
+
+struct group_filter {
+ __u32 gf_interface; /* interface index */
+ struct __kernel_sockaddr_storage gf_group; /* multicast address */
+ __u32 gf_fmode; /* filter mode */
+ __u32 gf_numsrc; /* number of sources */
+ struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */
+};
+
+#define GROUP_FILTER_SIZE(numsrc) \
+ (sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
+ + (numsrc) * sizeof(struct __kernel_sockaddr_storage))
+#endif
+
+#if __UAPI_DEF_IN_PKTINFO
+struct in_pktinfo {
+ int ipi_ifindex;
+ struct in_addr ipi_spec_dst;
+ struct in_addr ipi_addr;
+};
+#endif
+
+/* Structure describing an Internet (IP) socket address. */
+#if __UAPI_DEF_SOCKADDR_IN
+#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
+struct sockaddr_in {
+ __kernel_sa_family_t sin_family; /* Address family */
+ __be16 sin_port; /* Port number */
+ struct in_addr sin_addr; /* Internet address */
+
+ /* Pad to size of `struct sockaddr'. */
+ unsigned char __pad[__SOCK_SIZE__ - sizeof(short int) -
+ sizeof(unsigned short int) - sizeof(struct in_addr)];
+};
+#define sin_zero __pad /* for BSD UNIX comp. -FvK */
+#endif
+
+#if __UAPI_DEF_IN_CLASS
+/*
+ * Definitions of the bits in an Internet address integer.
+ * On subnets, host and network parts are found according
+ * to the subnet mask, not these masks.
+ */
+#define IN_CLASSA(a) ((((long int) (a)) & 0x80000000) == 0)
+#define IN_CLASSA_NET 0xff000000
+#define IN_CLASSA_NSHIFT 24
+#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET)
+#define IN_CLASSA_MAX 128
+
+#define IN_CLASSB(a) ((((long int) (a)) & 0xc0000000) == 0x80000000)
+#define IN_CLASSB_NET 0xffff0000
+#define IN_CLASSB_NSHIFT 16
+#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET)
+#define IN_CLASSB_MAX 65536
+
+#define IN_CLASSC(a) ((((long int) (a)) & 0xe0000000) == 0xc0000000)
+#define IN_CLASSC_NET 0xffffff00
+#define IN_CLASSC_NSHIFT 8
+#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET)
+
+#define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000)
+#define IN_MULTICAST(a) IN_CLASSD(a)
+#define IN_MULTICAST_NET 0xe0000000
+
+#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff)
+#define IN_EXPERIMENTAL(a) IN_BADCLASS((a))
+
+#define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000)
+#define IN_CLASSE_NET 0xffffffff
+#define IN_CLASSE_NSHIFT 0
+
+/* Address to accept any incoming messages. */
+#define INADDR_ANY ((unsigned long int) 0x00000000)
+
+/* Address to send to all hosts. */
+#define INADDR_BROADCAST ((unsigned long int) 0xffffffff)
+
+/* Address indicating an error return. */
+#define INADDR_NONE ((unsigned long int) 0xffffffff)
+
+/* Network number for local host loopback. */
+#define IN_LOOPBACKNET 127
+
+/* Address to loopback in software to local host. */
+#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
+#define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000)
+
+/* Defines for Multicast INADDR */
+#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */
+#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
+#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
+#define INADDR_ALLSNOOPERS_GROUP 0xe000006aU /* 224.0.0.106 */
+#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
+#endif
+
+/* <asm/byteorder.h> contains the htonl type stuff.. */
+#include <asm/byteorder.h>
+
+
+#endif /* _UAPI_LINUX_IN_H */
diff --git a/src/basic/linux/in6.h b/src/basic/linux/in6.h
new file mode 100644
index 0000000..5ad396a
--- /dev/null
+++ b/src/basic/linux/in6.h
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Types and definitions for AF_INET6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Sources:
+ * IPv6 Program Interfaces for BSD Systems
+ * <draft-ietf-ipngwg-bsd-api-05.txt>
+ *
+ * Advanced Sockets API for IPv6
+ * <draft-stevens-advanced-api-00.txt>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_IN6_H
+#define _UAPI_LINUX_IN6_H
+
+#include <linux/types.h>
+#include <linux/libc-compat.h>
+
+/*
+ * IPv6 address structure
+ */
+
+#if __UAPI_DEF_IN6_ADDR
+struct in6_addr {
+ union {
+ __u8 u6_addr8[16];
+#if __UAPI_DEF_IN6_ADDR_ALT
+ __be16 u6_addr16[8];
+ __be32 u6_addr32[4];
+#endif
+ } in6_u;
+#define s6_addr in6_u.u6_addr8
+#if __UAPI_DEF_IN6_ADDR_ALT
+#define s6_addr16 in6_u.u6_addr16
+#define s6_addr32 in6_u.u6_addr32
+#endif
+};
+#endif /* __UAPI_DEF_IN6_ADDR */
+
+#if __UAPI_DEF_SOCKADDR_IN6
+struct sockaddr_in6 {
+ unsigned short int sin6_family; /* AF_INET6 */
+ __be16 sin6_port; /* Transport layer port # */
+ __be32 sin6_flowinfo; /* IPv6 flow information */
+ struct in6_addr sin6_addr; /* IPv6 address */
+ __u32 sin6_scope_id; /* scope id (new in RFC2553) */
+};
+#endif /* __UAPI_DEF_SOCKADDR_IN6 */
+
+#if __UAPI_DEF_IPV6_MREQ
+struct ipv6_mreq {
+ /* IPv6 multicast address of group */
+ struct in6_addr ipv6mr_multiaddr;
+
+ /* local IPv6 address of interface */
+ int ipv6mr_ifindex;
+};
+#endif /* __UAPI_DEF_IVP6_MREQ */
+
+#define ipv6mr_acaddr ipv6mr_multiaddr
+
+struct in6_flowlabel_req {
+ struct in6_addr flr_dst;
+ __be32 flr_label;
+ __u8 flr_action;
+ __u8 flr_share;
+ __u16 flr_flags;
+ __u16 flr_expires;
+ __u16 flr_linger;
+ __u32 __flr_pad;
+ /* Options in format of IPV6_PKTOPTIONS */
+};
+
+#define IPV6_FL_A_GET 0
+#define IPV6_FL_A_PUT 1
+#define IPV6_FL_A_RENEW 2
+
+#define IPV6_FL_F_CREATE 1
+#define IPV6_FL_F_EXCL 2
+#define IPV6_FL_F_REFLECT 4
+#define IPV6_FL_F_REMOTE 8
+
+#define IPV6_FL_S_NONE 0
+#define IPV6_FL_S_EXCL 1
+#define IPV6_FL_S_PROCESS 2
+#define IPV6_FL_S_USER 3
+#define IPV6_FL_S_ANY 255
+
+
+/*
+ * Bitmask constant declarations to help applications select out the
+ * flow label and priority fields.
+ *
+ * Note that this are in host byte order while the flowinfo field of
+ * sockaddr_in6 is in network byte order.
+ */
+
+#define IPV6_FLOWINFO_FLOWLABEL 0x000fffff
+#define IPV6_FLOWINFO_PRIORITY 0x0ff00000
+
+/* These definitions are obsolete */
+#define IPV6_PRIORITY_UNCHARACTERIZED 0x0000
+#define IPV6_PRIORITY_FILLER 0x0100
+#define IPV6_PRIORITY_UNATTENDED 0x0200
+#define IPV6_PRIORITY_RESERVED1 0x0300
+#define IPV6_PRIORITY_BULK 0x0400
+#define IPV6_PRIORITY_RESERVED2 0x0500
+#define IPV6_PRIORITY_INTERACTIVE 0x0600
+#define IPV6_PRIORITY_CONTROL 0x0700
+#define IPV6_PRIORITY_8 0x0800
+#define IPV6_PRIORITY_9 0x0900
+#define IPV6_PRIORITY_10 0x0a00
+#define IPV6_PRIORITY_11 0x0b00
+#define IPV6_PRIORITY_12 0x0c00
+#define IPV6_PRIORITY_13 0x0d00
+#define IPV6_PRIORITY_14 0x0e00
+#define IPV6_PRIORITY_15 0x0f00
+
+/*
+ * IPV6 extension headers
+ */
+#if __UAPI_DEF_IPPROTO_V6
+#define IPPROTO_HOPOPTS 0 /* IPv6 hop-by-hop options */
+#define IPPROTO_ROUTING 43 /* IPv6 routing header */
+#define IPPROTO_FRAGMENT 44 /* IPv6 fragmentation header */
+#define IPPROTO_ICMPV6 58 /* ICMPv6 */
+#define IPPROTO_NONE 59 /* IPv6 no next header */
+#define IPPROTO_DSTOPTS 60 /* IPv6 destination options */
+#define IPPROTO_MH 135 /* IPv6 mobility header */
+#endif /* __UAPI_DEF_IPPROTO_V6 */
+
+/*
+ * IPv6 TLV options.
+ */
+#define IPV6_TLV_PAD1 0
+#define IPV6_TLV_PADN 1
+#define IPV6_TLV_ROUTERALERT 5
+#define IPV6_TLV_CALIPSO 7 /* RFC 5570 */
+#define IPV6_TLV_JUMBO 194
+#define IPV6_TLV_HAO 201 /* home address option */
+
+/*
+ * IPV6 socket options
+ */
+#if __UAPI_DEF_IPV6_OPTIONS
+#define IPV6_ADDRFORM 1
+#define IPV6_2292PKTINFO 2
+#define IPV6_2292HOPOPTS 3
+#define IPV6_2292DSTOPTS 4
+#define IPV6_2292RTHDR 5
+#define IPV6_2292PKTOPTIONS 6
+#define IPV6_CHECKSUM 7
+#define IPV6_2292HOPLIMIT 8
+#define IPV6_NEXTHOP 9
+#define IPV6_AUTHHDR 10 /* obsolete */
+#define IPV6_FLOWINFO 11
+
+#define IPV6_UNICAST_HOPS 16
+#define IPV6_MULTICAST_IF 17
+#define IPV6_MULTICAST_HOPS 18
+#define IPV6_MULTICAST_LOOP 19
+#define IPV6_ADD_MEMBERSHIP 20
+#define IPV6_DROP_MEMBERSHIP 21
+#define IPV6_ROUTER_ALERT 22
+#define IPV6_MTU_DISCOVER 23
+#define IPV6_MTU 24
+#define IPV6_RECVERR 25
+#define IPV6_V6ONLY 26
+#define IPV6_JOIN_ANYCAST 27
+#define IPV6_LEAVE_ANYCAST 28
+#define IPV6_MULTICAST_ALL 29
+#define IPV6_ROUTER_ALERT_ISOLATE 30
+#define IPV6_RECVERR_RFC4884 31
+
+/* IPV6_MTU_DISCOVER values */
+#define IPV6_PMTUDISC_DONT 0
+#define IPV6_PMTUDISC_WANT 1
+#define IPV6_PMTUDISC_DO 2
+#define IPV6_PMTUDISC_PROBE 3
+/* same as IPV6_PMTUDISC_PROBE, provided for symetry with IPv4
+ * also see comments on IP_PMTUDISC_INTERFACE
+ */
+#define IPV6_PMTUDISC_INTERFACE 4
+/* weaker version of IPV6_PMTUDISC_INTERFACE, which allows packets to
+ * get fragmented if they exceed the interface mtu
+ */
+#define IPV6_PMTUDISC_OMIT 5
+
+/* Flowlabel */
+#define IPV6_FLOWLABEL_MGR 32
+#define IPV6_FLOWINFO_SEND 33
+
+#define IPV6_IPSEC_POLICY 34
+#define IPV6_XFRM_POLICY 35
+#define IPV6_HDRINCL 36
+#endif
+
+/*
+ * Multicast:
+ * Following socket options are shared between IPv4 and IPv6.
+ *
+ * MCAST_JOIN_GROUP 42
+ * MCAST_BLOCK_SOURCE 43
+ * MCAST_UNBLOCK_SOURCE 44
+ * MCAST_LEAVE_GROUP 45
+ * MCAST_JOIN_SOURCE_GROUP 46
+ * MCAST_LEAVE_SOURCE_GROUP 47
+ * MCAST_MSFILTER 48
+ */
+
+/*
+ * Advanced API (RFC3542) (1)
+ *
+ * Note: IPV6_RECVRTHDRDSTOPTS does not exist. see net/ipv6/datagram.c.
+ */
+
+#define IPV6_RECVPKTINFO 49
+#define IPV6_PKTINFO 50
+#define IPV6_RECVHOPLIMIT 51
+#define IPV6_HOPLIMIT 52
+#define IPV6_RECVHOPOPTS 53
+#define IPV6_HOPOPTS 54
+#define IPV6_RTHDRDSTOPTS 55
+#define IPV6_RECVRTHDR 56
+#define IPV6_RTHDR 57
+#define IPV6_RECVDSTOPTS 58
+#define IPV6_DSTOPTS 59
+#define IPV6_RECVPATHMTU 60
+#define IPV6_PATHMTU 61
+#define IPV6_DONTFRAG 62
+#if 0 /* not yet */
+#define IPV6_USE_MIN_MTU 63
+#endif
+
+/*
+ * Netfilter (1)
+ *
+ * Following socket options are used in ip6_tables;
+ * see include/linux/netfilter_ipv6/ip6_tables.h.
+ *
+ * IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO 64
+ * IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES 65
+ */
+
+/*
+ * Advanced API (RFC3542) (2)
+ */
+#define IPV6_RECVTCLASS 66
+#define IPV6_TCLASS 67
+
+/*
+ * Netfilter (2)
+ *
+ * Following socket options are used in ip6_tables;
+ * see include/linux/netfilter_ipv6/ip6_tables.h.
+ *
+ * IP6T_SO_GET_REVISION_MATCH 68
+ * IP6T_SO_GET_REVISION_TARGET 69
+ * IP6T_SO_ORIGINAL_DST 80
+ */
+
+#define IPV6_AUTOFLOWLABEL 70
+/* RFC5014: Source address selection */
+#define IPV6_ADDR_PREFERENCES 72
+
+#define IPV6_PREFER_SRC_TMP 0x0001
+#define IPV6_PREFER_SRC_PUBLIC 0x0002
+#define IPV6_PREFER_SRC_PUBTMP_DEFAULT 0x0100
+#define IPV6_PREFER_SRC_COA 0x0004
+#define IPV6_PREFER_SRC_HOME 0x0400
+#define IPV6_PREFER_SRC_CGA 0x0008
+#define IPV6_PREFER_SRC_NONCGA 0x0800
+
+/* RFC5082: Generalized Ttl Security Mechanism */
+#define IPV6_MINHOPCOUNT 73
+
+#define IPV6_ORIGDSTADDR 74
+#define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR
+#define IPV6_TRANSPARENT 75
+#define IPV6_UNICAST_IF 76
+#define IPV6_RECVFRAGSIZE 77
+#define IPV6_FREEBIND 78
+
+/*
+ * Multicast Routing:
+ * see include/uapi/linux/mroute6.h.
+ *
+ * MRT6_BASE 200
+ * ...
+ * MRT6_MAX
+ */
+#endif /* _UAPI_LINUX_IN6_H */
diff --git a/src/basic/linux/ipv6_route.h b/src/basic/linux/ipv6_route.h
new file mode 100644
index 0000000..593800a
--- /dev/null
+++ b/src/basic/linux/ipv6_route.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_IPV6_ROUTE_H
+#define _UAPI_LINUX_IPV6_ROUTE_H
+
+#include <linux/types.h>
+#include <linux/in6.h> /* For struct in6_addr. */
+
+#define RTF_DEFAULT 0x00010000 /* default - learned via ND */
+#define RTF_ALLONLINK 0x00020000 /* (deprecated and will be removed)
+ fallback, no routers on link */
+#define RTF_ADDRCONF 0x00040000 /* addrconf route - RA */
+#define RTF_PREFIX_RT 0x00080000 /* A prefix only route - RA */
+#define RTF_ANYCAST 0x00100000 /* Anycast */
+
+#define RTF_NONEXTHOP 0x00200000 /* route with no nexthop */
+#define RTF_EXPIRES 0x00400000
+
+#define RTF_ROUTEINFO 0x00800000 /* route information - RA */
+
+#define RTF_CACHE 0x01000000 /* read-only: can not be set by user */
+#define RTF_FLOW 0x02000000 /* flow significant route */
+#define RTF_POLICY 0x04000000 /* policy route */
+
+#define RTF_PREF(pref) ((pref) << 27)
+#define RTF_PREF_MASK 0x18000000
+
+#define RTF_PCPU 0x40000000 /* read-only: can not be set by user */
+#define RTF_LOCAL 0x80000000
+
+
+struct in6_rtmsg {
+ struct in6_addr rtmsg_dst;
+ struct in6_addr rtmsg_src;
+ struct in6_addr rtmsg_gateway;
+ __u32 rtmsg_type;
+ __u16 rtmsg_dst_len;
+ __u16 rtmsg_src_len;
+ __u32 rtmsg_metric;
+ unsigned long rtmsg_info;
+ __u32 rtmsg_flags;
+ int rtmsg_ifindex;
+};
+
+#define RTMSG_NEWDEVICE 0x11
+#define RTMSG_DELDEVICE 0x12
+#define RTMSG_NEWROUTE 0x21
+#define RTMSG_DELROUTE 0x22
+
+#define IP6_RT_PRIO_USER 1024
+#define IP6_RT_PRIO_ADDRCONF 256
+
+#endif /* _UAPI_LINUX_IPV6_ROUTE_H */
diff --git a/src/basic/linux/l2tp.h b/src/basic/linux/l2tp.h
new file mode 100644
index 0000000..61158f5
--- /dev/null
+++ b/src/basic/linux/l2tp.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * L2TP-over-IP socket for L2TPv3.
+ *
+ * Author: James Chapman <jchapman@katalix.com>
+ */
+
+#ifndef _UAPI_LINUX_L2TP_H_
+#define _UAPI_LINUX_L2TP_H_
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#define IPPROTO_L2TP 115
+
+/**
+ * struct sockaddr_l2tpip - the sockaddr structure for L2TP-over-IP sockets
+ * @l2tp_family: address family number AF_L2TPIP.
+ * @l2tp_addr: protocol specific address information
+ * @l2tp_conn_id: connection id of tunnel
+ */
+#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
+struct sockaddr_l2tpip {
+ /* The first fields must match struct sockaddr_in */
+ __kernel_sa_family_t l2tp_family; /* AF_INET */
+ __be16 l2tp_unused; /* INET port number (unused) */
+ struct in_addr l2tp_addr; /* Internet address */
+
+ __u32 l2tp_conn_id; /* Connection ID of tunnel */
+
+ /* Pad to size of `struct sockaddr'. */
+ unsigned char __pad[__SOCK_SIZE__ -
+ sizeof(__kernel_sa_family_t) -
+ sizeof(__be16) - sizeof(struct in_addr) -
+ sizeof(__u32)];
+};
+
+/**
+ * struct sockaddr_l2tpip6 - the sockaddr structure for L2TP-over-IPv6 sockets
+ * @l2tp_family: address family number AF_L2TPIP.
+ * @l2tp_addr: protocol specific address information
+ * @l2tp_conn_id: connection id of tunnel
+ */
+struct sockaddr_l2tpip6 {
+ /* The first fields must match struct sockaddr_in6 */
+ __kernel_sa_family_t l2tp_family; /* AF_INET6 */
+ __be16 l2tp_unused; /* INET port number (unused) */
+ __be32 l2tp_flowinfo; /* IPv6 flow information */
+ struct in6_addr l2tp_addr; /* IPv6 address */
+ __u32 l2tp_scope_id; /* scope id (new in RFC2553) */
+ __u32 l2tp_conn_id; /* Connection ID of tunnel */
+};
+
+/*****************************************************************************
+ * NETLINK_GENERIC netlink family.
+ *****************************************************************************/
+
+/*
+ * Commands.
+ * Valid TLVs of each command are:-
+ * TUNNEL_CREATE - CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum
+ * TUNNEL_DELETE - CONN_ID
+ * TUNNEL_MODIFY - CONN_ID, udpcsum
+ * TUNNEL_GETSTATS - CONN_ID, (stats)
+ * TUNNEL_GET - CONN_ID, (...)
+ * SESSION_CREATE - SESSION_ID, PW_TYPE, cookie, peer_cookie, l2spec
+ * SESSION_DELETE - SESSION_ID
+ * SESSION_MODIFY - SESSION_ID
+ * SESSION_GET - SESSION_ID, (...)
+ * SESSION_GETSTATS - SESSION_ID, (stats)
+ *
+ */
+enum {
+ L2TP_CMD_NOOP,
+ L2TP_CMD_TUNNEL_CREATE,
+ L2TP_CMD_TUNNEL_DELETE,
+ L2TP_CMD_TUNNEL_MODIFY,
+ L2TP_CMD_TUNNEL_GET,
+ L2TP_CMD_SESSION_CREATE,
+ L2TP_CMD_SESSION_DELETE,
+ L2TP_CMD_SESSION_MODIFY,
+ L2TP_CMD_SESSION_GET,
+ __L2TP_CMD_MAX,
+};
+
+#define L2TP_CMD_MAX (__L2TP_CMD_MAX - 1)
+
+/*
+ * ATTR types defined for L2TP
+ */
+enum {
+ L2TP_ATTR_NONE, /* no data */
+ L2TP_ATTR_PW_TYPE, /* u16, enum l2tp_pwtype */
+ L2TP_ATTR_ENCAP_TYPE, /* u16, enum l2tp_encap_type */
+ L2TP_ATTR_OFFSET, /* u16 (not used) */
+ L2TP_ATTR_DATA_SEQ, /* u16 (not used) */
+ L2TP_ATTR_L2SPEC_TYPE, /* u8, enum l2tp_l2spec_type */
+ L2TP_ATTR_L2SPEC_LEN, /* u8 (not used) */
+ L2TP_ATTR_PROTO_VERSION, /* u8 */
+ L2TP_ATTR_IFNAME, /* string */
+ L2TP_ATTR_CONN_ID, /* u32 */
+ L2TP_ATTR_PEER_CONN_ID, /* u32 */
+ L2TP_ATTR_SESSION_ID, /* u32 */
+ L2TP_ATTR_PEER_SESSION_ID, /* u32 */
+ L2TP_ATTR_UDP_CSUM, /* u8 */
+ L2TP_ATTR_VLAN_ID, /* u16 (not used) */
+ L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */
+ L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */
+ L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags */
+ L2TP_ATTR_RECV_SEQ, /* u8 */
+ L2TP_ATTR_SEND_SEQ, /* u8 */
+ L2TP_ATTR_LNS_MODE, /* u8 */
+ L2TP_ATTR_USING_IPSEC, /* u8 */
+ L2TP_ATTR_RECV_TIMEOUT, /* msec */
+ L2TP_ATTR_FD, /* int */
+ L2TP_ATTR_IP_SADDR, /* u32 */
+ L2TP_ATTR_IP_DADDR, /* u32 */
+ L2TP_ATTR_UDP_SPORT, /* u16 */
+ L2TP_ATTR_UDP_DPORT, /* u16 */
+ L2TP_ATTR_MTU, /* u16 (not used) */
+ L2TP_ATTR_MRU, /* u16 (not used) */
+ L2TP_ATTR_STATS, /* nested */
+ L2TP_ATTR_IP6_SADDR, /* struct in6_addr */
+ L2TP_ATTR_IP6_DADDR, /* struct in6_addr */
+ L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* flag */
+ L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* flag */
+ L2TP_ATTR_PAD,
+ __L2TP_ATTR_MAX,
+};
+
+#define L2TP_ATTR_MAX (__L2TP_ATTR_MAX - 1)
+
+/* Nested in L2TP_ATTR_STATS */
+enum {
+ L2TP_ATTR_STATS_NONE, /* no data */
+ L2TP_ATTR_TX_PACKETS, /* u64 */
+ L2TP_ATTR_TX_BYTES, /* u64 */
+ L2TP_ATTR_TX_ERRORS, /* u64 */
+ L2TP_ATTR_RX_PACKETS, /* u64 */
+ L2TP_ATTR_RX_BYTES, /* u64 */
+ L2TP_ATTR_RX_SEQ_DISCARDS, /* u64 */
+ L2TP_ATTR_RX_OOS_PACKETS, /* u64 */
+ L2TP_ATTR_RX_ERRORS, /* u64 */
+ L2TP_ATTR_STATS_PAD,
+ __L2TP_ATTR_STATS_MAX,
+};
+
+#define L2TP_ATTR_STATS_MAX (__L2TP_ATTR_STATS_MAX - 1)
+
+enum l2tp_pwtype {
+ L2TP_PWTYPE_NONE = 0x0000,
+ L2TP_PWTYPE_ETH_VLAN = 0x0004,
+ L2TP_PWTYPE_ETH = 0x0005,
+ L2TP_PWTYPE_PPP = 0x0007,
+ L2TP_PWTYPE_PPP_AC = 0x0008,
+ L2TP_PWTYPE_IP = 0x000b,
+ __L2TP_PWTYPE_MAX
+};
+
+enum l2tp_l2spec_type {
+ L2TP_L2SPECTYPE_NONE,
+ L2TP_L2SPECTYPE_DEFAULT,
+};
+
+enum l2tp_encap_type {
+ L2TP_ENCAPTYPE_UDP,
+ L2TP_ENCAPTYPE_IP,
+};
+
+/* For L2TP_ATTR_DATA_SEQ. Unused. */
+enum l2tp_seqmode {
+ L2TP_SEQ_NONE = 0,
+ L2TP_SEQ_IP = 1,
+ L2TP_SEQ_ALL = 2,
+};
+
+/**
+ * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions
+ *
+ * @L2TP_MSG_DEBUG: verbose debug (if compiled in)
+ * @L2TP_MSG_CONTROL: userspace - kernel interface
+ * @L2TP_MSG_SEQ: sequence numbers
+ * @L2TP_MSG_DATA: data packets
+ */
+enum l2tp_debug_flags {
+ L2TP_MSG_DEBUG = (1 << 0),
+ L2TP_MSG_CONTROL = (1 << 1),
+ L2TP_MSG_SEQ = (1 << 2),
+ L2TP_MSG_DATA = (1 << 3),
+};
+
+/*
+ * NETLINK_GENERIC related info
+ */
+#define L2TP_GENL_NAME "l2tp"
+#define L2TP_GENL_VERSION 0x1
+#define L2TP_GENL_MCGROUP "l2tp"
+
+#endif /* _UAPI_LINUX_L2TP_H_ */
diff --git a/src/basic/linux/libc-compat.h b/src/basic/linux/libc-compat.h
new file mode 100644
index 0000000..8254c93
--- /dev/null
+++ b/src/basic/linux/libc-compat.h
@@ -0,0 +1,267 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Compatibility interface for userspace libc header coordination:
+ *
+ * Define compatibility macros that are used to control the inclusion or
+ * exclusion of UAPI structures and definitions in coordination with another
+ * userspace C library.
+ *
+ * This header is intended to solve the problem of UAPI definitions that
+ * conflict with userspace definitions. If a UAPI header has such conflicting
+ * definitions then the solution is as follows:
+ *
+ * * Synchronize the UAPI header and the libc headers so either one can be
+ * used and such that the ABI is preserved. If this is not possible then
+ * no simple compatibility interface exists (you need to write translating
+ * wrappers and rename things) and you can't use this interface.
+ *
+ * Then follow this process:
+ *
+ * (a) Include libc-compat.h in the UAPI header.
+ * e.g. #include <linux/libc-compat.h>
+ * This include must be as early as possible.
+ *
+ * (b) In libc-compat.h add enough code to detect that the comflicting
+ * userspace libc header has been included first.
+ *
+ * (c) If the userspace libc header has been included first define a set of
+ * guard macros of the form __UAPI_DEF_FOO and set their values to 1, else
+ * set their values to 0.
+ *
+ * (d) Back in the UAPI header with the conflicting definitions, guard the
+ * definitions with:
+ * #if __UAPI_DEF_FOO
+ * ...
+ * #endif
+ *
+ * This fixes the situation where the linux headers are included *after* the
+ * libc headers. To fix the problem with the inclusion in the other order the
+ * userspace libc headers must be fixed like this:
+ *
+ * * For all definitions that conflict with kernel definitions wrap those
+ * defines in the following:
+ * #if !__UAPI_DEF_FOO
+ * ...
+ * #endif
+ *
+ * This prevents the redefinition of a construct already defined by the kernel.
+ */
+#ifndef _UAPI_LIBC_COMPAT_H
+#define _UAPI_LIBC_COMPAT_H
+
+/* We have included glibc headers... */
+#if defined(__GLIBC__)
+
+/* Coordinate with glibc net/if.h header. */
+#if defined(_NET_IF_H) && defined(__USE_MISC)
+
+/* GLIBC headers included first so don't define anything
+ * that would already be defined. */
+
+#define __UAPI_DEF_IF_IFCONF 0
+#define __UAPI_DEF_IF_IFMAP 0
+#define __UAPI_DEF_IF_IFNAMSIZ 0
+#define __UAPI_DEF_IF_IFREQ 0
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 0
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
+
+#else /* _NET_IF_H */
+
+/* Linux headers included first, and we must define everything
+ * we need. The expectation is that glibc will check the
+ * __UAPI_DEF_* defines and adjust appropriately. */
+
+#define __UAPI_DEF_IF_IFCONF 1
+#define __UAPI_DEF_IF_IFMAP 1
+#define __UAPI_DEF_IF_IFNAMSIZ 1
+#define __UAPI_DEF_IF_IFREQ 1
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+
+#endif /* _NET_IF_H */
+
+/* Coordinate with glibc netinet/in.h header. */
+#if defined(_NETINET_IN_H)
+
+/* GLIBC headers included first so don't define anything
+ * that would already be defined. */
+#define __UAPI_DEF_IN_ADDR 0
+#define __UAPI_DEF_IN_IPPROTO 0
+#define __UAPI_DEF_IN_PKTINFO 0
+#define __UAPI_DEF_IP_MREQ 0
+#define __UAPI_DEF_SOCKADDR_IN 0
+#define __UAPI_DEF_IN_CLASS 0
+
+#define __UAPI_DEF_IN6_ADDR 0
+/* The exception is the in6_addr macros which must be defined
+ * if the glibc code didn't define them. This guard matches
+ * the guard in glibc/inet/netinet/in.h which defines the
+ * additional in6_addr macros e.g. s6_addr16, and s6_addr32. */
+#if defined(__USE_MISC) || defined (__USE_GNU)
+#define __UAPI_DEF_IN6_ADDR_ALT 0
+#else
+#define __UAPI_DEF_IN6_ADDR_ALT 1
+#endif
+#define __UAPI_DEF_SOCKADDR_IN6 0
+#define __UAPI_DEF_IPV6_MREQ 0
+#define __UAPI_DEF_IPPROTO_V6 0
+#define __UAPI_DEF_IPV6_OPTIONS 0
+#define __UAPI_DEF_IN6_PKTINFO 0
+#define __UAPI_DEF_IP6_MTUINFO 0
+
+#else
+
+/* Linux headers included first, and we must define everything
+ * we need. The expectation is that glibc will check the
+ * __UAPI_DEF_* defines and adjust appropriately. */
+#define __UAPI_DEF_IN_ADDR 1
+#define __UAPI_DEF_IN_IPPROTO 1
+#define __UAPI_DEF_IN_PKTINFO 1
+#define __UAPI_DEF_IP_MREQ 1
+#define __UAPI_DEF_SOCKADDR_IN 1
+#define __UAPI_DEF_IN_CLASS 1
+
+#define __UAPI_DEF_IN6_ADDR 1
+/* We unconditionally define the in6_addr macros and glibc must
+ * coordinate. */
+#define __UAPI_DEF_IN6_ADDR_ALT 1
+#define __UAPI_DEF_SOCKADDR_IN6 1
+#define __UAPI_DEF_IPV6_MREQ 1
+#define __UAPI_DEF_IPPROTO_V6 1
+#define __UAPI_DEF_IPV6_OPTIONS 1
+#define __UAPI_DEF_IN6_PKTINFO 1
+#define __UAPI_DEF_IP6_MTUINFO 1
+
+#endif /* _NETINET_IN_H */
+
+/* Coordinate with glibc netipx/ipx.h header. */
+#if defined(__NETIPX_IPX_H)
+
+#define __UAPI_DEF_SOCKADDR_IPX 0
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION 0
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 0
+#define __UAPI_DEF_IPX_CONFIG_DATA 0
+#define __UAPI_DEF_IPX_ROUTE_DEF 0
+
+#else /* defined(__NETIPX_IPX_H) */
+
+#define __UAPI_DEF_SOCKADDR_IPX 1
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1
+#define __UAPI_DEF_IPX_CONFIG_DATA 1
+#define __UAPI_DEF_IPX_ROUTE_DEF 1
+
+#endif /* defined(__NETIPX_IPX_H) */
+
+/* Definitions for xattr.h */
+#if defined(_SYS_XATTR_H)
+#define __UAPI_DEF_XATTR 0
+#else
+#define __UAPI_DEF_XATTR 1
+#endif
+
+/* If we did not see any headers from any supported C libraries,
+ * or we are being included in the kernel, then define everything
+ * that we need. Check for previous __UAPI_* definitions to give
+ * unsupported C libraries a way to opt out of any kernel definition. */
+#else /* !defined(__GLIBC__) */
+
+/* Definitions for if.h */
+#ifndef __UAPI_DEF_IF_IFCONF
+#define __UAPI_DEF_IF_IFCONF 1
+#endif
+#ifndef __UAPI_DEF_IF_IFMAP
+#define __UAPI_DEF_IF_IFMAP 1
+#endif
+#ifndef __UAPI_DEF_IF_IFNAMSIZ
+#define __UAPI_DEF_IF_IFNAMSIZ 1
+#endif
+#ifndef __UAPI_DEF_IF_IFREQ
+#define __UAPI_DEF_IF_IFREQ 1
+#endif
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+#endif
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+#endif
+
+/* Definitions for in.h */
+#ifndef __UAPI_DEF_IN_ADDR
+#define __UAPI_DEF_IN_ADDR 1
+#endif
+#ifndef __UAPI_DEF_IN_IPPROTO
+#define __UAPI_DEF_IN_IPPROTO 1
+#endif
+#ifndef __UAPI_DEF_IN_PKTINFO
+#define __UAPI_DEF_IN_PKTINFO 1
+#endif
+#ifndef __UAPI_DEF_IP_MREQ
+#define __UAPI_DEF_IP_MREQ 1
+#endif
+#ifndef __UAPI_DEF_SOCKADDR_IN
+#define __UAPI_DEF_SOCKADDR_IN 1
+#endif
+#ifndef __UAPI_DEF_IN_CLASS
+#define __UAPI_DEF_IN_CLASS 1
+#endif
+
+/* Definitions for in6.h */
+#ifndef __UAPI_DEF_IN6_ADDR
+#define __UAPI_DEF_IN6_ADDR 1
+#endif
+#ifndef __UAPI_DEF_IN6_ADDR_ALT
+#define __UAPI_DEF_IN6_ADDR_ALT 1
+#endif
+#ifndef __UAPI_DEF_SOCKADDR_IN6
+#define __UAPI_DEF_SOCKADDR_IN6 1
+#endif
+#ifndef __UAPI_DEF_IPV6_MREQ
+#define __UAPI_DEF_IPV6_MREQ 1
+#endif
+#ifndef __UAPI_DEF_IPPROTO_V6
+#define __UAPI_DEF_IPPROTO_V6 1
+#endif
+#ifndef __UAPI_DEF_IPV6_OPTIONS
+#define __UAPI_DEF_IPV6_OPTIONS 1
+#endif
+#ifndef __UAPI_DEF_IN6_PKTINFO
+#define __UAPI_DEF_IN6_PKTINFO 1
+#endif
+#ifndef __UAPI_DEF_IP6_MTUINFO
+#define __UAPI_DEF_IP6_MTUINFO 1
+#endif
+
+/* Definitions for ipx.h */
+#ifndef __UAPI_DEF_SOCKADDR_IPX
+#define __UAPI_DEF_SOCKADDR_IPX 1
+#endif
+#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1
+#endif
+#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1
+#endif
+#ifndef __UAPI_DEF_IPX_CONFIG_DATA
+#define __UAPI_DEF_IPX_CONFIG_DATA 1
+#endif
+#ifndef __UAPI_DEF_IPX_ROUTE_DEF
+#define __UAPI_DEF_IPX_ROUTE_DEF 1
+#endif
+
+/* Definitions for xattr.h */
+#ifndef __UAPI_DEF_XATTR
+#define __UAPI_DEF_XATTR 1
+#endif
+
+#endif /* __GLIBC__ */
+
+#endif /* _UAPI_LIBC_COMPAT_H */
diff --git a/src/basic/linux/loadavg.h b/src/basic/linux/loadavg.h
new file mode 100644
index 0000000..521a787
--- /dev/null
+++ b/src/basic/linux/loadavg.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_LOADAVG_H
+#define _LINUX_SCHED_LOADAVG_H
+
+/*
+ * These are the constant used to fake the fixed-point load-average
+ * counting. Some notes:
+ * - 11 bit fractions expand to 22 bits by the multiplies: this gives
+ * a load-average precision of 10 bits integer + 11 bits fractional
+ * - if you want to count load-averages more often, you need more
+ * precision, or rounding will get you. With 2-second counting freq,
+ * the EXP_n values would be 1981, 2034 and 2043 if still using only
+ * 11 bit fractions.
+ */
+extern unsigned long avenrun[]; /* Load averages */
+extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
+
+#define FSHIFT 11 /* nr of bits of precision */
+#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
+#define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */
+#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
+#define EXP_5 2014 /* 1/exp(5sec/5min) */
+#define EXP_15 2037 /* 1/exp(5sec/15min) */
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ */
+static inline unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+ unsigned long newload;
+
+ newload = load * exp + active * (FIXED_1 - exp);
+ if (active >= load)
+ newload += FIXED_1-1;
+
+ return newload / FIXED_1;
+}
+
+extern unsigned long calc_load_n(unsigned long load, unsigned long exp,
+ unsigned long active, unsigned int n);
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+extern void calc_global_load(unsigned long ticks);
+
+#endif /* _LINUX_SCHED_LOADAVG_H */
diff --git a/src/basic/linux/netdevice.h b/src/basic/linux/netdevice.h
new file mode 100644
index 0000000..f3770c5
--- /dev/null
+++ b/src/basic/linux/netdevice.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions for the Interfaces handler.
+ *
+ * Version: @(#)dev.h 1.0.10 08/12/93
+ *
+ * Authors: Ross Biro
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Corey Minyard <wf-rch!minyard@relay.EU.net>
+ * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
+ * Alan Cox, <alan@lxorguk.ukuu.org.uk>
+ * Bjorn Ekwall. <bj0rn@blox.se>
+ * Pekka Riikonen <priikone@poseidon.pspt.fi>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Moved to /usr/include/linux for NET3
+ */
+#ifndef _UAPI_LINUX_NETDEVICE_H
+#define _UAPI_LINUX_NETDEVICE_H
+
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_link.h>
+
+
+#define MAX_ADDR_LEN 32 /* Largest hardware address length */
+
+/* Initial net device group. All devices belong to group 0 by default. */
+#define INIT_NETDEV_GROUP 0
+
+
+/* interface name assignment types (sysfs name_assign_type attribute) */
+#define NET_NAME_UNKNOWN 0 /* unknown origin (not exposed to userspace) */
+#define NET_NAME_ENUM 1 /* enumerated by kernel */
+#define NET_NAME_PREDICTABLE 2 /* predictably named by the kernel */
+#define NET_NAME_USER 3 /* provided by user-space */
+#define NET_NAME_RENAMED 4 /* renamed by user-space */
+
+/* Media selection options. */
+enum {
+ IF_PORT_UNKNOWN = 0,
+ IF_PORT_10BASE2,
+ IF_PORT_10BASET,
+ IF_PORT_AUI,
+ IF_PORT_100BASET,
+ IF_PORT_100BASETX,
+ IF_PORT_100BASEFX
+};
+
+/* hardware address assignment types */
+#define NET_ADDR_PERM 0 /* address is permanent (default) */
+#define NET_ADDR_RANDOM 1 /* address is generated randomly */
+#define NET_ADDR_STOLEN 2 /* address is stolen from other device */
+#define NET_ADDR_SET 3 /* address is set using
+ * dev_set_mac_address() */
+
+#endif /* _UAPI_LINUX_NETDEVICE_H */
diff --git a/src/basic/linux/netlink.h b/src/basic/linux/netlink.h
new file mode 100644
index 0000000..eac8a6a
--- /dev/null
+++ b/src/basic/linux/netlink.h
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETLINK_H
+#define _UAPI__LINUX_NETLINK_H
+
+#include <linux/kernel.h>
+#include <linux/socket.h> /* for __kernel_sa_family_t */
+#include <linux/types.h>
+
+#define NETLINK_ROUTE 0 /* Routing/device hook */
+#define NETLINK_UNUSED 1 /* Unused number */
+#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
+#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */
+#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
+#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
+#define NETLINK_XFRM 6 /* ipsec */
+#define NETLINK_SELINUX 7 /* SELinux event notifications */
+#define NETLINK_ISCSI 8 /* Open-iSCSI */
+#define NETLINK_AUDIT 9 /* auditing */
+#define NETLINK_FIB_LOOKUP 10
+#define NETLINK_CONNECTOR 11
+#define NETLINK_NETFILTER 12 /* netfilter subsystem */
+#define NETLINK_IP6_FW 13
+#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
+#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
+#define NETLINK_GENERIC 16
+/* leave room for NETLINK_DM (DM Events) */
+#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */
+#define NETLINK_ECRYPTFS 19
+#define NETLINK_RDMA 20
+#define NETLINK_CRYPTO 21 /* Crypto layer */
+#define NETLINK_SMC 22 /* SMC monitoring */
+
+#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG
+
+#define MAX_LINKS 32
+
+struct sockaddr_nl {
+ __kernel_sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* zero */
+ __u32 nl_pid; /* port ID */
+ __u32 nl_groups; /* multicast groups mask */
+};
+
+struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including header */
+ __u16 nlmsg_type; /* Message content */
+ __u16 nlmsg_flags; /* Additional flags */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Sending process port ID */
+};
+
+/* Flags values */
+
+#define NLM_F_REQUEST 0x01 /* It is request message. */
+#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */
+#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */
+#define NLM_F_ECHO 0x08 /* Echo this request */
+#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */
+
+/* Modifiers to GET request */
+#define NLM_F_ROOT 0x100 /* specify tree root */
+#define NLM_F_MATCH 0x200 /* return all matching */
+#define NLM_F_ATOMIC 0x400 /* atomic GET */
+#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH)
+
+/* Modifiers to NEW request */
+#define NLM_F_REPLACE 0x100 /* Override existing */
+#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */
+#define NLM_F_CREATE 0x400 /* Create, if it does not exist */
+#define NLM_F_APPEND 0x800 /* Add to end of list */
+
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC 0x100 /* Do not delete recursively */
+
+/* Flags for ACK message */
+#define NLM_F_CAPPED 0x100 /* request was capped */
+#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */
+
+/*
+ 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL
+ 4.4BSD CHANGE NLM_F_REPLACE
+
+ True CHANGE NLM_F_CREATE|NLM_F_REPLACE
+ Append NLM_F_CREATE
+ Check NLM_F_EXCL
+ */
+
+#define NLMSG_ALIGNTO 4U
+#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
+#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
+#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
+#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
+#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
+ (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
+#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len <= (len))
+#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
+
+#define NLMSG_NOOP 0x1 /* Nothing. */
+#define NLMSG_ERROR 0x2 /* Error */
+#define NLMSG_DONE 0x3 /* End of a dump */
+#define NLMSG_OVERRUN 0x4 /* Data lost */
+
+#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
+
+struct nlmsgerr {
+ int error;
+ struct nlmsghdr msg;
+ /*
+ * followed by the message contents unless NETLINK_CAP_ACK was set
+ * or the ACK indicates success (error == 0)
+ * message length is aligned with NLMSG_ALIGN()
+ */
+ /*
+ * followed by TLVs defined in enum nlmsgerr_attrs
+ * if NETLINK_EXT_ACK was set
+ */
+};
+
+/**
+ * enum nlmsgerr_attrs - nlmsgerr attributes
+ * @NLMSGERR_ATTR_UNUSED: unused
+ * @NLMSGERR_ATTR_MSG: error message string (string)
+ * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original
+ * message, counting from the beginning of the header (u32)
+ * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
+ * be used - in the success case - to identify a created
+ * object or operation or similar (binary)
+ * @__NLMSGERR_ATTR_MAX: number of attributes
+ * @NLMSGERR_ATTR_MAX: highest attribute number
+ */
+enum nlmsgerr_attrs {
+ NLMSGERR_ATTR_UNUSED,
+ NLMSGERR_ATTR_MSG,
+ NLMSGERR_ATTR_OFFS,
+ NLMSGERR_ATTR_COOKIE,
+
+ __NLMSGERR_ATTR_MAX,
+ NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
+};
+
+#define NETLINK_ADD_MEMBERSHIP 1
+#define NETLINK_DROP_MEMBERSHIP 2
+#define NETLINK_PKTINFO 3
+#define NETLINK_BROADCAST_ERROR 4
+#define NETLINK_NO_ENOBUFS 5
+#ifndef __KERNEL__
+#define NETLINK_RX_RING 6
+#define NETLINK_TX_RING 7
+#endif
+#define NETLINK_LISTEN_ALL_NSID 8
+#define NETLINK_LIST_MEMBERSHIPS 9
+#define NETLINK_CAP_ACK 10
+#define NETLINK_EXT_ACK 11
+#define NETLINK_GET_STRICT_CHK 12
+
+struct nl_pktinfo {
+ __u32 group;
+};
+
+struct nl_mmap_req {
+ unsigned int nm_block_size;
+ unsigned int nm_block_nr;
+ unsigned int nm_frame_size;
+ unsigned int nm_frame_nr;
+};
+
+struct nl_mmap_hdr {
+ unsigned int nm_status;
+ unsigned int nm_len;
+ __u32 nm_group;
+ /* credentials */
+ __u32 nm_pid;
+ __u32 nm_uid;
+ __u32 nm_gid;
+};
+
+#ifndef __KERNEL__
+enum nl_mmap_status {
+ NL_MMAP_STATUS_UNUSED,
+ NL_MMAP_STATUS_RESERVED,
+ NL_MMAP_STATUS_VALID,
+ NL_MMAP_STATUS_COPY,
+ NL_MMAP_STATUS_SKIP,
+};
+
+#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
+#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
+#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
+#endif
+
+#define NET_MAJOR 36 /* Major 36 is reserved for networking */
+
+enum {
+ NETLINK_UNCONNECTED = 0,
+ NETLINK_CONNECTED,
+};
+
+/*
+ * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * | Header | Pad | Payload | Pad |
+ * | (struct nlattr) | ing | | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr {
+ __u16 nla_len;
+ __u16 nla_type;
+};
+
+/*
+ * nla_type (16 bits)
+ * +---+---+-------------------------------+
+ * | N | O | Attribute Type |
+ * +---+---+-------------------------------+
+ * N := Carries nested attributes
+ * O := Payload stored in network byte order
+ *
+ * Note: The N and O flag are mutually exclusive.
+ */
+#define NLA_F_NESTED (1 << 15)
+#define NLA_F_NET_BYTEORDER (1 << 14)
+#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+
+#define NLA_ALIGNTO 4
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set
+ * The selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ * value = 0x0, and selector = 0x1
+ * implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ * value = 0x2, and selector = 0x2
+ * implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+ __u32 value;
+ __u32 selector;
+};
+
+/*
+ * policy descriptions - it's specific to each family how this is used
+ * Normally, it should be retrieved via a dump inside another attribute
+ * specifying where it applies.
+ */
+
+/**
+ * enum netlink_attribute_type - type of an attribute
+ * @NL_ATTR_TYPE_INVALID: unused
+ * @NL_ATTR_TYPE_FLAG: flag attribute (present/not present)
+ * @NL_ATTR_TYPE_U8: 8-bit unsigned attribute
+ * @NL_ATTR_TYPE_U16: 16-bit unsigned attribute
+ * @NL_ATTR_TYPE_U32: 32-bit unsigned attribute
+ * @NL_ATTR_TYPE_U64: 64-bit unsigned attribute
+ * @NL_ATTR_TYPE_S8: 8-bit signed attribute
+ * @NL_ATTR_TYPE_S16: 16-bit signed attribute
+ * @NL_ATTR_TYPE_S32: 32-bit signed attribute
+ * @NL_ATTR_TYPE_S64: 64-bit signed attribute
+ * @NL_ATTR_TYPE_BINARY: binary data, min/max length may be specified
+ * @NL_ATTR_TYPE_STRING: string, min/max length may be specified
+ * @NL_ATTR_TYPE_NUL_STRING: NUL-terminated string,
+ * min/max length may be specified
+ * @NL_ATTR_TYPE_NESTED: nested, i.e. the content of this attribute
+ * consists of sub-attributes. The nested policy and maxtype
+ * inside may be specified.
+ * @NL_ATTR_TYPE_NESTED_ARRAY: nested array, i.e. the content of this
+ * attribute contains sub-attributes whose type is irrelevant
+ * (just used to separate the array entries) and each such array
+ * entry has attributes again, the policy for those inner ones
+ * and the corresponding maxtype may be specified.
+ * @NL_ATTR_TYPE_BITFIELD32: &struct nla_bitfield32 attribute
+ */
+enum netlink_attribute_type {
+ NL_ATTR_TYPE_INVALID,
+
+ NL_ATTR_TYPE_FLAG,
+
+ NL_ATTR_TYPE_U8,
+ NL_ATTR_TYPE_U16,
+ NL_ATTR_TYPE_U32,
+ NL_ATTR_TYPE_U64,
+
+ NL_ATTR_TYPE_S8,
+ NL_ATTR_TYPE_S16,
+ NL_ATTR_TYPE_S32,
+ NL_ATTR_TYPE_S64,
+
+ NL_ATTR_TYPE_BINARY,
+ NL_ATTR_TYPE_STRING,
+ NL_ATTR_TYPE_NUL_STRING,
+
+ NL_ATTR_TYPE_NESTED,
+ NL_ATTR_TYPE_NESTED_ARRAY,
+
+ NL_ATTR_TYPE_BITFIELD32,
+};
+
+/**
+ * enum netlink_policy_type_attr - policy type attributes
+ * @NL_POLICY_TYPE_ATTR_UNSPEC: unused
+ * @NL_POLICY_TYPE_ATTR_TYPE: type of the attribute,
+ * &enum netlink_attribute_type (U32)
+ * @NL_POLICY_TYPE_ATTR_MIN_VALUE_S: minimum value for signed
+ * integers (S64)
+ * @NL_POLICY_TYPE_ATTR_MAX_VALUE_S: maximum value for signed
+ * integers (S64)
+ * @NL_POLICY_TYPE_ATTR_MIN_VALUE_U: minimum value for unsigned
+ * integers (U64)
+ * @NL_POLICY_TYPE_ATTR_MAX_VALUE_U: maximum value for unsigned
+ * integers (U64)
+ * @NL_POLICY_TYPE_ATTR_MIN_LENGTH: minimum length for binary
+ * attributes, no minimum if not given (U32)
+ * @NL_POLICY_TYPE_ATTR_MAX_LENGTH: maximum length for binary
+ * attributes, no maximum if not given (U32)
+ * @NL_POLICY_TYPE_ATTR_POLICY_IDX: sub policy for nested and
+ * nested array types (U32)
+ * @NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE: maximum sub policy
+ * attribute for nested and nested array types, this can
+ * in theory be < the size of the policy pointed to by
+ * the index, if limited inside the nesting (U32)
+ * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the
+ * bitfield32 type (U32)
+ * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
+ */
+enum netlink_policy_type_attr {
+ NL_POLICY_TYPE_ATTR_UNSPEC,
+ NL_POLICY_TYPE_ATTR_TYPE,
+ NL_POLICY_TYPE_ATTR_MIN_VALUE_S,
+ NL_POLICY_TYPE_ATTR_MAX_VALUE_S,
+ NL_POLICY_TYPE_ATTR_MIN_VALUE_U,
+ NL_POLICY_TYPE_ATTR_MAX_VALUE_U,
+ NL_POLICY_TYPE_ATTR_MIN_LENGTH,
+ NL_POLICY_TYPE_ATTR_MAX_LENGTH,
+ NL_POLICY_TYPE_ATTR_POLICY_IDX,
+ NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
+ NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
+ NL_POLICY_TYPE_ATTR_PAD,
+
+ /* keep last */
+ __NL_POLICY_TYPE_ATTR_MAX,
+ NL_POLICY_TYPE_ATTR_MAX = __NL_POLICY_TYPE_ATTR_MAX - 1
+};
+
+#endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/src/basic/linux/nexthop.h b/src/basic/linux/nexthop.h
new file mode 100644
index 0000000..2d4a1e7
--- /dev/null
+++ b/src/basic/linux/nexthop.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_NEXTHOP_H
+#define _UAPI_LINUX_NEXTHOP_H
+
+#include <linux/types.h>
+
+struct nhmsg {
+ unsigned char nh_family;
+ unsigned char nh_scope; /* return only */
+ unsigned char nh_protocol; /* Routing protocol that installed nh */
+ unsigned char resvd;
+ unsigned int nh_flags; /* RTNH_F flags */
+};
+
+/* entry in a nexthop group */
+struct nexthop_grp {
+ __u32 id; /* nexthop id - must exist */
+ __u8 weight; /* weight of this nexthop */
+ __u8 resvd1;
+ __u16 resvd2;
+};
+
+enum {
+ NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */
+ __NEXTHOP_GRP_TYPE_MAX,
+};
+
+#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
+
+enum {
+ NHA_UNSPEC,
+ NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */
+
+ NHA_GROUP, /* array of nexthop_grp */
+ NHA_GROUP_TYPE, /* u16 one of NEXTHOP_GRP_TYPE */
+ /* if NHA_GROUP attribute is added, no other attributes can be set */
+
+ NHA_BLACKHOLE, /* flag; nexthop used to blackhole packets */
+ /* if NHA_BLACKHOLE is added, OIF, GATEWAY, ENCAP can not be set */
+
+ NHA_OIF, /* u32; nexthop device */
+ NHA_GATEWAY, /* be32 (IPv4) or in6_addr (IPv6) gw address */
+ NHA_ENCAP_TYPE, /* u16; lwt encap type */
+ NHA_ENCAP, /* lwt encap data */
+
+ /* NHA_OIF can be appended to dump request to return only
+ * nexthops using given device
+ */
+ NHA_GROUPS, /* flag; only return nexthop groups in dump */
+ NHA_MASTER, /* u32; only return nexthops with given master dev */
+
+ NHA_FDB, /* flag; nexthop belongs to a bridge fdb */
+ /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
+
+ __NHA_MAX,
+};
+
+#define NHA_MAX (__NHA_MAX - 1)
+#endif
diff --git a/src/basic/linux/pkt_sched.h b/src/basic/linux/pkt_sched.h
new file mode 100644
index 0000000..9e7c2c6
--- /dev/null
+++ b/src/basic/linux/pkt_sched.h
@@ -0,0 +1,1265 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_PKT_SCHED_H
+#define __LINUX_PKT_SCHED_H
+
+#include <linux/const.h>
+#include <linux/types.h>
+
+/* Logical priority bands not depending on specific packet scheduler.
+ Every scheduler will map them to real traffic classes, if it has
+ no more precise mechanism to classify packets.
+
+ These numbers have no special meaning, though their coincidence
+ with obsolete IPv6 values is not occasional :-). New IPv6 drafts
+ preferred full anarchy inspired by diffserv group.
+
+ Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
+ class, actually, as rule it will be handled with more care than
+ filler or even bulk.
+ */
+
+#define TC_PRIO_BESTEFFORT 0
+#define TC_PRIO_FILLER 1
+#define TC_PRIO_BULK 2
+#define TC_PRIO_INTERACTIVE_BULK 4
+#define TC_PRIO_INTERACTIVE 6
+#define TC_PRIO_CONTROL 7
+
+#define TC_PRIO_MAX 15
+
+/* Generic queue statistics, available for all the elements.
+ Particular schedulers may have also their private records.
+ */
+
+struct tc_stats {
+ __u64 bytes; /* Number of enqueued bytes */
+ __u32 packets; /* Number of enqueued packets */
+ __u32 drops; /* Packets dropped because of lack of resources */
+ __u32 overlimits; /* Number of throttle events when this
+ * flow goes out of allocated bandwidth */
+ __u32 bps; /* Current flow byte rate */
+ __u32 pps; /* Current flow packet rate */
+ __u32 qlen;
+ __u32 backlog;
+};
+
+struct tc_estimator {
+ signed char interval;
+ unsigned char ewma_log;
+};
+
+/* "Handles"
+ ---------
+
+ All the traffic control objects have 32bit identifiers, or "handles".
+
+ They can be considered as opaque numbers from user API viewpoint,
+ but actually they always consist of two fields: major and
+ minor numbers, which are interpreted by kernel specially,
+ that may be used by applications, though not recommended.
+
+ F.e. qdisc handles always have minor number equal to zero,
+ classes (or flows) have major equal to parent qdisc major, and
+ minor uniquely identifying class inside qdisc.
+
+ Macros to manipulate handles:
+ */
+
+#define TC_H_MAJ_MASK (0xFFFF0000U)
+#define TC_H_MIN_MASK (0x0000FFFFU)
+#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
+#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)
+#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))
+
+#define TC_H_UNSPEC (0U)
+#define TC_H_ROOT (0xFFFFFFFFU)
+#define TC_H_INGRESS (0xFFFFFFF1U)
+#define TC_H_CLSACT TC_H_INGRESS
+
+#define TC_H_MIN_PRIORITY 0xFFE0U
+#define TC_H_MIN_INGRESS 0xFFF2U
+#define TC_H_MIN_EGRESS 0xFFF3U
+
+/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+ TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+ TC_LINKLAYER_ETHERNET,
+ TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
+struct tc_ratespec {
+ unsigned char cell_log;
+ __u8 linklayer; /* lower 4 bits */
+ unsigned short overhead;
+ short cell_align;
+ unsigned short mpu;
+ __u32 rate;
+};
+
+#define TC_RTAB_SIZE 1024
+
+struct tc_sizespec {
+ unsigned char cell_log;
+ unsigned char size_log;
+ short cell_align;
+ int overhead;
+ unsigned int linklayer;
+ unsigned int mpu;
+ unsigned int mtu;
+ unsigned int tsize;
+};
+
+enum {
+ TCA_STAB_UNSPEC,
+ TCA_STAB_BASE,
+ TCA_STAB_DATA,
+ __TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
+/* FIFO section */
+
+struct tc_fifo_qopt {
+ __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */
+};
+
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+ __u32 limit; /* Queue length in packets. */
+};
+
+/* PRIO section */
+
+#define TCQ_PRIO_BANDS 16
+#define TCQ_MIN_PRIO_BANDS 2
+
+struct tc_prio_qopt {
+ int bands; /* Number of bands */
+ __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
+};
+
+/* MULTIQ section */
+
+struct tc_multiq_qopt {
+ __u16 bands; /* Number of bands */
+ __u16 max_bands; /* Maximum number of queues */
+};
+
+/* PLUG section */
+
+#define TCQ_PLUG_BUFFER 0
+#define TCQ_PLUG_RELEASE_ONE 1
+#define TCQ_PLUG_RELEASE_INDEFINITE 2
+#define TCQ_PLUG_LIMIT 3
+
+struct tc_plug_qopt {
+ /* TCQ_PLUG_BUFFER: Inset a plug into the queue and
+ * buffer any incoming packets
+ * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
+ * to beginning of the next plug.
+ * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
+ * Stop buffering packets until the next TCQ_PLUG_BUFFER
+ * command is received (just act as a pass-thru queue).
+ * TCQ_PLUG_LIMIT: Increase/decrease queue size
+ */
+ int action;
+ __u32 limit;
+};
+
+/* TBF section */
+
+struct tc_tbf_qopt {
+ struct tc_ratespec rate;
+ struct tc_ratespec peakrate;
+ __u32 limit;
+ __u32 buffer;
+ __u32 mtu;
+};
+
+enum {
+ TCA_TBF_UNSPEC,
+ TCA_TBF_PARMS,
+ TCA_TBF_RTAB,
+ TCA_TBF_PTAB,
+ TCA_TBF_RATE64,
+ TCA_TBF_PRATE64,
+ TCA_TBF_BURST,
+ TCA_TBF_PBURST,
+ TCA_TBF_PAD,
+ __TCA_TBF_MAX,
+};
+
+#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)
+
+
+/* TEQL section */
+
+/* TEQL does not require any parameters */
+
+/* SFQ section */
+
+struct tc_sfq_qopt {
+ unsigned quantum; /* Bytes per round allocated to flow */
+ int perturb_period; /* Period of hash perturbation */
+ __u32 limit; /* Maximal packets in queue */
+ unsigned divisor; /* Hash divisor */
+ unsigned flows; /* Maximal number of flows */
+};
+
+struct tc_sfqred_stats {
+ __u32 prob_drop; /* Early drops, below max threshold */
+ __u32 forced_drop; /* Early drops, after max threshold */
+ __u32 prob_mark; /* Marked packets, below max threshold */
+ __u32 forced_mark; /* Marked packets, after max threshold */
+ __u32 prob_mark_head; /* Marked packets, below max threshold */
+ __u32 forced_mark_head;/* Marked packets, after max threshold */
+};
+
+struct tc_sfq_qopt_v1 {
+ struct tc_sfq_qopt v0;
+ unsigned int depth; /* max number of packets per flow */
+ unsigned int headdrop;
+/* SFQRED parameters */
+ __u32 limit; /* HARD maximal flow queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags;
+ __u32 max_P; /* probability, high resolution */
+/* SFQRED stats */
+ struct tc_sfqred_stats stats;
+};
+
+
+struct tc_sfq_xstats {
+ __s32 allot;
+};
+
+/* RED section */
+
+enum {
+ TCA_RED_UNSPEC,
+ TCA_RED_PARMS,
+ TCA_RED_STAB,
+ TCA_RED_MAX_P,
+ TCA_RED_FLAGS, /* bitfield32 */
+ TCA_RED_EARLY_DROP_BLOCK, /* u32 */
+ TCA_RED_MARK_BLOCK, /* u32 */
+ __TCA_RED_MAX,
+};
+
+#define TCA_RED_MAX (__TCA_RED_MAX - 1)
+
+struct tc_red_qopt {
+ __u32 limit; /* HARD maximal queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+
+ /* This field can be used for flags that a RED-like qdisc has
+ * historically supported. E.g. when configuring RED, it can be used for
+ * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN,
+ * HARDDROP. Etc. Because this field has not been validated, and is
+ * copied back on dump, any bits besides those to which a given qdisc
+ * has assigned a historical meaning need to be considered for free use
+ * by userspace tools.
+ *
+ * Any further flags need to be passed differently, e.g. through an
+ * attribute (such as TCA_RED_FLAGS above). Such attribute should allow
+ * passing both recent and historic flags in one value.
+ */
+ unsigned char flags;
+#define TC_RED_ECN 1
+#define TC_RED_HARDDROP 2
+#define TC_RED_ADAPTATIVE 4
+#define TC_RED_NODROP 8
+};
+
+#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE)
+
+struct tc_red_xstats {
+ __u32 early; /* Early drops */
+ __u32 pdrop; /* Drops due to queue limits */
+ __u32 other; /* Drops due to drop() calls */
+ __u32 marked; /* Marked packets */
+};
+
+/* GRED section */
+
+#define MAX_DPs 16
+
+enum {
+ TCA_GRED_UNSPEC,
+ TCA_GRED_PARMS,
+ TCA_GRED_STAB,
+ TCA_GRED_DPS,
+ TCA_GRED_MAX_P,
+ TCA_GRED_LIMIT,
+ TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */
+ __TCA_GRED_MAX,
+};
+
+#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
+
+enum {
+ TCA_GRED_VQ_ENTRY_UNSPEC,
+ TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */
+ __TCA_GRED_VQ_ENTRY_MAX,
+};
+#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1)
+
+enum {
+ TCA_GRED_VQ_UNSPEC,
+ TCA_GRED_VQ_PAD,
+ TCA_GRED_VQ_DP, /* u32 */
+ TCA_GRED_VQ_STAT_BYTES, /* u64 */
+ TCA_GRED_VQ_STAT_PACKETS, /* u32 */
+ TCA_GRED_VQ_STAT_BACKLOG, /* u32 */
+ TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */
+ TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */
+ TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */
+ TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */
+ TCA_GRED_VQ_STAT_PDROP, /* u32 */
+ TCA_GRED_VQ_STAT_OTHER, /* u32 */
+ TCA_GRED_VQ_FLAGS, /* u32 */
+ __TCA_GRED_VQ_MAX
+};
+
+#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1)
+
+struct tc_gred_qopt {
+ __u32 limit; /* HARD maximal queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ __u32 DP; /* up to 2^32 DPs */
+ __u32 backlog;
+ __u32 qave;
+ __u32 forced;
+ __u32 early;
+ __u32 other;
+ __u32 pdrop;
+ __u8 Wlog; /* log(W) */
+ __u8 Plog; /* log(P_max/(qth_max-qth_min)) */
+ __u8 Scell_log; /* cell size for idle damping */
+ __u8 prio; /* prio of this VQ */
+ __u32 packets;
+ __u32 bytesin;
+};
+
+/* gred setup */
+struct tc_gred_sopt {
+ __u32 DPs;
+ __u32 def_DP;
+ __u8 grio;
+ __u8 flags;
+ __u16 pad1;
+};
+
+/* CHOKe section */
+
+enum {
+ TCA_CHOKE_UNSPEC,
+ TCA_CHOKE_PARMS,
+ TCA_CHOKE_STAB,
+ TCA_CHOKE_MAX_P,
+ __TCA_CHOKE_MAX,
+};
+
+#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)
+
+struct tc_choke_qopt {
+ __u32 limit; /* Hard queue length (packets) */
+ __u32 qth_min; /* Min average threshold (packets) */
+ __u32 qth_max; /* Max average threshold (packets) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags; /* see RED flags */
+};
+
+struct tc_choke_xstats {
+ __u32 early; /* Early drops */
+ __u32 pdrop; /* Drops due to queue limits */
+ __u32 other; /* Drops due to drop() calls */
+ __u32 marked; /* Marked packets */
+ __u32 matched; /* Drops due to flow match */
+};
+
+/* HTB section */
+#define TC_HTB_NUMPRIO 8
+#define TC_HTB_MAXDEPTH 8
+#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */
+
+struct tc_htb_opt {
+ struct tc_ratespec rate;
+ struct tc_ratespec ceil;
+ __u32 buffer;
+ __u32 cbuffer;
+ __u32 quantum;
+ __u32 level; /* out only */
+ __u32 prio;
+};
+struct tc_htb_glob {
+ __u32 version; /* to match HTB/TC */
+ __u32 rate2quantum; /* bps->quantum divisor */
+ __u32 defcls; /* default class number */
+ __u32 debug; /* debug flags */
+
+ /* stats */
+ __u32 direct_pkts; /* count of non shaped packets */
+};
+enum {
+ TCA_HTB_UNSPEC,
+ TCA_HTB_PARMS,
+ TCA_HTB_INIT,
+ TCA_HTB_CTAB,
+ TCA_HTB_RTAB,
+ TCA_HTB_DIRECT_QLEN,
+ TCA_HTB_RATE64,
+ TCA_HTB_CEIL64,
+ TCA_HTB_PAD,
+ __TCA_HTB_MAX,
+};
+
+#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)
+
+struct tc_htb_xstats {
+ __u32 lends;
+ __u32 borrows;
+ __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */
+ __s32 tokens;
+ __s32 ctokens;
+};
+
+/* HFSC section */
+
+struct tc_hfsc_qopt {
+ __u16 defcls; /* default class */
+};
+
+struct tc_service_curve {
+ __u32 m1; /* slope of the first segment in bps */
+ __u32 d; /* x-projection of the first segment in us */
+ __u32 m2; /* slope of the second segment in bps */
+};
+
+struct tc_hfsc_stats {
+ __u64 work; /* total work done */
+ __u64 rtwork; /* work done by real-time criteria */
+ __u32 period; /* current period */
+ __u32 level; /* class level in hierarchy */
+};
+
+enum {
+ TCA_HFSC_UNSPEC,
+ TCA_HFSC_RSC,
+ TCA_HFSC_FSC,
+ TCA_HFSC_USC,
+ __TCA_HFSC_MAX,
+};
+
+#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)
+
+
+/* CBQ section */
+
+#define TC_CBQ_MAXPRIO 8
+#define TC_CBQ_MAXLEVEL 8
+#define TC_CBQ_DEF_EWMA 5
+
+struct tc_cbq_lssopt {
+ unsigned char change;
+ unsigned char flags;
+#define TCF_CBQ_LSS_BOUNDED 1
+#define TCF_CBQ_LSS_ISOLATED 2
+ unsigned char ewma_log;
+ unsigned char level;
+#define TCF_CBQ_LSS_FLAGS 1
+#define TCF_CBQ_LSS_EWMA 2
+#define TCF_CBQ_LSS_MAXIDLE 4
+#define TCF_CBQ_LSS_MINIDLE 8
+#define TCF_CBQ_LSS_OFFTIME 0x10
+#define TCF_CBQ_LSS_AVPKT 0x20
+ __u32 maxidle;
+ __u32 minidle;
+ __u32 offtime;
+ __u32 avpkt;
+};
+
+struct tc_cbq_wrropt {
+ unsigned char flags;
+ unsigned char priority;
+ unsigned char cpriority;
+ unsigned char __reserved;
+ __u32 allot;
+ __u32 weight;
+};
+
+struct tc_cbq_ovl {
+ unsigned char strategy;
+#define TC_CBQ_OVL_CLASSIC 0
+#define TC_CBQ_OVL_DELAY 1
+#define TC_CBQ_OVL_LOWPRIO 2
+#define TC_CBQ_OVL_DROP 3
+#define TC_CBQ_OVL_RCLASSIC 4
+ unsigned char priority2;
+ __u16 pad;
+ __u32 penalty;
+};
+
+struct tc_cbq_police {
+ unsigned char police;
+ unsigned char __res1;
+ unsigned short __res2;
+};
+
+struct tc_cbq_fopt {
+ __u32 split;
+ __u32 defmap;
+ __u32 defchange;
+};
+
+struct tc_cbq_xstats {
+ __u32 borrows;
+ __u32 overactions;
+ __s32 avgidle;
+ __s32 undertime;
+};
+
+enum {
+ TCA_CBQ_UNSPEC,
+ TCA_CBQ_LSSOPT,
+ TCA_CBQ_WRROPT,
+ TCA_CBQ_FOPT,
+ TCA_CBQ_OVL_STRATEGY,
+ TCA_CBQ_RATE,
+ TCA_CBQ_RTAB,
+ TCA_CBQ_POLICE,
+ __TCA_CBQ_MAX,
+};
+
+#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1)
+
+/* dsmark section */
+
+enum {
+ TCA_DSMARK_UNSPEC,
+ TCA_DSMARK_INDICES,
+ TCA_DSMARK_DEFAULT_INDEX,
+ TCA_DSMARK_SET_TC_INDEX,
+ TCA_DSMARK_MASK,
+ TCA_DSMARK_VALUE,
+ __TCA_DSMARK_MAX,
+};
+
+#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)
+
+/* ATM section */
+
+enum {
+ TCA_ATM_UNSPEC,
+ TCA_ATM_FD, /* file/socket descriptor */
+ TCA_ATM_PTR, /* pointer to descriptor - later */
+ TCA_ATM_HDR, /* LL header */
+ TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */
+ TCA_ATM_ADDR, /* PVC address (for output only) */
+ TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */
+ __TCA_ATM_MAX,
+};
+
+#define TCA_ATM_MAX (__TCA_ATM_MAX - 1)
+
+/* Network emulator */
+
+enum {
+ TCA_NETEM_UNSPEC,
+ TCA_NETEM_CORR,
+ TCA_NETEM_DELAY_DIST,
+ TCA_NETEM_REORDER,
+ TCA_NETEM_CORRUPT,
+ TCA_NETEM_LOSS,
+ TCA_NETEM_RATE,
+ TCA_NETEM_ECN,
+ TCA_NETEM_RATE64,
+ TCA_NETEM_PAD,
+ TCA_NETEM_LATENCY64,
+ TCA_NETEM_JITTER64,
+ TCA_NETEM_SLOT,
+ TCA_NETEM_SLOT_DIST,
+ __TCA_NETEM_MAX,
+};
+
+#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)
+
+struct tc_netem_qopt {
+ __u32 latency; /* added delay (us) */
+ __u32 limit; /* fifo limit (packets) */
+ __u32 loss; /* random packet loss (0=none ~0=100%) */
+ __u32 gap; /* re-ordering gap (0 for none) */
+ __u32 duplicate; /* random packet dup (0=none ~0=100%) */
+ __u32 jitter; /* random jitter in latency (us) */
+};
+
+struct tc_netem_corr {
+ __u32 delay_corr; /* delay correlation */
+ __u32 loss_corr; /* packet loss correlation */
+ __u32 dup_corr; /* duplicate correlation */
+};
+
+struct tc_netem_reorder {
+ __u32 probability;
+ __u32 correlation;
+};
+
+struct tc_netem_corrupt {
+ __u32 probability;
+ __u32 correlation;
+};
+
+struct tc_netem_rate {
+ __u32 rate; /* byte/s */
+ __s32 packet_overhead;
+ __u32 cell_size;
+ __s32 cell_overhead;
+};
+
+struct tc_netem_slot {
+ __s64 min_delay; /* nsec */
+ __s64 max_delay;
+ __s32 max_packets;
+ __s32 max_bytes;
+ __s64 dist_delay; /* nsec */
+ __s64 dist_jitter; /* nsec */
+};
+
+enum {
+ NETEM_LOSS_UNSPEC,
+ NETEM_LOSS_GI, /* General Intuitive - 4 state model */
+ NETEM_LOSS_GE, /* Gilbert Elliot models */
+ __NETEM_LOSS_MAX
+};
+#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
+
+/* State transition probabilities for 4 state model */
+struct tc_netem_gimodel {
+ __u32 p13;
+ __u32 p31;
+ __u32 p32;
+ __u32 p14;
+ __u32 p23;
+};
+
+/* Gilbert-Elliot models */
+struct tc_netem_gemodel {
+ __u32 p;
+ __u32 r;
+ __u32 h;
+ __u32 k1;
+};
+
+#define NETEM_DIST_SCALE 8192
+#define NETEM_DIST_MAX 16384
+
+/* DRR */
+
+enum {
+ TCA_DRR_UNSPEC,
+ TCA_DRR_QUANTUM,
+ __TCA_DRR_MAX
+};
+
+#define TCA_DRR_MAX (__TCA_DRR_MAX - 1)
+
+struct tc_drr_stats {
+ __u32 deficit;
+};
+
+/* MQPRIO */
+#define TC_QOPT_BITMASK 15
+#define TC_QOPT_MAX_QUEUE 16
+
+enum {
+ TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */
+ TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */
+ __TC_MQPRIO_HW_OFFLOAD_MAX
+};
+
+#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
+
+enum {
+ TC_MQPRIO_MODE_DCB,
+ TC_MQPRIO_MODE_CHANNEL,
+ __TC_MQPRIO_MODE_MAX
+};
+
+#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1)
+
+enum {
+ TC_MQPRIO_SHAPER_DCB,
+ TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */
+ __TC_MQPRIO_SHAPER_MAX
+};
+
+#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
+
+struct tc_mqprio_qopt {
+ __u8 num_tc;
+ __u8 prio_tc_map[TC_QOPT_BITMASK + 1];
+ __u8 hw;
+ __u16 count[TC_QOPT_MAX_QUEUE];
+ __u16 offset[TC_QOPT_MAX_QUEUE];
+};
+
+#define TC_MQPRIO_F_MODE 0x1
+#define TC_MQPRIO_F_SHAPER 0x2
+#define TC_MQPRIO_F_MIN_RATE 0x4
+#define TC_MQPRIO_F_MAX_RATE 0x8
+
+enum {
+ TCA_MQPRIO_UNSPEC,
+ TCA_MQPRIO_MODE,
+ TCA_MQPRIO_SHAPER,
+ TCA_MQPRIO_MIN_RATE64,
+ TCA_MQPRIO_MAX_RATE64,
+ __TCA_MQPRIO_MAX,
+};
+
+#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1)
+
+/* SFB */
+
+enum {
+ TCA_SFB_UNSPEC,
+ TCA_SFB_PARMS,
+ __TCA_SFB_MAX,
+};
+
+#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)
+
+/*
+ * Note: increment, decrement are Q0.16 fixed-point values.
+ */
+struct tc_sfb_qopt {
+ __u32 rehash_interval; /* delay between hash move, in ms */
+ __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */
+ __u32 max; /* max len of qlen_min */
+ __u32 bin_size; /* maximum queue length per bin */
+ __u32 increment; /* probability increment, (d1 in Blue) */
+ __u32 decrement; /* probability decrement, (d2 in Blue) */
+ __u32 limit; /* max SFB queue length */
+ __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */
+ __u32 penalty_burst;
+};
+
+struct tc_sfb_xstats {
+ __u32 earlydrop;
+ __u32 penaltydrop;
+ __u32 bucketdrop;
+ __u32 queuedrop;
+ __u32 childdrop; /* drops in child qdisc */
+ __u32 marked;
+ __u32 maxqlen;
+ __u32 maxprob;
+ __u32 avgprob;
+};
+
+#define SFB_MAX_PROB 0xFFFF
+
+/* QFQ */
+enum {
+ TCA_QFQ_UNSPEC,
+ TCA_QFQ_WEIGHT,
+ TCA_QFQ_LMAX,
+ __TCA_QFQ_MAX
+};
+
+#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1)
+
+struct tc_qfq_stats {
+ __u32 weight;
+ __u32 lmax;
+};
+
+/* CODEL */
+
+enum {
+ TCA_CODEL_UNSPEC,
+ TCA_CODEL_TARGET,
+ TCA_CODEL_LIMIT,
+ TCA_CODEL_INTERVAL,
+ TCA_CODEL_ECN,
+ TCA_CODEL_CE_THRESHOLD,
+ __TCA_CODEL_MAX
+};
+
+#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1)
+
+struct tc_codel_xstats {
+ __u32 maxpacket; /* largest packet we've seen so far */
+ __u32 count; /* how many drops we've done since the last time we
+ * entered dropping state
+ */
+ __u32 lastcount; /* count at entry to dropping state */
+ __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */
+ __s32 drop_next; /* time to drop next packet */
+ __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */
+ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */
+ __u32 dropping; /* are we in dropping state ? */
+ __u32 ce_mark; /* number of CE marked packets because of ce_threshold */
+};
+
+/* FQ_CODEL */
+
+enum {
+ TCA_FQ_CODEL_UNSPEC,
+ TCA_FQ_CODEL_TARGET,
+ TCA_FQ_CODEL_LIMIT,
+ TCA_FQ_CODEL_INTERVAL,
+ TCA_FQ_CODEL_ECN,
+ TCA_FQ_CODEL_FLOWS,
+ TCA_FQ_CODEL_QUANTUM,
+ TCA_FQ_CODEL_CE_THRESHOLD,
+ TCA_FQ_CODEL_DROP_BATCH_SIZE,
+ TCA_FQ_CODEL_MEMORY_LIMIT,
+ __TCA_FQ_CODEL_MAX
+};
+
+#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1)
+
+enum {
+ TCA_FQ_CODEL_XSTATS_QDISC,
+ TCA_FQ_CODEL_XSTATS_CLASS,
+};
+
+struct tc_fq_codel_qd_stats {
+ __u32 maxpacket; /* largest packet we've seen so far */
+ __u32 drop_overlimit; /* number of time max qdisc
+ * packet limit was hit
+ */
+ __u32 ecn_mark; /* number of packets we ECN marked
+ * instead of being dropped
+ */
+ __u32 new_flow_count; /* number of time packets
+ * created a 'new flow'
+ */
+ __u32 new_flows_len; /* count of flows in new list */
+ __u32 old_flows_len; /* count of flows in old list */
+ __u32 ce_mark; /* packets above ce_threshold */
+ __u32 memory_usage; /* in bytes */
+ __u32 drop_overmemory;
+};
+
+struct tc_fq_codel_cl_stats {
+ __s32 deficit;
+ __u32 ldelay; /* in-queue delay seen by most recently
+ * dequeued packet
+ */
+ __u32 count;
+ __u32 lastcount;
+ __u32 dropping;
+ __s32 drop_next;
+};
+
+struct tc_fq_codel_xstats {
+ __u32 type;
+ union {
+ struct tc_fq_codel_qd_stats qdisc_stats;
+ struct tc_fq_codel_cl_stats class_stats;
+ };
+};
+
+/* FQ */
+
+enum {
+ TCA_FQ_UNSPEC,
+
+ TCA_FQ_PLIMIT, /* limit of total number of packets in queue */
+
+ TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */
+
+ TCA_FQ_QUANTUM, /* RR quantum */
+
+ TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */
+
+ TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */
+
+ TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */
+
+ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */
+
+ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */
+
+ TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */
+
+ TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */
+
+ TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
+
+ TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */
+
+ TCA_FQ_TIMER_SLACK, /* timer slack */
+
+ TCA_FQ_HORIZON, /* time horizon in us */
+
+ TCA_FQ_HORIZON_DROP, /* drop packets beyond horizon, or cap their EDT */
+
+ __TCA_FQ_MAX
+};
+
+#define TCA_FQ_MAX (__TCA_FQ_MAX - 1)
+
+struct tc_fq_qd_stats {
+ __u64 gc_flows;
+ __u64 highprio_packets;
+ __u64 tcp_retrans;
+ __u64 throttled;
+ __u64 flows_plimit;
+ __u64 pkts_too_long;
+ __u64 allocation_errors;
+ __s64 time_next_delayed_flow;
+ __u32 flows;
+ __u32 inactive_flows;
+ __u32 throttled_flows;
+ __u32 unthrottle_latency_ns;
+ __u64 ce_mark; /* packets above ce_threshold */
+ __u64 horizon_drops;
+ __u64 horizon_caps;
+};
+
+/* Heavy-Hitter Filter */
+
+enum {
+ TCA_HHF_UNSPEC,
+ TCA_HHF_BACKLOG_LIMIT,
+ TCA_HHF_QUANTUM,
+ TCA_HHF_HH_FLOWS_LIMIT,
+ TCA_HHF_RESET_TIMEOUT,
+ TCA_HHF_ADMIT_BYTES,
+ TCA_HHF_EVICT_TIMEOUT,
+ TCA_HHF_NON_HH_WEIGHT,
+ __TCA_HHF_MAX
+};
+
+#define TCA_HHF_MAX (__TCA_HHF_MAX - 1)
+
+struct tc_hhf_xstats {
+ __u32 drop_overlimit; /* number of times max qdisc packet limit
+ * was hit
+ */
+ __u32 hh_overlimit; /* number of times max heavy-hitters was hit */
+ __u32 hh_tot_count; /* number of captured heavy-hitters so far */
+ __u32 hh_cur_count; /* number of current heavy-hitters */
+};
+
+/* PIE */
+enum {
+ TCA_PIE_UNSPEC,
+ TCA_PIE_TARGET,
+ TCA_PIE_LIMIT,
+ TCA_PIE_TUPDATE,
+ TCA_PIE_ALPHA,
+ TCA_PIE_BETA,
+ TCA_PIE_ECN,
+ TCA_PIE_BYTEMODE,
+ TCA_PIE_DQ_RATE_ESTIMATOR,
+ __TCA_PIE_MAX
+};
+#define TCA_PIE_MAX (__TCA_PIE_MAX - 1)
+
+struct tc_pie_xstats {
+ __u64 prob; /* current probability */
+ __u32 delay; /* current delay in ms */
+ __u32 avg_dq_rate; /* current average dq_rate in
+ * bits/pie_time
+ */
+ __u32 dq_rate_estimating; /* is avg_dq_rate being calculated? */
+ __u32 packets_in; /* total number of packets enqueued */
+ __u32 dropped; /* packets dropped due to pie_action */
+ __u32 overlimit; /* dropped due to lack of space
+ * in queue
+ */
+ __u32 maxq; /* maximum queue size */
+ __u32 ecn_mark; /* packets marked with ecn*/
+};
+
+/* FQ PIE */
+enum {
+ TCA_FQ_PIE_UNSPEC,
+ TCA_FQ_PIE_LIMIT,
+ TCA_FQ_PIE_FLOWS,
+ TCA_FQ_PIE_TARGET,
+ TCA_FQ_PIE_TUPDATE,
+ TCA_FQ_PIE_ALPHA,
+ TCA_FQ_PIE_BETA,
+ TCA_FQ_PIE_QUANTUM,
+ TCA_FQ_PIE_MEMORY_LIMIT,
+ TCA_FQ_PIE_ECN_PROB,
+ TCA_FQ_PIE_ECN,
+ TCA_FQ_PIE_BYTEMODE,
+ TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
+ __TCA_FQ_PIE_MAX
+};
+#define TCA_FQ_PIE_MAX (__TCA_FQ_PIE_MAX - 1)
+
+struct tc_fq_pie_xstats {
+ __u32 packets_in; /* total number of packets enqueued */
+ __u32 dropped; /* packets dropped due to fq_pie_action */
+ __u32 overlimit; /* dropped due to lack of space in queue */
+ __u32 overmemory; /* dropped due to lack of memory in queue */
+ __u32 ecn_mark; /* packets marked with ecn */
+ __u32 new_flow_count; /* count of new flows created by packets */
+ __u32 new_flows_len; /* count of flows in new list */
+ __u32 old_flows_len; /* count of flows in old list */
+ __u32 memory_usage; /* total memory across all queues */
+};
+
+/* CBS */
+struct tc_cbs_qopt {
+ __u8 offload;
+ __u8 _pad[3];
+ __s32 hicredit;
+ __s32 locredit;
+ __s32 idleslope;
+ __s32 sendslope;
+};
+
+enum {
+ TCA_CBS_UNSPEC,
+ TCA_CBS_PARMS,
+ __TCA_CBS_MAX,
+};
+
+#define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
+
+
+/* ETF */
+struct tc_etf_qopt {
+ __s32 delta;
+ __s32 clockid;
+ __u32 flags;
+#define TC_ETF_DEADLINE_MODE_ON _BITUL(0)
+#define TC_ETF_OFFLOAD_ON _BITUL(1)
+#define TC_ETF_SKIP_SOCK_CHECK _BITUL(2)
+};
+
+enum {
+ TCA_ETF_UNSPEC,
+ TCA_ETF_PARMS,
+ __TCA_ETF_MAX,
+};
+
+#define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
+
+
+/* CAKE */
+enum {
+ TCA_CAKE_UNSPEC,
+ TCA_CAKE_PAD,
+ TCA_CAKE_BASE_RATE64,
+ TCA_CAKE_DIFFSERV_MODE,
+ TCA_CAKE_ATM,
+ TCA_CAKE_FLOW_MODE,
+ TCA_CAKE_OVERHEAD,
+ TCA_CAKE_RTT,
+ TCA_CAKE_TARGET,
+ TCA_CAKE_AUTORATE,
+ TCA_CAKE_MEMORY,
+ TCA_CAKE_NAT,
+ TCA_CAKE_RAW,
+ TCA_CAKE_WASH,
+ TCA_CAKE_MPU,
+ TCA_CAKE_INGRESS,
+ TCA_CAKE_ACK_FILTER,
+ TCA_CAKE_SPLIT_GSO,
+ TCA_CAKE_FWMARK,
+ __TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)
+
+enum {
+ __TCA_CAKE_STATS_INVALID,
+ TCA_CAKE_STATS_PAD,
+ TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
+ TCA_CAKE_STATS_MEMORY_LIMIT,
+ TCA_CAKE_STATS_MEMORY_USED,
+ TCA_CAKE_STATS_AVG_NETOFF,
+ TCA_CAKE_STATS_MIN_NETLEN,
+ TCA_CAKE_STATS_MAX_NETLEN,
+ TCA_CAKE_STATS_MIN_ADJLEN,
+ TCA_CAKE_STATS_MAX_ADJLEN,
+ TCA_CAKE_STATS_TIN_STATS,
+ TCA_CAKE_STATS_DEFICIT,
+ TCA_CAKE_STATS_COBALT_COUNT,
+ TCA_CAKE_STATS_DROPPING,
+ TCA_CAKE_STATS_DROP_NEXT_US,
+ TCA_CAKE_STATS_P_DROP,
+ TCA_CAKE_STATS_BLUE_TIMER_US,
+ __TCA_CAKE_STATS_MAX
+};
+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
+enum {
+ __TCA_CAKE_TIN_STATS_INVALID,
+ TCA_CAKE_TIN_STATS_PAD,
+ TCA_CAKE_TIN_STATS_SENT_PACKETS,
+ TCA_CAKE_TIN_STATS_SENT_BYTES64,
+ TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
+ TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
+ TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
+ TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
+ TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
+ TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
+ TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
+ TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
+ TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
+ TCA_CAKE_TIN_STATS_TARGET_US,
+ TCA_CAKE_TIN_STATS_INTERVAL_US,
+ TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
+ TCA_CAKE_TIN_STATS_WAY_MISSES,
+ TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
+ TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
+ TCA_CAKE_TIN_STATS_AVG_DELAY_US,
+ TCA_CAKE_TIN_STATS_BASE_DELAY_US,
+ TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
+ TCA_CAKE_TIN_STATS_BULK_FLOWS,
+ TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
+ TCA_CAKE_TIN_STATS_MAX_SKBLEN,
+ TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
+ __TCA_CAKE_TIN_STATS_MAX
+};
+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
+
+enum {
+ CAKE_FLOW_NONE = 0,
+ CAKE_FLOW_SRC_IP,
+ CAKE_FLOW_DST_IP,
+ CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
+ CAKE_FLOW_FLOWS,
+ CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_MAX,
+};
+
+enum {
+ CAKE_DIFFSERV_DIFFSERV3 = 0,
+ CAKE_DIFFSERV_DIFFSERV4,
+ CAKE_DIFFSERV_DIFFSERV8,
+ CAKE_DIFFSERV_BESTEFFORT,
+ CAKE_DIFFSERV_PRECEDENCE,
+ CAKE_DIFFSERV_MAX
+};
+
+enum {
+ CAKE_ACK_NONE = 0,
+ CAKE_ACK_FILTER,
+ CAKE_ACK_AGGRESSIVE,
+ CAKE_ACK_MAX
+};
+
+enum {
+ CAKE_ATM_NONE = 0,
+ CAKE_ATM_ATM,
+ CAKE_ATM_PTM,
+ CAKE_ATM_MAX
+};
+
+
+/* TAPRIO */
+enum {
+ TC_TAPRIO_CMD_SET_GATES = 0x00,
+ TC_TAPRIO_CMD_SET_AND_HOLD = 0x01,
+ TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02,
+};
+
+enum {
+ TCA_TAPRIO_SCHED_ENTRY_UNSPEC,
+ TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */
+ TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */
+ TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */
+ TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */
+ __TCA_TAPRIO_SCHED_ENTRY_MAX,
+};
+#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1)
+
+/* The format for schedule entry list is:
+ * [TCA_TAPRIO_SCHED_ENTRY_LIST]
+ * [TCA_TAPRIO_SCHED_ENTRY]
+ * [TCA_TAPRIO_SCHED_ENTRY_CMD]
+ * [TCA_TAPRIO_SCHED_ENTRY_GATES]
+ * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL]
+ */
+enum {
+ TCA_TAPRIO_SCHED_UNSPEC,
+ TCA_TAPRIO_SCHED_ENTRY,
+ __TCA_TAPRIO_SCHED_MAX,
+};
+
+#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1)
+
+/* The format for the admin sched (dump only):
+ * [TCA_TAPRIO_SCHED_ADMIN_SCHED]
+ * [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_CMD]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_GATES]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
+ */
+
+#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST _BITUL(0)
+#define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD _BITUL(1)
+
+enum {
+ TCA_TAPRIO_ATTR_UNSPEC,
+ TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
+ TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */
+ TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */
+ TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */
+ TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */
+ TCA_TAPRIO_PAD,
+ TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */
+ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */
+ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
+ TCA_TAPRIO_ATTR_FLAGS, /* u32 */
+ TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */
+ __TCA_TAPRIO_ATTR_MAX,
+};
+
+#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1)
+
+/* ETS */
+
+#define TCQ_ETS_MAX_BANDS 16
+
+enum {
+ TCA_ETS_UNSPEC,
+ TCA_ETS_NBANDS, /* u8 */
+ TCA_ETS_NSTRICT, /* u8 */
+ TCA_ETS_QUANTA, /* nested TCA_ETS_QUANTA_BAND */
+ TCA_ETS_QUANTA_BAND, /* u32 */
+ TCA_ETS_PRIOMAP, /* nested TCA_ETS_PRIOMAP_BAND */
+ TCA_ETS_PRIOMAP_BAND, /* u8 */
+ __TCA_ETS_MAX,
+};
+
+#define TCA_ETS_MAX (__TCA_ETS_MAX - 1)
+
+#endif
diff --git a/src/basic/linux/rtnetlink.h b/src/basic/linux/rtnetlink.h
new file mode 100644
index 0000000..9b814c9
--- /dev/null
+++ b/src/basic/linux/rtnetlink.h
@@ -0,0 +1,787 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_RTNETLINK_H
+#define _UAPI__LINUX_RTNETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/if_link.h>
+#include <linux/if_addr.h>
+#include <linux/neighbour.h>
+
+/* rtnetlink families. Values up to 127 are reserved for real address
+ * families, values above 128 may be used arbitrarily.
+ */
+#define RTNL_FAMILY_IPMR 128
+#define RTNL_FAMILY_IP6MR 129
+#define RTNL_FAMILY_MAX 129
+
+/****
+ * Routing/neighbour discovery messages.
+ ****/
+
+/* Types of messages */
+
+enum {
+ RTM_BASE = 16,
+#define RTM_BASE RTM_BASE
+
+ RTM_NEWLINK = 16,
+#define RTM_NEWLINK RTM_NEWLINK
+ RTM_DELLINK,
+#define RTM_DELLINK RTM_DELLINK
+ RTM_GETLINK,
+#define RTM_GETLINK RTM_GETLINK
+ RTM_SETLINK,
+#define RTM_SETLINK RTM_SETLINK
+
+ RTM_NEWADDR = 20,
+#define RTM_NEWADDR RTM_NEWADDR
+ RTM_DELADDR,
+#define RTM_DELADDR RTM_DELADDR
+ RTM_GETADDR,
+#define RTM_GETADDR RTM_GETADDR
+
+ RTM_NEWROUTE = 24,
+#define RTM_NEWROUTE RTM_NEWROUTE
+ RTM_DELROUTE,
+#define RTM_DELROUTE RTM_DELROUTE
+ RTM_GETROUTE,
+#define RTM_GETROUTE RTM_GETROUTE
+
+ RTM_NEWNEIGH = 28,
+#define RTM_NEWNEIGH RTM_NEWNEIGH
+ RTM_DELNEIGH,
+#define RTM_DELNEIGH RTM_DELNEIGH
+ RTM_GETNEIGH,
+#define RTM_GETNEIGH RTM_GETNEIGH
+
+ RTM_NEWRULE = 32,
+#define RTM_NEWRULE RTM_NEWRULE
+ RTM_DELRULE,
+#define RTM_DELRULE RTM_DELRULE
+ RTM_GETRULE,
+#define RTM_GETRULE RTM_GETRULE
+
+ RTM_NEWQDISC = 36,
+#define RTM_NEWQDISC RTM_NEWQDISC
+ RTM_DELQDISC,
+#define RTM_DELQDISC RTM_DELQDISC
+ RTM_GETQDISC,
+#define RTM_GETQDISC RTM_GETQDISC
+
+ RTM_NEWTCLASS = 40,
+#define RTM_NEWTCLASS RTM_NEWTCLASS
+ RTM_DELTCLASS,
+#define RTM_DELTCLASS RTM_DELTCLASS
+ RTM_GETTCLASS,
+#define RTM_GETTCLASS RTM_GETTCLASS
+
+ RTM_NEWTFILTER = 44,
+#define RTM_NEWTFILTER RTM_NEWTFILTER
+ RTM_DELTFILTER,
+#define RTM_DELTFILTER RTM_DELTFILTER
+ RTM_GETTFILTER,
+#define RTM_GETTFILTER RTM_GETTFILTER
+
+ RTM_NEWACTION = 48,
+#define RTM_NEWACTION RTM_NEWACTION
+ RTM_DELACTION,
+#define RTM_DELACTION RTM_DELACTION
+ RTM_GETACTION,
+#define RTM_GETACTION RTM_GETACTION
+
+ RTM_NEWPREFIX = 52,
+#define RTM_NEWPREFIX RTM_NEWPREFIX
+
+ RTM_GETMULTICAST = 58,
+#define RTM_GETMULTICAST RTM_GETMULTICAST
+
+ RTM_GETANYCAST = 62,
+#define RTM_GETANYCAST RTM_GETANYCAST
+
+ RTM_NEWNEIGHTBL = 64,
+#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL
+ RTM_GETNEIGHTBL = 66,
+#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL
+ RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL
+
+ RTM_NEWNDUSEROPT = 68,
+#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT
+
+ RTM_NEWADDRLABEL = 72,
+#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL
+ RTM_DELADDRLABEL,
+#define RTM_DELADDRLABEL RTM_DELADDRLABEL
+ RTM_GETADDRLABEL,
+#define RTM_GETADDRLABEL RTM_GETADDRLABEL
+
+ RTM_GETDCB = 78,
+#define RTM_GETDCB RTM_GETDCB
+ RTM_SETDCB,
+#define RTM_SETDCB RTM_SETDCB
+
+ RTM_NEWNETCONF = 80,
+#define RTM_NEWNETCONF RTM_NEWNETCONF
+ RTM_DELNETCONF,
+#define RTM_DELNETCONF RTM_DELNETCONF
+ RTM_GETNETCONF = 82,
+#define RTM_GETNETCONF RTM_GETNETCONF
+
+ RTM_NEWMDB = 84,
+#define RTM_NEWMDB RTM_NEWMDB
+ RTM_DELMDB = 85,
+#define RTM_DELMDB RTM_DELMDB
+ RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
+ RTM_NEWNSID = 88,
+#define RTM_NEWNSID RTM_NEWNSID
+ RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
+ RTM_GETNSID = 90,
+#define RTM_GETNSID RTM_GETNSID
+
+ RTM_NEWSTATS = 92,
+#define RTM_NEWSTATS RTM_NEWSTATS
+ RTM_GETSTATS = 94,
+#define RTM_GETSTATS RTM_GETSTATS
+
+ RTM_NEWCACHEREPORT = 96,
+#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
+
+ RTM_NEWCHAIN = 100,
+#define RTM_NEWCHAIN RTM_NEWCHAIN
+ RTM_DELCHAIN,
+#define RTM_DELCHAIN RTM_DELCHAIN
+ RTM_GETCHAIN,
+#define RTM_GETCHAIN RTM_GETCHAIN
+
+ RTM_NEWNEXTHOP = 104,
+#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP
+ RTM_DELNEXTHOP,
+#define RTM_DELNEXTHOP RTM_DELNEXTHOP
+ RTM_GETNEXTHOP,
+#define RTM_GETNEXTHOP RTM_GETNEXTHOP
+
+ RTM_NEWLINKPROP = 108,
+#define RTM_NEWLINKPROP RTM_NEWLINKPROP
+ RTM_DELLINKPROP,
+#define RTM_DELLINKPROP RTM_DELLINKPROP
+ RTM_GETLINKPROP,
+#define RTM_GETLINKPROP RTM_GETLINKPROP
+
+ RTM_NEWVLAN = 112,
+#define RTM_NEWNVLAN RTM_NEWVLAN
+ RTM_DELVLAN,
+#define RTM_DELVLAN RTM_DELVLAN
+ RTM_GETVLAN,
+#define RTM_GETVLAN RTM_GETVLAN
+
+ __RTM_MAX,
+#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
+};
+
+#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE)
+#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2)
+#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2)
+
+/*
+ Generic structure for encapsulation of optional route information.
+ It is reminiscent of sockaddr, but with sa_family replaced
+ with attribute type.
+ */
+
+struct rtattr {
+ unsigned short rta_len;
+ unsigned short rta_type;
+};
+
+/* Macros to handle rtattributes */
+
+#define RTA_ALIGNTO 4U
+#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
+#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \
+ (rta)->rta_len >= sizeof(struct rtattr) && \
+ (rta)->rta_len <= (len))
+#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \
+ (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
+#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
+#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len))
+#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
+#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
+
+
+
+
+/******************************************************************************
+ * Definitions used in routing table administration.
+ ****/
+
+struct rtmsg {
+ unsigned char rtm_family;
+ unsigned char rtm_dst_len;
+ unsigned char rtm_src_len;
+ unsigned char rtm_tos;
+
+ unsigned char rtm_table; /* Routing table id */
+ unsigned char rtm_protocol; /* Routing protocol; see below */
+ unsigned char rtm_scope; /* See below */
+ unsigned char rtm_type; /* See below */
+
+ unsigned rtm_flags;
+};
+
+/* rtm_type */
+
+enum {
+ RTN_UNSPEC,
+ RTN_UNICAST, /* Gateway or direct route */
+ RTN_LOCAL, /* Accept locally */
+ RTN_BROADCAST, /* Accept locally as broadcast,
+ send as broadcast */
+ RTN_ANYCAST, /* Accept locally as broadcast,
+ but send as unicast */
+ RTN_MULTICAST, /* Multicast route */
+ RTN_BLACKHOLE, /* Drop */
+ RTN_UNREACHABLE, /* Destination is unreachable */
+ RTN_PROHIBIT, /* Administratively prohibited */
+ RTN_THROW, /* Not in this table */
+ RTN_NAT, /* Translate this address */
+ RTN_XRESOLVE, /* Use external resolver */
+ __RTN_MAX
+};
+
+#define RTN_MAX (__RTN_MAX - 1)
+
+
+/* rtm_protocol */
+
+#define RTPROT_UNSPEC 0
+#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects;
+ not used by current IPv4 */
+#define RTPROT_KERNEL 2 /* Route installed by kernel */
+#define RTPROT_BOOT 3 /* Route installed during boot */
+#define RTPROT_STATIC 4 /* Route installed by administrator */
+
+/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel;
+ they are just passed from user and back as is.
+ It will be used by hypothetical multiple routing daemons.
+ Note that protocol values should be standardized in order to
+ avoid conflicts.
+ */
+
+#define RTPROT_GATED 8 /* Apparently, GateD */
+#define RTPROT_RA 9 /* RDISC/ND router advertisements */
+#define RTPROT_MRT 10 /* Merit MRT */
+#define RTPROT_ZEBRA 11 /* Zebra */
+#define RTPROT_BIRD 12 /* BIRD */
+#define RTPROT_DNROUTED 13 /* DECnet routing daemon */
+#define RTPROT_XORP 14 /* XORP */
+#define RTPROT_NTK 15 /* Netsukuku */
+#define RTPROT_DHCP 16 /* DHCP client */
+#define RTPROT_MROUTED 17 /* Multicast daemon */
+#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */
+#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP 192 /* EIGRP Routes */
+
+/* rtm_scope
+
+ Really it is not scope, but sort of distance to the destination.
+ NOWHERE are reserved for not existing destinations, HOST is our
+ local addresses, LINK are destinations, located on directly attached
+ link and UNIVERSE is everywhere in the Universe.
+
+ Intermediate values are also possible f.e. interior routes
+ could be assigned a value between UNIVERSE and LINK.
+*/
+
+enum rt_scope_t {
+ RT_SCOPE_UNIVERSE=0,
+/* User defined values */
+ RT_SCOPE_SITE=200,
+ RT_SCOPE_LINK=253,
+ RT_SCOPE_HOST=254,
+ RT_SCOPE_NOWHERE=255
+};
+
+/* rtm_flags */
+
+#define RTM_F_NOTIFY 0x100 /* Notify user of route change */
+#define RTM_F_CLONED 0x200 /* This route is cloned */
+#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
+#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
+#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
+#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
+#define RTM_F_TRAP 0x8000 /* route is trapping packets */
+
+/* Reserved table identifiers */
+
+enum rt_class_t {
+ RT_TABLE_UNSPEC=0,
+/* User defined values */
+ RT_TABLE_COMPAT=252,
+ RT_TABLE_DEFAULT=253,
+ RT_TABLE_MAIN=254,
+ RT_TABLE_LOCAL=255,
+ RT_TABLE_MAX=0xFFFFFFFF
+};
+
+
+/* Routing message attributes */
+
+enum rtattr_type_t {
+ RTA_UNSPEC,
+ RTA_DST,
+ RTA_SRC,
+ RTA_IIF,
+ RTA_OIF,
+ RTA_GATEWAY,
+ RTA_PRIORITY,
+ RTA_PREFSRC,
+ RTA_METRICS,
+ RTA_MULTIPATH,
+ RTA_PROTOINFO, /* no longer used */
+ RTA_FLOW,
+ RTA_CACHEINFO,
+ RTA_SESSION, /* no longer used */
+ RTA_MP_ALGO, /* no longer used */
+ RTA_TABLE,
+ RTA_MARK,
+ RTA_MFC_STATS,
+ RTA_VIA,
+ RTA_NEWDST,
+ RTA_PREF,
+ RTA_ENCAP_TYPE,
+ RTA_ENCAP,
+ RTA_EXPIRES,
+ RTA_PAD,
+ RTA_UID,
+ RTA_TTL_PROPAGATE,
+ RTA_IP_PROTO,
+ RTA_SPORT,
+ RTA_DPORT,
+ RTA_NH_ID,
+ __RTA_MAX
+};
+
+#define RTA_MAX (__RTA_MAX - 1)
+
+#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg))))
+#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg))
+
+/* RTM_MULTIPATH --- array of struct rtnexthop.
+ *
+ * "struct rtnexthop" describes all necessary nexthop information,
+ * i.e. parameters of path to a destination via this nexthop.
+ *
+ * At the moment it is impossible to set different prefsrc, mtu, window
+ * and rtt for different paths from multipath.
+ */
+
+struct rtnexthop {
+ unsigned short rtnh_len;
+ unsigned char rtnh_flags;
+ unsigned char rtnh_hops;
+ int rtnh_ifindex;
+};
+
+/* rtnh_flags */
+
+#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
+#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
+#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_OFFLOAD 8 /* offloaded route */
+#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
+#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */
+
+#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD)
+
+/* Macros to handle hexthops */
+
+#define RTNH_ALIGNTO 4
+#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) )
+#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \
+ ((int)(rtnh)->rtnh_len) <= (len))
+#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len)))
+#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len))
+#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
+#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+
+/* RTA_VIA */
+struct rtvia {
+ __kernel_sa_family_t rtvia_family;
+ __u8 rtvia_addr[0];
+};
+
+/* RTM_CACHEINFO */
+
+struct rta_cacheinfo {
+ __u32 rta_clntref;
+ __u32 rta_lastuse;
+ __s32 rta_expires;
+ __u32 rta_error;
+ __u32 rta_used;
+
+#define RTNETLINK_HAVE_PEERINFO 1
+ __u32 rta_id;
+ __u32 rta_ts;
+ __u32 rta_tsage;
+};
+
+/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */
+
+enum {
+ RTAX_UNSPEC,
+#define RTAX_UNSPEC RTAX_UNSPEC
+ RTAX_LOCK,
+#define RTAX_LOCK RTAX_LOCK
+ RTAX_MTU,
+#define RTAX_MTU RTAX_MTU
+ RTAX_WINDOW,
+#define RTAX_WINDOW RTAX_WINDOW
+ RTAX_RTT,
+#define RTAX_RTT RTAX_RTT
+ RTAX_RTTVAR,
+#define RTAX_RTTVAR RTAX_RTTVAR
+ RTAX_SSTHRESH,
+#define RTAX_SSTHRESH RTAX_SSTHRESH
+ RTAX_CWND,
+#define RTAX_CWND RTAX_CWND
+ RTAX_ADVMSS,
+#define RTAX_ADVMSS RTAX_ADVMSS
+ RTAX_REORDERING,
+#define RTAX_REORDERING RTAX_REORDERING
+ RTAX_HOPLIMIT,
+#define RTAX_HOPLIMIT RTAX_HOPLIMIT
+ RTAX_INITCWND,
+#define RTAX_INITCWND RTAX_INITCWND
+ RTAX_FEATURES,
+#define RTAX_FEATURES RTAX_FEATURES
+ RTAX_RTO_MIN,
+#define RTAX_RTO_MIN RTAX_RTO_MIN
+ RTAX_INITRWND,
+#define RTAX_INITRWND RTAX_INITRWND
+ RTAX_QUICKACK,
+#define RTAX_QUICKACK RTAX_QUICKACK
+ RTAX_CC_ALGO,
+#define RTAX_CC_ALGO RTAX_CC_ALGO
+ RTAX_FASTOPEN_NO_COOKIE,
+#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE
+ __RTAX_MAX
+};
+
+#define RTAX_MAX (__RTAX_MAX - 1)
+
+#define RTAX_FEATURE_ECN (1 << 0)
+#define RTAX_FEATURE_SACK (1 << 1)
+#define RTAX_FEATURE_TIMESTAMP (1 << 2)
+#define RTAX_FEATURE_ALLFRAG (1 << 3)
+
+#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \
+ RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG)
+
+struct rta_session {
+ __u8 proto;
+ __u8 pad1;
+ __u16 pad2;
+
+ union {
+ struct {
+ __u16 sport;
+ __u16 dport;
+ } ports;
+
+ struct {
+ __u8 type;
+ __u8 code;
+ __u16 ident;
+ } icmpt;
+
+ __u32 spi;
+ } u;
+};
+
+struct rta_mfc_stats {
+ __u64 mfcs_packets;
+ __u64 mfcs_bytes;
+ __u64 mfcs_wrong_if;
+};
+
+/****
+ * General form of address family dependent message.
+ ****/
+
+struct rtgenmsg {
+ unsigned char rtgen_family;
+};
+
+/*****************************************************************
+ * Link layer specific messages.
+ ****/
+
+/* struct ifinfomsg
+ * passes link level specific information, not dependent
+ * on network protocol.
+ */
+
+struct ifinfomsg {
+ unsigned char ifi_family;
+ unsigned char __ifi_pad;
+ unsigned short ifi_type; /* ARPHRD_* */
+ int ifi_index; /* Link index */
+ unsigned ifi_flags; /* IFF_* flags */
+ unsigned ifi_change; /* IFF_* change mask */
+};
+
+/********************************************************************
+ * prefix information
+ ****/
+
+struct prefixmsg {
+ unsigned char prefix_family;
+ unsigned char prefix_pad1;
+ unsigned short prefix_pad2;
+ int prefix_ifindex;
+ unsigned char prefix_type;
+ unsigned char prefix_len;
+ unsigned char prefix_flags;
+ unsigned char prefix_pad3;
+};
+
+enum
+{
+ PREFIX_UNSPEC,
+ PREFIX_ADDRESS,
+ PREFIX_CACHEINFO,
+ __PREFIX_MAX
+};
+
+#define PREFIX_MAX (__PREFIX_MAX - 1)
+
+struct prefix_cacheinfo {
+ __u32 preferred_time;
+ __u32 valid_time;
+};
+
+
+/*****************************************************************
+ * Traffic control messages.
+ ****/
+
+struct tcmsg {
+ unsigned char tcm_family;
+ unsigned char tcm__pad1;
+ unsigned short tcm__pad2;
+ int tcm_ifindex;
+ __u32 tcm_handle;
+ __u32 tcm_parent;
+/* tcm_block_index is used instead of tcm_parent
+ * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK
+ */
+#define tcm_block_index tcm_parent
+ __u32 tcm_info;
+};
+
+/* For manipulation of filters in shared block, tcm_ifindex is set to
+ * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index
+ * which is the block index.
+ */
+#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
+
+enum {
+ TCA_UNSPEC,
+ TCA_KIND,
+ TCA_OPTIONS,
+ TCA_STATS,
+ TCA_XSTATS,
+ TCA_RATE,
+ TCA_FCNT,
+ TCA_STATS2,
+ TCA_STAB,
+ TCA_PAD,
+ TCA_DUMP_INVISIBLE,
+ TCA_CHAIN,
+ TCA_HW_OFFLOAD,
+ TCA_INGRESS_BLOCK,
+ TCA_EGRESS_BLOCK,
+ TCA_DUMP_FLAGS,
+ __TCA_MAX
+};
+
+#define TCA_MAX (__TCA_MAX - 1)
+
+#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic
+ * data necessary to identify the objects
+ * (handle, cookie, etc.) and stats.
+ */
+
+#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
+#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
+
+/********************************************************************
+ * Neighbor Discovery userland options
+ ****/
+
+struct nduseroptmsg {
+ unsigned char nduseropt_family;
+ unsigned char nduseropt_pad1;
+ unsigned short nduseropt_opts_len; /* Total length of options */
+ int nduseropt_ifindex;
+ __u8 nduseropt_icmp_type;
+ __u8 nduseropt_icmp_code;
+ unsigned short nduseropt_pad2;
+ unsigned int nduseropt_pad3;
+ /* Followed by one or more ND options */
+};
+
+enum {
+ NDUSEROPT_UNSPEC,
+ NDUSEROPT_SRCADDR,
+ __NDUSEROPT_MAX
+};
+
+#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1)
+
+#ifndef __KERNEL__
+/* RTnetlink multicast groups - backwards compatibility for userspace */
+#define RTMGRP_LINK 1
+#define RTMGRP_NOTIFY 2
+#define RTMGRP_NEIGH 4
+#define RTMGRP_TC 8
+
+#define RTMGRP_IPV4_IFADDR 0x10
+#define RTMGRP_IPV4_MROUTE 0x20
+#define RTMGRP_IPV4_ROUTE 0x40
+#define RTMGRP_IPV4_RULE 0x80
+
+#define RTMGRP_IPV6_IFADDR 0x100
+#define RTMGRP_IPV6_MROUTE 0x200
+#define RTMGRP_IPV6_ROUTE 0x400
+#define RTMGRP_IPV6_IFINFO 0x800
+
+#define RTMGRP_DECnet_IFADDR 0x1000
+#define RTMGRP_DECnet_ROUTE 0x4000
+
+#define RTMGRP_IPV6_PREFIX 0x20000
+#endif
+
+/* RTnetlink multicast groups */
+enum rtnetlink_groups {
+ RTNLGRP_NONE,
+#define RTNLGRP_NONE RTNLGRP_NONE
+ RTNLGRP_LINK,
+#define RTNLGRP_LINK RTNLGRP_LINK
+ RTNLGRP_NOTIFY,
+#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY
+ RTNLGRP_NEIGH,
+#define RTNLGRP_NEIGH RTNLGRP_NEIGH
+ RTNLGRP_TC,
+#define RTNLGRP_TC RTNLGRP_TC
+ RTNLGRP_IPV4_IFADDR,
+#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR
+ RTNLGRP_IPV4_MROUTE,
+#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE
+ RTNLGRP_IPV4_ROUTE,
+#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE
+ RTNLGRP_IPV4_RULE,
+#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE
+ RTNLGRP_IPV6_IFADDR,
+#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR
+ RTNLGRP_IPV6_MROUTE,
+#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE
+ RTNLGRP_IPV6_ROUTE,
+#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE
+ RTNLGRP_IPV6_IFINFO,
+#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO
+ RTNLGRP_DECnet_IFADDR,
+#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR
+ RTNLGRP_NOP2,
+ RTNLGRP_DECnet_ROUTE,
+#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE
+ RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE
+ RTNLGRP_NOP4,
+ RTNLGRP_IPV6_PREFIX,
+#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX
+ RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE
+ RTNLGRP_ND_USEROPT,
+#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT
+ RTNLGRP_PHONET_IFADDR,
+#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR
+ RTNLGRP_PHONET_ROUTE,
+#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE
+ RTNLGRP_DCB,
+#define RTNLGRP_DCB RTNLGRP_DCB
+ RTNLGRP_IPV4_NETCONF,
+#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF
+ RTNLGRP_IPV6_NETCONF,
+#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
+ RTNLGRP_MDB,
+#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
+ RTNLGRP_NSID,
+#define RTNLGRP_NSID RTNLGRP_NSID
+ RTNLGRP_MPLS_NETCONF,
+#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF
+ RTNLGRP_IPV4_MROUTE_R,
+#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R
+ RTNLGRP_IPV6_MROUTE_R,
+#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R
+ RTNLGRP_NEXTHOP,
+#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP
+ RTNLGRP_BRVLAN,
+#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN
+ __RTNLGRP_MAX
+};
+#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
+
+/* TC action piece */
+struct tcamsg {
+ unsigned char tca_family;
+ unsigned char tca__pad1;
+ unsigned short tca__pad2;
+};
+
+enum {
+ TCA_ROOT_UNSPEC,
+ TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+ TCA_ROOT_FLAGS,
+ TCA_ROOT_COUNT,
+ TCA_ROOT_TIME_DELTA, /* in msecs */
+ __TCA_ROOT_MAX,
+#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
+#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
+#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON (1 << 0)
+
+/* New extended info filters for IFLA_EXT_MASK */
+#define RTEXT_FILTER_VF (1 << 0)
+#define RTEXT_FILTER_BRVLAN (1 << 1)
+#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2)
+#define RTEXT_FILTER_SKIP_STATS (1 << 3)
+#define RTEXT_FILTER_MRP (1 << 4)
+
+/* End of information exported to user level */
+
+
+
+#endif /* _UAPI__LINUX_RTNETLINK_H */
diff --git a/src/basic/linux/update.sh b/src/basic/linux/update.sh
new file mode 100755
index 0000000..b0b0cdc
--- /dev/null
+++ b/src/basic/linux/update.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+set -eu
+
+for i in *.h */*.h; do
+ curl https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/$i -o $i
+
+ sed -i -e 's/__user //g' -e '/^#include <linux\/compiler.h>/ d' $i
+done
diff --git a/src/basic/linux/wireguard.h b/src/basic/linux/wireguard.h
new file mode 100644
index 0000000..ae88be1
--- /dev/null
+++ b/src/basic/linux/wireguard.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Documentation
+ * =============
+ *
+ * The below enums and macros are for interfacing with WireGuard, using generic
+ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two
+ * methods: get and set. Note that while they share many common attributes,
+ * these two functions actually accept a slightly different set of inputs and
+ * outputs.
+ *
+ * WG_CMD_GET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain
+ * one but not both of:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
+ *
+ * The kernel will then return several messages (NLM_F_MULTI) containing the
+ * following tree of nested items:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
+ * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16
+ * WGDEVICE_A_FWMARK: NLA_U32
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16
+ * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec
+ * WGPEER_A_RX_BYTES: NLA_U64
+ * WGPEER_A_TX_BYTES: NLA_U64
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 0: NLA_NESTED
+ * ...
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ * WGPEER_A_PROTOCOL_VERSION: NLA_U32
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that all of the allowed IPs of a single peer will not
+ * fit within a single netlink message. In that case, the same peer will
+ * be written in the following message, except it will only contain
+ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several
+ * times in a row for the same peer. It is then up to the receiver to
+ * coalesce adjacent peers. Likewise, it is possible that all peers will
+ * not fit within a single message. So, subsequent peers will be sent
+ * in following messages, except those will only contain WGDEVICE_A_IFNAME
+ * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these
+ * messages to form the complete list of peers.
+ *
+ * Since this is an NLA_F_DUMP command, the final message will always be
+ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message
+ * contains an integer error code. It is either zero or a negative error
+ * code corresponding to the errno.
+ *
+ * WG_CMD_SET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST. The command should contain the
+ * following tree of nested items, containing one but not both of
+ * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
+ * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
+ * peers should be removed prior to adding the list below.
+ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly
+ * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the
+ * specified peer should not exist at the end of the
+ * operation, rather than added/updated and/or
+ * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed
+ * IPs of this peer should be removed prior to adding
+ * the list below and/or WGPEER_F_UPDATE_ONLY if the
+ * peer should only be set if it already exists.
+ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 0: NLA_NESTED
+ * ...
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at
+ * all by most users of this API, as the
+ * most recent protocol will be used when
+ * this is unset. Otherwise, must be set
+ * to 1.
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that the amount of configuration data exceeds that of
+ * the maximum message length accepted by the kernel. In that case, several
+ * messages should be sent one after another, with each successive one
+ * filling in information not contained in the prior. Note that if
+ * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably
+ * should not be specified in fragments that come after, so that the list
+ * of peers is only cleared the first time but appended after. Likewise for
+ * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message
+ * of a peer, it likely should not be specified in subsequent fragments.
+ *
+ * If an error occurs, NLMSG_ERROR will reply containing an errno.
+ */
+
+#ifndef _WG_UAPI_WIREGUARD_H
+#define _WG_UAPI_WIREGUARD_H
+
+#define WG_GENL_NAME "wireguard"
+#define WG_GENL_VERSION 1
+
+#define WG_KEY_LEN 32
+
+enum wg_cmd {
+ WG_CMD_GET_DEVICE,
+ WG_CMD_SET_DEVICE,
+ __WG_CMD_MAX
+};
+#define WG_CMD_MAX (__WG_CMD_MAX - 1)
+
+enum wgdevice_flag {
+ WGDEVICE_F_REPLACE_PEERS = 1U << 0,
+ __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS
+};
+enum wgdevice_attribute {
+ WGDEVICE_A_UNSPEC,
+ WGDEVICE_A_IFINDEX,
+ WGDEVICE_A_IFNAME,
+ WGDEVICE_A_PRIVATE_KEY,
+ WGDEVICE_A_PUBLIC_KEY,
+ WGDEVICE_A_FLAGS,
+ WGDEVICE_A_LISTEN_PORT,
+ WGDEVICE_A_FWMARK,
+ WGDEVICE_A_PEERS,
+ __WGDEVICE_A_LAST
+};
+#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1)
+
+enum wgpeer_flag {
+ WGPEER_F_REMOVE_ME = 1U << 0,
+ WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1,
+ WGPEER_F_UPDATE_ONLY = 1U << 2,
+ __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS |
+ WGPEER_F_UPDATE_ONLY
+};
+enum wgpeer_attribute {
+ WGPEER_A_UNSPEC,
+ WGPEER_A_PUBLIC_KEY,
+ WGPEER_A_PRESHARED_KEY,
+ WGPEER_A_FLAGS,
+ WGPEER_A_ENDPOINT,
+ WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
+ WGPEER_A_LAST_HANDSHAKE_TIME,
+ WGPEER_A_RX_BYTES,
+ WGPEER_A_TX_BYTES,
+ WGPEER_A_ALLOWEDIPS,
+ WGPEER_A_PROTOCOL_VERSION,
+ __WGPEER_A_LAST
+};
+#define WGPEER_A_MAX (__WGPEER_A_LAST - 1)
+
+enum wgallowedip_attribute {
+ WGALLOWEDIP_A_UNSPEC,
+ WGALLOWEDIP_A_FAMILY,
+ WGALLOWEDIP_A_IPADDR,
+ WGALLOWEDIP_A_CIDR_MASK,
+ __WGALLOWEDIP_A_LAST
+};
+#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1)
+
+#endif /* _WG_UAPI_WIREGUARD_H */
diff --git a/src/basic/list.h b/src/basic/list.h
new file mode 100644
index 0000000..256b718
--- /dev/null
+++ b/src/basic/list.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+/* The head of the linked list. Use this in the structure that shall
+ * contain the head of the linked list */
+#define LIST_HEAD(t,name) \
+ t *name
+
+/* The pointers in the linked list's items. Use this in the item structure */
+#define LIST_FIELDS(t,name) \
+ t *name##_next, *name##_prev
+
+/* Initialize the list's head */
+#define LIST_HEAD_INIT(head) \
+ do { \
+ (head) = NULL; \
+ } while (false)
+
+/* Initialize a list item */
+#define LIST_INIT(name,item) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ assert(_item); \
+ _item->name##_prev = _item->name##_next = NULL; \
+ } while (false)
+
+/* Prepend an item to the list */
+#define LIST_PREPEND(name,head,item) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_item = (item); \
+ assert(_item); \
+ if ((_item->name##_next = *_head)) \
+ _item->name##_next->name##_prev = _item; \
+ _item->name##_prev = NULL; \
+ *_head = _item; \
+ } while (false)
+
+/* Append an item to the list */
+#define LIST_APPEND(name,head,item) \
+ do { \
+ typeof(*(head)) **_hhead = &(head), *_tail; \
+ LIST_FIND_TAIL(name, *_hhead, _tail); \
+ LIST_INSERT_AFTER(name, *_hhead, _tail, item); \
+ } while (false)
+
+/* Remove an item from the list */
+#define LIST_REMOVE(name,head,item) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_item = (item); \
+ assert(_item); \
+ if (_item->name##_next) \
+ _item->name##_next->name##_prev = _item->name##_prev; \
+ if (_item->name##_prev) \
+ _item->name##_prev->name##_next = _item->name##_next; \
+ else { \
+ assert(*_head == _item); \
+ *_head = _item->name##_next; \
+ } \
+ _item->name##_next = _item->name##_prev = NULL; \
+ } while (false)
+
+/* Find the head of the list */
+#define LIST_FIND_HEAD(name,item,head) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ if (!_item) \
+ (head) = NULL; \
+ else { \
+ while (_item->name##_prev) \
+ _item = _item->name##_prev; \
+ (head) = _item; \
+ } \
+ } while (false)
+
+/* Find the tail of the list */
+#define LIST_FIND_TAIL(name,item,tail) \
+ do { \
+ typeof(*(item)) *_item = (item); \
+ if (!_item) \
+ (tail) = NULL; \
+ else { \
+ while (_item->name##_next) \
+ _item = _item->name##_next; \
+ (tail) = _item; \
+ } \
+ } while (false)
+
+/* Insert an item after another one (a = where, b = what) */
+#define LIST_INSERT_AFTER(name,head,a,b) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \
+ assert(_b); \
+ if (!_a) { \
+ if ((_b->name##_next = *_head)) \
+ _b->name##_next->name##_prev = _b; \
+ _b->name##_prev = NULL; \
+ *_head = _b; \
+ } else { \
+ if ((_b->name##_next = _a->name##_next)) \
+ _b->name##_next->name##_prev = _b; \
+ _b->name##_prev = _a; \
+ _a->name##_next = _b; \
+ } \
+ } while (false)
+
+/* Insert an item before another one (a = where, b = what) */
+#define LIST_INSERT_BEFORE(name,head,a,b) \
+ do { \
+ typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \
+ assert(_b); \
+ if (!_a) { \
+ if (!*_head) { \
+ _b->name##_next = NULL; \
+ _b->name##_prev = NULL; \
+ *_head = _b; \
+ } else { \
+ typeof(*(head)) *_tail = (head); \
+ while (_tail->name##_next) \
+ _tail = _tail->name##_next; \
+ _b->name##_next = NULL; \
+ _b->name##_prev = _tail; \
+ _tail->name##_next = _b; \
+ } \
+ } else { \
+ if ((_b->name##_prev = _a->name##_prev)) \
+ _b->name##_prev->name##_next = _b; \
+ else \
+ *_head = _b; \
+ _b->name##_next = _a; \
+ _a->name##_prev = _b; \
+ } \
+ } while (false)
+
+#define LIST_JUST_US(name,item) \
+ (!(item)->name##_prev && !(item)->name##_next) \
+
+#define LIST_FOREACH(name,i,head) \
+ for ((i) = (head); (i); (i) = (i)->name##_next)
+
+#define LIST_FOREACH_SAFE(name,i,n,head) \
+ for ((i) = (head); (i) && (((n) = (i)->name##_next), 1); (i) = (n))
+
+#define LIST_FOREACH_BEFORE(name,i,p) \
+ for ((i) = (p)->name##_prev; (i); (i) = (i)->name##_prev)
+
+#define LIST_FOREACH_AFTER(name,i,p) \
+ for ((i) = (p)->name##_next; (i); (i) = (i)->name##_next)
+
+/* Iterate through all the members of the list p is included in, but skip over p */
+#define LIST_FOREACH_OTHERS(name,i,p) \
+ for (({ \
+ (i) = (p); \
+ while ((i) && (i)->name##_prev) \
+ (i) = (i)->name##_prev; \
+ if ((i) == (p)) \
+ (i) = (p)->name##_next; \
+ }); \
+ (i); \
+ (i) = (i)->name##_next == (p) ? (p)->name##_next : (i)->name##_next)
+
+/* Loop starting from p->next until p->prev.
+ p can be adjusted meanwhile. */
+#define LIST_LOOP_BUT_ONE(name,i,head,p) \
+ for ((i) = (p)->name##_next ? (p)->name##_next : (head); \
+ (i) != (p); \
+ (i) = (i)->name##_next ? (i)->name##_next : (head))
+
+#define LIST_IS_EMPTY(head) \
+ (!(head))
+
+/* Join two lists tail to head: a->b, c->d to a->b->c->d and de-initialise second list */
+#define LIST_JOIN(name,a,b) \
+ do { \
+ assert(b); \
+ if (!(a)) \
+ (a) = (b); \
+ else { \
+ typeof(*(a)) *_head = (b), *_tail; \
+ LIST_FIND_TAIL(name, (a), _tail); \
+ _tail->name##_next = _head; \
+ _head->name##_prev = _tail; \
+ } \
+ (b) = NULL; \
+ } while (false)
diff --git a/src/basic/locale-util.c b/src/basic/locale-util.c
new file mode 100644
index 0000000..4c81cb9
--- /dev/null
+++ b/src/basic/locale-util.c
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <langinfo.h>
+#include <libintl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "def.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "locale-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+static char *normalize_locale(const char *name) {
+ const char *e;
+
+ /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
+ * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
+ * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
+ * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
+ * that for UTF-8 however, since it's kinda the only charset that matters. */
+
+ e = endswith(name, ".utf8");
+ if (e) {
+ _cleanup_free_ char *prefix = NULL;
+
+ prefix = strndup(name, e - name);
+ if (!prefix)
+ return NULL;
+
+ return strjoin(prefix, ".UTF-8");
+ }
+
+ e = strstr(name, ".utf8@");
+ if (e) {
+ _cleanup_free_ char *prefix = NULL;
+
+ prefix = strndup(name, e - name);
+ if (!prefix)
+ return NULL;
+
+ return strjoin(prefix, ".UTF-8@", e + 6);
+ }
+
+ return strdup(name);
+}
+
+static int add_locales_from_archive(Set *locales) {
+ /* Stolen from glibc... */
+
+ struct locarhead {
+ uint32_t magic;
+ /* Serial number. */
+ uint32_t serial;
+ /* Name hash table. */
+ uint32_t namehash_offset;
+ uint32_t namehash_used;
+ uint32_t namehash_size;
+ /* String table. */
+ uint32_t string_offset;
+ uint32_t string_used;
+ uint32_t string_size;
+ /* Table with locale records. */
+ uint32_t locrectab_offset;
+ uint32_t locrectab_used;
+ uint32_t locrectab_size;
+ /* MD5 sum hash table. */
+ uint32_t sumhash_offset;
+ uint32_t sumhash_used;
+ uint32_t sumhash_size;
+ };
+
+ struct namehashent {
+ /* Hash value of the name. */
+ uint32_t hashval;
+ /* Offset of the name in the string table. */
+ uint32_t name_offset;
+ /* Offset of the locale record. */
+ uint32_t locrec_offset;
+ };
+
+ const struct locarhead *h;
+ const struct namehashent *e;
+ const void *p = MAP_FAILED;
+ _cleanup_close_ int fd = -1;
+ size_t sz = 0;
+ struct stat st;
+ size_t i;
+ int r;
+
+ fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISREG(st.st_mode))
+ return -EBADMSG;
+
+ if (st.st_size < (off_t) sizeof(struct locarhead))
+ return -EBADMSG;
+
+ p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED)
+ return -errno;
+
+ h = (const struct locarhead *) p;
+ if (h->magic != 0xde020109 ||
+ h->namehash_offset + h->namehash_size > st.st_size ||
+ h->string_offset + h->string_size > st.st_size ||
+ h->locrectab_offset + h->locrectab_size > st.st_size ||
+ h->sumhash_offset + h->sumhash_size > st.st_size) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
+ for (i = 0; i < h->namehash_size; i++) {
+ char *z;
+
+ if (e[i].locrec_offset == 0)
+ continue;
+
+ if (!utf8_is_valid((char*) p + e[i].name_offset))
+ continue;
+
+ z = normalize_locale((char*) p + e[i].name_offset);
+ if (!z) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = set_consume(locales, z);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = 0;
+
+ finish:
+ if (p != MAP_FAILED)
+ munmap((void*) p, sz);
+
+ return r;
+}
+
+static int add_locales_from_libdir (Set *locales) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *entry;
+ int r;
+
+ dir = opendir("/usr/lib/locale");
+ if (!dir)
+ return errno == ENOENT ? 0 : -errno;
+
+ FOREACH_DIRENT(entry, dir, return -errno) {
+ char *z;
+
+ dirent_ensure_type(dir, entry);
+
+ if (entry->d_type != DT_DIR)
+ continue;
+
+ z = normalize_locale(entry->d_name);
+ if (!z)
+ return -ENOMEM;
+
+ r = set_consume(locales, z);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ return 0;
+}
+
+int get_locales(char ***ret) {
+ _cleanup_set_free_ Set *locales = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ locales = set_new(&string_hash_ops);
+ if (!locales)
+ return -ENOMEM;
+
+ r = add_locales_from_archive(locales);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = add_locales_from_libdir(locales);
+ if (r < 0)
+ return r;
+
+ l = set_get_strv(locales);
+ if (!l)
+ return -ENOMEM;
+
+ r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
+ if (r == -ENXIO || r == 0) {
+ char **a, **b;
+
+ /* Filter out non-UTF-8 locales, because it's 2019, by default */
+ for (a = b = l; *a; a++) {
+
+ if (endswith(*a, "UTF-8") ||
+ strstr(*a, ".UTF-8@"))
+ *(b++) = *a;
+ else
+ free(*a);
+ }
+
+ *b = NULL;
+
+ } else if (r < 0)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
+
+ strv_sort(l);
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+}
+
+bool locale_is_valid(const char *name) {
+
+ if (isempty(name))
+ return false;
+
+ if (strlen(name) >= 128)
+ return false;
+
+ if (!utf8_is_valid(name))
+ return false;
+
+ if (!filename_is_valid(name))
+ return false;
+
+ if (!string_is_safe(name))
+ return false;
+
+ return true;
+}
+
+int locale_is_installed(const char *name) {
+ if (!locale_is_valid(name))
+ return false;
+
+ if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
+ return true;
+
+ _cleanup_(freelocalep) locale_t loc =
+ newlocale(LC_ALL_MASK, name, 0);
+ if (loc == (locale_t) 0)
+ return errno == ENOMEM ? -ENOMEM : false;
+
+ return true;
+}
+
+void init_gettext(void) {
+ setlocale(LC_ALL, "");
+ textdomain(GETTEXT_PACKAGE);
+}
+
+bool is_locale_utf8(void) {
+ const char *set;
+ static int cached_answer = -1;
+
+ /* Note that we default to 'true' here, since today UTF8 is
+ * pretty much supported everywhere. */
+
+ if (cached_answer >= 0)
+ goto out;
+
+ if (!setlocale(LC_ALL, "")) {
+ cached_answer = true;
+ goto out;
+ }
+
+ set = nl_langinfo(CODESET);
+ if (!set) {
+ cached_answer = true;
+ goto out;
+ }
+
+ if (streq(set, "UTF-8")) {
+ cached_answer = true;
+ goto out;
+ }
+
+ /* For LC_CTYPE=="C" return true, because CTYPE is effectively
+ * unset and everything can do to UTF-8 nowadays. */
+ set = setlocale(LC_CTYPE, NULL);
+ if (!set) {
+ cached_answer = true;
+ goto out;
+ }
+
+ /* Check result, but ignore the result if C was set
+ * explicitly. */
+ cached_answer =
+ STR_IN_SET(set, "C", "POSIX") &&
+ !getenv("LC_ALL") &&
+ !getenv("LC_CTYPE") &&
+ !getenv("LANG");
+
+out:
+ return (bool) cached_answer;
+}
+
+bool emoji_enabled(void) {
+ static int cached_emoji_enabled = -1;
+
+ if (cached_emoji_enabled < 0) {
+ int val;
+
+ val = getenv_bool("SYSTEMD_EMOJI");
+ if (val < 0)
+ cached_emoji_enabled =
+ is_locale_utf8() &&
+ !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");
+ else
+ cached_emoji_enabled = val;
+ }
+
+ return cached_emoji_enabled;
+}
+
+const char *special_glyph(SpecialGlyph code) {
+
+ /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
+ * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
+ * works reasonably well on the Linux console. For details see:
+ *
+ * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
+ */
+
+ static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
+ /* ASCII fallback */
+ [false] = {
+ [SPECIAL_GLYPH_TREE_VERTICAL] = "| ",
+ [SPECIAL_GLYPH_TREE_BRANCH] = "|-",
+ [SPECIAL_GLYPH_TREE_RIGHT] = "`-",
+ [SPECIAL_GLYPH_TREE_SPACE] = " ",
+ [SPECIAL_GLYPH_TRIANGULAR_BULLET] = ">",
+ [SPECIAL_GLYPH_BLACK_CIRCLE] = "*",
+ [SPECIAL_GLYPH_BULLET] = "*",
+ [SPECIAL_GLYPH_MU] = "u",
+ [SPECIAL_GLYPH_CHECK_MARK] = "+",
+ [SPECIAL_GLYPH_CROSS_MARK] = "-",
+ [SPECIAL_GLYPH_LIGHT_SHADE] = "-",
+ [SPECIAL_GLYPH_DARK_SHADE] = "X",
+ [SPECIAL_GLYPH_SIGMA] = "S",
+ [SPECIAL_GLYPH_ARROW] = "->",
+ [SPECIAL_GLYPH_ELLIPSIS] = "...",
+ [SPECIAL_GLYPH_EXTERNAL_LINK] = "[LNK]",
+ [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]",
+ [SPECIAL_GLYPH_HAPPY_SMILEY] = ":-}",
+ [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = ":-)",
+ [SPECIAL_GLYPH_NEUTRAL_SMILEY] = ":-|",
+ [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
+ [SPECIAL_GLYPH_UNHAPPY_SMILEY] = ":-{",
+ [SPECIAL_GLYPH_DEPRESSED_SMILEY] = ":-[",
+ [SPECIAL_GLYPH_LOCK_AND_KEY] = "o-,",
+ [SPECIAL_GLYPH_TOUCH] = "O=", /* Yeah, not very convincing, can you do it better? */
+ },
+
+ /* UTF-8 */
+ [true] = {
+ /* The following are multiple glyphs in both ASCII and in UNICODE */
+ [SPECIAL_GLYPH_TREE_VERTICAL] = "\342\224\202 ", /* │ */
+ [SPECIAL_GLYPH_TREE_BRANCH] = "\342\224\234\342\224\200", /* ├─ */
+ [SPECIAL_GLYPH_TREE_RIGHT] = "\342\224\224\342\224\200", /* └─ */
+ [SPECIAL_GLYPH_TREE_SPACE] = " ", /* */
+
+ /* Single glyphs in both cases */
+ [SPECIAL_GLYPH_TRIANGULAR_BULLET] = "\342\200\243", /* ‣ */
+ [SPECIAL_GLYPH_BLACK_CIRCLE] = "\342\227\217", /* ● */
+ [SPECIAL_GLYPH_BULLET] = "\342\200\242", /* • */
+ [SPECIAL_GLYPH_MU] = "\316\274", /* μ (actually called: GREEK SMALL LETTER MU) */
+ [SPECIAL_GLYPH_CHECK_MARK] = "\342\234\223", /* ✓ */
+ [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ (actually called: BALLOT X) */
+ [SPECIAL_GLYPH_LIGHT_SHADE] = "\342\226\221", /* ░ */
+ [SPECIAL_GLYPH_DARK_SHADE] = "\342\226\223", /* ▒ */
+ [SPECIAL_GLYPH_SIGMA] = "\316\243", /* Σ */
+
+ /* Single glyph in Unicode, two in ASCII */
+ [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → (actually called: RIGHTWARDS ARROW) */
+
+ /* Single glyph in Unicode, three in ASCII */
+ [SPECIAL_GLYPH_ELLIPSIS] = "\342\200\246", /* … (actually called: HORIZONTAL ELLIPSIS) */
+
+ /* Three glyphs in Unicode, five in ASCII */
+ [SPECIAL_GLYPH_EXTERNAL_LINK] = "[\360\237\241\225]", /* 🡕 (actually called: NORTH EAST SANS-SERIF ARROW, enclosed in []) */
+
+ /* These smileys are a single glyph in Unicode, and three in ASCII */
+ [SPECIAL_GLYPH_ECSTATIC_SMILEY] = "\360\237\230\207", /* 😇 (actually called: SMILING FACE WITH HALO) */
+ [SPECIAL_GLYPH_HAPPY_SMILEY] = "\360\237\230\200", /* 😀 (actually called: GRINNING FACE) */
+ [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = "\360\237\231\202", /* 🙂 (actually called: SLIGHTLY SMILING FACE) */
+ [SPECIAL_GLYPH_NEUTRAL_SMILEY] = "\360\237\230\220", /* 😐 (actually called: NEUTRAL FACE) */
+ [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201", /* 🙁 (actually called: SLIGHTLY FROWNING FACE) */
+ [SPECIAL_GLYPH_UNHAPPY_SMILEY] = "\360\237\230\250", /* 😨 (actually called: FEARFUL FACE) */
+ [SPECIAL_GLYPH_DEPRESSED_SMILEY] = "\360\237\244\242", /* 🤢 (actually called: NAUSEATED FACE) */
+
+ /* This emoji is a single character cell glyph in Unicode, and three in ASCII */
+ [SPECIAL_GLYPH_LOCK_AND_KEY] = "\360\237\224\220", /* 🔐 (actually called: CLOSED LOCK WITH KEY) */
+
+ /* This emoji is a single character cell glyph in Unicode, and two in ASCII */
+ [SPECIAL_GLYPH_TOUCH] = "\360\237\221\206", /* 👆 (actually called: BACKHAND INDEX POINTING UP */
+ },
+ };
+
+ assert(code < _SPECIAL_GLYPH_MAX);
+
+ return draw_table[code >= _SPECIAL_GLYPH_FIRST_EMOJI ? emoji_enabled() : is_locale_utf8()][code];
+}
+
+void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
+ LocaleVariable i;
+
+ if (!l)
+ return;
+
+ for (i = 0; i < _VARIABLE_LC_MAX; i++)
+ l[i] = mfree(l[i]);
+}
+
+static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
+ [VARIABLE_LANG] = "LANG",
+ [VARIABLE_LANGUAGE] = "LANGUAGE",
+ [VARIABLE_LC_CTYPE] = "LC_CTYPE",
+ [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
+ [VARIABLE_LC_TIME] = "LC_TIME",
+ [VARIABLE_LC_COLLATE] = "LC_COLLATE",
+ [VARIABLE_LC_MONETARY] = "LC_MONETARY",
+ [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
+ [VARIABLE_LC_PAPER] = "LC_PAPER",
+ [VARIABLE_LC_NAME] = "LC_NAME",
+ [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
+ [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
+ [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
+ [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);
diff --git a/src/basic/locale-util.h b/src/basic/locale-util.h
new file mode 100644
index 0000000..2d672e2
--- /dev/null
+++ b/src/basic/locale-util.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <libintl.h>
+#include <stdbool.h>
+#include <locale.h>
+
+#include "macro.h"
+
+typedef enum LocaleVariable {
+ /* We don't list LC_ALL here on purpose. People should be
+ * using LANG instead. */
+
+ VARIABLE_LANG,
+ VARIABLE_LANGUAGE,
+ VARIABLE_LC_CTYPE,
+ VARIABLE_LC_NUMERIC,
+ VARIABLE_LC_TIME,
+ VARIABLE_LC_COLLATE,
+ VARIABLE_LC_MONETARY,
+ VARIABLE_LC_MESSAGES,
+ VARIABLE_LC_PAPER,
+ VARIABLE_LC_NAME,
+ VARIABLE_LC_ADDRESS,
+ VARIABLE_LC_TELEPHONE,
+ VARIABLE_LC_MEASUREMENT,
+ VARIABLE_LC_IDENTIFICATION,
+ _VARIABLE_LC_MAX,
+ _VARIABLE_LC_INVALID = -1
+} LocaleVariable;
+
+int get_locales(char ***l);
+bool locale_is_valid(const char *name);
+int locale_is_installed(const char *name);
+
+#define _(String) gettext(String)
+#define N_(String) String
+void init_gettext(void);
+
+bool is_locale_utf8(void);
+
+typedef enum {
+ SPECIAL_GLYPH_TREE_VERTICAL,
+ SPECIAL_GLYPH_TREE_BRANCH,
+ SPECIAL_GLYPH_TREE_RIGHT,
+ SPECIAL_GLYPH_TREE_SPACE,
+ SPECIAL_GLYPH_TRIANGULAR_BULLET,
+ SPECIAL_GLYPH_BLACK_CIRCLE,
+ SPECIAL_GLYPH_BULLET,
+ SPECIAL_GLYPH_MU,
+ SPECIAL_GLYPH_CHECK_MARK,
+ SPECIAL_GLYPH_CROSS_MARK,
+ SPECIAL_GLYPH_ARROW,
+ SPECIAL_GLYPH_ELLIPSIS,
+ SPECIAL_GLYPH_LIGHT_SHADE,
+ SPECIAL_GLYPH_DARK_SHADE,
+ SPECIAL_GLYPH_SIGMA,
+ SPECIAL_GLYPH_EXTERNAL_LINK,
+ _SPECIAL_GLYPH_FIRST_EMOJI,
+ SPECIAL_GLYPH_ECSTATIC_SMILEY = _SPECIAL_GLYPH_FIRST_EMOJI,
+ SPECIAL_GLYPH_HAPPY_SMILEY,
+ SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY,
+ SPECIAL_GLYPH_NEUTRAL_SMILEY,
+ SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY,
+ SPECIAL_GLYPH_UNHAPPY_SMILEY,
+ SPECIAL_GLYPH_DEPRESSED_SMILEY,
+ SPECIAL_GLYPH_LOCK_AND_KEY,
+ SPECIAL_GLYPH_TOUCH,
+ _SPECIAL_GLYPH_MAX,
+} SpecialGlyph;
+
+const char *special_glyph(SpecialGlyph code) _const_;
+
+bool emoji_enabled(void);
+
+const char* locale_variable_to_string(LocaleVariable i) _const_;
+LocaleVariable locale_variable_from_string(const char *s) _pure_;
+
+static inline void freelocalep(locale_t *p) {
+ if (*p == (locale_t) 0)
+ return;
+
+ freelocale(*p);
+}
+
+void locale_variables_free(char* l[_VARIABLE_LC_MAX]);
+static inline void locale_variables_freep(char*(*l)[_VARIABLE_LC_MAX]) {
+ locale_variables_free(*l);
+}
diff --git a/src/basic/log.c b/src/basic/log.c
new file mode 100644
index 0000000..d4054cf
--- /dev/null
+++ b/src/basic/log.c
@@ -0,0 +1,1491 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "ratelimit.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static LogTarget log_target = LOG_TARGET_CONSOLE;
+static int log_max_level[] = {LOG_INFO, LOG_INFO};
+assert_cc(ELEMENTSOF(log_max_level) == _LOG_REALM_MAX);
+static int log_facility = LOG_DAEMON;
+
+static int console_fd = STDERR_FILENO;
+static int syslog_fd = -1;
+static int kmsg_fd = -1;
+static int journal_fd = -1;
+
+static bool syslog_is_stream = false;
+
+static bool show_color = false;
+static bool show_location = false;
+static bool show_time = false;
+static bool show_tid = false;
+
+static bool upgrade_syslog_to_journal = false;
+static bool always_reopen_console = false;
+static bool open_when_needed = false;
+static bool prohibit_ipc = false;
+
+/* Akin to glibc's __abort_msg; which is private and we hence cannot
+ * use here. */
+static char *log_abort_msg = NULL;
+
+/* An assert to use in logging functions that does not call recursively
+ * into our logging functions (since that might lead to a loop). */
+#define assert_raw(expr) \
+ do { \
+ if (_unlikely_(!(expr))) { \
+ fputs(#expr "\n", stderr); \
+ abort(); \
+ } \
+ } while (false)
+
+static void log_close_console(void) {
+ console_fd = safe_close_above_stdio(console_fd);
+}
+
+static int log_open_console(void) {
+
+ if (!always_reopen_console) {
+ console_fd = STDERR_FILENO;
+ return 0;
+ }
+
+ if (console_fd < 3) {
+ int fd;
+
+ fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ console_fd = fd_move_above_stdio(fd);
+ }
+
+ return 0;
+}
+
+static void log_close_kmsg(void) {
+ kmsg_fd = safe_close(kmsg_fd);
+}
+
+static int log_open_kmsg(void) {
+
+ if (kmsg_fd >= 0)
+ return 0;
+
+ kmsg_fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (kmsg_fd < 0)
+ return -errno;
+
+ kmsg_fd = fd_move_above_stdio(kmsg_fd);
+ return 0;
+}
+
+static void log_close_syslog(void) {
+ syslog_fd = safe_close(syslog_fd);
+}
+
+static int create_log_socket(int type) {
+ struct timeval tv;
+ int fd;
+
+ fd = socket(AF_UNIX, type|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ /* We need a blocking fd here since we'd otherwise lose messages way too early. However, let's not hang forever
+ * in the unlikely case of a deadlock. */
+ if (getpid_cached() == 1)
+ timeval_store(&tv, 10 * USEC_PER_MSEC);
+ else
+ timeval_store(&tv, 10 * USEC_PER_SEC);
+ (void) setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
+
+ return fd;
+}
+
+static int log_open_syslog(void) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/dev/log",
+ };
+
+ int r;
+
+ if (syslog_fd >= 0)
+ return 0;
+
+ syslog_fd = create_log_socket(SOCK_DGRAM);
+ if (syslog_fd < 0) {
+ r = syslog_fd;
+ goto fail;
+ }
+
+ if (connect(syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ safe_close(syslog_fd);
+
+ /* Some legacy syslog systems still use stream
+ * sockets. They really shouldn't. But what can we
+ * do... */
+ syslog_fd = create_log_socket(SOCK_STREAM);
+ if (syslog_fd < 0) {
+ r = syslog_fd;
+ goto fail;
+ }
+
+ if (connect(syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ syslog_is_stream = true;
+ } else
+ syslog_is_stream = false;
+
+ return 0;
+
+fail:
+ log_close_syslog();
+ return r;
+}
+
+static void log_close_journal(void) {
+ journal_fd = safe_close(journal_fd);
+}
+
+static int log_open_journal(void) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/socket",
+ };
+
+ int r;
+
+ if (journal_fd >= 0)
+ return 0;
+
+ journal_fd = create_log_socket(SOCK_DGRAM);
+ if (journal_fd < 0) {
+ r = journal_fd;
+ goto fail;
+ }
+
+ if (connect(journal_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ log_close_journal();
+ return r;
+}
+
+static bool stderr_is_journal(void) {
+ _cleanup_free_ char *w = NULL;
+ const char *e;
+ uint64_t dev, ino;
+ struct stat st;
+
+ e = getenv("JOURNAL_STREAM");
+ if (!e)
+ return false;
+
+ if (extract_first_word(&e, &w, ":", EXTRACT_DONT_COALESCE_SEPARATORS) <= 0)
+ return false;
+ if (!e)
+ return false;
+
+ if (safe_atou64(w, &dev) < 0)
+ return false;
+ if (safe_atou64(e, &ino) < 0)
+ return false;
+
+ if (fstat(STDERR_FILENO, &st) < 0)
+ return false;
+
+ return st.st_dev == dev && st.st_ino == ino;
+}
+
+int log_open(void) {
+ int r;
+
+ /* Do not call from library code. */
+
+ /* If we don't use the console we close it here, to not get
+ * killed by SAK. If we don't use syslog we close it here so
+ * that we are not confused by somebody deleting the socket in
+ * the fs, and to make sure we don't use it if prohibit_ipc is
+ * set. If we don't use /dev/kmsg we still keep it open,
+ * because there is no reason to close it. */
+
+ if (log_target == LOG_TARGET_NULL) {
+ log_close_journal();
+ log_close_syslog();
+ log_close_console();
+ return 0;
+ }
+
+ if (getpid_cached() == 1 ||
+ stderr_is_journal() ||
+ IN_SET(log_target,
+ LOG_TARGET_KMSG,
+ LOG_TARGET_JOURNAL,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_SYSLOG,
+ LOG_TARGET_SYSLOG_OR_KMSG)) {
+
+ if (!prohibit_ipc) {
+ if (IN_SET(log_target,
+ LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+
+ r = log_open_journal();
+ if (r >= 0) {
+ log_close_syslog();
+ log_close_console();
+ return r;
+ }
+ }
+
+ if (IN_SET(log_target,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_SYSLOG)) {
+
+ r = log_open_syslog();
+ if (r >= 0) {
+ log_close_journal();
+ log_close_console();
+ return r;
+ }
+ }
+ }
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_KMSG)) {
+ r = log_open_kmsg();
+ if (r >= 0) {
+ log_close_journal();
+ log_close_syslog();
+ log_close_console();
+ return r;
+ }
+ }
+ }
+
+ log_close_journal();
+ log_close_syslog();
+
+ return log_open_console();
+}
+
+void log_set_target(LogTarget target) {
+ assert(target >= 0);
+ assert(target < _LOG_TARGET_MAX);
+
+ if (upgrade_syslog_to_journal) {
+ if (target == LOG_TARGET_SYSLOG)
+ target = LOG_TARGET_JOURNAL;
+ else if (target == LOG_TARGET_SYSLOG_OR_KMSG)
+ target = LOG_TARGET_JOURNAL_OR_KMSG;
+ }
+
+ log_target = target;
+}
+
+void log_close(void) {
+ /* Do not call from library code. */
+
+ log_close_journal();
+ log_close_syslog();
+ log_close_kmsg();
+ log_close_console();
+}
+
+void log_forget_fds(void) {
+ /* Do not call from library code. */
+
+ console_fd = kmsg_fd = syslog_fd = journal_fd = -1;
+}
+
+void log_set_max_level_realm(LogRealm realm, int level) {
+ assert((level & LOG_PRIMASK) == level);
+ assert(realm < ELEMENTSOF(log_max_level));
+
+ log_max_level[realm] = level;
+}
+
+void log_set_facility(int facility) {
+ log_facility = facility;
+}
+
+static int write_to_console(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *buffer) {
+
+ char location[256],
+ header_time[FORMAT_TIMESTAMP_MAX],
+ prefix[1 + DECIMAL_STR_MAX(int) + 2],
+ tid_string[3 + DECIMAL_STR_MAX(pid_t) + 1];
+ struct iovec iovec[9];
+ const char *on = NULL, *off = NULL;
+ size_t n = 0;
+
+ if (console_fd < 0)
+ return 0;
+
+ if (log_target == LOG_TARGET_CONSOLE_PREFIXED) {
+ xsprintf(prefix, "<%i>", level);
+ iovec[n++] = IOVEC_MAKE_STRING(prefix);
+ }
+
+ if (show_time) {
+ if (format_timestamp(header_time, sizeof(header_time), now(CLOCK_REALTIME))) {
+ iovec[n++] = IOVEC_MAKE_STRING(header_time);
+ iovec[n++] = IOVEC_MAKE_STRING(" ");
+ }
+ }
+
+ if (show_tid) {
+ xsprintf(tid_string, "(" PID_FMT ") ", gettid());
+ iovec[n++] = IOVEC_MAKE_STRING(tid_string);
+ }
+
+ if (show_color)
+ get_log_colors(LOG_PRI(level), &on, &off, NULL);
+
+ if (show_location) {
+ const char *lon = "", *loff = "";
+ if (show_color) {
+ lon = ANSI_HIGHLIGHT_YELLOW4;
+ loff = ANSI_NORMAL;
+ }
+
+ (void) snprintf(location, sizeof location, "%s%s:%i%s: ", lon, file, line, loff);
+ iovec[n++] = IOVEC_MAKE_STRING(location);
+ }
+
+ if (on)
+ iovec[n++] = IOVEC_MAKE_STRING(on);
+ iovec[n++] = IOVEC_MAKE_STRING(buffer);
+ if (off)
+ iovec[n++] = IOVEC_MAKE_STRING(off);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ if (writev(console_fd, iovec, n) < 0) {
+
+ if (errno == EIO && getpid_cached() == 1) {
+
+ /* If somebody tried to kick us from our console tty (via vhangup() or suchlike), try
+ * to reconnect. */
+
+ log_close_console();
+ (void) log_open_console();
+ if (console_fd < 0)
+ return 0;
+
+ if (writev(console_fd, iovec, n) < 0)
+ return -errno;
+ } else
+ return -errno;
+ }
+
+ return 1;
+}
+
+static int write_to_syslog(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *buffer) {
+
+ char header_priority[2 + DECIMAL_STR_MAX(int) + 1],
+ header_time[64],
+ header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1];
+ struct iovec iovec[5] = {};
+ struct msghdr msghdr = {
+ .msg_iov = iovec,
+ .msg_iovlen = ELEMENTSOF(iovec),
+ };
+ time_t t;
+ struct tm tm;
+
+ if (syslog_fd < 0)
+ return 0;
+
+ xsprintf(header_priority, "<%i>", level);
+
+ t = (time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC);
+ if (!localtime_r(&t, &tm))
+ return -EINVAL;
+
+ if (strftime(header_time, sizeof(header_time), "%h %e %T ", &tm) <= 0)
+ return -EINVAL;
+
+ xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
+
+ iovec[0] = IOVEC_MAKE_STRING(header_priority);
+ iovec[1] = IOVEC_MAKE_STRING(header_time);
+ iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ iovec[3] = IOVEC_MAKE_STRING(header_pid);
+ iovec[4] = IOVEC_MAKE_STRING(buffer);
+
+ /* When using syslog via SOCK_STREAM separate the messages by NUL chars */
+ if (syslog_is_stream)
+ iovec[4].iov_len++;
+
+ for (;;) {
+ ssize_t n;
+
+ n = sendmsg(syslog_fd, &msghdr, MSG_NOSIGNAL);
+ if (n < 0)
+ return -errno;
+
+ if (!syslog_is_stream ||
+ (size_t) n >= IOVEC_TOTAL_SIZE(iovec, ELEMENTSOF(iovec)))
+ break;
+
+ IOVEC_INCREMENT(iovec, ELEMENTSOF(iovec), n);
+ }
+
+ return 1;
+}
+
+static int write_to_kmsg(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *buffer) {
+
+ /* Set a ratelimit on the amount of messages logged to /dev/kmsg. This is mostly supposed to be a
+ * safety catch for the case where start indiscriminately logging in a loop. It will not catch cases
+ * where we log excessively, but not in a tight loop.
+ *
+ * Note that this ratelimit is per-emitter, so we might still overwhelm /dev/kmsg with multiple
+ * loggers.
+ */
+ static thread_local RateLimit ratelimit = { 5 * USEC_PER_SEC, 200 };
+
+ char header_priority[2 + DECIMAL_STR_MAX(int) + 1],
+ header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1];
+ struct iovec iovec[5] = {};
+
+ if (kmsg_fd < 0)
+ return 0;
+
+ if (!ratelimit_below(&ratelimit))
+ return 0;
+
+ xsprintf(header_priority, "<%i>", level);
+ xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
+
+ iovec[0] = IOVEC_MAKE_STRING(header_priority);
+ iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ iovec[2] = IOVEC_MAKE_STRING(header_pid);
+ iovec[3] = IOVEC_MAKE_STRING(buffer);
+ iovec[4] = IOVEC_MAKE_STRING("\n");
+
+ if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0)
+ return -errno;
+
+ return 1;
+}
+
+static int log_do_header(
+ char *header,
+ size_t size,
+ int level,
+ int error,
+ const char *file, int line, const char *func,
+ const char *object_field, const char *object,
+ const char *extra_field, const char *extra) {
+ int r;
+
+ error = IS_SYNTHETIC_ERRNO(error) ? 0 : ERRNO_VALUE(error);
+
+ r = snprintf(header, size,
+ "PRIORITY=%i\n"
+ "SYSLOG_FACILITY=%i\n"
+ "TID=" PID_FMT "\n"
+ "%s%.256s%s" /* CODE_FILE */
+ "%s%.*i%s" /* CODE_LINE */
+ "%s%.256s%s" /* CODE_FUNC */
+ "%s%.*i%s" /* ERRNO */
+ "%s%.256s%s" /* object */
+ "%s%.256s%s" /* extra */
+ "SYSLOG_IDENTIFIER=%.256s\n",
+ LOG_PRI(level),
+ LOG_FAC(level),
+ gettid(),
+ isempty(file) ? "" : "CODE_FILE=",
+ isempty(file) ? "" : file,
+ isempty(file) ? "" : "\n",
+ line ? "CODE_LINE=" : "",
+ line ? 1 : 0, line, /* %.0d means no output too, special case for 0 */
+ line ? "\n" : "",
+ isempty(func) ? "" : "CODE_FUNC=",
+ isempty(func) ? "" : func,
+ isempty(func) ? "" : "\n",
+ error ? "ERRNO=" : "",
+ error ? 1 : 0, error,
+ error ? "\n" : "",
+ isempty(object) ? "" : object_field,
+ isempty(object) ? "" : object,
+ isempty(object) ? "" : "\n",
+ isempty(extra) ? "" : extra_field,
+ isempty(extra) ? "" : extra,
+ isempty(extra) ? "" : "\n",
+ program_invocation_short_name);
+ assert_raw((size_t) r < size);
+
+ return 0;
+}
+
+static int write_to_journal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *buffer) {
+
+ char header[LINE_MAX];
+ struct iovec iovec[4] = {};
+ struct msghdr mh = {};
+
+ if (journal_fd < 0)
+ return 0;
+
+ log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);
+
+ iovec[0] = IOVEC_MAKE_STRING(header);
+ iovec[1] = IOVEC_MAKE_STRING("MESSAGE=");
+ iovec[2] = IOVEC_MAKE_STRING(buffer);
+ iovec[3] = IOVEC_MAKE_STRING("\n");
+
+ mh.msg_iov = iovec;
+ mh.msg_iovlen = ELEMENTSOF(iovec);
+
+ if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 1;
+}
+
+int log_dispatch_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ char *buffer) {
+
+ assert_raw(buffer);
+
+ if (log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ /* Patch in LOG_DAEMON facility if necessary */
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (open_when_needed)
+ (void) log_open();
+
+ do {
+ char *e;
+ int k = 0;
+
+ buffer += strspn(buffer, NEWLINE);
+
+ if (buffer[0] == 0)
+ break;
+
+ if ((e = strpbrk(buffer, NEWLINE)))
+ *(e++) = 0;
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+
+ k = write_to_journal(level, error, file, line, func, object_field, object, extra_field, extra, buffer);
+ if (k < 0 && k != -EAGAIN)
+ log_close_journal();
+ }
+
+ if (IN_SET(log_target, LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_SYSLOG)) {
+
+ k = write_to_syslog(level, error, file, line, func, buffer);
+ if (k < 0 && k != -EAGAIN)
+ log_close_syslog();
+ }
+
+ if (k <= 0 &&
+ IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_KMSG)) {
+
+ if (k < 0)
+ log_open_kmsg();
+
+ k = write_to_kmsg(level, error, file, line, func, buffer);
+ if (k < 0) {
+ log_close_kmsg();
+ (void) log_open_console();
+ }
+ }
+
+ if (k <= 0)
+ (void) write_to_console(level, error, file, line, func, buffer);
+
+ buffer = e;
+ } while (buffer);
+
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+}
+
+int log_dump_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ char *buffer) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ PROTECT_ERRNO;
+
+ /* This modifies the buffer... */
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return -ERRNO_VALUE(error);
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+int log_internalv_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format,
+ va_list ap) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ char buffer[LINE_MAX];
+ PROTECT_ERRNO;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return -ERRNO_VALUE(error);
+
+ /* Make sure that %m maps to the specified error (or "Success"). */
+ errno = ERRNO_VALUE(error);
+
+ (void) vsnprintf(buffer, sizeof buffer, format, ap);
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+int log_internal_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = log_internalv_realm(level, error, file, line, func, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int log_object_internalv(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format,
+ va_list ap) {
+
+ PROTECT_ERRNO;
+ char *buffer, *b;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[LOG_REALM_SYSTEMD]))
+ return -ERRNO_VALUE(error);
+
+ /* Make sure that %m maps to the specified error (or "Success"). */
+ errno = ERRNO_VALUE(error);
+
+ /* Prepend the object name before the message */
+ if (object) {
+ size_t n;
+
+ n = strlen(object);
+ buffer = newa(char, n + 2 + LINE_MAX);
+ b = stpcpy(stpcpy(buffer, object), ": ");
+ } else
+ b = buffer = newa(char, LINE_MAX);
+
+ (void) vsnprintf(b, LINE_MAX, format, ap);
+
+ return log_dispatch_internal(level, error, file, line, func,
+ object_field, object, extra_field, extra, buffer);
+}
+
+int log_object_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = log_object_internalv(level, error, file, line, func, object_field, object, extra_field, extra, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static void log_assert(
+ int level,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format) {
+
+ static char buffer[LINE_MAX];
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]))
+ return;
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ (void) snprintf(buffer, sizeof buffer, format, text, file, line, func);
+ REENABLE_WARNING;
+
+ log_abort_msg = buffer;
+
+ log_dispatch_internal(level, 0, file, line, func, NULL, NULL, NULL, NULL, buffer);
+}
+
+_noreturn_ void log_assert_failed_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_CRIT), text, file, line, func,
+ "Assertion '%s' failed at %s:%u, function %s(). Aborting.");
+ abort();
+}
+
+_noreturn_ void log_assert_failed_unreachable_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_CRIT), text, file, line, func,
+ "Code should not be reached '%s' at %s:%u, function %s(). Aborting.");
+ abort();
+}
+
+void log_assert_failed_return_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func) {
+ PROTECT_ERRNO;
+ log_assert(LOG_REALM_PLUS_LEVEL(realm, LOG_DEBUG), text, file, line, func,
+ "Assertion '%s' failed at %s:%u, function %s(). Ignoring.");
+}
+
+int log_oom_internal(LogRealm realm, const char *file, int line, const char *func) {
+ return log_internal_realm(LOG_REALM_PLUS_LEVEL(realm, LOG_ERR),
+ ENOMEM, file, line, func, "Out of memory.");
+}
+
+int log_format_iovec(
+ struct iovec *iovec,
+ size_t iovec_len,
+ size_t *n,
+ bool newline_separator,
+ int error,
+ const char *format,
+ va_list ap) {
+
+ static const char nl = '\n';
+
+ while (format && *n + 1 < iovec_len) {
+ va_list aq;
+ char *m;
+ int r;
+
+ /* We need to copy the va_list structure,
+ * since vasprintf() leaves it afterwards at
+ * an undefined location */
+
+ errno = ERRNO_VALUE(error);
+
+ va_copy(aq, ap);
+ r = vasprintf(&m, format, aq);
+ va_end(aq);
+ if (r < 0)
+ return -EINVAL;
+
+ /* Now, jump enough ahead, so that we point to
+ * the next format string */
+ VA_FORMAT_ADVANCE(format, ap);
+
+ iovec[(*n)++] = IOVEC_MAKE_STRING(m);
+
+ if (newline_separator) {
+ iovec[*n] = IOVEC_MAKE((char *)&nl, 1);
+ (*n)++;
+ }
+
+ format = va_arg(ap, char *);
+ }
+ return 0;
+}
+
+int log_struct_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ char buf[LINE_MAX];
+ bool found = false;
+ PROTECT_ERRNO;
+ va_list ap;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]) ||
+ log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (IN_SET(log_target,
+ LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL)) {
+
+ if (open_when_needed)
+ log_open_journal();
+
+ if (journal_fd >= 0) {
+ char header[LINE_MAX];
+ struct iovec iovec[17] = {};
+ size_t n = 0, i;
+ int r;
+ struct msghdr mh = {
+ .msg_iov = iovec,
+ };
+ bool fallback = false;
+
+ /* If the journal is available do structured logging.
+ * Do not report the errno if it is synthetic. */
+ log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+ iovec[n++] = IOVEC_MAKE_STRING(header);
+
+ va_start(ap, format);
+ r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap);
+ if (r < 0)
+ fallback = true;
+ else {
+ mh.msg_iovlen = n;
+ (void) sendmsg(journal_fd, &mh, MSG_NOSIGNAL);
+ }
+
+ va_end(ap);
+ for (i = 1; i < n; i += 2)
+ free(iovec[i].iov_base);
+
+ if (!fallback) {
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+ }
+ }
+ }
+
+ /* Fallback if journal logging is not available or didn't work. */
+
+ va_start(ap, format);
+ while (format) {
+ va_list aq;
+
+ errno = ERRNO_VALUE(error);
+
+ va_copy(aq, ap);
+ (void) vsnprintf(buf, sizeof buf, format, aq);
+ va_end(aq);
+
+ if (startswith(buf, "MESSAGE=")) {
+ found = true;
+ break;
+ }
+
+ VA_FORMAT_ADVANCE(format, ap);
+
+ format = va_arg(ap, char *);
+ }
+ va_end(ap);
+
+ if (!found) {
+ if (open_when_needed)
+ log_close();
+
+ return -ERRNO_VALUE(error);
+ }
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
+}
+
+int log_struct_iovec_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const struct iovec input_iovec[],
+ size_t n_input_iovec) {
+
+ LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+ PROTECT_ERRNO;
+ size_t i;
+ char *m;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[realm]) ||
+ log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ if ((level & LOG_FACMASK) == 0)
+ level |= log_facility;
+
+ if (IN_SET(log_target, LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_JOURNAL) &&
+ journal_fd >= 0) {
+
+ struct iovec iovec[1 + n_input_iovec*2];
+ char header[LINE_MAX];
+ struct msghdr mh = {
+ .msg_iov = iovec,
+ .msg_iovlen = 1 + n_input_iovec*2,
+ };
+
+ log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+ iovec[0] = IOVEC_MAKE_STRING(header);
+
+ for (i = 0; i < n_input_iovec; i++) {
+ iovec[1+i*2] = input_iovec[i];
+ iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n");
+ }
+
+ if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0)
+ return -ERRNO_VALUE(error);
+ }
+
+ for (i = 0; i < n_input_iovec; i++)
+ if (memory_startswith(input_iovec[i].iov_base, input_iovec[i].iov_len, "MESSAGE="))
+ break;
+
+ if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? */
+ return -ERRNO_VALUE(error);
+
+ m = strndupa(input_iovec[i].iov_base + STRLEN("MESSAGE="),
+ input_iovec[i].iov_len - STRLEN("MESSAGE="));
+
+ return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m);
+}
+
+int log_set_target_from_string(const char *e) {
+ LogTarget t;
+
+ t = log_target_from_string(e);
+ if (t < 0)
+ return -EINVAL;
+
+ log_set_target(t);
+ return 0;
+}
+
+int log_set_max_level_from_string_realm(LogRealm realm, const char *e) {
+ int t;
+
+ t = log_level_from_string(e);
+ if (t < 0)
+ return -EINVAL;
+
+ log_set_max_level_realm(realm, t);
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+
+ /*
+ * The systemd.log_xyz= settings are parsed by all tools, and
+ * so is "debug".
+ *
+ * However, "quiet" is only parsed by PID 1, and only turns of
+ * status output to /dev/console, but does not alter the log
+ * level.
+ */
+
+ if (streq(key, "debug") && !value)
+ log_set_max_level(LOG_DEBUG);
+
+ else if (proc_cmdline_key_streq(key, "systemd.log_target")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (log_set_target_from_string(value) < 0)
+ log_warning("Failed to parse log target '%s'. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.log_level")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (log_set_max_level_from_string(value) < 0)
+ log_warning("Failed to parse log level '%s'. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.log_color")) {
+
+ if (log_show_color_from_string(value ?: "1") < 0)
+ log_warning("Failed to parse log color setting '%s'. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.log_location")) {
+
+ if (log_show_location_from_string(value ?: "1") < 0)
+ log_warning("Failed to parse log location setting '%s'. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.log_tid")) {
+
+ if (log_show_tid_from_string(value ?: "1") < 0)
+ log_warning("Failed to parse log tid setting '%s'. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.log_time")) {
+
+ if (log_show_time_from_string(value ?: "1") < 0)
+ log_warning("Failed to parse log time setting '%s'. Ignoring.", value);
+
+ }
+
+ return 0;
+}
+
+void log_parse_environment_realm(LogRealm realm) {
+ if (getpid_cached() == 1 || get_ctty_devnr(0, NULL) < 0)
+ /* Only try to read the command line in daemons. We assume that anything that has a
+ * controlling tty is user stuff. For PID1 we do a special check in case it hasn't
+ * closed the console yet. */
+ (void) proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+
+ log_parse_environment_cli_realm(realm);
+}
+
+void log_parse_environment_cli_realm(LogRealm realm) {
+ /* Do not call from library code. */
+
+ const char *e;
+
+ e = getenv("SYSTEMD_LOG_TARGET");
+ if (e && log_set_target_from_string(e) < 0)
+ log_warning("Failed to parse log target '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_LEVEL");
+ if (e && log_set_max_level_from_string_realm(realm, e) < 0)
+ log_warning("Failed to parse log level '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_COLOR");
+ if (e && log_show_color_from_string(e) < 0)
+ log_warning("Failed to parse log color '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_LOCATION");
+ if (e && log_show_location_from_string(e) < 0)
+ log_warning("Failed to parse log location '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_TIME");
+ if (e && log_show_time_from_string(e) < 0)
+ log_warning("Failed to parse log time '%s'. Ignoring.", e);
+
+ e = getenv("SYSTEMD_LOG_TID");
+ if (e && log_show_tid_from_string(e) < 0)
+ log_warning("Failed to parse log tid '%s'. Ignoring.", e);
+}
+
+LogTarget log_get_target(void) {
+ return log_target;
+}
+
+int log_get_max_level_realm(LogRealm realm) {
+ return log_max_level[realm];
+}
+
+void log_show_color(bool b) {
+ show_color = b;
+}
+
+bool log_get_show_color(void) {
+ return show_color;
+}
+
+void log_show_location(bool b) {
+ show_location = b;
+}
+
+bool log_get_show_location(void) {
+ return show_location;
+}
+
+void log_show_time(bool b) {
+ show_time = b;
+}
+
+bool log_get_show_time(void) {
+ return show_time;
+}
+
+void log_show_tid(bool b) {
+ show_tid = b;
+}
+
+bool log_get_show_tid(void) {
+ return show_tid;
+}
+
+int log_show_color_from_string(const char *e) {
+ int t;
+
+ t = parse_boolean(e);
+ if (t < 0)
+ return t;
+
+ log_show_color(t);
+ return 0;
+}
+
+int log_show_location_from_string(const char *e) {
+ int t;
+
+ t = parse_boolean(e);
+ if (t < 0)
+ return t;
+
+ log_show_location(t);
+ return 0;
+}
+
+int log_show_time_from_string(const char *e) {
+ int t;
+
+ t = parse_boolean(e);
+ if (t < 0)
+ return t;
+
+ log_show_time(t);
+ return 0;
+}
+
+int log_show_tid_from_string(const char *e) {
+ int t;
+
+ t = parse_boolean(e);
+ if (t < 0)
+ return t;
+
+ log_show_tid(t);
+ return 0;
+}
+
+bool log_on_console(void) {
+ if (IN_SET(log_target, LOG_TARGET_CONSOLE,
+ LOG_TARGET_CONSOLE_PREFIXED))
+ return true;
+
+ return syslog_fd < 0 && kmsg_fd < 0 && journal_fd < 0;
+}
+
+static const char *const log_target_table[_LOG_TARGET_MAX] = {
+ [LOG_TARGET_CONSOLE] = "console",
+ [LOG_TARGET_CONSOLE_PREFIXED] = "console-prefixed",
+ [LOG_TARGET_KMSG] = "kmsg",
+ [LOG_TARGET_JOURNAL] = "journal",
+ [LOG_TARGET_JOURNAL_OR_KMSG] = "journal-or-kmsg",
+ [LOG_TARGET_SYSLOG] = "syslog",
+ [LOG_TARGET_SYSLOG_OR_KMSG] = "syslog-or-kmsg",
+ [LOG_TARGET_AUTO] = "auto",
+ [LOG_TARGET_NULL] = "null",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(log_target, LogTarget);
+
+void log_received_signal(int level, const struct signalfd_siginfo *si) {
+ assert(si);
+
+ if (pid_is_valid(si->ssi_pid)) {
+ _cleanup_free_ char *p = NULL;
+
+ (void) get_process_comm(si->ssi_pid, &p);
+
+ log_full(level,
+ "Received SIG%s from PID %"PRIu32" (%s).",
+ signal_to_string(si->ssi_signo),
+ si->ssi_pid, strna(p));
+ } else
+ log_full(level,
+ "Received SIG%s.",
+ signal_to_string(si->ssi_signo));
+}
+
+int log_syntax_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ PROTECT_ERRNO;
+ char buffer[LINE_MAX];
+ va_list ap;
+ const char *unit_fmt = NULL;
+
+ if (_likely_(LOG_PRI(level) > log_max_level[LOG_REALM_SYSTEMD]) ||
+ log_target == LOG_TARGET_NULL)
+ return -ERRNO_VALUE(error);
+
+ errno = ERRNO_VALUE(error);
+
+ va_start(ap, format);
+ (void) vsnprintf(buffer, sizeof buffer, format, ap);
+ va_end(ap);
+
+ if (unit)
+ unit_fmt = getpid_cached() == 1 ? "UNIT=%s" : "USER_UNIT=%s";
+
+ if (config_file) {
+ if (config_line > 0)
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ "CONFIG_FILE=%s", config_file,
+ "CONFIG_LINE=%u", config_line,
+ LOG_MESSAGE("%s:%u: %s", config_file, config_line, buffer),
+ unit_fmt, unit,
+ NULL);
+ else
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ "CONFIG_FILE=%s", config_file,
+ LOG_MESSAGE("%s: %s", config_file, buffer),
+ unit_fmt, unit,
+ NULL);
+ } else if (unit)
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ LOG_MESSAGE("%s: %s", unit, buffer),
+ unit_fmt, unit,
+ NULL);
+ else
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ LOG_MESSAGE("%s", buffer),
+ NULL);
+}
+
+int log_syntax_invalid_utf8_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ const char *file,
+ int line,
+ const char *func,
+ const char *rvalue) {
+
+ _cleanup_free_ char *p = NULL;
+
+ if (rvalue)
+ p = utf8_escape_invalid(rvalue);
+
+ log_syntax_internal(unit, level, config_file, config_line, 0, file, line, func,
+ "String is not UTF-8 clean, ignoring assignment: %s", strna(p));
+
+ return -EINVAL;
+}
+
+void log_set_upgrade_syslog_to_journal(bool b) {
+ upgrade_syslog_to_journal = b;
+
+ /* Make the change effective immediately */
+ if (b) {
+ if (log_target == LOG_TARGET_SYSLOG)
+ log_target = LOG_TARGET_JOURNAL;
+ else if (log_target == LOG_TARGET_SYSLOG_OR_KMSG)
+ log_target = LOG_TARGET_JOURNAL_OR_KMSG;
+ }
+}
+
+void log_set_always_reopen_console(bool b) {
+ always_reopen_console = b;
+}
+
+void log_set_open_when_needed(bool b) {
+ open_when_needed = b;
+}
+
+void log_set_prohibit_ipc(bool b) {
+ prohibit_ipc = b;
+}
+
+int log_emergency_level(void) {
+ /* Returns the log level to use for log_emergency() logging. We use LOG_EMERG only when we are PID 1, as only
+ * then the system of the whole system is obviously affected. */
+
+ return getpid_cached() == 1 ? LOG_EMERG : LOG_ERR;
+}
+
+int log_dup_console(void) {
+ int copy;
+
+ /* Duplicate the fd we use for fd logging if it's < 3 and use the copy from now on. This call is useful
+ * whenever we want to continue logging through the original fd, but want to rearrange stderr. */
+
+ if (console_fd >= 3)
+ return 0;
+
+ copy = fcntl(console_fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ console_fd = copy;
+ return 0;
+}
+
+void log_setup_service(void) {
+ /* Sets up logging the way it is most appropriate for running a program as a service. Note that using this
+ * doesn't make the binary unsuitable for invocation on the command line, as log output will still go to the
+ * terminal if invoked interactively. */
+
+ log_set_target(LOG_TARGET_AUTO);
+ log_parse_environment();
+ (void) log_open();
+}
+
+void log_setup_cli(void) {
+ /* Sets up logging the way it is most appropriate for running a program as a CLI utility. */
+
+ log_show_color(true);
+ log_parse_environment_cli();
+ (void) log_open();
+}
diff --git a/src/basic/log.h b/src/basic/log.h
new file mode 100644
index 0000000..a2aae16
--- /dev/null
+++ b/src/basic/log.h
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <syslog.h>
+
+#include "macro.h"
+
+/* Some structures we reference but don't want to pull in headers for */
+struct iovec;
+struct signalfd_siginfo;
+
+typedef enum LogRealm {
+ LOG_REALM_SYSTEMD,
+ LOG_REALM_UDEV,
+ _LOG_REALM_MAX,
+} LogRealm;
+
+#ifndef LOG_REALM
+# define LOG_REALM LOG_REALM_SYSTEMD
+#endif
+
+typedef enum LogTarget{
+ LOG_TARGET_CONSOLE,
+ LOG_TARGET_CONSOLE_PREFIXED,
+ LOG_TARGET_KMSG,
+ LOG_TARGET_JOURNAL,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_SYSLOG,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_AUTO, /* console if stderr is not journal, JOURNAL_OR_KMSG otherwise */
+ LOG_TARGET_NULL,
+ _LOG_TARGET_MAX,
+ _LOG_TARGET_INVALID = -1
+} LogTarget;
+
+/* Note to readers: << and >> have lower precedence than & and | */
+#define LOG_REALM_PLUS_LEVEL(realm, level) ((realm) << 10 | (level))
+#define LOG_REALM_REMOVE_LEVEL(realm_level) ((realm_level) >> 10)
+#define SYNTHETIC_ERRNO(num) (1 << 30 | (num))
+#define IS_SYNTHETIC_ERRNO(val) ((val) >> 30 & 1)
+#define ERRNO_VALUE(val) (abs(val) & 255)
+
+void log_set_target(LogTarget target);
+void log_set_max_level_realm(LogRealm realm, int level);
+#define log_set_max_level(level) \
+ log_set_max_level_realm(LOG_REALM, (level))
+
+void log_set_facility(int facility);
+
+int log_set_target_from_string(const char *e);
+int log_set_max_level_from_string_realm(LogRealm realm, const char *e);
+#define log_set_max_level_from_string(e) \
+ log_set_max_level_from_string_realm(LOG_REALM, (e))
+
+void log_show_color(bool b);
+bool log_get_show_color(void) _pure_;
+void log_show_location(bool b);
+bool log_get_show_location(void) _pure_;
+void log_show_time(bool b);
+bool log_get_show_time(void) _pure_;
+void log_show_tid(bool b);
+bool log_get_show_tid(void) _pure_;
+
+int log_show_color_from_string(const char *e);
+int log_show_location_from_string(const char *e);
+int log_show_time_from_string(const char *e);
+int log_show_tid_from_string(const char *e);
+
+LogTarget log_get_target(void) _pure_;
+int log_get_max_level_realm(LogRealm realm) _pure_;
+#define log_get_max_level() \
+ log_get_max_level_realm(LOG_REALM)
+
+/* Functions below that open and close logs or configure logging based on the
+ * environment should not be called from library code — this is always a job
+ * for the application itself.
+ */
+
+assert_cc(STRLEN(__FILE__) > STRLEN(RELATIVE_SOURCE_PATH) + 1);
+#define PROJECT_FILE (&__FILE__[STRLEN(RELATIVE_SOURCE_PATH) + 1])
+
+int log_open(void);
+void log_close(void);
+void log_forget_fds(void);
+
+void log_parse_environment_realm(LogRealm realm);
+void log_parse_environment_cli_realm(LogRealm realm);
+#define log_parse_environment() \
+ log_parse_environment_realm(LOG_REALM)
+#define log_parse_environment_cli() \
+ log_parse_environment_cli_realm(LOG_REALM)
+
+int log_dispatch_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra,
+ const char *extra_field,
+ char *buffer);
+
+int log_internal_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(6,7);
+#define log_internal(level, ...) \
+ log_internal_realm(LOG_REALM_PLUS_LEVEL(LOG_REALM, (level)), __VA_ARGS__)
+
+int log_internalv_realm(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format,
+ va_list ap) _printf_(6,0);
+#define log_internalv(level, ...) \
+ log_internalv_realm(LOG_REALM_PLUS_LEVEL(LOG_REALM, (level)), __VA_ARGS__)
+
+/* Realm is fixed to LOG_REALM_SYSTEMD for those */
+int log_object_internalv(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format,
+ va_list ap) _printf_(10,0);
+
+int log_object_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *object_field,
+ const char *object,
+ const char *extra_field,
+ const char *extra,
+ const char *format, ...) _printf_(10,11);
+
+int log_struct_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(6,0) _sentinel_;
+
+int log_oom_internal(
+ LogRealm realm,
+ const char *file,
+ int line,
+ const char *func);
+
+int log_format_iovec(
+ struct iovec *iovec,
+ size_t iovec_len,
+ size_t *n,
+ bool newline_separator,
+ int error,
+ const char *format,
+ va_list ap) _printf_(6, 0);
+
+int log_struct_iovec_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const struct iovec *input_iovec,
+ size_t n_input_iovec);
+
+/* This modifies the buffer passed! */
+int log_dump_internal(
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ char *buffer);
+
+/* Logging for various assertions */
+_noreturn_ void log_assert_failed_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed(text, ...) \
+ log_assert_failed_realm(LOG_REALM, (text), __VA_ARGS__)
+
+_noreturn_ void log_assert_failed_unreachable_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed_unreachable(text, ...) \
+ log_assert_failed_unreachable_realm(LOG_REALM, (text), __VA_ARGS__)
+
+void log_assert_failed_return_realm(
+ LogRealm realm,
+ const char *text,
+ const char *file,
+ int line,
+ const char *func);
+#define log_assert_failed_return(text, ...) \
+ log_assert_failed_return_realm(LOG_REALM, (text), __VA_ARGS__)
+
+#define log_dispatch(level, error, buffer) \
+ log_dispatch_internal(level, error, PROJECT_FILE, __LINE__, __func__, NULL, NULL, NULL, NULL, buffer)
+
+/* Logging with level */
+#define log_full_errno_realm(realm, level, error, ...) \
+ ({ \
+ int _level = (level), _e = (error), _realm = (realm); \
+ (log_get_max_level_realm(_realm) >= LOG_PRI(_level)) \
+ ? log_internal_realm(LOG_REALM_PLUS_LEVEL(_realm, _level), _e, \
+ PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \
+ : -ERRNO_VALUE(_e); \
+ })
+
+#define log_full_errno(level, error, ...) \
+ log_full_errno_realm(LOG_REALM, (level), (error), __VA_ARGS__)
+
+#define log_full(level, ...) (void) log_full_errno((level), 0, __VA_ARGS__)
+
+int log_emergency_level(void);
+
+/* Normal logging */
+#define log_debug(...) log_full_errno(LOG_DEBUG, 0, __VA_ARGS__)
+#define log_info(...) log_full(LOG_INFO, __VA_ARGS__)
+#define log_notice(...) log_full(LOG_NOTICE, __VA_ARGS__)
+#define log_warning(...) log_full(LOG_WARNING, __VA_ARGS__)
+#define log_error(...) log_full(LOG_ERR, __VA_ARGS__)
+#define log_emergency(...) log_full(log_emergency_level(), __VA_ARGS__)
+
+/* Logging triggered by an errno-like error */
+#define log_debug_errno(error, ...) log_full_errno(LOG_DEBUG, error, __VA_ARGS__)
+#define log_info_errno(error, ...) log_full_errno(LOG_INFO, error, __VA_ARGS__)
+#define log_notice_errno(error, ...) log_full_errno(LOG_NOTICE, error, __VA_ARGS__)
+#define log_warning_errno(error, ...) log_full_errno(LOG_WARNING, error, __VA_ARGS__)
+#define log_error_errno(error, ...) log_full_errno(LOG_ERR, error, __VA_ARGS__)
+#define log_emergency_errno(error, ...) log_full_errno(log_emergency_level(), error, __VA_ARGS__)
+
+#ifdef LOG_TRACE
+# define log_trace(...) log_debug(__VA_ARGS__)
+#else
+# define log_trace(...) do {} while (0)
+#endif
+
+/* Structured logging */
+#define log_struct_errno(level, error, ...) \
+ log_struct_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ error, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__, NULL)
+#define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__)
+
+#define log_struct_iovec_errno(level, error, iovec, n_iovec) \
+ log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ error, PROJECT_FILE, __LINE__, __func__, iovec, n_iovec)
+#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec)
+
+/* This modifies the buffer passed! */
+#define log_dump(level, buffer) \
+ log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+ 0, PROJECT_FILE, __LINE__, __func__, buffer)
+
+#define log_oom() log_oom_internal(LOG_REALM, PROJECT_FILE, __LINE__, __func__)
+
+bool log_on_console(void) _pure_;
+
+const char *log_target_to_string(LogTarget target) _const_;
+LogTarget log_target_from_string(const char *s) _pure_;
+
+/* Helper to prepare various field for structured logging */
+#define LOG_MESSAGE(fmt, ...) "MESSAGE=" fmt, ##__VA_ARGS__
+
+void log_received_signal(int level, const struct signalfd_siginfo *si);
+
+/* If turned on, any requests for a log target involving "syslog" will be implicitly upgraded to the equivalent journal target */
+void log_set_upgrade_syslog_to_journal(bool b);
+
+/* If turned on, and log_open() is called, we'll not use STDERR_FILENO for logging ever, but rather open /dev/console */
+void log_set_always_reopen_console(bool b);
+
+/* If turned on, we'll open the log stream implicitly if needed on each individual log call. This is normally not
+ * desired as we want to reuse our logging streams. It is useful however */
+void log_set_open_when_needed(bool b);
+
+/* If turned on, then we'll never use IPC-based logging, i.e. never log to syslog or the journal. We'll only log to
+ * stderr, the console or kmsg */
+void log_set_prohibit_ipc(bool b);
+
+int log_dup_console(void);
+
+int log_syntax_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) _printf_(9, 10);
+
+int log_syntax_invalid_utf8_internal(
+ const char *unit,
+ int level,
+ const char *config_file,
+ unsigned config_line,
+ const char *file,
+ int line,
+ const char *func,
+ const char *rvalue);
+
+#define log_syntax(unit, level, config_file, config_line, error, ...) \
+ ({ \
+ int _level = (level), _e = (error); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? log_syntax_internal(unit, _level, config_file, config_line, _e, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \
+ : -ERRNO_VALUE(_e); \
+ })
+
+#define log_syntax_invalid_utf8(unit, level, config_file, config_line, rvalue) \
+ ({ \
+ int _level = (level); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? log_syntax_invalid_utf8_internal(unit, _level, config_file, config_line, PROJECT_FILE, __LINE__, __func__, rvalue) \
+ : -EINVAL; \
+ })
+
+#define DEBUG_LOGGING _unlikely_(log_get_max_level() >= LOG_DEBUG)
+
+void log_setup_service(void);
+void log_setup_cli(void);
diff --git a/src/basic/login-util.c b/src/basic/login-util.c
new file mode 100644
index 0000000..044e8b7
--- /dev/null
+++ b/src/basic/login-util.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "login-util.h"
+#include "string-util.h"
+
+bool session_id_valid(const char *id) {
+
+ if (isempty(id))
+ return false;
+
+ return id[strspn(id, LETTERS DIGITS)] == '\0';
+}
diff --git a/src/basic/login-util.h b/src/basic/login-util.h
new file mode 100644
index 0000000..00a124d
--- /dev/null
+++ b/src/basic/login-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <unistd.h>
+
+bool session_id_valid(const char *id);
+
+static inline bool logind_running(void) {
+ return access("/run/systemd/seats/", F_OK) >= 0;
+}
diff --git a/src/basic/macro.h b/src/basic/macro.h
new file mode 100644
index 0000000..2782553
--- /dev/null
+++ b/src/basic/macro.h
@@ -0,0 +1,657 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#define _printf_(a, b) __attribute__((__format__(printf, a, b)))
+#ifdef __clang__
+# define _alloc_(...)
+#else
+# define _alloc_(...) __attribute__((__alloc_size__(__VA_ARGS__)))
+#endif
+#define _sentinel_ __attribute__((__sentinel__))
+#define _section_(x) __attribute__((__section__(x)))
+#define _used_ __attribute__((__used__))
+#define _unused_ __attribute__((__unused__))
+#define _destructor_ __attribute__((__destructor__))
+#define _pure_ __attribute__((__pure__))
+#define _const_ __attribute__((__const__))
+#define _deprecated_ __attribute__((__deprecated__))
+#define _packed_ __attribute__((__packed__))
+#define _malloc_ __attribute__((__malloc__))
+#define _weak_ __attribute__((__weak__))
+#define _likely_(x) (__builtin_expect(!!(x), 1))
+#define _unlikely_(x) (__builtin_expect(!!(x), 0))
+#define _public_ __attribute__((__visibility__("default")))
+#define _hidden_ __attribute__((__visibility__("hidden")))
+#define _weakref_(x) __attribute__((__weakref__(#x)))
+#define _align_(x) __attribute__((__aligned__(x)))
+#define _alignas_(x) __attribute__((__aligned__(__alignof(x))))
+#define _alignptr_ __attribute__((__aligned__(sizeof(void*))))
+#define _cleanup_(x) __attribute__((__cleanup__(x)))
+#if __GNUC__ >= 7
+#define _fallthrough_ __attribute__((__fallthrough__))
+#else
+#define _fallthrough_
+#endif
+/* Define C11 noreturn without <stdnoreturn.h> and even on older gcc
+ * compiler versions */
+#ifndef _noreturn_
+#if __STDC_VERSION__ >= 201112L
+#define _noreturn_ _Noreturn
+#else
+#define _noreturn_ __attribute__((__noreturn__))
+#endif
+#endif
+
+#if !defined(HAS_FEATURE_MEMORY_SANITIZER)
+# if defined(__has_feature)
+# if __has_feature(memory_sanitizer)
+# define HAS_FEATURE_MEMORY_SANITIZER 1
+# endif
+# endif
+# if !defined(HAS_FEATURE_MEMORY_SANITIZER)
+# define HAS_FEATURE_MEMORY_SANITIZER 0
+# endif
+#endif
+
+#if !defined(HAS_FEATURE_ADDRESS_SANITIZER)
+# ifdef __SANITIZE_ADDRESS__
+# define HAS_FEATURE_ADDRESS_SANITIZER 1
+# elif defined(__has_feature)
+# if __has_feature(address_sanitizer)
+# define HAS_FEATURE_ADDRESS_SANITIZER 1
+# endif
+# endif
+# if !defined(HAS_FEATURE_ADDRESS_SANITIZER)
+# define HAS_FEATURE_ADDRESS_SANITIZER 0
+# endif
+#endif
+
+/* Note: on GCC "no_sanitize_address" is a function attribute only, on llvm it may also be applied to global
+ * variables. We define a specific macro which knows this. Note that on GCC we don't need this decorator so much, since
+ * our primary usecase for this attribute is registration structures placed in named ELF sections which shall not be
+ * padded, but GCC doesn't pad those anyway if AddressSanitizer is enabled. */
+#if HAS_FEATURE_ADDRESS_SANITIZER && defined(__clang__)
+#define _variable_no_sanitize_address_ __attribute__((__no_sanitize_address__))
+#else
+#define _variable_no_sanitize_address_
+#endif
+
+/* Apparently there's no has_feature() call defined to check for ubsan, hence let's define this
+ * unconditionally on llvm */
+#if defined(__clang__)
+#define _function_no_sanitize_float_cast_overflow_ __attribute__((no_sanitize("float-cast-overflow")))
+#else
+#define _function_no_sanitize_float_cast_overflow_
+#endif
+
+/* Temporarily disable some warnings */
+#define DISABLE_WARNING_DEPRECATED_DECLARATIONS \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+
+#define DISABLE_WARNING_FORMAT_NONLITERAL \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wformat-nonliteral\"")
+
+#define DISABLE_WARNING_MISSING_PROTOTYPES \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wmissing-prototypes\"")
+
+#define DISABLE_WARNING_NONNULL \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wnonnull\"")
+
+#define DISABLE_WARNING_SHADOW \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wshadow\"")
+
+#define DISABLE_WARNING_INCOMPATIBLE_POINTER_TYPES \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wincompatible-pointer-types\"")
+
+#if HAVE_WSTRINGOP_TRUNCATION
+# define DISABLE_WARNING_STRINGOP_TRUNCATION \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wstringop-truncation\"")
+#else
+# define DISABLE_WARNING_STRINGOP_TRUNCATION \
+ _Pragma("GCC diagnostic push")
+#endif
+
+#define DISABLE_WARNING_FLOAT_EQUAL \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
+
+#define DISABLE_WARNING_TYPE_LIMITS \
+ _Pragma("GCC diagnostic push"); \
+ _Pragma("GCC diagnostic ignored \"-Wtype-limits\"")
+
+#define REENABLE_WARNING \
+ _Pragma("GCC diagnostic pop")
+
+/* automake test harness */
+#define EXIT_TEST_SKIP 77
+
+#define XSTRINGIFY(x) #x
+#define STRINGIFY(x) XSTRINGIFY(x)
+
+#define XCONCATENATE(x, y) x ## y
+#define CONCATENATE(x, y) XCONCATENATE(x, y)
+
+#define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq))
+#define UNIQ __COUNTER__
+
+/* builtins */
+#if __SIZEOF_INT__ == 4
+#define BUILTIN_FFS_U32(x) __builtin_ffs(x);
+#elif __SIZEOF_LONG__ == 4
+#define BUILTIN_FFS_U32(x) __builtin_ffsl(x);
+#else
+#error "neither int nor long are four bytes long?!?"
+#endif
+
+/* Rounds up */
+
+#define ALIGN4(l) (((l) + 3) & ~3)
+#define ALIGN8(l) (((l) + 7) & ~7)
+
+#if __SIZEOF_POINTER__ == 8
+#define ALIGN(l) ALIGN8(l)
+#elif __SIZEOF_POINTER__ == 4
+#define ALIGN(l) ALIGN4(l)
+#else
+#error "Wut? Pointers are neither 4 nor 8 bytes long?"
+#endif
+
+#define ALIGN_PTR(p) ((void*) ALIGN((unsigned long) (p)))
+#define ALIGN4_PTR(p) ((void*) ALIGN4((unsigned long) (p)))
+#define ALIGN8_PTR(p) ((void*) ALIGN8((unsigned long) (p)))
+
+static inline size_t ALIGN_TO(size_t l, size_t ali) {
+ return ((l + ali - 1) & ~(ali - 1));
+}
+
+#define ALIGN_TO_PTR(p, ali) ((void*) ALIGN_TO((unsigned long) (p), (ali)))
+
+/* align to next higher power-of-2 (except for: 0 => 0, overflow => 0) */
+static inline unsigned long ALIGN_POWER2(unsigned long u) {
+
+ /* Avoid subtraction overflow */
+ if (u == 0)
+ return 0;
+
+ /* clz(0) is undefined */
+ if (u == 1)
+ return 1;
+
+ /* left-shift overflow is undefined */
+ if (__builtin_clzl(u - 1UL) < 1)
+ return 0;
+
+ return 1UL << (sizeof(u) * 8 - __builtin_clzl(u - 1UL));
+}
+
+static inline size_t GREEDY_ALLOC_ROUND_UP(size_t l) {
+ size_t m;
+
+ /* Round up allocation sizes a bit to some reasonable, likely larger value. This is supposed to be
+ * used for cases which are likely called in an allocation loop of some form, i.e. that repetitively
+ * grow stuff, for example strv_extend() and suchlike.
+ *
+ * Note the difference to GREEDY_REALLOC() here, as this helper operates on a single size value only,
+ * and rounds up to next multiple of 2, needing no further counter.
+ *
+ * Note the benefits of direct ALIGN_POWER2() usage: type-safety for size_t, sane handling for very
+ * small (i.e. <= 2) and safe handling for very large (i.e. > SSIZE_MAX) values. */
+
+ if (l <= 2)
+ return 2; /* Never allocate less than 2 of something. */
+
+ m = ALIGN_POWER2(l);
+ if (m == 0) /* overflow? */
+ return l;
+
+ return m;
+}
+
+#ifndef __COVERITY__
+# define VOID_0 ((void)0)
+#else
+# define VOID_0 ((void*)0)
+#endif
+
+#define ELEMENTSOF(x) \
+ (__builtin_choose_expr( \
+ !__builtin_types_compatible_p(typeof(x), typeof(&*(x))), \
+ sizeof(x)/sizeof((x)[0]), \
+ VOID_0))
+
+/*
+ * STRLEN - return the length of a string literal, minus the trailing NUL byte.
+ * Contrary to strlen(), this is a constant expression.
+ * @x: a string literal.
+ */
+#define STRLEN(x) (sizeof(""x"") - 1)
+
+/*
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ */
+#define container_of(ptr, type, member) __container_of(UNIQ, (ptr), type, member)
+#define __container_of(uniq, ptr, type, member) \
+ ({ \
+ const typeof( ((type*)0)->member ) *UNIQ_T(A, uniq) = (ptr); \
+ (type*)( (char *)UNIQ_T(A, uniq) - offsetof(type, member) ); \
+ })
+
+#undef MAX
+#define MAX(a, b) __MAX(UNIQ, (a), UNIQ, (b))
+#define __MAX(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
+ })
+
+/* evaluates to (void) if _A or _B are not constant or of different types */
+#define CONST_MAX(_A, _B) \
+ (__builtin_choose_expr( \
+ __builtin_constant_p(_A) && \
+ __builtin_constant_p(_B) && \
+ __builtin_types_compatible_p(typeof(_A), typeof(_B)), \
+ ((_A) > (_B)) ? (_A) : (_B), \
+ VOID_0))
+
+/* takes two types and returns the size of the larger one */
+#define MAXSIZE(A, B) (sizeof(union _packed_ { typeof(A) a; typeof(B) b; }))
+
+#define MAX3(x, y, z) \
+ ({ \
+ const typeof(x) _c = MAX(x, y); \
+ MAX(_c, z); \
+ })
+
+#define MAX4(x, y, z, a) \
+ ({ \
+ const typeof(x) _d = MAX3(x, y, z); \
+ MAX(_d, a); \
+ })
+
+#undef MIN
+#define MIN(a, b) __MIN(UNIQ, (a), UNIQ, (b))
+#define __MIN(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) < UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
+ })
+
+/* evaluates to (void) if _A or _B are not constant or of different types */
+#define CONST_MIN(_A, _B) \
+ (__builtin_choose_expr( \
+ __builtin_constant_p(_A) && \
+ __builtin_constant_p(_B) && \
+ __builtin_types_compatible_p(typeof(_A), typeof(_B)), \
+ ((_A) < (_B)) ? (_A) : (_B), \
+ VOID_0))
+
+#define MIN3(x, y, z) \
+ ({ \
+ const typeof(x) _c = MIN(x, y); \
+ MIN(_c, z); \
+ })
+
+#define LESS_BY(a, b) __LESS_BY(UNIQ, (a), UNIQ, (b))
+#define __LESS_BY(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? UNIQ_T(A, aq) - UNIQ_T(B, bq) : 0; \
+ })
+
+#define CMP(a, b) __CMP(UNIQ, (a), UNIQ, (b))
+#define __CMP(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) < UNIQ_T(B, bq) ? -1 : \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? 1 : 0; \
+ })
+
+#undef CLAMP
+#define CLAMP(x, low, high) __CLAMP(UNIQ, (x), UNIQ, (low), UNIQ, (high))
+#define __CLAMP(xq, x, lowq, low, highq, high) \
+ ({ \
+ const typeof(x) UNIQ_T(X, xq) = (x); \
+ const typeof(low) UNIQ_T(LOW, lowq) = (low); \
+ const typeof(high) UNIQ_T(HIGH, highq) = (high); \
+ UNIQ_T(X, xq) > UNIQ_T(HIGH, highq) ? \
+ UNIQ_T(HIGH, highq) : \
+ UNIQ_T(X, xq) < UNIQ_T(LOW, lowq) ? \
+ UNIQ_T(LOW, lowq) : \
+ UNIQ_T(X, xq); \
+ })
+
+/* [(x + y - 1) / y] suffers from an integer overflow, even though the
+ * computation should be possible in the given type. Therefore, we use
+ * [x / y + !!(x % y)]. Note that on "Real CPUs" a division returns both the
+ * quotient and the remainder, so both should be equally fast. */
+#define DIV_ROUND_UP(x, y) __DIV_ROUND_UP(UNIQ, (x), UNIQ, (y))
+#define __DIV_ROUND_UP(xq, x, yq, y) \
+ ({ \
+ const typeof(x) UNIQ_T(X, xq) = (x); \
+ const typeof(y) UNIQ_T(Y, yq) = (y); \
+ (UNIQ_T(X, xq) / UNIQ_T(Y, yq) + !!(UNIQ_T(X, xq) % UNIQ_T(Y, yq))); \
+ })
+
+#ifdef __COVERITY__
+
+/* Use special definitions of assertion macros in order to prevent
+ * false positives of ASSERT_SIDE_EFFECT on Coverity static analyzer
+ * for uses of assert_se() and assert_return().
+ *
+ * These definitions make expression go through a (trivial) function
+ * call to ensure they are not discarded. Also use ! or !! to ensure
+ * the boolean expressions are seen as such.
+ *
+ * This technique has been described and recommended in:
+ * https://community.synopsys.com/s/question/0D534000046Yuzb/suppressing-assertsideeffect-for-functions-that-allow-for-sideeffects
+ */
+
+extern void __coverity_panic__(void);
+
+static inline void __coverity_check__(int condition) {
+ if (!condition)
+ __coverity_panic__();
+}
+
+static inline int __coverity_check_and_return__(int condition) {
+ return condition;
+}
+
+#define assert_message_se(expr, message) __coverity_check__(!!(expr))
+
+#define assert_log(expr, message) __coverity_check_and_return__(!!(expr))
+
+#else /* ! __COVERITY__ */
+
+#define assert_message_se(expr, message) \
+ do { \
+ if (_unlikely_(!(expr))) \
+ log_assert_failed(message, PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__); \
+ } while (false)
+
+#define assert_log(expr, message) ((_likely_(expr)) \
+ ? (true) \
+ : (log_assert_failed_return(message, PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__), false))
+
+#endif /* __COVERITY__ */
+
+#define assert_se(expr) assert_message_se(expr, #expr)
+
+/* We override the glibc assert() here. */
+#undef assert
+#ifdef NDEBUG
+#define assert(expr) do {} while (false)
+#else
+#define assert(expr) assert_message_se(expr, #expr)
+#endif
+
+#define assert_not_reached(t) \
+ log_assert_failed_unreachable(t, PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__)
+
+#if defined(static_assert)
+#define assert_cc(expr) \
+ static_assert(expr, #expr)
+#else
+#define assert_cc(expr) \
+ struct CONCATENATE(_assert_struct_, __COUNTER__) { \
+ char x[(expr) ? 0 : -1]; \
+ }
+#endif
+
+#define assert_return(expr, r) \
+ do { \
+ if (!assert_log(expr, #expr)) \
+ return (r); \
+ } while (false)
+
+#define assert_return_errno(expr, r, err) \
+ do { \
+ if (!assert_log(expr, #expr)) { \
+ errno = err; \
+ return (r); \
+ } \
+ } while (false)
+
+#define return_with_errno(r, err) \
+ do { \
+ errno = abs(err); \
+ return r; \
+ } while (false)
+
+#define PTR_TO_INT(p) ((int) ((intptr_t) (p)))
+#define INT_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT(p) ((unsigned) ((uintptr_t) (p)))
+#define UINT_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_LONG(p) ((long) ((intptr_t) (p)))
+#define LONG_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_ULONG(p) ((unsigned long) ((uintptr_t) (p)))
+#define ULONG_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_UINT8(p) ((uint8_t) ((uintptr_t) (p)))
+#define UINT8_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_INT32(p) ((int32_t) ((intptr_t) (p)))
+#define INT32_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT32(p) ((uint32_t) ((uintptr_t) (p)))
+#define UINT32_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_INT64(p) ((int64_t) ((intptr_t) (p)))
+#define INT64_TO_PTR(u) ((void *) ((intptr_t) (u)))
+#define PTR_TO_UINT64(p) ((uint64_t) ((uintptr_t) (p)))
+#define UINT64_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define PTR_TO_SIZE(p) ((size_t) ((uintptr_t) (p)))
+#define SIZE_TO_PTR(u) ((void *) ((uintptr_t) (u)))
+
+#define CHAR_TO_STR(x) ((char[2]) { x, 0 })
+
+#define char_array_0(x) x[sizeof(x)-1] = 0;
+
+#define sizeof_field(struct_type, member) sizeof(((struct_type *) 0)->member)
+
+/* Returns the number of chars needed to format variables of the
+ * specified type as a decimal string. Adds in extra space for a
+ * negative '-' prefix (hence works correctly on signed
+ * types). Includes space for the trailing NUL. */
+#define DECIMAL_STR_MAX(type) \
+ (2+(sizeof(type) <= 1 ? 3 : \
+ sizeof(type) <= 2 ? 5 : \
+ sizeof(type) <= 4 ? 10 : \
+ sizeof(type) <= 8 ? 20 : sizeof(int[-2*(sizeof(type) > 8)])))
+
+#define DECIMAL_STR_WIDTH(x) \
+ ({ \
+ typeof(x) _x_ = (x); \
+ unsigned ans = 1; \
+ while ((_x_ /= 10) != 0) \
+ ans++; \
+ ans; \
+ })
+
+#define UPDATE_FLAG(orig, flag, b) \
+ ((b) ? ((orig) | (flag)) : ((orig) & ~(flag)))
+#define SET_FLAG(v, flag, b) \
+ (v) = UPDATE_FLAG(v, flag, b)
+#define FLAGS_SET(v, flags) \
+ ((~(v) & (flags)) == 0)
+
+#define CASE_F(X) case X:
+#define CASE_F_1(CASE, X) CASE_F(X)
+#define CASE_F_2(CASE, X, ...) CASE(X) CASE_F_1(CASE, __VA_ARGS__)
+#define CASE_F_3(CASE, X, ...) CASE(X) CASE_F_2(CASE, __VA_ARGS__)
+#define CASE_F_4(CASE, X, ...) CASE(X) CASE_F_3(CASE, __VA_ARGS__)
+#define CASE_F_5(CASE, X, ...) CASE(X) CASE_F_4(CASE, __VA_ARGS__)
+#define CASE_F_6(CASE, X, ...) CASE(X) CASE_F_5(CASE, __VA_ARGS__)
+#define CASE_F_7(CASE, X, ...) CASE(X) CASE_F_6(CASE, __VA_ARGS__)
+#define CASE_F_8(CASE, X, ...) CASE(X) CASE_F_7(CASE, __VA_ARGS__)
+#define CASE_F_9(CASE, X, ...) CASE(X) CASE_F_8(CASE, __VA_ARGS__)
+#define CASE_F_10(CASE, X, ...) CASE(X) CASE_F_9(CASE, __VA_ARGS__)
+#define CASE_F_11(CASE, X, ...) CASE(X) CASE_F_10(CASE, __VA_ARGS__)
+#define CASE_F_12(CASE, X, ...) CASE(X) CASE_F_11(CASE, __VA_ARGS__)
+#define CASE_F_13(CASE, X, ...) CASE(X) CASE_F_12(CASE, __VA_ARGS__)
+#define CASE_F_14(CASE, X, ...) CASE(X) CASE_F_13(CASE, __VA_ARGS__)
+#define CASE_F_15(CASE, X, ...) CASE(X) CASE_F_14(CASE, __VA_ARGS__)
+#define CASE_F_16(CASE, X, ...) CASE(X) CASE_F_15(CASE, __VA_ARGS__)
+#define CASE_F_17(CASE, X, ...) CASE(X) CASE_F_16(CASE, __VA_ARGS__)
+#define CASE_F_18(CASE, X, ...) CASE(X) CASE_F_17(CASE, __VA_ARGS__)
+#define CASE_F_19(CASE, X, ...) CASE(X) CASE_F_18(CASE, __VA_ARGS__)
+#define CASE_F_20(CASE, X, ...) CASE(X) CASE_F_19(CASE, __VA_ARGS__)
+
+#define GET_CASE_F(_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,NAME,...) NAME
+#define FOR_EACH_MAKE_CASE(...) \
+ GET_CASE_F(__VA_ARGS__,CASE_F_20,CASE_F_19,CASE_F_18,CASE_F_17,CASE_F_16,CASE_F_15,CASE_F_14,CASE_F_13,CASE_F_12,CASE_F_11, \
+ CASE_F_10,CASE_F_9,CASE_F_8,CASE_F_7,CASE_F_6,CASE_F_5,CASE_F_4,CASE_F_3,CASE_F_2,CASE_F_1) \
+ (CASE_F,__VA_ARGS__)
+
+#define IN_SET(x, ...) \
+ ({ \
+ bool _found = false; \
+ /* If the build breaks in the line below, you need to extend the case macros. (We use "long double" as \
+ * type for the array, in the hope that checkers such as ubsan don't complain that the initializers for \
+ * the array are not representable by the base type. Ideally we'd use typeof(x) as base type, but that \
+ * doesn't work, as we want to use this on bitfields and gcc refuses typeof() on bitfields.) */ \
+ static const long double __assert_in_set[] _unused_ = { __VA_ARGS__ }; \
+ assert_cc(ELEMENTSOF(__assert_in_set) <= 20); \
+ switch(x) { \
+ FOR_EACH_MAKE_CASE(__VA_ARGS__) \
+ _found = true; \
+ break; \
+ default: \
+ break; \
+ } \
+ _found; \
+ })
+
+#define SWAP_TWO(x, y) do { \
+ typeof(x) _t = (x); \
+ (x) = (y); \
+ (y) = (_t); \
+ } while (false)
+
+#define STRV_MAKE(...) ((char**) ((const char*[]) { __VA_ARGS__, NULL }))
+#define STRV_MAKE_EMPTY ((char*[1]) { NULL })
+
+/* Pointers range from NULL to POINTER_MAX */
+#define POINTER_MAX ((void*) UINTPTR_MAX)
+
+/* Iterates through a specified list of pointers. Accepts NULL pointers, but uses POINTER_MAX as internal marker for EOL. */
+#define FOREACH_POINTER(p, x, ...) \
+ for (typeof(p) *_l = (typeof(p)[]) { ({ p = x; }), ##__VA_ARGS__, POINTER_MAX }; \
+ p != (typeof(p)) POINTER_MAX; \
+ p = *(++_l))
+
+/* Define C11 thread_local attribute even on older gcc compiler
+ * version */
+#ifndef thread_local
+/*
+ * Don't break on glibc < 2.16 that doesn't define __STDC_NO_THREADS__
+ * see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53769
+ */
+#if __STDC_VERSION__ >= 201112L && !(defined(__STDC_NO_THREADS__) || (defined(__GNU_LIBRARY__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16))
+#define thread_local _Thread_local
+#else
+#define thread_local __thread
+#endif
+#endif
+
+#define DEFINE_TRIVIAL_DESTRUCTOR(name, type, func) \
+ static inline void name(type *p) { \
+ func(p); \
+ }
+
+#define DEFINE_TRIVIAL_CLEANUP_FUNC(type, func) \
+ static inline void func##p(type *p) { \
+ if (*p) \
+ func(*p); \
+ }
+
+#define _DEFINE_TRIVIAL_REF_FUNC(type, name, scope) \
+ scope type *name##_ref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ assert(p->n_ref > 0); \
+ p->n_ref++; \
+ return p; \
+ }
+
+#define _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, scope) \
+ scope type *name##_unref(type *p) { \
+ if (!p) \
+ return NULL; \
+ \
+ assert(p->n_ref > 0); \
+ p->n_ref--; \
+ if (p->n_ref > 0) \
+ return NULL; \
+ \
+ return free_func(p); \
+ }
+
+#define DEFINE_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name,)
+#define DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name, static)
+#define DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name) \
+ _DEFINE_TRIVIAL_REF_FUNC(type, name, _public_)
+
+#define DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func,)
+#define DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, static)
+#define DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func) \
+ _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, _public_)
+
+#define DEFINE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+#define DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+#define DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \
+ DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name); \
+ DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func);
+
+/* A macro to force copying of a variable from memory. This is useful whenever we want to read something from
+ * memory and want to make sure the compiler won't optimize away the destination variable for us. It's not
+ * supposed to be a full CPU memory barrier, i.e. CPU is still allowed to reorder the reads, but it is not
+ * allowed to remove our local copies of the variables. We want this to work for unaligned memory, hence
+ * memcpy() is great for our purposes. */
+#define READ_NOW(x) \
+ ({ \
+ typeof(x) _copy; \
+ memcpy(&_copy, &(x), sizeof(_copy)); \
+ asm volatile ("" : : : "memory"); \
+ _copy; \
+ })
+
+static inline size_t size_add(size_t x, size_t y) {
+ return y >= SIZE_MAX - x ? SIZE_MAX : x + y;
+}
+
+#include "log.h"
diff --git a/src/basic/memfd-util.c b/src/basic/memfd-util.c
new file mode 100644
index 0000000..0b8ecea
--- /dev/null
+++ b/src/basic/memfd-util.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#if HAVE_LINUX_MEMFD_H
+#include <linux/memfd.h>
+#endif
+#include <stdio.h>
+#include <sys/prctl.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing_fcntl.h"
+#include "missing_mman.h"
+#include "missing_syscall.h"
+#include "string-util.h"
+#include "utf8.h"
+
+int memfd_new(const char *name) {
+ _cleanup_free_ char *g = NULL;
+ int fd;
+
+ if (!name) {
+ char pr[17] = {};
+
+ /* If no name is specified we generate one. We include
+ * a hint indicating our library implementation, and
+ * add the thread name to it */
+
+ assert_se(prctl(PR_GET_NAME, (unsigned long) pr) >= 0);
+
+ if (isempty(pr))
+ name = "sd";
+ else {
+ _cleanup_free_ char *e = NULL;
+
+ e = utf8_escape_invalid(pr);
+ if (!e)
+ return -ENOMEM;
+
+ g = strjoin("sd-", e);
+ if (!g)
+ return -ENOMEM;
+
+ name = g;
+ }
+ }
+
+ fd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int memfd_map(int fd, uint64_t offset, size_t size, void **p) {
+ void *q;
+ int sealed;
+
+ assert(fd >= 0);
+ assert(size > 0);
+ assert(p);
+
+ sealed = memfd_get_sealed(fd);
+ if (sealed < 0)
+ return sealed;
+
+ if (sealed)
+ q = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, offset);
+ else
+ q = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
+
+ if (q == MAP_FAILED)
+ return -errno;
+
+ *p = q;
+ return 0;
+}
+
+int memfd_set_sealed(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int memfd_get_sealed(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0)
+ return -errno;
+
+ return r == (F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL);
+}
+
+int memfd_get_size(int fd, uint64_t *sz) {
+ struct stat stat;
+ int r;
+
+ assert(fd >= 0);
+ assert(sz);
+
+ r = fstat(fd, &stat);
+ if (r < 0)
+ return -errno;
+
+ *sz = stat.st_size;
+ return 0;
+}
+
+int memfd_set_size(int fd, uint64_t sz) {
+ int r;
+
+ assert(fd >= 0);
+
+ r = ftruncate(fd, sz);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int memfd_new_and_map(const char *name, size_t sz, void **p) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(sz > 0);
+ assert(p);
+
+ fd = memfd_new(name);
+ if (fd < 0)
+ return fd;
+
+ r = memfd_set_size(fd, sz);
+ if (r < 0)
+ return r;
+
+ r = memfd_map(fd, 0, sz, p);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(fd);
+}
diff --git a/src/basic/memfd-util.h b/src/basic/memfd-util.h
new file mode 100644
index 0000000..8596c1a
--- /dev/null
+++ b/src/basic/memfd-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+int memfd_new(const char *name);
+int memfd_new_and_map(const char *name, size_t sz, void **p);
+
+int memfd_map(int fd, uint64_t offset, size_t size, void **p);
+
+int memfd_set_sealed(int fd);
+int memfd_get_sealed(int fd);
+
+int memfd_get_size(int fd, uint64_t *sz);
+int memfd_set_size(int fd, uint64_t sz);
diff --git a/src/basic/memory-util.c b/src/basic/memory-util.c
new file mode 100644
index 0000000..3338e35
--- /dev/null
+++ b/src/basic/memory-util.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "memory-util.h"
+
+size_t page_size(void) {
+ static thread_local size_t pgsz = 0;
+ long r;
+
+ if (_likely_(pgsz > 0))
+ return pgsz;
+
+ r = sysconf(_SC_PAGESIZE);
+ assert(r > 0);
+
+ pgsz = (size_t) r;
+ return pgsz;
+}
+
+bool memeqzero(const void *data, size_t length) {
+ /* Does the buffer consist entirely of NULs?
+ * Copied from https://github.com/systemd/casync/, copied in turn from
+ * https://github.com/rustyrussell/ccan/blob/master/ccan/mem/mem.c#L92,
+ * which is licensed CC-0.
+ */
+
+ const uint8_t *p = data;
+ size_t i;
+
+ /* Check first 16 bytes manually */
+ for (i = 0; i < 16; i++, length--) {
+ if (length == 0)
+ return true;
+ if (p[i])
+ return false;
+ }
+
+ /* Now we know first 16 bytes are NUL, memcmp with self. */
+ return memcmp(data, p + i, length) == 0;
+}
+
+#if !HAVE_EXPLICIT_BZERO
+/*
+ * The pointer to memset() is volatile so that compiler must de-reference the pointer and can't assume that
+ * it points to any function in particular (such as memset(), which it then might further "optimize"). This
+ * approach is inspired by openssl's crypto/mem_clr.c.
+ */
+typedef void *(*memset_t)(void *,int,size_t);
+
+static volatile memset_t memset_func = memset;
+
+void* explicit_bzero_safe(void *p, size_t l) {
+ if (l > 0)
+ memset_func(p, '\0', l);
+
+ return p;
+}
+#endif
diff --git a/src/basic/memory-util.h b/src/basic/memory-util.h
new file mode 100644
index 0000000..179edd2
--- /dev/null
+++ b/src/basic/memory-util.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <malloc.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+
+size_t page_size(void) _pure_;
+#define PAGE_ALIGN(l) ALIGN_TO((l), page_size())
+#define PAGE_ALIGN_DOWN(l) ((l) & ~(page_size() - 1))
+#define PAGE_OFFSET(l) ((l) & (page_size() - 1))
+
+/* Normal memcpy requires src to be nonnull. We do nothing if n is 0. */
+static inline void memcpy_safe(void *dst, const void *src, size_t n) {
+ if (n == 0)
+ return;
+ assert(src);
+ memcpy(dst, src, n);
+}
+
+/* Normal memcmp requires s1 and s2 to be nonnull. We do nothing if n is 0. */
+static inline int memcmp_safe(const void *s1, const void *s2, size_t n) {
+ if (n == 0)
+ return 0;
+ assert(s1);
+ assert(s2);
+ return memcmp(s1, s2, n);
+}
+
+/* Compare s1 (length n1) with s2 (length n2) in lexicographic order. */
+static inline int memcmp_nn(const void *s1, size_t n1, const void *s2, size_t n2) {
+ return memcmp_safe(s1, s2, MIN(n1, n2))
+ ?: CMP(n1, n2);
+}
+
+#define memzero(x,l) \
+ ({ \
+ size_t _l_ = (l); \
+ if (_l_ > 0) \
+ memset(x, 0, _l_); \
+ })
+
+#define zero(x) (memzero(&(x), sizeof(x)))
+
+bool memeqzero(const void *data, size_t length);
+
+#define eqzero(x) memeqzero(x, sizeof(x))
+
+static inline void *mempset(void *s, int c, size_t n) {
+ memset(s, c, n);
+ return (uint8_t*)s + n;
+}
+
+/* Normal memmem() requires haystack to be nonnull, which is annoying for zero-length buffers */
+static inline void *memmem_safe(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) {
+
+ if (needlelen <= 0)
+ return (void*) haystack;
+
+ if (haystacklen < needlelen)
+ return NULL;
+
+ assert(haystack);
+ assert(needle);
+
+ return memmem(haystack, haystacklen, needle, needlelen);
+}
+
+#if HAVE_EXPLICIT_BZERO
+static inline void* explicit_bzero_safe(void *p, size_t l) {
+ if (l > 0)
+ explicit_bzero(p, l);
+
+ return p;
+}
+#else
+void *explicit_bzero_safe(void *p, size_t l);
+#endif
+
+static inline void* erase_and_free(void *p) {
+ size_t l;
+
+ if (!p)
+ return NULL;
+
+ l = malloc_usable_size(p);
+ explicit_bzero_safe(p, l);
+ return mfree(p);
+}
+
+static inline void erase_and_freep(void *p) {
+ erase_and_free(*(void**) p);
+}
+
+/* Use with _cleanup_ to erase a single 'char' when leaving scope */
+static inline void erase_char(char *p) {
+ explicit_bzero_safe(p, sizeof(char));
+}
diff --git a/src/basic/mempool.c b/src/basic/mempool.c
new file mode 100644
index 0000000..9eedc20
--- /dev/null
+++ b/src/basic/mempool.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "env-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "mempool.h"
+#include "process-util.h"
+#include "util.h"
+
+struct pool {
+ struct pool *next;
+ size_t n_tiles;
+ size_t n_used;
+};
+
+void* mempool_alloc_tile(struct mempool *mp) {
+ size_t i;
+
+ /* When a tile is released we add it to the list and simply
+ * place the next pointer at its offset 0. */
+
+ assert(mp->tile_size >= sizeof(void*));
+ assert(mp->at_least > 0);
+
+ if (mp->freelist) {
+ void *r;
+
+ r = mp->freelist;
+ mp->freelist = * (void**) mp->freelist;
+ return r;
+ }
+
+ if (_unlikely_(!mp->first_pool) ||
+ _unlikely_(mp->first_pool->n_used >= mp->first_pool->n_tiles)) {
+ size_t size, n;
+ struct pool *p;
+
+ n = mp->first_pool ? mp->first_pool->n_tiles : 0;
+ n = MAX(mp->at_least, n * 2);
+ size = PAGE_ALIGN(ALIGN(sizeof(struct pool)) + n*mp->tile_size);
+ n = (size - ALIGN(sizeof(struct pool))) / mp->tile_size;
+
+ p = malloc(size);
+ if (!p)
+ return NULL;
+
+ p->next = mp->first_pool;
+ p->n_tiles = n;
+ p->n_used = 0;
+
+ mp->first_pool = p;
+ }
+
+ i = mp->first_pool->n_used++;
+
+ return ((uint8_t*) mp->first_pool) + ALIGN(sizeof(struct pool)) + i*mp->tile_size;
+}
+
+void* mempool_alloc0_tile(struct mempool *mp) {
+ void *p;
+
+ p = mempool_alloc_tile(mp);
+ if (p)
+ memzero(p, mp->tile_size);
+ return p;
+}
+
+void mempool_free_tile(struct mempool *mp, void *p) {
+ * (void**) p = mp->freelist;
+ mp->freelist = p;
+}
+
+bool mempool_enabled(void) {
+ static int b = -1;
+
+ if (!is_main_thread())
+ return false;
+
+ if (!mempool_use_allowed)
+ b = false;
+ if (b < 0)
+ b = getenv_bool("SYSTEMD_MEMPOOL") != 0;
+
+ return b;
+}
+
+#if VALGRIND
+void mempool_drop(struct mempool *mp) {
+ struct pool *p = mp->first_pool;
+ while (p) {
+ struct pool *n;
+ n = p->next;
+ free(p);
+ p = n;
+ }
+}
+#endif
diff --git a/src/basic/mempool.h b/src/basic/mempool.h
new file mode 100644
index 0000000..0fe2f27
--- /dev/null
+++ b/src/basic/mempool.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct pool;
+
+struct mempool {
+ struct pool *first_pool;
+ void *freelist;
+ size_t tile_size;
+ unsigned at_least;
+};
+
+void* mempool_alloc_tile(struct mempool *mp);
+void* mempool_alloc0_tile(struct mempool *mp);
+void mempool_free_tile(struct mempool *mp, void *p);
+
+#define DEFINE_MEMPOOL(pool_name, tile_type, alloc_at_least) \
+static struct mempool pool_name = { \
+ .tile_size = sizeof(tile_type), \
+ .at_least = alloc_at_least, \
+}
+
+extern const bool mempool_use_allowed;
+bool mempool_enabled(void);
+
+#if VALGRIND
+void mempool_drop(struct mempool *mp);
+#endif
diff --git a/src/basic/meson.build b/src/basic/meson.build
new file mode 100644
index 0000000..1183ea8
--- /dev/null
+++ b/src/basic/meson.build
@@ -0,0 +1,350 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+basic_sources = files('''
+ MurmurHash2.c
+ MurmurHash2.h
+ af-list.c
+ af-list.h
+ alloc-util.c
+ alloc-util.h
+ architecture.c
+ architecture.h
+ arphrd-list.c
+ arphrd-list.h
+ async.c
+ async.h
+ audit-util.c
+ audit-util.h
+ blockdev-util.c
+ blockdev-util.h
+ btrfs-util.c
+ btrfs-util.h
+ build.h
+ bus-label.c
+ bus-label.h
+ cap-list.c
+ cap-list.h
+ capability-util.c
+ capability-util.h
+ cgroup-util.c
+ cgroup-util.h
+ chattr-util.c
+ chattr-util.h
+ conf-files.c
+ conf-files.h
+ copy.c
+ copy.h
+ def.h
+ device-nodes.c
+ device-nodes.h
+ dirent-util.c
+ dirent-util.h
+ dlfcn-util.c
+ dlfcn-util.h
+ efivars.c
+ efivars.h
+ env-file.c
+ env-file.h
+ env-util.c
+ env-util.h
+ errno-list.c
+ errno-list.h
+ errno-util.h
+ escape.c
+ escape.h
+ ether-addr-util.c
+ ether-addr-util.h
+ extract-word.c
+ extract-word.h
+ fd-util.c
+ fd-util.h
+ fileio.c
+ fileio.h
+ format-util.c
+ format-util.h
+ fs-util.c
+ fs-util.h
+ glob-util.c
+ glob-util.h
+ gunicode.c
+ gunicode.h
+ hash-funcs.c
+ hash-funcs.h
+ hashmap.c
+ hashmap.h
+ hexdecoct.c
+ hexdecoct.h
+ hostname-util.c
+ hostname-util.h
+ in-addr-util.c
+ in-addr-util.h
+ io-util.c
+ io-util.h
+ ioprio.h
+ kbd-util.c
+ kbd-util.h
+ khash.c
+ khash.h
+ label.c
+ label.h
+ limits-util.c
+ limits-util.h
+ linux/btrfs.h
+ linux/btrfs_tree.h
+ linux/can/netlink.h
+ linux/can/vxcan.h
+ linux/fib_rules.h
+ linux/fou.h
+ linux/hdlc/ioctl.h
+ linux/if.h
+ linux/if_addr.h
+ linux/if_arp.h
+ linux/if_bonding.h
+ linux/if_bridge.h
+ linux/if_ether.h
+ linux/if_link.h
+ linux/if_macsec.h
+ linux/if_tun.h
+ linux/if_tunnel.h
+ linux/in.h
+ linux/in6.h
+ linux/ipv6_route.h
+ linux/l2tp.h
+ linux/libc-compat.h
+ linux/loadavg.h
+ linux/netdevice.h
+ linux/netlink.h
+ linux/rtnetlink.h
+ linux/wireguard.h
+ list.h
+ locale-util.c
+ locale-util.h
+ log.c
+ log.h
+ login-util.c
+ login-util.h
+ macro.h
+ memfd-util.c
+ memfd-util.h
+ memory-util.c
+ memory-util.h
+ mempool.c
+ mempool.h
+ missing_audit.h
+ missing_capability.h
+ missing_drm.h
+ missing_fcntl.h
+ missing_fs.h
+ missing_input.h
+ missing_keyctl.h
+ missing_magic.h
+ missing_mman.h
+ missing_network.h
+ missing_prctl.h
+ missing_random.h
+ missing_resource.h
+ missing_sched.h
+ missing_securebits.h
+ missing_socket.h
+ missing_stat.h
+ missing_stdlib.h
+ missing_syscall.h
+ missing_timerfd.h
+ missing_type.h
+ mkdir-label.c
+ mkdir.c
+ mkdir.h
+ mountpoint-util.c
+ mountpoint-util.h
+ namespace-util.c
+ namespace-util.h
+ nss-util.h
+ nulstr-util.c
+ nulstr-util.h
+ ordered-set.c
+ ordered-set.h
+ parse-util.c
+ parse-util.h
+ path-lookup.c
+ path-lookup.h
+ path-util.c
+ path-util.h
+ prioq.c
+ prioq.h
+ proc-cmdline.c
+ proc-cmdline.h
+ process-util.c
+ process-util.h
+ procfs-util.c
+ procfs-util.h
+ pthread-util.h
+ quota-util.c
+ quota-util.h
+ random-util.c
+ random-util.h
+ ratelimit.c
+ ratelimit.h
+ raw-clone.h
+ raw-reboot.h
+ replace-var.c
+ replace-var.h
+ rlimit-util.c
+ rlimit-util.h
+ rm-rf.c
+ rm-rf.h
+ selinux-util.c
+ selinux-util.h
+ set.h
+ sigbus.c
+ sigbus.h
+ signal-util.c
+ signal-util.h
+ siphash24.c
+ siphash24.h
+ smack-util.c
+ smack-util.h
+ socket-label.c
+ socket-util.c
+ socket-util.h
+ sort-util.c
+ sort-util.h
+ sparse-endian.h
+ special.h
+ stat-util.c
+ stat-util.h
+ static-destruct.h
+ stdio-util.h
+ strbuf.c
+ strbuf.h
+ string-table.c
+ string-table.h
+ string-util.c
+ string-util.h
+ strv.c
+ strv.h
+ strxcpyx.c
+ strxcpyx.h
+ syslog-util.c
+ syslog-util.h
+ terminal-util.c
+ terminal-util.h
+ time-util.c
+ time-util.h
+ tmpfile-util.c
+ tmpfile-util.h
+ umask-util.h
+ unaligned.h
+ unit-def.c
+ unit-def.h
+ unit-name.c
+ unit-name.h
+ user-util.c
+ user-util.h
+ utf8.c
+ utf8.h
+ util.c
+ util.h
+ virt.c
+ virt.h
+ xattr-util.c
+ xattr-util.h
+'''.split())
+
+missing_audit_h = files('missing_audit.h')
+missing_capability_h = files('missing_capability.h')
+missing_socket_h = files('missing_socket.h')
+
+generate_af_list = find_program('generate-af-list.sh')
+af_list_txt = custom_target(
+ 'af-list.txt',
+ output : 'af-list.txt',
+ command : [generate_af_list, cpp, config_h, missing_socket_h],
+ capture : true)
+
+generate_arphrd_list = find_program('generate-arphrd-list.sh')
+arphrd_list_txt = custom_target(
+ 'arphrd-list.txt',
+ output : 'arphrd-list.txt',
+ command : [generate_arphrd_list, cpp, config_h],
+ capture : true)
+
+generate_cap_list = find_program('generate-cap-list.sh')
+cap_list_txt = custom_target(
+ 'cap-list.txt',
+ output : 'cap-list.txt',
+ command : [generate_cap_list, cpp, config_h, missing_capability_h],
+ capture : true)
+
+generate_errno_list = find_program('generate-errno-list.sh')
+errno_list_txt = custom_target(
+ 'errno-list.txt',
+ output : 'errno-list.txt',
+ command : [generate_errno_list, cpp],
+ capture : true)
+
+generated_gperf_headers = []
+foreach item : [['af', af_list_txt, 'af', ''],
+ ['arphrd', arphrd_list_txt, 'arphrd', 'ARPHRD_'],
+ ['cap', cap_list_txt, 'capability', ''],
+ ['errno', errno_list_txt, 'errno', '']]
+
+ fname = '@0@-from-name.gperf'.format(item[0])
+ gperf_file = custom_target(
+ fname,
+ input : item[1],
+ output : fname,
+ command : [generate_gperfs, item[2], item[3], '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-from-name.h'.format(item[0])
+ target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_@0@'.format(item[2]),
+ '-H', 'hash_@0@_name'.format(item[2]),
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-to-name.h'.format(item[0])
+ awkscript = '@0@-to-name.awk'.format(item[0])
+ target2 = custom_target(
+ fname,
+ input : [awkscript, item[1]],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+ generated_gperf_headers += [target1, target2]
+endforeach
+
+basic_sources += generated_gperf_headers
+basic_gcrypt_sources = files(
+ 'gcrypt-util.c',
+ 'gcrypt-util.h')
+
+libbasic = static_library(
+ 'basic',
+ basic_sources,
+ include_directories : includes,
+ dependencies : [versiondep,
+ threads,
+ libcap,
+ libseccomp,
+ libselinux,
+ libm,
+ libdl],
+ c_args : ['-fvisibility=default'],
+ install : false)
+
+# A convenience library that is separate from libbasic to avoid
+# unnecessary linking to libgcrypt.
+libbasic_gcrypt = static_library(
+ 'basic-gcrypt',
+ basic_gcrypt_sources,
+ include_directories : includes,
+ dependencies : [libgcrypt],
+ c_args : ['-fvisibility=default'])
diff --git a/src/basic/missing_audit.h b/src/basic/missing_audit.h
new file mode 100644
index 0000000..62e3c29
--- /dev/null
+++ b/src/basic/missing_audit.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/audit.h>
+
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#ifndef AUDIT_SERVICE_START
+#define AUDIT_SERVICE_START 1130 /* Service (daemon) start */
+#endif
+
+#ifndef AUDIT_SERVICE_STOP
+#define AUDIT_SERVICE_STOP 1131 /* Service (daemon) stop */
+#endif
+
+#ifndef MAX_AUDIT_MESSAGE_LENGTH
+#define MAX_AUDIT_MESSAGE_LENGTH 8970
+#endif
+
+#ifndef AUDIT_NLGRP_MAX
+#define AUDIT_NLGRP_READLOG 1
+#endif
diff --git a/src/basic/missing_capability.h b/src/basic/missing_capability.h
new file mode 100644
index 0000000..4cf31cb
--- /dev/null
+++ b/src/basic/missing_capability.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/capability.h>
+
+/* 3a101b8de0d39403b2c7e5c23fd0b005668acf48 (3.16) */
+#ifndef CAP_AUDIT_READ
+# define CAP_AUDIT_READ 37
+#endif
+
+/* 980737282232b752bb14dab96d77665c15889c36 (5.8) */
+#ifndef CAP_PERFMON
+# define CAP_PERFMON 38
+#endif
+
+/* a17b53c4a4b55ec322c132b6670743612229ee9c (5.8) */
+#ifndef CAP_BPF
+# define CAP_BPF 39
+#endif
+
+/* 124ea650d3072b005457faed69909221c2905a1f (5.9) */
+#ifndef CAP_CHECKPOINT_RESTORE
+# define CAP_CHECKPOINT_RESTORE 40
+#endif
+
+#define SYSTEMD_CAP_LAST_CAP CAP_CHECKPOINT_RESTORE
+
+#ifdef CAP_LAST_CAP
+# if CAP_LAST_CAP > SYSTEMD_CAP_LAST_CAP
+# if BUILD_MODE == BUILD_MODE_DEVELOPER && defined(TEST_CAPABILITY_C)
+# warning "The capability list here is outdated"
+# endif
+# else
+# undef CAP_LAST_CAP
+# endif
+#endif
+#ifndef CAP_LAST_CAP
+# define CAP_LAST_CAP SYSTEMD_CAP_LAST_CAP
+#endif
diff --git a/src/basic/missing_drm.h b/src/basic/missing_drm.h
new file mode 100644
index 0000000..0dec591
--- /dev/null
+++ b/src/basic/missing_drm.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#ifndef DRM_IOCTL_SET_MASTER
+#define DRM_IOCTL_SET_MASTER _IO('d', 0x1e)
+#endif
+
+#ifndef DRM_IOCTL_DROP_MASTER
+#define DRM_IOCTL_DROP_MASTER _IO('d', 0x1f)
+#endif
diff --git a/src/basic/missing_fcntl.h b/src/basic/missing_fcntl.h
new file mode 100644
index 0000000..00937d2
--- /dev/null
+++ b/src/basic/missing_fcntl.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fcntl.h>
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_SETPIPE_SZ
+#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
+#endif
+
+#ifndef F_GETPIPE_SZ
+#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+#ifndef F_OFD_GETLK
+#define F_OFD_GETLK 36
+#define F_OFD_SETLK 37
+#define F_OFD_SETLKW 38
+#endif
+
+#ifndef MAX_HANDLE_SZ
+#define MAX_HANDLE_SZ 128
+#endif
+
+/* The precise definition of __O_TMPFILE is arch specific; use the
+ * values defined by the kernel (note: some are hexa, some are octal,
+ * duplicated as-is from the kernel definitions):
+ * - alpha, parisc, sparc: each has a specific value;
+ * - others: they use the "generic" value.
+ */
+
+#ifndef __O_TMPFILE
+#if defined(__alpha__)
+#define __O_TMPFILE 0100000000
+#elif defined(__parisc__) || defined(__hppa__)
+#define __O_TMPFILE 0400000000
+#elif defined(__sparc__) || defined(__sparc64__)
+#define __O_TMPFILE 0x2000000
+#else
+#define __O_TMPFILE 020000000
+#endif
+#endif
+
+/* a horrid kludge trying to make sure that this will fail on old kernels */
+#ifndef O_TMPFILE
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+#endif
diff --git a/src/basic/missing_fs.h b/src/basic/missing_fs.h
new file mode 100644
index 0000000..79c1620
--- /dev/null
+++ b/src/basic/missing_fs.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* linux/fs.h */
+#ifndef RENAME_NOREPLACE /* 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c (3.15) */
+#define RENAME_NOREPLACE (1 << 0)
+#endif
+
+/* linux/fs.h or sys/mount.h */
+#ifndef MS_MOVE
+#define MS_MOVE 8192
+#endif
+
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19)
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1<<20)
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1<<21)
+#endif
+
+#ifndef MS_KERNMOUNT
+#define MS_KERNMOUNT (1<<22)
+#endif
+
+#ifndef MS_I_VERSION
+#define MS_I_VERSION (1<<23)
+#endif
+
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1<<24)
+#endif
+
+#ifndef MS_LAZYTIME
+#define MS_LAZYTIME (1<<25)
+#endif
+
+/* Not exposed yet. Defined at fs/ext4/ext4.h */
+#ifndef EXT4_IOC_RESIZE_FS
+#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
+#endif
+
+/* Not exposed yet. Defined at fs/cifs/cifsglob.h */
+#ifndef CIFS_MAGIC_NUMBER
+#define CIFS_MAGIC_NUMBER 0xFF534D42
+#endif
+
+/* linux/nsfs.h */
+#ifndef NS_GET_NSTYPE /* d95fa3c76a66b6d76b1e109ea505c55e66360f3c (4.11) */
+#define NS_GET_NSTYPE _IO(0xb7, 0x3)
+#endif
+
+#ifndef FS_PROJINHERIT_FL
+#define FS_PROJINHERIT_FL 0x20000000
+#endif
diff --git a/src/basic/missing_input.h b/src/basic/missing_input.h
new file mode 100644
index 0000000..6cf16ff
--- /dev/null
+++ b/src/basic/missing_input.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/input.h>
+#include <linux/types.h>
+
+/* linux@c7dc65737c9a607d3e6f8478659876074ad129b8 (3.12) */
+#ifndef EVIOCREVOKE
+#define EVIOCREVOKE _IOW('E', 0x91, int)
+#endif
+
+/* linux@06a16293f71927f756dcf37558a79c0b05a91641 (4.4) */
+#ifndef EVIOCSMASK
+struct input_mask {
+ __u32 type;
+ __u32 codes_size;
+ __u64 codes_ptr;
+};
+
+#define EVIOCGMASK _IOR('E', 0x92, struct input_mask)
+#define EVIOCSMASK _IOW('E', 0x93, struct input_mask)
+#endif
+
+/* linux@7611392fe8ff95ecae528b01a815ae3d72ca6b95 (3.17) */
+#ifndef INPUT_PROP_POINTING_STICK
+#define INPUT_PROP_POINTING_STICK 0x05
+#endif
+
+/* linux@500d4160abe9a2e88b12e319c13ae3ebd1e18108 (4.0) */
+#ifndef INPUT_PROP_ACCELEROMETER
+#define INPUT_PROP_ACCELEROMETER 0x06
+#endif
+
+/* linux@d09bbfd2a8408a995419dff0d2ba906013cf4cc9 (3.11) */
+#ifndef BTN_DPAD_UP
+#define BTN_DPAD_UP 0x220
+#define BTN_DPAD_DOWN 0x221
+#define BTN_DPAD_LEFT 0x222
+#define BTN_DPAD_RIGHT 0x223
+#endif
+
+/* linux@358f24704f2f016af7d504b357cdf32606091d07 (3.13) */
+#ifndef KEY_ALS_TOGGLE
+#define KEY_ALS_TOGGLE 0x230
+#endif
diff --git a/src/basic/missing_keyctl.h b/src/basic/missing_keyctl.h
new file mode 100644
index 0000000..5680483
--- /dev/null
+++ b/src/basic/missing_keyctl.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/keyctl.h>
+
+#ifndef KEYCTL_JOIN_SESSION_KEYRING
+#define KEYCTL_JOIN_SESSION_KEYRING 1
+#endif
+
+#ifndef KEYCTL_CHOWN
+#define KEYCTL_CHOWN 4
+#endif
+
+#ifndef KEYCTL_SETPERM
+#define KEYCTL_SETPERM 5
+#endif
+
+#ifndef KEYCTL_DESCRIBE
+#define KEYCTL_DESCRIBE 6
+#endif
+
+#ifndef KEYCTL_LINK
+#define KEYCTL_LINK 8
+#endif
+
+#ifndef KEYCTL_READ
+#define KEYCTL_READ 11
+#endif
+
+#ifndef KEYCTL_SET_TIMEOUT
+#define KEYCTL_SET_TIMEOUT 15
+#endif
+
+#ifndef KEY_SPEC_USER_KEYRING
+#define KEY_SPEC_USER_KEYRING -4
+#endif
+
+#ifndef KEY_SPEC_SESSION_KEYRING
+#define KEY_SPEC_SESSION_KEYRING -3
+#endif
+
+/* From linux/key.h */
+#ifndef KEY_POS_VIEW
+
+typedef int32_t key_serial_t;
+
+#define KEY_POS_VIEW 0x01000000
+#define KEY_POS_READ 0x02000000
+#define KEY_POS_WRITE 0x04000000
+#define KEY_POS_SEARCH 0x08000000
+#define KEY_POS_LINK 0x10000000
+#define KEY_POS_SETATTR 0x20000000
+#define KEY_POS_ALL 0x3f000000
+
+#define KEY_USR_VIEW 0x00010000
+#define KEY_USR_READ 0x00020000
+#define KEY_USR_WRITE 0x00040000
+#define KEY_USR_SEARCH 0x00080000
+#define KEY_USR_LINK 0x00100000
+#define KEY_USR_SETATTR 0x00200000
+#define KEY_USR_ALL 0x003f0000
+
+#define KEY_GRP_VIEW 0x00000100
+#define KEY_GRP_READ 0x00000200
+#define KEY_GRP_WRITE 0x00000400
+#define KEY_GRP_SEARCH 0x00000800
+#define KEY_GRP_LINK 0x00001000
+#define KEY_GRP_SETATTR 0x00002000
+#define KEY_GRP_ALL 0x00003f00
+
+#define KEY_OTH_VIEW 0x00000001
+#define KEY_OTH_READ 0x00000002
+#define KEY_OTH_WRITE 0x00000004
+#define KEY_OTH_SEARCH 0x00000008
+#define KEY_OTH_LINK 0x00000010
+#define KEY_OTH_SETATTR 0x00000020
+#define KEY_OTH_ALL 0x0000003f
+#endif
diff --git a/src/basic/missing_loop.h b/src/basic/missing_loop.h
new file mode 100644
index 0000000..b22ebda
--- /dev/null
+++ b/src/basic/missing_loop.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/loop.h>
+
+#ifndef LOOP_CONFIGURE
+struct loop_config {
+ __u32 fd;
+ __u32 block_size;
+ struct loop_info64 info;
+ __u64 __reserved[8];
+};
+
+#define LOOP_CONFIGURE 0x4C0A
+#endif
diff --git a/src/basic/missing_magic.h b/src/basic/missing_magic.h
new file mode 100644
index 0000000..e7466cb
--- /dev/null
+++ b/src/basic/missing_magic.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/magic.h>
+
+/* 62aa81d7c4c24b90fdb61da70ac0dbbc414f9939 (4.13) */
+#ifndef OCFS2_SUPER_MAGIC
+#define OCFS2_SUPER_MAGIC 0x7461636f
+#endif
+
+/* 67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff (4.5) */
+#ifndef CGROUP2_SUPER_MAGIC
+#define CGROUP2_SUPER_MAGIC 0x63677270
+#endif
+
+/* 4282d60689d4f21b40692029080440cc58e8a17d (4.1) */
+#ifndef TRACEFS_MAGIC
+#define TRACEFS_MAGIC 0x74726163
+#endif
+
+/* e149ed2b805fefdccf7ccdfc19eca22fdd4514ac (3.19) */
+#ifndef NSFS_MAGIC
+#define NSFS_MAGIC 0x6e736673
+#endif
+
+/* b2197755b2633e164a439682fb05a9b5ea48f706 (4.4) */
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+/* Not exposed yet (4.20). Defined at ipc/mqueue.c */
+#ifndef MQUEUE_MAGIC
+#define MQUEUE_MAGIC 0x19800202
+#endif
+
+/* Not exposed yet (as of Linux 5.4). Defined in fs/xfs/libxfs/xfs_format.h */
+#ifndef XFS_SB_MAGIC
+#define XFS_SB_MAGIC 0x58465342
+#endif
diff --git a/src/basic/missing_mman.h b/src/basic/missing_mman.h
new file mode 100644
index 0000000..4a10912
--- /dev/null
+++ b/src/basic/missing_mman.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/mman.h>
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
diff --git a/src/basic/missing_network.h b/src/basic/missing_network.h
new file mode 100644
index 0000000..f9db690
--- /dev/null
+++ b/src/basic/missing_network.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* linux/in6.h or netinet/in.h */
+#ifndef IPV6_UNICAST_IF
+#define IPV6_UNICAST_IF 76
+#endif
+
+/* linux/in6.h or netinet/in.h */
+#ifndef IPV6_TRANSPARENT
+#define IPV6_TRANSPARENT 75
+#endif
+
+/* Not exposed but defined at include/net/ip.h */
+#ifndef IPV4_MIN_MTU
+#define IPV4_MIN_MTU 68
+#endif
+
+/* linux/ipv6.h */
+#ifndef IPV6_MIN_MTU
+#define IPV6_MIN_MTU 1280
+#endif
+
+/* Note that LOOPBACK_IFINDEX is currently not exposed by the
+ * kernel/glibc, but hardcoded internally by the kernel. However, as
+ * it is exported to userspace indirectly via rtnetlink and the
+ * ioctls, and made use of widely we define it here too, in a way that
+ * is compatible with the kernel's internal definition. */
+#ifndef LOOPBACK_IFINDEX
+#define LOOPBACK_IFINDEX 1
+#endif
+
+/* Not exposed yet. Similar values are defined in net/ethernet.h */
+#ifndef ETHERTYPE_LLDP
+#define ETHERTYPE_LLDP 0x88cc
+#endif
diff --git a/src/basic/missing_prctl.h b/src/basic/missing_prctl.h
new file mode 100644
index 0000000..ab85130
--- /dev/null
+++ b/src/basic/missing_prctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/prctl.h>
+
+/* 58319057b7847667f0c9585b9de0e8932b0fdb08 (4.3) */
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+
+#define PR_CAP_AMBIENT_IS_SET 1
+#define PR_CAP_AMBIENT_RAISE 2
+#define PR_CAP_AMBIENT_LOWER 3
+#define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
diff --git a/src/basic/missing_random.h b/src/basic/missing_random.h
new file mode 100644
index 0000000..443b913
--- /dev/null
+++ b/src/basic/missing_random.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if USE_SYS_RANDOM_H
+# include <sys/random.h>
+#else
+# include <linux/random.h>
+#endif
+
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK 0x0001
+#endif
+
+#ifndef GRND_RANDOM
+#define GRND_RANDOM 0x0002
+#endif
+
+#ifndef GRND_INSECURE
+#define GRND_INSECURE 0x0004
+#endif
diff --git a/src/basic/missing_resource.h b/src/basic/missing_resource.h
new file mode 100644
index 0000000..6e76765
--- /dev/null
+++ b/src/basic/missing_resource.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/resource.h>
+
+#ifndef RLIMIT_RTTIME
+#define RLIMIT_RTTIME 15
+#endif
+
+/* If RLIMIT_RTTIME is not defined, then we cannot use RLIMIT_NLIMITS as is */
+#define _RLIMIT_MAX (RLIMIT_RTTIME+1 > RLIMIT_NLIMITS ? RLIMIT_RTTIME+1 : RLIMIT_NLIMITS)
diff --git a/src/basic/missing_sched.h b/src/basic/missing_sched.h
new file mode 100644
index 0000000..6a889f6
--- /dev/null
+++ b/src/basic/missing_sched.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sched.h>
+
+#ifndef CLONE_NEWCGROUP
+#define CLONE_NEWCGROUP 0x02000000
+#endif
+
+/* Not exposed yet. Defined at include/linux/sched.h */
+#ifndef PF_KTHREAD
+#define PF_KTHREAD 0x00200000
+#endif
+
+/* The maximum thread/process name length including trailing NUL byte. This mimics the kernel definition of the same
+ * name, which we need in userspace at various places but is not defined in userspace currently, neither under this
+ * name nor any other. */
+/* Not exposed yet. Defined at include/linux/sched.h */
+#ifndef TASK_COMM_LEN
+#define TASK_COMM_LEN 16
+#endif
diff --git a/src/basic/missing_securebits.h b/src/basic/missing_securebits.h
new file mode 100644
index 0000000..40d6ec9
--- /dev/null
+++ b/src/basic/missing_securebits.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <linux/securebits.h>
+
+/* 746bf6d64275be0c65b0631d8a72b16f1454cfa1 (4.3) */
+#ifndef SECURE_NO_CAP_AMBIENT_RAISE
+#define SECURE_NO_CAP_AMBIENT_RAISE 6
+#define SECURE_NO_CAP_AMBIENT_RAISE_LOCKED 7 /* make bit-6 immutable */
+#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED))
+
+#undef SECURE_ALL_BITS
+#define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \
+ issecure_mask(SECURE_NO_SETUID_FIXUP) | \
+ issecure_mask(SECURE_KEEP_CAPS) | \
+ issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+#endif
diff --git a/src/basic/missing_socket.h b/src/basic/missing_socket.h
new file mode 100644
index 0000000..17bc1a5
--- /dev/null
+++ b/src/basic/missing_socket.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/socket.h>
+
+#if HAVE_LINUX_VM_SOCKETS_H
+#include <linux/vm_sockets.h>
+#else
+#define VMADDR_CID_ANY -1U
+struct sockaddr_vm {
+ unsigned short svm_family;
+ unsigned short svm_reserved1;
+ unsigned int svm_port;
+ unsigned int svm_cid;
+ unsigned char svm_zero[sizeof(struct sockaddr) -
+ sizeof(unsigned short) -
+ sizeof(unsigned short) -
+ sizeof(unsigned int) -
+ sizeof(unsigned int)];
+};
+#endif /* !HAVE_LINUX_VM_SOCKETS_H */
+
+#ifndef AF_VSOCK
+#define AF_VSOCK 40
+#endif
+
+#ifndef SO_REUSEPORT
+#define SO_REUSEPORT 15
+#endif
+
+#ifndef SO_PEERGROUPS
+#define SO_PEERGROUPS 59
+#endif
+
+#ifndef SO_BINDTOIFINDEX
+#define SO_BINDTOIFINDEX 62
+#endif
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+#ifndef SOL_ALG
+#define SOL_ALG 279
+#endif
+
+/* Not exposed yet. Defined in include/linux/socket.h. */
+#ifndef SOL_SCTP
+#define SOL_SCTP 132
+#endif
+
+/* Not exposed yet. Defined in include/linux/socket.h */
+#ifndef SCM_SECURITY
+#define SCM_SECURITY 0x03
+#endif
+
+/* netinet/in.h */
+#ifndef IP_FREEBIND
+#define IP_FREEBIND 15
+#endif
+
+#ifndef IP_TRANSPARENT
+#define IP_TRANSPARENT 19
+#endif
+
+#ifndef IPV6_FREEBIND
+#define IPV6_FREEBIND 78
+#endif
+
+/* linux/sockios.h */
+#ifndef SIOCGSKNS
+#define SIOCGSKNS 0x894C
+#endif
diff --git a/src/basic/missing_stat.h b/src/basic/missing_stat.h
new file mode 100644
index 0000000..372fdf9
--- /dev/null
+++ b/src/basic/missing_stat.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/types.h>
+#include <sys/stat.h>
+
+#if WANT_LINUX_STAT_H
+#include <linux/stat.h>
+#endif
+
+/* Thew newest definition we are aware of (fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60; 5.8) */
+#define STATX_DEFINITION { \
+ __u32 stx_mask; \
+ __u32 stx_blksize; \
+ __u64 stx_attributes; \
+ __u32 stx_nlink; \
+ __u32 stx_uid; \
+ __u32 stx_gid; \
+ __u16 stx_mode; \
+ __u16 __spare0[1]; \
+ __u64 stx_ino; \
+ __u64 stx_size; \
+ __u64 stx_blocks; \
+ __u64 stx_attributes_mask; \
+ struct statx_timestamp stx_atime; \
+ struct statx_timestamp stx_btime; \
+ struct statx_timestamp stx_ctime; \
+ struct statx_timestamp stx_mtime; \
+ __u32 stx_rdev_major; \
+ __u32 stx_rdev_minor; \
+ __u32 stx_dev_major; \
+ __u32 stx_dev_minor; \
+ __u64 stx_mnt_id; \
+ __u64 __spare2; \
+ __u64 __spare3[12]; \
+}
+
+#if !HAVE_STRUCT_STATX
+struct statx_timestamp {
+ __s64 tv_sec;
+ __u32 tv_nsec;
+ __s32 __reserved;
+};
+
+struct statx STATX_DEFINITION;
+#endif
+
+/* Always define the newest version we are aware of as a distinct type, so that we can use it even if glibc
+ * defines an older definition */
+struct new_statx STATX_DEFINITION;
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef AT_STATX_SYNC_AS_STAT
+#define AT_STATX_SYNC_AS_STAT 0x0000
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef AT_STATX_FORCE_SYNC
+#define AT_STATX_FORCE_SYNC 0x2000
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef AT_STATX_DONT_SYNC
+#define AT_STATX_DONT_SYNC 0x4000
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_TYPE
+#define STATX_TYPE 0x00000001U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_MODE
+#define STATX_MODE 0x00000002U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_NLINK
+#define STATX_NLINK 0x00000004U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_UID
+#define STATX_UID 0x00000008U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_GID
+#define STATX_GID 0x00000010U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_ATIME
+#define STATX_ATIME 0x00000020U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_MTIME
+#define STATX_MTIME 0x00000040U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_CTIME
+#define STATX_CTIME 0x00000080U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_INO
+#define STATX_INO 0x00000100U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_SIZE
+#define STATX_SIZE 0x00000200U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_BLOCKS
+#define STATX_BLOCKS 0x00000400U
+#endif
+
+/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
+#ifndef STATX_BTIME
+#define STATX_BTIME 0x00000800U
+#endif
+
+/* fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60 (5.8) */
+#ifndef STATX_MNT_ID
+#define STATX_MNT_ID 0x00001000U
+#endif
+
+/* 80340fe3605c0e78cfe496c3b3878be828cfdbfe (5.8) */
+#ifndef STATX_ATTR_MOUNT_ROOT
+#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
+#endif
diff --git a/src/basic/missing_stdlib.h b/src/basic/missing_stdlib.h
new file mode 100644
index 0000000..8c76f93
--- /dev/null
+++ b/src/basic/missing_stdlib.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdlib.h>
+
+/* stdlib.h */
+#if !HAVE_SECURE_GETENV
+# if HAVE___SECURE_GETENV
+# define secure_getenv __secure_getenv
+# else
+# error "neither secure_getenv nor __secure_getenv are available"
+# endif
+#endif
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
new file mode 100644
index 0000000..0594a1b
--- /dev/null
+++ b/src/basic/missing_syscall.h
@@ -0,0 +1,812 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* Missing glibc definitions to access certain kernel APIs */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifdef ARCH_MIPS
+#include <asm/sgidefs.h>
+#endif
+
+#if defined(__x86_64__) && defined(__ILP32__)
+# define systemd_SC_arch_bias(x) ((x) | /* __X32_SYSCALL_BIT */ 0x40000000)
+#elif defined(__ia64__)
+# define systemd_SC_arch_bias(x) (1024 + (x))
+#elif defined __alpha__
+# define systemd_SC_arch_bias(x) (110 + (x))
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_SC_arch_bias(x) (4000 + (x))
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_SC_arch_bias(x) (6000 + (x))
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_SC_arch_bias(x) (5000 + (x))
+# else
+# error "Unknown MIPS ABI"
+# endif
+#else
+# define systemd_SC_arch_bias(x) (x)
+#endif
+
+#include "missing_keyctl.h"
+#include "missing_stat.h"
+
+/* linux/kcmp.h */
+#ifndef KCMP_FILE /* 3f4994cfc15f38a3159c6e3a4b3ab2e1481a6b02 (3.19) */
+#define KCMP_FILE 0
+#endif
+
+#if !HAVE_PIVOT_ROOT
+static inline int missing_pivot_root(const char *new_root, const char *put_old) {
+ return syscall(__NR_pivot_root, new_root, put_old);
+}
+
+# define pivot_root missing_pivot_root
+#endif
+
+/* ======================================================================= */
+
+#if defined __x86_64__
+# define systemd_NR_memfd_create systemd_SC_arch_bias(319)
+#elif defined __arm__
+# define systemd_NR_memfd_create 385
+#elif defined __aarch64__
+# define systemd_NR_memfd_create 279
+#elif defined __alpha__
+# define systemd_NR_memfd_create 512
+#elif defined(__powerpc__)
+# define systemd_NR_memfd_create 360
+#elif defined __s390__
+# define systemd_NR_memfd_create 350
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_memfd_create systemd_SC_arch_bias(354)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_memfd_create systemd_SC_arch_bias(318)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_memfd_create systemd_SC_arch_bias(314)
+# endif
+#elif defined __i386__
+# define systemd_NR_memfd_create 356
+#elif defined __arc__
+# define systemd_NR_memfd_create 279
+#else
+# warning "memfd_create() syscall number unknown for your architecture"
+#endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_memfd_create && __NR_memfd_create >= 0
+# if defined systemd_NR_memfd_create
+assert_cc(__NR_memfd_create == systemd_NR_memfd_create);
+# endif
+#else
+# if defined __NR_memfd_create
+# undef __NR_memfd_create
+# endif
+# if defined systemd_NR_memfd_create
+# define __NR_memfd_create systemd_NR_memfd_create
+# endif
+#endif
+
+#if !HAVE_MEMFD_CREATE
+static inline int missing_memfd_create(const char *name, unsigned int flags) {
+# ifdef __NR_memfd_create
+ return syscall(__NR_memfd_create, name, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define memfd_create missing_memfd_create
+#endif
+
+/* ======================================================================= */
+
+#if defined __x86_64__
+# define systemd_NR_getrandom systemd_SC_arch_bias(318)
+#elif defined(__i386__)
+# define systemd_NR_getrandom 355
+#elif defined(__arm__)
+# define systemd_NR_getrandom 384
+#elif defined(__aarch64__)
+# define systemd_NR_getrandom 278
+#elif defined(__alpha__)
+# define systemd_NR_getrandom 511
+#elif defined(__ia64__)
+# define systemd_NR_getrandom systemd_SC_arch_bias(318)
+#elif defined(__m68k__)
+# define systemd_NR_getrandom 352
+#elif defined(__s390x__)
+# define systemd_NR_getrandom 349
+#elif defined(__powerpc__)
+# define systemd_NR_getrandom 359
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_getrandom systemd_SC_arch_bias(353)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_getrandom systemd_SC_arch_bias(317)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_getrandom systemd_SC_arch_bias(313)
+# endif
+#elif defined(__arc__)
+# define systemd_NR_getrandom 278
+#else
+# warning "getrandom() syscall number unknown for your architecture"
+#endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_getrandom && __NR_getrandom >= 0
+# if defined systemd_NR_getrandom
+assert_cc(__NR_getrandom == systemd_NR_getrandom);
+# endif
+#else
+# if defined __NR_getrandom
+# undef __NR_getrandom
+# endif
+# if defined systemd_NR_getrandom
+# define __NR_getrandom systemd_NR_getrandom
+# endif
+#endif
+
+#if !HAVE_GETRANDOM
+static inline int missing_getrandom(void *buffer, size_t count, unsigned flags) {
+# ifdef __NR_getrandom
+ return syscall(__NR_getrandom, buffer, count, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define getrandom missing_getrandom
+#endif
+
+/* ======================================================================= */
+
+/* The syscall has been defined since forever, but the glibc wrapper was missing. */
+#if !HAVE_GETTID
+static inline pid_t missing_gettid(void) {
+# if defined __NR_gettid && __NR_gettid >= 0
+ return (pid_t) syscall(__NR_gettid);
+# else
+# error "__NR_gettid not defined"
+# endif
+}
+
+# define gettid missing_gettid
+#endif
+
+/* ======================================================================= */
+
+#if defined(__x86_64__)
+# define systemd_NR_name_to_handle_at systemd_SC_arch_bias(303)
+#elif defined(__i386__)
+# define systemd_NR_name_to_handle_at 341
+#elif defined(__arm__)
+# define systemd_NR_name_to_handle_at 370
+#elif defined __aarch64__
+# define systemd_NR_name_to_handle_at 264
+#elif defined(__alpha__)
+# define systemd_NR_name_to_handle_at 497
+#elif defined(__powerpc__)
+# define systemd_NR_name_to_handle_at 345
+#elif defined __s390__ || defined __s390x__
+# define systemd_NR_name_to_handle_at 335
+#elif defined(__arc__)
+# define systemd_NR_name_to_handle_at 264
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_name_to_handle_at systemd_SC_arch_bias(339)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_name_to_handle_at systemd_SC_arch_bias(303)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_name_to_handle_at systemd_SC_arch_bias(298)
+# endif
+#else
+# warning "name_to_handle_at number is not defined"
+#endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_name_to_handle_at && __NR_name_to_handle_at >= 0
+# if defined systemd_NR_name_to_handle_at
+assert_cc(__NR_name_to_handle_at == systemd_NR_name_to_handle_at);
+# endif
+#else
+# if defined __NR_name_to_handle_at
+# undef __NR_name_to_handle_at
+# endif
+# if defined systemd_NR_name_to_handle_at
+# define __NR_name_to_handle_at systemd_NR_name_to_handle_at
+# endif
+#endif
+
+#if !HAVE_NAME_TO_HANDLE_AT
+struct file_handle {
+ unsigned int handle_bytes;
+ int handle_type;
+ unsigned char f_handle[0];
+};
+
+static inline int missing_name_to_handle_at(int fd, const char *name, struct file_handle *handle, int *mnt_id, int flags) {
+# ifdef __NR_name_to_handle_at
+ return syscall(__NR_name_to_handle_at, fd, name, handle, mnt_id, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define name_to_handle_at missing_name_to_handle_at
+#endif
+
+/* ======================================================================= */
+
+#if defined __aarch64__
+# define systemd_NR_setns 268
+#elif defined __arm__
+# define systemd_NR_setns 375
+#elif defined __alpha__
+# define systemd_NR_setns 501
+#elif defined(__x86_64__)
+# define systemd_NR_setns systemd_SC_arch_bias(308)
+#elif defined(__i386__)
+# define systemd_NR_setns 346
+#elif defined(__powerpc__)
+# define systemd_NR_setns 350
+#elif defined __s390__ || defined __s390x__
+# define systemd_NR_setns 339
+#elif defined(__arc__)
+# define systemd_NR_setns 268
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_setns systemd_SC_arch_bias(344)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_setns systemd_SC_arch_bias(308)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_setns systemd_SC_arch_bias(303)
+# endif
+#else
+# warning "setns() syscall number unknown for your architecture"
+#endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_setns && __NR_setns >= 0
+# if defined systemd_NR_setns
+assert_cc(__NR_setns == systemd_NR_setns);
+# endif
+#else
+# if defined __NR_setns
+# undef __NR_setns
+# endif
+# if defined systemd_NR_setns
+# define __NR_setns systemd_NR_setns
+# endif
+#endif
+
+#if !HAVE_SETNS
+static inline int missing_setns(int fd, int nstype) {
+# ifdef __NR_setns
+ return syscall(__NR_setns, fd, nstype);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define setns missing_setns
+#endif
+
+/* ======================================================================= */
+
+static inline pid_t raw_getpid(void) {
+#if defined(__alpha__)
+ return (pid_t) syscall(__NR_getxpid);
+#else
+ return (pid_t) syscall(__NR_getpid);
+#endif
+}
+
+/* ======================================================================= */
+
+#if defined __x86_64__
+# define systemd_NR_renameat2 systemd_SC_arch_bias(316)
+#elif defined __arm__
+# define systemd_NR_renameat2 382
+#elif defined __aarch64__
+# define systemd_NR_renameat2 276
+#elif defined __alpha__
+# define systemd_NR_renameat2 510
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_renameat2 systemd_SC_arch_bias(351)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_renameat2 systemd_SC_arch_bias(315)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_renameat2 systemd_SC_arch_bias(311)
+# endif
+#elif defined __i386__
+# define systemd_NR_renameat2 353
+#elif defined __powerpc64__
+# define systemd_NR_renameat2 357
+#elif defined __s390__ || defined __s390x__
+# define systemd_NR_renameat2 347
+#elif defined __arc__
+# define systemd_NR_renameat2 276
+#else
+# warning "renameat2() syscall number unknown for your architecture"
+#endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_renameat2 && __NR_renameat2 >= 0
+# if defined systemd_NR_renameat2
+assert_cc(__NR_renameat2 == systemd_NR_renameat2);
+# endif
+#else
+# if defined __NR_renameat2
+# undef __NR_renameat2
+# endif
+# if defined systemd_NR_renameat2
+# define __NR_renameat2 systemd_NR_renameat2
+# endif
+#endif
+
+#if !HAVE_RENAMEAT2
+static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, const char *newname, unsigned flags) {
+# ifdef __NR_renameat2
+ return syscall(__NR_renameat2, oldfd, oldname, newfd, newname, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define renameat2 missing_renameat2
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_KCMP
+static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) {
+# if defined __NR_kcmp && __NR_kcmp >= 0
+ return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define kcmp missing_kcmp
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_KEYCTL
+static inline long missing_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) {
+# if defined __NR_keyctl && __NR_keyctl >= 0
+ return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+
+# define keyctl missing_keyctl
+}
+
+static inline key_serial_t missing_add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) {
+# if defined __NR_add_key && __NR_add_key >= 0
+ return syscall(__NR_add_key, type, description, payload, plen, ringid);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+
+# define add_key missing_add_key
+}
+
+static inline key_serial_t missing_request_key(const char *type, const char *description, const char * callout_info, key_serial_t destringid) {
+# if defined __NR_request_key && __NR_request_key >= 0
+ return syscall(__NR_request_key, type, description, callout_info, destringid);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+
+# define request_key missing_request_key
+}
+#endif
+
+/* ======================================================================= */
+
+#if defined(__x86_64__)
+# define systemd_NR_copy_file_range systemd_SC_arch_bias(326)
+#elif defined(__i386__)
+# define systemd_NR_copy_file_range 377
+#elif defined __s390__
+# define systemd_NR_copy_file_range 375
+#elif defined __arm__
+# define systemd_NR_copy_file_range 391
+#elif defined __aarch64__
+# define systemd_NR_copy_file_range 285
+#elif defined __alpha__
+# define systemd_NR_copy_file_range 519
+#elif defined __powerpc__
+# define systemd_NR_copy_file_range 379
+#elif defined __arc__
+# define systemd_NR_copy_file_range 285
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_copy_file_range systemd_SC_arch_bias(360)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_copy_file_range systemd_SC_arch_bias(324)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_copy_file_range systemd_SC_arch_bias(320)
+# endif
+#else
+# warning "copy_file_range() syscall number unknown for your architecture"
+#endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_copy_file_range && __NR_copy_file_range >= 0
+# if defined systemd_NR_copy_file_range
+assert_cc(__NR_copy_file_range == systemd_NR_copy_file_range);
+# endif
+#else
+# if defined __NR_copy_file_range
+# undef __NR_copy_file_range
+# endif
+# if defined systemd_NR_copy_file_range
+# define __NR_copy_file_range systemd_NR_copy_file_range
+# endif
+#endif
+
+#if !HAVE_COPY_FILE_RANGE
+static inline ssize_t missing_copy_file_range(int fd_in, loff_t *off_in,
+ int fd_out, loff_t *off_out,
+ size_t len,
+ unsigned int flags) {
+# ifdef __NR_copy_file_range
+ return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define copy_file_range missing_copy_file_range
+#endif
+
+/* ======================================================================= */
+
+#if defined __i386__
+# define systemd_NR_bpf 357
+#elif defined __x86_64__
+# define systemd_NR_bpf systemd_SC_arch_bias(321)
+#elif defined __aarch64__
+# define systemd_NR_bpf 280
+#elif defined __arm__
+# define systemd_NR_bpf 386
+#elif defined __alpha__
+# define systemd_NR_bpf 515
+#elif defined(__powerpc__)
+# define systemd_NR_bpf 361
+#elif defined __sparc__
+# define systemd_NR_bpf 349
+#elif defined __s390__
+# define systemd_NR_bpf 351
+#elif defined __tilegx__
+# define systemd_NR_bpf 280
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_bpf systemd_SC_arch_bias(355)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_bpf systemd_SC_arch_bias(319)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_bpf systemd_SC_arch_bias(315)
+# endif
+#else
+# warning "bpf() syscall number unknown for your architecture"
+#endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_bpf && __NR_bpf >= 0
+# if defined systemd_NR_bpf
+assert_cc(__NR_bpf == systemd_NR_bpf);
+# endif
+#else
+# if defined __NR_bpf
+# undef __NR_bpf
+# endif
+# if defined systemd_NR_bpf
+# define __NR_bpf systemd_NR_bpf
+# endif
+#endif
+
+#if !HAVE_BPF
+union bpf_attr;
+
+static inline int missing_bpf(int cmd, union bpf_attr *attr, size_t size) {
+#ifdef __NR_bpf
+ return (int) syscall(__NR_bpf, cmd, attr, size);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+# define bpf missing_bpf
+#endif
+
+/* ======================================================================= */
+
+#ifndef __IGNORE_pkey_mprotect
+# if defined __i386__
+# define systemd_NR_pkey_mprotect 380
+# elif defined __x86_64__
+# define systemd_NR_pkey_mprotect systemd_SC_arch_bias(329)
+# elif defined __aarch64__
+# define systemd_NR_pkey_mprotect 288
+# elif defined __arm__
+# define systemd_NR_pkey_mprotect 394
+# elif defined __alpha__
+# define systemd_NR_pkey_mprotect 524
+# elif defined __powerpc__
+# define systemd_NR_pkey_mprotect 386
+# elif defined __s390__
+# define systemd_NR_pkey_mprotect 384
+# elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_pkey_mprotect systemd_SC_arch_bias(363)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_pkey_mprotect systemd_SC_arch_bias(327)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_pkey_mprotect systemd_SC_arch_bias(323)
+# endif
+# else
+# warning "pkey_mprotect() syscall number unknown for your architecture"
+# endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+# if defined __NR_pkey_mprotect && __NR_pkey_mprotect >= 0
+# if defined systemd_NR_pkey_mprotect
+assert_cc(__NR_pkey_mprotect == systemd_NR_pkey_mprotect);
+# endif
+# else
+# if defined __NR_pkey_mprotect
+# undef __NR_pkey_mprotect
+# endif
+# if defined systemd_NR_pkey_mprotect
+# define __NR_pkey_mprotect systemd_NR_pkey_mprotect
+# endif
+# endif
+#endif
+
+/* ======================================================================= */
+
+#if defined __aarch64__
+# define systemd_NR_statx 291
+#elif defined __arm__
+# define systemd_NR_statx 397
+#elif defined __alpha__
+# define systemd_NR_statx 522
+#elif defined __i386__ || defined __powerpc64__
+# define systemd_NR_statx 383
+#elif defined __s390__ || defined __s390x__
+# define systemd_NR_statx 379
+#elif defined __sparc__
+# define systemd_NR_statx 360
+#elif defined __x86_64__
+# define systemd_NR_statx systemd_SC_arch_bias(332)
+#elif defined _MIPS_SIM
+# if _MIPS_SIM == _MIPS_SIM_ABI32
+# define systemd_NR_statx systemd_SC_arch_bias(366)
+# elif _MIPS_SIM == _MIPS_SIM_NABI32
+# define systemd_NR_statx systemd_SC_arch_bias(330)
+# elif _MIPS_SIM == _MIPS_SIM_ABI64
+# define systemd_NR_statx systemd_SC_arch_bias(326)
+# endif
+#else
+# warning "statx() syscall number unknown for your architecture"
+#endif
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_statx && __NR_statx >= 0
+# if defined systemd_NR_statx
+assert_cc(__NR_statx == systemd_NR_statx);
+# endif
+#else
+# if defined __NR_statx
+# undef __NR_statx
+# endif
+# if defined systemd_NR_statx
+# define __NR_statx systemd_NR_statx
+# endif
+#endif
+
+#if !HAVE_STATX
+struct statx;
+
+static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flags, unsigned int mask, struct statx *buffer) {
+# ifdef __NR_statx
+ return syscall(__NR_statx, dfd, filename, flags, mask, buffer);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+#endif
+
+/* This typedef is supposed to be always defined. */
+typedef struct statx struct_statx;
+
+#if !HAVE_STATX
+# define statx(dfd, filename, flags, mask, buffer) missing_statx(dfd, filename, flags, mask, buffer)
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_SET_MEMPOLICY
+enum {
+ MPOL_DEFAULT,
+ MPOL_PREFERRED,
+ MPOL_BIND,
+ MPOL_INTERLEAVE,
+ MPOL_LOCAL,
+};
+
+static inline long missing_set_mempolicy(int mode, const unsigned long *nodemask,
+ unsigned long maxnode) {
+ long i;
+# if defined __NR_set_mempolicy && __NR_set_mempolicy >= 0
+ i = syscall(__NR_set_mempolicy, mode, nodemask, maxnode);
+# else
+ errno = ENOSYS;
+ i = -1;
+# endif
+ return i;
+}
+
+# define set_mempolicy missing_set_mempolicy
+#endif
+
+#if !HAVE_GET_MEMPOLICY
+static inline long missing_get_mempolicy(int *mode, unsigned long *nodemask,
+ unsigned long maxnode, void *addr,
+ unsigned long flags) {
+ long i;
+# if defined __NR_get_mempolicy && __NR_get_mempolicy >= 0
+ i = syscall(__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags);
+# else
+ errno = ENOSYS;
+ i = -1;
+# endif
+ return i;
+}
+
+# define get_mempolicy missing_get_mempolicy
+#endif
+
+/* ======================================================================= */
+
+/* should be always defined, see kernel 39036cd2727395c3369b1051005da74059a85317 */
+#define systemd_NR_pidfd_send_signal systemd_SC_arch_bias(424)
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_pidfd_send_signal && __NR_pidfd_send_signal >= 0
+# if defined systemd_NR_pidfd_send_signal
+assert_cc(__NR_pidfd_send_signal == systemd_NR_pidfd_send_signal);
+# endif
+#else
+# if defined __NR_pidfd_send_signal
+# undef __NR_pidfd_send_signal
+# endif
+# define __NR_pidfd_send_signal systemd_NR_pidfd_send_signal
+#endif
+
+#if !HAVE_PIDFD_SEND_SIGNAL
+static inline int missing_pidfd_send_signal(int fd, int sig, siginfo_t *info, unsigned flags) {
+# ifdef __NR_pidfd_send_signal
+ return syscall(__NR_pidfd_send_signal, fd, sig, info, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define pidfd_send_signal missing_pidfd_send_signal
+#endif
+
+/* should be always defined, see kernel 7615d9e1780e26e0178c93c55b73309a5dc093d7 */
+#define systemd_NR_pidfd_open systemd_SC_arch_bias(434)
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_pidfd_open && __NR_pidfd_open >= 0
+# if defined systemd_NR_pidfd_open
+assert_cc(__NR_pidfd_open == systemd_NR_pidfd_open);
+# endif
+#else
+# if defined __NR_pidfd_open
+# undef __NR_pidfd_open
+# endif
+# define __NR_pidfd_open systemd_NR_pidfd_open
+#endif
+
+#if !HAVE_PIDFD_OPEN
+static inline int missing_pidfd_open(pid_t pid, unsigned flags) {
+# ifdef __NR_pidfd_open
+ return syscall(__NR_pidfd_open, pid, flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define pidfd_open missing_pidfd_open
+#endif
+
+/* ======================================================================= */
+
+#if !HAVE_RT_SIGQUEUEINFO
+static inline int missing_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *info) {
+# if defined __NR_rt_sigqueueinfo && __NR_rt_sigqueueinfo >= 0
+ return syscall(__NR_rt_sigqueueinfo, tgid, sig, info);
+# else
+# error "__NR_rt_sigqueueinfo not defined"
+# endif
+}
+
+# define rt_sigqueueinfo missing_rt_sigqueueinfo
+#endif
+
+/* ======================================================================= */
+
+#define systemd_NR_close_range systemd_SC_arch_bias(436)
+
+/* may be (invalid) negative number due to libseccomp, see PR 13319 */
+#if defined __NR_close_range && __NR_close_range >= 0
+# if defined systemd_NR_close_range
+assert_cc(__NR_close_range == systemd_NR_close_range);
+# endif
+#else
+# if defined __NR_close_range
+# undef __NR_close_range
+# endif
+# if defined systemd_NR_close_range
+# define __NR_close_range systemd_NR_close_range
+# endif
+#endif
+
+#if !HAVE_CLOSE_RANGE
+static inline int missing_close_range(int first_fd, int end_fd, unsigned flags) {
+# ifdef __NR_close_range
+ /* Kernel-side the syscall expects fds as unsigned integers (just like close() actually), while
+ * userspace exclusively uses signed integers for fds. We don't know just yet how glibc is going to
+ * wrap this syscall, but let's assume it's going to be similar to what they do for close(),
+ * i.e. make the same unsigned → signed type change from the raw kernel syscall compared to the
+ * userspace wrapper. There's only one caveat for this: unlike for close() there's the special
+ * UINT_MAX fd value for the 'end_fd' argument. Let's safely map that to -1 here. And let's refuse
+ * any other negative values. */
+ if ((first_fd < 0) || (end_fd < 0 && end_fd != -1)) {
+ errno = -EBADF;
+ return -1;
+ }
+
+ return syscall(__NR_close_range,
+ (unsigned) first_fd,
+ end_fd == -1 ? UINT_MAX : (unsigned) end_fd, /* Of course, the compiler should figure out that this is the identity mapping IRL */
+ flags);
+# else
+ errno = ENOSYS;
+ return -1;
+# endif
+}
+
+# define close_range missing_close_range
+#endif
diff --git a/src/basic/missing_timerfd.h b/src/basic/missing_timerfd.h
new file mode 100644
index 0000000..dba3043
--- /dev/null
+++ b/src/basic/missing_timerfd.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/timerfd.h>
+
+#ifndef TFD_TIMER_CANCEL_ON_SET
+#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
+#endif
diff --git a/src/basic/missing_type.h b/src/basic/missing_type.h
new file mode 100644
index 0000000..f623309
--- /dev/null
+++ b/src/basic/missing_type.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <uchar.h>
+
+#if !HAVE_CHAR32_T
+#define char32_t uint32_t
+#endif
+
+#if !HAVE_CHAR16_T
+#define char16_t uint16_t
+#endif
diff --git a/src/basic/missing_xfs.h b/src/basic/missing_xfs.h
new file mode 100644
index 0000000..ba5fe81
--- /dev/null
+++ b/src/basic/missing_xfs.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* This is currently not exported in the public kernel headers, but the libxfs library code part of xfsprogs
+ * defines it as public header */
+
+#ifndef XFS_IOC_FSGEOMETRY
+#define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom)
+
+typedef struct xfs_fsop_geom {
+ uint32_t blocksize;
+ uint32_t rtextsize;
+ uint32_t agblocks;
+ uint32_t agcount;
+ uint32_t logblocks;
+ uint32_t sectsize;
+ uint32_t inodesize;
+ uint32_t imaxpct;
+ uint64_t datablocks;
+ uint64_t rtblocks;
+ uint64_t rtextents;
+ uint64_t logstart;
+ unsigned char uuid[16];
+ uint32_t sunit;
+ uint32_t swidth;
+ int32_t version;
+ uint32_t flags;
+ uint32_t logsectsize;
+ uint32_t rtsectsize;
+ uint32_t dirblocksize;
+ uint32_t logsunit;
+} xfs_fsop_geom_t;
+#endif
+
+#ifndef XFS_IOC_FSGROWFSDATA
+#define XFS_IOC_FSGROWFSDATA _IOW ('X', 110, struct xfs_growfs_data)
+
+typedef struct xfs_growfs_data {
+ uint64_t newblocks;
+ uint32_t imaxpct;
+} xfs_growfs_data_t;
+#endif
diff --git a/src/basic/mkdir-label.c b/src/basic/mkdir-label.c
new file mode 100644
index 0000000..9565117
--- /dev/null
+++ b/src/basic/mkdir-label.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "label.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "user-util.h"
+
+int mkdir_label(const char *path, mode_t mode) {
+ int r;
+
+ assert(path);
+
+ r = mac_selinux_create_file_prepare(path, S_IFDIR);
+ if (r < 0)
+ return r;
+
+ r = mkdir_errno_wrapper(path, mode);
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix(path, 0);
+}
+
+int mkdirat_label(int dirfd, const char *path, mode_t mode) {
+ int r;
+
+ assert(path);
+
+ r = mac_selinux_create_file_prepare_at(dirfd, path, S_IFDIR);
+ if (r < 0)
+ return r;
+
+ r = mkdirat_errno_wrapper(dirfd, path, mode);
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return r;
+
+ return mac_smack_fix_at(dirfd, path, 0);
+}
+
+int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_label);
+}
+
+int mkdir_parents_label(const char *path, mode_t mode) {
+ return mkdir_parents_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_label);
+}
+
+int mkdir_p_label(const char *path, mode_t mode) {
+ return mkdir_p_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_label);
+}
diff --git a/src/basic/mkdir.c b/src/basic/mkdir.c
new file mode 100644
index 0000000..f91f8f7
--- /dev/null
+++ b/src/basic/mkdir.c
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "user-util.h"
+
+int mkdir_safe_internal(
+ const char *path,
+ mode_t mode,
+ uid_t uid, gid_t gid,
+ MkdirFlags flags,
+ mkdir_func_t _mkdir) {
+
+ struct stat st;
+ int r;
+
+ assert(path);
+ assert(_mkdir && _mkdir != mkdir);
+
+ if (_mkdir(path, mode) >= 0) {
+ r = chmod_and_chown(path, mode, uid, gid);
+ if (r < 0)
+ return r;
+ }
+
+ if (lstat(path, &st) < 0)
+ return -errno;
+
+ if ((flags & MKDIR_FOLLOW_SYMLINK) && S_ISLNK(st.st_mode)) {
+ _cleanup_free_ char *p = NULL;
+
+ r = chase_symlinks(path, NULL, CHASE_NONEXISTENT, &p, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return mkdir_safe_internal(p, mode, uid, gid,
+ flags & ~MKDIR_FOLLOW_SYMLINK,
+ _mkdir);
+
+ if (lstat(p, &st) < 0)
+ return -errno;
+ }
+
+ if (!S_ISDIR(st.st_mode))
+ return log_full_errno(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, SYNTHETIC_ERRNO(ENOTDIR),
+ "Path \"%s\" already exists and is not a directory, refusing.", path);
+ if ((st.st_mode & 0007) > (mode & 0007) ||
+ (st.st_mode & 0070) > (mode & 0070) ||
+ (st.st_mode & 0700) > (mode & 0700))
+ return log_full_errno(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, SYNTHETIC_ERRNO(EEXIST),
+ "Directory \"%s\" already exists, but has mode %04o that is too permissive (%04o was requested), refusing.",
+ path, st.st_mode & 0777, mode);
+
+ if ((uid != UID_INVALID && st.st_uid != uid) ||
+ (gid != GID_INVALID && st.st_gid != gid)) {
+ char u[DECIMAL_STR_MAX(uid_t)] = "-", g[DECIMAL_STR_MAX(gid_t)] = "-";
+
+ if (uid != UID_INVALID)
+ xsprintf(u, UID_FMT, uid);
+ if (gid != UID_INVALID)
+ xsprintf(g, GID_FMT, gid);
+ return log_full_errno(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, SYNTHETIC_ERRNO(EEXIST),
+ "Directory \"%s\" already exists, but is owned by "UID_FMT":"GID_FMT" (%s:%s was requested), refusing.",
+ path, st.st_uid, st.st_gid, u, g);
+ }
+
+ return 0;
+}
+
+int mkdir_errno_wrapper(const char *pathname, mode_t mode) {
+ if (mkdir(pathname, mode) < 0)
+ return -errno;
+ return 0;
+}
+
+int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode) {
+ if (mkdirat(dirfd, pathname, mode) < 0)
+ return -errno;
+ return 0;
+}
+
+int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_safe_internal(path, mode, uid, gid, flags, mkdir_errno_wrapper);
+}
+
+int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) {
+ const char *p, *e;
+ int r;
+
+ assert(path);
+ assert(_mkdir != mkdir);
+
+ if (prefix && !path_startswith(path, prefix))
+ return -ENOTDIR;
+
+ /* return immediately if directory exists */
+ e = strrchr(path, '/');
+ if (!e)
+ return 0;
+
+ if (e == path)
+ return 0;
+
+ p = strndupa(path, e - path);
+ r = is_dir(p, true);
+ if (r > 0)
+ return 0;
+ if (r == 0)
+ return -ENOTDIR;
+
+ /* create every parent directory in the path, except the last component */
+ p = path + strspn(path, "/");
+ for (;;) {
+ char t[strlen(path) + 1];
+
+ e = p + strcspn(p, "/");
+ p = e + strspn(e, "/");
+
+ /* Is this the last component? If so, then we're done */
+ if (*p == 0)
+ return 0;
+
+ memcpy(t, path, e - path);
+ t[e-path] = 0;
+
+ if (prefix && path_startswith(prefix, t))
+ continue;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid) && flags == 0) {
+ r = _mkdir(t, mode);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ } else {
+ r = mkdir_safe_internal(t, mode, uid, gid, flags, _mkdir);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+ }
+}
+
+int mkdir_parents(const char *path, mode_t mode) {
+ return mkdir_parents_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_errno_wrapper);
+}
+
+int mkdir_parents_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_parents_internal(prefix, path, mode, uid, gid, flags, mkdir_errno_wrapper);
+}
+
+int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir) {
+ int r;
+
+ /* Like mkdir -p */
+
+ assert(_mkdir != mkdir);
+
+ r = mkdir_parents_internal(prefix, path, mode, uid, gid, flags, _mkdir);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid) && flags == 0) {
+ r = _mkdir(path, mode);
+ if (r < 0 && (r != -EEXIST || is_dir(path, true) <= 0))
+ return r;
+ } else {
+ r = mkdir_safe_internal(path, mode, uid, gid, flags, _mkdir);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ return 0;
+}
+
+int mkdir_p(const char *path, mode_t mode) {
+ return mkdir_p_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdir_errno_wrapper);
+}
+
+int mkdir_p_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) {
+ return mkdir_p_internal(prefix, path, mode, uid, gid, flags, mkdir_errno_wrapper);
+}
+
+int mkdir_p_root(const char *root, const char *p, uid_t uid, gid_t gid, mode_t m) {
+ _cleanup_free_ char *pp = NULL;
+ _cleanup_close_ int dfd = -1;
+ const char *bn;
+ int r;
+
+ pp = dirname_malloc(p);
+ if (!pp)
+ return -ENOMEM;
+
+ /* Not top-level? */
+ if (!(path_equal(pp, "/") || isempty(pp) || path_equal(pp, "."))) {
+
+ /* Recurse up */
+ r = mkdir_p_root(root, pp, uid, gid, m);
+ if (r < 0)
+ return r;
+ }
+
+ bn = basename(p);
+ if (path_equal(bn, "/") || isempty(bn) || path_equal(bn, "."))
+ return 0;
+
+ if (!filename_is_valid(bn))
+ return -EINVAL;
+
+ dfd = chase_symlinks_and_open(pp, root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_DIRECTORY, NULL);
+ if (dfd < 0)
+ return dfd;
+
+ if (mkdirat(dfd, bn, m) < 0) {
+ if (errno == EEXIST)
+ return 0;
+
+ return -errno;
+ }
+
+ if (uid_is_valid(uid) || gid_is_valid(gid)) {
+ _cleanup_close_ int nfd = -1;
+
+ nfd = openat(dfd, bn, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (nfd < 0)
+ return -errno;
+
+ if (fchown(nfd, uid, gid) < 0)
+ return -errno;
+ }
+
+ return 1;
+}
diff --git a/src/basic/mkdir.h b/src/basic/mkdir.h
new file mode 100644
index 0000000..3c53d22
--- /dev/null
+++ b/src/basic/mkdir.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+typedef enum MkdirFlags {
+ MKDIR_FOLLOW_SYMLINK = 1 << 0,
+ MKDIR_WARN_MODE = 1 << 1,
+} MkdirFlags;
+
+int mkdir_errno_wrapper(const char *pathname, mode_t mode);
+int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode);
+int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
+int mkdir_parents(const char *path, mode_t mode);
+int mkdir_parents_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
+int mkdir_p(const char *path, mode_t mode);
+int mkdir_p_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
+
+/* mandatory access control(MAC) versions */
+int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags);
+int mkdir_parents_label(const char *path, mode_t mod);
+int mkdir_p_label(const char *path, mode_t mode);
+
+/* internally used */
+typedef int (*mkdir_func_t)(const char *pathname, mode_t mode);
+int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
+int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
+int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdir_func_t _mkdir);
+
+int mkdir_p_root(const char *root, const char *p, uid_t uid, gid_t gid, mode_t m);
diff --git a/src/basic/mountpoint-util.c b/src/basic/mountpoint-util.c
new file mode 100644
index 0000000..a6602ad
--- /dev/null
+++ b/src/basic/mountpoint-util.c
@@ -0,0 +1,511 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "missing_stat.h"
+#include "missing_syscall.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+
+/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
+ * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
+ * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
+ * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
+ * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
+ * with large file handles anyway. */
+#define ORIGINAL_MAX_HANDLE_SZ 128
+
+int name_to_handle_at_loop(
+ int fd,
+ const char *path,
+ struct file_handle **ret_handle,
+ int *ret_mnt_id,
+ int flags) {
+
+ _cleanup_free_ struct file_handle *h = NULL;
+ size_t n = ORIGINAL_MAX_HANDLE_SZ;
+
+ assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
+
+ /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
+ * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
+ * start value, it is not an upper bound on the buffer size required.
+ *
+ * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
+ * as NULL if there's no interest in either. */
+
+ for (;;) {
+ int mnt_id = -1;
+
+ h = malloc0(offsetof(struct file_handle, f_handle) + n);
+ if (!h)
+ return -ENOMEM;
+
+ h->handle_bytes = n;
+
+ if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
+
+ if (ret_handle)
+ *ret_handle = TAKE_PTR(h);
+
+ if (ret_mnt_id)
+ *ret_mnt_id = mnt_id;
+
+ return 0;
+ }
+ if (errno != EOVERFLOW)
+ return -errno;
+
+ if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
+
+ /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
+ * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
+ * be filled in, and the caller was interested in only the mount ID an nothing else. */
+
+ *ret_mnt_id = mnt_id;
+ return 0;
+ }
+
+ /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
+ * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
+ * buffer. In that case propagate EOVERFLOW */
+ if (h->handle_bytes <= n)
+ return -EOVERFLOW;
+
+ /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
+ n = h->handle_bytes;
+ if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
+ return -EOVERFLOW;
+
+ h = mfree(h);
+ }
+}
+
+static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
+ char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
+ _cleanup_free_ char *fdinfo = NULL;
+ _cleanup_close_ int subfd = -1;
+ char *p;
+ int r;
+
+ assert(ret_mnt_id);
+ assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
+
+ if ((flags & AT_EMPTY_PATH) && isempty(filename))
+ xsprintf(path, "/proc/self/fdinfo/%i", fd);
+ else {
+ subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
+ if (subfd < 0)
+ return -errno;
+
+ xsprintf(path, "/proc/self/fdinfo/%i", subfd);
+ }
+
+ r = read_full_file(path, &fdinfo, NULL);
+ if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
+ return -EOPNOTSUPP;
+ if (r < 0)
+ return r;
+
+ p = startswith(fdinfo, "mnt_id:");
+ if (!p) {
+ p = strstr(fdinfo, "\nmnt_id:");
+ if (!p) /* The mnt_id field is a relatively new addition */
+ return -EOPNOTSUPP;
+
+ p += 8;
+ }
+
+ p += strspn(p, WHITESPACE);
+ p[strcspn(p, WHITESPACE)] = 0;
+
+ return safe_atoi(p, ret_mnt_id);
+}
+
+static bool filename_possibly_with_slash_suffix(const char *s) {
+ const char *slash, *copied;
+
+ /* Checks whether the specified string is either file name, or a filename with a suffix of
+ * slashes. But nothing else.
+ *
+ * this is OK: foo, bar, foo/, bar/, foo//, bar///
+ * this is not OK: "", "/", "/foo", "foo/bar", ".", ".." … */
+
+ slash = strchr(s, '/');
+ if (!slash)
+ return filename_is_valid(s);
+
+ if (slash - s > FILENAME_MAX) /* We want to allocate on the stack below, hence do a size check first */
+ return false;
+
+ if (slash[strspn(slash, "/")] != 0) /* Check that the suffix consist only of one or more slashes */
+ return false;
+
+ copied = strndupa(s, slash - s);
+ return filename_is_valid(copied);
+}
+
+int fd_is_mount_point(int fd, const char *filename, int flags) {
+ _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
+ int mount_id = -1, mount_id_parent = -1;
+ bool nosupp = false, check_st_dev = true;
+ STRUCT_STATX_DEFINE(sx);
+ struct stat a, b;
+ int r;
+
+ assert(fd >= 0);
+ assert(filename);
+ assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
+
+ /* Insist that the specified filename is actually a filename, and not a path, i.e. some inode further
+ * up or down the tree then immediately below the specified directory fd. */
+ if (!filename_possibly_with_slash_suffix(filename))
+ return -EINVAL;
+
+ /* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
+ * since kernel 5.8.
+ *
+ * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
+ * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
+ * all file systems are hooked up). If it works the mount id is usually good enough to tell us
+ * whether something is a mount point.
+ *
+ * If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
+ * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
+ * handle is pretty useful to detect the root directory, which we should always consider a mount
+ * point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent
+ * kernel addition.
+ *
+ * As last fallback we do traditional fstat() based st_dev comparisons. This is how things were
+ * traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev
+ * reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of
+ * their own. */
+
+ if (statx(fd, filename, (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW) |
+ (flags & AT_EMPTY_PATH) |
+ AT_NO_AUTOMOUNT, 0, &sx) < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+
+ /* If statx() is not available or forbidden, fall back to name_to_handle_at() below */
+ } else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
+ return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
+
+ r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
+ if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
+ /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
+ * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
+ * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
+ * (EINVAL): fall back to simpler logic. */
+ goto fallback_fdinfo;
+ else if (r == -EOPNOTSUPP)
+ /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
+ * supports it (in which case it is a mount point), otherwise fall back to the traditional stat()
+ * logic */
+ nosupp = true;
+ else if (r < 0)
+ return r;
+
+ r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
+ if (r == -EOPNOTSUPP) {
+ if (nosupp)
+ /* Neither parent nor child do name_to_handle_at()? We have no choice but to fall back. */
+ goto fallback_fdinfo;
+ else
+ /* The parent can't do name_to_handle_at() but the directory we are interested in can? If so,
+ * it must be a mount point. */
+ return 1;
+ } else if (r < 0)
+ return r;
+
+ /* The parent can do name_to_handle_at() but the
+ * directory we are interested in can't? If so, it
+ * must be a mount point. */
+ if (nosupp)
+ return 1;
+
+ /* If the file handle for the directory we are
+ * interested in and its parent are identical, we
+ * assume this is the root directory, which is a mount
+ * point. */
+
+ if (h->handle_bytes == h_parent->handle_bytes &&
+ h->handle_type == h_parent->handle_type &&
+ memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
+ return 1;
+
+ return mount_id != mount_id_parent;
+
+fallback_fdinfo:
+ r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
+ if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
+ goto fallback_fstat;
+ if (r < 0)
+ return r;
+
+ r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
+ if (r < 0)
+ return r;
+
+ if (mount_id != mount_id_parent)
+ return 1;
+
+ /* Hmm, so, the mount ids are the same. This leaves one
+ * special case though for the root file system. For that,
+ * let's see if the parent directory has the same inode as we
+ * are interested in. Hence, let's also do fstat() checks now,
+ * too, but avoid the st_dev comparisons, since they aren't
+ * that useful on unionfs mounts. */
+ check_st_dev = false;
+
+fallback_fstat:
+ /* yay for fstatat() taking a different set of flags than the other
+ * _at() above */
+ if (flags & AT_SYMLINK_FOLLOW)
+ flags &= ~AT_SYMLINK_FOLLOW;
+ else
+ flags |= AT_SYMLINK_NOFOLLOW;
+ if (fstatat(fd, filename, &a, flags) < 0)
+ return -errno;
+
+ if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
+ return -errno;
+
+ /* A directory with same device and inode as its parent? Must
+ * be the root directory */
+ if (a.st_dev == b.st_dev &&
+ a.st_ino == b.st_ino)
+ return 1;
+
+ return check_st_dev && (a.st_dev != b.st_dev);
+}
+
+/* flags can be AT_SYMLINK_FOLLOW or 0 */
+int path_is_mount_point(const char *t, const char *root, int flags) {
+ _cleanup_free_ char *canonical = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(t);
+ assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
+
+ if (path_equal(t, "/"))
+ return 1;
+
+ /* we need to resolve symlinks manually, we can't just rely on
+ * fd_is_mount_point() to do that for us; if we have a structure like
+ * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
+ * look at needs to be /usr, not /. */
+ if (flags & AT_SYMLINK_FOLLOW) {
+ r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical, NULL);
+ if (r < 0)
+ return r;
+
+ t = canonical;
+ }
+
+ fd = open_parent(t, O_PATH|O_CLOEXEC, 0);
+ if (fd < 0)
+ return fd;
+
+ return fd_is_mount_point(fd, last_path_component(t), flags);
+}
+
+int path_get_mnt_id(const char *path, int *ret) {
+ STRUCT_NEW_STATX_DEFINE(buf);
+ int r;
+
+ if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, STATX_MNT_ID, &buf.sx) < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+
+ /* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack
+ * privileges */
+
+ } else if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) {
+ *ret = buf.nsx.stx_mnt_id;
+ return 0;
+ }
+
+ r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
+ if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
+ return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
+
+ return r;
+}
+
+bool fstype_is_network(const char *fstype) {
+ const char *x;
+
+ x = startswith(fstype, "fuse.");
+ if (x)
+ fstype = x;
+
+ return STR_IN_SET(fstype,
+ "afs",
+ "ceph",
+ "cifs",
+ "smb3",
+ "smbfs",
+ "sshfs",
+ "ncpfs",
+ "ncp",
+ "nfs",
+ "nfs4",
+ "gfs",
+ "gfs2",
+ "glusterfs",
+ "pvfs2", /* OrangeFS */
+ "ocfs2",
+ "lustre",
+ "davfs");
+}
+
+bool fstype_is_api_vfs(const char *fstype) {
+ return STR_IN_SET(fstype,
+ "autofs",
+ "bpf",
+ "cgroup",
+ "cgroup2",
+ "configfs",
+ "cpuset",
+ "debugfs",
+ "devpts",
+ "devtmpfs",
+ "efivarfs",
+ "fusectl",
+ "hugetlbfs",
+ "mqueue",
+ "proc",
+ "pstore",
+ "ramfs",
+ "securityfs",
+ "sysfs",
+ "tmpfs",
+ "tracefs");
+}
+
+bool fstype_is_blockdev_backed(const char *fstype) {
+ const char *x;
+
+ x = startswith(fstype, "fuse.");
+ if (x)
+ fstype = x;
+
+ return !streq(fstype, "9p") && !fstype_is_network(fstype) && !fstype_is_api_vfs(fstype);
+}
+
+bool fstype_is_ro(const char *fstype) {
+ /* All Linux file systems that are necessarily read-only */
+ return STR_IN_SET(fstype,
+ "DM_verity_hash",
+ "iso9660",
+ "squashfs");
+}
+
+bool fstype_can_discard(const char *fstype) {
+ return STR_IN_SET(fstype,
+ "btrfs",
+ "ext4",
+ "vfat",
+ "xfs");
+}
+
+bool fstype_can_uid_gid(const char *fstype) {
+
+ /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
+ * current and future. */
+
+ return STR_IN_SET(fstype,
+ "adfs",
+ "exfat",
+ "fat",
+ "hfs",
+ "hpfs",
+ "iso9660",
+ "msdos",
+ "ntfs",
+ "vfat");
+}
+
+int dev_is_devtmpfs(void) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ int mount_id, r;
+ char *e;
+
+ r = path_get_mnt_id("/dev", &mount_id);
+ if (r < 0)
+ return r;
+
+ r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ int mid;
+
+ r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (sscanf(line, "%i", &mid) != 1)
+ continue;
+
+ if (mid != mount_id)
+ continue;
+
+ e = strstr(line, " - ");
+ if (!e)
+ continue;
+
+ /* accept any name that starts with the currently expected type */
+ if (startswith(e + 3, "devtmpfs"))
+ return true;
+ }
+
+ return false;
+}
+
+const char *mount_propagation_flags_to_string(unsigned long flags) {
+
+ switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
+ case 0:
+ return "";
+ case MS_SHARED:
+ return "shared";
+ case MS_SLAVE:
+ return "slave";
+ case MS_PRIVATE:
+ return "private";
+ }
+
+ return NULL;
+}
+
+int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
+
+ if (isempty(name))
+ *ret = 0;
+ else if (streq(name, "shared"))
+ *ret = MS_SHARED;
+ else if (streq(name, "slave"))
+ *ret = MS_SLAVE;
+ else if (streq(name, "private"))
+ *ret = MS_PRIVATE;
+ else
+ return -EINVAL;
+ return 0;
+}
diff --git a/src/basic/mountpoint-util.h b/src/basic/mountpoint-util.h
new file mode 100644
index 0000000..aadb212
--- /dev/null
+++ b/src/basic/mountpoint-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
+
+int path_get_mnt_id(const char *path, int *ret);
+
+int fd_is_mount_point(int fd, const char *filename, int flags);
+int path_is_mount_point(const char *path, const char *root, int flags);
+
+bool fstype_is_network(const char *fstype);
+bool fstype_is_api_vfs(const char *fstype);
+bool fstype_is_blockdev_backed(const char *fstype);
+bool fstype_is_ro(const char *fsype);
+bool fstype_can_discard(const char *fstype);
+bool fstype_can_uid_gid(const char *fstype);
+
+int dev_is_devtmpfs(void);
+
+const char *mount_propagation_flags_to_string(unsigned long flags);
+int mount_propagation_flags_from_string(const char *name, unsigned long *ret);
diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c
new file mode 100644
index 0000000..833a18a
--- /dev/null
+++ b/src/basic/namespace-util.c
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+
+#include "fd-util.h"
+#include "missing_fs.h"
+#include "missing_magic.h"
+#include "namespace-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "user-util.h"
+
+int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
+ int rfd = -1;
+
+ assert(pid >= 0);
+
+ if (mntns_fd) {
+ const char *mntns;
+
+ mntns = procfs_file_alloca(pid, "ns/mnt");
+ mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (mntnsfd < 0)
+ return -errno;
+ }
+
+ if (pidns_fd) {
+ const char *pidns;
+
+ pidns = procfs_file_alloca(pid, "ns/pid");
+ pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (pidnsfd < 0)
+ return -errno;
+ }
+
+ if (netns_fd) {
+ const char *netns;
+
+ netns = procfs_file_alloca(pid, "ns/net");
+ netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (netnsfd < 0)
+ return -errno;
+ }
+
+ if (userns_fd) {
+ const char *userns;
+
+ userns = procfs_file_alloca(pid, "ns/user");
+ usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (usernsfd < 0 && errno != ENOENT)
+ return -errno;
+ }
+
+ if (root_fd) {
+ const char *root;
+
+ root = procfs_file_alloca(pid, "root");
+ rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (rfd < 0)
+ return -errno;
+ }
+
+ if (pidns_fd)
+ *pidns_fd = TAKE_FD(pidnsfd);
+
+ if (mntns_fd)
+ *mntns_fd = TAKE_FD(mntnsfd);
+
+ if (netns_fd)
+ *netns_fd = TAKE_FD(netnsfd);
+
+ if (userns_fd)
+ *userns_fd = TAKE_FD(usernsfd);
+
+ if (root_fd)
+ *root_fd = TAKE_FD(rfd);
+
+ return 0;
+}
+
+int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
+ if (userns_fd >= 0) {
+ /* Can't setns to your own userns, since then you could
+ * escalate from non-root to root in your own namespace, so
+ * check if namespaces equal before attempting to enter. */
+ _cleanup_free_ char *userns_fd_path = NULL;
+ int r;
+ if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
+ return -ENOMEM;
+
+ r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
+ if (r < 0)
+ return r;
+ if (r)
+ userns_fd = -1;
+ }
+
+ if (pidns_fd >= 0)
+ if (setns(pidns_fd, CLONE_NEWPID) < 0)
+ return -errno;
+
+ if (mntns_fd >= 0)
+ if (setns(mntns_fd, CLONE_NEWNS) < 0)
+ return -errno;
+
+ if (netns_fd >= 0)
+ if (setns(netns_fd, CLONE_NEWNET) < 0)
+ return -errno;
+
+ if (userns_fd >= 0)
+ if (setns(userns_fd, CLONE_NEWUSER) < 0)
+ return -errno;
+
+ if (root_fd >= 0) {
+ if (fchdir(root_fd) < 0)
+ return -errno;
+
+ if (chroot(".") < 0)
+ return -errno;
+ }
+
+ return reset_uid_gid();
+}
+
+int fd_is_network_ns(int fd) {
+ struct statfs s;
+ int r;
+
+ /* Checks whether the specified file descriptor refers to a network namespace. On old kernels there's no nice
+ * way to detect that, hence on those we'll return a recognizable error (EUCLEAN), so that callers can handle
+ * this somewhat nicely.
+ *
+ * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not
+ * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ if (!is_fs_type(&s, NSFS_MAGIC)) {
+ /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs
+ * instead. Handle that in a somewhat smart way. */
+
+ if (is_fs_type(&s, PROC_SUPER_MAGIC)) {
+ struct statfs t;
+
+ /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the
+ * passed fd might refer to a network namespace, but we can't know for sure. In that case,
+ * return a recognizable error. */
+
+ if (statfs("/proc/self/ns/net", &t) < 0)
+ return -errno;
+
+ if (s.f_type == t.f_type)
+ return -EUCLEAN; /* It's possible, we simply don't know */
+ }
+
+ return 0; /* No! */
+ }
+
+ r = ioctl(fd, NS_GET_NSTYPE);
+ if (r < 0) {
+ if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */
+ return -EUCLEAN;
+
+ return -errno;
+ }
+
+ return r == CLONE_NEWNET;
+}
+
+int detach_mount_namespace(void) {
+
+ /* Detaches the mount namespace, disabling propagation from our namespace to the host */
+
+ if (unshare(CLONE_NEWNS) < 0)
+ return -errno;
+
+ if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h
new file mode 100644
index 0000000..7f7d066
--- /dev/null
+++ b/src/basic/namespace-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd);
+int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd);
+
+int fd_is_network_ns(int fd);
+
+int detach_mount_namespace(void);
diff --git a/src/basic/nss-util.h b/src/basic/nss-util.h
new file mode 100644
index 0000000..dfc0d3f
--- /dev/null
+++ b/src/basic/nss-util.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <grp.h>
+#include <netdb.h>
+#include <nss.h>
+#include <pwd.h>
+#include <resolv.h>
+
+#define NSS_SIGNALS_BLOCK SIGALRM,SIGVTALRM,SIGPIPE,SIGCHLD,SIGTSTP,SIGIO,SIGHUP,SIGUSR1,SIGUSR2,SIGPROF,SIGURG,SIGWINCH
+
+#ifndef DEPRECATED_RES_USE_INET6
+# define DEPRECATED_RES_USE_INET6 0x00002000
+#endif
+
+#define NSS_GETHOSTBYNAME_PROTOTYPES(module) \
+enum nss_status _nss_##module##_gethostbyname4_r( \
+ const char *name, \
+ struct gaih_addrtuple **pat, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp) _public_; \
+enum nss_status _nss_##module##_gethostbyname3_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp, \
+ char **canonp) _public_; \
+enum nss_status _nss_##module##_gethostbyname2_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_; \
+enum nss_status _nss_##module##_gethostbyname_r( \
+ const char *name, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_
+
+#define NSS_GETHOSTBYADDR_PROTOTYPES(module) \
+enum nss_status _nss_##module##_gethostbyaddr2_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop, \
+ int32_t *ttlp) _public_; \
+enum nss_status _nss_##module##_gethostbyaddr_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) _public_
+
+#define NSS_GETHOSTBYNAME_FALLBACKS(module) \
+enum nss_status _nss_##module##_gethostbyname2_r( \
+ const char *name, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ return _nss_##module##_gethostbyname3_r( \
+ name, \
+ af, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+} \
+enum nss_status _nss_##module##_gethostbyname_r( \
+ const char *name, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ enum nss_status ret = NSS_STATUS_NOTFOUND; \
+ \
+ if (_res.options & DEPRECATED_RES_USE_INET6) \
+ ret = _nss_##module##_gethostbyname3_r( \
+ name, \
+ AF_INET6, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+ if (ret == NSS_STATUS_NOTFOUND) \
+ ret = _nss_##module##_gethostbyname3_r( \
+ name, \
+ AF_INET, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL, \
+ NULL); \
+ return ret; \
+}
+
+#define NSS_GETHOSTBYADDR_FALLBACKS(module) \
+enum nss_status _nss_##module##_gethostbyaddr_r( \
+ const void* addr, socklen_t len, \
+ int af, \
+ struct hostent *host, \
+ char *buffer, size_t buflen, \
+ int *errnop, int *h_errnop) { \
+ return _nss_##module##_gethostbyaddr2_r( \
+ addr, len, \
+ af, \
+ host, \
+ buffer, buflen, \
+ errnop, h_errnop, \
+ NULL); \
+}
+
+#define NSS_GETPW_PROTOTYPES(module) \
+enum nss_status _nss_##module##_getpwnam_r( \
+ const char *name, \
+ struct passwd *pwd, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_; \
+enum nss_status _nss_##module##_getpwuid_r( \
+ uid_t uid, \
+ struct passwd *pwd, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_
+
+#define NSS_GETGR_PROTOTYPES(module) \
+enum nss_status _nss_##module##_getgrnam_r( \
+ const char *name, \
+ struct group *gr, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_; \
+enum nss_status _nss_##module##_getgrgid_r( \
+ gid_t gid, \
+ struct group *gr, \
+ char *buffer, size_t buflen, \
+ int *errnop) _public_
+
+#define NSS_PWENT_PROTOTYPES(module) \
+enum nss_status _nss_##module##_endpwent( \
+ void) _public_; \
+enum nss_status _nss_##module##_setpwent( \
+ int stayopen) _public_; \
+enum nss_status _nss_##module##_getpwent_r( \
+ struct passwd *result, \
+ char *buffer, \
+ size_t buflen, \
+ int *errnop) _public_;
+
+#define NSS_GRENT_PROTOTYPES(module) \
+enum nss_status _nss_##module##_endgrent( \
+ void) _public_; \
+enum nss_status _nss_##module##_setgrent( \
+ int stayopen) _public_; \
+enum nss_status _nss_##module##_getgrent_r( \
+ struct group *result, \
+ char *buffer, \
+ size_t buflen, \
+ int *errnop) _public_;
+
+#define NSS_INITGROUPS_PROTOTYPE(module) \
+enum nss_status _nss_##module##_initgroups_dyn( \
+ const char *user, \
+ gid_t group, \
+ long int *start, \
+ long int *size, \
+ gid_t **groupsp, \
+ long int limit, \
+ int *errnop) _public_;
+
+typedef enum nss_status (*_nss_gethostbyname4_r_t)(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp);
+
+typedef enum nss_status (*_nss_gethostbyname3_r_t)(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp);
+
+typedef enum nss_status (*_nss_gethostbyname2_r_t)(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
+
+typedef enum nss_status (*_nss_gethostbyname_r_t)(
+ const char *name,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
+
+typedef enum nss_status (*_nss_gethostbyaddr2_r_t)(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp);
+typedef enum nss_status (*_nss_gethostbyaddr_r_t)(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *host,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop);
diff --git a/src/basic/nulstr-util.c b/src/basic/nulstr-util.c
new file mode 100644
index 0000000..49fcbb0
--- /dev/null
+++ b/src/basic/nulstr-util.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "nulstr-util.h"
+#include "string-util.h"
+
+bool nulstr_contains(const char *nulstr, const char *needle) {
+ const char *i;
+
+ if (!nulstr)
+ return false;
+
+ NULSTR_FOREACH(i, nulstr)
+ if (streq(i, needle))
+ return true;
+
+ return false;
+}
diff --git a/src/basic/nulstr-util.h b/src/basic/nulstr-util.h
new file mode 100644
index 0000000..ee9b632
--- /dev/null
+++ b/src/basic/nulstr-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <string.h>
+
+#define NULSTR_FOREACH(i, l) \
+ for ((i) = (l); (i) && *(i); (i) = strchr((i), 0)+1)
+
+#define NULSTR_FOREACH_PAIR(i, j, l) \
+ for ((i) = (l), (j) = strchr((i), 0)+1; (i) && *(i); (i) = strchr((j), 0)+1, (j) = *(i) ? strchr((i), 0)+1 : (i))
+
+bool nulstr_contains(const char *nulstr, const char *needle);
diff --git a/src/basic/ordered-set.c b/src/basic/ordered-set.c
new file mode 100644
index 0000000..58fa8af
--- /dev/null
+++ b/src/basic/ordered-set.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fileio.h"
+#include "ordered-set.h"
+#include "strv.h"
+
+int _ordered_set_ensure_allocated(OrderedSet **s, const struct hash_ops *ops HASHMAP_DEBUG_PARAMS) {
+ if (*s)
+ return 0;
+
+ *s = _ordered_set_new(ops HASHMAP_DEBUG_PASS_ARGS);
+ if (!*s)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int _ordered_set_ensure_put(OrderedSet **s, const struct hash_ops *ops, void *p HASHMAP_DEBUG_PARAMS) {
+ int r;
+
+ r = _ordered_set_ensure_allocated(s, ops HASHMAP_DEBUG_PASS_ARGS);
+ if (r < 0)
+ return r;
+
+ return ordered_set_put(*s, p);
+}
+
+int ordered_set_consume(OrderedSet *s, void *p) {
+ int r;
+
+ r = ordered_set_put(s, p);
+ if (r <= 0)
+ free(p);
+
+ return r;
+}
+
+int ordered_set_put_strdup(OrderedSet *s, const char *p) {
+ char *c;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ c = strdup(p);
+ if (!c)
+ return -ENOMEM;
+
+ r = ordered_set_consume(s, c);
+ if (r == -EEXIST)
+ return 0;
+
+ return r;
+}
+
+int ordered_set_put_strdupv(OrderedSet *s, char **l) {
+ int n = 0, r;
+ char **i;
+
+ STRV_FOREACH(i, l) {
+ r = ordered_set_put_strdup(s, *i);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ return n;
+}
+
+int ordered_set_put_string_set(OrderedSet *s, OrderedSet *l) {
+ int n = 0, r;
+ char *p;
+
+ /* Like ordered_set_put_strv, but for an OrderedSet of strings */
+
+ ORDERED_SET_FOREACH(p, l) {
+ r = ordered_set_put_strdup(s, p);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ return n;
+}
+
+void ordered_set_print(FILE *f, const char *field, OrderedSet *s) {
+ bool space = false;
+ char *p;
+
+ if (ordered_set_isempty(s))
+ return;
+
+ fputs(field, f);
+
+ ORDERED_SET_FOREACH(p, s)
+ fputs_with_space(f, p, NULL, &space);
+
+ fputc('\n', f);
+}
diff --git a/src/basic/ordered-set.h b/src/basic/ordered-set.h
new file mode 100644
index 0000000..baf8202
--- /dev/null
+++ b/src/basic/ordered-set.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "hashmap.h"
+
+typedef struct OrderedSet OrderedSet;
+
+static inline OrderedSet* _ordered_set_new(const struct hash_ops *ops HASHMAP_DEBUG_PARAMS) {
+ return (OrderedSet*) _ordered_hashmap_new(ops HASHMAP_DEBUG_PASS_ARGS);
+}
+#define ordered_set_new(ops) _ordered_set_new(ops HASHMAP_DEBUG_SRC_ARGS)
+
+int _ordered_set_ensure_allocated(OrderedSet **s, const struct hash_ops *ops HASHMAP_DEBUG_PARAMS);
+#define ordered_set_ensure_allocated(s, ops) _ordered_set_ensure_allocated(s, ops HASHMAP_DEBUG_SRC_ARGS)
+
+int _ordered_set_ensure_put(OrderedSet **s, const struct hash_ops *ops, void *p HASHMAP_DEBUG_PARAMS);
+#define ordered_set_ensure_put(s, hash_ops, key) _ordered_set_ensure_put(s, hash_ops, key HASHMAP_DEBUG_SRC_ARGS)
+
+static inline OrderedSet* ordered_set_free(OrderedSet *s) {
+ return (OrderedSet*) ordered_hashmap_free((OrderedHashmap*) s);
+}
+
+static inline OrderedSet* ordered_set_free_free(OrderedSet *s) {
+ return (OrderedSet*) ordered_hashmap_free_free((OrderedHashmap*) s);
+}
+
+static inline int ordered_set_put(OrderedSet *s, void *p) {
+ return ordered_hashmap_put((OrderedHashmap*) s, p, p);
+}
+
+static inline unsigned ordered_set_size(OrderedSet *s) {
+ return ordered_hashmap_size((OrderedHashmap*) s);
+}
+
+static inline bool ordered_set_isempty(OrderedSet *s) {
+ return ordered_hashmap_isempty((OrderedHashmap*) s);
+}
+
+static inline bool ordered_set_iterate(OrderedSet *s, Iterator *i, void **value) {
+ return ordered_hashmap_iterate((OrderedHashmap*) s, i, value, NULL);
+}
+
+static inline void* ordered_set_remove(OrderedSet *s, void *p) {
+ return ordered_hashmap_remove((OrderedHashmap*) s, p);
+}
+
+static inline void* ordered_set_first(OrderedSet *s) {
+ return ordered_hashmap_first((OrderedHashmap*) s);
+}
+
+static inline void* ordered_set_steal_first(OrderedSet *s) {
+ return ordered_hashmap_steal_first((OrderedHashmap*) s);
+}
+
+static inline char** ordered_set_get_strv(OrderedSet *s) {
+ return _hashmap_get_strv(HASHMAP_BASE((OrderedHashmap*) s));
+}
+
+int ordered_set_consume(OrderedSet *s, void *p);
+int ordered_set_put_strdup(OrderedSet *s, const char *p);
+int ordered_set_put_strdupv(OrderedSet *s, char **l);
+int ordered_set_put_string_set(OrderedSet *s, OrderedSet *l);
+void ordered_set_print(FILE *f, const char *field, OrderedSet *s);
+
+#define _ORDERED_SET_FOREACH(e, s, i) \
+ for (Iterator i = ITERATOR_FIRST; ordered_set_iterate((s), &i, (void**)&(e)); )
+#define ORDERED_SET_FOREACH(e, s) \
+ _ORDERED_SET_FOREACH(e, s, UNIQ_T(i, UNIQ))
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free_free);
+
+#define _cleanup_ordered_set_free_ _cleanup_(ordered_set_freep)
+#define _cleanup_ordered_set_free_free_ _cleanup_(ordered_set_free_freep)
diff --git a/src/basic/parse-util.c b/src/basic/parse-util.c
new file mode 100644
index 0000000..5d4dafe
--- /dev/null
+++ b/src/basic/parse-util.c
@@ -0,0 +1,906 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/oom.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "extract-word.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "missing_network.h"
+#include "parse-util.h"
+#include "process-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int parse_boolean(const char *v) {
+ if (!v)
+ return -EINVAL;
+
+ if (STRCASE_IN_SET(v,
+ "1",
+ "yes",
+ "y",
+ "true",
+ "t",
+ "on"))
+ return 1;
+
+ if (STRCASE_IN_SET(v,
+ "0",
+ "no",
+ "n",
+ "false",
+ "f",
+ "off"))
+ return 0;
+
+ return -EINVAL;
+}
+
+int parse_pid(const char *s, pid_t* ret_pid) {
+ unsigned long ul = 0;
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(ret_pid);
+
+ r = safe_atolu(s, &ul);
+ if (r < 0)
+ return r;
+
+ pid = (pid_t) ul;
+
+ if ((unsigned long) pid != ul)
+ return -ERANGE;
+
+ if (!pid_is_valid(pid))
+ return -ERANGE;
+
+ *ret_pid = pid;
+ return 0;
+}
+
+int parse_mode(const char *s, mode_t *ret) {
+ unsigned m;
+ int r;
+
+ assert(s);
+
+ r = safe_atou_full(s, 8 |
+ SAFE_ATO_REFUSE_PLUS_MINUS, /* Leading '+' or even '-' char? that's just weird,
+ * refuse. User might have wanted to add mode flags or
+ * so, but this parser doesn't allow that, so let's
+ * better be safe. */
+ &m);
+ if (r < 0)
+ return r;
+ if (m > 07777)
+ return -ERANGE;
+
+ if (ret)
+ *ret = m;
+ return 0;
+}
+
+int parse_ifindex(const char *s) {
+ int ifi, r;
+
+ assert(s);
+
+ r = safe_atoi(s, &ifi);
+ if (r < 0)
+ return r;
+ if (ifi <= 0)
+ return -EINVAL;
+
+ return ifi;
+}
+
+int parse_mtu(int family, const char *s, uint32_t *ret) {
+ uint64_t u;
+ size_t m;
+ int r;
+
+ r = parse_size(s, 1024, &u);
+ if (r < 0)
+ return r;
+
+ if (u > UINT32_MAX)
+ return -ERANGE;
+
+ if (family == AF_INET6)
+ m = IPV6_MIN_MTU; /* This is 1280 */
+ else
+ m = IPV4_MIN_MTU; /* For all other protocols, including 'unspecified' we assume the IPv4 minimal MTU */
+
+ if (u < m)
+ return -ERANGE;
+
+ *ret = (uint32_t) u;
+ return 0;
+}
+
+int parse_size(const char *t, uint64_t base, uint64_t *size) {
+
+ /* Soo, sometimes we want to parse IEC binary suffixes, and
+ * sometimes SI decimal suffixes. This function can parse
+ * both. Which one is the right way depends on the
+ * context. Wikipedia suggests that SI is customary for
+ * hardware metrics and network speeds, while IEC is
+ * customary for most data sizes used by software and volatile
+ * (RAM) memory. Hence be careful which one you pick!
+ *
+ * In either case we use just K, M, G as suffix, and not Ki,
+ * Mi, Gi or so (as IEC would suggest). That's because that's
+ * frickin' ugly. But this means you really need to make sure
+ * to document which base you are parsing when you use this
+ * call. */
+
+ struct table {
+ const char *suffix;
+ unsigned long long factor;
+ };
+
+ static const struct table iec[] = {
+ { "E", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL },
+ { "P", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL },
+ { "T", 1024ULL*1024ULL*1024ULL*1024ULL },
+ { "G", 1024ULL*1024ULL*1024ULL },
+ { "M", 1024ULL*1024ULL },
+ { "K", 1024ULL },
+ { "B", 1ULL },
+ { "", 1ULL },
+ };
+
+ static const struct table si[] = {
+ { "E", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL*1000ULL },
+ { "P", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL },
+ { "T", 1000ULL*1000ULL*1000ULL*1000ULL },
+ { "G", 1000ULL*1000ULL*1000ULL },
+ { "M", 1000ULL*1000ULL },
+ { "K", 1000ULL },
+ { "B", 1ULL },
+ { "", 1ULL },
+ };
+
+ const struct table *table;
+ const char *p;
+ unsigned long long r = 0;
+ unsigned n_entries, start_pos = 0;
+
+ assert(t);
+ assert(IN_SET(base, 1000, 1024));
+ assert(size);
+
+ if (base == 1000) {
+ table = si;
+ n_entries = ELEMENTSOF(si);
+ } else {
+ table = iec;
+ n_entries = ELEMENTSOF(iec);
+ }
+
+ p = t;
+ do {
+ unsigned long long l, tmp;
+ double frac = 0;
+ char *e;
+ unsigned i;
+
+ p += strspn(p, WHITESPACE);
+
+ errno = 0;
+ l = strtoull(p, &e, 10);
+ if (errno > 0)
+ return -errno;
+ if (e == p)
+ return -EINVAL;
+ if (*p == '-')
+ return -ERANGE;
+
+ if (*e == '.') {
+ e++;
+
+ /* strtoull() itself would accept space/+/- */
+ if (*e >= '0' && *e <= '9') {
+ unsigned long long l2;
+ char *e2;
+
+ l2 = strtoull(e, &e2, 10);
+ if (errno > 0)
+ return -errno;
+
+ /* Ignore failure. E.g. 10.M is valid */
+ frac = l2;
+ for (; e < e2; e++)
+ frac /= 10;
+ }
+ }
+
+ e += strspn(e, WHITESPACE);
+
+ for (i = start_pos; i < n_entries; i++)
+ if (startswith(e, table[i].suffix))
+ break;
+
+ if (i >= n_entries)
+ return -EINVAL;
+
+ if (l + (frac > 0) > ULLONG_MAX / table[i].factor)
+ return -ERANGE;
+
+ tmp = l * table[i].factor + (unsigned long long) (frac * table[i].factor);
+ if (tmp > ULLONG_MAX - r)
+ return -ERANGE;
+
+ r += tmp;
+ if ((unsigned long long) (uint64_t) r != r)
+ return -ERANGE;
+
+ p = e + strlen(table[i].suffix);
+
+ start_pos = i + 1;
+
+ } while (*p);
+
+ *size = r;
+
+ return 0;
+}
+
+int parse_range(const char *t, unsigned *lower, unsigned *upper) {
+ _cleanup_free_ char *word = NULL;
+ unsigned l, u;
+ int r;
+
+ assert(lower);
+ assert(upper);
+
+ /* Extract the lower bound. */
+ r = extract_first_word(&t, &word, "-", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ r = safe_atou(word, &l);
+ if (r < 0)
+ return r;
+
+ /* Check for the upper bound and extract it if needed */
+ if (!t)
+ /* Single number with no dashes. */
+ u = l;
+ else if (!*t)
+ /* Trailing dash is an error. */
+ return -EINVAL;
+ else {
+ r = safe_atou(t, &u);
+ if (r < 0)
+ return r;
+ }
+
+ *lower = l;
+ *upper = u;
+ return 0;
+}
+
+int parse_errno(const char *t) {
+ int r, e;
+
+ assert(t);
+
+ r = errno_from_name(t);
+ if (r > 0)
+ return r;
+
+ r = safe_atoi(t, &e);
+ if (r < 0)
+ return r;
+
+ /* 0 is also allowed here */
+ if (!errno_is_valid(e) && e != 0)
+ return -ERANGE;
+
+ return e;
+}
+
+#if HAVE_SECCOMP
+int parse_syscall_and_errno(const char *in, char **name, int *error) {
+ _cleanup_free_ char *n = NULL;
+ char *p;
+ int e = -1;
+
+ assert(in);
+ assert(name);
+ assert(error);
+
+ /*
+ * This parse "syscall:errno" like "uname:EILSEQ", "@sync:255".
+ * If errno is omitted, then error is set to -1.
+ * Empty syscall name is not allowed.
+ * Here, we do not check that the syscall name is valid or not.
+ */
+
+ p = strchr(in, ':');
+ if (p) {
+ e = seccomp_parse_errno_or_action(p + 1);
+ if (e < 0)
+ return e;
+
+ n = strndup(in, p - in);
+ } else
+ n = strdup(in);
+
+ if (!n)
+ return -ENOMEM;
+
+ if (isempty(n))
+ return -EINVAL;
+
+ *error = e;
+ *name = TAKE_PTR(n);
+
+ return 0;
+}
+#endif
+
+static const char *mangle_base(const char *s, unsigned *base) {
+ const char *k;
+
+ assert(s);
+ assert(base);
+
+ /* Base already explicitly specified, then don't do anything. */
+ if (SAFE_ATO_MASK_FLAGS(*base) != 0)
+ return s;
+
+ /* Support Python 3 style "0b" and 0x" prefixes, because they truly make sense, much more than C's "0" prefix for octal. */
+ k = STARTSWITH_SET(s, "0b", "0B");
+ if (k) {
+ *base = 2 | (*base & SAFE_ATO_ALL_FLAGS);
+ return k;
+ }
+
+ k = STARTSWITH_SET(s, "0o", "0O");
+ if (k) {
+ *base = 8 | (*base & SAFE_ATO_ALL_FLAGS);
+ return k;
+ }
+
+ return s;
+}
+
+int safe_atou_full(const char *s, unsigned base, unsigned *ret_u) {
+ char *x = NULL;
+ unsigned long l;
+
+ assert(s);
+ assert(SAFE_ATO_MASK_FLAGS(base) <= 16);
+
+ /* strtoul() is happy to parse negative values, and silently converts them to unsigned values without
+ * generating an error. We want a clean error, hence let's look for the "-" prefix on our own, and
+ * generate an error. But let's do so only after strtoul() validated that the string is clean
+ * otherwise, so that we return EINVAL preferably over ERANGE. */
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_WHITESPACE) &&
+ strchr(WHITESPACE, s[0]))
+ return -EINVAL;
+
+ s += strspn(s, WHITESPACE);
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_PLUS_MINUS) &&
+ IN_SET(s[0], '+', '-'))
+ return -EINVAL; /* Note that we check the "-" prefix again a second time below, but return a
+ * different error. I.e. if the SAFE_ATO_REFUSE_PLUS_MINUS flag is set we
+ * blanket refuse +/- prefixed integers, while if it is missing we'll just
+ * return ERANGE, because the string actually parses correctly, but doesn't
+ * fit in the return type. */
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_ZERO) &&
+ s[0] == '0' && !streq(s, "0"))
+ return -EINVAL; /* This is particularly useful to avoid ambiguities between C's octal
+ * notation and assumed-to-be-decimal integers with a leading zero. */
+
+ s = mangle_base(s, &base);
+
+ errno = 0;
+ l = strtoul(s, &x, SAFE_ATO_MASK_FLAGS(base) /* Let's mask off the flags bits so that only the actual
+ * base is left */);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (l != 0 && s[0] == '-')
+ return -ERANGE;
+ if ((unsigned long) (unsigned) l != l)
+ return -ERANGE;
+
+ if (ret_u)
+ *ret_u = (unsigned) l;
+
+ return 0;
+}
+
+int safe_atoi(const char *s, int *ret_i) {
+ unsigned base = 0;
+ char *x = NULL;
+ long l;
+
+ assert(s);
+
+ s += strspn(s, WHITESPACE);
+ s = mangle_base(s, &base);
+
+ errno = 0;
+ l = strtol(s, &x, base);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if ((long) (int) l != l)
+ return -ERANGE;
+
+ if (ret_i)
+ *ret_i = (int) l;
+
+ return 0;
+}
+
+int safe_atollu_full(const char *s, unsigned base, long long unsigned *ret_llu) {
+ char *x = NULL;
+ unsigned long long l;
+
+ assert(s);
+ assert(SAFE_ATO_MASK_FLAGS(base) <= 16);
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_WHITESPACE) &&
+ strchr(WHITESPACE, s[0]))
+ return -EINVAL;
+
+ s += strspn(s, WHITESPACE);
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_PLUS_MINUS) &&
+ IN_SET(s[0], '+', '-'))
+ return -EINVAL;
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_ZERO) &&
+ s[0] == '0' && s[1] != 0)
+ return -EINVAL;
+
+ s = mangle_base(s, &base);
+
+ errno = 0;
+ l = strtoull(s, &x, SAFE_ATO_MASK_FLAGS(base));
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (l != 0 && s[0] == '-')
+ return -ERANGE;
+
+ if (ret_llu)
+ *ret_llu = l;
+
+ return 0;
+}
+
+int safe_atolli(const char *s, long long int *ret_lli) {
+ unsigned base = 0;
+ char *x = NULL;
+ long long l;
+
+ assert(s);
+
+ s += strspn(s, WHITESPACE);
+ s = mangle_base(s, &base);
+
+ errno = 0;
+ l = strtoll(s, &x, base);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+
+ if (ret_lli)
+ *ret_lli = l;
+
+ return 0;
+}
+
+int safe_atou8(const char *s, uint8_t *ret) {
+ unsigned base = 0;
+ unsigned long l;
+ char *x = NULL;
+
+ assert(s);
+
+ s += strspn(s, WHITESPACE);
+ s = mangle_base(s, &base);
+
+ errno = 0;
+ l = strtoul(s, &x, base);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (l != 0 && s[0] == '-')
+ return -ERANGE;
+ if ((unsigned long) (uint8_t) l != l)
+ return -ERANGE;
+
+ if (ret)
+ *ret = (uint8_t) l;
+ return 0;
+}
+
+int safe_atou16_full(const char *s, unsigned base, uint16_t *ret) {
+ char *x = NULL;
+ unsigned long l;
+
+ assert(s);
+ assert(SAFE_ATO_MASK_FLAGS(base) <= 16);
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_WHITESPACE) &&
+ strchr(WHITESPACE, s[0]))
+ return -EINVAL;
+
+ s += strspn(s, WHITESPACE);
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_PLUS_MINUS) &&
+ IN_SET(s[0], '+', '-'))
+ return -EINVAL;
+
+ if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_ZERO) &&
+ s[0] == '0' && s[1] != 0)
+ return -EINVAL;
+
+ s = mangle_base(s, &base);
+
+ errno = 0;
+ l = strtoul(s, &x, SAFE_ATO_MASK_FLAGS(base));
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if (l != 0 && s[0] == '-')
+ return -ERANGE;
+ if ((unsigned long) (uint16_t) l != l)
+ return -ERANGE;
+
+ if (ret)
+ *ret = (uint16_t) l;
+
+ return 0;
+}
+
+int safe_atoi16(const char *s, int16_t *ret) {
+ unsigned base = 0;
+ char *x = NULL;
+ long l;
+
+ assert(s);
+
+ s += strspn(s, WHITESPACE);
+ s = mangle_base(s, &base);
+
+ errno = 0;
+ l = strtol(s, &x, base);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+ if ((long) (int16_t) l != l)
+ return -ERANGE;
+
+ if (ret)
+ *ret = (int16_t) l;
+
+ return 0;
+}
+
+int safe_atod(const char *s, double *ret_d) {
+ _cleanup_(freelocalep) locale_t loc = (locale_t) 0;
+ char *x = NULL;
+ double d = 0;
+
+ assert(s);
+
+ loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0);
+ if (loc == (locale_t) 0)
+ return -errno;
+
+ errno = 0;
+ d = strtod_l(s, &x, loc);
+ if (errno > 0)
+ return -errno;
+ if (!x || x == s || *x != 0)
+ return -EINVAL;
+
+ if (ret_d)
+ *ret_d = (double) d;
+
+ return 0;
+}
+
+int parse_fractional_part_u(const char **p, size_t digits, unsigned *res) {
+ size_t i;
+ unsigned val = 0;
+ const char *s;
+
+ s = *p;
+
+ /* accept any number of digits, strtoull is limited to 19 */
+ for (i=0; i < digits; i++,s++) {
+ if (*s < '0' || *s > '9') {
+ if (i == 0)
+ return -EINVAL;
+
+ /* too few digits, pad with 0 */
+ for (; i < digits; i++)
+ val *= 10;
+
+ break;
+ }
+
+ val *= 10;
+ val += *s - '0';
+ }
+
+ /* maybe round up */
+ if (*s >= '5' && *s <= '9')
+ val++;
+
+ s += strspn(s, DIGITS);
+
+ *p = s;
+ *res = val;
+
+ return 0;
+}
+
+int parse_percent_unbounded(const char *p) {
+ const char *pc, *n;
+ int r, v;
+
+ pc = endswith(p, "%");
+ if (!pc)
+ return -EINVAL;
+
+ n = strndupa(p, pc - p);
+ r = safe_atoi(n, &v);
+ if (r < 0)
+ return r;
+ if (v < 0)
+ return -ERANGE;
+
+ return v;
+}
+
+int parse_percent(const char *p) {
+ int v;
+
+ v = parse_percent_unbounded(p);
+ if (v > 100)
+ return -ERANGE;
+
+ return v;
+}
+
+int parse_permille_unbounded(const char *p) {
+ const char *pc, *pm, *dot, *n;
+ int r, q, v;
+
+ pm = endswith(p, "‰");
+ if (pm) {
+ n = strndupa(p, pm - p);
+ r = safe_atoi(n, &v);
+ if (r < 0)
+ return r;
+ if (v < 0)
+ return -ERANGE;
+ } else {
+ pc = endswith(p, "%");
+ if (!pc)
+ return -EINVAL;
+
+ dot = memchr(p, '.', pc - p);
+ if (dot) {
+ if (dot + 2 != pc)
+ return -EINVAL;
+ if (dot[1] < '0' || dot[1] > '9')
+ return -EINVAL;
+ q = dot[1] - '0';
+ n = strndupa(p, dot - p);
+ } else {
+ q = 0;
+ n = strndupa(p, pc - p);
+ }
+ r = safe_atoi(n, &v);
+ if (r < 0)
+ return r;
+ if (v < 0)
+ return -ERANGE;
+ if (v > (INT_MAX - q) / 10)
+ return -ERANGE;
+
+ v = v * 10 + q;
+ }
+
+ return v;
+}
+
+int parse_permille(const char *p) {
+ int v;
+
+ v = parse_permille_unbounded(p);
+ if (v > 1000)
+ return -ERANGE;
+
+ return v;
+}
+
+int parse_nice(const char *p, int *ret) {
+ int n, r;
+
+ r = safe_atoi(p, &n);
+ if (r < 0)
+ return r;
+
+ if (!nice_is_valid(n))
+ return -ERANGE;
+
+ *ret = n;
+ return 0;
+}
+
+int parse_ip_port(const char *s, uint16_t *ret) {
+ uint16_t l;
+ int r;
+
+ r = safe_atou16(s, &l);
+ if (r < 0)
+ return r;
+
+ if (l == 0)
+ return -EINVAL;
+
+ *ret = (uint16_t) l;
+
+ return 0;
+}
+
+int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high) {
+ unsigned l, h;
+ int r;
+
+ r = parse_range(s, &l, &h);
+ if (r < 0)
+ return r;
+
+ if (l <= 0 || l > 65535 || h <= 0 || h > 65535)
+ return -EINVAL;
+
+ if (h < l)
+ return -EINVAL;
+
+ *low = l;
+ *high = h;
+
+ return 0;
+}
+
+int parse_ip_prefix_length(const char *s, int *ret) {
+ unsigned l;
+ int r;
+
+ r = safe_atou(s, &l);
+ if (r < 0)
+ return r;
+
+ if (l > 128)
+ return -ERANGE;
+
+ *ret = (int) l;
+
+ return 0;
+}
+
+int parse_dev(const char *s, dev_t *ret) {
+ const char *major;
+ unsigned x, y;
+ size_t n;
+ int r;
+
+ n = strspn(s, DIGITS);
+ if (n == 0)
+ return -EINVAL;
+ if (s[n] != ':')
+ return -EINVAL;
+
+ major = strndupa(s, n);
+ r = safe_atou(major, &x);
+ if (r < 0)
+ return r;
+
+ r = safe_atou(s + n + 1, &y);
+ if (r < 0)
+ return r;
+
+ if (!DEVICE_MAJOR_VALID(x) || !DEVICE_MINOR_VALID(y))
+ return -ERANGE;
+
+ *ret = makedev(x, y);
+ return 0;
+}
+
+int parse_oom_score_adjust(const char *s, int *ret) {
+ int r, v;
+
+ assert(s);
+ assert(ret);
+
+ r = safe_atoi(s, &v);
+ if (r < 0)
+ return r;
+
+ if (v < OOM_SCORE_ADJ_MIN || v > OOM_SCORE_ADJ_MAX)
+ return -ERANGE;
+
+ *ret = v;
+ return 0;
+}
+
+int store_loadavg_fixed_point(unsigned long i, unsigned long f, loadavg_t *ret) {
+ assert(ret);
+
+ if (i >= (~0UL << FSHIFT))
+ return -ERANGE;
+
+ i = i << FSHIFT;
+ f = DIV_ROUND_UP((f << FSHIFT), 100);
+
+ if (f >= FIXED_1)
+ return -ERANGE;
+
+ *ret = i | f;
+ return 0;
+}
+
+int parse_loadavg_fixed_point(const char *s, loadavg_t *ret) {
+ const char *d, *f_str, *i_str;
+ unsigned long i, f;
+ int r;
+
+ assert(s);
+ assert(ret);
+
+ d = strchr(s, '.');
+ if (!d)
+ return -EINVAL;
+
+ i_str = strndupa(s, d - s);
+ f_str = d + 1;
+
+ r = safe_atolu_full(i_str, 10, &i);
+ if (r < 0)
+ return r;
+
+ r = safe_atolu_full(f_str, 10, &f);
+ if (r < 0)
+ return r;
+
+ return store_loadavg_fixed_point(i, f, ret);
+}
diff --git a/src/basic/parse-util.h b/src/basic/parse-util.h
new file mode 100644
index 0000000..81478ed
--- /dev/null
+++ b/src/basic/parse-util.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/loadavg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+typedef unsigned long loadavg_t;
+
+int parse_boolean(const char *v) _pure_;
+int parse_dev(const char *s, dev_t *ret);
+int parse_pid(const char *s, pid_t* ret_pid);
+int parse_mode(const char *s, mode_t *ret);
+int parse_ifindex(const char *s);
+int parse_mtu(int family, const char *s, uint32_t *ret);
+
+int parse_size(const char *t, uint64_t base, uint64_t *size);
+int parse_range(const char *t, unsigned *lower, unsigned *upper);
+int parse_errno(const char *t);
+#if HAVE_SECCOMP
+int parse_syscall_and_errno(const char *in, char **name, int *error);
+#endif
+
+#define SAFE_ATO_REFUSE_PLUS_MINUS (1U << 30)
+#define SAFE_ATO_REFUSE_LEADING_ZERO (1U << 29)
+#define SAFE_ATO_REFUSE_LEADING_WHITESPACE (1U << 28)
+#define SAFE_ATO_ALL_FLAGS (SAFE_ATO_REFUSE_PLUS_MINUS|SAFE_ATO_REFUSE_LEADING_ZERO|SAFE_ATO_REFUSE_LEADING_WHITESPACE)
+#define SAFE_ATO_MASK_FLAGS(base) ((base) & ~SAFE_ATO_ALL_FLAGS)
+
+int safe_atou_full(const char *s, unsigned base, unsigned *ret_u);
+
+static inline int safe_atou(const char *s, unsigned *ret_u) {
+ return safe_atou_full(s, 0, ret_u);
+}
+
+int safe_atoi(const char *s, int *ret_i);
+int safe_atolli(const char *s, long long int *ret_i);
+
+int safe_atou8(const char *s, uint8_t *ret);
+
+int safe_atou16_full(const char *s, unsigned base, uint16_t *ret);
+
+static inline int safe_atou16(const char *s, uint16_t *ret) {
+ return safe_atou16_full(s, 0, ret);
+}
+
+static inline int safe_atoux16(const char *s, uint16_t *ret) {
+ return safe_atou16_full(s, 16, ret);
+}
+
+int safe_atoi16(const char *s, int16_t *ret);
+
+static inline int safe_atou32_full(const char *s, unsigned base, uint32_t *ret_u) {
+ assert_cc(sizeof(uint32_t) == sizeof(unsigned));
+ return safe_atou_full(s, base, (unsigned*) ret_u);
+}
+
+static inline int safe_atou32(const char *s, uint32_t *ret_u) {
+ return safe_atou32_full(s, 0, (unsigned*) ret_u);
+}
+
+static inline int safe_atoi32(const char *s, int32_t *ret_i) {
+ assert_cc(sizeof(int32_t) == sizeof(int));
+ return safe_atoi(s, (int*) ret_i);
+}
+
+int safe_atollu_full(const char *s, unsigned base, long long unsigned *ret_llu);
+
+static inline int safe_atollu(const char *s, long long unsigned *ret_llu) {
+ return safe_atollu_full(s, 0, ret_llu);
+}
+
+static inline int safe_atou64(const char *s, uint64_t *ret_u) {
+ assert_cc(sizeof(uint64_t) == sizeof(unsigned long long));
+ return safe_atollu(s, (unsigned long long*) ret_u);
+}
+
+static inline int safe_atoi64(const char *s, int64_t *ret_i) {
+ assert_cc(sizeof(int64_t) == sizeof(long long int));
+ return safe_atolli(s, (long long int*) ret_i);
+}
+
+static inline int safe_atoux64(const char *s, uint64_t *ret) {
+ assert_cc(sizeof(int64_t) == sizeof(long long unsigned));
+ return safe_atollu_full(s, 16, (long long unsigned*) ret);
+}
+
+#if LONG_MAX == INT_MAX
+static inline int safe_atolu_full(const char *s, unsigned base, long unsigned *ret_u) {
+ assert_cc(sizeof(unsigned long) == sizeof(unsigned));
+ return safe_atou_full(s, base, (unsigned*) ret_u);
+}
+static inline int safe_atoli(const char *s, long int *ret_u) {
+ assert_cc(sizeof(long int) == sizeof(int));
+ return safe_atoi(s, (int*) ret_u);
+}
+#else
+static inline int safe_atolu_full(const char *s, unsigned base, unsigned long *ret_u) {
+ assert_cc(sizeof(unsigned long) == sizeof(unsigned long long));
+ return safe_atollu_full(s, base, (unsigned long long*) ret_u);
+}
+static inline int safe_atoli(const char *s, long int *ret_u) {
+ assert_cc(sizeof(long int) == sizeof(long long int));
+ return safe_atolli(s, (long long int*) ret_u);
+}
+#endif
+
+static inline int safe_atolu(const char *s, unsigned long *ret_u) {
+ return safe_atolu_full(s, 0, ret_u);
+}
+
+#if SIZE_MAX == UINT_MAX
+static inline int safe_atozu(const char *s, size_t *ret_u) {
+ assert_cc(sizeof(size_t) == sizeof(unsigned));
+ return safe_atou(s, (unsigned *) ret_u);
+}
+#else
+static inline int safe_atozu(const char *s, size_t *ret_u) {
+ assert_cc(sizeof(size_t) == sizeof(long unsigned));
+ return safe_atolu(s, ret_u);
+}
+#endif
+
+int safe_atod(const char *s, double *ret_d);
+
+int parse_fractional_part_u(const char **s, size_t digits, unsigned *res);
+
+int parse_percent_unbounded(const char *p);
+int parse_percent(const char *p);
+
+int parse_permille_unbounded(const char *p);
+int parse_permille(const char *p);
+
+int parse_nice(const char *p, int *ret);
+
+int parse_ip_port(const char *s, uint16_t *ret);
+int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high);
+
+int parse_ip_prefix_length(const char *s, int *ret);
+
+int parse_oom_score_adjust(const char *s, int *ret);
+
+/* Given a Linux load average (e.g. decimal number 34.89 where 34 is passed as i and 89 is passed as f), convert it
+ * to a loadavg_t. */
+int store_loadavg_fixed_point(unsigned long i, unsigned long f, loadavg_t *ret);
+int parse_loadavg_fixed_point(const char *s, loadavg_t *ret);
diff --git a/src/basic/path-lookup.c b/src/basic/path-lookup.c
new file mode 100644
index 0000000..96b8217
--- /dev/null
+++ b/src/basic/path-lookup.c
@@ -0,0 +1,869 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+int xdg_user_runtime_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+
+ assert(ret);
+ assert(suffix);
+
+ e = getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return -ENXIO;
+
+ j = path_join(e, suffix);
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 0;
+}
+
+int xdg_user_config_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+ int r;
+
+ assert(ret);
+
+ e = getenv("XDG_CONFIG_HOME");
+ if (e)
+ j = path_join(e, suffix);
+ else {
+ _cleanup_free_ char *home = NULL;
+
+ r = get_home_dir(&home);
+ if (r < 0)
+ return r;
+
+ j = path_join(home, "/.config", suffix);
+ }
+
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 0;
+}
+
+int xdg_user_data_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+ int r;
+
+ assert(ret);
+ assert(suffix);
+
+ /* We don't treat /etc/xdg/systemd here as the spec
+ * suggests because we assume that is a link to
+ * /etc/systemd/ anyway. */
+
+ e = getenv("XDG_DATA_HOME");
+ if (e)
+ j = path_join(e, suffix);
+ else {
+ _cleanup_free_ char *home = NULL;
+
+ r = get_home_dir(&home);
+ if (r < 0)
+ return r;
+
+ j = path_join(home, "/.local/share", suffix);
+ }
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 1;
+}
+
+static const char* const user_data_unit_paths[] = {
+ "/usr/local/lib/systemd/user",
+ "/usr/local/share/systemd/user",
+ USER_DATA_UNIT_DIR,
+ "/usr/lib/systemd/user",
+ "/usr/share/systemd/user",
+ NULL
+};
+
+static const char* const user_config_unit_paths[] = {
+ USER_CONFIG_UNIT_DIR,
+ "/etc/systemd/user",
+ NULL
+};
+
+int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs) {
+ /* Implement the mechanisms defined in
+ *
+ * http://standards.freedesktop.org/basedir-spec/basedir-spec-0.6.html
+ *
+ * We look in both the config and the data dirs because we
+ * want to encourage that distributors ship their unit files
+ * as data, and allow overriding as configuration.
+ */
+ const char *e;
+ _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL;
+
+ e = getenv("XDG_CONFIG_DIRS");
+ if (e)
+ config_dirs = strv_split(e, ":");
+ else
+ config_dirs = strv_new("/etc/xdg");
+ if (!config_dirs)
+ return -ENOMEM;
+
+ e = getenv("XDG_DATA_DIRS");
+ if (e)
+ data_dirs = strv_split(e, ":");
+ else
+ data_dirs = strv_new("/usr/local/share",
+ "/usr/share");
+ if (!data_dirs)
+ return -ENOMEM;
+
+ *ret_config_dirs = TAKE_PTR(config_dirs);
+ *ret_data_dirs = TAKE_PTR(data_dirs);
+
+ return 0;
+}
+
+static char** user_dirs(
+ const char *persistent_config,
+ const char *runtime_config,
+ const char *global_persistent_config,
+ const char *global_runtime_config,
+ const char *generator,
+ const char *generator_early,
+ const char *generator_late,
+ const char *transient,
+ const char *persistent_control,
+ const char *runtime_control) {
+
+ _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL;
+ _cleanup_free_ char *data_home = NULL;
+ _cleanup_strv_free_ char **res = NULL;
+ int r;
+
+ r = xdg_user_dirs(&config_dirs, &data_dirs);
+ if (r < 0)
+ return NULL;
+
+ r = xdg_user_data_dir(&data_home, "/systemd/user");
+ if (r < 0 && r != -ENXIO)
+ return NULL;
+
+ /* Now merge everything we found. */
+ if (strv_extend(&res, persistent_control) < 0)
+ return NULL;
+
+ if (strv_extend(&res, runtime_control) < 0)
+ return NULL;
+
+ if (strv_extend(&res, transient) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator_early) < 0)
+ return NULL;
+
+ if (strv_extend(&res, persistent_config) < 0)
+ return NULL;
+
+ if (strv_extend_strv_concat(&res, config_dirs, "/systemd/user") < 0)
+ return NULL;
+
+ /* global config has lower priority than the user config of the same type */
+ if (strv_extend(&res, global_persistent_config) < 0)
+ return NULL;
+
+ if (strv_extend_strv(&res, (char**) user_config_unit_paths, false) < 0)
+ return NULL;
+
+ if (strv_extend(&res, runtime_config) < 0)
+ return NULL;
+
+ if (strv_extend(&res, global_runtime_config) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator) < 0)
+ return NULL;
+
+ if (strv_extend(&res, data_home) < 0)
+ return NULL;
+
+ if (strv_extend_strv_concat(&res, data_dirs, "/systemd/user") < 0)
+ return NULL;
+
+ if (strv_extend_strv(&res, (char**) user_data_unit_paths, false) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator_late) < 0)
+ return NULL;
+
+ if (path_strv_make_absolute_cwd(res) < 0)
+ return NULL;
+
+ return TAKE_PTR(res);
+}
+
+bool path_is_user_data_dir(const char *path) {
+ assert(path);
+
+ return strv_contains((char**) user_data_unit_paths, path);
+}
+
+bool path_is_user_config_dir(const char *path) {
+ assert(path);
+
+ return strv_contains((char**) user_config_unit_paths, path);
+}
+
+static int acquire_generator_dirs(
+ UnitFileScope scope,
+ const char *tempdir,
+ char **generator,
+ char **generator_early,
+ char **generator_late) {
+
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL;
+ const char *prefix;
+
+ assert(generator);
+ assert(generator_early);
+ assert(generator_late);
+ assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER, UNIT_FILE_GLOBAL));
+
+ if (scope == UNIT_FILE_GLOBAL)
+ return -EOPNOTSUPP;
+
+ if (tempdir)
+ prefix = tempdir;
+ else if (scope == UNIT_FILE_SYSTEM)
+ prefix = "/run/systemd";
+ else {
+ /* UNIT_FILE_USER */
+ const char *e;
+
+ e = getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return -ENXIO;
+
+ prefix = strjoina(e, "/systemd");
+ }
+
+ x = path_join(prefix, "generator");
+ if (!x)
+ return -ENOMEM;
+
+ y = path_join(prefix, "generator.early");
+ if (!y)
+ return -ENOMEM;
+
+ z = path_join(prefix, "generator.late");
+ if (!z)
+ return -ENOMEM;
+
+ *generator = TAKE_PTR(x);
+ *generator_early = TAKE_PTR(y);
+ *generator_late = TAKE_PTR(z);
+
+ return 0;
+}
+
+static int acquire_transient_dir(
+ UnitFileScope scope,
+ const char *tempdir,
+ char **ret) {
+
+ char *transient;
+
+ assert(ret);
+ assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER, UNIT_FILE_GLOBAL));
+
+ if (scope == UNIT_FILE_GLOBAL)
+ return -EOPNOTSUPP;
+
+ if (tempdir)
+ transient = path_join(tempdir, "transient");
+ else if (scope == UNIT_FILE_SYSTEM)
+ transient = strdup("/run/systemd/transient");
+ else
+ return xdg_user_runtime_dir(ret, "/systemd/transient");
+
+ if (!transient)
+ return -ENOMEM;
+ *ret = transient;
+ return 0;
+}
+
+static int acquire_config_dirs(UnitFileScope scope, char **persistent, char **runtime) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+ int r;
+
+ assert(persistent);
+ assert(runtime);
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM:
+ a = strdup(SYSTEM_CONFIG_UNIT_DIR);
+ b = strdup("/run/systemd/system");
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ a = strdup(USER_CONFIG_UNIT_DIR);
+ b = strdup("/run/systemd/user");
+ break;
+
+ case UNIT_FILE_USER:
+ r = xdg_user_config_dir(&a, "/systemd/user");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_runtime_dir(runtime, "/systemd/user");
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+
+ /* If XDG_RUNTIME_DIR is not set, don't consider that fatal, simply initialize the runtime
+ * directory to NULL */
+ *runtime = NULL;
+ }
+
+ *persistent = TAKE_PTR(a);
+
+ return 0;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope value.");
+ }
+
+ if (!a || !b)
+ return -ENOMEM;
+
+ *persistent = TAKE_PTR(a);
+ *runtime = TAKE_PTR(b);
+
+ return 0;
+}
+
+static int acquire_control_dirs(UnitFileScope scope, char **persistent, char **runtime) {
+ _cleanup_free_ char *a = NULL;
+ int r;
+
+ assert(persistent);
+ assert(runtime);
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM: {
+ _cleanup_free_ char *b = NULL;
+
+ a = strdup("/etc/systemd/system.control");
+ if (!a)
+ return -ENOMEM;
+
+ b = strdup("/run/systemd/system.control");
+ if (!b)
+ return -ENOMEM;
+
+ *runtime = TAKE_PTR(b);
+
+ break;
+ }
+
+ case UNIT_FILE_USER:
+ r = xdg_user_config_dir(&a, "/systemd/user.control");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_runtime_dir(runtime, "/systemd/user.control");
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+
+ /* If XDG_RUNTIME_DIR is not set, don't consider this fatal, simply initialize the directory to
+ * NULL */
+ *runtime = NULL;
+ }
+
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ return -EOPNOTSUPP;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope value.");
+ }
+
+ *persistent = TAKE_PTR(a);
+
+ return 0;
+}
+
+static int acquire_attached_dirs(
+ UnitFileScope scope,
+ char **ret_persistent,
+ char **ret_runtime) {
+
+ _cleanup_free_ char *a = NULL, *b = NULL;
+
+ assert(ret_persistent);
+ assert(ret_runtime);
+
+ /* Portable services are not available to regular users for now. */
+ if (scope != UNIT_FILE_SYSTEM)
+ return -EOPNOTSUPP;
+
+ a = strdup("/etc/systemd/system.attached");
+ if (!a)
+ return -ENOMEM;
+
+ b = strdup("/run/systemd/system.attached");
+ if (!b)
+ return -ENOMEM;
+
+ *ret_persistent = TAKE_PTR(a);
+ *ret_runtime = TAKE_PTR(b);
+
+ return 0;
+}
+
+static int patch_root_prefix(char **p, const char *root_dir) {
+ char *c;
+
+ assert(p);
+
+ if (!*p)
+ return 0;
+
+ c = path_join(root_dir, *p);
+ if (!c)
+ return -ENOMEM;
+
+ free_and_replace(*p, c);
+ return 0;
+}
+
+static int patch_root_prefix_strv(char **l, const char *root_dir) {
+ char **i;
+ int r;
+
+ if (!root_dir)
+ return 0;
+
+ STRV_FOREACH(i, l) {
+ r = patch_root_prefix(i, root_dir);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int get_paths_from_environ(const char *var, char ***paths, bool *append) {
+ const char *e;
+ int r;
+
+ assert(var);
+ assert(paths);
+ assert(append);
+
+ *append = false;
+
+ e = getenv(var);
+ if (e) {
+ const char *k;
+
+ k = endswith(e, ":");
+ if (k) {
+ e = strndupa(e, k - e);
+ *append = true;
+ }
+
+ /* FIXME: empty components in other places should be rejected. */
+
+ r = path_split_and_make_absolute(e, paths);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int lookup_paths_init(
+ LookupPaths *p,
+ UnitFileScope scope,
+ LookupPathsFlags flags,
+ const char *root_dir) {
+
+ _cleanup_(rmdir_and_freep) char *tempdir = NULL;
+ _cleanup_free_ char
+ *root = NULL,
+ *persistent_config = NULL, *runtime_config = NULL,
+ *global_persistent_config = NULL, *global_runtime_config = NULL,
+ *generator = NULL, *generator_early = NULL, *generator_late = NULL,
+ *transient = NULL,
+ *persistent_control = NULL, *runtime_control = NULL,
+ *persistent_attached = NULL, *runtime_attached = NULL;
+ bool append = false; /* Add items from SYSTEMD_UNIT_PATH before normal directories */
+ _cleanup_strv_free_ char **paths = NULL;
+ int r;
+
+ assert(p);
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+#if HAVE_SPLIT_USR
+ flags |= LOOKUP_PATHS_SPLIT_USR;
+#endif
+
+ if (!empty_or_root(root_dir)) {
+ if (scope == UNIT_FILE_USER)
+ return -EINVAL;
+
+ r = is_dir(root_dir, true);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOTDIR;
+
+ root = strdup(root_dir);
+ if (!root)
+ return -ENOMEM;
+ }
+
+ if (flags & LOOKUP_PATHS_TEMPORARY_GENERATED) {
+ r = mkdtemp_malloc("/tmp/systemd-temporary-XXXXXX", &tempdir);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create temporary directory: %m");
+ }
+
+ /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_config to NULL */
+ r = acquire_config_dirs(scope, &persistent_config, &runtime_config);
+ if (r < 0)
+ return r;
+
+ if (scope == UNIT_FILE_USER) {
+ r = acquire_config_dirs(UNIT_FILE_GLOBAL, &global_persistent_config, &global_runtime_config);
+ if (r < 0)
+ return r;
+ }
+
+ if ((flags & LOOKUP_PATHS_EXCLUDE_GENERATED) == 0) {
+ /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */
+ r = acquire_generator_dirs(scope, tempdir,
+ &generator, &generator_early, &generator_late);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO))
+ return r;
+ }
+
+ /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */
+ r = acquire_transient_dir(scope, tempdir, &transient);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO))
+ return r;
+
+ /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_control to NULL */
+ r = acquire_control_dirs(scope, &persistent_control, &runtime_control);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return r;
+
+ r = acquire_attached_dirs(scope, &persistent_attached, &runtime_attached);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return r;
+
+ /* First priority is whatever has been passed to us via env vars */
+ r = get_paths_from_environ("SYSTEMD_UNIT_PATH", &paths, &append);
+ if (r < 0)
+ return r;
+
+ if (!paths || append) {
+ /* Let's figure something out. */
+
+ _cleanup_strv_free_ char **add = NULL;
+
+ /* For the user units we include share/ in the search
+ * path in order to comply with the XDG basedir spec.
+ * For the system stuff we avoid such nonsense. OTOH
+ * we include /lib in the search path for the system
+ * stuff but avoid it for user stuff. */
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM:
+ add = strv_new(
+ /* If you modify this you also want to modify
+ * systemdsystemunitpath= in systemd.pc.in! */
+ STRV_IFNOTNULL(persistent_control),
+ STRV_IFNOTNULL(runtime_control),
+ STRV_IFNOTNULL(transient),
+ STRV_IFNOTNULL(generator_early),
+ persistent_config,
+ SYSTEM_CONFIG_UNIT_DIR,
+ "/etc/systemd/system",
+ STRV_IFNOTNULL(persistent_attached),
+ runtime_config,
+ "/run/systemd/system",
+ STRV_IFNOTNULL(runtime_attached),
+ STRV_IFNOTNULL(generator),
+ "/usr/local/lib/systemd/system",
+ SYSTEM_DATA_UNIT_PATH,
+ "/usr/lib/systemd/system",
+ STRV_IFNOTNULL(flags & LOOKUP_PATHS_SPLIT_USR ? "/lib/systemd/system" : NULL),
+ STRV_IFNOTNULL(generator_late));
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ add = strv_new(
+ /* If you modify this you also want to modify
+ * systemduserunitpath= in systemd.pc.in, and
+ * the arrays in user_dirs() above! */
+ STRV_IFNOTNULL(persistent_control),
+ STRV_IFNOTNULL(runtime_control),
+ STRV_IFNOTNULL(transient),
+ STRV_IFNOTNULL(generator_early),
+ persistent_config,
+ USER_CONFIG_UNIT_DIR,
+ "/etc/systemd/user",
+ runtime_config,
+ "/run/systemd/user",
+ STRV_IFNOTNULL(generator),
+ "/usr/local/share/systemd/user",
+ "/usr/share/systemd/user",
+ "/usr/local/lib/systemd/user",
+ USER_DATA_UNIT_DIR,
+ "/usr/lib/systemd/user",
+ STRV_IFNOTNULL(generator_late));
+ break;
+
+ case UNIT_FILE_USER:
+ add = user_dirs(persistent_config, runtime_config,
+ global_persistent_config, global_runtime_config,
+ generator, generator_early, generator_late,
+ transient,
+ persistent_control, runtime_control);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope?");
+ }
+
+ if (!add)
+ return -ENOMEM;
+
+ if (paths) {
+ r = strv_extend_strv(&paths, add, true);
+ if (r < 0)
+ return r;
+ } else
+ /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it,
+ * and don't have to copy anything */
+ paths = TAKE_PTR(add);
+ }
+
+ r = patch_root_prefix(&persistent_config, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_config, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&generator, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&generator_early, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&generator_late, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&transient, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&persistent_control, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_control, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&persistent_attached, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_attached, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix_strv(paths, root);
+ if (r < 0)
+ return -ENOMEM;
+
+ *p = (LookupPaths) {
+ .search_path = strv_uniq(TAKE_PTR(paths)),
+
+ .persistent_config = TAKE_PTR(persistent_config),
+ .runtime_config = TAKE_PTR(runtime_config),
+
+ .generator = TAKE_PTR(generator),
+ .generator_early = TAKE_PTR(generator_early),
+ .generator_late = TAKE_PTR(generator_late),
+
+ .transient = TAKE_PTR(transient),
+
+ .persistent_control = TAKE_PTR(persistent_control),
+ .runtime_control = TAKE_PTR(runtime_control),
+
+ .persistent_attached = TAKE_PTR(persistent_attached),
+ .runtime_attached = TAKE_PTR(runtime_attached),
+
+ .root_dir = TAKE_PTR(root),
+ .temporary_dir = TAKE_PTR(tempdir),
+ };
+
+ return 0;
+}
+
+void lookup_paths_free(LookupPaths *p) {
+ if (!p)
+ return;
+
+ p->search_path = strv_free(p->search_path);
+
+ p->persistent_config = mfree(p->persistent_config);
+ p->runtime_config = mfree(p->runtime_config);
+
+ p->persistent_attached = mfree(p->persistent_attached);
+ p->runtime_attached = mfree(p->runtime_attached);
+
+ p->generator = mfree(p->generator);
+ p->generator_early = mfree(p->generator_early);
+ p->generator_late = mfree(p->generator_late);
+
+ p->transient = mfree(p->transient);
+
+ p->persistent_control = mfree(p->persistent_control);
+ p->runtime_control = mfree(p->runtime_control);
+
+ p->root_dir = mfree(p->root_dir);
+ p->temporary_dir = mfree(p->temporary_dir);
+}
+
+void lookup_paths_log(LookupPaths *p) {
+ assert(p);
+
+ if (strv_isempty(p->search_path)) {
+ log_debug("Ignoring unit files.");
+ p->search_path = strv_free(p->search_path);
+ } else {
+ _cleanup_free_ char *t;
+
+ t = strv_join(p->search_path, "\n\t");
+ log_debug("Looking for unit files in (higher priority first):\n\t%s", strna(t));
+ }
+}
+
+char **generator_binary_paths(UnitFileScope scope) {
+ bool append = false; /* Add items from SYSTEMD_GENERATOR_PATH before normal directories */
+ _cleanup_strv_free_ char **paths = NULL;
+ int r;
+
+ /* First priority is whatever has been passed to us via env vars */
+ r = get_paths_from_environ("SYSTEMD_GENERATOR_PATH", &paths, &append);
+ if (r < 0)
+ return NULL;
+
+ if (!paths || append) {
+ _cleanup_strv_free_ char **add = NULL;
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM:
+ add = strv_new("/run/systemd/system-generators",
+ "/etc/systemd/system-generators",
+ "/usr/local/lib/systemd/system-generators",
+ SYSTEM_GENERATOR_DIR);
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ case UNIT_FILE_USER:
+ add = strv_new("/run/systemd/user-generators",
+ "/etc/systemd/user-generators",
+ "/usr/local/lib/systemd/user-generators",
+ USER_GENERATOR_DIR);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope.");
+ }
+
+ if (!add)
+ return NULL;
+
+ if (paths) {
+ r = strv_extend_strv(&paths, add, true);
+ if (r < 0)
+ return NULL;
+ } else
+ /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it,
+ * and don't have to copy anything */
+ paths = TAKE_PTR(add);
+ }
+
+ return TAKE_PTR(paths);
+}
+
+char **env_generator_binary_paths(bool is_system) {
+ bool append = false; /* Add items from SYSTEMD_ENVIRONMENT_GENERATOR_PATH before normal directories */
+ _cleanup_strv_free_ char **paths = NULL;
+ _cleanup_strv_free_ char **add = NULL;
+ int r;
+
+ /* First priority is whatever has been passed to us via env vars */
+ r = get_paths_from_environ("SYSTEMD_ENVIRONMENT_GENERATOR_PATH", &paths, &append);
+ if (r < 0)
+ return NULL;
+
+ if (!paths || append) {
+ if (is_system)
+ add = strv_new("/run/systemd/system-environment-generators",
+ "/etc/systemd/system-environment-generators",
+ "/usr/local/lib/systemd/system-environment-generators",
+ SYSTEM_ENV_GENERATOR_DIR);
+ else
+ add = strv_new("/run/systemd/user-environment-generators",
+ "/etc/systemd/user-environment-generators",
+ "/usr/local/lib/systemd/user-environment-generators",
+ USER_ENV_GENERATOR_DIR);
+
+ if (!add)
+ return NULL;
+ }
+
+ if (paths) {
+ r = strv_extend_strv(&paths, add, true);
+ if (r < 0)
+ return NULL;
+ } else
+ /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it,
+ * and don't have to copy anything */
+ paths = TAKE_PTR(add);
+
+ return TAKE_PTR(paths);
+}
diff --git a/src/basic/path-lookup.h b/src/basic/path-lookup.h
new file mode 100644
index 0000000..088bb9b
--- /dev/null
+++ b/src/basic/path-lookup.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+typedef struct LookupPaths LookupPaths;
+
+#include "def.h"
+#include "unit-file.h"
+#include "macro.h"
+
+typedef enum LookupPathsFlags {
+ LOOKUP_PATHS_EXCLUDE_GENERATED = 1 << 0,
+ LOOKUP_PATHS_TEMPORARY_GENERATED = 1 << 1,
+ LOOKUP_PATHS_SPLIT_USR = 1 << 2,
+} LookupPathsFlags;
+
+struct LookupPaths {
+ /* Where we look for unit files. This includes the individual special paths below, but also any vendor
+ * supplied, static unit file paths. */
+ char **search_path;
+
+ /* Where we shall create or remove our installation symlinks, aka "configuration", and where the user/admin
+ * shall place their own unit files. */
+ char *persistent_config;
+ char *runtime_config;
+
+ /* Where units from a portable service image shall be placed. */
+ char *persistent_attached;
+ char *runtime_attached;
+
+ /* Where to place generated unit files (i.e. those a "generator" tool generated). Note the special semantics of
+ * this directory: the generators are flushed each time a "systemctl daemon-reload" is issued. The user should
+ * not alter these directories directly. */
+ char *generator;
+ char *generator_early;
+ char *generator_late;
+
+ /* Where to place transient unit files (i.e. those created dynamically via the bus API). Note the special
+ * semantics of this directory: all units created transiently have their unit files removed as the transient
+ * unit is unloaded. The user should not alter this directory directly. */
+ char *transient;
+
+ /* Where the snippets created by "systemctl set-property" are placed. Note that for transient units, the
+ * snippets are placed in the transient directory though (see above). The user should not alter this directory
+ * directly. */
+ char *persistent_control;
+ char *runtime_control;
+
+ /* The root directory prepended to all items above, or NULL */
+ char *root_dir;
+
+ /* A temporary directory when running in test mode, to be nuked */
+ char *temporary_dir;
+};
+
+int lookup_paths_init(LookupPaths *p, UnitFileScope scope, LookupPathsFlags flags, const char *root_dir);
+
+int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs);
+int xdg_user_runtime_dir(char **ret, const char *suffix);
+int xdg_user_config_dir(char **ret, const char *suffix);
+int xdg_user_data_dir(char **ret, const char *suffix);
+
+bool path_is_user_data_dir(const char *path);
+bool path_is_user_config_dir(const char *path);
+
+void lookup_paths_log(LookupPaths *p);
+void lookup_paths_free(LookupPaths *p);
+
+char **generator_binary_paths(UnitFileScope scope);
+char **env_generator_binary_paths(bool is_system);
+
+#define NETWORK_DIRS ((const char* const*) CONF_PATHS_STRV("systemd/network"))
+#define NETWORK_DIRS_NULSTR CONF_PATHS_NULSTR("systemd/network")
diff --git a/src/basic/path-util.c b/src/basic/path-util.c
new file mode 100644
index 0000000..794599a
--- /dev/null
+++ b/src/basic/path-util.c
@@ -0,0 +1,1138 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the
+ * POSIX version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "log.h"
+#include "macro.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+
+int path_split_and_make_absolute(const char *p, char ***ret) {
+ char **l;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ l = strv_split(p, ":");
+ if (!l)
+ return -ENOMEM;
+
+ r = path_strv_make_absolute_cwd(l);
+ if (r < 0) {
+ strv_free(l);
+ return r;
+ }
+
+ *ret = l;
+ return r;
+}
+
+char *path_make_absolute(const char *p, const char *prefix) {
+ assert(p);
+
+ /* Makes every item in the list an absolute path by prepending
+ * the prefix, if specified and necessary */
+
+ if (path_is_absolute(p) || isempty(prefix))
+ return strdup(p);
+
+ return path_join(prefix, p);
+}
+
+int safe_getcwd(char **ret) {
+ char *cwd;
+
+ cwd = get_current_dir_name();
+ if (!cwd)
+ return negative_errno();
+
+ /* Let's make sure the directory is really absolute, to protect us from the logic behind
+ * CVE-2018-1000001 */
+ if (cwd[0] != '/') {
+ free(cwd);
+ return -ENOMEDIUM;
+ }
+
+ *ret = cwd;
+ return 0;
+}
+
+int path_make_absolute_cwd(const char *p, char **ret) {
+ char *c;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ /* Similar to path_make_absolute(), but prefixes with the
+ * current working directory. */
+
+ if (path_is_absolute(p))
+ c = strdup(p);
+ else {
+ _cleanup_free_ char *cwd = NULL;
+
+ r = safe_getcwd(&cwd);
+ if (r < 0)
+ return r;
+
+ c = path_join(cwd, p);
+ }
+ if (!c)
+ return -ENOMEM;
+
+ *ret = c;
+ return 0;
+}
+
+int path_make_relative(const char *from_dir, const char *to_path, char **_r) {
+ char *f, *t, *r, *p;
+ unsigned n_parents = 0;
+
+ assert(from_dir);
+ assert(to_path);
+ assert(_r);
+
+ /* Strips the common part, and adds ".." elements as necessary. */
+
+ if (!path_is_absolute(from_dir) || !path_is_absolute(to_path))
+ return -EINVAL;
+
+ f = strdupa(from_dir);
+ t = strdupa(to_path);
+
+ path_simplify(f, true);
+ path_simplify(t, true);
+
+ /* Skip the common part. */
+ for (;;) {
+ size_t a, b;
+
+ f += *f == '/';
+ t += *t == '/';
+
+ if (!*f) {
+ if (!*t)
+ /* from_dir equals to_path. */
+ r = strdup(".");
+ else
+ /* from_dir is a parent directory of to_path. */
+ r = strdup(t);
+ if (!r)
+ return -ENOMEM;
+
+ *_r = r;
+ return 0;
+ }
+
+ if (!*t)
+ break;
+
+ a = strcspn(f, "/");
+ b = strcspn(t, "/");
+
+ if (a != b || memcmp(f, t, a) != 0)
+ break;
+
+ f += a;
+ t += b;
+ }
+
+ /* If we're here, then "from_dir" has one or more elements that need to
+ * be replaced with "..". */
+
+ /* Count the number of necessary ".." elements. */
+ for (; *f;) {
+ size_t w;
+
+ w = strcspn(f, "/");
+
+ /* If this includes ".." we can't do a simple series of "..", refuse */
+ if (w == 2 && f[0] == '.' && f[1] == '.')
+ return -EINVAL;
+
+ /* Count number of elements */
+ n_parents++;
+
+ f += w;
+ f += *f == '/';
+ }
+
+ r = new(char, n_parents * 3 + strlen(t) + 1);
+ if (!r)
+ return -ENOMEM;
+
+ for (p = r; n_parents > 0; n_parents--)
+ p = mempcpy(p, "../", 3);
+
+ if (*t)
+ strcpy(p, t);
+ else
+ /* Remove trailing slash */
+ *(--p) = 0;
+
+ *_r = r;
+ return 0;
+}
+
+char* path_startswith_strv(const char *p, char **set) {
+ char **s, *t;
+
+ STRV_FOREACH(s, set) {
+ t = path_startswith(p, *s);
+ if (t)
+ return t;
+ }
+
+ return NULL;
+}
+
+int path_strv_make_absolute_cwd(char **l) {
+ char **s;
+ int r;
+
+ /* Goes through every item in the string list and makes it
+ * absolute. This works in place and won't rollback any
+ * changes on failure. */
+
+ STRV_FOREACH(s, l) {
+ char *t;
+
+ r = path_make_absolute_cwd(*s, &t);
+ if (r < 0)
+ return r;
+
+ path_simplify(t, false);
+ free_and_replace(*s, t);
+ }
+
+ return 0;
+}
+
+char **path_strv_resolve(char **l, const char *root) {
+ char **s;
+ unsigned k = 0;
+ bool enomem = false;
+ int r;
+
+ if (strv_isempty(l))
+ return l;
+
+ /* Goes through every item in the string list and canonicalize
+ * the path. This works in place and won't rollback any
+ * changes on failure. */
+
+ STRV_FOREACH(s, l) {
+ _cleanup_free_ char *orig = NULL;
+ char *t, *u;
+
+ if (!path_is_absolute(*s)) {
+ free(*s);
+ continue;
+ }
+
+ if (root) {
+ orig = *s;
+ t = path_join(root, orig);
+ if (!t) {
+ enomem = true;
+ continue;
+ }
+ } else
+ t = *s;
+
+ r = chase_symlinks(t, root, 0, &u, NULL);
+ if (r == -ENOENT) {
+ if (root) {
+ u = TAKE_PTR(orig);
+ free(t);
+ } else
+ u = t;
+ } else if (r < 0) {
+ free(t);
+
+ if (r == -ENOMEM)
+ enomem = true;
+
+ continue;
+ } else if (root) {
+ char *x;
+
+ free(t);
+ x = path_startswith(u, root);
+ if (x) {
+ /* restore the slash if it was lost */
+ if (!startswith(x, "/"))
+ *(--x) = '/';
+
+ t = strdup(x);
+ free(u);
+ if (!t) {
+ enomem = true;
+ continue;
+ }
+ u = t;
+ } else {
+ /* canonicalized path goes outside of
+ * prefix, keep the original path instead */
+ free_and_replace(u, orig);
+ }
+ } else
+ free(t);
+
+ l[k++] = u;
+ }
+
+ l[k] = NULL;
+
+ if (enomem)
+ return NULL;
+
+ return l;
+}
+
+char **path_strv_resolve_uniq(char **l, const char *root) {
+
+ if (strv_isempty(l))
+ return l;
+
+ if (!path_strv_resolve(l, root))
+ return NULL;
+
+ return strv_uniq(l);
+}
+
+char *path_simplify(char *path, bool kill_dots) {
+ char *f, *t;
+ bool slash = false, ignore_slash = false, absolute;
+
+ assert(path);
+
+ /* Removes redundant inner and trailing slashes. Also removes unnecessary dots
+ * if kill_dots is true. Modifies the passed string in-place.
+ *
+ * ///foo//./bar/. becomes /foo/./bar/. (if kill_dots is false)
+ * ///foo//./bar/. becomes /foo/bar (if kill_dots is true)
+ * .//./foo//./bar/. becomes ././foo/./bar/. (if kill_dots is false)
+ * .//./foo//./bar/. becomes foo/bar (if kill_dots is true)
+ */
+
+ if (isempty(path))
+ return path;
+
+ absolute = path_is_absolute(path);
+
+ f = path;
+ if (kill_dots && *f == '.' && IN_SET(f[1], 0, '/')) {
+ ignore_slash = true;
+ f++;
+ }
+
+ for (t = path; *f; f++) {
+
+ if (*f == '/') {
+ slash = true;
+ continue;
+ }
+
+ if (slash) {
+ if (kill_dots && *f == '.' && IN_SET(f[1], 0, '/'))
+ continue;
+
+ slash = false;
+ if (ignore_slash)
+ ignore_slash = false;
+ else
+ *(t++) = '/';
+ }
+
+ *(t++) = *f;
+ }
+
+ /* Special rule, if we stripped everything, we either need a "/" (for the root directory)
+ * or "." for the current directory */
+ if (t == path) {
+ if (absolute)
+ *(t++) = '/';
+ else
+ *(t++) = '.';
+ }
+
+ *t = 0;
+ return path;
+}
+
+int path_simplify_and_warn(
+ char *path,
+ unsigned flag,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue) {
+
+ bool fatal = flag & PATH_CHECK_FATAL;
+
+ assert(!FLAGS_SET(flag, PATH_CHECK_ABSOLUTE | PATH_CHECK_RELATIVE));
+
+ if (!utf8_is_valid(path))
+ return log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, path);
+
+ if (flag & (PATH_CHECK_ABSOLUTE | PATH_CHECK_RELATIVE)) {
+ bool absolute;
+
+ absolute = path_is_absolute(path);
+
+ if (!absolute && (flag & PATH_CHECK_ABSOLUTE))
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
+ "%s= path is not absolute%s: %s",
+ lvalue, fatal ? "" : ", ignoring", path);
+
+ if (absolute && (flag & PATH_CHECK_RELATIVE))
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
+ "%s= path is absolute%s: %s",
+ lvalue, fatal ? "" : ", ignoring", path);
+ }
+
+ path_simplify(path, true);
+
+ if (!path_is_valid(path))
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
+ "%s= path has invalid length (%zu bytes)%s.",
+ lvalue, strlen(path), fatal ? "" : ", ignoring");
+
+ if (!path_is_normalized(path))
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL),
+ "%s= path is not normalized%s: %s",
+ lvalue, fatal ? "" : ", ignoring", path);
+
+ return 0;
+}
+
+char* path_startswith(const char *path, const char *prefix) {
+ assert(path);
+ assert(prefix);
+
+ /* Returns a pointer to the start of the first component after the parts matched by
+ * the prefix, iff
+ * - both paths are absolute or both paths are relative,
+ * and
+ * - each component in prefix in turn matches a component in path at the same position.
+ * An empty string will be returned when the prefix and path are equivalent.
+ *
+ * Returns NULL otherwise.
+ */
+
+ if ((path[0] == '/') != (prefix[0] == '/'))
+ return NULL;
+
+ for (;;) {
+ size_t a, b;
+
+ path += strspn(path, "/");
+ prefix += strspn(prefix, "/");
+
+ if (*prefix == 0)
+ return (char*) path;
+
+ if (*path == 0)
+ return NULL;
+
+ a = strcspn(path, "/");
+ b = strcspn(prefix, "/");
+
+ if (a != b)
+ return NULL;
+
+ if (memcmp(path, prefix, a) != 0)
+ return NULL;
+
+ path += a;
+ prefix += b;
+ }
+}
+
+int path_compare(const char *a, const char *b) {
+ int d;
+
+ assert(a);
+ assert(b);
+
+ /* A relative path and an absolute path must not compare as equal.
+ * Which one is sorted before the other does not really matter.
+ * Here a relative path is ordered before an absolute path. */
+ d = (a[0] == '/') - (b[0] == '/');
+ if (d != 0)
+ return d;
+
+ for (;;) {
+ size_t j, k;
+
+ a += strspn(a, "/");
+ b += strspn(b, "/");
+
+ if (*a == 0 && *b == 0)
+ return 0;
+
+ /* Order prefixes first: "/foo" before "/foo/bar" */
+ if (*a == 0)
+ return -1;
+ if (*b == 0)
+ return 1;
+
+ j = strcspn(a, "/");
+ k = strcspn(b, "/");
+
+ /* Alphabetical sort: "/foo/aaa" before "/foo/b" */
+ d = memcmp(a, b, MIN(j, k));
+ if (d != 0)
+ return (d > 0) - (d < 0); /* sign of d */
+
+ /* Sort "/foo/a" before "/foo/aaa" */
+ d = (j > k) - (j < k); /* sign of (j - k) */
+ if (d != 0)
+ return d;
+
+ a += j;
+ b += k;
+ }
+}
+
+bool path_equal(const char *a, const char *b) {
+ return path_compare(a, b) == 0;
+}
+
+bool path_equal_or_files_same(const char *a, const char *b, int flags) {
+ return path_equal(a, b) || files_same(a, b, flags) > 0;
+}
+
+char* path_join_internal(const char *first, ...) {
+ char *joined, *q;
+ const char *p;
+ va_list ap;
+ bool slash;
+ size_t sz;
+
+ /* Joins all listed strings until the sentinel and places a "/" between them unless the strings end/begin
+ * already with one so that it is unnecessary. Note that slashes which are already duplicate won't be
+ * removed. The string returned is hence always equal to or longer than the sum of the lengths of each
+ * individual string.
+ *
+ * Note: any listed empty string is simply skipped. This can be useful for concatenating strings of which some
+ * are optional.
+ *
+ * Examples:
+ *
+ * path_join("foo", "bar") → "foo/bar"
+ * path_join("foo/", "bar") → "foo/bar"
+ * path_join("", "foo", "", "bar", "") → "foo/bar" */
+
+ sz = strlen_ptr(first);
+ va_start(ap, first);
+ while ((p = va_arg(ap, char*)) != POINTER_MAX)
+ if (!isempty(p))
+ sz += 1 + strlen(p);
+ va_end(ap);
+
+ joined = new(char, sz + 1);
+ if (!joined)
+ return NULL;
+
+ if (!isempty(first)) {
+ q = stpcpy(joined, first);
+ slash = endswith(first, "/");
+ } else {
+ /* Skip empty items */
+ joined[0] = 0;
+ q = joined;
+ slash = true; /* no need to generate a slash anymore */
+ }
+
+ va_start(ap, first);
+ while ((p = va_arg(ap, char*)) != POINTER_MAX) {
+ if (isempty(p))
+ continue;
+
+ if (!slash && p[0] != '/')
+ *(q++) = '/';
+
+ q = stpcpy(q, p);
+ slash = endswith(p, "/");
+ }
+ va_end(ap);
+
+ return joined;
+}
+
+int find_executable_full(const char *name, bool use_path_envvar, char **ret) {
+ int last_error, r;
+ const char *p = NULL;
+
+ assert(name);
+
+ if (is_path(name)) {
+ if (access(name, X_OK) < 0)
+ return -errno;
+
+ if (ret) {
+ r = path_make_absolute_cwd(name, ret);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+ }
+
+ if (use_path_envvar)
+ /* Plain getenv, not secure_getenv, because we want to actually allow the user to pick the
+ * binary. */
+ p = getenv("PATH");
+ if (!p)
+ p = DEFAULT_PATH;
+
+ last_error = -ENOENT;
+
+ for (;;) {
+ _cleanup_free_ char *j = NULL, *element = NULL;
+
+ r = extract_first_word(&p, &element, ":", EXTRACT_RELAX|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!path_is_absolute(element))
+ continue;
+
+ j = path_join(element, name);
+ if (!j)
+ return -ENOMEM;
+
+ if (access(j, X_OK) >= 0) {
+ _cleanup_free_ char *with_dash;
+
+ with_dash = strjoin(j, "/");
+ if (!with_dash)
+ return -ENOMEM;
+
+ /* If this passes, it must be a directory, and so should be skipped. */
+ if (access(with_dash, X_OK) >= 0)
+ continue;
+
+ /* We can't just `continue` inverting this case, since we need to update last_error. */
+ if (errno == ENOTDIR) {
+ /* Found it! */
+ if (ret)
+ *ret = path_simplify(TAKE_PTR(j), false);
+
+ return 0;
+ }
+ }
+
+ /* PATH entries which we don't have access to are ignored, as per tradition. */
+ if (errno != EACCES)
+ last_error = -errno;
+ }
+
+ return last_error;
+}
+
+bool paths_check_timestamp(const char* const* paths, usec_t *timestamp, bool update) {
+ bool changed = false;
+ const char* const* i;
+
+ assert(timestamp);
+
+ if (!paths)
+ return false;
+
+ STRV_FOREACH(i, paths) {
+ struct stat stats;
+ usec_t u;
+
+ if (stat(*i, &stats) < 0)
+ continue;
+
+ u = timespec_load(&stats.st_mtim);
+
+ /* first check */
+ if (*timestamp >= u)
+ continue;
+
+ log_debug("timestamp of '%s' changed", *i);
+
+ /* update timestamp */
+ if (update) {
+ *timestamp = u;
+ changed = true;
+ } else
+ return true;
+ }
+
+ return changed;
+}
+
+static int executable_is_good(const char *executable) {
+ _cleanup_free_ char *p = NULL, *d = NULL;
+ int r;
+
+ r = find_executable(executable, &p);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ /* An fsck that is linked to /bin/true is a non-existent fsck */
+
+ r = readlink_malloc(p, &d);
+ if (r == -EINVAL) /* not a symlink */
+ return 1;
+ if (r < 0)
+ return r;
+
+ return !PATH_IN_SET(d, "true"
+ "/bin/true",
+ "/usr/bin/true",
+ "/dev/null");
+}
+
+int fsck_exists(const char *fstype) {
+ const char *checker;
+
+ assert(fstype);
+
+ if (streq(fstype, "auto"))
+ return -EINVAL;
+
+ checker = strjoina("fsck.", fstype);
+ return executable_is_good(checker);
+}
+
+int parse_path_argument_and_warn(const char *path, bool suppress_root, char **arg) {
+ char *p;
+ int r;
+
+ /*
+ * This function is intended to be used in command line
+ * parsers, to handle paths that are passed in. It makes the
+ * path absolute, and reduces it to NULL if omitted or
+ * root (the latter optionally).
+ *
+ * NOTE THAT THIS WILL FREE THE PREVIOUS ARGUMENT POINTER ON
+ * SUCCESS! Hence, do not pass in uninitialized pointers.
+ */
+
+ if (isempty(path)) {
+ *arg = mfree(*arg);
+ return 0;
+ }
+
+ r = path_make_absolute_cwd(path, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse path \"%s\" and make it absolute: %m", path);
+
+ path_simplify(p, false);
+ if (suppress_root && empty_or_root(p))
+ p = mfree(p);
+
+ free_and_replace(*arg, p);
+
+ return 0;
+}
+
+char* dirname_malloc(const char *path) {
+ char *d, *dir, *dir2;
+
+ assert(path);
+
+ d = strdup(path);
+ if (!d)
+ return NULL;
+
+ dir = dirname(d);
+ assert(dir);
+
+ if (dir == d)
+ return d;
+
+ dir2 = strdup(dir);
+ free(d);
+
+ return dir2;
+}
+
+const char *last_path_component(const char *path) {
+
+ /* Finds the last component of the path, preserving the optional trailing slash that signifies a directory.
+ *
+ * a/b/c → c
+ * a/b/c/ → c/
+ * x → x
+ * x/ → x/
+ * /y → y
+ * /y/ → y/
+ * / → /
+ * // → /
+ * /foo/a → a
+ * /foo/a/ → a/
+ *
+ * Also, the empty string is mapped to itself.
+ *
+ * This is different than basename(), which returns "" when a trailing slash is present.
+ */
+
+ unsigned l, k;
+
+ if (!path)
+ return NULL;
+
+ l = k = strlen(path);
+ if (l == 0) /* special case — an empty string */
+ return path;
+
+ while (k > 0 && path[k-1] == '/')
+ k--;
+
+ if (k == 0) /* the root directory */
+ return path + l - 1;
+
+ while (k > 0 && path[k-1] != '/')
+ k--;
+
+ return path + k;
+}
+
+int path_extract_filename(const char *p, char **ret) {
+ _cleanup_free_ char *a = NULL;
+ const char *c, *e = NULL, *q;
+
+ /* Extracts the filename part (i.e. right-most component) from a path, i.e. string that passes
+ * filename_is_valid(). A wrapper around last_path_component(), but eats up trailing slashes. */
+
+ if (!p)
+ return -EINVAL;
+
+ c = last_path_component(p);
+
+ for (q = c; *q != 0; q++)
+ if (*q != '/')
+ e = q + 1;
+
+ if (!e) /* no valid character? */
+ return -EINVAL;
+
+ a = strndup(c, e - c);
+ if (!a)
+ return -ENOMEM;
+
+ if (!filename_is_valid(a))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(a);
+
+ return 0;
+}
+
+bool filename_is_valid(const char *p) {
+ const char *e;
+
+ if (isempty(p))
+ return false;
+
+ if (dot_or_dot_dot(p))
+ return false;
+
+ e = strchrnul(p, '/');
+ if (*e != 0)
+ return false;
+
+ if (e - p > FILENAME_MAX) /* FILENAME_MAX is counted *without* the trailing NUL byte */
+ return false;
+
+ return true;
+}
+
+bool path_is_valid(const char *p) {
+
+ if (isempty(p))
+ return false;
+
+ if (strlen(p) >= PATH_MAX) /* PATH_MAX is counted *with* the trailing NUL byte */
+ return false;
+
+ return true;
+}
+
+bool path_is_normalized(const char *p) {
+
+ if (!path_is_valid(p))
+ return false;
+
+ if (dot_or_dot_dot(p))
+ return false;
+
+ if (startswith(p, "../") || endswith(p, "/..") || strstr(p, "/../"))
+ return false;
+
+ if (startswith(p, "./") || endswith(p, "/.") || strstr(p, "/./"))
+ return false;
+
+ if (strstr(p, "//"))
+ return false;
+
+ return true;
+}
+
+char *file_in_same_dir(const char *path, const char *filename) {
+ char *e, *ret;
+ size_t k;
+
+ assert(path);
+ assert(filename);
+
+ /* This removes the last component of path and appends
+ * filename, unless the latter is absolute anyway or the
+ * former isn't */
+
+ if (path_is_absolute(filename))
+ return strdup(filename);
+
+ e = strrchr(path, '/');
+ if (!e)
+ return strdup(filename);
+
+ k = strlen(filename);
+ ret = new(char, (e + 1 - path) + k + 1);
+ if (!ret)
+ return NULL;
+
+ memcpy(mempcpy(ret, path, e + 1 - path), filename, k + 1);
+ return ret;
+}
+
+bool hidden_or_backup_file(const char *filename) {
+ const char *p;
+
+ assert(filename);
+
+ if (filename[0] == '.' ||
+ streq(filename, "lost+found") ||
+ streq(filename, "aquota.user") ||
+ streq(filename, "aquota.group") ||
+ endswith(filename, "~"))
+ return true;
+
+ p = strrchr(filename, '.');
+ if (!p)
+ return false;
+
+ /* Please, let's not add more entries to the list below. If external projects think it's a good idea to come up
+ * with always new suffixes and that everybody else should just adjust to that, then it really should be on
+ * them. Hence, in future, let's not add any more entries. Instead, let's ask those packages to instead adopt
+ * one of the generic suffixes/prefixes for hidden files or backups, possibly augmented with an additional
+ * string. Specifically: there's now:
+ *
+ * The generic suffixes "~" and ".bak" for backup files
+ * The generic prefix "." for hidden files
+ *
+ * Thus, if a new package manager "foopkg" wants its own set of ".foopkg-new", ".foopkg-old", ".foopkg-dist"
+ * or so registered, let's refuse that and ask them to use ".foopkg.new", ".foopkg.old" or ".foopkg~" instead.
+ */
+
+ return STR_IN_SET(p + 1,
+ "rpmnew",
+ "rpmsave",
+ "rpmorig",
+ "dpkg-old",
+ "dpkg-new",
+ "dpkg-tmp",
+ "dpkg-dist",
+ "dpkg-bak",
+ "dpkg-backup",
+ "dpkg-remove",
+ "ucf-new",
+ "ucf-old",
+ "ucf-dist",
+ "swp",
+ "bak",
+ "old",
+ "new");
+}
+
+bool is_device_path(const char *path) {
+
+ /* Returns true on paths that likely refer to a device, either by path in sysfs or to something in /dev */
+
+ return PATH_STARTSWITH_SET(path, "/dev/", "/sys/");
+}
+
+bool valid_device_node_path(const char *path) {
+
+ /* Some superficial checks whether the specified path is a valid device node path, all without looking at the
+ * actual device node. */
+
+ if (!PATH_STARTSWITH_SET(path, "/dev/", "/run/systemd/inaccessible/"))
+ return false;
+
+ if (endswith(path, "/")) /* can't be a device node if it ends in a slash */
+ return false;
+
+ return path_is_normalized(path);
+}
+
+bool valid_device_allow_pattern(const char *path) {
+ assert(path);
+
+ /* Like valid_device_node_path(), but also allows full-subsystem expressions, like DeviceAllow= and DeviceDeny=
+ * accept it */
+
+ if (STARTSWITH_SET(path, "block-", "char-"))
+ return true;
+
+ return valid_device_node_path(path);
+}
+
+int systemd_installation_has_version(const char *root, unsigned minimal_version) {
+ const char *pattern;
+ int r;
+
+ /* Try to guess if systemd installation is later than the specified version. This
+ * is hacky and likely to yield false negatives, particularly if the installation
+ * is non-standard. False positives should be relatively rare.
+ */
+
+ NULSTR_FOREACH(pattern,
+ /* /lib works for systems without usr-merge, and for systems with a sane
+ * usr-merge, where /lib is a symlink to /usr/lib. /usr/lib is necessary
+ * for Gentoo which does a merge without making /lib a symlink.
+ */
+ "lib/systemd/libsystemd-shared-*.so\0"
+ "lib64/systemd/libsystemd-shared-*.so\0"
+ "usr/lib/systemd/libsystemd-shared-*.so\0"
+ "usr/lib64/systemd/libsystemd-shared-*.so\0") {
+
+ _cleanup_strv_free_ char **names = NULL;
+ _cleanup_free_ char *path = NULL;
+ char *c, **name;
+
+ path = path_join(root, pattern);
+ if (!path)
+ return -ENOMEM;
+
+ r = glob_extend(&names, path, 0);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+
+ assert_se(c = endswith(path, "*.so"));
+ *c = '\0'; /* truncate the glob part */
+
+ STRV_FOREACH(name, names) {
+ /* This is most likely to run only once, hence let's not optimize anything. */
+ char *t, *t2;
+ unsigned version;
+
+ t = startswith(*name, path);
+ if (!t)
+ continue;
+
+ t2 = endswith(t, ".so");
+ if (!t2)
+ continue;
+
+ t2[0] = '\0'; /* truncate the suffix */
+
+ r = safe_atou(t, &version);
+ if (r < 0) {
+ log_debug_errno(r, "Found libsystemd shared at \"%s.so\", but failed to parse version: %m", *name);
+ continue;
+ }
+
+ log_debug("Found libsystemd shared at \"%s.so\", version %u (%s).",
+ *name, version,
+ version >= minimal_version ? "OK" : "too old");
+ if (version >= minimal_version)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool dot_or_dot_dot(const char *path) {
+ if (!path)
+ return false;
+ if (path[0] != '.')
+ return false;
+ if (path[1] == 0)
+ return true;
+ if (path[1] != '.')
+ return false;
+
+ return path[2] == 0;
+}
+
+bool empty_or_root(const char *root) {
+
+ /* For operations relative to some root directory, returns true if the specified root directory is redundant,
+ * i.e. either / or NULL or the empty string or any equivalent. */
+
+ if (!root)
+ return true;
+
+ return root[strspn(root, "/")] == 0;
+}
+
+bool path_strv_contains(char **l, const char *path) {
+ char **i;
+
+ STRV_FOREACH(i, l)
+ if (path_equal(*i, path))
+ return true;
+
+ return false;
+}
+
+bool prefixed_path_strv_contains(char **l, const char *path) {
+ char **i, *j;
+
+ STRV_FOREACH(i, l) {
+ j = *i;
+ if (*j == '-')
+ j++;
+ if (*j == '+')
+ j++;
+ if (path_equal(j, path))
+ return true;
+ }
+
+ return false;
+}
+
+bool credential_name_valid(const char *s) {
+ /* We want that credential names are both valid in filenames (since that's our primary way to pass
+ * them around) and as fdnames (which is how we might want to pass them around eventually) */
+ return filename_is_valid(s) && fdname_is_valid(s);
+}
diff --git a/src/basic/path-util.h b/src/basic/path-util.h
new file mode 100644
index 0000000..d613709
--- /dev/null
+++ b/src/basic/path-util.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <alloca.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define PATH_SPLIT_SBIN_BIN(x) x "sbin:" x "bin"
+#define PATH_SPLIT_SBIN_BIN_NULSTR(x) x "sbin\0" x "bin\0"
+
+#define PATH_NORMAL_SBIN_BIN(x) x "bin"
+#define PATH_NORMAL_SBIN_BIN_NULSTR(x) x "bin\0"
+
+#if HAVE_SPLIT_BIN
+# define PATH_SBIN_BIN(x) PATH_SPLIT_SBIN_BIN(x)
+# define PATH_SBIN_BIN_NULSTR(x) PATH_SPLIT_SBIN_BIN_NULSTR(x)
+#else
+# define PATH_SBIN_BIN(x) PATH_NORMAL_SBIN_BIN(x)
+# define PATH_SBIN_BIN_NULSTR(x) PATH_NORMAL_SBIN_BIN_NULSTR(x)
+#endif
+
+#define DEFAULT_PATH_NORMAL PATH_SBIN_BIN("/usr/local/") ":" PATH_SBIN_BIN("/usr/")
+#define DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/usr/local/") PATH_SBIN_BIN_NULSTR("/usr/")
+#define DEFAULT_PATH_SPLIT_USR DEFAULT_PATH_NORMAL ":" PATH_SBIN_BIN("/")
+#define DEFAULT_PATH_SPLIT_USR_NULSTR DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/")
+#define DEFAULT_PATH_COMPAT PATH_SPLIT_SBIN_BIN("/usr/local/") ":" PATH_SPLIT_SBIN_BIN("/usr/") ":" PATH_SPLIT_SBIN_BIN("/")
+
+#if HAVE_SPLIT_USR
+# define DEFAULT_PATH DEFAULT_PATH_SPLIT_USR
+# define DEFAULT_PATH_NULSTR DEFAULT_PATH_SPLIT_USR_NULSTR
+#else
+# define DEFAULT_PATH DEFAULT_PATH_NORMAL
+# define DEFAULT_PATH_NULSTR DEFAULT_PATH_NORMAL_NULSTR
+#endif
+
+#ifndef DEFAULT_USER_PATH
+# define DEFAULT_USER_PATH DEFAULT_PATH
+#endif
+
+static inline bool is_path(const char *p) {
+ assert(p);
+ return strchr(p, '/');
+}
+
+static inline bool path_is_absolute(const char *p) {
+ assert(p);
+ return p[0] == '/';
+}
+
+int path_split_and_make_absolute(const char *p, char ***ret);
+char* path_make_absolute(const char *p, const char *prefix);
+int safe_getcwd(char **ret);
+int path_make_absolute_cwd(const char *p, char **ret);
+int path_make_relative(const char *from_dir, const char *to_path, char **_r);
+char* path_startswith(const char *path, const char *prefix) _pure_;
+int path_compare(const char *a, const char *b) _pure_;
+bool path_equal(const char *a, const char *b) _pure_;
+bool path_equal_or_files_same(const char *a, const char *b, int flags);
+char* path_join_internal(const char *first, ...);
+#define path_join(x, ...) path_join_internal(x, __VA_ARGS__, POINTER_MAX)
+
+char* path_simplify(char *path, bool kill_dots);
+
+enum {
+ PATH_CHECK_FATAL = 1 << 0, /* If not set, then error message is appended with 'ignoring'. */
+ PATH_CHECK_ABSOLUTE = 1 << 1,
+ PATH_CHECK_RELATIVE = 1 << 2,
+};
+
+int path_simplify_and_warn(char *path, unsigned flag, const char *unit, const char *filename, unsigned line, const char *lvalue);
+
+static inline bool path_equal_ptr(const char *a, const char *b) {
+ return !!a == !!b && (!a || path_equal(a, b));
+}
+
+/* Note: the search terminates on the first NULL item. */
+#define PATH_IN_SET(p, ...) path_strv_contains(STRV_MAKE(__VA_ARGS__), p)
+
+char* path_startswith_strv(const char *p, char **set);
+#define PATH_STARTSWITH_SET(p, ...) path_startswith_strv(p, STRV_MAKE(__VA_ARGS__))
+
+int path_strv_make_absolute_cwd(char **l);
+char** path_strv_resolve(char **l, const char *root);
+char** path_strv_resolve_uniq(char **l, const char *root);
+
+int find_executable_full(const char *name, bool use_path_envvar, char **ret);
+static inline int find_executable(const char *name, char **ret) {
+ return find_executable_full(name, true, ret);
+}
+
+bool paths_check_timestamp(const char* const* paths, usec_t *paths_ts_usec, bool update);
+
+int fsck_exists(const char *fstype);
+
+/* Iterates through the path prefixes of the specified path, going up
+ * the tree, to root. Also returns "" (and not "/"!) for the root
+ * directory. Excludes the specified directory itself */
+#define PATH_FOREACH_PREFIX(prefix, path) \
+ for (char *_slash = ({ \
+ path_simplify(strcpy(prefix, path), false); \
+ streq(prefix, "/") ? NULL : strrchr(prefix, '/'); \
+ }); \
+ _slash && ((*_slash = 0), true); \
+ _slash = strrchr((prefix), '/'))
+
+/* Same as PATH_FOREACH_PREFIX but also includes the specified path itself */
+#define PATH_FOREACH_PREFIX_MORE(prefix, path) \
+ for (char *_slash = ({ \
+ path_simplify(strcpy(prefix, path), false); \
+ if (streq(prefix, "/")) \
+ prefix[0] = 0; \
+ strrchr(prefix, 0); \
+ }); \
+ _slash && ((*_slash = 0), true); \
+ _slash = strrchr((prefix), '/'))
+
+/* Similar to path_join(), but only works for two components, and only the first one may be NULL and returns
+ * an alloca() buffer, or possibly a const pointer into the path parameter. */
+#define prefix_roota(root, path) \
+ ({ \
+ const char* _path = (path), *_root = (root), *_ret; \
+ char *_p, *_n; \
+ size_t _l; \
+ while (_path[0] == '/' && _path[1] == '/') \
+ _path ++; \
+ if (isempty(_root)) \
+ _ret = _path; \
+ else { \
+ _l = strlen(_root) + 1 + strlen(_path) + 1; \
+ _n = newa(char, _l); \
+ _p = stpcpy(_n, _root); \
+ while (_p > _n && _p[-1] == '/') \
+ _p--; \
+ if (_path[0] != '/') \
+ *(_p++) = '/'; \
+ strcpy(_p, _path); \
+ _ret = _n; \
+ } \
+ _ret; \
+ })
+
+int parse_path_argument_and_warn(const char *path, bool suppress_root, char **arg);
+
+char* dirname_malloc(const char *path);
+const char *last_path_component(const char *path);
+int path_extract_filename(const char *p, char **ret);
+
+bool filename_is_valid(const char *p) _pure_;
+bool path_is_valid(const char *p) _pure_;
+bool path_is_normalized(const char *p) _pure_;
+
+char *file_in_same_dir(const char *path, const char *filename);
+
+bool hidden_or_backup_file(const char *filename) _pure_;
+
+bool is_device_path(const char *path);
+
+bool valid_device_node_path(const char *path);
+bool valid_device_allow_pattern(const char *path);
+
+int systemd_installation_has_version(const char *root, unsigned minimal_version);
+
+bool dot_or_dot_dot(const char *path);
+
+static inline const char *skip_dev_prefix(const char *p) {
+ const char *e;
+
+ /* Drop any /dev prefix if there is any */
+
+ e = path_startswith(p, "/dev/");
+
+ return e ?: p;
+}
+
+bool empty_or_root(const char *root);
+static inline const char *empty_to_root(const char *path) {
+ return isempty(path) ? "/" : path;
+}
+
+bool path_strv_contains(char **l, const char *path);
+bool prefixed_path_strv_contains(char **l, const char *path);
+
+bool credential_name_valid(const char *s);
diff --git a/src/basic/prioq.c b/src/basic/prioq.c
new file mode 100644
index 0000000..559e5d1
--- /dev/null
+++ b/src/basic/prioq.c
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/*
+ * Priority Queue
+ * The prioq object implements a priority queue. That is, it orders objects by
+ * their priority and allows O(1) access to the object with the highest
+ * priority. Insertion and removal are Θ(log n). Optionally, the caller can
+ * provide a pointer to an index which will be kept up-to-date by the prioq.
+ *
+ * The underlying algorithm used in this implementation is a Heap.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "prioq.h"
+
+struct prioq_item {
+ void *data;
+ unsigned *idx;
+};
+
+struct Prioq {
+ compare_func_t compare_func;
+ unsigned n_items, n_allocated;
+
+ struct prioq_item *items;
+};
+
+Prioq *prioq_new(compare_func_t compare_func) {
+ Prioq *q;
+
+ q = new(Prioq, 1);
+ if (!q)
+ return q;
+
+ *q = (Prioq) {
+ .compare_func = compare_func,
+ };
+
+ return q;
+}
+
+Prioq* prioq_free(Prioq *q) {
+ if (!q)
+ return NULL;
+
+ free(q->items);
+ return mfree(q);
+}
+
+int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func) {
+ assert(q);
+
+ if (*q)
+ return 0;
+
+ *q = prioq_new(compare_func);
+ if (!*q)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void swap(Prioq *q, unsigned j, unsigned k) {
+ assert(q);
+ assert(j < q->n_items);
+ assert(k < q->n_items);
+
+ assert(!q->items[j].idx || *(q->items[j].idx) == j);
+ assert(!q->items[k].idx || *(q->items[k].idx) == k);
+
+ SWAP_TWO(q->items[j].data, q->items[k].data);
+ SWAP_TWO(q->items[j].idx, q->items[k].idx);
+
+ if (q->items[j].idx)
+ *q->items[j].idx = j;
+
+ if (q->items[k].idx)
+ *q->items[k].idx = k;
+}
+
+static unsigned shuffle_up(Prioq *q, unsigned idx) {
+ assert(q);
+ assert(idx < q->n_items);
+
+ while (idx > 0) {
+ unsigned k;
+
+ k = (idx-1)/2;
+
+ if (q->compare_func(q->items[k].data, q->items[idx].data) <= 0)
+ break;
+
+ swap(q, idx, k);
+ idx = k;
+ }
+
+ return idx;
+}
+
+static unsigned shuffle_down(Prioq *q, unsigned idx) {
+ assert(q);
+
+ for (;;) {
+ unsigned j, k, s;
+
+ k = (idx+1)*2; /* right child */
+ j = k-1; /* left child */
+
+ if (j >= q->n_items)
+ break;
+
+ if (q->compare_func(q->items[j].data, q->items[idx].data) < 0)
+
+ /* So our left child is smaller than we are, let's
+ * remember this fact */
+ s = j;
+ else
+ s = idx;
+
+ if (k < q->n_items &&
+ q->compare_func(q->items[k].data, q->items[s].data) < 0)
+
+ /* So our right child is smaller than we are, let's
+ * remember this fact */
+ s = k;
+
+ /* s now points to the smallest of the three items */
+
+ if (s == idx)
+ /* No swap necessary, we're done */
+ break;
+
+ swap(q, idx, s);
+ idx = s;
+ }
+
+ return idx;
+}
+
+int prioq_put(Prioq *q, void *data, unsigned *idx) {
+ struct prioq_item *i;
+ unsigned k;
+
+ assert(q);
+
+ if (q->n_items >= q->n_allocated) {
+ unsigned n;
+ struct prioq_item *j;
+
+ n = MAX((q->n_items+1) * 2, 16u);
+ j = reallocarray(q->items, n, sizeof(struct prioq_item));
+ if (!j)
+ return -ENOMEM;
+
+ q->items = j;
+ q->n_allocated = n;
+ }
+
+ k = q->n_items++;
+ i = q->items + k;
+ i->data = data;
+ i->idx = idx;
+
+ if (idx)
+ *idx = k;
+
+ shuffle_up(q, k);
+
+ return 0;
+}
+
+static void remove_item(Prioq *q, struct prioq_item *i) {
+ struct prioq_item *l;
+
+ assert(q);
+ assert(i);
+
+ l = q->items + q->n_items - 1;
+
+ if (i == l)
+ /* Last entry, let's just remove it */
+ q->n_items--;
+ else {
+ unsigned k;
+
+ /* Not last entry, let's replace the last entry with
+ * this one, and reshuffle */
+
+ k = i - q->items;
+
+ i->data = l->data;
+ i->idx = l->idx;
+ if (i->idx)
+ *i->idx = k;
+ q->n_items--;
+
+ k = shuffle_down(q, k);
+ shuffle_up(q, k);
+ }
+}
+
+_pure_ static struct prioq_item* find_item(Prioq *q, void *data, unsigned *idx) {
+ struct prioq_item *i;
+
+ assert(q);
+
+ if (q->n_items <= 0)
+ return NULL;
+
+ if (idx) {
+ if (*idx == PRIOQ_IDX_NULL ||
+ *idx >= q->n_items)
+ return NULL;
+
+ i = q->items + *idx;
+ if (i->data != data)
+ return NULL;
+
+ return i;
+ } else {
+ for (i = q->items; i < q->items + q->n_items; i++)
+ if (i->data == data)
+ return i;
+ return NULL;
+ }
+}
+
+int prioq_remove(Prioq *q, void *data, unsigned *idx) {
+ struct prioq_item *i;
+
+ if (!q)
+ return 0;
+
+ i = find_item(q, data, idx);
+ if (!i)
+ return 0;
+
+ remove_item(q, i);
+ return 1;
+}
+
+int prioq_reshuffle(Prioq *q, void *data, unsigned *idx) {
+ struct prioq_item *i;
+ unsigned k;
+
+ assert(q);
+
+ i = find_item(q, data, idx);
+ if (!i)
+ return 0;
+
+ k = i - q->items;
+ k = shuffle_down(q, k);
+ shuffle_up(q, k);
+ return 1;
+}
+
+void *prioq_peek_by_index(Prioq *q, unsigned idx) {
+ if (!q)
+ return NULL;
+
+ if (idx >= q->n_items)
+ return NULL;
+
+ return q->items[idx].data;
+}
+
+void *prioq_pop(Prioq *q) {
+ void *data;
+
+ if (!q)
+ return NULL;
+
+ if (q->n_items <= 0)
+ return NULL;
+
+ data = q->items[0].data;
+ remove_item(q, q->items);
+ return data;
+}
+
+unsigned prioq_size(Prioq *q) {
+
+ if (!q)
+ return 0;
+
+ return q->n_items;
+}
+
+bool prioq_isempty(Prioq *q) {
+
+ if (!q)
+ return true;
+
+ return q->n_items <= 0;
+}
diff --git a/src/basic/prioq.h b/src/basic/prioq.h
new file mode 100644
index 0000000..951576c
--- /dev/null
+++ b/src/basic/prioq.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+
+typedef struct Prioq Prioq;
+
+#define PRIOQ_IDX_NULL ((unsigned) -1)
+
+Prioq *prioq_new(compare_func_t compare);
+Prioq *prioq_free(Prioq *q);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Prioq*, prioq_free);
+int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func);
+
+int prioq_put(Prioq *q, void *data, unsigned *idx);
+int prioq_remove(Prioq *q, void *data, unsigned *idx);
+int prioq_reshuffle(Prioq *q, void *data, unsigned *idx);
+
+void *prioq_peek_by_index(Prioq *q, unsigned idx) _pure_;
+static inline void *prioq_peek(Prioq *q) {
+ return prioq_peek_by_index(q, 0);
+}
+void *prioq_pop(Prioq *q);
+
+#define PRIOQ_FOREACH_ITEM(q, p) \
+ for (unsigned _i = 0; (p = prioq_peek_by_index(q, _i)); _i++)
+
+unsigned prioq_size(Prioq *q) _pure_;
+bool prioq_isempty(Prioq *q) _pure_;
diff --git a/src/basic/proc-cmdline.c b/src/basic/proc-cmdline.c
new file mode 100644
index 0000000..0b6fb13
--- /dev/null
+++ b/src/basic/proc-cmdline.c
@@ -0,0 +1,376 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "efivars.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "special.h"
+#include "string-util.h"
+#include "util.h"
+#include "virt.h"
+
+int proc_cmdline(char **ret) {
+ const char *e;
+ assert(ret);
+
+ /* For testing purposes it is sometimes useful to be able to override what we consider /proc/cmdline to be */
+ e = secure_getenv("SYSTEMD_PROC_CMDLINE");
+ if (e) {
+ char *m;
+
+ m = strdup(e);
+ if (!m)
+ return -ENOMEM;
+
+ *ret = m;
+ return 0;
+ }
+
+ if (detect_container() > 0)
+ return get_process_cmdline(1, SIZE_MAX, 0, ret);
+ else
+ return read_one_line_file("/proc/cmdline", ret);
+}
+
+static int proc_cmdline_extract_first(const char **p, char **ret_word, ProcCmdlineFlags flags) {
+ const char *q = *p;
+ int r;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ const char *c;
+
+ r = extract_first_word(&q, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Filter out arguments that are intended only for the initrd */
+ c = startswith(word, "rd.");
+ if (c) {
+ if (!in_initrd())
+ continue;
+
+ if (FLAGS_SET(flags, PROC_CMDLINE_STRIP_RD_PREFIX)) {
+ r = free_and_strdup(&word, c);
+ if (r < 0)
+ return r;
+ }
+
+ } else if (FLAGS_SET(flags, PROC_CMDLINE_RD_STRICT) && in_initrd())
+ continue; /* And optionally filter out arguments that are intended only for the host */
+
+ *p = q;
+ *ret_word = TAKE_PTR(word);
+ return 1;
+ }
+
+ *p = q;
+ *ret_word = NULL;
+ return 0;
+}
+
+int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) {
+ const char *p;
+ int r;
+
+ assert(parse_item);
+
+ /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_parse(), let's make this
+ * clear. */
+ assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL));
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ char *value;
+
+ r = proc_cmdline_extract_first(&p, &word, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ value = strchr(word, '=');
+ if (value)
+ *(value++) = 0;
+
+ r = parse_item(word, value, data);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int proc_cmdline_parse(proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) {
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ assert(parse_item);
+
+ /* We parse the EFI variable first, because later settings have higher priority. */
+
+ if (!FLAGS_SET(flags, PROC_CMDLINE_IGNORE_EFI_OPTIONS)) {
+ r = systemd_efi_options_variable(&line);
+ if (r < 0) {
+ if (r != -ENODATA)
+ log_debug_errno(r, "Failed to get SystemdOptions EFI variable, ignoring: %m");
+ } else {
+ r = proc_cmdline_parse_given(line, parse_item, data, flags);
+ if (r < 0)
+ return r;
+
+ line = mfree(line);
+ }
+ }
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ return proc_cmdline_parse_given(line, parse_item, data, flags);
+}
+
+static bool relaxed_equal_char(char a, char b) {
+ return a == b ||
+ (a == '_' && b == '-') ||
+ (a == '-' && b == '_');
+}
+
+char *proc_cmdline_key_startswith(const char *s, const char *prefix) {
+ assert(s);
+ assert(prefix);
+
+ /* Much like startswith(), but considers "-" and "_" the same */
+
+ for (; *prefix != 0; s++, prefix++)
+ if (!relaxed_equal_char(*s, *prefix))
+ return NULL;
+
+ return (char*) s;
+}
+
+bool proc_cmdline_key_streq(const char *x, const char *y) {
+ assert(x);
+ assert(y);
+
+ /* Much like streq(), but considers "-" and "_" the same */
+
+ for (; *x != 0 || *y != 0; x++, y++)
+ if (!relaxed_equal_char(*x, *y))
+ return false;
+
+ return true;
+}
+
+static int cmdline_get_key(const char *line, const char *key, ProcCmdlineFlags flags, char **ret_value) {
+ _cleanup_free_ char *ret = NULL;
+ bool found = false;
+ const char *p;
+ int r;
+
+ assert(line);
+ assert(key);
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = proc_cmdline_extract_first(&p, &word, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (ret_value) {
+ const char *e;
+
+ e = proc_cmdline_key_startswith(word, key);
+ if (!e)
+ continue;
+
+ if (*e == '=') {
+ r = free_and_strdup(&ret, e+1);
+ if (r < 0)
+ return r;
+
+ found = true;
+
+ } else if (*e == 0 && FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL))
+ found = true;
+
+ } else {
+ if (streq(word, key)) {
+ found = true;
+ break; /* we found what we were looking for */
+ }
+ }
+ }
+
+ if (ret_value)
+ *ret_value = TAKE_PTR(ret);
+
+ return found;
+}
+
+int proc_cmdline_get_key(const char *key, ProcCmdlineFlags flags, char **ret_value) {
+ _cleanup_free_ char *line = NULL, *v = NULL;
+ int r;
+
+ /* Looks for a specific key on the kernel command line and (with lower priority) the EFI variable.
+ * Supports three modes:
+ *
+ * a) The "ret_value" parameter is used. In this case a parameter beginning with the "key" string followed by
+ * "=" is searched for, and the value following it is returned in "ret_value".
+ *
+ * b) as above, but the PROC_CMDLINE_VALUE_OPTIONAL flag is set. In this case if the key is found as a separate
+ * word (i.e. not followed by "=" but instead by whitespace or the end of the command line), then this is
+ * also accepted, and "value" is returned as NULL.
+ *
+ * c) The "ret_value" parameter is NULL. In this case a search for the exact "key" parameter is performed.
+ *
+ * In all three cases, > 0 is returned if the key is found, 0 if not. */
+
+ if (isempty(key))
+ return -EINVAL;
+
+ if (FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL) && !ret_value)
+ return -EINVAL;
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(flags, PROC_CMDLINE_IGNORE_EFI_OPTIONS)) /* Shortcut */
+ return cmdline_get_key(line, key, flags, ret_value);
+
+ r = cmdline_get_key(line, key, flags, ret_value ? &v : NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (ret_value)
+ *ret_value = TAKE_PTR(v);
+
+ return r;
+ }
+
+ line = mfree(line);
+ r = systemd_efi_options_variable(&line);
+ if (r == -ENODATA) {
+ if (ret_value)
+ *ret_value = NULL;
+
+ return false; /* Not found */
+ }
+ if (r < 0)
+ return r;
+
+ return cmdline_get_key(line, key, flags, ret_value);
+}
+
+int proc_cmdline_get_bool(const char *key, bool *ret) {
+ _cleanup_free_ char *v = NULL;
+ int r;
+
+ assert(ret);
+
+ r = proc_cmdline_get_key(key, PROC_CMDLINE_VALUE_OPTIONAL, &v);
+ if (r < 0)
+ return r;
+ if (r == 0) { /* key not specified at all */
+ *ret = false;
+ return 0;
+ }
+
+ if (v) { /* key with parameter passed */
+ r = parse_boolean(v);
+ if (r < 0)
+ return r;
+ *ret = r;
+ } else /* key without parameter passed */
+ *ret = true;
+
+ return 1;
+}
+
+int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...) {
+ _cleanup_free_ char *line = NULL;
+ bool processing_efi = true;
+ const char *p;
+ va_list ap;
+ int r, ret = 0;
+
+ /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_get_key_many(), let's make
+ * this clear. */
+ assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL));
+
+ /* This call may clobber arguments on failure! */
+
+ if (!FLAGS_SET(flags, PROC_CMDLINE_IGNORE_EFI_OPTIONS)) {
+ r = systemd_efi_options_variable(&line);
+ if (r < 0 && r != -ENODATA)
+ log_debug_errno(r, "Failed to get SystemdOptions EFI variable, ignoring: %m");
+ }
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = proc_cmdline_extract_first(&p, &word, flags);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* We finished with this command line. If this was the EFI one, then let's proceed with the regular one */
+ if (processing_efi) {
+ processing_efi = false;
+
+ line = mfree(line);
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ p = line;
+ continue;
+ }
+
+ break;
+ }
+
+ va_start(ap, flags);
+
+ for (;;) {
+ char **v;
+ const char *k, *e;
+
+ k = va_arg(ap, const char*);
+ if (!k)
+ break;
+
+ assert_se(v = va_arg(ap, char**));
+
+ e = proc_cmdline_key_startswith(word, k);
+ if (e && *e == '=') {
+ r = free_and_strdup(v, e + 1);
+ if (r < 0) {
+ va_end(ap);
+ return r;
+ }
+
+ ret++;
+ }
+ }
+
+ va_end(ap);
+ }
+
+ return ret;
+}
diff --git a/src/basic/proc-cmdline.h b/src/basic/proc-cmdline.h
new file mode 100644
index 0000000..45f3a27
--- /dev/null
+++ b/src/basic/proc-cmdline.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "log.h"
+
+typedef enum ProcCmdlineFlags {
+ PROC_CMDLINE_STRIP_RD_PREFIX = 1 << 0, /* automatically strip "rd." prefix if it is set (and we are in the initrd, since otherwise we'd not consider it anyway) */
+ PROC_CMDLINE_VALUE_OPTIONAL = 1 << 1, /* the value is optional (for boolean switches that can omit the value) */
+ PROC_CMDLINE_RD_STRICT = 1 << 2, /* ignore this in the initrd */
+ PROC_CMDLINE_IGNORE_EFI_OPTIONS = 1 << 3, /* don't check systemd's private EFI variable */
+} ProcCmdlineFlags;
+
+typedef int (*proc_cmdline_parse_t)(const char *key, const char *value, void *data);
+
+int proc_cmdline(char **ret);
+
+int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags);
+int proc_cmdline_parse(const proc_cmdline_parse_t parse, void *userdata, ProcCmdlineFlags flags);
+
+int proc_cmdline_get_key(const char *parameter, ProcCmdlineFlags flags, char **value);
+int proc_cmdline_get_bool(const char *key, bool *ret);
+
+int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...);
+#define proc_cmdline_get_key_many(flags, ...) proc_cmdline_get_key_many_internal(flags, __VA_ARGS__, NULL)
+
+char *proc_cmdline_key_startswith(const char *s, const char *prefix);
+bool proc_cmdline_key_streq(const char *x, const char *y);
+
+/* A little helper call, to be used in proc_cmdline_parse_t callbacks */
+static inline bool proc_cmdline_value_missing(const char *key, const char *value) {
+ if (!value) {
+ log_warning("Missing argument for %s= kernel command line switch, ignoring.", key);
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/basic/process-util.c b/src/basic/process-util.c
new file mode 100644
index 0000000..0851613
--- /dev/null
+++ b/src/basic/process-util.c
@@ -0,0 +1,1651 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/oom.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <syslog.h>
+#include <unistd.h>
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "env-util.h"
+#include "errno-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "ioprio.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_sched.h"
+#include "missing_syscall.h"
+#include "namespace-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "utf8.h"
+
+/* The kernel limits userspace processes to TASK_COMM_LEN (16 bytes), but allows higher values for its own
+ * workers, e.g. "kworker/u9:3-kcryptd/253:0". Let's pick a fixed smallish limit that will work for the kernel.
+ */
+#define COMM_MAX_LEN 128
+
+static int get_process_state(pid_t pid) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ char state;
+ int r;
+
+ assert(pid >= 0);
+
+ /* Shortcut: if we are enquired about our own state, we are obviously running */
+ if (pid == 0 || pid == getpid_cached())
+ return (unsigned char) 'R';
+
+ p = procfs_file_alloca(pid, "stat");
+
+ r = read_one_line_file(p, &line);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ p = strrchr(line, ')');
+ if (!p)
+ return -EIO;
+
+ p++;
+
+ if (sscanf(p, " %c", &state) != 1)
+ return -EIO;
+
+ return (unsigned char) state;
+}
+
+int get_process_comm(pid_t pid, char **ret) {
+ _cleanup_free_ char *escaped = NULL, *comm = NULL;
+ int r;
+
+ assert(ret);
+ assert(pid >= 0);
+
+ if (pid == 0 || pid == getpid_cached()) {
+ comm = new0(char, TASK_COMM_LEN + 1); /* Must fit in 16 byte according to prctl(2) */
+ if (!comm)
+ return -ENOMEM;
+
+ if (prctl(PR_GET_NAME, comm) < 0)
+ return -errno;
+ } else {
+ const char *p;
+
+ p = procfs_file_alloca(pid, "comm");
+
+ /* Note that process names of kernel threads can be much longer than TASK_COMM_LEN */
+ r = read_one_line_file(p, &comm);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+ }
+
+ escaped = new(char, COMM_MAX_LEN);
+ if (!escaped)
+ return -ENOMEM;
+
+ /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */
+ cellescape(escaped, COMM_MAX_LEN, comm);
+
+ *ret = TAKE_PTR(escaped);
+ return 0;
+}
+
+int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **line) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *t = NULL, *ans = NULL;
+ const char *p;
+ int r;
+ size_t k;
+
+ /* This is supposed to be a safety guard against runaway command lines. */
+ size_t max_length = sc_arg_max();
+
+ assert(line);
+ assert(pid >= 0);
+
+ /* Retrieves a process' command line. Replaces non-utf8 bytes by replacement character (�). If
+ * max_columns is != -1 will return a string of the specified console width at most, abbreviated with
+ * an ellipsis. If PROCESS_CMDLINE_COMM_FALLBACK is specified in flags and the process has no command
+ * line set (the case for kernel threads), or has a command line that resolves to the empty string
+ * will return the "comm" name of the process instead. This will use at most _SC_ARG_MAX bytes of
+ * input data.
+ *
+ * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
+ * comm_fallback is false). Returns 0 and sets *line otherwise. */
+
+ p = procfs_file_alloca(pid, "cmdline");
+ r = fopen_unlocked(p, "re", &f);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* We assume that each four-byte character uses one or two columns. If we ever check for combining
+ * characters, this assumption will need to be adjusted. */
+ if ((size_t) 4 * max_columns + 1 < max_columns)
+ max_length = MIN(max_length, (size_t) 4 * max_columns + 1);
+
+ t = new(char, max_length);
+ if (!t)
+ return -ENOMEM;
+
+ k = fread(t, 1, max_length, f);
+ if (k > 0) {
+ /* Arguments are separated by NULs. Let's replace those with spaces. */
+ for (size_t i = 0; i < k - 1; i++)
+ if (t[i] == '\0')
+ t[i] = ' ';
+
+ t[k] = '\0'; /* Normally, t[k] is already NUL, so this is just a guard in case of short read */
+ } else {
+ /* We only treat getting nothing as an error. We *could* also get an error after reading some
+ * data, but we ignore that case, as such an error is rather unlikely and we prefer to get
+ * some data rather than none. */
+ if (ferror(f))
+ return -errno;
+
+ if (!(flags & PROCESS_CMDLINE_COMM_FALLBACK))
+ return -ENOENT;
+
+ /* Kernel threads have no argv[] */
+ _cleanup_free_ char *t2 = NULL;
+
+ r = get_process_comm(pid, &t2);
+ if (r < 0)
+ return r;
+
+ mfree(t);
+ t = strjoin("[", t2, "]");
+ if (!t)
+ return -ENOMEM;
+ }
+
+ delete_trailing_chars(t, WHITESPACE);
+
+ bool eight_bit = (flags & PROCESS_CMDLINE_USE_LOCALE) && !is_locale_utf8();
+
+ ans = escape_non_printable_full(t, max_columns, eight_bit);
+ if (!ans)
+ return -ENOMEM;
+
+ (void) str_realloc(&ans);
+ *line = TAKE_PTR(ans);
+ return 0;
+}
+
+static int update_argv(const char name[], size_t l) {
+ static int can_do = -1;
+
+ if (can_do == 0)
+ return 0;
+ can_do = false; /* We'll set it to true only if the whole process works */
+
+ /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
+ * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
+ * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
+ * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
+ * mmap() is not. */
+ if (geteuid() != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
+ "Skipping PR_SET_MM, as we don't have privileges.");
+
+ static size_t mm_size = 0;
+ static char *mm = NULL;
+ int r;
+
+ if (mm_size < l+1) {
+ size_t nn_size;
+ char *nn;
+
+ nn_size = PAGE_ALIGN(l+1);
+ nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (nn == MAP_FAILED)
+ return log_debug_errno(errno, "mmap() failed: %m");
+
+ strncpy(nn, name, nn_size);
+
+ /* Now, let's tell the kernel about this new memory */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
+ if (ERRNO_IS_PRIVILEGE(errno))
+ return log_debug_errno(errno, "PR_SET_MM_ARG_START failed: %m");
+
+ /* HACK: prctl() API is kind of dumb on this point. The existing end address may already be
+ * below the desired start address, in which case the kernel may have kicked this back due
+ * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in
+ * action). The proper solution would be to have a prctl() API that could set both start+end
+ * simultaneously, or at least let us query the existing address to anticipate this condition
+ * and respond accordingly. For now, we can only guess at the cause of this failure and try
+ * a workaround--which will briefly expand the arg space to something potentially huge before
+ * resizing it to what we want. */
+ log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m");
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) {
+ r = log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m");
+ (void) munmap(nn, nn_size);
+ return r;
+ }
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0)
+ return log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m");
+ } else {
+ /* And update the end pointer to the new end, too. If this fails, we don't really know what
+ * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure,
+ * and continue. */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
+ log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ }
+
+ if (mm)
+ (void) munmap(mm, mm_size);
+
+ mm = nn;
+ mm_size = nn_size;
+ } else {
+ strncpy(mm, name, mm_size);
+
+ /* Update the end pointer, continuing regardless of any failure. */
+ if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
+ log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
+ }
+
+ can_do = true;
+ return 0;
+}
+
+int rename_process(const char name[]) {
+ bool truncated = false;
+
+ /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
+ * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
+ * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded;
+ * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be
+ * truncated.
+ *
+ * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
+
+ if (isempty(name))
+ return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
+
+ if (!is_main_thread())
+ return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we
+ * cache things without locking, and we make assumptions that PR_SET_NAME sets the
+ * process name that isn't correct on any other threads */
+
+ size_t l = strlen(name);
+
+ /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we
+ * can use PR_SET_NAME, which sets the thread name for the calling thread. */
+ if (prctl(PR_SET_NAME, name) < 0)
+ log_debug_errno(errno, "PR_SET_NAME failed: %m");
+ if (l >= TASK_COMM_LEN) /* Linux userspace process names can be 15 chars at max */
+ truncated = true;
+
+ /* Second step, change glibc's ID of the process name. */
+ if (program_invocation_name) {
+ size_t k;
+
+ k = strlen(program_invocation_name);
+ strncpy(program_invocation_name, name, k);
+ if (l > k)
+ truncated = true;
+ }
+
+ /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
+ * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
+ * the end. This is the best option for changing /proc/self/cmdline. */
+ (void) update_argv(name, l);
+
+ /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
+ * it still looks here */
+ if (saved_argc > 0) {
+ if (saved_argv[0]) {
+ size_t k;
+
+ k = strlen(saved_argv[0]);
+ strncpy(saved_argv[0], name, k);
+ if (l > k)
+ truncated = true;
+ }
+
+ for (int i = 1; i < saved_argc; i++) {
+ if (!saved_argv[i])
+ break;
+
+ memzero(saved_argv[i], strlen(saved_argv[i]));
+ }
+ }
+
+ return !truncated;
+}
+
+int is_kernel_thread(pid_t pid) {
+ _cleanup_free_ char *line = NULL;
+ unsigned long long flags;
+ size_t l, i;
+ const char *p;
+ char *q;
+ int r;
+
+ if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
+ return 0;
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ p = procfs_file_alloca(pid, "stat");
+ r = read_one_line_file(p, &line);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* Skip past the comm field */
+ q = strrchr(line, ')');
+ if (!q)
+ return -EINVAL;
+ q++;
+
+ /* Skip 6 fields to reach the flags field */
+ for (i = 0; i < 6; i++) {
+ l = strspn(q, WHITESPACE);
+ if (l < 1)
+ return -EINVAL;
+ q += l;
+
+ l = strcspn(q, WHITESPACE);
+ if (l < 1)
+ return -EINVAL;
+ q += l;
+ }
+
+ /* Skip preceding whitespace */
+ l = strspn(q, WHITESPACE);
+ if (l < 1)
+ return -EINVAL;
+ q += l;
+
+ /* Truncate the rest */
+ l = strcspn(q, WHITESPACE);
+ if (l < 1)
+ return -EINVAL;
+ q[l] = 0;
+
+ r = safe_atollu(q, &flags);
+ if (r < 0)
+ return r;
+
+ return !!(flags & PF_KTHREAD);
+}
+
+int get_process_capeff(pid_t pid, char **capeff) {
+ const char *p;
+ int r;
+
+ assert(capeff);
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "status");
+
+ r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
+ if (r == -ENOENT)
+ return -ESRCH;
+
+ return r;
+}
+
+static int get_process_link_contents(const char *proc_file, char **name) {
+ int r;
+
+ assert(proc_file);
+ assert(name);
+
+ r = readlink_malloc(proc_file, name);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int get_process_exe(pid_t pid, char **name) {
+ const char *p;
+ char *d;
+ int r;
+
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "exe");
+ r = get_process_link_contents(p, name);
+ if (r < 0)
+ return r;
+
+ d = endswith(*name, " (deleted)");
+ if (d)
+ *d = '\0';
+
+ return 0;
+}
+
+static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+ int r;
+
+ assert(field);
+ assert(uid);
+
+ if (pid < 0)
+ return -EINVAL;
+
+ p = procfs_file_alloca(pid, "status");
+ r = fopen_unlocked(p, "re", &f);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+
+ if (startswith(l, field)) {
+ l += strlen(field);
+ l += strspn(l, WHITESPACE);
+
+ l[strcspn(l, WHITESPACE)] = 0;
+
+ return parse_uid(l, uid);
+ }
+ }
+
+ return -EIO;
+}
+
+int get_process_uid(pid_t pid, uid_t *uid) {
+
+ if (pid == 0 || pid == getpid_cached()) {
+ *uid = getuid();
+ return 0;
+ }
+
+ return get_process_id(pid, "Uid:", uid);
+}
+
+int get_process_gid(pid_t pid, gid_t *gid) {
+
+ if (pid == 0 || pid == getpid_cached()) {
+ *gid = getgid();
+ return 0;
+ }
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ return get_process_id(pid, "Gid:", gid);
+}
+
+int get_process_cwd(pid_t pid, char **cwd) {
+ const char *p;
+
+ assert(pid >= 0);
+
+ if (pid == 0 || pid == getpid_cached())
+ return safe_getcwd(cwd);
+
+ p = procfs_file_alloca(pid, "cwd");
+
+ return get_process_link_contents(p, cwd);
+}
+
+int get_process_root(pid_t pid, char **root) {
+ const char *p;
+
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "root");
+
+ return get_process_link_contents(p, root);
+}
+
+#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U)
+
+int get_process_environ(pid_t pid, char **env) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *outcome = NULL;
+ size_t allocated = 0, sz = 0;
+ const char *p;
+ int r;
+
+ assert(pid >= 0);
+ assert(env);
+
+ p = procfs_file_alloca(pid, "environ");
+
+ r = fopen_unlocked(p, "re", &f);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ char c;
+
+ if (sz >= ENVIRONMENT_BLOCK_MAX)
+ return -ENOBUFS;
+
+ if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
+ return -ENOMEM;
+
+ r = safe_fgetc(f, &c);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (c == '\0')
+ outcome[sz++] = '\n';
+ else
+ sz += cescape_char(c, outcome + sz);
+ }
+
+ outcome[sz] = '\0';
+ *env = TAKE_PTR(outcome);
+
+ return 0;
+}
+
+int get_process_ppid(pid_t pid, pid_t *_ppid) {
+ int r;
+ _cleanup_free_ char *line = NULL;
+ long unsigned ppid;
+ const char *p;
+
+ assert(pid >= 0);
+ assert(_ppid);
+
+ if (pid == 0 || pid == getpid_cached()) {
+ *_ppid = getppid();
+ return 0;
+ }
+
+ p = procfs_file_alloca(pid, "stat");
+ r = read_one_line_file(p, &line);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* Let's skip the pid and comm fields. The latter is enclosed
+ * in () but does not escape any () in its value, so let's
+ * skip over it manually */
+
+ p = strrchr(line, ')');
+ if (!p)
+ return -EIO;
+
+ p++;
+
+ if (sscanf(p, " "
+ "%*c " /* state */
+ "%lu ", /* ppid */
+ &ppid) != 1)
+ return -EIO;
+
+ if ((long unsigned) (pid_t) ppid != ppid)
+ return -ERANGE;
+
+ *_ppid = (pid_t) ppid;
+
+ return 0;
+}
+
+int get_process_umask(pid_t pid, mode_t *umask) {
+ _cleanup_free_ char *m = NULL;
+ const char *p;
+ int r;
+
+ assert(umask);
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "status");
+
+ r = get_proc_field(p, "Umask", WHITESPACE, &m);
+ if (r == -ENOENT)
+ return -ESRCH;
+
+ return parse_mode(m, umask);
+}
+
+int wait_for_terminate(pid_t pid, siginfo_t *status) {
+ siginfo_t dummy;
+
+ assert(pid >= 1);
+
+ if (!status)
+ status = &dummy;
+
+ for (;;) {
+ zero(*status);
+
+ if (waitid(P_PID, pid, status, WEXITED) < 0) {
+
+ if (errno == EINTR)
+ continue;
+
+ return negative_errno();
+ }
+
+ return 0;
+ }
+}
+
+/*
+ * Return values:
+ * < 0 : wait_for_terminate() failed to get the state of the
+ * process, the process was terminated by a signal, or
+ * failed for an unknown reason.
+ * >=0 : The process terminated normally, and its exit code is
+ * returned.
+ *
+ * That is, success is indicated by a return value of zero, and an
+ * error is indicated by a non-zero value.
+ *
+ * A warning is emitted if the process terminates abnormally,
+ * and also if it returns non-zero unless check_exit_code is true.
+ */
+int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
+ _cleanup_free_ char *buffer = NULL;
+ siginfo_t status;
+ int r, prio;
+
+ assert(pid > 1);
+
+ if (!name) {
+ r = get_process_comm(pid, &buffer);
+ if (r < 0)
+ log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid);
+ else
+ name = buffer;
+ }
+
+ prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
+
+ r = wait_for_terminate(pid, &status);
+ if (r < 0)
+ return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
+
+ if (status.si_code == CLD_EXITED) {
+ if (status.si_status != EXIT_SUCCESS)
+ log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
+ "%s failed with exit status %i.", strna(name), status.si_status);
+ else
+ log_debug("%s succeeded.", name);
+
+ return status.si_status;
+
+ } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
+
+ log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
+ return -EPROTO;
+ }
+
+ log_full(prio, "%s failed due to unknown reason.", strna(name));
+ return -EPROTO;
+}
+
+/*
+ * Return values:
+ *
+ * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process
+ * was terminated by a signal, or failed for an unknown reason.
+ *
+ * >=0 : The process terminated normally with no failures.
+ *
+ * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure
+ * states are indicated by error is indicated by a non-zero value.
+ *
+ * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
+ * to remain entirely race-free.
+ */
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
+ sigset_t mask;
+ int r;
+ usec_t until;
+
+ assert_se(sigemptyset(&mask) == 0);
+ assert_se(sigaddset(&mask, SIGCHLD) == 0);
+
+ /* Drop into a sigtimewait-based timeout. Waiting for the
+ * pid to exit. */
+ until = now(CLOCK_MONOTONIC) + timeout;
+ for (;;) {
+ usec_t n;
+ siginfo_t status = {};
+ struct timespec ts;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= until)
+ break;
+
+ r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
+ /* Assuming we woke due to the child exiting. */
+ if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
+ if (status.si_pid == pid) {
+ /* This is the correct child.*/
+ if (status.si_code == CLD_EXITED)
+ return (status.si_status == 0) ? 0 : -EPROTO;
+ else
+ return -EPROTO;
+ }
+ }
+ /* Not the child, check for errors and proceed appropriately */
+ if (r < 0) {
+ switch (r) {
+ case -EAGAIN:
+ /* Timed out, child is likely hung. */
+ return -ETIMEDOUT;
+ case -EINTR:
+ /* Received a different signal and should retry */
+ continue;
+ default:
+ /* Return any unexpected errors */
+ return r;
+ }
+ }
+ }
+
+ return -EPROTO;
+}
+
+void sigkill_wait(pid_t pid) {
+ assert(pid > 1);
+
+ if (kill(pid, SIGKILL) >= 0)
+ (void) wait_for_terminate(pid, NULL);
+}
+
+void sigkill_waitp(pid_t *pid) {
+ PROTECT_ERRNO;
+
+ if (!pid)
+ return;
+ if (*pid <= 1)
+ return;
+
+ sigkill_wait(*pid);
+}
+
+void sigterm_wait(pid_t pid) {
+ assert(pid > 1);
+
+ if (kill_and_sigcont(pid, SIGTERM) >= 0)
+ (void) wait_for_terminate(pid, NULL);
+}
+
+int kill_and_sigcont(pid_t pid, int sig) {
+ int r;
+
+ r = kill(pid, sig) < 0 ? -errno : 0;
+
+ /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't
+ * affected by a process being suspended anyway. */
+ if (r >= 0 && !IN_SET(sig, SIGCONT, SIGKILL))
+ (void) kill(pid, SIGCONT);
+
+ return r;
+}
+
+int getenv_for_pid(pid_t pid, const char *field, char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char *value = NULL;
+ const char *path;
+ size_t l, sum = 0;
+ int r;
+
+ assert(pid >= 0);
+ assert(field);
+ assert(ret);
+
+ if (pid == 0 || pid == getpid_cached()) {
+ const char *e;
+
+ e = getenv(field);
+ if (!e) {
+ *ret = NULL;
+ return 0;
+ }
+
+ value = strdup(e);
+ if (!value)
+ return -ENOMEM;
+
+ *ret = value;
+ return 1;
+ }
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ path = procfs_file_alloca(pid, "environ");
+
+ r = fopen_unlocked(path, "re", &f);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ l = strlen(field);
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */
+ return -ENOBUFS;
+
+ r = read_nul_string(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0) /* EOF */
+ break;
+
+ sum += r;
+
+ if (strneq(line, field, l) && line[l] == '=') {
+ value = strdup(line + l + 1);
+ if (!value)
+ return -ENOMEM;
+
+ *ret = value;
+ return 1;
+ }
+ }
+
+ *ret = NULL;
+ return 0;
+}
+
+int pid_is_my_child(pid_t pid) {
+ pid_t ppid;
+ int r;
+
+ if (pid <= 1)
+ return false;
+
+ r = get_process_ppid(pid, &ppid);
+ if (r < 0)
+ return r;
+
+ return ppid == getpid_cached();
+}
+
+bool pid_is_unwaited(pid_t pid) {
+ /* Checks whether a PID is still valid at all, including a zombie */
+
+ if (pid < 0)
+ return false;
+
+ if (pid <= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */
+ return true;
+
+ if (pid == getpid_cached())
+ return true;
+
+ if (kill(pid, 0) >= 0)
+ return true;
+
+ return errno != ESRCH;
+}
+
+bool pid_is_alive(pid_t pid) {
+ int r;
+
+ /* Checks whether a PID is still valid and not a zombie */
+
+ if (pid < 0)
+ return false;
+
+ if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */
+ return true;
+
+ if (pid == getpid_cached())
+ return true;
+
+ r = get_process_state(pid);
+ if (IN_SET(r, -ESRCH, 'Z'))
+ return false;
+
+ return true;
+}
+
+int pid_from_same_root_fs(pid_t pid) {
+ const char *root;
+
+ if (pid < 0)
+ return false;
+
+ if (pid == 0 || pid == getpid_cached())
+ return true;
+
+ root = procfs_file_alloca(pid, "root");
+
+ return files_same(root, "/proc/1/root", 0);
+}
+
+bool is_main_thread(void) {
+ static thread_local int cached = 0;
+
+ if (_unlikely_(cached == 0))
+ cached = getpid_cached() == gettid() ? 1 : -1;
+
+ return cached > 0;
+}
+
+_noreturn_ void freeze(void) {
+
+ log_close();
+
+ /* Make sure nobody waits for us on a socket anymore */
+ (void) close_all_fds(NULL, 0);
+
+ sync();
+
+ /* Let's not freeze right away, but keep reaping zombies. */
+ for (;;) {
+ int r;
+ siginfo_t si = {};
+
+ r = waitid(P_ALL, 0, &si, WEXITED);
+ if (r < 0 && errno != EINTR)
+ break;
+ }
+
+ /* waitid() failed with an unexpected error, things are really borked. Freeze now! */
+ for (;;)
+ pause();
+}
+
+bool oom_score_adjust_is_valid(int oa) {
+ return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX;
+}
+
+unsigned long personality_from_string(const char *p) {
+ int architecture;
+
+ if (!p)
+ return PERSONALITY_INVALID;
+
+ /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
+ * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
+ * the same register size. */
+
+ architecture = architecture_from_string(p);
+ if (architecture < 0)
+ return PERSONALITY_INVALID;
+
+ if (architecture == native_architecture())
+ return PER_LINUX;
+#ifdef SECONDARY_ARCHITECTURE
+ if (architecture == SECONDARY_ARCHITECTURE)
+ return PER_LINUX32;
+#endif
+
+ return PERSONALITY_INVALID;
+}
+
+const char* personality_to_string(unsigned long p) {
+ int architecture = _ARCHITECTURE_INVALID;
+
+ if (p == PER_LINUX)
+ architecture = native_architecture();
+#ifdef SECONDARY_ARCHITECTURE
+ else if (p == PER_LINUX32)
+ architecture = SECONDARY_ARCHITECTURE;
+#endif
+
+ if (architecture < 0)
+ return NULL;
+
+ return architecture_to_string(architecture);
+}
+
+int safe_personality(unsigned long p) {
+ int ret;
+
+ /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
+ * and in others as negative return value containing an errno-like value. Let's work around this: this is a
+ * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
+ * the return value indicating the same issue, so that we are definitely on the safe side.
+ *
+ * See https://github.com/systemd/systemd/issues/6737 */
+
+ errno = 0;
+ ret = personality(p);
+ if (ret < 0) {
+ if (errno != 0)
+ return -errno;
+
+ errno = -ret;
+ }
+
+ return ret;
+}
+
+int opinionated_personality(unsigned long *ret) {
+ int current;
+
+ /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
+ * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
+ * two most relevant personalities: PER_LINUX and PER_LINUX32. */
+
+ current = safe_personality(PERSONALITY_INVALID);
+ if (current < 0)
+ return current;
+
+ if (((unsigned long) current & 0xffff) == PER_LINUX32)
+ *ret = PER_LINUX32;
+ else
+ *ret = PER_LINUX;
+
+ return 0;
+}
+
+void valgrind_summary_hack(void) {
+#if HAVE_VALGRIND_VALGRIND_H
+ if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
+ pid_t pid;
+ pid = raw_clone(SIGCHLD);
+ if (pid < 0)
+ log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
+ else if (pid == 0)
+ exit(EXIT_SUCCESS);
+ else {
+ log_info("Spawned valgrind helper as PID "PID_FMT".", pid);
+ (void) wait_for_terminate(pid, NULL);
+ }
+ }
+#endif
+}
+
+int pid_compare_func(const pid_t *a, const pid_t *b) {
+ /* Suitable for usage in qsort() */
+ return CMP(*a, *b);
+}
+
+int ioprio_parse_priority(const char *s, int *ret) {
+ int i, r;
+
+ assert(s);
+ assert(ret);
+
+ r = safe_atoi(s, &i);
+ if (r < 0)
+ return r;
+
+ if (!ioprio_priority_is_valid(i))
+ return -EINVAL;
+
+ *ret = i;
+ return 0;
+}
+
+/* The cached PID, possible values:
+ *
+ * == UNSET [0] → cache not initialized yet
+ * == BUSY [-1] → some thread is initializing it at the moment
+ * any other → the cached PID
+ */
+
+#define CACHED_PID_UNSET ((pid_t) 0)
+#define CACHED_PID_BUSY ((pid_t) -1)
+
+static pid_t cached_pid = CACHED_PID_UNSET;
+
+void reset_cached_pid(void) {
+ /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
+ cached_pid = CACHED_PID_UNSET;
+}
+
+/* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
+ * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
+ * libpthread, as it is part of glibc anyway. */
+extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void *dso_handle);
+extern void* __dso_handle _weak_;
+
+pid_t getpid_cached(void) {
+ static bool installed = false;
+ pid_t current_value;
+
+ /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
+ * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
+ * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
+ * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
+ *
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
+ * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
+ */
+
+ current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
+
+ switch (current_value) {
+
+ case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
+ pid_t new_pid;
+
+ new_pid = raw_getpid();
+
+ if (!installed) {
+ /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's
+ * only half-documented (glibc doesn't document it but LSB does — though only superficially)
+ * we'll check for errors only in the most generic fashion possible. */
+
+ if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
+ /* OOM? Let's try again later */
+ cached_pid = CACHED_PID_UNSET;
+ return new_pid;
+ }
+
+ installed = true;
+ }
+
+ cached_pid = new_pid;
+ return new_pid;
+ }
+
+ case CACHED_PID_BUSY: /* Somebody else is currently initializing */
+ return raw_getpid();
+
+ default: /* Properly initialized */
+ return current_value;
+ }
+}
+
+int must_be_root(void) {
+
+ if (geteuid() == 0)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");
+}
+
+static void restore_sigsetp(sigset_t **ssp) {
+ if (*ssp)
+ (void) sigprocmask(SIG_SETMASK, *ssp, NULL);
+}
+
+int safe_fork_full(
+ const char *name,
+ const int except_fds[],
+ size_t n_except_fds,
+ ForkFlags flags,
+ pid_t *ret_pid) {
+
+ pid_t original_pid, pid;
+ sigset_t saved_ss, ss;
+ _cleanup_(restore_sigsetp) sigset_t *saved_ssp = NULL;
+ bool block_signals = false, block_all = false;
+ int prio, r;
+
+ /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always
+ * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */
+
+ prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
+
+ original_pid = getpid_cached();
+
+ if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG)) {
+ /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can
+ * be sure that SIGTERMs are not lost we might send to the child. */
+
+ assert_se(sigfillset(&ss) >= 0);
+ block_signals = block_all = true;
+
+ } else if (flags & FORK_WAIT) {
+ /* Let's block SIGCHLD at least, so that we can safely watch for the child process */
+
+ assert_se(sigemptyset(&ss) >= 0);
+ assert_se(sigaddset(&ss, SIGCHLD) >= 0);
+ block_signals = true;
+ }
+
+ if (block_signals) {
+ if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0)
+ return log_full_errno(prio, errno, "Failed to set signal mask: %m");
+ saved_ssp = &saved_ss;
+ }
+
+ if (flags & FORK_NEW_MOUNTNS)
+ pid = raw_clone(SIGCHLD|CLONE_NEWNS);
+ else
+ pid = fork();
+ if (pid < 0)
+ return log_full_errno(prio, errno, "Failed to fork: %m");
+ if (pid > 0) {
+ /* We are in the parent process */
+
+ log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
+
+ if (flags & FORK_WAIT) {
+ if (block_all) {
+ /* undo everything except SIGCHLD */
+ ss = saved_ss;
+ assert_se(sigaddset(&ss, SIGCHLD) >= 0);
+ (void) sigprocmask(SIG_SETMASK, &ss, NULL);
+ }
+
+ r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
+ return -EPROTO;
+ }
+
+ if (ret_pid)
+ *ret_pid = pid;
+
+ return 1;
+ }
+
+ /* We are in the child process */
+
+ /* Restore signal mask manually */
+ saved_ssp = NULL;
+
+ if (flags & FORK_REOPEN_LOG) {
+ /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
+ log_close();
+ log_set_open_when_needed(true);
+ }
+
+ if (name) {
+ r = rename_process(name);
+ if (r < 0)
+ log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
+ r, "Failed to rename process, ignoring: %m");
+ }
+
+ if (flags & (FORK_DEATHSIG|FORK_DEATHSIG_SIGINT))
+ if (prctl(PR_SET_PDEATHSIG, (flags & FORK_DEATHSIG_SIGINT) ? SIGINT : SIGTERM) < 0) {
+ log_full_errno(prio, errno, "Failed to set death signal: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (flags & FORK_RESET_SIGNALS) {
+ r = reset_all_signal_handlers();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to reset signal handlers: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
+ r = reset_signal_mask();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to reset signal mask: %m");
+ _exit(EXIT_FAILURE);
+ }
+ } else if (block_signals) { /* undo what we did above */
+ if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
+ log_full_errno(prio, errno, "Failed to restore signal mask: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (flags & FORK_DEATHSIG) {
+ pid_t ppid;
+ /* Let's see if the parent PID is still the one we started from? If not, then the parent
+ * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
+
+ ppid = getppid();
+ if (ppid == 0)
+ /* Parent is in a different PID namespace. */;
+ else if (ppid != original_pid) {
+ log_debug("Parent died early, raising SIGTERM.");
+ (void) raise(SIGTERM);
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
+
+ /* Optionally, make sure we never propagate mounts to the host. */
+
+ if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
+ log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (flags & FORK_CLOSE_ALL_FDS) {
+ /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
+ log_close();
+
+ r = close_all_fds(except_fds, n_except_fds);
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to close all file descriptors: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ /* When we were asked to reopen the logs, do so again now */
+ if (flags & FORK_REOPEN_LOG) {
+ log_open();
+ log_set_open_when_needed(false);
+ }
+
+ if (flags & FORK_NULL_STDIO) {
+ r = make_null_stdio();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ } else if (flags & FORK_STDOUT_TO_STDERR) {
+ if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) {
+ log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (flags & FORK_RLIMIT_NOFILE_SAFE) {
+ r = rlimit_nofile_safe();
+ if (r < 0) {
+ log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (ret_pid)
+ *ret_pid = getpid_cached();
+
+ return 0;
+}
+
+int namespace_fork(
+ const char *outer_name,
+ const char *inner_name,
+ const int except_fds[],
+ size_t n_except_fds,
+ ForkFlags flags,
+ int pidns_fd,
+ int mntns_fd,
+ int netns_fd,
+ int userns_fd,
+ int root_fd,
+ pid_t *ret_pid) {
+
+ int r;
+
+ /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle
+ * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that
+ * /proc/self/fd works correctly. */
+
+ r = safe_fork_full(outer_name, except_fds, n_except_fds, (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ pid_t pid;
+
+ /* Child */
+
+ r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
+ if (r < 0) {
+ log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */
+ r = safe_fork_full(inner_name, except_fds, n_except_fds, flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO), &pid);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+ if (r == 0) {
+ /* Child */
+ if (ret_pid)
+ *ret_pid = pid;
+ return 0;
+ }
+
+ r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(r);
+ }
+
+ return 1;
+}
+
+int fork_agent(const char *name, const int except[], size_t n_except, pid_t *ret_pid, const char *path, ...) {
+ bool stdout_is_tty, stderr_is_tty;
+ size_t n, i;
+ va_list ap;
+ char **l;
+ int r;
+
+ assert(path);
+
+ /* Spawns a temporary TTY agent, making sure it goes away when we go away */
+
+ r = safe_fork_full(name, except, n_except, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS, ret_pid);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ /* In the child: */
+
+ stdout_is_tty = isatty(STDOUT_FILENO);
+ stderr_is_tty = isatty(STDERR_FILENO);
+
+ if (!stdout_is_tty || !stderr_is_tty) {
+ int fd;
+
+ /* Detach from stdout/stderr. and reopen
+ * /dev/tty for them. This is important to
+ * ensure that when systemctl is started via
+ * popen() or a similar call that expects to
+ * read EOF we actually do generate EOF and
+ * not delay this indefinitely by because we
+ * keep an unused copy of stdin around. */
+ fd = open("/dev/tty", O_WRONLY);
+ if (fd < 0) {
+ log_error_errno(errno, "Failed to open /dev/tty: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
+ log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
+ log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close_above_stdio(fd);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ /* Count arguments */
+ va_start(ap, path);
+ for (n = 0; va_arg(ap, char*); n++)
+ ;
+ va_end(ap);
+
+ /* Allocate strv */
+ l = newa(char*, n + 1);
+
+ /* Fill in arguments */
+ va_start(ap, path);
+ for (i = 0; i <= n; i++)
+ l[i] = va_arg(ap, char*);
+ va_end(ap);
+
+ execv(path, l);
+ _exit(EXIT_FAILURE);
+}
+
+int set_oom_score_adjust(int value) {
+ char t[DECIMAL_STR_MAX(int)];
+
+ sprintf(t, "%i", value);
+
+ return write_string_file("/proc/self/oom_score_adj", t,
+ WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+int pidfd_get_pid(int fd, pid_t *ret) {
+ char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
+ _cleanup_free_ char *fdinfo = NULL;
+ char *p;
+ int r;
+
+ if (fd < 0)
+ return -EBADF;
+
+ xsprintf(path, "/proc/self/fdinfo/%i", fd);
+
+ r = read_full_file(path, &fdinfo, NULL);
+ if (r == -ENOENT) /* if fdinfo doesn't exist we assume the process does not exist */
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ p = startswith(fdinfo, "Pid:");
+ if (!p) {
+ p = strstr(fdinfo, "\nPid:");
+ if (!p)
+ return -ENOTTY; /* not a pidfd? */
+
+ p += 5;
+ }
+
+ p += strspn(p, WHITESPACE);
+ p[strcspn(p, WHITESPACE)] = 0;
+
+ return parse_pid(p, ret);
+}
+
+static int rlimit_to_nice(rlim_t limit) {
+ if (limit <= 1)
+ return PRIO_MAX-1; /* i.e. 19 */
+
+ if (limit >= -PRIO_MIN + PRIO_MAX)
+ return PRIO_MIN; /* i.e. -20 */
+
+ return PRIO_MAX - (int) limit;
+}
+
+int setpriority_closest(int priority) {
+ int current, limit, saved_errno;
+ struct rlimit highest;
+
+ /* Try to set requested nice level */
+ if (setpriority(PRIO_PROCESS, 0, priority) >= 0)
+ return 1;
+
+ /* Permission failed */
+ saved_errno = -errno;
+ if (!ERRNO_IS_PRIVILEGE(saved_errno))
+ return saved_errno;
+
+ errno = 0;
+ current = getpriority(PRIO_PROCESS, 0);
+ if (errno != 0)
+ return -errno;
+
+ if (priority == current)
+ return 1;
+
+ /* Hmm, we'd expect that raising the nice level from our status quo would always work. If it doesn't,
+ * then the whole setpriority() system call is blocked to us, hence let's propagate the error
+ * right-away */
+ if (priority > current)
+ return saved_errno;
+
+ if (getrlimit(RLIMIT_NICE, &highest) < 0)
+ return -errno;
+
+ limit = rlimit_to_nice(highest.rlim_cur);
+
+ /* We are already less nice than limit allows us */
+ if (current < limit) {
+ log_debug("Cannot raise nice level, permissions and the resource limit do not allow it.");
+ return 0;
+ }
+
+ /* Push to the allowed limit */
+ if (setpriority(PRIO_PROCESS, 0, limit) < 0)
+ return -errno;
+
+ log_debug("Cannot set requested nice level (%i), used next best (%i).", priority, limit);
+ return 0;
+}
+
+static const char *const ioprio_class_table[] = {
+ [IOPRIO_CLASS_NONE] = "none",
+ [IOPRIO_CLASS_RT] = "realtime",
+ [IOPRIO_CLASS_BE] = "best-effort",
+ [IOPRIO_CLASS_IDLE] = "idle",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, IOPRIO_N_CLASSES);
+
+static const char *const sigchld_code_table[] = {
+ [CLD_EXITED] = "exited",
+ [CLD_KILLED] = "killed",
+ [CLD_DUMPED] = "dumped",
+ [CLD_TRAPPED] = "trapped",
+ [CLD_STOPPED] = "stopped",
+ [CLD_CONTINUED] = "continued",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
+
+static const char* const sched_policy_table[] = {
+ [SCHED_OTHER] = "other",
+ [SCHED_BATCH] = "batch",
+ [SCHED_IDLE] = "idle",
+ [SCHED_FIFO] = "fifo",
+ [SCHED_RR] = "rr",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);
diff --git a/src/basic/process-util.h b/src/basic/process-util.h
new file mode 100644
index 0000000..6144f14
--- /dev/null
+++ b/src/basic/process-util.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "format-util.h"
+#include "ioprio.h"
+#include "macro.h"
+#include "time-util.h"
+
+#define procfs_file_alloca(pid, field) \
+ ({ \
+ pid_t _pid_ = (pid); \
+ const char *_field_ = (field); \
+ char *_r_; \
+ if (_pid_ == 0) { \
+ _r_ = newa(char, STRLEN("/proc/self/") + strlen(_field_) + 1); \
+ strcpy(stpcpy(_r_, "/proc/self/"), _field_); \
+ } else { \
+ _r_ = newa(char, STRLEN("/proc/") + DECIMAL_STR_MAX(pid_t) + 1 + strlen(_field_) + 1); \
+ sprintf(_r_, "/proc/" PID_FMT "/%s", _pid_, _field_); \
+ } \
+ (const char*) _r_; \
+ })
+
+typedef enum ProcessCmdlineFlags {
+ PROCESS_CMDLINE_COMM_FALLBACK = 1 << 0,
+ PROCESS_CMDLINE_USE_LOCALE = 1 << 1,
+} ProcessCmdlineFlags;
+
+int get_process_comm(pid_t pid, char **name);
+int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **line);
+int get_process_exe(pid_t pid, char **name);
+int get_process_uid(pid_t pid, uid_t *uid);
+int get_process_gid(pid_t pid, gid_t *gid);
+int get_process_capeff(pid_t pid, char **capeff);
+int get_process_cwd(pid_t pid, char **cwd);
+int get_process_root(pid_t pid, char **root);
+int get_process_environ(pid_t pid, char **environ);
+int get_process_ppid(pid_t pid, pid_t *ppid);
+int get_process_umask(pid_t pid, mode_t *umask);
+
+int wait_for_terminate(pid_t pid, siginfo_t *status);
+
+typedef enum WaitFlags {
+ WAIT_LOG_ABNORMAL = 1 << 0,
+ WAIT_LOG_NON_ZERO_EXIT_STATUS = 1 << 1,
+
+ /* A shortcut for requesting the most complete logging */
+ WAIT_LOG = WAIT_LOG_ABNORMAL|WAIT_LOG_NON_ZERO_EXIT_STATUS,
+} WaitFlags;
+
+int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags);
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout);
+
+void sigkill_wait(pid_t pid);
+void sigkill_waitp(pid_t *pid);
+void sigterm_wait(pid_t pid);
+
+int kill_and_sigcont(pid_t pid, int sig);
+
+int rename_process(const char name[]);
+int is_kernel_thread(pid_t pid);
+
+int getenv_for_pid(pid_t pid, const char *field, char **_value);
+
+bool pid_is_alive(pid_t pid);
+bool pid_is_unwaited(pid_t pid);
+int pid_is_my_child(pid_t pid);
+int pid_from_same_root_fs(pid_t pid);
+
+bool is_main_thread(void);
+
+_noreturn_ void freeze(void);
+
+bool oom_score_adjust_is_valid(int oa);
+
+#ifndef PERSONALITY_INVALID
+/* personality(7) documents that 0xffffffffUL is used for querying the
+ * current personality, hence let's use that here as error
+ * indicator. */
+#define PERSONALITY_INVALID 0xffffffffLU
+#endif
+
+unsigned long personality_from_string(const char *p);
+const char *personality_to_string(unsigned long);
+
+int safe_personality(unsigned long p);
+int opinionated_personality(unsigned long *ret);
+
+int ioprio_class_to_string_alloc(int i, char **s);
+int ioprio_class_from_string(const char *s);
+
+const char *sigchld_code_to_string(int i) _const_;
+int sigchld_code_from_string(const char *s) _pure_;
+
+int sched_policy_to_string_alloc(int i, char **s);
+int sched_policy_from_string(const char *s);
+
+static inline pid_t PTR_TO_PID(const void *p) {
+ return (pid_t) ((uintptr_t) p);
+}
+
+static inline void* PID_TO_PTR(pid_t pid) {
+ return (void*) ((uintptr_t) pid);
+}
+
+void valgrind_summary_hack(void);
+
+int pid_compare_func(const pid_t *a, const pid_t *b);
+
+static inline bool nice_is_valid(int n) {
+ return n >= PRIO_MIN && n < PRIO_MAX;
+}
+
+static inline bool sched_policy_is_valid(int i) {
+ return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR);
+}
+
+static inline bool sched_priority_is_valid(int i) {
+ return i >= 0 && i <= sched_get_priority_max(SCHED_RR);
+}
+
+static inline bool ioprio_class_is_valid(int i) {
+ return IN_SET(i, IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE);
+}
+
+static inline bool ioprio_priority_is_valid(int i) {
+ return i >= 0 && i < IOPRIO_BE_NR;
+}
+
+static inline bool pid_is_valid(pid_t p) {
+ return p > 0;
+}
+
+int ioprio_parse_priority(const char *s, int *ret);
+
+pid_t getpid_cached(void);
+void reset_cached_pid(void);
+
+int must_be_root(void);
+
+typedef enum ForkFlags {
+ FORK_RESET_SIGNALS = 1 << 0, /* Reset all signal handlers and signal mask */
+ FORK_CLOSE_ALL_FDS = 1 << 1, /* Close all open file descriptors in the child, except for 0,1,2 */
+ FORK_DEATHSIG = 1 << 2, /* Set PR_DEATHSIG in the child to SIGTERM */
+ FORK_DEATHSIG_SIGINT = 1 << 3, /* Set PR_DEATHSIG in the child to SIGINT */
+ FORK_NULL_STDIO = 1 << 4, /* Connect 0,1,2 to /dev/null */
+ FORK_REOPEN_LOG = 1 << 5, /* Reopen log connection */
+ FORK_LOG = 1 << 6, /* Log above LOG_DEBUG log level about failures */
+ FORK_WAIT = 1 << 7, /* Wait until child exited */
+ FORK_NEW_MOUNTNS = 1 << 8, /* Run child in its own mount namespace */
+ FORK_MOUNTNS_SLAVE = 1 << 9, /* Make child's mount namespace MS_SLAVE */
+ FORK_RLIMIT_NOFILE_SAFE = 1 << 10, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
+ FORK_STDOUT_TO_STDERR = 1 << 11, /* Make stdout a copy of stderr */
+} ForkFlags;
+
+int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid);
+
+static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) {
+ return safe_fork_full(name, NULL, 0, flags, ret_pid);
+}
+
+int namespace_fork(const char *outer_name, const char *inner_name, const int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid);
+
+int fork_agent(const char *name, const int except[], size_t n_except, pid_t *pid, const char *path, ...) _sentinel_;
+
+int set_oom_score_adjust(int value);
+
+/* The highest possibly (theoretic) pid_t value on this architecture. */
+#define PID_T_MAX ((pid_t) INT32_MAX)
+/* The maximum number of concurrent processes Linux allows on this architecture, as well as the highest valid PID value
+ * the kernel will potentially assign. This reflects a value compiled into the kernel (PID_MAX_LIMIT), and sets the
+ * upper boundary on what may be written to the /proc/sys/kernel/pid_max sysctl (but do note that the sysctl is off by
+ * 1, since PID 0 can never exist and there can hence only be one process less than the limit would suggest). Since
+ * these values are documented in proc(5) we feel quite confident that they are stable enough for the near future at
+ * least to define them here too. */
+#define TASKS_MAX 4194303U
+
+assert_cc(TASKS_MAX <= (unsigned long) PID_T_MAX);
+
+/* Like TAKE_PTR() but for child PIDs, resetting them to 0 */
+#define TAKE_PID(pid) \
+ ({ \
+ pid_t _pid_ = (pid); \
+ (pid) = 0; \
+ _pid_; \
+ })
+
+int pidfd_get_pid(int fd, pid_t *ret);
+
+int setpriority_closest(int priority);
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
new file mode 100644
index 0000000..ccab71f
--- /dev/null
+++ b/src/basic/procfs-util.c
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+int procfs_tasks_get_limit(uint64_t *ret) {
+ _cleanup_free_ char *value = NULL;
+ uint64_t pid_max, threads_max;
+ int r;
+
+ assert(ret);
+
+ /* So there are two sysctl files that control the system limit of processes:
+ *
+ * 1. kernel.threads-max: this is probably the sysctl that makes more sense, as it directly puts a limit on
+ * concurrent tasks.
+ *
+ * 2. kernel.pid_max: this limits the numeric range PIDs can take, and thus indirectly also limits the number
+ * of concurrent threads. AFAICS it's primarily a compatibility concept: some crappy old code used a signed
+ * 16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond INT16_MAX by
+ * default.
+ *
+ * By default #2 is set to much lower values than #1, hence the limit people come into contact with first, as
+ * it's the lowest boundary they need to bump when they want higher number of processes.
+ *
+ * Also note the weird definition of #2: PIDs assigned will be kept below this value, which means the number of
+ * tasks that can be created is one lower, as PID 0 is not a valid process ID. */
+
+ r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(value, &pid_max);
+ if (r < 0)
+ return r;
+
+ value = mfree(value);
+ r = read_one_line_file("/proc/sys/kernel/threads-max", &value);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(value, &threads_max);
+ if (r < 0)
+ return r;
+
+ /* Subtract one from pid_max, since PID 0 is not a valid PID */
+ *ret = MIN(pid_max-1, threads_max);
+ return 0;
+}
+
+int procfs_tasks_set_limit(uint64_t limit) {
+ char buffer[DECIMAL_STR_MAX(uint64_t)+1];
+ _cleanup_free_ char *value = NULL;
+ uint64_t pid_max;
+ int r;
+
+ if (limit == 0) /* This makes no sense, we are userspace and hence count as tasks too, and we want to live,
+ * hence the limit conceptually has to be above 0. Also, most likely if anyone asks for a zero
+ * limit they probably mean "no limit", hence let's better refuse this to avoid
+ * confusion. */
+ return -EINVAL;
+
+ /* The Linux kernel doesn't allow this value to go below 20, hence don't allow this either, higher values than
+ * TASKS_MAX are not accepted by the pid_max sysctl. We'll treat anything this high as "unbounded" and hence
+ * set it to the maximum. */
+ limit = CLAMP(limit, 20U, TASKS_MAX);
+
+ r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
+ if (r < 0)
+ return r;
+ r = safe_atou64(value, &pid_max);
+ if (r < 0)
+ return r;
+
+ /* As pid_max is about the numeric pid_t range we'll bump it if necessary, but only ever increase it, never
+ * decrease it, as threads-max is the much more relevant sysctl. */
+ if (limit > pid_max-1) {
+ sprintf(buffer, "%" PRIu64, limit+1); /* Add one, since PID 0 is not a valid PID */
+ r = write_string_file("/proc/sys/kernel/pid_max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+ }
+
+ sprintf(buffer, "%" PRIu64, limit);
+ r = write_string_file("/proc/sys/kernel/threads-max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ uint64_t threads_max;
+
+ /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not
+ * generate an error */
+
+ value = mfree(value);
+ if (read_one_line_file("/proc/sys/kernel/threads-max", &value) < 0)
+ return r; /* return original error */
+
+ if (safe_atou64(value, &threads_max) < 0)
+ return r; /* return original error */
+
+ if (MIN(pid_max-1, threads_max) != limit)
+ return r; /* return original error */
+
+ /* Yay! Value set already matches what we were trying to set, hence consider this a success. */
+ }
+
+ return 0;
+}
+
+int procfs_tasks_get_current(uint64_t *ret) {
+ _cleanup_free_ char *value = NULL;
+ const char *p, *nr;
+ size_t n;
+ int r;
+
+ assert(ret);
+
+ r = read_one_line_file("/proc/loadavg", &value);
+ if (r < 0)
+ return r;
+
+ /* Look for the second part of the fourth field, which is separated by a slash from the first part. None of the
+ * earlier fields use a slash, hence let's use this to find the right spot. */
+ p = strchr(value, '/');
+ if (!p)
+ return -EINVAL;
+
+ p++;
+ n = strspn(p, DIGITS);
+ nr = strndupa(p, n);
+
+ return safe_atou64(nr, ret);
+}
+
+static uint64_t calc_gcd64(uint64_t a, uint64_t b) {
+
+ while (b > 0) {
+ uint64_t t;
+
+ t = a % b;
+
+ a = b;
+ b = t;
+ }
+
+ return a;
+}
+
+int procfs_cpu_get_usage(nsec_t *ret) {
+ _cleanup_free_ char *first_line = NULL;
+ unsigned long user_ticks, nice_ticks, system_ticks, irq_ticks, softirq_ticks,
+ guest_ticks = 0, guest_nice_ticks = 0;
+ long ticks_per_second;
+ uint64_t sum, gcd, a, b;
+ const char *p;
+ int r;
+
+ assert(ret);
+
+ r = read_one_line_file("/proc/stat", &first_line);
+ if (r < 0)
+ return r;
+
+ p = first_word(first_line, "cpu");
+ if (!p)
+ return -EINVAL;
+
+ if (sscanf(p, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu",
+ &user_ticks,
+ &nice_ticks,
+ &system_ticks,
+ &irq_ticks,
+ &softirq_ticks,
+ &guest_ticks,
+ &guest_nice_ticks) < 5) /* we only insist on the first five fields */
+ return -EINVAL;
+
+ ticks_per_second = sysconf(_SC_CLK_TCK);
+ if (ticks_per_second < 0)
+ return -errno;
+ assert(ticks_per_second > 0);
+
+ sum = (uint64_t) user_ticks + (uint64_t) nice_ticks + (uint64_t) system_ticks +
+ (uint64_t) irq_ticks + (uint64_t) softirq_ticks +
+ (uint64_t) guest_ticks + (uint64_t) guest_nice_ticks;
+
+ /* Let's reduce this fraction before we apply it to avoid overflows when converting this to µsec */
+ gcd = calc_gcd64(NSEC_PER_SEC, ticks_per_second);
+
+ a = (uint64_t) NSEC_PER_SEC / gcd;
+ b = (uint64_t) ticks_per_second / gcd;
+
+ *ret = DIV_ROUND_UP((nsec_t) sum * (nsec_t) a, (nsec_t) b);
+ return 0;
+}
+
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used) {
+ uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ f = fopen("/proc/meminfo", "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uint64_t *v;
+ char *p, *e;
+ size_t n;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL; /* EOF: Couldn't find one or both fields? */
+
+ p = first_word(line, "MemTotal:");
+ if (p)
+ v = &mem_total;
+ else {
+ p = first_word(line, "MemFree:");
+ if (p)
+ v = &mem_free;
+ else
+ continue;
+ }
+
+ /* Determine length of numeric value */
+ n = strspn(p, DIGITS);
+ if (n == 0)
+ return -EINVAL;
+ e = p + n;
+
+ /* Ensure the line ends in " kB" */
+ n = strspn(e, WHITESPACE);
+ if (n == 0)
+ return -EINVAL;
+ if (!streq(e + n, "kB"))
+ return -EINVAL;
+
+ *e = 0;
+ r = safe_atou64(p, v);
+ if (r < 0)
+ return r;
+ if (*v == UINT64_MAX)
+ return -EINVAL;
+
+ if (mem_total != UINT64_MAX && mem_free != UINT64_MAX)
+ break;
+ }
+
+ if (mem_free > mem_total)
+ return -EINVAL;
+
+ if (ret_total)
+ *ret_total = mem_total * 1024U;
+ if (ret_used)
+ *ret_used = (mem_total - mem_free) * 1024U;
+ return 0;
+}
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
new file mode 100644
index 0000000..8258c9e
--- /dev/null
+++ b/src/basic/procfs-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "time-util.h"
+
+int procfs_tasks_get_limit(uint64_t *ret);
+int procfs_tasks_set_limit(uint64_t limit);
+int procfs_tasks_get_current(uint64_t *ret);
+
+int procfs_cpu_get_usage(nsec_t *ret);
+
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used);
+static inline int procfs_memory_get_used(uint64_t *ret) {
+ return procfs_memory_get(NULL, ret);
+}
diff --git a/src/basic/pthread-util.h b/src/basic/pthread-util.h
new file mode 100644
index 0000000..113485d
--- /dev/null
+++ b/src/basic/pthread-util.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <pthread.h>
+
+#include "macro.h"
+
+static inline pthread_mutex_t* pthread_mutex_lock_assert(pthread_mutex_t *mutex) {
+ assert_se(pthread_mutex_lock(mutex) == 0);
+ return mutex;
+}
+
+static inline void pthread_mutex_unlock_assertp(pthread_mutex_t **mutexp) {
+ if (*mutexp)
+ assert_se(pthread_mutex_unlock(*mutexp) == 0);
+}
diff --git a/src/basic/quota-util.c b/src/basic/quota-util.c
new file mode 100644
index 0000000..fbf8ee5
--- /dev/null
+++ b/src/basic/quota-util.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/quota.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "quota-util.h"
+#include "stat-util.h"
+
+int quotactl_devno(int cmd, dev_t devno, int id, void *addr) {
+ _cleanup_free_ char *devnode = NULL;
+ int r;
+
+ /* Like quotactl() but takes a dev_t instead of a path to a device node, and fixes caddr_t → void*,
+ * like we should, today */
+
+ r = device_path_make_major_minor(S_IFBLK, devno, &devnode);
+ if (r < 0)
+ return r;
+
+ if (quotactl(cmd, devnode, id, addr) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int quotactl_path(int cmd, const char *path, int id, void *addr) {
+ dev_t devno;
+ int r;
+
+ /* Like quotactl() but takes a path to some fs object, and changes the backing file system. I.e. the
+ * argument shouldn't be a block device but a regular file system object */
+
+ r = get_block_device(path, &devno);
+ if (r < 0)
+ return r;
+ if (devno == 0) /* Doesn't have a block device */
+ return -ENODEV;
+
+ return quotactl_devno(cmd, devno, id, addr);
+}
diff --git a/src/basic/quota-util.h b/src/basic/quota-util.h
new file mode 100644
index 0000000..a61bdcb
--- /dev/null
+++ b/src/basic/quota-util.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/quota.h>
+#include <sys/types.h>
+
+/* Wrapper around the QCMD() macro of linux/quota.h that removes some undefined behaviour. A typical quota
+ * command such as QCMD(Q_GETQUOTA, USRQUOTA) cannot be resolved on platforms where "int" is 32bit, as it is
+ * larger than INT_MAX. Yikes, because that are basically all platforms Linux supports. Let's add a wrapper
+ * that explicitly takes its arguments as unsigned 32bit, and then converts the shift result explicitly to
+ * int, acknowledging the undefined behaviour of the kernel headers. This doesn't remove the undefined
+ * behaviour, but it stops ubsan from complaining about it. */
+static inline int QCMD_FIXED(uint32_t cmd, uint32_t type) {
+ return (int) QCMD(cmd, type);
+}
+
+int quotactl_devno(int cmd, dev_t devno, int id, void *addr);
+int quotactl_path(int cmd, const char *path, int id, void *addr);
diff --git a/src/basic/random-util.c b/src/basic/random-util.c
new file mode 100644
index 0000000..c8c34a2
--- /dev/null
+++ b/src/basic/random-util.c
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/random.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#if HAVE_SYS_AUXV_H
+# include <sys/auxv.h>
+#endif
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "missing_random.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "siphash24.h"
+#include "time-util.h"
+
+static bool srand_called = false;
+
+int rdrand(unsigned long *ret) {
+
+ /* So, you are a "security researcher", and you wonder why we bother with using raw RDRAND here,
+ * instead of sticking to /dev/urandom or getrandom()?
+ *
+ * Here's why: early boot. On Linux, during early boot the random pool that backs /dev/urandom and
+ * getrandom() is generally not initialized yet. It is very common that initialization of the random
+ * pool takes a longer time (up to many minutes), in particular on embedded devices that have no
+ * explicit hardware random generator, as well as in virtualized environments such as major cloud
+ * installations that do not provide virtio-rng or a similar mechanism.
+ *
+ * In such an environment using getrandom() synchronously means we'd block the entire system boot-up
+ * until the pool is initialized, i.e. *very* long. Using getrandom() asynchronously (GRND_NONBLOCK)
+ * would mean acquiring randomness during early boot would simply fail. Using /dev/urandom would mean
+ * generating many kmsg log messages about our use of it before the random pool is properly
+ * initialized. Neither of these outcomes is desirable.
+ *
+ * Thus, for very specific purposes we use RDRAND instead of either of these three options. RDRAND
+ * provides us quickly and relatively reliably with random values, without having to delay boot,
+ * without triggering warning messages in kmsg.
+ *
+ * Note that we use RDRAND only under very specific circumstances, when the requirements on the
+ * quality of the returned entropy permit it. Specifically, here are some cases where we *do* use
+ * RDRAND:
+ *
+ * • UUID generation: UUIDs are supposed to be universally unique but are not cryptographic
+ * key material. The quality and trust level of RDRAND should hence be OK: UUIDs should be
+ * generated in a way that is reliably unique, but they do not require ultimate trust into
+ * the entropy generator. systemd generates a number of UUIDs during early boot, including
+ * 'invocation IDs' for every unit spawned that identify the specific invocation of the
+ * service globally, and a number of others. Other alternatives for generating these UUIDs
+ * have been considered, but don't really work: for example, hashing uuids from a local
+ * system identifier combined with a counter falls flat because during early boot disk
+ * storage is not yet available (think: initrd) and thus a system-specific ID cannot be
+ * stored or retrieved yet.
+ *
+ * • Hash table seed generation: systemd uses many hash tables internally. Hash tables are
+ * generally assumed to have O(1) access complexity, but can deteriorate to prohibitive
+ * O(n) access complexity if an attacker manages to trigger a large number of hash
+ * collisions. Thus, systemd (as any software employing hash tables should) uses seeded
+ * hash functions for its hash tables, with a seed generated randomly. The hash tables
+ * systemd employs watch the fill level closely and reseed if necessary. This allows use of
+ * a low quality RNG initially, as long as it improves should a hash table be under attack:
+ * the attacker after all needs to trigger many collisions to exploit it for the purpose
+ * of DoS, but if doing so improves the seed the attack surface is reduced as the attack
+ * takes place.
+ *
+ * Some cases where we do NOT use RDRAND are:
+ *
+ * • Generation of cryptographic key material 🔑
+ *
+ * • Generation of cryptographic salt values 🧂
+ *
+ * This function returns:
+ *
+ * -EOPNOTSUPP → RDRAND is not available on this system 😔
+ * -EAGAIN → The operation failed this time, but is likely to work if you try again a few
+ * times ♻
+ * -EUCLEAN → We got some random value, but it looked strange, so we refused using it.
+ * This failure might or might not be temporary. 😕
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+ static int have_rdrand = -1;
+ unsigned long v;
+ uint8_t success;
+
+ if (have_rdrand < 0) {
+ uint32_t eax, ebx, ecx, edx;
+
+ /* Check if RDRAND is supported by the CPU */
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) {
+ have_rdrand = false;
+ return -EOPNOTSUPP;
+ }
+
+/* Compat with old gcc where bit_RDRND didn't exist yet */
+#ifndef bit_RDRND
+#define bit_RDRND (1U << 30)
+#endif
+
+ have_rdrand = !!(ecx & bit_RDRND);
+
+ if (have_rdrand > 0) {
+ /* Allow disabling use of RDRAND with SYSTEMD_RDRAND=0
+ If it is unset getenv_bool_secure will return a negative value. */
+ if (getenv_bool_secure("SYSTEMD_RDRAND") == 0) {
+ have_rdrand = false;
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+
+ if (have_rdrand == 0)
+ return -EOPNOTSUPP;
+
+ asm volatile("rdrand %0;"
+ "setc %1"
+ : "=r" (v),
+ "=qm" (success));
+ msan_unpoison(&success, sizeof(success));
+ if (!success)
+ return -EAGAIN;
+
+ /* Apparently on some AMD CPUs RDRAND will sometimes (after a suspend/resume cycle?) report success
+ * via the carry flag but nonetheless return the same fixed value -1 in all cases. This appears to be
+ * a bad bug in the CPU or firmware. Let's deal with that and work-around this by explicitly checking
+ * for this special value (and also 0, just to be sure) and filtering it out. This is a work-around
+ * only however and something AMD really should fix properly. The Linux kernel should probably work
+ * around this issue by turning off RDRAND altogether on those CPUs. See:
+ * https://github.com/systemd/systemd/issues/11810 */
+ if (v == 0 || v == ULONG_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "RDRAND returned suspicious value %lx, assuming bad hardware RNG, not using value.", v);
+
+ *ret = v;
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+int genuine_random_bytes(void *p, size_t n, RandomFlags flags) {
+ static int have_syscall = -1;
+ _cleanup_close_ int fd = -1;
+ bool got_some = false;
+ int r;
+
+ /* Gathers some high-quality randomness from the kernel (or potentially mid-quality randomness from
+ * the CPU if the RANDOM_ALLOW_RDRAND flag is set). This call won't block, unless the RANDOM_BLOCK
+ * flag is set. If RANDOM_MAY_FAIL is set, an error is returned if the random pool is not
+ * initialized. Otherwise it will always return some data from the kernel, regardless of whether the
+ * random pool is fully initialized or not. If RANDOM_EXTEND_WITH_PSEUDO is set, and some but not
+ * enough better quality randomness could be acquired, the rest is filled up with low quality
+ * randomness.
+ *
+ * Of course, when creating cryptographic key material you really shouldn't use RANDOM_ALLOW_DRDRAND
+ * or even RANDOM_EXTEND_WITH_PSEUDO.
+ *
+ * When generating UUIDs it's fine to use RANDOM_ALLOW_RDRAND but not OK to use
+ * RANDOM_EXTEND_WITH_PSEUDO. In fact RANDOM_EXTEND_WITH_PSEUDO is only really fine when invoked via
+ * an "all bets are off" wrapper, such as random_bytes(), see below. */
+
+ if (n == 0)
+ return 0;
+
+ if (FLAGS_SET(flags, RANDOM_ALLOW_RDRAND))
+ /* Try x86-64' RDRAND intrinsic if we have it. We only use it if high quality randomness is
+ * not required, as we don't trust it (who does?). Note that we only do a single iteration of
+ * RDRAND here, even though the Intel docs suggest calling this in a tight loop of 10
+ * invocations or so. That's because we don't really care about the quality here. We
+ * generally prefer using RDRAND if the caller allows us to, since this way we won't upset
+ * the kernel's random subsystem by accessing it before the pool is initialized (after all it
+ * will kmsg log about every attempt to do so)..*/
+ for (;;) {
+ unsigned long u;
+ size_t m;
+
+ if (rdrand(&u) < 0) {
+ if (got_some && FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudo-random values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ /* OK, this didn't work, let's go to getrandom() + /dev/urandom instead */
+ break;
+ }
+
+ m = MIN(sizeof(u), n);
+ memcpy(p, &u, m);
+
+ p = (uint8_t*) p + m;
+ n -= m;
+
+ if (n == 0)
+ return 0; /* Yay, success! */
+
+ got_some = true;
+ }
+
+ /* Use the getrandom() syscall unless we know we don't have it. */
+ if (have_syscall != 0 && !HAS_FEATURE_MEMORY_SANITIZER) {
+
+ for (;;) {
+ r = getrandom(p, n,
+ (FLAGS_SET(flags, RANDOM_BLOCK) ? 0 : GRND_NONBLOCK) |
+ (FLAGS_SET(flags, RANDOM_ALLOW_INSECURE) ? GRND_INSECURE : 0));
+ if (r > 0) {
+ have_syscall = true;
+
+ if ((size_t) r == n)
+ return 0; /* Yay, success! */
+
+ assert((size_t) r < n);
+ p = (uint8_t*) p + r;
+ n -= r;
+
+ if (FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudo-random values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ got_some = true;
+
+ /* Hmm, we didn't get enough good data but the caller insists on good data? Then try again */
+ if (FLAGS_SET(flags, RANDOM_BLOCK))
+ continue;
+
+ /* Fill in the rest with /dev/urandom */
+ break;
+
+ } else if (r == 0) {
+ have_syscall = true;
+ return -EIO;
+
+ } else if (ERRNO_IS_NOT_SUPPORTED(errno)) {
+ /* We lack the syscall, continue with reading from /dev/urandom. */
+ have_syscall = false;
+ break;
+
+ } else if (errno == EAGAIN) {
+ /* The kernel has no entropy whatsoever. Let's remember to use the syscall
+ * the next time again though.
+ *
+ * If RANDOM_MAY_FAIL is set, return an error so that random_bytes() can
+ * produce some pseudo-random bytes instead. Otherwise, fall back to
+ * /dev/urandom, which we know is empty, but the kernel will produce some
+ * bytes for us on a best-effort basis. */
+ have_syscall = true;
+
+ if (got_some && FLAGS_SET(flags, RANDOM_EXTEND_WITH_PSEUDO)) {
+ /* Fill in the remaining bytes using pseudorandom values */
+ pseudo_random_bytes(p, n);
+ return 0;
+ }
+
+ if (FLAGS_SET(flags, RANDOM_MAY_FAIL))
+ return -ENODATA;
+
+ /* Use /dev/urandom instead */
+ break;
+
+ } else if (errno == EINVAL) {
+
+ /* Most likely: unknown flag. We know that GRND_INSECURE might cause this,
+ * hence try without. */
+
+ if (FLAGS_SET(flags, RANDOM_ALLOW_INSECURE)) {
+ flags = flags &~ RANDOM_ALLOW_INSECURE;
+ continue;
+ }
+
+ return -errno;
+ } else
+ return -errno;
+ }
+ }
+
+ fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return errno == ENOENT ? -ENOSYS : -errno;
+
+ return loop_read_exact(fd, p, n, true);
+}
+
+static void clear_srand_initialization(void) {
+ srand_called = false;
+}
+
+void initialize_srand(void) {
+ static bool pthread_atfork_registered = false;
+ unsigned x;
+#if HAVE_SYS_AUXV_H
+ const void *auxv;
+#endif
+ unsigned long k;
+
+ if (srand_called)
+ return;
+
+#if HAVE_SYS_AUXV_H
+ /* The kernel provides us with 16 bytes of entropy in auxv, so let's try to make use of that to seed
+ * the pseudo-random generator. It's better than nothing... But let's first hash it to make it harder
+ * to recover the original value by watching any pseudo-random bits we generate. After all the
+ * AT_RANDOM data might be used by other stuff too (in particular: ASLR), and we probably shouldn't
+ * leak the seed for that. */
+
+ auxv = ULONG_TO_PTR(getauxval(AT_RANDOM));
+ if (auxv) {
+ static const uint8_t auxval_hash_key[16] = {
+ 0x92, 0x6e, 0xfe, 0x1b, 0xcf, 0x00, 0x52, 0x9c, 0xcc, 0x42, 0xcf, 0xdc, 0x94, 0x1f, 0x81, 0x0f
+ };
+
+ x = (unsigned) siphash24(auxv, 16, auxval_hash_key);
+ } else
+#endif
+ x = 0;
+
+ x ^= (unsigned) now(CLOCK_REALTIME);
+ x ^= (unsigned) gettid();
+
+ if (rdrand(&k) >= 0)
+ x ^= (unsigned) k;
+
+ srand(x);
+ srand_called = true;
+
+ if (!pthread_atfork_registered) {
+ (void) pthread_atfork(NULL, NULL, clear_srand_initialization);
+ pthread_atfork_registered = true;
+ }
+}
+
+/* INT_MAX gives us only 31 bits, so use 24 out of that. */
+#if RAND_MAX >= INT_MAX
+assert_cc(RAND_MAX >= 16777215);
+# define RAND_STEP 3
+#else
+/* SHORT_INT_MAX or lower gives at most 15 bits, we just use 8 out of that. */
+assert_cc(RAND_MAX >= 255);
+# define RAND_STEP 1
+#endif
+
+void pseudo_random_bytes(void *p, size_t n) {
+ uint8_t *q;
+
+ /* This returns pseudo-random data using libc's rand() function. You probably never want to call this
+ * directly, because why would you use this if you can get better stuff cheaply? Use random_bytes()
+ * instead, see below: it will fall back to this function if there's nothing better to get, but only
+ * then. */
+
+ initialize_srand();
+
+ for (q = p; q < (uint8_t*) p + n; q += RAND_STEP) {
+ unsigned rr;
+
+ rr = (unsigned) rand();
+
+#if RAND_STEP >= 3
+ if ((size_t) (q - (uint8_t*) p + 2) < n)
+ q[2] = rr >> 16;
+#endif
+#if RAND_STEP >= 2
+ if ((size_t) (q - (uint8_t*) p + 1) < n)
+ q[1] = rr >> 8;
+#endif
+ q[0] = rr;
+ }
+}
+
+void random_bytes(void *p, size_t n) {
+
+ /* This returns high quality randomness if we can get it cheaply. If we can't because for some reason
+ * it is not available we'll try some crappy fallbacks.
+ *
+ * What this function will do:
+ *
+ * • This function will preferably use the CPU's RDRAND operation, if it is available, in
+ * order to return "mid-quality" random values cheaply.
+ *
+ * • Use getrandom() with GRND_NONBLOCK, to return high-quality random values if they are
+ * cheaply available.
+ *
+ * • This function will return pseudo-random data, generated via libc rand() if nothing
+ * better is available.
+ *
+ * • This function will work fine in early boot
+ *
+ * • This function will always succeed
+ *
+ * What this function won't do:
+ *
+ * • This function will never fail: it will give you randomness no matter what. It might not
+ * be high quality, but it will return some, possibly generated via libc's rand() call.
+ *
+ * • This function will never block: if the only way to get good randomness is a blocking,
+ * synchronous getrandom() we'll instead provide you with pseudo-random data.
+ *
+ * This function is hence great for things like seeding hash tables, generating random numeric UNIX
+ * user IDs (that are checked for collisions before use) and such.
+ *
+ * This function is hence not useful for generating UUIDs or cryptographic key material.
+ */
+
+ if (genuine_random_bytes(p, n, RANDOM_EXTEND_WITH_PSEUDO|RANDOM_MAY_FAIL|RANDOM_ALLOW_RDRAND|RANDOM_ALLOW_INSECURE) >= 0)
+ return;
+
+ /* If for some reason some user made /dev/urandom unavailable to us, or the kernel has no entropy, use a PRNG instead. */
+ pseudo_random_bytes(p, n);
+}
+
+size_t random_pool_size(void) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ /* Read pool size, if possible */
+ r = read_one_line_file("/proc/sys/kernel/random/poolsize", &s);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read pool size from kernel: %m");
+ else {
+ unsigned sz;
+
+ r = safe_atou(s, &sz);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse pool size: %s", s);
+ else
+ /* poolsize is in bits on 2.6, but we want bytes */
+ return CLAMP(sz / 8, RANDOM_POOL_SIZE_MIN, RANDOM_POOL_SIZE_MAX);
+ }
+
+ /* Use the minimum as default, if we can't retrieve the correct value */
+ return RANDOM_POOL_SIZE_MIN;
+}
+
+int random_write_entropy(int fd, const void *seed, size_t size, bool credit) {
+ int r;
+
+ assert(fd >= 0);
+ assert(seed && size > 0);
+
+ if (credit) {
+ _cleanup_free_ struct rand_pool_info *info = NULL;
+
+ /* The kernel API only accepts "int" as entropy count (which is in bits), let's avoid any
+ * chance for confusion here. */
+ if (size > INT_MAX / 8)
+ return -EOVERFLOW;
+
+ info = malloc(offsetof(struct rand_pool_info, buf) + size);
+ if (!info)
+ return -ENOMEM;
+
+ info->entropy_count = size * 8;
+ info->buf_size = size;
+ memcpy(info->buf, seed, size);
+
+ if (ioctl(fd, RNDADDENTROPY, info) < 0)
+ return -errno;
+ } else {
+ r = loop_write(fd, seed, size, false);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/basic/random-util.h b/src/basic/random-util.h
new file mode 100644
index 0000000..f661fc0
--- /dev/null
+++ b/src/basic/random-util.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+typedef enum RandomFlags {
+ RANDOM_EXTEND_WITH_PSEUDO = 1 << 0, /* If we can't get enough genuine randomness, but some, fill up the rest with pseudo-randomness */
+ RANDOM_BLOCK = 1 << 1, /* Rather block than return crap randomness (only if the kernel supports that) */
+ RANDOM_MAY_FAIL = 1 << 2, /* If we can't get any randomness at all, return early with -ENODATA */
+ RANDOM_ALLOW_RDRAND = 1 << 3, /* Allow usage of the CPU RNG */
+ RANDOM_ALLOW_INSECURE = 1 << 4, /* Allow usage of GRND_INSECURE flag to kernel's getrandom() API */
+} RandomFlags;
+
+int genuine_random_bytes(void *p, size_t n, RandomFlags flags); /* returns "genuine" randomness, optionally filled up with pseudo random, if not enough is available */
+void pseudo_random_bytes(void *p, size_t n); /* returns only pseudo-randommess (but possibly seeded from something better) */
+void random_bytes(void *p, size_t n); /* returns genuine randomness if cheaply available, and pseudo randomness if not. */
+
+void initialize_srand(void);
+
+static inline uint64_t random_u64(void) {
+ uint64_t u;
+ random_bytes(&u, sizeof(u));
+ return u;
+}
+
+static inline uint32_t random_u32(void) {
+ uint32_t u;
+ random_bytes(&u, sizeof(u));
+ return u;
+}
+
+int rdrand(unsigned long *ret);
+
+/* Some limits on the pool sizes when we deal with the kernel random pool */
+#define RANDOM_POOL_SIZE_MIN 512U
+#define RANDOM_POOL_SIZE_MAX (10U*1024U*1024U)
+
+size_t random_pool_size(void);
+
+int random_write_entropy(int fd, const void *seed, size_t size, bool credit);
diff --git a/src/basic/ratelimit.c b/src/basic/ratelimit.c
new file mode 100644
index 0000000..bae2ec3
--- /dev/null
+++ b/src/basic/ratelimit.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/time.h>
+
+#include "macro.h"
+#include "ratelimit.h"
+
+/* Modelled after Linux' lib/ratelimit.c by Dave Young
+ * <hidave.darkstar@gmail.com>, which is licensed GPLv2. */
+
+bool ratelimit_below(RateLimit *r) {
+ usec_t ts;
+
+ assert(r);
+
+ if (!ratelimit_configured(r))
+ return true;
+
+ ts = now(CLOCK_MONOTONIC);
+
+ if (r->begin <= 0 ||
+ ts - r->begin > r->interval) {
+ r->begin = ts;
+
+ /* Reset counter */
+ r->num = 0;
+ goto good;
+ }
+
+ if (r->num < r->burst)
+ goto good;
+
+ return false;
+
+good:
+ r->num++;
+ return true;
+}
diff --git a/src/basic/ratelimit.h b/src/basic/ratelimit.h
new file mode 100644
index 0000000..ee1d17c
--- /dev/null
+++ b/src/basic/ratelimit.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+typedef struct RateLimit {
+ usec_t interval; /* Keep those two fields first so they can be initialized easily: */
+ unsigned burst; /* RateLimit rl = { INTERVAL, BURST }; */
+ unsigned num;
+ usec_t begin;
+} RateLimit;
+
+static inline void ratelimit_reset(RateLimit *rl) {
+ rl->num = rl->begin = 0;
+}
+
+static inline bool ratelimit_configured(RateLimit *rl) {
+ return rl->interval > 0 && rl->burst > 0;
+}
+
+bool ratelimit_below(RateLimit *r);
diff --git a/src/basic/raw-clone.h b/src/basic/raw-clone.h
new file mode 100644
index 0000000..becf42e
--- /dev/null
+++ b/src/basic/raw-clone.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2016 Michael Karcher
+***/
+
+#include <errno.h>
+#include <sched.h>
+#include <sys/syscall.h>
+
+#include "log.h"
+#include "macro.h"
+
+/**
+ * raw_clone() - uses clone to create a new process with clone flags
+ * @flags: Flags to pass to the clone system call
+ *
+ * Uses the clone system call to create a new process with the cloning flags and termination signal passed in the flags
+ * parameter. Opposed to glibc's clone function, using this function does not set up a separate stack for the child, but
+ * relies on copy-on-write semantics on the one stack at a common virtual address, just as fork does.
+ *
+ * To obtain copy-on-write semantics, flags must not contain CLONE_VM, and thus CLONE_THREAD and CLONE_SIGHAND (which
+ * require CLONE_VM) are not usable.
+ *
+ * Additionally, as this function does not pass the ptid, newtls and ctid parameters to the kernel, flags must not
+ * contain CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID or CLONE_SETTLS.
+ *
+ * Returns: 0 in the child process and the child process id in the parent.
+ */
+static inline pid_t raw_clone(unsigned long flags) {
+ pid_t ret;
+
+ assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
+ CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
+#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
+ /* On s390/s390x and cris the order of the first and second arguments
+ * of the raw clone() system call is reversed. */
+ ret = (pid_t) syscall(__NR_clone, NULL, flags);
+#elif defined(__sparc__)
+ {
+ /**
+ * sparc always returns the other process id in %o0, and
+ * a boolean flag whether this is the child or the parent in
+ * %o1. Inline assembly is needed to get the flag returned
+ * in %o1.
+ */
+ int in_child, child_pid, error;
+
+ asm volatile("mov %3, %%g1\n\t"
+ "mov %4, %%o0\n\t"
+ "mov 0 , %%o1\n\t"
+#if defined(__arch64__)
+ "t 0x6d\n\t"
+#else
+ "t 0x10\n\t"
+#endif
+ "addx %%g0, 0, %2\n\t"
+ "mov %%o1, %0\n\t"
+ "mov %%o0, %1" :
+ "=r"(in_child), "=r"(child_pid), "=r"(error) :
+ "i"(__NR_clone), "r"(flags) :
+ "%o1", "%o0", "%g1", "cc" );
+
+ if (error) {
+ errno = child_pid;
+ ret = -1;
+ } else
+ ret = in_child ? 0 : child_pid;
+ }
+#else
+ ret = (pid_t) syscall(__NR_clone, flags, NULL);
+#endif
+
+ if (ret == 0)
+ reset_cached_pid();
+
+ return ret;
+}
diff --git a/src/basic/raw-reboot.h b/src/basic/raw-reboot.h
new file mode 100644
index 0000000..e6bff30
--- /dev/null
+++ b/src/basic/raw-reboot.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/reboot.h>
+#include <sys/reboot.h>
+#include <sys/syscall.h>
+
+/* glibc defines the reboot() API call, which is a wrapper around the system call of the same name, but without the
+ * extra "arg" parameter. Since we need that parameter for some calls, let's add a "raw" wrapper that is defined the
+ * same way, except it takes the additional argument. */
+
+static inline int raw_reboot(int cmd, const void *arg) {
+ return (int) syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, arg);
+}
diff --git a/src/basic/replace-var.c b/src/basic/replace-var.c
new file mode 100644
index 0000000..01c26ce
--- /dev/null
+++ b/src/basic/replace-var.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "replace-var.h"
+#include "string-util.h"
+
+/*
+ * Generic infrastructure for replacing @FOO@ style variables in
+ * strings. Will call a callback for each replacement.
+ */
+
+static int get_variable(const char *b, char **r) {
+ size_t k;
+ char *t;
+
+ assert(b);
+ assert(r);
+
+ if (*b != '@')
+ return 0;
+
+ k = strspn(b + 1, UPPERCASE_LETTERS "_");
+ if (k <= 0 || b[k+1] != '@')
+ return 0;
+
+ t = strndup(b + 1, k);
+ if (!t)
+ return -ENOMEM;
+
+ *r = t;
+ return 1;
+}
+
+char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata) {
+ char *r, *t;
+ const char *f;
+ size_t l;
+
+ assert(text);
+ assert(lookup);
+
+ l = strlen(text);
+ r = new(char, l+1);
+ if (!r)
+ return NULL;
+
+ f = text;
+ t = r;
+ while (*f) {
+ _cleanup_free_ char *v = NULL, *n = NULL;
+ char *a;
+ int k;
+ size_t skip, d, nl;
+
+ k = get_variable(f, &v);
+ if (k < 0)
+ goto oom;
+ if (k == 0) {
+ *(t++) = *(f++);
+ continue;
+ }
+
+ n = lookup(v, userdata);
+ if (!n)
+ goto oom;
+
+ skip = strlen(v) + 2;
+
+ d = t - r;
+ nl = l - skip + strlen(n);
+ a = realloc(r, nl + 1);
+ if (!a)
+ goto oom;
+
+ l = nl;
+ r = a;
+ t = r + d;
+
+ t = stpcpy(t, n);
+ f += skip;
+ }
+
+ *t = 0;
+ return r;
+
+oom:
+ return mfree(r);
+}
diff --git a/src/basic/replace-var.h b/src/basic/replace-var.h
new file mode 100644
index 0000000..644d9df
--- /dev/null
+++ b/src/basic/replace-var.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata);
diff --git a/src/basic/rlimit-util.c b/src/basic/rlimit-util.c
new file mode 100644
index 0000000..8809763
--- /dev/null
+++ b/src/basic/rlimit-util.c
@@ -0,0 +1,409 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "missing_resource.h"
+#include "rlimit-util.h"
+#include "string-table.h"
+#include "time-util.h"
+
+int setrlimit_closest(int resource, const struct rlimit *rlim) {
+ struct rlimit highest, fixed;
+
+ assert(rlim);
+
+ if (setrlimit(resource, rlim) >= 0)
+ return 0;
+
+ if (errno != EPERM)
+ return -errno;
+
+ /* So we failed to set the desired setrlimit, then let's try
+ * to get as close as we can */
+ if (getrlimit(resource, &highest) < 0)
+ return -errno;
+
+ /* If the hard limit is unbounded anyway, then the EPERM had other reasons, let's propagate the original EPERM
+ * then */
+ if (highest.rlim_max == RLIM_INFINITY)
+ return -EPERM;
+
+ fixed = (struct rlimit) {
+ .rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max),
+ .rlim_max = MIN(rlim->rlim_max, highest.rlim_max),
+ };
+
+ /* Shortcut things if we wouldn't change anything. */
+ if (fixed.rlim_cur == highest.rlim_cur &&
+ fixed.rlim_max == highest.rlim_max)
+ return 0;
+
+ if (setrlimit(resource, &fixed) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int setrlimit_closest_all(const struct rlimit *const *rlim, int *which_failed) {
+ int i, r;
+
+ assert(rlim);
+
+ /* On failure returns the limit's index that failed in *which_failed, but only if non-NULL */
+
+ for (i = 0; i < _RLIMIT_MAX; i++) {
+ if (!rlim[i])
+ continue;
+
+ r = setrlimit_closest(i, rlim[i]);
+ if (r < 0) {
+ if (which_failed)
+ *which_failed = i;
+
+ return r;
+ }
+ }
+
+ if (which_failed)
+ *which_failed = -1;
+
+ return 0;
+}
+
+static int rlimit_parse_u64(const char *val, rlim_t *ret) {
+ uint64_t u;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ if (streq(val, "infinity")) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ /* setrlimit(2) suggests rlim_t is always 64bit on Linux. */
+ assert_cc(sizeof(rlim_t) == sizeof(uint64_t));
+
+ r = safe_atou64(val, &u);
+ if (r < 0)
+ return r;
+ if (u >= (uint64_t) RLIM_INFINITY)
+ return -ERANGE;
+
+ *ret = (rlim_t) u;
+ return 0;
+}
+
+static int rlimit_parse_size(const char *val, rlim_t *ret) {
+ uint64_t u;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ if (streq(val, "infinity")) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ r = parse_size(val, 1024, &u);
+ if (r < 0)
+ return r;
+ if (u >= (uint64_t) RLIM_INFINITY)
+ return -ERANGE;
+
+ *ret = (rlim_t) u;
+ return 0;
+}
+
+static int rlimit_parse_sec(const char *val, rlim_t *ret) {
+ uint64_t u;
+ usec_t t;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ if (streq(val, "infinity")) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ r = parse_sec(val, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ u = (uint64_t) DIV_ROUND_UP(t, USEC_PER_SEC);
+ if (u >= (uint64_t) RLIM_INFINITY)
+ return -ERANGE;
+
+ *ret = (rlim_t) u;
+ return 0;
+}
+
+static int rlimit_parse_usec(const char *val, rlim_t *ret) {
+ usec_t t;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ if (streq(val, "infinity")) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ r = parse_time(val, &t, 1);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY) {
+ *ret = RLIM_INFINITY;
+ return 0;
+ }
+
+ *ret = (rlim_t) t;
+ return 0;
+}
+
+static int rlimit_parse_nice(const char *val, rlim_t *ret) {
+ uint64_t rl;
+ int r;
+
+ /* So, Linux is weird. The range for RLIMIT_NICE is 40..1, mapping to the nice levels -20..19. However, the
+ * RLIMIT_NICE limit defaults to 0 by the kernel, i.e. a value that maps to nice level 20, which of course is
+ * bogus and does not exist. In order to permit parsing the RLIMIT_NICE of 0 here we hence implement a slight
+ * asymmetry: when parsing as positive nice level we permit 0..19. When parsing as negative nice level, we
+ * permit -20..0. But when parsing as raw resource limit value then we also allow the special value 0.
+ *
+ * Yeah, Linux is quality engineering sometimes... */
+
+ if (val[0] == '+') {
+
+ /* Prefixed with "+": Parse as positive user-friendly nice value */
+ r = safe_atou64(val + 1, &rl);
+ if (r < 0)
+ return r;
+
+ if (rl >= PRIO_MAX)
+ return -ERANGE;
+
+ rl = 20 - rl;
+
+ } else if (val[0] == '-') {
+
+ /* Prefixed with "-": Parse as negative user-friendly nice value */
+ r = safe_atou64(val + 1, &rl);
+ if (r < 0)
+ return r;
+
+ if (rl > (uint64_t) (-PRIO_MIN))
+ return -ERANGE;
+
+ rl = 20 + rl;
+ } else {
+
+ /* Not prefixed: parse as raw resource limit value */
+ r = safe_atou64(val, &rl);
+ if (r < 0)
+ return r;
+
+ if (rl > (uint64_t) (20 - PRIO_MIN))
+ return -ERANGE;
+ }
+
+ *ret = (rlim_t) rl;
+ return 0;
+}
+
+static int (*const rlimit_parse_table[_RLIMIT_MAX])(const char *val, rlim_t *ret) = {
+ [RLIMIT_CPU] = rlimit_parse_sec,
+ [RLIMIT_FSIZE] = rlimit_parse_size,
+ [RLIMIT_DATA] = rlimit_parse_size,
+ [RLIMIT_STACK] = rlimit_parse_size,
+ [RLIMIT_CORE] = rlimit_parse_size,
+ [RLIMIT_RSS] = rlimit_parse_size,
+ [RLIMIT_NOFILE] = rlimit_parse_u64,
+ [RLIMIT_AS] = rlimit_parse_size,
+ [RLIMIT_NPROC] = rlimit_parse_u64,
+ [RLIMIT_MEMLOCK] = rlimit_parse_size,
+ [RLIMIT_LOCKS] = rlimit_parse_u64,
+ [RLIMIT_SIGPENDING] = rlimit_parse_u64,
+ [RLIMIT_MSGQUEUE] = rlimit_parse_size,
+ [RLIMIT_NICE] = rlimit_parse_nice,
+ [RLIMIT_RTPRIO] = rlimit_parse_u64,
+ [RLIMIT_RTTIME] = rlimit_parse_usec,
+};
+
+int rlimit_parse_one(int resource, const char *val, rlim_t *ret) {
+ assert(val);
+ assert(ret);
+
+ if (resource < 0)
+ return -EINVAL;
+ if (resource >= _RLIMIT_MAX)
+ return -EINVAL;
+
+ return rlimit_parse_table[resource](val, ret);
+}
+
+int rlimit_parse(int resource, const char *val, struct rlimit *ret) {
+ _cleanup_free_ char *hard = NULL, *soft = NULL;
+ rlim_t hl, sl;
+ int r;
+
+ assert(val);
+ assert(ret);
+
+ r = extract_first_word(&val, &soft, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ r = rlimit_parse_one(resource, soft, &sl);
+ if (r < 0)
+ return r;
+
+ r = extract_first_word(&val, &hard, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (!isempty(val))
+ return -EINVAL;
+ if (r == 0)
+ hl = sl;
+ else {
+ r = rlimit_parse_one(resource, hard, &hl);
+ if (r < 0)
+ return r;
+ if (sl > hl)
+ return -EILSEQ;
+ }
+
+ *ret = (struct rlimit) {
+ .rlim_cur = sl,
+ .rlim_max = hl,
+ };
+
+ return 0;
+}
+
+int rlimit_format(const struct rlimit *rl, char **ret) {
+ char *s = NULL;
+
+ assert(rl);
+ assert(ret);
+
+ if (rl->rlim_cur >= RLIM_INFINITY && rl->rlim_max >= RLIM_INFINITY)
+ s = strdup("infinity");
+ else if (rl->rlim_cur >= RLIM_INFINITY)
+ (void) asprintf(&s, "infinity:" RLIM_FMT, rl->rlim_max);
+ else if (rl->rlim_max >= RLIM_INFINITY)
+ (void) asprintf(&s, RLIM_FMT ":infinity", rl->rlim_cur);
+ else if (rl->rlim_cur == rl->rlim_max)
+ (void) asprintf(&s, RLIM_FMT, rl->rlim_cur);
+ else
+ (void) asprintf(&s, RLIM_FMT ":" RLIM_FMT, rl->rlim_cur, rl->rlim_max);
+
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+static const char* const rlimit_table[_RLIMIT_MAX] = {
+ [RLIMIT_AS] = "AS",
+ [RLIMIT_CORE] = "CORE",
+ [RLIMIT_CPU] = "CPU",
+ [RLIMIT_DATA] = "DATA",
+ [RLIMIT_FSIZE] = "FSIZE",
+ [RLIMIT_LOCKS] = "LOCKS",
+ [RLIMIT_MEMLOCK] = "MEMLOCK",
+ [RLIMIT_MSGQUEUE] = "MSGQUEUE",
+ [RLIMIT_NICE] = "NICE",
+ [RLIMIT_NOFILE] = "NOFILE",
+ [RLIMIT_NPROC] = "NPROC",
+ [RLIMIT_RSS] = "RSS",
+ [RLIMIT_RTPRIO] = "RTPRIO",
+ [RLIMIT_RTTIME] = "RTTIME",
+ [RLIMIT_SIGPENDING] = "SIGPENDING",
+ [RLIMIT_STACK] = "STACK",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(rlimit, int);
+
+int rlimit_from_string_harder(const char *s) {
+ const char *suffix;
+
+ /* The official prefix */
+ suffix = startswith(s, "RLIMIT_");
+ if (suffix)
+ return rlimit_from_string(suffix);
+
+ /* Our own unit file setting prefix */
+ suffix = startswith(s, "Limit");
+ if (suffix)
+ return rlimit_from_string(suffix);
+
+ return rlimit_from_string(s);
+}
+
+void rlimit_free_all(struct rlimit **rl) {
+ int i;
+
+ if (!rl)
+ return;
+
+ for (i = 0; i < _RLIMIT_MAX; i++)
+ rl[i] = mfree(rl[i]);
+}
+
+int rlimit_nofile_bump(int limit) {
+ int r;
+
+ /* Bumps the (soft) RLIMIT_NOFILE resource limit as close as possible to the specified limit. If a negative
+ * limit is specified, bumps it to the maximum the kernel and the hard resource limit allows. This call should
+ * be used by all our programs that might need a lot of fds, and that know how to deal with high fd numbers
+ * (i.e. do not use select() — which chokes on fds >= 1024) */
+
+ if (limit < 0)
+ limit = read_nr_open();
+
+ if (limit < 3)
+ limit = 3;
+
+ r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(limit));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to set RLIMIT_NOFILE: %m");
+
+ return 0;
+}
+
+int rlimit_nofile_safe(void) {
+ struct rlimit rl;
+
+ /* Resets RLIMIT_NOFILE's soft limit FD_SETSIZE (i.e. 1024), for compatibility with software still using
+ * select() */
+
+ if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
+ return log_debug_errno(errno, "Failed to query RLIMIT_NOFILE: %m");
+
+ if (rl.rlim_cur <= FD_SETSIZE)
+ return 0;
+
+ rl.rlim_cur = FD_SETSIZE;
+ if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
+ return log_debug_errno(errno, "Failed to lower RLIMIT_NOFILE's soft limit to " RLIM_FMT ": %m", rl.rlim_cur);
+
+ return 1;
+}
diff --git a/src/basic/rlimit-util.h b/src/basic/rlimit-util.h
new file mode 100644
index 0000000..59bc066
--- /dev/null
+++ b/src/basic/rlimit-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/resource.h>
+
+#include "macro.h"
+
+const char *rlimit_to_string(int i) _const_;
+int rlimit_from_string(const char *s) _pure_;
+int rlimit_from_string_harder(const char *s) _pure_;
+
+int setrlimit_closest(int resource, const struct rlimit *rlim);
+int setrlimit_closest_all(const struct rlimit * const *rlim, int *which_failed);
+
+int rlimit_parse_one(int resource, const char *val, rlim_t *ret);
+int rlimit_parse(int resource, const char *val, struct rlimit *ret);
+
+int rlimit_format(const struct rlimit *rl, char **ret);
+
+void rlimit_free_all(struct rlimit **rl);
+
+#define RLIMIT_MAKE_CONST(lim) ((struct rlimit) { lim, lim })
+
+int rlimit_nofile_bump(int limit);
+int rlimit_nofile_safe(void);
diff --git a/src/basic/rm-rf.c b/src/basic/rm-rf.c
new file mode 100644
index 0000000..b0d682f
--- /dev/null
+++ b/src/basic/rm-rf.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+
+static bool is_physical_fs(const struct statfs *sfs) {
+ return !is_temporary_fs(sfs) && !is_cgroup_fs(sfs);
+}
+
+static int unlinkat_harder(
+ int dfd,
+ const char *filename,
+ int unlink_flags,
+ RemoveFlags remove_flags) {
+
+ struct stat st;
+ int r;
+
+ /* Like unlinkat(), but tries harder: if we get EACCESS we'll try to set the r/w/x bits on the
+ * directory. This is useful if we run unprivileged and have some files where the w bit is
+ * missing. */
+
+ if (unlinkat(dfd, filename, unlink_flags) >= 0)
+ return 0;
+ if (errno != EACCES || !FLAGS_SET(remove_flags, REMOVE_CHMOD))
+ return -errno;
+
+ if (fstat(dfd, &st) < 0)
+ return -errno;
+ if (!S_ISDIR(st.st_mode))
+ return -ENOTDIR;
+ if (FLAGS_SET(st.st_mode, 0700)) /* Already set? */
+ return -EACCES; /* original error */
+ if (st.st_uid != geteuid()) /* this only works if the UID matches ours */
+ return -EACCES;
+
+ if (fchmod(dfd, (st.st_mode | 0700) & 07777) < 0)
+ return -errno;
+
+ if (unlinkat(dfd, filename, unlink_flags) < 0) {
+ r = -errno;
+ /* Try to restore the original access mode if this didn't work */
+ (void) fchmod(dfd, st.st_mode & 07777);
+ return r;
+ }
+
+ return 0;
+}
+
+int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int ret = 0, r;
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ /* This returns the first error we run into, but nevertheless tries to go on. This closes the passed
+ * fd, in all cases, including on failure.. */
+
+ if (!(flags & REMOVE_PHYSICAL)) {
+
+ r = fstatfs(fd, &sfs);
+ if (r < 0) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ if (is_physical_fs(&sfs)) {
+ /* We refuse to clean physical file systems with this call,
+ * unless explicitly requested. This is extra paranoia just
+ * to be sure we never ever remove non-state data. */
+ _cleanup_free_ char *path = NULL;
+
+ (void) fd_get_path(fd, &path);
+ log_error("Attempted to remove disk file system under \"%s\", and we can't allow that.",
+ strna(path));
+
+ safe_close(fd);
+ return -EPERM;
+ }
+ }
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return errno == ENOENT ? 0 : -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ bool is_dir;
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (de->d_type == DT_UNKNOWN ||
+ (de->d_type == DT_DIR && (root_dev || (flags & REMOVE_SUBVOLUME)))) {
+ if (fstatat(fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ continue;
+ }
+
+ is_dir = S_ISDIR(st.st_mode);
+ } else
+ is_dir = de->d_type == DT_DIR;
+
+ if (is_dir) {
+ _cleanup_close_ int subdir_fd = -1;
+
+ /* if root_dev is set, remove subdirectories only if device is same */
+ if (root_dev && st.st_dev != root_dev->st_dev)
+ continue;
+
+ subdir_fd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (subdir_fd < 0) {
+ if (ret == 0 && errno != ENOENT)
+ ret = -errno;
+ continue;
+ }
+
+ /* Stop at mount points */
+ r = fd_is_mount_point(fd, de->d_name, 0);
+ if (r < 0) {
+ if (ret == 0 && r != -ENOENT)
+ ret = r;
+
+ continue;
+ }
+ if (r > 0)
+ continue;
+
+ if ((flags & REMOVE_SUBVOLUME) && st.st_ino == 256) {
+
+ /* This could be a subvolume, try to remove it */
+
+ r = btrfs_subvol_remove_fd(fd, de->d_name, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOTTY, -EINVAL)) {
+ if (ret == 0)
+ ret = r;
+
+ continue;
+ }
+
+ /* ENOTTY, then it wasn't a btrfs subvolume, continue below. */
+ } else
+ /* It was a subvolume, continue. */
+ continue;
+ }
+
+ /* We pass REMOVE_PHYSICAL here, to avoid doing the fstatfs() to check the file
+ * system type again for each directory */
+ r = rm_rf_children(TAKE_FD(subdir_fd), flags | REMOVE_PHYSICAL, root_dev);
+ if (r < 0 && ret == 0)
+ ret = r;
+
+ r = unlinkat_harder(fd, de->d_name, AT_REMOVEDIR, flags);
+ if (r < 0 && r != -ENOENT && ret == 0)
+ ret = r;
+
+ } else if (!(flags & REMOVE_ONLY_DIRECTORIES)) {
+
+ r = unlinkat_harder(fd, de->d_name, 0, flags);
+ if (r < 0 && r != -ENOENT && ret == 0)
+ ret = r;
+ }
+ }
+ return ret;
+}
+
+int rm_rf(const char *path, RemoveFlags flags) {
+ int fd, r;
+ struct statfs s;
+
+ assert(path);
+
+ /* For now, don't support dropping subvols when also only dropping directories, since we can't do
+ * this race-freely. */
+ if (FLAGS_SET(flags, REMOVE_ONLY_DIRECTORIES|REMOVE_SUBVOLUME))
+ return -EINVAL;
+
+ /* We refuse to clean the root file system with this call. This is extra paranoia to never cause a
+ * really seriously broken system. */
+ if (path_equal_or_files_same(path, "/", AT_SYMLINK_NOFOLLOW))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Attempted to remove entire root file system (\"%s\"), and we can't allow that.",
+ path);
+
+ if (FLAGS_SET(flags, REMOVE_SUBVOLUME | REMOVE_ROOT | REMOVE_PHYSICAL)) {
+ /* Try to remove as subvolume first */
+ r = btrfs_subvol_remove(path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r >= 0)
+ return r;
+
+ if (FLAGS_SET(flags, REMOVE_MISSING_OK) && r == -ENOENT)
+ return 0;
+
+ if (!IN_SET(r, -ENOTTY, -EINVAL, -ENOTDIR))
+ return r;
+
+ /* Not btrfs or not a subvolume */
+ }
+
+ fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (fd < 0) {
+ if (FLAGS_SET(flags, REMOVE_MISSING_OK) && errno == ENOENT)
+ return 0;
+
+ if (!IN_SET(errno, ENOTDIR, ELOOP))
+ return -errno;
+
+ if (FLAGS_SET(flags, REMOVE_ONLY_DIRECTORIES))
+ return 0;
+
+ if (FLAGS_SET(flags, REMOVE_ROOT)) {
+
+ if (!FLAGS_SET(flags, REMOVE_PHYSICAL)) {
+ if (statfs(path, &s) < 0)
+ return -errno;
+
+ if (is_physical_fs(&s))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Attempted to remove files from a disk file system under \"%s\", refusing.",
+ path);
+ }
+
+ if (unlink(path) < 0) {
+ if (FLAGS_SET(flags, REMOVE_MISSING_OK) && errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+ }
+
+ return 0;
+ }
+
+ r = rm_rf_children(fd, flags, NULL);
+
+ if (FLAGS_SET(flags, REMOVE_ROOT) &&
+ rmdir(path) < 0 &&
+ r >= 0 &&
+ (!FLAGS_SET(flags, REMOVE_MISSING_OK) || errno != ENOENT))
+ r = -errno;
+
+ return r;
+}
diff --git a/src/basic/rm-rf.h b/src/basic/rm-rf.h
new file mode 100644
index 0000000..ec56232
--- /dev/null
+++ b/src/basic/rm-rf.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/stat.h>
+
+#include "errno-util.h"
+
+typedef enum RemoveFlags {
+ REMOVE_ONLY_DIRECTORIES = 1 << 0, /* Only remove empty directories, no files */
+ REMOVE_ROOT = 1 << 1, /* Remove the specified directory itself too, not just the contents of it */
+ REMOVE_PHYSICAL = 1 << 2, /* If not set, only removes files on tmpfs, never physical file systems */
+ REMOVE_SUBVOLUME = 1 << 3, /* Drop btrfs subvolumes in the tree too */
+ REMOVE_MISSING_OK = 1 << 4, /* If the top-level directory is missing, ignore the ENOENT for it */
+ REMOVE_CHMOD = 1 << 5, /* chmod() for write access if we cannot delete something */
+} RemoveFlags;
+
+int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev);
+int rm_rf(const char *path, RemoveFlags flags);
+
+/* Useful for usage with _cleanup_(), destroys a directory and frees the pointer */
+static inline void rm_rf_physical_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rm_rf_physical_and_free);
+
+/* Similar as above, but also has magic btrfs subvolume powers */
+static inline void rm_rf_subvolume_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rm_rf_subvolume_and_free);
diff --git a/src/basic/selinux-util.c b/src/basic/selinux-util.c
new file mode 100644
index 0000000..4989f4f
--- /dev/null
+++ b/src/basic/selinux-util.c
@@ -0,0 +1,674 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <malloc.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <syslog.h>
+
+#if HAVE_SELINUX
+#include <selinux/avc.h>
+#include <selinux/context.h>
+#include <selinux/label.h>
+#include <selinux/selinux.h>
+#endif
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "stdio-util.h"
+#include "time-util.h"
+
+#if HAVE_SELINUX
+DEFINE_TRIVIAL_CLEANUP_FUNC(context_t, context_free);
+#define _cleanup_context_free_ _cleanup_(context_freep)
+
+static int mac_selinux_reload(int seqno);
+
+static int cached_use = -1;
+static bool initialized = false;
+static int (*enforcing_status_func)(void) = security_getenforce;
+static int last_policyload = 0;
+static struct selabel_handle *label_hnd = NULL;
+
+#define log_enforcing(...) \
+ log_full(mac_selinux_enforcing() ? LOG_ERR : LOG_WARNING, __VA_ARGS__)
+
+#define log_enforcing_errno(error, ...) \
+ ({ \
+ bool _enforcing = mac_selinux_enforcing(); \
+ int _level = _enforcing ? LOG_ERR : LOG_WARNING; \
+ int _e = (error); \
+ \
+ int _r = (log_get_max_level() >= LOG_PRI(_level)) \
+ ? log_internal_realm(_level, _e, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \
+ : -ERRNO_VALUE(_e); \
+ _enforcing ? _r : 0; \
+ })
+#endif
+
+bool mac_selinux_use(void) {
+#if HAVE_SELINUX
+ if (_unlikely_(cached_use < 0)) {
+ cached_use = is_selinux_enabled() > 0;
+ log_debug("SELinux enabled state cached to: %s", cached_use ? "enabled" : "disabled");
+ }
+
+ return cached_use;
+#else
+ return false;
+#endif
+}
+
+bool mac_selinux_enforcing(void) {
+#if HAVE_SELINUX
+ return enforcing_status_func() != 0;
+#else
+ return false;
+#endif
+}
+
+void mac_selinux_retest(void) {
+#if HAVE_SELINUX
+ cached_use = -1;
+#endif
+}
+
+#if HAVE_SELINUX
+# if HAVE_MALLINFO
+static struct mallinfo mallinfo_nowarn(void) {
+ /* glibc has deprecated mallinfo(), but the replacement malloc_info() returns an XML blob ;=[ */
+DISABLE_WARNING_DEPRECATED_DECLARATIONS
+ return mallinfo();
+REENABLE_WARNING
+}
+# else
+# warning "mallinfo() is missing, add mallinfo2() supported instead."
+# endif
+
+static int open_label_db(void) {
+ struct selabel_handle *hnd;
+ usec_t before_timestamp, after_timestamp;
+ char timespan[FORMAT_TIMESPAN_MAX];
+
+# if HAVE_MALLINFO
+ struct mallinfo before_mallinfo = mallinfo_nowarn();
+# endif
+ before_timestamp = now(CLOCK_MONOTONIC);
+
+ hnd = selabel_open(SELABEL_CTX_FILE, NULL, 0);
+ if (!hnd)
+ return log_enforcing_errno(errno, "Failed to initialize SELinux labeling handle: %m");
+
+ after_timestamp = now(CLOCK_MONOTONIC);
+# if HAVE_MALLINFO
+ struct mallinfo after_mallinfo = mallinfo_nowarn();
+ int l = after_mallinfo.uordblks > before_mallinfo.uordblks ? after_mallinfo.uordblks - before_mallinfo.uordblks : 0;
+ log_debug("Successfully loaded SELinux database in %s, size on heap is %iK.",
+ format_timespan(timespan, sizeof(timespan), after_timestamp - before_timestamp, 0),
+ DIV_ROUND_UP(l, 1024));
+# else
+ log_debug("Successfully loaded SELinux database in %s.",
+ format_timespan(timespan, sizeof(timespan), after_timestamp - before_timestamp, 0));
+# endif
+
+ /* release memory after measurement */
+ if (label_hnd)
+ selabel_close(label_hnd);
+ label_hnd = TAKE_PTR(hnd);
+
+ return 0;
+}
+#endif
+
+int mac_selinux_init(void) {
+#if HAVE_SELINUX
+ int r;
+ bool have_status_page = false;
+
+ if (initialized)
+ return 0;
+
+ if (!mac_selinux_use())
+ return 0;
+
+ r = selinux_status_open(/* netlink fallback */ 1);
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(errno))
+ return log_enforcing_errno(errno, "Failed to open SELinux status page: %m");
+ log_warning_errno(errno, "selinux_status_open() with netlink fallback failed, not checking for policy reloads: %m");
+ } else if (r == 1)
+ log_warning("selinux_status_open() failed to open the status page, using the netlink fallback.");
+ else
+ have_status_page = true;
+
+ r = open_label_db();
+ if (r < 0) {
+ selinux_status_close();
+ return r;
+ }
+
+ /* Save the current policyload sequence number, so mac_selinux_maybe_reload() does not trigger on
+ * first call without any actual change. */
+ last_policyload = selinux_status_policyload();
+
+ if (have_status_page)
+ /* Now that the SELinux status page has been successfully opened, retrieve the enforcing
+ * status over it (to avoid system calls in security_getenforce()). */
+ enforcing_status_func = selinux_status_getenforce;
+
+ initialized = true;
+#endif
+ return 0;
+}
+
+void mac_selinux_maybe_reload(void) {
+#if HAVE_SELINUX
+ int r;
+
+ if (!initialized)
+ return;
+
+ r = selinux_status_updated();
+ if (r < 0)
+ log_debug_errno(errno, "Failed to update SELinux from status page: %m");
+ if (r > 0) {
+ int policyload;
+
+ log_debug("SELinux status page update");
+
+ /* from libselinux > 3.1 callbacks gets automatically called, see
+ https://github.com/SELinuxProject/selinux/commit/05bdc03130d741e53e1fb45a958d0a2c184be503 */
+
+ /* only reload on policy changes, not enforcing status changes */
+ policyload = selinux_status_policyload();
+ if (policyload != last_policyload) {
+ mac_selinux_reload(policyload);
+ last_policyload = policyload;
+ }
+ }
+#endif
+}
+
+void mac_selinux_finish(void) {
+
+#if HAVE_SELINUX
+ if (label_hnd) {
+ selabel_close(label_hnd);
+ label_hnd = NULL;
+ }
+
+ enforcing_status_func = security_getenforce;
+
+ selinux_status_close();
+
+ initialized = false;
+#endif
+}
+
+#if HAVE_SELINUX
+static int mac_selinux_reload(int seqno) {
+ log_debug("SELinux reload %d", seqno);
+
+ (void) open_label_db();
+
+ return 0;
+}
+#endif
+
+int mac_selinux_fix_container(const char *path, const char *inside_path, LabelFixFlags flags) {
+
+ assert(path);
+ assert(inside_path);
+
+#if HAVE_SELINUX
+ _cleanup_close_ int fd = -1;
+
+ /* if mac_selinux_init() wasn't called before we are a NOOP */
+ if (!label_hnd)
+ return 0;
+
+ /* Open the file as O_PATH, to pin it while we determine and adjust the label */
+ fd = open(path, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0) {
+ if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ return mac_selinux_fix_container_fd(fd, path, inside_path, flags);
+#endif
+
+ return 0;
+}
+
+int mac_selinux_fix_container_fd(int fd, const char *path, const char *inside_path, LabelFixFlags flags) {
+
+ assert(fd >= 0);
+ assert(inside_path);
+
+#if HAVE_SELINUX
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_freecon_ char* fcon = NULL;
+ struct stat st;
+ int r;
+
+ /* if mac_selinux_init() wasn't called before we are a NOOP */
+ if (!label_hnd)
+ return 0;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Check for policy reload so 'label_hnd' is kept up-to-date by callbacks */
+ mac_selinux_maybe_reload();
+
+ if (selabel_lookup_raw(label_hnd, &fcon, inside_path, st.st_mode) < 0) {
+ /* If there's no label to set, then exit without warning */
+ if (errno == ENOENT)
+ return 0;
+
+ r = -errno;
+ goto fail;
+ }
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ if (setfilecon_raw(procfs_path, fcon) < 0) {
+ _cleanup_freecon_ char *oldcon = NULL;
+
+ /* If the FS doesn't support labels, then exit without warning */
+ if (ERRNO_IS_NOT_SUPPORTED(errno))
+ return 0;
+
+ /* It the FS is read-only and we were told to ignore failures caused by that, suppress error */
+ if (errno == EROFS && (flags & LABEL_IGNORE_EROFS))
+ return 0;
+
+ r = -errno;
+
+ /* If the old label is identical to the new one, suppress any kind of error */
+ if (getfilecon_raw(procfs_path, &oldcon) >= 0 && streq(fcon, oldcon))
+ return 0;
+
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ return log_enforcing_errno(r, "Unable to fix SELinux security context of %s (%s): %m", strna(path), strna(inside_path));
+#endif
+
+ return 0;
+}
+
+int mac_selinux_apply(const char *path, const char *label) {
+
+ assert(path);
+
+#if HAVE_SELINUX
+ if (!mac_selinux_use())
+ return 0;
+
+ assert(label);
+
+ if (setfilecon(path, label) < 0)
+ return log_enforcing_errno(errno, "Failed to set SELinux security context %s on path %s: %m", label, path);
+#endif
+ return 0;
+}
+
+int mac_selinux_apply_fd(int fd, const char *path, const char *label) {
+
+ assert(fd >= 0);
+
+#if HAVE_SELINUX
+ if (!mac_selinux_use())
+ return 0;
+
+ assert(label);
+
+ if (fsetfilecon(fd, label) < 0)
+ return log_enforcing_errno(errno, "Failed to set SELinux security context %s on path %s: %m", label, strna(path));
+#endif
+ return 0;
+}
+
+int mac_selinux_get_create_label_from_exe(const char *exe, char **label) {
+#if HAVE_SELINUX
+ _cleanup_freecon_ char *mycon = NULL, *fcon = NULL;
+ security_class_t sclass;
+ int r;
+
+ assert(exe);
+ assert(label);
+
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(&mycon);
+ if (r < 0)
+ return -errno;
+
+ r = getfilecon_raw(exe, &fcon);
+ if (r < 0)
+ return -errno;
+
+ sclass = string_to_security_class("process");
+ if (sclass == 0)
+ return -ENOSYS;
+
+ r = security_compute_create_raw(mycon, fcon, sclass, label);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+int mac_selinux_get_our_label(char **label) {
+#if HAVE_SELINUX
+ int r;
+
+ assert(label);
+
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(label);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *exec_label, char **label) {
+#if HAVE_SELINUX
+ _cleanup_freecon_ char *mycon = NULL, *peercon = NULL, *fcon = NULL;
+ _cleanup_context_free_ context_t pcon = NULL, bcon = NULL;
+ security_class_t sclass;
+ const char *range = NULL;
+ int r;
+
+ assert(socket_fd >= 0);
+ assert(exe);
+ assert(label);
+
+ if (!mac_selinux_use())
+ return -EOPNOTSUPP;
+
+ r = getcon_raw(&mycon);
+ if (r < 0)
+ return -errno;
+
+ r = getpeercon_raw(socket_fd, &peercon);
+ if (r < 0)
+ return -errno;
+
+ if (!exec_label) {
+ /* If there is no context set for next exec let's use context
+ of target executable */
+ r = getfilecon_raw(exe, &fcon);
+ if (r < 0)
+ return -errno;
+ }
+
+ bcon = context_new(mycon);
+ if (!bcon)
+ return -ENOMEM;
+
+ pcon = context_new(peercon);
+ if (!pcon)
+ return -ENOMEM;
+
+ range = context_range_get(pcon);
+ if (!range)
+ return -errno;
+
+ r = context_range_set(bcon, range);
+ if (r)
+ return -errno;
+
+ freecon(mycon);
+ mycon = strdup(context_str(bcon));
+ if (!mycon)
+ return -ENOMEM;
+
+ sclass = string_to_security_class("process");
+ if (sclass == 0)
+ return -ENOSYS;
+
+ r = security_compute_create_raw(mycon, fcon, sclass, label);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+char* mac_selinux_free(char *label) {
+
+#if HAVE_SELINUX
+ freecon(label);
+#else
+ assert(!label);
+#endif
+
+ return NULL;
+}
+
+#if HAVE_SELINUX
+static int selinux_create_file_prepare_abspath(const char *abspath, mode_t mode) {
+ _cleanup_freecon_ char *filecon = NULL;
+ int r;
+
+ assert(abspath);
+ assert(path_is_absolute(abspath));
+
+ /* Check for policy reload so 'label_hnd' is kept up-to-date by callbacks */
+ mac_selinux_maybe_reload();
+
+ r = selabel_lookup_raw(label_hnd, &filecon, abspath, mode);
+ if (r < 0) {
+ /* No context specified by the policy? Proceed without setting it. */
+ if (errno == ENOENT)
+ return 0;
+
+ return log_enforcing_errno(errno, "Failed to determine SELinux security context for %s: %m", abspath);
+ }
+
+ if (setfscreatecon_raw(filecon) < 0)
+ return log_enforcing_errno(errno, "Failed to set SELinux security context %s for %s: %m", filecon, abspath);
+
+ return 0;
+}
+#endif
+
+int mac_selinux_create_file_prepare_at(int dirfd, const char *path, mode_t mode) {
+#if HAVE_SELINUX
+ _cleanup_free_ char *abspath = NULL;
+ int r;
+
+
+ assert(path);
+
+ if (!label_hnd)
+ return 0;
+
+ if (!path_is_absolute(path)) {
+ _cleanup_free_ char *p = NULL;
+
+ if (dirfd == AT_FDCWD)
+ r = safe_getcwd(&p);
+ else
+ r = fd_get_path(dirfd, &p);
+ if (r < 0)
+ return r;
+
+ path = abspath = path_join(p, path);
+ if (!path)
+ return -ENOMEM;
+ }
+
+ return selinux_create_file_prepare_abspath(path, mode);
+#else
+ return 0;
+#endif
+}
+
+int mac_selinux_create_file_prepare(const char *path, mode_t mode) {
+#if HAVE_SELINUX
+ int r;
+
+ _cleanup_free_ char *abspath = NULL;
+
+ assert(path);
+
+ if (!label_hnd)
+ return 0;
+
+ r = path_make_absolute_cwd(path, &abspath);
+ if (r < 0)
+ return r;
+
+ return selinux_create_file_prepare_abspath(abspath, mode);
+#else
+ return 0;
+#endif
+}
+
+void mac_selinux_create_file_clear(void) {
+
+#if HAVE_SELINUX
+ PROTECT_ERRNO;
+
+ if (!mac_selinux_use())
+ return;
+
+ setfscreatecon_raw(NULL);
+#endif
+}
+
+int mac_selinux_create_socket_prepare(const char *label) {
+
+#if HAVE_SELINUX
+ assert(label);
+
+ if (!mac_selinux_use())
+ return 0;
+
+ if (setsockcreatecon(label) < 0)
+ return log_enforcing_errno(errno, "Failed to set SELinux security context %s for sockets: %m", label);
+#endif
+
+ return 0;
+}
+
+void mac_selinux_create_socket_clear(void) {
+
+#if HAVE_SELINUX
+ PROTECT_ERRNO;
+
+ if (!mac_selinux_use())
+ return;
+
+ setsockcreatecon_raw(NULL);
+#endif
+}
+
+int mac_selinux_bind(int fd, const struct sockaddr *addr, socklen_t addrlen) {
+
+ /* Binds a socket and label its file system object according to the SELinux policy */
+
+#if HAVE_SELINUX
+ _cleanup_freecon_ char *fcon = NULL;
+ const struct sockaddr_un *un;
+ bool context_changed = false;
+ char *path;
+ int r;
+
+ assert(fd >= 0);
+ assert(addr);
+ assert(addrlen >= sizeof(sa_family_t));
+
+ if (!label_hnd)
+ goto skipped;
+
+ /* Filter out non-local sockets */
+ if (addr->sa_family != AF_UNIX)
+ goto skipped;
+
+ /* Filter out anonymous sockets */
+ if (addrlen < offsetof(struct sockaddr_un, sun_path) + 1)
+ goto skipped;
+
+ /* Filter out abstract namespace sockets */
+ un = (const struct sockaddr_un*) addr;
+ if (un->sun_path[0] == 0)
+ goto skipped;
+
+ path = strndupa(un->sun_path, addrlen - offsetof(struct sockaddr_un, sun_path));
+
+ /* Check for policy reload so 'label_hnd' is kept up-to-date by callbacks */
+ mac_selinux_maybe_reload();
+
+ if (path_is_absolute(path))
+ r = selabel_lookup_raw(label_hnd, &fcon, path, S_IFSOCK);
+ else {
+ _cleanup_free_ char *newpath = NULL;
+
+ r = path_make_absolute_cwd(path, &newpath);
+ if (r < 0)
+ return r;
+
+ r = selabel_lookup_raw(label_hnd, &fcon, newpath, S_IFSOCK);
+ }
+
+ if (r < 0) {
+ /* No context specified by the policy? Proceed without setting it */
+ if (errno == ENOENT)
+ goto skipped;
+
+ r = log_enforcing_errno(errno, "Failed to determine SELinux security context for %s: %m", path);
+ if (r < 0)
+ return r;
+ } else {
+ if (setfscreatecon_raw(fcon) < 0) {
+ r = log_enforcing_errno(errno, "Failed to set SELinux security context %s for %s: %m", fcon, path);
+ if (r < 0)
+ return r;
+ } else
+ context_changed = true;
+ }
+
+ r = bind(fd, addr, addrlen) < 0 ? -errno : 0;
+
+ if (context_changed)
+ (void) setfscreatecon_raw(NULL);
+
+ return r;
+
+skipped:
+#endif
+ if (bind(fd, addr, addrlen) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/basic/selinux-util.h b/src/basic/selinux-util.h
new file mode 100644
index 0000000..1236d6e
--- /dev/null
+++ b/src/basic/selinux-util.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "label.h"
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, freecon);
+#define _cleanup_freecon_ _cleanup_(freeconp)
+#endif
+
+bool mac_selinux_use(void);
+void mac_selinux_retest(void);
+bool mac_selinux_enforcing(void);
+
+int mac_selinux_init(void);
+void mac_selinux_maybe_reload(void);
+void mac_selinux_finish(void);
+
+int mac_selinux_fix_container(const char *path, const char *inside_path, LabelFixFlags flags);
+static inline int mac_selinux_fix(const char *path, LabelFixFlags flags) {
+ return mac_selinux_fix_container(path, path, flags);
+}
+
+int mac_selinux_fix_container_fd(int fd, const char *path, const char *inside_path, LabelFixFlags flags);
+static inline int mac_selinux_fix_fd(int fd, const char *path, LabelFixFlags flags) {
+ return mac_selinux_fix_container_fd(fd, path, path, flags);
+}
+
+int mac_selinux_apply(const char *path, const char *label);
+int mac_selinux_apply_fd(int fd, const char *path, const char *label);
+
+int mac_selinux_get_create_label_from_exe(const char *exe, char **label);
+int mac_selinux_get_our_label(char **label);
+int mac_selinux_get_child_mls_label(int socket_fd, const char *exe, const char *exec_label, char **label);
+char* mac_selinux_free(char *label);
+
+int mac_selinux_create_file_prepare(const char *path, mode_t mode);
+int mac_selinux_create_file_prepare_at(int dirfd, const char *path, mode_t mode);
+void mac_selinux_create_file_clear(void);
+
+int mac_selinux_create_socket_prepare(const char *label);
+void mac_selinux_create_socket_clear(void);
+
+int mac_selinux_bind(int fd, const struct sockaddr *addr, socklen_t addrlen);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, mac_selinux_free);
diff --git a/src/basic/set.h b/src/basic/set.h
new file mode 100644
index 0000000..57ff713
--- /dev/null
+++ b/src/basic/set.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "extract-word.h"
+#include "hashmap.h"
+#include "macro.h"
+
+#define set_free_and_replace(a, b) \
+ ({ \
+ set_free(a); \
+ (a) = (b); \
+ (b) = NULL; \
+ 0; \
+ })
+
+Set* _set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define set_new(ops) _set_new(ops HASHMAP_DEBUG_SRC_ARGS)
+
+static inline Set* set_free(Set *s) {
+ return (Set*) _hashmap_free(HASHMAP_BASE(s), NULL, NULL);
+}
+
+static inline Set* set_free_free(Set *s) {
+ return (Set*) _hashmap_free(HASHMAP_BASE(s), free, NULL);
+}
+
+/* no set_free_free_free */
+
+#define set_copy(s) ((Set*) _hashmap_copy(HASHMAP_BASE(h) HASHMAP_DEBUG_SRC_ARGS))
+
+int _set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS);
+#define set_ensure_allocated(h, ops) _set_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS)
+
+int set_put(Set *s, const void *key);
+/* no set_update */
+/* no set_replace */
+static inline void *set_get(const Set *s, const void *key) {
+ return _hashmap_get(HASHMAP_BASE((Set *) s), key);
+}
+/* no set_get2 */
+
+static inline bool set_contains(const Set *s, const void *key) {
+ return _hashmap_contains(HASHMAP_BASE((Set *) s), key);
+}
+
+static inline void *set_remove(Set *s, const void *key) {
+ return _hashmap_remove(HASHMAP_BASE(s), key);
+}
+
+/* no set_remove2 */
+/* no set_remove_value */
+int set_remove_and_put(Set *s, const void *old_key, const void *new_key);
+/* no set_remove_and_replace */
+int set_merge(Set *s, Set *other);
+
+static inline int set_reserve(Set *h, unsigned entries_add) {
+ return _hashmap_reserve(HASHMAP_BASE(h), entries_add);
+}
+
+static inline int set_move(Set *s, Set *other) {
+ return _hashmap_move(HASHMAP_BASE(s), HASHMAP_BASE(other));
+}
+
+static inline int set_move_one(Set *s, Set *other, const void *key) {
+ return _hashmap_move_one(HASHMAP_BASE(s), HASHMAP_BASE(other), key);
+}
+
+static inline unsigned set_size(const Set *s) {
+ return _hashmap_size(HASHMAP_BASE((Set *) s));
+}
+
+static inline bool set_isempty(const Set *s) {
+ return set_size(s) == 0;
+}
+
+static inline unsigned set_buckets(const Set *s) {
+ return _hashmap_buckets(HASHMAP_BASE((Set *) s));
+}
+
+static inline bool set_iterate(const Set *s, Iterator *i, void **value) {
+ return _hashmap_iterate(HASHMAP_BASE((Set*) s), i, value, NULL);
+}
+
+static inline void set_clear(Set *s) {
+ _hashmap_clear(HASHMAP_BASE(s), NULL, NULL);
+}
+
+static inline void set_clear_free(Set *s) {
+ _hashmap_clear(HASHMAP_BASE(s), free, NULL);
+}
+
+/* no set_clear_free_free */
+
+static inline void *set_steal_first(Set *s) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE(s), true, NULL);
+}
+
+#define set_clear_with_destructor(_s, _f) \
+ ({ \
+ void *_item; \
+ while ((_item = set_steal_first(_s))) \
+ _f(_item); \
+ })
+#define set_free_with_destructor(_s, _f) \
+ ({ \
+ set_clear_with_destructor(_s, _f); \
+ set_free(_s); \
+ })
+
+/* no set_steal_first_key */
+/* no set_first_key */
+
+static inline void *set_first(const Set *s) {
+ return _hashmap_first_key_and_value(HASHMAP_BASE((Set *) s), false, NULL);
+}
+
+/* no set_next */
+
+static inline char **set_get_strv(Set *s) {
+ return _hashmap_get_strv(HASHMAP_BASE(s));
+}
+
+int _set_ensure_put(Set **s, const struct hash_ops *hash_ops, const void *key HASHMAP_DEBUG_PARAMS);
+#define set_ensure_put(s, hash_ops, key) _set_ensure_put(s, hash_ops, key HASHMAP_DEBUG_SRC_ARGS)
+
+int _set_ensure_consume(Set **s, const struct hash_ops *hash_ops, void *key HASHMAP_DEBUG_PARAMS);
+#define set_ensure_consume(s, hash_ops, key) _set_ensure_consume(s, hash_ops, key HASHMAP_DEBUG_SRC_ARGS)
+
+int set_consume(Set *s, void *value);
+
+int _set_put_strdup_full(Set **s, const struct hash_ops *hash_ops, const char *p HASHMAP_DEBUG_PARAMS);
+#define set_put_strdup_full(s, hash_ops, p) _set_put_strdup_full(s, hash_ops, p HASHMAP_DEBUG_SRC_ARGS)
+#define set_put_strdup(s, p) set_put_strdup_full(s, &string_hash_ops_free, p)
+int _set_put_strdupv_full(Set **s, const struct hash_ops *hash_ops, char **l HASHMAP_DEBUG_PARAMS);
+#define set_put_strdupv_full(s, hash_ops, l) _set_put_strdupv_full(s, hash_ops, l HASHMAP_DEBUG_SRC_ARGS)
+#define set_put_strdupv(s, l) set_put_strdupv_full(s, &string_hash_ops_free, l)
+
+int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags);
+
+#define _SET_FOREACH(e, s, i) \
+ for (Iterator i = ITERATOR_FIRST; set_iterate((s), &i, (void**)&(e)); )
+#define SET_FOREACH(e, s) \
+ _SET_FOREACH(e, s, UNIQ_T(i, UNIQ))
+
+#define SET_FOREACH_MOVE(e, d, s) \
+ for (; ({ e = set_first(s); assert_se(!e || set_move_one(d, s, e) >= 0); e; }); )
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free_free);
+
+#define _cleanup_set_free_ _cleanup_(set_freep)
+#define _cleanup_set_free_free_ _cleanup_(set_free_freep)
+
+int set_strjoin(Set *s, const char *separator, bool wrap_with_separator, char **ret);
diff --git a/src/basic/sigbus.c b/src/basic/sigbus.c
new file mode 100644
index 0000000..4c2e9ec
--- /dev/null
+++ b/src/basic/sigbus.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <signal.h>
+#include <stddef.h>
+#include <sys/mman.h>
+
+#include "macro.h"
+#include "memory-util.h"
+#include "sigbus.h"
+
+#define SIGBUS_QUEUE_MAX 64
+
+static struct sigaction old_sigaction;
+static unsigned n_installed = 0;
+
+/* We maintain a fixed size list of page addresses that triggered a
+ SIGBUS. We access with list with atomic operations, so that we
+ don't have to deal with locks between signal handler and main
+ programs in possibly multiple threads. */
+
+static void* volatile sigbus_queue[SIGBUS_QUEUE_MAX];
+static volatile sig_atomic_t n_sigbus_queue = 0;
+
+static void sigbus_push(void *addr) {
+ unsigned u;
+
+ assert(addr);
+
+ /* Find a free place, increase the number of entries and leave, if we can */
+ for (u = 0; u < SIGBUS_QUEUE_MAX; u++)
+ if (__sync_bool_compare_and_swap(&sigbus_queue[u], NULL, addr)) {
+ __sync_fetch_and_add(&n_sigbus_queue, 1);
+ return;
+ }
+
+ /* If we can't, make sure the queue size is out of bounds, to
+ * mark it as overflow */
+ for (;;) {
+ unsigned c;
+
+ __sync_synchronize();
+ c = n_sigbus_queue;
+
+ if (c > SIGBUS_QUEUE_MAX) /* already overflow */
+ return;
+
+ if (__sync_bool_compare_and_swap(&n_sigbus_queue, c, c + SIGBUS_QUEUE_MAX))
+ return;
+ }
+}
+
+int sigbus_pop(void **ret) {
+ assert(ret);
+
+ for (;;) {
+ unsigned u, c;
+
+ __sync_synchronize();
+ c = n_sigbus_queue;
+
+ if (_likely_(c == 0))
+ return 0;
+
+ if (_unlikely_(c >= SIGBUS_QUEUE_MAX))
+ return -EOVERFLOW;
+
+ for (u = 0; u < SIGBUS_QUEUE_MAX; u++) {
+ void *addr;
+
+ addr = sigbus_queue[u];
+ if (!addr)
+ continue;
+
+ if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) {
+ __sync_fetch_and_sub(&n_sigbus_queue, 1);
+ *ret = addr;
+ return 1;
+ }
+ }
+ }
+}
+
+static void sigbus_handler(int sn, siginfo_t *si, void *data) {
+ unsigned long ul;
+ void *aligned;
+
+ assert(sn == SIGBUS);
+ assert(si);
+
+ if (si->si_code != BUS_ADRERR || !si->si_addr) {
+ assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0);
+ raise(SIGBUS);
+ return;
+ }
+
+ ul = (unsigned long) si->si_addr;
+ ul = ul / page_size();
+ ul = ul * page_size();
+ aligned = (void*) ul;
+
+ /* Let's remember which address failed */
+ sigbus_push(aligned);
+
+ /* Replace mapping with an anonymous page, so that the
+ * execution can continue, however with a zeroed out page */
+ assert_se(mmap(aligned, page_size(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == aligned);
+}
+
+void sigbus_install(void) {
+ struct sigaction sa = {
+ .sa_sigaction = sigbus_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+
+ /* make sure that sysconf() is not called from a signal handler because
+ * it is not guaranteed to be async-signal-safe since POSIX.1-2008 */
+ (void) page_size();
+
+ n_installed++;
+
+ if (n_installed == 1)
+ assert_se(sigaction(SIGBUS, &sa, &old_sigaction) == 0);
+
+ return;
+}
+
+void sigbus_reset(void) {
+
+ if (n_installed <= 0)
+ return;
+
+ n_installed--;
+
+ if (n_installed == 0)
+ assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0);
+
+ return;
+}
diff --git a/src/basic/sigbus.h b/src/basic/sigbus.h
new file mode 100644
index 0000000..a40b1a8
--- /dev/null
+++ b/src/basic/sigbus.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+void sigbus_install(void);
+void sigbus_reset(void);
+
+int sigbus_pop(void **ret);
diff --git a/src/basic/signal-util.c b/src/basic/signal-util.c
new file mode 100644
index 0000000..63b833b
--- /dev/null
+++ b/src/basic/signal-util.c
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdarg.h>
+
+#include "macro.h"
+#include "parse-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+
+int reset_all_signal_handlers(void) {
+ static const struct sigaction sa = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_RESTART,
+ };
+ int sig, r = 0;
+
+ for (sig = 1; sig < _NSIG; sig++) {
+
+ /* These two cannot be caught... */
+ if (IN_SET(sig, SIGKILL, SIGSTOP))
+ continue;
+
+ /* On Linux the first two RT signals are reserved by
+ * glibc, and sigaction() will return EINVAL for them. */
+ if (sigaction(sig, &sa, NULL) < 0)
+ if (errno != EINVAL && r >= 0)
+ r = -errno;
+ }
+
+ return r;
+}
+
+int reset_signal_mask(void) {
+ sigset_t ss;
+
+ if (sigemptyset(&ss) < 0)
+ return -errno;
+
+ if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int sigaction_many_ap(const struct sigaction *sa, int sig, va_list ap) {
+ int r = 0;
+
+ /* negative signal ends the list. 0 signal is skipped. */
+ for (; sig >= 0; sig = va_arg(ap, int)) {
+
+ if (sig == 0)
+ continue;
+
+ if (sigaction(sig, sa, NULL) < 0) {
+ if (r >= 0)
+ r = -errno;
+ }
+ }
+
+ return r;
+}
+
+int sigaction_many(const struct sigaction *sa, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, sa);
+ r = sigaction_many_ap(sa, 0, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int ignore_signals(int sig, ...) {
+
+ static const struct sigaction sa = {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_RESTART,
+ };
+
+ va_list ap;
+ int r;
+
+ va_start(ap, sig);
+ r = sigaction_many_ap(&sa, sig, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int default_signals(int sig, ...) {
+
+ static const struct sigaction sa = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_RESTART,
+ };
+
+ va_list ap;
+ int r;
+
+ va_start(ap, sig);
+ r = sigaction_many_ap(&sa, sig, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static int sigset_add_many_ap(sigset_t *ss, va_list ap) {
+ int sig, r = 0;
+
+ assert(ss);
+
+ while ((sig = va_arg(ap, int)) >= 0) {
+
+ if (sig == 0)
+ continue;
+
+ if (sigaddset(ss, sig) < 0) {
+ if (r >= 0)
+ r = -errno;
+ }
+ }
+
+ return r;
+}
+
+int sigset_add_many(sigset_t *ss, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, ss);
+ r = sigset_add_many_ap(ss, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int sigprocmask_many(int how, sigset_t *old, ...) {
+ va_list ap;
+ sigset_t ss;
+ int r;
+
+ if (sigemptyset(&ss) < 0)
+ return -errno;
+
+ va_start(ap, old);
+ r = sigset_add_many_ap(&ss, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ if (sigprocmask(how, &ss, old) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static const char *const __signal_table[] = {
+ [SIGHUP] = "HUP",
+ [SIGINT] = "INT",
+ [SIGQUIT] = "QUIT",
+ [SIGILL] = "ILL",
+ [SIGTRAP] = "TRAP",
+ [SIGABRT] = "ABRT",
+ [SIGBUS] = "BUS",
+ [SIGFPE] = "FPE",
+ [SIGKILL] = "KILL",
+ [SIGUSR1] = "USR1",
+ [SIGSEGV] = "SEGV",
+ [SIGUSR2] = "USR2",
+ [SIGPIPE] = "PIPE",
+ [SIGALRM] = "ALRM",
+ [SIGTERM] = "TERM",
+#ifdef SIGSTKFLT
+ [SIGSTKFLT] = "STKFLT", /* Linux on SPARC doesn't know SIGSTKFLT */
+#endif
+ [SIGCHLD] = "CHLD",
+ [SIGCONT] = "CONT",
+ [SIGSTOP] = "STOP",
+ [SIGTSTP] = "TSTP",
+ [SIGTTIN] = "TTIN",
+ [SIGTTOU] = "TTOU",
+ [SIGURG] = "URG",
+ [SIGXCPU] = "XCPU",
+ [SIGXFSZ] = "XFSZ",
+ [SIGVTALRM] = "VTALRM",
+ [SIGPROF] = "PROF",
+ [SIGWINCH] = "WINCH",
+ [SIGIO] = "IO",
+ [SIGPWR] = "PWR",
+ [SIGSYS] = "SYS"
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(__signal, int);
+
+const char *signal_to_string(int signo) {
+ static thread_local char buf[STRLEN("RTMIN+") + DECIMAL_STR_MAX(int) + 1];
+ const char *name;
+
+ name = __signal_to_string(signo);
+ if (name)
+ return name;
+
+ if (signo >= SIGRTMIN && signo <= SIGRTMAX)
+ xsprintf(buf, "RTMIN+%d", signo - SIGRTMIN);
+ else
+ xsprintf(buf, "%d", signo);
+
+ return buf;
+}
+
+int signal_from_string(const char *s) {
+ const char *p;
+ int signo, r;
+
+ /* Check that the input is a signal number. */
+ if (safe_atoi(s, &signo) >= 0) {
+ if (SIGNAL_VALID(signo))
+ return signo;
+ else
+ return -ERANGE;
+ }
+
+ /* Drop "SIG" prefix. */
+ if (startswith(s, "SIG"))
+ s += 3;
+
+ /* Check that the input is a signal name. */
+ signo = __signal_from_string(s);
+ if (signo > 0)
+ return signo;
+
+ /* Check that the input is RTMIN or
+ * RTMIN+n (0 <= n <= SIGRTMAX-SIGRTMIN). */
+ p = startswith(s, "RTMIN");
+ if (p) {
+ if (*p == '\0')
+ return SIGRTMIN;
+ if (*p != '+')
+ return -EINVAL;
+
+ r = safe_atoi(p, &signo);
+ if (r < 0)
+ return r;
+
+ if (signo < 0 || signo > SIGRTMAX - SIGRTMIN)
+ return -ERANGE;
+
+ return signo + SIGRTMIN;
+ }
+
+ /* Check that the input is RTMAX or
+ * RTMAX-n (0 <= n <= SIGRTMAX-SIGRTMIN). */
+ p = startswith(s, "RTMAX");
+ if (p) {
+ if (*p == '\0')
+ return SIGRTMAX;
+ if (*p != '-')
+ return -EINVAL;
+
+ r = safe_atoi(p, &signo);
+ if (r < 0)
+ return r;
+
+ if (signo > 0 || signo < SIGRTMIN - SIGRTMAX)
+ return -ERANGE;
+
+ return signo + SIGRTMAX;
+ }
+
+ return -EINVAL;
+}
+
+void nop_signal_handler(int sig) {
+ /* nothing here */
+}
+
+int signal_is_blocked(int sig) {
+ sigset_t ss;
+ int r;
+
+ r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
+ if (r != 0)
+ return -r;
+
+ r = sigismember(&ss, sig);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
diff --git a/src/basic/signal-util.h b/src/basic/signal-util.h
new file mode 100644
index 0000000..bdd39d4
--- /dev/null
+++ b/src/basic/signal-util.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <signal.h>
+
+#include "macro.h"
+
+int reset_all_signal_handlers(void);
+int reset_signal_mask(void);
+
+int ignore_signals(int sig, ...);
+int default_signals(int sig, ...);
+int sigaction_many(const struct sigaction *sa, ...);
+
+int sigset_add_many(sigset_t *ss, ...);
+int sigprocmask_many(int how, sigset_t *old, ...);
+
+const char *signal_to_string(int i) _const_;
+int signal_from_string(const char *s) _pure_;
+
+void nop_signal_handler(int sig);
+
+static inline void block_signals_reset(sigset_t *ss) {
+ assert_se(sigprocmask(SIG_SETMASK, ss, NULL) >= 0);
+}
+
+#define BLOCK_SIGNALS(...) \
+ _cleanup_(block_signals_reset) _unused_ sigset_t _saved_sigset = ({ \
+ sigset_t _t; \
+ assert_se(sigprocmask_many(SIG_BLOCK, &_t, __VA_ARGS__, -1) >= 0); \
+ _t; \
+ })
+
+static inline bool SIGNAL_VALID(int signo) {
+ return signo > 0 && signo < _NSIG;
+}
+
+static inline const char* signal_to_string_with_check(int n) {
+ if (!SIGNAL_VALID(n))
+ return NULL;
+
+ return signal_to_string(n);
+}
+
+int signal_is_blocked(int sig);
diff --git a/src/basic/siphash24.c b/src/basic/siphash24.c
new file mode 100644
index 0000000..7c61eb1
--- /dev/null
+++ b/src/basic/siphash24.c
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: CC0-1.0 */
+
+/*
+ SipHash reference C implementation
+
+ Written in 2012 by
+ Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+ Daniel J. Bernstein <djb@cr.yp.to>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along with
+ this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
+
+ (Minimal changes made by Lennart Poettering, to make clean for inclusion in systemd)
+ (Refactored by Tom Gundersen to split up in several functions and follow systemd
+ coding style)
+*/
+
+#include <stdio.h>
+
+#include "macro.h"
+#include "siphash24.h"
+#include "unaligned.h"
+
+static uint64_t rotate_left(uint64_t x, uint8_t b) {
+ assert(b < 64);
+
+ return (x << b) | (x >> (64 - b));
+}
+
+static void sipround(struct siphash *state) {
+ assert(state);
+
+ state->v0 += state->v1;
+ state->v1 = rotate_left(state->v1, 13);
+ state->v1 ^= state->v0;
+ state->v0 = rotate_left(state->v0, 32);
+ state->v2 += state->v3;
+ state->v3 = rotate_left(state->v3, 16);
+ state->v3 ^= state->v2;
+ state->v0 += state->v3;
+ state->v3 = rotate_left(state->v3, 21);
+ state->v3 ^= state->v0;
+ state->v2 += state->v1;
+ state->v1 = rotate_left(state->v1, 17);
+ state->v1 ^= state->v2;
+ state->v2 = rotate_left(state->v2, 32);
+}
+
+void siphash24_init(struct siphash *state, const uint8_t k[static 16]) {
+ uint64_t k0, k1;
+
+ assert(state);
+ assert(k);
+
+ k0 = unaligned_read_le64(k);
+ k1 = unaligned_read_le64(k + 8);
+
+ *state = (struct siphash) {
+ /* "somepseudorandomlygeneratedbytes" */
+ .v0 = 0x736f6d6570736575ULL ^ k0,
+ .v1 = 0x646f72616e646f6dULL ^ k1,
+ .v2 = 0x6c7967656e657261ULL ^ k0,
+ .v3 = 0x7465646279746573ULL ^ k1,
+ .padding = 0,
+ .inlen = 0,
+ };
+}
+
+void siphash24_compress(const void *_in, size_t inlen, struct siphash *state) {
+
+ const uint8_t *in = _in;
+ const uint8_t *end = in + inlen;
+ size_t left = state->inlen & 7;
+ uint64_t m;
+
+ assert(in);
+ assert(state);
+
+ /* Update total length */
+ state->inlen += inlen;
+
+ /* If padding exists, fill it out */
+ if (left > 0) {
+ for ( ; in < end && left < 8; in ++, left ++)
+ state->padding |= ((uint64_t) *in) << (left * 8);
+
+ if (in == end && left < 8)
+ /* We did not have enough input to fill out the padding completely */
+ return;
+
+#if ENABLE_DEBUG_SIPHASH
+ printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+ printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+ printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+ printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+ printf("(%3zu) compress padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t)state->padding);
+#endif
+
+ state->v3 ^= state->padding;
+ sipround(state);
+ sipround(state);
+ state->v0 ^= state->padding;
+
+ state->padding = 0;
+ }
+
+ end -= (state->inlen % sizeof(uint64_t));
+
+ for ( ; in < end; in += 8) {
+ m = unaligned_read_le64(in);
+#if ENABLE_DEBUG_SIPHASH
+ printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+ printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+ printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+ printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+ printf("(%3zu) compress %08x %08x\n", state->inlen, (uint32_t) (m >> 32), (uint32_t) m);
+#endif
+ state->v3 ^= m;
+ sipround(state);
+ sipround(state);
+ state->v0 ^= m;
+ }
+
+ left = state->inlen & 7;
+ switch (left) {
+ case 7:
+ state->padding |= ((uint64_t) in[6]) << 48;
+ _fallthrough_;
+ case 6:
+ state->padding |= ((uint64_t) in[5]) << 40;
+ _fallthrough_;
+ case 5:
+ state->padding |= ((uint64_t) in[4]) << 32;
+ _fallthrough_;
+ case 4:
+ state->padding |= ((uint64_t) in[3]) << 24;
+ _fallthrough_;
+ case 3:
+ state->padding |= ((uint64_t) in[2]) << 16;
+ _fallthrough_;
+ case 2:
+ state->padding |= ((uint64_t) in[1]) << 8;
+ _fallthrough_;
+ case 1:
+ state->padding |= ((uint64_t) in[0]);
+ _fallthrough_;
+ case 0:
+ break;
+ }
+}
+
+uint64_t siphash24_finalize(struct siphash *state) {
+ uint64_t b;
+
+ assert(state);
+
+ b = state->padding | (((uint64_t) state->inlen) << 56);
+
+#if ENABLE_DEBUG_SIPHASH
+ printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+ printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+ printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+ printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+ printf("(%3zu) padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t) state->padding);
+#endif
+
+ state->v3 ^= b;
+ sipround(state);
+ sipround(state);
+ state->v0 ^= b;
+
+#if ENABLE_DEBUG_SIPHASH
+ printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0);
+ printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1);
+ printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2);
+ printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3);
+#endif
+ state->v2 ^= 0xff;
+
+ sipround(state);
+ sipround(state);
+ sipround(state);
+ sipround(state);
+
+ return state->v0 ^ state->v1 ^ state->v2 ^ state->v3;
+}
+
+uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]) {
+ struct siphash state;
+
+ assert(in);
+ assert(k);
+
+ siphash24_init(&state, k);
+ siphash24_compress(in, inlen, &state);
+
+ return siphash24_finalize(&state);
+}
diff --git a/src/basic/siphash24.h b/src/basic/siphash24.h
new file mode 100644
index 0000000..0b3e845
--- /dev/null
+++ b/src/basic/siphash24.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: CC0-1.0 */
+
+#pragma once
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "string-util.h"
+#include "time-util.h"
+
+struct siphash {
+ uint64_t v0;
+ uint64_t v1;
+ uint64_t v2;
+ uint64_t v3;
+ uint64_t padding;
+ size_t inlen;
+};
+
+void siphash24_init(struct siphash *state, const uint8_t k[static 16]);
+void siphash24_compress(const void *in, size_t inlen, struct siphash *state);
+#define siphash24_compress_byte(byte, state) siphash24_compress((const uint8_t[]) { (byte) }, 1, (state))
+
+static inline void siphash24_compress_boolean(bool in, struct siphash *state) {
+ uint8_t i = in;
+
+ siphash24_compress(&i, sizeof i, state);
+}
+
+static inline void siphash24_compress_usec_t(usec_t in, struct siphash *state) {
+ siphash24_compress(&in, sizeof in, state);
+}
+
+static inline void siphash24_compress_safe(const void *in, size_t inlen, struct siphash *state) {
+ if (inlen == 0)
+ return;
+
+ siphash24_compress(in, inlen, state);
+}
+
+static inline void siphash24_compress_string(const char *in, struct siphash *state) {
+ siphash24_compress_safe(in, strlen_ptr(in), state);
+}
+
+uint64_t siphash24_finalize(struct siphash *state);
+
+uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]);
+
+static inline uint64_t siphash24_string(const char *s, const uint8_t k[static 16]) {
+ return siphash24(s, strlen(s) + 1, k);
+}
diff --git a/src/basic/smack-util.c b/src/basic/smack-util.c
new file mode 100644
index 0000000..3362ee3
--- /dev/null
+++ b/src/basic/smack-util.c
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation
+
+ Author: Auke Kok <auke-jan.h.kok@intel.com>
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "smack-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "xattr-util.h"
+
+#if ENABLE_SMACK
+bool mac_smack_use(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0)
+ cached_use = access("/sys/fs/smackfs/", F_OK) >= 0;
+
+ return cached_use;
+}
+
+static const char* const smack_attr_table[_SMACK_ATTR_MAX] = {
+ [SMACK_ATTR_ACCESS] = "security.SMACK64",
+ [SMACK_ATTR_EXEC] = "security.SMACK64EXEC",
+ [SMACK_ATTR_MMAP] = "security.SMACK64MMAP",
+ [SMACK_ATTR_TRANSMUTE] = "security.SMACK64TRANSMUTE",
+ [SMACK_ATTR_IPIN] = "security.SMACK64IPIN",
+ [SMACK_ATTR_IPOUT] = "security.SMACK64IPOUT",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(smack_attr, SmackAttr);
+
+int mac_smack_read(const char *path, SmackAttr attr, char **label) {
+ assert(path);
+ assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+ assert(label);
+
+ if (!mac_smack_use())
+ return 0;
+
+ return getxattr_malloc(path, smack_attr_to_string(attr), label, true);
+}
+
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label) {
+ assert(fd >= 0);
+ assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+ assert(label);
+
+ if (!mac_smack_use())
+ return 0;
+
+ return fgetxattr_malloc(fd, smack_attr_to_string(attr), label);
+}
+
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label) {
+ int r;
+
+ assert(path);
+ assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+
+ if (!mac_smack_use())
+ return 0;
+
+ if (label)
+ r = lsetxattr(path, smack_attr_to_string(attr), label, strlen(label), 0);
+ else
+ r = lremovexattr(path, smack_attr_to_string(attr));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label) {
+ int r;
+
+ assert(fd >= 0);
+ assert(attr >= 0 && attr < _SMACK_ATTR_MAX);
+
+ if (!mac_smack_use())
+ return 0;
+
+ if (label)
+ r = fsetxattr(fd, smack_attr_to_string(attr), label, strlen(label), 0);
+ else
+ r = fremovexattr(fd, smack_attr_to_string(attr));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int mac_smack_apply_pid(pid_t pid, const char *label) {
+ const char *p;
+ int r;
+
+ assert(label);
+
+ if (!mac_smack_use())
+ return 0;
+
+ p = procfs_file_alloca(pid, "attr/current");
+ r = write_string_file(p, label, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+static int smack_fix_fd(int fd , const char *abspath, LabelFixFlags flags) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ const char *label;
+ struct stat st;
+ int r;
+
+ /* The caller should have done the sanity checks. */
+ assert(abspath);
+ assert(path_is_absolute(abspath));
+
+ /* Path must be in /dev. */
+ if (!path_startswith(abspath, "/dev"))
+ return 0;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /*
+ * Label directories and character devices "*".
+ * Label symlinks "_".
+ * Don't change anything else.
+ */
+
+ if (S_ISDIR(st.st_mode))
+ label = SMACK_STAR_LABEL;
+ else if (S_ISLNK(st.st_mode))
+ label = SMACK_FLOOR_LABEL;
+ else if (S_ISCHR(st.st_mode))
+ label = SMACK_STAR_LABEL;
+ else
+ return 0;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ if (setxattr(procfs_path, "security.SMACK64", label, strlen(label), 0) < 0) {
+ _cleanup_free_ char *old_label = NULL;
+
+ r = -errno;
+
+ /* If the FS doesn't support labels, then exit without warning */
+ if (r == -EOPNOTSUPP)
+ return 0;
+
+ /* It the FS is read-only and we were told to ignore failures caused by that, suppress error */
+ if (r == -EROFS && (flags & LABEL_IGNORE_EROFS))
+ return 0;
+
+ /* If the old label is identical to the new one, suppress any kind of error */
+ if (getxattr_malloc(procfs_path, "security.SMACK64", &old_label, false) >= 0 &&
+ streq(old_label, label))
+ return 0;
+
+ return log_debug_errno(r, "Unable to fix SMACK label of %s: %m", abspath);
+ }
+
+ return 0;
+}
+
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(path);
+
+ if (!mac_smack_use())
+ return 0;
+
+ fd = openat(dirfd, path, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0) {
+ if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (!path_is_absolute(path)) {
+ r = fd_get_path(fd, &p);
+ if (r < 0)
+ return r;
+ path = p;
+ }
+
+ return smack_fix_fd(fd, path, flags);
+}
+
+int mac_smack_fix_container(const char *path, const char *inside_path, LabelFixFlags flags) {
+ _cleanup_free_ char *abspath = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(path);
+
+ if (!mac_smack_use())
+ return 0;
+
+ r = path_make_absolute_cwd(path, &abspath);
+ if (r < 0)
+ return r;
+
+ fd = open(abspath, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0) {
+ if ((flags & LABEL_IGNORE_ENOENT) && errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ return smack_fix_fd(fd, inside_path, flags);
+}
+
+int mac_smack_copy(const char *dest, const char *src) {
+ int r;
+ _cleanup_free_ char *label = NULL;
+
+ assert(dest);
+ assert(src);
+
+ r = mac_smack_read(src, SMACK_ATTR_ACCESS, &label);
+ if (r < 0)
+ return r;
+
+ r = mac_smack_apply(dest, SMACK_ATTR_ACCESS, label);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+#else
+bool mac_smack_use(void) {
+ return false;
+}
+
+int mac_smack_read(const char *path, SmackAttr attr, char **label) {
+ return -EOPNOTSUPP;
+}
+
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label) {
+ return -EOPNOTSUPP;
+}
+
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label) {
+ return 0;
+}
+
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label) {
+ return 0;
+}
+
+int mac_smack_apply_pid(pid_t pid, const char *label) {
+ return 0;
+}
+
+int mac_smack_fix_container(const char *path, const char *inside_path, LabelFixFlags flags) {
+ return 0;
+}
+
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags) {
+ return 0;
+}
+
+int mac_smack_copy(const char *dest, const char *src) {
+ return 0;
+}
+#endif
diff --git a/src/basic/smack-util.h b/src/basic/smack-util.h
new file mode 100644
index 0000000..d0b2352
--- /dev/null
+++ b/src/basic/smack-util.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation
+
+ Author: Auke Kok <auke-jan.h.kok@intel.com>
+***/
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "label.h"
+#include "macro.h"
+
+#define SMACK_FLOOR_LABEL "_"
+#define SMACK_STAR_LABEL "*"
+
+typedef enum SmackAttr {
+ SMACK_ATTR_ACCESS,
+ SMACK_ATTR_EXEC,
+ SMACK_ATTR_MMAP,
+ SMACK_ATTR_TRANSMUTE,
+ SMACK_ATTR_IPIN,
+ SMACK_ATTR_IPOUT,
+ _SMACK_ATTR_MAX,
+ _SMACK_ATTR_INVALID = -1,
+} SmackAttr;
+
+bool mac_smack_use(void);
+
+int mac_smack_fix_container(const char *path, const char *inside_path, LabelFixFlags flags);
+static inline int mac_smack_fix(const char *path, LabelFixFlags flags) {
+ return mac_smack_fix_container(path, path, flags);
+}
+
+int mac_smack_fix_at(int dirfd, const char *path, LabelFixFlags flags);
+
+const char* smack_attr_to_string(SmackAttr i) _const_;
+SmackAttr smack_attr_from_string(const char *s) _pure_;
+int mac_smack_read(const char *path, SmackAttr attr, char **label);
+int mac_smack_read_fd(int fd, SmackAttr attr, char **label);
+int mac_smack_apply(const char *path, SmackAttr attr, const char *label);
+int mac_smack_apply_fd(int fd, SmackAttr attr, const char *label);
+int mac_smack_apply_pid(pid_t pid, const char *label);
+int mac_smack_copy(const char *dest, const char *src);
diff --git a/src/basic/socket-label.c b/src/basic/socket-label.c
new file mode 100644
index 0000000..ea78762
--- /dev/null
+++ b/src/basic/socket-label.c
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_socket.h"
+#include "mkdir.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "umask-util.h"
+
+int socket_address_listen(
+ const SocketAddress *a,
+ int flags,
+ int backlog,
+ SocketAddressBindIPv6Only only,
+ const char *bind_to_device,
+ bool reuse_port,
+ bool free_bind,
+ bool transparent,
+ mode_t directory_mode,
+ mode_t socket_mode,
+ const char *label) {
+
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ int r;
+
+ assert(a);
+
+ r = socket_address_verify(a, true);
+ if (r < 0)
+ return r;
+
+ if (socket_address_family(a) == AF_INET6 && !socket_ipv6_is_supported())
+ return -EAFNOSUPPORT;
+
+ if (label) {
+ r = mac_selinux_create_socket_prepare(label);
+ if (r < 0)
+ return r;
+ }
+
+ fd = socket(socket_address_family(a), a->type | flags, a->protocol);
+ r = fd < 0 ? -errno : 0;
+
+ if (label)
+ mac_selinux_create_socket_clear();
+
+ if (r < 0)
+ return r;
+
+ if (socket_address_family(a) == AF_INET6 && only != SOCKET_ADDRESS_DEFAULT) {
+ r = setsockopt_int(fd, IPPROTO_IPV6, IPV6_V6ONLY, only == SOCKET_ADDRESS_IPV6_ONLY);
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) {
+ if (bind_to_device) {
+ r = socket_bind_to_ifname(fd, bind_to_device);
+ if (r < 0)
+ return r;
+ }
+
+ if (reuse_port) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEPORT, true);
+ if (r < 0)
+ log_warning_errno(r, "SO_REUSEPORT failed: %m");
+ }
+
+ if (free_bind) {
+ r = socket_set_freebind(fd, socket_address_family(a), true);
+ if (r < 0)
+ log_warning_errno(r, "IP_FREEBIND/IPV6_FREEBIND failed: %m");
+ }
+
+ if (transparent) {
+ r = socket_set_transparent(fd, socket_address_family(a), true);
+ if (r < 0)
+ log_warning_errno(r, "IP_TRANSPARENT/IPV6_TRANSPARENT failed: %m");
+ }
+ }
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ p = socket_address_get_path(a);
+ if (p) {
+ /* Create parents */
+ (void) mkdir_parents_label(p, directory_mode);
+
+ /* Enforce the right access mode for the socket */
+ RUN_WITH_UMASK(~socket_mode) {
+ r = mac_selinux_bind(fd, &a->sockaddr.sa, a->size);
+ if (r == -EADDRINUSE) {
+ /* Unlink and try again */
+
+ if (unlink(p) < 0)
+ return r; /* didn't work, return original error */
+
+ r = mac_selinux_bind(fd, &a->sockaddr.sa, a->size);
+ }
+ if (r < 0)
+ return r;
+ }
+ } else {
+ if (bind(fd, &a->sockaddr.sa, a->size) < 0)
+ return -errno;
+ }
+
+ if (socket_address_can_accept(a))
+ if (listen(fd, backlog) < 0)
+ return -errno;
+
+ /* Let's trigger an inotify event on the socket node, so that anyone waiting for this socket to be connectable
+ * gets notified */
+ if (p)
+ (void) touch(p);
+
+ return TAKE_FD(fd);
+}
diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c
new file mode 100644
index 0000000..48d0718
--- /dev/null
+++ b/src/basic/socket-util.c
@@ -0,0 +1,1388 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <linux/if.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_socket.h"
+#include "missing_network.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "utf8.h"
+
+#if ENABLE_IDN
+# define IDN_FLAGS NI_IDN
+#else
+# define IDN_FLAGS 0
+#endif
+
+static const char* const socket_address_type_table[] = {
+ [SOCK_STREAM] = "Stream",
+ [SOCK_DGRAM] = "Datagram",
+ [SOCK_RAW] = "Raw",
+ [SOCK_RDM] = "ReliableDatagram",
+ [SOCK_SEQPACKET] = "SequentialPacket",
+ [SOCK_DCCP] = "DatagramCongestionControl",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_address_type, int);
+
+int socket_address_verify(const SocketAddress *a, bool strict) {
+ assert(a);
+
+ /* With 'strict' we enforce additional sanity constraints which are not set by the standard,
+ * but should only apply to sockets we create ourselves. */
+
+ switch (socket_address_family(a)) {
+
+ case AF_INET:
+ if (a->size != sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ if (a->sockaddr.in.sin_port == 0)
+ return -EINVAL;
+
+ if (!IN_SET(a->type, 0, SOCK_STREAM, SOCK_DGRAM))
+ return -EINVAL;
+
+ return 0;
+
+ case AF_INET6:
+ if (a->size != sizeof(struct sockaddr_in6))
+ return -EINVAL;
+
+ if (a->sockaddr.in6.sin6_port == 0)
+ return -EINVAL;
+
+ if (!IN_SET(a->type, 0, SOCK_STREAM, SOCK_DGRAM))
+ return -EINVAL;
+
+ return 0;
+
+ case AF_UNIX:
+ if (a->size < offsetof(struct sockaddr_un, sun_path))
+ return -EINVAL;
+ if (a->size > sizeof(struct sockaddr_un) + !strict)
+ /* If !strict, allow one extra byte, since getsockname() on Linux will append
+ * a NUL byte if we have path sockets that are above sun_path's full size. */
+ return -EINVAL;
+
+ if (a->size > offsetof(struct sockaddr_un, sun_path) &&
+ a->sockaddr.un.sun_path[0] != 0 &&
+ strict) {
+ /* Only validate file system sockets here, and only in strict mode */
+ const char *e;
+
+ e = memchr(a->sockaddr.un.sun_path, 0, sizeof(a->sockaddr.un.sun_path));
+ if (e) {
+ /* If there's an embedded NUL byte, make sure the size of the socket address matches it */
+ if (a->size != offsetof(struct sockaddr_un, sun_path) + (e - a->sockaddr.un.sun_path) + 1)
+ return -EINVAL;
+ } else {
+ /* If there's no embedded NUL byte, then the size needs to match the whole
+ * structure or the structure with one extra NUL byte suffixed. (Yeah, Linux is awful,
+ * and considers both equivalent: getsockname() even extends sockaddr_un beyond its
+ * size if the path is non NUL terminated.)*/
+ if (!IN_SET(a->size, sizeof(a->sockaddr.un.sun_path), sizeof(a->sockaddr.un.sun_path)+1))
+ return -EINVAL;
+ }
+ }
+
+ if (!IN_SET(a->type, 0, SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET))
+ return -EINVAL;
+
+ return 0;
+
+ case AF_NETLINK:
+
+ if (a->size != sizeof(struct sockaddr_nl))
+ return -EINVAL;
+
+ if (!IN_SET(a->type, 0, SOCK_RAW, SOCK_DGRAM))
+ return -EINVAL;
+
+ return 0;
+
+ case AF_VSOCK:
+ if (a->size != sizeof(struct sockaddr_vm))
+ return -EINVAL;
+
+ if (!IN_SET(a->type, 0, SOCK_STREAM, SOCK_DGRAM))
+ return -EINVAL;
+
+ return 0;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int socket_address_print(const SocketAddress *a, char **ret) {
+ int r;
+
+ assert(a);
+ assert(ret);
+
+ r = socket_address_verify(a, false); /* We do non-strict validation, because we want to be
+ * able to pretty-print any socket the kernel considers
+ * valid. We still need to do validation to know if we
+ * can meaningfully print the address. */
+ if (r < 0)
+ return r;
+
+ if (socket_address_family(a) == AF_NETLINK) {
+ _cleanup_free_ char *sfamily = NULL;
+
+ r = netlink_family_to_string_alloc(a->protocol, &sfamily);
+ if (r < 0)
+ return r;
+
+ r = asprintf(ret, "%s %u", sfamily, a->sockaddr.nl.nl_groups);
+ if (r < 0)
+ return -ENOMEM;
+
+ return 0;
+ }
+
+ return sockaddr_pretty(&a->sockaddr.sa, a->size, false, true, ret);
+}
+
+bool socket_address_can_accept(const SocketAddress *a) {
+ assert(a);
+
+ return
+ IN_SET(a->type, SOCK_STREAM, SOCK_SEQPACKET);
+}
+
+bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) {
+ assert(a);
+ assert(b);
+
+ /* Invalid addresses are unequal to all */
+ if (socket_address_verify(a, false) < 0 ||
+ socket_address_verify(b, false) < 0)
+ return false;
+
+ if (a->type != b->type)
+ return false;
+
+ if (socket_address_family(a) != socket_address_family(b))
+ return false;
+
+ switch (socket_address_family(a)) {
+
+ case AF_INET:
+ if (a->sockaddr.in.sin_addr.s_addr != b->sockaddr.in.sin_addr.s_addr)
+ return false;
+
+ if (a->sockaddr.in.sin_port != b->sockaddr.in.sin_port)
+ return false;
+
+ break;
+
+ case AF_INET6:
+ if (memcmp(&a->sockaddr.in6.sin6_addr, &b->sockaddr.in6.sin6_addr, sizeof(a->sockaddr.in6.sin6_addr)) != 0)
+ return false;
+
+ if (a->sockaddr.in6.sin6_port != b->sockaddr.in6.sin6_port)
+ return false;
+
+ break;
+
+ case AF_UNIX:
+ if (a->size <= offsetof(struct sockaddr_un, sun_path) ||
+ b->size <= offsetof(struct sockaddr_un, sun_path))
+ return false;
+
+ if ((a->sockaddr.un.sun_path[0] == 0) != (b->sockaddr.un.sun_path[0] == 0))
+ return false;
+
+ if (a->sockaddr.un.sun_path[0]) {
+ if (!path_equal_or_files_same(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, 0))
+ return false;
+ } else {
+ if (a->size != b->size)
+ return false;
+
+ if (memcmp(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, a->size) != 0)
+ return false;
+ }
+
+ break;
+
+ case AF_NETLINK:
+ if (a->protocol != b->protocol)
+ return false;
+
+ if (a->sockaddr.nl.nl_groups != b->sockaddr.nl.nl_groups)
+ return false;
+
+ break;
+
+ case AF_VSOCK:
+ if (a->sockaddr.vm.svm_cid != b->sockaddr.vm.svm_cid)
+ return false;
+
+ if (a->sockaddr.vm.svm_port != b->sockaddr.vm.svm_port)
+ return false;
+
+ break;
+
+ default:
+ /* Cannot compare, so we assume the addresses are different */
+ return false;
+ }
+
+ return true;
+}
+
+const char* socket_address_get_path(const SocketAddress *a) {
+ assert(a);
+
+ if (socket_address_family(a) != AF_UNIX)
+ return NULL;
+
+ if (a->sockaddr.un.sun_path[0] == 0)
+ return NULL;
+
+ /* Note that this is only safe because we know that there's an extra NUL byte after the sockaddr_un
+ * structure. On Linux AF_UNIX file system socket addresses don't have to be NUL terminated if they take up the
+ * full sun_path space. */
+ assert_cc(sizeof(union sockaddr_union) >= sizeof(struct sockaddr_un)+1);
+ return a->sockaddr.un.sun_path;
+}
+
+bool socket_ipv6_is_supported(void) {
+ if (access("/proc/net/if_inet6", F_OK) != 0)
+ return false;
+
+ return true;
+}
+
+bool socket_address_matches_fd(const SocketAddress *a, int fd) {
+ SocketAddress b;
+ socklen_t solen;
+
+ assert(a);
+ assert(fd >= 0);
+
+ b.size = sizeof(b.sockaddr);
+ if (getsockname(fd, &b.sockaddr.sa, &b.size) < 0)
+ return false;
+
+ if (b.sockaddr.sa.sa_family != a->sockaddr.sa.sa_family)
+ return false;
+
+ solen = sizeof(b.type);
+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &b.type, &solen) < 0)
+ return false;
+
+ if (b.type != a->type)
+ return false;
+
+ if (a->protocol != 0) {
+ solen = sizeof(b.protocol);
+ if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &b.protocol, &solen) < 0)
+ return false;
+
+ if (b.protocol != a->protocol)
+ return false;
+ }
+
+ return socket_address_equal(a, &b);
+}
+
+int sockaddr_port(const struct sockaddr *_sa, unsigned *ret_port) {
+ union sockaddr_union *sa = (union sockaddr_union*) _sa;
+
+ /* Note, this returns the port as 'unsigned' rather than 'uint16_t', as AF_VSOCK knows larger ports */
+
+ assert(sa);
+
+ switch (sa->sa.sa_family) {
+
+ case AF_INET:
+ *ret_port = be16toh(sa->in.sin_port);
+ return 0;
+
+ case AF_INET6:
+ *ret_port = be16toh(sa->in6.sin6_port);
+ return 0;
+
+ case AF_VSOCK:
+ *ret_port = sa->vm.svm_port;
+ return 0;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int sockaddr_pretty(
+ const struct sockaddr *_sa,
+ socklen_t salen,
+ bool translate_ipv6,
+ bool include_port,
+ char **ret) {
+
+ union sockaddr_union *sa = (union sockaddr_union*) _sa;
+ char *p;
+ int r;
+
+ assert(sa);
+ assert(salen >= sizeof(sa->sa.sa_family));
+
+ switch (sa->sa.sa_family) {
+
+ case AF_INET: {
+ uint32_t a;
+
+ a = be32toh(sa->in.sin_addr.s_addr);
+
+ if (include_port)
+ r = asprintf(&p,
+ "%u.%u.%u.%u:%u",
+ a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
+ be16toh(sa->in.sin_port));
+ else
+ r = asprintf(&p,
+ "%u.%u.%u.%u",
+ a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF);
+ if (r < 0)
+ return -ENOMEM;
+ break;
+ }
+
+ case AF_INET6: {
+ static const unsigned char ipv4_prefix[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF
+ };
+
+ if (translate_ipv6 &&
+ memcmp(&sa->in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0) {
+ const uint8_t *a = sa->in6.sin6_addr.s6_addr+12;
+ if (include_port)
+ r = asprintf(&p,
+ "%u.%u.%u.%u:%u",
+ a[0], a[1], a[2], a[3],
+ be16toh(sa->in6.sin6_port));
+ else
+ r = asprintf(&p,
+ "%u.%u.%u.%u",
+ a[0], a[1], a[2], a[3]);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ char a[INET6_ADDRSTRLEN], ifname[IF_NAMESIZE + 1];
+
+ inet_ntop(AF_INET6, &sa->in6.sin6_addr, a, sizeof(a));
+ if (sa->in6.sin6_scope_id != 0)
+ format_ifname_full(sa->in6.sin6_scope_id, ifname, FORMAT_IFNAME_IFINDEX);
+
+ if (include_port) {
+ r = asprintf(&p,
+ "[%s]:%u%s%s",
+ a,
+ be16toh(sa->in6.sin6_port),
+ sa->in6.sin6_scope_id != 0 ? "%" : "",
+ sa->in6.sin6_scope_id != 0 ? ifname : "");
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ p = sa->in6.sin6_scope_id != 0 ? strjoin(a, "%", ifname) : strdup(a);
+ if (!p)
+ return -ENOMEM;
+ }
+ }
+
+ break;
+ }
+
+ case AF_UNIX:
+ if (salen <= offsetof(struct sockaddr_un, sun_path) ||
+ (sa->un.sun_path[0] == 0 && salen == offsetof(struct sockaddr_un, sun_path) + 1))
+ /* The name must have at least one character (and the leading NUL does not count) */
+ p = strdup("<unnamed>");
+ else {
+ /* Note that we calculate the path pointer here through the .un_buffer[] field, in order to
+ * outtrick bounds checking tools such as ubsan, which are too smart for their own good: on
+ * Linux the kernel may return sun_path[] data one byte longer than the declared size of the
+ * field. */
+ char *path = (char*) sa->un_buffer + offsetof(struct sockaddr_un, sun_path);
+ size_t path_len = salen - offsetof(struct sockaddr_un, sun_path);
+
+ if (path[0] == 0) {
+ /* Abstract socket. When parsing address information from, we
+ * explicitly reject overly long paths and paths with embedded NULs.
+ * But we might get such a socket from the outside. Let's return
+ * something meaningful and printable in this case. */
+
+ _cleanup_free_ char *e = NULL;
+
+ e = cescape_length(path + 1, path_len - 1);
+ if (!e)
+ return -ENOMEM;
+
+ p = strjoin("@", e);
+ } else {
+ if (path[path_len - 1] == '\0')
+ /* We expect a terminating NUL and don't print it */
+ path_len --;
+
+ p = cescape_length(path, path_len);
+ }
+ }
+ if (!p)
+ return -ENOMEM;
+
+ break;
+
+ case AF_VSOCK:
+ if (include_port) {
+ if (sa->vm.svm_cid == VMADDR_CID_ANY)
+ r = asprintf(&p, "vsock::%u", sa->vm.svm_port);
+ else
+ r = asprintf(&p, "vsock:%u:%u", sa->vm.svm_cid, sa->vm.svm_port);
+ } else
+ r = asprintf(&p, "vsock:%u", sa->vm.svm_cid);
+ if (r < 0)
+ return -ENOMEM;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ *ret = p;
+ return 0;
+}
+
+int getpeername_pretty(int fd, bool include_port, char **ret) {
+ union sockaddr_union sa;
+ socklen_t salen = sizeof(sa);
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (getpeername(fd, &sa.sa, &salen) < 0)
+ return -errno;
+
+ if (sa.sa.sa_family == AF_UNIX) {
+ struct ucred ucred = {};
+
+ /* UNIX connection sockets are anonymous, so let's use
+ * PID/UID as pretty credentials instead */
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ if (asprintf(ret, "PID "PID_FMT"/UID "UID_FMT, ucred.pid, ucred.uid) < 0)
+ return -ENOMEM;
+
+ return 0;
+ }
+
+ /* For remote sockets we translate IPv6 addresses back to IPv4
+ * if applicable, since that's nicer. */
+
+ return sockaddr_pretty(&sa.sa, salen, true, include_port, ret);
+}
+
+int getsockname_pretty(int fd, char **ret) {
+ union sockaddr_union sa;
+ socklen_t salen = sizeof(sa);
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (getsockname(fd, &sa.sa, &salen) < 0)
+ return -errno;
+
+ /* For local sockets we do not translate IPv6 addresses back
+ * to IPv6 if applicable, since this is usually used for
+ * listening sockets where the difference between IPv4 and
+ * IPv6 matters. */
+
+ return sockaddr_pretty(&sa.sa, salen, false, true, ret);
+}
+
+int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret) {
+ int r;
+ char host[NI_MAXHOST], *ret;
+
+ assert(_ret);
+
+ r = getnameinfo(&sa->sa, salen, host, sizeof(host), NULL, 0, IDN_FLAGS);
+ if (r != 0) {
+ int saved_errno = errno;
+
+ r = sockaddr_pretty(&sa->sa, salen, true, true, &ret);
+ if (r < 0)
+ return r;
+
+ log_debug_errno(saved_errno, "getnameinfo(%s) failed: %m", ret);
+ } else {
+ ret = strdup(host);
+ if (!ret)
+ return -ENOMEM;
+ }
+
+ *_ret = ret;
+ return 0;
+}
+
+static const char* const netlink_family_table[] = {
+ [NETLINK_ROUTE] = "route",
+ [NETLINK_FIREWALL] = "firewall",
+ [NETLINK_INET_DIAG] = "inet-diag",
+ [NETLINK_NFLOG] = "nflog",
+ [NETLINK_XFRM] = "xfrm",
+ [NETLINK_SELINUX] = "selinux",
+ [NETLINK_ISCSI] = "iscsi",
+ [NETLINK_AUDIT] = "audit",
+ [NETLINK_FIB_LOOKUP] = "fib-lookup",
+ [NETLINK_CONNECTOR] = "connector",
+ [NETLINK_NETFILTER] = "netfilter",
+ [NETLINK_IP6_FW] = "ip6-fw",
+ [NETLINK_DNRTMSG] = "dnrtmsg",
+ [NETLINK_KOBJECT_UEVENT] = "kobject-uevent",
+ [NETLINK_GENERIC] = "generic",
+ [NETLINK_SCSITRANSPORT] = "scsitransport",
+ [NETLINK_ECRYPTFS] = "ecryptfs",
+ [NETLINK_RDMA] = "rdma",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(netlink_family, int, INT_MAX);
+
+static const char* const socket_address_bind_ipv6_only_table[_SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX] = {
+ [SOCKET_ADDRESS_DEFAULT] = "default",
+ [SOCKET_ADDRESS_BOTH] = "both",
+ [SOCKET_ADDRESS_IPV6_ONLY] = "ipv6-only"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_address_bind_ipv6_only, SocketAddressBindIPv6Only);
+
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *n) {
+ int r;
+
+ r = parse_boolean(n);
+ if (r > 0)
+ return SOCKET_ADDRESS_IPV6_ONLY;
+ if (r == 0)
+ return SOCKET_ADDRESS_BOTH;
+
+ return socket_address_bind_ipv6_only_from_string(n);
+}
+
+bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b) {
+ assert(a);
+ assert(b);
+
+ if (a->sa.sa_family != b->sa.sa_family)
+ return false;
+
+ if (a->sa.sa_family == AF_INET)
+ return a->in.sin_addr.s_addr == b->in.sin_addr.s_addr;
+
+ if (a->sa.sa_family == AF_INET6)
+ return memcmp(&a->in6.sin6_addr, &b->in6.sin6_addr, sizeof(a->in6.sin6_addr)) == 0;
+
+ if (a->sa.sa_family == AF_VSOCK)
+ return a->vm.svm_cid == b->vm.svm_cid;
+
+ return false;
+}
+
+int fd_set_sndbuf(int fd, size_t n, bool increase) {
+ int r, value;
+ socklen_t l = sizeof(value);
+
+ if (n > INT_MAX)
+ return -ERANGE;
+
+ r = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, &l);
+ if (r >= 0 && l == sizeof(value) && increase ? (size_t) value >= n*2 : (size_t) value == n*2)
+ return 0;
+
+ /* First, try to set the buffer size with SO_SNDBUF. */
+ r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUF, n);
+ if (r < 0)
+ return r;
+
+ /* SO_SNDBUF above may set to the kernel limit, instead of the requested size.
+ * So, we need to check the actual buffer size here. */
+ l = sizeof(value);
+ r = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, &l);
+ if (r >= 0 && l == sizeof(value) && increase ? (size_t) value >= n*2 : (size_t) value == n*2)
+ return 1;
+
+ /* If we have the privileges we will ignore the kernel limit. */
+ r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUFFORCE, n);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int fd_set_rcvbuf(int fd, size_t n, bool increase) {
+ int r, value;
+ socklen_t l = sizeof(value);
+
+ if (n > INT_MAX)
+ return -ERANGE;
+
+ r = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, &l);
+ if (r >= 0 && l == sizeof(value) && increase ? (size_t) value >= n*2 : (size_t) value == n*2)
+ return 0;
+
+ /* First, try to set the buffer size with SO_RCVBUF. */
+ r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUF, n);
+ if (r < 0)
+ return r;
+
+ /* SO_RCVBUF above may set to the kernel limit, instead of the requested size.
+ * So, we need to check the actual buffer size here. */
+ l = sizeof(value);
+ r = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, &l);
+ if (r >= 0 && l == sizeof(value) && increase ? (size_t) value >= n*2 : (size_t) value == n*2)
+ return 1;
+
+ /* If we have the privileges we will ignore the kernel limit. */
+ r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUFFORCE, n);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static const char* const ip_tos_table[] = {
+ [IPTOS_LOWDELAY] = "low-delay",
+ [IPTOS_THROUGHPUT] = "throughput",
+ [IPTOS_RELIABILITY] = "reliability",
+ [IPTOS_LOWCOST] = "low-cost",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ip_tos, int, 0xff);
+
+bool ifname_valid_full(const char *p, IfnameValidFlags flags) {
+ bool numeric = true;
+
+ /* Checks whether a network interface name is valid. This is inspired by dev_valid_name() in the kernel sources
+ * but slightly stricter, as we only allow non-control, non-space ASCII characters in the interface name. We
+ * also don't permit names that only container numbers, to avoid confusion with numeric interface indexes. */
+
+ assert(!(flags & ~_IFNAME_VALID_ALL));
+
+ if (isempty(p))
+ return false;
+
+ if (flags & IFNAME_VALID_ALTERNATIVE) {
+ if (strlen(p) >= ALTIFNAMSIZ)
+ return false;
+ } else {
+ if (strlen(p) >= IFNAMSIZ)
+ return false;
+ }
+
+ if (dot_or_dot_dot(p))
+ return false;
+
+ for (const char *t = p; *t; t++) {
+ if ((unsigned char) *t >= 127U)
+ return false;
+
+ if ((unsigned char) *t <= 32U)
+ return false;
+
+ if (IN_SET(*t, ':', '/'))
+ return false;
+
+ numeric = numeric && (*t >= '0' && *t <= '9');
+ }
+
+ if (numeric) {
+ if (!(flags & IFNAME_VALID_NUMERIC))
+ return false;
+
+ /* Verify that the number is well-formatted and in range. */
+ if (parse_ifindex(p) < 0)
+ return false;
+ }
+
+ return true;
+}
+
+bool address_label_valid(const char *p) {
+
+ if (isempty(p))
+ return false;
+
+ if (strlen(p) >= IFNAMSIZ)
+ return false;
+
+ while (*p) {
+ if ((uint8_t) *p >= 127U)
+ return false;
+
+ if ((uint8_t) *p <= 31U)
+ return false;
+ p++;
+ }
+
+ return true;
+}
+
+int getpeercred(int fd, struct ucred *ucred) {
+ socklen_t n = sizeof(struct ucred);
+ struct ucred u;
+ int r;
+
+ assert(fd >= 0);
+ assert(ucred);
+
+ r = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &u, &n);
+ if (r < 0)
+ return -errno;
+
+ if (n != sizeof(struct ucred))
+ return -EIO;
+
+ /* Check if the data is actually useful and not suppressed due to namespacing issues */
+ if (!pid_is_valid(u.pid))
+ return -ENODATA;
+
+ /* Note that we don't check UID/GID here, as namespace translation works differently there: instead of
+ * receiving in "invalid" user/group we get the overflow UID/GID. */
+
+ *ucred = u;
+ return 0;
+}
+
+int getpeersec(int fd, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ socklen_t n = 64;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ for (;;) {
+ s = new0(char, n+1);
+ if (!s)
+ return -ENOMEM;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_PEERSEC, s, &n) >= 0)
+ break;
+
+ if (errno != ERANGE)
+ return -errno;
+
+ s = mfree(s);
+ }
+
+ if (isempty(s))
+ return -EOPNOTSUPP;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int getpeergroups(int fd, gid_t **ret) {
+ socklen_t n = sizeof(gid_t) * 64;
+ _cleanup_free_ gid_t *d = NULL;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ for (;;) {
+ d = malloc(n);
+ if (!d)
+ return -ENOMEM;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, d, &n) >= 0)
+ break;
+
+ if (errno != ERANGE)
+ return -errno;
+
+ d = mfree(d);
+ }
+
+ assert_se(n % sizeof(gid_t) == 0);
+ n /= sizeof(gid_t);
+
+ if ((socklen_t) (int) n != n)
+ return -E2BIG;
+
+ *ret = TAKE_PTR(d);
+
+ return (int) n;
+}
+
+ssize_t send_one_fd_iov_sa(
+ int transport_fd,
+ int fd,
+ struct iovec *iov, size_t iovlen,
+ const struct sockaddr *sa, socklen_t len,
+ int flags) {
+
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control = {};
+ struct msghdr mh = {
+ .msg_name = (struct sockaddr*) sa,
+ .msg_namelen = len,
+ .msg_iov = iov,
+ .msg_iovlen = iovlen,
+ };
+ ssize_t k;
+
+ assert(transport_fd >= 0);
+
+ /*
+ * We need either an FD or data to send.
+ * If there's nothing, return an error.
+ */
+ if (fd < 0 && !iov)
+ return -EINVAL;
+
+ if (fd >= 0) {
+ struct cmsghdr *cmsg;
+
+ mh.msg_control = &control;
+ mh.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+ }
+ k = sendmsg(transport_fd, &mh, MSG_NOSIGNAL | flags);
+ if (k < 0)
+ return (ssize_t) -errno;
+
+ return k;
+}
+
+int send_one_fd_sa(
+ int transport_fd,
+ int fd,
+ const struct sockaddr *sa, socklen_t len,
+ int flags) {
+
+ assert(fd >= 0);
+
+ return (int) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, sa, len, flags);
+}
+
+ssize_t receive_one_fd_iov(
+ int transport_fd,
+ struct iovec *iov, size_t iovlen,
+ int flags,
+ int *ret_fd) {
+
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control;
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = iov,
+ .msg_iovlen = iovlen,
+ };
+ struct cmsghdr *found;
+ ssize_t k;
+
+ assert(transport_fd >= 0);
+ assert(ret_fd);
+
+ /*
+ * Receive a single FD via @transport_fd. We don't care for
+ * the transport-type. We retrieve a single FD at most, so for
+ * packet-based transports, the caller must ensure to send
+ * only a single FD per packet. This is best used in
+ * combination with send_one_fd().
+ */
+
+ k = recvmsg_safe(transport_fd, &mh, MSG_CMSG_CLOEXEC | flags);
+ if (k < 0)
+ return k;
+
+ found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
+ if (!found) {
+ cmsg_close_all(&mh);
+
+ /* If didn't receive an FD or any data, return an error. */
+ if (k == 0)
+ return -EIO;
+ }
+
+ if (found)
+ *ret_fd = *(int*) CMSG_DATA(found);
+ else
+ *ret_fd = -1;
+
+ return k;
+}
+
+int receive_one_fd(int transport_fd, int flags) {
+ int fd;
+ ssize_t k;
+
+ k = receive_one_fd_iov(transport_fd, NULL, 0, flags, &fd);
+ if (k == 0)
+ return fd;
+
+ /* k must be negative, since receive_one_fd_iov() only returns
+ * a positive value if data was received through the iov. */
+ assert(k < 0);
+ return (int) k;
+}
+
+ssize_t next_datagram_size_fd(int fd) {
+ ssize_t l;
+ int k;
+
+ /* This is a bit like FIONREAD/SIOCINQ, however a bit more powerful. The difference being: recv(MSG_PEEK) will
+ * actually cause the next datagram in the queue to be validated regarding checksums, which FIONREAD doesn't
+ * do. This difference is actually of major importance as we need to be sure that the size returned here
+ * actually matches what we will read with recvmsg() next, as otherwise we might end up allocating a buffer of
+ * the wrong size. */
+
+ l = recv(fd, NULL, 0, MSG_PEEK|MSG_TRUNC);
+ if (l < 0) {
+ if (IN_SET(errno, EOPNOTSUPP, EFAULT))
+ goto fallback;
+
+ return -errno;
+ }
+ if (l == 0)
+ goto fallback;
+
+ return l;
+
+fallback:
+ k = 0;
+
+ /* Some sockets (AF_PACKET) do not support null-sized recv() with MSG_TRUNC set, let's fall back to FIONREAD
+ * for them. Checksums don't matter for raw sockets anyway, hence this should be fine. */
+
+ if (ioctl(fd, FIONREAD, &k) < 0)
+ return -errno;
+
+ return (ssize_t) k;
+}
+
+/* Put a limit on how many times will attempt to call accept4(). We loop
+ * only on "transient" errors, but let's make sure we don't loop forever. */
+#define MAX_FLUSH_ITERATIONS 1024
+
+int flush_accept(int fd) {
+
+ int r, b;
+ socklen_t l = sizeof(b);
+
+ /* Similar to flush_fd() but flushes all incoming connections by accepting and immediately closing
+ * them. */
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &b, &l) < 0)
+ return -errno;
+
+ assert(l == sizeof(b));
+ if (!b) /* Let's check if this socket accepts connections before calling accept(). accept4() can
+ * return EOPNOTSUPP if the fd is not a listening socket, which we should treat as a fatal
+ * error, or in case the incoming TCP connection triggered a network issue, which we want to
+ * treat as a transient error. Thus, let's rule out the first reason for EOPNOTSUPP early, so
+ * we can loop safely on transient errors below. */
+ return -ENOTTY;
+
+ for (unsigned iteration = 0;; iteration++) {
+ int cfd;
+
+ r = fd_wait_for_event(fd, POLLIN, 0);
+ if (r < 0) {
+ if (r == -EINTR)
+ continue;
+
+ return r;
+ }
+ if (r == 0)
+ return 0;
+
+ if (iteration >= MAX_FLUSH_ITERATIONS)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+ "Failed to flush connections within " STRINGIFY(MAX_FLUSH_ITERATIONS) " iterations.");
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ continue;
+
+ return -errno;
+ }
+
+ safe_close(cfd);
+ }
+}
+
+struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length) {
+ struct cmsghdr *cmsg;
+
+ assert(mh);
+
+ CMSG_FOREACH(cmsg, mh)
+ if (cmsg->cmsg_level == level &&
+ cmsg->cmsg_type == type &&
+ (length == (socklen_t) -1 || length == cmsg->cmsg_len))
+ return cmsg;
+
+ return NULL;
+}
+
+int socket_ioctl_fd(void) {
+ int fd;
+
+ /* Create a socket to invoke the various network interface ioctl()s on. Traditionally only AF_INET was good for
+ * that. Since kernel 4.6 AF_NETLINK works for this too. We first try to use AF_INET hence, but if that's not
+ * available (for example, because it is made unavailable via SECCOMP or such), we'll fall back to the more
+ * generic AF_NETLINK. */
+
+ fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_GENERIC);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int sockaddr_un_unlink(const struct sockaddr_un *sa) {
+ const char *p, * nul;
+
+ assert(sa);
+
+ if (sa->sun_family != AF_UNIX)
+ return -EPROTOTYPE;
+
+ if (sa->sun_path[0] == 0) /* Nothing to do for abstract sockets */
+ return 0;
+
+ /* The path in .sun_path is not necessarily NUL terminated. Let's fix that. */
+ nul = memchr(sa->sun_path, 0, sizeof(sa->sun_path));
+ if (nul)
+ p = sa->sun_path;
+ else
+ p = memdupa_suffix0(sa->sun_path, sizeof(sa->sun_path));
+
+ if (unlink(p) < 0)
+ return -errno;
+
+ return 1;
+}
+
+int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path) {
+ size_t l;
+
+ assert(ret);
+ assert(path);
+
+ /* Initialize ret->sun_path from the specified argument. This will interpret paths starting with '@' as
+ * abstract namespace sockets, and those starting with '/' as regular filesystem sockets. It won't accept
+ * anything else (i.e. no relative paths), to avoid ambiguities. Note that this function cannot be used to
+ * reference paths in the abstract namespace that include NUL bytes in the name. */
+
+ l = strlen(path);
+ if (l < 2)
+ return -EINVAL;
+ if (!IN_SET(path[0], '/', '@'))
+ return -EINVAL;
+
+ /* Don't allow paths larger than the space in sockaddr_un. Note that we are a tiny bit more restrictive than
+ * the kernel is: we insist on NUL termination (both for abstract namespace and regular file system socket
+ * addresses!), which the kernel doesn't. We do this to reduce chance of incompatibility with other apps that
+ * do not expect non-NUL terminated file system path*/
+ if (l+1 > sizeof(ret->sun_path))
+ return -EINVAL;
+
+ *ret = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ };
+
+ if (path[0] == '@') {
+ /* Abstract namespace socket */
+ memcpy(ret->sun_path + 1, path + 1, l); /* copy *with* trailing NUL byte */
+ return (int) (offsetof(struct sockaddr_un, sun_path) + l); /* 🔥 *don't* 🔥 include trailing NUL in size */
+
+ } else {
+ assert(path[0] == '/');
+
+ /* File system socket */
+ memcpy(ret->sun_path, path, l + 1); /* copy *with* trailing NUL byte */
+ return (int) (offsetof(struct sockaddr_un, sun_path) + l + 1); /* include trailing NUL in size */
+ }
+}
+
+int socket_bind_to_ifname(int fd, const char *ifname) {
+ assert(fd >= 0);
+
+ /* Call with NULL to drop binding */
+
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen_ptr(ifname)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int socket_bind_to_ifindex(int fd, int ifindex) {
+ char ifname[IF_NAMESIZE + 1];
+ int r;
+
+ assert(fd >= 0);
+
+ if (ifindex <= 0) {
+ /* Drop binding */
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, NULL, 0) < 0)
+ return -errno;
+
+ return 0;
+ }
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_BINDTOIFINDEX, ifindex);
+ if (r != -ENOPROTOOPT)
+ return r;
+
+ /* Fall back to SO_BINDTODEVICE on kernels < 5.0 which didn't have SO_BINDTOIFINDEX */
+ if (!format_ifname(ifindex, ifname))
+ return -errno;
+
+ return socket_bind_to_ifname(fd, ifname);
+}
+
+ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags) {
+ ssize_t n;
+
+ /* A wrapper around recvmsg() that checks for MSG_CTRUNC, and turns it into an error, in a reasonably
+ * safe way, closing any SCM_RIGHTS fds in the error path.
+ *
+ * Note that unlike our usual coding style this might modify *msg on failure. */
+
+ n = recvmsg(sockfd, msg, flags);
+ if (n < 0)
+ return -errno;
+
+ if (FLAGS_SET(msg->msg_flags, MSG_CTRUNC)) {
+ cmsg_close_all(msg);
+ return -EXFULL; /* a recognizable error code */
+ }
+
+ return n;
+}
+
+int socket_get_family(int fd, int *ret) {
+ int af;
+ socklen_t sl = sizeof(af);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &af, &sl) < 0)
+ return -errno;
+
+ if (sl != sizeof(af))
+ return -EINVAL;
+
+ return af;
+}
+
+int socket_set_recvpktinfo(int fd, int af, bool b) {
+ int r;
+
+ if (af == AF_UNSPEC) {
+ r = socket_get_family(fd, &af);
+ if (r < 0)
+ return r;
+ }
+
+ switch (af) {
+
+ case AF_INET:
+ return setsockopt_int(fd, IPPROTO_IP, IP_PKTINFO, b);
+
+ case AF_INET6:
+ return setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, b);
+
+ case AF_NETLINK:
+ return setsockopt_int(fd, SOL_NETLINK, NETLINK_PKTINFO, b);
+
+ case AF_PACKET:
+ return setsockopt_int(fd, SOL_PACKET, PACKET_AUXDATA, b);
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int socket_set_recverr(int fd, int af, bool b) {
+ int r;
+
+ if (af == AF_UNSPEC) {
+ r = socket_get_family(fd, &af);
+ if (r < 0)
+ return r;
+ }
+
+ switch (af) {
+
+ case AF_INET:
+ return setsockopt_int(fd, IPPROTO_IP, IP_RECVERR, b);
+
+ case AF_INET6:
+ return setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVERR, b);
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int socket_set_recvttl(int fd, int af, bool b) {
+ int r;
+
+ if (af == AF_UNSPEC) {
+ r = socket_get_family(fd, &af);
+ if (r < 0)
+ return r;
+ }
+
+ switch (af) {
+
+ case AF_INET:
+ return setsockopt_int(fd, IPPROTO_IP, IP_RECVTTL, b);
+
+ case AF_INET6:
+ return setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, b);
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int socket_set_ttl(int fd, int af, int ttl) {
+ int r;
+
+ if (af == AF_UNSPEC) {
+ r = socket_get_family(fd, &af);
+ if (r < 0)
+ return r;
+ }
+
+ switch (af) {
+
+ case AF_INET:
+ return setsockopt_int(fd, IPPROTO_IP, IP_TTL, ttl);
+
+ case AF_INET6:
+ return setsockopt_int(fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, ttl);
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int socket_set_unicast_if(int fd, int af, int ifi) {
+ be32_t ifindex_be = htobe32(ifi);
+ int r;
+
+ if (af == AF_UNSPEC) {
+ r = socket_get_family(fd, &af);
+ if (r < 0)
+ return r;
+ }
+
+ switch (af) {
+
+ case AF_INET:
+ if (setsockopt(fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex_be, sizeof(ifindex_be)) < 0)
+ return -errno;
+
+ return 0;
+
+ case AF_INET6:
+ if (setsockopt(fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex_be, sizeof(ifindex_be)) < 0)
+ return -errno;
+
+ return 0;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int socket_set_freebind(int fd, int af, bool b) {
+ int r;
+
+ if (af == AF_UNSPEC) {
+ r = socket_get_family(fd, &af);
+ if (r < 0)
+ return r;
+ }
+
+ switch (af) {
+
+ case AF_INET:
+ return setsockopt_int(fd, IPPROTO_IP, IP_FREEBIND, b);
+
+ case AF_INET6:
+ return setsockopt_int(fd, IPPROTO_IPV6, IPV6_FREEBIND, b);
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+int socket_set_transparent(int fd, int af, bool b) {
+ int r;
+
+ if (af == AF_UNSPEC) {
+ r = socket_get_family(fd, &af);
+ if (r < 0)
+ return r;
+ }
+
+ switch (af) {
+
+ case AF_INET:
+ return setsockopt_int(fd, IPPROTO_IP, IP_TRANSPARENT, b);
+
+ case AF_INET6:
+ return setsockopt_int(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, b);
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
diff --git a/src/basic/socket-util.h b/src/basic/socket-util.h
new file mode 100644
index 0000000..9f79280
--- /dev/null
+++ b/src/basic/socket-util.h
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <linux/netlink.h>
+#include <linux/if_ether.h>
+#include <linux/if_infiniband.h>
+#include <linux/if_packet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "macro.h"
+#include "missing_socket.h"
+#include "sparse-endian.h"
+
+union sockaddr_union {
+ /* The minimal, abstract version */
+ struct sockaddr sa;
+
+ /* The libc provided version that allocates "enough room" for every protocol */
+ struct sockaddr_storage storage;
+
+ /* Protoctol-specific implementations */
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+ struct sockaddr_un un;
+ struct sockaddr_nl nl;
+ struct sockaddr_ll ll;
+ struct sockaddr_vm vm;
+
+ /* Ensure there is enough space to store Infiniband addresses */
+ uint8_t ll_buffer[offsetof(struct sockaddr_ll, sll_addr) + CONST_MAX(ETH_ALEN, INFINIBAND_ALEN)];
+
+ /* Ensure there is enough space after the AF_UNIX sun_path for one more NUL byte, just to be sure that the path
+ * component is always followed by at least one NUL byte. */
+ uint8_t un_buffer[sizeof(struct sockaddr_un) + 1];
+};
+
+#define SUN_PATH_LEN (sizeof(((struct sockaddr_un){}).sun_path))
+
+typedef struct SocketAddress {
+ union sockaddr_union sockaddr;
+
+ /* We store the size here explicitly due to the weird
+ * sockaddr_un semantics for abstract sockets */
+ socklen_t size;
+
+ /* Socket type, i.e. SOCK_STREAM, SOCK_DGRAM, ... */
+ int type;
+
+ /* Socket protocol, IPPROTO_xxx, usually 0, except for netlink */
+ int protocol;
+} SocketAddress;
+
+typedef enum SocketAddressBindIPv6Only {
+ SOCKET_ADDRESS_DEFAULT,
+ SOCKET_ADDRESS_BOTH,
+ SOCKET_ADDRESS_IPV6_ONLY,
+ _SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX,
+ _SOCKET_ADDRESS_BIND_IPV6_ONLY_INVALID = -1
+} SocketAddressBindIPv6Only;
+
+#define socket_address_family(a) ((a)->sockaddr.sa.sa_family)
+
+const char* socket_address_type_to_string(int t) _const_;
+int socket_address_type_from_string(const char *s) _pure_;
+
+int sockaddr_un_unlink(const struct sockaddr_un *sa);
+
+static inline int socket_address_unlink(const SocketAddress *a) {
+ return socket_address_family(a) == AF_UNIX ? sockaddr_un_unlink(&a->sockaddr.un) : 0;
+}
+
+bool socket_address_can_accept(const SocketAddress *a) _pure_;
+
+int socket_address_listen(
+ const SocketAddress *a,
+ int flags,
+ int backlog,
+ SocketAddressBindIPv6Only only,
+ const char *bind_to_device,
+ bool reuse_port,
+ bool free_bind,
+ bool transparent,
+ mode_t directory_mode,
+ mode_t socket_mode,
+ const char *label);
+
+int socket_address_verify(const SocketAddress *a, bool strict) _pure_;
+int socket_address_print(const SocketAddress *a, char **p);
+bool socket_address_matches_fd(const SocketAddress *a, int fd);
+
+bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) _pure_;
+
+const char* socket_address_get_path(const SocketAddress *a);
+
+bool socket_ipv6_is_supported(void);
+
+int sockaddr_port(const struct sockaddr *_sa, unsigned *port);
+
+int sockaddr_pretty(const struct sockaddr *_sa, socklen_t salen, bool translate_ipv6, bool include_port, char **ret);
+int getpeername_pretty(int fd, bool include_port, char **ret);
+int getsockname_pretty(int fd, char **ret);
+
+int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret);
+
+const char* socket_address_bind_ipv6_only_to_string(SocketAddressBindIPv6Only b) _const_;
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_from_string(const char *s) _pure_;
+SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *s);
+
+int netlink_family_to_string_alloc(int b, char **s);
+int netlink_family_from_string(const char *s) _pure_;
+
+bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b);
+
+int fd_set_sndbuf(int fd, size_t n, bool increase);
+static inline int fd_inc_sndbuf(int fd, size_t n) {
+ return fd_set_sndbuf(fd, n, true);
+}
+int fd_set_rcvbuf(int fd, size_t n, bool increase);
+static inline int fd_inc_rcvbuf(int fd, size_t n) {
+ return fd_set_rcvbuf(fd, n, true);
+}
+
+int ip_tos_to_string_alloc(int i, char **s);
+int ip_tos_from_string(const char *s);
+
+typedef enum {
+ IFNAME_VALID_ALTERNATIVE = 1 << 0,
+ IFNAME_VALID_NUMERIC = 1 << 1,
+ _IFNAME_VALID_ALL = IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC,
+} IfnameValidFlags;
+bool ifname_valid_full(const char *p, IfnameValidFlags flags);
+static inline bool ifname_valid(const char *p) {
+ return ifname_valid_full(p, 0);
+}
+bool address_label_valid(const char *p);
+
+int getpeercred(int fd, struct ucred *ucred);
+int getpeersec(int fd, char **ret);
+int getpeergroups(int fd, gid_t **ret);
+
+ssize_t send_one_fd_iov_sa(
+ int transport_fd,
+ int fd,
+ struct iovec *iov, size_t iovlen,
+ const struct sockaddr *sa, socklen_t len,
+ int flags);
+int send_one_fd_sa(int transport_fd,
+ int fd,
+ const struct sockaddr *sa, socklen_t len,
+ int flags);
+#define send_one_fd_iov(transport_fd, fd, iov, iovlen, flags) send_one_fd_iov_sa(transport_fd, fd, iov, iovlen, NULL, 0, flags)
+#define send_one_fd(transport_fd, fd, flags) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, NULL, 0, flags)
+ssize_t receive_one_fd_iov(int transport_fd, struct iovec *iov, size_t iovlen, int flags, int *ret_fd);
+int receive_one_fd(int transport_fd, int flags);
+
+ssize_t next_datagram_size_fd(int fd);
+
+int flush_accept(int fd);
+
+#define CMSG_FOREACH(cmsg, mh) \
+ for ((cmsg) = CMSG_FIRSTHDR(mh); (cmsg); (cmsg) = CMSG_NXTHDR((mh), (cmsg)))
+
+struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length);
+
+/* Type-safe, dereferencing version of cmsg_find() */
+#define CMSG_FIND_DATA(mh, level, type, ctype) \
+ ({ \
+ struct cmsghdr *_found; \
+ _found = cmsg_find(mh, level, type, CMSG_LEN(sizeof(ctype))); \
+ (ctype*) (_found ? CMSG_DATA(_found) : NULL); \
+ })
+
+/* Resolves to a type that can carry cmsghdr structures. Make sure things are properly aligned, i.e. the type
+ * itself is placed properly in memory and the size is also aligned to what's appropriate for "cmsghdr"
+ * structures. */
+#define CMSG_BUFFER_TYPE(size) \
+ union { \
+ struct cmsghdr cmsghdr; \
+ uint8_t buf[size]; \
+ uint8_t align_check[(size) >= CMSG_SPACE(0) && \
+ (size) == CMSG_ALIGN(size) ? 1 : -1]; \
+ }
+
+/*
+ * Certain hardware address types (e.g Infiniband) do not fit into sll_addr
+ * (8 bytes) and run over the structure. This macro returns the correct size that
+ * must be passed to kernel.
+ */
+#define SOCKADDR_LL_LEN(sa) \
+ ({ \
+ const struct sockaddr_ll *_sa = &(sa); \
+ size_t _mac_len = sizeof(_sa->sll_addr); \
+ assert(_sa->sll_family == AF_PACKET); \
+ if (be16toh(_sa->sll_hatype) == ARPHRD_ETHER) \
+ _mac_len = MAX(_mac_len, (size_t) ETH_ALEN); \
+ if (be16toh(_sa->sll_hatype) == ARPHRD_INFINIBAND) \
+ _mac_len = MAX(_mac_len, (size_t) INFINIBAND_ALEN); \
+ offsetof(struct sockaddr_ll, sll_addr) + _mac_len; \
+ })
+
+/* Covers only file system and abstract AF_UNIX socket addresses, but not unnamed socket addresses. */
+#define SOCKADDR_UN_LEN(sa) \
+ ({ \
+ const struct sockaddr_un *_sa = &(sa); \
+ assert(_sa->sun_family == AF_UNIX); \
+ offsetof(struct sockaddr_un, sun_path) + \
+ (_sa->sun_path[0] == 0 ? \
+ 1 + strnlen(_sa->sun_path+1, sizeof(_sa->sun_path)-1) : \
+ strnlen(_sa->sun_path, sizeof(_sa->sun_path))+1); \
+ })
+
+#define SOCKADDR_LEN(sa) \
+ ({ \
+ const union sockaddr_union *__sa = &(sa); \
+ size_t _len; \
+ switch(__sa->sa.sa_family) { \
+ case AF_INET: \
+ _len = sizeof(struct sockaddr_in); \
+ break; \
+ case AF_INET6: \
+ _len = sizeof(struct sockaddr_in6); \
+ break; \
+ case AF_UNIX: \
+ _len = SOCKADDR_UN_LEN(__sa->un); \
+ break; \
+ case AF_PACKET: \
+ _len = SOCKADDR_LL_LEN(__sa->ll); \
+ break; \
+ case AF_NETLINK: \
+ _len = sizeof(struct sockaddr_nl); \
+ break; \
+ case AF_VSOCK: \
+ _len = sizeof(struct sockaddr_vm); \
+ break; \
+ default: \
+ assert_not_reached("invalid socket family"); \
+ } \
+ _len; \
+ })
+
+int socket_ioctl_fd(void);
+
+int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path);
+
+static inline int setsockopt_int(int fd, int level, int optname, int value) {
+ if (setsockopt(fd, level, optname, &value, sizeof(value)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int socket_bind_to_ifname(int fd, const char *ifname);
+int socket_bind_to_ifindex(int fd, int ifindex);
+
+ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags);
+
+int socket_get_family(int fd, int *ret);
+int socket_set_recvpktinfo(int fd, int af, bool b);
+int socket_set_recverr(int fd, int af, bool b);
+int socket_set_recvttl(int fd, int af, bool b);
+int socket_set_ttl(int fd, int af, int ttl);
+int socket_set_unicast_if(int fd, int af, int ifi);
+int socket_set_freebind(int fd, int af, bool b);
+int socket_set_transparent(int fd, int af, bool b);
diff --git a/src/basic/sort-util.c b/src/basic/sort-util.c
new file mode 100644
index 0000000..92d7b85
--- /dev/null
+++ b/src/basic/sort-util.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sort-util.h"
+#include "alloc-util.h"
+
+/* hey glibc, APIs with callbacks without a user pointer are so useless */
+void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
+ __compar_d_fn_t compar, void *arg) {
+ size_t l, u, idx;
+ const void *p;
+ int comparison;
+
+ assert(!size_multiply_overflow(nmemb, size));
+
+ l = 0;
+ u = nmemb;
+ while (l < u) {
+ idx = (l + u) / 2;
+ p = (const uint8_t*) base + idx * size;
+ comparison = compar(key, p, arg);
+ if (comparison < 0)
+ u = idx;
+ else if (comparison > 0)
+ l = idx + 1;
+ else
+ return (void *)p;
+ }
+ return NULL;
+}
diff --git a/src/basic/sort-util.h b/src/basic/sort-util.h
new file mode 100644
index 0000000..1d194a1
--- /dev/null
+++ b/src/basic/sort-util.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdlib.h>
+
+#include "macro.h"
+
+void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
+ __compar_d_fn_t compar, void *arg);
+
+#define typesafe_bsearch_r(k, b, n, func, userdata) \
+ ({ \
+ const typeof(b[0]) *_k = k; \
+ int (*_func_)(const typeof(b[0])*, const typeof(b[0])*, typeof(userdata)) = func; \
+ xbsearch_r((const void*) _k, (b), (n), sizeof((b)[0]), (__compar_d_fn_t) _func_, userdata); \
+ })
+
+/**
+ * Normal bsearch requires base to be nonnull. Here were require
+ * that only if nmemb > 0.
+ */
+static inline void* bsearch_safe(const void *key, const void *base,
+ size_t nmemb, size_t size, __compar_fn_t compar) {
+ if (nmemb <= 0)
+ return NULL;
+
+ assert(base);
+ return bsearch(key, base, nmemb, size, compar);
+}
+
+#define typesafe_bsearch(k, b, n, func) \
+ ({ \
+ const typeof(b[0]) *_k = k; \
+ int (*_func_)(const typeof(b[0])*, const typeof(b[0])*) = func; \
+ bsearch_safe((const void*) _k, (b), (n), sizeof((b)[0]), (__compar_fn_t) _func_); \
+ })
+
+/**
+ * Normal qsort requires base to be nonnull. Here were require
+ * that only if nmemb > 0.
+ */
+static inline void _qsort_safe(void *base, size_t nmemb, size_t size, __compar_fn_t compar) {
+ if (nmemb <= 1)
+ return;
+
+ assert(base);
+ qsort(base, nmemb, size, compar);
+}
+
+/* A wrapper around the above, but that adds typesafety: the element size is automatically derived from the type and so
+ * is the prototype for the comparison function */
+#define typesafe_qsort(p, n, func) \
+ ({ \
+ int (*_func_)(const typeof(p[0])*, const typeof(p[0])*) = func; \
+ _qsort_safe((p), (n), sizeof((p)[0]), (__compar_fn_t) _func_); \
+ })
+
+static inline void qsort_r_safe(void *base, size_t nmemb, size_t size, __compar_d_fn_t compar, void *userdata) {
+ if (nmemb <= 1)
+ return;
+
+ assert(base);
+ qsort_r(base, nmemb, size, compar, userdata);
+}
+
+#define typesafe_qsort_r(p, n, func, userdata) \
+ ({ \
+ int (*_func_)(const typeof(p[0])*, const typeof(p[0])*, typeof(userdata)) = func; \
+ qsort_r_safe((p), (n), sizeof((p)[0]), (__compar_d_fn_t) _func_, userdata); \
+ })
diff --git a/src/basic/sparse-endian.h b/src/basic/sparse-endian.h
new file mode 100644
index 0000000..9583dda
--- /dev/null
+++ b/src/basic/sparse-endian.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright (c) 2012 Josh Triplett <josh@joshtriplett.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#pragma once
+
+#include <byteswap.h>
+#include <endian.h>
+#include <stdint.h>
+
+#ifdef __CHECKER__
+#define __sd_bitwise __attribute__((__bitwise__))
+#define __sd_force __attribute__((__force__))
+#else
+#define __sd_bitwise
+#define __sd_force
+#endif
+
+typedef uint16_t __sd_bitwise le16_t;
+typedef uint16_t __sd_bitwise be16_t;
+typedef uint32_t __sd_bitwise le32_t;
+typedef uint32_t __sd_bitwise be32_t;
+typedef uint64_t __sd_bitwise le64_t;
+typedef uint64_t __sd_bitwise be64_t;
+
+#undef htobe16
+#undef htole16
+#undef be16toh
+#undef le16toh
+#undef htobe32
+#undef htole32
+#undef be32toh
+#undef le32toh
+#undef htobe64
+#undef htole64
+#undef be64toh
+#undef le64toh
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define bswap_16_on_le(x) __bswap_16(x)
+#define bswap_32_on_le(x) __bswap_32(x)
+#define bswap_64_on_le(x) __bswap_64(x)
+#define bswap_16_on_be(x) (x)
+#define bswap_32_on_be(x) (x)
+#define bswap_64_on_be(x) (x)
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define bswap_16_on_le(x) (x)
+#define bswap_32_on_le(x) (x)
+#define bswap_64_on_le(x) (x)
+#define bswap_16_on_be(x) __bswap_16(x)
+#define bswap_32_on_be(x) __bswap_32(x)
+#define bswap_64_on_be(x) __bswap_64(x)
+#endif
+
+static inline le16_t htole16(uint16_t value) { return (le16_t __sd_force) bswap_16_on_be(value); }
+static inline le32_t htole32(uint32_t value) { return (le32_t __sd_force) bswap_32_on_be(value); }
+static inline le64_t htole64(uint64_t value) { return (le64_t __sd_force) bswap_64_on_be(value); }
+
+static inline be16_t htobe16(uint16_t value) { return (be16_t __sd_force) bswap_16_on_le(value); }
+static inline be32_t htobe32(uint32_t value) { return (be32_t __sd_force) bswap_32_on_le(value); }
+static inline be64_t htobe64(uint64_t value) { return (be64_t __sd_force) bswap_64_on_le(value); }
+
+static inline uint16_t le16toh(le16_t value) { return bswap_16_on_be((uint16_t __sd_force)value); }
+static inline uint32_t le32toh(le32_t value) { return bswap_32_on_be((uint32_t __sd_force)value); }
+static inline uint64_t le64toh(le64_t value) { return bswap_64_on_be((uint64_t __sd_force)value); }
+
+static inline uint16_t be16toh(be16_t value) { return bswap_16_on_le((uint16_t __sd_force)value); }
+static inline uint32_t be32toh(be32_t value) { return bswap_32_on_le((uint32_t __sd_force)value); }
+static inline uint64_t be64toh(be64_t value) { return bswap_64_on_le((uint64_t __sd_force)value); }
+
+#undef __sd_bitwise
+#undef __sd_force
diff --git a/src/basic/special.h b/src/basic/special.h
new file mode 100644
index 0000000..d55b328
--- /dev/null
+++ b/src/basic/special.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#define SPECIAL_DEFAULT_TARGET "default.target"
+#define SPECIAL_INITRD_TARGET "initrd.target"
+
+/* Shutdown targets */
+#define SPECIAL_UMOUNT_TARGET "umount.target"
+/* This is not really intended to be started by directly. This is
+ * mostly so that other targets (reboot/halt/poweroff) can depend on
+ * it to bring all services down that want to be brought down on
+ * system shutdown. */
+#define SPECIAL_SHUTDOWN_TARGET "shutdown.target"
+#define SPECIAL_HALT_TARGET "halt.target"
+#define SPECIAL_POWEROFF_TARGET "poweroff.target"
+#define SPECIAL_REBOOT_TARGET "reboot.target"
+#define SPECIAL_KEXEC_TARGET "kexec.target"
+#define SPECIAL_EXIT_TARGET "exit.target"
+#define SPECIAL_SUSPEND_TARGET "suspend.target"
+#define SPECIAL_HIBERNATE_TARGET "hibernate.target"
+#define SPECIAL_HYBRID_SLEEP_TARGET "hybrid-sleep.target"
+#define SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET "suspend-then-hibernate.target"
+
+/* Special boot targets */
+#define SPECIAL_RESCUE_TARGET "rescue.target"
+#define SPECIAL_EMERGENCY_TARGET "emergency.target"
+#define SPECIAL_MULTI_USER_TARGET "multi-user.target"
+#define SPECIAL_GRAPHICAL_TARGET "graphical.target"
+
+/* Early boot targets */
+#define SPECIAL_SYSINIT_TARGET "sysinit.target"
+#define SPECIAL_SOCKETS_TARGET "sockets.target"
+#define SPECIAL_TIMERS_TARGET "timers.target"
+#define SPECIAL_PATHS_TARGET "paths.target"
+#define SPECIAL_LOCAL_FS_TARGET "local-fs.target"
+#define SPECIAL_LOCAL_FS_PRE_TARGET "local-fs-pre.target"
+#define SPECIAL_INITRD_FS_TARGET "initrd-fs.target"
+#define SPECIAL_INITRD_ROOT_DEVICE_TARGET "initrd-root-device.target"
+#define SPECIAL_INITRD_ROOT_FS_TARGET "initrd-root-fs.target"
+#define SPECIAL_REMOTE_FS_TARGET "remote-fs.target" /* LSB's $remote_fs */
+#define SPECIAL_REMOTE_FS_PRE_TARGET "remote-fs-pre.target"
+#define SPECIAL_SWAP_TARGET "swap.target"
+#define SPECIAL_NETWORK_ONLINE_TARGET "network-online.target"
+#define SPECIAL_TIME_SYNC_TARGET "time-sync.target" /* LSB's $time */
+#define SPECIAL_BASIC_TARGET "basic.target"
+
+/* LSB compatibility */
+#define SPECIAL_NETWORK_TARGET "network.target" /* LSB's $network */
+#define SPECIAL_NSS_LOOKUP_TARGET "nss-lookup.target" /* LSB's $named */
+#define SPECIAL_RPCBIND_TARGET "rpcbind.target" /* LSB's $portmap */
+
+/*
+ * Rules regarding adding further high level targets like the above:
+ *
+ * - Be conservative, only add more of these when we really need
+ * them. We need strong usecases for further additions.
+ *
+ * - When there can be multiple implementations running side-by-side,
+ * it needs to be a .target unit which can pull in all
+ * implementations.
+ *
+ * - If something can be implemented with socket activation, and
+ * without, it needs to be a .target unit, so that it can pull in
+ * the appropriate unit.
+ *
+ * - Otherwise, it should be a .service unit.
+ *
+ * - In some cases it is OK to have both a .service and a .target
+ * unit, i.e. if there can be multiple parallel implementations, but
+ * only one is the "system" one. Example: syslog.
+ *
+ * Or to put this in other words: .service symlinks can be used to
+ * arbitrate between multiple implementations if there can be only one
+ * of a kind. .target units can be used to support multiple
+ * implementations that can run side-by-side.
+ */
+
+/* Magic early boot services */
+#define SPECIAL_FSCK_SERVICE "systemd-fsck@.service"
+#define SPECIAL_FSCK_ROOT_SERVICE "systemd-fsck-root.service"
+#define SPECIAL_QUOTACHECK_SERVICE "systemd-quotacheck.service"
+#define SPECIAL_QUOTAON_SERVICE "quotaon.service"
+#define SPECIAL_REMOUNT_FS_SERVICE "systemd-remount-fs.service"
+#define SPECIAL_VOLATILE_ROOT_SERVICE "systemd-volatile-root.service"
+#define SPECIAL_UDEVD_SERVICE "systemd-udevd.service"
+
+/* Services systemd relies on */
+#define SPECIAL_DBUS_SERVICE "dbus.service"
+#define SPECIAL_DBUS_SOCKET "dbus.socket"
+#define SPECIAL_JOURNALD_SOCKET "systemd-journald.socket"
+#define SPECIAL_JOURNALD_SERVICE "systemd-journald.service"
+#define SPECIAL_TMPFILES_SETUP_SERVICE "systemd-tmpfiles-setup.service"
+
+/* Magic init signals */
+#define SPECIAL_KBREQUEST_TARGET "kbrequest.target"
+#define SPECIAL_SIGPWR_TARGET "sigpwr.target"
+#define SPECIAL_CTRL_ALT_DEL_TARGET "ctrl-alt-del.target"
+
+/* Where we add all our system units, users and machines by default */
+#define SPECIAL_SYSTEM_SLICE "system.slice"
+#define SPECIAL_USER_SLICE "user.slice"
+#define SPECIAL_MACHINE_SLICE "machine.slice"
+#define SPECIAL_ROOT_SLICE "-.slice"
+
+/* The scope unit systemd itself lives in. */
+#define SPECIAL_INIT_SCOPE "init.scope"
+
+/* The root directory. */
+#define SPECIAL_ROOT_MOUNT "-.mount"
+
+/* Special slices valid for the user instance */
+#define SPECIAL_SESSION_SLICE "session.slice"
+#define SPECIAL_APP_SLICE "app.slice"
+#define SPECIAL_BACKGROUND_SLICE "background.slice"
diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c
new file mode 100644
index 0000000..f999681
--- /dev/null
+++ b/src/basic/stat-util.c
@@ -0,0 +1,472 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/statvfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "missing_fs.h"
+#include "missing_magic.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+
+int is_symlink(const char *path) {
+ struct stat info;
+
+ assert(path);
+
+ if (lstat(path, &info) < 0)
+ return -errno;
+
+ return !!S_ISLNK(info.st_mode);
+}
+
+int is_dir(const char* path, bool follow) {
+ struct stat st;
+ int r;
+
+ assert(path);
+
+ if (follow)
+ r = stat(path, &st);
+ else
+ r = lstat(path, &st);
+ if (r < 0)
+ return -errno;
+
+ return !!S_ISDIR(st.st_mode);
+}
+
+int is_dir_fd(int fd) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return !!S_ISDIR(st.st_mode);
+}
+
+int is_device_node(const char *path) {
+ struct stat info;
+
+ assert(path);
+
+ if (lstat(path, &info) < 0)
+ return -errno;
+
+ return !!(S_ISBLK(info.st_mode) || S_ISCHR(info.st_mode));
+}
+
+int dir_is_empty_at(int dir_fd, const char *path) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ if (path)
+ fd = openat(dir_fd, path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ else
+ fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (fd < 0)
+ return -errno;
+
+ d = take_fdopendir(&fd);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT(de, d, return -errno)
+ return 0;
+
+ return 1;
+}
+
+bool null_or_empty(struct stat *st) {
+ assert(st);
+
+ if (S_ISREG(st->st_mode) && st->st_size <= 0)
+ return true;
+
+ /* We don't want to hardcode the major/minor of /dev/null, hence we do a simpler "is this a character
+ * device node?" check. */
+
+ if (S_ISCHR(st->st_mode))
+ return true;
+
+ return false;
+}
+
+int null_or_empty_path(const char *fn) {
+ struct stat st;
+
+ assert(fn);
+
+ /* If we have the path, let's do an easy text comparison first. */
+ if (path_equal(fn, "/dev/null"))
+ return true;
+
+ if (stat(fn, &st) < 0)
+ return -errno;
+
+ return null_or_empty(&st);
+}
+
+int null_or_empty_fd(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return null_or_empty(&st);
+}
+
+int path_is_read_only_fs(const char *path) {
+ struct statvfs st;
+
+ assert(path);
+
+ if (statvfs(path, &st) < 0)
+ return -errno;
+
+ if (st.f_flag & ST_RDONLY)
+ return true;
+
+ /* On NFS, statvfs() might not reflect whether we can actually
+ * write to the remote share. Let's try again with
+ * access(W_OK) which is more reliable, at least sometimes. */
+ if (access(path, W_OK) < 0 && errno == EROFS)
+ return true;
+
+ return false;
+}
+
+int files_same(const char *filea, const char *fileb, int flags) {
+ struct stat a, b;
+
+ assert(filea);
+ assert(fileb);
+
+ if (fstatat(AT_FDCWD, filea, &a, flags) < 0)
+ return -errno;
+
+ if (fstatat(AT_FDCWD, fileb, &b, flags) < 0)
+ return -errno;
+
+ return a.st_dev == b.st_dev &&
+ a.st_ino == b.st_ino;
+}
+
+bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) {
+ assert(s);
+ assert_cc(sizeof(statfs_f_type_t) >= sizeof(s->f_type));
+
+ return F_TYPE_EQUAL(s->f_type, magic_value);
+}
+
+int fd_is_fs_type(int fd, statfs_f_type_t magic_value) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_fs_type(&s, magic_value);
+}
+
+int path_is_fs_type(const char *path, statfs_f_type_t magic_value) {
+ struct statfs s;
+
+ if (statfs(path, &s) < 0)
+ return -errno;
+
+ return is_fs_type(&s, magic_value);
+}
+
+bool is_temporary_fs(const struct statfs *s) {
+ return is_fs_type(s, TMPFS_MAGIC) ||
+ is_fs_type(s, RAMFS_MAGIC);
+}
+
+bool is_network_fs(const struct statfs *s) {
+ return is_fs_type(s, CIFS_MAGIC_NUMBER) ||
+ is_fs_type(s, CODA_SUPER_MAGIC) ||
+ is_fs_type(s, NCP_SUPER_MAGIC) ||
+ is_fs_type(s, NFS_SUPER_MAGIC) ||
+ is_fs_type(s, SMB_SUPER_MAGIC) ||
+ is_fs_type(s, V9FS_MAGIC) ||
+ is_fs_type(s, AFS_SUPER_MAGIC) ||
+ is_fs_type(s, OCFS2_SUPER_MAGIC);
+}
+
+int fd_is_temporary_fs(int fd) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_temporary_fs(&s);
+}
+
+int fd_is_network_fs(int fd) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_network_fs(&s);
+}
+
+int path_is_temporary_fs(const char *path) {
+ struct statfs s;
+
+ if (statfs(path, &s) < 0)
+ return -errno;
+
+ return is_temporary_fs(&s);
+}
+
+int stat_verify_regular(const struct stat *st) {
+ assert(st);
+
+ /* Checks whether the specified stat() structure refers to a regular file. If not returns an appropriate error
+ * code. */
+
+ if (S_ISDIR(st->st_mode))
+ return -EISDIR;
+
+ if (S_ISLNK(st->st_mode))
+ return -ELOOP;
+
+ if (!S_ISREG(st->st_mode))
+ return -EBADFD;
+
+ return 0;
+}
+
+int fd_verify_regular(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return stat_verify_regular(&st);
+}
+
+int stat_verify_directory(const struct stat *st) {
+ assert(st);
+
+ if (S_ISLNK(st->st_mode))
+ return -ELOOP;
+
+ if (!S_ISDIR(st->st_mode))
+ return -ENOTDIR;
+
+ return 0;
+}
+
+int fd_verify_directory(int fd) {
+ struct stat st;
+
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ return stat_verify_directory(&st);
+}
+
+int device_path_make_major_minor(mode_t mode, dev_t devno, char **ret) {
+ const char *t;
+
+ /* Generates the /dev/{char|block}/MAJOR:MINOR path for a dev_t */
+
+ if (S_ISCHR(mode))
+ t = "char";
+ else if (S_ISBLK(mode))
+ t = "block";
+ else
+ return -ENODEV;
+
+ if (asprintf(ret, "/dev/%s/%u:%u", t, major(devno), minor(devno)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int device_path_make_canonical(mode_t mode, dev_t devno, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ /* Finds the canonical path for a device, i.e. resolves the /dev/{char|block}/MAJOR:MINOR path to the end. */
+
+ assert(ret);
+
+ if (major(devno) == 0 && minor(devno) == 0) {
+ char *s;
+
+ /* A special hack to make sure our 'inaccessible' device nodes work. They won't have symlinks in
+ * /dev/block/ and /dev/char/, hence we handle them specially here. */
+
+ if (S_ISCHR(mode))
+ s = strdup("/run/systemd/inaccessible/chr");
+ else if (S_ISBLK(mode))
+ s = strdup("/run/systemd/inaccessible/blk");
+ else
+ return -ENODEV;
+
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+ }
+
+ r = device_path_make_major_minor(mode, devno, &p);
+ if (r < 0)
+ return r;
+
+ return chase_symlinks(p, NULL, 0, ret, NULL);
+}
+
+int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devno) {
+ mode_t mode;
+ dev_t devno;
+ int r;
+
+ /* Tries to extract the major/minor directly from the device path if we can. Handles /dev/block/ and /dev/char/
+ * paths, as well out synthetic inaccessible device nodes. Never goes to disk. Returns -ENODEV if the device
+ * path cannot be parsed like this. */
+
+ if (path_equal(path, "/run/systemd/inaccessible/chr")) {
+ mode = S_IFCHR;
+ devno = makedev(0, 0);
+ } else if (path_equal(path, "/run/systemd/inaccessible/blk")) {
+ mode = S_IFBLK;
+ devno = makedev(0, 0);
+ } else {
+ const char *w;
+
+ w = path_startswith(path, "/dev/block/");
+ if (w)
+ mode = S_IFBLK;
+ else {
+ w = path_startswith(path, "/dev/char/");
+ if (!w)
+ return -ENODEV;
+
+ mode = S_IFCHR;
+ }
+
+ r = parse_dev(w, &devno);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_mode)
+ *ret_mode = mode;
+ if (ret_devno)
+ *ret_devno = devno;
+
+ return 0;
+}
+
+int proc_mounted(void) {
+ int r;
+
+ /* A quick check of procfs is properly mounted */
+
+ r = path_is_fs_type("/proc/", PROC_SUPER_MAGIC);
+ if (r == -ENOENT) /* not mounted at all */
+ return false;
+
+ return r;
+}
+
+bool stat_inode_unmodified(const struct stat *a, const struct stat *b) {
+
+ /* Returns if the specified stat structures reference the same, unmodified inode. This check tries to
+ * be reasonably careful when detecting changes: we check both inode and mtime, to cater for file
+ * systems where mtimes are fixed to 0 (think: ostree/nixos type installations). We also check file
+ * size, backing device, inode type and if this refers to a device not the major/minor.
+ *
+ * Note that we don't care if file attributes such as ownership or access mode change, this here is
+ * about contents of the file. The purpose here is to detect file contents changes, and nothing
+ * else. */
+
+ return a && b &&
+ (a->st_mode & S_IFMT) != 0 && /* We use the check for .st_mode if the structure was ever initialized */
+ ((a->st_mode ^ b->st_mode) & S_IFMT) == 0 && /* same inode type */
+ a->st_mtim.tv_sec == b->st_mtim.tv_sec &&
+ a->st_mtim.tv_nsec == b->st_mtim.tv_nsec &&
+ (!S_ISREG(a->st_mode) || a->st_size == b->st_size) && /* if regular file, compare file size */
+ a->st_dev == b->st_dev &&
+ a->st_ino == b->st_ino &&
+ (!(S_ISCHR(a->st_mode) || S_ISBLK(a->st_mode)) || a->st_rdev == b->st_rdev); /* if device node, also compare major/minor, because we can */
+}
+
+int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct statx *sx) {
+ static bool avoid_statx = false;
+ struct stat st;
+
+ if (!avoid_statx) {
+ if (statx(dfd, path, flags, mask, sx) < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EPERM)
+ return -errno;
+
+ /* If statx() is not supported or if we see EPERM (which might indicate seccomp
+ * filtering or so), let's do a fallback. Not that on EACCES we'll not fall back,
+ * since that is likely an indication of fs access issues, which we should
+ * propagate */
+ } else
+ return 0;
+
+ avoid_statx = true;
+ }
+
+ /* Only do fallback if fstatat() supports the flag too, or if it's one of the sync flags, which are
+ * OK to ignore */
+ if ((flags & ~(AT_EMPTY_PATH|AT_NO_AUTOMOUNT|AT_SYMLINK_NOFOLLOW|
+ AT_STATX_SYNC_AS_STAT|AT_STATX_FORCE_SYNC|AT_STATX_DONT_SYNC)) != 0)
+ return -EOPNOTSUPP;
+
+ if (fstatat(dfd, path, &st, flags & (AT_EMPTY_PATH|AT_NO_AUTOMOUNT|AT_SYMLINK_NOFOLLOW)) < 0)
+ return -errno;
+
+ *sx = (struct statx) {
+ .stx_mask = STATX_TYPE|STATX_MODE|
+ STATX_NLINK|STATX_UID|STATX_GID|
+ STATX_ATIME|STATX_MTIME|STATX_CTIME|
+ STATX_INO|STATX_SIZE|STATX_BLOCKS,
+ .stx_blksize = st.st_blksize,
+ .stx_nlink = st.st_nlink,
+ .stx_uid = st.st_uid,
+ .stx_gid = st.st_gid,
+ .stx_mode = st.st_mode,
+ .stx_ino = st.st_ino,
+ .stx_size = st.st_size,
+ .stx_blocks = st.st_blocks,
+ .stx_rdev_major = major(st.st_rdev),
+ .stx_rdev_minor = minor(st.st_rdev),
+ .stx_dev_major = major(st.st_dev),
+ .stx_dev_minor = minor(st.st_dev),
+ .stx_atime.tv_sec = st.st_atim.tv_sec,
+ .stx_atime.tv_nsec = st.st_atim.tv_nsec,
+ .stx_mtime.tv_sec = st.st_mtim.tv_sec,
+ .stx_mtime.tv_nsec = st.st_mtim.tv_nsec,
+ .stx_ctime.tv_sec = st.st_ctim.tv_sec,
+ .stx_ctime.tv_nsec = st.st_ctim.tv_nsec,
+ };
+
+ return 0;
+}
diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h
new file mode 100644
index 0000000..a566114
--- /dev/null
+++ b/src/basic/stat-util.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+#include "macro.h"
+#include "missing_stat.h"
+
+int is_symlink(const char *path);
+int is_dir(const char *path, bool follow);
+int is_dir_fd(int fd);
+int is_device_node(const char *path);
+
+int dir_is_empty_at(int dir_fd, const char *path);
+static inline int dir_is_empty(const char *path) {
+ return dir_is_empty_at(AT_FDCWD, path);
+}
+
+static inline int dir_is_populated(const char *path) {
+ int r;
+ r = dir_is_empty(path);
+ if (r < 0)
+ return r;
+ return !r;
+}
+
+bool null_or_empty(struct stat *st) _pure_;
+int null_or_empty_path(const char *fn);
+int null_or_empty_fd(int fd);
+
+int path_is_read_only_fs(const char *path);
+
+int files_same(const char *filea, const char *fileb, int flags);
+
+/* The .f_type field of struct statfs is really weird defined on
+ * different archs. Let's give its type a name. */
+typedef typeof(((struct statfs*)NULL)->f_type) statfs_f_type_t;
+
+bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) _pure_;
+int fd_is_fs_type(int fd, statfs_f_type_t magic_value);
+int path_is_fs_type(const char *path, statfs_f_type_t magic_value);
+
+bool is_temporary_fs(const struct statfs *s) _pure_;
+bool is_network_fs(const struct statfs *s) _pure_;
+
+int fd_is_temporary_fs(int fd);
+int fd_is_network_fs(int fd);
+
+int path_is_temporary_fs(const char *path);
+
+/* Because statfs.t_type can be int on some architectures, we have to cast
+ * the const magic to the type, otherwise the compiler warns about
+ * signed/unsigned comparison, because the magic can be 32 bit unsigned.
+ */
+#define F_TYPE_EQUAL(a, b) (a == (typeof(a)) b)
+
+int stat_verify_regular(const struct stat *st);
+int fd_verify_regular(int fd);
+
+int stat_verify_directory(const struct stat *st);
+int fd_verify_directory(int fd);
+
+/* glibc and the Linux kernel have different ideas about the major/minor size. These calls will check whether the
+ * specified major is valid by the Linux kernel's standards, not by glibc's. Linux has 20bits of minor, and 12 bits of
+ * major space. See MINORBITS in linux/kdev_t.h in the kernel sources. (If you wonder why we define _y here, instead of
+ * comparing directly >= 0: it's to trick out -Wtype-limits, which would otherwise complain if the type is unsigned, as
+ * such a test would be pointless in such a case.) */
+
+#define DEVICE_MAJOR_VALID(x) \
+ ({ \
+ typeof(x) _x = (x), _y = 0; \
+ _x >= _y && _x < (UINT32_C(1) << 12); \
+ \
+ })
+
+#define DEVICE_MINOR_VALID(x) \
+ ({ \
+ typeof(x) _x = (x), _y = 0; \
+ _x >= _y && _x < (UINT32_C(1) << 20); \
+ })
+
+int device_path_make_major_minor(mode_t mode, dev_t devno, char **ret);
+int device_path_make_canonical(mode_t mode, dev_t devno, char **ret);
+int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devno);
+
+int proc_mounted(void);
+
+bool stat_inode_unmodified(const struct stat *a, const struct stat *b);
+
+int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct statx *sx);
+
+#if HAS_FEATURE_MEMORY_SANITIZER
+# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
+# define STRUCT_STATX_DEFINE(var) \
+ struct statx var = {}
+# define STRUCT_NEW_STATX_DEFINE(var) \
+ union { \
+ struct statx sx; \
+ struct new_statx nsx; \
+ } var = {}
+#else
+# define STRUCT_STATX_DEFINE(var) \
+ struct statx var
+# define STRUCT_NEW_STATX_DEFINE(var) \
+ union { \
+ struct statx sx; \
+ struct new_statx nsx; \
+ } var
+#endif
diff --git a/src/basic/static-destruct.h b/src/basic/static-destruct.h
new file mode 100644
index 0000000..0f96132
--- /dev/null
+++ b/src/basic/static-destruct.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include "alloc-util.h"
+#include "macro.h"
+
+/* A framework for registering static variables that shall be freed on shutdown of a process. It's a bit like gcc's
+ * destructor attribute, but allows us to precisely schedule when we want to free the variables. This is supposed to
+ * feel a bit like the gcc cleanup attribute, but for static variables. Note that this does not work for static
+ * variables declared in .so's, as the list is private to the same linking unit. But maybe that's a good thing. */
+
+typedef struct StaticDestructor {
+ void *data;
+ free_func_t destroy;
+} StaticDestructor;
+
+#define STATIC_DESTRUCTOR_REGISTER(variable, func) \
+ _STATIC_DESTRUCTOR_REGISTER(UNIQ, variable, func)
+
+#define _STATIC_DESTRUCTOR_REGISTER(uq, variable, func) \
+ /* Type-safe destructor */ \
+ static void UNIQ_T(static_destructor_wrapper, uq)(void *p) { \
+ typeof(variable) *q = p; \
+ func(q); \
+ } \
+ /* The actual destructor structure we place in a special section to find it */ \
+ _section_("SYSTEMD_STATIC_DESTRUCT") \
+ /* We pick pointer alignment, since that is apparently what gcc does for static variables */ \
+ _alignptr_ \
+ /* Make sure this is not dropped from the image because not explicitly referenced */ \
+ _used_ \
+ /* Make sure that AddressSanitizer doesn't pad this variable: we want everything in this section packed next to each other so that we can enumerate it. */ \
+ _variable_no_sanitize_address_ \
+ static const StaticDestructor UNIQ_T(static_destructor_entry, uq) = { \
+ .data = &(variable), \
+ .destroy = UNIQ_T(static_destructor_wrapper, uq), \
+ }
+
+/* Beginning and end of our section listing the destructors. We define these as weak as we want this to work even if
+ * there's not a single destructor is defined in which case the section will be missing. */
+extern const struct StaticDestructor _weak_ __start_SYSTEMD_STATIC_DESTRUCT[];
+extern const struct StaticDestructor _weak_ __stop_SYSTEMD_STATIC_DESTRUCT[];
+
+/* The function to destroy everything. (Note that this must be static inline, as it's key that it remains in
+ * the same linking unit as the variables we want to destroy.) */
+static inline void static_destruct(void) {
+ const StaticDestructor *d;
+
+ if (!__start_SYSTEMD_STATIC_DESTRUCT)
+ return;
+
+ d = ALIGN_TO_PTR(__start_SYSTEMD_STATIC_DESTRUCT, sizeof(void*));
+ while (d < __stop_SYSTEMD_STATIC_DESTRUCT) {
+ d->destroy(d->data);
+ d = ALIGN_TO_PTR(d + 1, sizeof(void*));
+ }
+}
diff --git a/src/basic/stdio-util.h b/src/basic/stdio-util.h
new file mode 100644
index 0000000..6dc1e72
--- /dev/null
+++ b/src/basic/stdio-util.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <printf.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "memory-util.h"
+
+#define snprintf_ok(buf, len, fmt, ...) \
+ ((size_t) snprintf(buf, len, fmt, __VA_ARGS__) < (len))
+
+#define xsprintf(buf, fmt, ...) \
+ assert_message_se(snprintf_ok(buf, ELEMENTSOF(buf), fmt, __VA_ARGS__), "xsprintf: " #buf "[] must be big enough")
+
+#define VA_FORMAT_ADVANCE(format, ap) \
+do { \
+ int _argtypes[128]; \
+ size_t _i, _k; \
+ /* See https://github.com/google/sanitizers/issues/992 */ \
+ if (HAS_FEATURE_MEMORY_SANITIZER) \
+ zero(_argtypes); \
+ _k = parse_printf_format((format), ELEMENTSOF(_argtypes), _argtypes); \
+ assert(_k < ELEMENTSOF(_argtypes)); \
+ for (_i = 0; _i < _k; _i++) { \
+ if (_argtypes[_i] & PA_FLAG_PTR) { \
+ (void) va_arg(ap, void*); \
+ continue; \
+ } \
+ \
+ switch (_argtypes[_i]) { \
+ case PA_INT: \
+ case PA_INT|PA_FLAG_SHORT: \
+ case PA_CHAR: \
+ (void) va_arg(ap, int); \
+ break; \
+ case PA_INT|PA_FLAG_LONG: \
+ (void) va_arg(ap, long int); \
+ break; \
+ case PA_INT|PA_FLAG_LONG_LONG: \
+ (void) va_arg(ap, long long int); \
+ break; \
+ case PA_WCHAR: \
+ (void) va_arg(ap, wchar_t); \
+ break; \
+ case PA_WSTRING: \
+ case PA_STRING: \
+ case PA_POINTER: \
+ (void) va_arg(ap, void*); \
+ break; \
+ case PA_FLOAT: \
+ case PA_DOUBLE: \
+ (void) va_arg(ap, double); \
+ break; \
+ case PA_DOUBLE|PA_FLAG_LONG_DOUBLE: \
+ (void) va_arg(ap, long double); \
+ break; \
+ default: \
+ assert_not_reached("Unknown format string argument."); \
+ } \
+ } \
+} while (false)
diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c
new file mode 100644
index 0000000..aee6647
--- /dev/null
+++ b/src/basic/strbuf.c
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "sort-util.h"
+#include "strbuf.h"
+
+/*
+ * Strbuf stores given strings in a single continuous allocated memory
+ * area. Identical strings are de-duplicated and return the same offset
+ * as the first string stored. If the tail of a string already exists
+ * in the buffer, the tail is returned.
+ *
+ * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
+ * information about the stored strings.
+ *
+ * Example of udev rules:
+ * $ ./udevadm test .
+ * ...
+ * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
+ * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
+ * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
+ * ...
+ */
+
+struct strbuf *strbuf_new(void) {
+ struct strbuf *str;
+
+ str = new(struct strbuf, 1);
+ if (!str)
+ return NULL;
+ *str = (struct strbuf) {
+ .buf = new0(char, 1),
+ .root = new0(struct strbuf_node, 1),
+ .len = 1,
+ .nodes_count = 1,
+ };
+ if (!str->buf || !str->root) {
+ free(str->buf);
+ free(str->root);
+ return mfree(str);
+ }
+
+ return str;
+}
+
+static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) {
+ size_t i;
+
+ for (i = 0; i < node->children_count; i++)
+ strbuf_node_cleanup(node->children[i].child);
+ free(node->children);
+ return mfree(node);
+}
+
+/* clean up trie data, leave only the string buffer */
+void strbuf_complete(struct strbuf *str) {
+ if (!str)
+ return;
+ if (str->root)
+ str->root = strbuf_node_cleanup(str->root);
+}
+
+/* clean up everything */
+void strbuf_cleanup(struct strbuf *str) {
+ if (!str)
+ return;
+
+ strbuf_complete(str);
+ free(str->buf);
+ free(str);
+}
+
+static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
+ const struct strbuf_child_entry *n2) {
+ return n1->c - n2->c;
+}
+
+static void bubbleinsert(struct strbuf_node *node,
+ uint8_t c,
+ struct strbuf_node *node_child) {
+
+ struct strbuf_child_entry new = {
+ .c = c,
+ .child = node_child,
+ };
+ int left = 0, right = node->children_count;
+
+ while (right > left) {
+ int middle = (right + left) / 2 ;
+ if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
+ left = middle + 1;
+ else
+ right = middle;
+ }
+
+ memmove(node->children + left + 1, node->children + left,
+ sizeof(struct strbuf_child_entry) * (node->children_count - left));
+ node->children[left] = new;
+
+ node->children_count++;
+}
+
+/* add string, return the index/offset into the buffer */
+ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
+ uint8_t c;
+ struct strbuf_node *node;
+ size_t depth;
+ char *buf_new;
+ struct strbuf_child_entry *child;
+ struct strbuf_node *node_child;
+ ssize_t off;
+
+ if (!str->root)
+ return -EINVAL;
+
+ /* search string; start from last character to find possibly matching tails */
+
+ str->in_count++;
+ if (len == 0) {
+ str->dedup_count++;
+ return 0;
+ }
+ str->in_len += len;
+
+ node = str->root;
+ for (depth = 0; depth <= len; depth++) {
+ struct strbuf_child_entry search;
+
+ /* match against current node */
+ off = node->value_off + node->value_len - len;
+ if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
+ str->dedup_len += len;
+ str->dedup_count++;
+ return off;
+ }
+
+ c = s[len - 1 - depth];
+
+ /* lookup child node */
+ search.c = c;
+ child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp);
+ if (!child)
+ break;
+ node = child->child;
+ }
+
+ /* add new string */
+ buf_new = realloc(str->buf, str->len + len+1);
+ if (!buf_new)
+ return -ENOMEM;
+ str->buf = buf_new;
+ off = str->len;
+ memcpy(str->buf + off, s, len);
+ str->len += len;
+ str->buf[str->len++] = '\0';
+
+ /* new node */
+ node_child = new(struct strbuf_node, 1);
+ if (!node_child)
+ return -ENOMEM;
+ *node_child = (struct strbuf_node) {
+ .value_off = off,
+ .value_len = len,
+ };
+
+ /* extend array, add new entry, sort for bisection */
+ child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry));
+ if (!child) {
+ free(node_child);
+ return -ENOMEM;
+ }
+
+ str->nodes_count++;
+
+ node->children = child;
+ bubbleinsert(node, c, node_child);
+
+ return off;
+}
diff --git a/src/basic/strbuf.h b/src/basic/strbuf.h
new file mode 100644
index 0000000..82758d7
--- /dev/null
+++ b/src/basic/strbuf.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+struct strbuf {
+ char *buf;
+ size_t len;
+ struct strbuf_node *root;
+
+ size_t nodes_count;
+ size_t in_count;
+ size_t in_len;
+ size_t dedup_len;
+ size_t dedup_count;
+};
+
+struct strbuf_node {
+ size_t value_off;
+ size_t value_len;
+
+ struct strbuf_child_entry *children;
+ uint8_t children_count;
+};
+
+struct strbuf_child_entry {
+ uint8_t c;
+ struct strbuf_node *child;
+};
+
+struct strbuf *strbuf_new(void);
+ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len);
+void strbuf_complete(struct strbuf *str);
+void strbuf_cleanup(struct strbuf *str);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct strbuf*, strbuf_cleanup);
diff --git a/src/basic/string-table.c b/src/basic/string-table.c
new file mode 100644
index 0000000..116021d
--- /dev/null
+++ b/src/basic/string-table.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "string-table.h"
+#include "string-util.h"
+
+ssize_t string_table_lookup(const char * const *table, size_t len, const char *key) {
+ if (!key)
+ return -1;
+
+ for (size_t i = 0; i < len; ++i)
+ if (streq_ptr(table[i], key))
+ return (ssize_t) i;
+
+ return -1;
+}
diff --git a/src/basic/string-table.h b/src/basic/string-table.h
new file mode 100644
index 0000000..b6b3611
--- /dev/null
+++ b/src/basic/string-table.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+ssize_t string_table_lookup(const char * const *table, size_t len, const char *key);
+
+/* For basic lookup tables with strictly enumerated entries */
+#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ scope const char *name##_to_string(type i) { \
+ if (i < 0 || i >= (type) ELEMENTSOF(name##_table)) \
+ return NULL; \
+ return name##_table[i]; \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) \
+ scope type name##_from_string(const char *s) { \
+ return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope) \
+ scope type name##_from_string(const char *s) { \
+ int b; \
+ if (!s) \
+ return -1; \
+ b = parse_boolean(s); \
+ if (b == 0) \
+ return (type) 0; \
+ else if (b > 0) \
+ return yes; \
+ return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,scope) \
+ scope int name##_to_string_alloc(type i, char **str) { \
+ char *s; \
+ if (i < 0 || i > max) \
+ return -ERANGE; \
+ if (i < (type) ELEMENTSOF(name##_table) && name##_table[i]) { \
+ s = strdup(name##_table[i]); \
+ if (!s) \
+ return -ENOMEM; \
+ } else { \
+ if (asprintf(&s, "%i", i) < 0) \
+ return -ENOMEM; \
+ } \
+ *str = s; \
+ return 0; \
+ }
+
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,scope) \
+ scope type name##_from_string(const char *s) { \
+ unsigned u = 0; \
+ type i; \
+ if (!s) \
+ return (type) -1; \
+ i = (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \
+ if (i >= 0) \
+ return i; \
+ if (safe_atou(s, &u) >= 0 && u <= max) \
+ return (type) u; \
+ return (type) -1; \
+ } \
+
+#define _DEFINE_STRING_TABLE_LOOKUP(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope)
+
+#define _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope)
+
+#define DEFINE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,static)
+
+#define DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes) _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,)
+
+/* For string conversions where numbers are also acceptable */
+#define DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,)
+
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,static)
+#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,static)
+
+#define DUMP_STRING_TABLE(name,type,max) \
+ do { \
+ type _k; \
+ flockfile(stdout); \
+ for (_k = 0; _k < (max); _k++) { \
+ const char *_t; \
+ _t = name##_to_string(_k); \
+ if (!_t) \
+ continue; \
+ fputs_unlocked(_t, stdout); \
+ fputc_unlocked('\n', stdout); \
+ } \
+ funlockfile(stdout); \
+ } while(false)
diff --git a/src/basic/string-util.c b/src/basic/string-util.c
new file mode 100644
index 0000000..7ab460f
--- /dev/null
+++ b/src/basic/string-util.c
@@ -0,0 +1,1136 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "gunicode.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "utf8.h"
+#include "util.h"
+
+int strcmp_ptr(const char *a, const char *b) {
+ /* Like strcmp(), but tries to make sense of NULL pointers */
+
+ if (a && b)
+ return strcmp(a, b);
+ return CMP(a, b); /* Direct comparison of pointers, one of which is NULL */
+}
+
+int strcasecmp_ptr(const char *a, const char *b) {
+ /* Like strcasecmp(), but tries to make sense of NULL pointers */
+
+ if (a && b)
+ return strcasecmp(a, b);
+ return CMP(a, b); /* Direct comparison of pointers, one of which is NULL */
+}
+
+char* endswith(const char *s, const char *postfix) {
+ size_t sl, pl;
+
+ assert(s);
+ assert(postfix);
+
+ sl = strlen(s);
+ pl = strlen(postfix);
+
+ if (pl == 0)
+ return (char*) s + sl;
+
+ if (sl < pl)
+ return NULL;
+
+ if (memcmp(s + sl - pl, postfix, pl) != 0)
+ return NULL;
+
+ return (char*) s + sl - pl;
+}
+
+char* endswith_no_case(const char *s, const char *postfix) {
+ size_t sl, pl;
+
+ assert(s);
+ assert(postfix);
+
+ sl = strlen(s);
+ pl = strlen(postfix);
+
+ if (pl == 0)
+ return (char*) s + sl;
+
+ if (sl < pl)
+ return NULL;
+
+ if (strcasecmp(s + sl - pl, postfix) != 0)
+ return NULL;
+
+ return (char*) s + sl - pl;
+}
+
+char* first_word(const char *s, const char *word) {
+ size_t sl, wl;
+ const char *p;
+
+ assert(s);
+ assert(word);
+
+ /* Checks if the string starts with the specified word, either
+ * followed by NUL or by whitespace. Returns a pointer to the
+ * NUL or the first character after the whitespace. */
+
+ sl = strlen(s);
+ wl = strlen(word);
+
+ if (sl < wl)
+ return NULL;
+
+ if (wl == 0)
+ return (char*) s;
+
+ if (memcmp(s, word, wl) != 0)
+ return NULL;
+
+ p = s + wl;
+ if (*p == 0)
+ return (char*) p;
+
+ if (!strchr(WHITESPACE, *p))
+ return NULL;
+
+ p += strspn(p, WHITESPACE);
+ return (char*) p;
+}
+
+char *strnappend(const char *s, const char *suffix, size_t b) {
+ size_t a;
+ char *r;
+
+ if (!s && !suffix)
+ return strdup("");
+
+ if (!s)
+ return strndup(suffix, b);
+
+ if (!suffix)
+ return strdup(s);
+
+ assert(s);
+ assert(suffix);
+
+ a = strlen(s);
+ if (b > ((size_t) -1) - a)
+ return NULL;
+
+ r = new(char, a+b+1);
+ if (!r)
+ return NULL;
+
+ memcpy(r, s, a);
+ memcpy(r+a, suffix, b);
+ r[a+b] = 0;
+
+ return r;
+}
+
+char *strjoin_real(const char *x, ...) {
+ va_list ap;
+ size_t l = 1;
+ char *r, *p;
+
+ va_start(ap, x);
+ for (const char *t = x; t; t = va_arg(ap, const char *)) {
+ size_t n;
+
+ n = strlen(t);
+ if (n > SIZE_MAX - l) {
+ va_end(ap);
+ return NULL;
+ }
+ l += n;
+ }
+ va_end(ap);
+
+ p = r = new(char, l);
+ if (!r)
+ return NULL;
+
+ va_start(ap, x);
+ for (const char *t = x; t; t = va_arg(ap, const char *))
+ p = stpcpy(p, t);
+ va_end(ap);
+
+ *p = 0;
+
+ return r;
+}
+
+char *strstrip(char *s) {
+ if (!s)
+ return NULL;
+
+ /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
+
+ return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
+}
+
+char *delete_chars(char *s, const char *bad) {
+ char *f, *t;
+
+ /* Drops all specified bad characters, regardless where in the string */
+
+ if (!s)
+ return NULL;
+
+ if (!bad)
+ bad = WHITESPACE;
+
+ for (f = s, t = s; *f; f++) {
+ if (strchr(bad, *f))
+ continue;
+
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ return s;
+}
+
+char *delete_trailing_chars(char *s, const char *bad) {
+ char *p, *c = s;
+
+ /* Drops all specified bad characters, at the end of the string */
+
+ if (!s)
+ return NULL;
+
+ if (!bad)
+ bad = WHITESPACE;
+
+ for (p = s; *p; p++)
+ if (!strchr(bad, *p))
+ c = p + 1;
+
+ *c = 0;
+
+ return s;
+}
+
+char *truncate_nl(char *s) {
+ assert(s);
+
+ s[strcspn(s, NEWLINE)] = 0;
+ return s;
+}
+
+char ascii_tolower(char x) {
+
+ if (x >= 'A' && x <= 'Z')
+ return x - 'A' + 'a';
+
+ return x;
+}
+
+char ascii_toupper(char x) {
+
+ if (x >= 'a' && x <= 'z')
+ return x - 'a' + 'A';
+
+ return x;
+}
+
+char *ascii_strlower(char *t) {
+ char *p;
+
+ assert(t);
+
+ for (p = t; *p; p++)
+ *p = ascii_tolower(*p);
+
+ return t;
+}
+
+char *ascii_strupper(char *t) {
+ char *p;
+
+ assert(t);
+
+ for (p = t; *p; p++)
+ *p = ascii_toupper(*p);
+
+ return t;
+}
+
+char *ascii_strlower_n(char *t, size_t n) {
+ size_t i;
+
+ if (n <= 0)
+ return t;
+
+ for (i = 0; i < n; i++)
+ t[i] = ascii_tolower(t[i]);
+
+ return t;
+}
+
+int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
+
+ for (; n > 0; a++, b++, n--) {
+ int x, y;
+
+ x = (int) (uint8_t) ascii_tolower(*a);
+ y = (int) (uint8_t) ascii_tolower(*b);
+
+ if (x != y)
+ return x - y;
+ }
+
+ return 0;
+}
+
+int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
+ int r;
+
+ r = ascii_strcasecmp_n(a, b, MIN(n, m));
+ if (r != 0)
+ return r;
+
+ return CMP(n, m);
+}
+
+bool chars_intersect(const char *a, const char *b) {
+ const char *p;
+
+ /* Returns true if any of the chars in a are in b. */
+ for (p = a; *p; p++)
+ if (strchr(b, *p))
+ return true;
+
+ return false;
+}
+
+bool string_has_cc(const char *p, const char *ok) {
+ const char *t;
+
+ assert(p);
+
+ /*
+ * Check if a string contains control characters. If 'ok' is
+ * non-NULL it may be a string containing additional CCs to be
+ * considered OK.
+ */
+
+ for (t = p; *t; t++) {
+ if (ok && strchr(ok, *t))
+ continue;
+
+ if (*t > 0 && *t < ' ')
+ return true;
+
+ if (*t == 127)
+ return true;
+ }
+
+ return false;
+}
+
+static int write_ellipsis(char *buf, bool unicode) {
+ if (unicode || is_locale_utf8()) {
+ buf[0] = 0xe2; /* tri-dot ellipsis: … */
+ buf[1] = 0x80;
+ buf[2] = 0xa6;
+ } else {
+ buf[0] = '.';
+ buf[1] = '.';
+ buf[2] = '.';
+ }
+
+ return 3;
+}
+
+static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
+ size_t x, need_space, suffix_len;
+ char *t;
+
+ assert(s);
+ assert(percent <= 100);
+ assert(new_length != (size_t) -1);
+
+ if (old_length <= new_length)
+ return strndup(s, old_length);
+
+ /* Special case short ellipsations */
+ switch (new_length) {
+
+ case 0:
+ return strdup("");
+
+ case 1:
+ if (is_locale_utf8())
+ return strdup("…");
+ else
+ return strdup(".");
+
+ case 2:
+ if (!is_locale_utf8())
+ return strdup("..");
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
+ * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
+ * either for the UTF-8 encoded character or for three ASCII characters. */
+ need_space = is_locale_utf8() ? 1 : 3;
+
+ t = new(char, new_length+3);
+ if (!t)
+ return NULL;
+
+ assert(new_length >= need_space);
+
+ x = ((new_length - need_space) * percent + 50) / 100;
+ assert(x <= new_length - need_space);
+
+ memcpy(t, s, x);
+ write_ellipsis(t + x, false);
+ suffix_len = new_length - x - need_space;
+ memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
+ *(t + x + 3 + suffix_len) = '\0';
+
+ return t;
+}
+
+char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
+ size_t x, k, len, len2;
+ const char *i, *j;
+ char *e;
+ int r;
+
+ /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
+ * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
+ * strings.
+ *
+ * Ellipsation is done in a locale-dependent way:
+ * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
+ * 2. Otherwise, a unicode ellipsis is used ("…")
+ *
+ * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
+ * the current locale is UTF-8.
+ */
+
+ assert(s);
+ assert(percent <= 100);
+
+ if (new_length == (size_t) -1)
+ return strndup(s, old_length);
+
+ if (new_length == 0)
+ return strdup("");
+
+ /* If no multibyte characters use ascii_ellipsize_mem for speed */
+ if (ascii_is_valid_n(s, old_length))
+ return ascii_ellipsize_mem(s, old_length, new_length, percent);
+
+ x = ((new_length - 1) * percent) / 100;
+ assert(x <= new_length - 1);
+
+ k = 0;
+ for (i = s; i < s + old_length; i = utf8_next_char(i)) {
+ char32_t c;
+ int w;
+
+ r = utf8_encoded_to_unichar(i, &c);
+ if (r < 0)
+ return NULL;
+
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= x)
+ k += w;
+ else
+ break;
+ }
+
+ for (j = s + old_length; j > i; ) {
+ char32_t c;
+ int w;
+ const char *jj;
+
+ jj = utf8_prev_char(j);
+ r = utf8_encoded_to_unichar(jj, &c);
+ if (r < 0)
+ return NULL;
+
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= new_length) {
+ k += w;
+ j = jj;
+ } else
+ break;
+ }
+ assert(i <= j);
+
+ /* we don't actually need to ellipsize */
+ if (i == j)
+ return memdup_suffix0(s, old_length);
+
+ /* make space for ellipsis, if possible */
+ if (j < s + old_length)
+ j = utf8_next_char(j);
+ else if (i > s)
+ i = utf8_prev_char(i);
+
+ len = i - s;
+ len2 = s + old_length - j;
+ e = new(char, len + 3 + len2 + 1);
+ if (!e)
+ return NULL;
+
+ /*
+ printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
+ old_length, new_length, x, len, len2, k);
+ */
+
+ memcpy(e, s, len);
+ write_ellipsis(e + len, true);
+ memcpy(e + len + 3, j, len2);
+ *(e + len + 3 + len2) = '\0';
+
+ return e;
+}
+
+char *cellescape(char *buf, size_t len, const char *s) {
+ /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
+ * characters are copied as they are, everything else is escaped. The result
+ * is different then if escaping and ellipsization was performed in two
+ * separate steps, because each sequence is either stored in full or skipped.
+ *
+ * This function should be used for logging about strings which expected to
+ * be plain ASCII in a safe way.
+ *
+ * An ellipsis will be used if s is too long. It was always placed at the
+ * very end.
+ */
+
+ size_t i = 0, last_char_width[4] = {}, k = 0, j;
+
+ assert(len > 0); /* at least a terminating NUL */
+
+ for (;;) {
+ char four[4];
+ int w;
+
+ if (*s == 0) /* terminating NUL detected? then we are done! */
+ goto done;
+
+ w = cescape_char(*s, four);
+ if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
+ * ellipsize at the previous location */
+ break;
+
+ /* OK, there was space, let's add this escaped character to the buffer */
+ memcpy(buf + i, four, w);
+ i += w;
+
+ /* And remember its width in the ring buffer */
+ last_char_width[k] = w;
+ k = (k + 1) % 4;
+
+ s++;
+ }
+
+ /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
+ * characters ideally, but the buffer is shorter than that in the first place take what we can get */
+ for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
+
+ if (i + 4 <= len) /* nice, we reached our space goal */
+ break;
+
+ k = k == 0 ? 3 : k - 1;
+ if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */
+ break;
+
+ assert(i >= last_char_width[k]);
+ i -= last_char_width[k];
+ }
+
+ if (i + 4 <= len) /* yay, enough space */
+ i += write_ellipsis(buf + i, false);
+ else if (i + 3 <= len) { /* only space for ".." */
+ buf[i++] = '.';
+ buf[i++] = '.';
+ } else if (i + 2 <= len) /* only space for a single "." */
+ buf[i++] = '.';
+ else
+ assert(i + 1 <= len);
+
+ done:
+ buf[i] = '\0';
+ return buf;
+}
+
+char* strshorten(char *s, size_t l) {
+ assert(s);
+
+ if (strnlen(s, l+1) > l)
+ s[l] = 0;
+
+ return s;
+}
+
+char *strreplace(const char *text, const char *old_string, const char *new_string) {
+ size_t l, old_len, new_len, allocated = 0;
+ char *t, *ret = NULL;
+ const char *f;
+
+ assert(old_string);
+ assert(new_string);
+
+ if (!text)
+ return NULL;
+
+ old_len = strlen(old_string);
+ new_len = strlen(new_string);
+
+ l = strlen(text);
+ if (!GREEDY_REALLOC(ret, allocated, l+1))
+ return NULL;
+
+ f = text;
+ t = ret;
+ while (*f) {
+ size_t d, nl;
+
+ if (!startswith(f, old_string)) {
+ *(t++) = *(f++);
+ continue;
+ }
+
+ d = t - ret;
+ nl = l - old_len + new_len;
+
+ if (!GREEDY_REALLOC(ret, allocated, nl + 1))
+ return mfree(ret);
+
+ l = nl;
+ t = ret + d;
+
+ t = stpcpy(t, new_string);
+ f += old_len;
+ }
+
+ *t = 0;
+ return ret;
+}
+
+static void advance_offsets(
+ ssize_t diff,
+ size_t offsets[2], /* note: we can't use [static 2] here, since this may be NULL */
+ size_t shift[static 2],
+ size_t size) {
+
+ if (!offsets)
+ return;
+
+ assert(shift);
+
+ if ((size_t) diff < offsets[0])
+ shift[0] += size;
+ if ((size_t) diff < offsets[1])
+ shift[1] += size;
+}
+
+char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
+ const char *begin = NULL;
+ enum {
+ STATE_OTHER,
+ STATE_ESCAPE,
+ STATE_CSI,
+ STATE_CSO,
+ } state = STATE_OTHER;
+ char *obuf = NULL;
+ size_t osz = 0, isz, shift[2] = {}, n_carriage_returns = 0;
+ FILE *f;
+
+ assert(ibuf);
+ assert(*ibuf);
+
+ /* This does three things:
+ *
+ * 1. Replaces TABs by 8 spaces
+ * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
+ * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
+ * 4. Strip trailing \r characters (since they would "move the cursor", but have no
+ * other effect).
+ *
+ * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as
+ * are any other special characters. Truncated ANSI sequences are left-as is too. This call is
+ * supposed to suppress the most basic formatting noise, but nothing else.
+ *
+ * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
+
+ isz = _isz ? *_isz : strlen(*ibuf);
+
+ /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we
+ * created f here and it doesn't leave our scope. */
+ f = open_memstream_unlocked(&obuf, &osz);
+ if (!f)
+ return NULL;
+
+ for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) {
+
+ switch (state) {
+
+ case STATE_OTHER:
+ if (i >= *ibuf + isz) /* EOT */
+ break;
+
+ if (*i == '\r') {
+ n_carriage_returns++;
+ break;
+ } else if (*i == '\n')
+ /* Ignore carriage returns before new line */
+ n_carriage_returns = 0;
+ for (; n_carriage_returns > 0; n_carriage_returns--)
+ fputc('\r', f);
+
+ if (*i == '\x1B')
+ state = STATE_ESCAPE;
+ else if (*i == '\t') {
+ fputs(" ", f);
+ advance_offsets(i - *ibuf, highlight, shift, 7);
+ } else
+ fputc(*i, f);
+
+ break;
+
+ case STATE_ESCAPE:
+ assert(n_carriage_returns == 0);
+
+ if (i >= *ibuf + isz) { /* EOT */
+ fputc('\x1B', f);
+ advance_offsets(i - *ibuf, highlight, shift, 1);
+ break;
+ } else if (*i == '[') { /* ANSI CSI */
+ state = STATE_CSI;
+ begin = i + 1;
+ } else if (*i == ']') { /* ANSI CSO */
+ state = STATE_CSO;
+ begin = i + 1;
+ } else {
+ fputc('\x1B', f);
+ fputc(*i, f);
+ advance_offsets(i - *ibuf, highlight, shift, 1);
+ state = STATE_OTHER;
+ }
+
+ break;
+
+ case STATE_CSI:
+ assert(n_carriage_returns == 0);
+
+ if (i >= *ibuf + isz || /* EOT … */
+ !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
+ fputc('\x1B', f);
+ fputc('[', f);
+ advance_offsets(i - *ibuf, highlight, shift, 2);
+ state = STATE_OTHER;
+ i = begin-1;
+ } else if (*i == 'm')
+ state = STATE_OTHER;
+
+ break;
+
+ case STATE_CSO:
+ assert(n_carriage_returns == 0);
+
+ if (i >= *ibuf + isz || /* EOT … */
+ (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
+ fputc('\x1B', f);
+ fputc(']', f);
+ advance_offsets(i - *ibuf, highlight, shift, 2);
+ state = STATE_OTHER;
+ i = begin-1;
+ } else if (*i == '\a')
+ state = STATE_OTHER;
+
+ break;
+ }
+ }
+
+ if (fflush_and_check(f) < 0) {
+ fclose(f);
+ return mfree(obuf);
+ }
+ fclose(f);
+
+ free_and_replace(*ibuf, obuf);
+
+ if (_isz)
+ *_isz = osz;
+
+ if (highlight) {
+ highlight[0] += shift[0];
+ highlight[1] += shift[1];
+ }
+
+ return *ibuf;
+}
+
+char *strextend_with_separator(char **x, const char *separator, ...) {
+ bool need_separator;
+ size_t f, l, l_separator;
+ char *r, *p;
+ va_list ap;
+
+ assert(x);
+
+ l = f = strlen_ptr(*x);
+
+ need_separator = !isempty(*x);
+ l_separator = strlen_ptr(separator);
+
+ va_start(ap, separator);
+ for (;;) {
+ const char *t;
+ size_t n;
+
+ t = va_arg(ap, const char *);
+ if (!t)
+ break;
+
+ n = strlen(t);
+
+ if (need_separator)
+ n += l_separator;
+
+ if (n > ((size_t) -1) - l) {
+ va_end(ap);
+ return NULL;
+ }
+
+ l += n;
+ need_separator = true;
+ }
+ va_end(ap);
+
+ need_separator = !isempty(*x);
+
+ r = realloc(*x, l+1);
+ if (!r)
+ return NULL;
+
+ p = r + f;
+
+ va_start(ap, separator);
+ for (;;) {
+ const char *t;
+
+ t = va_arg(ap, const char *);
+ if (!t)
+ break;
+
+ if (need_separator && separator)
+ p = stpcpy(p, separator);
+
+ p = stpcpy(p, t);
+
+ need_separator = true;
+ }
+ va_end(ap);
+
+ assert(p == r + l);
+
+ *p = 0;
+ *x = r;
+
+ return r + l;
+}
+
+char *strrep(const char *s, unsigned n) {
+ size_t l;
+ char *r, *p;
+ unsigned i;
+
+ assert(s);
+
+ l = strlen(s);
+ p = r = malloc(l * n + 1);
+ if (!r)
+ return NULL;
+
+ for (i = 0; i < n; i++)
+ p = stpcpy(p, s);
+
+ *p = 0;
+ return r;
+}
+
+int split_pair(const char *s, const char *sep, char **l, char **r) {
+ char *x, *a, *b;
+
+ assert(s);
+ assert(sep);
+ assert(l);
+ assert(r);
+
+ if (isempty(sep))
+ return -EINVAL;
+
+ x = strstr(s, sep);
+ if (!x)
+ return -EINVAL;
+
+ a = strndup(s, x - s);
+ if (!a)
+ return -ENOMEM;
+
+ b = strdup(x + strlen(sep));
+ if (!b) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ *l = a;
+ *r = b;
+
+ return 0;
+}
+
+int free_and_strdup(char **p, const char *s) {
+ char *t;
+
+ assert(p);
+
+ /* Replaces a string pointer with a strdup()ed new string,
+ * possibly freeing the old one. */
+
+ if (streq_ptr(*p, s))
+ return 0;
+
+ if (s) {
+ t = strdup(s);
+ if (!t)
+ return -ENOMEM;
+ } else
+ t = NULL;
+
+ free(*p);
+ *p = t;
+
+ return 1;
+}
+
+int free_and_strndup(char **p, const char *s, size_t l) {
+ char *t;
+
+ assert(p);
+ assert(s || l == 0);
+
+ /* Replaces a string pointer with a strndup()ed new string,
+ * freeing the old one. */
+
+ if (!*p && !s)
+ return 0;
+
+ if (*p && s && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0'))
+ return 0;
+
+ if (s) {
+ t = strndup(s, l);
+ if (!t)
+ return -ENOMEM;
+ } else
+ t = NULL;
+
+ free_and_replace(*p, t);
+ return 1;
+}
+
+bool string_is_safe(const char *p) {
+ const char *t;
+
+ if (!p)
+ return false;
+
+ /* Checks if the specified string contains no quotes or control characters */
+
+ for (t = p; *t; t++) {
+ if (*t > 0 && *t < ' ') /* no control characters */
+ return false;
+
+ if (strchr(QUOTES "\\\x7f", *t))
+ return false;
+ }
+
+ return true;
+}
+
+char* string_erase(char *x) {
+ if (!x)
+ return NULL;
+
+ /* A delicious drop of snake-oil! To be called on memory where we stored passphrases or so, after we
+ * used them. */
+ explicit_bzero_safe(x, strlen(x));
+ return x;
+}
+
+int string_truncate_lines(const char *s, size_t n_lines, char **ret) {
+ const char *p = s, *e = s;
+ bool truncation_applied = false;
+ char *copy;
+ size_t n = 0;
+
+ assert(s);
+
+ /* Truncate after the specified number of lines. Returns > 0 if a truncation was applied or == 0 if
+ * there were fewer lines in the string anyway. Trailing newlines on input are ignored, and not
+ * generated either. */
+
+ for (;;) {
+ size_t k;
+
+ k = strcspn(p, "\n");
+
+ if (p[k] == 0) {
+ if (k == 0) /* final empty line */
+ break;
+
+ if (n >= n_lines) /* above threshold */
+ break;
+
+ e = p + k; /* last line to include */
+ break;
+ }
+
+ assert(p[k] == '\n');
+
+ if (n >= n_lines)
+ break;
+
+ if (k > 0)
+ e = p + k;
+
+ p += k + 1;
+ n++;
+ }
+
+ /* e points after the last character we want to keep */
+ if (isempty(e))
+ copy = strdup(s);
+ else {
+ if (!in_charset(e, "\n")) /* We only consider things truncated if we remove something that
+ * isn't a new-line or a series of them */
+ truncation_applied = true;
+
+ copy = strndup(s, e - s);
+ }
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return truncation_applied;
+}
+
+int string_extract_line(const char *s, size_t i, char **ret) {
+ const char *p = s;
+ size_t c = 0;
+
+ /* Extract the i'nth line from the specified string. Returns > 0 if there are more lines after that,
+ * and == 0 if we are looking at the last line or already beyond the last line. As special
+ * optimization, if the first line is requested and the string only consists of one line we return
+ * NULL, indicating the input string should be used as is, and avoid a memory allocation for a very
+ * common case. */
+
+ for (;;) {
+ const char *q;
+
+ q = strchr(p, '\n');
+ if (i == c) {
+ /* The line we are looking for! */
+
+ if (q) {
+ char *m;
+
+ m = strndup(p, q - p);
+ if (!m)
+ return -ENOMEM;
+
+ *ret = m;
+ return !isempty(q + 1); /* more coming? */
+ } else {
+ if (p == s)
+ *ret = NULL; /* Just use the input string */
+ else {
+ char *m;
+
+ m = strdup(p);
+ if (!m)
+ return -ENOMEM;
+
+ *ret = m;
+ }
+
+ return 0; /* The end */
+ }
+ }
+
+ if (!q) {
+ char *m;
+
+ /* No more lines, return empty line */
+
+ m = strdup("");
+ if (!m)
+ return -ENOMEM;
+
+ *ret = m;
+ return 0; /* The end */
+ }
+
+ p = q + 1;
+ c++;
+ }
+}
+
+int string_contains_word_strv(const char *string, const char *separators, char **words, const char **ret_word) {
+ /* In the default mode with no separators specified, we split on whitespace and
+ * don't coalesce separators. */
+ const ExtractFlags flags = separators ? EXTRACT_DONT_COALESCE_SEPARATORS : 0;
+
+ const char *found = NULL;
+
+ for (const char *p = string;;) {
+ _cleanup_free_ char *w = NULL;
+ int r;
+
+ r = extract_first_word(&p, &w, separators, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ found = strv_find(words, w);
+ if (found)
+ break;
+ }
+
+ if (ret_word)
+ *ret_word = found;
+ return !!found;
+}
diff --git a/src/basic/string-util.h b/src/basic/string-util.h
new file mode 100644
index 0000000..fdd3ce7
--- /dev/null
+++ b/src/basic/string-util.h
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+
+/* What is interpreted as whitespace? */
+#define WHITESPACE " \t\n\r"
+#define NEWLINE "\n\r"
+#define QUOTES "\"\'"
+#define COMMENTS "#;"
+#define GLOB_CHARS "*?["
+#define DIGITS "0123456789"
+#define LOWERCASE_LETTERS "abcdefghijklmnopqrstuvwxyz"
+#define UPPERCASE_LETTERS "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+#define LETTERS LOWERCASE_LETTERS UPPERCASE_LETTERS
+#define ALPHANUMERICAL LETTERS DIGITS
+#define HEXDIGITS DIGITS "abcdefABCDEF"
+
+#define streq(a,b) (strcmp((a),(b)) == 0)
+#define strneq(a, b, n) (strncmp((a), (b), (n)) == 0)
+#define strcaseeq(a,b) (strcasecmp((a),(b)) == 0)
+#define strncaseeq(a, b, n) (strncasecmp((a), (b), (n)) == 0)
+
+int strcmp_ptr(const char *a, const char *b) _pure_;
+int strcasecmp_ptr(const char *a, const char *b) _pure_;
+
+static inline bool streq_ptr(const char *a, const char *b) {
+ return strcmp_ptr(a, b) == 0;
+}
+
+static inline char* strstr_ptr(const char *haystack, const char *needle) {
+ if (!haystack || !needle)
+ return NULL;
+ return strstr(haystack, needle);
+}
+
+static inline const char* strempty(const char *s) {
+ return s ?: "";
+}
+
+static inline const char* strnull(const char *s) {
+ return s ?: "(null)";
+}
+
+static inline const char *strna(const char *s) {
+ return s ?: "n/a";
+}
+
+static inline const char* yes_no(bool b) {
+ return b ? "yes" : "no";
+}
+
+static inline const char* true_false(bool b) {
+ return b ? "true" : "false";
+}
+
+static inline const char* plus_minus(bool b) {
+ return b ? "+" : "-";
+}
+
+static inline const char* one_zero(bool b) {
+ return b ? "1" : "0";
+}
+
+static inline const char* enable_disable(bool b) {
+ return b ? "enable" : "disable";
+}
+
+static inline bool isempty(const char *p) {
+ return !p || !p[0];
+}
+
+static inline const char *empty_to_null(const char *p) {
+ return isempty(p) ? NULL : p;
+}
+
+static inline const char *empty_to_dash(const char *str) {
+ return isempty(str) ? "-" : str;
+}
+
+static inline bool empty_or_dash(const char *str) {
+ return !str ||
+ str[0] == 0 ||
+ (str[0] == '-' && str[1] == 0);
+}
+
+static inline const char *empty_or_dash_to_null(const char *p) {
+ return empty_or_dash(p) ? NULL : p;
+}
+
+static inline char *startswith(const char *s, const char *prefix) {
+ size_t l;
+
+ l = strlen(prefix);
+ if (strncmp(s, prefix, l) == 0)
+ return (char*) s + l;
+
+ return NULL;
+}
+
+static inline char *startswith_no_case(const char *s, const char *prefix) {
+ size_t l;
+
+ l = strlen(prefix);
+ if (strncasecmp(s, prefix, l) == 0)
+ return (char*) s + l;
+
+ return NULL;
+}
+
+char *endswith(const char *s, const char *postfix) _pure_;
+char *endswith_no_case(const char *s, const char *postfix) _pure_;
+
+char *first_word(const char *s, const char *word) _pure_;
+
+char *strnappend(const char *s, const char *suffix, size_t length);
+
+char *strjoin_real(const char *x, ...) _sentinel_;
+#define strjoin(a, ...) strjoin_real((a), __VA_ARGS__, NULL)
+
+#define strjoina(a, ...) \
+ ({ \
+ const char *_appendees_[] = { a, __VA_ARGS__ }; \
+ char *_d_, *_p_; \
+ size_t _len_ = 0; \
+ size_t _i_; \
+ for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \
+ _len_ += strlen(_appendees_[_i_]); \
+ _p_ = _d_ = newa(char, _len_ + 1); \
+ for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \
+ _p_ = stpcpy(_p_, _appendees_[_i_]); \
+ *_p_ = 0; \
+ _d_; \
+ })
+
+char *strstrip(char *s);
+char *delete_chars(char *s, const char *bad);
+char *delete_trailing_chars(char *s, const char *bad);
+char *truncate_nl(char *s);
+
+static inline char *skip_leading_chars(const char *s, const char *bad) {
+ if (!s)
+ return NULL;
+
+ if (!bad)
+ bad = WHITESPACE;
+
+ return (char*) s + strspn(s, bad);
+}
+
+char ascii_tolower(char x);
+char *ascii_strlower(char *s);
+char *ascii_strlower_n(char *s, size_t n);
+
+char ascii_toupper(char x);
+char *ascii_strupper(char *s);
+
+int ascii_strcasecmp_n(const char *a, const char *b, size_t n);
+int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m);
+
+bool chars_intersect(const char *a, const char *b) _pure_;
+
+static inline bool _pure_ in_charset(const char *s, const char* charset) {
+ assert(s);
+ assert(charset);
+ return s[strspn(s, charset)] == '\0';
+}
+
+bool string_has_cc(const char *p, const char *ok) _pure_;
+
+char *ellipsize_mem(const char *s, size_t old_length_bytes, size_t new_length_columns, unsigned percent);
+static inline char *ellipsize(const char *s, size_t length, unsigned percent) {
+ return ellipsize_mem(s, strlen(s), length, percent);
+}
+
+char *cellescape(char *buf, size_t len, const char *s);
+
+/* This limit is arbitrary, enough to give some idea what the string contains */
+#define CELLESCAPE_DEFAULT_LENGTH 64
+
+char* strshorten(char *s, size_t l);
+
+char *strreplace(const char *text, const char *old_string, const char *new_string);
+
+char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]);
+
+char *strextend_with_separator(char **x, const char *separator, ...) _sentinel_;
+
+#define strextend(x, ...) strextend_with_separator(x, NULL, __VA_ARGS__)
+
+char *strrep(const char *s, unsigned n);
+
+int split_pair(const char *s, const char *sep, char **l, char **r);
+
+int free_and_strdup(char **p, const char *s);
+static inline int free_and_strdup_warn(char **p, const char *s) {
+ if (free_and_strdup(p, s) < 0)
+ return log_oom();
+ return 0;
+}
+int free_and_strndup(char **p, const char *s, size_t l);
+
+bool string_is_safe(const char *p) _pure_;
+
+static inline size_t strlen_ptr(const char *s) {
+ if (!s)
+ return 0;
+
+ return strlen(s);
+}
+
+DISABLE_WARNING_STRINGOP_TRUNCATION;
+static inline void strncpy_exact(char *buf, const char *src, size_t buf_len) {
+ strncpy(buf, src, buf_len);
+}
+REENABLE_WARNING;
+
+/* Like startswith(), but operates on arbitrary memory blocks */
+static inline void *memory_startswith(const void *p, size_t sz, const char *token) {
+ assert(token);
+
+ size_t n = strlen(token);
+ if (sz < n)
+ return NULL;
+
+ assert(p);
+
+ if (memcmp(p, token, n) != 0)
+ return NULL;
+
+ return (uint8_t*) p + n;
+}
+
+/* Like startswith_no_case(), but operates on arbitrary memory blocks.
+ * It works only for ASCII strings.
+ */
+static inline void *memory_startswith_no_case(const void *p, size_t sz, const char *token) {
+ assert(token);
+
+ size_t n = strlen(token);
+ if (sz < n)
+ return NULL;
+
+ assert(p);
+
+ for (size_t i = 0; i < n; i++)
+ if (ascii_tolower(((char *)p)[i]) != ascii_tolower(token[i]))
+ return NULL;
+
+ return (uint8_t*) p + n;
+}
+
+static inline char* str_realloc(char **p) {
+ /* Reallocate *p to actual size */
+
+ if (!*p)
+ return NULL;
+
+ char *t = realloc(*p, strlen(*p) + 1);
+ if (!t)
+ return NULL;
+
+ return (*p = t);
+}
+
+char* string_erase(char *x);
+
+int string_truncate_lines(const char *s, size_t n_lines, char **ret);
+int string_extract_line(const char *s, size_t i, char **ret);
+
+int string_contains_word_strv(const char *string, const char *separators, char **words, const char **ret_word);
+static inline int string_contains_word(const char *string, const char *separators, const char *word) {
+ return string_contains_word_strv(string, separators, STRV_MAKE(word), NULL);
+}
diff --git a/src/basic/strv.c b/src/basic/strv.c
new file mode 100644
index 0000000..492dfe4
--- /dev/null
+++ b/src/basic/strv.c
@@ -0,0 +1,995 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "memory-util.h"
+#include "nulstr-util.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+char *strv_find(char * const *l, const char *name) {
+ char * const *i;
+
+ assert(name);
+
+ STRV_FOREACH(i, l)
+ if (streq(*i, name))
+ return *i;
+
+ return NULL;
+}
+
+char *strv_find_case(char * const *l, const char *name) {
+ char * const *i;
+
+ assert(name);
+
+ STRV_FOREACH(i, l)
+ if (strcaseeq(*i, name))
+ return *i;
+
+ return NULL;
+}
+
+char *strv_find_prefix(char * const *l, const char *name) {
+ char * const *i;
+
+ assert(name);
+
+ STRV_FOREACH(i, l)
+ if (startswith(*i, name))
+ return *i;
+
+ return NULL;
+}
+
+char *strv_find_startswith(char * const *l, const char *name) {
+ char * const *i, *e;
+
+ assert(name);
+
+ /* Like strv_find_prefix, but actually returns only the
+ * suffix, not the whole item */
+
+ STRV_FOREACH(i, l) {
+ e = startswith(*i, name);
+ if (e)
+ return e;
+ }
+
+ return NULL;
+}
+
+char **strv_free(char **l) {
+ char **k;
+
+ if (!l)
+ return NULL;
+
+ for (k = l; *k; k++)
+ free(*k);
+
+ return mfree(l);
+}
+
+char **strv_free_erase(char **l) {
+ char **i;
+
+ STRV_FOREACH(i, l)
+ erase_and_freep(i);
+
+ return mfree(l);
+}
+
+char **strv_copy(char * const *l) {
+ char **r, **k;
+
+ k = r = new(char*, strv_length(l) + 1);
+ if (!r)
+ return NULL;
+
+ if (l)
+ for (; *l; k++, l++) {
+ *k = strdup(*l);
+ if (!*k) {
+ strv_free(r);
+ return NULL;
+ }
+ }
+
+ *k = NULL;
+ return r;
+}
+
+size_t strv_length(char * const *l) {
+ size_t n = 0;
+
+ if (!l)
+ return 0;
+
+ for (; *l; l++)
+ n++;
+
+ return n;
+}
+
+char **strv_new_ap(const char *x, va_list ap) {
+ _cleanup_strv_free_ char **a = NULL;
+ size_t n = 0, i = 0;
+ va_list aq;
+
+ /* As a special trick we ignore all listed strings that equal
+ * STRV_IGNORE. This is supposed to be used with the
+ * STRV_IFNOTNULL() macro to include possibly NULL strings in
+ * the string list. */
+
+ va_copy(aq, ap);
+ for (const char *s = x; s; s = va_arg(aq, const char*)) {
+ if (s == STRV_IGNORE)
+ continue;
+
+ n++;
+ }
+ va_end(aq);
+
+ a = new(char*, n+1);
+ if (!a)
+ return NULL;
+
+ for (const char *s = x; s; s = va_arg(ap, const char*)) {
+ if (s == STRV_IGNORE)
+ continue;
+
+ a[i] = strdup(s);
+ if (!a[i])
+ return NULL;
+
+ i++;
+ }
+
+ a[i] = NULL;
+
+ return TAKE_PTR(a);
+}
+
+char **strv_new_internal(const char *x, ...) {
+ char **r;
+ va_list ap;
+
+ va_start(ap, x);
+ r = strv_new_ap(x, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int strv_extend_strv(char ***a, char * const *b, bool filter_duplicates) {
+ char * const *s, **t;
+ size_t p, q, i = 0, j;
+
+ assert(a);
+
+ if (strv_isempty(b))
+ return 0;
+
+ p = strv_length(*a);
+ q = strv_length(b);
+
+ if (p >= SIZE_MAX - q)
+ return -ENOMEM;
+
+ t = reallocarray(*a, GREEDY_ALLOC_ROUND_UP(p + q + 1), sizeof(char *));
+ if (!t)
+ return -ENOMEM;
+
+ t[p] = NULL;
+ *a = t;
+
+ STRV_FOREACH(s, b) {
+
+ if (filter_duplicates && strv_contains(t, *s))
+ continue;
+
+ t[p+i] = strdup(*s);
+ if (!t[p+i])
+ goto rollback;
+
+ i++;
+ t[p+i] = NULL;
+ }
+
+ assert(i <= q);
+
+ return (int) i;
+
+rollback:
+ for (j = 0; j < i; j++)
+ free(t[p + j]);
+
+ t[p] = NULL;
+ return -ENOMEM;
+}
+
+int strv_extend_strv_concat(char ***a, char * const *b, const char *suffix) {
+ char * const *s;
+ int r;
+
+ STRV_FOREACH(s, b) {
+ char *v;
+
+ v = strjoin(*s, suffix);
+ if (!v)
+ return -ENOMEM;
+
+ r = strv_push(a, v);
+ if (r < 0) {
+ free(v);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+char **strv_split_newlines(const char *s) {
+ char **l;
+ size_t n;
+
+ assert(s);
+
+ /* Special version of strv_split() that splits on newlines and
+ * suppresses an empty string at the end */
+
+ l = strv_split(s, NEWLINE);
+ if (!l)
+ return NULL;
+
+ n = strv_length(l);
+ if (n <= 0)
+ return l;
+
+ if (isempty(l[n - 1]))
+ l[n - 1] = mfree(l[n - 1]);
+
+ return l;
+}
+
+int strv_split_full(char ***t, const char *s, const char *separators, ExtractFlags flags) {
+ _cleanup_strv_free_ char **l = NULL;
+ size_t n = 0, allocated = 0;
+ int r;
+
+ assert(t);
+ assert(s);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&s, &word, separators, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(l, allocated, n + 2))
+ return -ENOMEM;
+
+ l[n++] = TAKE_PTR(word);
+
+ l[n] = NULL;
+ }
+
+ if (!l) {
+ l = new0(char*, 1);
+ if (!l)
+ return -ENOMEM;
+ }
+
+ *t = TAKE_PTR(l);
+
+ return (int) n;
+}
+
+int strv_split_colon_pairs(char ***t, const char *s) {
+ _cleanup_strv_free_ char **l = NULL;
+ size_t n = 0, allocated = 0;
+ int r;
+
+ assert(t);
+ assert(s);
+
+ for (;;) {
+ _cleanup_free_ char *first = NULL, *second = NULL, *tuple = NULL, *second_or_empty = NULL;
+
+ r = extract_first_word(&s, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ const char *p = tuple;
+ r = extract_many_words(&p, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS,
+ &first, &second, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+ /* Enforce that at most 2 colon-separated words are contained in each group */
+ if (!isempty(p))
+ return -EINVAL;
+
+ second_or_empty = strdup(strempty(second));
+ if (!second_or_empty)
+ return -ENOMEM;
+
+ if (!GREEDY_REALLOC(l, allocated, n + 3))
+ return -ENOMEM;
+
+ l[n++] = TAKE_PTR(first);
+ l[n++] = TAKE_PTR(second_or_empty);
+
+ l[n] = NULL;
+ }
+
+ if (!l) {
+ l = new0(char*, 1);
+ if (!l)
+ return -ENOMEM;
+ }
+
+ *t = TAKE_PTR(l);
+
+ return (int) n;
+}
+
+char *strv_join_full(char * const *l, const char *separator, const char *prefix, bool unescape_separators) {
+ char * const *s;
+ char *r, *e;
+ size_t n, k, m;
+
+ if (!separator)
+ separator = " ";
+
+ k = strlen(separator);
+ m = strlen_ptr(prefix);
+
+ if (unescape_separators) /* If there separator is multi-char, we won't know how to escape it. */
+ assert(k == 1);
+
+ n = 0;
+ STRV_FOREACH(s, l) {
+ if (s != l)
+ n += k;
+
+ bool needs_escaping = unescape_separators && strchr(*s, separator[0]);
+
+ n += m + strlen(*s) * (1 + needs_escaping);
+ }
+
+ r = new(char, n+1);
+ if (!r)
+ return NULL;
+
+ e = r;
+ STRV_FOREACH(s, l) {
+ if (s != l)
+ e = stpcpy(e, separator);
+
+ if (prefix)
+ e = stpcpy(e, prefix);
+
+ bool needs_escaping = unescape_separators && strchr(*s, separator[0]);
+
+ if (needs_escaping)
+ for (size_t i = 0; (*s)[i]; i++) {
+ if ((*s)[i] == separator[0])
+ *(e++) = '\\';
+ *(e++) = (*s)[i];
+ }
+ else
+ e = stpcpy(e, *s);
+ }
+
+ *e = 0;
+
+ return r;
+}
+
+int strv_push(char ***l, char *value) {
+ char **c;
+ size_t n;
+
+ if (!value)
+ return 0;
+
+ n = strv_length(*l);
+
+ /* Check for overflow */
+ if (n > SIZE_MAX-2)
+ return -ENOMEM;
+
+ c = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(n + 2), sizeof(char*));
+ if (!c)
+ return -ENOMEM;
+
+ c[n] = value;
+ c[n+1] = NULL;
+
+ *l = c;
+ return 0;
+}
+
+int strv_push_pair(char ***l, char *a, char *b) {
+ char **c;
+ size_t n;
+
+ if (!a && !b)
+ return 0;
+
+ n = strv_length(*l);
+
+ /* Check for overflow */
+ if (n > SIZE_MAX-3)
+ return -ENOMEM;
+
+ /* increase and check for overflow */
+ c = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(n + !!a + !!b + 1), sizeof(char*));
+ if (!c)
+ return -ENOMEM;
+
+ if (a)
+ c[n++] = a;
+ if (b)
+ c[n++] = b;
+ c[n] = NULL;
+
+ *l = c;
+ return 0;
+}
+
+int strv_insert(char ***l, size_t position, char *value) {
+ char **c;
+ size_t n, m, i;
+
+ if (!value)
+ return 0;
+
+ n = strv_length(*l);
+ position = MIN(position, n);
+
+ /* increase and check for overflow */
+ m = n + 2;
+ if (m < n)
+ return -ENOMEM;
+
+ c = new(char*, m);
+ if (!c)
+ return -ENOMEM;
+
+ for (i = 0; i < position; i++)
+ c[i] = (*l)[i];
+ c[position] = value;
+ for (i = position; i < n; i++)
+ c[i+1] = (*l)[i];
+
+ c[n+1] = NULL;
+
+ free(*l);
+ *l = c;
+
+ return 0;
+}
+
+int strv_consume(char ***l, char *value) {
+ int r;
+
+ r = strv_push(l, value);
+ if (r < 0)
+ free(value);
+
+ return r;
+}
+
+int strv_consume_pair(char ***l, char *a, char *b) {
+ int r;
+
+ r = strv_push_pair(l, a, b);
+ if (r < 0) {
+ free(a);
+ free(b);
+ }
+
+ return r;
+}
+
+int strv_consume_prepend(char ***l, char *value) {
+ int r;
+
+ r = strv_push_prepend(l, value);
+ if (r < 0)
+ free(value);
+
+ return r;
+}
+
+int strv_prepend(char ***l, const char *value) {
+ char *v;
+
+ if (!value)
+ return 0;
+
+ v = strdup(value);
+ if (!v)
+ return -ENOMEM;
+
+ return strv_consume_prepend(l, v);
+}
+
+int strv_extend(char ***l, const char *value) {
+ char *v;
+
+ if (!value)
+ return 0;
+
+ v = strdup(value);
+ if (!v)
+ return -ENOMEM;
+
+ return strv_consume(l, v);
+}
+
+int strv_extend_front(char ***l, const char *value) {
+ size_t n, m;
+ char *v, **c;
+
+ assert(l);
+
+ /* Like strv_extend(), but prepends rather than appends the new entry */
+
+ if (!value)
+ return 0;
+
+ n = strv_length(*l);
+
+ /* Increase and overflow check. */
+ m = n + 2;
+ if (m < n)
+ return -ENOMEM;
+
+ v = strdup(value);
+ if (!v)
+ return -ENOMEM;
+
+ c = reallocarray(*l, m, sizeof(char*));
+ if (!c) {
+ free(v);
+ return -ENOMEM;
+ }
+
+ memmove(c+1, c, n * sizeof(char*));
+ c[0] = v;
+ c[n+1] = NULL;
+
+ *l = c;
+ return 0;
+}
+
+char **strv_uniq(char **l) {
+ char **i;
+
+ /* Drops duplicate entries. The first identical string will be
+ * kept, the others dropped */
+
+ STRV_FOREACH(i, l)
+ strv_remove(i+1, *i);
+
+ return l;
+}
+
+bool strv_is_uniq(char * const *l) {
+ char * const *i;
+
+ STRV_FOREACH(i, l)
+ if (strv_find(i+1, *i))
+ return false;
+
+ return true;
+}
+
+char **strv_remove(char **l, const char *s) {
+ char **f, **t;
+
+ if (!l)
+ return NULL;
+
+ assert(s);
+
+ /* Drops every occurrence of s in the string list, edits
+ * in-place. */
+
+ for (f = t = l; *f; f++)
+ if (streq(*f, s))
+ free(*f);
+ else
+ *(t++) = *f;
+
+ *t = NULL;
+ return l;
+}
+
+char **strv_parse_nulstr(const char *s, size_t l) {
+ /* l is the length of the input data, which will be split at NULs into
+ * elements of the resulting strv. Hence, the number of items in the resulting strv
+ * will be equal to one plus the number of NUL bytes in the l bytes starting at s,
+ * unless s[l-1] is NUL, in which case the final empty string is not stored in
+ * the resulting strv, and length is equal to the number of NUL bytes.
+ *
+ * Note that contrary to a normal nulstr which cannot contain empty strings, because
+ * the input data is terminated by any two consequent NUL bytes, this parser accepts
+ * empty strings in s.
+ */
+
+ const char *p;
+ size_t c = 0, i = 0;
+ char **v;
+
+ assert(s || l <= 0);
+
+ if (l <= 0)
+ return new0(char*, 1);
+
+ for (p = s; p < s + l; p++)
+ if (*p == 0)
+ c++;
+
+ if (s[l-1] != 0)
+ c++;
+
+ v = new0(char*, c+1);
+ if (!v)
+ return NULL;
+
+ p = s;
+ while (p < s + l) {
+ const char *e;
+
+ e = memchr(p, 0, s + l - p);
+
+ v[i] = strndup(p, e ? e - p : s + l - p);
+ if (!v[i]) {
+ strv_free(v);
+ return NULL;
+ }
+
+ i++;
+
+ if (!e)
+ break;
+
+ p = e + 1;
+ }
+
+ assert(i == c);
+
+ return v;
+}
+
+char **strv_split_nulstr(const char *s) {
+ const char *i;
+ char **r = NULL;
+
+ NULSTR_FOREACH(i, s)
+ if (strv_extend(&r, i) < 0) {
+ strv_free(r);
+ return NULL;
+ }
+
+ if (!r)
+ return strv_new(NULL);
+
+ return r;
+}
+
+int strv_make_nulstr(char * const *l, char **ret, size_t *ret_size) {
+ /* A valid nulstr with two NULs at the end will be created, but
+ * q will be the length without the two trailing NULs. Thus the output
+ * string is a valid nulstr and can be iterated over using NULSTR_FOREACH,
+ * and can also be parsed by strv_parse_nulstr as long as the length
+ * is provided separately.
+ */
+
+ size_t n_allocated = 0, n = 0;
+ _cleanup_free_ char *m = NULL;
+ char * const *i;
+
+ assert(ret);
+ assert(ret_size);
+
+ STRV_FOREACH(i, l) {
+ size_t z;
+
+ z = strlen(*i);
+
+ if (!GREEDY_REALLOC(m, n_allocated, n + z + 2))
+ return -ENOMEM;
+
+ memcpy(m + n, *i, z + 1);
+ n += z + 1;
+ }
+
+ if (!m) {
+ m = new0(char, 1);
+ if (!m)
+ return -ENOMEM;
+ n = 1;
+ } else
+ /* make sure there is a second extra NUL at the end of resulting nulstr */
+ m[n] = '\0';
+
+ assert(n > 0);
+ *ret = m;
+ *ret_size = n - 1;
+
+ m = NULL;
+
+ return 0;
+}
+
+bool strv_overlap(char * const *a, char * const *b) {
+ char * const *i;
+
+ STRV_FOREACH(i, a)
+ if (strv_contains(b, *i))
+ return true;
+
+ return false;
+}
+
+static int str_compare(char * const *a, char * const *b) {
+ return strcmp(*a, *b);
+}
+
+char **strv_sort(char **l) {
+ typesafe_qsort(l, strv_length(l), str_compare);
+ return l;
+}
+
+int strv_compare(char * const *a, char * const *b) {
+ int r;
+
+ if (strv_isempty(a)) {
+ if (strv_isempty(b))
+ return 0;
+ else
+ return -1;
+ }
+
+ if (strv_isempty(b))
+ return 1;
+
+ for ( ; *a || *b; ++a, ++b) {
+ r = strcmp_ptr(*a, *b);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+void strv_print(char * const *l) {
+ char * const *s;
+
+ STRV_FOREACH(s, l)
+ puts(*s);
+}
+
+int strv_extendf(char ***l, const char *format, ...) {
+ va_list ap;
+ char *x;
+ int r;
+
+ va_start(ap, format);
+ r = vasprintf(&x, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return strv_consume(l, x);
+}
+
+char **strv_reverse(char **l) {
+ size_t n, i;
+
+ n = strv_length(l);
+ if (n <= 1)
+ return l;
+
+ for (i = 0; i < n / 2; i++)
+ SWAP_TWO(l[i], l[n-1-i]);
+
+ return l;
+}
+
+char **strv_shell_escape(char **l, const char *bad) {
+ char **s;
+
+ /* Escapes every character in every string in l that is in bad,
+ * edits in-place, does not roll-back on error. */
+
+ STRV_FOREACH(s, l) {
+ char *v;
+
+ v = shell_escape(*s, bad);
+ if (!v)
+ return NULL;
+
+ free(*s);
+ *s = v;
+ }
+
+ return l;
+}
+
+bool strv_fnmatch_full(char* const* patterns, const char *s, int flags, size_t *matched_pos) {
+ for (size_t i = 0; patterns && patterns[i]; i++)
+ if (fnmatch(patterns[i], s, flags) == 0) {
+ if (matched_pos)
+ *matched_pos = i;
+ return true;
+ }
+
+ return false;
+}
+
+char ***strv_free_free(char ***l) {
+ char ***i;
+
+ if (!l)
+ return NULL;
+
+ for (i = l; *i; i++)
+ strv_free(*i);
+
+ return mfree(l);
+}
+
+char **strv_skip(char **l, size_t n) {
+
+ while (n > 0) {
+ if (strv_isempty(l))
+ return l;
+
+ l++, n--;
+ }
+
+ return l;
+}
+
+int strv_extend_n(char ***l, const char *value, size_t n) {
+ size_t i, j, k;
+ char **nl;
+
+ assert(l);
+
+ if (!value)
+ return 0;
+ if (n == 0)
+ return 0;
+
+ /* Adds the value n times to l */
+
+ k = strv_length(*l);
+ if (n >= SIZE_MAX - k)
+ return -ENOMEM;
+
+ nl = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(k + n + 1), sizeof(char *));
+ if (!nl)
+ return -ENOMEM;
+
+ *l = nl;
+
+ for (i = k; i < k + n; i++) {
+ nl[i] = strdup(value);
+ if (!nl[i])
+ goto rollback;
+ }
+
+ nl[i] = NULL;
+ return 0;
+
+rollback:
+ for (j = k; j < i; j++)
+ free(nl[j]);
+
+ nl[k] = NULL;
+ return -ENOMEM;
+}
+
+int fputstrv(FILE *f, char * const *l, const char *separator, bool *space) {
+ bool b = false;
+ char * const *s;
+ int r;
+
+ /* Like fputs(), but for strv, and with a less stupid argument order */
+
+ if (!space)
+ space = &b;
+
+ STRV_FOREACH(s, l) {
+ r = fputs_with_space(f, *s, separator, space);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int string_strv_hashmap_put_internal(Hashmap *h, const char *key, const char *value) {
+ char **l;
+ int r;
+
+ l = hashmap_get(h, key);
+ if (l) {
+ /* A list for this key already exists, let's append to it if it is not listed yet */
+ if (strv_contains(l, value))
+ return 0;
+
+ r = strv_extend(&l, value);
+ if (r < 0)
+ return r;
+
+ assert_se(hashmap_update(h, key, l) >= 0);
+ } else {
+ /* No list for this key exists yet, create one */
+ _cleanup_strv_free_ char **l2 = NULL;
+ _cleanup_free_ char *t = NULL;
+
+ t = strdup(key);
+ if (!t)
+ return -ENOMEM;
+
+ r = strv_extend(&l2, value);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(h, t, l2);
+ if (r < 0)
+ return r;
+ TAKE_PTR(t);
+ TAKE_PTR(l2);
+ }
+
+ return 1;
+}
+
+int _string_strv_hashmap_put(Hashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS) {
+ int r;
+
+ r = _hashmap_ensure_allocated(h, &string_strv_hash_ops HASHMAP_DEBUG_PASS_ARGS);
+ if (r < 0)
+ return r;
+
+ return string_strv_hashmap_put_internal(*h, key, value);
+}
+
+int _string_strv_ordered_hashmap_put(OrderedHashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS) {
+ int r;
+
+ r = _ordered_hashmap_ensure_allocated(h, &string_strv_hash_ops HASHMAP_DEBUG_PASS_ARGS);
+ if (r < 0)
+ return r;
+
+ return string_strv_hashmap_put_internal(PLAIN_HASHMAP(*h), key, value);
+}
+
+DEFINE_HASH_OPS_FULL(string_strv_hash_ops, char, string_hash_func, string_compare_func, free, char*, strv_free);
diff --git a/src/basic/strv.h b/src/basic/strv.h
new file mode 100644
index 0000000..6b3e8e7
--- /dev/null
+++ b/src/basic/strv.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "string-util.h"
+
+char *strv_find(char * const *l, const char *name) _pure_;
+char *strv_find_case(char * const *l, const char *name) _pure_;
+char *strv_find_prefix(char * const *l, const char *name) _pure_;
+char *strv_find_startswith(char * const *l, const char *name) _pure_;
+
+#define strv_contains(l, s) (!!strv_find((l), (s)))
+#define strv_contains_case(l, s) (!!strv_find_case((l), (s)))
+
+char **strv_free(char **l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free);
+#define _cleanup_strv_free_ _cleanup_(strv_freep)
+
+char **strv_free_erase(char **l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free_erase);
+#define _cleanup_strv_free_erase_ _cleanup_(strv_free_erasep)
+
+char **strv_copy(char * const *l);
+size_t strv_length(char * const *l) _pure_;
+
+int strv_extend_strv(char ***a, char * const *b, bool filter_duplicates);
+int strv_extend_strv_concat(char ***a, char * const *b, const char *suffix);
+int strv_prepend(char ***l, const char *value);
+int strv_extend(char ***l, const char *value);
+int strv_extendf(char ***l, const char *format, ...) _printf_(2,0);
+int strv_extend_front(char ***l, const char *value);
+int strv_push(char ***l, char *value);
+int strv_push_pair(char ***l, char *a, char *b);
+int strv_insert(char ***l, size_t position, char *value);
+
+static inline int strv_push_prepend(char ***l, char *value) {
+ return strv_insert(l, 0, value);
+}
+
+int strv_consume(char ***l, char *value);
+int strv_consume_pair(char ***l, char *a, char *b);
+int strv_consume_prepend(char ***l, char *value);
+
+char **strv_remove(char **l, const char *s);
+char **strv_uniq(char **l);
+bool strv_is_uniq(char * const *l);
+
+int strv_compare(char * const *a, char * const *b);
+static inline bool strv_equal(char * const *a, char * const *b) {
+ return strv_compare(a, b) == 0;
+}
+
+char **strv_new_internal(const char *x, ...) _sentinel_;
+char **strv_new_ap(const char *x, va_list ap);
+#define strv_new(...) strv_new_internal(__VA_ARGS__, NULL)
+
+#define STRV_IGNORE ((const char *) POINTER_MAX)
+
+static inline const char* STRV_IFNOTNULL(const char *x) {
+ return x ? x : STRV_IGNORE;
+}
+
+static inline bool strv_isempty(char * const *l) {
+ return !l || !*l;
+}
+
+char **strv_split_newlines(const char *s);
+
+int strv_split_full(char ***t, const char *s, const char *separators, ExtractFlags flags);
+static inline char **strv_split(const char *s, const char *separators) {
+ char **ret;
+ int r;
+
+ r = strv_split_full(&ret, s, separators, 0);
+ if (r < 0)
+ return NULL;
+
+ return ret;
+}
+
+/* Given a string containing white-space separated tuples of words themselves separated by ':',
+ * returns a vector of strings. If the second element in a tuple is missing, the corresponding
+ * string in the vector is an empty string. */
+int strv_split_colon_pairs(char ***t, const char *s);
+
+char *strv_join_full(char * const *l, const char *separator, const char *prefix, bool escape_separtor);
+static inline char *strv_join(char * const *l, const char *separator) {
+ return strv_join_full(l, separator, NULL, false);
+}
+
+char **strv_parse_nulstr(const char *s, size_t l);
+char **strv_split_nulstr(const char *s);
+int strv_make_nulstr(char * const *l, char **p, size_t *n);
+
+static inline int strv_from_nulstr(char ***a, const char *nulstr) {
+ char **t;
+
+ t = strv_split_nulstr(nulstr);
+ if (!t)
+ return -ENOMEM;
+ *a = t;
+ return 0;
+}
+
+bool strv_overlap(char * const *a, char * const *b) _pure_;
+
+#define STRV_FOREACH(s, l) \
+ for ((s) = (l); (s) && *(s); (s)++)
+
+#define STRV_FOREACH_BACKWARDS(s, l) \
+ for (s = ({ \
+ typeof(l) _l = l; \
+ _l ? _l + strv_length(_l) - 1U : NULL; \
+ }); \
+ (l) && ((s) >= (l)); \
+ (s)--)
+
+#define STRV_FOREACH_PAIR(x, y, l) \
+ for ((x) = (l), (y) = (x) ? (x+1) : NULL; (x) && *(x) && *(y); (x) += 2, (y) = (x + 1))
+
+char **strv_sort(char **l);
+void strv_print(char * const *l);
+
+#define strv_from_stdarg_alloca(first) \
+ ({ \
+ char **_l; \
+ \
+ if (!first) \
+ _l = (char**) &first; \
+ else { \
+ size_t _n; \
+ va_list _ap; \
+ \
+ _n = 1; \
+ va_start(_ap, first); \
+ while (va_arg(_ap, char*)) \
+ _n++; \
+ va_end(_ap); \
+ \
+ _l = newa(char*, _n+1); \
+ _l[_n = 0] = (char*) first; \
+ va_start(_ap, first); \
+ for (;;) { \
+ _l[++_n] = va_arg(_ap, char*); \
+ if (!_l[_n]) \
+ break; \
+ } \
+ va_end(_ap); \
+ } \
+ _l; \
+ })
+
+#define STR_IN_SET(x, ...) strv_contains(STRV_MAKE(__VA_ARGS__), x)
+#define STRPTR_IN_SET(x, ...) \
+ ({ \
+ const char* _x = (x); \
+ _x && strv_contains(STRV_MAKE(__VA_ARGS__), _x); \
+ })
+
+#define STRCASE_IN_SET(x, ...) strv_contains_case(STRV_MAKE(__VA_ARGS__), x)
+#define STRCASEPTR_IN_SET(x, ...) \
+ ({ \
+ const char* _x = (x); \
+ _x && strv_contains_case(STRV_MAKE(__VA_ARGS__), _x); \
+ })
+
+#define STARTSWITH_SET(p, ...) \
+ ({ \
+ const char *_p = (p); \
+ char *_found = NULL, **_i; \
+ STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \
+ _found = startswith(_p, *_i); \
+ if (_found) \
+ break; \
+ } \
+ _found; \
+ })
+
+#define ENDSWITH_SET(p, ...) \
+ ({ \
+ const char *_p = (p); \
+ char *_found = NULL, **_i; \
+ STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \
+ _found = endswith(_p, *_i); \
+ if (_found) \
+ break; \
+ } \
+ _found; \
+ })
+
+#define FOREACH_STRING(x, y, ...) \
+ for (char **_l = STRV_MAKE(({ x = y; }), ##__VA_ARGS__); \
+ x; \
+ x = *(++_l))
+
+char **strv_reverse(char **l);
+char **strv_shell_escape(char **l, const char *bad);
+
+bool strv_fnmatch_full(char* const* patterns, const char *s, int flags, size_t *matched_pos);
+static inline bool strv_fnmatch(char* const* patterns, const char *s) {
+ return strv_fnmatch_full(patterns, s, 0, NULL);
+}
+
+static inline bool strv_fnmatch_or_empty(char* const* patterns, const char *s, int flags) {
+ assert(s);
+ return strv_isempty(patterns) ||
+ strv_fnmatch_full(patterns, s, flags, NULL);
+}
+
+char ***strv_free_free(char ***l);
+DEFINE_TRIVIAL_CLEANUP_FUNC(char***, strv_free_free);
+
+char **strv_skip(char **l, size_t n);
+
+int strv_extend_n(char ***l, const char *value, size_t n);
+
+int fputstrv(FILE *f, char * const *l, const char *separator, bool *space);
+
+#define strv_free_and_replace(a, b) \
+ ({ \
+ strv_free(a); \
+ (a) = (b); \
+ (b) = NULL; \
+ 0; \
+ })
+
+extern const struct hash_ops string_strv_hash_ops;
+int _string_strv_hashmap_put(Hashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS);
+int _string_strv_ordered_hashmap_put(OrderedHashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS);
+#define string_strv_hashmap_put(h, k, v) _string_strv_hashmap_put(h, k, v HASHMAP_DEBUG_SRC_ARGS)
+#define string_strv_ordered_hashmap_put(h, k, v) _string_strv_ordered_hashmap_put(h, k, v HASHMAP_DEBUG_SRC_ARGS)
diff --git a/src/basic/strxcpyx.c b/src/basic/strxcpyx.c
new file mode 100644
index 0000000..dbbf7d0
--- /dev/null
+++ b/src/basic/strxcpyx.c
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/*
+ * Concatenates/copies strings. In any case, terminates in all cases
+ * with '\0' and moves the @dest pointer forward to the added '\0'.
+ * Returns the remaining size, and 0 if the string was truncated.
+ *
+ * Due to the intended usage, these helpers silently noop invocations
+ * having zero size. This is technically an exception to the above
+ * statement "terminates in all cases". It's unexpected for such calls to
+ * occur outside of a loop where this is the preferred behavior.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "strxcpyx.h"
+
+size_t strnpcpy(char **dest, size_t size, const char *src, size_t len) {
+ assert(dest);
+ assert(src);
+
+ if (size == 0)
+ return 0;
+
+ if (len >= size) {
+ if (size > 1)
+ *dest = mempcpy(*dest, src, size-1);
+ size = 0;
+ } else if (len > 0) {
+ *dest = mempcpy(*dest, src, len);
+ size -= len;
+ }
+
+ *dest[0] = '\0';
+ return size;
+}
+
+size_t strpcpy(char **dest, size_t size, const char *src) {
+ assert(dest);
+ assert(src);
+
+ return strnpcpy(dest, size, src, strlen(src));
+}
+
+size_t strpcpyf(char **dest, size_t size, const char *src, ...) {
+ va_list va;
+ int i;
+
+ assert(dest);
+ assert(src);
+
+ if (size == 0)
+ return 0;
+
+ va_start(va, src);
+ i = vsnprintf(*dest, size, src, va);
+ if (i < (int)size) {
+ *dest += i;
+ size -= i;
+ } else
+ size = 0;
+ va_end(va);
+ return size;
+}
+
+size_t strpcpyl(char **dest, size_t size, const char *src, ...) {
+ va_list va;
+
+ assert(dest);
+ assert(src);
+
+ va_start(va, src);
+ do {
+ size = strpcpy(dest, size, src);
+ src = va_arg(va, char *);
+ } while (src);
+ va_end(va);
+ return size;
+}
+
+size_t strnscpy(char *dest, size_t size, const char *src, size_t len) {
+ char *s;
+
+ assert(dest);
+ assert(src);
+
+ s = dest;
+ return strnpcpy(&s, size, src, len);
+}
+
+size_t strscpy(char *dest, size_t size, const char *src) {
+ assert(dest);
+ assert(src);
+
+ return strnscpy(dest, size, src, strlen(src));
+}
+
+size_t strscpyl(char *dest, size_t size, const char *src, ...) {
+ va_list va;
+ char *s;
+
+ assert(dest);
+ assert(src);
+
+ va_start(va, src);
+ s = dest;
+ do {
+ size = strpcpy(&s, size, src);
+ src = va_arg(va, char *);
+ } while (src);
+ va_end(va);
+
+ return size;
+}
diff --git a/src/basic/strxcpyx.h b/src/basic/strxcpyx.h
new file mode 100644
index 0000000..cdef492
--- /dev/null
+++ b/src/basic/strxcpyx.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stddef.h>
+
+#include "macro.h"
+
+size_t strnpcpy(char **dest, size_t size, const char *src, size_t len);
+size_t strpcpy(char **dest, size_t size, const char *src);
+size_t strpcpyf(char **dest, size_t size, const char *src, ...) _printf_(3, 4);
+size_t strpcpyl(char **dest, size_t size, const char *src, ...) _sentinel_;
+size_t strnscpy(char *dest, size_t size, const char *src, size_t len);
+size_t strscpy(char *dest, size_t size, const char *src);
+size_t strscpyl(char *dest, size_t size, const char *src, ...) _sentinel_;
diff --git a/src/basic/syslog-util.c b/src/basic/syslog-util.c
new file mode 100644
index 0000000..4eb9eba
--- /dev/null
+++ b/src/basic/syslog-util.c
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <syslog.h>
+
+#include "sd-id128.h"
+
+#include "glob-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-table.h"
+#include "syslog-util.h"
+#include "unit-name.h"
+
+int syslog_parse_priority(const char **p, int *priority, bool with_facility) {
+ int a = 0, b = 0, c = 0;
+ const char *end;
+ size_t k;
+
+ assert(p);
+ assert(*p);
+ assert(priority);
+
+ if ((*p)[0] != '<')
+ return 0;
+
+ end = strchr(*p, '>');
+ if (!end)
+ return 0;
+
+ k = end - *p;
+ assert(k > 0);
+
+ if (k == 2)
+ c = undecchar((*p)[1]);
+ else if (k == 3) {
+ b = undecchar((*p)[1]);
+ c = undecchar((*p)[2]);
+ } else if (k == 4) {
+ a = undecchar((*p)[1]);
+ b = undecchar((*p)[2]);
+ c = undecchar((*p)[3]);
+ } else
+ return 0;
+
+ if (a < 0 || b < 0 || c < 0 ||
+ (!with_facility && (a || b || c > 7)))
+ return 0;
+
+ if (with_facility)
+ *priority = a*100 + b*10 + c;
+ else
+ *priority = (*priority & LOG_FACMASK) | c;
+
+ *p += k + 1;
+ return 1;
+}
+
+static const char *const log_facility_unshifted_table[LOG_NFACILITIES] = {
+ [LOG_FAC(LOG_KERN)] = "kern",
+ [LOG_FAC(LOG_USER)] = "user",
+ [LOG_FAC(LOG_MAIL)] = "mail",
+ [LOG_FAC(LOG_DAEMON)] = "daemon",
+ [LOG_FAC(LOG_AUTH)] = "auth",
+ [LOG_FAC(LOG_SYSLOG)] = "syslog",
+ [LOG_FAC(LOG_LPR)] = "lpr",
+ [LOG_FAC(LOG_NEWS)] = "news",
+ [LOG_FAC(LOG_UUCP)] = "uucp",
+ [LOG_FAC(LOG_CRON)] = "cron",
+ [LOG_FAC(LOG_AUTHPRIV)] = "authpriv",
+ [LOG_FAC(LOG_FTP)] = "ftp",
+ [LOG_FAC(LOG_LOCAL0)] = "local0",
+ [LOG_FAC(LOG_LOCAL1)] = "local1",
+ [LOG_FAC(LOG_LOCAL2)] = "local2",
+ [LOG_FAC(LOG_LOCAL3)] = "local3",
+ [LOG_FAC(LOG_LOCAL4)] = "local4",
+ [LOG_FAC(LOG_LOCAL5)] = "local5",
+ [LOG_FAC(LOG_LOCAL6)] = "local6",
+ [LOG_FAC(LOG_LOCAL7)] = "local7"
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_facility_unshifted, int, LOG_FAC(~0));
+
+bool log_facility_unshifted_is_valid(int facility) {
+ return facility >= 0 && facility <= LOG_FAC(~0);
+}
+
+static const char *const log_level_table[] = {
+ [LOG_EMERG] = "emerg",
+ [LOG_ALERT] = "alert",
+ [LOG_CRIT] = "crit",
+ [LOG_ERR] = "err",
+ [LOG_WARNING] = "warning",
+ [LOG_NOTICE] = "notice",
+ [LOG_INFO] = "info",
+ [LOG_DEBUG] = "debug"
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_level, int, LOG_DEBUG);
+
+bool log_level_is_valid(int level) {
+ return level >= 0 && level <= LOG_DEBUG;
+}
+
+/* The maximum size for a log namespace length. This is the file name size limit 255 minus the size of a
+ * formatted machine ID minus a separator char */
+#define LOG_NAMESPACE_MAX (NAME_MAX - (SD_ID128_STRING_MAX - 1) - 1)
+
+bool log_namespace_name_valid(const char *s) {
+ /* Let's make sure the namespace fits in a filename that is prefixed with the machine ID and a dot
+ * (so that /var/log/journal/<machine-id>.<namespace> can be created based on it). Also make sure it
+ * is suitable as unit instance name, and does not contain fishy characters. */
+
+ if (!filename_is_valid(s))
+ return false;
+
+ if (strlen(s) > LOG_NAMESPACE_MAX)
+ return false;
+
+ if (!unit_instance_is_valid(s))
+ return false;
+
+ if (!string_is_safe(s))
+ return false;
+
+ /* Let's avoid globbing for now */
+ if (string_is_glob(s))
+ return false;
+
+ return true;
+}
diff --git a/src/basic/syslog-util.h b/src/basic/syslog-util.h
new file mode 100644
index 0000000..d7aa97f
--- /dev/null
+++ b/src/basic/syslog-util.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int log_facility_unshifted_to_string_alloc(int i, char **s);
+int log_facility_unshifted_from_string(const char *s);
+bool log_facility_unshifted_is_valid(int faciliy);
+
+int log_level_to_string_alloc(int i, char **s);
+int log_level_from_string(const char *s);
+bool log_level_is_valid(int level);
+
+int syslog_parse_priority(const char **p, int *priority, bool with_facility);
+
+bool log_namespace_name_valid(const char *s);
diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c
new file mode 100644
index 0000000..e00e9e8
--- /dev/null
+++ b/src/basic/terminal-util.c
@@ -0,0 +1,1386 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/kd.h>
+#include <linux/tiocl.h>
+#include <linux/vt.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/sysmacros.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "namespace-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "util.h"
+
+static volatile unsigned cached_columns = 0;
+static volatile unsigned cached_lines = 0;
+
+static volatile int cached_on_tty = -1;
+static volatile int cached_colors_enabled = -1;
+static volatile int cached_underline_enabled = -1;
+
+int chvt(int vt) {
+ _cleanup_close_ int fd;
+
+ /* Switch to the specified vt number. If the VT is specified <= 0 switch to the VT the kernel log messages go,
+ * if that's configured. */
+
+ fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ if (vt <= 0) {
+ int tiocl[2] = {
+ TIOCL_GETKMSGREDIRECT,
+ 0
+ };
+
+ if (ioctl(fd, TIOCLINUX, tiocl) < 0)
+ return -errno;
+
+ vt = tiocl[0] <= 0 ? 1 : tiocl[0];
+ }
+
+ if (ioctl(fd, VT_ACTIVATE, vt) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int read_one_char(FILE *f, char *ret, usec_t t, bool *need_nl) {
+ _cleanup_free_ char *line = NULL;
+ struct termios old_termios;
+ int r, fd;
+
+ assert(f);
+ assert(ret);
+
+ /* If this is a terminal, then switch canonical mode off, so that we can read a single
+ * character. (Note that fmemopen() streams do not have an fd associated with them, let's handle that
+ * nicely.) */
+ fd = fileno(f);
+ if (fd >= 0 && tcgetattr(fd, &old_termios) >= 0) {
+ struct termios new_termios = old_termios;
+
+ new_termios.c_lflag &= ~ICANON;
+ new_termios.c_cc[VMIN] = 1;
+ new_termios.c_cc[VTIME] = 0;
+
+ if (tcsetattr(fd, TCSADRAIN, &new_termios) >= 0) {
+ char c;
+
+ if (t != USEC_INFINITY) {
+ if (fd_wait_for_event(fd, POLLIN, t) <= 0) {
+ (void) tcsetattr(fd, TCSADRAIN, &old_termios);
+ return -ETIMEDOUT;
+ }
+ }
+
+ r = safe_fgetc(f, &c);
+ (void) tcsetattr(fd, TCSADRAIN, &old_termios);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EIO;
+
+ if (need_nl)
+ *need_nl = c != '\n';
+
+ *ret = c;
+ return 0;
+ }
+ }
+
+ if (t != USEC_INFINITY && fd > 0) {
+ /* Let's wait the specified amount of time for input. When we have no fd we skip this, under
+ * the assumption that this is an fmemopen() stream or so where waiting doesn't make sense
+ * anyway, as the data is either already in the stream or cannot possible be placed there
+ * while we access the stream */
+
+ if (fd_wait_for_event(fd, POLLIN, t) <= 0)
+ return -ETIMEDOUT;
+ }
+
+ /* If this is not a terminal, then read a full line instead */
+
+ r = read_line(f, 16, &line); /* longer than necessary, to eat up UTF-8 chars/vt100 key sequences */
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EIO;
+
+ if (strlen(line) != 1)
+ return -EBADMSG;
+
+ if (need_nl)
+ *need_nl = false;
+
+ *ret = line[0];
+ return 0;
+}
+
+#define DEFAULT_ASK_REFRESH_USEC (2*USEC_PER_SEC)
+
+int ask_char(char *ret, const char *replies, const char *fmt, ...) {
+ int r;
+
+ assert(ret);
+ assert(replies);
+ assert(fmt);
+
+ for (;;) {
+ va_list ap;
+ char c;
+ bool need_nl = true;
+
+ if (colors_enabled())
+ fputs(ANSI_HIGHLIGHT, stdout);
+
+ putchar('\r');
+
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+
+ if (colors_enabled())
+ fputs(ANSI_NORMAL, stdout);
+
+ fflush(stdout);
+
+ r = read_one_char(stdin, &c, DEFAULT_ASK_REFRESH_USEC, &need_nl);
+ if (r < 0) {
+
+ if (r == -ETIMEDOUT)
+ continue;
+
+ if (r == -EBADMSG) {
+ puts("Bad input, please try again.");
+ continue;
+ }
+
+ putchar('\n');
+ return r;
+ }
+
+ if (need_nl)
+ putchar('\n');
+
+ if (strchr(replies, c)) {
+ *ret = c;
+ return 0;
+ }
+
+ puts("Read unexpected character, please try again.");
+ }
+}
+
+int ask_string(char **ret, const char *text, ...) {
+ _cleanup_free_ char *line = NULL;
+ va_list ap;
+ int r;
+
+ assert(ret);
+ assert(text);
+
+ if (colors_enabled())
+ fputs(ANSI_HIGHLIGHT, stdout);
+
+ va_start(ap, text);
+ vprintf(text, ap);
+ va_end(ap);
+
+ if (colors_enabled())
+ fputs(ANSI_NORMAL, stdout);
+
+ fflush(stdout);
+
+ r = read_line(stdin, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EIO;
+
+ *ret = TAKE_PTR(line);
+ return 0;
+}
+
+int reset_terminal_fd(int fd, bool switch_to_text) {
+ struct termios termios;
+ int r = 0;
+
+ /* Set terminal to some sane defaults */
+
+ assert(fd >= 0);
+
+ /* We leave locked terminal attributes untouched, so that
+ * Plymouth may set whatever it wants to set, and we don't
+ * interfere with that. */
+
+ /* Disable exclusive mode, just in case */
+ (void) ioctl(fd, TIOCNXCL);
+
+ /* Switch to text mode */
+ if (switch_to_text)
+ (void) ioctl(fd, KDSETMODE, KD_TEXT);
+
+ /* Set default keyboard mode */
+ (void) vt_reset_keyboard(fd);
+
+ if (tcgetattr(fd, &termios) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* We only reset the stuff that matters to the software. How
+ * hardware is set up we don't touch assuming that somebody
+ * else will do that for us */
+
+ termios.c_iflag &= ~(IGNBRK | BRKINT | ISTRIP | INLCR | IGNCR | IUCLC);
+ termios.c_iflag |= ICRNL | IMAXBEL | IUTF8;
+ termios.c_oflag |= ONLCR;
+ termios.c_cflag |= CREAD;
+ termios.c_lflag = ISIG | ICANON | IEXTEN | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOPRT | ECHOKE;
+
+ termios.c_cc[VINTR] = 03; /* ^C */
+ termios.c_cc[VQUIT] = 034; /* ^\ */
+ termios.c_cc[VERASE] = 0177;
+ termios.c_cc[VKILL] = 025; /* ^X */
+ termios.c_cc[VEOF] = 04; /* ^D */
+ termios.c_cc[VSTART] = 021; /* ^Q */
+ termios.c_cc[VSTOP] = 023; /* ^S */
+ termios.c_cc[VSUSP] = 032; /* ^Z */
+ termios.c_cc[VLNEXT] = 026; /* ^V */
+ termios.c_cc[VWERASE] = 027; /* ^W */
+ termios.c_cc[VREPRINT] = 022; /* ^R */
+ termios.c_cc[VEOL] = 0;
+ termios.c_cc[VEOL2] = 0;
+
+ termios.c_cc[VTIME] = 0;
+ termios.c_cc[VMIN] = 1;
+
+ if (tcsetattr(fd, TCSANOW, &termios) < 0)
+ r = -errno;
+
+finish:
+ /* Just in case, flush all crap out */
+ (void) tcflush(fd, TCIOFLUSH);
+
+ return r;
+}
+
+int reset_terminal(const char *name) {
+ _cleanup_close_ int fd = -1;
+
+ /* We open the terminal with O_NONBLOCK here, to ensure we
+ * don't block on carrier if this is a terminal with carrier
+ * configured. */
+
+ fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return fd;
+
+ return reset_terminal_fd(fd, true);
+}
+
+int open_terminal(const char *name, int mode) {
+ unsigned c = 0;
+ int fd;
+
+ /*
+ * If a TTY is in the process of being closed opening it might
+ * cause EIO. This is horribly awful, but unlikely to be
+ * changed in the kernel. Hence we work around this problem by
+ * retrying a couple of times.
+ *
+ * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/554172/comments/245
+ */
+
+ if (mode & O_CREAT)
+ return -EINVAL;
+
+ for (;;) {
+ fd = open(name, mode, 0);
+ if (fd >= 0)
+ break;
+
+ if (errno != EIO)
+ return -errno;
+
+ /* Max 1s in total */
+ if (c >= 20)
+ return -errno;
+
+ usleep(50 * USEC_PER_MSEC);
+ c++;
+ }
+
+ if (isatty(fd) <= 0) {
+ safe_close(fd);
+ return -ENOTTY;
+ }
+
+ return fd;
+}
+
+int acquire_terminal(
+ const char *name,
+ AcquireTerminalFlags flags,
+ usec_t timeout) {
+
+ _cleanup_close_ int notify = -1, fd = -1;
+ usec_t ts = USEC_INFINITY;
+ int r, wd = -1;
+
+ assert(name);
+ assert(IN_SET(flags & ~ACQUIRE_TERMINAL_PERMISSIVE, ACQUIRE_TERMINAL_TRY, ACQUIRE_TERMINAL_FORCE, ACQUIRE_TERMINAL_WAIT));
+
+ /* We use inotify to be notified when the tty is closed. We create the watch before checking if we can actually
+ * acquire it, so that we don't lose any event.
+ *
+ * Note: strictly speaking this actually watches for the device being closed, it does *not* really watch
+ * whether a tty loses its controlling process. However, unless some rogue process uses TIOCNOTTY on /dev/tty
+ * *after* closing its tty otherwise this will not become a problem. As long as the administrator makes sure to
+ * not configure any service on the same tty as an untrusted user this should not be a problem. (Which they
+ * probably should not do anyway.) */
+
+ if ((flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_WAIT) {
+ notify = inotify_init1(IN_CLOEXEC | (timeout != USEC_INFINITY ? IN_NONBLOCK : 0));
+ if (notify < 0)
+ return -errno;
+
+ wd = inotify_add_watch(notify, name, IN_CLOSE);
+ if (wd < 0)
+ return -errno;
+
+ if (timeout != USEC_INFINITY)
+ ts = now(CLOCK_MONOTONIC);
+ }
+
+ for (;;) {
+ struct sigaction sa_old, sa_new = {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_RESTART,
+ };
+
+ if (notify >= 0) {
+ r = flush_fd(notify);
+ if (r < 0)
+ return r;
+ }
+
+ /* We pass here O_NOCTTY only so that we can check the return value TIOCSCTTY and have a reliable way
+ * to figure out if we successfully became the controlling process of the tty */
+ fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed if we already own the tty. */
+ assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0);
+
+ /* First, try to get the tty */
+ r = ioctl(fd, TIOCSCTTY,
+ (flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_FORCE) < 0 ? -errno : 0;
+
+ /* Reset signal handler to old value */
+ assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0);
+
+ /* Success? Exit the loop now! */
+ if (r >= 0)
+ break;
+
+ /* Any failure besides -EPERM? Fail, regardless of the mode. */
+ if (r != -EPERM)
+ return r;
+
+ if (flags & ACQUIRE_TERMINAL_PERMISSIVE) /* If we are in permissive mode, then EPERM is fine, turn this
+ * into a success. Note that EPERM is also returned if we
+ * already are the owner of the TTY. */
+ break;
+
+ if (flags != ACQUIRE_TERMINAL_WAIT) /* If we are in TRY or FORCE mode, then propagate EPERM as EPERM */
+ return r;
+
+ assert(notify >= 0);
+ assert(wd >= 0);
+
+ for (;;) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+
+ if (timeout != USEC_INFINITY) {
+ usec_t n;
+
+ assert(ts != USEC_INFINITY);
+
+ n = now(CLOCK_MONOTONIC);
+ if (ts + timeout < n)
+ return -ETIMEDOUT;
+
+ r = fd_wait_for_event(notify, POLLIN, ts + timeout - n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+ }
+
+ l = read(notify, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ if (e->mask & IN_Q_OVERFLOW) /* If we hit an inotify queue overflow, simply check if the terminal is up for grabs now. */
+ break;
+
+ if (e->wd != wd || !(e->mask & IN_CLOSE)) /* Safety checks */
+ return -EIO;
+ }
+
+ break;
+ }
+
+ /* We close the tty fd here since if the old session ended our handle will be dead. It's important that
+ * we do this after sleeping, so that we don't enter an endless loop. */
+ fd = safe_close(fd);
+ }
+
+ return TAKE_FD(fd);
+}
+
+int release_terminal(void) {
+ static const struct sigaction sa_new = {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_RESTART,
+ };
+
+ _cleanup_close_ int fd = -1;
+ struct sigaction sa_old;
+ int r;
+
+ fd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed
+ * by our own TIOCNOTTY */
+ assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0);
+
+ r = ioctl(fd, TIOCNOTTY) < 0 ? -errno : 0;
+
+ assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0);
+
+ return r;
+}
+
+int terminal_vhangup_fd(int fd) {
+ assert(fd >= 0);
+
+ if (ioctl(fd, TIOCVHANGUP) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int terminal_vhangup(const char *name) {
+ _cleanup_close_ int fd;
+
+ fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return fd;
+
+ return terminal_vhangup_fd(fd);
+}
+
+int vt_disallocate(const char *name) {
+ const char *e;
+ int r;
+
+ /* Deallocate the VT if possible. If not possible
+ * (i.e. because it is the active one), at least clear it
+ * entirely (including the scrollback buffer). */
+
+ e = path_startswith(name, "/dev/");
+ if (!e)
+ return -EINVAL;
+
+ if (tty_is_vc(name)) {
+ _cleanup_close_ int fd = -1;
+ unsigned u;
+ const char *n;
+
+ n = startswith(e, "tty");
+ if (!n)
+ return -EINVAL;
+
+ r = safe_atou(n, &u);
+ if (r < 0)
+ return r;
+
+ if (u <= 0)
+ return -EINVAL;
+
+ /* Try to deallocate */
+ fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return fd;
+
+ r = ioctl(fd, VT_DISALLOCATE, u);
+ if (r >= 0)
+ return 0;
+ if (errno != EBUSY)
+ return -errno;
+ }
+
+ /* So this is not a VT (in which case we cannot deallocate it),
+ * or we failed to deallocate. Let's at least clear the screen. */
+
+ _cleanup_close_ int fd2 = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd2 < 0)
+ return fd2;
+
+ (void) loop_write(fd2,
+ "\033[r" /* clear scrolling region */
+ "\033[H" /* move home */
+ "\033[3J", /* clear screen including scrollback, requires Linux 2.6.40 */
+ 10, false);
+ return 0;
+}
+
+int make_console_stdio(void) {
+ int fd, r;
+
+ /* Make /dev/console the controlling terminal and stdin/stdout/stderr, if we can. If we can't use
+ * /dev/null instead. This is particularly useful if /dev/console is turned off, e.g. if console=null
+ * is specified on the kernel command line. */
+
+ fd = acquire_terminal("/dev/console", ACQUIRE_TERMINAL_FORCE|ACQUIRE_TERMINAL_PERMISSIVE, USEC_INFINITY);
+ if (fd < 0) {
+ log_warning_errno(fd, "Failed to acquire terminal, using /dev/null stdin/stdout/stderr instead: %m");
+
+ r = make_null_stdio();
+ if (r < 0)
+ return log_error_errno(r, "Failed to make /dev/null stdin/stdout/stderr: %m");
+
+ } else {
+ r = reset_terminal_fd(fd, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to reset terminal, ignoring: %m");
+
+ r = rearrange_stdio(fd, fd, fd); /* This invalidates 'fd' both on success and on failure. */
+ if (r < 0)
+ return log_error_errno(r, "Failed to make terminal stdin/stdout/stderr: %m");
+ }
+
+ reset_terminal_feature_caches();
+ return 0;
+}
+
+bool tty_is_vc(const char *tty) {
+ assert(tty);
+
+ return vtnr_from_tty(tty) >= 0;
+}
+
+bool tty_is_console(const char *tty) {
+ assert(tty);
+
+ return streq(skip_dev_prefix(tty), "console");
+}
+
+int vtnr_from_tty(const char *tty) {
+ int i, r;
+
+ assert(tty);
+
+ tty = skip_dev_prefix(tty);
+
+ if (!startswith(tty, "tty") )
+ return -EINVAL;
+
+ if (tty[3] < '0' || tty[3] > '9')
+ return -EINVAL;
+
+ r = safe_atoi(tty+3, &i);
+ if (r < 0)
+ return r;
+
+ if (i < 0 || i > 63)
+ return -EINVAL;
+
+ return i;
+}
+
+ int resolve_dev_console(char **ret) {
+ _cleanup_free_ char *active = NULL;
+ char *tty;
+ int r;
+
+ assert(ret);
+
+ /* Resolve where /dev/console is pointing to, if /sys is actually ours (i.e. not read-only-mounted which is a
+ * sign for container setups) */
+
+ if (path_is_read_only_fs("/sys") > 0)
+ return -ENOMEDIUM;
+
+ r = read_one_line_file("/sys/class/tty/console/active", &active);
+ if (r < 0)
+ return r;
+
+ /* If multiple log outputs are configured the last one is what /dev/console points to */
+ tty = strrchr(active, ' ');
+ if (tty)
+ tty++;
+ else
+ tty = active;
+
+ if (streq(tty, "tty0")) {
+ active = mfree(active);
+
+ /* Get the active VC (e.g. tty1) */
+ r = read_one_line_file("/sys/class/tty/tty0/active", &active);
+ if (r < 0)
+ return r;
+
+ tty = active;
+ }
+
+ if (tty == active)
+ *ret = TAKE_PTR(active);
+ else {
+ char *tmp;
+
+ tmp = strdup(tty);
+ if (!tmp)
+ return -ENOMEM;
+
+ *ret = tmp;
+ }
+
+ return 0;
+}
+
+int get_kernel_consoles(char ***ret) {
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ int r;
+
+ assert(ret);
+
+ /* If /sys is mounted read-only this means we are running in some kind of container environment. In that
+ * case /sys would reflect the host system, not us, hence ignore the data we can read from it. */
+ if (path_is_read_only_fs("/sys") > 0)
+ goto fallback;
+
+ r = read_one_line_file("/sys/class/tty/console/active", &line);
+ if (r < 0)
+ return r;
+
+ p = line;
+ for (;;) {
+ _cleanup_free_ char *tty = NULL, *path = NULL;
+
+ r = extract_first_word(&p, &tty, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (streq(tty, "tty0")) {
+ tty = mfree(tty);
+ r = read_one_line_file("/sys/class/tty/tty0/active", &tty);
+ if (r < 0)
+ return r;
+ }
+
+ path = path_join("/dev", tty);
+ if (!path)
+ return -ENOMEM;
+
+ if (access(path, F_OK) < 0) {
+ log_debug_errno(errno, "Console device %s is not accessible, skipping: %m", path);
+ continue;
+ }
+
+ r = strv_consume(&l, TAKE_PTR(path));
+ if (r < 0)
+ return r;
+ }
+
+ if (strv_isempty(l)) {
+ log_debug("No devices found for system console");
+ goto fallback;
+ }
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+
+fallback:
+ r = strv_extend(&l, "/dev/console");
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+}
+
+bool tty_is_vc_resolve(const char *tty) {
+ _cleanup_free_ char *resolved = NULL;
+
+ assert(tty);
+
+ tty = skip_dev_prefix(tty);
+
+ if (streq(tty, "console")) {
+ if (resolve_dev_console(&resolved) < 0)
+ return false;
+
+ tty = resolved;
+ }
+
+ return tty_is_vc(tty);
+}
+
+const char *default_term_for_tty(const char *tty) {
+ return tty && tty_is_vc_resolve(tty) ? "linux" : "vt220";
+}
+
+int fd_columns(int fd) {
+ struct winsize ws = {};
+
+ if (fd < 0)
+ return -EBADF;
+
+ if (ioctl(fd, TIOCGWINSZ, &ws) < 0)
+ return -errno;
+
+ if (ws.ws_col <= 0)
+ return -EIO;
+
+ return ws.ws_col;
+}
+
+unsigned columns(void) {
+ const char *e;
+ int c;
+
+ if (cached_columns > 0)
+ return cached_columns;
+
+ c = 0;
+ e = getenv("COLUMNS");
+ if (e)
+ (void) safe_atoi(e, &c);
+
+ if (c <= 0 || c > USHRT_MAX) {
+ c = fd_columns(STDOUT_FILENO);
+ if (c <= 0)
+ c = 80;
+ }
+
+ cached_columns = c;
+ return cached_columns;
+}
+
+int fd_lines(int fd) {
+ struct winsize ws = {};
+
+ if (fd < 0)
+ return -EBADF;
+
+ if (ioctl(fd, TIOCGWINSZ, &ws) < 0)
+ return -errno;
+
+ if (ws.ws_row <= 0)
+ return -EIO;
+
+ return ws.ws_row;
+}
+
+unsigned lines(void) {
+ const char *e;
+ int l;
+
+ if (cached_lines > 0)
+ return cached_lines;
+
+ l = 0;
+ e = getenv("LINES");
+ if (e)
+ (void) safe_atoi(e, &l);
+
+ if (l <= 0 || l > USHRT_MAX) {
+ l = fd_lines(STDOUT_FILENO);
+ if (l <= 0)
+ l = 24;
+ }
+
+ cached_lines = l;
+ return cached_lines;
+}
+
+/* intended to be used as a SIGWINCH sighandler */
+void columns_lines_cache_reset(int signum) {
+ cached_columns = 0;
+ cached_lines = 0;
+}
+
+void reset_terminal_feature_caches(void) {
+ cached_columns = 0;
+ cached_lines = 0;
+
+ cached_colors_enabled = -1;
+ cached_underline_enabled = -1;
+ cached_on_tty = -1;
+}
+
+bool on_tty(void) {
+
+ /* We check both stdout and stderr, so that situations where pipes on the shell are used are reliably
+ * recognized, regardless if only the output or the errors are piped to some place. Since on_tty() is generally
+ * used to default to a safer, non-interactive, non-color mode of operation it's probably good to be defensive
+ * here, and check for both. Note that we don't check for STDIN_FILENO, because it should fine to use fancy
+ * terminal functionality when outputting stuff, even if the input is piped to us. */
+
+ if (cached_on_tty < 0)
+ cached_on_tty =
+ isatty(STDOUT_FILENO) > 0 &&
+ isatty(STDERR_FILENO) > 0;
+
+ return cached_on_tty;
+}
+
+int getttyname_malloc(int fd, char **ret) {
+ char path[PATH_MAX], *c; /* PATH_MAX is counted *with* the trailing NUL byte */
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ r = ttyname_r(fd, path, sizeof path); /* positive error */
+ assert(r >= 0);
+ if (r == ERANGE)
+ return -ENAMETOOLONG;
+ if (r > 0)
+ return -r;
+
+ c = strdup(skip_dev_prefix(path));
+ if (!c)
+ return -ENOMEM;
+
+ *ret = c;
+ return 0;
+}
+
+int getttyname_harder(int fd, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ r = getttyname_malloc(fd, &s);
+ if (r < 0)
+ return r;
+
+ if (streq(s, "tty"))
+ return get_ctty(0, NULL, ret);
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+int get_ctty_devnr(pid_t pid, dev_t *d) {
+ int r;
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ unsigned long ttynr;
+
+ assert(pid >= 0);
+
+ p = procfs_file_alloca(pid, "stat");
+ r = read_one_line_file(p, &line);
+ if (r < 0)
+ return r;
+
+ p = strrchr(line, ')');
+ if (!p)
+ return -EIO;
+
+ p++;
+
+ if (sscanf(p, " "
+ "%*c " /* state */
+ "%*d " /* ppid */
+ "%*d " /* pgrp */
+ "%*d " /* session */
+ "%lu ", /* ttynr */
+ &ttynr) != 1)
+ return -EIO;
+
+ if (major(ttynr) == 0 && minor(ttynr) == 0)
+ return -ENXIO;
+
+ if (d)
+ *d = (dev_t) ttynr;
+
+ return 0;
+}
+
+int get_ctty(pid_t pid, dev_t *ret_devnr, char **ret) {
+ _cleanup_free_ char *fn = NULL, *b = NULL;
+ dev_t devnr;
+ int r;
+
+ r = get_ctty_devnr(pid, &devnr);
+ if (r < 0)
+ return r;
+
+ r = device_path_make_canonical(S_IFCHR, devnr, &fn);
+ if (r < 0) {
+ if (r != -ENOENT) /* No symlink for this in /dev/char/? */
+ return r;
+
+ if (major(devnr) == 136) {
+ /* This is an ugly hack: PTY devices are not listed in /dev/char/, as they don't follow the
+ * Linux device model. This means we have no nice way to match them up against their actual
+ * device node. Let's hence do the check by the fixed, assigned major number. Normally we try
+ * to avoid such fixed major/minor matches, but there appears to nother nice way to handle
+ * this. */
+
+ if (asprintf(&b, "pts/%u", minor(devnr)) < 0)
+ return -ENOMEM;
+ } else {
+ /* Probably something similar to the ptys which have no symlink in /dev/char/. Let's return
+ * something vaguely useful. */
+
+ r = device_path_make_major_minor(S_IFCHR, devnr, &fn);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!b) {
+ const char *w;
+
+ w = path_startswith(fn, "/dev/");
+ if (w) {
+ b = strdup(w);
+ if (!b)
+ return -ENOMEM;
+ } else
+ b = TAKE_PTR(fn);
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(b);
+
+ if (ret_devnr)
+ *ret_devnr = devnr;
+
+ return 0;
+}
+
+int ptsname_malloc(int fd, char **ret) {
+ size_t l = 100;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ for (;;) {
+ char *c;
+
+ c = new(char, l);
+ if (!c)
+ return -ENOMEM;
+
+ if (ptsname_r(fd, c, l) == 0) {
+ *ret = c;
+ return 0;
+ }
+ if (errno != ERANGE) {
+ free(c);
+ return -errno;
+ }
+
+ free(c);
+
+ if (l > SIZE_MAX / 2)
+ return -ENOMEM;
+
+ l *= 2;
+ }
+}
+
+int openpt_allocate(int flags, char **ret_slave) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ fd = posix_openpt(flags|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ret_slave) {
+ r = ptsname_malloc(fd, &p);
+ if (r < 0)
+ return r;
+
+ if (!path_startswith(p, "/dev/pts/"))
+ return -EINVAL;
+ }
+
+ if (unlockpt(fd) < 0)
+ return -errno;
+
+ if (ret_slave)
+ *ret_slave = TAKE_PTR(p);
+
+ return TAKE_FD(fd);
+}
+
+static int ptsname_namespace(int pty, char **ret) {
+ int no = -1, r;
+
+ /* Like ptsname(), but doesn't assume that the path is
+ * accessible in the local namespace. */
+
+ r = ioctl(pty, TIOCGPTN, &no);
+ if (r < 0)
+ return -errno;
+
+ if (no < 0)
+ return -EIO;
+
+ if (asprintf(ret, "/dev/pts/%i", no) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave) {
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1, fd = -1;
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ pid_t child;
+ int r;
+
+ assert(pid > 0);
+
+ r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-openptns)", "(sd-openpt)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ pair[0] = safe_close(pair[0]);
+
+ fd = openpt_allocate(flags, NULL);
+ if (fd < 0)
+ _exit(EXIT_FAILURE);
+
+ if (send_one_fd(pair[1], fd, 0) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-openptns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ fd = receive_one_fd(pair[0], 0);
+ if (fd < 0)
+ return fd;
+
+ if (ret_slave) {
+ r = ptsname_namespace(fd, ret_slave);
+ if (r < 0)
+ return r;
+ }
+
+ return TAKE_FD(fd);
+}
+
+int open_terminal_in_namespace(pid_t pid, const char *name, int mode) {
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ pid_t child;
+ int r;
+
+ r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-terminalns)", "(sd-terminal)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ int master;
+
+ pair[0] = safe_close(pair[0]);
+
+ master = open_terminal(name, mode|O_NOCTTY|O_CLOEXEC);
+ if (master < 0)
+ _exit(EXIT_FAILURE);
+
+ if (send_one_fd(pair[1], master, 0) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-terminalns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ return receive_one_fd(pair[0], 0);
+}
+
+static bool getenv_terminal_is_dumb(void) {
+ const char *e;
+
+ e = getenv("TERM");
+ if (!e)
+ return true;
+
+ return streq(e, "dumb");
+}
+
+bool terminal_is_dumb(void) {
+ if (!on_tty())
+ return true;
+
+ return getenv_terminal_is_dumb();
+}
+
+bool colors_enabled(void) {
+
+ /* Returns true if colors are considered supported on our stdout. For that we check $SYSTEMD_COLORS first
+ * (which is the explicit way to turn colors on/off). If that didn't work we turn colors off unless we are on a
+ * TTY. And if we are on a TTY we turn it off if $TERM is set to "dumb". There's one special tweak though: if
+ * we are PID 1 then we do not check whether we are connected to a TTY, because we don't keep /dev/console open
+ * continuously due to fear of SAK, and hence things are a bit weird. */
+
+ if (cached_colors_enabled < 0) {
+ int val;
+
+ val = getenv_bool("SYSTEMD_COLORS");
+ if (val >= 0)
+ cached_colors_enabled = val;
+
+ else if (getenv("NO_COLOR"))
+ /* We only check for the presence of the variable; value is ignored. */
+ cached_colors_enabled = false;
+
+ else if (getpid_cached() == 1)
+ /* PID1 outputs to the console without holding it open all the time */
+ cached_colors_enabled = !getenv_terminal_is_dumb();
+ else
+ cached_colors_enabled = !terminal_is_dumb();
+ }
+
+ return cached_colors_enabled;
+}
+
+bool dev_console_colors_enabled(void) {
+ _cleanup_free_ char *s = NULL;
+ int b;
+
+ /* Returns true if we assume that color is supported on /dev/console.
+ *
+ * For that we first check if we explicitly got told to use colors or not, by checking $SYSTEMD_COLORS. If that
+ * isn't set we check whether PID 1 has $TERM set, and if not, whether TERM is set on the kernel command
+ * line. If we find $TERM set we assume color if it's not set to "dumb", similarly to how regular
+ * colors_enabled() operates. */
+
+ b = getenv_bool("SYSTEMD_COLORS");
+ if (b >= 0)
+ return b;
+
+ if (getenv("NO_COLOR"))
+ return false;
+
+ if (getenv_for_pid(1, "TERM", &s) <= 0)
+ (void) proc_cmdline_get_key("TERM", 0, &s);
+
+ return !streq_ptr(s, "dumb");
+}
+
+bool underline_enabled(void) {
+
+ if (cached_underline_enabled < 0) {
+
+ /* The Linux console doesn't support underlining, turn it off, but only there. */
+
+ if (colors_enabled())
+ cached_underline_enabled = !streq_ptr(getenv("TERM"), "linux");
+ else
+ cached_underline_enabled = false;
+ }
+
+ return cached_underline_enabled;
+}
+
+int vt_default_utf8(void) {
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ /* Read the default VT UTF8 setting from the kernel */
+
+ r = read_one_line_file("/sys/module/vt/parameters/default_utf8", &b);
+ if (r < 0)
+ return r;
+
+ return parse_boolean(b);
+}
+
+int vt_reset_keyboard(int fd) {
+ int kb;
+
+ /* If we can't read the default, then default to unicode. It's 2017 after all. */
+ kb = vt_default_utf8() != 0 ? K_UNICODE : K_XLATE;
+
+ if (ioctl(fd, KDSKBMODE, kb) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int vt_restore(int fd) {
+ static const struct vt_mode mode = {
+ .mode = VT_AUTO,
+ };
+ int r, q = 0;
+
+ if (ioctl(fd, KDSETMODE, KD_TEXT) < 0)
+ q = log_debug_errno(errno, "Failed to set VT in text mode, ignoring: %m");
+
+ r = vt_reset_keyboard(fd);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to reset keyboard mode, ignoring: %m");
+ if (q >= 0)
+ q = r;
+ }
+
+ if (ioctl(fd, VT_SETMODE, &mode) < 0) {
+ log_debug_errno(errno, "Failed to set VT_AUTO mode, ignoring: %m");
+ if (q >= 0)
+ q = -errno;
+ }
+
+ r = fchmod_and_chown(fd, TTY_MODE, 0, (gid_t) -1);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to chmod()/chown() VT, ignoring: %m");
+ if (q >= 0)
+ q = r;
+ }
+
+ return q;
+}
+
+int vt_release(int fd, bool restore) {
+ assert(fd >= 0);
+
+ /* This function releases the VT by acknowledging the VT-switch signal
+ * sent by the kernel and optionally reset the VT in text and auto
+ * VT-switching modes. */
+
+ if (ioctl(fd, VT_RELDISP, 1) < 0)
+ return -errno;
+
+ if (restore)
+ return vt_restore(fd);
+
+ return 0;
+}
+
+void get_log_colors(int priority, const char **on, const char **off, const char **highlight) {
+ /* Note that this will initialize output variables only when there's something to output.
+ * The caller must pre-initialize to "" or NULL as appropriate. */
+
+ if (priority <= LOG_ERR) {
+ if (on)
+ *on = ANSI_HIGHLIGHT_RED;
+ if (off)
+ *off = ANSI_NORMAL;
+ if (highlight)
+ *highlight = ANSI_HIGHLIGHT;
+
+ } else if (priority <= LOG_WARNING) {
+ if (on)
+ *on = ANSI_HIGHLIGHT_YELLOW;
+ if (off)
+ *off = ANSI_NORMAL;
+ if (highlight)
+ *highlight = ANSI_HIGHLIGHT;
+
+ } else if (priority <= LOG_NOTICE) {
+ if (on)
+ *on = ANSI_HIGHLIGHT;
+ if (off)
+ *off = ANSI_NORMAL;
+ if (highlight)
+ *highlight = ANSI_HIGHLIGHT_RED;
+
+ } else if (priority >= LOG_DEBUG) {
+ if (on)
+ *on = ANSI_GREY;
+ if (off)
+ *off = ANSI_NORMAL;
+ if (highlight)
+ *highlight = ANSI_HIGHLIGHT_RED;
+ }
+}
diff --git a/src/basic/terminal-util.h b/src/basic/terminal-util.h
new file mode 100644
index 0000000..5cb1e13
--- /dev/null
+++ b/src/basic/terminal-util.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <syslog.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "time-util.h"
+
+/* Regular colors */
+#define ANSI_BLACK "\x1B[0;30m" /* Some type of grey usually. */
+#define ANSI_RED "\x1B[0;31m"
+#define ANSI_GREEN "\x1B[0;32m"
+#define ANSI_YELLOW "\x1B[0;33m"
+#define ANSI_BLUE "\x1B[0;34m"
+#define ANSI_MAGENTA "\x1B[0;35m"
+#define ANSI_CYAN "\x1B[0;36m"
+#define ANSI_WHITE "\x1B[0;37m" /* This is actually rendered as light grey, legible even on a white
+ * background. See ANSI_HIGHLIGHT_WHITE for real white. */
+
+#define ANSI_BRIGHT_BLACK "\x1B[0;90m"
+#define ANSI_BRIGHT_RED "\x1B[0;91m"
+#define ANSI_BRIGHT_GREEN "\x1B[0;92m"
+#define ANSI_BRIGHT_YELLOW "\x1B[0;93m"
+#define ANSI_BRIGHT_BLUE "\x1B[0;94m"
+#define ANSI_BRIGHT_MAGENTA "\x1B[0;95m"
+#define ANSI_BRIGHT_CYAN "\x1B[0;96m"
+#define ANSI_BRIGHT_WHITE "\x1B[0;97m"
+
+#define ANSI_GREY "\x1B[0;38;5;245m"
+
+/* Bold/highlighted */
+#define ANSI_HIGHLIGHT_BLACK "\x1B[0;1;30m"
+#define ANSI_HIGHLIGHT_RED "\x1B[0;1;31m"
+#define ANSI_HIGHLIGHT_GREEN "\x1B[0;1;32m"
+#define _ANSI_HIGHLIGHT_YELLOW "\x1B[0;1;33m" /* This yellow is currently not displayed well by some terminals */
+#define ANSI_HIGHLIGHT_BLUE "\x1B[0;1;34m"
+#define ANSI_HIGHLIGHT_MAGENTA "\x1B[0;1;35m"
+#define ANSI_HIGHLIGHT_CYAN "\x1B[0;1;36m"
+#define ANSI_HIGHLIGHT_WHITE "\x1B[0;1;37m"
+#define ANSI_HIGHLIGHT_YELLOW4 "\x1B[0;1;38;5;100m"
+#define ANSI_HIGHLIGHT_KHAKI3 "\x1B[0;1;38;5;185m"
+#define ANSI_HIGHLIGHT_GREY "\x1B[0;1;38;5;245m"
+
+#define ANSI_HIGHLIGHT_YELLOW ANSI_HIGHLIGHT_KHAKI3 /* Replacement yellow that is more legible */
+
+/* Underlined */
+#define ANSI_GREY_UNDERLINE "\x1B[0;4;38;5;245m"
+#define ANSI_HIGHLIGHT_RED_UNDERLINE "\x1B[0;1;4;31m"
+#define ANSI_HIGHLIGHT_GREEN_UNDERLINE "\x1B[0;1;4;32m"
+#define ANSI_HIGHLIGHT_YELLOW_UNDERLINE "\x1B[0;1;4;38;5;185m"
+#define ANSI_HIGHLIGHT_BLUE_UNDERLINE "\x1B[0;1;4;34m"
+#define ANSI_HIGHLIGHT_MAGENTA_UNDERLINE "\x1B[0;1;4;35m"
+#define ANSI_HIGHLIGHT_GREY_UNDERLINE "\x1B[0;1;4;38;5;245m"
+
+/* Other ANSI codes */
+#define ANSI_UNDERLINE "\x1B[0;4m"
+#define ANSI_HIGHLIGHT "\x1B[0;1;39m"
+#define ANSI_HIGHLIGHT_UNDERLINE "\x1B[0;1;4m"
+
+/* Reset/clear ANSI styles */
+#define ANSI_NORMAL "\x1B[0m"
+
+/* Erase characters until the end of the line */
+#define ANSI_ERASE_TO_END_OF_LINE "\x1B[K"
+
+/* Move cursor up one line */
+#define ANSI_REVERSE_LINEFEED "\x1BM"
+
+/* Set cursor to top left corner and clear screen */
+#define ANSI_HOME_CLEAR "\x1B[H\x1B[2J"
+
+int reset_terminal_fd(int fd, bool switch_to_text);
+int reset_terminal(const char *name);
+
+int open_terminal(const char *name, int mode);
+
+/* Flags for tweaking the way we become the controlling process of a terminal. */
+typedef enum AcquireTerminalFlags {
+ /* Try to become the controlling process of the TTY. If we can't return -EPERM. */
+ ACQUIRE_TERMINAL_TRY = 0,
+
+ /* Tell the kernel to forcibly make us the controlling process of the TTY. Returns -EPERM if the kernel doesn't allow that. */
+ ACQUIRE_TERMINAL_FORCE = 1,
+
+ /* If we can't become the controlling process of the TTY right-away, then wait until we can. */
+ ACQUIRE_TERMINAL_WAIT = 2,
+
+ /* Pick one of the above, and then OR this flag in, in order to request permissive behaviour, if we can't become controlling process then don't mind */
+ ACQUIRE_TERMINAL_PERMISSIVE = 1 << 2,
+} AcquireTerminalFlags;
+
+int acquire_terminal(const char *name, AcquireTerminalFlags flags, usec_t timeout);
+int release_terminal(void);
+
+int terminal_vhangup_fd(int fd);
+int terminal_vhangup(const char *name);
+
+int chvt(int vt);
+
+int read_one_char(FILE *f, char *ret, usec_t timeout, bool *need_nl);
+int ask_char(char *ret, const char *replies, const char *text, ...) _printf_(3, 4);
+int ask_string(char **ret, const char *text, ...) _printf_(2, 3);
+
+int vt_disallocate(const char *name);
+
+int resolve_dev_console(char **ret);
+int get_kernel_consoles(char ***ret);
+bool tty_is_vc(const char *tty);
+bool tty_is_vc_resolve(const char *tty);
+bool tty_is_console(const char *tty) _pure_;
+int vtnr_from_tty(const char *tty);
+const char *default_term_for_tty(const char *tty);
+
+int make_console_stdio(void);
+
+int fd_columns(int fd);
+unsigned columns(void);
+int fd_lines(int fd);
+unsigned lines(void);
+
+void columns_lines_cache_reset(int _unused_ signum);
+void reset_terminal_feature_caches(void);
+
+bool on_tty(void);
+bool terminal_is_dumb(void);
+bool colors_enabled(void);
+bool underline_enabled(void);
+bool dev_console_colors_enabled(void);
+
+#define DEFINE_ANSI_FUNC(name, NAME) \
+ static inline const char *ansi_##name(void) { \
+ return colors_enabled() ? ANSI_##NAME : ""; \
+ }
+
+#define DEFINE_ANSI_FUNC_UNDERLINE(name, NAME, REPLACEMENT) \
+ static inline const char *ansi_##name(void) { \
+ return underline_enabled() ? ANSI_##NAME : \
+ colors_enabled() ? ANSI_##REPLACEMENT : ""; \
+ }
+
+DEFINE_ANSI_FUNC(normal, NORMAL);
+DEFINE_ANSI_FUNC(highlight, HIGHLIGHT);
+DEFINE_ANSI_FUNC(black, BLACK);
+DEFINE_ANSI_FUNC(red, RED);
+DEFINE_ANSI_FUNC(green, GREEN);
+DEFINE_ANSI_FUNC(yellow, YELLOW);
+DEFINE_ANSI_FUNC(blue, BLUE);
+DEFINE_ANSI_FUNC(magenta, MAGENTA);
+DEFINE_ANSI_FUNC(cyan, CYAN);
+DEFINE_ANSI_FUNC(white, WHITE);
+DEFINE_ANSI_FUNC(grey, GREY);
+
+DEFINE_ANSI_FUNC(bright_black, BRIGHT_BLACK);
+DEFINE_ANSI_FUNC(bright_red, BRIGHT_RED);
+DEFINE_ANSI_FUNC(bright_green, BRIGHT_GREEN);
+DEFINE_ANSI_FUNC(bright_yellow, BRIGHT_YELLOW);
+DEFINE_ANSI_FUNC(bright_blue, BRIGHT_BLUE);
+DEFINE_ANSI_FUNC(bright_magenta, BRIGHT_MAGENTA);
+DEFINE_ANSI_FUNC(bright_cyan, BRIGHT_CYAN);
+DEFINE_ANSI_FUNC(bright_white, BRIGHT_WHITE);
+
+DEFINE_ANSI_FUNC(highlight_black, HIGHLIGHT_BLACK);
+DEFINE_ANSI_FUNC(highlight_red, HIGHLIGHT_RED);
+DEFINE_ANSI_FUNC(highlight_green, HIGHLIGHT_GREEN);
+DEFINE_ANSI_FUNC(highlight_yellow, HIGHLIGHT_YELLOW);
+DEFINE_ANSI_FUNC(highlight_blue, HIGHLIGHT_BLUE);
+DEFINE_ANSI_FUNC(highlight_magenta, HIGHLIGHT_MAGENTA);
+DEFINE_ANSI_FUNC(highlight_cyan, HIGHLIGHT_CYAN);
+DEFINE_ANSI_FUNC(highlight_grey, HIGHLIGHT_GREY);
+DEFINE_ANSI_FUNC(highlight_white, HIGHLIGHT_WHITE);
+
+static inline const char* _ansi_highlight_yellow(void) {
+ return colors_enabled() ? _ANSI_HIGHLIGHT_YELLOW : "";
+}
+
+DEFINE_ANSI_FUNC_UNDERLINE(underline, UNDERLINE, NORMAL);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_underline, HIGHLIGHT_UNDERLINE, HIGHLIGHT);
+DEFINE_ANSI_FUNC_UNDERLINE(grey_underline, GREY_UNDERLINE, GREY);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_red_underline, HIGHLIGHT_RED_UNDERLINE, HIGHLIGHT_RED);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_green_underline, HIGHLIGHT_GREEN_UNDERLINE, HIGHLIGHT_GREEN);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_yellow_underline, HIGHLIGHT_YELLOW_UNDERLINE, HIGHLIGHT_YELLOW);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_blue_underline, HIGHLIGHT_BLUE_UNDERLINE, HIGHLIGHT_BLUE);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_magenta_underline, HIGHLIGHT_MAGENTA_UNDERLINE, HIGHLIGHT_MAGENTA);
+DEFINE_ANSI_FUNC_UNDERLINE(highlight_grey_underline, HIGHLIGHT_GREY_UNDERLINE, HIGHLIGHT_GREY);
+
+int get_ctty_devnr(pid_t pid, dev_t *d);
+int get_ctty(pid_t, dev_t *_devnr, char **r);
+
+int getttyname_malloc(int fd, char **r);
+int getttyname_harder(int fd, char **r);
+
+int ptsname_malloc(int fd, char **ret);
+
+int openpt_allocate(int flags, char **ret_slave);
+int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave);
+int open_terminal_in_namespace(pid_t pid, const char *name, int mode);
+
+int vt_default_utf8(void);
+int vt_reset_keyboard(int fd);
+int vt_restore(int fd);
+int vt_release(int fd, bool restore_vt);
+
+void get_log_colors(int priority, const char **on, const char **off, const char **highlight);
+
+/* This assumes there is a 'tty' group */
+#define TTY_MODE 0620
diff --git a/src/basic/time-util.c b/src/basic/time-util.c
new file mode 100644
index 0000000..5318d63
--- /dev/null
+++ b/src/basic/time-util.c
@@ -0,0 +1,1612 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/timerfd.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_timerfd.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+static clockid_t map_clock_id(clockid_t c) {
+
+ /* Some more exotic archs (s390, ppc, …) lack the "ALARM" flavour of the clocks. Thus, clock_gettime() will
+ * fail for them. Since they are essentially the same as their non-ALARM pendants (their only difference is
+ * when timers are set on them), let's just map them accordingly. This way, we can get the correct time even on
+ * those archs. */
+
+ switch (c) {
+
+ case CLOCK_BOOTTIME_ALARM:
+ return CLOCK_BOOTTIME;
+
+ case CLOCK_REALTIME_ALARM:
+ return CLOCK_REALTIME;
+
+ default:
+ return c;
+ }
+}
+
+usec_t now(clockid_t clock_id) {
+ struct timespec ts;
+
+ assert_se(clock_gettime(map_clock_id(clock_id), &ts) == 0);
+
+ return timespec_load(&ts);
+}
+
+nsec_t now_nsec(clockid_t clock_id) {
+ struct timespec ts;
+
+ assert_se(clock_gettime(map_clock_id(clock_id), &ts) == 0);
+
+ return timespec_load_nsec(&ts);
+}
+
+dual_timestamp* dual_timestamp_get(dual_timestamp *ts) {
+ assert(ts);
+
+ ts->realtime = now(CLOCK_REALTIME);
+ ts->monotonic = now(CLOCK_MONOTONIC);
+
+ return ts;
+}
+
+triple_timestamp* triple_timestamp_get(triple_timestamp *ts) {
+ assert(ts);
+
+ ts->realtime = now(CLOCK_REALTIME);
+ ts->monotonic = now(CLOCK_MONOTONIC);
+ ts->boottime = clock_boottime_supported() ? now(CLOCK_BOOTTIME) : USEC_INFINITY;
+
+ return ts;
+}
+
+static usec_t map_clock_usec_internal(usec_t from, usec_t from_base, usec_t to_base) {
+
+ /* Maps the time 'from' between two clocks, based on a common reference point where the first clock
+ * is at 'from_base' and the second clock at 'to_base'. Basically calculates:
+ *
+ * from - from_base + to_base
+ *
+ * But takes care of overflows/underflows and avoids signed operations. */
+
+ if (from >= from_base) { /* In the future */
+ usec_t delta = from - from_base;
+
+ if (to_base >= USEC_INFINITY - delta) /* overflow? */
+ return USEC_INFINITY;
+
+ return to_base + delta;
+
+ } else { /* In the past */
+ usec_t delta = from_base - from;
+
+ if (to_base <= delta) /* underflow? */
+ return 0;
+
+ return to_base - delta;
+ }
+}
+
+usec_t map_clock_usec(usec_t from, clockid_t from_clock, clockid_t to_clock) {
+
+ /* Try to avoid any inaccuracy needlessly added in case we convert from effectively the same clock
+ * onto itself */
+ if (map_clock_id(from_clock) == map_clock_id(to_clock))
+ return from;
+
+ /* Keep infinity as is */
+ if (from == USEC_INFINITY)
+ return from;
+
+ return map_clock_usec_internal(from, now(from_clock), now(to_clock));
+}
+
+dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u) {
+ assert(ts);
+
+ if (u == USEC_INFINITY || u == 0) {
+ ts->realtime = ts->monotonic = u;
+ return ts;
+ }
+
+ ts->realtime = u;
+ ts->monotonic = map_clock_usec(u, CLOCK_REALTIME, CLOCK_MONOTONIC);
+ return ts;
+}
+
+triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u) {
+ usec_t nowr;
+
+ assert(ts);
+
+ if (u == USEC_INFINITY || u == 0) {
+ ts->realtime = ts->monotonic = ts->boottime = u;
+ return ts;
+ }
+
+ nowr = now(CLOCK_REALTIME);
+
+ ts->realtime = u;
+ ts->monotonic = map_clock_usec_internal(u, nowr, now(CLOCK_MONOTONIC));
+ ts->boottime = clock_boottime_supported() ?
+ map_clock_usec_internal(u, nowr, now(CLOCK_BOOTTIME)) :
+ USEC_INFINITY;
+
+ return ts;
+}
+
+dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u) {
+ assert(ts);
+
+ if (u == USEC_INFINITY) {
+ ts->realtime = ts->monotonic = USEC_INFINITY;
+ return ts;
+ }
+
+ ts->monotonic = u;
+ ts->realtime = map_clock_usec(u, CLOCK_MONOTONIC, CLOCK_REALTIME);
+ return ts;
+}
+
+dual_timestamp* dual_timestamp_from_boottime_or_monotonic(dual_timestamp *ts, usec_t u) {
+ clockid_t cid;
+ usec_t nowm;
+
+ if (u == USEC_INFINITY) {
+ ts->realtime = ts->monotonic = USEC_INFINITY;
+ return ts;
+ }
+
+ cid = clock_boottime_or_monotonic();
+ nowm = now(cid);
+
+ if (cid == CLOCK_MONOTONIC)
+ ts->monotonic = u;
+ else
+ ts->monotonic = map_clock_usec_internal(u, nowm, now(CLOCK_MONOTONIC));
+
+ ts->realtime = map_clock_usec_internal(u, nowm, now(CLOCK_REALTIME));
+ return ts;
+}
+
+usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock) {
+
+ switch (clock) {
+
+ case CLOCK_REALTIME:
+ case CLOCK_REALTIME_ALARM:
+ return ts->realtime;
+
+ case CLOCK_MONOTONIC:
+ return ts->monotonic;
+
+ case CLOCK_BOOTTIME:
+ case CLOCK_BOOTTIME_ALARM:
+ return ts->boottime;
+
+ default:
+ return USEC_INFINITY;
+ }
+}
+
+usec_t timespec_load(const struct timespec *ts) {
+ assert(ts);
+
+ if (ts->tv_sec < 0 || ts->tv_nsec < 0)
+ return USEC_INFINITY;
+
+ if ((usec_t) ts->tv_sec > (UINT64_MAX - (ts->tv_nsec / NSEC_PER_USEC)) / USEC_PER_SEC)
+ return USEC_INFINITY;
+
+ return
+ (usec_t) ts->tv_sec * USEC_PER_SEC +
+ (usec_t) ts->tv_nsec / NSEC_PER_USEC;
+}
+
+nsec_t timespec_load_nsec(const struct timespec *ts) {
+ assert(ts);
+
+ if (ts->tv_sec < 0 || ts->tv_nsec < 0)
+ return NSEC_INFINITY;
+
+ if ((nsec_t) ts->tv_sec >= (UINT64_MAX - ts->tv_nsec) / NSEC_PER_SEC)
+ return NSEC_INFINITY;
+
+ return (nsec_t) ts->tv_sec * NSEC_PER_SEC + (nsec_t) ts->tv_nsec;
+}
+
+struct timespec *timespec_store(struct timespec *ts, usec_t u) {
+ assert(ts);
+
+ if (u == USEC_INFINITY ||
+ u / USEC_PER_SEC >= TIME_T_MAX) {
+ ts->tv_sec = (time_t) -1;
+ ts->tv_nsec = -1L;
+ return ts;
+ }
+
+ ts->tv_sec = (time_t) (u / USEC_PER_SEC);
+ ts->tv_nsec = (long) ((u % USEC_PER_SEC) * NSEC_PER_USEC);
+
+ return ts;
+}
+
+struct timespec *timespec_store_nsec(struct timespec *ts, nsec_t n) {
+ assert(ts);
+
+ if (n == NSEC_INFINITY ||
+ n / NSEC_PER_SEC >= TIME_T_MAX) {
+ ts->tv_sec = (time_t) -1;
+ ts->tv_nsec = -1L;
+ return ts;
+ }
+
+ ts->tv_sec = (time_t) (n / NSEC_PER_SEC);
+ ts->tv_nsec = (long) (n % NSEC_PER_SEC);
+
+ return ts;
+}
+
+usec_t timeval_load(const struct timeval *tv) {
+ assert(tv);
+
+ if (tv->tv_sec < 0 || tv->tv_usec < 0)
+ return USEC_INFINITY;
+
+ if ((usec_t) tv->tv_sec > (UINT64_MAX - tv->tv_usec) / USEC_PER_SEC)
+ return USEC_INFINITY;
+
+ return
+ (usec_t) tv->tv_sec * USEC_PER_SEC +
+ (usec_t) tv->tv_usec;
+}
+
+struct timeval *timeval_store(struct timeval *tv, usec_t u) {
+ assert(tv);
+
+ if (u == USEC_INFINITY ||
+ u / USEC_PER_SEC > TIME_T_MAX) {
+ tv->tv_sec = (time_t) -1;
+ tv->tv_usec = (suseconds_t) -1;
+ } else {
+ tv->tv_sec = (time_t) (u / USEC_PER_SEC);
+ tv->tv_usec = (suseconds_t) (u % USEC_PER_SEC);
+ }
+
+ return tv;
+}
+
+char *format_timestamp_style(
+ char *buf,
+ size_t l,
+ usec_t t,
+ TimestampStyle style) {
+
+ /* The weekdays in non-localized (English) form. We use this instead of the localized form, so that our
+ * generated timestamps may be parsed with parse_timestamp(), and always read the same. */
+ static const char * const weekdays[] = {
+ [0] = "Sun",
+ [1] = "Mon",
+ [2] = "Tue",
+ [3] = "Wed",
+ [4] = "Thu",
+ [5] = "Fri",
+ [6] = "Sat",
+ };
+
+ struct tm tm;
+ time_t sec;
+ size_t n;
+ bool utc = false, us = false;
+
+ assert(buf);
+
+ switch (style) {
+ case TIMESTAMP_PRETTY:
+ break;
+ case TIMESTAMP_US:
+ us = true;
+ break;
+ case TIMESTAMP_UTC:
+ utc = true;
+ break;
+ case TIMESTAMP_US_UTC:
+ us = true;
+ utc = true;
+ break;
+ default:
+ return NULL;
+ }
+
+ if (l < (size_t) (3 + /* week day */
+ 1 + 10 + /* space and date */
+ 1 + 8 + /* space and time */
+ (us ? 1 + 6 : 0) + /* "." and microsecond part */
+ 1 + 1 + /* space and shortest possible zone */
+ 1))
+ return NULL; /* Not enough space even for the shortest form. */
+ if (t <= 0 || t == USEC_INFINITY)
+ return NULL; /* Timestamp is unset */
+
+ /* Let's not format times with years > 9999 */
+ if (t > USEC_TIMESTAMP_FORMATTABLE_MAX) {
+ assert(l >= STRLEN("--- XXXX-XX-XX XX:XX:XX") + 1);
+ strcpy(buf, "--- XXXX-XX-XX XX:XX:XX");
+ return buf;
+ }
+
+ sec = (time_t) (t / USEC_PER_SEC); /* Round down */
+
+ if (!localtime_or_gmtime_r(&sec, &tm, utc))
+ return NULL;
+
+ /* Start with the week day */
+ assert((size_t) tm.tm_wday < ELEMENTSOF(weekdays));
+ memcpy(buf, weekdays[tm.tm_wday], 4);
+
+ /* Add the main components */
+ if (strftime(buf + 3, l - 3, " %Y-%m-%d %H:%M:%S", &tm) <= 0)
+ return NULL; /* Doesn't fit */
+
+ /* Append the microseconds part, if that's requested */
+ if (us) {
+ n = strlen(buf);
+ if (n + 8 > l)
+ return NULL; /* Microseconds part doesn't fit. */
+
+ sprintf(buf + n, ".%06"PRI_USEC, t % USEC_PER_SEC);
+ }
+
+ /* Append the timezone */
+ n = strlen(buf);
+ if (utc) {
+ /* If this is UTC then let's explicitly use the "UTC" string here, because gmtime_r() normally uses the
+ * obsolete "GMT" instead. */
+ if (n + 5 > l)
+ return NULL; /* "UTC" doesn't fit. */
+
+ strcpy(buf + n, " UTC");
+
+ } else if (!isempty(tm.tm_zone)) {
+ size_t tn;
+
+ /* An explicit timezone is specified, let's use it, if it fits */
+ tn = strlen(tm.tm_zone);
+ if (n + 1 + tn + 1 > l) {
+ /* The full time zone does not fit in. Yuck. */
+
+ if (n + 1 + _POSIX_TZNAME_MAX + 1 > l)
+ return NULL; /* Not even enough space for the POSIX minimum (of 6)? In that case, complain that it doesn't fit */
+
+ /* So the time zone doesn't fit in fully, but the caller passed enough space for the POSIX
+ * minimum time zone length. In this case suppress the timezone entirely, in order not to dump
+ * an overly long, hard to read string on the user. This should be safe, because the user will
+ * assume the local timezone anyway if none is shown. And so does parse_timestamp(). */
+ } else {
+ buf[n++] = ' ';
+ strcpy(buf + n, tm.tm_zone);
+ }
+ }
+
+ return buf;
+}
+
+char *format_timestamp_relative(char *buf, size_t l, usec_t t) {
+ const char *s;
+ usec_t n, d;
+
+ if (t <= 0 || t == USEC_INFINITY)
+ return NULL;
+
+ n = now(CLOCK_REALTIME);
+ if (n > t) {
+ d = n - t;
+ s = "ago";
+ } else {
+ d = t - n;
+ s = "left";
+ }
+
+ if (d >= USEC_PER_YEAR)
+ snprintf(buf, l, USEC_FMT " years " USEC_FMT " months %s",
+ d / USEC_PER_YEAR,
+ (d % USEC_PER_YEAR) / USEC_PER_MONTH, s);
+ else if (d >= USEC_PER_MONTH)
+ snprintf(buf, l, USEC_FMT " months " USEC_FMT " days %s",
+ d / USEC_PER_MONTH,
+ (d % USEC_PER_MONTH) / USEC_PER_DAY, s);
+ else if (d >= USEC_PER_WEEK)
+ snprintf(buf, l, USEC_FMT " weeks " USEC_FMT " days %s",
+ d / USEC_PER_WEEK,
+ (d % USEC_PER_WEEK) / USEC_PER_DAY, s);
+ else if (d >= 2*USEC_PER_DAY)
+ snprintf(buf, l, USEC_FMT " days %s", d / USEC_PER_DAY, s);
+ else if (d >= 25*USEC_PER_HOUR)
+ snprintf(buf, l, "1 day " USEC_FMT "h %s",
+ (d - USEC_PER_DAY) / USEC_PER_HOUR, s);
+ else if (d >= 6*USEC_PER_HOUR)
+ snprintf(buf, l, USEC_FMT "h %s",
+ d / USEC_PER_HOUR, s);
+ else if (d >= USEC_PER_HOUR)
+ snprintf(buf, l, USEC_FMT "h " USEC_FMT "min %s",
+ d / USEC_PER_HOUR,
+ (d % USEC_PER_HOUR) / USEC_PER_MINUTE, s);
+ else if (d >= 5*USEC_PER_MINUTE)
+ snprintf(buf, l, USEC_FMT "min %s",
+ d / USEC_PER_MINUTE, s);
+ else if (d >= USEC_PER_MINUTE)
+ snprintf(buf, l, USEC_FMT "min " USEC_FMT "s %s",
+ d / USEC_PER_MINUTE,
+ (d % USEC_PER_MINUTE) / USEC_PER_SEC, s);
+ else if (d >= USEC_PER_SEC)
+ snprintf(buf, l, USEC_FMT "s %s",
+ d / USEC_PER_SEC, s);
+ else if (d >= USEC_PER_MSEC)
+ snprintf(buf, l, USEC_FMT "ms %s",
+ d / USEC_PER_MSEC, s);
+ else if (d > 0)
+ snprintf(buf, l, USEC_FMT"us %s",
+ d, s);
+ else
+ snprintf(buf, l, "now");
+
+ buf[l-1] = 0;
+ return buf;
+}
+
+char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) {
+ static const struct {
+ const char *suffix;
+ usec_t usec;
+ } table[] = {
+ { "y", USEC_PER_YEAR },
+ { "month", USEC_PER_MONTH },
+ { "w", USEC_PER_WEEK },
+ { "d", USEC_PER_DAY },
+ { "h", USEC_PER_HOUR },
+ { "min", USEC_PER_MINUTE },
+ { "s", USEC_PER_SEC },
+ { "ms", USEC_PER_MSEC },
+ { "us", 1 },
+ };
+
+ size_t i;
+ char *p = buf;
+ bool something = false;
+
+ assert(buf);
+ assert(l > 0);
+
+ if (t == USEC_INFINITY) {
+ strncpy(p, "infinity", l-1);
+ p[l-1] = 0;
+ return p;
+ }
+
+ if (t <= 0) {
+ strncpy(p, "0", l-1);
+ p[l-1] = 0;
+ return p;
+ }
+
+ /* The result of this function can be parsed with parse_sec */
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ int k = 0;
+ size_t n;
+ bool done = false;
+ usec_t a, b;
+
+ if (t <= 0)
+ break;
+
+ if (t < accuracy && something)
+ break;
+
+ if (t < table[i].usec)
+ continue;
+
+ if (l <= 1)
+ break;
+
+ a = t / table[i].usec;
+ b = t % table[i].usec;
+
+ /* Let's see if we should shows this in dot notation */
+ if (t < USEC_PER_MINUTE && b > 0) {
+ usec_t cc;
+ signed char j;
+
+ j = 0;
+ for (cc = table[i].usec; cc > 1; cc /= 10)
+ j++;
+
+ for (cc = accuracy; cc > 1; cc /= 10) {
+ b /= 10;
+ j--;
+ }
+
+ if (j > 0) {
+ k = snprintf(p, l,
+ "%s"USEC_FMT".%0*"PRI_USEC"%s",
+ p > buf ? " " : "",
+ a,
+ j,
+ b,
+ table[i].suffix);
+
+ t = 0;
+ done = true;
+ }
+ }
+
+ /* No? Then let's show it normally */
+ if (!done) {
+ k = snprintf(p, l,
+ "%s"USEC_FMT"%s",
+ p > buf ? " " : "",
+ a,
+ table[i].suffix);
+
+ t = b;
+ }
+
+ n = MIN((size_t) k, l);
+
+ l -= n;
+ p += n;
+
+ something = true;
+ }
+
+ *p = 0;
+
+ return buf;
+}
+
+static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) {
+ static const struct {
+ const char *name;
+ const int nr;
+ } day_nr[] = {
+ { "Sunday", 0 },
+ { "Sun", 0 },
+ { "Monday", 1 },
+ { "Mon", 1 },
+ { "Tuesday", 2 },
+ { "Tue", 2 },
+ { "Wednesday", 3 },
+ { "Wed", 3 },
+ { "Thursday", 4 },
+ { "Thu", 4 },
+ { "Friday", 5 },
+ { "Fri", 5 },
+ { "Saturday", 6 },
+ { "Sat", 6 },
+ };
+
+ const char *k, *utc = NULL, *tzn = NULL;
+ struct tm tm, copy;
+ time_t x;
+ usec_t x_usec, plus = 0, minus = 0, ret;
+ int r, weekday = -1, dst = -1;
+ size_t i;
+
+ /* Allowed syntaxes:
+ *
+ * 2012-09-22 16:34:22
+ * 2012-09-22 16:34 (seconds will be set to 0)
+ * 2012-09-22 (time will be set to 00:00:00)
+ * 16:34:22 (date will be set to today)
+ * 16:34 (date will be set to today, seconds to 0)
+ * now
+ * yesterday (time is set to 00:00:00)
+ * today (time is set to 00:00:00)
+ * tomorrow (time is set to 00:00:00)
+ * +5min
+ * -5days
+ * @2147483647 (seconds since epoch)
+ */
+
+ assert(t);
+
+ if (t[0] == '@' && !with_tz)
+ return parse_sec(t + 1, usec);
+
+ ret = now(CLOCK_REALTIME);
+
+ if (!with_tz) {
+ if (streq(t, "now"))
+ goto finish;
+
+ else if (t[0] == '+') {
+ r = parse_sec(t+1, &plus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if (t[0] == '-') {
+ r = parse_sec(t+1, &minus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if ((k = endswith(t, " ago"))) {
+ t = strndupa(t, k - t);
+
+ r = parse_sec(t, &minus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+
+ } else if ((k = endswith(t, " left"))) {
+ t = strndupa(t, k - t);
+
+ r = parse_sec(t, &plus);
+ if (r < 0)
+ return r;
+
+ goto finish;
+ }
+
+ /* See if the timestamp is suffixed with UTC */
+ utc = endswith_no_case(t, " UTC");
+ if (utc)
+ t = strndupa(t, utc - t);
+ else {
+ const char *e = NULL;
+ int j;
+
+ tzset();
+
+ /* See if the timestamp is suffixed by either the DST or non-DST local timezone. Note that we only
+ * support the local timezones here, nothing else. Not because we wouldn't want to, but simply because
+ * there are no nice APIs available to cover this. By accepting the local time zone strings, we make
+ * sure that all timestamps written by format_timestamp() can be parsed correctly, even though we don't
+ * support arbitrary timezone specifications. */
+
+ for (j = 0; j <= 1; j++) {
+
+ if (isempty(tzname[j]))
+ continue;
+
+ e = endswith_no_case(t, tzname[j]);
+ if (!e)
+ continue;
+ if (e == t)
+ continue;
+ if (e[-1] != ' ')
+ continue;
+
+ break;
+ }
+
+ if (IN_SET(j, 0, 1)) {
+ /* Found one of the two timezones specified. */
+ t = strndupa(t, e - t - 1);
+ dst = j;
+ tzn = tzname[j];
+ }
+ }
+ }
+
+ x = (time_t) (ret / USEC_PER_SEC);
+ x_usec = 0;
+
+ if (!localtime_or_gmtime_r(&x, &tm, utc))
+ return -EINVAL;
+
+ tm.tm_isdst = dst;
+ if (!with_tz && tzn)
+ tm.tm_zone = tzn;
+
+ if (streq(t, "today")) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+
+ } else if (streq(t, "yesterday")) {
+ tm.tm_mday--;
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+
+ } else if (streq(t, "tomorrow")) {
+ tm.tm_mday++;
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ for (i = 0; i < ELEMENTSOF(day_nr); i++) {
+ size_t skip;
+
+ if (!startswith_no_case(t, day_nr[i].name))
+ continue;
+
+ skip = strlen(day_nr[i].name);
+ if (t[skip] != ' ')
+ continue;
+
+ weekday = day_nr[i].nr;
+ t += skip + 1;
+ break;
+ }
+
+ copy = tm;
+ k = strptime(t, "%y-%m-%d %H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d %H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%y-%m-%d %H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d %H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%y-%m-%d", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%Y-%m-%d", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%H:%M:%S", &tm);
+ if (k) {
+ if (*k == '.')
+ goto parse_usec;
+ else if (*k == 0)
+ goto from_tm;
+ }
+
+ tm = copy;
+ k = strptime(t, "%H:%M", &tm);
+ if (k && *k == 0) {
+ tm.tm_sec = 0;
+ goto from_tm;
+ }
+
+ return -EINVAL;
+
+parse_usec:
+ {
+ unsigned add;
+
+ k++;
+ r = parse_fractional_part_u(&k, 6, &add);
+ if (r < 0)
+ return -EINVAL;
+
+ if (*k)
+ return -EINVAL;
+
+ x_usec = add;
+ }
+
+from_tm:
+ if (weekday >= 0 && tm.tm_wday != weekday)
+ return -EINVAL;
+
+ x = mktime_or_timegm(&tm, utc);
+ if (x < 0)
+ return -EINVAL;
+
+ ret = (usec_t) x * USEC_PER_SEC + x_usec;
+ if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+finish:
+ if (ret + plus < ret) /* overflow? */
+ return -EINVAL;
+ ret += plus;
+ if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+ if (ret >= minus)
+ ret -= minus;
+ else
+ return -EINVAL;
+
+ if (usec)
+ *usec = ret;
+ return 0;
+}
+
+typedef struct ParseTimestampResult {
+ usec_t usec;
+ int return_value;
+} ParseTimestampResult;
+
+int parse_timestamp(const char *t, usec_t *usec) {
+ char *last_space, *tz = NULL;
+ ParseTimestampResult *shared, tmp;
+ int r;
+
+ last_space = strrchr(t, ' ');
+ if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG))
+ tz = last_space + 1;
+
+ if (!tz || endswith_no_case(t, " UTC"))
+ return parse_timestamp_impl(t, usec, false);
+
+ shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if (shared == MAP_FAILED)
+ return negative_errno();
+
+ r = safe_fork("(sd-timestamp)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL);
+ if (r < 0) {
+ (void) munmap(shared, sizeof *shared);
+ return r;
+ }
+ if (r == 0) {
+ bool with_tz = true;
+ char *colon_tz;
+
+ /* tzset(3) says $TZ should be prefixed with ":" if we reference timezone files */
+ colon_tz = strjoina(":", tz);
+
+ if (setenv("TZ", colon_tz, 1) != 0) {
+ shared->return_value = negative_errno();
+ _exit(EXIT_FAILURE);
+ }
+
+ tzset();
+
+ /* If there is a timezone that matches the tzname fields, leave the parsing to the implementation.
+ * Otherwise just cut it off. */
+ with_tz = !STR_IN_SET(tz, tzname[0], tzname[1]);
+
+ /* Cut off the timezone if we don't need it. */
+ if (with_tz)
+ t = strndupa(t, last_space - t);
+
+ shared->return_value = parse_timestamp_impl(t, &shared->usec, with_tz);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ tmp = *shared;
+ if (munmap(shared, sizeof *shared) != 0)
+ return negative_errno();
+
+ if (tmp.return_value == 0 && usec)
+ *usec = tmp.usec;
+
+ return tmp.return_value;
+}
+
+static const char* extract_multiplier(const char *p, usec_t *multiplier) {
+ static const struct {
+ const char *suffix;
+ usec_t usec;
+ } table[] = {
+ { "seconds", USEC_PER_SEC },
+ { "second", USEC_PER_SEC },
+ { "sec", USEC_PER_SEC },
+ { "s", USEC_PER_SEC },
+ { "minutes", USEC_PER_MINUTE },
+ { "minute", USEC_PER_MINUTE },
+ { "min", USEC_PER_MINUTE },
+ { "months", USEC_PER_MONTH },
+ { "month", USEC_PER_MONTH },
+ { "M", USEC_PER_MONTH },
+ { "msec", USEC_PER_MSEC },
+ { "ms", USEC_PER_MSEC },
+ { "m", USEC_PER_MINUTE },
+ { "hours", USEC_PER_HOUR },
+ { "hour", USEC_PER_HOUR },
+ { "hr", USEC_PER_HOUR },
+ { "h", USEC_PER_HOUR },
+ { "days", USEC_PER_DAY },
+ { "day", USEC_PER_DAY },
+ { "d", USEC_PER_DAY },
+ { "weeks", USEC_PER_WEEK },
+ { "week", USEC_PER_WEEK },
+ { "w", USEC_PER_WEEK },
+ { "years", USEC_PER_YEAR },
+ { "year", USEC_PER_YEAR },
+ { "y", USEC_PER_YEAR },
+ { "usec", 1ULL },
+ { "us", 1ULL },
+ { "µs", 1ULL },
+ };
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ char *e;
+
+ e = startswith(p, table[i].suffix);
+ if (e) {
+ *multiplier = table[i].usec;
+ return e;
+ }
+ }
+
+ return p;
+}
+
+int parse_time(const char *t, usec_t *usec, usec_t default_unit) {
+ const char *p, *s;
+ usec_t r = 0;
+ bool something = false;
+
+ assert(t);
+ assert(default_unit > 0);
+
+ p = t;
+
+ p += strspn(p, WHITESPACE);
+ s = startswith(p, "infinity");
+ if (s) {
+ s += strspn(s, WHITESPACE);
+ if (*s != 0)
+ return -EINVAL;
+
+ if (usec)
+ *usec = USEC_INFINITY;
+ return 0;
+ }
+
+ for (;;) {
+ usec_t multiplier = default_unit, k;
+ long long l;
+ char *e;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0) {
+ if (!something)
+ return -EINVAL;
+
+ break;
+ }
+
+ if (*p == '-') /* Don't allow "-0" */
+ return -ERANGE;
+
+ errno = 0;
+ l = strtoll(p, &e, 10);
+ if (errno > 0)
+ return -errno;
+ if (l < 0)
+ return -ERANGE;
+
+ if (*e == '.') {
+ p = e + 1;
+ p += strspn(p, DIGITS);
+ } else if (e == p)
+ return -EINVAL;
+ else
+ p = e;
+
+ s = extract_multiplier(p + strspn(p, WHITESPACE), &multiplier);
+ if (s == p && *s != '\0')
+ /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56'*/
+ return -EINVAL;
+
+ p = s;
+
+ if ((usec_t) l >= USEC_INFINITY / multiplier)
+ return -ERANGE;
+
+ k = (usec_t) l * multiplier;
+ if (k >= USEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+
+ something = true;
+
+ if (*e == '.') {
+ usec_t m = multiplier / 10;
+ const char *b;
+
+ for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) {
+ k = (usec_t) (*b - '0') * m;
+ if (k >= USEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+ }
+
+ /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge"*/
+ if (b == e + 1)
+ return -EINVAL;
+ }
+ }
+
+ if (usec)
+ *usec = r;
+ return 0;
+}
+
+int parse_sec(const char *t, usec_t *usec) {
+ return parse_time(t, usec, USEC_PER_SEC);
+}
+
+int parse_sec_fix_0(const char *t, usec_t *ret) {
+ usec_t k;
+ int r;
+
+ assert(t);
+ assert(ret);
+
+ r = parse_sec(t, &k);
+ if (r < 0)
+ return r;
+
+ *ret = k == 0 ? USEC_INFINITY : k;
+ return r;
+}
+
+int parse_sec_def_infinity(const char *t, usec_t *ret) {
+ t += strspn(t, WHITESPACE);
+ if (isempty(t)) {
+ *ret = USEC_INFINITY;
+ return 0;
+ }
+ return parse_sec(t, ret);
+}
+
+static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) {
+ static const struct {
+ const char *suffix;
+ nsec_t nsec;
+ } table[] = {
+ { "seconds", NSEC_PER_SEC },
+ { "second", NSEC_PER_SEC },
+ { "sec", NSEC_PER_SEC },
+ { "s", NSEC_PER_SEC },
+ { "minutes", NSEC_PER_MINUTE },
+ { "minute", NSEC_PER_MINUTE },
+ { "min", NSEC_PER_MINUTE },
+ { "months", NSEC_PER_MONTH },
+ { "month", NSEC_PER_MONTH },
+ { "M", NSEC_PER_MONTH },
+ { "msec", NSEC_PER_MSEC },
+ { "ms", NSEC_PER_MSEC },
+ { "m", NSEC_PER_MINUTE },
+ { "hours", NSEC_PER_HOUR },
+ { "hour", NSEC_PER_HOUR },
+ { "hr", NSEC_PER_HOUR },
+ { "h", NSEC_PER_HOUR },
+ { "days", NSEC_PER_DAY },
+ { "day", NSEC_PER_DAY },
+ { "d", NSEC_PER_DAY },
+ { "weeks", NSEC_PER_WEEK },
+ { "week", NSEC_PER_WEEK },
+ { "w", NSEC_PER_WEEK },
+ { "years", NSEC_PER_YEAR },
+ { "year", NSEC_PER_YEAR },
+ { "y", NSEC_PER_YEAR },
+ { "usec", NSEC_PER_USEC },
+ { "us", NSEC_PER_USEC },
+ { "µs", NSEC_PER_USEC },
+ { "nsec", 1ULL },
+ { "ns", 1ULL },
+ { "", 1ULL }, /* default is nsec */
+ };
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(table); i++) {
+ char *e;
+
+ e = startswith(p, table[i].suffix);
+ if (e) {
+ *multiplier = table[i].nsec;
+ return e;
+ }
+ }
+
+ return p;
+}
+
+int parse_nsec(const char *t, nsec_t *nsec) {
+ const char *p, *s;
+ nsec_t r = 0;
+ bool something = false;
+
+ assert(t);
+ assert(nsec);
+
+ p = t;
+
+ p += strspn(p, WHITESPACE);
+ s = startswith(p, "infinity");
+ if (s) {
+ s += strspn(s, WHITESPACE);
+ if (*s != 0)
+ return -EINVAL;
+
+ *nsec = NSEC_INFINITY;
+ return 0;
+ }
+
+ for (;;) {
+ nsec_t multiplier = 1, k;
+ long long l;
+ char *e;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0) {
+ if (!something)
+ return -EINVAL;
+
+ break;
+ }
+
+ if (*p == '-') /* Don't allow "-0" */
+ return -ERANGE;
+
+ errno = 0;
+ l = strtoll(p, &e, 10);
+ if (errno > 0)
+ return -errno;
+ if (l < 0)
+ return -ERANGE;
+
+ if (*e == '.') {
+ p = e + 1;
+ p += strspn(p, DIGITS);
+ } else if (e == p)
+ return -EINVAL;
+ else
+ p = e;
+
+ s = extract_nsec_multiplier(p + strspn(p, WHITESPACE), &multiplier);
+ if (s == p && *s != '\0')
+ /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56'*/
+ return -EINVAL;
+
+ p = s;
+
+ if ((nsec_t) l >= NSEC_INFINITY / multiplier)
+ return -ERANGE;
+
+ k = (nsec_t) l * multiplier;
+ if (k >= NSEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+
+ something = true;
+
+ if (*e == '.') {
+ nsec_t m = multiplier / 10;
+ const char *b;
+
+ for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) {
+ k = (nsec_t) (*b - '0') * m;
+ if (k >= NSEC_INFINITY - r)
+ return -ERANGE;
+
+ r += k;
+ }
+
+ /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge"*/
+ if (b == e + 1)
+ return -EINVAL;
+ }
+ }
+
+ *nsec = r;
+
+ return 0;
+}
+
+bool ntp_synced(void) {
+ struct timex txc = {};
+
+ if (adjtimex(&txc) < 0)
+ return false;
+
+ /* Consider the system clock synchronized if the reported maximum error is smaller than the maximum
+ * value (16 seconds). Ignore the STA_UNSYNC flag as it may have been set to prevent the kernel from
+ * touching the RTC. */
+ if (txc.maxerror >= 16000000)
+ return false;
+
+ return true;
+}
+
+int get_timezones(char ***ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **zones = NULL;
+ size_t n_zones = 0, n_allocated = 0;
+ int r;
+
+ assert(ret);
+
+ zones = strv_new("UTC");
+ if (!zones)
+ return -ENOMEM;
+
+ n_allocated = 2;
+ n_zones = 1;
+
+ f = fopen("/usr/share/zoneinfo/zone1970.tab", "re");
+ if (f) {
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *p, *w;
+ size_t k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = strstrip(line);
+
+ if (isempty(p) || *p == '#')
+ continue;
+
+ /* Skip over country code */
+ p += strcspn(p, WHITESPACE);
+ p += strspn(p, WHITESPACE);
+
+ /* Skip over coordinates */
+ p += strcspn(p, WHITESPACE);
+ p += strspn(p, WHITESPACE);
+
+ /* Found timezone name */
+ k = strcspn(p, WHITESPACE);
+ if (k <= 0)
+ continue;
+
+ w = strndup(p, k);
+ if (!w)
+ return -ENOMEM;
+
+ if (!GREEDY_REALLOC(zones, n_allocated, n_zones + 2)) {
+ free(w);
+ return -ENOMEM;
+ }
+
+ zones[n_zones++] = w;
+ zones[n_zones] = NULL;
+ }
+
+ strv_sort(zones);
+ strv_uniq(zones);
+
+ } else if (errno != ENOENT)
+ return -errno;
+
+ *ret = TAKE_PTR(zones);
+
+ return 0;
+}
+
+bool timezone_is_valid(const char *name, int log_level) {
+ bool slash = false;
+ const char *p, *t;
+ _cleanup_close_ int fd = -1;
+ char buf[4];
+ int r;
+
+ if (isempty(name))
+ return false;
+
+ /* Always accept "UTC" as valid timezone, since it's the fallback, even if user has no timezones installed. */
+ if (streq(name, "UTC"))
+ return true;
+
+ if (name[0] == '/')
+ return false;
+
+ for (p = name; *p; p++) {
+ if (!(*p >= '0' && *p <= '9') &&
+ !(*p >= 'a' && *p <= 'z') &&
+ !(*p >= 'A' && *p <= 'Z') &&
+ !IN_SET(*p, '-', '_', '+', '/'))
+ return false;
+
+ if (*p == '/') {
+
+ if (slash)
+ return false;
+
+ slash = true;
+ } else
+ slash = false;
+ }
+
+ if (slash)
+ return false;
+
+ if (p - name >= PATH_MAX)
+ return false;
+
+ t = strjoina("/usr/share/zoneinfo/", name);
+
+ fd = open(t, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ log_full_errno(log_level, errno, "Failed to open timezone file '%s': %m", t);
+ return false;
+ }
+
+ r = fd_verify_regular(fd);
+ if (r < 0) {
+ log_full_errno(log_level, r, "Timezone file '%s' is not a regular file: %m", t);
+ return false;
+ }
+
+ r = loop_read_exact(fd, buf, 4, false);
+ if (r < 0) {
+ log_full_errno(log_level, r, "Failed to read from timezone file '%s': %m", t);
+ return false;
+ }
+
+ /* Magic from tzfile(5) */
+ if (memcmp(buf, "TZif", 4) != 0) {
+ log_full(log_level, "Timezone file '%s' has wrong magic bytes", t);
+ return false;
+ }
+
+ return true;
+}
+
+bool clock_boottime_supported(void) {
+ static int supported = -1;
+
+ /* Note that this checks whether CLOCK_BOOTTIME is available in general as well as available for timerfds()! */
+
+ if (supported < 0) {
+ int fd;
+
+ fd = timerfd_create(CLOCK_BOOTTIME, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (fd < 0)
+ supported = false;
+ else {
+ safe_close(fd);
+ supported = true;
+ }
+ }
+
+ return supported;
+}
+
+clockid_t clock_boottime_or_monotonic(void) {
+ if (clock_boottime_supported())
+ return CLOCK_BOOTTIME;
+ else
+ return CLOCK_MONOTONIC;
+}
+
+bool clock_supported(clockid_t clock) {
+ struct timespec ts;
+
+ switch (clock) {
+
+ case CLOCK_MONOTONIC:
+ case CLOCK_REALTIME:
+ return true;
+
+ case CLOCK_BOOTTIME:
+ return clock_boottime_supported();
+
+ case CLOCK_BOOTTIME_ALARM:
+ if (!clock_boottime_supported())
+ return false;
+
+ _fallthrough_;
+ default:
+ /* For everything else, check properly */
+ return clock_gettime(clock, &ts) >= 0;
+ }
+}
+
+int get_timezone(char **ret) {
+ _cleanup_free_ char *t = NULL;
+ const char *e;
+ char *z;
+ int r;
+
+ r = readlink_malloc("/etc/localtime", &t);
+ if (r == -ENOENT) {
+ /* If the symlink does not exist, assume "UTC", like glibc does*/
+ z = strdup("UTC");
+ if (!z)
+ return -ENOMEM;
+
+ *ret = z;
+ return 0;
+ }
+ if (r < 0)
+ return r; /* returns EINVAL if not a symlink */
+
+ e = PATH_STARTSWITH_SET(t, "/usr/share/zoneinfo/", "../usr/share/zoneinfo/");
+ if (!e)
+ return -EINVAL;
+
+ if (!timezone_is_valid(e, LOG_DEBUG))
+ return -EINVAL;
+
+ z = strdup(e);
+ if (!z)
+ return -ENOMEM;
+
+ *ret = z;
+ return 0;
+}
+
+time_t mktime_or_timegm(struct tm *tm, bool utc) {
+ return utc ? timegm(tm) : mktime(tm);
+}
+
+struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc) {
+ return utc ? gmtime_r(t, tm) : localtime_r(t, tm);
+}
+
+static uint32_t sysconf_clock_ticks_cached(void) {
+ static thread_local uint32_t hz = 0;
+ long r;
+
+ if (hz == 0) {
+ r = sysconf(_SC_CLK_TCK);
+
+ assert(r > 0);
+ hz = r;
+ }
+
+ return hz;
+}
+
+uint32_t usec_to_jiffies(usec_t u) {
+ uint32_t hz = sysconf_clock_ticks_cached();
+ return DIV_ROUND_UP(u, USEC_PER_SEC / hz);
+}
+
+usec_t jiffies_to_usec(uint32_t j) {
+ uint32_t hz = sysconf_clock_ticks_cached();
+ return DIV_ROUND_UP(j * USEC_PER_SEC, hz);
+}
+
+usec_t usec_shift_clock(usec_t x, clockid_t from, clockid_t to) {
+ usec_t a, b;
+
+ if (x == USEC_INFINITY)
+ return USEC_INFINITY;
+ if (map_clock_id(from) == map_clock_id(to))
+ return x;
+
+ a = now(from);
+ b = now(to);
+
+ if (x > a)
+ /* x lies in the future */
+ return usec_add(b, usec_sub_unsigned(x, a));
+ else
+ /* x lies in the past */
+ return usec_sub_unsigned(b, usec_sub_unsigned(a, x));
+}
+
+bool in_utc_timezone(void) {
+ tzset();
+
+ return timezone == 0 && daylight == 0;
+}
+
+int time_change_fd(void) {
+
+ /* We only care for the cancellation event, hence we set the timeout to the latest possible value. */
+ static const struct itimerspec its = {
+ .it_value.tv_sec = TIME_T_MAX,
+ };
+
+ _cleanup_close_ int fd;
+
+ assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX));
+
+ /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever CLOCK_REALTIME makes a jump relative to
+ * CLOCK_MONOTONIC. */
+
+ fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (timerfd_settime(fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) >= 0)
+ return TAKE_FD(fd);
+
+ /* So apparently there are systems where time_t is 64bit, but the kernel actually doesn't support
+ * 64bit time_t. In that case configuring a timer to TIME_T_MAX will fail with EOPNOTSUPP or a
+ * similar error. If that's the case let's try with INT32_MAX instead, maybe that works. It's a bit
+ * of a black magic thing though, but what can we do?
+ *
+ * We don't want this code on x86-64, hence let's conditionalize this for systems with 64bit time_t
+ * but where "long" is shorter than 64bit, i.e. 32bit archs.
+ *
+ * See: https://github.com/systemd/systemd/issues/14362 */
+
+#if SIZEOF_TIME_T == 8 && ULONG_MAX < UINT64_MAX
+ if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EOVERFLOW) {
+ static const struct itimerspec its32 = {
+ .it_value.tv_sec = INT32_MAX,
+ };
+
+ if (timerfd_settime(fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its32, NULL) >= 0)
+ return TAKE_FD(fd);
+ }
+#endif
+
+ return -errno;
+}
+
+static const char* const timestamp_style_table[_TIMESTAMP_STYLE_MAX] = {
+ [TIMESTAMP_PRETTY] = "pretty",
+ [TIMESTAMP_US] = "us",
+ [TIMESTAMP_UTC] = "utc",
+ [TIMESTAMP_US_UTC] = "us+utc",
+};
+
+/* Use the macro for enum → string to allow for aliases */
+_DEFINE_STRING_TABLE_LOOKUP_TO_STRING(timestamp_style, TimestampStyle,);
+
+/* For the string → enum mapping we use the generic implementation, but also support two aliases */
+TimestampStyle timestamp_style_from_string(const char *s) {
+ TimestampStyle t;
+
+ t = (TimestampStyle) string_table_lookup(timestamp_style_table, ELEMENTSOF(timestamp_style_table), s);
+ if (t >= 0)
+ return t;
+ if (streq_ptr(s, "µs"))
+ return TIMESTAMP_US;
+ if (streq_ptr(s, "µs+uts"))
+ return TIMESTAMP_US_UTC;
+ return t;
+}
diff --git a/src/basic/time-util.h b/src/basic/time-util.h
new file mode 100644
index 0000000..89ee8b4
--- /dev/null
+++ b/src/basic/time-util.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+
+typedef uint64_t usec_t;
+typedef uint64_t nsec_t;
+
+#define PRI_NSEC PRIu64
+#define PRI_USEC PRIu64
+#define NSEC_FMT "%" PRI_NSEC
+#define USEC_FMT "%" PRI_USEC
+
+#include "macro.h"
+
+typedef struct dual_timestamp {
+ usec_t realtime;
+ usec_t monotonic;
+} dual_timestamp;
+
+typedef struct triple_timestamp {
+ usec_t realtime;
+ usec_t monotonic;
+ usec_t boottime;
+} triple_timestamp;
+
+typedef enum TimestampStyle {
+ TIMESTAMP_PRETTY,
+ TIMESTAMP_US,
+ TIMESTAMP_UTC,
+ TIMESTAMP_US_UTC,
+ _TIMESTAMP_STYLE_MAX,
+ _TIMESTAMP_STYLE_INVALID = -1,
+} TimestampStyle;
+
+#define USEC_INFINITY ((usec_t) UINT64_MAX)
+#define NSEC_INFINITY ((nsec_t) UINT64_MAX)
+
+#define MSEC_PER_SEC 1000ULL
+#define USEC_PER_SEC ((usec_t) 1000000ULL)
+#define USEC_PER_MSEC ((usec_t) 1000ULL)
+#define NSEC_PER_SEC ((nsec_t) 1000000000ULL)
+#define NSEC_PER_MSEC ((nsec_t) 1000000ULL)
+#define NSEC_PER_USEC ((nsec_t) 1000ULL)
+
+#define USEC_PER_MINUTE ((usec_t) (60ULL*USEC_PER_SEC))
+#define NSEC_PER_MINUTE ((nsec_t) (60ULL*NSEC_PER_SEC))
+#define USEC_PER_HOUR ((usec_t) (60ULL*USEC_PER_MINUTE))
+#define NSEC_PER_HOUR ((nsec_t) (60ULL*NSEC_PER_MINUTE))
+#define USEC_PER_DAY ((usec_t) (24ULL*USEC_PER_HOUR))
+#define NSEC_PER_DAY ((nsec_t) (24ULL*NSEC_PER_HOUR))
+#define USEC_PER_WEEK ((usec_t) (7ULL*USEC_PER_DAY))
+#define NSEC_PER_WEEK ((nsec_t) (7ULL*NSEC_PER_DAY))
+#define USEC_PER_MONTH ((usec_t) (2629800ULL*USEC_PER_SEC))
+#define NSEC_PER_MONTH ((nsec_t) (2629800ULL*NSEC_PER_SEC))
+#define USEC_PER_YEAR ((usec_t) (31557600ULL*USEC_PER_SEC))
+#define NSEC_PER_YEAR ((nsec_t) (31557600ULL*NSEC_PER_SEC))
+
+/* We assume a maximum timezone length of 6. TZNAME_MAX is not defined on Linux, but glibc internally initializes this
+ * to 6. Let's rely on that. */
+#define FORMAT_TIMESTAMP_MAX (3U+1U+10U+1U+8U+1U+6U+1U+6U+1U)
+#define FORMAT_TIMESTAMP_WIDTH 28U /* when outputting, assume this width */
+#define FORMAT_TIMESTAMP_RELATIVE_MAX 256U
+#define FORMAT_TIMESPAN_MAX 64U
+
+#define TIME_T_MAX (time_t)((UINTMAX_C(1) << ((sizeof(time_t) << 3) - 1)) - 1)
+
+#define DUAL_TIMESTAMP_NULL ((struct dual_timestamp) {})
+#define TRIPLE_TIMESTAMP_NULL ((struct triple_timestamp) {})
+
+usec_t now(clockid_t clock);
+nsec_t now_nsec(clockid_t clock);
+
+usec_t map_clock_usec(usec_t from, clockid_t from_clock, clockid_t to_clock);
+
+dual_timestamp* dual_timestamp_get(dual_timestamp *ts);
+dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u);
+dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u);
+dual_timestamp* dual_timestamp_from_boottime_or_monotonic(dual_timestamp *ts, usec_t u);
+
+triple_timestamp* triple_timestamp_get(triple_timestamp *ts);
+triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u);
+
+#define DUAL_TIMESTAMP_HAS_CLOCK(clock) \
+ IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC)
+
+#define TRIPLE_TIMESTAMP_HAS_CLOCK(clock) \
+ IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM)
+
+static inline bool timestamp_is_set(usec_t timestamp) {
+ return timestamp > 0 && timestamp != USEC_INFINITY;
+}
+
+static inline bool dual_timestamp_is_set(const dual_timestamp *ts) {
+ return timestamp_is_set(ts->realtime) ||
+ timestamp_is_set(ts->monotonic);
+}
+
+static inline bool triple_timestamp_is_set(const triple_timestamp *ts) {
+ return timestamp_is_set(ts->realtime) ||
+ timestamp_is_set(ts->monotonic) ||
+ timestamp_is_set(ts->boottime);
+}
+
+usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock);
+
+usec_t timespec_load(const struct timespec *ts) _pure_;
+nsec_t timespec_load_nsec(const struct timespec *ts) _pure_;
+struct timespec *timespec_store(struct timespec *ts, usec_t u);
+struct timespec *timespec_store_nsec(struct timespec *ts, nsec_t n);
+
+usec_t timeval_load(const struct timeval *tv) _pure_;
+struct timeval *timeval_store(struct timeval *tv, usec_t u);
+
+char *format_timestamp_style(char *buf, size_t l, usec_t t, TimestampStyle style);
+char *format_timestamp_relative(char *buf, size_t l, usec_t t);
+char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy);
+
+static inline char *format_timestamp(char *buf, size_t l, usec_t t) {
+ return format_timestamp_style(buf, l, t, TIMESTAMP_PRETTY);
+}
+
+int parse_timestamp(const char *t, usec_t *usec);
+
+int parse_sec(const char *t, usec_t *usec);
+int parse_sec_fix_0(const char *t, usec_t *usec);
+int parse_sec_def_infinity(const char *t, usec_t *usec);
+int parse_time(const char *t, usec_t *usec, usec_t default_unit);
+int parse_nsec(const char *t, nsec_t *nsec);
+
+bool ntp_synced(void);
+
+int get_timezones(char ***l);
+bool timezone_is_valid(const char *name, int log_level);
+
+bool clock_boottime_supported(void);
+bool clock_supported(clockid_t clock);
+clockid_t clock_boottime_or_monotonic(void);
+
+usec_t usec_shift_clock(usec_t, clockid_t from, clockid_t to);
+
+int get_timezone(char **timezone);
+
+time_t mktime_or_timegm(struct tm *tm, bool utc);
+struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc);
+
+uint32_t usec_to_jiffies(usec_t usec);
+usec_t jiffies_to_usec(uint32_t jiffies);
+
+bool in_utc_timezone(void);
+
+static inline usec_t usec_add(usec_t a, usec_t b) {
+ usec_t c;
+
+ /* Adds two time values, and makes sure USEC_INFINITY as input results as USEC_INFINITY in output, and doesn't
+ * overflow. */
+
+ c = a + b;
+ if (c < a || c < b) /* overflow check */
+ return USEC_INFINITY;
+
+ return c;
+}
+
+static inline usec_t usec_sub_unsigned(usec_t timestamp, usec_t delta) {
+
+ if (timestamp == USEC_INFINITY) /* Make sure infinity doesn't degrade */
+ return USEC_INFINITY;
+ if (timestamp < delta)
+ return 0;
+
+ return timestamp - delta;
+}
+
+static inline usec_t usec_sub_signed(usec_t timestamp, int64_t delta) {
+ if (delta < 0)
+ return usec_add(timestamp, (usec_t) (-delta));
+ else
+ return usec_sub_unsigned(timestamp, (usec_t) delta);
+}
+
+#if SIZEOF_TIME_T == 8
+/* The last second we can format is 31. Dec 9999, 1s before midnight, because otherwise we'd enter 5 digit year
+ * territory. However, since we want to stay away from this in all timezones we take one day off. */
+#define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 253402214399000000)
+#elif SIZEOF_TIME_T == 4
+/* With a 32bit time_t we can't go beyond 2038... */
+#define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 2147483647000000)
+#else
+#error "Yuck, time_t is neither 4 nor 8 bytes wide?"
+#endif
+
+int time_change_fd(void);
+
+const char* timestamp_style_to_string(TimestampStyle t) _const_;
+TimestampStyle timestamp_style_from_string(const char *s) _pure_;
diff --git a/src/basic/tmpfile-util.c b/src/basic/tmpfile-util.c
new file mode 100644
index 0000000..49c3437
--- /dev/null
+++ b/src/basic/tmpfile-util.c
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "memfd-util.h"
+#include "missing_fcntl.h"
+#include "missing_syscall.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+
+int fopen_temporary(const char *path, FILE **ret_f, char **ret_temp_path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *t = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ if (path) {
+ r = tempfn_xxxxxx(path, NULL, &t);
+ if (r < 0)
+ return r;
+ } else {
+ const char *d;
+
+ r = tmp_dir(&d);
+ if (r < 0)
+ return r;
+
+ t = path_join(d, "XXXXXX");
+ if (!t)
+ return -ENOMEM;
+ }
+
+ fd = mkostemp_safe(t);
+ if (fd < 0)
+ return -errno;
+
+ /* This assumes that returned FILE object is short-lived and used within the same single-threaded
+ * context and never shared externally, hence locking is not necessary. */
+
+ r = take_fdopen_unlocked(&fd, "w", &f);
+ if (r < 0) {
+ (void) unlink(t);
+ return r;
+ }
+
+ if (ret_f)
+ *ret_f = TAKE_PTR(f);
+
+ if (ret_temp_path)
+ *ret_temp_path = TAKE_PTR(t);
+
+ return 0;
+}
+
+/* This is much like mkostemp() but is subject to umask(). */
+int mkostemp_safe(char *pattern) {
+ int fd = -1; /* avoid false maybe-uninitialized warning */
+
+ assert(pattern);
+
+ RUN_WITH_UMASK(0077)
+ fd = mkostemp(pattern, O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int fmkostemp_safe(char *pattern, const char *mode, FILE **ret_f) {
+ _cleanup_close_ int fd = -1;
+ FILE *f;
+
+ fd = mkostemp_safe(pattern);
+ if (fd < 0)
+ return fd;
+
+ f = take_fdopen(&fd, mode);
+ if (!f)
+ return -errno;
+
+ *ret_f = f;
+ return 0;
+}
+
+int tempfn_xxxxxx(const char *p, const char *extra, char **ret) {
+ const char *fn;
+ char *t;
+
+ assert(ret);
+
+ if (isempty(p))
+ return -EINVAL;
+ if (path_equal(p, "/"))
+ return -EINVAL;
+
+ /*
+ * Turns this:
+ * /foo/bar/waldo
+ *
+ * Into this:
+ * /foo/bar/.#<extra>waldoXXXXXX
+ */
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ extra = strempty(extra);
+
+ t = new(char, strlen(p) + 2 + strlen(extra) + 6 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ strcpy(stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), extra), fn), "XXXXXX");
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+int tempfn_random(const char *p, const char *extra, char **ret) {
+ const char *fn;
+ char *t, *x;
+ uint64_t u;
+ unsigned i;
+
+ assert(ret);
+
+ if (isempty(p))
+ return -EINVAL;
+ if (path_equal(p, "/"))
+ return -EINVAL;
+
+ /*
+ * Turns this:
+ * /foo/bar/waldo
+ *
+ * Into this:
+ * /foo/bar/.#<extra>waldobaa2a261115984a9
+ */
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ extra = strempty(extra);
+
+ t = new(char, strlen(p) + 2 + strlen(extra) + 16 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ x = stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), extra), fn);
+
+ u = random_u64();
+ for (i = 0; i < 16; i++) {
+ *(x++) = hexchar(u & 0xF);
+ u >>= 4;
+ }
+
+ *x = 0;
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+int tempfn_random_child(const char *p, const char *extra, char **ret) {
+ char *t, *x;
+ uint64_t u;
+ unsigned i;
+ int r;
+
+ assert(ret);
+
+ /* Turns this:
+ * /foo/bar/waldo
+ * Into this:
+ * /foo/bar/waldo/.#<extra>3c2b6219aa75d7d0
+ */
+
+ if (!p) {
+ r = tmp_dir(&p);
+ if (r < 0)
+ return r;
+ }
+
+ extra = strempty(extra);
+
+ t = new(char, strlen(p) + 3 + strlen(extra) + 16 + 1);
+ if (!t)
+ return -ENOMEM;
+
+ if (isempty(p))
+ x = stpcpy(stpcpy(t, ".#"), extra);
+ else
+ x = stpcpy(stpcpy(stpcpy(t, p), "/.#"), extra);
+
+ u = random_u64();
+ for (i = 0; i < 16; i++) {
+ *(x++) = hexchar(u & 0xF);
+ u >>= 4;
+ }
+
+ *x = 0;
+
+ *ret = path_simplify(t, false);
+ return 0;
+}
+
+int open_tmpfile_unlinkable(const char *directory, int flags) {
+ char *p;
+ int fd, r;
+
+ if (!directory) {
+ r = tmp_dir(&directory);
+ if (r < 0)
+ return r;
+ } else if (isempty(directory))
+ return -EINVAL;
+
+ /* Returns an unlinked temporary file that cannot be linked into the file system anymore */
+
+ /* Try O_TMPFILE first, if it is supported */
+ fd = open(directory, flags|O_TMPFILE|O_EXCL, S_IRUSR|S_IWUSR);
+ if (fd >= 0)
+ return fd;
+
+ /* Fall back to unguessable name + unlinking */
+ p = strjoina(directory, "/systemd-tmp-XXXXXX");
+
+ fd = mkostemp_safe(p);
+ if (fd < 0)
+ return fd;
+
+ (void) unlink(p);
+
+ return fd;
+}
+
+int open_tmpfile_linkable(const char *target, int flags, char **ret_path) {
+ _cleanup_free_ char *tmp = NULL;
+ int r, fd;
+
+ assert(target);
+ assert(ret_path);
+
+ /* Don't allow O_EXCL, as that has a special meaning for O_TMPFILE */
+ assert((flags & O_EXCL) == 0);
+
+ /* Creates a temporary file, that shall be renamed to "target" later. If possible, this uses O_TMPFILE – in
+ * which case "ret_path" will be returned as NULL. If not possible the temporary path name used is returned in
+ * "ret_path". Use link_tmpfile() below to rename the result after writing the file in full. */
+
+ fd = open_parent(target, O_TMPFILE|flags, 0640);
+ if (fd >= 0) {
+ *ret_path = NULL;
+ return fd;
+ }
+
+ log_debug_errno(fd, "Failed to use O_TMPFILE for %s: %m", target);
+
+ r = tempfn_random(target, NULL, &tmp);
+ if (r < 0)
+ return r;
+
+ fd = open(tmp, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|flags, 0640);
+ if (fd < 0)
+ return -errno;
+
+ *ret_path = TAKE_PTR(tmp);
+
+ return fd;
+}
+
+int link_tmpfile(int fd, const char *path, const char *target) {
+ int r;
+
+ assert(fd >= 0);
+ assert(target);
+
+ /* Moves a temporary file created with open_tmpfile() above into its final place. if "path" is NULL an fd
+ * created with O_TMPFILE is assumed, and linkat() is used. Otherwise it is assumed O_TMPFILE is not supported
+ * on the directory, and renameat2() is used instead.
+ *
+ * Note that in both cases we will not replace existing files. This is because linkat() does not support this
+ * operation currently (renameat2() does), and there is no nice way to emulate this. */
+
+ if (path) {
+ r = rename_noreplace(AT_FDCWD, path, AT_FDCWD, target);
+ if (r < 0)
+ return r;
+ } else {
+ char proc_fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
+
+ xsprintf(proc_fd_path, "/proc/self/fd/%i", fd);
+
+ if (linkat(AT_FDCWD, proc_fd_path, AT_FDCWD, target, AT_SYMLINK_FOLLOW) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int mkdtemp_malloc(const char *template, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(ret);
+
+ if (template)
+ p = strdup(template);
+ else {
+ const char *tmp;
+
+ r = tmp_dir(&tmp);
+ if (r < 0)
+ return r;
+
+ p = path_join(tmp, "XXXXXX");
+ }
+ if (!p)
+ return -ENOMEM;
+
+ if (!mkdtemp(p))
+ return -errno;
+
+ *ret = TAKE_PTR(p);
+ return 0;
+}
diff --git a/src/basic/tmpfile-util.h b/src/basic/tmpfile-util.h
new file mode 100644
index 0000000..45255fc
--- /dev/null
+++ b/src/basic/tmpfile-util.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+int fopen_temporary(const char *path, FILE **_f, char **_temp_path);
+int mkostemp_safe(char *pattern);
+int fmkostemp_safe(char *pattern, const char *mode, FILE**_f);
+
+int tempfn_xxxxxx(const char *p, const char *extra, char **ret);
+int tempfn_random(const char *p, const char *extra, char **ret);
+int tempfn_random_child(const char *p, const char *extra, char **ret);
+
+int open_tmpfile_unlinkable(const char *directory, int flags);
+int open_tmpfile_linkable(const char *target, int flags, char **ret_path);
+
+int link_tmpfile(int fd, const char *path, const char *target);
+
+int mkdtemp_malloc(const char *template, char **ret);
diff --git a/src/basic/umask-util.h b/src/basic/umask-util.h
new file mode 100644
index 0000000..bd7c2bd
--- /dev/null
+++ b/src/basic/umask-util.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+static inline void umaskp(mode_t *u) {
+ umask(*u & 0777);
+}
+
+#define _cleanup_umask_ _cleanup_(umaskp)
+
+/* We make use of the fact here that the umask() concept is using only the lower 9 bits of mode_t, although
+ * mode_t has space for the file type in the bits further up. We simply OR in the file type mask S_IFMT to
+ * distinguish the first and the second iteration of the RUN_WITH_UMASK() loop, so that we can run the first
+ * one, and exit on the second. */
+
+assert_cc((S_IFMT & 0777) == 0);
+
+#define RUN_WITH_UMASK(mask) \
+ for (_cleanup_umask_ mode_t _saved_umask_ = umask(mask) | S_IFMT; \
+ FLAGS_SET(_saved_umask_, S_IFMT); \
+ _saved_umask_ &= 0777)
diff --git a/src/basic/unaligned.h b/src/basic/unaligned.h
new file mode 100644
index 0000000..4100be0
--- /dev/null
+++ b/src/basic/unaligned.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <endian.h>
+#include <stdint.h>
+
+/* BE */
+
+static inline uint16_t unaligned_read_be16(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ return be16toh(u->x);
+}
+
+static inline uint32_t unaligned_read_be32(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ return be32toh(u->x);
+}
+
+static inline uint64_t unaligned_read_be64(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ return be64toh(u->x);
+}
+
+static inline void unaligned_write_be16(void *_u, uint16_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ u->x = be16toh(a);
+}
+
+static inline void unaligned_write_be32(void *_u, uint32_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ u->x = be32toh(a);
+}
+
+static inline void unaligned_write_be64(void *_u, uint64_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ u->x = be64toh(a);
+}
+
+/* LE */
+
+static inline uint16_t unaligned_read_le16(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ return le16toh(u->x);
+}
+
+static inline uint32_t unaligned_read_le32(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ return le32toh(u->x);
+}
+
+static inline uint64_t unaligned_read_le64(const void *_u) {
+ const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ return le64toh(u->x);
+}
+
+static inline void unaligned_write_le16(void *_u, uint16_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+ u->x = le16toh(a);
+}
+
+static inline void unaligned_write_le32(void *_u, uint32_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+ u->x = le32toh(a);
+}
+
+static inline void unaligned_write_le64(void *_u, uint64_t a) {
+ struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+ u->x = le64toh(a);
+}
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define unaligned_read_ne16 unaligned_read_be16
+#define unaligned_read_ne32 unaligned_read_be32
+#define unaligned_read_ne64 unaligned_read_be64
+
+#define unaligned_write_ne16 unaligned_write_be16
+#define unaligned_write_ne32 unaligned_write_be32
+#define unaligned_write_ne64 unaligned_write_be64
+#else
+#define unaligned_read_ne16 unaligned_read_le16
+#define unaligned_read_ne32 unaligned_read_le32
+#define unaligned_read_ne64 unaligned_read_le64
+
+#define unaligned_write_ne16 unaligned_write_le16
+#define unaligned_write_ne32 unaligned_write_le32
+#define unaligned_write_ne64 unaligned_write_le64
+#endif
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
new file mode 100644
index 0000000..145399c
--- /dev/null
+++ b/src/basic/unit-def.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "string-table.h"
+#include "unit-def.h"
+#include "unit-name.h"
+
+char *unit_dbus_path_from_name(const char *name) {
+ _cleanup_free_ char *e = NULL;
+
+ assert(name);
+
+ e = bus_label_escape(name);
+ if (!e)
+ return NULL;
+
+ return strjoin("/org/freedesktop/systemd1/unit/", e);
+}
+
+int unit_name_from_dbus_path(const char *path, char **name) {
+ const char *e;
+ char *n;
+
+ e = startswith(path, "/org/freedesktop/systemd1/unit/");
+ if (!e)
+ return -EINVAL;
+
+ n = bus_label_unescape(e);
+ if (!n)
+ return -ENOMEM;
+
+ *name = n;
+ return 0;
+}
+
+const char* unit_dbus_interface_from_type(UnitType t) {
+
+ static const char *const table[_UNIT_TYPE_MAX] = {
+ [UNIT_SERVICE] = "org.freedesktop.systemd1.Service",
+ [UNIT_SOCKET] = "org.freedesktop.systemd1.Socket",
+ [UNIT_TARGET] = "org.freedesktop.systemd1.Target",
+ [UNIT_DEVICE] = "org.freedesktop.systemd1.Device",
+ [UNIT_MOUNT] = "org.freedesktop.systemd1.Mount",
+ [UNIT_AUTOMOUNT] = "org.freedesktop.systemd1.Automount",
+ [UNIT_SWAP] = "org.freedesktop.systemd1.Swap",
+ [UNIT_TIMER] = "org.freedesktop.systemd1.Timer",
+ [UNIT_PATH] = "org.freedesktop.systemd1.Path",
+ [UNIT_SLICE] = "org.freedesktop.systemd1.Slice",
+ [UNIT_SCOPE] = "org.freedesktop.systemd1.Scope",
+ };
+
+ if (t < 0)
+ return NULL;
+ if (t >= _UNIT_TYPE_MAX)
+ return NULL;
+
+ return table[t];
+}
+
+const char *unit_dbus_interface_from_name(const char *name) {
+ UnitType t;
+
+ t = unit_name_to_type(name);
+ if (t < 0)
+ return NULL;
+
+ return unit_dbus_interface_from_type(t);
+}
+
+static const char* const unit_type_table[_UNIT_TYPE_MAX] = {
+ [UNIT_SERVICE] = "service",
+ [UNIT_SOCKET] = "socket",
+ [UNIT_TARGET] = "target",
+ [UNIT_DEVICE] = "device",
+ [UNIT_MOUNT] = "mount",
+ [UNIT_AUTOMOUNT] = "automount",
+ [UNIT_SWAP] = "swap",
+ [UNIT_TIMER] = "timer",
+ [UNIT_PATH] = "path",
+ [UNIT_SLICE] = "slice",
+ [UNIT_SCOPE] = "scope",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_type, UnitType);
+
+static const char* const unit_load_state_table[_UNIT_LOAD_STATE_MAX] = {
+ [UNIT_STUB] = "stub",
+ [UNIT_LOADED] = "loaded",
+ [UNIT_NOT_FOUND] = "not-found",
+ [UNIT_BAD_SETTING] = "bad-setting",
+ [UNIT_ERROR] = "error",
+ [UNIT_MERGED] = "merged",
+ [UNIT_MASKED] = "masked"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_load_state, UnitLoadState);
+
+static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = {
+ [UNIT_ACTIVE] = "active",
+ [UNIT_RELOADING] = "reloading",
+ [UNIT_INACTIVE] = "inactive",
+ [UNIT_FAILED] = "failed",
+ [UNIT_ACTIVATING] = "activating",
+ [UNIT_DEACTIVATING] = "deactivating",
+ [UNIT_MAINTENANCE] = "maintenance",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_active_state, UnitActiveState);
+
+static const char* const freezer_state_table[_FREEZER_STATE_MAX] = {
+ [FREEZER_RUNNING] = "running",
+ [FREEZER_FREEZING] = "freezing",
+ [FREEZER_FROZEN] = "frozen",
+ [FREEZER_THAWING] = "thawing",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(freezer_state, FreezerState);
+
+static const char* const automount_state_table[_AUTOMOUNT_STATE_MAX] = {
+ [AUTOMOUNT_DEAD] = "dead",
+ [AUTOMOUNT_WAITING] = "waiting",
+ [AUTOMOUNT_RUNNING] = "running",
+ [AUTOMOUNT_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(automount_state, AutomountState);
+
+static const char* const device_state_table[_DEVICE_STATE_MAX] = {
+ [DEVICE_DEAD] = "dead",
+ [DEVICE_TENTATIVE] = "tentative",
+ [DEVICE_PLUGGED] = "plugged",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(device_state, DeviceState);
+
+static const char* const mount_state_table[_MOUNT_STATE_MAX] = {
+ [MOUNT_DEAD] = "dead",
+ [MOUNT_MOUNTING] = "mounting",
+ [MOUNT_MOUNTING_DONE] = "mounting-done",
+ [MOUNT_MOUNTED] = "mounted",
+ [MOUNT_REMOUNTING] = "remounting",
+ [MOUNT_UNMOUNTING] = "unmounting",
+ [MOUNT_REMOUNTING_SIGTERM] = "remounting-sigterm",
+ [MOUNT_REMOUNTING_SIGKILL] = "remounting-sigkill",
+ [MOUNT_UNMOUNTING_SIGTERM] = "unmounting-sigterm",
+ [MOUNT_UNMOUNTING_SIGKILL] = "unmounting-sigkill",
+ [MOUNT_FAILED] = "failed",
+ [MOUNT_CLEANING] = "cleaning",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mount_state, MountState);
+
+static const char* const path_state_table[_PATH_STATE_MAX] = {
+ [PATH_DEAD] = "dead",
+ [PATH_WAITING] = "waiting",
+ [PATH_RUNNING] = "running",
+ [PATH_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
+
+static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
+ [SCOPE_DEAD] = "dead",
+ [SCOPE_RUNNING] = "running",
+ [SCOPE_ABANDONED] = "abandoned",
+ [SCOPE_STOP_SIGTERM] = "stop-sigterm",
+ [SCOPE_STOP_SIGKILL] = "stop-sigkill",
+ [SCOPE_FAILED] = "failed",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState);
+
+static const char* const service_state_table[_SERVICE_STATE_MAX] = {
+ [SERVICE_DEAD] = "dead",
+ [SERVICE_CONDITION] = "condition",
+ [SERVICE_START_PRE] = "start-pre",
+ [SERVICE_START] = "start",
+ [SERVICE_START_POST] = "start-post",
+ [SERVICE_RUNNING] = "running",
+ [SERVICE_EXITED] = "exited",
+ [SERVICE_RELOAD] = "reload",
+ [SERVICE_STOP] = "stop",
+ [SERVICE_STOP_WATCHDOG] = "stop-watchdog",
+ [SERVICE_STOP_SIGTERM] = "stop-sigterm",
+ [SERVICE_STOP_SIGKILL] = "stop-sigkill",
+ [SERVICE_STOP_POST] = "stop-post",
+ [SERVICE_FINAL_WATCHDOG] = "final-watchdog",
+ [SERVICE_FINAL_SIGTERM] = "final-sigterm",
+ [SERVICE_FINAL_SIGKILL] = "final-sigkill",
+ [SERVICE_FAILED] = "failed",
+ [SERVICE_AUTO_RESTART] = "auto-restart",
+ [SERVICE_CLEANING] = "cleaning",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState);
+
+static const char* const slice_state_table[_SLICE_STATE_MAX] = {
+ [SLICE_DEAD] = "dead",
+ [SLICE_ACTIVE] = "active"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(slice_state, SliceState);
+
+static const char* const socket_state_table[_SOCKET_STATE_MAX] = {
+ [SOCKET_DEAD] = "dead",
+ [SOCKET_START_PRE] = "start-pre",
+ [SOCKET_START_CHOWN] = "start-chown",
+ [SOCKET_START_POST] = "start-post",
+ [SOCKET_LISTENING] = "listening",
+ [SOCKET_RUNNING] = "running",
+ [SOCKET_STOP_PRE] = "stop-pre",
+ [SOCKET_STOP_PRE_SIGTERM] = "stop-pre-sigterm",
+ [SOCKET_STOP_PRE_SIGKILL] = "stop-pre-sigkill",
+ [SOCKET_STOP_POST] = "stop-post",
+ [SOCKET_FINAL_SIGTERM] = "final-sigterm",
+ [SOCKET_FINAL_SIGKILL] = "final-sigkill",
+ [SOCKET_FAILED] = "failed",
+ [SOCKET_CLEANING] = "cleaning",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_state, SocketState);
+
+static const char* const swap_state_table[_SWAP_STATE_MAX] = {
+ [SWAP_DEAD] = "dead",
+ [SWAP_ACTIVATING] = "activating",
+ [SWAP_ACTIVATING_DONE] = "activating-done",
+ [SWAP_ACTIVE] = "active",
+ [SWAP_DEACTIVATING] = "deactivating",
+ [SWAP_DEACTIVATING_SIGTERM] = "deactivating-sigterm",
+ [SWAP_DEACTIVATING_SIGKILL] = "deactivating-sigkill",
+ [SWAP_FAILED] = "failed",
+ [SWAP_CLEANING] = "cleaning",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(swap_state, SwapState);
+
+static const char* const target_state_table[_TARGET_STATE_MAX] = {
+ [TARGET_DEAD] = "dead",
+ [TARGET_ACTIVE] = "active"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(target_state, TargetState);
+
+static const char* const timer_state_table[_TIMER_STATE_MAX] = {
+ [TIMER_DEAD] = "dead",
+ [TIMER_WAITING] = "waiting",
+ [TIMER_RUNNING] = "running",
+ [TIMER_ELAPSED] = "elapsed",
+ [TIMER_FAILED] = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(timer_state, TimerState);
+
+static const char* const unit_dependency_table[_UNIT_DEPENDENCY_MAX] = {
+ [UNIT_REQUIRES] = "Requires",
+ [UNIT_REQUISITE] = "Requisite",
+ [UNIT_WANTS] = "Wants",
+ [UNIT_BINDS_TO] = "BindsTo",
+ [UNIT_PART_OF] = "PartOf",
+ [UNIT_REQUIRED_BY] = "RequiredBy",
+ [UNIT_REQUISITE_OF] = "RequisiteOf",
+ [UNIT_WANTED_BY] = "WantedBy",
+ [UNIT_BOUND_BY] = "BoundBy",
+ [UNIT_CONSISTS_OF] = "ConsistsOf",
+ [UNIT_CONFLICTS] = "Conflicts",
+ [UNIT_CONFLICTED_BY] = "ConflictedBy",
+ [UNIT_BEFORE] = "Before",
+ [UNIT_AFTER] = "After",
+ [UNIT_ON_FAILURE] = "OnFailure",
+ [UNIT_TRIGGERS] = "Triggers",
+ [UNIT_TRIGGERED_BY] = "TriggeredBy",
+ [UNIT_PROPAGATES_RELOAD_TO] = "PropagatesReloadTo",
+ [UNIT_RELOAD_PROPAGATED_FROM] = "ReloadPropagatedFrom",
+ [UNIT_JOINS_NAMESPACE_OF] = "JoinsNamespaceOf",
+ [UNIT_REFERENCES] = "References",
+ [UNIT_REFERENCED_BY] = "ReferencedBy",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_dependency, UnitDependency);
+
+static const char* const notify_access_table[_NOTIFY_ACCESS_MAX] = {
+ [NOTIFY_NONE] = "none",
+ [NOTIFY_MAIN] = "main",
+ [NOTIFY_EXEC] = "exec",
+ [NOTIFY_ALL] = "all"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(notify_access, NotifyAccess);
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
new file mode 100644
index 0000000..8535fbe
--- /dev/null
+++ b/src/basic/unit-def.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+/* The enum order is used to order unit jobs in the job queue
+ * when other criteria (cpu weight, nice level) are identical.
+ * In this case service units have the highest priority. */
+typedef enum UnitType {
+ UNIT_SERVICE,
+ UNIT_MOUNT,
+ UNIT_SWAP,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_DEVICE,
+ UNIT_AUTOMOUNT,
+ UNIT_TIMER,
+ UNIT_PATH,
+ UNIT_SLICE,
+ UNIT_SCOPE,
+ _UNIT_TYPE_MAX,
+ _UNIT_TYPE_INVALID = -1
+} UnitType;
+
+typedef enum UnitLoadState {
+ UNIT_STUB,
+ UNIT_LOADED,
+ UNIT_NOT_FOUND, /* error condition #1: unit file not found */
+ UNIT_BAD_SETTING, /* error condition #2: we couldn't parse some essential unit file setting */
+ UNIT_ERROR, /* error condition #3: other "system" error, catchall for the rest */
+ UNIT_MERGED,
+ UNIT_MASKED,
+ _UNIT_LOAD_STATE_MAX,
+ _UNIT_LOAD_STATE_INVALID = -1
+} UnitLoadState;
+
+typedef enum UnitActiveState {
+ UNIT_ACTIVE,
+ UNIT_RELOADING,
+ UNIT_INACTIVE,
+ UNIT_FAILED,
+ UNIT_ACTIVATING,
+ UNIT_DEACTIVATING,
+ UNIT_MAINTENANCE,
+ _UNIT_ACTIVE_STATE_MAX,
+ _UNIT_ACTIVE_STATE_INVALID = -1
+} UnitActiveState;
+
+typedef enum FreezerState {
+ FREEZER_RUNNING,
+ FREEZER_FREEZING,
+ FREEZER_FROZEN,
+ FREEZER_THAWING,
+ _FREEZER_STATE_MAX,
+ _FREEZER_STATE_INVALID = -1
+} FreezerState;
+
+typedef enum AutomountState {
+ AUTOMOUNT_DEAD,
+ AUTOMOUNT_WAITING,
+ AUTOMOUNT_RUNNING,
+ AUTOMOUNT_FAILED,
+ _AUTOMOUNT_STATE_MAX,
+ _AUTOMOUNT_STATE_INVALID = -1
+} AutomountState;
+
+/* We simply watch devices, we cannot plug/unplug them. That
+ * simplifies the state engine greatly */
+typedef enum DeviceState {
+ DEVICE_DEAD,
+ DEVICE_TENTATIVE, /* mounted or swapped, but not (yet) announced by udev */
+ DEVICE_PLUGGED, /* announced by udev */
+ _DEVICE_STATE_MAX,
+ _DEVICE_STATE_INVALID = -1
+} DeviceState;
+
+typedef enum MountState {
+ MOUNT_DEAD,
+ MOUNT_MOUNTING, /* /usr/bin/mount is running, but the mount is not done yet. */
+ MOUNT_MOUNTING_DONE, /* /usr/bin/mount is running, and the mount is done. */
+ MOUNT_MOUNTED,
+ MOUNT_REMOUNTING,
+ MOUNT_UNMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM,
+ MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED,
+ MOUNT_CLEANING,
+ _MOUNT_STATE_MAX,
+ _MOUNT_STATE_INVALID = -1
+} MountState;
+
+typedef enum PathState {
+ PATH_DEAD,
+ PATH_WAITING,
+ PATH_RUNNING,
+ PATH_FAILED,
+ _PATH_STATE_MAX,
+ _PATH_STATE_INVALID = -1
+} PathState;
+
+typedef enum ScopeState {
+ SCOPE_DEAD,
+ SCOPE_RUNNING,
+ SCOPE_ABANDONED,
+ SCOPE_STOP_SIGTERM,
+ SCOPE_STOP_SIGKILL,
+ SCOPE_FAILED,
+ _SCOPE_STATE_MAX,
+ _SCOPE_STATE_INVALID = -1
+} ScopeState;
+
+typedef enum ServiceState {
+ SERVICE_DEAD,
+ SERVICE_CONDITION,
+ SERVICE_START_PRE,
+ SERVICE_START,
+ SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_EXITED, /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */
+ SERVICE_RELOAD,
+ SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */
+ SERVICE_STOP_WATCHDOG,
+ SERVICE_STOP_SIGTERM,
+ SERVICE_STOP_SIGKILL,
+ SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, /* In case the STOP_POST executable needs to be aborted. */
+ SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */
+ SERVICE_FINAL_SIGKILL,
+ SERVICE_FAILED,
+ SERVICE_AUTO_RESTART,
+ SERVICE_CLEANING,
+ _SERVICE_STATE_MAX,
+ _SERVICE_STATE_INVALID = -1
+} ServiceState;
+
+typedef enum SliceState {
+ SLICE_DEAD,
+ SLICE_ACTIVE,
+ _SLICE_STATE_MAX,
+ _SLICE_STATE_INVALID = -1
+} SliceState;
+
+typedef enum SocketState {
+ SOCKET_DEAD,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_LISTENING,
+ SOCKET_RUNNING,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL,
+ SOCKET_FAILED,
+ SOCKET_CLEANING,
+ _SOCKET_STATE_MAX,
+ _SOCKET_STATE_INVALID = -1
+} SocketState;
+
+typedef enum SwapState {
+ SWAP_DEAD,
+ SWAP_ACTIVATING, /* /sbin/swapon is running, but the swap not yet enabled. */
+ SWAP_ACTIVATING_DONE, /* /sbin/swapon is running, and the swap is done. */
+ SWAP_ACTIVE,
+ SWAP_DEACTIVATING,
+ SWAP_DEACTIVATING_SIGTERM,
+ SWAP_DEACTIVATING_SIGKILL,
+ SWAP_FAILED,
+ SWAP_CLEANING,
+ _SWAP_STATE_MAX,
+ _SWAP_STATE_INVALID = -1
+} SwapState;
+
+typedef enum TargetState {
+ TARGET_DEAD,
+ TARGET_ACTIVE,
+ _TARGET_STATE_MAX,
+ _TARGET_STATE_INVALID = -1
+} TargetState;
+
+typedef enum TimerState {
+ TIMER_DEAD,
+ TIMER_WAITING,
+ TIMER_RUNNING,
+ TIMER_ELAPSED,
+ TIMER_FAILED,
+ _TIMER_STATE_MAX,
+ _TIMER_STATE_INVALID = -1
+} TimerState;
+
+typedef enum UnitDependency {
+ /* Positive dependencies */
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ UNIT_PART_OF,
+
+ /* Inverse of the above */
+ UNIT_REQUIRED_BY, /* inverse of 'requires' is 'required_by' */
+ UNIT_REQUISITE_OF, /* inverse of 'requisite' is 'requisite_of' */
+ UNIT_WANTED_BY, /* inverse of 'wants' */
+ UNIT_BOUND_BY, /* inverse of 'binds_to' */
+ UNIT_CONSISTS_OF, /* inverse of 'part_of' */
+
+ /* Negative dependencies */
+ UNIT_CONFLICTS, /* inverse of 'conflicts' is 'conflicted_by' */
+ UNIT_CONFLICTED_BY,
+
+ /* Order */
+ UNIT_BEFORE, /* inverse of 'before' is 'after' and vice versa */
+ UNIT_AFTER,
+
+ /* On Failure */
+ UNIT_ON_FAILURE,
+
+ /* Triggers (i.e. a socket triggers a service) */
+ UNIT_TRIGGERS,
+ UNIT_TRIGGERED_BY,
+
+ /* Propagate reloads */
+ UNIT_PROPAGATES_RELOAD_TO,
+ UNIT_RELOAD_PROPAGATED_FROM,
+
+ /* Joins namespace of */
+ UNIT_JOINS_NAMESPACE_OF,
+
+ /* Reference information for GC logic */
+ UNIT_REFERENCES, /* Inverse of 'references' is 'referenced_by' */
+ UNIT_REFERENCED_BY,
+
+ _UNIT_DEPENDENCY_MAX,
+ _UNIT_DEPENDENCY_INVALID = -1
+} UnitDependency;
+
+typedef enum NotifyAccess {
+ NOTIFY_NONE,
+ NOTIFY_ALL,
+ NOTIFY_MAIN,
+ NOTIFY_EXEC,
+ _NOTIFY_ACCESS_MAX,
+ _NOTIFY_ACCESS_INVALID = -1
+} NotifyAccess;
+
+char *unit_dbus_path_from_name(const char *name);
+int unit_name_from_dbus_path(const char *path, char **name);
+
+const char* unit_dbus_interface_from_type(UnitType t);
+const char *unit_dbus_interface_from_name(const char *name);
+
+const char *unit_type_to_string(UnitType i) _const_;
+UnitType unit_type_from_string(const char *s) _pure_;
+
+const char *unit_load_state_to_string(UnitLoadState i) _const_;
+UnitLoadState unit_load_state_from_string(const char *s) _pure_;
+
+const char *unit_active_state_to_string(UnitActiveState i) _const_;
+UnitActiveState unit_active_state_from_string(const char *s) _pure_;
+
+const char *freezer_state_to_string(FreezerState i) _const_;
+FreezerState freezer_state_from_string(const char *s) _pure_;
+
+const char* automount_state_to_string(AutomountState i) _const_;
+AutomountState automount_state_from_string(const char *s) _pure_;
+
+const char* device_state_to_string(DeviceState i) _const_;
+DeviceState device_state_from_string(const char *s) _pure_;
+
+const char* mount_state_to_string(MountState i) _const_;
+MountState mount_state_from_string(const char *s) _pure_;
+
+const char* path_state_to_string(PathState i) _const_;
+PathState path_state_from_string(const char *s) _pure_;
+
+const char* scope_state_to_string(ScopeState i) _const_;
+ScopeState scope_state_from_string(const char *s) _pure_;
+
+const char* service_state_to_string(ServiceState i) _const_;
+ServiceState service_state_from_string(const char *s) _pure_;
+
+const char* slice_state_to_string(SliceState i) _const_;
+SliceState slice_state_from_string(const char *s) _pure_;
+
+const char* socket_state_to_string(SocketState i) _const_;
+SocketState socket_state_from_string(const char *s) _pure_;
+
+const char* swap_state_to_string(SwapState i) _const_;
+SwapState swap_state_from_string(const char *s) _pure_;
+
+const char* target_state_to_string(TargetState i) _const_;
+TargetState target_state_from_string(const char *s) _pure_;
+
+const char *timer_state_to_string(TimerState i) _const_;
+TimerState timer_state_from_string(const char *s) _pure_;
+
+const char *unit_dependency_to_string(UnitDependency i) _const_;
+UnitDependency unit_dependency_from_string(const char *s) _pure_;
+
+const char* notify_access_to_string(NotifyAccess i) _const_;
+NotifyAccess notify_access_from_string(const char *s) _pure_;
diff --git a/src/basic/unit-name.c b/src/basic/unit-name.c
new file mode 100644
index 0000000..c1529bb
--- /dev/null
+++ b/src/basic/unit-name.c
@@ -0,0 +1,796 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "glob-util.h"
+#include "hexdecoct.h"
+#include "path-util.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+/* Characters valid in a unit name. */
+#define VALID_CHARS \
+ DIGITS \
+ LETTERS \
+ ":-_.\\"
+
+/* The same, but also permits the single @ character that may appear */
+#define VALID_CHARS_WITH_AT \
+ "@" \
+ VALID_CHARS
+
+/* All chars valid in a unit name glob */
+#define VALID_CHARS_GLOB \
+ VALID_CHARS_WITH_AT \
+ "[]!-*?"
+
+bool unit_name_is_valid(const char *n, UnitNameFlags flags) {
+ const char *e, *i, *at;
+
+ assert((flags & ~(UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) == 0);
+
+ if (_unlikely_(flags == 0))
+ return false;
+
+ if (isempty(n))
+ return false;
+
+ if (strlen(n) >= UNIT_NAME_MAX)
+ return false;
+
+ e = strrchr(n, '.');
+ if (!e || e == n)
+ return false;
+
+ if (unit_type_from_string(e + 1) < 0)
+ return false;
+
+ for (i = n, at = NULL; i < e; i++) {
+
+ if (*i == '@' && !at)
+ at = i;
+
+ if (!strchr("@" VALID_CHARS, *i))
+ return false;
+ }
+
+ if (at == n)
+ return false;
+
+ if (flags & UNIT_NAME_PLAIN)
+ if (!at)
+ return true;
+
+ if (flags & UNIT_NAME_INSTANCE)
+ if (at && e > at + 1)
+ return true;
+
+ if (flags & UNIT_NAME_TEMPLATE)
+ if (at && e == at + 1)
+ return true;
+
+ return false;
+}
+
+bool unit_prefix_is_valid(const char *p) {
+
+ /* We don't allow additional @ in the prefix string */
+
+ if (isempty(p))
+ return false;
+
+ return in_charset(p, VALID_CHARS);
+}
+
+bool unit_instance_is_valid(const char *i) {
+
+ /* The max length depends on the length of the string, so we
+ * don't really check this here. */
+
+ if (isempty(i))
+ return false;
+
+ /* We allow additional @ in the instance string, we do not
+ * allow them in the prefix! */
+
+ return in_charset(i, "@" VALID_CHARS);
+}
+
+bool unit_suffix_is_valid(const char *s) {
+ if (isempty(s))
+ return false;
+
+ if (s[0] != '.')
+ return false;
+
+ if (unit_type_from_string(s + 1) < 0)
+ return false;
+
+ return true;
+}
+
+int unit_name_to_prefix(const char *n, char **ret) {
+ const char *p;
+ char *s;
+
+ assert(n);
+ assert(ret);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ p = strchr(n, '@');
+ if (!p)
+ p = strrchr(n, '.');
+
+ assert_se(p);
+
+ s = strndup(n, p - n);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_to_instance(const char *n, char **ret) {
+ const char *p, *d;
+
+ assert(n);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ /* Everything past the first @ and before the last . is the instance */
+ p = strchr(n, '@');
+ if (!p) {
+ if (ret)
+ *ret = NULL;
+ return UNIT_NAME_PLAIN;
+ }
+
+ p++;
+
+ d = strrchr(p, '.');
+ if (!d)
+ return -EINVAL;
+
+ if (ret) {
+ char *i = strndup(p, d-p);
+ if (!i)
+ return -ENOMEM;
+
+ *ret = i;
+ }
+ return d > p ? UNIT_NAME_INSTANCE : UNIT_NAME_TEMPLATE;
+}
+
+int unit_name_to_prefix_and_instance(const char *n, char **ret) {
+ const char *d;
+ char *s;
+
+ assert(n);
+ assert(ret);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ d = strrchr(n, '.');
+ if (!d)
+ return -EINVAL;
+
+ s = strndup(n, d - n);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+UnitType unit_name_to_type(const char *n) {
+ const char *e;
+
+ assert(n);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return _UNIT_TYPE_INVALID;
+
+ assert_se(e = strrchr(n, '.'));
+
+ return unit_type_from_string(e + 1);
+}
+
+int unit_name_change_suffix(const char *n, const char *suffix, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t a, b;
+ char *e;
+
+ assert(n);
+ assert(suffix);
+ assert(ret);
+
+ if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ if (!unit_suffix_is_valid(suffix))
+ return -EINVAL;
+
+ assert_se(e = strrchr(n, '.'));
+
+ a = e - n;
+ b = strlen(suffix);
+
+ s = new(char, a + b + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(s, n, a), suffix);
+
+ /* Make sure the name is still valid (i.e. didn't grow too large due to longer suffix) */
+ if (!unit_name_is_valid(s, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret) {
+ UnitType type;
+
+ assert(prefix);
+ assert(suffix);
+ assert(ret);
+
+ if (suffix[0] != '.')
+ return -EINVAL;
+
+ type = unit_type_from_string(suffix + 1);
+ if (type < 0)
+ return -EINVAL;
+
+ return unit_name_build_from_type(prefix, instance, type, ret);
+}
+
+int unit_name_build_from_type(const char *prefix, const char *instance, UnitType type, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ const char *ut;
+
+ assert(prefix);
+ assert(type >= 0);
+ assert(type < _UNIT_TYPE_MAX);
+ assert(ret);
+
+ if (!unit_prefix_is_valid(prefix))
+ return -EINVAL;
+
+ ut = unit_type_to_string(type);
+
+ if (instance) {
+ if (!unit_instance_is_valid(instance))
+ return -EINVAL;
+
+ s = strjoin(prefix, "@", instance, ".", ut);
+ } else
+ s = strjoin(prefix, ".", ut);
+ if (!s)
+ return -ENOMEM;
+
+ /* Verify that this didn't grow too large (or otherwise is invalid) */
+ if (!unit_name_is_valid(s, instance ? UNIT_NAME_INSTANCE : UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+static char *do_escape_char(char c, char *t) {
+ assert(t);
+
+ *(t++) = '\\';
+ *(t++) = 'x';
+ *(t++) = hexchar(c >> 4);
+ *(t++) = hexchar(c);
+
+ return t;
+}
+
+static char *do_escape(const char *f, char *t) {
+ assert(f);
+ assert(t);
+
+ /* do not create units with a leading '.', like for "/.dotdir" mount points */
+ if (*f == '.') {
+ t = do_escape_char(*f, t);
+ f++;
+ }
+
+ for (; *f; f++) {
+ if (*f == '/')
+ *(t++) = '-';
+ else if (IN_SET(*f, '-', '\\') || !strchr(VALID_CHARS, *f))
+ t = do_escape_char(*f, t);
+ else
+ *(t++) = *f;
+ }
+
+ return t;
+}
+
+char *unit_name_escape(const char *f) {
+ char *r, *t;
+
+ assert(f);
+
+ r = new(char, strlen(f)*4+1);
+ if (!r)
+ return NULL;
+
+ t = do_escape(f, r);
+ *t = 0;
+
+ return r;
+}
+
+int unit_name_unescape(const char *f, char **ret) {
+ _cleanup_free_ char *r = NULL;
+ char *t;
+
+ assert(f);
+
+ r = strdup(f);
+ if (!r)
+ return -ENOMEM;
+
+ for (t = r; *f; f++) {
+ if (*f == '-')
+ *(t++) = '/';
+ else if (*f == '\\') {
+ int a, b;
+
+ if (f[1] != 'x')
+ return -EINVAL;
+
+ a = unhexchar(f[2]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unhexchar(f[3]);
+ if (b < 0)
+ return -EINVAL;
+
+ *(t++) = (char) (((uint8_t) a << 4U) | (uint8_t) b);
+ f += 3;
+ } else
+ *(t++) = *f;
+ }
+
+ *t = 0;
+
+ *ret = TAKE_PTR(r);
+
+ return 0;
+}
+
+int unit_name_path_escape(const char *f, char **ret) {
+ char *p, *s;
+
+ assert(f);
+ assert(ret);
+
+ p = strdupa(f);
+ if (!p)
+ return -ENOMEM;
+
+ path_simplify(p, false);
+
+ if (empty_or_root(p))
+ s = strdup("-");
+ else {
+ if (!path_is_normalized(p))
+ return -EINVAL;
+
+ /* Truncate trailing slashes */
+ delete_trailing_chars(p, "/");
+
+ /* Truncate leading slashes */
+ p = skip_leading_chars(p, "/");
+
+ s = unit_name_escape(p);
+ }
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_path_unescape(const char *f, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert(f);
+
+ if (isempty(f))
+ return -EINVAL;
+
+ if (streq(f, "-")) {
+ s = strdup("/");
+ if (!s)
+ return -ENOMEM;
+ } else {
+ _cleanup_free_ char *w = NULL;
+
+ r = unit_name_unescape(f, &w);
+ if (r < 0)
+ return r;
+
+ /* Don't accept trailing or leading slashes */
+ if (startswith(w, "/") || endswith(w, "/"))
+ return -EINVAL;
+
+ /* Prefix a slash again */
+ s = strjoin("/", w);
+ if (!s)
+ return -ENOMEM;
+
+ if (!path_is_normalized(s))
+ return -EINVAL;
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int unit_name_replace_instance(const char *f, const char *i, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ const char *p, *e;
+ size_t a, b;
+
+ assert(f);
+ assert(i);
+ assert(ret);
+
+ if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+ if (!unit_instance_is_valid(i))
+ return -EINVAL;
+
+ assert_se(p = strchr(f, '@'));
+ assert_se(e = strrchr(f, '.'));
+
+ a = p - f;
+ b = strlen(i);
+
+ s = new(char, a + 1 + b + strlen(e) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(mempcpy(s, f, a + 1), i, b), e);
+
+ /* Make sure the resulting name still is valid, i.e. didn't grow too large */
+ if (!unit_name_is_valid(s, UNIT_NAME_INSTANCE))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+int unit_name_template(const char *f, char **ret) {
+ const char *p, *e;
+ char *s;
+ size_t a;
+
+ assert(f);
+ assert(ret);
+
+ if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ assert_se(p = strchr(f, '@'));
+ assert_se(e = strrchr(f, '.'));
+
+ a = p - f;
+
+ s = new(char, a + 1 + strlen(e) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(s, f, a + 1), e);
+
+ *ret = s;
+ return 0;
+}
+
+int unit_name_from_path(const char *path, const char *suffix, char **ret) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert(path);
+ assert(suffix);
+ assert(ret);
+
+ if (!unit_suffix_is_valid(suffix))
+ return -EINVAL;
+
+ r = unit_name_path_escape(path, &p);
+ if (r < 0)
+ return r;
+
+ s = strjoin(p, suffix);
+ if (!s)
+ return -ENOMEM;
+
+ /* Refuse this if this got too long or for some other reason didn't result in a valid name */
+ if (!unit_name_is_valid(s, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert(prefix);
+ assert(path);
+ assert(suffix);
+ assert(ret);
+
+ if (!unit_prefix_is_valid(prefix))
+ return -EINVAL;
+
+ if (!unit_suffix_is_valid(suffix))
+ return -EINVAL;
+
+ r = unit_name_path_escape(path, &p);
+ if (r < 0)
+ return r;
+
+ s = strjoin(prefix, "@", p, suffix);
+ if (!s)
+ return -ENOMEM;
+
+ /* Refuse this if this got too long or for some other reason didn't result in a valid name */
+ if (!unit_name_is_valid(s, UNIT_NAME_INSTANCE))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+int unit_name_to_path(const char *name, char **ret) {
+ _cleanup_free_ char *prefix = NULL;
+ int r;
+
+ assert(name);
+
+ r = unit_name_to_prefix(name, &prefix);
+ if (r < 0)
+ return r;
+
+ return unit_name_path_unescape(prefix, ret);
+}
+
+static bool do_escape_mangle(const char *f, bool allow_globs, char *t) {
+ const char *valid_chars;
+ bool mangled = false;
+
+ assert(f);
+ assert(t);
+
+ /* We'll only escape the obvious characters here, to play safe.
+ *
+ * Returns true if any characters were mangled, false otherwise.
+ */
+
+ valid_chars = allow_globs ? VALID_CHARS_GLOB : VALID_CHARS_WITH_AT;
+
+ for (; *f; f++)
+ if (*f == '/') {
+ *(t++) = '-';
+ mangled = true;
+ } else if (!strchr(valid_chars, *f)) {
+ t = do_escape_char(*f, t);
+ mangled = true;
+ } else
+ *(t++) = *f;
+ *t = 0;
+
+ return mangled;
+}
+
+/**
+ * Convert a string to a unit name. /dev/blah is converted to dev-blah.device,
+ * /blah/blah is converted to blah-blah.mount, anything else is left alone,
+ * except that @suffix is appended if a valid unit suffix is not present.
+ *
+ * If @allow_globs, globs characters are preserved. Otherwise, they are escaped.
+ */
+int unit_name_mangle_with_suffix(const char *name, const char *operation, UnitNameMangle flags, const char *suffix, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ bool mangled, suggest_escape = true;
+ int r;
+
+ assert(name);
+ assert(suffix);
+ assert(ret);
+
+ if (isempty(name)) /* We cannot mangle empty unit names to become valid, sorry. */
+ return -EINVAL;
+
+ if (!unit_suffix_is_valid(suffix))
+ return -EINVAL;
+
+ /* Already a fully valid unit name? If so, no mangling is necessary... */
+ if (unit_name_is_valid(name, UNIT_NAME_ANY))
+ goto good;
+
+ /* Already a fully valid globbing expression? If so, no mangling is necessary either... */
+ if (string_is_glob(name) && in_charset(name, VALID_CHARS_GLOB)) {
+ if (flags & UNIT_NAME_MANGLE_GLOB)
+ goto good;
+ log_full(flags & UNIT_NAME_MANGLE_WARN ? LOG_NOTICE : LOG_DEBUG,
+ "Glob pattern passed%s%s, but globs are not supported for this.",
+ operation ? " " : "", strempty(operation));
+ suggest_escape = false;
+ }
+
+ if (is_device_path(name)) {
+ r = unit_name_from_path(name, ".device", ret);
+ if (r >= 0)
+ return 1;
+ if (r != -EINVAL)
+ return r;
+ }
+
+ if (path_is_absolute(name)) {
+ r = unit_name_from_path(name, ".mount", ret);
+ if (r >= 0)
+ return 1;
+ if (r != -EINVAL)
+ return r;
+ }
+
+ s = new(char, strlen(name) * 4 + strlen(suffix) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ mangled = do_escape_mangle(name, flags & UNIT_NAME_MANGLE_GLOB, s);
+ if (mangled)
+ log_full(flags & UNIT_NAME_MANGLE_WARN ? LOG_NOTICE : LOG_DEBUG,
+ "Invalid unit name \"%s\" escaped as \"%s\"%s.",
+ name, s,
+ suggest_escape ? " (maybe you should use systemd-escape?)" : "");
+
+ /* Append a suffix if it doesn't have any, but only if this is not a glob, so that we can allow
+ * "foo.*" as a valid glob. */
+ if ((!(flags & UNIT_NAME_MANGLE_GLOB) || !string_is_glob(s)) && unit_name_to_type(s) < 0)
+ strcat(s, suffix);
+
+ /* Make sure mangling didn't grow this too large (but don't do this check if globbing is allowed,
+ * since globs generally do not qualify as valid unit names) */
+ if (!FLAGS_SET(flags, UNIT_NAME_MANGLE_GLOB) && !unit_name_is_valid(s, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(s);
+ return 1;
+
+good:
+ s = strdup(name);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+int slice_build_parent_slice(const char *slice, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ char *dash;
+ int r;
+
+ assert(slice);
+ assert(ret);
+
+ if (!slice_name_is_valid(slice))
+ return -EINVAL;
+
+ if (streq(slice, SPECIAL_ROOT_SLICE)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ s = strdup(slice);
+ if (!s)
+ return -ENOMEM;
+
+ dash = strrchr(s, '-');
+ if (dash)
+ strcpy(dash, ".slice");
+ else {
+ r = free_and_strdup(&s, SPECIAL_ROOT_SLICE);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(s);
+ return 1;
+}
+
+int slice_build_subslice(const char *slice, const char *name, char **ret) {
+ char *subslice;
+
+ assert(slice);
+ assert(name);
+ assert(ret);
+
+ if (!slice_name_is_valid(slice))
+ return -EINVAL;
+
+ if (!unit_prefix_is_valid(name))
+ return -EINVAL;
+
+ if (streq(slice, SPECIAL_ROOT_SLICE))
+ subslice = strjoin(name, ".slice");
+ else {
+ char *e;
+
+ assert_se(e = endswith(slice, ".slice"));
+
+ subslice = new(char, (e - slice) + 1 + strlen(name) + 6 + 1);
+ if (!subslice)
+ return -ENOMEM;
+
+ stpcpy(stpcpy(stpcpy(mempcpy(subslice, slice, e - slice), "-"), name), ".slice");
+ }
+
+ *ret = subslice;
+ return 0;
+}
+
+bool slice_name_is_valid(const char *name) {
+ const char *p, *e;
+ bool dash = false;
+
+ if (!unit_name_is_valid(name, UNIT_NAME_PLAIN))
+ return false;
+
+ if (streq(name, SPECIAL_ROOT_SLICE))
+ return true;
+
+ e = endswith(name, ".slice");
+ if (!e)
+ return false;
+
+ for (p = name; p < e; p++) {
+
+ if (*p == '-') {
+
+ /* Don't allow initial dash */
+ if (p == name)
+ return false;
+
+ /* Don't allow multiple dashes */
+ if (dash)
+ return false;
+
+ dash = true;
+ } else
+ dash = false;
+ }
+
+ /* Don't allow trailing hash */
+ if (dash)
+ return false;
+
+ return true;
+}
diff --git a/src/basic/unit-name.h b/src/basic/unit-name.h
new file mode 100644
index 0000000..c25672f
--- /dev/null
+++ b/src/basic/unit-name.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+#include "unit-def.h"
+
+#define UNIT_NAME_MAX 256
+
+typedef enum UnitNameFlags {
+ UNIT_NAME_PLAIN = 1 << 0, /* Allow foo.service */
+ UNIT_NAME_TEMPLATE = 1 << 1, /* Allow foo@.service */
+ UNIT_NAME_INSTANCE = 1 << 2, /* Allow foo@bar.service */
+ UNIT_NAME_ANY = UNIT_NAME_PLAIN|UNIT_NAME_TEMPLATE|UNIT_NAME_INSTANCE,
+ _UNIT_NAME_INVALID = -1,
+} UnitNameFlags;
+
+bool unit_name_is_valid(const char *n, UnitNameFlags flags) _pure_;
+bool unit_prefix_is_valid(const char *p) _pure_;
+bool unit_instance_is_valid(const char *i) _pure_;
+bool unit_suffix_is_valid(const char *s) _pure_;
+
+int unit_name_to_prefix(const char *n, char **ret);
+int unit_name_to_instance(const char *n, char **ret);
+static inline int unit_name_classify(const char *n) {
+ return unit_name_to_instance(n, NULL);
+}
+int unit_name_to_prefix_and_instance(const char *n, char **ret);
+
+UnitType unit_name_to_type(const char *n) _pure_;
+
+int unit_name_change_suffix(const char *n, const char *suffix, char **ret);
+
+int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret);
+int unit_name_build_from_type(const char *prefix, const char *instance, UnitType, char **ret);
+
+char *unit_name_escape(const char *f);
+int unit_name_unescape(const char *f, char **ret);
+int unit_name_path_escape(const char *f, char **ret);
+int unit_name_path_unescape(const char *f, char **ret);
+
+int unit_name_replace_instance(const char *f, const char *i, char **ret);
+
+int unit_name_template(const char *f, char **ret);
+
+int unit_name_from_path(const char *path, const char *suffix, char **ret);
+int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret);
+int unit_name_to_path(const char *name, char **ret);
+
+typedef enum UnitNameMangle {
+ UNIT_NAME_MANGLE_GLOB = 1 << 0,
+ UNIT_NAME_MANGLE_WARN = 1 << 1,
+} UnitNameMangle;
+
+int unit_name_mangle_with_suffix(const char *name, const char *operation, UnitNameMangle flags, const char *suffix, char **ret);
+
+static inline int unit_name_mangle(const char *name, UnitNameMangle flags, char **ret) {
+ return unit_name_mangle_with_suffix(name, NULL, flags, ".service", ret);
+}
+
+int slice_build_parent_slice(const char *slice, char **ret);
+int slice_build_subslice(const char *slice, const char *name, char **subslice);
+bool slice_name_is_valid(const char *name);
diff --git a/src/basic/user-util.c b/src/basic/user-util.c
new file mode 100644
index 0000000..933a398
--- /dev/null
+++ b/src/basic/user-util.c
@@ -0,0 +1,1074 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <utmp.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "utf8.h"
+
+bool uid_is_valid(uid_t uid) {
+
+ /* Also see POSIX IEEE Std 1003.1-2008, 2016 Edition, 3.436. */
+
+ /* Some libc APIs use UID_INVALID as special placeholder */
+ if (uid == (uid_t) UINT32_C(0xFFFFFFFF))
+ return false;
+
+ /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */
+ if (uid == (uid_t) UINT32_C(0xFFFF))
+ return false;
+
+ return true;
+}
+
+int parse_uid(const char *s, uid_t *ret) {
+ uint32_t uid = 0;
+ int r;
+
+ assert(s);
+
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+
+ /* We are very strict when parsing UIDs, and prohibit +/- as prefix, leading zero as prefix, and
+ * whitespace. We do this, since this call is often used in a context where we parse things as UID
+ * first, and if that doesn't work we fall back to NSS. Thus we really want to make sure that UIDs
+ * are parsed as UIDs only if they really really look like UIDs. */
+ r = safe_atou32_full(s, 10
+ | SAFE_ATO_REFUSE_PLUS_MINUS
+ | SAFE_ATO_REFUSE_LEADING_ZERO
+ | SAFE_ATO_REFUSE_LEADING_WHITESPACE, &uid);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid))
+ return -ENXIO; /* we return ENXIO instead of EINVAL
+ * here, to make it easy to distinguish
+ * invalid numeric uids from invalid
+ * strings. */
+
+ if (ret)
+ *ret = uid;
+
+ return 0;
+}
+
+int parse_uid_range(const char *s, uid_t *ret_lower, uid_t *ret_upper) {
+ _cleanup_free_ char *word = NULL;
+ uid_t l, u;
+ int r;
+
+ assert(s);
+ assert(ret_lower);
+ assert(ret_upper);
+
+ r = extract_first_word(&s, &word, "-", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ r = parse_uid(word, &l);
+ if (r < 0)
+ return r;
+
+ /* Check for the upper bound and extract it if needed */
+ if (!s)
+ /* Single number with no dash. */
+ u = l;
+ else if (!*s)
+ /* Trailing dash is an error. */
+ return -EINVAL;
+ else {
+ r = parse_uid(s, &u);
+ if (r < 0)
+ return r;
+
+ if (l > u)
+ return -EINVAL;
+ }
+
+ *ret_lower = l;
+ *ret_upper = u;
+ return 0;
+}
+
+char* getlogname_malloc(void) {
+ uid_t uid;
+ struct stat st;
+
+ if (isatty(STDIN_FILENO) && fstat(STDIN_FILENO, &st) >= 0)
+ uid = st.st_uid;
+ else
+ uid = getuid();
+
+ return uid_to_name(uid);
+}
+
+char *getusername_malloc(void) {
+ const char *e;
+
+ e = secure_getenv("USER");
+ if (e)
+ return strdup(e);
+
+ return uid_to_name(getuid());
+}
+
+bool is_nologin_shell(const char *shell) {
+
+ return PATH_IN_SET(shell,
+ /* 'nologin' is the friendliest way to disable logins for a user account. It prints a nice
+ * message and exits. Different distributions place the binary at different places though,
+ * hence let's list them all. */
+ "/bin/nologin",
+ "/sbin/nologin",
+ "/usr/bin/nologin",
+ "/usr/sbin/nologin",
+ /* 'true' and 'false' work too for the same purpose, but are less friendly as they don't do
+ * any message printing. Different distributions place the binary at various places but at
+ * least not in the 'sbin' directory. */
+ "/bin/false",
+ "/usr/bin/false",
+ "/bin/true",
+ "/usr/bin/true");
+}
+
+static int synthesize_user_creds(
+ const char **username,
+ uid_t *uid, gid_t *gid,
+ const char **home,
+ const char **shell,
+ UserCredsFlags flags) {
+
+ /* We enforce some special rules for uid=0 and uid=65534: in order to avoid NSS lookups for root we hardcode
+ * their user record data. */
+
+ if (STR_IN_SET(*username, "root", "0")) {
+ *username = "root";
+
+ if (uid)
+ *uid = 0;
+ if (gid)
+ *gid = 0;
+
+ if (home)
+ *home = "/root";
+
+ if (shell)
+ *shell = "/bin/sh";
+
+ return 0;
+ }
+
+ if (synthesize_nobody() &&
+ STR_IN_SET(*username, NOBODY_USER_NAME, "65534")) {
+ *username = NOBODY_USER_NAME;
+
+ if (uid)
+ *uid = UID_NOBODY;
+ if (gid)
+ *gid = GID_NOBODY;
+
+ if (home)
+ *home = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : "/";
+
+ if (shell)
+ *shell = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : NOLOGIN;
+
+ return 0;
+ }
+
+ return -ENOMEDIUM;
+}
+
+int get_user_creds(
+ const char **username,
+ uid_t *uid, gid_t *gid,
+ const char **home,
+ const char **shell,
+ UserCredsFlags flags) {
+
+ uid_t u = UID_INVALID;
+ struct passwd *p;
+ int r;
+
+ assert(username);
+ assert(*username);
+
+ if (!FLAGS_SET(flags, USER_CREDS_PREFER_NSS) ||
+ (!home && !shell)) {
+
+ /* So here's the deal: normally, we'll try to synthesize all records we can synthesize, and override
+ * the user database with that. However, if the user specifies USER_CREDS_PREFER_NSS then the
+ * user database will override the synthetic records instead — except if the user is only interested in
+ * the UID and/or GID (but not the home directory, or the shell), in which case we'll always override
+ * the user database (i.e. the USER_CREDS_PREFER_NSS flag has no effect in this case). Why?
+ * Simply because there are valid usecase where the user might change the home directory or the shell
+ * of the relevant users, but changing the UID/GID mappings for them is something we explicitly don't
+ * support. */
+
+ r = synthesize_user_creds(username, uid, gid, home, shell, flags);
+ if (r >= 0)
+ return 0;
+ if (r != -ENOMEDIUM) /* not a username we can synthesize */
+ return r;
+ }
+
+ if (parse_uid(*username, &u) >= 0) {
+ errno = 0;
+ p = getpwuid(u);
+
+ /* If there are multiple users with the same id, make sure to leave $USER to the configured value
+ * instead of the first occurrence in the database. However if the uid was configured by a numeric uid,
+ * then let's pick the real username from /etc/passwd. */
+ if (p)
+ *username = p->pw_name;
+ else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING) && !gid && !home && !shell) {
+
+ /* If the specified user is a numeric UID and it isn't in the user database, and the caller
+ * passed USER_CREDS_ALLOW_MISSING and was only interested in the UID, then juts return that
+ * and don't complain. */
+
+ if (uid)
+ *uid = u;
+
+ return 0;
+ }
+ } else {
+ errno = 0;
+ p = getpwnam(*username);
+ }
+ if (!p) {
+ r = errno_or_else(ESRCH);
+
+ /* If the user requested that we only synthesize as fallback, do so now */
+ if (FLAGS_SET(flags, USER_CREDS_PREFER_NSS)) {
+ if (synthesize_user_creds(username, uid, gid, home, shell, flags) >= 0)
+ return 0;
+ }
+
+ return r;
+ }
+
+ if (uid) {
+ if (!uid_is_valid(p->pw_uid))
+ return -EBADMSG;
+
+ *uid = p->pw_uid;
+ }
+
+ if (gid) {
+ if (!gid_is_valid(p->pw_gid))
+ return -EBADMSG;
+
+ *gid = p->pw_gid;
+ }
+
+ if (home) {
+ if (FLAGS_SET(flags, USER_CREDS_CLEAN) &&
+ (empty_or_root(p->pw_dir) ||
+ !path_is_valid(p->pw_dir) ||
+ !path_is_absolute(p->pw_dir)))
+ *home = NULL; /* Note: we don't insist on normalized paths, since there are setups that have /./ in the path */
+ else
+ *home = p->pw_dir;
+ }
+
+ if (shell) {
+ if (FLAGS_SET(flags, USER_CREDS_CLEAN) &&
+ (isempty(p->pw_shell) ||
+ !path_is_valid(p->pw_dir) ||
+ !path_is_absolute(p->pw_shell) ||
+ is_nologin_shell(p->pw_shell)))
+ *shell = NULL;
+ else
+ *shell = p->pw_shell;
+ }
+
+ return 0;
+}
+
+int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags) {
+ struct group *g;
+ gid_t id;
+
+ assert(groupname);
+
+ /* We enforce some special rules for gid=0: in order to avoid NSS lookups for root we hardcode its data. */
+
+ if (STR_IN_SET(*groupname, "root", "0")) {
+ *groupname = "root";
+
+ if (gid)
+ *gid = 0;
+
+ return 0;
+ }
+
+ if (synthesize_nobody() &&
+ STR_IN_SET(*groupname, NOBODY_GROUP_NAME, "65534")) {
+ *groupname = NOBODY_GROUP_NAME;
+
+ if (gid)
+ *gid = GID_NOBODY;
+
+ return 0;
+ }
+
+ if (parse_gid(*groupname, &id) >= 0) {
+ errno = 0;
+ g = getgrgid(id);
+
+ if (g)
+ *groupname = g->gr_name;
+ else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING)) {
+ if (gid)
+ *gid = id;
+
+ return 0;
+ }
+ } else {
+ errno = 0;
+ g = getgrnam(*groupname);
+ }
+
+ if (!g)
+ return errno_or_else(ESRCH);
+
+ if (gid) {
+ if (!gid_is_valid(g->gr_gid))
+ return -EBADMSG;
+
+ *gid = g->gr_gid;
+ }
+
+ return 0;
+}
+
+char* uid_to_name(uid_t uid) {
+ char *ret;
+ int r;
+
+ /* Shortcut things to avoid NSS lookups */
+ if (uid == 0)
+ return strdup("root");
+ if (synthesize_nobody() &&
+ uid == UID_NOBODY)
+ return strdup(NOBODY_USER_NAME);
+
+ if (uid_is_valid(uid)) {
+ long bufsize;
+
+ bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+ if (bufsize <= 0)
+ bufsize = 4096;
+
+ for (;;) {
+ struct passwd pwbuf, *pw = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ buf = malloc(bufsize);
+ if (!buf)
+ return NULL;
+
+ r = getpwuid_r(uid, &pwbuf, buf, (size_t) bufsize, &pw);
+ if (r == 0 && pw)
+ return strdup(pw->pw_name);
+ if (r != ERANGE)
+ break;
+
+ if (bufsize > LONG_MAX/2) /* overflow check */
+ return NULL;
+
+ bufsize *= 2;
+ }
+ }
+
+ if (asprintf(&ret, UID_FMT, uid) < 0)
+ return NULL;
+
+ return ret;
+}
+
+char* gid_to_name(gid_t gid) {
+ char *ret;
+ int r;
+
+ if (gid == 0)
+ return strdup("root");
+ if (synthesize_nobody() &&
+ gid == GID_NOBODY)
+ return strdup(NOBODY_GROUP_NAME);
+
+ if (gid_is_valid(gid)) {
+ long bufsize;
+
+ bufsize = sysconf(_SC_GETGR_R_SIZE_MAX);
+ if (bufsize <= 0)
+ bufsize = 4096;
+
+ for (;;) {
+ struct group grbuf, *gr = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ buf = malloc(bufsize);
+ if (!buf)
+ return NULL;
+
+ r = getgrgid_r(gid, &grbuf, buf, (size_t) bufsize, &gr);
+ if (r == 0 && gr)
+ return strdup(gr->gr_name);
+ if (r != ERANGE)
+ break;
+
+ if (bufsize > LONG_MAX/2) /* overflow check */
+ return NULL;
+
+ bufsize *= 2;
+ }
+ }
+
+ if (asprintf(&ret, GID_FMT, gid) < 0)
+ return NULL;
+
+ return ret;
+}
+
+static bool gid_list_has(const gid_t *list, size_t size, gid_t val) {
+ for (size_t i = 0; i < size; i++)
+ if (list[i] == val)
+ return true;
+ return false;
+}
+
+int in_gid(gid_t gid) {
+ _cleanup_free_ gid_t *gids = NULL;
+ int ngroups;
+
+ if (getgid() == gid)
+ return 1;
+
+ if (getegid() == gid)
+ return 1;
+
+ if (!gid_is_valid(gid))
+ return -EINVAL;
+
+ ngroups = getgroups_alloc(&gids);
+ if (ngroups < 0)
+ return ngroups;
+
+ return gid_list_has(gids, ngroups, gid);
+}
+
+int in_group(const char *name) {
+ int r;
+ gid_t gid;
+
+ r = get_group_creds(&name, &gid, 0);
+ if (r < 0)
+ return r;
+
+ return in_gid(gid);
+}
+
+int merge_gid_lists(const gid_t *list1, size_t size1, const gid_t *list2, size_t size2, gid_t **ret) {
+ size_t nresult = 0;
+ assert(ret);
+
+ if (size2 > INT_MAX - size1)
+ return -ENOBUFS;
+
+ gid_t *buf = new(gid_t, size1 + size2);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Duplicates need to be skipped on merging, otherwise they'll be passed on and stored in the kernel. */
+ for (size_t i = 0; i < size1; i++)
+ if (!gid_list_has(buf, nresult, list1[i]))
+ buf[nresult++] = list1[i];
+ for (size_t i = 0; i < size2; i++)
+ if (!gid_list_has(buf, nresult, list2[i]))
+ buf[nresult++] = list2[i];
+ *ret = buf;
+ return (int)nresult;
+}
+
+int getgroups_alloc(gid_t** gids) {
+ gid_t *allocated;
+ _cleanup_free_ gid_t *p = NULL;
+ int ngroups = 8;
+ unsigned attempt = 0;
+
+ allocated = new(gid_t, ngroups);
+ if (!allocated)
+ return -ENOMEM;
+ p = allocated;
+
+ for (;;) {
+ ngroups = getgroups(ngroups, p);
+ if (ngroups >= 0)
+ break;
+ if (errno != EINVAL)
+ return -errno;
+
+ /* Give up eventually */
+ if (attempt++ > 10)
+ return -EINVAL;
+
+ /* Get actual size needed, and size the array explicitly. Note that this is potentially racy
+ * to use (in multi-threaded programs), hence let's call this in a loop. */
+ ngroups = getgroups(0, NULL);
+ if (ngroups < 0)
+ return -errno;
+ if (ngroups == 0)
+ return false;
+
+ free(allocated);
+
+ p = allocated = new(gid_t, ngroups);
+ if (!allocated)
+ return -ENOMEM;
+ }
+
+ *gids = TAKE_PTR(p);
+ return ngroups;
+}
+
+int get_home_dir(char **_h) {
+ struct passwd *p;
+ const char *e;
+ char *h;
+ uid_t u;
+
+ assert(_h);
+
+ /* Take the user specified one */
+ e = secure_getenv("HOME");
+ if (e && path_is_valid(e) && path_is_absolute(e)) {
+ h = strdup(e);
+ if (!h)
+ return -ENOMEM;
+
+ *_h = path_simplify(h, true);
+ return 0;
+ }
+
+ /* Hardcode home directory for root and nobody to avoid NSS */
+ u = getuid();
+ if (u == 0) {
+ h = strdup("/root");
+ if (!h)
+ return -ENOMEM;
+
+ *_h = h;
+ return 0;
+ }
+ if (synthesize_nobody() &&
+ u == UID_NOBODY) {
+ h = strdup("/");
+ if (!h)
+ return -ENOMEM;
+
+ *_h = h;
+ return 0;
+ }
+
+ /* Check the database... */
+ errno = 0;
+ p = getpwuid(u);
+ if (!p)
+ return errno_or_else(ESRCH);
+
+ if (!path_is_valid(p->pw_dir) ||
+ !path_is_absolute(p->pw_dir))
+ return -EINVAL;
+
+ h = strdup(p->pw_dir);
+ if (!h)
+ return -ENOMEM;
+
+ *_h = path_simplify(h, true);
+ return 0;
+}
+
+int get_shell(char **_s) {
+ struct passwd *p;
+ const char *e;
+ char *s;
+ uid_t u;
+
+ assert(_s);
+
+ /* Take the user specified one */
+ e = secure_getenv("SHELL");
+ if (e && path_is_valid(e) && path_is_absolute(e)) {
+ s = strdup(e);
+ if (!s)
+ return -ENOMEM;
+
+ *_s = path_simplify(s, true);
+ return 0;
+ }
+
+ /* Hardcode shell for root and nobody to avoid NSS */
+ u = getuid();
+ if (u == 0) {
+ s = strdup("/bin/sh");
+ if (!s)
+ return -ENOMEM;
+
+ *_s = s;
+ return 0;
+ }
+ if (synthesize_nobody() &&
+ u == UID_NOBODY) {
+ s = strdup(NOLOGIN);
+ if (!s)
+ return -ENOMEM;
+
+ *_s = s;
+ return 0;
+ }
+
+ /* Check the database... */
+ errno = 0;
+ p = getpwuid(u);
+ if (!p)
+ return errno_or_else(ESRCH);
+
+ if (!path_is_valid(p->pw_shell) ||
+ !path_is_absolute(p->pw_shell))
+ return -EINVAL;
+
+ s = strdup(p->pw_shell);
+ if (!s)
+ return -ENOMEM;
+
+ *_s = path_simplify(s, true);
+ return 0;
+}
+
+int reset_uid_gid(void) {
+ int r;
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ return r;
+
+ if (setresgid(0, 0, 0) < 0)
+ return -errno;
+
+ if (setresuid(0, 0, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int take_etc_passwd_lock(const char *root) {
+
+ struct flock flock = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = 0,
+ .l_len = 0,
+ };
+
+ const char *path;
+ int fd, r;
+
+ /* This is roughly the same as lckpwdf(), but not as awful. We
+ * don't want to use alarm() and signals, hence we implement
+ * our own trivial version of this.
+ *
+ * Note that shadow-utils also takes per-database locks in
+ * addition to lckpwdf(). However, we don't given that they
+ * are redundant as they invoke lckpwdf() first and keep
+ * it during everything they do. The per-database locks are
+ * awfully racy, and thus we just won't do them. */
+
+ if (root)
+ path = prefix_roota(root, ETC_PASSWD_LOCK_PATH);
+ else
+ path = ETC_PASSWD_LOCK_PATH;
+
+ fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
+ if (fd < 0)
+ return log_debug_errno(errno, "Cannot open %s: %m", path);
+
+ r = fcntl(fd, F_SETLKW, &flock);
+ if (r < 0) {
+ safe_close(fd);
+ return log_debug_errno(errno, "Locking %s failed: %m", path);
+ }
+
+ return fd;
+}
+
+bool valid_user_group_name(const char *u, ValidUserFlags flags) {
+ const char *i;
+
+ /* Checks if the specified name is a valid user/group name. There are two flavours of this call:
+ * strict mode is the default which is POSIX plus some extra rules; and relaxed mode where we accept
+ * pretty much everything except the really worst offending names.
+ *
+ * Whenever we synthesize users ourselves we should use the strict mode. But when we process users
+ * created by other stuff, let's be more liberal. */
+
+ if (isempty(u)) /* An empty user name is never valid */
+ return false;
+
+ if (parse_uid(u, NULL) >= 0) /* Something that parses as numeric UID string is valid exactly when the
+ * flag for it is set */
+ return FLAGS_SET(flags, VALID_USER_ALLOW_NUMERIC);
+
+ if (FLAGS_SET(flags, VALID_USER_RELAX)) {
+
+ /* In relaxed mode we just check very superficially. Apparently SSSD and other stuff is
+ * extremely liberal (way too liberal if you ask me, even inserting "@" in user names, which
+ * is bound to cause problems for example when used with an MTA), hence only filter the most
+ * obvious cases, or where things would result in an invalid entry if such a user name would
+ * show up in /etc/passwd (or equivalent getent output).
+ *
+ * Note that we stepped far out of POSIX territory here. It's not our fault though, but
+ * SSSD's, Samba's and everybody else who ignored POSIX on this. (I mean, I am happy to step
+ * outside of POSIX' bounds any day, but I must say in this case I probably wouldn't
+ * have...) */
+
+ if (startswith(u, " ") || endswith(u, " ")) /* At least expect whitespace padding is removed
+ * at front and back (accept in the middle, since
+ * that's apparently a thing on Windows). Note
+ * that this also blocks usernames consisting of
+ * whitespace only. */
+ return false;
+
+ if (!utf8_is_valid(u)) /* We want to synthesize JSON from this, hence insist on UTF-8 */
+ return false;
+
+ if (string_has_cc(u, NULL)) /* CC characters are just dangerous (and \n in particular is the
+ * record separator in /etc/passwd), so we can't allow that. */
+ return false;
+
+ if (strpbrk(u, ":/")) /* Colons are the field separator in /etc/passwd, we can't allow
+ * that. Slashes are special to file systems paths and user names
+ * typically show up in the file system as home directories, hence
+ * don't allow slashes. */
+ return false;
+
+ if (in_charset(u, "0123456789")) /* Don't allow fully numeric strings, they might be confused
+ * with UIDs (note that this test is more broad than
+ * the parse_uid() test above, as it will cover more than
+ * the 32bit range, and it will detect 65535 (which is in
+ * invalid UID, even though in the unsigned 32 bit range) */
+ return false;
+
+ if (u[0] == '-' && in_charset(u + 1, "0123456789")) /* Don't allow negative fully numeric
+ * strings either. After all some people
+ * write 65535 as -1 (even though that's
+ * not even true on 32bit uid_t
+ * anyway) */
+ return false;
+
+ if (dot_or_dot_dot(u)) /* User names typically become home directory names, and these two are
+ * special in that context, don't allow that. */
+ return false;
+
+ /* Compare with strict result and warn if result doesn't match */
+ if (FLAGS_SET(flags, VALID_USER_WARN) && !valid_user_group_name(u, 0))
+ log_struct(LOG_NOTICE,
+ "MESSAGE=Accepting user/group name '%s', which does not match strict user/group name rules.", u,
+ "USER_GROUP_NAME=%s", u,
+ "MESSAGE_ID=" SD_MESSAGE_UNSAFE_USER_NAME_STR);
+
+ /* Note that we make no restrictions on the length in relaxed mode! */
+ } else {
+ long sz;
+ size_t l;
+
+ /* Also see POSIX IEEE Std 1003.1-2008, 2016 Edition, 3.437. We are a bit stricter here
+ * however. Specifically we deviate from POSIX rules:
+ *
+ * - We don't allow empty user names (see above)
+ * - We require that names fit into the appropriate utmp field
+ * - We don't allow any dots (this conflicts with chown syntax which permits dots as user/group name separator)
+ * - We don't allow dashes or digit as the first character
+ *
+ * Note that other systems are even more restrictive, and don't permit underscores or uppercase characters.
+ */
+
+ if (!(u[0] >= 'a' && u[0] <= 'z') &&
+ !(u[0] >= 'A' && u[0] <= 'Z') &&
+ u[0] != '_')
+ return false;
+
+ for (i = u+1; *i; i++)
+ if (!(*i >= 'a' && *i <= 'z') &&
+ !(*i >= 'A' && *i <= 'Z') &&
+ !(*i >= '0' && *i <= '9') &&
+ !IN_SET(*i, '_', '-'))
+ return false;
+
+ l = i - u;
+
+ sz = sysconf(_SC_LOGIN_NAME_MAX);
+ assert_se(sz > 0);
+
+ if (l > (size_t) sz)
+ return false;
+ if (l > FILENAME_MAX)
+ return false;
+ if (l > UT_NAMESIZE - 1)
+ return false;
+ }
+
+ return true;
+}
+
+bool valid_gecos(const char *d) {
+
+ if (!d)
+ return false;
+
+ if (!utf8_is_valid(d))
+ return false;
+
+ if (string_has_cc(d, NULL))
+ return false;
+
+ /* Colons are used as field separators, and hence not OK */
+ if (strchr(d, ':'))
+ return false;
+
+ return true;
+}
+
+char *mangle_gecos(const char *d) {
+ char *mangled;
+
+ /* Makes sure the provided string becomes valid as a GEGOS field, by dropping bad chars. glibc's
+ * putwent() only changes \n and : to spaces. We do more: replace all CC too, and remove invalid
+ * UTF-8 */
+
+ mangled = strdup(d);
+ if (!mangled)
+ return NULL;
+
+ for (char *i = mangled; *i; i++) {
+ int len;
+
+ if ((uint8_t) *i < (uint8_t) ' ' || *i == ':') {
+ *i = ' ';
+ continue;
+ }
+
+ len = utf8_encoded_valid_unichar(i, (size_t) -1);
+ if (len < 0) {
+ *i = ' ';
+ continue;
+ }
+
+ i += len - 1;
+ }
+
+ return mangled;
+}
+
+bool valid_home(const char *p) {
+ /* Note that this function is also called by valid_shell(), any
+ * changes must account for that. */
+
+ if (isempty(p))
+ return false;
+
+ if (!utf8_is_valid(p))
+ return false;
+
+ if (string_has_cc(p, NULL))
+ return false;
+
+ if (!path_is_absolute(p))
+ return false;
+
+ if (!path_is_normalized(p))
+ return false;
+
+ /* Colons are used as field separators, and hence not OK */
+ if (strchr(p, ':'))
+ return false;
+
+ return true;
+}
+
+int maybe_setgroups(size_t size, const gid_t *list) {
+ int r;
+
+ /* Check if setgroups is allowed before we try to drop all the auxiliary groups */
+ if (size == 0) { /* Dropping all aux groups? */
+ _cleanup_free_ char *setgroups_content = NULL;
+ bool can_setgroups;
+
+ r = read_one_line_file("/proc/self/setgroups", &setgroups_content);
+ if (r == -ENOENT)
+ /* Old kernels don't have /proc/self/setgroups, so assume we can use setgroups */
+ can_setgroups = true;
+ else if (r < 0)
+ return r;
+ else
+ can_setgroups = streq(setgroups_content, "allow");
+
+ if (!can_setgroups) {
+ log_debug("Skipping setgroups(), /proc/self/setgroups is set to 'deny'");
+ return 0;
+ }
+ }
+
+ if (setgroups(size, list) < 0)
+ return -errno;
+
+ return 0;
+}
+
+bool synthesize_nobody(void) {
+ /* Returns true when we shall synthesize the "nobody" user (which we do by default). This can be turned off by
+ * touching /etc/systemd/dont-synthesize-nobody in order to provide upgrade compatibility with legacy systems
+ * that used the "nobody" user name and group name for other UIDs/GIDs than 65534.
+ *
+ * Note that we do not employ any kind of synchronization on the following caching variable. If the variable is
+ * accessed in multi-threaded programs in the worst case it might happen that we initialize twice, but that
+ * shouldn't matter as each initialization should come to the same result. */
+ static int cache = -1;
+
+ if (cache < 0)
+ cache = access("/etc/systemd/dont-synthesize-nobody", F_OK) < 0;
+
+ return cache;
+}
+
+int putpwent_sane(const struct passwd *pw, FILE *stream) {
+ assert(pw);
+ assert(stream);
+
+ errno = 0;
+ if (putpwent(pw, stream) != 0)
+ return errno_or_else(EIO);
+
+ return 0;
+}
+
+int putspent_sane(const struct spwd *sp, FILE *stream) {
+ assert(sp);
+ assert(stream);
+
+ errno = 0;
+ if (putspent(sp, stream) != 0)
+ return errno_or_else(EIO);
+
+ return 0;
+}
+
+int putgrent_sane(const struct group *gr, FILE *stream) {
+ assert(gr);
+ assert(stream);
+
+ errno = 0;
+ if (putgrent(gr, stream) != 0)
+ return errno_or_else(EIO);
+
+ return 0;
+}
+
+#if ENABLE_GSHADOW
+int putsgent_sane(const struct sgrp *sg, FILE *stream) {
+ assert(sg);
+ assert(stream);
+
+ errno = 0;
+ if (putsgent(sg, stream) != 0)
+ return errno_or_else(EIO);
+
+ return 0;
+}
+#endif
+
+int fgetpwent_sane(FILE *stream, struct passwd **pw) {
+ struct passwd *p;
+
+ assert(pw);
+ assert(stream);
+
+ errno = 0;
+ p = fgetpwent(stream);
+ if (!p && errno != ENOENT)
+ return errno_or_else(EIO);
+
+ *pw = p;
+ return !!p;
+}
+
+int fgetspent_sane(FILE *stream, struct spwd **sp) {
+ struct spwd *s;
+
+ assert(sp);
+ assert(stream);
+
+ errno = 0;
+ s = fgetspent(stream);
+ if (!s && errno != ENOENT)
+ return errno_or_else(EIO);
+
+ *sp = s;
+ return !!s;
+}
+
+int fgetgrent_sane(FILE *stream, struct group **gr) {
+ struct group *g;
+
+ assert(gr);
+ assert(stream);
+
+ errno = 0;
+ g = fgetgrent(stream);
+ if (!g && errno != ENOENT)
+ return errno_or_else(EIO);
+
+ *gr = g;
+ return !!g;
+}
+
+#if ENABLE_GSHADOW
+int fgetsgent_sane(FILE *stream, struct sgrp **sg) {
+ struct sgrp *s;
+
+ assert(sg);
+ assert(stream);
+
+ errno = 0;
+ s = fgetsgent(stream);
+ if (!s && errno != ENOENT)
+ return errno_or_else(EIO);
+
+ *sg = s;
+ return !!s;
+}
+#endif
diff --git a/src/basic/user-util.h b/src/basic/user-util.h
new file mode 100644
index 0000000..20ff415
--- /dev/null
+++ b/src/basic/user-util.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <grp.h>
+#if ENABLE_GSHADOW
+# include <gshadow.h>
+#endif
+#include <pwd.h>
+#include <shadow.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+bool uid_is_valid(uid_t uid);
+
+static inline bool gid_is_valid(gid_t gid) {
+ return uid_is_valid((uid_t) gid);
+}
+
+int parse_uid(const char *s, uid_t* ret_uid);
+int parse_uid_range(const char *s, uid_t *ret_lower, uid_t *ret_upper);
+
+static inline int parse_gid(const char *s, gid_t *ret_gid) {
+ return parse_uid(s, (uid_t*) ret_gid);
+}
+
+char* getlogname_malloc(void);
+char* getusername_malloc(void);
+
+typedef enum UserCredsFlags {
+ USER_CREDS_PREFER_NSS = 1 << 0, /* if set, only synthesize user records if database lacks them. Normally we bypass the userdb entirely for the records we can synthesize */
+ USER_CREDS_ALLOW_MISSING = 1 << 1, /* if a numeric UID string is resolved, be OK if there's no record for it */
+ USER_CREDS_CLEAN = 1 << 2, /* try to clean up shell and home fields with invalid data */
+} UserCredsFlags;
+
+int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell, UserCredsFlags flags);
+int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags);
+
+char* uid_to_name(uid_t uid);
+char* gid_to_name(gid_t gid);
+
+int in_gid(gid_t gid);
+int in_group(const char *name);
+
+int merge_gid_lists(const gid_t *list1, size_t size1, const gid_t *list2, size_t size2, gid_t **result);
+int getgroups_alloc(gid_t** gids);
+
+int get_home_dir(char **ret);
+int get_shell(char **_ret);
+
+int reset_uid_gid(void);
+
+int take_etc_passwd_lock(const char *root);
+
+#define UID_INVALID ((uid_t) -1)
+#define GID_INVALID ((gid_t) -1)
+
+#define UID_NOBODY ((uid_t) 65534U)
+#define GID_NOBODY ((gid_t) 65534U)
+
+#define ETC_PASSWD_LOCK_PATH "/etc/.pwd.lock"
+
+/* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer
+ * NULL is special */
+#define PTR_TO_UID(p) ((uid_t) (((uintptr_t) (p))-1))
+#define UID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+
+#define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1))
+#define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+
+static inline bool userns_supported(void) {
+ return access("/proc/self/uid_map", F_OK) >= 0;
+}
+
+typedef enum ValidUserFlags {
+ VALID_USER_RELAX = 1 << 0,
+ VALID_USER_WARN = 1 << 1,
+ VALID_USER_ALLOW_NUMERIC = 1 << 2,
+} ValidUserFlags;
+
+bool valid_user_group_name(const char *u, ValidUserFlags flags);
+bool valid_gecos(const char *d);
+char *mangle_gecos(const char *d);
+bool valid_home(const char *p);
+
+static inline bool valid_shell(const char *p) {
+ /* We have the same requirements, so just piggy-back on the home check.
+ *
+ * Let's ignore /etc/shells because this is only applicable to real and
+ * not system users. It is also incompatible with the idea of empty /etc.
+ */
+ return valid_home(p);
+}
+
+int maybe_setgroups(size_t size, const gid_t *list);
+
+bool synthesize_nobody(void);
+
+int fgetpwent_sane(FILE *stream, struct passwd **pw);
+int fgetspent_sane(FILE *stream, struct spwd **sp);
+int fgetgrent_sane(FILE *stream, struct group **gr);
+int putpwent_sane(const struct passwd *pw, FILE *stream);
+int putspent_sane(const struct spwd *sp, FILE *stream);
+int putgrent_sane(const struct group *gr, FILE *stream);
+#if ENABLE_GSHADOW
+int fgetsgent_sane(FILE *stream, struct sgrp **sg);
+int putsgent_sane(const struct sgrp *sg, FILE *stream);
+#endif
+
+bool is_nologin_shell(const char *shell);
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
new file mode 100644
index 0000000..59663c0
--- /dev/null
+++ b/src/basic/utf8.c
@@ -0,0 +1,585 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* Parts of this file are based on the GLIB utf8 validation functions. The
+ * original license text follows. */
+
+/* gutf8.c - Operations on UTF-8 strings.
+ *
+ * Copyright (C) 1999 Tom Tromey
+ * Copyright (C) 2000 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "gunicode.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "string-util.h"
+#include "utf8.h"
+
+bool unichar_is_valid(char32_t ch) {
+
+ if (ch >= 0x110000) /* End of unicode space */
+ return false;
+ if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
+ return false;
+ if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */
+ return false;
+ if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
+ return false;
+
+ return true;
+}
+
+static bool unichar_is_control(char32_t ch) {
+
+ /*
+ 0 to ' '-1 is the C0 range.
+ DEL=0x7F, and DEL+1 to 0x9F is C1 range.
+ '\t' is in C0 range, but more or less harmless and commonly used.
+ */
+
+ return (ch < ' ' && !IN_SET(ch, '\t', '\n')) ||
+ (0x7F <= ch && ch <= 0x9F);
+}
+
+/* count of characters used to encode one unicode char */
+static size_t utf8_encoded_expected_len(uint8_t c) {
+ if (c < 0x80)
+ return 1;
+ if ((c & 0xe0) == 0xc0)
+ return 2;
+ if ((c & 0xf0) == 0xe0)
+ return 3;
+ if ((c & 0xf8) == 0xf0)
+ return 4;
+ if ((c & 0xfc) == 0xf8)
+ return 5;
+ if ((c & 0xfe) == 0xfc)
+ return 6;
+
+ return 0;
+}
+
+/* decode one unicode char */
+int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar) {
+ char32_t unichar;
+ size_t len, i;
+
+ assert(str);
+
+ len = utf8_encoded_expected_len(str[0]);
+
+ switch (len) {
+ case 1:
+ *ret_unichar = (char32_t)str[0];
+ return 0;
+ case 2:
+ unichar = str[0] & 0x1f;
+ break;
+ case 3:
+ unichar = (char32_t)str[0] & 0x0f;
+ break;
+ case 4:
+ unichar = (char32_t)str[0] & 0x07;
+ break;
+ case 5:
+ unichar = (char32_t)str[0] & 0x03;
+ break;
+ case 6:
+ unichar = (char32_t)str[0] & 0x01;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 1; i < len; i++) {
+ if (((char32_t)str[i] & 0xc0) != 0x80)
+ return -EINVAL;
+
+ unichar <<= 6;
+ unichar |= (char32_t)str[i] & 0x3f;
+ }
+
+ *ret_unichar = unichar;
+
+ return 0;
+}
+
+bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newline) {
+ const char *p;
+
+ assert(str);
+
+ for (p = str; length > 0;) {
+ int encoded_len, r;
+ char32_t val;
+
+ encoded_len = utf8_encoded_valid_unichar(p, length);
+ if (encoded_len < 0)
+ return false;
+ assert(encoded_len > 0 && (size_t) encoded_len <= length);
+
+ r = utf8_encoded_to_unichar(p, &val);
+ if (r < 0 ||
+ unichar_is_control(val) ||
+ (!allow_newline && val == '\n'))
+ return false;
+
+ length -= encoded_len;
+ p += encoded_len;
+ }
+
+ return true;
+}
+
+char *utf8_is_valid_n(const char *str, size_t len_bytes) {
+ /* Check if the string is composed of valid utf8 characters. If length len_bytes is given, stop after
+ * len_bytes. Otherwise, stop at NUL. */
+
+ assert(str);
+
+ for (const char *p = str; len_bytes != (size_t) -1 ? (size_t) (p - str) < len_bytes : *p != '\0'; ) {
+ int len;
+
+ if (_unlikely_(*p == '\0') && len_bytes != (size_t) -1)
+ return NULL; /* embedded NUL */
+
+ len = utf8_encoded_valid_unichar(p,
+ len_bytes != (size_t) -1 ? len_bytes - (p - str) : (size_t) -1);
+ if (_unlikely_(len < 0))
+ return NULL; /* invalid character */
+
+ p += len;
+ }
+
+ return (char*) str;
+}
+
+char *utf8_escape_invalid(const char *str) {
+ char *p, *s;
+
+ assert(str);
+
+ p = s = malloc(strlen(str) * 4 + 1);
+ if (!p)
+ return NULL;
+
+ while (*str) {
+ int len;
+
+ len = utf8_encoded_valid_unichar(str, (size_t) -1);
+ if (len > 0) {
+ s = mempcpy(s, str, len);
+ str += len;
+ } else {
+ s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER);
+ str += 1;
+ }
+ }
+
+ *s = '\0';
+ (void) str_realloc(&p);
+ return p;
+}
+
+static int utf8_char_console_width(const char *str) {
+ char32_t c;
+ int r;
+
+ r = utf8_encoded_to_unichar(str, &c);
+ if (r < 0)
+ return r;
+
+ /* TODO: we should detect combining characters */
+
+ return unichar_iswide(c) ? 2 : 1;
+}
+
+char *utf8_escape_non_printable_full(const char *str, size_t console_width) {
+ char *p, *s, *prev_s;
+ size_t n = 0; /* estimated print width */
+
+ assert(str);
+
+ if (console_width == 0)
+ return strdup("");
+
+ p = s = prev_s = malloc(strlen(str) * 4 + 1);
+ if (!p)
+ return NULL;
+
+ for (;;) {
+ int len;
+ char *saved_s = s;
+
+ if (!*str) /* done! */
+ goto finish;
+
+ len = utf8_encoded_valid_unichar(str, (size_t) -1);
+ if (len > 0) {
+ if (utf8_is_printable(str, len)) {
+ int w;
+
+ w = utf8_char_console_width(str);
+ assert(w >= 0);
+ if (n + w > console_width)
+ goto truncation;
+
+ s = mempcpy(s, str, len);
+ str += len;
+ n += w;
+
+ } else {
+ for (; len > 0; len--) {
+ if (n + 4 > console_width)
+ goto truncation;
+
+ *(s++) = '\\';
+ *(s++) = 'x';
+ *(s++) = hexchar((int) *str >> 4);
+ *(s++) = hexchar((int) *str);
+
+ str += 1;
+ n += 4;
+ }
+ }
+ } else {
+ if (n + 1 > console_width)
+ goto truncation;
+
+ s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
+ str += 1;
+ n += 1;
+ }
+
+ prev_s = saved_s;
+ }
+
+ truncation:
+ /* Try to go back one if we don't have enough space for the ellipsis */
+ if (n + 1 >= console_width)
+ s = prev_s;
+
+ s = mempcpy(s, "…", strlen("…"));
+
+ finish:
+ *s = '\0';
+ (void) str_realloc(&p);
+ return p;
+}
+
+char *ascii_is_valid(const char *str) {
+ const char *p;
+
+ /* Check whether the string consists of valid ASCII bytes,
+ * i.e values between 0 and 127, inclusive. */
+
+ assert(str);
+
+ for (p = str; *p; p++)
+ if ((unsigned char) *p >= 128)
+ return NULL;
+
+ return (char*) str;
+}
+
+char *ascii_is_valid_n(const char *str, size_t len) {
+ size_t i;
+
+ /* Very similar to ascii_is_valid(), but checks exactly len
+ * bytes and rejects any NULs in that range. */
+
+ assert(str);
+
+ for (i = 0; i < len; i++)
+ if ((unsigned char) str[i] >= 128 || str[i] == 0)
+ return NULL;
+
+ return (char*) str;
+}
+
+/**
+ * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8
+ * @out_utf8: output buffer of at least 4 bytes or NULL
+ * @g: UCS-4 character to encode
+ *
+ * This encodes a single UCS-4 character as UTF-8 and writes it into @out_utf8.
+ * The length of the character is returned. It is not zero-terminated! If the
+ * output buffer is NULL, only the length is returned.
+ *
+ * Returns: The length in bytes that the UTF-8 representation does or would
+ * occupy.
+ */
+size_t utf8_encode_unichar(char *out_utf8, char32_t g) {
+
+ if (g < (1 << 7)) {
+ if (out_utf8)
+ out_utf8[0] = g & 0x7f;
+ return 1;
+ } else if (g < (1 << 11)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xc0 | ((g >> 6) & 0x1f);
+ out_utf8[1] = 0x80 | (g & 0x3f);
+ }
+ return 2;
+ } else if (g < (1 << 16)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xe0 | ((g >> 12) & 0x0f);
+ out_utf8[1] = 0x80 | ((g >> 6) & 0x3f);
+ out_utf8[2] = 0x80 | (g & 0x3f);
+ }
+ return 3;
+ } else if (g < (1 << 21)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xf0 | ((g >> 18) & 0x07);
+ out_utf8[1] = 0x80 | ((g >> 12) & 0x3f);
+ out_utf8[2] = 0x80 | ((g >> 6) & 0x3f);
+ out_utf8[3] = 0x80 | (g & 0x3f);
+ }
+ return 4;
+ }
+
+ return 0;
+}
+
+char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */) {
+ const uint8_t *f;
+ char *r, *t;
+
+ assert(s);
+
+ /* Input length is in bytes, i.e. the shortest possible character takes 2 bytes. Each unicode character may
+ * take up to 4 bytes in UTF-8. Let's also account for a trailing NUL byte. */
+ if (length * 2 < length)
+ return NULL; /* overflow */
+
+ r = new(char, length * 2 + 1);
+ if (!r)
+ return NULL;
+
+ f = (const uint8_t*) s;
+ t = r;
+
+ while (f + 1 < (const uint8_t*) s + length) {
+ char16_t w1, w2;
+
+ /* see RFC 2781 section 2.2 */
+
+ w1 = f[1] << 8 | f[0];
+ f += 2;
+
+ if (!utf16_is_surrogate(w1)) {
+ t += utf8_encode_unichar(t, w1);
+ continue;
+ }
+
+ if (utf16_is_trailing_surrogate(w1))
+ continue; /* spurious trailing surrogate, ignore */
+
+ if (f + 1 >= (const uint8_t*) s + length)
+ break;
+
+ w2 = f[1] << 8 | f[0];
+ f += 2;
+
+ if (!utf16_is_trailing_surrogate(w2)) {
+ f -= 2;
+ continue; /* surrogate missing its trailing surrogate, ignore */
+ }
+
+ t += utf8_encode_unichar(t, utf16_surrogate_pair_to_unichar(w1, w2));
+ }
+
+ *t = 0;
+ return r;
+}
+
+size_t utf16_encode_unichar(char16_t *out, char32_t c) {
+
+ /* Note that this encodes as little-endian. */
+
+ switch (c) {
+
+ case 0 ... 0xd7ffU:
+ case 0xe000U ... 0xffffU:
+ out[0] = htole16(c);
+ return 1;
+
+ case 0x10000U ... 0x10ffffU:
+ c -= 0x10000U;
+ out[0] = htole16((c >> 10) + 0xd800U);
+ out[1] = htole16((c & 0x3ffU) + 0xdc00U);
+ return 2;
+
+ default: /* A surrogate (invalid) */
+ return 0;
+ }
+}
+
+char16_t *utf8_to_utf16(const char *s, size_t length) {
+ char16_t *n, *p;
+ size_t i;
+ int r;
+
+ assert(s);
+
+ n = new(char16_t, length + 1);
+ if (!n)
+ return NULL;
+
+ p = n;
+
+ for (i = 0; i < length;) {
+ char32_t unichar;
+ size_t e;
+
+ e = utf8_encoded_expected_len(s[i]);
+ if (e <= 1) /* Invalid and single byte characters are copied as they are */
+ goto copy;
+
+ if (i + e > length) /* sequence longer than input buffer, then copy as-is */
+ goto copy;
+
+ r = utf8_encoded_to_unichar(s + i, &unichar);
+ if (r < 0) /* sequence invalid, then copy as-is */
+ goto copy;
+
+ p += utf16_encode_unichar(p, unichar);
+ i += e;
+ continue;
+
+ copy:
+ *(p++) = htole16(s[i++]);
+ }
+
+ *p = 0;
+ return n;
+}
+
+size_t char16_strlen(const char16_t *s) {
+ size_t n = 0;
+
+ assert(s);
+
+ while (*s != 0)
+ n++, s++;
+
+ return n;
+}
+
+/* expected size used to encode one unicode char */
+static int utf8_unichar_to_encoded_len(char32_t unichar) {
+
+ if (unichar < 0x80)
+ return 1;
+ if (unichar < 0x800)
+ return 2;
+ if (unichar < 0x10000)
+ return 3;
+ if (unichar < 0x200000)
+ return 4;
+ if (unichar < 0x4000000)
+ return 5;
+
+ return 6;
+}
+
+/* validate one encoded unicode char and return its length */
+int utf8_encoded_valid_unichar(const char *str, size_t length /* bytes */) {
+ char32_t unichar;
+ size_t len, i;
+ int r;
+
+ assert(str);
+ assert(length > 0);
+
+ /* We read until NUL, at most length bytes. (size_t) -1 may be used to disable the length check. */
+
+ len = utf8_encoded_expected_len(str[0]);
+ if (len == 0)
+ return -EINVAL;
+
+ /* Do we have a truncated multi-byte character? */
+ if (len > length)
+ return -EINVAL;
+
+ /* ascii is valid */
+ if (len == 1)
+ return 1;
+
+ /* check if expected encoded chars are available */
+ for (i = 0; i < len; i++)
+ if ((str[i] & 0x80) != 0x80)
+ return -EINVAL;
+
+ r = utf8_encoded_to_unichar(str, &unichar);
+ if (r < 0)
+ return r;
+
+ /* check if encoded length matches encoded value */
+ if (utf8_unichar_to_encoded_len(unichar) != (int) len)
+ return -EINVAL;
+
+ /* check if value has valid range */
+ if (!unichar_is_valid(unichar))
+ return -EINVAL;
+
+ return (int) len;
+}
+
+size_t utf8_n_codepoints(const char *str) {
+ size_t n = 0;
+
+ /* Returns the number of UTF-8 codepoints in this string, or (size_t) -1 if the string is not valid UTF-8. */
+
+ while (*str != 0) {
+ int k;
+
+ k = utf8_encoded_valid_unichar(str, (size_t) -1);
+ if (k < 0)
+ return (size_t) -1;
+
+ str += k;
+ n++;
+ }
+
+ return n;
+}
+
+size_t utf8_console_width(const char *str) {
+ size_t n = 0;
+
+ /* Returns the approximate width a string will take on screen when printed on a character cell
+ * terminal/console. */
+
+ while (*str) {
+ int w;
+
+ w = utf8_char_console_width(str);
+ if (w < 0)
+ return (size_t) -1;
+
+ n += w;
+ str = utf8_next_char(str);
+ }
+
+ return n;
+}
diff --git a/src/basic/utf8.h b/src/basic/utf8.h
new file mode 100644
index 0000000..a6ea942
--- /dev/null
+++ b/src/basic/utf8.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <uchar.h>
+
+#include "macro.h"
+#include "missing_type.h"
+
+#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
+#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
+
+bool unichar_is_valid(char32_t c);
+
+char *utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
+static inline char *utf8_is_valid(const char *s) {
+ return utf8_is_valid_n(s, (size_t) -1);
+}
+char *ascii_is_valid(const char *s) _pure_;
+char *ascii_is_valid_n(const char *str, size_t len);
+
+bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newline) _pure_;
+#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
+
+char *utf8_escape_invalid(const char *s);
+char *utf8_escape_non_printable_full(const char *str, size_t console_width);
+static inline char *utf8_escape_non_printable(const char *str) {
+ return utf8_escape_non_printable_full(str, (size_t) -1);
+}
+
+size_t utf8_encode_unichar(char *out_utf8, char32_t g);
+size_t utf16_encode_unichar(char16_t *out, char32_t c);
+
+char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */);
+char16_t *utf8_to_utf16(const char *s, size_t length);
+
+size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */
+
+int utf8_encoded_valid_unichar(const char *str, size_t length);
+int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
+
+static inline bool utf16_is_surrogate(char16_t c) {
+ return c >= 0xd800U && c <= 0xdfffU;
+}
+
+static inline bool utf16_is_trailing_surrogate(char16_t c) {
+ return c >= 0xdc00U && c <= 0xdfffU;
+}
+
+static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t trail) {
+ return ((((char32_t) lead - 0xd800U) << 10) + ((char32_t) trail - 0xdc00U) + 0x10000U);
+}
+
+size_t utf8_n_codepoints(const char *str);
+size_t utf8_console_width(const char *str);
diff --git a/src/basic/util.c b/src/basic/util.c
new file mode 100644
index 0000000..f98ecf3
--- /dev/null
+++ b/src/basic/util.c
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "build.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "util.h"
+#include "virt.h"
+
+int saved_argc = 0;
+char **saved_argv = NULL;
+static int saved_in_initrd = -1;
+
+bool kexec_loaded(void) {
+ _cleanup_free_ char *s = NULL;
+
+ if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
+ return false;
+
+ return s[0] == '1';
+}
+
+int prot_from_flags(int flags) {
+
+ switch (flags & O_ACCMODE) {
+
+ case O_RDONLY:
+ return PROT_READ;
+
+ case O_WRONLY:
+ return PROT_WRITE;
+
+ case O_RDWR:
+ return PROT_READ|PROT_WRITE;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+bool in_initrd(void) {
+ struct statfs s;
+ int r;
+
+ if (saved_in_initrd >= 0)
+ return saved_in_initrd;
+
+ /* We make two checks here:
+ *
+ * 1. the flag file /etc/initrd-release must exist
+ * 2. the root file system must be a memory file system
+ *
+ * The second check is extra paranoia, since misdetecting an
+ * initrd can have bad consequences due the initrd
+ * emptying when transititioning to the main systemd.
+ */
+
+ r = getenv_bool_secure("SYSTEMD_IN_INITRD");
+ if (r < 0 && r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
+
+ if (r >= 0)
+ saved_in_initrd = r > 0;
+ else
+ saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
+ statfs("/", &s) >= 0 &&
+ is_temporary_fs(&s);
+
+ return saved_in_initrd;
+}
+
+void in_initrd_force(bool value) {
+ saved_in_initrd = value;
+}
+
+int on_ac_power(void) {
+ bool found_offline = false, found_online = false;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir("/sys/class/power_supply");
+ if (!d)
+ return errno == ENOENT ? true : -errno;
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_close_ int fd = -1, device = -1;
+ char contents[6];
+ ssize_t n;
+
+ device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (device < 0) {
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ continue;
+
+ return -errno;
+ }
+
+ fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ n = read(fd, contents, sizeof(contents));
+ if (n < 0)
+ return -errno;
+
+ if (n != 6 || memcmp(contents, "Mains\n", 6))
+ continue;
+
+ safe_close(fd);
+ fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ n = read(fd, contents, sizeof(contents));
+ if (n < 0)
+ return -errno;
+
+ if (n != 2 || contents[1] != '\n')
+ return -EIO;
+
+ if (contents[0] == '1') {
+ found_online = true;
+ break;
+ } else if (contents[0] == '0')
+ found_offline = true;
+ else
+ return -EIO;
+ }
+
+ return found_online || !found_offline;
+}
+
+int container_get_leader(const char *machine, pid_t *pid) {
+ _cleanup_free_ char *s = NULL, *class = NULL;
+ const char *p;
+ pid_t leader;
+ int r;
+
+ assert(machine);
+ assert(pid);
+
+ if (streq(machine, ".host")) {
+ *pid = 1;
+ return 0;
+ }
+
+ if (!machine_name_is_valid(machine))
+ return -EINVAL;
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p,
+ "LEADER", &s,
+ "CLASS", &class);
+ if (r == -ENOENT)
+ return -EHOSTDOWN;
+ if (r < 0)
+ return r;
+ if (!s)
+ return -EIO;
+
+ if (!streq_ptr(class, "container"))
+ return -EIO;
+
+ r = parse_pid(s, &leader);
+ if (r < 0)
+ return r;
+ if (leader <= 1)
+ return -EIO;
+
+ *pid = leader;
+ return 0;
+}
+
+int version(void) {
+ puts("systemd " STRINGIFY(PROJECT_VERSION) " (" GIT_VERSION ")\n"
+ SYSTEMD_FEATURES);
+ return 0;
+}
+
+/* This is a direct translation of str_verscmp from boot.c */
+static bool is_digit(int c) {
+ return c >= '0' && c <= '9';
+}
+
+static int c_order(int c) {
+ if (c == 0 || is_digit(c))
+ return 0;
+
+ if ((c >= 'a') && (c <= 'z'))
+ return c;
+
+ return c + 0x10000;
+}
+
+int str_verscmp(const char *s1, const char *s2) {
+ const char *os1, *os2;
+
+ assert(s1);
+ assert(s2);
+
+ os1 = s1;
+ os2 = s2;
+
+ while (*s1 || *s2) {
+ int first;
+
+ while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
+ int order;
+
+ order = c_order(*s1) - c_order(*s2);
+ if (order != 0)
+ return order;
+ s1++;
+ s2++;
+ }
+
+ while (*s1 == '0')
+ s1++;
+ while (*s2 == '0')
+ s2++;
+
+ first = 0;
+ while (is_digit(*s1) && is_digit(*s2)) {
+ if (first == 0)
+ first = *s1 - *s2;
+ s1++;
+ s2++;
+ }
+
+ if (is_digit(*s1))
+ return 1;
+ if (is_digit(*s2))
+ return -1;
+
+ if (first != 0)
+ return first;
+ }
+
+ return strcmp(os1, os2);
+}
+
+/* Turn off core dumps but only if we're running outside of a container. */
+void disable_coredumps(void) {
+ int r;
+
+ if (detect_container() > 0)
+ return;
+
+ r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
+}
diff --git a/src/basic/util.h b/src/basic/util.h
new file mode 100644
index 0000000..942d773
--- /dev/null
+++ b/src/basic/util.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdint.h>
+
+#include "macro.h"
+
+extern int saved_argc;
+extern char **saved_argv;
+
+static inline void save_argc_argv(int argc, char **argv) {
+ saved_argc = argc;
+ saved_argv = argv;
+}
+
+bool kexec_loaded(void);
+
+int prot_from_flags(int flags) _const_;
+
+bool in_initrd(void);
+void in_initrd_force(bool value);
+
+int on_ac_power(void);
+
+static inline unsigned u64log2(uint64_t n) {
+#if __SIZEOF_LONG_LONG__ == 8
+ return (n > 1) ? (unsigned) __builtin_clzll(n) ^ 63U : 0;
+#else
+#error "Wut?"
+#endif
+}
+
+static inline unsigned u32ctz(uint32_t n) {
+#if __SIZEOF_INT__ == 4
+ return n != 0 ? __builtin_ctz(n) : 32;
+#else
+#error "Wut?"
+#endif
+}
+
+static inline unsigned log2i(int x) {
+ assert(x > 0);
+
+ return __SIZEOF_INT__ * 8 - __builtin_clz(x) - 1;
+}
+
+static inline unsigned log2u(unsigned x) {
+ assert(x > 0);
+
+ return sizeof(unsigned) * 8 - __builtin_clz(x) - 1;
+}
+
+static inline unsigned log2u_round_up(unsigned x) {
+ assert(x > 0);
+
+ if (x == 1)
+ return 0;
+
+ return log2u(x - 1) + 1;
+}
+
+int container_get_leader(const char *machine, pid_t *pid);
+
+int version(void);
+
+int str_verscmp(const char *s1, const char *s2);
+
+void disable_coredumps(void);
diff --git a/src/basic/virt.c b/src/basic/virt.c
new file mode 100644
index 0000000..7d78a40
--- /dev/null
+++ b/src/basic/virt.c
@@ -0,0 +1,705 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "virt.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+static const char *const vm_table[_VIRTUALIZATION_MAX] = {
+ [VIRTUALIZATION_XEN] = "XenVMMXenVMM",
+ [VIRTUALIZATION_KVM] = "KVMKVMKVM",
+ [VIRTUALIZATION_QEMU] = "TCGTCGTCGTCG",
+ /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
+ [VIRTUALIZATION_VMWARE] = "VMwareVMware",
+ /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
+ [VIRTUALIZATION_MICROSOFT] = "Microsoft Hv",
+ /* https://wiki.freebsd.org/bhyve */
+ [VIRTUALIZATION_BHYVE] = "bhyve bhyve ",
+ [VIRTUALIZATION_QNX] = "QNXQVMBSQG",
+ /* https://projectacrn.org */
+ [VIRTUALIZATION_ACRN] = "ACRNACRNACRN",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(vm, int);
+#endif
+
+static int detect_vm_cpuid(void) {
+
+ /* CPUID is an x86 specific interface. */
+#if defined(__i386__) || defined(__x86_64__)
+
+ uint32_t eax, ebx, ecx, edx;
+ bool hypervisor;
+
+ /* http://lwn.net/Articles/301888/ */
+
+ /* First detect whether there is a hypervisor */
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
+ return VIRTUALIZATION_NONE;
+
+ hypervisor = ecx & 0x80000000U;
+
+ if (hypervisor) {
+ union {
+ uint32_t sig32[3];
+ char text[13];
+ } sig = {};
+ int v;
+
+ /* There is a hypervisor, see what it is */
+ __cpuid(0x40000000U, eax, ebx, ecx, edx);
+
+ sig.sig32[0] = ebx;
+ sig.sig32[1] = ecx;
+ sig.sig32[2] = edx;
+
+ log_debug("Virtualization found, CPUID=%s", sig.text);
+
+ v = vm_from_string(sig.text);
+ if (v < 0)
+ return VIRTUALIZATION_VM_OTHER;
+
+ return v;
+ }
+#endif
+ log_debug("No virtualization found in CPUID");
+
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_device_tree(void) {
+#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
+ _cleanup_free_ char *hvtype = NULL;
+ int r;
+
+ r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
+ if (r == -ENOENT) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+
+ if (access("/proc/device-tree/ibm,partition-name", F_OK) == 0 &&
+ access("/proc/device-tree/hmc-managed?", F_OK) == 0 &&
+ access("/proc/device-tree/chosen/qemu,graphic-width", F_OK) != 0)
+ return VIRTUALIZATION_POWERVM;
+
+ dir = opendir("/proc/device-tree");
+ if (!dir) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "/proc/device-tree: %m");
+ return VIRTUALIZATION_NONE;
+ }
+ return -errno;
+ }
+
+ FOREACH_DIRENT(dent, dir, return -errno)
+ if (strstr(dent->d_name, "fw-cfg")) {
+ log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
+ return VIRTUALIZATION_QEMU;
+ }
+
+ log_debug("No virtualization found in /proc/device-tree/*");
+ return VIRTUALIZATION_NONE;
+ } else if (r < 0)
+ return r;
+
+ log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
+ if (streq(hvtype, "linux,kvm"))
+ return VIRTUALIZATION_KVM;
+ else if (strstr(hvtype, "xen"))
+ return VIRTUALIZATION_XEN;
+ else if (strstr(hvtype, "vmware"))
+ return VIRTUALIZATION_VMWARE;
+ else
+ return VIRTUALIZATION_VM_OTHER;
+#else
+ log_debug("This platform does not support /proc/device-tree");
+ return VIRTUALIZATION_NONE;
+#endif
+}
+
+static int detect_vm_dmi(void) {
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+
+ static const char *const dmi_vendors[] = {
+ "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
+ "/sys/class/dmi/id/sys_vendor",
+ "/sys/class/dmi/id/board_vendor",
+ "/sys/class/dmi/id/bios_vendor"
+ };
+
+ static const struct {
+ const char *vendor;
+ int id;
+ } dmi_vendor_table[] = {
+ { "KVM", VIRTUALIZATION_KVM },
+ { "QEMU", VIRTUALIZATION_QEMU },
+ { "VMware", VIRTUALIZATION_VMWARE }, /* https://kb.vmware.com/s/article/1009458 */
+ { "VMW", VIRTUALIZATION_VMWARE },
+ { "innotek GmbH", VIRTUALIZATION_ORACLE },
+ { "Oracle Corporation", VIRTUALIZATION_ORACLE },
+ { "Xen", VIRTUALIZATION_XEN },
+ { "Bochs", VIRTUALIZATION_BOCHS },
+ { "Parallels", VIRTUALIZATION_PARALLELS },
+ /* https://wiki.freebsd.org/bhyve */
+ { "BHYVE", VIRTUALIZATION_BHYVE },
+ };
+ unsigned i;
+ int r;
+
+ for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
+ _cleanup_free_ char *s = NULL;
+ unsigned j;
+
+ r = read_one_line_file(dmi_vendors[i], &s);
+ if (r < 0) {
+ if (r == -ENOENT)
+ continue;
+
+ return r;
+ }
+
+ for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
+ if (startswith(s, dmi_vendor_table[j].vendor)) {
+ log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
+ return dmi_vendor_table[j].id;
+ }
+ }
+#endif
+
+ log_debug("No virtualization found in DMI");
+
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_xen(void) {
+
+ /* Check for Dom0 will be executed later in detect_vm_xen_dom0
+ The presence of /proc/xen indicates some form of a Xen domain */
+ if (access("/proc/xen", F_OK) < 0) {
+ log_debug("Virtualization XEN not found, /proc/xen does not exist");
+ return VIRTUALIZATION_NONE;
+ }
+
+ log_debug("Virtualization XEN found (/proc/xen exists)");
+ return VIRTUALIZATION_XEN;
+}
+
+#define XENFEAT_dom0 11 /* xen/include/public/features.h */
+#define PATH_FEATURES "/sys/hypervisor/properties/features"
+/* Returns -errno, or 0 for domU, or 1 for dom0 */
+static int detect_vm_xen_dom0(void) {
+ _cleanup_free_ char *domcap = NULL;
+ int r;
+
+ r = read_one_line_file(PATH_FEATURES, &domcap);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r >= 0) {
+ unsigned long features;
+
+ /* Here, we need to use sscanf() instead of safe_atoul()
+ * as the string lacks the leading "0x". */
+ r = sscanf(domcap, "%lx", &features);
+ if (r == 1) {
+ r = !!(features & (1U << XENFEAT_dom0));
+ log_debug("Virtualization XEN, found %s with value %08lx, "
+ "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
+ PATH_FEATURES, features, r ? "" : " not");
+ return r;
+ }
+ log_debug("Virtualization XEN, found %s, unhandled content '%s'",
+ PATH_FEATURES, domcap);
+ }
+
+ r = read_one_line_file("/proc/xen/capabilities", &domcap);
+ if (r == -ENOENT) {
+ log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ for (const char *i = domcap;;) {
+ _cleanup_free_ char *cap = NULL;
+
+ r = extract_first_word(&i, &cap, ",", 0);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_debug("Virtualization XEN DomU found (/proc/xen/capabilities)");
+ return 0;
+ }
+
+ if (streq(cap, "control_d")) {
+ log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
+ return 1;
+ }
+ }
+}
+
+static int detect_vm_hypervisor(void) {
+ _cleanup_free_ char *hvtype = NULL;
+ int r;
+
+ r = read_one_line_file("/sys/hypervisor/type", &hvtype);
+ if (r == -ENOENT)
+ return VIRTUALIZATION_NONE;
+ if (r < 0)
+ return r;
+
+ log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
+
+ if (streq(hvtype, "xen"))
+ return VIRTUALIZATION_XEN;
+ else
+ return VIRTUALIZATION_VM_OTHER;
+}
+
+static int detect_vm_uml(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ /* Detect User-Mode Linux by reading /proc/cpuinfo */
+ f = fopen("/proc/cpuinfo", "re");
+ if (!f) {
+ if (errno == ENOENT) {
+ log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
+ return VIRTUALIZATION_NONE;
+ }
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *t;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ t = startswith(line, "vendor_id\t: ");
+ if (t) {
+ if (startswith(t, "User Mode Linux")) {
+ log_debug("UML virtualization found in /proc/cpuinfo");
+ return VIRTUALIZATION_UML;
+ }
+
+ break;
+ }
+ }
+
+ log_debug("UML virtualization not found in /proc/cpuinfo.");
+ return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_zvm(void) {
+
+#if defined(__s390__)
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
+ if (r == -ENOENT)
+ return VIRTUALIZATION_NONE;
+ if (r < 0)
+ return r;
+
+ log_debug("Virtualization %s found in /proc/sysinfo", t);
+ if (streq(t, "z/VM"))
+ return VIRTUALIZATION_ZVM;
+ else
+ return VIRTUALIZATION_KVM;
+#else
+ log_debug("This platform does not support /proc/sysinfo");
+ return VIRTUALIZATION_NONE;
+#endif
+}
+
+/* Returns a short identifier for the various VM implementations */
+int detect_vm(void) {
+ static thread_local int cached_found = _VIRTUALIZATION_INVALID;
+ bool other = false;
+ int r, dmi;
+
+ if (cached_found >= 0)
+ return cached_found;
+
+ /* We have to use the correct order here:
+ *
+ * → First, try to detect Oracle Virtualbox, even if it uses KVM, as well as Xen even if it cloaks as Microsoft
+ * Hyper-V. Attempt to detect uml at this stage also since it runs as a user-process nested inside other VMs.
+ *
+ * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
+ * overwritten.
+ *
+ * → Third, try to detect from DMI. */
+
+ dmi = detect_vm_dmi();
+ if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN)) {
+ r = dmi;
+ goto finish;
+ }
+
+ /* Detect UML */
+ r = detect_vm_uml();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ /* Detect from CPUID */
+ r = detect_vm_cpuid();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ /* Now, let's get back to DMI */
+ if (dmi < 0)
+ return dmi;
+ if (dmi == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (dmi != VIRTUALIZATION_NONE) {
+ r = dmi;
+ goto finish;
+ }
+
+ /* x86 xen will most likely be detected by cpuid. If not (most likely
+ * because we're not an x86 guest), then we should try the /proc/xen
+ * directory next. If that's not found, then we check for the high-level
+ * hypervisor sysfs file.
+ */
+
+ r = detect_vm_xen();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_hypervisor();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_device_tree();
+ if (r < 0)
+ return r;
+ if (r == VIRTUALIZATION_VM_OTHER)
+ other = true;
+ else if (r != VIRTUALIZATION_NONE)
+ goto finish;
+
+ r = detect_vm_zvm();
+ if (r < 0)
+ return r;
+
+finish:
+ /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
+ * In order to detect the Dom0 as not virtualization we need to
+ * double-check it */
+ if (r == VIRTUALIZATION_XEN) {
+ int dom0;
+
+ dom0 = detect_vm_xen_dom0();
+ if (dom0 < 0)
+ return dom0;
+ if (dom0 > 0)
+ r = VIRTUALIZATION_NONE;
+ } else if (r == VIRTUALIZATION_NONE && other)
+ r = VIRTUALIZATION_VM_OTHER;
+
+ cached_found = r;
+ log_debug("Found VM virtualization %s", virtualization_to_string(r));
+ return r;
+}
+
+static const char *const container_table[_VIRTUALIZATION_MAX] = {
+ [VIRTUALIZATION_LXC] = "lxc",
+ [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
+ [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
+ [VIRTUALIZATION_DOCKER] = "docker",
+ [VIRTUALIZATION_PODMAN] = "podman",
+ [VIRTUALIZATION_RKT] = "rkt",
+ [VIRTUALIZATION_WSL] = "wsl",
+ [VIRTUALIZATION_PROOT] = "proot",
+ [VIRTUALIZATION_POUCH] = "pouch",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
+
+int detect_container(void) {
+ static thread_local int cached_found = _VIRTUALIZATION_INVALID;
+ _cleanup_free_ char *m = NULL;
+ _cleanup_free_ char *o = NULL;
+ _cleanup_free_ char *p = NULL;
+ const char *e = NULL;
+ int r;
+
+ if (cached_found >= 0)
+ return cached_found;
+
+ /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
+ if (access("/proc/vz", F_OK) >= 0 &&
+ access("/proc/bc", F_OK) < 0) {
+ r = VIRTUALIZATION_OPENVZ;
+ goto finish;
+ }
+
+ /* "Official" way of detecting WSL https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
+ r = read_one_line_file("/proc/sys/kernel/osrelease", &o);
+ if (r >= 0 &&
+ (strstr(o, "Microsoft") || strstr(o, "WSL"))) {
+ r = VIRTUALIZATION_WSL;
+ goto finish;
+ }
+
+ /* proot doesn't use PID namespacing, so we can just check if we have a matching tracer for this
+ * invocation without worrying about it being elsewhere.
+ */
+ r = get_proc_field("/proc/self/status", "TracerPid", WHITESPACE, &p);
+ if (r == 0 && !streq(p, "0")) {
+ pid_t ptrace_pid;
+ r = parse_pid(p, &ptrace_pid);
+ if (r == 0) {
+ const char *pf = procfs_file_alloca(ptrace_pid, "comm");
+ _cleanup_free_ char *ptrace_comm = NULL;
+ r = read_one_line_file(pf, &ptrace_comm);
+ if (r >= 0 && startswith(ptrace_comm, "proot")) {
+ r = VIRTUALIZATION_PROOT;
+ goto finish;
+ }
+ }
+ }
+
+ /* The container manager might have placed this in the /run/host hierarchy for us, which is best
+ * because we can be consumed just like that, without special privileges. */
+ r = read_one_line_file("/run/host/container-manager", &m);
+ if (r > 0) {
+ e = m;
+ goto translate_name;
+ }
+ if (!IN_SET(r, -ENOENT, 0))
+ return log_debug_errno(r, "Failed to read /run/systemd/container-manager: %m");
+
+ if (getpid_cached() == 1) {
+ /* If we are PID 1 we can just check our own environment variable, and that's authoritative.
+ * We distinguish three cases:
+ * - the variable is not defined → we jump to other checks
+ * - the variable is defined to an empty value → we are not in a container
+ * - anything else → some container, either one of the known ones or "container-other"
+ */
+ e = getenv("container");
+ if (!e)
+ goto check_sched;
+ if (isempty(e)) {
+ r = VIRTUALIZATION_NONE;
+ goto finish;
+ }
+
+ goto translate_name;
+ }
+
+ /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
+ * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
+ r = read_one_line_file("/run/systemd/container", &m);
+ if (r > 0) {
+ e = m;
+ goto translate_name;
+ }
+ if (!IN_SET(r, -ENOENT, 0))
+ return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
+
+ /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
+ r = getenv_for_pid(1, "container", &m);
+ if (r > 0) {
+ e = m;
+ goto translate_name;
+ }
+ if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
+ log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
+
+ /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. If the PID
+ * shown there is not 1, we know we are in a PID namespace and hence a container. */
+ check_sched:
+ r = read_one_line_file("/proc/1/sched", &m);
+ if (r >= 0) {
+ const char *t;
+
+ t = strrchr(m, '(');
+ if (!t)
+ return -EIO;
+
+ if (!startswith(t, "(1,")) {
+ r = VIRTUALIZATION_CONTAINER_OTHER;
+ goto finish;
+ }
+ } else if (r != -ENOENT)
+ return r;
+
+ /* If that didn't work, give up, assume no container manager. */
+ r = VIRTUALIZATION_NONE;
+ goto finish;
+
+translate_name:
+ r = container_from_string(e);
+ if (r < 0)
+ r = VIRTUALIZATION_CONTAINER_OTHER;
+
+finish:
+ log_debug("Found container virtualization %s.", virtualization_to_string(r));
+ cached_found = r;
+ return r;
+}
+
+int detect_virtualization(void) {
+ int r;
+
+ r = detect_container();
+ if (r == 0)
+ r = detect_vm();
+
+ return r;
+}
+
+static int userns_has_mapping(const char *name) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *buf = NULL;
+ size_t n_allocated = 0;
+ ssize_t n;
+ uint32_t a, b, c;
+ int r;
+
+ f = fopen(name, "re");
+ if (!f) {
+ log_debug_errno(errno, "Failed to open %s: %m", name);
+ return errno == ENOENT ? false : -errno;
+ }
+
+ n = getline(&buf, &n_allocated, f);
+ if (n < 0) {
+ if (feof(f)) {
+ log_debug("%s is empty, we're in an uninitialized user namespace", name);
+ return true;
+ }
+
+ return log_debug_errno(errno, "Failed to read %s: %m", name);
+ }
+
+ r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
+ if (r < 3)
+ return log_debug_errno(errno, "Failed to parse %s: %m", name);
+
+ if (a == 0 && b == 0 && c == UINT32_MAX) {
+ /* The kernel calls mappings_overlap() and does not allow overlaps */
+ log_debug("%s has a full 1:1 mapping", name);
+ return false;
+ }
+
+ /* Anything else implies that we are in a user namespace */
+ log_debug("Mapping found in %s, we're in a user namespace", name);
+ return true;
+}
+
+int running_in_userns(void) {
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ r = userns_has_mapping("/proc/self/uid_map");
+ if (r != 0)
+ return r;
+
+ r = userns_has_mapping("/proc/self/gid_map");
+ if (r != 0)
+ return r;
+
+ /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
+ * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
+ * also does not exist. We cannot distinguish those two cases, so assume that
+ * we're running on a stripped-down recent kernel, rather than on an old one,
+ * and if the file is not found, return false.
+ */
+ r = read_one_line_file("/proc/self/setgroups", &line);
+ if (r < 0) {
+ log_debug_errno(r, "/proc/self/setgroups: %m");
+ return r == -ENOENT ? false : r;
+ }
+
+ truncate_nl(line);
+ r = streq(line, "deny");
+ /* See user_namespaces(7) for a description of this "setgroups" contents. */
+ log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
+ return r;
+}
+
+int running_in_chroot(void) {
+ int r;
+
+ if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
+ return 0;
+
+ r = files_same("/proc/1/root", "/", 0);
+ if (r < 0)
+ return r;
+
+ return r == 0;
+}
+
+static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
+ [VIRTUALIZATION_NONE] = "none",
+ [VIRTUALIZATION_KVM] = "kvm",
+ [VIRTUALIZATION_QEMU] = "qemu",
+ [VIRTUALIZATION_BOCHS] = "bochs",
+ [VIRTUALIZATION_XEN] = "xen",
+ [VIRTUALIZATION_UML] = "uml",
+ [VIRTUALIZATION_VMWARE] = "vmware",
+ [VIRTUALIZATION_ORACLE] = "oracle",
+ [VIRTUALIZATION_MICROSOFT] = "microsoft",
+ [VIRTUALIZATION_ZVM] = "zvm",
+ [VIRTUALIZATION_PARALLELS] = "parallels",
+ [VIRTUALIZATION_BHYVE] = "bhyve",
+ [VIRTUALIZATION_QNX] = "qnx",
+ [VIRTUALIZATION_ACRN] = "acrn",
+ [VIRTUALIZATION_POWERVM] = "powervm",
+ [VIRTUALIZATION_VM_OTHER] = "vm-other",
+
+ [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
+ [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
+ [VIRTUALIZATION_LXC] = "lxc",
+ [VIRTUALIZATION_OPENVZ] = "openvz",
+ [VIRTUALIZATION_DOCKER] = "docker",
+ [VIRTUALIZATION_PODMAN] = "podman",
+ [VIRTUALIZATION_RKT] = "rkt",
+ [VIRTUALIZATION_WSL] = "wsl",
+ [VIRTUALIZATION_PROOT] = "proot",
+ [VIRTUALIZATION_POUCH] = "pouch",
+ [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(virtualization, int);
diff --git a/src/basic/virt.h b/src/basic/virt.h
new file mode 100644
index 0000000..42d63d5
--- /dev/null
+++ b/src/basic/virt.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+enum {
+ VIRTUALIZATION_NONE = 0,
+
+ VIRTUALIZATION_VM_FIRST,
+ VIRTUALIZATION_KVM = VIRTUALIZATION_VM_FIRST,
+ VIRTUALIZATION_QEMU,
+ VIRTUALIZATION_BOCHS,
+ VIRTUALIZATION_XEN,
+ VIRTUALIZATION_UML,
+ VIRTUALIZATION_VMWARE,
+ VIRTUALIZATION_ORACLE,
+ VIRTUALIZATION_MICROSOFT,
+ VIRTUALIZATION_ZVM,
+ VIRTUALIZATION_PARALLELS,
+ VIRTUALIZATION_BHYVE,
+ VIRTUALIZATION_QNX,
+ VIRTUALIZATION_ACRN,
+ VIRTUALIZATION_POWERVM,
+ VIRTUALIZATION_VM_OTHER,
+ VIRTUALIZATION_VM_LAST = VIRTUALIZATION_VM_OTHER,
+
+ VIRTUALIZATION_CONTAINER_FIRST,
+ VIRTUALIZATION_SYSTEMD_NSPAWN = VIRTUALIZATION_CONTAINER_FIRST,
+ VIRTUALIZATION_LXC_LIBVIRT,
+ VIRTUALIZATION_LXC,
+ VIRTUALIZATION_OPENVZ,
+ VIRTUALIZATION_DOCKER,
+ VIRTUALIZATION_PODMAN,
+ VIRTUALIZATION_RKT,
+ VIRTUALIZATION_WSL,
+ VIRTUALIZATION_PROOT,
+ VIRTUALIZATION_POUCH,
+ VIRTUALIZATION_CONTAINER_OTHER,
+ VIRTUALIZATION_CONTAINER_LAST = VIRTUALIZATION_CONTAINER_OTHER,
+
+ _VIRTUALIZATION_MAX,
+ _VIRTUALIZATION_INVALID = -1
+};
+
+static inline bool VIRTUALIZATION_IS_VM(int x) {
+ return x >= VIRTUALIZATION_VM_FIRST && x <= VIRTUALIZATION_VM_LAST;
+}
+
+static inline bool VIRTUALIZATION_IS_CONTAINER(int x) {
+ return x >= VIRTUALIZATION_CONTAINER_FIRST && x <= VIRTUALIZATION_CONTAINER_LAST;
+}
+
+int detect_vm(void);
+int detect_container(void);
+int detect_virtualization(void);
+
+int running_in_userns(void);
+int running_in_chroot(void);
+
+const char *virtualization_to_string(int v) _const_;
+int virtualization_from_string(const char *s) _pure_;
diff --git a/src/basic/xattr-util.c b/src/basic/xattr-util.c
new file mode 100644
index 0000000..b9a0dc5
--- /dev/null
+++ b/src/basic/xattr-util.c
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/xattr.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "sparse-endian.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "xattr-util.h"
+
+int getxattr_malloc(
+ const char *path,
+ const char *name,
+ char **ret,
+ bool allow_symlink) {
+
+ size_t l = 100;
+
+ assert(path);
+ assert(name);
+ assert(ret);
+
+ for(;;) {
+ _cleanup_free_ char *v = NULL;
+ ssize_t n;
+
+ v = new0(char, l+1);
+ if (!v)
+ return -ENOMEM;
+
+ if (allow_symlink)
+ n = lgetxattr(path, name, v, l);
+ else
+ n = getxattr(path, name, v, l);
+ if (n < 0) {
+ if (errno != ERANGE)
+ return -errno;
+ } else {
+ v[n] = 0; /* NUL terminate */
+ *ret = TAKE_PTR(v);
+ return (int) n;
+ }
+
+ if (allow_symlink)
+ n = lgetxattr(path, name, NULL, 0);
+ else
+ n = getxattr(path, name, NULL, 0);
+ if (n < 0)
+ return -errno;
+ if (n > INT_MAX) /* We couldn't return this as 'int' anymore */
+ return -E2BIG;
+
+ l = (size_t) n;
+ }
+}
+
+int fgetxattr_malloc(
+ int fd,
+ const char *name,
+ char **ret) {
+
+ size_t l = 100;
+
+ assert(fd >= 0);
+ assert(name);
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *v = NULL;
+ ssize_t n;
+
+ v = new(char, l+1);
+ if (!v)
+ return -ENOMEM;
+
+ n = fgetxattr(fd, name, v, l);
+ if (n < 0) {
+ if (errno != ERANGE)
+ return -errno;
+ } else {
+ v[n] = 0; /* NUL terminate */
+ *ret = TAKE_PTR(v);
+ return (int) n;
+ }
+
+ n = fgetxattr(fd, name, NULL, 0);
+ if (n < 0)
+ return -errno;
+ if (n > INT_MAX) /* We couldn't return this as 'int' anymore */
+ return -E2BIG;
+
+ l = (size_t) n;
+ }
+}
+
+int fgetxattrat_fake(
+ int dirfd,
+ const char *filename,
+ const char *attribute,
+ void *value, size_t size,
+ int flags,
+ size_t *ret_size) {
+
+ char fn[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ _cleanup_close_ int fd = -1;
+ ssize_t l;
+
+ /* The kernel doesn't have a fgetxattrat() command, hence let's emulate one */
+
+ if (flags & ~(AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH))
+ return -EINVAL;
+
+ if (isempty(filename)) {
+ if (!(flags & AT_EMPTY_PATH))
+ return -EINVAL;
+
+ xsprintf(fn, "/proc/self/fd/%i", dirfd);
+ } else {
+ fd = openat(dirfd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_NOFOLLOW ? O_NOFOLLOW : 0));
+ if (fd < 0)
+ return -errno;
+
+ xsprintf(fn, "/proc/self/fd/%i", fd);
+ }
+
+ l = getxattr(fn, attribute, value, size);
+ if (l < 0)
+ return -errno;
+
+ *ret_size = l;
+ return 0;
+}
+
+static int parse_crtime(le64_t le, usec_t *usec) {
+ uint64_t u;
+
+ assert(usec);
+
+ u = le64toh(le);
+ if (IN_SET(u, 0, (uint64_t) -1))
+ return -EIO;
+
+ *usec = (usec_t) u;
+ return 0;
+}
+
+int fd_getcrtime_at(int dirfd, const char *name, usec_t *ret, int flags) {
+ STRUCT_STATX_DEFINE(sx);
+ usec_t a, b;
+ le64_t le;
+ size_t n;
+ int r;
+
+ assert(ret);
+
+ if (flags & ~(AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW))
+ return -EINVAL;
+
+ /* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept
+ * on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was
+ * implemented on various file systems on the lower level since a while, but never was accessible). However, we
+ * needed a concept like that for vaccuuming algorithms and such, hence we emulated it via a user xattr for a
+ * long time. Starting with Linux 4.11 there's statx() which exposes the timestamp to userspace for the first
+ * time, where it is available. Thius function will read it, but it tries to keep some compatibility with older
+ * systems: we try to read both the crtime/btime and the xattr, and then use whatever is older. After all the
+ * concept is useful for determining how "old" a file really is, and hence using the older of the two makes
+ * most sense. */
+
+ if (statx(dirfd, strempty(name), flags|AT_STATX_DONT_SYNC, STATX_BTIME, &sx) >= 0 &&
+ (sx.stx_mask & STATX_BTIME) &&
+ sx.stx_btime.tv_sec != 0)
+ a = (usec_t) sx.stx_btime.tv_sec * USEC_PER_SEC +
+ (usec_t) sx.stx_btime.tv_nsec / NSEC_PER_USEC;
+ else
+ a = USEC_INFINITY;
+
+ r = fgetxattrat_fake(dirfd, name, "user.crtime_usec", &le, sizeof(le), flags, &n);
+ if (r >= 0) {
+ if (n != sizeof(le))
+ r = -EIO;
+ else
+ r = parse_crtime(le, &b);
+ }
+ if (r < 0) {
+ if (a != USEC_INFINITY) {
+ *ret = a;
+ return 0;
+ }
+
+ return r;
+ }
+
+ if (a != USEC_INFINITY)
+ *ret = MIN(a, b);
+ else
+ *ret = b;
+
+ return 0;
+}
+
+int fd_getcrtime(int fd, usec_t *ret) {
+ return fd_getcrtime_at(fd, NULL, ret, AT_EMPTY_PATH);
+}
+
+int path_getcrtime(const char *p, usec_t *ret) {
+ return fd_getcrtime_at(AT_FDCWD, p, ret, 0);
+}
+
+int fd_setcrtime(int fd, usec_t usec) {
+ le64_t le;
+
+ assert(fd >= 0);
+
+ if (IN_SET(usec, 0, USEC_INFINITY))
+ usec = now(CLOCK_REALTIME);
+
+ le = htole64((uint64_t) usec);
+ if (fsetxattr(fd, "user.crtime_usec", &le, sizeof(le), 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int flistxattr_malloc(int fd, char **ret) {
+ size_t l = 100;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *v = NULL;
+ ssize_t n;
+
+ v = new(char, l+1);
+ if (!v)
+ return -ENOMEM;
+
+ n = flistxattr(fd, v, l);
+ if (n < 0) {
+ if (errno != ERANGE)
+ return -errno;
+ } else {
+ v[n] = 0; /* NUL terminate */
+ *ret = TAKE_PTR(v);
+ return (int) n;
+ }
+
+ n = flistxattr(fd, NULL, 0);
+ if (n < 0)
+ return -errno;
+ if (n > INT_MAX) /* We couldn't return this as 'int' anymore */
+ return -E2BIG;
+
+ l = (size_t) n;
+ }
+}
diff --git a/src/basic/xattr-util.h b/src/basic/xattr-util.h
new file mode 100644
index 0000000..560e34b
--- /dev/null
+++ b/src/basic/xattr-util.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+#include "time-util.h"
+
+int getxattr_malloc(const char *path, const char *name, char **value, bool allow_symlink);
+int fgetxattr_malloc(int fd, const char *name, char **value);
+
+int fgetxattrat_fake(
+ int dirfd,
+ const char *filename,
+ const char *attribute,
+ void *value, size_t size,
+ int flags,
+ size_t *ret_size);
+
+int fd_setcrtime(int fd, usec_t usec);
+
+int fd_getcrtime(int fd, usec_t *usec);
+int path_getcrtime(const char *p, usec_t *usec);
+int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags);
+
+int flistxattr_malloc(int fd, char **ret);
diff --git a/src/binfmt/binfmt.c b/src/binfmt/binfmt.c
new file mode 100644
index 0000000..43ed2f3
--- /dev/null
+++ b/src/binfmt/binfmt.c
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "binfmt-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "main-func.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+
+static bool arg_cat_config = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_unregister = false;
+
+static int delete_rule(const char *rule) {
+ _cleanup_free_ char *x = NULL, *fn = NULL;
+ char *e;
+
+ assert(rule);
+ assert(rule[0]);
+
+ e = strchrnul(rule + 1, rule[0]);
+ x = strndup(rule + 1, e - rule - 1);
+ if (!x)
+ return log_oom();
+
+ if (!filename_is_valid(x) ||
+ STR_IN_SET(x, "register", "status"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Rule file name '%s' is not valid, refusing.", x);
+
+ fn = path_join("/proc/sys/fs/binfmt_misc", x);
+ if (!fn)
+ return log_oom();
+
+ return write_string_file(fn, "-1", WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+static int apply_rule(const char *rule) {
+ int r;
+
+ (void) delete_rule(rule);
+
+ r = write_string_file("/proc/sys/fs/binfmt_misc/register", rule, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add binary format: %m");
+
+ return 0;
+}
+
+static int apply_file(const char *path, bool ignore_enoent) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(path);
+
+ r = search_and_fopen(path, "re", NULL, (const char**) CONF_PATHS_STRV("binfmt.d"), &f);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to open file '%s': %m", path);
+ }
+
+ log_debug("apply: %s", path);
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *p;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read file '%s': %m", path);
+ if (k == 0)
+ break;
+
+ p = strstrip(line);
+ if (isempty(p))
+ continue;
+ if (strchr(COMMENTS, p[0]))
+ continue;
+
+ k = apply_rule(p);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-binfmt.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Registers binary formats with the kernel.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --cat-config Show configuration files\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --unregister Unregister all existing entries\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_CAT_CONFIG,
+ ARG_NO_PAGER,
+ ARG_UNREGISTER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "cat-config", no_argument, NULL, ARG_CAT_CONFIG },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "unregister", no_argument, NULL, ARG_UNREGISTER },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CAT_CONFIG:
+ arg_cat_config = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_UNREGISTER:
+ arg_unregister = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if ((arg_unregister || arg_cat_config) && argc > optind)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Positional arguments are not allowed with --cat-config or --unregister");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ int r, k;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = 0;
+
+ if (arg_unregister)
+ return disable_binfmt();
+
+ if (argc > optind) {
+ int i;
+
+ for (i = optind; i < argc; i++) {
+ k = apply_file(argv[i], false);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ } else {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) CONF_PATHS_STRV("binfmt.d"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate binfmt.d files: %m");
+
+ if (arg_cat_config) {
+ (void) pager_open(arg_pager_flags);
+
+ return cat_files(NULL, files, 0);
+ }
+
+ /* Flush out all rules */
+ (void) write_string_file("/proc/sys/fs/binfmt_misc/status", "-1", WRITE_STRING_FILE_DISABLE_BUFFER);
+
+ STRV_FOREACH(f, files) {
+ k = apply_file(*f, true);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/boot/bless-boot-generator.c b/src/boot/bless-boot-generator.c
new file mode 100644
index 0000000..001c85a
--- /dev/null
+++ b/src/boot/bless-boot-generator.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "efi-loader.h"
+#include "generator.h"
+#include "log.h"
+#include "mkdir.h"
+#include "special.h"
+#include "string-util.h"
+#include "util.h"
+#include "virt.h"
+
+/* This generator pulls systemd-bless-boot.service into the initial transaction if the "LoaderBootCountPath" EFI
+ * variable is set, i.e. the system boots up with boot counting in effect, which means we should mark the boot as
+ * "good" if we manage to boot up far enough. */
+
+static const char *arg_dest = "/tmp";
+
+int main(int argc, char *argv[]) {
+ const char *p;
+
+ log_setup_generator();
+
+ if (argc > 1 && argc != 4) {
+ log_error("This program takes three or no arguments.");
+ return EXIT_FAILURE;
+ }
+
+ if (argc > 1)
+ arg_dest = argv[2];
+
+ if (in_initrd() > 0) {
+ log_debug("Skipping generator, running in the initrd.");
+ return EXIT_SUCCESS;
+ }
+
+ if (detect_container() > 0) {
+ log_debug("Skipping generator, running in a container.");
+ return EXIT_SUCCESS;
+ }
+
+ if (!is_efi_boot()) {
+ log_debug("Skipping generator, not an EFI boot.");
+ return EXIT_SUCCESS;
+ }
+
+ if (access("/sys/firmware/efi/efivars/LoaderBootCountPath-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f", F_OK) < 0) {
+
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Skipping generator, not booted with boot counting in effect.");
+ return EXIT_SUCCESS;
+ }
+
+ log_error_errno(errno, "Failed to check if LoaderBootCountPath EFI variable exists: %m");
+ return EXIT_FAILURE;
+ }
+
+ /* We pull this in from basic.target so that it ends up in all "regular" boot ups, but not in rescue.target or
+ * even emergency.target. */
+ p = strjoina(arg_dest, "/" SPECIAL_BASIC_TARGET ".wants/systemd-bless-boot.service");
+ (void) mkdir_parents(p, 0755);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/systemd-bless-boot.service", p) < 0) {
+ log_error_errno(errno, "Failed to create symlink '%s': %m", p);
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/boot/bless-boot.c b/src/boot/bless-boot.c
new file mode 100644
index 0000000..cd34f88
--- /dev/null
+++ b/src/boot/bless-boot.c
@@ -0,0 +1,526 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bootspec.h"
+#include "efi-loader.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "verbs.h"
+#include "virt.h"
+
+static char **arg_path = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_path, strv_freep);
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-bless-boot.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND\n"
+ "\n%sMark the boot process as good or bad.%s\n"
+ "\nCommands:\n"
+ " status Show status of current boot loader entry\n"
+ " good Mark this boot as good\n"
+ " bad Mark this boot as bad\n"
+ " indeterminate Undo any marking as good or bad\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Print version\n"
+ " --path=PATH Path to the $BOOT partition (may be used multiple times)\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_PATH = 0x100,
+ ARG_VERSION,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "path", required_argument, NULL, ARG_PATH },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ help(0, NULL, NULL);
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_PATH:
+ r = strv_extend(&arg_path, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+static int acquire_path(void) {
+ _cleanup_free_ char *esp_path = NULL, *xbootldr_path = NULL;
+ char **a;
+ int r;
+
+ if (!strv_isempty(arg_path))
+ return 0;
+
+ r = find_esp_and_warn(NULL, false, &esp_path, NULL, NULL, NULL, NULL);
+ if (r < 0 && r != -ENOKEY) /* ENOKEY means not found, and is the only error the function won't log about on its own */
+ return r;
+
+ r = find_xbootldr_and_warn(NULL, false, &xbootldr_path, NULL);
+ if (r < 0 && r != -ENOKEY)
+ return r;
+
+ if (!esp_path && !xbootldr_path)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Couldn't find $BOOT partition. It is recommended to mount it to /boot.\n"
+ "Alternatively, use --path= to specify path to mount point.");
+
+ if (esp_path)
+ a = strv_new(esp_path, xbootldr_path);
+ else
+ a = strv_new(xbootldr_path);
+ if (!a)
+ return log_oom();
+
+ strv_free_and_replace(arg_path, a);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *j;
+
+ j = strv_join(arg_path, ":");
+ log_debug("Using %s as boot loader drop-in search path.", j);
+ }
+
+ return 0;
+}
+
+static int parse_counter(
+ const char *path,
+ const char **p,
+ uint64_t *ret_left,
+ uint64_t *ret_done) {
+
+ uint64_t left, done;
+ const char *z, *e;
+ size_t k;
+ int r;
+
+ assert(path);
+ assert(p);
+
+ e = *p;
+ assert(e);
+ assert(*e == '+');
+
+ e++;
+
+ k = strspn(e, DIGITS);
+ if (k == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Can't parse empty 'tries left' counter from LoaderBootCountPath: %s",
+ path);
+
+ z = strndupa(e, k);
+ r = safe_atou64(z, &left);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse 'tries left' counter from LoaderBootCountPath: %s", path);
+
+ e += k;
+
+ if (*e == '-') {
+ e++;
+
+ k = strspn(e, DIGITS);
+ if (k == 0) /* If there's a "-" there also needs to be at least one digit */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Can't parse empty 'tries done' counter from LoaderBootCountPath: %s",
+ path);
+
+ z = strndupa(e, k);
+ r = safe_atou64(z, &done);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse 'tries done' counter from LoaderBootCountPath: %s", path);
+
+ e += k;
+ } else
+ done = 0;
+
+ if (done == 0)
+ log_warning("The 'tries done' counter is currently at zero. This can't really be, after all we are running, and this boot must hence count as one. Proceeding anyway.");
+
+ *p = e;
+
+ if (ret_left)
+ *ret_left = left;
+
+ if (ret_done)
+ *ret_done = done;
+
+ return 0;
+}
+
+static int acquire_boot_count_path(
+ char **ret_path,
+ char **ret_prefix,
+ uint64_t *ret_left,
+ uint64_t *ret_done,
+ char **ret_suffix) {
+
+ _cleanup_free_ char *path = NULL, *prefix = NULL, *suffix = NULL;
+ const char *last, *e;
+ uint64_t left, done;
+ int r;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderBootCountPath", &path);
+ if (r == -ENOENT)
+ return -EUNATCH; /* in this case, let the caller print a message */
+ if (r < 0)
+ return log_error_errno(r, "Failed to read LoaderBootCountPath EFI variable: %m");
+
+ efi_tilt_backslashes(path);
+
+ if (!path_is_normalized(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path read from LoaderBootCountPath is not normalized, refusing: %s",
+ path);
+
+ if (!path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path read from LoaderBootCountPath is not absolute, refusing: %s",
+ path);
+
+ last = last_path_component(path);
+ e = strrchr(last, '+');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path read from LoaderBootCountPath does not contain a counter, refusing: %s",
+ path);
+
+ if (ret_prefix) {
+ prefix = strndup(path, e - path);
+ if (!prefix)
+ return log_oom();
+ }
+
+ r = parse_counter(path, &e, &left, &done);
+ if (r < 0)
+ return r;
+
+ if (ret_suffix) {
+ suffix = strdup(e);
+ if (!suffix)
+ return log_oom();
+
+ *ret_suffix = TAKE_PTR(suffix);
+ }
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(path);
+ if (ret_prefix)
+ *ret_prefix = TAKE_PTR(prefix);
+ if (ret_left)
+ *ret_left = left;
+ if (ret_done)
+ *ret_done = done;
+
+ return 0;
+}
+
+static int make_good(const char *prefix, const char *suffix, char **ret) {
+ _cleanup_free_ char *good = NULL;
+
+ assert(prefix);
+ assert(suffix);
+ assert(ret);
+
+ /* Generate the path we'd use on good boots. This one is easy. If we are successful, we simple drop the counter
+ * pair entirely from the name. After all, we know all is good, and the logs will contain information about the
+ * tries we needed to come here, hence it's safe to drop the counters from the name. */
+
+ good = strjoin(prefix, suffix);
+ if (!good)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(good);
+ return 0;
+}
+
+static int make_bad(const char *prefix, uint64_t done, const char *suffix, char **ret) {
+ _cleanup_free_ char *bad = NULL;
+
+ assert(prefix);
+ assert(suffix);
+ assert(ret);
+
+ /* Generate the path we'd use on bad boots. Let's simply set the 'left' counter to zero, and keep the 'done'
+ * counter. The information might be interesting to boot loaders, after all. */
+
+ if (done == 0) {
+ bad = strjoin(prefix, "+0", suffix);
+ if (!bad)
+ return -ENOMEM;
+ } else {
+ if (asprintf(&bad, "%s+0-%" PRIu64 "%s", prefix, done, suffix) < 0)
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(bad);
+ return 0;
+}
+
+static const char *skip_slash(const char *path) {
+ assert(path);
+ assert(path[0] == '/');
+
+ return path + 1;
+}
+
+static int verb_status(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *path = NULL, *prefix = NULL, *suffix = NULL, *good = NULL, *bad = NULL;
+ uint64_t left, done;
+ char **p;
+ int r;
+
+ r = acquire_boot_count_path(&path, &prefix, &left, &done, &suffix);
+ if (r == -EUNATCH) { /* No boot count in place, then let's consider this a "clean" boot, as "good", "bad" or "indeterminate" don't apply. */
+ puts("clean");
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ r = acquire_path();
+ if (r < 0)
+ return r;
+
+ r = make_good(prefix, suffix, &good);
+ if (r < 0)
+ return log_oom();
+
+ r = make_bad(prefix, done, suffix, &bad);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Booted file: %s\n"
+ "The same modified for 'good': %s\n"
+ "The same modified for 'bad': %s\n",
+ path,
+ good,
+ bad);
+
+ log_debug("Tries left: %" PRIu64"\n"
+ "Tries done: %" PRIu64"\n",
+ left, done);
+
+ STRV_FOREACH(p, arg_path) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(*p, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return log_error_errno(errno, "Failed to open $BOOT partition '%s': %m", *p);
+ }
+
+ if (faccessat(fd, skip_slash(path), F_OK, 0) >= 0) {
+ puts("indeterminate");
+ return 0;
+ }
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to check if '%s' exists: %m", path);
+
+ if (faccessat(fd, skip_slash(good), F_OK, 0) >= 0) {
+ puts("good");
+ return 0;
+ }
+
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to check if '%s' exists: %m", good);
+
+ if (faccessat(fd, skip_slash(bad), F_OK, 0) >= 0) {
+ puts("bad");
+ return 0;
+ }
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to check if '%s' exists: %m", bad);
+
+ /* We didn't find any of the three? If so, let's try the next directory, before we give up. */
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Couldn't determine boot state: %m");
+}
+
+static int verb_set(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *path = NULL, *prefix = NULL, *suffix = NULL, *good = NULL, *bad = NULL, *parent = NULL;
+ const char *target, *source1, *source2;
+ uint64_t done;
+ char **p;
+ int r;
+
+ r = acquire_boot_count_path(&path, &prefix, NULL, &done, &suffix);
+ if (r == -EUNATCH) /* acquire_boot_count_path() won't log on its own for this specific error */
+ return log_error_errno(r, "Not booted with boot counting in effect.");
+ if (r < 0)
+ return r;
+
+ r = acquire_path();
+ if (r < 0)
+ return r;
+
+ r = make_good(prefix, suffix, &good);
+ if (r < 0)
+ return log_oom();
+
+ r = make_bad(prefix, done, suffix, &bad);
+ if (r < 0)
+ return log_oom();
+
+ /* Figure out what rename to what */
+ if (streq(argv[0], "good")) {
+ target = good;
+ source1 = path;
+ source2 = bad; /* Maybe this boot was previously marked as 'bad'? */
+ } else if (streq(argv[0], "bad")) {
+ target = bad;
+ source1 = path;
+ source2 = good; /* Maybe this boot was previously marked as 'good'? */
+ } else {
+ assert(streq(argv[0], "indeterminate"));
+ target = path;
+ source1 = good;
+ source2 = bad;
+ }
+
+ STRV_FOREACH(p, arg_path) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(*p, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open $BOOT partition '%s': %m", *p);
+
+ r = rename_noreplace(fd, skip_slash(source1), fd, skip_slash(target));
+ if (r == -EEXIST)
+ goto exists;
+ else if (r == -ENOENT) {
+
+ r = rename_noreplace(fd, skip_slash(source2), fd, skip_slash(target));
+ if (r == -EEXIST)
+ goto exists;
+ else if (r == -ENOENT) {
+
+ if (faccessat(fd, skip_slash(target), F_OK, 0) >= 0) /* Hmm, if we can't find either source file, maybe the destination already exists? */
+ goto exists;
+
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine if %s already exists: %m", target);
+
+ /* We found none of the snippets here, try the next directory */
+ continue;
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to rename '%s' to '%s': %m", source2, target);
+ else
+ log_debug("Successfully renamed '%s' to '%s'.", source2, target);
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to rename '%s' to '%s': %m", source1, target);
+ else
+ log_debug("Successfully renamed '%s' to '%s'.", source1, target);
+
+ /* First, fsync() the directory these files are located in */
+ parent = dirname_malloc(target);
+ if (!parent)
+ return log_oom();
+
+ r = fsync_path_at(fd, skip_slash(parent));
+ if (r < 0)
+ log_debug_errno(errno, "Failed to synchronize image directory, ignoring: %m");
+
+ /* Secondly, syncfs() the whole file system these files are located in */
+ if (syncfs(fd) < 0)
+ log_debug_errno(errno, "Failed to synchronize $BOOT partition, ignoring: %m");
+
+ log_info("Marked boot as '%s'. (Boot attempt counter is at %" PRIu64".)", argv[0], done);
+ }
+
+ log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Can't find boot counter source file for '%s': %m", target);
+ return 1;
+
+exists:
+ log_debug("Operation already executed before, not doing anything.");
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "status", VERB_ANY, 1, VERB_DEFAULT, verb_status },
+ { "good", VERB_ANY, 1, 0, verb_set },
+ { "bad", VERB_ANY, 1, 0, verb_set },
+ { "indeterminate", VERB_ANY, 1, 0, verb_set },
+ {}
+ };
+
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (detect_container() > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Marking a boot is not supported in containers.");
+
+ if (!is_efi_boot())
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Marking a boot is only supported on EFI systems.");
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/boot/boot-check-no-failures.c b/src/boot/boot-check-no-failures.c
new file mode 100644
index 0000000..92f3cd4
--- /dev/null
+++ b/src/boot/boot-check-no-failures.c
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "log.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-boot-check-no-failures.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n"
+ "\n%sVerify system operational state.%s\n\n"
+ " -h --help Show this help\n"
+ " --version Print version\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_PATH = 0x100,
+ ARG_VERSION,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ uint32_t n;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "NFailedUnits",
+ &error,
+ 'u',
+ &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get failed units counter: %s", bus_error_message(&error, r));
+
+ if (n > 0)
+ log_notice("Health check: %" PRIu32 " units have failed.", n);
+ else
+ log_info("Health check: no failed units.");
+
+ return n > 0;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/boot/bootctl.c b/src/boot/bootctl.c
new file mode 100644
index 0000000..511b010
--- /dev/null
+++ b/src/boot/bootctl.c
@@ -0,0 +1,1851 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <ftw.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/magic.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "bootspec.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "efi-loader.h"
+#include "efivars.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "locale-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "verbs.h"
+#include "virt.h"
+
+static char *arg_esp_path = NULL;
+static char *arg_xbootldr_path = NULL;
+static bool arg_print_esp_path = false;
+static bool arg_print_dollar_boot_path = false;
+static bool arg_touch_variables = true;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_graceful = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_esp_path, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_xbootldr_path, freep);
+
+static const char *arg_dollar_boot_path(void) {
+ /* $BOOT shall be the XBOOTLDR partition if it exists, and otherwise the ESP */
+ return arg_xbootldr_path ?: arg_esp_path;
+}
+
+static int acquire_esp(
+ bool unprivileged_mode,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ char *np;
+ int r;
+
+ /* Find the ESP, and log about errors. Note that find_esp_and_warn() will log in all error cases on
+ * its own, except for ENOKEY (which is good, we want to show our own message in that case,
+ * suggesting use of --esp-path=) and EACCESS (only when we request unprivileged mode; in this case
+ * we simply eat up the error here, so that --list and --status work too, without noise about
+ * this). */
+
+ r = find_esp_and_warn(arg_esp_path, unprivileged_mode, &np, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r == -ENOKEY)
+ return log_error_errno(r,
+ "Couldn't find EFI system partition. It is recommended to mount it to /boot or /efi.\n"
+ "Alternatively, use --esp-path= to specify path to mount point.");
+ if (r < 0)
+ return r;
+
+ free_and_replace(arg_esp_path, np);
+ log_debug("Using EFI System Partition at %s.", arg_esp_path);
+
+ return 1;
+}
+
+static int acquire_xbootldr(bool unprivileged_mode, sd_id128_t *ret_uuid) {
+ char *np;
+ int r;
+
+ r = find_xbootldr_and_warn(arg_xbootldr_path, unprivileged_mode, &np, ret_uuid);
+ if (r == -ENOKEY) {
+ log_debug_errno(r, "Didn't find an XBOOTLDR partition, using the ESP as $BOOT.");
+ if (ret_uuid)
+ *ret_uuid = SD_ID128_NULL;
+ arg_xbootldr_path = mfree(arg_xbootldr_path);
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ free_and_replace(arg_xbootldr_path, np);
+ log_debug("Using XBOOTLDR partition at %s as $BOOT.", arg_xbootldr_path);
+
+ return 1;
+}
+
+/* search for "#### LoaderInfo: systemd-boot 218 ####" string inside the binary */
+static int get_file_version(int fd, char **v) {
+ struct stat st;
+ char *buf;
+ const char *s, *e;
+ char *x = NULL;
+ int r;
+
+ assert(fd >= 0);
+ assert(v);
+
+ if (fstat(fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat EFI binary: %m");
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return log_error_errno(r, "EFI binary is not a regular file: %m");
+
+ if (st.st_size < 27) {
+ *v = NULL;
+ return 0;
+ }
+
+ buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (buf == MAP_FAILED)
+ return log_error_errno(errno, "Failed to memory map EFI binary: %m");
+
+ s = memmem(buf, st.st_size - 8, "#### LoaderInfo: ", 17);
+ if (!s)
+ goto finish;
+ s += 17;
+
+ e = memmem(s, st.st_size - (s - buf), " ####", 5);
+ if (!e || e - s < 3) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Malformed version string.");
+ goto finish;
+ }
+
+ x = strndup(s, e - s);
+ if (!x) {
+ r = log_oom();
+ goto finish;
+ }
+ r = 1;
+
+finish:
+ (void) munmap(buf, st.st_size);
+ *v = x;
+ return r;
+}
+
+static int enumerate_binaries(const char *esp_path, const char *path, const char *prefix) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ const char *p;
+ int c = 0, r;
+
+ assert(esp_path);
+ assert(path);
+
+ p = prefix_roota(esp_path, path);
+ d = opendir(p);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to read \"%s\": %m", p);
+ }
+
+ FOREACH_DIRENT(de, d, break) {
+ _cleanup_free_ char *v = NULL;
+ _cleanup_close_ int fd = -1;
+
+ if (!endswith_no_case(de->d_name, ".efi"))
+ continue;
+
+ if (prefix && !startswith_no_case(de->d_name, prefix))
+ continue;
+
+ fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open \"%s/%s\" for reading: %m", p, de->d_name);
+
+ r = get_file_version(fd, &v);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ printf(" File: %s/%s/%s (%s%s%s)\n", special_glyph(SPECIAL_GLYPH_TREE_RIGHT), path, de->d_name, ansi_highlight(), v, ansi_normal());
+ else
+ printf(" File: %s/%s/%s\n", special_glyph(SPECIAL_GLYPH_TREE_RIGHT), path, de->d_name);
+
+ c++;
+ }
+
+ return c;
+}
+
+static int status_binaries(const char *esp_path, sd_id128_t partition) {
+ int r;
+
+ printf("Available Boot Loaders on ESP:\n");
+
+ if (!esp_path) {
+ printf(" ESP: Cannot find or access mount point of ESP.\n\n");
+ return -ENOENT;
+ }
+
+ printf(" ESP: %s", esp_path);
+ if (!sd_id128_is_null(partition))
+ printf(" (/dev/disk/by-partuuid/" SD_ID128_UUID_FORMAT_STR ")", SD_ID128_FORMAT_VAL(partition));
+ printf("\n");
+
+ r = enumerate_binaries(esp_path, "EFI/systemd", NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ log_info("systemd-boot not installed in ESP.");
+
+ r = enumerate_binaries(esp_path, "EFI/BOOT", "boot");
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ log_info("No default/fallback boot loader installed in ESP.");
+
+ r = 0;
+
+finish:
+ printf("\n");
+ return r;
+}
+
+static int print_efi_option(uint16_t id, bool in_order) {
+ _cleanup_free_ char *title = NULL;
+ _cleanup_free_ char *path = NULL;
+ sd_id128_t partition;
+ bool active;
+ int r;
+
+ r = efi_get_boot_option(id, &title, &partition, &path, &active);
+ if (r < 0)
+ return r;
+
+ /* print only configured entries with partition information */
+ if (!path || sd_id128_is_null(partition))
+ return 0;
+
+ efi_tilt_backslashes(path);
+
+ printf(" Title: %s%s%s\n", ansi_highlight(), strna(title), ansi_normal());
+ printf(" ID: 0x%04X\n", id);
+ printf(" Status: %sactive%s\n", active ? "" : "in", in_order ? ", boot-order" : "");
+ printf(" Partition: /dev/disk/by-partuuid/" SD_ID128_UUID_FORMAT_STR "\n",
+ SD_ID128_FORMAT_VAL(partition));
+ printf(" File: %s%s\n", special_glyph(SPECIAL_GLYPH_TREE_RIGHT), path);
+ printf("\n");
+
+ return 0;
+}
+
+static int status_variables(void) {
+ _cleanup_free_ uint16_t *options = NULL, *order = NULL;
+ int n_options, n_order, i;
+
+ n_options = efi_get_boot_options(&options);
+ if (n_options == -ENOENT)
+ return log_error_errno(n_options,
+ "Failed to access EFI variables, efivarfs"
+ " needs to be available at /sys/firmware/efi/efivars/.");
+ if (n_options < 0)
+ return log_error_errno(n_options, "Failed to read EFI boot entries: %m");
+
+ n_order = efi_get_boot_order(&order);
+ if (n_order == -ENOENT)
+ n_order = 0;
+ else if (n_order < 0)
+ return log_error_errno(n_order, "Failed to read EFI boot order: %m");
+
+ /* print entries in BootOrder first */
+ printf("Boot Loaders Listed in EFI Variables:\n");
+ for (i = 0; i < n_order; i++)
+ print_efi_option(order[i], true);
+
+ /* print remaining entries */
+ for (i = 0; i < n_options; i++) {
+ int j;
+
+ for (j = 0; j < n_order; j++)
+ if (options[i] == order[j])
+ goto next_option;
+
+ print_efi_option(options[i], false);
+
+ next_option:
+ continue;
+ }
+
+ return 0;
+}
+
+static int boot_entry_file_check(const char *root, const char *p) {
+ _cleanup_free_ char *path;
+
+ path = path_join(root, p);
+ if (!path)
+ return log_oom();
+
+ if (access(path, F_OK) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static void boot_entry_file_list(const char *field, const char *root, const char *p, int *ret_status) {
+ int status = boot_entry_file_check(root, p);
+
+ printf("%13s%s ", strempty(field), field ? ":" : " ");
+ if (status < 0) {
+ errno = -status;
+ printf("%s%s%s (%m)\n", ansi_highlight_red(), p, ansi_normal());
+ } else
+ printf("%s\n", p);
+
+ if (*ret_status == 0 && status < 0)
+ *ret_status = status;
+}
+
+static int boot_entry_show(const BootEntry *e, bool show_as_default) {
+ int status = 0;
+
+ /* Returns 0 on success, negative on processing error, and positive if something is wrong with the
+ boot entry itself. */
+
+ assert(e);
+
+ printf(" title: %s%s%s" "%s%s%s\n",
+ ansi_highlight(), boot_entry_title(e), ansi_normal(),
+ ansi_highlight_green(), show_as_default ? " (default)" : "", ansi_normal());
+
+ if (e->id)
+ printf(" id: %s\n", e->id);
+ if (e->path) {
+ _cleanup_free_ char *link = NULL;
+
+ /* Let's urlify the link to make it easy to view in an editor, but only if it is a text
+ * file. Unified images are binary ELFs, and EFI variables are not pure text either. */
+ if (e->type == BOOT_ENTRY_CONF)
+ (void) terminal_urlify_path(e->path, NULL, &link);
+
+ printf(" source: %s\n", link ?: e->path);
+ }
+ if (e->version)
+ printf(" version: %s\n", e->version);
+ if (e->machine_id)
+ printf(" machine-id: %s\n", e->machine_id);
+ if (e->architecture)
+ printf(" architecture: %s\n", e->architecture);
+ if (e->kernel)
+ boot_entry_file_list("linux", e->root, e->kernel, &status);
+
+ char **s;
+ STRV_FOREACH(s, e->initrd)
+ boot_entry_file_list(s == e->initrd ? "initrd" : NULL,
+ e->root,
+ *s,
+ &status);
+ if (!strv_isempty(e->options)) {
+ _cleanup_free_ char *t = NULL, *t2 = NULL;
+ _cleanup_strv_free_ char **ts = NULL;
+
+ t = strv_join(e->options, " ");
+ if (!t)
+ return log_oom();
+
+ ts = strv_split_newlines(t);
+ if (!ts)
+ return log_oom();
+
+ t2 = strv_join(ts, "\n ");
+ if (!t2)
+ return log_oom();
+
+ printf(" options: %s\n", t2);
+ }
+ if (e->device_tree)
+ boot_entry_file_list("devicetree", e->root, e->device_tree, &status);
+
+ return -status;
+}
+
+static int status_entries(
+ const char *esp_path,
+ sd_id128_t esp_partition_uuid,
+ const char *xbootldr_path,
+ sd_id128_t xbootldr_partition_uuid) {
+
+ _cleanup_(boot_config_free) BootConfig config = {};
+ sd_id128_t dollar_boot_partition_uuid;
+ const char *dollar_boot_path;
+ int r;
+
+ assert(esp_path || xbootldr_path);
+
+ if (xbootldr_path) {
+ dollar_boot_path = xbootldr_path;
+ dollar_boot_partition_uuid = xbootldr_partition_uuid;
+ } else {
+ dollar_boot_path = esp_path;
+ dollar_boot_partition_uuid = esp_partition_uuid;
+ }
+
+ printf("Boot Loader Entries:\n"
+ " $BOOT: %s", dollar_boot_path);
+ if (!sd_id128_is_null(dollar_boot_partition_uuid))
+ printf(" (/dev/disk/by-partuuid/" SD_ID128_UUID_FORMAT_STR ")",
+ SD_ID128_FORMAT_VAL(dollar_boot_partition_uuid));
+ printf("\n\n");
+
+ r = boot_entries_load_config(esp_path, xbootldr_path, &config);
+ if (r < 0)
+ return r;
+
+ if (config.default_entry < 0)
+ printf("%zu entries, no entry could be determined as default.\n", config.n_entries);
+ else {
+ printf("Default Boot Loader Entry:\n");
+
+ r = boot_entry_show(config.entries + config.default_entry, false);
+ if (r > 0)
+ /* < 0 is already logged by the function itself, let's just emit an extra warning if
+ the default entry is broken */
+ printf("\nWARNING: default boot entry is broken\n");
+ }
+
+ return 0;
+}
+
+static int compare_product(const char *a, const char *b) {
+ size_t x, y;
+
+ assert(a);
+ assert(b);
+
+ x = strcspn(a, " ");
+ y = strcspn(b, " ");
+ if (x != y)
+ return x < y ? -1 : x > y ? 1 : 0;
+
+ return strncmp(a, b, x);
+}
+
+static int compare_version(const char *a, const char *b) {
+ assert(a);
+ assert(b);
+
+ a += strcspn(a, " ");
+ a += strspn(a, " ");
+ b += strcspn(b, " ");
+ b += strspn(b, " ");
+
+ return strverscmp(a, b);
+}
+
+static int version_check(int fd_from, const char *from, int fd_to, const char *to) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+ int r;
+
+ assert(fd_from >= 0);
+ assert(from);
+ assert(fd_to >= 0);
+ assert(to);
+
+ r = get_file_version(fd_from, &a);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Source file \"%s\" does not carry version information!",
+ from);
+
+ r = get_file_version(fd_to, &b);
+ if (r < 0)
+ return r;
+ if (r == 0 || compare_product(a, b) != 0)
+ return log_notice_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Skipping \"%s\", since it's owned by another boot loader.",
+ to);
+
+ if (compare_version(a, b) < 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(ESTALE), "Skipping \"%s\", since a newer boot loader version exists already.", to);
+
+ return 0;
+}
+
+static int copy_file_with_version_check(const char *from, const char *to, bool force) {
+ _cleanup_close_ int fd_from = -1, fd_to = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ fd_from = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd_from < 0)
+ return log_error_errno(errno, "Failed to open \"%s\" for reading: %m", from);
+
+ if (!force) {
+ fd_to = open(to, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd_to < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open \"%s\" for reading: %m", to);
+ } else {
+ r = version_check(fd_from, from, fd_to, to);
+ if (r < 0)
+ return r;
+
+ if (lseek(fd_from, 0, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek in \"%s\": %m", from);
+
+ fd_to = safe_close(fd_to);
+ }
+ }
+
+ r = tempfn_random(to, NULL, &t);
+ if (r < 0)
+ return log_oom();
+
+ RUN_WITH_UMASK(0000) {
+ fd_to = open(t, O_WRONLY|O_CREAT|O_CLOEXEC|O_EXCL|O_NOFOLLOW, 0644);
+ if (fd_to < 0)
+ return log_error_errno(errno, "Failed to open \"%s\" for writing: %m", t);
+ }
+
+ r = copy_bytes(fd_from, fd_to, (uint64_t) -1, COPY_REFLINK);
+ if (r < 0) {
+ (void) unlink(t);
+ return log_error_errno(r, "Failed to copy data from \"%s\" to \"%s\": %m", from, t);
+ }
+
+ (void) copy_times(fd_from, fd_to, 0);
+
+ if (fsync(fd_to) < 0) {
+ (void) unlink_noerrno(t);
+ return log_error_errno(errno, "Failed to copy data from \"%s\" to \"%s\": %m", from, t);
+ }
+
+ (void) fsync_directory_of_file(fd_to);
+
+ if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0) {
+ (void) unlink_noerrno(t);
+ return log_error_errno(errno, "Failed to rename \"%s\" to \"%s\": %m", t, to);
+ }
+
+ log_info("Copied \"%s\" to \"%s\".", from, to);
+
+ return 0;
+}
+
+static int mkdir_one(const char *prefix, const char *suffix) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(prefix, suffix);
+ if (mkdir(p, 0700) < 0) {
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create \"%s\": %m", p);
+ } else
+ log_info("Created \"%s\".", p);
+
+ return 0;
+}
+
+static const char *const esp_subdirs[] = {
+ /* The directories to place in the ESP */
+ "EFI",
+ "EFI/systemd",
+ "EFI/BOOT",
+ "loader",
+ NULL
+};
+
+static const char *const dollar_boot_subdirs[] = {
+ /* The directories to place in the XBOOTLDR partition or the ESP, depending what exists */
+ "loader",
+ "loader/entries", /* Type #1 entries */
+ "EFI",
+ "EFI/Linux", /* Type #2 entries */
+ NULL
+};
+
+static int create_subdirs(const char *root, const char * const *subdirs) {
+ const char *const *i;
+ int r;
+
+ STRV_FOREACH(i, subdirs) {
+ r = mkdir_one(root, *i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int copy_one_file(const char *esp_path, const char *name, bool force) {
+ const char *e;
+ char *p, *q;
+ int r;
+
+ p = strjoina(BOOTLIBDIR "/", name);
+ q = strjoina(esp_path, "/EFI/systemd/", name);
+ r = copy_file_with_version_check(p, q, force);
+
+ e = startswith(name, "systemd-boot");
+ if (e) {
+ int k;
+ char *v;
+
+ /* Create the EFI default boot loader name (specified for removable devices) */
+ v = strjoina(esp_path, "/EFI/BOOT/BOOT", e);
+ ascii_strupper(strrchr(v, '/') + 1);
+
+ k = copy_file_with_version_check(p, v, force);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int install_binaries(const char *esp_path, bool force) {
+ struct dirent *de;
+ _cleanup_closedir_ DIR *d = NULL;
+ int r = 0;
+
+ d = opendir(BOOTLIBDIR);
+ if (!d)
+ return log_error_errno(errno, "Failed to open \""BOOTLIBDIR"\": %m");
+
+ FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read \""BOOTLIBDIR"\": %m")) {
+ int k;
+
+ if (!endswith_no_case(de->d_name, ".efi"))
+ continue;
+
+ k = copy_one_file(esp_path, de->d_name, force);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool same_entry(uint16_t id, sd_id128_t uuid, const char *path) {
+ _cleanup_free_ char *opath = NULL;
+ sd_id128_t ouuid;
+ int r;
+
+ r = efi_get_boot_option(id, NULL, &ouuid, &opath, NULL);
+ if (r < 0)
+ return false;
+ if (!sd_id128_equal(uuid, ouuid))
+ return false;
+ if (!streq_ptr(path, opath))
+ return false;
+
+ return true;
+}
+
+static int find_slot(sd_id128_t uuid, const char *path, uint16_t *id) {
+ _cleanup_free_ uint16_t *options = NULL;
+ int n, i;
+
+ n = efi_get_boot_options(&options);
+ if (n < 0)
+ return n;
+
+ /* find already existing systemd-boot entry */
+ for (i = 0; i < n; i++)
+ if (same_entry(options[i], uuid, path)) {
+ *id = options[i];
+ return 1;
+ }
+
+ /* find free slot in the sorted BootXXXX variable list */
+ for (i = 0; i < n; i++)
+ if (i != options[i]) {
+ *id = i;
+ return 1;
+ }
+
+ /* use the next one */
+ if (i == 0xffff)
+ return -ENOSPC;
+ *id = i;
+ return 0;
+}
+
+static int insert_into_order(uint16_t slot, bool first) {
+ _cleanup_free_ uint16_t *order = NULL;
+ uint16_t *t;
+ int n, i;
+
+ n = efi_get_boot_order(&order);
+ if (n <= 0)
+ /* no entry, add us */
+ return efi_set_boot_order(&slot, 1);
+
+ /* are we the first and only one? */
+ if (n == 1 && order[0] == slot)
+ return 0;
+
+ /* are we already in the boot order? */
+ for (i = 0; i < n; i++) {
+ if (order[i] != slot)
+ continue;
+
+ /* we do not require to be the first one, all is fine */
+ if (!first)
+ return 0;
+
+ /* move us to the first slot */
+ memmove(order + 1, order, i * sizeof(uint16_t));
+ order[0] = slot;
+ return efi_set_boot_order(order, n);
+ }
+
+ /* extend array */
+ t = reallocarray(order, n + 1, sizeof(uint16_t));
+ if (!t)
+ return -ENOMEM;
+ order = t;
+
+ /* add us to the top or end of the list */
+ if (first) {
+ memmove(order + 1, order, n * sizeof(uint16_t));
+ order[0] = slot;
+ } else
+ order[n] = slot;
+
+ return efi_set_boot_order(order, n + 1);
+}
+
+static int remove_from_order(uint16_t slot) {
+ _cleanup_free_ uint16_t *order = NULL;
+ int n, i;
+
+ n = efi_get_boot_order(&order);
+ if (n <= 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ if (order[i] != slot)
+ continue;
+
+ if (i + 1 < n)
+ memmove(order + i, order + i+1, (n - i) * sizeof(uint16_t));
+ return efi_set_boot_order(order, n - 1);
+ }
+
+ return 0;
+}
+
+static int install_variables(const char *esp_path,
+ uint32_t part, uint64_t pstart, uint64_t psize,
+ sd_id128_t uuid, const char *path,
+ bool first) {
+ const char *p;
+ uint16_t slot;
+ int r;
+
+ if (!is_efi_boot()) {
+ log_warning("Not booted with EFI, skipping EFI variable setup.");
+ return 0;
+ }
+
+ p = prefix_roota(esp_path, path);
+ if (access(p, F_OK) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Cannot access \"%s\": %m", p);
+ }
+
+ r = find_slot(uuid, path, &slot);
+ if (r < 0)
+ return log_error_errno(r,
+ r == -ENOENT ?
+ "Failed to access EFI variables. Is the \"efivarfs\" filesystem mounted?" :
+ "Failed to determine current boot order: %m");
+
+ if (first || r == 0) {
+ r = efi_add_boot_option(slot, "Linux Boot Manager",
+ part, pstart, psize,
+ uuid, path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create EFI Boot variable entry: %m");
+
+ log_info("Created EFI boot entry \"Linux Boot Manager\".");
+ }
+
+ return insert_into_order(slot, first);
+}
+
+static int remove_boot_efi(const char *esp_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ const char *p;
+ int r, c = 0;
+
+ p = prefix_roota(esp_path, "/EFI/BOOT");
+ d = opendir(p);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open directory \"%s\": %m", p);
+ }
+
+ FOREACH_DIRENT(de, d, break) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *v = NULL;
+
+ if (!endswith_no_case(de->d_name, ".efi"))
+ continue;
+
+ if (!startswith_no_case(de->d_name, "boot"))
+ continue;
+
+ fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open \"%s/%s\" for reading: %m", p, de->d_name);
+
+ r = get_file_version(fd, &v);
+ if (r < 0)
+ return r;
+ if (r > 0 && startswith(v, "systemd-boot ")) {
+ r = unlinkat(dirfd(d), de->d_name, 0);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to remove \"%s/%s\": %m", p, de->d_name);
+
+ log_info("Removed \"%s/%s\".", p, de->d_name);
+ }
+
+ c++;
+ }
+
+ return c;
+}
+
+static int rmdir_one(const char *prefix, const char *suffix) {
+ const char *p;
+
+ p = prefix_roota(prefix, suffix);
+ if (rmdir(p) < 0) {
+ bool ignore = IN_SET(errno, ENOENT, ENOTEMPTY);
+
+ log_full_errno(ignore ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to remove directory \"%s\": %m", p);
+ if (!ignore)
+ return -errno;
+ } else
+ log_info("Removed \"%s\".", p);
+
+ return 0;
+}
+
+static int remove_subdirs(const char *root, const char *const *subdirs) {
+ int r, q;
+
+ /* We use recursion here to destroy the directories in reverse order. Which should be safe given how
+ * short the array is. */
+
+ if (!subdirs[0]) /* A the end of the list */
+ return 0;
+
+ r = remove_subdirs(root, subdirs + 1);
+ q = rmdir_one(root, subdirs[0]);
+
+ return r < 0 ? r : q;
+}
+
+static int remove_binaries(const char *esp_path) {
+ const char *p;
+ int r, q;
+
+ p = prefix_roota(esp_path, "/EFI/systemd");
+ r = rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ q = remove_boot_efi(esp_path);
+ if (q < 0 && r == 0)
+ r = q;
+
+ return r;
+}
+
+static int remove_file(const char *root, const char *file) {
+ const char *p;
+
+ assert(root);
+ assert(file);
+
+ p = prefix_roota(root, file);
+ if (unlink(p) < 0) {
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to unlink file \"%s\": %m", p);
+
+ return errno == ENOENT ? 0 : -errno;
+ }
+
+ log_info("Removed \"%s\".", p);
+ return 1;
+}
+
+static int remove_variables(sd_id128_t uuid, const char *path, bool in_order) {
+ uint16_t slot;
+ int r;
+
+ if (!is_efi_boot())
+ return 0;
+
+ r = find_slot(uuid, path, &slot);
+ if (r != 1)
+ return 0;
+
+ r = efi_remove_boot_option(slot);
+ if (r < 0)
+ return r;
+
+ if (in_order)
+ return remove_from_order(slot);
+
+ return 0;
+}
+
+static int remove_loader_variables(void) {
+ const char *p;
+ int r = 0;
+
+ /* Remove all persistent loader variables we define */
+
+ FOREACH_STRING(p,
+ "LoaderConfigTimeout",
+ "LoaderConfigTimeoutOneShot",
+ "LoaderEntryDefault",
+ "LoaderEntryOneShot",
+ "LoaderSystemToken") {
+
+ int q;
+
+ q = efi_set_variable(EFI_VENDOR_LOADER, p, NULL, 0);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ log_warning_errno(q, "Failed to remove %s variable: %m", p);
+ if (r >= 0)
+ r = q;
+ } else
+ log_info("Removed EFI variable %s.", p);
+ }
+
+ return r;
+}
+
+static int install_loader_config(const char *esp_path) {
+ _cleanup_(unlink_and_freep) char *t = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ int r;
+
+ p = prefix_roota(esp_path, "/loader/loader.conf");
+ if (access(p, F_OK) >= 0) /* Silently skip creation if the file already exists (early check) */
+ return 0;
+
+ fd = open_tmpfile_linkable(p, O_WRONLY|O_CLOEXEC, &t);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open \"%s\" for writing: %m", p);
+
+ f = take_fdopen(&fd, "w");
+ if (!f)
+ return log_oom();
+
+ fprintf(f, "#timeout 3\n"
+ "#console-mode keep\n");
+
+ r = fflush_sync_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write \"%s\": %m", p);
+
+ r = link_tmpfile(fileno(f), t, p);
+ if (r == -EEXIST)
+ return 0; /* Silently skip creation if the file exists now (recheck) */
+ if (r < 0)
+ return log_error_errno(r, "Failed to move \"%s\" into place: %m", p);
+
+ t = mfree(t);
+ return 1;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("bootctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] COMMAND ...\n"
+ "\n%5$sControl EFI firmware boot settings and manage boot loader.%6$s\n"
+ "\n%3$sGeneric EFI Firmware/Boot Loader Commands:%4$s\n"
+ " status Show status of installed boot loader and EFI variables\n"
+ " reboot-to-firmware [BOOL]\n"
+ " Query or set reboot-to-firmware EFI flag\n"
+ " systemd-efi-options [STRING]\n"
+ " Query or set system options string in EFI variable\n"
+ "\n%3$sBoot Loader Specification Commands:%4$s\n"
+ " list List boot loader entries\n"
+ " set-default ID Set default boot loader entry\n"
+ " set-oneshot ID Set default boot loader entry, for next boot only\n"
+ "\n%3$ssystemd-boot Commands:%4$s\n"
+ " install Install systemd-boot to the ESP and EFI variables\n"
+ " update Update systemd-boot in the ESP and EFI variables\n"
+ " remove Remove systemd-boot from the ESP and EFI variables\n"
+ " is-installed Test whether systemd-boot is installed in the ESP\n"
+ " random-seed Initialize random seed in ESP and EFI variables\n"
+ "\n%3$sOptions:%4$s\n"
+ " -h --help Show this help\n"
+ " --version Print version\n"
+ " --esp-path=PATH Path to the EFI System Partition (ESP)\n"
+ " --boot-path=PATH Path to the $BOOT partition\n"
+ " -p --print-esp-path Print path to the EFI System Partition\n"
+ " -x --print-boot-path Print path to the $BOOT partition\n"
+ " --no-variables Don't touch EFI variables\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --graceful Don't fail when the ESP cannot be found or EFI\n"
+ " variables cannot be written\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ , ansi_underline(), ansi_normal()
+ , ansi_highlight(), ansi_normal()
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_ESP_PATH = 0x100,
+ ARG_BOOT_PATH,
+ ARG_VERSION,
+ ARG_NO_VARIABLES,
+ ARG_NO_PAGER,
+ ARG_GRACEFUL,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "esp-path", required_argument, NULL, ARG_ESP_PATH },
+ { "path", required_argument, NULL, ARG_ESP_PATH }, /* Compatibility alias */
+ { "boot-path", required_argument, NULL, ARG_BOOT_PATH },
+ { "print-esp-path", no_argument, NULL, 'p' },
+ { "print-path", no_argument, NULL, 'p' }, /* Compatibility alias */
+ { "print-boot-path", no_argument, NULL, 'x' },
+ { "no-variables", no_argument, NULL, ARG_NO_VARIABLES },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "graceful", no_argument, NULL, ARG_GRACEFUL },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hpx", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ help(0, NULL, NULL);
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ESP_PATH:
+ r = free_and_strdup(&arg_esp_path, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case ARG_BOOT_PATH:
+ r = free_and_strdup(&arg_xbootldr_path, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case 'p':
+ if (arg_print_dollar_boot_path)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--print-boot-path/-x cannot be combined with --print-esp-path/-p");
+ arg_print_esp_path = true;
+ break;
+
+ case 'x':
+ if (arg_print_esp_path)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--print-boot-path/-x cannot be combined with --print-esp-path/-p");
+ arg_print_dollar_boot_path = true;
+ break;
+
+ case ARG_NO_VARIABLES:
+ arg_touch_variables = false;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_GRACEFUL:
+ arg_graceful = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+static void read_loader_efi_var(const char *name, char **var) {
+ int r;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, name, var);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read EFI variable %s: %m", name);
+}
+
+static void print_yes_no_line(bool first, bool good, const char *name) {
+ printf("%s%s%s%s %s\n",
+ first ? " Features: " : " ",
+ good ? ansi_highlight_green() : ansi_highlight_red(),
+ good ? special_glyph(SPECIAL_GLYPH_CHECK_MARK) : special_glyph(SPECIAL_GLYPH_CROSS_MARK),
+ ansi_normal(),
+ name);
+}
+
+static int verb_status(int argc, char *argv[], void *userdata) {
+ sd_id128_t esp_uuid = SD_ID128_NULL, xbootldr_uuid = SD_ID128_NULL;
+ int r, k;
+
+ r = acquire_esp(geteuid() != 0, NULL, NULL, NULL, &esp_uuid);
+ if (arg_print_esp_path) {
+ if (r == -EACCES) /* If we couldn't acquire the ESP path, log about access errors (which is the only
+ * error the find_esp_and_warn() won't log on its own) */
+ return log_error_errno(r, "Failed to determine ESP location: %m");
+ if (r < 0)
+ return r;
+
+ puts(arg_esp_path);
+ }
+
+ r = acquire_xbootldr(geteuid() != 0, &xbootldr_uuid);
+ if (arg_print_dollar_boot_path) {
+ if (r == -EACCES)
+ return log_error_errno(r, "Failed to determine XBOOTLDR location: %m");
+ if (r < 0)
+ return r;
+
+ const char *path = arg_dollar_boot_path();
+ if (!path)
+ return log_error_errno(SYNTHETIC_ERRNO(EACCES), "Failed to determine XBOOTLDR location: %m");
+
+ puts(path);
+ }
+
+ if (arg_print_esp_path || arg_print_dollar_boot_path)
+ return 0;
+
+ r = 0; /* If we couldn't determine the path, then don't consider that a problem from here on, just show what we
+ * can show */
+
+ (void) pager_open(arg_pager_flags);
+
+ if (is_efi_boot()) {
+ static const struct {
+ uint64_t flag;
+ const char *name;
+ } flags[] = {
+ { EFI_LOADER_FEATURE_BOOT_COUNTING, "Boot counting" },
+ { EFI_LOADER_FEATURE_CONFIG_TIMEOUT, "Menu timeout control" },
+ { EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT, "One-shot menu timeout control" },
+ { EFI_LOADER_FEATURE_ENTRY_DEFAULT, "Default entry control" },
+ { EFI_LOADER_FEATURE_ENTRY_ONESHOT, "One-shot entry control" },
+ { EFI_LOADER_FEATURE_XBOOTLDR, "Support for XBOOTLDR partition" },
+ { EFI_LOADER_FEATURE_RANDOM_SEED, "Support for passing random seed to OS" },
+ };
+
+ _cleanup_free_ char *fw_type = NULL, *fw_info = NULL, *loader = NULL, *loader_path = NULL, *stub = NULL;
+ sd_id128_t loader_part_uuid = SD_ID128_NULL;
+ uint64_t loader_features = 0;
+ size_t i;
+
+ read_loader_efi_var("LoaderFirmwareType", &fw_type);
+ read_loader_efi_var("LoaderFirmwareInfo", &fw_info);
+ read_loader_efi_var("LoaderInfo", &loader);
+ read_loader_efi_var("StubInfo", &stub);
+ read_loader_efi_var("LoaderImageIdentifier", &loader_path);
+ (void) efi_loader_get_features(&loader_features);
+
+ if (loader_path)
+ efi_tilt_backslashes(loader_path);
+
+ k = efi_loader_get_device_part_uuid(&loader_part_uuid);
+ if (k < 0 && k != -ENOENT)
+ r = log_warning_errno(k, "Failed to read EFI variable LoaderDevicePartUUID: %m");
+
+ printf("System:\n");
+ printf(" Firmware: %s%s (%s)%s\n", ansi_highlight(), strna(fw_type), strna(fw_info), ansi_normal());
+ printf(" Secure Boot: %sd\n", enable_disable(is_efi_secure_boot()));
+ printf(" Setup Mode: %s\n", is_efi_secure_boot_setup_mode() ? "setup" : "user");
+
+ k = efi_get_reboot_to_firmware();
+ if (k > 0)
+ printf(" Boot into FW: %sactive%s\n", ansi_highlight_yellow(), ansi_normal());
+ else if (k == 0)
+ printf(" Boot into FW: supported\n");
+ else if (k == -EOPNOTSUPP)
+ printf(" Boot into FW: not supported\n");
+ else {
+ errno = -k;
+ printf(" Boot into FW: %sfailed%s (%m)\n", ansi_highlight_red(), ansi_normal());
+ }
+ printf("\n");
+
+ printf("Current Boot Loader:\n");
+ printf(" Product: %s%s%s\n", ansi_highlight(), strna(loader), ansi_normal());
+
+ for (i = 0; i < ELEMENTSOF(flags); i++)
+ print_yes_no_line(i == 0, FLAGS_SET(loader_features, flags[i].flag), flags[i].name);
+
+ sd_id128_t bootloader_esp_uuid;
+ bool have_bootloader_esp_uuid = efi_loader_get_device_part_uuid(&bootloader_esp_uuid) >= 0;
+
+ print_yes_no_line(false, have_bootloader_esp_uuid, "Boot loader sets ESP partition information");
+ if (have_bootloader_esp_uuid && !sd_id128_equal(esp_uuid, bootloader_esp_uuid))
+ printf("WARNING: The boot loader reports different ESP UUID then detected!\n");
+
+ if (stub)
+ printf(" Stub: %s\n", stub);
+ if (!sd_id128_is_null(loader_part_uuid))
+ printf(" ESP: /dev/disk/by-partuuid/" SD_ID128_UUID_FORMAT_STR "\n",
+ SD_ID128_FORMAT_VAL(loader_part_uuid));
+ else
+ printf(" ESP: n/a\n");
+ printf(" File: %s%s\n", special_glyph(SPECIAL_GLYPH_TREE_RIGHT), strna(loader_path));
+ printf("\n");
+
+ printf("Random Seed:\n");
+ printf(" Passed to OS: %s\n", yes_no(access("/sys/firmware/efi/efivars/LoaderRandomSeed-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f", F_OK) >= 0));
+ printf(" System Token: %s\n", access("/sys/firmware/efi/efivars/LoaderSystemToken-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f", F_OK) >= 0 ? "set" : "not set");
+
+ if (arg_esp_path) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(arg_esp_path, "/loader/random-seed");
+ if (!p)
+ return log_oom();
+
+ printf(" Exists: %s\n", yes_no(access(p, F_OK) >= 0));
+ }
+
+ printf("\n");
+ } else
+ printf("System:\n Not booted with EFI\n\n");
+
+ if (arg_esp_path) {
+ k = status_binaries(arg_esp_path, esp_uuid);
+ if (k < 0)
+ r = k;
+ }
+
+ if (is_efi_boot()) {
+ k = status_variables();
+ if (k < 0)
+ r = k;
+ }
+
+ if (arg_esp_path || arg_xbootldr_path) {
+ k = status_entries(arg_esp_path, esp_uuid, arg_xbootldr_path, xbootldr_uuid);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int verb_list(int argc, char *argv[], void *userdata) {
+ _cleanup_(boot_config_free) BootConfig config = {};
+ _cleanup_strv_free_ char **efi_entries = NULL;
+ int r;
+
+ /* If we lack privileges we invoke find_esp_and_warn() in "unprivileged mode" here, which does two things: turn
+ * off logging about access errors and turn off potentially privileged device probing. Here we're interested in
+ * the latter but not the former, hence request the mode, and log about EACCES. */
+
+ r = acquire_esp(geteuid() != 0, NULL, NULL, NULL, NULL);
+ if (r == -EACCES) /* We really need the ESP path for this call, hence also log about access errors */
+ return log_error_errno(r, "Failed to determine ESP: %m");
+ if (r < 0)
+ return r;
+
+ r = acquire_xbootldr(geteuid() != 0, NULL);
+ if (r == -EACCES)
+ return log_error_errno(r, "Failed to determine XBOOTLDR partition: %m");
+ if (r < 0)
+ return r;
+
+ r = boot_entries_load_config(arg_esp_path, arg_xbootldr_path, &config);
+ if (r < 0)
+ return r;
+
+ r = efi_loader_get_entries(&efi_entries);
+ if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
+ log_debug_errno(r, "Boot loader reported no entries.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to determine entries reported by boot loader, ignoring: %m");
+ else
+ (void) boot_entries_augment_from_loader(&config, efi_entries, false);
+
+ if (config.n_entries == 0)
+ log_info("No boot loader entries found.");
+ else {
+ size_t n;
+
+ (void) pager_open(arg_pager_flags);
+
+ printf("Boot Loader Entries:\n");
+
+ for (n = 0; n < config.n_entries; n++) {
+ r = boot_entry_show(config.entries + n, n == (size_t) config.default_entry);
+ if (r < 0)
+ return r;
+
+ if (n+1 < config.n_entries)
+ putchar('\n');
+ }
+ }
+
+ return 0;
+}
+
+static int install_random_seed(const char *esp) {
+ _cleanup_(unlink_and_freep) char *tmp = NULL;
+ _cleanup_free_ void *buffer = NULL;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_close_ int fd = -1;
+ size_t sz, token_size;
+ ssize_t n;
+ int r;
+
+ assert(esp);
+
+ path = path_join(esp, "/loader/random-seed");
+ if (!path)
+ return log_oom();
+
+ sz = random_pool_size();
+
+ buffer = malloc(sz);
+ if (!buffer)
+ return log_oom();
+
+ r = genuine_random_bytes(buffer, sz, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire random seed: %m");
+
+ /* Normally create_subdirs() should already have created everything we need, but in case "bootctl
+ * random-seed" is called we want to just create the minimum we need for it, and not the full
+ * list. */
+ r = mkdir_parents(path, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create parent directory for %s: %m", path);
+
+ r = tempfn_random(path, "bootctl", &tmp);
+ if (r < 0)
+ return log_oom();
+
+ fd = open(tmp, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
+ if (fd < 0) {
+ tmp = mfree(tmp);
+ return log_error_errno(fd, "Failed to open random seed file for writing: %m");
+ }
+
+ n = write(fd, buffer, sz);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to write random seed file: %m");
+ if ((size_t) n != sz)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while writing random seed file.");
+
+ if (rename(tmp, path) < 0)
+ return log_error_errno(r, "Failed to move random seed file into place: %m");
+
+ tmp = mfree(tmp);
+
+ log_info("Random seed file %s successfully written (%zu bytes).", path, sz);
+
+ if (!arg_touch_variables)
+ return 0;
+
+ if (!is_efi_boot()) {
+ log_notice("Not booted with EFI, skipping EFI variable setup.");
+ return 0;
+ }
+
+ r = getenv_bool("SYSTEMD_WRITE_SYSTEM_TOKEN");
+ if (r < 0) {
+ if (r != -ENXIO)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_WRITE_SYSTEM_TOKEN, ignoring.");
+
+ if (detect_vm() > 0) {
+ /* Let's not write a system token if we detect we are running in a VM
+ * environment. Why? Our default security model for the random seed uses the system
+ * token as a mechanism to ensure we are not vulnerable to golden master sloppiness
+ * issues, i.e. that people initialize the random seed file, then copy the image to
+ * many systems and end up with the same random seed in each that is assumed to be
+ * valid but in reality is the same for all machines. By storing a system token in
+ * the EFI variable space we can make sure that even though the random seeds on disk
+ * are all the same they will be different on each system under the assumption that
+ * the EFI variable space is maintained separate from the random seed storage. That
+ * is generally the case on physical systems, as the ESP is stored on persistent
+ * storage, and the EFI variables in NVRAM. However in virtualized environments this
+ * is generally not true: the EFI variable set is typically stored along with the
+ * disk image itself. For example, using the OVMF EFI firmware the EFI variables are
+ * stored in a file in the ESP itself. */
+
+ log_notice("Not installing system token, since we are running in a virtualized environment.");
+ return 0;
+ }
+ } else if (r == 0) {
+ log_notice("Not writing system token, because $SYSTEMD_WRITE_SYSTEM_TOKEN is set to false.");
+ return 0;
+ }
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderSystemToken", NULL, NULL, &token_size);
+ if (r == -ENODATA)
+ log_debug_errno(r, "LoaderSystemToken EFI variable is invalid (too short?), replacing.");
+ else if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to test system token validity: %m");
+ } else {
+ if (token_size >= sz) {
+ /* Let's avoid writes if we can, and initialize this only once. */
+ log_debug("System token already written, not updating.");
+ return 0;
+ }
+
+ log_debug("Existing system token size (%zu) does not match our expectations (%zu), replacing.", token_size, sz);
+ }
+
+ r = genuine_random_bytes(buffer, sz, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire random seed: %m");
+
+ /* Let's write this variable with an umask in effect, so that unprivileged users can't see the token
+ * and possibly get identification information or too much insight into the kernel's entropy pool
+ * state. */
+ RUN_WITH_UMASK(0077) {
+ r = efi_set_variable(EFI_VENDOR_LOADER, "LoaderSystemToken", buffer, sz);
+ if (r < 0) {
+ if (!arg_graceful)
+ return log_error_errno(r, "Failed to write 'LoaderSystemToken' EFI variable: %m");
+
+ if (r == -EINVAL)
+ log_warning_errno(r, "Unable to write 'LoaderSystemToken' EFI variable (firmware problem?), ignoring: %m");
+ else
+ log_warning_errno(r, "Unable to write 'LoaderSystemToken' EFI variable, ignoring: %m");
+ } else
+ log_info("Successfully initialized system token in EFI variable with %zu bytes.", sz);
+ }
+
+ return 0;
+}
+
+static int sync_everything(void) {
+ int ret = 0, k;
+
+ if (arg_esp_path) {
+ k = syncfs_path(AT_FDCWD, arg_esp_path);
+ if (k < 0)
+ ret = log_error_errno(k, "Failed to synchronize the ESP '%s': %m", arg_esp_path);
+ }
+
+ if (arg_xbootldr_path) {
+ k = syncfs_path(AT_FDCWD, arg_xbootldr_path);
+ if (k < 0)
+ ret = log_error_errno(k, "Failed to synchronize $BOOT '%s': %m", arg_xbootldr_path);
+ }
+
+ return ret;
+}
+
+static int verb_install(int argc, char *argv[], void *userdata) {
+ sd_id128_t uuid = SD_ID128_NULL;
+ uint64_t pstart = 0, psize = 0;
+ uint32_t part = 0;
+ bool install;
+ int r;
+
+ r = acquire_esp(false, &part, &pstart, &psize, &uuid);
+ if (r < 0)
+ return r;
+
+ r = acquire_xbootldr(false, NULL);
+ if (r < 0)
+ return r;
+
+ install = streq(argv[0], "install");
+
+ RUN_WITH_UMASK(0002) {
+ if (install) {
+ /* Don't create any of these directories when we are just updating. When we update
+ * we'll drop-in our files (unless there are newer ones already), but we won't create
+ * the directories for them in the first place. */
+ r = create_subdirs(arg_esp_path, esp_subdirs);
+ if (r < 0)
+ return r;
+
+ r = create_subdirs(arg_dollar_boot_path(), dollar_boot_subdirs);
+ if (r < 0)
+ return r;
+ }
+
+ r = install_binaries(arg_esp_path, install);
+ if (r < 0)
+ return r;
+
+ if (install) {
+ r = install_loader_config(arg_esp_path);
+ if (r < 0)
+ return r;
+
+ r = install_random_seed(arg_esp_path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ (void) sync_everything();
+
+ if (arg_touch_variables)
+ r = install_variables(arg_esp_path,
+ part, pstart, psize, uuid,
+ "/EFI/systemd/systemd-boot" EFI_MACHINE_TYPE_NAME ".efi",
+ install);
+
+ return r;
+}
+
+static int verb_remove(int argc, char *argv[], void *userdata) {
+ sd_id128_t uuid = SD_ID128_NULL;
+ int r, q;
+
+ r = acquire_esp(false, NULL, NULL, NULL, &uuid);
+ if (r < 0)
+ return r;
+
+ r = acquire_xbootldr(false, NULL);
+ if (r < 0)
+ return r;
+
+ r = remove_binaries(arg_esp_path);
+
+ q = remove_file(arg_esp_path, "/loader/loader.conf");
+ if (q < 0 && r >= 0)
+ r = q;
+
+ q = remove_file(arg_esp_path, "/loader/random-seed");
+ if (q < 0 && r >= 0)
+ r = q;
+
+ q = remove_subdirs(arg_esp_path, esp_subdirs);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ q = remove_subdirs(arg_esp_path, dollar_boot_subdirs);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ if (arg_xbootldr_path) {
+ /* Remove the latter two also in the XBOOTLDR partition if it exists */
+ q = remove_subdirs(arg_xbootldr_path, dollar_boot_subdirs);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ (void) sync_everything();
+
+ if (!arg_touch_variables)
+ return r;
+
+ q = remove_variables(uuid, "/EFI/systemd/systemd-boot" EFI_MACHINE_TYPE_NAME ".efi", true);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ q = remove_loader_variables();
+ if (q < 0 && r >= 0)
+ r = q;
+
+ return r;
+}
+
+static int verb_is_installed(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = acquire_esp(false, NULL, NULL, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ /* Tests whether systemd-boot is installed. It's not obvious what to use as check here: we could
+ * check EFI variables, we could check what binary /EFI/BOOT/BOOT*.EFI points to, or whether the
+ * loader entries directory exists. Here we opted to check whether /EFI/systemd/ is non-empty, which
+ * should be a suitable and very minimal check for a number of reasons:
+ *
+ * → The check is architecture independent (i.e. we check if any systemd-boot loader is installed, not a
+ * specific one.)
+ *
+ * → It doesn't assume we are the only boot loader (i.e doesn't check if we own the main
+ * /EFI/BOOT/BOOT*.EFI fallback binary.
+ *
+ * → It specifically checks for systemd-boot, not for other boot loaders (which a check for
+ * /boot/loader/entries would do). */
+
+ p = path_join(arg_esp_path, "/EFI/systemd/");
+ if (!p)
+ return log_oom();
+
+ r = dir_is_empty(p);
+ if (r > 0 || r == -ENOENT) {
+ puts("no");
+ return EXIT_FAILURE;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect whether systemd-boot is installed: %m");
+
+ puts("yes");
+ return EXIT_SUCCESS;
+}
+
+static int parse_loader_entry_target_arg(const char *arg1, char16_t **ret_target, size_t *ret_target_size) {
+ int r;
+ if (streq(arg1, "@current")) {
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntrySelected", NULL, (void *) ret_target, ret_target_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get EFI variable 'LoaderEntrySelected': %m");
+ } else if (streq(arg1, "@oneshot")) {
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntryOneShot", NULL, (void *) ret_target, ret_target_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get EFI variable 'LoaderEntryOneShot': %m");
+ } else if (streq(arg1, "@default")) {
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntryDefault", NULL, (void *) ret_target, ret_target_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get EFI variable 'LoaderEntryDefault': %m");
+ } else {
+ char16_t *encoded = NULL;
+ encoded = utf8_to_utf16(arg1, strlen(arg1));
+ if (!encoded)
+ return log_oom();
+ *ret_target = encoded;
+ *ret_target_size = char16_strlen(encoded) * 2 + 2;
+ }
+ return 0;
+}
+
+static int verb_set_default(int argc, char *argv[], void *userdata) {
+ const char *name;
+ int r;
+
+ if (!is_efi_boot())
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Not booted with UEFI.");
+
+ if (access("/sys/firmware/efi/efivars/LoaderInfo-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f", F_OK) < 0) {
+ if (errno == ENOENT) {
+ log_error_errno(errno, "Not booted with a supported boot loader.");
+ return -EOPNOTSUPP;
+ }
+
+ return log_error_errno(errno, "Failed to detect whether boot loader supports '%s' operation: %m", argv[0]);
+ }
+
+ if (detect_container() > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "'%s' operation not supported in a container.",
+ argv[0]);
+
+ if (!arg_touch_variables)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "'%s' operation cannot be combined with --touch-variables=no.",
+ argv[0]);
+
+ name = streq(argv[0], "set-default") ? "LoaderEntryDefault" : "LoaderEntryOneShot";
+
+ if (isempty(argv[1])) {
+ r = efi_set_variable(EFI_VENDOR_LOADER, name, NULL, 0);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to remove EFI variable '%s': %m", name);
+ } else {
+ _cleanup_free_ char16_t *target = NULL;
+ size_t target_size = 0;
+
+ r = parse_loader_entry_target_arg(argv[1], &target, &target_size);
+ if (r < 0)
+ return r;
+ r = efi_set_variable(EFI_VENDOR_LOADER, name, target, target_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update EFI variable '%s': %m", name);
+ }
+
+ return 0;
+}
+
+static int verb_random_seed(int argc, char *argv[], void *userdata) {
+ int r;
+
+ r = find_esp_and_warn(arg_esp_path, false, &arg_esp_path, NULL, NULL, NULL, NULL);
+ if (r == -ENOKEY) {
+ /* find_esp_and_warn() doesn't warn about ENOKEY, so let's do that on our own */
+ if (!arg_graceful)
+ return log_error_errno(r, "Unable to find ESP.");
+
+ log_notice("No ESP found, not initializing random seed.");
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ r = install_random_seed(arg_esp_path);
+ if (r < 0)
+ return r;
+
+ (void) sync_everything();
+ return 0;
+}
+
+static int verb_systemd_efi_options(int argc, char *argv[], void *userdata) {
+ int r;
+
+ if (argc == 1) {
+ _cleanup_free_ char *line = NULL;
+
+ r = systemd_efi_options_variable(&line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query SystemdOptions EFI variable: %m");
+
+ puts(line);
+
+ } else {
+ r = efi_set_variable_string(EFI_VENDOR_SYSTEMD, "SystemdOptions", argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set SystemdOptions EFI variable: %m");
+ }
+
+ return 0;
+}
+
+static int verb_reboot_to_firmware(int argc, char *argv[], void *userdata) {
+ int r;
+
+ if (argc < 2) {
+ r = efi_get_reboot_to_firmware();
+ if (r > 0) {
+ puts("active");
+ return EXIT_SUCCESS; /* success */
+ }
+ if (r == 0) {
+ puts("supported");
+ return 1; /* recognizable error #1 */
+ }
+ if (r == -EOPNOTSUPP) {
+ puts("not supported");
+ return 2; /* recognizable error #2 */
+ }
+
+ log_error_errno(r, "Failed to query reboot-to-firmware state: %m");
+ return 3; /* other kind of error */
+ } else {
+ r = parse_boolean(argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %s", argv[1]);
+
+ r = efi_set_reboot_to_firmware(r);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set reboot-to-firmware option: %m");
+
+ return 0;
+ }
+}
+
+static int bootctl_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "status", VERB_ANY, 1, VERB_DEFAULT, verb_status },
+ { "install", VERB_ANY, 1, 0, verb_install },
+ { "update", VERB_ANY, 1, 0, verb_install },
+ { "remove", VERB_ANY, 1, 0, verb_remove },
+ { "is-installed", VERB_ANY, 1, 0, verb_is_installed },
+ { "list", VERB_ANY, 1, 0, verb_list },
+ { "set-default", 2, 2, 0, verb_set_default },
+ { "set-oneshot", 2, 2, 0, verb_set_default },
+ { "random-seed", VERB_ANY, 1, 0, verb_random_seed },
+ { "systemd-efi-options", VERB_ANY, 2, 0, verb_systemd_efi_options },
+ { "reboot-to-firmware", VERB_ANY, 2, 0, verb_reboot_to_firmware },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ /* If we run in a container, automatically turn off EFI file system access */
+ if (detect_container() > 0)
+ arg_touch_variables = false;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return bootctl_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/boot/efi/boot.c b/src/boot/efi/boot.c
new file mode 100644
index 0000000..938e564
--- /dev/null
+++ b/src/boot/efi/boot.c
@@ -0,0 +1,2539 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efigpt.h>
+#include <efilib.h>
+
+#include "console.h"
+#include "crc32.h"
+#include "disk.h"
+#include "graphics.h"
+#include "linux.h"
+#include "loader-features.h"
+#include "measure.h"
+#include "pe.h"
+#include "random-seed.h"
+#include "shim.h"
+#include "util.h"
+
+#ifndef EFI_OS_INDICATIONS_BOOT_TO_FW_UI
+#define EFI_OS_INDICATIONS_BOOT_TO_FW_UI 0x0000000000000001ULL
+#endif
+
+/* magic string to find in the binary image */
+static const char __attribute__((used)) magic[] = "#### LoaderInfo: systemd-boot " GIT_VERSION " ####";
+
+static const EFI_GUID global_guid = EFI_GLOBAL_VARIABLE;
+
+enum loader_type {
+ LOADER_UNDEFINED,
+ LOADER_EFI,
+ LOADER_LINUX,
+};
+
+typedef struct {
+ CHAR16 *id; /* The unique identifier for this entry */
+ CHAR16 *title_show;
+ CHAR16 *title;
+ CHAR16 *version;
+ CHAR16 *machine_id;
+ EFI_HANDLE *device;
+ enum loader_type type;
+ CHAR16 *loader;
+ CHAR16 *options;
+ CHAR16 key;
+ EFI_STATUS (*call)(VOID);
+ BOOLEAN no_autoselect;
+ BOOLEAN non_unique;
+ UINTN tries_done;
+ UINTN tries_left;
+ CHAR16 *path;
+ CHAR16 *current_name;
+ CHAR16 *next_name;
+} ConfigEntry;
+
+typedef struct {
+ ConfigEntry **entries;
+ UINTN entry_count;
+ INTN idx_default;
+ INTN idx_default_efivar;
+ UINTN timeout_sec;
+ UINTN timeout_sec_config;
+ INTN timeout_sec_efivar;
+ CHAR16 *entry_default_pattern;
+ CHAR16 *entry_oneshot;
+ CHAR16 *options_edit;
+ BOOLEAN editor;
+ BOOLEAN auto_entries;
+ BOOLEAN auto_firmware;
+ BOOLEAN force_menu;
+ UINTN console_mode;
+ enum console_mode_change_type console_mode_change;
+ RandomSeedMode random_seed_mode;
+} Config;
+
+static VOID cursor_left(UINTN *cursor, UINTN *first) {
+ if ((*cursor) > 0)
+ (*cursor)--;
+ else if ((*first) > 0)
+ (*first)--;
+}
+
+static VOID cursor_right(
+ UINTN *cursor,
+ UINTN *first,
+ UINTN x_max,
+ UINTN len) {
+
+ if ((*cursor)+1 < x_max)
+ (*cursor)++;
+ else if ((*first) + (*cursor) < len)
+ (*first)++;
+}
+
+static BOOLEAN line_edit(
+ CHAR16 *line_in,
+ CHAR16 **line_out,
+ UINTN x_max,
+ UINTN y_pos) {
+
+ _cleanup_freepool_ CHAR16 *line = NULL, *print = NULL;
+ UINTN size, len, first, cursor, clear;
+ BOOLEAN exit, enter;
+
+ if (!line_in)
+ line_in = L"";
+ size = StrLen(line_in) + 1024;
+ line = AllocatePool(size * sizeof(CHAR16));
+ StrCpy(line, line_in);
+ len = StrLen(line);
+ print = AllocatePool((x_max+1) * sizeof(CHAR16));
+
+ uefi_call_wrapper(ST->ConOut->EnableCursor, 2, ST->ConOut, TRUE);
+
+ first = 0;
+ cursor = 0;
+ clear = 0;
+ enter = FALSE;
+ exit = FALSE;
+ while (!exit) {
+ EFI_STATUS err;
+ UINT64 key;
+ UINTN i;
+
+ i = len - first;
+ if (i >= x_max-1)
+ i = x_max-1;
+ CopyMem(print, line + first, i * sizeof(CHAR16));
+ while (clear > 0 && i < x_max-1) {
+ clear--;
+ print[i++] = ' ';
+ }
+ print[i] = '\0';
+
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_pos);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, print);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+
+ err = console_key_read(&key, TRUE);
+ if (EFI_ERROR(err))
+ continue;
+
+ switch (key) {
+ case KEYPRESS(0, SCAN_ESC, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'c'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'g'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('c')):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('g')):
+ exit = TRUE;
+ break;
+
+ case KEYPRESS(0, SCAN_HOME, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'a'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('a')):
+ /* beginning-of-line */
+ cursor = 0;
+ first = 0;
+ continue;
+
+ case KEYPRESS(0, SCAN_END, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'e'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('e')):
+ /* end-of-line */
+ cursor = len - first;
+ if (cursor+1 >= x_max) {
+ cursor = x_max-1;
+ first = len - (x_max-1);
+ }
+ continue;
+
+ case KEYPRESS(0, SCAN_DOWN, 0):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, 'f'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, SCAN_RIGHT, 0):
+ /* forward-word */
+ while (line[first + cursor] == ' ')
+ cursor_right(&cursor, &first, x_max, len);
+ while (line[first + cursor] && line[first + cursor] != ' ')
+ cursor_right(&cursor, &first, x_max, len);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+ continue;
+
+ case KEYPRESS(0, SCAN_UP, 0):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, 'b'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, SCAN_LEFT, 0):
+ /* backward-word */
+ if ((first + cursor) > 0 && line[first + cursor-1] == ' ') {
+ cursor_left(&cursor, &first);
+ while ((first + cursor) > 0 && line[first + cursor] == ' ')
+ cursor_left(&cursor, &first);
+ }
+ while ((first + cursor) > 0 && line[first + cursor-1] != ' ')
+ cursor_left(&cursor, &first);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+ continue;
+
+ case KEYPRESS(0, SCAN_RIGHT, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'f'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('f')):
+ /* forward-char */
+ if (first + cursor == len)
+ continue;
+ cursor_right(&cursor, &first, x_max, len);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+ continue;
+
+ case KEYPRESS(0, SCAN_LEFT, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'b'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('b')):
+ /* backward-char */
+ cursor_left(&cursor, &first);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+ continue;
+
+ case KEYPRESS(EFI_ALT_PRESSED, 0, 'd'):
+ /* kill-word */
+ clear = 0;
+ for (i = first + cursor; i < len && line[i] == ' '; i++)
+ clear++;
+ for (; i < len && line[i] != ' '; i++)
+ clear++;
+
+ for (i = first + cursor; i + clear < len; i++)
+ line[i] = line[i + clear];
+ len -= clear;
+ line[len] = '\0';
+ continue;
+
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'w'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('w')):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, CHAR_BACKSPACE):
+ /* backward-kill-word */
+ clear = 0;
+ if ((first + cursor) > 0 && line[first + cursor-1] == ' ') {
+ cursor_left(&cursor, &first);
+ clear++;
+ while ((first + cursor) > 0 && line[first + cursor] == ' ') {
+ cursor_left(&cursor, &first);
+ clear++;
+ }
+ }
+ while ((first + cursor) > 0 && line[first + cursor-1] != ' ') {
+ cursor_left(&cursor, &first);
+ clear++;
+ }
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, cursor, y_pos);
+
+ for (i = first + cursor; i + clear < len; i++)
+ line[i] = line[i + clear];
+ len -= clear;
+ line[len] = '\0';
+ continue;
+
+ case KEYPRESS(0, SCAN_DELETE, 0):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'd'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('d')):
+ if (len == 0)
+ continue;
+ if (first + cursor == len)
+ continue;
+ for (i = first + cursor; i < len; i++)
+ line[i] = line[i+1];
+ clear = 1;
+ len--;
+ continue;
+
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'k'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('k')):
+ /* kill-line */
+ line[first + cursor] = '\0';
+ clear = len - (first + cursor);
+ len = first + cursor;
+ continue;
+
+ case KEYPRESS(0, 0, CHAR_LINEFEED):
+ case KEYPRESS(0, 0, CHAR_CARRIAGE_RETURN):
+ case KEYPRESS(0, CHAR_CARRIAGE_RETURN, 0): /* EZpad Mini 4s firmware sends malformed events */
+ case KEYPRESS(0, CHAR_CARRIAGE_RETURN, CHAR_CARRIAGE_RETURN): /* Teclast X98+ II firmware sends malformed events */
+ if (StrCmp(line, line_in) != 0)
+ *line_out = TAKE_PTR(line);
+ enter = TRUE;
+ exit = TRUE;
+ break;
+
+ case KEYPRESS(0, 0, CHAR_BACKSPACE):
+ if (len == 0)
+ continue;
+ if (first == 0 && cursor == 0)
+ continue;
+ for (i = first + cursor-1; i < len; i++)
+ line[i] = line[i+1];
+ clear = 1;
+ len--;
+ if (cursor > 0)
+ cursor--;
+ if (cursor > 0 || first == 0)
+ continue;
+ /* show full line if it fits */
+ if (len < x_max) {
+ cursor = first;
+ first = 0;
+ continue;
+ }
+ /* jump left to see what we delete */
+ if (first > 10) {
+ first -= 10;
+ cursor = 10;
+ } else {
+ cursor = first;
+ first = 0;
+ }
+ continue;
+
+ case KEYPRESS(0, 0, ' ') ... KEYPRESS(0, 0, '~'):
+ case KEYPRESS(0, 0, 0x80) ... KEYPRESS(0, 0, 0xffff):
+ if (len+1 == size)
+ continue;
+ for (i = len; i > first + cursor; i--)
+ line[i] = line[i-1];
+ line[first + cursor] = KEYCHAR(key);
+ len++;
+ line[len] = '\0';
+ if (cursor+1 < x_max)
+ cursor++;
+ else if (first + cursor < len)
+ first++;
+ continue;
+ }
+ }
+
+ uefi_call_wrapper(ST->ConOut->EnableCursor, 2, ST->ConOut, FALSE);
+ return enter;
+}
+
+static UINTN entry_lookup_key(Config *config, UINTN start, CHAR16 key) {
+ UINTN i;
+
+ if (key == 0)
+ return -1;
+
+ /* select entry by number key */
+ if (key >= '1' && key <= '9') {
+ i = key - '0';
+ if (i > config->entry_count)
+ i = config->entry_count;
+ return i-1;
+ }
+
+ /* find matching key in config entries */
+ for (i = start; i < config->entry_count; i++)
+ if (config->entries[i]->key == key)
+ return i;
+
+ for (i = 0; i < start; i++)
+ if (config->entries[i]->key == key)
+ return i;
+
+ return -1;
+}
+
+static VOID print_status(Config *config, CHAR16 *loaded_image_path) {
+ UINT64 key;
+ UINTN i;
+ _cleanup_freepool_ CHAR8 *bootvar = NULL, *modevar = NULL, *indvar = NULL;
+ _cleanup_freepool_ CHAR16 *partstr = NULL, *defaultstr = NULL;
+ UINTN x, y, size;
+
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+
+ Print(L"systemd-boot version: " GIT_VERSION "\n");
+ Print(L"architecture: " EFI_MACHINE_TYPE_NAME "\n");
+ Print(L"loaded image: %s\n", loaded_image_path);
+ Print(L"UEFI specification: %d.%02d\n", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff);
+ Print(L"firmware vendor: %s\n", ST->FirmwareVendor);
+ Print(L"firmware version: %d.%02d\n", ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
+
+ if (uefi_call_wrapper(ST->ConOut->QueryMode, 4, ST->ConOut, ST->ConOut->Mode->Mode, &x, &y) == EFI_SUCCESS)
+ Print(L"console size: %d x %d\n", x, y);
+
+ if (efivar_get_raw(&global_guid, L"SecureBoot", &bootvar, &size) == EFI_SUCCESS)
+ Print(L"SecureBoot: %s\n", yes_no(*bootvar > 0));
+
+ if (efivar_get_raw(&global_guid, L"SetupMode", &modevar, &size) == EFI_SUCCESS)
+ Print(L"SetupMode: %s\n", *modevar > 0 ? L"setup" : L"user");
+
+ if (shim_loaded())
+ Print(L"Shim: present\n");
+
+ if (efivar_get_raw(&global_guid, L"OsIndicationsSupported", &indvar, &size) == EFI_SUCCESS)
+ Print(L"OsIndicationsSupported: %d\n", (UINT64)*indvar);
+
+ Print(L"\n--- press key ---\n\n");
+ console_key_read(&key, TRUE);
+
+ Print(L"timeout: %u\n", config->timeout_sec);
+ if (config->timeout_sec_efivar >= 0)
+ Print(L"timeout (EFI var): %d\n", config->timeout_sec_efivar);
+ Print(L"timeout (config): %u\n", config->timeout_sec_config);
+ if (config->entry_default_pattern)
+ Print(L"default pattern: '%s'\n", config->entry_default_pattern);
+ Print(L"editor: %s\n", yes_no(config->editor));
+ Print(L"auto-entries: %s\n", yes_no(config->auto_entries));
+ Print(L"auto-firmware: %s\n", yes_no(config->auto_firmware));
+
+ switch (config->random_seed_mode) {
+ case RANDOM_SEED_OFF:
+ Print(L"random-seed-mode: off\n");
+ break;
+ case RANDOM_SEED_WITH_SYSTEM_TOKEN:
+ Print(L"random-seed-mode: with-system-token\n");
+ break;
+ case RANDOM_SEED_ALWAYS:
+ Print(L"random-seed-mode: always\n");
+ break;
+ default:
+ ;
+ }
+
+ Print(L"\n");
+
+ Print(L"config entry count: %d\n", config->entry_count);
+ Print(L"entry selected idx: %d\n", config->idx_default);
+ if (config->idx_default_efivar >= 0)
+ Print(L"entry EFI var idx: %d\n", config->idx_default_efivar);
+ Print(L"\n");
+
+ if (efivar_get_int(L"LoaderConfigTimeout", &i) == EFI_SUCCESS)
+ Print(L"LoaderConfigTimeout: %u\n", i);
+
+ if (config->entry_oneshot)
+ Print(L"LoaderEntryOneShot: %s\n", config->entry_oneshot);
+ if (efivar_get(L"LoaderDevicePartUUID", &partstr) == EFI_SUCCESS)
+ Print(L"LoaderDevicePartUUID: %s\n", partstr);
+ if (efivar_get(L"LoaderEntryDefault", &defaultstr) == EFI_SUCCESS)
+ Print(L"LoaderEntryDefault: %s\n", defaultstr);
+
+ Print(L"\n--- press key ---\n\n");
+ console_key_read(&key, TRUE);
+
+ for (i = 0; i < config->entry_count; i++) {
+ ConfigEntry *entry;
+
+ if (key == KEYPRESS(0, SCAN_ESC, 0) || key == KEYPRESS(0, 0, 'q'))
+ break;
+
+ entry = config->entries[i];
+ Print(L"config entry: %d/%d\n", i+1, config->entry_count);
+ if (entry->id)
+ Print(L"id '%s'\n", entry->id);
+ Print(L"title show '%s'\n", entry->title_show);
+ if (entry->title)
+ Print(L"title '%s'\n", entry->title);
+ if (entry->version)
+ Print(L"version '%s'\n", entry->version);
+ if (entry->machine_id)
+ Print(L"machine-id '%s'\n", entry->machine_id);
+ if (entry->device) {
+ EFI_DEVICE_PATH *device_path;
+
+ device_path = DevicePathFromHandle(entry->device);
+ if (device_path) {
+ _cleanup_freepool_ CHAR16 *str;
+
+ str = DevicePathToStr(device_path);
+ Print(L"device handle '%s'\n", str);
+ }
+ }
+ if (entry->loader)
+ Print(L"loader '%s'\n", entry->loader);
+ if (entry->options)
+ Print(L"options '%s'\n", entry->options);
+ Print(L"auto-select %s\n", yes_no(!entry->no_autoselect));
+ if (entry->call)
+ Print(L"internal call yes\n");
+
+ if (entry->tries_left != (UINTN) -1)
+ Print(L"counting boots yes\n"
+ "tries done %u\n"
+ "tries left %u\n"
+ "current path %s\\%s\n"
+ "next path %s\\%s\n",
+ entry->tries_done,
+ entry->tries_left,
+ entry->path, entry->current_name,
+ entry->path, entry->next_name);
+
+ Print(L"\n--- press key ---\n\n");
+ console_key_read(&key, TRUE);
+ }
+
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+}
+
+static BOOLEAN menu_run(
+ Config *config,
+ ConfigEntry **chosen_entry,
+ CHAR16 *loaded_image_path) {
+
+ EFI_STATUS err;
+ UINTN visible_max;
+ UINTN idx_highlight;
+ UINTN idx_highlight_prev;
+ UINTN idx_first;
+ UINTN idx_last;
+ BOOLEAN refresh;
+ BOOLEAN highlight;
+ UINTN i;
+ UINTN line_width;
+ CHAR16 **lines;
+ UINTN x_start;
+ UINTN y_start;
+ UINTN x_max;
+ UINTN y_max;
+ CHAR16 *status;
+ CHAR16 *clearline;
+ INTN timeout_remain;
+ INT16 idx;
+ BOOLEAN exit = FALSE;
+ BOOLEAN run = TRUE;
+ BOOLEAN wait = FALSE;
+
+ graphics_mode(FALSE);
+ uefi_call_wrapper(ST->ConIn->Reset, 2, ST->ConIn, FALSE);
+ uefi_call_wrapper(ST->ConOut->EnableCursor, 2, ST->ConOut, FALSE);
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+
+ /* draw a single character to make ClearScreen work on some firmware */
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, L" ");
+
+ if (config->console_mode_change != CONSOLE_MODE_KEEP) {
+ err = console_set_mode(&config->console_mode, config->console_mode_change);
+ if (EFI_ERROR(err)) {
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+ Print(L"Error switching console mode to %ld: %r.\r", (UINT64)config->console_mode, err);
+ }
+ } else
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+
+ err = uefi_call_wrapper(ST->ConOut->QueryMode, 4, ST->ConOut, ST->ConOut->Mode->Mode, &x_max, &y_max);
+ if (EFI_ERROR(err)) {
+ x_max = 80;
+ y_max = 25;
+ }
+
+ /* we check 10 times per second for a keystroke */
+ if (config->timeout_sec > 0)
+ timeout_remain = config->timeout_sec * 10;
+ else
+ timeout_remain = -1;
+
+ idx_highlight = config->idx_default;
+ idx_highlight_prev = 0;
+
+ visible_max = y_max - 2;
+
+ if ((UINTN)config->idx_default >= visible_max)
+ idx_first = config->idx_default-1;
+ else
+ idx_first = 0;
+
+ idx_last = idx_first + visible_max-1;
+
+ refresh = TRUE;
+ highlight = FALSE;
+
+ /* length of the longest entry */
+ line_width = 5;
+ for (i = 0; i < config->entry_count; i++) {
+ UINTN entry_len;
+
+ entry_len = StrLen(config->entries[i]->title_show);
+ if (line_width < entry_len)
+ line_width = entry_len;
+ }
+ if (line_width > x_max-6)
+ line_width = x_max-6;
+
+ /* offsets to center the entries on the screen */
+ x_start = (x_max - (line_width)) / 2;
+ if (config->entry_count < visible_max)
+ y_start = ((visible_max - config->entry_count) / 2) + 1;
+ else
+ y_start = 0;
+
+ /* menu entries title lines */
+ lines = AllocatePool(sizeof(CHAR16 *) * config->entry_count);
+ for (i = 0; i < config->entry_count; i++) {
+ UINTN j, k;
+
+ lines[i] = AllocatePool(((x_max+1) * sizeof(CHAR16)));
+ for (j = 0; j < x_start; j++)
+ lines[i][j] = ' ';
+
+ for (k = 0; config->entries[i]->title_show[k] != '\0' && j < x_max; j++, k++)
+ lines[i][j] = config->entries[i]->title_show[k];
+
+ for (; j < x_max; j++)
+ lines[i][j] = ' ';
+ lines[i][x_max] = '\0';
+ }
+
+ status = NULL;
+ clearline = AllocatePool((x_max+1) * sizeof(CHAR16));
+ for (i = 0; i < x_max; i++)
+ clearline[i] = ' ';
+ clearline[i] = 0;
+
+ while (!exit) {
+ UINT64 key;
+
+ if (refresh) {
+ for (i = 0; i < config->entry_count; i++) {
+ if (i < idx_first || i > idx_last)
+ continue;
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_start + i - idx_first);
+ if (i == idx_highlight)
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut,
+ EFI_BLACK|EFI_BACKGROUND_LIGHTGRAY);
+ else
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut,
+ EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, lines[i]);
+ if ((INTN)i == config->idx_default_efivar) {
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, x_start-3, y_start + i - idx_first);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, L"=>");
+ }
+ }
+ refresh = FALSE;
+ } else if (highlight) {
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_start + idx_highlight_prev - idx_first);
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, lines[idx_highlight_prev]);
+ if ((INTN)idx_highlight_prev == config->idx_default_efivar) {
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, x_start-3, y_start + idx_highlight_prev - idx_first);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, L"=>");
+ }
+
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_start + idx_highlight - idx_first);
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_BLACK|EFI_BACKGROUND_LIGHTGRAY);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, lines[idx_highlight]);
+ if ((INTN)idx_highlight == config->idx_default_efivar) {
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, x_start-3, y_start + idx_highlight - idx_first);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, L"=>");
+ }
+ highlight = FALSE;
+ }
+
+ if (timeout_remain > 0) {
+ FreePool(status);
+ status = PoolPrint(L"Boot in %d sec.", (timeout_remain + 5) / 10);
+ }
+
+ /* print status at last line of screen */
+ if (status) {
+ UINTN len;
+ UINTN x;
+
+ /* center line */
+ len = StrLen(status);
+ if (len < x_max)
+ x = (x_max - len) / 2;
+ else
+ x = 0;
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_max-1);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline + (x_max - x));
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, status);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline+1 + x + len);
+ }
+
+ err = console_key_read(&key, wait);
+ if (EFI_ERROR(err)) {
+ /* timeout reached */
+ if (timeout_remain == 0) {
+ exit = TRUE;
+ break;
+ }
+
+ /* sleep and update status */
+ if (timeout_remain > 0) {
+ uefi_call_wrapper(BS->Stall, 1, 100 * 1000);
+ timeout_remain--;
+ continue;
+ }
+
+ /* timeout disabled, wait for next key */
+ wait = TRUE;
+ continue;
+ }
+
+ timeout_remain = -1;
+
+ /* clear status after keystroke */
+ if (status) {
+ FreePool(status);
+ status = NULL;
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_max-1);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline+1);
+ }
+
+ idx_highlight_prev = idx_highlight;
+
+ switch (key) {
+ case KEYPRESS(0, SCAN_UP, 0):
+ case KEYPRESS(0, 0, 'k'):
+ if (idx_highlight > 0)
+ idx_highlight--;
+ break;
+
+ case KEYPRESS(0, SCAN_DOWN, 0):
+ case KEYPRESS(0, 0, 'j'):
+ if (idx_highlight < config->entry_count-1)
+ idx_highlight++;
+ break;
+
+ case KEYPRESS(0, SCAN_HOME, 0):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, '<'):
+ if (idx_highlight > 0) {
+ refresh = TRUE;
+ idx_highlight = 0;
+ }
+ break;
+
+ case KEYPRESS(0, SCAN_END, 0):
+ case KEYPRESS(EFI_ALT_PRESSED, 0, '>'):
+ if (idx_highlight < config->entry_count-1) {
+ refresh = TRUE;
+ idx_highlight = config->entry_count-1;
+ }
+ break;
+
+ case KEYPRESS(0, SCAN_PAGE_UP, 0):
+ if (idx_highlight > visible_max)
+ idx_highlight -= visible_max;
+ else
+ idx_highlight = 0;
+ break;
+
+ case KEYPRESS(0, SCAN_PAGE_DOWN, 0):
+ idx_highlight += visible_max;
+ if (idx_highlight > config->entry_count-1)
+ idx_highlight = config->entry_count-1;
+ break;
+
+ case KEYPRESS(0, 0, CHAR_LINEFEED):
+ case KEYPRESS(0, 0, CHAR_CARRIAGE_RETURN):
+ case KEYPRESS(0, CHAR_CARRIAGE_RETURN, 0): /* EZpad Mini 4s firmware sends malformed events */
+ case KEYPRESS(0, CHAR_CARRIAGE_RETURN, CHAR_CARRIAGE_RETURN): /* Teclast X98+ II firmware sends malformed events */
+ case KEYPRESS(0, SCAN_RIGHT, 0):
+ exit = TRUE;
+ break;
+
+ case KEYPRESS(0, SCAN_F1, 0):
+ case KEYPRESS(0, 0, 'h'):
+ case KEYPRESS(0, 0, '?'):
+ status = StrDuplicate(L"(d)efault, (t/T)timeout, (e)dit, (v)ersion (Q)uit (P)rint (h)elp");
+ break;
+
+ case KEYPRESS(0, 0, 'Q'):
+ exit = TRUE;
+ run = FALSE;
+ break;
+
+ case KEYPRESS(0, 0, 'd'):
+ if (config->idx_default_efivar != (INTN)idx_highlight) {
+ /* store the selected entry in a persistent EFI variable */
+ efivar_set(L"LoaderEntryDefault", config->entries[idx_highlight]->id, TRUE);
+ config->idx_default_efivar = idx_highlight;
+ status = StrDuplicate(L"Default boot entry selected.");
+ } else {
+ /* clear the default entry EFI variable */
+ efivar_set(L"LoaderEntryDefault", NULL, TRUE);
+ config->idx_default_efivar = -1;
+ status = StrDuplicate(L"Default boot entry cleared.");
+ }
+ refresh = TRUE;
+ break;
+
+ case KEYPRESS(0, 0, '-'):
+ case KEYPRESS(0, 0, 'T'):
+ if (config->timeout_sec_efivar > 0) {
+ config->timeout_sec_efivar--;
+ efivar_set_int(L"LoaderConfigTimeout", config->timeout_sec_efivar, TRUE);
+ if (config->timeout_sec_efivar > 0)
+ status = PoolPrint(L"Menu timeout set to %d sec.", config->timeout_sec_efivar);
+ else
+ status = StrDuplicate(L"Menu disabled. Hold down key at bootup to show menu.");
+ } else if (config->timeout_sec_efivar <= 0){
+ config->timeout_sec_efivar = -1;
+ efivar_set(L"LoaderConfigTimeout", NULL, TRUE);
+ if (config->timeout_sec_config > 0)
+ status = PoolPrint(L"Menu timeout of %d sec is defined by configuration file.",
+ config->timeout_sec_config);
+ else
+ status = StrDuplicate(L"Menu disabled. Hold down key at bootup to show menu.");
+ }
+ break;
+
+ case KEYPRESS(0, 0, '+'):
+ case KEYPRESS(0, 0, 't'):
+ if (config->timeout_sec_efivar == -1 && config->timeout_sec_config == 0)
+ config->timeout_sec_efivar++;
+ config->timeout_sec_efivar++;
+ efivar_set_int(L"LoaderConfigTimeout", config->timeout_sec_efivar, TRUE);
+ if (config->timeout_sec_efivar > 0)
+ status = PoolPrint(L"Menu timeout set to %d sec.",
+ config->timeout_sec_efivar);
+ else
+ status = StrDuplicate(L"Menu disabled. Hold down key at bootup to show menu.");
+ break;
+
+ case KEYPRESS(0, 0, 'e'):
+ /* only the options of configured entries can be edited */
+ if (!config->editor || config->entries[idx_highlight]->type == LOADER_UNDEFINED)
+ break;
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_LIGHTGRAY|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_max-1);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline+1);
+ if (line_edit(config->entries[idx_highlight]->options, &config->options_edit, x_max-1, y_max-1))
+ exit = TRUE;
+ uefi_call_wrapper(ST->ConOut->SetCursorPosition, 3, ST->ConOut, 0, y_max-1);
+ uefi_call_wrapper(ST->ConOut->OutputString, 2, ST->ConOut, clearline+1);
+ break;
+
+ case KEYPRESS(0, 0, 'v'):
+ status = PoolPrint(L"systemd-boot " GIT_VERSION " (" EFI_MACHINE_TYPE_NAME "), UEFI Specification %d.%02d, Vendor %s %d.%02d",
+ ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff,
+ ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
+ break;
+
+ case KEYPRESS(0, 0, 'P'):
+ print_status(config, loaded_image_path);
+ refresh = TRUE;
+ break;
+
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, 'l'):
+ case KEYPRESS(EFI_CONTROL_PRESSED, 0, CHAR_CTRL('l')):
+ refresh = TRUE;
+ break;
+
+ default:
+ /* jump with a hotkey directly to a matching entry */
+ idx = entry_lookup_key(config, idx_highlight+1, KEYCHAR(key));
+ if (idx < 0)
+ break;
+ idx_highlight = idx;
+ refresh = TRUE;
+ }
+
+ if (idx_highlight > idx_last) {
+ idx_last = idx_highlight;
+ idx_first = 1 + idx_highlight - visible_max;
+ refresh = TRUE;
+ } else if (idx_highlight < idx_first) {
+ idx_first = idx_highlight;
+ idx_last = idx_highlight + visible_max-1;
+ refresh = TRUE;
+ }
+
+ if (!refresh && idx_highlight != idx_highlight_prev)
+ highlight = TRUE;
+ }
+
+ *chosen_entry = config->entries[idx_highlight];
+
+ for (i = 0; i < config->entry_count; i++)
+ FreePool(lines[i]);
+ FreePool(lines);
+ FreePool(clearline);
+
+ uefi_call_wrapper(ST->ConOut->SetAttribute, 2, ST->ConOut, EFI_WHITE|EFI_BACKGROUND_BLACK);
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+ return run;
+}
+
+static VOID config_add_entry(Config *config, ConfigEntry *entry) {
+ if ((config->entry_count & 15) == 0) {
+ UINTN i;
+
+ i = config->entry_count + 16;
+ if (config->entry_count == 0)
+ config->entries = AllocatePool(sizeof(VOID *) * i);
+ else
+ config->entries = ReallocatePool(config->entries,
+ sizeof(VOID *) * config->entry_count, sizeof(VOID *) * i);
+ }
+ config->entries[config->entry_count++] = entry;
+}
+
+static VOID config_entry_free(ConfigEntry *entry) {
+ if (!entry)
+ return;
+
+ FreePool(entry->id);
+ FreePool(entry->title_show);
+ FreePool(entry->title);
+ FreePool(entry->version);
+ FreePool(entry->machine_id);
+ FreePool(entry->loader);
+ FreePool(entry->options);
+ FreePool(entry->path);
+ FreePool(entry->current_name);
+ FreePool(entry->next_name);
+ FreePool(entry);
+}
+
+static BOOLEAN is_digit(CHAR16 c) {
+ return (c >= '0') && (c <= '9');
+}
+
+static UINTN c_order(CHAR16 c) {
+ if (c == '\0')
+ return 0;
+ if (is_digit(c))
+ return 0;
+ else if ((c >= 'a') && (c <= 'z'))
+ return c;
+ else
+ return c + 0x10000;
+}
+
+static INTN str_verscmp(CHAR16 *s1, CHAR16 *s2) {
+ CHAR16 *os1 = s1;
+ CHAR16 *os2 = s2;
+
+ while (*s1 || *s2) {
+ INTN first;
+
+ while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
+ INTN order;
+
+ order = c_order(*s1) - c_order(*s2);
+ if (order != 0)
+ return order;
+ s1++;
+ s2++;
+ }
+
+ while (*s1 == '0')
+ s1++;
+ while (*s2 == '0')
+ s2++;
+
+ first = 0;
+ while (is_digit(*s1) && is_digit(*s2)) {
+ if (first == 0)
+ first = *s1 - *s2;
+ s1++;
+ s2++;
+ }
+
+ if (is_digit(*s1))
+ return 1;
+ if (is_digit(*s2))
+ return -1;
+
+ if (first != 0)
+ return first;
+ }
+
+ return StrCmp(os1, os2);
+}
+
+static CHAR8 *line_get_key_value(
+ CHAR8 *content,
+ CHAR8 *sep,
+ UINTN *pos,
+ CHAR8 **key_ret,
+ CHAR8 **value_ret) {
+
+ CHAR8 *line;
+ UINTN linelen;
+ CHAR8 *value;
+
+skip:
+ line = content + *pos;
+ if (*line == '\0')
+ return NULL;
+
+ linelen = 0;
+ while (line[linelen] && !strchra((CHAR8 *)"\n\r", line[linelen]))
+ linelen++;
+
+ /* move pos to next line */
+ *pos += linelen;
+ if (content[*pos])
+ (*pos)++;
+
+ /* empty line */
+ if (linelen == 0)
+ goto skip;
+
+ /* terminate line */
+ line[linelen] = '\0';
+
+ /* remove leading whitespace */
+ while (strchra((CHAR8 *)" \t", *line)) {
+ line++;
+ linelen--;
+ }
+
+ /* remove trailing whitespace */
+ while (linelen > 0 && strchra((CHAR8 *)" \t", line[linelen-1]))
+ linelen--;
+ line[linelen] = '\0';
+
+ if (*line == '#')
+ goto skip;
+
+ /* split key/value */
+ value = line;
+ while (*value && !strchra(sep, *value))
+ value++;
+ if (*value == '\0')
+ goto skip;
+ *value = '\0';
+ value++;
+ while (*value && strchra(sep, *value))
+ value++;
+
+ /* unquote */
+ if (value[0] == '"' && line[linelen-1] == '"') {
+ value++;
+ line[linelen-1] = '\0';
+ }
+
+ *key_ret = line;
+ *value_ret = value;
+ return line;
+}
+
+static VOID config_defaults_load_from_file(Config *config, CHAR8 *content) {
+ CHAR8 *line;
+ UINTN pos = 0;
+ CHAR8 *key, *value;
+
+ while ((line = line_get_key_value(content, (CHAR8 *)" \t", &pos, &key, &value))) {
+ if (strcmpa((CHAR8 *)"timeout", key) == 0) {
+ _cleanup_freepool_ CHAR16 *s = NULL;
+
+ s = stra_to_str(value);
+ config->timeout_sec_config = Atoi(s);
+ config->timeout_sec = config->timeout_sec_config;
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"default", key) == 0) {
+ FreePool(config->entry_default_pattern);
+ config->entry_default_pattern = stra_to_str(value);
+ StrLwr(config->entry_default_pattern);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"editor", key) == 0) {
+ BOOLEAN on;
+
+ if (EFI_ERROR(parse_boolean(value, &on)))
+ continue;
+
+ config->editor = on;
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"auto-entries", key) == 0) {
+ BOOLEAN on;
+
+ if (EFI_ERROR(parse_boolean(value, &on)))
+ continue;
+
+ config->auto_entries = on;
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"auto-firmware", key) == 0) {
+ BOOLEAN on;
+
+ if (EFI_ERROR(parse_boolean(value, &on)))
+ continue;
+
+ config->auto_firmware = on;
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"console-mode", key) == 0) {
+ if (strcmpa((CHAR8 *)"auto", value) == 0)
+ config->console_mode_change = CONSOLE_MODE_AUTO;
+ else if (strcmpa((CHAR8 *)"max", value) == 0)
+ config->console_mode_change = CONSOLE_MODE_MAX;
+ else if (strcmpa((CHAR8 *)"keep", value) == 0)
+ config->console_mode_change = CONSOLE_MODE_KEEP;
+ else {
+ _cleanup_freepool_ CHAR16 *s = NULL;
+
+ s = stra_to_str(value);
+ config->console_mode = Atoi(s);
+ config->console_mode_change = CONSOLE_MODE_SET;
+ }
+
+ continue;
+ }
+
+ if (strcmpa((CHAR8*) "random-seed-mode", key) == 0) {
+ if (strcmpa((CHAR8*) "off", value) == 0)
+ config->random_seed_mode = RANDOM_SEED_OFF;
+ else if (strcmpa((CHAR8*) "with-system-token", value) == 0)
+ config->random_seed_mode = RANDOM_SEED_WITH_SYSTEM_TOKEN;
+ else if (strcmpa((CHAR8*) "always", value) == 0)
+ config->random_seed_mode = RANDOM_SEED_ALWAYS;
+ else {
+ BOOLEAN on;
+
+ if (EFI_ERROR(parse_boolean(value, &on)))
+ continue;
+
+ config->random_seed_mode = on ? RANDOM_SEED_ALWAYS : RANDOM_SEED_OFF;
+ }
+ }
+ }
+}
+
+static VOID config_entry_parse_tries(
+ ConfigEntry *entry,
+ CHAR16 *path,
+ CHAR16 *file,
+ CHAR16 *suffix) {
+
+ UINTN left = (UINTN) -1, done = (UINTN) -1, factor = 1, i, next_left, next_done;
+ _cleanup_freepool_ CHAR16 *prefix = NULL;
+
+ /*
+ * Parses a suffix of two counters (one going down, one going up) in the form "+LEFT-DONE" from the end of the
+ * filename (but before the .efi/.conf suffix), where the "-DONE" part is optional and may be left out (in
+ * which case that counter as assumed to be zero, i.e. the missing part is synonymous to "-0").
+ *
+ * Names we grok, and the series they result in:
+ *
+ * foobar+3.efi → foobar+2-1.efi → foobar+1-2.efi → foobar+0-3.efi → STOP!
+ * foobar+4-0.efi → foobar+3-1.efi → foobar+2-2.efi → foobar+1-3.efi → foobar+0-4.efi → STOP!
+ */
+
+ i = StrLen(file);
+
+ /* Chop off any suffix such as ".conf" or ".efi" */
+ if (suffix) {
+ UINTN suffix_length;
+
+ suffix_length = StrLen(suffix);
+ if (i < suffix_length)
+ return;
+
+ i -= suffix_length;
+ }
+
+ /* Go backwards through the string and parse everything we encounter */
+ for (;;) {
+ if (i == 0)
+ return;
+
+ i--;
+
+ switch (file[i]) {
+
+ case '+':
+ if (left == (UINTN) -1) /* didn't read at least one digit for 'left'? */
+ return;
+
+ if (done == (UINTN) -1) /* no 'done' counter? If so, it's equivalent to 0 */
+ done = 0;
+
+ goto good;
+
+ case '-':
+ if (left == (UINTN) -1) /* didn't parse any digit yet? */
+ return;
+
+ if (done != (UINTN) -1) /* already encountered a dash earlier? */
+ return;
+
+ /* So we encountered a dash. This means this counter is of the form +LEFT-DONE. Let's assign
+ * what we already parsed to 'done', and start fresh for the 'left' part. */
+
+ done = left;
+ left = (UINTN) -1;
+ factor = 1;
+ break;
+
+ case '0'...'9': {
+ UINTN new_factor;
+
+ if (left == (UINTN) -1)
+ left = file[i] - '0';
+ else {
+ UINTN new_left, digit;
+
+ digit = file[i] - '0';
+ if (digit > (UINTN) -1 / factor) /* overflow check */
+ return;
+
+ new_left = left + digit * factor;
+ if (new_left < left) /* overflow check */
+ return;
+
+ if (new_left == (UINTN) -1) /* don't allow us to be confused */
+ return;
+ }
+
+ new_factor = factor * 10;
+ if (new_factor < factor) /* overflow check */
+ return;
+
+ factor = new_factor;
+ break;
+ }
+
+ default:
+ return;
+ }
+ }
+
+good:
+ entry->tries_left = left;
+ entry->tries_done = done;
+
+ entry->path = StrDuplicate(path);
+ entry->current_name = StrDuplicate(file);
+
+ next_left = left <= 0 ? 0 : left - 1;
+ next_done = done >= (UINTN) -2 ? (UINTN) -2 : done + 1;
+
+ prefix = StrDuplicate(file);
+ prefix[i] = 0;
+
+ entry->next_name = PoolPrint(L"%s+%u-%u%s", prefix, next_left, next_done, suffix ?: L"");
+}
+
+static VOID config_entry_bump_counters(
+ ConfigEntry *entry,
+ EFI_FILE_HANDLE root_dir) {
+
+ _cleanup_freepool_ CHAR16* old_path = NULL, *new_path = NULL;
+ _cleanup_(FileHandleClosep) EFI_FILE_HANDLE handle = NULL;
+ static EFI_GUID EfiFileInfoGuid = EFI_FILE_INFO_ID;
+ _cleanup_freepool_ EFI_FILE_INFO *file_info = NULL;
+ UINTN file_info_size, a, b;
+ EFI_STATUS r;
+
+ if (entry->tries_left == (UINTN) -1)
+ return;
+
+ if (!entry->path || !entry->current_name || !entry->next_name)
+ return;
+
+ old_path = PoolPrint(L"%s\\%s", entry->path, entry->current_name);
+
+ r = uefi_call_wrapper(root_dir->Open, 5, root_dir, &handle, old_path, EFI_FILE_MODE_READ|EFI_FILE_MODE_WRITE, 0ULL);
+ if (EFI_ERROR(r))
+ return;
+
+ a = StrLen(entry->current_name);
+ b = StrLen(entry->next_name);
+
+ file_info_size = OFFSETOF(EFI_FILE_INFO, FileName) + (a > b ? a : b) + 1;
+
+ for (;;) {
+ file_info = AllocatePool(file_info_size);
+
+ r = uefi_call_wrapper(handle->GetInfo, 4, handle, &EfiFileInfoGuid, &file_info_size, file_info);
+ if (!EFI_ERROR(r))
+ break;
+
+ if (r != EFI_BUFFER_TOO_SMALL || file_info_size * 2 < file_info_size) {
+ Print(L"\nFailed to get file info for '%s': %r\n", old_path, r);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return;
+ }
+
+ file_info_size *= 2;
+ FreePool(file_info);
+ }
+
+ /* And rename the file */
+ StrCpy(file_info->FileName, entry->next_name);
+ r = uefi_call_wrapper(handle->SetInfo, 4, handle, &EfiFileInfoGuid, file_info_size, file_info);
+ if (EFI_ERROR(r)) {
+ Print(L"\nFailed to rename '%s' to '%s', ignoring: %r\n", old_path, entry->next_name, r);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return;
+ }
+
+ /* Flush everything to disk, just in case… */
+ (void) uefi_call_wrapper(handle->Flush, 1, handle);
+
+ /* Let's tell the OS that we renamed this file, so that it knows what to rename to the counter-less name on
+ * success */
+ new_path = PoolPrint(L"%s\\%s", entry->path, entry->next_name);
+ efivar_set(L"LoaderBootCountPath", new_path, FALSE);
+
+ /* If the file we just renamed is the loader path, then let's update that. */
+ if (StrCmp(entry->loader, old_path) == 0) {
+ FreePool(entry->loader);
+ entry->loader = TAKE_PTR(new_path);
+ }
+}
+
+static VOID config_entry_add_from_file(
+ Config *config,
+ EFI_HANDLE *device,
+ EFI_FILE *root_dir,
+ CHAR16 *path,
+ CHAR16 *file,
+ CHAR8 *content,
+ CHAR16 *loaded_image_path) {
+
+ ConfigEntry *entry;
+ CHAR8 *line;
+ UINTN pos = 0;
+ CHAR8 *key, *value;
+ EFI_STATUS err;
+ EFI_FILE_HANDLE handle;
+ _cleanup_freepool_ CHAR16 *initrd = NULL;
+
+ entry = AllocatePool(sizeof(ConfigEntry));
+
+ *entry = (ConfigEntry) {
+ .tries_done = (UINTN) -1,
+ .tries_left = (UINTN) -1,
+ };
+
+ while ((line = line_get_key_value(content, (CHAR8 *)" \t", &pos, &key, &value))) {
+ if (strcmpa((CHAR8 *)"title", key) == 0) {
+ FreePool(entry->title);
+ entry->title = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"version", key) == 0) {
+ FreePool(entry->version);
+ entry->version = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"machine-id", key) == 0) {
+ FreePool(entry->machine_id);
+ entry->machine_id = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"linux", key) == 0) {
+ FreePool(entry->loader);
+ entry->type = LOADER_LINUX;
+ entry->loader = stra_to_path(value);
+ entry->key = 'l';
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"efi", key) == 0) {
+ entry->type = LOADER_EFI;
+ FreePool(entry->loader);
+ entry->loader = stra_to_path(value);
+
+ /* do not add an entry for ourselves */
+ if (loaded_image_path && StriCmp(entry->loader, loaded_image_path) == 0) {
+ entry->type = LOADER_UNDEFINED;
+ break;
+ }
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"architecture", key) == 0) {
+ /* do not add an entry for an EFI image of architecture not matching with that of the image */
+ if (strcmpa((CHAR8 *)EFI_MACHINE_TYPE_NAME, value) != 0) {
+ entry->type = LOADER_UNDEFINED;
+ break;
+ }
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"initrd", key) == 0) {
+ _cleanup_freepool_ CHAR16 *new = NULL;
+
+ new = stra_to_path(value);
+ if (initrd) {
+ CHAR16 *s;
+
+ s = PoolPrint(L"%s initrd=%s", initrd, new);
+ FreePool(initrd);
+ initrd = s;
+ } else
+ initrd = PoolPrint(L"initrd=%s", new);
+
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"options", key) == 0) {
+ _cleanup_freepool_ CHAR16 *new = NULL;
+
+ new = stra_to_str(value);
+ if (entry->options) {
+ CHAR16 *s;
+
+ s = PoolPrint(L"%s %s", entry->options, new);
+ FreePool(entry->options);
+ entry->options = s;
+ } else
+ entry->options = TAKE_PTR(new);
+
+ continue;
+ }
+ }
+
+ if (entry->type == LOADER_UNDEFINED) {
+ config_entry_free(entry);
+ return;
+ }
+
+ /* check existence */
+ err = uefi_call_wrapper(root_dir->Open, 5, root_dir, &handle, entry->loader, EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err)) {
+ config_entry_free(entry);
+ return;
+ }
+ uefi_call_wrapper(handle->Close, 1, handle);
+
+ /* add initrd= to options */
+ if (entry->type == LOADER_LINUX && initrd) {
+ if (entry->options) {
+ CHAR16 *s;
+
+ s = PoolPrint(L"%s %s", initrd, entry->options);
+ FreePool(entry->options);
+ entry->options = s;
+ } else
+ entry->options = TAKE_PTR(initrd);
+ }
+
+ entry->device = device;
+ entry->id = StrDuplicate(file);
+ StrLwr(entry->id);
+
+ config_add_entry(config, entry);
+
+ config_entry_parse_tries(entry, path, file, L".conf");
+}
+
+static VOID config_load_defaults(Config *config, EFI_FILE *root_dir) {
+ _cleanup_freepool_ CHAR8 *content = NULL;
+ UINTN sec;
+ EFI_STATUS err;
+
+ *config = (Config) {
+ .editor = TRUE,
+ .auto_entries = TRUE,
+ .auto_firmware = TRUE,
+ .random_seed_mode = RANDOM_SEED_WITH_SYSTEM_TOKEN,
+ };
+
+ err = file_read(root_dir, L"\\loader\\loader.conf", 0, 0, &content, NULL);
+ if (!EFI_ERROR(err))
+ config_defaults_load_from_file(config, content);
+
+ err = efivar_get_int(L"LoaderConfigTimeout", &sec);
+ if (!EFI_ERROR(err)) {
+ config->timeout_sec_efivar = sec > INTN_MAX ? INTN_MAX : sec;
+ config->timeout_sec = sec;
+ } else
+ config->timeout_sec_efivar = -1;
+
+ err = efivar_get_int(L"LoaderConfigTimeoutOneShot", &sec);
+ if (!EFI_ERROR(err)) {
+ /* Unset variable now, after all it's "one shot". */
+ (void) efivar_set(L"LoaderConfigTimeoutOneShot", NULL, TRUE);
+
+ config->timeout_sec = sec;
+ config->force_menu = TRUE; /* force the menu when this is set */
+ }
+}
+
+static VOID config_load_entries(
+ Config *config,
+ EFI_HANDLE *device,
+ EFI_FILE *root_dir,
+ CHAR16 *loaded_image_path) {
+
+ EFI_FILE_HANDLE entries_dir;
+ EFI_STATUS err;
+
+ err = uefi_call_wrapper(root_dir->Open, 5, root_dir, &entries_dir, L"\\loader\\entries", EFI_FILE_MODE_READ, 0ULL);
+ if (!EFI_ERROR(err)) {
+ for (;;) {
+ CHAR16 buf[256];
+ UINTN bufsize;
+ EFI_FILE_INFO *f;
+ _cleanup_freepool_ CHAR8 *content = NULL;
+ UINTN len;
+
+ bufsize = sizeof(buf);
+ err = uefi_call_wrapper(entries_dir->Read, 3, entries_dir, &bufsize, buf);
+ if (bufsize == 0 || EFI_ERROR(err))
+ break;
+
+ f = (EFI_FILE_INFO *) buf;
+ if (f->FileName[0] == '.')
+ continue;
+ if (f->Attribute & EFI_FILE_DIRECTORY)
+ continue;
+
+ len = StrLen(f->FileName);
+ if (len < 6)
+ continue;
+ if (StriCmp(f->FileName + len - 5, L".conf") != 0)
+ continue;
+ if (StrnCmp(f->FileName, L"auto-", 5) == 0)
+ continue;
+
+ err = file_read(entries_dir, f->FileName, 0, 0, &content, NULL);
+ if (!EFI_ERROR(err))
+ config_entry_add_from_file(config, device, root_dir, L"\\loader\\entries", f->FileName, content, loaded_image_path);
+ }
+ uefi_call_wrapper(entries_dir->Close, 1, entries_dir);
+ }
+}
+
+static INTN config_entry_compare(ConfigEntry *a, ConfigEntry *b) {
+ INTN r;
+
+ /* Order entries that have no tries left to the beginning of the list */
+ if (a->tries_left != 0 && b->tries_left == 0)
+ return 1;
+ if (a->tries_left == 0 && b->tries_left != 0)
+ return -1;
+
+ r = str_verscmp(a->id, b->id);
+ if (r != 0)
+ return r;
+
+ if (a->tries_left == (UINTN) -1 ||
+ b->tries_left == (UINTN) -1)
+ return 0;
+
+ /* If both items have boot counting, and otherwise are identical, put the entry with more tries left last */
+ if (a->tries_left > b->tries_left)
+ return 1;
+ if (a->tries_left < b->tries_left)
+ return -1;
+
+ /* If they have the same number of tries left, then let the one win which was tried fewer times so far */
+ if (a->tries_done < b->tries_done)
+ return 1;
+ if (a->tries_done > b->tries_done)
+ return -1;
+
+ return 0;
+}
+
+static VOID config_sort_entries(Config *config) {
+ UINTN i;
+
+ for (i = 1; i < config->entry_count; i++) {
+ BOOLEAN more;
+ UINTN k;
+
+ more = FALSE;
+ for (k = 0; k < config->entry_count - i; k++) {
+ ConfigEntry *entry;
+
+ if (config_entry_compare(config->entries[k], config->entries[k+1]) <= 0)
+ continue;
+
+ entry = config->entries[k];
+ config->entries[k] = config->entries[k+1];
+ config->entries[k+1] = entry;
+ more = TRUE;
+ }
+ if (!more)
+ break;
+ }
+}
+
+static INTN config_entry_find(Config *config, CHAR16 *id) {
+ UINTN i;
+
+ for (i = 0; i < config->entry_count; i++)
+ if (StrCmp(config->entries[i]->id, id) == 0)
+ return (INTN) i;
+
+ return -1;
+}
+
+static VOID config_default_entry_select(Config *config) {
+ _cleanup_freepool_ CHAR16 *entry_oneshot = NULL, *entry_default = NULL;
+ EFI_STATUS err;
+ INTN i;
+
+ /*
+ * The EFI variable to specify a boot entry for the next, and only the
+ * next reboot. The variable is always cleared directly after it is read.
+ */
+ err = efivar_get(L"LoaderEntryOneShot", &entry_oneshot);
+ if (!EFI_ERROR(err)) {
+
+ config->entry_oneshot = StrDuplicate(entry_oneshot);
+ efivar_set(L"LoaderEntryOneShot", NULL, TRUE);
+
+ i = config_entry_find(config, entry_oneshot);
+ if (i >= 0) {
+ config->idx_default = i;
+ return;
+ }
+ }
+
+ /*
+ * The EFI variable to select the default boot entry overrides the
+ * configured pattern. The variable can be set and cleared by pressing
+ * the 'd' key in the loader selection menu, the entry is marked with
+ * an '*'.
+ */
+ err = efivar_get(L"LoaderEntryDefault", &entry_default);
+ if (!EFI_ERROR(err)) {
+
+ i = config_entry_find(config, entry_default);
+ if (i >= 0) {
+ config->idx_default = i;
+ config->idx_default_efivar = i;
+ return;
+ }
+ }
+ config->idx_default_efivar = -1;
+
+ if (config->entry_count == 0)
+ return;
+
+ /*
+ * Match the pattern from the end of the list to the start, find last
+ * entry (largest number) matching the given pattern.
+ */
+ if (config->entry_default_pattern) {
+ i = config->entry_count;
+ while (i--) {
+ if (config->entries[i]->no_autoselect)
+ continue;
+ if (MetaiMatch(config->entries[i]->id, config->entry_default_pattern)) {
+ config->idx_default = i;
+ return;
+ }
+ }
+ }
+
+ /* select the last suitable entry */
+ i = config->entry_count;
+ while (i--) {
+ if (config->entries[i]->no_autoselect)
+ continue;
+ config->idx_default = i;
+ return;
+ }
+
+ /* no entry found */
+ config->idx_default = -1;
+}
+
+static BOOLEAN find_nonunique(ConfigEntry **entries, UINTN entry_count) {
+ BOOLEAN non_unique = FALSE;
+ UINTN i, k;
+
+ for (i = 0; i < entry_count; i++)
+ entries[i]->non_unique = FALSE;
+
+ for (i = 0; i < entry_count; i++)
+ for (k = 0; k < entry_count; k++) {
+ if (i == k)
+ continue;
+ if (StrCmp(entries[i]->title_show, entries[k]->title_show) != 0)
+ continue;
+
+ non_unique = entries[i]->non_unique = entries[k]->non_unique = TRUE;
+ }
+
+ return non_unique;
+}
+
+/* generate a unique title, avoiding non-distinguishable menu entries */
+static VOID config_title_generate(Config *config) {
+ UINTN i;
+
+ /* set title */
+ for (i = 0; i < config->entry_count; i++) {
+ CHAR16 *title;
+
+ FreePool(config->entries[i]->title_show);
+ title = config->entries[i]->title;
+ if (!title)
+ title = config->entries[i]->id;
+ config->entries[i]->title_show = StrDuplicate(title);
+ }
+
+ if (!find_nonunique(config->entries, config->entry_count))
+ return;
+
+ /* add version to non-unique titles */
+ for (i = 0; i < config->entry_count; i++) {
+ CHAR16 *s;
+
+ if (!config->entries[i]->non_unique)
+ continue;
+ if (!config->entries[i]->version)
+ continue;
+
+ s = PoolPrint(L"%s (%s)", config->entries[i]->title_show, config->entries[i]->version);
+ FreePool(config->entries[i]->title_show);
+ config->entries[i]->title_show = s;
+ }
+
+ if (!find_nonunique(config->entries, config->entry_count))
+ return;
+
+ /* add machine-id to non-unique titles */
+ for (i = 0; i < config->entry_count; i++) {
+ CHAR16 *s;
+ _cleanup_freepool_ CHAR16 *m = NULL;
+
+ if (!config->entries[i]->non_unique)
+ continue;
+ if (!config->entries[i]->machine_id)
+ continue;
+
+ m = StrDuplicate(config->entries[i]->machine_id);
+ m[8] = '\0';
+ s = PoolPrint(L"%s (%s)", config->entries[i]->title_show, m);
+ FreePool(config->entries[i]->title_show);
+ config->entries[i]->title_show = s;
+ }
+
+ if (!find_nonunique(config->entries, config->entry_count))
+ return;
+
+ /* add file name to non-unique titles */
+ for (i = 0; i < config->entry_count; i++) {
+ CHAR16 *s;
+
+ if (!config->entries[i]->non_unique)
+ continue;
+ s = PoolPrint(L"%s (%s)", config->entries[i]->title_show, config->entries[i]->id);
+ FreePool(config->entries[i]->title_show);
+ config->entries[i]->title_show = s;
+ config->entries[i]->non_unique = FALSE;
+ }
+}
+
+static BOOLEAN config_entry_add_call(
+ Config *config,
+ CHAR16 *id,
+ CHAR16 *title,
+ EFI_STATUS (*call)(VOID)) {
+
+ ConfigEntry *entry;
+
+ entry = AllocatePool(sizeof(ConfigEntry));
+ *entry = (ConfigEntry) {
+ .id = StrDuplicate(id),
+ .title = StrDuplicate(title),
+ .call = call,
+ .no_autoselect = TRUE,
+ .tries_done = (UINTN) -1,
+ .tries_left = (UINTN) -1,
+ };
+
+ config_add_entry(config, entry);
+ return TRUE;
+}
+
+static ConfigEntry *config_entry_add_loader(
+ Config *config,
+ EFI_HANDLE *device,
+ enum loader_type type,
+ CHAR16 *id,
+ CHAR16 key,
+ CHAR16 *title,
+ CHAR16 *loader,
+ CHAR16 *version) {
+
+ ConfigEntry *entry;
+
+ entry = AllocatePool(sizeof(ConfigEntry));
+ *entry = (ConfigEntry) {
+ .type = type,
+ .title = StrDuplicate(title),
+ .version = StrDuplicate(version),
+ .device = device,
+ .loader = StrDuplicate(loader),
+ .id = StrDuplicate(id),
+ .key = key,
+ .tries_done = (UINTN) -1,
+ .tries_left = (UINTN) -1,
+ };
+
+ StrLwr(entry->id);
+
+ config_add_entry(config, entry);
+ return entry;
+}
+
+static BOOLEAN config_entry_add_loader_auto(
+ Config *config,
+ EFI_HANDLE *device,
+ EFI_FILE *root_dir,
+ CHAR16 *loaded_image_path,
+ CHAR16 *id,
+ CHAR16 key,
+ CHAR16 *title,
+ CHAR16 *loader) {
+
+ EFI_FILE_HANDLE handle;
+ ConfigEntry *entry;
+ EFI_STATUS err;
+
+ if (!config->auto_entries)
+ return FALSE;
+
+ /* do not add an entry for ourselves */
+ if (loaded_image_path) {
+ UINTN len;
+ _cleanup_freepool_ CHAR8 *content = NULL;
+
+ if (StriCmp(loader, loaded_image_path) == 0)
+ return FALSE;
+
+ /* look for systemd-boot magic string */
+ err = file_read(root_dir, loader, 0, 100*1024, &content, &len);
+ if (!EFI_ERROR(err)) {
+ CHAR8 *start = content;
+ CHAR8 *last = content + len - sizeof(magic) - 1;
+
+ for (; start <= last; start++)
+ if (start[0] == magic[0] && CompareMem(start, magic, sizeof(magic) - 1) == 0)
+ return FALSE;
+ }
+ }
+
+ /* check existence */
+ err = uefi_call_wrapper(root_dir->Open, 5, root_dir, &handle, loader, EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err))
+ return FALSE;
+ uefi_call_wrapper(handle->Close, 1, handle);
+
+ entry = config_entry_add_loader(config, device, LOADER_UNDEFINED, id, key, title, loader, NULL);
+ if (!entry)
+ return FALSE;
+
+ /* do not boot right away into auto-detected entries */
+ entry->no_autoselect = TRUE;
+
+ return TRUE;
+}
+
+static VOID config_entry_add_osx(Config *config) {
+ EFI_STATUS err;
+ UINTN handle_count = 0;
+ _cleanup_freepool_ EFI_HANDLE *handles = NULL;
+
+ if (!config->auto_entries)
+ return;
+
+ err = LibLocateHandle(ByProtocol, &FileSystemProtocol, NULL, &handle_count, &handles);
+ if (!EFI_ERROR(err)) {
+ UINTN i;
+
+ for (i = 0; i < handle_count; i++) {
+ EFI_FILE *root;
+ BOOLEAN found;
+
+ root = LibOpenRoot(handles[i]);
+ if (!root)
+ continue;
+ found = config_entry_add_loader_auto(config, handles[i], root, NULL, L"auto-osx", 'a', L"macOS",
+ L"\\System\\Library\\CoreServices\\boot.efi");
+ uefi_call_wrapper(root->Close, 1, root);
+ if (found)
+ break;
+ }
+ }
+}
+
+static VOID config_entry_add_linux(
+ Config *config,
+ EFI_HANDLE *device,
+ EFI_FILE *root_dir) {
+
+ EFI_FILE_HANDLE linux_dir;
+ EFI_STATUS err;
+ ConfigEntry *entry;
+
+ err = uefi_call_wrapper(root_dir->Open, 5, root_dir, &linux_dir, L"\\EFI\\Linux", EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err))
+ return;
+
+ for (;;) {
+ CHAR16 buf[256];
+ UINTN bufsize = sizeof buf;
+ EFI_FILE_INFO *f;
+ CHAR8 *sections[] = {
+ (CHAR8 *)".osrel",
+ (CHAR8 *)".cmdline",
+ NULL
+ };
+ UINTN offs[ELEMENTSOF(sections)-1] = {};
+ UINTN szs[ELEMENTSOF(sections)-1] = {};
+ UINTN addrs[ELEMENTSOF(sections)-1] = {};
+ CHAR8 *content = NULL;
+ UINTN len;
+ CHAR8 *line;
+ UINTN pos = 0;
+ CHAR8 *key, *value;
+ CHAR16 *os_name_pretty = NULL;
+ CHAR16 *os_name = NULL;
+ CHAR16 *os_id = NULL;
+ CHAR16 *os_version = NULL;
+ CHAR16 *os_version_id = NULL;
+ CHAR16 *os_build_id = NULL;
+
+ err = uefi_call_wrapper(linux_dir->Read, 3, linux_dir, &bufsize, buf);
+ if (bufsize == 0 || EFI_ERROR(err))
+ break;
+
+ f = (EFI_FILE_INFO *) buf;
+ if (f->FileName[0] == '.')
+ continue;
+ if (f->Attribute & EFI_FILE_DIRECTORY)
+ continue;
+ len = StrLen(f->FileName);
+ if (len < 5)
+ continue;
+ if (StriCmp(f->FileName + len - 4, L".efi") != 0)
+ continue;
+ if (StrnCmp(f->FileName, L"auto-", 5) == 0)
+ continue;
+
+ /* look for .osrel and .cmdline sections in the .efi binary */
+ err = pe_file_locate_sections(linux_dir, f->FileName, sections, addrs, offs, szs);
+ if (EFI_ERROR(err))
+ continue;
+
+ err = file_read(linux_dir, f->FileName, offs[0], szs[0], &content, NULL);
+ if (EFI_ERROR(err))
+ continue;
+
+ /* read properties from the embedded os-release file */
+ while ((line = line_get_key_value(content, (CHAR8 *)"=", &pos, &key, &value))) {
+ if (strcmpa((CHAR8 *)"PRETTY_NAME", key) == 0) {
+ FreePool(os_name_pretty);
+ os_name_pretty = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"NAME", key) == 0) {
+ FreePool(os_name);
+ os_name = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"ID", key) == 0) {
+ FreePool(os_id);
+ os_id = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"VERSION", key) == 0) {
+ FreePool(os_version);
+ os_version = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"VERSION_ID", key) == 0) {
+ FreePool(os_version_id);
+ os_version_id = stra_to_str(value);
+ continue;
+ }
+
+ if (strcmpa((CHAR8 *)"BUILD_ID", key) == 0) {
+ FreePool(os_build_id);
+ os_build_id = stra_to_str(value);
+ continue;
+ }
+ }
+
+ if ((os_name_pretty || os_name) && os_id && (os_version || os_version_id || os_build_id)) {
+ _cleanup_freepool_ CHAR16 *path = NULL;
+
+ path = PoolPrint(L"\\EFI\\Linux\\%s", f->FileName);
+
+ entry = config_entry_add_loader(config, device, LOADER_LINUX, f->FileName, 'l',
+ os_name_pretty ? : (os_name ? : os_id), path,
+ os_version ? : (os_version_id ? : os_build_id));
+
+ FreePool(content);
+ content = NULL;
+
+ /* read the embedded cmdline file */
+ err = file_read(linux_dir, f->FileName, offs[1], szs[1], &content, NULL);
+ if (!EFI_ERROR(err)) {
+
+ /* chomp the newline */
+ if (content[szs[1]-1] == '\n')
+ content[szs[1]-1] = '\0';
+
+ entry->options = stra_to_str(content);
+ }
+
+ config_entry_parse_tries(entry, L"\\EFI\\Linux", f->FileName, L".efi");
+ }
+
+ FreePool(os_name_pretty);
+ FreePool(os_name);
+ FreePool(os_id);
+ FreePool(os_version);
+ FreePool(os_version_id);
+ FreePool(os_build_id);
+ FreePool(content);
+ }
+
+ uefi_call_wrapper(linux_dir->Close, 1, linux_dir);
+}
+
+/* Note that this is in GUID format, i.e. the first 32bit, and the following pair of 16bit are byteswapped. */
+static const UINT8 xbootldr_guid[16] = {
+ 0xff, 0xc2, 0x13, 0xbc, 0xe6, 0x59, 0x62, 0x42, 0xa3, 0x52, 0xb2, 0x75, 0xfd, 0x6f, 0x71, 0x72
+};
+
+EFI_DEVICE_PATH *path_parent(EFI_DEVICE_PATH *path, EFI_DEVICE_PATH *node) {
+ EFI_DEVICE_PATH *parent;
+ UINTN len;
+
+ len = (UINT8*) NextDevicePathNode(node) - (UINT8*) path;
+ parent = (EFI_DEVICE_PATH*) AllocatePool(len + sizeof(EFI_DEVICE_PATH));
+ CopyMem(parent, path, len);
+ CopyMem((UINT8*) parent + len, EndDevicePath, sizeof(EFI_DEVICE_PATH));
+
+ return parent;
+}
+
+static VOID config_load_xbootldr(
+ Config *config,
+ EFI_HANDLE *device) {
+
+ EFI_DEVICE_PATH *partition_path, *node, *disk_path, *copy;
+ UINT32 found_partition_number = (UINT32) -1;
+ UINT64 found_partition_start = (UINT64) -1;
+ UINT64 found_partition_size = (UINT64) -1;
+ UINT8 found_partition_signature[16] = {};
+ EFI_HANDLE new_device;
+ EFI_FILE *root_dir;
+ EFI_STATUS r;
+
+ partition_path = DevicePathFromHandle(device);
+ if (!partition_path)
+ return;
+
+ for (node = partition_path; !IsDevicePathEnd(node); node = NextDevicePathNode(node)) {
+ EFI_HANDLE disk_handle;
+ EFI_BLOCK_IO *block_io;
+ EFI_DEVICE_PATH *p;
+ UINTN nr;
+
+ /* First, Let's look for the SCSI/SATA/USB/… device path node, i.e. one above the media
+ * devices */
+ if (DevicePathType(node) != MESSAGING_DEVICE_PATH)
+ continue;
+
+ /* Determine the device path one level up */
+ disk_path = path_parent(partition_path, node);
+ p = disk_path;
+ r = uefi_call_wrapper(BS->LocateDevicePath, 3, &BlockIoProtocol, &p, &disk_handle);
+ if (EFI_ERROR(r))
+ continue;
+
+ r = uefi_call_wrapper(BS->HandleProtocol, 3, disk_handle, &BlockIoProtocol, (VOID **)&block_io);
+ if (EFI_ERROR(r))
+ continue;
+
+ /* Filter out some block devices early. (We only care about block devices that aren't
+ * partitions themselves — we look for GPT partition tables to parse after all —, and only
+ * those which contain a medium and have at least 2 blocks.) */
+ if (block_io->Media->LogicalPartition ||
+ !block_io->Media->MediaPresent ||
+ block_io->Media->LastBlock <= 1)
+ continue;
+
+ /* Try both copies of the GPT header, in case one is corrupted */
+ for (nr = 0; nr < 2; nr++) {
+ _cleanup_freepool_ EFI_PARTITION_ENTRY* entries = NULL;
+ union {
+ EFI_PARTITION_TABLE_HEADER gpt_header;
+ uint8_t space[((sizeof(EFI_PARTITION_TABLE_HEADER) + 511) / 512) * 512];
+ } gpt_header_buffer;
+ const EFI_PARTITION_TABLE_HEADER *h = &gpt_header_buffer.gpt_header;
+ UINT64 where;
+ UINTN i, sz;
+ UINT32 c;
+
+ if (nr == 0)
+ /* Read the first copy at LBA 1 */
+ where = 1;
+ else
+ /* Read the second copy at the very last LBA of this block device */
+ where = block_io->Media->LastBlock;
+
+ /* Read the GPT header */
+ r = uefi_call_wrapper(block_io->ReadBlocks, 5,
+ block_io,
+ block_io->Media->MediaId,
+ where,
+ sizeof(gpt_header_buffer), &gpt_header_buffer);
+ if (EFI_ERROR(r))
+ continue;
+
+ /* Some superficial validation of the GPT header */
+ c = CompareMem(&h->Header.Signature, "EFI PART", sizeof(h->Header.Signature));
+ if (c != 0)
+ continue;
+
+ if (h->Header.HeaderSize < 92 ||
+ h->Header.HeaderSize > 512)
+ continue;
+
+ if (h->Header.Revision != 0x00010000U)
+ continue;
+
+ /* Calculate CRC check */
+ c = ~crc32_exclude_offset((UINT32) -1,
+ (const UINT8*) &gpt_header_buffer,
+ h->Header.HeaderSize,
+ OFFSETOF(EFI_PARTITION_TABLE_HEADER, Header.CRC32),
+ sizeof(h->Header.CRC32));
+ if (c != h->Header.CRC32)
+ continue;
+
+ if (h->MyLBA != where)
+ continue;
+
+ if (h->SizeOfPartitionEntry < sizeof(EFI_PARTITION_ENTRY))
+ continue;
+
+ if (h->NumberOfPartitionEntries <= 0 ||
+ h->NumberOfPartitionEntries > 1024)
+ continue;
+
+ if (h->SizeOfPartitionEntry > UINTN_MAX / h->NumberOfPartitionEntries) /* overflow check */
+ continue;
+
+ /* Now load the GPT entry table */
+ sz = ALIGN_TO((UINTN) h->SizeOfPartitionEntry * (UINTN) h->NumberOfPartitionEntries, 512);
+ entries = AllocatePool(sz);
+
+ r = uefi_call_wrapper(block_io->ReadBlocks, 5,
+ block_io,
+ block_io->Media->MediaId,
+ h->PartitionEntryLBA,
+ sz, entries);
+ if (EFI_ERROR(r))
+ continue;
+
+ /* Calculate CRC of entries array, too */
+ c = ~crc32((UINT32) -1, entries, sz);
+ if (c != h->PartitionEntryArrayCRC32)
+ continue;
+
+ for (i = 0; i < h->NumberOfPartitionEntries; i++) {
+ EFI_PARTITION_ENTRY *entry;
+
+ entry = (EFI_PARTITION_ENTRY*) ((UINT8*) entries + h->SizeOfPartitionEntry * i);
+
+ if (CompareMem(&entry->PartitionTypeGUID, xbootldr_guid, 16) == 0) {
+ UINT64 end;
+
+ /* Let's use memcpy(), in case the structs are not aligned (they really should be though) */
+ CopyMem(&found_partition_start, &entry->StartingLBA, sizeof(found_partition_start));
+ CopyMem(&end, &entry->EndingLBA, sizeof(end));
+
+ if (end < found_partition_start) /* Bogus? */
+ continue;
+
+ found_partition_size = end - found_partition_start + 1;
+ CopyMem(found_partition_signature, &entry->UniquePartitionGUID, sizeof(found_partition_signature));
+
+ found_partition_number = i + 1;
+ goto found;
+ }
+ }
+
+ break; /* This GPT was fully valid, but we didn't find what we are looking for. This
+ * means there's no reason to check the second copy of the GPT header */
+ }
+ }
+
+ return; /* Not found */
+
+found:
+ copy = DuplicateDevicePath(partition_path);
+
+ /* Patch in the data we found */
+ for (node = copy; !IsDevicePathEnd(node); node = NextDevicePathNode(node)) {
+ HARDDRIVE_DEVICE_PATH *hd;
+
+ if (DevicePathType(node) != MEDIA_DEVICE_PATH)
+ continue;
+
+ if (DevicePathSubType(node) != MEDIA_HARDDRIVE_DP)
+ continue;
+
+ hd = (HARDDRIVE_DEVICE_PATH*) node;
+ hd->PartitionNumber = found_partition_number;
+ hd->PartitionStart = found_partition_start;
+ hd->PartitionSize = found_partition_size;
+ CopyMem(hd->Signature, found_partition_signature, sizeof(hd->Signature));
+ hd->MBRType = MBR_TYPE_EFI_PARTITION_TABLE_HEADER;
+ hd->SignatureType = SIGNATURE_TYPE_GUID;
+ }
+
+ r = uefi_call_wrapper(BS->LocateDevicePath, 3, &BlockIoProtocol, &copy, &new_device);
+ if (EFI_ERROR(r))
+ return;
+
+ root_dir = LibOpenRoot(new_device);
+ if (!root_dir)
+ return;
+
+ config_entry_add_linux(config, new_device, root_dir);
+ config_load_entries(config, new_device, root_dir, NULL);
+}
+
+static EFI_STATUS image_start(
+ EFI_HANDLE parent_image,
+ const Config *config,
+ const ConfigEntry *entry) {
+
+ EFI_HANDLE image;
+ _cleanup_freepool_ EFI_DEVICE_PATH *path = NULL;
+ CHAR16 *options;
+ EFI_STATUS err;
+
+ path = FileDevicePath(entry->device, entry->loader);
+ if (!path) {
+ Print(L"Error getting device path.");
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return EFI_INVALID_PARAMETER;
+ }
+
+ err = uefi_call_wrapper(BS->LoadImage, 6, FALSE, parent_image, path, NULL, 0, &image);
+ if (EFI_ERROR(err)) {
+ Print(L"Error loading %s: %r", entry->loader, err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+
+ if (config->options_edit)
+ options = config->options_edit;
+ else if (entry->options)
+ options = entry->options;
+ else
+ options = NULL;
+ if (options) {
+ EFI_LOADED_IMAGE *loaded_image;
+
+ err = uefi_call_wrapper(BS->OpenProtocol, 6, image, &LoadedImageProtocol, (VOID **)&loaded_image,
+ parent_image, NULL, EFI_OPEN_PROTOCOL_GET_PROTOCOL);
+ if (EFI_ERROR(err)) {
+ Print(L"Error getting LoadedImageProtocol handle: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ goto out_unload;
+ }
+ loaded_image->LoadOptions = options;
+ loaded_image->LoadOptionsSize = (StrLen(loaded_image->LoadOptions)+1) * sizeof(CHAR16);
+
+#if ENABLE_TPM
+ /* Try to log any options to the TPM, especially to catch manually edited options */
+ err = tpm_log_event(SD_TPM_PCR,
+ (EFI_PHYSICAL_ADDRESS) (UINTN) loaded_image->LoadOptions,
+ loaded_image->LoadOptionsSize, loaded_image->LoadOptions);
+ if (EFI_ERROR(err)) {
+ Print(L"Unable to add image options measurement: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 200 * 1000);
+ }
+#endif
+ }
+
+ efivar_set_time_usec(L"LoaderTimeExecUSec", 0);
+ err = uefi_call_wrapper(BS->StartImage, 3, image, NULL, NULL);
+out_unload:
+ uefi_call_wrapper(BS->UnloadImage, 1, image);
+ return err;
+}
+
+static EFI_STATUS reboot_into_firmware(VOID) {
+ _cleanup_freepool_ CHAR8 *b = NULL;
+ UINTN size;
+ UINT64 osind;
+ EFI_STATUS err;
+
+ osind = EFI_OS_INDICATIONS_BOOT_TO_FW_UI;
+
+ err = efivar_get_raw(&global_guid, L"OsIndications", &b, &size);
+ if (!EFI_ERROR(err))
+ osind |= (UINT64)*b;
+
+ err = efivar_set_raw(&global_guid, L"OsIndications", &osind, sizeof(UINT64), TRUE);
+ if (EFI_ERROR(err))
+ return err;
+
+ err = uefi_call_wrapper(RT->ResetSystem, 4, EfiResetCold, EFI_SUCCESS, 0, NULL);
+ Print(L"Error calling ResetSystem: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+}
+
+static VOID config_free(Config *config) {
+ UINTN i;
+
+ for (i = 0; i < config->entry_count; i++)
+ config_entry_free(config->entries[i]);
+ FreePool(config->entries);
+ FreePool(config->entry_default_pattern);
+ FreePool(config->options_edit);
+ FreePool(config->entry_oneshot);
+}
+
+static VOID config_write_entries_to_variable(Config *config) {
+ _cleanup_freepool_ CHAR16 *buffer = NULL;
+ UINTN i, sz = 0;
+ CHAR16 *p;
+
+ for (i = 0; i < config->entry_count; i++)
+ sz += StrLen(config->entries[i]->id) + 1;
+
+ p = buffer = AllocatePool(sz * sizeof(CHAR16));
+
+ for (i = 0; i < config->entry_count; i++) {
+ UINTN l;
+
+ l = StrLen(config->entries[i]->id) + 1;
+ CopyMem(p, config->entries[i]->id, l * sizeof(CHAR16));
+
+ p += l;
+ }
+
+ /* Store the full list of discovered entries. */
+ (void) efivar_set_raw(&loader_guid, L"LoaderEntries", buffer, (UINT8*) p - (UINT8*) buffer, FALSE);
+}
+
+EFI_STATUS efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *sys_table) {
+ static const UINT64 loader_features =
+ EFI_LOADER_FEATURE_CONFIG_TIMEOUT |
+ EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT |
+ EFI_LOADER_FEATURE_ENTRY_DEFAULT |
+ EFI_LOADER_FEATURE_ENTRY_ONESHOT |
+ EFI_LOADER_FEATURE_BOOT_COUNTING |
+ EFI_LOADER_FEATURE_XBOOTLDR |
+ EFI_LOADER_FEATURE_RANDOM_SEED |
+ 0;
+
+ _cleanup_freepool_ CHAR16 *infostr = NULL, *typestr = NULL;
+ CHAR8 *b;
+ UINTN size;
+ EFI_LOADED_IMAGE *loaded_image;
+ EFI_FILE *root_dir;
+ CHAR16 *loaded_image_path;
+ EFI_STATUS err;
+ Config config;
+ UINT64 init_usec;
+ BOOLEAN menu = FALSE;
+ CHAR16 uuid[37];
+
+ InitializeLib(image, sys_table);
+ init_usec = time_usec();
+ efivar_set_time_usec(L"LoaderTimeInitUSec", init_usec);
+ efivar_set(L"LoaderInfo", L"systemd-boot " GIT_VERSION, FALSE);
+
+ infostr = PoolPrint(L"%s %d.%02d", ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
+ efivar_set(L"LoaderFirmwareInfo", infostr, FALSE);
+
+ typestr = PoolPrint(L"UEFI %d.%02d", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff);
+ efivar_set(L"LoaderFirmwareType", typestr, FALSE);
+
+ (void) efivar_set_raw(&loader_guid, L"LoaderFeatures", &loader_features, sizeof(loader_features), FALSE);
+
+ err = uefi_call_wrapper(BS->OpenProtocol, 6, image, &LoadedImageProtocol, (VOID **)&loaded_image,
+ image, NULL, EFI_OPEN_PROTOCOL_GET_PROTOCOL);
+ if (EFI_ERROR(err)) {
+ Print(L"Error getting a LoadedImageProtocol handle: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+
+ /* export the device path this image is started from */
+ if (disk_get_part_uuid(loaded_image->DeviceHandle, uuid) == EFI_SUCCESS)
+ efivar_set(L"LoaderDevicePartUUID", uuid, FALSE);
+
+ root_dir = LibOpenRoot(loaded_image->DeviceHandle);
+ if (!root_dir) {
+ Print(L"Unable to open root directory.");
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return EFI_LOAD_ERROR;
+ }
+
+ if (secure_boot_enabled() && shim_loaded()) {
+ err = security_policy_install();
+ if (EFI_ERROR(err)) {
+ Print(L"Error installing security policy: %r ", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+ }
+
+ /* the filesystem path to this image, to prevent adding ourselves to the menu */
+ loaded_image_path = DevicePathToStr(loaded_image->FilePath);
+ efivar_set(L"LoaderImageIdentifier", loaded_image_path, FALSE);
+
+ config_load_defaults(&config, root_dir);
+
+ /* scan /EFI/Linux/ directory */
+ config_entry_add_linux(&config, loaded_image->DeviceHandle, root_dir);
+
+ /* scan /loader/entries/\*.conf files */
+ config_load_entries(&config, loaded_image->DeviceHandle, root_dir, loaded_image_path);
+
+ /* Similar, but on any XBOOTLDR partition */
+ config_load_xbootldr(&config, loaded_image->DeviceHandle);
+
+ /* sort entries after version number */
+ config_sort_entries(&config);
+
+ /* if we find some well-known loaders, add them to the end of the list */
+ config_entry_add_loader_auto(&config, loaded_image->DeviceHandle, root_dir, NULL,
+ L"auto-windows", 'w', L"Windows Boot Manager", L"\\EFI\\Microsoft\\Boot\\bootmgfw.efi");
+ config_entry_add_loader_auto(&config, loaded_image->DeviceHandle, root_dir, NULL,
+ L"auto-efi-shell", 's', L"EFI Shell", L"\\shell" EFI_MACHINE_TYPE_NAME ".efi");
+ config_entry_add_loader_auto(&config, loaded_image->DeviceHandle, root_dir, loaded_image_path,
+ L"auto-efi-default", '\0', L"EFI Default Loader", L"\\EFI\\Boot\\boot" EFI_MACHINE_TYPE_NAME ".efi");
+ config_entry_add_osx(&config);
+
+ if (config.auto_firmware && efivar_get_raw(&global_guid, L"OsIndicationsSupported", &b, &size) == EFI_SUCCESS) {
+ UINT64 osind = (UINT64)*b;
+
+ if (osind & EFI_OS_INDICATIONS_BOOT_TO_FW_UI)
+ config_entry_add_call(&config,
+ L"auto-reboot-to-firmware-setup",
+ L"Reboot Into Firmware Interface",
+ reboot_into_firmware);
+ FreePool(b);
+ }
+
+ if (config.entry_count == 0) {
+ Print(L"No loader found. Configuration files in \\loader\\entries\\*.conf are needed.");
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ goto out;
+ }
+
+ config_write_entries_to_variable(&config);
+
+ config_title_generate(&config);
+
+ /* select entry by configured pattern or EFI LoaderDefaultEntry= variable */
+ config_default_entry_select(&config);
+
+ /* if no configured entry to select from was found, enable the menu */
+ if (config.idx_default == -1) {
+ config.idx_default = 0;
+ if (config.timeout_sec == 0)
+ config.timeout_sec = 10;
+ }
+
+ /* select entry or show menu when key is pressed or timeout is set */
+ if (config.force_menu || config.timeout_sec > 0)
+ menu = TRUE;
+ else {
+ UINT64 key;
+
+ err = console_key_read(&key, FALSE);
+
+ if (err == EFI_NOT_READY) {
+ uefi_call_wrapper(BS->Stall, 1, 100 * 1000);
+ err = console_key_read(&key, FALSE);
+ }
+
+ if (!EFI_ERROR(err)) {
+ INT16 idx;
+
+ /* find matching key in config entries */
+ idx = entry_lookup_key(&config, config.idx_default, KEYCHAR(key));
+ if (idx >= 0)
+ config.idx_default = idx;
+ else
+ menu = TRUE;
+ }
+ }
+
+ for (;;) {
+ ConfigEntry *entry;
+
+ entry = config.entries[config.idx_default];
+ if (menu) {
+ efivar_set_time_usec(L"LoaderTimeMenuUSec", 0);
+ uefi_call_wrapper(BS->SetWatchdogTimer, 4, 0, 0x10000, 0, NULL);
+ if (!menu_run(&config, &entry, loaded_image_path))
+ break;
+ }
+
+ /* run special entry like "reboot" */
+ if (entry->call) {
+ entry->call();
+ continue;
+ }
+
+ config_entry_bump_counters(entry, root_dir);
+
+ /* Export the selected boot entry to the system */
+ (VOID) efivar_set(L"LoaderEntrySelected", entry->id, FALSE);
+
+ /* Optionally, read a random seed off the ESP and pass it to the OS */
+ (VOID) process_random_seed(root_dir, config.random_seed_mode);
+
+ uefi_call_wrapper(BS->SetWatchdogTimer, 4, 5 * 60, 0x10000, 0, NULL);
+ err = image_start(image, &config, entry);
+ if (EFI_ERROR(err)) {
+ graphics_mode(FALSE);
+ Print(L"\nFailed to execute %s (%s): %r\n", entry->title, entry->loader, err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ goto out;
+ }
+
+ menu = TRUE;
+ config.timeout_sec = 0;
+ }
+ err = EFI_SUCCESS;
+out:
+ FreePool(loaded_image_path);
+ config_free(&config);
+ uefi_call_wrapper(root_dir->Close, 1, root_dir);
+ uefi_call_wrapper(BS->CloseProtocol, 4, image, &LoadedImageProtocol, image, NULL);
+ return err;
+}
diff --git a/src/boot/efi/console.c b/src/boot/efi/console.c
new file mode 100644
index 0000000..2dd4543
--- /dev/null
+++ b/src/boot/efi/console.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "console.h"
+#include "util.h"
+
+#define SYSTEM_FONT_WIDTH 8
+#define SYSTEM_FONT_HEIGHT 19
+
+#define EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL_GUID \
+ { 0xdd9e7534, 0x7762, 0x4698, { 0x8c, 0x14, 0xf5, 0x85, 0x17, 0xa6, 0x25, 0xaa } }
+
+struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL;
+
+typedef EFI_STATUS (EFIAPI *EFI_INPUT_RESET_EX)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ BOOLEAN ExtendedVerification
+);
+
+typedef UINT8 EFI_KEY_TOGGLE_STATE;
+
+typedef struct {
+ UINT32 KeyShiftState;
+ EFI_KEY_TOGGLE_STATE KeyToggleState;
+} EFI_KEY_STATE;
+
+typedef struct {
+ EFI_INPUT_KEY Key;
+ EFI_KEY_STATE KeyState;
+} EFI_KEY_DATA;
+
+typedef EFI_STATUS (EFIAPI *EFI_INPUT_READ_KEY_EX)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ EFI_KEY_DATA *KeyData
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_SET_STATE)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ EFI_KEY_TOGGLE_STATE *KeyToggleState
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_KEY_NOTIFY_FUNCTION)(
+ EFI_KEY_DATA *KeyData
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_REGISTER_KEYSTROKE_NOTIFY)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ EFI_KEY_DATA KeyData,
+ EFI_KEY_NOTIFY_FUNCTION KeyNotificationFunction,
+ VOID **NotifyHandle
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_UNREGISTER_KEYSTROKE_NOTIFY)(
+ struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *This,
+ VOID *NotificationHandle
+);
+
+typedef struct _EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL {
+ EFI_INPUT_RESET_EX Reset;
+ EFI_INPUT_READ_KEY_EX ReadKeyStrokeEx;
+ EFI_EVENT WaitForKeyEx;
+ EFI_SET_STATE SetState;
+ EFI_REGISTER_KEYSTROKE_NOTIFY RegisterKeyNotify;
+ EFI_UNREGISTER_KEYSTROKE_NOTIFY UnregisterKeyNotify;
+} EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL;
+
+EFI_STATUS console_key_read(UINT64 *key, BOOLEAN wait) {
+ EFI_GUID EfiSimpleTextInputExProtocolGuid = EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL_GUID;
+ static EFI_SIMPLE_TEXT_INPUT_EX_PROTOCOL *TextInputEx;
+ static BOOLEAN checked;
+ UINTN index;
+ EFI_INPUT_KEY k;
+ EFI_STATUS err;
+
+ if (!checked) {
+ err = LibLocateProtocol(&EfiSimpleTextInputExProtocolGuid, (VOID **)&TextInputEx);
+ if (EFI_ERROR(err))
+ TextInputEx = NULL;
+
+ checked = TRUE;
+ }
+
+ /* wait until key is pressed */
+ if (wait)
+ uefi_call_wrapper(BS->WaitForEvent, 3, 1, &ST->ConIn->WaitForKey, &index);
+
+ if (TextInputEx) {
+ EFI_KEY_DATA keydata;
+ UINT64 keypress;
+
+ err = uefi_call_wrapper(TextInputEx->ReadKeyStrokeEx, 2, TextInputEx, &keydata);
+ if (!EFI_ERROR(err)) {
+ UINT32 shift = 0;
+
+ /* do not distinguish between left and right keys */
+ if (keydata.KeyState.KeyShiftState & EFI_SHIFT_STATE_VALID) {
+ if (keydata.KeyState.KeyShiftState & (EFI_RIGHT_CONTROL_PRESSED|EFI_LEFT_CONTROL_PRESSED))
+ shift |= EFI_CONTROL_PRESSED;
+ if (keydata.KeyState.KeyShiftState & (EFI_RIGHT_ALT_PRESSED|EFI_LEFT_ALT_PRESSED))
+ shift |= EFI_ALT_PRESSED;
+ };
+
+ /* 32 bit modifier keys + 16 bit scan code + 16 bit unicode */
+ keypress = KEYPRESS(shift, keydata.Key.ScanCode, keydata.Key.UnicodeChar);
+ if (keypress > 0) {
+ *key = keypress;
+ return 0;
+ }
+ }
+ }
+
+ /* fallback for firmware which does not support SimpleTextInputExProtocol
+ *
+ * This is also called in case ReadKeyStrokeEx did not return a key, because
+ * some broken firmwares offer SimpleTextInputExProtocol, but never actually
+ * handle any key. */
+ err = uefi_call_wrapper(ST->ConIn->ReadKeyStroke, 2, ST->ConIn, &k);
+ if (EFI_ERROR(err))
+ return err;
+
+ *key = KEYPRESS(0, k.ScanCode, k.UnicodeChar);
+ return 0;
+}
+
+static EFI_STATUS change_mode(UINTN mode) {
+ EFI_STATUS err;
+
+ err = uefi_call_wrapper(ST->ConOut->SetMode, 2, ST->ConOut, mode);
+
+ /* Special case mode 1: when using OVMF and qemu, setting it returns error
+ * and breaks console output. */
+ if (EFI_ERROR(err) && mode == 1)
+ uefi_call_wrapper(ST->ConOut->SetMode, 2, ST->ConOut, (UINTN)0);
+
+ return err;
+}
+
+static UINT64 text_area_from_font_size(void) {
+ EFI_STATUS err;
+ UINT64 text_area;
+ UINTN rows, columns;
+
+ err = uefi_call_wrapper(ST->ConOut->QueryMode, 4, ST->ConOut, ST->ConOut->Mode->Mode, &columns, &rows);
+ if (EFI_ERROR(err)) {
+ columns = 80;
+ rows = 25;
+ }
+
+ text_area = SYSTEM_FONT_WIDTH * SYSTEM_FONT_HEIGHT * (UINT64)rows * (UINT64)columns;
+
+ return text_area;
+}
+
+static EFI_STATUS mode_auto(UINTN *mode) {
+ const UINT32 HORIZONTAL_MAX_OK = 1920;
+ const UINT32 VERTICAL_MAX_OK = 1080;
+ const UINT64 VIEWPORT_RATIO = 10;
+ UINT64 screen_area, text_area;
+ EFI_GUID GraphicsOutputProtocolGuid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ EFI_GRAPHICS_OUTPUT_PROTOCOL *GraphicsOutput;
+ EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *Info;
+ EFI_STATUS err;
+ BOOLEAN keep = FALSE;
+
+ err = LibLocateProtocol(&GraphicsOutputProtocolGuid, (VOID **)&GraphicsOutput);
+ if (!EFI_ERROR(err) && GraphicsOutput->Mode && GraphicsOutput->Mode->Info) {
+ Info = GraphicsOutput->Mode->Info;
+
+ /* Start verifying if we are in a resolution larger than Full HD
+ * (1920x1080). If we're not, assume we're in a good mode and do not
+ * try to change it. */
+ if (Info->HorizontalResolution <= HORIZONTAL_MAX_OK && Info->VerticalResolution <= VERTICAL_MAX_OK)
+ keep = TRUE;
+ /* For larger resolutions, calculate the ratio of the total screen
+ * area to the text viewport area. If it's less than 10 times bigger,
+ * then assume the text is readable and keep the text mode. */
+ else {
+ screen_area = (UINT64)Info->HorizontalResolution * (UINT64)Info->VerticalResolution;
+ text_area = text_area_from_font_size();
+
+ if (text_area != 0 && screen_area/text_area < VIEWPORT_RATIO)
+ keep = TRUE;
+ }
+ }
+
+ if (keep) {
+ /* Just clear the screen instead of changing the mode and return. */
+ *mode = ST->ConOut->Mode->Mode;
+ uefi_call_wrapper(ST->ConOut->ClearScreen, 1, ST->ConOut);
+ return EFI_SUCCESS;
+ }
+
+ /* If we reached here, then we have a high resolution screen and the text
+ * viewport is less than 10% the screen area, so the firmware developer
+ * screwed up. Try to switch to a better mode. Mode number 2 is first non
+ * standard mode, which is provided by the device manufacturer, so it should
+ * be a good mode.
+ * Note: MaxMode is the number of modes, not the last mode. */
+ if (ST->ConOut->Mode->MaxMode > 2)
+ *mode = 2;
+ /* Try again with mode different than zero (assume user requests
+ * auto mode due to some problem with mode zero). */
+ else if (ST->ConOut->Mode->MaxMode == 2)
+ *mode = 1;
+ /* Else force mode change to zero. */
+ else
+ *mode = 0;
+
+ return change_mode(*mode);
+}
+
+EFI_STATUS console_set_mode(UINTN *mode, enum console_mode_change_type how) {
+ if (how == CONSOLE_MODE_AUTO)
+ return mode_auto(mode);
+
+ if (how == CONSOLE_MODE_MAX) {
+ /* Note: MaxMode is the number of modes, not the last mode. */
+ if (ST->ConOut->Mode->MaxMode > 0)
+ *mode = ST->ConOut->Mode->MaxMode-1;
+ else
+ *mode = 0;
+ }
+
+ return change_mode(*mode);
+}
diff --git a/src/boot/efi/console.h b/src/boot/efi/console.h
new file mode 100644
index 0000000..41df3a4
--- /dev/null
+++ b/src/boot/efi/console.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#define EFI_SHIFT_STATE_VALID 0x80000000
+#define EFI_RIGHT_CONTROL_PRESSED 0x00000004
+#define EFI_LEFT_CONTROL_PRESSED 0x00000008
+#define EFI_RIGHT_ALT_PRESSED 0x00000010
+#define EFI_LEFT_ALT_PRESSED 0x00000020
+
+#define EFI_CONTROL_PRESSED (EFI_RIGHT_CONTROL_PRESSED|EFI_LEFT_CONTROL_PRESSED)
+#define EFI_ALT_PRESSED (EFI_RIGHT_ALT_PRESSED|EFI_LEFT_ALT_PRESSED)
+#define KEYPRESS(keys, scan, uni) ((((UINT64)keys) << 32) | (((UINT64)scan) << 16) | (uni))
+#define KEYCHAR(k) ((k) & 0xffff)
+#define CHAR_CTRL(c) ((c) - 'a' + 1)
+
+enum console_mode_change_type {
+ CONSOLE_MODE_KEEP = 0,
+ CONSOLE_MODE_SET,
+ CONSOLE_MODE_AUTO,
+ CONSOLE_MODE_MAX,
+};
+
+EFI_STATUS console_key_read(UINT64 *key, BOOLEAN wait);
+EFI_STATUS console_set_mode(UINTN *mode, enum console_mode_change_type how);
diff --git a/src/boot/efi/crc32.c b/src/boot/efi/crc32.c
new file mode 100644
index 0000000..5dfd3db
--- /dev/null
+++ b/src/boot/efi/crc32.c
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: LicenseRef-crc32-no-restriction */
+/* This is copied from util-linux, which in turn copied in the version from Gary S. Brown */
+
+/*
+ * COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
+ * code or tables extracted from it, as desired without restriction.
+ *
+ * First, the polynomial itself and its table of feedback terms. The
+ * polynomial is
+ * X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
+ *
+ * Note that we take it "backwards" and put the highest-order term in
+ * the lowest-order bit. The X^32 term is "implied"; the LSB is the
+ * X^31 term, etc. The X^0 term (usually shown as "+1") results in
+ * the MSB being 1.
+ *
+ * Note that the usual hardware shift register implementation, which
+ * is what we're using (we're merely optimizing it by doing eight-bit
+ * chunks at a time) shifts bits into the lowest-order term. In our
+ * implementation, that means shifting towards the right. Why do we
+ * do it this way? Because the calculated CRC must be transmitted in
+ * order from highest-order term to lowest-order term. UARTs transmit
+ * characters in order from LSB to MSB. By storing the CRC this way,
+ * we hand it to the UART in the order low-byte to high-byte; the UART
+ * sends each low-bit to high-bit; and the result is transmission bit
+ * by bit from highest- to lowest-order term without requiring any bit
+ * shuffling on our part. Reception works similarly.
+ *
+ * The feedback terms table consists of 256, 32-bit entries. Notes
+ *
+ * The table can be generated at runtime if desired; code to do so
+ * is shown later. It might not be obvious, but the feedback
+ * terms simply represent the results of eight shift/xor opera-
+ * tions for all combinations of data and CRC register values.
+ *
+ * The values must be right-shifted by eight bits by the "updcrc"
+ * logic; the shift must be unsigned (bring in zeroes). On some
+ * hardware you could probably optimize the shift in assembler by
+ * using byte-swap instructions.
+ * polynomial $edb88320
+ *
+ */
+
+#include "crc32.h"
+
+static const UINT32 crc32_tab[] = {
+ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+ 0x2d02ef8dL
+};
+
+static inline UINT32 crc32_add_char(UINT32 crc, UINT8 c) {
+ return crc32_tab[(crc ^ c) & 0xff] ^ (crc >> 8);
+}
+
+/*
+ * This a generic crc32() function, it takes seed as an argument,
+ * and does __not__ xor at the end. Then individual users can do
+ * whatever they need.
+ */
+UINT32 crc32(UINT32 seed, const VOID *buf, UINTN len) {
+ const UINT8 *p = buf;
+ UINT32 crc = seed;
+
+ while (len > 0) {
+ crc = crc32_add_char(crc, *p++);
+ len--;
+ }
+
+ return crc;
+}
+
+UINT32 crc32_exclude_offset(
+ UINT32 seed,
+ const VOID *buf,
+ UINTN len,
+ UINTN exclude_off,
+ UINTN exclude_len) {
+
+ const UINT8 *p = buf;
+ UINT32 crc = seed;
+ UINTN i;
+
+ for (i = 0; i < len; i++) {
+ UINT8 x = *p++;
+
+ if (i >= exclude_off && i < exclude_off + exclude_len)
+ x = 0;
+
+ crc = crc32_add_char(crc, x);
+ }
+
+ return crc;
+}
diff --git a/src/boot/efi/crc32.h b/src/boot/efi/crc32.h
new file mode 100644
index 0000000..3af543b
--- /dev/null
+++ b/src/boot/efi/crc32.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LicenseRef-crc32-no-restriction */
+#pragma once
+
+#include <efi.h>
+#include <efilib.h>
+
+UINT32 crc32(UINT32 seed, const VOID *buf, UINTN len);
+UINT32 crc32_exclude_offset(UINT32 seed, const VOID *buf, UINTN len, UINTN exclude_off, UINTN exclude_len);
diff --git a/src/boot/efi/disk.c b/src/boot/efi/disk.c
new file mode 100644
index 0000000..89508f8
--- /dev/null
+++ b/src/boot/efi/disk.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "util.h"
+
+EFI_STATUS disk_get_part_uuid(EFI_HANDLE *handle, CHAR16 uuid[static 37]) {
+ EFI_DEVICE_PATH *device_path;
+
+ /* export the device path this image is started from */
+ device_path = DevicePathFromHandle(handle);
+ if (device_path) {
+ _cleanup_freepool_ EFI_DEVICE_PATH *paths = NULL;
+ EFI_DEVICE_PATH *path;
+
+ paths = UnpackDevicePath(device_path);
+ for (path = paths; !IsDevicePathEnd(path); path = NextDevicePathNode(path)) {
+ HARDDRIVE_DEVICE_PATH *drive;
+
+ if (DevicePathType(path) != MEDIA_DEVICE_PATH)
+ continue;
+ if (DevicePathSubType(path) != MEDIA_HARDDRIVE_DP)
+ continue;
+ drive = (HARDDRIVE_DEVICE_PATH *)path;
+ if (drive->SignatureType != SIGNATURE_TYPE_GUID)
+ continue;
+
+ GuidToString(uuid, (EFI_GUID *)&drive->Signature);
+ return EFI_SUCCESS;
+ }
+ }
+
+ return EFI_NOT_FOUND;
+}
diff --git a/src/boot/efi/disk.h b/src/boot/efi/disk.h
new file mode 100644
index 0000000..551a9ae
--- /dev/null
+++ b/src/boot/efi/disk.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+EFI_STATUS disk_get_part_uuid(EFI_HANDLE *handle, CHAR16 uuid[static 37]);
diff --git a/src/boot/efi/graphics.c b/src/boot/efi/graphics.c
new file mode 100644
index 0000000..f36ecb3
--- /dev/null
+++ b/src/boot/efi/graphics.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright © 2013 Intel Corporation
+ * Authored by Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "graphics.h"
+#include "util.h"
+
+EFI_STATUS graphics_mode(BOOLEAN on) {
+ #define EFI_CONSOLE_CONTROL_PROTOCOL_GUID \
+ { 0xf42f7782, 0x12e, 0x4c12, { 0x99, 0x56, 0x49, 0xf9, 0x43, 0x4, 0xf7, 0x21 } };
+
+ struct _EFI_CONSOLE_CONTROL_PROTOCOL;
+
+ typedef enum {
+ EfiConsoleControlScreenText,
+ EfiConsoleControlScreenGraphics,
+ EfiConsoleControlScreenMaxValue,
+ } EFI_CONSOLE_CONTROL_SCREEN_MODE;
+
+ typedef EFI_STATUS (EFIAPI *EFI_CONSOLE_CONTROL_PROTOCOL_GET_MODE)(
+ struct _EFI_CONSOLE_CONTROL_PROTOCOL *This,
+ EFI_CONSOLE_CONTROL_SCREEN_MODE *Mode,
+ BOOLEAN *UgaExists,
+ BOOLEAN *StdInLocked
+ );
+
+ typedef EFI_STATUS (EFIAPI *EFI_CONSOLE_CONTROL_PROTOCOL_SET_MODE)(
+ struct _EFI_CONSOLE_CONTROL_PROTOCOL *This,
+ EFI_CONSOLE_CONTROL_SCREEN_MODE Mode
+ );
+
+ typedef EFI_STATUS (EFIAPI *EFI_CONSOLE_CONTROL_PROTOCOL_LOCK_STD_IN)(
+ struct _EFI_CONSOLE_CONTROL_PROTOCOL *This,
+ CHAR16 *Password
+ );
+
+ typedef struct _EFI_CONSOLE_CONTROL_PROTOCOL {
+ EFI_CONSOLE_CONTROL_PROTOCOL_GET_MODE GetMode;
+ EFI_CONSOLE_CONTROL_PROTOCOL_SET_MODE SetMode;
+ EFI_CONSOLE_CONTROL_PROTOCOL_LOCK_STD_IN LockStdIn;
+ } EFI_CONSOLE_CONTROL_PROTOCOL;
+
+ EFI_GUID ConsoleControlProtocolGuid = EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
+ EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL;
+ EFI_CONSOLE_CONTROL_SCREEN_MODE new;
+ EFI_CONSOLE_CONTROL_SCREEN_MODE current;
+ BOOLEAN uga_exists;
+ BOOLEAN stdin_locked;
+ EFI_STATUS err;
+
+ err = LibLocateProtocol(&ConsoleControlProtocolGuid, (VOID **)&ConsoleControl);
+ if (EFI_ERROR(err))
+ /* console control protocol is nonstandard and might not exist. */
+ return err == EFI_NOT_FOUND ? EFI_SUCCESS : err;
+
+ /* check current mode */
+ err = uefi_call_wrapper(ConsoleControl->GetMode, 4, ConsoleControl, &current, &uga_exists, &stdin_locked);
+ if (EFI_ERROR(err))
+ return err;
+
+ /* do not touch the mode */
+ new = on ? EfiConsoleControlScreenGraphics : EfiConsoleControlScreenText;
+ if (new == current)
+ return EFI_SUCCESS;
+
+ err = uefi_call_wrapper(ConsoleControl->SetMode, 2, ConsoleControl, new);
+
+ /* some firmware enables the cursor when switching modes */
+ uefi_call_wrapper(ST->ConOut->EnableCursor, 2, ST->ConOut, FALSE);
+
+ return err;
+}
diff --git a/src/boot/efi/graphics.h b/src/boot/efi/graphics.h
new file mode 100644
index 0000000..116aae2
--- /dev/null
+++ b/src/boot/efi/graphics.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright © 2013 Intel Corporation
+ * Authored by Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+ */
+#pragma once
+
+EFI_STATUS graphics_mode(BOOLEAN on);
diff --git a/src/boot/efi/linux.c b/src/boot/efi/linux.c
new file mode 100644
index 0000000..4d44671
--- /dev/null
+++ b/src/boot/efi/linux.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "linux.h"
+#include "util.h"
+
+#ifdef __i386__
+#define __regparm0__ __attribute__((regparm(0)))
+#else
+#define __regparm0__
+#endif
+
+typedef VOID(*handover_f)(VOID *image, EFI_SYSTEM_TABLE *table, struct boot_params *params) __regparm0__;
+static VOID linux_efi_handover(EFI_HANDLE image, struct boot_params *params) {
+ handover_f handover;
+ UINTN start = (UINTN)params->hdr.code32_start;
+
+#ifdef __x86_64__
+ asm volatile ("cli");
+ start += 512;
+#endif
+ handover = (handover_f)(start + params->hdr.handover_offset);
+ handover(image, ST, params);
+}
+
+EFI_STATUS linux_exec(EFI_HANDLE *image,
+ CHAR8 *cmdline, UINTN cmdline_len,
+ UINTN linux_addr,
+ UINTN initrd_addr, UINTN initrd_size) {
+ struct boot_params *image_params;
+ struct boot_params *boot_params;
+ UINT8 setup_sectors;
+ EFI_PHYSICAL_ADDRESS addr;
+ EFI_STATUS err;
+
+ image_params = (struct boot_params *) linux_addr;
+
+ if (image_params->hdr.boot_flag != 0xAA55 ||
+ image_params->hdr.header != SETUP_MAGIC ||
+ image_params->hdr.version < 0x20b ||
+ !image_params->hdr.relocatable_kernel)
+ return EFI_LOAD_ERROR;
+
+ boot_params = (struct boot_params *) 0xFFFFFFFF;
+ err = uefi_call_wrapper(BS->AllocatePages, 4, AllocateMaxAddress, EfiLoaderData,
+ EFI_SIZE_TO_PAGES(0x4000), (EFI_PHYSICAL_ADDRESS*) &boot_params);
+ if (EFI_ERROR(err))
+ return err;
+
+ ZeroMem(boot_params, 0x4000);
+ CopyMem(&boot_params->hdr, &image_params->hdr, sizeof(struct setup_header));
+ boot_params->hdr.type_of_loader = 0xff;
+ setup_sectors = image_params->hdr.setup_sects > 0 ? image_params->hdr.setup_sects : 4;
+ boot_params->hdr.code32_start = (UINT32)linux_addr + (setup_sectors + 1) * 512;
+
+ if (cmdline) {
+ addr = 0xA0000;
+ err = uefi_call_wrapper(BS->AllocatePages, 4, AllocateMaxAddress, EfiLoaderData,
+ EFI_SIZE_TO_PAGES(cmdline_len + 1), &addr);
+ if (EFI_ERROR(err))
+ return err;
+ CopyMem((VOID *)(UINTN)addr, cmdline, cmdline_len);
+ ((CHAR8 *)(UINTN)addr)[cmdline_len] = 0;
+ boot_params->hdr.cmd_line_ptr = (UINT32)addr;
+ }
+
+ boot_params->hdr.ramdisk_image = (UINT32)initrd_addr;
+ boot_params->hdr.ramdisk_size = (UINT32)initrd_size;
+
+ linux_efi_handover(image, boot_params);
+ return EFI_LOAD_ERROR;
+}
diff --git a/src/boot/efi/linux.h b/src/boot/efi/linux.h
new file mode 100644
index 0000000..b92c27c
--- /dev/null
+++ b/src/boot/efi/linux.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#define SETUP_MAGIC 0x53726448 /* "HdrS" */
+
+struct setup_header {
+ UINT8 setup_sects;
+ UINT16 root_flags;
+ UINT32 syssize;
+ UINT16 ram_size;
+ UINT16 vid_mode;
+ UINT16 root_dev;
+ UINT16 boot_flag;
+ UINT16 jump;
+ UINT32 header;
+ UINT16 version;
+ UINT32 realmode_swtch;
+ UINT16 start_sys_seg;
+ UINT16 kernel_version;
+ UINT8 type_of_loader;
+ UINT8 loadflags;
+ UINT16 setup_move_size;
+ UINT32 code32_start;
+ UINT32 ramdisk_image;
+ UINT32 ramdisk_size;
+ UINT32 bootsect_kludge;
+ UINT16 heap_end_ptr;
+ UINT8 ext_loader_ver;
+ UINT8 ext_loader_type;
+ UINT32 cmd_line_ptr;
+ UINT32 initrd_addr_max;
+ UINT32 kernel_alignment;
+ UINT8 relocatable_kernel;
+ UINT8 min_alignment;
+ UINT16 xloadflags;
+ UINT32 cmdline_size;
+ UINT32 hardware_subarch;
+ UINT64 hardware_subarch_data;
+ UINT32 payload_offset;
+ UINT32 payload_length;
+ UINT64 setup_data;
+ UINT64 pref_address;
+ UINT32 init_size;
+ UINT32 handover_offset;
+} __attribute__((packed));
+
+/* adapted from linux' bootparam.h */
+struct boot_params {
+ UINT8 screen_info[64]; // was: struct screen_info
+ UINT8 apm_bios_info[20]; // was: struct apm_bios_info
+ UINT8 _pad2[4];
+ UINT64 tboot_addr;
+ UINT8 ist_info[16]; // was: struct ist_info
+ UINT8 _pad3[16];
+ UINT8 hd0_info[16];
+ UINT8 hd1_info[16];
+ UINT8 sys_desc_table[16]; // was: struct sys_desc_table
+ UINT8 olpc_ofw_header[16]; // was: struct olpc_ofw_header
+ UINT32 ext_ramdisk_image;
+ UINT32 ext_ramdisk_size;
+ UINT32 ext_cmd_line_ptr;
+ UINT8 _pad4[116];
+ UINT8 edid_info[128]; // was: struct edid_info
+ UINT8 efi_info[32]; // was: struct efi_info
+ UINT32 alt_mem_k;
+ UINT32 scratch;
+ UINT8 e820_entries;
+ UINT8 eddbuf_entries;
+ UINT8 edd_mbr_sig_buf_entries;
+ UINT8 kbd_status;
+ UINT8 secure_boot;
+ UINT8 _pad5[2];
+ UINT8 sentinel;
+ UINT8 _pad6[1];
+ struct setup_header hdr;
+ UINT8 _pad7[0x290-0x1f1-sizeof(struct setup_header)];
+ UINT32 edd_mbr_sig_buffer[16]; // was: edd_mbr_sig_buffer[EDD_MBR_SIG_MAX]
+ UINT8 e820_table[20*128]; // was: struct boot_e820_entry e820_table[E820_MAX_ENTRIES_ZEROPAGE]
+ UINT8 _pad8[48];
+ UINT8 eddbuf[6*82]; // was: struct edd_info eddbuf[EDDMAXNR]
+ UINT8 _pad9[276];
+} __attribute__((packed));
+
+EFI_STATUS linux_exec(EFI_HANDLE *image,
+ CHAR8 *cmdline, UINTN cmdline_size,
+ UINTN linux_addr,
+ UINTN initrd_addr, UINTN initrd_size);
diff --git a/src/boot/efi/loader-features.h b/src/boot/efi/loader-features.h
new file mode 100644
index 0000000..f07dacb
--- /dev/null
+++ b/src/boot/efi/loader-features.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#ifndef UINT64_C
+# define UINT64_C(c) (c ## ULL)
+#endif
+
+#define EFI_LOADER_FEATURE_CONFIG_TIMEOUT (UINT64_C(1) << 0)
+#define EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT (UINT64_C(1) << 1)
+#define EFI_LOADER_FEATURE_ENTRY_DEFAULT (UINT64_C(1) << 2)
+#define EFI_LOADER_FEATURE_ENTRY_ONESHOT (UINT64_C(1) << 3)
+#define EFI_LOADER_FEATURE_BOOT_COUNTING (UINT64_C(1) << 4)
+#define EFI_LOADER_FEATURE_XBOOTLDR (UINT64_C(1) << 5)
+#define EFI_LOADER_FEATURE_RANDOM_SEED (UINT64_C(1) << 6)
diff --git a/src/boot/efi/measure.c b/src/boot/efi/measure.c
new file mode 100644
index 0000000..ff876a6
--- /dev/null
+++ b/src/boot/efi/measure.c
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if ENABLE_TPM
+
+#include <efi.h>
+#include <efilib.h>
+#include "measure.h"
+
+#define EFI_TCG_PROTOCOL_GUID { 0xf541796d, 0xa62e, 0x4954, {0xa7, 0x75, 0x95, 0x84, 0xf6, 0x1b, 0x9c, 0xdd} }
+
+typedef struct _TCG_VERSION {
+ UINT8 Major;
+ UINT8 Minor;
+ UINT8 RevMajor;
+ UINT8 RevMinor;
+} TCG_VERSION;
+
+typedef struct tdEFI_TCG2_VERSION {
+ UINT8 Major;
+ UINT8 Minor;
+} EFI_TCG2_VERSION;
+
+typedef struct _TCG_BOOT_SERVICE_CAPABILITY {
+ UINT8 Size;
+ struct _TCG_VERSION StructureVersion;
+ struct _TCG_VERSION ProtocolSpecVersion;
+ UINT8 HashAlgorithmBitmap;
+ BOOLEAN TPMPresentFlag;
+ BOOLEAN TPMDeactivatedFlag;
+} TCG_BOOT_SERVICE_CAPABILITY;
+
+typedef struct tdTREE_BOOT_SERVICE_CAPABILITY {
+ UINT8 Size;
+ EFI_TCG2_VERSION StructureVersion;
+ EFI_TCG2_VERSION ProtocolVersion;
+ UINT32 HashAlgorithmBitmap;
+ UINT32 SupportedEventLogs;
+ BOOLEAN TrEEPresentFlag;
+ UINT16 MaxCommandSize;
+ UINT16 MaxResponseSize;
+ UINT32 ManufacturerID;
+} TREE_BOOT_SERVICE_CAPABILITY;
+
+typedef UINT32 TCG_ALGORITHM_ID;
+#define TCG_ALG_SHA 0x00000004 // The SHA1 algorithm
+
+#define SHA1_DIGEST_SIZE 20
+
+typedef struct _TCG_DIGEST {
+ UINT8 Digest[SHA1_DIGEST_SIZE];
+} TCG_DIGEST;
+
+#define EV_IPL 13
+
+typedef struct _TCG_PCR_EVENT {
+ UINT32 PCRIndex;
+ UINT32 EventType;
+ struct _TCG_DIGEST digest;
+ UINT32 EventSize;
+ UINT8 Event[1];
+} TCG_PCR_EVENT;
+
+INTERFACE_DECL(_EFI_TCG);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_STATUS_CHECK) (IN struct _EFI_TCG * This,
+ OUT struct _TCG_BOOT_SERVICE_CAPABILITY * ProtocolCapability,
+ OUT UINT32 * TCGFeatureFlags,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLocation,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLastEntry);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_HASH_ALL) (IN struct _EFI_TCG * This,
+ IN UINT8 * HashData,
+ IN UINT64 HashDataLen,
+ IN TCG_ALGORITHM_ID AlgorithmId,
+ IN OUT UINT64 * HashedDataLen, IN OUT UINT8 ** HashedDataResult);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_LOG_EVENT) (IN struct _EFI_TCG * This,
+ IN struct _TCG_PCR_EVENT * TCGLogData,
+ IN OUT UINT32 * EventNumber, IN UINT32 Flags);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_PASS_THROUGH_TO_TPM) (IN struct _EFI_TCG * This,
+ IN UINT32 TpmInputParameterBlockSize,
+ IN UINT8 * TpmInputParameterBlock,
+ IN UINT32 TpmOutputParameterBlockSize,
+ IN UINT8 * TpmOutputParameterBlock);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG_HASH_LOG_EXTEND_EVENT) (IN struct _EFI_TCG * This,
+ IN EFI_PHYSICAL_ADDRESS HashData,
+ IN UINT64 HashDataLen,
+ IN TCG_ALGORITHM_ID AlgorithmId,
+ IN struct _TCG_PCR_EVENT * TCGLogData,
+ IN OUT UINT32 * EventNumber,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLastEntry);
+
+typedef struct _EFI_TCG {
+ EFI_TCG_STATUS_CHECK StatusCheck;
+ EFI_TCG_HASH_ALL HashAll;
+ EFI_TCG_LOG_EVENT LogEvent;
+ EFI_TCG_PASS_THROUGH_TO_TPM PassThroughToTPM;
+ EFI_TCG_HASH_LOG_EXTEND_EVENT HashLogExtendEvent;
+} EFI_TCG;
+
+#define EFI_TCG2_PROTOCOL_GUID {0x607f766c, 0x7455, 0x42be, { 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f }}
+
+typedef struct tdEFI_TCG2_PROTOCOL EFI_TCG2_PROTOCOL;
+
+typedef UINT32 EFI_TCG2_EVENT_LOG_BITMAP;
+typedef UINT32 EFI_TCG2_EVENT_LOG_FORMAT;
+typedef UINT32 EFI_TCG2_EVENT_ALGORITHM_BITMAP;
+
+typedef struct tdEFI_TCG2_BOOT_SERVICE_CAPABILITY {
+ UINT8 Size;
+ EFI_TCG2_VERSION StructureVersion;
+ EFI_TCG2_VERSION ProtocolVersion;
+ EFI_TCG2_EVENT_ALGORITHM_BITMAP HashAlgorithmBitmap;
+ EFI_TCG2_EVENT_LOG_BITMAP SupportedEventLogs;
+ BOOLEAN TPMPresentFlag;
+ UINT16 MaxCommandSize;
+ UINT16 MaxResponseSize;
+ UINT32 ManufacturerID;
+ UINT32 NumberOfPCRBanks;
+ EFI_TCG2_EVENT_ALGORITHM_BITMAP ActivePcrBanks;
+} EFI_TCG2_BOOT_SERVICE_CAPABILITY;
+
+#define EFI_TCG2_EVENT_HEADER_VERSION 1
+
+typedef struct {
+ UINT32 HeaderSize;
+ UINT16 HeaderVersion;
+ UINT32 PCRIndex;
+ UINT32 EventType;
+} __attribute__((packed)) EFI_TCG2_EVENT_HEADER;
+
+typedef struct tdEFI_TCG2_EVENT {
+ UINT32 Size;
+ EFI_TCG2_EVENT_HEADER Header;
+ UINT8 Event[1];
+} __attribute__((packed)) EFI_TCG2_EVENT;
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_GET_CAPABILITY) (IN EFI_TCG2_PROTOCOL * This,
+ IN OUT EFI_TCG2_BOOT_SERVICE_CAPABILITY * ProtocolCapability);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_GET_EVENT_LOG) (IN EFI_TCG2_PROTOCOL * This,
+ IN EFI_TCG2_EVENT_LOG_FORMAT EventLogFormat,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLocation,
+ OUT EFI_PHYSICAL_ADDRESS * EventLogLastEntry,
+ OUT BOOLEAN * EventLogTruncated);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_HASH_LOG_EXTEND_EVENT) (IN EFI_TCG2_PROTOCOL * This,
+ IN UINT64 Flags,
+ IN EFI_PHYSICAL_ADDRESS DataToHash,
+ IN UINT64 DataToHashLen, IN EFI_TCG2_EVENT * EfiTcgEvent);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_SUBMIT_COMMAND) (IN EFI_TCG2_PROTOCOL * This,
+ IN UINT32 InputParameterBlockSize,
+ IN UINT8 * InputParameterBlock,
+ IN UINT32 OutputParameterBlockSize, IN UINT8 * OutputParameterBlock);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_GET_ACTIVE_PCR_BANKS) (IN EFI_TCG2_PROTOCOL * This, OUT UINT32 * ActivePcrBanks);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_SET_ACTIVE_PCR_BANKS) (IN EFI_TCG2_PROTOCOL * This, IN UINT32 ActivePcrBanks);
+
+typedef EFI_STATUS(EFIAPI * EFI_TCG2_GET_RESULT_OF_SET_ACTIVE_PCR_BANKS) (IN EFI_TCG2_PROTOCOL * This,
+ OUT UINT32 * OperationPresent, OUT UINT32 * Response);
+
+typedef struct tdEFI_TCG2_PROTOCOL {
+ EFI_TCG2_GET_CAPABILITY GetCapability;
+ EFI_TCG2_GET_EVENT_LOG GetEventLog;
+ EFI_TCG2_HASH_LOG_EXTEND_EVENT HashLogExtendEvent;
+ EFI_TCG2_SUBMIT_COMMAND SubmitCommand;
+ EFI_TCG2_GET_ACTIVE_PCR_BANKS GetActivePcrBanks;
+ EFI_TCG2_SET_ACTIVE_PCR_BANKS SetActivePcrBanks;
+ EFI_TCG2_GET_RESULT_OF_SET_ACTIVE_PCR_BANKS GetResultOfSetActivePcrBanks;
+} EFI_TCG2;
+
+static EFI_STATUS tpm1_measure_to_pcr_and_event_log(const EFI_TCG *tcg, UINT32 pcrindex, const EFI_PHYSICAL_ADDRESS buffer,
+ UINTN buffer_size, const CHAR16 *description) {
+ EFI_STATUS status;
+ TCG_PCR_EVENT *tcg_event;
+ UINT32 event_number;
+ EFI_PHYSICAL_ADDRESS event_log_last;
+ UINTN desc_len;
+
+ desc_len = (StrLen(description) + 1) * sizeof(CHAR16);
+
+ tcg_event = AllocateZeroPool(desc_len + sizeof(TCG_PCR_EVENT));
+
+ if (!tcg_event)
+ return EFI_OUT_OF_RESOURCES;
+
+ tcg_event->EventSize = desc_len;
+ CopyMem((VOID *) & tcg_event->Event[0], (VOID *) description, desc_len);
+
+ tcg_event->PCRIndex = pcrindex;
+ tcg_event->EventType = EV_IPL;
+
+ event_number = 1;
+ status = uefi_call_wrapper(tcg->HashLogExtendEvent, 7,
+ (EFI_TCG *) tcg, buffer, buffer_size, TCG_ALG_SHA, tcg_event, &event_number, &event_log_last);
+
+ if (EFI_ERROR(status))
+ return status;
+
+ uefi_call_wrapper(BS->FreePool, 1, tcg_event);
+
+ return EFI_SUCCESS;
+}
+
+static EFI_STATUS tpm2_measure_to_pcr_and_event_log(const EFI_TCG2 *tcg, UINT32 pcrindex, const EFI_PHYSICAL_ADDRESS buffer,
+ UINT64 buffer_size, const CHAR16 *description) {
+ EFI_STATUS status;
+ EFI_TCG2_EVENT *tcg_event;
+ UINTN desc_len;
+
+ desc_len = StrLen(description) * sizeof(CHAR16);
+
+ tcg_event = AllocateZeroPool(sizeof(*tcg_event) - sizeof(tcg_event->Event) + desc_len + 1);
+
+ if (!tcg_event)
+ return EFI_OUT_OF_RESOURCES;
+
+ tcg_event->Size = sizeof(*tcg_event) - sizeof(tcg_event->Event) + desc_len + 1;
+ tcg_event->Header.HeaderSize = sizeof(EFI_TCG2_EVENT_HEADER);
+ tcg_event->Header.HeaderVersion = EFI_TCG2_EVENT_HEADER_VERSION;
+ tcg_event->Header.PCRIndex = pcrindex;
+ tcg_event->Header.EventType = EV_IPL;
+
+ CopyMem((VOID *) tcg_event->Event, (VOID *) description, desc_len);
+
+ status = uefi_call_wrapper(tcg->HashLogExtendEvent, 5, (EFI_TCG2 *) tcg, 0, buffer, (UINT64) buffer_size, tcg_event);
+
+ uefi_call_wrapper(BS->FreePool, 1, tcg_event);
+
+ if (EFI_ERROR(status))
+ return status;
+
+ return EFI_SUCCESS;
+}
+
+static EFI_TCG * tcg1_interface_check(void) {
+ EFI_GUID tpm_guid = EFI_TCG_PROTOCOL_GUID;
+ EFI_STATUS status;
+ EFI_TCG *tcg;
+ TCG_BOOT_SERVICE_CAPABILITY capability;
+ UINT32 features;
+ EFI_PHYSICAL_ADDRESS event_log_location;
+ EFI_PHYSICAL_ADDRESS event_log_last_entry;
+
+ status = LibLocateProtocol(&tpm_guid, (void **) &tcg);
+
+ if (EFI_ERROR(status))
+ return NULL;
+
+ capability.Size = (UINT8) sizeof(capability);
+ status = uefi_call_wrapper(tcg->StatusCheck, 5, tcg, &capability, &features, &event_log_location, &event_log_last_entry);
+
+ if (EFI_ERROR(status))
+ return NULL;
+
+ if (capability.TPMDeactivatedFlag)
+ return NULL;
+
+ if (!capability.TPMPresentFlag)
+ return NULL;
+
+ return tcg;
+}
+
+static EFI_TCG2 * tcg2_interface_check(void) {
+ EFI_GUID tpm2_guid = EFI_TCG2_PROTOCOL_GUID;
+ EFI_STATUS status;
+ EFI_TCG2 *tcg;
+ EFI_TCG2_BOOT_SERVICE_CAPABILITY capability;
+
+ status = LibLocateProtocol(&tpm2_guid, (void **) &tcg);
+
+ if (EFI_ERROR(status))
+ return NULL;
+
+ capability.Size = (UINT8) sizeof(EFI_TCG2_BOOT_SERVICE_CAPABILITY);
+ status = uefi_call_wrapper(tcg->GetCapability, 2, tcg, &capability);
+
+ if (EFI_ERROR(status))
+ return NULL;
+
+ if (capability.StructureVersion.Major == 1 &&
+ capability.StructureVersion.Minor == 0) {
+ TCG_BOOT_SERVICE_CAPABILITY *caps_1_0;
+ caps_1_0 = (TCG_BOOT_SERVICE_CAPABILITY *)&capability;
+ if (caps_1_0->TPMPresentFlag)
+ return tcg;
+ }
+
+ if (!capability.TPMPresentFlag)
+ return NULL;
+
+ return tcg;
+}
+
+EFI_STATUS tpm_log_event(UINT32 pcrindex, const EFI_PHYSICAL_ADDRESS buffer, UINTN buffer_size, const CHAR16 *description) {
+ EFI_TCG *tpm1;
+ EFI_TCG2 *tpm2;
+
+ tpm2 = tcg2_interface_check();
+ if (tpm2)
+ return tpm2_measure_to_pcr_and_event_log(tpm2, pcrindex, buffer, buffer_size, description);
+
+ tpm1 = tcg1_interface_check();
+ if (tpm1)
+ return tpm1_measure_to_pcr_and_event_log(tpm1, pcrindex, buffer, buffer_size, description);
+
+ /* No active TPM found, so don't return an error */
+ return EFI_SUCCESS;
+}
+
+#endif
diff --git a/src/boot/efi/measure.h b/src/boot/efi/measure.h
new file mode 100644
index 0000000..19e148d
--- /dev/null
+++ b/src/boot/efi/measure.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+EFI_STATUS tpm_log_event(UINT32 pcrindex, const EFI_PHYSICAL_ADDRESS buffer, UINTN buffer_size, const CHAR16 *description);
diff --git a/src/boot/efi/meson.build b/src/boot/efi/meson.build
new file mode 100644
index 0000000..24177f9
--- /dev/null
+++ b/src/boot/efi/meson.build
@@ -0,0 +1,260 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+efi_headers = files('''
+ console.h
+ crc32.h
+ disk.h
+ graphics.h
+ linux.h
+ measure.h
+ missing_efi.h
+ pe.h
+ random-seed.h
+ sha256.h
+ shim.h
+ splash.h
+ util.h
+'''.split())
+
+common_sources = '''
+ disk.c
+ graphics.c
+ measure.c
+ pe.c
+ util.c
+'''.split()
+
+systemd_boot_sources = '''
+ boot.c
+ console.c
+ crc32.c
+ random-seed.c
+ sha256.c
+ shim.c
+'''.split()
+
+stub_sources = '''
+ linux.c
+ splash.c
+ stub.c
+'''.split()
+
+if conf.get('ENABLE_EFI') == 1 and get_option('gnu-efi') != 'false'
+ efi_cc = get_option('efi-cc')
+ if efi_cc.length() == 0
+ efi_cc = cc.cmd_array()
+ endif
+ efi_ld = get_option('efi-ld')
+ if efi_ld == ''
+ efi_ld = find_program('ld', required: true)
+ endif
+ efi_incdir = get_option('efi-includedir')
+
+ gnu_efi_path_arch = ''
+ foreach name : [gnu_efi_arch, EFI_MACHINE_TYPE_NAME]
+ if (gnu_efi_path_arch == '' and name != '' and
+ cc.has_header('@0@/@1@/efibind.h'.format(efi_incdir, name)))
+ gnu_efi_path_arch = name
+ endif
+ endforeach
+
+ if gnu_efi_path_arch != '' and EFI_MACHINE_TYPE_NAME == ''
+ error('gnu-efi is available, but EFI_MACHINE_TYPE_NAME is unknown')
+ endif
+
+ efi_libdir = get_option('efi-libdir')
+ if efi_libdir == ''
+ # New location first introduced with gnu-efi 3.0.11
+ efi_libdir = join_paths('/usr/lib/gnuefi', EFI_MACHINE_TYPE_NAME)
+ cmd = run_command('test', '-e', efi_libdir)
+
+ if cmd.returncode() != 0
+ # Fall back to the old approach
+ cmd = run_command(efi_cc + ['-print-multi-os-directory'])
+ if cmd.returncode() == 0
+ path = join_paths('/usr/lib', cmd.stdout().strip())
+ cmd = run_command('realpath', '-e', path)
+ if cmd.returncode() == 0
+ efi_libdir = cmd.stdout().strip()
+ endif
+ endif
+ endif
+ endif
+
+ have_gnu_efi = gnu_efi_path_arch != '' and efi_libdir != ''
+else
+ have_gnu_efi = false
+endif
+
+if get_option('gnu-efi') == 'true' and not have_gnu_efi
+ error('gnu-efi support requested, but headers were not found')
+endif
+
+if have_gnu_efi
+ efi_conf = configuration_data()
+ efi_conf.set_quoted('EFI_MACHINE_TYPE_NAME', EFI_MACHINE_TYPE_NAME)
+ efi_conf.set10('ENABLE_TPM', get_option('tpm'))
+ efi_conf.set('SD_TPM_PCR', get_option('tpm-pcrindex'))
+
+ efi_config_h = configure_file(
+ output : 'efi_config.h',
+ configuration : efi_conf)
+
+ objcopy = find_program('objcopy')
+
+ efi_location_map = [
+ # New locations first introduced with gnu-efi 3.0.11
+ [join_paths(efi_libdir, 'efi.lds'),
+ join_paths(efi_libdir, 'crt0.o')],
+ # Older locations...
+ [join_paths(efi_libdir, 'gnuefi', 'elf_@0@_efi.lds'.format(gnu_efi_path_arch)),
+ join_paths(efi_libdir, 'gnuefi', 'crt0-efi-@0@.o'.format(gnu_efi_path_arch))],
+ [join_paths(efi_libdir, 'elf_@0@_efi.lds'.format(gnu_efi_path_arch)),
+ join_paths(efi_libdir, 'crt0-efi-@0@.o'.format(gnu_efi_path_arch))]]
+ efi_lds = ''
+ foreach location : efi_location_map
+ if efi_lds == ''
+ cmd = run_command('test', '-f', location[0])
+ if cmd.returncode() == 0
+ efi_lds = location[0]
+ efi_crt0 = location[1]
+ endif
+ endif
+ endforeach
+ if efi_lds == ''
+ if get_option('gnu-efi') == 'true'
+ error('gnu-efi support requested, but cannot find efi.lds')
+ else
+ have_gnu_efi = false
+ endif
+ endif
+endif
+
+if have_gnu_efi
+ compile_args = ['-Wall',
+ '-Wextra',
+ '-std=gnu90',
+ '-nostdinc',
+ '-fpic',
+ '-fshort-wchar',
+ '-ffreestanding',
+ '-fno-strict-aliasing',
+ '-fno-stack-protector',
+ '-Wsign-compare',
+ '-Wno-missing-field-initializers',
+ '-isystem', efi_incdir,
+ '-isystem', join_paths(efi_incdir, gnu_efi_path_arch),
+ '-include', efi_config_h,
+ '-include', version_h]
+ if efi_arch == 'x86_64'
+ compile_args += ['-mno-red-zone',
+ '-mno-sse',
+ '-mno-mmx',
+ '-DEFI_FUNCTION_WRAPPER',
+ '-DGNU_EFI_USE_MS_ABI']
+ elif efi_arch == 'ia32'
+ compile_args += ['-mno-sse',
+ '-mno-mmx']
+ elif efi_arch == 'arm'
+ if cc.has_argument('-mgeneral-regs-only')
+ compile_args += ['-mgeneral-regs-only']
+ endif
+
+ if cc.has_argument('-mfpu=none')
+ compile_args += ['-mfpu=none']
+ endif
+ endif
+ if get_option('werror') == true
+ compile_args += ['-Werror']
+ endif
+ if get_option('buildtype') == 'debug'
+ compile_args += ['-ggdb', '-O0']
+ elif get_option('buildtype') == 'debugoptimized'
+ compile_args += ['-ggdb', '-Og']
+ else
+ compile_args += ['-O2']
+ endif
+
+ efi_ldflags = ['-T', efi_lds,
+ '-shared',
+ '-Bsymbolic',
+ '-nostdlib',
+ '-znocombreloc',
+ '-L', efi_libdir,
+ efi_crt0]
+ if efi_arch == 'aarch64' or efi_arch == 'arm'
+ # Aarch64 and ARM32 don't have an EFI capable objcopy. Use 'binary'
+ # instead, and add required symbols manually.
+ efi_ldflags += ['--defsym=EFI_SUBSYSTEM=0xa']
+ efi_format = ['-O', 'binary']
+ else
+ efi_format = ['--target=efi-app-@0@'.format(gnu_efi_arch)]
+ endif
+
+ systemd_boot_objects = []
+ stub_objects = []
+ foreach file : common_sources + systemd_boot_sources + stub_sources
+ o_file = custom_target(file + '.o',
+ input : file,
+ output : file + '.o',
+ command : efi_cc + ['-c', '@INPUT@', '-o', '@OUTPUT@']
+ + compile_args,
+ depend_files : efi_headers)
+ if (common_sources + systemd_boot_sources).contains(file)
+ systemd_boot_objects += o_file
+ endif
+ if (common_sources + stub_sources).contains(file)
+ stub_objects += o_file
+ endif
+ endforeach
+
+ libgcc_file_name = run_command(efi_cc + ['-print-libgcc-file-name']).stdout().strip()
+ systemd_boot_efi_name = 'systemd-boot@0@.efi'.format(EFI_MACHINE_TYPE_NAME)
+ stub_efi_name = 'linux@0@.efi.stub'.format(EFI_MACHINE_TYPE_NAME)
+ no_undefined_symbols = find_program('no-undefined-symbols.sh')
+
+ foreach tuple : [['systemd_boot.so', systemd_boot_efi_name, systemd_boot_objects],
+ ['stub.so', stub_efi_name, stub_objects]]
+ so = custom_target(
+ tuple[0],
+ input : tuple[2],
+ output : tuple[0],
+ command : [efi_ld, '-o', '@OUTPUT@'] +
+ efi_ldflags + tuple[2] +
+ ['-lefi', '-lgnuefi', libgcc_file_name])
+
+ if want_tests != 'false'
+ test('no-undefined-symbols-' + tuple[0],
+ no_undefined_symbols,
+ args : [so])
+ endif
+
+ stub = custom_target(
+ tuple[1],
+ input : so,
+ output : tuple[1],
+ command : [objcopy,
+ '-j', '.text',
+ '-j', '.sdata',
+ '-j', '.data',
+ '-j', '.dynamic',
+ '-j', '.dynsym',
+ '-j', '.rel*']
+ + efi_format +
+ ['@INPUT@', '@OUTPUT@'],
+ install : true,
+ install_dir : bootlibdir)
+
+ set_variable(tuple[0].underscorify(), so)
+ set_variable(tuple[0].underscorify() + '_stub', stub)
+ endforeach
+
+ ############################################################
+
+ test_efi_disk_img = custom_target(
+ 'test-efi-disk.img',
+ input : [systemd_boot_so, stub_so_stub],
+ output : 'test-efi-disk.img',
+ command : [test_efi_create_disk_sh, '@OUTPUT@',
+ '@INPUT0@', '@INPUT1@', splash_bmp])
+endif
diff --git a/src/boot/efi/missing_efi.h b/src/boot/efi/missing_efi.h
new file mode 100644
index 0000000..1b838af
--- /dev/null
+++ b/src/boot/efi/missing_efi.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <efi.h>
+
+#ifndef EFI_RNG_PROTOCOL_GUID
+
+#define EFI_RNG_PROTOCOL_GUID \
+ { 0x3152bca5, 0xeade, 0x433d, {0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44} }
+
+typedef EFI_GUID EFI_RNG_ALGORITHM;
+
+#define EFI_RNG_ALGORITHM_SP800_90_HASH_256_GUID \
+ {0xa7af67cb, 0x603b, 0x4d42, {0xba, 0x21, 0x70, 0xbf, 0xb6, 0x29, 0x3f, 0x96} }
+
+#define EFI_RNG_ALGORITHM_SP800_90_HMAC_256_GUID \
+ {0xc5149b43, 0xae85, 0x4f53, {0x99, 0x82, 0xb9, 0x43, 0x35, 0xd3, 0xa9, 0xe7} }
+
+#define EFI_RNG_ALGORITHM_SP800_90_CTR_256_GUID \
+ {0x44f0de6e, 0x4d8c, 0x4045, {0xa8, 0xc7, 0x4d, 0xd1, 0x68, 0x85, 0x6b, 0x9e} }
+
+#define EFI_RNG_ALGORITHM_X9_31_3DES_GUID \
+ {0x63c4785a, 0xca34, 0x4012, {0xa3, 0xc8, 0x0b, 0x6a, 0x32, 0x4f, 0x55, 0x46} }
+
+#define EFI_RNG_ALGORITHM_X9_31_AES_GUID \
+ {0xacd03321, 0x777e, 0x4d3d, {0xb1, 0xc8, 0x20, 0xcf, 0xd8, 0x88, 0x20, 0xc9} }
+
+#define EFI_RNG_ALGORITHM_RAW \
+ {0xe43176d7, 0xb6e8, 0x4827, {0xb7, 0x84, 0x7f, 0xfd, 0xc4, 0xb6, 0x85, 0x61} }
+
+INTERFACE_DECL(_EFI_RNG_PROTOCOL);
+
+typedef
+EFI_STATUS
+(EFIAPI *EFI_RNG_GET_INFO) (
+ IN struct _EFI_RNG_PROTOCOL *This,
+ IN OUT UINTN *RNGAlgorithmListSize,
+ OUT EFI_RNG_ALGORITHM *RNGAlgorithmList
+);
+
+typedef
+EFI_STATUS
+(EFIAPI *EFI_RNG_GET_RNG) (
+ IN struct _EFI_RNG_PROTOCOL *This,
+ IN EFI_RNG_ALGORITHM *RNGAlgorithm, OPTIONAL
+ IN UINTN RNGValueLength,
+ OUT UINT8 *RNGValue
+);
+
+typedef struct _EFI_RNG_PROTOCOL {
+ EFI_RNG_GET_INFO GetInfo;
+ EFI_RNG_GET_RNG GetRNG;
+} EFI_RNG_PROTOCOL;
+
+#endif
diff --git a/src/boot/efi/no-undefined-symbols.sh b/src/boot/efi/no-undefined-symbols.sh
new file mode 100755
index 0000000..84cbd5b
--- /dev/null
+++ b/src/boot/efi/no-undefined-symbols.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -eu
+
+if nm -D -u "$1" | grep ' U '; then
+ echo "Undefined symbols detected!"
+ exit 1
+fi
diff --git a/src/boot/efi/pe.c b/src/boot/efi/pe.c
new file mode 100644
index 0000000..f99ecd0
--- /dev/null
+++ b/src/boot/efi/pe.c
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "pe.h"
+#include "util.h"
+
+struct DosFileHeader {
+ UINT8 Magic[2];
+ UINT16 LastSize;
+ UINT16 nBlocks;
+ UINT16 nReloc;
+ UINT16 HdrSize;
+ UINT16 MinAlloc;
+ UINT16 MaxAlloc;
+ UINT16 ss;
+ UINT16 sp;
+ UINT16 Checksum;
+ UINT16 ip;
+ UINT16 cs;
+ UINT16 RelocPos;
+ UINT16 nOverlay;
+ UINT16 reserved[4];
+ UINT16 OEMId;
+ UINT16 OEMInfo;
+ UINT16 reserved2[10];
+ UINT32 ExeHeader;
+} __attribute__((packed));
+
+#define PE_HEADER_MACHINE_I386 0x014c
+#define PE_HEADER_MACHINE_X64 0x8664
+#define PE_HEADER_MACHINE_ARM64 0xaa64
+struct PeFileHeader {
+ UINT16 Machine;
+ UINT16 NumberOfSections;
+ UINT32 TimeDateStamp;
+ UINT32 PointerToSymbolTable;
+ UINT32 NumberOfSymbols;
+ UINT16 SizeOfOptionalHeader;
+ UINT16 Characteristics;
+} __attribute__((packed));
+
+struct PeHeader {
+ UINT8 Magic[4];
+ struct PeFileHeader FileHeader;
+} __attribute__((packed));
+
+struct PeSectionHeader {
+ UINT8 Name[8];
+ UINT32 VirtualSize;
+ UINT32 VirtualAddress;
+ UINT32 SizeOfRawData;
+ UINT32 PointerToRawData;
+ UINT32 PointerToRelocations;
+ UINT32 PointerToLinenumbers;
+ UINT16 NumberOfRelocations;
+ UINT16 NumberOfLinenumbers;
+ UINT32 Characteristics;
+} __attribute__((packed));
+
+EFI_STATUS pe_memory_locate_sections(CHAR8 *base, CHAR8 **sections, UINTN *addrs, UINTN *offsets, UINTN *sizes) {
+ struct DosFileHeader *dos;
+ struct PeHeader *pe;
+ UINTN i;
+ UINTN offset;
+
+ dos = (struct DosFileHeader *)base;
+
+ if (CompareMem(dos->Magic, "MZ", 2) != 0)
+ return EFI_LOAD_ERROR;
+
+ pe = (struct PeHeader *)&base[dos->ExeHeader];
+ if (CompareMem(pe->Magic, "PE\0\0", 4) != 0)
+ return EFI_LOAD_ERROR;
+
+ /* PE32+ Subsystem type */
+ if (pe->FileHeader.Machine != PE_HEADER_MACHINE_X64 &&
+ pe->FileHeader.Machine != PE_HEADER_MACHINE_ARM64 &&
+ pe->FileHeader.Machine != PE_HEADER_MACHINE_I386)
+ return EFI_LOAD_ERROR;
+
+ if (pe->FileHeader.NumberOfSections > 96)
+ return EFI_LOAD_ERROR;
+
+ offset = dos->ExeHeader + sizeof(*pe) + pe->FileHeader.SizeOfOptionalHeader;
+
+ for (i = 0; i < pe->FileHeader.NumberOfSections; i++) {
+ struct PeSectionHeader *sect;
+ UINTN j;
+
+ sect = (struct PeSectionHeader *)&base[offset];
+ for (j = 0; sections[j]; j++) {
+ if (CompareMem(sect->Name, sections[j], strlena(sections[j])) != 0)
+ continue;
+
+ if (addrs)
+ addrs[j] = (UINTN)sect->VirtualAddress;
+ if (offsets)
+ offsets[j] = (UINTN)sect->PointerToRawData;
+ if (sizes)
+ sizes[j] = (UINTN)sect->VirtualSize;
+ }
+ offset += sizeof(*sect);
+ }
+
+ return EFI_SUCCESS;
+}
+
+EFI_STATUS pe_file_locate_sections(EFI_FILE *dir, CHAR16 *path, CHAR8 **sections, UINTN *addrs, UINTN *offsets, UINTN *sizes) {
+ EFI_FILE_HANDLE handle;
+ struct DosFileHeader dos;
+ struct PeHeader pe;
+ UINTN len;
+ UINTN headerlen;
+ EFI_STATUS err;
+ _cleanup_freepool_ CHAR8 *header = NULL;
+
+ err = uefi_call_wrapper(dir->Open, 5, dir, &handle, path, EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err))
+ return err;
+
+ /* MS-DOS stub */
+ len = sizeof(dos);
+ err = uefi_call_wrapper(handle->Read, 3, handle, &len, &dos);
+ if (EFI_ERROR(err))
+ goto out;
+ if (len != sizeof(dos)) {
+ err = EFI_LOAD_ERROR;
+ goto out;
+ }
+
+ err = uefi_call_wrapper(handle->SetPosition, 2, handle, dos.ExeHeader);
+ if (EFI_ERROR(err))
+ goto out;
+
+ len = sizeof(pe);
+ err = uefi_call_wrapper(handle->Read, 3, handle, &len, &pe);
+ if (EFI_ERROR(err))
+ goto out;
+ if (len != sizeof(pe)) {
+ err = EFI_LOAD_ERROR;
+ goto out;
+ }
+
+ headerlen = sizeof(dos) + sizeof(pe) + pe.FileHeader.SizeOfOptionalHeader + pe.FileHeader.NumberOfSections * sizeof(struct PeSectionHeader);
+ header = AllocatePool(headerlen);
+ if (!header) {
+ err = EFI_OUT_OF_RESOURCES;
+ goto out;
+ }
+ len = headerlen;
+ err = uefi_call_wrapper(handle->SetPosition, 2, handle, 0);
+ if (EFI_ERROR(err))
+ goto out;
+
+ err = uefi_call_wrapper(handle->Read, 3, handle, &len, header);
+ if (EFI_ERROR(err))
+ goto out;
+
+ if (len != headerlen) {
+ err = EFI_LOAD_ERROR;
+ goto out;
+ }
+
+ err = pe_memory_locate_sections(header, sections, addrs, offsets, sizes);
+out:
+ uefi_call_wrapper(handle->Close, 1, handle);
+ return err;
+}
diff --git a/src/boot/efi/pe.h b/src/boot/efi/pe.h
new file mode 100644
index 0000000..3e97d43
--- /dev/null
+++ b/src/boot/efi/pe.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+EFI_STATUS pe_memory_locate_sections(CHAR8 *base,
+ CHAR8 **sections, UINTN *addrs, UINTN *offsets, UINTN *sizes);
+EFI_STATUS pe_file_locate_sections(EFI_FILE *dir, CHAR16 *path,
+ CHAR8 **sections, UINTN *addrs, UINTN *offsets, UINTN *sizes);
diff --git a/src/boot/efi/random-seed.c b/src/boot/efi/random-seed.c
new file mode 100644
index 0000000..eda9260
--- /dev/null
+++ b/src/boot/efi/random-seed.c
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "missing_efi.h"
+#include "random-seed.h"
+#include "sha256.h"
+#include "util.h"
+#include "shim.h"
+
+#define RANDOM_MAX_SIZE_MIN (32U)
+#define RANDOM_MAX_SIZE_MAX (32U*1024U)
+
+static const EFI_GUID rng_protocol_guid = EFI_RNG_PROTOCOL_GUID;
+
+/* SHA256 gives us 256/8=32 bytes */
+#define HASH_VALUE_SIZE 32
+
+static EFI_STATUS acquire_rng(UINTN size, VOID **ret) {
+ _cleanup_freepool_ VOID *data = NULL;
+ EFI_RNG_PROTOCOL *rng;
+ EFI_STATUS err;
+
+ /* Try to acquire the specified number of bytes from the UEFI RNG */
+
+ err = LibLocateProtocol((EFI_GUID*) &rng_protocol_guid, (VOID**) &rng);
+ if (EFI_ERROR(err))
+ return err;
+ if (!rng)
+ return EFI_UNSUPPORTED;
+
+ data = AllocatePool(size);
+ if (!data)
+ return log_oom();
+
+ err = uefi_call_wrapper(rng->GetRNG, 3, rng, NULL, size, data);
+ if (EFI_ERROR(err)) {
+ Print(L"Failed to acquire RNG data: %r\n", err);
+ return err;
+ }
+
+ *ret = TAKE_PTR(data);
+ return EFI_SUCCESS;
+}
+
+static VOID hash_once(
+ const VOID *old_seed,
+ const VOID *rng,
+ UINTN size,
+ const VOID *system_token,
+ UINTN system_token_size,
+ UINTN counter,
+ UINT8 ret[static HASH_VALUE_SIZE]) {
+
+ /* This hashes together:
+ *
+ * 1. The contents of the old seed file
+ * 2. Some random data acquired from the UEFI RNG (optional)
+ * 3. Some 'system token' the installer installed as EFI variable (optional)
+ * 4. A counter value
+ *
+ * And writes the result to the specified buffer.
+ */
+
+ struct sha256_ctx hash;
+
+ sha256_init_ctx(&hash);
+ sha256_process_bytes(old_seed, size, &hash);
+ if (rng)
+ sha256_process_bytes(rng, size, &hash);
+ if (system_token_size > 0)
+ sha256_process_bytes(system_token, system_token_size, &hash);
+ sha256_process_bytes(&counter, sizeof(counter), &hash);
+ sha256_finish_ctx(&hash, ret);
+}
+
+static EFI_STATUS hash_many(
+ const VOID *old_seed,
+ const VOID *rng,
+ UINTN size,
+ const VOID *system_token,
+ UINTN system_token_size,
+ UINTN counter_start,
+ UINTN n,
+ VOID **ret) {
+
+ _cleanup_freepool_ VOID *output = NULL;
+ UINTN i;
+
+ /* Hashes the specified parameters in counter mode, generating n hash values, with the counter in the
+ * range counter_start…counter_start+n-1. */
+
+ output = AllocatePool(n * HASH_VALUE_SIZE);
+ if (!output)
+ return log_oom();
+
+ for (i = 0; i < n; i++)
+ hash_once(old_seed, rng, size,
+ system_token, system_token_size,
+ counter_start + i,
+ (UINT8*) output + (i * HASH_VALUE_SIZE));
+
+ *ret = TAKE_PTR(output);
+ return EFI_SUCCESS;
+}
+
+static EFI_STATUS mangle_random_seed(
+ const VOID *old_seed,
+ const VOID *rng,
+ UINTN size,
+ const VOID *system_token,
+ UINTN system_token_size,
+ VOID **ret_new_seed,
+ VOID **ret_for_kernel) {
+
+ _cleanup_freepool_ VOID *new_seed = NULL, *for_kernel = NULL;
+ EFI_STATUS err;
+ UINTN n;
+
+ /* This takes the old seed file contents, an (optional) random number acquired from the UEFI RNG, an
+ * (optional) system 'token' installed once by the OS installer in an EFI variable, and hashes them
+ * together in counter mode, generating a new seed (to replace the file on disk) and the seed for the
+ * kernel. To keep things simple, the new seed and kernel data have the same size as the old seed and
+ * RNG data. */
+
+ n = (size + HASH_VALUE_SIZE - 1) / HASH_VALUE_SIZE;
+
+ /* Begin hashing in counter mode at counter 0 for the new seed for the disk */
+ err = hash_many(old_seed, rng, size, system_token, system_token_size, 0, n, &new_seed);
+ if (EFI_ERROR(err))
+ return err;
+
+ /* Continue counting at 'n' for the seed for the kernel */
+ err = hash_many(old_seed, rng, size, system_token, system_token_size, n, n, &for_kernel);
+ if (EFI_ERROR(err))
+ return err;
+
+ *ret_new_seed = TAKE_PTR(new_seed);
+ *ret_for_kernel = TAKE_PTR(for_kernel);
+
+ return EFI_SUCCESS;
+}
+
+EFI_STATUS acquire_system_token(VOID **ret, UINTN *ret_size) {
+ _cleanup_freepool_ CHAR8 *data = NULL;
+ EFI_STATUS err;
+ UINTN size;
+
+ err = efivar_get_raw(&loader_guid, L"LoaderSystemToken", &data, &size);
+ if (EFI_ERROR(err)) {
+ if (err != EFI_NOT_FOUND)
+ Print(L"Failed to read LoaderSystemToken EFI variable: %r", err);
+ return err;
+ }
+
+ if (size <= 0) {
+ Print(L"System token too short, ignoring.");
+ return EFI_NOT_FOUND;
+ }
+
+ *ret = TAKE_PTR(data);
+ *ret_size = size;
+
+ return EFI_SUCCESS;
+}
+
+static VOID validate_sha256(void) {
+
+#ifndef __OPTIMIZE__
+ /* Let's validate our SHA256 implementation. We stole it from glibc, and converted it to UEFI
+ * style. We better check whether it does the right stuff. We use the simpler test vectors from the
+ * SHA spec. Note that we strip this out in optimization builds. */
+
+ static const struct {
+ const char *string;
+ uint8_t hash[HASH_VALUE_SIZE];
+ } array[] = {
+ { "abc",
+ { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea,
+ 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23,
+ 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c,
+ 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad }},
+
+ { "",
+ { 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
+ 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
+ 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
+ 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 }},
+
+ { "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+ { 0x24, 0x8d, 0x6a, 0x61, 0xd2, 0x06, 0x38, 0xb8,
+ 0xe5, 0xc0, 0x26, 0x93, 0x0c, 0x3e, 0x60, 0x39,
+ 0xa3, 0x3c, 0xe4, 0x59, 0x64, 0xff, 0x21, 0x67,
+ 0xf6, 0xec, 0xed, 0xd4, 0x19, 0xdb, 0x06, 0xc1 }},
+
+ { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu",
+ { 0xcf, 0x5b, 0x16, 0xa7, 0x78, 0xaf, 0x83, 0x80,
+ 0x03, 0x6c, 0xe5, 0x9e, 0x7b, 0x04, 0x92, 0x37,
+ 0x0b, 0x24, 0x9b, 0x11, 0xe8, 0xf0, 0x7a, 0x51,
+ 0xaf, 0xac, 0x45, 0x03, 0x7a, 0xfe, 0xe9, 0xd1 }},
+ };
+
+ UINTN i;
+
+ for (i = 0; i < ELEMENTSOF(array); i++) {
+ struct sha256_ctx hash;
+ uint8_t result[HASH_VALUE_SIZE];
+
+ sha256_init_ctx(&hash);
+ sha256_process_bytes(array[i].string, strlena((const CHAR8*) array[i].string), &hash);
+ sha256_finish_ctx(&hash, result);
+
+ if (CompareMem(result, array[i].hash, HASH_VALUE_SIZE) != 0) {
+ Print(L"SHA256 failed validation.\n");
+ uefi_call_wrapper(BS->Stall, 1, 120 * 1000 * 1000);
+ return;
+ }
+ }
+
+ Print(L"SHA256 validated\n");
+#endif
+}
+
+EFI_STATUS process_random_seed(EFI_FILE *root_dir, RandomSeedMode mode) {
+ _cleanup_freepool_ VOID *seed = NULL, *new_seed = NULL, *rng = NULL, *for_kernel = NULL, *system_token = NULL;
+ _cleanup_(FileHandleClosep) EFI_FILE_HANDLE handle = NULL;
+ UINTN size, rsize, wsize, system_token_size = 0;
+ _cleanup_freepool_ EFI_FILE_INFO *info = NULL;
+ EFI_STATUS err;
+
+ validate_sha256();
+
+ if (mode == RANDOM_SEED_OFF)
+ return EFI_NOT_FOUND;
+
+ /* Let's better be safe than sorry, and for now disable this logic in SecureBoot mode, so that we
+ * don't credit a random seed that is not authenticated. */
+ if (secure_boot_enabled())
+ return EFI_NOT_FOUND;
+
+ /* Get some system specific seed that the installer might have placed in an EFI variable. We include
+ * it in our hash. This is protection against golden master image sloppiness, and it remains on the
+ * system, even when disk images are duplicated or swapped out. */
+ err = acquire_system_token(&system_token, &system_token_size);
+ if (mode != RANDOM_SEED_ALWAYS && EFI_ERROR(err))
+ return err;
+
+ err = uefi_call_wrapper(root_dir->Open, 5, root_dir, &handle, L"\\loader\\random-seed", EFI_FILE_MODE_READ|EFI_FILE_MODE_WRITE, 0ULL);
+ if (EFI_ERROR(err)) {
+ if (err != EFI_NOT_FOUND)
+ Print(L"Failed to open random seed file: %r\n", err);
+ return err;
+ }
+
+ info = LibFileInfo(handle);
+ if (!info)
+ return log_oom();
+
+ size = info->FileSize;
+ if (size < RANDOM_MAX_SIZE_MIN) {
+ Print(L"Random seed file is too short?\n");
+ return EFI_INVALID_PARAMETER;
+ }
+
+ if (size > RANDOM_MAX_SIZE_MAX) {
+ Print(L"Random seed file is too large?\n");
+ return EFI_INVALID_PARAMETER;
+ }
+
+ seed = AllocatePool(size);
+ if (!seed)
+ return log_oom();
+
+ rsize = size;
+ err = uefi_call_wrapper(handle->Read, 3, handle, &rsize, seed);
+ if (EFI_ERROR(err)) {
+ Print(L"Failed to read random seed file: %r\n", err);
+ return err;
+ }
+ if (rsize != size) {
+ Print(L"Short read on random seed file\n");
+ return EFI_PROTOCOL_ERROR;
+ }
+
+ err = uefi_call_wrapper(handle->SetPosition, 2, handle, 0);
+ if (EFI_ERROR(err)) {
+ Print(L"Failed to seek to beginning of random seed file: %r\n", err);
+ return err;
+ }
+
+ /* Request some random data from the UEFI RNG. We don't need this to work safely, but it's a good
+ * idea to use it because it helps us for cases where users mistakenly include a random seed in
+ * golden master images that are replicated many times. */
+ (VOID) acquire_rng(size, &rng); /* It's fine if this fails */
+
+ /* Calculate new random seed for the disk and what to pass to the kernel */
+ err = mangle_random_seed(seed, rng, size, system_token, system_token_size, &new_seed, &for_kernel);
+ if (EFI_ERROR(err))
+ return err;
+
+ /* Update the random seed on disk before we use it */
+ wsize = size;
+ err = uefi_call_wrapper(handle->Write, 3, handle, &wsize, new_seed);
+ if (EFI_ERROR(err)) {
+ Print(L"Failed to write random seed file: %r\n", err);
+ return err;
+ }
+ if (wsize != size) {
+ Print(L"Short write on random seed file\n");
+ return EFI_PROTOCOL_ERROR;
+ }
+
+ err = uefi_call_wrapper(handle->Flush, 1, handle);
+ if (EFI_ERROR(err)) {
+ Print(L"Failed to flush random seed file: %r\n");
+ return err;
+ }
+
+ /* We are good to go */
+ err = efivar_set_raw(&loader_guid, L"LoaderRandomSeed", for_kernel, size, FALSE);
+ if (EFI_ERROR(err)) {
+ Print(L"Failed to write random seed to EFI variable: %r\n", err);
+ return err;
+ }
+
+ return EFI_SUCCESS;
+}
diff --git a/src/boot/efi/random-seed.h b/src/boot/efi/random-seed.h
new file mode 100644
index 0000000..0f443e6
--- /dev/null
+++ b/src/boot/efi/random-seed.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <efi.h>
+
+typedef enum RandomSeedMode {
+ RANDOM_SEED_OFF,
+ RANDOM_SEED_WITH_SYSTEM_TOKEN,
+ RANDOM_SEED_ALWAYS,
+ _RANDOM_SEED_MODE_MAX,
+ _RANDOM_SEED_MODE_INVALID = -1,
+} RandomSeedMode;
+
+EFI_STATUS process_random_seed(EFI_FILE *root_dir, RandomSeedMode mode);
diff --git a/src/boot/efi/sha256.c b/src/boot/efi/sha256.c
new file mode 100644
index 0000000..f23066d
--- /dev/null
+++ b/src/boot/efi/sha256.c
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* Stolen from glibc and converted to UEFI style. In glibc it comes with the following copyright blurb: */
+
+/* Functions to compute SHA256 message digest of files or memory blocks.
+ according to the definition of SHA256 in FIPS 180-2.
+ Copyright (C) 2007-2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Written by Ulrich Drepper <drepper@redhat.com>, 2007. */
+
+#include "sha256.h"
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define SWAP(n) \
+ (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+# define SWAP64(n) \
+ (((n) << 56) \
+ | (((n) & 0xff00) << 40) \
+ | (((n) & 0xff0000) << 24) \
+ | (((n) & 0xff000000) << 8) \
+ | (((n) >> 8) & 0xff000000) \
+ | (((n) >> 24) & 0xff0000) \
+ | (((n) >> 40) & 0xff00) \
+ | ((n) >> 56))
+#else
+# define SWAP(n) (n)
+# define SWAP64(n) (n)
+#endif
+
+/* This array contains the bytes used to pad the buffer to the next
+ 64-byte boundary. (FIPS 180-2:5.1.1) */
+static const UINT8 fillbuf[64] = {
+ 0x80, 0 /* , 0, 0, ... */
+};
+
+/* Constants for SHA256 from FIPS 180-2:4.2.2. */
+static const UINT32 K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+static void sha256_process_block(const void *, UINTN, struct sha256_ctx *);
+
+/* Initialize structure containing state of computation.
+ (FIPS 180-2:5.3.2) */
+void sha256_init_ctx(struct sha256_ctx *ctx) {
+ ctx->H[0] = 0x6a09e667;
+ ctx->H[1] = 0xbb67ae85;
+ ctx->H[2] = 0x3c6ef372;
+ ctx->H[3] = 0xa54ff53a;
+ ctx->H[4] = 0x510e527f;
+ ctx->H[5] = 0x9b05688c;
+ ctx->H[6] = 0x1f83d9ab;
+ ctx->H[7] = 0x5be0cd19;
+
+ ctx->total64 = 0;
+ ctx->buflen = 0;
+}
+
+/* Process the remaining bytes in the internal buffer and the usual
+ prolog according to the standard and write the result to RESBUF.
+
+ IMPORTANT: On some systems it is required that RESBUF is correctly
+ aligned for a 32 bits value. */
+void *sha256_finish_ctx(struct sha256_ctx *ctx, void *resbuf) {
+ /* Take yet unprocessed bytes into account. */
+ UINT32 bytes = ctx->buflen;
+ UINTN pad, i;
+
+ /* Now count remaining bytes. */
+ ctx->total64 += bytes;
+
+ pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes;
+ CopyMem (&ctx->buffer[bytes], fillbuf, pad);
+
+ /* Put the 64-bit file length in *bits* at the end of the buffer. */
+ ctx->buffer32[(bytes + pad + 4) / 4] = SWAP (ctx->total[TOTAL64_low] << 3);
+ ctx->buffer32[(bytes + pad) / 4] = SWAP ((ctx->total[TOTAL64_high] << 3)
+ | (ctx->total[TOTAL64_low] >> 29));
+
+ /* Process last bytes. */
+ sha256_process_block (ctx->buffer, bytes + pad + 8, ctx);
+
+ /* Put result from CTX in first 32 bytes following RESBUF. */
+ for (i = 0; i < 8; ++i)
+ ((UINT32 *) resbuf)[i] = SWAP (ctx->H[i]);
+
+ return resbuf;
+}
+
+void sha256_process_bytes(const void *buffer, UINTN len, struct sha256_ctx *ctx) {
+ /* When we already have some bits in our internal buffer concatenate
+ both inputs first. */
+
+ if (ctx->buflen != 0) {
+ UINTN left_over = ctx->buflen;
+ UINTN add = 128 - left_over > len ? len : 128 - left_over;
+
+ CopyMem (&ctx->buffer[left_over], buffer, add);
+ ctx->buflen += add;
+
+ if (ctx->buflen > 64) {
+ sha256_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
+
+ ctx->buflen &= 63;
+ /* The regions in the following copy operation cannot overlap. */
+ CopyMem (ctx->buffer, &ctx->buffer[(left_over + add) & ~63],
+ ctx->buflen);
+ }
+
+ buffer = (const char *) buffer + add;
+ len -= add;
+ }
+
+ /* Process available complete blocks. */
+ if (len >= 64) {
+#if !_STRING_ARCH_unaligned
+/* To check alignment gcc has an appropriate operator. Other
+ compilers don't. */
+# if __GNUC__ >= 2
+# define UNALIGNED_P(p) (((UINTN) p) % __alignof__ (UINT32) != 0)
+# else
+# define UNALIGNED_P(p) (((UINTN) p) % sizeof (UINT32) != 0)
+# endif
+ if (UNALIGNED_P (buffer))
+ while (len > 64) {
+ CopyMem (ctx->buffer, buffer, 64);
+ sha256_process_block (ctx->buffer, 64, ctx);
+ buffer = (const char *) buffer + 64;
+ len -= 64;
+ }
+ else
+#endif
+ {
+ sha256_process_block (buffer, len & ~63, ctx);
+ buffer = (const char *) buffer + (len & ~63);
+ len &= 63;
+ }
+ }
+
+ /* Move remaining bytes into internal buffer. */
+ if (len > 0) {
+ UINTN left_over = ctx->buflen;
+
+ CopyMem (&ctx->buffer[left_over], buffer, len);
+ left_over += len;
+ if (left_over >= 64) {
+ sha256_process_block (ctx->buffer, 64, ctx);
+ left_over -= 64;
+ CopyMem (ctx->buffer, &ctx->buffer[64], left_over);
+ }
+ ctx->buflen = left_over;
+ }
+}
+
+
+/* Process LEN bytes of BUFFER, accumulating context into CTX.
+ It is assumed that LEN % 64 == 0. */
+static void sha256_process_block(const void *buffer, UINTN len, struct sha256_ctx *ctx) {
+ const UINT32 *words = buffer;
+ UINTN nwords = len / sizeof (UINT32);
+ UINT32 a = ctx->H[0];
+ UINT32 b = ctx->H[1];
+ UINT32 c = ctx->H[2];
+ UINT32 d = ctx->H[3];
+ UINT32 e = ctx->H[4];
+ UINT32 f = ctx->H[5];
+ UINT32 g = ctx->H[6];
+ UINT32 h = ctx->H[7];
+
+ /* First increment the byte count. FIPS 180-2 specifies the possible
+ length of the file up to 2^64 bits. Here we only compute the
+ number of bytes. */
+ ctx->total64 += len;
+
+ /* Process all bytes in the buffer with 64 bytes in each round of
+ the loop. */
+ while (nwords > 0) {
+ UINT32 W[64];
+ UINT32 a_save = a;
+ UINT32 b_save = b;
+ UINT32 c_save = c;
+ UINT32 d_save = d;
+ UINT32 e_save = e;
+ UINT32 f_save = f;
+ UINT32 g_save = g;
+ UINT32 h_save = h;
+ UINTN t;
+
+ /* Operators defined in FIPS 180-2:4.1.2. */
+#define Ch(x, y, z) ((x & y) ^ (~x & z))
+#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
+#define S0(x) (CYCLIC (x, 2) ^ CYCLIC (x, 13) ^ CYCLIC (x, 22))
+#define S1(x) (CYCLIC (x, 6) ^ CYCLIC (x, 11) ^ CYCLIC (x, 25))
+#define R0(x) (CYCLIC (x, 7) ^ CYCLIC (x, 18) ^ (x >> 3))
+#define R1(x) (CYCLIC (x, 17) ^ CYCLIC (x, 19) ^ (x >> 10))
+
+ /* It is unfortunate that C does not provide an operator for
+ cyclic rotation. Hope the C compiler is smart enough. */
+#define CYCLIC(w, s) ((w >> s) | (w << (32 - s)))
+
+ /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
+ for (t = 0; t < 16; ++t) {
+ W[t] = SWAP (*words);
+ ++words;
+ }
+ for (t = 16; t < 64; ++t)
+ W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
+
+ /* The actual computation according to FIPS 180-2:6.2.2 step 3. */
+ for (t = 0; t < 64; ++t) {
+ UINT32 T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
+ UINT32 T2 = S0 (a) + Maj (a, b, c);
+ h = g;
+ g = f;
+ f = e;
+ e = d + T1;
+ d = c;
+ c = b;
+ b = a;
+ a = T1 + T2;
+ }
+
+ /* Add the starting values of the context according to FIPS 180-2:6.2.2
+ step 4. */
+ a += a_save;
+ b += b_save;
+ c += c_save;
+ d += d_save;
+ e += e_save;
+ f += f_save;
+ g += g_save;
+ h += h_save;
+
+ /* Prepare for the next round. */
+ nwords -= 16;
+ }
+
+ /* Put checksum in context given as argument. */
+ ctx->H[0] = a;
+ ctx->H[1] = b;
+ ctx->H[2] = c;
+ ctx->H[3] = d;
+ ctx->H[4] = e;
+ ctx->H[5] = f;
+ ctx->H[6] = g;
+ ctx->H[7] = h;
+}
diff --git a/src/boot/efi/sha256.h b/src/boot/efi/sha256.h
new file mode 100644
index 0000000..464be59
--- /dev/null
+++ b/src/boot/efi/sha256.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <efi.h>
+#include <efilib.h>
+
+struct sha256_ctx {
+ UINT32 H[8];
+
+ union {
+ UINT64 total64;
+#define TOTAL64_low (1 - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#define TOTAL64_high (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+ UINT32 total[2];
+ };
+
+ UINT32 buflen;
+
+ union {
+ UINT8 buffer[128]; /* NB: always correctly aligned for UINT32. */
+ UINT32 buffer32[32];
+ UINT64 buffer64[16];
+ };
+};
+
+void sha256_init_ctx(struct sha256_ctx *ctx);
+void *sha256_finish_ctx(struct sha256_ctx *ctx, VOID *resbuf);
+void sha256_process_bytes(const void *buffer, UINTN len, struct sha256_ctx *ctx);
diff --git a/src/boot/efi/shim.c b/src/boot/efi/shim.c
new file mode 100644
index 0000000..3dc1008
--- /dev/null
+++ b/src/boot/efi/shim.c
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Port to systemd-boot
+ * Copyright © 2017 Max Resch <resch.max@gmail.com>
+ *
+ * Security Policy Handling
+ * Copyright © 2012 <James.Bottomley@HansenPartnership.com>
+ * https://github.com/mjg59/efitools
+ */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "util.h"
+#include "shim.h"
+
+#if defined(__x86_64__) || defined(__i386__)
+#define __sysv_abi__ __attribute__((sysv_abi))
+#else
+#define __sysv_abi__
+#endif
+
+struct ShimLock {
+ EFI_STATUS __sysv_abi__ (*shim_verify) (VOID *buffer, UINT32 size);
+
+ /* context is actually a struct for the PE header, but it isn't needed so void is sufficient just do define the interface
+ * see shim.c/shim.h and PeHeader.h in the github shim repo */
+ EFI_STATUS __sysv_abi__ (*generate_hash) (VOID *data, UINT32 datasize, VOID *context, UINT8 *sha256hash, UINT8 *sha1hash);
+
+ EFI_STATUS __sysv_abi__ (*read_header) (VOID *data, UINT32 datasize, VOID *context);
+};
+
+static const EFI_GUID simple_fs_guid = SIMPLE_FILE_SYSTEM_PROTOCOL;
+static const EFI_GUID global_guid = EFI_GLOBAL_VARIABLE;
+
+static const EFI_GUID security_protocol_guid = { 0xa46423e3, 0x4617, 0x49f1, {0xb9, 0xff, 0xd1, 0xbf, 0xa9, 0x11, 0x58, 0x39 } };
+static const EFI_GUID security2_protocol_guid = { 0x94ab2f58, 0x1438, 0x4ef1, {0x91, 0x52, 0x18, 0x94, 0x1a, 0x3a, 0x0e, 0x68 } };
+static const EFI_GUID shim_lock_guid = { 0x605dab50, 0xe046, 0x4300, {0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23} };
+
+BOOLEAN shim_loaded(void) {
+ struct ShimLock *shim_lock;
+
+ return uefi_call_wrapper(BS->LocateProtocol, 3, (EFI_GUID*) &shim_lock_guid, NULL, (VOID**) &shim_lock) == EFI_SUCCESS;
+}
+
+static BOOLEAN shim_validate(VOID *data, UINT32 size) {
+ struct ShimLock *shim_lock;
+
+ if (!data)
+ return FALSE;
+
+ if (uefi_call_wrapper(BS->LocateProtocol, 3, (EFI_GUID*) &shim_lock_guid, NULL, (VOID**) &shim_lock) != EFI_SUCCESS)
+ return FALSE;
+
+ if (!shim_lock)
+ return FALSE;
+
+ return shim_lock->shim_verify(data, size) == EFI_SUCCESS;
+}
+
+BOOLEAN secure_boot_enabled(void) {
+ _cleanup_freepool_ CHAR8 *b = NULL;
+ UINTN size;
+
+ if (efivar_get_raw(&global_guid, L"SecureBoot", &b, &size) == EFI_SUCCESS)
+ return *b > 0;
+
+ return FALSE;
+}
+
+/*
+ * See the UEFI Platform Initialization manual (Vol2: DXE) for this
+ */
+struct _EFI_SECURITY2_PROTOCOL;
+struct _EFI_SECURITY_PROTOCOL;
+struct _EFI_DEVICE_PATH_PROTOCOL;
+
+typedef struct _EFI_SECURITY2_PROTOCOL EFI_SECURITY2_PROTOCOL;
+typedef struct _EFI_SECURITY_PROTOCOL EFI_SECURITY_PROTOCOL;
+typedef struct _EFI_DEVICE_PATH_PROTOCOL EFI_DEVICE_PATH_PROTOCOL;
+
+typedef EFI_STATUS (EFIAPI *EFI_SECURITY_FILE_AUTHENTICATION_STATE) (
+ const EFI_SECURITY_PROTOCOL *This,
+ UINT32 AuthenticationStatus,
+ const EFI_DEVICE_PATH_PROTOCOL *File
+);
+
+typedef EFI_STATUS (EFIAPI *EFI_SECURITY2_FILE_AUTHENTICATION) (
+ const EFI_SECURITY2_PROTOCOL *This,
+ const EFI_DEVICE_PATH_PROTOCOL *DevicePath,
+ VOID *FileBuffer,
+ UINTN FileSize,
+ BOOLEAN BootPolicy
+);
+
+struct _EFI_SECURITY2_PROTOCOL {
+ EFI_SECURITY2_FILE_AUTHENTICATION FileAuthentication;
+};
+
+struct _EFI_SECURITY_PROTOCOL {
+ EFI_SECURITY_FILE_AUTHENTICATION_STATE FileAuthenticationState;
+};
+
+/* Handle to the original authenticator for security1 protocol */
+static EFI_SECURITY_FILE_AUTHENTICATION_STATE esfas = NULL;
+
+/* Handle to the original authenticator for security2 protocol */
+static EFI_SECURITY2_FILE_AUTHENTICATION es2fa = NULL;
+
+/*
+ * Perform shim/MOK and Secure Boot authentication on a binary that's already been
+ * loaded into memory. This function does the platform SB authentication first
+ * but preserves its return value in case of its failure, so that it can be
+ * returned in case of a shim/MOK authentication failure. This is done because
+ * the SB failure code seems to vary from one implementation to another, and I
+ * don't want to interfere with that at this time.
+ */
+static EFIAPI EFI_STATUS security2_policy_authentication (const EFI_SECURITY2_PROTOCOL *this,
+ const EFI_DEVICE_PATH_PROTOCOL *device_path,
+ VOID *file_buffer, UINTN file_size, BOOLEAN boot_policy) {
+ EFI_STATUS status;
+
+ /* Chain original security policy */
+ status = uefi_call_wrapper(es2fa, 5, this, device_path, file_buffer, file_size, boot_policy);
+
+ /* if OK, don't bother with MOK check */
+ if (status == EFI_SUCCESS)
+ return status;
+
+ if (shim_validate(file_buffer, file_size))
+ return EFI_SUCCESS;
+
+ return status;
+}
+
+/*
+ * Perform both shim/MOK and platform Secure Boot authentication. This function loads
+ * the file and performs shim/MOK authentication first simply to avoid double loads
+ * of Linux kernels, which are much more likely to be shim/MOK-signed than platform-signed,
+ * since kernels are big and can take several seconds to load on some computers and
+ * filesystems. This also has the effect of returning whatever the platform code is for
+ * authentication failure, be it EFI_ACCESS_DENIED, EFI_SECURITY_VIOLATION, or something
+ * else. (This seems to vary between implementations.)
+ */
+static EFIAPI EFI_STATUS security_policy_authentication (const EFI_SECURITY_PROTOCOL *this, UINT32 authentication_status,
+ const EFI_DEVICE_PATH_PROTOCOL *device_path_const) {
+ EFI_STATUS status;
+ _cleanup_freepool_ EFI_DEVICE_PATH *dev_path = NULL;
+ _cleanup_freepool_ CHAR16 *dev_path_str = NULL;
+ EFI_HANDLE h;
+ EFI_FILE *root;
+ _cleanup_freepool_ CHAR8 *file_buffer = NULL;
+ UINTN file_size;
+
+ if (!device_path_const)
+ return EFI_INVALID_PARAMETER;
+
+ dev_path = DuplicateDevicePath((EFI_DEVICE_PATH*) device_path_const);
+
+ status = uefi_call_wrapper(BS->LocateDevicePath, 3, (EFI_GUID*) &simple_fs_guid, &dev_path, &h);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ /* No need to check return value, this already happened in efi_main() */
+ root = LibOpenRoot(h);
+ dev_path_str = DevicePathToStr(dev_path);
+
+ status = file_read(root, dev_path_str, 0, 0, &file_buffer, &file_size);
+ if (EFI_ERROR(status))
+ return status;
+ uefi_call_wrapper(root->Close, 1, root);
+
+ if (shim_validate(file_buffer, file_size))
+ return EFI_SUCCESS;
+
+ /* Try using the platform's native policy.... */
+ return uefi_call_wrapper(esfas, 3, this, authentication_status, device_path_const);
+}
+
+EFI_STATUS security_policy_install(void) {
+ EFI_SECURITY_PROTOCOL *security_protocol;
+ EFI_SECURITY2_PROTOCOL *security2_protocol = NULL;
+ EFI_STATUS status;
+
+ /* Already Installed */
+ if (esfas)
+ return EFI_ALREADY_STARTED;
+
+ /*
+ * Don't bother with status here. The call is allowed
+ * to fail, since SECURITY2 was introduced in PI 1.2.1.
+ * Use security2_protocol == NULL as indicator.
+ */
+ uefi_call_wrapper(BS->LocateProtocol, 3, (EFI_GUID*) &security2_protocol_guid, NULL, (VOID**) &security2_protocol);
+
+ status = uefi_call_wrapper(BS->LocateProtocol, 3, (EFI_GUID*) &security_protocol_guid, NULL, (VOID**) &security_protocol);
+ /* This one is mandatory, so there's a serious problem */
+ if (status != EFI_SUCCESS)
+ return status;
+
+ esfas = security_protocol->FileAuthenticationState;
+ security_protocol->FileAuthenticationState = security_policy_authentication;
+
+ if (security2_protocol) {
+ es2fa = security2_protocol->FileAuthentication;
+ security2_protocol->FileAuthentication = security2_policy_authentication;
+ }
+
+ return EFI_SUCCESS;
+}
diff --git a/src/boot/efi/shim.h b/src/boot/efi/shim.h
new file mode 100644
index 0000000..72ecf2e
--- /dev/null
+++ b/src/boot/efi/shim.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Port to systemd-boot
+ * Copyright © 2017 Max Resch <resch.max@gmail.com>
+ *
+ * Security Policy Handling
+ * Copyright © 2012 <James.Bottomley@HansenPartnership.com>
+ * https://github.com/mjg59/efitools
+ */
+#pragma once
+
+BOOLEAN shim_loaded(void);
+
+BOOLEAN secure_boot_enabled(void);
+
+EFI_STATUS security_policy_install(void);
diff --git a/src/boot/efi/splash.c b/src/boot/efi/splash.c
new file mode 100644
index 0000000..e166fec
--- /dev/null
+++ b/src/boot/efi/splash.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "graphics.h"
+#include "splash.h"
+#include "util.h"
+
+struct bmp_file {
+ CHAR8 signature[2];
+ UINT32 size;
+ UINT16 reserved[2];
+ UINT32 offset;
+} __attribute__((packed));
+
+/* we require at least BITMAPINFOHEADER, later versions are
+ accepted, but their features ignored */
+struct bmp_dib {
+ UINT32 size;
+ UINT32 x;
+ UINT32 y;
+ UINT16 planes;
+ UINT16 depth;
+ UINT32 compression;
+ UINT32 image_size;
+ INT32 x_pixel_meter;
+ INT32 y_pixel_meter;
+ UINT32 colors_used;
+ UINT32 colors_important;
+} __attribute__((packed));
+
+struct bmp_map {
+ UINT8 blue;
+ UINT8 green;
+ UINT8 red;
+ UINT8 reserved;
+} __attribute__((packed));
+
+EFI_STATUS bmp_parse_header(UINT8 *bmp, UINTN size, struct bmp_dib **ret_dib,
+ struct bmp_map **ret_map, UINT8 **pixmap) {
+ struct bmp_file *file;
+ struct bmp_dib *dib;
+ struct bmp_map *map;
+ UINTN row_size;
+
+ if (size < sizeof(struct bmp_file) + sizeof(struct bmp_dib))
+ return EFI_INVALID_PARAMETER;
+
+ /* check file header */
+ file = (struct bmp_file *)bmp;
+ if (file->signature[0] != 'B' || file->signature[1] != 'M')
+ return EFI_INVALID_PARAMETER;
+ if (file->size != size)
+ return EFI_INVALID_PARAMETER;
+ if (file->size < file->offset)
+ return EFI_INVALID_PARAMETER;
+
+ /* check device-independent bitmap */
+ dib = (struct bmp_dib *)(bmp + sizeof(struct bmp_file));
+ if (dib->size < sizeof(struct bmp_dib))
+ return EFI_UNSUPPORTED;
+
+ switch (dib->depth) {
+ case 1:
+ case 4:
+ case 8:
+ case 24:
+ if (dib->compression != 0)
+ return EFI_UNSUPPORTED;
+
+ break;
+
+ case 16:
+ case 32:
+ if (dib->compression != 0 && dib->compression != 3)
+ return EFI_UNSUPPORTED;
+
+ break;
+
+ default:
+ return EFI_UNSUPPORTED;
+ }
+
+ row_size = ((UINTN) dib->depth * dib->x + 31) / 32 * 4;
+ if (file->size - file->offset < dib->y * row_size)
+ return EFI_INVALID_PARAMETER;
+ if (row_size * dib->y > 64 * 1024 * 1024)
+ return EFI_INVALID_PARAMETER;
+
+ /* check color table */
+ map = (struct bmp_map *)(bmp + sizeof(struct bmp_file) + dib->size);
+ if (file->offset < sizeof(struct bmp_file) + dib->size)
+ return EFI_INVALID_PARAMETER;
+
+ if (file->offset > sizeof(struct bmp_file) + dib->size) {
+ UINT32 map_count;
+ UINTN map_size;
+
+ if (dib->colors_used)
+ map_count = dib->colors_used;
+ else {
+ switch (dib->depth) {
+ case 1:
+ case 4:
+ case 8:
+ map_count = 1 << dib->depth;
+ break;
+
+ default:
+ map_count = 0;
+ break;
+ }
+ }
+
+ map_size = file->offset - (sizeof(struct bmp_file) + dib->size);
+ if (map_size != sizeof(struct bmp_map) * map_count)
+ return EFI_INVALID_PARAMETER;
+ }
+
+ *ret_map = map;
+ *ret_dib = dib;
+ *pixmap = bmp + file->offset;
+
+ return EFI_SUCCESS;
+}
+
+static VOID pixel_blend(UINT32 *dst, const UINT32 source) {
+ UINT32 alpha, src, src_rb, src_g, dst_rb, dst_g, rb, g;
+
+ alpha = (source & 0xff);
+
+ /* convert src from RGBA to XRGB */
+ src = source >> 8;
+
+ /* decompose into RB and G components */
+ src_rb = (src & 0xff00ff);
+ src_g = (src & 0x00ff00);
+
+ dst_rb = (*dst & 0xff00ff);
+ dst_g = (*dst & 0x00ff00);
+
+ /* blend */
+ rb = ((((src_rb - dst_rb) * alpha + 0x800080) >> 8) + dst_rb) & 0xff00ff;
+ g = ((((src_g - dst_g) * alpha + 0x008000) >> 8) + dst_g) & 0x00ff00;
+
+ *dst = (rb | g);
+}
+
+EFI_STATUS bmp_to_blt(EFI_GRAPHICS_OUTPUT_BLT_PIXEL *buf,
+ struct bmp_dib *dib, struct bmp_map *map,
+ UINT8 *pixmap) {
+ UINT8 *in;
+ UINTN y;
+
+ /* transform and copy pixels */
+ in = pixmap;
+ for (y = 0; y < dib->y; y++) {
+ EFI_GRAPHICS_OUTPUT_BLT_PIXEL *out;
+ UINTN row_size;
+ UINTN x;
+
+ out = &buf[(dib->y - y - 1) * dib->x];
+ for (x = 0; x < dib->x; x++, in++, out++) {
+ switch (dib->depth) {
+ case 1: {
+ UINTN i;
+
+ for (i = 0; i < 8 && x < dib->x; i++) {
+ out->Red = map[((*in) >> (7 - i)) & 1].red;
+ out->Green = map[((*in) >> (7 - i)) & 1].green;
+ out->Blue = map[((*in) >> (7 - i)) & 1].blue;
+ out++;
+ x++;
+ }
+ out--;
+ x--;
+ break;
+ }
+
+ case 4: {
+ UINTN i;
+
+ i = (*in) >> 4;
+ out->Red = map[i].red;
+ out->Green = map[i].green;
+ out->Blue = map[i].blue;
+ if (x < (dib->x - 1)) {
+ out++;
+ x++;
+ i = (*in) & 0x0f;
+ out->Red = map[i].red;
+ out->Green = map[i].green;
+ out->Blue = map[i].blue;
+ }
+ break;
+ }
+
+ case 8:
+ out->Red = map[*in].red;
+ out->Green = map[*in].green;
+ out->Blue = map[*in].blue;
+ break;
+
+ case 16: {
+ UINT16 i = *(UINT16 *) in;
+
+ out->Red = (i & 0x7c00) >> 7;
+ out->Green = (i & 0x3e0) >> 2;
+ out->Blue = (i & 0x1f) << 3;
+ in += 1;
+ break;
+ }
+
+ case 24:
+ out->Red = in[2];
+ out->Green = in[1];
+ out->Blue = in[0];
+ in += 2;
+ break;
+
+ case 32: {
+ UINT32 i = *(UINT32 *) in;
+
+ pixel_blend((UINT32 *)out, i);
+
+ in += 3;
+ break;
+ }
+ }
+ }
+
+ /* add row padding; new lines always start at 32 bit boundary */
+ row_size = in - pixmap;
+ in += ((row_size + 3) & ~3) - row_size;
+ }
+
+ return EFI_SUCCESS;
+}
+
+EFI_STATUS graphics_splash(UINT8 *content, UINTN len, const EFI_GRAPHICS_OUTPUT_BLT_PIXEL *background) {
+ EFI_GRAPHICS_OUTPUT_BLT_PIXEL pixel = {};
+ EFI_GUID GraphicsOutputProtocolGuid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ EFI_GRAPHICS_OUTPUT_PROTOCOL *GraphicsOutput = NULL;
+ struct bmp_dib *dib;
+ struct bmp_map *map;
+ UINT8 *pixmap;
+ UINT64 blt_size;
+ _cleanup_freepool_ VOID *blt = NULL;
+ UINTN x_pos = 0;
+ UINTN y_pos = 0;
+ EFI_STATUS err;
+
+ if (!background) {
+ if (StriCmp(L"Apple", ST->FirmwareVendor) == 0) {
+ pixel.Red = 0xc0;
+ pixel.Green = 0xc0;
+ pixel.Blue = 0xc0;
+ }
+ background = &pixel;
+ }
+
+ err = LibLocateProtocol(&GraphicsOutputProtocolGuid, (VOID **)&GraphicsOutput);
+ if (EFI_ERROR(err))
+ return err;
+
+ err = bmp_parse_header(content, len, &dib, &map, &pixmap);
+ if (EFI_ERROR(err))
+ return err;
+
+ if (dib->x < GraphicsOutput->Mode->Info->HorizontalResolution)
+ x_pos = (GraphicsOutput->Mode->Info->HorizontalResolution - dib->x) / 2;
+ if (dib->y < GraphicsOutput->Mode->Info->VerticalResolution)
+ y_pos = (GraphicsOutput->Mode->Info->VerticalResolution - dib->y) / 2;
+
+ uefi_call_wrapper(GraphicsOutput->Blt, 10, GraphicsOutput,
+ (EFI_GRAPHICS_OUTPUT_BLT_PIXEL *)background,
+ EfiBltVideoFill, 0, 0, 0, 0,
+ GraphicsOutput->Mode->Info->HorizontalResolution,
+ GraphicsOutput->Mode->Info->VerticalResolution, 0);
+
+ /* EFI buffer */
+ blt_size = sizeof(EFI_GRAPHICS_OUTPUT_BLT_PIXEL) * dib->x * dib->y;
+ blt = AllocatePool(blt_size);
+ if (!blt)
+ return EFI_OUT_OF_RESOURCES;
+
+ err = uefi_call_wrapper(GraphicsOutput->Blt, 10, GraphicsOutput,
+ blt, EfiBltVideoToBltBuffer, x_pos, y_pos, 0, 0,
+ dib->x, dib->y, 0);
+ if (EFI_ERROR(err))
+ return err;
+
+ err = bmp_to_blt(blt, dib, map, pixmap);
+ if (EFI_ERROR(err))
+ return err;
+
+ err = graphics_mode(TRUE);
+ if (EFI_ERROR(err))
+ return err;
+
+ return uefi_call_wrapper(GraphicsOutput->Blt, 10, GraphicsOutput,
+ blt, EfiBltBufferToVideo, 0, 0, x_pos, y_pos,
+ dib->x, dib->y, 0);
+}
diff --git a/src/boot/efi/splash.h b/src/boot/efi/splash.h
new file mode 100644
index 0000000..0ba45a0
--- /dev/null
+++ b/src/boot/efi/splash.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+EFI_STATUS graphics_splash(UINT8 *content, UINTN len, const EFI_GRAPHICS_OUTPUT_BLT_PIXEL *background);
diff --git a/src/boot/efi/stub.c b/src/boot/efi/stub.c
new file mode 100644
index 0000000..a09f47c
--- /dev/null
+++ b/src/boot/efi/stub.c
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "disk.h"
+#include "graphics.h"
+#include "linux.h"
+#include "measure.h"
+#include "pe.h"
+#include "splash.h"
+#include "util.h"
+
+/* magic string to find in the binary image */
+static const char __attribute__((used)) magic[] = "#### LoaderInfo: systemd-stub " GIT_VERSION " ####";
+
+static const EFI_GUID global_guid = EFI_GLOBAL_VARIABLE;
+
+EFI_STATUS efi_main(EFI_HANDLE image, EFI_SYSTEM_TABLE *sys_table) {
+ EFI_LOADED_IMAGE *loaded_image;
+ _cleanup_freepool_ CHAR8 *b = NULL;
+ UINTN size;
+ BOOLEAN secure = FALSE;
+ CHAR8 *sections[] = {
+ (CHAR8 *)".cmdline",
+ (CHAR8 *)".linux",
+ (CHAR8 *)".initrd",
+ (CHAR8 *)".splash",
+ NULL
+ };
+ UINTN addrs[ELEMENTSOF(sections)-1] = {};
+ UINTN offs[ELEMENTSOF(sections)-1] = {};
+ UINTN szs[ELEMENTSOF(sections)-1] = {};
+ CHAR8 *cmdline = NULL;
+ UINTN cmdline_len;
+ CHAR16 uuid[37];
+ EFI_STATUS err;
+
+ InitializeLib(image, sys_table);
+
+ err = uefi_call_wrapper(BS->OpenProtocol, 6, image, &LoadedImageProtocol, (VOID **)&loaded_image,
+ image, NULL, EFI_OPEN_PROTOCOL_GET_PROTOCOL);
+ if (EFI_ERROR(err)) {
+ Print(L"Error getting a LoadedImageProtocol handle: %r ", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+
+ if (efivar_get_raw(&global_guid, L"SecureBoot", &b, &size) == EFI_SUCCESS)
+ if (*b > 0)
+ secure = TRUE;
+
+ err = pe_memory_locate_sections(loaded_image->ImageBase, sections, addrs, offs, szs);
+ if (EFI_ERROR(err)) {
+ Print(L"Unable to locate embedded .linux section: %r ", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+ }
+
+ if (szs[0] > 0)
+ cmdline = (CHAR8 *)(loaded_image->ImageBase + addrs[0]);
+
+ cmdline_len = szs[0];
+
+ /* if we are not in secure boot mode, or none was provided, accept a custom command line and replace the built-in one */
+ if ((!secure || cmdline_len == 0) && loaded_image->LoadOptionsSize > 0 && *(CHAR16 *)loaded_image->LoadOptions > 0x1F) {
+ CHAR16 *options;
+ CHAR8 *line;
+ UINTN i;
+
+ options = (CHAR16 *)loaded_image->LoadOptions;
+ cmdline_len = (loaded_image->LoadOptionsSize / sizeof(CHAR16)) * sizeof(CHAR8);
+ line = AllocatePool(cmdline_len);
+ for (i = 0; i < cmdline_len; i++)
+ line[i] = options[i];
+ cmdline = line;
+
+#if ENABLE_TPM
+ /* Try to log any options to the TPM, especially manually edited options */
+ err = tpm_log_event(SD_TPM_PCR,
+ (EFI_PHYSICAL_ADDRESS) (UINTN) loaded_image->LoadOptions,
+ loaded_image->LoadOptionsSize, loaded_image->LoadOptions);
+ if (EFI_ERROR(err)) {
+ Print(L"Unable to add image options measurement: %r", err);
+ uefi_call_wrapper(BS->Stall, 1, 200 * 1000);
+ }
+#endif
+ }
+
+ /* Export the device path this image is started from, if it's not set yet */
+ if (efivar_get_raw(&loader_guid, L"LoaderDevicePartUUID", NULL, NULL) != EFI_SUCCESS)
+ if (disk_get_part_uuid(loaded_image->DeviceHandle, uuid) == EFI_SUCCESS)
+ efivar_set(L"LoaderDevicePartUUID", uuid, FALSE);
+
+ /* if LoaderImageIdentifier is not set, assume the image with this stub was loaded directly from UEFI */
+ if (efivar_get_raw(&loader_guid, L"LoaderImageIdentifier", NULL, NULL) != EFI_SUCCESS) {
+ _cleanup_freepool_ CHAR16 *s;
+
+ s = DevicePathToStr(loaded_image->FilePath);
+ efivar_set(L"LoaderImageIdentifier", s, FALSE);
+ }
+
+ /* if LoaderFirmwareInfo is not set, let's set it */
+ if (efivar_get_raw(&loader_guid, L"LoaderFirmwareInfo", NULL, NULL) != EFI_SUCCESS) {
+ _cleanup_freepool_ CHAR16 *s;
+
+ s = PoolPrint(L"%s %d.%02d", ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
+ efivar_set(L"LoaderFirmwareInfo", s, FALSE);
+ }
+
+ /* ditto for LoaderFirmwareType */
+ if (efivar_get_raw(&loader_guid, L"LoaderFirmwareType", NULL, NULL) != EFI_SUCCESS) {
+ _cleanup_freepool_ CHAR16 *s;
+
+ s = PoolPrint(L"UEFI %d.%02d", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff);
+ efivar_set(L"LoaderFirmwareType", s, FALSE);
+ }
+
+ /* add StubInfo */
+ if (efivar_get_raw(&loader_guid, L"StubInfo", NULL, NULL) != EFI_SUCCESS)
+ efivar_set(L"StubInfo", L"systemd-stub " GIT_VERSION, FALSE);
+
+ if (szs[3] > 0)
+ graphics_splash((UINT8 *)((UINTN)loaded_image->ImageBase + addrs[3]), szs[3], NULL);
+
+ err = linux_exec(image, cmdline, cmdline_len,
+ (UINTN)loaded_image->ImageBase + addrs[1],
+ (UINTN)loaded_image->ImageBase + addrs[2], szs[2]);
+
+ graphics_mode(FALSE);
+ Print(L"Execution of embedded linux image failed: %r\n", err);
+ uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return err;
+}
diff --git a/src/boot/efi/util.c b/src/boot/efi/util.c
new file mode 100644
index 0000000..2712c2d
--- /dev/null
+++ b/src/boot/efi/util.c
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <efi.h>
+#include <efilib.h>
+
+#include "util.h"
+
+/*
+ * Allocated random UUID, intended to be shared across tools that implement
+ * the (ESP)\loader\entries\<vendor>-<revision>.conf convention and the
+ * associated EFI variables.
+ */
+const EFI_GUID loader_guid = { 0x4a67b082, 0x0a4c, 0x41cf, {0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f} };
+
+#ifdef __x86_64__
+UINT64 ticks_read(VOID) {
+ UINT64 a, d;
+ __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
+ return (d << 32) | a;
+}
+#elif defined(__i386__)
+UINT64 ticks_read(VOID) {
+ UINT64 val;
+ __asm__ volatile ("rdtsc" : "=A" (val));
+ return val;
+}
+#else
+UINT64 ticks_read(VOID) {
+ UINT64 val = 1;
+ return val;
+}
+#endif
+
+/* count TSC ticks during a millisecond delay */
+UINT64 ticks_freq(VOID) {
+ UINT64 ticks_start, ticks_end;
+
+ ticks_start = ticks_read();
+ uefi_call_wrapper(BS->Stall, 1, 1000);
+ ticks_end = ticks_read();
+
+ return (ticks_end - ticks_start) * 1000UL;
+}
+
+UINT64 time_usec(VOID) {
+ UINT64 ticks;
+ static UINT64 freq;
+
+ ticks = ticks_read();
+ if (ticks == 0)
+ return 0;
+
+ if (freq == 0) {
+ freq = ticks_freq();
+ if (freq == 0)
+ return 0;
+ }
+
+ return 1000UL * 1000UL * ticks / freq;
+}
+
+EFI_STATUS parse_boolean(const CHAR8 *v, BOOLEAN *b) {
+ if (!v)
+ return EFI_INVALID_PARAMETER;
+
+ if (strcmpa(v, (CHAR8 *)"1") == 0 ||
+ strcmpa(v, (CHAR8 *)"yes") == 0 ||
+ strcmpa(v, (CHAR8 *)"y") == 0 ||
+ strcmpa(v, (CHAR8 *)"true") == 0) {
+ *b = TRUE;
+ return EFI_SUCCESS;
+ }
+
+ if (strcmpa(v, (CHAR8 *)"0") == 0 ||
+ strcmpa(v, (CHAR8 *)"no") == 0 ||
+ strcmpa(v, (CHAR8 *)"n") == 0 ||
+ strcmpa(v, (CHAR8 *)"false") == 0) {
+ *b = FALSE;
+ return EFI_SUCCESS;
+ }
+
+ return EFI_INVALID_PARAMETER;
+}
+
+EFI_STATUS efivar_set_raw(const EFI_GUID *vendor, const CHAR16 *name, const VOID *buf, UINTN size, BOOLEAN persistent) {
+ UINT32 flags;
+
+ flags = EFI_VARIABLE_BOOTSERVICE_ACCESS|EFI_VARIABLE_RUNTIME_ACCESS;
+ if (persistent)
+ flags |= EFI_VARIABLE_NON_VOLATILE;
+
+ return uefi_call_wrapper(RT->SetVariable, 5, (CHAR16*) name, (EFI_GUID *)vendor, flags, size, (VOID*) buf);
+}
+
+EFI_STATUS efivar_set(const CHAR16 *name, const CHAR16 *value, BOOLEAN persistent) {
+ return efivar_set_raw(&loader_guid, name, value, value ? (StrLen(value)+1) * sizeof(CHAR16) : 0, persistent);
+}
+
+EFI_STATUS efivar_set_int(CHAR16 *name, UINTN i, BOOLEAN persistent) {
+ CHAR16 str[32];
+
+ SPrint(str, 32, L"%u", i);
+ return efivar_set(name, str, persistent);
+}
+
+EFI_STATUS efivar_get(const CHAR16 *name, CHAR16 **value) {
+ _cleanup_freepool_ CHAR8 *buf = NULL;
+ EFI_STATUS err;
+ CHAR16 *val;
+ UINTN size;
+
+ err = efivar_get_raw(&loader_guid, name, &buf, &size);
+ if (EFI_ERROR(err))
+ return err;
+
+ /* Make sure there are no incomplete characters in the buffer */
+ if ((size % 2) != 0)
+ return EFI_INVALID_PARAMETER;
+
+ if (!value)
+ return EFI_SUCCESS;
+
+ /* Return buffer directly if it happens to be NUL terminated already */
+ if (size >= 2 && buf[size-2] == 0 && buf[size-1] == 0) {
+ *value = (CHAR16*) TAKE_PTR(buf);
+ return EFI_SUCCESS;
+ }
+
+ /* Make sure a terminating NUL is available at the end */
+ val = AllocatePool(size + 2);
+ if (!val)
+ return EFI_OUT_OF_RESOURCES;
+
+ CopyMem(val, buf, size);
+ val[size/2] = 0; /* NUL terminate */
+
+ *value = val;
+ return EFI_SUCCESS;
+}
+
+EFI_STATUS efivar_get_int(const CHAR16 *name, UINTN *i) {
+ _cleanup_freepool_ CHAR16 *val = NULL;
+ EFI_STATUS err;
+
+ err = efivar_get(name, &val);
+ if (!EFI_ERROR(err) && i)
+ *i = Atoi(val);
+
+ return err;
+}
+
+EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const CHAR16 *name, CHAR8 **buffer, UINTN *size) {
+ _cleanup_freepool_ CHAR8 *buf = NULL;
+ UINTN l;
+ EFI_STATUS err;
+
+ l = sizeof(CHAR16 *) * EFI_MAXIMUM_VARIABLE_SIZE;
+ buf = AllocatePool(l);
+ if (!buf)
+ return EFI_OUT_OF_RESOURCES;
+
+ err = uefi_call_wrapper(RT->GetVariable, 5, (CHAR16*) name, (EFI_GUID *)vendor, NULL, &l, buf);
+ if (!EFI_ERROR(err)) {
+
+ if (buffer)
+ *buffer = TAKE_PTR(buf);
+
+ if (size)
+ *size = l;
+ }
+
+ return err;
+}
+
+VOID efivar_set_time_usec(CHAR16 *name, UINT64 usec) {
+ CHAR16 str[32];
+
+ if (usec == 0)
+ usec = time_usec();
+ if (usec == 0)
+ return;
+
+ SPrint(str, 32, L"%ld", usec);
+ efivar_set(name, str, FALSE);
+}
+
+static INTN utf8_to_16(CHAR8 *stra, CHAR16 *c) {
+ CHAR16 unichar;
+ UINTN len;
+ UINTN i;
+
+ if (!(stra[0] & 0x80))
+ len = 1;
+ else if ((stra[0] & 0xe0) == 0xc0)
+ len = 2;
+ else if ((stra[0] & 0xf0) == 0xe0)
+ len = 3;
+ else if ((stra[0] & 0xf8) == 0xf0)
+ len = 4;
+ else if ((stra[0] & 0xfc) == 0xf8)
+ len = 5;
+ else if ((stra[0] & 0xfe) == 0xfc)
+ len = 6;
+ else
+ return -1;
+
+ switch (len) {
+ case 1:
+ unichar = stra[0];
+ break;
+ case 2:
+ unichar = stra[0] & 0x1f;
+ break;
+ case 3:
+ unichar = stra[0] & 0x0f;
+ break;
+ case 4:
+ unichar = stra[0] & 0x07;
+ break;
+ case 5:
+ unichar = stra[0] & 0x03;
+ break;
+ case 6:
+ unichar = stra[0] & 0x01;
+ break;
+ }
+
+ for (i = 1; i < len; i++) {
+ if ((stra[i] & 0xc0) != 0x80)
+ return -1;
+ unichar <<= 6;
+ unichar |= stra[i] & 0x3f;
+ }
+
+ *c = unichar;
+ return len;
+}
+
+CHAR16 *stra_to_str(CHAR8 *stra) {
+ UINTN strlen;
+ UINTN len;
+ UINTN i;
+ CHAR16 *str;
+
+ len = strlena(stra);
+ str = AllocatePool((len + 1) * sizeof(CHAR16));
+
+ strlen = 0;
+ i = 0;
+ while (i < len) {
+ INTN utf8len;
+
+ utf8len = utf8_to_16(stra + i, str + strlen);
+ if (utf8len <= 0) {
+ /* invalid utf8 sequence, skip the garbage */
+ i++;
+ continue;
+ }
+
+ strlen++;
+ i += utf8len;
+ }
+ str[strlen] = '\0';
+ return str;
+}
+
+CHAR16 *stra_to_path(CHAR8 *stra) {
+ CHAR16 *str;
+ UINTN strlen;
+ UINTN len;
+ UINTN i;
+
+ len = strlena(stra);
+ str = AllocatePool((len + 2) * sizeof(CHAR16));
+
+ str[0] = '\\';
+ strlen = 1;
+ i = 0;
+ while (i < len) {
+ INTN utf8len;
+
+ utf8len = utf8_to_16(stra + i, str + strlen);
+ if (utf8len <= 0) {
+ /* invalid utf8 sequence, skip the garbage */
+ i++;
+ continue;
+ }
+
+ if (str[strlen] == '/')
+ str[strlen] = '\\';
+ if (str[strlen] == '\\' && str[strlen-1] == '\\') {
+ /* skip double slashes */
+ i += utf8len;
+ continue;
+ }
+
+ strlen++;
+ i += utf8len;
+ }
+ str[strlen] = '\0';
+ return str;
+}
+
+CHAR8 *strchra(CHAR8 *s, CHAR8 c) {
+ do {
+ if (*s == c)
+ return s;
+ } while (*s++);
+ return NULL;
+}
+
+EFI_STATUS file_read(EFI_FILE_HANDLE dir, const CHAR16 *name, UINTN off, UINTN size, CHAR8 **ret, UINTN *ret_size) {
+ _cleanup_(FileHandleClosep) EFI_FILE_HANDLE handle = NULL;
+ _cleanup_freepool_ CHAR8 *buf = NULL;
+ EFI_STATUS err;
+
+ err = uefi_call_wrapper(dir->Open, 5, dir, &handle, (CHAR16*) name, EFI_FILE_MODE_READ, 0ULL);
+ if (EFI_ERROR(err))
+ return err;
+
+ if (size == 0) {
+ _cleanup_freepool_ EFI_FILE_INFO *info;
+
+ info = LibFileInfo(handle);
+ if (!info)
+ return EFI_OUT_OF_RESOURCES;
+
+ size = info->FileSize+1;
+ }
+
+ if (off > 0) {
+ err = uefi_call_wrapper(handle->SetPosition, 2, handle, off);
+ if (EFI_ERROR(err))
+ return err;
+ }
+
+ buf = AllocatePool(size + 1);
+ if (!buf)
+ return EFI_OUT_OF_RESOURCES;
+
+ err = uefi_call_wrapper(handle->Read, 3, handle, &size, buf);
+ if (EFI_ERROR(err))
+ return err;
+
+ buf[size] = '\0';
+
+ *ret = TAKE_PTR(buf);
+ if (ret_size)
+ *ret_size = size;
+
+ return err;
+}
+
+EFI_STATUS log_oom(void) {
+ Print(L"Out of memory.");
+ (void) uefi_call_wrapper(BS->Stall, 1, 3 * 1000 * 1000);
+ return EFI_OUT_OF_RESOURCES;
+}
diff --git a/src/boot/efi/util.h b/src/boot/efi/util.h
new file mode 100644
index 0000000..916519c
--- /dev/null
+++ b/src/boot/efi/util.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <efi.h>
+#include <efilib.h>
+
+#define ELEMENTSOF(x) (sizeof(x)/sizeof((x)[0]))
+#define OFFSETOF(x,y) __builtin_offsetof(x,y)
+
+static inline UINTN ALIGN_TO(UINTN l, UINTN ali) {
+ return ((l + ali - 1) & ~(ali - 1));
+}
+
+static inline const CHAR16 *yes_no(BOOLEAN b) {
+ return b ? L"yes" : L"no";
+}
+
+EFI_STATUS parse_boolean(const CHAR8 *v, BOOLEAN *b);
+
+UINT64 ticks_read(void);
+UINT64 ticks_freq(void);
+UINT64 time_usec(void);
+
+EFI_STATUS efivar_set(const CHAR16 *name, const CHAR16 *value, BOOLEAN persistent);
+EFI_STATUS efivar_set_raw(const EFI_GUID *vendor, const CHAR16 *name, const VOID *buf, UINTN size, BOOLEAN persistent);
+EFI_STATUS efivar_set_int(CHAR16 *name, UINTN i, BOOLEAN persistent);
+VOID efivar_set_time_usec(CHAR16 *name, UINT64 usec);
+
+EFI_STATUS efivar_get(const CHAR16 *name, CHAR16 **value);
+EFI_STATUS efivar_get_raw(const EFI_GUID *vendor, const CHAR16 *name, CHAR8 **buffer, UINTN *size);
+EFI_STATUS efivar_get_int(const CHAR16 *name, UINTN *i);
+
+CHAR8 *strchra(CHAR8 *s, CHAR8 c);
+CHAR16 *stra_to_path(CHAR8 *stra);
+CHAR16 *stra_to_str(CHAR8 *stra);
+
+EFI_STATUS file_read(EFI_FILE_HANDLE dir, const CHAR16 *name, UINTN off, UINTN size, CHAR8 **content, UINTN *content_size);
+
+static inline void FreePoolp(void *p) {
+ void *q = *(void**) p;
+
+ if (!q)
+ return;
+
+ FreePool(q);
+}
+
+#define _cleanup_(x) __attribute__((__cleanup__(x)))
+#define _cleanup_freepool_ _cleanup_(FreePoolp)
+
+static inline void FileHandleClosep(EFI_FILE_HANDLE *handle) {
+ if (!*handle)
+ return;
+
+ uefi_call_wrapper((*handle)->Close, 1, *handle);
+}
+
+extern const EFI_GUID loader_guid;
+
+#define UINTN_MAX (~(UINTN)0)
+#define INTN_MAX ((INTN)(UINTN_MAX>>1))
+
+#define TAKE_PTR(ptr) \
+ ({ \
+ typeof(ptr) _ptr_ = (ptr); \
+ (ptr) = NULL; \
+ _ptr_; \
+ })
+
+EFI_STATUS log_oom(void);
diff --git a/src/busctl/busctl-introspect.c b/src/busctl/busctl-introspect.c
new file mode 100644
index 0000000..7a5d57f
--- /dev/null
+++ b/src/busctl/busctl-introspect.c
@@ -0,0 +1,730 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "busctl-introspect.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "util.h"
+#include "xml.h"
+
+#define NODE_DEPTH_MAX 16
+
+typedef struct Context {
+ const XMLIntrospectOps *ops;
+ void *userdata;
+
+ char *interface_name;
+ uint64_t interface_flags;
+
+ char *member_name;
+ char *member_signature;
+ char *member_result;
+ uint64_t member_flags;
+ bool member_writable;
+
+ const char *current;
+ void *xml_state;
+} Context;
+
+static void context_reset_member(Context *c) {
+ free(c->member_name);
+ free(c->member_signature);
+ free(c->member_result);
+
+ c->member_name = c->member_signature = c->member_result = NULL;
+ c->member_flags = 0;
+ c->member_writable = false;
+}
+
+static void context_reset_interface(Context *c) {
+ c->interface_name = mfree(c->interface_name);
+ c->interface_flags = 0;
+
+ context_reset_member(c);
+}
+
+static int parse_xml_annotation(Context *context, uint64_t *flags) {
+
+ enum {
+ STATE_ANNOTATION,
+ STATE_NAME,
+ STATE_VALUE
+ } state = STATE_ANNOTATION;
+
+ _cleanup_free_ char *field = NULL, *value = NULL;
+
+ assert(context);
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+
+ int t;
+
+ t = xml_tokenize(&context->current, &name, &context->xml_state, NULL);
+ if (t < 0) {
+ log_error("XML parse error.");
+ return t;
+ }
+
+ if (t == XML_END)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Premature end of XML data.");
+
+ switch (state) {
+
+ case STATE_ANNOTATION:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+
+ if (streq_ptr(name, "name"))
+ state = STATE_NAME;
+
+ else if (streq_ptr(name, "value"))
+ state = STATE_VALUE;
+
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <annotation> attribute %s.",
+ name);
+
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "annotation"))) {
+
+ if (flags) {
+ if (streq_ptr(field, "org.freedesktop.DBus.Deprecated")) {
+
+ if (streq_ptr(value, "true"))
+ *flags |= SD_BUS_VTABLE_DEPRECATED;
+
+ } else if (streq_ptr(field, "org.freedesktop.DBus.Method.NoReply")) {
+
+ if (streq_ptr(value, "true"))
+ *flags |= SD_BUS_VTABLE_METHOD_NO_REPLY;
+
+ } else if (streq_ptr(field, "org.freedesktop.DBus.Property.EmitsChangedSignal")) {
+
+ if (streq_ptr(value, "const"))
+ *flags = (*flags & ~(SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION|SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE)) | SD_BUS_VTABLE_PROPERTY_CONST;
+ else if (streq_ptr(value, "invalidates"))
+ *flags = (*flags & ~(SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE|SD_BUS_VTABLE_PROPERTY_CONST)) | SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION;
+ else if (streq_ptr(value, "false"))
+ *flags = *flags & ~(SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE|SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION);
+ }
+ }
+
+ return 0;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <annotation>. (1)");
+
+ break;
+
+ case STATE_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(field, name);
+
+ state = STATE_ANNOTATION;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <annotation>. (2)");
+
+ break;
+
+ case STATE_VALUE:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(value, name);
+
+ state = STATE_ANNOTATION;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <annotation>. (3)");
+
+ break;
+
+ default:
+ assert_not_reached("Bad state");
+ }
+ }
+}
+
+static int parse_xml_node(Context *context, const char *prefix, unsigned n_depth) {
+
+ enum {
+ STATE_NODE,
+ STATE_NODE_NAME,
+ STATE_INTERFACE,
+ STATE_INTERFACE_NAME,
+ STATE_METHOD,
+ STATE_METHOD_NAME,
+ STATE_METHOD_ARG,
+ STATE_METHOD_ARG_NAME,
+ STATE_METHOD_ARG_TYPE,
+ STATE_METHOD_ARG_DIRECTION,
+ STATE_SIGNAL,
+ STATE_SIGNAL_NAME,
+ STATE_SIGNAL_ARG,
+ STATE_SIGNAL_ARG_NAME,
+ STATE_SIGNAL_ARG_TYPE,
+ STATE_SIGNAL_ARG_DIRECTION,
+ STATE_PROPERTY,
+ STATE_PROPERTY_NAME,
+ STATE_PROPERTY_TYPE,
+ STATE_PROPERTY_ACCESS,
+ } state = STATE_NODE;
+
+ _cleanup_free_ char *node_path = NULL, *argument_type = NULL, *argument_direction = NULL;
+ const char *np = prefix;
+ int r;
+
+ assert(context);
+ assert(prefix);
+
+ if (n_depth > NODE_DEPTH_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "<node> depth too high.");
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+ int t;
+
+ t = xml_tokenize(&context->current, &name, &context->xml_state, NULL);
+ if (t < 0) {
+ log_error("XML parse error.");
+ return t;
+ }
+
+ if (t == XML_END)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Premature end of XML data.");
+
+ switch (state) {
+
+ case STATE_NODE:
+ if (t == XML_ATTRIBUTE_NAME) {
+
+ if (streq_ptr(name, "name"))
+ state = STATE_NODE_NAME;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <node> attribute %s.", name);
+
+ } else if (t == XML_TAG_OPEN) {
+
+ if (streq_ptr(name, "interface"))
+ state = STATE_INTERFACE;
+ else if (streq_ptr(name, "node")) {
+
+ r = parse_xml_node(context, np, n_depth+1);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <node> tag %s.", name);
+
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "node"))) {
+
+ if (context->ops->on_path) {
+ r = context->ops->on_path(node_path ? node_path : np, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <node>. (1)");
+
+ break;
+
+ case STATE_NODE_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+
+ free(node_path);
+
+ if (name[0] == '/')
+ node_path = TAKE_PTR(name);
+ else {
+
+ node_path = path_join(prefix, name);
+ if (!node_path)
+ return log_oom();
+ }
+
+ np = node_path;
+ state = STATE_NODE;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <node>. (2)");
+
+ break;
+
+ case STATE_INTERFACE:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_INTERFACE_NAME;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <interface> attribute %s.",
+ name);
+
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "method"))
+ state = STATE_METHOD;
+ else if (streq_ptr(name, "signal"))
+ state = STATE_SIGNAL;
+ else if (streq_ptr(name, "property")) {
+ context->member_flags |= SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE;
+ state = STATE_PROPERTY;
+ } else if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, &context->interface_flags);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected <interface> tag %s.", name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "interface"))) {
+
+ if (n_depth == 0) {
+ if (context->ops->on_interface) {
+ r = context->ops->on_interface(context->interface_name, context->interface_flags, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ context_reset_interface(context);
+ }
+
+ state = STATE_NODE;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <interface>. (1)");
+
+ break;
+
+ case STATE_INTERFACE_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->interface_name, name);
+
+ state = STATE_INTERFACE;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <interface>. (2)");
+
+ break;
+
+ case STATE_METHOD:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_METHOD_NAME;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <method> attribute %s",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "arg"))
+ state = STATE_METHOD_ARG;
+ else if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, &context->member_flags);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected <method> tag %s.",
+ name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "method"))) {
+
+ if (n_depth == 0) {
+ if (context->ops->on_method) {
+ r = context->ops->on_method(context->interface_name, context->member_name, context->member_signature, context->member_result, context->member_flags, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ context_reset_member(context);
+ }
+
+ state = STATE_INTERFACE;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <method> (1).");
+
+ break;
+
+ case STATE_METHOD_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->member_name, name);
+
+ state = STATE_METHOD;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <method> (2).");
+
+ break;
+
+ case STATE_METHOD_ARG:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_METHOD_ARG_NAME;
+ else if (streq_ptr(name, "type"))
+ state = STATE_METHOD_ARG_TYPE;
+ else if (streq_ptr(name, "direction"))
+ state = STATE_METHOD_ARG_DIRECTION;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected method <arg> attribute %s.",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, NULL);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected method <arg> tag %s.",
+ name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "arg"))) {
+
+ if (n_depth == 0) {
+
+ if (argument_type) {
+ if (!argument_direction || streq(argument_direction, "in")) {
+ if (!strextend(&context->member_signature, argument_type, NULL))
+ return log_oom();
+ } else if (streq(argument_direction, "out")) {
+ if (!strextend(&context->member_result, argument_type, NULL))
+ return log_oom();
+ } else
+ log_error("Unexpected method <arg> direction value '%s'.", argument_direction);
+ }
+
+ argument_type = mfree(argument_type);
+ argument_direction = mfree(argument_direction);
+ }
+
+ state = STATE_METHOD;
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in method <arg>. (1)");
+
+ break;
+
+ case STATE_METHOD_ARG_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE)
+ state = STATE_METHOD_ARG;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in method <arg>. (2)");
+
+ break;
+
+ case STATE_METHOD_ARG_TYPE:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(argument_type, name);
+
+ state = STATE_METHOD_ARG;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in method <arg>. (3)");
+
+ break;
+
+ case STATE_METHOD_ARG_DIRECTION:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(argument_direction, name);
+
+ state = STATE_METHOD_ARG;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in method <arg>. (4)");
+
+ break;
+
+ case STATE_SIGNAL:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_SIGNAL_NAME;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <signal> attribute %s.",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "arg"))
+ state = STATE_SIGNAL_ARG;
+ else if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, &context->member_flags);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected <signal> tag %s.",
+ name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "signal"))) {
+
+ if (n_depth == 0) {
+ if (context->ops->on_signal) {
+ r = context->ops->on_signal(context->interface_name, context->member_name, context->member_signature, context->member_flags, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ context_reset_member(context);
+ }
+
+ state = STATE_INTERFACE;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <signal>. (1)");
+
+ break;
+
+ case STATE_SIGNAL_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->member_name, name);
+
+ state = STATE_SIGNAL;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <signal>. (2)");
+
+ break;
+
+ case STATE_SIGNAL_ARG:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_SIGNAL_ARG_NAME;
+ else if (streq_ptr(name, "type"))
+ state = STATE_SIGNAL_ARG_TYPE;
+ else if (streq_ptr(name, "direction"))
+ state = STATE_SIGNAL_ARG_DIRECTION;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected signal <arg> attribute %s.",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+ if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, NULL);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected signal <arg> tag %s.",
+ name);
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "arg"))) {
+
+ if (argument_type) {
+ if (!argument_direction || streq(argument_direction, "out")) {
+ if (!strextend(&context->member_signature, argument_type, NULL))
+ return log_oom();
+ } else
+ log_error("Unexpected signal <arg> direction value '%s'.", argument_direction);
+
+ argument_type = mfree(argument_type);
+ }
+
+ state = STATE_SIGNAL;
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in signal <arg> (1).");
+
+ break;
+
+ case STATE_SIGNAL_ARG_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE)
+ state = STATE_SIGNAL_ARG;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in signal <arg> (2).");
+
+ break;
+
+ case STATE_SIGNAL_ARG_TYPE:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(argument_type, name);
+
+ state = STATE_SIGNAL_ARG;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in signal <arg> (3).");
+
+ break;
+
+ case STATE_SIGNAL_ARG_DIRECTION:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ free_and_replace(argument_direction, name);
+
+ state = STATE_SIGNAL_ARG;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in signal <arg>. (4)");
+
+ break;
+
+ case STATE_PROPERTY:
+
+ if (t == XML_ATTRIBUTE_NAME) {
+ if (streq_ptr(name, "name"))
+ state = STATE_PROPERTY_NAME;
+ else if (streq_ptr(name, "type"))
+ state = STATE_PROPERTY_TYPE;
+ else if (streq_ptr(name, "access"))
+ state = STATE_PROPERTY_ACCESS;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Unexpected <property> attribute %s.",
+ name);
+ } else if (t == XML_TAG_OPEN) {
+
+ if (streq_ptr(name, "annotation")) {
+ r = parse_xml_annotation(context, &context->member_flags);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected <property> tag %s.",
+ name);
+
+ } else if (t == XML_TAG_CLOSE_EMPTY ||
+ (t == XML_TAG_CLOSE && streq_ptr(name, "property"))) {
+
+ if (n_depth == 0) {
+ if (context->ops->on_property) {
+ r = context->ops->on_property(context->interface_name, context->member_name, context->member_signature, context->member_writable, context->member_flags, context->userdata);
+ if (r < 0)
+ return r;
+ }
+
+ context_reset_member(context);
+ }
+
+ state = STATE_INTERFACE;
+
+ } else if (t != XML_TEXT || !in_charset(name, WHITESPACE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <property>. (1)");
+
+ break;
+
+ case STATE_PROPERTY_NAME:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->member_name, name);
+
+ state = STATE_PROPERTY;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <property>. (2)");
+
+ break;
+
+ case STATE_PROPERTY_TYPE:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+ if (n_depth == 0)
+ free_and_replace(context->member_signature, name);
+
+ state = STATE_PROPERTY;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <property>. (3)");
+
+ break;
+
+ case STATE_PROPERTY_ACCESS:
+
+ if (t == XML_ATTRIBUTE_VALUE) {
+
+ if (streq(name, "readwrite") || streq(name, "write"))
+ context->member_writable = true;
+
+ state = STATE_PROPERTY;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected token in <property>. (4)");
+
+ break;
+ }
+ }
+}
+
+int parse_xml_introspect(const char *prefix, const char *xml, const XMLIntrospectOps *ops, void *userdata) {
+ Context context = {
+ .ops = ops,
+ .userdata = userdata,
+ .current = xml,
+ };
+
+ int r;
+
+ assert(prefix);
+ assert(xml);
+ assert(ops);
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+
+ r = xml_tokenize(&context.current, &name, &context.xml_state, NULL);
+ if (r < 0) {
+ log_error("XML parse error");
+ goto finish;
+ }
+
+ if (r == XML_END) {
+ r = 0;
+ break;
+ }
+
+ if (r == XML_TAG_OPEN) {
+
+ if (streq(name, "node")) {
+ r = parse_xml_node(&context, prefix, 0);
+ if (r < 0)
+ goto finish;
+ } else {
+ log_error("Unexpected tag '%s' in introspection data.", name);
+ r = -EBADMSG;
+ goto finish;
+ }
+ } else if (r != XML_TEXT || !in_charset(name, WHITESPACE)) {
+ log_error("Unexpected token.");
+ r = -EBADMSG;
+ goto finish;
+ }
+ }
+
+finish:
+ context_reset_interface(&context);
+
+ return r;
+}
diff --git a/src/busctl/busctl-introspect.h b/src/busctl/busctl-introspect.h
new file mode 100644
index 0000000..1a045be
--- /dev/null
+++ b/src/busctl/busctl-introspect.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+typedef struct XMLIntrospectOps {
+ int (*on_path)(const char *path, void *userdata);
+ int (*on_interface)(const char *name, uint64_t flags, void *userdata);
+ int (*on_method)(const char *interface, const char *name, const char *signature, const char *result, uint64_t flags, void *userdata);
+ int (*on_signal)(const char *interface, const char *name, const char *signature, uint64_t flags, void *userdata);
+ int (*on_property)(const char *interface, const char *name, const char *signature, bool writable, uint64_t flags, void *userdata);
+} XMLIntrospectOps;
+
+int parse_xml_introspect(const char *prefix, const char *xml, const XMLIntrospectOps *ops, void *userdata);
diff --git a/src/busctl/busctl.c b/src/busctl/busctl.c
new file mode 100644
index 0000000..06a15dd
--- /dev/null
+++ b/src/busctl/busctl.c
@@ -0,0 +1,2607 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "busctl-introspect.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "json.h"
+#include "locale-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "set.h"
+#include "sort-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "verbs.h"
+
+static enum {
+ JSON_OFF,
+ JSON_SHORT,
+ JSON_PRETTY,
+} arg_json = JSON_OFF;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static bool arg_full = false;
+static const char *arg_address = NULL;
+static bool arg_unique = false;
+static bool arg_acquired = false;
+static bool arg_activatable = false;
+static bool arg_show_machine = false;
+static char **arg_matches = NULL;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_user = false;
+static size_t arg_snaplen = 4096;
+static bool arg_list = false;
+static bool arg_quiet = false;
+static bool arg_verbose = false;
+static bool arg_xml_interface = false;
+static bool arg_expect_reply = true;
+static bool arg_auto_start = true;
+static bool arg_allow_interactive_authorization = true;
+static bool arg_augment_creds = true;
+static bool arg_watch_bind = false;
+static usec_t arg_timeout = 0;
+static const char *arg_destination = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_matches, strv_freep);
+
+#define NAME_IS_ACQUIRED INT_TO_PTR(1)
+#define NAME_IS_ACTIVATABLE INT_TO_PTR(2)
+
+static int json_transform_message(sd_bus_message *m, JsonVariant **ret);
+static void json_dump_with_flags(JsonVariant *v, FILE *f);
+
+static int acquire_bus(bool set_monitor, sd_bus **ret) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate bus: %m");
+
+ if (set_monitor) {
+ r = sd_bus_set_monitor(bus, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set monitor mode: %m");
+
+ r = sd_bus_negotiate_creds(bus, true, _SD_BUS_CREDS_ALL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable credentials: %m");
+
+ r = sd_bus_negotiate_timestamp(bus, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable timestamps: %m");
+
+ r = sd_bus_negotiate_fds(bus, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable fds: %m");
+ }
+
+ r = sd_bus_set_bus_client(bus, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set bus client: %m");
+
+ r = sd_bus_set_watch_bind(bus, arg_watch_bind);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set watch-bind setting to '%s': %m",
+ yes_no(arg_watch_bind));
+
+ if (arg_address)
+ r = sd_bus_set_address(bus, arg_address);
+ else {
+ switch (arg_transport) {
+
+ case BUS_TRANSPORT_LOCAL:
+ if (arg_user)
+ r = bus_set_address_user(bus);
+ else
+ r = bus_set_address_system(bus);
+ break;
+
+ case BUS_TRANSPORT_REMOTE:
+ r = bus_set_address_system_remote(bus, arg_host);
+ break;
+
+ case BUS_TRANSPORT_MACHINE:
+ r = bus_set_address_system_machine(bus, arg_host);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unknown transport type.");
+ }
+ }
+ if (r < 0)
+ return bus_log_address_error(r);
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ *ret = TAKE_PTR(bus);
+
+ return 0;
+}
+
+static int list_bus_names(int argc, char **argv, void *userdata) {
+ _cleanup_strv_free_ char **acquired = NULL, **activatable = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_hashmap_free_ Hashmap *names = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ char **i, *k;
+ void *v;
+ int r;
+
+ enum {
+ COLUMN_ACTIVATABLE,
+ COLUMN_NAME,
+ COLUMN_PID,
+ COLUMN_PROCESS,
+ COLUMN_USER,
+ COLUMN_CONNECTION,
+ COLUMN_UNIT,
+ COLUMN_SESSION,
+ COLUMN_DESCRIPTION,
+ COLUMN_MACHINE,
+ };
+
+ if (!arg_unique && !arg_acquired && !arg_activatable)
+ arg_unique = arg_acquired = arg_activatable = true;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_list_names(bus,
+ (arg_acquired || arg_unique) ? &acquired : NULL,
+ arg_activatable ? &activatable : NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list names: %m");
+
+ names = hashmap_new(&string_hash_ops);
+ if (!names)
+ return log_oom();
+
+ STRV_FOREACH(i, acquired) {
+ r = hashmap_put(names, *i, NAME_IS_ACQUIRED);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add to hashmap: %m");
+ }
+
+ STRV_FOREACH(i, activatable) {
+ r = hashmap_put(names, *i, NAME_IS_ACTIVATABLE);
+ if (r < 0 && r != -EEXIST)
+ return log_error_errno(r, "Failed to add to hashmap: %m");
+ }
+
+ table = table_new("activatable",
+ "name",
+ "pid",
+ "process",
+ "user",
+ "connection",
+ "unit",
+ "session",
+ "description",
+ "machine");
+ if (!table)
+ return log_oom();
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ r = table_set_align_percent(table, table_get_cell(table, 0, COLUMN_PID), 100);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set alignment: %m");
+
+ r = table_set_empty_string(table, "-");
+ if (r < 0)
+ return log_error_errno(r, "Failed to set empty string: %m");
+
+ r = table_set_sort(table, (size_t) COLUMN_NAME, (size_t) -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set sort column: %m");
+
+ if (arg_show_machine)
+ r = table_set_display(table, (size_t) COLUMN_NAME,
+ (size_t) COLUMN_PID,
+ (size_t) COLUMN_PROCESS,
+ (size_t) COLUMN_USER,
+ (size_t) COLUMN_CONNECTION,
+ (size_t) COLUMN_UNIT,
+ (size_t) COLUMN_SESSION,
+ (size_t) COLUMN_DESCRIPTION,
+ (size_t) COLUMN_MACHINE,
+ (size_t) -1);
+ else
+ r = table_set_display(table, (size_t) COLUMN_NAME,
+ (size_t) COLUMN_PID,
+ (size_t) COLUMN_PROCESS,
+ (size_t) COLUMN_USER,
+ (size_t) COLUMN_CONNECTION,
+ (size_t) COLUMN_UNIT,
+ (size_t) COLUMN_SESSION,
+ (size_t) COLUMN_DESCRIPTION,
+ (size_t) -1);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to set columns to display: %m");
+
+ table_set_header(table, arg_legend);
+
+ HASHMAP_FOREACH_KEY(v, k, names) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ if (v == NAME_IS_ACTIVATABLE) {
+ r = table_add_many(
+ table,
+ TABLE_INT, PTR_TO_INT(v),
+ TABLE_STRING, k,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_STRING, "(activatable)", TABLE_SET_COLOR, ansi_grey(),
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ continue;
+ }
+
+ assert(v == NAME_IS_ACQUIRED);
+
+ if (!arg_unique && k[0] == ':')
+ continue;
+
+ if (!arg_acquired && k[0] != ':')
+ continue;
+
+ r = table_add_many(table,
+ TABLE_INT, PTR_TO_INT(v),
+ TABLE_STRING, k);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = sd_bus_get_name_creds(
+ bus, k,
+ (arg_augment_creds ? SD_BUS_CREDS_AUGMENT : 0) |
+ SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID|SD_BUS_CREDS_COMM|
+ SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_SESSION|
+ SD_BUS_CREDS_DESCRIPTION, &creds);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to acquire credentials of service %s, ignoring: %m", k);
+
+ r = table_fill_empty(table, COLUMN_MACHINE);
+ } else {
+ const char *unique = NULL, *session = NULL, *unit = NULL, *cn = NULL;
+ pid_t pid;
+ uid_t uid;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r >= 0) {
+ const char *comm = NULL;
+
+ (void) sd_bus_creds_get_comm(creds, &comm);
+
+ r = table_add_many(table,
+ TABLE_PID, pid,
+ TABLE_STRING, strna(comm));
+ } else
+ r = table_add_many(table, TABLE_EMPTY, TABLE_EMPTY);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r >= 0) {
+ _cleanup_free_ char *u = NULL;
+
+ u = uid_to_name(uid);
+ if (!u)
+ return log_oom();
+
+ r = table_add_cell(table, NULL, TABLE_STRING, u);
+ } else
+ r = table_add_cell(table, NULL, TABLE_EMPTY, NULL);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ (void) sd_bus_creds_get_unique_name(creds, &unique);
+ (void) sd_bus_creds_get_unit(creds, &unit);
+ (void) sd_bus_creds_get_session(creds, &session);
+ (void) sd_bus_creds_get_description(creds, &cn);
+
+ r = table_add_many(
+ table,
+ TABLE_STRING, unique,
+ TABLE_STRING, unit,
+ TABLE_STRING, session,
+ TABLE_STRING, cn);
+ }
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (arg_show_machine) {
+ sd_id128_t mid;
+
+ r = sd_bus_get_name_machine_id(bus, k, &mid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to acquire credentials of service %s, ignoring: %m", k);
+ else {
+ char m[SD_ID128_STRING_MAX];
+
+ r = table_add_cell(table, NULL, TABLE_STRING, sd_id128_to_string(mid, m));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ continue; /* line fully filled, no need to fill the remainder below */
+ }
+ }
+
+ r = table_fill_empty(table, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fill line: %m");
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_json)
+ r = table_print_json(table, stdout,
+ (arg_json == JSON_PRETTY ? JSON_FORMAT_PRETTY : JSON_FORMAT_NEWLINE) | JSON_FORMAT_COLOR_AUTO);
+ else
+ r = table_print(table, stdout);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return 0;
+}
+
+static void print_subtree(const char *prefix, const char *path, char **l) {
+ const char *vertical, *space;
+ char **n;
+
+ /* We assume the list is sorted. Let's first skip over the
+ * entry we are looking at. */
+ for (;;) {
+ if (!*l)
+ return;
+
+ if (!streq(*l, path))
+ break;
+
+ l++;
+ }
+
+ vertical = strjoina(prefix, special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
+ space = strjoina(prefix, special_glyph(SPECIAL_GLYPH_TREE_SPACE));
+
+ for (;;) {
+ bool has_more = false;
+
+ if (!*l || !path_startswith(*l, path))
+ break;
+
+ n = l + 1;
+ for (;;) {
+ if (!*n || !path_startswith(*n, path))
+ break;
+
+ if (!path_startswith(*n, *l)) {
+ has_more = true;
+ break;
+ }
+
+ n++;
+ }
+
+ printf("%s%s%s\n",
+ prefix,
+ special_glyph(has_more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT),
+ *l);
+
+ print_subtree(has_more ? vertical : space, *l, l);
+ l = n;
+ }
+}
+
+static void print_tree(char **l) {
+ if (arg_list)
+ strv_print(l);
+ else if (strv_isempty(l))
+ printf("No objects discovered.\n");
+ else if (streq(l[0], "/") && !l[1])
+ printf("Only root object discovered.\n");
+ else
+ print_subtree("", "/", l);
+}
+
+static int on_path(const char *path, void *userdata) {
+ Set *paths = userdata;
+ int r;
+
+ assert(paths);
+
+ r = set_put_strdup(&paths, path);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int find_nodes(sd_bus *bus, const char *service, const char *path, Set *paths) {
+ static const XMLIntrospectOps ops = {
+ .on_path = on_path,
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *xml;
+ int r;
+
+ r = sd_bus_call_method(bus, service, path,
+ "org.freedesktop.DBus.Introspectable", "Introspect",
+ &error, &reply, "");
+ if (r < 0) {
+ printf("%sFailed to introspect object %s of service %s: %s%s\n",
+ ansi_highlight_red(),
+ path, service, bus_error_message(&error, r),
+ ansi_normal());
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "s", &xml);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return parse_xml_introspect(path, xml, &ops, paths);
+}
+
+static int tree_one(sd_bus *bus, const char *service) {
+ _cleanup_set_free_ Set *paths = NULL, *done = NULL, *failed = NULL;
+ _cleanup_free_ char **l = NULL;
+ int r;
+
+ r = set_put_strdup(&paths, "/");
+ if (r < 0)
+ return log_oom();
+
+ done = set_new(&string_hash_ops_free);
+ if (!done)
+ return log_oom();
+
+ failed = set_new(&string_hash_ops_free);
+ if (!failed)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_free_ char *p = NULL;
+ int q;
+
+ p = set_steal_first(paths);
+ if (!p)
+ break;
+
+ if (set_contains(done, p) ||
+ set_contains(failed, p))
+ continue;
+
+ q = find_nodes(bus, service, p, paths);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ q = set_consume(q < 0 ? failed : done, TAKE_PTR(p));
+ assert(q != 0);
+ if (q < 0)
+ return log_oom();
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ l = set_get_strv(done);
+ if (!l)
+ return log_oom();
+
+ strv_sort(l);
+ print_tree(l);
+
+ fflush(stdout);
+
+ return r;
+}
+
+static int tree(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ char **i;
+ int r;
+
+ /* Do superficial verification of arguments before even opening the bus */
+ STRV_FOREACH(i, strv_skip(argv, 1))
+ if (!sd_bus_service_name_is_valid(*i))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid bus service name: %s", *i);
+
+ if (!arg_unique && !arg_acquired)
+ arg_acquired = true;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ if (argc <= 1) {
+ _cleanup_strv_free_ char **names = NULL;
+ bool not_first = false;
+
+ r = sd_bus_list_names(bus, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get name list: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ STRV_FOREACH(i, names) {
+ int q;
+
+ if (!arg_unique && (*i)[0] == ':')
+ continue;
+
+ if (!arg_acquired && (*i)[0] == ':')
+ continue;
+
+ if (not_first)
+ printf("\n");
+
+ printf("Service %s%s%s:\n", ansi_highlight(), *i, ansi_normal());
+
+ q = tree_one(bus, *i);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ not_first = true;
+ }
+ } else
+ STRV_FOREACH(i, strv_skip(argv, 1)) {
+ int q;
+
+ if (i > argv+1)
+ printf("\n");
+
+ if (argv[2]) {
+ (void) pager_open(arg_pager_flags);
+ printf("Service %s%s%s:\n", ansi_highlight(), *i, ansi_normal());
+ }
+
+ q = tree_one(bus, *i);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int format_cmdline(sd_bus_message *m, FILE *f, bool needs_space) {
+ int r;
+
+ for (;;) {
+ const char *contents = NULL;
+ char type;
+ union {
+ uint8_t u8;
+ uint16_t u16;
+ int16_t s16;
+ uint32_t u32;
+ int32_t s32;
+ uint64_t u64;
+ int64_t s64;
+ double d64;
+ const char *string;
+ int i;
+ } basic;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return needs_space;
+
+ if (bus_type_is_container(type) > 0) {
+
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0)
+ return r;
+
+ if (type == SD_BUS_TYPE_ARRAY) {
+ unsigned n = 0;
+
+ /* count array entries */
+ for (;;) {
+
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ n++;
+ }
+
+ r = sd_bus_message_rewind(m, false);
+ if (r < 0)
+ return r;
+
+ if (needs_space)
+ fputc(' ', f);
+
+ fprintf(f, "%u", n);
+ needs_space = true;
+
+ } else if (type == SD_BUS_TYPE_VARIANT) {
+
+ if (needs_space)
+ fputc(' ', f);
+
+ fprintf(f, "%s", contents);
+ needs_space = true;
+ }
+
+ r = format_cmdline(m, f, needs_space);
+ if (r < 0)
+ return r;
+
+ needs_space = r > 0;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ r = sd_bus_message_read_basic(m, type, &basic);
+ if (r < 0)
+ return r;
+
+ if (needs_space)
+ fputc(' ', f);
+
+ switch (type) {
+ case SD_BUS_TYPE_BYTE:
+ fprintf(f, "%u", basic.u8);
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ fputs(true_false(basic.i), f);
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ fprintf(f, "%i", basic.s16);
+ break;
+
+ case SD_BUS_TYPE_UINT16:
+ fprintf(f, "%u", basic.u16);
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ fprintf(f, "%i", basic.s32);
+ break;
+
+ case SD_BUS_TYPE_UINT32:
+ fprintf(f, "%u", basic.u32);
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ fprintf(f, "%" PRIi64, basic.s64);
+ break;
+
+ case SD_BUS_TYPE_UINT64:
+ fprintf(f, "%" PRIu64, basic.u64);
+ break;
+
+ case SD_BUS_TYPE_DOUBLE:
+ fprintf(f, "%g", basic.d64);
+ break;
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ _cleanup_free_ char *b = NULL;
+
+ b = cescape(basic.string);
+ if (!b)
+ return -ENOMEM;
+
+ fprintf(f, "\"%s\"", b);
+ break;
+ }
+
+ case SD_BUS_TYPE_UNIX_FD:
+ fprintf(f, "%i", basic.i);
+ break;
+
+ default:
+ assert_not_reached("Unknown basic type.");
+ }
+
+ needs_space = true;
+ }
+}
+
+typedef struct Member {
+ const char *type;
+ char *interface;
+ char *name;
+ char *signature;
+ char *result;
+ char *value;
+ bool writable;
+ uint64_t flags;
+} Member;
+
+static void member_hash_func(const Member *m, struct siphash *state) {
+ uint64_t arity = 1;
+
+ assert(m);
+ assert(m->type);
+
+ string_hash_func(m->type, state);
+
+ arity += !!m->name + !!m->interface;
+
+ uint64_hash_func(&arity, state);
+
+ if (m->name)
+ string_hash_func(m->name, state);
+
+ if (m->interface)
+ string_hash_func(m->interface, state);
+}
+
+static int member_compare_func(const Member *x, const Member *y) {
+ int d;
+
+ assert(x);
+ assert(y);
+ assert(x->type);
+ assert(y->type);
+
+ d = strcmp_ptr(x->interface, y->interface);
+ if (d != 0)
+ return d;
+
+ d = strcmp(x->type, y->type);
+ if (d != 0)
+ return d;
+
+ return strcmp_ptr(x->name, y->name);
+}
+
+static int member_compare_funcp(Member * const *a, Member * const *b) {
+ return member_compare_func(*a, *b);
+}
+
+static void member_free(Member *m) {
+ if (!m)
+ return;
+
+ free(m->interface);
+ free(m->name);
+ free(m->signature);
+ free(m->result);
+ free(m->value);
+ free(m);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Member*, member_free);
+
+static void member_set_free(Set *s) {
+ set_free_with_destructor(s, member_free);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, member_set_free);
+
+static int on_interface(const char *interface, uint64_t flags, void *userdata) {
+ _cleanup_(member_freep) Member *m;
+ Set *members = userdata;
+ int r;
+
+ assert(interface);
+ assert(members);
+
+ m = new(Member, 1);
+ if (!m)
+ return log_oom();
+
+ *m = (Member) {
+ .type = "interface",
+ .flags = flags,
+ };
+
+ r = free_and_strdup(&m->interface, interface);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(members, m);
+ if (r == -EEXIST)
+ return log_error_errno(r, "Invalid introspection data: duplicate interface '%s'.", interface);
+ if (r < 0)
+ return log_oom();
+
+ m = NULL;
+ return 0;
+}
+
+static int on_method(const char *interface, const char *name, const char *signature, const char *result, uint64_t flags, void *userdata) {
+ _cleanup_(member_freep) Member *m;
+ Set *members = userdata;
+ int r;
+
+ assert(interface);
+ assert(name);
+
+ m = new(Member, 1);
+ if (!m)
+ return log_oom();
+
+ *m = (Member) {
+ .type = "method",
+ .flags = flags,
+ };
+
+ r = free_and_strdup(&m->interface, interface);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->name, name);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->signature, signature);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->result, result);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(members, m);
+ if (r == -EEXIST)
+ return log_error_errno(r, "Invalid introspection data: duplicate method '%s' on interface '%s'.", name, interface);
+ if (r < 0)
+ return log_oom();
+
+ m = NULL;
+ return 0;
+}
+
+static int on_signal(const char *interface, const char *name, const char *signature, uint64_t flags, void *userdata) {
+ _cleanup_(member_freep) Member *m;
+ Set *members = userdata;
+ int r;
+
+ assert(interface);
+ assert(name);
+
+ m = new(Member, 1);
+ if (!m)
+ return log_oom();
+
+ *m = (Member) {
+ .type = "signal",
+ .flags = flags,
+ };
+
+ r = free_and_strdup(&m->interface, interface);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->name, name);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->signature, signature);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(members, m);
+ if (r == -EEXIST)
+ return log_error_errno(r, "Invalid introspection data: duplicate signal '%s' on interface '%s'.", name, interface);
+ if (r < 0)
+ return log_oom();
+
+ m = NULL;
+ return 0;
+}
+
+static int on_property(const char *interface, const char *name, const char *signature, bool writable, uint64_t flags, void *userdata) {
+ _cleanup_(member_freep) Member *m;
+ Set *members = userdata;
+ int r;
+
+ assert(interface);
+ assert(name);
+
+ m = new(Member, 1);
+ if (!m)
+ return log_oom();
+
+ *m = (Member) {
+ .type = "property",
+ .flags = flags,
+ .writable = writable,
+ };
+
+ r = free_and_strdup(&m->interface, interface);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->name, name);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&m->signature, signature);
+ if (r < 0)
+ return log_oom();
+
+ r = set_put(members, m);
+ if (r == -EEXIST)
+ return log_error_errno(r, "Invalid introspection data: duplicate property '%s' on interface '%s'.", name, interface);
+ if (r < 0)
+ return log_oom();
+
+ m = NULL;
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS(member_hash_ops, Member, member_hash_func, member_compare_func);
+
+static int introspect(int argc, char **argv, void *userdata) {
+ static const XMLIntrospectOps ops = {
+ .on_interface = on_interface,
+ .on_method = on_method,
+ .on_signal = on_signal,
+ .on_property = on_property,
+ };
+
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply_xml = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(member_set_freep) Set *members = NULL;
+ unsigned name_width, type_width, signature_width, result_width, j, k = 0;
+ Member *m, **sorted = NULL;
+ const char *xml;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ members = set_new(&member_hash_ops);
+ if (!members)
+ return log_oom();
+
+ r = sd_bus_call_method(bus, argv[1], argv[2],
+ "org.freedesktop.DBus.Introspectable", "Introspect",
+ &error, &reply_xml, "");
+ if (r < 0)
+ return log_error_errno(r, "Failed to introspect object %s of service %s: %s",
+ argv[2], argv[1], bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply_xml, "s", &xml);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (arg_xml_interface) {
+ /* Just dump the received XML and finish */
+ (void) pager_open(arg_pager_flags);
+ puts(xml);
+ return 0;
+ }
+
+ /* First, get list of all properties */
+ r = parse_xml_introspect(argv[2], xml, &ops, members);
+ if (r < 0)
+ return r;
+
+ /* Second, find the current values for them */
+ SET_FOREACH(m, members) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+
+ if (!streq(m->type, "property"))
+ continue;
+
+ if (m->value)
+ continue;
+
+ if (argv[3] && !streq(argv[3], m->interface))
+ continue;
+
+ r = sd_bus_call_method(bus, argv[1], argv[2],
+ "org.freedesktop.DBus.Properties", "GetAll",
+ &error, &reply, "s", m->interface);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get all properties on interface %s: %s",
+ m->interface, bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ Member *z;
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *mf = NULL;
+ size_t sz = 0;
+ const char *name;
+
+ r = sd_bus_message_enter_container(reply, 'e', "sv");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(reply, "s", &name);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(reply, 'v', NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ mf = open_memstream_unlocked(&buf, &sz);
+ if (!mf)
+ return log_oom();
+
+ r = format_cmdline(reply, mf, false);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ mf = safe_fclose(mf);
+
+ z = set_get(members, &((Member) {
+ .type = "property",
+ .interface = m->interface,
+ .name = (char*) name }));
+ if (z)
+ free_and_replace(z->value, buf);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ name_width = strlen("NAME");
+ type_width = strlen("TYPE");
+ signature_width = strlen("SIGNATURE");
+ result_width = strlen("RESULT/VALUE");
+
+ sorted = newa(Member*, set_size(members));
+
+ SET_FOREACH(m, members) {
+ if (argv[3] && !streq(argv[3], m->interface))
+ continue;
+
+ if (m->interface)
+ name_width = MAX(name_width, strlen(m->interface));
+ if (m->name)
+ name_width = MAX(name_width, strlen(m->name) + 1);
+ if (m->type)
+ type_width = MAX(type_width, strlen(m->type));
+ if (m->signature)
+ signature_width = MAX(signature_width, strlen(m->signature));
+ if (m->result)
+ result_width = MAX(result_width, strlen(m->result));
+ if (m->value)
+ result_width = MAX(result_width, strlen(m->value));
+
+ sorted[k++] = m;
+ }
+
+ if (result_width > 40)
+ result_width = 40;
+
+ typesafe_qsort(sorted, k, member_compare_funcp);
+
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_legend)
+ printf("%-*s %-*s %-*s %-*s %s\n",
+ (int) name_width, "NAME",
+ (int) type_width, "TYPE",
+ (int) signature_width, "SIGNATURE",
+ (int) result_width, "RESULT/VALUE",
+ "FLAGS");
+
+ for (j = 0; j < k; j++) {
+ _cleanup_free_ char *ellipsized = NULL;
+ const char *rv;
+ bool is_interface;
+
+ m = sorted[j];
+
+ if (argv[3] && !streq(argv[3], m->interface))
+ continue;
+
+ is_interface = streq(m->type, "interface");
+
+ if (argv[3] && is_interface)
+ continue;
+
+ if (m->value) {
+ ellipsized = ellipsize(m->value, result_width, 100);
+ if (!ellipsized)
+ return log_oom();
+
+ rv = ellipsized;
+ } else
+ rv = empty_to_dash(m->result);
+
+ printf("%s%s%-*s%s %-*s %-*s %-*s%s%s%s%s%s%s\n",
+ is_interface ? ansi_highlight() : "",
+ is_interface ? "" : ".",
+ - !is_interface + (int) name_width,
+ empty_to_dash(streq_ptr(m->type, "interface") ? m->interface : m->name),
+ is_interface ? ansi_normal() : "",
+ (int) type_width, empty_to_dash(m->type),
+ (int) signature_width, empty_to_dash(m->signature),
+ (int) result_width, rv,
+ (m->flags & SD_BUS_VTABLE_DEPRECATED) ? " deprecated" : (m->flags || m->writable ? "" : " -"),
+ (m->flags & SD_BUS_VTABLE_METHOD_NO_REPLY) ? " no-reply" : "",
+ (m->flags & SD_BUS_VTABLE_PROPERTY_CONST) ? " const" : "",
+ (m->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) ? " emits-change" : "",
+ (m->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) ? " emits-invalidation" : "",
+ m->writable ? " writable" : "");
+ }
+
+ return 0;
+}
+
+static int message_dump(sd_bus_message *m, FILE *f) {
+ return sd_bus_message_dump(m, f, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+}
+
+static int message_pcap(sd_bus_message *m, FILE *f) {
+ return bus_message_pcap_frame(m, arg_snaplen, f);
+}
+
+static int message_json(sd_bus_message *m, FILE *f) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL;
+ char e[2];
+ int r;
+
+ r = json_transform_message(m, &v);
+ if (r < 0)
+ return r;
+
+ e[0] = m->header->endian;
+ e[1] = 0;
+
+ r = json_build(&w, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("type", JSON_BUILD_STRING(bus_message_type_to_string(m->header->type))),
+ JSON_BUILD_PAIR("endian", JSON_BUILD_STRING(e)),
+ JSON_BUILD_PAIR("flags", JSON_BUILD_INTEGER(m->header->flags)),
+ JSON_BUILD_PAIR("version", JSON_BUILD_INTEGER(m->header->version)),
+ JSON_BUILD_PAIR("cookie", JSON_BUILD_INTEGER(BUS_MESSAGE_COOKIE(m))),
+ JSON_BUILD_PAIR_CONDITION(m->reply_cookie != 0, "reply_cookie", JSON_BUILD_INTEGER(m->reply_cookie)),
+ JSON_BUILD_PAIR_CONDITION(m->sender, "sender", JSON_BUILD_STRING(m->sender)),
+ JSON_BUILD_PAIR_CONDITION(m->destination, "destination", JSON_BUILD_STRING(m->destination)),
+ JSON_BUILD_PAIR_CONDITION(m->path, "path", JSON_BUILD_STRING(m->path)),
+ JSON_BUILD_PAIR_CONDITION(m->interface, "interface", JSON_BUILD_STRING(m->interface)),
+ JSON_BUILD_PAIR_CONDITION(m->member, "member", JSON_BUILD_STRING(m->member)),
+ JSON_BUILD_PAIR_CONDITION(m->monotonic != 0, "monotonic", JSON_BUILD_INTEGER(m->monotonic)),
+ JSON_BUILD_PAIR_CONDITION(m->realtime != 0, "realtime", JSON_BUILD_INTEGER(m->realtime)),
+ JSON_BUILD_PAIR_CONDITION(m->seqnum != 0, "seqnum", JSON_BUILD_INTEGER(m->seqnum)),
+ JSON_BUILD_PAIR_CONDITION(m->error.name, "error_name", JSON_BUILD_STRING(m->error.name)),
+ JSON_BUILD_PAIR("payload", JSON_BUILD_VARIANT(v))));
+ if (r < 0)
+ return log_error_errno(r, "Failed to build JSON object: %m");
+
+ json_dump_with_flags(w, f);
+ return 0;
+}
+
+static int monitor(int argc, char **argv, int (*dump)(sd_bus_message *m, FILE *f)) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *message = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char **i;
+ uint32_t flags = 0;
+ const char *unique_name;
+ bool is_monitor = false;
+ int r;
+
+ r = acquire_bus(true, &bus);
+ if (r < 0)
+ return r;
+
+ /* upgrade connection; it's not used for anything else after this call */
+ r = sd_bus_message_new_method_call(bus,
+ &message,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus.Monitoring",
+ "BecomeMonitor");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(message, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ STRV_FOREACH(i, argv+1) {
+ _cleanup_free_ char *m = NULL;
+
+ if (!sd_bus_service_name_is_valid(*i))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid service name '%s'", *i);
+
+ m = strjoin("sender='", *i, "'");
+ if (!m)
+ return log_oom();
+
+ r = sd_bus_message_append_basic(message, 's', m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ free(m);
+ m = strjoin("destination='", *i, "'");
+ if (!m)
+ return log_oom();
+
+ r = sd_bus_message_append_basic(message, 's', m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ STRV_FOREACH(i, arg_matches) {
+ r = sd_bus_message_append_basic(message, 's', *i);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(message);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(message, 'u', &flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, message, arg_timeout, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Call to org.freedesktop.DBus.Monitoring.BecomeMonitor failed: %s",
+ bus_error_message(&error, r));
+
+ r = sd_bus_get_unique_name(bus, &unique_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unique name: %m");
+
+ log_info("Monitoring bus message stream.");
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_process(bus, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to process bus: %m");
+
+ if (!is_monitor) {
+ const char *name;
+
+ /* wait until we lose our unique name */
+ if (sd_bus_message_is_signal(m, "org.freedesktop.DBus", "NameLost") <= 0)
+ continue;
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (streq(name, unique_name))
+ is_monitor = true;
+
+ continue;
+ }
+
+ if (m) {
+ dump(m, stdout);
+ fflush(stdout);
+
+ if (sd_bus_message_is_signal(m, "org.freedesktop.DBus.Local", "Disconnected") > 0) {
+ log_info("Connection terminated, exiting.");
+ return 0;
+ }
+
+ continue;
+ }
+
+ if (r > 0)
+ continue;
+
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for bus: %m");
+ }
+}
+
+static int verb_monitor(int argc, char **argv, void *userdata) {
+ return monitor(argc, argv, arg_json != JSON_OFF ? message_json : message_dump);
+}
+
+static int verb_capture(int argc, char **argv, void *userdata) {
+ int r;
+
+ if (isatty(fileno(stdout)) > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Refusing to write message data to console, please redirect output to a file.");
+
+ bus_pcap_header(arg_snaplen, stdout);
+
+ r = monitor(argc, argv, message_pcap);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(stdout);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't write capture file: %m");
+
+ return r;
+}
+
+static int status(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (!isempty(argv[1])) {
+ r = parse_pid(argv[1], &pid);
+ if (r < 0)
+ r = sd_bus_get_name_creds(
+ bus,
+ argv[1],
+ (arg_augment_creds ? SD_BUS_CREDS_AUGMENT : 0) | _SD_BUS_CREDS_ALL,
+ &creds);
+ else
+ r = sd_bus_creds_new_from_pid(
+ &creds,
+ pid,
+ _SD_BUS_CREDS_ALL);
+ } else {
+ const char *scope, *address;
+ sd_id128_t bus_id;
+
+ r = sd_bus_get_address(bus, &address);
+ if (r >= 0)
+ printf("BusAddress=%s%s%s\n", ansi_highlight(), address, ansi_normal());
+
+ r = sd_bus_get_scope(bus, &scope);
+ if (r >= 0)
+ printf("BusScope=%s%s%s\n", ansi_highlight(), scope, ansi_normal());
+
+ r = sd_bus_get_bus_id(bus, &bus_id);
+ if (r >= 0)
+ printf("BusID=%s" SD_ID128_FORMAT_STR "%s\n",
+ ansi_highlight(), SD_ID128_FORMAT_VAL(bus_id), ansi_normal());
+
+ r = sd_bus_get_owner_creds(
+ bus,
+ (arg_augment_creds ? SD_BUS_CREDS_AUGMENT : 0) | _SD_BUS_CREDS_ALL,
+ &creds);
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to get credentials: %m");
+
+ bus_creds_dump(creds, NULL, false);
+ return 0;
+}
+
+static int message_append_cmdline(sd_bus_message *m, const char *signature, char ***x) {
+ char **p;
+ int r;
+
+ assert(m);
+ assert(signature);
+ assert(x);
+
+ p = *x;
+
+ for (;;) {
+ const char *v;
+ char t;
+
+ t = *signature;
+ v = *p;
+
+ if (t == 0)
+ break;
+ if (!v)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too few parameters for signature.");
+
+ signature++;
+ p++;
+
+ switch (t) {
+
+ case SD_BUS_TYPE_BOOLEAN:
+
+ r = parse_boolean(v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as boolean: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &r);
+ break;
+
+ case SD_BUS_TYPE_BYTE: {
+ uint8_t z;
+
+ r = safe_atou8(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as byte (unsigned 8bit integer): %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT16: {
+ int16_t z;
+
+ r = safe_atoi16(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as signed 16bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT16: {
+ uint16_t z;
+
+ r = safe_atou16(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as unsigned 16bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT32: {
+ int32_t z;
+
+ r = safe_atoi32(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as signed 32bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t z;
+
+ r = safe_atou32(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as unsigned 32bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT64: {
+ int64_t z;
+
+ r = safe_atoi64(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as signed 64bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t z;
+
+ r = safe_atou64(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as unsigned 64bit integer: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double z;
+
+ r = safe_atod(v, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' as double precision floating point: %m", v);
+
+ r = sd_bus_message_append_basic(m, t, &z);
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+
+ r = sd_bus_message_append_basic(m, t, v);
+ break;
+
+ case SD_BUS_TYPE_ARRAY: {
+ uint32_t n;
+ size_t k;
+
+ r = safe_atou32(v, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse '%s' number of array entries: %m", v);
+
+ r = signature_element_length(signature, &k);
+ if (r < 0)
+ return log_error_errno(r, "Invalid array signature: %m");
+
+ {
+ char s[k + 1];
+ memcpy(s, signature, k);
+ s[k] = 0;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (unsigned i = 0; i < n; i++) {
+ r = message_append_cmdline(m, s, &p);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ signature += k;
+
+ r = sd_bus_message_close_container(m);
+ break;
+ }
+
+ case SD_BUS_TYPE_VARIANT:
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_VARIANT, v);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = message_append_cmdline(m, v, &p);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ break;
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ signature--;
+ p--;
+
+ r = signature_element_length(signature, &k);
+ if (r < 0)
+ return log_error_errno(r, "Invalid struct/dict entry signature: %m");
+
+ {
+ char s[k-1];
+ memcpy(s, signature + 1, k - 2);
+ s[k - 2] = 0;
+
+ const char ctype = t == SD_BUS_TYPE_STRUCT_BEGIN ?
+ SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY;
+ r = sd_bus_message_open_container(m, ctype, s);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = message_append_cmdline(m, s, &p);
+ if (r < 0)
+ return r;
+ }
+
+ signature += k;
+
+ r = sd_bus_message_close_container(m);
+ break;
+ }
+
+ case SD_BUS_TYPE_UNIX_FD:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "UNIX file descriptor not supported as type.");
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown signature type %c.", t);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ *x = p;
+ return 0;
+}
+
+static int json_transform_one(sd_bus_message *m, JsonVariant **ret);
+
+static int json_transform_array_or_struct(sd_bus_message *m, JsonVariant **ret) {
+ size_t n_elements = 0, n_allocated = 0;
+ JsonVariant **elements = NULL;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ for (;;) {
+ r = sd_bus_message_at_end(m, false);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+ if (r > 0)
+ break;
+
+ if (!GREEDY_REALLOC(elements, n_allocated, n_elements + 1)) {
+ r = log_oom();
+ goto finish;
+ }
+
+ r = json_transform_one(m, elements + n_elements);
+ if (r < 0)
+ goto finish;
+
+ n_elements++;
+ }
+
+ r = json_variant_new_array(ret, elements, n_elements);
+
+finish:
+ json_variant_unref_many(elements, n_elements);
+ free(elements);
+
+ return r;
+}
+
+static int json_transform_variant(sd_bus_message *m, const char *contents, JsonVariant **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *value = NULL;
+ int r;
+
+ assert(m);
+ assert(contents);
+ assert(ret);
+
+ r = json_transform_one(m, &value);
+ if (r < 0)
+ return r;
+
+ r = json_build(ret, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("type", JSON_BUILD_STRING(contents)),
+ JSON_BUILD_PAIR("data", JSON_BUILD_VARIANT(value))));
+ if (r < 0)
+ return log_oom();
+
+ return r;
+}
+
+static int json_transform_dict_array(sd_bus_message *m, JsonVariant **ret) {
+ size_t n_elements = 0, n_allocated = 0;
+ JsonVariant **elements = NULL;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ for (;;) {
+ const char *contents;
+ char type;
+
+ r = sd_bus_message_at_end(m, false);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+ if (r > 0)
+ break;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ assert(type == 'e');
+
+ if (!GREEDY_REALLOC(elements, n_allocated, n_elements + 2)) {
+ r = log_oom();
+ goto finish;
+ }
+
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+
+ r = json_transform_one(m, elements + n_elements);
+ if (r < 0)
+ goto finish;
+
+ n_elements++;
+
+ r = json_transform_one(m, elements + n_elements);
+ if (r < 0)
+ goto finish;
+
+ n_elements++;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+ }
+
+ r = json_variant_new_object(ret, elements, n_elements);
+
+finish:
+ json_variant_unref_many(elements, n_elements);
+ free(elements);
+
+ return r;
+}
+
+static int json_transform_one(sd_bus_message *m, JsonVariant **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ const char *contents;
+ char type;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE: {
+ uint8_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_unsigned(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform byte: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_boolean(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform boolean: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_INT16: {
+ int16_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_integer(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform int16: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT16: {
+ uint16_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_unsigned(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform uint16: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_INT32: {
+ int32_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_integer(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform int32: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_unsigned(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform uint32: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_INT64: {
+ int64_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_integer(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform int64: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_unsigned(&v, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform uint64: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_real(&v, d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform double: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_string(&v, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform double: %m");
+
+ break;
+ }
+
+ case SD_BUS_TYPE_UNIX_FD:
+ r = sd_bus_message_read_basic(m, type, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = json_variant_new_null(&v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transform fd: %m");
+
+ break;
+
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_VARIANT:
+ case SD_BUS_TYPE_STRUCT:
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (type == SD_BUS_TYPE_VARIANT)
+ r = json_transform_variant(m, contents, &v);
+ else if (type == SD_BUS_TYPE_ARRAY && contents[0] == '{')
+ r = json_transform_dict_array(m, &v);
+ else
+ r = json_transform_array_or_struct(m, &v);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected element type");
+ }
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+static int json_transform_message(sd_bus_message *m, JsonVariant **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ const char *type;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ assert_se(type = sd_bus_message_get_signature(m, false));
+
+ r = json_transform_array_or_struct(m, &v);
+ if (r < 0)
+ return r;
+
+ r = json_build(ret, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("type", JSON_BUILD_STRING(type)),
+ JSON_BUILD_PAIR("data", JSON_BUILD_VARIANT(v))));
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static void json_dump_with_flags(JsonVariant *v, FILE *f) {
+
+ json_variant_dump(v,
+ (arg_json == JSON_PRETTY ? JSON_FORMAT_PRETTY : JSON_FORMAT_NEWLINE) |
+ JSON_FORMAT_COLOR_AUTO,
+ f, NULL);
+}
+
+static int call(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(bus, &m, argv[1], argv[2], argv[3], argv[4]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_expect_reply(m, arg_expect_reply);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_auto_start(m, arg_auto_start);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_allow_interactive_authorization);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!isempty(argv[5])) {
+ char **p;
+
+ p = argv+6;
+
+ r = message_append_cmdline(m, argv[5], &p);
+ if (r < 0)
+ return r;
+
+ if (*p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many parameters for signature.");
+ }
+
+ if (!arg_expect_reply) {
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send message: %m");
+
+ return 0;
+ }
+
+ r = sd_bus_call(bus, m, arg_timeout, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Call failed: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_is_empty(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (r == 0 && !arg_quiet) {
+
+ if (arg_json != JSON_OFF) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ if (arg_json != JSON_SHORT)
+ (void) pager_open(arg_pager_flags);
+
+ r = json_transform_message(reply, &v);
+ if (r < 0)
+ return r;
+
+ json_dump_with_flags(v, stdout);
+
+ } else if (arg_verbose) {
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_message_dump(reply, stdout, 0);
+ if (r < 0)
+ return r;
+ } else {
+
+ fputs(sd_bus_message_get_signature(reply, true), stdout);
+ fputc(' ', stdout);
+
+ r = format_cmdline(reply, stdout, false);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fputc('\n', stdout);
+ }
+ }
+
+ return 0;
+}
+
+static int emit_signal(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_signal(bus, &m, argv[1], argv[2], argv[3]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (arg_destination) {
+ r = sd_bus_message_set_destination(m, arg_destination);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_set_auto_start(m, arg_auto_start);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!isempty(argv[4])) {
+ char **p;
+
+ p = argv+5;
+
+ r = message_append_cmdline(m, argv[4], &p);
+ if (r < 0)
+ return r;
+
+ if (*p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many parameters for signature.");
+ }
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send signal: %m");
+
+ return 0;
+}
+
+static int get_property(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char **i;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, argv + 4) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *contents = NULL;
+ char type;
+
+ r = sd_bus_call_method(bus, argv[1], argv[2],
+ "org.freedesktop.DBus.Properties", "Get",
+ &error, &reply, "ss", argv[3], *i);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get property %s on interface %s: %s",
+ *i, argv[3],
+ bus_error_message(&error, r));
+
+ r = sd_bus_message_peek_type(reply, &type, &contents);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(reply, 'v', contents);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (arg_json != JSON_OFF) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ if (arg_json != JSON_SHORT)
+ (void) pager_open(arg_pager_flags);
+
+ r = json_transform_variant(reply, contents, &v);
+ if (r < 0)
+ return r;
+
+ json_dump_with_flags(v, stdout);
+
+ } else if (arg_verbose) {
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY);
+ if (r < 0)
+ return r;
+ } else {
+ fputs(contents, stdout);
+ fputc(' ', stdout);
+
+ r = format_cmdline(reply, stdout, false);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fputc('\n', stdout);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ return 0;
+}
+
+static int set_property(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char **p;
+ int r;
+
+ r = acquire_bus(false, &bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(bus, &m, argv[1], argv[2],
+ "org.freedesktop.DBus.Properties", "Set");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ss", argv[3], argv[4]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', argv[5]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ p = argv + 6;
+ r = message_append_cmdline(m, argv[5], &p);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (*p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Too many parameters for signature.");
+
+ r = sd_bus_call(bus, m, arg_timeout, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set property %s on interface %s: %s",
+ argv[4], argv[3],
+ bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("busctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sIntrospect the D-Bus IPC bus.%s\n"
+ "\nCommands:\n"
+ " list List bus names\n"
+ " status [SERVICE] Show bus service, process or bus owner credentials\n"
+ " monitor [SERVICE...] Show bus traffic\n"
+ " capture [SERVICE...] Capture bus traffic as pcap\n"
+ " tree [SERVICE...] Show object tree of service\n"
+ " introspect SERVICE OBJECT [INTERFACE]\n"
+ " call SERVICE OBJECT INTERFACE METHOD [SIGNATURE [ARGUMENT...]]\n"
+ " Call a method\n"
+ " emit OBJECT INTERFACE SIGNAL [SIGNATURE [ARGUMENT...]]\n"
+ " Emit a signal\n"
+ " get-property SERVICE OBJECT INTERFACE PROPERTY...\n"
+ " Get property value\n"
+ " set-property SERVICE OBJECT INTERFACE PROPERTY SIGNATURE ARGUMENT...\n"
+ " Set property value\n"
+ " help Show this help\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " -l --full Do not ellipsize output\n"
+ " --system Connect to system bus\n"
+ " --user Connect to user bus\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --address=ADDRESS Connect to bus specified by address\n"
+ " --show-machine Show machine ID column in list\n"
+ " --unique Only show unique names\n"
+ " --acquired Only show acquired names\n"
+ " --activatable Only show activatable names\n"
+ " --match=MATCH Only show matching messages\n"
+ " --size=SIZE Maximum length of captured packet\n"
+ " --list Don't show tree, but simple object path list\n"
+ " -q --quiet Don't show method call reply\n"
+ " --verbose Show result values in long format\n"
+ " --json=MODE Output as JSON\n"
+ " -j Same as --json=pretty on tty, --json=short otherwise\n"
+ " --expect-reply=BOOL Expect a method call reply\n"
+ " --auto-start=BOOL Auto-start destination service\n"
+ " --allow-interactive-authorization=BOOL\n"
+ " Allow interactive authorization for operation\n"
+ " --timeout=SECS Maximum time to wait for method call completion\n"
+ " --augment-creds=BOOL Extend credential data with data read from /proc/$PID\n"
+ " --watch-bind=BOOL Wait for bus AF_UNIX socket to be bound in the file\n"
+ " system\n"
+ " --destination=SERVICE Destination service of a signal\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_SYSTEM,
+ ARG_USER,
+ ARG_ADDRESS,
+ ARG_MATCH,
+ ARG_SHOW_MACHINE,
+ ARG_UNIQUE,
+ ARG_ACQUIRED,
+ ARG_ACTIVATABLE,
+ ARG_SIZE,
+ ARG_LIST,
+ ARG_VERBOSE,
+ ARG_XML_INTERFACE,
+ ARG_EXPECT_REPLY,
+ ARG_AUTO_START,
+ ARG_ALLOW_INTERACTIVE_AUTHORIZATION,
+ ARG_TIMEOUT,
+ ARG_AUGMENT_CREDS,
+ ARG_WATCH_BIND,
+ ARG_JSON,
+ ARG_DESTINATION,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "full", no_argument, NULL, 'l' },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "address", required_argument, NULL, ARG_ADDRESS },
+ { "show-machine", no_argument, NULL, ARG_SHOW_MACHINE },
+ { "unique", no_argument, NULL, ARG_UNIQUE },
+ { "acquired", no_argument, NULL, ARG_ACQUIRED },
+ { "activatable", no_argument, NULL, ARG_ACTIVATABLE },
+ { "match", required_argument, NULL, ARG_MATCH },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "size", required_argument, NULL, ARG_SIZE },
+ { "list", no_argument, NULL, ARG_LIST },
+ { "quiet", no_argument, NULL, 'q' },
+ { "verbose", no_argument, NULL, ARG_VERBOSE },
+ { "xml-interface", no_argument, NULL, ARG_XML_INTERFACE },
+ { "expect-reply", required_argument, NULL, ARG_EXPECT_REPLY },
+ { "auto-start", required_argument, NULL, ARG_AUTO_START },
+ { "allow-interactive-authorization", required_argument, NULL, ARG_ALLOW_INTERACTIVE_AUTHORIZATION },
+ { "timeout", required_argument, NULL, ARG_TIMEOUT },
+ { "augment-creds", required_argument, NULL, ARG_AUGMENT_CREDS },
+ { "watch-bind", required_argument, NULL, ARG_WATCH_BIND },
+ { "json", required_argument, NULL, ARG_JSON },
+ { "destination", required_argument, NULL, ARG_DESTINATION },
+ {},
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:qjl", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case ARG_USER:
+ arg_user = true;
+ break;
+
+ case ARG_SYSTEM:
+ arg_user = false;
+ break;
+
+ case ARG_ADDRESS:
+ arg_address = optarg;
+ break;
+
+ case ARG_SHOW_MACHINE:
+ arg_show_machine = true;
+ break;
+
+ case ARG_UNIQUE:
+ arg_unique = true;
+ break;
+
+ case ARG_ACQUIRED:
+ arg_acquired = true;
+ break;
+
+ case ARG_ACTIVATABLE:
+ arg_activatable = true;
+ break;
+
+ case ARG_MATCH:
+ if (strv_extend(&arg_matches, optarg) < 0)
+ return log_oom();
+ break;
+
+ case ARG_SIZE: {
+ uint64_t sz;
+
+ r = parse_size(optarg, 1024, &sz);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse size '%s': %m", optarg);
+
+ if ((uint64_t) (size_t) sz != sz)
+ return log_error_errno(SYNTHETIC_ERRNO(E2BIG),
+ "Size out of range.");
+
+ arg_snaplen = (size_t) sz;
+ break;
+ }
+
+ case ARG_LIST:
+ arg_list = true;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_VERBOSE:
+ arg_verbose = true;
+ break;
+
+ case ARG_XML_INTERFACE:
+ arg_xml_interface = true;
+ break;
+
+ case ARG_EXPECT_REPLY:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --expect-reply= parameter '%s': %m", optarg);
+
+ arg_expect_reply = r;
+ break;
+
+ case ARG_AUTO_START:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --auto-start= parameter '%s': %m", optarg);
+
+ arg_auto_start = r;
+ break;
+
+ case ARG_ALLOW_INTERACTIVE_AUTHORIZATION:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --allow-interactive-authorization= parameter '%s': %m", optarg);
+
+ arg_allow_interactive_authorization = r;
+ break;
+
+ case ARG_TIMEOUT:
+ r = parse_sec(optarg, &arg_timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --timeout= parameter '%s': %m", optarg);
+
+ break;
+
+ case ARG_AUGMENT_CREDS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --augment-creds= parameter '%s': %m", optarg);
+
+ arg_augment_creds = r;
+ break;
+
+ case ARG_WATCH_BIND:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --watch-bind= parameter '%s': %m", optarg);
+
+ arg_watch_bind = r;
+ break;
+
+ case 'j':
+ if (on_tty())
+ arg_json = JSON_PRETTY;
+ else
+ arg_json = JSON_SHORT;
+ break;
+
+ case ARG_JSON:
+ if (streq(optarg, "short"))
+ arg_json = JSON_SHORT;
+ else if (streq(optarg, "pretty"))
+ arg_json = JSON_PRETTY;
+ else if (streq(optarg, "help")) {
+ fputs("short\n"
+ "pretty\n", stdout);
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown JSON out mode: %s",
+ optarg);
+
+ break;
+
+ case ARG_DESTINATION:
+ arg_destination = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int busctl_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "list", VERB_ANY, 1, VERB_DEFAULT, list_bus_names },
+ { "status", VERB_ANY, 2, 0, status },
+ { "monitor", VERB_ANY, VERB_ANY, 0, verb_monitor },
+ { "capture", VERB_ANY, VERB_ANY, 0, verb_capture },
+ { "tree", VERB_ANY, VERB_ANY, 0, tree },
+ { "introspect", 3, 4, 0, introspect },
+ { "call", 5, VERB_ANY, 0, call },
+ { "emit", 4, VERB_ANY, 0, emit_signal },
+ { "get-property", 5, VERB_ANY, 0, get_property },
+ { "set-property", 6, VERB_ANY, 0, set_property },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return busctl_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/cgls/cgls.c b/src/cgls/cgls.c
new file mode 100644
index 0000000..693b504
--- /dev/null
+++ b/src/cgls/cgls.c
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "main-func.h"
+#include "output-mode.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_kernel_threads = false;
+static bool arg_all = false;
+
+static enum {
+ SHOW_UNIT_NONE,
+ SHOW_UNIT_SYSTEM,
+ SHOW_UNIT_USER,
+} arg_show_unit = SHOW_UNIT_NONE;
+static char **arg_names = NULL;
+
+static int arg_full = -1;
+static const char* arg_machine = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_names, freep); /* don't free the strings */
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-cgls", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CGROUP...]\n\n"
+ "Recursively show control group contents.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " -a --all Show all groups, including empty\n"
+ " -u --unit Show the subtrees of specified system units\n"
+ " --user-unit Show the subtrees of specified user units\n"
+ " -l --full Do not ellipsize output\n"
+ " -k Include kernel threads in output\n"
+ " -M --machine= Show container\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_NO_PAGER = 0x100,
+ ARG_VERSION,
+ ARG_USER_UNIT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "all", no_argument, NULL, 'a' },
+ { "full", no_argument, NULL, 'l' },
+ { "machine", required_argument, NULL, 'M' },
+ { "unit", optional_argument, NULL, 'u' },
+ { "user-unit", optional_argument, NULL, ARG_USER_UNIT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "-hkalM:u::", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case 'u':
+ arg_show_unit = SHOW_UNIT_SYSTEM;
+ if (strv_push(&arg_names, optarg) < 0) /* push optarg if not empty */
+ return log_oom();
+ break;
+
+ case ARG_USER_UNIT:
+ arg_show_unit = SHOW_UNIT_USER;
+ if (strv_push(&arg_names, optarg) < 0) /* push optarg if not empty */
+ return log_oom();
+ break;
+
+ case 1:
+ /* positional argument */
+ if (strv_push(&arg_names, optarg) < 0)
+ return log_oom();
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case 'k':
+ arg_kernel_threads = true;
+ break;
+
+ case 'M':
+ arg_machine = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_machine && arg_show_unit != SHOW_UNIT_NONE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot combine --unit or --user-unit with --machine=.");
+
+ return 1;
+}
+
+static void show_cg_info(const char *controller, const char *path) {
+
+ if (cg_all_unified() == 0 && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ printf("Controller %s; ", controller);
+
+ printf("Control group %s:\n", empty_to_root(path));
+ fflush(stdout);
+}
+
+static int run(int argc, char *argv[]) {
+ int r, output_flags;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = pager_open(arg_pager_flags);
+ if (r > 0 && arg_full < 0)
+ arg_full = true;
+
+ output_flags =
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full > 0) * OUTPUT_FULL_WIDTH |
+ arg_kernel_threads * OUTPUT_KERNEL_THREADS;
+
+ if (arg_names) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *root = NULL;
+ char **name;
+
+ STRV_FOREACH(name, arg_names) {
+ int q;
+
+ if (arg_show_unit != SHOW_UNIT_NONE) {
+ /* Command line arguments are unit names */
+ _cleanup_free_ char *cgroup = NULL;
+
+ if (!bus) {
+ /* Connect to the bus only if necessary */
+ r = bus_connect_transport_systemd(BUS_TRANSPORT_LOCAL, NULL,
+ arg_show_unit == SHOW_UNIT_USER,
+ &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+ }
+
+ q = show_cgroup_get_unit_path_and_warn(bus, *name, &cgroup);
+ if (q < 0)
+ goto failed;
+
+ if (isempty(cgroup)) {
+ log_warning("Unit %s not found.", *name);
+ q = -ENOENT;
+ goto failed;
+ }
+
+ printf("Unit %s (%s):\n", *name, cgroup);
+ fflush(stdout);
+
+ q = show_cgroup_by_path(cgroup, NULL, 0, output_flags);
+
+ } else if (path_startswith(*name, "/sys/fs/cgroup")) {
+
+ printf("Directory %s:\n", *name);
+ fflush(stdout);
+
+ q = show_cgroup_by_path(*name, NULL, 0, output_flags);
+ } else {
+ _cleanup_free_ char *c = NULL, *p = NULL, *j = NULL;
+ const char *controller, *path;
+
+ if (!root) {
+ /* Query root only if needed, treat error as fatal */
+ r = show_cgroup_get_path_and_warn(arg_machine, NULL, &root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list cgroup tree: %m");
+ }
+
+ q = cg_split_spec(*name, &c, &p);
+ if (q < 0) {
+ log_error_errno(q, "Failed to split argument %s: %m", *name);
+ goto failed;
+ }
+
+ controller = c ?: SYSTEMD_CGROUP_CONTROLLER;
+ if (p) {
+ j = path_join(root, p);
+ if (!j)
+ return log_oom();
+
+ path_simplify(j, false);
+ path = j;
+ } else
+ path = root;
+
+ show_cg_info(controller, path);
+
+ q = show_cgroup(controller, path, NULL, 0, output_flags);
+ }
+
+ failed:
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ } else {
+ bool done = false;
+
+ if (!arg_machine) {
+ _cleanup_free_ char *cwd = NULL;
+
+ r = safe_getcwd(&cwd);
+ if (r < 0)
+ return log_error_errno(r, "Cannot determine current working directory: %m");
+
+ if (path_startswith(cwd, "/sys/fs/cgroup")) {
+ printf("Working directory %s:\n", cwd);
+ fflush(stdout);
+
+ r = show_cgroup_by_path(cwd, NULL, 0, output_flags);
+ done = true;
+ }
+ }
+
+ if (!done) {
+ _cleanup_free_ char *root = NULL;
+
+ r = show_cgroup_get_path_and_warn(arg_machine, NULL, &root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list cgroup tree: %m");
+
+ show_cg_info(SYSTEMD_CGROUP_CONTROLLER, root);
+
+ printf("-.slice\n");
+ r = show_cgroup(SYSTEMD_CGROUP_CONTROLLER, root, NULL, 0, output_flags);
+ }
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to list cgroup tree: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/cgroups-agent/cgroups-agent.c b/src/cgroups-agent/cgroups-agent.c
new file mode 100644
index 0000000..eeb4ba8
--- /dev/null
+++ b/src/cgroups-agent/cgroups-agent.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+
+#include "fd-util.h"
+#include "log.h"
+#include "socket-util.h"
+
+int main(int argc, char *argv[]) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/cgroups-agent",
+ };
+
+ _cleanup_close_ int fd = -1;
+ ssize_t n;
+ size_t l;
+
+ if (argc != 2) {
+ log_error("Incorrect number of arguments.");
+ return EXIT_FAILURE;
+ }
+
+ log_setup_service();
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0) {
+ log_debug_errno(errno, "Failed to allocate socket: %m");
+ return EXIT_FAILURE;
+ }
+
+ l = strlen(argv[1]);
+
+ n = sendto(fd, argv[1], l, 0, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (n < 0) {
+ log_debug_errno(errno, "Failed to send cgroups agent message: %m");
+ return EXIT_FAILURE;
+ }
+
+ if ((size_t) n != l) {
+ log_debug("Datagram size mismatch");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
new file mode 100644
index 0000000..e9e7ed2
--- /dev/null
+++ b/src/cgtop/cgtop.c
@@ -0,0 +1,1109 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "main-func.h"
+#include "missing_sched.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "sort-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "virt.h"
+
+typedef struct Group {
+ char *path;
+
+ bool n_tasks_valid:1;
+ bool cpu_valid:1;
+ bool memory_valid:1;
+ bool io_valid:1;
+
+ uint64_t n_tasks;
+
+ unsigned cpu_iteration;
+ nsec_t cpu_usage;
+ nsec_t cpu_timestamp;
+ double cpu_fraction;
+
+ uint64_t memory;
+
+ unsigned io_iteration;
+ uint64_t io_input, io_output;
+ nsec_t io_timestamp;
+ uint64_t io_input_bps, io_output_bps;
+} Group;
+
+static unsigned arg_depth = 3;
+static unsigned arg_iterations = (unsigned) -1;
+static bool arg_batch = false;
+static bool arg_raw = false;
+static usec_t arg_delay = 1*USEC_PER_SEC;
+static char* arg_machine = NULL;
+static char* arg_root = NULL;
+static bool arg_recursive = true;
+static bool arg_recursive_unset = false;
+
+static enum {
+ COUNT_PIDS,
+ COUNT_USERSPACE_PROCESSES,
+ COUNT_ALL_PROCESSES,
+} arg_count = COUNT_PIDS;
+
+static enum {
+ ORDER_PATH,
+ ORDER_TASKS,
+ ORDER_CPU,
+ ORDER_MEMORY,
+ ORDER_IO,
+} arg_order = ORDER_CPU;
+
+static enum {
+ CPU_PERCENT,
+ CPU_TIME,
+} arg_cpu_type = CPU_PERCENT;
+
+static Group *group_free(Group *g) {
+ if (!g)
+ return NULL;
+
+ free(g->path);
+ return mfree(g);
+}
+
+
+static const char *maybe_format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) {
+ if (arg_raw) {
+ snprintf(buf, l, USEC_FMT, t);
+ return buf;
+ }
+ return format_timespan(buf, l, t, accuracy);
+}
+
+static const char *maybe_format_bytes(char *buf, size_t l, bool is_valid, uint64_t t) {
+ if (!is_valid)
+ return "-";
+ if (arg_raw) {
+ snprintf(buf, l, "%" PRIu64, t);
+ return buf;
+ }
+ return format_bytes(buf, l, t);
+}
+
+static bool is_root_cgroup(const char *path) {
+
+ /* Returns true if the specified path belongs to the root cgroup. The root cgroup is special on cgroup v2 as it
+ * carries only very few attributes in order not to export multiple truth about system state as most
+ * information is available elsewhere in /proc anyway. We need to be able to deal with that, and need to get
+ * our data from different sources in that case.
+ *
+ * There's one extra complication in all of this, though 😣: if the path to the cgroup indicates we are in the
+ * root cgroup this might actually not be the case, because cgroup namespacing might be in effect
+ * (CLONE_NEWCGROUP). Since there's no nice way to distinguish a real cgroup root from a fake namespaced one we
+ * do an explicit container check here, under the assumption that CLONE_NEWCGROUP is generally used when
+ * container managers are used too.
+ *
+ * Note that checking for a container environment is kinda ugly, since in theory people could use cgtop from
+ * inside a container where cgroup namespacing is turned off to watch the host system. However, that's mostly a
+ * theoretic usecase, and if people actually try all they'll lose is accounting for the top-level cgroup. Which
+ * isn't too bad. */
+
+ if (detect_container() > 0)
+ return false;
+
+ return empty_or_root(path);
+}
+
+static int process(
+ const char *controller,
+ const char *path,
+ Hashmap *a,
+ Hashmap *b,
+ unsigned iteration,
+ Group **ret) {
+
+ Group *g;
+ int r, all_unified;
+
+ assert(controller);
+ assert(path);
+ assert(a);
+
+ all_unified = cg_all_unified();
+ if (all_unified < 0)
+ return all_unified;
+
+ g = hashmap_get(a, path);
+ if (!g) {
+ g = hashmap_get(b, path);
+ if (!g) {
+ g = new0(Group, 1);
+ if (!g)
+ return -ENOMEM;
+
+ g->path = strdup(path);
+ if (!g->path) {
+ group_free(g);
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(a, g->path, g);
+ if (r < 0) {
+ group_free(g);
+ return r;
+ }
+ } else {
+ r = hashmap_move_one(a, b, path);
+ if (r < 0)
+ return r;
+
+ g->cpu_valid = g->memory_valid = g->io_valid = g->n_tasks_valid = false;
+ }
+ }
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) &&
+ IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES)) {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid;
+
+ r = cg_enumerate_processes(controller, path, &f);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ g->n_tasks = 0;
+ while (cg_read_pid(f, &pid) > 0) {
+
+ if (arg_count == COUNT_USERSPACE_PROCESSES && is_kernel_thread(pid) > 0)
+ continue;
+
+ g->n_tasks++;
+ }
+
+ if (g->n_tasks > 0)
+ g->n_tasks_valid = true;
+
+ } else if (streq(controller, "pids") && arg_count == COUNT_PIDS) {
+
+ if (is_root_cgroup(path)) {
+ r = procfs_tasks_get_current(&g->n_tasks);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *p = NULL, *v = NULL;
+
+ r = cg_get_path(controller, path, "pids.current", &p);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &g->n_tasks);
+ if (r < 0)
+ return r;
+ }
+
+ if (g->n_tasks > 0)
+ g->n_tasks_valid = true;
+
+ } else if (streq(controller, "memory")) {
+
+ if (is_root_cgroup(path)) {
+ r = procfs_memory_get_used(&g->memory);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *p = NULL, *v = NULL;
+
+ if (all_unified)
+ r = cg_get_path(controller, path, "memory.current", &p);
+ else
+ r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &g->memory);
+ if (r < 0)
+ return r;
+ }
+
+ if (g->memory > 0)
+ g->memory_valid = true;
+
+ } else if ((streq(controller, "io") && all_unified) ||
+ (streq(controller, "blkio") && !all_unified)) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ uint64_t wr = 0, rd = 0;
+ nsec_t timestamp;
+
+ r = cg_get_path(controller, path, all_unified ? "io.stat" : "blkio.io_service_bytes", &p);
+ if (r < 0)
+ return r;
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uint64_t k, *q;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Trim and skip the device */
+ l = strstrip(line);
+ l += strcspn(l, WHITESPACE);
+ l += strspn(l, WHITESPACE);
+
+ if (all_unified) {
+ while (!isempty(l)) {
+ if (sscanf(l, "rbytes=%" SCNu64, &k))
+ rd += k;
+ else if (sscanf(l, "wbytes=%" SCNu64, &k))
+ wr += k;
+
+ l += strcspn(l, WHITESPACE);
+ l += strspn(l, WHITESPACE);
+ }
+ } else {
+ if (first_word(l, "Read")) {
+ l += 4;
+ q = &rd;
+ } else if (first_word(l, "Write")) {
+ l += 5;
+ q = &wr;
+ } else
+ continue;
+
+ l += strspn(l, WHITESPACE);
+ r = safe_atou64(l, &k);
+ if (r < 0)
+ continue;
+
+ *q += k;
+ }
+ }
+
+ timestamp = now_nsec(CLOCK_MONOTONIC);
+
+ if (g->io_iteration == iteration - 1) {
+ uint64_t x, yr, yw;
+
+ x = (uint64_t) (timestamp - g->io_timestamp);
+ if (x < 1)
+ x = 1;
+
+ if (rd > g->io_input)
+ yr = rd - g->io_input;
+ else
+ yr = 0;
+
+ if (wr > g->io_output)
+ yw = wr - g->io_output;
+ else
+ yw = 0;
+
+ if (yr > 0 || yw > 0) {
+ g->io_input_bps = (yr * 1000000000ULL) / x;
+ g->io_output_bps = (yw * 1000000000ULL) / x;
+ g->io_valid = true;
+ }
+ }
+
+ g->io_input = rd;
+ g->io_output = wr;
+ g->io_timestamp = timestamp;
+ g->io_iteration = iteration;
+ } else if (STR_IN_SET(controller, "cpu", "cpuacct") || cpu_accounting_is_cheap()) {
+ _cleanup_free_ char *p = NULL, *v = NULL;
+ uint64_t new_usage;
+ nsec_t timestamp;
+
+ if (is_root_cgroup(path)) {
+ r = procfs_cpu_get_usage(&new_usage);
+ if (r < 0)
+ return r;
+ } else if (all_unified) {
+ _cleanup_free_ char *val = NULL;
+
+ if (!streq(controller, "cpu"))
+ return 0;
+
+ r = cg_get_keyed_attribute("cpu", path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
+ if (IN_SET(r, -ENOENT, -ENXIO))
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &new_usage);
+ if (r < 0)
+ return r;
+
+ new_usage *= NSEC_PER_USEC;
+ } else {
+ if (!streq(controller, "cpuacct"))
+ return 0;
+
+ r = cg_get_path(controller, path, "cpuacct.usage", &p);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &new_usage);
+ if (r < 0)
+ return r;
+ }
+
+ timestamp = now_nsec(CLOCK_MONOTONIC);
+
+ if (g->cpu_iteration == iteration - 1 &&
+ (nsec_t) new_usage > g->cpu_usage) {
+
+ nsec_t x, y;
+
+ x = timestamp - g->cpu_timestamp;
+ if (x < 1)
+ x = 1;
+
+ y = (nsec_t) new_usage - g->cpu_usage;
+ g->cpu_fraction = (double) y / (double) x;
+ g->cpu_valid = true;
+ }
+
+ g->cpu_usage = (nsec_t) new_usage;
+ g->cpu_timestamp = timestamp;
+ g->cpu_iteration = iteration;
+
+ }
+
+ if (ret)
+ *ret = g;
+
+ return 0;
+}
+
+static int refresh_one(
+ const char *controller,
+ const char *path,
+ Hashmap *a,
+ Hashmap *b,
+ unsigned iteration,
+ unsigned depth,
+ Group **ret) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ Group *ours = NULL;
+ int r;
+
+ assert(controller);
+ assert(path);
+ assert(a);
+
+ if (depth > arg_depth)
+ return 0;
+
+ r = process(controller, path, a, b, iteration, &ours);
+ if (r < 0)
+ return r;
+
+ r = cg_enumerate_subgroups(controller, path, &d);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *fn = NULL, *p = NULL;
+ Group *child = NULL;
+
+ r = cg_read_subgroup(d, &fn);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = path_join(path, fn);
+ if (!p)
+ return -ENOMEM;
+
+ path_simplify(p, false);
+
+ r = refresh_one(controller, p, a, b, iteration, depth + 1, &child);
+ if (r < 0)
+ return r;
+
+ if (arg_recursive &&
+ IN_SET(arg_count, COUNT_ALL_PROCESSES, COUNT_USERSPACE_PROCESSES) &&
+ child &&
+ child->n_tasks_valid &&
+ streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+
+ /* Recursively sum up processes */
+
+ if (ours->n_tasks_valid)
+ ours->n_tasks += child->n_tasks;
+ else {
+ ours->n_tasks = child->n_tasks;
+ ours->n_tasks_valid = true;
+ }
+ }
+ }
+
+ if (ret)
+ *ret = ours;
+
+ return 1;
+}
+
+static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration) {
+ const char *c;
+ int r;
+
+ FOREACH_STRING(c, SYSTEMD_CGROUP_CONTROLLER, "cpu", "cpuacct", "memory", "io", "blkio", "pids") {
+ r = refresh_one(c, root, a, b, iteration, 0, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int group_compare(Group * const *a, Group * const *b) {
+ const Group *x = *a, *y = *b;
+ int r;
+
+ if (arg_order != ORDER_TASKS || arg_recursive) {
+ /* Let's make sure that the parent is always before
+ * the child. Except when ordering by tasks and
+ * recursive summing is off, since that is actually
+ * not accumulative for all children. */
+
+ if (path_startswith(empty_to_root(y->path), empty_to_root(x->path)))
+ return -1;
+ if (path_startswith(empty_to_root(x->path), empty_to_root(y->path)))
+ return 1;
+ }
+
+ switch (arg_order) {
+
+ case ORDER_PATH:
+ break;
+
+ case ORDER_CPU:
+ if (arg_cpu_type == CPU_PERCENT) {
+ if (x->cpu_valid && y->cpu_valid) {
+ r = CMP(y->cpu_fraction, x->cpu_fraction);
+ if (r != 0)
+ return r;
+ } else if (x->cpu_valid)
+ return -1;
+ else if (y->cpu_valid)
+ return 1;
+ } else {
+ r = CMP(y->cpu_usage, x->cpu_usage);
+ if (r != 0)
+ return r;
+ }
+
+ break;
+
+ case ORDER_TASKS:
+ if (x->n_tasks_valid && y->n_tasks_valid) {
+ r = CMP(y->n_tasks, x->n_tasks);
+ if (r != 0)
+ return r;
+ } else if (x->n_tasks_valid)
+ return -1;
+ else if (y->n_tasks_valid)
+ return 1;
+
+ break;
+
+ case ORDER_MEMORY:
+ if (x->memory_valid && y->memory_valid) {
+ r = CMP(y->memory, x->memory);
+ if (r != 0)
+ return r;
+ } else if (x->memory_valid)
+ return -1;
+ else if (y->memory_valid)
+ return 1;
+
+ break;
+
+ case ORDER_IO:
+ if (x->io_valid && y->io_valid) {
+ r = CMP(y->io_input_bps + y->io_output_bps, x->io_input_bps + x->io_output_bps);
+ if (r != 0)
+ return r;
+ } else if (x->io_valid)
+ return -1;
+ else if (y->io_valid)
+ return 1;
+ }
+
+ return path_compare(x->path, y->path);
+}
+
+static void display(Hashmap *a) {
+ Group *g;
+ Group **array;
+ signed path_columns;
+ unsigned rows, n = 0, j, maxtcpu = 0, maxtpath = 3; /* 3 for ellipsize() to work properly */
+ char buffer[MAX4(21U, FORMAT_BYTES_MAX, FORMAT_TIMESPAN_MAX, DECIMAL_STR_MAX(usec_t))];
+
+ assert(a);
+
+ if (!terminal_is_dumb())
+ fputs(ANSI_HOME_CLEAR, stdout);
+
+ array = newa(Group*, hashmap_size(a));
+
+ HASHMAP_FOREACH(g, a)
+ if (g->n_tasks_valid || g->cpu_valid || g->memory_valid || g->io_valid)
+ array[n++] = g;
+
+ typesafe_qsort(array, n, group_compare);
+
+ /* Find the longest names in one run */
+ for (j = 0; j < n; j++) {
+ unsigned cputlen, pathtlen;
+
+ maybe_format_timespan(buffer, sizeof(buffer), (usec_t) (array[j]->cpu_usage / NSEC_PER_USEC), 0);
+ cputlen = strlen(buffer);
+ maxtcpu = MAX(maxtcpu, cputlen);
+
+ pathtlen = strlen(array[j]->path);
+ maxtpath = MAX(maxtpath, pathtlen);
+ }
+
+ if (arg_cpu_type == CPU_PERCENT)
+ xsprintf(buffer, "%6s", "%CPU");
+ else
+ xsprintf(buffer, "%*s", maxtcpu, "CPU Time");
+
+ rows = lines();
+ if (rows <= 10)
+ rows = 10;
+
+ if (on_tty()) {
+ const char *on, *off;
+
+ path_columns = columns() - 36 - strlen(buffer);
+ if (path_columns < 10)
+ path_columns = 10;
+
+ on = ansi_highlight_underline();
+ off = ansi_underline();
+
+ printf("%s%s%-*s%s %s%7s%s %s%s%s %s%8s%s %s%8s%s %s%8s%s%s\n",
+ ansi_underline(),
+ arg_order == ORDER_PATH ? on : "", path_columns, "Control Group",
+ arg_order == ORDER_PATH ? off : "",
+ arg_order == ORDER_TASKS ? on : "", arg_count == COUNT_PIDS ? "Tasks" : arg_count == COUNT_USERSPACE_PROCESSES ? "Procs" : "Proc+",
+ arg_order == ORDER_TASKS ? off : "",
+ arg_order == ORDER_CPU ? on : "", buffer,
+ arg_order == ORDER_CPU ? off : "",
+ arg_order == ORDER_MEMORY ? on : "", "Memory",
+ arg_order == ORDER_MEMORY ? off : "",
+ arg_order == ORDER_IO ? on : "", "Input/s",
+ arg_order == ORDER_IO ? off : "",
+ arg_order == ORDER_IO ? on : "", "Output/s",
+ arg_order == ORDER_IO ? off : "",
+ ansi_normal());
+ } else
+ path_columns = maxtpath;
+
+ for (j = 0; j < n; j++) {
+ _cleanup_free_ char *ellipsized = NULL;
+ const char *path;
+
+ if (on_tty() && j + 6 > rows)
+ break;
+
+ g = array[j];
+
+ path = empty_to_root(g->path);
+ ellipsized = ellipsize(path, path_columns, 33);
+ printf("%-*s", path_columns, ellipsized ?: path);
+
+ if (g->n_tasks_valid)
+ printf(" %7" PRIu64, g->n_tasks);
+ else
+ fputs(" -", stdout);
+
+ if (arg_cpu_type == CPU_PERCENT) {
+ if (g->cpu_valid)
+ printf(" %6.1f", g->cpu_fraction*100);
+ else
+ fputs(" -", stdout);
+ } else
+ printf(" %*s", maxtcpu, maybe_format_timespan(buffer, sizeof(buffer), (usec_t) (g->cpu_usage / NSEC_PER_USEC), 0));
+
+ printf(" %8s", maybe_format_bytes(buffer, sizeof(buffer), g->memory_valid, g->memory));
+ printf(" %8s", maybe_format_bytes(buffer, sizeof(buffer), g->io_valid, g->io_input_bps));
+ printf(" %8s", maybe_format_bytes(buffer, sizeof(buffer), g->io_valid, g->io_output_bps));
+
+ putchar('\n');
+ }
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-cgtop", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CGROUP]\n\n"
+ "Show top control groups by their resource usage.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -p --order=path Order by path\n"
+ " -t --order=tasks Order by number of tasks/processes\n"
+ " -c --order=cpu Order by CPU load (default)\n"
+ " -m --order=memory Order by memory load\n"
+ " -i --order=io Order by IO load\n"
+ " -r --raw Provide raw (not human-readable) numbers\n"
+ " --cpu=percentage Show CPU usage as percentage (default)\n"
+ " --cpu=time Show CPU usage as time\n"
+ " -P Count userspace processes instead of tasks (excl. kernel)\n"
+ " -k Count all processes instead of tasks (incl. kernel)\n"
+ " --recursive=BOOL Sum up process count recursively\n"
+ " -d --delay=DELAY Delay between updates\n"
+ " -n --iterations=N Run for N iterations before exiting\n"
+ " -1 Shortcut for --iterations=1\n"
+ " -b --batch Run in batch mode, accepting no input\n"
+ " --depth=DEPTH Maximum traversal depth (default: %u)\n"
+ " -M --machine= Show container\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , arg_depth
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_DEPTH,
+ ARG_CPU_TYPE,
+ ARG_ORDER,
+ ARG_RECURSIVE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "delay", required_argument, NULL, 'd' },
+ { "iterations", required_argument, NULL, 'n' },
+ { "batch", no_argument, NULL, 'b' },
+ { "raw", no_argument, NULL, 'r' },
+ { "depth", required_argument, NULL, ARG_DEPTH },
+ { "cpu", optional_argument, NULL, ARG_CPU_TYPE },
+ { "order", required_argument, NULL, ARG_ORDER },
+ { "recursive", required_argument, NULL, ARG_RECURSIVE },
+ { "machine", required_argument, NULL, 'M' },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hptcmin:brd:kPM:1", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CPU_TYPE:
+ if (optarg) {
+ if (streq(optarg, "time"))
+ arg_cpu_type = CPU_TIME;
+ else if (streq(optarg, "percentage"))
+ arg_cpu_type = CPU_PERCENT;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown argument to --cpu=: %s",
+ optarg);
+ } else
+ arg_cpu_type = CPU_TIME;
+
+ break;
+
+ case ARG_DEPTH:
+ r = safe_atou(optarg, &arg_depth);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse depth parameter '%s': %m", optarg);
+
+ break;
+
+ case 'd':
+ r = parse_sec(optarg, &arg_delay);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse delay parameter '%s': %m", optarg);
+ if (arg_delay <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid delay parameter '%s'",
+ optarg);
+
+ break;
+
+ case 'n':
+ r = safe_atou(optarg, &arg_iterations);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse iterations parameter '%s': %m", optarg);
+
+ break;
+
+ case '1':
+ arg_iterations = 1;
+ break;
+
+ case 'b':
+ arg_batch = true;
+ break;
+
+ case 'r':
+ arg_raw = true;
+ break;
+
+ case 'p':
+ arg_order = ORDER_PATH;
+ break;
+
+ case 't':
+ arg_order = ORDER_TASKS;
+ break;
+
+ case 'c':
+ arg_order = ORDER_CPU;
+ break;
+
+ case 'm':
+ arg_order = ORDER_MEMORY;
+ break;
+
+ case 'i':
+ arg_order = ORDER_IO;
+ break;
+
+ case ARG_ORDER:
+ if (streq(optarg, "path"))
+ arg_order = ORDER_PATH;
+ else if (streq(optarg, "tasks"))
+ arg_order = ORDER_TASKS;
+ else if (streq(optarg, "cpu"))
+ arg_order = ORDER_CPU;
+ else if (streq(optarg, "memory"))
+ arg_order = ORDER_MEMORY;
+ else if (streq(optarg, "io"))
+ arg_order = ORDER_IO;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid argument to --order=: %s",
+ optarg);
+ break;
+
+ case 'k':
+ arg_count = COUNT_ALL_PROCESSES;
+ break;
+
+ case 'P':
+ arg_count = COUNT_USERSPACE_PROCESSES;
+ break;
+
+ case ARG_RECURSIVE:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --recursive= argument '%s': %m", optarg);
+
+ arg_recursive = r;
+ arg_recursive_unset = r == 0;
+ break;
+
+ case 'M':
+ arg_machine = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind == argc - 1)
+ arg_root = argv[optind];
+ else if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ return 1;
+}
+
+static const char* counting_what(void) {
+ if (arg_count == COUNT_PIDS)
+ return "tasks";
+ else if (arg_count == COUNT_ALL_PROCESSES)
+ return "all processes (incl. kernel)";
+ else
+ return "userspace processes (excl. kernel)";
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(group_hash_ops, char, path_hash_func, path_compare, Group, group_free);
+
+static int run(int argc, char *argv[]) {
+ _cleanup_hashmap_free_ Hashmap *a = NULL, *b = NULL;
+ unsigned iteration = 0;
+ usec_t last_refresh = 0;
+ bool quit = false, immediate_refresh = false;
+ _cleanup_free_ char *root = NULL;
+ CGroupMask mask;
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = cg_mask_supported(&mask);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ arg_count = (mask & CGROUP_MASK_PIDS) ? COUNT_PIDS : COUNT_USERSPACE_PROCESSES;
+
+ if (arg_recursive_unset && arg_count == COUNT_PIDS)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Non-recursive counting is only supported when counting processes, not tasks. Use -P or -k.");
+
+ r = show_cgroup_get_path_and_warn(arg_machine, arg_root, &root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get root control group path: %m");
+ log_debug("CGroup path: %s", root);
+
+ a = hashmap_new(&group_hash_ops);
+ b = hashmap_new(&group_hash_ops);
+ if (!a || !b)
+ return log_oom();
+
+ signal(SIGWINCH, columns_lines_cache_reset);
+
+ if (arg_iterations == (unsigned) -1)
+ arg_iterations = on_tty() ? 0 : 1;
+
+ while (!quit) {
+ usec_t t;
+ char key;
+ char h[FORMAT_TIMESPAN_MAX];
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (t >= last_refresh + arg_delay || immediate_refresh) {
+
+ r = refresh(root, a, b, iteration++);
+ if (r < 0)
+ return log_error_errno(r, "Failed to refresh: %m");
+
+ hashmap_clear(b);
+ SWAP_TWO(a, b);
+
+ last_refresh = t;
+ immediate_refresh = false;
+ }
+
+ display(b);
+
+ if (arg_iterations && iteration >= arg_iterations)
+ break;
+
+ if (!on_tty()) /* non-TTY: Empty newline as delimiter between polls */
+ fputs("\n", stdout);
+ fflush(stdout);
+
+ if (arg_batch)
+ (void) usleep(last_refresh + arg_delay - t);
+ else {
+ r = read_one_char(stdin, &key, last_refresh + arg_delay - t, NULL);
+ if (r == -ETIMEDOUT)
+ continue;
+ if (r < 0)
+ return log_error_errno(r, "Couldn't read key: %m");
+ }
+
+ if (on_tty()) { /* TTY: Clear any user keystroke */
+ fputs("\r \r", stdout);
+ fflush(stdout);
+ }
+
+ if (arg_batch)
+ continue;
+
+ switch (key) {
+
+ case ' ':
+ immediate_refresh = true;
+ break;
+
+ case 'q':
+ quit = true;
+ break;
+
+ case 'p':
+ arg_order = ORDER_PATH;
+ break;
+
+ case 't':
+ arg_order = ORDER_TASKS;
+ break;
+
+ case 'c':
+ arg_order = ORDER_CPU;
+ break;
+
+ case 'm':
+ arg_order = ORDER_MEMORY;
+ break;
+
+ case 'i':
+ arg_order = ORDER_IO;
+ break;
+
+ case '%':
+ arg_cpu_type = arg_cpu_type == CPU_TIME ? CPU_PERCENT : CPU_TIME;
+ break;
+
+ case 'k':
+ arg_count = arg_count != COUNT_ALL_PROCESSES ? COUNT_ALL_PROCESSES : COUNT_PIDS;
+ fprintf(stdout, "\nCounting: %s.", counting_what());
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case 'P':
+ arg_count = arg_count != COUNT_USERSPACE_PROCESSES ? COUNT_USERSPACE_PROCESSES : COUNT_PIDS;
+ fprintf(stdout, "\nCounting: %s.", counting_what());
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case 'r':
+ if (arg_count == COUNT_PIDS)
+ fprintf(stdout, "\n\aCannot toggle recursive counting, not available in task counting mode.");
+ else {
+ arg_recursive = !arg_recursive;
+ fprintf(stdout, "\nRecursive process counting: %s", yes_no(arg_recursive));
+ }
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case '+':
+ if (arg_delay < USEC_PER_SEC)
+ arg_delay += USEC_PER_MSEC*250;
+ else
+ arg_delay += USEC_PER_SEC;
+
+ fprintf(stdout, "\nIncreased delay to %s.", format_timespan(h, sizeof(h), arg_delay, 0));
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case '-':
+ if (arg_delay <= USEC_PER_MSEC*500)
+ arg_delay = USEC_PER_MSEC*250;
+ else if (arg_delay < USEC_PER_MSEC*1250)
+ arg_delay -= USEC_PER_MSEC*250;
+ else
+ arg_delay -= USEC_PER_SEC;
+
+ fprintf(stdout, "\nDecreased delay to %s.", format_timespan(h, sizeof(h), arg_delay, 0));
+ fflush(stdout);
+ sleep(1);
+ break;
+
+ case '?':
+ case 'h':
+
+#define ON ANSI_HIGHLIGHT
+#define OFF ANSI_NORMAL
+
+ fprintf(stdout,
+ "\t<" ON "p" OFF "> By path; <" ON "t" OFF "> By tasks/procs; <" ON "c" OFF "> By CPU; <" ON "m" OFF "> By memory; <" ON "i" OFF "> By I/O\n"
+ "\t<" ON "+" OFF "> Inc. delay; <" ON "-" OFF "> Dec. delay; <" ON "%%" OFF "> Toggle time; <" ON "SPACE" OFF "> Refresh\n"
+ "\t<" ON "P" OFF "> Toggle count userspace processes; <" ON "k" OFF "> Toggle count all processes\n"
+ "\t<" ON "r" OFF "> Count processes recursively; <" ON "q" OFF "> Quit");
+ fflush(stdout);
+ sleep(3);
+ break;
+
+ default:
+ if (key < ' ')
+ fprintf(stdout, "\nUnknown key '\\x%x'. Ignoring.", key);
+ else
+ fprintf(stdout, "\nUnknown key '%c'. Ignoring.", key);
+ fflush(stdout);
+ sleep(1);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/core/all-units.h b/src/core/all-units.h
new file mode 100644
index 0000000..fad814b
--- /dev/null
+++ b/src/core/all-units.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "unit.h"
+
+#include "automount.h"
+#include "device.h"
+#include "path.h"
+#include "scope.h"
+#include "service.h"
+#include "slice.h"
+#include "socket.h"
+#include "swap.h"
+#include "target.h"
+#include "timer.h"
diff --git a/src/core/apparmor-setup.c b/src/core/apparmor-setup.c
new file mode 100644
index 0000000..e856f5c
--- /dev/null
+++ b/src/core/apparmor-setup.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#if HAVE_APPARMOR
+# include <sys/apparmor.h>
+#endif
+#include <unistd.h>
+
+#include "apparmor-setup.h"
+#include "apparmor-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+
+#if HAVE_APPARMOR
+DEFINE_TRIVIAL_CLEANUP_FUNC(aa_policy_cache *, aa_policy_cache_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(aa_features *, aa_features_unref);
+#endif
+
+int mac_apparmor_setup(void) {
+#if HAVE_APPARMOR
+ int r;
+ _cleanup_(aa_policy_cache_unrefp) aa_policy_cache *policy_cache = NULL;
+ _cleanup_(aa_features_unrefp) aa_features *features = NULL;
+ const char *current_file;
+ _cleanup_free_ char *current_profile = NULL, *cache_dir_path = NULL;
+
+ if (!mac_apparmor_use()) {
+ log_debug("AppArmor either not supported by the kernel or disabled.");
+ return 0;
+ }
+
+ /* To enable LSM stacking a patch to the kernel is proposed to create a
+ * per-LSM subdirectory to distinguish between the LSMs. Therefore, we
+ * read the file from the LSM specific directory first and only if that
+ * fails the one from the generic directory.
+ */
+ FOREACH_STRING(current_file, "/proc/self/attr/apparmor/current", "/proc/self/attr/current") {
+ r = read_one_line_file(current_file, &current_profile);
+ if (r == -ENOENT)
+ continue;
+ else if (r < 0)
+ log_warning_errno(r, "Failed to read current AppArmor profile from file %s, ignoring: %m", current_file);
+ else
+ break;
+ }
+ if (!current_profile) {
+ log_warning("Failed to get the current AppArmor profile of systemd from /proc/self/attr/apparmor/current or /proc/self/attr/current, ignoring.");
+ return 0;
+ }
+ if (!streq(current_profile, "unconfined")) {
+ log_debug("We are already confined in an AppArmor profile.");
+ return 0;
+ }
+
+ r = aa_features_new_from_kernel(&features);
+ if (r < 0) {
+ log_warning_errno(errno, "Failed to get the AppArmor feature set from the kernel, ignoring: %m");
+ return 0;
+ }
+ cache_dir_path = aa_policy_cache_dir_path_preview(features, AT_FDCWD, "/etc/apparmor/earlypolicy");
+ if (!cache_dir_path) {
+ log_debug_errno(errno, "Failed to get the path of the early AppArmor policy cache directory.");
+ return 0;
+ }
+
+ /* aa_policy_cache_new will internally use the same path as aa_policy_cache_dir_path_preview has returned. */
+ r = aa_policy_cache_new(&policy_cache, features, AT_FDCWD, "/etc/apparmor/earlypolicy", 0);
+ if (r < 0) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "The early AppArmor policy cache directory %s does not exist.", cache_dir_path);
+ return 0;
+ }
+ log_warning_errno(errno, "Failed to create a new AppArmor policy cache, ignoring: %m");
+ return 0;
+ }
+ r = aa_policy_cache_replace_all(policy_cache, NULL);
+ if (r < 0) {
+ log_warning_errno(errno, "Failed to load the profiles from the early AppArmor policy cache directory %s, ignoring: %m", cache_dir_path);
+ return 0;
+ }
+
+ log_info("Successfully loaded all binary profiles from AppArmor early policy cache at %s.", cache_dir_path);
+
+ r = aa_change_profile("systemd");
+ if (r < 0) {
+ if (errno == ENOENT)
+ log_debug_errno(errno, "Failed to change to AppArmor profile 'systemd'. Please ensure that one of the binary profile files in policy cache directory %s contains a profile with that name.", cache_dir_path);
+ else
+ log_error_errno(errno, "Failed to change to AppArmor profile 'systemd': %m");
+ return 0;
+ }
+
+ log_info("Changed to AppArmor profile systemd.");
+#endif
+ return 0;
+}
diff --git a/src/core/apparmor-setup.h b/src/core/apparmor-setup.h
new file mode 100644
index 0000000..f3b7382
--- /dev/null
+++ b/src/core/apparmor-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int mac_apparmor_setup(void);
diff --git a/src/core/audit-fd.c b/src/core/audit-fd.c
new file mode 100644
index 0000000..097bea3
--- /dev/null
+++ b/src/core/audit-fd.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "audit-fd.h"
+
+#if HAVE_AUDIT
+
+#include <libaudit.h>
+#include <stdbool.h>
+
+#include "capability-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "util.h"
+
+static bool initialized = false;
+static int audit_fd;
+
+int get_audit_fd(void) {
+
+ if (!initialized) {
+ if (have_effective_cap(CAP_AUDIT_WRITE) == 0) {
+ audit_fd = -EPERM;
+ initialized = true;
+
+ return audit_fd;
+ }
+
+ audit_fd = audit_open();
+
+ if (audit_fd < 0) {
+ if (!IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT))
+ log_error_errno(errno, "Failed to connect to audit log: %m");
+
+ audit_fd = errno ? -errno : -EINVAL;
+ }
+
+ initialized = true;
+ }
+
+ return audit_fd;
+}
+
+void close_audit_fd(void) {
+
+ if (initialized && audit_fd >= 0)
+ safe_close(audit_fd);
+
+ initialized = true;
+ audit_fd = -ECONNRESET;
+}
+
+#else
+
+int get_audit_fd(void) {
+ return -EAFNOSUPPORT;
+}
+
+void close_audit_fd(void) {
+}
+
+#endif
diff --git a/src/core/audit-fd.h b/src/core/audit-fd.h
new file mode 100644
index 0000000..5cdf61e
--- /dev/null
+++ b/src/core/audit-fd.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int get_audit_fd(void);
+void close_audit_fd(void);
diff --git a/src/core/automount.c b/src/core/automount.c
new file mode 100644
index 0000000..a84cddb
--- /dev/null
+++ b/src/core/automount.c
@@ -0,0 +1,1135 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/auto_dev-ioctl.h>
+#include <linux/auto_fs4.h>
+#include <sys/epoll.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "async.h"
+#include "automount.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "dbus-automount.h"
+#include "dbus-unit.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "label.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mount.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_AUTOMOUNT_STATE_MAX] = {
+ [AUTOMOUNT_DEAD] = UNIT_INACTIVE,
+ [AUTOMOUNT_WAITING] = UNIT_ACTIVE,
+ [AUTOMOUNT_RUNNING] = UNIT_ACTIVE,
+ [AUTOMOUNT_FAILED] = UNIT_FAILED
+};
+
+struct expire_data {
+ int dev_autofs_fd;
+ int ioctl_fd;
+};
+
+static void expire_data_free(struct expire_data *data) {
+ if (!data)
+ return;
+
+ safe_close(data->dev_autofs_fd);
+ safe_close(data->ioctl_fd);
+ free(data);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct expire_data*, expire_data_free);
+
+static int open_dev_autofs(Manager *m);
+static int automount_dispatch_io(sd_event_source *s, int fd, uint32_t events, void *userdata);
+static int automount_start_expire(Automount *a);
+static void automount_stop_expire(Automount *a);
+static int automount_send_ready(Automount *a, Set *tokens, int status);
+
+static void automount_init(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ a->pipe_fd = -1;
+ a->directory_mode = 0755;
+ UNIT(a)->ignore_on_isolate = true;
+}
+
+static void unmount_autofs(Automount *a) {
+ int r;
+
+ assert(a);
+
+ if (a->pipe_fd < 0)
+ return;
+
+ a->pipe_event_source = sd_event_source_unref(a->pipe_event_source);
+ a->pipe_fd = safe_close(a->pipe_fd);
+
+ /* If we reload/reexecute things we keep the mount point around */
+ if (!IN_SET(UNIT(a)->manager->objective, MANAGER_RELOAD, MANAGER_REEXECUTE)) {
+
+ automount_send_ready(a, a->tokens, -EHOSTDOWN);
+ automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
+
+ if (a->where) {
+ r = repeat_unmount(a->where, MNT_DETACH|UMOUNT_NOFOLLOW);
+ if (r < 0)
+ log_error_errno(r, "Failed to unmount: %m");
+ }
+ }
+}
+
+static void automount_done(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+
+ unmount_autofs(a);
+
+ a->where = mfree(a->where);
+
+ a->tokens = set_free(a->tokens);
+ a->expire_tokens = set_free(a->expire_tokens);
+
+ a->expire_event_source = sd_event_source_unref(a->expire_event_source);
+}
+
+static int automount_add_trigger_dependencies(Automount *a) {
+ Unit *x;
+ int r;
+
+ assert(a);
+
+ r = unit_load_related_unit(UNIT(a), ".mount", &x);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies(UNIT(a), UNIT_BEFORE, UNIT_TRIGGERS, x, true, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static int automount_add_mount_dependencies(Automount *a) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(a);
+
+ parent = dirname_malloc(a->where);
+ if (!parent)
+ return -ENOMEM;
+
+ return unit_require_mounts_for(UNIT(a), parent, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static int automount_add_default_dependencies(Automount *a) {
+ int r;
+
+ assert(a);
+
+ if (!UNIT(a)->default_dependencies)
+ return 0;
+
+ if (!MANAGER_IS_SYSTEM(UNIT(a)->manager))
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(a), UNIT_BEFORE, SPECIAL_LOCAL_FS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ r = unit_add_dependency_by_name(UNIT(a), UNIT_AFTER, SPECIAL_LOCAL_FS_PRE_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ r = unit_add_two_dependencies_by_name(UNIT(a), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int automount_verify(Automount *a) {
+ _cleanup_free_ char *e = NULL;
+ int r;
+
+ assert(a);
+ assert(UNIT(a)->load_state == UNIT_LOADED);
+
+ if (path_equal(a->where, "/")) {
+ log_unit_error(UNIT(a), "Cannot have an automount unit for the root directory. Refusing.");
+ return -ENOEXEC;
+ }
+
+ r = unit_name_from_path(a->where, ".automount", &e);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(a), r, "Failed to generate unit name from path: %m");
+
+ if (!unit_has_name(UNIT(a), e)) {
+ log_unit_error(UNIT(a), "Where= setting doesn't match unit name. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int automount_set_where(Automount *a) {
+ int r;
+
+ assert(a);
+
+ if (a->where)
+ return 0;
+
+ r = unit_name_to_path(UNIT(a)->id, &a->where);
+ if (r < 0)
+ return r;
+
+ path_simplify(a->where, false);
+ return 1;
+}
+
+static int automount_add_extras(Automount *a) {
+ int r;
+
+ r = automount_set_where(a);
+ if (r < 0)
+ return r;
+
+ r = automount_add_trigger_dependencies(a);
+ if (r < 0)
+ return r;
+
+ r = automount_add_mount_dependencies(a);
+ if (r < 0)
+ return r;
+
+ return automount_add_default_dependencies(a);
+}
+
+static int automount_load(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ /* Load a .automount file */
+ r = unit_load_fragment_and_dropin(u, true);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ r = automount_add_extras(a);
+ if (r < 0)
+ return r;
+
+ return automount_verify(a);
+}
+
+static void automount_set_state(Automount *a, AutomountState state) {
+ AutomountState old_state;
+ assert(a);
+
+ if (a->state != state)
+ bus_unit_send_pending_change_signal(UNIT(a), false);
+
+ old_state = a->state;
+ a->state = state;
+
+ if (state != AUTOMOUNT_RUNNING)
+ automount_stop_expire(a);
+
+ if (!IN_SET(state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING))
+ unmount_autofs(a);
+
+ if (state != old_state)
+ log_unit_debug(UNIT(a), "Changed %s -> %s", automount_state_to_string(old_state), automount_state_to_string(state));
+
+ unit_notify(UNIT(a), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int automount_coldplug(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(a);
+ assert(a->state == AUTOMOUNT_DEAD);
+
+ if (a->deserialized_state == a->state)
+ return 0;
+
+ if (IN_SET(a->deserialized_state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING)) {
+
+ r = automount_set_where(a);
+ if (r < 0)
+ return r;
+
+ r = open_dev_autofs(u->manager);
+ if (r < 0)
+ return r;
+
+ assert(a->pipe_fd >= 0);
+
+ r = sd_event_add_io(u->manager->event, &a->pipe_event_source, a->pipe_fd, EPOLLIN, automount_dispatch_io, u);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(a->pipe_event_source, "automount-io");
+ if (a->deserialized_state == AUTOMOUNT_RUNNING) {
+ r = automount_start_expire(a);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m");
+ }
+
+ automount_set_state(a, a->deserialized_state);
+ }
+
+ return 0;
+}
+
+static void automount_dump(Unit *u, FILE *f, const char *prefix) {
+ char time_string[FORMAT_TIMESPAN_MAX];
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+
+ fprintf(f,
+ "%sAutomount State: %s\n"
+ "%sResult: %s\n"
+ "%sWhere: %s\n"
+ "%sDirectoryMode: %04o\n"
+ "%sTimeoutIdleUSec: %s\n",
+ prefix, automount_state_to_string(a->state),
+ prefix, automount_result_to_string(a->result),
+ prefix, a->where,
+ prefix, a->directory_mode,
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, a->timeout_idle_usec, USEC_PER_SEC));
+}
+
+static void automount_enter_dead(Automount *a, AutomountResult f) {
+ assert(a);
+
+ if (a->result == AUTOMOUNT_SUCCESS)
+ a->result = f;
+
+ unit_log_result(UNIT(a), a->result == AUTOMOUNT_SUCCESS, automount_result_to_string(a->result));
+ automount_set_state(a, a->result != AUTOMOUNT_SUCCESS ? AUTOMOUNT_FAILED : AUTOMOUNT_DEAD);
+}
+
+static int open_dev_autofs(Manager *m) {
+ struct autofs_dev_ioctl param;
+
+ assert(m);
+
+ if (m->dev_autofs_fd >= 0)
+ return m->dev_autofs_fd;
+
+ (void) label_fix("/dev/autofs", 0);
+
+ m->dev_autofs_fd = open("/dev/autofs", O_CLOEXEC|O_RDONLY);
+ if (m->dev_autofs_fd < 0)
+ return log_error_errno(errno, "Failed to open /dev/autofs: %m");
+
+ init_autofs_dev_ioctl(&param);
+ if (ioctl(m->dev_autofs_fd, AUTOFS_DEV_IOCTL_VERSION, &param) < 0) {
+ m->dev_autofs_fd = safe_close(m->dev_autofs_fd);
+ return -errno;
+ }
+
+ log_debug("Autofs kernel version %i.%i", param.ver_major, param.ver_minor);
+
+ return m->dev_autofs_fd;
+}
+
+static int open_ioctl_fd(int dev_autofs_fd, const char *where, dev_t devid) {
+ struct autofs_dev_ioctl *param;
+ size_t l;
+
+ assert(dev_autofs_fd >= 0);
+ assert(where);
+
+ l = sizeof(struct autofs_dev_ioctl) + strlen(where) + 1;
+ param = alloca(l);
+
+ init_autofs_dev_ioctl(param);
+ param->size = l;
+ param->ioctlfd = -1;
+ param->openmount.devid = devid;
+ strcpy(param->path, where);
+
+ if (ioctl(dev_autofs_fd, AUTOFS_DEV_IOCTL_OPENMOUNT, param) < 0)
+ return -errno;
+
+ if (param->ioctlfd < 0)
+ return -EIO;
+
+ (void) fd_cloexec(param->ioctlfd, true);
+ return param->ioctlfd;
+}
+
+static int autofs_protocol(int dev_autofs_fd, int ioctl_fd) {
+ uint32_t major, minor;
+ struct autofs_dev_ioctl param;
+
+ assert(dev_autofs_fd >= 0);
+ assert(ioctl_fd >= 0);
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = ioctl_fd;
+
+ if (ioctl(dev_autofs_fd, AUTOFS_DEV_IOCTL_PROTOVER, &param) < 0)
+ return -errno;
+
+ major = param.protover.version;
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = ioctl_fd;
+
+ if (ioctl(dev_autofs_fd, AUTOFS_DEV_IOCTL_PROTOSUBVER, &param) < 0)
+ return -errno;
+
+ minor = param.protosubver.sub_version;
+
+ log_debug("Autofs protocol version %i.%i", major, minor);
+ return 0;
+}
+
+static int autofs_set_timeout(int dev_autofs_fd, int ioctl_fd, usec_t usec) {
+ struct autofs_dev_ioctl param;
+
+ assert(dev_autofs_fd >= 0);
+ assert(ioctl_fd >= 0);
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = ioctl_fd;
+
+ if (usec == USEC_INFINITY)
+ param.timeout.timeout = 0;
+ else
+ /* Convert to seconds, rounding up. */
+ param.timeout.timeout = DIV_ROUND_UP(usec, USEC_PER_SEC);
+
+ if (ioctl(dev_autofs_fd, AUTOFS_DEV_IOCTL_TIMEOUT, &param) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int autofs_send_ready(int dev_autofs_fd, int ioctl_fd, uint32_t token, int status) {
+ struct autofs_dev_ioctl param;
+
+ assert(dev_autofs_fd >= 0);
+ assert(ioctl_fd >= 0);
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = ioctl_fd;
+
+ if (status != 0) {
+ param.fail.token = token;
+ param.fail.status = status;
+ } else
+ param.ready.token = token;
+
+ if (ioctl(dev_autofs_fd, status ? AUTOFS_DEV_IOCTL_FAIL : AUTOFS_DEV_IOCTL_READY, &param) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int automount_send_ready(Automount *a, Set *tokens, int status) {
+ _cleanup_close_ int ioctl_fd = -1;
+ unsigned token;
+ int r;
+
+ assert(a);
+ assert(status <= 0);
+
+ if (set_isempty(tokens))
+ return 0;
+
+ ioctl_fd = open_ioctl_fd(UNIT(a)->manager->dev_autofs_fd, a->where, a->dev_id);
+ if (ioctl_fd < 0)
+ return ioctl_fd;
+
+ if (status != 0)
+ log_unit_debug_errno(UNIT(a), status, "Sending failure: %m");
+ else
+ log_unit_debug(UNIT(a), "Sending success.");
+
+ r = 0;
+
+ /* Autofs thankfully does not hand out 0 as a token */
+ while ((token = PTR_TO_UINT(set_steal_first(tokens)))) {
+ int k;
+
+ /* Autofs fun fact:
+ *
+ * if you pass a positive status code here, kernels
+ * prior to 4.12 will freeze! Yay! */
+
+ k = autofs_send_ready(UNIT(a)->manager->dev_autofs_fd,
+ ioctl_fd,
+ token,
+ status);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static void automount_trigger_notify(Unit *u, Unit *other) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(a);
+ assert(other);
+
+ /* Filter out invocations with bogus state */
+ assert(UNIT_IS_LOAD_COMPLETE(other->load_state));
+ assert(other->type == UNIT_MOUNT);
+
+ /* Don't propagate state changes from the mount if we are already down */
+ if (!IN_SET(a->state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING))
+ return;
+
+ /* Propagate start limit hit state */
+ if (other->start_limit_hit) {
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_MOUNT_START_LIMIT_HIT);
+ return;
+ }
+
+ /* Don't propagate anything if there's still a job queued */
+ if (other->job)
+ return;
+
+ /* The mount is successfully established */
+ if (IN_SET(MOUNT(other)->state, MOUNT_MOUNTED, MOUNT_REMOUNTING)) {
+ (void) automount_send_ready(a, a->tokens, 0);
+
+ r = automount_start_expire(a);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m");
+
+ automount_set_state(a, AUTOMOUNT_RUNNING);
+ }
+
+ if (IN_SET(MOUNT(other)->state,
+ MOUNT_MOUNTING, MOUNT_MOUNTING_DONE,
+ MOUNT_MOUNTED, MOUNT_REMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM, MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED))
+ (void) automount_send_ready(a, a->expire_tokens, -ENODEV);
+
+ if (MOUNT(other)->state == MOUNT_DEAD)
+ (void) automount_send_ready(a, a->expire_tokens, 0);
+
+ /* The mount is in some unhappy state now, let's unfreeze any waiting clients */
+ if (IN_SET(MOUNT(other)->state,
+ MOUNT_DEAD, MOUNT_UNMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM, MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED)) {
+
+ (void) automount_send_ready(a, a->tokens, -ENODEV);
+
+ automount_set_state(a, AUTOMOUNT_WAITING);
+ }
+}
+
+static void automount_enter_waiting(Automount *a) {
+ _cleanup_close_ int ioctl_fd = -1;
+ int p[2] = { -1, -1 };
+ char name[STRLEN("systemd-") + DECIMAL_STR_MAX(pid_t) + 1];
+ char options[STRLEN("fd=,pgrp=,minproto=5,maxproto=5,direct")
+ + DECIMAL_STR_MAX(int) + DECIMAL_STR_MAX(gid_t) + 1];
+ bool mounted = false;
+ int r, dev_autofs_fd;
+ struct stat st;
+
+ assert(a);
+ assert(a->pipe_fd < 0);
+ assert(a->where);
+
+ set_clear(a->tokens);
+
+ r = unit_fail_if_noncanonical(UNIT(a), a->where);
+ if (r < 0)
+ goto fail;
+
+ (void) mkdir_p_label(a->where, a->directory_mode);
+
+ unit_warn_if_dir_nonempty(UNIT(a), a->where);
+
+ dev_autofs_fd = open_dev_autofs(UNIT(a)->manager);
+ if (dev_autofs_fd < 0) {
+ r = dev_autofs_fd;
+ goto fail;
+ }
+
+ if (pipe2(p, O_CLOEXEC) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ r = fd_nonblock(p[0], true);
+ if (r < 0)
+ goto fail;
+
+ xsprintf(options, "fd=%i,pgrp="PID_FMT",minproto=5,maxproto=5,direct", p[1], getpgrp());
+ xsprintf(name, "systemd-"PID_FMT, getpid_cached());
+ r = mount_nofollow(name, a->where, "autofs", 0, options);
+ if (r < 0)
+ goto fail;
+
+ mounted = true;
+
+ p[1] = safe_close(p[1]);
+
+ if (stat(a->where, &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ ioctl_fd = open_ioctl_fd(dev_autofs_fd, a->where, st.st_dev);
+ if (ioctl_fd < 0) {
+ r = ioctl_fd;
+ goto fail;
+ }
+
+ r = autofs_protocol(dev_autofs_fd, ioctl_fd);
+ if (r < 0)
+ goto fail;
+
+ r = autofs_set_timeout(dev_autofs_fd, ioctl_fd, a->timeout_idle_usec);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_add_io(UNIT(a)->manager->event, &a->pipe_event_source, p[0], EPOLLIN, automount_dispatch_io, a);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(a->pipe_event_source, "automount-io");
+
+ a->pipe_fd = p[0];
+ a->dev_id = st.st_dev;
+
+ automount_set_state(a, AUTOMOUNT_WAITING);
+
+ return;
+
+fail:
+ log_unit_error_errno(UNIT(a), r, "Failed to initialize automounter: %m");
+
+ safe_close_pair(p);
+
+ if (mounted) {
+ r = repeat_unmount(a->where, MNT_DETACH|UMOUNT_NOFOLLOW);
+ if (r < 0)
+ log_error_errno(r, "Failed to unmount, ignoring: %m");
+ }
+
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_RESOURCES);
+}
+
+static void *expire_thread(void *p) {
+ struct autofs_dev_ioctl param;
+ _cleanup_(expire_data_freep) struct expire_data *data = (struct expire_data*)p;
+ int r;
+
+ assert(data->dev_autofs_fd >= 0);
+ assert(data->ioctl_fd >= 0);
+
+ init_autofs_dev_ioctl(&param);
+ param.ioctlfd = data->ioctl_fd;
+
+ do {
+ r = ioctl(data->dev_autofs_fd, AUTOFS_DEV_IOCTL_EXPIRE, &param);
+ } while (r >= 0);
+
+ if (errno != EAGAIN)
+ log_warning_errno(errno, "Failed to expire automount, ignoring: %m");
+
+ return NULL;
+}
+
+static int automount_dispatch_expire(sd_event_source *source, usec_t usec, void *userdata) {
+ Automount *a = AUTOMOUNT(userdata);
+ _cleanup_(expire_data_freep) struct expire_data *data = NULL;
+ int r;
+
+ assert(a);
+ assert(source == a->expire_event_source);
+
+ data = new0(struct expire_data, 1);
+ if (!data)
+ return log_oom();
+
+ data->ioctl_fd = -1;
+
+ data->dev_autofs_fd = fcntl(UNIT(a)->manager->dev_autofs_fd, F_DUPFD_CLOEXEC, 3);
+ if (data->dev_autofs_fd < 0)
+ return log_unit_error_errno(UNIT(a), errno, "Failed to duplicate autofs fd: %m");
+
+ data->ioctl_fd = open_ioctl_fd(UNIT(a)->manager->dev_autofs_fd, a->where, a->dev_id);
+ if (data->ioctl_fd < 0)
+ return log_unit_error_errno(UNIT(a), data->ioctl_fd, "Couldn't open autofs ioctl fd: %m");
+
+ r = asynchronous_job(expire_thread, data);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(a), r, "Failed to start expire job: %m");
+
+ data = NULL;
+
+ return automount_start_expire(a);
+}
+
+static int automount_start_expire(Automount *a) {
+ usec_t timeout;
+ int r;
+
+ assert(a);
+
+ if (a->timeout_idle_usec == 0)
+ return 0;
+
+ timeout = MAX(a->timeout_idle_usec/3, USEC_PER_SEC);
+
+ if (a->expire_event_source) {
+ r = sd_event_source_set_time_relative(a->expire_event_source, timeout);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(a->expire_event_source, SD_EVENT_ONESHOT);
+ }
+
+ r = sd_event_add_time_relative(
+ UNIT(a)->manager->event,
+ &a->expire_event_source,
+ CLOCK_MONOTONIC, timeout, 0,
+ automount_dispatch_expire, a);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(a->expire_event_source, "automount-expire");
+
+ return 0;
+}
+
+static void automount_stop_expire(Automount *a) {
+ assert(a);
+
+ if (!a->expire_event_source)
+ return;
+
+ (void) sd_event_source_set_enabled(a->expire_event_source, SD_EVENT_OFF);
+}
+
+static void automount_enter_running(Automount *a) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Unit *trigger;
+ struct stat st;
+ int r;
+
+ assert(a);
+
+ /* If the user masked our unit in the meantime, fail */
+ if (UNIT(a)->load_state != UNIT_LOADED) {
+ log_unit_error(UNIT(a), "Suppressing automount event since unit is no longer loaded.");
+ goto fail;
+ }
+
+ /* We don't take mount requests anymore if we are supposed to
+ * shut down anyway */
+ if (unit_stop_pending(UNIT(a))) {
+ log_unit_debug(UNIT(a), "Suppressing automount request since unit stop is scheduled.");
+ automount_send_ready(a, a->tokens, -EHOSTDOWN);
+ automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
+ return;
+ }
+
+ (void) mkdir_p_label(a->where, a->directory_mode);
+
+ /* Before we do anything, let's see if somebody is playing games with us? */
+ if (lstat(a->where, &st) < 0) {
+ log_unit_warning_errno(UNIT(a), errno, "Failed to stat automount point: %m");
+ goto fail;
+ }
+
+ /* The mount unit may have been explicitly started before we got the
+ * autofs request. Ack it to unblock anything waiting on the mount point. */
+ if (!S_ISDIR(st.st_mode) || st.st_dev != a->dev_id) {
+ log_unit_info(UNIT(a), "Automount point already active?");
+ automount_send_ready(a, a->tokens, 0);
+ return;
+ }
+
+ trigger = UNIT_TRIGGER(UNIT(a));
+ if (!trigger) {
+ log_unit_error(UNIT(a), "Unit to trigger vanished.");
+ goto fail;
+ }
+
+ r = manager_add_job(UNIT(a)->manager, JOB_START, trigger, JOB_REPLACE, NULL, &error, NULL);
+ if (r < 0) {
+ log_unit_warning(UNIT(a), "Failed to queue mount startup job: %s", bus_error_message(&error, r));
+ goto fail;
+ }
+
+ automount_set_state(a, AUTOMOUNT_RUNNING);
+ return;
+
+fail:
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_RESOURCES);
+}
+
+static int automount_start(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(a);
+ assert(IN_SET(a->state, AUTOMOUNT_DEAD, AUTOMOUNT_FAILED));
+
+ if (path_is_mount_point(a->where, NULL, 0) > 0) {
+ log_unit_error(u, "Path %s is already a mount point, refusing start.", a->where);
+ return -EEXIST;
+ }
+
+ r = unit_test_trigger_loaded(u);
+ if (r < 0)
+ return r;
+
+ r = unit_test_start_limit(u);
+ if (r < 0) {
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ a->result = AUTOMOUNT_SUCCESS;
+ automount_enter_waiting(a);
+ return 1;
+}
+
+static int automount_stop(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+ assert(IN_SET(a->state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING));
+
+ automount_enter_dead(a, AUTOMOUNT_SUCCESS);
+ return 1;
+}
+
+static int automount_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Automount *a = AUTOMOUNT(u);
+ void *p;
+ int r;
+
+ assert(a);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", automount_state_to_string(a->state));
+ (void) serialize_item(f, "result", automount_result_to_string(a->result));
+ (void) serialize_item_format(f, "dev-id", "%lu", (unsigned long) a->dev_id);
+
+ SET_FOREACH(p, a->tokens)
+ (void) serialize_item_format(f, "token", "%u", PTR_TO_UINT(p));
+ SET_FOREACH(p, a->expire_tokens)
+ (void) serialize_item_format(f, "expire-token", "%u", PTR_TO_UINT(p));
+
+ r = serialize_fd(f, fds, "pipe-fd", a->pipe_fd);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int automount_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Automount *a = AUTOMOUNT(u);
+ int r;
+
+ assert(a);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ AutomountState state;
+
+ state = automount_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ a->deserialized_state = state;
+ } else if (streq(key, "result")) {
+ AutomountResult f;
+
+ f = automount_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != AUTOMOUNT_SUCCESS)
+ a->result = f;
+
+ } else if (streq(key, "dev-id")) {
+ unsigned long d;
+
+ if (safe_atolu(value, &d) < 0)
+ log_unit_debug(u, "Failed to parse dev-id value: %s", value);
+ else
+ a->dev_id = (dev_t) d;
+
+ } else if (streq(key, "token")) {
+ unsigned token;
+
+ if (safe_atou(value, &token) < 0)
+ log_unit_debug(u, "Failed to parse token value: %s", value);
+ else {
+ r = set_ensure_put(&a->tokens, NULL, UINT_TO_PTR(token));
+ if (r < 0)
+ log_unit_error_errno(u, r, "Failed to add token to set: %m");
+ }
+ } else if (streq(key, "expire-token")) {
+ unsigned token;
+
+ if (safe_atou(value, &token) < 0)
+ log_unit_debug(u, "Failed to parse token value: %s", value);
+ else {
+ r = set_ensure_put(&a->expire_tokens, NULL, UINT_TO_PTR(token));
+ if (r < 0)
+ log_unit_error_errno(u, r, "Failed to add expire token to set: %m");
+ }
+ } else if (streq(key, "pipe-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse pipe-fd value: %s", value);
+ else {
+ safe_close(a->pipe_fd);
+ a->pipe_fd = fdset_remove(fds, fd);
+ }
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static UnitActiveState automount_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[AUTOMOUNT(u)->state];
+}
+
+static const char *automount_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return automount_state_to_string(AUTOMOUNT(u)->state);
+}
+
+static bool automount_may_gc(Unit *u) {
+ Unit *t;
+
+ assert(u);
+
+ t = UNIT_TRIGGER(u);
+ if (!t)
+ return true;
+
+ return UNIT_VTABLE(t)->may_gc(t);
+}
+
+static int automount_dispatch_io(sd_event_source *s, int fd, uint32_t events, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ union autofs_v5_packet_union packet;
+ Automount *a = AUTOMOUNT(userdata);
+ Unit *trigger;
+ int r;
+
+ assert(a);
+ assert(fd == a->pipe_fd);
+
+ if (events & (EPOLLHUP|EPOLLERR)) {
+ log_unit_error(UNIT(a), "Got hangup/error on autofs pipe from kernel. Likely our automount point has been unmounted by someone or something else?");
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_UNMOUNTED);
+ return 0;
+ }
+
+ if (events != EPOLLIN) {
+ log_unit_error(UNIT(a), "Got invalid poll event %"PRIu32" on pipe (fd=%d)", events, fd);
+ goto fail;
+ }
+
+ r = loop_read_exact(a->pipe_fd, &packet, sizeof(packet), true);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(a), r, "Invalid read from pipe: %m");
+ goto fail;
+ }
+
+ switch (packet.hdr.type) {
+
+ case autofs_ptype_missing_direct:
+
+ if (packet.v5_packet.pid > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ (void) get_process_comm(packet.v5_packet.pid, &p);
+ log_unit_info(UNIT(a), "Got automount request for %s, triggered by %"PRIu32" (%s)", a->where, packet.v5_packet.pid, strna(p));
+ } else
+ log_unit_debug(UNIT(a), "Got direct mount request on %s", a->where);
+
+ r = set_ensure_put(&a->tokens, NULL, UINT_TO_PTR(packet.v5_packet.wait_queue_token));
+ if (r < 0) {
+ log_unit_error_errno(UNIT(a), r, "Failed to remember token: %m");
+ goto fail;
+ }
+
+ automount_enter_running(a);
+ break;
+
+ case autofs_ptype_expire_direct:
+ log_unit_debug(UNIT(a), "Got direct umount request on %s", a->where);
+
+ automount_stop_expire(a);
+
+ r = set_ensure_put(&a->expire_tokens, NULL, UINT_TO_PTR(packet.v5_packet.wait_queue_token));
+ if (r < 0) {
+ log_unit_error_errno(UNIT(a), r, "Failed to remember token: %m");
+ goto fail;
+ }
+
+ trigger = UNIT_TRIGGER(UNIT(a));
+ if (!trigger) {
+ log_unit_error(UNIT(a), "Unit to trigger vanished.");
+ goto fail;
+ }
+
+ r = manager_add_job(UNIT(a)->manager, JOB_STOP, trigger, JOB_REPLACE, NULL, &error, NULL);
+ if (r < 0) {
+ log_unit_warning(UNIT(a), "Failed to queue umount startup job: %s", bus_error_message(&error, r));
+ goto fail;
+ }
+ break;
+
+ default:
+ log_unit_error(UNIT(a), "Received unknown automount request %i", packet.hdr.type);
+ break;
+ }
+
+ return 0;
+
+fail:
+ automount_enter_dead(a, AUTOMOUNT_FAILURE_RESOURCES);
+ return 0;
+}
+
+static void automount_shutdown(Manager *m) {
+ assert(m);
+
+ m->dev_autofs_fd = safe_close(m->dev_autofs_fd);
+}
+
+static void automount_reset_failed(Unit *u) {
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+
+ if (a->state == AUTOMOUNT_FAILED)
+ automount_set_state(a, AUTOMOUNT_DEAD);
+
+ a->result = AUTOMOUNT_SUCCESS;
+}
+
+static bool automount_supported(void) {
+ static int supported = -1;
+
+ if (supported < 0)
+ supported = access("/dev/autofs", F_OK) >= 0;
+
+ return supported;
+}
+
+static const char* const automount_result_table[_AUTOMOUNT_RESULT_MAX] = {
+ [AUTOMOUNT_SUCCESS] = "success",
+ [AUTOMOUNT_FAILURE_RESOURCES] = "resources",
+ [AUTOMOUNT_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [AUTOMOUNT_FAILURE_MOUNT_START_LIMIT_HIT] = "mount-start-limit-hit",
+ [AUTOMOUNT_FAILURE_UNMOUNTED] = "unmounted",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(automount_result, AutomountResult);
+
+const UnitVTable automount_vtable = {
+ .object_size = sizeof(Automount),
+
+ .sections =
+ "Unit\0"
+ "Automount\0"
+ "Install\0",
+ .private_section = "Automount",
+
+ .can_transient = true,
+ .can_fail = true,
+ .can_trigger = true,
+
+ .init = automount_init,
+ .load = automount_load,
+ .done = automount_done,
+
+ .coldplug = automount_coldplug,
+
+ .dump = automount_dump,
+
+ .start = automount_start,
+ .stop = automount_stop,
+
+ .serialize = automount_serialize,
+ .deserialize_item = automount_deserialize_item,
+
+ .active_state = automount_active_state,
+ .sub_state_to_string = automount_sub_state_to_string,
+
+ .may_gc = automount_may_gc,
+
+ .trigger_notify = automount_trigger_notify,
+
+ .reset_failed = automount_reset_failed,
+
+ .bus_set_property = bus_automount_set_property,
+
+ .shutdown = automount_shutdown,
+ .supported = automount_supported,
+
+ .status_message_formats = {
+ .finished_start_job = {
+ [JOB_DONE] = "Set up automount %s.",
+ [JOB_FAILED] = "Failed to set up automount %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Unset automount %s.",
+ [JOB_FAILED] = "Failed to unset automount %s.",
+ },
+ },
+};
diff --git a/src/core/automount.h b/src/core/automount.h
new file mode 100644
index 0000000..fe668d9
--- /dev/null
+++ b/src/core/automount.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Automount Automount;
+
+#include "unit.h"
+
+typedef enum AutomountResult {
+ AUTOMOUNT_SUCCESS,
+ AUTOMOUNT_FAILURE_RESOURCES,
+ AUTOMOUNT_FAILURE_UNMOUNTED,
+ AUTOMOUNT_FAILURE_START_LIMIT_HIT,
+ AUTOMOUNT_FAILURE_MOUNT_START_LIMIT_HIT,
+ _AUTOMOUNT_RESULT_MAX,
+ _AUTOMOUNT_RESULT_INVALID = -1
+} AutomountResult;
+
+struct Automount {
+ Unit meta;
+
+ AutomountState state, deserialized_state;
+
+ char *where;
+ usec_t timeout_idle_usec;
+
+ int pipe_fd;
+ sd_event_source *pipe_event_source;
+ mode_t directory_mode;
+ dev_t dev_id;
+
+ Set *tokens;
+ Set *expire_tokens;
+
+ sd_event_source *expire_event_source;
+
+ AutomountResult result;
+};
+
+extern const UnitVTable automount_vtable;
+
+const char* automount_result_to_string(AutomountResult i) _const_;
+AutomountResult automount_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(AUTOMOUNT, Automount);
diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c
new file mode 100644
index 0000000..1ad7ade
--- /dev/null
+++ b/src/core/bpf-devices.c
@@ -0,0 +1,529 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fnmatch.h>
+#include <linux/bpf_insn.h>
+
+#include "bpf-devices.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+#define PASS_JUMP_OFF 4096
+
+static int bpf_access_type(const char *acc) {
+ int r = 0;
+
+ assert(acc);
+
+ for (; *acc; acc++)
+ switch(*acc) {
+ case 'r':
+ r |= BPF_DEVCG_ACC_READ;
+ break;
+ case 'w':
+ r |= BPF_DEVCG_ACC_WRITE;
+ break;
+ case 'm':
+ r |= BPF_DEVCG_ACC_MKNOD;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+static int bpf_prog_allow_list_device(
+ BPFProgram *prog,
+ char type,
+ int major,
+ int minor,
+ const char *acc) {
+
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ assert(IN_SET(type, 'b', 'c'));
+ const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
+
+ const struct bpf_insn insn[] = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* compare access type */
+
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 3), /* compare device type */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+
+ if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
+ r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
+ else
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+static int bpf_prog_allow_list_major(
+ BPFProgram *prog,
+ char type,
+ int major,
+ const char *acc) {
+
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ log_trace("%s: %c %d:* %s", __func__, type, major, acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ assert(IN_SET(type, 'b', 'c'));
+ const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
+
+ const struct bpf_insn insn[] = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
+
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 2), /* compare device type */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+
+ if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
+ r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
+ else
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+static int bpf_prog_allow_list_class(
+ BPFProgram *prog,
+ char type,
+ const char *acc) {
+
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ log_trace("%s: %c *:* %s", __func__, type, acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ assert(IN_SET(type, 'b', 'c'));
+ const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
+
+ const struct bpf_insn insn[] = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
+
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 1), /* compare device type */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+
+ if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
+ r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
+ else
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int bpf_devices_cgroup_init(
+ BPFProgram **ret,
+ CGroupDevicePolicy policy,
+ bool allow_list) {
+
+ const struct bpf_insn pre_insn[] = {
+ /* load device type to r2 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),
+
+ /* load access type to r3 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+
+ /* load major number to r4 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, major)),
+
+ /* load minor number to r5 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, minor)),
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ int r;
+
+ assert(ret);
+
+ if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list)
+ return 0;
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog);
+ if (r < 0)
+ return log_error_errno(r, "Loading device control BPF program failed: %m");
+
+ if (policy == CGROUP_DEVICE_POLICY_CLOSED || allow_list) {
+ r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+ }
+
+ *ret = TAKE_PTR(prog);
+
+ return 0;
+}
+
+int bpf_devices_apply_policy(
+ BPFProgram *prog,
+ CGroupDevicePolicy policy,
+ bool allow_list,
+ const char *cgroup_path,
+ BPFProgram **prog_installed) {
+
+ _cleanup_free_ char *controller_path = NULL;
+ int r;
+
+ /* This will assign *keep_program if everything goes well. */
+
+ if (!prog)
+ goto finish;
+
+ const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !allow_list;
+
+ const struct bpf_insn post_insn[] = {
+ /* return DENY */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+ };
+
+ const struct bpf_insn exit_insn[] = {
+ /* finally return DENY if deny_everything else ALLOW */
+ BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1),
+ BPF_EXIT_INSN()
+ };
+
+ if (!deny_everything) {
+ r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ /* Fixup PASS_JUMP_OFF jump offsets. */
+ for (size_t off = 0; off < prog->n_instructions; off++) {
+ struct bpf_insn *ins = &prog->instructions[off];
+
+ if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
+ ins->off = prog->n_instructions - off - 1;
+ }
+ }
+
+ r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup path: %m");
+
+ r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
+ if (r < 0)
+ return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m",
+ cgroup_path);
+
+ finish:
+ /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
+ if (prog_installed) {
+ bpf_program_unref(*prog_installed);
+ *prog_installed = bpf_program_ref(prog);
+ }
+ return 0;
+}
+
+int bpf_devices_supported(void) {
+ const struct bpf_insn trivial[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
+ static int supported = -1;
+ int r;
+
+ /* Checks whether BPF device controller is supported. For this, we check five things:
+ *
+ * a) whether we are privileged
+ * b) whether the unified hierarchy is being used
+ * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
+ */
+
+ if (supported >= 0)
+ return supported;
+
+ if (geteuid() != 0) {
+ log_debug("Not enough privileges, BPF device control is not supported.");
+ return supported = 0;
+ }
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+ if (r == 0) {
+ log_debug("Not running with unified cgroups, BPF device control is not supported.");
+ return supported = 0;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program);
+ if (r < 0) {
+ log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
+ if (r < 0) {
+ log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ r = bpf_program_load_kernel(program, NULL, 0);
+ if (r < 0) {
+ log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ return supported = 1;
+}
+
+static int allow_list_device_pattern(
+ BPFProgram *prog,
+ const char *path,
+ char type,
+ const unsigned *maj,
+ const unsigned *min,
+ const char *acc) {
+
+ assert(IN_SET(type, 'b', 'c'));
+
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
+
+ if (maj && min)
+ return bpf_prog_allow_list_device(prog, type, *maj, *min, acc);
+ else if (maj)
+ return bpf_prog_allow_list_major(prog, type, *maj, acc);
+ else
+ return bpf_prog_allow_list_class(prog, type, acc);
+
+ } else {
+ char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
+ int r;
+
+ if (maj && min)
+ xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc);
+ else if (maj)
+ xsprintf(buf, "%c %u:* %s", type, *maj, acc);
+ else
+ xsprintf(buf, "%c *:* %s", type, acc);
+
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
+ * EINVAL here. */
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set devices.allow on %s: %m", path);
+
+ return r;
+ }
+}
+
+int bpf_devices_allow_list_device(
+ BPFProgram *prog,
+ const char *path,
+ const char *node,
+ const char *acc) {
+
+ mode_t mode;
+ dev_t rdev;
+ int r;
+
+ assert(path);
+ assert(acc);
+ assert(strlen(acc) <= 3);
+
+ log_trace("%s: %s %s", __func__, node, acc);
+
+ /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
+ * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
+ * means clients can use these path without the device node actually around */
+ r = device_path_parse_major_minor(node, &mode, &rdev);
+ if (r < 0) {
+ if (r != -ENODEV)
+ return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
+
+ struct stat st;
+ if (stat(node, &st) < 0)
+ return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
+
+ if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
+ return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);
+
+ mode = st.st_mode;
+ rdev = (dev_t) st.st_rdev;
+ }
+
+ unsigned maj = major(rdev), min = minor(rdev);
+ return allow_list_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', &maj, &min, acc);
+}
+
+int bpf_devices_allow_list_major(
+ BPFProgram *prog,
+ const char *path,
+ const char *name,
+ char type,
+ const char *acc) {
+
+ unsigned maj;
+ int r;
+
+ assert(path);
+ assert(acc);
+ assert(IN_SET(type, 'b', 'c'));
+
+ if (streq(name, "*"))
+ /* If the name is a wildcard, then apply this list to all devices of this type */
+ return allow_list_device_pattern(prog, path, type, NULL, NULL, acc);
+
+ if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
+ /* The name is numeric and suitable as major. In that case, let's take its major, and create
+ * the entry directly. */
+ return allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
+
+ _cleanup_fclose_ FILE *f = NULL;
+ bool good = false, any = false;
+
+ f = fopen("/proc/devices", "re");
+ if (!f)
+ return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *w, *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/devices: %m");
+ if (r == 0)
+ break;
+
+ if (type == 'c' && streq(line, "Character devices:")) {
+ good = true;
+ continue;
+ }
+
+ if (type == 'b' && streq(line, "Block devices:")) {
+ good = true;
+ continue;
+ }
+
+ if (isempty(line)) {
+ good = false;
+ continue;
+ }
+
+ if (!good)
+ continue;
+
+ p = strstrip(line);
+
+ w = strpbrk(p, WHITESPACE);
+ if (!w)
+ continue;
+ *w = 0;
+
+ r = safe_atou(p, &maj);
+ if (r < 0)
+ continue;
+ if (maj <= 0)
+ continue;
+
+ w++;
+ w += strspn(w, WHITESPACE);
+
+ if (fnmatch(name, w, 0) != 0)
+ continue;
+
+ any = true;
+ (void) allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
+ }
+
+ if (!any)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Device allow list pattern \"%s\" did not match anything.", name);
+
+ return 0;
+}
+
+int bpf_devices_allow_list_static(
+ BPFProgram *prog,
+ const char *path) {
+
+ static const char auto_devices[] =
+ "/dev/null\0" "rwm\0"
+ "/dev/zero\0" "rwm\0"
+ "/dev/full\0" "rwm\0"
+ "/dev/random\0" "rwm\0"
+ "/dev/urandom\0" "rwm\0"
+ "/dev/tty\0" "rwm\0"
+ "/dev/ptmx\0" "rwm\0"
+ /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
+ "/run/systemd/inaccessible/chr\0" "rwm\0"
+ "/run/systemd/inaccessible/blk\0" "rwm\0";
+ int r = 0, k;
+
+ const char *node, *acc;
+ NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
+ k = bpf_devices_allow_list_device(prog, path, node, acc);
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+
+ /* PTS (/dev/pts) devices may not be duplicated, but accessed */
+ k = bpf_devices_allow_list_major(prog, path, "pts", 'c', "rw");
+ if (r >= 0 && k < 0)
+ r = k;
+
+ return r;
+}
diff --git a/src/core/bpf-devices.h b/src/core/bpf-devices.h
new file mode 100644
index 0000000..19b4d39
--- /dev/null
+++ b/src/core/bpf-devices.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "cgroup.h"
+
+typedef struct BPFProgram BPFProgram;
+
+int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool allow_list);
+int bpf_devices_apply_policy(
+ BPFProgram *prog,
+ CGroupDevicePolicy policy,
+ bool allow_list,
+ const char *cgroup_path,
+ BPFProgram **prog_installed);
+
+int bpf_devices_supported(void);
+int bpf_devices_allow_list_device(BPFProgram *prog, const char *path, const char *node, const char *acc);
+int bpf_devices_allow_list_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc);
+int bpf_devices_allow_list_static(BPFProgram *prog, const char *path);
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
new file mode 100644
index 0000000..99783ac
--- /dev/null
+++ b/src/core/bpf-firewall.c
@@ -0,0 +1,911 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/bpf_insn.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "ip-address-access.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "unit.h"
+#include "strv.h"
+#include "virt.h"
+
+enum {
+ MAP_KEY_PACKETS,
+ MAP_KEY_BYTES,
+};
+
+enum {
+ ACCESS_ALLOWED = 1,
+ ACCESS_DENIED = 2,
+};
+
+/* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */
+
+static int add_lookup_instructions(
+ BPFProgram *p,
+ int map_fd,
+ int protocol,
+ bool is_ingress,
+ int verdict) {
+
+ int r, addr_offset, addr_size;
+
+ assert(p);
+ assert(map_fd >= 0);
+
+ switch (protocol) {
+
+ case ETH_P_IP:
+ addr_size = sizeof(uint32_t);
+ addr_offset = is_ingress ?
+ offsetof(struct iphdr, saddr) :
+ offsetof(struct iphdr, daddr);
+ break;
+
+ case ETH_P_IPV6:
+ addr_size = 4 * sizeof(uint32_t);
+ addr_offset = is_ingress ?
+ offsetof(struct ip6_hdr, ip6_src.s6_addr) :
+ offsetof(struct ip6_hdr, ip6_dst.s6_addr);
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ do {
+ /* Compare IPv4 with one word instruction (32bit) */
+ struct bpf_insn insn[] = {
+ /* If skb->protocol != ETH_P_IP, skip this whole block. The offset will be set later. */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
+
+ /*
+ * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
+ *
+ * R1: Pointer to the skb
+ * R2: Data offset
+ * R3: Destination buffer on the stack (r10 - 4)
+ * R4: Number of bytes to read (4)
+ */
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV32_IMM(BPF_REG_2, addr_offset),
+
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
+
+ BPF_MOV32_IMM(BPF_REG_4, addr_size),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+
+ /*
+ * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
+ * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
+ * has to be set to the maximum possible value.
+ *
+ * On success, the looked up value is stored in R0. For this application, the actual
+ * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
+ * matching value.
+ */
+
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
+
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
+ };
+
+ /* Jump label fixup */
+ insn[0].off = ELEMENTSOF(insn) - 1;
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+
+ } while (false);
+
+ return 0;
+}
+
+static int add_instructions_for_ip_any(
+ BPFProgram *p,
+ int verdict) {
+ int r;
+
+ assert(p);
+
+ const struct bpf_insn insn[] = {
+ BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
+ };
+
+ r = bpf_program_add_instructions(p, insn, 1);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int bpf_firewall_compile_bpf(
+ Unit *u,
+ bool is_ingress,
+ BPFProgram **ret,
+ bool ip_allow_any,
+ bool ip_deny_any) {
+
+ const struct bpf_insn pre_insn[] = {
+ /*
+ * When the eBPF program is entered, R1 contains the address of the skb.
+ * However, R1-R5 are scratch registers that are not preserved when calling
+ * into kernel functions, so we need to save anything that's supposed to
+ * stay around to R6-R9. Save the skb to R6.
+ */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /*
+ * Although we cannot access the skb data directly from eBPF programs used in this
+ * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
+ * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
+ * for later use.
+ */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
+
+ /*
+ * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
+ * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
+ */
+ BPF_MOV32_IMM(BPF_REG_8, 0),
+ };
+
+ /*
+ * The access checkers compiled for the configured allowance and denial lists
+ * write to R8 at runtime. The following code prepares for an early exit that
+ * skip the accounting if the packet is denied.
+ *
+ * R0 = 1
+ * if (R8 == ACCESS_DENIED)
+ * R0 = 0
+ *
+ * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
+ * is allowed to pass.
+ */
+ const struct bpf_insn post_insn[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+ int accounting_map_fd, r;
+ bool access_enabled;
+
+ assert(u);
+ assert(ret);
+
+ accounting_map_fd = is_ingress ?
+ u->ip_accounting_ingress_map_fd :
+ u->ip_accounting_egress_map_fd;
+
+ access_enabled =
+ u->ipv4_allow_map_fd >= 0 ||
+ u->ipv6_allow_map_fd >= 0 ||
+ u->ipv4_deny_map_fd >= 0 ||
+ u->ipv6_deny_map_fd >= 0 ||
+ ip_allow_any ||
+ ip_deny_any;
+
+ if (accounting_map_fd < 0 && !access_enabled) {
+ *ret = NULL;
+ return 0;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+ if (r < 0)
+ return r;
+
+ r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return r;
+
+ if (access_enabled) {
+ /*
+ * The simple rule this function translates into eBPF instructions is:
+ *
+ * - Access will be granted when an address matches an entry in @list_allow
+ * - Otherwise, access will be denied when an address matches an entry in @list_deny
+ * - Otherwise, access will be granted
+ */
+
+ if (u->ipv4_deny_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv6_deny_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv4_allow_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv6_allow_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+
+ if (ip_allow_any) {
+ r = add_instructions_for_ip_any(p, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+
+ if (ip_deny_any) {
+ r = add_instructions_for_ip_any(p, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return r;
+
+ if (accounting_map_fd >= 0) {
+ struct bpf_insn insn[] = {
+ /*
+ * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
+ * The jump label will be fixed up later.
+ */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+
+ /* Count packets */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Count bytes */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Allow the packet to pass */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ };
+
+ /* Jump label fixup */
+ insn[0].off = ELEMENTSOF(insn) - 1;
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+ }
+
+ do {
+ /*
+ * Exit from the eBPF program, R0 contains the verdict.
+ * 0 means the packet is denied, 1 means the packet may pass.
+ */
+ const struct bpf_insn insn[] = {
+ BPF_EXIT_INSN()
+ };
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+ } while (false);
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
+ IPAddressAccessItem *a;
+
+ assert(n_ipv4);
+ assert(n_ipv6);
+
+ LIST_FOREACH(items, a, list) {
+ switch (a->family) {
+
+ case AF_INET:
+ (*n_ipv4)++;
+ break;
+
+ case AF_INET6:
+ (*n_ipv6)++;
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+ }
+
+ return 0;
+}
+
+static int bpf_firewall_add_access_items(
+ IPAddressAccessItem *list,
+ int ipv4_map_fd,
+ int ipv6_map_fd,
+ int verdict) {
+
+ struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
+ uint64_t value = verdict;
+ IPAddressAccessItem *a;
+ int r;
+
+ key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
+ key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);
+
+ LIST_FOREACH(items, a, list) {
+ switch (a->family) {
+
+ case AF_INET:
+ key_ipv4->prefixlen = a->prefixlen;
+ memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));
+
+ r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case AF_INET6:
+ key_ipv6->prefixlen = a->prefixlen;
+ memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));
+
+ r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+ }
+
+ return 0;
+}
+
+static int bpf_firewall_prepare_access_maps(
+ Unit *u,
+ int verdict,
+ int *ret_ipv4_map_fd,
+ int *ret_ipv6_map_fd,
+ bool *ret_has_any) {
+
+ _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
+ size_t n_ipv4 = 0, n_ipv6 = 0;
+ IPAddressAccessItem *list;
+ Unit *p;
+ int r;
+
+ assert(ret_ipv4_map_fd);
+ assert(ret_ipv6_map_fd);
+ assert(ret_has_any);
+
+ for (p = u; p; p = UNIT_DEREF(p->slice)) {
+ CGroupContext *cc;
+
+ cc = unit_get_cgroup_context(p);
+ if (!cc)
+ continue;
+
+ list = verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny;
+
+ bpf_firewall_count_access_items(list, &n_ipv4, &n_ipv6);
+
+ /* Skip making the LPM trie map in cases where we are using "any" in order to hack around
+ * needing CAP_SYS_ADMIN for allocating LPM trie map. */
+ if (ip_address_access_item_is_any(list)) {
+ *ret_has_any = true;
+ return 0;
+ }
+ }
+
+ if (n_ipv4 > 0) {
+ ipv4_map_fd = bpf_map_new(
+ BPF_MAP_TYPE_LPM_TRIE,
+ offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
+ sizeof(uint64_t),
+ n_ipv4,
+ BPF_F_NO_PREALLOC);
+ if (ipv4_map_fd < 0)
+ return ipv4_map_fd;
+ }
+
+ if (n_ipv6 > 0) {
+ ipv6_map_fd = bpf_map_new(
+ BPF_MAP_TYPE_LPM_TRIE,
+ offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
+ sizeof(uint64_t),
+ n_ipv6,
+ BPF_F_NO_PREALLOC);
+ if (ipv6_map_fd < 0)
+ return ipv6_map_fd;
+ }
+
+ for (p = u; p; p = UNIT_DEREF(p->slice)) {
+ CGroupContext *cc;
+
+ cc = unit_get_cgroup_context(p);
+ if (!cc)
+ continue;
+
+ r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
+ ipv4_map_fd, ipv6_map_fd, verdict);
+ if (r < 0)
+ return r;
+ }
+
+ *ret_ipv4_map_fd = TAKE_FD(ipv4_map_fd);
+ *ret_ipv6_map_fd = TAKE_FD(ipv6_map_fd);
+ *ret_has_any = false;
+ return 0;
+}
+
+static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, int *fd_ingress, int *fd_egress) {
+ int r;
+
+ assert(u);
+ assert(fd_ingress);
+ assert(fd_egress);
+
+ if (enabled) {
+ if (*fd_ingress < 0) {
+ r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+ if (r < 0)
+ return r;
+
+ *fd_ingress = r;
+ }
+
+ if (*fd_egress < 0) {
+
+ r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+ if (r < 0)
+ return r;
+
+ *fd_egress = r;
+ }
+
+ } else {
+ *fd_ingress = safe_close(*fd_ingress);
+ *fd_egress = safe_close(*fd_egress);
+
+ zero(u->ip_accounting_extra);
+ }
+
+ return 0;
+}
+
+int bpf_firewall_compile(Unit *u) {
+ CGroupContext *cc;
+ int r, supported;
+ bool ip_allow_any = false, ip_deny_any = false;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -EINVAL;
+
+ supported = bpf_firewall_supported();
+ if (supported < 0)
+ return supported;
+ if (supported == BPF_FIREWALL_UNSUPPORTED)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "BPF firewalling not supported on this manager, proceeding without.");
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE)
+ /* If BPF_F_ALLOW_MULTI is not supported we don't support any BPF magic on inner nodes (i.e. on slice
+ * units), since that would mean leaf nodes couldn't do any BPF anymore at all. Under the assumption
+ * that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is
+ * consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at
+ * all, either. */
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
+
+ /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
+ * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
+ * configuration, but we don't flush out the accounting unnecessarily */
+
+ u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
+ u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
+
+ u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+ u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+
+ u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+ u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+ if (u->type != UNIT_SLICE) {
+ /* In inner nodes we only do accounting, we do not actually bother with access control. However, leaf
+ * nodes will incorporate all IP access rules set on all their parent nodes. This has the benefit that
+ * they can optionally cancel out system-wide rules. Since inner nodes can't contain processes this
+ * means that all configure IP access rules *will* take effect on processes, even though we never
+ * compile them for inner nodes. */
+
+ r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd, &ip_allow_any);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Preparation of eBPF allow maps failed: %m");
+
+ r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd, &ip_deny_any);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Preparation of eBPF deny maps failed: %m");
+ }
+
+ r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Preparation of eBPF accounting maps failed: %m");
+
+ r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress, ip_allow_any, ip_deny_any);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Compilation for ingress BPF program failed: %m");
+
+ r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress, ip_allow_any, ip_deny_any);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Compilation for egress BPF program failed: %m");
+
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref);
+
+static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
+ char **bpf_fs_path;
+
+ set_clear(*set);
+
+ STRV_FOREACH(bpf_fs_path, filter_paths) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ int r;
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Can't allocate CGROUP SKB BPF program: %m");
+
+ r = bpf_program_load_from_bpf_fs(prog, *bpf_fs_path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Loading of ingress BPF program %s failed: %m", *bpf_fs_path);
+
+ r = set_ensure_consume(set, &filter_prog_hash_ops, TAKE_PTR(prog));
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
+ }
+
+ return 0;
+}
+
+int bpf_firewall_load_custom(Unit *u) {
+ CGroupContext *cc;
+ int r, supported;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return 0;
+
+ if (!(cc->ip_filters_ingress || cc->ip_filters_egress))
+ return 0;
+
+ supported = bpf_firewall_supported();
+ if (supported < 0)
+ return supported;
+
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");
+
+ r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress);
+ if (r < 0)
+ return r;
+ r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, Set **set, Set **set_installed) {
+ BPFProgram *prog;
+ int r;
+
+ assert(u);
+
+ set_clear(*set_installed);
+
+ SET_FOREACH(prog, *set) {
+ r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Attaching custom egress BPF program to cgroup %s failed: %m", path);
+
+ /* Remember that these BPF programs are installed now. */
+ r = set_ensure_put(set_installed, &filter_prog_hash_ops, prog);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
+ bpf_program_ref(prog);
+ }
+
+ return 0;
+}
+
+int bpf_firewall_install(Unit *u) {
+ _cleanup_free_ char *path = NULL;
+ CGroupContext *cc;
+ int r, supported;
+ uint32_t flags;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -EINVAL;
+ if (!u->cgroup_path)
+ return -EINVAL;
+ if (!u->cgroup_realized)
+ return -EINVAL;
+
+ supported = bpf_firewall_supported();
+ if (supported < 0)
+ return supported;
+ if (supported == BPF_FIREWALL_UNSUPPORTED) {
+ log_unit_debug(u, "BPF firewalling not supported on this manager, proceeding without.");
+ return -EOPNOTSUPP;
+ }
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) {
+ log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
+ return -EOPNOTSUPP;
+ }
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
+ (!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to determine cgroup path: %m");
+
+ flags = (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
+ (u->type == UNIT_SLICE || unit_cgroup_delegate(u))) ? BPF_F_ALLOW_MULTI : 0;
+
+ /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program, to
+ * minimize the time window when we don't account for IP traffic. */
+ u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
+ u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
+
+ if (u->ip_bpf_egress) {
+ r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path,
+ flags | (set_isempty(u->ip_bpf_custom_egress) ? 0 : BPF_F_ALLOW_MULTI));
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);
+
+ /* Remember that this BPF program is installed now. */
+ u->ip_bpf_egress_installed = bpf_program_ref(u->ip_bpf_egress);
+ }
+
+ if (u->ip_bpf_ingress) {
+ r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path,
+ flags | (set_isempty(u->ip_bpf_custom_ingress) ? 0 : BPF_F_ALLOW_MULTI));
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
+
+ u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
+ }
+
+ r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
+ if (r < 0)
+ return r;
+
+ r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
+ uint64_t key, packets;
+ int r;
+
+ if (map_fd < 0)
+ return -EBADF;
+
+ if (ret_packets) {
+ key = MAP_KEY_PACKETS;
+ r = bpf_map_lookup_element(map_fd, &key, &packets);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_bytes) {
+ key = MAP_KEY_BYTES;
+ r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_packets)
+ *ret_packets = packets;
+
+ return 0;
+}
+
+int bpf_firewall_reset_accounting(int map_fd) {
+ uint64_t key, value = 0;
+ int r;
+
+ if (map_fd < 0)
+ return -EBADF;
+
+ key = MAP_KEY_PACKETS;
+ r = bpf_map_update_element(map_fd, &key, &value);
+ if (r < 0)
+ return r;
+
+ key = MAP_KEY_BYTES;
+ return bpf_map_update_element(map_fd, &key, &value);
+}
+
+static int bpf_firewall_unsupported_reason = 0;
+
+int bpf_firewall_supported(void) {
+ const struct bpf_insn trivial[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
+ static int supported = -1;
+ union bpf_attr attr;
+ int r;
+
+ /* Checks whether BPF firewalling is supported. For this, we check the following things:
+ *
+ * - whether the unified hierarchy is being used
+ * - the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require
+ * - the BPF implementation in the kernel supports the BPF_PROG_DETACH call, which we require
+ */
+ if (supported >= 0)
+ return supported;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+ if (r == 0) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Not running with unified cgroups, BPF firewalling is not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &program);
+ if (r < 0) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(r, "Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
+ if (r < 0) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(r, "Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ r = bpf_program_load_kernel(program, NULL, 0);
+ if (r < 0) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(r, "Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* Unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB programs even when CONFIG_CGROUP_BPF
+ * is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF
+ * program if we can't do a thing with it later?
+ *
+ * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if
+ * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the
+ * parameters are validated however, and that'll fail with EBADF then. */
+
+ // FIXME: Clang doesn't 0-pad with structured initialization, causing
+ // the kernel to reject the bpf_attr as invalid. See:
+ // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
+ // Ideally it should behave like GCC, so that we can remove these workarounds.
+ zero(attr);
+ attr.attach_type = BPF_CGROUP_INET_EGRESS;
+ attr.target_fd = -1;
+ attr.attach_bpf_fd = -1;
+
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) {
+ if (errno != EBADF) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(errno, "Didn't get EBADF from BPF_PROG_DETACH, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* YAY! */
+ } else {
+ log_debug("Wut? Kernel accepted our invalid BPF_PROG_DETACH call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported
+ * (which was added in kernel 4.15). We use a similar logic as before, but this time we use the BPF_PROG_ATTACH
+ * bpf() call and the BPF_F_ALLOW_MULTI flags value. Since the flags are checked early in the system call we'll
+ * get EINVAL if it's not supported, and EBADF as before if it is available. */
+
+ zero(attr);
+ attr.attach_type = BPF_CGROUP_INET_EGRESS;
+ attr.target_fd = -1;
+ attr.attach_bpf_fd = -1;
+ attr.attach_flags = BPF_F_ALLOW_MULTI;
+
+ if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) {
+ if (errno == EBADF) {
+ log_debug_errno(errno, "Got EBADF when using BPF_F_ALLOW_MULTI, which indicates it is supported. Yay!");
+ return supported = BPF_FIREWALL_SUPPORTED_WITH_MULTI;
+ }
+
+ if (errno == EINVAL)
+ log_debug_errno(errno, "Got EINVAL error when using BPF_F_ALLOW_MULTI, which indicates it's not supported.");
+ else
+ log_debug_errno(errno, "Got unexpected error when using BPF_F_ALLOW_MULTI, assuming it's not supported: %m");
+
+ return supported = BPF_FIREWALL_SUPPORTED;
+ } else {
+ log_debug("Wut? Kernel accepted our invalid BPF_PROG_ATTACH+BPF_F_ALLOW_MULTI call? Something is weird, assuming BPF firewalling is broken and hence not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+}
+
+void emit_bpf_firewall_warning(Unit *u) {
+ static bool warned = false;
+
+ if (!warned) {
+ bool quiet = bpf_firewall_unsupported_reason == -EPERM && detect_container() > 0;
+
+ log_unit_full_errno(u, quiet ? LOG_DEBUG : LOG_WARNING, bpf_firewall_unsupported_reason,
+ "unit configures an IP firewall, but %s.\n"
+ "(This warning is only shown for the first unit using IP firewalling.)",
+ getuid() != 0 ? "not running as root" :
+ "the local system does not support BPF/cgroup firewalling");
+ warned = true;
+ }
+}
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
new file mode 100644
index 0000000..08d7742
--- /dev/null
+++ b/src/core/bpf-firewall.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "unit.h"
+
+enum {
+ BPF_FIREWALL_UNSUPPORTED = 0,
+ BPF_FIREWALL_SUPPORTED = 1,
+ BPF_FIREWALL_SUPPORTED_WITH_MULTI = 2,
+};
+
+int bpf_firewall_supported(void);
+
+int bpf_firewall_compile(Unit *u);
+int bpf_firewall_install(Unit *u);
+int bpf_firewall_load_custom(Unit *u);
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
+int bpf_firewall_reset_accounting(int map_fd);
+
+void emit_bpf_firewall_warning(Unit *u);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
new file mode 100644
index 0000000..7dc6c20
--- /dev/null
+++ b/src/core/cgroup.c
@@ -0,0 +1,3778 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "bpf-devices.h"
+#include "bpf-firewall.h"
+#include "btrfs-util.h"
+#include "bus-error.h"
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "cgroup.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "limits-util.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "special.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "virt.h"
+
+#define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
+
+/* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
+ * problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask
+ * out specific attributes from us. */
+#define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING)
+
+uint64_t tasks_max_resolve(const TasksMax *tasks_max) {
+ if (tasks_max->scale == 0)
+ return tasks_max->value;
+
+ return system_tasks_max_scale(tasks_max->value, tasks_max->scale);
+}
+
+bool manager_owns_host_root_cgroup(Manager *m) {
+ assert(m);
+
+ /* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the
+ * group root path equals "/" since that will also be the case if CLONE_NEWCGROUP is in the mix. Since there's
+ * appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if
+ * we run in any kind of container virtualization. */
+
+ if (MANAGER_IS_USER(m))
+ return false;
+
+ if (detect_container() > 0)
+ return false;
+
+ return empty_or_root(m->cgroup_root);
+}
+
+bool unit_has_host_root_cgroup(Unit *u) {
+ assert(u);
+
+ /* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and
+ * the manager manages the root cgroup. */
+
+ if (!manager_owns_host_root_cgroup(u->manager))
+ return false;
+
+ return unit_has_name(u, SPECIAL_ROOT_SLICE);
+}
+
+static int set_attribute_and_warn(Unit *u, const char *controller, const char *attribute, const char *value) {
+ int r;
+
+ r = cg_set_attribute(controller, u->cgroup_path, attribute, value);
+ if (r < 0)
+ log_unit_full_errno(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%.*s': %m",
+ strna(attribute), isempty(u->cgroup_path) ? "/" : u->cgroup_path, (int) strcspn(value, NEWLINE), value);
+
+ return r;
+}
+
+static void cgroup_compat_warn(void) {
+ static bool cgroup_compat_warned = false;
+
+ if (cgroup_compat_warned)
+ return;
+
+ log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. "
+ "See cgroup-compat debug messages for details.");
+
+ cgroup_compat_warned = true;
+}
+
+#define log_cgroup_compat(unit, fmt, ...) do { \
+ cgroup_compat_warn(); \
+ log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__); \
+ } while (false)
+
+void cgroup_context_init(CGroupContext *c) {
+ assert(c);
+
+ /* Initialize everything to the kernel defaults. */
+
+ *c = (CGroupContext) {
+ .cpu_weight = CGROUP_WEIGHT_INVALID,
+ .startup_cpu_weight = CGROUP_WEIGHT_INVALID,
+ .cpu_quota_per_sec_usec = USEC_INFINITY,
+ .cpu_quota_period_usec = USEC_INFINITY,
+
+ .cpu_shares = CGROUP_CPU_SHARES_INVALID,
+ .startup_cpu_shares = CGROUP_CPU_SHARES_INVALID,
+
+ .memory_high = CGROUP_LIMIT_MAX,
+ .memory_max = CGROUP_LIMIT_MAX,
+ .memory_swap_max = CGROUP_LIMIT_MAX,
+
+ .memory_limit = CGROUP_LIMIT_MAX,
+
+ .io_weight = CGROUP_WEIGHT_INVALID,
+ .startup_io_weight = CGROUP_WEIGHT_INVALID,
+
+ .blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
+ .startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
+
+ .tasks_max = TASKS_MAX_UNSET,
+
+ .moom_swap = MANAGED_OOM_AUTO,
+ .moom_mem_pressure = MANAGED_OOM_AUTO,
+ };
+}
+
+void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
+ assert(c);
+ assert(a);
+
+ LIST_REMOVE(device_allow, c->device_allow, a);
+ free(a->path);
+ free(a);
+}
+
+void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w) {
+ assert(c);
+ assert(w);
+
+ LIST_REMOVE(device_weights, c->io_device_weights, w);
+ free(w->path);
+ free(w);
+}
+
+void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l) {
+ assert(c);
+ assert(l);
+
+ LIST_REMOVE(device_latencies, c->io_device_latencies, l);
+ free(l->path);
+ free(l);
+}
+
+void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) {
+ assert(c);
+ assert(l);
+
+ LIST_REMOVE(device_limits, c->io_device_limits, l);
+ free(l->path);
+ free(l);
+}
+
+void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
+ assert(c);
+ assert(w);
+
+ LIST_REMOVE(device_weights, c->blockio_device_weights, w);
+ free(w->path);
+ free(w);
+}
+
+void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
+ assert(c);
+ assert(b);
+
+ LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
+ free(b->path);
+ free(b);
+}
+
+void cgroup_context_done(CGroupContext *c) {
+ assert(c);
+
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+
+ while (c->io_device_limits)
+ cgroup_context_free_io_device_limit(c, c->io_device_limits);
+
+ while (c->blockio_device_weights)
+ cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
+
+ while (c->blockio_device_bandwidths)
+ cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
+
+ while (c->device_allow)
+ cgroup_context_free_device_allow(c, c->device_allow);
+
+ c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
+ c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
+
+ c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
+ c->ip_filters_egress = strv_free(c->ip_filters_egress);
+
+ cpu_set_reset(&c->cpuset_cpus);
+ cpu_set_reset(&c->cpuset_mems);
+}
+
+static int unit_get_kernel_memory_limit(Unit *u, const char *file, uint64_t *ret) {
+ assert(u);
+
+ if (!u->cgroup_realized)
+ return -EOWNERDEAD;
+
+ return cg_get_attribute_as_uint64("memory", u->cgroup_path, file, ret);
+}
+
+static int unit_compare_memory_limit(Unit *u, const char *property_name, uint64_t *ret_unit_value, uint64_t *ret_kernel_value) {
+ CGroupContext *c;
+ CGroupMask m;
+ const char *file;
+ uint64_t unit_value;
+ int r;
+
+ /* Compare kernel memcg configuration against our internal systemd state. Unsupported (and will
+ * return -ENODATA) on cgroup v1.
+ *
+ * Returns:
+ *
+ * <0: On error.
+ * 0: If the kernel memory setting doesn't match our configuration.
+ * >0: If the kernel memory setting matches our configuration.
+ *
+ * The following values are only guaranteed to be populated on return >=0:
+ *
+ * - ret_unit_value will contain our internal expected value for the unit, page-aligned.
+ * - ret_kernel_value will contain the actual value presented by the kernel. */
+
+ assert(u);
+
+ r = cg_all_unified();
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine cgroup hierarchy version: %m");
+
+ /* Unsupported on v1.
+ *
+ * We don't return ENOENT, since that could actually mask a genuine problem where somebody else has
+ * silently masked the controller. */
+ if (r == 0)
+ return -ENODATA;
+
+ /* The root slice doesn't have any controller files, so we can't compare anything. */
+ if (unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return -ENODATA;
+
+ /* It's possible to have MemoryFoo set without systemd wanting to have the memory controller enabled,
+ * for example, in the case of DisableControllers= or cgroup_disable on the kernel command line. To
+ * avoid specious errors in these scenarios, check that we even expect the memory controller to be
+ * enabled at all. */
+ m = unit_get_target_mask(u);
+ if (!FLAGS_SET(m, CGROUP_MASK_MEMORY))
+ return -ENODATA;
+
+ c = unit_get_cgroup_context(u);
+ assert(c);
+
+ if (streq(property_name, "MemoryLow")) {
+ unit_value = unit_get_ancestor_memory_low(u);
+ file = "memory.low";
+ } else if (streq(property_name, "MemoryMin")) {
+ unit_value = unit_get_ancestor_memory_min(u);
+ file = "memory.min";
+ } else if (streq(property_name, "MemoryHigh")) {
+ unit_value = c->memory_high;
+ file = "memory.high";
+ } else if (streq(property_name, "MemoryMax")) {
+ unit_value = c->memory_max;
+ file = "memory.max";
+ } else if (streq(property_name, "MemorySwapMax")) {
+ unit_value = c->memory_swap_max;
+ file = "memory.swap.max";
+ } else
+ return -EINVAL;
+
+ r = unit_get_kernel_memory_limit(u, file, ret_kernel_value);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to parse %s: %m", file);
+
+ /* It's intended (soon) in a future kernel to not expose cgroup memory limits rounded to page
+ * boundaries, but instead separate the user-exposed limit, which is whatever userspace told us, from
+ * our internal page-counting. To support those future kernels, just check the value itself first
+ * without any page-alignment. */
+ if (*ret_kernel_value == unit_value) {
+ *ret_unit_value = unit_value;
+ return 1;
+ }
+
+ /* The current kernel behaviour, by comparison, is that even if you write a particular number of
+ * bytes into a cgroup memory file, it always returns that number page-aligned down (since the kernel
+ * internally stores cgroup limits in pages). As such, so long as it aligns properly, everything is
+ * cricket. */
+ if (unit_value != CGROUP_LIMIT_MAX)
+ unit_value = PAGE_ALIGN_DOWN(unit_value);
+
+ *ret_unit_value = unit_value;
+
+ return *ret_kernel_value == *ret_unit_value;
+}
+
+#define FORMAT_CGROUP_DIFF_MAX 128
+
+static char *format_cgroup_memory_limit_comparison(char *buf, size_t l, Unit *u, const char *property_name) {
+ uint64_t kval, sval;
+ int r;
+
+ assert(u);
+ assert(buf);
+ assert(l > 0);
+
+ r = unit_compare_memory_limit(u, property_name, &sval, &kval);
+
+ /* memory.swap.max is special in that it relies on CONFIG_MEMCG_SWAP (and the default swapaccount=1).
+ * In the absence of reliably being able to detect whether memcg swap support is available or not,
+ * only complain if the error is not ENOENT. */
+ if (r > 0 || IN_SET(r, -ENODATA, -EOWNERDEAD) ||
+ (r == -ENOENT && streq(property_name, "MemorySwapMax"))) {
+ buf[0] = 0;
+ return buf;
+ }
+
+ if (r < 0) {
+ snprintf(buf, l, " (error getting kernel value: %s)", strerror_safe(r));
+ return buf;
+ }
+
+ snprintf(buf, l, " (different value in kernel: %" PRIu64 ")", kval);
+
+ return buf;
+}
+
+void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
+ _cleanup_free_ char *disable_controllers_str = NULL, *cpuset_cpus = NULL, *cpuset_mems = NULL;
+ CGroupIODeviceLimit *il;
+ CGroupIODeviceWeight *iw;
+ CGroupIODeviceLatency *l;
+ CGroupBlockIODeviceBandwidth *b;
+ CGroupBlockIODeviceWeight *w;
+ CGroupDeviceAllow *a;
+ CGroupContext *c;
+ IPAddressAccessItem *iaai;
+ char **path;
+ char q[FORMAT_TIMESPAN_MAX];
+ char v[FORMAT_TIMESPAN_MAX];
+
+ char cda[FORMAT_CGROUP_DIFF_MAX];
+ char cdb[FORMAT_CGROUP_DIFF_MAX];
+ char cdc[FORMAT_CGROUP_DIFF_MAX];
+ char cdd[FORMAT_CGROUP_DIFF_MAX];
+ char cde[FORMAT_CGROUP_DIFF_MAX];
+
+ assert(u);
+ assert(f);
+
+ c = unit_get_cgroup_context(u);
+ assert(c);
+
+ prefix = strempty(prefix);
+
+ (void) cg_mask_to_string(c->disable_controllers, &disable_controllers_str);
+
+ cpuset_cpus = cpu_set_to_range_string(&c->cpuset_cpus);
+ cpuset_mems = cpu_set_to_range_string(&c->cpuset_mems);
+
+ fprintf(f,
+ "%sCPUAccounting: %s\n"
+ "%sIOAccounting: %s\n"
+ "%sBlockIOAccounting: %s\n"
+ "%sMemoryAccounting: %s\n"
+ "%sTasksAccounting: %s\n"
+ "%sIPAccounting: %s\n"
+ "%sCPUWeight: %" PRIu64 "\n"
+ "%sStartupCPUWeight: %" PRIu64 "\n"
+ "%sCPUShares: %" PRIu64 "\n"
+ "%sStartupCPUShares: %" PRIu64 "\n"
+ "%sCPUQuotaPerSecSec: %s\n"
+ "%sCPUQuotaPeriodSec: %s\n"
+ "%sAllowedCPUs: %s\n"
+ "%sAllowedMemoryNodes: %s\n"
+ "%sIOWeight: %" PRIu64 "\n"
+ "%sStartupIOWeight: %" PRIu64 "\n"
+ "%sBlockIOWeight: %" PRIu64 "\n"
+ "%sStartupBlockIOWeight: %" PRIu64 "\n"
+ "%sDefaultMemoryMin: %" PRIu64 "\n"
+ "%sDefaultMemoryLow: %" PRIu64 "\n"
+ "%sMemoryMin: %" PRIu64 "%s\n"
+ "%sMemoryLow: %" PRIu64 "%s\n"
+ "%sMemoryHigh: %" PRIu64 "%s\n"
+ "%sMemoryMax: %" PRIu64 "%s\n"
+ "%sMemorySwapMax: %" PRIu64 "%s\n"
+ "%sMemoryLimit: %" PRIu64 "\n"
+ "%sTasksMax: %" PRIu64 "\n"
+ "%sDevicePolicy: %s\n"
+ "%sDisableControllers: %s\n"
+ "%sDelegate: %s\n"
+ "%sManagedOOMSwap: %s\n"
+ "%sManagedOOMMemoryPressure: %s\n"
+ "%sManagedOOMMemoryPressureLimitPercent: %d%%\n",
+ prefix, yes_no(c->cpu_accounting),
+ prefix, yes_no(c->io_accounting),
+ prefix, yes_no(c->blockio_accounting),
+ prefix, yes_no(c->memory_accounting),
+ prefix, yes_no(c->tasks_accounting),
+ prefix, yes_no(c->ip_accounting),
+ prefix, c->cpu_weight,
+ prefix, c->startup_cpu_weight,
+ prefix, c->cpu_shares,
+ prefix, c->startup_cpu_shares,
+ prefix, format_timespan(q, sizeof(q), c->cpu_quota_per_sec_usec, 1),
+ prefix, format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1),
+ prefix, strempty(cpuset_cpus),
+ prefix, strempty(cpuset_mems),
+ prefix, c->io_weight,
+ prefix, c->startup_io_weight,
+ prefix, c->blockio_weight,
+ prefix, c->startup_blockio_weight,
+ prefix, c->default_memory_min,
+ prefix, c->default_memory_low,
+ prefix, c->memory_min, format_cgroup_memory_limit_comparison(cda, sizeof(cda), u, "MemoryMin"),
+ prefix, c->memory_low, format_cgroup_memory_limit_comparison(cdb, sizeof(cdb), u, "MemoryLow"),
+ prefix, c->memory_high, format_cgroup_memory_limit_comparison(cdc, sizeof(cdc), u, "MemoryHigh"),
+ prefix, c->memory_max, format_cgroup_memory_limit_comparison(cdd, sizeof(cdd), u, "MemoryMax"),
+ prefix, c->memory_swap_max, format_cgroup_memory_limit_comparison(cde, sizeof(cde), u, "MemorySwapMax"),
+ prefix, c->memory_limit,
+ prefix, tasks_max_resolve(&c->tasks_max),
+ prefix, cgroup_device_policy_to_string(c->device_policy),
+ prefix, strempty(disable_controllers_str),
+ prefix, yes_no(c->delegate),
+ prefix, managed_oom_mode_to_string(c->moom_swap),
+ prefix, managed_oom_mode_to_string(c->moom_mem_pressure),
+ prefix, c->moom_mem_pressure_limit);
+
+ if (c->delegate) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) cg_mask_to_string(c->delegate_controllers, &t);
+
+ fprintf(f, "%sDelegateControllers: %s\n",
+ prefix,
+ strempty(t));
+ }
+
+ LIST_FOREACH(device_allow, a, c->device_allow)
+ fprintf(f,
+ "%sDeviceAllow: %s %s%s%s\n",
+ prefix,
+ a->path,
+ a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
+
+ LIST_FOREACH(device_weights, iw, c->io_device_weights)
+ fprintf(f,
+ "%sIODeviceWeight: %s %" PRIu64 "\n",
+ prefix,
+ iw->path,
+ iw->weight);
+
+ LIST_FOREACH(device_latencies, l, c->io_device_latencies)
+ fprintf(f,
+ "%sIODeviceLatencyTargetSec: %s %s\n",
+ prefix,
+ l->path,
+ format_timespan(q, sizeof(q), l->target_usec, 1));
+
+ LIST_FOREACH(device_limits, il, c->io_device_limits) {
+ char buf[FORMAT_BYTES_MAX];
+ CGroupIOLimitType type;
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ if (il->limits[type] != cgroup_io_limit_defaults[type])
+ fprintf(f,
+ "%s%s: %s %s\n",
+ prefix,
+ cgroup_io_limit_type_to_string(type),
+ il->path,
+ format_bytes(buf, sizeof(buf), il->limits[type]));
+ }
+
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+ fprintf(f,
+ "%sBlockIODeviceWeight: %s %" PRIu64,
+ prefix,
+ w->path,
+ w->weight);
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ char buf[FORMAT_BYTES_MAX];
+
+ if (b->rbps != CGROUP_LIMIT_MAX)
+ fprintf(f,
+ "%sBlockIOReadBandwidth: %s %s\n",
+ prefix,
+ b->path,
+ format_bytes(buf, sizeof(buf), b->rbps));
+ if (b->wbps != CGROUP_LIMIT_MAX)
+ fprintf(f,
+ "%sBlockIOWriteBandwidth: %s %s\n",
+ prefix,
+ b->path,
+ format_bytes(buf, sizeof(buf), b->wbps));
+ }
+
+ LIST_FOREACH(items, iaai, c->ip_address_allow) {
+ _cleanup_free_ char *k = NULL;
+
+ (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+ fprintf(f, "%sIPAddressAllow: %s/%u\n", prefix, strnull(k), iaai->prefixlen);
+ }
+
+ LIST_FOREACH(items, iaai, c->ip_address_deny) {
+ _cleanup_free_ char *k = NULL;
+
+ (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+ fprintf(f, "%sIPAddressDeny: %s/%u\n", prefix, strnull(k), iaai->prefixlen);
+ }
+
+ STRV_FOREACH(path, c->ip_filters_ingress)
+ fprintf(f, "%sIPIngressFilterPath: %s\n", prefix, *path);
+
+ STRV_FOREACH(path, c->ip_filters_egress)
+ fprintf(f, "%sIPEgressFilterPath: %s\n", prefix, *path);
+}
+
+int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
+ _cleanup_free_ CGroupDeviceAllow *a = NULL;
+ _cleanup_free_ char *d = NULL;
+
+ assert(c);
+ assert(dev);
+ assert(isempty(mode) || in_charset(mode, "rwm"));
+
+ a = new(CGroupDeviceAllow, 1);
+ if (!a)
+ return -ENOMEM;
+
+ d = strdup(dev);
+ if (!d)
+ return -ENOMEM;
+
+ *a = (CGroupDeviceAllow) {
+ .path = TAKE_PTR(d),
+ .r = isempty(mode) || strchr(mode, 'r'),
+ .w = isempty(mode) || strchr(mode, 'w'),
+ .m = isempty(mode) || strchr(mode, 'm'),
+ };
+
+ LIST_PREPEND(device_allow, c->device_allow, a);
+ TAKE_PTR(a);
+
+ return 0;
+}
+
+#define UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(entry) \
+ uint64_t unit_get_ancestor_##entry(Unit *u) { \
+ CGroupContext *c; \
+ \
+ /* 1. Is entry set in this unit? If so, use that. \
+ * 2. Is the default for this entry set in any \
+ * ancestor? If so, use that. \
+ * 3. Otherwise, return CGROUP_LIMIT_MIN. */ \
+ \
+ assert(u); \
+ \
+ c = unit_get_cgroup_context(u); \
+ if (c && c->entry##_set) \
+ return c->entry; \
+ \
+ while ((u = UNIT_DEREF(u->slice))) { \
+ c = unit_get_cgroup_context(u); \
+ if (c && c->default_##entry##_set) \
+ return c->default_##entry; \
+ } \
+ \
+ /* We've reached the root, but nobody had default for \
+ * this entry set, so set it to the kernel default. */ \
+ return CGROUP_LIMIT_MIN; \
+}
+
+UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_low);
+UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_min);
+
+static void cgroup_xattr_apply(Unit *u) {
+ char ids[SD_ID128_STRING_MAX];
+ int r;
+
+ assert(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ if (!sd_id128_is_null(u->invocation_id)) {
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ "trusted.invocation_id",
+ sd_id128_to_string(u->invocation_id, ids), 32,
+ 0);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+ }
+
+ if (unit_cgroup_delegate(u)) {
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ "trusted.delegate",
+ "1", 1,
+ 0);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to set delegate flag on control group %s, ignoring: %m", u->cgroup_path);
+ } else {
+ r = cg_remove_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "trusted.delegate");
+ if (r != -ENODATA)
+ log_unit_debug_errno(u, r, "Failed to remove delegate flag on control group %s, ignoring: %m", u->cgroup_path);
+ }
+}
+
+static int lookup_block_device(const char *p, dev_t *ret) {
+ dev_t rdev, dev = 0;
+ mode_t mode;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ r = device_path_parse_major_minor(p, &mode, &rdev);
+ if (r == -ENODEV) { /* not a parsable device node, need to go to disk */
+ struct stat st;
+
+ if (stat(p, &st) < 0)
+ return log_warning_errno(errno, "Couldn't stat device '%s': %m", p);
+
+ mode = st.st_mode;
+ rdev = st.st_rdev;
+ dev = st.st_dev;
+ } else if (r < 0)
+ return log_warning_errno(r, "Failed to parse major/minor from path '%s': %m", p);
+
+ if (S_ISCHR(mode))
+ return log_warning_errno(SYNTHETIC_ERRNO(ENOTBLK),
+ "Device node '%s' is a character device, but block device needed.", p);
+ if (S_ISBLK(mode))
+ *ret = rdev;
+ else if (major(dev) != 0)
+ *ret = dev; /* If this is not a device node then use the block device this file is stored on */
+ else {
+ /* If this is btrfs, getting the backing block device is a bit harder */
+ r = btrfs_get_block_device(p, ret);
+ if (r == -ENOTTY)
+ return log_warning_errno(SYNTHETIC_ERRNO(ENODEV),
+ "'%s' is not a block device node, and file system block device cannot be determined or is not local.", p);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to determine block device backing btrfs file system '%s': %m", p);
+ }
+
+ /* If this is a LUKS/DM device, recursively try to get the originating block device */
+ while (block_get_originating(*ret, ret) > 0);
+
+ /* If this is a partition, try to get the originating block device */
+ (void) block_get_whole_disk(*ret, ret);
+ return 0;
+}
+
+static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
+ return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
+}
+
+static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
+ return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
+}
+
+static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->startup_cpu_weight;
+ else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->cpu_weight;
+ else
+ return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->startup_cpu_shares;
+ else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->cpu_shares;
+ else
+ return CGROUP_CPU_SHARES_DEFAULT;
+}
+
+usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period) {
+ /* kernel uses a minimum resolution of 1ms, so both period and (quota * period)
+ * need to be higher than that boundary. quota is specified in USecPerSec.
+ * Additionally, period must be at most max_period. */
+ assert(quota > 0);
+
+ return MIN(MAX3(period, resolution, resolution * USEC_PER_SEC / quota), max_period);
+}
+
+static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) {
+ usec_t new_period;
+
+ if (quota == USEC_INFINITY)
+ /* Always use default period for infinity quota. */
+ return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
+
+ if (period == USEC_INFINITY)
+ /* Default period was requested. */
+ period = CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
+
+ /* Clamp to interval [1ms, 1s] */
+ new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC);
+
+ if (new_period != period) {
+ char v[FORMAT_TIMESPAN_MAX];
+ log_unit_full(u, u->warned_clamping_cpu_quota_period ? LOG_DEBUG : LOG_WARNING,
+ "Clamping CPU interval for cpu.max: period is now %s",
+ format_timespan(v, sizeof(v), new_period, 1));
+ u->warned_clamping_cpu_quota_period = true;
+ }
+
+ return new_period;
+}
+
+static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 2];
+
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf);
+}
+
+static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota, usec_t period) {
+ char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1];
+
+ period = cgroup_cpu_adjust_period_and_log(u, period, quota);
+ if (quota != USEC_INFINITY)
+ xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
+ MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC), period);
+ else
+ xsprintf(buf, "max " USEC_FMT "\n", period);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.max", buf);
+}
+
+static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 2];
+
+ xsprintf(buf, "%" PRIu64 "\n", shares);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf);
+}
+
+static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota, usec_t period) {
+ char buf[DECIMAL_STR_MAX(usec_t) + 2];
+
+ period = cgroup_cpu_adjust_period_and_log(u, period, quota);
+
+ xsprintf(buf, USEC_FMT "\n", period);
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf);
+
+ if (quota != USEC_INFINITY) {
+ xsprintf(buf, USEC_FMT "\n", MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC));
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf);
+ } else
+ (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n");
+}
+
+static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
+ return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
+ return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
+}
+
+static void cgroup_apply_unified_cpuset(Unit *u, const CPUSet *cpus, const char *name) {
+ _cleanup_free_ char *buf = NULL;
+
+ buf = cpu_set_to_range_string(cpus);
+ if (!buf) {
+ log_oom();
+ return;
+ }
+
+ (void) set_attribute_and_warn(u, "cpuset", name, buf);
+}
+
+static bool cgroup_context_has_io_config(CGroupContext *c) {
+ return c->io_accounting ||
+ c->io_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
+ c->io_device_weights ||
+ c->io_device_latencies ||
+ c->io_device_limits;
+}
+
+static bool cgroup_context_has_blockio_config(CGroupContext *c) {
+ return c->blockio_accounting ||
+ c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
+ c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
+ c->blockio_device_weights ||
+ c->blockio_device_bandwidths;
+}
+
+static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_io_weight != CGROUP_WEIGHT_INVALID)
+ return c->startup_io_weight;
+ else if (c->io_weight != CGROUP_WEIGHT_INVALID)
+ return c->io_weight;
+ else
+ return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID)
+ return c->startup_blockio_weight;
+ else if (c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID)
+ return c->blockio_weight;
+ else
+ return CGROUP_BLKIO_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight) {
+ return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) {
+ return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX);
+}
+
+static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_t io_weight) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight);
+ (void) set_attribute_and_warn(u, "io", "io.weight", buf);
+}
+
+static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.weight_device", buf);
+}
+
+static void cgroup_apply_io_device_latency(Unit *u, const char *dev_path, usec_t target) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+7+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ if (target != USEC_INFINITY)
+ xsprintf(buf, "%u:%u target=%" PRIu64 "\n", major(dev), minor(dev), target);
+ else
+ xsprintf(buf, "%u:%u target=max\n", major(dev), minor(dev));
+
+ (void) set_attribute_and_warn(u, "io", "io.latency", buf);
+}
+
+static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) {
+ char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)];
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4];
+ CGroupIOLimitType type;
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ if (limits[type] != cgroup_io_limit_defaults[type])
+ xsprintf(limit_bufs[type], "%" PRIu64, limits[type]);
+ else
+ xsprintf(limit_bufs[type], "%s", limits[type] == CGROUP_LIMIT_MAX ? "max" : "0");
+
+ xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev),
+ limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX],
+ limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]);
+ (void) set_attribute_and_warn(u, "io", "io.max", buf);
+}
+
+static void cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) {
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
+ dev_t dev;
+ int r;
+
+ r = lookup_block_device(dev_path, &dev);
+ if (r < 0)
+ return;
+
+ sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.read_bps_device", buf);
+
+ sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.write_bps_device", buf);
+}
+
+static bool unit_has_unified_memory_config(Unit *u) {
+ CGroupContext *c;
+
+ assert(u);
+
+ c = unit_get_cgroup_context(u);
+ assert(c);
+
+ return unit_get_ancestor_memory_min(u) > 0 || unit_get_ancestor_memory_low(u) > 0 ||
+ c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX ||
+ c->memory_swap_max != CGROUP_LIMIT_MAX;
+}
+
+static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max\n";
+
+ if (v != CGROUP_LIMIT_MAX)
+ xsprintf(buf, "%" PRIu64 "\n", v);
+
+ (void) set_attribute_and_warn(u, "memory", file, buf);
+}
+
+static void cgroup_apply_firewall(Unit *u) {
+ assert(u);
+
+ /* Best-effort: let's apply IP firewalling and/or accounting if that's enabled */
+
+ if (bpf_firewall_compile(u) < 0)
+ return;
+
+ (void) bpf_firewall_load_custom(u);
+ (void) bpf_firewall_install(u);
+}
+
+static int cgroup_apply_devices(Unit *u) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ const char *path;
+ CGroupContext *c;
+ CGroupDeviceAllow *a;
+ CGroupDevicePolicy policy;
+ int r;
+
+ assert_se(c = unit_get_cgroup_context(u));
+ assert_se(path = u->cgroup_path);
+
+ policy = c->device_policy;
+
+ if (cg_all_unified() > 0) {
+ r = bpf_devices_cgroup_init(&prog, policy, c->device_allow);
+ if (r < 0)
+ return log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
+
+ } else {
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
+ * EINVAL here. */
+
+ if (c->device_allow || policy != CGROUP_DEVICE_POLICY_AUTO)
+ r = cg_set_attribute("devices", path, "devices.deny", "a");
+ else
+ r = cg_set_attribute("devices", path, "devices.allow", "a");
+ if (r < 0)
+ log_unit_full_errno(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to reset devices.allow/devices.deny: %m");
+ }
+
+ bool allow_list_static = policy == CGROUP_DEVICE_POLICY_CLOSED ||
+ (policy == CGROUP_DEVICE_POLICY_AUTO && c->device_allow);
+ if (allow_list_static)
+ (void) bpf_devices_allow_list_static(prog, path);
+
+ bool any = allow_list_static;
+ LIST_FOREACH(device_allow, a, c->device_allow) {
+ char acc[4], *val;
+ unsigned k = 0;
+
+ if (a->r)
+ acc[k++] = 'r';
+ if (a->w)
+ acc[k++] = 'w';
+ if (a->m)
+ acc[k++] = 'm';
+ if (k == 0)
+ continue;
+ acc[k++] = 0;
+
+ if (path_startswith(a->path, "/dev/"))
+ r = bpf_devices_allow_list_device(prog, path, a->path, acc);
+ else if ((val = startswith(a->path, "block-")))
+ r = bpf_devices_allow_list_major(prog, path, val, 'b', acc);
+ else if ((val = startswith(a->path, "char-")))
+ r = bpf_devices_allow_list_major(prog, path, val, 'c', acc);
+ else {
+ log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
+ continue;
+ }
+
+ if (r >= 0)
+ any = true;
+ }
+
+ if (prog && !any) {
+ log_unit_warning_errno(u, SYNTHETIC_ERRNO(ENODEV), "No devices matched by device filter.");
+
+ /* The kernel verifier would reject a program we would build with the normal intro and outro
+ but no allow-listing rules (outro would contain an unreachable instruction for successful
+ return). */
+ policy = CGROUP_DEVICE_POLICY_STRICT;
+ }
+
+ r = bpf_devices_apply_policy(prog, policy, any, path, &u->bpf_device_control_installed);
+ if (r < 0) {
+ static bool warned = false;
+
+ log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
+ "Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
+ "Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
+ "(This warning is only shown for the first loaded unit using device ACL.)", u->id);
+
+ warned = true;
+ }
+ return r;
+}
+
+static void cgroup_context_apply(
+ Unit *u,
+ CGroupMask apply_mask,
+ ManagerState state) {
+
+ const char *path;
+ CGroupContext *c;
+ bool is_host_root, is_local_root;
+ int r;
+
+ assert(u);
+
+ /* Nothing to do? Exit early! */
+ if (apply_mask == 0)
+ return;
+
+ /* Some cgroup attributes are not supported on the host root cgroup, hence silently ignore them here. And other
+ * attributes should only be managed for cgroups further down the tree. */
+ is_local_root = unit_has_name(u, SPECIAL_ROOT_SLICE);
+ is_host_root = unit_has_host_root_cgroup(u);
+
+ assert_se(c = unit_get_cgroup_context(u));
+ assert_se(path = u->cgroup_path);
+
+ if (is_local_root) /* Make sure we don't try to display messages with an empty path. */
+ path = "/";
+
+ /* We generally ignore errors caused by read-only mounted cgroup trees (assuming we are running in a container
+ * then), and missing cgroups, i.e. EROFS and ENOENT. */
+
+ /* In fully unified mode these attributes don't exist on the host cgroup root. On legacy the weights exist, but
+ * setting the weight makes very little sense on the host root cgroup, as there are no other cgroups at this
+ * level. The quota exists there too, but any attempt to write to it is refused with EINVAL. Inside of
+ * containers we want to leave control of these to the container manager (and if cgroup v2 delegation is used
+ * we couldn't even write to them if we wanted to). */
+ if ((apply_mask & CGROUP_MASK_CPU) && !is_local_root) {
+
+ if (cg_all_unified() > 0) {
+ uint64_t weight;
+
+ if (cgroup_context_has_cpu_weight(c))
+ weight = cgroup_context_cpu_weight(c, state);
+ else if (cgroup_context_has_cpu_shares(c)) {
+ uint64_t shares;
+
+ shares = cgroup_context_cpu_shares(c, state);
+ weight = cgroup_cpu_shares_to_weight(shares);
+
+ log_cgroup_compat(u, "Applying [Startup]CPUShares=%" PRIu64 " as [Startup]CPUWeight=%" PRIu64 " on %s",
+ shares, weight, path);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
+
+ cgroup_apply_unified_cpu_weight(u, weight);
+ cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
+
+ } else {
+ uint64_t shares;
+
+ if (cgroup_context_has_cpu_weight(c)) {
+ uint64_t weight;
+
+ weight = cgroup_context_cpu_weight(c, state);
+ shares = cgroup_cpu_weight_to_shares(weight);
+
+ log_cgroup_compat(u, "Applying [Startup]CPUWeight=%" PRIu64 " as [Startup]CPUShares=%" PRIu64 " on %s",
+ weight, shares, path);
+ } else if (cgroup_context_has_cpu_shares(c))
+ shares = cgroup_context_cpu_shares(c, state);
+ else
+ shares = CGROUP_CPU_SHARES_DEFAULT;
+
+ cgroup_apply_legacy_cpu_shares(u, shares);
+ cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
+ }
+ }
+
+ if ((apply_mask & CGROUP_MASK_CPUSET) && !is_local_root) {
+ cgroup_apply_unified_cpuset(u, &c->cpuset_cpus, "cpuset.cpus");
+ cgroup_apply_unified_cpuset(u, &c->cpuset_mems, "cpuset.mems");
+ }
+
+ /* The 'io' controller attributes are not exported on the host's root cgroup (being a pure cgroup v2
+ * controller), and in case of containers we want to leave control of these attributes to the container manager
+ * (and we couldn't access that stuff anyway, even if we tried if proper delegation is used). */
+ if ((apply_mask & CGROUP_MASK_IO) && !is_local_root) {
+ char buf[8+DECIMAL_STR_MAX(uint64_t)+1];
+ bool has_io, has_blockio;
+ uint64_t weight;
+
+ has_io = cgroup_context_has_io_config(c);
+ has_blockio = cgroup_context_has_blockio_config(c);
+
+ if (has_io)
+ weight = cgroup_context_io_weight(c, state);
+ else if (has_blockio) {
+ uint64_t blkio_weight;
+
+ blkio_weight = cgroup_context_blkio_weight(c, state);
+ weight = cgroup_weight_blkio_to_io(blkio_weight);
+
+ log_cgroup_compat(u, "Applying [Startup]BlockIOWeight=%" PRIu64 " as [Startup]IOWeight=%" PRIu64,
+ blkio_weight, weight);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
+
+ xsprintf(buf, "default %" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "io", "io.weight", buf);
+
+ /* FIXME: drop this when distro kernels properly support BFQ through "io.weight"
+ * See also: https://github.com/systemd/systemd/pull/13335 */
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "io", "io.bfq.weight", buf);
+
+ if (has_io) {
+ CGroupIODeviceLatency *latency;
+ CGroupIODeviceLimit *limit;
+ CGroupIODeviceWeight *w;
+
+ LIST_FOREACH(device_weights, w, c->io_device_weights)
+ cgroup_apply_io_device_weight(u, w->path, w->weight);
+
+ LIST_FOREACH(device_limits, limit, c->io_device_limits)
+ cgroup_apply_io_device_limit(u, limit->path, limit->limits);
+
+ LIST_FOREACH(device_latencies, latency, c->io_device_latencies)
+ cgroup_apply_io_device_latency(u, latency->path, latency->target_usec);
+
+ } else if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+ CGroupBlockIODeviceBandwidth *b;
+
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+ weight = cgroup_weight_blkio_to_io(w->weight);
+
+ log_cgroup_compat(u, "Applying BlockIODeviceWeight=%" PRIu64 " as IODeviceWeight=%" PRIu64 " for %s",
+ w->weight, weight, w->path);
+
+ cgroup_apply_io_device_weight(u, w->path, weight);
+ }
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
+ CGroupIOLimitType type;
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ limits[type] = cgroup_io_limit_defaults[type];
+
+ limits[CGROUP_IO_RBPS_MAX] = b->rbps;
+ limits[CGROUP_IO_WBPS_MAX] = b->wbps;
+
+ log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax= for %s",
+ b->rbps, b->wbps, b->path);
+
+ cgroup_apply_io_device_limit(u, b->path, limits);
+ }
+ }
+ }
+
+ if (apply_mask & CGROUP_MASK_BLKIO) {
+ bool has_io, has_blockio;
+
+ has_io = cgroup_context_has_io_config(c);
+ has_blockio = cgroup_context_has_blockio_config(c);
+
+ /* Applying a 'weight' never makes sense for the host root cgroup, and for containers this should be
+ * left to our container manager, too. */
+ if (!is_local_root) {
+ char buf[DECIMAL_STR_MAX(uint64_t)+1];
+ uint64_t weight;
+
+ if (has_io) {
+ uint64_t io_weight;
+
+ io_weight = cgroup_context_io_weight(c, state);
+ weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
+
+ log_cgroup_compat(u, "Applying [Startup]IOWeight=%" PRIu64 " as [Startup]BlockIOWeight=%" PRIu64,
+ io_weight, weight);
+ } else if (has_blockio)
+ weight = cgroup_context_blkio_weight(c, state);
+ else
+ weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
+
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.weight", buf);
+
+ /* FIXME: drop this when distro kernels properly support BFQ through "blkio.weight"
+ * See also: https://github.com/systemd/systemd/pull/13335 */
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ (void) set_attribute_and_warn(u, "blkio", "blkio.bfq.weight", buf);
+
+ if (has_io) {
+ CGroupIODeviceWeight *w;
+
+ LIST_FOREACH(device_weights, w, c->io_device_weights) {
+ weight = cgroup_weight_io_to_blkio(w->weight);
+
+ log_cgroup_compat(u, "Applying IODeviceWeight=%" PRIu64 " as BlockIODeviceWeight=%" PRIu64 " for %s",
+ w->weight, weight, w->path);
+
+ cgroup_apply_blkio_device_weight(u, w->path, weight);
+ }
+ } else if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+ cgroup_apply_blkio_device_weight(u, w->path, w->weight);
+ }
+ }
+
+ /* The bandwidth limits are something that make sense to be applied to the host's root but not container
+ * roots, as there we want the container manager to handle it */
+ if (is_host_root || !is_local_root) {
+ if (has_io) {
+ CGroupIODeviceLimit *l;
+
+ LIST_FOREACH(device_limits, l, c->io_device_limits) {
+ log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax= for %s",
+ l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path);
+
+ cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]);
+ }
+ } else if (has_blockio) {
+ CGroupBlockIODeviceBandwidth *b;
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths)
+ cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps);
+ }
+ }
+ }
+
+ /* In unified mode 'memory' attributes do not exist on the root cgroup. In legacy mode 'memory.limit_in_bytes'
+ * exists on the root cgroup, but any writes to it are refused with EINVAL. And if we run in a container we
+ * want to leave control to the container manager (and if proper cgroup v2 delegation is used we couldn't even
+ * write to this if we wanted to.) */
+ if ((apply_mask & CGROUP_MASK_MEMORY) && !is_local_root) {
+
+ if (cg_all_unified() > 0) {
+ uint64_t max, swap_max = CGROUP_LIMIT_MAX;
+
+ if (unit_has_unified_memory_config(u)) {
+ max = c->memory_max;
+ swap_max = c->memory_swap_max;
+ } else {
+ max = c->memory_limit;
+
+ if (max != CGROUP_LIMIT_MAX)
+ log_cgroup_compat(u, "Applying MemoryLimit=%" PRIu64 " as MemoryMax=", max);
+ }
+
+ cgroup_apply_unified_memory_limit(u, "memory.min", unit_get_ancestor_memory_min(u));
+ cgroup_apply_unified_memory_limit(u, "memory.low", unit_get_ancestor_memory_low(u));
+ cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
+ cgroup_apply_unified_memory_limit(u, "memory.max", max);
+ cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
+
+ (void) set_attribute_and_warn(u, "memory", "memory.oom.group", one_zero(c->memory_oom_group));
+
+ } else {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+ uint64_t val;
+
+ if (unit_has_unified_memory_config(u)) {
+ val = c->memory_max;
+ log_cgroup_compat(u, "Applying MemoryMax=%" PRIi64 " as MemoryLimit=", val);
+ } else
+ val = c->memory_limit;
+
+ if (val == CGROUP_LIMIT_MAX)
+ strncpy(buf, "-1\n", sizeof(buf));
+ else
+ xsprintf(buf, "%" PRIu64 "\n", val);
+
+ (void) set_attribute_and_warn(u, "memory", "memory.limit_in_bytes", buf);
+ }
+ }
+
+ /* On cgroup v2 we can apply BPF everywhere. On cgroup v1 we apply it everywhere except for the root of
+ * containers, where we leave this to the manager */
+ if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) &&
+ (is_host_root || cg_all_unified() > 0 || !is_local_root))
+ (void) cgroup_apply_devices(u);
+
+ if (apply_mask & CGROUP_MASK_PIDS) {
+
+ if (is_host_root) {
+ /* So, the "pids" controller does not expose anything on the root cgroup, in order not to
+ * replicate knobs exposed elsewhere needlessly. We abstract this away here however, and when
+ * the knobs of the root cgroup are modified propagate this to the relevant sysctls. There's a
+ * non-obvious asymmetry however: unlike the cgroup properties we don't really want to take
+ * exclusive ownership of the sysctls, but we still want to honour things if the user sets
+ * limits. Hence we employ sort of a one-way strategy: when the user sets a bounded limit
+ * through us it counts. When the user afterwards unsets it again (i.e. sets it to unbounded)
+ * it also counts. But if the user never set a limit through us (i.e. we are the default of
+ * "unbounded") we leave things unmodified. For this we manage a global boolean that we turn on
+ * the first time we set a limit. Note that this boolean is flushed out on manager reload,
+ * which is desirable so that there's an official way to release control of the sysctl from
+ * systemd: set the limit to unbounded and reload. */
+
+ if (tasks_max_isset(&c->tasks_max)) {
+ u->manager->sysctl_pid_max_changed = true;
+ r = procfs_tasks_set_limit(tasks_max_resolve(&c->tasks_max));
+ } else if (u->manager->sysctl_pid_max_changed)
+ r = procfs_tasks_set_limit(TASKS_MAX);
+ else
+ r = 0;
+ if (r < 0)
+ log_unit_full_errno(u, LOG_LEVEL_CGROUP_WRITE(r), r,
+ "Failed to write to tasks limit sysctls: %m");
+ }
+
+ /* The attribute itself is not available on the host root cgroup, and in the container case we want to
+ * leave it for the container manager. */
+ if (!is_local_root) {
+ if (tasks_max_isset(&c->tasks_max)) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+
+ xsprintf(buf, "%" PRIu64 "\n", tasks_max_resolve(&c->tasks_max));
+ (void) set_attribute_and_warn(u, "pids", "pids.max", buf);
+ } else
+ (void) set_attribute_and_warn(u, "pids", "pids.max", "max\n");
+ }
+ }
+
+ if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
+ cgroup_apply_firewall(u);
+}
+
+static bool unit_get_needs_bpf_firewall(Unit *u) {
+ CGroupContext *c;
+ Unit *p;
+ assert(u);
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return false;
+
+ if (c->ip_accounting ||
+ c->ip_address_allow ||
+ c->ip_address_deny ||
+ c->ip_filters_ingress ||
+ c->ip_filters_egress)
+ return true;
+
+ /* If any parent slice has an IP access list defined, it applies too */
+ for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) {
+ c = unit_get_cgroup_context(p);
+ if (!c)
+ return false;
+
+ if (c->ip_address_allow ||
+ c->ip_address_deny)
+ return true;
+ }
+
+ return false;
+}
+
+static CGroupMask unit_get_cgroup_mask(Unit *u) {
+ CGroupMask mask = 0;
+ CGroupContext *c;
+
+ assert(u);
+
+ c = unit_get_cgroup_context(u);
+
+ assert(c);
+
+ /* Figure out which controllers we need, based on the cgroup context object */
+
+ if (c->cpu_accounting)
+ mask |= get_cpu_accounting_mask();
+
+ if (cgroup_context_has_cpu_weight(c) ||
+ cgroup_context_has_cpu_shares(c) ||
+ c->cpu_quota_per_sec_usec != USEC_INFINITY)
+ mask |= CGROUP_MASK_CPU;
+
+ if (c->cpuset_cpus.set || c->cpuset_mems.set)
+ mask |= CGROUP_MASK_CPUSET;
+
+ if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c))
+ mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
+
+ if (c->memory_accounting ||
+ c->memory_limit != CGROUP_LIMIT_MAX ||
+ unit_has_unified_memory_config(u))
+ mask |= CGROUP_MASK_MEMORY;
+
+ if (c->device_allow ||
+ c->device_policy != CGROUP_DEVICE_POLICY_AUTO)
+ mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
+
+ if (c->tasks_accounting ||
+ tasks_max_isset(&c->tasks_max))
+ mask |= CGROUP_MASK_PIDS;
+
+ return CGROUP_MASK_EXTEND_JOINED(mask);
+}
+
+static CGroupMask unit_get_bpf_mask(Unit *u) {
+ CGroupMask mask = 0;
+
+ /* Figure out which controllers we need, based on the cgroup context, possibly taking into account children
+ * too. */
+
+ if (unit_get_needs_bpf_firewall(u))
+ mask |= CGROUP_MASK_BPF_FIREWALL;
+
+ return mask;
+}
+
+CGroupMask unit_get_own_mask(Unit *u) {
+ CGroupContext *c;
+
+ /* Returns the mask of controllers the unit needs for itself. If a unit is not properly loaded, return an empty
+ * mask, as we shouldn't reflect it in the cgroup hierarchy then. */
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ return unit_get_cgroup_mask(u) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u);
+}
+
+CGroupMask unit_get_delegate_mask(Unit *u) {
+ CGroupContext *c;
+
+ /* If delegation is turned on, then turn on selected controllers, unless we are on the legacy hierarchy and the
+ * process we fork into is known to drop privileges, and hence shouldn't get access to the controllers.
+ *
+ * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */
+
+ if (!unit_cgroup_delegate(u))
+ return 0;
+
+ if (cg_all_unified() <= 0) {
+ ExecContext *e;
+
+ e = unit_get_exec_context(u);
+ if (e && !exec_context_maintains_privileges(e))
+ return 0;
+ }
+
+ assert_se(c = unit_get_cgroup_context(u));
+ return CGROUP_MASK_EXTEND_JOINED(c->delegate_controllers);
+}
+
+static CGroupMask unit_get_subtree_mask(Unit *u) {
+
+ /* Returns the mask of this subtree, meaning of the group
+ * itself and its children. */
+
+ return unit_get_own_mask(u) | unit_get_members_mask(u);
+}
+
+CGroupMask unit_get_members_mask(Unit *u) {
+ assert(u);
+
+ /* Returns the mask of controllers all of the unit's children require, merged */
+
+ if (u->cgroup_members_mask_valid)
+ return u->cgroup_members_mask; /* Use cached value if possible */
+
+ u->cgroup_members_mask = 0;
+
+ if (u->type == UNIT_SLICE) {
+ void *v;
+ Unit *member;
+
+ HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE])
+ if (UNIT_DEREF(member->slice) == u)
+ u->cgroup_members_mask |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
+ }
+
+ u->cgroup_members_mask_valid = true;
+ return u->cgroup_members_mask;
+}
+
+CGroupMask unit_get_siblings_mask(Unit *u) {
+ assert(u);
+
+ /* Returns the mask of controllers all of the unit's siblings
+ * require, i.e. the members mask of the unit's parent slice
+ * if there is one. */
+
+ if (UNIT_ISSET(u->slice))
+ return unit_get_members_mask(UNIT_DEREF(u->slice));
+
+ return unit_get_subtree_mask(u); /* we are the top-level slice */
+}
+
+static CGroupMask unit_get_disable_mask(Unit *u) {
+ CGroupContext *c;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ return c->disable_controllers;
+}
+
+CGroupMask unit_get_ancestor_disable_mask(Unit *u) {
+ CGroupMask mask;
+
+ assert(u);
+ mask = unit_get_disable_mask(u);
+
+ /* Returns the mask of controllers which are marked as forcibly
+ * disabled in any ancestor unit or the unit in question. */
+
+ if (UNIT_ISSET(u->slice))
+ mask |= unit_get_ancestor_disable_mask(UNIT_DEREF(u->slice));
+
+ return mask;
+}
+
+CGroupMask unit_get_target_mask(Unit *u) {
+ CGroupMask mask;
+
+ /* This returns the cgroup mask of all controllers to enable
+ * for a specific cgroup, i.e. everything it needs itself,
+ * plus all that its children need, plus all that its siblings
+ * need. This is primarily useful on the legacy cgroup
+ * hierarchy, where we need to duplicate each cgroup in each
+ * hierarchy that shall be enabled for it. */
+
+ mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
+
+ if (mask & CGROUP_MASK_BPF_FIREWALL & ~u->manager->cgroup_supported)
+ emit_bpf_firewall_warning(u);
+
+ mask &= u->manager->cgroup_supported;
+ mask &= ~unit_get_ancestor_disable_mask(u);
+
+ return mask;
+}
+
+CGroupMask unit_get_enable_mask(Unit *u) {
+ CGroupMask mask;
+
+ /* This returns the cgroup mask of all controllers to enable
+ * for the children of a specific cgroup. This is primarily
+ * useful for the unified cgroup hierarchy, where each cgroup
+ * controls which controllers are enabled for its children. */
+
+ mask = unit_get_members_mask(u);
+ mask &= u->manager->cgroup_supported;
+ mask &= ~unit_get_ancestor_disable_mask(u);
+
+ return mask;
+}
+
+void unit_invalidate_cgroup_members_masks(Unit *u) {
+ assert(u);
+
+ /* Recurse invalidate the member masks cache all the way up the tree */
+ u->cgroup_members_mask_valid = false;
+
+ if (UNIT_ISSET(u->slice))
+ unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice));
+}
+
+const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {
+
+ /* Returns the realized cgroup path of the specified unit where all specified controllers are available. */
+
+ while (u) {
+
+ if (u->cgroup_path &&
+ u->cgroup_realized &&
+ FLAGS_SET(u->cgroup_realized_mask, mask))
+ return u->cgroup_path;
+
+ u = UNIT_DEREF(u->slice);
+ }
+
+ return NULL;
+}
+
+static const char *migrate_callback(CGroupMask mask, void *userdata) {
+ /* If not realized at all, migrate to root ("").
+ * It may happen if we're upgrading from older version that didn't clean up.
+ */
+ return strempty(unit_get_realized_cgroup_path(userdata, mask));
+}
+
+char *unit_default_cgroup_path(const Unit *u) {
+ _cleanup_free_ char *escaped = NULL, *slice = NULL;
+ int r;
+
+ assert(u);
+
+ if (unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return strdup(u->manager->cgroup_root);
+
+ if (UNIT_ISSET(u->slice) && !unit_has_name(UNIT_DEREF(u->slice), SPECIAL_ROOT_SLICE)) {
+ r = cg_slice_to_path(UNIT_DEREF(u->slice)->id, &slice);
+ if (r < 0)
+ return NULL;
+ }
+
+ escaped = cg_escape(u->id);
+ if (!escaped)
+ return NULL;
+
+ return path_join(empty_to_root(u->manager->cgroup_root), slice, escaped);
+}
+
+int unit_set_cgroup_path(Unit *u, const char *path) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(u);
+
+ if (streq_ptr(u->cgroup_path, path))
+ return 0;
+
+ if (path) {
+ p = strdup(path);
+ if (!p)
+ return -ENOMEM;
+ }
+
+ if (p) {
+ r = hashmap_put(u->manager->cgroup_unit, p, u);
+ if (r < 0)
+ return r;
+ }
+
+ unit_release_cgroup(u);
+ u->cgroup_path = TAKE_PTR(p);
+
+ return 1;
+}
+
+int unit_watch_cgroup(Unit *u) {
+ _cleanup_free_ char *events = NULL;
+ int r;
+
+ assert(u);
+
+ /* Watches the "cgroups.events" attribute of this unit's cgroup for "empty" events, but only if
+ * cgroupv2 is available. */
+
+ if (!u->cgroup_path)
+ return 0;
+
+ if (u->cgroup_control_inotify_wd >= 0)
+ return 0;
+
+ /* Only applies to the unified hierarchy */
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether the name=systemd hierarchy is unified: %m");
+ if (r == 0)
+ return 0;
+
+ /* No point in watch the top-level slice, it's never going to run empty. */
+ if (unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ r = hashmap_ensure_allocated(&u->manager->cgroup_control_inotify_wd_unit, &trivial_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events", &events);
+ if (r < 0)
+ return log_oom();
+
+ u->cgroup_control_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
+ if (u->cgroup_control_inotify_wd < 0) {
+
+ if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this
+ * is not an error */
+ return 0;
+
+ return log_unit_error_errno(u, errno, "Failed to add control inotify watch descriptor for control group %s: %m", u->cgroup_path);
+ }
+
+ r = hashmap_put(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd), u);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to add control inotify watch descriptor to hash map: %m");
+
+ return 0;
+}
+
+int unit_watch_cgroup_memory(Unit *u) {
+ _cleanup_free_ char *events = NULL;
+ CGroupContext *c;
+ int r;
+
+ assert(u);
+
+ /* Watches the "memory.events" attribute of this unit's cgroup for "oom_kill" events, but only if
+ * cgroupv2 is available. */
+
+ if (!u->cgroup_path)
+ return 0;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ /* The "memory.events" attribute is only available if the memory controller is on. Let's hence tie
+ * this to memory accounting, in a way watching for OOM kills is a form of memory accounting after
+ * all. */
+ if (!c->memory_accounting)
+ return 0;
+
+ /* Don't watch inner nodes, as the kernel doesn't report oom_kill events recursively currently, and
+ * we also don't want to generate a log message for each parent cgroup of a process. */
+ if (u->type == UNIT_SLICE)
+ return 0;
+
+ if (u->cgroup_memory_inotify_wd >= 0)
+ return 0;
+
+ /* Only applies to the unified hierarchy */
+ r = cg_all_unified();
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether the memory controller is unified: %m");
+ if (r == 0)
+ return 0;
+
+ r = hashmap_ensure_allocated(&u->manager->cgroup_memory_inotify_wd_unit, &trivial_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "memory.events", &events);
+ if (r < 0)
+ return log_oom();
+
+ u->cgroup_memory_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
+ if (u->cgroup_memory_inotify_wd < 0) {
+
+ if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this
+ * is not an error */
+ return 0;
+
+ return log_unit_error_errno(u, errno, "Failed to add memory inotify watch descriptor for control group %s: %m", u->cgroup_path);
+ }
+
+ r = hashmap_put(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd), u);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to add memory inotify watch descriptor to hash map: %m");
+
+ return 0;
+}
+
+int unit_pick_cgroup_path(Unit *u) {
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(u);
+
+ if (u->cgroup_path)
+ return 0;
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return -EINVAL;
+
+ path = unit_default_cgroup_path(u);
+ if (!path)
+ return log_oom();
+
+ r = unit_set_cgroup_path(u, path);
+ if (r == -EEXIST)
+ return log_unit_error_errno(u, r, "Control group %s exists already.", path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to set unit's control group path to %s: %m", path);
+
+ return 0;
+}
+
+static int cg_v1_errno_to_log_level(int r) {
+ return r == -EROFS ? LOG_DEBUG : LOG_WARNING;
+}
+
+static int unit_update_cgroup(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask,
+ ManagerState state) {
+
+ bool created, is_root_slice;
+ CGroupMask migrate_mask = 0;
+ int r;
+
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return 0;
+
+ /* Figure out our cgroup path */
+ r = unit_pick_cgroup_path(u);
+ if (r < 0)
+ return r;
+
+ /* First, create our own group */
+ r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", u->cgroup_path);
+ created = r;
+
+ /* Start watching it */
+ (void) unit_watch_cgroup(u);
+ (void) unit_watch_cgroup_memory(u);
+
+
+ /* For v2 we preserve enabled controllers in delegated units, adjust others,
+ * for v1 we figure out which controller hierarchies need migration. */
+ if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) {
+ CGroupMask result_mask = 0;
+
+ /* Enable all controllers we need */
+ r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", u->cgroup_path);
+
+ /* Remember what's actually enabled now */
+ u->cgroup_enabled_mask = result_mask;
+
+ migrate_mask = u->cgroup_realized_mask ^ target_mask;
+ }
+
+ /* Keep track that this is now realized */
+ u->cgroup_realized = true;
+ u->cgroup_realized_mask = target_mask;
+
+ /* Migrate processes in controller hierarchies both downwards (enabling) and upwards (disabling).
+ *
+ * Unnecessary controller cgroups are trimmed (after emptied by upward migration).
+ * We perform migration also with whole slices for cases when users don't care about leave
+ * granularity. Since delegated_mask is subset of target mask, we won't trim slice subtree containing
+ * delegated units.
+ *
+ * If we're in an nspawn container and using legacy cgroups, the controller hierarchies are mounted
+ * read-only into the container. We skip migration/trim in this scenario since it would fail
+ * regardless with noisy "Read-only filesystem" warnings.
+ */
+ if (cg_all_unified() == 0) {
+ r = cg_migrate_v1_controllers(u->manager->cgroup_supported, migrate_mask, u->cgroup_path, migrate_callback, u);
+ if (r < 0)
+ log_unit_full_errno(
+ u,
+ cg_v1_errno_to_log_level(r),
+ r,
+ "Failed to migrate controller cgroups from %s, ignoring: %m",
+ u->cgroup_path);
+
+ is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
+ r = cg_trim_v1_controllers(u->manager->cgroup_supported, ~target_mask, u->cgroup_path, !is_root_slice);
+ if (r < 0)
+ log_unit_full_errno(
+ u,
+ cg_v1_errno_to_log_level(r),
+ r,
+ "Failed to delete controller cgroups %s, ignoring: %m",
+ u->cgroup_path);
+ }
+
+ /* Set attributes */
+ cgroup_context_apply(u, target_mask, state);
+ cgroup_xattr_apply(u);
+
+ return 0;
+}
+
+static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suffix_path) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char *pp;
+ int r;
+
+ assert(u);
+
+ if (MANAGER_IS_SYSTEM(u->manager))
+ return -EINVAL;
+
+ if (!u->manager->system_bus)
+ return -EIO;
+
+ if (!u->cgroup_path)
+ return -EINVAL;
+
+ /* Determine this unit's cgroup path relative to our cgroup root */
+ pp = path_startswith(u->cgroup_path, u->manager->cgroup_root);
+ if (!pp)
+ return -EINVAL;
+
+ pp = strjoina("/", pp, suffix_path);
+ path_simplify(pp, false);
+
+ r = sd_bus_call_method(u->manager->system_bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "AttachProcessesToUnit",
+ &error, NULL,
+ "ssau",
+ NULL /* empty unit name means client's unit, i.e. us */, pp, 1, (uint32_t) pid);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to attach unit process " PID_FMT " via the bus: %s", pid, bus_error_message(&error, r));
+
+ return 0;
+}
+
+int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
+ CGroupMask delegated_mask;
+ const char *p;
+ void *pidp;
+ int r, q;
+
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return -EINVAL;
+
+ if (set_isempty(pids))
+ return 0;
+
+ /* Load any custom firewall BPF programs here once to test if they are existing and actually loadable.
+ * Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */
+ r = bpf_firewall_load_custom(u);
+ if (r < 0)
+ return r;
+
+ r = unit_realize_cgroup(u);
+ if (r < 0)
+ return r;
+
+ if (isempty(suffix_path))
+ p = u->cgroup_path;
+ else
+ p = prefix_roota(u->cgroup_path, suffix_path);
+
+ delegated_mask = unit_get_delegate_mask(u);
+
+ r = 0;
+ SET_FOREACH(pidp, pids) {
+ pid_t pid = PTR_TO_PID(pidp);
+ CGroupController c;
+
+ /* First, attach the PID to the main cgroup hierarchy */
+ q = cg_attach(SYSTEMD_CGROUP_CONTROLLER, p, pid);
+ if (q < 0) {
+ log_unit_debug_errno(u, q, "Couldn't move process " PID_FMT " to requested cgroup '%s': %m", pid, p);
+
+ if (MANAGER_IS_USER(u->manager) && ERRNO_IS_PRIVILEGE(q)) {
+ int z;
+
+ /* If we are in a user instance, and we can't move the process ourselves due to
+ * permission problems, let's ask the system instance about it instead. Since it's more
+ * privileged it might be able to move the process across the leaves of a subtree who's
+ * top node is not owned by us. */
+
+ z = unit_attach_pid_to_cgroup_via_bus(u, pid, suffix_path);
+ if (z < 0)
+ log_unit_debug_errno(u, z, "Couldn't move process " PID_FMT " to requested cgroup '%s' via the system bus either: %m", pid, p);
+ else
+ continue; /* When the bus thing worked via the bus we are fully done for this PID. */
+ }
+
+ if (r >= 0)
+ r = q; /* Remember first error */
+
+ continue;
+ }
+
+ q = cg_all_unified();
+ if (q < 0)
+ return q;
+ if (q > 0)
+ continue;
+
+ /* In the legacy hierarchy, attach the process to the request cgroup if possible, and if not to the
+ * innermost realized one */
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *realized;
+
+ if (!(u->manager->cgroup_supported & bit))
+ continue;
+
+ /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */
+ if (delegated_mask & u->cgroup_realized_mask & bit) {
+ q = cg_attach(cgroup_controller_to_string(c), p, pid);
+ if (q >= 0)
+ continue; /* Success! */
+
+ log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to requested cgroup %s in controller %s, falling back to unit's cgroup: %m",
+ pid, p, cgroup_controller_to_string(c));
+ }
+
+ /* So this controller is either not delegate or realized, or something else weird happened. In
+ * that case let's attach the PID at least to the closest cgroup up the tree that is
+ * realized. */
+ realized = unit_get_realized_cgroup_path(u, bit);
+ if (!realized)
+ continue; /* Not even realized in the root slice? Then let's not bother */
+
+ q = cg_attach(cgroup_controller_to_string(c), realized, pid);
+ if (q < 0)
+ log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to realized cgroup %s in controller %s, ignoring: %m",
+ pid, realized, cgroup_controller_to_string(c));
+ }
+ }
+
+ return r;
+}
+
+static bool unit_has_mask_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if this unit is fully realized. We check four things:
+ *
+ * 1. Whether the cgroup was created at all
+ * 2. Whether the cgroup was created in all the hierarchies we need it to be created in (in case of cgroup v1)
+ * 3. Whether the cgroup has all the right controllers enabled (in case of cgroup v2)
+ * 4. Whether the invalidation mask is currently zero
+ *
+ * If you wonder why we mask the target realization and enable mask with CGROUP_MASK_V1/CGROUP_MASK_V2: note
+ * that there are three sets of bitmasks: CGROUP_MASK_V1 (for real cgroup v1 controllers), CGROUP_MASK_V2 (for
+ * real cgroup v2 controllers) and CGROUP_MASK_BPF (for BPF-based pseudo-controllers). Now, cgroup_realized_mask
+ * is only matters for cgroup v1 controllers, and cgroup_enabled_mask only used for cgroup v2, and if they
+ * differ in the others, we don't really care. (After all, the cgroup_enabled_mask tracks with controllers are
+ * enabled through cgroup.subtree_control, and since the BPF pseudo-controllers don't show up there, they
+ * simply don't matter. */
+
+ return u->cgroup_realized &&
+ ((u->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 &&
+ ((u->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 &&
+ u->cgroup_invalidated_mask == 0;
+}
+
+static bool unit_has_mask_disables_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if all controllers which should be disabled are indeed disabled.
+ *
+ * Unlike unit_has_mask_realized, we don't care what was enabled, only that anything we want to remove is
+ * already removed. */
+
+ return !u->cgroup_realized ||
+ (FLAGS_SET(u->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
+ FLAGS_SET(u->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
+}
+
+static bool unit_has_mask_enables_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if all controllers which should be enabled are indeed enabled.
+ *
+ * Unlike unit_has_mask_realized, we don't care about the controllers that are not present, only that anything
+ * we want to add is already added. */
+
+ return u->cgroup_realized &&
+ ((u->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (u->cgroup_realized_mask & CGROUP_MASK_V1) &&
+ ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2);
+}
+
+static void unit_add_to_cgroup_realize_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_cgroup_realize_queue)
+ return;
+
+ LIST_APPEND(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
+ u->in_cgroup_realize_queue = true;
+}
+
+static void unit_remove_from_cgroup_realize_queue(Unit *u) {
+ assert(u);
+
+ if (!u->in_cgroup_realize_queue)
+ return;
+
+ LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
+ u->in_cgroup_realize_queue = false;
+}
+
+/* Controllers can only be enabled breadth-first, from the root of the
+ * hierarchy downwards to the unit in question. */
+static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) {
+ CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
+ int r;
+
+ assert(u);
+
+ /* First go deal with this unit's parent, or we won't be able to enable
+ * any new controllers at this layer. */
+ if (UNIT_ISSET(u->slice)) {
+ r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
+ if (r < 0)
+ return r;
+ }
+
+ target_mask = unit_get_target_mask(u);
+ enable_mask = unit_get_enable_mask(u);
+
+ /* We can only enable in this direction, don't try to disable anything.
+ */
+ if (unit_has_mask_enables_realized(u, target_mask, enable_mask))
+ return 0;
+
+ new_target_mask = u->cgroup_realized_mask | target_mask;
+ new_enable_mask = u->cgroup_enabled_mask | enable_mask;
+
+ return unit_update_cgroup(u, new_target_mask, new_enable_mask, state);
+}
+
+/* Controllers can only be disabled depth-first, from the leaves of the
+ * hierarchy upwards to the unit in question. */
+static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
+ Unit *m;
+ void *v;
+
+ assert(u);
+
+ if (u->type != UNIT_SLICE)
+ return 0;
+
+ HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE]) {
+ CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
+ int r;
+
+ if (UNIT_DEREF(m->slice) != u)
+ continue;
+
+ /* The cgroup for this unit might not actually be fully
+ * realised yet, in which case it isn't holding any controllers
+ * open anyway. */
+ if (!m->cgroup_realized)
+ continue;
+
+ /* We must disable those below us first in order to release the
+ * controller. */
+ if (m->type == UNIT_SLICE)
+ (void) unit_realize_cgroup_now_disable(m, state);
+
+ target_mask = unit_get_target_mask(m);
+ enable_mask = unit_get_enable_mask(m);
+
+ /* We can only disable in this direction, don't try to enable
+ * anything. */
+ if (unit_has_mask_disables_realized(m, target_mask, enable_mask))
+ continue;
+
+ new_target_mask = m->cgroup_realized_mask & target_mask;
+ new_enable_mask = m->cgroup_enabled_mask & enable_mask;
+
+ r = unit_update_cgroup(m, new_target_mask, new_enable_mask, state);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+/* Check if necessary controllers and attributes for a unit are in place.
+ *
+ * - If so, do nothing.
+ * - If not, create paths, move processes over, and set attributes.
+ *
+ * Controllers can only be *enabled* in a breadth-first way, and *disabled* in
+ * a depth-first way. As such the process looks like this:
+ *
+ * Suppose we have a cgroup hierarchy which looks like this:
+ *
+ * root
+ * / \
+ * / \
+ * / \
+ * a b
+ * / \ / \
+ * / \ / \
+ * c d e f
+ * / \ / \ / \ / \
+ * h i j k l m n o
+ *
+ * 1. We want to realise cgroup "d" now.
+ * 2. cgroup "a" has DisableControllers=cpu in the associated unit.
+ * 3. cgroup "k" just started requesting the memory controller.
+ *
+ * To make this work we must do the following in order:
+ *
+ * 1. Disable CPU controller in k, j
+ * 2. Disable CPU controller in d
+ * 3. Enable memory controller in root
+ * 4. Enable memory controller in a
+ * 5. Enable memory controller in d
+ * 6. Enable memory controller in k
+ *
+ * Notice that we need to touch j in one direction, but not the other. We also
+ * don't go beyond d when disabling -- it's up to "a" to get realized if it
+ * wants to disable further. The basic rules are therefore:
+ *
+ * - If you're disabling something, you need to realise all of the cgroups from
+ * your recursive descendants to the root. This starts from the leaves.
+ * - If you're enabling something, you need to realise from the root cgroup
+ * downwards, but you don't need to iterate your recursive descendants.
+ *
+ * Returns 0 on success and < 0 on failure. */
+static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
+ CGroupMask target_mask, enable_mask;
+ int r;
+
+ assert(u);
+
+ unit_remove_from_cgroup_realize_queue(u);
+
+ target_mask = unit_get_target_mask(u);
+ enable_mask = unit_get_enable_mask(u);
+
+ if (unit_has_mask_realized(u, target_mask, enable_mask))
+ return 0;
+
+ /* Disable controllers below us, if there are any */
+ r = unit_realize_cgroup_now_disable(u, state);
+ if (r < 0)
+ return r;
+
+ /* Enable controllers above us, if there are any */
+ if (UNIT_ISSET(u->slice)) {
+ r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
+ if (r < 0)
+ return r;
+ }
+
+ /* Now actually deal with the cgroup we were trying to realise and set attributes */
+ r = unit_update_cgroup(u, target_mask, enable_mask, state);
+ if (r < 0)
+ return r;
+
+ /* Now, reset the invalidation mask */
+ u->cgroup_invalidated_mask = 0;
+ return 0;
+}
+
+unsigned manager_dispatch_cgroup_realize_queue(Manager *m) {
+ ManagerState state;
+ unsigned n = 0;
+ Unit *i;
+ int r;
+
+ assert(m);
+
+ state = manager_state(m);
+
+ while ((i = m->cgroup_realize_queue)) {
+ assert(i->in_cgroup_realize_queue);
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(i))) {
+ /* Maybe things changed, and the unit is not actually active anymore? */
+ unit_remove_from_cgroup_realize_queue(i);
+ continue;
+ }
+
+ r = unit_realize_cgroup_now(i, state);
+ if (r < 0)
+ log_warning_errno(r, "Failed to realize cgroups for queued unit %s, ignoring: %m", i->id);
+
+ n++;
+ }
+
+ return n;
+}
+
+void unit_add_family_to_cgroup_realize_queue(Unit *u) {
+ assert(u);
+ assert(u->type == UNIT_SLICE);
+
+ /* Family of a unit for is defined as (immediate) children of the unit and immediate children of all
+ * its ancestors.
+ *
+ * Ideally we would enqueue ancestor path only (bottom up). However, on cgroup-v1 scheduling becomes
+ * very weird if two units that own processes reside in the same slice, but one is realized in the
+ * "cpu" hierarchy and one is not (for example because one has CPUWeight= set and the other does
+ * not), because that means individual processes need to be scheduled against whole cgroups. Let's
+ * avoid this asymmetry by always ensuring that siblings of a unit are always realized in their v1
+ * controller hierarchies too (if unit requires the controller to be realized).
+ *
+ * The function must invalidate cgroup_members_mask of all ancestors in order to calculate up to date
+ * masks. */
+
+ do {
+ Unit *m;
+ void *v;
+
+ /* Children of u likely changed when we're called */
+ u->cgroup_members_mask_valid = false;
+
+ HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE]) {
+ /* Skip units that have a dependency on the slice but aren't actually in it. */
+ if (UNIT_DEREF(m->slice) != u)
+ continue;
+
+ /* No point in doing cgroup application for units without active processes. */
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
+ continue;
+
+ /* We only enqueue siblings if they were realized once at least, in the main
+ * hierarchy. */
+ if (!m->cgroup_realized)
+ continue;
+
+ /* If the unit doesn't need any new controllers and has current ones realized, it
+ * doesn't need any changes. */
+ if (unit_has_mask_realized(m,
+ unit_get_target_mask(m),
+ unit_get_enable_mask(m)))
+ continue;
+
+ unit_add_to_cgroup_realize_queue(m);
+ }
+
+ /* Parent comes after children */
+ unit_add_to_cgroup_realize_queue(u);
+ } while ((u = UNIT_DEREF(u->slice)));
+}
+
+int unit_realize_cgroup(Unit *u) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return 0;
+
+ /* So, here's the deal: when realizing the cgroups for this unit, we need to first create all
+ * parents, but there's more actually: for the weight-based controllers we also need to make sure
+ * that all our siblings (i.e. units that are in the same slice as we are) have cgroups, too. On the
+ * other hand, when a controller is removed from realized set, it may become unnecessary in siblings
+ * and ancestors and they should be (de)realized too.
+ *
+ * This call will defer work on the siblings and derealized ancestors to the next event loop
+ * iteration and synchronously creates the parent cgroups (unit_realize_cgroup_now). */
+
+ if (UNIT_ISSET(u->slice))
+ unit_add_family_to_cgroup_realize_queue(UNIT_DEREF(u->slice));
+
+ /* And realize this one now (and apply the values) */
+ return unit_realize_cgroup_now(u, manager_state(u->manager));
+}
+
+void unit_release_cgroup(Unit *u) {
+ assert(u);
+
+ /* Forgets all cgroup details for this cgroup — but does *not* destroy the cgroup. This is hence OK to call
+ * when we close down everything for reexecution, where we really want to leave the cgroup in place. */
+
+ if (u->cgroup_path) {
+ (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
+ u->cgroup_path = mfree(u->cgroup_path);
+ }
+
+ if (u->cgroup_control_inotify_wd >= 0) {
+ if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_control_inotify_wd) < 0)
+ log_unit_debug_errno(u, errno, "Failed to remove cgroup control inotify watch %i for %s, ignoring: %m", u->cgroup_control_inotify_wd, u->id);
+
+ (void) hashmap_remove(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd));
+ u->cgroup_control_inotify_wd = -1;
+ }
+
+ if (u->cgroup_memory_inotify_wd >= 0) {
+ if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_memory_inotify_wd) < 0)
+ log_unit_debug_errno(u, errno, "Failed to remove cgroup memory inotify watch %i for %s, ignoring: %m", u->cgroup_memory_inotify_wd, u->id);
+
+ (void) hashmap_remove(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd));
+ u->cgroup_memory_inotify_wd = -1;
+ }
+}
+
+bool unit_maybe_release_cgroup(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (!u->cgroup_path)
+ return true;
+
+ /* Don't release the cgroup if there are still processes under it. If we get notified later when all the
+ * processes exit (e.g. the processes were in D-state and exited after the unit was marked as failed)
+ * we need the cgroup paths to continue to be tracked by the manager so they can be looked up and cleaned
+ * up later. */
+ r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Error checking if the cgroup is recursively empty, ignoring: %m");
+ else if (r == 1) {
+ unit_release_cgroup(u);
+ return true;
+ }
+
+ return false;
+}
+
+void unit_prune_cgroup(Unit *u) {
+ int r;
+ bool is_root_slice;
+
+ assert(u);
+
+ /* Removes the cgroup, if empty and possible, and stops watching it. */
+
+ if (!u->cgroup_path)
+ return;
+
+ (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
+
+ is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
+
+ r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
+ if (r < 0)
+ /* One reason we could have failed here is, that the cgroup still contains a process.
+ * However, if the cgroup becomes removable at a later time, it might be removed when
+ * the containing slice is stopped. So even if we failed now, this unit shouldn't assume
+ * that the cgroup is still realized the next time it is started. Do not return early
+ * on error, continue cleanup. */
+ log_unit_full_errno(u, r == -EBUSY ? LOG_DEBUG : LOG_WARNING, r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path);
+
+ if (is_root_slice)
+ return;
+
+ if (!unit_maybe_release_cgroup(u)) /* Returns true if the cgroup was released */
+ return;
+
+ u->cgroup_realized = false;
+ u->cgroup_realized_mask = 0;
+ u->cgroup_enabled_mask = 0;
+
+ u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
+}
+
+int unit_search_main_pid(Unit *u, pid_t *ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0, npid;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ if (!u->cgroup_path)
+ return -ENXIO;
+
+ r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f);
+ if (r < 0)
+ return r;
+
+ while (cg_read_pid(f, &npid) > 0) {
+
+ if (npid == pid)
+ continue;
+
+ if (pid_is_my_child(npid) == 0)
+ continue;
+
+ if (pid != 0)
+ /* Dang, there's more than one daemonized PID
+ in this group, so we don't know what process
+ is the main process. */
+
+ return -ENODATA;
+
+ pid = npid;
+ }
+
+ *ret = pid;
+ return 0;
+}
+
+static int unit_watch_pids_in_path(Unit *u, const char *path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int ret = 0, r;
+
+ assert(u);
+ assert(path);
+
+ r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
+ if (r < 0)
+ ret = r;
+ else {
+ pid_t pid;
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+ r = unit_watch_pid(u, pid, false);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
+ if (r < 0) {
+ if (ret >= 0)
+ ret = r;
+ } else {
+ char *fn;
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(empty_to_root(path), fn);
+ free(fn);
+
+ if (!p)
+ return -ENOMEM;
+
+ r = unit_watch_pids_in_path(u, p);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+int unit_synthesize_cgroup_empty_event(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Enqueue a synthetic cgroup empty event if this unit doesn't watch any PIDs anymore. This is compatibility
+ * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can
+ * get as notification source as soon as we stopped having any useful PIDs to watch for. */
+
+ if (!u->cgroup_path)
+ return -ENOENT;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return r;
+ if (r > 0) /* On unified we have reliable notifications, and don't need this */
+ return 0;
+
+ if (!set_isempty(u->pids))
+ return 0;
+
+ unit_add_to_cgroup_empty_queue(u);
+ return 0;
+}
+
+int unit_watch_all_pids(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Adds all PIDs from our cgroup to the set of PIDs we
+ * watch. This is a fallback logic for cases where we do not
+ * get reliable cgroup empty notifications: we try to use
+ * SIGCHLD as replacement. */
+
+ if (!u->cgroup_path)
+ return -ENOENT;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return r;
+ if (r > 0) /* On unified we can use proper notifications */
+ return 0;
+
+ return unit_watch_pids_in_path(u, u->cgroup_path);
+}
+
+static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+ Unit *u;
+ int r;
+
+ assert(s);
+ assert(m);
+
+ u = m->cgroup_empty_queue;
+ if (!u)
+ return 0;
+
+ assert(u->in_cgroup_empty_queue);
+ u->in_cgroup_empty_queue = false;
+ LIST_REMOVE(cgroup_empty_queue, m->cgroup_empty_queue, u);
+
+ if (m->cgroup_empty_queue) {
+ /* More stuff queued, let's make sure we remain enabled */
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_debug_errno(r, "Failed to reenable cgroup empty event source, ignoring: %m");
+ }
+
+ unit_add_to_gc_queue(u);
+
+ if (UNIT_VTABLE(u)->notify_cgroup_empty)
+ UNIT_VTABLE(u)->notify_cgroup_empty(u);
+
+ return 0;
+}
+
+void unit_add_to_cgroup_empty_queue(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Note that there are four different ways how cgroup empty events reach us:
+ *
+ * 1. On the unified hierarchy we get an inotify event on the cgroup
+ *
+ * 2. On the legacy hierarchy, when running in system mode, we get a datagram on the cgroup agent socket
+ *
+ * 3. On the legacy hierarchy, when running in user mode, we get a D-Bus signal on the system bus
+ *
+ * 4. On the legacy hierarchy, in service units we start watching all processes of the cgroup for SIGCHLD as
+ * soon as we get one SIGCHLD, to deal with unreliable cgroup notifications.
+ *
+ * Regardless which way we got the notification, we'll verify it here, and then add it to a separate
+ * queue. This queue will be dispatched at a lower priority than the SIGCHLD handler, so that we always use
+ * SIGCHLD if we can get it first, and only use the cgroup empty notifications if there's no SIGCHLD pending
+ * (which might happen if the cgroup doesn't contain processes that are our own child, which is typically the
+ * case for scope units). */
+
+ if (u->in_cgroup_empty_queue)
+ return;
+
+ /* Let's verify that the cgroup is really empty */
+ if (!u->cgroup_path)
+ return;
+
+ r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", u->cgroup_path);
+ return;
+ }
+ if (r == 0)
+ return;
+
+ LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
+ u->in_cgroup_empty_queue = true;
+
+ /* Trigger the defer event */
+ r = sd_event_source_set_enabled(u->manager->cgroup_empty_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_debug_errno(r, "Failed to enable cgroup empty event source: %m");
+}
+
+static void unit_remove_from_cgroup_empty_queue(Unit *u) {
+ assert(u);
+
+ if (!u->in_cgroup_empty_queue)
+ return;
+
+ LIST_REMOVE(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
+ u->in_cgroup_empty_queue = false;
+}
+
+int unit_check_oomd_kill(Unit *u) {
+ _cleanup_free_ char *value = NULL;
+ bool increased;
+ uint64_t n = 0;
+ int r;
+
+ if (!u->cgroup_path)
+ return 0;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Couldn't determine whether we are in all unified mode: %m");
+ else if (r == 0)
+ return 0;
+
+ r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.systemd_oomd_kill", &value);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ if (!isempty(value)) {
+ r = safe_atou64(value, &n);
+ if (r < 0)
+ return r;
+ }
+
+ increased = n > u->managed_oom_kill_last;
+ u->managed_oom_kill_last = n;
+
+ if (!increased)
+ return 0;
+
+ if (n > 0)
+ log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n));
+
+ return 1;
+}
+
+int unit_check_oom(Unit *u) {
+ _cleanup_free_ char *oom_kill = NULL;
+ bool increased;
+ uint64_t c;
+ int r;
+
+ if (!u->cgroup_path)
+ return 0;
+
+ r = cg_get_keyed_attribute("memory", u->cgroup_path, "memory.events", STRV_MAKE("oom_kill"), &oom_kill);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to read oom_kill field of memory.events cgroup attribute: %m");
+
+ r = safe_atou64(oom_kill, &c);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to parse oom_kill field: %m");
+
+ increased = c > u->oom_kill_last;
+ u->oom_kill_last = c;
+
+ if (!increased)
+ return 0;
+
+ log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_OUT_OF_MEMORY_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "A process of this unit has been killed by the OOM killer."));
+
+ if (UNIT_VTABLE(u)->notify_cgroup_oom)
+ UNIT_VTABLE(u)->notify_cgroup_oom(u);
+
+ return 1;
+}
+
+static int on_cgroup_oom_event(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+ Unit *u;
+ int r;
+
+ assert(s);
+ assert(m);
+
+ u = m->cgroup_oom_queue;
+ if (!u)
+ return 0;
+
+ assert(u->in_cgroup_oom_queue);
+ u->in_cgroup_oom_queue = false;
+ LIST_REMOVE(cgroup_oom_queue, m->cgroup_oom_queue, u);
+
+ if (m->cgroup_oom_queue) {
+ /* More stuff queued, let's make sure we remain enabled */
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_debug_errno(r, "Failed to reenable cgroup oom event source, ignoring: %m");
+ }
+
+ (void) unit_check_oom(u);
+ return 0;
+}
+
+static void unit_add_to_cgroup_oom_queue(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->in_cgroup_oom_queue)
+ return;
+ if (!u->cgroup_path)
+ return;
+
+ LIST_PREPEND(cgroup_oom_queue, u->manager->cgroup_oom_queue, u);
+ u->in_cgroup_oom_queue = true;
+
+ /* Trigger the defer event */
+ if (!u->manager->cgroup_oom_event_source) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+
+ r = sd_event_add_defer(u->manager->event, &s, on_cgroup_oom_event, u->manager);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create cgroup oom event source: %m");
+ return;
+ }
+
+ r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_NORMAL-8);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set priority of cgroup oom event source: %m");
+ return;
+ }
+
+ (void) sd_event_source_set_description(s, "cgroup-oom");
+ u->manager->cgroup_oom_event_source = TAKE_PTR(s);
+ }
+
+ r = sd_event_source_set_enabled(u->manager->cgroup_oom_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_error_errno(r, "Failed to enable cgroup oom event source: %m");
+}
+
+static int unit_check_cgroup_events(Unit *u) {
+ char *values[2] = {};
+ int r;
+
+ assert(u);
+
+ r = cg_get_keyed_attribute_graceful(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events",
+ STRV_MAKE("populated", "frozen"), values);
+ if (r < 0)
+ return r;
+
+ /* The cgroup.events notifications can be merged together so act as we saw the given state for the
+ * first time. The functions we call to handle given state are idempotent, which makes them
+ * effectively remember the previous state. */
+ if (values[0]) {
+ if (streq(values[0], "1"))
+ unit_remove_from_cgroup_empty_queue(u);
+ else
+ unit_add_to_cgroup_empty_queue(u);
+ }
+
+ /* Disregard freezer state changes due to operations not initiated by us */
+ if (values[1] && IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_THAWING)) {
+ if (streq(values[1], "0"))
+ unit_thawed(u);
+ else
+ unit_frozen(u);
+ }
+
+ free(values[0]);
+ free(values[1]);
+
+ return 0;
+}
+
+static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(m);
+
+ for (;;) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ return 0;
+
+ return log_error_errno(errno, "Failed to read control group inotify events: %m");
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ Unit *u;
+
+ if (e->wd < 0)
+ /* Queue overflow has no watch descriptor */
+ continue;
+
+ if (e->mask & IN_IGNORED)
+ /* The watch was just removed */
+ continue;
+
+ /* Note that inotify might deliver events for a watch even after it was removed,
+ * because it was queued before the removal. Let's ignore this here safely. */
+
+ u = hashmap_get(m->cgroup_control_inotify_wd_unit, INT_TO_PTR(e->wd));
+ if (u)
+ unit_check_cgroup_events(u);
+
+ u = hashmap_get(m->cgroup_memory_inotify_wd_unit, INT_TO_PTR(e->wd));
+ if (u)
+ unit_add_to_cgroup_oom_queue(u);
+ }
+ }
+}
+
+static int cg_bpf_mask_supported(CGroupMask *ret) {
+ CGroupMask mask = 0;
+ int r;
+
+ /* BPF-based firewall */
+ r = bpf_firewall_supported();
+ if (r > 0)
+ mask |= CGROUP_MASK_BPF_FIREWALL;
+
+ /* BPF-based device access control */
+ r = bpf_devices_supported();
+ if (r > 0)
+ mask |= CGROUP_MASK_BPF_DEVICES;
+
+ *ret = mask;
+ return 0;
+}
+
+int manager_setup_cgroup(Manager *m) {
+ _cleanup_free_ char *path = NULL;
+ const char *scope_path;
+ CGroupController c;
+ int r, all_unified;
+ CGroupMask mask;
+ char *e;
+
+ assert(m);
+
+ /* 1. Determine hierarchy */
+ m->cgroup_root = mfree(m->cgroup_root);
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
+ if (r < 0)
+ return log_error_errno(r, "Cannot determine cgroup we are running in: %m");
+
+ /* Chop off the init scope, if we are already located in it */
+ e = endswith(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
+
+ /* LEGACY: Also chop off the system slice if we are in
+ * it. This is to support live upgrades from older systemd
+ * versions where PID 1 was moved there. Also see
+ * cg_get_root_path(). */
+ if (!e && MANAGER_IS_SYSTEM(m)) {
+ e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
+ if (!e)
+ e = endswith(m->cgroup_root, "/system"); /* even more legacy */
+ }
+ if (e)
+ *e = 0;
+
+ /* And make sure to store away the root value without trailing slash, even for the root dir, so that we can
+ * easily prepend it everywhere. */
+ delete_trailing_chars(m->cgroup_root, "/");
+
+ /* 2. Show data */
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Cannot find cgroup mount point: %m");
+
+ r = cg_unified();
+ if (r < 0)
+ return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
+
+ all_unified = cg_all_unified();
+ if (all_unified < 0)
+ return log_error_errno(all_unified, "Couldn't determine whether we are in all unified mode: %m");
+ if (all_unified > 0)
+ log_debug("Unified cgroup hierarchy is located at %s.", path);
+ else {
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether systemd's own controller is in unified mode: %m");
+ if (r > 0)
+ log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path);
+ else
+ log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
+ }
+
+ /* 3. Allocate cgroup empty defer event source */
+ m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source);
+ r = sd_event_add_defer(m->event, &m->cgroup_empty_event_source, on_cgroup_empty_event, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create cgroup empty event source: %m");
+
+ /* Schedule cgroup empty checks early, but after having processed service notification messages or
+ * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of
+ * notification, and we collected the metadata the notification and SIGCHLD stuff offers first. */
+ r = sd_event_source_set_priority(m->cgroup_empty_event_source, SD_EVENT_PRIORITY_NORMAL-5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of cgroup empty event source: %m");
+
+ r = sd_event_source_set_enabled(m->cgroup_empty_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable cgroup empty event source: %m");
+
+ (void) sd_event_source_set_description(m->cgroup_empty_event_source, "cgroup-empty");
+
+ /* 4. Install notifier inotify object, or agent */
+ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
+
+ /* In the unified hierarchy we can get cgroup empty notifications via inotify. */
+
+ m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
+ safe_close(m->cgroup_inotify_fd);
+
+ m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (m->cgroup_inotify_fd < 0)
+ return log_error_errno(errno, "Failed to create control group inotify object: %m");
+
+ r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch control group inotify object: %m");
+
+ /* Process cgroup empty notifications early. Note that when this event is dispatched it'll
+ * just add the unit to a cgroup empty queue, hence let's run earlier than that. Also see
+ * handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
+ r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-9);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of inotify event source: %m");
+
+ (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
+
+ } else if (MANAGER_IS_SYSTEM(m) && manager_owns_host_root_cgroup(m) && !MANAGER_IS_TEST_RUN(m)) {
+
+ /* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
+ * since it does not generate events when control groups with children run empty. */
+
+ r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
+ if (r < 0)
+ log_warning_errno(r, "Failed to install release agent, ignoring: %m");
+ else if (r > 0)
+ log_debug("Installed release agent.");
+ else if (r == 0)
+ log_debug("Release agent already installed.");
+ }
+
+ /* 5. Make sure we are in the special "init.scope" unit in the root slice. */
+ scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+ if (r >= 0) {
+ /* Also, move all other userspace processes remaining in the root cgroup into that scope. */
+ r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+ if (r < 0)
+ log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
+
+ /* 6. And pin it, so that it cannot be unmounted */
+ safe_close(m->pin_cgroupfs_fd);
+ m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
+ if (m->pin_cgroupfs_fd < 0)
+ return log_error_errno(errno, "Failed to open pin file: %m");
+
+ } else if (!MANAGER_IS_TEST_RUN(m))
+ return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
+
+ /* 7. Always enable hierarchical support if it exists... */
+ if (!all_unified && !MANAGER_IS_TEST_RUN(m))
+ (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
+
+ /* 8. Figure out which controllers are supported */
+ r = cg_mask_supported(&m->cgroup_supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ /* 9. Figure out which bpf-based pseudo-controllers are supported */
+ r = cg_bpf_mask_supported(&mask);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported bpf-based pseudo-controllers: %m");
+ m->cgroup_supported |= mask;
+
+ /* 10. Log which controllers are supported */
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
+ log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
+
+ return 0;
+}
+
+void manager_shutdown_cgroup(Manager *m, bool delete) {
+ assert(m);
+
+ /* We can't really delete the group, since we are in it. But
+ * let's trim it. */
+ if (delete && m->cgroup_root && m->test_run_flags != MANAGER_TEST_RUN_MINIMAL)
+ (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
+
+ m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source);
+
+ m->cgroup_control_inotify_wd_unit = hashmap_free(m->cgroup_control_inotify_wd_unit);
+ m->cgroup_memory_inotify_wd_unit = hashmap_free(m->cgroup_memory_inotify_wd_unit);
+
+ m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
+ m->cgroup_inotify_fd = safe_close(m->cgroup_inotify_fd);
+
+ m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
+
+ m->cgroup_root = mfree(m->cgroup_root);
+}
+
+Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
+ char *p;
+ Unit *u;
+
+ assert(m);
+ assert(cgroup);
+
+ u = hashmap_get(m->cgroup_unit, cgroup);
+ if (u)
+ return u;
+
+ p = strdupa(cgroup);
+ for (;;) {
+ char *e;
+
+ e = strrchr(p, '/');
+ if (!e || e == p)
+ return hashmap_get(m->cgroup_unit, SPECIAL_ROOT_SLICE);
+
+ *e = 0;
+
+ u = hashmap_get(m->cgroup_unit, p);
+ if (u)
+ return u;
+ }
+}
+
+Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) {
+ _cleanup_free_ char *cgroup = NULL;
+
+ assert(m);
+
+ if (!pid_is_valid(pid))
+ return NULL;
+
+ if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup) < 0)
+ return NULL;
+
+ return manager_get_unit_by_cgroup(m, cgroup);
+}
+
+Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
+ Unit *u, **array;
+
+ assert(m);
+
+ /* Note that a process might be owned by multiple units, we return only one here, which is good enough for most
+ * cases, though not strictly correct. We prefer the one reported by cgroup membership, as that's the most
+ * relevant one as children of the process will be assigned to that one, too, before all else. */
+
+ if (!pid_is_valid(pid))
+ return NULL;
+
+ if (pid == getpid_cached())
+ return hashmap_get(m->units, SPECIAL_INIT_SCOPE);
+
+ u = manager_get_unit_by_pid_cgroup(m, pid);
+ if (u)
+ return u;
+
+ u = hashmap_get(m->watch_pids, PID_TO_PTR(pid));
+ if (u)
+ return u;
+
+ array = hashmap_get(m->watch_pids, PID_TO_PTR(-pid));
+ if (array)
+ return array[0];
+
+ return NULL;
+}
+
+int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
+ Unit *u;
+
+ assert(m);
+ assert(cgroup);
+
+ /* Called on the legacy hierarchy whenever we get an explicit cgroup notification from the cgroup agent process
+ * or from the --system instance */
+
+ log_debug("Got cgroup empty notification for: %s", cgroup);
+
+ u = manager_get_unit_by_cgroup(m, cgroup);
+ if (!u)
+ return 0;
+
+ unit_add_to_cgroup_empty_queue(u);
+ return 1;
+}
+
+int unit_get_memory_current(Unit *u, uint64_t *ret) {
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ if (!UNIT_CGROUP_BOOL(u, memory_accounting))
+ return -ENODATA;
+
+ if (!u->cgroup_path)
+ return -ENODATA;
+
+ /* The root cgroup doesn't expose this information, let's get it from /proc instead */
+ if (unit_has_host_root_cgroup(u))
+ return procfs_memory_get_used(ret);
+
+ if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
+ return -ENODATA;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+
+ return cg_get_attribute_as_uint64("memory", u->cgroup_path, r > 0 ? "memory.current" : "memory.usage_in_bytes", ret);
+}
+
+int unit_get_tasks_current(Unit *u, uint64_t *ret) {
+ assert(u);
+ assert(ret);
+
+ if (!UNIT_CGROUP_BOOL(u, tasks_accounting))
+ return -ENODATA;
+
+ if (!u->cgroup_path)
+ return -ENODATA;
+
+ /* The root cgroup doesn't expose this information, let's get it from /proc instead */
+ if (unit_has_host_root_cgroup(u))
+ return procfs_tasks_get_current(ret);
+
+ if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
+ return -ENODATA;
+
+ return cg_get_attribute_as_uint64("pids", u->cgroup_path, "pids.current", ret);
+}
+
+static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
+ uint64_t ns;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ if (!u->cgroup_path)
+ return -ENODATA;
+
+ /* The root cgroup doesn't expose this information, let's get it from /proc instead */
+ if (unit_has_host_root_cgroup(u))
+ return procfs_cpu_get_usage(ret);
+
+ /* Requisite controllers for CPU accounting are not enabled */
+ if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0)
+ return -ENODATA;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ _cleanup_free_ char *val = NULL;
+ uint64_t us;
+
+ r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
+ if (IN_SET(r, -ENOENT, -ENXIO))
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &us);
+ if (r < 0)
+ return r;
+
+ ns = us * NSEC_PER_USEC;
+ } else
+ return cg_get_attribute_as_uint64("cpuacct", u->cgroup_path, "cpuacct.usage", ret);
+
+ *ret = ns;
+ return 0;
+}
+
+int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
+ nsec_t ns;
+ int r;
+
+ assert(u);
+
+ /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was
+ * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
+ * call this function with a NULL return value. */
+
+ if (!UNIT_CGROUP_BOOL(u, cpu_accounting))
+ return -ENODATA;
+
+ r = unit_get_cpu_usage_raw(u, &ns);
+ if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
+ /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
+ * cached value. */
+
+ if (ret)
+ *ret = u->cpu_usage_last;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ if (ns > u->cpu_usage_base)
+ ns -= u->cpu_usage_base;
+ else
+ ns = 0;
+
+ u->cpu_usage_last = ns;
+ if (ret)
+ *ret = ns;
+
+ return 0;
+}
+
+int unit_get_ip_accounting(
+ Unit *u,
+ CGroupIPAccountingMetric metric,
+ uint64_t *ret) {
+
+ uint64_t value;
+ int fd, r;
+
+ assert(u);
+ assert(metric >= 0);
+ assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
+ assert(ret);
+
+ if (!UNIT_CGROUP_BOOL(u, ip_accounting))
+ return -ENODATA;
+
+ fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
+ u->ip_accounting_ingress_map_fd :
+ u->ip_accounting_egress_map_fd;
+ if (fd < 0)
+ return -ENODATA;
+
+ if (IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES))
+ r = bpf_firewall_read_accounting(fd, &value, NULL);
+ else
+ r = bpf_firewall_read_accounting(fd, NULL, &value);
+ if (r < 0)
+ return r;
+
+ /* Add in additional metrics from a previous runtime. Note that when reexecing/reloading the daemon we compile
+ * all BPF programs and maps anew, but serialize the old counters. When deserializing we store them in the
+ * ip_accounting_extra[] field, and add them in here transparently. */
+
+ *ret = value + u->ip_accounting_extra[metric];
+
+ return r;
+}
+
+static int unit_get_io_accounting_raw(Unit *u, uint64_t ret[static _CGROUP_IO_ACCOUNTING_METRIC_MAX]) {
+ static const char *const field_names[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IO_READ_BYTES] = "rbytes=",
+ [CGROUP_IO_WRITE_BYTES] = "wbytes=",
+ [CGROUP_IO_READ_OPERATIONS] = "rios=",
+ [CGROUP_IO_WRITE_OPERATIONS] = "wios=",
+ };
+ uint64_t acc[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {};
+ _cleanup_free_ char *path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(u);
+
+ if (!u->cgroup_path)
+ return -ENODATA;
+
+ if (unit_has_host_root_cgroup(u))
+ return -ENODATA; /* TODO: return useful data for the top-level cgroup */
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r == 0) /* TODO: support cgroupv1 */
+ return -ENODATA;
+
+ if (!FLAGS_SET(u->cgroup_realized_mask, CGROUP_MASK_IO))
+ return -ENODATA;
+
+ r = cg_get_path("io", u->cgroup_path, "io.stat", &path);
+ if (r < 0)
+ return r;
+
+ f = fopen(path, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = line;
+ p += strcspn(p, WHITESPACE); /* Skip over device major/minor */
+ p += strspn(p, WHITESPACE); /* Skip over following whitespace */
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++) {
+ const char *x;
+
+ x = startswith(word, field_names[i]);
+ if (x) {
+ uint64_t w;
+
+ r = safe_atou64(x, &w);
+ if (r < 0)
+ return r;
+
+ /* Sum up the stats of all devices */
+ acc[i] += w;
+ break;
+ }
+ }
+ }
+ }
+
+ memcpy(ret, acc, sizeof(acc));
+ return 0;
+}
+
+int unit_get_io_accounting(
+ Unit *u,
+ CGroupIOAccountingMetric metric,
+ bool allow_cache,
+ uint64_t *ret) {
+
+ uint64_t raw[_CGROUP_IO_ACCOUNTING_METRIC_MAX];
+ int r;
+
+ /* Retrieve an IO account parameter. This will subtract the counter when the unit was started. */
+
+ if (!UNIT_CGROUP_BOOL(u, io_accounting))
+ return -ENODATA;
+
+ if (allow_cache && u->io_accounting_last[metric] != UINT64_MAX)
+ goto done;
+
+ r = unit_get_io_accounting_raw(u, raw);
+ if (r == -ENODATA && u->io_accounting_last[metric] != UINT64_MAX)
+ goto done;
+ if (r < 0)
+ return r;
+
+ for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++) {
+ /* Saturated subtraction */
+ if (raw[i] > u->io_accounting_base[i])
+ u->io_accounting_last[i] = raw[i] - u->io_accounting_base[i];
+ else
+ u->io_accounting_last[i] = 0;
+ }
+
+done:
+ if (ret)
+ *ret = u->io_accounting_last[metric];
+
+ return 0;
+}
+
+int unit_reset_cpu_accounting(Unit *u) {
+ int r;
+
+ assert(u);
+
+ u->cpu_usage_last = NSEC_INFINITY;
+
+ r = unit_get_cpu_usage_raw(u, &u->cpu_usage_base);
+ if (r < 0) {
+ u->cpu_usage_base = 0;
+ return r;
+ }
+
+ return 0;
+}
+
+int unit_reset_ip_accounting(Unit *u) {
+ int r = 0, q = 0;
+
+ assert(u);
+
+ if (u->ip_accounting_ingress_map_fd >= 0)
+ r = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
+
+ if (u->ip_accounting_egress_map_fd >= 0)
+ q = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);
+
+ zero(u->ip_accounting_extra);
+
+ return r < 0 ? r : q;
+}
+
+int unit_reset_io_accounting(Unit *u) {
+ int r;
+
+ assert(u);
+
+ for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++)
+ u->io_accounting_last[i] = UINT64_MAX;
+
+ r = unit_get_io_accounting_raw(u, u->io_accounting_base);
+ if (r < 0) {
+ zero(u->io_accounting_base);
+ return r;
+ }
+
+ return 0;
+}
+
+int unit_reset_accounting(Unit *u) {
+ int r, q, v;
+
+ assert(u);
+
+ r = unit_reset_cpu_accounting(u);
+ q = unit_reset_io_accounting(u);
+ v = unit_reset_ip_accounting(u);
+
+ return r < 0 ? r : q < 0 ? q : v;
+}
+
+void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return;
+
+ if (m == 0)
+ return;
+
+ /* always invalidate compat pairs together */
+ if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
+ m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
+
+ if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
+ m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
+
+ if (FLAGS_SET(u->cgroup_invalidated_mask, m)) /* NOP? */
+ return;
+
+ u->cgroup_invalidated_mask |= m;
+ unit_add_to_cgroup_realize_queue(u);
+}
+
+void unit_invalidate_cgroup_bpf(Unit *u) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return;
+
+ if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
+ return;
+
+ u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
+ unit_add_to_cgroup_realize_queue(u);
+
+ /* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access
+ * list of our children includes our own. */
+ if (u->type == UNIT_SLICE) {
+ Unit *member;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE])
+ if (UNIT_DEREF(member->slice) == u)
+ unit_invalidate_cgroup_bpf(member);
+ }
+}
+
+bool unit_cgroup_delegate(Unit *u) {
+ CGroupContext *c;
+
+ assert(u);
+
+ if (!UNIT_VTABLE(u)->can_delegate)
+ return false;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return false;
+
+ return c->delegate;
+}
+
+void manager_invalidate_startup_units(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ SET_FOREACH(u, m->startup_units)
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_BLKIO);
+}
+
+static int unit_get_nice(Unit *u) {
+ ExecContext *ec;
+
+ ec = unit_get_exec_context(u);
+ return ec ? ec->nice : 0;
+}
+
+static uint64_t unit_get_cpu_weight(Unit *u) {
+ ManagerState state = manager_state(u->manager);
+ CGroupContext *cc;
+
+ cc = unit_get_cgroup_context(u);
+ return cc ? cgroup_context_cpu_weight(cc, state) : CGROUP_WEIGHT_DEFAULT;
+}
+
+int compare_job_priority(const void *a, const void *b) {
+ const Job *x = a, *y = b;
+ int nice_x, nice_y;
+ uint64_t weight_x, weight_y;
+ int ret;
+
+ if ((ret = CMP(x->unit->type, y->unit->type)) != 0)
+ return -ret;
+
+ weight_x = unit_get_cpu_weight(x->unit);
+ weight_y = unit_get_cpu_weight(y->unit);
+
+ if ((ret = CMP(weight_x, weight_y)) != 0)
+ return -ret;
+
+ nice_x = unit_get_nice(x->unit);
+ nice_y = unit_get_nice(y->unit);
+
+ if ((ret = CMP(nice_x, nice_y)) != 0)
+ return ret;
+
+ return strcmp(x->unit->id, y->unit->id);
+}
+
+int unit_cgroup_freezer_action(Unit *u, FreezerAction action) {
+ _cleanup_free_ char *path = NULL;
+ FreezerState target, kernel = _FREEZER_STATE_INVALID;
+ int r;
+
+ assert(u);
+ assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW));
+
+ if (!cg_freezer_supported())
+ return 0;
+
+ if (!u->cgroup_realized)
+ return -EBUSY;
+
+ target = action == FREEZER_FREEZE ? FREEZER_FROZEN : FREEZER_RUNNING;
+
+ r = unit_freezer_state_kernel(u, &kernel);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to obtain cgroup freezer state: %m");
+
+ if (target == kernel) {
+ u->freezer_state = target;
+ return 0;
+ }
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.freeze", &path);
+ if (r < 0)
+ return r;
+
+ log_unit_debug(u, "%s unit.", action == FREEZER_FREEZE ? "Freezing" : "Thawing");
+
+ if (action == FREEZER_FREEZE)
+ u->freezer_state = FREEZER_FREEZING;
+ else
+ u->freezer_state = FREEZER_THAWING;
+
+ r = write_string_file(path, one_zero(action == FREEZER_FREEZE), WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
+ [CGROUP_DEVICE_POLICY_AUTO] = "auto",
+ [CGROUP_DEVICE_POLICY_CLOSED] = "closed",
+ [CGROUP_DEVICE_POLICY_STRICT] = "strict",
+};
+
+int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
+ _cleanup_free_ char *v = NULL;
+ int r;
+
+ assert(u);
+ assert(cpus);
+
+ if (!u->cgroup_path)
+ return -ENODATA;
+
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPUSET) == 0)
+ return -ENODATA;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENODATA;
+
+ r = cg_get_attribute("cpuset", u->cgroup_path, name, &v);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ return parse_cpu_set_full(v, cpus, false, NULL, NULL, 0, NULL);
+}
+
+DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);
+
+static const char* const freezer_action_table[_FREEZER_ACTION_MAX] = {
+ [FREEZER_FREEZE] = "freeze",
+ [FREEZER_THAW] = "thaw",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(freezer_action, FreezerAction);
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
new file mode 100644
index 0000000..66f3a63
--- /dev/null
+++ b/src/core/cgroup.h
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "cgroup-util.h"
+#include "cpu-set-util.h"
+#include "ip-address-access.h"
+#include "list.h"
+#include "time-util.h"
+
+typedef struct TasksMax {
+ /* If scale == 0, just use value; otherwise, value / scale.
+ * See tasks_max_resolve(). */
+ uint64_t value;
+ uint64_t scale;
+} TasksMax;
+
+#define TASKS_MAX_UNSET ((TasksMax) { .value = UINT64_MAX, .scale = 0 })
+
+static inline bool tasks_max_isset(const TasksMax *tasks_max) {
+ return tasks_max->value != UINT64_MAX || tasks_max->scale != 0;
+}
+
+uint64_t tasks_max_resolve(const TasksMax *tasks_max);
+
+typedef struct CGroupContext CGroupContext;
+typedef struct CGroupDeviceAllow CGroupDeviceAllow;
+typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
+typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
+typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
+typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
+typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
+
+typedef enum CGroupDevicePolicy {
+ /* When devices listed, will allow those, plus built-in ones, if none are listed will allow
+ * everything. */
+ CGROUP_DEVICE_POLICY_AUTO,
+
+ /* Everything forbidden, except built-in ones and listed ones. */
+ CGROUP_DEVICE_POLICY_CLOSED,
+
+ /* Everything forbidden, except for the listed devices */
+ CGROUP_DEVICE_POLICY_STRICT,
+
+ _CGROUP_DEVICE_POLICY_MAX,
+ _CGROUP_DEVICE_POLICY_INVALID = -1
+} CGroupDevicePolicy;
+
+typedef enum FreezerAction {
+ FREEZER_FREEZE,
+ FREEZER_THAW,
+
+ _FREEZER_ACTION_MAX,
+ _FREEZER_ACTION_INVALID = -1,
+} FreezerAction;
+
+struct CGroupDeviceAllow {
+ LIST_FIELDS(CGroupDeviceAllow, device_allow);
+ char *path;
+ bool r:1;
+ bool w:1;
+ bool m:1;
+};
+
+struct CGroupIODeviceWeight {
+ LIST_FIELDS(CGroupIODeviceWeight, device_weights);
+ char *path;
+ uint64_t weight;
+};
+
+struct CGroupIODeviceLimit {
+ LIST_FIELDS(CGroupIODeviceLimit, device_limits);
+ char *path;
+ uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
+};
+
+struct CGroupIODeviceLatency {
+ LIST_FIELDS(CGroupIODeviceLatency, device_latencies);
+ char *path;
+ usec_t target_usec;
+};
+
+struct CGroupBlockIODeviceWeight {
+ LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
+ char *path;
+ uint64_t weight;
+};
+
+struct CGroupBlockIODeviceBandwidth {
+ LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths);
+ char *path;
+ uint64_t rbps;
+ uint64_t wbps;
+};
+
+struct CGroupContext {
+ bool cpu_accounting;
+ bool io_accounting;
+ bool blockio_accounting;
+ bool memory_accounting;
+ bool tasks_accounting;
+ bool ip_accounting;
+
+ /* Configures the memory.oom.group attribute (on unified) */
+ bool memory_oom_group;
+
+ bool delegate;
+ CGroupMask delegate_controllers;
+ CGroupMask disable_controllers;
+
+ /* For unified hierarchy */
+ uint64_t cpu_weight;
+ uint64_t startup_cpu_weight;
+ usec_t cpu_quota_per_sec_usec;
+ usec_t cpu_quota_period_usec;
+
+ CPUSet cpuset_cpus;
+ CPUSet cpuset_mems;
+
+ uint64_t io_weight;
+ uint64_t startup_io_weight;
+ LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
+ LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
+ LIST_HEAD(CGroupIODeviceLatency, io_device_latencies);
+
+ uint64_t default_memory_min;
+ uint64_t default_memory_low;
+ uint64_t memory_min;
+ uint64_t memory_low;
+ uint64_t memory_high;
+ uint64_t memory_max;
+ uint64_t memory_swap_max;
+
+ bool default_memory_min_set;
+ bool default_memory_low_set;
+ bool memory_min_set;
+ bool memory_low_set;
+
+ LIST_HEAD(IPAddressAccessItem, ip_address_allow);
+ LIST_HEAD(IPAddressAccessItem, ip_address_deny);
+
+ char **ip_filters_ingress;
+ char **ip_filters_egress;
+
+ /* For legacy hierarchies */
+ uint64_t cpu_shares;
+ uint64_t startup_cpu_shares;
+
+ uint64_t blockio_weight;
+ uint64_t startup_blockio_weight;
+ LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights);
+ LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths);
+
+ uint64_t memory_limit;
+
+ CGroupDevicePolicy device_policy;
+ LIST_HEAD(CGroupDeviceAllow, device_allow);
+
+ /* Common */
+ TasksMax tasks_max;
+
+ /* Settings for systemd-oomd */
+ ManagedOOMMode moom_swap;
+ ManagedOOMMode moom_mem_pressure;
+ int moom_mem_pressure_limit;
+};
+
+/* Used when querying IP accounting data */
+typedef enum CGroupIPAccountingMetric {
+ CGROUP_IP_INGRESS_BYTES,
+ CGROUP_IP_INGRESS_PACKETS,
+ CGROUP_IP_EGRESS_BYTES,
+ CGROUP_IP_EGRESS_PACKETS,
+ _CGROUP_IP_ACCOUNTING_METRIC_MAX,
+ _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1,
+} CGroupIPAccountingMetric;
+
+/* Used when querying IO accounting data */
+typedef enum CGroupIOAccountingMetric {
+ CGROUP_IO_READ_BYTES,
+ CGROUP_IO_WRITE_BYTES,
+ CGROUP_IO_READ_OPERATIONS,
+ CGROUP_IO_WRITE_OPERATIONS,
+ _CGROUP_IO_ACCOUNTING_METRIC_MAX,
+ _CGROUP_IO_ACCOUNTING_METRIC_INVALID = -1,
+} CGroupIOAccountingMetric;
+
+typedef struct Unit Unit;
+typedef struct Manager Manager;
+
+usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period);
+
+void cgroup_context_init(CGroupContext *c);
+void cgroup_context_done(CGroupContext *c);
+void cgroup_context_dump(Unit *u, FILE* f, const char *prefix);
+
+void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
+void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w);
+void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l);
+void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l);
+void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
+void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
+
+int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
+
+CGroupMask unit_get_own_mask(Unit *u);
+CGroupMask unit_get_delegate_mask(Unit *u);
+CGroupMask unit_get_members_mask(Unit *u);
+CGroupMask unit_get_siblings_mask(Unit *u);
+CGroupMask unit_get_ancestor_disable_mask(Unit *u);
+
+CGroupMask unit_get_target_mask(Unit *u);
+CGroupMask unit_get_enable_mask(Unit *u);
+
+void unit_invalidate_cgroup_members_masks(Unit *u);
+
+void unit_add_family_to_cgroup_realize_queue(Unit *u);
+
+const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask);
+char *unit_default_cgroup_path(const Unit *u);
+int unit_set_cgroup_path(Unit *u, const char *path);
+int unit_pick_cgroup_path(Unit *u);
+
+int unit_realize_cgroup(Unit *u);
+void unit_prune_cgroup(Unit *u);
+int unit_watch_cgroup(Unit *u);
+int unit_watch_cgroup_memory(Unit *u);
+
+void unit_release_cgroup(Unit *u);
+/* Releases the cgroup only if it is recursively empty.
+ * Returns true if the cgroup was released, false otherwise. */
+bool unit_maybe_release_cgroup(Unit *u);
+
+void unit_add_to_cgroup_empty_queue(Unit *u);
+int unit_check_oomd_kill(Unit *u);
+int unit_check_oom(Unit *u);
+
+int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path);
+
+int manager_setup_cgroup(Manager *m);
+void manager_shutdown_cgroup(Manager *m, bool delete);
+
+unsigned manager_dispatch_cgroup_realize_queue(Manager *m);
+
+Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
+Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid);
+Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
+
+uint64_t unit_get_ancestor_memory_min(Unit *u);
+uint64_t unit_get_ancestor_memory_low(Unit *u);
+
+int unit_search_main_pid(Unit *u, pid_t *ret);
+int unit_watch_all_pids(Unit *u);
+
+int unit_synthesize_cgroup_empty_event(Unit *u);
+
+int unit_get_memory_current(Unit *u, uint64_t *ret);
+int unit_get_tasks_current(Unit *u, uint64_t *ret);
+int unit_get_cpu_usage(Unit *u, nsec_t *ret);
+int unit_get_io_accounting(Unit *u, CGroupIOAccountingMetric metric, bool allow_cache, uint64_t *ret);
+int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
+
+int unit_reset_cpu_accounting(Unit *u);
+int unit_reset_ip_accounting(Unit *u);
+int unit_reset_io_accounting(Unit *u);
+int unit_reset_accounting(Unit *u);
+
+#define UNIT_CGROUP_BOOL(u, name) \
+ ({ \
+ CGroupContext *cc = unit_get_cgroup_context(u); \
+ cc ? cc->name : false; \
+ })
+
+bool manager_owns_host_root_cgroup(Manager *m);
+bool unit_has_host_root_cgroup(Unit *u);
+
+int manager_notify_cgroup_empty(Manager *m, const char *group);
+
+void unit_invalidate_cgroup(Unit *u, CGroupMask m);
+void unit_invalidate_cgroup_bpf(Unit *u);
+
+void manager_invalidate_startup_units(Manager *m);
+
+const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
+CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
+
+bool unit_cgroup_delegate(Unit *u);
+
+int compare_job_priority(const void *a, const void *b);
+
+int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name);
+int unit_cgroup_freezer_action(Unit *u, FreezerAction action);
+
+const char* freezer_action_to_string(FreezerAction a) _const_;
+FreezerAction freezer_action_from_string(const char *s) _pure_;
diff --git a/src/core/core-varlink.c b/src/core/core-varlink.c
new file mode 100644
index 0000000..dd6c11a
--- /dev/null
+++ b/src/core/core-varlink.c
@@ -0,0 +1,482 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "core-varlink.h"
+#include "mkdir.h"
+#include "strv.h"
+#include "user-util.h"
+#include "varlink.h"
+
+typedef struct LookupParameters {
+ const char *user_name;
+ const char *group_name;
+ union {
+ uid_t uid;
+ gid_t gid;
+ };
+ const char *service;
+} LookupParameters;
+
+static const char* const managed_oom_mode_properties[] = {
+ "ManagedOOMSwap",
+ "ManagedOOMMemoryPressure",
+};
+
+static int build_user_json(const char *user_name, uid_t uid, JsonVariant **ret) {
+ assert(user_name);
+ assert(uid_is_valid(uid));
+ assert(ret);
+
+ return json_build(ret, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("record", JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(user_name)),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(uid)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(uid)),
+ JSON_BUILD_PAIR("realName", JSON_BUILD_STRING("Dynamic User")),
+ JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING("/")),
+ JSON_BUILD_PAIR("shell", JSON_BUILD_STRING(NOLOGIN)),
+ JSON_BUILD_PAIR("locked", JSON_BUILD_BOOLEAN(true)),
+ JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.DynamicUser")),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("dynamic"))))));
+}
+
+static bool user_match_lookup_parameters(LookupParameters *p, const char *name, uid_t uid) {
+ assert(p);
+
+ if (p->user_name && !streq(name, p->user_name))
+ return false;
+
+ if (uid_is_valid(p->uid) && uid != p->uid)
+ return false;
+
+ return true;
+}
+
+static int build_managed_oom_json_array_element(Unit *u, const char *property, JsonVariant **ret_v) {
+ bool use_limit = false;
+ CGroupContext *c;
+ const char *mode;
+
+ assert(u);
+ assert(property);
+ assert(ret_v);
+
+ if (!UNIT_VTABLE(u)->can_set_managed_oom)
+ return -EOPNOTSUPP;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return -EINVAL;
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)))
+ /* systemd-oomd should always treat inactive units as though they didn't enable any action since they
+ * should not have a valid cgroup */
+ mode = managed_oom_mode_to_string(MANAGED_OOM_AUTO);
+ else if (streq(property, "ManagedOOMSwap"))
+ mode = managed_oom_mode_to_string(c->moom_swap);
+ else if (streq(property, "ManagedOOMMemoryPressure")) {
+ mode = managed_oom_mode_to_string(c->moom_mem_pressure);
+ use_limit = true;
+ } else
+ return -EINVAL;
+
+ return json_build(ret_v, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("mode", JSON_BUILD_STRING(mode)),
+ JSON_BUILD_PAIR("path", JSON_BUILD_STRING(u->cgroup_path)),
+ JSON_BUILD_PAIR("property", JSON_BUILD_STRING(property)),
+ JSON_BUILD_PAIR_CONDITION(use_limit, "limit", JSON_BUILD_UNSIGNED(c->moom_mem_pressure_limit))));
+}
+
+int manager_varlink_send_managed_oom_update(Unit *u) {
+ _cleanup_(json_variant_unrefp) JsonVariant *arr = NULL, *v = NULL;
+ CGroupContext *c;
+ int r;
+
+ assert(u);
+
+ if (!UNIT_VTABLE(u)->can_set_managed_oom || !u->manager || !u->manager->managed_oom_varlink_request || !u->cgroup_path)
+ return 0;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ r = json_build(&arr, JSON_BUILD_EMPTY_ARRAY);
+ if (r < 0)
+ return r;
+
+ for (size_t i = 0; i < ELEMENTSOF(managed_oom_mode_properties); i++) {
+ _cleanup_(json_variant_unrefp) JsonVariant *e = NULL;
+
+ r = build_managed_oom_json_array_element(u, managed_oom_mode_properties[i], &e);
+ if (r < 0)
+ return r;
+
+ r = json_variant_append_array(&arr, e);
+ if (r < 0)
+ return r;
+ }
+
+ r = json_build(&v, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("cgroups", JSON_BUILD_VARIANT(arr))));
+ if (r < 0)
+ return r;
+
+ return varlink_notify(u->manager->managed_oom_varlink_request, v);
+}
+
+static int vl_method_subscribe_managed_oom_cgroups(
+ Varlink *link,
+ JsonVariant *parameters,
+ VarlinkMethodFlags flags,
+ void *userdata) {
+ static const UnitType supported_unit_types[] = { UNIT_SLICE, UNIT_SERVICE, UNIT_SCOPE };
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *arr = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(link);
+ assert(m);
+
+ if (json_variant_elements(parameters) > 0)
+ return varlink_error_invalid_parameter(link, parameters);
+
+ /* We only take one subscriber for this method so return an error if there's already an existing one.
+ * This shouldn't happen since systemd-oomd is the only client of this method. */
+ if (FLAGS_SET(flags, VARLINK_METHOD_MORE) && m->managed_oom_varlink_request)
+ return varlink_error(m->managed_oom_varlink_request, VARLINK_ERROR_SUBSCRIPTION_TAKEN, NULL);
+
+ r = json_build(&arr, JSON_BUILD_EMPTY_ARRAY);
+ if (r < 0)
+ return r;
+
+ for (size_t i = 0; i < ELEMENTSOF(supported_unit_types); i++) {
+ Unit *u;
+
+ LIST_FOREACH(units_by_type, u, m->units_by_type[supported_unit_types[i]]) {
+ CGroupContext *c;
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)))
+ continue;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ continue;
+
+ for (size_t j = 0; j < ELEMENTSOF(managed_oom_mode_properties); j++) {
+ _cleanup_(json_variant_unrefp) JsonVariant *e = NULL;
+
+ /* For the initial varlink call we only care about units that enabled (i.e. mode is not
+ * set to "auto") oomd properties. */
+ if (!(streq(managed_oom_mode_properties[j], "ManagedOOMSwap") && c->moom_swap == MANAGED_OOM_KILL) &&
+ !(streq(managed_oom_mode_properties[j], "ManagedOOMMemoryPressure") && c->moom_mem_pressure == MANAGED_OOM_KILL))
+ continue;
+
+ r = build_managed_oom_json_array_element(u, managed_oom_mode_properties[j], &e);
+ if (r < 0)
+ return r;
+
+ r = json_variant_append_array(&arr, e);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ r = json_build(&v, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("cgroups", JSON_BUILD_VARIANT(arr))));
+ if (r < 0)
+ return r;
+
+ if (!FLAGS_SET(flags, VARLINK_METHOD_MORE))
+ return varlink_reply(link, v);
+
+ m->managed_oom_varlink_request = varlink_ref(link);
+ return varlink_notify(m->managed_oom_varlink_request, v);
+}
+
+static int vl_method_get_user_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(LookupParameters, uid), 0 },
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, user_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ LookupParameters p = {
+ .uid = UID_INVALID,
+ };
+ _cleanup_free_ char *found_name = NULL;
+ uid_t found_uid = UID_INVALID, uid;
+ Manager *m = userdata;
+ const char *un;
+ int r;
+
+ assert(parameters);
+ assert(m);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, "io.systemd.DynamicUser"))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ if (uid_is_valid(p.uid))
+ r = dynamic_user_lookup_uid(m, p.uid, &found_name);
+ else if (p.user_name)
+ r = dynamic_user_lookup_name(m, p.user_name, &found_uid);
+ else {
+ DynamicUser *d;
+
+ HASHMAP_FOREACH(d, m->dynamic_users) {
+ r = dynamic_user_current(d, &uid);
+ if (r == -EAGAIN) /* not realized yet? */
+ continue;
+ if (r < 0)
+ return r;
+
+ if (!user_match_lookup_parameters(&p, d->name, uid))
+ continue;
+
+ if (v) {
+ r = varlink_notify(link, v);
+ if (r < 0)
+ return r;
+
+ v = json_variant_unref(v);
+ }
+
+ r = build_user_json(d->name, uid, &v);
+ if (r < 0)
+ return r;
+ }
+
+ if (!v)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_reply(link, v);
+ }
+ if (r == -ESRCH)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ if (r < 0)
+ return r;
+
+ uid = uid_is_valid(found_uid) ? found_uid : p.uid;
+ un = found_name ?: p.user_name;
+
+ if (!user_match_lookup_parameters(&p, un, uid))
+ return varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
+
+ r = build_user_json(un, uid, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, v);
+}
+
+static int build_group_json(const char *group_name, gid_t gid, JsonVariant **ret) {
+ assert(group_name);
+ assert(gid_is_valid(gid));
+ assert(ret);
+
+ return json_build(ret, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("record", JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(group_name)),
+ JSON_BUILD_PAIR("description", JSON_BUILD_STRING("Dynamic Group")),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(gid)),
+ JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.DynamicUser")),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("dynamic"))))));
+ }
+
+static bool group_match_lookup_parameters(LookupParameters *p, const char *name, gid_t gid) {
+ assert(p);
+
+ if (p->group_name && !streq(name, p->group_name))
+ return false;
+
+ if (gid_is_valid(p->gid) && gid != p->gid)
+ return false;
+
+ return true;
+}
+
+static int vl_method_get_group_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(LookupParameters, gid), 0 },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, group_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ LookupParameters p = {
+ .gid = GID_INVALID,
+ };
+ _cleanup_free_ char *found_name = NULL;
+ uid_t found_gid = GID_INVALID, gid;
+ Manager *m = userdata;
+ const char *gn;
+ int r;
+
+ assert(parameters);
+ assert(m);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, "io.systemd.DynamicUser"))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ if (gid_is_valid(p.gid))
+ r = dynamic_user_lookup_uid(m, (uid_t) p.gid, &found_name);
+ else if (p.group_name)
+ r = dynamic_user_lookup_name(m, p.group_name, (uid_t*) &found_gid);
+ else {
+ DynamicUser *d;
+
+ HASHMAP_FOREACH(d, m->dynamic_users) {
+ uid_t uid;
+
+ r = dynamic_user_current(d, &uid);
+ if (r == -EAGAIN)
+ continue;
+ if (r < 0)
+ return r;
+
+ if (!group_match_lookup_parameters(&p, d->name, (gid_t) uid))
+ continue;
+
+ if (v) {
+ r = varlink_notify(link, v);
+ if (r < 0)
+ return r;
+
+ v = json_variant_unref(v);
+ }
+
+ r = build_group_json(d->name, (gid_t) uid, &v);
+ if (r < 0)
+ return r;
+ }
+
+ if (!v)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_reply(link, v);
+ }
+ if (r == -ESRCH)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ if (r < 0)
+ return r;
+
+ gid = gid_is_valid(found_gid) ? found_gid : p.gid;
+ gn = found_name ?: p.group_name;
+
+ if (!group_match_lookup_parameters(&p, gn, gid))
+ return varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
+
+ r = build_group_json(gn, gid, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, v);
+}
+
+static int vl_method_get_memberships(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, user_name), JSON_SAFE },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, group_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ LookupParameters p = {};
+ int r;
+
+ assert(parameters);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, "io.systemd.DynamicUser"))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ /* We don't support auxiliary groups with dynamic users. */
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+}
+
+static void vl_disconnect(VarlinkServer *s, Varlink *link, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(s);
+ assert(link);
+
+ if (link == m->managed_oom_varlink_request)
+ m->managed_oom_varlink_request = varlink_unref(link);
+}
+
+int manager_varlink_init(Manager *m) {
+ _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
+ int r;
+
+ assert(m);
+
+ if (m->varlink_server)
+ return 0;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return 0;
+
+ r = varlink_server_new(&s, VARLINK_SERVER_ACCOUNT_UID);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate varlink server object: %m");
+
+ varlink_server_set_userdata(s, m);
+
+ r = varlink_server_bind_method_many(
+ s,
+ "io.systemd.UserDatabase.GetUserRecord", vl_method_get_user_record,
+ "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
+ "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships,
+ "io.systemd.ManagedOOM.SubscribeManagedOOMCGroups", vl_method_subscribe_managed_oom_cgroups);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register varlink methods: %m");
+
+ r = varlink_server_bind_disconnect(s, vl_disconnect);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register varlink disconnect handler: %m");
+
+ if (!MANAGER_IS_TEST_RUN(m)) {
+ (void) mkdir_p_label("/run/systemd/userdb", 0755);
+
+ r = varlink_server_listen_address(s, "/run/systemd/userdb/io.systemd.DynamicUser", 0666);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind to varlink socket: %m");
+
+ r = varlink_server_listen_address(s, VARLINK_ADDR_PATH_MANAGED_OOM, 0666);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind to varlink socket: %m");
+ }
+
+ r = varlink_server_attach_event(s, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ m->varlink_server = TAKE_PTR(s);
+ return 0;
+}
+
+void manager_varlink_done(Manager *m) {
+ assert(m);
+
+ /* Send the final message if we still have a subscribe request open. */
+ if (m->managed_oom_varlink_request)
+ m->managed_oom_varlink_request = varlink_close_unref(m->managed_oom_varlink_request);
+
+ m->varlink_server = varlink_server_unref(m->varlink_server);
+}
diff --git a/src/core/core-varlink.h b/src/core/core-varlink.h
new file mode 100644
index 0000000..20507a4
--- /dev/null
+++ b/src/core/core-varlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "manager.h"
+
+int manager_varlink_init(Manager *m);
+void manager_varlink_done(Manager *m);
+
+/* The manager is expected to send an update to systemd-oomd if one of the following occurs:
+ * - The value of ManagedOOM*= properties change
+ * - A unit with ManagedOOM*= properties changes unit active state */
+int manager_varlink_send_managed_oom_update(Unit *u);
diff --git a/src/core/dbus-automount.c b/src/core/dbus-automount.c
new file mode 100644
index 0000000..3f74488
--- /dev/null
+++ b/src/core/dbus-automount.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "automount.h"
+#include "bus-get-properties.h"
+#include "dbus-automount.h"
+#include "dbus-util.h"
+#include "string-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, automount_result, AutomountResult);
+
+const sd_bus_vtable bus_automount_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Where", "s", NULL, offsetof(Automount, where), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Automount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Automount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("TimeoutIdleUSec", "t", bus_property_get_usec, offsetof(Automount, timeout_idle_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_automount_set_transient_property(
+ Automount *a,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(a);
+
+ assert(a);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "Where"))
+ return bus_set_transient_path(u, name, &a->where, message, flags, error);
+
+ if (streq(name, "TimeoutIdleUSec"))
+ return bus_set_transient_usec_fix_0(u, name, &a->timeout_idle_usec, message, flags, error);
+
+ if (streq(name, "DirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &a->directory_mode, message, flags, error);
+
+ return 0;
+}
+
+int bus_automount_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Automount *a = AUTOMOUNT(u);
+
+ assert(a);
+ assert(name);
+ assert(message);
+
+ if (u->transient && u->load_state == UNIT_STUB) /* This is a transient unit? let's load a little more */
+ return bus_automount_set_transient_property(a, name, message, flags, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-automount.h b/src/core/dbus-automount.h
new file mode 100644
index 0000000..cfceaec
--- /dev/null
+++ b/src/core/dbus-automount.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_automount_vtable[];
+
+int bus_automount_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
new file mode 100644
index 0000000..37c581f
--- /dev/null
+++ b/src/core/dbus-cgroup.c
@@ -0,0 +1,1718 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bus-get-properties.h"
+#include "cgroup-util.h"
+#include "cgroup.h"
+#include "core-varlink.h"
+#include "dbus-cgroup.h"
+#include "dbus-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "limits-util.h"
+#include "path-util.h"
+
+BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_resolve);
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_managed_oom_mode, managed_oom_mode, ManagedOOMMode);
+
+static int property_get_cgroup_mask(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupMask *mask = userdata;
+ CGroupController ctrl;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+ if ((*mask & CGROUP_CONTROLLER_TO_MASK(ctrl)) == 0)
+ continue;
+
+ r = sd_bus_message_append(reply, "s", cgroup_controller_to_string(ctrl));
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_delegate_controllers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (!c->delegate)
+ return sd_bus_message_append(reply, "as", 0);
+
+ return property_get_cgroup_mask(bus, path, interface, property, reply, &c->delegate_controllers, error);
+}
+
+static int property_get_cpuset(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CPUSet *cpus = userdata;
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ assert(bus);
+ assert(reply);
+ assert(cpus);
+
+ (void) cpu_set_to_dbus(cpus, &array, &allocated);
+ return sd_bus_message_append_array(reply, 'y', array, allocated);
+}
+
+static int property_get_io_device_weight(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceWeight *w;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_weights, w, c->io_device_weights) {
+ r = sd_bus_message_append(reply, "(st)", w->path, w->weight);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_io_device_limits(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceLimit *l;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_limits, l, c->io_device_limits) {
+ CGroupIOLimitType type;
+
+ type = cgroup_io_limit_type_from_string(property);
+ if (type < 0 || l->limits[type] == cgroup_io_limit_defaults[type])
+ continue;
+
+ r = sd_bus_message_append(reply, "(st)", l->path, l->limits[type]);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_io_device_latency(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceLatency *l;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_latencies, l, c->io_device_latencies) {
+ r = sd_bus_message_append(reply, "(st)", l->path, l->target_usec);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_blockio_device_weight(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupBlockIODeviceWeight *w;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+ r = sd_bus_message_append(reply, "(st)", w->path, w->weight);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_blockio_device_bandwidths(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupBlockIODeviceBandwidth *b;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ uint64_t v;
+
+ if (streq(property, "BlockIOReadBandwidth"))
+ v = b->rbps;
+ else
+ v = b->wbps;
+
+ if (v == CGROUP_LIMIT_MAX)
+ continue;
+
+ r = sd_bus_message_append(reply, "(st)", b->path, v);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_device_allow(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupDeviceAllow *a;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_allow, a, c->device_allow) {
+ unsigned k = 0;
+ char rwm[4];
+
+ if (a->r)
+ rwm[k++] = 'r';
+ if (a->w)
+ rwm[k++] = 'w';
+ if (a->m)
+ rwm[k++] = 'm';
+
+ rwm[k] = 0;
+
+ r = sd_bus_message_append(reply, "(ss)", a->path, rwm);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_ip_address_access(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ IPAddressAccessItem** items = userdata, *i;
+ int r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iayu)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(items, i, *items) {
+
+ r = sd_bus_message_open_container(reply, 'r', "iayu");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", i->family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &i->address, FAMILY_ADDRESS_SIZE(i->family));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "u", (uint32_t) i->prefixlen);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_cgroup_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
+ SD_BUS_PROPERTY("DelegateControllers", "as", property_get_delegate_controllers, 0, 0),
+ SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
+ SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
+ SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
+ SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
+ SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
+ SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
+ SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0),
+ SD_BUS_PROPERTY("AllowedCPUs", "ay", property_get_cpuset, offsetof(CGroupContext, cpuset_cpus), 0),
+ SD_BUS_PROPERTY("AllowedMemoryNodes", "ay", property_get_cpuset, offsetof(CGroupContext, cpuset_mems), 0),
+ SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0),
+ SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0),
+ SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0),
+ SD_BUS_PROPERTY("IODeviceWeight", "a(st)", property_get_io_device_weight, 0, 0),
+ SD_BUS_PROPERTY("IOReadBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IODeviceLatencyTargetUSec", "a(st)", property_get_io_device_latency, 0, 0),
+ SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0),
+ SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0),
+ SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0),
+ SD_BUS_PROPERTY("BlockIODeviceWeight", "a(st)", property_get_blockio_device_weight, 0, 0),
+ SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
+ SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
+ SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0),
+ SD_BUS_PROPERTY("DefaultMemoryLow", "t", NULL, offsetof(CGroupContext, default_memory_low), 0),
+ SD_BUS_PROPERTY("DefaultMemoryMin", "t", NULL, offsetof(CGroupContext, default_memory_min), 0),
+ SD_BUS_PROPERTY("MemoryMin", "t", NULL, offsetof(CGroupContext, memory_min), 0),
+ SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
+ SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
+ SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
+ SD_BUS_PROPERTY("MemorySwapMax", "t", NULL, offsetof(CGroupContext, memory_swap_max), 0),
+ SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
+ SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
+ SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
+ SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
+ SD_BUS_PROPERTY("TasksMax", "t", bus_property_get_tasks_max, offsetof(CGroupContext, tasks_max), 0),
+ SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
+ SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
+ SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
+ SD_BUS_PROPERTY("IPIngressFilterPath", "as", NULL, offsetof(CGroupContext, ip_filters_ingress), 0),
+ SD_BUS_PROPERTY("IPEgressFilterPath", "as", NULL, offsetof(CGroupContext, ip_filters_egress), 0),
+ SD_BUS_PROPERTY("DisableControllers", "as", property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0),
+ SD_BUS_PROPERTY("ManagedOOMSwap", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_swap), 0),
+ SD_BUS_PROPERTY("ManagedOOMMemoryPressure", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_mem_pressure), 0),
+ SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimitPercent", "s", bus_property_get_percent, offsetof(CGroupContext, moom_mem_pressure_limit), 0),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_cgroup_set_transient_property(
+ Unit *u,
+ CGroupContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "Delegate")) {
+ int b;
+
+ if (!UNIT_VTABLE(u)->can_delegate)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->delegate = b;
+ c->delegate_controllers = b ? _CGROUP_MASK_ALL : 0;
+
+ unit_write_settingf(u, flags, name, "Delegate=%s", yes_no(b));
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "DelegateControllers", "DisableControllers")) {
+ CGroupMask mask = 0;
+
+ if (streq(name, "DelegateControllers") && !UNIT_VTABLE(u)->can_delegate)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
+ r = sd_bus_message_enter_container(message, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ CGroupController cc;
+ const char *t;
+
+ r = sd_bus_message_read(message, "s", &t);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ cc = cgroup_controller_from_string(t);
+ if (cc < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown cgroup controller '%s'", t);
+
+ mask |= CGROUP_CONTROLLER_TO_MASK(cc);
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *t = NULL;
+
+ r = cg_mask_to_string(mask, &t);
+ if (r < 0)
+ return r;
+
+ if (streq(name, "DelegateControllers")) {
+
+ c->delegate = true;
+ if (mask == 0)
+ c->delegate_controllers = 0;
+ else
+ c->delegate_controllers |= mask;
+
+ unit_write_settingf(u, flags, name, "Delegate=%s", strempty(t));
+
+ } else if (streq(name, "DisableControllers")) {
+
+ if (mask == 0)
+ c->disable_controllers = 0;
+ else
+ c->disable_controllers |= mask;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, strempty(t));
+ }
+ }
+
+ return 1;
+ } else if (STR_IN_SET(name, "IPIngressFilterPath", "IPEgressFilterPath")) {
+ char ***filters;
+ size_t n = 0;
+
+ filters = streq(name, "IPIngressFilterPath") ? &c->ip_filters_ingress : &c->ip_filters_egress;
+ r = sd_bus_message_enter_container(message, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *path;
+
+ r = sd_bus_message_read(message, "s", &path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!path_is_normalized(path) || !path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= expects a normalized absolute path.", name);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && !strv_contains(*filters, path)) {
+ r = strv_extend(filters, path);
+ if (r < 0)
+ return log_oom();
+ }
+ n++;
+ }
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ char **entry;
+ size_t size = 0;
+
+ if (n == 0)
+ *filters = strv_free(*filters);
+
+ unit_invalidate_cgroup_bpf(u);
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs(name, f);
+ fputs("=\n", f);
+
+ STRV_FOREACH(entry, *filters)
+ fprintf(f, "%s=%s\n", name, *entry);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+
+ if (*filters) {
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
+ static bool warned = false;
+
+ log_full(warned ? LOG_DEBUG : LOG_WARNING,
+ "Transient unit %s configures an IP firewall with BPF, but the local system does not support BPF/cgroup firewalling with multiple filters.\n"
+ "Starting this unit will fail! (This warning is only shown for the first started transient unit using IP firewalling.)", u->id);
+ warned = true;
+ }
+ }
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_cgroup_set_boolean(
+ Unit *u,
+ const char *name,
+ bool *p,
+ CGroupMask mask,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int b, r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = b;
+ unit_invalidate_cgroup(u, mask);
+ unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(b));
+ }
+
+ return 1;
+}
+
+#define BUS_DEFINE_SET_CGROUP_WEIGHT(function, mask, check, val) \
+ static int bus_cgroup_set_##function( \
+ Unit *u, \
+ const char *name, \
+ uint64_t *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ uint64_t v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "t", &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!check(v)) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Value specified in %s is out of range", name); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_invalidate_cgroup(u, mask); \
+ \
+ if (v == (val)) \
+ unit_write_settingf(u, flags, name, \
+ "%s=", name); \
+ else \
+ unit_write_settingf(u, flags, name, \
+ "%s=%" PRIu64, name, v); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_CGROUP_LIMIT(function, mask, scale, minimum) \
+ static int bus_cgroup_set_##function( \
+ Unit *u, \
+ const char *name, \
+ uint64_t *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ uint64_t v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "t", &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (v < minimum) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Value specified in %s is out of range", name); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_invalidate_cgroup(u, mask); \
+ \
+ if (v == CGROUP_LIMIT_MAX) \
+ unit_write_settingf(u, flags, name, \
+ "%s=infinity", name); \
+ else \
+ unit_write_settingf(u, flags, name, \
+ "%s=%" PRIu64, name, v); \
+ } \
+ \
+ return 1; \
+ } \
+ static int bus_cgroup_set_##function##_scale( \
+ Unit *u, \
+ const char *name, \
+ uint64_t *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ uint64_t v; \
+ uint32_t raw; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "u", &raw); \
+ if (r < 0) \
+ return r; \
+ \
+ v = scale(raw, UINT32_MAX); \
+ if (v < minimum || v >= UINT64_MAX) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Value specified in %s is out of range", name); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_invalidate_cgroup(u, mask); \
+ \
+ /* Prepare to chop off suffix */ \
+ assert_se(endswith(name, "Scale")); \
+ \
+ uint32_t scaled = DIV_ROUND_UP((uint64_t) raw * 1000, (uint64_t) UINT32_MAX); \
+ unit_write_settingf(u, flags, name, "%.*s=%" PRIu32 ".%" PRIu32 "%%", \
+ (int)(strlen(name) - strlen("Scale")), name, \
+ scaled / 10, scaled % 10); \
+ } \
+ \
+ return 1; \
+ }
+
+DISABLE_WARNING_TYPE_LIMITS;
+BUS_DEFINE_SET_CGROUP_WEIGHT(cpu_weight, CGROUP_MASK_CPU, CGROUP_WEIGHT_IS_OK, CGROUP_WEIGHT_INVALID);
+BUS_DEFINE_SET_CGROUP_WEIGHT(cpu_shares, CGROUP_MASK_CPU, CGROUP_CPU_SHARES_IS_OK, CGROUP_CPU_SHARES_INVALID);
+BUS_DEFINE_SET_CGROUP_WEIGHT(io_weight, CGROUP_MASK_IO, CGROUP_WEIGHT_IS_OK, CGROUP_WEIGHT_INVALID);
+BUS_DEFINE_SET_CGROUP_WEIGHT(blockio_weight, CGROUP_MASK_BLKIO, CGROUP_BLKIO_WEIGHT_IS_OK, CGROUP_BLKIO_WEIGHT_INVALID);
+BUS_DEFINE_SET_CGROUP_LIMIT(memory, CGROUP_MASK_MEMORY, physical_memory_scale, 1);
+BUS_DEFINE_SET_CGROUP_LIMIT(memory_protection, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
+BUS_DEFINE_SET_CGROUP_LIMIT(swap, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
+REENABLE_WARNING;
+
+static int bus_cgroup_set_tasks_max(
+ Unit *u,
+ const char *name,
+ TasksMax *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ uint64_t v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "t", &v);
+ if (r < 0)
+ return r;
+
+ if (v < 1)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Value specified in %s is out of range", name);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = (TasksMax) { .value = v, .scale = 0 }; /* When .scale==0, .value is the absolute value */
+ unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+
+ if (v == CGROUP_LIMIT_MAX)
+ unit_write_settingf(u, flags, name,
+ "%s=infinity", name);
+ else
+ unit_write_settingf(u, flags, name,
+ "%s=%" PRIu64, name, v);
+ }
+
+ return 1;
+}
+
+static int bus_cgroup_set_tasks_max_scale(
+ Unit *u,
+ const char *name,
+ TasksMax *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ uint32_t v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "u", &v);
+ if (r < 0)
+ return r;
+
+ if (v < 1 || v >= UINT32_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Value specified in %s is out of range", name);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = (TasksMax) { v, UINT32_MAX }; /* .scale is not 0, so this is interpreted as v/UINT32_MAX. */
+ unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+
+ uint32_t scaled = DIV_ROUND_UP((uint64_t) v * 100U, (uint64_t) UINT32_MAX);
+ unit_write_settingf(u, flags, name, "%s=%" PRIu32 ".%" PRIu32 "%%", "TasksMax",
+ scaled / 10, scaled % 10);
+ }
+
+ return 1;
+}
+
+int bus_cgroup_set_property(
+ Unit *u,
+ CGroupContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ CGroupIOLimitType iol_type;
+ int r;
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "CPUAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, get_cpu_accounting_mask(), message, flags, error);
+
+ if (streq(name, "CPUWeight"))
+ return bus_cgroup_set_cpu_weight(u, name, &c->cpu_weight, message, flags, error);
+
+ if (streq(name, "StartupCPUWeight"))
+ return bus_cgroup_set_cpu_weight(u, name, &c->startup_cpu_weight, message, flags, error);
+
+ if (streq(name, "CPUShares"))
+ return bus_cgroup_set_cpu_shares(u, name, &c->cpu_shares, message, flags, error);
+
+ if (streq(name, "StartupCPUShares"))
+ return bus_cgroup_set_cpu_shares(u, name, &c->startup_cpu_shares, message, flags, error);
+
+ if (streq(name, "IOAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->io_accounting, CGROUP_MASK_IO, message, flags, error);
+
+ if (streq(name, "IOWeight"))
+ return bus_cgroup_set_io_weight(u, name, &c->io_weight, message, flags, error);
+
+ if (streq(name, "StartupIOWeight"))
+ return bus_cgroup_set_io_weight(u, name, &c->startup_io_weight, message, flags, error);
+
+ if (streq(name, "BlockIOAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->blockio_accounting, CGROUP_MASK_BLKIO, message, flags, error);
+
+ if (streq(name, "BlockIOWeight"))
+ return bus_cgroup_set_blockio_weight(u, name, &c->blockio_weight, message, flags, error);
+
+ if (streq(name, "StartupBlockIOWeight"))
+ return bus_cgroup_set_blockio_weight(u, name, &c->startup_blockio_weight, message, flags, error);
+
+ if (streq(name, "MemoryAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->memory_accounting, CGROUP_MASK_MEMORY, message, flags, error);
+
+ if (streq(name, "MemoryMin")) {
+ r = bus_cgroup_set_memory_protection(u, name, &c->memory_min, message, flags, error);
+ if (r > 0)
+ c->memory_min_set = true;
+ return r;
+ }
+
+ if (streq(name, "MemoryLow")) {
+ r = bus_cgroup_set_memory_protection(u, name, &c->memory_low, message, flags, error);
+ if (r > 0)
+ c->memory_low_set = true;
+ return r;
+ }
+
+ if (streq(name, "DefaultMemoryMin")) {
+ r = bus_cgroup_set_memory_protection(u, name, &c->default_memory_min, message, flags, error);
+ if (r > 0)
+ c->default_memory_min_set = true;
+ return r;
+ }
+
+ if (streq(name, "DefaultMemoryLow")) {
+ r = bus_cgroup_set_memory_protection(u, name, &c->default_memory_low, message, flags, error);
+ if (r > 0)
+ c->default_memory_low_set = true;
+ return r;
+ }
+
+ if (streq(name, "MemoryHigh"))
+ return bus_cgroup_set_memory(u, name, &c->memory_high, message, flags, error);
+
+ if (streq(name, "MemorySwapMax"))
+ return bus_cgroup_set_swap(u, name, &c->memory_swap_max, message, flags, error);
+
+ if (streq(name, "MemoryMax"))
+ return bus_cgroup_set_memory(u, name, &c->memory_max, message, flags, error);
+
+ if (streq(name, "MemoryLimit"))
+ return bus_cgroup_set_memory(u, name, &c->memory_limit, message, flags, error);
+
+ if (streq(name, "MemoryMinScale")) {
+ r = bus_cgroup_set_memory_protection_scale(u, name, &c->memory_min, message, flags, error);
+ if (r > 0)
+ c->memory_min_set = true;
+ return r;
+ }
+
+ if (streq(name, "MemoryLowScale")) {
+ r = bus_cgroup_set_memory_protection_scale(u, name, &c->memory_low, message, flags, error);
+ if (r > 0)
+ c->memory_low_set = true;
+ return r;
+ }
+
+ if (streq(name, "DefaultMemoryMinScale")) {
+ r = bus_cgroup_set_memory_protection_scale(u, name, &c->default_memory_min, message, flags, error);
+ if (r > 0)
+ c->default_memory_min_set = true;
+ return r;
+ }
+
+ if (streq(name, "DefaultMemoryLowScale")) {
+ r = bus_cgroup_set_memory_protection_scale(u, name, &c->default_memory_low, message, flags, error);
+ if (r > 0)
+ c->default_memory_low_set = true;
+ return r;
+ }
+
+ if (streq(name, "MemoryHighScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_high, message, flags, error);
+
+ if (streq(name, "MemorySwapMaxScale"))
+ return bus_cgroup_set_swap_scale(u, name, &c->memory_swap_max, message, flags, error);
+
+ if (streq(name, "MemoryMaxScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_max, message, flags, error);
+
+ if (streq(name, "MemoryLimitScale"))
+ return bus_cgroup_set_memory_scale(u, name, &c->memory_limit, message, flags, error);
+
+ if (streq(name, "TasksAccounting"))
+ return bus_cgroup_set_boolean(u, name, &c->tasks_accounting, CGROUP_MASK_PIDS, message, flags, error);
+
+ if (streq(name, "TasksMax"))
+ return bus_cgroup_set_tasks_max(u, name, &c->tasks_max, message, flags, error);
+
+ if (streq(name, "TasksMaxScale"))
+ return bus_cgroup_set_tasks_max_scale(u, name, &c->tasks_max, message, flags, error);
+
+ if (streq(name, "CPUQuotaPerSecUSec")) {
+ uint64_t u64;
+
+ r = sd_bus_message_read(message, "t", &u64);
+ if (r < 0)
+ return r;
+
+ if (u64 <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "CPUQuotaPerSecUSec= value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_quota_per_sec_usec = u64;
+ u->warned_clamping_cpu_quota_period = false;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (c->cpu_quota_per_sec_usec == USEC_INFINITY)
+ unit_write_setting(u, flags, "CPUQuota", "CPUQuota=");
+ else
+ /* config_parse_cpu_quota() requires an integer, so truncating division is used on
+ * purpose here. */
+ unit_write_settingf(u, flags, "CPUQuota",
+ "CPUQuota=%0.f%%",
+ (double) (c->cpu_quota_per_sec_usec / 10000));
+ }
+
+ return 1;
+
+ } else if (streq(name, "CPUQuotaPeriodUSec")) {
+ uint64_t u64;
+
+ r = sd_bus_message_read(message, "t", &u64);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_quota_period_usec = u64;
+ u->warned_clamping_cpu_quota_period = false;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+ if (c->cpu_quota_period_usec == USEC_INFINITY)
+ unit_write_setting(u, flags, "CPUQuotaPeriodSec", "CPUQuotaPeriodSec=");
+ else {
+ char v[FORMAT_TIMESPAN_MAX];
+ unit_write_settingf(u, flags, "CPUQuotaPeriodSec",
+ "CPUQuotaPeriodSec=%s",
+ format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1));
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "AllowedCPUs", "AllowedMemoryNodes")) {
+ const void *a;
+ size_t n;
+ _cleanup_(cpu_set_reset) CPUSet new_set = {};
+
+ r = sd_bus_message_read_array(message, 'y', &a, &n);
+ if (r < 0)
+ return r;
+
+ r = cpu_set_from_dbus(a, n, &new_set);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *setstr = NULL;
+ CPUSet *set;
+
+ setstr = cpu_set_to_range_string(&new_set);
+ if (!setstr)
+ return -ENOMEM;
+
+ if (streq(name, "AllowedCPUs"))
+ set = &c->cpuset_cpus;
+ else
+ set = &c->cpuset_mems;
+
+ cpu_set_reset(set);
+ *set = new_set;
+ new_set = (CPUSet) {};
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPUSET);
+ unit_write_settingf(u, flags, name, "%s=%s", name, setstr);
+ }
+
+ return 1;
+
+ } else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) {
+ const char *path;
+ unsigned n = 0;
+ uint64_t u64;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &u64)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceLimit *a = NULL, *b;
+
+ LIST_FOREACH(device_limits, b, c->io_device_limits) {
+ if (path_equal(path, b->path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ CGroupIOLimitType type;
+
+ a = new0(CGroupIODeviceLimit, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
+ a->limits[type] = cgroup_io_limit_defaults[type];
+
+ LIST_PREPEND(device_limits, c->io_device_limits, a);
+ }
+
+ a->limits[iol_type] = u64;
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceLimit *a;
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t size = 0;
+
+ if (n == 0) {
+ LIST_FOREACH(device_limits, a, c->io_device_limits)
+ a->limits[iol_type] = cgroup_io_limit_defaults[iol_type];
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fprintf(f, "%s=\n", name);
+ LIST_FOREACH(device_limits, a, c->io_device_limits)
+ if (a->limits[iol_type] != cgroup_io_limit_defaults[iol_type])
+ fprintf(f, "%s=%s %" PRIu64 "\n", name, a->path, a->limits[iol_type]);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IODeviceWeight")) {
+ const char *path;
+ uint64_t weight;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &weight)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!CGROUP_WEIGHT_IS_OK(weight) || weight == CGROUP_WEIGHT_INVALID)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "IODeviceWeight= value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceWeight *a = NULL, *b;
+
+ LIST_FOREACH(device_weights, b, c->io_device_weights) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupIODeviceWeight, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+ LIST_PREPEND(device_weights, c->io_device_weights, a);
+ }
+
+ a->weight = weight;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ CGroupIODeviceWeight *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs("IODeviceWeight=\n", f);
+ LIST_FOREACH(device_weights, a, c->io_device_weights)
+ fprintf(f, "IODeviceWeight=%s %" PRIu64 "\n", a->path, a->weight);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IODeviceLatencyTargetUSec")) {
+ const char *path;
+ uint64_t target;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &target)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupIODeviceLatency *a = NULL, *b;
+
+ LIST_FOREACH(device_latencies, b, c->io_device_latencies) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupIODeviceLatency, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+ LIST_PREPEND(device_latencies, c->io_device_latencies, a);
+ }
+
+ a->target_usec = target;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+ CGroupIODeviceLatency *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs("IODeviceLatencyTargetSec=\n", f);
+ LIST_FOREACH(device_latencies, a, c->io_device_latencies)
+ fprintf(f, "IODeviceLatencyTargetSec=%s %s\n",
+ a->path, format_timespan(ts, sizeof(ts), a->target_usec, 1));
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
+ const char *path;
+ bool read = true;
+ unsigned n = 0;
+ uint64_t u64;
+
+ if (streq(name, "BlockIOWriteBandwidth"))
+ read = false;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &u64)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupBlockIODeviceBandwidth *a = NULL, *b;
+
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ if (path_equal(path, b->path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupBlockIODeviceBandwidth, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->rbps = CGROUP_LIMIT_MAX;
+ a->wbps = CGROUP_LIMIT_MAX;
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, a);
+ }
+
+ if (read)
+ a->rbps = u64;
+ else
+ a->wbps = u64;
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupBlockIODeviceBandwidth *a;
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t size = 0;
+
+ if (n == 0) {
+ LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths) {
+ if (read)
+ a->rbps = CGROUP_LIMIT_MAX;
+ else
+ a->wbps = CGROUP_LIMIT_MAX;
+ }
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
+
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ if (read) {
+ fputs("BlockIOReadBandwidth=\n", f);
+ LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths)
+ if (a->rbps != CGROUP_LIMIT_MAX)
+ fprintf(f, "BlockIOReadBandwidth=%s %" PRIu64 "\n", a->path, a->rbps);
+ } else {
+ fputs("BlockIOWriteBandwidth=\n", f);
+ LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths)
+ if (a->wbps != CGROUP_LIMIT_MAX)
+ fprintf(f, "BlockIOWriteBandwidth=%s %" PRIu64 "\n", a->path, a->wbps);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "BlockIODeviceWeight")) {
+ const char *path;
+ uint64_t weight;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &weight)) > 0) {
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
+
+ if (!CGROUP_BLKIO_WEIGHT_IS_OK(weight) || weight == CGROUP_BLKIO_WEIGHT_INVALID)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "BlockIODeviceWeight= out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupBlockIODeviceWeight *a = NULL, *b;
+
+ LIST_FOREACH(device_weights, b, c->blockio_device_weights) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupBlockIODeviceWeight, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+ LIST_PREPEND(device_weights, c->blockio_device_weights, a);
+ }
+
+ a->weight = weight;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ CGroupBlockIODeviceWeight *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->blockio_device_weights)
+ cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO);
+
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs("BlockIODeviceWeight=\n", f);
+ LIST_FOREACH(device_weights, a, c->blockio_device_weights)
+ fprintf(f, "BlockIODeviceWeight=%s %" PRIu64 "\n", a->path, a->weight);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "DevicePolicy")) {
+ const char *policy;
+ CGroupDevicePolicy p;
+
+ r = sd_bus_message_read(message, "s", &policy);
+ if (r < 0)
+ return r;
+
+ p = cgroup_device_policy_from_string(policy);
+ if (p < 0)
+ return -EINVAL;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->device_policy = p;
+ unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES);
+ unit_write_settingf(u, flags, name, "DevicePolicy=%s", policy);
+ }
+
+ return 1;
+
+ } else if (streq(name, "DeviceAllow")) {
+ const char *path, *rwm;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &path, &rwm)) > 0) {
+
+ if (!valid_device_allow_pattern(path) || strpbrk(path, WHITESPACE))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "DeviceAllow= requires device node or pattern");
+
+ if (isempty(rwm))
+ rwm = "rwm";
+ else if (!in_charset(rwm, "rwm"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "DeviceAllow= requires combination of rwm flags");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ CGroupDeviceAllow *a = NULL, *b;
+
+ LIST_FOREACH(device_allow, b, c->device_allow) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupDeviceAllow, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ LIST_PREPEND(device_allow, c->device_allow, a);
+ }
+
+ a->r = strchr(rwm, 'r');
+ a->w = strchr(rwm, 'w');
+ a->m = strchr(rwm, 'm');
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ CGroupDeviceAllow *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->device_allow)
+ cgroup_context_free_device_allow(c, c->device_allow);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES);
+
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs("DeviceAllow=\n", f);
+ LIST_FOREACH(device_allow, a, c->device_allow)
+ fprintf(f, "DeviceAllow=%s %s%s%s\n", a->path, a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IPAccounting")) {
+ int b;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->ip_accounting = b;
+
+ unit_invalidate_cgroup_bpf(u);
+ unit_write_settingf(u, flags, name, "IPAccounting=%s", yes_no(b));
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "IPAddressAllow", "IPAddressDeny")) {
+ IPAddressAccessItem **list;
+ size_t n = 0;
+
+ list = streq(name, "IPAddressAllow") ? &c->ip_address_allow : &c->ip_address_deny;
+
+ r = sd_bus_message_enter_container(message, 'a', "(iayu)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const void *ap;
+ int32_t family;
+ uint32_t prefixlen;
+ size_t an;
+
+ r = sd_bus_message_enter_container(message, 'r', "iayu");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(message, "i", &family);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= expects IPv4 or IPv6 addresses only.", name);
+
+ r = sd_bus_message_read_array(message, 'y', &ap, &an);
+ if (r < 0)
+ return r;
+
+ if (an != FAMILY_ADDRESS_SIZE(family))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "IP address has wrong size for family (%s, expected %zu, got %zu)",
+ af_to_name(family), FAMILY_ADDRESS_SIZE(family), an);
+
+ r = sd_bus_message_read(message, "u", &prefixlen);
+ if (r < 0)
+ return r;
+
+ if (prefixlen > FAMILY_ADDRESS_SIZE(family)*8)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Prefix length %" PRIu32 " too large for address family %s.", prefixlen, af_to_name(family));
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ IPAddressAccessItem *item;
+
+ item = new0(IPAddressAccessItem, 1);
+ if (!item)
+ return -ENOMEM;
+
+ item->family = family;
+ item->prefixlen = prefixlen;
+ memcpy(&item->address, ap, an);
+
+ LIST_PREPEND(items, *list, item);
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ *list = ip_address_access_reduce(*list);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ IPAddressAccessItem *item;
+ size_t size = 0;
+
+ if (n == 0)
+ *list = ip_address_access_free_all(*list);
+
+ unit_invalidate_cgroup_bpf(u);
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs(name, f);
+ fputs("=\n", f);
+
+ LIST_FOREACH(items, item, *list) {
+ char buffer[CONST_MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+
+ errno = 0;
+ if (!inet_ntop(item->family, &item->address, buffer, sizeof(buffer)))
+ return errno_or_else(EINVAL);
+
+ fprintf(f, "%s=%s/%u\n", name, buffer, item->prefixlen);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+ }
+
+ if (STR_IN_SET(name, "ManagedOOMSwap", "ManagedOOMMemoryPressure")) {
+ ManagedOOMMode *cgroup_mode = streq(name, "ManagedOOMSwap") ? &c->moom_swap : &c->moom_mem_pressure;
+ ManagedOOMMode m;
+ const char *mode;
+
+ if (!UNIT_VTABLE(u)->can_set_managed_oom)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot set %s for this unit type", name);
+
+ r = sd_bus_message_read(message, "s", &mode);
+ if (r < 0)
+ return r;
+
+ m = managed_oom_mode_from_string(mode);
+ if (m < 0)
+ return -EINVAL;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *cgroup_mode = m;
+ unit_write_settingf(u, flags, name, "%s=%s", name, mode);
+ }
+
+ (void) manager_varlink_send_managed_oom_update(u);
+ return 1;
+ }
+
+ if (streq(name, "ManagedOOMMemoryPressureLimitPercent")) {
+ if (!UNIT_VTABLE(u)->can_set_managed_oom)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot set %s for this unit type", name);
+
+ r = bus_set_transient_percent(u, name, &c->moom_mem_pressure_limit, message, flags, error);
+ if (r < 0)
+ return r;
+
+ if (c->moom_mem_pressure == MANAGED_OOM_KILL)
+ (void) manager_varlink_send_managed_oom_update(u);
+
+ return 1;
+ }
+
+ if (streq(name, "DisableControllers") || (u->transient && u->load_state == UNIT_STUB))
+ return bus_cgroup_set_transient_property(u, c, name, message, flags, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-cgroup.h b/src/core/dbus-cgroup.h
new file mode 100644
index 0000000..5bf45eb
--- /dev/null
+++ b/src/core/dbus-cgroup.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+#include "cgroup.h"
+
+extern const sd_bus_vtable bus_cgroup_vtable[];
+
+int bus_property_get_tasks_max(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+
+int bus_cgroup_set_property(Unit *u, CGroupContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-device.c b/src/core/dbus-device.c
new file mode 100644
index 0000000..b5e18d8
--- /dev/null
+++ b/src/core/dbus-device.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dbus-device.h"
+#include "device.h"
+#include "unit.h"
+
+const sd_bus_vtable bus_device_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("SysFSPath", "s", NULL, offsetof(Device, sysfs), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_VTABLE_END
+};
diff --git a/src/core/dbus-device.h b/src/core/dbus-device.h
new file mode 100644
index 0000000..bfb5770
--- /dev/null
+++ b/src/core/dbus-device.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus-vtable.h"
+
+extern const sd_bus_vtable bus_device_vtable[];
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
new file mode 100644
index 0000000..0473535
--- /dev/null
+++ b/src/core/dbus-execute.c
@@ -0,0 +1,3459 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+#include <sys/prctl.h>
+
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "cpu-set-util.h"
+#include "dbus-execute.h"
+#include "dbus-util.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "escape.h"
+#include "execute.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "ioprio.h"
+#include "journal-file.h"
+#include "mountpoint-util.h"
+#include "namespace.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "securebits-util.h"
+#include "specifier.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "unit-printf.h"
+#include "user-util.h"
+#include "utf8.h"
+
+BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutput);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_preserve_mode, exec_preserve_mode, ExecPreserveMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_proc, protect_proc, ProtectProc);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_proc_subset, proc_subset, ProcSubset);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_home, protect_home, ProtectHome);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_system, protect_system, ProtectSystem);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_personality, personality, unsigned long);
+static BUS_DEFINE_PROPERTY_GET(property_get_ioprio, "i", ExecContext, exec_context_get_effective_ioprio);
+static BUS_DEFINE_PROPERTY_GET(property_get_mount_apivfs, "b", ExecContext, exec_context_get_effective_mount_apivfs);
+static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_class, "i", ExecContext, exec_context_get_effective_ioprio, IOPRIO_PRIO_CLASS);
+static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext, exec_context_get_effective_ioprio, IOPRIO_PRIO_DATA);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC);
+static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa);
+
+static int property_get_environment_files(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ char **j;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(j, c->environment_files) {
+ const char *fn = *j;
+
+ r = sd_bus_message_append(reply, "(sb)", fn[0] == '-' ? fn + 1 : fn, fn[0] == '-');
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_oom_score_adjust(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int32_t n;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->oom_score_adjust_set)
+ n = c->oom_score_adjust;
+ else {
+ _cleanup_free_ char *t = NULL;
+
+ n = 0;
+ r = read_one_line_file("/proc/self/oom_score_adj", &t);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /proc/self/oom_score_adj, ignoring: %m");
+ else {
+ r = safe_atoi32(t, &n);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse \"%s\" from /proc/self/oom_score_adj, ignoring: %m", t);
+ }
+ }
+
+ return sd_bus_message_append(reply, "i", n);
+}
+
+static int property_get_coredump_filter(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ uint64_t n;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->coredump_filter_set)
+ n = c->coredump_filter;
+ else {
+ _cleanup_free_ char *t = NULL;
+
+ n = COREDUMP_FILTER_MASK_DEFAULT;
+ r = read_one_line_file("/proc/self/coredump_filter", &t);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /proc/self/coredump_filter, ignoring: %m");
+ else {
+ r = safe_atoux64(t, &n);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse \"%s\" from /proc/self/coredump_filter, ignoring: %m", t);
+ }
+ }
+
+ return sd_bus_message_append(reply, "t", n);
+}
+
+static int property_get_nice(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int32_t n;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->nice_set)
+ n = c->nice;
+ else {
+ errno = 0;
+ n = getpriority(PRIO_PROCESS, 0);
+ if (errno > 0)
+ n = 0;
+ }
+
+ return sd_bus_message_append(reply, "i", n);
+}
+
+static int property_get_cpu_sched_policy(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int32_t n;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->cpu_sched_set)
+ n = c->cpu_sched_policy;
+ else {
+ n = sched_getscheduler(0);
+ if (n < 0)
+ n = SCHED_OTHER;
+ }
+
+ return sd_bus_message_append(reply, "i", n);
+}
+
+static int property_get_cpu_sched_priority(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int32_t n;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->cpu_sched_set)
+ n = c->cpu_sched_priority;
+ else {
+ struct sched_param p = {};
+
+ if (sched_getparam(0, &p) >= 0)
+ n = p.sched_priority;
+ else
+ n = 0;
+ }
+
+ return sd_bus_message_append(reply, "i", n);
+}
+
+static int property_get_cpu_affinity(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->cpu_affinity_from_numa) {
+ int r;
+
+ r = numa_to_cpu_set(&c->numa_policy, &s);
+ if (r < 0)
+ return r;
+ }
+
+ (void) cpu_set_to_dbus(c->cpu_affinity_from_numa ? &s : &c->cpu_set, &array, &allocated);
+
+ return sd_bus_message_append_array(reply, 'y', array, allocated);
+}
+
+static int property_get_numa_mask(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ (void) cpu_set_to_dbus(&c->numa_policy.nodes, &array, &allocated);
+
+ return sd_bus_message_append_array(reply, 'y', array, allocated);
+}
+
+static int property_get_numa_policy(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ ExecContext *c = userdata;
+ int32_t policy;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ policy = numa_policy_get_type(&c->numa_policy);
+
+ return sd_bus_message_append_basic(reply, 'i', &policy);
+}
+
+static int property_get_timer_slack_nsec(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ uint64_t u;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->timer_slack_nsec != NSEC_INFINITY)
+ u = (uint64_t) c->timer_slack_nsec;
+ else
+ u = (uint64_t) prctl(PR_GET_TIMERSLACK);
+
+ return sd_bus_message_append(reply, "t", u);
+}
+
+static int property_get_syscall_filter(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+#if HAVE_SECCOMP
+ void *id, *val;
+#endif
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "b", c->syscall_allow_list);
+ if (r < 0)
+ return r;
+
+#if HAVE_SECCOMP
+ HASHMAP_FOREACH_KEY(val, id, c->syscall_filter) {
+ _cleanup_free_ char *name = NULL;
+ const char *e = NULL;
+ char *s;
+ int num = PTR_TO_INT(val);
+
+ name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
+ if (!name)
+ continue;
+
+ if (num >= 0) {
+ e = seccomp_errno_or_action_to_string(num);
+ if (e) {
+ s = strjoin(name, ":", e);
+ if (!s)
+ return -ENOMEM;
+ } else {
+ r = asprintf(&s, "%s:%d", name, num);
+ if (r < 0)
+ return -ENOMEM;
+ }
+ } else
+ s = TAKE_PTR(name);
+
+ r = strv_consume(&l, s);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ strv_sort(l);
+
+ r = sd_bus_message_append_strv(reply, l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_syscall_log(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+#if HAVE_SECCOMP
+ void *id, *val;
+#endif
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "b", c->syscall_log_allow_list);
+ if (r < 0)
+ return r;
+
+#if HAVE_SECCOMP
+ HASHMAP_FOREACH_KEY(val, id, c->syscall_log) {
+ char *name = NULL;
+
+ name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
+ if (!name)
+ continue;
+
+ r = strv_consume(&l, name);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ strv_sort(l);
+
+ r = sd_bus_message_append_strv(reply, l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_syscall_archs(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+#if HAVE_SECCOMP
+ void *id;
+#endif
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+#if HAVE_SECCOMP
+ SET_FOREACH(id, c->syscall_archs) {
+ const char *name;
+
+ name = seccomp_arch_to_string(PTR_TO_UINT32(id) - 1);
+ if (!name)
+ continue;
+
+ r = strv_extend(&l, name);
+ if (r < 0)
+ return -ENOMEM;
+ }
+#endif
+
+ strv_sort(l);
+
+ r = sd_bus_message_append_strv(reply, l);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int property_get_selinux_context(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ return sd_bus_message_append(reply, "(bs)", c->selinux_context_ignore, c->selinux_context);
+}
+
+static int property_get_apparmor_profile(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ return sd_bus_message_append(reply, "(bs)", c->apparmor_profile_ignore, c->apparmor_profile);
+}
+
+static int property_get_smack_process_label(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ return sd_bus_message_append(reply, "(bs)", c->smack_process_label_ignore, c->smack_process_label);
+}
+
+static int property_get_address_families(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ void *af;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "b", c->address_families_allow_list);
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(af, c->address_families) {
+ const char *name;
+
+ name = af_to_name(PTR_TO_INT(af));
+ if (!name)
+ continue;
+
+ r = strv_extend(&l, name);
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ strv_sort(l);
+
+ r = sd_bus_message_append_strv(reply, l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_working_directory(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ const char *wd;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (c->working_directory_home)
+ wd = "~";
+ else
+ wd = c->working_directory;
+
+ if (c->working_directory_missing_ok)
+ wd = strjoina("!", wd);
+
+ return sd_bus_message_append(reply, "s", wd);
+}
+
+static int property_get_stdio_fdname(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int fileno;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ if (streq(property, "StandardInputFileDescriptorName"))
+ fileno = STDIN_FILENO;
+ else if (streq(property, "StandardOutputFileDescriptorName"))
+ fileno = STDOUT_FILENO;
+ else {
+ assert(streq(property, "StandardErrorFileDescriptorName"));
+ fileno = STDERR_FILENO;
+ }
+
+ return sd_bus_message_append(reply, "s", exec_context_fdname(c, fileno));
+}
+
+static int property_get_input_data(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ return sd_bus_message_append_array(reply, 'y', c->stdin_data, c->stdin_data_size);
+}
+
+static int property_get_bind_paths(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ unsigned i;
+ bool ro;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ ro = strstr(property, "ReadOnly");
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssbt)");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < c->n_bind_mounts; i++) {
+
+ if (ro != c->bind_mounts[i].read_only)
+ continue;
+
+ r = sd_bus_message_append(
+ reply, "(ssbt)",
+ c->bind_mounts[i].source,
+ c->bind_mounts[i].destination,
+ c->bind_mounts[i].ignore_enoent,
+ c->bind_mounts[i].recursive ? (uint64_t) MS_REC : (uint64_t) 0);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_temporary_filesystems(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ unsigned i;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < c->n_temporary_filesystems; i++) {
+ TemporaryFileSystem *t = c->temporary_filesystems + i;
+
+ r = sd_bus_message_append(
+ reply, "(ss)",
+ t->path,
+ t->options);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_log_extra_fields(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ size_t i;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "ay");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < c->n_log_extra_fields; i++) {
+ r = sd_bus_message_append_array(reply, 'y', c->log_extra_fields[i].iov_base, c->log_extra_fields[i].iov_len);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_set_credential(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ ExecSetCredential *sc;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(say)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(sc, c->set_credentials) {
+
+ r = sd_bus_message_open_container(reply, 'r', "say");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", sc->id);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', sc->data, sc->size);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_load_credential(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ char **i, **j;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(i, j, c->load_credentials) {
+ r = sd_bus_message_append(reply, "(ss)", *i, *j);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_root_hash(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ return sd_bus_message_append_array(reply, 'y', c->root_hash, c->root_hash_size);
+}
+
+static int property_get_root_hash_sig(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ return sd_bus_message_append_array(reply, 'y', c->root_hash_sig, c->root_hash_sig_size);
+}
+
+static int property_get_root_image_options(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ MountOptions *m;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(mount_options, m, c->root_image_options) {
+ r = sd_bus_message_append(reply, "(ss)",
+ partition_designator_to_string(m->partition_designator),
+ m->options);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_mount_images(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ int r;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssba(ss))");
+ if (r < 0)
+ return r;
+
+ for (size_t i = 0; i < c->n_mount_images; i++) {
+ MountOptions *m;
+
+ r = sd_bus_message_open_container(reply, SD_BUS_TYPE_STRUCT, "ssba(ss)");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(
+ reply, "ssb",
+ c->mount_images[i].source,
+ c->mount_images[i].destination,
+ c->mount_images[i].ignore_enoent);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+ LIST_FOREACH(mount_options, m, c->mount_images[i].mount_options) {
+ r = sd_bus_message_append(reply, "(ss)",
+ partition_designator_to_string(m->partition_designator),
+ m->options);
+ if (r < 0)
+ return r;
+ }
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_exec_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("EnvironmentFiles", "a(sb)", property_get_environment_files, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PassEnvironment", "as", NULL, offsetof(ExecContext, pass_environment), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UnsetEnvironment", "as", NULL, offsetof(ExecContext, unset_environment), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UMask", "u", bus_property_get_mode, offsetof(ExecContext, umask), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitCPU", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_CPU]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitCPUSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_CPU]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitFSIZE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_FSIZE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitFSIZESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_FSIZE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitDATA", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_DATA]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitDATASoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_DATA]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitSTACK", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_STACK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitSTACKSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_STACK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitCORE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_CORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitCORESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_CORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRSS", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RSS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRSSSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RSS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNOFILE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NOFILE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNOFILESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NOFILE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitAS", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_AS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitASSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_AS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNPROC", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NPROC]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNPROCSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NPROC]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitMEMLOCK", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_MEMLOCK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitMEMLOCKSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_MEMLOCK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitLOCKS", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_LOCKS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitLOCKSSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_LOCKS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitSIGPENDING", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_SIGPENDING]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitSIGPENDINGSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_SIGPENDING]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitMSGQUEUE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_MSGQUEUE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitMSGQUEUESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_MSGQUEUE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNICE", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NICE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitNICESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_NICE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRTPRIO", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRTPRIOSoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRTTIME", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WorkingDirectory", "s", property_get_working_directory, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootDirectory", "s", NULL, offsetof(ExecContext, root_directory), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootImage", "s", NULL, offsetof(ExecContext, root_image), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootImageOptions", "a(ss)", property_get_root_image_options, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootHash", "ay", property_get_root_hash, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootHashPath", "s", NULL, offsetof(ExecContext, root_hash_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootHashSignature", "ay", property_get_root_hash_sig, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootHashSignaturePath", "s", NULL, offsetof(ExecContext, root_hash_sig_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootVerity", "s", NULL, offsetof(ExecContext, root_verity), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MountImages", "a(ssba(ss))", property_get_mount_images, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CoredumpFilter", "t", property_get_coredump_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Nice", "i", property_get_nice, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IOSchedulingClass", "i", property_get_ioprio_class, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IOSchedulingPriority", "i", property_get_ioprio_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUAffinityFromNUMA", "b", property_get_cpu_affinity_from_numa, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardInput", "s", property_get_exec_input, offsetof(ExecContext, std_input), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardInputFileDescriptorName", "s", property_get_stdio_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardInputData", "ay", property_get_input_data, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardOutput", "s", bus_property_get_exec_output, offsetof(ExecContext, std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardOutputFileDescriptorName", "s", property_get_stdio_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardError", "s", bus_property_get_exec_output, offsetof(ExecContext, std_error), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardErrorFileDescriptorName", "s", property_get_stdio_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTYPath", "s", NULL, offsetof(ExecContext, tty_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTYReset", "b", bus_property_get_bool, offsetof(ExecContext, tty_reset), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTYVHangup", "b", bus_property_get_bool, offsetof(ExecContext, tty_vhangup), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTYVTDisallocate", "b", bus_property_get_bool, offsetof(ExecContext, tty_vt_disallocate), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogPriority", "i", bus_property_get_int, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogIdentifier", "s", NULL, offsetof(ExecContext, syslog_identifier), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogLevelPrefix", "b", bus_property_get_bool, offsetof(ExecContext, syslog_level_prefix), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogLevel", "i", property_get_syslog_level, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SyslogFacility", "i", property_get_syslog_facility, offsetof(ExecContext, syslog_priority), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogLevelMax", "i", bus_property_get_int, offsetof(ExecContext, log_level_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogRateLimitIntervalUSec", "t", bus_property_get_usec, offsetof(ExecContext, log_ratelimit_interval_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogRateLimitBurst", "u", bus_property_get_unsigned, offsetof(ExecContext, log_ratelimit_burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogExtraFields", "aay", property_get_log_extra_fields, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogNamespace", "s", NULL, offsetof(ExecContext, log_namespace), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SecureBits", "i", bus_property_get_int, offsetof(ExecContext, secure_bits), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CapabilityBoundingSet", "t", NULL, offsetof(ExecContext, capability_bounding_set), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("AmbientCapabilities", "t", NULL, offsetof(ExecContext, capability_ambient_set), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SetCredential", "a(say)", property_get_set_credential, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LoadCredential", "a(ss)", property_get_load_credential, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadWritePaths", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadOnlyPaths", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectClock", "b", bus_property_get_bool, offsetof(ExecContext, protect_clock), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelLogs", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_logs), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_bool, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UtmpMode", "s", property_get_exec_utmp_mode, offsetof(ExecContext, utmp_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SELinuxContext", "(bs)", property_get_selinux_context, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("AppArmorProfile", "(bs)", property_get_apparmor_profile, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SmackProcessLabel", "(bs)", property_get_smack_process_label, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IgnoreSIGPIPE", "b", bus_property_get_bool, offsetof(ExecContext, ignore_sigpipe), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NoNewPrivileges", "b", bus_property_get_bool, offsetof(ExecContext, no_new_privileges), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SystemCallFilter", "(bas)", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SystemCallErrorNumber", "i", bus_property_get_int, offsetof(ExecContext, syscall_errno), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SystemCallLog", "(bas)", property_get_syscall_log, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Personality", "s", property_get_personality, offsetof(ExecContext, personality), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StateDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_STATE].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StateDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_STATE].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CacheDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_CACHE].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CacheDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_CACHE].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogsDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_LOGS].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LogsDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_LOGS].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConfigurationDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_CONFIGURATION].mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConfigurationDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_CONFIGURATION].paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutCleanUSec", "t", bus_property_get_usec, offsetof(ExecContext, timeout_clean_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictSUIDSGID", "b", bus_property_get_bool, offsetof(ExecContext, restrict_suid_sgid), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TemporaryFileSystem", "a(ss)", property_get_temporary_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MountAPIVFS", "b", property_get_mount_apivfs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectProc", "s", property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
+
+ /* Obsolete/redundant properties: */
+ SD_BUS_PROPERTY("Capabilities", "s", property_get_empty_string, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("IOScheduling", "i", property_get_ioprio, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+
+ SD_BUS_VTABLE_END
+};
+
+static int append_exec_command(sd_bus_message *reply, ExecCommand *c) {
+ int r;
+
+ assert(reply);
+ assert(c);
+
+ if (!c->path)
+ return 0;
+
+ r = sd_bus_message_open_container(reply, 'r', "sasbttttuii");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", c->path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(reply, c->argv);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "bttttuii",
+ !!(c->flags & EXEC_COMMAND_IGNORE_FAILURE),
+ c->exec_status.start_timestamp.realtime,
+ c->exec_status.start_timestamp.monotonic,
+ c->exec_status.exit_timestamp.realtime,
+ c->exec_status.exit_timestamp.monotonic,
+ (uint32_t) c->exec_status.pid,
+ (int32_t) c->exec_status.code,
+ (int32_t) c->exec_status.status);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int append_exec_ex_command(sd_bus_message *reply, ExecCommand *c) {
+ _cleanup_strv_free_ char **ex_opts = NULL;
+ int r;
+
+ assert(reply);
+ assert(c);
+
+ if (!c->path)
+ return 0;
+
+ r = sd_bus_message_open_container(reply, 'r', "sasasttttuii");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", c->path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(reply, c->argv);
+ if (r < 0)
+ return r;
+
+ r = exec_command_flags_to_strv(c->flags, &ex_opts);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(reply, ex_opts);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "ttttuii",
+ c->exec_status.start_timestamp.realtime,
+ c->exec_status.start_timestamp.monotonic,
+ c->exec_status.exit_timestamp.realtime,
+ c->exec_status.exit_timestamp.monotonic,
+ (uint32_t) c->exec_status.pid,
+ (int32_t) c->exec_status.code,
+ (int32_t) c->exec_status.status);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+int bus_property_get_exec_command(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ ExecCommand *c = (ExecCommand*) userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sasbttttuii)");
+ if (r < 0)
+ return r;
+
+ r = append_exec_command(reply, c);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+int bus_property_get_exec_command_list(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ ExecCommand *c = *(ExecCommand**) userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sasbttttuii)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(command, c, c) {
+ r = append_exec_command(reply, c);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+int bus_property_get_exec_ex_command_list(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ ExecCommand *c, *exec_command = *(ExecCommand**) userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sasasttttuii)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(command, c, exec_command) {
+ r = append_exec_ex_command(reply, c);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static char *exec_command_flags_to_exec_chars(ExecCommandFlags flags) {
+ return strjoin(FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE) ? "-" : "",
+ FLAGS_SET(flags, EXEC_COMMAND_NO_ENV_EXPAND) ? ":" : "",
+ FLAGS_SET(flags, EXEC_COMMAND_FULLY_PRIVILEGED) ? "+" : "",
+ FLAGS_SET(flags, EXEC_COMMAND_NO_SETUID) ? "!" : "",
+ FLAGS_SET(flags, EXEC_COMMAND_AMBIENT_MAGIC) ? "!!" : "");
+}
+
+int bus_set_transient_exec_command(
+ Unit *u,
+ const char *name,
+ ExecCommand **exec_command,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+ bool is_ex_prop = endswith(name, "Ex");
+ unsigned n = 0;
+ int r;
+
+ r = sd_bus_message_enter_container(message, 'a', is_ex_prop ? "(sasas)" : "(sasb)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(message, 'r', is_ex_prop ? "sasas" : "sasb")) > 0) {
+ _cleanup_strv_free_ char **argv = NULL, **ex_opts = NULL;
+ const char *path;
+ int b;
+
+ r = sd_bus_message_read(message, "s", &path);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(path) && !filename_is_valid(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "\"%s\" is neither a valid executable name nor an absolute path",
+ path);
+
+ r = sd_bus_message_read_strv(message, &argv);
+ if (r < 0)
+ return r;
+
+ r = is_ex_prop ? sd_bus_message_read_strv(message, &ex_opts) : sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ ExecCommand *c;
+
+ c = new0(ExecCommand, 1);
+ if (!c)
+ return -ENOMEM;
+
+ c->path = strdup(path);
+ if (!c->path) {
+ free(c);
+ return -ENOMEM;
+ }
+
+ c->argv = TAKE_PTR(argv);
+
+ if (is_ex_prop) {
+ r = exec_command_flags_from_strv(ex_opts, &c->flags);
+ if (r < 0)
+ return r;
+ } else
+ c->flags = b ? EXEC_COMMAND_IGNORE_FAILURE : 0;
+
+ path_simplify(c->path, false);
+ exec_command_append_list(exec_command, c);
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ ExecCommand *c;
+ size_t size = 0;
+
+ if (n == 0)
+ *exec_command = exec_command_free_list(*exec_command);
+
+ f = open_memstream_unlocked(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fprintf(f, "%s=\n", name);
+
+ LIST_FOREACH(command, c, *exec_command) {
+ _cleanup_free_ char *a = NULL, *exec_chars = NULL;
+
+ exec_chars = exec_command_flags_to_exec_chars(c->flags);
+ if (!exec_chars)
+ return -ENOMEM;
+
+ a = unit_concat_strv(c->argv, UNIT_ESCAPE_C|UNIT_ESCAPE_SPECIFIERS);
+ if (!a)
+ return -ENOMEM;
+
+ if (streq_ptr(c->path, c->argv ? c->argv[0] : NULL))
+ fprintf(f, "%s=%s%s\n", name, exec_chars, a);
+ else {
+ _cleanup_free_ char *t = NULL;
+ const char *p;
+
+ p = unit_escape_setting(c->path, UNIT_ESCAPE_C|UNIT_ESCAPE_SPECIFIERS, &t);
+ if (!p)
+ return -ENOMEM;
+
+ fprintf(f, "%s=%s@%s %s\n", name, exec_chars, p, a);
+ }
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, buf);
+ }
+
+ return 1;
+}
+
+static int parse_personality(const char *s, unsigned long *p) {
+ unsigned long v;
+
+ assert(p);
+
+ v = personality_from_string(s);
+ if (v == PERSONALITY_INVALID)
+ return -EINVAL;
+
+ *p = v;
+ return 0;
+}
+
+static const char* mount_propagation_flags_to_string_with_check(unsigned long n) {
+ if (!IN_SET(n, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE))
+ return NULL;
+
+ return mount_propagation_flags_to_string(n);
+}
+
+static BUS_DEFINE_SET_TRANSIENT(nsec, "t", uint64_t, nsec_t, NSEC_FMT);
+static BUS_DEFINE_SET_TRANSIENT_IS_VALID(log_level, "i", int32_t, int, "%" PRIi32, log_level_is_valid);
+#if HAVE_SECCOMP
+static BUS_DEFINE_SET_TRANSIENT_IS_VALID(errno, "i", int32_t, int, "%" PRIi32, seccomp_errno_or_action_is_valid);
+#endif
+static BUS_DEFINE_SET_TRANSIENT_PARSE(std_input, ExecInput, exec_input_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(std_output, ExecOutput, exec_output_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(utmp_mode, ExecUtmpMode, exec_utmp_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_system, ProtectSystem, protect_system_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_home, ProtectHome, protect_home_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(keyring_mode, ExecKeyringMode, exec_keyring_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_proc, ProtectProc, protect_proc_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(proc_subset, ProcSubset, proc_subset_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(preserve_mode, ExecPreserveMode, exec_preserve_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(personality, unsigned long, parse_personality);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(secure_bits, "i", int32_t, int, "%" PRIi32, secure_bits_to_string_alloc_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(capability, "t", uint64_t, uint64_t, "%" PRIu64, capability_set_to_string_alloc);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(namespace_flag, "t", uint64_t, unsigned long, "%" PRIu64, namespace_flags_to_string);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(mount_flags, "t", uint64_t, unsigned long, "%" PRIu64, mount_propagation_flags_to_string_with_check);
+
+/* ret_format_str is an accumulator, so if it has any pre-existing content, new options will be appended to it */
+static int read_mount_options(sd_bus_message *message, sd_bus_error *error, MountOptions **ret_options, char **ret_format_str, const char *separator) {
+ _cleanup_(mount_options_free_allp) MountOptions *options = NULL;
+ _cleanup_free_ char *format_str = NULL;
+ const char *mount_options, *partition;
+ int r;
+
+ assert(message);
+ assert(ret_options);
+ assert(ret_format_str);
+ assert(separator);
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &partition, &mount_options)) > 0) {
+ _cleanup_free_ char *previous = NULL, *escaped = NULL;
+ _cleanup_free_ MountOptions *o = NULL;
+ PartitionDesignator partition_designator;
+
+ if (chars_intersect(mount_options, WHITESPACE))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid mount options string, contains whitespace character(s): %s", mount_options);
+
+ partition_designator = partition_designator_from_string(partition);
+ if (partition_designator < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid partition name %s", partition);
+
+ /* Need to store them in the unit with the escapes, so that they can be parsed again */
+ escaped = shell_escape(mount_options, ":");
+ if (!escaped)
+ return -ENOMEM;
+
+ previous = TAKE_PTR(format_str);
+ format_str = strjoin(previous, previous ? separator : "", partition, ":", escaped);
+ if (!format_str)
+ return -ENOMEM;
+
+ o = new(MountOptions, 1);
+ if (!o)
+ return -ENOMEM;
+ *o = (MountOptions) {
+ .partition_designator = partition_designator,
+ .options = strdup(mount_options),
+ };
+ if (!o->options)
+ return -ENOMEM;
+ LIST_APPEND(mount_options, options, TAKE_PTR(o));
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!LIST_IS_EMPTY(options)) {
+ char *final = strjoin(*ret_format_str, !isempty(*ret_format_str) ? separator : "", format_str);
+ if (!final)
+ return -ENOMEM;
+ free_and_replace(*ret_format_str, final);
+ LIST_JOIN(mount_options, *ret_options, options);
+ }
+
+ return 0;
+}
+
+int bus_exec_context_set_transient_property(
+ Unit *u,
+ ExecContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *suffix;
+ int r;
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "User"))
+ return bus_set_transient_user_relaxed(u, name, &c->user, message, flags, error);
+
+ if (streq(name, "Group"))
+ return bus_set_transient_user_relaxed(u, name, &c->group, message, flags, error);
+
+ if (streq(name, "TTYPath"))
+ return bus_set_transient_path(u, name, &c->tty_path, message, flags, error);
+
+ if (streq(name, "RootImage"))
+ return bus_set_transient_path(u, name, &c->root_image, message, flags, error);
+
+ if (streq(name, "RootImageOptions")) {
+ _cleanup_(mount_options_free_allp) MountOptions *options = NULL;
+ _cleanup_free_ char *format_str = NULL;
+
+ r = read_mount_options(message, error, &options, &format_str, " ");
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (LIST_IS_EMPTY(options)) {
+ c->root_image_options = mount_options_free_all(c->root_image_options);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ LIST_JOIN(mount_options, c->root_image_options, options);
+ unit_write_settingf(
+ u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s",
+ name,
+ format_str);
+ }
+ }
+
+ return 1;
+ }
+
+ if (streq(name, "RootHash")) {
+ const void *roothash_decoded;
+ size_t roothash_decoded_size;
+
+ r = sd_bus_message_read_array(message, 'y', &roothash_decoded, &roothash_decoded_size);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *encoded = NULL;
+
+ if (roothash_decoded_size == 0) {
+ c->root_hash_path = mfree(c->root_hash_path);
+ c->root_hash = mfree(c->root_hash);
+ c->root_hash_size = 0;
+
+ unit_write_settingf(u, flags, name, "RootHash=");
+ } else {
+ _cleanup_free_ void *p;
+
+ encoded = hexmem(roothash_decoded, roothash_decoded_size);
+ if (!encoded)
+ return -ENOMEM;
+
+ p = memdup(roothash_decoded, roothash_decoded_size);
+ if (!p)
+ return -ENOMEM;
+
+ free_and_replace(c->root_hash, p);
+ c->root_hash_size = roothash_decoded_size;
+ c->root_hash_path = mfree(c->root_hash_path);
+
+ unit_write_settingf(u, flags, name, "RootHash=%s", encoded);
+ }
+ }
+
+ return 1;
+ }
+
+ if (streq(name, "RootHashPath")) {
+ c->root_hash_size = 0;
+ c->root_hash = mfree(c->root_hash);
+
+ return bus_set_transient_path(u, "RootHash", &c->root_hash_path, message, flags, error);
+ }
+
+ if (streq(name, "RootHashSignature")) {
+ const void *roothash_sig_decoded;
+ size_t roothash_sig_decoded_size;
+
+ r = sd_bus_message_read_array(message, 'y', &roothash_sig_decoded, &roothash_sig_decoded_size);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *encoded = NULL;
+
+ if (roothash_sig_decoded_size == 0) {
+ c->root_hash_sig_path = mfree(c->root_hash_sig_path);
+ c->root_hash_sig = mfree(c->root_hash_sig);
+ c->root_hash_sig_size = 0;
+
+ unit_write_settingf(u, flags, name, "RootHashSignature=");
+ } else {
+ _cleanup_free_ void *p;
+ ssize_t len;
+
+ len = base64mem(roothash_sig_decoded, roothash_sig_decoded_size, &encoded);
+ if (len < 0)
+ return -ENOMEM;
+
+ p = memdup(roothash_sig_decoded, roothash_sig_decoded_size);
+ if (!p)
+ return -ENOMEM;
+
+ free_and_replace(c->root_hash_sig, p);
+ c->root_hash_sig_size = roothash_sig_decoded_size;
+ c->root_hash_sig_path = mfree(c->root_hash_sig_path);
+
+ unit_write_settingf(u, flags, name, "RootHashSignature=base64:%s", encoded);
+ }
+ }
+
+ return 1;
+ }
+
+ if (streq(name, "RootHashSignaturePath")) {
+ c->root_hash_sig_size = 0;
+ c->root_hash_sig = mfree(c->root_hash_sig);
+
+ return bus_set_transient_path(u, "RootHashSignature", &c->root_hash_sig_path, message, flags, error);
+ }
+
+ if (streq(name, "RootVerity"))
+ return bus_set_transient_path(u, name, &c->root_verity, message, flags, error);
+
+ if (streq(name, "RootDirectory"))
+ return bus_set_transient_path(u, name, &c->root_directory, message, flags, error);
+
+ if (streq(name, "SyslogIdentifier"))
+ return bus_set_transient_string(u, name, &c->syslog_identifier, message, flags, error);
+
+ if (streq(name, "LogLevelMax"))
+ return bus_set_transient_log_level(u, name, &c->log_level_max, message, flags, error);
+
+ if (streq(name, "LogRateLimitIntervalUSec"))
+ return bus_set_transient_usec(u, name, &c->log_ratelimit_interval_usec, message, flags, error);
+
+ if (streq(name, "LogRateLimitBurst"))
+ return bus_set_transient_unsigned(u, name, &c->log_ratelimit_burst, message, flags, error);
+
+ if (streq(name, "Personality"))
+ return bus_set_transient_personality(u, name, &c->personality, message, flags, error);
+
+ if (streq(name, "StandardInput"))
+ return bus_set_transient_std_input(u, name, &c->std_input, message, flags, error);
+
+ if (streq(name, "StandardOutput"))
+ return bus_set_transient_std_output(u, name, &c->std_output, message, flags, error);
+
+ if (streq(name, "StandardError"))
+ return bus_set_transient_std_output(u, name, &c->std_error, message, flags, error);
+
+ if (streq(name, "IgnoreSIGPIPE"))
+ return bus_set_transient_bool(u, name, &c->ignore_sigpipe, message, flags, error);
+
+ if (streq(name, "TTYVHangup"))
+ return bus_set_transient_bool(u, name, &c->tty_vhangup, message, flags, error);
+
+ if (streq(name, "TTYReset"))
+ return bus_set_transient_bool(u, name, &c->tty_reset, message, flags, error);
+
+ if (streq(name, "TTYVTDisallocate"))
+ return bus_set_transient_bool(u, name, &c->tty_vt_disallocate, message, flags, error);
+
+ if (streq(name, "PrivateTmp"))
+ return bus_set_transient_bool(u, name, &c->private_tmp, message, flags, error);
+
+ if (streq(name, "PrivateDevices"))
+ return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
+
+ if (streq(name, "PrivateMounts"))
+ return bus_set_transient_bool(u, name, &c->private_mounts, message, flags, error);
+
+ if (streq(name, "PrivateNetwork"))
+ return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
+
+ if (streq(name, "PrivateUsers"))
+ return bus_set_transient_bool(u, name, &c->private_users, message, flags, error);
+
+ if (streq(name, "NoNewPrivileges"))
+ return bus_set_transient_bool(u, name, &c->no_new_privileges, message, flags, error);
+
+ if (streq(name, "SyslogLevelPrefix"))
+ return bus_set_transient_bool(u, name, &c->syslog_level_prefix, message, flags, error);
+
+ if (streq(name, "MemoryDenyWriteExecute"))
+ return bus_set_transient_bool(u, name, &c->memory_deny_write_execute, message, flags, error);
+
+ if (streq(name, "RestrictRealtime"))
+ return bus_set_transient_bool(u, name, &c->restrict_realtime, message, flags, error);
+
+ if (streq(name, "RestrictSUIDSGID"))
+ return bus_set_transient_bool(u, name, &c->restrict_suid_sgid, message, flags, error);
+
+ if (streq(name, "DynamicUser"))
+ return bus_set_transient_bool(u, name, &c->dynamic_user, message, flags, error);
+
+ if (streq(name, "RemoveIPC"))
+ return bus_set_transient_bool(u, name, &c->remove_ipc, message, flags, error);
+
+ if (streq(name, "ProtectKernelTunables"))
+ return bus_set_transient_bool(u, name, &c->protect_kernel_tunables, message, flags, error);
+
+ if (streq(name, "ProtectKernelModules"))
+ return bus_set_transient_bool(u, name, &c->protect_kernel_modules, message, flags, error);
+
+ if (streq(name, "ProtectKernelLogs"))
+ return bus_set_transient_bool(u, name, &c->protect_kernel_logs, message, flags, error);
+
+ if (streq(name, "ProtectClock"))
+ return bus_set_transient_bool(u, name, &c->protect_clock, message, flags, error);
+
+ if (streq(name, "ProtectControlGroups"))
+ return bus_set_transient_bool(u, name, &c->protect_control_groups, message, flags, error);
+
+ if (streq(name, "CPUSchedulingResetOnFork"))
+ return bus_set_transient_bool(u, name, &c->cpu_sched_reset_on_fork, message, flags, error);
+
+ if (streq(name, "NonBlocking"))
+ return bus_set_transient_bool(u, name, &c->non_blocking, message, flags, error);
+
+ if (streq(name, "LockPersonality"))
+ return bus_set_transient_bool(u, name, &c->lock_personality, message, flags, error);
+
+ if (streq(name, "ProtectHostname"))
+ return bus_set_transient_bool(u, name, &c->protect_hostname, message, flags, error);
+
+ if (streq(name, "UtmpIdentifier"))
+ return bus_set_transient_string(u, name, &c->utmp_id, message, flags, error);
+
+ if (streq(name, "UtmpMode"))
+ return bus_set_transient_utmp_mode(u, name, &c->utmp_mode, message, flags, error);
+
+ if (streq(name, "PAMName"))
+ return bus_set_transient_string(u, name, &c->pam_name, message, flags, error);
+
+ if (streq(name, "TimerSlackNSec"))
+ return bus_set_transient_nsec(u, name, &c->timer_slack_nsec, message, flags, error);
+
+ if (streq(name, "ProtectSystem"))
+ return bus_set_transient_protect_system(u, name, &c->protect_system, message, flags, error);
+
+ if (streq(name, "ProtectHome"))
+ return bus_set_transient_protect_home(u, name, &c->protect_home, message, flags, error);
+
+ if (streq(name, "KeyringMode"))
+ return bus_set_transient_keyring_mode(u, name, &c->keyring_mode, message, flags, error);
+
+ if (streq(name, "ProtectProc"))
+ return bus_set_transient_protect_proc(u, name, &c->protect_proc, message, flags, error);
+
+ if (streq(name, "ProcSubset"))
+ return bus_set_transient_proc_subset(u, name, &c->proc_subset, message, flags, error);
+
+ if (streq(name, "RuntimeDirectoryPreserve"))
+ return bus_set_transient_preserve_mode(u, name, &c->runtime_directory_preserve_mode, message, flags, error);
+
+ if (streq(name, "UMask"))
+ return bus_set_transient_mode_t(u, name, &c->umask, message, flags, error);
+
+ if (streq(name, "RuntimeDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_RUNTIME].mode, message, flags, error);
+
+ if (streq(name, "StateDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_STATE].mode, message, flags, error);
+
+ if (streq(name, "CacheDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_CACHE].mode, message, flags, error);
+
+ if (streq(name, "LogsDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_LOGS].mode, message, flags, error);
+
+ if (streq(name, "ConfigurationDirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &c->directories[EXEC_DIRECTORY_CONFIGURATION].mode, message, flags, error);
+
+ if (streq(name, "SELinuxContext"))
+ return bus_set_transient_string(u, name, &c->selinux_context, message, flags, error);
+
+ if (streq(name, "SecureBits"))
+ return bus_set_transient_secure_bits(u, name, &c->secure_bits, message, flags, error);
+
+ if (streq(name, "CapabilityBoundingSet"))
+ return bus_set_transient_capability(u, name, &c->capability_bounding_set, message, flags, error);
+
+ if (streq(name, "AmbientCapabilities"))
+ return bus_set_transient_capability(u, name, &c->capability_ambient_set, message, flags, error);
+
+ if (streq(name, "RestrictNamespaces"))
+ return bus_set_transient_namespace_flag(u, name, &c->restrict_namespaces, message, flags, error);
+
+ if (streq(name, "MountFlags"))
+ return bus_set_transient_mount_flags(u, name, &c->mount_flags, message, flags, error);
+
+ if (streq(name, "NetworkNamespacePath"))
+ return bus_set_transient_path(u, name, &c->network_namespace_path, message, flags, error);
+
+ if (streq(name, "SupplementaryGroups")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l)
+ if (!isempty(*p) && !valid_user_group_name(*p, VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX|VALID_USER_WARN))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid supplementary group names");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->supplementary_groups = strv_free(c->supplementary_groups);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ r = strv_extend_strv(&c->supplementary_groups, l, true);
+ if (r < 0)
+ return -ENOMEM;
+
+ joined = strv_join(c->supplementary_groups, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "SetCredential")) {
+ bool isempty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(say)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *id;
+ const void *p;
+ size_t sz;
+
+ r = sd_bus_message_enter_container(message, 'r', "say");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(message, "s", &id);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(message, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!credential_name_valid(id))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Credential ID is invalid: %s", id);
+
+ isempty = false;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+ _cleanup_free_ void *copy = NULL;
+ ExecSetCredential *old;
+
+ copy = memdup(p, sz);
+ if (!copy)
+ return -ENOMEM;
+
+ old = hashmap_get(c->set_credentials, id);
+ if (old) {
+ free_and_replace(old->data, copy);
+ old->size = sz;
+ } else {
+ _cleanup_(exec_set_credential_freep) ExecSetCredential *sc = NULL;
+
+ sc = new0(ExecSetCredential, 1);
+ if (!sc)
+ return -ENOMEM;
+
+ sc->id = strdup(id);
+ if (!sc->id)
+ return -ENOMEM;
+
+ sc->data = TAKE_PTR(copy);
+ sc->size = sz;
+
+ r = hashmap_ensure_allocated(&c->set_credentials, &exec_set_credential_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(c->set_credentials, sc->id, sc);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(sc);
+ }
+
+ a = specifier_escape(id);
+ if (!a)
+ return -ENOMEM;
+
+ b = cescape_length(p, sz);
+ if (!b)
+ return -ENOMEM;
+
+ (void) unit_write_settingf(u, flags, name, "%s=%s:%s", name, a, b);
+ }
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && isempty) {
+ c->set_credentials = hashmap_free(c->set_credentials);
+ (void) unit_write_settingf(u, flags, name, "%s=", name);
+ }
+
+ return 1;
+
+ } else if (streq(name, "LoadCredential")) {
+ bool isempty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *id, *source;
+
+ r = sd_bus_message_read(message, "(ss)", &id, &source);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!credential_name_valid(id))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Credential ID is invalid: %s", id);
+
+ if (!(path_is_absolute(source) ? path_is_normalized(source) : credential_name_valid(source)))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Credential source is invalid: %s", source);
+
+ isempty = false;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = strv_extend_strv(&c->load_credentials, STRV_MAKE(id, source), /* filter_duplicates = */ false);
+ if (r < 0)
+ return r;
+
+ (void) unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s:%s", name, id, source);
+ }
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && isempty) {
+ c->load_credentials = strv_free(c->load_credentials);
+ (void) unit_write_settingf(u, flags, name, "%s=", name);
+ }
+
+ return 1;
+
+ } else if (streq(name, "SyslogLevel")) {
+ int32_t level;
+
+ r = sd_bus_message_read(message, "i", &level);
+ if (r < 0)
+ return r;
+
+ if (!log_level_is_valid(level))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Log level value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->syslog_priority = (c->syslog_priority & LOG_FACMASK) | level;
+ unit_write_settingf(u, flags, name, "SyslogLevel=%i", level);
+ }
+
+ return 1;
+
+ } else if (streq(name, "SyslogFacility")) {
+ int32_t facility;
+
+ r = sd_bus_message_read(message, "i", &facility);
+ if (r < 0)
+ return r;
+
+ if (!log_facility_unshifted_is_valid(facility))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Log facility value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->syslog_priority = (facility << 3) | LOG_PRI(c->syslog_priority);
+ unit_write_settingf(u, flags, name, "SyslogFacility=%i", facility);
+ }
+
+ return 1;
+
+ } else if (streq(name, "LogNamespace")) {
+ const char *n;
+
+ r = sd_bus_message_read(message, "s", &n);
+ if (r < 0)
+ return r;
+
+ if (!isempty(n) && !log_namespace_name_valid(n))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Log namespace name not valid");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+
+ if (isempty(n)) {
+ c->log_namespace = mfree(c->log_namespace);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ r = free_and_strdup(&c->log_namespace, n);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, n);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "LogExtraFields")) {
+ size_t n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "ay");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ void *copy = NULL;
+ struct iovec *t;
+ const char *eq;
+ const void *p;
+ size_t sz;
+
+ /* Note that we expect a byte array for each field, instead of a string. That's because on the
+ * lower-level journal fields can actually contain binary data and are not restricted to text,
+ * and we should not "lose precision" in our types on the way. That said, I am pretty sure
+ * actually encoding binary data as unit metadata is not a good idea. Hence we actually refuse
+ * any actual binary data, and only accept UTF-8. This allows us to eventually lift this
+ * limitation, should a good, valid usecase arise. */
+
+ r = sd_bus_message_read_array(message, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (memchr(p, 0, sz))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Journal field contains zero byte");
+
+ eq = memchr(p, '=', sz);
+ if (!eq)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Journal field contains no '=' character");
+ if (!journal_field_valid(p, eq - (const char*) p, false))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Journal field invalid");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ t = reallocarray(c->log_extra_fields, c->n_log_extra_fields+1, sizeof(struct iovec));
+ if (!t)
+ return -ENOMEM;
+ c->log_extra_fields = t;
+ }
+
+ copy = malloc(sz + 1);
+ if (!copy)
+ return -ENOMEM;
+
+ memcpy(copy, p, sz);
+ ((uint8_t*) copy)[sz] = 0;
+
+ if (!utf8_is_valid(copy))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Journal field is not valid UTF-8");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->log_extra_fields[c->n_log_extra_fields++] = IOVEC_MAKE(copy, sz);
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS|UNIT_ESCAPE_C, name, "LogExtraFields=%s", (char*) copy);
+
+ copy = NULL;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && n == 0) {
+ exec_context_free_log_extra_fields(c);
+ unit_write_setting(u, flags, name, "LogExtraFields=");
+ }
+
+ return 1;
+ }
+
+#if HAVE_SECCOMP
+
+ if (streq(name, "SystemCallErrorNumber"))
+ return bus_set_transient_errno(u, name, &c->syscall_errno, message, flags, error);
+
+ if (streq(name, "SystemCallFilter")) {
+ int allow_list;
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_enter_container(message, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &allow_list);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *joined = NULL;
+ SeccompParseFlags invert_flag = allow_list ? 0 : SECCOMP_PARSE_INVERT;
+ char **s;
+
+ if (strv_isempty(l)) {
+ c->syscall_allow_list = false;
+ c->syscall_filter = hashmap_free(c->syscall_filter);
+
+ unit_write_settingf(u, flags, name, "SystemCallFilter=");
+ return 1;
+ }
+
+ if (!c->syscall_filter) {
+ c->syscall_filter = hashmap_new(NULL);
+ if (!c->syscall_filter)
+ return log_oom();
+
+ c->syscall_allow_list = allow_list;
+
+ if (c->syscall_allow_list) {
+ r = seccomp_parse_syscall_filter("@default",
+ -1,
+ c->syscall_filter,
+ SECCOMP_PARSE_PERMISSIVE |
+ SECCOMP_PARSE_ALLOW_LIST | invert_flag,
+ u->id,
+ NULL, 0);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ STRV_FOREACH(s, l) {
+ _cleanup_free_ char *n = NULL;
+ int e;
+
+ r = parse_syscall_and_errno(*s, &n, &e);
+ if (r < 0)
+ return r;
+
+ r = seccomp_parse_syscall_filter(n,
+ e,
+ c->syscall_filter,
+ SECCOMP_PARSE_LOG | SECCOMP_PARSE_PERMISSIVE |
+ invert_flag |
+ (c->syscall_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0),
+ u->id,
+ NULL, 0);
+ if (r < 0)
+ return r;
+ }
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "SystemCallFilter=%s%s", allow_list ? "" : "~", joined);
+ }
+
+ return 1;
+
+ } else if (streq(name, "SystemCallLog")) {
+ int allow_list;
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_enter_container(message, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &allow_list);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *joined = NULL;
+ SeccompParseFlags invert_flag = allow_list ? 0 : SECCOMP_PARSE_INVERT;
+ char **s;
+
+ if (strv_isempty(l)) {
+ c->syscall_log_allow_list = false;
+ c->syscall_log = hashmap_free(c->syscall_log);
+
+ unit_write_settingf(u, flags, name, "SystemCallLog=");
+ return 1;
+ }
+
+ if (!c->syscall_log) {
+ c->syscall_log = hashmap_new(NULL);
+ if (!c->syscall_log)
+ return log_oom();
+
+ c->syscall_log_allow_list = allow_list;
+ }
+
+ STRV_FOREACH(s, l) {
+ _cleanup_free_ char *n = NULL;
+ int e;
+
+ r = parse_syscall_and_errno(*s, &n, &e);
+ if (r < 0)
+ return r;
+
+ r = seccomp_parse_syscall_filter(n,
+ 0, /* errno not used */
+ c->syscall_log,
+ SECCOMP_PARSE_LOG | SECCOMP_PARSE_PERMISSIVE |
+ invert_flag |
+ (c->syscall_log_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0),
+ u->id,
+ NULL, 0);
+ if (r < 0)
+ return r;
+ }
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "SystemCallLog=%s%s", allow_list ? "" : "~", joined);
+ }
+
+ return 1;
+
+ } else if (streq(name, "SystemCallArchitectures")) {
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *joined = NULL;
+
+ if (strv_isempty(l))
+ c->syscall_archs = set_free(c->syscall_archs);
+ else {
+ char **s;
+
+ STRV_FOREACH(s, l) {
+ uint32_t a;
+
+ r = seccomp_arch_from_string(*s, &a);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_put(&c->syscall_archs, NULL, UINT32_TO_PTR(a + 1));
+ if (r < 0)
+ return r;
+ }
+
+ }
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, joined);
+ }
+
+ return 1;
+
+ } else if (streq(name, "RestrictAddressFamilies")) {
+ int allow_list;
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_enter_container(message, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &allow_list);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *joined = NULL;
+ char **s;
+
+ if (strv_isempty(l)) {
+ c->address_families_allow_list = false;
+ c->address_families = set_free(c->address_families);
+
+ unit_write_settingf(u, flags, name, "RestrictAddressFamilies=");
+ return 1;
+ }
+
+ if (!c->address_families) {
+ c->address_families = set_new(NULL);
+ if (!c->address_families)
+ return log_oom();
+
+ c->address_families_allow_list = allow_list;
+ }
+
+ STRV_FOREACH(s, l) {
+ int af;
+
+ af = af_from_name(*s);
+ if (af < 0)
+ return af;
+
+ if (allow_list == c->address_families_allow_list) {
+ r = set_put(c->address_families, INT_TO_PTR(af));
+ if (r < 0)
+ return r;
+ } else
+ (void) set_remove(c->address_families, INT_TO_PTR(af));
+ }
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "RestrictAddressFamilies=%s%s", allow_list ? "" : "~", joined);
+ }
+
+ return 1;
+ }
+#endif
+ if (STR_IN_SET(name, "CPUAffinity", "NUMAMask")) {
+ const void *a;
+ size_t n;
+ bool affinity = streq(name, "CPUAffinity");
+ _cleanup_(cpu_set_reset) CPUSet set = {};
+
+ r = sd_bus_message_read_array(message, 'y', &a, &n);
+ if (r < 0)
+ return r;
+
+ r = cpu_set_from_dbus(a, n, &set);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (n == 0) {
+ cpu_set_reset(affinity ? &c->cpu_set : &c->numa_policy.nodes);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *str = NULL;
+
+ str = cpu_set_to_string(&set);
+ if (!str)
+ return -ENOMEM;
+
+ /* We forego any optimizations here, and always create the structure using
+ * cpu_set_add_all(), because we don't want to care if the existing size we
+ * got over dbus is appropriate. */
+ r = cpu_set_add_all(affinity ? &c->cpu_set : &c->numa_policy.nodes, &set);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, str);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "CPUAffinityFromNUMA")) {
+ int q;
+
+ r = sd_bus_message_read_basic(message, 'b', &q);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_affinity_from_numa = q;
+ unit_write_settingf(u, flags, name, "%s=%s", "CPUAffinity", "numa");
+ }
+
+ return 1;
+
+ } else if (streq(name, "NUMAPolicy")) {
+ int32_t type;
+
+ r = sd_bus_message_read(message, "i", &type);
+ if (r < 0)
+ return r;
+
+ if (!mpol_is_valid(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NUMAPolicy value: %i", type);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags))
+ c->numa_policy.type = type;
+
+ return 1;
+
+ } else if (streq(name, "Nice")) {
+ int32_t q;
+
+ r = sd_bus_message_read(message, "i", &q);
+ if (r < 0)
+ return r;
+
+ if (!nice_is_valid(q))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid Nice value: %i", q);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->nice = q;
+ c->nice_set = true;
+
+ unit_write_settingf(u, flags, name, "Nice=%i", q);
+ }
+
+ return 1;
+
+ } else if (streq(name, "CPUSchedulingPolicy")) {
+ int32_t q;
+
+ r = sd_bus_message_read(message, "i", &q);
+ if (r < 0)
+ return r;
+
+ if (!sched_policy_is_valid(q))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid CPU scheduling policy: %i", q);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = sched_policy_to_string_alloc(q, &s);
+ if (r < 0)
+ return r;
+
+ c->cpu_sched_policy = q;
+ c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(q), sched_get_priority_max(q));
+ c->cpu_sched_set = true;
+
+ unit_write_settingf(u, flags, name, "CPUSchedulingPolicy=%s", s);
+ }
+
+ return 1;
+
+ } else if (streq(name, "CPUSchedulingPriority")) {
+ int32_t p, min, max;
+
+ r = sd_bus_message_read(message, "i", &p);
+ if (r < 0)
+ return r;
+
+ min = sched_get_priority_min(c->cpu_sched_policy);
+ max = sched_get_priority_max(c->cpu_sched_policy);
+ if (p < min || p > max)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid CPU scheduling priority: %i", p);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_sched_priority = p;
+ c->cpu_sched_set = true;
+
+ unit_write_settingf(u, flags, name, "CPUSchedulingPriority=%i", p);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IOSchedulingClass")) {
+ int32_t q;
+
+ r = sd_bus_message_read(message, "i", &q);
+ if (r < 0)
+ return r;
+
+ if (!ioprio_class_is_valid(q))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid IO scheduling class: %i", q);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = ioprio_class_to_string_alloc(q, &s);
+ if (r < 0)
+ return r;
+
+ c->ioprio = IOPRIO_PRIO_VALUE(q, IOPRIO_PRIO_DATA(c->ioprio));
+ c->ioprio_set = true;
+
+ unit_write_settingf(u, flags, name, "IOSchedulingClass=%s", s);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IOSchedulingPriority")) {
+ int32_t p;
+
+ r = sd_bus_message_read(message, "i", &p);
+ if (r < 0)
+ return r;
+
+ if (!ioprio_priority_is_valid(p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid IO scheduling priority: %i", p);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_PRIO_CLASS(c->ioprio), p);
+ c->ioprio_set = true;
+
+ unit_write_settingf(u, flags, name, "IOSchedulingPriority=%i", p);
+ }
+
+ return 1;
+
+ } else if (streq(name, "MountAPIVFS")) {
+ bool b;
+
+ r = bus_set_transient_bool(u, name, &b, message, flags, error);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->mount_apivfs = b;
+ c->mount_apivfs_set = true;
+ }
+
+ return 1;
+
+ } else if (streq(name, "WorkingDirectory")) {
+ const char *s;
+ bool missing_ok;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (s[0] == '-') {
+ missing_ok = true;
+ s++;
+ } else
+ missing_ok = false;
+
+ if (!isempty(s) && !streq(s, "~") && !path_is_absolute(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "WorkingDirectory= expects an absolute path or '~'");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (streq(s, "~")) {
+ c->working_directory = mfree(c->working_directory);
+ c->working_directory_home = true;
+ } else {
+ r = free_and_strdup(&c->working_directory, empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->working_directory_home = false;
+ }
+
+ c->working_directory_missing_ok = missing_ok;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "WorkingDirectory=%s%s", missing_ok ? "-" : "", s);
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name,
+ "StandardInputFileDescriptorName", "StandardOutputFileDescriptorName", "StandardErrorFileDescriptorName")) {
+ const char *s;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!isempty(s) && !fdname_is_valid(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid file descriptor name");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+
+ if (streq(name, "StandardInputFileDescriptorName")) {
+ r = free_and_strdup(c->stdio_fdname + STDIN_FILENO, empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->std_input = EXEC_INPUT_NAMED_FD;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardInput=fd:%s", exec_context_fdname(c, STDIN_FILENO));
+
+ } else if (streq(name, "StandardOutputFileDescriptorName")) {
+ r = free_and_strdup(c->stdio_fdname + STDOUT_FILENO, empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->std_output = EXEC_OUTPUT_NAMED_FD;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=fd:%s", exec_context_fdname(c, STDOUT_FILENO));
+
+ } else {
+ assert(streq(name, "StandardErrorFileDescriptorName"));
+
+ r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->std_error = EXEC_OUTPUT_NAMED_FD;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=fd:%s", exec_context_fdname(c, STDERR_FILENO));
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name,
+ "StandardInputFile",
+ "StandardOutputFile", "StandardOutputFileToAppend",
+ "StandardErrorFile", "StandardErrorFileToAppend")) {
+ const char *s;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!isempty(s)) {
+ if (!path_is_absolute(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute", s);
+ if (!path_is_normalized(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not normalized", s);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+
+ if (streq(name, "StandardInputFile")) {
+ r = free_and_strdup(&c->stdio_file[STDIN_FILENO], empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ c->std_input = EXEC_INPUT_FILE;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardInput=file:%s", s);
+
+ } else if (STR_IN_SET(name, "StandardOutputFile", "StandardOutputFileToAppend")) {
+ r = free_and_strdup(&c->stdio_file[STDOUT_FILENO], empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ if (streq(name, "StandardOutputFile")) {
+ c->std_output = EXEC_OUTPUT_FILE;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=file:%s", s);
+ } else {
+ assert(streq(name, "StandardOutputFileToAppend"));
+ c->std_output = EXEC_OUTPUT_FILE_APPEND;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardOutput=append:%s", s);
+ }
+ } else {
+ assert(STR_IN_SET(name, "StandardErrorFile", "StandardErrorFileToAppend"));
+
+ r = free_and_strdup(&c->stdio_file[STDERR_FILENO], empty_to_null(s));
+ if (r < 0)
+ return r;
+
+ if (streq(name, "StandardErrorFile")) {
+ c->std_error = EXEC_OUTPUT_FILE;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=file:%s", s);
+ } else {
+ assert(streq(name, "StandardErrorFileToAppend"));
+ c->std_error = EXEC_OUTPUT_FILE_APPEND;
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "StandardError=append:%s", s);
+ }
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "StandardInputData")) {
+ const void *p;
+ size_t sz;
+
+ r = sd_bus_message_read_array(message, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *encoded = NULL;
+
+ if (sz == 0) {
+ c->stdin_data = mfree(c->stdin_data);
+ c->stdin_data_size = 0;
+
+ unit_write_settingf(u, flags, name, "StandardInputData=");
+ } else {
+ void *q;
+ ssize_t n;
+
+ if (c->stdin_data_size + sz < c->stdin_data_size || /* check for overflow */
+ c->stdin_data_size + sz > EXEC_STDIN_DATA_MAX)
+ return -E2BIG;
+
+ n = base64mem(p, sz, &encoded);
+ if (n < 0)
+ return (int) n;
+
+ q = realloc(c->stdin_data, c->stdin_data_size + sz);
+ if (!q)
+ return -ENOMEM;
+
+ memcpy((uint8_t*) q + c->stdin_data_size, p, sz);
+
+ c->stdin_data = q;
+ c->stdin_data_size += sz;
+
+ unit_write_settingf(u, flags, name, "StandardInputData=%s", encoded);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "Environment")) {
+
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_is_valid(l))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment block.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->environment = strv_free(c->environment);
+ unit_write_setting(u, flags, name, "Environment=");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+ char **e;
+
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS|UNIT_ESCAPE_C);
+ if (!joined)
+ return -ENOMEM;
+
+ e = strv_env_merge(2, c->environment, l);
+ if (!e)
+ return -ENOMEM;
+
+ strv_free_and_replace(c->environment, e);
+ unit_write_settingf(u, flags, name, "Environment=%s", joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "UnsetEnvironment")) {
+
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_name_or_assignment_is_valid(l))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid UnsetEnvironment= list.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->unset_environment = strv_free(c->unset_environment);
+ unit_write_setting(u, flags, name, "UnsetEnvironment=");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+ char **e;
+
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS|UNIT_ESCAPE_C);
+ if (!joined)
+ return -ENOMEM;
+
+ e = strv_env_merge(2, c->unset_environment, l);
+ if (!e)
+ return -ENOMEM;
+
+ strv_free_and_replace(c->unset_environment, e);
+ unit_write_settingf(u, flags, name, "UnsetEnvironment=%s", joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "OOMScoreAdjust")) {
+ int oa;
+
+ r = sd_bus_message_read(message, "i", &oa);
+ if (r < 0)
+ return r;
+
+ if (!oom_score_adjust_is_valid(oa))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "OOM score adjust value out of range");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->oom_score_adjust = oa;
+ c->oom_score_adjust_set = true;
+ unit_write_settingf(u, flags, name, "OOMScoreAdjust=%i", oa);
+ }
+
+ return 1;
+
+ } else if (streq(name, "CoredumpFilter")) {
+ uint64_t f;
+
+ r = sd_bus_message_read(message, "t", &f);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->coredump_filter = f;
+ c->coredump_filter_set = true;
+ unit_write_settingf(u, flags, name, "CoredumpFilter=0x%"PRIx64, f);
+ }
+
+ return 1;
+
+ } else if (streq(name, "EnvironmentFiles")) {
+
+ _cleanup_free_ char *joined = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t size = 0;
+ char **i;
+
+ r = sd_bus_message_enter_container(message, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ f = open_memstream_unlocked(&joined, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs("EnvironmentFile=\n", f);
+
+ STRV_FOREACH(i, c->environment_files) {
+ _cleanup_free_ char *q = NULL;
+
+ q = specifier_escape(*i);
+ if (!q)
+ return -ENOMEM;
+
+ fprintf(f, "EnvironmentFile=%s\n", q);
+ }
+
+ while ((r = sd_bus_message_enter_container(message, 'r', "sb")) > 0) {
+ const char *path;
+ int b;
+
+ r = sd_bus_message_read(message, "sb", &path, &b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *q = NULL, *buf = NULL;
+
+ buf = strjoin(b ? "-" : "", path);
+ if (!buf)
+ return -ENOMEM;
+
+ q = specifier_escape(buf);
+ if (!q)
+ return -ENOMEM;
+
+ fprintf(f, "EnvironmentFile=%s\n", q);
+
+ r = strv_consume(&l, TAKE_PTR(buf));
+ if (r < 0)
+ return r;
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->environment_files = strv_free(c->environment_files);
+ unit_write_setting(u, flags, name, "EnvironmentFile=");
+ } else {
+ r = strv_extend_strv(&c->environment_files, l, true);
+ if (r < 0)
+ return r;
+
+ unit_write_setting(u, flags, name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "PassEnvironment")) {
+
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_name_is_valid(l))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid PassEnvironment= block.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ c->pass_environment = strv_free(c->pass_environment);
+ unit_write_setting(u, flags, name, "PassEnvironment=");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ r = strv_extend_strv(&c->pass_environment, l, true);
+ if (r < 0)
+ return r;
+
+ /* We write just the new settings out to file, with unresolved specifiers. */
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS);
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "PassEnvironment=%s", joined);
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories",
+ "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***dirs;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ char *i = *p;
+ size_t offset;
+
+ offset = i[0] == '-';
+ offset += i[offset] == '+';
+ if (!path_is_absolute(i + offset))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid %s", name);
+
+ path_simplify(i + offset, false);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (STR_IN_SET(name, "ReadWriteDirectories", "ReadWritePaths"))
+ dirs = &c->read_write_paths;
+ else if (STR_IN_SET(name, "ReadOnlyDirectories", "ReadOnlyPaths"))
+ dirs = &c->read_only_paths;
+ else /* "InaccessiblePaths" */
+ dirs = &c->inaccessible_paths;
+
+ if (strv_isempty(l)) {
+ *dirs = strv_free(*dirs);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS);
+ if (!joined)
+ return -ENOMEM;
+
+ r = strv_extend_strv(dirs, l, true);
+ if (r < 0)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "RuntimeDirectory", "StateDirectory", "CacheDirectory", "LogsDirectory", "ConfigurationDirectory")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ if (!path_is_normalized(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= path is not normalized: %s", name, *p);
+
+ if (path_is_absolute(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= path is absolute: %s", name, *p);
+
+ if (path_startswith(*p, "private"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= path can't be 'private': %s", name, *p);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ ExecDirectoryType i;
+ ExecDirectory *d;
+
+ assert_se((i = exec_directory_type_from_string(name)) >= 0);
+ d = c->directories + i;
+
+ if (strv_isempty(l)) {
+ d->paths = strv_free(d->paths);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ r = strv_extend_strv(&d->paths, l, true);
+ if (r < 0)
+ return r;
+
+ joined = unit_concat_strv(l, UNIT_ESCAPE_SPECIFIERS);
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "AppArmorProfile", "SmackProcessLabel")) {
+ int ignore;
+ const char *s;
+
+ r = sd_bus_message_read(message, "(bs)", &ignore, &s);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ char **p;
+ bool *b;
+
+ if (streq(name, "AppArmorProfile")) {
+ p = &c->apparmor_profile;
+ b = &c->apparmor_profile_ignore;
+ } else { /* "SmackProcessLabel" */
+ p = &c->smack_process_label;
+ b = &c->smack_process_label_ignore;
+ }
+
+ if (isempty(s)) {
+ *p = mfree(*p);
+ *b = false;
+ } else {
+ if (free_and_strdup(p, s) < 0)
+ return -ENOMEM;
+ *b = ignore;
+ }
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s%s", name, ignore ? "-" : "", strempty(s));
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "BindPaths", "BindReadOnlyPaths")) {
+ char *source, *destination;
+ int ignore_enoent;
+ uint64_t mount_flags;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ssbt)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ssbt)", &source, &destination, &ignore_enoent, &mount_flags)) > 0) {
+
+ if (!path_is_absolute(source))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not absolute.", source);
+ if (!path_is_absolute(destination))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not absolute.", destination);
+ if (!IN_SET(mount_flags, 0, MS_REC))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mount flags.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts,
+ &(BindMount) {
+ .source = source,
+ .destination = destination,
+ .read_only = !!strstr(name, "ReadOnly"),
+ .recursive = !!(mount_flags & MS_REC),
+ .ignore_enoent = ignore_enoent,
+ });
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(
+ u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s%s:%s:%s",
+ name,
+ ignore_enoent ? "-" : "",
+ source,
+ destination,
+ (mount_flags & MS_REC) ? "rbind" : "norbind");
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (empty) {
+ bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+ c->bind_mounts = NULL;
+ c->n_bind_mounts = 0;
+
+ unit_write_settingf(u, flags, name, "%s=", name);
+ }
+
+ return 1;
+
+ } else if (streq(name, "TemporaryFileSystem")) {
+ const char *path, *options;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &path, &options)) > 0) {
+
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Mount point %s is not absolute.", path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, path, options);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(
+ u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s:%s",
+ name,
+ path,
+ options);
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (empty) {
+ temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
+ c->temporary_filesystems = NULL;
+ c->n_temporary_filesystems = 0;
+
+ unit_write_settingf(u, flags, name, "%s=", name);
+ }
+
+ return 1;
+
+ } else if ((suffix = startswith(name, "Limit"))) {
+ const char *soft = NULL;
+ int ri;
+
+ ri = rlimit_from_string(suffix);
+ if (ri < 0) {
+ soft = endswith(suffix, "Soft");
+ if (soft) {
+ const char *n;
+
+ n = strndupa(suffix, soft - suffix);
+ ri = rlimit_from_string(n);
+ if (ri >= 0)
+ name = strjoina("Limit", n);
+ }
+ }
+
+ if (ri >= 0) {
+ uint64_t rl;
+ rlim_t x;
+
+ r = sd_bus_message_read(message, "t", &rl);
+ if (r < 0)
+ return r;
+
+ if (rl == (uint64_t) -1)
+ x = RLIM_INFINITY;
+ else {
+ x = (rlim_t) rl;
+
+ if ((uint64_t) x != rl)
+ return -ERANGE;
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *f = NULL;
+ struct rlimit nl;
+
+ if (c->rlimit[ri]) {
+ nl = *c->rlimit[ri];
+
+ if (soft)
+ nl.rlim_cur = x;
+ else
+ nl.rlim_max = x;
+ } else
+ /* When the resource limit is not initialized yet, then assign the value to both fields */
+ nl = (struct rlimit) {
+ .rlim_cur = x,
+ .rlim_max = x,
+ };
+
+ r = rlimit_format(&nl, &f);
+ if (r < 0)
+ return r;
+
+ if (c->rlimit[ri])
+ *c->rlimit[ri] = nl;
+ else {
+ c->rlimit[ri] = newdup(struct rlimit, &nl, 1);
+ if (!c->rlimit[ri])
+ return -ENOMEM;
+ }
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, f);
+ }
+
+ return 1;
+ }
+
+ } else if (streq(name, "MountImages")) {
+ _cleanup_free_ char *format_str = NULL;
+ MountImage *mount_images = NULL;
+ size_t n_mount_images = 0;
+ char *source, *destination;
+ int permissive;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ssba(ss))");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_(mount_options_free_allp) MountOptions *options = NULL;
+ _cleanup_free_ char *source_escaped = NULL, *destination_escaped = NULL;
+ char *tuple;
+
+ r = sd_bus_message_enter_container(message, 'r', "ssba(ss)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "ssb", &source, &destination, &permissive);
+ if (r <= 0)
+ break;
+
+ if (!path_is_absolute(source))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not absolute.", source);
+ if (!path_is_normalized(source))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not normalized.", source);
+ if (!path_is_absolute(destination))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not absolute.", destination);
+ if (!path_is_normalized(destination))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not normalized.", destination);
+
+ /* Need to store them in the unit with the escapes, so that they can be parsed again */
+ source_escaped = shell_escape(source, ":");
+ if (!source_escaped)
+ return -ENOMEM;
+ destination_escaped = shell_escape(destination, ":");
+ if (!destination_escaped)
+ return -ENOMEM;
+
+ tuple = strjoin(format_str,
+ format_str ? " " : "",
+ permissive ? "-" : "",
+ source_escaped,
+ ":",
+ destination_escaped);
+ if (!tuple)
+ return -ENOMEM;
+ free_and_replace(format_str, tuple);
+
+ r = read_mount_options(message, error, &options, &format_str, ":");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = mount_image_add(&mount_images, &n_mount_images,
+ &(MountImage) {
+ .source = source,
+ .destination = destination,
+ .mount_options = options,
+ .ignore_enoent = permissive,
+ });
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (n_mount_images == 0) {
+ c->mount_images = mount_image_free_many(c->mount_images, &c->n_mount_images);
+
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ for (size_t i = 0; i < n_mount_images; ++i) {
+ r = mount_image_add(&c->mount_images, &c->n_mount_images, &mount_images[i]);
+ if (r < 0)
+ return r;
+ }
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_C|UNIT_ESCAPE_SPECIFIERS,
+ name,
+ "%s=%s",
+ name,
+ format_str);
+ }
+ }
+
+ mount_images = mount_image_free_many(mount_images, &n_mount_images);
+
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/src/core/dbus-execute.h b/src/core/dbus-execute.h
new file mode 100644
index 0000000..c538341
--- /dev/null
+++ b/src/core/dbus-execute.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "execute.h"
+
+#define BUS_EXEC_STATUS_VTABLE(prefix, offset, flags) \
+ BUS_PROPERTY_DUAL_TIMESTAMP(prefix "StartTimestamp", (offset) + offsetof(ExecStatus, start_timestamp), flags), \
+ BUS_PROPERTY_DUAL_TIMESTAMP(prefix "ExitTimestamp", (offset) + offsetof(ExecStatus, exit_timestamp), flags), \
+ SD_BUS_PROPERTY(prefix "PID", "u", bus_property_get_pid, (offset) + offsetof(ExecStatus, pid), flags), \
+ SD_BUS_PROPERTY(prefix "Code", "i", bus_property_get_int, (offset) + offsetof(ExecStatus, code), flags), \
+ SD_BUS_PROPERTY(prefix "Status", "i", bus_property_get_int, (offset) + offsetof(ExecStatus, status), flags)
+
+#define BUS_EXEC_COMMAND_VTABLE(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "a(sasbttttuii)", bus_property_get_exec_command, offset, flags)
+
+#define BUS_EXEC_COMMAND_LIST_VTABLE(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "a(sasbttttuii)", bus_property_get_exec_command_list, offset, flags)
+
+#define BUS_EXEC_EX_COMMAND_LIST_VTABLE(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "a(sasasttttuii)", bus_property_get_exec_ex_command_list, offset, flags)
+
+extern const sd_bus_vtable bus_exec_vtable[];
+
+int bus_property_get_exec_output(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+int bus_property_get_exec_command(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+int bus_property_get_exec_command_list(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+int bus_property_get_exec_ex_command_list(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+
+int bus_exec_context_set_transient_property(Unit *u, ExecContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_exec_command(Unit *u, const char *name, ExecCommand **exec_command, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-job.c b/src/core/dbus-job.c
new file mode 100644
index 0000000..1526b31
--- /dev/null
+++ b/src/core/dbus-job.c
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "bus-util.h"
+#include "dbus-job.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "job.h"
+#include "log.h"
+#include "selinux-access.h"
+#include "string-util.h"
+#include "strv.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, job_type, JobType);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_state, job_state, JobState);
+
+static int property_get_unit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Job *j = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(j);
+
+ p = unit_dbus_path(j->unit);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(so)", j->unit->id, p);
+}
+
+int bus_job_method_cancel(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Job *j = userdata;
+ int r;
+
+ assert(message);
+ assert(j);
+
+ r = mac_selinux_unit_access_check(j->unit, message, "stop", error);
+ if (r < 0)
+ return r;
+
+ /* Access is granted to the job owner */
+ if (!sd_bus_track_contains(j->bus_track, sd_bus_message_get_sender(message))) {
+
+ /* And for everybody else consult polkit */
+ r = bus_verify_manage_units_async(j->unit->manager, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+ }
+
+ job_finish_and_invalidate(j, JOB_CANCELED, true, false);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_job_method_get_waiting_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ Job **list = NULL;
+ Job *j = userdata;
+ int r, i, n;
+
+ if (strstr(sd_bus_message_get_member(message), "After"))
+ n = job_get_after(j, &list);
+ else
+ n = job_get_before(j, &list);
+ if (n < 0)
+ return n;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(usssoo)");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i ++) {
+ _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
+
+ job_path = job_dbus_path(list[i]);
+ if (!job_path)
+ return -ENOMEM;
+
+ unit_path = unit_dbus_path(list[i]->unit);
+ if (!unit_path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(usssoo)",
+ list[i]->id,
+ list[i]->unit->id,
+ job_type_to_string(list[i]->type),
+ job_state_to_string(list[i]->state),
+ job_path,
+ unit_path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+const sd_bus_vtable bus_job_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_METHOD("Cancel", NULL, NULL, bus_job_method_cancel, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetAfter",
+ NULL,,
+ "a(usssoo)",
+ SD_BUS_PARAM(jobs),
+ bus_job_method_get_waiting_jobs,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetBefore",
+ NULL,,
+ "a(usssoo)",
+ SD_BUS_PARAM(jobs),
+ bus_job_method_get_waiting_jobs,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_PROPERTY("Id", "u", NULL, offsetof(Job, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Unit", "(so)", property_get_unit, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobType", "s", property_get_type, offsetof(Job, type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("State", "s", property_get_state, offsetof(Job, state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_job_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Job *j;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = manager_get_job_from_dbus_path(m, path, &j);
+ if (r < 0)
+ return 0;
+
+ *found = j;
+ return 1;
+}
+
+static int bus_job_enumerate(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ unsigned k = 0;
+ Job *j;
+
+ l = new0(char*, hashmap_size(m->jobs)+1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(j, m->jobs) {
+ l[k] = job_dbus_path(j);
+ if (!l[k])
+ return -ENOMEM;
+
+ k++;
+ }
+
+ assert(hashmap_size(m->jobs) == k);
+
+ *nodes = TAKE_PTR(l);
+
+ return k;
+}
+
+const BusObjectImplementation job_object = {
+ "/org/freedesktop/systemd1/job",
+ "org.freedesktop.systemd1.Job",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({bus_job_vtable, bus_job_find}),
+ .node_enumerator = bus_job_enumerate,
+};
+
+static int send_new_signal(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ Job *j = userdata;
+ int r;
+
+ assert(bus);
+ assert(j);
+
+ p = job_dbus_path(j);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobNew");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "uos", j->id, p, j->unit->id);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int send_changed_signal(sd_bus *bus, void *userdata) {
+ _cleanup_free_ char *p = NULL;
+ Job *j = userdata;
+
+ assert(bus);
+ assert(j);
+
+ p = job_dbus_path(j);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_properties_changed(bus, p, "org.freedesktop.systemd1.Job", "State", NULL);
+}
+
+void bus_job_send_change_signal(Job *j) {
+ int r;
+
+ assert(j);
+
+ /* Make sure that any change signal on the unit is reflected before we send out the change signal on the job */
+ bus_unit_send_pending_change_signal(j->unit, true);
+
+ if (j->in_dbus_queue) {
+ LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
+ j->in_dbus_queue = false;
+ }
+
+ r = bus_foreach_bus(j->manager, j->bus_track, j->sent_dbus_new_signal ? send_changed_signal : send_new_signal, j);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send job change signal for %u: %m", j->id);
+
+ j->sent_dbus_new_signal = true;
+}
+
+void bus_job_send_pending_change_signal(Job *j, bool including_new) {
+ assert(j);
+
+ if (!j->in_dbus_queue)
+ return;
+
+ if (!j->sent_dbus_new_signal && !including_new)
+ return;
+
+ if (MANAGER_IS_RELOADING(j->unit->manager))
+ return;
+
+ bus_job_send_change_signal(j);
+}
+
+static int send_removed_signal(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ Job *j = userdata;
+ int r;
+
+ assert(bus);
+ assert(j);
+
+ p = job_dbus_path(j);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "uoss", j->id, p, j->unit->id, job_result_to_string(j->result));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+void bus_job_send_removed_signal(Job *j) {
+ int r;
+
+ assert(j);
+
+ if (!j->sent_dbus_new_signal)
+ bus_job_send_change_signal(j);
+
+ /* Make sure that any change signal on the unit is reflected before we send out the change signal on the job */
+ bus_unit_send_pending_change_signal(j->unit, true);
+
+ r = bus_foreach_bus(j->manager, j->bus_track, send_removed_signal, j);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send job remove signal for %u: %m", j->id);
+}
+
+static int bus_job_track_handler(sd_bus_track *t, void *userdata) {
+ Job *j = userdata;
+
+ assert(t);
+ assert(j);
+
+ j->bus_track = sd_bus_track_unref(j->bus_track); /* make sure we aren't called again */
+
+ /* Last client dropped off the bus, maybe we should GC this now? */
+ job_add_to_gc_queue(j);
+ return 0;
+}
+
+static int bus_job_allocate_bus_track(Job *j) {
+
+ assert(j);
+
+ if (j->bus_track)
+ return 0;
+
+ return sd_bus_track_new(j->unit->manager->api_bus, &j->bus_track, bus_job_track_handler, j);
+}
+
+int bus_job_coldplug_bus_track(Job *j) {
+ int r;
+ _cleanup_strv_free_ char **deserialized_clients = NULL;
+
+ assert(j);
+
+ deserialized_clients = TAKE_PTR(j->deserialized_clients);
+
+ if (strv_isempty(deserialized_clients))
+ return 0;
+
+ if (!j->manager->api_bus)
+ return 0;
+
+ r = bus_job_allocate_bus_track(j);
+ if (r < 0)
+ return r;
+
+ return bus_track_add_name_many(j->bus_track, deserialized_clients);
+}
+
+int bus_job_track_sender(Job *j, sd_bus_message *m) {
+ int r;
+
+ assert(j);
+ assert(m);
+
+ if (sd_bus_message_get_bus(m) != j->unit->manager->api_bus) {
+ j->ref_by_private_bus = true;
+ return 0;
+ }
+
+ r = bus_job_allocate_bus_track(j);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_sender(j->bus_track, m);
+}
diff --git a/src/core/dbus-job.h b/src/core/dbus-job.h
new file mode 100644
index 0000000..6f00581
--- /dev/null
+++ b/src/core/dbus-job.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "unit.h"
+#include "bus-object.h"
+
+extern const sd_bus_vtable bus_job_vtable[];
+extern const BusObjectImplementation job_object;
+
+int bus_job_method_cancel(sd_bus_message *message, void *job, sd_bus_error *error);
+int bus_job_method_get_waiting_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+void bus_job_send_change_signal(Job *j);
+void bus_job_send_pending_change_signal(Job *j, bool including_new);
+void bus_job_send_removed_signal(Job *j);
+
+int bus_job_coldplug_bus_track(Job *j);
+int bus_job_track_sender(Job *j, sd_bus_message *m);
diff --git a/src/core/dbus-kill.c b/src/core/dbus-kill.c
new file mode 100644
index 0000000..6333f3b
--- /dev/null
+++ b/src/core/dbus-kill.c
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-get-properties.h"
+#include "dbus-kill.h"
+#include "dbus-util.h"
+#include "kill.h"
+#include "signal-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_kill_mode, kill_mode, KillMode);
+
+static int property_get_restart_kill_signal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ KillContext *c = userdata;
+ int s;
+
+ assert(c);
+
+ s = restart_kill_signal(c);
+ return sd_bus_message_append_basic(reply, 'i', &s);
+}
+
+const sd_bus_vtable bus_kill_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("KillMode", "s", property_get_kill_mode, offsetof(KillContext, kill_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KillSignal", "i", bus_property_get_int, offsetof(KillContext, kill_signal), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestartKillSignal", "i", property_get_restart_kill_signal, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FinalKillSignal", "i", bus_property_get_int, offsetof(KillContext, final_kill_signal), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SendSIGKILL", "b", bus_property_get_bool, offsetof(KillContext, send_sigkill), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SendSIGHUP", "b", bus_property_get_bool, offsetof(KillContext, send_sighup), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WatchdogSignal", "i", bus_property_get_int, offsetof(KillContext, watchdog_signal), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_VTABLE_END
+};
+
+static BUS_DEFINE_SET_TRANSIENT_PARSE(kill_mode, KillMode, kill_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(restart_kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(final_kill_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(watchdog_signal, "i", int32_t, int, "%" PRIi32, signal_to_string_with_check);
+
+int bus_kill_context_set_transient_property(
+ Unit *u,
+ KillContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "KillMode"))
+ return bus_set_transient_kill_mode(u, name, &c->kill_mode, message, flags, error);
+
+ if (streq(name, "SendSIGHUP"))
+ return bus_set_transient_bool(u, name, &c->send_sighup, message, flags, error);
+
+ if (streq(name, "SendSIGKILL"))
+ return bus_set_transient_bool(u, name, &c->send_sigkill, message, flags, error);
+
+ if (streq(name, "KillSignal"))
+ return bus_set_transient_kill_signal(u, name, &c->kill_signal, message, flags, error);
+
+ if (streq(name, "RestartKillSignal"))
+ return bus_set_transient_restart_kill_signal(u, name, &c->restart_kill_signal, message, flags, error);
+
+ if (streq(name, "FinalKillSignal"))
+ return bus_set_transient_final_kill_signal(u, name, &c->final_kill_signal, message, flags, error);
+
+ if (streq(name, "WatchdogSignal"))
+ return bus_set_transient_watchdog_signal(u, name, &c->watchdog_signal, message, flags, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-kill.h b/src/core/dbus-kill.h
new file mode 100644
index 0000000..5a90287
--- /dev/null
+++ b/src/core/dbus-kill.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "kill.h"
+#include "unit.h"
+
+extern const sd_bus_vtable bus_kill_vtable[];
+
+int bus_kill_context_set_transient_property(Unit *u, KillContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
new file mode 100644
index 0000000..b37ed7c
--- /dev/null
+++ b/src/core/dbus-manager.c
@@ -0,0 +1,3317 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "build.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-log-control-api.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-job.h"
+#include "dbus-manager.h"
+#include "dbus-scope.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "install.h"
+#include "log.h"
+#include "os-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "selinux-access.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "user-util.h"
+#include "virt.h"
+#include "watchdog.h"
+
+/* Require 16MiB free in /run/systemd for reloading/reexecing. After all we need to serialize our state there, and if
+ * we can't we'll fail badly. */
+#define RELOAD_DISK_SPACE_MIN (UINT64_C(16) * UINT64_C(1024) * UINT64_C(1024))
+
+static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) {
+ return (runtime ? UNIT_FILE_RUNTIME : 0) |
+ (force ? UNIT_FILE_FORCE : 0);
+}
+
+BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_oom_policy, oom_policy, OOMPolicy);
+
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_version, "s", GIT_VERSION);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_features, "s", SYSTEMD_FEATURES);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_architecture, "s", architecture_to_string(uname_architecture()));
+static BUS_DEFINE_PROPERTY_GET2(property_get_system_state, "s", Manager, manager_state, manager_state_to_string);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_timer_slack_nsec, "t", (uint64_t) prctl(PR_GET_TIMERSLACK));
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_hashmap_size, "u", Hashmap *, hashmap_size);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_set_size, "u", Set *, set_size);
+static BUS_DEFINE_PROPERTY_GET(property_get_default_timeout_abort_usec, "t", Manager, manager_default_timeout_abort_usec);
+
+static int property_get_virtualization(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int v;
+
+ assert(bus);
+ assert(reply);
+
+ v = detect_virtualization();
+
+ /* Make sure to return the empty string when we detect no virtualization, as that is the API.
+ *
+ * https://github.com/systemd/systemd/issues/1423
+ */
+
+ return sd_bus_message_append(
+ reply, "s",
+ v == VIRTUALIZATION_NONE ? NULL : virtualization_to_string(v));
+}
+
+static int property_get_tainted(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *s = NULL;
+ Manager *m = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ s = manager_taint_string(m);
+ if (!s)
+ return log_oom();
+
+ return sd_bus_message_append(reply, "s", s);
+}
+
+static int property_set_log_target(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ r = sd_bus_message_read(value, "s", &t);
+ if (r < 0)
+ return r;
+
+ if (isempty(t))
+ manager_restore_original_log_target(m);
+ else {
+ LogTarget target;
+
+ target = log_target_from_string(t);
+ if (target < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log target '%s'", t);
+
+ manager_override_log_target(m, target);
+ }
+
+ return 0;
+}
+
+static int property_set_log_level(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ r = sd_bus_message_read(value, "s", &t);
+ if (r < 0)
+ return r;
+
+ if (isempty(t))
+ manager_restore_original_log_level(m);
+ else {
+ int level;
+
+ level = log_level_from_string(t);
+ if (level < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log level '%s'", t);
+
+ manager_override_log_level(m, level);
+ }
+
+ return 0;
+}
+
+static int property_get_progress(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ double d;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ if (MANAGER_IS_FINISHED(m))
+ d = 1.0;
+ else
+ d = 1.0 - ((double) hashmap_size(m->jobs) / (double) m->n_installed_jobs);
+
+ return sd_bus_message_append(reply, "d", d);
+}
+
+static int property_get_environment(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ r = manager_get_effective_environment(m, &l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append_strv(reply, l);
+}
+
+static int property_get_show_status(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(m);
+ assert(bus);
+ assert(reply);
+
+ return sd_bus_message_append(reply, "b", manager_get_show_status_on(m));
+}
+
+static int property_get_runtime_watchdog(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(m);
+ assert(bus);
+ assert(reply);
+
+ return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_RUNTIME));
+}
+
+static int property_get_reboot_watchdog(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(m);
+ assert(bus);
+ assert(reply);
+
+ return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_REBOOT));
+}
+
+static int property_get_kexec_watchdog(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(m);
+ assert(bus);
+ assert(reply);
+
+ return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_KEXEC));
+}
+
+static int property_set_watchdog(Manager *m, WatchdogType type, sd_bus_message *value) {
+ usec_t timeout;
+ int r;
+
+ assert(m);
+ assert(value);
+
+ assert_cc(sizeof(usec_t) == sizeof(uint64_t));
+
+ r = sd_bus_message_read(value, "t", &timeout);
+ if (r < 0)
+ return r;
+
+ return manager_override_watchdog(m, type, timeout);
+}
+
+static int property_set_runtime_watchdog(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ return property_set_watchdog(userdata, WATCHDOG_RUNTIME, value);
+}
+
+static int property_set_reboot_watchdog(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ return property_set_watchdog(userdata, WATCHDOG_REBOOT, value);
+}
+
+static int property_set_kexec_watchdog(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _unused_ Manager *m = userdata;
+
+ assert(m);
+ assert(bus);
+ assert(value);
+
+ return property_set_watchdog(userdata, WATCHDOG_KEXEC, value);
+}
+
+static int bus_get_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) {
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(ret_unit);
+
+ /* More or less a wrapper around manager_get_unit() that generates nice errors and has one trick up its sleeve:
+ * if the name is specified empty we use the client's unit. */
+
+ if (isempty(name)) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client not member of any unit.");
+ } else {
+ u = manager_get_unit(m, name);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s not loaded.", name);
+ }
+
+ *ret_unit = u;
+ return 0;
+}
+
+static int bus_load_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) {
+ assert(m);
+ assert(message);
+ assert(ret_unit);
+
+ /* Pretty much the same as bus_get_unit_by_name(), but we also load the unit if necessary. */
+
+ if (isempty(name))
+ return bus_get_unit_by_name(m, message, name, ret_unit, error);
+
+ return manager_load_unit(m, name, NULL, error, ret_unit);
+}
+
+static int reply_unit_path(Unit *u, sd_bus_message *message, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(u);
+ assert(message);
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ path = unit_dbus_path(u);
+ if (!path)
+ return log_oom();
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_get_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ return reply_unit_path(u, message, error);
+}
+
+static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ pid_t pid;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read(message, "u", &pid);
+ if (r < 0)
+ return r;
+ if (pid < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid PID " PID_FMT, pid);
+
+ if (pid == 0) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+ }
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_PID, "PID "PID_FMT" does not belong to any loaded unit.", pid);
+
+ return reply_unit_path(u, message, error);
+}
+
+static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ sd_id128_t id;
+ const void *a;
+ Unit *u;
+ size_t sz;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read_array(message, 'y', &a, &sz);
+ if (r < 0)
+ return r;
+ if (sz == 0)
+ id = SD_ID128_NULL;
+ else if (sz == 16)
+ memcpy(&id, a, sz);
+ else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID");
+
+ if (sd_id128_is_null(id)) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client " PID_FMT " not member of any unit.", pid);
+ } else {
+ u = hashmap_get(m->units_by_invocation_id, &id);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(id));
+ }
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ /* So here's a special trick: the bus path we return actually references the unit by its invocation ID instead
+ * of the unit name. This means it stays valid only as long as the invocation ID stays the same. */
+ path = unit_dbus_path_invocation_id(u);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_get_unit_by_control_group(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *cgroup;
+ Unit *u;
+ int r;
+
+ r = sd_bus_message_read(message, "s", &cgroup);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_cgroup(m, cgroup);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Control group '%s' is not valid or not managed by this instance", cgroup);
+
+ return reply_unit_path(u, message, error);
+}
+
+static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_load_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ return reply_unit_path(u, message, error);
+}
+
+static int method_start_unit_generic(sd_bus_message *message, Manager *m, JobType job_type, bool reload_if_possible, sd_bus_error *error) {
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(m, name, NULL, error, &u);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_start_generic(message, u, job_type, reload_if_possible, error);
+}
+
+static int method_start_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_START, false, error);
+}
+
+static int method_stop_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_STOP, false, error);
+}
+
+static int method_reload_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_RELOAD, false, error);
+}
+
+static int method_restart_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_RESTART, false, error);
+}
+
+static int method_try_restart_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_TRY_RESTART, false, error);
+}
+
+static int method_reload_or_restart_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_RESTART, true, error);
+}
+
+static int method_reload_or_try_restart_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_start_unit_generic(message, userdata, JOB_TRY_RESTART, true, error);
+}
+
+typedef enum GenericUnitOperationFlags {
+ GENERIC_UNIT_LOAD = 1 << 0, /* Load if the unit is not loaded yet */
+ GENERIC_UNIT_VALIDATE_LOADED = 1 << 1, /* Verify unit is properly loaded before forwarding call */
+} GenericUnitOperationFlags;
+
+static int method_generic_unit_operation(
+ sd_bus_message *message,
+ Manager *m,
+ sd_bus_error *error,
+ sd_bus_message_handler_t handler,
+ GenericUnitOperationFlags flags) {
+
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Read the first argument from the command and pass the operation to the specified per-unit
+ * method. */
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ if (!isempty(name) && FLAGS_SET(flags, GENERIC_UNIT_LOAD))
+ r = manager_load_unit(m, name, NULL, error, &u);
+ else
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(flags, GENERIC_UNIT_VALIDATE_LOADED)) {
+ r = bus_unit_validate_load_state(u, error);
+ if (r < 0)
+ return r;
+ }
+
+ return handler(message, u, error);
+}
+
+static int method_enqueue_unit_job(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* We don't bother with GENERIC_UNIT_VALIDATE_LOADED here, as the job logic validates that anyway */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_enqueue_job, GENERIC_UNIT_LOAD);
+}
+
+static int method_start_unit_replace(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *old_name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &old_name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, old_name, &u, error);
+ if (r < 0)
+ return r;
+ if (!u->job || u->job->type != JOB_START)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "No job queued for unit %s", old_name);
+
+ return method_start_unit_generic(message, m, JOB_START, false, error);
+}
+
+static int method_kill_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* We don't bother with GENERIC_UNIT_LOAD nor GENERIC_UNIT_VALIDATE_LOADED here, as it shouldn't
+ * matter whether a unit is loaded for killing any processes possibly in the unit's cgroup. */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_kill, 0);
+}
+
+static int method_clean_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* Load the unit if necessary, in order to load it, and insist on the unit being loaded to be
+ * cleaned */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_clean, GENERIC_UNIT_LOAD|GENERIC_UNIT_VALIDATE_LOADED);
+}
+
+static int method_freeze_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_freeze, 0);
+}
+
+static int method_thaw_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_thaw, 0);
+}
+
+static int method_reset_failed_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* Don't load the unit (because unloaded units can't be in failed state), and don't insist on the
+ * unit to be loaded properly (since a failed unit might have its unit file disappeared) */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_reset_failed, 0);
+}
+
+static int method_set_unit_properties(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* Only change properties on fully loaded units, and load them in order to set properties */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_set_properties, GENERIC_UNIT_LOAD|GENERIC_UNIT_VALIDATE_LOADED);
+}
+
+static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* Only allow reffing of fully loaded units, and make sure reffing a unit loads it. */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_ref, GENERIC_UNIT_LOAD|GENERIC_UNIT_VALIDATE_LOADED);
+}
+
+static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* Dropping a ref OTOH should not require the unit to still be loaded. And since a reffed unit is a
+ * loaded unit there's no need to load the unit for unreffing it. */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_unref, 0);
+}
+
+static int reply_unit_info(sd_bus_message *reply, Unit *u) {
+ _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
+ Unit *following;
+
+ following = unit_following(u);
+
+ unit_path = unit_dbus_path(u);
+ if (!unit_path)
+ return -ENOMEM;
+
+ if (u->job) {
+ job_path = job_dbus_path(u->job);
+ if (!job_path)
+ return -ENOMEM;
+ }
+
+ return sd_bus_message_append(
+ reply, "(ssssssouso)",
+ u->id,
+ unit_description(u),
+ unit_load_state_to_string(u->load_state),
+ unit_active_state_to_string(unit_active_state(u)),
+ unit_sub_state_to_string(u),
+ following ? following->id : "",
+ unit_path,
+ u->job ? u->job->id : 0,
+ u->job ? job_type_to_string(u->job->type) : "",
+ empty_to_root(job_path));
+}
+
+static int method_list_units_by_names(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ int r;
+ char **unit;
+ _cleanup_strv_free_ char **units = NULL;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &units);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssssssouso)");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(unit, units) {
+ Unit *u;
+
+ if (!unit_name_is_valid(*unit, UNIT_NAME_ANY))
+ continue;
+
+ r = bus_load_unit_by_name(m, message, *unit, &u, error);
+ if (r < 0)
+ return r;
+
+ r = reply_unit_info(reply, u);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_get_unit_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* Don't load a unit (since it won't have any processes if it's not loaded), but don't insist on the
+ * unit being loaded (because even improperly loaded units might still have processes around */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_get_processes, 0);
+}
+
+static int method_attach_processes_to_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ /* Don't allow attaching new processes to units that aren't loaded. Don't bother with loading a unit
+ * for this purpose though, as an unloaded unit is a stopped unit, and we don't allow attaching
+ * processes to stopped units anyway. */
+ return method_generic_unit_operation(message, userdata, error, bus_unit_method_attach_processes, GENERIC_UNIT_VALIDATE_LOADED);
+}
+
+static int transient_unit_from_message(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name,
+ Unit **unit,
+ sd_bus_error *error) {
+
+ UnitType t;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(name);
+
+ t = unit_name_to_type(name);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid unit name or type.");
+
+ if (!unit_vtable[t]->can_transient)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit type %s does not support transient units.", unit_type_to_string(t));
+
+ r = manager_load_unit(m, name, NULL, error, &u);
+ if (r < 0)
+ return r;
+
+ if (!unit_is_pristine(u))
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit %s already exists.", name);
+
+ /* OK, the unit failed to load and is unreferenced, now let's
+ * fill in the transient data instead */
+ r = unit_make_transient(u);
+ if (r < 0)
+ return r;
+
+ /* Set our properties */
+ r = bus_unit_set_properties(u, message, UNIT_RUNTIME, false, error);
+ if (r < 0)
+ return r;
+
+ /* If the client asked for it, automatically add a reference to this unit. */
+ if (u->bus_track_add) {
+ r = bus_unit_track_add_sender(u, message);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch sender: %m");
+ }
+
+ /* Now load the missing bits of the unit we just created */
+ unit_add_to_load_queue(u);
+ manager_dispatch_load_queue(m);
+
+ *unit = u;
+
+ return 0;
+}
+
+static int transient_aux_units_from_message(
+ Manager *m,
+ sd_bus_message *message,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(m);
+ assert(message);
+
+ r = sd_bus_message_enter_container(message, 'a', "(sa(sv))");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(message, 'r', "sa(sv)")) > 0) {
+ const char *name = NULL;
+ Unit *u;
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = transient_unit_from_message(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int method_start_transient_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *name, *smode;
+ Manager *m = userdata;
+ JobMode mode;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "ss", &name, &smode);
+ if (r < 0)
+ return r;
+
+ mode = job_mode_from_string(smode);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Job mode %s is invalid.", smode);
+
+ r = bus_verify_manage_units_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = transient_unit_from_message(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ r = transient_aux_units_from_message(m, message, error);
+ if (r < 0)
+ return r;
+
+ /* Finally, start it */
+ return bus_unit_queue_job(message, u, JOB_START, mode, 0, error);
+}
+
+static int method_get_job(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ uint32_t id;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read(message, "u", &id);
+ if (r < 0)
+ return r;
+
+ j = manager_get_job(m, id);
+ if (!j)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "Job %u does not exist.", (unsigned) id);
+
+ r = mac_selinux_unit_access_check(j->unit, message, "status", error);
+ if (r < 0)
+ return r;
+
+ path = job_dbus_path(j);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int method_cancel_job(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint32_t id;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &id);
+ if (r < 0)
+ return r;
+
+ j = manager_get_job(m, id);
+ if (!j)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "Job %u does not exist.", (unsigned) id);
+
+ return bus_job_method_cancel(message, j, error);
+}
+
+static int method_clear_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ manager_clear_jobs(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ manager_reset_failed(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int list_units_filtered(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ const char *k;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssssssouso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(u, k, m->units) {
+ if (k != u->id)
+ continue;
+
+ if (!strv_isempty(states) &&
+ !strv_contains(states, unit_load_state_to_string(u->load_state)) &&
+ !strv_contains(states, unit_active_state_to_string(unit_active_state(u))) &&
+ !strv_contains(states, unit_sub_state_to_string(u)))
+ continue;
+
+ if (!strv_isempty(patterns) &&
+ !strv_fnmatch_or_empty(patterns, u->id, FNM_NOESCAPE))
+ continue;
+
+ r = reply_unit_info(reply, u);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_list_units(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return list_units_filtered(message, userdata, error, NULL, NULL);
+}
+
+static int method_list_units_filtered(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **states = NULL;
+ int r;
+
+ r = sd_bus_message_read_strv(message, &states);
+ if (r < 0)
+ return r;
+
+ return list_units_filtered(message, userdata, error, states, NULL);
+}
+
+static int method_list_units_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **states = NULL;
+ _cleanup_strv_free_ char **patterns = NULL;
+ int r;
+
+ r = sd_bus_message_read_strv(message, &states);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &patterns);
+ if (r < 0)
+ return r;
+
+ return list_units_filtered(message, userdata, error, states, patterns);
+}
+
+static int method_list_jobs(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(usssoo)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(j, m->jobs) {
+ _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
+
+ job_path = job_dbus_path(j);
+ if (!job_path)
+ return -ENOMEM;
+
+ unit_path = unit_dbus_path(j->unit);
+ if (!unit_path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(
+ reply, "(usssoo)",
+ j->id,
+ j->unit->id,
+ job_type_to_string(j->type),
+ job_state_to_string(j->state),
+ job_path,
+ unit_path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_subscribe(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ if (sd_bus_message_get_bus(message) == m->api_bus) {
+
+ /* Note that direct bus connection subscribe by
+ * default, we only track peers on the API bus here */
+
+ if (!m->subscribed) {
+ r = sd_bus_track_new(sd_bus_message_get_bus(message), &m->subscribed, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_track_add_sender(m->subscribed, message);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_ALREADY_SUBSCRIBED, "Client is already subscribed.");
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_unsubscribe(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ if (sd_bus_message_get_bus(message) == m->api_bus) {
+ r = sd_bus_track_remove_sender(m->subscribed, message);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_SUBSCRIBED, "Client is not subscribed.");
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int dump_impl(sd_bus_message *message, void *userdata, sd_bus_error *error, int (*reply)(sd_bus_message *, char *)) {
+ _cleanup_free_ char *dump = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = manager_get_dump_string(m, &dump);
+ if (r < 0)
+ return r;
+
+ return reply(message, dump);
+}
+
+static int reply_dump(sd_bus_message *message, char *dump) {
+ return sd_bus_reply_method_return(message, "s", dump);
+}
+
+static int method_dump(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return dump_impl(message, userdata, error, reply_dump);
+}
+
+static int reply_dump_by_fd(sd_bus_message *message, char *dump) {
+ _cleanup_close_ int fd = -1;
+
+ fd = acquire_data_fd(dump, strlen(dump), 0);
+ if (fd < 0)
+ return fd;
+
+ return sd_bus_reply_method_return(message, "h", fd);
+}
+
+static int method_dump_by_fd(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return dump_impl(message, userdata, error, reply_dump_by_fd);
+}
+
+static int method_refuse_snapshot(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Support for snapshots has been removed.");
+}
+
+static int verify_run_space(const char *message, sd_bus_error *error) {
+ struct statvfs svfs;
+ uint64_t available;
+
+ if (statvfs("/run/systemd", &svfs) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to statvfs(/run/systemd): %m");
+
+ available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;
+
+ if (available < RELOAD_DISK_SPACE_MIN) {
+ char fb_available[FORMAT_BYTES_MAX], fb_need[FORMAT_BYTES_MAX];
+ return sd_bus_error_setf(error,
+ BUS_ERROR_DISK_FULL,
+ "%s, not enough space available on /run/systemd. "
+ "Currently, %s are free, but a safety buffer of %s is enforced.",
+ message,
+ format_bytes(fb_available, sizeof(fb_available), available),
+ format_bytes(fb_need, sizeof(fb_need), RELOAD_DISK_SPACE_MIN));
+ }
+
+ return 0;
+}
+
+int verify_run_space_and_log(const char *message) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = verify_run_space(message, &error);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = verify_run_space("Refusing to reload", error);
+ if (r < 0)
+ return r;
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_reload_daemon_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ /* Instead of sending the reply back right away, we just
+ * remember that we need to and then send it after the reload
+ * is finished. That way the caller knows when the reload
+ * finished. */
+
+ assert(!m->pending_reload_message);
+ r = sd_bus_message_new_method_return(message, &m->pending_reload_message);
+ if (r < 0)
+ return r;
+
+ m->objective = MANAGER_RELOAD;
+
+ return 1;
+}
+
+static int method_reexecute(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = verify_run_space("Refusing to reexecute", error);
+ if (r < 0)
+ return r;
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_reload_daemon_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ /* We don't send a reply back here, the client should
+ * just wait for us disconnecting. */
+
+ m->objective = MANAGER_REEXECUTE;
+ return 1;
+}
+
+static int method_exit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "halt", error);
+ if (r < 0)
+ return r;
+
+ /* Exit() (in contrast to SetExitCode()) is actually allowed even if
+ * we are running on the host. It will fall back on reboot() in
+ * systemd-shutdown if it cannot do the exit() because it isn't a
+ * container. */
+
+ m->objective = MANAGER_EXIT;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_reboot(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reboot", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Reboot is only supported for system managers.");
+
+ m->objective = MANAGER_REBOOT;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_poweroff(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "halt", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Powering off is only supported for system managers.");
+
+ m->objective = MANAGER_POWEROFF;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_halt(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "halt", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Halt is only supported for system managers.");
+
+ m->objective = MANAGER_HALT;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_kexec(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reboot", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "KExec is only supported for system managers.");
+
+ m->objective = MANAGER_KEXEC;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_switch_root(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *ri = NULL, *rt = NULL;
+ const char *root, *init;
+ Manager *m = userdata;
+ struct statvfs svfs;
+ uint64_t available;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ if (statvfs("/run/systemd", &svfs) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to statvfs(/run/systemd): %m");
+
+ available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;
+
+ if (available < RELOAD_DISK_SPACE_MIN) {
+ char fb_available[FORMAT_BYTES_MAX], fb_need[FORMAT_BYTES_MAX];
+ log_warning("Dangerously low amount of free space on /run/systemd, root switching operation might not complete successfully. "
+ "Currently, %s are free, but %s are suggested. Proceeding anyway.",
+ format_bytes(fb_available, sizeof(fb_available), available),
+ format_bytes(fb_need, sizeof(fb_need), RELOAD_DISK_SPACE_MIN));
+ }
+
+ r = mac_selinux_access_check(message, "reboot", error);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Root switching is only supported by system manager.");
+
+ r = sd_bus_message_read(message, "ss", &root, &init);
+ if (r < 0)
+ return r;
+
+ if (isempty(root))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New root directory may not be the empty string.");
+ if (!path_is_absolute(root))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New root path '%s' is not absolute.", root);
+ if (path_equal(root, "/"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New root directory cannot be the old root directory.");
+
+ /* Safety check */
+ if (isempty(init)) {
+ r = path_is_os_tree(root);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether root path '%s' contains an OS tree: %m", root);
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Specified switch root path '%s' does not seem to be an OS tree. os-release file is missing.", root);
+ } else {
+ _cleanup_free_ char *chased = NULL;
+
+ if (!path_is_absolute(init))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path to init binary '%s' not absolute.", init);
+
+ r = chase_symlinks(init, root, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &chased, NULL);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Could not resolve init executable %s: %m", init);
+
+ if (laccess(chased, X_OK) < 0) {
+ if (errno == EACCES)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Init binary %s is not executable.", init);
+
+ return sd_bus_error_set_errnof(error, r, "Could not check whether init binary %s is executable: %m", init);
+ }
+ }
+
+ rt = strdup(root);
+ if (!rt)
+ return -ENOMEM;
+
+ if (!isempty(init)) {
+ ri = strdup(init);
+ if (!ri)
+ return -ENOMEM;
+ }
+
+ free_and_replace(m->switch_root, rt);
+ free_and_replace(m->switch_root_init, ri);
+
+ m->objective = MANAGER_SWITCH_ROOT;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_environment(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **plus = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &plus);
+ if (r < 0)
+ return r;
+ if (!strv_env_is_valid(plus))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment assignments");
+
+ r = bus_verify_set_environment_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = manager_client_environment_modify(m, NULL, plus);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_unset_environment(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **minus = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &minus);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_name_or_assignment_is_valid(minus))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment variable names or assignments");
+
+ r = bus_verify_set_environment_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = manager_client_environment_modify(m, minus, NULL);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_unset_and_set_environment(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **minus = NULL, **plus = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &minus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &plus);
+ if (r < 0)
+ return r;
+
+ if (!strv_env_name_or_assignment_is_valid(minus))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment variable names or assignments");
+ if (!strv_env_is_valid(plus))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment assignments");
+
+ r = bus_verify_set_environment_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = manager_client_environment_modify(m, minus, plus);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint8_t code;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "exit", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(message, 'y', &code);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "ExitCode can only be set for user service managers or in containers.");
+
+ m->return_value = code;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_lookup_dynamic_user_by_name(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_basic(message, 's', &name);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name invalid: %s", name);
+
+ r = dynamic_user_lookup_name(m, name, &uid);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user %s does not exist.", name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) uid);
+}
+
+static int method_lookup_dynamic_user_by_uid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ Manager *m = userdata;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+ r = sd_bus_message_read_basic(message, 'u', &uid);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User ID invalid: " UID_FMT, uid);
+
+ r = dynamic_user_lookup_uid(m, uid, &name);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user ID " UID_FMT " does not exist.", uid);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", name);
+}
+
+static int method_get_dynamic_users(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ DynamicUser *d;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(us)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(d, m->dynamic_users) {
+ uid_t uid;
+
+ r = dynamic_user_current(d, &uid);
+ if (r == -EAGAIN) /* not realized yet? */
+ continue;
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Failed to look up a dynamic user.");
+
+ r = sd_bus_message_append(reply, "(us)", uid, d->name);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ UnitFileList *item;
+ Hashmap *h;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return -ENOMEM;
+
+ r = unit_file_get_list(m->unit_file_scope, NULL, h, states, patterns);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ goto fail;
+
+ HASHMAP_FOREACH(item, h) {
+
+ r = sd_bus_message_append(reply, "(ss)", item->path, unit_file_state_to_string(item->state));
+ if (r < 0)
+ goto fail;
+ }
+
+ unit_file_list_free(h);
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+
+fail:
+ unit_file_list_free(h);
+ return r;
+}
+
+static int method_list_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return list_unit_files_by_patterns(message, userdata, error, NULL, NULL);
+}
+
+static int method_list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **states = NULL;
+ _cleanup_strv_free_ char **patterns = NULL;
+ int r;
+
+ r = sd_bus_message_read_strv(message, &states);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &patterns);
+ if (r < 0)
+ return r;
+
+ return list_unit_files_by_patterns(message, userdata, error, states, patterns);
+}
+
+static int method_get_unit_file_state(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ UnitFileState state;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = unit_file_get_state(m->unit_file_scope, NULL, name, &state);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", unit_file_state_to_string(state));
+}
+
+static int method_get_default_target(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *default_target = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = mac_selinux_access_check(message, "status", error);
+ if (r < 0)
+ return r;
+
+ r = unit_file_get_default(m->unit_file_scope, NULL, &default_target);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", default_target);
+}
+
+static int send_unit_files_changed(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *message = NULL;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_message_new_signal(bus, &message, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "UnitFilesChanged");
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, message, NULL);
+}
+
+/* Create an error reply, using the error information from changes[]
+ * if possible, and fall back to generating an error from error code c.
+ * The error message only describes the first error.
+ *
+ * Coordinate with unit_file_dump_changes() in install.c.
+ */
+static int install_error(
+ sd_bus_error *error,
+ int c,
+ UnitFileChange *changes,
+ size_t n_changes) {
+
+ size_t i;
+ int r;
+
+ for (i = 0; i < n_changes; i++)
+
+ switch(changes[i].type) {
+
+ case 0 ... INT_MAX:
+ continue;
+
+ case -EEXIST:
+ if (changes[i].source)
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS,
+ "File %s already exists and is a symlink to %s.",
+ changes[i].path, changes[i].source);
+ else
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS,
+ "File %s already exists.",
+ changes[i].path);
+ goto found;
+
+ case -ERFKILL:
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_MASKED,
+ "Unit file %s is masked.", changes[i].path);
+ goto found;
+
+ case -EADDRNOTAVAIL:
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_GENERATED,
+ "Unit %s is transient or generated.", changes[i].path);
+ goto found;
+
+ case -EUCLEAN:
+ r = sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING,
+ "\"%s\" is not a valid unit name.",
+ changes[i].path);
+ goto found;
+
+ case -ELOOP:
+ r = sd_bus_error_setf(error, BUS_ERROR_UNIT_LINKED,
+ "Refusing to operate on alias name or linked unit file: %s",
+ changes[i].path);
+ goto found;
+
+ case -ENOENT:
+ r = sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT,
+ "Unit file %s does not exist.", changes[i].path);
+ goto found;
+
+ default:
+ r = sd_bus_error_set_errnof(error, changes[i].type, "File %s: %m", changes[i].path);
+ goto found;
+ }
+
+ r = c < 0 ? c : -EINVAL;
+
+ found:
+ unit_file_changes_free(changes, n_changes);
+ return r;
+}
+
+static int reply_unit_file_changes_and_free(
+ Manager *m,
+ sd_bus_message *message,
+ int carries_install_info,
+ UnitFileChange *changes,
+ size_t n_changes,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ bool bad = false, good = false;
+ size_t i;
+ int r;
+
+ if (unit_file_changes_have_modification(changes, n_changes)) {
+ r = bus_foreach_bus(m, NULL, send_unit_files_changed, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send UnitFilesChanged signal: %m");
+ }
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ goto fail;
+
+ if (carries_install_info >= 0) {
+ r = sd_bus_message_append(reply, "b", carries_install_info);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = sd_bus_message_open_container(reply, 'a', "(sss)");
+ if (r < 0)
+ goto fail;
+
+ for (i = 0; i < n_changes; i++) {
+
+ if (changes[i].type < 0) {
+ bad = true;
+ continue;
+ }
+
+ r = sd_bus_message_append(
+ reply, "(sss)",
+ unit_file_change_type_to_string(changes[i].type),
+ changes[i].path,
+ changes[i].source);
+ if (r < 0)
+ goto fail;
+
+ good = true;
+ }
+
+ /* If there was a failed change, and no successful change, then return the first failure as proper method call
+ * error. */
+ if (bad && !good)
+ return install_error(error, 0, changes, n_changes);
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto fail;
+
+ unit_file_changes_free(changes, n_changes);
+ return sd_bus_send(NULL, reply, NULL);
+
+fail:
+ unit_file_changes_free(changes, n_changes);
+ return r;
+}
+
+static int method_enable_unit_files_generic(
+ sd_bus_message *message,
+ Manager *m,
+ int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, size_t *n_changes),
+ bool carries_install_info,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitFileFlags flags;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (sd_bus_message_is_method_call(message, NULL, "EnableUnitFilesWithFlags")) {
+ uint64_t raw_flags;
+
+ r = sd_bus_message_read(message, "t", &raw_flags);
+ if (r < 0)
+ return r;
+ if ((raw_flags & ~_UNIT_FILE_FLAGS_MASK_PUBLIC) != 0)
+ return -EINVAL;
+ flags = raw_flags;
+ } else {
+ int runtime, force;
+
+ r = sd_bus_message_read(message, "bb", &runtime, &force);
+ if (r < 0)
+ return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+ }
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, carries_install_info ? r : -1, changes, n_changes, error);
+}
+
+static int method_enable_unit_files_with_flags(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_enable, true, error);
+}
+
+static int method_enable_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_enable, true, error);
+}
+
+static int method_reenable_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_reenable, true, error);
+}
+
+static int method_link_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_link, false, error);
+}
+
+static int unit_file_preset_without_mode(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char **files, UnitFileChange **changes, size_t *n_changes) {
+ return unit_file_preset(scope, flags, root_dir, files, UNIT_FILE_PRESET_FULL, changes, n_changes);
+}
+
+static int method_preset_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_preset_without_mode, true, error);
+}
+
+static int method_mask_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_enable_unit_files_generic(message, userdata, unit_file_mask, false, error);
+}
+
+static int method_preset_unit_files_with_mode(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ Manager *m = userdata;
+ UnitFilePresetMode mm;
+ int runtime, force, r;
+ UnitFileFlags flags;
+ const char *mode;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "sbb", &mode, &runtime, &force);
+ if (r < 0)
+ return r;
+
+ flags = unit_file_bools_to_flags(runtime, force);
+
+ if (isempty(mode))
+ mm = UNIT_FILE_PRESET_FULL;
+ else {
+ mm = unit_file_preset_mode_from_string(mode);
+ if (mm < 0)
+ return -EINVAL;
+ }
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_file_preset(m->unit_file_scope, flags, NULL, l, mm, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, r, changes, n_changes, error);
+}
+
+static int method_disable_unit_files_generic(
+ sd_bus_message *message,
+ Manager *m,
+ int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, size_t *n_changes),
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ UnitFileChange *changes = NULL;
+ UnitFileFlags flags;
+ size_t n_changes = 0;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ if (sd_bus_message_is_method_call(message, NULL, "DisableUnitFilesWithFlags")) {
+ uint64_t raw_flags;
+
+ r = sd_bus_message_read(message, "t", &raw_flags);
+ if (r < 0)
+ return r;
+ if ((raw_flags & ~_UNIT_FILE_FLAGS_MASK_PUBLIC) != 0 ||
+ FLAGS_SET(raw_flags, UNIT_FILE_FORCE))
+ return -EINVAL;
+ flags = raw_flags;
+ } else {
+ int runtime;
+
+ r = sd_bus_message_read(message, "b", &runtime);
+ if (r < 0)
+ return r;
+ flags = unit_file_bools_to_flags(runtime, false);
+ }
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_disable_unit_files_with_flags(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_disable_unit_files_generic(message, userdata, unit_file_disable, error);
+}
+
+static int method_disable_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_disable_unit_files_generic(message, userdata, unit_file_disable, error);
+}
+
+static int method_unmask_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_disable_unit_files_generic(message, userdata, unit_file_unmask, error);
+}
+
+static int method_revert_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_file_revert(m->unit_file_scope, NULL, l, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_set_default_target(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ Manager *m = userdata;
+ const char *name;
+ int force, r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "enable", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "sb", &name, &force);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_file_set_default(m->unit_file_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_preset_all_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ Manager *m = userdata;
+ UnitFilePresetMode mm;
+ const char *mode;
+ UnitFileFlags flags;
+ int force, runtime, r;
+
+ assert(message);
+ assert(m);
+
+ r = mac_selinux_access_check(message, "enable", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "sbb", &mode, &runtime, &force);
+ if (r < 0)
+ return r;
+
+ flags = unit_file_bools_to_flags(runtime, force);
+
+ if (isempty(mode))
+ mm = UNIT_FILE_PRESET_FULL;
+ else {
+ mm = unit_file_preset_mode_from_string(mode);
+ if (mm < 0)
+ return -EINVAL;
+ }
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_file_preset_all(m->unit_file_scope, flags, NULL, mm, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_add_dependency_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int runtime, force, r;
+ char *target, *type;
+ UnitDependency dep;
+ UnitFileFlags flags;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_manage_unit_files_async(m, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "ssbb", &target, &type, &runtime, &force);
+ if (r < 0)
+ return r;
+
+ flags = unit_file_bools_to_flags(runtime, force);
+
+ dep = unit_dependency_from_string(type);
+ if (dep < 0)
+ return -EINVAL;
+
+ r = unit_file_add_dependency(m->unit_file_scope, flags, NULL, l, target, dep, &changes, &n_changes);
+ if (r < 0)
+ return install_error(error, r, changes, n_changes);
+
+ return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes, error);
+}
+
+static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0, i;
+ UnitFileFlags flags;
+ const char *name;
+ char **p;
+ int runtime, r;
+
+ r = sd_bus_message_read(message, "sb", &name, &runtime);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return r;
+
+ p = STRV_MAKE(name);
+ flags = UNIT_FILE_DRY_RUN |
+ (runtime ? UNIT_FILE_RUNTIME : 0);
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+ for (i = 0; i < n_changes; i++)
+ if (changes[i].type == UNIT_FILE_UNLINK) {
+ r = sd_bus_message_append(reply, "s", changes[i].path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_get_job_waiting(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint32_t id;
+ Job *j;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &id);
+ if (r < 0)
+ return r;
+
+ j = manager_get_job(m, id);
+ if (!j)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_JOB, "Job %u does not exist.", (unsigned) id);
+
+ return bus_job_method_get_waiting_jobs(message, j, error);
+}
+
+static int method_abandon_scope(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_get_unit_by_name(m, message, name, &u, error);
+ if (r < 0)
+ return r;
+
+ if (u->type != UNIT_SCOPE)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit '%s' is not a scope unit, refusing.", name);
+
+ return bus_scope_method_abandon(message, u, error);
+}
+
+static int method_set_show_status(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ ShowStatus mode = _SHOW_STATUS_INVALID;
+ const char *t;
+ int r;
+
+ assert(m);
+ assert(message);
+
+ r = sd_bus_message_read(message, "s", &t);
+ if (r < 0)
+ return r;
+
+ if (!isempty(t)) {
+ mode = show_status_from_string(t);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid show status '%s'", t);
+ }
+
+ manager_override_show_status(m, mode, "bus");
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable bus_manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Version", "s", property_get_version, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Features", "s", property_get_features, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Virtualization", "s", property_get_virtualization, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Architecture", "s", property_get_architecture, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Tainted", "s", property_get_tainted, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("FirmwareTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_FIRMWARE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("LoaderTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_LOADER]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("KernelTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_KERNEL]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("UserspaceTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_USERSPACE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("FinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("SecurityStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_SECURITY_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("SecurityFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("GeneratorsStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_GENERATORS_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("GeneratorsFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_GENERATORS_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("UnitsLoadStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_UNITS_LOAD_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("UnitsLoadFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_UNITS_LOAD_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDSecurityStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_SECURITY_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDSecurityFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDGeneratorsStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_GENERATORS_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDGeneratorsFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDUnitsLoadStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START]), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InitRDUnitsLoadFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_WRITABLE_PROPERTY("LogLevel", "s", bus_property_get_log_level, property_set_log_level, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("LogTarget", "s", bus_property_get_log_target, property_set_log_target, 0, 0),
+ SD_BUS_PROPERTY("NNames", "u", property_get_hashmap_size, offsetof(Manager, units), 0),
+ SD_BUS_PROPERTY("NFailedUnits", "u", property_get_set_size, offsetof(Manager, failed_units), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NJobs", "u", property_get_hashmap_size, offsetof(Manager, jobs), 0),
+ SD_BUS_PROPERTY("NInstalledJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_installed_jobs), 0),
+ SD_BUS_PROPERTY("NFailedJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_failed_jobs), 0),
+ SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0),
+ SD_BUS_PROPERTY("Environment", "as", property_get_environment, 0, 0),
+ SD_BUS_PROPERTY("ConfirmSpawn", "b", bus_property_get_bool, offsetof(Manager, confirm_spawn), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ShowStatus", "b", property_get_show_status, 0, 0),
+ SD_BUS_PROPERTY("UnitPath", "as", NULL, offsetof(Manager, lookup_paths.search_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultStandardOutput", "s", bus_property_get_exec_output, offsetof(Manager, default_std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultStandardError", "s", bus_property_get_exec_output, offsetof(Manager, default_std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogUSec", "t", property_get_runtime_watchdog, property_set_runtime_watchdog, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("RebootWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, 0),
+ /* The following item is an obsolete alias */
+ SD_BUS_WRITABLE_PROPERTY("ShutdownWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_WRITABLE_PROPERTY("KExecWatchdogUSec", "t", property_get_kexec_watchdog, property_set_kexec_watchdog, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("ServiceWatchdogs", "b", bus_property_get_bool, bus_property_set_bool, offsetof(Manager, service_watchdogs), 0),
+ SD_BUS_PROPERTY("ControlGroup", "s", NULL, offsetof(Manager, cgroup_root), 0),
+ SD_BUS_PROPERTY("SystemState", "s", property_get_system_state, 0, 0),
+ SD_BUS_PROPERTY("ExitCode", "y", bus_property_get_unsigned, offsetof(Manager, return_value), 0),
+ SD_BUS_PROPERTY("DefaultTimerAccuracyUSec", "t", bus_property_get_usec, offsetof(Manager, default_timer_accuracy_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTimeoutStartUSec", "t", bus_property_get_usec, offsetof(Manager, default_timeout_start_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTimeoutStopUSec", "t", bus_property_get_usec, offsetof(Manager, default_timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTimeoutAbortUSec", "t", property_get_default_timeout_abort_usec, 0, 0),
+ SD_BUS_PROPERTY("DefaultRestartUSec", "t", bus_property_get_usec, offsetof(Manager, default_restart_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultStartLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Manager, default_start_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ /* The following two items are obsolete alias */
+ SD_BUS_PROPERTY("DefaultStartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Manager, default_start_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("DefaultStartLimitInterval", "t", bus_property_get_usec, offsetof(Manager, default_start_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("DefaultStartLimitBurst", "u", bus_property_get_unsigned, offsetof(Manager, default_start_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultCPUAccounting", "b", bus_property_get_bool, offsetof(Manager, default_cpu_accounting), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultBlockIOAccounting", "b", bus_property_get_bool, offsetof(Manager, default_blockio_accounting), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultMemoryAccounting", "b", bus_property_get_bool, offsetof(Manager, default_memory_accounting), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTasksAccounting", "b", bus_property_get_bool, offsetof(Manager, default_tasks_accounting), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitCPU", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_CPU]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitCPUSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_CPU]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitFSIZE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_FSIZE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitFSIZESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_FSIZE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitDATA", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_DATA]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitDATASoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_DATA]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitSTACK", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_STACK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitSTACKSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_STACK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitCORE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_CORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitCORESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_CORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRSS", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RSS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRSSSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RSS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNOFILE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NOFILE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNOFILESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NOFILE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitAS", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_AS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitASSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_AS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNPROC", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NPROC]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNPROCSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NPROC]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitMEMLOCK", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_MEMLOCK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitMEMLOCKSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_MEMLOCK]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitLOCKS", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_LOCKS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitLOCKSSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_LOCKS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitSIGPENDING", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_SIGPENDING]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitSIGPENDINGSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_SIGPENDING]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitMSGQUEUE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_MSGQUEUE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitMSGQUEUESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_MSGQUEUE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNICE", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NICE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitNICESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_NICE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRTPRIO", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRTPRIOSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRTTIME", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultLimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultTasksMax", "t", bus_property_get_tasks_max, offsetof(Manager, default_tasks_max), 0),
+ SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
+
+ SD_BUS_METHOD_WITH_NAMES("GetUnit",
+ "s",
+ SD_BUS_PARAM(name),
+ "o",
+ SD_BUS_PARAM(unit),
+ method_get_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUnitByPID",
+ "u",
+ SD_BUS_PARAM(pid),
+ "o",
+ SD_BUS_PARAM(unit),
+ method_get_unit_by_pid,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUnitByInvocationID",
+ "ay",
+ SD_BUS_PARAM(invocation_id),
+ "o",
+ SD_BUS_PARAM(unit),
+ method_get_unit_by_invocation_id,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUnitByControlGroup",
+ "s",
+ SD_BUS_PARAM(cgroup),
+ "o",
+ SD_BUS_PARAM(unit),
+ method_get_unit_by_control_group,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("LoadUnit",
+ "s",
+ SD_BUS_PARAM(name),
+ "o",
+ SD_BUS_PARAM(unit),
+ method_load_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("StartUnit",
+ "ss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_start_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("StartUnitReplace",
+ "sss",
+ SD_BUS_PARAM(old_unit)
+ SD_BUS_PARAM(new_unit)
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_start_unit_replace,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("StopUnit",
+ "ss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_stop_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ReloadUnit",
+ "ss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_reload_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("RestartUnit",
+ "ss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_restart_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("TryRestartUnit",
+ "ss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_try_restart_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ReloadOrRestartUnit",
+ "ss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_reload_or_restart_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ReloadOrTryRestartUnit",
+ "ss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_reload_or_try_restart_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("EnqueueUnitJob",
+ "sss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(job_type)
+ SD_BUS_PARAM(job_mode),
+ "uososa(uosos)",
+ SD_BUS_PARAM(job_id)
+ SD_BUS_PARAM(job_path)
+ SD_BUS_PARAM(unit_id)
+ SD_BUS_PARAM(unit_path)
+ SD_BUS_PARAM(job_type)
+ SD_BUS_PARAM(affected_jobs),
+ method_enqueue_unit_job,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("KillUnit",
+ "ssi",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(whom)
+ SD_BUS_PARAM(signal),
+ NULL,,
+ method_kill_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CleanUnit",
+ "sas",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mask),
+ NULL,,
+ method_clean_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("FreezeUnit",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_freeze_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ThawUnit",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_thaw_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ResetFailedUnit",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_reset_failed_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetUnitProperties",
+ "sba(sv)",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(properties),
+ NULL,,
+ method_set_unit_properties,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("RefUnit",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_ref_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("UnrefUnit",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_unref_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("StartTransientUnit",
+ "ssa(sv)a(sa(sv))",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(mode)
+ SD_BUS_PARAM(properties)
+ SD_BUS_PARAM(aux),
+ "o",
+ SD_BUS_PARAM(job),
+ method_start_transient_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUnitProcesses",
+ "s",
+ SD_BUS_PARAM(name),
+ "a(sus)",
+ SD_BUS_PARAM(processes),
+ method_get_unit_processes,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("AttachProcessesToUnit",
+ "ssau",
+ SD_BUS_PARAM(unit_name)
+ SD_BUS_PARAM(subcgroup)
+ SD_BUS_PARAM(pids),
+ NULL,,
+ method_attach_processes_to_unit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("AbandonScope",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_abandon_scope,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetJob",
+ "u",
+ SD_BUS_PARAM(id),
+ "o",
+ SD_BUS_PARAM(job),
+ method_get_job,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetJobAfter",
+ "u",
+ SD_BUS_PARAM(id),
+ "a(usssoo)",
+ SD_BUS_PARAM(jobs),
+ method_get_job_waiting,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetJobBefore",
+ "u",
+ SD_BUS_PARAM(id),
+ "a(usssoo)",
+ SD_BUS_PARAM(jobs),
+ method_get_job_waiting,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CancelJob",
+ "u",
+ SD_BUS_PARAM(id),
+ NULL,,
+ method_cancel_job,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ClearJobs",
+ NULL,
+ NULL,
+ method_clear_jobs,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResetFailed",
+ NULL,
+ NULL,
+ method_reset_failed,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetShowStatus",
+ "s",
+ SD_BUS_PARAM(mode),
+ NULL,,
+ method_set_show_status,
+ SD_BUS_VTABLE_CAPABILITY(CAP_SYS_ADMIN)),
+ SD_BUS_METHOD_WITH_NAMES("ListUnits",
+ NULL,,
+ "a(ssssssouso)",
+ SD_BUS_PARAM(units),
+ method_list_units,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListUnitsFiltered",
+ "as",
+ SD_BUS_PARAM(states),
+ "a(ssssssouso)",
+ SD_BUS_PARAM(units),
+ method_list_units_filtered,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListUnitsByPatterns",
+ "asas",
+ SD_BUS_PARAM(states)
+ SD_BUS_PARAM(patterns),
+ "a(ssssssouso)",
+ SD_BUS_PARAM(units),
+ method_list_units_by_patterns,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListUnitsByNames",
+ "as",
+ SD_BUS_PARAM(names),
+ "a(ssssssouso)",
+ SD_BUS_PARAM(units),
+ method_list_units_by_names,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListJobs",
+ NULL,,
+ "a(usssoo)",
+ SD_BUS_PARAM(jobs),
+ method_list_jobs,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Subscribe",
+ NULL,
+ NULL,
+ method_subscribe,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Unsubscribe",
+ NULL,
+ NULL,
+ method_unsubscribe,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Dump",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(output),
+ method_dump,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("DumpByFileDescriptor",
+ NULL,,
+ "h",
+ SD_BUS_PARAM(fd),
+ method_dump_by_fd,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CreateSnapshot",
+ "sb",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(cleanup),
+ "o",
+ SD_BUS_PARAM(unit),
+ method_refuse_snapshot,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_METHOD_WITH_NAMES("RemoveSnapshot",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_refuse_snapshot,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_METHOD("Reload",
+ NULL,
+ NULL,
+ method_reload,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Reexecute",
+ NULL,
+ NULL,
+ method_reexecute,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Exit",
+ NULL,
+ NULL,
+ method_exit,
+ 0),
+ SD_BUS_METHOD("Reboot",
+ NULL,
+ NULL,
+ method_reboot,
+ SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD("PowerOff",
+ NULL,
+ NULL,
+ method_poweroff,
+ SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD("Halt",
+ NULL,
+ NULL,
+ method_halt,
+ SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD("KExec",
+ NULL,
+ NULL,
+ method_kexec,
+ SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD_WITH_NAMES("SwitchRoot",
+ "ss",
+ SD_BUS_PARAM(new_root)
+ SD_BUS_PARAM(init),
+ NULL,,
+ method_switch_root,
+ SD_BUS_VTABLE_CAPABILITY(CAP_SYS_BOOT)),
+ SD_BUS_METHOD_WITH_NAMES("SetEnvironment",
+ "as",
+ SD_BUS_PARAM(assignments),
+ NULL,,
+ method_set_environment,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("UnsetEnvironment",
+ "as",
+ SD_BUS_PARAM(names),
+ NULL,,
+ method_unset_environment,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("UnsetAndSetEnvironment",
+ "asas",
+ SD_BUS_PARAM(names)
+ SD_BUS_PARAM(assignments),
+ NULL,,
+ method_unset_and_set_environment,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListUnitFiles",
+ NULL,,
+ "a(ss)",
+ SD_BUS_PARAM(unit_files),
+ method_list_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListUnitFilesByPatterns",
+ "asas",
+ SD_BUS_PARAM(states)
+ SD_BUS_PARAM(patterns),
+ "a(ss)",
+ SD_BUS_PARAM(unit_files),
+ method_list_unit_files_by_patterns,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUnitFileState",
+ "s",
+ SD_BUS_PARAM(file),
+ "s",
+ SD_BUS_PARAM(state),
+ method_get_unit_file_state,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("EnableUnitFiles",
+ "asbb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(force),
+ "ba(sss)",
+ SD_BUS_PARAM(carries_install_info)
+ SD_BUS_PARAM(changes),
+ method_enable_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("DisableUnitFiles",
+ "asb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(runtime),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_disable_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("EnableUnitFilesWithFlags",
+ "ast",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(flags),
+ "ba(sss)",
+ SD_BUS_PARAM(carries_install_info)
+ SD_BUS_PARAM(changes),
+ method_enable_unit_files_with_flags,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("DisableUnitFilesWithFlags",
+ "ast",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(flags),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_disable_unit_files_with_flags,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ReenableUnitFiles",
+ "asbb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(force),
+ "ba(sss)",
+ SD_BUS_PARAM(carries_install_info)
+ SD_BUS_PARAM(changes),
+ method_reenable_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("LinkUnitFiles",
+ "asbb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(force),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_link_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("PresetUnitFiles",
+ "asbb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(force),
+ "ba(sss)",
+ SD_BUS_PARAM(carries_install_info)
+ SD_BUS_PARAM(changes),
+ method_preset_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("PresetUnitFilesWithMode",
+ "assbb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(mode)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(force),
+ "ba(sss)",
+ SD_BUS_PARAM(carries_install_info)
+ SD_BUS_PARAM(changes),
+ method_preset_unit_files_with_mode,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("MaskUnitFiles",
+ "asbb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(force),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_mask_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("UnmaskUnitFiles",
+ "asb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(runtime),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_unmask_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("RevertUnitFiles",
+ "as",
+ SD_BUS_PARAM(files),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_revert_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetDefaultTarget",
+ "sb",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(force),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_set_default_target,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetDefaultTarget",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(name),
+ method_get_default_target,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("PresetAllUnitFiles",
+ "sbb",
+ SD_BUS_PARAM(mode)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(force),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_preset_all_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("AddDependencyUnitFiles",
+ "asssbb",
+ SD_BUS_PARAM(files)
+ SD_BUS_PARAM(target)
+ SD_BUS_PARAM(type)
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(force),
+ "a(sss)",
+ SD_BUS_PARAM(changes),
+ method_add_dependency_unit_files,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUnitFileLinks",
+ "sb",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(runtime),
+ "as",
+ SD_BUS_PARAM(links),
+ method_get_unit_file_links,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetExitCode",
+ "y",
+ SD_BUS_PARAM(number),
+ NULL,,
+ method_set_exit_code,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("LookupDynamicUserByName",
+ "s",
+ SD_BUS_PARAM(name),
+ "u",
+ SD_BUS_PARAM(uid),
+ method_lookup_dynamic_user_by_name,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("LookupDynamicUserByUID",
+ "u",
+ SD_BUS_PARAM(uid),
+ "s",
+ SD_BUS_PARAM(name),
+ method_lookup_dynamic_user_by_uid,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetDynamicUsers",
+ NULL,,
+ "a(us)",
+ SD_BUS_PARAM(users),
+ method_get_dynamic_users,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL_WITH_NAMES("UnitNew",
+ "so",
+ SD_BUS_PARAM(id)
+ SD_BUS_PARAM(unit),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("UnitRemoved",
+ "so",
+ SD_BUS_PARAM(id)
+ SD_BUS_PARAM(unit),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("JobNew",
+ "uos",
+ SD_BUS_PARAM(id)
+ SD_BUS_PARAM(job)
+ SD_BUS_PARAM(unit),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("JobRemoved",
+ "uoss",
+ SD_BUS_PARAM(id)
+ SD_BUS_PARAM(job)
+ SD_BUS_PARAM(unit)
+ SD_BUS_PARAM(result),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("StartupFinished",
+ "tttttt",
+ SD_BUS_PARAM(firmware)
+ SD_BUS_PARAM(loader)
+ SD_BUS_PARAM(kernel)
+ SD_BUS_PARAM(initrd)
+ SD_BUS_PARAM(userspace)
+ SD_BUS_PARAM(total),
+ 0),
+ SD_BUS_SIGNAL("UnitFilesChanged", NULL, 0),
+ SD_BUS_SIGNAL_WITH_NAMES("Reloading",
+ "b",
+ SD_BUS_PARAM(active),
+ 0),
+
+ SD_BUS_VTABLE_END
+};
+
+const sd_bus_vtable bus_manager_log_control_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ /* We define a private version of this interface here, since we want slightly different
+ * implementations for the setters. We'll still use the generic getters however, and we share the
+ * setters with the implementations for the Manager interface above (which pre-dates the generic
+ * service API interface). */
+
+ SD_BUS_WRITABLE_PROPERTY("LogLevel", "s", bus_property_get_log_level, property_set_log_level, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("LogTarget", "s", bus_property_get_log_target, property_set_log_target, 0, 0),
+ SD_BUS_PROPERTY("SyslogIdentifier", "s", bus_property_get_syslog_identifier, 0, 0),
+
+ SD_BUS_VTABLE_END,
+};
+
+static int send_finished(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *message = NULL;
+ usec_t *times = userdata;
+ int r;
+
+ assert(bus);
+ assert(times);
+
+ r = sd_bus_message_new_signal(bus, &message, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "StartupFinished");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(message, "tttttt", times[0], times[1], times[2], times[3], times[4], times[5]);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, message, NULL);
+}
+
+void bus_manager_send_finished(
+ Manager *m,
+ usec_t firmware_usec,
+ usec_t loader_usec,
+ usec_t kernel_usec,
+ usec_t initrd_usec,
+ usec_t userspace_usec,
+ usec_t total_usec) {
+
+ int r;
+
+ assert(m);
+
+ r = bus_foreach_bus(
+ m,
+ NULL,
+ send_finished,
+ (usec_t[6]) {
+ firmware_usec,
+ loader_usec,
+ kernel_usec,
+ initrd_usec,
+ userspace_usec,
+ total_usec
+ });
+ if (r < 0)
+ log_debug_errno(r, "Failed to send finished signal: %m");
+}
+
+static int send_reloading(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *message = NULL;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_message_new_signal(bus, &message, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "Reloading");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(message, "b", PTR_TO_INT(userdata));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, message, NULL);
+}
+
+void bus_manager_send_reloading(Manager *m, bool active) {
+ int r;
+
+ assert(m);
+
+ r = bus_foreach_bus(m, NULL, send_reloading, INT_TO_PTR(active));
+ if (r < 0)
+ log_debug_errno(r, "Failed to send reloading signal: %m");
+}
+
+static int send_changed_signal(sd_bus *bus, void *userdata) {
+ assert(bus);
+
+ return sd_bus_emit_properties_changed_strv(bus,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ NULL);
+}
+
+void bus_manager_send_change_signal(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = bus_foreach_bus(m, NULL, send_changed_signal, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send manager change signal: %m");
+}
diff --git a/src/core/dbus-manager.h b/src/core/dbus-manager.h
new file mode 100644
index 0000000..f3862fc
--- /dev/null
+++ b/src/core/dbus-manager.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus-vtable.h"
+
+#include "manager.h"
+
+extern const sd_bus_vtable bus_manager_vtable[];
+extern const sd_bus_vtable bus_manager_log_control_vtable[];
+
+void bus_manager_send_finished(Manager *m, usec_t firmware_usec, usec_t loader_usec, usec_t kernel_usec, usec_t initrd_usec, usec_t userspace_usec, usec_t total_usec);
+void bus_manager_send_reloading(Manager *m, bool active);
+void bus_manager_send_change_signal(Manager *m);
+
+int verify_run_space_and_log(const char *message);
+
+int bus_property_get_oom_policy(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
new file mode 100644
index 0000000..73702b1
--- /dev/null
+++ b/src/core/dbus-mount.c
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-get-properties.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-kill.h"
+#include "dbus-mount.h"
+#include "dbus-util.h"
+#include "mount.h"
+#include "string-util.h"
+#include "unit.h"
+
+static const char *mount_get_what(const Mount *m) {
+ if (m->from_proc_self_mountinfo && m->parameters_proc_self_mountinfo.what)
+ return m->parameters_proc_self_mountinfo.what;
+ if (m->from_fragment && m->parameters_fragment.what)
+ return m->parameters_fragment.what;
+ return NULL;
+}
+
+static const char *mount_get_options(const Mount *m) {
+ if (m->from_proc_self_mountinfo && m->parameters_proc_self_mountinfo.options)
+ return m->parameters_proc_self_mountinfo.options;
+ if (m->from_fragment && m->parameters_fragment.options)
+ return m->parameters_fragment.options;
+ return NULL;
+}
+
+static const char *mount_get_fstype(const Mount *m) {
+ if (m->from_proc_self_mountinfo && m->parameters_proc_self_mountinfo.fstype)
+ return m->parameters_proc_self_mountinfo.fstype;
+ else if (m->from_fragment && m->parameters_fragment.fstype)
+ return m->parameters_fragment.fstype;
+ return NULL;
+}
+
+static BUS_DEFINE_PROPERTY_GET(property_get_what, "s", Mount, mount_get_what);
+static BUS_DEFINE_PROPERTY_GET(property_get_options, "s", Mount, mount_get_options);
+static BUS_DEFINE_PROPERTY_GET(property_get_type, "s", Mount, mount_get_fstype);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, mount_result, MountResult);
+
+const sd_bus_vtable bus_mount_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Where", "s", NULL, offsetof(Mount, where), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("What", "s", property_get_what, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Options","s", property_get_options, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Mount, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ForceUnmount", "b", bus_property_get_bool, offsetof(Mount, force_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadWriteOnly", "b", bus_property_get_bool, offsetof(Mount, read_write_only), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_EXEC_COMMAND_VTABLE("ExecMount", offsetof(Mount, exec_command[MOUNT_EXEC_MOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_VTABLE("ExecUnmount", offsetof(Mount, exec_command[MOUNT_EXEC_UNMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_VTABLE("ExecRemount", offsetof(Mount, exec_command[MOUNT_EXEC_REMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_mount_set_transient_property(
+ Mount *m,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(m);
+
+ assert(m);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "Where"))
+ return bus_set_transient_path(u, name, &m->where, message, flags, error);
+
+ if (streq(name, "What"))
+ return bus_set_transient_string(u, name, &m->parameters_fragment.what, message, flags, error);
+
+ if (streq(name, "Options"))
+ return bus_set_transient_string(u, name, &m->parameters_fragment.options, message, flags, error);
+
+ if (streq(name, "Type"))
+ return bus_set_transient_string(u, name, &m->parameters_fragment.fstype, message, flags, error);
+
+ if (streq(name, "TimeoutUSec"))
+ return bus_set_transient_usec_fix_0(u, name, &m->timeout_usec, message, flags, error);
+
+ if (streq(name, "DirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &m->directory_mode, message, flags, error);
+
+ if (streq(name, "SloppyOptions"))
+ return bus_set_transient_bool(u, name, &m->sloppy_options, message, flags, error);
+
+ if (streq(name, "LazyUnmount"))
+ return bus_set_transient_bool(u, name, &m->lazy_unmount, message, flags, error);
+
+ if (streq(name, "ForceUnmount"))
+ return bus_set_transient_bool(u, name, &m->force_unmount, message, flags, error);
+
+ if (streq(name, "ReadWriteOnly"))
+ return bus_set_transient_bool(u, name, &m->read_write_only, message, flags, error);
+
+ return 0;
+}
+
+int bus_mount_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Mount *m = MOUNT(u);
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(message);
+
+ r = bus_cgroup_set_property(u, &m->cgroup_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ if (u->transient && u->load_state == UNIT_STUB) {
+ /* This is a transient unit, let's load a little more */
+
+ r = bus_mount_set_transient_property(m, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_exec_context_set_transient_property(u, &m->exec_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_kill_context_set_transient_property(u, &m->kill_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_mount_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-mount.h b/src/core/dbus-mount.h
new file mode 100644
index 0000000..5a848d3
--- /dev/null
+++ b/src/core/dbus-mount.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_mount_vtable[];
+
+int bus_mount_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_mount_commit_properties(Unit *u);
diff --git a/src/core/dbus-path.c b/src/core/dbus-path.c
new file mode 100644
index 0000000..14e77d7
--- /dev/null
+++ b/src/core/dbus-path.c
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "dbus-path.h"
+#include "dbus-util.h"
+#include "list.h"
+#include "path.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "unit.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, path_result, PathResult);
+
+static int property_get_paths(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Path *p = userdata;
+ PathSpec *k;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(p);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(spec, k, p->specs) {
+ r = sd_bus_message_append(reply, "(ss)", path_type_to_string(k->type), k->path);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_path_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Unit", "s", bus_property_get_triggered_unit, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Paths", "a(ss)", property_get_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MakeDirectory", "b", bus_property_get_bool, offsetof(Path, make_directory), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Path, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Path, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_path_set_transient_property(
+ Path *p,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(p);
+ int r;
+
+ assert(p);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "MakeDirectory"))
+ return bus_set_transient_bool(u, name, &p->make_directory, message, flags, error);
+
+ if (streq(name, "DirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &p->directory_mode, message, flags, error);
+
+ if (streq(name, "Paths")) {
+ const char *type_name, *path;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &type_name, &path)) > 0) {
+ PathType t;
+
+ t = path_type_from_string(type_name);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown path type: %s", type_name);
+
+ if (isempty(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path in %s is empty", type_name);
+
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path in %s is not absolute: %s", type_name, path);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *k;
+ PathSpec *s;
+
+ k = strdup(path);
+ if (!k)
+ return -ENOMEM;
+
+ path_simplify(k, false);
+
+ s = new0(PathSpec, 1);
+ if (!s)
+ return -ENOMEM;
+
+ s->unit = u;
+ s->path = TAKE_PTR(k);
+ s->type = t;
+ s->inotify_fd = -1;
+
+ LIST_PREPEND(spec, p->specs, s);
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", type_name, path);
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ path_free_specs(p);
+ unit_write_settingf(u, flags, name, "PathExists=");
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_path_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags mode,
+ sd_bus_error *error) {
+
+ Path *p = PATH(u);
+
+ assert(p);
+ assert(name);
+ assert(message);
+
+ if (u->transient && u->load_state == UNIT_STUB)
+ return bus_path_set_transient_property(p, name, message, mode, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-path.h b/src/core/dbus-path.h
new file mode 100644
index 0000000..b5018b0
--- /dev/null
+++ b/src/core/dbus-path.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_path_vtable[];
+
+int bus_path_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
new file mode 100644
index 0000000..1bcb483
--- /dev/null
+++ b/src/core/dbus-scope.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "dbus-cgroup.h"
+#include "dbus-kill.h"
+#include "dbus-scope.h"
+#include "dbus-unit.h"
+#include "dbus-util.h"
+#include "dbus.h"
+#include "scope.h"
+#include "selinux-access.h"
+#include "unit.h"
+
+int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Scope *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = mac_selinux_unit_access_check(UNIT(s), message, "stop", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async(UNIT(s)->manager, message, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = scope_abandon(s);
+ if (r == -ESTALE)
+ return sd_bus_error_setf(error, BUS_ERROR_SCOPE_NOT_RUNNING, "Scope %s is not running, cannot abandon.", UNIT(s)->id);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, scope_result, ScopeResult);
+
+const sd_bus_vtable bus_scope_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Controller", "s", NULL, offsetof(Scope, controller), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Scope, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Scope, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Scope, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_SIGNAL("RequestStop", NULL, 0),
+ SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_method_abandon, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_scope_set_transient_property(
+ Scope *s,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(s);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "TimeoutStopUSec"))
+ return bus_set_transient_usec(u, name, &s->timeout_stop_usec, message, flags, error);
+
+ if (streq(name, "RuntimeMaxUSec"))
+ return bus_set_transient_usec(u, name, &s->runtime_max_usec, message, flags, error);
+
+ if (streq(name, "PIDs")) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "u");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ uint32_t upid;
+ pid_t pid;
+
+ r = sd_bus_message_read(message, "u", &upid);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (upid == 0) {
+ if (!creds) {
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+ } else
+ pid = (uid_t) upid;
+
+ r = unit_pid_attachable(u, pid, error);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = unit_watch_pid(u, pid, false);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (n <= 0)
+ return -EINVAL;
+
+ return 1;
+
+ } else if (streq(name, "Controller")) {
+ const char *controller;
+
+ /* We can't support direct connections with this, as direct connections know no service or unique name
+ * concept, but the Controller field stores exactly that. */
+ if (sd_bus_message_get_bus(message) != u->manager->api_bus)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Sorry, Controller= logic only supported via the bus.");
+
+ r = sd_bus_message_read(message, "s", &controller);
+ if (r < 0)
+ return r;
+
+ if (!isempty(controller) && !sd_bus_service_name_is_valid(controller))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Controller '%s' is not a valid bus name.", controller);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = free_and_strdup(&s->controller, empty_to_null(controller));
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_scope_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ r = bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ if (u->load_state == UNIT_STUB) {
+ /* While we are created we still accept PIDs */
+
+ r = bus_scope_set_transient_property(s, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_scope_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_realize_cgroup(u);
+
+ return 0;
+}
+
+int bus_scope_send_request_stop(Scope *s) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(s);
+
+ if (!s->controller)
+ return 0;
+
+ p = unit_dbus_path(UNIT(s));
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ UNIT(s)->manager->api_bus,
+ &m,
+ p,
+ "org.freedesktop.systemd1.Scope",
+ "RequestStop");
+ if (r < 0)
+ return r;
+
+ return sd_bus_send_to(UNIT(s)->manager->api_bus, m, s->controller, NULL);
+}
+
+static int on_controller_gone(sd_bus_track *track, void *userdata) {
+ Scope *s = userdata;
+
+ assert(track);
+
+ if (s->controller) {
+ log_unit_debug(UNIT(s), "Controller %s disappeared from bus.", s->controller);
+ unit_add_to_dbus_queue(UNIT(s));
+ s->controller = mfree(s->controller);
+ }
+
+ s->controller_track = sd_bus_track_unref(s->controller_track);
+
+ return 0;
+}
+
+int bus_scope_track_controller(Scope *s) {
+ int r;
+
+ assert(s);
+
+ if (!s->controller || s->controller_track)
+ return 0;
+
+ r = sd_bus_track_new(UNIT(s)->manager->api_bus, &s->controller_track, on_controller_gone, s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_add_name(s->controller_track, s->controller);
+ if (r < 0) {
+ s->controller_track = sd_bus_track_unref(s->controller_track);
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/core/dbus-scope.h b/src/core/dbus-scope.h
new file mode 100644
index 0000000..8f1bc02
--- /dev/null
+++ b/src/core/dbus-scope.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "scope.h"
+#include "unit.h"
+
+extern const sd_bus_vtable bus_scope_vtable[];
+
+int bus_scope_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
+int bus_scope_commit_properties(Unit *u);
+
+int bus_scope_send_request_stop(Scope *s);
+
+int bus_scope_method_abandon(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int bus_scope_track_controller(Scope *s);
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
new file mode 100644
index 0000000..64f9d4a
--- /dev/null
+++ b/src/core/dbus-service.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+
+#include "alloc-util.h"
+#include "async.h"
+#include "bus-get-properties.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-kill.h"
+#include "dbus-manager.h"
+#include "dbus-service.h"
+#include "dbus-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "service.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, NotifyAccess);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
+static BUS_DEFINE_PROPERTY_GET(property_get_timeout_abort_usec, "t", Service, service_timeout_abort_usec);
+static BUS_DEFINE_PROPERTY_GET(property_get_watchdog_usec, "t", Service, service_get_watchdog_usec);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode);
+
+static int property_get_exit_status_set(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const ExitStatusSet *status_set = userdata;
+ unsigned n;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(status_set);
+
+ r = sd_bus_message_open_container(reply, 'r', "aiai");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "i");
+ if (r < 0)
+ return r;
+
+ BITMAP_FOREACH(n, &status_set->status) {
+ assert(n < 256);
+
+ r = sd_bus_message_append_basic(reply, 'i', &n);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "i");
+ if (r < 0)
+ return r;
+
+ BITMAP_FOREACH(n, &status_set->signal) {
+ const char *str;
+
+ str = signal_to_string(n);
+ if (!str)
+ continue;
+
+ r = sd_bus_message_append_basic(reply, 'i', &n);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_service_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Service, type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Restart", "s", property_get_restart, offsetof(Service, restart), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PIDFile", "s", NULL, offsetof(Service, pid_file), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NotifyAccess", "s", property_get_notify_access, offsetof(Service, notify_access), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestartUSec", "t", bus_property_get_usec, offsetof(Service, restart_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutStartUSec", "t", bus_property_get_usec, offsetof(Service, timeout_start_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Service, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutAbortUSec", "t", property_get_timeout_abort_usec, 0, 0),
+ SD_BUS_PROPERTY("TimeoutStartFailureMode", "s", property_get_timeout_failure_mode, offsetof(Service, timeout_start_failure_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutStopFailureMode", "s", property_get_timeout_failure_mode, offsetof(Service, timeout_stop_failure_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WatchdogUSec", "t", property_get_watchdog_usec, 0, 0),
+ BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
+ SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* 😷 deprecated */
+ SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("GuessMainPID", "b", bus_property_get_bool, offsetof(Service, guess_main_pid), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestartPreventExitStatus", "(aiai)", property_get_exit_status_set, offsetof(Service, restart_prevent_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestartForceExitStatus", "(aiai)", property_get_exit_status_set, offsetof(Service, restart_force_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SuccessExitStatus", "(aiai)", property_get_exit_status_set, offsetof(Service, success_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MainPID", "u", bus_property_get_pid, offsetof(Service, main_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Service, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("BusName", "s", NULL, offsetof(Service, bus_name), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FileDescriptorStoreMax", "u", bus_property_get_unsigned, offsetof(Service, n_fd_store_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NFileDescriptorStore", "u", bus_property_get_unsigned, offsetof(Service, n_fd_store), 0),
+ SD_BUS_PROPERTY("StatusText", "s", NULL, offsetof(Service, status_text), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("StatusErrno", "i", bus_property_get_int, offsetof(Service, status_errno), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("ReloadResult", "s", property_get_result, offsetof(Service, reload_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CleanResult", "s", property_get_result, offsetof(Service, clean_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NRestarts", "u", bus_property_get_unsigned, offsetof(Service, n_restarts), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("OOMPolicy", "s", bus_property_get_oom_policy, offsetof(Service, oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
+
+ BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecCondition", offsetof(Service, exec_command[SERVICE_EXEC_CONDITION]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecConditionEx", offsetof(Service, exec_command[SERVICE_EXEC_CONDITION]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecStartPreEx", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecStartEx", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Service, exec_command[SERVICE_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecStartPostEx", offsetof(Service, exec_command[SERVICE_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecReloadEx", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecStopEx", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_EX_COMMAND_LIST_VTABLE("ExecStopPostEx", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+
+ /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
+ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_ratelimit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_ratelimit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Unit, failure_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_VTABLE_END
+};
+
+static int bus_set_transient_exit_status(
+ Unit *u,
+ const char *name,
+ ExitStatusSet *status_set,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const int32_t *status, *signal;
+ size_t n_status, n_signal, i;
+ int r;
+
+ r = sd_bus_message_enter_container(message, 'r', "aiai");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(message, 'i', (const void **) &status, &n_status);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(message, 'i', (const void **) &signal, &n_signal);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ n_status /= sizeof(int32_t);
+ n_signal /= sizeof(int32_t);
+
+ if (n_status == 0 && n_signal == 0 && !UNIT_WRITE_FLAGS_NOOP(flags)) {
+ exit_status_set_free(status_set);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ return 1;
+ }
+
+ for (i = 0; i < n_status; i++) {
+ if (status[i] < 0 || status[i] > 255)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid status code in %s: %"PRIi32, name, status[i]);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = bitmap_set(&status_set->status, status[i]);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags, name, "%s=%"PRIi32, name, status[i]);
+ }
+ }
+
+ for (i = 0; i < n_signal; i++) {
+ const char *str;
+
+ str = signal_to_string((int) signal[i]);
+ if (!str)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid signal in %s: %"PRIi32, name, signal[i]);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = bitmap_set(&status_set->signal, signal[i]);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, str);
+ }
+ }
+
+ return 1;
+}
+
+static int bus_set_transient_std_fd(
+ Unit *u,
+ const char *name,
+ int *p,
+ bool *b,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int fd, r;
+
+ assert(p);
+ assert(b);
+
+ r = sd_bus_message_read(message, "h", &fd);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ int copy;
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ asynchronous_close(*p);
+ *p = copy;
+ *b = true;
+ }
+
+ return 1;
+}
+static BUS_DEFINE_SET_TRANSIENT_PARSE(notify_access, NotifyAccess, notify_access_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(service_type, ServiceType, service_type_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(service_restart, ServiceRestart, service_restart_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(oom_policy, OOMPolicy, oom_policy_from_string);
+static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(bus_name, sd_bus_service_name_is_valid);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(timeout_failure_mode, ServiceTimeoutFailureMode, service_timeout_failure_mode_from_string);
+
+static int bus_service_set_transient_property(
+ Service *s,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(s);
+ ServiceExecCommand ci;
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "PermissionsStartOnly"))
+ return bus_set_transient_bool(u, name, &s->permissions_start_only, message, flags, error);
+
+ if (streq(name, "RootDirectoryStartOnly"))
+ return bus_set_transient_bool(u, name, &s->root_directory_start_only, message, flags, error);
+
+ if (streq(name, "RemainAfterExit"))
+ return bus_set_transient_bool(u, name, &s->remain_after_exit, message, flags, error);
+
+ if (streq(name, "GuessMainPID"))
+ return bus_set_transient_bool(u, name, &s->guess_main_pid, message, flags, error);
+
+ if (streq(name, "Type"))
+ return bus_set_transient_service_type(u, name, &s->type, message, flags, error);
+
+ if (streq(name, "OOMPolicy"))
+ return bus_set_transient_oom_policy(u, name, &s->oom_policy, message, flags, error);
+
+ if (streq(name, "RestartUSec"))
+ return bus_set_transient_usec(u, name, &s->restart_usec, message, flags, error);
+
+ if (streq(name, "TimeoutStartUSec")) {
+ r = bus_set_transient_usec(u, name, &s->timeout_start_usec, message, flags, error);
+ if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags))
+ s->start_timeout_defined = true;
+
+ return r;
+ }
+
+ if (streq(name, "TimeoutStopUSec"))
+ return bus_set_transient_usec(u, name, &s->timeout_stop_usec, message, flags, error);
+
+ if (streq(name, "TimeoutAbortUSec")) {
+ r = bus_set_transient_usec(u, name, &s->timeout_abort_usec, message, flags, error);
+ if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags))
+ s->timeout_abort_set = true;
+ return r;
+ }
+
+ if (streq(name, "TimeoutStartFailureMode"))
+ return bus_set_transient_timeout_failure_mode(u, name, &s->timeout_start_failure_mode, message, flags, error);
+
+ if (streq(name, "TimeoutStopFailureMode"))
+ return bus_set_transient_timeout_failure_mode(u, name, &s->timeout_stop_failure_mode, message, flags, error);
+
+ if (streq(name, "RuntimeMaxUSec"))
+ return bus_set_transient_usec(u, name, &s->runtime_max_usec, message, flags, error);
+
+ if (streq(name, "WatchdogUSec"))
+ return bus_set_transient_usec(u, name, &s->watchdog_usec, message, flags, error);
+
+ if (streq(name, "FileDescriptorStoreMax"))
+ return bus_set_transient_unsigned(u, name, &s->n_fd_store_max, message, flags, error);
+
+ if (streq(name, "NotifyAccess"))
+ return bus_set_transient_notify_access(u, name, &s->notify_access, message, flags, error);
+
+ if (streq(name, "PIDFile")) {
+ _cleanup_free_ char *n = NULL;
+ const char *v, *e;
+
+ r = sd_bus_message_read(message, "s", &v);
+ if (r < 0)
+ return r;
+
+ if (!isempty(v)) {
+ n = path_make_absolute(v, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+ if (!n)
+ return -ENOMEM;
+
+ path_simplify(n, true);
+
+ if (!path_is_normalized(n))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "PIDFile= path '%s' is not valid", n);
+
+ e = path_startswith(n, "/var/run/");
+ if (e) {
+ char *z;
+
+ z = path_join("/run", e);
+ if (!z)
+ return log_oom();
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags))
+ log_unit_notice(u, "Transient unit's PIDFile= property references path below legacy directory /var/run, updating %s → %s; please update client accordingly.", n, z);
+
+ free_and_replace(n, z);
+ }
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ free_and_replace(s->pid_file, n);
+ unit_write_settingf(u, flags, name, "%s=%s", name, strempty(s->pid_file));
+ }
+
+ return 1;
+ }
+
+ if (streq(name, "USBFunctionDescriptors"))
+ return bus_set_transient_path(u, name, &s->usb_function_descriptors, message, flags, error);
+
+ if (streq(name, "USBFunctionStrings"))
+ return bus_set_transient_path(u, name, &s->usb_function_strings, message, flags, error);
+
+ if (streq(name, "BusName"))
+ return bus_set_transient_bus_name(u, name, &s->bus_name, message, flags, error);
+
+ if (streq(name, "Restart"))
+ return bus_set_transient_service_restart(u, name, &s->restart, message, flags, error);
+
+ if (streq(name, "RestartPreventExitStatus"))
+ return bus_set_transient_exit_status(u, name, &s->restart_prevent_status, message, flags, error);
+
+ if (streq(name, "RestartForceExitStatus"))
+ return bus_set_transient_exit_status(u, name, &s->restart_force_status, message, flags, error);
+
+ if (streq(name, "SuccessExitStatus"))
+ return bus_set_transient_exit_status(u, name, &s->success_status, message, flags, error);
+
+ ci = service_exec_command_from_string(name);
+ ci = (ci >= 0) ? ci : service_exec_ex_command_from_string(name);
+ if (ci >= 0)
+ return bus_set_transient_exec_command(u, name, &s->exec_command[ci], message, flags, error);
+
+ if (streq(name, "StandardInputFileDescriptor"))
+ return bus_set_transient_std_fd(u, name, &s->stdin_fd, &s->exec_context.stdio_as_fds, message, flags, error);
+
+ if (streq(name, "StandardOutputFileDescriptor"))
+ return bus_set_transient_std_fd(u, name, &s->stdout_fd, &s->exec_context.stdio_as_fds, message, flags, error);
+
+ if (streq(name, "StandardErrorFileDescriptor"))
+ return bus_set_transient_std_fd(u, name, &s->stderr_fd, &s->exec_context.stdio_as_fds, message, flags, error);
+
+ return 0;
+}
+
+int bus_service_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ r = bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ if (u->transient && u->load_state == UNIT_STUB) {
+ /* This is a transient unit, let's load a little more */
+
+ r = bus_service_set_transient_property(s, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_exec_context_set_transient_property(u, &s->exec_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_service_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-service.h b/src/core/dbus-service.h
new file mode 100644
index 0000000..6931167
--- /dev/null
+++ b/src/core/dbus-service.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_service_vtable[];
+
+int bus_service_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
+int bus_service_commit_properties(Unit *u);
diff --git a/src/core/dbus-slice.c b/src/core/dbus-slice.c
new file mode 100644
index 0000000..de41d65
--- /dev/null
+++ b/src/core/dbus-slice.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dbus-cgroup.h"
+#include "dbus-slice.h"
+#include "slice.h"
+#include "unit.h"
+
+const sd_bus_vtable bus_slice_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_VTABLE_END
+};
+
+int bus_slice_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Slice *s = SLICE(u);
+
+ assert(name);
+ assert(u);
+
+ return bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+}
+
+int bus_slice_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-slice.h b/src/core/dbus-slice.h
new file mode 100644
index 0000000..eb71916
--- /dev/null
+++ b/src/core/dbus-slice.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_slice_vtable[];
+
+int bus_slice_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_slice_commit_properties(Unit *u);
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
new file mode 100644
index 0000000..2c9da74
--- /dev/null
+++ b/src/core/dbus-socket.c
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-kill.h"
+#include "dbus-socket.h"
+#include "dbus-util.h"
+#include "fd-util.h"
+#include "ip-protocol-list.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "socket.h"
+#include "socket-netlink.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "unit.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, socket_result, SocketResult);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_bind_ipv6_only, socket_address_bind_ipv6_only, SocketAddressBindIPv6Only);
+static BUS_DEFINE_PROPERTY_GET(property_get_fdname, "s", Socket, socket_fdname);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_timestamping, socket_timestamping, SocketTimestamping);
+
+static int property_get_listen(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Socket *s = SOCKET(userdata);
+ SocketPort *p;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(port, p, s->ports) {
+ _cleanup_free_ char *address = NULL;
+ const char *a;
+
+ switch (p->type) {
+ case SOCKET_SOCKET: {
+ r = socket_address_print(&p->address, &address);
+ if (r)
+ return r;
+
+ a = address;
+ break;
+ }
+
+ case SOCKET_SPECIAL:
+ case SOCKET_MQUEUE:
+ case SOCKET_FIFO:
+ case SOCKET_USB_FUNCTION:
+ a = p->path;
+ break;
+
+ default:
+ assert_not_reached("Unknown socket type");
+ }
+
+ r = sd_bus_message_append(reply, "(ss)", socket_port_type_to_string(p), a);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_socket_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("BindIPv6Only", "s", property_get_bind_ipv6_only, offsetof(Socket, bind_ipv6_only), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Backlog", "u", bus_property_get_unsigned, offsetof(Socket, backlog), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Socket, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindToDevice", "s", NULL, offsetof(Socket, bind_to_device), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SocketUser", "s", NULL, offsetof(Socket, user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SocketGroup", "s", NULL, offsetof(Socket, group), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SocketMode", "u", bus_property_get_mode, offsetof(Socket, socket_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Socket, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Accept", "b", bus_property_get_bool, offsetof(Socket, accept), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FlushPending", "b", bus_property_get_bool, offsetof(Socket, flush_pending), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Writable", "b", bus_property_get_bool, offsetof(Socket, writable), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeepAlive", "b", bus_property_get_bool, offsetof(Socket, keep_alive), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeepAliveTimeUSec", "t", bus_property_get_usec, offsetof(Socket, keep_alive_time), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeepAliveIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, keep_alive_interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KeepAliveProbes", "u", bus_property_get_unsigned, offsetof(Socket, keep_alive_cnt), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DeferAcceptUSec" , "t", bus_property_get_usec, offsetof(Socket, defer_accept), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NoDelay", "b", bus_property_get_bool, offsetof(Socket, no_delay), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Priority", "i", bus_property_get_int, offsetof(Socket, priority), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReceiveBuffer", "t", bus_property_get_size, offsetof(Socket, receive_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SendBuffer", "t", bus_property_get_size, offsetof(Socket, send_buffer), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IPTOS", "i", bus_property_get_int, offsetof(Socket, ip_tos), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IPTTL", "i", bus_property_get_int, offsetof(Socket, ip_ttl), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PipeSize", "t", bus_property_get_size, offsetof(Socket, pipe_size), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FreeBind", "b", bus_property_get_bool, offsetof(Socket, free_bind), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Transparent", "b", bus_property_get_bool, offsetof(Socket, transparent), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Broadcast", "b", bus_property_get_bool, offsetof(Socket, broadcast), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PassCredentials", "b", bus_property_get_bool, offsetof(Socket, pass_cred), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PassSecurity", "b", bus_property_get_bool, offsetof(Socket, pass_sec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PassPacketInfo", "b", bus_property_get_bool, offsetof(Socket, pass_pktinfo), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Timestamping", "s", property_get_timestamping, offsetof(Socket, timestamping), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoveOnStop", "b", bus_property_get_bool, offsetof(Socket, remove_on_stop), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Listen", "a(ss)", property_get_listen, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Symlinks", "as", NULL, offsetof(Socket, symlinks), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Mark", "i", bus_property_get_int, offsetof(Socket, mark), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MaxConnections", "u", bus_property_get_unsigned, offsetof(Socket, max_connections), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MaxConnectionsPerSource", "u", bus_property_get_unsigned, offsetof(Socket, max_connections_per_source), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MessageQueueMaxMessages", "x", bus_property_get_long, offsetof(Socket, mq_maxmsg), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MessageQueueMessageSize", "x", bus_property_get_long, offsetof(Socket, mq_msgsize), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TCPCongestion", "s", NULL, offsetof(Socket, tcp_congestion), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReusePort", "b", bus_property_get_bool, offsetof(Socket, reuse_port), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SmackLabel", "s", NULL, offsetof(Socket, smack), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SmackLabelIPIn", "s", NULL, offsetof(Socket, smack_ip_in), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SmackLabelIPOut", "s", NULL, offsetof(Socket, smack_ip_out), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Socket, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Socket, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NConnections", "u", bus_property_get_unsigned, offsetof(Socket, n_connections), 0),
+ SD_BUS_PROPERTY("NAccepted", "u", bus_property_get_unsigned, offsetof(Socket, n_accepted), 0),
+ SD_BUS_PROPERTY("NRefused", "u", bus_property_get_unsigned, offsetof(Socket, n_refused), 0),
+ SD_BUS_PROPERTY("FileDescriptorName", "s", property_get_fdname, 0, 0),
+ SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Socket, exec_command[SOCKET_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPre", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+static bool check_size_t_truncation(uint64_t t) {
+ return (size_t) t == t;
+}
+
+static const char* socket_protocol_to_string(int32_t i) {
+ if (i == IPPROTO_IP)
+ return "";
+
+ if (!IN_SET(i, IPPROTO_UDPLITE, IPPROTO_SCTP))
+ return NULL;
+
+ return ip_protocol_to_name(i);
+}
+
+static BUS_DEFINE_SET_TRANSIENT(int, "i", int32_t, int, "%" PRIi32);
+static BUS_DEFINE_SET_TRANSIENT(message_queue, "x", int64_t, long, "%" PRIi64);
+static BUS_DEFINE_SET_TRANSIENT_IS_VALID(size_t_check_truncation, "t", uint64_t, size_t, "%" PRIu64, check_size_t_truncation);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(bind_ipv6_only, SocketAddressBindIPv6Only, socket_address_bind_ipv6_only_or_bool_from_string);
+static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(fdname, fdname_is_valid);
+static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(ifname, ifname_valid);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(ip_tos, "i", int32_t, int, "%" PRIi32, ip_tos_to_string_alloc);
+static BUS_DEFINE_SET_TRANSIENT_TO_STRING(socket_protocol, "i", int32_t, int, "%" PRIi32, socket_protocol_to_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(socket_timestamping, SocketTimestamping, socket_timestamping_from_string_harder);
+
+static int bus_socket_set_transient_property(
+ Socket *s,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ SocketExecCommand ci;
+ Unit *u = UNIT(s);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "Accept"))
+ return bus_set_transient_bool(u, name, &s->accept, message, flags, error);
+
+ if (streq(name, "FlushPending"))
+ return bus_set_transient_bool(u, name, &s->flush_pending, message, flags, error);
+
+ if (streq(name, "Writable"))
+ return bus_set_transient_bool(u, name, &s->writable, message, flags, error);
+
+ if (streq(name, "KeepAlive"))
+ return bus_set_transient_bool(u, name, &s->keep_alive, message, flags, error);
+
+ if (streq(name, "NoDelay"))
+ return bus_set_transient_bool(u, name, &s->no_delay, message, flags, error);
+
+ if (streq(name, "FreeBind"))
+ return bus_set_transient_bool(u, name, &s->free_bind, message, flags, error);
+
+ if (streq(name, "Transparent"))
+ return bus_set_transient_bool(u, name, &s->transparent, message, flags, error);
+
+ if (streq(name, "Broadcast"))
+ return bus_set_transient_bool(u, name, &s->broadcast, message, flags, error);
+
+ if (streq(name, "PassCredentials"))
+ return bus_set_transient_bool(u, name, &s->pass_cred, message, flags, error);
+
+ if (streq(name, "PassSecurity"))
+ return bus_set_transient_bool(u, name, &s->pass_sec, message, flags, error);
+
+ if (streq(name, "PassPacketInfo"))
+ return bus_set_transient_bool(u, name, &s->pass_pktinfo, message, flags, error);
+
+ if (streq(name, "Timestamping"))
+ return bus_set_transient_socket_timestamping(u, name, &s->timestamping, message, flags, error);
+
+ if (streq(name, "ReusePort"))
+ return bus_set_transient_bool(u, name, &s->reuse_port, message, flags, error);
+
+ if (streq(name, "RemoveOnStop"))
+ return bus_set_transient_bool(u, name, &s->remove_on_stop, message, flags, error);
+
+ if (streq(name, "SELinuxContextFromNet"))
+ return bus_set_transient_bool(u, name, &s->selinux_context_from_net, message, flags, error);
+
+ if (streq(name, "Priority"))
+ return bus_set_transient_int(u, name, &s->priority, message, flags, error);
+
+ if (streq(name, "IPTTL"))
+ return bus_set_transient_int(u, name, &s->ip_ttl, message, flags, error);
+
+ if (streq(name, "Mark"))
+ return bus_set_transient_int(u, name, &s->mark, message, flags, error);
+
+ if (streq(name, "Backlog"))
+ return bus_set_transient_unsigned(u, name, &s->backlog, message, flags, error);
+
+ if (streq(name, "MaxConnections"))
+ return bus_set_transient_unsigned(u, name, &s->max_connections, message, flags, error);
+
+ if (streq(name, "MaxConnectionsPerSource"))
+ return bus_set_transient_unsigned(u, name, &s->max_connections_per_source, message, flags, error);
+
+ if (streq(name, "KeepAliveProbes"))
+ return bus_set_transient_unsigned(u, name, &s->keep_alive_cnt, message, flags, error);
+
+ if (streq(name, "TriggerLimitBurst"))
+ return bus_set_transient_unsigned(u, name, &s->trigger_limit.burst, message, flags, error);
+
+ if (streq(name, "SocketMode"))
+ return bus_set_transient_mode_t(u, name, &s->socket_mode, message, flags, error);
+
+ if (streq(name, "DirectoryMode"))
+ return bus_set_transient_mode_t(u, name, &s->directory_mode, message, flags, error);
+
+ if (streq(name, "MessageQueueMaxMessages"))
+ return bus_set_transient_message_queue(u, name, &s->mq_maxmsg, message, flags, error);
+
+ if (streq(name, "MessageQueueMessageSize"))
+ return bus_set_transient_message_queue(u, name, &s->mq_msgsize, message, flags, error);
+
+ if (streq(name, "TimeoutUSec"))
+ return bus_set_transient_usec_fix_0(u, name, &s->timeout_usec, message, flags, error);
+
+ if (streq(name, "KeepAliveTimeUSec"))
+ return bus_set_transient_usec(u, name, &s->keep_alive_time, message, flags, error);
+
+ if (streq(name, "KeepAliveIntervalUSec"))
+ return bus_set_transient_usec(u, name, &s->keep_alive_interval, message, flags, error);
+
+ if (streq(name, "DeferAcceptUSec"))
+ return bus_set_transient_usec(u, name, &s->defer_accept, message, flags, error);
+
+ if (streq(name, "TriggerLimitIntervalUSec"))
+ return bus_set_transient_usec(u, name, &s->trigger_limit.interval, message, flags, error);
+
+ if (streq(name, "SmackLabel"))
+ return bus_set_transient_string(u, name, &s->smack, message, flags, error);
+
+ if (streq(name, "SmackLabelIPin"))
+ return bus_set_transient_string(u, name, &s->smack_ip_in, message, flags, error);
+
+ if (streq(name, "SmackLabelIPOut"))
+ return bus_set_transient_string(u, name, &s->smack_ip_out, message, flags, error);
+
+ if (streq(name, "TCPCongestion"))
+ return bus_set_transient_string(u, name, &s->tcp_congestion, message, flags, error);
+
+ if (streq(name, "FileDescriptorName"))
+ return bus_set_transient_fdname(u, name, &s->fdname, message, flags, error);
+
+ if (streq(name, "SocketUser"))
+ return bus_set_transient_user_relaxed(u, name, &s->user, message, flags, error);
+
+ if (streq(name, "SocketGroup"))
+ return bus_set_transient_user_relaxed(u, name, &s->group, message, flags, error);
+
+ if (streq(name, "BindIPv6Only"))
+ return bus_set_transient_bind_ipv6_only(u, name, &s->bind_ipv6_only, message, flags, error);
+
+ if (streq(name, "ReceiveBuffer"))
+ return bus_set_transient_size_t_check_truncation(u, name, &s->receive_buffer, message, flags, error);
+
+ if (streq(name, "SendBuffer"))
+ return bus_set_transient_size_t_check_truncation(u, name, &s->send_buffer, message, flags, error);
+
+ if (streq(name, "PipeSize"))
+ return bus_set_transient_size_t_check_truncation(u, name, &s->pipe_size, message, flags, error);
+
+ if (streq(name, "BindToDevice"))
+ return bus_set_transient_ifname(u, name, &s->bind_to_device, message, flags, error);
+
+ if (streq(name, "IPTOS"))
+ return bus_set_transient_ip_tos(u, name, &s->ip_tos, message, flags, error);
+
+ if (streq(name, "SocketProtocol"))
+ return bus_set_transient_socket_protocol(u, name, &s->socket_protocol, message, flags, error);
+
+ ci = socket_exec_command_from_string(name);
+ if (ci >= 0)
+ return bus_set_transient_exec_command(u, name,
+ &s->exec_command[ci],
+ message, flags, error);
+
+ if (streq(name, "Symlinks")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ if (!path_is_absolute(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Symlink path is not absolute: %s", *p);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ s->symlinks = strv_free(s->symlinks);
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=", name);
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ r = strv_extend_strv(&s->symlinks, l, true);
+ if (r < 0)
+ return -ENOMEM;
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "%s=%s", name, joined);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "Listen")) {
+ const char *t, *a;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &t, &a)) > 0) {
+ _cleanup_free_ SocketPort *p = NULL;
+
+ p = new(SocketPort, 1);
+ if (!p)
+ return log_oom();
+
+ *p = (SocketPort) {
+ .fd = -1,
+ .socket = s,
+ };
+
+ p->type = socket_port_type_from_string(t);
+ if (p->type < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown Socket type: %s", t);
+
+ if (p->type != SOCKET_SOCKET) {
+ if (!path_is_valid(p->path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid socket path: %s", t);
+
+ p->path = strdup(a);
+ if (!p->path)
+ return log_oom();
+
+ path_simplify(p->path, false);
+
+ } else if (streq(t, "Netlink")) {
+ r = socket_address_parse_netlink(&p->address, a);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid netlink address: %s", a);
+
+ } else {
+ r = socket_address_parse(&p->address, a);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address: %s", a);
+
+ p->address.type = socket_address_type_from_string(t);
+ if (p->address.type < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address type: %s", t);
+
+ if (socket_address_family(&p->address) != AF_LOCAL && p->address.type == SOCK_SEQPACKET)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Address family not supported: %s", a);
+ }
+
+ empty = false;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ LIST_APPEND(port, s->ports, TAKE_PTR(p));
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "Listen%s=%s", t, a);
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ socket_free_ports(s);
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "ListenStream=");
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_socket_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ r = bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ if (u->transient && u->load_state == UNIT_STUB) {
+ /* This is a transient unit, let's load a little more */
+
+ r = bus_socket_set_transient_property(s, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_exec_context_set_transient_property(u, &s->exec_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+
+ r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_socket_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-socket.h b/src/core/dbus-socket.h
new file mode 100644
index 0000000..f9f36a2
--- /dev/null
+++ b/src/core/dbus-socket.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_socket_vtable[];
+
+int bus_socket_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_socket_commit_properties(Unit *u);
diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c
new file mode 100644
index 0000000..0fa8dd1
--- /dev/null
+++ b/src/core/dbus-swap.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2010 Maarten Lankhorst
+***/
+
+#include "bus-get-properties.h"
+#include "dbus-cgroup.h"
+#include "dbus-execute.h"
+#include "dbus-swap.h"
+#include "string-util.h"
+#include "swap.h"
+#include "unit.h"
+
+static int swap_get_priority(Swap *s) {
+ assert(s);
+
+ if (s->from_proc_swaps && s->parameters_proc_swaps.priority_set)
+ return s->parameters_proc_swaps.priority;
+
+ if (s->from_fragment && s->parameters_fragment.priority_set)
+ return s->parameters_fragment.priority;
+
+ return -1;
+}
+
+static const char *swap_get_options(Swap *s) {
+ assert(s);
+
+ if (s->from_fragment)
+ return s->parameters_fragment.options;
+
+ return NULL;
+}
+
+static BUS_DEFINE_PROPERTY_GET(property_get_priority, "i", Swap, swap_get_priority);
+static BUS_DEFINE_PROPERTY_GET(property_get_options, "s", Swap, swap_get_options);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, swap_result, SwapResult);
+
+const sd_bus_vtable bus_swap_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("What", "s", NULL, offsetof(Swap, what), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Priority", "i", property_get_priority, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Options", "s", property_get_options, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Swap, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Swap, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Swap, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_EXEC_COMMAND_VTABLE("ExecActivate", offsetof(Swap, exec_command[SWAP_EXEC_ACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ BUS_EXEC_COMMAND_VTABLE("ExecDeactivate", offsetof(Swap, exec_command[SWAP_EXEC_DEACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+int bus_swap_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Swap *s = SWAP(u);
+
+ assert(s);
+ assert(name);
+ assert(message);
+
+ return bus_cgroup_set_property(u, &s->cgroup_context, name, message, flags, error);
+}
+
+int bus_swap_commit_properties(Unit *u) {
+ assert(u);
+
+ unit_realize_cgroup(u);
+
+ return 0;
+}
diff --git a/src/core/dbus-swap.h b/src/core/dbus-swap.h
new file mode 100644
index 0000000..9d651b5
--- /dev/null
+++ b/src/core/dbus-swap.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2010 Maarten Lankhorst
+***/
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_swap_vtable[];
+
+int bus_swap_set_property(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_swap_commit_properties(Unit *u);
diff --git a/src/core/dbus-target.c b/src/core/dbus-target.c
new file mode 100644
index 0000000..e979fb7
--- /dev/null
+++ b/src/core/dbus-target.c
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dbus-target.h"
+#include "unit.h"
+
+const sd_bus_vtable bus_target_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_VTABLE_END
+};
diff --git a/src/core/dbus-target.h b/src/core/dbus-target.h
new file mode 100644
index 0000000..fedd4a9
--- /dev/null
+++ b/src/core/dbus-target.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus-vtable.h"
+
+extern const sd_bus_vtable bus_target_vtable[];
diff --git a/src/core/dbus-timer.c b/src/core/dbus-timer.c
new file mode 100644
index 0000000..8e69c17
--- /dev/null
+++ b/src/core/dbus-timer.c
@@ -0,0 +1,382 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "dbus-timer.h"
+#include "dbus-util.h"
+#include "strv.h"
+#include "timer.h"
+#include "unit.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, timer_result, TimerResult);
+
+static int property_get_monotonic_timers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Timer *t = userdata;
+ TimerValue *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(t);
+
+ r = sd_bus_message_open_container(reply, 'a', "(stt)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(value, v, t->values) {
+ _cleanup_free_ char *buf = NULL;
+ const char *s;
+ size_t l;
+
+ if (v->base == TIMER_CALENDAR)
+ continue;
+
+ s = timer_base_to_string(v->base);
+ assert(endswith(s, "Sec"));
+
+ /* s/Sec/USec/ */
+ l = strlen(s);
+ buf = new(char, l+2);
+ if (!buf)
+ return -ENOMEM;
+
+ memcpy(buf, s, l-3);
+ memcpy(buf+l-3, "USec", 5);
+
+ r = sd_bus_message_append(reply, "(stt)", buf, v->value, v->next_elapse);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_calendar_timers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Timer *t = userdata;
+ TimerValue *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(t);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sst)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(value, v, t->values) {
+ _cleanup_free_ char *buf = NULL;
+
+ if (v->base != TIMER_CALENDAR)
+ continue;
+
+ r = calendar_spec_to_string(v->calendar_spec, &buf);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "(sst)", timer_base_to_string(v->base), buf, v->next_elapse);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_next_elapse_monotonic(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Timer *t = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(t);
+
+ return sd_bus_message_append(reply, "t",
+ (uint64_t) usec_shift_clock(t->next_elapse_monotonic_or_boottime,
+ TIMER_MONOTONIC_CLOCK(t), CLOCK_MONOTONIC));
+}
+
+const sd_bus_vtable bus_timer_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Unit", "s", bus_property_get_triggered_unit, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TimersMonotonic", "a(stt)", property_get_monotonic_timers, 0, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("TimersCalendar", "a(sst)", property_get_calendar_timers, 0, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("OnClockChange", "b", bus_property_get_bool, offsetof(Timer, on_clock_change), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OnTimezoneChange", "b", bus_property_get_bool, offsetof(Timer, on_timezone_change), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NextElapseUSecRealtime", "t", bus_property_get_usec, offsetof(Timer, next_elapse_realtime), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NextElapseUSecMonotonic", "t", property_get_next_elapse_monotonic, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("LastTriggerUSec", offsetof(Timer, last_trigger), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Timer, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("AccuracyUSec", "t", bus_property_get_usec, offsetof(Timer, accuracy_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RandomizedDelayUSec", "t", bus_property_get_usec, offsetof(Timer, random_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FixedRandomDelay", "b", bus_property_get_bool, offsetof(Timer, fixed_random_delay), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Persistent", "b", bus_property_get_bool, offsetof(Timer, persistent), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WakeSystem", "b", bus_property_get_bool, offsetof(Timer, wake_system), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemainAfterElapse", "b", bus_property_get_bool, offsetof(Timer, remain_after_elapse), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_VTABLE_END
+};
+
+static int timer_add_one_monotonic_spec(
+ Timer *t,
+ const char *name,
+ TimerBase base,
+ UnitWriteFlags flags,
+ usec_t usec,
+ sd_bus_error *error) {
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ TimerValue *v;
+
+ unit_write_settingf(UNIT(t), flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s",
+ timer_base_to_string(base),
+ format_timespan(ts, sizeof ts, usec, USEC_PER_MSEC));
+
+ v = new(TimerValue, 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (TimerValue) {
+ .base = base,
+ .value = usec,
+ };
+
+ LIST_PREPEND(value, t->values, v);
+ }
+
+ return 1;
+}
+
+static int timer_add_one_calendar_spec(
+ Timer *t,
+ const char *name,
+ TimerBase base,
+ UnitWriteFlags flags,
+ const char *str,
+ sd_bus_error *error) {
+
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ int r;
+
+ r = calendar_spec_from_string(str, &c);
+ if (r == -EINVAL)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid calendar spec");
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ unit_write_settingf(UNIT(t), flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s", timer_base_to_string(base), str);
+
+ TimerValue *v = new(TimerValue, 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (TimerValue) {
+ .base = base,
+ .calendar_spec = TAKE_PTR(c),
+ };
+
+ LIST_PREPEND(value, t->values, v);
+ }
+
+ return 1;
+};
+
+static int bus_timer_set_transient_property(
+ Timer *t,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ Unit *u = UNIT(t);
+ int r;
+
+ assert(t);
+ assert(name);
+ assert(message);
+
+ flags |= UNIT_PRIVATE;
+
+ if (streq(name, "AccuracyUSec"))
+ return bus_set_transient_usec(u, name, &t->accuracy_usec, message, flags, error);
+
+ if (streq(name, "AccuracySec")) {
+ log_notice("Client is using obsolete AccuracySec= transient property, please use AccuracyUSec= instead.");
+ return bus_set_transient_usec(u, "AccuracyUSec", &t->accuracy_usec, message, flags, error);
+ }
+
+ if (streq(name, "RandomizedDelayUSec"))
+ return bus_set_transient_usec(u, name, &t->random_usec, message, flags, error);
+
+ if (streq(name, "FixedRandomDelay"))
+ return bus_set_transient_bool(u, name, &t->fixed_random_delay, message, flags, error);
+
+ if (streq(name, "WakeSystem"))
+ return bus_set_transient_bool(u, name, &t->wake_system, message, flags, error);
+
+ if (streq(name, "Persistent"))
+ return bus_set_transient_bool(u, name, &t->persistent, message, flags, error);
+
+ if (streq(name, "RemainAfterElapse"))
+ return bus_set_transient_bool(u, name, &t->remain_after_elapse, message, flags, error);
+
+ if (streq(name, "OnTimezoneChange"))
+ return bus_set_transient_bool(u, name, &t->on_timezone_change, message, flags, error);
+
+ if (streq(name, "OnClockChange"))
+ return bus_set_transient_bool(u, name, &t->on_clock_change, message, flags, error);
+
+ if (streq(name, "TimersMonotonic")) {
+ const char *base_name;
+ usec_t usec;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &base_name, &usec)) > 0) {
+ TimerBase b;
+
+ b = timer_base_from_string(base_name);
+ if (b < 0 || b == TIMER_CALENDAR)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid timer base: %s", base_name);
+
+ r = timer_add_one_monotonic_spec(t, name, b, flags, usec, error);
+ if (r < 0)
+ return r;
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ timer_free_values(t);
+ unit_write_setting(u, flags, name, "OnActiveSec=");
+ }
+
+ return 1;
+
+ } else if (streq(name, "TimersCalendar")) {
+ const char *base_name, *str;
+ bool empty = true;
+
+ r = sd_bus_message_enter_container(message, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(ss)", &base_name, &str)) > 0) {
+ TimerBase b;
+
+ b = timer_base_from_string(base_name);
+ if (b != TIMER_CALENDAR)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid timer base: %s", base_name);
+
+ r = timer_add_one_calendar_spec(t, name, b, flags, str, error);
+ if (r < 0)
+ return r;
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ timer_free_values(t);
+ unit_write_setting(u, flags, name, "OnCalendar=");
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name,
+ "OnActiveSec",
+ "OnBootSec",
+ "OnStartupSec",
+ "OnUnitActiveSec",
+ "OnUnitInactiveSec")) {
+
+ TimerBase b;
+ usec_t usec;
+
+ log_notice("Client is using obsolete %s= transient property, please use TimersMonotonic= instead.", name);
+
+ b = timer_base_from_string(name);
+ if (b < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown timer base");
+
+ r = sd_bus_message_read(message, "t", &usec);
+ if (r < 0)
+ return r;
+
+ return timer_add_one_monotonic_spec(t, name, b, flags, usec, error);
+
+ } else if (streq(name, "OnCalendar")) {
+
+ const char *str;
+
+ log_notice("Client is using obsolete %s= transient property, please use TimersCalendar= instead.", name);
+
+ r = sd_bus_message_read(message, "s", &str);
+ if (r < 0)
+ return r;
+
+ return timer_add_one_calendar_spec(t, name, TIMER_CALENDAR, flags, str, error);
+ }
+
+ return 0;
+}
+
+int bus_timer_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags mode,
+ sd_bus_error *error) {
+
+ Timer *t = TIMER(u);
+
+ assert(t);
+ assert(name);
+ assert(message);
+
+ if (u->transient && u->load_state == UNIT_STUB)
+ return bus_timer_set_transient_property(t, name, message, mode, error);
+
+ return 0;
+}
diff --git a/src/core/dbus-timer.h b/src/core/dbus-timer.h
new file mode 100644
index 0000000..ac436f1
--- /dev/null
+++ b/src/core/dbus-timer.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-bus-vtable.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_timer_vtable[];
+
+int bus_timer_set_property(Unit *u, const char *name, sd_bus_message *i, UnitWriteFlags flags, sd_bus_error *error);
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
new file mode 100644
index 0000000..427152a
--- /dev/null
+++ b/src/core/dbus-unit.c
@@ -0,0 +1,2475 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-polkit.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "dbus-job.h"
+#include "dbus-unit.h"
+#include "dbus-util.h"
+#include "dbus.h"
+#include "fd-util.h"
+#include "install.h"
+#include "locale-util.h"
+#include "log.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "selinux-access.h"
+#include "signal-util.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "web-util.h"
+
+static bool unit_can_start_refuse_manual(Unit *u) {
+ return unit_can_start(u) && !u->refuse_manual_start;
+}
+
+static bool unit_can_stop_refuse_manual(Unit *u) {
+ return unit_can_stop(u) && !u->refuse_manual_stop;
+}
+
+static bool unit_can_isolate_refuse_manual(Unit *u) {
+ return unit_can_isolate(u) && !u->refuse_manual_start;
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_collect_mode, collect_mode, CollectMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
+static BUS_DEFINE_PROPERTY_GET(property_get_description, "s", Unit, unit_description);
+static BUS_DEFINE_PROPERTY_GET2(property_get_active_state, "s", Unit, unit_active_state, unit_active_state_to_string);
+static BUS_DEFINE_PROPERTY_GET2(property_get_freezer_state, "s", Unit, unit_freezer_state, freezer_state_to_string);
+static BUS_DEFINE_PROPERTY_GET(property_get_sub_state, "s", Unit, unit_sub_state_to_string);
+static BUS_DEFINE_PROPERTY_GET2(property_get_unit_file_state, "s", Unit, unit_get_unit_file_state, unit_file_state_to_string);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_reload, "b", Unit, unit_can_reload);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_start, "b", Unit, unit_can_start_refuse_manual);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_stop, "b", Unit, unit_can_stop_refuse_manual);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_isolate, "b", Unit, unit_can_isolate_refuse_manual);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_freeze, "b", Unit, unit_can_freeze);
+static BUS_DEFINE_PROPERTY_GET(property_get_need_daemon_reload, "b", Unit, unit_need_daemon_reload);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_strv, "as", 0);
+
+static int property_get_can_clean(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ ExecCleanMask mask;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = unit_can_clean(u, &mask);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ if (!FLAGS_SET(mask, 1U << t))
+ continue;
+
+ r = sd_bus_message_append(reply, "s", exec_resource_type_to_string(t));
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_names(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", u->id);
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(t, u->aliases) {
+ r = sd_bus_message_append(reply, "s", t);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_following(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata, *f;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ f = unit_following(u);
+ return sd_bus_message_append(reply, "s", f ? f->id : NULL);
+}
+
+static int property_get_dependencies(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Hashmap **h = userdata;
+ Unit *u;
+ void *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(h);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(v, u, *h) {
+ r = sd_bus_message_append(reply, "s", u->id);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_requires_mounts_for(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Hashmap **h = userdata;
+ const char *p;
+ void *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(h);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(v, p, *h) {
+ r = sd_bus_message_append(reply, "s", p);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_unit_file_preset(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = unit_get_unit_file_preset(u);
+
+ return sd_bus_message_append(reply, "s",
+ r < 0 ? NULL:
+ r > 0 ? "enabled" : "disabled");
+}
+
+static int property_get_job(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Job **j = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(j);
+
+ if (!*j)
+ return sd_bus_message_append(reply, "(uo)", 0, "/");
+
+ p = job_dbus_path(*j);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(uo)", (*j)->id, p);
+}
+
+static int property_get_conditions(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const char *(*to_string)(ConditionType type) = NULL;
+ Condition **list = userdata, *c;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(list);
+
+ to_string = streq(property, "Asserts") ? assert_type_to_string : condition_type_to_string;
+
+ r = sd_bus_message_open_container(reply, 'a', "(sbbsi)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(conditions, c, *list) {
+ int tristate;
+
+ tristate =
+ c->result == CONDITION_UNTESTED ? 0 :
+ c->result == CONDITION_SUCCEEDED ? 1 : -1;
+
+ r = sd_bus_message_append(reply, "(sbbsi)",
+ to_string(c->type),
+ c->trigger, c->negate,
+ c->parameter, tristate);
+ if (r < 0)
+ return r;
+
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_load_error(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error e = SD_BUS_ERROR_NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = bus_unit_validate_load_state(u, &e);
+ if (r < 0)
+ return sd_bus_message_append(reply, "(ss)", e.name, e.message);
+
+ return sd_bus_message_append(reply, "(ss)", NULL, NULL);
+}
+
+static int bus_verify_manage_units_async_full(
+ Unit *u,
+ const char *verb,
+ int capability,
+ const char *polkit_message,
+ bool interactive,
+ sd_bus_message *call,
+ sd_bus_error *error) {
+
+ const char *details[9] = {
+ "unit", u->id,
+ "verb", verb,
+ };
+
+ if (polkit_message) {
+ details[4] = "polkit.message";
+ details[5] = polkit_message;
+ details[6] = "polkit.gettext_domain";
+ details[7] = GETTEXT_PACKAGE;
+ }
+
+ return bus_verify_polkit_async(
+ call,
+ capability,
+ "org.freedesktop.systemd1.manage-units",
+ details,
+ interactive,
+ UID_INVALID,
+ &u->manager->polkit_registry,
+ error);
+}
+
+static const char *const polkit_message_for_job[_JOB_TYPE_MAX] = {
+ [JOB_START] = N_("Authentication is required to start '$(unit)'."),
+ [JOB_STOP] = N_("Authentication is required to stop '$(unit)'."),
+ [JOB_RELOAD] = N_("Authentication is required to reload '$(unit)'."),
+ [JOB_RESTART] = N_("Authentication is required to restart '$(unit)'."),
+ [JOB_TRY_RESTART] = N_("Authentication is required to restart '$(unit)'."),
+};
+
+int bus_unit_method_start_generic(
+ sd_bus_message *message,
+ Unit *u,
+ JobType job_type,
+ bool reload_if_possible,
+ sd_bus_error *error) {
+
+ const char *smode, *verb;
+ JobMode mode;
+ int r;
+
+ assert(message);
+ assert(u);
+ assert(job_type >= 0 && job_type < _JOB_TYPE_MAX);
+
+ r = mac_selinux_unit_access_check(
+ u, message,
+ job_type_to_access_method(job_type),
+ error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &smode);
+ if (r < 0)
+ return r;
+
+ mode = job_mode_from_string(smode);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Job mode %s invalid", smode);
+
+ if (reload_if_possible)
+ verb = strjoina("reload-or-", job_type_to_string(job_type));
+ else
+ verb = job_type_to_string(job_type);
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ verb,
+ CAP_SYS_ADMIN,
+ polkit_message_for_job[job_type],
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ return bus_unit_queue_job(message, u, job_type, mode,
+ reload_if_possible ? BUS_UNIT_QUEUE_RELOAD_IF_POSSIBLE : 0, error);
+}
+
+static int method_start(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_START, false, error);
+}
+
+static int method_stop(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_STOP, false, error);
+}
+
+static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_RELOAD, false, error);
+}
+
+static int method_restart(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_RESTART, false, error);
+}
+
+static int method_try_restart(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_TRY_RESTART, false, error);
+}
+
+static int method_reload_or_restart(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_RESTART, true, error);
+}
+
+static int method_reload_or_try_restart(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_start_generic(message, userdata, JOB_TRY_RESTART, true, error);
+}
+
+int bus_unit_method_enqueue_job(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ BusUnitQueueFlags flags = BUS_UNIT_QUEUE_VERBOSE_REPLY;
+ const char *jtype, *smode;
+ Unit *u = userdata;
+ JobType type;
+ JobMode mode;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = sd_bus_message_read(message, "ss", &jtype, &smode);
+ if (r < 0)
+ return r;
+
+ /* Parse the two magic reload types "reload-or-…" manually */
+ if (streq(jtype, "reload-or-restart")) {
+ type = JOB_RESTART;
+ flags |= BUS_UNIT_QUEUE_RELOAD_IF_POSSIBLE;
+ } else if (streq(jtype, "reload-or-try-restart")) {
+ type = JOB_TRY_RESTART;
+ flags |= BUS_UNIT_QUEUE_RELOAD_IF_POSSIBLE;
+ } else {
+ /* And the rest generically */
+ type = job_type_from_string(jtype);
+ if (type < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Job type %s invalid", jtype);
+ }
+
+ mode = job_mode_from_string(smode);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Job mode %s invalid", smode);
+
+ r = mac_selinux_unit_access_check(
+ u, message,
+ job_type_to_access_method(type),
+ error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ jtype,
+ CAP_SYS_ADMIN,
+ polkit_message_for_job[type],
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ return bus_unit_queue_job(message, u, type, mode, flags, error);
+}
+
+int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ const char *swho;
+ int32_t signo;
+ KillWho who;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "stop", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "si", &swho, &signo);
+ if (r < 0)
+ return r;
+
+ if (isempty(swho))
+ who = KILL_ALL;
+ else {
+ who = kill_who_from_string(swho);
+ if (who < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid who argument %s", swho);
+ }
+
+ if (!SIGNAL_VALID(signo))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Signal number out of range.");
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "kill",
+ CAP_KILL,
+ N_("Authentication is required to send a UNIX signal to the processes of '$(unit)'."),
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_kill(u, who, signo, error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "reload", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "reset-failed",
+ CAP_SYS_ADMIN,
+ N_("Authentication is required to reset the \"failed\" state of '$(unit)'."),
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ unit_reset_failed(u);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int runtime, r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &runtime);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "set-property",
+ CAP_SYS_ADMIN,
+ N_("Authentication is required to set properties on '$(unit)'."),
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = bus_unit_set_properties(u, message, runtime ? UNIT_RUNTIME : UNIT_PERSISTENT, true, error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "ref",
+ CAP_SYS_ADMIN,
+ NULL,
+ false,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = bus_unit_track_add_sender(u, message);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = bus_unit_track_remove_sender(u, message);
+ if (r == -EUNATCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_REFERENCED, "Unit has not been referenced yet.");
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_clean(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ ExecCleanMask mask = 0;
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "stop", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *i;
+
+ r = sd_bus_message_read(message, "s", &i);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (streq(i, "all"))
+ mask |= EXEC_CLEAN_ALL;
+ else {
+ ExecDirectoryType t;
+
+ t = exec_resource_type_from_string(i);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid resource type: %s", i);
+
+ mask |= 1U << t;
+ }
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "clean",
+ CAP_DAC_OVERRIDE,
+ N_("Authentication is required to delete files and directories associated with '$(unit)'."),
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = unit_clean(u, mask);
+ if (r == -EOPNOTSUPP)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Unit '%s' does not supporting cleaning.", u->id);
+ if (r == -EUNATCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NOTHING_TO_CLEAN, "No matching resources found.");
+ if (r == -EBUSY)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_BUSY, "Unit is not inactive or has pending job.");
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int bus_unit_method_freezer_generic(sd_bus_message *message, void *userdata, sd_bus_error *error, FreezerAction action) {
+ const char* perm;
+ int (*method)(Unit*);
+ Unit *u = userdata;
+ bool reply_no_delay = false;
+ int r;
+
+ assert(message);
+ assert(u);
+ assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW));
+
+ if (action == FREEZER_FREEZE) {
+ perm = "stop";
+ method = unit_freeze;
+ } else {
+ perm = "start";
+ method = unit_thaw;
+ }
+
+ r = mac_selinux_unit_access_check(u, message, perm, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ perm,
+ CAP_SYS_ADMIN,
+ N_("Authentication is required to freeze or thaw the processes of '$(unit)' unit."),
+ true,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = method(u);
+ if (r == -EOPNOTSUPP)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Unit '%s' does not support freezing.", u->id);
+ if (r == -EBUSY)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_BUSY, "Unit has a pending job.");
+ if (r == -EHOSTDOWN)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_INACTIVE, "Unit is inactive.");
+ if (r == -EALREADY)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Previously requested freezer operation for unit '%s' is still in progress.", u->id);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ reply_no_delay = true;
+
+ assert(!u->pending_freezer_message);
+
+ r = sd_bus_message_new_method_return(message, &u->pending_freezer_message);
+ if (r < 0)
+ return r;
+
+ if (reply_no_delay) {
+ r = bus_unit_send_pending_freezer_message(u);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+int bus_unit_method_thaw(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_freezer_generic(message, userdata, error, FREEZER_THAW);
+}
+
+int bus_unit_method_freeze(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_unit_method_freezer_generic(message, userdata, error, FREEZER_FREEZE);
+}
+
+static int property_get_refs(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ const char *i;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (i = sd_bus_track_first(u->bus_track); i; i = sd_bus_track_next(u->bus_track)) {
+ int c, k;
+
+ c = sd_bus_track_count_name(u->bus_track, i);
+ if (c < 0)
+ return c;
+
+ /* Add the item multiple times if the ref count for each is above 1 */
+ for (k = 0; k < c; k++) {
+ r = sd_bus_message_append(reply, "s", i);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable bus_unit_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Id", "s", NULL, offsetof(Unit, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Names", "as", property_get_names, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Following", "s", property_get_following, 0, 0),
+ SD_BUS_PROPERTY("Requires", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_REQUIRES]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Requisite", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_REQUISITE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Wants", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_WANTS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindsTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_BINDS_TO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PartOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PART_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RequiredBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_REQUIRED_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RequisiteOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_REQUISITE_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("WantedBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_WANTED_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BoundBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_BOUND_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConsistsOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_CONSISTS_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Conflicts", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_CONFLICTS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConflictedBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_CONFLICTED_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Before", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_BEFORE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("After", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_AFTER]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OnFailure", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_ON_FAILURE]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Triggers", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_TRIGGERS]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TriggeredBy", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_TRIGGERED_BY]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RequiresMountsFor", "as", property_get_requires_mounts_for, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LoadState", "s", property_get_load_state, offsetof(Unit, load_state), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ActiveState", "s", property_get_active_state, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("FreezerState", "s", property_get_freezer_state, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("SubState", "s", property_get_sub_state, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("FragmentPath", "s", NULL, offsetof(Unit, fragment_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SourcePath", "s", NULL, offsetof(Unit, source_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DropInPaths", "as", NULL, offsetof(Unit, dropin_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UnitFileState", "s", property_get_unit_file_state, 0, 0),
+ SD_BUS_PROPERTY("UnitFilePreset", "s", property_get_unit_file_preset, 0, 0),
+ BUS_PROPERTY_DUAL_TIMESTAMP("StateChangeTimestamp", offsetof(Unit, state_change_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InactiveExitTimestamp", offsetof(Unit, inactive_exit_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("ActiveEnterTimestamp", offsetof(Unit, active_enter_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("ActiveExitTimestamp", offsetof(Unit, active_exit_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("InactiveEnterTimestamp", offsetof(Unit, inactive_enter_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CanStart", "b", property_get_can_start, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanStop", "b", property_get_can_stop, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanReload", "b", property_get_can_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanIsolate", "b", property_get_can_isolate, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanClean", "as", property_get_can_clean, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanFreeze", "b", property_get_can_freeze, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Job", "(uo)", property_get_job, offsetof(Unit, job), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("StopWhenUnneeded", "b", bus_property_get_bool, offsetof(Unit, stop_when_unneeded), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RefuseManualStart", "b", bus_property_get_bool, offsetof(Unit, refuse_manual_start), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RefuseManualStop", "b", bus_property_get_bool, offsetof(Unit, refuse_manual_stop), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("AllowIsolate", "b", bus_property_get_bool, offsetof(Unit, allow_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DefaultDependencies", "b", bus_property_get_bool, offsetof(Unit, default_dependencies), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OnFailureJobMode", "s", property_get_job_mode, offsetof(Unit, on_failure_job_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobRunningTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_running_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("ConditionTimestamp", offsetof(Unit, condition_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ BUS_PROPERTY_DUAL_TIMESTAMP("AssertTimestamp", offsetof(Unit, assert_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Conditions", "a(sbbsi)", property_get_conditions, offsetof(Unit, conditions), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StartLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Unit, start_ratelimit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_ratelimit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Unit, failure_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FailureActionExitStatus", "i", bus_property_get_int, offsetof(Unit, failure_action_exit_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SuccessAction", "s", property_get_emergency_action, offsetof(Unit, success_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SuccessActionExitStatus", "i", bus_property_get_int, offsetof(Unit, success_action_exit_status), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CollectMode", "s", property_get_collect_mode, offsetof(Unit, collect_mode), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Refs", "as", property_get_refs, 0, 0),
+
+ SD_BUS_METHOD_WITH_NAMES("Start",
+ "s",
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_start,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Stop",
+ "s",
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_stop,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Reload",
+ "s",
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_reload,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Restart",
+ "s",
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_restart,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("TryRestart",
+ "s",
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_try_restart,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ReloadOrRestart",
+ "s",
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_reload_or_restart,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ReloadOrTryRestart",
+ "s",
+ SD_BUS_PARAM(mode),
+ "o",
+ SD_BUS_PARAM(job),
+ method_reload_or_try_restart,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("EnqueueJob",
+ "ss",
+ SD_BUS_PARAM(job_type)
+ SD_BUS_PARAM(job_mode),
+ "uososa(uosos)",
+ SD_BUS_PARAM(job_id)
+ SD_BUS_PARAM(job_path)
+ SD_BUS_PARAM(unit_id)
+ SD_BUS_PARAM(unit_path)
+ SD_BUS_PARAM(job_type)
+ SD_BUS_PARAM(affected_jobs),
+ bus_unit_method_enqueue_job,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Kill",
+ "si",
+ SD_BUS_PARAM(whom)
+ SD_BUS_PARAM(signal),
+ NULL,,
+ bus_unit_method_kill,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ResetFailed",
+ NULL,
+ NULL,
+ bus_unit_method_reset_failed,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetProperties",
+ "ba(sv)",
+ SD_BUS_PARAM(runtime)
+ SD_BUS_PARAM(properties),
+ NULL,,
+ bus_unit_method_set_properties,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Ref",
+ NULL,
+ NULL,
+ bus_unit_method_ref,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Unref",
+ NULL,
+ NULL,
+ bus_unit_method_unref,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Clean",
+ "as",
+ SD_BUS_PARAM(mask),
+ NULL,,
+ bus_unit_method_clean,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Freeze",
+ NULL,
+ NULL,
+ bus_unit_method_freeze,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Thaw",
+ NULL,
+ NULL,
+ bus_unit_method_thaw,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ /* For dependency types we don't support anymore always return an empty array */
+ SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_empty_strv, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_empty_strv, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_empty_strv, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_empty_strv, 0, SD_BUS_VTABLE_HIDDEN),
+ /* Obsolete alias names */
+ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_ratelimit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_ratelimit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+
+ SD_BUS_VTABLE_END
+};
+
+static int property_get_slice(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ return sd_bus_message_append(reply, "s", unit_slice_name(u));
+}
+
+static int property_get_current_memory(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t sz = (uint64_t) -1;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = unit_get_memory_current(u, &sz);
+ if (r < 0 && r != -ENODATA)
+ log_unit_warning_errno(u, r, "Failed to get memory.usage_in_bytes attribute: %m");
+
+ return sd_bus_message_append(reply, "t", sz);
+}
+
+static int property_get_current_tasks(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t cn = (uint64_t) -1;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = unit_get_tasks_current(u, &cn);
+ if (r < 0 && r != -ENODATA)
+ log_unit_warning_errno(u, r, "Failed to get pids.current attribute: %m");
+
+ return sd_bus_message_append(reply, "t", cn);
+}
+
+static int property_get_cpu_usage(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ nsec_t ns = (nsec_t) -1;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = unit_get_cpu_usage(u, &ns);
+ if (r < 0 && r != -ENODATA)
+ log_unit_warning_errno(u, r, "Failed to get cpuacct.usage attribute: %m");
+
+ return sd_bus_message_append(reply, "t", ns);
+}
+
+static int property_get_cpuset_cpus(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ _cleanup_(cpu_set_reset) CPUSet cpus = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ (void) unit_get_cpuset(u, &cpus, "cpuset.cpus.effective");
+ (void) cpu_set_to_dbus(&cpus, &array, &allocated);
+ return sd_bus_message_append_array(reply, 'y', array, allocated);
+}
+
+static int property_get_cpuset_mems(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ _cleanup_(cpu_set_reset) CPUSet mems = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ (void) unit_get_cpuset(u, &mems, "cpuset.mems.effective");
+ (void) cpu_set_to_dbus(&mems, &array, &allocated);
+ return sd_bus_message_append_array(reply, 'y', array, allocated);
+}
+
+static int property_get_cgroup(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata;
+ const char *t = NULL;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ /* Three cases: a) u->cgroup_path is NULL, in which case the
+ * unit has no control group, which we report as the empty
+ * string. b) u->cgroup_path is the empty string, which
+ * indicates the root cgroup, which we report as "/". c) all
+ * other cases we report as-is. */
+
+ if (u->cgroup_path)
+ t = empty_to_root(u->cgroup_path);
+
+ return sd_bus_message_append(reply, "s", t);
+}
+
+static int append_process(sd_bus_message *reply, const char *p, pid_t pid, Set *pids) {
+ _cleanup_free_ char *buf = NULL, *cmdline = NULL;
+ int r;
+
+ assert(reply);
+ assert(pid > 0);
+
+ r = set_put(pids, PID_TO_PTR(pid));
+ if (IN_SET(r, 0, -EEXIST))
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (!p) {
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &buf);
+ if (r == -ESRCH)
+ return 0;
+ if (r < 0)
+ return r;
+
+ p = buf;
+ }
+
+ (void) get_process_cmdline(pid, SIZE_MAX, PROCESS_CMDLINE_COMM_FALLBACK, &cmdline);
+
+ return sd_bus_message_append(reply,
+ "(sus)",
+ p,
+ (uint32_t) pid,
+ cmdline);
+}
+
+static int append_cgroup(sd_bus_message *reply, const char *p, Set *pids) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(reply);
+ assert(p);
+
+ r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, p, &f);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ pid_t pid;
+
+ r = cg_read_pid(f, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (is_kernel_thread(pid) > 0)
+ continue;
+
+ r = append_process(reply, p, pid, pids);
+ if (r < 0)
+ return r;
+ }
+
+ r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, p, &d);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *g = NULL, *j = NULL;
+
+ r = cg_read_subgroup(d, &g);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ j = path_join(empty_to_root(p), g);
+ if (!j)
+ return -ENOMEM;
+
+ r = append_cgroup(reply, j, pids);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_set_free_ Set *pids = NULL;
+ Unit *u = userdata;
+ pid_t pid;
+ int r;
+
+ assert(message);
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ pids = set_new(NULL);
+ if (!pids)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(sus)");
+ if (r < 0)
+ return r;
+
+ if (u->cgroup_path) {
+ r = append_cgroup(reply, u->cgroup_path, pids);
+ if (r < 0)
+ return r;
+ }
+
+ /* The main and control pids might live outside of the cgroup, hence fetch them separately */
+ pid = unit_main_pid(u);
+ if (pid > 0) {
+ r = append_process(reply, NULL, pid, pids);
+ if (r < 0)
+ return r;
+ }
+
+ pid = unit_control_pid(u);
+ if (pid > 0) {
+ r = append_process(reply, NULL, pid, pids);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int property_get_ip_counter(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ static const char *const table[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "IPIngressBytes",
+ [CGROUP_IP_EGRESS_BYTES] = "IPEgressBytes",
+ [CGROUP_IP_INGRESS_PACKETS] = "IPIngressPackets",
+ [CGROUP_IP_EGRESS_PACKETS] = "IPEgressPackets",
+ };
+
+ uint64_t value = UINT64_MAX;
+ Unit *u = userdata;
+ ssize_t metric;
+
+ assert(bus);
+ assert(reply);
+ assert(property);
+ assert(u);
+
+ assert_se((metric = string_table_lookup(table, ELEMENTSOF(table), property)) >= 0);
+ (void) unit_get_ip_accounting(u, metric, &value);
+ return sd_bus_message_append(reply, "t", value);
+}
+
+static int property_get_io_counter(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ static const char *const table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IO_READ_BYTES] = "IOReadBytes",
+ [CGROUP_IO_WRITE_BYTES] = "IOWriteBytes",
+ [CGROUP_IO_READ_OPERATIONS] = "IOReadOperations",
+ [CGROUP_IO_WRITE_OPERATIONS] = "IOWriteOperations",
+ };
+
+ uint64_t value = UINT64_MAX;
+ Unit *u = userdata;
+ ssize_t metric;
+
+ assert(bus);
+ assert(reply);
+ assert(property);
+ assert(u);
+
+ assert_se((metric = string_table_lookup(table, ELEMENTSOF(table), property)) >= 0);
+ (void) unit_get_io_accounting(u, metric, false, &value);
+ return sd_bus_message_append(reply, "t", value);
+}
+
+int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_set_free_ Set *pids = NULL;
+ Unit *u = userdata;
+ const char *path;
+ int r;
+
+ assert(message);
+
+ /* This migrates the processes with the specified PIDs into the cgroup of this unit, optionally below a
+ * specified cgroup path. Obviously this only works for units that actually maintain a cgroup
+ * representation. If a process is already in the cgroup no operation is executed – in this case the specified
+ * subcgroup path has no effect! */
+
+ r = mac_selinux_unit_access_check(u, message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &path);
+ if (r < 0)
+ return r;
+
+ path = empty_to_null(path);
+ if (path) {
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Control group path is not absolute: %s", path);
+
+ if (!path_is_normalized(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Control group path is not normalized: %s", path);
+ }
+
+ if (!unit_cgroup_delegate(u))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process migration not available on non-delegated units.");
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not active, refusing.");
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "u");
+ if (r < 0)
+ return r;
+ for (;;) {
+ uid_t process_uid, sender_uid;
+ uint32_t upid;
+ pid_t pid;
+
+ r = sd_bus_message_read(message, "u", &upid);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (upid == 0) {
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+ } else
+ pid = (uid_t) upid;
+
+ /* Filter out duplicates */
+ if (set_contains(pids, PID_TO_PTR(pid)))
+ continue;
+
+ /* Check if this process is suitable for attaching to this unit */
+ r = unit_pid_attachable(u, pid, error);
+ if (r < 0)
+ return r;
+
+ /* Let's query the sender's UID, so that we can make our security decisions */
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0)
+ return r;
+
+ /* Let's validate security: if the sender is root, then all is OK. If the sender is any other unit,
+ * then the process' UID and the target unit's UID have to match the sender's UID */
+ if (sender_uid != 0 && sender_uid != getuid()) {
+ r = get_process_uid(pid, &process_uid);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to retrieve process UID: %m");
+
+ if (process_uid != sender_uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by client's UID. Refusing.", pid);
+ if (process_uid != u->ref_uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by target unit's UID. Refusing.", pid);
+ }
+
+ if (!pids) {
+ pids = set_new(NULL);
+ if (!pids)
+ return -ENOMEM;
+ }
+
+ r = set_put(pids, PID_TO_PTR(pid));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = unit_attach_pids_to_cgroup(u, pids, path);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to attach processes to control group: %m");
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable bus_unit_cgroup_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
+ SD_BUS_PROPERTY("ControlGroup", "s", property_get_cgroup, 0, 0),
+ SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
+ SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
+ SD_BUS_PROPERTY("EffectiveCPUs", "ay", property_get_cpuset_cpus, 0, 0),
+ SD_BUS_PROPERTY("EffectiveMemoryNodes", "ay", property_get_cpuset_mems, 0, 0),
+ SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
+ SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0),
+ SD_BUS_PROPERTY("IOReadBytes", "t", property_get_io_counter, 0, 0),
+ SD_BUS_PROPERTY("IOReadOperations", "t", property_get_io_counter, 0, 0),
+ SD_BUS_PROPERTY("IOWriteBytes", "t", property_get_io_counter, 0, 0),
+ SD_BUS_PROPERTY("IOWriteOperations", "t", property_get_io_counter, 0, 0),
+
+ SD_BUS_METHOD_WITH_NAMES("GetProcesses",
+ NULL,,
+ "a(sus)",
+ SD_BUS_PARAM(processes),
+ bus_unit_method_get_processes,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_METHOD_WITH_NAMES("AttachProcesses",
+ "sau",
+ SD_BUS_PARAM(subcgroup)
+ SD_BUS_PARAM(pids),
+ NULL,,
+ bus_unit_method_attach_processes,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+static int send_new_signal(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(u);
+
+ p = unit_dbus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnitNew");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "so", u->id, p);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int send_changed_signal(sd_bus *bus, void *userdata) {
+ _cleanup_free_ char *p = NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(u);
+
+ p = unit_dbus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ /* Send a properties changed signal. First for the specific
+ * type, then for the generic unit. The clients may rely on
+ * this order to get atomic behavior if needed. */
+
+ r = sd_bus_emit_properties_changed_strv(
+ bus, p,
+ unit_dbus_interface_from_type(u->type),
+ NULL);
+ if (r < 0)
+ return r;
+
+ return sd_bus_emit_properties_changed_strv(
+ bus, p,
+ "org.freedesktop.systemd1.Unit",
+ NULL);
+}
+
+void bus_unit_send_change_signal(Unit *u) {
+ int r;
+ assert(u);
+
+ if (u->in_dbus_queue) {
+ LIST_REMOVE(dbus_queue, u->manager->dbus_unit_queue, u);
+ u->in_dbus_queue = false;
+ }
+
+ if (!u->id)
+ return;
+
+ r = bus_foreach_bus(u->manager, u->bus_track, u->sent_dbus_new_signal ? send_changed_signal : send_new_signal, u);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to send unit change signal for %s: %m", u->id);
+
+ u->sent_dbus_new_signal = true;
+}
+
+void bus_unit_send_pending_change_signal(Unit *u, bool including_new) {
+
+ /* Sends out any pending change signals, but only if they really are pending. This call is used when we are
+ * about to change state in order to force out a PropertiesChanged signal beforehand if there was one pending
+ * so that clients can follow the full state transition */
+
+ if (!u->in_dbus_queue) /* If not enqueued, don't bother */
+ return;
+
+ if (!u->sent_dbus_new_signal && !including_new) /* If the unit was never announced, don't bother, it's fine if
+ * the unit appears in the new state right-away (except if the
+ * caller explicitly asked us to send it anyway) */
+ return;
+
+ if (MANAGER_IS_RELOADING(u->manager)) /* Don't generate unnecessary PropertiesChanged signals for the same unit
+ * when we are reloading. */
+ return;
+
+ bus_unit_send_change_signal(u);
+}
+
+int bus_unit_send_pending_freezer_message(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (!u->pending_freezer_message)
+ return 0;
+
+ r = sd_bus_send(NULL, u->pending_freezer_message, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send queued message, ignoring: %m");
+
+ u->pending_freezer_message = sd_bus_message_unref(u->pending_freezer_message);
+
+ return 0;
+}
+
+static int send_removed_signal(sd_bus *bus, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *p = NULL;
+ Unit *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(u);
+
+ p = unit_dbus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnitRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "so", u->id, p);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+void bus_unit_send_removed_signal(Unit *u) {
+ int r;
+ assert(u);
+
+ if (!u->sent_dbus_new_signal || u->in_dbus_queue)
+ bus_unit_send_change_signal(u);
+
+ if (!u->id)
+ return;
+
+ r = bus_foreach_bus(u->manager, u->bus_track, send_removed_signal, u);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to send unit remove signal for %s: %m", u->id);
+}
+
+int bus_unit_queue_job(
+ sd_bus_message *message,
+ Unit *u,
+ JobType type,
+ JobMode mode,
+ BusUnitQueueFlags flags,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *job_path = NULL, *unit_path = NULL;
+ _cleanup_set_free_ Set *affected = NULL;
+ Job *j, *a;
+ int r;
+
+ assert(message);
+ assert(u);
+ assert(type >= 0 && type < _JOB_TYPE_MAX);
+ assert(mode >= 0 && mode < _JOB_MODE_MAX);
+
+ r = mac_selinux_unit_access_check(
+ u, message,
+ job_type_to_access_method(type),
+ error);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(flags, BUS_UNIT_QUEUE_RELOAD_IF_POSSIBLE) && unit_can_reload(u)) {
+ if (type == JOB_RESTART)
+ type = JOB_RELOAD_OR_START;
+ else if (type == JOB_TRY_RESTART)
+ type = JOB_TRY_RELOAD;
+ }
+
+ if (type == JOB_STOP &&
+ IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_ERROR, UNIT_BAD_SETTING) &&
+ unit_active_state(u) == UNIT_INACTIVE)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s not loaded.", u->id);
+
+ if ((type == JOB_START && u->refuse_manual_start) ||
+ (type == JOB_STOP && u->refuse_manual_stop) ||
+ (IN_SET(type, JOB_RESTART, JOB_TRY_RESTART) && (u->refuse_manual_start || u->refuse_manual_stop)) ||
+ (type == JOB_RELOAD_OR_START && job_type_collapse(type, u) == JOB_START && u->refuse_manual_start))
+ return sd_bus_error_setf(error, BUS_ERROR_ONLY_BY_DEPENDENCY, "Operation refused, unit %s may be requested by dependency only (it is configured to refuse manual start/stop).", u->id);
+
+ if (FLAGS_SET(flags, BUS_UNIT_QUEUE_VERBOSE_REPLY)) {
+ affected = set_new(NULL);
+ if (!affected)
+ return -ENOMEM;
+ }
+
+ r = manager_add_job(u->manager, type, u, mode, affected, error, &j);
+ if (r < 0)
+ return r;
+
+ r = bus_job_track_sender(j, message);
+ if (r < 0)
+ return r;
+
+ /* Before we send the method reply, force out the announcement JobNew for this job */
+ bus_job_send_pending_change_signal(j, true);
+
+ job_path = job_dbus_path(j);
+ if (!job_path)
+ return -ENOMEM;
+
+ /* The classic response is just a job object path */
+ if (!FLAGS_SET(flags, BUS_UNIT_QUEUE_VERBOSE_REPLY))
+ return sd_bus_reply_method_return(message, "o", job_path);
+
+ /* In verbose mode respond with the anchor job plus everything that has been affected */
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ unit_path = unit_dbus_path(j->unit);
+ if (!unit_path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "uosos",
+ j->id, job_path,
+ j->unit->id, unit_path,
+ job_type_to_string(j->type));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(uosos)");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(a, affected) {
+
+ if (a->id == j->id)
+ continue;
+
+ /* Free paths from previous iteration */
+ job_path = mfree(job_path);
+ unit_path = mfree(unit_path);
+
+ job_path = job_dbus_path(a);
+ if (!job_path)
+ return -ENOMEM;
+
+ unit_path = unit_dbus_path(a->unit);
+ if (!unit_path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(uosos)",
+ a->id, job_path,
+ a->unit->id, unit_path,
+ job_type_to_string(a->type));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int bus_unit_set_live_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(message);
+
+ /* Handles setting properties both "live" (i.e. at any time during runtime), and during creation (for transient
+ * units that are being created). */
+
+ if (streq(name, "Description")) {
+ const char *d;
+
+ r = sd_bus_message_read(message, "s", &d);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = unit_set_description(u, d);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "Description=%s", d);
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_set_transient_emergency_action(
+ Unit *u,
+ const char *name,
+ EmergencyAction *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *s;
+ EmergencyAction v;
+ int r;
+ bool system;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ system = MANAGER_IS_SYSTEM(u->manager);
+ r = parse_emergency_action(s, system, &v);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ r == -EOPNOTSUPP ? "%s setting invalid for manager type: %s"
+ : "Invalid %s setting: %s",
+ name, s);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = v;
+ unit_write_settingf(u, flags, name,
+ "%s=%s", name, s);
+ }
+
+ return 1;
+}
+
+static int bus_set_transient_exit_status(
+ Unit *u,
+ const char *name,
+ int *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int32_t k;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "i", &k);
+ if (r < 0)
+ return r;
+
+ if (k > 255)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Exit status must be in range 0…255 or negative.");
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = k < 0 ? -1 : k;
+
+ if (k < 0)
+ unit_write_settingf(u, flags, name, "%s=", name);
+ else
+ unit_write_settingf(u, flags, name, "%s=%i", name, k);
+ }
+
+ return 1;
+}
+
+static BUS_DEFINE_SET_TRANSIENT_PARSE(collect_mode, CollectMode, collect_mode_from_string);
+static BUS_DEFINE_SET_TRANSIENT_PARSE(job_mode, JobMode, job_mode_from_string);
+
+static int bus_set_transient_conditions(
+ Unit *u,
+ const char *name,
+ Condition **list,
+ bool is_condition,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *type_name, *param;
+ int trigger, negate, r;
+ bool empty = true;
+
+ assert(list);
+
+ r = sd_bus_message_enter_container(message, 'a', "(sbbs)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(sbbs)", &type_name, &trigger, &negate, &param)) > 0) {
+ ConditionType t;
+
+ t = is_condition ? condition_type_from_string(type_name) : assert_type_from_string(type_name);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid condition type: %s", type_name);
+
+ if (isempty(param))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Condition parameter in %s is empty", type_name);
+
+ if (condition_takes_path(t) && !path_is_absolute(param))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path in condition %s is not absolute: %s", type_name, param);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ Condition *c;
+
+ c = condition_new(t, param, trigger, negate);
+ if (!c)
+ return -ENOMEM;
+
+ LIST_PREPEND(conditions, *list, c);
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s%s%s", type_name,
+ trigger ? "|" : "", negate ? "!" : "", param);
+ }
+
+ empty = false;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags) && empty) {
+ *list = condition_free_list(*list);
+ unit_write_settingf(u, flags, name, "%sNull=", is_condition ? "Condition" : "Assert");
+ }
+
+ return 1;
+}
+
+static int bus_unit_set_transient_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ UnitDependency d = _UNIT_DEPENDENCY_INVALID;
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(message);
+
+ /* Handles settings when transient units are created. This settings cannot be altered anymore after the unit
+ * has been created. */
+
+ if (streq(name, "SourcePath"))
+ return bus_set_transient_path(u, name, &u->source_path, message, flags, error);
+
+ if (streq(name, "StopWhenUnneeded"))
+ return bus_set_transient_bool(u, name, &u->stop_when_unneeded, message, flags, error);
+
+ if (streq(name, "RefuseManualStart"))
+ return bus_set_transient_bool(u, name, &u->refuse_manual_start, message, flags, error);
+
+ if (streq(name, "RefuseManualStop"))
+ return bus_set_transient_bool(u, name, &u->refuse_manual_stop, message, flags, error);
+
+ if (streq(name, "AllowIsolate"))
+ return bus_set_transient_bool(u, name, &u->allow_isolate, message, flags, error);
+
+ if (streq(name, "DefaultDependencies"))
+ return bus_set_transient_bool(u, name, &u->default_dependencies, message, flags, error);
+
+ if (streq(name, "OnFailureJobMode"))
+ return bus_set_transient_job_mode(u, name, &u->on_failure_job_mode, message, flags, error);
+
+ if (streq(name, "IgnoreOnIsolate"))
+ return bus_set_transient_bool(u, name, &u->ignore_on_isolate, message, flags, error);
+
+ if (streq(name, "JobTimeoutUSec")) {
+ r = bus_set_transient_usec_fix_0(u, name, &u->job_timeout, message, flags, error);
+ if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags) && !u->job_running_timeout_set)
+ u->job_running_timeout = u->job_timeout;
+ }
+
+ if (streq(name, "JobRunningTimeoutUSec")) {
+ r = bus_set_transient_usec_fix_0(u, name, &u->job_running_timeout, message, flags, error);
+ if (r >= 0 && !UNIT_WRITE_FLAGS_NOOP(flags))
+ u->job_running_timeout_set = true;
+
+ return r;
+ }
+
+ if (streq(name, "JobTimeoutAction"))
+ return bus_set_transient_emergency_action(u, name, &u->job_timeout_action, message, flags, error);
+
+ if (streq(name, "JobTimeoutRebootArgument"))
+ return bus_set_transient_string(u, name, &u->job_timeout_reboot_arg, message, flags, error);
+
+ if (streq(name, "StartLimitIntervalUSec"))
+ return bus_set_transient_usec(u, name, &u->start_ratelimit.interval, message, flags, error);
+
+ if (streq(name, "StartLimitBurst"))
+ return bus_set_transient_unsigned(u, name, &u->start_ratelimit.burst, message, flags, error);
+
+ if (streq(name, "StartLimitAction"))
+ return bus_set_transient_emergency_action(u, name, &u->start_limit_action, message, flags, error);
+
+ if (streq(name, "FailureAction"))
+ return bus_set_transient_emergency_action(u, name, &u->failure_action, message, flags, error);
+
+ if (streq(name, "SuccessAction"))
+ return bus_set_transient_emergency_action(u, name, &u->success_action, message, flags, error);
+
+ if (streq(name, "FailureActionExitStatus"))
+ return bus_set_transient_exit_status(u, name, &u->failure_action_exit_status, message, flags, error);
+
+ if (streq(name, "SuccessActionExitStatus"))
+ return bus_set_transient_exit_status(u, name, &u->success_action_exit_status, message, flags, error);
+
+ if (streq(name, "RebootArgument"))
+ return bus_set_transient_string(u, name, &u->reboot_arg, message, flags, error);
+
+ if (streq(name, "CollectMode"))
+ return bus_set_transient_collect_mode(u, name, &u->collect_mode, message, flags, error);
+
+ if (streq(name, "Conditions"))
+ return bus_set_transient_conditions(u, name, &u->conditions, true, message, flags, error);
+
+ if (streq(name, "Asserts"))
+ return bus_set_transient_conditions(u, name, &u->asserts, false, message, flags, error);
+
+ if (streq(name, "Documentation")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ if (!documentation_url_is_valid(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid URL in %s: %s", name, *p);
+ }
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ if (strv_isempty(l)) {
+ u->documentation = strv_free(u->documentation);
+ unit_write_settingf(u, flags, name, "%s=", name);
+ } else {
+ strv_extend_strv(&u->documentation, l, false);
+
+ STRV_FOREACH(p, l)
+ unit_write_settingf(u, flags, name, "%s=%s", name, *p);
+ }
+ }
+
+ return 1;
+
+ } else if (streq(name, "Slice")) {
+ Unit *slice;
+ const char *s;
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "The slice property is only available for units with control groups.");
+ if (u->type == UNIT_SLICE)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Slice may not be set for slice units.");
+ if (unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot set slice for init.scope");
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!unit_name_is_valid(s, UNIT_NAME_PLAIN))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid unit name '%s'", s);
+
+ /* Note that we do not dispatch the load queue here yet, as we don't want our own transient unit to be
+ * loaded while we are still setting it up. Or in other words, we use manager_load_unit_prepare()
+ * instead of manager_load_unit() on purpose, here. */
+ r = manager_load_unit_prepare(u->manager, s, NULL, error, &slice);
+ if (r < 0)
+ return r;
+
+ if (slice->type != UNIT_SLICE)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unit name '%s' is not a slice", s);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = unit_set_slice(u, slice);
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags|UNIT_PRIVATE, name, "Slice=%s", s);
+ }
+
+ return 1;
+
+ } else if (streq(name, "RequiresMountsFor")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **p;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, l) {
+ path_simplify(*p, true);
+
+ if (!path_is_absolute(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path specified in %s is not absolute: %s", name, *p);
+
+ if (!path_is_valid(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path specified in %s has invalid length: %s", name, *p);
+
+ if (!path_is_normalized(*p))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path specified in %s is not normalized: %s", name, *p);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = unit_require_mounts_for(u, *p, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Failed to add required mount \"%s\": %m", *p);
+
+ unit_write_settingf(u, flags, name, "%s=%s", name, *p);
+ }
+ }
+
+ return 1;
+ }
+
+ if (streq(name, "RequiresOverridable"))
+ d = UNIT_REQUIRES; /* redirect for obsolete unit dependency type */
+ else if (streq(name, "RequisiteOverridable"))
+ d = UNIT_REQUISITE; /* same here */
+ else
+ d = unit_dependency_from_string(name);
+
+ if (d >= 0) {
+ const char *other;
+
+ if (!IN_SET(d,
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ UNIT_PART_OF,
+ UNIT_CONFLICTS,
+ UNIT_BEFORE,
+ UNIT_AFTER,
+ UNIT_ON_FAILURE,
+ UNIT_PROPAGATES_RELOAD_TO,
+ UNIT_RELOAD_PROPAGATED_FROM,
+ UNIT_JOINS_NAMESPACE_OF))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Dependency type %s may not be created transiently.", unit_dependency_to_string(d));
+
+ r = sd_bus_message_enter_container(message, 'a', "s");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "s", &other)) > 0) {
+ if (!unit_name_is_valid(other, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid unit name %s", other);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *label = NULL;
+
+ r = unit_add_dependency_by_name(u, d, other, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+
+ label = strjoin(name, "-", other);
+ if (!label)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, label, "%s=%s", unit_dependency_to_string(d), other);
+ }
+
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ } else if (streq(name, "AddRef")) {
+
+ int b;
+
+ /* Why is this called "AddRef" rather than just "Ref", or "Reference"? There's already a "Ref()" method
+ * on the Unit interface, and it's probably not a good idea to expose a property and a method on the
+ * same interface (well, strictly speaking AddRef isn't exposed as full property, we just read it for
+ * transient units, but still). And "References" and "ReferencedBy" is already used as unit reference
+ * dependency type, hence let's not confuse things with that.
+ *
+ * Note that we don't actually add the reference to the bus track. We do that only after the setup of
+ * the transient unit is complete, so that setting this property multiple times in the same transient
+ * unit creation call doesn't count as individual references. */
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags))
+ u->bus_track_add = b;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_unit_set_properties(
+ Unit *u,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ bool commit,
+ sd_bus_error *error) {
+
+ bool for_real = false;
+ unsigned n = 0;
+ int r;
+
+ assert(u);
+ assert(message);
+
+ /* We iterate through the array twice. First run we just check
+ * if all passed data is valid, second run actually applies
+ * it. This is to implement transaction-like behaviour without
+ * actually providing full transactions. */
+
+ r = sd_bus_message_enter_container(message, 'a', "(sv)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name;
+ UnitWriteFlags f;
+
+ r = sd_bus_message_enter_container(message, 'r', "sv");
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (for_real || UNIT_WRITE_FLAGS_NOOP(flags))
+ break;
+
+ /* Reached EOF. Let's try again, and this time for realz... */
+ r = sd_bus_message_rewind(message, false);
+ if (r < 0)
+ return r;
+
+ for_real = true;
+ continue;
+ }
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_VTABLE(u)->bus_set_property)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_PROPERTY_READ_ONLY, "Objects of this type do not support setting properties.");
+
+ r = sd_bus_message_enter_container(message, 'v', NULL);
+ if (r < 0)
+ return r;
+
+ /* If not for real, then mask out the two target flags */
+ f = for_real ? flags : (flags & ~(UNIT_RUNTIME|UNIT_PERSISTENT));
+
+ r = UNIT_VTABLE(u)->bus_set_property(u, name, message, f, error);
+ if (r == 0 && u->transient && u->load_state == UNIT_STUB)
+ r = bus_unit_set_transient_property(u, name, message, f, error);
+ if (r == 0)
+ r = bus_unit_set_live_property(u, name, message, f, error);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_PROPERTY_READ_ONLY, "Cannot set property %s, or unknown property.", name);
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ n += for_real;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (commit && n > 0 && UNIT_VTABLE(u)->bus_commit_properties)
+ UNIT_VTABLE(u)->bus_commit_properties(u);
+
+ return n;
+}
+
+int bus_unit_validate_load_state(Unit *u, sd_bus_error *error) {
+ assert(u);
+
+ /* Generates a pretty error if a unit isn't properly loaded. */
+
+ switch (u->load_state) {
+
+ case UNIT_LOADED:
+ return 0;
+
+ case UNIT_NOT_FOUND:
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s not found.", u->id);
+
+ case UNIT_BAD_SETTING:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING, "Unit %s has a bad unit file setting.", u->id);
+
+ case UNIT_ERROR: /* Only show .load_error in UNIT_ERROR state */
+ return sd_bus_error_set_errnof(error, u->load_error, "Unit %s failed to load properly: %m.", u->id);
+
+ case UNIT_MASKED:
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_MASKED, "Unit %s is masked.", u->id);
+
+ case UNIT_STUB:
+ case UNIT_MERGED:
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Unexpected load state of unit %s", u->id);
+ }
+}
+
+static int bus_unit_track_handler(sd_bus_track *t, void *userdata) {
+ Unit *u = userdata;
+
+ assert(t);
+ assert(u);
+
+ u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
+
+ /* If the client that tracks us disappeared, then there's reason to believe that the cgroup is empty now too,
+ * let's see */
+ unit_add_to_cgroup_empty_queue(u);
+
+ /* Also add the unit to the GC queue, after all if the client left it might be time to GC this unit */
+ unit_add_to_gc_queue(u);
+
+ return 0;
+}
+
+static int bus_unit_allocate_bus_track(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->bus_track)
+ return 0;
+
+ r = sd_bus_track_new(u->manager->api_bus, &u->bus_track, bus_unit_track_handler, u);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_set_recursive(u->bus_track, true);
+ if (r < 0) {
+ u->bus_track = sd_bus_track_unref(u->bus_track);
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_unit_track_add_name(Unit *u, const char *name) {
+ int r;
+
+ assert(u);
+
+ r = bus_unit_allocate_bus_track(u);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_name(u->bus_track, name);
+}
+
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m) {
+ int r;
+
+ assert(u);
+
+ r = bus_unit_allocate_bus_track(u);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_sender(u->bus_track, m);
+}
+
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m) {
+ assert(u);
+
+ /* If we haven't allocated the bus track object yet, then there's definitely no reference taken yet, return an
+ * error */
+ if (!u->bus_track)
+ return -EUNATCH;
+
+ return sd_bus_track_remove_sender(u->bus_track, m);
+}
diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h
new file mode 100644
index 0000000..1da3cfe
--- /dev/null
+++ b/src/core/dbus-unit.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "unit.h"
+
+extern const sd_bus_vtable bus_unit_vtable[];
+extern const sd_bus_vtable bus_unit_cgroup_vtable[];
+
+void bus_unit_send_change_signal(Unit *u);
+void bus_unit_send_pending_change_signal(Unit *u, bool including_new);
+int bus_unit_send_pending_freezer_message(Unit *u);
+void bus_unit_send_removed_signal(Unit *u);
+
+int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_type, bool reload_if_possible, sd_bus_error *error);
+int bus_unit_method_enqueue_job(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitWriteFlags flags, bool commit, sd_bus_error *error);
+int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_clean(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_freeze(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_thaw(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+typedef enum BusUnitQueueFlags {
+ BUS_UNIT_QUEUE_RELOAD_IF_POSSIBLE = 1 << 0,
+ BUS_UNIT_QUEUE_VERBOSE_REPLY = 1 << 1,
+} BusUnitQueueFlags;
+
+int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, BusUnitQueueFlags flags, sd_bus_error *error);
+int bus_unit_validate_load_state(Unit *u, sd_bus_error *error);
+
+int bus_unit_track_add_name(Unit *u, const char *name);
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m);
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m);
diff --git a/src/core/dbus-util.c b/src/core/dbus-util.c
new file mode 100644
index 0000000..d6223db
--- /dev/null
+++ b/src/core/dbus-util.c
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-util.h"
+#include "dbus-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "unit-printf.h"
+#include "user-util.h"
+#include "unit.h"
+
+int bus_property_get_triggered_unit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Unit *u = userdata, *trigger;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ trigger = UNIT_TRIGGER(u);
+
+ return sd_bus_message_append(reply, "s", trigger ? trigger->id : NULL);
+}
+
+BUS_DEFINE_SET_TRANSIENT(mode_t, "u", uint32_t, mode_t, "%040o");
+BUS_DEFINE_SET_TRANSIENT(unsigned, "u", uint32_t, unsigned, "%" PRIu32);
+
+static inline bool valid_user_group_name_or_id_relaxed(const char *u) {
+ return valid_user_group_name(u, VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX);
+}
+
+BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(user_relaxed, valid_user_group_name_or_id_relaxed);
+BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(path, path_is_absolute);
+
+int bus_set_transient_string(
+ Unit *u,
+ const char *name,
+ char **p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "s", &v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ r = free_and_strdup(p, empty_to_null(v));
+ if (r < 0)
+ return r;
+
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+ "%s=%s", name, strempty(v));
+ }
+
+ return 1;
+}
+
+int bus_set_transient_bool(
+ Unit *u,
+ const char *name,
+ bool *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int v, r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "b", &v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = v;
+ unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v));
+ }
+
+ return 1;
+}
+
+int bus_set_transient_percent(
+ Unit *u,
+ const char *name,
+ int *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ const char *v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "s", &v);
+ if (r < 0)
+ return r;
+
+ r = parse_percent(v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = r;
+ unit_write_settingf(u, flags, name, "%s=%d%%", name, r);
+ }
+
+ return 1;
+}
+
+int bus_set_transient_usec_internal(
+ Unit *u,
+ const char *name,
+ usec_t *p,
+ bool fix_0,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ uint64_t v;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "t", &v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ char *n, ts[FORMAT_TIMESPAN_MAX];
+
+ if (fix_0)
+ *p = v != 0 ? v: USEC_INFINITY;
+ else
+ *p = v;
+
+ n = strndupa(name, strlen(name) - 4);
+ unit_write_settingf(u, flags, name, "%sSec=%s", n,
+ format_timespan(ts, sizeof(ts), v, USEC_PER_MSEC));
+ }
+
+ return 1;
+}
diff --git a/src/core/dbus-util.h b/src/core/dbus-util.h
new file mode 100644
index 0000000..4e7c68e
--- /dev/null
+++ b/src/core/dbus-util.h
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "unit.h"
+
+int bus_property_get_triggered_unit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+#define BUS_DEFINE_SET_TRANSIENT(function, bus_type, type, cast_type, fmt) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ cast_type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, bus_type, &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = (cast_type) v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=" fmt, name, v); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_IS_VALID(function, bus_type, type, cast_type, fmt, check) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ cast_type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, bus_type, &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!check(v)) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: " fmt, name, v); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = (cast_type) v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=" fmt, name, v); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_TO_STRING(function, bus_type, type, cast_type, fmt, to_string) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ cast_type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ const char *s; \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, bus_type, &v); \
+ if (r < 0) \
+ return r; \
+ \
+ s = to_string(v); \
+ if (!s) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: " fmt, name, v); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = (cast_type) v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=%s", name, s); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(function, bus_type, type, cast_type, fmt, to_string) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ cast_type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ _cleanup_free_ char *s = NULL; \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, bus_type, &v); \
+ if (r < 0) \
+ return r; \
+ \
+ r = to_string(v, &s); \
+ if (r == -EINVAL) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: " fmt, name, v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = (cast_type) v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=%s", \
+ name, strempty(s)); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_PARSE(function, type, parse) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ const char *s; \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "s", &s); \
+ if (r < 0) \
+ return r; \
+ \
+ v = parse(s); \
+ if (v < 0) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: %s", name, s); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=%s", name, s); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(function, type, parse) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ type *p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ const char *s; \
+ type v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "s", &s); \
+ if (r < 0) \
+ return r; \
+ \
+ r = parse(s, &v); \
+ if (r < 0) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: %s", name, s); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ *p = v; \
+ unit_write_settingf(u, flags, name, \
+ "%s=%s", name, strempty(s)); \
+ } \
+ \
+ return 1; \
+ }
+
+#define BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(function, check) \
+ int bus_set_transient_##function( \
+ Unit *u, \
+ const char *name, \
+ char **p, \
+ sd_bus_message *message, \
+ UnitWriteFlags flags, \
+ sd_bus_error *error) { \
+ \
+ const char *v; \
+ int r; \
+ \
+ assert(p); \
+ \
+ r = sd_bus_message_read(message, "s", &v); \
+ if (r < 0) \
+ return r; \
+ \
+ if (!isempty(v) && !check(v)) \
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, \
+ "Invalid %s setting: %s", name, v); \
+ \
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { \
+ r = free_and_strdup(p, empty_to_null(v)); \
+ if (r < 0) \
+ return r; \
+ \
+ unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, \
+ "%s=%s", name, strempty(v)); \
+ } \
+ \
+ return 1; \
+ }
+
+int bus_set_transient_mode_t(Unit *u, const char *name, mode_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_unsigned(Unit *u, const char *name, unsigned *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_user_relaxed(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_path(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_percent(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
+ return bus_set_transient_usec_internal(u, name, p, false, message, flags, error);
+}
+static inline int bus_set_transient_usec_fix_0(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
+ return bus_set_transient_usec_internal(u, name, p, true, message, flags, error);
+}
diff --git a/src/core/dbus.c b/src/core/dbus.c
new file mode 100644
index 0000000..3e435c9
--- /dev/null
+++ b/src/core/dbus.c
@@ -0,0 +1,1250 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-internal.h"
+#include "bus-polkit.h"
+#include "bus-util.h"
+#include "dbus-automount.h"
+#include "dbus-cgroup.h"
+#include "dbus-device.h"
+#include "dbus-execute.h"
+#include "dbus-job.h"
+#include "dbus-kill.h"
+#include "dbus-manager.h"
+#include "dbus-mount.h"
+#include "dbus-path.h"
+#include "dbus-scope.h"
+#include "dbus-service.h"
+#include "dbus-slice.h"
+#include "dbus-socket.h"
+#include "dbus-swap.h"
+#include "dbus-target.h"
+#include "dbus-timer.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "selinux-access.h"
+#include "serialize.h"
+#include "service.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "user-util.h"
+
+#define CONNECTIONS_MAX 4096
+
+static void destroy_bus(Manager *m, sd_bus **bus);
+
+int bus_send_pending_reload_message(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (!m->pending_reload_message)
+ return 0;
+
+ /* If we cannot get rid of this message we won't dispatch any D-Bus messages, so that we won't end up wanting
+ * to queue another message. */
+
+ r = sd_bus_send(NULL, m->pending_reload_message, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send queued message, ignoring: %m");
+
+ m->pending_reload_message = sd_bus_message_unref(m->pending_reload_message);
+
+ return 0;
+}
+
+int bus_forward_agent_released(Manager *m, const char *path) {
+ int r;
+
+ assert(m);
+ assert(path);
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return 0;
+
+ if (!m->system_bus)
+ return 0;
+
+ /* If we are running a system instance we forward the agent message on the system bus, so that the user
+ * instances get notified about this, too */
+
+ r = sd_bus_emit_signal(m->system_bus,
+ "/org/freedesktop/systemd1/agent",
+ "org.freedesktop.systemd1.Agent",
+ "Released",
+ "s", path);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to propagate agent release message: %m");
+
+ return 1;
+}
+
+static int signal_agent_released(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Manager *m = userdata;
+ const char *cgroup;
+ uid_t sender_uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* only accept org.freedesktop.systemd1.Agent from UID=0 */
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0 || sender_uid != 0)
+ return 0;
+
+ /* parse 'cgroup-empty' notification */
+ r = sd_bus_message_read(message, "s", &cgroup);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ manager_notify_cgroup_empty(m, cgroup);
+ return 0;
+}
+
+static int signal_disconnected(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ sd_bus *bus;
+
+ assert(message);
+ assert(m);
+ assert_se(bus = sd_bus_message_get_bus(message));
+
+ if (bus == m->api_bus)
+ bus_done_api(m);
+ if (bus == m->system_bus)
+ bus_done_system(m);
+
+ if (set_remove(m->private_buses, bus)) {
+ log_debug("Got disconnect on private connection.");
+ destroy_bus(m, &bus);
+ }
+
+ return 0;
+}
+
+static int signal_activation_request(sd_bus_message *message, void *userdata, sd_bus_error *ret_error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (manager_unit_inactive_or_pending(m, SPECIAL_DBUS_SERVICE) ||
+ manager_unit_inactive_or_pending(m, SPECIAL_DBUS_SOCKET)) {
+ r = sd_bus_error_setf(&error, BUS_ERROR_SHUTTING_DOWN, "Refusing activation, D-Bus is shutting down.");
+ goto failed;
+ }
+
+ r = manager_load_unit(m, name, NULL, &error, &u);
+ if (r < 0)
+ goto failed;
+
+ if (u->refuse_manual_start) {
+ r = sd_bus_error_setf(&error, BUS_ERROR_ONLY_BY_DEPENDENCY, "Operation refused, %s may be requested by dependency only (it is configured to refuse manual start/stop).", u->id);
+ goto failed;
+ }
+
+ r = manager_add_job(m, JOB_START, u, JOB_REPLACE, NULL, &error, NULL);
+ if (r < 0)
+ goto failed;
+
+ /* Successfully queued, that's it for us */
+ return 0;
+
+failed:
+ if (!sd_bus_error_is_set(&error))
+ sd_bus_error_set_errno(&error, r);
+
+ log_debug("D-Bus activation failed for %s: %s", name, bus_error_message(&error, r));
+
+ r = sd_bus_message_new_signal(sd_bus_message_get_bus(message), &reply, "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Activator", "ActivationFailure");
+ if (r < 0) {
+ bus_log_create_error(r);
+ return 0;
+ }
+
+ r = sd_bus_message_append(reply, "sss", name, error.name, error.message);
+ if (r < 0) {
+ bus_log_create_error(r);
+ return 0;
+ }
+
+ r = sd_bus_send_to(NULL, reply, "org.freedesktop.DBus", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to respond with to bus activation request: %m");
+
+ return 0;
+}
+
+#if HAVE_SELINUX
+static int mac_selinux_filter(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *verb, *path;
+ Unit *u = NULL;
+ Job *j;
+ int r;
+
+ assert(message);
+
+ /* Our own method calls are all protected individually with
+ * selinux checks, but the built-in interfaces need to be
+ * protected too. */
+
+ if (sd_bus_message_is_method_call(message, "org.freedesktop.DBus.Properties", "Set"))
+ verb = "reload";
+ else if (sd_bus_message_is_method_call(message, "org.freedesktop.DBus.Introspectable", NULL) ||
+ sd_bus_message_is_method_call(message, "org.freedesktop.DBus.Properties", NULL) ||
+ sd_bus_message_is_method_call(message, "org.freedesktop.DBus.ObjectManager", NULL) ||
+ sd_bus_message_is_method_call(message, "org.freedesktop.DBus.Peer", NULL))
+ verb = "status";
+ else
+ return 0;
+
+ path = sd_bus_message_get_path(message);
+
+ if (object_path_startswith("/org/freedesktop/systemd1", path)) {
+ r = mac_selinux_access_check(message, verb, error);
+ if (r < 0)
+ return r;
+
+ return 0;
+ }
+
+ if (streq_ptr(path, "/org/freedesktop/systemd1/unit/self")) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return 0;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return 0;
+
+ u = manager_get_unit_by_pid(m, pid);
+ } else {
+ r = manager_get_job_from_dbus_path(m, path, &j);
+ if (r >= 0)
+ u = j->unit;
+ else
+ manager_load_unit_from_dbus_path(m, path, NULL, &u);
+ }
+ if (!u)
+ return 0;
+
+ r = mac_selinux_unit_access_check(u, message, verb, error);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+#endif
+
+static int find_unit(Manager *m, sd_bus *bus, const char *path, Unit **unit, sd_bus_error *error) {
+ Unit *u = NULL; /* just to appease gcc, initialization is not really necessary */
+ int r;
+
+ assert(m);
+ assert(bus);
+ assert(path);
+
+ if (streq_ptr(path, "/org/freedesktop/systemd1/unit/self")) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ sd_bus_message *message;
+ pid_t pid;
+
+ message = sd_bus_get_current_message(bus);
+ if (!message)
+ return 0;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return 0;
+ } else {
+ r = manager_load_unit_from_dbus_path(m, path, error, &u);
+ if (r < 0)
+ return 0;
+ assert(u);
+ }
+
+ *unit = u;
+ return 1;
+}
+
+static int bus_unit_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ return find_unit(m, bus, path, (Unit**) found, error);
+}
+
+static int bus_unit_interface_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ *found = u;
+ return 1;
+}
+
+static int bus_unit_cgroup_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return 0;
+
+ *found = u;
+ return 1;
+}
+
+static int bus_cgroup_context_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ CGroupContext *c;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ *found = c;
+ return 1;
+}
+
+static int bus_exec_context_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ ExecContext *c;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ c = unit_get_exec_context(u);
+ if (!c)
+ return 0;
+
+ *found = c;
+ return 1;
+}
+
+static int bus_kill_context_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ KillContext *c;
+ Unit *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = find_unit(m, bus, path, &u, error);
+ if (r <= 0)
+ return r;
+
+ if (!streq_ptr(interface, unit_dbus_interface_from_type(u->type)))
+ return 0;
+
+ c = unit_get_kill_context(u);
+ if (!c)
+ return 0;
+
+ *found = c;
+ return 1;
+}
+
+static int bus_unit_enumerate(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ unsigned k = 0;
+ Unit *u;
+
+ l = new0(char*, hashmap_size(m->units)+1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(u, m->units) {
+ l[k] = unit_dbus_path(u);
+ if (!l[k])
+ return -ENOMEM;
+
+ k++;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return k;
+}
+
+static const BusObjectImplementation unit_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Unit",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_unit_vtable, bus_unit_find }),
+ .node_enumerator = bus_unit_enumerate,
+};
+
+static const BusObjectImplementation bus_automount_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Automount",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_automount_vtable, bus_unit_interface_find }),
+};
+
+static const BusObjectImplementation bus_device_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Device",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_device_vtable, bus_unit_interface_find }),
+};
+
+static const BusObjectImplementation bus_mount_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Mount",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_mount_vtable, bus_unit_interface_find },
+ { bus_unit_cgroup_vtable, bus_unit_cgroup_find },
+ { bus_cgroup_vtable, bus_cgroup_context_find },
+ { bus_exec_vtable, bus_exec_context_find },
+ { bus_kill_vtable, bus_kill_context_find }),
+};
+
+static const BusObjectImplementation bus_path_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Path",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_path_vtable, bus_unit_interface_find }),
+};
+
+static const BusObjectImplementation bus_scope_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Scope",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_scope_vtable, bus_unit_interface_find },
+ { bus_unit_cgroup_vtable, bus_unit_cgroup_find },
+ { bus_cgroup_vtable, bus_cgroup_context_find },
+ { bus_kill_vtable, bus_kill_context_find }),
+};
+
+static const BusObjectImplementation bus_service_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Service",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_service_vtable, bus_unit_interface_find },
+ { bus_unit_cgroup_vtable, bus_unit_cgroup_find },
+ { bus_cgroup_vtable, bus_cgroup_context_find },
+ { bus_exec_vtable, bus_exec_context_find },
+ { bus_kill_vtable, bus_kill_context_find }),
+};
+
+static const BusObjectImplementation bus_slice_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Slice",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_slice_vtable, bus_unit_interface_find },
+ { bus_unit_cgroup_vtable, bus_unit_cgroup_find },
+ { bus_cgroup_vtable, bus_cgroup_context_find }),
+};
+
+static const BusObjectImplementation bus_socket_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Socket",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_socket_vtable, bus_unit_interface_find },
+ { bus_unit_cgroup_vtable, bus_unit_cgroup_find },
+ { bus_cgroup_vtable, bus_cgroup_context_find },
+ { bus_exec_vtable, bus_exec_context_find },
+ { bus_kill_vtable, bus_kill_context_find }),
+};
+
+static const BusObjectImplementation bus_swap_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Swap",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_swap_vtable, bus_unit_interface_find },
+ { bus_unit_cgroup_vtable, bus_unit_cgroup_find },
+ { bus_cgroup_vtable, bus_cgroup_context_find },
+ { bus_exec_vtable, bus_exec_context_find },
+ { bus_kill_vtable, bus_kill_context_find }),
+};
+
+static const BusObjectImplementation bus_target_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Target",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_target_vtable, bus_unit_interface_find }),
+};
+
+static const BusObjectImplementation bus_timer_object = {
+ "/org/freedesktop/systemd1/unit",
+ "org.freedesktop.systemd1.Timer",
+ .fallback_vtables = BUS_FALLBACK_VTABLES(
+ { bus_timer_vtable, bus_unit_interface_find }),
+};
+
+static const BusObjectImplementation bus_manager_object = {
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ .vtables = BUS_VTABLES(bus_manager_vtable),
+ .children = BUS_IMPLEMENTATIONS(
+ &job_object,
+ &unit_object,
+ &bus_automount_object,
+ &bus_device_object,
+ &bus_mount_object,
+ &bus_path_object,
+ &bus_scope_object,
+ &bus_service_object,
+ &bus_slice_object,
+ &bus_socket_object,
+ &bus_swap_object,
+ &bus_target_object,
+ &bus_timer_object),
+};
+
+static const BusObjectImplementation manager_log_control_object = {
+ "/org/freedesktop/LogControl1",
+ "org.freedesktop.LogControl1",
+ .vtables = BUS_VTABLES(bus_manager_log_control_vtable),
+};
+
+int bus_manager_introspect_implementations(FILE *out, const char *pattern) {
+ return bus_introspect_implementations(
+ out,
+ pattern,
+ BUS_IMPLEMENTATIONS(&bus_manager_object,
+ &manager_log_control_object));
+}
+
+static int bus_setup_api_vtables(Manager *m, sd_bus *bus) {
+ int r;
+
+ assert(m);
+ assert(bus);
+
+#if HAVE_SELINUX
+ r = sd_bus_add_filter(bus, NULL, mac_selinux_filter, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add SELinux access filter: %m");
+#endif
+
+ r = bus_add_implementation(bus, &bus_manager_object, m);
+ if (r < 0)
+ return r;
+
+ return bus_add_implementation(bus, &manager_log_control_object, m);
+}
+
+static int bus_setup_disconnected_match(Manager *m, sd_bus *bus) {
+ int r;
+
+ assert(m);
+ assert(bus);
+
+ r = sd_bus_match_signal_async(
+ bus,
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "/org/freedesktop/DBus/Local",
+ "org.freedesktop.DBus.Local",
+ "Disconnected",
+ signal_disconnected, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for Disconnected message: %m");
+
+ return 0;
+}
+
+static int bus_on_connection(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_close_ int nfd = -1;
+ Manager *m = userdata;
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+ assert(m);
+
+ nfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (nfd < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return 0;
+
+ log_warning_errno(errno, "Failed to accept private connection, ignoring: %m");
+ return 0;
+ }
+
+ if (set_size(m->private_buses) >= CONNECTIONS_MAX) {
+ log_warning("Too many concurrent connections, refusing");
+ return 0;
+ }
+
+ r = set_ensure_allocated(&m->private_buses, NULL);
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ r = sd_bus_new(&bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to allocate new private connection bus: %m");
+ return 0;
+ }
+
+ (void) sd_bus_set_description(bus, "private-bus-connection");
+
+ r = sd_bus_set_fd(bus, nfd, nfd);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to set fd on new connection bus: %m");
+ return 0;
+ }
+
+ nfd = -1;
+
+ r = bus_check_peercred(bus);
+ if (r < 0) {
+ log_warning_errno(r, "Incoming private connection from unprivileged client, refusing: %m");
+ return 0;
+ }
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ r = sd_bus_set_server(bus, 1, id);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to enable server support for new connection bus: %m");
+ return 0;
+ }
+
+ r = sd_bus_negotiate_creds(bus, 1,
+ SD_BUS_CREDS_PID|SD_BUS_CREDS_UID|
+ SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS|
+ SD_BUS_CREDS_SELINUX_CONTEXT);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to enable credentials for new connection: %m");
+ return 0;
+ }
+
+ r = sd_bus_set_sender(bus, "org.freedesktop.systemd1");
+ if (r < 0) {
+ log_warning_errno(r, "Failed to set direct connection sender: %m");
+ return 0;
+ }
+
+ r = sd_bus_start(bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to start new connection bus: %m");
+ return 0;
+ }
+
+ r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to attach new connection bus to event loop: %m");
+ return 0;
+ }
+
+ r = bus_setup_disconnected_match(m, bus);
+ if (r < 0)
+ return 0;
+
+ r = bus_setup_api_vtables(m, bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to set up API vtables on new connection bus: %m");
+ return 0;
+ }
+
+ r = set_put(m->private_buses, bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to add new connection bus to set: %m");
+ return 0;
+ }
+
+ bus = NULL;
+
+ log_debug("Accepted new private connection.");
+
+ return 0;
+}
+
+static int bus_setup_api(Manager *m, sd_bus *bus) {
+ char *name;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(bus);
+
+ /* Let's make sure we have enough credential bits so that we can make security and selinux decisions */
+ r = sd_bus_negotiate_creds(bus, 1,
+ SD_BUS_CREDS_PID|SD_BUS_CREDS_UID|
+ SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS|
+ SD_BUS_CREDS_SELINUX_CONTEXT);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable credential passing, ignoring: %m");
+
+ r = bus_setup_api_vtables(m, bus);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(u, name, m->watch_bus) {
+ r = unit_install_bus_match(u, bus, name);
+ if (r < 0)
+ log_error_errno(r, "Failed to subscribe to NameOwnerChanged signal for '%s': %m", name);
+ }
+
+ r = sd_bus_match_signal_async(
+ bus,
+ NULL,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.systemd1.Activator",
+ "ActivationRequest",
+ signal_activation_request, NULL, m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to subscribe to activation signal: %m");
+
+ /* Allow replacing of our name, to ease implementation of reexecution, where we keep the old connection open
+ * until after the new connection is set up and the name installed to allow clients to synchronously wait for
+ * reexecution to finish */
+ r = sd_bus_request_name_async(bus, NULL, "org.freedesktop.systemd1", SD_BUS_NAME_REPLACE_EXISTING|SD_BUS_NAME_ALLOW_REPLACEMENT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ log_debug("Successfully connected to API bus.");
+
+ return 0;
+}
+
+int bus_init_api(Manager *m) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ if (m->api_bus)
+ return 0;
+
+ /* The API and system bus is the same if we are running in system mode */
+ if (MANAGER_IS_SYSTEM(m) && m->system_bus)
+ bus = sd_bus_ref(m->system_bus);
+ else {
+ if (MANAGER_IS_SYSTEM(m))
+ r = sd_bus_open_system_with_description(&bus, "bus-api-system");
+ else
+ r = sd_bus_open_user_with_description(&bus, "bus-api-user");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to API bus: %m");
+
+ r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach API bus to event loop: %m");
+
+ r = bus_setup_disconnected_match(m, bus);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_setup_api(m, bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up API bus: %m");
+
+ m->api_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+static int bus_setup_system(Manager *m, sd_bus *bus) {
+ int r;
+
+ assert(m);
+ assert(bus);
+
+ /* if we are a user instance we get the Released message via the system bus */
+ if (MANAGER_IS_USER(m)) {
+ r = sd_bus_match_signal_async(
+ bus,
+ NULL,
+ NULL,
+ "/org/freedesktop/systemd1/agent",
+ "org.freedesktop.systemd1.Agent",
+ "Released",
+ signal_agent_released, NULL, m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to request Released match on system bus: %m");
+ }
+
+ log_debug("Successfully connected to system bus.");
+ return 0;
+}
+
+int bus_init_system(Manager *m) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ if (m->system_bus)
+ return 0;
+
+ /* The API and system bus is the same if we are running in system mode */
+ if (MANAGER_IS_SYSTEM(m) && m->api_bus)
+ bus = sd_bus_ref(m->api_bus);
+ else {
+ r = sd_bus_open_system_with_description(&bus, "bus-system");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach system bus to event loop: %m");
+
+ r = bus_setup_disconnected_match(m, bus);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_setup_system(m, bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up system bus: %m");
+
+ m->system_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_init_private(Manager *m) {
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa;
+ socklen_t sa_len;
+ sd_event_source *s;
+ int r;
+
+ assert(m);
+
+ if (m->private_listen_fd >= 0)
+ return 0;
+
+ if (MANAGER_IS_SYSTEM(m)) {
+
+ /* We want the private bus only when running as init */
+ if (getpid_cached() != 1)
+ return 0;
+
+ r = sockaddr_un_set_path(&sa.un, "/run/systemd/private");
+ } else {
+ const char *e, *joined;
+
+ e = secure_getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "XDG_RUNTIME_DIR is not set, refusing.");
+
+ joined = strjoina(e, "/systemd/private");
+ r = sockaddr_un_set_path(&sa.un, joined);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Can't set path for AF_UNIX socket to bind to: %m");
+ sa_len = r;
+
+ (void) mkdir_parents_label(sa.un.sun_path, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate private socket: %m");
+
+ r = bind(fd, &sa.sa, sa_len);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to bind private socket: %m");
+
+ r = listen(fd, SOMAXCONN);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to make private socket listening: %m");
+
+ /* Generate an inotify event in case somebody waits for this socket to appear using inotify() */
+ (void) touch(sa.un.sun_path);
+
+ r = sd_event_add_io(m->event, &s, fd, EPOLLIN, bus_on_connection, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event source: %m");
+
+ (void) sd_event_source_set_description(s, "bus-connection");
+
+ m->private_listen_fd = TAKE_FD(fd);
+ m->private_listen_event_source = s;
+
+ log_debug("Successfully created private D-Bus server.");
+
+ return 0;
+}
+
+static void destroy_bus(Manager *m, sd_bus **bus) {
+ Unit *u;
+ Job *j;
+
+ assert(m);
+ assert(bus);
+
+ if (!*bus)
+ return;
+
+ /* Make sure all bus slots watching names are released. */
+ HASHMAP_FOREACH(u, m->watch_bus) {
+ if (u->match_bus_slot && sd_bus_slot_get_bus(u->match_bus_slot) == *bus)
+ u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
+ if (u->get_name_owner_slot && sd_bus_slot_get_bus(u->get_name_owner_slot) == *bus)
+ u->get_name_owner_slot = sd_bus_slot_unref(u->get_name_owner_slot);
+ }
+
+ /* Get rid of tracked clients on this bus */
+ if (m->subscribed && sd_bus_track_get_bus(m->subscribed) == *bus)
+ m->subscribed = sd_bus_track_unref(m->subscribed);
+
+ HASHMAP_FOREACH(j, m->jobs)
+ if (j->bus_track && sd_bus_track_get_bus(j->bus_track) == *bus)
+ j->bus_track = sd_bus_track_unref(j->bus_track);
+
+ HASHMAP_FOREACH(u, m->units) {
+ if (u->bus_track && sd_bus_track_get_bus(u->bus_track) == *bus)
+ u->bus_track = sd_bus_track_unref(u->bus_track);
+
+ /* Get rid of pending freezer messages on this bus */
+ if (u->pending_freezer_message && sd_bus_message_get_bus(u->pending_freezer_message) == *bus)
+ u->pending_freezer_message = sd_bus_message_unref(u->pending_freezer_message);
+ }
+
+ /* Get rid of queued message on this bus */
+ if (m->pending_reload_message && sd_bus_message_get_bus(m->pending_reload_message) == *bus)
+ m->pending_reload_message = sd_bus_message_unref(m->pending_reload_message);
+
+ /* Possibly flush unwritten data, but only if we are
+ * unprivileged, since we don't want to sync here */
+ if (!MANAGER_IS_SYSTEM(m))
+ sd_bus_flush(*bus);
+
+ /* And destroy the object */
+ *bus = sd_bus_close_unref(*bus);
+}
+
+void bus_done_api(Manager *m) {
+ destroy_bus(m, &m->api_bus);
+}
+
+void bus_done_system(Manager *m) {
+ destroy_bus(m, &m->system_bus);
+}
+
+void bus_done_private(Manager *m) {
+ sd_bus *b;
+
+ assert(m);
+
+ while ((b = set_steal_first(m->private_buses)))
+ destroy_bus(m, &b);
+
+ m->private_buses = set_free(m->private_buses);
+
+ m->private_listen_event_source = sd_event_source_unref(m->private_listen_event_source);
+ m->private_listen_fd = safe_close(m->private_listen_fd);
+}
+
+void bus_done(Manager *m) {
+ assert(m);
+
+ bus_done_api(m);
+ bus_done_system(m);
+ bus_done_private(m);
+
+ assert(!m->subscribed);
+
+ m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+}
+
+int bus_fdset_add_all(Manager *m, FDSet *fds) {
+ sd_bus *b;
+ int fd;
+
+ assert(m);
+ assert(fds);
+
+ /* When we are about to reexecute we add all D-Bus fds to the
+ * set to pass over to the newly executed systemd. They won't
+ * be used there however, except thatt they are closed at the
+ * very end of deserialization, those making it possible for
+ * clients to synchronously wait for systemd to reexec by
+ * simply waiting for disconnection */
+
+ if (m->api_bus) {
+ fd = sd_bus_get_fd(m->api_bus);
+ if (fd >= 0) {
+ fd = fdset_put_dup(fds, fd);
+ if (fd < 0)
+ return fd;
+ }
+ }
+
+ SET_FOREACH(b, m->private_buses) {
+ fd = sd_bus_get_fd(b);
+ if (fd >= 0) {
+ fd = fdset_put_dup(fds, fd);
+ if (fd < 0)
+ return fd;
+ }
+ }
+
+ /* We don't offer any APIs on the system bus (well, unless it
+ * is the same as the API bus) hence we don't bother with it
+ * here */
+
+ return 0;
+}
+
+int bus_foreach_bus(
+ Manager *m,
+ sd_bus_track *subscribed2,
+ int (*send_message)(sd_bus *bus, void *userdata),
+ void *userdata) {
+
+ sd_bus *b;
+ int r, ret = 0;
+
+ /* Send to all direct buses, unconditionally */
+ SET_FOREACH(b, m->private_buses) {
+
+ /* Don't bother with enqueuing these messages to clients that haven't started yet */
+ if (sd_bus_is_ready(b) <= 0)
+ continue;
+
+ r = send_message(b, userdata);
+ if (r < 0)
+ ret = r;
+ }
+
+ /* Send to API bus, but only if somebody is subscribed */
+ if (m->api_bus &&
+ (sd_bus_track_count(m->subscribed) > 0 ||
+ sd_bus_track_count(subscribed2) > 0)) {
+ r = send_message(m->api_bus, userdata);
+ if (r < 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) {
+ const char *n;
+
+ assert(f);
+ assert(prefix);
+
+ for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) {
+ int c, j;
+
+ c = sd_bus_track_count_name(t, n);
+ for (j = 0; j < c; j++)
+ (void) serialize_item(f, prefix, n);
+ }
+}
+
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) {
+ int r;
+
+ assert(m);
+ assert(t);
+
+ if (strv_isempty(l))
+ return 0;
+
+ if (!m->api_bus)
+ return 0;
+
+ if (!*t) {
+ r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_track_set_recursive(*t, recursive);
+ if (r < 0)
+ return r;
+
+ return bus_track_add_name_many(*t, l);
+}
+
+int bus_verify_manage_units_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
+ return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.manage-units", NULL, false, UID_INVALID, &m->polkit_registry, error);
+}
+
+int bus_verify_manage_unit_files_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
+ return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.manage-unit-files", NULL, false, UID_INVALID, &m->polkit_registry, error);
+}
+
+int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
+ return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.reload-daemon", NULL, false, UID_INVALID, &m->polkit_registry, error);
+}
+
+int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
+ return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.set-environment", NULL, false, UID_INVALID, &m->polkit_registry, error);
+}
+
+uint64_t manager_bus_n_queued_write(Manager *m) {
+ uint64_t c = 0;
+ sd_bus *b;
+ int r;
+
+ /* Returns the total number of messages queued for writing on all our direct and API buses. */
+
+ SET_FOREACH(b, m->private_buses) {
+ uint64_t k;
+
+ r = sd_bus_get_n_queued_write(b, &k);
+ if (r < 0)
+ log_debug_errno(r, "Failed to query queued messages for private bus: %m");
+ else
+ c += k;
+ }
+
+ if (m->api_bus) {
+ uint64_t k;
+
+ r = sd_bus_get_n_queued_write(m->api_bus, &k);
+ if (r < 0)
+ log_debug_errno(r, "Failed to query queued messages for API bus: %m");
+ else
+ c += k;
+ }
+
+ return c;
+}
+
+static void vtable_dump_bus_properties(FILE *f, const sd_bus_vtable *table) {
+ const sd_bus_vtable *i;
+
+ for (i = table; i->type != _SD_BUS_VTABLE_END; i++) {
+ if (!IN_SET(i->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY) ||
+ (i->flags & (SD_BUS_VTABLE_DEPRECATED | SD_BUS_VTABLE_HIDDEN)) != 0)
+ continue;
+
+ fprintf(f, "%s\n", i->x.property.member);
+ }
+}
+
+void dump_bus_properties(FILE *f) {
+ assert(f);
+
+ vtable_dump_bus_properties(f, bus_automount_vtable);
+ vtable_dump_bus_properties(f, bus_cgroup_vtable);
+ vtable_dump_bus_properties(f, bus_device_vtable);
+ vtable_dump_bus_properties(f, bus_exec_vtable);
+ vtable_dump_bus_properties(f, bus_job_vtable);
+ vtable_dump_bus_properties(f, bus_kill_vtable);
+ vtable_dump_bus_properties(f, bus_manager_vtable);
+ vtable_dump_bus_properties(f, bus_mount_vtable);
+ vtable_dump_bus_properties(f, bus_path_vtable);
+ vtable_dump_bus_properties(f, bus_scope_vtable);
+ vtable_dump_bus_properties(f, bus_service_vtable);
+ vtable_dump_bus_properties(f, bus_slice_vtable);
+ vtable_dump_bus_properties(f, bus_socket_vtable);
+ vtable_dump_bus_properties(f, bus_swap_vtable);
+ vtable_dump_bus_properties(f, bus_target_vtable);
+ vtable_dump_bus_properties(f, bus_timer_vtable);
+ vtable_dump_bus_properties(f, bus_unit_vtable);
+ vtable_dump_bus_properties(f, bus_unit_cgroup_vtable);
+}
diff --git a/src/core/dbus.h b/src/core/dbus.h
new file mode 100644
index 0000000..369d9f5
--- /dev/null
+++ b/src/core/dbus.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "manager.h"
+
+int bus_send_pending_reload_message(Manager *m);
+
+int bus_init_private(Manager *m);
+int bus_init_api(Manager *m);
+int bus_init_system(Manager *m);
+
+void bus_done_private(Manager *m);
+void bus_done_api(Manager *m);
+void bus_done_system(Manager *m);
+void bus_done(Manager *m);
+
+int bus_fdset_add_all(Manager *m, FDSet *fds);
+
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix);
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l);
+
+int bus_foreach_bus(Manager *m, sd_bus_track *subscribed2, int (*send_message)(sd_bus *bus, void *userdata), void *userdata);
+
+int bus_verify_manage_units_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+int bus_verify_manage_unit_files_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+
+int bus_forward_agent_released(Manager *m, const char *path);
+
+uint64_t manager_bus_n_queued_write(Manager *m);
+
+void dump_bus_properties(FILE *f);
+int bus_manager_introspect_implementations(FILE *out, const char *pattern);
diff --git a/src/core/device.c b/src/core/device.c
new file mode 100644
index 0000000..9a1d882
--- /dev/null
+++ b/src/core/device.c
@@ -0,0 +1,1121 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/epoll.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "dbus-device.h"
+#include "dbus-unit.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "device.h"
+#include "log.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "serialize.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "swap.h"
+#include "udev-util.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_DEVICE_STATE_MAX] = {
+ [DEVICE_DEAD] = UNIT_INACTIVE,
+ [DEVICE_TENTATIVE] = UNIT_ACTIVATING,
+ [DEVICE_PLUGGED] = UNIT_ACTIVE,
+};
+
+static int device_dispatch_io(sd_device_monitor *monitor, sd_device *dev, void *userdata);
+static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask);
+
+static void device_unset_sysfs(Device *d) {
+ Hashmap *devices;
+ Device *first;
+
+ assert(d);
+
+ if (!d->sysfs)
+ return;
+
+ /* Remove this unit from the chain of devices which share the
+ * same sysfs path. */
+ devices = UNIT(d)->manager->devices_by_sysfs;
+ first = hashmap_get(devices, d->sysfs);
+ LIST_REMOVE(same_sysfs, first, d);
+
+ if (first)
+ hashmap_remove_and_replace(devices, d->sysfs, first->sysfs, first);
+ else
+ hashmap_remove(devices, d->sysfs);
+
+ d->sysfs = mfree(d->sysfs);
+}
+
+static int device_set_sysfs(Device *d, const char *sysfs) {
+ _cleanup_free_ char *copy = NULL;
+ Device *first;
+ int r;
+
+ assert(d);
+
+ if (streq_ptr(d->sysfs, sysfs))
+ return 0;
+
+ r = hashmap_ensure_allocated(&UNIT(d)->manager->devices_by_sysfs, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ copy = strdup(sysfs);
+ if (!copy)
+ return -ENOMEM;
+
+ device_unset_sysfs(d);
+
+ first = hashmap_get(UNIT(d)->manager->devices_by_sysfs, sysfs);
+ LIST_PREPEND(same_sysfs, first, d);
+
+ r = hashmap_replace(UNIT(d)->manager->devices_by_sysfs, copy, first);
+ if (r < 0) {
+ LIST_REMOVE(same_sysfs, first, d);
+ return r;
+ }
+
+ d->sysfs = TAKE_PTR(copy);
+ unit_add_to_dbus_queue(UNIT(d));
+
+ return 0;
+}
+
+static void device_init(Unit *u) {
+ Device *d = DEVICE(u);
+
+ assert(d);
+ assert(UNIT(d)->load_state == UNIT_STUB);
+
+ /* In contrast to all other unit types we timeout jobs waiting
+ * for devices by default. This is because they otherwise wait
+ * indefinitely for plugged in devices, something which cannot
+ * happen for the other units since their operations time out
+ * anyway. */
+ u->job_running_timeout = u->manager->default_timeout_start_usec;
+
+ u->ignore_on_isolate = true;
+
+ d->deserialized_state = _DEVICE_STATE_INVALID;
+}
+
+static void device_done(Unit *u) {
+ Device *d = DEVICE(u);
+
+ assert(d);
+
+ device_unset_sysfs(d);
+ d->wants_property = strv_free(d->wants_property);
+}
+
+static int device_load(Unit *u) {
+ int r;
+
+ r = unit_load_fragment_and_dropin(u, false);
+ if (r < 0)
+ return r;
+
+ if (!u->description) {
+ /* Generate a description based on the path, to be used until the
+ device is initialized properly */
+ r = unit_name_to_path(u->id, &u->description);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to unescape name: %m");
+ }
+
+ return 0;
+}
+
+static void device_set_state(Device *d, DeviceState state) {
+ DeviceState old_state;
+ assert(d);
+
+ if (d->state != state)
+ bus_unit_send_pending_change_signal(UNIT(d), false);
+
+ old_state = d->state;
+ d->state = state;
+
+ if (state == DEVICE_DEAD)
+ device_unset_sysfs(d);
+
+ if (state != old_state)
+ log_unit_debug(UNIT(d), "Changed %s -> %s", device_state_to_string(old_state), device_state_to_string(state));
+
+ unit_notify(UNIT(d), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int device_coldplug(Unit *u) {
+ Device *d = DEVICE(u);
+
+ assert(d);
+ assert(d->state == DEVICE_DEAD);
+
+ /* First, let's put the deserialized state and found mask into effect, if we have it. */
+
+ if (d->deserialized_state < 0 ||
+ (d->deserialized_state == d->state &&
+ d->deserialized_found == d->found))
+ return 0;
+
+ d->found = d->deserialized_found;
+ device_set_state(d, d->deserialized_state);
+ return 0;
+}
+
+static void device_catchup(Unit *u) {
+ Device *d = DEVICE(u);
+
+ assert(d);
+
+ /* Second, let's update the state with the enumerated state if it's different */
+ if (d->enumerated_found == d->found)
+ return;
+
+ device_update_found_one(d, d->enumerated_found, DEVICE_FOUND_MASK);
+}
+
+static const struct {
+ DeviceFound flag;
+ const char *name;
+} device_found_map[] = {
+ { DEVICE_FOUND_UDEV, "found-udev" },
+ { DEVICE_FOUND_MOUNT, "found-mount" },
+ { DEVICE_FOUND_SWAP, "found-swap" },
+};
+
+static int device_found_to_string_many(DeviceFound flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ unsigned i;
+
+ assert(ret);
+
+ for (i = 0; i < ELEMENTSOF(device_found_map); i++) {
+ if (!FLAGS_SET(flags, device_found_map[i].flag))
+ continue;
+
+ if (!strextend_with_separator(&s, ",", device_found_map[i].name, NULL))
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+static int device_found_from_string_many(const char *name, DeviceFound *ret) {
+ DeviceFound flags = 0;
+ int r;
+
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ DeviceFound f = 0;
+ unsigned i;
+
+ r = extract_first_word(&name, &word, ",", 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (i = 0; i < ELEMENTSOF(device_found_map); i++)
+ if (streq(word, device_found_map[i].name)) {
+ f = device_found_map[i].flag;
+ break;
+ }
+
+ if (f == 0)
+ return -EINVAL;
+
+ flags |= f;
+ }
+
+ *ret = flags;
+ return 0;
+}
+
+static int device_serialize(Unit *u, FILE *f, FDSet *fds) {
+ _cleanup_free_ char *s = NULL;
+ Device *d = DEVICE(u);
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", device_state_to_string(d->state));
+
+ if (device_found_to_string_many(d->found, &s) >= 0)
+ (void) serialize_item(f, "found", s);
+
+ return 0;
+}
+
+static int device_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Device *d = DEVICE(u);
+ int r;
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ DeviceState state;
+
+ state = device_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value, ignoring: %s", value);
+ else
+ d->deserialized_state = state;
+
+ } else if (streq(key, "found")) {
+ r = device_found_from_string_many(value, &d->deserialized_found);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to parse found value '%s', ignoring: %m", value);
+
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static void device_dump(Unit *u, FILE *f, const char *prefix) {
+ Device *d = DEVICE(u);
+ _cleanup_free_ char *s = NULL;
+
+ assert(d);
+
+ (void) device_found_to_string_many(d->found, &s);
+
+ fprintf(f,
+ "%sDevice State: %s\n"
+ "%sSysfs Path: %s\n"
+ "%sFound: %s\n",
+ prefix, device_state_to_string(d->state),
+ prefix, strna(d->sysfs),
+ prefix, strna(s));
+
+ if (!strv_isempty(d->wants_property)) {
+ char **i;
+
+ STRV_FOREACH(i, d->wants_property)
+ fprintf(f, "%sudev SYSTEMD_WANTS: %s\n",
+ prefix, *i);
+ }
+}
+
+_pure_ static UnitActiveState device_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[DEVICE(u)->state];
+}
+
+_pure_ static const char *device_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return device_state_to_string(DEVICE(u)->state);
+}
+
+static int device_update_description(Unit *u, sd_device *dev, const char *path) {
+ _cleanup_free_ char *j = NULL;
+ const char *model, *label, *desc;
+ int r;
+
+ assert(u);
+ assert(path);
+
+ desc = path;
+
+ if (dev &&
+ (sd_device_get_property_value(dev, "ID_MODEL_FROM_DATABASE", &model) >= 0 ||
+ sd_device_get_property_value(dev, "ID_MODEL", &model) >= 0)) {
+ desc = model;
+
+ /* Try to concatenate the device model string with a label, if there is one */
+ if (sd_device_get_property_value(dev, "ID_FS_LABEL", &label) >= 0 ||
+ sd_device_get_property_value(dev, "ID_PART_ENTRY_NAME", &label) >= 0 ||
+ sd_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER", &label) >= 0) {
+
+ desc = j = strjoin(model, " ", label);
+ if (!j)
+ return log_oom();
+ }
+ }
+
+ r = unit_set_description(u, desc);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to set device description: %m");
+
+ return 0;
+}
+
+static int device_add_udev_wants(Unit *u, sd_device *dev) {
+ _cleanup_strv_free_ char **added = NULL;
+ const char *wants, *property;
+ Device *d = DEVICE(u);
+ int r;
+
+ assert(d);
+ assert(dev);
+
+ property = MANAGER_IS_USER(u->manager) ? "SYSTEMD_USER_WANTS" : "SYSTEMD_WANTS";
+
+ r = sd_device_get_property_value(dev, property, &wants);
+ if (r < 0)
+ return 0;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&wants, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to parse property %s with value %s: %m", property, wants);
+
+ if (unit_name_is_valid(word, UNIT_NAME_TEMPLATE) && d->sysfs) {
+ _cleanup_free_ char *escaped = NULL;
+
+ /* If the unit name is specified as template, then automatically fill in the sysfs path of the
+ * device as instance name, properly escaped. */
+
+ r = unit_name_path_escape(d->sysfs, &escaped);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to escape %s: %m", d->sysfs);
+
+ r = unit_name_replace_instance(word, escaped, &k);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to build %s instance of template %s: %m", escaped, word);
+ } else {
+ /* If this is not a template, then let's mangle it so, that it becomes a valid unit name. */
+
+ r = unit_name_mangle(word, UNIT_NAME_MANGLE_WARN, &k);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to mangle unit name \"%s\": %m", word);
+ }
+
+ r = unit_add_dependency_by_name(u, UNIT_WANTS, k, true, UNIT_DEPENDENCY_UDEV);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to add Wants= dependency: %m");
+
+ r = strv_push(&added, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+
+ if (d->state != DEVICE_DEAD) {
+ char **i;
+
+ /* So here's a special hack, to compensate for the fact that the udev database's reload cycles are not
+ * synchronized with our own reload cycles: when we detect that the SYSTEMD_WANTS property of a device
+ * changes while the device unit is already up, let's manually trigger any new units listed in it not
+ * seen before. This typically happens during the boot-time switch root transition, as udev devices
+ * will generally already be up in the initrd, but SYSTEMD_WANTS properties get then added through udev
+ * rules only available on the host system, and thus only when the initial udev coldplug trigger runs.
+ *
+ * We do this only if the device has been up already when we parse this, as otherwise the usual
+ * dependency logic that is run from the dead → plugged transition will trigger these deps. */
+
+ STRV_FOREACH(i, added) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ if (strv_contains(d->wants_property, *i)) /* Was this unit already listed before? */
+ continue;
+
+ r = manager_add_job_by_name(u->manager, JOB_START, *i, JOB_FAIL, NULL, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue SYSTEMD_WANTS= job, ignoring: %s", bus_error_message(&error, r));
+ }
+ }
+
+ return strv_free_and_replace(d->wants_property, added);
+}
+
+static bool device_is_bound_by_mounts(Device *d, sd_device *dev) {
+ const char *bound_by;
+ int r;
+
+ assert(d);
+ assert(dev);
+
+ if (sd_device_get_property_value(dev, "SYSTEMD_MOUNT_DEVICE_BOUND", &bound_by) >= 0) {
+ r = parse_boolean(bound_by);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to parse SYSTEMD_MOUNT_DEVICE_BOUND='%s' udev property, ignoring: %m", bound_by);
+
+ d->bind_mounts = r > 0;
+ } else
+ d->bind_mounts = false;
+
+ return d->bind_mounts;
+}
+
+static void device_upgrade_mount_deps(Unit *u) {
+ Unit *other;
+ void *v;
+ int r;
+
+ /* Let's upgrade Requires= to BindsTo= on us. (Used when SYSTEMD_MOUNT_DEVICE_BOUND is set) */
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REQUIRED_BY]) {
+ if (other->type != UNIT_MOUNT)
+ continue;
+
+ r = unit_add_dependency(other, UNIT_BINDS_TO, u, true, UNIT_DEPENDENCY_UDEV);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to add BindsTo= dependency between device and mount unit, ignoring: %m");
+ }
+}
+
+static int device_setup_unit(Manager *m, sd_device *dev, const char *path, bool main) {
+ _cleanup_free_ char *e = NULL;
+ const char *sysfs = NULL;
+ Unit *u = NULL;
+ bool delete;
+ int r;
+
+ assert(m);
+ assert(path);
+
+ if (dev) {
+ r = sd_device_get_syspath(dev, &sysfs);
+ if (r < 0) {
+ log_device_debug_errno(dev, r, "Couldn't get syspath from device, ignoring: %m");
+ return 0;
+ }
+ }
+
+ r = unit_name_from_path(path, ".device", &e);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to generate unit name from device path: %m");
+
+ u = manager_get_unit(m, e);
+ if (u) {
+ /* The device unit can still be present even if the device was unplugged: a mount unit can reference it
+ * hence preventing the GC to have garbaged it. That's desired since the device unit may have a
+ * dependency on the mount unit which was added during the loading of the later. When the device is
+ * plugged the sysfs might not be initialized yet, as we serialize the device's state but do not
+ * serialize the sysfs path across reloads/reexecs. Hence, when coming back from a reload/restart we
+ * might have the state valid, but not the sysfs path. Hence, let's filter out conflicting devices, but
+ * let's accept devices in any state with no sysfs path set. */
+
+ if (DEVICE(u)->state == DEVICE_PLUGGED &&
+ DEVICE(u)->sysfs &&
+ sysfs &&
+ !path_equal(DEVICE(u)->sysfs, sysfs)) {
+ log_unit_debug(u, "Device %s appeared twice with different sysfs paths %s and %s, ignoring the latter.",
+ e, DEVICE(u)->sysfs, sysfs);
+ return -EEXIST;
+ }
+
+ delete = false;
+
+ /* Let's remove all dependencies generated due to udev properties. We'll re-add whatever is configured
+ * now below. */
+ unit_remove_dependencies(u, UNIT_DEPENDENCY_UDEV);
+ } else {
+ delete = true;
+
+ r = unit_new_for_name(m, sizeof(Device), e, &u);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to allocate device unit %s: %m", e);
+ goto fail;
+ }
+
+ unit_add_to_load_queue(u);
+ }
+
+ /* If this was created via some dependency and has not actually been seen yet ->sysfs will not be
+ * initialized. Hence initialize it if necessary. */
+ if (sysfs) {
+ r = device_set_sysfs(DEVICE(u), sysfs);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Failed to set sysfs path %s: %m", sysfs);
+ goto fail;
+ }
+
+ /* The additional systemd udev properties we only interpret for the main object */
+ if (main)
+ (void) device_add_udev_wants(u, dev);
+ }
+
+ (void) device_update_description(u, dev, path);
+
+ /* So the user wants the mount units to be bound to the device but a mount unit might has been seen by systemd
+ * before the device appears on its radar. In this case the device unit is partially initialized and includes
+ * the deps on the mount unit but at that time the "bind mounts" flag wasn't not present. Fix this up now. */
+ if (dev && device_is_bound_by_mounts(DEVICE(u), dev))
+ device_upgrade_mount_deps(u);
+
+ return 0;
+
+fail:
+ if (delete)
+ unit_free(u);
+
+ return r;
+}
+
+static int device_process_new(Manager *m, sd_device *dev) {
+ const char *sysfs, *dn, *alias;
+ dev_t devnum;
+ int r;
+
+ assert(m);
+
+ if (sd_device_get_syspath(dev, &sysfs) < 0)
+ return 0;
+
+ /* Add the main unit named after the sysfs path */
+ r = device_setup_unit(m, dev, sysfs, true);
+ if (r < 0)
+ return r;
+
+ /* Add an additional unit for the device node */
+ if (sd_device_get_devname(dev, &dn) >= 0)
+ (void) device_setup_unit(m, dev, dn, false);
+
+ /* Add additional units for all symlinks */
+ if (sd_device_get_devnum(dev, &devnum) >= 0) {
+ const char *p;
+
+ FOREACH_DEVICE_DEVLINK(dev, p) {
+ struct stat st;
+
+ if (PATH_STARTSWITH_SET(p, "/dev/block/", "/dev/char/"))
+ continue;
+
+ /* Verify that the symlink in the FS actually belongs
+ * to this device. This is useful to deal with
+ * conflicting devices, e.g. when two disks want the
+ * same /dev/disk/by-label/xxx link because they have
+ * the same label. We want to make sure that the same
+ * device that won the symlink wins in systemd, so we
+ * check the device node major/minor */
+ if (stat(p, &st) >= 0 &&
+ ((!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode)) ||
+ st.st_rdev != devnum))
+ continue;
+
+ (void) device_setup_unit(m, dev, p, false);
+ }
+ }
+
+ /* Add additional units for all explicitly configured aliases */
+ if (sd_device_get_property_value(dev, "SYSTEMD_ALIAS", &alias) < 0)
+ return 0;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&alias, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_device_warning_errno(dev, r, "Failed to parse SYSTEMD_ALIAS property: %m");
+
+ if (!path_is_absolute(word))
+ log_device_warning(dev, "SYSTEMD_ALIAS is not an absolute path, ignoring: %s", word);
+ else if (!path_is_normalized(word))
+ log_device_warning(dev, "SYSTEMD_ALIAS is not a normalized path, ignoring: %s", word);
+ else
+ (void) device_setup_unit(m, dev, word, false);
+ }
+
+ return 0;
+}
+
+static void device_found_changed(Device *d, DeviceFound previous, DeviceFound now) {
+ assert(d);
+
+ /* Didn't exist before, but does now? if so, generate a new invocation ID for it */
+ if (previous == DEVICE_NOT_FOUND && now != DEVICE_NOT_FOUND)
+ (void) unit_acquire_invocation_id(UNIT(d));
+
+ if (FLAGS_SET(now, DEVICE_FOUND_UDEV))
+ /* When the device is known to udev we consider it plugged. */
+ device_set_state(d, DEVICE_PLUGGED);
+ else if (now != DEVICE_NOT_FOUND && !FLAGS_SET(previous, DEVICE_FOUND_UDEV))
+ /* If the device has not been seen by udev yet, but is now referenced by the kernel, then we assume the
+ * kernel knows it now, and udev might soon too. */
+ device_set_state(d, DEVICE_TENTATIVE);
+ else
+ /* If nobody sees the device, or if the device was previously seen by udev and now is only referenced
+ * from the kernel, then we consider the device is gone, the kernel just hasn't noticed it yet. */
+ device_set_state(d, DEVICE_DEAD);
+}
+
+static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask) {
+ Manager *m;
+
+ assert(d);
+
+ m = UNIT(d)->manager;
+
+ if (MANAGER_IS_RUNNING(m) && (m->honor_device_enumeration || MANAGER_IS_USER(m))) {
+ DeviceFound n, previous;
+
+ /* When we are already running, then apply the new mask right-away, and trigger state changes
+ * right-away */
+
+ n = (d->found & ~mask) | (found & mask);
+ if (n == d->found)
+ return;
+
+ previous = d->found;
+ d->found = n;
+
+ device_found_changed(d, previous, n);
+ } else
+ /* We aren't running yet, let's apply the new mask to the shadow variable instead, which we'll apply as
+ * soon as we catch-up with the state. */
+ d->enumerated_found = (d->enumerated_found & ~mask) | (found & mask);
+}
+
+static void device_update_found_by_sysfs(Manager *m, const char *sysfs, DeviceFound found, DeviceFound mask) {
+ Device *d, *l, *n;
+
+ assert(m);
+ assert(sysfs);
+
+ if (mask == 0)
+ return;
+
+ l = hashmap_get(m->devices_by_sysfs, sysfs);
+ LIST_FOREACH_SAFE(same_sysfs, d, n, l)
+ device_update_found_one(d, found, mask);
+}
+
+static int device_update_found_by_name(Manager *m, const char *path, DeviceFound found, DeviceFound mask) {
+ _cleanup_free_ char *e = NULL;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(path);
+
+ if (mask == 0)
+ return 0;
+
+ r = unit_name_from_path(path, ".device", &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name from device path: %m");
+
+ u = manager_get_unit(m, e);
+ if (!u)
+ return 0;
+
+ device_update_found_one(DEVICE(u), found, mask);
+ return 0;
+}
+
+static bool device_is_ready(sd_device *dev) {
+ const char *ready;
+
+ assert(dev);
+
+ if (device_is_renaming(dev) > 0)
+ return false;
+
+ /* Is it really tagged as 'systemd' right now? */
+ if (sd_device_has_current_tag(dev, "systemd") <= 0)
+ return false;
+
+ if (sd_device_get_property_value(dev, "SYSTEMD_READY", &ready) < 0)
+ return true;
+
+ return parse_boolean(ready) != 0;
+}
+
+static Unit *device_following(Unit *u) {
+ Device *d = DEVICE(u);
+ Device *other, *first = NULL;
+
+ assert(d);
+
+ if (startswith(u->id, "sys-"))
+ return NULL;
+
+ /* Make everybody follow the unit that's named after the sysfs path */
+ LIST_FOREACH_AFTER(same_sysfs, other, d)
+ if (startswith(UNIT(other)->id, "sys-"))
+ return UNIT(other);
+
+ LIST_FOREACH_BEFORE(same_sysfs, other, d) {
+ if (startswith(UNIT(other)->id, "sys-"))
+ return UNIT(other);
+
+ first = other;
+ }
+
+ return UNIT(first);
+}
+
+static int device_following_set(Unit *u, Set **_set) {
+ Device *d = DEVICE(u), *other;
+ _cleanup_set_free_ Set *set = NULL;
+ int r;
+
+ assert(d);
+ assert(_set);
+
+ if (LIST_JUST_US(same_sysfs, d)) {
+ *_set = NULL;
+ return 0;
+ }
+
+ set = set_new(NULL);
+ if (!set)
+ return -ENOMEM;
+
+ LIST_FOREACH_AFTER(same_sysfs, other, d) {
+ r = set_put(set, other);
+ if (r < 0)
+ return r;
+ }
+
+ LIST_FOREACH_BEFORE(same_sysfs, other, d) {
+ r = set_put(set, other);
+ if (r < 0)
+ return r;
+ }
+
+ *_set = TAKE_PTR(set);
+ return 1;
+}
+
+static void device_shutdown(Manager *m) {
+ assert(m);
+
+ m->device_monitor = sd_device_monitor_unref(m->device_monitor);
+ m->devices_by_sysfs = hashmap_free(m->devices_by_sysfs);
+}
+
+static void device_enumerate(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *dev;
+ int r;
+
+ assert(m);
+
+ if (!m->device_monitor) {
+ r = sd_device_monitor_new(&m->device_monitor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate device monitor: %m");
+ goto fail;
+ }
+
+ /* This will fail if we are unprivileged, but that
+ * should not matter much, as user instances won't run
+ * during boot. */
+ (void) sd_device_monitor_set_receive_buffer_size(m->device_monitor, 128*1024*1024);
+
+ r = sd_device_monitor_filter_add_match_tag(m->device_monitor, "systemd");
+ if (r < 0) {
+ log_error_errno(r, "Failed to add udev tag match: %m");
+ goto fail;
+ }
+
+ r = sd_device_monitor_attach_event(m->device_monitor, m->event);
+ if (r < 0) {
+ log_error_errno(r, "Failed to attach event to device monitor: %m");
+ goto fail;
+ }
+
+ r = sd_device_monitor_start(m->device_monitor, device_dispatch_io, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to start device monitor: %m");
+ goto fail;
+ }
+ }
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate device enumerator: %m");
+ goto fail;
+ }
+
+ r = sd_device_enumerator_add_match_tag(e, "systemd");
+ if (r < 0) {
+ log_error_errno(r, "Failed to set tag for device enumeration: %m");
+ goto fail;
+ }
+
+ FOREACH_DEVICE(e, dev) {
+ const char *sysfs;
+
+ if (!device_is_ready(dev))
+ continue;
+
+ (void) device_process_new(m, dev);
+
+ if (sd_device_get_syspath(dev, &sysfs) < 0)
+ continue;
+
+ device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV, DEVICE_FOUND_UDEV);
+ }
+
+ return;
+
+fail:
+ device_shutdown(m);
+}
+
+static void device_propagate_reload_by_sysfs(Manager *m, const char *sysfs) {
+ Device *d, *l, *n;
+ int r;
+
+ assert(m);
+ assert(sysfs);
+
+ l = hashmap_get(m->devices_by_sysfs, sysfs);
+ LIST_FOREACH_SAFE(same_sysfs, d, n, l) {
+ if (d->state == DEVICE_DEAD)
+ continue;
+
+ r = manager_propagate_reload(m, UNIT(d), JOB_REPLACE, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to propagate reload, ignoring: %m");
+ }
+}
+
+static int device_remove_old(Manager *m, sd_device *dev) {
+ _cleanup_free_ char *syspath_old = NULL, *e = NULL;
+ const char *devpath_old;
+ int r;
+
+ r = sd_device_get_property_value(dev, "DEVPATH_OLD", &devpath_old);
+ if (r < 0) {
+ log_device_debug_errno(dev, r, "Failed to get DEVPATH_OLD= property on 'move' uevent, ignoring: %m");
+ return 0;
+ }
+
+ syspath_old = path_join("/sys", devpath_old);
+ if (!syspath_old)
+ return log_oom();
+
+ r = unit_name_from_path(syspath_old, ".device", &e);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to generate unit name from old device path: %m");
+
+ device_update_found_by_sysfs(m, syspath_old, 0, DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP);
+ return 0;
+}
+
+static int device_dispatch_io(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
+ Manager *m = userdata;
+ DeviceAction action;
+ const char *sysfs;
+ int r;
+
+ assert(m);
+ assert(dev);
+
+ r = sd_device_get_syspath(dev, &sysfs);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to get device sys path: %m");
+ return 0;
+ }
+
+ r = device_get_action(dev, &action);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to get udev action: %m");
+ return 0;
+ }
+
+ if (!IN_SET(action, DEVICE_ACTION_ADD, DEVICE_ACTION_REMOVE, DEVICE_ACTION_MOVE))
+ device_propagate_reload_by_sysfs(m, sysfs);
+
+ if (action == DEVICE_ACTION_MOVE)
+ (void) device_remove_old(m, dev);
+
+ /* A change event can signal that a device is becoming ready, in particular if the device is using
+ * the SYSTEMD_READY logic in udev so we need to reach the else block of the following if, even for
+ * change events */
+ if (action == DEVICE_ACTION_REMOVE) {
+ r = swap_process_device_remove(m, dev);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to process swap device remove event, ignoring: %m");
+
+ /* If we get notified that a device was removed by udev, then it's completely gone, hence
+ * unset all found bits */
+ device_update_found_by_sysfs(m, sysfs, 0, DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP);
+
+ } else if (device_is_ready(dev)) {
+
+ (void) device_process_new(m, dev);
+
+ r = swap_process_device_new(m, dev);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to process swap device new event, ignoring: %m");
+
+ manager_dispatch_load_queue(m);
+
+ /* The device is found now, set the udev found bit */
+ device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV, DEVICE_FOUND_UDEV);
+
+ } else
+ /* The device is nominally around, but not ready for us. Hence unset the udev bit, but leave
+ * the rest around. */
+ device_update_found_by_sysfs(m, sysfs, 0, DEVICE_FOUND_UDEV);
+
+ return 0;
+}
+
+static bool device_supported(void) {
+ static int read_only = -1;
+
+ /* If /sys is read-only we don't support device units, and any
+ * attempts to start one should fail immediately. */
+
+ if (read_only < 0)
+ read_only = path_is_read_only_fs("/sys");
+
+ return read_only <= 0;
+}
+
+static int validate_node(Manager *m, const char *node, sd_device **ret) {
+ struct stat st;
+ int r;
+
+ assert(m);
+ assert(node);
+ assert(ret);
+
+ /* Validates a device node that showed up in /proc/swaps or /proc/self/mountinfo if it makes sense for us to
+ * track. Note that this validator is fine within missing device nodes, but not with badly set up ones! */
+
+ if (!path_startswith(node, "/dev")) {
+ *ret = NULL;
+ return 0; /* bad! */
+ }
+
+ if (stat(node, &st) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat() device node file %s: %m", node);
+
+ *ret = NULL;
+ return 1; /* good! (though missing) */
+
+ } else {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ r = device_new_from_stat_rdev(&dev, &st);
+ if (r == -ENOENT) {
+ *ret = NULL;
+ return 1; /* good! (though missing) */
+ } else if (r == -ENOTTY) {
+ *ret = NULL;
+ return 0; /* bad! (not a device node but some other kind of file system node) */
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to get udev device from devnum %u:%u: %m", major(st.st_rdev), minor(st.st_rdev));
+
+ *ret = TAKE_PTR(dev);
+ return 1; /* good! */
+ }
+}
+
+void device_found_node(Manager *m, const char *node, DeviceFound found, DeviceFound mask) {
+ int r;
+
+ assert(m);
+ assert(node);
+
+ if (!device_supported())
+ return;
+
+ if (mask == 0)
+ return;
+
+ /* This is called whenever we find a device referenced in /proc/swaps or /proc/self/mounts. Such a device might
+ * be mounted/enabled at a time where udev has not finished probing it yet, and we thus haven't learned about
+ * it yet. In this case we will set the device unit to "tentative" state.
+ *
+ * This takes a pair of DeviceFound flags parameters. The 'mask' parameter is a bit mask that indicates which
+ * bits of 'found' to copy into the per-device DeviceFound flags field. Thus, this function may be used to set
+ * and unset individual bits in a single call, while merging partially with previous state. */
+
+ if ((found & mask) != 0) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ /* If the device is known in the kernel and newly appeared, then we'll create a device unit for it,
+ * under the name referenced in /proc/swaps or /proc/self/mountinfo. But first, let's validate if
+ * everything is alright with the device node. */
+
+ r = validate_node(m, node, &dev);
+ if (r <= 0)
+ return; /* Don't create a device unit for this if the device node is borked. */
+
+ (void) device_setup_unit(m, dev, node, false);
+ }
+
+ /* Update the device unit's state, should it exist */
+ (void) device_update_found_by_name(m, node, found, mask);
+}
+
+bool device_shall_be_bound_by(Unit *device, Unit *u) {
+ assert(device);
+ assert(u);
+
+ if (u->type != UNIT_MOUNT)
+ return false;
+
+ return DEVICE(device)->bind_mounts;
+}
+
+const UnitVTable device_vtable = {
+ .object_size = sizeof(Device),
+ .sections =
+ "Unit\0"
+ "Device\0"
+ "Install\0",
+
+ .gc_jobs = true,
+
+ .init = device_init,
+ .done = device_done,
+ .load = device_load,
+
+ .coldplug = device_coldplug,
+ .catchup = device_catchup,
+
+ .serialize = device_serialize,
+ .deserialize_item = device_deserialize_item,
+
+ .dump = device_dump,
+
+ .active_state = device_active_state,
+ .sub_state_to_string = device_sub_state_to_string,
+
+ .following = device_following,
+ .following_set = device_following_set,
+
+ .enumerate = device_enumerate,
+ .shutdown = device_shutdown,
+ .supported = device_supported,
+
+ .status_message_formats = {
+ .starting_stopping = {
+ [0] = "Expecting device %s...",
+ },
+ .finished_start_job = {
+ [JOB_DONE] = "Found device %s.",
+ [JOB_TIMEOUT] = "Timed out waiting for device %s.",
+ },
+ },
+};
diff --git a/src/core/device.h b/src/core/device.h
new file mode 100644
index 0000000..dfe8a13
--- /dev/null
+++ b/src/core/device.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "unit.h"
+
+typedef struct Device Device;
+
+/* A mask specifying where we have seen the device currently. This is a bitmask because the device might show up
+ * asynchronously from each other at various places. For example, in very common case a device might already be mounted
+ * before udev finished probing it (think: a script setting up a loopback block device, formatting it and mounting it
+ * in quick succession). Hence we need to track precisely where it is already visible and where not. */
+typedef enum DeviceFound {
+ DEVICE_NOT_FOUND = 0,
+ DEVICE_FOUND_UDEV = 1 << 0, /* The device has shown up in the udev database */
+ DEVICE_FOUND_MOUNT = 1 << 1, /* The device has shown up in /proc/self/mountinfo */
+ DEVICE_FOUND_SWAP = 1 << 2, /* The device has shown up in /proc/swaps */
+ DEVICE_FOUND_MASK = DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP,
+} DeviceFound;
+
+struct Device {
+ Unit meta;
+
+ char *sysfs;
+
+ /* In order to be able to distinguish dependencies on different device nodes we might end up creating multiple
+ * devices for the same sysfs path. We chain them up here. */
+ LIST_FIELDS(struct Device, same_sysfs);
+
+ DeviceState state, deserialized_state;
+ DeviceFound found, deserialized_found, enumerated_found;
+
+ bool bind_mounts;
+
+ /* The SYSTEMD_WANTS udev property for this device the last time we saw it */
+ char **wants_property;
+};
+
+extern const UnitVTable device_vtable;
+
+void device_found_node(Manager *m, const char *node, DeviceFound found, DeviceFound mask);
+bool device_shall_be_bound_by(Unit *device, Unit *u);
+
+DEFINE_CAST(DEVICE, Device);
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
new file mode 100644
index 0000000..7da87fd
--- /dev/null
+++ b/src/core/dynamic-user.c
@@ -0,0 +1,825 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "clean-ipc.h"
+#include "dynamic-user.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "nscd-flush.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "serialize.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-record.h"
+#include "user-util.h"
+
+/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
+#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
+
+DEFINE_PRIVATE_TRIVIAL_REF_FUNC(DynamicUser, dynamic_user);
+
+static DynamicUser* dynamic_user_free(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ if (d->manager)
+ (void) hashmap_remove(d->manager->dynamic_users, d->name);
+
+ safe_close_pair(d->storage_socket);
+ return mfree(d);
+}
+
+static int dynamic_user_add(Manager *m, const char *name, int storage_socket[static 2], DynamicUser **ret) {
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(storage_socket);
+
+ r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = malloc0(offsetof(DynamicUser, name) + strlen(name) + 1);
+ if (!d)
+ return -ENOMEM;
+
+ strcpy(d->name, name);
+
+ d->storage_socket[0] = storage_socket[0];
+ d->storage_socket[1] = storage_socket[1];
+
+ r = hashmap_put(m->dynamic_users, d->name, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ d->manager = m;
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+}
+
+static int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) {
+ _cleanup_close_pair_ int storage_socket[2] = { -1, -1 };
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ /* Return the DynamicUser structure for a specific user name. Note that this won't actually allocate a UID for
+ * it, but just prepare the data structure for it. The UID is allocated only on demand, when it's really
+ * needed, and in the child process we fork off, since allocation involves NSS checks which are not OK to do
+ * from PID 1. To allow the children and PID 1 share information about allocated UIDs we use an anonymous
+ * AF_UNIX/SOCK_DGRAM socket (called the "storage socket") that contains at most one datagram with the
+ * allocated UID number, plus an fd referencing the lock file for the UID
+ * (i.e. /run/systemd/dynamic-uid/$UID). Why involve the socket pair? So that PID 1 and all its children can
+ * share the same storage for the UID and lock fd, simply by inheriting the storage socket fds. The socket pair
+ * may exist in three different states:
+ *
+ * a) no datagram stored. This is the initial state. In this case the dynamic user was never realized.
+ *
+ * b) a datagram containing a UID stored, but no lock fd attached to it. In this case there was already a
+ * statically assigned UID by the same name, which we are reusing.
+ *
+ * c) a datagram containing a UID stored, and a lock fd is attached to it. In this case we allocated a dynamic
+ * UID and locked it in the file system, using the lock fd.
+ *
+ * As PID 1 and various children might access the socket pair simultaneously, and pop the datagram or push it
+ * back in any time, we also maintain a lock on the socket pair. Note one peculiarity regarding locking here:
+ * the UID lock on disk is protected via a BSD file lock (i.e. an fd-bound lock), so that the lock is kept in
+ * place as long as there's a reference to the fd open. The lock on the storage socket pair however is a POSIX
+ * file lock (i.e. a process-bound lock), as all users share the same fd of this (after all it is anonymous,
+ * nobody else could get any access to it except via our own fd) and we want to synchronize access between all
+ * processes that have access to it. */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (d) {
+ if (ret) {
+ /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */
+ d->n_ref++;
+ *ret = d;
+ }
+ return 0;
+ }
+
+ if (!valid_user_group_name(name, VALID_USER_ALLOW_NUMERIC))
+ return -EINVAL;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, storage_socket) < 0)
+ return -errno;
+
+ r = dynamic_user_add(m, name, storage_socket, &d);
+ if (r < 0)
+ return r;
+
+ storage_socket[0] = storage_socket[1] = -1;
+
+ if (ret) {
+ d->n_ref++;
+ *ret = d;
+ }
+
+ return 1;
+}
+
+static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
+
+ char path1[STRLEN("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1];
+ const char *path2;
+ int r = 0, k;
+
+ /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The
+ * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it
+ * would be its own client then). We hence keep these world-readable symlinks in place, so that the
+ * unprivileged dbus user can read the mappings when it needs them via these symlinks instead of having to go
+ * via the bus. Ideally, we'd use the lock files we keep for this anyway, but we can't since we use BSD locks
+ * on them and as those may be taken by any user with read access we can't make them world-readable. */
+
+ xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
+ if (unlink(path1) < 0 && errno != ENOENT)
+ r = -errno;
+
+ if (b && symlink(name, path1) < 0) {
+ k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path1);
+ if (r == 0)
+ r = k;
+ }
+
+ path2 = strjoina("/run/systemd/dynamic-uid/direct:", name);
+ if (unlink(path2) < 0 && errno != ENOENT) {
+ k = -errno;
+ if (r == 0)
+ r = k;
+ }
+
+ if (b && symlink(path1 + STRLEN("/run/systemd/dynamic-uid/direct:"), path2) < 0) {
+ k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path2);
+ if (r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int pick_uid(char **suggested_paths, const char *name, uid_t *ret_uid) {
+
+ /* Find a suitable free UID. We use the following strategy to find a suitable UID:
+ *
+ * 1. Initially, we try to read the UID of a number of specified paths. If any of these UIDs works, we use
+ * them. We use in order to increase the chance of UID reuse, if StateDirectory=, CacheDirectory= or
+ * LogsDirectory= are used, as reusing the UID these directories are owned by saves us from having to
+ * recursively chown() them to new users.
+ *
+ * 2. If that didn't yield a currently unused UID, we hash the user name, and try to use that. This should be
+ * pretty good, as the use ris by default derived from the unit name, and hence the same service and same
+ * user should usually get the same UID as long as our hashing doesn't clash.
+ *
+ * 3. Finally, if that didn't work, we randomly pick UIDs, until we find one that is empty.
+ *
+ * Since the dynamic UID space is relatively small we'll stop trying after 100 iterations, giving up. */
+
+ enum {
+ PHASE_SUGGESTED, /* the first phase, reusing directory ownership UIDs */
+ PHASE_HASHED, /* the second phase, deriving a UID from the username by hashing */
+ PHASE_RANDOM, /* the last phase, randomly picking UIDs */
+ } phase = PHASE_SUGGESTED;
+
+ static const uint8_t hash_key[] = {
+ 0x37, 0x53, 0x7e, 0x31, 0xcf, 0xce, 0x48, 0xf5,
+ 0x8a, 0xbb, 0x39, 0x57, 0x8d, 0xd9, 0xec, 0x59
+ };
+
+ unsigned n_tries = 100, current_suggested = 0;
+ int r;
+
+ (void) mkdir("/run/systemd/dynamic-uid", 0755);
+
+ for (;;) {
+ char lock_path[STRLEN("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_close_ int lock_fd = -1;
+ uid_t candidate;
+ ssize_t l;
+
+ if (--n_tries <= 0) /* Give up retrying eventually */
+ return -EBUSY;
+
+ switch (phase) {
+
+ case PHASE_SUGGESTED: {
+ struct stat st;
+
+ if (!suggested_paths || !suggested_paths[current_suggested]) {
+ /* We reached the end of the suggested paths list, let's try by hashing the name */
+ phase = PHASE_HASHED;
+ continue;
+ }
+
+ if (stat(suggested_paths[current_suggested++], &st) < 0)
+ continue; /* We can't read the UID of this path, but that doesn't matter, just try the next */
+
+ candidate = st.st_uid;
+ break;
+ }
+
+ case PHASE_HASHED:
+ /* A static user by this name does not exist yet. Let's find a free ID then, and use that. We
+ * start with a UID generated as hash from the user name. */
+ candidate = UID_CLAMP_INTO_RANGE(siphash24(name, strlen(name), hash_key));
+
+ /* If this one fails, we should proceed with random tries */
+ phase = PHASE_RANDOM;
+ break;
+
+ case PHASE_RANDOM:
+
+ /* Pick another random UID, and see if that works for us. */
+ random_bytes(&candidate, sizeof(candidate));
+ candidate = UID_CLAMP_INTO_RANGE(candidate);
+ break;
+
+ default:
+ assert_not_reached("unknown phase");
+ }
+
+ /* Make sure whatever we picked here actually is in the right range */
+ if (!uid_is_dynamic(candidate))
+ continue;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate);
+
+ for (;;) {
+ struct stat st;
+
+ lock_fd = open(lock_path, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+ if (lock_fd < 0)
+ return -errno;
+
+ r = flock(lock_fd, LOCK_EX|LOCK_NB); /* Try to get a BSD file lock on the UID lock file */
+ if (r < 0) {
+ if (IN_SET(errno, EBUSY, EAGAIN))
+ goto next; /* already in use */
+
+ return -errno;
+ }
+
+ if (fstat(lock_fd, &st) < 0)
+ return -errno;
+ if (st.st_nlink > 0)
+ break;
+
+ /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and
+ * got the lock. Close it, and try again. */
+ lock_fd = safe_close(lock_fd);
+ }
+
+ /* Some superficial check whether this UID/GID might already be taken by some static user */
+ if (getpwuid(candidate) ||
+ getgrgid((gid_t) candidate) ||
+ search_ipc(candidate, (gid_t) candidate) != 0) {
+ (void) unlink(lock_path);
+ continue;
+ }
+
+ /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
+ l = pwritev(lock_fd,
+ (struct iovec[2]) {
+ IOVEC_INIT_STRING(name),
+ IOVEC_INIT((char[1]) { '\n' }, 1),
+ }, 2, 0);
+ if (l < 0) {
+ r = -errno;
+ (void) unlink(lock_path);
+ return r;
+ }
+
+ (void) ftruncate(lock_fd, l);
+ (void) make_uid_symlinks(candidate, name, true); /* also add direct lookup symlinks */
+
+ *ret_uid = candidate;
+ return TAKE_FD(lock_fd);
+
+ next:
+ ;
+ }
+}
+
+static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
+ uid_t uid = UID_INVALID;
+ struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
+ int lock_fd;
+ ssize_t k;
+
+ assert(d);
+ assert(ret_uid);
+ assert(ret_lock_fd);
+
+ /* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock
+ * on the socket taken. */
+
+ k = receive_one_fd_iov(d->storage_socket[0], &iov, 1, MSG_DONTWAIT, &lock_fd);
+ if (k < 0)
+ return (int) k;
+
+ *ret_uid = uid;
+ *ret_lock_fd = lock_fd;
+
+ return 0;
+}
+
+static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
+ struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
+
+ assert(d);
+
+ /* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */
+ return send_one_fd_iov(d->storage_socket[1], lock_fd, &iov, 1, MSG_DONTWAIT);
+}
+
+static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) {
+ char lock_path[STRLEN("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+
+ if (lock_fd < 0)
+ return;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ (void) unlink(lock_path);
+
+ (void) make_uid_symlinks(uid, name, false); /* remove direct lookup symlinks */
+}
+
+static int lockfp(int fd, int *fd_lock) {
+ if (lockf(fd, F_LOCK, 0) < 0)
+ return -errno;
+ *fd_lock = fd;
+ return 0;
+}
+
+static void unlockfp(int *fd_lock) {
+ if (*fd_lock < 0)
+ return;
+ lockf(*fd_lock, F_ULOCK, 0);
+ *fd_lock = -1;
+}
+
+static int dynamic_user_realize(
+ DynamicUser *d,
+ char **suggested_dirs,
+ uid_t *ret_uid, gid_t *ret_gid,
+ bool is_user) {
+
+ _cleanup_(unlockfp) int storage_socket0_lock = -1;
+ _cleanup_close_ int uid_lock_fd = -1;
+ _cleanup_close_ int etc_passwd_lock_fd = -1;
+ uid_t num = UID_INVALID; /* a uid if is_user, and a gid otherwise */
+ gid_t gid = GID_INVALID; /* a gid if is_user, ignored otherwise */
+ bool flush_cache = false;
+ int r;
+
+ assert(d);
+ assert(is_user == !!ret_uid);
+ assert(ret_gid);
+
+ /* Acquire a UID for the user name. This will allocate a UID for the user name if the user doesn't exist
+ * yet. If it already exists its existing UID/GID will be reused. */
+
+ r = lockfp(d->storage_socket[0], &storage_socket0_lock);
+ if (r < 0)
+ return r;
+
+ r = dynamic_user_pop(d, &num, &uid_lock_fd);
+ if (r < 0) {
+ int new_uid_lock_fd;
+ uid_t new_uid;
+
+ if (r != -EAGAIN)
+ return r;
+
+ /* OK, nothing stored yet, let's try to find something useful. While we are working on this release the
+ * lock however, so that nobody else blocks on our NSS lookups. */
+ unlockfp(&storage_socket0_lock);
+
+ /* Let's see if a proper, static user or group by this name exists. Try to take the lock on
+ * /etc/passwd, if that fails with EROFS then /etc is read-only. In that case it's fine if we don't
+ * take the lock, given that users can't be added there anyway in this case. */
+ etc_passwd_lock_fd = take_etc_passwd_lock(NULL);
+ if (etc_passwd_lock_fd < 0 && etc_passwd_lock_fd != -EROFS)
+ return etc_passwd_lock_fd;
+
+ /* First, let's parse this as numeric UID */
+ r = parse_uid(d->name, &num);
+ if (r < 0) {
+ struct passwd *p;
+ struct group *g;
+
+ if (is_user) {
+ /* OK, this is not a numeric UID. Let's see if there's a user by this name */
+ p = getpwnam(d->name);
+ if (p) {
+ num = p->pw_uid;
+ gid = p->pw_gid;
+ } else {
+ /* if the user does not exist but the group with the same name exists, refuse operation */
+ g = getgrnam(d->name);
+ if (g)
+ return -EILSEQ;
+ }
+ } else {
+ /* Let's see if there's a group by this name */
+ g = getgrnam(d->name);
+ if (g)
+ num = (uid_t) g->gr_gid;
+ else {
+ /* if the group does not exist but the user with the same name exists, refuse operation */
+ p = getpwnam(d->name);
+ if (p)
+ return -EILSEQ;
+ }
+ }
+ }
+
+ if (num == UID_INVALID) {
+ /* No static UID assigned yet, excellent. Let's pick a new dynamic one, and lock it. */
+
+ uid_lock_fd = pick_uid(suggested_dirs, d->name, &num);
+ if (uid_lock_fd < 0)
+ return uid_lock_fd;
+ }
+
+ /* So, we found a working UID/lock combination. Let's see if we actually still need it. */
+ r = lockfp(d->storage_socket[0], &storage_socket0_lock);
+ if (r < 0) {
+ unlink_uid_lock(uid_lock_fd, num, d->name);
+ return r;
+ }
+
+ r = dynamic_user_pop(d, &new_uid, &new_uid_lock_fd);
+ if (r < 0) {
+ if (r != -EAGAIN) {
+ /* OK, something bad happened, let's get rid of the bits we acquired. */
+ unlink_uid_lock(uid_lock_fd, num, d->name);
+ return r;
+ }
+
+ /* Great! Nothing is stored here, still. Store our newly acquired data. */
+ flush_cache = true;
+ } else {
+ /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
+ * acquired, and use what's stored now. */
+
+ unlink_uid_lock(uid_lock_fd, num, d->name);
+ safe_close(uid_lock_fd);
+
+ num = new_uid;
+ uid_lock_fd = new_uid_lock_fd;
+ }
+ } else if (is_user && !uid_is_dynamic(num)) {
+ struct passwd *p;
+
+ /* Statically allocated user may have different uid and gid. So, let's obtain the gid. */
+ errno = 0;
+ p = getpwuid(num);
+ if (!p)
+ return errno_or_else(ESRCH);
+
+ gid = p->pw_gid;
+ }
+
+ /* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already
+ * allocated statically, push the UID back too, but do not push the lock fd in. If we allocated the UID
+ * dynamically right here, push that in along with the lock fd for it. */
+ r = dynamic_user_push(d, num, uid_lock_fd);
+ if (r < 0)
+ return r;
+
+ if (flush_cache) {
+ /* If we allocated a new dynamic UID, refresh nscd, so that it forgets about potentially cached
+ * negative entries. But let's do so after we release the /etc/passwd lock, so that there's no
+ * potential for nscd wanting to lock that for completing the invalidation. */
+ etc_passwd_lock_fd = safe_close(etc_passwd_lock_fd);
+ (void) nscd_flush_cache(STRV_MAKE("passwd", "group"));
+ }
+
+ if (is_user) {
+ *ret_uid = num;
+ *ret_gid = gid != GID_INVALID ? gid : num;
+ } else
+ *ret_gid = num;
+
+ return 0;
+}
+
+int dynamic_user_current(DynamicUser *d, uid_t *ret) {
+ _cleanup_(unlockfp) int storage_socket0_lock = -1;
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ assert(d);
+
+ /* Get the currently assigned UID for the user, if there's any. This simply pops the data from the storage socket, and pushes it back in right-away. */
+
+ r = lockfp(d->storage_socket[0], &storage_socket0_lock);
+ if (r < 0)
+ return r;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r < 0)
+ return r;
+
+ r = dynamic_user_push(d, uid, lock_fd);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = uid;
+
+ return 0;
+}
+
+static DynamicUser* dynamic_user_unref(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Note that this doesn't actually release any resources itself. If a dynamic user should be fully destroyed
+ * and its UID released, use dynamic_user_destroy() instead. NB: the dynamic user table may contain entries
+ * with no references, which is commonly the case right before a daemon reload. */
+
+ assert(d->n_ref > 0);
+ d->n_ref--;
+
+ return NULL;
+}
+
+static int dynamic_user_close(DynamicUser *d) {
+ _cleanup_(unlockfp) int storage_socket0_lock = -1;
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ /* Release the user ID, by releasing the lock on it, and emptying the storage socket. After this the user is
+ * unrealized again, much like it was after it the DynamicUser object was first allocated. */
+
+ r = lockfp(d->storage_socket[0], &storage_socket0_lock);
+ if (r < 0)
+ return r;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r == -EAGAIN)
+ /* User wasn't realized yet, nothing to do. */
+ return 0;
+ if (r < 0)
+ return r;
+
+ /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
+ unlink_uid_lock(lock_fd, uid, d->name);
+
+ (void) nscd_flush_cache(STRV_MAKE("passwd", "group"));
+ return 1;
+}
+
+static DynamicUser* dynamic_user_destroy(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Drop a reference to a DynamicUser object, and destroy the user completely if this was the last
+ * reference. This is called whenever a service is shut down and wants its dynamic UID gone. Note that
+ * dynamic_user_unref() is what is called whenever a service is simply freed, for example during a reload
+ * cycle, where the dynamic users should not be destroyed, but our datastructures should. */
+
+ dynamic_user_unref(d);
+
+ if (d->n_ref > 0)
+ return NULL;
+
+ (void) dynamic_user_close(d);
+ return dynamic_user_free(d);
+}
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) {
+ DynamicUser *d;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ /* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users) {
+ int copy0, copy1;
+
+ copy0 = fdset_put_dup(fds, d->storage_socket[0]);
+ if (copy0 < 0)
+ return log_error_errno(copy0, "Failed to add dynamic user storage fd to serialization: %m");
+
+ copy1 = fdset_put_dup(fds, d->storage_socket[1]);
+ if (copy1 < 0)
+ return log_error_errno(copy1, "Failed to add dynamic user storage fd to serialization: %m");
+
+ (void) serialize_item_format(f, "dynamic-user", "%s %i %i", d->name, copy0, copy1);
+ }
+
+ return 0;
+}
+
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) {
+ _cleanup_free_ char *name = NULL, *s0 = NULL, *s1 = NULL;
+ int r, fd0, fd1;
+
+ assert(m);
+ assert(value);
+ assert(fds);
+
+ /* Parse the serialization again, after a daemon reload */
+
+ r = extract_many_words(&value, NULL, 0, &name, &s0, &s1, NULL);
+ if (r != 3 || !isempty(value)) {
+ log_debug("Unable to parse dynamic user line.");
+ return;
+ }
+
+ if (safe_atoi(s0, &fd0) < 0 || !fdset_contains(fds, fd0)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ if (safe_atoi(s1, &fd1) < 0 || !fdset_contains(fds, fd1)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add dynamic user: %m");
+ return;
+ }
+
+ (void) fdset_remove(fds, fd0);
+ (void) fdset_remove(fds, fd1);
+}
+
+void dynamic_user_vacuum(Manager *m, bool close_user) {
+ DynamicUser *d;
+
+ assert(m);
+
+ /* Empty the dynamic user database, optionally cleaning up orphaned dynamic users, i.e. destroy and free users
+ * to which no reference exist. This is called after a daemon reload finished, in order to destroy users which
+ * might not be referenced anymore. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users) {
+ if (d->n_ref > 0)
+ continue;
+
+ if (close_user) {
+ log_debug("Removing orphaned dynamic user %s", d->name);
+ (void) dynamic_user_close(d);
+ }
+
+ dynamic_user_free(d);
+ }
+}
+
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) {
+ char lock_path[STRLEN("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_free_ char *user = NULL;
+ uid_t check_uid;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ /* A friendly way to translate a dynamic user's UID into a name. */
+ if (!uid_is_dynamic(uid))
+ return -ESRCH;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ r = read_one_line_file(lock_path, &user);
+ if (IN_SET(r, -ENOENT, 0))
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* The lock file might be stale, hence let's verify the data before we return it */
+ r = dynamic_user_lookup_name(m, user, &check_uid);
+ if (r < 0)
+ return r;
+ if (check_uid != uid) /* lock file doesn't match our own idea */
+ return -ESRCH;
+
+ *ret = TAKE_PTR(user);
+
+ return 0;
+}
+
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret) {
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ /* A friendly call for translating a dynamic user's name into its UID */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (!d)
+ return -ESRCH;
+
+ r = dynamic_user_current(d, ret);
+ if (r == -EAGAIN) /* not realized yet? */
+ return -ESRCH;
+
+ return r;
+}
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group) {
+ bool acquired = false;
+ int r;
+
+ assert(creds);
+ assert(m);
+
+ /* A DynamicUser object encapsulates an allocation of both a UID and a GID for a specific name. However, some
+ * services use different user and groups. For cases like that there's DynamicCreds containing a pair of user
+ * and group. This call allocates a pair. */
+
+ if (!creds->user && user) {
+ r = dynamic_user_acquire(m, user, &creds->user);
+ if (r < 0)
+ return r;
+
+ acquired = true;
+ }
+
+ if (!creds->group) {
+
+ if (creds->user && (!group || streq_ptr(user, group)))
+ creds->group = dynamic_user_ref(creds->user);
+ else if (group) {
+ r = dynamic_user_acquire(m, group, &creds->group);
+ if (r < 0) {
+ if (acquired)
+ creds->user = dynamic_user_unref(creds->user);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int dynamic_creds_realize(DynamicCreds *creds, char **suggested_paths, uid_t *uid, gid_t *gid) {
+ uid_t u = UID_INVALID;
+ gid_t g = GID_INVALID;
+ int r;
+
+ assert(creds);
+ assert(uid);
+ assert(gid);
+
+ /* Realize both the referenced user and group */
+
+ if (creds->user) {
+ r = dynamic_user_realize(creds->user, suggested_paths, &u, &g, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (creds->group && creds->group != creds->user) {
+ r = dynamic_user_realize(creds->group, suggested_paths, NULL, &g, false);
+ if (r < 0)
+ return r;
+ }
+
+ *uid = u;
+ *gid = g;
+ return 0;
+}
+
+void dynamic_creds_unref(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_unref(creds->user);
+ creds->group = dynamic_user_unref(creds->group);
+}
+
+void dynamic_creds_destroy(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_destroy(creds->user);
+ creds->group = dynamic_user_destroy(creds->group);
+}
diff --git a/src/core/dynamic-user.h b/src/core/dynamic-user.h
new file mode 100644
index 0000000..847ef47
--- /dev/null
+++ b/src/core/dynamic-user.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct DynamicUser DynamicUser;
+
+typedef struct DynamicCreds {
+ /* A combination of a dynamic user and group */
+ DynamicUser *user;
+ DynamicUser *group;
+} DynamicCreds;
+
+#include "manager.h"
+
+/* Note that this object always allocates a pair of user and group under the same name, even if one of them isn't
+ * used. This means, if you want to allocate a group and user pair, and they might have two different names, then you
+ * need to allocated two of these objects. DynamicCreds below makes that easy. */
+struct DynamicUser {
+ Manager *manager;
+ unsigned n_ref;
+
+ /* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock
+ * file fd locking the user ID we picked. */
+ int storage_socket[2];
+
+ char name[];
+};
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds);
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds);
+void dynamic_user_vacuum(Manager *m, bool close_user);
+
+int dynamic_user_current(DynamicUser *d, uid_t *ret);
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret);
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret);
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group);
+int dynamic_creds_realize(DynamicCreds *creds, char **suggested_paths, uid_t *uid, gid_t *gid);
+
+void dynamic_creds_unref(DynamicCreds *creds);
+void dynamic_creds_destroy(DynamicCreds *creds);
diff --git a/src/core/efi-random.c b/src/core/efi-random.c
new file mode 100644
index 0000000..2bc74fa
--- /dev/null
+++ b/src/core/efi-random.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "efi-random.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "random-util.h"
+#include "strv.h"
+
+/* If a random seed was passed by the boot loader in the LoaderRandomSeed EFI variable, let's credit it to
+ * the kernel's random pool, but only once per boot. If this is run very early during initialization we can
+ * instantly boot up with a filled random pool.
+ *
+ * This makes no judgement on the entropy passed, it's the job of the boot loader to only pass us a seed that
+ * is suitably validated. */
+
+static void lock_down_efi_variables(void) {
+ const char *p;
+ int r;
+
+ /* Paranoia: let's restrict access modes of these a bit, so that unprivileged users can't use them to
+ * identify the system or gain too much insight into what we might have credited to the entropy
+ * pool. */
+ FOREACH_STRING(p,
+ "/sys/firmware/efi/efivars/LoaderRandomSeed-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f",
+ "/sys/firmware/efi/efivars/LoaderSystemToken-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f") {
+
+ r = chattr_path(p, 0, FS_IMMUTABLE_FL, NULL);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ log_warning_errno(r, "Failed to drop FS_IMMUTABLE_FL from %s, ignoring: %m", p);
+
+ if (chmod(p, 0600) < 0)
+ log_warning_errno(errno, "Failed to reduce access mode of %s, ignoring: %m", p);
+ }
+}
+
+int efi_take_random_seed(void) {
+ _cleanup_free_ void *value = NULL;
+ _cleanup_close_ int random_fd = -1;
+ size_t size;
+ int r;
+
+ /* Paranoia comes first. */
+ lock_down_efi_variables();
+
+ if (access("/run/systemd/efi-random-seed-taken", F_OK) < 0) {
+ if (errno != ENOENT) {
+ log_warning_errno(errno, "Failed to determine whether we already used the random seed token, not using it.");
+ return 0;
+ }
+
+ /* ENOENT means we haven't used it yet. */
+ } else {
+ log_debug("EFI random seed already used, not using again.");
+ return 0;
+ }
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderRandomSeed", NULL, &value, &size);
+ if (r == -EOPNOTSUPP) {
+ log_debug_errno(r, "System lacks EFI support, not initializing random seed from EFI variable.");
+ return 0;
+ }
+ if (r == -ENOENT) {
+ log_debug_errno(r, "Boot loader did not pass LoaderRandomSeed EFI variable, not crediting any entropy.");
+ return 0;
+ }
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read LoaderRandomSeed EFI variable, ignoring: %m");
+
+ if (size == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Random seed passed from boot loader has zero size? Ignoring.");
+
+ random_fd = open("/dev/urandom", O_WRONLY|O_CLOEXEC|O_NOCTTY);
+ if (random_fd < 0)
+ return log_warning_errno(errno, "Failed to open /dev/urandom for writing, ignoring: %m");
+
+ /* Before we use the seed, let's mark it as used, so that we never credit it twice. Also, it's a nice
+ * way to let users known that we successfully acquired entropy from the boot laoder. */
+ r = touch("/run/systemd/efi-random-seed-taken");
+ if (r < 0)
+ return log_warning_errno(r, "Unable to mark EFI random seed as used, not using it: %m");
+
+ r = random_write_entropy(random_fd, value, size, true);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to credit entropy, ignoring: %m");
+
+ log_info("Successfully credited entropy passed from boot loader.");
+ return 1;
+}
diff --git a/src/core/efi-random.h b/src/core/efi-random.h
new file mode 100644
index 0000000..7d20fff
--- /dev/null
+++ b/src/core/efi-random.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int efi_take_random_seed(void);
diff --git a/src/core/emergency-action.c b/src/core/emergency-action.c
new file mode 100644
index 0000000..9e8c79e
--- /dev/null
+++ b/src/core/emergency-action.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/reboot.h>
+
+#include "bus-error.h"
+#include "bus-util.h"
+#include "emergency-action.h"
+#include "raw-reboot.h"
+#include "reboot-util.h"
+#include "special.h"
+#include "string-table.h"
+#include "terminal-util.h"
+#include "virt.h"
+
+static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {
+ [EMERGENCY_ACTION_NONE] = "none",
+ [EMERGENCY_ACTION_REBOOT] = "reboot",
+ [EMERGENCY_ACTION_REBOOT_FORCE] = "reboot-force",
+ [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
+ [EMERGENCY_ACTION_POWEROFF] = "poweroff",
+ [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force",
+ [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate",
+ [EMERGENCY_ACTION_EXIT] = "exit",
+ [EMERGENCY_ACTION_EXIT_FORCE] = "exit-force",
+};
+
+static void log_and_status(Manager *m, bool warn, const char *message, const char *reason) {
+ log_full(warn ? LOG_WARNING : LOG_DEBUG, "%s: %s", message, reason);
+ if (warn)
+ manager_status_printf(m, STATUS_TYPE_EMERGENCY,
+ ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
+ "%s: %s", message, reason);
+}
+
+void emergency_action(
+ Manager *m,
+ EmergencyAction action,
+ EmergencyActionFlags options,
+ const char *reboot_arg,
+ int exit_status,
+ const char *reason) {
+
+ Unit *u;
+
+ assert(m);
+ assert(action >= 0);
+ assert(action < _EMERGENCY_ACTION_MAX);
+
+ /* Is the special shutdown target active or queued? If so, we are in shutdown state */
+ if (IN_SET(action, EMERGENCY_ACTION_REBOOT, EMERGENCY_ACTION_POWEROFF, EMERGENCY_ACTION_EXIT)) {
+ u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
+ if (u && unit_active_or_pending(u)) {
+ log_notice("Shutdown is already active. Skipping emergency action request %s.",
+ emergency_action_table[action]);
+ return;
+ }
+ }
+
+ if (action == EMERGENCY_ACTION_NONE)
+ return;
+
+ if (FLAGS_SET(options, EMERGENCY_ACTION_IS_WATCHDOG) && !m->service_watchdogs) {
+ log_warning("Watchdog disabled! Not acting on: %s", reason);
+ return;
+ }
+
+ bool warn = FLAGS_SET(options, EMERGENCY_ACTION_WARN);
+
+ switch (action) {
+
+ case EMERGENCY_ACTION_REBOOT:
+ log_and_status(m, warn, "Rebooting", reason);
+
+ (void) update_reboot_parameter_and_warn(reboot_arg, true);
+ (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL, NULL);
+ break;
+
+ case EMERGENCY_ACTION_REBOOT_FORCE:
+ log_and_status(m, warn, "Forcibly rebooting", reason);
+
+ (void) update_reboot_parameter_and_warn(reboot_arg, true);
+ m->objective = MANAGER_REBOOT;
+
+ break;
+
+ case EMERGENCY_ACTION_REBOOT_IMMEDIATE:
+ log_and_status(m, warn, "Rebooting immediately", reason);
+
+ sync();
+
+ if (!isempty(reboot_arg)) {
+ log_info("Rebooting with argument '%s'.", reboot_arg);
+ (void) raw_reboot(LINUX_REBOOT_CMD_RESTART2, reboot_arg);
+ log_warning_errno(errno, "Failed to reboot with parameter, retrying without: %m");
+ }
+
+ log_info("Rebooting.");
+ (void) reboot(RB_AUTOBOOT);
+ break;
+
+ case EMERGENCY_ACTION_EXIT:
+
+ if (exit_status >= 0)
+ m->return_value = exit_status;
+
+ if (MANAGER_IS_USER(m) || detect_container() > 0) {
+ log_and_status(m, warn, "Exiting", reason);
+ (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_EXIT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL, NULL);
+ break;
+ }
+
+ log_notice("Doing \"poweroff\" action instead of an \"exit\" emergency action.");
+ _fallthrough_;
+
+ case EMERGENCY_ACTION_POWEROFF:
+ log_and_status(m, warn, "Powering off", reason);
+ (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL, NULL);
+ break;
+
+ case EMERGENCY_ACTION_EXIT_FORCE:
+
+ if (exit_status >= 0)
+ m->return_value = exit_status;
+
+ if (MANAGER_IS_USER(m) || detect_container() > 0) {
+ log_and_status(m, warn, "Exiting immediately", reason);
+ m->objective = MANAGER_EXIT;
+ break;
+ }
+
+ log_notice("Doing \"poweroff-force\" action instead of an \"exit-force\" emergency action.");
+ _fallthrough_;
+
+ case EMERGENCY_ACTION_POWEROFF_FORCE:
+ log_and_status(m, warn, "Forcibly powering off", reason);
+ m->objective = MANAGER_POWEROFF;
+ break;
+
+ case EMERGENCY_ACTION_POWEROFF_IMMEDIATE:
+ log_and_status(m, warn, "Powering off immediately", reason);
+
+ sync();
+
+ log_info("Powering off.");
+ (void) reboot(RB_POWER_OFF);
+ break;
+
+ default:
+ assert_not_reached("Unknown emergency action");
+ }
+}
+
+DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction);
+
+int parse_emergency_action(
+ const char *value,
+ bool system,
+ EmergencyAction *ret) {
+
+ EmergencyAction x;
+
+ x = emergency_action_from_string(value);
+ if (x < 0)
+ return -EINVAL;
+
+ if (!system && x != EMERGENCY_ACTION_NONE && x < _EMERGENCY_ACTION_FIRST_USER_ACTION)
+ return -EOPNOTSUPP;
+
+ *ret = x;
+ return 0;
+}
diff --git a/src/core/emergency-action.h b/src/core/emergency-action.h
new file mode 100644
index 0000000..95d49a8
--- /dev/null
+++ b/src/core/emergency-action.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef enum EmergencyAction {
+ EMERGENCY_ACTION_NONE,
+ EMERGENCY_ACTION_REBOOT,
+ EMERGENCY_ACTION_REBOOT_FORCE,
+ EMERGENCY_ACTION_REBOOT_IMMEDIATE,
+ EMERGENCY_ACTION_POWEROFF,
+ EMERGENCY_ACTION_POWEROFF_FORCE,
+ EMERGENCY_ACTION_POWEROFF_IMMEDIATE,
+ EMERGENCY_ACTION_EXIT,
+ _EMERGENCY_ACTION_FIRST_USER_ACTION = EMERGENCY_ACTION_EXIT,
+ EMERGENCY_ACTION_EXIT_FORCE,
+ _EMERGENCY_ACTION_MAX,
+ _EMERGENCY_ACTION_INVALID = -1
+} EmergencyAction;
+
+typedef enum EmergencyActionFlags {
+ EMERGENCY_ACTION_IS_WATCHDOG = 1 << 0,
+ EMERGENCY_ACTION_WARN = 1 << 1,
+} EmergencyActionFlags;
+
+#include "macro.h"
+#include "manager.h"
+
+void emergency_action(Manager *m,
+ EmergencyAction action, EmergencyActionFlags options,
+ const char *reboot_arg, int exit_status, const char *reason);
+
+const char* emergency_action_to_string(EmergencyAction i) _const_;
+EmergencyAction emergency_action_from_string(const char *s) _pure_;
+
+int parse_emergency_action(const char *value, bool system, EmergencyAction *ret);
diff --git a/src/core/execute.c b/src/core/execute.c
new file mode 100644
index 0000000..c992b8d
--- /dev/null
+++ b/src/core/execute.c
@@ -0,0 +1,6512 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include <utmpx.h>
+
+#if HAVE_PAM
+#include <security/pam_appl.h>
+#endif
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#if HAVE_APPARMOR
+#include <sys/apparmor.h>
+#endif
+
+#include "sd-messages.h"
+
+#include "acl-util.h"
+#include "af-list.h"
+#include "alloc-util.h"
+#if HAVE_APPARMOR
+#include "apparmor-util.h"
+#endif
+#include "async.h"
+#include "barrier.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "cgroup-setup.h"
+#include "chown-recursive.h"
+#include "cpu-set-util.h"
+#include "def.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "execute.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "ioprio.h"
+#include "label.h"
+#include "log.h"
+#include "macro.h"
+#include "manager.h"
+#include "memory-util.h"
+#include "missing_fs.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "namespace.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "securebits-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "smack-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "unit.h"
+#include "user-util.h"
+#include "utmp-wtmp.h"
+
+#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
+#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static int shift_fds(int fds[], size_t n_fds) {
+ if (n_fds <= 0)
+ return 0;
+
+ /* Modifies the fds array! (sorts it) */
+
+ assert(fds);
+
+ for (int start = 0;;) {
+ int restart_from = -1;
+
+ for (int i = start; i < (int) n_fds; i++) {
+ int nfd;
+
+ /* Already at right index? */
+ if (fds[i] == i+3)
+ continue;
+
+ nfd = fcntl(fds[i], F_DUPFD, i + 3);
+ if (nfd < 0)
+ return -errno;
+
+ safe_close(fds[i]);
+ fds[i] = nfd;
+
+ /* Hmm, the fd we wanted isn't free? Then
+ * let's remember that and try again from here */
+ if (nfd != i+3 && restart_from < 0)
+ restart_from = i;
+ }
+
+ if (restart_from < 0)
+ break;
+
+ start = restart_from;
+ }
+
+ return 0;
+}
+
+static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
+ size_t n_fds;
+ int r;
+
+ n_fds = n_socket_fds + n_storage_fds;
+ if (n_fds <= 0)
+ return 0;
+
+ assert(fds);
+
+ /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
+ * O_NONBLOCK only applies to socket activation though. */
+
+ for (size_t i = 0; i < n_fds; i++) {
+
+ if (i < n_socket_fds) {
+ r = fd_nonblock(fds[i], nonblock);
+ if (r < 0)
+ return r;
+ }
+
+ /* We unconditionally drop FD_CLOEXEC from the fds,
+ * since after all we want to pass these fds to our
+ * children */
+
+ r = fd_cloexec(fds[i], false);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static const char *exec_context_tty_path(const ExecContext *context) {
+ assert(context);
+
+ if (context->stdio_as_fds)
+ return NULL;
+
+ if (context->tty_path)
+ return context->tty_path;
+
+ return "/dev/console";
+}
+
+static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
+ const char *path;
+
+ assert(context);
+
+ path = exec_context_tty_path(context);
+
+ if (context->tty_vhangup) {
+ if (p && p->stdin_fd >= 0)
+ (void) terminal_vhangup_fd(p->stdin_fd);
+ else if (path)
+ (void) terminal_vhangup(path);
+ }
+
+ if (context->tty_reset) {
+ if (p && p->stdin_fd >= 0)
+ (void) reset_terminal_fd(p->stdin_fd, true);
+ else if (path)
+ (void) reset_terminal(path);
+ }
+
+ if (context->tty_vt_disallocate && path)
+ (void) vt_disallocate(path);
+}
+
+static bool is_terminal_input(ExecInput i) {
+ return IN_SET(i,
+ EXEC_INPUT_TTY,
+ EXEC_INPUT_TTY_FORCE,
+ EXEC_INPUT_TTY_FAIL);
+}
+
+static bool is_terminal_output(ExecOutput o) {
+ return IN_SET(o,
+ EXEC_OUTPUT_TTY,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
+}
+
+static bool is_kmsg_output(ExecOutput o) {
+ return IN_SET(o,
+ EXEC_OUTPUT_KMSG,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE);
+}
+
+static bool exec_context_needs_term(const ExecContext *c) {
+ assert(c);
+
+ /* Return true if the execution context suggests we should set $TERM to something useful. */
+
+ if (is_terminal_input(c->std_input))
+ return true;
+
+ if (is_terminal_output(c->std_output))
+ return true;
+
+ if (is_terminal_output(c->std_error))
+ return true;
+
+ return !!c->tty_path;
+}
+
+static int open_null_as(int flags, int nfd) {
+ int fd;
+
+ assert(nfd >= 0);
+
+ fd = open("/dev/null", flags|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ return move_fd(fd, nfd, false);
+}
+
+static int connect_journal_socket(
+ int fd,
+ const char *log_namespace,
+ uid_t uid,
+ gid_t gid) {
+
+ union sockaddr_union sa;
+ socklen_t sa_len;
+ uid_t olduid = UID_INVALID;
+ gid_t oldgid = GID_INVALID;
+ const char *j;
+ int r;
+
+ j = log_namespace ?
+ strjoina("/run/systemd/journal.", log_namespace, "/stdout") :
+ "/run/systemd/journal/stdout";
+ r = sockaddr_un_set_path(&sa.un, j);
+ if (r < 0)
+ return r;
+ sa_len = r;
+
+ if (gid_is_valid(gid)) {
+ oldgid = getgid();
+
+ if (setegid(gid) < 0)
+ return -errno;
+ }
+
+ if (uid_is_valid(uid)) {
+ olduid = getuid();
+
+ if (seteuid(uid) < 0) {
+ r = -errno;
+ goto restore_gid;
+ }
+ }
+
+ r = connect(fd, &sa.sa, sa_len) < 0 ? -errno : 0;
+
+ /* If we fail to restore the uid or gid, things will likely
+ fail later on. This should only happen if an LSM interferes. */
+
+ if (uid_is_valid(uid))
+ (void) seteuid(olduid);
+
+ restore_gid:
+ if (gid_is_valid(gid))
+ (void) setegid(oldgid);
+
+ return r;
+}
+
+static int connect_logger_as(
+ const Unit *unit,
+ const ExecContext *context,
+ const ExecParameters *params,
+ ExecOutput output,
+ const char *ident,
+ int nfd,
+ uid_t uid,
+ gid_t gid) {
+
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(context);
+ assert(params);
+ assert(output < _EXEC_OUTPUT_MAX);
+ assert(ident);
+ assert(nfd >= 0);
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect_journal_socket(fd, context->log_namespace, uid, gid);
+ if (r < 0)
+ return r;
+
+ if (shutdown(fd, SHUT_RD) < 0)
+ return -errno;
+
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ if (dprintf(fd,
+ "%s\n"
+ "%s\n"
+ "%i\n"
+ "%i\n"
+ "%i\n"
+ "%i\n"
+ "%i\n",
+ context->syslog_identifier ?: ident,
+ params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
+ context->syslog_priority,
+ !!context->syslog_level_prefix,
+ false,
+ is_kmsg_output(output),
+ is_terminal_output(output)) < 0)
+ return -errno;
+
+ return move_fd(TAKE_FD(fd), nfd, false);
+}
+
+static int open_terminal_as(const char *path, int flags, int nfd) {
+ int fd;
+
+ assert(path);
+ assert(nfd >= 0);
+
+ fd = open_terminal(path, flags | O_NOCTTY);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, nfd, false);
+}
+
+static int acquire_path(const char *path, int flags, mode_t mode) {
+ union sockaddr_union sa;
+ socklen_t sa_len;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(path);
+
+ if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
+ flags |= O_CREAT;
+
+ fd = open(path, flags|O_NOCTTY, mode);
+ if (fd >= 0)
+ return TAKE_FD(fd);
+
+ if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
+ return -errno;
+
+ /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
+
+ r = sockaddr_un_set_path(&sa.un, path);
+ if (r < 0)
+ return r == -EINVAL ? -ENXIO : r;
+ sa_len = r;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (connect(fd, &sa.sa, sa_len) < 0)
+ return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
+ * indication that this wasn't an AF_UNIX socket after all */
+
+ if ((flags & O_ACCMODE) == O_RDONLY)
+ r = shutdown(fd, SHUT_WR);
+ else if ((flags & O_ACCMODE) == O_WRONLY)
+ r = shutdown(fd, SHUT_RD);
+ else
+ r = 0;
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(fd);
+}
+
+static int fixup_input(
+ const ExecContext *context,
+ int socket_fd,
+ bool apply_tty_stdin) {
+
+ ExecInput std_input;
+
+ assert(context);
+
+ std_input = context->std_input;
+
+ if (is_terminal_input(std_input) && !apply_tty_stdin)
+ return EXEC_INPUT_NULL;
+
+ if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
+ return EXEC_INPUT_NULL;
+
+ if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
+ return EXEC_INPUT_NULL;
+
+ return std_input;
+}
+
+static int fixup_output(ExecOutput std_output, int socket_fd) {
+
+ if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
+ return EXEC_OUTPUT_INHERIT;
+
+ return std_output;
+}
+
+static int setup_input(
+ const ExecContext *context,
+ const ExecParameters *params,
+ int socket_fd,
+ const int named_iofds[static 3]) {
+
+ ExecInput i;
+
+ assert(context);
+ assert(params);
+ assert(named_iofds);
+
+ if (params->stdin_fd >= 0) {
+ if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
+ return -errno;
+
+ /* Try to make this the controlling tty, if it is a tty, and reset it */
+ if (isatty(STDIN_FILENO)) {
+ (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
+ (void) reset_terminal_fd(STDIN_FILENO, true);
+ }
+
+ return STDIN_FILENO;
+ }
+
+ i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
+
+ switch (i) {
+
+ case EXEC_INPUT_NULL:
+ return open_null_as(O_RDONLY, STDIN_FILENO);
+
+ case EXEC_INPUT_TTY:
+ case EXEC_INPUT_TTY_FORCE:
+ case EXEC_INPUT_TTY_FAIL: {
+ int fd;
+
+ fd = acquire_terminal(exec_context_tty_path(context),
+ i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
+ i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
+ ACQUIRE_TERMINAL_WAIT,
+ USEC_INFINITY);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, STDIN_FILENO, false);
+ }
+
+ case EXEC_INPUT_SOCKET:
+ assert(socket_fd >= 0);
+
+ return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+
+ case EXEC_INPUT_NAMED_FD:
+ assert(named_iofds[STDIN_FILENO] >= 0);
+
+ (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
+ return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+
+ case EXEC_INPUT_DATA: {
+ int fd;
+
+ fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, STDIN_FILENO, false);
+ }
+
+ case EXEC_INPUT_FILE: {
+ bool rw;
+ int fd;
+
+ assert(context->stdio_file[STDIN_FILENO]);
+
+ rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
+ (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
+
+ fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, STDIN_FILENO, false);
+ }
+
+ default:
+ assert_not_reached("Unknown input type");
+ }
+}
+
+static bool can_inherit_stderr_from_stdout(
+ const ExecContext *context,
+ ExecOutput o,
+ ExecOutput e) {
+
+ assert(context);
+
+ /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
+ * stderr fd */
+
+ if (e == EXEC_OUTPUT_INHERIT)
+ return true;
+ if (e != o)
+ return false;
+
+ if (e == EXEC_OUTPUT_NAMED_FD)
+ return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
+
+ if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
+ return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
+
+ return true;
+}
+
+static int setup_output(
+ const Unit *unit,
+ const ExecContext *context,
+ const ExecParameters *params,
+ int fileno,
+ int socket_fd,
+ const int named_iofds[static 3],
+ const char *ident,
+ uid_t uid,
+ gid_t gid,
+ dev_t *journal_stream_dev,
+ ino_t *journal_stream_ino) {
+
+ ExecOutput o;
+ ExecInput i;
+ int r;
+
+ assert(unit);
+ assert(context);
+ assert(params);
+ assert(ident);
+ assert(journal_stream_dev);
+ assert(journal_stream_ino);
+
+ if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
+
+ if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
+ return -errno;
+
+ return STDOUT_FILENO;
+ }
+
+ if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
+ if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
+ return -errno;
+
+ return STDERR_FILENO;
+ }
+
+ i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
+ o = fixup_output(context->std_output, socket_fd);
+
+ if (fileno == STDERR_FILENO) {
+ ExecOutput e;
+ e = fixup_output(context->std_error, socket_fd);
+
+ /* This expects the input and output are already set up */
+
+ /* Don't change the stderr file descriptor if we inherit all
+ * the way and are not on a tty */
+ if (e == EXEC_OUTPUT_INHERIT &&
+ o == EXEC_OUTPUT_INHERIT &&
+ i == EXEC_INPUT_NULL &&
+ !is_terminal_input(context->std_input) &&
+ getppid () != 1)
+ return fileno;
+
+ /* Duplicate from stdout if possible */
+ if (can_inherit_stderr_from_stdout(context, o, e))
+ return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
+
+ o = e;
+
+ } else if (o == EXEC_OUTPUT_INHERIT) {
+ /* If input got downgraded, inherit the original value */
+ if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
+ return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
+
+ /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
+ if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
+ return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
+
+ /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
+ if (getppid() != 1)
+ return fileno;
+
+ /* We need to open /dev/null here anew, to get the right access mode. */
+ return open_null_as(O_WRONLY, fileno);
+ }
+
+ switch (o) {
+
+ case EXEC_OUTPUT_NULL:
+ return open_null_as(O_WRONLY, fileno);
+
+ case EXEC_OUTPUT_TTY:
+ if (is_terminal_input(i))
+ return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
+
+ /* We don't reset the terminal if this is just about output */
+ return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
+
+ case EXEC_OUTPUT_KMSG:
+ case EXEC_OUTPUT_KMSG_AND_CONSOLE:
+ case EXEC_OUTPUT_JOURNAL:
+ case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
+ r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
+ if (r < 0) {
+ log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
+ r = open_null_as(O_WRONLY, fileno);
+ } else {
+ struct stat st;
+
+ /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
+ * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
+ * services to detect whether they are connected to the journal or not.
+ *
+ * If both stdout and stderr are connected to a stream then let's make sure to store the data
+ * about STDERR as that's usually the best way to do logging. */
+
+ if (fstat(fileno, &st) >= 0 &&
+ (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
+ *journal_stream_dev = st.st_dev;
+ *journal_stream_ino = st.st_ino;
+ }
+ }
+ return r;
+
+ case EXEC_OUTPUT_SOCKET:
+ assert(socket_fd >= 0);
+
+ return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
+
+ case EXEC_OUTPUT_NAMED_FD:
+ assert(named_iofds[fileno] >= 0);
+
+ (void) fd_nonblock(named_iofds[fileno], false);
+ return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
+
+ case EXEC_OUTPUT_FILE:
+ case EXEC_OUTPUT_FILE_APPEND: {
+ bool rw;
+ int fd, flags;
+
+ assert(context->stdio_file[fileno]);
+
+ rw = context->std_input == EXEC_INPUT_FILE &&
+ streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
+
+ if (rw)
+ return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
+
+ flags = O_WRONLY;
+ if (o == EXEC_OUTPUT_FILE_APPEND)
+ flags |= O_APPEND;
+
+ fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
+ if (fd < 0)
+ return fd;
+
+ return move_fd(fd, fileno, 0);
+ }
+
+ default:
+ assert_not_reached("Unknown error type");
+ }
+}
+
+static int chown_terminal(int fd, uid_t uid) {
+ int r;
+
+ assert(fd >= 0);
+
+ /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
+ if (isatty(fd) < 1) {
+ if (IN_SET(errno, EINVAL, ENOTTY))
+ return 0; /* not a tty */
+
+ return -errno;
+ }
+
+ /* This might fail. What matters are the results. */
+ r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
+ _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
+ int r;
+
+ assert(_saved_stdin);
+ assert(_saved_stdout);
+
+ saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
+ if (saved_stdin < 0)
+ return -errno;
+
+ saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
+ if (saved_stdout < 0)
+ return -errno;
+
+ fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
+ if (fd < 0)
+ return fd;
+
+ r = chown_terminal(fd, getuid());
+ if (r < 0)
+ return r;
+
+ r = reset_terminal_fd(fd, true);
+ if (r < 0)
+ return r;
+
+ r = rearrange_stdio(fd, fd, STDERR_FILENO);
+ fd = -1;
+ if (r < 0)
+ return r;
+
+ *_saved_stdin = saved_stdin;
+ *_saved_stdout = saved_stdout;
+
+ saved_stdin = saved_stdout = -1;
+
+ return 0;
+}
+
+static void write_confirm_error_fd(int err, int fd, const Unit *u) {
+ assert(err < 0);
+
+ if (err == -ETIMEDOUT)
+ dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
+ else {
+ errno = -err;
+ dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
+ }
+}
+
+static void write_confirm_error(int err, const char *vc, const Unit *u) {
+ _cleanup_close_ int fd = -1;
+
+ assert(vc);
+
+ fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return;
+
+ write_confirm_error_fd(err, fd, u);
+}
+
+static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
+ int r = 0;
+
+ assert(saved_stdin);
+ assert(saved_stdout);
+
+ release_terminal();
+
+ if (*saved_stdin >= 0)
+ if (dup2(*saved_stdin, STDIN_FILENO) < 0)
+ r = -errno;
+
+ if (*saved_stdout >= 0)
+ if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
+ r = -errno;
+
+ *saved_stdin = safe_close(*saved_stdin);
+ *saved_stdout = safe_close(*saved_stdout);
+
+ return r;
+}
+
+enum {
+ CONFIRM_PRETEND_FAILURE = -1,
+ CONFIRM_PRETEND_SUCCESS = 0,
+ CONFIRM_EXECUTE = 1,
+};
+
+static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
+ int saved_stdout = -1, saved_stdin = -1, r;
+ _cleanup_free_ char *e = NULL;
+ char c;
+
+ /* For any internal errors, assume a positive response. */
+ r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
+ if (r < 0) {
+ write_confirm_error(r, vc, u);
+ return CONFIRM_EXECUTE;
+ }
+
+ /* confirm_spawn might have been disabled while we were sleeping. */
+ if (manager_is_confirm_spawn_disabled(u->manager)) {
+ r = 1;
+ goto restore_stdio;
+ }
+
+ e = ellipsize(cmdline, 60, 100);
+ if (!e) {
+ log_oom();
+ r = CONFIRM_EXECUTE;
+ goto restore_stdio;
+ }
+
+ for (;;) {
+ r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
+ if (r < 0) {
+ write_confirm_error_fd(r, STDOUT_FILENO, u);
+ r = CONFIRM_EXECUTE;
+ goto restore_stdio;
+ }
+
+ switch (c) {
+ case 'c':
+ printf("Resuming normal execution.\n");
+ manager_disable_confirm_spawn();
+ r = 1;
+ break;
+ case 'D':
+ unit_dump(u, stdout, " ");
+ continue; /* ask again */
+ case 'f':
+ printf("Failing execution.\n");
+ r = CONFIRM_PRETEND_FAILURE;
+ break;
+ case 'h':
+ printf(" c - continue, proceed without asking anymore\n"
+ " D - dump, show the state of the unit\n"
+ " f - fail, don't execute the command and pretend it failed\n"
+ " h - help\n"
+ " i - info, show a short summary of the unit\n"
+ " j - jobs, show jobs that are in progress\n"
+ " s - skip, don't execute the command and pretend it succeeded\n"
+ " y - yes, execute the command\n");
+ continue; /* ask again */
+ case 'i':
+ printf(" Description: %s\n"
+ " Unit: %s\n"
+ " Command: %s\n",
+ u->id, u->description, cmdline);
+ continue; /* ask again */
+ case 'j':
+ manager_dump_jobs(u->manager, stdout, " ");
+ continue; /* ask again */
+ case 'n':
+ /* 'n' was removed in favor of 'f'. */
+ printf("Didn't understand 'n', did you mean 'f'?\n");
+ continue; /* ask again */
+ case 's':
+ printf("Skipping execution.\n");
+ r = CONFIRM_PRETEND_SUCCESS;
+ break;
+ case 'y':
+ r = CONFIRM_EXECUTE;
+ break;
+ default:
+ assert_not_reached("Unhandled choice");
+ }
+ break;
+ }
+
+restore_stdio:
+ restore_confirm_stdio(&saved_stdin, &saved_stdout);
+ return r;
+}
+
+static int get_fixed_user(const ExecContext *c, const char **user,
+ uid_t *uid, gid_t *gid,
+ const char **home, const char **shell) {
+ int r;
+ const char *name;
+
+ assert(c);
+
+ if (!c->user)
+ return 0;
+
+ /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
+ * (i.e. are "/" or "/bin/nologin"). */
+
+ name = c->user;
+ r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
+ if (r < 0)
+ return r;
+
+ *user = name;
+ return 0;
+}
+
+static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
+ int r;
+ const char *name;
+
+ assert(c);
+
+ if (!c->group)
+ return 0;
+
+ name = c->group;
+ r = get_group_creds(&name, gid, 0);
+ if (r < 0)
+ return r;
+
+ *group = name;
+ return 0;
+}
+
+static int get_supplementary_groups(const ExecContext *c, const char *user,
+ const char *group, gid_t gid,
+ gid_t **supplementary_gids, int *ngids) {
+ char **i;
+ int r, k = 0;
+ int ngroups_max;
+ bool keep_groups = false;
+ gid_t *groups = NULL;
+ _cleanup_free_ gid_t *l_gids = NULL;
+
+ assert(c);
+
+ /*
+ * If user is given, then lookup GID and supplementary groups list.
+ * We avoid NSS lookups for gid=0. Also we have to initialize groups
+ * here and as early as possible so we keep the list of supplementary
+ * groups of the caller.
+ */
+ if (user && gid_is_valid(gid) && gid != 0) {
+ /* First step, initialize groups from /etc/groups */
+ if (initgroups(user, gid) < 0)
+ return -errno;
+
+ keep_groups = true;
+ }
+
+ if (strv_isempty(c->supplementary_groups))
+ return 0;
+
+ /*
+ * If SupplementaryGroups= was passed then NGROUPS_MAX has to
+ * be positive, otherwise fail.
+ */
+ errno = 0;
+ ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
+ if (ngroups_max <= 0)
+ return errno_or_else(EOPNOTSUPP);
+
+ l_gids = new(gid_t, ngroups_max);
+ if (!l_gids)
+ return -ENOMEM;
+
+ if (keep_groups) {
+ /*
+ * Lookup the list of groups that the user belongs to, we
+ * avoid NSS lookups here too for gid=0.
+ */
+ k = ngroups_max;
+ if (getgrouplist(user, gid, l_gids, &k) < 0)
+ return -EINVAL;
+ } else
+ k = 0;
+
+ STRV_FOREACH(i, c->supplementary_groups) {
+ const char *g;
+
+ if (k >= ngroups_max)
+ return -E2BIG;
+
+ g = *i;
+ r = get_group_creds(&g, l_gids+k, 0);
+ if (r < 0)
+ return r;
+
+ k++;
+ }
+
+ /*
+ * Sets ngids to zero to drop all supplementary groups, happens
+ * when we are under root and SupplementaryGroups= is empty.
+ */
+ if (k == 0) {
+ *ngids = 0;
+ return 0;
+ }
+
+ /* Otherwise get the final list of supplementary groups */
+ groups = memdup(l_gids, sizeof(gid_t) * k);
+ if (!groups)
+ return -ENOMEM;
+
+ *supplementary_gids = groups;
+ *ngids = k;
+
+ groups = NULL;
+
+ return 0;
+}
+
+static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {
+ int r;
+
+ /* Handle SupplementaryGroups= if it is not empty */
+ if (ngids > 0) {
+ r = maybe_setgroups(ngids, supplementary_gids);
+ if (r < 0)
+ return r;
+ }
+
+ if (gid_is_valid(gid)) {
+ /* Then set our gids */
+ if (setresgid(gid, gid, gid) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int set_securebits(int bits, int mask) {
+ int current, applied;
+ current = prctl(PR_GET_SECUREBITS);
+ if (current < 0)
+ return -errno;
+ /* Clear all securebits defined in mask and set bits */
+ applied = (current & ~mask) | bits;
+ if (current == applied)
+ return 0;
+ if (prctl(PR_SET_SECUREBITS, applied) < 0)
+ return -errno;
+ return 1;
+}
+
+static int enforce_user(const ExecContext *context, uid_t uid) {
+ assert(context);
+ int r;
+
+ if (!uid_is_valid(uid))
+ return 0;
+
+ /* Sets (but doesn't look up) the uid and make sure we keep the
+ * capabilities while doing so. For setting secure bits the capability CAP_SETPCAP is
+ * required, so we also need keep-caps in this case.
+ */
+
+ if (context->capability_ambient_set != 0 || context->secure_bits != 0) {
+
+ /* First step: If we need to keep capabilities but
+ * drop privileges we need to make sure we keep our
+ * caps, while we drop privileges. */
+ if (uid != 0) {
+ /* Add KEEP_CAPS to the securebits */
+ r = set_securebits(1<<SECURE_KEEP_CAPS, 0);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Second step: actually set the uids */
+ if (setresuid(uid, uid, uid) < 0)
+ return -errno;
+
+ /* At this point we should have all necessary capabilities but
+ are otherwise a normal user. However, the caps might got
+ corrupted due to the setresuid() so we need clean them up
+ later. This is done outside of this call. */
+
+ return 0;
+}
+
+#if HAVE_PAM
+
+static int null_conv(
+ int num_msg,
+ const struct pam_message **msg,
+ struct pam_response **resp,
+ void *appdata_ptr) {
+
+ /* We don't support conversations */
+
+ return PAM_CONV_ERR;
+}
+
+#endif
+
+static int setup_pam(
+ const char *name,
+ const char *user,
+ uid_t uid,
+ gid_t gid,
+ const char *tty,
+ char ***env,
+ const int fds[], size_t n_fds) {
+
+#if HAVE_PAM
+
+ static const struct pam_conv conv = {
+ .conv = null_conv,
+ .appdata_ptr = NULL
+ };
+
+ _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
+ pam_handle_t *handle = NULL;
+ sigset_t old_ss;
+ int pam_code = PAM_SUCCESS, r;
+ char **nv, **e = NULL;
+ bool close_session = false;
+ pid_t pam_pid = 0, parent_pid;
+ int flags = 0;
+
+ assert(name);
+ assert(user);
+ assert(env);
+
+ /* We set up PAM in the parent process, then fork. The child
+ * will then stay around until killed via PR_GET_PDEATHSIG or
+ * systemd via the cgroup logic. It will then remove the PAM
+ * session again. The parent process will exec() the actual
+ * daemon. We do things this way to ensure that the main PID
+ * of the daemon is the one we initially fork()ed. */
+
+ r = barrier_create(&barrier);
+ if (r < 0)
+ goto fail;
+
+ if (log_get_max_level() < LOG_DEBUG)
+ flags |= PAM_SILENT;
+
+ pam_code = pam_start(name, user, &conv, &handle);
+ if (pam_code != PAM_SUCCESS) {
+ handle = NULL;
+ goto fail;
+ }
+
+ if (!tty) {
+ _cleanup_free_ char *q = NULL;
+
+ /* Hmm, so no TTY was explicitly passed, but an fd passed to us directly might be a TTY. Let's figure
+ * out if that's the case, and read the TTY off it. */
+
+ if (getttyname_malloc(STDIN_FILENO, &q) >= 0)
+ tty = strjoina("/dev/", q);
+ }
+
+ if (tty) {
+ pam_code = pam_set_item(handle, PAM_TTY, tty);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+ }
+
+ STRV_FOREACH(nv, *env) {
+ pam_code = pam_putenv(handle, *nv);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+ }
+
+ pam_code = pam_acct_mgmt(handle, flags);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+
+ pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | flags);
+ if (pam_code != PAM_SUCCESS)
+ log_debug("pam_setcred() failed, ignoring: %s", pam_strerror(handle, pam_code));
+
+ pam_code = pam_open_session(handle, flags);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+
+ close_session = true;
+
+ e = pam_getenvlist(handle);
+ if (!e) {
+ pam_code = PAM_BUF_ERR;
+ goto fail;
+ }
+
+ /* Block SIGTERM, so that we know that it won't get lost in
+ * the child */
+
+ assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
+
+ parent_pid = getpid_cached();
+
+ r = safe_fork("(sd-pam)", 0, &pam_pid);
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ int sig, ret = EXIT_PAM;
+
+ /* The child's job is to reset the PAM session on
+ * termination */
+ barrier_set_role(&barrier, BARRIER_CHILD);
+
+ /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
+ * are open here that have been opened by PAM. */
+ (void) close_many(fds, n_fds);
+
+ /* Drop privileges - we don't need any to pam_close_session
+ * and this will make PR_SET_PDEATHSIG work in most cases.
+ * If this fails, ignore the error - but expect sd-pam threads
+ * to fail to exit normally */
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
+ if (setresgid(gid, gid, gid) < 0)
+ log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
+ if (setresuid(uid, uid, uid) < 0)
+ log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
+
+ (void) ignore_signals(SIGPIPE, -1);
+
+ /* Wait until our parent died. This will only work if
+ * the above setresuid() succeeds, otherwise the kernel
+ * will not allow unprivileged parents kill their privileged
+ * children this way. We rely on the control groups kill logic
+ * to do the rest for us. */
+ if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
+ goto child_finish;
+
+ /* Tell the parent that our setup is done. This is especially
+ * important regarding dropping privileges. Otherwise, unit
+ * setup might race against our setresuid(2) call.
+ *
+ * If the parent aborted, we'll detect this below, hence ignore
+ * return failure here. */
+ (void) barrier_place(&barrier);
+
+ /* Check if our parent process might already have died? */
+ if (getppid() == parent_pid) {
+ sigset_t ss;
+
+ assert_se(sigemptyset(&ss) >= 0);
+ assert_se(sigaddset(&ss, SIGTERM) >= 0);
+
+ for (;;) {
+ if (sigwait(&ss, &sig) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ goto child_finish;
+ }
+
+ assert(sig == SIGTERM);
+ break;
+ }
+ }
+
+ pam_code = pam_setcred(handle, PAM_DELETE_CRED | flags);
+ if (pam_code != PAM_SUCCESS)
+ goto child_finish;
+
+ /* If our parent died we'll end the session */
+ if (getppid() != parent_pid) {
+ pam_code = pam_close_session(handle, flags);
+ if (pam_code != PAM_SUCCESS)
+ goto child_finish;
+ }
+
+ ret = 0;
+
+ child_finish:
+ pam_end(handle, pam_code | flags);
+ _exit(ret);
+ }
+
+ barrier_set_role(&barrier, BARRIER_PARENT);
+
+ /* If the child was forked off successfully it will do all the
+ * cleanups, so forget about the handle here. */
+ handle = NULL;
+
+ /* Unblock SIGTERM again in the parent */
+ assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
+
+ /* We close the log explicitly here, since the PAM modules
+ * might have opened it, but we don't want this fd around. */
+ closelog();
+
+ /* Synchronously wait for the child to initialize. We don't care for
+ * errors as we cannot recover. However, warn loudly if it happens. */
+ if (!barrier_place_and_sync(&barrier))
+ log_error("PAM initialization failed");
+
+ return strv_free_and_replace(*env, e);
+
+fail:
+ if (pam_code != PAM_SUCCESS) {
+ log_error("PAM failed: %s", pam_strerror(handle, pam_code));
+ r = -EPERM; /* PAM errors do not map to errno */
+ } else
+ log_error_errno(r, "PAM failed: %m");
+
+ if (handle) {
+ if (close_session)
+ pam_code = pam_close_session(handle, flags);
+
+ pam_end(handle, pam_code | flags);
+ }
+
+ strv_free(e);
+ closelog();
+
+ return r;
+#else
+ return 0;
+#endif
+}
+
+static void rename_process_from_path(const char *path) {
+ char process_name[11];
+ const char *p;
+ size_t l;
+
+ /* This resulting string must fit in 10 chars (i.e. the length
+ * of "/sbin/init") to look pretty in /bin/ps */
+
+ p = basename(path);
+ if (isempty(p)) {
+ rename_process("(...)");
+ return;
+ }
+
+ l = strlen(p);
+ if (l > 8) {
+ /* The end of the process name is usually more
+ * interesting, since the first bit might just be
+ * "systemd-" */
+ p = p + l - 8;
+ l = 8;
+ }
+
+ process_name[0] = '(';
+ memcpy(process_name+1, p, l);
+ process_name[1+l] = ')';
+ process_name[1+l+1] = 0;
+
+ rename_process(process_name);
+}
+
+static bool context_has_address_families(const ExecContext *c) {
+ assert(c);
+
+ return c->address_families_allow_list ||
+ !set_isempty(c->address_families);
+}
+
+static bool context_has_syscall_filters(const ExecContext *c) {
+ assert(c);
+
+ return c->syscall_allow_list ||
+ !hashmap_isempty(c->syscall_filter);
+}
+
+static bool context_has_syscall_logs(const ExecContext *c) {
+ assert(c);
+
+ return c->syscall_log_allow_list ||
+ !hashmap_isempty(c->syscall_log);
+}
+
+static bool context_has_no_new_privileges(const ExecContext *c) {
+ assert(c);
+
+ if (c->no_new_privileges)
+ return true;
+
+ if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
+ return false;
+
+ /* We need NNP if we have any form of seccomp and are unprivileged */
+ return context_has_address_families(c) ||
+ c->memory_deny_write_execute ||
+ c->restrict_realtime ||
+ c->restrict_suid_sgid ||
+ exec_context_restrict_namespaces_set(c) ||
+ c->protect_clock ||
+ c->protect_kernel_tunables ||
+ c->protect_kernel_modules ||
+ c->protect_kernel_logs ||
+ c->private_devices ||
+ context_has_syscall_filters(c) ||
+ context_has_syscall_logs(c) ||
+ !set_isempty(c->syscall_archs) ||
+ c->lock_personality ||
+ c->protect_hostname;
+}
+
+static bool exec_context_has_credentials(const ExecContext *context) {
+
+ assert(context);
+
+ return !hashmap_isempty(context->set_credentials) ||
+ context->load_credentials;
+}
+
+#if HAVE_SECCOMP
+
+static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
+
+ if (is_seccomp_available())
+ return false;
+
+ log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
+ return true;
+}
+
+static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
+ uint32_t negative_action, default_action, action;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (!context_has_syscall_filters(c))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "SystemCallFilter="))
+ return 0;
+
+ negative_action = c->syscall_errno == SECCOMP_ERROR_NUMBER_KILL ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
+
+ if (c->syscall_allow_list) {
+ default_action = negative_action;
+ action = SCMP_ACT_ALLOW;
+ } else {
+ default_action = SCMP_ACT_ALLOW;
+ action = negative_action;
+ }
+
+ if (needs_ambient_hack) {
+ r = seccomp_filter_set_add(c->syscall_filter, c->syscall_allow_list, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
+ if (r < 0)
+ return r;
+ }
+
+ return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
+}
+
+static int apply_syscall_log(const Unit* u, const ExecContext *c) {
+#ifdef SCMP_ACT_LOG
+ uint32_t default_action, action;
+#endif
+
+ assert(u);
+ assert(c);
+
+ if (!context_has_syscall_logs(c))
+ return 0;
+
+#ifdef SCMP_ACT_LOG
+ if (skip_seccomp_unavailable(u, "SystemCallLog="))
+ return 0;
+
+ if (c->syscall_log_allow_list) {
+ /* Log nothing but the ones listed */
+ default_action = SCMP_ACT_ALLOW;
+ action = SCMP_ACT_LOG;
+ } else {
+ /* Log everything but the ones listed */
+ default_action = SCMP_ACT_LOG;
+ action = SCMP_ACT_ALLOW;
+ }
+
+ return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_log, action, false);
+#else
+ /* old libseccomp */
+ log_unit_debug(u, "SECCOMP feature SCMP_ACT_LOG not available, skipping SystemCallLog=");
+ return 0;
+#endif
+}
+
+static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (set_isempty(c->syscall_archs))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
+ return 0;
+
+ return seccomp_restrict_archs(c->syscall_archs);
+}
+
+static int apply_address_families(const Unit* u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!context_has_address_families(c))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
+ return 0;
+
+ return seccomp_restrict_address_families(c->address_families, c->address_families_allow_list);
+}
+
+static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!c->memory_deny_write_execute)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
+ return 0;
+
+ return seccomp_memory_deny_write_execute();
+}
+
+static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!c->restrict_realtime)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictRealtime="))
+ return 0;
+
+ return seccomp_restrict_realtime();
+}
+
+static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!c->restrict_suid_sgid)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
+ return 0;
+
+ return seccomp_restrict_suid_sgid();
+}
+
+static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
+ * let's protect even those systems where this is left on in the kernel. */
+
+ if (!c->protect_kernel_tunables)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
+ return 0;
+
+ return seccomp_protect_sysctl();
+}
+
+static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ /* Turn off module syscalls on ProtectKernelModules=yes */
+
+ if (!c->protect_kernel_modules)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
+ return 0;
+
+ return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
+}
+
+static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!c->protect_kernel_logs)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelLogs="))
+ return 0;
+
+ return seccomp_protect_syslog();
+}
+
+static int apply_protect_clock(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!c->protect_clock)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "ProtectClock="))
+ return 0;
+
+ return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK, SCMP_ACT_ERRNO(EPERM), false);
+}
+
+static int apply_private_devices(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
+
+ if (!c->private_devices)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "PrivateDevices="))
+ return 0;
+
+ return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
+}
+
+static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
+ assert(u);
+ assert(c);
+
+ if (!exec_context_restrict_namespaces_set(c))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
+ return 0;
+
+ return seccomp_restrict_namespaces(c->restrict_namespaces);
+}
+
+static int apply_lock_personality(const Unit* u, const ExecContext *c) {
+ unsigned long personality;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (!c->lock_personality)
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "LockPersonality="))
+ return 0;
+
+ personality = c->personality;
+
+ /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
+ if (personality == PERSONALITY_INVALID) {
+
+ r = opinionated_personality(&personality);
+ if (r < 0)
+ return r;
+ }
+
+ return seccomp_lock_personality(personality);
+}
+
+#endif
+
+static int apply_protect_hostname(const Unit *u, const ExecContext *c, int *ret_exit_status) {
+ assert(u);
+ assert(c);
+
+ if (!c->protect_hostname)
+ return 0;
+
+ if (ns_type_supported(NAMESPACE_UTS)) {
+ if (unshare(CLONE_NEWUTS) < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
+ *ret_exit_status = EXIT_NAMESPACE;
+ return log_unit_error_errno(u, errno, "Failed to set up UTS namespacing: %m");
+ }
+
+ log_unit_warning(u, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
+ }
+ } else
+ log_unit_warning(u, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
+
+#if HAVE_SECCOMP
+ int r;
+
+ if (skip_seccomp_unavailable(u, "ProtectHostname="))
+ return 0;
+
+ r = seccomp_protect_hostname();
+ if (r < 0) {
+ *ret_exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(u, r, "Failed to apply hostname restrictions: %m");
+ }
+#endif
+
+ return 0;
+}
+
+static void do_idle_pipe_dance(int idle_pipe[static 4]) {
+ assert(idle_pipe);
+
+ idle_pipe[1] = safe_close(idle_pipe[1]);
+ idle_pipe[2] = safe_close(idle_pipe[2]);
+
+ if (idle_pipe[0] >= 0) {
+ int r;
+
+ r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
+
+ if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
+ ssize_t n;
+
+ /* Signal systemd that we are bored and want to continue. */
+ n = write(idle_pipe[3], "x", 1);
+ if (n > 0)
+ /* Wait for systemd to react to the signal above. */
+ (void) fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
+ }
+
+ idle_pipe[0] = safe_close(idle_pipe[0]);
+
+ }
+
+ idle_pipe[3] = safe_close(idle_pipe[3]);
+}
+
+static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
+
+static int build_environment(
+ const Unit *u,
+ const ExecContext *c,
+ const ExecParameters *p,
+ size_t n_fds,
+ const char *home,
+ const char *username,
+ const char *shell,
+ dev_t journal_stream_dev,
+ ino_t journal_stream_ino,
+ char ***ret) {
+
+ _cleanup_strv_free_ char **our_env = NULL;
+ size_t n_env = 0;
+ char *x;
+
+ assert(u);
+ assert(c);
+ assert(p);
+ assert(ret);
+
+#define N_ENV_VARS 16
+ our_env = new0(char*, N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
+ if (!our_env)
+ return -ENOMEM;
+
+ if (n_fds > 0) {
+ _cleanup_free_ char *joined = NULL;
+
+ if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+
+ if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+
+ joined = strv_join(p->fd_names, ":");
+ if (!joined)
+ return -ENOMEM;
+
+ x = strjoin("LISTEN_FDNAMES=", joined);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
+ if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+
+ if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
+ * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
+ * check the database directly. */
+ if (p->flags & EXEC_NSS_BYPASS_BUS) {
+ x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if (home) {
+ x = strjoin("HOME=", home);
+ if (!x)
+ return -ENOMEM;
+
+ path_simplify(x + 5, true);
+ our_env[n_env++] = x;
+ }
+
+ if (username) {
+ x = strjoin("LOGNAME=", username);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+
+ x = strjoin("USER=", username);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if (shell) {
+ x = strjoin("SHELL=", shell);
+ if (!x)
+ return -ENOMEM;
+
+ path_simplify(x + 6, true);
+ our_env[n_env++] = x;
+ }
+
+ if (!sd_id128_is_null(u->invocation_id)) {
+ if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ if (exec_context_needs_term(c)) {
+ const char *tty_path, *term = NULL;
+
+ tty_path = exec_context_tty_path(c);
+
+ /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try
+ * to inherit the $TERM set for PID 1. This is useful for containers so that the $TERM the
+ * container manager passes to PID 1 ends up all the way in the console login shown. */
+
+ if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
+ term = getenv("TERM");
+
+ if (!term)
+ term = default_term_for_tty(tty_path);
+
+ x = strjoin("TERM=", term);
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
+ if (journal_stream_dev != 0 && journal_stream_ino != 0) {
+ if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ if (c->log_namespace) {
+ x = strjoin("LOG_NAMESPACE=", c->log_namespace);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ _cleanup_free_ char *pre = NULL, *joined = NULL;
+ const char *n;
+
+ if (!p->prefix[t])
+ continue;
+
+ if (strv_isempty(c->directories[t].paths))
+ continue;
+
+ n = exec_directory_env_name_to_string(t);
+ if (!n)
+ continue;
+
+ pre = strjoin(p->prefix[t], "/");
+ if (!pre)
+ return -ENOMEM;
+
+ joined = strv_join_full(c->directories[t].paths, ":", pre, true);
+ if (!joined)
+ return -ENOMEM;
+
+ x = strjoin(n, "=", joined);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ if (exec_context_has_credentials(c) && p->prefix[EXEC_DIRECTORY_RUNTIME]) {
+ x = strjoin("CREDENTIALS_DIRECTORY=", p->prefix[EXEC_DIRECTORY_RUNTIME], "/credentials/", u->id);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ our_env[n_env++] = NULL;
+ assert(n_env <= N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
+#undef N_ENV_VARS
+
+ *ret = TAKE_PTR(our_env);
+
+ return 0;
+}
+
+static int build_pass_environment(const ExecContext *c, char ***ret) {
+ _cleanup_strv_free_ char **pass_env = NULL;
+ size_t n_env = 0, n_bufsize = 0;
+ char **i;
+
+ STRV_FOREACH(i, c->pass_environment) {
+ _cleanup_free_ char *x = NULL;
+ char *v;
+
+ v = getenv(*i);
+ if (!v)
+ continue;
+ x = strjoin(*i, "=", v);
+ if (!x)
+ return -ENOMEM;
+
+ if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
+ return -ENOMEM;
+
+ pass_env[n_env++] = TAKE_PTR(x);
+ pass_env[n_env] = NULL;
+ }
+
+ *ret = TAKE_PTR(pass_env);
+
+ return 0;
+}
+
+static bool exec_needs_mount_namespace(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const ExecRuntime *runtime) {
+
+ assert(context);
+ assert(params);
+
+ if (context->root_image)
+ return true;
+
+ if (!strv_isempty(context->read_write_paths) ||
+ !strv_isempty(context->read_only_paths) ||
+ !strv_isempty(context->inaccessible_paths))
+ return true;
+
+ if (context->n_bind_mounts > 0)
+ return true;
+
+ if (context->n_temporary_filesystems > 0)
+ return true;
+
+ if (context->n_mount_images > 0)
+ return true;
+
+ if (!IN_SET(context->mount_flags, 0, MS_SHARED))
+ return true;
+
+ if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
+ return true;
+
+ if (context->private_devices ||
+ context->private_mounts ||
+ context->protect_system != PROTECT_SYSTEM_NO ||
+ context->protect_home != PROTECT_HOME_NO ||
+ context->protect_kernel_tunables ||
+ context->protect_kernel_modules ||
+ context->protect_kernel_logs ||
+ context->protect_control_groups ||
+ context->protect_proc != PROTECT_PROC_DEFAULT ||
+ context->proc_subset != PROC_SUBSET_ALL)
+ return true;
+
+ if (context->root_directory) {
+ if (exec_context_get_effective_mount_apivfs(context))
+ return true;
+
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ if (!params->prefix[t])
+ continue;
+
+ if (!strv_isempty(context->directories[t].paths))
+ return true;
+ }
+ }
+
+ if (context->dynamic_user &&
+ (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
+ return true;
+
+ if (context->log_namespace)
+ return true;
+
+ return false;
+}
+
+static int setup_private_users(uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
+ _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
+ _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
+ _cleanup_close_ int unshare_ready_fd = -1;
+ _cleanup_(sigkill_waitp) pid_t pid = 0;
+ uint64_t c = 1;
+ ssize_t n;
+ int r;
+
+ /* Set up a user namespace and map the original UID/GID (IDs from before any user or group changes, i.e.
+ * the IDs from the user or system manager(s)) to itself, the selected UID/GID to itself, and everything else to
+ * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
+ * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
+ * which waits for the parent to create the new user namespace while staying in the original namespace. The
+ * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
+ * continues execution normally.
+ * For unprivileged users (i.e. without capabilities), the root to root mapping is excluded. As such, it
+ * does not need CAP_SETUID to write the single line mapping to itself. */
+
+ /* Can only set up multiple mappings with CAP_SETUID. */
+ if (have_effective_cap(CAP_SETUID) && uid != ouid && uid_is_valid(uid))
+ r = asprintf(&uid_map,
+ UID_FMT " " UID_FMT " 1\n" /* Map $OUID → $OUID */
+ UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
+ ouid, ouid, uid, uid);
+ else
+ r = asprintf(&uid_map,
+ UID_FMT " " UID_FMT " 1\n", /* Map $OUID → $OUID */
+ ouid, ouid);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ /* Can only set up multiple mappings with CAP_SETGID. */
+ if (have_effective_cap(CAP_SETGID) && gid != ogid && gid_is_valid(gid))
+ r = asprintf(&gid_map,
+ GID_FMT " " GID_FMT " 1\n" /* Map $OGID → $OGID */
+ GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
+ ogid, ogid, gid, gid);
+ else
+ r = asprintf(&gid_map,
+ GID_FMT " " GID_FMT " 1\n", /* Map $OGID -> $OGID */
+ ogid, ogid);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ /* Create a communication channel so that the parent can tell the child when it finished creating the user
+ * namespace. */
+ unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
+ if (unshare_ready_fd < 0)
+ return -errno;
+
+ /* Create a communication channel so that the child can tell the parent a proper error code in case it
+ * failed. */
+ if (pipe2(errno_pipe, O_CLOEXEC) < 0)
+ return -errno;
+
+ r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ _cleanup_close_ int fd = -1;
+ const char *a;
+ pid_t ppid;
+
+ /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
+ * here, after the parent opened its own user namespace. */
+
+ ppid = getppid();
+ errno_pipe[0] = safe_close(errno_pipe[0]);
+
+ /* Wait until the parent unshared the user namespace */
+ if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ /* Disable the setgroups() system call in the child user namespace, for good. */
+ a = procfs_file_alloca(ppid, "setgroups");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ /* If the file is missing the kernel is too old, let's continue anyway. */
+ } else {
+ if (write(fd, "deny\n", 5) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ fd = safe_close(fd);
+ }
+
+ /* First write the GID map */
+ a = procfs_file_alloca(ppid, "gid_map");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ if (write(fd, gid_map, strlen(gid_map)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ fd = safe_close(fd);
+
+ /* The write the UID map */
+ a = procfs_file_alloca(ppid, "uid_map");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ if (write(fd, uid_map, strlen(uid_map)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe[1] = safe_close(errno_pipe[1]);
+
+ if (unshare(CLONE_NEWUSER) < 0)
+ return -errno;
+
+ /* Let the child know that the namespace is ready now */
+ if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
+ return -errno;
+
+ /* Try to read an error code from the child */
+ n = read(errno_pipe[0], &r, sizeof(r));
+ if (n < 0)
+ return -errno;
+ if (n == sizeof(r)) { /* an error code was sent to us */
+ if (r < 0)
+ return r;
+ return -EIO;
+ }
+ if (n != 0) /* on success we should have read 0 bytes */
+ return -EIO;
+
+ r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
+ pid = 0;
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
+ return -EIO;
+
+ return 0;
+}
+
+static bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
+ if (!context->dynamic_user)
+ return false;
+
+ if (type == EXEC_DIRECTORY_CONFIGURATION)
+ return false;
+
+ if (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO)
+ return false;
+
+ return true;
+}
+
+static int setup_exec_directory(
+ const ExecContext *context,
+ const ExecParameters *params,
+ uid_t uid,
+ gid_t gid,
+ ExecDirectoryType type,
+ int *exit_status) {
+
+ static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
+ [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
+ [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
+ [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
+ [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
+ };
+ char **rt;
+ int r;
+
+ assert(context);
+ assert(params);
+ assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
+ assert(exit_status);
+
+ if (!params->prefix[type])
+ return 0;
+
+ if (params->flags & EXEC_CHOWN_DIRECTORIES) {
+ if (!uid_is_valid(uid))
+ uid = 0;
+ if (!gid_is_valid(gid))
+ gid = 0;
+ }
+
+ STRV_FOREACH(rt, context->directories[type].paths) {
+ _cleanup_free_ char *p = NULL, *pp = NULL;
+
+ p = path_join(params->prefix[type], *rt);
+ if (!p) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = mkdir_parents_label(p, 0755);
+ if (r < 0)
+ goto fail;
+
+ if (exec_directory_is_private(context, type)) {
+ _cleanup_free_ char *private_root = NULL;
+
+ /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
+ * case we want to avoid leaving a directory around fully accessible that is owned by
+ * a dynamic user whose UID is later on reused. To lock this down we use the same
+ * trick used by container managers to prohibit host users to get access to files of
+ * the same UID in containers: we place everything inside a directory that has an
+ * access mode of 0700 and is owned root:root, so that it acts as security boundary
+ * for unprivileged host code. We then use fs namespacing to make this directory
+ * permeable for the service itself.
+ *
+ * Specifically: for a service which wants a special directory "foo/" we first create
+ * a directory "private/" with access mode 0700 owned by root:root. Then we place
+ * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
+ * "private/foo". This way, privileged host users can access "foo/" as usual, but
+ * unprivileged host users can't look into it. Inside of the namespace of the unit
+ * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
+ * "private/foo/" is mounted under the same name, thus disabling the access boundary
+ * for the service and making sure it only gets access to the dirs it needs but no
+ * others. Tricky? Yes, absolutely, but it works!
+ *
+ * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
+ * to be owned by the service itself.
+ *
+ * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
+ * for sharing files or sockets with other services. */
+
+ private_root = path_join(params->prefix[type], "private");
+ if (!private_root) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
+ r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ pp = path_join(private_root, *rt);
+ if (!pp) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* Create all directories between the configured directory and this private root, and mark them 0755 */
+ r = mkdir_parents_label(pp, 0755);
+ if (r < 0)
+ goto fail;
+
+ if (is_dir(p, false) > 0 &&
+ (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
+
+ /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
+ * it over. Most likely the service has been upgraded from one that didn't use
+ * DynamicUser=1, to one that does. */
+
+ log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
+ "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
+ exec_directory_type_to_string(type), p, pp);
+
+ if (rename(p, pp) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ } else {
+ /* Otherwise, create the actual directory for the service */
+
+ r = mkdir_label(pp, context->directories[type].mode);
+ if (r < 0 && r != -EEXIST)
+ goto fail;
+ }
+
+ /* And link it up from the original place */
+ r = symlink_idempotent(pp, p, true);
+ if (r < 0)
+ goto fail;
+
+ } else {
+ _cleanup_free_ char *target = NULL;
+
+ if (type != EXEC_DIRECTORY_CONFIGURATION &&
+ readlink_and_make_absolute(p, &target) >= 0) {
+ _cleanup_free_ char *q = NULL, *q_resolved = NULL, *target_resolved = NULL;
+
+ /* This already exists and is a symlink? Interesting. Maybe it's one created
+ * by DynamicUser=1 (see above)?
+ *
+ * We do this for all directory types except for ConfigurationDirectory=,
+ * since they all support the private/ symlink logic at least in some
+ * configurations, see above. */
+
+ r = chase_symlinks(target, NULL, 0, &target_resolved, NULL);
+ if (r < 0)
+ goto fail;
+
+ q = path_join(params->prefix[type], "private", *rt);
+ if (!q) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* /var/lib or friends may be symlinks. So, let's chase them also. */
+ r = chase_symlinks(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
+ if (r < 0)
+ goto fail;
+
+ if (path_equal(q_resolved, target_resolved)) {
+
+ /* Hmm, apparently DynamicUser= was once turned on for this service,
+ * but is no longer. Let's move the directory back up. */
+
+ log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
+ "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
+ exec_directory_type_to_string(type), q, p);
+
+ if (unlink(p) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (rename(q, p) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ }
+ }
+
+ r = mkdir_label(p, context->directories[type].mode);
+ if (r < 0) {
+ if (r != -EEXIST)
+ goto fail;
+
+ if (type == EXEC_DIRECTORY_CONFIGURATION) {
+ struct stat st;
+
+ /* Don't change the owner/access mode of the configuration directory,
+ * as in the common case it is not written to by a service, and shall
+ * not be writable. */
+
+ if (stat(p, &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ /* Still complain if the access mode doesn't match */
+ if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
+ log_warning("%s \'%s\' already exists but the mode is different. "
+ "(File system: %o %sMode: %o)",
+ exec_directory_type_to_string(type), *rt,
+ st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
+
+ continue;
+ }
+ }
+ }
+
+ /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
+ * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
+ * current UID/GID ownership.) */
+ r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
+ if (r < 0)
+ goto fail;
+
+ /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
+ * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
+ * assignments to exist.*/
+ r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
+ if (r < 0)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ *exit_status = exit_status_table[type];
+ return r;
+}
+
+static int write_credential(
+ int dfd,
+ const char *id,
+ const void *data,
+ size_t size,
+ uid_t uid,
+ bool ownership_ok) {
+
+ _cleanup_(unlink_and_freep) char *tmp = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ r = tempfn_random_child("", "cred", &tmp);
+ if (r < 0)
+ return r;
+
+ fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
+ if (fd < 0) {
+ tmp = mfree(tmp);
+ return -errno;
+ }
+
+ r = loop_write(fd, data, size, /* do_pool = */ false);
+ if (r < 0)
+ return r;
+
+ if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
+ return -errno;
+
+ if (uid_is_valid(uid) && uid != getuid()) {
+ r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
+ if (r < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
+ return r;
+
+ if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
+ * to express: that the user gets read access and nothing
+ * else. But if the backing fs can't support that (e.g. ramfs)
+ * then we can use file ownership instead. But that's only safe if
+ * we can then re-mount the whole thing read-only, so that the
+ * user can no longer chmod() the file to gain write access. */
+ return r;
+
+ if (fchown(fd, uid, (gid_t) -1) < 0)
+ return -errno;
+ }
+ }
+
+ if (renameat(dfd, tmp, dfd, id) < 0)
+ return -errno;
+
+ tmp = mfree(tmp);
+ return 0;
+}
+
+#define CREDENTIALS_BYTES_MAX (1024LU * 1024LU) /* Refuse to pass more than 1M, after all this is unswappable memory */
+
+static int acquire_credentials(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const char *unit,
+ const char *p,
+ uid_t uid,
+ bool ownership_ok) {
+
+ uint64_t left = CREDENTIALS_BYTES_MAX;
+ _cleanup_close_ int dfd = -1;
+ ExecSetCredential *sc;
+ char **id, **fn;
+ int r;
+
+ assert(context);
+ assert(p);
+
+ dfd = open(p, O_DIRECTORY|O_CLOEXEC);
+ if (dfd < 0)
+ return -errno;
+
+ /* First we use the literally specified credentials. Note that they might be overridden again below,
+ * and thus act as a "default" if the same credential is specified multiple times */
+ HASHMAP_FOREACH(sc, context->set_credentials) {
+ size_t add;
+
+ add = strlen(sc->id) + sc->size;
+ if (add > left)
+ return -E2BIG;
+
+ r = write_credential(dfd, sc->id, sc->data, sc->size, uid, ownership_ok);
+ if (r < 0)
+ return r;
+
+ left -= add;
+ }
+
+ /* Then, load credential off disk (or acquire via AF_UNIX socket) */
+ STRV_FOREACH_PAIR(id, fn, context->load_credentials) {
+ ReadFullFileFlags flags = READ_FULL_FILE_SECURE;
+ _cleanup_(erase_and_freep) char *data = NULL;
+ _cleanup_free_ char *j = NULL, *bindname = NULL;
+ const char *source;
+ size_t size, add;
+
+ if (path_is_absolute(*fn)) {
+ /* If this is an absolute path, read the data directly from it, and support AF_UNIX sockets */
+ source = *fn;
+ flags |= READ_FULL_FILE_CONNECT_SOCKET;
+
+ /* Pass some minimal info about the unit and the credential name we are looking to acquire
+ * via the source socket address in case we read off an AF_UNIX socket. */
+ if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, *id) < 0)
+ return -ENOMEM;
+
+ } else if (params->received_credentials) {
+ /* If this is a relative path, take it relative to the credentials we received
+ * ourselves. We don't support the AF_UNIX stuff in this mode, since we are operating
+ * on a credential store, i.e. this is guaranteed to be regular files. */
+ j = path_join(params->received_credentials, *fn);
+ if (!j)
+ return -ENOMEM;
+
+ source = j;
+ } else
+ source = NULL;
+
+
+ if (source)
+ r = read_full_file_full(AT_FDCWD, source, flags, bindname, &data, &size);
+ else
+ r = -ENOENT;
+ if (r == -ENOENT &&
+ faccessat(dfd, *id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) /* If the source file doesn't exist, but we already acquired the key otherwise, then don't fail */
+ continue;
+ if (r < 0)
+ return r;
+
+ add = strlen(*id) + size;
+ if (add > left)
+ return -E2BIG;
+
+ r = write_credential(dfd, *id, data, size, uid, ownership_ok);
+ if (r < 0)
+ return r;
+
+ left -= add;
+ }
+
+ if (fchmod(dfd, 0500) < 0) /* Now take away the "w" bit */
+ return -errno;
+
+ /* After we created all keys with the right perms, also make sure the credential store as a whole is
+ * accessible */
+
+ if (uid_is_valid(uid) && uid != getuid()) {
+ r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
+ if (r < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
+ return r;
+
+ if (!ownership_ok)
+ return r;
+
+ if (fchown(dfd, uid, (gid_t) -1) < 0)
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+static int setup_credentials_internal(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const char *unit,
+ const char *final, /* This is where the credential store shall eventually end up at */
+ const char *workspace, /* This is where we can prepare it before moving it to the final place */
+ bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
+ bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
+ uid_t uid) {
+
+ int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
+ * if we mounted something; false if we definitely can't mount anything */
+ bool final_mounted;
+ const char *where;
+
+ assert(context);
+ assert(final);
+ assert(workspace);
+
+ if (reuse_workspace) {
+ r = path_is_mount_point(workspace, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse it, let's keep this in mind */
+ else
+ workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
+ } else
+ workspace_mounted = -1; /* ditto */
+
+ r = path_is_mount_point(final, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* If the final place already has something mounted, we use that. If the workspace also has
+ * something mounted we assume it's actually the same mount (but with MS_RDONLY
+ * different). */
+ final_mounted = true;
+
+ if (workspace_mounted < 0) {
+ /* If the final place is mounted, but the workspace we isn't, then let's bind mount
+ * the final version to the workspace, and make it writable, so that we can make
+ * changes */
+
+ r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL);
+ if (r < 0)
+ return r;
+
+ workspace_mounted = true;
+ }
+ } else
+ final_mounted = false;
+
+ if (workspace_mounted < 0) {
+ /* Nothing is mounted on the workspace yet, let's try to mount something now */
+ for (int try = 0;; try++) {
+
+ if (try == 0) {
+ /* Try "ramfs" first, since it's not swap backed */
+ r = mount_nofollow_verbose(LOG_DEBUG, "ramfs", workspace, "ramfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, "mode=0700");
+ if (r >= 0) {
+ workspace_mounted = true;
+ break;
+ }
+
+ } else if (try == 1) {
+ _cleanup_free_ char *opts = NULL;
+
+ if (asprintf(&opts, "mode=0700,nr_inodes=1024,size=%lu", CREDENTIALS_BYTES_MAX) < 0)
+ return -ENOMEM;
+
+ /* Fall back to "tmpfs" otherwise */
+ r = mount_nofollow_verbose(LOG_DEBUG, "tmpfs", workspace, "tmpfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, opts);
+ if (r >= 0) {
+ workspace_mounted = true;
+ break;
+ }
+
+ } else {
+ /* If that didn't work, try to make a bind mount from the final to the workspace, so that we can make it writable there. */
+ r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r)) /* Propagate anything that isn't a permission problem */
+ return r;
+
+ if (must_mount) /* If we it's not OK to use the plain directory
+ * fallback, propagate all errors too */
+ return r;
+
+ /* If we lack privileges to bind mount stuff, then let's gracefully
+ * proceed for compat with container envs, and just use the final dir
+ * as is. */
+
+ workspace_mounted = false;
+ break;
+ }
+
+ /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL);
+ if (r < 0)
+ return r;
+
+ workspace_mounted = true;
+ break;
+ }
+ }
+ }
+
+ assert(!must_mount || workspace_mounted > 0);
+ where = workspace_mounted ? workspace : final;
+
+ r = acquire_credentials(context, params, unit, where, uid, workspace_mounted);
+ if (r < 0)
+ return r;
+
+ if (workspace_mounted) {
+ /* Make workspace read-only now, so that any bind mount we make from it defaults to read-only too */
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL);
+ if (r < 0)
+ return r;
+
+ /* And mount it to the final place, read-only */
+ if (final_mounted)
+ r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
+ else
+ r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *parent = NULL;
+
+ /* If we do not have our own mount put used the plain directory fallback, then we need to
+ * open access to the top-level credential directory and the per-service directory now */
+
+ parent = dirname_malloc(final);
+ if (!parent)
+ return -ENOMEM;
+ if (chmod(parent, 0755) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int setup_credentials(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const char *unit,
+ uid_t uid) {
+
+ _cleanup_free_ char *p = NULL, *q = NULL;
+ const char *i;
+ int r;
+
+ assert(context);
+ assert(params);
+
+ if (!exec_context_has_credentials(context))
+ return 0;
+
+ if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
+ return -EINVAL;
+
+ /* This where we'll place stuff when we are done; this main credentials directory is world-readable,
+ * and the subdir we mount over with a read-only file system readable by the service's user */
+ q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
+ if (!q)
+ return -ENOMEM;
+
+ r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ p = path_join(q, unit);
+ if (!p)
+ return -ENOMEM;
+
+ r = mkdir_label(p, 0700); /* per-unit dir: private to user */
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
+ if (r < 0) {
+ _cleanup_free_ char *t = NULL, *u = NULL;
+
+ /* If this is not a privilege or support issue then propagate the error */
+ if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
+ return r;
+
+ /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
+ * it into place, so that users can't access half-initialized credential stores. */
+ t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
+ if (!t)
+ return -ENOMEM;
+
+ /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
+ * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
+ * after it is fully set up */
+ u = path_join(t, unit);
+ if (!u)
+ return -ENOMEM;
+
+ FOREACH_STRING(i, t, u) {
+ r = mkdir_label(i, 0700);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ r = setup_credentials_internal(
+ context,
+ params,
+ unit,
+ p, /* final mount point */
+ u, /* temporary workspace to overmount */
+ true, /* reuse the workspace if it is already a mount */
+ false, /* it's OK to fall back to a plain directory if we can't mount anything */
+ uid);
+
+ (void) rmdir(u); /* remove the workspace again if we can. */
+
+ if (r < 0)
+ return r;
+
+ } else if (r == 0) {
+
+ /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
+ * we can use the same directory for all cases, after turning off propagation. Question
+ * though is: where do we turn off propagation exactly, and where do we place the workspace
+ * directory? We need some place that is guaranteed to be a mount point in the host, and
+ * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
+ * since we ultimately want to move the resulting file system there, i.e. we need propagation
+ * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
+ * would be visible in the host mount table all the time, which we want to avoid. Hence, what
+ * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
+ * /dev/ is a mount point and we now for sure that /dev/shm/ exists. Hence we can turn off
+ * propagation on the former, and then overmount the latter.
+ *
+ * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
+ * for this purpose, but there are few other candidates that work equally well for us, and
+ * given that the we do this in a privately namespaced short-lived single-threaded process
+ * that no one else sees this should be OK to do.*/
+
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL); /* Turn off propagation from our namespace to host */
+ if (r < 0)
+ goto child_fail;
+
+ r = setup_credentials_internal(
+ context,
+ params,
+ unit,
+ p, /* final mount point */
+ "/dev/shm", /* temporary workspace to overmount */
+ false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
+ true, /* insist that something is mounted, do not allow fallback to plain directory */
+ uid);
+ if (r < 0)
+ goto child_fail;
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ _exit(EXIT_FAILURE);
+ }
+
+ return 0;
+}
+
+#if ENABLE_SMACK
+static int setup_smack(
+ const ExecContext *context,
+ const char *executable) {
+ int r;
+
+ assert(context);
+ assert(executable);
+
+ if (context->smack_process_label) {
+ r = mac_smack_apply_pid(0, context->smack_process_label);
+ if (r < 0)
+ return r;
+ }
+#ifdef SMACK_DEFAULT_PROCESS_LABEL
+ else {
+ _cleanup_free_ char *exec_label = NULL;
+
+ r = mac_smack_read(executable, SMACK_ATTR_EXEC, &exec_label);
+ if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
+ return r;
+
+ r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ return 0;
+}
+#endif
+
+static int compile_bind_mounts(
+ const ExecContext *context,
+ const ExecParameters *params,
+ BindMount **ret_bind_mounts,
+ size_t *ret_n_bind_mounts,
+ char ***ret_empty_directories) {
+
+ _cleanup_strv_free_ char **empty_directories = NULL;
+ BindMount *bind_mounts;
+ size_t n, h = 0;
+ int r;
+
+ assert(context);
+ assert(params);
+ assert(ret_bind_mounts);
+ assert(ret_n_bind_mounts);
+ assert(ret_empty_directories);
+
+ n = context->n_bind_mounts;
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ if (!params->prefix[t])
+ continue;
+
+ n += strv_length(context->directories[t].paths);
+ }
+
+ if (n <= 0) {
+ *ret_bind_mounts = NULL;
+ *ret_n_bind_mounts = 0;
+ *ret_empty_directories = NULL;
+ return 0;
+ }
+
+ bind_mounts = new(BindMount, n);
+ if (!bind_mounts)
+ return -ENOMEM;
+
+ for (size_t i = 0; i < context->n_bind_mounts; i++) {
+ BindMount *item = context->bind_mounts + i;
+ char *s, *d;
+
+ s = strdup(item->source);
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ d = strdup(item->destination);
+ if (!d) {
+ free(s);
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ bind_mounts[h++] = (BindMount) {
+ .source = s,
+ .destination = d,
+ .read_only = item->read_only,
+ .recursive = item->recursive,
+ .ignore_enoent = item->ignore_enoent,
+ };
+ }
+
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ char **suffix;
+
+ if (!params->prefix[t])
+ continue;
+
+ if (strv_isempty(context->directories[t].paths))
+ continue;
+
+ if (exec_directory_is_private(context, t) &&
+ !exec_context_with_rootfs(context)) {
+ char *private_root;
+
+ /* So this is for a dynamic user, and we need to make sure the process can access its own
+ * directory. For that we overmount the usually inaccessible "private" subdirectory with a
+ * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
+
+ private_root = path_join(params->prefix[t], "private");
+ if (!private_root) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = strv_consume(&empty_directories, private_root);
+ if (r < 0)
+ goto finish;
+ }
+
+ STRV_FOREACH(suffix, context->directories[t].paths) {
+ char *s, *d;
+
+ if (exec_directory_is_private(context, t))
+ s = path_join(params->prefix[t], "private", *suffix);
+ else
+ s = path_join(params->prefix[t], *suffix);
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (exec_directory_is_private(context, t) &&
+ exec_context_with_rootfs(context))
+ /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
+ * directory is not created on the root directory. So, let's bind-mount the directory
+ * on the 'non-private' place. */
+ d = path_join(params->prefix[t], *suffix);
+ else
+ d = strdup(s);
+ if (!d) {
+ free(s);
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ bind_mounts[h++] = (BindMount) {
+ .source = s,
+ .destination = d,
+ .read_only = false,
+ .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
+ .recursive = true,
+ .ignore_enoent = false,
+ };
+ }
+ }
+
+ assert(h == n);
+
+ *ret_bind_mounts = bind_mounts;
+ *ret_n_bind_mounts = n;
+ *ret_empty_directories = TAKE_PTR(empty_directories);
+
+ return (int) n;
+
+finish:
+ bind_mount_free_many(bind_mounts, h);
+ return r;
+}
+
+static bool insist_on_sandboxing(
+ const ExecContext *context,
+ const char *root_dir,
+ const char *root_image,
+ const BindMount *bind_mounts,
+ size_t n_bind_mounts) {
+
+ assert(context);
+ assert(n_bind_mounts == 0 || bind_mounts);
+
+ /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
+ * would alter the view on the file system beyond making things read-only or invisible, i.e. would
+ * rearrange stuff in a way we cannot ignore gracefully. */
+
+ if (context->n_temporary_filesystems > 0)
+ return true;
+
+ if (root_dir || root_image)
+ return true;
+
+ if (context->n_mount_images > 0)
+ return true;
+
+ if (context->dynamic_user)
+ return true;
+
+ /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
+ * essential. */
+ for (size_t i = 0; i < n_bind_mounts; i++)
+ if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
+ return true;
+
+ if (context->log_namespace)
+ return true;
+
+ return false;
+}
+
+static int apply_mount_namespace(
+ const Unit *u,
+ ExecCommandFlags command_flags,
+ const ExecContext *context,
+ const ExecParameters *params,
+ const ExecRuntime *runtime,
+ char **error_path) {
+
+ _cleanup_strv_free_ char **empty_directories = NULL;
+ const char *tmp_dir = NULL, *var_tmp_dir = NULL;
+ const char *root_dir = NULL, *root_image = NULL;
+ _cleanup_free_ char *creds_path = NULL;
+ NamespaceInfo ns_info;
+ bool needs_sandboxing;
+ BindMount *bind_mounts = NULL;
+ size_t n_bind_mounts = 0;
+ int r;
+
+ assert(context);
+
+ if (params->flags & EXEC_APPLY_CHROOT) {
+ root_image = context->root_image;
+
+ if (!root_image)
+ root_dir = context->root_directory;
+ }
+
+ r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
+ if (r < 0)
+ return r;
+
+ needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command_flags & EXEC_COMMAND_FULLY_PRIVILEGED);
+ if (needs_sandboxing) {
+ /* The runtime struct only contains the parent of the private /tmp,
+ * which is non-accessible to world users. Inside of it there's a /tmp
+ * that is sticky, and that's the one we want to use here.
+ * This does not apply when we are using /run/systemd/empty as fallback. */
+
+ if (context->private_tmp && runtime) {
+ if (streq_ptr(runtime->tmp_dir, RUN_SYSTEMD_EMPTY))
+ tmp_dir = runtime->tmp_dir;
+ else if (runtime->tmp_dir)
+ tmp_dir = strjoina(runtime->tmp_dir, "/tmp");
+
+ if (streq_ptr(runtime->var_tmp_dir, RUN_SYSTEMD_EMPTY))
+ var_tmp_dir = runtime->var_tmp_dir;
+ else if (runtime->var_tmp_dir)
+ var_tmp_dir = strjoina(runtime->var_tmp_dir, "/tmp");
+ }
+
+ ns_info = (NamespaceInfo) {
+ .ignore_protect_paths = false,
+ .private_dev = context->private_devices,
+ .protect_control_groups = context->protect_control_groups,
+ .protect_kernel_tunables = context->protect_kernel_tunables,
+ .protect_kernel_modules = context->protect_kernel_modules,
+ .protect_kernel_logs = context->protect_kernel_logs,
+ .protect_hostname = context->protect_hostname,
+ .mount_apivfs = exec_context_get_effective_mount_apivfs(context),
+ .private_mounts = context->private_mounts,
+ .protect_home = context->protect_home,
+ .protect_system = context->protect_system,
+ .protect_proc = context->protect_proc,
+ .proc_subset = context->proc_subset,
+ };
+ } else if (!context->dynamic_user && root_dir)
+ /*
+ * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
+ * sandbox info, otherwise enforce it, don't ignore protected paths and
+ * fail if we are enable to apply the sandbox inside the mount namespace.
+ */
+ ns_info = (NamespaceInfo) {
+ .ignore_protect_paths = true,
+ };
+ else
+ ns_info = (NamespaceInfo) {};
+
+ if (context->mount_flags == MS_SHARED)
+ log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
+
+ if (exec_context_has_credentials(context) && params->prefix[EXEC_DIRECTORY_RUNTIME]) {
+ creds_path = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials", u->id);
+ if (!creds_path) {
+ r = -ENOMEM;
+ goto finalize;
+ }
+ }
+
+ r = setup_namespace(root_dir, root_image, context->root_image_options,
+ &ns_info, context->read_write_paths,
+ needs_sandboxing ? context->read_only_paths : NULL,
+ needs_sandboxing ? context->inaccessible_paths : NULL,
+ empty_directories,
+ bind_mounts,
+ n_bind_mounts,
+ context->temporary_filesystems,
+ context->n_temporary_filesystems,
+ context->mount_images,
+ context->n_mount_images,
+ tmp_dir,
+ var_tmp_dir,
+ creds_path,
+ context->log_namespace,
+ context->mount_flags,
+ context->root_hash, context->root_hash_size, context->root_hash_path,
+ context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path,
+ context->root_verity,
+ DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
+ error_path);
+
+ /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
+ * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
+ * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
+ * completely different execution environment. */
+ if (r == -ENOANO) {
+ if (insist_on_sandboxing(
+ context,
+ root_dir, root_image,
+ bind_mounts,
+ n_bind_mounts)) {
+ log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
+ "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
+ n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
+
+ r = -EOPNOTSUPP;
+ } else {
+ log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
+ r = 0;
+ }
+ }
+
+finalize:
+ bind_mount_free_many(bind_mounts, n_bind_mounts);
+ return r;
+}
+
+static int apply_working_directory(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const char *home,
+ int *exit_status) {
+
+ const char *d, *wd;
+
+ assert(context);
+ assert(exit_status);
+
+ if (context->working_directory_home) {
+
+ if (!home) {
+ *exit_status = EXIT_CHDIR;
+ return -ENXIO;
+ }
+
+ wd = home;
+
+ } else
+ wd = empty_to_root(context->working_directory);
+
+ if (params->flags & EXEC_APPLY_CHROOT)
+ d = wd;
+ else
+ d = prefix_roota(context->root_directory, wd);
+
+ if (chdir(d) < 0 && !context->working_directory_missing_ok) {
+ *exit_status = EXIT_CHDIR;
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int apply_root_directory(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const bool needs_mount_ns,
+ int *exit_status) {
+
+ assert(context);
+ assert(exit_status);
+
+ if (params->flags & EXEC_APPLY_CHROOT)
+ if (!needs_mount_ns && context->root_directory)
+ if (chroot(context->root_directory) < 0) {
+ *exit_status = EXIT_CHROOT;
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int setup_keyring(
+ const Unit *u,
+ const ExecContext *context,
+ const ExecParameters *p,
+ uid_t uid, gid_t gid) {
+
+ key_serial_t keyring;
+ int r = 0;
+ uid_t saved_uid;
+ gid_t saved_gid;
+
+ assert(u);
+ assert(context);
+ assert(p);
+
+ /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
+ * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
+ * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
+ * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
+ * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
+ * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
+
+ if (context->keyring_mode == EXEC_KEYRING_INHERIT)
+ return 0;
+
+ /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
+ * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
+ * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
+ * & group is just as nasty as acquiring a reference to the user keyring. */
+
+ saved_uid = getuid();
+ saved_gid = getgid();
+
+ if (gid_is_valid(gid) && gid != saved_gid) {
+ if (setregid(gid, -1) < 0)
+ return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
+ }
+
+ if (uid_is_valid(uid) && uid != saved_uid) {
+ if (setreuid(uid, -1) < 0) {
+ r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
+ goto out;
+ }
+ }
+
+ keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
+ if (keyring == -1) {
+ if (errno == ENOSYS)
+ log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
+ else if (ERRNO_IS_PRIVILEGE(errno))
+ log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
+ else if (errno == EDQUOT)
+ log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
+ else
+ r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
+
+ goto out;
+ }
+
+ /* When requested link the user keyring into the session keyring. */
+ if (context->keyring_mode == EXEC_KEYRING_SHARED) {
+
+ if (keyctl(KEYCTL_LINK,
+ KEY_SPEC_USER_KEYRING,
+ KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
+ r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
+ goto out;
+ }
+ }
+
+ /* Restore uid/gid back */
+ if (uid_is_valid(uid) && uid != saved_uid) {
+ if (setreuid(saved_uid, -1) < 0) {
+ r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
+ goto out;
+ }
+ }
+
+ if (gid_is_valid(gid) && gid != saved_gid) {
+ if (setregid(saved_gid, -1) < 0)
+ return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
+ }
+
+ /* Populate they keyring with the invocation ID by default, as original saved_uid. */
+ if (!sd_id128_is_null(u->invocation_id)) {
+ key_serial_t key;
+
+ key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
+ if (key == -1)
+ log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
+ else {
+ if (keyctl(KEYCTL_SETPERM, key,
+ KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
+ KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
+ r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
+ }
+ }
+
+out:
+ /* Revert back uid & gid for the last time, and exit */
+ /* no extra logging, as only the first already reported error matters */
+ if (getuid() != saved_uid)
+ (void) setreuid(saved_uid, -1);
+
+ if (getgid() != saved_gid)
+ (void) setregid(saved_gid, -1);
+
+ return r;
+}
+
+static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
+ assert(array);
+ assert(n);
+ assert(pair);
+
+ if (pair[0] >= 0)
+ array[(*n)++] = pair[0];
+ if (pair[1] >= 0)
+ array[(*n)++] = pair[1];
+}
+
+static int close_remaining_fds(
+ const ExecParameters *params,
+ const ExecRuntime *runtime,
+ const DynamicCreds *dcreds,
+ int user_lookup_fd,
+ int socket_fd,
+ int exec_fd,
+ const int *fds, size_t n_fds) {
+
+ size_t n_dont_close = 0;
+ int dont_close[n_fds + 12];
+
+ assert(params);
+
+ if (params->stdin_fd >= 0)
+ dont_close[n_dont_close++] = params->stdin_fd;
+ if (params->stdout_fd >= 0)
+ dont_close[n_dont_close++] = params->stdout_fd;
+ if (params->stderr_fd >= 0)
+ dont_close[n_dont_close++] = params->stderr_fd;
+
+ if (socket_fd >= 0)
+ dont_close[n_dont_close++] = socket_fd;
+ if (exec_fd >= 0)
+ dont_close[n_dont_close++] = exec_fd;
+ if (n_fds > 0) {
+ memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
+ n_dont_close += n_fds;
+ }
+
+ if (runtime)
+ append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
+
+ if (dcreds) {
+ if (dcreds->user)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
+ if (dcreds->group)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
+ }
+
+ if (user_lookup_fd >= 0)
+ dont_close[n_dont_close++] = user_lookup_fd;
+
+ return close_all_fds(dont_close, n_dont_close);
+}
+
+static int send_user_lookup(
+ Unit *unit,
+ int user_lookup_fd,
+ uid_t uid,
+ gid_t gid) {
+
+ assert(unit);
+
+ /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
+ * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
+ * specified. */
+
+ if (user_lookup_fd < 0)
+ return 0;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid))
+ return 0;
+
+ if (writev(user_lookup_fd,
+ (struct iovec[]) {
+ IOVEC_INIT(&uid, sizeof(uid)),
+ IOVEC_INIT(&gid, sizeof(gid)),
+ IOVEC_INIT_STRING(unit->id) }, 3) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
+ int r;
+
+ assert(c);
+ assert(home);
+ assert(buf);
+
+ /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
+
+ if (*home)
+ return 0;
+
+ if (!c->working_directory_home)
+ return 0;
+
+ r = get_home_dir(buf);
+ if (r < 0)
+ return r;
+
+ *home = *buf;
+ return 1;
+}
+
+static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
+ _cleanup_strv_free_ char ** list = NULL;
+ int r;
+
+ assert(c);
+ assert(p);
+ assert(ret);
+
+ assert(c->dynamic_user);
+
+ /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
+ * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
+ * directories. */
+
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ char **i;
+
+ if (t == EXEC_DIRECTORY_CONFIGURATION)
+ continue;
+
+ if (!p->prefix[t])
+ continue;
+
+ STRV_FOREACH(i, c->directories[t].paths) {
+ char *e;
+
+ if (exec_directory_is_private(c, t))
+ e = path_join(p->prefix[t], "private", *i);
+ else
+ e = path_join(p->prefix[t], *i);
+ if (!e)
+ return -ENOMEM;
+
+ r = strv_consume(&list, e);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *ret = TAKE_PTR(list);
+
+ return 0;
+}
+
+static char *exec_command_line(char **argv);
+
+static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
+ bool using_subcgroup;
+ char *p;
+
+ assert(params);
+ assert(ret);
+
+ if (!params->cgroup_path)
+ return -EINVAL;
+
+ /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
+ * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
+ * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
+ * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
+ * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
+ * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
+ * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
+ * flag, which is only passed for the former statements, not for the latter. */
+
+ using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
+ if (using_subcgroup)
+ p = path_join(params->cgroup_path, ".control");
+ else
+ p = strdup(params->cgroup_path);
+ if (!p)
+ return -ENOMEM;
+
+ *ret = p;
+ return using_subcgroup;
+}
+
+static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+ int r;
+
+ assert(c);
+ assert(ret);
+
+ if (!c->numa_policy.nodes.set) {
+ log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
+ return 0;
+ }
+
+ r = numa_to_cpu_set(&c->numa_policy, &s);
+ if (r < 0)
+ return r;
+
+ cpu_set_reset(ret);
+
+ return cpu_set_add_all(ret, &s);
+}
+
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
+ assert(c);
+
+ return c->cpu_affinity_from_numa;
+}
+
+static int exec_child(
+ Unit *unit,
+ const ExecCommand *command,
+ const ExecContext *context,
+ const ExecParameters *params,
+ ExecRuntime *runtime,
+ DynamicCreds *dcreds,
+ int socket_fd,
+ const int named_iofds[static 3],
+ int *fds,
+ size_t n_socket_fds,
+ size_t n_storage_fds,
+ char **files_env,
+ int user_lookup_fd,
+ int *exit_status) {
+
+ _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
+ int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
+ _cleanup_free_ gid_t *supplementary_gids = NULL;
+ const char *username = NULL, *groupname = NULL;
+ _cleanup_free_ char *home_buffer = NULL;
+ const char *home = NULL, *shell = NULL;
+ char **final_argv = NULL;
+ dev_t journal_stream_dev = 0;
+ ino_t journal_stream_ino = 0;
+ bool userns_set_up = false;
+ bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
+ needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
+ needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
+ needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
+#if HAVE_SELINUX
+ _cleanup_free_ char *mac_selinux_context_net = NULL;
+ bool use_selinux = false;
+#endif
+#if ENABLE_SMACK
+ bool use_smack = false;
+#endif
+#if HAVE_APPARMOR
+ bool use_apparmor = false;
+#endif
+ uid_t saved_uid = getuid();
+ gid_t saved_gid = getgid();
+ uid_t uid = UID_INVALID;
+ gid_t gid = GID_INVALID;
+ size_t n_fds;
+ int secure_bits;
+ _cleanup_free_ gid_t *gids_after_pam = NULL;
+ int ngids_after_pam = 0;
+
+ assert(unit);
+ assert(command);
+ assert(context);
+ assert(params);
+ assert(exit_status);
+
+ rename_process_from_path(command->path);
+
+ /* We reset exactly these signals, since they are the
+ * only ones we set to SIG_IGN in the main daemon. All
+ * others we leave untouched because we set them to
+ * SIG_DFL or a valid handler initially, both of which
+ * will be demoted to SIG_DFL. */
+ (void) default_signals(SIGNALS_CRASH_HANDLER,
+ SIGNALS_IGNORE, -1);
+
+ if (context->ignore_sigpipe)
+ (void) ignore_signals(SIGPIPE, -1);
+
+ r = reset_signal_mask();
+ if (r < 0) {
+ *exit_status = EXIT_SIGNAL_MASK;
+ return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
+ }
+
+ if (params->idle_pipe)
+ do_idle_pipe_dance(params->idle_pipe);
+
+ /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
+ * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
+ * any fds open we don't really want open during the transition. In order to make logging work, we switch the
+ * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
+
+ log_forget_fds();
+ log_set_open_when_needed(true);
+
+ /* In case anything used libc syslog(), close this here, too */
+ closelog();
+
+ n_fds = n_socket_fds + n_storage_fds;
+ r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
+ }
+
+ if (!context->same_pgrp &&
+ setsid() < 0) {
+ *exit_status = EXIT_SETSID;
+ return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
+ }
+
+ exec_context_tty_reset(context, params);
+
+ if (unit_shall_confirm_spawn(unit)) {
+ const char *vc = params->confirm_spawn;
+ _cleanup_free_ char *cmdline = NULL;
+
+ cmdline = exec_command_line(command->argv);
+ if (!cmdline) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = ask_for_confirmation(vc, unit, cmdline);
+ if (r != CONFIRM_EXECUTE) {
+ if (r == CONFIRM_PRETEND_SUCCESS) {
+ *exit_status = EXIT_SUCCESS;
+ return 0;
+ }
+ *exit_status = EXIT_CONFIRM;
+ return log_unit_error_errno(unit, SYNTHETIC_ERRNO(ECANCELED),
+ "Execution cancelled by the user");
+ }
+ }
+
+ /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
+ * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
+ * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
+ * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
+ * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
+ if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
+ setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
+ }
+
+ if (context->dynamic_user && dcreds) {
+ _cleanup_strv_free_ char **suggested_paths = NULL;
+
+ /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
+ * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
+ if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
+ }
+
+ r = compile_suggested_paths(context, params, &suggested_paths);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ if (r == -EILSEQ) {
+ log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
+ return -EOPNOTSUPP;
+ }
+ return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
+ }
+
+ if (!uid_is_valid(uid)) {
+ *exit_status = EXIT_USER;
+ log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
+ return -ESRCH;
+ }
+
+ if (!gid_is_valid(gid)) {
+ *exit_status = EXIT_USER;
+ log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
+ return -ESRCH;
+ }
+
+ if (dcreds->user)
+ username = dcreds->user->name;
+
+ } else {
+ r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
+ }
+
+ r = get_fixed_group(context, &groupname, &gid);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
+ }
+ }
+
+ /* Initialize user supplementary groups and get SupplementaryGroups= ones */
+ r = get_supplementary_groups(context, username, groupname, gid,
+ &supplementary_gids, &ngids);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
+ }
+
+ r = send_user_lookup(unit, user_lookup_fd, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
+ }
+
+ user_lookup_fd = safe_close(user_lookup_fd);
+
+ r = acquire_home(context, uid, &home, &home_buffer);
+ if (r < 0) {
+ *exit_status = EXIT_CHDIR;
+ return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
+ }
+
+ /* If a socket is connected to STDIN/STDOUT/STDERR, we
+ * must sure to drop O_NONBLOCK */
+ if (socket_fd >= 0)
+ (void) fd_nonblock(socket_fd, false);
+
+ /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
+ * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
+ if (params->cgroup_path) {
+ _cleanup_free_ char *p = NULL;
+
+ r = exec_parameters_get_cgroup_path(params, &p);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
+ }
+
+ r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
+ }
+ }
+
+ if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
+ r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
+ if (r < 0) {
+ *exit_status = EXIT_NETWORK;
+ return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
+ }
+ }
+
+ r = setup_input(context, params, socket_fd, named_iofds);
+ if (r < 0) {
+ *exit_status = EXIT_STDIN;
+ return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
+ }
+
+ r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ if (r < 0) {
+ *exit_status = EXIT_STDOUT;
+ return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
+ }
+
+ r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ if (r < 0) {
+ *exit_status = EXIT_STDERR;
+ return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
+ }
+
+ if (context->oom_score_adjust_set) {
+ /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
+ * prohibit write access to this file, and we shouldn't trip up over that. */
+ r = set_oom_score_adjust(context->oom_score_adjust);
+ if (ERRNO_IS_PRIVILEGE(r))
+ log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
+ else if (r < 0) {
+ *exit_status = EXIT_OOM_ADJUST;
+ return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
+ }
+ }
+
+ if (context->coredump_filter_set) {
+ r = set_coredump_filter(context->coredump_filter);
+ if (ERRNO_IS_PRIVILEGE(r))
+ log_unit_debug_errno(unit, r, "Failed to adjust coredump_filter, ignoring: %m");
+ else if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to adjust coredump_filter: %m");
+ }
+
+ if (context->nice_set) {
+ r = setpriority_closest(context->nice);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to set up process scheduling priority (nice level): %m");
+ }
+
+ if (context->cpu_sched_set) {
+ struct sched_param param = {
+ .sched_priority = context->cpu_sched_priority,
+ };
+
+ r = sched_setscheduler(0,
+ context->cpu_sched_policy |
+ (context->cpu_sched_reset_on_fork ?
+ SCHED_RESET_ON_FORK : 0),
+ &param);
+ if (r < 0) {
+ *exit_status = EXIT_SETSCHEDULER;
+ return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
+ }
+ }
+
+ if (context->cpu_affinity_from_numa || context->cpu_set.set) {
+ _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
+ const CPUSet *cpu_set;
+
+ if (context->cpu_affinity_from_numa) {
+ r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
+ if (r < 0) {
+ *exit_status = EXIT_CPUAFFINITY;
+ return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
+ }
+
+ cpu_set = &converted_cpu_set;
+ } else
+ cpu_set = &context->cpu_set;
+
+ if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
+ *exit_status = EXIT_CPUAFFINITY;
+ return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
+ }
+ }
+
+ if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
+ r = apply_numa_policy(&context->numa_policy);
+ if (r == -EOPNOTSUPP)
+ log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
+ else if (r < 0) {
+ *exit_status = EXIT_NUMA_POLICY;
+ return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
+ }
+ }
+
+ if (context->ioprio_set)
+ if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
+ *exit_status = EXIT_IOPRIO;
+ return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
+ }
+
+ if (context->timer_slack_nsec != NSEC_INFINITY)
+ if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
+ *exit_status = EXIT_TIMERSLACK;
+ return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
+ }
+
+ if (context->personality != PERSONALITY_INVALID) {
+ r = safe_personality(context->personality);
+ if (r < 0) {
+ *exit_status = EXIT_PERSONALITY;
+ return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
+ }
+ }
+
+ if (context->utmp_id)
+ utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
+ context->tty_path,
+ context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
+ context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
+ USER_PROCESS,
+ username);
+
+ if (uid_is_valid(uid)) {
+ r = chown_terminal(STDIN_FILENO, uid);
+ if (r < 0) {
+ *exit_status = EXIT_STDIN;
+ return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
+ }
+ }
+
+ /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
+ * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
+ * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
+ * touch a single hierarchy too. */
+ if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
+ }
+ }
+
+ for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
+ r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
+ }
+
+ if (FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
+ r = setup_credentials(context, params, unit->id, uid);
+ if (r < 0) {
+ *exit_status = EXIT_CREDENTIALS;
+ return log_unit_error_errno(unit, r, "Failed to set up credentials: %m");
+ }
+ }
+
+ r = build_environment(
+ unit,
+ context,
+ params,
+ n_fds,
+ home,
+ username,
+ shell,
+ journal_stream_dev,
+ journal_stream_ino,
+ &our_env);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = build_pass_environment(context, &pass_env);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ accum_env = strv_env_merge(5,
+ params->environment,
+ our_env,
+ pass_env,
+ context->environment,
+ files_env);
+ if (!accum_env) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+ accum_env = strv_env_clean(accum_env);
+
+ (void) umask(context->umask);
+
+ r = setup_keyring(unit, context, params, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_KEYRING;
+ return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
+ }
+
+ /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
+ needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
+
+ /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
+ needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
+
+ /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
+ if (needs_ambient_hack)
+ needs_setuid = false;
+ else
+ needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
+
+ if (needs_sandboxing) {
+ /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
+ * present. The actual MAC context application will happen later, as late as possible, to avoid
+ * impacting our own code paths. */
+
+#if HAVE_SELINUX
+ use_selinux = mac_selinux_use();
+#endif
+#if ENABLE_SMACK
+ use_smack = mac_smack_use();
+#endif
+#if HAVE_APPARMOR
+ use_apparmor = mac_apparmor_use();
+#endif
+ }
+
+ if (needs_sandboxing) {
+ int which_failed;
+
+ /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
+ * is set here. (See below.) */
+
+ r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
+ if (r < 0) {
+ *exit_status = EXIT_LIMITS;
+ return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
+ }
+ }
+
+ if (needs_setuid && context->pam_name && username) {
+ /* Let's call into PAM after we set up our own idea of resource limits to that pam_limits
+ * wins here. (See above.) */
+
+ r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
+ if (r < 0) {
+ *exit_status = EXIT_PAM;
+ return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
+ }
+
+ ngids_after_pam = getgroups_alloc(&gids_after_pam);
+ if (ngids_after_pam < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_unit_error_errno(unit, ngids_after_pam, "Failed to obtain groups after setting up PAM: %m");
+ }
+ }
+
+ if (needs_sandboxing && context->private_users && !have_effective_cap(CAP_SYS_ADMIN)) {
+ /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
+ * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
+ * set up the all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
+
+ userns_set_up = true;
+ r = setup_private_users(saved_uid, saved_gid, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to set up user namespacing for unprivileged user: %m");
+ }
+ }
+
+ if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
+
+ if (ns_type_supported(NAMESPACE_NET)) {
+ r = setup_netns(runtime->netns_storage_socket);
+ if (r == -EPERM)
+ log_unit_warning_errno(unit, r,
+ "PrivateNetwork=yes is configured, but network namespace setup failed, ignoring: %m");
+ else if (r < 0) {
+ *exit_status = EXIT_NETWORK;
+ return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
+ }
+ } else if (context->network_namespace_path) {
+ *exit_status = EXIT_NETWORK;
+ return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "NetworkNamespacePath= is not supported, refusing.");
+ } else
+ log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
+ }
+
+ needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
+ if (needs_mount_namespace) {
+ _cleanup_free_ char *error_path = NULL;
+
+ r = apply_mount_namespace(unit, command->flags, context, params, runtime, &error_path);
+ if (r < 0) {
+ *exit_status = EXIT_NAMESPACE;
+ return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
+ error_path ? ": " : "", strempty(error_path));
+ }
+ }
+
+ if (needs_sandboxing) {
+ r = apply_protect_hostname(unit, context, exit_status);
+ if (r < 0)
+ return r;
+ }
+
+ /* Drop groups as early as possible.
+ * This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
+ * For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
+ if (needs_setuid) {
+ _cleanup_free_ gid_t *gids_to_enforce = NULL;
+ int ngids_to_enforce = 0;
+
+ ngids_to_enforce = merge_gid_lists(supplementary_gids,
+ ngids,
+ gids_after_pam,
+ ngids_after_pam,
+ &gids_to_enforce);
+ if (ngids_to_enforce < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_unit_error_errno(unit,
+ ngids_to_enforce,
+ "Failed to merge group lists. Group membership might be incorrect: %m");
+ }
+
+ r = enforce_groups(gid, gids_to_enforce, ngids_to_enforce);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
+ }
+ }
+
+ /* If the user namespace was not set up above, try to do it now.
+ * It's preferred to set up the user namespace later (after all other namespaces) so as not to be
+ * restricted by rules pertaining to combining user namspaces with other namespaces (e.g. in the
+ * case of mount namespaces being less privileged when the mount point list is copied from a
+ * different user namespace). */
+
+ if (needs_sandboxing && context->private_users && !userns_set_up) {
+ r = setup_private_users(saved_uid, saved_gid, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
+ }
+ }
+
+ /* Now that the mount namespace has been set up and privileges adjusted, let's look for the thing we
+ * shall execute. */
+
+ _cleanup_free_ char *executable = NULL;
+ r = find_executable_full(command->path, false, &executable);
+ if (r < 0) {
+ if (r != -ENOMEM && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
+ log_struct_errno(LOG_INFO, r,
+ "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
+ LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
+ command->path),
+ "EXECUTABLE=%s", command->path);
+ return 0;
+ }
+
+ *exit_status = EXIT_EXEC;
+ return log_struct_errno(LOG_INFO, r,
+ "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
+ LOG_UNIT_MESSAGE(unit, "Failed to locate executable %s: %m",
+ command->path),
+ "EXECUTABLE=%s", command->path);
+ }
+
+#if HAVE_SELINUX
+ if (needs_sandboxing && use_selinux && params->selinux_context_net && socket_fd >= 0) {
+ r = mac_selinux_get_child_mls_label(socket_fd, executable, context->selinux_context, &mac_selinux_context_net);
+ if (r < 0) {
+ *exit_status = EXIT_SELINUX_CONTEXT;
+ return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
+ }
+ }
+#endif
+
+ /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
+ * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
+ * however if we have it as we want to keep it open until the final execve(). */
+
+ if (params->exec_fd >= 0) {
+ exec_fd = params->exec_fd;
+
+ if (exec_fd < 3 + (int) n_fds) {
+ int moved_fd;
+
+ /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
+ * process we are about to execute. */
+
+ moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
+ if (moved_fd < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
+ }
+
+ CLOSE_AND_REPLACE(exec_fd, moved_fd);
+ } else {
+ /* This fd should be FD_CLOEXEC already, but let's make sure. */
+ r = fd_cloexec(exec_fd, true);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
+ }
+ }
+
+ fds_with_exec_fd = newa(int, n_fds + 1);
+ memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
+ fds_with_exec_fd[n_fds] = exec_fd;
+ n_fds_with_exec_fd = n_fds + 1;
+ } else {
+ fds_with_exec_fd = fds;
+ n_fds_with_exec_fd = n_fds;
+ }
+
+ r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
+ if (r >= 0)
+ r = shift_fds(fds, n_fds);
+ if (r >= 0)
+ r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
+ }
+
+ /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
+ * and at the right fd numbers. The are no other fds open, with one exception: the exec_fd if it is defined,
+ * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
+ * came this far. */
+
+ secure_bits = context->secure_bits;
+
+ if (needs_sandboxing) {
+ uint64_t bset;
+
+ /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
+ * requested. (Note this is placed after the general resource limit initialization, see
+ * above, in order to take precedence.) */
+ if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
+ if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
+ *exit_status = EXIT_LIMITS;
+ return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
+ }
+ }
+
+#if ENABLE_SMACK
+ /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
+ * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
+ if (use_smack) {
+ r = setup_smack(context, executable);
+ if (r < 0) {
+ *exit_status = EXIT_SMACK_PROCESS_LABEL;
+ return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
+ }
+ }
+#endif
+
+ bset = context->capability_bounding_set;
+ /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
+ * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
+ * instead of us doing that */
+ if (needs_ambient_hack)
+ bset |= (UINT64_C(1) << CAP_SETPCAP) |
+ (UINT64_C(1) << CAP_SETUID) |
+ (UINT64_C(1) << CAP_SETGID);
+
+ if (!cap_test_all(bset)) {
+ r = capability_bounding_set_drop(bset, false);
+ if (r < 0) {
+ *exit_status = EXIT_CAPABILITIES;
+ return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
+ }
+ }
+
+ /* Ambient capabilities are cleared during setresuid() (in enforce_user()) even with
+ * keep-caps set.
+ * To be able to raise the ambient capabilities after setresuid() they have to be
+ * added to the inherited set and keep caps has to be set (done in enforce_user()).
+ * After setresuid() the ambient capabilities can be raised as they are present in
+ * the permitted and inhertiable set. However it is possible that someone wants to
+ * set ambient capabilities without changing the user, so we also set the ambient
+ * capabilities here.
+ * The requested ambient capabilities are raised in the inheritable set if the
+ * second argument is true. */
+ if (!needs_ambient_hack) {
+ r = capability_ambient_set_apply(context->capability_ambient_set, true);
+ if (r < 0) {
+ *exit_status = EXIT_CAPABILITIES;
+ return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
+ }
+ }
+ }
+
+ /* chroot to root directory first, before we lose the ability to chroot */
+ r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
+
+ if (needs_setuid) {
+ if (uid_is_valid(uid)) {
+ r = enforce_user(context, uid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
+ }
+
+ if (!needs_ambient_hack &&
+ context->capability_ambient_set != 0) {
+
+ /* Raise the ambient capabilities after user change. */
+ r = capability_ambient_set_apply(context->capability_ambient_set, false);
+ if (r < 0) {
+ *exit_status = EXIT_CAPABILITIES;
+ return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
+ }
+ }
+ }
+ }
+
+ /* Apply working directory here, because the working directory might be on NFS and only the user running
+ * this service might have the correct privilege to change to the working directory */
+ r = apply_working_directory(context, params, home, exit_status);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
+
+ if (needs_sandboxing) {
+ /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
+ * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
+ * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
+ * are restricted. */
+
+#if HAVE_SELINUX
+ if (use_selinux) {
+ char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+
+ if (exec_context) {
+ r = setexeccon(exec_context);
+ if (r < 0) {
+ *exit_status = EXIT_SELINUX_CONTEXT;
+ return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
+ }
+ }
+ }
+#endif
+
+#if HAVE_APPARMOR
+ if (use_apparmor && context->apparmor_profile) {
+ r = aa_change_onexec(context->apparmor_profile);
+ if (r < 0 && !context->apparmor_profile_ignore) {
+ *exit_status = EXIT_APPARMOR_PROFILE;
+ return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
+ }
+ }
+#endif
+
+ /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
+ * we'll try not to call PR_SET_SECUREBITS unless necessary. Setting securebits requires
+ * CAP_SETPCAP. */
+ if (prctl(PR_GET_SECUREBITS) != secure_bits) {
+ /* CAP_SETPCAP is required to set securebits. This capability is raised into the
+ * effective set here.
+ * The effective set is overwritten during execve with the following values:
+ * - ambient set (for non-root processes)
+ * - (inheritable | bounding) set for root processes)
+ *
+ * Hence there is no security impact to raise it in the effective set before execve
+ */
+ r = capability_gain_cap_setpcap(NULL);
+ if (r < 0) {
+ *exit_status = EXIT_CAPABILITIES;
+ return log_unit_error_errno(unit, r, "Failed to gain CAP_SETPCAP for setting secure bits");
+ }
+ if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
+ *exit_status = EXIT_SECUREBITS;
+ return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
+ }
+ }
+
+ if (context_has_no_new_privileges(context))
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+ *exit_status = EXIT_NO_NEW_PRIVILEGES;
+ return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
+ }
+
+#if HAVE_SECCOMP
+ r = apply_address_families(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_ADDRESS_FAMILIES;
+ return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
+ }
+
+ r = apply_memory_deny_write_execute(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
+ }
+
+ r = apply_restrict_realtime(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
+ }
+
+ r = apply_restrict_suid_sgid(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
+ }
+
+ r = apply_restrict_namespaces(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
+ }
+
+ r = apply_protect_sysctl(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
+ }
+
+ r = apply_protect_kernel_modules(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
+ }
+
+ r = apply_protect_kernel_logs(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply kernel log restrictions: %m");
+ }
+
+ r = apply_protect_clock(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply clock restrictions: %m");
+ }
+
+ r = apply_private_devices(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
+ }
+
+ r = apply_syscall_archs(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
+ }
+
+ r = apply_lock_personality(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
+ }
+
+ r = apply_syscall_log(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply system call log filters: %m");
+ }
+
+ /* This really should remain the last step before the execve(), to make sure our own code is unaffected
+ * by the filter as little as possible. */
+ r = apply_syscall_filter(unit, context, needs_ambient_hack);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
+ }
+#endif
+ }
+
+ if (!strv_isempty(context->unset_environment)) {
+ char **ee = NULL;
+
+ ee = strv_env_delete(accum_env, 1, context->unset_environment);
+ if (!ee) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ strv_free_and_replace(accum_env, ee);
+ }
+
+ if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
+ replaced_argv = replace_env_argv(command->argv, accum_env);
+ if (!replaced_argv) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+ final_argv = replaced_argv;
+ } else
+ final_argv = command->argv;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *line;
+
+ line = exec_command_line(final_argv);
+ if (line)
+ log_struct(LOG_DEBUG,
+ "EXECUTABLE=%s", executable,
+ LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit));
+ }
+
+ if (exec_fd >= 0) {
+ uint8_t hot = 1;
+
+ /* We have finished with all our initializations. Let's now let the manager know that. From this point
+ * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
+
+ if (write(exec_fd, &hot, sizeof(hot)) < 0) {
+ *exit_status = EXIT_EXEC;
+ return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
+ }
+ }
+
+ execve(executable, final_argv, accum_env);
+ r = -errno;
+
+ if (exec_fd >= 0) {
+ uint8_t hot = 0;
+
+ /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
+ * that POLLHUP on it no longer means execve() succeeded. */
+
+ if (write(exec_fd, &hot, sizeof(hot)) < 0) {
+ *exit_status = EXIT_EXEC;
+ return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
+ }
+ }
+
+ *exit_status = EXIT_EXEC;
+ return log_unit_error_errno(unit, r, "Failed to execute %s: %m", executable);
+}
+
+static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
+static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
+
+int exec_spawn(Unit *unit,
+ ExecCommand *command,
+ const ExecContext *context,
+ const ExecParameters *params,
+ ExecRuntime *runtime,
+ DynamicCreds *dcreds,
+ pid_t *ret) {
+
+ int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
+ _cleanup_free_ char *subcgroup_path = NULL;
+ _cleanup_strv_free_ char **files_env = NULL;
+ size_t n_storage_fds = 0, n_socket_fds = 0;
+ _cleanup_free_ char *line = NULL;
+ pid_t pid;
+
+ assert(unit);
+ assert(command);
+ assert(context);
+ assert(ret);
+ assert(params);
+ assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
+
+ if (context->std_input == EXEC_INPUT_SOCKET ||
+ context->std_output == EXEC_OUTPUT_SOCKET ||
+ context->std_error == EXEC_OUTPUT_SOCKET) {
+
+ if (params->n_socket_fds > 1) {
+ log_unit_error(unit, "Got more than one socket.");
+ return -EINVAL;
+ }
+
+ if (params->n_socket_fds == 0) {
+ log_unit_error(unit, "Got no socket.");
+ return -EINVAL;
+ }
+
+ socket_fd = params->fds[0];
+ } else {
+ socket_fd = -1;
+ fds = params->fds;
+ n_socket_fds = params->n_socket_fds;
+ n_storage_fds = params->n_storage_fds;
+ }
+
+ r = exec_context_named_iofds(context, params, named_iofds);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
+
+ r = exec_context_load_environment(unit, context, &files_env);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
+
+ line = exec_command_line(command->argv);
+ if (!line)
+ return log_oom();
+
+ /* Fork with up-to-date SELinux label database, so the child inherits the up-to-date db
+ and, until the next SELinux policy changes, we save further reloads in future children. */
+ mac_selinux_maybe_reload();
+
+ log_struct(LOG_DEBUG,
+ LOG_UNIT_MESSAGE(unit, "About to execute %s", line),
+ "EXECUTABLE=%s", command->path, /* We won't know the real executable path until we create
+ the mount namespace in the child, but we want to log
+ from the parent, so we need to use the (possibly
+ inaccurate) path here. */
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit));
+
+ if (params->cgroup_path) {
+ r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
+ if (r > 0) { /* We are using a child cgroup */
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
+ }
+ }
+
+ pid = fork();
+ if (pid < 0)
+ return log_unit_error_errno(unit, errno, "Failed to fork: %m");
+
+ if (pid == 0) {
+ int exit_status = EXIT_SUCCESS;
+
+ r = exec_child(unit,
+ command,
+ context,
+ params,
+ runtime,
+ dcreds,
+ socket_fd,
+ named_iofds,
+ fds,
+ n_socket_fds,
+ n_storage_fds,
+ files_env,
+ unit->manager->user_lookup_fds[1],
+ &exit_status);
+
+ if (r < 0) {
+ const char *status =
+ exit_status_to_string(exit_status,
+ EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
+
+ log_struct_errno(LOG_ERR, r,
+ "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+ LOG_UNIT_ID(unit),
+ LOG_UNIT_INVOCATION_ID(unit),
+ LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
+ status, command->path),
+ "EXECUTABLE=%s", command->path);
+ }
+
+ _exit(exit_status);
+ }
+
+ log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
+
+ /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
+ * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
+ * process will be killed too). */
+ if (subcgroup_path)
+ (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
+
+ exec_status_start(&command->exec_status, pid);
+
+ *ret = pid;
+ return 0;
+}
+
+void exec_context_init(ExecContext *c) {
+ assert(c);
+
+ c->umask = 0022;
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
+ c->cpu_sched_policy = SCHED_OTHER;
+ c->syslog_priority = LOG_DAEMON|LOG_INFO;
+ c->syslog_level_prefix = true;
+ c->ignore_sigpipe = true;
+ c->timer_slack_nsec = NSEC_INFINITY;
+ c->personality = PERSONALITY_INVALID;
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
+ c->directories[t].mode = 0755;
+ c->timeout_clean_usec = USEC_INFINITY;
+ c->capability_bounding_set = CAP_ALL;
+ assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
+ c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
+ c->log_level_max = -1;
+#if HAVE_SECCOMP
+ c->syscall_errno = SECCOMP_ERROR_NUMBER_KILL;
+#endif
+ numa_policy_reset(&c->numa_policy);
+}
+
+void exec_context_done(ExecContext *c) {
+ assert(c);
+
+ c->environment = strv_free(c->environment);
+ c->environment_files = strv_free(c->environment_files);
+ c->pass_environment = strv_free(c->pass_environment);
+ c->unset_environment = strv_free(c->unset_environment);
+
+ rlimit_free_all(c->rlimit);
+
+ for (size_t l = 0; l < 3; l++) {
+ c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
+ c->stdio_file[l] = mfree(c->stdio_file[l]);
+ }
+
+ c->working_directory = mfree(c->working_directory);
+ c->root_directory = mfree(c->root_directory);
+ c->root_image = mfree(c->root_image);
+ c->root_image_options = mount_options_free_all(c->root_image_options);
+ c->root_hash = mfree(c->root_hash);
+ c->root_hash_size = 0;
+ c->root_hash_path = mfree(c->root_hash_path);
+ c->root_hash_sig = mfree(c->root_hash_sig);
+ c->root_hash_sig_size = 0;
+ c->root_hash_sig_path = mfree(c->root_hash_sig_path);
+ c->root_verity = mfree(c->root_verity);
+ c->tty_path = mfree(c->tty_path);
+ c->syslog_identifier = mfree(c->syslog_identifier);
+ c->user = mfree(c->user);
+ c->group = mfree(c->group);
+
+ c->supplementary_groups = strv_free(c->supplementary_groups);
+
+ c->pam_name = mfree(c->pam_name);
+
+ c->read_only_paths = strv_free(c->read_only_paths);
+ c->read_write_paths = strv_free(c->read_write_paths);
+ c->inaccessible_paths = strv_free(c->inaccessible_paths);
+
+ bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+ c->bind_mounts = NULL;
+ c->n_bind_mounts = 0;
+ temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
+ c->temporary_filesystems = NULL;
+ c->n_temporary_filesystems = 0;
+ c->mount_images = mount_image_free_many(c->mount_images, &c->n_mount_images);
+
+ cpu_set_reset(&c->cpu_set);
+ numa_policy_reset(&c->numa_policy);
+
+ c->utmp_id = mfree(c->utmp_id);
+ c->selinux_context = mfree(c->selinux_context);
+ c->apparmor_profile = mfree(c->apparmor_profile);
+ c->smack_process_label = mfree(c->smack_process_label);
+
+ c->syscall_filter = hashmap_free(c->syscall_filter);
+ c->syscall_archs = set_free(c->syscall_archs);
+ c->address_families = set_free(c->address_families);
+
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
+ c->directories[t].paths = strv_free(c->directories[t].paths);
+
+ c->log_level_max = -1;
+
+ exec_context_free_log_extra_fields(c);
+
+ c->log_ratelimit_interval_usec = 0;
+ c->log_ratelimit_burst = 0;
+
+ c->stdin_data = mfree(c->stdin_data);
+ c->stdin_data_size = 0;
+
+ c->network_namespace_path = mfree(c->network_namespace_path);
+
+ c->log_namespace = mfree(c->log_namespace);
+
+ c->load_credentials = strv_free(c->load_credentials);
+ c->set_credentials = hashmap_free(c->set_credentials);
+}
+
+int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
+ char **i;
+
+ assert(c);
+
+ if (!runtime_prefix)
+ return 0;
+
+ STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
+ _cleanup_free_ char *p;
+
+ if (exec_directory_is_private(c, EXEC_DIRECTORY_RUNTIME))
+ p = path_join(runtime_prefix, "private", *i);
+ else
+ p = path_join(runtime_prefix, *i);
+ if (!p)
+ return -ENOMEM;
+
+ /* We execute this synchronously, since we need to be sure this is gone when we start the
+ * service next. */
+ (void) rm_rf(p, REMOVE_ROOT);
+ }
+
+ return 0;
+}
+
+int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_prefix, const char *unit) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(c);
+
+ if (!runtime_prefix || !unit)
+ return 0;
+
+ p = path_join(runtime_prefix, "credentials", unit);
+ if (!p)
+ return -ENOMEM;
+
+ /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
+ * unmount it, and afterwards remove the mount point */
+ (void) umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW);
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
+
+ return 0;
+}
+
+static void exec_command_done(ExecCommand *c) {
+ assert(c);
+
+ c->path = mfree(c->path);
+ c->argv = strv_free(c->argv);
+}
+
+void exec_command_done_array(ExecCommand *c, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ exec_command_done(c+i);
+}
+
+ExecCommand* exec_command_free_list(ExecCommand *c) {
+ ExecCommand *i;
+
+ while ((i = c)) {
+ LIST_REMOVE(command, c, i);
+ exec_command_done(i);
+ free(i);
+ }
+
+ return NULL;
+}
+
+void exec_command_free_array(ExecCommand **c, size_t n) {
+ for (size_t i = 0; i < n; i++)
+ c[i] = exec_command_free_list(c[i]);
+}
+
+void exec_command_reset_status_array(ExecCommand *c, size_t n) {
+ for (size_t i = 0; i < n; i++)
+ exec_status_reset(&c[i].exec_status);
+}
+
+void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
+ for (size_t i = 0; i < n; i++) {
+ ExecCommand *z;
+
+ LIST_FOREACH(command, z, c[i])
+ exec_status_reset(&z->exec_status);
+ }
+}
+
+typedef struct InvalidEnvInfo {
+ const Unit *unit;
+ const char *path;
+} InvalidEnvInfo;
+
+static void invalid_env(const char *p, void *userdata) {
+ InvalidEnvInfo *info = userdata;
+
+ log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
+}
+
+const char* exec_context_fdname(const ExecContext *c, int fd_index) {
+ assert(c);
+
+ switch (fd_index) {
+
+ case STDIN_FILENO:
+ if (c->std_input != EXEC_INPUT_NAMED_FD)
+ return NULL;
+
+ return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
+
+ case STDOUT_FILENO:
+ if (c->std_output != EXEC_OUTPUT_NAMED_FD)
+ return NULL;
+
+ return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
+
+ case STDERR_FILENO:
+ if (c->std_error != EXEC_OUTPUT_NAMED_FD)
+ return NULL;
+
+ return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
+
+ default:
+ return NULL;
+ }
+}
+
+static int exec_context_named_iofds(
+ const ExecContext *c,
+ const ExecParameters *p,
+ int named_iofds[static 3]) {
+
+ size_t targets;
+ const char* stdio_fdname[3];
+ size_t n_fds;
+
+ assert(c);
+ assert(p);
+ assert(named_iofds);
+
+ targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
+ (c->std_output == EXEC_OUTPUT_NAMED_FD) +
+ (c->std_error == EXEC_OUTPUT_NAMED_FD);
+
+ for (size_t i = 0; i < 3; i++)
+ stdio_fdname[i] = exec_context_fdname(c, i);
+
+ n_fds = p->n_storage_fds + p->n_socket_fds;
+
+ for (size_t i = 0; i < n_fds && targets > 0; i++)
+ if (named_iofds[STDIN_FILENO] < 0 &&
+ c->std_input == EXEC_INPUT_NAMED_FD &&
+ stdio_fdname[STDIN_FILENO] &&
+ streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
+
+ named_iofds[STDIN_FILENO] = p->fds[i];
+ targets--;
+
+ } else if (named_iofds[STDOUT_FILENO] < 0 &&
+ c->std_output == EXEC_OUTPUT_NAMED_FD &&
+ stdio_fdname[STDOUT_FILENO] &&
+ streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
+
+ named_iofds[STDOUT_FILENO] = p->fds[i];
+ targets--;
+
+ } else if (named_iofds[STDERR_FILENO] < 0 &&
+ c->std_error == EXEC_OUTPUT_NAMED_FD &&
+ stdio_fdname[STDERR_FILENO] &&
+ streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
+
+ named_iofds[STDERR_FILENO] = p->fds[i];
+ targets--;
+ }
+
+ return targets == 0 ? 0 : -ENOENT;
+}
+
+static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
+ char **i, **r = NULL;
+
+ assert(c);
+ assert(l);
+
+ STRV_FOREACH(i, c->environment_files) {
+ char *fn;
+ int k;
+ bool ignore = false;
+ char **p;
+ _cleanup_globfree_ glob_t pglob = {};
+
+ fn = *i;
+
+ if (fn[0] == '-') {
+ ignore = true;
+ fn++;
+ }
+
+ if (!path_is_absolute(fn)) {
+ if (ignore)
+ continue;
+
+ strv_free(r);
+ return -EINVAL;
+ }
+
+ /* Filename supports globbing, take all matching files */
+ k = safe_glob(fn, 0, &pglob);
+ if (k < 0) {
+ if (ignore)
+ continue;
+
+ strv_free(r);
+ return k;
+ }
+
+ /* When we don't match anything, -ENOENT should be returned */
+ assert(pglob.gl_pathc > 0);
+
+ for (unsigned n = 0; n < pglob.gl_pathc; n++) {
+ k = load_env_file(NULL, pglob.gl_pathv[n], &p);
+ if (k < 0) {
+ if (ignore)
+ continue;
+
+ strv_free(r);
+ return k;
+ }
+ /* Log invalid environment variables with filename */
+ if (p) {
+ InvalidEnvInfo info = {
+ .unit = unit,
+ .path = pglob.gl_pathv[n]
+ };
+
+ p = strv_env_clean_with_callback(p, invalid_env, &info);
+ }
+
+ if (!r)
+ r = p;
+ else {
+ char **m;
+
+ m = strv_env_merge(2, r, p);
+ strv_free(r);
+ strv_free(p);
+ if (!m)
+ return -ENOMEM;
+
+ r = m;
+ }
+ }
+ }
+
+ *l = r;
+
+ return 0;
+}
+
+static bool tty_may_match_dev_console(const char *tty) {
+ _cleanup_free_ char *resolved = NULL;
+
+ if (!tty)
+ return true;
+
+ tty = skip_dev_prefix(tty);
+
+ /* trivial identity? */
+ if (streq(tty, "console"))
+ return true;
+
+ if (resolve_dev_console(&resolved) < 0)
+ return true; /* if we could not resolve, assume it may */
+
+ /* "tty0" means the active VC, so it may be the same sometimes */
+ return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
+}
+
+static bool exec_context_may_touch_tty(const ExecContext *ec) {
+ assert(ec);
+
+ return ec->tty_reset ||
+ ec->tty_vhangup ||
+ ec->tty_vt_disallocate ||
+ is_terminal_input(ec->std_input) ||
+ is_terminal_output(ec->std_output) ||
+ is_terminal_output(ec->std_error);
+}
+
+bool exec_context_may_touch_console(const ExecContext *ec) {
+
+ return exec_context_may_touch_tty(ec) &&
+ tty_may_match_dev_console(exec_context_tty_path(ec));
+}
+
+static void strv_fprintf(FILE *f, char **l) {
+ char **g;
+
+ assert(f);
+
+ STRV_FOREACH(g, l)
+ fprintf(f, " %s", *g);
+}
+
+void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
+ char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
+ int r;
+
+ assert(c);
+ assert(f);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%sUMask: %04o\n"
+ "%sWorkingDirectory: %s\n"
+ "%sRootDirectory: %s\n"
+ "%sNonBlocking: %s\n"
+ "%sPrivateTmp: %s\n"
+ "%sPrivateDevices: %s\n"
+ "%sProtectKernelTunables: %s\n"
+ "%sProtectKernelModules: %s\n"
+ "%sProtectKernelLogs: %s\n"
+ "%sProtectClock: %s\n"
+ "%sProtectControlGroups: %s\n"
+ "%sPrivateNetwork: %s\n"
+ "%sPrivateUsers: %s\n"
+ "%sProtectHome: %s\n"
+ "%sProtectSystem: %s\n"
+ "%sMountAPIVFS: %s\n"
+ "%sIgnoreSIGPIPE: %s\n"
+ "%sMemoryDenyWriteExecute: %s\n"
+ "%sRestrictRealtime: %s\n"
+ "%sRestrictSUIDSGID: %s\n"
+ "%sKeyringMode: %s\n"
+ "%sProtectHostname: %s\n"
+ "%sProtectProc: %s\n"
+ "%sProcSubset: %s\n",
+ prefix, c->umask,
+ prefix, empty_to_root(c->working_directory),
+ prefix, empty_to_root(c->root_directory),
+ prefix, yes_no(c->non_blocking),
+ prefix, yes_no(c->private_tmp),
+ prefix, yes_no(c->private_devices),
+ prefix, yes_no(c->protect_kernel_tunables),
+ prefix, yes_no(c->protect_kernel_modules),
+ prefix, yes_no(c->protect_kernel_logs),
+ prefix, yes_no(c->protect_clock),
+ prefix, yes_no(c->protect_control_groups),
+ prefix, yes_no(c->private_network),
+ prefix, yes_no(c->private_users),
+ prefix, protect_home_to_string(c->protect_home),
+ prefix, protect_system_to_string(c->protect_system),
+ prefix, yes_no(exec_context_get_effective_mount_apivfs(c)),
+ prefix, yes_no(c->ignore_sigpipe),
+ prefix, yes_no(c->memory_deny_write_execute),
+ prefix, yes_no(c->restrict_realtime),
+ prefix, yes_no(c->restrict_suid_sgid),
+ prefix, exec_keyring_mode_to_string(c->keyring_mode),
+ prefix, yes_no(c->protect_hostname),
+ prefix, protect_proc_to_string(c->protect_proc),
+ prefix, proc_subset_to_string(c->proc_subset));
+
+ if (c->root_image)
+ fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
+
+ if (c->root_image_options) {
+ MountOptions *o;
+
+ fprintf(f, "%sRootImageOptions:", prefix);
+ LIST_FOREACH(mount_options, o, c->root_image_options)
+ if (!isempty(o->options))
+ fprintf(f, " %s:%s",
+ partition_designator_to_string(o->partition_designator),
+ o->options);
+ fprintf(f, "\n");
+ }
+
+ if (c->root_hash) {
+ _cleanup_free_ char *encoded = NULL;
+ encoded = hexmem(c->root_hash, c->root_hash_size);
+ if (encoded)
+ fprintf(f, "%sRootHash: %s\n", prefix, encoded);
+ }
+
+ if (c->root_hash_path)
+ fprintf(f, "%sRootHash: %s\n", prefix, c->root_hash_path);
+
+ if (c->root_hash_sig) {
+ _cleanup_free_ char *encoded = NULL;
+ ssize_t len;
+ len = base64mem(c->root_hash_sig, c->root_hash_sig_size, &encoded);
+ if (len)
+ fprintf(f, "%sRootHashSignature: base64:%s\n", prefix, encoded);
+ }
+
+ if (c->root_hash_sig_path)
+ fprintf(f, "%sRootHashSignature: %s\n", prefix, c->root_hash_sig_path);
+
+ if (c->root_verity)
+ fprintf(f, "%sRootVerity: %s\n", prefix, c->root_verity);
+
+ STRV_FOREACH(e, c->environment)
+ fprintf(f, "%sEnvironment: %s\n", prefix, *e);
+
+ STRV_FOREACH(e, c->environment_files)
+ fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
+
+ STRV_FOREACH(e, c->pass_environment)
+ fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
+
+ STRV_FOREACH(e, c->unset_environment)
+ fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
+
+ fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
+
+ for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
+ fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
+
+ STRV_FOREACH(d, c->directories[dt].paths)
+ fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
+ }
+
+ fprintf(f,
+ "%sTimeoutCleanSec: %s\n",
+ prefix, format_timespan(buf_clean, sizeof(buf_clean), c->timeout_clean_usec, USEC_PER_SEC));
+
+ if (c->nice_set)
+ fprintf(f,
+ "%sNice: %i\n",
+ prefix, c->nice);
+
+ if (c->oom_score_adjust_set)
+ fprintf(f,
+ "%sOOMScoreAdjust: %i\n",
+ prefix, c->oom_score_adjust);
+
+ if (c->coredump_filter_set)
+ fprintf(f,
+ "%sCoredumpFilter: 0x%"PRIx64"\n",
+ prefix, c->coredump_filter);
+
+ for (unsigned i = 0; i < RLIM_NLIMITS; i++)
+ if (c->rlimit[i]) {
+ fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
+ prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
+ fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
+ prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
+ }
+
+ if (c->ioprio_set) {
+ _cleanup_free_ char *class_str = NULL;
+
+ r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
+ if (r >= 0)
+ fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
+
+ fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
+ }
+
+ if (c->cpu_sched_set) {
+ _cleanup_free_ char *policy_str = NULL;
+
+ r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
+ if (r >= 0)
+ fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
+
+ fprintf(f,
+ "%sCPUSchedulingPriority: %i\n"
+ "%sCPUSchedulingResetOnFork: %s\n",
+ prefix, c->cpu_sched_priority,
+ prefix, yes_no(c->cpu_sched_reset_on_fork));
+ }
+
+ if (c->cpu_set.set) {
+ _cleanup_free_ char *affinity = NULL;
+
+ affinity = cpu_set_to_range_string(&c->cpu_set);
+ fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
+ }
+
+ if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
+ _cleanup_free_ char *nodes = NULL;
+
+ nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
+ fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
+ fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
+ }
+
+ if (c->timer_slack_nsec != NSEC_INFINITY)
+ fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
+
+ fprintf(f,
+ "%sStandardInput: %s\n"
+ "%sStandardOutput: %s\n"
+ "%sStandardError: %s\n",
+ prefix, exec_input_to_string(c->std_input),
+ prefix, exec_output_to_string(c->std_output),
+ prefix, exec_output_to_string(c->std_error));
+
+ if (c->std_input == EXEC_INPUT_NAMED_FD)
+ fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
+ if (c->std_output == EXEC_OUTPUT_NAMED_FD)
+ fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
+ if (c->std_error == EXEC_OUTPUT_NAMED_FD)
+ fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
+
+ if (c->std_input == EXEC_INPUT_FILE)
+ fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
+ if (c->std_output == EXEC_OUTPUT_FILE)
+ fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
+ if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
+ fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
+ if (c->std_error == EXEC_OUTPUT_FILE)
+ fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
+ if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
+ fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
+
+ if (c->tty_path)
+ fprintf(f,
+ "%sTTYPath: %s\n"
+ "%sTTYReset: %s\n"
+ "%sTTYVHangup: %s\n"
+ "%sTTYVTDisallocate: %s\n",
+ prefix, c->tty_path,
+ prefix, yes_no(c->tty_reset),
+ prefix, yes_no(c->tty_vhangup),
+ prefix, yes_no(c->tty_vt_disallocate));
+
+ if (IN_SET(c->std_output,
+ EXEC_OUTPUT_KMSG,
+ EXEC_OUTPUT_JOURNAL,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
+ IN_SET(c->std_error,
+ EXEC_OUTPUT_KMSG,
+ EXEC_OUTPUT_JOURNAL,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
+
+ _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
+
+ r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
+ if (r >= 0)
+ fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
+
+ r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
+ if (r >= 0)
+ fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
+ }
+
+ if (c->log_level_max >= 0) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) log_level_to_string_alloc(c->log_level_max, &t);
+
+ fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
+ }
+
+ if (c->log_ratelimit_interval_usec > 0) {
+ char buf_timespan[FORMAT_TIMESPAN_MAX];
+
+ fprintf(f,
+ "%sLogRateLimitIntervalSec: %s\n",
+ prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_ratelimit_interval_usec, USEC_PER_SEC));
+ }
+
+ if (c->log_ratelimit_burst > 0)
+ fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_ratelimit_burst);
+
+ for (size_t j = 0; j < c->n_log_extra_fields; j++) {
+ fprintf(f, "%sLogExtraFields: ", prefix);
+ fwrite(c->log_extra_fields[j].iov_base,
+ 1, c->log_extra_fields[j].iov_len,
+ f);
+ fputc('\n', f);
+ }
+
+ if (c->log_namespace)
+ fprintf(f, "%sLogNamespace: %s\n", prefix, c->log_namespace);
+
+ if (c->secure_bits) {
+ _cleanup_free_ char *str = NULL;
+
+ r = secure_bits_to_string_alloc(c->secure_bits, &str);
+ if (r >= 0)
+ fprintf(f, "%sSecure Bits: %s\n", prefix, str);
+ }
+
+ if (c->capability_bounding_set != CAP_ALL) {
+ _cleanup_free_ char *str = NULL;
+
+ r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
+ if (r >= 0)
+ fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
+ }
+
+ if (c->capability_ambient_set != 0) {
+ _cleanup_free_ char *str = NULL;
+
+ r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
+ if (r >= 0)
+ fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
+ }
+
+ if (c->user)
+ fprintf(f, "%sUser: %s\n", prefix, c->user);
+ if (c->group)
+ fprintf(f, "%sGroup: %s\n", prefix, c->group);
+
+ fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
+
+ if (!strv_isempty(c->supplementary_groups)) {
+ fprintf(f, "%sSupplementaryGroups:", prefix);
+ strv_fprintf(f, c->supplementary_groups);
+ fputs("\n", f);
+ }
+
+ if (c->pam_name)
+ fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
+
+ if (!strv_isempty(c->read_write_paths)) {
+ fprintf(f, "%sReadWritePaths:", prefix);
+ strv_fprintf(f, c->read_write_paths);
+ fputs("\n", f);
+ }
+
+ if (!strv_isempty(c->read_only_paths)) {
+ fprintf(f, "%sReadOnlyPaths:", prefix);
+ strv_fprintf(f, c->read_only_paths);
+ fputs("\n", f);
+ }
+
+ if (!strv_isempty(c->inaccessible_paths)) {
+ fprintf(f, "%sInaccessiblePaths:", prefix);
+ strv_fprintf(f, c->inaccessible_paths);
+ fputs("\n", f);
+ }
+
+ for (size_t i = 0; i < c->n_bind_mounts; i++)
+ fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
+ c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
+ c->bind_mounts[i].ignore_enoent ? "-": "",
+ c->bind_mounts[i].source,
+ c->bind_mounts[i].destination,
+ c->bind_mounts[i].recursive ? "rbind" : "norbind");
+
+ for (size_t i = 0; i < c->n_temporary_filesystems; i++) {
+ const TemporaryFileSystem *t = c->temporary_filesystems + i;
+
+ fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
+ t->path,
+ isempty(t->options) ? "" : ":",
+ strempty(t->options));
+ }
+
+ if (c->utmp_id)
+ fprintf(f,
+ "%sUtmpIdentifier: %s\n",
+ prefix, c->utmp_id);
+
+ if (c->selinux_context)
+ fprintf(f,
+ "%sSELinuxContext: %s%s\n",
+ prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
+
+ if (c->apparmor_profile)
+ fprintf(f,
+ "%sAppArmorProfile: %s%s\n",
+ prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
+
+ if (c->smack_process_label)
+ fprintf(f,
+ "%sSmackProcessLabel: %s%s\n",
+ prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
+
+ if (c->personality != PERSONALITY_INVALID)
+ fprintf(f,
+ "%sPersonality: %s\n",
+ prefix, strna(personality_to_string(c->personality)));
+
+ fprintf(f,
+ "%sLockPersonality: %s\n",
+ prefix, yes_no(c->lock_personality));
+
+ if (c->syscall_filter) {
+#if HAVE_SECCOMP
+ void *id, *val;
+ bool first = true;
+#endif
+
+ fprintf(f,
+ "%sSystemCallFilter: ",
+ prefix);
+
+ if (!c->syscall_allow_list)
+ fputc('~', f);
+
+#if HAVE_SECCOMP
+ HASHMAP_FOREACH_KEY(val, id, c->syscall_filter) {
+ _cleanup_free_ char *name = NULL;
+ const char *errno_name = NULL;
+ int num = PTR_TO_INT(val);
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
+ fputs(strna(name), f);
+
+ if (num >= 0) {
+ errno_name = seccomp_errno_or_action_to_string(num);
+ if (errno_name)
+ fprintf(f, ":%s", errno_name);
+ else
+ fprintf(f, ":%d", num);
+ }
+ }
+#endif
+
+ fputc('\n', f);
+ }
+
+ if (c->syscall_archs) {
+#if HAVE_SECCOMP
+ void *id;
+#endif
+
+ fprintf(f,
+ "%sSystemCallArchitectures:",
+ prefix);
+
+#if HAVE_SECCOMP
+ SET_FOREACH(id, c->syscall_archs)
+ fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
+#endif
+ fputc('\n', f);
+ }
+
+ if (exec_context_restrict_namespaces_set(c)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = namespace_flags_to_string(c->restrict_namespaces, &s);
+ if (r >= 0)
+ fprintf(f, "%sRestrictNamespaces: %s\n",
+ prefix, strna(s));
+ }
+
+ if (c->network_namespace_path)
+ fprintf(f,
+ "%sNetworkNamespacePath: %s\n",
+ prefix, c->network_namespace_path);
+
+ if (c->syscall_errno > 0) {
+#if HAVE_SECCOMP
+ const char *errno_name;
+#endif
+
+ fprintf(f, "%sSystemCallErrorNumber: ", prefix);
+
+#if HAVE_SECCOMP
+ errno_name = seccomp_errno_or_action_to_string(c->syscall_errno);
+ if (errno_name)
+ fputs(errno_name, f);
+ else
+ fprintf(f, "%d", c->syscall_errno);
+#endif
+ fputc('\n', f);
+ }
+
+ for (size_t i = 0; i < c->n_mount_images; i++) {
+ MountOptions *o;
+
+ fprintf(f, "%sMountImages: %s%s:%s%s", prefix,
+ c->mount_images[i].ignore_enoent ? "-": "",
+ c->mount_images[i].source,
+ c->mount_images[i].destination,
+ LIST_IS_EMPTY(c->mount_images[i].mount_options) ? "": ":");
+ LIST_FOREACH(mount_options, o, c->mount_images[i].mount_options)
+ fprintf(f, "%s:%s",
+ partition_designator_to_string(o->partition_designator),
+ o->options);
+ fprintf(f, "\n");
+ }
+}
+
+bool exec_context_maintains_privileges(const ExecContext *c) {
+ assert(c);
+
+ /* Returns true if the process forked off would run under
+ * an unchanged UID or as root. */
+
+ if (!c->user)
+ return true;
+
+ if (streq(c->user, "root") || streq(c->user, "0"))
+ return true;
+
+ return false;
+}
+
+int exec_context_get_effective_ioprio(const ExecContext *c) {
+ int p;
+
+ assert(c);
+
+ if (c->ioprio_set)
+ return c->ioprio;
+
+ p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
+ if (p < 0)
+ return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
+
+ return p;
+}
+
+bool exec_context_get_effective_mount_apivfs(const ExecContext *c) {
+ assert(c);
+
+ /* Explicit setting wins */
+ if (c->mount_apivfs_set)
+ return c->mount_apivfs;
+
+ /* Default to "yes" if root directory or image are specified */
+ if (exec_context_with_rootfs(c))
+ return true;
+
+ return false;
+}
+
+void exec_context_free_log_extra_fields(ExecContext *c) {
+ assert(c);
+
+ for (size_t l = 0; l < c->n_log_extra_fields; l++)
+ free(c->log_extra_fields[l].iov_base);
+ c->log_extra_fields = mfree(c->log_extra_fields);
+ c->n_log_extra_fields = 0;
+}
+
+void exec_context_revert_tty(ExecContext *c) {
+ int r;
+
+ assert(c);
+
+ /* First, reset the TTY (possibly kicking everybody else from the TTY) */
+ exec_context_tty_reset(c, NULL);
+
+ /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
+ * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
+ * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
+
+ if (exec_context_may_touch_tty(c)) {
+ const char *path;
+
+ path = exec_context_tty_path(c);
+ if (path) {
+ r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
+ }
+ }
+}
+
+int exec_context_get_clean_directories(
+ ExecContext *c,
+ char **prefix,
+ ExecCleanMask mask,
+ char ***ret) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ assert(c);
+ assert(prefix);
+ assert(ret);
+
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
+ char **i;
+
+ if (!FLAGS_SET(mask, 1U << t))
+ continue;
+
+ if (!prefix[t])
+ continue;
+
+ STRV_FOREACH(i, c->directories[t].paths) {
+ char *j;
+
+ j = path_join(prefix[t], *i);
+ if (!j)
+ return -ENOMEM;
+
+ r = strv_consume(&l, j);
+ if (r < 0)
+ return r;
+
+ /* Also remove private directories unconditionally. */
+ if (t != EXEC_DIRECTORY_CONFIGURATION) {
+ j = path_join(prefix[t], "private", *i);
+ if (!j)
+ return -ENOMEM;
+
+ r = strv_consume(&l, j);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ *ret = TAKE_PTR(l);
+ return 0;
+}
+
+int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
+ ExecCleanMask mask = 0;
+
+ assert(c);
+ assert(ret);
+
+ for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
+ if (!strv_isempty(c->directories[t].paths))
+ mask |= 1U << t;
+
+ *ret = mask;
+ return 0;
+}
+
+void exec_status_start(ExecStatus *s, pid_t pid) {
+ assert(s);
+
+ *s = (ExecStatus) {
+ .pid = pid,
+ };
+
+ dual_timestamp_get(&s->start_timestamp);
+}
+
+void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
+ assert(s);
+
+ if (s->pid != pid)
+ *s = (ExecStatus) {
+ .pid = pid,
+ };
+
+ dual_timestamp_get(&s->exit_timestamp);
+
+ s->code = code;
+ s->status = status;
+
+ if (context && context->utmp_id)
+ (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
+}
+
+void exec_status_reset(ExecStatus *s) {
+ assert(s);
+
+ *s = (ExecStatus) {};
+}
+
+void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+
+ assert(s);
+ assert(f);
+
+ if (s->pid <= 0)
+ return;
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%sPID: "PID_FMT"\n",
+ prefix, s->pid);
+
+ if (dual_timestamp_is_set(&s->start_timestamp))
+ fprintf(f,
+ "%sStart Timestamp: %s\n",
+ prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
+
+ if (dual_timestamp_is_set(&s->exit_timestamp))
+ fprintf(f,
+ "%sExit Timestamp: %s\n"
+ "%sExit Code: %s\n"
+ "%sExit Status: %i\n",
+ prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
+ prefix, sigchld_code_to_string(s->code),
+ prefix, s->status);
+}
+
+static char *exec_command_line(char **argv) {
+ size_t k;
+ char *n, *p, **a;
+ bool first = true;
+
+ assert(argv);
+
+ k = 1;
+ STRV_FOREACH(a, argv)
+ k += strlen(*a)+3;
+
+ n = new(char, k);
+ if (!n)
+ return NULL;
+
+ p = n;
+ STRV_FOREACH(a, argv) {
+
+ if (!first)
+ *(p++) = ' ';
+ else
+ first = false;
+
+ if (strpbrk(*a, WHITESPACE)) {
+ *(p++) = '\'';
+ p = stpcpy(p, *a);
+ *(p++) = '\'';
+ } else
+ p = stpcpy(p, *a);
+
+ }
+
+ *p = 0;
+
+ /* FIXME: this doesn't really handle arguments that have
+ * spaces and ticks in them */
+
+ return n;
+}
+
+static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
+ _cleanup_free_ char *cmd = NULL;
+ const char *prefix2;
+
+ assert(c);
+ assert(f);
+
+ prefix = strempty(prefix);
+ prefix2 = strjoina(prefix, "\t");
+
+ cmd = exec_command_line(c->argv);
+ fprintf(f,
+ "%sCommand Line: %s\n",
+ prefix, cmd ? cmd : strerror_safe(ENOMEM));
+
+ exec_status_dump(&c->exec_status, f, prefix2);
+}
+
+void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
+ assert(f);
+
+ prefix = strempty(prefix);
+
+ LIST_FOREACH(command, c, c)
+ exec_command_dump(c, f, prefix);
+}
+
+void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
+ ExecCommand *end;
+
+ assert(l);
+ assert(e);
+
+ if (*l) {
+ /* It's kind of important, that we keep the order here */
+ LIST_FIND_TAIL(command, *l, end);
+ LIST_INSERT_AFTER(command, *l, end, e);
+ } else
+ *l = e;
+}
+
+int exec_command_set(ExecCommand *c, const char *path, ...) {
+ va_list ap;
+ char **l, *p;
+
+ assert(c);
+ assert(path);
+
+ va_start(ap, path);
+ l = strv_new_ap(path, ap);
+ va_end(ap);
+
+ if (!l)
+ return -ENOMEM;
+
+ p = strdup(path);
+ if (!p) {
+ strv_free(l);
+ return -ENOMEM;
+ }
+
+ free_and_replace(c->path, p);
+
+ return strv_free_and_replace(c->argv, l);
+}
+
+int exec_command_append(ExecCommand *c, const char *path, ...) {
+ _cleanup_strv_free_ char **l = NULL;
+ va_list ap;
+ int r;
+
+ assert(c);
+ assert(path);
+
+ va_start(ap, path);
+ l = strv_new_ap(path, ap);
+ va_end(ap);
+
+ if (!l)
+ return -ENOMEM;
+
+ r = strv_extend_strv(&c->argv, l, false);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void *remove_tmpdir_thread(void *p) {
+ _cleanup_free_ char *path = p;
+
+ (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
+ return NULL;
+}
+
+static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
+ int r;
+
+ if (!rt)
+ return NULL;
+
+ if (rt->manager)
+ (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
+
+ /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
+
+ if (destroy && rt->tmp_dir && !streq(rt->tmp_dir, RUN_SYSTEMD_EMPTY)) {
+ log_debug("Spawning thread to nuke %s", rt->tmp_dir);
+
+ r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
+ if (r < 0)
+ log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
+ else
+ rt->tmp_dir = NULL;
+ }
+
+ if (destroy && rt->var_tmp_dir && !streq(rt->var_tmp_dir, RUN_SYSTEMD_EMPTY)) {
+ log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
+
+ r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
+ if (r < 0)
+ log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
+ else
+ rt->var_tmp_dir = NULL;
+ }
+
+ rt->id = mfree(rt->id);
+ rt->tmp_dir = mfree(rt->tmp_dir);
+ rt->var_tmp_dir = mfree(rt->var_tmp_dir);
+ safe_close_pair(rt->netns_storage_socket);
+ return mfree(rt);
+}
+
+static void exec_runtime_freep(ExecRuntime **rt) {
+ (void) exec_runtime_free(*rt, false);
+}
+
+static int exec_runtime_allocate(ExecRuntime **ret, const char *id) {
+ _cleanup_free_ char *id_copy = NULL;
+ ExecRuntime *n;
+
+ assert(ret);
+
+ id_copy = strdup(id);
+ if (!id_copy)
+ return -ENOMEM;
+
+ n = new(ExecRuntime, 1);
+ if (!n)
+ return -ENOMEM;
+
+ *n = (ExecRuntime) {
+ .id = TAKE_PTR(id_copy),
+ .netns_storage_socket = { -1, -1 },
+ };
+
+ *ret = n;
+ return 0;
+}
+
+static int exec_runtime_add(
+ Manager *m,
+ const char *id,
+ char **tmp_dir,
+ char **var_tmp_dir,
+ int netns_storage_socket[2],
+ ExecRuntime **ret) {
+
+ _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
+ int r;
+
+ assert(m);
+ assert(id);
+
+ /* tmp_dir, var_tmp_dir, netns_storage_socket fds are donated on success */
+
+ r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = exec_runtime_allocate(&rt, id);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
+ if (r < 0)
+ return r;
+
+ assert(!!rt->tmp_dir == !!rt->var_tmp_dir); /* We require both to be set together */
+ rt->tmp_dir = TAKE_PTR(*tmp_dir);
+ rt->var_tmp_dir = TAKE_PTR(*var_tmp_dir);
+
+ if (netns_storage_socket) {
+ rt->netns_storage_socket[0] = TAKE_FD(netns_storage_socket[0]);
+ rt->netns_storage_socket[1] = TAKE_FD(netns_storage_socket[1]);
+ }
+
+ rt->manager = m;
+
+ if (ret)
+ *ret = rt;
+ /* do not remove created ExecRuntime object when the operation succeeds. */
+ TAKE_PTR(rt);
+ return 0;
+}
+
+static int exec_runtime_make(
+ Manager *m,
+ const ExecContext *c,
+ const char *id,
+ ExecRuntime **ret) {
+
+ _cleanup_(namespace_cleanup_tmpdirp) char *tmp_dir = NULL, *var_tmp_dir = NULL;
+ _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(id);
+
+ /* It is not necessary to create ExecRuntime object. */
+ if (!c->private_network && !c->private_tmp && !c->network_namespace_path) {
+ *ret = NULL;
+ return 0;
+ }
+
+ if (c->private_tmp &&
+ !(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
+ (prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
+ prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
+ r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->private_network || c->network_namespace_path) {
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
+ return -errno;
+ }
+
+ r = exec_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_storage_socket, ret);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
+ ExecRuntime *rt;
+ int r;
+
+ assert(m);
+ assert(id);
+ assert(ret);
+
+ rt = hashmap_get(m->exec_runtime_by_id, id);
+ if (rt)
+ /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
+ goto ref;
+
+ if (!create) {
+ *ret = NULL;
+ return 0;
+ }
+
+ /* If not found, then create a new object. */
+ r = exec_runtime_make(m, c, id, &rt);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* When r == 0, it is not necessary to create ExecRuntime object. */
+ *ret = NULL;
+ return 0;
+ }
+
+ref:
+ /* increment reference counter. */
+ rt->n_ref++;
+ *ret = rt;
+ return 1;
+}
+
+ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
+ if (!rt)
+ return NULL;
+
+ assert(rt->n_ref > 0);
+
+ rt->n_ref--;
+ if (rt->n_ref > 0)
+ return NULL;
+
+ return exec_runtime_free(rt, destroy);
+}
+
+int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
+ ExecRuntime *rt;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ HASHMAP_FOREACH(rt, m->exec_runtime_by_id) {
+ fprintf(f, "exec-runtime=%s", rt->id);
+
+ if (rt->tmp_dir)
+ fprintf(f, " tmp-dir=%s", rt->tmp_dir);
+
+ if (rt->var_tmp_dir)
+ fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
+
+ if (rt->netns_storage_socket[0] >= 0) {
+ int copy;
+
+ copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
+ if (copy < 0)
+ return copy;
+
+ fprintf(f, " netns-socket-0=%i", copy);
+ }
+
+ if (rt->netns_storage_socket[1] >= 0) {
+ int copy;
+
+ copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
+ if (copy < 0)
+ return copy;
+
+ fprintf(f, " netns-socket-1=%i", copy);
+ }
+
+ fputc('\n', f);
+ }
+
+ return 0;
+}
+
+int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
+ _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
+ ExecRuntime *rt;
+ int r;
+
+ /* This is for the migration from old (v237 or earlier) deserialization text.
+ * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
+ * Even if the ExecRuntime object originally created by the other unit, we cannot judge
+ * so or not from the serialized text, then we always creates a new object owned by this. */
+
+ assert(u);
+ assert(key);
+ assert(value);
+
+ /* Manager manages ExecRuntime objects by the unit id.
+ * So, we omit the serialized text when the unit does not have id (yet?)... */
+ if (isempty(u->id)) {
+ log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
+ return 0;
+ }
+
+ rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
+ if (!rt) {
+ r = exec_runtime_allocate(&rt_create, u->id);
+ if (r < 0)
+ return log_oom();
+
+ rt = rt_create;
+ }
+
+ if (streq(key, "tmp-dir")) {
+ char *copy;
+
+ copy = strdup(value);
+ if (!copy)
+ return log_oom();
+
+ free_and_replace(rt->tmp_dir, copy);
+
+ } else if (streq(key, "var-tmp-dir")) {
+ char *copy;
+
+ copy = strdup(value);
+ if (!copy)
+ return log_oom();
+
+ free_and_replace(rt->var_tmp_dir, copy);
+
+ } else if (streq(key, "netns-socket-0")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
+ log_unit_debug(u, "Failed to parse netns socket value: %s", value);
+ return 0;
+ }
+
+ safe_close(rt->netns_storage_socket[0]);
+ rt->netns_storage_socket[0] = fdset_remove(fds, fd);
+
+ } else if (streq(key, "netns-socket-1")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
+ log_unit_debug(u, "Failed to parse netns socket value: %s", value);
+ return 0;
+ }
+
+ safe_close(rt->netns_storage_socket[1]);
+ rt->netns_storage_socket[1] = fdset_remove(fds, fd);
+ } else
+ return 0;
+
+ /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
+ if (rt_create) {
+ r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
+ return 0;
+ }
+
+ rt_create->manager = u->manager;
+
+ /* Avoid cleanup */
+ TAKE_PTR(rt_create);
+ }
+
+ return 1;
+}
+
+int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
+ _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
+ char *id = NULL;
+ int r, fdpair[] = {-1, -1};
+ const char *p, *v = value;
+ size_t n;
+
+ assert(m);
+ assert(value);
+ assert(fds);
+
+ n = strcspn(v, " ");
+ id = strndupa(v, n);
+ if (v[n] != ' ')
+ goto finalize;
+ p = v + n + 1;
+
+ v = startswith(p, "tmp-dir=");
+ if (v) {
+ n = strcspn(v, " ");
+ tmp_dir = strndup(v, n);
+ if (!tmp_dir)
+ return log_oom();
+ if (v[n] != ' ')
+ goto finalize;
+ p = v + n + 1;
+ }
+
+ v = startswith(p, "var-tmp-dir=");
+ if (v) {
+ n = strcspn(v, " ");
+ var_tmp_dir = strndup(v, n);
+ if (!var_tmp_dir)
+ return log_oom();
+ if (v[n] != ' ')
+ goto finalize;
+ p = v + n + 1;
+ }
+
+ v = startswith(p, "netns-socket-0=");
+ if (v) {
+ char *buf;
+
+ n = strcspn(v, " ");
+ buf = strndupa(v, n);
+
+ r = safe_atoi(buf, &fdpair[0]);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to parse exec-runtime specification netns-socket-0=%s: %m", buf);
+ if (!fdset_contains(fds, fdpair[0]))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADF),
+ "exec-runtime specification netns-socket-0= refers to unknown fd %d: %m", fdpair[0]);
+ fdpair[0] = fdset_remove(fds, fdpair[0]);
+ if (v[n] != ' ')
+ goto finalize;
+ p = v + n + 1;
+ }
+
+ v = startswith(p, "netns-socket-1=");
+ if (v) {
+ char *buf;
+
+ n = strcspn(v, " ");
+ buf = strndupa(v, n);
+ r = safe_atoi(buf, &fdpair[1]);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to parse exec-runtime specification netns-socket-1=%s: %m", buf);
+ if (!fdset_contains(fds, fdpair[1]))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADF),
+ "exec-runtime specification netns-socket-1= refers to unknown fd %d: %m", fdpair[1]);
+ fdpair[1] = fdset_remove(fds, fdpair[1]);
+ }
+
+finalize:
+ r = exec_runtime_add(m, id, &tmp_dir, &var_tmp_dir, fdpair, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add exec-runtime: %m");
+ return 0;
+}
+
+void exec_runtime_vacuum(Manager *m) {
+ ExecRuntime *rt;
+
+ assert(m);
+
+ /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
+
+ HASHMAP_FOREACH(rt, m->exec_runtime_by_id) {
+ if (rt->n_ref > 0)
+ continue;
+
+ (void) exec_runtime_free(rt, false);
+ }
+}
+
+void exec_params_clear(ExecParameters *p) {
+ if (!p)
+ return;
+
+ p->environment = strv_free(p->environment);
+ p->fd_names = strv_free(p->fd_names);
+ p->fds = mfree(p->fds);
+ p->exec_fd = safe_close(p->exec_fd);
+}
+
+ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
+ if (!sc)
+ return NULL;
+
+ free(sc->id);
+ free(sc->data);
+ return mfree(sc);
+}
+
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(exec_set_credential_hash_ops, char, string_hash_func, string_compare_func, ExecSetCredential, exec_set_credential_free);
+
+static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
+ [EXEC_INPUT_NULL] = "null",
+ [EXEC_INPUT_TTY] = "tty",
+ [EXEC_INPUT_TTY_FORCE] = "tty-force",
+ [EXEC_INPUT_TTY_FAIL] = "tty-fail",
+ [EXEC_INPUT_SOCKET] = "socket",
+ [EXEC_INPUT_NAMED_FD] = "fd",
+ [EXEC_INPUT_DATA] = "data",
+ [EXEC_INPUT_FILE] = "file",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
+
+static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
+ [EXEC_OUTPUT_INHERIT] = "inherit",
+ [EXEC_OUTPUT_NULL] = "null",
+ [EXEC_OUTPUT_TTY] = "tty",
+ [EXEC_OUTPUT_KMSG] = "kmsg",
+ [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
+ [EXEC_OUTPUT_JOURNAL] = "journal",
+ [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
+ [EXEC_OUTPUT_SOCKET] = "socket",
+ [EXEC_OUTPUT_NAMED_FD] = "fd",
+ [EXEC_OUTPUT_FILE] = "file",
+ [EXEC_OUTPUT_FILE_APPEND] = "append",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
+
+static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
+ [EXEC_UTMP_INIT] = "init",
+ [EXEC_UTMP_LOGIN] = "login",
+ [EXEC_UTMP_USER] = "user",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
+
+static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
+ [EXEC_PRESERVE_NO] = "no",
+ [EXEC_PRESERVE_YES] = "yes",
+ [EXEC_PRESERVE_RESTART] = "restart",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
+
+/* This table maps ExecDirectoryType to the setting it is configured with in the unit */
+static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
+ [EXEC_DIRECTORY_STATE] = "StateDirectory",
+ [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
+ [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
+ [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
+
+/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
+ * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
+ * directories, specifically .timer units with their timestamp touch file. */
+static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = "runtime",
+ [EXEC_DIRECTORY_STATE] = "state",
+ [EXEC_DIRECTORY_CACHE] = "cache",
+ [EXEC_DIRECTORY_LOGS] = "logs",
+ [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
+
+/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
+ * the service payload in. */
+static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
+ [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
+ [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
+ [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
+ [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
+
+static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
+ [EXEC_KEYRING_INHERIT] = "inherit",
+ [EXEC_KEYRING_PRIVATE] = "private",
+ [EXEC_KEYRING_SHARED] = "shared",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);
diff --git a/src/core/execute.h b/src/core/execute.h
new file mode 100644
index 0000000..33d7e16
--- /dev/null
+++ b/src/core/execute.h
@@ -0,0 +1,472 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct ExecStatus ExecStatus;
+typedef struct ExecCommand ExecCommand;
+typedef struct ExecContext ExecContext;
+typedef struct ExecRuntime ExecRuntime;
+typedef struct ExecParameters ExecParameters;
+typedef struct Manager Manager;
+
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/capability.h>
+
+#include "cgroup-util.h"
+#include "coredump-util.h"
+#include "cpu-set-util.h"
+#include "exec-util.h"
+#include "fdset.h"
+#include "list.h"
+#include "missing_resource.h"
+#include "namespace.h"
+#include "nsflags.h"
+#include "numa-util.h"
+#include "path-util.h"
+#include "time-util.h"
+
+#define EXEC_STDIN_DATA_MAX (64U*1024U*1024U)
+
+typedef enum ExecUtmpMode {
+ EXEC_UTMP_INIT,
+ EXEC_UTMP_LOGIN,
+ EXEC_UTMP_USER,
+ _EXEC_UTMP_MODE_MAX,
+ _EXEC_UTMP_MODE_INVALID = -1
+} ExecUtmpMode;
+
+typedef enum ExecInput {
+ EXEC_INPUT_NULL,
+ EXEC_INPUT_TTY,
+ EXEC_INPUT_TTY_FORCE,
+ EXEC_INPUT_TTY_FAIL,
+ EXEC_INPUT_SOCKET,
+ EXEC_INPUT_NAMED_FD,
+ EXEC_INPUT_DATA,
+ EXEC_INPUT_FILE,
+ _EXEC_INPUT_MAX,
+ _EXEC_INPUT_INVALID = -1
+} ExecInput;
+
+typedef enum ExecOutput {
+ EXEC_OUTPUT_INHERIT,
+ EXEC_OUTPUT_NULL,
+ EXEC_OUTPUT_TTY,
+ EXEC_OUTPUT_KMSG,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_SOCKET,
+ EXEC_OUTPUT_NAMED_FD,
+ EXEC_OUTPUT_FILE,
+ EXEC_OUTPUT_FILE_APPEND,
+ _EXEC_OUTPUT_MAX,
+ _EXEC_OUTPUT_INVALID = -1
+} ExecOutput;
+
+typedef enum ExecPreserveMode {
+ EXEC_PRESERVE_NO,
+ EXEC_PRESERVE_YES,
+ EXEC_PRESERVE_RESTART,
+ _EXEC_PRESERVE_MODE_MAX,
+ _EXEC_PRESERVE_MODE_INVALID = -1
+} ExecPreserveMode;
+
+typedef enum ExecKeyringMode {
+ EXEC_KEYRING_INHERIT,
+ EXEC_KEYRING_PRIVATE,
+ EXEC_KEYRING_SHARED,
+ _EXEC_KEYRING_MODE_MAX,
+ _EXEC_KEYRING_MODE_INVALID = -1,
+} ExecKeyringMode;
+
+/* Contains start and exit information about an executed command. */
+struct ExecStatus {
+ dual_timestamp start_timestamp;
+ dual_timestamp exit_timestamp;
+ pid_t pid;
+ int code; /* as in siginfo_t::si_code */
+ int status; /* as in siginfo_t::si_status */
+};
+
+/* Stores information about commands we execute. Covers both configuration settings as well as runtime data. */
+struct ExecCommand {
+ char *path;
+ char **argv;
+ ExecStatus exec_status;
+ ExecCommandFlags flags;
+ LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */
+};
+
+/* Encapsulates certain aspects of the runtime environment that is to be shared between multiple otherwise separate
+ * invocations of commands. Specifically, this allows sharing of /tmp and /var/tmp data as well as network namespaces
+ * between invocations of commands. This is a reference counted object, with one reference taken by each currently
+ * active command invocation that wants to share this runtime. */
+struct ExecRuntime {
+ unsigned n_ref;
+
+ Manager *manager;
+
+ char *id; /* Unit id of the owner */
+
+ char *tmp_dir;
+ char *var_tmp_dir;
+
+ /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network
+ * namespace. */
+ int netns_storage_socket[2];
+};
+
+typedef enum ExecDirectoryType {
+ EXEC_DIRECTORY_RUNTIME = 0,
+ EXEC_DIRECTORY_STATE,
+ EXEC_DIRECTORY_CACHE,
+ EXEC_DIRECTORY_LOGS,
+ EXEC_DIRECTORY_CONFIGURATION,
+ _EXEC_DIRECTORY_TYPE_MAX,
+ _EXEC_DIRECTORY_TYPE_INVALID = -1,
+} ExecDirectoryType;
+
+typedef struct ExecDirectory {
+ char **paths;
+ mode_t mode;
+} ExecDirectory;
+
+typedef enum ExecCleanMask {
+ /* In case you wonder why the bitmask below doesn't use "directory" in its name: we want to keep this
+ * generic so that .timer timestamp files can nicely be covered by this too, and similar. */
+ EXEC_CLEAN_RUNTIME = 1U << EXEC_DIRECTORY_RUNTIME,
+ EXEC_CLEAN_STATE = 1U << EXEC_DIRECTORY_STATE,
+ EXEC_CLEAN_CACHE = 1U << EXEC_DIRECTORY_CACHE,
+ EXEC_CLEAN_LOGS = 1U << EXEC_DIRECTORY_LOGS,
+ EXEC_CLEAN_CONFIGURATION = 1U << EXEC_DIRECTORY_CONFIGURATION,
+ EXEC_CLEAN_NONE = 0,
+ EXEC_CLEAN_ALL = (1U << _EXEC_DIRECTORY_TYPE_MAX) - 1,
+ _EXEC_CLEAN_MASK_INVALID = -1,
+} ExecCleanMask;
+
+/* A credential configured with SetCredential= */
+typedef struct ExecSetCredential {
+ char *id;
+ void *data;
+ size_t size;
+} ExecSetCredential;
+
+/* Encodes configuration parameters applied to invoked commands. Does not carry runtime data, but only configuration
+ * changes sourced from unit files and suchlike. ExecContext objects are usually embedded into Unit objects, and do not
+ * change after being loaded. */
+struct ExecContext {
+ char **environment;
+ char **environment_files;
+ char **pass_environment;
+ char **unset_environment;
+
+ struct rlimit *rlimit[_RLIMIT_MAX];
+ char *working_directory, *root_directory, *root_image, *root_verity, *root_hash_path, *root_hash_sig_path;
+ void *root_hash, *root_hash_sig;
+ size_t root_hash_size, root_hash_sig_size;
+ LIST_HEAD(MountOptions, root_image_options);
+ bool working_directory_missing_ok:1;
+ bool working_directory_home:1;
+
+ bool oom_score_adjust_set:1;
+ bool coredump_filter_set:1;
+ bool nice_set:1;
+ bool ioprio_set:1;
+ bool cpu_sched_set:1;
+ bool mount_apivfs_set:1;
+
+ /* This is not exposed to the user but available internally. We need it to make sure that whenever we
+ * spawn /usr/bin/mount it is run in the same process group as us so that the autofs logic detects
+ * that it belongs to us and we don't enter a trigger loop. */
+ bool same_pgrp;
+
+ bool cpu_sched_reset_on_fork;
+ bool non_blocking;
+
+ mode_t umask;
+ int oom_score_adjust;
+ int nice;
+ int ioprio;
+ int cpu_sched_policy;
+ int cpu_sched_priority;
+ uint64_t coredump_filter;
+
+ CPUSet cpu_set;
+ NUMAPolicy numa_policy;
+ bool cpu_affinity_from_numa;
+
+ ExecInput std_input;
+ ExecOutput std_output;
+ ExecOutput std_error;
+ bool stdio_as_fds;
+ char *stdio_fdname[3];
+ char *stdio_file[3];
+
+ void *stdin_data;
+ size_t stdin_data_size;
+
+ nsec_t timer_slack_nsec;
+
+ char *tty_path;
+
+ bool tty_reset;
+ bool tty_vhangup;
+ bool tty_vt_disallocate;
+
+ bool ignore_sigpipe;
+
+ ExecKeyringMode keyring_mode;
+
+ /* Since resolving these names might involve socket
+ * connections and we don't want to deadlock ourselves these
+ * names are resolved on execution only and in the child
+ * process. */
+ char *user;
+ char *group;
+ char **supplementary_groups;
+
+ char *pam_name;
+
+ char *utmp_id;
+ ExecUtmpMode utmp_mode;
+
+ bool no_new_privileges;
+
+ bool selinux_context_ignore;
+ bool apparmor_profile_ignore;
+ bool smack_process_label_ignore;
+
+ char *selinux_context;
+ char *apparmor_profile;
+ char *smack_process_label;
+
+ char **read_write_paths, **read_only_paths, **inaccessible_paths;
+ unsigned long mount_flags;
+ BindMount *bind_mounts;
+ size_t n_bind_mounts;
+ TemporaryFileSystem *temporary_filesystems;
+ size_t n_temporary_filesystems;
+ MountImage *mount_images;
+ size_t n_mount_images;
+
+ uint64_t capability_bounding_set;
+ uint64_t capability_ambient_set;
+ int secure_bits;
+
+ int syslog_priority;
+ bool syslog_level_prefix;
+ char *syslog_identifier;
+
+ struct iovec* log_extra_fields;
+ size_t n_log_extra_fields;
+
+ usec_t log_ratelimit_interval_usec;
+ unsigned log_ratelimit_burst;
+
+ int log_level_max;
+
+ char *log_namespace;
+
+ ProtectProc protect_proc; /* hidepid= */
+ ProcSubset proc_subset; /* subset= */
+
+ bool private_tmp;
+ bool private_network;
+ bool private_devices;
+ bool private_users;
+ bool private_mounts;
+ bool protect_kernel_tunables;
+ bool protect_kernel_modules;
+ bool protect_kernel_logs;
+ bool protect_clock;
+ bool protect_control_groups;
+ ProtectSystem protect_system;
+ ProtectHome protect_home;
+ bool protect_hostname;
+ bool mount_apivfs;
+
+ bool dynamic_user;
+ bool remove_ipc;
+
+ bool memory_deny_write_execute;
+ bool restrict_realtime;
+ bool restrict_suid_sgid;
+
+ bool lock_personality;
+ unsigned long personality;
+
+ unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
+
+ Hashmap *syscall_filter;
+ Set *syscall_archs;
+ int syscall_errno;
+ bool syscall_allow_list:1;
+
+ Hashmap *syscall_log;
+ bool syscall_log_allow_list:1; /* Log listed system calls */
+
+ bool address_families_allow_list:1;
+ Set *address_families;
+
+ char *network_namespace_path;
+
+ ExecDirectory directories[_EXEC_DIRECTORY_TYPE_MAX];
+ ExecPreserveMode runtime_directory_preserve_mode;
+ usec_t timeout_clean_usec;
+
+ Hashmap *set_credentials; /* output id → ExecSetCredential */
+ char **load_credentials; /* pairs of output id, path/input id */
+};
+
+static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
+ assert(c);
+
+ return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
+}
+
+static inline bool exec_context_with_rootfs(const ExecContext *c) {
+ assert(c);
+
+ /* Checks if RootDirectory= or RootImage= are used */
+
+ return !empty_or_root(c->root_directory) || c->root_image;
+}
+
+typedef enum ExecFlags {
+ EXEC_APPLY_SANDBOXING = 1 << 0,
+ EXEC_APPLY_CHROOT = 1 << 1,
+ EXEC_APPLY_TTY_STDIN = 1 << 2,
+ EXEC_PASS_LOG_UNIT = 1 << 3, /* Whether to pass the unit name to the service's journal stream connection */
+ EXEC_CHOWN_DIRECTORIES = 1 << 4, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
+ EXEC_NSS_BYPASS_BUS = 1 << 5, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */
+ EXEC_CGROUP_DELEGATE = 1 << 6,
+ EXEC_IS_CONTROL = 1 << 7,
+ EXEC_CONTROL_CGROUP = 1 << 8, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
+ EXEC_WRITE_CREDENTIALS = 1 << 9, /* Set up the credential store logic */
+
+ /* The following are not used by execute.c, but by consumers internally */
+ EXEC_PASS_FDS = 1 << 10,
+ EXEC_SETENV_RESULT = 1 << 11,
+ EXEC_SET_WATCHDOG = 1 << 12,
+} ExecFlags;
+
+/* Parameters for a specific invocation of a command. This structure is put together right before a command is
+ * executed. */
+struct ExecParameters {
+ char **environment;
+
+ int *fds;
+ char **fd_names;
+ size_t n_socket_fds;
+ size_t n_storage_fds;
+
+ ExecFlags flags;
+ bool selinux_context_net:1;
+
+ CGroupMask cgroup_supported;
+ const char *cgroup_path;
+
+ char **prefix;
+ const char *received_credentials;
+
+ const char *confirm_spawn;
+
+ usec_t watchdog_usec;
+
+ int *idle_pipe;
+
+ int stdin_fd;
+ int stdout_fd;
+ int stderr_fd;
+
+ /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
+ int exec_fd;
+};
+
+#include "unit.h"
+#include "dynamic-user.h"
+
+int exec_spawn(Unit *unit,
+ ExecCommand *command,
+ const ExecContext *context,
+ const ExecParameters *exec_params,
+ ExecRuntime *runtime,
+ DynamicCreds *dynamic_creds,
+ pid_t *ret);
+
+void exec_command_done_array(ExecCommand *c, size_t n);
+ExecCommand* exec_command_free_list(ExecCommand *c);
+void exec_command_free_array(ExecCommand **c, size_t n);
+void exec_command_reset_status_array(ExecCommand *c, size_t n);
+void exec_command_reset_status_list_array(ExecCommand **c, size_t n);
+void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix);
+void exec_command_append_list(ExecCommand **l, ExecCommand *e);
+int exec_command_set(ExecCommand *c, const char *path, ...) _sentinel_;
+int exec_command_append(ExecCommand *c, const char *path, ...) _sentinel_;
+
+void exec_context_init(ExecContext *c);
+void exec_context_done(ExecContext *c);
+void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix);
+
+int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_root);
+int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_root, const char *unit);
+
+const char* exec_context_fdname(const ExecContext *c, int fd_index);
+
+bool exec_context_may_touch_console(const ExecContext *c);
+bool exec_context_maintains_privileges(const ExecContext *c);
+
+int exec_context_get_effective_ioprio(const ExecContext *c);
+bool exec_context_get_effective_mount_apivfs(const ExecContext *c);
+
+void exec_context_free_log_extra_fields(ExecContext *c);
+
+void exec_context_revert_tty(ExecContext *c);
+
+int exec_context_get_clean_directories(ExecContext *c, char **prefix, ExecCleanMask mask, char ***ret);
+int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret);
+
+void exec_status_start(ExecStatus *s, pid_t pid);
+void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
+void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix);
+void exec_status_reset(ExecStatus *s);
+
+int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *name, bool create, ExecRuntime **ret);
+ExecRuntime *exec_runtime_unref(ExecRuntime *r, bool destroy);
+
+int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds);
+int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds);
+int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
+void exec_runtime_vacuum(Manager *m);
+
+void exec_params_clear(ExecParameters *p);
+
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
+
+ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc);
+DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSetCredential*, exec_set_credential_free);
+
+extern const struct hash_ops exec_set_credential_hash_ops;
+
+const char* exec_output_to_string(ExecOutput i) _const_;
+ExecOutput exec_output_from_string(const char *s) _pure_;
+
+const char* exec_input_to_string(ExecInput i) _const_;
+ExecInput exec_input_from_string(const char *s) _pure_;
+
+const char* exec_utmp_mode_to_string(ExecUtmpMode i) _const_;
+ExecUtmpMode exec_utmp_mode_from_string(const char *s) _pure_;
+
+const char* exec_preserve_mode_to_string(ExecPreserveMode i) _const_;
+ExecPreserveMode exec_preserve_mode_from_string(const char *s) _pure_;
+
+const char* exec_keyring_mode_to_string(ExecKeyringMode i) _const_;
+ExecKeyringMode exec_keyring_mode_from_string(const char *s) _pure_;
+
+const char* exec_directory_type_to_string(ExecDirectoryType i) _const_;
+ExecDirectoryType exec_directory_type_from_string(const char *s) _pure_;
+
+const char* exec_resource_type_to_string(ExecDirectoryType i) _const_;
+ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_;
diff --git a/src/core/generator-setup.c b/src/core/generator-setup.c
new file mode 100644
index 0000000..9173951
--- /dev/null
+++ b/src/core/generator-setup.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "generator-setup.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "rm-rf.h"
+
+int lookup_paths_mkdir_generator(LookupPaths *p) {
+ int r, q;
+
+ assert(p);
+
+ if (!p->generator || !p->generator_early || !p->generator_late)
+ return -EINVAL;
+
+ r = mkdir_p_label(p->generator, 0755);
+
+ q = mkdir_p_label(p->generator_early, 0755);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ q = mkdir_p_label(p->generator_late, 0755);
+ if (q < 0 && r >= 0)
+ r = q;
+
+ return r;
+}
+
+void lookup_paths_trim_generator(LookupPaths *p) {
+ assert(p);
+
+ /* Trim empty dirs */
+
+ if (p->generator)
+ (void) rmdir(p->generator);
+ if (p->generator_early)
+ (void) rmdir(p->generator_early);
+ if (p->generator_late)
+ (void) rmdir(p->generator_late);
+}
+
+void lookup_paths_flush_generator(LookupPaths *p) {
+ assert(p);
+
+ /* Flush the generated unit files in full */
+
+ if (p->generator)
+ (void) rm_rf(p->generator, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (p->generator_early)
+ (void) rm_rf(p->generator_early, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (p->generator_late)
+ (void) rm_rf(p->generator_late, REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ if (p->temporary_dir)
+ (void) rm_rf(p->temporary_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
diff --git a/src/core/generator-setup.h b/src/core/generator-setup.h
new file mode 100644
index 0000000..1cc816b
--- /dev/null
+++ b/src/core/generator-setup.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "path-lookup.h"
+
+int lookup_paths_mkdir_generator(LookupPaths *p);
+void lookup_paths_trim_generator(LookupPaths *p);
+void lookup_paths_flush_generator(LookupPaths *p);
diff --git a/src/core/hostname-setup.c b/src/core/hostname-setup.c
new file mode 100644
index 0000000..867ea19
--- /dev/null
+++ b/src/core/hostname-setup.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "hostname-setup.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "macro.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+#include "util.h"
+
+int hostname_setup(void) {
+ _cleanup_free_ char *b = NULL;
+ const char *hn = NULL;
+ bool enoent = false;
+ int r;
+
+ r = proc_cmdline_get_key("systemd.hostname", 0, &b);
+ if (r < 0)
+ log_warning_errno(r, "Failed to retrieve system hostname from kernel command line, ignoring: %m");
+ else if (r > 0) {
+ if (hostname_is_valid(b, true))
+ hn = b;
+ else {
+ log_warning("Hostname specified on kernel command line is invalid, ignoring: %s", b);
+ b = mfree(b);
+ }
+ }
+
+ if (!hn) {
+ r = read_etc_hostname(NULL, &b);
+ if (r < 0) {
+ if (r == -ENOENT)
+ enoent = true;
+ else
+ log_warning_errno(r, "Failed to read configured hostname: %m");
+ } else
+ hn = b;
+ }
+
+ if (isempty(hn)) {
+ /* Don't override the hostname if it is already set and not explicitly configured */
+ if (hostname_is_set())
+ return 0;
+
+ if (enoent)
+ log_info("No hostname configured.");
+
+ hn = FALLBACK_HOSTNAME;
+ }
+
+ r = sethostname_idempotent(hn);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to set hostname to <%s>: %m", hn);
+
+ log_info("Set hostname to <%s>.", hn);
+ return 0;
+}
diff --git a/src/core/hostname-setup.h b/src/core/hostname-setup.h
new file mode 100644
index 0000000..7fd0a02
--- /dev/null
+++ b/src/core/hostname-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int hostname_setup(void);
diff --git a/src/core/ima-setup.c b/src/core/ima-setup.c
new file mode 100644
index 0000000..7f517a0
--- /dev/null
+++ b/src/core/ima-setup.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2012 Roberto Sassu - Politecnico di Torino, Italy
+ TORSEC group — http://security.polito.it
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "ima-setup.h"
+#include "log.h"
+
+#define IMA_SECFS_DIR "/sys/kernel/security/ima"
+#define IMA_SECFS_POLICY IMA_SECFS_DIR "/policy"
+#define IMA_POLICY_PATH "/etc/ima/ima-policy"
+
+int ima_setup(void) {
+#if ENABLE_IMA
+ _cleanup_fclose_ FILE *input = NULL;
+ _cleanup_close_ int imafd = -1;
+ unsigned lineno = 0;
+ int r;
+
+ if (access(IMA_SECFS_DIR, F_OK) < 0) {
+ log_debug_errno(errno, "IMA support is disabled in the kernel, ignoring: %m");
+ return 0;
+ }
+
+ if (access(IMA_SECFS_POLICY, W_OK) < 0) {
+ log_warning_errno(errno, "Another IMA custom policy has already been loaded, ignoring: %m");
+ return 0;
+ }
+
+ if (access(IMA_POLICY_PATH, F_OK) < 0) {
+ log_debug_errno(errno, "No IMA custom policy file "IMA_POLICY_PATH", ignoring: %m");
+ return 0;
+ }
+
+ imafd = open(IMA_SECFS_POLICY, O_WRONLY|O_CLOEXEC);
+ if (imafd < 0) {
+ log_error_errno(errno, "Failed to open the IMA kernel interface "IMA_SECFS_POLICY", ignoring: %m");
+ return 0;
+ }
+
+ /* attempt to write the name of the policy file into sysfs file */
+ if (write(imafd, IMA_POLICY_PATH, STRLEN(IMA_POLICY_PATH)) > 0)
+ goto done;
+
+ /* fall back to copying the policy line-by-line */
+ input = fopen(IMA_POLICY_PATH, "re");
+ if (!input) {
+ log_warning_errno(errno, "Failed to open the IMA custom policy file "IMA_POLICY_PATH", ignoring: %m");
+ return 0;
+ }
+
+ safe_close(imafd);
+
+ imafd = open(IMA_SECFS_POLICY, O_WRONLY|O_CLOEXEC);
+ if (imafd < 0) {
+ log_error_errno(errno, "Failed to open the IMA kernel interface "IMA_SECFS_POLICY", ignoring: %m");
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t len;
+
+ r = read_line(input, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read the IMA custom policy file "IMA_POLICY_PATH": %m");
+ if (r == 0)
+ break;
+
+ len = strlen(line);
+ lineno++;
+
+ if (len > 0 && write(imafd, line, len) < 0)
+ return log_error_errno(errno, "Failed to load the IMA custom policy file "IMA_POLICY_PATH"%u: %m",
+ lineno);
+ }
+
+done:
+ log_info("Successfully loaded the IMA custom policy "IMA_POLICY_PATH".");
+#endif /* ENABLE_IMA */
+ return 0;
+}
diff --git a/src/core/ima-setup.h b/src/core/ima-setup.h
new file mode 100644
index 0000000..f964c7b
--- /dev/null
+++ b/src/core/ima-setup.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2012 Roberto Sassu - Politecnico di Torino, Italy
+ TORSEC group — http://security.polito.it
+***/
+
+int ima_setup(void);
diff --git a/src/core/ip-address-access.c b/src/core/ip-address-access.c
new file mode 100644
index 0000000..a11251e
--- /dev/null
+++ b/src/core/ip-address-access.c
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "extract-word.h"
+#include "hostname-util.h"
+#include "ip-address-access.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+int config_parse_ip_address_access(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ IPAddressAccessItem **list = data;
+ const char *p;
+ int r;
+
+ assert(list);
+
+ if (isempty(rvalue)) {
+ *list = ip_address_access_free_all(*list);
+ return 0;
+ }
+
+ p = rvalue;
+
+ for (;;) {
+ _cleanup_free_ IPAddressAccessItem *a = NULL;
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ if (streq(word, "any")) {
+ /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+ a->family = AF_INET;
+ LIST_APPEND(items, *list, a);
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ a->family = AF_INET6;
+
+ } else if (is_localhost(word)) {
+ /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+ a->family = AF_INET;
+ a->address.in.s_addr = htobe32(0x7f000000);
+ a->prefixlen = 8;
+ LIST_APPEND(items, *list, a);
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ a->family = AF_INET6;
+ a->address.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+ a->prefixlen = 128;
+
+ } else if (streq(word, "link-local")) {
+
+ /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+ a->family = AF_INET;
+ a->address.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+ a->prefixlen = 16;
+ LIST_APPEND(items, *list, a);
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ a->family = AF_INET6;
+ a->address.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xfe800000)
+ };
+ a->prefixlen = 64;
+
+ } else if (streq(word, "multicast")) {
+
+ /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+ a->family = AF_INET;
+ a->address.in.s_addr = htobe32((UINT32_C(224) << 24));
+ a->prefixlen = 4;
+ LIST_APPEND(items, *list, a);
+
+ a = new0(IPAddressAccessItem, 1);
+ if (!a)
+ return log_oom();
+
+ a->family = AF_INET6;
+ a->address.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xff000000)
+ };
+ a->prefixlen = 8;
+
+ } else {
+ r = in_addr_prefix_from_string_auto(word, &a->family, &a->address, &a->prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Address prefix is invalid, ignoring assignment: %s", word);
+ return 0;
+ }
+ }
+
+ LIST_APPEND(items, *list, a);
+ a = NULL;
+ }
+
+ *list = ip_address_access_reduce(*list);
+
+ return 0;
+}
+
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first) {
+ IPAddressAccessItem *next, *p = first;
+
+ while (p) {
+ next = p->items_next;
+ free(p);
+
+ p = next;
+ }
+
+ return NULL;
+}
+
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first) {
+ IPAddressAccessItem *a, *b, *tmp;
+ int r;
+
+ /* Drops all entries from the list that are covered by another entry in full, thus removing all redundant
+ * entries. */
+
+ LIST_FOREACH_SAFE(items, a, tmp, first) {
+
+ /* Drop irrelevant bits */
+ (void) in_addr_mask(a->family, &a->address, a->prefixlen);
+
+ LIST_FOREACH(items, b, first) {
+
+ if (a == b)
+ continue;
+
+ if (a->family != b->family)
+ continue;
+
+ if (b->prefixlen > a->prefixlen)
+ continue;
+
+ r = in_addr_prefix_covers(b->family,
+ &b->address,
+ b->prefixlen,
+ &a->address);
+ if (r > 0) {
+ /* b covers a fully, then let's drop a */
+ LIST_REMOVE(items, first, a);
+ free(a);
+ break;
+ }
+ }
+ }
+
+ return first;
+}
+
+bool ip_address_access_item_is_any(IPAddressAccessItem *first) {
+ /* Check for exactly two entries */
+ if (!first || !first->items_next || first->items_next->items_next)
+ return false;
+
+ /* Check both entries cover the full range */
+ if (first->prefixlen != 0 || first->items_next->prefixlen != 0)
+ return false;
+
+ /* Check that one of them is the IPv4 and the other IPv6 */
+ if (!((first->family == AF_INET && first->items_next->family == AF_INET6) ||
+ (first->family == AF_INET6 && first->items_next->family == AF_INET)))
+ return false;
+
+ /* No need to check the actual addresses, they don't matter if the prefix is zero */
+ return true;
+}
diff --git a/src/core/ip-address-access.h b/src/core/ip-address-access.h
new file mode 100644
index 0000000..71b5459
--- /dev/null
+++ b/src/core/ip-address-access.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef struct IPAddressAccessItem IPAddressAccessItem;
+
+struct IPAddressAccessItem {
+ int family;
+ unsigned char prefixlen;
+ union in_addr_union address;
+ LIST_FIELDS(IPAddressAccessItem, items);
+};
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_address_access);
+
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first);
+
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first);
+
+/* Returns true if a list consists of only the two items necessary for "any"
+ * (0.0.0.0/0 and ::/0). */
+bool ip_address_access_item_is_any(IPAddressAccessItem *first);
diff --git a/src/core/job.c b/src/core/job.c
new file mode 100644
index 0000000..f3c1a02
--- /dev/null
+++ b/src/core/job.c
@@ -0,0 +1,1698 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-id128.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "async.h"
+#include "cgroup.h"
+#include "dbus-job.h"
+#include "dbus.h"
+#include "escape.h"
+#include "fileio.h"
+#include "job.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "serialize.h"
+#include "set.h"
+#include "sort-util.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit.h"
+#include "virt.h"
+
+Job* job_new_raw(Unit *unit) {
+ Job *j;
+
+ /* used for deserialization */
+
+ assert(unit);
+
+ j = new(Job, 1);
+ if (!j)
+ return NULL;
+
+ *j = (Job) {
+ .manager = unit->manager,
+ .unit = unit,
+ .type = _JOB_TYPE_INVALID,
+ };
+
+ return j;
+}
+
+Job* job_new(Unit *unit, JobType type) {
+ Job *j;
+
+ assert(type < _JOB_TYPE_MAX);
+
+ j = job_new_raw(unit);
+ if (!j)
+ return NULL;
+
+ j->id = j->manager->current_job_id++;
+ j->type = type;
+
+ /* We don't link it here, that's what job_dependency() is for */
+
+ return j;
+}
+
+void job_unlink(Job *j) {
+ assert(j);
+ assert(!j->installed);
+ assert(!j->transaction_prev);
+ assert(!j->transaction_next);
+ assert(!j->subject_list);
+ assert(!j->object_list);
+
+ if (j->in_run_queue) {
+ prioq_remove(j->manager->run_queue, j, &j->run_queue_idx);
+ j->in_run_queue = false;
+ }
+
+ if (j->in_dbus_queue) {
+ LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
+ j->in_dbus_queue = false;
+ }
+
+ if (j->in_gc_queue) {
+ LIST_REMOVE(gc_queue, j->manager->gc_job_queue, j);
+ j->in_gc_queue = false;
+ }
+
+ j->timer_event_source = sd_event_source_unref(j->timer_event_source);
+}
+
+Job* job_free(Job *j) {
+ assert(j);
+ assert(!j->installed);
+ assert(!j->transaction_prev);
+ assert(!j->transaction_next);
+ assert(!j->subject_list);
+ assert(!j->object_list);
+
+ job_unlink(j);
+
+ sd_bus_track_unref(j->bus_track);
+ strv_free(j->deserialized_clients);
+
+ return mfree(j);
+}
+
+static void job_set_state(Job *j, JobState state) {
+ assert(j);
+ assert(state >= 0);
+ assert(state < _JOB_STATE_MAX);
+
+ if (j->state == state)
+ return;
+
+ j->state = state;
+
+ if (!j->installed)
+ return;
+
+ if (j->state == JOB_RUNNING)
+ j->unit->manager->n_running_jobs++;
+ else {
+ assert(j->state == JOB_WAITING);
+ assert(j->unit->manager->n_running_jobs > 0);
+
+ j->unit->manager->n_running_jobs--;
+
+ if (j->unit->manager->n_running_jobs <= 0)
+ j->unit->manager->jobs_in_progress_event_source = sd_event_source_unref(j->unit->manager->jobs_in_progress_event_source);
+ }
+}
+
+void job_uninstall(Job *j) {
+ Job **pj;
+
+ assert(j->installed);
+
+ job_set_state(j, JOB_WAITING);
+
+ pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
+ assert(*pj == j);
+
+ /* Detach from next 'bigger' objects */
+
+ /* daemon-reload should be transparent to job observers */
+ if (!MANAGER_IS_RELOADING(j->manager))
+ bus_job_send_removed_signal(j);
+
+ *pj = NULL;
+
+ unit_add_to_gc_queue(j->unit);
+
+ unit_add_to_dbus_queue(j->unit); /* The Job property of the unit has changed now */
+
+ hashmap_remove_value(j->manager->jobs, UINT32_TO_PTR(j->id), j);
+ j->installed = false;
+}
+
+static bool job_type_allows_late_merge(JobType t) {
+ /* Tells whether it is OK to merge a job of type 't' with an already
+ * running job.
+ * Reloads cannot be merged this way. Think of the sequence:
+ * 1. Reload of a daemon is in progress; the daemon has already loaded
+ * its config file, but hasn't completed the reload operation yet.
+ * 2. Edit foo's config file.
+ * 3. Trigger another reload to have the daemon use the new config.
+ * Should the second reload job be merged into the first one, the daemon
+ * would not know about the new config.
+ * JOB_RESTART jobs on the other hand can be merged, because they get
+ * patched into JOB_START after stopping the unit. So if we see a
+ * JOB_RESTART running, it means the unit hasn't stopped yet and at
+ * this time the merge is still allowed. */
+ return t != JOB_RELOAD;
+}
+
+static void job_merge_into_installed(Job *j, Job *other) {
+ assert(j->installed);
+ assert(j->unit == other->unit);
+
+ if (j->type != JOB_NOP)
+ assert_se(job_type_merge_and_collapse(&j->type, other->type, j->unit) == 0);
+ else
+ assert(other->type == JOB_NOP);
+
+ j->irreversible = j->irreversible || other->irreversible;
+ j->ignore_order = j->ignore_order || other->ignore_order;
+}
+
+Job* job_install(Job *j) {
+ Job **pj;
+ Job *uj;
+
+ assert(!j->installed);
+ assert(j->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(j->state == JOB_WAITING);
+
+ pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
+ uj = *pj;
+
+ if (uj) {
+ if (job_type_is_conflicting(uj->type, j->type))
+ job_finish_and_invalidate(uj, JOB_CANCELED, false, false);
+ else {
+ /* not conflicting, i.e. mergeable */
+
+ if (uj->state == JOB_WAITING ||
+ (job_type_allows_late_merge(j->type) && job_type_is_superset(uj->type, j->type))) {
+ job_merge_into_installed(uj, j);
+ log_unit_debug(uj->unit,
+ "Merged %s/%s into installed job %s/%s as %"PRIu32,
+ j->unit->id, job_type_to_string(j->type), uj->unit->id,
+ job_type_to_string(uj->type), uj->id);
+ return uj;
+ } else {
+ /* already running and not safe to merge into */
+ /* Patch uj to become a merged job and re-run it. */
+ /* XXX It should be safer to queue j to run after uj finishes, but it is
+ * not currently possible to have more than one installed job per unit. */
+ job_merge_into_installed(uj, j);
+ log_unit_debug(uj->unit,
+ "Merged into running job, re-running: %s/%s as %"PRIu32,
+ uj->unit->id, job_type_to_string(uj->type), uj->id);
+
+ job_set_state(uj, JOB_WAITING);
+ return uj;
+ }
+ }
+ }
+
+ /* Install the job */
+ *pj = j;
+ j->installed = true;
+
+ j->manager->n_installed_jobs++;
+ log_unit_debug(j->unit,
+ "Installed new job %s/%s as %u",
+ j->unit->id, job_type_to_string(j->type), (unsigned) j->id);
+
+ job_add_to_gc_queue(j);
+
+ job_add_to_dbus_queue(j); /* announce this job to clients */
+ unit_add_to_dbus_queue(j->unit); /* The Job property of the unit has changed now */
+
+ return j;
+}
+
+int job_install_deserialized(Job *j) {
+ Job **pj;
+ int r;
+
+ assert(!j->installed);
+
+ if (j->type < 0 || j->type >= _JOB_TYPE_MAX_IN_TRANSACTION)
+ return log_unit_debug_errno(j->unit, SYNTHETIC_ERRNO(EINVAL),
+ "Invalid job type %s in deserialization.",
+ strna(job_type_to_string(j->type)));
+
+ pj = (j->type == JOB_NOP) ? &j->unit->nop_job : &j->unit->job;
+ if (*pj)
+ return log_unit_debug_errno(j->unit, SYNTHETIC_ERRNO(EEXIST),
+ "Unit already has a job installed. Not installing deserialized job.");
+
+ r = hashmap_ensure_allocated(&j->manager->jobs, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(j->manager->jobs, UINT32_TO_PTR(j->id), j);
+ if (r == -EEXIST)
+ return log_unit_debug_errno(j->unit, r, "Job ID %" PRIu32 " already used, cannot deserialize job.", j->id);
+ if (r < 0)
+ return log_unit_debug_errno(j->unit, r, "Failed to insert job into jobs hash table: %m");
+
+ *pj = j;
+ j->installed = true;
+
+ if (j->state == JOB_RUNNING)
+ j->unit->manager->n_running_jobs++;
+
+ log_unit_debug(j->unit,
+ "Reinstalled deserialized job %s/%s as %u",
+ j->unit->id, job_type_to_string(j->type), (unsigned) j->id);
+ return 0;
+}
+
+JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
+ JobDependency *l;
+
+ assert(object);
+
+ /* Adds a new job link, which encodes that the 'subject' job
+ * needs the 'object' job in some way. If 'subject' is NULL
+ * this means the 'anchor' job (i.e. the one the user
+ * explicitly asked for) is the requester. */
+
+ l = new0(JobDependency, 1);
+ if (!l)
+ return NULL;
+
+ l->subject = subject;
+ l->object = object;
+ l->matters = matters;
+ l->conflicts = conflicts;
+
+ if (subject)
+ LIST_PREPEND(subject, subject->subject_list, l);
+
+ LIST_PREPEND(object, object->object_list, l);
+
+ return l;
+}
+
+void job_dependency_free(JobDependency *l) {
+ assert(l);
+
+ if (l->subject)
+ LIST_REMOVE(subject, l->subject->subject_list, l);
+
+ LIST_REMOVE(object, l->object->object_list, l);
+
+ free(l);
+}
+
+void job_dump(Job *j, FILE *f, const char *prefix) {
+ assert(j);
+ assert(f);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%s-> Job %u:\n"
+ "%s\tAction: %s -> %s\n"
+ "%s\tState: %s\n"
+ "%s\tIrreversible: %s\n"
+ "%s\tMay GC: %s\n",
+ prefix, j->id,
+ prefix, j->unit->id, job_type_to_string(j->type),
+ prefix, job_state_to_string(j->state),
+ prefix, yes_no(j->irreversible),
+ prefix, yes_no(job_may_gc(j)));
+}
+
+/*
+ * Merging is commutative, so imagine the matrix as symmetric. We store only
+ * its lower triangle to avoid duplication. We don't store the main diagonal,
+ * because A merged with A is simply A.
+ *
+ * If the resulting type is collapsed immediately afterwards (to get rid of
+ * the JOB_RELOAD_OR_START, which lies outside the lookup function's domain),
+ * the following properties hold:
+ *
+ * Merging is associative! A merged with B, and then merged with C is the same
+ * as A merged with the result of B merged with C.
+ *
+ * Mergeability is transitive! If A can be merged with B and B with C then
+ * A also with C.
+ *
+ * Also, if A merged with B cannot be merged with C, then either A or B cannot
+ * be merged with C either.
+ */
+static const JobType job_merging_table[] = {
+/* What \ With * JOB_START JOB_VERIFY_ACTIVE JOB_STOP JOB_RELOAD */
+/*********************************************************************************/
+/*JOB_START */
+/*JOB_VERIFY_ACTIVE */ JOB_START,
+/*JOB_STOP */ -1, -1,
+/*JOB_RELOAD */ JOB_RELOAD_OR_START, JOB_RELOAD, -1,
+/*JOB_RESTART */ JOB_RESTART, JOB_RESTART, -1, JOB_RESTART,
+};
+
+JobType job_type_lookup_merge(JobType a, JobType b) {
+ assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX_MERGING * (_JOB_TYPE_MAX_MERGING - 1) / 2);
+ assert(a >= 0 && a < _JOB_TYPE_MAX_MERGING);
+ assert(b >= 0 && b < _JOB_TYPE_MAX_MERGING);
+
+ if (a == b)
+ return a;
+
+ if (a < b) {
+ JobType tmp = a;
+ a = b;
+ b = tmp;
+ }
+
+ return job_merging_table[(a - 1) * a / 2 + b];
+}
+
+bool job_type_is_redundant(JobType a, UnitActiveState b) {
+ switch (a) {
+
+ case JOB_START:
+ return IN_SET(b, UNIT_ACTIVE, UNIT_RELOADING);
+
+ case JOB_STOP:
+ return IN_SET(b, UNIT_INACTIVE, UNIT_FAILED);
+
+ case JOB_VERIFY_ACTIVE:
+ return IN_SET(b, UNIT_ACTIVE, UNIT_RELOADING);
+
+ case JOB_RELOAD:
+ return
+ b == UNIT_RELOADING;
+
+ case JOB_RESTART:
+ return
+ b == UNIT_ACTIVATING;
+
+ case JOB_NOP:
+ return true;
+
+ default:
+ assert_not_reached("Invalid job type");
+ }
+}
+
+JobType job_type_collapse(JobType t, Unit *u) {
+ UnitActiveState s;
+
+ switch (t) {
+
+ case JOB_TRY_RESTART:
+ s = unit_active_state(u);
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(s))
+ return JOB_NOP;
+
+ return JOB_RESTART;
+
+ case JOB_TRY_RELOAD:
+ s = unit_active_state(u);
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(s))
+ return JOB_NOP;
+
+ return JOB_RELOAD;
+
+ case JOB_RELOAD_OR_START:
+ s = unit_active_state(u);
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(s))
+ return JOB_START;
+
+ return JOB_RELOAD;
+
+ default:
+ return t;
+ }
+}
+
+int job_type_merge_and_collapse(JobType *a, JobType b, Unit *u) {
+ JobType t;
+
+ t = job_type_lookup_merge(*a, b);
+ if (t < 0)
+ return -EEXIST;
+
+ *a = job_type_collapse(t, u);
+ return 0;
+}
+
+static bool job_is_runnable(Job *j) {
+ Unit *other;
+ void *v;
+
+ assert(j);
+ assert(j->installed);
+
+ /* Checks whether there is any job running for the units this
+ * job needs to be running after (in the case of a 'positive'
+ * job type) or before (in the case of a 'negative' job
+ * type. */
+
+ /* Note that unit types have a say in what is runnable,
+ * too. For example, if they return -EAGAIN from
+ * unit_start() they can indicate they are not
+ * runnable yet. */
+
+ /* First check if there is an override */
+ if (j->ignore_order)
+ return true;
+
+ if (j->type == JOB_NOP)
+ return true;
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER])
+ if (other->job && job_compare(j, other->job, UNIT_AFTER) > 0) {
+ log_unit_debug(j->unit,
+ "starting held back, waiting for: %s",
+ other->id);
+ return false;
+ }
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE])
+ if (other->job && job_compare(j, other->job, UNIT_BEFORE) > 0) {
+ log_unit_debug(j->unit,
+ "stopping held back, waiting for: %s",
+ other->id);
+ return false;
+ }
+
+ return true;
+}
+
+static void job_change_type(Job *j, JobType newtype) {
+ assert(j);
+
+ log_unit_debug(j->unit,
+ "Converting job %s/%s -> %s/%s",
+ j->unit->id, job_type_to_string(j->type),
+ j->unit->id, job_type_to_string(newtype));
+
+ j->type = newtype;
+}
+
+_pure_ static const char* job_get_begin_status_message_format(Unit *u, JobType t) {
+ const char *format;
+
+ assert(u);
+
+ if (t == JOB_RELOAD)
+ return "Reloading %s.";
+
+ assert(IN_SET(t, JOB_START, JOB_STOP));
+
+ format = UNIT_VTABLE(u)->status_message_formats.starting_stopping[t == JOB_STOP];
+ if (format)
+ return format;
+
+ /* Return generic strings */
+ if (t == JOB_START)
+ return "Starting %s.";
+ else {
+ assert(t == JOB_STOP);
+ return "Stopping %s.";
+ }
+}
+
+static void job_print_begin_status_message(Unit *u, JobType t) {
+ const char *format;
+
+ assert(u);
+
+ /* Reload status messages have traditionally not been printed to console. */
+ if (!IN_SET(t, JOB_START, JOB_STOP))
+ return;
+
+ format = job_get_begin_status_message_format(u, t);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ unit_status_printf(u, STATUS_TYPE_NORMAL, "", format);
+ REENABLE_WARNING;
+}
+
+static void job_log_begin_status_message(Unit *u, uint32_t job_id, JobType t) {
+ const char *format, *mid;
+ char buf[LINE_MAX];
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ if (!IN_SET(t, JOB_START, JOB_STOP, JOB_RELOAD))
+ return;
+
+ if (log_on_console()) /* Skip this if it would only go on the console anyway */
+ return;
+
+ /* We log status messages for all units and all operations. */
+
+ format = job_get_begin_status_message_format(u, t);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ (void) snprintf(buf, sizeof buf, format, unit_status_string(u));
+ REENABLE_WARNING;
+
+ mid = t == JOB_START ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTING_STR :
+ t == JOB_STOP ? "MESSAGE_ID=" SD_MESSAGE_UNIT_STOPPING_STR :
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_RELOADING_STR;
+
+ /* Note that we deliberately use LOG_MESSAGE() instead of
+ * LOG_UNIT_MESSAGE() here, since this is supposed to mimic
+ * closely what is written to screen using the status output,
+ * which is supposed the highest level, friendliest output
+ * possible, which means we should avoid the low-level unit
+ * name. */
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ mid);
+}
+
+static void job_emit_begin_status_message(Unit *u, uint32_t job_id, JobType t) {
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ job_log_begin_status_message(u, job_id, t);
+ job_print_begin_status_message(u, t);
+}
+
+static int job_perform_on_unit(Job **j) {
+ uint32_t id;
+ Manager *m;
+ JobType t;
+ Unit *u;
+ int r;
+
+ /* While we execute this operation the job might go away (for
+ * example: because it finishes immediately or is replaced by
+ * a new, conflicting job.) To make sure we don't access a
+ * freed job later on we store the id here, so that we can
+ * verify the job is still valid. */
+
+ assert(j);
+ assert(*j);
+
+ m = (*j)->manager;
+ u = (*j)->unit;
+ t = (*j)->type;
+ id = (*j)->id;
+
+ switch (t) {
+ case JOB_START:
+ r = unit_start(u);
+ break;
+
+ case JOB_RESTART:
+ t = JOB_STOP;
+ _fallthrough_;
+ case JOB_STOP:
+ r = unit_stop(u);
+ break;
+
+ case JOB_RELOAD:
+ r = unit_reload(u);
+ break;
+
+ default:
+ assert_not_reached("Invalid job type");
+ }
+
+ /* Log if the job still exists and the start/stop/reload function actually did something. Note that this means
+ * for units for which there's no 'activating' phase (i.e. because we transition directly from 'inactive' to
+ * 'active') we'll possibly skip the "Starting..." message. */
+ *j = manager_get_job(m, id);
+ if (*j && r > 0)
+ job_emit_begin_status_message(u, id, t);
+
+ return r;
+}
+
+int job_run_and_invalidate(Job *j) {
+ int r;
+
+ assert(j);
+ assert(j->installed);
+ assert(j->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(j->in_run_queue);
+
+ prioq_remove(j->manager->run_queue, j, &j->run_queue_idx);
+ j->in_run_queue = false;
+
+ if (j->state != JOB_WAITING)
+ return 0;
+
+ if (!job_is_runnable(j))
+ return -EAGAIN;
+
+ job_start_timer(j, true);
+ job_set_state(j, JOB_RUNNING);
+ job_add_to_dbus_queue(j);
+
+ switch (j->type) {
+
+ case JOB_VERIFY_ACTIVE: {
+ UnitActiveState t;
+
+ t = unit_active_state(j->unit);
+ if (UNIT_IS_ACTIVE_OR_RELOADING(t))
+ r = -EALREADY;
+ else if (t == UNIT_ACTIVATING)
+ r = -EAGAIN;
+ else
+ r = -EBADR;
+ break;
+ }
+
+ case JOB_START:
+ case JOB_STOP:
+ case JOB_RESTART:
+ r = job_perform_on_unit(&j);
+
+ /* If the unit type does not support starting/stopping, then simply wait. */
+ if (r == -EBADR)
+ r = 0;
+ break;
+
+ case JOB_RELOAD:
+ r = job_perform_on_unit(&j);
+ break;
+
+ case JOB_NOP:
+ r = -EALREADY;
+ break;
+
+ default:
+ assert_not_reached("Unknown job type");
+ }
+
+ if (j) {
+ if (r == -EAGAIN)
+ job_set_state(j, JOB_WAITING); /* Hmm, not ready after all, let's return to JOB_WAITING state */
+ else if (r == -EALREADY) /* already being executed */
+ r = job_finish_and_invalidate(j, JOB_DONE, true, true);
+ else if (r == -ECOMM) /* condition failed, but all is good */
+ r = job_finish_and_invalidate(j, JOB_DONE, true, false);
+ else if (r == -EBADR)
+ r = job_finish_and_invalidate(j, JOB_SKIPPED, true, false);
+ else if (r == -ENOEXEC)
+ r = job_finish_and_invalidate(j, JOB_INVALID, true, false);
+ else if (r == -EPROTO)
+ r = job_finish_and_invalidate(j, JOB_ASSERT, true, false);
+ else if (r == -EOPNOTSUPP)
+ r = job_finish_and_invalidate(j, JOB_UNSUPPORTED, true, false);
+ else if (r == -ENOLINK)
+ r = job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false);
+ else if (r == -ESTALE)
+ r = job_finish_and_invalidate(j, JOB_ONCE, true, false);
+ else if (r < 0)
+ r = job_finish_and_invalidate(j, JOB_FAILED, true, false);
+ }
+
+ return r;
+}
+
+_pure_ static const char *job_get_done_status_message_format(Unit *u, JobType t, JobResult result) {
+
+ static const char *const generic_finished_start_job[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = "Started %s.",
+ [JOB_TIMEOUT] = "Timed out starting %s.",
+ [JOB_FAILED] = "Failed to start %s.",
+ [JOB_DEPENDENCY] = "Dependency failed for %s.",
+ [JOB_ASSERT] = "Assertion failed for %s.",
+ [JOB_UNSUPPORTED] = "Starting of %s not supported.",
+ [JOB_COLLECTED] = "Unnecessary job for %s was removed.",
+ [JOB_ONCE] = "Unit %s has been started before and cannot be started again."
+ };
+ static const char *const generic_finished_stop_job[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = "Stopped %s.",
+ [JOB_FAILED] = "Stopped (with error) %s.",
+ [JOB_TIMEOUT] = "Timed out stopping %s.",
+ };
+ static const char *const generic_finished_reload_job[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = "Reloaded %s.",
+ [JOB_FAILED] = "Reload failed for %s.",
+ [JOB_TIMEOUT] = "Timed out reloading %s.",
+ };
+ /* When verify-active detects the unit is inactive, report it.
+ * Most likely a DEPEND warning from a requisiting unit will
+ * occur next and it's nice to see what was requisited. */
+ static const char *const generic_finished_verify_active_job[_JOB_RESULT_MAX] = {
+ [JOB_SKIPPED] = "%s is not active.",
+ };
+
+ const char *format;
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ if (IN_SET(t, JOB_START, JOB_STOP, JOB_RESTART)) {
+ const UnitStatusMessageFormats *formats = &UNIT_VTABLE(u)->status_message_formats;
+ if (formats->finished_job) {
+ format = formats->finished_job(u, t, result);
+ if (format)
+ return format;
+ }
+ format = t == JOB_START ?
+ formats->finished_start_job[result] :
+ formats->finished_stop_job[result];
+ if (format)
+ return format;
+ }
+
+ /* Return generic strings */
+ if (t == JOB_START)
+ return generic_finished_start_job[result];
+ else if (IN_SET(t, JOB_STOP, JOB_RESTART))
+ return generic_finished_stop_job[result];
+ else if (t == JOB_RELOAD)
+ return generic_finished_reload_job[result];
+ else if (t == JOB_VERIFY_ACTIVE)
+ return generic_finished_verify_active_job[result];
+
+ return NULL;
+}
+
+static const struct {
+ const char *color, *word;
+} job_print_done_status_messages[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = { ANSI_OK_COLOR, " OK " },
+ [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " },
+ [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" },
+ [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" },
+ [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " },
+ [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
+ [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
+ /* JOB_COLLECTED */
+ [JOB_ONCE] = { ANSI_HIGHLIGHT_RED, " ONCE " },
+};
+
+static void job_print_done_status_message(Unit *u, JobType t, JobResult result) {
+ const char *format;
+ const char *status;
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ /* Reload status messages have traditionally not been printed to console. */
+ if (t == JOB_RELOAD)
+ return;
+
+ /* No message if the job did not actually do anything due to failed condition. */
+ if (t == JOB_START && result == JOB_DONE && !u->condition_result)
+ return;
+
+ if (!job_print_done_status_messages[result].word)
+ return;
+
+ format = job_get_done_status_message_format(u, t, result);
+ if (!format)
+ return;
+
+ if (log_get_show_color())
+ status = strjoina(job_print_done_status_messages[result].color,
+ job_print_done_status_messages[result].word,
+ ANSI_NORMAL);
+ else
+ status = job_print_done_status_messages[result].word;
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ unit_status_printf(u,
+ result == JOB_DONE ? STATUS_TYPE_NORMAL : STATUS_TYPE_NOTICE,
+ status, format);
+ REENABLE_WARNING;
+
+ if (t == JOB_START && result == JOB_FAILED) {
+ _cleanup_free_ char *quoted;
+
+ quoted = shell_maybe_quote(u->id, ESCAPE_BACKSLASH);
+ manager_status_printf(u->manager, STATUS_TYPE_NORMAL, NULL, "See 'systemctl status %s' for details.", strna(quoted));
+ }
+}
+
+static void job_log_done_status_message(Unit *u, uint32_t job_id, JobType t, JobResult result) {
+ const char *format, *mid;
+ char buf[LINE_MAX];
+ static const int job_result_log_level[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = LOG_INFO,
+ [JOB_CANCELED] = LOG_INFO,
+ [JOB_TIMEOUT] = LOG_ERR,
+ [JOB_FAILED] = LOG_ERR,
+ [JOB_DEPENDENCY] = LOG_WARNING,
+ [JOB_SKIPPED] = LOG_NOTICE,
+ [JOB_INVALID] = LOG_INFO,
+ [JOB_ASSERT] = LOG_WARNING,
+ [JOB_UNSUPPORTED] = LOG_WARNING,
+ [JOB_COLLECTED] = LOG_INFO,
+ [JOB_ONCE] = LOG_ERR,
+ };
+
+ assert(u);
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ /* Skip printing if output goes to the console, and job_print_status_message()
+ will actually print something to the console. */
+ if (log_on_console() && job_print_done_status_messages[result].word)
+ return;
+
+ /* Show condition check message if the job did not actually do anything due to failed condition. */
+ if ((t == JOB_START && result == JOB_DONE && !u->condition_result) ||
+ (t == JOB_START && result == JOB_SKIPPED)) {
+ log_struct(LOG_INFO,
+ "MESSAGE=Condition check resulted in %s being skipped.", unit_status_string(u),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTED_STR);
+
+ return;
+ }
+
+ format = job_get_done_status_message_format(u, t, result);
+ if (!format)
+ return;
+
+ /* The description might be longer than the buffer, but that's OK,
+ * we'll just truncate it here. Note that we use snprintf() rather than
+ * xsprintf() on purpose here: we are fine with truncation and don't
+ * consider that an error. */
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ (void) snprintf(buf, sizeof(buf), format, unit_status_string(u));
+ REENABLE_WARNING;
+
+ switch (t) {
+
+ case JOB_START:
+ if (result == JOB_DONE)
+ mid = "MESSAGE_ID=" SD_MESSAGE_UNIT_STARTED_STR;
+ else
+ mid = "MESSAGE_ID=" SD_MESSAGE_UNIT_FAILED_STR;
+ break;
+
+ case JOB_RELOAD:
+ mid = "MESSAGE_ID=" SD_MESSAGE_UNIT_RELOADED_STR;
+ break;
+
+ case JOB_STOP:
+ case JOB_RESTART:
+ mid = "MESSAGE_ID=" SD_MESSAGE_UNIT_STOPPED_STR;
+ break;
+
+ default:
+ log_struct(job_result_log_level[result],
+ LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u));
+ return;
+ }
+
+ log_struct(job_result_log_level[result],
+ LOG_MESSAGE("%s", buf),
+ "JOB_ID=%" PRIu32, job_id,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ mid);
+}
+
+static void job_emit_done_status_message(Unit *u, uint32_t job_id, JobType t, JobResult result) {
+ assert(u);
+
+ job_log_done_status_message(u, job_id, t, result);
+ job_print_done_status_message(u, t, result);
+}
+
+static void job_fail_dependencies(Unit *u, UnitDependency d) {
+ Unit *other;
+ void *v;
+
+ assert(u);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[d]) {
+ Job *j = other->job;
+
+ if (!j)
+ continue;
+ if (!IN_SET(j->type, JOB_START, JOB_VERIFY_ACTIVE))
+ continue;
+
+ job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false);
+ }
+}
+
+int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) {
+ Unit *u;
+ Unit *other;
+ JobType t;
+ void *v;
+
+ assert(j);
+ assert(j->installed);
+ assert(j->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+
+ u = j->unit;
+ t = j->type;
+
+ j->result = result;
+
+ log_unit_debug(u, "Job %" PRIu32 " %s/%s finished, result=%s",
+ j->id, u->id, job_type_to_string(t), job_result_to_string(result));
+
+ /* If this job did nothing to the respective unit we don't log the status message */
+ if (!already)
+ job_emit_done_status_message(u, j->id, t, result);
+
+ /* Patch restart jobs so that they become normal start jobs */
+ if (result == JOB_DONE && t == JOB_RESTART) {
+
+ job_change_type(j, JOB_START);
+ job_set_state(j, JOB_WAITING);
+
+ job_add_to_dbus_queue(j);
+ job_add_to_run_queue(j);
+ job_add_to_gc_queue(j);
+
+ goto finish;
+ }
+
+ if (IN_SET(result, JOB_FAILED, JOB_INVALID))
+ j->manager->n_failed_jobs++;
+
+ job_uninstall(j);
+ job_free(j);
+
+ /* Fail depending jobs on failure */
+ if (result != JOB_DONE && recursive) {
+ if (IN_SET(t, JOB_START, JOB_VERIFY_ACTIVE)) {
+ job_fail_dependencies(u, UNIT_REQUIRED_BY);
+ job_fail_dependencies(u, UNIT_REQUISITE_OF);
+ job_fail_dependencies(u, UNIT_BOUND_BY);
+ } else if (t == JOB_STOP)
+ job_fail_dependencies(u, UNIT_CONFLICTED_BY);
+ }
+
+ /* A special check to make sure we take down anything RequisiteOf if we
+ * aren't active. This is when the verify-active job merges with a
+ * satisfying job type, and then loses it's invalidation effect, as the
+ * result there is JOB_DONE for the start job we merged into, while we
+ * should be failing the depending job if the said unit isn't in fact
+ * active. Oneshots are an example of this, where going directly from
+ * activating to inactive is success.
+ *
+ * This happens when you use ConditionXYZ= in a unit too, since in that
+ * case the job completes with the JOB_DONE result, but the unit never
+ * really becomes active. Note that such a case still involves merging:
+ *
+ * A start job waits for something else, and a verify-active comes in
+ * and merges in the installed job. Then, later, when it becomes
+ * runnable, it finishes with JOB_DONE result as execution on conditions
+ * not being met is skipped, breaking our dependency semantics.
+ *
+ * Also, depending on if start job waits or not, the merging may or may
+ * not happen (the verify-active job may trigger after it finishes), so
+ * you get undeterministic results without this check.
+ */
+ if (result == JOB_DONE && recursive && !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) {
+ if (IN_SET(t, JOB_START, JOB_RELOAD))
+ job_fail_dependencies(u, UNIT_REQUISITE_OF);
+ }
+ /* Trigger OnFailure dependencies that are not generated by
+ * the unit itself. We don't treat JOB_CANCELED as failure in
+ * this context. And JOB_FAILURE is already handled by the
+ * unit itself. */
+ if (IN_SET(result, JOB_TIMEOUT, JOB_DEPENDENCY)) {
+ log_struct(LOG_NOTICE,
+ "JOB_TYPE=%s", job_type_to_string(t),
+ "JOB_RESULT=%s", job_result_to_string(result),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_MESSAGE(u, "Job %s/%s failed with result '%s'.",
+ u->id,
+ job_type_to_string(t),
+ job_result_to_string(result)));
+
+ unit_start_on_failure(u);
+ }
+
+ unit_trigger_notify(u);
+
+finish:
+ /* Try to start the next jobs that can be started */
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_AFTER])
+ if (other->job) {
+ job_add_to_run_queue(other->job);
+ job_add_to_gc_queue(other->job);
+ }
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BEFORE])
+ if (other->job) {
+ job_add_to_run_queue(other->job);
+ job_add_to_gc_queue(other->job);
+ }
+
+ manager_check_finished(u->manager);
+
+ return 0;
+}
+
+static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *userdata) {
+ Job *j = userdata;
+ Unit *u;
+
+ assert(j);
+ assert(s == j->timer_event_source);
+
+ log_unit_warning(j->unit, "Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
+
+ u = j->unit;
+ job_finish_and_invalidate(j, JOB_TIMEOUT, true, false);
+
+ emergency_action(u->manager, u->job_timeout_action,
+ EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN,
+ u->job_timeout_reboot_arg, -1, "job timed out");
+
+ return 0;
+}
+
+int job_start_timer(Job *j, bool job_running) {
+ int r;
+ usec_t timeout_time, old_timeout_time;
+
+ if (job_running) {
+ j->begin_running_usec = now(CLOCK_MONOTONIC);
+
+ if (j->unit->job_running_timeout == USEC_INFINITY)
+ return 0;
+
+ timeout_time = usec_add(j->begin_running_usec, j->unit->job_running_timeout);
+
+ if (j->timer_event_source) {
+ /* Update only if JobRunningTimeoutSec= results in earlier timeout */
+ r = sd_event_source_get_time(j->timer_event_source, &old_timeout_time);
+ if (r < 0)
+ return r;
+
+ if (old_timeout_time <= timeout_time)
+ return 0;
+
+ return sd_event_source_set_time(j->timer_event_source, timeout_time);
+ }
+ } else {
+ if (j->timer_event_source)
+ return 0;
+
+ j->begin_usec = now(CLOCK_MONOTONIC);
+
+ if (j->unit->job_timeout == USEC_INFINITY)
+ return 0;
+
+ timeout_time = usec_add(j->begin_usec, j->unit->job_timeout);
+ }
+
+ r = sd_event_add_time(
+ j->manager->event,
+ &j->timer_event_source,
+ CLOCK_MONOTONIC,
+ timeout_time, 0,
+ job_dispatch_timer, j);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(j->timer_event_source, "job-start");
+
+ return 0;
+}
+
+void job_add_to_run_queue(Job *j) {
+ int r;
+
+ assert(j);
+ assert(j->installed);
+
+ if (j->in_run_queue)
+ return;
+
+ if (prioq_isempty(j->manager->run_queue)) {
+ r = sd_event_source_set_enabled(j->manager->run_queue_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable job run queue event source, ignoring: %m");
+ }
+
+ r = prioq_put(j->manager->run_queue, j, &j->run_queue_idx);
+ if (r < 0)
+ log_warning_errno(r, "Failed put job in run queue, ignoring: %m");
+ else
+ j->in_run_queue = true;
+}
+
+void job_add_to_dbus_queue(Job *j) {
+ assert(j);
+ assert(j->installed);
+
+ if (j->in_dbus_queue)
+ return;
+
+ /* We don't check if anybody is subscribed here, since this
+ * job might just have been created and not yet assigned to a
+ * connection/client. */
+
+ LIST_PREPEND(dbus_queue, j->manager->dbus_job_queue, j);
+ j->in_dbus_queue = true;
+}
+
+char *job_dbus_path(Job *j) {
+ char *p;
+
+ assert(j);
+
+ if (asprintf(&p, "/org/freedesktop/systemd1/job/%"PRIu32, j->id) < 0)
+ return NULL;
+
+ return p;
+}
+
+int job_serialize(Job *j, FILE *f) {
+ assert(j);
+ assert(f);
+
+ (void) serialize_item_format(f, "job-id", "%u", j->id);
+ (void) serialize_item(f, "job-type", job_type_to_string(j->type));
+ (void) serialize_item(f, "job-state", job_state_to_string(j->state));
+ (void) serialize_bool(f, "job-irreversible", j->irreversible);
+ (void) serialize_bool(f, "job-sent-dbus-new-signal", j->sent_dbus_new_signal);
+ (void) serialize_bool(f, "job-ignore-order", j->ignore_order);
+
+ if (j->begin_usec > 0)
+ (void) serialize_usec(f, "job-begin", j->begin_usec);
+ if (j->begin_running_usec > 0)
+ (void) serialize_usec(f, "job-begin-running", j->begin_running_usec);
+
+ bus_track_serialize(j->bus_track, f, "subscribed");
+
+ /* End marker */
+ fputc('\n', f);
+ return 0;
+}
+
+int job_deserialize(Job *j, FILE *f) {
+ int r;
+
+ assert(j);
+ assert(f);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l, *v;
+ size_t k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ return 0;
+
+ l = strstrip(line);
+
+ /* End marker */
+ if (isempty(l))
+ return 0;
+
+ k = strcspn(l, "=");
+
+ if (l[k] == '=') {
+ l[k] = 0;
+ v = l+k+1;
+ } else
+ v = l+k;
+
+ if (streq(l, "job-id")) {
+
+ if (safe_atou32(v, &j->id) < 0)
+ log_debug("Failed to parse job id value: %s", v);
+
+ } else if (streq(l, "job-type")) {
+ JobType t;
+
+ t = job_type_from_string(v);
+ if (t < 0)
+ log_debug("Failed to parse job type: %s", v);
+ else if (t >= _JOB_TYPE_MAX_IN_TRANSACTION)
+ log_debug("Cannot deserialize job of type: %s", v);
+ else
+ j->type = t;
+
+ } else if (streq(l, "job-state")) {
+ JobState s;
+
+ s = job_state_from_string(v);
+ if (s < 0)
+ log_debug("Failed to parse job state: %s", v);
+ else
+ job_set_state(j, s);
+
+ } else if (streq(l, "job-irreversible")) {
+ int b;
+
+ b = parse_boolean(v);
+ if (b < 0)
+ log_debug("Failed to parse job irreversible flag: %s", v);
+ else
+ j->irreversible = j->irreversible || b;
+
+ } else if (streq(l, "job-sent-dbus-new-signal")) {
+ int b;
+
+ b = parse_boolean(v);
+ if (b < 0)
+ log_debug("Failed to parse job sent_dbus_new_signal flag: %s", v);
+ else
+ j->sent_dbus_new_signal = j->sent_dbus_new_signal || b;
+
+ } else if (streq(l, "job-ignore-order")) {
+ int b;
+
+ b = parse_boolean(v);
+ if (b < 0)
+ log_debug("Failed to parse job ignore_order flag: %s", v);
+ else
+ j->ignore_order = j->ignore_order || b;
+
+ } else if (streq(l, "job-begin"))
+ (void) deserialize_usec(v, &j->begin_usec);
+
+ else if (streq(l, "job-begin-running"))
+ (void) deserialize_usec(v, &j->begin_running_usec);
+
+ else if (streq(l, "subscribed")) {
+ if (strv_extend(&j->deserialized_clients, v) < 0)
+ return log_oom();
+ } else
+ log_debug("Unknown job serialization key: %s", l);
+ }
+}
+
+int job_coldplug(Job *j) {
+ int r;
+ usec_t timeout_time = USEC_INFINITY;
+
+ assert(j);
+
+ /* After deserialization is complete and the bus connection
+ * set up again, let's start watching our subscribers again */
+ (void) bus_job_coldplug_bus_track(j);
+
+ if (j->state == JOB_WAITING)
+ job_add_to_run_queue(j);
+
+ /* Maybe due to new dependencies we don't actually need this job anymore? */
+ job_add_to_gc_queue(j);
+
+ /* Create timer only when job began or began running and the respective timeout is finite.
+ * Follow logic of job_start_timer() if both timeouts are finite */
+ if (j->begin_usec == 0)
+ return 0;
+
+ if (j->unit->job_timeout != USEC_INFINITY)
+ timeout_time = usec_add(j->begin_usec, j->unit->job_timeout);
+
+ if (timestamp_is_set(j->begin_running_usec))
+ timeout_time = MIN(timeout_time, usec_add(j->begin_running_usec, j->unit->job_running_timeout));
+
+ if (timeout_time == USEC_INFINITY)
+ return 0;
+
+ j->timer_event_source = sd_event_source_unref(j->timer_event_source);
+
+ r = sd_event_add_time(
+ j->manager->event,
+ &j->timer_event_source,
+ CLOCK_MONOTONIC,
+ timeout_time, 0,
+ job_dispatch_timer, j);
+ if (r < 0)
+ log_debug_errno(r, "Failed to restart timeout for job: %m");
+
+ (void) sd_event_source_set_description(j->timer_event_source, "job-timeout");
+
+ return r;
+}
+
+void job_shutdown_magic(Job *j) {
+ assert(j);
+
+ /* The shutdown target gets some special treatment here: we
+ * tell the kernel to begin with flushing its disk caches, to
+ * optimize shutdown time a bit. Ideally we wouldn't hardcode
+ * this magic into PID 1. However all other processes aren't
+ * options either since they'd exit much sooner than PID 1 and
+ * asynchronous sync() would cause their exit to be
+ * delayed. */
+
+ if (j->type != JOB_START)
+ return;
+
+ if (!MANAGER_IS_SYSTEM(j->unit->manager))
+ return;
+
+ if (!unit_has_name(j->unit, SPECIAL_SHUTDOWN_TARGET))
+ return;
+
+ /* In case messages on console has been disabled on boot */
+ j->unit->manager->no_console_output = false;
+
+ if (detect_container() > 0)
+ return;
+
+ (void) asynchronous_sync(NULL);
+}
+
+int job_get_timeout(Job *j, usec_t *timeout) {
+ usec_t x = USEC_INFINITY, y = USEC_INFINITY;
+ Unit *u = j->unit;
+ int r;
+
+ assert(u);
+
+ if (j->timer_event_source) {
+ r = sd_event_source_get_time(j->timer_event_source, &x);
+ if (r < 0)
+ return r;
+ }
+
+ if (UNIT_VTABLE(u)->get_timeout) {
+ r = UNIT_VTABLE(u)->get_timeout(u, &y);
+ if (r < 0)
+ return r;
+ }
+
+ if (x == USEC_INFINITY && y == USEC_INFINITY)
+ return 0;
+
+ *timeout = MIN(x, y);
+ return 1;
+}
+
+bool job_may_gc(Job *j) {
+ Unit *other;
+ void *v;
+
+ assert(j);
+
+ /* Checks whether this job should be GC'ed away. We only do this for jobs of units that have no effect on their
+ * own and just track external state. For now the only unit type that qualifies for this are .device units.
+ * Returns true if the job can be collected. */
+
+ if (!UNIT_VTABLE(j->unit)->gc_jobs)
+ return false;
+
+ if (sd_bus_track_count(j->bus_track) > 0)
+ return false;
+
+ /* FIXME: So this is a bit ugly: for now we don't properly track references made via private bus connections
+ * (because it's nasty, as sd_bus_track doesn't apply to it). We simply remember that the job was once
+ * referenced by one, and reset this whenever we notice that no private bus connections are around. This means
+ * the GC is a bit too conservative when it comes to jobs created by private bus connections. */
+ if (j->ref_by_private_bus) {
+ if (set_isempty(j->unit->manager->private_buses))
+ j->ref_by_private_bus = false;
+ else
+ return false;
+ }
+
+ if (j->type == JOB_NOP)
+ return false;
+
+ /* The logic is inverse to job_is_runnable, we cannot GC as long as we block any job. */
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE])
+ if (other->job && job_compare(j, other->job, UNIT_BEFORE) < 0)
+ return false;
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER])
+ if (other->job && job_compare(j, other->job, UNIT_AFTER) < 0)
+ return false;
+
+ return true;
+}
+
+void job_add_to_gc_queue(Job *j) {
+ assert(j);
+
+ if (j->in_gc_queue)
+ return;
+
+ if (!job_may_gc(j))
+ return;
+
+ LIST_PREPEND(gc_queue, j->unit->manager->gc_job_queue, j);
+ j->in_gc_queue = true;
+}
+
+static int job_compare_id(Job * const *a, Job * const *b) {
+ return CMP((*a)->id, (*b)->id);
+}
+
+static size_t sort_job_list(Job **list, size_t n) {
+ Job *previous = NULL;
+ size_t a, b;
+
+ /* Order by numeric IDs */
+ typesafe_qsort(list, n, job_compare_id);
+
+ /* Filter out duplicates */
+ for (a = 0, b = 0; a < n; a++) {
+
+ if (previous == list[a])
+ continue;
+
+ previous = list[b++] = list[a];
+ }
+
+ return b;
+}
+
+int job_get_before(Job *j, Job*** ret) {
+ _cleanup_free_ Job** list = NULL;
+ size_t n = 0, n_allocated = 0;
+ Unit *other = NULL;
+ void *v;
+
+ /* Returns a list of all pending jobs that need to finish before this job may be started. */
+
+ assert(j);
+ assert(ret);
+
+ if (j->ignore_order) {
+ *ret = NULL;
+ return 0;
+ }
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER]) {
+ if (!other->job)
+ continue;
+ if (job_compare(j, other->job, UNIT_AFTER) <= 0)
+ continue;
+
+ if (!GREEDY_REALLOC(list, n_allocated, n+1))
+ return -ENOMEM;
+ list[n++] = other->job;
+ }
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE]) {
+ if (!other->job)
+ continue;
+ if (job_compare(j, other->job, UNIT_BEFORE) <= 0)
+ continue;
+
+ if (!GREEDY_REALLOC(list, n_allocated, n+1))
+ return -ENOMEM;
+ list[n++] = other->job;
+ }
+
+ n = sort_job_list(list, n);
+
+ *ret = TAKE_PTR(list);
+
+ return (int) n;
+}
+
+int job_get_after(Job *j, Job*** ret) {
+ _cleanup_free_ Job** list = NULL;
+ size_t n = 0, n_allocated = 0;
+ Unit *other = NULL;
+ void *v;
+
+ assert(j);
+ assert(ret);
+
+ /* Returns a list of all pending jobs that are waiting for this job to finish. */
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_BEFORE]) {
+ if (!other->job)
+ continue;
+
+ if (other->job->ignore_order)
+ continue;
+
+ if (job_compare(j, other->job, UNIT_BEFORE) >= 0)
+ continue;
+
+ if (!GREEDY_REALLOC(list, n_allocated, n+1))
+ return -ENOMEM;
+ list[n++] = other->job;
+ }
+
+ HASHMAP_FOREACH_KEY(v, other, j->unit->dependencies[UNIT_AFTER]) {
+ if (!other->job)
+ continue;
+
+ if (other->job->ignore_order)
+ continue;
+
+ if (job_compare(j, other->job, UNIT_AFTER) >= 0)
+ continue;
+
+ if (!GREEDY_REALLOC(list, n_allocated, n+1))
+ return -ENOMEM;
+ list[n++] = other->job;
+ }
+
+ n = sort_job_list(list, n);
+
+ *ret = TAKE_PTR(list);
+
+ return (int) n;
+}
+
+static const char* const job_state_table[_JOB_STATE_MAX] = {
+ [JOB_WAITING] = "waiting",
+ [JOB_RUNNING] = "running",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
+
+static const char* const job_type_table[_JOB_TYPE_MAX] = {
+ [JOB_START] = "start",
+ [JOB_VERIFY_ACTIVE] = "verify-active",
+ [JOB_STOP] = "stop",
+ [JOB_RELOAD] = "reload",
+ [JOB_RELOAD_OR_START] = "reload-or-start",
+ [JOB_RESTART] = "restart",
+ [JOB_TRY_RESTART] = "try-restart",
+ [JOB_TRY_RELOAD] = "try-reload",
+ [JOB_NOP] = "nop",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
+
+static const char* const job_mode_table[_JOB_MODE_MAX] = {
+ [JOB_FAIL] = "fail",
+ [JOB_REPLACE] = "replace",
+ [JOB_REPLACE_IRREVERSIBLY] = "replace-irreversibly",
+ [JOB_ISOLATE] = "isolate",
+ [JOB_FLUSH] = "flush",
+ [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
+ [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements",
+ [JOB_TRIGGERING] = "triggering",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
+
+static const char* const job_result_table[_JOB_RESULT_MAX] = {
+ [JOB_DONE] = "done",
+ [JOB_CANCELED] = "canceled",
+ [JOB_TIMEOUT] = "timeout",
+ [JOB_FAILED] = "failed",
+ [JOB_DEPENDENCY] = "dependency",
+ [JOB_SKIPPED] = "skipped",
+ [JOB_INVALID] = "invalid",
+ [JOB_ASSERT] = "assert",
+ [JOB_UNSUPPORTED] = "unsupported",
+ [JOB_COLLECTED] = "collected",
+ [JOB_ONCE] = "once",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);
+
+const char* job_type_to_access_method(JobType t) {
+ assert(t >= 0);
+ assert(t < _JOB_TYPE_MAX);
+
+ if (IN_SET(t, JOB_START, JOB_RESTART, JOB_TRY_RESTART))
+ return "start";
+ else if (t == JOB_STOP)
+ return "stop";
+ else
+ return "reload";
+}
+
+/*
+ * assume_dep assumed dependency between units (a is before/after b)
+ *
+ * Returns
+ * 0 jobs are independent,
+ * >0 a should run after b,
+ * <0 a should run before b,
+ *
+ * The logic means that for a service a and a service b where b.After=a:
+ *
+ * start a + start b → 1st step start a, 2nd step start b
+ * start a + stop b → 1st step stop b, 2nd step start a
+ * stop a + start b → 1st step stop a, 2nd step start b
+ * stop a + stop b → 1st step stop b, 2nd step stop a
+ *
+ * This has the side effect that restarts are properly
+ * synchronized too.
+ */
+int job_compare(Job *a, Job *b, UnitDependency assume_dep) {
+ assert(a->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(b->type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(IN_SET(assume_dep, UNIT_AFTER, UNIT_BEFORE));
+
+ /* Trivial cases first */
+ if (a->type == JOB_NOP || b->type == JOB_NOP)
+ return 0;
+
+ if (a->ignore_order || b->ignore_order)
+ return 0;
+
+ if (assume_dep == UNIT_AFTER)
+ return -job_compare(b, a, UNIT_BEFORE);
+
+ /* Let's make it simple, JOB_STOP goes always first (in case both ua and ub stop,
+ * then ub's stop goes first anyway).
+ * JOB_RESTART is JOB_STOP in disguise (before it is patched to JOB_START). */
+ if (IN_SET(b->type, JOB_STOP, JOB_RESTART))
+ return 1;
+ else
+ return -1;
+}
diff --git a/src/core/job.h b/src/core/job.h
new file mode 100644
index 0000000..1b3ddc7
--- /dev/null
+++ b/src/core/job.h
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+#include "list.h"
+#include "unit-name.h"
+
+typedef struct Job Job;
+typedef struct JobDependency JobDependency;
+typedef enum JobType JobType;
+typedef enum JobState JobState;
+typedef enum JobMode JobMode;
+typedef enum JobResult JobResult;
+
+/* Be careful when changing the job types! Adjust job_merging_table[] accordingly! */
+enum JobType {
+ JOB_START, /* if a unit does not support being started, we'll just wait until it becomes active */
+ JOB_VERIFY_ACTIVE,
+
+ JOB_STOP,
+
+ JOB_RELOAD, /* if running, reload */
+
+ /* Note that restarts are first treated like JOB_STOP, but
+ * then instead of finishing are patched to become
+ * JOB_START. */
+ JOB_RESTART, /* If running, stop. Then start unconditionally. */
+
+ _JOB_TYPE_MAX_MERGING,
+
+ /* JOB_NOP can enter into a transaction, but as it won't pull in
+ * any dependencies and it uses the special 'nop_job' slot in Unit,
+ * it won't have to merge with anything (except possibly into another
+ * JOB_NOP, previously installed). JOB_NOP is special-cased in
+ * job_type_is_*() functions so that the transaction can be
+ * activated. */
+ JOB_NOP = _JOB_TYPE_MAX_MERGING, /* do nothing */
+
+ _JOB_TYPE_MAX_IN_TRANSACTION,
+
+ /* JOB_TRY_RESTART can never appear in a transaction, because
+ * it always collapses into JOB_RESTART or JOB_NOP before entering.
+ * Thus we never need to merge it with anything. */
+ JOB_TRY_RESTART = _JOB_TYPE_MAX_IN_TRANSACTION, /* if running, stop and then start */
+
+ /* Similar to JOB_TRY_RESTART but collapses to JOB_RELOAD or JOB_NOP */
+ JOB_TRY_RELOAD,
+
+ /* JOB_RELOAD_OR_START won't enter into a transaction and cannot result
+ * from transaction merging (there's no way for JOB_RELOAD and
+ * JOB_START to meet in one transaction). It can result from a merge
+ * during job installation, but then it will immediately collapse into
+ * one of the two simpler types. */
+ JOB_RELOAD_OR_START, /* if running, reload, otherwise start */
+
+ _JOB_TYPE_MAX,
+ _JOB_TYPE_INVALID = -1
+};
+
+enum JobState {
+ JOB_WAITING,
+ JOB_RUNNING,
+ _JOB_STATE_MAX,
+ _JOB_STATE_INVALID = -1
+};
+
+enum JobMode {
+ JOB_FAIL, /* Fail if a conflicting job is already queued */
+ JOB_REPLACE, /* Replace an existing conflicting job */
+ JOB_REPLACE_IRREVERSIBLY,/* Like JOB_REPLACE + produce irreversible jobs */
+ JOB_ISOLATE, /* Start a unit, and stop all others */
+ JOB_FLUSH, /* Flush out all other queued jobs when queueing this one */
+ JOB_IGNORE_DEPENDENCIES, /* Ignore both requirement and ordering dependencies */
+ JOB_IGNORE_REQUIREMENTS, /* Ignore requirement dependencies */
+ JOB_TRIGGERING, /* Adds TRIGGERED_BY dependencies to the same transaction */
+ _JOB_MODE_MAX,
+ _JOB_MODE_INVALID = -1
+};
+
+enum JobResult {
+ JOB_DONE, /* Job completed successfully (or skipped due to a failed ConditionXYZ=) */
+ JOB_CANCELED, /* Job canceled by a conflicting job installation or by explicit cancel request */
+ JOB_TIMEOUT, /* Job timeout elapsed */
+ JOB_FAILED, /* Job failed */
+ JOB_DEPENDENCY, /* A required dependency job did not result in JOB_DONE */
+ JOB_SKIPPED, /* Negative result of JOB_VERIFY_ACTIVE or skip due to ExecCondition= */
+ JOB_INVALID, /* JOB_RELOAD of inactive unit */
+ JOB_ASSERT, /* Couldn't start a unit, because an assert didn't hold */
+ JOB_UNSUPPORTED, /* Couldn't start a unit, because the unit type is not supported on the system */
+ JOB_COLLECTED, /* Job was garbage collected, since nothing needed it anymore */
+ JOB_ONCE, /* Unit was started before, and hence can't be started again */
+ _JOB_RESULT_MAX,
+ _JOB_RESULT_INVALID = -1
+};
+
+#include "unit.h"
+
+struct JobDependency {
+ /* Encodes that the 'subject' job needs the 'object' job in
+ * some way. This structure is used only while building a transaction. */
+ Job *subject;
+ Job *object;
+
+ LIST_FIELDS(JobDependency, subject);
+ LIST_FIELDS(JobDependency, object);
+
+ bool matters:1;
+ bool conflicts:1;
+};
+
+struct Job {
+ Manager *manager;
+ Unit *unit;
+
+ LIST_FIELDS(Job, transaction);
+ LIST_FIELDS(Job, dbus_queue);
+ LIST_FIELDS(Job, gc_queue);
+
+ LIST_HEAD(JobDependency, subject_list);
+ LIST_HEAD(JobDependency, object_list);
+
+ /* Used for graph algs as a "I have been here" marker */
+ Job* marker;
+ unsigned generation;
+
+ uint32_t id;
+
+ JobType type;
+ JobState state;
+
+ sd_event_source *timer_event_source;
+ usec_t begin_usec;
+ usec_t begin_running_usec;
+
+ /*
+ * This tracks where to send signals, and also which clients
+ * are allowed to call DBus methods on the job (other than
+ * root).
+ *
+ * There can be more than one client, because of job merging.
+ */
+ sd_bus_track *bus_track;
+ char **deserialized_clients;
+
+ JobResult result;
+
+ unsigned run_queue_idx;
+
+ bool installed:1;
+ bool in_run_queue:1;
+ bool matters_to_anchor:1;
+ bool in_dbus_queue:1;
+ bool sent_dbus_new_signal:1;
+ bool ignore_order:1;
+ bool irreversible:1;
+ bool in_gc_queue:1;
+ bool ref_by_private_bus:1;
+};
+
+Job* job_new(Unit *unit, JobType type);
+Job* job_new_raw(Unit *unit);
+void job_unlink(Job *job);
+Job* job_free(Job *job);
+Job* job_install(Job *j);
+int job_install_deserialized(Job *j);
+void job_uninstall(Job *j);
+void job_dump(Job *j, FILE *f, const char *prefix);
+int job_serialize(Job *j, FILE *f);
+int job_deserialize(Job *j, FILE *f);
+int job_coldplug(Job *j);
+
+JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts);
+void job_dependency_free(JobDependency *l);
+
+int job_merge(Job *j, Job *other);
+
+JobType job_type_lookup_merge(JobType a, JobType b) _pure_;
+
+_pure_ static inline bool job_type_is_mergeable(JobType a, JobType b) {
+ return job_type_lookup_merge(a, b) >= 0;
+}
+
+_pure_ static inline bool job_type_is_conflicting(JobType a, JobType b) {
+ return a != JOB_NOP && b != JOB_NOP && !job_type_is_mergeable(a, b);
+}
+
+_pure_ static inline bool job_type_is_superset(JobType a, JobType b) {
+ /* Checks whether operation a is a "superset" of b in its actions */
+ if (b == JOB_NOP)
+ return true;
+ if (a == JOB_NOP)
+ return false;
+ return a == job_type_lookup_merge(a, b);
+}
+
+bool job_type_is_redundant(JobType a, UnitActiveState b) _pure_;
+
+/* Collapses a state-dependent job type into a simpler type by observing
+ * the state of the unit which it is going to be applied to. */
+JobType job_type_collapse(JobType t, Unit *u);
+
+int job_type_merge_and_collapse(JobType *a, JobType b, Unit *u);
+
+void job_add_to_run_queue(Job *j);
+void job_add_to_dbus_queue(Job *j);
+
+int job_start_timer(Job *j, bool job_running);
+
+int job_run_and_invalidate(Job *j);
+int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already);
+
+char *job_dbus_path(Job *j);
+
+void job_shutdown_magic(Job *j);
+
+int job_get_timeout(Job *j, usec_t *timeout) _pure_;
+
+bool job_may_gc(Job *j);
+void job_add_to_gc_queue(Job *j);
+
+int job_get_before(Job *j, Job*** ret);
+int job_get_after(Job *j, Job*** ret);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Job*, job_free);
+
+const char* job_type_to_string(JobType t) _const_;
+JobType job_type_from_string(const char *s) _pure_;
+
+const char* job_state_to_string(JobState t) _const_;
+JobState job_state_from_string(const char *s) _pure_;
+
+const char* job_mode_to_string(JobMode t) _const_;
+JobMode job_mode_from_string(const char *s) _pure_;
+
+const char* job_result_to_string(JobResult t) _const_;
+JobResult job_result_from_string(const char *s) _pure_;
+
+const char* job_type_to_access_method(JobType t);
+
+int job_compare(Job *a, Job *b, UnitDependency assume_dep);
diff --git a/src/core/kill.c b/src/core/kill.c
new file mode 100644
index 0000000..e858ae9
--- /dev/null
+++ b/src/core/kill.c
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "kill.h"
+#include "signal-util.h"
+#include "string-table.h"
+#include "util.h"
+
+void kill_context_init(KillContext *c) {
+ assert(c);
+
+ c->kill_signal = SIGTERM;
+ /* restart_kill_signal is unset by default and we fall back to kill_signal */
+ c->final_kill_signal = SIGKILL;
+ c->send_sigkill = true;
+ c->send_sighup = false;
+ c->watchdog_signal = SIGABRT;
+}
+
+void kill_context_dump(KillContext *c, FILE *f, const char *prefix) {
+ assert(c);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%sKillMode: %s\n"
+ "%sKillSignal: SIG%s\n"
+ "%sRestartKillSignal: SIG%s\n"
+ "%sFinalKillSignal: SIG%s\n"
+ "%sSendSIGKILL: %s\n"
+ "%sSendSIGHUP: %s\n",
+ prefix, kill_mode_to_string(c->kill_mode),
+ prefix, signal_to_string(c->kill_signal),
+ prefix, signal_to_string(restart_kill_signal(c)),
+ prefix, signal_to_string(c->final_kill_signal),
+ prefix, yes_no(c->send_sigkill),
+ prefix, yes_no(c->send_sighup));
+}
+
+static const char* const kill_mode_table[_KILL_MODE_MAX] = {
+ [KILL_CONTROL_GROUP] = "control-group",
+ [KILL_PROCESS] = "process",
+ [KILL_MIXED] = "mixed",
+ [KILL_NONE] = "none",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
+
+static const char* const kill_who_table[_KILL_WHO_MAX] = {
+ [KILL_MAIN] = "main",
+ [KILL_CONTROL] = "control",
+ [KILL_ALL] = "all",
+ [KILL_MAIN_FAIL] = "main-fail",
+ [KILL_CONTROL_FAIL] = "control-fail",
+ [KILL_ALL_FAIL] = "all-fail",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);
diff --git a/src/core/kill.h b/src/core/kill.h
new file mode 100644
index 0000000..012e433
--- /dev/null
+++ b/src/core/kill.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct KillContext KillContext;
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+typedef enum KillMode {
+ /* The kill mode is a property of a unit. */
+ KILL_CONTROL_GROUP = 0,
+ KILL_PROCESS,
+ KILL_MIXED,
+ KILL_NONE,
+ _KILL_MODE_MAX,
+ _KILL_MODE_INVALID = -1
+} KillMode;
+
+struct KillContext {
+ KillMode kill_mode;
+ int kill_signal;
+ int restart_kill_signal;
+ int final_kill_signal;
+ int watchdog_signal;
+ bool send_sigkill;
+ bool send_sighup;
+};
+
+typedef enum KillWho {
+ /* Kill who is a property of an operation */
+ KILL_MAIN,
+ KILL_CONTROL,
+ KILL_ALL,
+ KILL_MAIN_FAIL,
+ KILL_CONTROL_FAIL,
+ KILL_ALL_FAIL,
+ _KILL_WHO_MAX,
+ _KILL_WHO_INVALID = -1
+} KillWho;
+
+void kill_context_init(KillContext *c);
+void kill_context_dump(KillContext *c, FILE *f, const char *prefix);
+
+const char *kill_mode_to_string(KillMode k) _const_;
+KillMode kill_mode_from_string(const char *s) _pure_;
+
+const char *kill_who_to_string(KillWho k) _const_;
+KillWho kill_who_from_string(const char *s) _pure_;
+
+static inline int restart_kill_signal(const KillContext *c) {
+ if (c->restart_kill_signal != 0)
+ return c->restart_kill_signal;
+ return c->kill_signal;
+}
diff --git a/src/core/killall.c b/src/core/killall.c
new file mode 100644
index 0000000..6f60f09
--- /dev/null
+++ b/src/core/killall.c
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2010 ProFUSION embedded systems
+***/
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "killall.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static bool ignore_proc(pid_t pid, bool warn_rootfs) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+ char c = 0;
+ uid_t uid;
+ int r;
+
+ /* We are PID 1, let's not commit suicide */
+ if (pid <= 1)
+ return true;
+
+ /* Ignore kernel threads */
+ r = is_kernel_thread(pid);
+ if (r != 0)
+ return true; /* also ignore processes where we can't determine this */
+
+ r = get_process_uid(pid, &uid);
+ if (r < 0)
+ return true; /* not really, but better safe than sorry */
+
+ /* Non-root processes otherwise are always subject to be killed */
+ if (uid != 0)
+ return false;
+
+ p = procfs_file_alloca(pid, "cmdline");
+ f = fopen(p, "re");
+ if (!f)
+ return true; /* not really, but has the desired effect */
+
+ /* Try to read the first character of the command line. If the cmdline is empty (which might be the case for
+ * kernel threads but potentially also other stuff), this line won't do anything, but we don't care much, as
+ * actual kernel threads are already filtered out above. */
+ (void) fread(&c, 1, 1, f);
+
+ /* Processes with argv[0][0] = '@' we ignore from the killing spree.
+ *
+ * https://systemd.io/ROOT_STORAGE_DAEMONS */
+ if (c != '@')
+ return false;
+
+ if (warn_rootfs &&
+ pid_from_same_root_fs(pid) == 0) {
+
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is "
+ "running from the root file system, and thus likely to block re-mounting of the "
+ "root file system to read-only. Please consider moving it into an initrd file "
+ "system instead.", pid, strna(comm));
+ }
+
+ return true;
+}
+
+static void log_children_no_yet_killed(Set *pids) {
+ _cleanup_free_ char *lst_child = NULL;
+ void *p;
+
+ SET_FOREACH(p, pids) {
+ _cleanup_free_ char *s = NULL;
+
+ if (get_process_comm(PTR_TO_PID(p), &s) < 0)
+ (void) asprintf(&s, PID_FMT, PTR_TO_PID(p));
+
+ if (!strextend(&lst_child, ", ", s, NULL)) {
+ log_oom();
+ return;
+ }
+ }
+
+ if (isempty(lst_child))
+ return;
+
+ log_warning("Waiting for process: %s", lst_child + 2);
+}
+
+static int wait_for_children(Set *pids, sigset_t *mask, usec_t timeout) {
+ usec_t until, date_log_child, n;
+
+ assert(mask);
+
+ /* Return the number of children remaining in the pids set: That correspond to the number
+ * of processes still "alive" after the timeout */
+
+ if (set_isempty(pids))
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+ until = usec_add(n, timeout);
+ date_log_child = usec_add(n, 10u * USEC_PER_SEC);
+ if (date_log_child > until)
+ date_log_child = usec_add(n, timeout / 2u);
+
+ for (;;) {
+ struct timespec ts;
+ int k;
+ void *p;
+
+ /* First, let the kernel inform us about killed
+ * children. Most processes will probably be our
+ * children, but some are not (might be our
+ * grandchildren instead...). */
+ for (;;) {
+ pid_t pid;
+
+ pid = waitpid(-1, NULL, WNOHANG);
+ if (pid == 0)
+ break;
+ if (pid < 0) {
+ if (errno == ECHILD)
+ break;
+
+ return log_error_errno(errno, "waitpid() failed: %m");
+ }
+
+ (void) set_remove(pids, PID_TO_PTR(pid));
+ }
+
+ /* Now explicitly check who might be remaining, who
+ * might not be our child. */
+ SET_FOREACH(p, pids) {
+
+ /* kill(pid, 0) sends no signal, but it tells
+ * us whether the process still exists. */
+ if (kill(PTR_TO_PID(p), 0) == 0)
+ continue;
+
+ if (errno != ESRCH)
+ continue;
+
+ set_remove(pids, p);
+ }
+
+ if (set_isempty(pids))
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+ if (date_log_child > 0 && n >= date_log_child) {
+ log_children_no_yet_killed(pids);
+ /* Log the children not yet killed only once */
+ date_log_child = 0;
+ }
+
+ if (n >= until)
+ return set_size(pids);
+
+ if (date_log_child > 0)
+ timespec_store(&ts, MIN(until - n, date_log_child - n));
+ else
+ timespec_store(&ts, until - n);
+
+ k = sigtimedwait(mask, NULL, &ts);
+ if (k != SIGCHLD) {
+
+ if (k < 0 && errno != EAGAIN)
+ return log_error_errno(errno, "sigtimedwait() failed: %m");
+
+ if (k >= 0)
+ log_warning("sigtimedwait() returned unexpected signal.");
+ }
+ }
+}
+
+static int killall(int sig, Set *pids, bool send_sighup) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *d;
+ int n_killed = 0;
+
+ /* Send the specified signal to all remaining processes, if not excluded by ignore_proc().
+ * Returns the number of processes to which the specified signal was sent */
+
+ dir = opendir("/proc");
+ if (!dir)
+ return log_warning_errno(errno, "opendir(/proc) failed: %m");
+
+ FOREACH_DIRENT_ALL(d, dir, break) {
+ pid_t pid;
+ int r;
+
+ if (!IN_SET(d->d_type, DT_DIR, DT_UNKNOWN))
+ continue;
+
+ if (parse_pid(d->d_name, &pid) < 0)
+ continue;
+
+ if (ignore_proc(pid, sig == SIGKILL && !in_initrd()))
+ continue;
+
+ if (sig == SIGKILL) {
+ _cleanup_free_ char *s = NULL;
+
+ (void) get_process_comm(pid, &s);
+ log_notice("Sending SIGKILL to PID "PID_FMT" (%s).", pid, strna(s));
+ }
+
+ if (kill(pid, sig) >= 0) {
+ n_killed++;
+ if (pids) {
+ r = set_put(pids, PID_TO_PTR(pid));
+ if (r < 0)
+ log_oom();
+ }
+ } else if (errno != ENOENT)
+ log_warning_errno(errno, "Could not kill %d: %m", pid);
+
+ if (send_sighup) {
+ /* Optionally, also send a SIGHUP signal, but
+ only if the process has a controlling
+ tty. This is useful to allow handling of
+ shells which ignore SIGTERM but react to
+ SIGHUP. We do not send this to processes that
+ have no controlling TTY since we don't want to
+ trigger reloads of daemon processes. Also we
+ make sure to only send this after SIGTERM so
+ that SIGTERM is always first in the queue. */
+
+ if (get_ctty_devnr(pid, NULL) >= 0)
+ /* it's OK if the process is gone, just ignore the result */
+ (void) kill(pid, SIGHUP);
+ }
+ }
+
+ return n_killed;
+}
+
+int broadcast_signal(int sig, bool wait_for_exit, bool send_sighup, usec_t timeout) {
+ int n_children_left;
+ sigset_t mask, oldmask;
+ _cleanup_set_free_ Set *pids = NULL;
+
+ /* Send the specified signal to all remaining processes, if not excluded by ignore_proc().
+ * Return:
+ * - The number of processes still "alive" after the timeout (that should have been killed)
+ * if the function needs to wait for the end of the processes (wait_for_exit).
+ * - Otherwise, the number of processes to which the specified signal was sent */
+
+ if (wait_for_exit)
+ pids = set_new(NULL);
+
+ assert_se(sigemptyset(&mask) == 0);
+ assert_se(sigaddset(&mask, SIGCHLD) == 0);
+ assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) == 0);
+
+ if (kill(-1, SIGSTOP) < 0 && errno != ESRCH)
+ log_warning_errno(errno, "kill(-1, SIGSTOP) failed: %m");
+
+ n_children_left = killall(sig, pids, send_sighup);
+
+ if (kill(-1, SIGCONT) < 0 && errno != ESRCH)
+ log_warning_errno(errno, "kill(-1, SIGCONT) failed: %m");
+
+ if (wait_for_exit && n_children_left > 0)
+ n_children_left = wait_for_children(pids, &mask, timeout);
+
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0);
+
+ return n_children_left;
+}
diff --git a/src/core/killall.h b/src/core/killall.h
new file mode 100644
index 0000000..d8ef96f
--- /dev/null
+++ b/src/core/killall.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "time-util.h"
+
+int broadcast_signal(int sig, bool wait_for_exit, bool send_sighup, usec_t timeout);
diff --git a/src/core/kmod-setup.c b/src/core/kmod-setup.c
new file mode 100644
index 0000000..8a7f828
--- /dev/null
+++ b/src/core/kmod-setup.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ftw.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "fileio.h"
+#include "kmod-setup.h"
+#include "macro.h"
+#include "string-util.h"
+
+#if HAVE_KMOD
+#include "module-util.h"
+
+static void systemd_kmod_log(
+ void *data,
+ int priority,
+ const char *file, int line,
+ const char *fn,
+ const char *format,
+ va_list args) {
+
+ /* library logging is enabled at debug only */
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_internalv(LOG_DEBUG, 0, file, line, fn, format, args);
+ REENABLE_WARNING;
+}
+
+static int has_virtio_rng_nftw_cb(
+ const char *fpath,
+ const struct stat *sb,
+ int tflag,
+ struct FTW *ftwbuf) {
+
+ _cleanup_free_ char *alias = NULL;
+ int r;
+
+ if ((FTW_D == tflag) && (ftwbuf->level > 2))
+ return FTW_SKIP_SUBTREE;
+
+ if (FTW_F != tflag)
+ return FTW_CONTINUE;
+
+ if (!endswith(fpath, "/modalias"))
+ return FTW_CONTINUE;
+
+ r = read_one_line_file(fpath, &alias);
+ if (r < 0)
+ return FTW_SKIP_SIBLINGS;
+
+ if (startswith(alias, "pci:v00001AF4d00001005"))
+ return FTW_STOP;
+
+ if (startswith(alias, "pci:v00001AF4d00001044"))
+ return FTW_STOP;
+
+ return FTW_SKIP_SIBLINGS;
+}
+
+static bool has_virtio_rng(void) {
+ return (nftw("/sys/devices/pci0000:00", has_virtio_rng_nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL) == FTW_STOP);
+}
+#endif
+
+int kmod_setup(void) {
+#if HAVE_KMOD
+
+ static const struct {
+ const char *module;
+ const char *path;
+ bool warn_if_unavailable:1;
+ bool warn_if_module:1;
+ bool (*condition_fn)(void);
+ } kmod_table[] = {
+ /* This one we need to load explicitly, since auto-loading on use doesn't work
+ * before udev created the ghost device nodes, and we need it earlier than that. */
+ { "autofs4", "/sys/class/misc/autofs", true, false, NULL },
+
+ /* This one we need to load explicitly, since auto-loading of IPv6 is not done when
+ * we try to configure ::1 on the loopback device. */
+ { "ipv6", "/sys/module/ipv6", false, true, NULL },
+
+ /* This should never be a module */
+ { "unix", "/proc/net/unix", true, true, NULL },
+
+#if HAVE_LIBIPTC
+ /* netfilter is needed by networkd, nspawn among others, and cannot be autoloaded */
+ { "ip_tables", "/proc/net/ip_tables_names", false, false, NULL },
+#endif
+ /* virtio_rng would be loaded by udev later, but real entropy might be needed very early */
+ { "virtio_rng", NULL, false, false, has_virtio_rng },
+ };
+ _cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
+ unsigned i;
+
+ if (have_effective_cap(CAP_SYS_MODULE) == 0)
+ return 0;
+
+ for (i = 0; i < ELEMENTSOF(kmod_table); i++) {
+ if (kmod_table[i].path && access(kmod_table[i].path, F_OK) >= 0)
+ continue;
+
+ if (kmod_table[i].condition_fn && !kmod_table[i].condition_fn())
+ continue;
+
+ if (kmod_table[i].warn_if_module)
+ log_debug("Your kernel apparently lacks built-in %s support. Might be "
+ "a good idea to compile it in. We'll now try to work around "
+ "this by loading the module...", kmod_table[i].module);
+
+ if (!ctx) {
+ ctx = kmod_new(NULL, NULL);
+ if (!ctx)
+ return log_oom();
+
+ kmod_set_log_fn(ctx, systemd_kmod_log, NULL);
+ kmod_load_resources(ctx);
+ }
+
+ (void) module_load_and_warn(ctx, kmod_table[i].module, kmod_table[i].warn_if_unavailable);
+ }
+
+#endif
+ return 0;
+}
diff --git a/src/core/kmod-setup.h b/src/core/kmod-setup.h
new file mode 100644
index 0000000..1c842d3
--- /dev/null
+++ b/src/core/kmod-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int kmod_setup(void);
diff --git a/src/core/load-dropin.c b/src/core/load-dropin.c
new file mode 100644
index 0000000..d1c85e2
--- /dev/null
+++ b/src/core/load-dropin.c
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "fs-util.h"
+#include "load-dropin.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static int process_deps(Unit *u, UnitDependency dependency, const char *dir_suffix) {
+ _cleanup_strv_free_ char **paths = NULL;
+ char **p;
+ int r;
+
+ r = unit_file_find_dropin_paths(NULL,
+ u->manager->lookup_paths.search_path,
+ u->manager->unit_path_cache,
+ dir_suffix, NULL,
+ u->id, u->aliases,
+ &paths);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, paths) {
+ _cleanup_free_ char *target = NULL;
+ const char *entry;
+
+ entry = basename(*p);
+
+ if (null_or_empty_path(*p) > 0) {
+ /* an error usually means an invalid symlink, which is not a mask */
+ log_unit_debug(u, "%s dependency on %s is masked by %s, ignoring.",
+ unit_dependency_to_string(dependency), entry, *p);
+ continue;
+ }
+
+ r = is_symlink(*p);
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "%s dropin %s unreadable, ignoring: %m",
+ unit_dependency_to_string(dependency), *p);
+ continue;
+ }
+ if (r == 0) {
+ log_unit_warning(u, "%s dependency dropin %s is not a symlink, ignoring.",
+ unit_dependency_to_string(dependency), *p);
+ continue;
+ }
+
+ if (!unit_name_is_valid(entry, UNIT_NAME_ANY)) {
+ log_unit_warning(u, "%s dependency dropin %s is not a valid unit name, ignoring.",
+ unit_dependency_to_string(dependency), *p);
+ continue;
+ }
+
+ r = readlink_malloc(*p, &target);
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "readlink(\"%s\") failed, ignoring: %m", *p);
+ continue;
+ }
+
+ /* We don't treat this as an error, especially because we didn't check this for a
+ * long time. Nevertheless, we warn, because such mismatch can be mighty confusing. */
+ r = unit_symlink_name_compatible(entry, basename(target), u->instance);
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "Can't check if names %s and %s are compatible, ignoring: %m",
+ entry, basename(target));
+ continue;
+ }
+ if (r == 0)
+ log_unit_warning(u, "%s dependency dropin %s target %s has different name",
+ unit_dependency_to_string(dependency), *p, target);
+
+ r = unit_add_dependency_by_name(u, dependency, entry, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Cannot add %s dependency on %s, ignoring: %m",
+ unit_dependency_to_string(dependency), entry);
+ }
+
+ return 0;
+}
+
+int unit_load_dropin(Unit *u) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **f;
+ int r;
+
+ assert(u);
+
+ /* Load dependencies from .wants and .requires directories */
+ r = process_deps(u, UNIT_WANTS, ".wants");
+ if (r < 0)
+ return r;
+
+ r = process_deps(u, UNIT_REQUIRES, ".requires");
+ if (r < 0)
+ return r;
+
+ /* Load .conf dropins */
+ r = unit_find_dropin_paths(u, &l);
+ if (r <= 0)
+ return 0;
+
+ if (!u->dropin_paths)
+ u->dropin_paths = TAKE_PTR(l);
+ else {
+ r = strv_extend_strv(&u->dropin_paths, l, true);
+ if (r < 0)
+ return log_oom();
+ }
+
+ STRV_FOREACH(f, u->dropin_paths)
+ (void) config_parse(
+ u->id, *f, NULL,
+ UNIT_VTABLE(u)->sections,
+ config_item_perf_lookup, load_fragment_gperf_lookup,
+ 0,
+ u,
+ &u->dropin_mtime);
+
+ return 0;
+}
diff --git a/src/core/load-dropin.h b/src/core/load-dropin.h
new file mode 100644
index 0000000..f0b87d3
--- /dev/null
+++ b/src/core/load-dropin.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "dropin.h"
+#include "unit.h"
+
+/* Read service data supplementary drop-in directories */
+
+static inline int unit_find_dropin_paths(Unit *u, char ***paths) {
+ assert(u);
+
+ return unit_file_find_dropin_paths(NULL,
+ u->manager->lookup_paths.search_path,
+ u->manager->unit_path_cache,
+ ".d", ".conf",
+ u->id, u->aliases,
+ paths);
+}
+
+int unit_load_dropin(Unit *u);
diff --git a/src/core/load-fragment-gperf-nulstr.awk b/src/core/load-fragment-gperf-nulstr.awk
new file mode 100644
index 0000000..44bc1fb
--- /dev/null
+++ b/src/core/load-fragment-gperf-nulstr.awk
@@ -0,0 +1,14 @@
+BEGIN{
+ keywords=0 ; FS="," ;
+ print "extern const char load_fragment_gperf_nulstr[];" ;
+ print "const char load_fragment_gperf_nulstr[] ="
+}
+keyword==1 {
+ print "\"" $1 "\\0\""
+}
+/%%/ {
+ keyword=1
+}
+END {
+ print ";"
+}
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
new file mode 100644
index 0000000..946862c
--- /dev/null
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -0,0 +1,509 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "all-units.h"
+#include "conf-parser.h"
+#include "load-fragment.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name load_fragment_gperf_hash
+%define lookup-function-name load_fragment_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+m4_dnl Define the context options only once
+m4_define(`EXEC_CONTEXT_CONFIG_ITEMS',
+`$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context)
+$1.RootDirectory, config_parse_unit_path_printf, true, offsetof($1, exec_context.root_directory)
+$1.RootImage, config_parse_unit_path_printf, true, offsetof($1, exec_context.root_image)
+$1.RootImageOptions, config_parse_root_image_options, 0, offsetof($1, exec_context)
+$1.RootHash, config_parse_exec_root_hash, 0, offsetof($1, exec_context)
+$1.RootHashSignature, config_parse_exec_root_hash_sig, 0, offsetof($1, exec_context)
+$1.RootVerity, config_parse_unit_path_printf, true, offsetof($1, exec_context.root_verity)
+$1.MountImages, config_parse_mount_images, 0, offsetof($1, exec_context)
+$1.User, config_parse_user_group_compat, 0, offsetof($1, exec_context.user)
+$1.Group, config_parse_user_group_compat, 0, offsetof($1, exec_context.group)
+$1.SupplementaryGroups, config_parse_user_group_strv_compat, 0, offsetof($1, exec_context.supplementary_groups)
+$1.Nice, config_parse_exec_nice, 0, offsetof($1, exec_context)
+$1.OOMScoreAdjust, config_parse_exec_oom_score_adjust, 0, offsetof($1, exec_context)
+$1.CoredumpFilter, config_parse_exec_coredump_filter, 0, offsetof($1, exec_context)
+$1.IOSchedulingClass, config_parse_exec_io_class, 0, offsetof($1, exec_context)
+$1.IOSchedulingPriority, config_parse_exec_io_priority, 0, offsetof($1, exec_context)
+$1.CPUSchedulingPolicy, config_parse_exec_cpu_sched_policy, 0, offsetof($1, exec_context)
+$1.CPUSchedulingPriority, config_parse_exec_cpu_sched_prio, 0, offsetof($1, exec_context)
+$1.CPUSchedulingResetOnFork, config_parse_bool, 0, offsetof($1, exec_context.cpu_sched_reset_on_fork)
+$1.CPUAffinity, config_parse_exec_cpu_affinity, 0, offsetof($1, exec_context)
+$1.NUMAPolicy, config_parse_numa_policy, 0, offsetof($1, exec_context.numa_policy.type)
+$1.NUMAMask, config_parse_numa_mask, 0, offsetof($1, exec_context.numa_policy)
+$1.UMask, config_parse_mode, 0, offsetof($1, exec_context.umask)
+$1.Environment, config_parse_environ, 0, offsetof($1, exec_context.environment)
+$1.EnvironmentFile, config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files)
+$1.PassEnvironment, config_parse_pass_environ, 0, offsetof($1, exec_context.pass_environment)
+$1.UnsetEnvironment, config_parse_unset_environ, 0, offsetof($1, exec_context.unset_environment)
+$1.DynamicUser, config_parse_bool, true, offsetof($1, exec_context.dynamic_user)
+$1.RemoveIPC, config_parse_bool, 0, offsetof($1, exec_context.remove_ipc)
+$1.StandardInput, config_parse_exec_input, 0, offsetof($1, exec_context)
+$1.StandardOutput, config_parse_exec_output, 0, offsetof($1, exec_context)
+$1.StandardError, config_parse_exec_output, 0, offsetof($1, exec_context)
+$1.StandardInputText, config_parse_exec_input_text, 0, offsetof($1, exec_context)
+$1.StandardInputData, config_parse_exec_input_data, 0, offsetof($1, exec_context)
+$1.TTYPath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.tty_path)
+$1.TTYReset, config_parse_bool, 0, offsetof($1, exec_context.tty_reset)
+$1.TTYVHangup, config_parse_bool, 0, offsetof($1, exec_context.tty_vhangup)
+$1.TTYVTDisallocate, config_parse_bool, 0, offsetof($1, exec_context.tty_vt_disallocate)
+$1.SyslogIdentifier, config_parse_unit_string_printf, 0, offsetof($1, exec_context.syslog_identifier)
+$1.SyslogFacility, config_parse_log_facility, 0, offsetof($1, exec_context.syslog_priority)
+$1.SyslogLevel, config_parse_log_level, 0, offsetof($1, exec_context.syslog_priority)
+$1.SyslogLevelPrefix, config_parse_bool, 0, offsetof($1, exec_context.syslog_level_prefix)
+$1.LogLevelMax, config_parse_log_level, 0, offsetof($1, exec_context.log_level_max)
+$1.LogRateLimitIntervalSec, config_parse_sec, 0, offsetof($1, exec_context.log_ratelimit_interval_usec)
+$1.LogRateLimitBurst, config_parse_unsigned, 0, offsetof($1, exec_context.log_ratelimit_burst)
+$1.LogExtraFields, config_parse_log_extra_fields, 0, offsetof($1, exec_context)
+$1.Capabilities, config_parse_warn_compat, DISABLED_LEGACY, offsetof($1, exec_context)
+$1.SecureBits, config_parse_exec_secure_bits, 0, offsetof($1, exec_context.secure_bits)
+$1.CapabilityBoundingSet, config_parse_capability_set, 0, offsetof($1, exec_context.capability_bounding_set)
+$1.AmbientCapabilities, config_parse_capability_set, 0, offsetof($1, exec_context.capability_ambient_set)
+$1.TimerSlackNSec, config_parse_nsec, 0, offsetof($1, exec_context.timer_slack_nsec)
+$1.NoNewPrivileges, config_parse_bool, 0, offsetof($1, exec_context.no_new_privileges)
+$1.KeyringMode, config_parse_exec_keyring_mode, 0, offsetof($1, exec_context.keyring_mode)
+$1.ProtectProc, config_parse_protect_proc, 0, offsetof($1, exec_context.protect_proc)
+$1.ProcSubset, config_parse_proc_subset, 0, offsetof($1, exec_context.proc_subset)
+m4_ifdef(`HAVE_SECCOMP',
+`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
+$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
+$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
+$1.SystemCallLog, config_parse_syscall_log, 0, offsetof($1, exec_context)
+$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
+$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
+$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
+$1.RestrictSUIDSGID, config_parse_bool, 0, offsetof($1, exec_context.restrict_suid_sgid)
+$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)
+$1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)',
+`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.SystemCallLog, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictSUIDSGID, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+$1.LimitCPU, config_parse_rlimit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
+$1.LimitFSIZE, config_parse_rlimit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
+$1.LimitDATA, config_parse_rlimit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)
+$1.LimitSTACK, config_parse_rlimit, RLIMIT_STACK, offsetof($1, exec_context.rlimit)
+$1.LimitCORE, config_parse_rlimit, RLIMIT_CORE, offsetof($1, exec_context.rlimit)
+$1.LimitRSS, config_parse_rlimit, RLIMIT_RSS, offsetof($1, exec_context.rlimit)
+$1.LimitNOFILE, config_parse_rlimit, RLIMIT_NOFILE, offsetof($1, exec_context.rlimit)
+$1.LimitAS, config_parse_rlimit, RLIMIT_AS, offsetof($1, exec_context.rlimit)
+$1.LimitNPROC, config_parse_rlimit, RLIMIT_NPROC, offsetof($1, exec_context.rlimit)
+$1.LimitMEMLOCK, config_parse_rlimit, RLIMIT_MEMLOCK, offsetof($1, exec_context.rlimit)
+$1.LimitLOCKS, config_parse_rlimit, RLIMIT_LOCKS, offsetof($1, exec_context.rlimit)
+$1.LimitSIGPENDING, config_parse_rlimit, RLIMIT_SIGPENDING, offsetof($1, exec_context.rlimit)
+$1.LimitMSGQUEUE, config_parse_rlimit, RLIMIT_MSGQUEUE, offsetof($1, exec_context.rlimit)
+$1.LimitNICE, config_parse_rlimit, RLIMIT_NICE, offsetof($1, exec_context.rlimit)
+$1.LimitRTPRIO, config_parse_rlimit, RLIMIT_RTPRIO, offsetof($1, exec_context.rlimit)
+$1.LimitRTTIME, config_parse_rlimit, RLIMIT_RTTIME, offsetof($1, exec_context.rlimit)
+$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
+$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
+$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
+$1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
+$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
+$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
+$1.BindPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
+$1.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
+$1.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof($1, exec_context)
+$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
+$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
+$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
+$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
+$1.ProtectKernelLogs, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_logs)
+$1.ProtectClock, config_parse_bool, 0, offsetof($1, exec_context.protect_clock)
+$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
+$1.NetworkNamespacePath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.network_namespace_path)
+$1.LogNamespace, config_parse_log_namespace, 0, offsetof($1, exec_context)
+$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
+$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
+$1.PrivateMounts, config_parse_bool, 0, offsetof($1, exec_context.private_mounts)
+$1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context.protect_system)
+$1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context.protect_home)
+$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context.mount_flags)
+$1.MountAPIVFS, config_parse_bool, 0, offsetof($1, exec_context.mount_apivfs)
+$1.Personality, config_parse_personality, 0, offsetof($1, exec_context.personality)
+$1.RuntimeDirectoryPreserve, config_parse_runtime_preserve_mode, 0, offsetof($1, exec_context.runtime_directory_preserve_mode)
+$1.RuntimeDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode)
+$1.RuntimeDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_RUNTIME].paths)
+$1.StateDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_STATE].mode)
+$1.StateDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_STATE].paths)
+$1.CacheDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CACHE].mode)
+$1.CacheDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CACHE].paths)
+$1.LogsDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_LOGS].mode)
+$1.LogsDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_LOGS].paths)
+$1.ConfigurationDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CONFIGURATION].mode)
+$1.ConfigurationDirectory, config_parse_exec_directories, 0, offsetof($1, exec_context.directories[EXEC_DIRECTORY_CONFIGURATION].paths)
+$1.SetCredential, config_parse_set_credential, 0, offsetof($1, exec_context)
+$1.LoadCredential, config_parse_load_credential, 0, offsetof($1, exec_context)
+$1.TimeoutCleanSec, config_parse_sec, 0, offsetof($1, exec_context.timeout_clean_usec)
+$1.ProtectHostname, config_parse_bool, 0, offsetof($1, exec_context.protect_hostname)
+m4_ifdef(`HAVE_PAM',
+`$1.PAMName, config_parse_unit_string_printf, 0, offsetof($1, exec_context.pam_name)',
+`$1.PAMName, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+$1.IgnoreSIGPIPE, config_parse_bool, 0, offsetof($1, exec_context.ignore_sigpipe)
+$1.UtmpIdentifier, config_parse_unit_string_printf, 0, offsetof($1, exec_context.utmp_id)
+$1.UtmpMode, config_parse_exec_utmp_mode, 0, offsetof($1, exec_context.utmp_mode)
+m4_ifdef(`HAVE_SELINUX',
+`$1.SELinuxContext, config_parse_exec_selinux_context, 0, offsetof($1, exec_context)',
+`$1.SELinuxContext, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+m4_ifdef(`HAVE_APPARMOR',
+`$1.AppArmorProfile, config_parse_exec_apparmor_profile, 0, offsetof($1, exec_context)',
+`$1.AppArmorProfile, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+m4_ifdef(`ENABLE_SMACK',
+`$1.SmackProcessLabel, config_parse_exec_smack_process_label, 0, offsetof($1, exec_context)',
+`$1.SmackProcessLabel, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')'
+)m4_dnl
+m4_define(`KILL_CONTEXT_CONFIG_ITEMS',
+`$1.SendSIGKILL, config_parse_bool, 0, offsetof($1, kill_context.send_sigkill)
+$1.SendSIGHUP, config_parse_bool, 0, offsetof($1, kill_context.send_sighup)
+$1.KillMode, config_parse_kill_mode, 0, offsetof($1, kill_context.kill_mode)
+$1.KillSignal, config_parse_signal, 0, offsetof($1, kill_context.kill_signal)
+$1.RestartKillSignal, config_parse_signal, 0, offsetof($1, kill_context.restart_kill_signal)
+$1.FinalKillSignal, config_parse_signal, 0, offsetof($1, kill_context.final_kill_signal)
+$1.WatchdogSignal, config_parse_signal, 0, offsetof($1, kill_context.watchdog_signal)'
+)m4_dnl
+m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
+`$1.Slice, config_parse_unit_slice, 0, 0
+$1.AllowedCPUs, config_parse_allowed_cpus, 0, offsetof($1, cgroup_context)
+$1.AllowedMemoryNodes, config_parse_allowed_mems, 0, offsetof($1, cgroup_context)
+$1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting)
+$1.CPUWeight, config_parse_cg_weight, 0, offsetof($1, cgroup_context.cpu_weight)
+$1.StartupCPUWeight, config_parse_cg_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight)
+$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares)
+$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
+$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
+$1.CPUQuotaPeriodSec, config_parse_sec_def_infinity, 0, offsetof($1, cgroup_context.cpu_quota_period_usec)
+$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
+$1.MemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.DefaultMemoryMin, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.DefaultMemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemorySwapMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)
+$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy)
+$1.IOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.io_accounting)
+$1.IOWeight, config_parse_cg_weight, 0, offsetof($1, cgroup_context.io_weight)
+$1.StartupIOWeight, config_parse_cg_weight, 0, offsetof($1, cgroup_context.startup_io_weight)
+$1.IODeviceWeight, config_parse_io_device_weight, 0, offsetof($1, cgroup_context)
+$1.IOReadBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IODeviceLatencyTargetSec, config_parse_io_device_latency, 0, offsetof($1, cgroup_context)
+$1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting)
+$1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight)
+$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)
+$1.BlockIODeviceWeight, config_parse_blockio_device_weight, 0, offsetof($1, cgroup_context)
+$1.BlockIOReadBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
+$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
+$1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting)
+$1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context.tasks_max)
+$1.Delegate, config_parse_delegate, 0, offsetof($1, cgroup_context)
+$1.DisableControllers, config_parse_disable_controllers, 0, offsetof($1, cgroup_context)
+$1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting)
+$1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow)
+$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
+$1.IPIngressFilterPath, config_parse_ip_filter_bpf_progs, 0, offsetof($1, cgroup_context.ip_filters_ingress)
+$1.IPEgressFilterPath, config_parse_ip_filter_bpf_progs, 0, offsetof($1, cgroup_context.ip_filters_egress)
+$1.ManagedOOMSwap, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_swap)
+$1.ManagedOOMMemoryPressure, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_mem_pressure)
+$1.ManagedOOMMemoryPressureLimitPercent, config_parse_managed_oom_mem_pressure_limit, 0, offsetof($1, cgroup_context.moom_mem_pressure_limit)
+$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0'
+)m4_dnl
+Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
+Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation)
+Unit.SourcePath, config_parse_unit_path_printf, 0, offsetof(Unit, source_path)
+Unit.Requires, config_parse_unit_deps, UNIT_REQUIRES, 0
+Unit.Requisite, config_parse_unit_deps, UNIT_REQUISITE, 0
+Unit.Wants, config_parse_unit_deps, UNIT_WANTS, 0
+Unit.BindsTo, config_parse_unit_deps, UNIT_BINDS_TO, 0
+Unit.BindTo, config_parse_unit_deps, UNIT_BINDS_TO, 0
+Unit.Conflicts, config_parse_unit_deps, UNIT_CONFLICTS, 0
+Unit.Before, config_parse_unit_deps, UNIT_BEFORE, 0
+Unit.After, config_parse_unit_deps, UNIT_AFTER, 0
+Unit.OnFailure, config_parse_unit_deps, UNIT_ON_FAILURE, 0
+Unit.PropagatesReloadTo, config_parse_unit_deps, UNIT_PROPAGATES_RELOAD_TO, 0
+Unit.PropagateReloadTo, config_parse_unit_deps, UNIT_PROPAGATES_RELOAD_TO, 0
+Unit.ReloadPropagatedFrom, config_parse_unit_deps, UNIT_RELOAD_PROPAGATED_FROM, 0
+Unit.PropagateReloadFrom, config_parse_unit_deps, UNIT_RELOAD_PROPAGATED_FROM, 0
+Unit.PartOf, config_parse_unit_deps, UNIT_PART_OF, 0
+Unit.JoinsNamespaceOf, config_parse_unit_deps, UNIT_JOINS_NAMESPACE_OF, 0
+Unit.RequiresOverridable, config_parse_obsolete_unit_deps, UNIT_REQUIRES, 0
+Unit.RequisiteOverridable, config_parse_obsolete_unit_deps, UNIT_REQUISITE, 0
+Unit.RequiresMountsFor, config_parse_unit_requires_mounts_for, 0, 0
+Unit.StopWhenUnneeded, config_parse_bool, 0, offsetof(Unit, stop_when_unneeded)
+Unit.RefuseManualStart, config_parse_bool, 0, offsetof(Unit, refuse_manual_start)
+Unit.RefuseManualStop, config_parse_bool, 0, offsetof(Unit, refuse_manual_stop)
+Unit.AllowIsolate, config_parse_bool, 0, offsetof(Unit, allow_isolate)
+Unit.DefaultDependencies, config_parse_bool, 0, offsetof(Unit, default_dependencies)
+Unit.OnFailureJobMode, config_parse_job_mode, 0, offsetof(Unit, on_failure_job_mode)
+m4_dnl The following is a legacy alias name for compatibility
+Unit.OnFailureIsolate, config_parse_job_mode_isolate, 0, offsetof(Unit, on_failure_job_mode)
+Unit.IgnoreOnIsolate, config_parse_bool, 0, offsetof(Unit, ignore_on_isolate)
+Unit.IgnoreOnSnapshot, config_parse_warn_compat, DISABLED_LEGACY, 0
+Unit.JobTimeoutSec, config_parse_job_timeout_sec, 0, 0
+Unit.JobRunningTimeoutSec, config_parse_job_running_timeout_sec, 0, 0
+Unit.JobTimeoutAction, config_parse_emergency_action, 0, offsetof(Unit, job_timeout_action)
+Unit.JobTimeoutRebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, job_timeout_reboot_arg)
+Unit.StartLimitIntervalSec, config_parse_sec, 0, offsetof(Unit, start_ratelimit.interval)
+m4_dnl The following is a legacy alias name for compatibility
+Unit.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_ratelimit.interval)
+Unit.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_ratelimit.burst)
+Unit.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
+Unit.FailureAction, config_parse_emergency_action, 0, offsetof(Unit, failure_action)
+Unit.SuccessAction, config_parse_emergency_action, 0, offsetof(Unit, success_action)
+Unit.FailureActionExitStatus, config_parse_exit_status, 0, offsetof(Unit, failure_action_exit_status)
+Unit.SuccessActionExitStatus, config_parse_exit_status, 0, offsetof(Unit, success_action_exit_status)
+Unit.RebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, reboot_arg)
+Unit.ConditionPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, conditions)
+Unit.ConditionPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, conditions)
+Unit.ConditionPathIsDirectory, config_parse_unit_condition_path, CONDITION_PATH_IS_DIRECTORY, offsetof(Unit, conditions)
+Unit.ConditionPathIsSymbolicLink, config_parse_unit_condition_path, CONDITION_PATH_IS_SYMBOLIC_LINK, offsetof(Unit, conditions)
+Unit.ConditionPathIsMountPoint, config_parse_unit_condition_path, CONDITION_PATH_IS_MOUNT_POINT, offsetof(Unit, conditions)
+Unit.ConditionPathIsReadWrite, config_parse_unit_condition_path, CONDITION_PATH_IS_READ_WRITE, offsetof(Unit, conditions)
+Unit.ConditionPathIsEncrypted, config_parse_unit_condition_path, CONDITION_PATH_IS_ENCRYPTED, offsetof(Unit, conditions)
+Unit.ConditionDirectoryNotEmpty, config_parse_unit_condition_path, CONDITION_DIRECTORY_NOT_EMPTY, offsetof(Unit, conditions)
+Unit.ConditionFileNotEmpty, config_parse_unit_condition_path, CONDITION_FILE_NOT_EMPTY, offsetof(Unit, conditions)
+Unit.ConditionFileIsExecutable, config_parse_unit_condition_path, CONDITION_FILE_IS_EXECUTABLE, offsetof(Unit, conditions)
+Unit.ConditionNeedsUpdate, config_parse_unit_condition_path, CONDITION_NEEDS_UPDATE, offsetof(Unit, conditions)
+Unit.ConditionFirstBoot, config_parse_unit_condition_string, CONDITION_FIRST_BOOT, offsetof(Unit, conditions)
+Unit.ConditionArchitecture, config_parse_unit_condition_string, CONDITION_ARCHITECTURE, offsetof(Unit, conditions)
+Unit.ConditionVirtualization, config_parse_unit_condition_string, CONDITION_VIRTUALIZATION, offsetof(Unit, conditions)
+Unit.ConditionHost, config_parse_unit_condition_string, CONDITION_HOST, offsetof(Unit, conditions)
+Unit.ConditionKernelCommandLine, config_parse_unit_condition_string, CONDITION_KERNEL_COMMAND_LINE, offsetof(Unit, conditions)
+Unit.ConditionKernelVersion, config_parse_unit_condition_string, CONDITION_KERNEL_VERSION, offsetof(Unit, conditions)
+Unit.ConditionSecurity, config_parse_unit_condition_string, CONDITION_SECURITY, offsetof(Unit, conditions)
+Unit.ConditionCapability, config_parse_unit_condition_string, CONDITION_CAPABILITY, offsetof(Unit, conditions)
+Unit.ConditionACPower, config_parse_unit_condition_string, CONDITION_AC_POWER, offsetof(Unit, conditions)
+Unit.ConditionMemory, config_parse_unit_condition_string, CONDITION_MEMORY, offsetof(Unit, conditions)
+Unit.ConditionCPUs, config_parse_unit_condition_string, CONDITION_CPUS, offsetof(Unit, conditions)
+Unit.ConditionEnvironment, config_parse_unit_condition_string, CONDITION_ENVIRONMENT, offsetof(Unit, conditions)
+Unit.ConditionUser, config_parse_unit_condition_string, CONDITION_USER, offsetof(Unit, conditions)
+Unit.ConditionGroup, config_parse_unit_condition_string, CONDITION_GROUP, offsetof(Unit, conditions)
+Unit.ConditionControlGroupController, config_parse_unit_condition_string, CONDITION_CONTROL_GROUP_CONTROLLER, offsetof(Unit, conditions)
+Unit.AssertPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, asserts)
+Unit.AssertPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, asserts)
+Unit.AssertPathIsDirectory, config_parse_unit_condition_path, CONDITION_PATH_IS_DIRECTORY, offsetof(Unit, asserts)
+Unit.AssertPathIsSymbolicLink, config_parse_unit_condition_path, CONDITION_PATH_IS_SYMBOLIC_LINK, offsetof(Unit, asserts)
+Unit.AssertPathIsMountPoint, config_parse_unit_condition_path, CONDITION_PATH_IS_MOUNT_POINT, offsetof(Unit, asserts)
+Unit.AssertPathIsReadWrite, config_parse_unit_condition_path, CONDITION_PATH_IS_READ_WRITE, offsetof(Unit, asserts)
+Unit.AssertPathIsEncrypted, config_parse_unit_condition_path, CONDITION_PATH_IS_ENCRYPTED, offsetof(Unit, asserts)
+Unit.AssertDirectoryNotEmpty, config_parse_unit_condition_path, CONDITION_DIRECTORY_NOT_EMPTY, offsetof(Unit, asserts)
+Unit.AssertFileNotEmpty, config_parse_unit_condition_path, CONDITION_FILE_NOT_EMPTY, offsetof(Unit, asserts)
+Unit.AssertFileIsExecutable, config_parse_unit_condition_path, CONDITION_FILE_IS_EXECUTABLE, offsetof(Unit, asserts)
+Unit.AssertNeedsUpdate, config_parse_unit_condition_path, CONDITION_NEEDS_UPDATE, offsetof(Unit, asserts)
+Unit.AssertFirstBoot, config_parse_unit_condition_string, CONDITION_FIRST_BOOT, offsetof(Unit, asserts)
+Unit.AssertArchitecture, config_parse_unit_condition_string, CONDITION_ARCHITECTURE, offsetof(Unit, asserts)
+Unit.AssertVirtualization, config_parse_unit_condition_string, CONDITION_VIRTUALIZATION, offsetof(Unit, asserts)
+Unit.AssertHost, config_parse_unit_condition_string, CONDITION_HOST, offsetof(Unit, asserts)
+Unit.AssertKernelCommandLine, config_parse_unit_condition_string, CONDITION_KERNEL_COMMAND_LINE, offsetof(Unit, asserts)
+Unit.AssertKernelVersion, config_parse_unit_condition_string, CONDITION_KERNEL_VERSION, offsetof(Unit, asserts)
+Unit.AssertSecurity, config_parse_unit_condition_string, CONDITION_SECURITY, offsetof(Unit, asserts)
+Unit.AssertCapability, config_parse_unit_condition_string, CONDITION_CAPABILITY, offsetof(Unit, asserts)
+Unit.AssertACPower, config_parse_unit_condition_string, CONDITION_AC_POWER, offsetof(Unit, asserts)
+Unit.AssertMemory, config_parse_unit_condition_string, CONDITION_MEMORY, offsetof(Unit, asserts)
+Unit.AssertCPUs, config_parse_unit_condition_string, CONDITION_CPUS, offsetof(Unit, asserts)
+Unit.AssertEnvironment, config_parse_unit_condition_string, CONDITION_ENVIRONMENT, offsetof(Unit, asserts)
+Unit.AssertUser, config_parse_unit_condition_string, CONDITION_USER, offsetof(Unit, asserts)
+Unit.AssertGroup, config_parse_unit_condition_string, CONDITION_GROUP, offsetof(Unit, asserts)
+Unit.AssertControlGroupController, config_parse_unit_condition_string, CONDITION_CONTROL_GROUP_CONTROLLER, offsetof(Unit, asserts)
+Unit.CollectMode, config_parse_collect_mode, 0, offsetof(Unit, collect_mode)
+m4_dnl
+Service.PIDFile, config_parse_pid_file, 0, offsetof(Service, pid_file)
+Service.ExecCondition, config_parse_exec, SERVICE_EXEC_CONDITION, offsetof(Service, exec_command)
+Service.ExecStartPre, config_parse_exec, SERVICE_EXEC_START_PRE, offsetof(Service, exec_command)
+Service.ExecStart, config_parse_exec, SERVICE_EXEC_START, offsetof(Service, exec_command)
+Service.ExecStartPost, config_parse_exec, SERVICE_EXEC_START_POST, offsetof(Service, exec_command)
+Service.ExecReload, config_parse_exec, SERVICE_EXEC_RELOAD, offsetof(Service, exec_command)
+Service.ExecStop, config_parse_exec, SERVICE_EXEC_STOP, offsetof(Service, exec_command)
+Service.ExecStopPost, config_parse_exec, SERVICE_EXEC_STOP_POST, offsetof(Service, exec_command)
+Service.RestartSec, config_parse_sec, 0, offsetof(Service, restart_usec)
+Service.TimeoutSec, config_parse_service_timeout, 0, 0
+Service.TimeoutStartSec, config_parse_service_timeout, 0, 0
+Service.TimeoutStopSec, config_parse_sec_fix_0, 0, offsetof(Service, timeout_stop_usec)
+Service.TimeoutAbortSec, config_parse_service_timeout_abort, 0, 0
+Service.TimeoutStartFailureMode, config_parse_service_timeout_failure_mode, 0, offsetof(Service, timeout_start_failure_mode)
+Service.TimeoutStopFailureMode, config_parse_service_timeout_failure_mode, 0, offsetof(Service, timeout_stop_failure_mode)
+Service.RuntimeMaxSec, config_parse_sec, 0, offsetof(Service, runtime_max_usec)
+Service.WatchdogSec, config_parse_sec, 0, offsetof(Service, watchdog_usec)
+m4_dnl The following five only exist for compatibility, they moved into Unit, see above
+Service.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_ratelimit.interval)
+Service.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_ratelimit.burst)
+Service.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
+Service.FailureAction, config_parse_emergency_action, 0, offsetof(Unit, failure_action)
+Service.RebootArgument, config_parse_unit_string_printf, 0, offsetof(Unit, reboot_arg)
+Service.Type, config_parse_service_type, 0, offsetof(Service, type)
+Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart)
+Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only)
+Service.RootDirectoryStartOnly, config_parse_bool, 0, offsetof(Service, root_directory_start_only)
+Service.RemainAfterExit, config_parse_bool, 0, offsetof(Service, remain_after_exit)
+Service.GuessMainPID, config_parse_bool, 0, offsetof(Service, guess_main_pid)
+Service.RestartPreventExitStatus, config_parse_set_status, 0, offsetof(Service, restart_prevent_status)
+Service.RestartForceExitStatus, config_parse_set_status, 0, offsetof(Service, restart_force_status)
+Service.SuccessExitStatus, config_parse_set_status, 0, offsetof(Service, success_status)
+Service.SysVStartPriority, config_parse_warn_compat, DISABLED_LEGACY, 0
+Service.NonBlocking, config_parse_bool, 0, offsetof(Service, exec_context.non_blocking)
+Service.BusName, config_parse_bus_name, 0, offsetof(Service, bus_name)
+Service.FileDescriptorStoreMax, config_parse_unsigned, 0, offsetof(Service, n_fd_store_max)
+Service.NotifyAccess, config_parse_notify_access, 0, offsetof(Service, notify_access)
+Service.Sockets, config_parse_service_sockets, 0, 0
+Service.BusPolicy, config_parse_warn_compat, DISABLED_LEGACY, 0
+Service.USBFunctionDescriptors, config_parse_unit_path_printf, 0, offsetof(Service, usb_function_descriptors)
+Service.USBFunctionStrings, config_parse_unit_path_printf, 0, offsetof(Service, usb_function_strings)
+Service.OOMPolicy, config_parse_oom_policy, 0, offsetof(Service, oom_policy)
+EXEC_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Service)m4_dnl
+m4_dnl
+Socket.ListenStream, config_parse_socket_listen, SOCKET_SOCKET, 0
+Socket.ListenDatagram, config_parse_socket_listen, SOCKET_SOCKET, 0
+Socket.ListenSequentialPacket, config_parse_socket_listen, SOCKET_SOCKET, 0
+Socket.ListenFIFO, config_parse_socket_listen, SOCKET_FIFO, 0
+Socket.ListenNetlink, config_parse_socket_listen, SOCKET_SOCKET, 0
+Socket.ListenSpecial, config_parse_socket_listen, SOCKET_SPECIAL, 0
+Socket.ListenMessageQueue, config_parse_socket_listen, SOCKET_MQUEUE, 0
+Socket.ListenUSBFunction, config_parse_socket_listen, SOCKET_USB_FUNCTION, 0
+Socket.SocketProtocol, config_parse_socket_protocol, 0, offsetof(Socket, socket_protocol)
+Socket.BindIPv6Only, config_parse_socket_bind, 0, offsetof(Socket, bind_ipv6_only)
+Socket.Backlog, config_parse_unsigned, 0, offsetof(Socket, backlog)
+Socket.BindToDevice, config_parse_socket_bindtodevice, 0, 0
+Socket.ExecStartPre, config_parse_exec, SOCKET_EXEC_START_PRE, offsetof(Socket, exec_command)
+Socket.ExecStartPost, config_parse_exec, SOCKET_EXEC_START_POST, offsetof(Socket, exec_command)
+Socket.ExecStopPre, config_parse_exec, SOCKET_EXEC_STOP_PRE, offsetof(Socket, exec_command)
+Socket.ExecStopPost, config_parse_exec, SOCKET_EXEC_STOP_POST, offsetof(Socket, exec_command)
+Socket.TimeoutSec, config_parse_sec_fix_0, 0, offsetof(Socket, timeout_usec)
+Socket.SocketUser, config_parse_user_group_compat, 0, offsetof(Socket, user)
+Socket.SocketGroup, config_parse_user_group_compat, 0, offsetof(Socket, group)
+Socket.SocketMode, config_parse_mode, 0, offsetof(Socket, socket_mode)
+Socket.DirectoryMode, config_parse_mode, 0, offsetof(Socket, directory_mode)
+Socket.Accept, config_parse_bool, 0, offsetof(Socket, accept)
+Socket.FlushPending, config_parse_bool, 0, offsetof(Socket, flush_pending)
+Socket.Writable, config_parse_bool, 0, offsetof(Socket, writable)
+Socket.MaxConnections, config_parse_unsigned, 0, offsetof(Socket, max_connections)
+Socket.MaxConnectionsPerSource, config_parse_unsigned, 0, offsetof(Socket, max_connections_per_source)
+Socket.KeepAlive, config_parse_bool, 0, offsetof(Socket, keep_alive)
+Socket.KeepAliveTimeSec, config_parse_sec, 0, offsetof(Socket, keep_alive_time)
+Socket.KeepAliveIntervalSec, config_parse_sec, 0, offsetof(Socket, keep_alive_interval)
+Socket.KeepAliveProbes, config_parse_unsigned, 0, offsetof(Socket, keep_alive_cnt)
+Socket.DeferAcceptSec, config_parse_sec, 0, offsetof(Socket, defer_accept)
+Socket.NoDelay, config_parse_bool, 0, offsetof(Socket, no_delay)
+Socket.Priority, config_parse_int, 0, offsetof(Socket, priority)
+Socket.ReceiveBuffer, config_parse_iec_size, 0, offsetof(Socket, receive_buffer)
+Socket.SendBuffer, config_parse_iec_size, 0, offsetof(Socket, send_buffer)
+Socket.IPTOS, config_parse_ip_tos, 0, offsetof(Socket, ip_tos)
+Socket.IPTTL, config_parse_int, 0, offsetof(Socket, ip_ttl)
+Socket.Mark, config_parse_int, 0, offsetof(Socket, mark)
+Socket.PipeSize, config_parse_iec_size, 0, offsetof(Socket, pipe_size)
+Socket.FreeBind, config_parse_bool, 0, offsetof(Socket, free_bind)
+Socket.Transparent, config_parse_bool, 0, offsetof(Socket, transparent)
+Socket.Broadcast, config_parse_bool, 0, offsetof(Socket, broadcast)
+Socket.PassCredentials, config_parse_bool, 0, offsetof(Socket, pass_cred)
+Socket.PassSecurity, config_parse_bool, 0, offsetof(Socket, pass_sec)
+Socket.PassPacketInfo, config_parse_bool, 0, offsetof(Socket, pass_pktinfo)
+Socket.Timestamping, config_parse_socket_timestamping, 0, offsetof(Socket, timestamping)
+Socket.TCPCongestion, config_parse_string, 0, offsetof(Socket, tcp_congestion)
+Socket.ReusePort, config_parse_bool, 0, offsetof(Socket, reuse_port)
+Socket.MessageQueueMaxMessages, config_parse_long, 0, offsetof(Socket, mq_maxmsg)
+Socket.MessageQueueMessageSize, config_parse_long, 0, offsetof(Socket, mq_msgsize)
+Socket.RemoveOnStop, config_parse_bool, 0, offsetof(Socket, remove_on_stop)
+Socket.Symlinks, config_parse_unit_path_strv_printf, 0, offsetof(Socket, symlinks)
+Socket.FileDescriptorName, config_parse_fdname, 0, 0
+Socket.Service, config_parse_socket_service, 0, 0
+Socket.TriggerLimitIntervalSec, config_parse_sec, 0, offsetof(Socket, trigger_limit.interval)
+Socket.TriggerLimitBurst, config_parse_unsigned, 0, offsetof(Socket, trigger_limit.burst)
+m4_ifdef(`ENABLE_SMACK',
+`Socket.SmackLabel, config_parse_unit_string_printf, 0, offsetof(Socket, smack)
+Socket.SmackLabelIPIn, config_parse_unit_string_printf, 0, offsetof(Socket, smack_ip_in)
+Socket.SmackLabelIPOut, config_parse_unit_string_printf, 0, offsetof(Socket, smack_ip_out)',
+`Socket.SmackLabel, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+Socket.SmackLabelIPIn, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+Socket.SmackLabelIPOut, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+m4_ifdef(`HAVE_SELINUX',
+`Socket.SELinuxContextFromNet, config_parse_bool, 0, offsetof(Socket, selinux_context_from_net)',
+`Socket.SELinuxContextFromNet, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
+EXEC_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Socket)m4_dnl
+m4_dnl
+Mount.What, config_parse_unit_string_printf, 0, offsetof(Mount, parameters_fragment.what)
+Mount.Where, config_parse_unit_path_printf, 0, offsetof(Mount, where)
+Mount.Options, config_parse_unit_string_printf, 0, offsetof(Mount, parameters_fragment.options)
+Mount.Type, config_parse_unit_string_printf, 0, offsetof(Mount, parameters_fragment.fstype)
+Mount.TimeoutSec, config_parse_sec_fix_0, 0, offsetof(Mount, timeout_usec)
+Mount.DirectoryMode, config_parse_mode, 0, offsetof(Mount, directory_mode)
+Mount.SloppyOptions, config_parse_bool, 0, offsetof(Mount, sloppy_options)
+Mount.LazyUnmount, config_parse_bool, 0, offsetof(Mount, lazy_unmount)
+Mount.ForceUnmount, config_parse_bool, 0, offsetof(Mount, force_unmount)
+Mount.ReadWriteOnly, config_parse_bool, 0, offsetof(Mount, read_write_only)
+EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
+m4_dnl
+Automount.Where, config_parse_unit_path_printf, 0, offsetof(Automount, where)
+Automount.DirectoryMode, config_parse_mode, 0, offsetof(Automount, directory_mode)
+Automount.TimeoutIdleSec, config_parse_sec_fix_0, 0, offsetof(Automount, timeout_idle_usec)
+m4_dnl
+Swap.What, config_parse_unit_path_printf, 0, offsetof(Swap, parameters_fragment.what)
+Swap.Priority, config_parse_swap_priority, 0, 0
+Swap.Options, config_parse_unit_string_printf, 0, offsetof(Swap, parameters_fragment.options)
+Swap.TimeoutSec, config_parse_sec_fix_0, 0, offsetof(Swap, timeout_usec)
+EXEC_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Swap)m4_dnl
+m4_dnl
+Timer.OnCalendar, config_parse_timer, TIMER_CALENDAR, 0
+Timer.OnActiveSec, config_parse_timer, TIMER_ACTIVE, 0
+Timer.OnBootSec, config_parse_timer, TIMER_BOOT, 0
+Timer.OnStartupSec, config_parse_timer, TIMER_STARTUP, 0
+Timer.OnUnitActiveSec, config_parse_timer, TIMER_UNIT_ACTIVE, 0
+Timer.OnUnitInactiveSec, config_parse_timer, TIMER_UNIT_INACTIVE, 0
+Timer.OnClockChange, config_parse_bool, 0, offsetof(Timer, on_clock_change)
+Timer.OnTimezoneChange, config_parse_bool, 0, offsetof(Timer, on_timezone_change)
+Timer.Persistent, config_parse_bool, 0, offsetof(Timer, persistent)
+Timer.WakeSystem, config_parse_bool, 0, offsetof(Timer, wake_system)
+Timer.RemainAfterElapse, config_parse_bool, 0, offsetof(Timer, remain_after_elapse)
+Timer.FixedRandomDelay, config_parse_bool, 0, offsetof(Timer, fixed_random_delay)
+Timer.AccuracySec, config_parse_sec, 0, offsetof(Timer, accuracy_usec)
+Timer.RandomizedDelaySec, config_parse_sec, 0, offsetof(Timer, random_usec)
+Timer.Unit, config_parse_trigger_unit, 0, 0
+m4_dnl
+Path.PathExists, config_parse_path_spec, 0, 0
+Path.PathExistsGlob, config_parse_path_spec, 0, 0
+Path.PathChanged, config_parse_path_spec, 0, 0
+Path.PathModified, config_parse_path_spec, 0, 0
+Path.DirectoryNotEmpty, config_parse_path_spec, 0, 0
+Path.Unit, config_parse_trigger_unit, 0, 0
+Path.MakeDirectory, config_parse_bool, 0, offsetof(Path, make_directory)
+Path.DirectoryMode, config_parse_mode, 0, offsetof(Path, directory_mode)
+m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Slice)m4_dnl
+m4_dnl
+CGROUP_CONTEXT_CONFIG_ITEMS(Scope)m4_dnl
+KILL_CONTEXT_CONFIG_ITEMS(Scope)m4_dnl
+Scope.RuntimeMaxSec, config_parse_sec, 0, offsetof(Scope, runtime_max_usec)
+Scope.TimeoutStopSec, config_parse_sec, 0, offsetof(Scope, timeout_stop_usec)
+m4_dnl The [Install] section is ignored here.
+Install.Alias, NULL, 0, 0
+Install.WantedBy, NULL, 0, 0
+Install.RequiredBy, NULL, 0, 0
+Install.Also, NULL, 0, 0
+Install.DefaultInstance, NULL, 0, 0
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
new file mode 100644
index 0000000..4964249
--- /dev/null
+++ b/src/core/load-fragment.c
@@ -0,0 +1,5851 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2012 Holger Hans Peter Freyther
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <linux/oom.h>
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+#include <sched.h>
+#include <sys/resource.h>
+
+#include "sd-messages.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "all-units.h"
+#include "bpf-firewall.h"
+#include "bus-error.h"
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "cgroup-setup.h"
+#include "conf-parser.h"
+#include "core-varlink.h"
+#include "cpu-set-util.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "ioprio.h"
+#include "ip-protocol-list.h"
+#include "journal-file.h"
+#include "limits-util.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "mountpoint-util.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "securebits-util.h"
+#include "signal-util.h"
+#include "socket-netlink.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "time-util.h"
+#include "unit-name.h"
+#include "unit-printf.h"
+#include "user-util.h"
+#include "utf8.h"
+#include "web-util.h"
+
+static int parse_socket_protocol(const char *s) {
+ int r;
+
+ r = parse_ip_protocol(s);
+ if (r < 0)
+ return r;
+ if (!IN_SET(r, IPPROTO_UDPLITE, IPPROTO_SCTP))
+ return -EPROTONOSUPPORT;
+
+ return r;
+}
+
+int parse_crash_chvt(const char *value, int *data) {
+ int b;
+
+ if (safe_atoi(value, data) >= 0)
+ return 0;
+
+ b = parse_boolean(value);
+ if (b < 0)
+ return b;
+
+ if (b > 0)
+ *data = 0; /* switch to where kmsg goes */
+ else
+ *data = -1; /* turn off switching */
+
+ return 0;
+}
+
+int parse_confirm_spawn(const char *value, char **console) {
+ char *s;
+ int r;
+
+ r = value ? parse_boolean(value) : 1;
+ if (r == 0) {
+ *console = NULL;
+ return 0;
+ } else if (r > 0) /* on with default tty */
+ s = strdup("/dev/console");
+ else if (is_path(value)) /* on with fully qualified path */
+ s = strdup(value);
+ else /* on with only a tty file name, not a fully qualified path */
+ s = path_join("/dev/", value);
+ if (!s)
+ return -ENOMEM;
+
+ *console = s;
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE(config_parse_socket_protocol, parse_socket_protocol, "Failed to parse socket protocol");
+DEFINE_CONFIG_PARSE(config_parse_exec_secure_bits, secure_bits_from_string, "Failed to parse secure bits");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_collect_mode, collect_mode, CollectMode, "Failed to parse garbage collection mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode, "Failed to parse keyring mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_proc, protect_proc, ProtectProc, "Failed to parse /proc/ protection mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_proc_subset, proc_subset, ProcSubset, "Failed to parse /proc/ subset mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode, "Failed to parse job mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_home, protect_home, ProtectHome, "Failed to parse protect home value");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_system, protect_system, ProtectSystem, "Failed to parse protect system value");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_runtime_preserve_mode, exec_preserve_mode, ExecPreserveMode, "Failed to parse runtime directory preserve mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType, "Failed to parse service type");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceRestart, "Failed to parse service restart specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_service_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode, "Failed to parse timeout failure mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy");
+DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value");
+DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint64_t, "Invalid block IO weight");
+DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight");
+DEFINE_CONFIG_PARSE_PTR(config_parse_cpu_shares, cg_cpu_shares_parse, uint64_t, "Invalid CPU shares");
+DEFINE_CONFIG_PARSE_PTR(config_parse_exec_mount_flags, mount_propagation_flags_from_string, unsigned long, "Failed to parse mount flag");
+DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_numa_policy, mpol, int, -1, "Invalid NUMA policy type");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_status_unit_format, status_unit_format, StatusUnitFormat, "Failed to parse status unit format");
+DEFINE_CONFIG_PARSE_ENUM_FULL(config_parse_socket_timestamping, socket_timestamping_from_string_harder, SocketTimestamping, "Failed to parse timestamping precision");
+
+int config_parse_unit_deps(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitDependency d = ltype;
+ Unit *u = userdata;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+ int r;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_RETAIN_ESCAPE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_name_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", word);
+ continue;
+ }
+
+ r = unit_add_dependency_by_name(u, d, k, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
+ }
+}
+
+int config_parse_obsolete_unit_deps(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Unit dependency type %s= is obsolete, replacing by %s=, please update your unit file", lvalue, unit_dependency_to_string(ltype));
+
+ return config_parse_unit_deps(unit, filename, line, section, section_line, lvalue, ltype, rvalue, data, userdata);
+}
+
+int config_parse_unit_string_printf(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ return config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
+int config_parse_unit_strv_printf(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const Unit *u = userdata;
+ _cleanup_free_ char *k = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ return config_parse_strv(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
+int config_parse_unit_path_printf(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ const Unit *u = userdata;
+ int r;
+ bool fatal = ltype;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ /* Let's not bother with anything that is too long */
+ if (strlen(rvalue) >= PATH_MAX) {
+ log_syntax(unit, fatal ? LOG_ERR : LOG_WARNING, filename, line, 0,
+ "%s value too long%s.",
+ lvalue, fatal ? "" : ", ignoring");
+ return fatal ? -ENAMETOOLONG : 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, fatal ? LOG_ERR : LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ rvalue, fatal ? "" : ", ignoring");
+ return fatal ? -ENOEXEC : 0;
+ }
+
+ return config_parse_path(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
+int config_parse_unit_path_strv_printf(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***x = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ *x = strv_free(*x);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", word);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = strv_consume(x, TAKE_PTR(k));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+static int patch_var_run(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue,
+ char **path) {
+
+ const char *e;
+ char *z;
+
+ e = path_startswith(*path, "/var/run/");
+ if (!e)
+ return 0;
+
+ z = path_join("/run/", e);
+ if (!z)
+ return log_oom();
+
+ log_syntax(unit, LOG_NOTICE, filename, line, 0,
+ "%s= references a path below legacy directory /var/run/, updating %s → %s; "
+ "please update the unit file accordingly.", lvalue, *path, z);
+
+ free_and_replace(*path, z);
+
+ return 1;
+}
+
+int config_parse_socket_listen(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ SocketPort *p = NULL;
+ SocketPort *tail;
+ Socket *s;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ s = SOCKET(data);
+
+ if (isempty(rvalue)) {
+ /* An empty assignment removes all ports */
+ socket_free_ports(s);
+ return 0;
+ }
+
+ p = new0(SocketPort, 1);
+ if (!p)
+ return log_oom();
+
+ if (ltype != SOCKET_SOCKET) {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(UNIT(s), rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ if (ltype == SOCKET_FIFO) {
+ r = patch_var_run(unit, filename, line, lvalue, &k);
+ if (r < 0)
+ return r;
+ }
+
+ free_and_replace(p->path, k);
+ p->type = ltype;
+
+ } else if (streq(lvalue, "ListenNetlink")) {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(UNIT(s), rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = socket_address_parse_netlink(&p->address, k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse address value in '%s', ignoring: %m", k);
+ return 0;
+ }
+
+ p->type = SOCKET_SOCKET;
+
+ } else {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(UNIT(s), rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (k[0] == '/') { /* Only for AF_UNIX file system sockets… */
+ r = patch_var_run(unit, filename, line, lvalue, &k);
+ if (r < 0)
+ return r;
+ }
+
+ r = socket_address_parse_and_warn(&p->address, k);
+ if (r < 0) {
+ if (r != -EAFNOSUPPORT)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse address value in '%s', ignoring: %m", k);
+ return 0;
+ }
+
+ if (streq(lvalue, "ListenStream"))
+ p->address.type = SOCK_STREAM;
+ else if (streq(lvalue, "ListenDatagram"))
+ p->address.type = SOCK_DGRAM;
+ else {
+ assert(streq(lvalue, "ListenSequentialPacket"));
+ p->address.type = SOCK_SEQPACKET;
+ }
+
+ if (socket_address_family(&p->address) != AF_LOCAL && p->address.type == SOCK_SEQPACKET) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Address family not supported, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ p->type = SOCKET_SOCKET;
+ }
+
+ p->fd = -1;
+ p->auxiliary_fds = NULL;
+ p->n_auxiliary_fds = 0;
+ p->socket = s;
+
+ LIST_FIND_TAIL(port, s->ports, tail);
+ LIST_INSERT_AFTER(port, s->ports, tail, p);
+
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_exec_nice(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int priority, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->nice_set = false;
+ return 0;
+ }
+
+ r = parse_nice(rvalue, &priority);
+ if (r < 0) {
+ if (r == -ERANGE)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Nice priority out of range, ignoring: %s", rvalue);
+ else
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse nice priority '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ c->nice = priority;
+ c->nice_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_oom_score_adjust(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int oa, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->oom_score_adjust_set = false;
+ return 0;
+ }
+
+ r = parse_oom_score_adjust(rvalue, &oa);
+ if (r < 0) {
+ if (r == -ERANGE)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "OOM score adjust value out of range, ignoring: %s", rvalue);
+ else
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse the OOM score adjust value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ c->oom_score_adjust = oa;
+ c->oom_score_adjust_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_coredump_filter(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->coredump_filter = 0;
+ c->coredump_filter_set = false;
+ return 0;
+ }
+
+ uint64_t f;
+ r = coredump_filter_mask_from_string(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse the CoredumpFilter=%s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ c->coredump_filter |= f;
+ c->oom_score_adjust_set = true;
+ return 0;
+}
+
+int config_parse_kill_mode(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ KillMode *k = data, m;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *k = KILL_CONTROL_GROUP;
+ return 0;
+ }
+
+ m = kill_mode_from_string(rvalue);
+ if (m < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse kill mode specification, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (m == KILL_NONE)
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Unit configured to use KillMode=none. "
+ "This is unsafe, as it disables systemd's process lifecycle management for the service. "
+ "Please update your service to use a safer KillMode=, such as 'mixed' or 'control-group'. "
+ "Support for KillMode=none is deprecated and will eventually be removed.");
+
+ *k = m;
+ return 0;
+}
+
+int config_parse_exec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecCommand **e = data;
+ const Unit *u = userdata;
+ const char *p;
+ bool semicolon;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(e);
+
+ e += ltype;
+
+ if (isempty(rvalue)) {
+ /* An empty assignment resets the list */
+ *e = exec_command_free_list(*e);
+ return 0;
+ }
+
+ p = rvalue;
+ do {
+ _cleanup_free_ char *path = NULL, *firstword = NULL;
+ ExecCommandFlags flags = 0;
+ bool ignore = false, separate_argv0 = false;
+ _cleanup_free_ ExecCommand *nce = NULL;
+ _cleanup_strv_free_ char **n = NULL;
+ size_t nlen = 0, nbufsize = 0;
+ const char *f;
+
+ semicolon = false;
+
+ r = extract_first_word_and_warn(&p, &firstword, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE, unit, filename, line, rvalue);
+ if (r <= 0)
+ return 0;
+
+ /* A lone ";" is a separator. Let's make sure we don't treat it as an executable name. */
+ if (streq(firstword, ";")) {
+ semicolon = true;
+ continue;
+ }
+
+ f = firstword;
+ for (;;) {
+ /* We accept an absolute path as first argument. If it's prefixed with - and the path doesn't
+ * exist, we ignore it instead of erroring out; if it's prefixed with @, we allow overriding of
+ * argv[0]; if it's prefixed with :, we will not do environment variable substitution;
+ * if it's prefixed with +, it will be run with full privileges and no sandboxing; if
+ * it's prefixed with '!' we apply sandboxing, but do not change user/group credentials; if
+ * it's prefixed with '!!', then we apply user/group credentials if the kernel supports ambient
+ * capabilities -- if it doesn't we don't apply the credentials themselves, but do apply most
+ * other sandboxing, with some special exceptions for changing UID.
+ *
+ * The idea is that '!!' may be used to write services that can take benefit of systemd's
+ * UID/GID dropping if the kernel supports ambient creds, but provide an automatic fallback to
+ * privilege dropping within the daemon if the kernel does not offer that. */
+
+ if (*f == '-' && !(flags & EXEC_COMMAND_IGNORE_FAILURE)) {
+ flags |= EXEC_COMMAND_IGNORE_FAILURE;
+ ignore = true;
+ } else if (*f == '@' && !separate_argv0)
+ separate_argv0 = true;
+ else if (*f == ':' && !(flags & EXEC_COMMAND_NO_ENV_EXPAND))
+ flags |= EXEC_COMMAND_NO_ENV_EXPAND;
+ else if (*f == '+' && !(flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC)))
+ flags |= EXEC_COMMAND_FULLY_PRIVILEGED;
+ else if (*f == '!' && !(flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC)))
+ flags |= EXEC_COMMAND_NO_SETUID;
+ else if (*f == '!' && !(flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_AMBIENT_MAGIC))) {
+ flags &= ~EXEC_COMMAND_NO_SETUID;
+ flags |= EXEC_COMMAND_AMBIENT_MAGIC;
+ } else
+ break;
+ f++;
+ }
+
+ r = unit_full_printf(u, f, &path);
+ if (r < 0) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ f, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ if (isempty(path)) {
+ /* First word is either "-" or "@" with no command. */
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
+ "Empty path in command line%s: '%s'",
+ ignore ? ", ignoring" : "", rvalue);
+ return ignore ? 0 : -ENOEXEC;
+ }
+ if (!string_is_safe(path)) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
+ "Executable name contains special characters%s: %s",
+ ignore ? ", ignoring" : "", path);
+ return ignore ? 0 : -ENOEXEC;
+ }
+ if (endswith(path, "/")) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
+ "Executable path specifies a directory%s: %s",
+ ignore ? ", ignoring" : "", path);
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ if (!path_is_absolute(path) && !filename_is_valid(path)) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
+ "Neither a valid executable name nor an absolute path%s: %s",
+ ignore ? ", ignoring" : "", path);
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ if (!separate_argv0) {
+ char *w = NULL;
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ w = strdup(path);
+ if (!w)
+ return log_oom();
+ n[nlen++] = w;
+ n[nlen] = NULL;
+ }
+
+ path_simplify(path, false);
+
+ while (!isempty(p)) {
+ _cleanup_free_ char *word = NULL, *resolved = NULL;
+
+ /* Check explicitly for an unquoted semicolon as
+ * command separator token. */
+ if (p[0] == ';' && (!p[1] || strchr(WHITESPACE, p[1]))) {
+ p++;
+ p += strspn(p, WHITESPACE);
+ semicolon = true;
+ break;
+ }
+
+ /* Check for \; explicitly, to not confuse it with \\; or "\;" or "\\;" etc.
+ * extract_first_word() would return the same for all of those. */
+ if (p[0] == '\\' && p[1] == ';' && (!p[2] || strchr(WHITESPACE, p[2]))) {
+ char *w;
+
+ p += 2;
+ p += strspn(p, WHITESPACE);
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ w = strdup(";");
+ if (!w)
+ return log_oom();
+ n[nlen++] = w;
+ n[nlen] = NULL;
+ continue;
+ }
+
+ r = extract_first_word_and_warn(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE, unit, filename, line, rvalue);
+ if (r == 0)
+ break;
+ if (r < 0)
+ return ignore ? 0 : -ENOEXEC;
+
+ r = unit_full_printf(u, word, &resolved);
+ if (r < 0) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in %s%s: %m",
+ word, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ n[nlen++] = TAKE_PTR(resolved);
+ n[nlen] = NULL;
+ }
+
+ if (!n || !n[0]) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
+ "Empty executable name or zeroeth argument%s: %s",
+ ignore ? ", ignoring" : "", rvalue);
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ nce = new0(ExecCommand, 1);
+ if (!nce)
+ return log_oom();
+
+ nce->argv = TAKE_PTR(n);
+ nce->path = TAKE_PTR(path);
+ nce->flags = flags;
+
+ exec_command_append_list(e, nce);
+
+ /* Do not _cleanup_free_ these. */
+ nce = NULL;
+
+ rvalue = p;
+ } while (semicolon);
+
+ return 0;
+}
+
+int config_parse_socket_bindtodevice(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Socket *s = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue) || streq(rvalue, "*")) {
+ s->bind_to_device = mfree(s->bind_to_device);
+ return 0;
+ }
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid interface name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (free_and_strdup(&s->bind_to_device, rvalue) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_exec_input(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ const char *n;
+ ExecInput ei;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ n = startswith(rvalue, "fd:");
+ if (n) {
+ _cleanup_free_ char *resolved = NULL;
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", n);
+ return 0;
+ }
+
+ if (isempty(resolved))
+ resolved = mfree(resolved);
+ else if (!fdname_is_valid(resolved)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid file descriptor name, ignoring: %s", resolved);
+ return 0;
+ }
+
+ free_and_replace(c->stdio_fdname[STDIN_FILENO], resolved);
+
+ ei = EXEC_INPUT_NAMED_FD;
+
+ } else if ((n = startswith(rvalue, "file:"))) {
+ _cleanup_free_ char *resolved = NULL;
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", n);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE | PATH_CHECK_FATAL, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ free_and_replace(c->stdio_file[STDIN_FILENO], resolved);
+
+ ei = EXEC_INPUT_FILE;
+
+ } else {
+ ei = exec_input_from_string(rvalue);
+ if (ei < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse input specifier, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ c->std_input = ei;
+ return 0;
+}
+
+int config_parse_exec_input_text(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *unescaped = NULL, *resolved = NULL;
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ size_t sz;
+ void *p;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Reset if the empty string is assigned */
+ c->stdin_data = mfree(c->stdin_data);
+ c->stdin_data_size = 0;
+ return 0;
+ }
+
+ r = cunescape(rvalue, 0, &unescaped);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to decode C escaped text '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, unescaped, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", unescaped);
+ return 0;
+ }
+
+ sz = strlen(resolved);
+ if (c->stdin_data_size + sz + 1 < c->stdin_data_size || /* check for overflow */
+ c->stdin_data_size + sz + 1 > EXEC_STDIN_DATA_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Standard input data too large (%zu), maximum of %zu permitted, ignoring.",
+ c->stdin_data_size + sz, (size_t) EXEC_STDIN_DATA_MAX);
+ return 0;
+ }
+
+ p = realloc(c->stdin_data, c->stdin_data_size + sz + 1);
+ if (!p)
+ return log_oom();
+
+ *((char*) mempcpy((char*) p + c->stdin_data_size, resolved, sz)) = '\n';
+
+ c->stdin_data = p;
+ c->stdin_data_size += sz + 1;
+
+ return 0;
+}
+
+int config_parse_exec_input_data(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ void *p = NULL;
+ ExecContext *c = data;
+ size_t sz;
+ void *q;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Reset if the empty string is assigned */
+ c->stdin_data = mfree(c->stdin_data);
+ c->stdin_data_size = 0;
+ return 0;
+ }
+
+ r = unbase64mem(rvalue, (size_t) -1, &p, &sz);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to decode base64 data, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ assert(sz > 0);
+
+ if (c->stdin_data_size + sz < c->stdin_data_size || /* check for overflow */
+ c->stdin_data_size + sz > EXEC_STDIN_DATA_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Standard input data too large (%zu), maximum of %zu permitted, ignoring.",
+ c->stdin_data_size + sz, (size_t) EXEC_STDIN_DATA_MAX);
+ return 0;
+ }
+
+ q = realloc(c->stdin_data, c->stdin_data_size + sz);
+ if (!q)
+ return log_oom();
+
+ memcpy((uint8_t*) q + c->stdin_data_size, p, sz);
+
+ c->stdin_data = q;
+ c->stdin_data_size += sz;
+
+ return 0;
+}
+
+int config_parse_exec_output(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *resolved = NULL;
+ const char *n;
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ bool obsolete = false;
+ ExecOutput eo;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(lvalue);
+ assert(rvalue);
+
+ n = startswith(rvalue, "fd:");
+ if (n) {
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s: %m", n);
+ return 0;
+ }
+
+ if (isempty(resolved))
+ resolved = mfree(resolved);
+ else if (!fdname_is_valid(resolved)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid file descriptor name, ignoring: %s", resolved);
+ return 0;
+ }
+
+ eo = EXEC_OUTPUT_NAMED_FD;
+
+ } else if (streq(rvalue, "syslog")) {
+ eo = EXEC_OUTPUT_JOURNAL;
+ obsolete = true;
+
+ } else if (streq(rvalue, "syslog+console")) {
+ eo = EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
+ obsolete = true;
+
+ } else if ((n = startswith(rvalue, "file:"))) {
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", n);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE | PATH_CHECK_FATAL, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ eo = EXEC_OUTPUT_FILE;
+
+ } else if ((n = startswith(rvalue, "append:"))) {
+
+ r = unit_full_printf(u, n, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", n);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE | PATH_CHECK_FATAL, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ eo = EXEC_OUTPUT_FILE_APPEND;
+ } else {
+ eo = exec_output_from_string(rvalue);
+ if (eo < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse output specifier, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ if (obsolete)
+ log_syntax(unit, LOG_NOTICE, filename, line, 0,
+ "Standard output type %s is obsolete, automatically updating to %s. Please update your unit file, and consider removing the setting altogether.",
+ rvalue, exec_output_to_string(eo));
+
+ if (streq(lvalue, "StandardOutput")) {
+ if (eo == EXEC_OUTPUT_NAMED_FD)
+ free_and_replace(c->stdio_fdname[STDOUT_FILENO], resolved);
+ else
+ free_and_replace(c->stdio_file[STDOUT_FILENO], resolved);
+
+ c->std_output = eo;
+
+ } else {
+ assert(streq(lvalue, "StandardError"));
+
+ if (eo == EXEC_OUTPUT_NAMED_FD)
+ free_and_replace(c->stdio_fdname[STDERR_FILENO], resolved);
+ else
+ free_and_replace(c->stdio_file[STDERR_FILENO], resolved);
+
+ c->std_error = eo;
+ }
+
+ return 0;
+}
+
+int config_parse_exec_io_class(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->ioprio_set = false;
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
+ return 0;
+ }
+
+ x = ioprio_class_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse IO scheduling class, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->ioprio = IOPRIO_PRIO_VALUE(x, IOPRIO_PRIO_DATA(c->ioprio));
+ c->ioprio_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_io_priority(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int i, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->ioprio_set = false;
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
+ return 0;
+ }
+
+ r = ioprio_parse_priority(rvalue, &i);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse IO priority, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_PRIO_CLASS(c->ioprio), i);
+ c->ioprio_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_cpu_sched_policy(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->cpu_sched_set = false;
+ c->cpu_sched_policy = SCHED_OTHER;
+ c->cpu_sched_priority = 0;
+ return 0;
+ }
+
+ x = sched_policy_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse CPU scheduling policy, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->cpu_sched_policy = x;
+ /* Moving to or from real-time policy? We need to adjust the priority */
+ c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(x), sched_get_priority_max(x));
+ c->cpu_sched_set = true;
+
+ return 0;
+}
+
+int config_parse_exec_mount_apivfs(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int k;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->mount_apivfs_set = false;
+ c->mount_apivfs = false;
+ return 0;
+ }
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, k,
+ "Failed to parse boolean value, ignoring: %s",
+ rvalue);
+ return 0;
+ }
+
+ c->mount_apivfs_set = true;
+ c->mount_apivfs = k;
+ return 0;
+}
+
+int config_parse_numa_mask(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ int r;
+ NUMAPolicy *p = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "all")) {
+ r = numa_mask_add_all(&p->nodes);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create NUMA mask representing \"all\" NUMA nodes, ignoring: %m");
+ } else {
+ r = parse_cpu_set_extend(rvalue, &p->nodes, true, unit, filename, line, lvalue);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse NUMA node mask, ignoring: %s", rvalue);
+ }
+
+ return 0;
+}
+
+int config_parse_exec_cpu_sched_prio(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int i, min, max, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atoi(rvalue, &i);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse CPU scheduling priority, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ /* On Linux RR/FIFO range from 1 to 99 and OTHER/BATCH may only be 0 */
+ min = sched_get_priority_min(c->cpu_sched_policy);
+ max = sched_get_priority_max(c->cpu_sched_policy);
+
+ if (i < min || i > max) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "CPU scheduling priority is out of range, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->cpu_sched_priority = i;
+ c->cpu_sched_set = true;
+
+ return 0;
+}
+
+int config_parse_root_image_options(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(mount_options_free_allp) MountOptions *options = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ char **first = NULL, **second = NULL;
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->root_image_options = mount_options_free_all(c->root_image_options);
+ return 0;
+ }
+
+ r = strv_split_colon_pairs(&l, rvalue);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ STRV_FOREACH_PAIR(first, second, l) {
+ MountOptions *o = NULL;
+ _cleanup_free_ char *mount_options_resolved = NULL;
+ const char *mount_options = NULL, *partition = "root";
+ PartitionDesignator partition_designator;
+
+ /* Format is either 'root:foo' or 'foo' (root is implied) */
+ if (!isempty(*second)) {
+ partition = *first;
+ mount_options = *second;
+ } else
+ mount_options = *first;
+
+ partition_designator = partition_designator_from_string(partition);
+ if (partition_designator < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid partition name %s, ignoring", partition);
+ continue;
+ }
+ r = unit_full_printf(u, mount_options, &mount_options_resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", mount_options);
+ continue;
+ }
+
+ o = new(MountOptions, 1);
+ if (!o)
+ return log_oom();
+ *o = (MountOptions) {
+ .partition_designator = partition_designator,
+ .options = TAKE_PTR(mount_options_resolved),
+ };
+ LIST_APPEND(mount_options, options, TAKE_PTR(o));
+ }
+
+ /* empty spaces/separators only */
+ if (LIST_IS_EMPTY(options))
+ c->root_image_options = mount_options_free_all(c->root_image_options);
+ else
+ LIST_JOIN(mount_options, c->root_image_options, options);
+
+ return 0;
+}
+
+int config_parse_exec_root_hash(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ void *roothash_decoded = NULL;
+ ExecContext *c = data;
+ size_t roothash_decoded_size = 0;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Reset if the empty string is assigned */
+ c->root_hash_path = mfree(c->root_hash_path);
+ c->root_hash = mfree(c->root_hash);
+ c->root_hash_size = 0;
+ return 0;
+ }
+
+ if (path_is_absolute(rvalue)) {
+ /* We have the path to a roothash to load and decode, eg: RootHash=/foo/bar.roothash */
+ _cleanup_free_ char *p = NULL;
+
+ p = strdup(rvalue);
+ if (!p)
+ return -ENOMEM;
+
+ free_and_replace(c->root_hash_path, p);
+ c->root_hash = mfree(c->root_hash);
+ c->root_hash_size = 0;
+ return 0;
+ }
+
+ /* We have a roothash to decode, eg: RootHash=012345789abcdef */
+ r = unhexmem(rvalue, strlen(rvalue), &roothash_decoded, &roothash_decoded_size);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to decode RootHash=, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (roothash_decoded_size < sizeof(sd_id128_t)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "RootHash= is too short, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ free_and_replace(c->root_hash, roothash_decoded);
+ c->root_hash_size = roothash_decoded_size;
+ c->root_hash_path = mfree(c->root_hash_path);
+
+ return 0;
+}
+
+int config_parse_exec_root_hash_sig(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ void *roothash_sig_decoded = NULL;
+ char *value;
+ ExecContext *c = data;
+ size_t roothash_sig_decoded_size = 0;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Reset if the empty string is assigned */
+ c->root_hash_sig_path = mfree(c->root_hash_sig_path);
+ c->root_hash_sig = mfree(c->root_hash_sig);
+ c->root_hash_sig_size = 0;
+ return 0;
+ }
+
+ if (path_is_absolute(rvalue)) {
+ /* We have the path to a roothash signature to load and decode, eg: RootHashSignature=/foo/bar.roothash.p7s */
+ _cleanup_free_ char *p = NULL;
+
+ p = strdup(rvalue);
+ if (!p)
+ return log_oom();
+
+ free_and_replace(c->root_hash_sig_path, p);
+ c->root_hash_sig = mfree(c->root_hash_sig);
+ c->root_hash_sig_size = 0;
+ return 0;
+ }
+
+ if (!(value = startswith(rvalue, "base64:"))) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to decode RootHashSignature=, not a path but doesn't start with 'base64:', ignoring: %s", rvalue);
+ return 0;
+ }
+
+ /* We have a roothash signature to decode, eg: RootHashSignature=base64:012345789abcdef */
+ r = unbase64mem(value, strlen(value), &roothash_sig_decoded, &roothash_sig_decoded_size);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to decode RootHashSignature=, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ free_and_replace(c->root_hash_sig, roothash_sig_decoded);
+ c->root_hash_sig_size = roothash_sig_decoded_size;
+ c->root_hash_sig_path = mfree(c->root_hash_sig_path);
+
+ return 0;
+}
+
+int config_parse_exec_cpu_affinity(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "numa")) {
+ c->cpu_affinity_from_numa = true;
+ cpu_set_reset(&c->cpu_set);
+
+ return 0;
+ }
+
+ r = parse_cpu_set_extend(rvalue, &c->cpu_set, true, unit, filename, line, lvalue);
+ if (r >= 0)
+ c->cpu_affinity_from_numa = false;
+
+ return r;
+}
+
+int config_parse_capability_set(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *capability_set = data;
+ uint64_t sum = 0, initial = 0;
+ bool invert = false;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (streq(lvalue, "CapabilityBoundingSet"))
+ initial = CAP_ALL; /* initialized to all bits on */
+ /* else "AmbientCapabilities" initialized to all bits off */
+
+ r = capability_set_from_string(rvalue, &sum);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s= specifier '%s', ignoring: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ if (sum == 0 || *capability_set == initial)
+ /* "", "~" or uninitialized data -> replace */
+ *capability_set = invert ? ~sum : sum;
+ else {
+ /* previous data -> merge */
+ if (invert)
+ *capability_set &= ~sum;
+ else
+ *capability_set |= sum;
+ }
+
+ return 0;
+}
+
+int config_parse_exec_selinux_context(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ bool ignore;
+ char *k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->selinux_context = mfree(c->selinux_context);
+ c->selinux_context_ignore = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '-') {
+ ignore = true;
+ rvalue++;
+ } else
+ ignore = false;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ rvalue, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ free_and_replace(c->selinux_context, k);
+ c->selinux_context_ignore = ignore;
+
+ return 0;
+}
+
+int config_parse_exec_apparmor_profile(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ bool ignore;
+ char *k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->apparmor_profile = mfree(c->apparmor_profile);
+ c->apparmor_profile_ignore = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '-') {
+ ignore = true;
+ rvalue++;
+ } else
+ ignore = false;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ rvalue, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ free_and_replace(c->apparmor_profile, k);
+ c->apparmor_profile_ignore = ignore;
+
+ return 0;
+}
+
+int config_parse_exec_smack_process_label(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ bool ignore;
+ char *k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ c->smack_process_label = mfree(c->smack_process_label);
+ c->smack_process_label_ignore = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '-') {
+ ignore = true;
+ rvalue++;
+ } else
+ ignore = false;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in '%s'%s: %m",
+ rvalue, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -ENOEXEC;
+ }
+
+ free_and_replace(c->smack_process_label, k);
+ c->smack_process_label_ignore = ignore;
+
+ return 0;
+}
+
+int config_parse_timer(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ _cleanup_free_ char *k = NULL;
+ const Unit *u = userdata;
+ Timer *t = data;
+ usec_t usec = 0;
+ TimerValue *v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets list */
+ timer_free_values(t);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (ltype == TIMER_CALENDAR) {
+ r = calendar_spec_from_string(k, &c);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse calendar specification, ignoring: %s", k);
+ return 0;
+ }
+ } else {
+ r = parse_sec(k, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse timer value, ignoring: %s", k);
+ return 0;
+ }
+ }
+
+ v = new(TimerValue, 1);
+ if (!v)
+ return log_oom();
+
+ *v = (TimerValue) {
+ .base = ltype,
+ .value = usec,
+ .calendar_spec = TAKE_PTR(c),
+ };
+
+ LIST_PREPEND(value, t->values, v);
+
+ return 0;
+}
+
+int config_parse_trigger_unit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *p = NULL;
+ Unit *u = data;
+ UnitType type;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (!hashmap_isempty(u->dependencies[UNIT_TRIGGERS])) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Multiple units to trigger specified, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_name_printf(u, rvalue, &p);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ type = unit_name_to_type(p);
+ if (type < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unit type not valid, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (unit_has_name(u, p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Units cannot trigger themselves, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_add_two_dependencies_by_name(u, UNIT_BEFORE, UNIT_TRIGGERS, p, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to add trigger on %s, ignoring: %m", p);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_path_spec(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Path *p = data;
+ PathSpec *s;
+ PathType b;
+ _cleanup_free_ char *k = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment clears list */
+ path_free_specs(p);
+ return 0;
+ }
+
+ b = path_type_from_string(lvalue);
+ if (b < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse path type, ignoring: %s", lvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(UNIT(p), rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ s = new0(PathSpec, 1);
+ if (!s)
+ return log_oom();
+
+ s->unit = UNIT(p);
+ s->path = TAKE_PTR(k);
+ s->type = b;
+ s->inotify_fd = -1;
+
+ LIST_PREPEND(spec, p->specs, s);
+
+ return 0;
+}
+
+int config_parse_socket_service(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *p = NULL;
+ Socket *s = data;
+ Unit *x;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = unit_name_printf(UNIT(s), rvalue, &p);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!endswith(p, ".service")) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unit must be of type service, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = manager_load_unit(UNIT(s)->manager, p, NULL, &error, &x);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to load unit %s, ignoring: %s", rvalue, bus_error_message(&error, r));
+ return 0;
+ }
+
+ unit_ref_set(&s->service, UNIT(s), x);
+
+ return 0;
+}
+
+int config_parse_fdname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *p = NULL;
+ Socket *s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ s->fdname = mfree(s->fdname);
+ return 0;
+ }
+
+ r = unit_full_printf(UNIT(s), rvalue, &p);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!fdname_is_valid(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid file descriptor name, ignoring: %s", p);
+ return 0;
+ }
+
+ return free_and_replace(s->fdname, p);
+}
+
+int config_parse_service_sockets(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Service *s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Trailing garbage in sockets, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_name_printf(UNIT(s), word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", word);
+ continue;
+ }
+
+ if (!endswith(k, ".socket")) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unit must be of type socket, ignoring: %s", k);
+ continue;
+ }
+
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_WANTS, UNIT_AFTER, k, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
+
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_TRIGGERED_BY, k, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to add dependency on %s, ignoring: %m", k);
+ }
+}
+
+int config_parse_bus_name(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!sd_bus_service_name_is_valid(k)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid bus name, ignoring: %s", k);
+ return 0;
+ }
+
+ return config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
+int config_parse_service_timeout(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Service *s = userdata;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(s);
+
+ /* This is called for two cases: TimeoutSec= and TimeoutStartSec=. */
+
+ /* Traditionally, these options accepted 0 to disable the timeouts. However, a timeout of 0 suggests it happens
+ * immediately, hence fix this to become USEC_INFINITY instead. This is in-line with how we internally handle
+ * all other timeouts. */
+ r = parse_sec_fix_0(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s= parameter, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ s->start_timeout_defined = true;
+ s->timeout_start_usec = usec;
+
+ if (streq(lvalue, "TimeoutSec"))
+ s->timeout_stop_usec = usec;
+
+ return 0;
+}
+
+int config_parse_timeout_abort(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ usec_t *ret = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(ret);
+
+ /* Note: apart from setting the arg, this returns an extra bit of information in the return value. */
+
+ if (isempty(rvalue)) {
+ *ret = 0;
+ return 0; /* "not set" */
+ }
+
+ r = parse_sec(rvalue, ret);
+ if (r < 0)
+ return log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s= setting, ignoring: %s", lvalue, rvalue);
+
+ return 1; /* "set" */
+}
+
+int config_parse_service_timeout_abort(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Service *s = userdata;
+ int r;
+
+ assert(s);
+
+ r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
+ &s->timeout_abort_usec, s);
+ if (r >= 0)
+ s->timeout_abort_set = r;
+ return 0;
+}
+
+int config_parse_sec_fix_0(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ usec_t *usec = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(usec);
+
+ /* This is pretty much like config_parse_sec(), except that this treats a time of 0 as infinity, for
+ * compatibility with older versions of systemd where 0 instead of infinity was used as indicator to turn off a
+ * timeout. */
+
+ r = parse_sec_fix_0(rvalue, usec);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s= parameter, ignoring: %s", lvalue, rvalue);
+
+ return 0;
+}
+
+int config_parse_user_group_compat(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ char **user = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ *user = mfree(*user);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", rvalue);
+ return -ENOEXEC;
+ }
+
+ if (!valid_user_group_name(k, VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX|VALID_USER_WARN)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID: %s", k);
+ return -ENOEXEC;
+ }
+
+ if (strstr(lvalue, "User") && streq(k, NOBODY_USER_NAME))
+ log_struct(LOG_NOTICE,
+ "MESSAGE=%s:%u: Special user %s configured, this is not safe!", filename, line, k,
+ "UNIT=%s", unit,
+ "MESSAGE_ID=" SD_MESSAGE_NOBODY_USER_UNSUITABLE_STR,
+ "OFFENDING_USER=%s", k,
+ "CONFIG_FILE=%s", filename,
+ "CONFIG_LINE=%u", line);
+
+ return free_and_replace(*user, k);
+}
+
+int config_parse_user_group_strv_compat(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***users = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ *users = strv_free(*users);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax: %s", rvalue);
+ return -ENOEXEC;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s: %m", word);
+ return -ENOEXEC;
+ }
+
+ if (!valid_user_group_name(k, VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX|VALID_USER_WARN)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID: %s", k);
+ return -ENOEXEC;
+ }
+
+ r = strv_push(users, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+}
+
+int config_parse_working_directory(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ bool missing_ok;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(c);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ c->working_directory_home = false;
+ c->working_directory = mfree(c->working_directory);
+ return 0;
+ }
+
+ if (rvalue[0] == '-') {
+ missing_ok = true;
+ rvalue++;
+ } else
+ missing_ok = false;
+
+ if (streq(rvalue, "~")) {
+ c->working_directory_home = true;
+ c->working_directory = mfree(c->working_directory);
+ } else {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, missing_ok ? LOG_WARNING : LOG_ERR, filename, line, r,
+ "Failed to resolve unit specifiers in working directory path '%s'%s: %m",
+ rvalue, missing_ok ? ", ignoring" : "");
+ return missing_ok ? 0 : -ENOEXEC;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE | (missing_ok ? 0 : PATH_CHECK_FATAL), unit, filename, line, lvalue);
+ if (r < 0)
+ return missing_ok ? 0 : -ENOEXEC;
+
+ c->working_directory_home = false;
+ free_and_replace(c->working_directory, k);
+ }
+
+ c->working_directory_missing_ok = missing_ok;
+ return 0;
+}
+
+int config_parse_unit_env_file(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***env = data;
+ const Unit *u = userdata;
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment frees the list */
+ *env = strv_free(*env);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &n);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(n[0] == '-' ? n + 1 : n, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = strv_push(env, n);
+ if (r < 0)
+ return log_oom();
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_environ(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const Unit *u = userdata;
+ char ***env = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *env = strv_free(*env);
+ return 0;
+ }
+
+ for (const char *p = rvalue;; ) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (u) {
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+ } else
+ k = TAKE_PTR(word);
+
+ if (!env_assignment_is_valid(k)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid environment assignment, ignoring: %s", k);
+ continue;
+ }
+
+ r = strv_env_replace(env, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+}
+
+int config_parse_pass_environ(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_strv_free_ char **n = NULL;
+ size_t nlen = 0, nbufsize = 0;
+ char*** passenv = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *passenv = strv_free(*passenv);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Trailing garbage in %s, ignoring: %s", lvalue, rvalue);
+ break;
+ }
+
+ if (u) {
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+ } else
+ k = TAKE_PTR(word);
+
+ if (!env_name_is_valid(k)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid environment name for %s, ignoring: %s", lvalue, k);
+ continue;
+ }
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ n[nlen++] = TAKE_PTR(k);
+ n[nlen] = NULL;
+ }
+
+ if (n) {
+ r = strv_extend_strv(passenv, n, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int config_parse_unset_environ(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_strv_free_ char **n = NULL;
+ size_t nlen = 0, nbufsize = 0;
+ char*** unsetenv = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *unsetenv = strv_free(*unsetenv);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Trailing garbage in %s, ignoring: %s", lvalue, rvalue);
+ break;
+ }
+
+ if (u) {
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+ } else
+ k = TAKE_PTR(word);
+
+ if (!env_assignment_is_valid(k) && !env_name_is_valid(k)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid environment name or assignment %s, ignoring: %s", lvalue, k);
+ continue;
+ }
+
+ if (!GREEDY_REALLOC(n, nbufsize, nlen + 2))
+ return log_oom();
+
+ n[nlen++] = TAKE_PTR(k);
+ n[nlen] = NULL;
+ }
+
+ if (n) {
+ r = strv_extend_strv(unsetenv, n, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int config_parse_log_extra_fields(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(c);
+
+ if (isempty(rvalue)) {
+ exec_context_free_log_extra_fields(c);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+ struct iovec *t;
+ const char *eq;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+
+ eq = strchr(k, '=');
+ if (!eq) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Log field lacks '=' character, ignoring: %s", k);
+ continue;
+ }
+
+ if (!journal_field_valid(k, eq-k, false)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Log field name is invalid, ignoring: %s", k);
+ continue;
+ }
+
+ t = reallocarray(c->log_extra_fields, c->n_log_extra_fields+1, sizeof(struct iovec));
+ if (!t)
+ return log_oom();
+
+ c->log_extra_fields = t;
+ c->log_extra_fields[c->n_log_extra_fields++] = IOVEC_MAKE_STRING(k);
+
+ k = NULL;
+ }
+}
+
+int config_parse_log_namespace(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL;
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(c);
+
+ if (isempty(rvalue)) {
+ c->log_namespace = mfree(c->log_namespace);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!log_namespace_name_valid(k)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Specified log namespace name is not valid, ignoring: %s", k);
+ return 0;
+ }
+
+ free_and_replace(c->log_namespace, k);
+ return 0;
+}
+
+int config_parse_unit_condition_path(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *p = NULL;
+ Condition **list = data, *c;
+ ConditionType t = ltype;
+ bool trigger, negate;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *list = condition_free_list(*list);
+ return 0;
+ }
+
+ trigger = rvalue[0] == '|';
+ if (trigger)
+ rvalue++;
+
+ negate = rvalue[0] == '!';
+ if (negate)
+ rvalue++;
+
+ r = unit_full_printf(u, rvalue, &p);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(p, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ c = condition_new(t, p, trigger, negate);
+ if (!c)
+ return log_oom();
+
+ LIST_PREPEND(conditions, *list, c);
+ return 0;
+}
+
+int config_parse_unit_condition_string(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *s = NULL;
+ Condition **list = data, *c;
+ ConditionType t = ltype;
+ bool trigger, negate;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *list = condition_free_list(*list);
+ return 0;
+ }
+
+ trigger = *rvalue == '|';
+ if (trigger)
+ rvalue += 1 + strspn(rvalue + 1, WHITESPACE);
+
+ negate = *rvalue == '!';
+ if (negate)
+ rvalue += 1 + strspn(rvalue + 1, WHITESPACE);
+
+ r = unit_full_printf(u, rvalue, &s);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ c = condition_new(t, s, trigger, negate);
+ if (!c)
+ return log_oom();
+
+ LIST_PREPEND(conditions, *list, c);
+ return 0;
+}
+
+int config_parse_unit_requires_mounts_for(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *resolved = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, word, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", word);
+ continue;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ r = unit_require_mounts_for(u, resolved, UNIT_DEPENDENCY_FILE);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to add required mount '%s', ignoring: %m", resolved);
+ continue;
+ }
+ }
+}
+
+int config_parse_documentation(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = userdata;
+ int r;
+ char **a, **b;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ u->documentation = strv_free(u->documentation);
+ return 0;
+ }
+
+ r = config_parse_unit_strv_printf(unit, filename, line, section, section_line, lvalue, ltype,
+ rvalue, data, userdata);
+ if (r < 0)
+ return r;
+
+ for (a = b = u->documentation; a && *a; a++) {
+
+ if (documentation_url_is_valid(*a))
+ *(b++) = *a;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid URL, ignoring: %s", *a);
+ free(*a);
+ }
+ }
+ if (b)
+ *b = NULL;
+
+ return r;
+}
+
+#if HAVE_SECCOMP
+int config_parse_syscall_filter(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ _unused_ const Unit *u = userdata;
+ bool invert = false;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ c->syscall_filter = hashmap_free(c->syscall_filter);
+ c->syscall_allow_list = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (!c->syscall_filter) {
+ c->syscall_filter = hashmap_new(NULL);
+ if (!c->syscall_filter)
+ return log_oom();
+
+ if (invert)
+ /* Allow everything but the ones listed */
+ c->syscall_allow_list = false;
+ else {
+ /* Allow nothing but the ones listed */
+ c->syscall_allow_list = true;
+
+ /* Accept default syscalls if we are on a allow_list */
+ r = seccomp_parse_syscall_filter(
+ "@default", -1, c->syscall_filter,
+ SECCOMP_PARSE_PERMISSIVE|SECCOMP_PARSE_ALLOW_LIST,
+ unit,
+ NULL, 0);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *name = NULL;
+ int num;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = parse_syscall_and_errno(word, &name, &num);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse syscall:errno, ignoring: %s", word);
+ continue;
+ }
+
+ r = seccomp_parse_syscall_filter(
+ name, num, c->syscall_filter,
+ SECCOMP_PARSE_LOG|SECCOMP_PARSE_PERMISSIVE|
+ (invert ? SECCOMP_PARSE_INVERT : 0)|
+ (c->syscall_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0),
+ unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+}
+
+int config_parse_syscall_log(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ _unused_ const Unit *u = userdata;
+ bool invert = false;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ c->syscall_log = hashmap_free(c->syscall_log);
+ c->syscall_log_allow_list = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (!c->syscall_log) {
+ c->syscall_log = hashmap_new(NULL);
+ if (!c->syscall_log)
+ return log_oom();
+
+ if (invert)
+ /* Log everything but the ones listed */
+ c->syscall_log_allow_list = false;
+ else
+ /* Log nothing but the ones listed */
+ c->syscall_log_allow_list = true;
+ }
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *name = NULL;
+ int num;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = parse_syscall_and_errno(word, &name, &num);
+ if (r < 0 || num >= 0) { /* errno code not allowed */
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse syscall, ignoring: %s", word);
+ continue;
+ }
+
+ r = seccomp_parse_syscall_filter(
+ name, 0, c->syscall_log,
+ SECCOMP_PARSE_LOG|SECCOMP_PARSE_PERMISSIVE|
+ (invert ? SECCOMP_PARSE_INVERT : 0)|
+ (c->syscall_log_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0),
+ unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+}
+
+int config_parse_syscall_archs(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Set **archs = data;
+ int r;
+
+ if (isempty(rvalue)) {
+ *archs = set_free(*archs);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+ uint32_t a;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = seccomp_arch_from_string(word, &a);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse system call architecture \"%s\", ignoring: %m", word);
+ continue;
+ }
+
+ r = set_ensure_put(archs, NULL, UINT32_TO_PTR(a + 1));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_syscall_errno(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ int e;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue) || streq(rvalue, "kill")) {
+ /* Empty assignment resets to KILL */
+ c->syscall_errno = SECCOMP_ERROR_NUMBER_KILL;
+ return 0;
+ }
+
+ e = parse_errno(rvalue);
+ if (e <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse error number, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ c->syscall_errno = e;
+ return 0;
+}
+
+int config_parse_address_families(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ bool invert = false;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ c->address_families = set_free(c->address_families);
+ c->address_families_allow_list = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (!c->address_families) {
+ c->address_families = set_new(NULL);
+ if (!c->address_families)
+ return log_oom();
+
+ c->address_families_allow_list = !invert;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+ int af;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ af = af_from_name(word);
+ if (af < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, af,
+ "Failed to parse address family, ignoring: %s", word);
+ continue;
+ }
+
+ /* If we previously wanted to forbid an address family and now
+ * we want to allow it, then just remove it from the list.
+ */
+ if (!invert == c->address_families_allow_list) {
+ r = set_put(c->address_families, INT_TO_PTR(af));
+ if (r < 0)
+ return log_oom();
+ } else
+ set_remove(c->address_families, INT_TO_PTR(af));
+ }
+}
+
+int config_parse_restrict_namespaces(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ unsigned long flags;
+ bool invert = false;
+ int r;
+
+ if (isempty(rvalue)) {
+ /* Reset to the default. */
+ c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
+ return 0;
+ }
+
+ /* Boolean parameter ignores the previous settings */
+ r = parse_boolean(rvalue);
+ if (r > 0) {
+ c->restrict_namespaces = 0;
+ return 0;
+ } else if (r == 0) {
+ c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ /* Not a boolean argument, in this case it's a list of namespace types. */
+ r = namespace_flags_from_string(rvalue, &flags);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse namespace type string, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (c->restrict_namespaces == NAMESPACE_FLAGS_INITIAL)
+ /* Initial assignment. Just set the value. */
+ c->restrict_namespaces = invert ? (~flags) & NAMESPACE_FLAGS_ALL : flags;
+ else
+ /* Merge the value with the previous one. */
+ SET_FLAG(c->restrict_namespaces, flags, !invert);
+
+ return 0;
+}
+#endif
+
+int config_parse_unit_slice(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *k = NULL;
+ Unit *u = userdata, *slice;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = unit_name_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = manager_load_unit(u->manager, k, NULL, &error, &slice);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to load slice unit %s, ignoring: %s", k, bus_error_message(&error, r));
+ return 0;
+ }
+
+ r = unit_set_slice(u, slice);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to assign slice %s to unit %s, ignoring: %m", slice->id, u->id);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_cpu_quota(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ c->cpu_quota_per_sec_usec = USEC_INFINITY;
+ return 0;
+ }
+
+ r = parse_permille_unbounded(rvalue);
+ if (r <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid CPU quota '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 1000U;
+ return 0;
+}
+
+int config_parse_allowed_cpus(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = data;
+
+ (void) parse_cpu_set_extend(rvalue, &c->cpuset_cpus, true, unit, filename, line, lvalue);
+
+ return 0;
+}
+
+int config_parse_allowed_mems(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = data;
+
+ (void) parse_cpu_set_extend(rvalue, &c->cpuset_mems, true, unit, filename, line, lvalue);
+
+ return 0;
+}
+
+int config_parse_memory_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = data;
+ uint64_t bytes = CGROUP_LIMIT_MAX;
+ int r;
+
+ if (isempty(rvalue) && STR_IN_SET(lvalue, "DefaultMemoryLow",
+ "DefaultMemoryMin",
+ "MemoryLow",
+ "MemoryMin"))
+ bytes = CGROUP_LIMIT_MIN;
+ else if (!isempty(rvalue) && !streq(rvalue, "infinity")) {
+
+ r = parse_permille(rvalue);
+ if (r < 0) {
+ r = parse_size(rvalue, 1024, &bytes);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid memory limit '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+ } else
+ bytes = physical_memory_scale(r, 1000U);
+
+ if (bytes >= UINT64_MAX ||
+ (bytes <= 0 && !STR_IN_SET(lvalue, "MemorySwapMax", "MemoryLow", "MemoryMin", "DefaultMemoryLow", "DefaultMemoryMin"))) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Memory limit '%s' out of range, ignoring.", rvalue);
+ return 0;
+ }
+ }
+
+ if (streq(lvalue, "DefaultMemoryLow")) {
+ c->default_memory_low = bytes;
+ c->default_memory_low_set = true;
+ } else if (streq(lvalue, "DefaultMemoryMin")) {
+ c->default_memory_min = bytes;
+ c->default_memory_min_set = true;
+ } else if (streq(lvalue, "MemoryMin")) {
+ c->memory_min = bytes;
+ c->memory_min_set = true;
+ } else if (streq(lvalue, "MemoryLow")) {
+ c->memory_low = bytes;
+ c->memory_low_set = true;
+ } else if (streq(lvalue, "MemoryHigh"))
+ c->memory_high = bytes;
+ else if (streq(lvalue, "MemoryMax"))
+ c->memory_max = bytes;
+ else if (streq(lvalue, "MemorySwapMax"))
+ c->memory_swap_max = bytes;
+ else if (streq(lvalue, "MemoryLimit"))
+ c->memory_limit = bytes;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+int config_parse_tasks_max(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const Unit *u = userdata;
+ TasksMax *tasks_max = data;
+ uint64_t v;
+ int r;
+
+ if (isempty(rvalue)) {
+ *tasks_max = u ? u->manager->default_tasks_max : TASKS_MAX_UNSET;
+ return 0;
+ }
+
+ if (streq(rvalue, "infinity")) {
+ *tasks_max = TASKS_MAX_UNSET;
+ return 0;
+ }
+
+ r = parse_permille(rvalue);
+ if (r >= 0)
+ *tasks_max = (TasksMax) { r, 1000U }; /* r‰ */
+ else {
+ r = safe_atou64(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid maximum tasks value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (v <= 0 || v >= UINT64_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
+ return 0;
+ }
+
+ *tasks_max = (TasksMax) { v };
+ }
+
+ return 0;
+}
+
+int config_parse_delegate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CGroupContext *c = data;
+ UnitType t;
+ int r;
+
+ t = unit_name_to_type(unit);
+ assert(t != _UNIT_TYPE_INVALID);
+
+ if (!unit_vtable[t]->can_delegate) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Delegate= setting not supported for this unit type, ignoring.");
+ return 0;
+ }
+
+ /* We either accept a boolean value, which may be used to turn on delegation for all controllers, or turn it
+ * off for all. Or it takes a list of controller names, in which case we add the specified controllers to the
+ * mask to delegate. */
+
+ if (isempty(rvalue)) {
+ /* An empty string resets controllers and set Delegate=yes. */
+ c->delegate = true;
+ c->delegate_controllers = 0;
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ CGroupMask mask = 0;
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+ CGroupController cc;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ cc = cgroup_controller_from_string(word);
+ if (cc < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid controller name '%s', ignoring", word);
+ continue;
+ }
+
+ mask |= CGROUP_CONTROLLER_TO_MASK(cc);
+ }
+
+ c->delegate = true;
+ c->delegate_controllers |= mask;
+
+ } else if (r > 0) {
+ c->delegate = true;
+ c->delegate_controllers = _CGROUP_MASK_ALL;
+ } else {
+ c->delegate = false;
+ c->delegate_controllers = 0;
+ }
+
+ return 0;
+}
+
+int config_parse_managed_oom_mode(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ ManagedOOMMode *mode = data, m;
+ UnitType t;
+
+ t = unit_name_to_type(unit);
+ assert(t != _UNIT_TYPE_INVALID);
+
+ if (!unit_vtable[t]->can_set_managed_oom)
+ return log_syntax(unit, LOG_WARNING, filename, line, 0, "%s= is not supported for this unit type, ignoring.", lvalue);
+
+ if (isempty(rvalue)) {
+ *mode = MANAGED_OOM_AUTO;
+ return 0;
+ }
+
+ m = managed_oom_mode_from_string(rvalue);
+ if (m < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ *mode = m;
+ return 0;
+}
+
+int config_parse_managed_oom_mem_pressure_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ int *limit = data;
+ UnitType t;
+ int r;
+
+ t = unit_name_to_type(unit);
+ assert(t != _UNIT_TYPE_INVALID);
+
+ if (!unit_vtable[t]->can_set_managed_oom)
+ return log_syntax(unit, LOG_WARNING, filename, line, 0, "%s= is not supported for this unit type, ignoring.", lvalue);
+
+ if (isempty(rvalue)) {
+ *limit = 0;
+ return 0;
+ }
+
+ r = parse_percent(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse limit percent value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *limit = r;
+ return 0;
+}
+
+int config_parse_device_allow(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ int r;
+
+ if (isempty(rvalue)) {
+ while (c->device_allow)
+ cgroup_context_free_device_allow(c, c->device_allow);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device path and rights from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ if (!STARTSWITH_SET(resolved, "block-", "char-")) {
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ if (!valid_device_node_path(resolved)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid device node path '%s', ignoring.", resolved);
+ return 0;
+ }
+ }
+
+ if (!isempty(p) && !in_charset(p, "rwm")) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid device rights '%s', ignoring.", p);
+ return 0;
+ }
+
+ return cgroup_add_device_allow(c, resolved, p);
+}
+
+int config_parse_io_device_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupIODeviceWeight *w;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ uint64_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device path and weight from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = cg_weight_parse(p, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "IO weight '%s' invalid, ignoring: %m", p);
+ return 0;
+ }
+
+ assert(u != CGROUP_WEIGHT_INVALID);
+
+ w = new0(CGroupIODeviceWeight, 1);
+ if (!w)
+ return log_oom();
+
+ w->path = TAKE_PTR(resolved);
+ w->weight = u;
+
+ LIST_PREPEND(device_weights, c->io_device_weights, w);
+ return 0;
+}
+
+int config_parse_io_device_latency(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupIODeviceLatency *l;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ while (c->io_device_latencies)
+ cgroup_context_free_io_device_latency(c, c->io_device_latencies);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device path and latency from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = parse_sec(p, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse timer value, ignoring: %s", p);
+ return 0;
+ }
+
+ l = new0(CGroupIODeviceLatency, 1);
+ if (!l)
+ return log_oom();
+
+ l->path = TAKE_PTR(resolved);
+ l->target_usec = usec;
+
+ LIST_PREPEND(device_latencies, c->io_device_latencies, l);
+ return 0;
+}
+
+int config_parse_io_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupIODeviceLimit *l = NULL, *t;
+ CGroupContext *c = data;
+ CGroupIOLimitType type;
+ const char *p = rvalue;
+ uint64_t num;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ type = cgroup_io_limit_type_from_string(lvalue);
+ assert(type >= 0);
+
+ if (isempty(rvalue)) {
+ LIST_FOREACH(device_limits, l, c->io_device_limits)
+ l->limits[type] = cgroup_io_limit_defaults[type];
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device node and bandwidth from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ if (streq("infinity", p))
+ num = CGROUP_LIMIT_MAX;
+ else {
+ r = parse_size(p, 1000, &num);
+ if (r < 0 || num <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid IO limit '%s', ignoring.", p);
+ return 0;
+ }
+ }
+
+ LIST_FOREACH(device_limits, t, c->io_device_limits) {
+ if (path_equal(resolved, t->path)) {
+ l = t;
+ break;
+ }
+ }
+
+ if (!l) {
+ CGroupIOLimitType ttype;
+
+ l = new0(CGroupIODeviceLimit, 1);
+ if (!l)
+ return log_oom();
+
+ l->path = TAKE_PTR(resolved);
+ for (ttype = 0; ttype < _CGROUP_IO_LIMIT_TYPE_MAX; ttype++)
+ l->limits[ttype] = cgroup_io_limit_defaults[ttype];
+
+ LIST_PREPEND(device_limits, c->io_device_limits, l);
+ }
+
+ l->limits[type] = num;
+
+ return 0;
+}
+
+int config_parse_blockio_device_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupBlockIODeviceWeight *w;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ uint64_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ while (c->blockio_device_weights)
+ cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
+
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device node and weight from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = cg_blkio_weight_parse(p, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid block IO weight '%s', ignoring: %m", p);
+ return 0;
+ }
+
+ assert(u != CGROUP_BLKIO_WEIGHT_INVALID);
+
+ w = new0(CGroupBlockIODeviceWeight, 1);
+ if (!w)
+ return log_oom();
+
+ w->path = TAKE_PTR(resolved);
+ w->weight = u;
+
+ LIST_PREPEND(device_weights, c->blockio_device_weights, w);
+ return 0;
+}
+
+int config_parse_blockio_bandwidth(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL, *resolved = NULL;
+ CGroupBlockIODeviceBandwidth *b = NULL, *t;
+ CGroupContext *c = data;
+ const char *p = rvalue;
+ uint64_t bytes;
+ bool read;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ read = streq("BlockIOReadBandwidth", lvalue);
+
+ if (isempty(rvalue)) {
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ b->rbps = CGROUP_LIMIT_MAX;
+ b->wbps = CGROUP_LIMIT_MAX;
+ }
+ return 0;
+ }
+
+ r = extract_first_word(&p, &path, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract device node and bandwidth from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(userdata, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in '%s', ignoring: %m", path);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ r = parse_size(p, 1000, &bytes);
+ if (r < 0 || bytes <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid Block IO Bandwidth '%s', ignoring.", p);
+ return 0;
+ }
+
+ LIST_FOREACH(device_bandwidths, t, c->blockio_device_bandwidths) {
+ if (path_equal(resolved, t->path)) {
+ b = t;
+ break;
+ }
+ }
+
+ if (!t) {
+ b = new0(CGroupBlockIODeviceBandwidth, 1);
+ if (!b)
+ return log_oom();
+
+ b->path = TAKE_PTR(resolved);
+ b->rbps = CGROUP_LIMIT_MAX;
+ b->wbps = CGROUP_LIMIT_MAX;
+
+ LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, b);
+ }
+
+ if (read)
+ b->rbps = bytes;
+ else
+ b->wbps = bytes;
+
+ return 0;
+}
+
+int config_parse_job_mode_isolate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ JobMode *m = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse boolean, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ log_notice("%s is deprecated. Please use OnFailureJobMode= instead", lvalue);
+
+ *m = r ? JOB_ISOLATE : JOB_REPLACE;
+ return 0;
+}
+
+int config_parse_exec_directories(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char***rt = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *rt = strv_free(*rt);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in \"%s\", ignoring: %m", word);
+ continue;
+ }
+
+ r = path_simplify_and_warn(k, PATH_CHECK_RELATIVE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ if (path_startswith(k, "private")) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "%s= path can't be 'private', ignoring assignment: %s", lvalue, word);
+ continue;
+ }
+
+ r = strv_push(rt, k);
+ if (r < 0)
+ return log_oom();
+ k = NULL;
+ }
+}
+
+int config_parse_set_credential(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *word = NULL, *k = NULL, *unescaped = NULL;
+ ExecContext *context = data;
+ ExecSetCredential *old;
+ Unit *u = userdata;
+ const char *p;
+ int r, l;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(context);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ context->set_credentials = hashmap_free(context->set_credentials);
+ return 0;
+ }
+
+ p = rvalue;
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r <= 0 || !p) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in \"%s\", ignoring: %m", word);
+ return 0;
+ }
+ if (!credential_name_valid(k)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Credential name \"%s\" not valid, ignoring.", k);
+ return 0;
+ }
+
+ /* We support escape codes here, so that users can insert trailing \n if they like */
+ l = cunescape(p, UNESCAPE_ACCEPT_NUL, &unescaped);
+ if (l < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, l, "Can't unescape \"%s\", ignoring: %m", p);
+ return 0;
+ }
+
+ old = hashmap_get(context->set_credentials, k);
+ if (old) {
+ free_and_replace(old->data, unescaped);
+ old->size = l;
+ } else {
+ _cleanup_(exec_set_credential_freep) ExecSetCredential *sc = NULL;
+
+ sc = new0(ExecSetCredential, 1);
+ if (!sc)
+ return log_oom();
+
+ sc->id = TAKE_PTR(k);
+ sc->data = TAKE_PTR(unescaped);
+ sc->size = l;
+
+ r = hashmap_ensure_allocated(&context->set_credentials, &exec_set_credential_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(context->set_credentials, sc->id, sc);
+ if (r < 0)
+ return log_oom();
+
+ TAKE_PTR(sc);
+ }
+
+ return 0;
+}
+
+int config_parse_load_credential(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *word = NULL, *k = NULL, *q = NULL;
+ ExecContext *context = data;
+ Unit *u = userdata;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(context);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ context->load_credentials = strv_free(context->load_credentials);
+ return 0;
+ }
+
+ p = rvalue;
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in \"%s\", ignoring: %m", word);
+ return 0;
+ }
+ if (!credential_name_valid(k)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Credential name \"%s\" not valid, ignoring.", k);
+ return 0;
+ }
+ r = unit_full_printf(u, p, &q);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in \"%s\", ignoring: %m", p);
+ return 0;
+ }
+ if (path_is_absolute(q) ? !path_is_normalized(q) : !credential_name_valid(q)) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Credential source \"%s\" not valid, ignoring.", q);
+ return 0;
+ }
+
+ r = strv_consume_pair(&context->load_credentials, TAKE_PTR(k), TAKE_PTR(q));
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_set_status(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExitStatusSet *status_set = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(status_set);
+
+ /* Empty assignment resets the list */
+ if (isempty(rvalue)) {
+ exit_status_set_free(status_set);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+ Bitmap *bitmap;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s=%s, ignoring: %m", lvalue, rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ /* We need to call exit_status_from_string() first, because we want
+ * to parse numbers as exit statuses, not signals. */
+
+ r = exit_status_from_string(word);
+ if (r >= 0) {
+ assert(r >= 0 && r < 256);
+ bitmap = &status_set->status;
+ } else {
+ r = signal_from_string(word);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse value, ignoring: %s", word);
+ continue;
+ }
+ bitmap = &status_set->signal;
+ }
+
+ r = bitmap_set(bitmap, r);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to set signal or status %s, ignoring: %m", word);
+ }
+}
+
+int config_parse_namespace_path_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const Unit *u = userdata;
+ char*** sv = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *sv = strv_free(*sv);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *resolved = NULL, *joined = NULL;
+ const char *w;
+ bool ignore_enoent = false, shall_prefix = false;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to extract first word, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ w = word;
+ if (startswith(w, "-")) {
+ ignore_enoent = true;
+ w++;
+ }
+ if (startswith(w, "+")) {
+ shall_prefix = true;
+ w++;
+ }
+
+ r = unit_full_printf(u, w, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s: %m", w);
+ continue;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ joined = strjoin(ignore_enoent ? "-" : "",
+ shall_prefix ? "+" : "",
+ resolved);
+
+ r = strv_push(sv, joined);
+ if (r < 0)
+ return log_oom();
+
+ joined = NULL;
+ }
+
+ return 0;
+}
+
+int config_parse_temporary_filesystems(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const Unit *u = userdata;
+ ExecContext *c = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
+ c->temporary_filesystems = NULL;
+ c->n_temporary_filesystems = 0;
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL, *path = NULL, *resolved = NULL;
+ const char *w;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to extract first word, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ w = word;
+ r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to extract first word, ignoring: %s", word);
+ continue;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid syntax, ignoring: %s", word);
+ continue;
+ }
+
+ r = unit_full_printf(u, path, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", path);
+ continue;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ r = temporary_filesystem_add(&c->temporary_filesystems, &c->n_temporary_filesystems, resolved, w);
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_bind_paths(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+ c->bind_mounts = NULL;
+ c->n_bind_mounts = 0;
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *source = NULL, *destination = NULL;
+ _cleanup_free_ char *sresolved = NULL, *dresolved = NULL;
+ char *s = NULL, *d = NULL;
+ bool rbind = true, ignore_enoent = false;
+
+ r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ r = unit_full_printf(u, source, &sresolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in \"%s\", ignoring: %m", source);
+ continue;
+ }
+
+ s = sresolved;
+ if (s[0] == '-') {
+ ignore_enoent = true;
+ s++;
+ }
+
+ r = path_simplify_and_warn(s, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ /* Optionally, the destination is specified. */
+ if (p && p[-1] == ':') {
+ r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Missing argument after ':', ignoring: %s", s);
+ continue;
+ }
+
+ r = unit_full_printf(u, destination, &dresolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve specifiers in \"%s\", ignoring: %m", destination);
+ continue;
+ }
+
+ r = path_simplify_and_warn(dresolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ d = dresolved;
+
+ /* Optionally, there's also a short option string specified */
+ if (p && p[-1] == ':') {
+ _cleanup_free_ char *options = NULL;
+
+ r = extract_first_word(&p, &options, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (isempty(options) || streq(options, "rbind"))
+ rbind = true;
+ else if (streq(options, "norbind"))
+ rbind = false;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid option string, ignoring setting: %s", options);
+ continue;
+ }
+ }
+ } else
+ d = s;
+
+ r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts,
+ &(BindMount) {
+ .source = s,
+ .destination = d,
+ .read_only = !!strstr(lvalue, "ReadOnly"),
+ .recursive = rbind,
+ .ignore_enoent = ignore_enoent,
+ });
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int config_parse_mount_images(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ const Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ c->mount_images = mount_image_free_many(c->mount_images, &c->n_mount_images);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_(mount_options_free_allp) MountOptions *options = NULL;
+ _cleanup_free_ char *first = NULL, *second = NULL, *tuple = NULL;
+ _cleanup_free_ char *sresolved = NULL, *dresolved = NULL;
+ const char *q = NULL;
+ char *s = NULL;
+ bool permissive = false;
+
+ r = extract_first_word(&p, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax %s=%s, ignoring: %m", lvalue, rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ q = tuple;
+ r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &first, &second, NULL);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax in %s=, ignoring: %s", lvalue, tuple);
+ return 0;
+ }
+ if (r == 0)
+ continue;
+
+ r = unit_full_printf(u, first, &sresolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve unit specifiers in \"%s\", ignoring: %m", first);
+ continue;
+ }
+
+ s = sresolved;
+ if (s[0] == '-') {
+ permissive = true;
+ s++;
+ }
+
+ r = path_simplify_and_warn(s, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ if (isempty(second)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Missing destination in %s, ignoring: %s", lvalue, rvalue);
+ continue;
+ }
+
+ r = unit_full_printf(u, second, &dresolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to resolve specifiers in \"%s\", ignoring: %m", second);
+ continue;
+ }
+
+ r = path_simplify_and_warn(dresolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ continue;
+
+ for (;;) {
+ _cleanup_free_ char *partition = NULL, *mount_options = NULL, *mount_options_resolved = NULL;
+ MountOptions *o = NULL;
+ PartitionDesignator partition_designator;
+
+ r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, NULL);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", q);
+ return 0;
+ }
+ if (r == 0)
+ break;
+ /* Single set of options, applying to the root partition/single filesystem */
+ if (r == 1) {
+ r = unit_full_printf(u, partition, &mount_options_resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", first);
+ continue;
+ }
+
+ o = new(MountOptions, 1);
+ if (!o)
+ return log_oom();
+ *o = (MountOptions) {
+ .partition_designator = PARTITION_ROOT,
+ .options = TAKE_PTR(mount_options_resolved),
+ };
+ LIST_APPEND(mount_options, options, o);
+
+ break;
+ }
+
+ partition_designator = partition_designator_from_string(partition);
+ if (partition_designator < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid partition name %s, ignoring", partition);
+ continue;
+ }
+ r = unit_full_printf(u, mount_options, &mount_options_resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", mount_options);
+ continue;
+ }
+
+ o = new(MountOptions, 1);
+ if (!o)
+ return log_oom();
+ *o = (MountOptions) {
+ .partition_designator = partition_designator,
+ .options = TAKE_PTR(mount_options_resolved),
+ };
+ LIST_APPEND(mount_options, options, o);
+ }
+
+ r = mount_image_add(&c->mount_images, &c->n_mount_images,
+ &(MountImage) {
+ .source = s,
+ .destination = dresolved,
+ .mount_options = options,
+ .ignore_enoent = permissive,
+ });
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_job_timeout_sec(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = data;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = parse_sec_fix_0(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse JobTimeoutSec= parameter, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ /* If the user explicitly changed JobTimeoutSec= also change JobRunningTimeoutSec=, for compatibility with old
+ * versions. If JobRunningTimeoutSec= was explicitly set, avoid this however as whatever the user picked should
+ * count. */
+
+ if (!u->job_running_timeout_set)
+ u->job_running_timeout = usec;
+
+ u->job_timeout = usec;
+
+ return 0;
+}
+
+int config_parse_job_running_timeout_sec(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Unit *u = data;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ r = parse_sec_fix_0(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse JobRunningTimeoutSec= parameter, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ u->job_running_timeout = usec;
+ u->job_running_timeout_set = true;
+
+ return 0;
+}
+
+int config_parse_emergency_action(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = NULL;
+ EmergencyAction *x = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (unit)
+ m = ((Unit*) userdata)->manager;
+ else
+ m = data;
+
+ r = parse_emergency_action(rvalue, MANAGER_IS_SYSTEM(m), x);
+ if (r < 0) {
+ if (r == -EOPNOTSUPP && MANAGER_IS_USER(m)) {
+ /* Compat mode: remove for systemd 241. */
+
+ log_syntax(unit, LOG_INFO, filename, line, r,
+ "%s= in user mode specified as \"%s\", using \"exit-force\" instead.",
+ lvalue, rvalue);
+ *x = EMERGENCY_ACTION_EXIT_FORCE;
+ return 0;
+ }
+
+ if (r == -EOPNOTSUPP)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "%s= specified as %s mode action, ignoring: %s",
+ lvalue, MANAGER_IS_SYSTEM(m) ? "user" : "system", rvalue);
+ else
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s=, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_pid_file(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *k = NULL, *n = NULL;
+ const Unit *u = userdata;
+ char **s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ /* An empty assignment removes already set value. */
+ *s = mfree(*s);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ /* If this is a relative path make it absolute by prefixing the /run */
+ n = path_make_absolute(k, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+ if (!n)
+ return log_oom();
+
+ /* Check that the result is a sensible path */
+ r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return r;
+
+ r = patch_var_run(unit, filename, line, lvalue, &n);
+ if (r < 0)
+ return r;
+
+ free_and_replace(*s, n);
+ return 0;
+}
+
+int config_parse_exit_status(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *exit_status = data, r;
+ uint8_t u;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(exit_status);
+
+ if (isempty(rvalue)) {
+ *exit_status = -1;
+ return 0;
+ }
+
+ r = safe_atou8(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse exit status '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *exit_status = u;
+ return 0;
+}
+
+int config_parse_disable_controllers(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int r;
+ CGroupContext *c = data;
+ CGroupMask disabled_mask;
+
+ /* 1. If empty, make all controllers eligible for use again.
+ * 2. If non-empty, merge all listed controllers, space separated. */
+
+ if (isempty(rvalue)) {
+ c->disable_controllers = 0;
+ return 0;
+ }
+
+ r = cg_mask_from_string(rvalue, &disabled_mask);
+ if (r < 0 || disabled_mask <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid cgroup string: %s, ignoring", rvalue);
+ return 0;
+ }
+
+ c->disable_controllers |= disabled_mask;
+
+ return 0;
+}
+
+int config_parse_ip_filter_bpf_progs(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *resolved = NULL;
+ const Unit *u = userdata;
+ char ***paths = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(paths);
+
+ if (isempty(rvalue)) {
+ *paths = strv_free(*paths);
+ return 0;
+ }
+
+ r = unit_full_printf(u, rvalue, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+ if (r < 0)
+ return 0;
+
+ if (strv_contains(*paths, resolved))
+ return 0;
+
+ r = strv_extend(paths, resolved);
+ if (r < 0)
+ return log_oom();
+
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
+ static bool warned = false;
+
+ log_full(warned ? LOG_DEBUG : LOG_WARNING,
+ "File %s:%u configures an IP firewall with BPF programs (%s=%s), but the local system does not support BPF/cgroup based firewalling with multiple filters.\n"
+ "Starting this unit will fail! (This warning is only shown for the first loaded unit using IP firewalling.)", filename, line, lvalue, rvalue);
+
+ warned = true;
+ }
+
+ return 0;
+}
+
+static int merge_by_names(Unit **u, Set *names, const char *id) {
+ char *k;
+ int r;
+
+ assert(u);
+ assert(*u);
+
+ /* Let's try to add in all names that are aliases of this unit */
+ while ((k = set_steal_first(names))) {
+ _cleanup_free_ _unused_ char *free_k = k;
+
+ /* First try to merge in the other name into our unit */
+ r = unit_merge_by_name(*u, k);
+ if (r < 0) {
+ Unit *other;
+
+ /* Hmm, we couldn't merge the other unit into ours? Then let's try it the other way
+ * round. */
+
+ other = manager_get_unit((*u)->manager, k);
+ if (!other)
+ return r; /* return previous failure */
+
+ r = unit_merge(other, *u);
+ if (r < 0)
+ return r;
+
+ *u = other;
+ return merge_by_names(u, names, NULL);
+ }
+
+ if (streq_ptr(id, k))
+ unit_choose_id(*u, id);
+ }
+
+ return 0;
+}
+
+int unit_load_fragment(Unit *u) {
+ const char *fragment;
+ _cleanup_set_free_free_ Set *names = NULL;
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+ assert(u->id);
+
+ if (u->transient) {
+ u->load_state = UNIT_LOADED;
+ return 0;
+ }
+
+ /* Possibly rebuild the fragment map to catch new units */
+ r = unit_file_build_name_map(&u->manager->lookup_paths,
+ &u->manager->unit_cache_timestamp_hash,
+ &u->manager->unit_id_map,
+ &u->manager->unit_name_map,
+ &u->manager->unit_path_cache);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rebuild name map: %m");
+
+ r = unit_file_find_fragment(u->manager->unit_id_map,
+ u->manager->unit_name_map,
+ u->id,
+ &fragment,
+ &names);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ if (fragment) {
+ /* Open the file, check if this is a mask, otherwise read. */
+ _cleanup_fclose_ FILE *f = NULL;
+ struct stat st;
+
+ /* Try to open the file name. A symlink is OK, for example for linked files or masks. We
+ * expect that all symlinks within the lookup paths have been already resolved, but we don't
+ * verify this here. */
+ f = fopen(fragment, "re");
+ if (!f)
+ return log_unit_notice_errno(u, errno, "Failed to open %s: %m", fragment);
+
+ if (fstat(fileno(f), &st) < 0)
+ return -errno;
+
+ r = free_and_strdup(&u->fragment_path, fragment);
+ if (r < 0)
+ return r;
+
+ if (null_or_empty(&st)) {
+ /* Unit file is masked */
+
+ u->load_state = u->perpetual ? UNIT_LOADED : UNIT_MASKED; /* don't allow perpetual units to ever be masked */
+ u->fragment_mtime = 0;
+ } else {
+ u->load_state = UNIT_LOADED;
+ u->fragment_mtime = timespec_load(&st.st_mtim);
+
+ /* Now, parse the file contents */
+ r = config_parse(u->id, fragment, f,
+ UNIT_VTABLE(u)->sections,
+ config_item_perf_lookup, load_fragment_gperf_lookup,
+ 0,
+ u,
+ NULL);
+ if (r == -ENOEXEC)
+ log_unit_notice_errno(u, r, "Unit configuration has fatal error, unit will not be started.");
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* We do the merge dance here because for some unit types, the unit might have aliases which are not
+ * declared in the file system. In particular, this is true (and frequent) for device and swap units.
+ */
+ Unit *merged;
+ const char *id = u->id;
+ _cleanup_free_ char *free_id = NULL;
+
+ if (fragment) {
+ id = basename(fragment);
+ if (unit_name_is_valid(id, UNIT_NAME_TEMPLATE)) {
+ assert(u->instance); /* If we're not trying to use a template for non-instanced unit,
+ * this must be set. */
+
+ r = unit_name_replace_instance(id, u->instance, &free_id);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to build id (%s + %s): %m", id, u->instance);
+ id = free_id;
+ }
+ }
+
+ merged = u;
+ r = merge_by_names(&merged, names, id);
+ if (r < 0)
+ return r;
+
+ if (merged != u)
+ u->load_state = UNIT_MERGED;
+
+ return 0;
+}
+
+void unit_dump_config_items(FILE *f) {
+ static const struct {
+ const ConfigParserCallback callback;
+ const char *rvalue;
+ } table[] = {
+ { config_parse_warn_compat, "NOTSUPPORTED" },
+ { config_parse_int, "INTEGER" },
+ { config_parse_unsigned, "UNSIGNED" },
+ { config_parse_iec_size, "SIZE" },
+ { config_parse_iec_uint64, "SIZE" },
+ { config_parse_si_uint64, "SIZE" },
+ { config_parse_bool, "BOOLEAN" },
+ { config_parse_string, "STRING" },
+ { config_parse_path, "PATH" },
+ { config_parse_unit_path_printf, "PATH" },
+ { config_parse_strv, "STRING [...]" },
+ { config_parse_exec_nice, "NICE" },
+ { config_parse_exec_oom_score_adjust, "OOMSCOREADJUST" },
+ { config_parse_exec_io_class, "IOCLASS" },
+ { config_parse_exec_io_priority, "IOPRIORITY" },
+ { config_parse_exec_cpu_sched_policy, "CPUSCHEDPOLICY" },
+ { config_parse_exec_cpu_sched_prio, "CPUSCHEDPRIO" },
+ { config_parse_exec_cpu_affinity, "CPUAFFINITY" },
+ { config_parse_mode, "MODE" },
+ { config_parse_unit_env_file, "FILE" },
+ { config_parse_exec_output, "OUTPUT" },
+ { config_parse_exec_input, "INPUT" },
+ { config_parse_log_facility, "FACILITY" },
+ { config_parse_log_level, "LEVEL" },
+ { config_parse_exec_secure_bits, "SECUREBITS" },
+ { config_parse_capability_set, "BOUNDINGSET" },
+ { config_parse_rlimit, "LIMIT" },
+ { config_parse_unit_deps, "UNIT [...]" },
+ { config_parse_exec, "PATH [ARGUMENT [...]]" },
+ { config_parse_service_type, "SERVICETYPE" },
+ { config_parse_service_restart, "SERVICERESTART" },
+ { config_parse_service_timeout_failure_mode, "TIMEOUTMODE" },
+ { config_parse_kill_mode, "KILLMODE" },
+ { config_parse_signal, "SIGNAL" },
+ { config_parse_socket_listen, "SOCKET [...]" },
+ { config_parse_socket_bind, "SOCKETBIND" },
+ { config_parse_socket_bindtodevice, "NETWORKINTERFACE" },
+ { config_parse_sec, "SECONDS" },
+ { config_parse_nsec, "NANOSECONDS" },
+ { config_parse_namespace_path_strv, "PATH [...]" },
+ { config_parse_bind_paths, "PATH[:PATH[:OPTIONS]] [...]" },
+ { config_parse_unit_requires_mounts_for, "PATH [...]" },
+ { config_parse_exec_mount_flags, "MOUNTFLAG [...]" },
+ { config_parse_unit_string_printf, "STRING" },
+ { config_parse_trigger_unit, "UNIT" },
+ { config_parse_timer, "TIMER" },
+ { config_parse_path_spec, "PATH" },
+ { config_parse_notify_access, "ACCESS" },
+ { config_parse_ip_tos, "TOS" },
+ { config_parse_unit_condition_path, "CONDITION" },
+ { config_parse_unit_condition_string, "CONDITION" },
+ { config_parse_unit_slice, "SLICE" },
+ { config_parse_documentation, "URL" },
+ { config_parse_service_timeout, "SECONDS" },
+ { config_parse_emergency_action, "ACTION" },
+ { config_parse_set_status, "STATUS" },
+ { config_parse_service_sockets, "SOCKETS" },
+ { config_parse_environ, "ENVIRON" },
+#if HAVE_SECCOMP
+ { config_parse_syscall_filter, "SYSCALLS" },
+ { config_parse_syscall_archs, "ARCHS" },
+ { config_parse_syscall_errno, "ERRNO" },
+ { config_parse_syscall_log, "SYSCALLS" },
+ { config_parse_address_families, "FAMILIES" },
+ { config_parse_restrict_namespaces, "NAMESPACES" },
+#endif
+ { config_parse_cpu_shares, "SHARES" },
+ { config_parse_cg_weight, "WEIGHT" },
+ { config_parse_memory_limit, "LIMIT" },
+ { config_parse_device_allow, "DEVICE" },
+ { config_parse_device_policy, "POLICY" },
+ { config_parse_io_limit, "LIMIT" },
+ { config_parse_io_device_weight, "DEVICEWEIGHT" },
+ { config_parse_io_device_latency, "DEVICELATENCY" },
+ { config_parse_blockio_bandwidth, "BANDWIDTH" },
+ { config_parse_blockio_weight, "WEIGHT" },
+ { config_parse_blockio_device_weight, "DEVICEWEIGHT" },
+ { config_parse_long, "LONG" },
+ { config_parse_socket_service, "SERVICE" },
+#if HAVE_SELINUX
+ { config_parse_exec_selinux_context, "LABEL" },
+#endif
+ { config_parse_job_mode, "MODE" },
+ { config_parse_job_mode_isolate, "BOOLEAN" },
+ { config_parse_personality, "PERSONALITY" },
+ };
+
+ const char *prev = NULL;
+ const char *i;
+
+ assert(f);
+
+ NULSTR_FOREACH(i, load_fragment_gperf_nulstr) {
+ const char *rvalue = "OTHER", *lvalue;
+ const ConfigPerfItem *p;
+ const char *dot;
+
+ assert_se(p = load_fragment_gperf_lookup(i, strlen(i)));
+
+ /* Hide legacy settings */
+ if (p->parse == config_parse_warn_compat &&
+ p->ltype == DISABLED_LEGACY)
+ continue;
+
+ for (size_t j = 0; j < ELEMENTSOF(table); j++)
+ if (p->parse == table[j].callback) {
+ rvalue = table[j].rvalue;
+ break;
+ }
+
+ dot = strchr(i, '.');
+ lvalue = dot ? dot + 1 : i;
+
+ if (dot) {
+ size_t prefix_len = dot - i;
+
+ if (!prev || !strneq(prev, i, prefix_len+1)) {
+ if (prev)
+ fputc('\n', f);
+
+ fprintf(f, "[%.*s]\n", (int) prefix_len, i);
+ }
+ }
+
+ fprintf(f, "%s=%s\n", lvalue, rvalue);
+ prev = i;
+ }
+}
+
+int config_parse_cpu_affinity2(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ CPUSet *affinity = data;
+
+ assert(affinity);
+
+ (void) parse_cpu_set_extend(rvalue, affinity, true, unit, filename, line, lvalue);
+
+ return 0;
+}
+
+int config_parse_show_status(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k;
+ ShowStatus *b = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ k = parse_show_status(rvalue, b);
+ if (k < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, k, "Failed to parse show status setting, ignoring: %s", rvalue);
+
+ return 0;
+}
+
+int config_parse_output_restricted(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecOutput t, *eo = data;
+ bool obsolete = false;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "syslog")) {
+ t = EXEC_OUTPUT_JOURNAL;
+ obsolete = true;
+ } else if (streq(rvalue, "syslog+console")) {
+ t = EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
+ obsolete = true;
+ } else {
+ t = exec_output_from_string(rvalue);
+ if (t < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse output type, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Standard output types socket, fd:, file:, append: are not supported as defaults, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ if (obsolete)
+ log_syntax(unit, LOG_NOTICE, filename, line, 0,
+ "Standard output type %s is obsolete, automatically updating to %s. Please update your configuration.",
+ rvalue, exec_output_to_string(t));
+
+ *eo = t;
+ return 0;
+}
+
+int config_parse_crash_chvt(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_crash_chvt(rvalue, data);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse CrashChangeVT= setting, ignoring: %s", rvalue);
+
+ return 0;
+}
+
+int config_parse_swap_priority(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Swap *s = userdata;
+ int r, priority;
+
+ assert(s);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ s->parameters_fragment.priority = -1;
+ s->parameters_fragment.priority_set = false;
+ return 0;
+ }
+
+ r = safe_atoi(rvalue, &priority);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid swap priority '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ if (priority < -1) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Sorry, swap priorities smaller than -1 may only be assigned by the kernel itself, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (priority > 32767) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Swap priority out of range, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ s->parameters_fragment.priority = priority;
+ s->parameters_fragment.priority_set = true;
+ return 0;
+}
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
new file mode 100644
index 0000000..6b2175c
--- /dev/null
+++ b/src/core/load-fragment.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "unit.h"
+
+/* Config-parsing helpers relevant only for sources under src/core/ */
+int parse_crash_chvt(const char *value, int *data);
+int parse_confirm_spawn(const char *value, char **console);
+
+/* Read service data from .desktop file style configuration fragments */
+
+int unit_load_fragment(Unit *u);
+
+void unit_dump_config_items(FILE *f);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_deps);
+CONFIG_PARSER_PROTOTYPE(config_parse_obsolete_unit_deps);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_string_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_strv_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_path_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_path_strv_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_documentation);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_listen);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_protocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_bind);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_nice);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_oom_score_adjust);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_coredump_filter);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout_abort);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout_failure_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_restart);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_bindtodevice);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_output);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_input);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_input_text);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_input_data);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_io_class);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_io_priority);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_cpu_sched_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_cpu_sched_prio);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_cpu_affinity);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_mount_apivfs);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_secure_bits);
+CONFIG_PARSER_PROTOTYPE(config_parse_root_image_options);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_root_hash);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_root_hash_sig);
+CONFIG_PARSER_PROTOTYPE(config_parse_capability_set);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_mount_flags);
+CONFIG_PARSER_PROTOTYPE(config_parse_timer);
+CONFIG_PARSER_PROTOTYPE(config_parse_trigger_unit);
+CONFIG_PARSER_PROTOTYPE(config_parse_path_spec);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_service);
+CONFIG_PARSER_PROTOTYPE(config_parse_service_sockets);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_env_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_tos);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_condition_path);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_condition_string);
+CONFIG_PARSER_PROTOTYPE(config_parse_kill_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_notify_access);
+CONFIG_PARSER_PROTOTYPE(config_parse_emergency_action);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_requires_mounts_for);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_filter);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_archs);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_errno);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_log);
+CONFIG_PARSER_PROTOTYPE(config_parse_environ);
+CONFIG_PARSER_PROTOTYPE(config_parse_pass_environ);
+CONFIG_PARSER_PROTOTYPE(config_parse_unset_environ);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_slice);
+CONFIG_PARSER_PROTOTYPE(config_parse_cg_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_cpu_shares);
+CONFIG_PARSER_PROTOTYPE(config_parse_memory_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max);
+CONFIG_PARSER_PROTOTYPE(config_parse_delegate);
+CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mem_pressure_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_device_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_device_allow);
+CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency);
+CONFIG_PARSER_PROTOTYPE(config_parse_io_device_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_io_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_blockio_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_blockio_device_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_blockio_bandwidth);
+CONFIG_PARSER_PROTOTYPE(config_parse_job_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_job_mode_isolate);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_selinux_context);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_apparmor_profile);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_smack_process_label);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_families);
+CONFIG_PARSER_PROTOTYPE(config_parse_runtime_preserve_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_directories);
+CONFIG_PARSER_PROTOTYPE(config_parse_set_credential);
+CONFIG_PARSER_PROTOTYPE(config_parse_load_credential);
+CONFIG_PARSER_PROTOTYPE(config_parse_set_status);
+CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv);
+CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems);
+CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota);
+CONFIG_PARSER_PROTOTYPE(config_parse_allowed_cpus);
+CONFIG_PARSER_PROTOTYPE(config_parse_allowed_mems);
+CONFIG_PARSER_PROTOTYPE(config_parse_protect_home);
+CONFIG_PARSER_PROTOTYPE(config_parse_protect_system);
+CONFIG_PARSER_PROTOTYPE(config_parse_bus_name);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_utmp_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_working_directory);
+CONFIG_PARSER_PROTOTYPE(config_parse_fdname);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec_fix_0);
+CONFIG_PARSER_PROTOTYPE(config_parse_user_group_compat);
+CONFIG_PARSER_PROTOTYPE(config_parse_user_group_strv_compat);
+CONFIG_PARSER_PROTOTYPE(config_parse_restrict_namespaces);
+CONFIG_PARSER_PROTOTYPE(config_parse_bind_paths);
+CONFIG_PARSER_PROTOTYPE(config_parse_exec_keyring_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_protect_proc);
+CONFIG_PARSER_PROTOTYPE(config_parse_proc_subset);
+CONFIG_PARSER_PROTOTYPE(config_parse_job_timeout_sec);
+CONFIG_PARSER_PROTOTYPE(config_parse_job_running_timeout_sec);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_extra_fields);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_namespace);
+CONFIG_PARSER_PROTOTYPE(config_parse_collect_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_pid_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_exit_status);
+CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers);
+CONFIG_PARSER_PROTOTYPE(config_parse_oom_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_numa_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_numa_mask);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_filter_bpf_progs);
+CONFIG_PARSER_PROTOTYPE(config_parse_cpu_affinity2);
+CONFIG_PARSER_PROTOTYPE(config_parse_show_status);
+CONFIG_PARSER_PROTOTYPE(config_parse_status_unit_format);
+CONFIG_PARSER_PROTOTYPE(config_parse_output_restricted);
+CONFIG_PARSER_PROTOTYPE(config_parse_crash_chvt);
+CONFIG_PARSER_PROTOTYPE(config_parse_timeout_abort);
+CONFIG_PARSER_PROTOTYPE(config_parse_swap_priority);
+CONFIG_PARSER_PROTOTYPE(config_parse_mount_images);
+CONFIG_PARSER_PROTOTYPE(config_parse_socket_timestamping);
+
+/* gperf prototypes */
+const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+extern const char load_fragment_gperf_nulstr[];
diff --git a/src/core/locale-setup.c b/src/core/locale-setup.c
new file mode 100644
index 0000000..64761dd
--- /dev/null
+++ b/src/core/locale-setup.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "env-file.h"
+#include "env-util.h"
+#include "locale-setup.h"
+#include "locale-util.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+#include "virt.h"
+
+int locale_setup(char ***environment) {
+ _cleanup_(locale_variables_freep) char *variables[_VARIABLE_LC_MAX] = {};
+ _cleanup_strv_free_ char **add = NULL;
+ LocaleVariable i;
+ int r;
+
+ r = proc_cmdline_get_key_many(PROC_CMDLINE_STRIP_RD_PREFIX,
+ "locale.LANG", &variables[VARIABLE_LANG],
+ "locale.LANGUAGE", &variables[VARIABLE_LANGUAGE],
+ "locale.LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
+ "locale.LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC],
+ "locale.LC_TIME", &variables[VARIABLE_LC_TIME],
+ "locale.LC_COLLATE", &variables[VARIABLE_LC_COLLATE],
+ "locale.LC_MONETARY", &variables[VARIABLE_LC_MONETARY],
+ "locale.LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES],
+ "locale.LC_PAPER", &variables[VARIABLE_LC_PAPER],
+ "locale.LC_NAME", &variables[VARIABLE_LC_NAME],
+ "locale.LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
+ "locale.LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
+ "locale.LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
+ "locale.LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /proc/cmdline: %m");
+
+ /* Hmm, nothing set on the kernel cmd line? Then let's try /etc/locale.conf */
+ if (r <= 0) {
+ r = parse_env_file(NULL, "/etc/locale.conf",
+ "LANG", &variables[VARIABLE_LANG],
+ "LANGUAGE", &variables[VARIABLE_LANGUAGE],
+ "LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
+ "LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC],
+ "LC_TIME", &variables[VARIABLE_LC_TIME],
+ "LC_COLLATE", &variables[VARIABLE_LC_COLLATE],
+ "LC_MONETARY", &variables[VARIABLE_LC_MONETARY],
+ "LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES],
+ "LC_PAPER", &variables[VARIABLE_LC_PAPER],
+ "LC_NAME", &variables[VARIABLE_LC_NAME],
+ "LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
+ "LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
+ "LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
+ "LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /etc/locale.conf: %m");
+ }
+
+ for (i = 0; i < _VARIABLE_LC_MAX; i++) {
+ char *s;
+
+ if (!variables[i])
+ continue;
+
+ s = strjoin(locale_variable_to_string(i), "=", variables[i]);
+ if (!s)
+ return -ENOMEM;
+
+ if (strv_consume(&add, s) < 0)
+ return -ENOMEM;
+ }
+
+ if (strv_isempty(add)) {
+ /* If no locale is configured then default to compile-time default. */
+
+ add = strv_new("LANG=" SYSTEMD_DEFAULT_LOCALE);
+ if (!add)
+ return -ENOMEM;
+ }
+
+ if (strv_isempty(*environment))
+ strv_free_and_replace(*environment, add);
+ else {
+ char **merged;
+
+ merged = strv_env_merge(2, *environment, add);
+ if (!merged)
+ return -ENOMEM;
+
+ strv_free_and_replace(*environment, merged);
+ }
+
+ return 0;
+}
diff --git a/src/core/locale-setup.h b/src/core/locale-setup.h
new file mode 100644
index 0000000..d554ad3
--- /dev/null
+++ b/src/core/locale-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int locale_setup(char ***environment);
diff --git a/src/core/loopback-setup.c b/src/core/loopback-setup.c
new file mode 100644
index 0000000..76022ce
--- /dev/null
+++ b/src/core/loopback-setup.c
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <stdlib.h>
+
+#include "sd-netlink.h"
+
+#include "loopback-setup.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "time-util.h"
+
+#define LOOPBACK_SETUP_TIMEOUT_USEC (5 * USEC_PER_SEC)
+
+struct state {
+ unsigned n_messages;
+ int rcode;
+ const char *error_message;
+ const char *success_message;
+};
+
+static int generic_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ struct state *s = userdata;
+ int r;
+
+ assert(s);
+ assert(s->n_messages > 0);
+ s->n_messages--;
+
+ errno = 0;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_debug_errno(r, "%s: %m", s->error_message);
+ else
+ log_debug("%s", s->success_message);
+
+ s->rcode = r;
+ return 0;
+}
+
+static int start_loopback(sd_netlink *rtnl, struct state *s) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(s);
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_SETLINK, LOOPBACK_IFINDEX);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, LOOPBACK_SETUP_TIMEOUT_USEC, "systemd-start-loopback");
+ if (r < 0)
+ return r;
+
+ s->n_messages ++;
+ return 0;
+}
+
+static int add_ipv4_address(sd_netlink *rtnl, struct state *s) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(s);
+
+ r = sd_rtnl_message_new_addr(rtnl, &req, RTM_NEWADDR, LOOPBACK_IFINDEX, AF_INET);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_prefixlen(req, 8);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_flags(req, IFA_F_PERMANENT);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_scope(req, RT_SCOPE_HOST);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_in_addr(req, IFA_LOCAL, &(struct in_addr) { .s_addr = htobe32(INADDR_LOOPBACK) } );
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, USEC_INFINITY, "systemd-loopback-ipv4");
+ if (r < 0)
+ return r;
+
+ s->n_messages ++;
+ return 0;
+}
+
+static int add_ipv6_address(sd_netlink *rtnl, struct state *s) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(s);
+
+ r = sd_rtnl_message_new_addr(rtnl, &req, RTM_NEWADDR, LOOPBACK_IFINDEX, AF_INET6);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_prefixlen(req, 128);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_flags(req, IFA_F_PERMANENT);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_addr_set_scope(req, RT_SCOPE_HOST);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_in6_addr(req, IFA_LOCAL, &in6addr_loopback);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call_async(rtnl, NULL, req, generic_handler, NULL, s, USEC_INFINITY, "systemd-loopback-ipv6");
+ if (r < 0)
+ return r;
+
+ s->n_messages ++;
+ return 0;
+}
+
+static bool check_loopback(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ unsigned flags;
+ int r;
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_GETLINK, LOOPBACK_IFINDEX);
+ if (r < 0)
+ return false;
+
+ r = sd_netlink_call(rtnl, req, USEC_INFINITY, &reply);
+ if (r < 0)
+ return false;
+
+ r = sd_rtnl_message_link_get_flags(reply, &flags);
+ if (r < 0)
+ return false;
+
+ return flags & IFF_UP;
+}
+
+int loopback_setup(void) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ struct state state_4 = {
+ .error_message = "Failed to add address 127.0.0.1 to loopback interface",
+ .success_message = "Successfully added address 127.0.0.1 to loopback interface",
+ }, state_6 = {
+ .error_message = "Failed to add address ::1 to loopback interface",
+ .success_message = "Successfully added address ::1 to loopback interface",
+ }, state_up = {
+ .error_message = "Failed to bring loopback interface up",
+ .success_message = "Successfully brought loopback interface up",
+ };
+ int r;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink: %m");
+
+ /* Note that we add the IP addresses here explicitly even though the kernel does that too implicitly when
+ * setting up the loopback device. The reason we do this here a second time (and possibly race against the
+ * kernel) is that we want to synchronously wait until the IP addresses are set up correctly, see
+ *
+ * https://github.com/systemd/systemd/issues/5641 */
+
+ r = add_ipv4_address(rtnl, &state_4);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enqueue IPv4 loopback address add request: %m");
+
+ r = add_ipv6_address(rtnl, &state_6);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enqueue IPv6 loopback address add request: %m");
+
+ r = start_loopback(rtnl, &state_up);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enqueue loopback interface start request: %m");
+
+ while (state_4.n_messages + state_6.n_messages + state_up.n_messages > 0) {
+ r = sd_netlink_wait(rtnl, LOOPBACK_SETUP_TIMEOUT_USEC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for netlink event: %m");
+
+ r = sd_netlink_process(rtnl, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to process netlink event: %m");
+ }
+
+ /* Note that we don't really care whether the addresses could be added or not */
+ if (state_up.rcode != 0) {
+ /* If we lack the permissions to configure the loopback device,
+ * but we find it to be already configured, let's exit cleanly,
+ * in order to supported unprivileged containers. */
+ if (state_up.rcode == -EPERM && check_loopback(rtnl))
+ return 0;
+
+ return log_warning_errno(state_up.rcode, "Failed to configure loopback device: %m");
+ }
+
+ return 0;
+}
diff --git a/src/core/loopback-setup.h b/src/core/loopback-setup.h
new file mode 100644
index 0000000..a7ee2da
--- /dev/null
+++ b/src/core/loopback-setup.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int loopback_setup(void);
diff --git a/src/core/machine-id-setup.c b/src/core/machine-id-setup.c
new file mode 100644
index 0000000..6d15f9c
--- /dev/null
+++ b/src/core/machine-id-setup.c
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "machine-id-setup.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "namespace-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "util.h"
+#include "virt.h"
+
+static int generate_machine_id(const char *root, sd_id128_t *ret) {
+ const char *dbus_machine_id;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(ret);
+
+ /* First, try reading the D-Bus machine id, unless it is a symlink */
+ dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id");
+ fd = open(dbus_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd >= 0) {
+ if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) {
+ log_info("Initializing machine ID from D-Bus machine ID.");
+ return 0;
+ }
+
+ fd = safe_close(fd);
+ }
+
+ if (isempty(root) && running_in_chroot() <= 0) {
+ /* If that didn't work, see if we are running in a container,
+ * and a machine ID was passed in via $container_uuid the way
+ * libvirt/LXC does it */
+
+ if (detect_container() > 0) {
+ _cleanup_free_ char *e = NULL;
+
+ if (getenv_for_pid(1, "container_uuid", &e) > 0 &&
+ sd_id128_from_string(e, ret) >= 0) {
+ log_info("Initializing machine ID from container UUID.");
+ return 0;
+ }
+
+ } else if (detect_vm() == VIRTUALIZATION_KVM) {
+
+ /* If we are not running in a container, see if we are
+ * running in qemu/kvm and a machine ID was passed in
+ * via -uuid on the qemu/kvm command line */
+
+ if (id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, ret) >= 0) {
+ log_info("Initializing machine ID from KVM UUID.");
+ return 0;
+ }
+ /* on POWER, it's exported here instead */
+ if (id128_read("/sys/firmware/devicetree/base/vm,uuid", ID128_UUID, ret) >= 0) {
+ log_info("Initializing machine ID from KVM UUID.");
+ return 0;
+ }
+ }
+ }
+
+ /* If that didn't work, generate a random machine id */
+ r = sd_id128_randomize(ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate randomized machine ID: %m");
+
+ log_info("Initializing machine ID from random generator.");
+ return 0;
+}
+
+int machine_id_setup(const char *root, bool force_transient, sd_id128_t machine_id, sd_id128_t *ret) {
+ const char *etc_machine_id, *run_machine_id;
+ _cleanup_close_ int fd = -1;
+ bool writable;
+ int r;
+
+ etc_machine_id = prefix_roota(root, "/etc/machine-id");
+
+ RUN_WITH_UMASK(0000) {
+ /* We create this 0444, to indicate that this isn't really
+ * something you should ever modify. Of course, since the file
+ * will be owned by root it doesn't matter much, but maybe
+ * people look. */
+
+ (void) mkdir_parents(etc_machine_id, 0755);
+ fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444);
+ if (fd < 0) {
+ int old_errno = errno;
+
+ fd = open(etc_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (old_errno == EROFS && errno == ENOENT)
+ return log_error_errno(errno,
+ "System cannot boot: Missing /etc/machine-id and /etc is mounted read-only.\n"
+ "Booting up is supported only when:\n"
+ "1) /etc/machine-id exists and is populated.\n"
+ "2) /etc/machine-id exists and is empty.\n"
+ "3) /etc/machine-id is missing and /etc is writable.\n");
+ else
+ return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id);
+ }
+
+ writable = false;
+ } else
+ writable = true;
+ }
+
+ /* A we got a valid machine ID argument, that's what counts */
+ if (sd_id128_is_null(machine_id)) {
+
+ /* Try to read any existing machine ID */
+ if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0)
+ return 0;
+
+ /* Hmm, so, the id currently stored is not useful, then let's generate one */
+ r = generate_machine_id(root, &machine_id);
+ if (r < 0)
+ return r;
+ }
+
+ if (writable) {
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek %s: %m", etc_machine_id);
+
+ if (ftruncate(fd, 0) < 0)
+ return log_error_errno(errno, "Failed to truncate %s: %m", etc_machine_id);
+
+ /* If the caller requested a transient machine-id, write the string "uninitialized\n" to
+ * disk and overmount it with a transient file.
+ *
+ * Otherwise write the machine-id directly to disk. */
+ if (force_transient) {
+ r = loop_write(fd, "uninitialized\n", strlen("uninitialized\n"), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write uninitialized %s: %m", etc_machine_id);
+
+ r = fsync_full(fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to sync %s: %m", etc_machine_id);
+ } else {
+ r = id128_write_fd(fd, ID128_PLAIN, machine_id, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_machine_id);
+ else
+ goto finish;
+ }
+ }
+
+ fd = safe_close(fd);
+
+ /* Hmm, we couldn't or shouldn't write the machine-id to /etc?
+ * So let's write it to /run/machine-id as a replacement */
+
+ run_machine_id = prefix_roota(root, "/run/machine-id");
+
+ RUN_WITH_UMASK(0022)
+ r = id128_write(run_machine_id, ID128_PLAIN, machine_id, false);
+ if (r < 0) {
+ (void) unlink(run_machine_id);
+ return log_error_errno(r, "Cannot write %s: %m", run_machine_id);
+ }
+
+ /* And now, let's mount it over */
+ r = mount_follow_verbose(LOG_ERR, run_machine_id, etc_machine_id, NULL, MS_BIND, NULL);
+ if (r < 0) {
+ (void) unlink(run_machine_id);
+ return r;
+ }
+
+ log_full(force_transient ? LOG_DEBUG : LOG_INFO, "Installed transient %s file.", etc_machine_id);
+
+ /* Mark the mount read-only */
+ r = mount_follow_verbose(LOG_WARNING, NULL, etc_machine_id, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
+ if (r < 0)
+ return r;
+
+finish:
+ if (ret)
+ *ret = machine_id;
+
+ return 0;
+}
+
+int machine_id_commit(const char *root) {
+ _cleanup_close_ int fd = -1, initial_mntns_fd = -1;
+ const char *etc_machine_id, *sync_path;
+ sd_id128_t id;
+ int r;
+
+ /* Before doing anything, sync everything to ensure any changes by first-boot units are persisted.
+ *
+ * First, explicitly sync the file systems we care about and check if it worked. */
+ FOREACH_STRING(sync_path, "/etc/", "/var/") {
+ r = syncfs_path(AT_FDCWD, sync_path);
+ if (r < 0)
+ return log_error_errno(r, "Cannot sync %s: %m", sync_path);
+ }
+
+ /* Afterwards, sync() the rest too, but we can't check the return value for these. */
+ sync();
+
+ /* Replaces a tmpfs bind mount of /etc/machine-id by a proper file, atomically. For this, the umount is removed
+ * in a mount namespace, a new file is created at the right place. Afterwards the mount is also removed in the
+ * original mount namespace, thus revealing the file that was just created. */
+
+ etc_machine_id = prefix_roota(root, "/etc/machine-id");
+
+ r = path_is_mount_point(etc_machine_id, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", etc_machine_id);
+ if (r == 0) {
+ log_debug("%s is not a mount point. Nothing to do.", etc_machine_id);
+ return 0;
+ }
+
+ /* Read existing machine-id */
+ fd = open(etc_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id);
+
+ r = fd_is_temporary_fs(fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EROFS),
+ "%s is not on a temporary file system.",
+ etc_machine_id);
+
+ r = id128_read_fd(fd, ID128_PLAIN, &id);
+ if (r < 0)
+ return log_error_errno(r, "We didn't find a valid machine ID in %s: %m", etc_machine_id);
+
+ fd = safe_close(fd);
+
+ /* Store current mount namespace */
+ r = namespace_open(0, NULL, &initial_mntns_fd, NULL, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Can't fetch current mount namespace: %m");
+
+ /* Switch to a new mount namespace, isolate ourself and unmount etc_machine_id in our new namespace */
+ r = detach_mount_namespace();
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up new mount namespace: %m");
+
+ r = umount_verbose(LOG_ERR, etc_machine_id, 0);
+ if (r < 0)
+ return r;
+
+ /* Update a persistent version of etc_machine_id */
+ r = id128_write(etc_machine_id, ID128_PLAIN, id, true);
+ if (r < 0)
+ return log_error_errno(r, "Cannot write %s. This is mandatory to get a persistent machine ID: %m", etc_machine_id);
+
+ /* Return to initial namespace and proceed a lazy tmpfs unmount */
+ r = namespace_enter(-1, initial_mntns_fd, -1, -1, -1);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to switch back to initial mount namespace: %m.\nWe'll keep transient %s file until next reboot.", etc_machine_id);
+
+ if (umount2(etc_machine_id, MNT_DETACH) < 0)
+ return log_warning_errno(errno, "Failed to unmount transient %s file: %m.\nWe keep that mount until next reboot.", etc_machine_id);
+
+ return 0;
+}
diff --git a/src/core/machine-id-setup.h b/src/core/machine-id-setup.h
new file mode 100644
index 0000000..cce5819
--- /dev/null
+++ b/src/core/machine-id-setup.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int machine_id_commit(const char *root);
+int machine_id_setup(const char *root, bool force_transient, sd_id128_t requested, sd_id128_t *ret);
diff --git a/src/core/macros.systemd.in b/src/core/macros.systemd.in
new file mode 100644
index 0000000..1c40328
--- /dev/null
+++ b/src/core/macros.systemd.in
@@ -0,0 +1,165 @@
+# -*- Mode: rpm-spec; indent-tabs-mode: nil -*- */
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+
+# RPM macros for packages installing systemd unit files
+
+%_systemd_util_dir @rootlibexecdir@
+%_unitdir @systemunitdir@
+%_userunitdir @userunitdir@
+%_presetdir @systempresetdir@
+%_userpresetdir @userpresetdir@
+%_udevhwdbdir @udevhwdbdir@
+%_udevrulesdir @udevrulesdir@
+%_journalcatalogdir @catalogdir@
+%_binfmtdir @binfmtdir@
+%_sysctldir @sysctldir@
+%_sysusersdir @sysusersdir@
+%_tmpfilesdir @tmpfilesdir@
+%_environmentdir @environmentdir@
+%_modulesloaddir @modulesloaddir@
+%_modprobedir @modprobedir@
+%_systemdgeneratordir @systemgeneratordir@
+%_systemdusergeneratordir @usergeneratordir@
+%_systemd_system_env_generator_dir @systemenvgeneratordir@
+%_systemd_user_env_generator_dir @userenvgeneratordir@
+
+# Because we had one release with a typo...
+# This is temporary (Remove after systemd 240 is released)
+%_environmnentdir %{warn:Use %%_environmentdir instead}%_environmentdir
+
+%systemd_requires \
+Requires(post): systemd \
+Requires(preun): systemd \
+Requires(postun): systemd \
+%{nil}
+
+%systemd_ordering \
+OrderWithRequires(post): systemd \
+OrderWithRequires(preun): systemd \
+OrderWithRequires(postun): systemd \
+%{nil}
+
+%__systemd_someargs_0(:) %{error:The %%%1 macro requires some arguments}
+%__systemd_twoargs_2() %{nil}
+
+%systemd_post() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# systemd_post}} \
+if [ $1 -eq 1 ] && [ -x @bindir@/systemctl ]; then \
+ # Initial installation \
+ @bindir@/systemctl --no-reload preset %{?*} || : \
+fi \
+%{nil}
+
+%systemd_user_post() %{expand:%systemd_post \\--global %%{?*}}
+
+%systemd_preun() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# systemd_preun}} \
+if [ $1 -eq 0 ] && [ -x @bindir@/systemctl ]; then \
+ # Package removal, not upgrade \
+ @bindir@/systemctl --no-reload disable --now %{?*} || : \
+fi \
+%{nil}
+
+%systemd_user_preun() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# systemd_user_preun}} \
+if [ $1 -eq 0 ] && [ -x @bindir@/systemctl ]; then \
+ # Package removal, not upgrade \
+ @bindir@/systemctl --global disable %{?*} || : \
+fi \
+%{nil}
+
+%systemd_postun() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# systemd_postun}} \
+%{nil}
+
+%systemd_user_postun() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# systemd_user_postun}} \
+%{nil}
+
+%systemd_postun_with_restart() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# systemd_postun_with_restart}} \
+if [ $1 -ge 1 ] && [ -x @bindir@/systemctl ]; then \
+ # Package upgrade, not uninstall \
+ @bindir@/systemctl try-restart %{?*} || : \
+fi \
+%{nil}
+
+%systemd_user_postun_with_restart() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# systemd_postun_with_restart}} \
+%{nil}
+
+%udev_hwdb_update() %{nil}
+
+%udev_rules_update() %{nil}
+
+%journal_catalog_update() %{nil}
+
+# Deprecated. Use %tmpfiles_create_package instead
+%tmpfiles_create() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# tmpfiles_create}} \
+[ -x @bindir@/systemd-tmpfiles ] && @bindir@/systemd-tmpfiles --create %{?*} || : \
+%{nil}
+
+# Deprecated. Use %sysusers_create_package instead
+%sysusers_create() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# sysusers_create}} \
+[ -x @bindir@/systemd-sysusers ] && @bindir@/systemd-sysusers %{?*} || : \
+%{nil}
+
+%sysusers_create_inline() \
+[ -x @bindir@/systemd-sysusers ] && @bindir@/systemd-sysusers - <<SYSTEMD_INLINE_EOF || : \
+%{?*} \
+SYSTEMD_INLINE_EOF\
+%{nil}
+
+# This should be used by package installation scripts which require users or
+# groups to be present before the files installed by the package are present on
+# disk (for example because some files are owned by those users or groups).
+#
+# Example:
+# Source1: %{name}-sysusers.conf
+# ...
+# %install
+# install -D %SOURCE1 %{buildroot}%{_sysusersdir}/%{name}.conf
+# %pre
+# %sysusers_create_package %{name} %SOURCE1
+# %files
+# %{_sysusersdir}/%{name}.conf
+%sysusers_create_package() \
+%{expand:%%{?!__systemd_twoargs_%#:%%{error:The %%%%sysusers_create_package macro requires two arguments}}} \
+systemd-sysusers --replace=%_sysusersdir/%1.conf - <<SYSTEMD_INLINE_EOF || : \
+%(cat %2) \
+SYSTEMD_INLINE_EOF\
+%{nil}
+
+# This may be used by package installation scripts to create files according to
+# their tmpfiles configuration from a package installation script, even before
+# the files of that package are installed on disk.
+#
+# Example:
+# Source1: %{name}-tmpfiles.conf
+# ...
+# %install
+# install -D %SOURCE1 %{buildroot}%{_tmpfilesdir}/%{name}.conf
+# %pre
+# %tmpfiles_create_package %{name} %SOURCE1
+# %files
+# %{_tmpfilesdir}/%{name}.conf
+%tmpfiles_create_package() \
+%{expand:%%{?!__systemd_twoargs_%#:%%{error:The %%%%tmpfiles_create_package macro requires two arguments}}} \
+systemd-tmpfiles --replace=%_tmpfilesdir/%1.conf --create - <<SYSTEMD_INLINE_EOF || : \
+%(cat %2) \
+SYSTEMD_INLINE_EOF\
+%{nil}
+
+%sysctl_apply() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# sysctl_apply}} \
+[ -x @rootlibexecdir@/systemd-sysctl ] && @rootlibexecdir@/systemd-sysctl %{?*} || : \
+%{nil}
+
+%binfmt_apply() \
+%{expand:%%{?__systemd_someargs_%#:%%__systemd_someargs_%# binfmt_apply}} \
+[ -x @rootlibexecdir@/systemd-binfmt ] && @rootlibexecdir@/systemd-binfmt %{?*} || : \
+%{nil}
diff --git a/src/core/main.c b/src/core/main.c
new file mode 100644
index 0000000..a280b75
--- /dev/null
+++ b/src/core/main.c
@@ -0,0 +1,2935 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/reboot.h>
+#include <unistd.h>
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "apparmor-setup.h"
+#include "architecture.h"
+#include "build.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "clock-util.h"
+#include "conf-parser.h"
+#include "cpu-set-util.h"
+#include "dbus-manager.h"
+#include "dbus.h"
+#include "def.h"
+#include "dev-setup.h"
+#include "efi-random.h"
+#include "efivars.h"
+#include "emergency-action.h"
+#include "env-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "hostname-setup.h"
+#include "ima-setup.h"
+#include "killall.h"
+#include "kmod-setup.h"
+#include "limits-util.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "loopback-setup.h"
+#include "machine-id-setup.h"
+#include "manager.h"
+#include "mkdir.h"
+#include "mount-setup.h"
+#include "os-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "raw-clone.h"
+#include "rlimit-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "selinux-setup.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "smack-setup.h"
+#include "special.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "switch-root.h"
+#include "sysctl-util.h"
+#include "terminal-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+#include "watchdog.h"
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+#include <sanitizer/lsan_interface.h>
+#endif
+
+#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
+
+static enum {
+ ACTION_RUN,
+ ACTION_HELP,
+ ACTION_VERSION,
+ ACTION_TEST,
+ ACTION_DUMP_CONFIGURATION_ITEMS,
+ ACTION_DUMP_BUS_PROPERTIES,
+ ACTION_BUS_INTROSPECT,
+} arg_action = ACTION_RUN;
+
+static const char *arg_bus_introspect = NULL;
+
+/* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real
+ * defaults are assigned in reset_arguments() below. */
+static char *arg_default_unit;
+static bool arg_system;
+static bool arg_dump_core;
+static int arg_crash_chvt;
+static bool arg_crash_shell;
+static bool arg_crash_reboot;
+static char *arg_confirm_spawn;
+static ShowStatus arg_show_status;
+static StatusUnitFormat arg_status_unit_format;
+static bool arg_switched_root;
+static PagerFlags arg_pager_flags;
+static bool arg_service_watchdogs;
+static ExecOutput arg_default_std_output;
+static ExecOutput arg_default_std_error;
+static usec_t arg_default_restart_usec;
+static usec_t arg_default_timeout_start_usec;
+static usec_t arg_default_timeout_stop_usec;
+static usec_t arg_default_timeout_abort_usec;
+static bool arg_default_timeout_abort_set;
+static usec_t arg_default_start_limit_interval;
+static unsigned arg_default_start_limit_burst;
+static usec_t arg_runtime_watchdog;
+static usec_t arg_reboot_watchdog;
+static usec_t arg_kexec_watchdog;
+static char *arg_early_core_pattern;
+static char *arg_watchdog_device;
+static char **arg_default_environment;
+static struct rlimit *arg_default_rlimit[_RLIMIT_MAX];
+static uint64_t arg_capability_bounding_set;
+static bool arg_no_new_privs;
+static nsec_t arg_timer_slack_nsec;
+static usec_t arg_default_timer_accuracy_usec;
+static Set* arg_syscall_archs;
+static FILE* arg_serialization;
+static int arg_default_cpu_accounting;
+static bool arg_default_io_accounting;
+static bool arg_default_ip_accounting;
+static bool arg_default_blockio_accounting;
+static bool arg_default_memory_accounting;
+static bool arg_default_tasks_accounting;
+static TasksMax arg_default_tasks_max;
+static sd_id128_t arg_machine_id;
+static EmergencyAction arg_cad_burst_action;
+static OOMPolicy arg_default_oom_policy;
+static CPUSet arg_cpu_affinity;
+static NUMAPolicy arg_numa_policy;
+static usec_t arg_clock_usec;
+static void *arg_random_seed;
+static size_t arg_random_seed_size;
+
+/* A copy of the original environment block */
+static char **saved_env = NULL;
+
+static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
+ const struct rlimit *saved_rlimit_memlock);
+
+_noreturn_ static void freeze_or_exit_or_reboot(void) {
+
+ /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to
+ * the container manager, and thus inform it that something went wrong. */
+ if (detect_container() > 0) {
+ log_emergency("Exiting PID 1...");
+ _exit(EXIT_EXCEPTION);
+ }
+
+ if (arg_crash_reboot) {
+ log_notice("Rebooting in 10s...");
+ (void) sleep(10);
+
+ log_notice("Rebooting now...");
+ (void) reboot(RB_AUTOBOOT);
+ log_emergency_errno(errno, "Failed to reboot: %m");
+ }
+
+ log_emergency("Freezing execution.");
+ freeze();
+}
+
+_noreturn_ static void crash(int sig) {
+ struct sigaction sa;
+ pid_t pid;
+
+ if (getpid_cached() != 1)
+ /* Pass this on immediately, if this is not PID 1 */
+ (void) raise(sig);
+ else if (!arg_dump_core)
+ log_emergency("Caught <%s>, not dumping core.", signal_to_string(sig));
+ else {
+ sa = (struct sigaction) {
+ .sa_handler = nop_signal_handler,
+ .sa_flags = SA_NOCLDSTOP|SA_RESTART,
+ };
+
+ /* We want to wait for the core process, hence let's enable SIGCHLD */
+ (void) sigaction(SIGCHLD, &sa, NULL);
+
+ pid = raw_clone(SIGCHLD);
+ if (pid < 0)
+ log_emergency_errno(errno, "Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
+ else if (pid == 0) {
+ /* Enable default signal handler for core dump */
+
+ sa = (struct sigaction) {
+ .sa_handler = SIG_DFL,
+ };
+ (void) sigaction(sig, &sa, NULL);
+
+ /* Don't limit the coredump size */
+ (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
+
+ /* Just to be sure... */
+ (void) chdir("/");
+
+ /* Raise the signal again */
+ pid = raw_getpid();
+ (void) kill(pid, sig); /* raise() would kill the parent */
+
+ assert_not_reached("We shouldn't be here...");
+ _exit(EXIT_EXCEPTION);
+ } else {
+ siginfo_t status;
+ int r;
+
+ /* Order things nicely. */
+ r = wait_for_terminate(pid, &status);
+ if (r < 0)
+ log_emergency_errno(r, "Caught <%s>, waitpid() failed: %m", signal_to_string(sig));
+ else if (status.si_code != CLD_DUMPED) {
+ const char *s = status.si_code == CLD_EXITED
+ ? exit_status_to_string(status.si_status, EXIT_STATUS_LIBC)
+ : signal_to_string(status.si_status);
+
+ log_emergency("Caught <%s>, core dump failed (child "PID_FMT", code=%s, status=%i/%s).",
+ signal_to_string(sig),
+ pid,
+ sigchld_code_to_string(status.si_code),
+ status.si_status, strna(s));
+ } else
+ log_emergency("Caught <%s>, dumped core as pid "PID_FMT".",
+ signal_to_string(sig), pid);
+ }
+ }
+
+ if (arg_crash_chvt >= 0)
+ (void) chvt(arg_crash_chvt);
+
+ sa = (struct sigaction) {
+ .sa_handler = SIG_IGN,
+ .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
+ };
+
+ /* Let the kernel reap children for us */
+ (void) sigaction(SIGCHLD, &sa, NULL);
+
+ if (arg_crash_shell) {
+ log_notice("Executing crash shell in 10s...");
+ (void) sleep(10);
+
+ pid = raw_clone(SIGCHLD);
+ if (pid < 0)
+ log_emergency_errno(errno, "Failed to fork off crash shell: %m");
+ else if (pid == 0) {
+ (void) setsid();
+ (void) make_console_stdio();
+ (void) rlimit_nofile_safe();
+ (void) execle("/bin/sh", "/bin/sh", NULL, environ);
+
+ log_emergency_errno(errno, "execle() failed: %m");
+ _exit(EXIT_EXCEPTION);
+ } else {
+ log_info("Spawned crash shell as PID "PID_FMT".", pid);
+ (void) wait_for_terminate(pid, NULL);
+ }
+ }
+
+ freeze_or_exit_or_reboot();
+}
+
+static void install_crash_handler(void) {
+ static const struct sigaction sa = {
+ .sa_handler = crash,
+ .sa_flags = SA_NODEFER, /* So that we can raise the signal again from the signal handler */
+ };
+ int r;
+
+ /* We ignore the return value here, since, we don't mind if we
+ * cannot set up a crash handler */
+ r = sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
+ if (r < 0)
+ log_debug_errno(r, "I had trouble setting up the crash handler, ignoring: %m");
+}
+
+static int console_setup(void) {
+ _cleanup_close_ int tty_fd = -1;
+ int r;
+
+ tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (tty_fd < 0)
+ return log_error_errno(tty_fd, "Failed to open /dev/console: %m");
+
+ /* We don't want to force text mode. plymouth may be showing
+ * pictures already from initrd. */
+ r = reset_terminal_fd(tty_fd, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset /dev/console: %m");
+
+ return 0;
+}
+
+static int set_machine_id(const char *m) {
+ sd_id128_t t;
+ assert(m);
+
+ if (sd_id128_from_string(m, &t) < 0)
+ return -EINVAL;
+
+ if (sd_id128_is_null(t))
+ return -EINVAL;
+
+ arg_machine_id = t;
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ assert(key);
+
+ if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
+ else if (in_initrd() == !!startswith(key, "rd."))
+ return free_and_strdup_warn(&arg_default_unit, value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
+ else
+ arg_dump_core = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (path_is_absolute(value))
+ (void) parse_path_argument_and_warn(value, false, &arg_early_core_pattern);
+ else
+ log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
+
+ if (!value)
+ arg_crash_chvt = 0; /* turn on */
+ else {
+ r = parse_crash_chvt(value, &arg_crash_chvt);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
+ }
+
+ } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
+ else
+ arg_crash_shell = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
+ else
+ arg_crash_reboot = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
+ char *s;
+
+ r = parse_confirm_spawn(value, &s);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
+ else
+ free_and_replace(arg_confirm_spawn, s);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
+ else
+ arg_service_watchdogs = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
+
+ if (value) {
+ r = parse_show_status(value, &arg_show_status);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
+ } else
+ arg_show_status = SHOW_STATUS_YES;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = status_unit_format_from_string(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
+ else
+ arg_status_unit_format = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = exec_output_from_string(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
+ else
+ arg_default_std_output = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = exec_output_from_string(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
+ else
+ arg_default_std_error = r;
+
+ } else if (streq(key, "systemd.setenv")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (env_assignment_is_valid(value)) {
+ char **env;
+
+ env = strv_env_set(arg_default_environment, value);
+ if (!env)
+ return log_oom();
+
+ arg_default_environment = env;
+ } else
+ log_warning("Environment variable name '%s' is not valid. Ignoring.", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = set_machine_id(value);
+ if (r < 0)
+ log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = parse_sec(value, &arg_default_timeout_start_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
+
+ if (arg_default_timeout_start_usec <= 0)
+ arg_default_timeout_start_usec = USEC_INFINITY;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = parse_cpu_set(value, &arg_cpu_affinity);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ (void) parse_path_argument_and_warn(value, false, &arg_watchdog_device);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = safe_atou64(value, &arg_clock_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
+
+ } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
+ void *p;
+ size_t sz;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = unbase64mem(value, (size_t) -1, &p, &sz);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
+
+ free(arg_random_seed);
+ arg_random_seed = sz > 0 ? p : mfree(p);
+ arg_random_seed_size = sz;
+
+ } else if (streq(key, "quiet") && !value) {
+
+ if (arg_show_status == _SHOW_STATUS_INVALID)
+ arg_show_status = SHOW_STATUS_ERROR;
+
+ } else if (streq(key, "debug") && !value) {
+
+ /* Note that log_parse_environment() handles 'debug'
+ * too, and sets the log level to LOG_DEBUG. */
+
+ if (detect_container() > 0)
+ log_set_target(LOG_TARGET_CONSOLE);
+
+ } else if (!value) {
+ const char *target;
+
+ /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
+ target = runlevel_to_target(key);
+ if (target)
+ return free_and_strdup_warn(&arg_default_unit, target);
+ }
+
+ return 0;
+}
+
+#define DEFINE_SETTER(name, func, descr) \
+ static int name(const char *unit, \
+ const char *filename, \
+ unsigned line, \
+ const char *section, \
+ unsigned section_line, \
+ const char *lvalue, \
+ int ltype, \
+ const char *rvalue, \
+ void *data, \
+ void *userdata) { \
+ \
+ int r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ \
+ r = func(rvalue); \
+ if (r < 0) \
+ log_syntax(unit, LOG_ERR, filename, line, r, \
+ "Invalid " descr "'%s': %m", \
+ rvalue); \
+ \
+ return 0; \
+ }
+
+DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
+DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
+DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
+DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
+DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
+
+static int config_parse_default_timeout_abort(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ int r;
+
+ r = config_parse_timeout_abort(unit, filename, line, section, section_line, lvalue, ltype, rvalue,
+ &arg_default_timeout_abort_usec, userdata);
+ if (r >= 0)
+ arg_default_timeout_abort_set = r;
+ return 0;
+}
+
+static int parse_config_file(void) {
+ const ConfigTableItem items[] = {
+ { "Manager", "LogLevel", config_parse_level2, 0, NULL },
+ { "Manager", "LogTarget", config_parse_target, 0, NULL },
+ { "Manager", "LogColor", config_parse_color, 0, NULL },
+ { "Manager", "LogLocation", config_parse_location, 0, NULL },
+ { "Manager", "LogTime", config_parse_time, 0, NULL },
+ { "Manager", "DumpCore", config_parse_bool, 0, &arg_dump_core },
+ { "Manager", "CrashChVT", /* legacy */ config_parse_crash_chvt, 0, &arg_crash_chvt },
+ { "Manager", "CrashChangeVT", config_parse_crash_chvt, 0, &arg_crash_chvt },
+ { "Manager", "CrashShell", config_parse_bool, 0, &arg_crash_shell },
+ { "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
+ { "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
+ { "Manager", "StatusUnitFormat", config_parse_status_unit_format, 0, &arg_status_unit_format },
+ { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, &arg_cpu_affinity },
+ { "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
+ { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
+ { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
+ { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
+ { "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
+ { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
+ { "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
+ { "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
+ { "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
+ { "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
+#if HAVE_SECCOMP
+ { "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
+#endif
+ { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec },
+ { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec },
+ { "Manager", "DefaultStandardOutput", config_parse_output_restricted, 0, &arg_default_std_output },
+ { "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_default_std_error },
+ { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec },
+ { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec },
+ { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL },
+ { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec },
+ { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */
+ { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_default_start_limit_interval },
+ { "Manager", "DefaultStartLimitBurst", config_parse_unsigned, 0, &arg_default_start_limit_burst },
+ { "Manager", "DefaultEnvironment", config_parse_environ, 0, &arg_default_environment },
+ { "Manager", "DefaultLimitCPU", config_parse_rlimit, RLIMIT_CPU, arg_default_rlimit },
+ { "Manager", "DefaultLimitFSIZE", config_parse_rlimit, RLIMIT_FSIZE, arg_default_rlimit },
+ { "Manager", "DefaultLimitDATA", config_parse_rlimit, RLIMIT_DATA, arg_default_rlimit },
+ { "Manager", "DefaultLimitSTACK", config_parse_rlimit, RLIMIT_STACK, arg_default_rlimit },
+ { "Manager", "DefaultLimitCORE", config_parse_rlimit, RLIMIT_CORE, arg_default_rlimit },
+ { "Manager", "DefaultLimitRSS", config_parse_rlimit, RLIMIT_RSS, arg_default_rlimit },
+ { "Manager", "DefaultLimitNOFILE", config_parse_rlimit, RLIMIT_NOFILE, arg_default_rlimit },
+ { "Manager", "DefaultLimitAS", config_parse_rlimit, RLIMIT_AS, arg_default_rlimit },
+ { "Manager", "DefaultLimitNPROC", config_parse_rlimit, RLIMIT_NPROC, arg_default_rlimit },
+ { "Manager", "DefaultLimitMEMLOCK", config_parse_rlimit, RLIMIT_MEMLOCK, arg_default_rlimit },
+ { "Manager", "DefaultLimitLOCKS", config_parse_rlimit, RLIMIT_LOCKS, arg_default_rlimit },
+ { "Manager", "DefaultLimitSIGPENDING", config_parse_rlimit, RLIMIT_SIGPENDING, arg_default_rlimit },
+ { "Manager", "DefaultLimitMSGQUEUE", config_parse_rlimit, RLIMIT_MSGQUEUE, arg_default_rlimit },
+ { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
+ { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
+ { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
+ { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
+ { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
+ { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
+ { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
+ { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
+ { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
+ { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
+ { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
+ { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy },
+ {}
+ };
+
+ const char *fn, *conf_dirs_nulstr;
+
+ fn = arg_system ?
+ PKGSYSCONFDIR "/system.conf" :
+ PKGSYSCONFDIR "/user.conf";
+
+ conf_dirs_nulstr = arg_system ?
+ CONF_PATHS_NULSTR("systemd/system.conf.d") :
+ CONF_PATHS_NULSTR("systemd/user.conf.d");
+
+ (void) config_parse_many_nulstr(
+ fn, conf_dirs_nulstr,
+ "Manager\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+
+ /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
+ * like everywhere else. */
+ if (arg_default_timeout_start_usec <= 0)
+ arg_default_timeout_start_usec = USEC_INFINITY;
+ if (arg_default_timeout_stop_usec <= 0)
+ arg_default_timeout_stop_usec = USEC_INFINITY;
+
+ return 0;
+}
+
+static void set_manager_defaults(Manager *m) {
+
+ assert(m);
+
+ /* Propagates the various default unit property settings into the manager object, i.e. properties that do not
+ * affect the manager itself, but are just what newly allocated units will have set if they haven't set
+ * anything else. (Also see set_manager_settings() for the settings that affect the manager's own behaviour) */
+
+ m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec;
+ m->default_std_output = arg_default_std_output;
+ m->default_std_error = arg_default_std_error;
+ m->default_timeout_start_usec = arg_default_timeout_start_usec;
+ m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
+ m->default_timeout_abort_usec = arg_default_timeout_abort_usec;
+ m->default_timeout_abort_set = arg_default_timeout_abort_set;
+ m->default_restart_usec = arg_default_restart_usec;
+ m->default_start_limit_interval = arg_default_start_limit_interval;
+ m->default_start_limit_burst = arg_default_start_limit_burst;
+
+ /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
+ * controller to be enabled, so the default is to enable it unless we got told otherwise. */
+ if (arg_default_cpu_accounting >= 0)
+ m->default_cpu_accounting = arg_default_cpu_accounting;
+ else
+ m->default_cpu_accounting = cpu_accounting_is_cheap();
+
+ m->default_io_accounting = arg_default_io_accounting;
+ m->default_ip_accounting = arg_default_ip_accounting;
+ m->default_blockio_accounting = arg_default_blockio_accounting;
+ m->default_memory_accounting = arg_default_memory_accounting;
+ m->default_tasks_accounting = arg_default_tasks_accounting;
+ m->default_tasks_max = arg_default_tasks_max;
+ m->default_oom_policy = arg_default_oom_policy;
+
+ (void) manager_set_default_rlimits(m, arg_default_rlimit);
+
+ (void) manager_default_environment(m);
+ (void) manager_transient_environment_add(m, arg_default_environment);
+}
+
+static void set_manager_settings(Manager *m) {
+
+ assert(m);
+
+ /* Propagates the various manager settings into the manager object, i.e. properties that
+ * effect the manager itself (as opposed to just being inherited into newly allocated
+ * units, see set_manager_defaults() above). */
+
+ m->confirm_spawn = arg_confirm_spawn;
+ m->service_watchdogs = arg_service_watchdogs;
+ m->cad_burst_action = arg_cad_burst_action;
+
+ manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
+ manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
+ manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
+
+ manager_set_show_status(m, arg_show_status, "commandline");
+ m->status_unit_format = arg_status_unit_format;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_LOG_LEVEL = 0x100,
+ ARG_LOG_TARGET,
+ ARG_LOG_COLOR,
+ ARG_LOG_LOCATION,
+ ARG_LOG_TIME,
+ ARG_UNIT,
+ ARG_SYSTEM,
+ ARG_USER,
+ ARG_TEST,
+ ARG_NO_PAGER,
+ ARG_VERSION,
+ ARG_DUMP_CONFIGURATION_ITEMS,
+ ARG_DUMP_BUS_PROPERTIES,
+ ARG_BUS_INTROSPECT,
+ ARG_DUMP_CORE,
+ ARG_CRASH_CHVT,
+ ARG_CRASH_SHELL,
+ ARG_CRASH_REBOOT,
+ ARG_CONFIRM_SPAWN,
+ ARG_SHOW_STATUS,
+ ARG_DESERIALIZE,
+ ARG_SWITCHED_ROOT,
+ ARG_DEFAULT_STD_OUTPUT,
+ ARG_DEFAULT_STD_ERROR,
+ ARG_MACHINE_ID,
+ ARG_SERVICE_WATCHDOGS,
+ };
+
+ static const struct option options[] = {
+ { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
+ { "log-target", required_argument, NULL, ARG_LOG_TARGET },
+ { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
+ { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
+ { "log-time", optional_argument, NULL, ARG_LOG_TIME },
+ { "unit", required_argument, NULL, ARG_UNIT },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "test", no_argument, NULL, ARG_TEST },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "dump-configuration-items", no_argument, NULL, ARG_DUMP_CONFIGURATION_ITEMS },
+ { "dump-bus-properties", no_argument, NULL, ARG_DUMP_BUS_PROPERTIES },
+ { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
+ { "dump-core", optional_argument, NULL, ARG_DUMP_CORE },
+ { "crash-chvt", required_argument, NULL, ARG_CRASH_CHVT },
+ { "crash-shell", optional_argument, NULL, ARG_CRASH_SHELL },
+ { "crash-reboot", optional_argument, NULL, ARG_CRASH_REBOOT },
+ { "confirm-spawn", optional_argument, NULL, ARG_CONFIRM_SPAWN },
+ { "show-status", optional_argument, NULL, ARG_SHOW_STATUS },
+ { "deserialize", required_argument, NULL, ARG_DESERIALIZE },
+ { "switched-root", no_argument, NULL, ARG_SWITCHED_ROOT },
+ { "default-standard-output", required_argument, NULL, ARG_DEFAULT_STD_OUTPUT, },
+ { "default-standard-error", required_argument, NULL, ARG_DEFAULT_STD_ERROR, },
+ { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
+ { "service-watchdogs", required_argument, NULL, ARG_SERVICE_WATCHDOGS },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ if (getpid_cached() == 1)
+ opterr = 0;
+
+ while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case ARG_LOG_LEVEL:
+ r = log_set_max_level_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
+
+ break;
+
+ case ARG_LOG_TARGET:
+ r = log_set_target_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
+
+ break;
+
+ case ARG_LOG_COLOR:
+
+ if (optarg) {
+ r = log_show_color_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
+ optarg);
+ } else
+ log_show_color(true);
+
+ break;
+
+ case ARG_LOG_LOCATION:
+ if (optarg) {
+ r = log_show_location_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
+ optarg);
+ } else
+ log_show_location(true);
+
+ break;
+
+ case ARG_LOG_TIME:
+
+ if (optarg) {
+ r = log_show_time_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
+ optarg);
+ } else
+ log_show_time(true);
+
+ break;
+
+ case ARG_DEFAULT_STD_OUTPUT:
+ r = exec_output_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
+ optarg);
+ arg_default_std_output = r;
+ break;
+
+ case ARG_DEFAULT_STD_ERROR:
+ r = exec_output_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
+ optarg);
+ arg_default_std_error = r;
+ break;
+
+ case ARG_UNIT:
+ r = free_and_strdup(&arg_default_unit, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
+
+ break;
+
+ case ARG_SYSTEM:
+ arg_system = true;
+ break;
+
+ case ARG_USER:
+ arg_system = false;
+ break;
+
+ case ARG_TEST:
+ arg_action = ACTION_TEST;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_VERSION:
+ arg_action = ACTION_VERSION;
+ break;
+
+ case ARG_DUMP_CONFIGURATION_ITEMS:
+ arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
+ break;
+
+ case ARG_DUMP_BUS_PROPERTIES:
+ arg_action = ACTION_DUMP_BUS_PROPERTIES;
+ break;
+
+ case ARG_BUS_INTROSPECT:
+ arg_bus_introspect = optarg;
+ arg_action = ACTION_BUS_INTROSPECT;
+ break;
+
+ case ARG_DUMP_CORE:
+ if (!optarg)
+ arg_dump_core = true;
+ else {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse dump core boolean: \"%s\": %m",
+ optarg);
+ arg_dump_core = r;
+ }
+ break;
+
+ case ARG_CRASH_CHVT:
+ r = parse_crash_chvt(optarg, &arg_crash_chvt);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
+ optarg);
+ break;
+
+ case ARG_CRASH_SHELL:
+ if (!optarg)
+ arg_crash_shell = true;
+ else {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
+ optarg);
+ arg_crash_shell = r;
+ }
+ break;
+
+ case ARG_CRASH_REBOOT:
+ if (!optarg)
+ arg_crash_reboot = true;
+ else {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
+ optarg);
+ arg_crash_reboot = r;
+ }
+ break;
+
+ case ARG_CONFIRM_SPAWN:
+ arg_confirm_spawn = mfree(arg_confirm_spawn);
+
+ r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
+ optarg);
+ break;
+
+ case ARG_SERVICE_WATCHDOGS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse service watchdogs boolean: \"%s\": %m",
+ optarg);
+ arg_service_watchdogs = r;
+ break;
+
+ case ARG_SHOW_STATUS:
+ if (optarg) {
+ r = parse_show_status(optarg, &arg_show_status);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
+ optarg);
+ } else
+ arg_show_status = SHOW_STATUS_YES;
+ break;
+
+ case ARG_DESERIALIZE: {
+ int fd;
+ FILE *f;
+
+ r = safe_atoi(optarg, &fd);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
+ if (fd < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid deserialize fd: %d",
+ fd);
+
+ (void) fd_cloexec(fd, true);
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
+
+ safe_fclose(arg_serialization);
+ arg_serialization = f;
+
+ break;
+ }
+
+ case ARG_SWITCHED_ROOT:
+ arg_switched_root = true;
+ break;
+
+ case ARG_MACHINE_ID:
+ r = set_machine_id(optarg);
+ if (r < 0)
+ return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
+ break;
+
+ case 'h':
+ arg_action = ACTION_HELP;
+ break;
+
+ case 'D':
+ log_set_max_level(LOG_DEBUG);
+ break;
+
+ case 'b':
+ case 's':
+ case 'z':
+ /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
+ * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
+ */
+ case '?':
+ if (getpid_cached() != 1)
+ return -EINVAL;
+ else
+ return 0;
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ if (optind < argc && getpid_cached() != 1)
+ /* Hmm, when we aren't run as init system
+ * let's complain about excess arguments */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Excess arguments.");
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "%sStarts and monitors system and user services.%s\n\n"
+ "This program takes no positional arguments.\n\n"
+ "%sOptions%s:\n"
+ " -h --help Show this help\n"
+ " --version Show version\n"
+ " --test Determine initial transaction, dump it and exit\n"
+ " --system In combination with --test: operate as system service manager\n"
+ " --user In combination with --test: operate as per-user service manager\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --dump-configuration-items Dump understood unit configuration items\n"
+ " --dump-bus-properties Dump exposed bus properties\n"
+ " --bus-introspect=PATH Write XML introspection data\n"
+ " --unit=UNIT Set default unit\n"
+ " --dump-core[=BOOL] Dump core on crash\n"
+ " --crash-vt=NR Change to specified VT on crash\n"
+ " --crash-reboot[=BOOL] Reboot on crash\n"
+ " --crash-shell[=BOOL] Run shell on crash\n"
+ " --confirm-spawn[=BOOL] Ask for confirmation when spawning processes\n"
+ " --show-status[=BOOL] Show status updates on the console during bootup\n"
+ " --log-target=TARGET Set log target (console, journal, kmsg, journal-or-kmsg, null)\n"
+ " --log-level=LEVEL Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
+ " --log-color[=BOOL] Highlight important log messages\n"
+ " --log-location[=BOOL] Include code location in log messages\n"
+ " --log-time[=BOOL] Prefix log messages with current time\n"
+ " --default-standard-output= Set default standard output for services\n"
+ " --default-standard-error= Set default standard error output for services\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , ansi_underline(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int prepare_reexecute(
+ Manager *m,
+ FILE **ret_f,
+ FDSet **ret_fds,
+ bool switching_root) {
+
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(m);
+ assert(ret_f);
+ assert(ret_fds);
+
+ r = manager_open_serialization(m, &f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create serialization file: %m");
+
+ /* Make sure nothing is really destructed when we shut down */
+ m->n_reloading++;
+ bus_manager_send_reloading(m, true);
+
+ fds = fdset_new();
+ if (!fds)
+ return log_oom();
+
+ r = manager_serialize(m, f, fds, switching_root);
+ if (r < 0)
+ return r;
+
+ if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to rewind serialization fd: %m");
+
+ r = fd_cloexec(fileno(f), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
+
+ r = fdset_cloexec(fds, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
+
+ *ret_f = TAKE_PTR(f);
+ *ret_fds = TAKE_PTR(fds);
+
+ return 0;
+}
+
+static void bump_file_max_and_nr_open(void) {
+
+ /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large numbers of file
+ * descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting
+ * them and limiting them in another two layers of limits is unnecessary and just complicates things. This
+ * function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOLIMIT (soft +
+ * hard) the only ones that really matter. */
+
+#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
+ int r;
+#endif
+
+#if BUMP_PROC_SYS_FS_FILE_MAX
+ /* The maximum the kernel allows for this since 5.2 is LONG_MAX, use that. (Previously thing where
+ * different but the operation would fail silently.) */
+ r = sysctl_writef("fs/file-max", "%li\n", LONG_MAX);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
+#endif
+
+#if BUMP_PROC_SYS_FS_NR_OPEN
+ int v = INT_MAX;
+
+ /* Arg! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know what they
+ * are. The expression by which the maximum is determined is dependent on the architecture, and is something we
+ * don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since
+ * the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with
+ * INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel
+ * APIs are kernel APIs, so what do can we do... 🤯 */
+
+ for (;;) {
+ int k;
+
+ v &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
+ if (v < 1024) {
+ log_warning("Can't bump fs.nr_open, value too small.");
+ break;
+ }
+
+ k = read_nr_open();
+ if (k < 0) {
+ log_error_errno(k, "Failed to read fs.nr_open: %m");
+ break;
+ }
+ if (k >= v) { /* Already larger */
+ log_debug("Skipping bump, value is already larger.");
+ break;
+ }
+
+ r = sysctl_writef("fs/nr_open", "%i\n", v);
+ if (r == -EINVAL) {
+ log_debug("Couldn't write fs.nr_open as %i, halving it.", v);
+ v /= 2;
+ continue;
+ }
+ if (r < 0) {
+ log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
+ break;
+ }
+
+ log_debug("Successfully bumped fs.nr_open to %i", v);
+ break;
+ }
+#endif
+}
+
+static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
+ struct rlimit new_rlimit;
+ int r, nr;
+
+ /* Get the underlying absolute limit the kernel enforces */
+ nr = read_nr_open();
+
+ /* Calculate the new limits to use for us. Never lower from what we inherited. */
+ new_rlimit = (struct rlimit) {
+ .rlim_cur = MAX((rlim_t) nr, saved_rlimit->rlim_cur),
+ .rlim_max = MAX((rlim_t) nr, saved_rlimit->rlim_max),
+ };
+
+ /* Shortcut if nothing changes. */
+ if (saved_rlimit->rlim_max >= new_rlimit.rlim_max &&
+ saved_rlimit->rlim_cur >= new_rlimit.rlim_cur) {
+ log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
+ return 0;
+ }
+
+ /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows, for
+ * both hard and soft. */
+ r = setrlimit_closest(RLIMIT_NOFILE, &new_rlimit);
+ if (r < 0)
+ return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
+
+ return 0;
+}
+
+static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
+ struct rlimit new_rlimit;
+ uint64_t mm;
+ int r;
+
+ /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
+ * normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence let's bump
+ * the value high enough for our user. */
+
+ /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
+ * must be unsigned, hence this is a given, but let's make this clear here. */
+ assert_cc(RLIM_INFINITY > 0);
+
+ mm = physical_memory() / 8; /* Let's scale how much we allow to be locked by the amount of physical
+ * RAM. We allow an eighth to be locked by us, just to pick a value. */
+
+ new_rlimit = (struct rlimit) {
+ .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
+ .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
+ };
+
+ if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
+ saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
+ log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
+ return 0;
+ }
+
+ r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
+ if (r < 0)
+ return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
+
+ return 0;
+}
+
+static void test_usr(void) {
+
+ /* Check that /usr is either on the same file system as / or mounted already. */
+
+ if (dir_is_empty("/usr") <= 0)
+ return;
+
+ log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
+ "Some things will probably break (sometimes even silently) in mysterious ways. "
+ "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
+}
+
+static int enforce_syscall_archs(Set *archs) {
+#if HAVE_SECCOMP
+ int r;
+
+ if (!is_seccomp_available())
+ return 0;
+
+ r = seccomp_restrict_archs(arg_syscall_archs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
+#endif
+ return 0;
+}
+
+static int status_welcome(void) {
+ _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
+ int r;
+
+ if (!show_status_on(arg_show_status))
+ return 0;
+
+ r = parse_os_release(NULL,
+ "PRETTY_NAME", &pretty_name,
+ "ANSI_COLOR", &ansi_color,
+ NULL);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to read os-release file, ignoring: %m");
+
+ if (log_get_show_color())
+ return status_printf(NULL, 0,
+ "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
+ isempty(ansi_color) ? "1" : ansi_color,
+ isempty(pretty_name) ? "Linux" : pretty_name);
+ else
+ return status_printf(NULL, 0,
+ "\nWelcome to %s!\n",
+ isempty(pretty_name) ? "Linux" : pretty_name);
+}
+
+static int write_container_id(void) {
+ const char *c;
+ int r;
+
+ c = getenv("container");
+ if (isempty(c))
+ return 0;
+
+ RUN_WITH_UMASK(0022)
+ r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
+
+ return 1;
+}
+
+static int bump_unix_max_dgram_qlen(void) {
+ _cleanup_free_ char *qlen = NULL;
+ unsigned long v;
+ int r;
+
+ /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set the value
+ * really really early during boot, so that it is actually applied to all our sockets, including the
+ * $NOTIFY_SOCKET one. */
+
+ r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
+ if (r < 0)
+ return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
+
+ r = safe_atolu(qlen, &v);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
+
+ if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
+ return 0;
+
+ r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
+ if (r < 0)
+ return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
+
+ return 1;
+}
+
+static int fixup_environment(void) {
+ _cleanup_free_ char *term = NULL;
+ const char *t;
+ int r;
+
+ /* Only fix up the environment when we are started as PID 1 */
+ if (getpid_cached() != 1)
+ return 0;
+
+ /* We expect the environment to be set correctly if run inside a container. */
+ if (detect_container() > 0)
+ return 0;
+
+ /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the backend
+ * device used by the console. We try to make a better guess here since some consoles might not have support
+ * for color mode for example.
+ *
+ * However if TERM was configured through the kernel command line then leave it alone. */
+ r = proc_cmdline_get_key("TERM", 0, &term);
+ if (r < 0)
+ return r;
+
+ t = term ?: default_term_for_tty("/dev/console");
+
+ if (setenv("TERM", t, 1) < 0)
+ return -errno;
+
+ /* The kernels sets HOME=/ for init. Let's undo this. */
+ if (path_equal_ptr(getenv("HOME"), "/"))
+ assert_se(unsetenv("HOME") == 0);
+
+ return 0;
+}
+
+static void redirect_telinit(int argc, char *argv[]) {
+
+ /* This is compatibility support for SysV, where calling init as a user is identical to telinit. */
+
+#if HAVE_SYSV_COMPAT
+ if (getpid_cached() == 1)
+ return;
+
+ if (!strstr(program_invocation_short_name, "init"))
+ return;
+
+ execv(SYSTEMCTL_BINARY_PATH, argv);
+ log_error_errno(errno, "Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
+ exit(EXIT_FAILURE);
+#endif
+}
+
+static int become_shutdown(
+ const char *shutdown_verb,
+ int retval) {
+
+ char log_level[DECIMAL_STR_MAX(int) + 1],
+ exit_code[DECIMAL_STR_MAX(uint8_t) + 1],
+ timeout[DECIMAL_STR_MAX(usec_t) + 1];
+
+ const char* command_line[13] = {
+ SYSTEMD_SHUTDOWN_BINARY_PATH,
+ shutdown_verb,
+ "--timeout", timeout,
+ "--log-level", log_level,
+ "--log-target",
+ };
+
+ _cleanup_strv_free_ char **env_block = NULL;
+ size_t pos = 7;
+ int r;
+ usec_t watchdog_timer = 0;
+
+ assert(shutdown_verb);
+ assert(!command_line[pos]);
+ env_block = strv_copy(environ);
+
+ xsprintf(log_level, "%d", log_get_max_level());
+ xsprintf(timeout, "%" PRI_USEC "us", arg_default_timeout_stop_usec);
+
+ switch (log_get_target()) {
+
+ case LOG_TARGET_KMSG:
+ case LOG_TARGET_JOURNAL_OR_KMSG:
+ case LOG_TARGET_SYSLOG_OR_KMSG:
+ command_line[pos++] = "kmsg";
+ break;
+
+ case LOG_TARGET_NULL:
+ command_line[pos++] = "null";
+ break;
+
+ case LOG_TARGET_CONSOLE:
+ default:
+ command_line[pos++] = "console";
+ break;
+ };
+
+ if (log_get_show_color())
+ command_line[pos++] = "--log-color";
+
+ if (log_get_show_location())
+ command_line[pos++] = "--log-location";
+
+ if (log_get_show_time())
+ command_line[pos++] = "--log-time";
+
+ if (streq(shutdown_verb, "exit")) {
+ command_line[pos++] = "--exit-code";
+ command_line[pos++] = exit_code;
+ xsprintf(exit_code, "%d", retval);
+ }
+
+ assert(pos < ELEMENTSOF(command_line));
+
+ if (streq(shutdown_verb, "reboot"))
+ watchdog_timer = arg_reboot_watchdog;
+ else if (streq(shutdown_verb, "kexec"))
+ watchdog_timer = arg_kexec_watchdog;
+
+ if (watchdog_timer > 0 && watchdog_timer != USEC_INFINITY) {
+
+ char *e;
+
+ /* If we reboot or kexec let's set the shutdown
+ * watchdog and tell the shutdown binary to
+ * repeatedly ping it */
+ r = watchdog_set_timeout(&watchdog_timer);
+ watchdog_close(r < 0);
+
+ /* Tell the binary how often to ping, ignore failure */
+ if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, watchdog_timer) > 0)
+ (void) strv_consume(&env_block, e);
+
+ if (arg_watchdog_device &&
+ asprintf(&e, "WATCHDOG_DEVICE=%s", arg_watchdog_device) > 0)
+ (void) strv_consume(&env_block, e);
+ } else
+ watchdog_close(true);
+
+ /* Avoid the creation of new processes forked by the
+ * kernel; at this point, we will not listen to the
+ * signals anyway */
+ if (detect_container() <= 0)
+ (void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
+
+ execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
+ return -errno;
+}
+
+static void initialize_clock(void) {
+ int r;
+
+ /* This is called very early on, before we parse the kernel command line or otherwise figure out why
+ * we are running, but only once. */
+
+ if (clock_is_localtime(NULL) > 0) {
+ int min;
+
+ /*
+ * The very first call of settimeofday() also does a time warp in the kernel.
+ *
+ * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to take care
+ * of maintaining the RTC and do all adjustments. This matches the behavior of Windows, which leaves
+ * the RTC alone if the registry tells that the RTC runs in UTC.
+ */
+ r = clock_set_timezone(&min);
+ if (r < 0)
+ log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
+ else
+ log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
+
+ } else if (!in_initrd())
+ /*
+ * Do a dummy very first call to seal the kernel's time warp magic.
+ *
+ * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with LOCAL, but the
+ * real system could be set up that way. In such case, we need to delay the time-warp or the sealing
+ * until we reach the real system.
+ *
+ * Do no set the kernel's timezone. The concept of local time cannot be supported reliably, the time
+ * will jump or be incorrect at every daylight saving time change. All kernel local time concepts will
+ * be treated as UTC that way.
+ */
+ (void) clock_reset_timewarp();
+
+ r = clock_apply_epoch();
+ if (r < 0)
+ log_error_errno(r, "Current system time is before build time, but cannot correct: %m");
+ else if (r > 0)
+ log_info("System time before build time, advancing clock.");
+}
+
+static void apply_clock_update(void) {
+ struct timespec ts;
+
+ /* This is called later than initialize_clock(), i.e. after we parsed configuration files/kernel
+ * command line and such. */
+
+ if (arg_clock_usec == 0)
+ return;
+
+ if (getpid_cached() != 1)
+ return;
+
+ if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, arg_clock_usec)) < 0)
+ log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
+ else {
+ char buf[FORMAT_TIMESTAMP_MAX];
+
+ log_info("Set system clock to %s, as specified on the kernel command line.",
+ format_timestamp(buf, sizeof(buf), arg_clock_usec));
+ }
+}
+
+static void cmdline_take_random_seed(void) {
+ _cleanup_close_ int random_fd = -1;
+ size_t suggested;
+ int r;
+
+ if (arg_random_seed_size == 0)
+ return;
+
+ if (getpid_cached() != 1)
+ return;
+
+ assert(arg_random_seed);
+ suggested = random_pool_size();
+
+ if (arg_random_seed_size < suggested)
+ log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
+ arg_random_seed_size, suggested);
+
+ random_fd = open("/dev/urandom", O_WRONLY|O_CLOEXEC|O_NOCTTY);
+ if (random_fd < 0) {
+ log_warning_errno(errno, "Failed to open /dev/urandom for writing, ignoring: %m");
+ return;
+ }
+
+ r = random_write_entropy(random_fd, arg_random_seed, arg_random_seed_size, true);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
+ return;
+ }
+
+ log_notice("Successfully credited entropy passed on kernel command line.\n"
+ "Note that the seed provided this way is accessible to unprivileged programs. This functionality should not be used outside of testing environments.");
+}
+
+static void initialize_coredump(bool skip_setup) {
+#if ENABLE_COREDUMP
+ if (getpid_cached() != 1)
+ return;
+
+ /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
+ * will process core dumps for system services by default. */
+ if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
+ log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
+
+ /* But at the same time, turn off the core_pattern logic by default, so that no
+ * coredumps are stored until the systemd-coredump tool is enabled via
+ * sysctl. However it can be changed via the kernel command line later so core
+ * dumps can still be generated during early startup and in initramfs. */
+ if (!skip_setup)
+ disable_coredumps();
+#endif
+}
+
+static void initialize_core_pattern(bool skip_setup) {
+ int r;
+
+ if (skip_setup || !arg_early_core_pattern)
+ return;
+
+ if (getpid_cached() != 1)
+ return;
+
+ r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
+}
+
+static void update_cpu_affinity(bool skip_setup) {
+ _cleanup_free_ char *mask = NULL;
+
+ if (skip_setup || !arg_cpu_affinity.set)
+ return;
+
+ assert(arg_cpu_affinity.allocated > 0);
+
+ mask = cpu_set_to_string(&arg_cpu_affinity);
+ log_debug("Setting CPU affinity to %s.", strnull(mask));
+
+ if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
+ log_warning_errno(errno, "Failed to set CPU affinity: %m");
+}
+
+static void update_numa_policy(bool skip_setup) {
+ int r;
+ _cleanup_free_ char *nodes = NULL;
+ const char * policy = NULL;
+
+ if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
+ return;
+
+ if (DEBUG_LOGGING) {
+ policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
+ nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
+ log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
+ }
+
+ r = apply_numa_policy(&arg_numa_policy);
+ if (r == -EOPNOTSUPP)
+ log_debug_errno(r, "NUMA support not available, ignoring.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to set NUMA memory policy: %m");
+}
+
+static void do_reexecute(
+ int argc,
+ char *argv[],
+ const struct rlimit *saved_rlimit_nofile,
+ const struct rlimit *saved_rlimit_memlock,
+ FDSet *fds,
+ const char *switch_root_dir,
+ const char *switch_root_init,
+ const char **ret_error_message) {
+
+ unsigned i, j, args_size;
+ const char **args;
+ int r;
+
+ assert(saved_rlimit_nofile);
+ assert(saved_rlimit_memlock);
+ assert(ret_error_message);
+
+ /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get rebooted while
+ * we do that */
+ watchdog_close(true);
+
+ /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
+ * the kernel default to its child processes */
+ if (saved_rlimit_nofile->rlim_cur != 0)
+ (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
+ if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
+ (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
+
+ if (switch_root_dir) {
+ /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
+ * SIGCHLD for them after deserializing. */
+ broadcast_signal(SIGTERM, false, true, arg_default_timeout_stop_usec);
+
+ /* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
+ r = switch_root(switch_root_dir, "/mnt", true, MS_MOVE);
+ if (r < 0)
+ log_error_errno(r, "Failed to switch root, trying to continue: %m");
+ }
+
+ args_size = MAX(6, argc+1);
+ args = newa(const char*, args_size);
+
+ if (!switch_root_init) {
+ char sfd[DECIMAL_STR_MAX(int) + 1];
+
+ /* First try to spawn ourselves with the right path, and with full serialization. We do this only if
+ * the user didn't specify an explicit init to spawn. */
+
+ assert(arg_serialization);
+ assert(fds);
+
+ xsprintf(sfd, "%i", fileno(arg_serialization));
+
+ i = 0;
+ args[i++] = SYSTEMD_BINARY_PATH;
+ if (switch_root_dir)
+ args[i++] = "--switched-root";
+ args[i++] = arg_system ? "--system" : "--user";
+ args[i++] = "--deserialize";
+ args[i++] = sfd;
+ args[i++] = NULL;
+
+ assert(i <= args_size);
+
+ /*
+ * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do this is on
+ * its own on exec(), but it will do it on exit(). Hence, to ensure we get a summary here, fork() off
+ * a child, let it exit() cleanly, so that it prints the summary, and wait() for it in the parent,
+ * before proceeding into the exec().
+ */
+ valgrind_summary_hack();
+
+ (void) execv(args[0], (char* const*) args);
+ log_debug_errno(errno, "Failed to execute our own binary, trying fallback: %m");
+ }
+
+ /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and envp[]. (Well,
+ * modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[], but let's hope that
+ * doesn't matter.) */
+
+ arg_serialization = safe_fclose(arg_serialization);
+ fds = fdset_free(fds);
+
+ /* Reopen the console */
+ (void) make_console_stdio();
+
+ for (j = 1, i = 1; j < (unsigned) argc; j++)
+ args[i++] = argv[j];
+ args[i++] = NULL;
+ assert(i <= args_size);
+
+ /* Re-enable any blocked signals, especially important if we switch from initial ramdisk to init=... */
+ (void) reset_all_signal_handlers();
+ (void) reset_signal_mask();
+ (void) rlimit_nofile_safe();
+
+ if (switch_root_init) {
+ args[0] = switch_root_init;
+ (void) execve(args[0], (char* const*) args, saved_env);
+ log_warning_errno(errno, "Failed to execute configured init, trying fallback: %m");
+ }
+
+ args[0] = "/sbin/init";
+ (void) execv(args[0], (char* const*) args);
+ r = -errno;
+
+ manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
+ ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
+ "Failed to execute /sbin/init");
+
+ if (r == -ENOENT) {
+ log_warning("No /sbin/init, trying fallback");
+
+ args[0] = "/bin/sh";
+ args[1] = NULL;
+ (void) execve(args[0], (char* const*) args, saved_env);
+ log_error_errno(errno, "Failed to execute /bin/sh, giving up: %m");
+ } else
+ log_warning_errno(r, "Failed to execute /sbin/init, giving up: %m");
+
+ *ret_error_message = "Failed to execute fallback shell";
+}
+
+static int invoke_main_loop(
+ Manager *m,
+ const struct rlimit *saved_rlimit_nofile,
+ const struct rlimit *saved_rlimit_memlock,
+ bool *ret_reexecute,
+ int *ret_retval, /* Return parameters relevant for shutting down */
+ const char **ret_shutdown_verb, /* … */
+ FDSet **ret_fds, /* Return parameters for reexecuting */
+ char **ret_switch_root_dir, /* … */
+ char **ret_switch_root_init, /* … */
+ const char **ret_error_message) {
+
+ int r;
+
+ assert(m);
+ assert(saved_rlimit_nofile);
+ assert(saved_rlimit_memlock);
+ assert(ret_reexecute);
+ assert(ret_retval);
+ assert(ret_shutdown_verb);
+ assert(ret_fds);
+ assert(ret_switch_root_dir);
+ assert(ret_switch_root_init);
+ assert(ret_error_message);
+
+ for (;;) {
+ r = manager_loop(m);
+ if (r < 0) {
+ *ret_error_message = "Failed to run main loop";
+ return log_emergency_errno(r, "Failed to run main loop: %m");
+ }
+
+ switch ((ManagerObjective) r) {
+
+ case MANAGER_RELOAD: {
+ LogTarget saved_log_target;
+ int saved_log_level;
+
+ log_info("Reloading.");
+
+ /* First, save any overridden log level/target, then parse the configuration file, which might
+ * change the log level to new settings. */
+
+ saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
+ saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
+
+ (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
+
+ set_manager_defaults(m);
+ set_manager_settings(m);
+
+ update_cpu_affinity(false);
+ update_numa_policy(false);
+
+ if (saved_log_level >= 0)
+ manager_override_log_level(m, saved_log_level);
+ if (saved_log_target >= 0)
+ manager_override_log_target(m, saved_log_target);
+
+ r = manager_reload(m);
+ if (r < 0)
+ /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
+ m->objective = MANAGER_OK;
+
+ break;
+ }
+
+ case MANAGER_REEXECUTE:
+
+ r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
+ if (r < 0) {
+ *ret_error_message = "Failed to prepare for reexecution";
+ return r;
+ }
+
+ log_notice("Reexecuting.");
+
+ *ret_reexecute = true;
+ *ret_retval = EXIT_SUCCESS;
+ *ret_shutdown_verb = NULL;
+ *ret_switch_root_dir = *ret_switch_root_init = NULL;
+
+ return 0;
+
+ case MANAGER_SWITCH_ROOT:
+ if (!m->switch_root_init) {
+ r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
+ if (r < 0) {
+ *ret_error_message = "Failed to prepare for reexecution";
+ return r;
+ }
+ } else
+ *ret_fds = NULL;
+
+ log_notice("Switching root.");
+
+ *ret_reexecute = true;
+ *ret_retval = EXIT_SUCCESS;
+ *ret_shutdown_verb = NULL;
+
+ /* Steal the switch root parameters */
+ *ret_switch_root_dir = TAKE_PTR(m->switch_root);
+ *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
+
+ return 0;
+
+ case MANAGER_EXIT:
+
+ if (MANAGER_IS_USER(m)) {
+ log_debug("Exit.");
+
+ *ret_reexecute = false;
+ *ret_retval = m->return_value;
+ *ret_shutdown_verb = NULL;
+ *ret_fds = NULL;
+ *ret_switch_root_dir = *ret_switch_root_init = NULL;
+
+ return 0;
+ }
+
+ _fallthrough_;
+ case MANAGER_REBOOT:
+ case MANAGER_POWEROFF:
+ case MANAGER_HALT:
+ case MANAGER_KEXEC: {
+ static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
+ [MANAGER_EXIT] = "exit",
+ [MANAGER_REBOOT] = "reboot",
+ [MANAGER_POWEROFF] = "poweroff",
+ [MANAGER_HALT] = "halt",
+ [MANAGER_KEXEC] = "kexec",
+ };
+
+ log_notice("Shutting down.");
+
+ *ret_reexecute = false;
+ *ret_retval = m->return_value;
+ assert_se(*ret_shutdown_verb = table[m->objective]);
+ *ret_fds = NULL;
+ *ret_switch_root_dir = *ret_switch_root_init = NULL;
+
+ return 0;
+ }
+
+ default:
+ assert_not_reached("Unknown or unexpected manager objective.");
+ }
+ }
+}
+
+static void log_execution_mode(bool *ret_first_boot) {
+ assert(ret_first_boot);
+
+ if (arg_system) {
+ int v;
+
+ log_info("systemd " GIT_VERSION " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
+ arg_action == ACTION_TEST ? "test " : "" );
+
+ v = detect_virtualization();
+ if (v > 0)
+ log_info("Detected virtualization %s.", virtualization_to_string(v));
+
+ log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
+
+ if (in_initrd()) {
+ *ret_first_boot = false;
+ log_info("Running in initial RAM disk.");
+ } else {
+ int r;
+ _cleanup_free_ char *id_text = NULL;
+
+ /* Let's check whether we are in first boot. We use /etc/machine-id as flag file
+ * for this: If it is missing or contains the value "uninitialized", this is the
+ * first boot. In any other case, it is not. This allows container managers and
+ * installers to provision a couple of files already. If the container manager
+ * wants to provision the machine ID itself it should pass $container_uuid to PID 1. */
+
+ r = read_one_line_file("/etc/machine-id", &id_text);
+ if (r < 0 || streq(id_text, "uninitialized")) {
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Unexpected error while reading /etc/machine-id, ignoring: %m");
+
+ *ret_first_boot = true;
+ log_info("Detected first boot.");
+ } else {
+ *ret_first_boot = false;
+ log_debug("Detected initialized system, this is not the first boot.");
+ }
+ }
+ } else {
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t;
+
+ t = uid_to_name(getuid());
+ log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (" SYSTEMD_FEATURES ")",
+ arg_action == ACTION_TEST ? " test" : "", getuid(), strna(t));
+ }
+
+ *ret_first_boot = false;
+ }
+}
+
+static int initialize_runtime(
+ bool skip_setup,
+ bool first_boot,
+ struct rlimit *saved_rlimit_nofile,
+ struct rlimit *saved_rlimit_memlock,
+ const char **ret_error_message) {
+ int r;
+
+ assert(ret_error_message);
+
+ /* Sets up various runtime parameters. Many of these initializations are conditionalized:
+ *
+ * - Some only apply to --system instances
+ * - Some only apply to --user instances
+ * - Some only apply when we first start up, but not when we reexecute
+ */
+
+ if (arg_action != ACTION_RUN)
+ return 0;
+
+ update_cpu_affinity(skip_setup);
+ update_numa_policy(skip_setup);
+
+ if (arg_system) {
+ /* Make sure we leave a core dump without panicking the kernel. */
+ install_crash_handler();
+
+ if (!skip_setup) {
+ r = mount_cgroup_controllers();
+ if (r < 0) {
+ *ret_error_message = "Failed to mount cgroup hierarchies";
+ return r;
+ }
+
+ status_welcome();
+ hostname_setup();
+ /* Force transient machine-id on first boot. */
+ machine_id_setup(NULL, first_boot, arg_machine_id, NULL);
+ (void) loopback_setup();
+ bump_unix_max_dgram_qlen();
+ bump_file_max_and_nr_open();
+ test_usr();
+ write_container_id();
+ }
+
+ if (arg_watchdog_device) {
+ r = watchdog_set_device(arg_watchdog_device);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
+ }
+ } else {
+ _cleanup_free_ char *p = NULL;
+
+ /* Create the runtime directory and place the inaccessible device nodes there, if we run in
+ * user mode. In system mode mount_setup() already did that. */
+
+ r = xdg_user_runtime_dir(&p, "/systemd");
+ if (r < 0) {
+ *ret_error_message = "$XDG_RUNTIME_DIR is not set";
+ return log_emergency_errno(r, "Failed to determine $XDG_RUNTIME_DIR path: %m");
+ }
+
+ (void) mkdir_p_label(p, 0755);
+ (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
+ }
+
+ if (arg_timer_slack_nsec != NSEC_INFINITY)
+ if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
+ log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
+
+ if (arg_system && !cap_test_all(arg_capability_bounding_set)) {
+ r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
+ if (r < 0) {
+ *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
+ return log_emergency_errno(r, "Failed to drop capability bounding set of usermode helpers: %m");
+ }
+
+ r = capability_bounding_set_drop(arg_capability_bounding_set, true);
+ if (r < 0) {
+ *ret_error_message = "Failed to drop capability bounding set";
+ return log_emergency_errno(r, "Failed to drop capability bounding set: %m");
+ }
+ }
+
+ if (arg_system && arg_no_new_privs) {
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+ *ret_error_message = "Failed to disable new privileges";
+ return log_emergency_errno(errno, "Failed to disable new privileges: %m");
+ }
+ }
+
+ if (arg_syscall_archs) {
+ r = enforce_syscall_archs(arg_syscall_archs);
+ if (r < 0) {
+ *ret_error_message = "Failed to set syscall architectures";
+ return r;
+ }
+ }
+
+ if (!arg_system)
+ /* Become reaper of our children */
+ if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
+ log_warning_errno(errno, "Failed to make us a subreaper: %m");
+
+ /* Bump up RLIMIT_NOFILE for systemd itself */
+ (void) bump_rlimit_nofile(saved_rlimit_nofile);
+ (void) bump_rlimit_memlock(saved_rlimit_memlock);
+
+ return 0;
+}
+
+static int do_queue_default_job(
+ Manager *m,
+ const char **ret_error_message) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *unit;
+ Job *job;
+ Unit *target;
+ int r;
+
+ if (arg_default_unit)
+ unit = arg_default_unit;
+ else if (in_initrd())
+ unit = SPECIAL_INITRD_TARGET;
+ else
+ unit = SPECIAL_DEFAULT_TARGET;
+
+ log_debug("Activating default unit: %s", unit);
+
+ r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
+ if (r < 0 && in_initrd() && !arg_default_unit) {
+ /* Fall back to default.target, which we used to always use by default. Only do this if no
+ * explicit configuration was given. */
+
+ log_info("Falling back to " SPECIAL_DEFAULT_TARGET ".");
+
+ r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
+ }
+ if (r < 0) {
+ log_info("Falling back to " SPECIAL_RESCUE_TARGET ".");
+
+ r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
+ if (r < 0) {
+ *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
+ : "Failed to load " SPECIAL_RESCUE_TARGET;
+ return r;
+ }
+ }
+
+ assert(target->load_state == UNIT_LOADED);
+
+ r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, NULL, &error, &job);
+ if (r == -EPERM) {
+ log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
+
+ sd_bus_error_free(&error);
+
+ r = manager_add_job(m, JOB_START, target, JOB_REPLACE, NULL, &error, &job);
+ if (r < 0) {
+ *ret_error_message = "Failed to start default target";
+ return log_emergency_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
+ }
+
+ } else if (r < 0) {
+ *ret_error_message = "Failed to isolate default target";
+ return log_emergency_errno(r, "Failed to isolate default target: %s", bus_error_message(&error, r));
+ } else
+ log_info("Queued %s job for default target %s.",
+ job_type_to_string(job->type),
+ unit_status_string(job->unit));
+
+ m->default_unit_job_id = job->id;
+
+ return 0;
+}
+
+static void save_rlimits(struct rlimit *saved_rlimit_nofile,
+ struct rlimit *saved_rlimit_memlock) {
+
+ assert(saved_rlimit_nofile);
+ assert(saved_rlimit_memlock);
+
+ if (getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile) < 0)
+ log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
+
+ if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock) < 0)
+ log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
+}
+
+static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
+ struct rlimit *rl;
+
+ if (arg_default_rlimit[RLIMIT_NOFILE])
+ return;
+
+ /* Make sure forked processes get limits based on the original kernel setting */
+
+ rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
+ if (!rl) {
+ log_oom();
+ return;
+ }
+
+ /* Bump the hard limit for system services to a substantially higher value. The default
+ * hard limit current kernels set is pretty low (4K), mostly for historical
+ * reasons. According to kernel developers, the fd handling in recent kernels has been
+ * optimized substantially enough, so that we can bump the limit now, without paying too
+ * high a price in memory or performance. Note however that we only bump the hard limit,
+ * not the soft limit. That's because select() works the way it works, and chokes on fds
+ * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
+ * unexpecting programs that they get fds higher than what they can process using
+ * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
+ * this pitfall: programs that are written by folks aware of the select() problem in mind
+ * (and thus use poll()/epoll instead of select(), the way everybody should) can
+ * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
+ * we pass. */
+ if (arg_system) {
+ int nr;
+
+ /* Get the underlying absolute limit the kernel enforces */
+ nr = read_nr_open();
+
+ rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
+ }
+
+ /* If for some reason we were invoked with a soft limit above 1024 (which should never
+ * happen!, but who knows what we get passed in from pam_limit when invoked as --user
+ * instance), then lower what we pass on to not confuse our children */
+ rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
+
+ arg_default_rlimit[RLIMIT_NOFILE] = rl;
+}
+
+static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
+ struct rlimit *rl;
+
+ /* Pass the original value down to invoked processes */
+
+ if (arg_default_rlimit[RLIMIT_MEMLOCK])
+ return;
+
+ rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
+ if (!rl) {
+ log_oom();
+ return;
+ }
+
+ arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
+}
+
+static void reset_arguments(void) {
+ /* Frees/resets arg_* variables, with a few exceptions commented below. */
+
+ arg_default_unit = mfree(arg_default_unit);
+
+ /* arg_system — ignore */
+
+ arg_dump_core = true;
+ arg_crash_chvt = -1;
+ arg_crash_shell = false;
+ arg_crash_reboot = false;
+ arg_confirm_spawn = mfree(arg_confirm_spawn);
+ arg_show_status = _SHOW_STATUS_INVALID;
+ arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
+ arg_switched_root = false;
+ arg_pager_flags = 0;
+ arg_service_watchdogs = true;
+ arg_default_std_output = EXEC_OUTPUT_JOURNAL;
+ arg_default_std_error = EXEC_OUTPUT_INHERIT;
+ arg_default_restart_usec = DEFAULT_RESTART_USEC;
+ arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
+ arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
+ arg_default_timeout_abort_usec = DEFAULT_TIMEOUT_USEC;
+ arg_default_timeout_abort_set = false;
+ arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
+ arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
+ arg_runtime_watchdog = 0;
+ arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
+ arg_kexec_watchdog = 0;
+ arg_early_core_pattern = NULL;
+ arg_watchdog_device = NULL;
+
+ arg_default_environment = strv_free(arg_default_environment);
+ rlimit_free_all(arg_default_rlimit);
+
+ arg_capability_bounding_set = CAP_ALL;
+ arg_no_new_privs = false;
+ arg_timer_slack_nsec = NSEC_INFINITY;
+ arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
+
+ arg_syscall_archs = set_free(arg_syscall_archs);
+
+ /* arg_serialization — ignore */
+
+ arg_default_cpu_accounting = -1;
+ arg_default_io_accounting = false;
+ arg_default_ip_accounting = false;
+ arg_default_blockio_accounting = false;
+ arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
+ arg_default_tasks_accounting = true;
+ arg_default_tasks_max = DEFAULT_TASKS_MAX;
+ arg_machine_id = (sd_id128_t) {};
+ arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
+ arg_default_oom_policy = OOM_STOP;
+
+ cpu_set_reset(&arg_cpu_affinity);
+ numa_policy_reset(&arg_numa_policy);
+
+ arg_random_seed = mfree(arg_random_seed);
+ arg_random_seed_size = 0;
+ arg_clock_usec = 0;
+}
+
+static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
+ const struct rlimit *saved_rlimit_memlock) {
+ int r;
+
+ assert(saved_rlimit_nofile);
+ assert(saved_rlimit_memlock);
+
+ /* Assign configuration defaults */
+ reset_arguments();
+
+ r = parse_config_file();
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse config file, ignoring: %m");
+
+ if (arg_system) {
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+ }
+
+ /* Initialize some default rlimits for services if they haven't been configured */
+ fallback_rlimit_nofile(saved_rlimit_nofile);
+ fallback_rlimit_memlock(saved_rlimit_memlock);
+
+ /* Note that this also parses bits from the kernel command line, including "debug". */
+ log_parse_environment();
+
+ /* Initialize the show status setting if it hasn't been set explicitly yet */
+ if (arg_show_status == _SHOW_STATUS_INVALID)
+ arg_show_status = SHOW_STATUS_YES;
+
+ return 0;
+}
+
+static int safety_checks(void) {
+
+ if (getpid_cached() == 1 &&
+ arg_action != ACTION_RUN)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Unsupported execution mode while PID 1.");
+
+ if (getpid_cached() == 1 &&
+ !arg_system)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Can't run --user mode as PID 1.");
+
+ if (arg_action == ACTION_RUN &&
+ arg_system &&
+ getpid_cached() != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Can't run system mode unless PID 1.");
+
+ if (arg_action == ACTION_TEST &&
+ geteuid() == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Don't run test mode as root.");
+
+ if (!arg_system &&
+ arg_action == ACTION_RUN &&
+ sd_booted() <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Trying to run as user instance, but the system has not been booted with systemd.");
+
+ if (!arg_system &&
+ arg_action == ACTION_RUN &&
+ !getenv("XDG_RUNTIME_DIR"))
+ return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
+ "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
+
+ if (arg_system &&
+ arg_action == ACTION_RUN &&
+ running_in_chroot() > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Cannot be run in a chroot() environment.");
+
+ return 0;
+}
+
+static int initialize_security(
+ bool *loaded_policy,
+ dual_timestamp *security_start_timestamp,
+ dual_timestamp *security_finish_timestamp,
+ const char **ret_error_message) {
+
+ int r;
+
+ assert(loaded_policy);
+ assert(security_start_timestamp);
+ assert(security_finish_timestamp);
+ assert(ret_error_message);
+
+ dual_timestamp_get(security_start_timestamp);
+
+ r = mac_selinux_setup(loaded_policy);
+ if (r < 0) {
+ *ret_error_message = "Failed to load SELinux policy";
+ return r;
+ }
+
+ r = mac_smack_setup(loaded_policy);
+ if (r < 0) {
+ *ret_error_message = "Failed to load SMACK policy";
+ return r;
+ }
+
+ r = mac_apparmor_setup();
+ if (r < 0) {
+ *ret_error_message = "Failed to load AppArmor policy";
+ return r;
+ }
+
+ r = ima_setup();
+ if (r < 0) {
+ *ret_error_message = "Failed to load IMA policy";
+ return r;
+ }
+
+ dual_timestamp_get(security_finish_timestamp);
+ return 0;
+}
+
+static void test_summary(Manager *m) {
+ assert(m);
+
+ printf("-> By units:\n");
+ manager_dump_units(m, stdout, "\t");
+
+ printf("-> By jobs:\n");
+ manager_dump_jobs(m, stdout, "\t");
+}
+
+static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
+ int r;
+
+ assert(ret_fds);
+ assert(ret_error_message);
+
+ r = fdset_new_fill(ret_fds);
+ if (r < 0) {
+ *ret_error_message = "Failed to allocate fd set";
+ return log_emergency_errno(r, "Failed to allocate fd set: %m");
+ }
+
+ fdset_cloexec(*ret_fds, true);
+
+ if (arg_serialization)
+ assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
+
+ return 0;
+}
+
+static void setup_console_terminal(bool skip_setup) {
+
+ if (!arg_system)
+ return;
+
+ /* Become a session leader if we aren't one yet. */
+ (void) setsid();
+
+ /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a controlling
+ * tty. */
+ (void) release_terminal();
+
+ /* Reset the console, but only if this is really init and we are freshly booted */
+ if (getpid_cached() == 1 && !skip_setup)
+ (void) console_setup();
+}
+
+static bool early_skip_setup_check(int argc, char *argv[]) {
+ bool found_deserialize = false;
+ int i;
+
+ /* Determine if this is a reexecution or normal bootup. We do the full command line parsing much later, so
+ * let's just have a quick peek here. Note that if we have switched root, do all the special setup things
+ * anyway, even if in that case we also do deserialization. */
+
+ for (i = 1; i < argc; i++) {
+ if (streq(argv[i], "--switched-root"))
+ return false; /* If we switched root, don't skip the setup. */
+ else if (streq(argv[i], "--deserialize"))
+ found_deserialize = true;
+ }
+
+ return found_deserialize; /* When we are deserializing, then we are reexecuting, hence avoid the extensive setup */
+}
+
+static int save_env(void) {
+ char **l;
+
+ l = strv_copy(environ);
+ if (!l)
+ return -ENOMEM;
+
+ strv_free_and_replace(saved_env, l);
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+
+ dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,
+ security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;
+ struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
+ saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
+ * in. Note we use different values
+ * for the two that indicate whether
+ * these fields are initialized! */
+ bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;
+ char *switch_root_dir = NULL, *switch_root_init = NULL;
+ usec_t before_startup, after_startup;
+ static char systemd[] = "systemd";
+ char timespan[FORMAT_TIMESPAN_MAX];
+ const char *shutdown_verb = NULL, *error_message = NULL;
+ int r, retval = EXIT_FAILURE;
+ Manager *m = NULL;
+ FDSet *fds = NULL;
+
+ /* SysV compatibility: redirect init → telinit */
+ redirect_telinit(argc, argv);
+
+ /* Take timestamps early on */
+ dual_timestamp_from_monotonic(&kernel_timestamp, 0);
+ dual_timestamp_get(&userspace_timestamp);
+
+ /* Figure out whether we need to do initialize the system, or if we already did that because we are
+ * reexecuting */
+ skip_setup = early_skip_setup_check(argc, argv);
+
+ /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we
+ * are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */
+ program_invocation_short_name = systemd;
+ (void) prctl(PR_SET_NAME, systemd);
+
+ /* Save the original command line */
+ save_argc_argv(argc, argv);
+
+ /* Save the original environment as we might need to restore it if we're requested to execute another
+ * system manager later. */
+ r = save_env();
+ if (r < 0) {
+ error_message = "Failed to copy environment block";
+ goto finish;
+ }
+
+ /* Make sure that if the user says "syslog" we actually log to the journal. */
+ log_set_upgrade_syslog_to_journal(true);
+
+ if (getpid_cached() == 1) {
+ /* When we run as PID 1 force system mode */
+ arg_system = true;
+
+ /* Disable the umask logic */
+ umask(0);
+
+ /* Make sure that at least initially we do not ever log to journald/syslogd, because it might not be
+ * activated yet (even though the log socket for it exists). */
+ log_set_prohibit_ipc(true);
+
+ /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This is
+ * important so that we never end up logging to any foreign stderr, for example if we have to log in a
+ * child process right before execve()'ing the actual binary, at a point in time where socket
+ * activation stderr/stdout area already set up. */
+ log_set_always_reopen_console(true);
+
+ if (detect_container() <= 0) {
+
+ /* Running outside of a container as PID 1 */
+ log_set_target(LOG_TARGET_KMSG);
+ log_open();
+
+ if (in_initrd())
+ initrd_timestamp = userspace_timestamp;
+
+ if (!skip_setup) {
+ r = mount_setup_early();
+ if (r < 0) {
+ error_message = "Failed to mount early API filesystems";
+ goto finish;
+ }
+
+ /* Let's open the log backend a second time, in case the first time didn't
+ * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
+ * available, and it previously wasn't. */
+ log_open();
+
+ disable_printk_ratelimit();
+
+ r = initialize_security(
+ &loaded_policy,
+ &security_start_timestamp,
+ &security_finish_timestamp,
+ &error_message);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (mac_selinux_init() < 0) {
+ error_message = "Failed to initialize SELinux support";
+ goto finish;
+ }
+
+ if (!skip_setup)
+ initialize_clock();
+
+ /* Set the default for later on, but don't actually open the logs like this for now. Note that
+ * if we are transitioning from the initrd there might still be journal fd open, and we
+ * shouldn't attempt opening that before we parsed /proc/cmdline which might redirect output
+ * elsewhere. */
+ log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
+
+ } else {
+ /* Running inside a container, as PID 1 */
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_open();
+
+ /* For later on, see above... */
+ log_set_target(LOG_TARGET_JOURNAL);
+
+ /* clear the kernel timestamp, because we are in a container */
+ kernel_timestamp = DUAL_TIMESTAMP_NULL;
+ }
+
+ initialize_coredump(skip_setup);
+
+ r = fixup_environment();
+ if (r < 0) {
+ log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");
+ error_message = "Failed to fix up PID1 environment";
+ goto finish;
+ }
+
+ /* Try to figure out if we can use colors with the console. No need to do that for user instances since
+ * they never log into the console. */
+ log_show_color(colors_enabled());
+
+ r = make_null_stdio();
+ if (r < 0)
+ log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
+
+ /* Load the kernel modules early. */
+ if (!skip_setup)
+ kmod_setup();
+
+ /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
+ r = mount_setup(loaded_policy, skip_setup);
+ if (r < 0) {
+ error_message = "Failed to mount API filesystems";
+ goto finish;
+ }
+
+ /* The efivarfs is now mounted, let's read the random seed off it */
+ (void) efi_take_random_seed();
+
+ /* Cache command-line options passed from EFI variables */
+ if (!skip_setup)
+ (void) cache_efi_options_variable();
+ } else {
+ /* Running as user instance */
+ arg_system = false;
+ log_set_target(LOG_TARGET_AUTO);
+ log_open();
+
+ /* clear the kernel timestamp, because we are not PID 1 */
+ kernel_timestamp = DUAL_TIMESTAMP_NULL;
+
+ if (mac_selinux_init() < 0) {
+ error_message = "Failed to initialize SELinux support";
+ goto finish;
+ }
+ }
+
+ /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
+ * transitioning from the initrd to the main systemd or suchlike. */
+ save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
+
+ /* Reset all signal handlers. */
+ (void) reset_all_signal_handlers();
+ (void) ignore_signals(SIGNALS_IGNORE, -1);
+
+ (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
+
+ r = parse_argv(argc, argv);
+ if (r < 0) {
+ error_message = "Failed to parse commandline arguments";
+ goto finish;
+ }
+
+ r = safety_checks();
+ if (r < 0)
+ goto finish;
+
+ if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_action != ACTION_RUN)
+ skip_setup = true;
+
+ if (arg_action == ACTION_HELP) {
+ retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+ goto finish;
+ } else if (arg_action == ACTION_VERSION) {
+ retval = version();
+ goto finish;
+ } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
+ unit_dump_config_items(stdout);
+ retval = EXIT_SUCCESS;
+ goto finish;
+ } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
+ dump_bus_properties(stdout);
+ retval = EXIT_SUCCESS;
+ goto finish;
+ } else if (arg_action == ACTION_BUS_INTROSPECT) {
+ r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
+ retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+ goto finish;
+ }
+
+ assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
+
+ /* Move out of the way, so that we won't block unmounts */
+ assert_se(chdir("/") == 0);
+
+ if (arg_action == ACTION_RUN) {
+ if (!skip_setup) {
+ /* Apply the systemd.clock_usec= kernel command line switch */
+ apply_clock_update();
+
+ /* Apply random seed from kernel command line */
+ cmdline_take_random_seed();
+ }
+
+ /* A core pattern might have been specified via the cmdline. */
+ initialize_core_pattern(skip_setup);
+
+ /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
+ log_close();
+
+ /* Remember open file descriptors for later deserialization */
+ r = collect_fds(&fds, &error_message);
+ if (r < 0)
+ goto finish;
+
+ /* Give up any control of the console, but make sure its initialized. */
+ setup_console_terminal(skip_setup);
+
+ /* Open the logging devices, if possible and necessary */
+ log_open();
+ }
+
+ log_execution_mode(&first_boot);
+
+ r = initialize_runtime(skip_setup,
+ first_boot,
+ &saved_rlimit_nofile,
+ &saved_rlimit_memlock,
+ &error_message);
+ if (r < 0)
+ goto finish;
+
+ r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
+ arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
+ &m);
+ if (r < 0) {
+ log_emergency_errno(r, "Failed to allocate manager object: %m");
+ error_message = "Failed to allocate manager object";
+ goto finish;
+ }
+
+ m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
+ m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
+ m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
+ m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
+ m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
+
+ set_manager_defaults(m);
+ set_manager_settings(m);
+ manager_set_first_boot(m, first_boot);
+
+ /* Remember whether we should queue the default job */
+ queue_default_job = !arg_serialization || arg_switched_root;
+
+ before_startup = now(CLOCK_MONOTONIC);
+
+ r = manager_startup(m, arg_serialization, fds);
+ if (r < 0) {
+ error_message = "Failed to start up manager";
+ goto finish;
+ }
+
+ /* This will close all file descriptors that were opened, but not claimed by any unit. */
+ fds = fdset_free(fds);
+ arg_serialization = safe_fclose(arg_serialization);
+
+ if (queue_default_job) {
+ r = do_queue_default_job(m, &error_message);
+ if (r < 0)
+ goto finish;
+ }
+
+ after_startup = now(CLOCK_MONOTONIC);
+
+ log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
+ "Loaded units and determined initial transaction in %s.",
+ format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 100 * USEC_PER_MSEC));
+
+ if (arg_action == ACTION_TEST) {
+ test_summary(m);
+ retval = EXIT_SUCCESS;
+ goto finish;
+ }
+
+ (void) invoke_main_loop(m,
+ &saved_rlimit_nofile,
+ &saved_rlimit_memlock,
+ &reexecute,
+ &retval,
+ &shutdown_verb,
+ &fds,
+ &switch_root_dir,
+ &switch_root_init,
+ &error_message);
+
+finish:
+ pager_close();
+
+ if (m) {
+ arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
+ arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
+ m = manager_free(m);
+ }
+
+ mac_selinux_finish();
+
+ if (reexecute)
+ do_reexecute(argc, argv,
+ &saved_rlimit_nofile,
+ &saved_rlimit_memlock,
+ fds,
+ switch_root_dir,
+ switch_root_init,
+ &error_message); /* This only returns if reexecution failed */
+
+ arg_serialization = safe_fclose(arg_serialization);
+ fds = fdset_free(fds);
+
+ saved_env = strv_free(saved_env);
+
+#if HAVE_VALGRIND_VALGRIND_H
+ /* If we are PID 1 and running under valgrind, then let's exit
+ * here explicitly. valgrind will only generate nice output on
+ * exit(), not on exec(), hence let's do the former not the
+ * latter here. */
+ if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
+ /* Cleanup watchdog_device strings for valgrind. We need them
+ * in become_shutdown() so normally we cannot free them yet. */
+ watchdog_free_device();
+ arg_watchdog_device = mfree(arg_watchdog_device);
+ reset_arguments();
+ return retval;
+ }
+#endif
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ __lsan_do_leak_check();
+#endif
+
+ if (shutdown_verb) {
+ r = become_shutdown(shutdown_verb, retval);
+ log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
+ error_message = "Failed to execute shutdown binary";
+ }
+
+ watchdog_free_device();
+ arg_watchdog_device = mfree(arg_watchdog_device);
+
+ if (getpid_cached() == 1) {
+ if (error_message)
+ manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
+ ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
+ "%s.", error_message);
+ freeze_or_exit_or_reboot();
+ }
+
+ reset_arguments();
+ return retval;
+}
diff --git a/src/core/manager.c b/src/core/manager.c
new file mode 100644
index 0000000..a1d6f7c
--- /dev/null
+++ b/src/core/manager.c
@@ -0,0 +1,4975 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kd.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/reboot.h>
+#include <sys/timerfd.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#include "sd-daemon.h"
+#include "sd-messages.h"
+#include "sd-path.h"
+
+#include "all-units.h"
+#include "alloc-util.h"
+#include "audit-fd.h"
+#include "boot-timestamps.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-kernel.h"
+#include "bus-util.h"
+#include "clean-ipc.h"
+#include "clock-util.h"
+#include "core-varlink.h"
+#include "dbus-job.h"
+#include "dbus-manager.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "exec-util.h"
+#include "execute.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "generator-setup.h"
+#include "hashmap.h"
+#include "install.h"
+#include "io-util.h"
+#include "label.h"
+#include "locale-setup.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "macro.h"
+#include "manager.h"
+#include "memory-util.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "ratelimit.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "serialize.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "sysctl-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "transaction.h"
+#include "umask-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "virt.h"
+#include "watchdog.h"
+
+#define NOTIFY_RCVBUF_SIZE (8*1024*1024)
+#define CGROUPS_AGENT_RCVBUF_SIZE (8*1024*1024)
+
+/* Initial delay and the interval for printing status messages about running jobs */
+#define JOBS_IN_PROGRESS_WAIT_USEC (2*USEC_PER_SEC)
+#define JOBS_IN_PROGRESS_QUIET_WAIT_USEC (25*USEC_PER_SEC)
+#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
+#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
+
+/* If there are more than 1K bus messages queue across our API and direct buses, then let's not add more on top until
+ * the queue gets more empty. */
+#define MANAGER_BUS_BUSY_THRESHOLD 1024LU
+
+/* How many units and jobs to process of the bus queue before returning to the event loop. */
+#define MANAGER_BUS_MESSAGE_BUDGET 100U
+
+static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
+static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
+static int manager_dispatch_sigchld(sd_event_source *source, void *userdata);
+static int manager_dispatch_timezone_change(sd_event_source *source, const struct inotify_event *event, void *userdata);
+static int manager_run_environment_generators(Manager *m);
+static int manager_run_generators(Manager *m);
+static void manager_vacuum(Manager *m);
+
+static usec_t manager_watch_jobs_next_time(Manager *m) {
+ return usec_add(now(CLOCK_MONOTONIC),
+ show_status_on(m->show_status) ? JOBS_IN_PROGRESS_WAIT_USEC :
+ JOBS_IN_PROGRESS_QUIET_WAIT_USEC);
+}
+
+static void manager_watch_jobs_in_progress(Manager *m) {
+ usec_t next;
+ int r;
+
+ assert(m);
+
+ /* We do not want to show the cylon animation if the user
+ * needs to confirm service executions otherwise confirmation
+ * messages will be screwed by the cylon animation. */
+ if (!manager_is_confirm_spawn_disabled(m))
+ return;
+
+ if (m->jobs_in_progress_event_source)
+ return;
+
+ next = manager_watch_jobs_next_time(m);
+ r = sd_event_add_time(
+ m->event,
+ &m->jobs_in_progress_event_source,
+ CLOCK_MONOTONIC,
+ next, 0,
+ manager_dispatch_jobs_in_progress, m);
+ if (r < 0)
+ return;
+
+ (void) sd_event_source_set_description(m->jobs_in_progress_event_source, "manager-jobs-in-progress");
+}
+
+#define CYLON_BUFFER_EXTRA (2*STRLEN(ANSI_RED) + STRLEN(ANSI_HIGHLIGHT_RED) + 2*STRLEN(ANSI_NORMAL))
+
+static void draw_cylon(char buffer[], size_t buflen, unsigned width, unsigned pos) {
+ char *p = buffer;
+
+ assert(buflen >= CYLON_BUFFER_EXTRA + width + 1);
+ assert(pos <= width+1); /* 0 or width+1 mean that the center light is behind the corner */
+
+ if (pos > 1) {
+ if (pos > 2)
+ p = mempset(p, ' ', pos-2);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_RED);
+ *p++ = '*';
+ }
+
+ if (pos > 0 && pos <= width) {
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_HIGHLIGHT_RED);
+ *p++ = '*';
+ }
+
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_NORMAL);
+
+ if (pos < width) {
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_RED);
+ *p++ = '*';
+ if (pos < width-1)
+ p = mempset(p, ' ', width-1-pos);
+ if (log_get_show_color())
+ strcpy(p, ANSI_NORMAL);
+ }
+}
+
+static void manager_flip_auto_status(Manager *m, bool enable, const char *reason) {
+ assert(m);
+
+ if (enable) {
+ if (m->show_status == SHOW_STATUS_AUTO)
+ manager_set_show_status(m, SHOW_STATUS_TEMPORARY, reason);
+ } else {
+ if (m->show_status == SHOW_STATUS_TEMPORARY)
+ manager_set_show_status(m, SHOW_STATUS_AUTO, reason);
+ }
+}
+
+static void manager_print_jobs_in_progress(Manager *m) {
+ _cleanup_free_ char *job_of_n = NULL;
+ Job *j;
+ unsigned counter = 0, print_nr;
+ char cylon[6 + CYLON_BUFFER_EXTRA + 1];
+ unsigned cylon_pos;
+ char time[FORMAT_TIMESPAN_MAX], limit[FORMAT_TIMESPAN_MAX] = "no limit";
+ uint64_t x;
+
+ assert(m);
+ assert(m->n_running_jobs > 0);
+
+ manager_flip_auto_status(m, true, "delay");
+
+ print_nr = (m->jobs_in_progress_iteration / JOBS_IN_PROGRESS_PERIOD_DIVISOR) % m->n_running_jobs;
+
+ HASHMAP_FOREACH(j, m->jobs)
+ if (j->state == JOB_RUNNING && counter++ == print_nr)
+ break;
+
+ /* m->n_running_jobs must be consistent with the contents of m->jobs,
+ * so the above loop must have succeeded in finding j. */
+ assert(counter == print_nr + 1);
+ assert(j);
+
+ cylon_pos = m->jobs_in_progress_iteration % 14;
+ if (cylon_pos >= 8)
+ cylon_pos = 14 - cylon_pos;
+ draw_cylon(cylon, sizeof(cylon), 6, cylon_pos);
+
+ m->jobs_in_progress_iteration++;
+
+ if (m->n_running_jobs > 1) {
+ if (asprintf(&job_of_n, "(%u of %u) ", counter, m->n_running_jobs) < 0)
+ job_of_n = NULL;
+ }
+
+ format_timespan(time, sizeof(time), now(CLOCK_MONOTONIC) - j->begin_usec, 1*USEC_PER_SEC);
+ if (job_get_timeout(j, &x) > 0)
+ format_timespan(limit, sizeof(limit), x - j->begin_usec, 1*USEC_PER_SEC);
+
+ manager_status_printf(m, STATUS_TYPE_EPHEMERAL, cylon,
+ "%sA %s job is running for %s (%s / %s)",
+ strempty(job_of_n),
+ job_type_to_string(j->type),
+ unit_status_string(j->unit),
+ time, limit);
+}
+
+static int have_ask_password(void) {
+ _cleanup_closedir_ DIR *dir;
+ struct dirent *de;
+
+ dir = opendir("/run/systemd/ask-password");
+ if (!dir) {
+ if (errno == ENOENT)
+ return false;
+ else
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, return -errno) {
+ if (startswith(de->d_name, "ask."))
+ return true;
+ }
+ return false;
+}
+
+static int manager_dispatch_ask_password_fd(sd_event_source *source,
+ int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ (void) flush_fd(fd);
+
+ m->have_ask_password = have_ask_password();
+ if (m->have_ask_password < 0)
+ /* Log error but continue. Negative have_ask_password
+ * is treated as unknown status. */
+ log_error_errno(m->have_ask_password, "Failed to list /run/systemd/ask-password: %m");
+
+ return 0;
+}
+
+static void manager_close_ask_password(Manager *m) {
+ assert(m);
+
+ m->ask_password_event_source = sd_event_source_unref(m->ask_password_event_source);
+ m->ask_password_inotify_fd = safe_close(m->ask_password_inotify_fd);
+ m->have_ask_password = -EINVAL;
+}
+
+static int manager_check_ask_password(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (!m->ask_password_event_source) {
+ assert(m->ask_password_inotify_fd < 0);
+
+ (void) mkdir_p_label("/run/systemd/ask-password", 0755);
+
+ m->ask_password_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (m->ask_password_inotify_fd < 0)
+ return log_error_errno(errno, "Failed to create inotify object: %m");
+
+ r = inotify_add_watch_and_warn(m->ask_password_inotify_fd,
+ "/run/systemd/ask-password",
+ IN_CREATE|IN_DELETE|IN_MOVE);
+ if (r < 0) {
+ manager_close_ask_password(m);
+ return r;
+ }
+
+ r = sd_event_add_io(m->event, &m->ask_password_event_source,
+ m->ask_password_inotify_fd, EPOLLIN,
+ manager_dispatch_ask_password_fd, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add event source for /run/systemd/ask-password: %m");
+ manager_close_ask_password(m);
+ return r;
+ }
+
+ (void) sd_event_source_set_description(m->ask_password_event_source, "manager-ask-password");
+
+ /* Queries might have been added meanwhile... */
+ manager_dispatch_ask_password_fd(m->ask_password_event_source,
+ m->ask_password_inotify_fd, EPOLLIN, m);
+ }
+
+ return m->have_ask_password;
+}
+
+static int manager_watch_idle_pipe(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->idle_pipe_event_source)
+ return 0;
+
+ if (m->idle_pipe[2] < 0)
+ return 0;
+
+ r = sd_event_add_io(m->event, &m->idle_pipe_event_source, m->idle_pipe[2], EPOLLIN, manager_dispatch_idle_pipe_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch idle pipe: %m");
+
+ (void) sd_event_source_set_description(m->idle_pipe_event_source, "manager-idle-pipe");
+
+ return 0;
+}
+
+static void manager_close_idle_pipe(Manager *m) {
+ assert(m);
+
+ m->idle_pipe_event_source = sd_event_source_unref(m->idle_pipe_event_source);
+
+ safe_close_pair(m->idle_pipe);
+ safe_close_pair(m->idle_pipe + 2);
+}
+
+static int manager_setup_time_change(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ m->time_change_event_source = sd_event_source_unref(m->time_change_event_source);
+ m->time_change_fd = safe_close(m->time_change_fd);
+
+ m->time_change_fd = time_change_fd();
+ if (m->time_change_fd < 0)
+ return log_error_errno(m->time_change_fd, "Failed to create timer change timer fd: %m");
+
+ r = sd_event_add_io(m->event, &m->time_change_event_source, m->time_change_fd, EPOLLIN, manager_dispatch_time_change_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create time change event source: %m");
+
+ /* Schedule this slightly earlier than the .timer event sources */
+ r = sd_event_source_set_priority(m->time_change_event_source, SD_EVENT_PRIORITY_NORMAL-1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of time change event sources: %m");
+
+ (void) sd_event_source_set_description(m->time_change_event_source, "manager-time-change");
+
+ log_debug("Set up TFD_TIMER_CANCEL_ON_SET timerfd.");
+
+ return 0;
+}
+
+static int manager_read_timezone_stat(Manager *m) {
+ struct stat st;
+ bool changed;
+
+ assert(m);
+
+ /* Read the current stat() data of /etc/localtime so that we detect changes */
+ if (lstat("/etc/localtime", &st) < 0) {
+ log_debug_errno(errno, "Failed to stat /etc/localtime, ignoring: %m");
+ changed = m->etc_localtime_accessible;
+ m->etc_localtime_accessible = false;
+ } else {
+ usec_t k;
+
+ k = timespec_load(&st.st_mtim);
+ changed = !m->etc_localtime_accessible || k != m->etc_localtime_mtime;
+
+ m->etc_localtime_mtime = k;
+ m->etc_localtime_accessible = true;
+ }
+
+ return changed;
+}
+
+static int manager_setup_timezone_change(Manager *m) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *new_event = NULL;
+ int r;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ /* We watch /etc/localtime for three events: change of the link count (which might mean removal from /etc even
+ * though another link might be kept), renames, and file close operations after writing. Note we don't bother
+ * with IN_DELETE_SELF, as that would just report when the inode is removed entirely, i.e. after the link count
+ * went to zero and all fds to it are closed.
+ *
+ * Note that we never follow symlinks here. This is a simplification, but should cover almost all cases
+ * correctly.
+ *
+ * Note that we create the new event source first here, before releasing the old one. This should optimize
+ * behaviour as this way sd-event can reuse the old watch in case the inode didn't change. */
+
+ r = sd_event_add_inotify(m->event, &new_event, "/etc/localtime",
+ IN_ATTRIB|IN_MOVE_SELF|IN_CLOSE_WRITE|IN_DONT_FOLLOW, manager_dispatch_timezone_change, m);
+ if (r == -ENOENT) {
+ /* If the file doesn't exist yet, subscribe to /etc instead, and wait until it is created either by
+ * O_CREATE or by rename() */
+
+ log_debug_errno(r, "/etc/localtime doesn't exist yet, watching /etc instead.");
+ r = sd_event_add_inotify(m->event, &new_event, "/etc",
+ IN_CREATE|IN_MOVED_TO|IN_ONLYDIR, manager_dispatch_timezone_change, m);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to create timezone change event source: %m");
+
+ /* Schedule this slightly earlier than the .timer event sources */
+ r = sd_event_source_set_priority(new_event, SD_EVENT_PRIORITY_NORMAL-1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of timezone change event sources: %m");
+
+ sd_event_source_unref(m->timezone_change_event_source);
+ m->timezone_change_event_source = TAKE_PTR(new_event);
+
+ return 0;
+}
+
+static int enable_special_signals(Manager *m) {
+ _cleanup_close_ int fd = -1;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ /* Enable that we get SIGINT on control-alt-del. In containers
+ * this will fail with EPERM (older) or EINVAL (newer), so
+ * ignore that. */
+ if (reboot(RB_DISABLE_CAD) < 0 && !IN_SET(errno, EPERM, EINVAL))
+ log_warning_errno(errno, "Failed to enable ctrl-alt-del handling: %m");
+
+ fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0) {
+ /* Support systems without virtual console */
+ if (fd != -ENOENT)
+ log_warning_errno(errno, "Failed to open /dev/tty0: %m");
+ } else {
+ /* Enable that we get SIGWINCH on kbrequest */
+ if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
+ log_warning_errno(errno, "Failed to enable kbrequest handling: %m");
+ }
+
+ return 0;
+}
+
+#define RTSIG_IF_AVAILABLE(signum) (signum <= SIGRTMAX ? signum : -1)
+
+static int manager_setup_signals(Manager *m) {
+ struct sigaction sa = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_NOCLDSTOP|SA_RESTART,
+ };
+ sigset_t mask;
+ int r;
+
+ assert(m);
+
+ assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
+
+ /* We make liberal use of realtime signals here. On
+ * Linux/glibc we have 30 of them (with the exception of Linux
+ * on hppa, see below), between SIGRTMIN+0 ... SIGRTMIN+30
+ * (aka SIGRTMAX). */
+
+ assert_se(sigemptyset(&mask) == 0);
+ sigset_add_many(&mask,
+ SIGCHLD, /* Child died */
+ SIGTERM, /* Reexecute daemon */
+ SIGHUP, /* Reload configuration */
+ SIGUSR1, /* systemd/upstart: reconnect to D-Bus */
+ SIGUSR2, /* systemd: dump status */
+ SIGINT, /* Kernel sends us this on control-alt-del */
+ SIGWINCH, /* Kernel sends us this on kbrequest (alt-arrowup) */
+ SIGPWR, /* Some kernel drivers and upsd send us this on power failure */
+
+ SIGRTMIN+0, /* systemd: start default.target */
+ SIGRTMIN+1, /* systemd: isolate rescue.target */
+ SIGRTMIN+2, /* systemd: isolate emergency.target */
+ SIGRTMIN+3, /* systemd: start halt.target */
+ SIGRTMIN+4, /* systemd: start poweroff.target */
+ SIGRTMIN+5, /* systemd: start reboot.target */
+ SIGRTMIN+6, /* systemd: start kexec.target */
+
+ /* ... space for more special targets ... */
+
+ SIGRTMIN+13, /* systemd: Immediate halt */
+ SIGRTMIN+14, /* systemd: Immediate poweroff */
+ SIGRTMIN+15, /* systemd: Immediate reboot */
+ SIGRTMIN+16, /* systemd: Immediate kexec */
+
+ /* ... space for more immediate system state changes ... */
+
+ SIGRTMIN+20, /* systemd: enable status messages */
+ SIGRTMIN+21, /* systemd: disable status messages */
+ SIGRTMIN+22, /* systemd: set log level to LOG_DEBUG */
+ SIGRTMIN+23, /* systemd: set log level to LOG_INFO */
+ SIGRTMIN+24, /* systemd: Immediate exit (--user only) */
+
+ /* .. one free signal here ... */
+
+ /* Apparently Linux on hppa had fewer RT signals until v3.18,
+ * SIGRTMAX was SIGRTMIN+25, and then SIGRTMIN was lowered,
+ * see commit v3.17-7614-g1f25df2eff.
+ *
+ * We cannot unconditionally make use of those signals here,
+ * so let's use a runtime check. Since these commands are
+ * accessible by different means and only really a safety
+ * net, the missing functionality on hppa shouldn't matter.
+ */
+
+ RTSIG_IF_AVAILABLE(SIGRTMIN+26), /* systemd: set log target to journal-or-kmsg */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+27), /* systemd: set log target to console */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+28), /* systemd: set log target to kmsg */
+ RTSIG_IF_AVAILABLE(SIGRTMIN+29), /* systemd: set log target to syslog-or-kmsg (obsolete) */
+
+ /* ... one free signal here SIGRTMIN+30 ... */
+ -1);
+ assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
+
+ m->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (m->signal_fd < 0)
+ return -errno;
+
+ r = sd_event_add_io(m->event, &m->signal_event_source, m->signal_fd, EPOLLIN, manager_dispatch_signal_fd, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->signal_event_source, "manager-signal");
+
+ /* Process signals a bit earlier than the rest of things, but later than notify_fd processing, so that the
+ * notify processing can still figure out to which process/service a message belongs, before we reap the
+ * process. Also, process this before handling cgroup notifications, so that we always collect child exit
+ * status information before detecting that there's no process in a cgroup. */
+ r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-6);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(m))
+ return enable_special_signals(m);
+
+ return 0;
+}
+
+static char** sanitize_environment(char **l) {
+
+ /* Let's remove some environment variables that we need ourselves to communicate with our clients */
+ strv_env_unset_many(
+ l,
+ "CACHE_DIRECTORY",
+ "CONFIGURATION_DIRECTORY",
+ "CREDENTIALS_DIRECTORY",
+ "EXIT_CODE",
+ "EXIT_STATUS",
+ "INVOCATION_ID",
+ "JOURNAL_STREAM",
+ "LISTEN_FDNAMES",
+ "LISTEN_FDS",
+ "LISTEN_PID",
+ "LOGS_DIRECTORY",
+ "MAINPID",
+ "MANAGERPID",
+ "NOTIFY_SOCKET",
+ "PIDFILE",
+ "REMOTE_ADDR",
+ "REMOTE_PORT",
+ "RUNTIME_DIRECTORY",
+ "SERVICE_RESULT",
+ "STATE_DIRECTORY",
+ "WATCHDOG_PID",
+ "WATCHDOG_USEC",
+ NULL);
+
+ /* Let's order the environment alphabetically, just to make it pretty */
+ strv_sort(l);
+
+ return l;
+}
+
+int manager_default_environment(Manager *m) {
+ int r;
+
+ assert(m);
+
+ m->transient_environment = strv_free(m->transient_environment);
+
+ if (MANAGER_IS_SYSTEM(m)) {
+ /* The system manager always starts with a clean
+ * environment for its children. It does not import
+ * the kernel's or the parents' exported variables.
+ *
+ * The initial passed environment is untouched to keep
+ * /proc/self/environ valid; it is used for tagging
+ * the init process inside containers. */
+ m->transient_environment = strv_new("PATH=" DEFAULT_PATH);
+ if (!m->transient_environment)
+ return log_oom();
+
+ /* Import locale variables LC_*= from configuration */
+ (void) locale_setup(&m->transient_environment);
+ } else {
+ _cleanup_free_ char *k = NULL;
+
+ /* The user manager passes its own environment
+ * along to its children, except for $PATH. */
+ m->transient_environment = strv_copy(environ);
+ if (!m->transient_environment)
+ return log_oom();
+
+ k = strdup("PATH=" DEFAULT_USER_PATH);
+ if (!k)
+ return log_oom();
+
+ r = strv_env_replace(&m->transient_environment, k);
+ if (r < 0)
+ return log_oom();
+ TAKE_PTR(k);
+ }
+
+ sanitize_environment(m->transient_environment);
+
+ return 0;
+}
+
+static int manager_setup_prefix(Manager *m) {
+ struct table_entry {
+ uint64_t type;
+ const char *suffix;
+ };
+
+ static const struct table_entry paths_system[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = { SD_PATH_SYSTEM_RUNTIME, NULL },
+ [EXEC_DIRECTORY_STATE] = { SD_PATH_SYSTEM_STATE_PRIVATE, NULL },
+ [EXEC_DIRECTORY_CACHE] = { SD_PATH_SYSTEM_STATE_CACHE, NULL },
+ [EXEC_DIRECTORY_LOGS] = { SD_PATH_SYSTEM_STATE_LOGS, NULL },
+ [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_SYSTEM_CONFIGURATION, NULL },
+ };
+
+ static const struct table_entry paths_user[_EXEC_DIRECTORY_TYPE_MAX] = {
+ [EXEC_DIRECTORY_RUNTIME] = { SD_PATH_USER_RUNTIME, NULL },
+ [EXEC_DIRECTORY_STATE] = { SD_PATH_USER_CONFIGURATION, NULL },
+ [EXEC_DIRECTORY_CACHE] = { SD_PATH_USER_STATE_CACHE, NULL },
+ [EXEC_DIRECTORY_LOGS] = { SD_PATH_USER_CONFIGURATION, "log" },
+ [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_USER_CONFIGURATION, NULL },
+ };
+
+ assert(m);
+
+ const struct table_entry *p = MANAGER_IS_SYSTEM(m) ? paths_system : paths_user;
+ int r;
+
+ for (ExecDirectoryType i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++) {
+ r = sd_path_lookup(p[i].type, p[i].suffix, &m->prefix[i]);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void manager_free_unit_name_maps(Manager *m) {
+ m->unit_id_map = hashmap_free(m->unit_id_map);
+ m->unit_name_map = hashmap_free(m->unit_name_map);
+ m->unit_path_cache = set_free(m->unit_path_cache);
+ m->unit_cache_timestamp_hash = 0;
+}
+
+static int manager_setup_run_queue(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->run_queue_event_source);
+
+ r = sd_event_add_defer(m->event, &m->run_queue_event_source, manager_dispatch_run_queue, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->run_queue_event_source, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(m->run_queue_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->run_queue_event_source, "manager-run-queue");
+
+ return 0;
+}
+
+static int manager_setup_sigchld_event_source(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->sigchld_event_source);
+
+ r = sd_event_add_defer(m->event, &m->sigchld_event_source, manager_dispatch_sigchld, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->sigchld_event_source, SD_EVENT_PRIORITY_NORMAL-7);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->sigchld_event_source, "manager-sigchld");
+
+ return 0;
+}
+
+int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager **_m) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ const char *e;
+ int r;
+
+ assert(_m);
+ assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER));
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .unit_file_scope = scope,
+ .objective = _MANAGER_OBJECTIVE_INVALID,
+
+ .status_unit_format = STATUS_UNIT_FORMAT_DEFAULT,
+
+ .default_timer_accuracy_usec = USEC_PER_MINUTE,
+ .default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT,
+ .default_tasks_accounting = true,
+ .default_tasks_max = TASKS_MAX_UNSET,
+ .default_timeout_start_usec = DEFAULT_TIMEOUT_USEC,
+ .default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC,
+ .default_restart_usec = DEFAULT_RESTART_USEC,
+
+ .original_log_level = -1,
+ .original_log_target = _LOG_TARGET_INVALID,
+
+ .watchdog_overridden[WATCHDOG_RUNTIME] = USEC_INFINITY,
+ .watchdog_overridden[WATCHDOG_REBOOT] = USEC_INFINITY,
+ .watchdog_overridden[WATCHDOG_KEXEC] = USEC_INFINITY,
+
+ .show_status_overridden = _SHOW_STATUS_INVALID,
+
+ .notify_fd = -1,
+ .cgroups_agent_fd = -1,
+ .signal_fd = -1,
+ .time_change_fd = -1,
+ .user_lookup_fds = { -1, -1 },
+ .private_listen_fd = -1,
+ .dev_autofs_fd = -1,
+ .cgroup_inotify_fd = -1,
+ .pin_cgroupfs_fd = -1,
+ .ask_password_inotify_fd = -1,
+ .idle_pipe = { -1, -1, -1, -1},
+
+ /* start as id #1, so that we can leave #0 around as "null-like" value */
+ .current_job_id = 1,
+
+ .have_ask_password = -EINVAL, /* we don't know */
+ .first_boot = -1,
+ .test_run_flags = test_run_flags,
+
+ .default_oom_policy = OOM_STOP,
+ };
+
+#if ENABLE_EFI
+ if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
+ boot_timestamps(m->timestamps + MANAGER_TIMESTAMP_USERSPACE,
+ m->timestamps + MANAGER_TIMESTAMP_FIRMWARE,
+ m->timestamps + MANAGER_TIMESTAMP_LOADER);
+#endif
+
+ /* Prepare log fields we can use for structured logging */
+ if (MANAGER_IS_SYSTEM(m)) {
+ m->unit_log_field = "UNIT=";
+ m->unit_log_format_string = "UNIT=%s";
+
+ m->invocation_log_field = "INVOCATION_ID=";
+ m->invocation_log_format_string = "INVOCATION_ID=%s";
+ } else {
+ m->unit_log_field = "USER_UNIT=";
+ m->unit_log_format_string = "USER_UNIT=%s";
+
+ m->invocation_log_field = "USER_INVOCATION_ID=";
+ m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
+ }
+
+ /* Reboot immediately if the user hits C-A-D more often than 7x per 2s */
+ m->ctrl_alt_del_ratelimit = (RateLimit) { .interval = 2 * USEC_PER_SEC, .burst = 7 };
+
+ r = manager_default_environment(m);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->units, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->cgroup_unit, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->watch_bus, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&m->run_queue, compare_job_priority);
+ if (r < 0)
+ return r;
+
+ r = manager_setup_prefix(m);
+ if (r < 0)
+ return r;
+
+ e = secure_getenv("CREDENTIALS_DIRECTORY");
+ if (e) {
+ m->received_credentials = strdup(e);
+ if (!m->received_credentials)
+ return -ENOMEM;
+ }
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = manager_setup_run_queue(m);
+ if (r < 0)
+ return r;
+
+ if (test_run_flags == MANAGER_TEST_RUN_MINIMAL) {
+ m->cgroup_root = strdup("");
+ if (!m->cgroup_root)
+ return -ENOMEM;
+ } else {
+ r = manager_setup_signals(m);
+ if (r < 0)
+ return r;
+
+ r = manager_setup_cgroup(m);
+ if (r < 0)
+ return r;
+
+ r = manager_setup_time_change(m);
+ if (r < 0)
+ return r;
+
+ r = manager_read_timezone_stat(m);
+ if (r < 0)
+ return r;
+
+ (void) manager_setup_timezone_change(m);
+
+ r = manager_setup_sigchld_event_source(m);
+ if (r < 0)
+ return r;
+ }
+
+ if (test_run_flags == 0) {
+ if (MANAGER_IS_SYSTEM(m))
+ r = mkdir_label("/run/systemd/units", 0755);
+ else {
+ _cleanup_free_ char *units_path = NULL;
+ r = xdg_user_runtime_dir(&units_path, "/systemd/units");
+ if (r < 0)
+ return r;
+ r = mkdir_p_label(units_path, 0755);
+ }
+
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ m->taint_usr =
+ !in_initrd() &&
+ dir_is_empty("/usr") > 0;
+
+ /* Note that we do not set up the notify fd here. We do that after deserialization,
+ * since they might have gotten serialized across the reexec. */
+
+ *_m = TAKE_PTR(m);
+
+ return 0;
+}
+
+static int manager_setup_notify(Manager *m) {
+ int r;
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ if (m->notify_fd < 0) {
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa;
+ socklen_t sa_len;
+
+ /* First free all secondary fields */
+ m->notify_socket = mfree(m->notify_socket);
+ m->notify_event_source = sd_event_source_unref(m->notify_event_source);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate notification socket: %m");
+
+ fd_inc_rcvbuf(fd, NOTIFY_RCVBUF_SIZE);
+
+ m->notify_socket = path_join(m->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/notify");
+ if (!m->notify_socket)
+ return log_oom();
+
+ r = sockaddr_un_set_path(&sa.un, m->notify_socket);
+ if (r < 0)
+ return log_error_errno(r, "Notify socket '%s' not valid for AF_UNIX socket address, refusing.",
+ m->notify_socket);
+ sa_len = r;
+
+ (void) mkdir_parents_label(m->notify_socket, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = mac_selinux_bind(fd, &sa.sa, sa_len);
+ if (r < 0)
+ return log_error_errno(r, "bind(%s) failed: %m", m->notify_socket);
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+ m->notify_fd = TAKE_FD(fd);
+
+ log_debug("Using notification socket %s", m->notify_socket);
+ }
+
+ if (!m->notify_event_source) {
+ r = sd_event_add_io(m->event, &m->notify_event_source, m->notify_fd, EPOLLIN, manager_dispatch_notify_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate notify event source: %m");
+
+ /* Process notification messages a bit earlier than SIGCHLD, so that we can still identify to which
+ * service an exit message belongs. */
+ r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-8);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of notify event source: %m");
+
+ (void) sd_event_source_set_description(m->notify_event_source, "manager-notify");
+ }
+
+ return 0;
+}
+
+static int manager_setup_cgroups_agent(Manager *m) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/cgroups-agent",
+ };
+ int r;
+
+ /* This creates a listening socket we receive cgroups agent messages on. We do not use D-Bus for delivering
+ * these messages from the cgroups agent binary to PID 1, as the cgroups agent binary is very short-living, and
+ * each instance of it needs a new D-Bus connection. Since D-Bus connections are SOCK_STREAM/AF_UNIX, on
+ * overloaded systems the backlog of the D-Bus socket becomes relevant, as not more than the configured number
+ * of D-Bus connections may be queued until the kernel will start dropping further incoming connections,
+ * possibly resulting in lost cgroups agent messages. To avoid this, we'll use a private SOCK_DGRAM/AF_UNIX
+ * socket, where no backlog is relevant as communication may take place without an actual connect() cycle, and
+ * we thus won't lose messages.
+ *
+ * Note that PID 1 will forward the agent message to system bus, so that the user systemd instance may listen
+ * to it. The system instance hence listens on this special socket, but the user instances listen on the system
+ * bus for these messages. */
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return 0;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return 0;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether unified cgroups hierarchy is used: %m");
+ if (r > 0) /* We don't need this anymore on the unified hierarchy */
+ return 0;
+
+ if (m->cgroups_agent_fd < 0) {
+ _cleanup_close_ int fd = -1;
+
+ /* First free all secondary fields */
+ m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate cgroups agent socket: %m");
+
+ fd_inc_rcvbuf(fd, CGROUPS_AGENT_RCVBUF_SIZE);
+
+ (void) sockaddr_un_unlink(&sa.un);
+
+ /* Only allow root to connect to this socket */
+ RUN_WITH_UMASK(0077)
+ r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ m->cgroups_agent_fd = TAKE_FD(fd);
+ }
+
+ if (!m->cgroups_agent_event_source) {
+ r = sd_event_add_io(m->event, &m->cgroups_agent_event_source, m->cgroups_agent_fd, EPOLLIN, manager_dispatch_cgroups_agent_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate cgroups agent event source: %m");
+
+ /* Process cgroups notifications early. Note that when the agent notification is received
+ * we'll just enqueue the unit in the cgroup empty queue, hence pick a high priority than
+ * that. Also see handling of cgroup inotify for the unified cgroup stuff. */
+ r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-9);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m");
+
+ (void) sd_event_source_set_description(m->cgroups_agent_event_source, "manager-cgroups-agent");
+ }
+
+ return 0;
+}
+
+static int manager_setup_user_lookup_fd(Manager *m) {
+ int r;
+
+ assert(m);
+
+ /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID
+ * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation,
+ * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked
+ * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects
+ * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes,
+ * hence we establish this communication channel so that forked off processes can pass their UID/GID
+ * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple
+ * datagram, along with their unit name, so that we can share one communication socket pair among all units for
+ * this purpose.
+ *
+ * You might wonder why we need a communication channel for this that is independent of the usual notification
+ * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET
+ * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user
+ * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available.
+ *
+ * Note that this function is called under two circumstances: when we first initialize (in which case we
+ * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload
+ * (in which case the socket pair already exists but we still need to allocate the event source for it). */
+
+ if (m->user_lookup_fds[0] < 0) {
+
+ /* Free all secondary fields */
+ safe_close_pair(m->user_lookup_fds);
+ m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0)
+ return log_error_errno(errno, "Failed to allocate user lookup socket: %m");
+
+ (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE);
+ }
+
+ if (!m->user_lookup_event_source) {
+ r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to allocate user lookup event source: %m");
+
+ /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
+ * resolutions */
+ r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-11);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to set priority of user lookup event source: %m");
+
+ (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
+ }
+
+ return 0;
+}
+
+static unsigned manager_dispatch_cleanup_queue(Manager *m) {
+ Unit *u;
+ unsigned n = 0;
+
+ assert(m);
+
+ while ((u = m->cleanup_queue)) {
+ assert(u->in_cleanup_queue);
+
+ unit_free(u);
+ n++;
+ }
+
+ return n;
+}
+
+enum {
+ GC_OFFSET_IN_PATH, /* This one is on the path we were traveling */
+ GC_OFFSET_UNSURE, /* No clue */
+ GC_OFFSET_GOOD, /* We still need this unit */
+ GC_OFFSET_BAD, /* We don't need this unit anymore */
+ _GC_OFFSET_MAX
+};
+
+static void unit_gc_mark_good(Unit *u, unsigned gc_marker) {
+ Unit *other;
+ void *v;
+
+ u->gc_marker = gc_marker + GC_OFFSET_GOOD;
+
+ /* Recursively mark referenced units as GOOD as well */
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REFERENCES])
+ if (other->gc_marker == gc_marker + GC_OFFSET_UNSURE)
+ unit_gc_mark_good(other, gc_marker);
+}
+
+static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
+ Unit *other;
+ bool is_bad;
+ void *v;
+
+ assert(u);
+
+ if (IN_SET(u->gc_marker - gc_marker,
+ GC_OFFSET_GOOD, GC_OFFSET_BAD, GC_OFFSET_UNSURE, GC_OFFSET_IN_PATH))
+ return;
+
+ if (u->in_cleanup_queue)
+ goto bad;
+
+ if (!unit_may_gc(u))
+ goto good;
+
+ u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
+
+ is_bad = true;
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REFERENCED_BY]) {
+ unit_gc_sweep(other, gc_marker);
+
+ if (other->gc_marker == gc_marker + GC_OFFSET_GOOD)
+ goto good;
+
+ if (other->gc_marker != gc_marker + GC_OFFSET_BAD)
+ is_bad = false;
+ }
+
+ if (u->refs_by_target) {
+ const UnitRef *ref;
+
+ LIST_FOREACH(refs_by_target, ref, u->refs_by_target) {
+ unit_gc_sweep(ref->source, gc_marker);
+
+ if (ref->source->gc_marker == gc_marker + GC_OFFSET_GOOD)
+ goto good;
+
+ if (ref->source->gc_marker != gc_marker + GC_OFFSET_BAD)
+ is_bad = false;
+ }
+ }
+
+ if (is_bad)
+ goto bad;
+
+ /* We were unable to find anything out about this entry, so
+ * let's investigate it later */
+ u->gc_marker = gc_marker + GC_OFFSET_UNSURE;
+ unit_add_to_gc_queue(u);
+ return;
+
+bad:
+ /* We definitely know that this one is not useful anymore, so
+ * let's mark it for deletion */
+ u->gc_marker = gc_marker + GC_OFFSET_BAD;
+ unit_add_to_cleanup_queue(u);
+ return;
+
+good:
+ unit_gc_mark_good(u, gc_marker);
+}
+
+static unsigned manager_dispatch_gc_unit_queue(Manager *m) {
+ unsigned n = 0, gc_marker;
+ Unit *u;
+
+ assert(m);
+
+ /* log_debug("Running GC..."); */
+
+ m->gc_marker += _GC_OFFSET_MAX;
+ if (m->gc_marker + _GC_OFFSET_MAX <= _GC_OFFSET_MAX)
+ m->gc_marker = 1;
+
+ gc_marker = m->gc_marker;
+
+ while ((u = m->gc_unit_queue)) {
+ assert(u->in_gc_queue);
+
+ unit_gc_sweep(u, gc_marker);
+
+ LIST_REMOVE(gc_queue, m->gc_unit_queue, u);
+ u->in_gc_queue = false;
+
+ n++;
+
+ if (IN_SET(u->gc_marker - gc_marker,
+ GC_OFFSET_BAD, GC_OFFSET_UNSURE)) {
+ if (u->id)
+ log_unit_debug(u, "Collecting.");
+ u->gc_marker = gc_marker + GC_OFFSET_BAD;
+ unit_add_to_cleanup_queue(u);
+ }
+ }
+
+ return n;
+}
+
+static unsigned manager_dispatch_gc_job_queue(Manager *m) {
+ unsigned n = 0;
+ Job *j;
+
+ assert(m);
+
+ while ((j = m->gc_job_queue)) {
+ assert(j->in_gc_queue);
+
+ LIST_REMOVE(gc_queue, m->gc_job_queue, j);
+ j->in_gc_queue = false;
+
+ n++;
+
+ if (!job_may_gc(j))
+ continue;
+
+ log_unit_debug(j->unit, "Collecting job.");
+ (void) job_finish_and_invalidate(j, JOB_COLLECTED, false, false);
+ }
+
+ return n;
+}
+
+static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
+ unsigned n = 0;
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ while ((u = m->stop_when_unneeded_queue)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ assert(m->stop_when_unneeded_queue);
+
+ assert(u->in_stop_when_unneeded_queue);
+ LIST_REMOVE(stop_when_unneeded_queue, m->stop_when_unneeded_queue, u);
+ u->in_stop_when_unneeded_queue = false;
+
+ n++;
+
+ if (!unit_is_unneeded(u))
+ continue;
+
+ log_unit_debug(u, "Unit is not needed anymore.");
+
+ /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
+ * service being unnecessary after a while. */
+
+ if (!ratelimit_below(&u->auto_stop_ratelimit)) {
+ log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently.");
+ continue;
+ }
+
+ /* Ok, nobody needs us anymore. Sniff. Then let's commit suicide */
+ r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, NULL, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
+ }
+
+ return n;
+}
+
+static void manager_clear_jobs_and_units(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ while ((u = hashmap_first(m->units)))
+ unit_free(u);
+
+ manager_dispatch_cleanup_queue(m);
+
+ assert(!m->load_queue);
+ assert(prioq_isempty(m->run_queue));
+ assert(!m->dbus_unit_queue);
+ assert(!m->dbus_job_queue);
+ assert(!m->cleanup_queue);
+ assert(!m->gc_unit_queue);
+ assert(!m->gc_job_queue);
+ assert(!m->stop_when_unneeded_queue);
+
+ assert(hashmap_isempty(m->jobs));
+ assert(hashmap_isempty(m->units));
+
+ m->n_on_console = 0;
+ m->n_running_jobs = 0;
+ m->n_installed_jobs = 0;
+ m->n_failed_jobs = 0;
+}
+
+Manager* manager_free(Manager *m) {
+ if (!m)
+ return NULL;
+
+ manager_clear_jobs_and_units(m);
+
+ for (UnitType c = 0; c < _UNIT_TYPE_MAX; c++)
+ if (unit_vtable[c]->shutdown)
+ unit_vtable[c]->shutdown(m);
+
+ /* Keep the cgroup hierarchy in place except when we know we are going down for good */
+ manager_shutdown_cgroup(m, IN_SET(m->objective, MANAGER_EXIT, MANAGER_REBOOT, MANAGER_POWEROFF, MANAGER_HALT, MANAGER_KEXEC));
+
+ lookup_paths_flush_generator(&m->lookup_paths);
+
+ bus_done(m);
+ manager_varlink_done(m);
+
+ exec_runtime_vacuum(m);
+ hashmap_free(m->exec_runtime_by_id);
+
+ dynamic_user_vacuum(m, false);
+ hashmap_free(m->dynamic_users);
+
+ hashmap_free(m->units);
+ hashmap_free(m->units_by_invocation_id);
+ hashmap_free(m->jobs);
+ hashmap_free(m->watch_pids);
+ hashmap_free(m->watch_bus);
+
+ prioq_free(m->run_queue);
+
+ set_free(m->startup_units);
+ set_free(m->failed_units);
+
+ sd_event_source_unref(m->signal_event_source);
+ sd_event_source_unref(m->sigchld_event_source);
+ sd_event_source_unref(m->notify_event_source);
+ sd_event_source_unref(m->cgroups_agent_event_source);
+ sd_event_source_unref(m->time_change_event_source);
+ sd_event_source_unref(m->timezone_change_event_source);
+ sd_event_source_unref(m->jobs_in_progress_event_source);
+ sd_event_source_unref(m->run_queue_event_source);
+ sd_event_source_unref(m->user_lookup_event_source);
+
+ safe_close(m->signal_fd);
+ safe_close(m->notify_fd);
+ safe_close(m->cgroups_agent_fd);
+ safe_close(m->time_change_fd);
+ safe_close_pair(m->user_lookup_fds);
+
+ manager_close_ask_password(m);
+
+ manager_close_idle_pipe(m);
+
+ sd_event_unref(m->event);
+
+ free(m->notify_socket);
+
+ lookup_paths_free(&m->lookup_paths);
+ strv_free(m->transient_environment);
+ strv_free(m->client_environment);
+
+ hashmap_free(m->cgroup_unit);
+ manager_free_unit_name_maps(m);
+
+ free(m->switch_root);
+ free(m->switch_root_init);
+
+ rlimit_free_all(m->rlimit);
+
+ assert(hashmap_isempty(m->units_requiring_mounts_for));
+ hashmap_free(m->units_requiring_mounts_for);
+
+ hashmap_free(m->uid_refs);
+ hashmap_free(m->gid_refs);
+
+ for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++)
+ m->prefix[dt] = mfree(m->prefix[dt]);
+ free(m->received_credentials);
+
+ return mfree(m);
+}
+
+static void manager_enumerate_perpetual(Manager *m) {
+ assert(m);
+
+ if (m->test_run_flags == MANAGER_TEST_RUN_MINIMAL)
+ return;
+
+ /* Let's ask every type to load all units from disk/kernel that it might know */
+ for (UnitType c = 0; c < _UNIT_TYPE_MAX; c++) {
+ if (!unit_type_supported(c)) {
+ log_debug("Unit type .%s is not supported on this system.", unit_type_to_string(c));
+ continue;
+ }
+
+ if (unit_vtable[c]->enumerate_perpetual)
+ unit_vtable[c]->enumerate_perpetual(m);
+ }
+}
+
+static void manager_enumerate(Manager *m) {
+ assert(m);
+
+ if (m->test_run_flags == MANAGER_TEST_RUN_MINIMAL)
+ return;
+
+ /* Let's ask every type to load all units from disk/kernel that it might know */
+ for (UnitType c = 0; c < _UNIT_TYPE_MAX; c++) {
+ if (!unit_type_supported(c)) {
+ log_debug("Unit type .%s is not supported on this system.", unit_type_to_string(c));
+ continue;
+ }
+
+ if (unit_vtable[c]->enumerate)
+ unit_vtable[c]->enumerate(m);
+ }
+
+ manager_dispatch_load_queue(m);
+}
+
+static void manager_coldplug(Manager *m) {
+ Unit *u;
+ char *k;
+ int r;
+
+ assert(m);
+
+ log_debug("Invoking unit coldplug() handlers…");
+
+ /* Let's place the units back into their deserialized state */
+ HASHMAP_FOREACH_KEY(u, k, m->units) {
+
+ /* ignore aliases */
+ if (u->id != k)
+ continue;
+
+ r = unit_coldplug(u);
+ if (r < 0)
+ log_warning_errno(r, "We couldn't coldplug %s, proceeding anyway: %m", u->id);
+ }
+}
+
+static void manager_catchup(Manager *m) {
+ Unit *u;
+ char *k;
+
+ assert(m);
+
+ log_debug("Invoking unit catchup() handlers…");
+
+ /* Let's catch up on any state changes that happened while we were reloading/reexecing */
+ HASHMAP_FOREACH_KEY(u, k, m->units) {
+
+ /* ignore aliases */
+ if (u->id != k)
+ continue;
+
+ unit_catchup(u);
+ }
+}
+
+static void manager_distribute_fds(Manager *m, FDSet *fds) {
+ Unit *u;
+
+ assert(m);
+
+ HASHMAP_FOREACH(u, m->units) {
+
+ if (fdset_size(fds) <= 0)
+ break;
+
+ if (!UNIT_VTABLE(u)->distribute_fds)
+ continue;
+
+ UNIT_VTABLE(u)->distribute_fds(u, fds);
+ }
+}
+
+static bool manager_dbus_is_running(Manager *m, bool deserialized) {
+ Unit *u;
+
+ assert(m);
+
+ /* This checks whether the dbus instance we are supposed to expose our APIs on is up. We check both the socket
+ * and the service unit. If the 'deserialized' parameter is true we'll check the deserialized state of the unit
+ * rather than the current one. */
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return false;
+
+ u = manager_get_unit(m, SPECIAL_DBUS_SOCKET);
+ if (!u)
+ return false;
+ if ((deserialized ? SOCKET(u)->deserialized_state : SOCKET(u)->state) != SOCKET_RUNNING)
+ return false;
+
+ u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
+ if (!u)
+ return false;
+ if (!IN_SET((deserialized ? SERVICE(u)->deserialized_state : SERVICE(u)->state), SERVICE_RUNNING, SERVICE_RELOAD))
+ return false;
+
+ return true;
+}
+
+static void manager_setup_bus(Manager *m) {
+ assert(m);
+
+ /* Let's set up our private bus connection now, unconditionally */
+ (void) bus_init_private(m);
+
+ /* If we are in --user mode also connect to the system bus now */
+ if (MANAGER_IS_USER(m))
+ (void) bus_init_system(m);
+
+ /* Let's connect to the bus now, but only if the unit is supposed to be up */
+ if (manager_dbus_is_running(m, MANAGER_IS_RELOADING(m))) {
+ (void) bus_init_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_init_system(m);
+ }
+}
+
+static void manager_preset_all(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->first_boot <= 0)
+ return;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return;
+
+ /* If this is the first boot, and we are in the host system, then preset everything */
+ r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
+ if (r < 0)
+ log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r,
+ "Failed to populate /etc with preset unit settings, ignoring: %m");
+ else
+ log_info("Populated /etc with preset unit settings.");
+}
+
+static void manager_ready(Manager *m) {
+ assert(m);
+
+ /* After having loaded everything, do the final round of catching up with what might have changed */
+
+ m->objective = MANAGER_OK; /* Tell everyone we are up now */
+
+ /* It might be safe to log to the journal now and connect to dbus */
+ manager_recheck_journal(m);
+ manager_recheck_dbus(m);
+
+ /* Let's finally catch up with any changes that took place while we were reloading/reexecing */
+ manager_catchup(m);
+
+ m->honor_device_enumeration = true;
+}
+
+static Manager* manager_reloading_start(Manager *m) {
+ m->n_reloading++;
+ return m;
+}
+static void manager_reloading_stopp(Manager **m) {
+ if (*m) {
+ assert((*m)->n_reloading > 0);
+ (*m)->n_reloading--;
+ }
+}
+
+int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
+ int r;
+
+ assert(m);
+
+ /* If we are running in test mode, we still want to run the generators,
+ * but we should not touch the real generator directories. */
+ r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope,
+ MANAGER_IS_TEST_RUN(m) ? LOOKUP_PATHS_TEMPORARY_GENERATED : 0,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize path lookup table: %m");
+
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_GENERATORS_START));
+ r = manager_run_environment_generators(m);
+ if (r >= 0)
+ r = manager_run_generators(m);
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_GENERATORS_FINISH));
+ if (r < 0)
+ return r;
+
+ manager_preset_all(m);
+
+ lookup_paths_log(&m->lookup_paths);
+
+ {
+ /* This block is (optionally) done with the reloading counter bumped */
+ _cleanup_(manager_reloading_stopp) Manager *reloading = NULL;
+
+ /* If we will deserialize make sure that during enumeration this is already known, so we increase the
+ * counter here already */
+ if (serialization)
+ reloading = manager_reloading_start(m);
+
+ /* First, enumerate what we can from all config files */
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_UNITS_LOAD_START));
+ manager_enumerate_perpetual(m);
+ manager_enumerate(m);
+ dual_timestamp_get(m->timestamps + manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_UNITS_LOAD_FINISH));
+
+ /* Second, deserialize if there is something to deserialize */
+ if (serialization) {
+ r = manager_deserialize(m, serialization, fds);
+ if (r < 0)
+ return log_error_errno(r, "Deserialization failed: %m");
+ }
+
+ /* Any fds left? Find some unit which wants them. This is useful to allow container managers to pass
+ * some file descriptors to us pre-initialized. This enables socket-based activation of entire
+ * containers. */
+ manager_distribute_fds(m, fds);
+
+ /* We might have deserialized the notify fd, but if we didn't then let's create the bus now */
+ r = manager_setup_notify(m);
+ if (r < 0)
+ /* No sense to continue without notifications, our children would fail anyway. */
+ return r;
+
+ r = manager_setup_cgroups_agent(m);
+ if (r < 0)
+ /* Likewise, no sense to continue without empty cgroup notifications. */
+ return r;
+
+ r = manager_setup_user_lookup_fd(m);
+ if (r < 0)
+ /* This shouldn't fail, except if things are really broken. */
+ return r;
+
+ /* Connect to the bus if we are good for it */
+ manager_setup_bus(m);
+
+ /* Now that we are connected to all possible buses, let's deserialize who is tracking us. */
+ r = bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
+ if (r < 0)
+ log_warning_errno(r, "Failed to deserialized tracked clients, ignoring: %m");
+ m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
+
+ r = manager_varlink_init(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set up Varlink server, ignoring: %m");
+
+ /* Third, fire things up! */
+ manager_coldplug(m);
+
+ /* Clean up runtime objects */
+ manager_vacuum(m);
+
+ if (serialization)
+ /* Let's wait for the UnitNew/JobNew messages being sent, before we notify that the
+ * reload is finished */
+ m->send_reloading_done = true;
+ }
+
+ manager_ready(m);
+
+ return 0;
+}
+
+int manager_add_job(
+ Manager *m,
+ JobType type,
+ Unit *unit,
+ JobMode mode,
+ Set *affected_jobs,
+ sd_bus_error *error,
+ Job **ret) {
+
+ Transaction *tr;
+ int r;
+
+ assert(m);
+ assert(type < _JOB_TYPE_MAX);
+ assert(unit);
+ assert(mode < _JOB_MODE_MAX);
+
+ if (mode == JOB_ISOLATE && type != JOB_START)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Isolate is only valid for start.");
+
+ if (mode == JOB_ISOLATE && !unit->allow_isolate)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_ISOLATION, "Operation refused, unit may not be isolated.");
+
+ if (mode == JOB_TRIGGERING && type != JOB_STOP)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=triggering is only valid for stop.");
+
+ log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
+
+ type = job_type_collapse(type, unit);
+
+ tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
+ if (!tr)
+ return -ENOMEM;
+
+ r = transaction_add_job_and_dependencies(tr, type, unit, NULL, true, false,
+ IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS),
+ mode == JOB_IGNORE_DEPENDENCIES, error);
+ if (r < 0)
+ goto tr_abort;
+
+ if (mode == JOB_ISOLATE) {
+ r = transaction_add_isolate_jobs(tr, m);
+ if (r < 0)
+ goto tr_abort;
+ }
+
+ if (mode == JOB_TRIGGERING) {
+ r = transaction_add_triggering_jobs(tr, unit);
+ if (r < 0)
+ goto tr_abort;
+ }
+
+ r = transaction_activate(tr, m, mode, affected_jobs, error);
+ if (r < 0)
+ goto tr_abort;
+
+ log_unit_debug(unit,
+ "Enqueued job %s/%s as %u", unit->id,
+ job_type_to_string(type), (unsigned) tr->anchor_job->id);
+
+ if (ret)
+ *ret = tr->anchor_job;
+
+ transaction_free(tr);
+ return 0;
+
+tr_abort:
+ transaction_abort(tr);
+ transaction_free(tr);
+ return r;
+}
+
+int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, Set *affected_jobs, sd_bus_error *e, Job **ret) {
+ Unit *unit = NULL; /* just to appease gcc, initialization is not really necessary */
+ int r;
+
+ assert(m);
+ assert(type < _JOB_TYPE_MAX);
+ assert(name);
+ assert(mode < _JOB_MODE_MAX);
+
+ r = manager_load_unit(m, name, NULL, NULL, &unit);
+ if (r < 0)
+ return r;
+ assert(unit);
+
+ return manager_add_job(m, type, unit, mode, affected_jobs, e, ret);
+}
+
+int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, JobMode mode, Set *affected_jobs, Job **ret) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(m);
+ assert(type < _JOB_TYPE_MAX);
+ assert(name);
+ assert(mode < _JOB_MODE_MAX);
+
+ r = manager_add_job_by_name(m, type, name, mode, affected_jobs, &error, ret);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to enqueue %s job for %s: %s", job_mode_to_string(mode), name, bus_error_message(&error, r));
+
+ return r;
+}
+
+int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e) {
+ int r;
+ Transaction *tr;
+
+ assert(m);
+ assert(unit);
+ assert(mode < _JOB_MODE_MAX);
+ assert(mode != JOB_ISOLATE); /* Isolate is only valid for start */
+
+ tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
+ if (!tr)
+ return -ENOMEM;
+
+ /* We need an anchor job */
+ r = transaction_add_job_and_dependencies(tr, JOB_NOP, unit, NULL, false, false, true, true, e);
+ if (r < 0)
+ goto tr_abort;
+
+ /* Failure in adding individual dependencies is ignored, so this always succeeds. */
+ transaction_add_propagate_reload_jobs(tr, unit, tr->anchor_job, mode == JOB_IGNORE_DEPENDENCIES, e);
+
+ r = transaction_activate(tr, m, mode, NULL, e);
+ if (r < 0)
+ goto tr_abort;
+
+ transaction_free(tr);
+ return 0;
+
+tr_abort:
+ transaction_abort(tr);
+ transaction_free(tr);
+ return r;
+}
+
+Job *manager_get_job(Manager *m, uint32_t id) {
+ assert(m);
+
+ return hashmap_get(m->jobs, UINT32_TO_PTR(id));
+}
+
+Unit *manager_get_unit(Manager *m, const char *name) {
+ assert(m);
+ assert(name);
+
+ return hashmap_get(m->units, name);
+}
+
+static int manager_dispatch_target_deps_queue(Manager *m) {
+ Unit *u;
+ int r = 0;
+
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRED_BY,
+ UNIT_REQUISITE_OF,
+ UNIT_WANTED_BY,
+ UNIT_BOUND_BY
+ };
+
+ assert(m);
+
+ while ((u = m->target_deps_queue)) {
+ assert(u->in_target_deps_queue);
+
+ LIST_REMOVE(target_deps_queue, u->manager->target_deps_queue, u);
+ u->in_target_deps_queue = false;
+
+ for (size_t k = 0; k < ELEMENTSOF(deps); k++) {
+ Unit *target;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, target, u->dependencies[deps[k]]) {
+ r = unit_add_default_target_dependency(u, target);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ return r;
+}
+
+unsigned manager_dispatch_load_queue(Manager *m) {
+ Unit *u;
+ unsigned n = 0;
+
+ assert(m);
+
+ /* Make sure we are not run recursively */
+ if (m->dispatching_load_queue)
+ return 0;
+
+ m->dispatching_load_queue = true;
+
+ /* Dispatches the load queue. Takes a unit from the queue and
+ * tries to load its data until the queue is empty */
+
+ while ((u = m->load_queue)) {
+ assert(u->in_load_queue);
+
+ unit_load(u);
+ n++;
+ }
+
+ m->dispatching_load_queue = false;
+
+ /* Dispatch the units waiting for their target dependencies to be added now, as all targets that we know about
+ * should be loaded and have aliases resolved */
+ (void) manager_dispatch_target_deps_queue(m);
+
+ return n;
+}
+
+bool manager_unit_cache_should_retry_load(Unit *u) {
+ assert(u);
+
+ /* Automatic reloading from disk only applies to units which were not found sometime in the past, and
+ * the not-found stub is kept pinned in the unit graph by dependencies. For units that were
+ * previously loaded, we don't do automatic reloading, and daemon-reload is necessary to update. */
+ if (u->load_state != UNIT_NOT_FOUND)
+ return false;
+
+ /* The cache has been updated since the last time we tried to load the unit. There might be new
+ * fragment paths to read. */
+ if (u->manager->unit_cache_timestamp_hash != u->fragment_not_found_timestamp_hash)
+ return true;
+
+ /* The cache needs to be updated because there are modifications on disk. */
+ return !lookup_paths_timestamp_hash_same(&u->manager->lookup_paths, u->manager->unit_cache_timestamp_hash, NULL);
+}
+
+int manager_load_unit_prepare(
+ Manager *m,
+ const char *name,
+ const char *path,
+ sd_bus_error *e,
+ Unit **_ret) {
+
+ _cleanup_(unit_freep) Unit *cleanup_ret = NULL;
+ Unit *ret;
+ UnitType t;
+ int r;
+
+ assert(m);
+ assert(_ret);
+
+ /* This will prepare the unit for loading, but not actually load anything from disk. */
+
+ if (path && !path_is_absolute(path))
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);
+
+ if (!name) {
+ /* 'name' and 'path' must not both be null. Check here 'path' using assert_se() to
+ * workaround a bug in gcc that generates a -Wnonnull warning when calling basename(),
+ * but this cannot be possible in any code path (See #6119). */
+ assert_se(path);
+ name = basename(path);
+ }
+
+ t = unit_name_to_type(name);
+
+ if (t == _UNIT_TYPE_INVALID || !unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE))
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is missing the instance name.", name);
+
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS, "Unit name %s is not valid.", name);
+ }
+
+ ret = manager_get_unit(m, name);
+ if (ret) {
+ /* The time-based cache allows to start new units without daemon-reload,
+ * but if they are already referenced (because of dependencies or ordering)
+ * then we have to force a load of the fragment. As an optimization, check
+ * first if anything in the usual paths was modified since the last time
+ * the cache was loaded. Also check if the last time an attempt to load the
+ * unit was made was before the most recent cache refresh, so that we know
+ * we need to try again — even if the cache is current, it might have been
+ * updated in a different context before we had a chance to retry loading
+ * this particular unit. */
+ if (manager_unit_cache_should_retry_load(ret))
+ ret->load_state = UNIT_STUB;
+ else {
+ *_ret = ret;
+ return 1;
+ }
+ } else {
+ ret = cleanup_ret = unit_new(m, unit_vtable[t]->object_size);
+ if (!ret)
+ return -ENOMEM;
+ }
+
+ if (path) {
+ r = free_and_strdup(&ret->fragment_path, path);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_add_name(ret, name);
+ if (r < 0)
+ return r;
+
+ unit_add_to_load_queue(ret);
+ unit_add_to_dbus_queue(ret);
+ unit_add_to_gc_queue(ret);
+
+ *_ret = ret;
+ cleanup_ret = NULL;
+
+ return 0;
+}
+
+int manager_load_unit(
+ Manager *m,
+ const char *name,
+ const char *path,
+ sd_bus_error *e,
+ Unit **_ret) {
+
+ int r;
+
+ assert(m);
+ assert(_ret);
+
+ /* This will load the service information files, but not actually
+ * start any services or anything. */
+
+ r = manager_load_unit_prepare(m, name, path, e, _ret);
+ if (r != 0)
+ return r;
+
+ manager_dispatch_load_queue(m);
+
+ *_ret = unit_follow_merge(*_ret);
+ return 0;
+}
+
+int manager_load_startable_unit_or_warn(
+ Manager *m,
+ const char *name,
+ const char *path,
+ Unit **ret) {
+
+ /* Load a unit, make sure it loaded fully and is not masked. */
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Unit *unit;
+ int r;
+
+ r = manager_load_unit(m, name, path, &error, &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load %s %s: %s",
+ name ? "unit" : "unit file", name ?: path,
+ bus_error_message(&error, r));
+
+ r = bus_unit_validate_load_state(unit, &error);
+ if (r < 0)
+ return log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ *ret = unit;
+ return 0;
+}
+
+void manager_dump_jobs(Manager *s, FILE *f, const char *prefix) {
+ Job *j;
+
+ assert(s);
+ assert(f);
+
+ HASHMAP_FOREACH(j, s->jobs)
+ job_dump(j, f, prefix);
+}
+
+void manager_dump_units(Manager *s, FILE *f, const char *prefix) {
+ Unit *u;
+ const char *t;
+
+ assert(s);
+ assert(f);
+
+ HASHMAP_FOREACH_KEY(u, t, s->units)
+ if (u->id == t)
+ unit_dump(u, f, prefix);
+}
+
+void manager_dump(Manager *m, FILE *f, const char *prefix) {
+ assert(m);
+ assert(f);
+
+ for (ManagerTimestamp q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
+ const dual_timestamp *t = m->timestamps + q;
+ char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_TIMESTAMP_MAX)];
+
+ if (dual_timestamp_is_set(t))
+ fprintf(f, "%sTimestamp %s: %s\n",
+ strempty(prefix),
+ manager_timestamp_to_string(q),
+ timestamp_is_set(t->realtime) ? format_timestamp(buf, sizeof buf, t->realtime) :
+ format_timespan(buf, sizeof buf, t->monotonic, 1));
+ }
+
+ manager_dump_units(m, f, prefix);
+ manager_dump_jobs(m, f, prefix);
+}
+
+int manager_get_dump_string(Manager *m, char **ret) {
+ _cleanup_free_ char *dump = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t size;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ f = open_memstream_unlocked(&dump, &size);
+ if (!f)
+ return -errno;
+
+ manager_dump(m, f, NULL);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ f = safe_fclose(f);
+
+ *ret = TAKE_PTR(dump);
+
+ return 0;
+}
+
+void manager_clear_jobs(Manager *m) {
+ Job *j;
+
+ assert(m);
+
+ while ((j = hashmap_first(m->jobs)))
+ /* No need to recurse. We're cancelling all jobs. */
+ job_finish_and_invalidate(j, JOB_CANCELED, false, false);
+}
+
+void manager_unwatch_pid(Manager *m, pid_t pid) {
+ assert(m);
+
+ /* First let's drop the unit keyed as "pid". */
+ (void) hashmap_remove(m->watch_pids, PID_TO_PTR(pid));
+
+ /* Then, let's also drop the array keyed by -pid. */
+ free(hashmap_remove(m->watch_pids, PID_TO_PTR(-pid)));
+}
+
+static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
+ Manager *m = userdata;
+ Job *j;
+
+ assert(source);
+ assert(m);
+
+ while ((j = prioq_peek(m->run_queue))) {
+ assert(j->installed);
+ assert(j->in_run_queue);
+
+ (void) job_run_and_invalidate(j);
+ }
+
+ if (m->n_running_jobs > 0)
+ manager_watch_jobs_in_progress(m);
+
+ if (m->n_on_console > 0)
+ manager_watch_idle_pipe(m);
+
+ return 1;
+}
+
+static unsigned manager_dispatch_dbus_queue(Manager *m) {
+ unsigned n = 0, budget;
+ Unit *u;
+ Job *j;
+
+ assert(m);
+
+ /* When we are reloading, let's not wait with generating signals, since we need to exit the manager as quickly
+ * as we can. There's no point in throttling generation of signals in that case. */
+ if (MANAGER_IS_RELOADING(m) || m->send_reloading_done || m->pending_reload_message)
+ budget = (unsigned) -1; /* infinite budget in this case */
+ else {
+ /* Anything to do at all? */
+ if (!m->dbus_unit_queue && !m->dbus_job_queue)
+ return 0;
+
+ /* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's
+ * sit this cycle out, and process things in a later cycle when the queues got a bit emptier. */
+ if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
+ return 0;
+
+ /* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't
+ * overly full before this call we shouldn't increase it in size too wildly in one step, and we
+ * shouldn't monopolize CPU time with generating these messages. Note the difference in counting of
+ * this "budget" and the "threshold" above: the "budget" is decreased only once per generated message,
+ * regardless how many buses/direct connections it is enqueued on, while the "threshold" is applied to
+ * each queued instance of bus message, i.e. if the same message is enqueued to five buses/direct
+ * connections it will be counted five times. This difference in counting ("references"
+ * vs. "instances") is primarily a result of the fact that it's easier to implement it this way,
+ * however it also reflects the thinking that the "threshold" should put a limit on used queue memory,
+ * i.e. space, while the "budget" should put a limit on time. Also note that the "threshold" is
+ * currently chosen much higher than the "budget". */
+ budget = MANAGER_BUS_MESSAGE_BUDGET;
+ }
+
+ while (budget != 0 && (u = m->dbus_unit_queue)) {
+
+ assert(u->in_dbus_queue);
+
+ bus_unit_send_change_signal(u);
+ n++;
+
+ if (budget != (unsigned) -1)
+ budget--;
+ }
+
+ while (budget != 0 && (j = m->dbus_job_queue)) {
+ assert(j->in_dbus_queue);
+
+ bus_job_send_change_signal(j);
+ n++;
+
+ if (budget != (unsigned) -1)
+ budget--;
+ }
+
+ if (m->send_reloading_done) {
+ m->send_reloading_done = false;
+ bus_manager_send_reloading(m, false);
+ n++;
+ }
+
+ if (m->pending_reload_message) {
+ bus_send_pending_reload_message(m);
+ n++;
+ }
+
+ return n;
+}
+
+static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ char buf[PATH_MAX];
+ ssize_t n;
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read cgroups agent message: %m");
+ if (n == 0) {
+ log_error("Got zero-length cgroups agent message, ignoring.");
+ return 0;
+ }
+ if ((size_t) n >= sizeof(buf)) {
+ log_error("Got overly long cgroups agent message, ignoring.");
+ return 0;
+ }
+
+ if (memchr(buf, 0, n)) {
+ log_error("Got cgroups agent message with embedded NUL byte, ignoring.");
+ return 0;
+ }
+ buf[n] = 0;
+
+ manager_notify_cgroup_empty(m, buf);
+ (void) bus_forward_agent_released(m, buf);
+
+ return 0;
+}
+
+static bool manager_process_barrier_fd(char * const *tags, FDSet *fds) {
+
+ /* nothing else must be sent when using BARRIER=1 */
+ if (strv_contains(tags, "BARRIER=1")) {
+ if (strv_length(tags) == 1) {
+ if (fdset_size(fds) != 1)
+ log_warning("Got incorrect number of fds with BARRIER=1, closing them.");
+ } else
+ log_warning("Extra notification messages sent with BARRIER=1, ignoring everything.");
+
+ /* Drop the message if BARRIER=1 was found */
+ return true;
+ }
+
+ return false;
+}
+
+static void manager_invoke_notify_message(
+ Manager *m,
+ Unit *u,
+ const struct ucred *ucred,
+ char * const *tags,
+ FDSet *fds) {
+
+ assert(m);
+ assert(u);
+ assert(ucred);
+ assert(tags);
+
+ if (u->notifygen == m->notifygen) /* Already invoked on this same unit in this same iteration? */
+ return;
+ u->notifygen = m->notifygen;
+
+ if (UNIT_VTABLE(u)->notify_message)
+ UNIT_VTABLE(u)->notify_message(u, ucred, tags, fds);
+
+ else if (DEBUG_LOGGING) {
+ _cleanup_free_ char *buf = NULL, *x = NULL, *y = NULL;
+
+ buf = strv_join(tags, ", ");
+ if (buf)
+ x = ellipsize(buf, 20, 90);
+ if (x)
+ y = cescape(x);
+
+ log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
+ }
+}
+
+static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ Manager *m = userdata;
+ char buf[NOTIFY_BUFFER_MAX+1];
+ struct iovec iovec = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)-1,
+ };
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)) control;
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ struct cmsghdr *cmsg;
+ struct ucred *ucred = NULL;
+ _cleanup_free_ Unit **array_copy = NULL;
+ _cleanup_strv_free_ char **tags = NULL;
+ Unit *u1, *u2, **array;
+ int r, *fd_array = NULL;
+ size_t n_fds = 0;
+ bool found = false;
+ ssize_t n;
+
+ assert(m);
+ assert(m->notify_fd == fd);
+
+ if (revents != EPOLLIN) {
+ log_warning("Got unexpected poll event for notify fd.");
+ return 0;
+ }
+
+ n = recvmsg_safe(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
+ if (IN_SET(n, -EAGAIN, -EINTR))
+ return 0; /* Spurious wakeup, try again */
+ if (n == -EXFULL) {
+ log_warning("Got message with truncated control data (too many fds sent?), ignoring.");
+ return 0;
+ }
+ if (n < 0)
+ /* If this is any other, real error, then let's stop processing this socket. This of course
+ * means we won't take notification messages anymore, but that's still better than busy
+ * looping around this: being woken up over and over again but being unable to actually read
+ * the message off the socket. */
+ return log_error_errno(n, "Failed to receive notification message: %m");
+
+ CMSG_FOREACH(cmsg, &msghdr) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+
+ assert(!fd_array);
+ fd_array = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
+
+ assert(!ucred);
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ }
+ }
+
+ if (n_fds > 0) {
+ assert(fd_array);
+
+ r = fdset_new_array(&fds, fd_array, n_fds);
+ if (r < 0) {
+ close_many(fd_array, n_fds);
+ log_oom();
+ return 0;
+ }
+ }
+
+ if (!ucred || !pid_is_valid(ucred->pid)) {
+ log_warning("Received notify message without valid credentials. Ignoring.");
+ return 0;
+ }
+
+ if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) {
+ log_warning("Received notify message exceeded maximum size. Ignoring.");
+ return 0;
+ }
+
+ /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one
+ * trailing NUL byte in the message, but don't expect it. */
+ if (n > 1 && memchr(buf, 0, n-1)) {
+ log_warning("Received notify message with embedded NUL bytes. Ignoring.");
+ return 0;
+ }
+
+ /* Make sure it's NUL-terminated, then parse it to obtain the tags list */
+ buf[n] = 0;
+ tags = strv_split_newlines(buf);
+ if (!tags) {
+ log_oom();
+ return 0;
+ }
+
+ /* possibly a barrier fd, let's see */
+ if (manager_process_barrier_fd(tags, fds))
+ return 0;
+
+ /* Increase the generation counter used for filtering out duplicate unit invocations. */
+ m->notifygen++;
+
+ /* Notify every unit that might be interested, which might be multiple. */
+ u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
+ u2 = hashmap_get(m->watch_pids, PID_TO_PTR(ucred->pid));
+ array = hashmap_get(m->watch_pids, PID_TO_PTR(-ucred->pid));
+ if (array) {
+ size_t k = 0;
+
+ while (array[k])
+ k++;
+
+ array_copy = newdup(Unit*, array, k+1);
+ if (!array_copy)
+ log_oom();
+ }
+ /* And now invoke the per-unit callbacks. Note that manager_invoke_notify_message() will handle duplicate units
+ * make sure we only invoke each unit's handler once. */
+ if (u1) {
+ manager_invoke_notify_message(m, u1, ucred, tags, fds);
+ found = true;
+ }
+ if (u2) {
+ manager_invoke_notify_message(m, u2, ucred, tags, fds);
+ found = true;
+ }
+ if (array_copy)
+ for (size_t i = 0; array_copy[i]; i++) {
+ manager_invoke_notify_message(m, array_copy[i], ucred, tags, fds);
+ found = true;
+ }
+
+ if (!found)
+ log_warning("Cannot find unit for notify message of PID "PID_FMT", ignoring.", ucred->pid);
+
+ if (fdset_size(fds) > 0)
+ log_warning("Got extra auxiliary fds with notification message, closing them.");
+
+ return 0;
+}
+
+static void manager_invoke_sigchld_event(
+ Manager *m,
+ Unit *u,
+ const siginfo_t *si) {
+
+ assert(m);
+ assert(u);
+ assert(si);
+
+ /* Already invoked the handler of this unit in this iteration? Then don't process this again */
+ if (u->sigchldgen == m->sigchldgen)
+ return;
+ u->sigchldgen = m->sigchldgen;
+
+ log_unit_debug(u, "Child "PID_FMT" belongs to %s.", si->si_pid, u->id);
+ unit_unwatch_pid(u, si->si_pid);
+
+ if (UNIT_VTABLE(u)->sigchld_event)
+ UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);
+}
+
+static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
+ Manager *m = userdata;
+ siginfo_t si = {};
+ int r;
+
+ assert(source);
+ assert(m);
+
+ /* First we call waitid() for a PID and do not reap the zombie. That way we can still access /proc/$PID for it
+ * while it is a zombie. */
+
+ if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
+
+ if (errno != ECHILD)
+ log_error_errno(errno, "Failed to peek for child with waitid(), ignoring: %m");
+
+ goto turn_off;
+ }
+
+ if (si.si_pid <= 0)
+ goto turn_off;
+
+ if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) {
+ _cleanup_free_ Unit **array_copy = NULL;
+ _cleanup_free_ char *name = NULL;
+ Unit *u1, *u2, **array;
+
+ (void) get_process_comm(si.si_pid, &name);
+
+ log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)",
+ si.si_pid, strna(name),
+ sigchld_code_to_string(si.si_code),
+ si.si_status,
+ strna(si.si_code == CLD_EXITED
+ ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL)
+ : signal_to_string(si.si_status)));
+
+ /* Increase the generation counter used for filtering out duplicate unit invocations */
+ m->sigchldgen++;
+
+ /* And now figure out the unit this belongs to, it might be multiple... */
+ u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid);
+ u2 = hashmap_get(m->watch_pids, PID_TO_PTR(si.si_pid));
+ array = hashmap_get(m->watch_pids, PID_TO_PTR(-si.si_pid));
+ if (array) {
+ size_t n = 0;
+
+ /* Count how many entries the array has */
+ while (array[n])
+ n++;
+
+ /* Make a copy of the array so that we don't trip up on the array changing beneath us */
+ array_copy = newdup(Unit*, array, n+1);
+ if (!array_copy)
+ log_oom();
+ }
+
+ /* Finally, execute them all. Note that u1, u2 and the array might contain duplicates, but
+ * that's fine, manager_invoke_sigchld_event() will ensure we only invoke the handlers once for
+ * each iteration. */
+ if (u1) {
+ /* We check for oom condition, in case we got SIGCHLD before the oom notification.
+ * We only do this for the cgroup the PID belonged to. */
+ (void) unit_check_oom(u1);
+
+ /* This only logs for now. In the future when the interface for kills/notifications
+ * is more stable we can extend service results table similar to how kernel oom kills
+ * are managed. */
+ (void) unit_check_oomd_kill(u1);
+
+ manager_invoke_sigchld_event(m, u1, &si);
+ }
+ if (u2)
+ manager_invoke_sigchld_event(m, u2, &si);
+ if (array_copy)
+ for (size_t i = 0; array_copy[i]; i++)
+ manager_invoke_sigchld_event(m, array_copy[i], &si);
+ }
+
+ /* And now, we actually reap the zombie. */
+ if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) {
+ log_error_errno(errno, "Failed to dequeue child, ignoring: %m");
+ return 0;
+ }
+
+ return 0;
+
+turn_off:
+ /* All children processed for now, turn off event source */
+
+ r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable SIGCHLD event source: %m");
+
+ return 0;
+}
+
+static void manager_start_target(Manager *m, const char *name, JobMode mode) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ log_debug("Activating special unit %s", name);
+
+ r = manager_add_job_by_name(m, JOB_START, name, mode, NULL, &error, NULL);
+ if (r < 0)
+ log_error("Failed to enqueue %s job: %s", name, bus_error_message(&error, r));
+}
+
+static void manager_handle_ctrl_alt_del(Manager *m) {
+ /* If the user presses C-A-D more than
+ * 7 times within 2s, we reboot/shutdown immediately,
+ * unless it was disabled in system.conf */
+
+ if (ratelimit_below(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
+ manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
+ else
+ emergency_action(m, m->cad_burst_action, EMERGENCY_ACTION_WARN, NULL, -1,
+ "Ctrl-Alt-Del was pressed more than 7 times within 2s");
+}
+
+static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ ssize_t n;
+ struct signalfd_siginfo sfsi;
+ int r;
+
+ assert(m);
+ assert(m->signal_fd == fd);
+
+ if (revents != EPOLLIN) {
+ log_warning("Got unexpected events from signal file descriptor.");
+ return 0;
+ }
+
+ n = read(m->signal_fd, &sfsi, sizeof(sfsi));
+ if (n != sizeof(sfsi)) {
+ if (n >= 0) {
+ log_warning("Truncated read from signal fd (%zu bytes), ignoring!", n);
+ return 0;
+ }
+
+ if (IN_SET(errno, EINTR, EAGAIN))
+ return 0;
+
+ /* We return an error here, which will kill this handler,
+ * to avoid a busy loop on read error. */
+ return log_error_errno(errno, "Reading from signal fd failed: %m");
+ }
+
+ log_received_signal(sfsi.ssi_signo == SIGCHLD ||
+ (sfsi.ssi_signo == SIGTERM && MANAGER_IS_USER(m))
+ ? LOG_DEBUG : LOG_INFO,
+ &sfsi);
+
+ switch (sfsi.ssi_signo) {
+
+ case SIGCHLD:
+ r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable SIGCHLD event source, ignoring: %m");
+
+ break;
+
+ case SIGTERM:
+ if (MANAGER_IS_SYSTEM(m)) {
+ /* This is for compatibility with the original sysvinit */
+ if (verify_run_space_and_log("Refusing to reexecute") < 0)
+ break;
+
+ m->objective = MANAGER_REEXECUTE;
+ break;
+ }
+
+ _fallthrough_;
+ case SIGINT:
+ if (MANAGER_IS_SYSTEM(m))
+ manager_handle_ctrl_alt_del(m);
+ else
+ manager_start_target(m, SPECIAL_EXIT_TARGET,
+ JOB_REPLACE_IRREVERSIBLY);
+ break;
+
+ case SIGWINCH:
+ /* This is a nop on non-init */
+ if (MANAGER_IS_SYSTEM(m))
+ manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
+
+ break;
+
+ case SIGPWR:
+ /* This is a nop on non-init */
+ if (MANAGER_IS_SYSTEM(m))
+ manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
+
+ break;
+
+ case SIGUSR1:
+ if (manager_dbus_is_running(m, false)) {
+ log_info("Trying to reconnect to bus...");
+
+ (void) bus_init_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_init_system(m);
+ } else {
+ log_info("Starting D-Bus service...");
+ manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
+ }
+
+ break;
+
+ case SIGUSR2: {
+ _cleanup_free_ char *dump = NULL;
+
+ r = manager_get_dump_string(m, &dump);
+ if (r < 0) {
+ log_warning_errno(errno, "Failed to acquire manager dump: %m");
+ break;
+ }
+
+ log_dump(LOG_INFO, dump);
+ break;
+ }
+
+ case SIGHUP:
+ if (verify_run_space_and_log("Refusing to reload") < 0)
+ break;
+
+ m->objective = MANAGER_RELOAD;
+ break;
+
+ default: {
+
+ /* Starting SIGRTMIN+0 */
+ static const struct {
+ const char *target;
+ JobMode mode;
+ } target_table[] = {
+ [0] = { SPECIAL_DEFAULT_TARGET, JOB_ISOLATE },
+ [1] = { SPECIAL_RESCUE_TARGET, JOB_ISOLATE },
+ [2] = { SPECIAL_EMERGENCY_TARGET, JOB_ISOLATE },
+ [3] = { SPECIAL_HALT_TARGET, JOB_REPLACE_IRREVERSIBLY },
+ [4] = { SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY },
+ [5] = { SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY },
+ [6] = { SPECIAL_KEXEC_TARGET, JOB_REPLACE_IRREVERSIBLY },
+ };
+
+ /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
+ static const ManagerObjective objective_table[] = {
+ [0] = MANAGER_HALT,
+ [1] = MANAGER_POWEROFF,
+ [2] = MANAGER_REBOOT,
+ [3] = MANAGER_KEXEC,
+ };
+
+ if ((int) sfsi.ssi_signo >= SIGRTMIN+0 &&
+ (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) {
+ int idx = (int) sfsi.ssi_signo - SIGRTMIN;
+ manager_start_target(m, target_table[idx].target,
+ target_table[idx].mode);
+ break;
+ }
+
+ if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
+ (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(objective_table)) {
+ m->objective = objective_table[sfsi.ssi_signo - SIGRTMIN - 13];
+ break;
+ }
+
+ switch (sfsi.ssi_signo - SIGRTMIN) {
+
+ case 20:
+ manager_override_show_status(m, SHOW_STATUS_YES, "signal");
+ break;
+
+ case 21:
+ manager_override_show_status(m, SHOW_STATUS_NO, "signal");
+ break;
+
+ case 22:
+ manager_override_log_level(m, LOG_DEBUG);
+ break;
+
+ case 23:
+ manager_restore_original_log_level(m);
+ break;
+
+ case 24:
+ if (MANAGER_IS_USER(m)) {
+ m->objective = MANAGER_EXIT;
+ return 0;
+ }
+
+ /* This is a nop on init */
+ break;
+
+ case 26:
+ case 29: /* compatibility: used to be mapped to LOG_TARGET_SYSLOG_OR_KMSG */
+ manager_restore_original_log_target(m);
+ break;
+
+ case 27:
+ manager_override_log_target(m, LOG_TARGET_CONSOLE);
+ break;
+
+ case 28:
+ manager_override_log_target(m, LOG_TARGET_KMSG);
+ break;
+
+ default:
+ log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo));
+ }
+ }}
+
+ return 0;
+}
+
+static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ Unit *u;
+
+ assert(m);
+ assert(m->time_change_fd == fd);
+
+ log_struct(LOG_DEBUG,
+ "MESSAGE_ID=" SD_MESSAGE_TIME_CHANGE_STR,
+ LOG_MESSAGE("Time has been changed"));
+
+ /* Restart the watch */
+ (void) manager_setup_time_change(m);
+
+ HASHMAP_FOREACH(u, m->units)
+ if (UNIT_VTABLE(u)->time_change)
+ UNIT_VTABLE(u)->time_change(u);
+
+ return 0;
+}
+
+static int manager_dispatch_timezone_change(
+ sd_event_source *source,
+ const struct inotify_event *e,
+ void *userdata) {
+
+ Manager *m = userdata;
+ int changed;
+ Unit *u;
+
+ assert(m);
+
+ log_debug("inotify event for /etc/localtime");
+
+ changed = manager_read_timezone_stat(m);
+ if (changed <= 0)
+ return changed;
+
+ /* Something changed, restart the watch, to ensure we watch the new /etc/localtime if it changed */
+ (void) manager_setup_timezone_change(m);
+
+ /* Read the new timezone */
+ tzset();
+
+ log_debug("Timezone has been changed (now: %s).", tzname[daylight]);
+
+ HASHMAP_FOREACH(u, m->units)
+ if (UNIT_VTABLE(u)->timezone_change)
+ UNIT_VTABLE(u)->timezone_change(u);
+
+ return 0;
+}
+
+static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(m->idle_pipe[2] == fd);
+
+ /* There's at least one Type=idle child that just gave up on us waiting for the boot process to complete. Let's
+ * now turn off any further console output if there's at least one service that needs console access, so that
+ * from now on our own output should not spill into that service's output anymore. After all, we support
+ * Type=idle only to beautify console output and it generally is set on services that want to own the console
+ * exclusively without our interference. */
+ m->no_console_output = m->n_on_console > 0;
+
+ /* Acknowledge the child's request, and let all all other children know too that they shouldn't wait any longer
+ * by closing the pipes towards them, which is what they are waiting for. */
+ manager_close_idle_pipe(m);
+
+ return 0;
+}
+
+static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata) {
+ Manager *m = userdata;
+ int r;
+
+ assert(m);
+ assert(source);
+
+ manager_print_jobs_in_progress(m);
+
+ r = sd_event_source_set_time_relative(source, JOBS_IN_PROGRESS_PERIOD_USEC);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
+}
+
+int manager_loop(Manager *m) {
+ RateLimit rl = { .interval = 1*USEC_PER_SEC, .burst = 50000 };
+ int r;
+
+ assert(m);
+ assert(m->objective == MANAGER_OK); /* Ensure manager_startup() has been called */
+
+ manager_check_finished(m);
+
+ /* There might still be some zombies hanging around from before we were exec()'ed. Let's reap them. */
+ r = sd_event_source_set_enabled(m->sigchld_event_source, SD_EVENT_ON);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable SIGCHLD event source: %m");
+
+ while (m->objective == MANAGER_OK) {
+ usec_t wait_usec, watchdog_usec;
+
+ watchdog_usec = manager_get_watchdog(m, WATCHDOG_RUNTIME);
+ if (m->runtime_watchdog_running)
+ (void) watchdog_ping();
+ else if (timestamp_is_set(watchdog_usec))
+ manager_retry_runtime_watchdog(m);
+
+ if (!ratelimit_below(&rl)) {
+ /* Yay, something is going seriously wrong, pause a little */
+ log_warning("Looping too fast. Throttling execution a little.");
+ sleep(1);
+ }
+
+ if (manager_dispatch_load_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_gc_job_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_gc_unit_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_cleanup_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_cgroup_realize_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_stop_when_unneeded_queue(m) > 0)
+ continue;
+
+ if (manager_dispatch_dbus_queue(m) > 0)
+ continue;
+
+ /* Sleep for watchdog runtime wait time */
+ if (timestamp_is_set(watchdog_usec))
+ wait_usec = watchdog_runtime_wait();
+ else
+ wait_usec = USEC_INFINITY;
+
+ r = sd_event_run(m->event, wait_usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+ }
+
+ return m->objective;
+}
+
+int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
+ _cleanup_free_ char *n = NULL;
+ sd_id128_t invocation_id;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(s);
+ assert(_u);
+
+ r = unit_name_from_dbus_path(s, &n);
+ if (r < 0)
+ return r;
+
+ /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it
+ * as invocation ID. */
+ r = sd_id128_from_string(n, &invocation_id);
+ if (r >= 0) {
+ u = hashmap_get(m->units_by_invocation_id, &invocation_id);
+ if (u) {
+ *_u = u;
+ return 0;
+ }
+
+ return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID,
+ "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.",
+ SD_ID128_FORMAT_VAL(invocation_id));
+ }
+
+ /* If this didn't work, we check if this is a unit name */
+ if (!unit_name_is_valid(n, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+ _cleanup_free_ char *nn = NULL;
+
+ nn = cescape(n);
+ return sd_bus_error_setf(e, SD_BUS_ERROR_INVALID_ARGS,
+ "Unit name %s is neither a valid invocation ID nor unit name.", strnull(nn));
+ }
+
+ r = manager_load_unit(m, n, NULL, e, &u);
+ if (r < 0)
+ return r;
+
+ *_u = u;
+ return 0;
+}
+
+int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j) {
+ const char *p;
+ unsigned id;
+ Job *j;
+ int r;
+
+ assert(m);
+ assert(s);
+ assert(_j);
+
+ p = startswith(s, "/org/freedesktop/systemd1/job/");
+ if (!p)
+ return -EINVAL;
+
+ r = safe_atou(p, &id);
+ if (r < 0)
+ return r;
+
+ j = manager_get_job(m, id);
+ if (!j)
+ return -ENOENT;
+
+ *_j = j;
+
+ return 0;
+}
+
+void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
+
+#if HAVE_AUDIT
+ _cleanup_free_ char *p = NULL;
+ const char *msg;
+ int audit_fd, r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ audit_fd = get_audit_fd();
+ if (audit_fd < 0)
+ return;
+
+ /* Don't generate audit events if the service was already
+ * started and we're just deserializing */
+ if (MANAGER_IS_RELOADING(m))
+ return;
+
+ if (u->type != UNIT_SERVICE)
+ return;
+
+ r = unit_name_to_prefix_and_instance(u->id, &p);
+ if (r < 0) {
+ log_error_errno(r, "Failed to extract prefix and instance of unit name: %m");
+ return;
+ }
+
+ msg = strjoina("unit=", p);
+ if (audit_log_user_comm_message(audit_fd, type, msg, "systemd", NULL, NULL, NULL, success) < 0) {
+ if (errno == EPERM)
+ /* We aren't allowed to send audit messages?
+ * Then let's not retry again. */
+ close_audit_fd();
+ else
+ log_warning_errno(errno, "Failed to send audit message: %m");
+ }
+#endif
+
+}
+
+void manager_send_unit_plymouth(Manager *m, Unit *u) {
+ static const union sockaddr_union sa = PLYMOUTH_SOCKET;
+ _cleanup_free_ char *message = NULL;
+ _cleanup_close_ int fd = -1;
+ int n = 0;
+
+ /* Don't generate plymouth events if the service was already
+ * started and we're just deserializing */
+ if (MANAGER_IS_RELOADING(m))
+ return;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ if (detect_container() > 0)
+ return;
+
+ if (!IN_SET(u->type, UNIT_SERVICE, UNIT_MOUNT, UNIT_SWAP))
+ return;
+
+ /* We set SOCK_NONBLOCK here so that we rather drop the
+ * message then wait for plymouth */
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0) {
+ log_error_errno(errno, "socket() failed: %m");
+ return;
+ }
+
+ if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
+ if (!IN_SET(errno, EAGAIN, ENOENT) && !ERRNO_IS_DISCONNECT(errno))
+ log_error_errno(errno, "connect() failed: %m");
+ return;
+ }
+
+ if (asprintf(&message, "U\002%c%s%n", (int) (strlen(u->id) + 1), u->id, &n) < 0) {
+ log_oom();
+ return;
+ }
+
+ errno = 0;
+ if (write(fd, message, n + 1) != n + 1)
+ if (!IN_SET(errno, EAGAIN, ENOENT) && !ERRNO_IS_DISCONNECT(errno))
+ log_error_errno(errno, "Failed to write Plymouth message: %m");
+}
+
+int manager_open_serialization(Manager *m, FILE **_f) {
+ _cleanup_close_ int fd = -1;
+ FILE *f;
+
+ assert(_f);
+
+ fd = open_serialization_fd("systemd-state");
+ if (fd < 0)
+ return fd;
+
+ f = take_fdopen(&fd, "w+");
+ if (!f)
+ return -errno;
+
+ *_f = f;
+ return 0;
+}
+
+static bool manager_timestamp_shall_serialize(ManagerTimestamp t) {
+
+ if (!in_initrd())
+ return true;
+
+ /* The following timestamps only apply to the host system, hence only serialize them there */
+ return !IN_SET(t,
+ MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH,
+ MANAGER_TIMESTAMP_SECURITY_START, MANAGER_TIMESTAMP_SECURITY_FINISH,
+ MANAGER_TIMESTAMP_GENERATORS_START, MANAGER_TIMESTAMP_GENERATORS_FINISH,
+ MANAGER_TIMESTAMP_UNITS_LOAD_START, MANAGER_TIMESTAMP_UNITS_LOAD_FINISH);
+}
+
+#define DESTROY_IPC_FLAG (UINT32_C(1) << 31)
+
+static void manager_serialize_uid_refs_internal(
+ Manager *m,
+ FILE *f,
+ Hashmap **uid_refs,
+ const char *field_name) {
+
+ void *p, *k;
+
+ assert(m);
+ assert(f);
+ assert(uid_refs);
+ assert(field_name);
+
+ /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as
+ * the actual counter of it is better rebuild after a reload/reexec. */
+
+ HASHMAP_FOREACH_KEY(p, k, *uid_refs) {
+ uint32_t c;
+ uid_t uid;
+
+ uid = PTR_TO_UID(k);
+ c = PTR_TO_UINT32(p);
+
+ if (!(c & DESTROY_IPC_FLAG))
+ continue;
+
+ (void) serialize_item_format(f, field_name, UID_FMT, uid);
+ }
+}
+
+static void manager_serialize_uid_refs(Manager *m, FILE *f) {
+ manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid");
+}
+
+static void manager_serialize_gid_refs(Manager *m, FILE *f) {
+ manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid");
+}
+
+int manager_serialize(
+ Manager *m,
+ FILE *f,
+ FDSet *fds,
+ bool switching_root) {
+
+ const char *t;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m);
+
+ (void) serialize_item_format(f, "current-job-id", "%" PRIu32, m->current_job_id);
+ (void) serialize_item_format(f, "n-installed-jobs", "%u", m->n_installed_jobs);
+ (void) serialize_item_format(f, "n-failed-jobs", "%u", m->n_failed_jobs);
+ (void) serialize_bool(f, "taint-usr", m->taint_usr);
+ (void) serialize_bool(f, "ready-sent", m->ready_sent);
+ (void) serialize_bool(f, "taint-logged", m->taint_logged);
+ (void) serialize_bool(f, "service-watchdogs", m->service_watchdogs);
+
+ /* After switching root, udevd has not been started yet. So, enumeration results should not be emitted. */
+ (void) serialize_bool(f, "honor-device-enumeration", !switching_root);
+
+ if (m->show_status_overridden != _SHOW_STATUS_INVALID)
+ (void) serialize_item(f, "show-status-overridden",
+ show_status_to_string(m->show_status_overridden));
+
+ if (m->log_level_overridden)
+ (void) serialize_item_format(f, "log-level-override", "%i", log_get_max_level());
+ if (m->log_target_overridden)
+ (void) serialize_item(f, "log-target-override", log_target_to_string(log_get_target()));
+
+ (void) serialize_usec(f, "runtime-watchdog-overridden", m->watchdog_overridden[WATCHDOG_RUNTIME]);
+ (void) serialize_usec(f, "reboot-watchdog-overridden", m->watchdog_overridden[WATCHDOG_REBOOT]);
+ (void) serialize_usec(f, "kexec-watchdog-overridden", m->watchdog_overridden[WATCHDOG_KEXEC]);
+
+ for (ManagerTimestamp q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
+ _cleanup_free_ char *joined = NULL;
+
+ if (!manager_timestamp_shall_serialize(q))
+ continue;
+
+ joined = strjoin(manager_timestamp_to_string(q), "-timestamp");
+ if (!joined)
+ return log_oom();
+
+ (void) serialize_dual_timestamp(f, joined, m->timestamps + q);
+ }
+
+ if (!switching_root)
+ (void) serialize_strv(f, "env", m->client_environment);
+
+ if (m->notify_fd >= 0) {
+ r = serialize_fd(f, fds, "notify-fd", m->notify_fd);
+ if (r < 0)
+ return r;
+
+ (void) serialize_item(f, "notify-socket", m->notify_socket);
+ }
+
+ if (m->cgroups_agent_fd >= 0) {
+ r = serialize_fd(f, fds, "cgroups-agent-fd", m->cgroups_agent_fd);
+ if (r < 0)
+ return r;
+ }
+
+ if (m->user_lookup_fds[0] >= 0) {
+ int copy0, copy1;
+
+ copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
+ if (copy0 < 0)
+ return log_error_errno(copy0, "Failed to add user lookup fd to serialization: %m");
+
+ copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
+ if (copy1 < 0)
+ return log_error_errno(copy1, "Failed to add user lookup fd to serialization: %m");
+
+ (void) serialize_item_format(f, "user-lookup", "%i %i", copy0, copy1);
+ }
+
+ bus_track_serialize(m->subscribed, f, "subscribed");
+
+ r = dynamic_user_serialize(m, f, fds);
+ if (r < 0)
+ return r;
+
+ manager_serialize_uid_refs(m, f);
+ manager_serialize_gid_refs(m, f);
+
+ r = exec_runtime_serialize(m, f, fds);
+ if (r < 0)
+ return r;
+
+ (void) fputc('\n', f);
+
+ HASHMAP_FOREACH_KEY(u, t, m->units) {
+ if (u->id != t)
+ continue;
+
+ /* Start marker */
+ fputs(u->id, f);
+ fputc('\n', f);
+
+ r = unit_serialize(u, f, fds, !switching_root);
+ if (r < 0)
+ return r;
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush serialization: %m");
+
+ r = bus_fdset_add_all(m, fds);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add bus sockets to serialization: %m");
+
+ return 0;
+}
+
+static int manager_deserialize_one_unit(Manager *m, const char *name, FILE *f, FDSet *fds) {
+ Unit *u;
+ int r;
+
+ r = manager_load_unit(m, name, NULL, NULL, &u);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ return r;
+ return log_notice_errno(r, "Failed to load unit \"%s\", skipping deserialization: %m", name);
+ }
+
+ r = unit_deserialize(u, f, fds);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ return r;
+ return log_notice_errno(r, "Failed to deserialize unit \"%s\", skipping: %m", name);
+ }
+
+ return 0;
+}
+
+static int manager_deserialize_units(Manager *m, FILE *f, FDSet *fds) {
+ const char *unit_name;
+ int r;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ /* Start marker */
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ break;
+
+ unit_name = strstrip(line);
+
+ r = manager_deserialize_one_unit(m, unit_name, f, fds);
+ if (r == -ENOMEM)
+ return r;
+ if (r < 0) {
+ r = unit_deserialize_skip(f);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+usec_t manager_get_watchdog(Manager *m, WatchdogType t) {
+ assert(m);
+
+ if (MANAGER_IS_USER(m))
+ return USEC_INFINITY;
+
+ if (timestamp_is_set(m->watchdog_overridden[t]))
+ return m->watchdog_overridden[t];
+
+ return m->watchdog[t];
+}
+
+void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
+ int r = 0;
+
+ assert(m);
+
+ if (MANAGER_IS_USER(m))
+ return;
+
+ if (m->watchdog[t] == timeout)
+ return;
+
+ if (t == WATCHDOG_RUNTIME)
+ if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME])) {
+ if (timestamp_is_set(timeout)) {
+ r = watchdog_set_timeout(&timeout);
+
+ if (r >= 0)
+ m->runtime_watchdog_running = true;
+ } else {
+ watchdog_close(true);
+ m->runtime_watchdog_running = false;
+ }
+ }
+
+ m->watchdog[t] = timeout;
+}
+
+int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
+ int r = 0;
+
+ assert(m);
+
+ if (MANAGER_IS_USER(m))
+ return 0;
+
+ if (m->watchdog_overridden[t] == timeout)
+ return 0;
+
+ if (t == WATCHDOG_RUNTIME) {
+ usec_t *p;
+
+ p = timestamp_is_set(timeout) ? &timeout : &m->watchdog[t];
+ if (timestamp_is_set(*p)) {
+ r = watchdog_set_timeout(p);
+
+ if (r >= 0)
+ m->runtime_watchdog_running = true;
+ } else {
+ watchdog_close(true);
+ m->runtime_watchdog_running = false;
+ }
+ }
+
+ m->watchdog_overridden[t] = timeout;
+
+ return 0;
+}
+
+void manager_retry_runtime_watchdog(Manager *m) {
+ int r = 0;
+
+ assert(m);
+
+ if (timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME]))
+ r = watchdog_set_timeout(&m->watchdog_overridden[WATCHDOG_RUNTIME]);
+ else
+ r = watchdog_set_timeout(&m->watchdog[WATCHDOG_RUNTIME]);
+
+ if (r >= 0)
+ m->runtime_watchdog_running = true;
+}
+
+static void manager_deserialize_uid_refs_one_internal(
+ Manager *m,
+ Hashmap** uid_refs,
+ const char *value) {
+
+ uid_t uid;
+ uint32_t c;
+ int r;
+
+ assert(m);
+ assert(uid_refs);
+ assert(value);
+
+ r = parse_uid(value, &uid);
+ if (r < 0 || uid == 0) {
+ log_debug("Unable to parse UID reference serialization: " UID_FMT, uid);
+ return;
+ }
+
+ r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return;
+ }
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+ if (c & DESTROY_IPC_FLAG)
+ return;
+
+ c |= DESTROY_IPC_FLAG;
+
+ r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add UID reference entry: %m");
+ return;
+ }
+}
+
+static void manager_deserialize_uid_refs_one(Manager *m, const char *value) {
+ manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value);
+}
+
+static void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
+ manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value);
+}
+
+int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
+ int r = 0;
+
+ assert(m);
+ assert(f);
+
+ if (DEBUG_LOGGING) {
+ if (fdset_isempty(fds))
+ log_debug("No file descriptors passed");
+ else {
+ int fd;
+
+ FDSET_FOREACH(fd, fds) {
+ _cleanup_free_ char *fn = NULL;
+
+ r = fd_get_path(fd, &fn);
+ if (r < 0)
+ log_debug_errno(r, "Received serialized fd %i → %m", fd);
+ else
+ log_debug("Received serialized fd %i → %s", fd, strna(fn));
+ }
+ }
+ }
+
+ log_debug("Deserializing state...");
+
+ /* If we are not in reload mode yet, enter it now. Not that this is recursive, a caller might already have
+ * increased it to non-zero, which is why we just increase it by one here and down again at the end of this
+ * call. */
+ _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *val, *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+ if (isempty(l)) /* end marker */
+ break;
+
+ if ((val = startswith(l, "current-job-id="))) {
+ uint32_t id;
+
+ if (safe_atou32(val, &id) < 0)
+ log_notice("Failed to parse current job id value '%s', ignoring.", val);
+ else
+ m->current_job_id = MAX(m->current_job_id, id);
+
+ } else if ((val = startswith(l, "n-installed-jobs="))) {
+ uint32_t n;
+
+ if (safe_atou32(val, &n) < 0)
+ log_notice("Failed to parse installed jobs counter '%s', ignoring.", val);
+ else
+ m->n_installed_jobs += n;
+
+ } else if ((val = startswith(l, "n-failed-jobs="))) {
+ uint32_t n;
+
+ if (safe_atou32(val, &n) < 0)
+ log_notice("Failed to parse failed jobs counter '%s', ignoring.", val);
+ else
+ m->n_failed_jobs += n;
+
+ } else if ((val = startswith(l, "taint-usr="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse taint /usr flag '%s', ignoring.", val);
+ else
+ m->taint_usr = m->taint_usr || b;
+
+ } else if ((val = startswith(l, "ready-sent="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse ready-sent flag '%s', ignoring.", val);
+ else
+ m->ready_sent = m->ready_sent || b;
+
+ } else if ((val = startswith(l, "taint-logged="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse taint-logged flag '%s', ignoring.", val);
+ else
+ m->taint_logged = m->taint_logged || b;
+
+ } else if ((val = startswith(l, "service-watchdogs="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse service-watchdogs flag '%s', ignoring.", val);
+ else
+ m->service_watchdogs = b;
+
+ } else if ((val = startswith(l, "honor-device-enumeration="))) {
+ int b;
+
+ b = parse_boolean(val);
+ if (b < 0)
+ log_notice("Failed to parse honor-device-enumeration flag '%s', ignoring.", val);
+ else
+ m->honor_device_enumeration = b;
+
+ } else if ((val = startswith(l, "show-status-overridden="))) {
+ ShowStatus s;
+
+ s = show_status_from_string(val);
+ if (s < 0)
+ log_notice("Failed to parse show-status-overridden flag '%s', ignoring.", val);
+ else
+ manager_override_show_status(m, s, "deserialize");
+
+ } else if ((val = startswith(l, "log-level-override="))) {
+ int level;
+
+ level = log_level_from_string(val);
+ if (level < 0)
+ log_notice("Failed to parse log-level-override value '%s', ignoring.", val);
+ else
+ manager_override_log_level(m, level);
+
+ } else if ((val = startswith(l, "log-target-override="))) {
+ LogTarget target;
+
+ target = log_target_from_string(val);
+ if (target < 0)
+ log_notice("Failed to parse log-target-override value '%s', ignoring.", val);
+ else
+ manager_override_log_target(m, target);
+
+ } else if ((val = startswith(l, "runtime-watchdog-overridden="))) {
+ usec_t t;
+
+ if (deserialize_usec(val, &t) < 0)
+ log_notice("Failed to parse runtime-watchdog-overridden value '%s', ignoring.", val);
+ else
+ manager_override_watchdog(m, WATCHDOG_RUNTIME, t);
+
+ } else if ((val = startswith(l, "reboot-watchdog-overridden="))) {
+ usec_t t;
+
+ if (deserialize_usec(val, &t) < 0)
+ log_notice("Failed to parse reboot-watchdog-overridden value '%s', ignoring.", val);
+ else
+ manager_override_watchdog(m, WATCHDOG_REBOOT, t);
+
+ } else if ((val = startswith(l, "kexec-watchdog-overridden="))) {
+ usec_t t;
+
+ if (deserialize_usec(val, &t) < 0)
+ log_notice("Failed to parse kexec-watchdog-overridden value '%s', ignoring.", val);
+ else
+ manager_override_watchdog(m, WATCHDOG_KEXEC, t);
+
+ } else if (startswith(l, "env=")) {
+ r = deserialize_environment(l + 4, &m->client_environment);
+ if (r < 0)
+ log_notice_errno(r, "Failed to parse environment entry: \"%s\", ignoring: %m", l);
+
+ } else if ((val = startswith(l, "notify-fd="))) {
+ int fd;
+
+ if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_notice("Failed to parse notify fd, ignoring: \"%s\"", val);
+ else {
+ m->notify_event_source = sd_event_source_unref(m->notify_event_source);
+ safe_close(m->notify_fd);
+ m->notify_fd = fdset_remove(fds, fd);
+ }
+
+ } else if ((val = startswith(l, "notify-socket="))) {
+ r = free_and_strdup(&m->notify_socket, val);
+ if (r < 0)
+ return r;
+
+ } else if ((val = startswith(l, "cgroups-agent-fd="))) {
+ int fd;
+
+ if (safe_atoi(val, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_notice("Failed to parse cgroups agent fd, ignoring.: %s", val);
+ else {
+ m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
+ safe_close(m->cgroups_agent_fd);
+ m->cgroups_agent_fd = fdset_remove(fds, fd);
+ }
+
+ } else if ((val = startswith(l, "user-lookup="))) {
+ int fd0, fd1;
+
+ if (sscanf(val, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
+ log_notice("Failed to parse user lookup fd, ignoring: %s", val);
+ else {
+ m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+ safe_close_pair(m->user_lookup_fds);
+ m->user_lookup_fds[0] = fdset_remove(fds, fd0);
+ m->user_lookup_fds[1] = fdset_remove(fds, fd1);
+ }
+
+ } else if ((val = startswith(l, "dynamic-user=")))
+ dynamic_user_deserialize_one(m, val, fds);
+ else if ((val = startswith(l, "destroy-ipc-uid=")))
+ manager_deserialize_uid_refs_one(m, val);
+ else if ((val = startswith(l, "destroy-ipc-gid=")))
+ manager_deserialize_gid_refs_one(m, val);
+ else if ((val = startswith(l, "exec-runtime=")))
+ (void) exec_runtime_deserialize_one(m, val, fds);
+ else if ((val = startswith(l, "subscribed="))) {
+
+ if (strv_extend(&m->deserialized_subscribed, val) < 0)
+ return -ENOMEM;
+
+ } else {
+ ManagerTimestamp q;
+
+ for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
+ val = startswith(l, manager_timestamp_to_string(q));
+ if (!val)
+ continue;
+
+ val = startswith(val, "-timestamp=");
+ if (val)
+ break;
+ }
+
+ if (q < _MANAGER_TIMESTAMP_MAX) /* found it */
+ (void) deserialize_dual_timestamp(val, m->timestamps + q);
+ else if (!startswith(l, "kdbus-fd=")) /* ignore kdbus */
+ log_notice("Unknown serialization item '%s', ignoring.", l);
+ }
+ }
+
+ return manager_deserialize_units(m, f, fds);
+}
+
+int manager_reload(Manager *m) {
+ _cleanup_(manager_reloading_stopp) Manager *reloading = NULL;
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(m);
+
+ r = manager_open_serialization(m, &f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create serialization file: %m");
+
+ fds = fdset_new();
+ if (!fds)
+ return log_oom();
+
+ /* We are officially in reload mode from here on. */
+ reloading = manager_reloading_start(m);
+
+ r = manager_serialize(m, f, fds, false);
+ if (r < 0)
+ return r;
+
+ if (fseeko(f, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to seek to beginning of serialization: %m");
+
+ /* 💀 This is the point of no return, from here on there is no way back. 💀 */
+ reloading = NULL;
+
+ bus_manager_send_reloading(m, true);
+
+ /* Start by flushing out all jobs and units, all generated units, all runtime environments, all dynamic users
+ * and everything else that is worth flushing out. We'll get it all back from the serialization — if we need
+ * it.*/
+
+ manager_clear_jobs_and_units(m);
+ lookup_paths_flush_generator(&m->lookup_paths);
+ lookup_paths_free(&m->lookup_paths);
+ exec_runtime_vacuum(m);
+ dynamic_user_vacuum(m, false);
+ m->uid_refs = hashmap_free(m->uid_refs);
+ m->gid_refs = hashmap_free(m->gid_refs);
+
+ r = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to initialize path lookup table, ignoring: %m");
+
+ (void) manager_run_environment_generators(m);
+ (void) manager_run_generators(m);
+
+ lookup_paths_log(&m->lookup_paths);
+
+ /* We flushed out generated files, for which we don't watch mtime, so we should flush the old map. */
+ manager_free_unit_name_maps(m);
+
+ /* First, enumerate what we can from kernel and suchlike */
+ manager_enumerate_perpetual(m);
+ manager_enumerate(m);
+
+ /* Second, deserialize our stored data */
+ r = manager_deserialize(m, f, fds);
+ if (r < 0)
+ log_warning_errno(r, "Deserialization failed, proceeding anyway: %m");
+
+ /* We don't need the serialization anymore */
+ f = safe_fclose(f);
+
+ /* Re-register notify_fd as event source, and set up other sockets/communication channels we might need */
+ (void) manager_setup_notify(m);
+ (void) manager_setup_cgroups_agent(m);
+ (void) manager_setup_user_lookup_fd(m);
+
+ /* Third, fire things up! */
+ manager_coldplug(m);
+
+ /* Clean up runtime objects no longer referenced */
+ manager_vacuum(m);
+
+ /* Consider the reload process complete now. */
+ assert(m->n_reloading > 0);
+ m->n_reloading--;
+
+ /* On manager reloading, device tag data should exists, thus, we should honor the results of device
+ * enumeration. The flag should be always set correctly by the serialized data, but it may fail. So,
+ * let's always set the flag here for safety. */
+ m->honor_device_enumeration = true;
+
+ manager_ready(m);
+
+ m->send_reloading_done = true;
+ return 0;
+}
+
+void manager_reset_failed(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ HASHMAP_FOREACH(u, m->units)
+ unit_reset_failed(u);
+}
+
+bool manager_unit_inactive_or_pending(Manager *m, const char *name) {
+ Unit *u;
+
+ assert(m);
+ assert(name);
+
+ /* Returns true if the unit is inactive or going down */
+ u = manager_get_unit(m, name);
+ if (!u)
+ return true;
+
+ return unit_inactive_or_pending(u);
+}
+
+static void log_taint_string(Manager *m) {
+ _cleanup_free_ char *taint = NULL;
+
+ assert(m);
+
+ if (MANAGER_IS_USER(m) || m->taint_logged)
+ return;
+
+ m->taint_logged = true; /* only check for taint once */
+
+ taint = manager_taint_string(m);
+ if (isempty(taint))
+ return;
+
+ log_struct(LOG_NOTICE,
+ LOG_MESSAGE("System is tainted: %s", taint),
+ "TAINT=%s", taint,
+ "MESSAGE_ID=" SD_MESSAGE_TAINTED_STR);
+}
+
+static void manager_notify_finished(Manager *m) {
+ char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
+ usec_t firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec;
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return;
+
+ if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ char buf[FORMAT_TIMESPAN_MAX + STRLEN(" (firmware) + ") + FORMAT_TIMESPAN_MAX + STRLEN(" (loader) + ")]
+ = {};
+ char *p = buf;
+ size_t size = sizeof buf;
+
+ /* Note that MANAGER_TIMESTAMP_KERNEL's monotonic value is always at 0, and
+ * MANAGER_TIMESTAMP_FIRMWARE's and MANAGER_TIMESTAMP_LOADER's monotonic value should be considered
+ * negative values. */
+
+ firmware_usec = m->timestamps[MANAGER_TIMESTAMP_FIRMWARE].monotonic - m->timestamps[MANAGER_TIMESTAMP_LOADER].monotonic;
+ loader_usec = m->timestamps[MANAGER_TIMESTAMP_LOADER].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
+ userspace_usec = m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic - m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
+ total_usec = m->timestamps[MANAGER_TIMESTAMP_FIRMWARE].monotonic + m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic;
+
+ if (firmware_usec > 0)
+ size = strpcpyf(&p, size, "%s (firmware) + ", format_timespan(ts, sizeof(ts), firmware_usec, USEC_PER_MSEC));
+ if (loader_usec > 0)
+ size = strpcpyf(&p, size, "%s (loader) + ", format_timespan(ts, sizeof(ts), loader_usec, USEC_PER_MSEC));
+
+ if (dual_timestamp_is_set(&m->timestamps[MANAGER_TIMESTAMP_INITRD])) {
+
+ /* The initrd case on bare-metal*/
+ kernel_usec = m->timestamps[MANAGER_TIMESTAMP_INITRD].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
+ initrd_usec = m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic - m->timestamps[MANAGER_TIMESTAMP_INITRD].monotonic;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
+ "KERNEL_USEC="USEC_FMT, kernel_usec,
+ "INITRD_USEC="USEC_FMT, initrd_usec,
+ "USERSPACE_USEC="USEC_FMT, userspace_usec,
+ LOG_MESSAGE("Startup finished in %s%s (kernel) + %s (initrd) + %s (userspace) = %s.",
+ buf,
+ format_timespan(kernel, sizeof(kernel), kernel_usec, USEC_PER_MSEC),
+ format_timespan(initrd, sizeof(initrd), initrd_usec, USEC_PER_MSEC),
+ format_timespan(userspace, sizeof(userspace), userspace_usec, USEC_PER_MSEC),
+ format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)));
+ } else {
+ /* The initrd-less case on bare-metal*/
+
+ kernel_usec = m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic - m->timestamps[MANAGER_TIMESTAMP_KERNEL].monotonic;
+ initrd_usec = 0;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
+ "KERNEL_USEC="USEC_FMT, kernel_usec,
+ "USERSPACE_USEC="USEC_FMT, userspace_usec,
+ LOG_MESSAGE("Startup finished in %s%s (kernel) + %s (userspace) = %s.",
+ buf,
+ format_timespan(kernel, sizeof(kernel), kernel_usec, USEC_PER_MSEC),
+ format_timespan(userspace, sizeof(userspace), userspace_usec, USEC_PER_MSEC),
+ format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)));
+ }
+ } else {
+ /* The container and --user case */
+ firmware_usec = loader_usec = initrd_usec = kernel_usec = 0;
+ total_usec = userspace_usec = m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic - m->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_USER_STARTUP_FINISHED_STR,
+ "USERSPACE_USEC="USEC_FMT, userspace_usec,
+ LOG_MESSAGE("Startup finished in %s.",
+ format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC)));
+ }
+
+ bus_manager_send_finished(m, firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec);
+
+ sd_notifyf(false,
+ m->ready_sent ? "STATUS=Startup finished in %s."
+ : "READY=1\n"
+ "STATUS=Startup finished in %s.",
+ format_timespan(sum, sizeof(sum), total_usec, USEC_PER_MSEC));
+ m->ready_sent = true;
+
+ log_taint_string(m);
+}
+
+static void manager_send_ready(Manager *m) {
+ assert(m);
+
+ /* We send READY=1 on reaching basic.target only when running in --user mode. */
+ if (!MANAGER_IS_USER(m) || m->ready_sent)
+ return;
+
+ m->ready_sent = true;
+
+ sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Reached " SPECIAL_BASIC_TARGET ".");
+}
+
+static void manager_check_basic_target(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ /* Small shortcut */
+ if (m->ready_sent && m->taint_logged)
+ return;
+
+ u = manager_get_unit(m, SPECIAL_BASIC_TARGET);
+ if (!u || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return;
+
+ /* For user managers, send out READY=1 as soon as we reach basic.target */
+ manager_send_ready(m);
+
+ /* Log the taint string as soon as we reach basic.target */
+ log_taint_string(m);
+}
+
+void manager_check_finished(Manager *m) {
+ assert(m);
+
+ if (MANAGER_IS_RELOADING(m))
+ return;
+
+ /* Verify that we have entered the event loop already, and not left it again. */
+ if (!MANAGER_IS_RUNNING(m))
+ return;
+
+ manager_check_basic_target(m);
+
+ if (hashmap_size(m->jobs) > 0) {
+ if (m->jobs_in_progress_event_source)
+ /* Ignore any failure, this is only for feedback */
+ (void) sd_event_source_set_time(m->jobs_in_progress_event_source,
+ manager_watch_jobs_next_time(m));
+ return;
+ }
+
+ /* The jobs hashmap tends to grow a lot during boot, and then it's not reused until shutdown. Let's
+ kill the hashmap if it is relatively large. */
+ if (hashmap_buckets(m->jobs) > hashmap_size(m->units) / 10)
+ m->jobs = hashmap_free(m->jobs);
+
+ manager_flip_auto_status(m, false, "boot finished");
+
+ /* Notify Type=idle units that we are done now */
+ manager_close_idle_pipe(m);
+
+ /* Turn off confirm spawn now */
+ m->confirm_spawn = NULL;
+
+ /* No need to update ask password status when we're going non-interactive */
+ manager_close_ask_password(m);
+
+ /* This is no longer the first boot */
+ manager_set_first_boot(m, false);
+
+ if (MANAGER_IS_FINISHED(m))
+ return;
+
+ dual_timestamp_get(m->timestamps + MANAGER_TIMESTAMP_FINISH);
+
+ manager_notify_finished(m);
+
+ manager_invalidate_startup_units(m);
+}
+
+static bool generator_path_any(const char* const* paths) {
+ char **path;
+ bool found = false;
+
+ /* Optimize by skipping the whole process by not creating output directories
+ * if no generators are found. */
+ STRV_FOREACH(path, (char**) paths)
+ if (access(*path, F_OK) == 0)
+ found = true;
+ else if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open generator directory %s: %m", *path);
+
+ return found;
+}
+
+static int manager_run_environment_generators(Manager *m) {
+ char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
+ _cleanup_strv_free_ char **paths = NULL;
+ void* args[] = {
+ [STDOUT_GENERATE] = &tmp,
+ [STDOUT_COLLECT] = &tmp,
+ [STDOUT_CONSUME] = &m->transient_environment,
+ };
+ int r;
+
+ if (MANAGER_IS_TEST_RUN(m) && !(m->test_run_flags & MANAGER_TEST_RUN_ENV_GENERATORS))
+ return 0;
+
+ paths = env_generator_binary_paths(MANAGER_IS_SYSTEM(m));
+ if (!paths)
+ return log_oom();
+
+ if (!generator_path_any((const char* const*) paths))
+ return 0;
+
+ RUN_WITH_UMASK(0022)
+ r = execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC, gather_environment,
+ args, NULL, m->transient_environment, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+ return r;
+}
+
+static int manager_run_generators(Manager *m) {
+ _cleanup_strv_free_ char **paths = NULL;
+ const char *argv[5];
+ int r;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m) && !(m->test_run_flags & MANAGER_TEST_RUN_GENERATORS))
+ return 0;
+
+ paths = generator_binary_paths(m->unit_file_scope);
+ if (!paths)
+ return log_oom();
+
+ if (!generator_path_any((const char* const*) paths))
+ return 0;
+
+ r = lookup_paths_mkdir_generator(&m->lookup_paths);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create generator directories: %m");
+ goto finish;
+ }
+
+ argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
+ argv[1] = m->lookup_paths.generator;
+ argv[2] = m->lookup_paths.generator_early;
+ argv[3] = m->lookup_paths.generator_late;
+ argv[4] = NULL;
+
+ RUN_WITH_UMASK(0022)
+ (void) execute_directories((const char* const*) paths, DEFAULT_TIMEOUT_USEC, NULL, NULL,
+ (char**) argv, m->transient_environment, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+
+ r = 0;
+
+finish:
+ lookup_paths_trim_generator(&m->lookup_paths);
+ return r;
+}
+
+int manager_transient_environment_add(Manager *m, char **plus) {
+ char **a;
+
+ assert(m);
+
+ if (strv_isempty(plus))
+ return 0;
+
+ a = strv_env_merge(2, m->transient_environment, plus);
+ if (!a)
+ return log_oom();
+
+ sanitize_environment(a);
+
+ return strv_free_and_replace(m->transient_environment, a);
+}
+
+int manager_client_environment_modify(
+ Manager *m,
+ char **minus,
+ char **plus) {
+
+ char **a = NULL, **b = NULL, **l;
+
+ assert(m);
+
+ if (strv_isempty(minus) && strv_isempty(plus))
+ return 0;
+
+ l = m->client_environment;
+
+ if (!strv_isempty(minus)) {
+ a = strv_env_delete(l, 1, minus);
+ if (!a)
+ return -ENOMEM;
+
+ l = a;
+ }
+
+ if (!strv_isempty(plus)) {
+ b = strv_env_merge(2, l, plus);
+ if (!b) {
+ strv_free(a);
+ return -ENOMEM;
+ }
+
+ l = b;
+ }
+
+ if (m->client_environment != l)
+ strv_free(m->client_environment);
+
+ if (a != l)
+ strv_free(a);
+ if (b != l)
+ strv_free(b);
+
+ m->client_environment = sanitize_environment(l);
+ return 0;
+}
+
+int manager_get_effective_environment(Manager *m, char ***ret) {
+ char **l;
+
+ assert(m);
+ assert(ret);
+
+ l = strv_env_merge(2, m->transient_environment, m->client_environment);
+ if (!l)
+ return -ENOMEM;
+
+ *ret = l;
+ return 0;
+}
+
+int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
+ assert(m);
+
+ for (unsigned i = 0; i < _RLIMIT_MAX; i++) {
+ m->rlimit[i] = mfree(m->rlimit[i]);
+
+ if (!default_rlimit[i])
+ continue;
+
+ m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
+ if (!m->rlimit[i])
+ return log_oom();
+ }
+
+ return 0;
+}
+
+void manager_recheck_dbus(Manager *m) {
+ assert(m);
+
+ /* Connects to the bus if the dbus service and socket are running. If we are running in user mode this is all
+ * it does. In system mode we'll also connect to the system bus (which will most likely just reuse the
+ * connection of the API bus). That's because the system bus after all runs as service of the system instance,
+ * while in the user instance we can assume it's already there. */
+
+ if (MANAGER_IS_RELOADING(m))
+ return; /* don't check while we are reloading… */
+
+ if (manager_dbus_is_running(m, false)) {
+ (void) bus_init_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_init_system(m);
+ } else {
+ (void) bus_done_api(m);
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) bus_done_system(m);
+ }
+}
+
+static bool manager_journal_is_running(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ if (MANAGER_IS_TEST_RUN(m))
+ return false;
+
+ /* If we are the user manager we can safely assume that the journal is up */
+ if (!MANAGER_IS_SYSTEM(m))
+ return true;
+
+ /* Check that the socket is not only up, but in RUNNING state */
+ u = manager_get_unit(m, SPECIAL_JOURNALD_SOCKET);
+ if (!u)
+ return false;
+ if (SOCKET(u)->state != SOCKET_RUNNING)
+ return false;
+
+ /* Similar, check if the daemon itself is fully up, too */
+ u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
+ if (!u)
+ return false;
+ if (!IN_SET(SERVICE(u)->state, SERVICE_RELOAD, SERVICE_RUNNING))
+ return false;
+
+ return true;
+}
+
+void disable_printk_ratelimit(void) {
+ /* Disable kernel's printk ratelimit.
+ *
+ * Logging to /dev/kmsg is most useful during early boot and shutdown, where normal logging
+ * mechanisms are not available. The semantics of this sysctl are such that any kernel command-line
+ * setting takes precedence. */
+ int r;
+
+ r = sysctl_write("kernel/printk_devkmsg", "on");
+ if (r < 0)
+ log_debug_errno(r, "Failed to set sysctl kernel.printk_devkmsg=on: %m");
+}
+
+void manager_recheck_journal(Manager *m) {
+
+ assert(m);
+
+ /* Don't bother with this unless we are in the special situation of being PID 1 */
+ if (getpid_cached() != 1)
+ return;
+
+ /* Don't check this while we are reloading, things might still change */
+ if (MANAGER_IS_RELOADING(m))
+ return;
+
+ /* The journal is fully and entirely up? If so, let's permit logging to it, if that's configured. If the
+ * journal is down, don't ever log to it, otherwise we might end up deadlocking ourselves as we might trigger
+ * an activation ourselves we can't fulfill. */
+ log_set_prohibit_ipc(!manager_journal_is_running(m));
+ log_open();
+}
+
+static ShowStatus manager_get_show_status(Manager *m) {
+ assert(m);
+
+ if (MANAGER_IS_USER(m))
+ return _SHOW_STATUS_INVALID;
+
+ if (m->show_status_overridden != _SHOW_STATUS_INVALID)
+ return m->show_status_overridden;
+
+ return m->show_status;
+}
+
+bool manager_get_show_status_on(Manager *m) {
+ assert(m);
+
+ return show_status_on(manager_get_show_status(m));
+}
+
+static void set_show_status_marker(bool b) {
+ if (b)
+ (void) touch("/run/systemd/show-status");
+ else
+ (void) unlink("/run/systemd/show-status");
+}
+
+void manager_set_show_status(Manager *m, ShowStatus mode, const char *reason) {
+ assert(m);
+ assert(reason);
+ assert(mode >= 0 && mode < _SHOW_STATUS_MAX);
+
+ if (MANAGER_IS_USER(m))
+ return;
+
+ if (mode == m->show_status)
+ return;
+
+ if (m->show_status_overridden == _SHOW_STATUS_INVALID) {
+ bool enabled;
+
+ enabled = show_status_on(mode);
+ log_debug("%s (%s) showing of status (%s).",
+ enabled ? "Enabling" : "Disabling",
+ strna(show_status_to_string(mode)),
+ reason);
+
+ set_show_status_marker(enabled);
+ }
+
+ m->show_status = mode;
+}
+
+void manager_override_show_status(Manager *m, ShowStatus mode, const char *reason) {
+ assert(m);
+ assert(mode < _SHOW_STATUS_MAX);
+
+ if (MANAGER_IS_USER(m))
+ return;
+
+ if (mode == m->show_status_overridden)
+ return;
+
+ m->show_status_overridden = mode;
+
+ if (mode == _SHOW_STATUS_INVALID)
+ mode = m->show_status;
+
+ log_debug("%s (%s) showing of status (%s).",
+ m->show_status_overridden != _SHOW_STATUS_INVALID ? "Overriding" : "Restoring",
+ strna(show_status_to_string(mode)),
+ reason);
+
+ set_show_status_marker(show_status_on(mode));
+}
+
+const char *manager_get_confirm_spawn(Manager *m) {
+ static int last_errno = 0;
+ struct stat st;
+ int r;
+
+ assert(m);
+
+ /* Here's the deal: we want to test the validity of the console but don't want
+ * PID1 to go through the whole console process which might block. But we also
+ * want to warn the user only once if something is wrong with the console so we
+ * cannot do the sanity checks after spawning our children. So here we simply do
+ * really basic tests to hopefully trap common errors.
+ *
+ * If the console suddenly disappear at the time our children will really it
+ * then they will simply fail to acquire it and a positive answer will be
+ * assumed. New children will fall back to /dev/console though.
+ *
+ * Note: TTYs are devices that can come and go any time, and frequently aren't
+ * available yet during early boot (consider a USB rs232 dongle...). If for any
+ * reason the configured console is not ready, we fall back to the default
+ * console. */
+
+ if (!m->confirm_spawn || path_equal(m->confirm_spawn, "/dev/console"))
+ return m->confirm_spawn;
+
+ if (stat(m->confirm_spawn, &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (!S_ISCHR(st.st_mode)) {
+ r = -ENOTTY;
+ goto fail;
+ }
+
+ last_errno = 0;
+ return m->confirm_spawn;
+
+fail:
+ if (last_errno != r)
+ last_errno = log_warning_errno(r, "Failed to open %s, using default console: %m", m->confirm_spawn);
+
+ return "/dev/console";
+}
+
+void manager_set_first_boot(Manager *m, bool b) {
+ assert(m);
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return;
+
+ if (m->first_boot != (int) b) {
+ if (b)
+ (void) touch("/run/systemd/first-boot");
+ else
+ (void) unlink("/run/systemd/first-boot");
+ }
+
+ m->first_boot = b;
+}
+
+void manager_disable_confirm_spawn(void) {
+ (void) touch("/run/systemd/confirm_spawn_disabled");
+}
+
+bool manager_is_confirm_spawn_disabled(Manager *m) {
+ if (!m->confirm_spawn)
+ return true;
+
+ return access("/run/systemd/confirm_spawn_disabled", F_OK) >= 0;
+}
+
+static bool manager_should_show_status(Manager *m, StatusType type) {
+ assert(m);
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return false;
+
+ if (m->no_console_output)
+ return false;
+
+ if (!IN_SET(manager_state(m), MANAGER_INITIALIZING, MANAGER_STARTING, MANAGER_STOPPING))
+ return false;
+
+ /* If we cannot find out the status properly, just proceed. */
+ if (type != STATUS_TYPE_EMERGENCY && manager_check_ask_password(m) > 0)
+ return false;
+
+ if (type == STATUS_TYPE_NOTICE && m->show_status != SHOW_STATUS_NO)
+ return true;
+
+ return manager_get_show_status_on(m);
+}
+
+void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) {
+ va_list ap;
+
+ /* If m is NULL, assume we're after shutdown and let the messages through. */
+
+ if (m && !manager_should_show_status(m, type))
+ return;
+
+ /* XXX We should totally drop the check for ephemeral here
+ * and thus effectively make 'Type=idle' pointless. */
+ if (type == STATUS_TYPE_EPHEMERAL && m && m->n_on_console > 0)
+ return;
+
+ va_start(ap, format);
+ status_vprintf(status, SHOW_STATUS_ELLIPSIZE|(type == STATUS_TYPE_EPHEMERAL ? SHOW_STATUS_EPHEMERAL : 0), format, ap);
+ va_end(ap);
+}
+
+Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path) {
+ char p[strlen(path)+1];
+
+ assert(m);
+ assert(path);
+
+ strcpy(p, path);
+ path_simplify(p, false);
+
+ return hashmap_get(m->units_requiring_mounts_for, streq(p, "/") ? "" : p);
+}
+
+int manager_update_failed_units(Manager *m, Unit *u, bool failed) {
+ unsigned size;
+ int r;
+
+ assert(m);
+ assert(u->manager == m);
+
+ size = set_size(m->failed_units);
+
+ if (failed) {
+ r = set_ensure_put(&m->failed_units, NULL, u);
+ if (r < 0)
+ return log_oom();
+ } else
+ (void) set_remove(m->failed_units, u);
+
+ if (set_size(m->failed_units) != size)
+ bus_manager_send_change_signal(m);
+
+ return 0;
+}
+
+ManagerState manager_state(Manager *m) {
+ Unit *u;
+
+ assert(m);
+
+ /* Is the special shutdown target active or queued? If so, we are in shutdown state */
+ u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
+ if (u && unit_active_or_pending(u))
+ return MANAGER_STOPPING;
+
+ /* Did we ever finish booting? If not then we are still starting up */
+ if (!MANAGER_IS_FINISHED(m)) {
+
+ u = manager_get_unit(m, SPECIAL_BASIC_TARGET);
+ if (!u || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return MANAGER_INITIALIZING;
+
+ return MANAGER_STARTING;
+ }
+
+ if (MANAGER_IS_SYSTEM(m)) {
+ /* Are the rescue or emergency targets active or queued? If so we are in maintenance state */
+ u = manager_get_unit(m, SPECIAL_RESCUE_TARGET);
+ if (u && unit_active_or_pending(u))
+ return MANAGER_MAINTENANCE;
+
+ u = manager_get_unit(m, SPECIAL_EMERGENCY_TARGET);
+ if (u && unit_active_or_pending(u))
+ return MANAGER_MAINTENANCE;
+ }
+
+ /* Are there any failed units? If so, we are in degraded mode */
+ if (set_size(m->failed_units) > 0)
+ return MANAGER_DEGRADED;
+
+ return MANAGER_RUNNING;
+}
+
+static void manager_unref_uid_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ uid_t uid,
+ bool destroy_now,
+ int (*_clean_ipc)(uid_t uid)) {
+
+ uint32_t c, n;
+
+ assert(m);
+ assert(uid_refs);
+ assert(uid_is_valid(uid));
+ assert(_clean_ipc);
+
+ /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption
+ * that uid_t and gid_t are actually defined the same way, with the same validity rules.
+ *
+ * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest
+ * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID
+ * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added
+ * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (uid == 0) /* We don't keep track of root, and will never destroy it */
+ return;
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+ n = c & ~DESTROY_IPC_FLAG;
+ assert(n > 0);
+ n--;
+
+ if (destroy_now && n == 0) {
+ hashmap_remove(*uid_refs, UID_TO_PTR(uid));
+
+ if (c & DESTROY_IPC_FLAG) {
+ log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.",
+ _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+ uid);
+ (void) _clean_ipc(uid);
+ }
+ } else {
+ c = n | (c & DESTROY_IPC_FLAG);
+ assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0);
+ }
+}
+
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) {
+ manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid);
+}
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) {
+ manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid);
+}
+
+static int manager_ref_uid_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ uid_t uid,
+ bool clean_ipc) {
+
+ uint32_t c, n;
+ int r;
+
+ assert(m);
+ assert(uid_refs);
+ assert(uid_is_valid(uid));
+
+ /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption
+ * that uid_t and gid_t are actually defined the same way, with the same validity rules. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (uid == 0) /* We don't keep track of root, and will never destroy it */
+ return 0;
+
+ r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+ n = c & ~DESTROY_IPC_FLAG;
+ n++;
+
+ if (n & DESTROY_IPC_FLAG) /* check for overflow */
+ return -EOVERFLOW;
+
+ c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0);
+
+ return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+}
+
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) {
+ return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc);
+}
+
+int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) {
+ return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc);
+}
+
+static void manager_vacuum_uid_refs_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ int (*_clean_ipc)(uid_t uid)) {
+
+ void *p, *k;
+
+ assert(m);
+ assert(uid_refs);
+ assert(_clean_ipc);
+
+ HASHMAP_FOREACH_KEY(p, k, *uid_refs) {
+ uint32_t c, n;
+ uid_t uid;
+
+ uid = PTR_TO_UID(k);
+ c = PTR_TO_UINT32(p);
+
+ n = c & ~DESTROY_IPC_FLAG;
+ if (n > 0)
+ continue;
+
+ if (c & DESTROY_IPC_FLAG) {
+ log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.",
+ _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+ uid);
+ (void) _clean_ipc(uid);
+ }
+
+ assert_se(hashmap_remove(*uid_refs, k) == p);
+ }
+}
+
+static void manager_vacuum_uid_refs(Manager *m) {
+ manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid);
+}
+
+static void manager_vacuum_gid_refs(Manager *m) {
+ manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid);
+}
+
+static void manager_vacuum(Manager *m) {
+ assert(m);
+
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
+ /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+ manager_vacuum_uid_refs(m);
+ manager_vacuum_gid_refs(m);
+
+ /* Release any runtimes no longer referenced */
+ exec_runtime_vacuum(m);
+}
+
+int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ struct buffer {
+ uid_t uid;
+ gid_t gid;
+ char unit_name[UNIT_NAME_MAX+1];
+ } _packed_ buffer;
+
+ Manager *m = userdata;
+ ssize_t l;
+ size_t n;
+ Unit *u;
+
+ assert_se(source);
+ assert_se(m);
+
+ /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID
+ * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the
+ * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */
+
+ l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT);
+ if (l < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ return 0;
+
+ return log_error_errno(errno, "Failed to read from user lookup fd: %m");
+ }
+
+ if ((size_t) l <= offsetof(struct buffer, unit_name)) {
+ log_warning("Received too short user lookup message, ignoring.");
+ return 0;
+ }
+
+ if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) {
+ log_warning("Received too long user lookup message, ignoring.");
+ return 0;
+ }
+
+ if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) {
+ log_warning("Got user lookup message with invalid UID/GID pair, ignoring.");
+ return 0;
+ }
+
+ n = (size_t) l - offsetof(struct buffer, unit_name);
+ if (memchr(buffer.unit_name, 0, n)) {
+ log_warning("Received lookup message with embedded NUL character, ignoring.");
+ return 0;
+ }
+
+ buffer.unit_name[n] = 0;
+ u = manager_get_unit(m, buffer.unit_name);
+ if (!u) {
+ log_debug("Got user lookup message but unit doesn't exist, ignoring.");
+ return 0;
+ }
+
+ log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid);
+
+ unit_notify_user_lookup(u, buffer.uid, buffer.gid);
+ return 0;
+}
+
+char *manager_taint_string(Manager *m) {
+ _cleanup_free_ char *destination = NULL, *overflowuid = NULL, *overflowgid = NULL;
+ char *buf, *e;
+ int r;
+
+ /* Returns a "taint string", e.g. "local-hwclock:var-run-bad".
+ * Only things that are detected at runtime should be tagged
+ * here. For stuff that is set during compilation, emit a warning
+ * in the configuration phase. */
+
+ assert(m);
+
+ buf = new(char, sizeof("split-usr:"
+ "cgroups-missing:"
+ "local-hwclock:"
+ "var-run-bad:"
+ "overflowuid-not-65534:"
+ "overflowgid-not-65534:"));
+ if (!buf)
+ return NULL;
+
+ e = buf;
+ buf[0] = 0;
+
+ if (m->taint_usr)
+ e = stpcpy(e, "split-usr:");
+
+ if (access("/proc/cgroups", F_OK) < 0)
+ e = stpcpy(e, "cgroups-missing:");
+
+ if (clock_is_localtime(NULL) > 0)
+ e = stpcpy(e, "local-hwclock:");
+
+ r = readlink_malloc("/var/run", &destination);
+ if (r < 0 || !PATH_IN_SET(destination, "../run", "/run"))
+ e = stpcpy(e, "var-run-bad:");
+
+ r = read_one_line_file("/proc/sys/kernel/overflowuid", &overflowuid);
+ if (r >= 0 && !streq(overflowuid, "65534"))
+ e = stpcpy(e, "overflowuid-not-65534:");
+
+ r = read_one_line_file("/proc/sys/kernel/overflowgid", &overflowgid);
+ if (r >= 0 && !streq(overflowgid, "65534"))
+ e = stpcpy(e, "overflowgid-not-65534:");
+
+ /* remove the last ':' */
+ if (e != buf)
+ e[-1] = 0;
+
+ return buf;
+}
+
+void manager_ref_console(Manager *m) {
+ assert(m);
+
+ m->n_on_console++;
+}
+
+void manager_unref_console(Manager *m) {
+
+ assert(m->n_on_console > 0);
+ m->n_on_console--;
+
+ if (m->n_on_console == 0)
+ m->no_console_output = false; /* unset no_console_output flag, since the console is definitely free now */
+}
+
+void manager_override_log_level(Manager *m, int level) {
+ _cleanup_free_ char *s = NULL;
+ assert(m);
+
+ if (!m->log_level_overridden) {
+ m->original_log_level = log_get_max_level();
+ m->log_level_overridden = true;
+ }
+
+ (void) log_level_to_string_alloc(level, &s);
+ log_info("Setting log level to %s.", strna(s));
+
+ log_set_max_level(level);
+}
+
+void manager_restore_original_log_level(Manager *m) {
+ _cleanup_free_ char *s = NULL;
+ assert(m);
+
+ if (!m->log_level_overridden)
+ return;
+
+ (void) log_level_to_string_alloc(m->original_log_level, &s);
+ log_info("Restoring log level to original (%s).", strna(s));
+
+ log_set_max_level(m->original_log_level);
+ m->log_level_overridden = false;
+}
+
+void manager_override_log_target(Manager *m, LogTarget target) {
+ assert(m);
+
+ if (!m->log_target_overridden) {
+ m->original_log_target = log_get_target();
+ m->log_target_overridden = true;
+ }
+
+ log_info("Setting log target to %s.", log_target_to_string(target));
+ log_set_target(target);
+}
+
+void manager_restore_original_log_target(Manager *m) {
+ assert(m);
+
+ if (!m->log_target_overridden)
+ return;
+
+ log_info("Restoring log target to original %s.", log_target_to_string(m->original_log_target));
+
+ log_set_target(m->original_log_target);
+ m->log_target_overridden = false;
+}
+
+ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s) {
+ if (in_initrd() &&
+ s >= MANAGER_TIMESTAMP_SECURITY_START &&
+ s <= MANAGER_TIMESTAMP_UNITS_LOAD_FINISH)
+ return s - MANAGER_TIMESTAMP_SECURITY_START + MANAGER_TIMESTAMP_INITRD_SECURITY_START;
+ return s;
+}
+
+static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
+ [MANAGER_INITIALIZING] = "initializing",
+ [MANAGER_STARTING] = "starting",
+ [MANAGER_RUNNING] = "running",
+ [MANAGER_DEGRADED] = "degraded",
+ [MANAGER_MAINTENANCE] = "maintenance",
+ [MANAGER_STOPPING] = "stopping",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(manager_state, ManagerState);
+
+static const char *const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = {
+ [MANAGER_TIMESTAMP_FIRMWARE] = "firmware",
+ [MANAGER_TIMESTAMP_LOADER] = "loader",
+ [MANAGER_TIMESTAMP_KERNEL] = "kernel",
+ [MANAGER_TIMESTAMP_INITRD] = "initrd",
+ [MANAGER_TIMESTAMP_USERSPACE] = "userspace",
+ [MANAGER_TIMESTAMP_FINISH] = "finish",
+ [MANAGER_TIMESTAMP_SECURITY_START] = "security-start",
+ [MANAGER_TIMESTAMP_SECURITY_FINISH] = "security-finish",
+ [MANAGER_TIMESTAMP_GENERATORS_START] = "generators-start",
+ [MANAGER_TIMESTAMP_GENERATORS_FINISH] = "generators-finish",
+ [MANAGER_TIMESTAMP_UNITS_LOAD_START] = "units-load-start",
+ [MANAGER_TIMESTAMP_UNITS_LOAD_FINISH] = "units-load-finish",
+ [MANAGER_TIMESTAMP_INITRD_SECURITY_START] = "initrd-security-start",
+ [MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH] = "initrd-security-finish",
+ [MANAGER_TIMESTAMP_INITRD_GENERATORS_START] = "initrd-generators-start",
+ [MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH] = "initrd-generators-finish",
+ [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START] = "initrd-units-load-start",
+ [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH] = "initrd-units-load-finish",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(manager_timestamp, ManagerTimestamp);
+
+static const char* const oom_policy_table[_OOM_POLICY_MAX] = {
+ [OOM_CONTINUE] = "continue",
+ [OOM_STOP] = "stop",
+ [OOM_KILL] = "kill",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(oom_policy, OOMPolicy);
diff --git a/src/core/manager.h b/src/core/manager.h
new file mode 100644
index 0000000..19df889
--- /dev/null
+++ b/src/core/manager.h
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "cgroup-util.h"
+#include "cgroup.h"
+#include "fdset.h"
+#include "hashmap.h"
+#include "ip-address-access.h"
+#include "list.h"
+#include "prioq.h"
+#include "ratelimit.h"
+#include "varlink.h"
+
+struct libmnt_monitor;
+typedef struct Unit Unit;
+
+/* Enforce upper limit how many names we allow */
+#define MANAGER_MAX_NAMES 131072 /* 128K */
+
+typedef struct Manager Manager;
+
+/* An externally visible state. We don't actually maintain this as state variable, but derive it from various fields
+ * when requested */
+typedef enum ManagerState {
+ MANAGER_INITIALIZING,
+ MANAGER_STARTING,
+ MANAGER_RUNNING,
+ MANAGER_DEGRADED,
+ MANAGER_MAINTENANCE,
+ MANAGER_STOPPING,
+ _MANAGER_STATE_MAX,
+ _MANAGER_STATE_INVALID = -1
+} ManagerState;
+
+typedef enum ManagerObjective {
+ MANAGER_OK,
+ MANAGER_EXIT,
+ MANAGER_RELOAD,
+ MANAGER_REEXECUTE,
+ MANAGER_REBOOT,
+ MANAGER_POWEROFF,
+ MANAGER_HALT,
+ MANAGER_KEXEC,
+ MANAGER_SWITCH_ROOT,
+ _MANAGER_OBJECTIVE_MAX,
+ _MANAGER_OBJECTIVE_INVALID = -1
+} ManagerObjective;
+
+typedef enum StatusType {
+ STATUS_TYPE_EPHEMERAL,
+ STATUS_TYPE_NORMAL,
+ STATUS_TYPE_NOTICE,
+ STATUS_TYPE_EMERGENCY,
+} StatusType;
+
+typedef enum OOMPolicy {
+ OOM_CONTINUE, /* The kernel kills the process it wants to kill, and that's it */
+ OOM_STOP, /* The kernel kills the process it wants to kill, and we stop the unit */
+ OOM_KILL, /* The kernel kills the process it wants to kill, and all others in the unit, and we stop the unit */
+ _OOM_POLICY_MAX,
+ _OOM_POLICY_INVALID = -1
+} OOMPolicy;
+
+/* Notes:
+ * 1. TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD,
+ * TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when
+ * the manager is system and not running under container environment.
+ *
+ * 2. The monotonic timestamp of TIMESTAMP_KERNEL is always zero.
+ *
+ * 3. The realtime timestamp of TIMESTAMP_KERNEL will be unset if the system does not
+ * have RTC.
+ *
+ * 4. TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER will be unset if the system does not
+ * have RTC, or systemd is built without EFI support.
+ *
+ * 5. The monotonic timestamps of TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER are stored as
+ * negative of the actual value.
+ *
+ * 6. TIMESTAMP_USERSPACE is the timestamp of when the manager was started.
+ *
+ * 7. TIMESTAMP_INITRD_* are set only when the system is booted with an initrd.
+ */
+
+typedef enum ManagerTimestamp {
+ MANAGER_TIMESTAMP_FIRMWARE,
+ MANAGER_TIMESTAMP_LOADER,
+ MANAGER_TIMESTAMP_KERNEL,
+ MANAGER_TIMESTAMP_INITRD,
+ MANAGER_TIMESTAMP_USERSPACE,
+ MANAGER_TIMESTAMP_FINISH,
+
+ MANAGER_TIMESTAMP_SECURITY_START,
+ MANAGER_TIMESTAMP_SECURITY_FINISH,
+ MANAGER_TIMESTAMP_GENERATORS_START,
+ MANAGER_TIMESTAMP_GENERATORS_FINISH,
+ MANAGER_TIMESTAMP_UNITS_LOAD_START,
+ MANAGER_TIMESTAMP_UNITS_LOAD_FINISH,
+
+ MANAGER_TIMESTAMP_INITRD_SECURITY_START,
+ MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH,
+ MANAGER_TIMESTAMP_INITRD_GENERATORS_START,
+ MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH,
+ MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START,
+ MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH,
+ _MANAGER_TIMESTAMP_MAX,
+ _MANAGER_TIMESTAMP_INVALID = -1,
+} ManagerTimestamp;
+
+typedef enum WatchdogType {
+ WATCHDOG_RUNTIME,
+ WATCHDOG_REBOOT,
+ WATCHDOG_KEXEC,
+ _WATCHDOG_TYPE_MAX,
+} WatchdogType;
+
+#include "execute.h"
+#include "job.h"
+#include "path-lookup.h"
+#include "show-status.h"
+#include "unit-name.h"
+
+typedef enum ManagerTestRunFlags {
+ MANAGER_TEST_NORMAL = 0, /* run normally */
+ MANAGER_TEST_RUN_MINIMAL = 1 << 0, /* create basic data structures */
+ MANAGER_TEST_RUN_BASIC = 1 << 1, /* interact with the environment */
+ MANAGER_TEST_RUN_ENV_GENERATORS = 1 << 2, /* also run env generators */
+ MANAGER_TEST_RUN_GENERATORS = 1 << 3, /* also run unit generators */
+ MANAGER_TEST_FULL = MANAGER_TEST_RUN_BASIC | MANAGER_TEST_RUN_ENV_GENERATORS | MANAGER_TEST_RUN_GENERATORS,
+} ManagerTestRunFlags;
+
+assert_cc((MANAGER_TEST_FULL & UINT8_MAX) == MANAGER_TEST_FULL);
+
+struct Manager {
+ /* Note that the set of units we know of is allowed to be
+ * inconsistent. However the subset of it that is loaded may
+ * not, and the list of jobs may neither. */
+
+ /* Active jobs and units */
+ Hashmap *units; /* name string => Unit object n:1 */
+ Hashmap *units_by_invocation_id;
+ Hashmap *jobs; /* job id => Job object 1:1 */
+
+ /* To make it easy to iterate through the units of a specific
+ * type we maintain a per type linked list */
+ LIST_HEAD(Unit, units_by_type[_UNIT_TYPE_MAX]);
+
+ /* Units that need to be loaded */
+ LIST_HEAD(Unit, load_queue); /* this is actually more a stack than a queue, but uh. */
+
+ /* Jobs that need to be run */
+ struct Prioq *run_queue;
+
+ /* Units and jobs that have not yet been announced via
+ * D-Bus. When something about a job changes it is added here
+ * if it is not in there yet. This allows easy coalescing of
+ * D-Bus change signals. */
+ LIST_HEAD(Unit, dbus_unit_queue);
+ LIST_HEAD(Job, dbus_job_queue);
+
+ /* Units to remove */
+ LIST_HEAD(Unit, cleanup_queue);
+
+ /* Units and jobs to check when doing GC */
+ LIST_HEAD(Unit, gc_unit_queue);
+ LIST_HEAD(Job, gc_job_queue);
+
+ /* Units that should be realized */
+ LIST_HEAD(Unit, cgroup_realize_queue);
+
+ /* Units whose cgroup ran empty */
+ LIST_HEAD(Unit, cgroup_empty_queue);
+
+ /* Units whose memory.event fired */
+ LIST_HEAD(Unit, cgroup_oom_queue);
+
+ /* Target units whose default target dependencies haven't been set yet */
+ LIST_HEAD(Unit, target_deps_queue);
+
+ /* Units that might be subject to StopWhenUnneeded= clean-up */
+ LIST_HEAD(Unit, stop_when_unneeded_queue);
+
+ sd_event *event;
+
+ /* This maps PIDs we care about to units that are interested in. We allow multiple units to he interested in
+ * the same PID and multiple PIDs to be relevant to the same unit. Since in most cases only a single unit will
+ * be interested in the same PID we use a somewhat special encoding here: the first unit interested in a PID is
+ * stored directly in the hashmap, keyed by the PID unmodified. If there are other units interested too they'll
+ * be stored in a NULL-terminated array, and keyed by the negative PID. This is safe as pid_t is signed and
+ * negative PIDs are not used for regular processes but process groups, which we don't care about in this
+ * context, but this allows us to use the negative range for our own purposes. */
+ Hashmap *watch_pids; /* pid => unit as well as -pid => array of units */
+
+ /* A set contains all units which cgroup should be refreshed after startup */
+ Set *startup_units;
+
+ /* A set which contains all currently failed units */
+ Set *failed_units;
+
+ sd_event_source *run_queue_event_source;
+
+ char *notify_socket;
+ int notify_fd;
+ sd_event_source *notify_event_source;
+
+ int cgroups_agent_fd;
+ sd_event_source *cgroups_agent_event_source;
+
+ int signal_fd;
+ sd_event_source *signal_event_source;
+
+ sd_event_source *sigchld_event_source;
+
+ int time_change_fd;
+ sd_event_source *time_change_event_source;
+
+ sd_event_source *timezone_change_event_source;
+
+ sd_event_source *jobs_in_progress_event_source;
+
+ int user_lookup_fds[2];
+ sd_event_source *user_lookup_event_source;
+
+ UnitFileScope unit_file_scope;
+ LookupPaths lookup_paths;
+ Hashmap *unit_id_map;
+ Hashmap *unit_name_map;
+ Set *unit_path_cache;
+ uint64_t unit_cache_timestamp_hash;
+
+ char **transient_environment; /* The environment, as determined from config files, kernel cmdline and environment generators */
+ char **client_environment; /* Environment variables created by clients through the bus API */
+
+ usec_t watchdog[_WATCHDOG_TYPE_MAX];
+ usec_t watchdog_overridden[_WATCHDOG_TYPE_MAX];
+
+ bool runtime_watchdog_running; /* Whether the runtime HW watchdog was started, so we know if we still need to get the real timeout from the hardware */
+
+ dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX];
+
+ /* Data specific to the device subsystem */
+ sd_device_monitor *device_monitor;
+ Hashmap *devices_by_sysfs;
+
+ /* Data specific to the mount subsystem */
+ struct libmnt_monitor *mount_monitor;
+ sd_event_source *mount_event_source;
+
+ /* Data specific to the swap filesystem */
+ FILE *proc_swaps;
+ sd_event_source *swap_event_source;
+ Hashmap *swaps_by_devnode;
+
+ /* Data specific to the D-Bus subsystem */
+ sd_bus *api_bus, *system_bus;
+ Set *private_buses;
+ int private_listen_fd;
+ sd_event_source *private_listen_event_source;
+
+ /* Contains all the clients that are subscribed to signals via
+ the API bus. Note that private bus connections are always
+ considered subscribes, since they last for very short only,
+ and it is much simpler that way. */
+ sd_bus_track *subscribed;
+ char **deserialized_subscribed;
+
+ /* This is used during reloading: before the reload we queue
+ * the reply message here, and afterwards we send it */
+ sd_bus_message *pending_reload_message;
+
+ Hashmap *watch_bus; /* D-Bus names => Unit object n:1 */
+
+ bool send_reloading_done;
+
+ uint32_t current_job_id;
+ uint32_t default_unit_job_id;
+
+ /* Data specific to the Automount subsystem */
+ int dev_autofs_fd;
+
+ /* Data specific to the cgroup subsystem */
+ Hashmap *cgroup_unit;
+ CGroupMask cgroup_supported;
+ char *cgroup_root;
+
+ /* Notifications from cgroups, when the unified hierarchy is used is done via inotify. */
+ int cgroup_inotify_fd;
+ sd_event_source *cgroup_inotify_event_source;
+
+ /* Maps for finding the unit for each inotify watch descriptor for the cgroup.events and
+ * memory.events cgroupv2 attributes. */
+ Hashmap *cgroup_control_inotify_wd_unit;
+ Hashmap *cgroup_memory_inotify_wd_unit;
+
+ /* A defer event for handling cgroup empty events and processing them after SIGCHLD in all cases. */
+ sd_event_source *cgroup_empty_event_source;
+ sd_event_source *cgroup_oom_event_source;
+
+ /* Make sure the user cannot accidentally unmount our cgroup
+ * file system */
+ int pin_cgroupfs_fd;
+
+ unsigned gc_marker;
+
+ /* The stat() data the last time we saw /etc/localtime */
+ usec_t etc_localtime_mtime;
+ bool etc_localtime_accessible:1;
+
+ ManagerObjective objective:5;
+
+ /* Flags */
+ bool dispatching_load_queue:1;
+
+ bool taint_usr:1;
+
+ /* Have we already sent out the READY=1 notification? */
+ bool ready_sent:1;
+
+ /* Have we already printed the taint line if necessary? */
+ bool taint_logged:1;
+
+ /* Have we ever changed the "kernel.pid_max" sysctl? */
+ bool sysctl_pid_max_changed:1;
+
+ ManagerTestRunFlags test_run_flags:8;
+
+ /* If non-zero, exit with the following value when the systemd
+ * process terminate. Useful for containers: systemd-nspawn could get
+ * the return value. */
+ uint8_t return_value;
+
+ ShowStatus show_status;
+ ShowStatus show_status_overridden;
+ StatusUnitFormat status_unit_format;
+ char *confirm_spawn;
+ bool no_console_output;
+ bool service_watchdogs;
+
+ ExecOutput default_std_output, default_std_error;
+
+ usec_t default_restart_usec, default_timeout_start_usec, default_timeout_stop_usec;
+ usec_t default_timeout_abort_usec;
+ bool default_timeout_abort_set;
+
+ usec_t default_start_limit_interval;
+ unsigned default_start_limit_burst;
+
+ bool default_cpu_accounting;
+ bool default_memory_accounting;
+ bool default_io_accounting;
+ bool default_blockio_accounting;
+ bool default_tasks_accounting;
+ bool default_ip_accounting;
+
+ TasksMax default_tasks_max;
+ usec_t default_timer_accuracy_usec;
+
+ OOMPolicy default_oom_policy;
+
+ int original_log_level;
+ LogTarget original_log_target;
+ bool log_level_overridden:1;
+ bool log_target_overridden:1;
+
+ struct rlimit *rlimit[_RLIMIT_MAX];
+
+ /* non-zero if we are reloading or reexecuting, */
+ int n_reloading;
+
+ unsigned n_installed_jobs;
+ unsigned n_failed_jobs;
+
+ /* Jobs in progress watching */
+ unsigned n_running_jobs;
+ unsigned n_on_console;
+ unsigned jobs_in_progress_iteration;
+
+ /* Do we have any outstanding password prompts? */
+ int have_ask_password;
+ int ask_password_inotify_fd;
+ sd_event_source *ask_password_event_source;
+
+ /* Type=idle pipes */
+ int idle_pipe[4];
+ sd_event_source *idle_pipe_event_source;
+
+ char *switch_root;
+ char *switch_root_init;
+
+ /* This maps all possible path prefixes to the units needing
+ * them. It's a hashmap with a path string as key and a Set as
+ * value where Unit objects are contained. */
+ Hashmap *units_requiring_mounts_for;
+
+ /* Used for processing polkit authorization responses */
+ Hashmap *polkit_registry;
+
+ /* Dynamic users/groups, indexed by their name */
+ Hashmap *dynamic_users;
+
+ /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
+ Hashmap *uid_refs;
+ Hashmap *gid_refs;
+
+ /* ExecRuntime, indexed by their owner unit id */
+ Hashmap *exec_runtime_by_id;
+
+ /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
+ RateLimit ctrl_alt_del_ratelimit;
+ EmergencyAction cad_burst_action;
+
+ const char *unit_log_field;
+ const char *unit_log_format_string;
+
+ const char *invocation_log_field;
+ const char *invocation_log_format_string;
+
+ int first_boot; /* tri-state */
+
+ /* Prefixes of e.g. RuntimeDirectory= */
+ char *prefix[_EXEC_DIRECTORY_TYPE_MAX];
+ char *received_credentials;
+
+ /* Used in the SIGCHLD and sd_notify() message invocation logic to avoid that we dispatch the same event
+ * multiple times on the same unit. */
+ unsigned sigchldgen;
+ unsigned notifygen;
+
+ bool honor_device_enumeration;
+
+ VarlinkServer *varlink_server;
+ /* Only systemd-oomd should be using this to subscribe to changes in ManagedOOM settings */
+ Varlink *managed_oom_varlink_request;
+};
+
+static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
+ assert(m);
+ return m->default_timeout_abort_set ? m->default_timeout_abort_usec : m->default_timeout_stop_usec;
+}
+
+#define MANAGER_IS_SYSTEM(m) ((m)->unit_file_scope == UNIT_FILE_SYSTEM)
+#define MANAGER_IS_USER(m) ((m)->unit_file_scope != UNIT_FILE_SYSTEM)
+
+#define MANAGER_IS_RELOADING(m) ((m)->n_reloading > 0)
+
+#define MANAGER_IS_FINISHED(m) (dual_timestamp_is_set((m)->timestamps + MANAGER_TIMESTAMP_FINISH))
+
+/* The objective is set to OK as soon as we enter the main loop, and set otherwise as soon as we are done with it */
+#define MANAGER_IS_RUNNING(m) ((m)->objective == MANAGER_OK)
+
+#define MANAGER_IS_TEST_RUN(m) ((m)->test_run_flags != 0)
+
+int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager **m);
+Manager* manager_free(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+int manager_startup(Manager *m, FILE *serialization, FDSet *fds);
+
+Job *manager_get_job(Manager *m, uint32_t id);
+Unit *manager_get_unit(Manager *m, const char *name);
+
+int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j);
+
+bool manager_unit_cache_should_retry_load(Unit *u);
+int manager_load_unit_prepare(Manager *m, const char *name, const char *path, sd_bus_error *e, Unit **_ret);
+int manager_load_unit(Manager *m, const char *name, const char *path, sd_bus_error *e, Unit **_ret);
+int manager_load_startable_unit_or_warn(Manager *m, const char *name, const char *path, Unit **ret);
+int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u);
+
+int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, Set *affected_jobs, sd_bus_error *e, Job **_ret);
+int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, Set *affected_jobs, sd_bus_error *e, Job **_ret);
+int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, JobMode mode, Set *affected_jobs, Job **ret);
+int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e);
+
+void manager_dump_units(Manager *s, FILE *f, const char *prefix);
+void manager_dump_jobs(Manager *s, FILE *f, const char *prefix);
+void manager_dump(Manager *s, FILE *f, const char *prefix);
+int manager_get_dump_string(Manager *m, char **ret);
+
+void manager_clear_jobs(Manager *m);
+
+void manager_unwatch_pid(Manager *m, pid_t pid);
+
+unsigned manager_dispatch_load_queue(Manager *m);
+
+int manager_default_environment(Manager *m);
+int manager_transient_environment_add(Manager *m, char **plus);
+int manager_client_environment_modify(Manager *m, char **minus, char **plus);
+int manager_get_effective_environment(Manager *m, char ***ret);
+
+int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit);
+
+int manager_loop(Manager *m);
+
+int manager_open_serialization(Manager *m, FILE **_f);
+
+int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root);
+int manager_deserialize(Manager *m, FILE *f, FDSet *fds);
+
+int manager_reload(Manager *m);
+
+void manager_reset_failed(Manager *m);
+
+void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success);
+void manager_send_unit_plymouth(Manager *m, Unit *u);
+
+bool manager_unit_inactive_or_pending(Manager *m, const char *name);
+
+void manager_check_finished(Manager *m);
+
+void disable_printk_ratelimit(void);
+void manager_recheck_dbus(Manager *m);
+void manager_recheck_journal(Manager *m);
+
+bool manager_get_show_status_on(Manager *m);
+void manager_set_show_status(Manager *m, ShowStatus mode, const char *reason);
+void manager_override_show_status(Manager *m, ShowStatus mode, const char *reason);
+
+void manager_set_first_boot(Manager *m, bool b);
+
+void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) _printf_(4,5);
+
+Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path);
+
+ManagerState manager_state(Manager *m);
+
+int manager_update_failed_units(Manager *m, Unit *u, bool failed);
+
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now);
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc);
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now);
+int manager_ref_gid(Manager *m, gid_t gid, bool destroy_now);
+
+char *manager_taint_string(Manager *m);
+
+void manager_ref_console(Manager *m);
+void manager_unref_console(Manager *m);
+
+void manager_override_log_level(Manager *m, int level);
+void manager_restore_original_log_level(Manager *m);
+
+void manager_override_log_target(Manager *m, LogTarget target);
+void manager_restore_original_log_target(Manager *m);
+
+const char *manager_state_to_string(ManagerState m) _const_;
+ManagerState manager_state_from_string(const char *s) _pure_;
+
+const char *manager_get_confirm_spawn(Manager *m);
+bool manager_is_confirm_spawn_disabled(Manager *m);
+void manager_disable_confirm_spawn(void);
+
+const char *manager_timestamp_to_string(ManagerTimestamp m) _const_;
+ManagerTimestamp manager_timestamp_from_string(const char *s) _pure_;
+ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
+
+usec_t manager_get_watchdog(Manager *m, WatchdogType t);
+void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout);
+int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
+void manager_retry_runtime_watchdog(Manager *m);
+
+const char* oom_policy_to_string(OOMPolicy i) _const_;
+OOMPolicy oom_policy_from_string(const char *s) _pure_;
diff --git a/src/core/meson.build b/src/core/meson.build
new file mode 100644
index 0000000..77767eb
--- /dev/null
+++ b/src/core/meson.build
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+libcore_shared_sources = '''
+ killall.c
+ killall.h
+ loopback-setup.c
+ loopback-setup.h
+ machine-id-setup.c
+ machine-id-setup.h
+ mount-setup.c
+ mount-setup.h
+'''.split()
+
+libcore_sources = '''
+ apparmor-setup.c
+ apparmor-setup.h
+ audit-fd.c
+ audit-fd.h
+ automount.c
+ automount.h
+ bpf-devices.c
+ bpf-devices.h
+ bpf-firewall.c
+ bpf-firewall.h
+ cgroup.c
+ cgroup.h
+ core-varlink.c
+ core-varlink.h
+ dbus-automount.c
+ dbus-automount.h
+ dbus-cgroup.c
+ dbus-cgroup.h
+ dbus-device.c
+ dbus-device.h
+ dbus-execute.c
+ dbus-execute.h
+ dbus-job.c
+ dbus-job.h
+ dbus-kill.c
+ dbus-kill.h
+ dbus-manager.c
+ dbus-manager.h
+ dbus-mount.c
+ dbus-mount.h
+ dbus-path.c
+ dbus-path.h
+ dbus-scope.c
+ dbus-scope.h
+ dbus-service.c
+ dbus-service.h
+ dbus-slice.c
+ dbus-slice.h
+ dbus-socket.c
+ dbus-socket.h
+ dbus-swap.c
+ dbus-swap.h
+ dbus-target.c
+ dbus-target.h
+ dbus-timer.c
+ dbus-timer.h
+ dbus-unit.c
+ dbus-unit.h
+ dbus-util.c
+ dbus-util.h
+ dbus.c
+ dbus.h
+ device.c
+ device.h
+ dynamic-user.c
+ dynamic-user.h
+ efi-random.c
+ efi-random.h
+ emergency-action.c
+ emergency-action.h
+ execute.c
+ execute.h
+ generator-setup.c
+ generator-setup.h
+ hostname-setup.c
+ hostname-setup.h
+ ima-setup.c
+ ima-setup.h
+ ip-address-access.c
+ ip-address-access.h
+ job.c
+ job.h
+ kill.c
+ kill.h
+ kmod-setup.c
+ kmod-setup.h
+ load-dropin.c
+ load-dropin.h
+ load-fragment.c
+ load-fragment.h
+ locale-setup.c
+ locale-setup.h
+ manager.c
+ manager.h
+ mount.c
+ mount.h
+ namespace.c
+ namespace.h
+ path.c
+ path.h
+ scope.c
+ scope.h
+ selinux-access.c
+ selinux-access.h
+ selinux-setup.c
+ selinux-setup.h
+ service.c
+ service.h
+ show-status.c
+ show-status.h
+ slice.c
+ slice.h
+ smack-setup.c
+ smack-setup.h
+ socket.c
+ socket.h
+ swap.c
+ swap.h
+ target.c
+ target.h
+ timer.c
+ timer.h
+ transaction.c
+ transaction.h
+ unit-printf.c
+ unit-printf.h
+ unit.c
+ unit.h
+'''.split()
+
+load_fragment_gperf_gperf = custom_target(
+ 'load-fragment-gperf.gperf',
+ input : 'load-fragment-gperf.gperf.m4',
+ output: 'load-fragment-gperf.gperf',
+ command : [meson_apply_m4, config_h, '@INPUT@'],
+ capture : true)
+
+load_fragment_gperf_c = custom_target(
+ 'load-fragment-gperf.c',
+ input : load_fragment_gperf_gperf,
+ output : 'load-fragment-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+awkscript = 'load-fragment-gperf-nulstr.awk'
+load_fragment_gperf_nulstr_c = custom_target(
+ 'load-fragment-gperf-nulstr.c',
+ input : [awkscript, load_fragment_gperf_gperf],
+ output : 'load-fragment-gperf-nulstr.c',
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+# A convenience library to share code with other binaries:
+# systemd-shutdown, systemd-remount-fs, systemd-machine-id-setup, …
+libcore_shared = static_library(
+ 'core-shared',
+ libcore_shared_sources,
+ include_directories : includes,
+ dependencies : [versiondep,
+ libmount])
+
+libcore = static_library(
+ 'core',
+ libcore_sources,
+ load_fragment_gperf_c,
+ load_fragment_gperf_nulstr_c,
+ include_directories : includes,
+ link_whole : libcore_shared,
+ dependencies : [versiondep,
+ threads,
+ librt,
+ libseccomp,
+ libpam,
+ libaudit,
+ libkmod,
+ libapparmor,
+ libselinux,
+ libmount,
+ libacl])
+
+systemd_sources = files('main.c')
+
+in_files = [['macros.systemd', rpmmacrosdir],
+ ['system.conf', pkgsysconfdir],
+ ['user.conf', pkgsysconfdir],
+ ['systemd.pc', pkgconfigdatadir],
+ ['triggers.systemd', '']]
+
+foreach item : in_files
+ file = item[0]
+ dir = item[1]
+ if install_sysconfdir or dir != pkgsysconfdir
+ configure_file(
+ input : file + '.in',
+ output : file,
+ configuration : substs,
+ install_dir : dir == 'no' ? '' : dir)
+ endif
+
+endforeach
+
+install_data('org.freedesktop.systemd1.conf',
+ install_dir : dbuspolicydir)
+install_data('org.freedesktop.systemd1.service',
+ install_dir : dbussystemservicedir)
+
+policy = configure_file(
+ input : 'org.freedesktop.systemd1.policy.in',
+ output : 'org.freedesktop.systemd1.policy',
+ configuration : substs)
+install_data(policy,
+ install_dir : polkitpolicydir)
+
+meson.add_install_script('sh', '-c', mkdir_p.format(systemshutdowndir))
+meson.add_install_script('sh', '-c', mkdir_p.format(systemsleepdir))
+meson.add_install_script('sh', '-c', mkdir_p.format(systemgeneratordir))
+meson.add_install_script('sh', '-c', mkdir_p.format(usergeneratordir))
+
+if install_sysconfdir
+ meson.add_install_script('sh', '-c', mkdir_p.format(join_paths(pkgsysconfdir, 'system')))
+ meson.add_install_script('sh', '-c', mkdir_p.format(join_paths(pkgsysconfdir, 'user')))
+ meson.add_install_script('sh', '-c', mkdir_p.format(join_paths(sysconfdir, 'xdg/systemd')))
+endif
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
new file mode 100644
index 0000000..915b101
--- /dev/null
+++ b/src/core/mount-setup.c
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <ftw.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "conf-files.h"
+#include "cgroup-setup.h"
+#include "dev-setup.h"
+#include "dirent-util.h"
+#include "efi-loader.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "label.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "mount-setup.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "smack-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "virt.h"
+
+typedef enum MountMode {
+ MNT_NONE = 0,
+ MNT_FATAL = 1 << 0,
+ MNT_IN_CONTAINER = 1 << 1,
+ MNT_CHECK_WRITABLE = 1 << 2,
+ MNT_FOLLOW_SYMLINK = 1 << 3,
+} MountMode;
+
+typedef struct MountPoint {
+ const char *what;
+ const char *where;
+ const char *type;
+ const char *options;
+ unsigned long flags;
+ bool (*condition_fn)(void);
+ MountMode mode;
+} MountPoint;
+
+/* The first three entries we might need before SELinux is up. The
+ * fourth (securityfs) is needed by IMA to load a custom policy. The
+ * other ones we can delay until SELinux and IMA are loaded. When
+ * SMACK is enabled we need smackfs, too, so it's a fifth one. */
+#if ENABLE_SMACK
+#define N_EARLY_MOUNT 5
+#else
+#define N_EARLY_MOUNT 4
+#endif
+
+static const MountPoint mount_table[] = {
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER|MNT_FOLLOW_SYMLINK },
+ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "devtmpfs", "/dev", "devtmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_NOEXEC|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE },
+#if ENABLE_SMACK
+ { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ mac_smack_use, MNT_FATAL },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ mac_smack_use, MNT_FATAL },
+#endif
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
+ NULL, MNT_IN_CONTAINER },
+#if ENABLE_SMACK
+ { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ mac_smack_use, MNT_FATAL },
+#endif
+ { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate,memory_recursiveprot", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "cgroup2", "/sys/fs/cgroup", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
+ cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_legacy_wanted, MNT_IN_CONTAINER },
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE },
+#if ENABLE_EFI
+ { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ is_efi_boot, MNT_NONE },
+#endif
+ { "bpf", "/sys/fs/bpf", "bpf", "mode=700", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE, },
+};
+
+bool mount_point_is_api(const char *path) {
+ unsigned i;
+
+ /* Checks if this mount point is considered "API", and hence
+ * should be ignored */
+
+ for (i = 0; i < ELEMENTSOF(mount_table); i ++)
+ if (path_equal(path, mount_table[i].where))
+ return true;
+
+ return path_startswith(path, "/sys/fs/cgroup/");
+}
+
+bool mount_point_ignore(const char *path) {
+
+ const char *i;
+
+ /* These are API file systems that might be mounted by other software, we just list them here so that
+ * we know that we should ignore them. */
+ FOREACH_STRING(i,
+ /* SELinux file systems */
+ "/sys/fs/selinux",
+ /* Container bind mounts */
+ "/dev/console",
+ "/proc/kmsg",
+ "/proc/sys",
+ "/proc/sys/kernel/random/boot_id")
+ if (path_equal(path, i))
+ return true;
+
+ if (path_startswith(path, "/run/host")) /* All mounts passed in from the container manager are
+ * something we better ignore. */
+ return true;
+
+ return false;
+}
+
+static int mount_one(const MountPoint *p, bool relabel) {
+ int r, priority;
+
+ assert(p);
+
+ priority = (p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG;
+
+ if (p->condition_fn && !p->condition_fn())
+ return 0;
+
+ /* Relabel first, just in case */
+ if (relabel)
+ (void) label_fix(p->where, LABEL_IGNORE_ENOENT|LABEL_IGNORE_EROFS);
+
+ r = path_is_mount_point(p->where, NULL, AT_SYMLINK_FOLLOW);
+ if (r < 0 && r != -ENOENT) {
+ log_full_errno(priority, r, "Failed to determine whether %s is a mount point: %m", p->where);
+ return (p->mode & MNT_FATAL) ? r : 0;
+ }
+ if (r > 0)
+ return 0;
+
+ /* Skip securityfs in a container */
+ if (!(p->mode & MNT_IN_CONTAINER) && detect_container() > 0)
+ return 0;
+
+ /* The access mode here doesn't really matter too much, since
+ * the mounted file system will take precedence anyway. */
+ if (relabel)
+ (void) mkdir_p_label(p->where, 0755);
+ else
+ (void) mkdir_p(p->where, 0755);
+
+ log_debug("Mounting %s to %s of type %s with options %s.",
+ p->what,
+ p->where,
+ p->type,
+ strna(p->options));
+
+ if (FLAGS_SET(p->mode, MNT_FOLLOW_SYMLINK))
+ r = mount(p->what, p->where, p->type, p->flags, p->options) < 0 ? -errno : 0;
+ else
+ r = mount_nofollow(p->what, p->where, p->type, p->flags, p->options);
+ if (r < 0) {
+ log_full_errno(priority, r, "Failed to mount %s at %s: %m", p->type, p->where);
+ return (p->mode & MNT_FATAL) ? r : 0;
+ }
+
+ /* Relabel again, since we now mounted something fresh here */
+ if (relabel)
+ (void) label_fix(p->where, 0);
+
+ if (p->mode & MNT_CHECK_WRITABLE) {
+ if (access(p->where, W_OK) < 0) {
+ r = -errno;
+
+ (void) umount2(p->where, UMOUNT_NOFOLLOW);
+ (void) rmdir(p->where);
+
+ log_full_errno(priority, r, "Mount point %s not writable after mounting: %m", p->where);
+ return (p->mode & MNT_FATAL) ? r : 0;
+ }
+ }
+
+ return 1;
+}
+
+static int mount_points_setup(unsigned n, bool loaded_policy) {
+ unsigned i;
+ int r = 0;
+
+ for (i = 0; i < n; i ++) {
+ int j;
+
+ j = mount_one(mount_table + i, loaded_policy);
+ if (j != 0 && r >= 0)
+ r = j;
+ }
+
+ return r;
+}
+
+int mount_setup_early(void) {
+ assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
+
+ /* Do a minimal mount of /proc and friends to enable the most
+ * basic stuff, such as SELinux */
+ return mount_points_setup(N_EARLY_MOUNT, false);
+}
+
+static const char *join_with(const char *controller) {
+
+ static const char* const pairs[] = {
+ "cpu", "cpuacct",
+ "net_cls", "net_prio",
+ NULL
+ };
+
+ const char *const *x, *const *y;
+
+ assert(controller);
+
+ /* This will lookup which controller to mount another controller with. Input is a controller name, and output
+ * is the other controller name. The function works both ways: you can input one and get the other, and input
+ * the other to get the one. */
+
+ STRV_FOREACH_PAIR(x, y, pairs) {
+ if (streq(controller, *x))
+ return *y;
+ if (streq(controller, *y))
+ return *x;
+ }
+
+ return NULL;
+}
+
+static int symlink_controller(const char *target, const char *alias) {
+ const char *a;
+ int r;
+
+ assert(target);
+ assert(alias);
+
+ a = strjoina("/sys/fs/cgroup/", alias);
+
+ r = symlink_idempotent(target, a, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink %s: %m", a);
+
+#ifdef SMACK_RUN_LABEL
+ const char *p;
+
+ p = strjoina("/sys/fs/cgroup/", target);
+
+ r = mac_smack_copy(a, p);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", p, a);
+#endif
+
+ return 0;
+}
+
+int mount_cgroup_controllers(void) {
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ int r;
+
+ if (!cg_is_legacy_wanted())
+ return 0;
+
+ /* Mount all available cgroup controllers that are built into the kernel. */
+ r = cg_kernel_controllers(&controllers);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
+
+ for (;;) {
+ _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
+ const char *other_controller;
+ MountPoint p = {
+ .what = "cgroup",
+ .type = "cgroup",
+ .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ .mode = MNT_IN_CONTAINER,
+ };
+
+ controller = set_steal_first(controllers);
+ if (!controller)
+ break;
+
+ /* Check if we shall mount this together with another controller */
+ other_controller = join_with(controller);
+ if (other_controller) {
+ _cleanup_free_ char *c = NULL;
+
+ /* Check if the other controller is actually available in the kernel too */
+ c = set_remove(controllers, other_controller);
+ if (c) {
+
+ /* Join the two controllers into one string, and maintain a stable ordering */
+ if (strcmp(controller, other_controller) < 0)
+ options = strjoin(controller, ",", other_controller);
+ else
+ options = strjoin(other_controller, ",", controller);
+ if (!options)
+ return log_oom();
+ }
+ }
+
+ /* The simple case, where there's only one controller to mount together */
+ if (!options)
+ options = TAKE_PTR(controller);
+
+ where = path_join("/sys/fs/cgroup", options);
+ if (!where)
+ return log_oom();
+
+ p.where = where;
+ p.options = options;
+
+ r = mount_one(&p, true);
+ if (r < 0)
+ return r;
+
+ /* Create symlinks from the individual controller names, in case we have a joined mount */
+ if (controller)
+ (void) symlink_controller(options, controller);
+ if (other_controller)
+ (void) symlink_controller(options, other_controller);
+ }
+
+ /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */
+ (void) mount_nofollow("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP);
+
+ return 0;
+}
+
+#if HAVE_SELINUX || ENABLE_SMACK
+static int nftw_cb(
+ const char *fpath,
+ const struct stat *sb,
+ int tflag,
+ struct FTW *ftwbuf) {
+
+ /* No need to label /dev twice in a row... */
+ if (_unlikely_(ftwbuf->level == 0))
+ return FTW_CONTINUE;
+
+ (void) label_fix(fpath, 0);
+
+ /* /run/initramfs is static data and big, no need to
+ * dynamically relabel its contents at boot... */
+ if (_unlikely_(ftwbuf->level == 1 &&
+ tflag == FTW_D &&
+ streq(fpath, "/run/initramfs")))
+ return FTW_SKIP_SUBTREE;
+
+ return FTW_CONTINUE;
+};
+
+static int relabel_cgroup_filesystems(void) {
+ int r;
+ struct statfs st;
+
+ r = cg_all_unified();
+ if (r == 0) {
+ /* Temporarily remount the root cgroup filesystem to give it a proper label. Do this
+ only when the filesystem has been already populated by a previous instance of systemd
+ running from initrd. Otherwise don't remount anything and leave the filesystem read-write
+ for the cgroup filesystems to be mounted inside. */
+ if (statfs("/sys/fs/cgroup", &st) < 0)
+ return log_error_errno(errno, "Failed to determine mount flags for /sys/fs/cgroup: %m");
+
+ if (st.f_flags & ST_RDONLY)
+ (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, NULL);
+
+ (void) label_fix("/sys/fs/cgroup", 0);
+ (void) nftw("/sys/fs/cgroup", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
+
+ if (st.f_flags & ST_RDONLY)
+ (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT|MS_RDONLY, NULL);
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to determine whether we are in all unified mode: %m");
+
+ return 0;
+}
+
+static int relabel_extra(void) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **file;
+ int r, c = 0;
+
+ /* Support for relabelling additional files or directories after loading the policy. For this, code in the
+ * initrd simply has to drop in *.relabel files into /run/systemd/relabel-extra.d/. We'll read all such files
+ * expecting one absolute path by line and will relabel each (and everyone below that in case the path refers
+ * to a directory). These drop-in files are supposed to be absolutely minimal, and do not understand comments
+ * and such. After the operation succeeded the files are removed, and the drop-in directory as well, if
+ * possible.
+ */
+
+ r = conf_files_list(&files, ".relabel", NULL,
+ CONF_FILES_FILTER_MASKED | CONF_FILES_REGULAR,
+ "/run/systemd/relabel-extra.d/");
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate /run/systemd/relabel-extra.d/, ignoring: %m");
+
+ STRV_FOREACH(file, files) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = fopen(*file, "re");
+ if (!f) {
+ log_warning_errno(errno, "Failed to open %s, ignoring: %m", *file);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read %s, ignoring: %m", *file);
+ break;
+ }
+ if (r == 0) /* EOF */
+ break;
+
+ path_simplify(line, true);
+
+ if (!path_is_normalized(line)) {
+ log_warning("Path to relabel is not normalized, ignoring: %s", line);
+ continue;
+ }
+
+ if (!path_is_absolute(line)) {
+ log_warning("Path to relabel is not absolute, ignoring: %s", line);
+ continue;
+ }
+
+ log_debug("Relabelling additional file/directory '%s'.", line);
+ (void) label_fix(line, 0);
+ (void) nftw(line, nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
+ c++;
+ }
+
+ if (unlink(*file) < 0)
+ log_warning_errno(errno, "Failed to remove %s, ignoring: %m", *file);
+ }
+
+ /* Remove when we complete things. */
+ if (rmdir("/run/systemd/relabel-extra.d") < 0 &&
+ errno != ENOENT)
+ log_warning_errno(errno, "Failed to remove /run/systemd/relabel-extra.d/ directory: %m");
+
+ return c;
+}
+#endif
+
+int mount_setup(bool loaded_policy, bool leave_propagation) {
+ int r;
+
+ r = mount_points_setup(ELEMENTSOF(mount_table), loaded_policy);
+ if (r < 0)
+ return r;
+
+#if HAVE_SELINUX || ENABLE_SMACK
+ /* Nodes in devtmpfs and /run need to be manually updated for
+ * the appropriate labels, after mounting. The other virtual
+ * API file systems like /sys and /proc do not need that, they
+ * use the same label for all their files. */
+ if (loaded_policy) {
+ usec_t before_relabel, after_relabel;
+ char timespan[FORMAT_TIMESPAN_MAX];
+ const char *i;
+ int n_extra;
+
+ before_relabel = now(CLOCK_MONOTONIC);
+
+ FOREACH_STRING(i, "/dev", "/dev/shm", "/run")
+ (void) nftw(i, nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
+
+ (void) relabel_cgroup_filesystems();
+
+ n_extra = relabel_extra();
+
+ after_relabel = now(CLOCK_MONOTONIC);
+
+ log_info("Relabelled /dev, /dev/shm, /run, /sys/fs/cgroup%s in %s.",
+ n_extra > 0 ? ", additional files" : "",
+ format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
+ }
+#endif
+
+ /* Create a few default symlinks, which are normally created
+ * by udevd, but some scripts might need them before we start
+ * udevd. */
+ dev_setup(NULL, UID_INVALID, GID_INVALID);
+
+ /* Mark the root directory as shared in regards to mount propagation. The kernel defaults to "private", but we
+ * think it makes more sense to have a default of "shared" so that nspawn and the container tools work out of
+ * the box. If specific setups need other settings they can reset the propagation mode to private if
+ * needed. Note that we set this only when we are invoked directly by the kernel. If we are invoked by a
+ * container manager we assume the container manager knows what it is doing (for example, because it set up
+ * some directories with different propagation modes). */
+ if (detect_container() <= 0 && !leave_propagation)
+ if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
+ log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
+
+ /* Create a few directories we always want around, Note that sd_booted() checks for /run/systemd/system, so
+ * this mkdir really needs to stay for good, otherwise software that copied sd-daemon.c into their sources will
+ * misdetect systemd. */
+ (void) mkdir_label("/run/systemd", 0755);
+ (void) mkdir_label("/run/systemd/system", 0755);
+
+ /* Make sure we have a mount point to hide in sandboxes */
+ (void) mkdir_label("/run/credentials", 0755);
+
+ /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount
+ * inaccessible nodes from. If we run in a container the host might have created these for us already
+ * in /run/host/inaccessible/. Use those if we can, since that way we likely get access to block/char
+ * device nodes that are inaccessible, and if userns is used to nodes that are on mounts owned by a
+ * userns outside the container and thus nicely read-only and not remountable. */
+ if (access("/run/host/inaccessible/", F_OK) < 0) {
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to check if /run/host/inaccessible exists, ignoring: %m");
+
+ (void) make_inaccessible_nodes("/run/systemd", UID_INVALID, GID_INVALID);
+ } else
+ (void) symlink("../host/inaccessible", "/run/systemd/inaccessible");
+
+ return 0;
+}
diff --git a/src/core/mount-setup.h b/src/core/mount-setup.h
new file mode 100644
index 0000000..29bd62f
--- /dev/null
+++ b/src/core/mount-setup.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int mount_setup_early(void);
+int mount_setup(bool loaded_policy, bool leave_propagation);
+
+int mount_cgroup_controllers(void);
+
+bool mount_point_is_api(const char *path);
+bool mount_point_ignore(const char *path);
diff --git a/src/core/mount.c b/src/core/mount.c
new file mode 100644
index 0000000..8e83de0
--- /dev/null
+++ b/src/core/mount.c
@@ -0,0 +1,2204 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/epoll.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "dbus-mount.h"
+#include "dbus-unit.h"
+#include "device.h"
+#include "exit-status.h"
+#include "format-util.h"
+#include "fstab-util.h"
+#include "libmount-util.h"
+#include "log.h"
+#include "manager.h"
+#include "mkdir.h"
+#include "mount-setup.h"
+#include "mount.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+
+#define RETRY_UMOUNT_MAX 32
+
+static const UnitActiveState state_translation_table[_MOUNT_STATE_MAX] = {
+ [MOUNT_DEAD] = UNIT_INACTIVE,
+ [MOUNT_MOUNTING] = UNIT_ACTIVATING,
+ [MOUNT_MOUNTING_DONE] = UNIT_ACTIVATING,
+ [MOUNT_MOUNTED] = UNIT_ACTIVE,
+ [MOUNT_REMOUNTING] = UNIT_RELOADING,
+ [MOUNT_UNMOUNTING] = UNIT_DEACTIVATING,
+ [MOUNT_REMOUNTING_SIGTERM] = UNIT_RELOADING,
+ [MOUNT_REMOUNTING_SIGKILL] = UNIT_RELOADING,
+ [MOUNT_UNMOUNTING_SIGTERM] = UNIT_DEACTIVATING,
+ [MOUNT_UNMOUNTING_SIGKILL] = UNIT_DEACTIVATING,
+ [MOUNT_FAILED] = UNIT_FAILED,
+ [MOUNT_CLEANING] = UNIT_MAINTENANCE,
+};
+
+static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int mount_process_proc_self_mountinfo(Manager *m);
+
+static bool MOUNT_STATE_WITH_PROCESS(MountState state) {
+ return IN_SET(state,
+ MOUNT_MOUNTING,
+ MOUNT_MOUNTING_DONE,
+ MOUNT_REMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM,
+ MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_CLEANING);
+}
+
+static bool mount_is_automount(const MountParameters *p) {
+ assert(p);
+
+ return fstab_test_option(p->options,
+ "comment=systemd.automount\0"
+ "x-systemd.automount\0");
+}
+
+static bool mount_is_network(const MountParameters *p) {
+ assert(p);
+
+ if (fstab_test_option(p->options, "_netdev\0"))
+ return true;
+
+ if (p->fstype && fstype_is_network(p->fstype))
+ return true;
+
+ return false;
+}
+
+static bool mount_is_nofail(const Mount *m) {
+ assert(m);
+
+ if (!m->from_fragment)
+ return false;
+
+ return fstab_test_yes_no_option(m->parameters_fragment.options, "nofail\0" "fail\0");
+}
+
+static bool mount_is_loop(const MountParameters *p) {
+ assert(p);
+
+ if (fstab_test_option(p->options, "loop\0"))
+ return true;
+
+ return false;
+}
+
+static bool mount_is_bind(const MountParameters *p) {
+ assert(p);
+
+ if (fstab_test_option(p->options, "bind\0" "rbind\0"))
+ return true;
+
+ if (p->fstype && STR_IN_SET(p->fstype, "bind", "rbind"))
+ return true;
+
+ return false;
+}
+
+static bool mount_is_bound_to_device(const Mount *m) {
+ const MountParameters *p;
+
+ if (m->from_fragment)
+ return true;
+
+ p = &m->parameters_proc_self_mountinfo;
+ return fstab_test_option(p->options, "x-systemd.device-bound\0");
+}
+
+static bool mount_needs_quota(const MountParameters *p) {
+ assert(p);
+
+ /* Quotas are not enabled on network filesystems, but we want them, for example, on storage connected via
+ * iscsi. We hence don't use mount_is_network() here, as that would also return true for _netdev devices. */
+ if (p->fstype && fstype_is_network(p->fstype))
+ return false;
+
+ if (mount_is_bind(p))
+ return false;
+
+ return fstab_test_option(p->options,
+ "usrquota\0" "grpquota\0" "quota\0" "usrjquota\0" "grpjquota\0");
+}
+
+static void mount_init(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ m->timeout_usec = u->manager->default_timeout_start_usec;
+
+ m->exec_context.std_output = u->manager->default_std_output;
+ m->exec_context.std_error = u->manager->default_std_error;
+
+ m->directory_mode = 0755;
+
+ /* We need to make sure that /usr/bin/mount is always called
+ * in the same process group as us, so that the autofs kernel
+ * side doesn't send us another mount request while we are
+ * already trying to comply its last one. */
+ m->exec_context.same_pgrp = true;
+
+ m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID;
+
+ u->ignore_on_isolate = true;
+}
+
+static int mount_arm_timer(Mount *m, usec_t usec) {
+ int r;
+
+ assert(m);
+
+ if (m->timer_event_source) {
+ r = sd_event_source_set_time(m->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(m->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(m)->manager->event,
+ &m->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ mount_dispatch_timer, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->timer_event_source, "mount-timer");
+
+ return 0;
+}
+
+static void mount_unwatch_control_pid(Mount *m) {
+ assert(m);
+
+ if (m->control_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(m), m->control_pid);
+ m->control_pid = 0;
+}
+
+static void mount_parameters_done(MountParameters *p) {
+ assert(p);
+
+ p->what = mfree(p->what);
+ p->options = mfree(p->options);
+ p->fstype = mfree(p->fstype);
+}
+
+static void mount_done(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ m->where = mfree(m->where);
+
+ mount_parameters_done(&m->parameters_proc_self_mountinfo);
+ mount_parameters_done(&m->parameters_fragment);
+
+ m->exec_runtime = exec_runtime_unref(m->exec_runtime, false);
+ exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
+ m->control_command = NULL;
+
+ dynamic_creds_unref(&m->dynamic_creds);
+
+ mount_unwatch_control_pid(m);
+
+ m->timer_event_source = sd_event_source_unref(m->timer_event_source);
+}
+
+static MountParameters* get_mount_parameters_fragment(Mount *m) {
+ assert(m);
+
+ if (m->from_fragment)
+ return &m->parameters_fragment;
+
+ return NULL;
+}
+
+static MountParameters* get_mount_parameters(Mount *m) {
+ assert(m);
+
+ if (m->from_proc_self_mountinfo)
+ return &m->parameters_proc_self_mountinfo;
+
+ return get_mount_parameters_fragment(m);
+}
+
+static int update_parameters_proc_self_mountinfo(
+ Mount *m,
+ const char *what,
+ const char *options,
+ const char *fstype) {
+
+ MountParameters *p;
+ int r, q, w;
+
+ p = &m->parameters_proc_self_mountinfo;
+
+ r = free_and_strdup(&p->what, what);
+ if (r < 0)
+ return r;
+
+ q = free_and_strdup(&p->options, options);
+ if (q < 0)
+ return q;
+
+ w = free_and_strdup(&p->fstype, fstype);
+ if (w < 0)
+ return w;
+
+ return r > 0 || q > 0 || w > 0;
+}
+
+static int mount_add_mount_dependencies(Mount *m) {
+ MountParameters *pm;
+ Unit *other;
+ Set *s;
+ int r;
+
+ assert(m);
+
+ if (!path_equal(m->where, "/")) {
+ _cleanup_free_ char *parent = NULL;
+
+ /* Adds in links to other mount points that might lie further up in the hierarchy */
+
+ parent = dirname_malloc(m->where);
+ if (!parent)
+ return -ENOMEM;
+
+ r = unit_require_mounts_for(UNIT(m), parent, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ return r;
+ }
+
+ /* Adds in dependencies to other mount points that might be needed for the source path (if this is a bind mount
+ * or a loop mount) to be available. */
+ pm = get_mount_parameters_fragment(m);
+ if (pm && pm->what &&
+ path_is_absolute(pm->what) &&
+ (mount_is_bind(pm) || mount_is_loop(pm) || !mount_is_network(pm))) {
+
+ r = unit_require_mounts_for(UNIT(m), pm->what, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ /* Adds in dependencies to other units that use this path or paths further down in the hierarchy */
+ s = manager_get_units_requiring_mounts_for(UNIT(m)->manager, m->where);
+ SET_FOREACH(other, s) {
+
+ if (other->load_state != UNIT_LOADED)
+ continue;
+
+ if (other == UNIT(m))
+ continue;
+
+ r = unit_add_dependency(other, UNIT_AFTER, UNIT(m), true, UNIT_DEPENDENCY_PATH);
+ if (r < 0)
+ return r;
+
+ if (UNIT(m)->fragment_path) {
+ /* If we have fragment configuration, then make this dependency required */
+ r = unit_add_dependency(other, UNIT_REQUIRES, UNIT(m), true, UNIT_DEPENDENCY_PATH);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int mount_add_device_dependencies(Mount *m) {
+ UnitDependencyMask mask;
+ MountParameters *p;
+ UnitDependency dep;
+ int r;
+
+ assert(m);
+
+ p = get_mount_parameters(m);
+ if (!p)
+ return 0;
+
+ if (!p->what)
+ return 0;
+
+ if (mount_is_bind(p))
+ return 0;
+
+ if (!is_device_path(p->what))
+ return 0;
+
+ /* /dev/root is a really weird thing, it's not a real device, but just a path the kernel exports for
+ * the root file system specified on the kernel command line. Ignore it here. */
+ if (PATH_IN_SET(p->what, "/dev/root", "/dev/nfs"))
+ return 0;
+
+ if (path_equal(m->where, "/"))
+ return 0;
+
+ /* Mount units from /proc/self/mountinfo are not bound to devices by default since they're subject to
+ * races when devices are unplugged. But the user can still force this dep with an appropriate option
+ * (or udev property) so the mount units are automatically stopped when the device disappears
+ * suddenly. */
+ dep = mount_is_bound_to_device(m) ? UNIT_BINDS_TO : UNIT_REQUIRES;
+
+ /* We always use 'what' from /proc/self/mountinfo if mounted */
+ mask = m->from_proc_self_mountinfo ? UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT : UNIT_DEPENDENCY_FILE;
+
+ r = unit_add_node_dependency(UNIT(m), p->what, dep, mask);
+ if (r < 0)
+ return r;
+
+ return unit_add_blockdev_dependency(UNIT(m), p->what, mask);
+}
+
+static int mount_add_quota_dependencies(Mount *m) {
+ UnitDependencyMask mask;
+ MountParameters *p;
+ int r;
+
+ assert(m);
+
+ if (!MANAGER_IS_SYSTEM(UNIT(m)->manager))
+ return 0;
+
+ p = get_mount_parameters_fragment(m);
+ if (!p)
+ return 0;
+
+ if (!mount_needs_quota(p))
+ return 0;
+
+ mask = m->from_fragment ? UNIT_DEPENDENCY_FILE : UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT;
+
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTACHECK_SERVICE, true, mask);
+ if (r < 0)
+ return r;
+
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTAON_SERVICE, true, mask);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool mount_is_extrinsic(Unit *u) {
+ MountParameters *p;
+ Mount *m = MOUNT(u);
+ assert(m);
+
+ /* Returns true for all units that are "magic" and should be excluded from the usual
+ * start-up and shutdown dependencies. We call them "extrinsic" here, as they are generally
+ * mounted outside of the systemd dependency logic. We shouldn't attempt to manage them
+ * ourselves but it's fine if the user operates on them with us. */
+
+ /* We only automatically manage mounts if we are in system mode */
+ if (MANAGER_IS_USER(u->manager))
+ return true;
+
+ p = get_mount_parameters(m);
+ if (p && fstab_is_extrinsic(m->where, p->options))
+ return true;
+
+ return false;
+}
+
+static int mount_add_default_ordering_dependencies(
+ Mount *m,
+ MountParameters *p,
+ UnitDependencyMask mask) {
+
+ const char *after, *before, *e;
+ int r;
+
+ assert(m);
+
+ e = path_startswith(m->where, "/sysroot");
+ if (e && in_initrd()) {
+ /* All mounts under /sysroot need to happen later, at initrd-fs.target time. IOW,
+ * it's not technically part of the basic initrd filesystem itself, and so
+ * shouldn't inherit the default Before=local-fs.target dependency. */
+
+ after = NULL;
+ before = isempty(e) ? SPECIAL_INITRD_ROOT_FS_TARGET : SPECIAL_INITRD_FS_TARGET;
+
+ } else if (mount_is_network(p)) {
+ after = SPECIAL_REMOTE_FS_PRE_TARGET;
+ before = SPECIAL_REMOTE_FS_TARGET;
+
+ } else {
+ after = SPECIAL_LOCAL_FS_PRE_TARGET;
+ before = SPECIAL_LOCAL_FS_TARGET;
+ }
+
+ if (!mount_is_nofail(m) && !mount_is_automount(p)) {
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_BEFORE, before, true, mask);
+ if (r < 0)
+ return r;
+ }
+
+ if (after) {
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, after, true, mask);
+ if (r < 0)
+ return r;
+ }
+
+ return unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_CONFLICTS,
+ SPECIAL_UMOUNT_TARGET, true, mask);
+}
+
+static int mount_add_default_dependencies(Mount *m) {
+ UnitDependencyMask mask;
+ MountParameters *p;
+ int r;
+
+ assert(m);
+
+ if (!UNIT(m)->default_dependencies)
+ return 0;
+
+ /* We do not add any default dependencies to /, /usr or /run/initramfs/, since they are
+ * guaranteed to stay mounted the whole time, since our system is on it. Also, don't
+ * bother with anything mounted below virtual file systems, it's also going to be virtual,
+ * and hence not worth the effort. */
+ if (mount_is_extrinsic(UNIT(m)))
+ return 0;
+
+ p = get_mount_parameters(m);
+ if (!p)
+ return 0;
+
+ mask = m->from_fragment ? UNIT_DEPENDENCY_FILE : UNIT_DEPENDENCY_MOUNTINFO_DEFAULT;
+
+ r = mount_add_default_ordering_dependencies(m, p, mask);
+ if (r < 0)
+ return r;
+
+ if (mount_is_network(p)) {
+ /* We order ourselves after network.target. This is primarily useful at shutdown:
+ * services that take down the network should order themselves before
+ * network.target, so that they are shut down only after this mount unit is
+ * stopped. */
+
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_NETWORK_TARGET, true, mask);
+ if (r < 0)
+ return r;
+
+ /* We pull in network-online.target, and order ourselves after it. This is useful
+ * at start-up to actively pull in tools that want to be started before we start
+ * mounting network file systems, and whose purpose it is to delay this until the
+ * network is "up". */
+
+ r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_WANTS, UNIT_AFTER, SPECIAL_NETWORK_ONLINE_TARGET, true, mask);
+ if (r < 0)
+ return r;
+ }
+
+ /* If this is a tmpfs mount then we have to unmount it before we try to deactivate swaps */
+ if (streq_ptr(p->fstype, "tmpfs")) {
+ r = unit_add_dependency_by_name(UNIT(m), UNIT_AFTER, SPECIAL_SWAP_TARGET, true, mask);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int mount_verify(Mount *m) {
+ _cleanup_free_ char *e = NULL;
+ MountParameters *p;
+ int r;
+
+ assert(m);
+ assert(UNIT(m)->load_state == UNIT_LOADED);
+
+ if (!m->from_fragment && !m->from_proc_self_mountinfo && !UNIT(m)->perpetual)
+ return -ENOENT;
+
+ r = unit_name_from_path(m->where, ".mount", &e);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(m), r, "Failed to generate unit name from mount path: %m");
+
+ if (!unit_has_name(UNIT(m), e)) {
+ log_unit_error(UNIT(m), "Where= setting doesn't match unit name. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (mount_point_is_api(m->where) || mount_point_ignore(m->where)) {
+ log_unit_error(UNIT(m), "Cannot create mount unit for API file system %s. Refusing.", m->where);
+ return -ENOEXEC;
+ }
+
+ p = get_mount_parameters_fragment(m);
+ if (p && !p->what && !UNIT(m)->perpetual)
+ return log_unit_error_errno(UNIT(m), SYNTHETIC_ERRNO(ENOEXEC),
+ "What= setting is missing. Refusing.");
+
+ if (m->exec_context.pam_name && m->kill_context.kill_mode != KILL_CONTROL_GROUP) {
+ log_unit_error(UNIT(m), "Unit has PAM enabled. Kill mode must be set to control-group'. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int mount_add_non_exec_dependencies(Mount *m) {
+ int r;
+ assert(m);
+
+ /* Adds in all dependencies directly responsible for ordering the mount, as opposed to dependencies
+ * resulting from the ExecContext and such. */
+
+ r = mount_add_device_dependencies(m);
+ if (r < 0)
+ return r;
+
+ r = mount_add_mount_dependencies(m);
+ if (r < 0)
+ return r;
+
+ r = mount_add_quota_dependencies(m);
+ if (r < 0)
+ return r;
+
+ r = mount_add_default_dependencies(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int mount_add_extras(Mount *m) {
+ Unit *u = UNIT(m);
+ int r;
+
+ assert(m);
+
+ /* Note: this call might be called after we already have been loaded once (and even when it has already been
+ * activated), in case data from /proc/self/mountinfo has changed. This means all code here needs to be ready
+ * to run with an already set up unit. */
+
+ if (u->fragment_path)
+ m->from_fragment = true;
+
+ if (!m->where) {
+ r = unit_name_to_path(u->id, &m->where);
+ if (r < 0)
+ return r;
+ }
+
+ path_simplify(m->where, false);
+
+ if (!u->description) {
+ r = unit_set_description(u, m->where);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_patch_contexts(u);
+ if (r < 0)
+ return r;
+
+ r = unit_add_exec_dependencies(u, &m->exec_context);
+ if (r < 0)
+ return r;
+
+ r = unit_set_default_slice(u);
+ if (r < 0)
+ return r;
+
+ r = mount_add_non_exec_dependencies(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void mount_load_root_mount(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_MOUNT))
+ return;
+
+ u->perpetual = true;
+ u->default_dependencies = false;
+
+ /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */
+ MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL;
+ MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL;
+
+ if (!u->description)
+ u->description = strdup("Root Mount");
+}
+
+static int mount_load(Unit *u) {
+ Mount *m = MOUNT(u);
+ int r, q = 0;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ mount_load_root_mount(u);
+
+ bool fragment_optional = m->from_proc_self_mountinfo || u->perpetual;
+ r = unit_load_fragment_and_dropin(u, !fragment_optional);
+
+ /* Add in some extras. Note we do this in all cases (even if we failed to load the unit) when announced by the
+ * kernel, because we need some things to be set up no matter what when the kernel establishes a mount and thus
+ * we need to update the state in our unit to track it. After all, consider that we don't allow changing the
+ * 'slice' field for a unit once it is active. */
+ if (u->load_state == UNIT_LOADED || m->from_proc_self_mountinfo || u->perpetual)
+ q = mount_add_extras(m);
+
+ if (r < 0)
+ return r;
+ if (q < 0)
+ return q;
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ return mount_verify(m);
+}
+
+static void mount_set_state(Mount *m, MountState state) {
+ MountState old_state;
+ assert(m);
+
+ if (m->state != state)
+ bus_unit_send_pending_change_signal(UNIT(m), false);
+
+ old_state = m->state;
+ m->state = state;
+
+ if (!MOUNT_STATE_WITH_PROCESS(state)) {
+ m->timer_event_source = sd_event_source_unref(m->timer_event_source);
+ mount_unwatch_control_pid(m);
+ m->control_command = NULL;
+ m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID;
+ }
+
+ if (state != old_state)
+ log_unit_debug(UNIT(m), "Changed %s -> %s", mount_state_to_string(old_state), mount_state_to_string(state));
+
+ unit_notify(UNIT(m), state_translation_table[old_state], state_translation_table[state],
+ m->reload_result == MOUNT_SUCCESS ? 0 : UNIT_NOTIFY_RELOAD_FAILURE);
+}
+
+static int mount_coldplug(Unit *u) {
+ Mount *m = MOUNT(u);
+ MountState new_state = MOUNT_DEAD;
+ int r;
+
+ assert(m);
+ assert(m->state == MOUNT_DEAD);
+
+ if (m->deserialized_state != m->state)
+ new_state = m->deserialized_state;
+ else if (m->from_proc_self_mountinfo)
+ new_state = MOUNT_MOUNTED;
+
+ if (new_state == m->state)
+ return 0;
+
+ if (m->control_pid > 0 &&
+ pid_is_unwaited(m->control_pid) &&
+ MOUNT_STATE_WITH_PROCESS(new_state)) {
+
+ r = unit_watch_pid(UNIT(m), m->control_pid, false);
+ if (r < 0)
+ return r;
+
+ r = mount_arm_timer(m, usec_add(u->state_change_timestamp.monotonic, m->timeout_usec));
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(new_state, MOUNT_DEAD, MOUNT_FAILED)) {
+ (void) unit_setup_dynamic_creds(u);
+ (void) unit_setup_exec_runtime(u);
+ }
+
+ mount_set_state(m, new_state);
+ return 0;
+}
+
+static void mount_dump(Unit *u, FILE *f, const char *prefix) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ Mount *m = MOUNT(u);
+ MountParameters *p;
+
+ assert(m);
+ assert(f);
+
+ p = get_mount_parameters(m);
+
+ fprintf(f,
+ "%sMount State: %s\n"
+ "%sResult: %s\n"
+ "%sClean Result: %s\n"
+ "%sWhere: %s\n"
+ "%sWhat: %s\n"
+ "%sFile System Type: %s\n"
+ "%sOptions: %s\n"
+ "%sFrom /proc/self/mountinfo: %s\n"
+ "%sFrom fragment: %s\n"
+ "%sExtrinsic: %s\n"
+ "%sDirectoryMode: %04o\n"
+ "%sSloppyOptions: %s\n"
+ "%sLazyUnmount: %s\n"
+ "%sForceUnmount: %s\n"
+ "%sReadWriteOnly: %s\n"
+ "%sTimeoutSec: %s\n",
+ prefix, mount_state_to_string(m->state),
+ prefix, mount_result_to_string(m->result),
+ prefix, mount_result_to_string(m->clean_result),
+ prefix, m->where,
+ prefix, p ? strna(p->what) : "n/a",
+ prefix, p ? strna(p->fstype) : "n/a",
+ prefix, p ? strna(p->options) : "n/a",
+ prefix, yes_no(m->from_proc_self_mountinfo),
+ prefix, yes_no(m->from_fragment),
+ prefix, yes_no(mount_is_extrinsic(u)),
+ prefix, m->directory_mode,
+ prefix, yes_no(m->sloppy_options),
+ prefix, yes_no(m->lazy_unmount),
+ prefix, yes_no(m->force_unmount),
+ prefix, yes_no(m->read_write_only),
+ prefix, format_timespan(buf, sizeof(buf), m->timeout_usec, USEC_PER_SEC));
+
+ if (m->control_pid > 0)
+ fprintf(f,
+ "%sControl PID: "PID_FMT"\n",
+ prefix, m->control_pid);
+
+ exec_context_dump(&m->exec_context, f, prefix);
+ kill_context_dump(&m->kill_context, f, prefix);
+ cgroup_context_dump(UNIT(m), f, prefix);
+}
+
+static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
+
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
+ };
+ pid_t pid;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(_pid);
+
+ r = unit_prepare_exec(UNIT(m));
+ if (r < 0)
+ return r;
+
+ r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec));
+ if (r < 0)
+ return r;
+
+ r = unit_set_exec_params(UNIT(m), &exec_params);
+ if (r < 0)
+ return r;
+
+ r = exec_spawn(UNIT(m),
+ c,
+ &m->exec_context,
+ &exec_params,
+ m->exec_runtime,
+ &m->dynamic_creds,
+ &pid);
+ if (r < 0)
+ return r;
+
+ r = unit_watch_pid(UNIT(m), pid, true);
+ if (r < 0)
+ return r;
+
+ *_pid = pid;
+
+ return 0;
+}
+
+static void mount_enter_dead(Mount *m, MountResult f) {
+ assert(m);
+
+ if (m->result == MOUNT_SUCCESS)
+ m->result = f;
+
+ unit_log_result(UNIT(m), m->result == MOUNT_SUCCESS, mount_result_to_string(m->result));
+ unit_warn_leftover_processes(UNIT(m), unit_log_leftover_process_stop);
+
+ mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
+
+ m->exec_runtime = exec_runtime_unref(m->exec_runtime, true);
+
+ unit_destroy_runtime_data(UNIT(m), &m->exec_context);
+
+ unit_unref_uid_gid(UNIT(m), true);
+
+ dynamic_creds_destroy(&m->dynamic_creds);
+
+ /* Any dependencies based on /proc/self/mountinfo are now stale */
+ unit_remove_dependencies(UNIT(m), UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT);
+}
+
+static void mount_enter_mounted(Mount *m, MountResult f) {
+ assert(m);
+
+ if (m->result == MOUNT_SUCCESS)
+ m->result = f;
+
+ mount_set_state(m, MOUNT_MOUNTED);
+}
+
+static void mount_enter_dead_or_mounted(Mount *m, MountResult f) {
+ assert(m);
+
+ /* Enter DEAD or MOUNTED state, depending on what the kernel currently says about the mount point. We use this
+ * whenever we executed an operation, so that our internal state reflects what the kernel says again, after all
+ * ultimately we just mirror the kernel's internal state on this. */
+
+ if (m->from_proc_self_mountinfo)
+ mount_enter_mounted(m, f);
+ else
+ mount_enter_dead(m, f);
+}
+
+static int state_to_kill_operation(MountState state) {
+ switch (state) {
+
+ case MOUNT_REMOUNTING_SIGTERM:
+ return KILL_RESTART;
+
+ case MOUNT_UNMOUNTING_SIGTERM:
+ return KILL_TERMINATE;
+
+ case MOUNT_REMOUNTING_SIGKILL:
+ case MOUNT_UNMOUNTING_SIGKILL:
+ return KILL_KILL;
+
+ default:
+ return _KILL_OPERATION_INVALID;
+ }
+}
+
+static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
+ int r;
+
+ assert(m);
+
+ if (m->result == MOUNT_SUCCESS)
+ m->result = f;
+
+ r = unit_kill_context(
+ UNIT(m),
+ &m->kill_context,
+ state_to_kill_operation(state),
+ -1,
+ m->control_pid,
+ false);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, state);
+ } else if (state == MOUNT_REMOUNTING_SIGTERM && m->kill_context.send_sigkill)
+ mount_enter_signal(m, MOUNT_REMOUNTING_SIGKILL, MOUNT_SUCCESS);
+ else if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL))
+ mount_enter_mounted(m, MOUNT_SUCCESS);
+ else if (state == MOUNT_UNMOUNTING_SIGTERM && m->kill_context.send_sigkill)
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGKILL, MOUNT_SUCCESS);
+ else
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_RESOURCES);
+}
+
+static void mount_enter_unmounting(Mount *m) {
+ int r;
+
+ assert(m);
+
+ /* Start counting our attempts */
+ if (!IN_SET(m->state,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL))
+ m->n_retry_umount = 0;
+
+ m->control_command_id = MOUNT_EXEC_UNMOUNT;
+ m->control_command = m->exec_command + MOUNT_EXEC_UNMOUNT;
+
+ r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, "-c", NULL);
+ if (r >= 0 && m->lazy_unmount)
+ r = exec_command_append(m->control_command, "-l", NULL);
+ if (r >= 0 && m->force_unmount)
+ r = exec_command_append(m->control_command, "-f", NULL);
+ if (r < 0)
+ goto fail;
+
+ mount_unwatch_control_pid(m);
+
+ r = mount_spawn(m, m->control_command, &m->control_pid);
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, MOUNT_UNMOUNTING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(m), r, "Failed to run 'umount' task: %m");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_RESOURCES);
+}
+
+static void mount_enter_mounting(Mount *m) {
+ int r;
+ MountParameters *p;
+
+ assert(m);
+
+ r = unit_fail_if_noncanonical(UNIT(m), m->where);
+ if (r < 0)
+ goto fail;
+
+ (void) mkdir_p_label(m->where, m->directory_mode);
+
+ unit_warn_if_dir_nonempty(UNIT(m), m->where);
+ unit_warn_leftover_processes(UNIT(m), unit_log_leftover_process_start);
+
+ m->control_command_id = MOUNT_EXEC_MOUNT;
+ m->control_command = m->exec_command + MOUNT_EXEC_MOUNT;
+
+ /* Create the source directory for bind-mounts if needed */
+ p = get_mount_parameters_fragment(m);
+ if (p && mount_is_bind(p)) {
+ r = mkdir_p_label(p->what, m->directory_mode);
+ if (r < 0)
+ log_unit_error_errno(UNIT(m), r, "Failed to make bind mount source '%s': %m", p->what);
+ }
+
+ if (p) {
+ _cleanup_free_ char *opts = NULL;
+
+ r = fstab_filter_options(p->options, "nofail\0" "noauto\0" "auto\0", NULL, NULL, &opts);
+ if (r < 0)
+ goto fail;
+
+ r = exec_command_set(m->control_command, MOUNT_PATH, p->what, m->where, NULL);
+ if (r >= 0 && m->sloppy_options)
+ r = exec_command_append(m->control_command, "-s", NULL);
+ if (r >= 0 && m->read_write_only)
+ r = exec_command_append(m->control_command, "-w", NULL);
+ if (r >= 0 && p->fstype)
+ r = exec_command_append(m->control_command, "-t", p->fstype, NULL);
+ if (r >= 0 && !isempty(opts))
+ r = exec_command_append(m->control_command, "-o", opts, NULL);
+ } else
+ r = -ENOENT;
+ if (r < 0)
+ goto fail;
+
+ mount_unwatch_control_pid(m);
+
+ r = mount_spawn(m, m->control_command, &m->control_pid);
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, MOUNT_MOUNTING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(m), r, "Failed to run 'mount' task: %m");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_RESOURCES);
+}
+
+static void mount_set_reload_result(Mount *m, MountResult result) {
+ assert(m);
+
+ /* Only store the first error we encounter */
+ if (m->reload_result != MOUNT_SUCCESS)
+ return;
+
+ m->reload_result = result;
+}
+
+static void mount_enter_remounting(Mount *m) {
+ int r;
+ MountParameters *p;
+
+ assert(m);
+
+ /* Reset reload result when we are about to start a new remount operation */
+ m->reload_result = MOUNT_SUCCESS;
+
+ m->control_command_id = MOUNT_EXEC_REMOUNT;
+ m->control_command = m->exec_command + MOUNT_EXEC_REMOUNT;
+
+ p = get_mount_parameters_fragment(m);
+ if (p) {
+ const char *o;
+
+ if (p->options)
+ o = strjoina("remount,", p->options);
+ else
+ o = "remount";
+
+ r = exec_command_set(m->control_command, MOUNT_PATH,
+ p->what, m->where,
+ "-o", o, NULL);
+ if (r >= 0 && m->sloppy_options)
+ r = exec_command_append(m->control_command, "-s", NULL);
+ if (r >= 0 && m->read_write_only)
+ r = exec_command_append(m->control_command, "-w", NULL);
+ if (r >= 0 && p->fstype)
+ r = exec_command_append(m->control_command, "-t", p->fstype, NULL);
+ } else
+ r = -ENOENT;
+ if (r < 0)
+ goto fail;
+
+ mount_unwatch_control_pid(m);
+
+ r = mount_spawn(m, m->control_command, &m->control_pid);
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, MOUNT_REMOUNTING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(m), r, "Failed to run 'remount' task: %m");
+ mount_set_reload_result(m, MOUNT_FAILURE_RESOURCES);
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+}
+
+static void mount_cycle_clear(Mount *m) {
+ assert(m);
+
+ /* Clear all state we shall forget for this new cycle */
+
+ m->result = MOUNT_SUCCESS;
+ m->reload_result = MOUNT_SUCCESS;
+ exec_command_reset_status_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
+ UNIT(m)->reset_accounting = true;
+}
+
+static int mount_start(Unit *u) {
+ Mount *m = MOUNT(u);
+ int r;
+
+ assert(m);
+
+ /* We cannot fulfill this request right now, try again later
+ * please! */
+ if (IN_SET(m->state,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_CLEANING))
+ return -EAGAIN;
+
+ /* Already on it! */
+ if (IN_SET(m->state, MOUNT_MOUNTING, MOUNT_MOUNTING_DONE))
+ return 0;
+
+ assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
+
+ r = unit_test_start_limit(u);
+ if (r < 0) {
+ mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ mount_cycle_clear(m);
+ mount_enter_mounting(m);
+
+ return 1;
+}
+
+static int mount_stop(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ switch (m->state) {
+
+ case MOUNT_UNMOUNTING:
+ case MOUNT_UNMOUNTING_SIGKILL:
+ case MOUNT_UNMOUNTING_SIGTERM:
+ /* Already on it */
+ return 0;
+
+ case MOUNT_MOUNTING:
+ case MOUNT_MOUNTING_DONE:
+ case MOUNT_REMOUNTING:
+ /* If we are still waiting for /bin/mount, we go directly into kill mode. */
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGTERM, MOUNT_SUCCESS);
+ return 0;
+
+ case MOUNT_REMOUNTING_SIGTERM:
+ /* If we are already waiting for a hung remount, convert this to the matching unmounting state */
+ mount_set_state(m, MOUNT_UNMOUNTING_SIGTERM);
+ return 0;
+
+ case MOUNT_REMOUNTING_SIGKILL:
+ /* as above */
+ mount_set_state(m, MOUNT_UNMOUNTING_SIGKILL);
+ return 0;
+
+ case MOUNT_MOUNTED:
+ mount_enter_unmounting(m);
+ return 1;
+
+ case MOUNT_CLEANING:
+ /* If we are currently cleaning, then abort it, brutally. */
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGKILL, MOUNT_SUCCESS);
+ return 0;
+
+ default:
+ assert_not_reached("Unexpected state.");
+ }
+}
+
+static int mount_reload(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+ assert(m->state == MOUNT_MOUNTED);
+
+ mount_enter_remounting(m);
+
+ return 1;
+}
+
+static int mount_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", mount_state_to_string(m->state));
+ (void) serialize_item(f, "result", mount_result_to_string(m->result));
+ (void) serialize_item(f, "reload-result", mount_result_to_string(m->reload_result));
+ (void) serialize_item_format(f, "n-retry-umount", "%u", m->n_retry_umount);
+
+ if (m->control_pid > 0)
+ (void) serialize_item_format(f, "control-pid", PID_FMT, m->control_pid);
+
+ if (m->control_command_id >= 0)
+ (void) serialize_item(f, "control-command", mount_exec_command_to_string(m->control_command_id));
+
+ return 0;
+}
+
+static int mount_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Mount *m = MOUNT(u);
+ int r;
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ MountState state;
+
+ if ((state = mount_state_from_string(value)) < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ m->deserialized_state = state;
+
+ } else if (streq(key, "result")) {
+ MountResult f;
+
+ f = mount_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != MOUNT_SUCCESS)
+ m->result = f;
+
+ } else if (streq(key, "reload-result")) {
+ MountResult f;
+
+ f = mount_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse reload result value: %s", value);
+ else if (f != MOUNT_SUCCESS)
+ m->reload_result = f;
+
+ } else if (streq(key, "n-retry-umount")) {
+
+ r = safe_atou(value, &m->n_retry_umount);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse n-retry-umount value: %s", value);
+
+ } else if (streq(key, "control-pid")) {
+
+ if (parse_pid(value, &m->control_pid) < 0)
+ log_unit_debug(u, "Failed to parse control-pid value: %s", value);
+
+ } else if (streq(key, "control-command")) {
+ MountExecCommand id;
+
+ id = mount_exec_command_from_string(value);
+ if (id < 0)
+ log_unit_debug(u, "Failed to parse exec-command value: %s", value);
+ else {
+ m->control_command_id = id;
+ m->control_command = m->exec_command + id;
+ }
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState mount_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[MOUNT(u)->state];
+}
+
+_pure_ static const char *mount_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return mount_state_to_string(MOUNT(u)->state);
+}
+
+_pure_ static bool mount_may_gc(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ if (m->from_proc_self_mountinfo)
+ return false;
+
+ return true;
+}
+
+static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ Mount *m = MOUNT(u);
+ MountResult f;
+
+ assert(m);
+ assert(pid >= 0);
+
+ if (pid != m->control_pid)
+ return;
+
+ /* So here's the thing, we really want to know before /usr/bin/mount or /usr/bin/umount exit whether
+ * they established/remove a mount. This is important when mounting, but even more so when unmounting
+ * since we need to deal with nested mounts and otherwise cannot safely determine whether to repeat
+ * the unmounts. In theory, the kernel fires /proc/self/mountinfo changes off before returning from
+ * the mount() or umount() syscalls, and thus we should see the changes to the proc file before we
+ * process the waitid() for the /usr/bin/(u)mount processes. However, this is unfortunately racy: we
+ * have to waitid() for processes using P_ALL (since we need to reap unexpected children that got
+ * reparented to PID 1), but when using P_ALL we might end up reaping processes that terminated just
+ * instants ago, i.e. already after our last event loop iteration (i.e. after the last point we might
+ * have noticed /proc/self/mountinfo events via epoll). This means event loop priorities for
+ * processing SIGCHLD vs. /proc/self/mountinfo IO events are not as relevant as we want. To fix that
+ * race, let's explicitly scan /proc/self/mountinfo before we start processing /usr/bin/(u)mount
+ * dying. It's ugly, but it makes our ordering systematic again, and makes sure we always see
+ * /proc/self/mountinfo changes before our mount/umount exits. */
+ (void) mount_process_proc_self_mountinfo(u->manager);
+
+ m->control_pid = 0;
+
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
+ f = MOUNT_SUCCESS;
+ else if (code == CLD_EXITED)
+ f = MOUNT_FAILURE_EXIT_CODE;
+ else if (code == CLD_KILLED)
+ f = MOUNT_FAILURE_SIGNAL;
+ else if (code == CLD_DUMPED)
+ f = MOUNT_FAILURE_CORE_DUMP;
+ else
+ assert_not_reached("Unknown code");
+
+ if (IN_SET(m->state, MOUNT_REMOUNTING, MOUNT_REMOUNTING_SIGKILL, MOUNT_REMOUNTING_SIGTERM))
+ mount_set_reload_result(m, f);
+ else if (m->result == MOUNT_SUCCESS)
+ m->result = f;
+
+ if (m->control_command) {
+ exec_status_exit(&m->control_command->exec_status, &m->exec_context, pid, code, status);
+
+ m->control_command = NULL;
+ m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID;
+ }
+
+ unit_log_process_exit(
+ u,
+ "Mount process",
+ mount_exec_command_to_string(m->control_command_id),
+ f == MOUNT_SUCCESS,
+ code, status);
+
+ /* Note that due to the io event priority logic, we can be sure the new mountinfo is loaded
+ * before we process the SIGCHLD for the mount command. */
+
+ switch (m->state) {
+
+ case MOUNT_MOUNTING:
+ /* Our mount point has not appeared in mountinfo. Something went wrong. */
+
+ if (f == MOUNT_SUCCESS) {
+ /* Either /bin/mount has an unexpected definition of success,
+ * or someone raced us and we lost. */
+ log_unit_warning(UNIT(m), "Mount process finished, but there is no mount.");
+ f = MOUNT_FAILURE_PROTOCOL;
+ }
+ mount_enter_dead(m, f);
+ break;
+
+ case MOUNT_MOUNTING_DONE:
+ mount_enter_mounted(m, f);
+ break;
+
+ case MOUNT_REMOUNTING:
+ case MOUNT_REMOUNTING_SIGTERM:
+ case MOUNT_REMOUNTING_SIGKILL:
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_UNMOUNTING:
+
+ if (f == MOUNT_SUCCESS && m->from_proc_self_mountinfo) {
+
+ /* Still a mount point? If so, let's try again. Most likely there were multiple mount points
+ * stacked on top of each other. We might exceed the timeout specified by the user overall,
+ * but we will stop as soon as any one umount times out. */
+
+ if (m->n_retry_umount < RETRY_UMOUNT_MAX) {
+ log_unit_debug(u, "Mount still present, trying again.");
+ m->n_retry_umount++;
+ mount_enter_unmounting(m);
+ } else {
+ log_unit_warning(u, "Mount still present after %u attempts to unmount, giving up.", m->n_retry_umount);
+ mount_enter_mounted(m, f);
+ }
+ } else
+ mount_enter_dead_or_mounted(m, f);
+
+ break;
+
+ case MOUNT_UNMOUNTING_SIGKILL:
+ case MOUNT_UNMOUNTING_SIGTERM:
+ mount_enter_dead_or_mounted(m, f);
+ break;
+
+ case MOUNT_CLEANING:
+ if (m->clean_result == MOUNT_SUCCESS)
+ m->clean_result = f;
+
+ mount_enter_dead(m, MOUNT_SUCCESS);
+ break;
+
+ default:
+ assert_not_reached("Uh, control process died at wrong time.");
+ }
+
+ /* Notify clients about changed exit status */
+ unit_add_to_dbus_queue(u);
+}
+
+static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Mount *m = MOUNT(userdata);
+
+ assert(m);
+ assert(m->timer_event_source == source);
+
+ switch (m->state) {
+
+ case MOUNT_MOUNTING:
+ case MOUNT_MOUNTING_DONE:
+ log_unit_warning(UNIT(m), "Mounting timed out. Terminating.");
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGTERM, MOUNT_FAILURE_TIMEOUT);
+ break;
+
+ case MOUNT_REMOUNTING:
+ log_unit_warning(UNIT(m), "Remounting timed out. Terminating remount process.");
+ mount_set_reload_result(m, MOUNT_FAILURE_TIMEOUT);
+ mount_enter_signal(m, MOUNT_REMOUNTING_SIGTERM, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_REMOUNTING_SIGTERM:
+ mount_set_reload_result(m, MOUNT_FAILURE_TIMEOUT);
+
+ if (m->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(m), "Remounting timed out. Killing.");
+ mount_enter_signal(m, MOUNT_REMOUNTING_SIGKILL, MOUNT_SUCCESS);
+ } else {
+ log_unit_warning(UNIT(m), "Remounting timed out. Skipping SIGKILL. Ignoring.");
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+ }
+ break;
+
+ case MOUNT_REMOUNTING_SIGKILL:
+ mount_set_reload_result(m, MOUNT_FAILURE_TIMEOUT);
+
+ log_unit_warning(UNIT(m), "Mount process still around after SIGKILL. Ignoring.");
+ mount_enter_dead_or_mounted(m, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_UNMOUNTING:
+ log_unit_warning(UNIT(m), "Unmounting timed out. Terminating.");
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGTERM, MOUNT_FAILURE_TIMEOUT);
+ break;
+
+ case MOUNT_UNMOUNTING_SIGTERM:
+ if (m->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(m), "Mount process timed out. Killing.");
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGKILL, MOUNT_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(m), "Mount process timed out. Skipping SIGKILL. Ignoring.");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case MOUNT_UNMOUNTING_SIGKILL:
+ log_unit_warning(UNIT(m), "Mount process still around after SIGKILL. Ignoring.");
+ mount_enter_dead_or_mounted(m, MOUNT_FAILURE_TIMEOUT);
+ break;
+
+ case MOUNT_CLEANING:
+ log_unit_warning(UNIT(m), "Cleaning timed out. killing.");
+
+ if (m->clean_result == MOUNT_SUCCESS)
+ m->clean_result = MOUNT_FAILURE_TIMEOUT;
+
+ mount_enter_signal(m, MOUNT_UNMOUNTING_SIGKILL, 0);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+static int mount_setup_new_unit(
+ Manager *m,
+ const char *name,
+ const char *what,
+ const char *where,
+ const char *options,
+ const char *fstype,
+ MountProcFlags *ret_flags,
+ Unit **ret) {
+
+ _cleanup_(unit_freep) Unit *u = NULL;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(ret_flags);
+ assert(ret);
+
+ r = unit_new_for_name(m, sizeof(Mount), name, &u);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&u->source_path, "/proc/self/mountinfo");
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&MOUNT(u)->where, where);
+ if (r < 0)
+ return r;
+
+ r = update_parameters_proc_self_mountinfo(MOUNT(u), what, options, fstype);
+ if (r < 0)
+ return r;
+
+ /* This unit was generated because /proc/self/mountinfo reported it. Remember this, so that by the time we load
+ * the unit file for it (and thus add in extra deps right after) we know what source to attributes the deps
+ * to.*/
+ MOUNT(u)->from_proc_self_mountinfo = true;
+
+ /* We have only allocated the stub now, let's enqueue this unit for loading now, so that everything else is
+ * loaded in now. */
+ unit_add_to_load_queue(u);
+
+ *ret_flags = MOUNT_PROC_IS_MOUNTED | MOUNT_PROC_JUST_MOUNTED | MOUNT_PROC_JUST_CHANGED;
+ *ret = TAKE_PTR(u);
+ return 0;
+}
+
+static int mount_setup_existing_unit(
+ Unit *u,
+ const char *what,
+ const char *where,
+ const char *options,
+ const char *fstype,
+ MountProcFlags *ret_flags) {
+
+ int r;
+
+ assert(u);
+ assert(ret_flags);
+
+ if (!MOUNT(u)->where) {
+ MOUNT(u)->where = strdup(where);
+ if (!MOUNT(u)->where)
+ return -ENOMEM;
+ }
+
+ /* In case we have multiple mounts established on the same mount point, let's merge flags set already
+ * for the current unit. Note that the flags field is reset on each iteration of reading
+ * /proc/self/mountinfo, hence we know for sure anything already set here is from the current
+ * iteration and thus worthy of taking into account. */
+ MountProcFlags flags =
+ MOUNT(u)->proc_flags | MOUNT_PROC_IS_MOUNTED;
+
+ r = update_parameters_proc_self_mountinfo(MOUNT(u), what, options, fstype);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ flags |= MOUNT_PROC_JUST_CHANGED;
+
+ /* There are two conditions when we consider a mount point just mounted: when we haven't seen it in
+ * /proc/self/mountinfo before or when MOUNT_MOUNTING is our current state. Why bother with the
+ * latter? Shouldn't that be covered by the former? No, during reload it is not because we might then
+ * encounter a new /proc/self/mountinfo in combination with an old mount unit state (since it stems
+ * from the serialized state), and need to catch up. Since we know that the MOUNT_MOUNTING state is
+ * reached when we wait for the mount to appear we hence can assume that if we are in it, we are
+ * actually seeing it established for the first time. */
+ if (!MOUNT(u)->from_proc_self_mountinfo || MOUNT(u)->state == MOUNT_MOUNTING)
+ flags |= MOUNT_PROC_JUST_MOUNTED;
+
+ MOUNT(u)->from_proc_self_mountinfo = true;
+
+ if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) {
+ /* The unit was previously not found or otherwise not loaded. Now that the unit shows up in
+ * /proc/self/mountinfo we should reconsider it this, hence set it to UNIT_LOADED. */
+ u->load_state = UNIT_LOADED;
+ u->load_error = 0;
+
+ flags |= MOUNT_PROC_JUST_CHANGED;
+ }
+
+ if (FLAGS_SET(flags, MOUNT_PROC_JUST_CHANGED)) {
+ /* If things changed, then make sure that all deps are regenerated. Let's
+ * first remove all automatic deps, and then add in the new ones. */
+
+ unit_remove_dependencies(u, UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT);
+
+ r = mount_add_non_exec_dependencies(MOUNT(u));
+ if (r < 0)
+ return r;
+ }
+
+ *ret_flags = flags;
+ return 0;
+}
+
+static int mount_setup_unit(
+ Manager *m,
+ const char *what,
+ const char *where,
+ const char *options,
+ const char *fstype,
+ bool set_flags) {
+
+ _cleanup_free_ char *e = NULL;
+ MountProcFlags flags;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(what);
+ assert(where);
+ assert(options);
+ assert(fstype);
+
+ /* Ignore API mount points. They should never be referenced in
+ * dependencies ever. */
+ if (mount_point_is_api(where) || mount_point_ignore(where))
+ return 0;
+
+ if (streq(fstype, "autofs"))
+ return 0;
+
+ /* probably some kind of swap, ignore */
+ if (!is_path(where))
+ return 0;
+
+ /* Mount unit names have to be (like all other unit names) short enough to fit into file names. This
+ * means there's a good chance that overly long mount point paths after mangling them to look like a
+ * unit name would result in unit names we don't actually consider valid. This should be OK however
+ * as such long mount point paths should not happen on regular systems — and if they appear
+ * nonetheless they are generally synthesized by software, and thus managed by that other
+ * software. Having such long names just means you cannot use systemd to manage those specific mount
+ * points, which should be an OK restriction to make. After all we don't have to be able to manage
+ * all mount points in the world — as long as we don't choke on them when we encounter them. */
+ r = unit_name_from_path(where, ".mount", &e);
+ if (r < 0) {
+ static RateLimit rate_limit = { /* Let's log about this at warning level at most once every
+ * 5s. Given that we generate this whenever we read the file
+ * otherwise we probably shouldn't flood the logs with
+ * this */
+ .interval = 5 * USEC_PER_SEC,
+ .burst = 1,
+ };
+
+ return log_struct_errno(
+ ratelimit_below(&rate_limit) ? LOG_WARNING : LOG_DEBUG, r,
+ "MESSAGE_ID=" SD_MESSAGE_MOUNT_POINT_PATH_NOT_SUITABLE_STR,
+ "MOUNT_POINT=%s", where,
+ LOG_MESSAGE("Failed to generate valid unit name from path '%s', ignoring mount point: %m", where));
+ }
+
+ u = manager_get_unit(m, e);
+ if (u)
+ r = mount_setup_existing_unit(u, what, where, options, fstype, &flags);
+ else
+ /* First time we see this mount point meaning that it's not been initiated by a mount unit but rather
+ * by the sysadmin having called mount(8) directly. */
+ r = mount_setup_new_unit(m, e, what, where, options, fstype, &flags, &u);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to set up mount unit for '%s': %m", where);
+
+ /* If the mount changed properties or state, let's notify our clients */
+ if (flags & (MOUNT_PROC_JUST_CHANGED|MOUNT_PROC_JUST_MOUNTED))
+ unit_add_to_dbus_queue(u);
+
+ if (set_flags)
+ MOUNT(u)->proc_flags = flags;
+
+ return 0;
+}
+
+static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ int r;
+
+ assert(m);
+
+ r = libmount_parse(NULL, NULL, &table, &iter);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *device, *path, *options, *fstype;
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
+
+ device = mnt_fs_get_source(fs);
+ path = mnt_fs_get_target(fs);
+ options = mnt_fs_get_options(fs);
+ fstype = mnt_fs_get_fstype(fs);
+
+ if (!device || !path)
+ continue;
+
+ device_found_node(m, device, DEVICE_FOUND_MOUNT, DEVICE_FOUND_MOUNT);
+
+ (void) mount_setup_unit(m, device, path, options, fstype, set_flags);
+ }
+
+ return 0;
+}
+
+static void mount_shutdown(Manager *m) {
+ assert(m);
+
+ m->mount_event_source = sd_event_source_unref(m->mount_event_source);
+
+ mnt_unref_monitor(m->mount_monitor);
+ m->mount_monitor = NULL;
+}
+
+static int mount_get_timeout(Unit *u, usec_t *timeout) {
+ Mount *m = MOUNT(u);
+ usec_t t;
+ int r;
+
+ if (!m->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(m->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+static void mount_enumerate_perpetual(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's
+ * unconditionally synthesize it here and mark it as perpetual. */
+
+ u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
+ return;
+ }
+ }
+
+ u->perpetual = true;
+ MOUNT(u)->deserialized_state = MOUNT_MOUNTED;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+}
+
+static bool mount_is_mounted(Mount *m) {
+ assert(m);
+
+ return UNIT(m)->perpetual || FLAGS_SET(m->proc_flags, MOUNT_PROC_IS_MOUNTED);
+}
+
+static void mount_enumerate(Manager *m) {
+ int r;
+
+ assert(m);
+
+ mnt_init_debug(0);
+
+ if (!m->mount_monitor) {
+ int fd;
+
+ m->mount_monitor = mnt_new_monitor();
+ if (!m->mount_monitor) {
+ log_oom();
+ goto fail;
+ }
+
+ r = mnt_monitor_enable_kernel(m->mount_monitor, 1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to enable watching of kernel mount events: %m");
+ goto fail;
+ }
+
+ r = mnt_monitor_enable_userspace(m->mount_monitor, 1, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to enable watching of userspace mount events: %m");
+ goto fail;
+ }
+
+ /* mnt_unref_monitor() will close the fd */
+ fd = r = mnt_monitor_get_fd(m->mount_monitor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to acquire watch file descriptor: %m");
+ goto fail;
+ }
+
+ r = sd_event_add_io(m->event, &m->mount_event_source, fd, EPOLLIN, mount_dispatch_io, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to watch mount file descriptor: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_priority(m->mount_event_source, SD_EVENT_PRIORITY_NORMAL-10);
+ if (r < 0) {
+ log_error_errno(r, "Failed to adjust mount watch priority: %m");
+ goto fail;
+ }
+
+ (void) sd_event_source_set_description(m->mount_event_source, "mount-monitor-dispatch");
+ }
+
+ r = mount_load_proc_self_mountinfo(m, false);
+ if (r < 0)
+ goto fail;
+
+ return;
+
+fail:
+ mount_shutdown(m);
+}
+
+static int drain_libmount(Manager *m) {
+ bool rescan = false;
+ int r;
+
+ assert(m);
+
+ /* Drain all events and verify that the event is valid.
+ *
+ * Note that libmount also monitors /run/mount mkdir if the directory does not exist yet. The mkdir
+ * may generate event which is irrelevant for us.
+ *
+ * error: r < 0; valid: r == 0, false positive: r == 1 */
+ do {
+ r = mnt_monitor_next_change(m->mount_monitor, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drain libmount events: %m");
+ if (r == 0)
+ rescan = true;
+ } while (r == 0);
+
+ return rescan;
+}
+
+static int mount_process_proc_self_mountinfo(Manager *m) {
+ _cleanup_set_free_free_ Set *around = NULL, *gone = NULL;
+ const char *what;
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ r = drain_libmount(m);
+ if (r <= 0)
+ return r;
+
+ r = mount_load_proc_self_mountinfo(m, true);
+ if (r < 0) {
+ /* Reset flags, just in case, for later calls */
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT])
+ MOUNT(u)->proc_flags = 0;
+
+ return 0;
+ }
+
+ manager_dispatch_load_queue(m);
+
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) {
+ Mount *mount = MOUNT(u);
+
+ if (!mount_is_mounted(mount)) {
+
+ /* A mount point is not around right now. It
+ * might be gone, or might never have
+ * existed. */
+
+ if (mount->from_proc_self_mountinfo &&
+ mount->parameters_proc_self_mountinfo.what) {
+
+ /* Remember that this device might just have disappeared */
+ if (set_ensure_allocated(&gone, &path_hash_ops) < 0 ||
+ set_put_strdup(&gone, mount->parameters_proc_self_mountinfo.what) < 0)
+ log_oom(); /* we don't care too much about OOM here... */
+ }
+
+ mount->from_proc_self_mountinfo = false;
+ assert_se(update_parameters_proc_self_mountinfo(mount, NULL, NULL, NULL) >= 0);
+
+ switch (mount->state) {
+
+ case MOUNT_MOUNTED:
+ /* This has just been unmounted by somebody else, follow the state change. */
+ mount_enter_dead(mount, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_MOUNTING_DONE:
+ /* The mount command may add the corresponding proc mountinfo entry and
+ * then remove it because of an internal error. E.g., fuse.sshfs seems
+ * to do that when the connection fails. See #17617. To handle such the
+ * case, let's once set the state back to mounting. Then, the unit can
+ * correctly enter the failed state later in mount_sigchld(). */
+ mount_set_state(mount, MOUNT_MOUNTING);
+ break;
+
+ default:
+ break;
+ }
+
+ } else if (mount->proc_flags & (MOUNT_PROC_JUST_MOUNTED|MOUNT_PROC_JUST_CHANGED)) {
+
+ /* A mount point was added or changed */
+
+ switch (mount->state) {
+
+ case MOUNT_DEAD:
+ case MOUNT_FAILED:
+
+ /* This has just been mounted by somebody else, follow the state change, but let's
+ * generate a new invocation ID for this implicitly and automatically. */
+ (void) unit_acquire_invocation_id(u);
+ mount_cycle_clear(mount);
+ mount_enter_mounted(mount, MOUNT_SUCCESS);
+ break;
+
+ case MOUNT_MOUNTING:
+ mount_set_state(mount, MOUNT_MOUNTING_DONE);
+ break;
+
+ default:
+ /* Nothing really changed, but let's
+ * issue an notification call
+ * nonetheless, in case somebody is
+ * waiting for this. (e.g. file system
+ * ro/rw remounts.) */
+ mount_set_state(mount, mount->state);
+ break;
+ }
+ }
+
+ if (mount_is_mounted(mount) &&
+ mount->from_proc_self_mountinfo &&
+ mount->parameters_proc_self_mountinfo.what) {
+ /* Track devices currently used */
+
+ if (set_ensure_allocated(&around, &path_hash_ops) < 0 ||
+ set_put_strdup(&around, mount->parameters_proc_self_mountinfo.what) < 0)
+ log_oom();
+ }
+
+ /* Reset the flags for later calls */
+ mount->proc_flags = 0;
+ }
+
+ SET_FOREACH(what, gone) {
+ if (set_contains(around, what))
+ continue;
+
+ /* Let the device units know that the device is no longer mounted */
+ device_found_node(m, what, 0, DEVICE_FOUND_MOUNT);
+ }
+
+ return 0;
+}
+
+static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(revents & EPOLLIN);
+
+ return mount_process_proc_self_mountinfo(m);
+}
+
+static void mount_reset_failed(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ if (m->state == MOUNT_FAILED)
+ mount_set_state(m, MOUNT_DEAD);
+
+ m->result = MOUNT_SUCCESS;
+ m->reload_result = MOUNT_SUCCESS;
+ m->clean_result = MOUNT_SUCCESS;
+}
+
+static int mount_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ return unit_kill_common(u, who, signo, -1, m->control_pid, error);
+}
+
+static int mount_control_pid(Unit *u) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ return m->control_pid;
+}
+
+static int mount_clean(Unit *u, ExecCleanMask mask) {
+ _cleanup_strv_free_ char **l = NULL;
+ Mount *m = MOUNT(u);
+ int r;
+
+ assert(m);
+ assert(mask != 0);
+
+ if (m->state != MOUNT_DEAD)
+ return -EBUSY;
+
+ r = exec_context_get_clean_directories(&m->exec_context, u->manager->prefix, mask, &l);
+ if (r < 0)
+ return r;
+
+ if (strv_isempty(l))
+ return -EUNATCH;
+
+ mount_unwatch_control_pid(m);
+ m->clean_result = MOUNT_SUCCESS;
+ m->control_command = NULL;
+ m->control_command_id = _MOUNT_EXEC_COMMAND_INVALID;
+
+ r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->exec_context.timeout_clean_usec));
+ if (r < 0)
+ goto fail;
+
+ r = unit_fork_and_watch_rm_rf(u, l, &m->control_pid);
+ if (r < 0)
+ goto fail;
+
+ mount_set_state(m, MOUNT_CLEANING);
+
+ return 0;
+
+fail:
+ log_unit_warning_errno(u, r, "Failed to initiate cleaning: %m");
+ m->clean_result = MOUNT_FAILURE_RESOURCES;
+ m->timer_event_source = sd_event_source_unref(m->timer_event_source);
+ return r;
+}
+
+static int mount_can_clean(Unit *u, ExecCleanMask *ret) {
+ Mount *m = MOUNT(u);
+
+ assert(m);
+
+ return exec_context_get_clean_mask(&m->exec_context, ret);
+}
+
+static const char* const mount_exec_command_table[_MOUNT_EXEC_COMMAND_MAX] = {
+ [MOUNT_EXEC_MOUNT] = "ExecMount",
+ [MOUNT_EXEC_UNMOUNT] = "ExecUnmount",
+ [MOUNT_EXEC_REMOUNT] = "ExecRemount",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mount_exec_command, MountExecCommand);
+
+static const char* const mount_result_table[_MOUNT_RESULT_MAX] = {
+ [MOUNT_SUCCESS] = "success",
+ [MOUNT_FAILURE_RESOURCES] = "resources",
+ [MOUNT_FAILURE_TIMEOUT] = "timeout",
+ [MOUNT_FAILURE_EXIT_CODE] = "exit-code",
+ [MOUNT_FAILURE_SIGNAL] = "signal",
+ [MOUNT_FAILURE_CORE_DUMP] = "core-dump",
+ [MOUNT_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [MOUNT_FAILURE_PROTOCOL] = "protocol",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mount_result, MountResult);
+
+const UnitVTable mount_vtable = {
+ .object_size = sizeof(Mount),
+ .exec_context_offset = offsetof(Mount, exec_context),
+ .cgroup_context_offset = offsetof(Mount, cgroup_context),
+ .kill_context_offset = offsetof(Mount, kill_context),
+ .exec_runtime_offset = offsetof(Mount, exec_runtime),
+ .dynamic_creds_offset = offsetof(Mount, dynamic_creds),
+
+ .sections =
+ "Unit\0"
+ "Mount\0"
+ "Install\0",
+ .private_section = "Mount",
+
+ .can_transient = true,
+ .can_fail = true,
+
+ .init = mount_init,
+ .load = mount_load,
+ .done = mount_done,
+
+ .coldplug = mount_coldplug,
+
+ .dump = mount_dump,
+
+ .start = mount_start,
+ .stop = mount_stop,
+ .reload = mount_reload,
+
+ .kill = mount_kill,
+ .clean = mount_clean,
+ .can_clean = mount_can_clean,
+
+ .serialize = mount_serialize,
+ .deserialize_item = mount_deserialize_item,
+
+ .active_state = mount_active_state,
+ .sub_state_to_string = mount_sub_state_to_string,
+
+ .will_restart = unit_will_restart_default,
+
+ .may_gc = mount_may_gc,
+ .is_extrinsic = mount_is_extrinsic,
+
+ .sigchld_event = mount_sigchld_event,
+
+ .reset_failed = mount_reset_failed,
+
+ .control_pid = mount_control_pid,
+
+ .bus_set_property = bus_mount_set_property,
+ .bus_commit_properties = bus_mount_commit_properties,
+
+ .get_timeout = mount_get_timeout,
+
+ .enumerate_perpetual = mount_enumerate_perpetual,
+ .enumerate = mount_enumerate,
+ .shutdown = mount_shutdown,
+
+ .status_message_formats = {
+ .starting_stopping = {
+ [0] = "Mounting %s...",
+ [1] = "Unmounting %s...",
+ },
+ .finished_start_job = {
+ [JOB_DONE] = "Mounted %s.",
+ [JOB_FAILED] = "Failed to mount %s.",
+ [JOB_TIMEOUT] = "Timed out mounting %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Unmounted %s.",
+ [JOB_FAILED] = "Failed unmounting %s.",
+ [JOB_TIMEOUT] = "Timed out unmounting %s.",
+ },
+ },
+};
diff --git a/src/core/mount.h b/src/core/mount.h
new file mode 100644
index 0000000..ad0e016
--- /dev/null
+++ b/src/core/mount.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Mount Mount;
+
+#include "kill.h"
+#include "dynamic-user.h"
+#include "unit.h"
+
+typedef enum MountExecCommand {
+ MOUNT_EXEC_MOUNT,
+ MOUNT_EXEC_UNMOUNT,
+ MOUNT_EXEC_REMOUNT,
+ _MOUNT_EXEC_COMMAND_MAX,
+ _MOUNT_EXEC_COMMAND_INVALID = -1
+} MountExecCommand;
+
+typedef enum MountResult {
+ MOUNT_SUCCESS,
+ MOUNT_FAILURE_RESOURCES, /* a bit of a misnomer, just our catch-all error for errnos we didn't expect */
+ MOUNT_FAILURE_TIMEOUT,
+ MOUNT_FAILURE_EXIT_CODE,
+ MOUNT_FAILURE_SIGNAL,
+ MOUNT_FAILURE_CORE_DUMP,
+ MOUNT_FAILURE_START_LIMIT_HIT,
+ MOUNT_FAILURE_PROTOCOL,
+ _MOUNT_RESULT_MAX,
+ _MOUNT_RESULT_INVALID = -1
+} MountResult;
+
+typedef struct MountParameters {
+ char *what;
+ char *options;
+ char *fstype;
+} MountParameters;
+
+/* Used while looking for mount points that vanished or got added from/to /proc/self/mountinfo */
+typedef enum MountProcFlags {
+ MOUNT_PROC_IS_MOUNTED = 1 << 0,
+ MOUNT_PROC_JUST_MOUNTED = 1 << 1,
+ MOUNT_PROC_JUST_CHANGED = 1 << 2,
+} MountProcFlags;
+
+struct Mount {
+ Unit meta;
+
+ char *where;
+
+ MountParameters parameters_proc_self_mountinfo;
+ MountParameters parameters_fragment;
+
+ bool from_proc_self_mountinfo:1;
+ bool from_fragment:1;
+
+ MountProcFlags proc_flags;
+
+ bool sloppy_options;
+
+ bool lazy_unmount;
+ bool force_unmount;
+
+ bool read_write_only;
+
+ MountResult result;
+ MountResult reload_result;
+ MountResult clean_result;
+
+ mode_t directory_mode;
+
+ usec_t timeout_usec;
+
+ ExecCommand exec_command[_MOUNT_EXEC_COMMAND_MAX];
+
+ ExecContext exec_context;
+ KillContext kill_context;
+ CGroupContext cgroup_context;
+
+ ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
+
+ MountState state, deserialized_state;
+
+ ExecCommand* control_command;
+ MountExecCommand control_command_id;
+ pid_t control_pid;
+
+ sd_event_source *timer_event_source;
+
+ unsigned n_retry_umount;
+};
+
+extern const UnitVTable mount_vtable;
+
+void mount_fd_event(Manager *m, int events);
+
+const char* mount_exec_command_to_string(MountExecCommand i) _const_;
+MountExecCommand mount_exec_command_from_string(const char *s) _pure_;
+
+const char* mount_result_to_string(MountResult i) _const_;
+MountResult mount_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(MOUNT, Mount);
diff --git a/src/core/namespace.c b/src/core/namespace.c
new file mode 100644
index 0000000..cdf427a
--- /dev/null
+++ b/src/core/namespace.c
@@ -0,0 +1,2384 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <linux/loop.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <unistd.h>
+#include <linux/fs.h>
+
+#include "alloc-util.h"
+#include "base-filesystem.h"
+#include "dev-setup.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "label.h"
+#include "list.h"
+#include "loop-util.h"
+#include "loopback-setup.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "namespace-util.h"
+#include "namespace.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+
+#define DEV_MOUNT_OPTIONS (MS_NOSUID|MS_STRICTATIME|MS_NOEXEC)
+
+typedef enum MountMode {
+ /* This is ordered by priority! */
+ INACCESSIBLE,
+ MOUNT_IMAGES,
+ BIND_MOUNT,
+ BIND_MOUNT_RECURSIVE,
+ PRIVATE_TMP,
+ PRIVATE_TMP_READONLY,
+ PRIVATE_DEV,
+ BIND_DEV,
+ EMPTY_DIR,
+ SYSFS,
+ PROCFS,
+ READONLY,
+ READWRITE,
+ TMPFS,
+ READWRITE_IMPLICIT, /* Should have the lowest priority. */
+ _MOUNT_MODE_MAX,
+} MountMode;
+
+typedef struct MountEntry {
+ const char *path_const; /* Memory allocated on stack or static */
+ MountMode mode:5;
+ bool ignore:1; /* Ignore if path does not exist? */
+ bool has_prefix:1; /* Already is prefixed by the root dir? */
+ bool read_only:1; /* Shall this mount point be read-only? */
+ bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */
+ bool applied:1; /* Already applied */
+ char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
+ const char *source_const; /* The source path, for bind mounts or images */
+ char *source_malloc;
+ const char *options_const;/* Mount options for tmpfs */
+ char *options_malloc;
+ unsigned long flags; /* Mount flags used by EMPTY_DIR and TMPFS. Do not include MS_RDONLY here, but please use read_only. */
+ unsigned n_followed;
+ LIST_HEAD(MountOptions, image_options);
+} MountEntry;
+
+/* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted
+ * something there already. These mounts are hence overridden by any other explicitly configured mounts. */
+static const MountEntry apivfs_table[] = {
+ { "/proc", PROCFS, false },
+ { "/dev", BIND_DEV, false },
+ { "/sys", SYSFS, false },
+};
+
+/* ProtectKernelTunables= option and the related filesystem APIs */
+static const MountEntry protect_kernel_tunables_table[] = {
+ { "/proc/acpi", READONLY, true },
+ { "/proc/apm", READONLY, true }, /* Obsolete API, there's no point in permitting access to this, ever */
+ { "/proc/asound", READONLY, true },
+ { "/proc/bus", READONLY, true },
+ { "/proc/fs", READONLY, true },
+ { "/proc/irq", READONLY, true },
+ { "/proc/kallsyms", INACCESSIBLE, true },
+ { "/proc/kcore", INACCESSIBLE, true },
+ { "/proc/latency_stats", READONLY, true },
+ { "/proc/mtrr", READONLY, true },
+ { "/proc/scsi", READONLY, true },
+ { "/proc/sys", READONLY, true },
+ { "/proc/sysrq-trigger", READONLY, true },
+ { "/proc/timer_stats", READONLY, true },
+ { "/sys", READONLY, false },
+ { "/sys/fs/bpf", READONLY, true },
+ { "/sys/fs/cgroup", READWRITE_IMPLICIT, false }, /* READONLY is set by ProtectControlGroups= option */
+ { "/sys/fs/selinux", READWRITE_IMPLICIT, true },
+ { "/sys/kernel/debug", READONLY, true },
+ { "/sys/kernel/tracing", READONLY, true },
+};
+
+/* ProtectKernelModules= option */
+static const MountEntry protect_kernel_modules_table[] = {
+#if HAVE_SPLIT_USR
+ { "/lib/modules", INACCESSIBLE, true },
+#endif
+ { "/usr/lib/modules", INACCESSIBLE, true },
+};
+
+/* ProtectKernelLogs= option */
+static const MountEntry protect_kernel_logs_table[] = {
+ { "/proc/kmsg", INACCESSIBLE, true },
+ { "/dev/kmsg", INACCESSIBLE, true },
+};
+
+/*
+ * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
+ * system should be protected by ProtectSystem=
+ */
+static const MountEntry protect_home_read_only_table[] = {
+ { "/home", READONLY, true },
+ { "/run/user", READONLY, true },
+ { "/root", READONLY, true },
+};
+
+/* ProtectHome=tmpfs table */
+static const MountEntry protect_home_tmpfs_table[] = {
+ { "/home", TMPFS, true, .read_only = true, .options_const = "mode=0755" TMPFS_LIMITS_EMPTY_OR_ALMOST, .flags = MS_NODEV|MS_STRICTATIME },
+ { "/run/user", TMPFS, true, .read_only = true, .options_const = "mode=0755" TMPFS_LIMITS_EMPTY_OR_ALMOST, .flags = MS_NODEV|MS_STRICTATIME },
+ { "/root", TMPFS, true, .read_only = true, .options_const = "mode=0700" TMPFS_LIMITS_EMPTY_OR_ALMOST, .flags = MS_NODEV|MS_STRICTATIME },
+};
+
+/* ProtectHome=yes table */
+static const MountEntry protect_home_yes_table[] = {
+ { "/home", INACCESSIBLE, true },
+ { "/run/user", INACCESSIBLE, true },
+ { "/root", INACCESSIBLE, true },
+};
+
+/* ProtectSystem=yes table */
+static const MountEntry protect_system_yes_table[] = {
+ { "/usr", READONLY, false },
+ { "/boot", READONLY, true },
+ { "/efi", READONLY, true },
+#if HAVE_SPLIT_USR
+ { "/lib", READONLY, true },
+ { "/lib64", READONLY, true },
+ { "/bin", READONLY, true },
+# if HAVE_SPLIT_BIN
+ { "/sbin", READONLY, true },
+# endif
+#endif
+};
+
+/* ProtectSystem=full includes ProtectSystem=yes */
+static const MountEntry protect_system_full_table[] = {
+ { "/usr", READONLY, false },
+ { "/boot", READONLY, true },
+ { "/efi", READONLY, true },
+ { "/etc", READONLY, false },
+#if HAVE_SPLIT_USR
+ { "/lib", READONLY, true },
+ { "/lib64", READONLY, true },
+ { "/bin", READONLY, true },
+# if HAVE_SPLIT_BIN
+ { "/sbin", READONLY, true },
+# endif
+#endif
+};
+
+/*
+ * ProtectSystem=strict table. In this strict mode, we mount everything
+ * read-only, except for /proc, /dev, /sys which are the kernel API VFS,
+ * which are left writable, but PrivateDevices= + ProtectKernelTunables=
+ * protect those, and these options should be fully orthogonal.
+ * (And of course /home and friends are also left writable, as ProtectHome=
+ * shall manage those, orthogonally).
+ */
+static const MountEntry protect_system_strict_table[] = {
+ { "/", READONLY, false },
+ { "/proc", READWRITE_IMPLICIT, false }, /* ProtectKernelTunables= */
+ { "/sys", READWRITE_IMPLICIT, false }, /* ProtectKernelTunables= */
+ { "/dev", READWRITE_IMPLICIT, false }, /* PrivateDevices= */
+ { "/home", READWRITE_IMPLICIT, true }, /* ProtectHome= */
+ { "/run/user", READWRITE_IMPLICIT, true }, /* ProtectHome= */
+ { "/root", READWRITE_IMPLICIT, true }, /* ProtectHome= */
+};
+
+static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
+ [INACCESSIBLE] = "inaccessible",
+ [BIND_MOUNT] = "bind",
+ [BIND_MOUNT_RECURSIVE] = "rbind",
+ [PRIVATE_TMP] = "private-tmp",
+ [PRIVATE_DEV] = "private-dev",
+ [BIND_DEV] = "bind-dev",
+ [EMPTY_DIR] = "empty",
+ [SYSFS] = "sysfs",
+ [PROCFS] = "procfs",
+ [READONLY] = "read-only",
+ [READWRITE] = "read-write",
+ [TMPFS] = "tmpfs",
+ [MOUNT_IMAGES] = "mount-images",
+ [READWRITE_IMPLICIT] = "rw-implicit",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(mount_mode, MountMode);
+
+static const char *mount_entry_path(const MountEntry *p) {
+ assert(p);
+
+ /* Returns the path of this bind mount. If the malloc()-allocated ->path_buffer field is set we return that,
+ * otherwise the stack/static ->path field is returned. */
+
+ return p->path_malloc ?: p->path_const;
+}
+
+static bool mount_entry_read_only(const MountEntry *p) {
+ assert(p);
+
+ return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE, PRIVATE_TMP_READONLY);
+}
+
+static const char *mount_entry_source(const MountEntry *p) {
+ assert(p);
+
+ return p->source_malloc ?: p->source_const;
+}
+
+static const char *mount_entry_options(const MountEntry *p) {
+ assert(p);
+
+ return p->options_malloc ?: p->options_const;
+}
+
+static void mount_entry_done(MountEntry *p) {
+ assert(p);
+
+ p->path_malloc = mfree(p->path_malloc);
+ p->source_malloc = mfree(p->source_malloc);
+ p->options_malloc = mfree(p->options_malloc);
+ p->image_options = mount_options_free_all(p->image_options);
+}
+
+static int append_access_mounts(MountEntry **p, char **strv, MountMode mode, bool forcibly_require_prefix) {
+ char **i;
+
+ assert(p);
+
+ /* Adds a list of user-supplied READWRITE/READWRITE_IMPLICIT/READONLY/INACCESSIBLE entries */
+
+ STRV_FOREACH(i, strv) {
+ bool ignore = false, needs_prefix = false;
+ const char *e = *i;
+
+ /* Look for any prefixes */
+ if (startswith(e, "-")) {
+ e++;
+ ignore = true;
+ }
+ if (startswith(e, "+")) {
+ e++;
+ needs_prefix = true;
+ }
+
+ if (!path_is_absolute(e))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path is not absolute: %s", e);
+
+ *((*p)++) = (MountEntry) {
+ .path_const = e,
+ .mode = mode,
+ .ignore = ignore,
+ .has_prefix = !needs_prefix && !forcibly_require_prefix,
+ };
+ }
+
+ return 0;
+}
+
+static int append_empty_dir_mounts(MountEntry **p, char **strv) {
+ char **i;
+
+ assert(p);
+
+ /* Adds tmpfs mounts to provide readable but empty directories. This is primarily used to implement the
+ * "/private/" boundary directories for DynamicUser=1. */
+
+ STRV_FOREACH(i, strv) {
+
+ *((*p)++) = (MountEntry) {
+ .path_const = *i,
+ .mode = EMPTY_DIR,
+ .ignore = false,
+ .read_only = true,
+ .options_const = "mode=755" TMPFS_LIMITS_EMPTY_OR_ALMOST,
+ .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
+ };
+ }
+
+ return 0;
+}
+
+static int append_bind_mounts(MountEntry **p, const BindMount *binds, size_t n) {
+ size_t i;
+
+ assert(p);
+
+ for (i = 0; i < n; i++) {
+ const BindMount *b = binds + i;
+
+ *((*p)++) = (MountEntry) {
+ .path_const = b->destination,
+ .mode = b->recursive ? BIND_MOUNT_RECURSIVE : BIND_MOUNT,
+ .read_only = b->read_only,
+ .nosuid = b->nosuid,
+ .source_const = b->source,
+ .ignore = b->ignore_enoent,
+ };
+ }
+
+ return 0;
+}
+
+static int append_mount_images(MountEntry **p, const MountImage *mount_images, size_t n) {
+ assert(p);
+
+ for (size_t i = 0; i < n; i++) {
+ const MountImage *m = mount_images + i;
+
+ *((*p)++) = (MountEntry) {
+ .path_const = m->destination,
+ .mode = MOUNT_IMAGES,
+ .source_const = m->source,
+ .image_options = m->mount_options,
+ .ignore = m->ignore_enoent,
+ };
+ }
+
+ return 0;
+}
+
+static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, size_t n) {
+ assert(p);
+
+ for (size_t i = 0; i < n; i++) {
+ const TemporaryFileSystem *t = tmpfs + i;
+ _cleanup_free_ char *o = NULL, *str = NULL;
+ unsigned long flags;
+ bool ro = false;
+ int r;
+
+ if (!path_is_absolute(t->path))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path is not absolute: %s",
+ t->path);
+
+ str = strjoin("mode=0755" NESTED_TMPFS_LIMITS ",", t->options);
+ if (!str)
+ return -ENOMEM;
+
+ r = mount_option_mangle(str, MS_NODEV|MS_STRICTATIME, &flags, &o);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse mount option '%s': %m", str);
+
+ ro = flags & MS_RDONLY;
+ if (ro)
+ flags ^= MS_RDONLY;
+
+ *((*p)++) = (MountEntry) {
+ .path_const = t->path,
+ .mode = TMPFS,
+ .read_only = ro,
+ .options_malloc = TAKE_PTR(o),
+ .flags = flags,
+ };
+ }
+
+ return 0;
+}
+
+static int append_static_mounts(MountEntry **p, const MountEntry *mounts, size_t n, bool ignore_protect) {
+ size_t i;
+
+ assert(p);
+ assert(mounts);
+
+ /* Adds a list of static pre-defined entries */
+
+ for (i = 0; i < n; i++)
+ *((*p)++) = (MountEntry) {
+ .path_const = mount_entry_path(mounts+i),
+ .mode = mounts[i].mode,
+ .ignore = mounts[i].ignore || ignore_protect,
+ };
+
+ return 0;
+}
+
+static int append_protect_home(MountEntry **p, ProtectHome protect_home, bool ignore_protect) {
+ assert(p);
+
+ switch (protect_home) {
+
+ case PROTECT_HOME_NO:
+ return 0;
+
+ case PROTECT_HOME_READ_ONLY:
+ return append_static_mounts(p, protect_home_read_only_table, ELEMENTSOF(protect_home_read_only_table), ignore_protect);
+
+ case PROTECT_HOME_TMPFS:
+ return append_static_mounts(p, protect_home_tmpfs_table, ELEMENTSOF(protect_home_tmpfs_table), ignore_protect);
+
+ case PROTECT_HOME_YES:
+ return append_static_mounts(p, protect_home_yes_table, ELEMENTSOF(protect_home_yes_table), ignore_protect);
+
+ default:
+ assert_not_reached("Unexpected ProtectHome= value");
+ }
+}
+
+static int append_protect_system(MountEntry **p, ProtectSystem protect_system, bool ignore_protect) {
+ assert(p);
+
+ switch (protect_system) {
+
+ case PROTECT_SYSTEM_NO:
+ return 0;
+
+ case PROTECT_SYSTEM_STRICT:
+ return append_static_mounts(p, protect_system_strict_table, ELEMENTSOF(protect_system_strict_table), ignore_protect);
+
+ case PROTECT_SYSTEM_YES:
+ return append_static_mounts(p, protect_system_yes_table, ELEMENTSOF(protect_system_yes_table), ignore_protect);
+
+ case PROTECT_SYSTEM_FULL:
+ return append_static_mounts(p, protect_system_full_table, ELEMENTSOF(protect_system_full_table), ignore_protect);
+
+ default:
+ assert_not_reached("Unexpected ProtectSystem= value");
+ }
+}
+
+static int mount_path_compare(const MountEntry *a, const MountEntry *b) {
+ int d;
+
+ /* If the paths are not equal, then order prefixes first */
+ d = path_compare(mount_entry_path(a), mount_entry_path(b));
+ if (d != 0)
+ return d;
+
+ /* If the paths are equal, check the mode */
+ return CMP((int) a->mode, (int) b->mode);
+}
+
+static int prefix_where_needed(MountEntry *m, size_t n, const char *root_directory) {
+ size_t i;
+
+ /* Prefixes all paths in the bind mount table with the root directory if the entry needs that. */
+
+ for (i = 0; i < n; i++) {
+ char *s;
+
+ if (m[i].has_prefix)
+ continue;
+
+ s = path_join(root_directory, mount_entry_path(m+i));
+ if (!s)
+ return -ENOMEM;
+
+ free_and_replace(m[i].path_malloc, s);
+ m[i].has_prefix = true;
+ }
+
+ return 0;
+}
+
+static void drop_duplicates(MountEntry *m, size_t *n) {
+ MountEntry *f, *t, *previous;
+
+ assert(m);
+ assert(n);
+
+ /* Drops duplicate entries. Expects that the array is properly ordered already. */
+
+ for (f = m, t = m, previous = NULL; f < m + *n; f++) {
+
+ /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
+ * above. Note that we only drop duplicates that haven't been mounted yet. */
+ if (previous &&
+ path_equal(mount_entry_path(f), mount_entry_path(previous)) &&
+ !f->applied && !previous->applied) {
+ log_debug("%s (%s) is duplicate.", mount_entry_path(f), mount_mode_to_string(f->mode));
+ previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */
+ mount_entry_done(f);
+ continue;
+ }
+
+ *t = *f;
+ previous = t;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_inaccessible(MountEntry *m, size_t *n) {
+ MountEntry *f, *t;
+ const char *clear = NULL;
+
+ assert(m);
+ assert(n);
+
+ /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly
+ * ordered already. */
+
+ for (f = m, t = m; f < m + *n; f++) {
+
+ /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop
+ * it, as inaccessible paths really should drop the entire subtree. */
+ if (clear && path_startswith(mount_entry_path(f), clear)) {
+ log_debug("%s is masked by %s.", mount_entry_path(f), clear);
+ mount_entry_done(f);
+ continue;
+ }
+
+ clear = f->mode == INACCESSIBLE ? mount_entry_path(f) : NULL;
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_nop(MountEntry *m, size_t *n) {
+ MountEntry *f, *t;
+
+ assert(m);
+ assert(n);
+
+ /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the
+ * list is ordered by prefixes. */
+
+ for (f = m, t = m; f < m + *n; f++) {
+
+ /* Only suppress such subtrees for READONLY, READWRITE and READWRITE_IMPLICIT entries */
+ if (IN_SET(f->mode, READONLY, READWRITE, READWRITE_IMPLICIT)) {
+ MountEntry *p;
+ bool found = false;
+
+ /* Now let's find the first parent of the entry we are looking at. */
+ for (p = t-1; p >= m; p--) {
+ if (path_startswith(mount_entry_path(f), mount_entry_path(p))) {
+ found = true;
+ break;
+ }
+ }
+
+ /* We found it, let's see if it's the same mode, if so, we can drop this entry */
+ if (found && p->mode == f->mode) {
+ log_debug("%s (%s) is made redundant by %s (%s)",
+ mount_entry_path(f), mount_mode_to_string(f->mode),
+ mount_entry_path(p), mount_mode_to_string(p->mode));
+ mount_entry_done(f);
+ continue;
+ }
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_outside_root(const char *root_directory, MountEntry *m, size_t *n) {
+ MountEntry *f, *t;
+
+ assert(m);
+ assert(n);
+
+ /* Nothing to do */
+ if (!root_directory)
+ return;
+
+ /* Drops all mounts that are outside of the root directory. */
+
+ for (f = m, t = m; f < m + *n; f++) {
+
+ if (!path_startswith(mount_entry_path(f), root_directory)) {
+ log_debug("%s is outside of root directory.", mount_entry_path(f));
+ mount_entry_done(f);
+ continue;
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static int clone_device_node(
+ const char *d,
+ const char *temporary_mount,
+ bool *make_devnode) {
+
+ _cleanup_free_ char *sl = NULL;
+ const char *dn, *bn, *t;
+ struct stat st;
+ int r;
+
+ if (stat(d, &st) < 0) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Device node '%s' to clone does not exist, ignoring.", d);
+ return -ENXIO;
+ }
+
+ return log_debug_errno(errno, "Failed to stat() device node '%s' to clone, ignoring: %m", d);
+ }
+
+ if (!S_ISBLK(st.st_mode) &&
+ !S_ISCHR(st.st_mode))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Device node '%s' to clone is not a device node, ignoring.",
+ d);
+
+ dn = strjoina(temporary_mount, d);
+
+ /* First, try to create device node properly */
+ if (*make_devnode) {
+ mac_selinux_create_file_prepare(d, st.st_mode);
+ r = mknod(dn, st.st_mode, st.st_rdev);
+ mac_selinux_create_file_clear();
+ if (r >= 0)
+ goto add_symlink;
+ if (errno != EPERM)
+ return log_debug_errno(errno, "mknod failed for %s: %m", d);
+
+ /* This didn't work, let's not try this again for the next iterations. */
+ *make_devnode = false;
+ }
+
+ /* We're about to fall back to bind-mounting the device
+ * node. So create a dummy bind-mount target.
+ * Do not prepare device-node SELinux label (see issue 13762) */
+ r = mknod(dn, S_IFREG, 0);
+ if (r < 0 && errno != EEXIST)
+ return log_debug_errno(errno, "mknod() fallback failed for '%s': %m", d);
+
+ /* Fallback to bind-mounting: The assumption here is that all used device nodes carry standard
+ * properties. Specifically, the devices nodes we bind-mount should either be owned by root:root or
+ * root:tty (e.g. /dev/tty, /dev/ptmx) and should not carry ACLs. */
+ r = mount_nofollow_verbose(LOG_DEBUG, d, dn, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+add_symlink:
+ bn = path_startswith(d, "/dev/");
+ if (!bn)
+ return 0;
+
+ /* Create symlinks like /dev/char/1:9 → ../urandom */
+ if (asprintf(&sl, "%s/dev/%s/%u:%u",
+ temporary_mount,
+ S_ISCHR(st.st_mode) ? "char" : "block",
+ major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return log_oom();
+
+ (void) mkdir_parents(sl, 0755);
+
+ t = strjoina("../", bn);
+ if (symlink(t, sl) < 0)
+ log_debug_errno(errno, "Failed to symlink '%s' to '%s', ignoring: %m", t, sl);
+
+ return 0;
+}
+
+static int mount_private_dev(MountEntry *m) {
+ static const char devnodes[] =
+ "/dev/null\0"
+ "/dev/zero\0"
+ "/dev/full\0"
+ "/dev/random\0"
+ "/dev/urandom\0"
+ "/dev/tty\0";
+
+ char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
+ const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
+ bool can_mknod = true;
+ _cleanup_umask_ mode_t u;
+ int r;
+
+ assert(m);
+
+ u = umask(0000);
+
+ if (!mkdtemp(temporary_mount))
+ return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount);
+
+ dev = strjoina(temporary_mount, "/dev");
+ (void) mkdir(dev, 0755);
+ r = mount_nofollow_verbose(LOG_DEBUG, "tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755" TMPFS_LIMITS_DEV);
+ if (r < 0)
+ goto fail;
+
+ r = label_fix_container(dev, "/dev", 0);
+ if (r < 0) {
+ log_debug_errno(errno, "Failed to fix label of '%s' as /dev: %m", dev);
+ goto fail;
+ }
+
+ devpts = strjoina(temporary_mount, "/dev/pts");
+ (void) mkdir(devpts, 0755);
+ r = mount_nofollow_verbose(LOG_DEBUG, "/dev/pts", devpts, NULL, MS_BIND, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx.
+ * When /dev/ptmx a device node, /dev/pts/ptmx has 000 permissions making it inaccessible.
+ * Thus, in that case make a clone.
+ * In nspawn and other containers it will be a symlink, in that case make it a symlink. */
+ r = is_symlink("/dev/ptmx");
+ if (r < 0) {
+ log_debug_errno(r, "Failed to detect whether /dev/ptmx is a symlink or not: %m");
+ goto fail;
+ } else if (r > 0) {
+ devptmx = strjoina(temporary_mount, "/dev/ptmx");
+ if (symlink("pts/ptmx", devptmx) < 0) {
+ r = log_debug_errno(errno, "Failed to create a symlink '%s' to pts/ptmx: %m", devptmx);
+ goto fail;
+ }
+ } else {
+ r = clone_device_node("/dev/ptmx", temporary_mount, &can_mknod);
+ if (r < 0)
+ goto fail;
+ }
+
+ devshm = strjoina(temporary_mount, "/dev/shm");
+ (void) mkdir(devshm, 0755);
+ r = mount_nofollow_verbose(LOG_DEBUG, "/dev/shm", devshm, NULL, MS_BIND, NULL);
+ if (r < 0)
+ goto fail;
+
+ devmqueue = strjoina(temporary_mount, "/dev/mqueue");
+ (void) mkdir(devmqueue, 0755);
+ (void) mount_nofollow_verbose(LOG_DEBUG, "/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
+
+ devhugepages = strjoina(temporary_mount, "/dev/hugepages");
+ (void) mkdir(devhugepages, 0755);
+ (void) mount_nofollow_verbose(LOG_DEBUG, "/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
+
+ devlog = strjoina(temporary_mount, "/dev/log");
+ if (symlink("/run/systemd/journal/dev-log", devlog) < 0)
+ log_debug_errno(errno, "Failed to create a symlink '%s' to /run/systemd/journal/dev-log, ignoring: %m", devlog);
+
+ NULSTR_FOREACH(d, devnodes) {
+ r = clone_device_node(d, temporary_mount, &can_mknod);
+ /* ENXIO means the *source* is not a device file, skip creation in that case */
+ if (r < 0 && r != -ENXIO)
+ goto fail;
+ }
+
+ r = dev_setup(temporary_mount, UID_INVALID, GID_INVALID);
+ if (r < 0)
+ log_debug_errno(r, "Failed to set up basic device tree at '%s', ignoring: %m", temporary_mount);
+
+ /* Create the /dev directory if missing. It is more likely to be
+ * missing when the service is started with RootDirectory. This is
+ * consistent with mount units creating the mount points when missing.
+ */
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
+
+ /* Unmount everything in old /dev */
+ r = umount_recursive(mount_entry_path(m), 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to unmount directories below '%s', ignoring: %m", mount_entry_path(m));
+
+ r = mount_nofollow_verbose(LOG_DEBUG, dev, mount_entry_path(m), NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto fail;
+
+ (void) rmdir(dev);
+ (void) rmdir(temporary_mount);
+
+ return 0;
+
+fail:
+ if (devpts)
+ (void) umount_verbose(LOG_DEBUG, devpts, UMOUNT_NOFOLLOW);
+
+ if (devshm)
+ (void) umount_verbose(LOG_DEBUG, devshm, UMOUNT_NOFOLLOW);
+
+ if (devhugepages)
+ (void) umount_verbose(LOG_DEBUG, devhugepages, UMOUNT_NOFOLLOW);
+
+ if (devmqueue)
+ (void) umount_verbose(LOG_DEBUG, devmqueue, UMOUNT_NOFOLLOW);
+
+ (void) umount_verbose(LOG_DEBUG, dev, UMOUNT_NOFOLLOW);
+ (void) rmdir(dev);
+ (void) rmdir(temporary_mount);
+
+ return r;
+}
+
+static int mount_bind_dev(const MountEntry *m) {
+ int r;
+
+ assert(m);
+
+ /* Implements the little brother of mount_private_dev(): simply bind mounts the host's /dev into the service's
+ * /dev. This is only used when RootDirectory= is set. */
+
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
+
+ r = path_is_mount_point(mount_entry_path(m), NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to determine whether /dev is already mounted: %m");
+ if (r > 0) /* make this a NOP if /dev is already a mount point */
+ return 0;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int mount_sysfs(const MountEntry *m) {
+ int r;
+
+ assert(m);
+
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
+
+ r = path_is_mount_point(mount_entry_path(m), NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to determine whether /sys is already mounted: %m");
+ if (r > 0) /* make this a NOP if /sys is already a mount point */
+ return 0;
+
+ /* Bind mount the host's version so that we get all child mounts of it, too. */
+ r = mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
+ const char *entry_path;
+ int r;
+
+ assert(m);
+ assert(ns_info);
+
+ entry_path = mount_entry_path(m);
+
+ /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
+ * one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
+ * our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
+ * mounted on /proc/ first. */
+
+ (void) mkdir_p_label(entry_path, 0755);
+ (void) umount_recursive(entry_path, 0);
+
+ if (ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
+ ns_info->proc_subset != PROC_SUBSET_ALL) {
+ _cleanup_free_ char *opts = NULL;
+
+ /* Starting with kernel 5.8 procfs' hidepid= logic is truly per-instance (previously it
+ * pretended to be per-instance but actually was per-namespace), hence let's make use of it
+ * if requested. To make sure this logic succeeds only on kernels where hidepid= is
+ * per-instance, we'll exclusively use the textual value for hidepid=, since support was
+ * added in the same commit: if it's supported it is thus also per-instance. */
+
+ opts = strjoin("hidepid=",
+ ns_info->protect_proc == PROTECT_PROC_DEFAULT ? "off" :
+ protect_proc_to_string(ns_info->protect_proc),
+ ns_info->proc_subset == PROC_SUBSET_PID ? ",subset=pid" : "");
+ if (!opts)
+ return -ENOMEM;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
+ if (r < 0) {
+ if (r != -EINVAL)
+ return r;
+
+ /* If this failed with EINVAL then this likely means the textual hidepid= stuff is
+ * not supported by the kernel, and thus the per-instance hidepid= neither, which
+ * means we really don't want to use it, since it would affect our host's /proc
+ * mount. Hence let's gracefully fallback to a classic, unrestricted version. */
+ } else
+ return 1;
+ }
+
+ r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int mount_tmpfs(const MountEntry *m) {
+ const char *entry_path, *inner_path;
+ int r;
+
+ assert(m);
+
+ entry_path = mount_entry_path(m);
+ inner_path = m->path_const;
+
+ /* First, get rid of everything that is below if there is anything. Then, overmount with our new tmpfs */
+
+ (void) mkdir_p_label(entry_path, 0755);
+ (void) umount_recursive(entry_path, 0);
+
+ r = mount_nofollow_verbose(LOG_DEBUG, "tmpfs", entry_path, "tmpfs", m->flags, mount_entry_options(m));
+ if (r < 0)
+ return r;
+
+ r = label_fix_container(entry_path, inner_path, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to fix label of '%s' as '%s': %m", entry_path, inner_path);
+
+ return 1;
+}
+
+static int mount_images(const MountEntry *m) {
+ _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+ _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
+ DissectImageFlags dissect_image_flags;
+ int r;
+
+ assert(m);
+
+ r = verity_settings_load(&verity, mount_entry_source(m), NULL, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load root hash: %m");
+
+ dissect_image_flags =
+ (m->read_only ? DISSECT_IMAGE_READ_ONLY : 0) |
+ (verity.data_path ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0);
+
+ r = loop_device_make_by_path(
+ mount_entry_source(m),
+ m->read_only ? O_RDONLY : -1 /* < 0 means writable if possible, read-only as fallback */,
+ verity.data_path ? 0 : LO_FLAGS_PARTSCAN,
+ &loop_device);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create loop device for image: %m");
+
+ r = dissect_image(
+ loop_device->fd,
+ &verity,
+ m->image_options,
+ dissect_image_flags,
+ &dissected_image);
+ /* No partition table? Might be a single-filesystem image, try again */
+ if (!verity.data_path && r == -ENOPKG)
+ r = dissect_image(
+ loop_device->fd,
+ &verity,
+ m->image_options,
+ dissect_image_flags|DISSECT_IMAGE_NO_PARTITION_TABLE,
+ &dissected_image);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to dissect image: %m");
+
+ r = dissected_image_decrypt(
+ dissected_image,
+ NULL,
+ &verity,
+ dissect_image_flags,
+ &decrypted_image);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to decrypt dissected image: %m");
+
+ r = mkdir_p_label(mount_entry_path(m), 0755);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create destination directory %s: %m", mount_entry_path(m));
+ r = umount_recursive(mount_entry_path(m), 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to umount under destination directory %s: %m", mount_entry_path(m));
+
+ r = dissected_image_mount(dissected_image, mount_entry_path(m), UID_INVALID, dissect_image_flags);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to mount image: %m");
+
+ if (decrypted_image) {
+ r = decrypted_image_relinquish(decrypted_image);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to relinquish decrypted image: %m");
+ }
+
+ loop_device_relinquish(loop_device);
+
+ return 1;
+}
+
+static int follow_symlink(
+ const char *root_directory,
+ MountEntry *m) {
+
+ _cleanup_free_ char *target = NULL;
+ int r;
+
+ /* Let's chase symlinks, but only one step at a time. That's because depending where the symlink points we
+ * might need to change the order in which we mount stuff. Hence: let's normalize piecemeal, and do one step at
+ * a time by specifying CHASE_STEP. This function returns 0 if we resolved one step, and > 0 if we reached the
+ * end and already have a fully normalized name. */
+
+ r = chase_symlinks(mount_entry_path(m), root_directory, CHASE_STEP|CHASE_NONEXISTENT, &target, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to chase symlinks '%s': %m", mount_entry_path(m));
+ if (r > 0) /* Reached the end, nothing more to resolve */
+ return 1;
+
+ if (m->n_followed >= CHASE_SYMLINKS_MAX) /* put a boundary on things */
+ return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+ "Symlink loop on '%s'.",
+ mount_entry_path(m));
+
+ log_debug("Followed mount entry path symlink %s → %s.", mount_entry_path(m), target);
+
+ free_and_replace(m->path_malloc, target);
+ m->has_prefix = true;
+
+ m->n_followed ++;
+
+ return 0;
+}
+
+static int apply_mount(
+ const char *root_directory,
+ MountEntry *m,
+ const NamespaceInfo *ns_info) {
+
+ _cleanup_free_ char *inaccessible = NULL;
+ bool rbind = true, make = false;
+ const char *what;
+ int r;
+
+ assert(m);
+ assert(ns_info);
+
+ log_debug("Applying namespace mount on %s", mount_entry_path(m));
+
+ switch (m->mode) {
+
+ case INACCESSIBLE: {
+ _cleanup_free_ char *tmp = NULL;
+ const char *runtime_dir;
+ struct stat target;
+
+ /* First, get rid of everything that is below if there
+ * is anything... Then, overmount it with an
+ * inaccessible path. */
+ (void) umount_recursive(mount_entry_path(m), 0);
+
+ if (lstat(mount_entry_path(m), &target) < 0) {
+ if (errno == ENOENT && m->ignore)
+ return 0;
+
+ return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m",
+ mount_entry_path(m));
+ }
+
+ if (geteuid() == 0)
+ runtime_dir = "/run";
+ else {
+ if (asprintf(&tmp, "/run/user/" UID_FMT, geteuid()) < 0)
+ return -ENOMEM;
+
+ runtime_dir = tmp;
+ }
+
+ r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible);
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+ "File type not supported for inaccessible mounts. Note that symlinks are not allowed");
+ what = inaccessible;
+ break;
+ }
+
+ case READONLY:
+ case READWRITE:
+ case READWRITE_IMPLICIT:
+ r = path_is_mount_point(mount_entry_path(m), root_directory, 0);
+ if (r == -ENOENT && m->ignore)
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m",
+ mount_entry_path(m));
+ if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY
+ * bit for the mount point if needed. */
+ return 0;
+ /* This isn't a mount point yet, let's make it one. */
+ what = mount_entry_path(m);
+ break;
+
+ case BIND_MOUNT:
+ rbind = false;
+
+ _fallthrough_;
+ case BIND_MOUNT_RECURSIVE: {
+ _cleanup_free_ char *chased = NULL;
+
+ /* Since mount() will always follow symlinks we chase the symlinks on our own first. Note
+ * that bind mount source paths are always relative to the host root, hence we pass NULL as
+ * root directory to chase_symlinks() here. */
+
+ r = chase_symlinks(mount_entry_source(m), NULL, CHASE_TRAIL_SLASH, &chased, NULL);
+ if (r == -ENOENT && m->ignore) {
+ log_debug_errno(r, "Path %s does not exist, ignoring.", mount_entry_source(m));
+ return 0;
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to follow symlinks on %s: %m", mount_entry_source(m));
+
+ log_debug("Followed source symlinks %s → %s.", mount_entry_source(m), chased);
+
+ free_and_replace(m->source_malloc, chased);
+
+ what = mount_entry_source(m);
+ make = true;
+ break;
+ }
+
+ case EMPTY_DIR:
+ case TMPFS:
+ return mount_tmpfs(m);
+
+ case PRIVATE_TMP:
+ case PRIVATE_TMP_READONLY:
+ what = mount_entry_source(m);
+ make = true;
+ break;
+
+ case PRIVATE_DEV:
+ return mount_private_dev(m);
+
+ case BIND_DEV:
+ return mount_bind_dev(m);
+
+ case SYSFS:
+ return mount_sysfs(m);
+
+ case PROCFS:
+ return mount_procfs(m, ns_info);
+
+ case MOUNT_IMAGES:
+ return mount_images(m);
+
+ default:
+ assert_not_reached("Unknown mode");
+ }
+
+ assert(what);
+
+ r = mount_nofollow_verbose(LOG_DEBUG, what, mount_entry_path(m), NULL, MS_BIND|(rbind ? MS_REC : 0), NULL);
+ if (r < 0) {
+ bool try_again = false;
+
+ if (r == -ENOENT && make) {
+ struct stat st;
+
+ /* Hmm, either the source or the destination are missing. Let's see if we can create
+ the destination, then try again. */
+
+ if (stat(what, &st) < 0)
+ log_error_errno(errno, "Mount point source '%s' is not accessible: %m", what);
+ else {
+ int q;
+
+ (void) mkdir_parents(mount_entry_path(m), 0755);
+
+ if (S_ISDIR(st.st_mode))
+ q = mkdir(mount_entry_path(m), 0755) < 0 ? -errno : 0;
+ else
+ q = touch(mount_entry_path(m));
+
+ if (q < 0)
+ log_error_errno(q, "Failed to create destination mount point node '%s': %m",
+ mount_entry_path(m));
+ else
+ try_again = true;
+ }
+ }
+
+ if (try_again)
+ r = mount_nofollow_verbose(LOG_DEBUG, what, mount_entry_path(m), NULL, MS_BIND|(rbind ? MS_REC : 0), NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mount %s to %s: %m", what, mount_entry_path(m));
+ }
+
+ log_debug("Successfully mounted %s to %s", what, mount_entry_path(m));
+ return 0;
+}
+
+static int make_read_only(const MountEntry *m, char **deny_list, FILE *proc_self_mountinfo) {
+ unsigned long new_flags = 0, flags_mask = 0;
+ bool submounts = false;
+ int r = 0;
+
+ assert(m);
+ assert(proc_self_mountinfo);
+
+ if (mount_entry_read_only(m) || m->mode == PRIVATE_DEV) {
+ new_flags |= MS_RDONLY;
+ flags_mask |= MS_RDONLY;
+ }
+
+ if (m->nosuid) {
+ new_flags |= MS_NOSUID;
+ flags_mask |= MS_NOSUID;
+ }
+
+ if (flags_mask == 0) /* No Change? */
+ return 0;
+
+ /* We generally apply these changes recursively, except for /dev, and the cases we know there's
+ * nothing further down. Set /dev readonly, but not submounts like /dev/shm. Also, we only set the
+ * per-mount read-only flag. We can't set it on the superblock, if we are inside a user namespace
+ * and running Linux <= 4.17. */
+ submounts =
+ mount_entry_read_only(m) &&
+ !IN_SET(m->mode, EMPTY_DIR, TMPFS);
+ if (submounts)
+ r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, deny_list, proc_self_mountinfo);
+ else
+ r = bind_remount_one_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, proc_self_mountinfo);
+
+ /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked
+ * read-only already stays this way. This improves compatibility with container managers, where we
+ * won't attempt to undo read-only mounts already applied. */
+
+ if (r == -ENOENT && m->ignore)
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to re-mount '%s'%s: %m", mount_entry_path(m),
+ submounts ? " and its submounts" : "");
+ return 0;
+}
+
+static bool namespace_info_mount_apivfs(const NamespaceInfo *ns_info) {
+ assert(ns_info);
+
+ /*
+ * ProtectControlGroups= and ProtectKernelTunables= imply MountAPIVFS=,
+ * since to protect the API VFS mounts, they need to be around in the
+ * first place...
+ */
+
+ return ns_info->mount_apivfs ||
+ ns_info->protect_control_groups ||
+ ns_info->protect_kernel_tunables ||
+ ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
+ ns_info->proc_subset != PROC_SUBSET_ALL;
+}
+
+static size_t namespace_calculate_mounts(
+ const NamespaceInfo *ns_info,
+ char** read_write_paths,
+ char** read_only_paths,
+ char** inaccessible_paths,
+ char** empty_directories,
+ size_t n_bind_mounts,
+ size_t n_temporary_filesystems,
+ size_t n_mount_images,
+ const char* tmp_dir,
+ const char* var_tmp_dir,
+ const char *creds_path,
+ const char* log_namespace) {
+
+ size_t protect_home_cnt;
+ size_t protect_system_cnt =
+ (ns_info->protect_system == PROTECT_SYSTEM_STRICT ?
+ ELEMENTSOF(protect_system_strict_table) :
+ ((ns_info->protect_system == PROTECT_SYSTEM_FULL) ?
+ ELEMENTSOF(protect_system_full_table) :
+ ((ns_info->protect_system == PROTECT_SYSTEM_YES) ?
+ ELEMENTSOF(protect_system_yes_table) : 0)));
+
+ protect_home_cnt =
+ (ns_info->protect_home == PROTECT_HOME_YES ?
+ ELEMENTSOF(protect_home_yes_table) :
+ ((ns_info->protect_home == PROTECT_HOME_READ_ONLY) ?
+ ELEMENTSOF(protect_home_read_only_table) :
+ ((ns_info->protect_home == PROTECT_HOME_TMPFS) ?
+ ELEMENTSOF(protect_home_tmpfs_table) : 0)));
+
+ return !!tmp_dir + !!var_tmp_dir +
+ strv_length(read_write_paths) +
+ strv_length(read_only_paths) +
+ strv_length(inaccessible_paths) +
+ strv_length(empty_directories) +
+ n_bind_mounts +
+ n_mount_images +
+ n_temporary_filesystems +
+ ns_info->private_dev +
+ (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+ (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
+ (ns_info->protect_kernel_logs ? ELEMENTSOF(protect_kernel_logs_table) : 0) +
+ (ns_info->protect_control_groups ? 1 : 0) +
+ protect_home_cnt + protect_system_cnt +
+ (ns_info->protect_hostname ? 2 : 0) +
+ (namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0) +
+ (creds_path ? 2 : 1) +
+ !!log_namespace;
+}
+
+static void normalize_mounts(const char *root_directory, MountEntry *mounts, size_t *n_mounts) {
+ assert(root_directory);
+ assert(n_mounts);
+ assert(mounts || *n_mounts == 0);
+
+ typesafe_qsort(mounts, *n_mounts, mount_path_compare);
+
+ drop_duplicates(mounts, n_mounts);
+ drop_outside_root(root_directory, mounts, n_mounts);
+ drop_inaccessible(mounts, n_mounts);
+ drop_nop(mounts, n_mounts);
+}
+
+static bool root_read_only(
+ char **read_only_paths,
+ ProtectSystem protect_system) {
+
+ /* Determine whether the root directory is going to be read-only given the configured settings. */
+
+ if (protect_system == PROTECT_SYSTEM_STRICT)
+ return true;
+
+ if (prefixed_path_strv_contains(read_only_paths, "/"))
+ return true;
+
+ return false;
+}
+
+static bool home_read_only(
+ char** read_only_paths,
+ char** inaccessible_paths,
+ char** empty_directories,
+ const BindMount *bind_mounts,
+ size_t n_bind_mounts,
+ const TemporaryFileSystem *temporary_filesystems,
+ size_t n_temporary_filesystems,
+ ProtectHome protect_home) {
+
+ size_t i;
+
+ /* Determine whether the /home directory is going to be read-only given the configured settings. Yes,
+ * this is a bit sloppy, since we don't bother checking for cases where / is affected by multiple
+ * settings. */
+
+ if (protect_home != PROTECT_HOME_NO)
+ return true;
+
+ if (prefixed_path_strv_contains(read_only_paths, "/home") ||
+ prefixed_path_strv_contains(inaccessible_paths, "/home") ||
+ prefixed_path_strv_contains(empty_directories, "/home"))
+ return true;
+
+ for (i = 0; i < n_temporary_filesystems; i++)
+ if (path_equal(temporary_filesystems[i].path, "/home"))
+ return true;
+
+ /* If /home is overmounted with some dir from the host it's not writable. */
+ for (i = 0; i < n_bind_mounts; i++)
+ if (path_equal(bind_mounts[i].destination, "/home"))
+ return true;
+
+ return false;
+}
+
+static int verity_settings_prepare(
+ VeritySettings *verity,
+ const char *root_image,
+ const void *root_hash,
+ size_t root_hash_size,
+ const char *root_hash_path,
+ const void *root_hash_sig,
+ size_t root_hash_sig_size,
+ const char *root_hash_sig_path,
+ const char *verity_data_path) {
+
+ int r;
+
+ assert(verity);
+
+ if (root_hash) {
+ void *d;
+
+ d = memdup(root_hash, root_hash_size);
+ if (!d)
+ return -ENOMEM;
+
+ free_and_replace(verity->root_hash, d);
+ verity->root_hash_size = root_hash_size;
+ verity->designator = PARTITION_ROOT;
+ }
+
+ if (root_hash_sig) {
+ void *d;
+
+ d = memdup(root_hash_sig, root_hash_sig_size);
+ if (!d)
+ return -ENOMEM;
+
+ free_and_replace(verity->root_hash_sig, d);
+ verity->root_hash_sig_size = root_hash_sig_size;
+ verity->designator = PARTITION_ROOT;
+ }
+
+ if (verity_data_path) {
+ r = free_and_strdup(&verity->data_path, verity_data_path);
+ if (r < 0)
+ return r;
+ }
+
+ r = verity_settings_load(
+ verity,
+ root_image,
+ root_hash_path,
+ root_hash_sig_path);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load root hash: %m");
+
+ return 0;
+}
+
+int setup_namespace(
+ const char* root_directory,
+ const char* root_image,
+ const MountOptions *root_image_options,
+ const NamespaceInfo *ns_info,
+ char** read_write_paths,
+ char** read_only_paths,
+ char** inaccessible_paths,
+ char** empty_directories,
+ const BindMount *bind_mounts,
+ size_t n_bind_mounts,
+ const TemporaryFileSystem *temporary_filesystems,
+ size_t n_temporary_filesystems,
+ const MountImage *mount_images,
+ size_t n_mount_images,
+ const char* tmp_dir,
+ const char* var_tmp_dir,
+ const char *creds_path,
+ const char *log_namespace,
+ unsigned long mount_flags,
+ const void *root_hash,
+ size_t root_hash_size,
+ const char *root_hash_path,
+ const void *root_hash_sig,
+ size_t root_hash_sig_size,
+ const char *root_hash_sig_path,
+ const char *verity_data_path,
+ DissectImageFlags dissect_image_flags,
+ char **error_path) {
+
+ _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+ _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
+ MountEntry *m = NULL, *mounts = NULL;
+ bool require_prefix = false;
+ const char *root;
+ size_t n_mounts;
+ int r;
+
+ assert(ns_info);
+
+ if (mount_flags == 0)
+ mount_flags = MS_SHARED;
+
+ if (root_image) {
+ dissect_image_flags |= DISSECT_IMAGE_REQUIRE_ROOT;
+
+ /* Make the whole image read-only if we can determine that we only access it in a read-only fashion. */
+ if (root_read_only(read_only_paths,
+ ns_info->protect_system) &&
+ home_read_only(read_only_paths, inaccessible_paths, empty_directories,
+ bind_mounts, n_bind_mounts, temporary_filesystems, n_temporary_filesystems,
+ ns_info->protect_home) &&
+ strv_isempty(read_write_paths))
+ dissect_image_flags |= DISSECT_IMAGE_READ_ONLY;
+
+ r = verity_settings_prepare(
+ &verity,
+ root_image,
+ root_hash, root_hash_size, root_hash_path,
+ root_hash_sig, root_hash_sig_size, root_hash_sig_path,
+ verity_data_path);
+ if (r < 0)
+ return r;
+
+ SET_FLAG(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE, verity.data_path);
+
+ r = loop_device_make_by_path(
+ root_image,
+ FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : -1 /* < 0 means writable if possible, read-only as fallback */,
+ FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
+ &loop_device);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create loop device for root image: %m");
+
+ r = dissect_image(
+ loop_device->fd,
+ &verity,
+ root_image_options,
+ dissect_image_flags,
+ &dissected_image);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to dissect image: %m");
+
+ r = dissected_image_decrypt(
+ dissected_image,
+ NULL,
+ &verity,
+ dissect_image_flags,
+ &decrypted_image);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to decrypt dissected image: %m");
+ }
+
+ if (root_directory)
+ root = root_directory;
+ else {
+ /* /run/systemd should have been created by PID 1 early on already, but in some cases, like
+ * when running tests (test-execute), it might not have been created yet so let's make sure
+ * we create it if it doesn't already exist. */
+ (void) mkdir_p_label("/run/systemd", 0755);
+
+ /* Always create the mount namespace in a temporary directory, instead of operating
+ * directly in the root. The temporary directory prevents any mounts from being
+ * potentially obscured my other mounts we already applied.
+ * We use the same mount point for all images, which is safe, since they all live
+ * in their own namespaces after all, and hence won't see each other. */
+
+ root = "/run/systemd/unit-root";
+ (void) mkdir_label(root, 0700);
+ require_prefix = true;
+ }
+
+ n_mounts = namespace_calculate_mounts(
+ ns_info,
+ read_write_paths,
+ read_only_paths,
+ inaccessible_paths,
+ empty_directories,
+ n_bind_mounts,
+ n_temporary_filesystems,
+ n_mount_images,
+ tmp_dir, var_tmp_dir,
+ creds_path,
+ log_namespace);
+
+ if (n_mounts > 0) {
+ m = mounts = new0(MountEntry, n_mounts);
+ if (!mounts)
+ return -ENOMEM;
+
+ r = append_access_mounts(&m, read_write_paths, READWRITE, require_prefix);
+ if (r < 0)
+ goto finish;
+
+ r = append_access_mounts(&m, read_only_paths, READONLY, require_prefix);
+ if (r < 0)
+ goto finish;
+
+ r = append_access_mounts(&m, inaccessible_paths, INACCESSIBLE, require_prefix);
+ if (r < 0)
+ goto finish;
+
+ r = append_empty_dir_mounts(&m, empty_directories);
+ if (r < 0)
+ goto finish;
+
+ r = append_bind_mounts(&m, bind_mounts, n_bind_mounts);
+ if (r < 0)
+ goto finish;
+
+ r = append_tmpfs_mounts(&m, temporary_filesystems, n_temporary_filesystems);
+ if (r < 0)
+ goto finish;
+
+ if (tmp_dir) {
+ bool ro = streq(tmp_dir, RUN_SYSTEMD_EMPTY);
+
+ *(m++) = (MountEntry) {
+ .path_const = "/tmp",
+ .mode = ro ? PRIVATE_TMP_READONLY : PRIVATE_TMP,
+ .source_const = tmp_dir,
+ };
+ }
+
+ if (var_tmp_dir) {
+ bool ro = streq(var_tmp_dir, RUN_SYSTEMD_EMPTY);
+
+ *(m++) = (MountEntry) {
+ .path_const = "/var/tmp",
+ .mode = ro ? PRIVATE_TMP_READONLY : PRIVATE_TMP,
+ .source_const = var_tmp_dir,
+ };
+ }
+
+ r = append_mount_images(&m, mount_images, n_mount_images);
+ if (r < 0)
+ goto finish;
+
+ if (ns_info->private_dev)
+ *(m++) = (MountEntry) {
+ .path_const = "/dev",
+ .mode = PRIVATE_DEV,
+ .flags = DEV_MOUNT_OPTIONS,
+ };
+
+ if (ns_info->protect_kernel_tunables) {
+ r = append_static_mounts(&m,
+ protect_kernel_tunables_table,
+ ELEMENTSOF(protect_kernel_tunables_table),
+ ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (ns_info->protect_kernel_modules) {
+ r = append_static_mounts(&m,
+ protect_kernel_modules_table,
+ ELEMENTSOF(protect_kernel_modules_table),
+ ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (ns_info->protect_kernel_logs) {
+ r = append_static_mounts(&m,
+ protect_kernel_logs_table,
+ ELEMENTSOF(protect_kernel_logs_table),
+ ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (ns_info->protect_control_groups)
+ *(m++) = (MountEntry) {
+ .path_const = "/sys/fs/cgroup",
+ .mode = READONLY,
+ };
+
+ r = append_protect_home(&m, ns_info->protect_home, ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+
+ r = append_protect_system(&m, ns_info->protect_system, false);
+ if (r < 0)
+ goto finish;
+
+ if (namespace_info_mount_apivfs(ns_info)) {
+ r = append_static_mounts(&m,
+ apivfs_table,
+ ELEMENTSOF(apivfs_table),
+ ns_info->ignore_protect_paths);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (ns_info->protect_hostname) {
+ *(m++) = (MountEntry) {
+ .path_const = "/proc/sys/kernel/hostname",
+ .mode = READONLY,
+ };
+ *(m++) = (MountEntry) {
+ .path_const = "/proc/sys/kernel/domainname",
+ .mode = READONLY,
+ };
+ }
+
+ if (creds_path) {
+ /* If our service has a credentials store configured, then bind that one in, but hide
+ * everything else. */
+
+ *(m++) = (MountEntry) {
+ .path_const = "/run/credentials",
+ .mode = TMPFS,
+ .read_only = true,
+ .options_const = "mode=0755" TMPFS_LIMITS_EMPTY_OR_ALMOST,
+ .flags = MS_NODEV|MS_STRICTATIME|MS_NOSUID|MS_NOEXEC,
+ };
+
+ *(m++) = (MountEntry) {
+ .path_const = creds_path,
+ .mode = BIND_MOUNT,
+ .read_only = true,
+ .source_const = creds_path,
+ };
+ } else {
+ /* If our service has no credentials store configured, then make the whole
+ * credentials tree inaccessible wholesale. */
+
+ *(m++) = (MountEntry) {
+ .path_const = "/run/credentials",
+ .mode = INACCESSIBLE,
+ .ignore = true,
+ };
+ }
+
+ if (log_namespace) {
+ _cleanup_free_ char *q;
+
+ q = strjoin("/run/systemd/journal.", log_namespace);
+ if (!q) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ *(m++) = (MountEntry) {
+ .path_const = "/run/systemd/journal",
+ .mode = BIND_MOUNT_RECURSIVE,
+ .read_only = true,
+ .source_malloc = TAKE_PTR(q),
+ };
+ }
+
+ assert(mounts + n_mounts == m);
+
+ /* Prepend the root directory where that's necessary */
+ r = prefix_where_needed(mounts, n_mounts, root);
+ if (r < 0)
+ goto finish;
+
+ normalize_mounts(root, mounts, &n_mounts);
+ }
+
+ /* All above is just preparation, figuring out what to do. Let's now actually start doing something. */
+
+ if (unshare(CLONE_NEWNS) < 0) {
+ r = log_debug_errno(errno, "Failed to unshare the mount namespace: %m");
+ if (IN_SET(r, -EACCES, -EPERM, -EOPNOTSUPP, -ENOSYS))
+ /* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter
+ * in place that doesn't allow us to create namespaces (or a missing cap), then
+ * propagate a recognizable error back, which the caller can use to detect this case
+ * (and only this) and optionally continue without namespacing applied. */
+ r = -ENOANO;
+
+ goto finish;
+ }
+
+ /* Remount / as SLAVE so that nothing now mounted in the namespace
+ * shows up in the parent */
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to remount '/' as SLAVE: %m");
+ goto finish;
+ }
+
+ if (root_image) {
+ /* A root image is specified, mount it to the right place */
+ r = dissected_image_mount(dissected_image, root, UID_INVALID, dissect_image_flags);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount root image: %m");
+ goto finish;
+ }
+
+ if (decrypted_image) {
+ r = decrypted_image_relinquish(decrypted_image);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to relinquish decrypted image: %m");
+ goto finish;
+ }
+ }
+
+ loop_device_relinquish(loop_device);
+
+ } else if (root_directory) {
+
+ /* A root directory is specified. Turn its directory into bind mount, if it isn't one yet. */
+ r = path_is_mount_point(root, NULL, AT_SYMLINK_FOLLOW);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to detect that %s is a mount point or not: %m", root);
+ goto finish;
+ }
+ if (r == 0) {
+ r = mount_nofollow_verbose(LOG_DEBUG, root, root, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ goto finish;
+ }
+
+ } else {
+ /* Let's mount the main root directory to the root directory to use */
+ r = mount_nofollow_verbose(LOG_DEBUG, "/", root, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ goto finish;
+ }
+
+ /* Try to set up the new root directory before mounting anything else there. */
+ if (root_image || root_directory)
+ (void) base_filesystem_create(root, UID_INVALID, GID_INVALID);
+
+ if (n_mounts > 0) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ _cleanup_free_ char **deny_list = NULL;
+ size_t j;
+
+ /* Open /proc/self/mountinfo now as it may become unavailable if we mount anything on top of
+ * /proc. For example, this is the case with the option: 'InaccessiblePaths=/proc'. */
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo) {
+ r = log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m");
+ if (error_path)
+ *error_path = strdup("/proc/self/mountinfo");
+ goto finish;
+ }
+
+ /* First round, establish all mounts we need */
+ for (;;) {
+ bool again = false;
+
+ for (m = mounts; m < mounts + n_mounts; ++m) {
+
+ if (m->applied)
+ continue;
+
+ r = follow_symlink(root, m);
+ if (r < 0) {
+ if (error_path && mount_entry_path(m))
+ *error_path = strdup(mount_entry_path(m));
+ goto finish;
+ }
+ if (r == 0) {
+ /* We hit a symlinked mount point. The entry got rewritten and might
+ * point to a very different place now. Let's normalize the changed
+ * list, and start from the beginning. After all to mount the entry
+ * at the new location we might need some other mounts first */
+ again = true;
+ break;
+ }
+
+ r = apply_mount(root, m, ns_info);
+ if (r < 0) {
+ if (error_path && mount_entry_path(m))
+ *error_path = strdup(mount_entry_path(m));
+ goto finish;
+ }
+
+ m->applied = true;
+ }
+
+ if (!again)
+ break;
+
+ normalize_mounts(root, mounts, &n_mounts);
+ }
+
+ /* Create a deny list we can pass to bind_mount_recursive() */
+ deny_list = new(char*, n_mounts+1);
+ if (!deny_list) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ for (j = 0; j < n_mounts; j++)
+ deny_list[j] = (char*) mount_entry_path(mounts+j);
+ deny_list[j] = NULL;
+
+ /* Second round, flip the ro bits if necessary. */
+ for (m = mounts; m < mounts + n_mounts; ++m) {
+ r = make_read_only(m, deny_list, proc_self_mountinfo);
+ if (r < 0) {
+ if (error_path && mount_entry_path(m))
+ *error_path = strdup(mount_entry_path(m));
+ goto finish;
+ }
+ }
+ }
+
+ /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
+ r = mount_move_root(root);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount root with MS_MOVE: %m");
+ goto finish;
+ }
+
+ /* Remount / as the desired mode. Note that this will not
+ * reestablish propagation from our side to the host, since
+ * what's disconnected is disconnected. */
+ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
+ r = log_debug_errno(errno, "Failed to remount '/' with desired mount flags: %m");
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ if (n_mounts > 0)
+ for (m = mounts; m < mounts + n_mounts; m++)
+ mount_entry_done(m);
+
+ free(mounts);
+
+ return r;
+}
+
+void bind_mount_free_many(BindMount *b, size_t n) {
+ size_t i;
+
+ assert(b || n == 0);
+
+ for (i = 0; i < n; i++) {
+ free(b[i].source);
+ free(b[i].destination);
+ }
+
+ free(b);
+}
+
+int bind_mount_add(BindMount **b, size_t *n, const BindMount *item) {
+ _cleanup_free_ char *s = NULL, *d = NULL;
+ BindMount *c;
+
+ assert(b);
+ assert(n);
+ assert(item);
+
+ s = strdup(item->source);
+ if (!s)
+ return -ENOMEM;
+
+ d = strdup(item->destination);
+ if (!d)
+ return -ENOMEM;
+
+ c = reallocarray(*b, *n + 1, sizeof(BindMount));
+ if (!c)
+ return -ENOMEM;
+
+ *b = c;
+
+ c[(*n) ++] = (BindMount) {
+ .source = TAKE_PTR(s),
+ .destination = TAKE_PTR(d),
+ .read_only = item->read_only,
+ .nosuid = item->nosuid,
+ .recursive = item->recursive,
+ .ignore_enoent = item->ignore_enoent,
+ };
+
+ return 0;
+}
+
+MountImage* mount_image_free_many(MountImage *m, size_t *n) {
+ size_t i;
+
+ assert(n);
+ assert(m || *n == 0);
+
+ for (i = 0; i < *n; i++) {
+ free(m[i].source);
+ free(m[i].destination);
+ mount_options_free_all(m[i].mount_options);
+ }
+
+ free(m);
+ *n = 0;
+ return NULL;
+}
+
+int mount_image_add(MountImage **m, size_t *n, const MountImage *item) {
+ _cleanup_free_ char *s = NULL, *d = NULL;
+ _cleanup_(mount_options_free_allp) MountOptions *options = NULL;
+ MountOptions *i;
+ MountImage *c;
+
+ assert(m);
+ assert(n);
+ assert(item);
+
+ s = strdup(item->source);
+ if (!s)
+ return -ENOMEM;
+
+ d = strdup(item->destination);
+ if (!d)
+ return -ENOMEM;
+
+ LIST_FOREACH(mount_options, i, item->mount_options) {
+ _cleanup_(mount_options_free_allp) MountOptions *o;
+
+ o = new(MountOptions, 1);
+ if (!o)
+ return -ENOMEM;
+
+ *o = (MountOptions) {
+ .partition_designator = i->partition_designator,
+ .options = strdup(i->options),
+ };
+ if (!o->options)
+ return -ENOMEM;
+
+ LIST_APPEND(mount_options, options, TAKE_PTR(o));
+ }
+
+ c = reallocarray(*m, *n + 1, sizeof(MountImage));
+ if (!c)
+ return -ENOMEM;
+
+ *m = c;
+
+ c[(*n) ++] = (MountImage) {
+ .source = TAKE_PTR(s),
+ .destination = TAKE_PTR(d),
+ .mount_options = TAKE_PTR(options),
+ .ignore_enoent = item->ignore_enoent,
+ };
+
+ return 0;
+}
+
+void temporary_filesystem_free_many(TemporaryFileSystem *t, size_t n) {
+ size_t i;
+
+ assert(t || n == 0);
+
+ for (i = 0; i < n; i++) {
+ free(t[i].path);
+ free(t[i].options);
+ }
+
+ free(t);
+}
+
+int temporary_filesystem_add(
+ TemporaryFileSystem **t,
+ size_t *n,
+ const char *path,
+ const char *options) {
+
+ _cleanup_free_ char *p = NULL, *o = NULL;
+ TemporaryFileSystem *c;
+
+ assert(t);
+ assert(n);
+ assert(path);
+
+ p = strdup(path);
+ if (!p)
+ return -ENOMEM;
+
+ if (!isempty(options)) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ c = reallocarray(*t, *n + 1, sizeof(TemporaryFileSystem));
+ if (!c)
+ return -ENOMEM;
+
+ *t = c;
+
+ c[(*n) ++] = (TemporaryFileSystem) {
+ .path = TAKE_PTR(p),
+ .options = TAKE_PTR(o),
+ };
+
+ return 0;
+}
+
+static int make_tmp_prefix(const char *prefix) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ /* Don't do anything unless we know the dir is actually missing */
+ r = access(prefix, F_OK);
+ if (r >= 0)
+ return 0;
+ if (errno != ENOENT)
+ return -errno;
+
+ r = mkdir_parents(prefix, 0755);
+ if (r < 0)
+ return r;
+
+ r = tempfn_random(prefix, NULL, &t);
+ if (r < 0)
+ return r;
+
+ if (mkdir(t, 0777) < 0)
+ return -errno;
+
+ if (chmod(t, 01777) < 0) {
+ r = -errno;
+ (void) rmdir(t);
+ return r;
+ }
+
+ if (rename(t, prefix) < 0) {
+ r = -errno;
+ (void) rmdir(t);
+ return r == -EEXIST ? 0 : r; /* it's fine if someone else created the dir by now */
+ }
+
+ return 0;
+
+}
+
+static int setup_one_tmp_dir(const char *id, const char *prefix, char **path, char **tmp_path) {
+ _cleanup_free_ char *x = NULL;
+ _cleanup_free_ char *y = NULL;
+ char bid[SD_ID128_STRING_MAX];
+ sd_id128_t boot_id;
+ bool rw = true;
+ int r;
+
+ assert(id);
+ assert(prefix);
+ assert(path);
+
+ /* We include the boot id in the directory so that after a
+ * reboot we can easily identify obsolete directories. */
+
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return r;
+
+ x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX");
+ if (!x)
+ return -ENOMEM;
+
+ r = make_tmp_prefix(prefix);
+ if (r < 0)
+ return r;
+
+ RUN_WITH_UMASK(0077)
+ if (!mkdtemp(x)) {
+ if (errno == EROFS || ERRNO_IS_DISK_SPACE(errno))
+ rw = false;
+ else
+ return -errno;
+ }
+
+ if (rw) {
+ y = strjoin(x, "/tmp");
+ if (!y)
+ return -ENOMEM;
+
+ RUN_WITH_UMASK(0000) {
+ if (mkdir(y, 0777 | S_ISVTX) < 0)
+ return -errno;
+ }
+
+ r = label_fix_container(y, prefix, 0);
+ if (r < 0)
+ return r;
+
+ if (tmp_path)
+ *tmp_path = TAKE_PTR(y);
+ } else {
+ /* Trouble: we failed to create the directory. Instead of failing, let's simulate /tmp being
+ * read-only. This way the service will get the EROFS result as if it was writing to the real
+ * file system. */
+ r = mkdir_p(RUN_SYSTEMD_EMPTY, 0500);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&x, RUN_SYSTEMD_EMPTY);
+ if (r < 0)
+ return r;
+ }
+
+ *path = TAKE_PTR(x);
+ return 0;
+}
+
+int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
+ _cleanup_(namespace_cleanup_tmpdirp) char *a = NULL;
+ _cleanup_(rmdir_and_freep) char *a_tmp = NULL;
+ char *b;
+ int r;
+
+ assert(id);
+ assert(tmp_dir);
+ assert(var_tmp_dir);
+
+ r = setup_one_tmp_dir(id, "/tmp", &a, &a_tmp);
+ if (r < 0)
+ return r;
+
+ r = setup_one_tmp_dir(id, "/var/tmp", &b, NULL);
+ if (r < 0)
+ return r;
+
+ a_tmp = mfree(a_tmp); /* avoid rmdir */
+ *tmp_dir = TAKE_PTR(a);
+ *var_tmp_dir = TAKE_PTR(b);
+
+ return 0;
+}
+
+int setup_netns(const int netns_storage_socket[static 2]) {
+ _cleanup_close_ int netns = -1;
+ int r, q;
+
+ assert(netns_storage_socket);
+ assert(netns_storage_socket[0] >= 0);
+ assert(netns_storage_socket[1] >= 0);
+
+ /* We use the passed socketpair as a storage buffer for our
+ * namespace reference fd. Whatever process runs this first
+ * shall create a new namespace, all others should just join
+ * it. To serialize that we use a file lock on the socket
+ * pair.
+ *
+ * It's a bit crazy, but hey, works great! */
+
+ if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
+ if (netns == -EAGAIN) {
+ /* Nothing stored yet, so let's create a new namespace. */
+
+ if (unshare(CLONE_NEWNET) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ (void) loopback_setup();
+
+ netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (netns < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = 1;
+
+ } else if (netns < 0) {
+ r = netns;
+ goto fail;
+
+ } else {
+ /* Yay, found something, so let's join the namespace */
+ if (setns(netns, CLONE_NEWNET) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = 0;
+ }
+
+ q = send_one_fd(netns_storage_socket[1], netns, MSG_DONTWAIT);
+ if (q < 0) {
+ r = q;
+ goto fail;
+ }
+
+fail:
+ (void) lockf(netns_storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+int open_netns_path(const int netns_storage_socket[static 2], const char *path) {
+ _cleanup_close_ int netns = -1;
+ int q, r;
+
+ assert(netns_storage_socket);
+ assert(netns_storage_socket[0] >= 0);
+ assert(netns_storage_socket[1] >= 0);
+ assert(path);
+
+ /* If the storage socket doesn't contain a netns fd yet, open one via the file system and store it in
+ * it. This is supposed to be called ahead of time, i.e. before setup_netns() which will allocate a
+ * new anonymous netns if needed. */
+
+ if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
+ if (netns == -EAGAIN) {
+ /* Nothing stored yet. Open the file from the file system. */
+
+ netns = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (netns < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = fd_is_network_ns(netns);
+ if (r == 0) { /* Not a netns? Refuse early. */
+ r = -EINVAL;
+ goto fail;
+ }
+ if (r < 0 && r != -EUCLEAN) /* EUCLEAN: we don't know */
+ goto fail;
+
+ r = 1;
+
+ } else if (netns < 0) {
+ r = netns;
+ goto fail;
+ } else
+ r = 0; /* Already allocated */
+
+ q = send_one_fd(netns_storage_socket[1], netns, MSG_DONTWAIT);
+ if (q < 0) {
+ r = q;
+ goto fail;
+ }
+
+fail:
+ (void) lockf(netns_storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+bool ns_type_supported(NamespaceType type) {
+ const char *t, *ns_proc;
+
+ t = namespace_type_to_string(type);
+ if (!t) /* Don't know how to translate this? Then it's not supported */
+ return false;
+
+ ns_proc = strjoina("/proc/self/ns/", t);
+ return access(ns_proc, F_OK) == 0;
+}
+
+static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
+ [PROTECT_HOME_NO] = "no",
+ [PROTECT_HOME_YES] = "yes",
+ [PROTECT_HOME_READ_ONLY] = "read-only",
+ [PROTECT_HOME_TMPFS] = "tmpfs",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_home, ProtectHome, PROTECT_HOME_YES);
+
+static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
+ [PROTECT_SYSTEM_NO] = "no",
+ [PROTECT_SYSTEM_YES] = "yes",
+ [PROTECT_SYSTEM_FULL] = "full",
+ [PROTECT_SYSTEM_STRICT] = "strict",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_system, ProtectSystem, PROTECT_SYSTEM_YES);
+
+static const char* const namespace_type_table[] = {
+ [NAMESPACE_MOUNT] = "mnt",
+ [NAMESPACE_CGROUP] = "cgroup",
+ [NAMESPACE_UTS] = "uts",
+ [NAMESPACE_IPC] = "ipc",
+ [NAMESPACE_USER] = "user",
+ [NAMESPACE_PID] = "pid",
+ [NAMESPACE_NET] = "net",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(namespace_type, NamespaceType);
+
+static const char* const protect_proc_table[_PROTECT_PROC_MAX] = {
+ [PROTECT_PROC_DEFAULT] = "default",
+ [PROTECT_PROC_NOACCESS] = "noaccess",
+ [PROTECT_PROC_INVISIBLE] = "invisible",
+ [PROTECT_PROC_PTRACEABLE] = "ptraceable",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protect_proc, ProtectProc);
+
+static const char* const proc_subset_table[_PROC_SUBSET_MAX] = {
+ [PROC_SUBSET_ALL] = "all",
+ [PROC_SUBSET_PID] = "pid",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(proc_subset, ProcSubset);
diff --git a/src/core/namespace.h b/src/core/namespace.h
new file mode 100644
index 0000000..da0861c
--- /dev/null
+++ b/src/core/namespace.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2016 Djalal Harouni
+***/
+
+typedef struct NamespaceInfo NamespaceInfo;
+typedef struct BindMount BindMount;
+typedef struct TemporaryFileSystem TemporaryFileSystem;
+typedef struct MountImage MountImage;
+
+#include <stdbool.h>
+
+#include "dissect-image.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+typedef enum ProtectHome {
+ PROTECT_HOME_NO,
+ PROTECT_HOME_YES,
+ PROTECT_HOME_READ_ONLY,
+ PROTECT_HOME_TMPFS,
+ _PROTECT_HOME_MAX,
+ _PROTECT_HOME_INVALID = -1
+} ProtectHome;
+
+typedef enum NamespaceType {
+ NAMESPACE_MOUNT,
+ NAMESPACE_CGROUP,
+ NAMESPACE_UTS,
+ NAMESPACE_IPC,
+ NAMESPACE_USER,
+ NAMESPACE_PID,
+ NAMESPACE_NET,
+ _NAMESPACE_TYPE_MAX,
+ _NAMESPACE_TYPE_INVALID = -1,
+} NamespaceType;
+
+typedef enum ProtectSystem {
+ PROTECT_SYSTEM_NO,
+ PROTECT_SYSTEM_YES,
+ PROTECT_SYSTEM_FULL,
+ PROTECT_SYSTEM_STRICT,
+ _PROTECT_SYSTEM_MAX,
+ _PROTECT_SYSTEM_INVALID = -1
+} ProtectSystem;
+
+typedef enum ProtectProc {
+ PROTECT_PROC_DEFAULT,
+ PROTECT_PROC_NOACCESS, /* hidepid=noaccess */
+ PROTECT_PROC_INVISIBLE, /* hidepid=invisible */
+ PROTECT_PROC_PTRACEABLE, /* hidepid=ptraceable */
+ _PROTECT_PROC_MAX,
+ _PROTECT_PROC_INVALID = -1,
+} ProtectProc;
+
+typedef enum ProcSubset {
+ PROC_SUBSET_ALL,
+ PROC_SUBSET_PID, /* subset=pid */
+ _PROC_SUBSET_MAX,
+ _PROC_SUBSET_INVALID = -1,
+} ProcSubset;
+
+struct NamespaceInfo {
+ bool ignore_protect_paths;
+ bool private_dev;
+ bool private_mounts;
+ bool protect_control_groups;
+ bool protect_kernel_tunables;
+ bool protect_kernel_modules;
+ bool protect_kernel_logs;
+ bool mount_apivfs;
+ bool protect_hostname;
+ ProtectHome protect_home;
+ ProtectSystem protect_system;
+ ProtectProc protect_proc;
+ ProcSubset proc_subset;
+};
+
+struct BindMount {
+ char *source;
+ char *destination;
+ bool read_only;
+ bool nosuid;
+ bool recursive;
+ bool ignore_enoent;
+};
+
+struct TemporaryFileSystem {
+ char *path;
+ char *options;
+};
+
+struct MountImage {
+ char *source;
+ char *destination;
+ LIST_HEAD(MountOptions, mount_options);
+ bool ignore_enoent;
+};
+
+int setup_namespace(
+ const char *root_directory,
+ const char *root_image,
+ const MountOptions *root_image_options,
+ const NamespaceInfo *ns_info,
+ char **read_write_paths,
+ char **read_only_paths,
+ char **inaccessible_paths,
+ char **empty_directories,
+ const BindMount *bind_mounts,
+ size_t n_bind_mounts,
+ const TemporaryFileSystem *temporary_filesystems,
+ size_t n_temporary_filesystems,
+ const MountImage *mount_images,
+ size_t n_mount_images,
+ const char *tmp_dir,
+ const char *var_tmp_dir,
+ const char *creds_path,
+ const char *log_namespace,
+ unsigned long mount_flags,
+ const void *root_hash,
+ size_t root_hash_size,
+ const char *root_hash_path,
+ const void *root_hash_sig,
+ size_t root_hash_sig_size,
+ const char *root_hash_sig_path,
+ const char *root_verity,
+ DissectImageFlags dissected_image_flags,
+ char **error_path);
+
+#define RUN_SYSTEMD_EMPTY "/run/systemd/empty"
+
+static inline void namespace_cleanup_tmpdir(char *p) {
+ PROTECT_ERRNO;
+ if (!streq_ptr(p, RUN_SYSTEMD_EMPTY))
+ (void) rmdir(p);
+ free(p);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, namespace_cleanup_tmpdir);
+
+int setup_tmp_dirs(
+ const char *id,
+ char **tmp_dir,
+ char **var_tmp_dir);
+
+int setup_netns(const int netns_storage_socket[static 2]);
+int open_netns_path(const int netns_storage_socket[static 2], const char *path);
+
+const char* protect_home_to_string(ProtectHome p) _const_;
+ProtectHome protect_home_from_string(const char *s) _pure_;
+
+const char* protect_system_to_string(ProtectSystem p) _const_;
+ProtectSystem protect_system_from_string(const char *s) _pure_;
+
+const char* protect_proc_to_string(ProtectProc i) _const_;
+ProtectProc protect_proc_from_string(const char *s) _pure_;
+
+const char* proc_subset_to_string(ProcSubset i) _const_;
+ProcSubset proc_subset_from_string(const char *s) _pure_;
+
+void bind_mount_free_many(BindMount *b, size_t n);
+int bind_mount_add(BindMount **b, size_t *n, const BindMount *item);
+
+void temporary_filesystem_free_many(TemporaryFileSystem *t, size_t n);
+int temporary_filesystem_add(TemporaryFileSystem **t, size_t *n,
+ const char *path, const char *options);
+
+MountImage* mount_image_free_many(MountImage *m, size_t *n);
+int mount_image_add(MountImage **m, size_t *n, const MountImage *item);
+
+const char* namespace_type_to_string(NamespaceType t) _const_;
+NamespaceType namespace_type_from_string(const char *s) _pure_;
+
+bool ns_type_supported(NamespaceType type);
diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf
new file mode 100644
index 0000000..8b32379
--- /dev/null
+++ b/src/core/org.freedesktop.systemd1.conf
@@ -0,0 +1,404 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.systemd1"/>
+
+ <!-- Root clients can do everything -->
+ <allow send_destination="org.freedesktop.systemd1"/>
+ <allow receive_sender="org.freedesktop.systemd1"/>
+
+ <!-- systemd may receive activator requests -->
+ <allow receive_interface="org.freedesktop.systemd1.Activator"
+ receive_member="ActivationRequest"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.systemd1"/>
+
+ <!-- Completely open to anyone: org.freedesktop.DBus.* interfaces -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Manager interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitByPID"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitByInvocationID"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitByControlGroup"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LoadUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitProcesses"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetJob"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetJobAfter"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetJobBefore"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnits"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitsFiltered"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitsByPatterns"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitsByNames"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListJobs"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Subscribe"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Unsubscribe"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Dump"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="DumpByFileDescriptor"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ListUnitFilesByPatterns"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitFileState"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetDefaultTarget"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitFileLinks"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LookupDynamicUserByName"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LookupDynamicUserByUID"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetDynamicUsers"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Unit interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Service"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Slice interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Slice"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Scope interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Scope"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Socket interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Socket"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Mount interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Mount"
+ send_member="GetProcesses"/>
+
+ <!-- Completely open to anyone: org.freedesktop.systemd1.Swap interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Swap"
+ send_member="GetProcesses"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Manager interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="StartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="StartUnitReplace"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="StopUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ReloadUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="RestartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="TryRestartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ReloadOrRestartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ReloadOrTryRestartUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="KillUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ResetFailedUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="SetUnitProperties"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="RefUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="UnrefUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="StartTransientUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="AttachProcessesToUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="CancelJob"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ClearJobs"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ResetFailed"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Reload"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="Reexecute"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="EnableUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="DisableUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="ReenableUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LinkUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="PresetUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="PresetUnitFilesWithMode"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="MaskUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="UnmaskUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="RevertUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="SetDefaultTarget"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="PresetAllUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="AddDependencyUnitFiles"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="SetShowStatus"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Job interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Job"
+ send_member="Cancel"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Job"
+ send_member="GetAfter"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Job"
+ send_member="GetBefore"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Unit interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Start"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Stop"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Reload"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Restart"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="TryRestart"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="ReloadOrRestart"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="ReloadOrTryRestart"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Kill"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="ResetFailed"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="SetProperties"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Ref"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Unit"
+ send_member="Unref"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Service interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Service"
+ send_member="AttachProcesses"/>
+
+ <!-- Managed via polkit or other criteria: org.freedesktop.systemd1.Scope interface -->
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Scope"
+ send_member="AttachProcesses"/>
+
+ <allow receive_sender="org.freedesktop.systemd1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/core/org.freedesktop.systemd1.policy.in b/src/core/org.freedesktop.systemd1.policy.in
new file mode 100644
index 0000000..a6d40d7
--- /dev/null
+++ b/src/core/org.freedesktop.systemd1.policy.in
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.systemd1.reply-password">
+ <description gettext-domain="systemd">Send passphrase back to system</description>
+ <message gettext-domain="systemd">Authentication is required to send the entered passphrase back to the system.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>no</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.exec.path">@rootlibexecdir@/systemd-reply-password</annotate>
+ </action>
+
+ <action id="org.freedesktop.systemd1.manage-units">
+ <description gettext-domain="systemd">Manage system services or other units</description>
+ <message gettext-domain="systemd">Authentication is required to manage system services or other units.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.systemd1.manage-unit-files">
+ <description gettext-domain="systemd">Manage system service or unit files</description>
+ <message gettext-domain="systemd">Authentication is required to manage system service or unit files.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.systemd1.reload-daemon org.freedesktop.systemd1.manage-units</annotate>
+ </action>
+
+ <action id="org.freedesktop.systemd1.set-environment">
+ <description gettext-domain="systemd">Set or unset system and service manager environment variables</description>
+ <message gettext-domain="systemd">Authentication is required to set or unset system and service manager environment variables.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.systemd1.reload-daemon">
+ <description gettext-domain="systemd">Reload the systemd state</description>
+ <message gettext-domain="systemd">Authentication is required to reload the systemd state.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/core/org.freedesktop.systemd1.service b/src/core/org.freedesktop.systemd1.service
new file mode 100644
index 0000000..082125f
--- /dev/null
+++ b/src/core/org.freedesktop.systemd1.service
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.systemd1
+Exec=/bin/false
+User=root
diff --git a/src/core/path.c b/src/core/path.c
new file mode 100644
index 0000000..ca3a91d
--- /dev/null
+++ b/src/core/path.c
@@ -0,0 +1,846 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "bus-error.h"
+#include "bus-util.h"
+#include "dbus-path.h"
+#include "dbus-unit.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path.h"
+#include "path-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_PATH_STATE_MAX] = {
+ [PATH_DEAD] = UNIT_INACTIVE,
+ [PATH_WAITING] = UNIT_ACTIVE,
+ [PATH_RUNNING] = UNIT_ACTIVE,
+ [PATH_FAILED] = UNIT_FAILED,
+};
+
+static int path_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+
+int path_spec_watch(PathSpec *s, sd_event_io_handler_t handler) {
+ static const int flags_table[_PATH_TYPE_MAX] = {
+ [PATH_EXISTS] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB,
+ [PATH_EXISTS_GLOB] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB,
+ [PATH_CHANGED] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CLOSE_WRITE|IN_CREATE|IN_DELETE|IN_MOVED_FROM|IN_MOVED_TO,
+ [PATH_MODIFIED] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CLOSE_WRITE|IN_CREATE|IN_DELETE|IN_MOVED_FROM|IN_MOVED_TO|IN_MODIFY,
+ [PATH_DIRECTORY_NOT_EMPTY] = IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CREATE|IN_MOVED_TO,
+ };
+
+ bool exists = false;
+ char *slash, *oldslash = NULL;
+ int r;
+
+ assert(s);
+ assert(s->unit);
+ assert(handler);
+
+ path_spec_unwatch(s);
+
+ s->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (s->inotify_fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = sd_event_add_io(s->unit->manager->event, &s->event_source, s->inotify_fd, EPOLLIN, handler, s);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(s->event_source, "path");
+
+ /* This function assumes the path was passed through path_simplify()! */
+ assert(!strstr(s->path, "//"));
+
+ for (slash = strchr(s->path, '/'); ; slash = strchr(slash+1, '/')) {
+ char *cut = NULL;
+ int flags;
+ char tmp;
+
+ if (slash) {
+ cut = slash + (slash == s->path);
+ tmp = *cut;
+ *cut = '\0';
+
+ flags = IN_MOVE_SELF | IN_DELETE_SELF | IN_ATTRIB | IN_CREATE | IN_MOVED_TO;
+ } else
+ flags = flags_table[s->type];
+
+ r = inotify_add_watch(s->inotify_fd, s->path, flags);
+ if (r < 0) {
+ if (IN_SET(errno, EACCES, ENOENT)) {
+ if (cut)
+ *cut = tmp;
+ break;
+ }
+
+ /* This second call to inotify_add_watch() should fail like the previous
+ * one and is done for logging the error in a comprehensive way. */
+ r = inotify_add_watch_and_warn(s->inotify_fd, s->path, flags);
+ if (r < 0) {
+ if (cut)
+ *cut = tmp;
+ goto fail;
+ }
+
+ /* Hmm, we succeeded in adding the watch this time... let's continue. */
+ }
+ exists = true;
+
+ /* Path exists, we don't need to watch parent too closely. */
+ if (oldslash) {
+ char *cut2 = oldslash + (oldslash == s->path);
+ char tmp2 = *cut2;
+ *cut2 = '\0';
+
+ (void) inotify_add_watch(s->inotify_fd, s->path, IN_MOVE_SELF);
+ /* Error is ignored, the worst can happen is we get spurious events. */
+
+ *cut2 = tmp2;
+ }
+
+ if (cut)
+ *cut = tmp;
+
+ if (slash)
+ oldslash = slash;
+ else {
+ /* whole path has been iterated over */
+ s->primary_wd = r;
+ break;
+ }
+ }
+
+ if (!exists) {
+ r = log_error_errno(errno, "Failed to add watch on any of the components of %s: %m", s->path);
+ /* either EACCESS or ENOENT */
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ path_spec_unwatch(s);
+ return r;
+}
+
+void path_spec_unwatch(PathSpec *s) {
+ assert(s);
+
+ s->event_source = sd_event_source_unref(s->event_source);
+ s->inotify_fd = safe_close(s->inotify_fd);
+}
+
+int path_spec_fd_event(PathSpec *s, uint32_t revents) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+ int r = 0;
+
+ if (revents != EPOLLIN)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Got invalid poll event on inotify.");
+
+ l = read(s->inotify_fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_error_errno(errno, "Failed to read inotify event: %m");
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ if (IN_SET(s->type, PATH_CHANGED, PATH_MODIFIED) &&
+ s->primary_wd == e->wd)
+ r = 1;
+ }
+
+ return r;
+}
+
+static bool path_spec_check_good(PathSpec *s, bool initial, bool from_trigger_notify) {
+ bool b, good = false;
+
+ switch (s->type) {
+
+ case PATH_EXISTS:
+ good = access(s->path, F_OK) >= 0;
+ break;
+
+ case PATH_EXISTS_GLOB:
+ good = glob_exists(s->path) > 0;
+ break;
+
+ case PATH_DIRECTORY_NOT_EMPTY: {
+ int k;
+
+ k = dir_is_empty(s->path);
+ good = !(k == -ENOENT || k > 0);
+ break;
+ }
+
+ case PATH_CHANGED:
+ case PATH_MODIFIED:
+ b = access(s->path, F_OK) >= 0;
+ good = !initial && !from_trigger_notify && b != s->previous_exists;
+ s->previous_exists = b;
+ break;
+
+ default:
+ ;
+ }
+
+ return good;
+}
+
+static void path_spec_mkdir(PathSpec *s, mode_t mode) {
+ int r;
+
+ if (IN_SET(s->type, PATH_EXISTS, PATH_EXISTS_GLOB))
+ return;
+
+ r = mkdir_p_label(s->path, mode);
+ if (r < 0)
+ log_warning_errno(r, "mkdir(%s) failed: %m", s->path);
+}
+
+static void path_spec_dump(PathSpec *s, FILE *f, const char *prefix) {
+ const char *type;
+
+ assert_se(type = path_type_to_string(s->type));
+ fprintf(f, "%s%s: %s\n", prefix, type, s->path);
+}
+
+void path_spec_done(PathSpec *s) {
+ assert(s);
+ assert(s->inotify_fd == -1);
+
+ free(s->path);
+}
+
+static void path_init(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ p->directory_mode = 0755;
+}
+
+void path_free_specs(Path *p) {
+ PathSpec *s;
+
+ assert(p);
+
+ while ((s = p->specs)) {
+ path_spec_unwatch(s);
+ LIST_REMOVE(spec, p->specs, s);
+ path_spec_done(s);
+ free(s);
+ }
+}
+
+static void path_done(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(p);
+
+ path_free_specs(p);
+}
+
+static int path_add_mount_dependencies(Path *p) {
+ PathSpec *s;
+ int r;
+
+ assert(p);
+
+ LIST_FOREACH(spec, s, p->specs) {
+ r = unit_require_mounts_for(UNIT(p), s->path, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int path_verify(Path *p) {
+ assert(p);
+ assert(UNIT(p)->load_state == UNIT_LOADED);
+
+ if (!p->specs) {
+ log_unit_error(UNIT(p), "Path unit lacks path setting. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int path_add_default_dependencies(Path *p) {
+ int r;
+
+ assert(p);
+
+ if (!UNIT(p)->default_dependencies)
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(p), UNIT_BEFORE, SPECIAL_PATHS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(UNIT(p)->manager)) {
+ r = unit_add_two_dependencies_by_name(UNIT(p), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ }
+
+ return unit_add_two_dependencies_by_name(UNIT(p), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int path_add_trigger_dependencies(Path *p) {
+ Unit *x;
+ int r;
+
+ assert(p);
+
+ if (!hashmap_isempty(UNIT(p)->dependencies[UNIT_TRIGGERS]))
+ return 0;
+
+ r = unit_load_related_unit(UNIT(p), ".service", &x);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies(UNIT(p), UNIT_BEFORE, UNIT_TRIGGERS, x, true, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static int path_add_extras(Path *p) {
+ int r;
+
+ r = path_add_trigger_dependencies(p);
+ if (r < 0)
+ return r;
+
+ r = path_add_mount_dependencies(p);
+ if (r < 0)
+ return r;
+
+ return path_add_default_dependencies(p);
+}
+
+static int path_load(Unit *u) {
+ Path *p = PATH(u);
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ r = unit_load_fragment_and_dropin(u, true);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ r = path_add_extras(p);
+ if (r < 0)
+ return r;
+
+ return path_verify(p);
+}
+
+static void path_dump(Unit *u, FILE *f, const char *prefix) {
+ Path *p = PATH(u);
+ Unit *trigger;
+ PathSpec *s;
+
+ assert(p);
+ assert(f);
+
+ trigger = UNIT_TRIGGER(u);
+
+ fprintf(f,
+ "%sPath State: %s\n"
+ "%sResult: %s\n"
+ "%sUnit: %s\n"
+ "%sMakeDirectory: %s\n"
+ "%sDirectoryMode: %04o\n",
+ prefix, path_state_to_string(p->state),
+ prefix, path_result_to_string(p->result),
+ prefix, trigger ? trigger->id : "n/a",
+ prefix, yes_no(p->make_directory),
+ prefix, p->directory_mode);
+
+ LIST_FOREACH(spec, s, p->specs)
+ path_spec_dump(s, f, prefix);
+}
+
+static void path_unwatch(Path *p) {
+ PathSpec *s;
+
+ assert(p);
+
+ LIST_FOREACH(spec, s, p->specs)
+ path_spec_unwatch(s);
+}
+
+static int path_watch(Path *p) {
+ int r;
+ PathSpec *s;
+
+ assert(p);
+
+ LIST_FOREACH(spec, s, p->specs) {
+ r = path_spec_watch(s, path_dispatch_io);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void path_set_state(Path *p, PathState state) {
+ PathState old_state;
+ assert(p);
+
+ if (p->state != state)
+ bus_unit_send_pending_change_signal(UNIT(p), false);
+
+ old_state = p->state;
+ p->state = state;
+
+ if (!IN_SET(state, PATH_WAITING, PATH_RUNNING))
+ path_unwatch(p);
+
+ if (state != old_state)
+ log_unit_debug(UNIT(p), "Changed %s -> %s", path_state_to_string(old_state), path_state_to_string(state));
+
+ unit_notify(UNIT(p), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static void path_enter_waiting(Path *p, bool initial, bool from_trigger_notify);
+
+static int path_coldplug(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(p);
+ assert(p->state == PATH_DEAD);
+
+ if (p->deserialized_state != p->state) {
+
+ if (IN_SET(p->deserialized_state, PATH_WAITING, PATH_RUNNING))
+ path_enter_waiting(p, true, false);
+ else
+ path_set_state(p, p->deserialized_state);
+ }
+
+ return 0;
+}
+
+static void path_enter_dead(Path *p, PathResult f) {
+ assert(p);
+
+ if (p->result == PATH_SUCCESS)
+ p->result = f;
+
+ unit_log_result(UNIT(p), p->result == PATH_SUCCESS, path_result_to_string(p->result));
+ path_set_state(p, p->result != PATH_SUCCESS ? PATH_FAILED : PATH_DEAD);
+}
+
+static void path_enter_running(Path *p) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Unit *trigger;
+ int r;
+
+ assert(p);
+
+ /* Don't start job if we are supposed to go down */
+ if (unit_stop_pending(UNIT(p)))
+ return;
+
+ trigger = UNIT_TRIGGER(UNIT(p));
+ if (!trigger) {
+ log_unit_error(UNIT(p), "Unit to trigger vanished.");
+ path_enter_dead(p, PATH_FAILURE_RESOURCES);
+ return;
+ }
+
+ r = manager_add_job(UNIT(p)->manager, JOB_START, trigger, JOB_REPLACE, NULL, &error, NULL);
+ if (r < 0)
+ goto fail;
+
+ path_set_state(p, PATH_RUNNING);
+ path_unwatch(p);
+
+ return;
+
+fail:
+ log_unit_warning(UNIT(p), "Failed to queue unit startup job: %s", bus_error_message(&error, r));
+ path_enter_dead(p, PATH_FAILURE_RESOURCES);
+}
+
+static bool path_check_good(Path *p, bool initial, bool from_trigger_notify) {
+ PathSpec *s;
+
+ assert(p);
+
+ LIST_FOREACH(spec, s, p->specs)
+ if (path_spec_check_good(s, initial, from_trigger_notify))
+ return true;
+
+ return false;
+}
+
+static void path_enter_waiting(Path *p, bool initial, bool from_trigger_notify) {
+ Unit *trigger;
+ int r;
+
+ /* If the triggered unit is already running, so are we */
+ trigger = UNIT_TRIGGER(UNIT(p));
+ if (trigger && !UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(trigger))) {
+ path_set_state(p, PATH_RUNNING);
+ path_unwatch(p);
+ return;
+ }
+
+ if (path_check_good(p, initial, from_trigger_notify)) {
+ log_unit_debug(UNIT(p), "Got triggered.");
+ path_enter_running(p);
+ return;
+ }
+
+ r = path_watch(p);
+ if (r < 0)
+ goto fail;
+
+ /* Hmm, so now we have created inotify watches, but the file
+ * might have appeared/been removed by now, so we must
+ * recheck */
+
+ if (path_check_good(p, false, from_trigger_notify)) {
+ log_unit_debug(UNIT(p), "Got triggered.");
+ path_enter_running(p);
+ return;
+ }
+
+ path_set_state(p, PATH_WAITING);
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(p), r, "Failed to enter waiting state: %m");
+ path_enter_dead(p, PATH_FAILURE_RESOURCES);
+}
+
+static void path_mkdir(Path *p) {
+ PathSpec *s;
+
+ assert(p);
+
+ if (!p->make_directory)
+ return;
+
+ LIST_FOREACH(spec, s, p->specs)
+ path_spec_mkdir(s, p->directory_mode);
+}
+
+static int path_start(Unit *u) {
+ Path *p = PATH(u);
+ int r;
+
+ assert(p);
+ assert(IN_SET(p->state, PATH_DEAD, PATH_FAILED));
+
+ r = unit_test_trigger_loaded(u);
+ if (r < 0)
+ return r;
+
+ r = unit_test_start_limit(u);
+ if (r < 0) {
+ path_enter_dead(p, PATH_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ path_mkdir(p);
+
+ p->result = PATH_SUCCESS;
+ path_enter_waiting(p, true, false);
+
+ return 1;
+}
+
+static int path_stop(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(p);
+ assert(IN_SET(p->state, PATH_WAITING, PATH_RUNNING));
+
+ path_enter_dead(p, PATH_SUCCESS);
+ return 1;
+}
+
+static int path_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Path *p = PATH(u);
+ PathSpec *s;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", path_state_to_string(p->state));
+ (void) serialize_item(f, "result", path_result_to_string(p->result));
+
+ LIST_FOREACH(spec, s, p->specs) {
+ const char *type;
+ _cleanup_free_ char *escaped = NULL;
+
+ escaped = cescape(s->path);
+ if (!escaped)
+ return log_oom();
+
+ assert_se(type = path_type_to_string(s->type));
+ (void) serialize_item_format(f, "path-spec", "%s %i %s",
+ type,
+ s->previous_exists,
+ escaped);
+ }
+
+ return 0;
+}
+
+static int path_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Path *p = PATH(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ PathState state;
+
+ state = path_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ p->deserialized_state = state;
+
+ } else if (streq(key, "result")) {
+ PathResult f;
+
+ f = path_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != PATH_SUCCESS)
+ p->result = f;
+
+ } else if (streq(key, "path-spec")) {
+ int previous_exists, skip = 0, r;
+ _cleanup_free_ char *type_str = NULL;
+
+ if (sscanf(value, "%ms %i %n", &type_str, &previous_exists, &skip) < 2)
+ log_unit_debug(u, "Failed to parse path-spec value: %s", value);
+ else {
+ _cleanup_free_ char *unescaped = NULL;
+ PathType type;
+ PathSpec *s;
+
+ type = path_type_from_string(type_str);
+ if (type < 0) {
+ log_unit_warning(u, "Unknown path type \"%s\", ignoring.", type_str);
+ return 0;
+ }
+
+ r = cunescape(value+skip, 0, &unescaped);
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "Failed to unescape serialize path: %m");
+ return 0;
+ }
+
+ LIST_FOREACH(spec, s, p->specs)
+ if (s->type == type &&
+ path_equal(s->path, unescaped)) {
+
+ s->previous_exists = previous_exists;
+ break;
+ }
+ }
+
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState path_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[PATH(u)->state];
+}
+
+_pure_ static const char *path_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return path_state_to_string(PATH(u)->state);
+}
+
+static int path_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ PathSpec *s = userdata;
+ Path *p;
+ int changed;
+
+ assert(s);
+ assert(s->unit);
+ assert(fd >= 0);
+
+ p = PATH(s->unit);
+
+ if (!IN_SET(p->state, PATH_WAITING, PATH_RUNNING))
+ return 0;
+
+ /* log_debug("inotify wakeup on %s.", UNIT(p)->id); */
+
+ LIST_FOREACH(spec, s, p->specs)
+ if (path_spec_owns_inotify_fd(s, fd))
+ break;
+
+ if (!s) {
+ log_error("Got event on unknown fd.");
+ goto fail;
+ }
+
+ changed = path_spec_fd_event(s, revents);
+ if (changed < 0)
+ goto fail;
+
+ if (changed)
+ path_enter_running(p);
+ else
+ path_enter_waiting(p, false, false);
+
+ return 0;
+
+fail:
+ path_enter_dead(p, PATH_FAILURE_RESOURCES);
+ return 0;
+}
+
+static void path_trigger_notify(Unit *u, Unit *other) {
+ Path *p = PATH(u);
+
+ assert(u);
+ assert(other);
+
+ /* Invoked whenever the unit we trigger changes state or gains or loses a job */
+
+ /* Filter out invocations with bogus state */
+ assert(UNIT_IS_LOAD_COMPLETE(other->load_state));
+
+ /* Don't propagate state changes from the triggered unit if we are already down */
+ if (!IN_SET(p->state, PATH_WAITING, PATH_RUNNING))
+ return;
+
+ /* Propagate start limit hit state */
+ if (other->start_limit_hit) {
+ path_enter_dead(p, PATH_FAILURE_UNIT_START_LIMIT_HIT);
+ return;
+ }
+
+ /* Don't propagate anything if there's still a job queued */
+ if (other->job)
+ return;
+
+ if (p->state == PATH_RUNNING &&
+ UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other))) {
+ log_unit_debug(UNIT(p), "Got notified about unit deactivation.");
+ path_enter_waiting(p, false, true);
+ } else if (p->state == PATH_WAITING &&
+ !UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other))) {
+ log_unit_debug(UNIT(p), "Got notified about unit activation.");
+ path_enter_waiting(p, false, true);
+ }
+}
+
+static void path_reset_failed(Unit *u) {
+ Path *p = PATH(u);
+
+ assert(p);
+
+ if (p->state == PATH_FAILED)
+ path_set_state(p, PATH_DEAD);
+
+ p->result = PATH_SUCCESS;
+}
+
+static const char* const path_type_table[_PATH_TYPE_MAX] = {
+ [PATH_EXISTS] = "PathExists",
+ [PATH_EXISTS_GLOB] = "PathExistsGlob",
+ [PATH_DIRECTORY_NOT_EMPTY] = "DirectoryNotEmpty",
+ [PATH_CHANGED] = "PathChanged",
+ [PATH_MODIFIED] = "PathModified",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(path_type, PathType);
+
+static const char* const path_result_table[_PATH_RESULT_MAX] = {
+ [PATH_SUCCESS] = "success",
+ [PATH_FAILURE_RESOURCES] = "resources",
+ [PATH_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [PATH_FAILURE_UNIT_START_LIMIT_HIT] = "unit-start-limit-hit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(path_result, PathResult);
+
+const UnitVTable path_vtable = {
+ .object_size = sizeof(Path),
+
+ .sections =
+ "Unit\0"
+ "Path\0"
+ "Install\0",
+ .private_section = "Path",
+
+ .can_transient = true,
+ .can_fail = true,
+ .can_trigger = true,
+
+ .init = path_init,
+ .done = path_done,
+ .load = path_load,
+
+ .coldplug = path_coldplug,
+
+ .dump = path_dump,
+
+ .start = path_start,
+ .stop = path_stop,
+
+ .serialize = path_serialize,
+ .deserialize_item = path_deserialize_item,
+
+ .active_state = path_active_state,
+ .sub_state_to_string = path_sub_state_to_string,
+
+ .trigger_notify = path_trigger_notify,
+
+ .reset_failed = path_reset_failed,
+
+ .bus_set_property = bus_path_set_property,
+};
diff --git a/src/core/path.h b/src/core/path.h
new file mode 100644
index 0000000..fb33b12
--- /dev/null
+++ b/src/core/path.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Path Path;
+typedef struct PathSpec PathSpec;
+
+#include "unit.h"
+
+typedef enum PathType {
+ PATH_EXISTS,
+ PATH_EXISTS_GLOB,
+ PATH_DIRECTORY_NOT_EMPTY,
+ PATH_CHANGED,
+ PATH_MODIFIED,
+ _PATH_TYPE_MAX,
+ _PATH_TYPE_INVALID = -1
+} PathType;
+
+typedef struct PathSpec {
+ Unit *unit;
+
+ char *path;
+
+ sd_event_source *event_source;
+
+ LIST_FIELDS(struct PathSpec, spec);
+
+ PathType type;
+ int inotify_fd;
+ int primary_wd;
+
+ bool previous_exists;
+} PathSpec;
+
+int path_spec_watch(PathSpec *s, sd_event_io_handler_t handler);
+void path_spec_unwatch(PathSpec *s);
+int path_spec_fd_event(PathSpec *s, uint32_t events);
+void path_spec_done(PathSpec *s);
+
+static inline bool path_spec_owns_inotify_fd(PathSpec *s, int fd) {
+ return s->inotify_fd == fd;
+}
+
+typedef enum PathResult {
+ PATH_SUCCESS,
+ PATH_FAILURE_RESOURCES,
+ PATH_FAILURE_START_LIMIT_HIT,
+ PATH_FAILURE_UNIT_START_LIMIT_HIT,
+ _PATH_RESULT_MAX,
+ _PATH_RESULT_INVALID = -1
+} PathResult;
+
+struct Path {
+ Unit meta;
+
+ LIST_HEAD(PathSpec, specs);
+
+ PathState state, deserialized_state;
+
+ bool make_directory;
+ mode_t directory_mode;
+
+ PathResult result;
+};
+
+void path_free_specs(Path *p);
+
+extern const UnitVTable path_vtable;
+
+const char* path_type_to_string(PathType i) _const_;
+PathType path_type_from_string(const char *s) _pure_;
+
+const char* path_result_to_string(PathResult i) _const_;
+PathResult path_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(PATH, Path);
diff --git a/src/core/scope.c b/src/core/scope.c
new file mode 100644
index 0000000..5448d44
--- /dev/null
+++ b/src/core/scope.c
@@ -0,0 +1,700 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dbus-scope.h"
+#include "dbus-unit.h"
+#include "load-dropin.h"
+#include "log.h"
+#include "process-util.h"
+#include "scope.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
+ [SCOPE_DEAD] = UNIT_INACTIVE,
+ [SCOPE_RUNNING] = UNIT_ACTIVE,
+ [SCOPE_ABANDONED] = UNIT_ACTIVE,
+ [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
+ [SCOPE_STOP_SIGKILL] = UNIT_DEACTIVATING,
+ [SCOPE_FAILED] = UNIT_FAILED
+};
+
+static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+
+static void scope_init(Unit *u) {
+ Scope *s = SCOPE(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ s->runtime_max_usec = USEC_INFINITY;
+ s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
+ u->ignore_on_isolate = true;
+}
+
+static void scope_done(Unit *u) {
+ Scope *s = SCOPE(u);
+
+ assert(u);
+
+ s->controller = mfree(s->controller);
+ s->controller_track = sd_bus_track_unref(s->controller_track);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+}
+
+static int scope_arm_timer(Scope *s, usec_t usec) {
+ int r;
+
+ assert(s);
+
+ if (s->timer_event_source) {
+ r = sd_event_source_set_time(s->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(s->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ scope_dispatch_timer, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timer_event_source, "scope-timer");
+
+ return 0;
+}
+
+static void scope_set_state(Scope *s, ScopeState state) {
+ ScopeState old_state;
+ assert(s);
+
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
+ old_state = s->state;
+ s->state = state;
+
+ if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
+ unit_unwatch_all_pids(UNIT(s));
+ unit_dequeue_rewatch_pids(UNIT(s));
+ }
+
+ if (state != old_state)
+ log_debug("%s changed %s -> %s", UNIT(s)->id, scope_state_to_string(old_state), scope_state_to_string(state));
+
+ unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int scope_add_default_dependencies(Scope *s) {
+ int r;
+
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ /* Make sure scopes are unloaded on shutdown */
+ r = unit_add_two_dependencies_by_name(
+ UNIT(s),
+ UNIT_BEFORE, UNIT_CONFLICTS,
+ SPECIAL_SHUTDOWN_TARGET, true,
+ UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int scope_verify(Scope *s) {
+ assert(s);
+ assert(UNIT(s)->load_state == UNIT_LOADED);
+
+ if (set_isempty(UNIT(s)->pids) &&
+ !MANAGER_IS_RELOADING(UNIT(s)->manager) &&
+ !unit_has_name(UNIT(s), SPECIAL_INIT_SCOPE)) {
+ log_unit_error(UNIT(s), "Scope has no PIDs. Refusing.");
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int scope_load_init_scope(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return 0;
+
+ u->transient = true;
+ u->perpetual = true;
+
+ /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
+ * synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+
+ /* Prettify things, if we can. */
+ if (!u->description)
+ u->description = strdup("System and Service Manager");
+ if (!u->documentation)
+ (void) strv_extend(&u->documentation, "man:systemd(1)");
+
+ return 1;
+}
+
+static int scope_add_extras(Scope *s) {
+ int r;
+
+ r = unit_patch_contexts(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = unit_set_default_slice(UNIT(s));
+ if (r < 0)
+ return r;
+
+ return scope_add_default_dependencies(s);
+}
+
+static int scope_load(Unit *u) {
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(s);
+ assert(u->load_state == UNIT_STUB);
+
+ if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
+ /* Refuse to load non-transient scope units, but allow them while reloading. */
+ return -ENOENT;
+
+ r = scope_load_init_scope(u);
+ if (r < 0)
+ return r;
+
+ r = unit_load_fragment_and_dropin(u, false);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ r = scope_add_extras(s);
+ if (r < 0)
+ return r;
+
+ return scope_verify(s);
+}
+
+static usec_t scope_coldplug_timeout(Scope *s) {
+ assert(s);
+
+ switch (s->deserialized_state) {
+
+ case SCOPE_RUNNING:
+ return usec_add(UNIT(s)->active_enter_timestamp.monotonic, s->runtime_max_usec);
+
+ case SCOPE_STOP_SIGKILL:
+ case SCOPE_STOP_SIGTERM:
+ return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_stop_usec);
+
+ default:
+ return USEC_INFINITY;
+ }
+}
+
+static int scope_coldplug(Unit *u) {
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(s);
+ assert(s->state == SCOPE_DEAD);
+
+ if (s->deserialized_state == s->state)
+ return 0;
+
+ r = scope_arm_timer(s, scope_coldplug_timeout(s));
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED)) {
+ if (u->pids) {
+ void *pidp;
+
+ SET_FOREACH(pidp, u->pids) {
+ r = unit_watch_pid(u, PTR_TO_PID(pidp), false);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+ } else
+ (void) unit_enqueue_rewatch_pids(u);
+ }
+
+ bus_scope_track_controller(s);
+
+ scope_set_state(s, s->deserialized_state);
+ return 0;
+}
+
+static void scope_dump(Unit *u, FILE *f, const char *prefix) {
+ Scope *s = SCOPE(u);
+ char buf_runtime[FORMAT_TIMESPAN_MAX];
+
+ assert(s);
+ assert(f);
+
+ fprintf(f,
+ "%sScope State: %s\n"
+ "%sResult: %s\n"
+ "%sRuntimeMaxSec: %s\n",
+ prefix, scope_state_to_string(s->state),
+ prefix, scope_result_to_string(s->result),
+ prefix, format_timespan(buf_runtime, sizeof(buf_runtime), s->runtime_max_usec, USEC_PER_SEC));
+
+ cgroup_context_dump(UNIT(s), f, prefix);
+ kill_context_dump(&s->kill_context, f, prefix);
+}
+
+static void scope_enter_dead(Scope *s, ScopeResult f) {
+ assert(s);
+
+ if (s->result == SCOPE_SUCCESS)
+ s->result = f;
+
+ unit_log_result(UNIT(s), s->result == SCOPE_SUCCESS, scope_result_to_string(s->result));
+ scope_set_state(s, s->result != SCOPE_SUCCESS ? SCOPE_FAILED : SCOPE_DEAD);
+}
+
+static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
+ bool skip_signal = false;
+ int r;
+
+ assert(s);
+
+ if (s->result == SCOPE_SUCCESS)
+ s->result = f;
+
+ /* Before sending any signal, make sure we track all members of this cgroup */
+ (void) unit_watch_all_pids(UNIT(s));
+
+ /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have
+ * died now */
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ /* If we have a controller set let's ask the controller nicely to terminate the scope, instead of us going
+ * directly into SIGTERM berserk mode */
+ if (state == SCOPE_STOP_SIGTERM)
+ skip_signal = bus_scope_send_request_stop(s) > 0;
+
+ if (skip_signal)
+ r = 1; /* wait */
+ else {
+ r = unit_kill_context(
+ UNIT(s),
+ &s->kill_context,
+ state != SCOPE_STOP_SIGTERM ? KILL_KILL :
+ s->was_abandoned ? KILL_TERMINATE_AND_LOG :
+ KILL_TERMINATE,
+ -1, -1, false);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (r > 0) {
+ r = scope_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_stop_usec));
+ if (r < 0)
+ goto fail;
+
+ scope_set_state(s, state);
+ } else if (state == SCOPE_STOP_SIGTERM)
+ scope_enter_signal(s, SCOPE_STOP_SIGKILL, SCOPE_SUCCESS);
+ else
+ scope_enter_dead(s, SCOPE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
+
+ scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
+}
+
+static int scope_start(Unit *u) {
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(s);
+
+ if (unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return -EPERM;
+
+ if (s->state == SCOPE_FAILED)
+ return -EPERM;
+
+ /* We can't fulfill this right now, please try again later */
+ if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ return -EAGAIN;
+
+ assert(s->state == SCOPE_DEAD);
+
+ if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
+ return -ENOENT;
+
+ (void) bus_scope_track_controller(s);
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ (void) unit_realize_cgroup(u);
+ (void) unit_reset_accounting(u);
+
+ unit_export_state_files(u);
+
+ r = unit_attach_pids_to_cgroup(u, u->pids, NULL);
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "Failed to add PIDs to scope's control group: %m");
+ scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
+ return r;
+ }
+
+ s->result = SCOPE_SUCCESS;
+
+ scope_set_state(s, SCOPE_RUNNING);
+
+ /* Set the maximum runtime timeout. */
+ scope_arm_timer(s, usec_add(UNIT(s)->active_enter_timestamp.monotonic, s->runtime_max_usec));
+
+ /* On unified we use proper notifications hence we can unwatch the PIDs
+ * we just attached to the scope. This can also be done on legacy as
+ * we're going to update the list of the processes we watch with the
+ * PIDs currently in the scope anyway. */
+ unit_unwatch_all_pids(u);
+
+ /* Start watching the PIDs currently in the scope (legacy hierarchy only) */
+ (void) unit_enqueue_rewatch_pids(u);
+ return 1;
+}
+
+static int scope_stop(Unit *u) {
+ Scope *s = SCOPE(u);
+
+ assert(s);
+
+ if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ return 0;
+
+ assert(IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED));
+
+ scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_SUCCESS);
+ return 1;
+}
+
+static void scope_reset_failed(Unit *u) {
+ Scope *s = SCOPE(u);
+
+ assert(s);
+
+ if (s->state == SCOPE_FAILED)
+ scope_set_state(s, SCOPE_DEAD);
+
+ s->result = SCOPE_SUCCESS;
+}
+
+static int scope_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ return unit_kill_common(u, who, signo, -1, -1, error);
+}
+
+static int scope_get_timeout(Unit *u, usec_t *timeout) {
+ Scope *s = SCOPE(u);
+ usec_t t;
+ int r;
+
+ if (!s->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(s->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+static int scope_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Scope *s = SCOPE(u);
+ void *pidp;
+
+ assert(s);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", scope_state_to_string(s->state));
+ (void) serialize_bool(f, "was-abandoned", s->was_abandoned);
+
+ if (s->controller)
+ (void) serialize_item(f, "controller", s->controller);
+
+ SET_FOREACH(pidp, u->pids)
+ serialize_item_format(f, "pids", PID_FMT, PTR_TO_PID(pidp));
+
+ return 0;
+}
+
+static int scope_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Scope *s = SCOPE(u);
+ int r;
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ ScopeState state;
+
+ state = scope_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ s->deserialized_state = state;
+
+ } else if (streq(key, "was-abandoned")) {
+ int k;
+
+ k = parse_boolean(value);
+ if (k < 0)
+ log_unit_debug(u, "Failed to parse boolean value: %s", value);
+ else
+ s->was_abandoned = k;
+ } else if (streq(key, "controller")) {
+
+ r = free_and_strdup(&s->controller, value);
+ if (r < 0)
+ return log_oom();
+
+ } else if (streq(key, "pids")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse pids value: %s", value);
+ else {
+ r = set_ensure_allocated(&u->pids, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_put(u->pids, PID_TO_PTR(pid));
+ if (r < 0)
+ return r;
+ }
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static void scope_notify_cgroup_empty_event(Unit *u) {
+ Scope *s = SCOPE(u);
+ assert(u);
+
+ log_unit_debug(u, "cgroup is empty");
+
+ if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+ scope_enter_dead(s, SCOPE_SUCCESS);
+
+ /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
+ * up the cgroup earlier and should do it now. */
+ if (IN_SET(s->state, SCOPE_DEAD, SCOPE_FAILED))
+ unit_prune_cgroup(u);
+}
+
+static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ assert(u);
+
+ /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
+ * watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
+ * process we watched was probably the parent of them, and they are hence now our children. */
+
+ (void) unit_enqueue_rewatch_pids(u);
+}
+
+static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Scope *s = SCOPE(userdata);
+
+ assert(s);
+ assert(s->timer_event_source == source);
+
+ switch (s->state) {
+
+ case SCOPE_RUNNING:
+ log_unit_warning(UNIT(s), "Scope reached runtime time limit. Stopping.");
+ scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_FAILURE_TIMEOUT);
+ break;
+
+ case SCOPE_STOP_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Stopping timed out. Killing.");
+ scope_enter_signal(s, SCOPE_STOP_SIGKILL, SCOPE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Stopping timed out. Skipping SIGKILL.");
+ scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
+ }
+
+ break;
+
+ case SCOPE_STOP_SIGKILL:
+ log_unit_warning(UNIT(s), "Still around after SIGKILL. Ignoring.");
+ scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+int scope_abandon(Scope *s) {
+ assert(s);
+
+ if (unit_has_name(UNIT(s), SPECIAL_INIT_SCOPE))
+ return -EPERM;
+
+ if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED))
+ return -ESTALE;
+
+ s->was_abandoned = true;
+
+ s->controller = mfree(s->controller);
+ s->controller_track = sd_bus_track_unref(s->controller_track);
+
+ scope_set_state(s, SCOPE_ABANDONED);
+
+ /* The client is no longer watching the remaining processes, so let's step in here, under the assumption that
+ * the remaining processes will be sooner or later reassigned to us as parent. */
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ return 0;
+}
+
+_pure_ static UnitActiveState scope_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[SCOPE(u)->state];
+}
+
+_pure_ static const char *scope_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return scope_state_to_string(SCOPE(u)->state);
+}
+
+static void scope_enumerate_perpetual(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ /* Let's unconditionally add the "init.scope" special unit
+ * that encapsulates PID 1. Note that PID 1 already is in the
+ * cgroup for this, we hence just need to allocate the object
+ * for it and that's it. */
+
+ u = manager_get_unit(m, SPECIAL_INIT_SCOPE);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m");
+ return;
+ }
+ }
+
+ u->transient = true;
+ u->perpetual = true;
+ SCOPE(u)->deserialized_state = SCOPE_RUNNING;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+}
+
+static const char* const scope_result_table[_SCOPE_RESULT_MAX] = {
+ [SCOPE_SUCCESS] = "success",
+ [SCOPE_FAILURE_RESOURCES] = "resources",
+ [SCOPE_FAILURE_TIMEOUT] = "timeout",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(scope_result, ScopeResult);
+
+const UnitVTable scope_vtable = {
+ .object_size = sizeof(Scope),
+ .cgroup_context_offset = offsetof(Scope, cgroup_context),
+ .kill_context_offset = offsetof(Scope, kill_context),
+
+ .sections =
+ "Unit\0"
+ "Scope\0"
+ "Install\0",
+ .private_section = "Scope",
+
+ .can_transient = true,
+ .can_delegate = true,
+ .can_fail = true,
+ .once_only = true,
+ .can_set_managed_oom = true,
+
+ .init = scope_init,
+ .load = scope_load,
+ .done = scope_done,
+
+ .coldplug = scope_coldplug,
+
+ .dump = scope_dump,
+
+ .start = scope_start,
+ .stop = scope_stop,
+
+ .kill = scope_kill,
+
+ .freeze = unit_freeze_vtable_common,
+ .thaw = unit_thaw_vtable_common,
+
+ .get_timeout = scope_get_timeout,
+
+ .serialize = scope_serialize,
+ .deserialize_item = scope_deserialize_item,
+
+ .active_state = scope_active_state,
+ .sub_state_to_string = scope_sub_state_to_string,
+
+ .sigchld_event = scope_sigchld_event,
+
+ .reset_failed = scope_reset_failed,
+
+ .notify_cgroup_empty = scope_notify_cgroup_empty_event,
+
+ .bus_set_property = bus_scope_set_property,
+ .bus_commit_properties = bus_scope_commit_properties,
+
+ .enumerate_perpetual = scope_enumerate_perpetual,
+};
diff --git a/src/core/scope.h b/src/core/scope.h
new file mode 100644
index 0000000..5f791b7
--- /dev/null
+++ b/src/core/scope.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Scope Scope;
+
+#include "cgroup.h"
+#include "kill.h"
+#include "unit.h"
+
+typedef enum ScopeResult {
+ SCOPE_SUCCESS,
+ SCOPE_FAILURE_RESOURCES,
+ SCOPE_FAILURE_TIMEOUT,
+ _SCOPE_RESULT_MAX,
+ _SCOPE_RESULT_INVALID = -1
+} ScopeResult;
+
+struct Scope {
+ Unit meta;
+
+ CGroupContext cgroup_context;
+ KillContext kill_context;
+
+ ScopeState state, deserialized_state;
+ ScopeResult result;
+
+ usec_t runtime_max_usec;
+ usec_t timeout_stop_usec;
+
+ char *controller;
+ sd_bus_track *controller_track;
+
+ bool was_abandoned;
+
+ sd_event_source *timer_event_source;
+};
+
+extern const UnitVTable scope_vtable;
+
+int scope_abandon(Scope *s);
+
+const char* scope_result_to_string(ScopeResult i) _const_;
+ScopeResult scope_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(SCOPE, Scope);
diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c
new file mode 100644
index 0000000..18f6fb5
--- /dev/null
+++ b/src/core/selinux-access.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "selinux-access.h"
+
+#if HAVE_SELINUX
+
+#include <errno.h>
+#include <selinux/avc.h>
+#include <selinux/selinux.h>
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "audit-fd.h"
+#include "bus-util.h"
+#include "errno-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "util.h"
+
+static bool initialized = false;
+
+struct audit_info {
+ sd_bus_creds *creds;
+ const char *path;
+ const char *cmdline;
+};
+
+/*
+ Any time an access gets denied this callback will be called
+ with the audit data. We then need to just copy the audit data into the msgbuf.
+*/
+static int audit_callback(
+ void *auditdata,
+ security_class_t cls,
+ char *msgbuf,
+ size_t msgbufsize) {
+
+ const struct audit_info *audit = auditdata;
+ uid_t uid = 0, login_uid = 0;
+ gid_t gid = 0;
+ char login_uid_buf[DECIMAL_STR_MAX(uid_t) + 1] = "n/a";
+ char uid_buf[DECIMAL_STR_MAX(uid_t) + 1] = "n/a";
+ char gid_buf[DECIMAL_STR_MAX(gid_t) + 1] = "n/a";
+
+ if (sd_bus_creds_get_audit_login_uid(audit->creds, &login_uid) >= 0)
+ xsprintf(login_uid_buf, UID_FMT, login_uid);
+ if (sd_bus_creds_get_euid(audit->creds, &uid) >= 0)
+ xsprintf(uid_buf, UID_FMT, uid);
+ if (sd_bus_creds_get_egid(audit->creds, &gid) >= 0)
+ xsprintf(gid_buf, GID_FMT, gid);
+
+ snprintf(msgbuf, msgbufsize,
+ "auid=%s uid=%s gid=%s%s%s%s%s%s%s",
+ login_uid_buf, uid_buf, gid_buf,
+ audit->path ? " path=\"" : "", strempty(audit->path), audit->path ? "\"" : "",
+ audit->cmdline ? " cmdline=\"" : "", strempty(audit->cmdline), audit->cmdline ? "\"" : "");
+
+ return 0;
+}
+
+static int callback_type_to_priority(int type) {
+ switch(type) {
+
+ case SELINUX_ERROR:
+ return LOG_ERR;
+
+ case SELINUX_WARNING:
+ return LOG_WARNING;
+
+ case SELINUX_INFO:
+ return LOG_INFO;
+
+ case SELINUX_AVC:
+ default:
+ return LOG_NOTICE;
+ }
+}
+
+/*
+ libselinux uses this callback when access gets denied or other
+ events happen. If audit is turned on, messages will be reported
+ using audit netlink, otherwise they will be logged using the usual
+ channels.
+
+ Code copied from dbus and modified.
+*/
+_printf_(2, 3) static int log_callback(int type, const char *fmt, ...) {
+ va_list ap;
+ const char *fmt2;
+
+#if HAVE_AUDIT
+ int fd;
+
+ fd = get_audit_fd();
+
+ if (fd >= 0) {
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ va_start(ap, fmt);
+ r = vasprintf(&buf, fmt, ap);
+ va_end(ap);
+
+ if (r >= 0) {
+ if (type == SELINUX_AVC)
+ audit_log_user_avc_message(get_audit_fd(), AUDIT_USER_AVC, buf, NULL, NULL, NULL, 0);
+ else if (type == SELINUX_ERROR)
+ audit_log_user_avc_message(get_audit_fd(), AUDIT_USER_SELINUX_ERR, buf, NULL, NULL, NULL, 0);
+
+ return 0;
+ }
+ }
+#endif
+
+ fmt2 = strjoina("selinux: ", fmt);
+
+ va_start(ap, fmt);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_internalv(LOG_AUTH | callback_type_to_priority(type),
+ 0, PROJECT_FILE, __LINE__, __FUNCTION__,
+ fmt2, ap);
+ REENABLE_WARNING;
+ va_end(ap);
+
+ return 0;
+}
+
+static int access_init(sd_bus_error *error) {
+
+ if (!mac_selinux_use())
+ return 0;
+
+ if (initialized)
+ return 1;
+
+ if (avc_open(NULL, 0) != 0) {
+ int saved_errno = errno;
+ bool enforce;
+
+ enforce = security_getenforce() != 0;
+ log_full_errno(enforce ? LOG_ERR : LOG_WARNING, saved_errno, "Failed to open the SELinux AVC: %m");
+
+ /* If enforcement isn't on, then let's suppress this
+ * error, and just don't do any AVC checks. The
+ * warning we printed is hence all the admin will
+ * see. */
+ if (!enforce)
+ return 0;
+
+ /* Return an access denied error, if we couldn't load
+ * the AVC but enforcing mode was on, or we couldn't
+ * determine whether it is one. */
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to open the SELinux AVC: %s", strerror_safe(saved_errno));
+ }
+
+ selinux_set_callback(SELINUX_CB_AUDIT, (union selinux_callback) audit_callback);
+ selinux_set_callback(SELINUX_CB_LOG, (union selinux_callback) log_callback);
+
+ initialized = true;
+ return 1;
+}
+
+/*
+ This function communicates with the kernel to check whether or not it should
+ allow the access.
+ If the machine is in permissive mode it will return ok. Audit messages will
+ still be generated if the access would be denied in enforcing mode.
+*/
+int mac_selinux_generic_access_check(
+ sd_bus_message *message,
+ const char *path,
+ const char *permission,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *tclass, *scon;
+ _cleanup_free_ char *cl = NULL;
+ _cleanup_freecon_ char *fcon = NULL;
+ char **cmdline = NULL;
+ bool enforce;
+ int r = 0;
+
+ assert(message);
+ assert(permission);
+ assert(error);
+
+ r = access_init(error);
+ if (r <= 0)
+ return r;
+
+ /* delay call until we checked in `access_init()` if SELinux is actually enabled */
+ enforce = mac_selinux_enforcing();
+
+ r = sd_bus_query_sender_creds(
+ message,
+ SD_BUS_CREDS_PID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EGID|
+ SD_BUS_CREDS_CMDLINE|SD_BUS_CREDS_AUDIT_LOGIN_UID|
+ SD_BUS_CREDS_SELINUX_CONTEXT|
+ SD_BUS_CREDS_AUGMENT /* get more bits from /proc */,
+ &creds);
+ if (r < 0)
+ return r;
+
+ /* The SELinux context is something we really should have
+ * gotten directly from the message or sender, and not be an
+ * augmented field. If it was augmented we cannot use it for
+ * authorization, since this is racy and vulnerable. Let's add
+ * an extra check, just in case, even though this really
+ * shouldn't be possible. */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_SELINUX_CONTEXT) == 0, -EPERM);
+
+ r = sd_bus_creds_get_selinux_context(creds, &scon);
+ if (r < 0)
+ return r;
+
+ if (path) {
+ /* Get the file context of the unit file */
+
+ if (getfilecon_raw(path, &fcon) < 0) {
+ r = -errno;
+
+ log_warning_errno(r, "SELinux getfilecon_raw() on '%s' failed%s (perm=%s): %m",
+ path,
+ enforce ? "" : ", ignoring",
+ permission);
+ if (!enforce)
+ return 0;
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to get file context on %s.", path);
+ }
+
+ tclass = "service";
+
+ } else {
+ if (getcon_raw(&fcon) < 0) {
+ r = -errno;
+
+ log_warning_errno(r, "SELinux getcon_raw() failed%s (perm=%s): %m",
+ enforce ? "" : ", ignoring",
+ permission);
+ if (!enforce)
+ return 0;
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Failed to get current context.");
+ }
+
+ tclass = "system";
+ }
+
+ sd_bus_creds_get_cmdline(creds, &cmdline);
+ cl = strv_join(cmdline, " ");
+
+ struct audit_info audit_info = {
+ .creds = creds,
+ .path = path,
+ .cmdline = cl,
+ };
+
+ r = selinux_check_access(scon, fcon, tclass, permission, &audit_info);
+ if (r < 0) {
+ r = errno_or_else(EPERM);
+
+ if (enforce)
+ sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "SELinux policy denies access.");
+ }
+
+ log_debug_errno(r, "SELinux access check scon=%s tcon=%s tclass=%s perm=%s state=%s path=%s cmdline=%s: %m",
+ scon, fcon, tclass, permission, enforce ? "enforcing" : "permissive", path, cl);
+ return enforce ? r : 0;
+}
+
+#else /* HAVE_SELINUX */
+
+int mac_selinux_generic_access_check(
+ sd_bus_message *message,
+ const char *path,
+ const char *permission,
+ sd_bus_error *error) {
+
+ return 0;
+}
+
+#endif /* HAVE_SELINUX */
diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h
new file mode 100644
index 0000000..c6bfb32
--- /dev/null
+++ b/src/core/selinux-access.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "manager.h"
+
+int mac_selinux_generic_access_check(sd_bus_message *message, const char *path, const char *permission, sd_bus_error *error);
+
+#define mac_selinux_access_check(message, permission, error) \
+ mac_selinux_generic_access_check((message), NULL, (permission), (error))
+
+#define mac_selinux_unit_access_check(unit, message, permission, error) \
+ mac_selinux_generic_access_check((message), unit_label_path(unit), (permission), (error))
diff --git a/src/core/selinux-setup.c b/src/core/selinux-setup.c
new file mode 100644
index 0000000..1ac05b8
--- /dev/null
+++ b/src/core/selinux-setup.c
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#include "log.h"
+#include "macro.h"
+#include "selinux-setup.h"
+#include "selinux-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "util.h"
+
+#if HAVE_SELINUX
+_printf_(2,3)
+static int null_log(int type, const char *fmt, ...) {
+ return 0;
+}
+#endif
+
+int mac_selinux_setup(bool *loaded_policy) {
+
+#if HAVE_SELINUX
+ int enforce = 0;
+ usec_t before_load, after_load;
+ char *con;
+ int r;
+ static const union selinux_callback cb = {
+ .func_log = null_log,
+ };
+
+ bool initialized = false;
+
+ assert(loaded_policy);
+
+ /* Turn off all of SELinux' own logging, we want to do that */
+ selinux_set_callback(SELINUX_CB_LOG, cb);
+
+ /* Don't load policy in the initrd if we don't appear to have
+ * it. For the real root, we check below if we've already
+ * loaded policy, and return gracefully.
+ */
+ if (in_initrd() && access(selinux_path(), F_OK) < 0)
+ return 0;
+
+ /* Already initialized by somebody else? */
+ r = getcon_raw(&con);
+ /* getcon_raw can return 0, and still give us a NULL pointer if
+ * /proc/self/attr/current is empty. SELinux guarantees this won't
+ * happen, but that file isn't specific to SELinux, and may be provided
+ * by some other arbitrary LSM with different semantics. */
+ if (r == 0 && con) {
+ initialized = !streq(con, "kernel");
+ freecon(con);
+ }
+
+ /* Make sure we have no fds open while loading the policy and
+ * transitioning */
+ log_close();
+
+ /* Now load the policy */
+ before_load = now(CLOCK_MONOTONIC);
+ r = selinux_init_load_policy(&enforce);
+ if (r == 0) {
+ _cleanup_(mac_selinux_freep) char *label = NULL;
+ char timespan[FORMAT_TIMESPAN_MAX];
+
+ mac_selinux_retest();
+
+ /* Transition to the new context */
+ r = mac_selinux_get_create_label_from_exe(SYSTEMD_BINARY_PATH, &label);
+ if (r < 0 || !label) {
+ log_open();
+ log_error("Failed to compute init label, ignoring.");
+ } else {
+ r = setcon_raw(label);
+
+ log_open();
+ if (r < 0)
+ log_error("Failed to transition into init label '%s', ignoring.", label);
+ }
+
+ after_load = now(CLOCK_MONOTONIC);
+
+ log_info("Successfully loaded SELinux policy in %s.",
+ format_timespan(timespan, sizeof(timespan), after_load - before_load, 0));
+
+ *loaded_policy = true;
+
+ } else {
+ log_open();
+
+ if (enforce > 0) {
+ if (!initialized)
+ return log_emergency_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to load SELinux policy.");
+
+ log_warning("Failed to load new SELinux policy. Continuing with old policy.");
+ } else
+ log_debug("Unable to load SELinux policy. Ignoring.");
+ }
+#endif
+
+ return 0;
+}
diff --git a/src/core/selinux-setup.h b/src/core/selinux-setup.h
new file mode 100644
index 0000000..cdff51d
--- /dev/null
+++ b/src/core/selinux-setup.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int mac_selinux_setup(bool *loaded_policy);
diff --git a/src/core/service.c b/src/core/service.c
new file mode 100644
index 0000000..d7bdeb7
--- /dev/null
+++ b/src/core/service.c
@@ -0,0 +1,4612 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "async.h"
+#include "bus-error.h"
+#include "bus-kernel.h"
+#include "bus-util.h"
+#include "dbus-service.h"
+#include "dbus-unit.h"
+#include "def.h"
+#include "env-util.h"
+#include "escape.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "load-dropin.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "manager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "service.h"
+#include "signal-util.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "utf8.h"
+#include "util.h"
+
+static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
+ [SERVICE_DEAD] = UNIT_INACTIVE,
+ [SERVICE_CONDITION] = UNIT_ACTIVATING,
+ [SERVICE_START_PRE] = UNIT_ACTIVATING,
+ [SERVICE_START] = UNIT_ACTIVATING,
+ [SERVICE_START_POST] = UNIT_ACTIVATING,
+ [SERVICE_RUNNING] = UNIT_ACTIVE,
+ [SERVICE_EXITED] = UNIT_ACTIVE,
+ [SERVICE_RELOAD] = UNIT_RELOADING,
+ [SERVICE_STOP] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_WATCHDOG] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
+ [SERVICE_FAILED] = UNIT_FAILED,
+ [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
+ [SERVICE_CLEANING] = UNIT_MAINTENANCE,
+};
+
+/* For Type=idle we never want to delay any other jobs, hence we
+ * consider idle jobs active as soon as we start working on them */
+static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = {
+ [SERVICE_DEAD] = UNIT_INACTIVE,
+ [SERVICE_CONDITION] = UNIT_ACTIVE,
+ [SERVICE_START_PRE] = UNIT_ACTIVE,
+ [SERVICE_START] = UNIT_ACTIVE,
+ [SERVICE_START_POST] = UNIT_ACTIVE,
+ [SERVICE_RUNNING] = UNIT_ACTIVE,
+ [SERVICE_EXITED] = UNIT_ACTIVE,
+ [SERVICE_RELOAD] = UNIT_RELOADING,
+ [SERVICE_STOP] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
+ [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_WATCHDOG] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
+ [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
+ [SERVICE_FAILED] = UNIT_FAILED,
+ [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
+ [SERVICE_CLEANING] = UNIT_MAINTENANCE,
+};
+
+static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
+static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata);
+static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
+
+static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
+static void service_enter_reload_by_notify(Service *s);
+
+static void service_init(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ s->timeout_start_usec = u->manager->default_timeout_start_usec;
+ s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
+ s->timeout_abort_usec = u->manager->default_timeout_abort_usec;
+ s->timeout_abort_set = u->manager->default_timeout_abort_set;
+ s->restart_usec = u->manager->default_restart_usec;
+ s->runtime_max_usec = USEC_INFINITY;
+ s->type = _SERVICE_TYPE_INVALID;
+ s->socket_fd = -1;
+ s->stdin_fd = s->stdout_fd = s->stderr_fd = -1;
+ s->guess_main_pid = true;
+
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+
+ s->exec_context.keyring_mode = MANAGER_IS_SYSTEM(u->manager) ?
+ EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT;
+
+ s->watchdog_original_usec = USEC_INFINITY;
+
+ s->oom_policy = _OOM_POLICY_INVALID;
+}
+
+static void service_unwatch_control_pid(Service *s) {
+ assert(s);
+
+ if (s->control_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(s), s->control_pid);
+ s->control_pid = 0;
+}
+
+static void service_unwatch_main_pid(Service *s) {
+ assert(s);
+
+ if (s->main_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(s), s->main_pid);
+ s->main_pid = 0;
+}
+
+static void service_unwatch_pid_file(Service *s) {
+ if (!s->pid_file_pathspec)
+ return;
+
+ log_unit_debug(UNIT(s), "Stopping watch for PID file %s", s->pid_file_pathspec->path);
+ path_spec_unwatch(s->pid_file_pathspec);
+ path_spec_done(s->pid_file_pathspec);
+ s->pid_file_pathspec = mfree(s->pid_file_pathspec);
+}
+
+static int service_set_main_pid(Service *s, pid_t pid) {
+ assert(s);
+
+ if (pid <= 1)
+ return -EINVAL;
+
+ if (pid == getpid_cached())
+ return -EINVAL;
+
+ if (s->main_pid == pid && s->main_pid_known)
+ return 0;
+
+ if (s->main_pid != pid) {
+ service_unwatch_main_pid(s);
+ exec_status_start(&s->main_exec_status, pid);
+ }
+
+ s->main_pid = pid;
+ s->main_pid_known = true;
+ s->main_pid_alien = pid_is_my_child(pid) == 0;
+
+ if (s->main_pid_alien)
+ log_unit_warning(UNIT(s), "Supervising process "PID_FMT" which is not our child. We'll most likely not notice when it exits.", pid);
+
+ return 0;
+}
+
+void service_close_socket_fd(Service *s) {
+ assert(s);
+
+ /* Undo the effect of service_set_socket_fd(). */
+
+ s->socket_fd = asynchronous_close(s->socket_fd);
+
+ if (UNIT_ISSET(s->accept_socket)) {
+ socket_connection_unref(SOCKET(UNIT_DEREF(s->accept_socket)));
+ unit_ref_unset(&s->accept_socket);
+ }
+}
+
+static void service_stop_watchdog(Service *s) {
+ assert(s);
+
+ s->watchdog_event_source = sd_event_source_unref(s->watchdog_event_source);
+ s->watchdog_timestamp = DUAL_TIMESTAMP_NULL;
+}
+
+static void service_start_watchdog(Service *s) {
+ usec_t watchdog_usec;
+ int r;
+
+ assert(s);
+
+ watchdog_usec = service_get_watchdog_usec(s);
+ if (IN_SET(watchdog_usec, 0, USEC_INFINITY)) {
+ service_stop_watchdog(s);
+ return;
+ }
+
+ if (s->watchdog_event_source) {
+ r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, watchdog_usec));
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to reset watchdog timer: %m");
+ return;
+ }
+
+ r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ONESHOT);
+ } else {
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->watchdog_event_source,
+ CLOCK_MONOTONIC,
+ usec_add(s->watchdog_timestamp.monotonic, watchdog_usec), 0,
+ service_dispatch_watchdog, s);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to add watchdog timer: %m");
+ return;
+ }
+
+ (void) sd_event_source_set_description(s->watchdog_event_source, "service-watchdog");
+
+ /* Let's process everything else which might be a sign
+ * of living before we consider a service died. */
+ r = sd_event_source_set_priority(s->watchdog_event_source, SD_EVENT_PRIORITY_IDLE);
+ }
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to install watchdog timer: %m");
+}
+
+static void service_extend_event_source_timeout(Service *s, sd_event_source *source, usec_t extended) {
+ usec_t current;
+ int r;
+
+ assert(s);
+
+ /* Extends the specified event source timer to at least the specified time, unless it is already later
+ * anyway. */
+
+ if (!source)
+ return;
+
+ r = sd_event_source_get_time(source, &current);
+ if (r < 0) {
+ const char *desc;
+ (void) sd_event_source_get_description(s->timer_event_source, &desc);
+ log_unit_warning_errno(UNIT(s), r, "Failed to retrieve timeout time for event source '%s', ignoring: %m", strna(desc));
+ return;
+ }
+
+ if (current >= extended) /* Current timeout is already longer, ignore this. */
+ return;
+
+ r = sd_event_source_set_time(source, extended);
+ if (r < 0) {
+ const char *desc;
+ (void) sd_event_source_get_description(s->timer_event_source, &desc);
+ log_unit_warning_errno(UNIT(s), r, "Failed to set timeout time for even source '%s', ignoring %m", strna(desc));
+ }
+}
+
+static void service_extend_timeout(Service *s, usec_t extend_timeout_usec) {
+ usec_t extended;
+
+ assert(s);
+
+ if (IN_SET(extend_timeout_usec, 0, USEC_INFINITY))
+ return;
+
+ extended = usec_add(now(CLOCK_MONOTONIC), extend_timeout_usec);
+
+ service_extend_event_source_timeout(s, s->timer_event_source, extended);
+ service_extend_event_source_timeout(s, s->watchdog_event_source, extended);
+}
+
+static void service_reset_watchdog(Service *s) {
+ assert(s);
+
+ dual_timestamp_get(&s->watchdog_timestamp);
+ service_start_watchdog(s);
+}
+
+static void service_override_watchdog_timeout(Service *s, usec_t watchdog_override_usec) {
+ assert(s);
+
+ s->watchdog_override_enable = true;
+ s->watchdog_override_usec = watchdog_override_usec;
+ service_reset_watchdog(s);
+
+ log_unit_debug(UNIT(s), "watchdog_usec="USEC_FMT, s->watchdog_usec);
+ log_unit_debug(UNIT(s), "watchdog_override_usec="USEC_FMT, s->watchdog_override_usec);
+}
+
+static void service_fd_store_unlink(ServiceFDStore *fs) {
+
+ if (!fs)
+ return;
+
+ if (fs->service) {
+ assert(fs->service->n_fd_store > 0);
+ LIST_REMOVE(fd_store, fs->service->fd_store, fs);
+ fs->service->n_fd_store--;
+ }
+
+ sd_event_source_disable_unref(fs->event_source);
+
+ free(fs->fdname);
+ safe_close(fs->fd);
+ free(fs);
+}
+
+static void service_release_fd_store(Service *s) {
+ assert(s);
+
+ if (s->n_keep_fd_store > 0)
+ return;
+
+ log_unit_debug(UNIT(s), "Releasing all stored fds");
+ while (s->fd_store)
+ service_fd_store_unlink(s->fd_store);
+
+ assert(s->n_fd_store == 0);
+}
+
+static void service_release_resources(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
+ return;
+
+ log_unit_debug(u, "Releasing resources.");
+
+ s->stdin_fd = safe_close(s->stdin_fd);
+ s->stdout_fd = safe_close(s->stdout_fd);
+ s->stderr_fd = safe_close(s->stderr_fd);
+
+ service_release_fd_store(s);
+}
+
+static void service_done(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ s->pid_file = mfree(s->pid_file);
+ s->status_text = mfree(s->status_text);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, false);
+ exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
+ s->control_command = NULL;
+ s->main_command = NULL;
+
+ dynamic_creds_unref(&s->dynamic_creds);
+
+ exit_status_set_free(&s->restart_prevent_status);
+ exit_status_set_free(&s->restart_force_status);
+ exit_status_set_free(&s->success_status);
+
+ /* This will leak a process, but at least no memory or any of
+ * our resources */
+ service_unwatch_main_pid(s);
+ service_unwatch_control_pid(s);
+ service_unwatch_pid_file(s);
+
+ if (s->bus_name) {
+ unit_unwatch_bus_name(u, s->bus_name);
+ s->bus_name = mfree(s->bus_name);
+ }
+
+ s->bus_name_owner = mfree(s->bus_name_owner);
+
+ s->usb_function_descriptors = mfree(s->usb_function_descriptors);
+ s->usb_function_strings = mfree(s->usb_function_strings);
+
+ service_close_socket_fd(s);
+ s->peer = socket_peer_unref(s->peer);
+
+ unit_ref_unset(&s->accept_socket);
+
+ service_stop_watchdog(s);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ service_release_resources(u);
+}
+
+static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ ServiceFDStore *fs = userdata;
+
+ assert(e);
+ assert(fs);
+
+ /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+ log_unit_debug(UNIT(fs->service),
+ "Received %s on stored fd %d (%s), closing.",
+ revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP",
+ fs->fd, strna(fs->fdname));
+ service_fd_store_unlink(fs);
+ return 0;
+}
+
+static int service_add_fd_store(Service *s, int fd, const char *name, bool do_poll) {
+ ServiceFDStore *fs;
+ int r;
+
+ /* fd is always consumed if we return >= 0 */
+
+ assert(s);
+ assert(fd >= 0);
+
+ if (s->n_fd_store >= s->n_fd_store_max)
+ return -EXFULL; /* Our store is full.
+ * Use this errno rather than E[NM]FILE to distinguish from
+ * the case where systemd itself hits the file limit. */
+
+ LIST_FOREACH(fd_store, fs, s->fd_store) {
+ r = same_fd(fs->fd, fd);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ safe_close(fd);
+ return 0; /* fd already included */
+ }
+ }
+
+ fs = new(ServiceFDStore, 1);
+ if (!fs)
+ return -ENOMEM;
+
+ *fs = (ServiceFDStore) {
+ .fd = fd,
+ .service = s,
+ .do_poll = do_poll,
+ .fdname = strdup(name ?: "stored"),
+ };
+
+ if (!fs->fdname) {
+ free(fs);
+ return -ENOMEM;
+ }
+
+ if (do_poll) {
+ r = sd_event_add_io(UNIT(s)->manager->event, &fs->event_source, fd, 0, on_fd_store_io, fs);
+ if (r < 0 && r != -EPERM) { /* EPERM indicates fds that aren't pollable, which is OK */
+ free(fs->fdname);
+ free(fs);
+ return r;
+ } else if (r >= 0)
+ (void) sd_event_source_set_description(fs->event_source, "service-fd-store");
+ }
+
+ LIST_PREPEND(fd_store, s->fd_store, fs);
+ s->n_fd_store++;
+
+ return 1; /* fd newly stored */
+}
+
+static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name, bool do_poll) {
+ int r;
+
+ assert(s);
+
+ while (fdset_size(fds) > 0) {
+ _cleanup_close_ int fd = -1;
+
+ fd = fdset_steal_first(fds);
+ if (fd < 0)
+ break;
+
+ r = service_add_fd_store(s, fd, name, do_poll);
+ if (r == -EXFULL)
+ return log_unit_warning_errno(UNIT(s), r,
+ "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.",
+ s->n_fd_store_max);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m");
+ if (r > 0)
+ log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name));
+ fd = -1;
+ }
+
+ return 0;
+}
+
+static void service_remove_fd_store(Service *s, const char *name) {
+ ServiceFDStore *fs, *n;
+
+ assert(s);
+ assert(name);
+
+ LIST_FOREACH_SAFE(fd_store, fs, n, s->fd_store) {
+ if (!streq(fs->fdname, name))
+ continue;
+
+ log_unit_debug(UNIT(s), "Got explicit request to remove fd %i (%s), closing.", fs->fd, name);
+ service_fd_store_unlink(fs);
+ }
+}
+
+static int service_arm_timer(Service *s, usec_t usec) {
+ int r;
+
+ assert(s);
+
+ if (s->timer_event_source) {
+ r = sd_event_source_set_time(s->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(s->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ service_dispatch_timer, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timer_event_source, "service-timer");
+
+ return 0;
+}
+
+static int service_verify(Service *s) {
+ assert(s);
+ assert(UNIT(s)->load_state == UNIT_LOADED);
+
+ if (!s->exec_command[SERVICE_EXEC_START] && !s->exec_command[SERVICE_EXEC_STOP]
+ && UNIT(s)->success_action == EMERGENCY_ACTION_NONE) {
+ /* FailureAction= only makes sense if one of the start or stop commands is specified.
+ * SuccessAction= will be executed unconditionally if no commands are specified. Hence,
+ * either a command or SuccessAction= are required. */
+
+ log_unit_error(UNIT(s), "Service has no ExecStart=, ExecStop=, or SuccessAction=. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type != SERVICE_ONESHOT && !s->exec_command[SERVICE_EXEC_START]) {
+ log_unit_error(UNIT(s), "Service has no ExecStart= setting, which is only allowed for Type=oneshot services. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (!s->remain_after_exit && !s->exec_command[SERVICE_EXEC_START] && UNIT(s)->success_action == EMERGENCY_ACTION_NONE) {
+ log_unit_error(UNIT(s), "Service has no ExecStart= and no SuccessAction= settings and does not have RemainAfterExit=yes set. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type != SERVICE_ONESHOT && s->exec_command[SERVICE_EXEC_START]->command_next) {
+ log_unit_error(UNIT(s), "Service has more than one ExecStart= setting, which is only allowed for Type=oneshot services. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type == SERVICE_ONESHOT
+ && !IN_SET(s->restart, SERVICE_RESTART_NO, SERVICE_RESTART_ON_FAILURE, SERVICE_RESTART_ON_ABNORMAL, SERVICE_RESTART_ON_WATCHDOG, SERVICE_RESTART_ON_ABORT)) {
+ log_unit_error(UNIT(s), "Service has Restart= set to either always or on-success, which isn't allowed for Type=oneshot services. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type == SERVICE_ONESHOT && !exit_status_set_is_empty(&s->restart_force_status)) {
+ log_unit_error(UNIT(s), "Service has RestartForceStatus= set, which isn't allowed for Type=oneshot services. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->type == SERVICE_DBUS && !s->bus_name) {
+ log_unit_error(UNIT(s), "Service is of type D-Bus but no D-Bus service name has been specified. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->exec_context.pam_name && !IN_SET(s->kill_context.kill_mode, KILL_CONTROL_GROUP, KILL_MIXED)) {
+ log_unit_error(UNIT(s), "Service has PAM enabled. Kill mode must be set to 'control-group' or 'mixed'. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->usb_function_descriptors && !s->usb_function_strings)
+ log_unit_warning(UNIT(s), "Service has USBFunctionDescriptors= setting, but no USBFunctionStrings=. Ignoring.");
+
+ if (!s->usb_function_descriptors && s->usb_function_strings)
+ log_unit_warning(UNIT(s), "Service has USBFunctionStrings= setting, but no USBFunctionDescriptors=. Ignoring.");
+
+ if (s->runtime_max_usec != USEC_INFINITY && s->type == SERVICE_ONESHOT)
+ log_unit_warning(UNIT(s), "RuntimeMaxSec= has no effect in combination with Type=oneshot. Ignoring.");
+
+ return 0;
+}
+
+static int service_add_default_dependencies(Service *s) {
+ int r;
+
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ /* Add a number of automatic dependencies useful for the
+ * majority of services. */
+
+ if (MANAGER_IS_SYSTEM(UNIT(s)->manager)) {
+ /* First, pull in the really early boot stuff, and
+ * require it, so that we fail if we can't acquire
+ * it. */
+
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ } else {
+
+ /* In the --user instance there's no sysinit.target,
+ * in that case require basic.target instead. */
+
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_BASIC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ }
+
+ /* Second, if the rest of the base system is in the same
+ * transaction, order us after it, but do not pull it in or
+ * even require it. */
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_BASIC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ /* Third, add us in for normal shutdown. */
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static void service_fix_stdio(Service *s) {
+ assert(s);
+
+ /* Note that EXEC_INPUT_NULL and EXEC_OUTPUT_INHERIT play a special role here: they are both the
+ * default value that is subject to automatic overriding triggered by other settings and an explicit
+ * choice the user can make. We don't distinguish between these cases currently. */
+
+ if (s->exec_context.std_input == EXEC_INPUT_NULL &&
+ s->exec_context.stdin_data_size > 0)
+ s->exec_context.std_input = EXEC_INPUT_DATA;
+
+ if (IN_SET(s->exec_context.std_input,
+ EXEC_INPUT_TTY,
+ EXEC_INPUT_TTY_FORCE,
+ EXEC_INPUT_TTY_FAIL,
+ EXEC_INPUT_SOCKET,
+ EXEC_INPUT_NAMED_FD))
+ return;
+
+ /* We assume these listed inputs refer to bidirectional streams, and hence duplicating them from
+ * stdin to stdout/stderr makes sense and hence leaving EXEC_OUTPUT_INHERIT in place makes sense,
+ * too. Outputs such as regular files or sealed data memfds otoh don't really make sense to be
+ * duplicated for both input and output at the same time (since they then would cause a feedback
+ * loop), hence override EXEC_OUTPUT_INHERIT with the default stderr/stdout setting. */
+
+ if (s->exec_context.std_error == EXEC_OUTPUT_INHERIT &&
+ s->exec_context.std_output == EXEC_OUTPUT_INHERIT)
+ s->exec_context.std_error = UNIT(s)->manager->default_std_error;
+
+ if (s->exec_context.std_output == EXEC_OUTPUT_INHERIT)
+ s->exec_context.std_output = UNIT(s)->manager->default_std_output;
+}
+
+static int service_setup_bus_name(Service *s) {
+ int r;
+
+ assert(s);
+
+ if (s->type != SERVICE_DBUS)
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
+
+ /* We always want to be ordered against dbus.socket if both are in the transaction. */
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
+
+ r = unit_watch_bus_name(UNIT(s), s->bus_name);
+ if (r == -EEXIST)
+ return log_unit_error_errno(UNIT(s), r, "Two services allocated for the same bus name %s, refusing operation.", s->bus_name);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Cannot watch bus name %s: %m", s->bus_name);
+
+ return 0;
+}
+
+static int service_add_extras(Service *s) {
+ int r;
+
+ assert(s);
+
+ if (s->type == _SERVICE_TYPE_INVALID) {
+ /* Figure out a type automatically */
+ if (s->bus_name)
+ s->type = SERVICE_DBUS;
+ else if (s->exec_command[SERVICE_EXEC_START])
+ s->type = SERVICE_SIMPLE;
+ else
+ s->type = SERVICE_ONESHOT;
+ }
+
+ /* Oneshot services have disabled start timeout by default */
+ if (s->type == SERVICE_ONESHOT && !s->start_timeout_defined)
+ s->timeout_start_usec = USEC_INFINITY;
+
+ service_fix_stdio(s);
+
+ r = unit_patch_contexts(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = unit_add_exec_dependencies(UNIT(s), &s->exec_context);
+ if (r < 0)
+ return r;
+
+ r = unit_set_default_slice(UNIT(s));
+ if (r < 0)
+ return r;
+
+ /* If the service needs the notify socket, let's enable it automatically. */
+ if (s->notify_access == NOTIFY_NONE &&
+ (s->type == SERVICE_NOTIFY || s->watchdog_usec > 0 || s->n_fd_store_max > 0))
+ s->notify_access = NOTIFY_MAIN;
+
+ /* If no OOM policy was explicitly set, then default to the configure default OOM policy. Except when
+ * delegation is on, in that case it we assume the payload knows better what to do and can process
+ * things in a more focused way. */
+ if (s->oom_policy < 0)
+ s->oom_policy = s->cgroup_context.delegate ? OOM_CONTINUE : UNIT(s)->manager->default_oom_policy;
+
+ /* Let the kernel do the killing if that's requested. */
+ s->cgroup_context.memory_oom_group = s->oom_policy == OOM_KILL;
+
+ r = service_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+
+ r = service_setup_bus_name(s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int service_load(Unit *u) {
+ Service *s = SERVICE(u);
+ int r;
+
+ r = unit_load_fragment_and_dropin(u, true);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ /* This is a new unit? Then let's add in some extras */
+ r = service_add_extras(s);
+ if (r < 0)
+ return r;
+
+ return service_verify(s);
+}
+
+static void service_dump(Unit *u, FILE *f, const char *prefix) {
+ char buf_restart[FORMAT_TIMESPAN_MAX], buf_start[FORMAT_TIMESPAN_MAX], buf_stop[FORMAT_TIMESPAN_MAX],
+ buf_runtime[FORMAT_TIMESPAN_MAX], buf_watchdog[FORMAT_TIMESPAN_MAX], buf_abort[FORMAT_TIMESPAN_MAX];
+ ServiceExecCommand c;
+ Service *s = SERVICE(u);
+ const char *prefix2;
+
+ assert(s);
+
+ prefix = strempty(prefix);
+ prefix2 = strjoina(prefix, "\t");
+
+ fprintf(f,
+ "%sService State: %s\n"
+ "%sResult: %s\n"
+ "%sReload Result: %s\n"
+ "%sClean Result: %s\n"
+ "%sPermissionsStartOnly: %s\n"
+ "%sRootDirectoryStartOnly: %s\n"
+ "%sRemainAfterExit: %s\n"
+ "%sGuessMainPID: %s\n"
+ "%sType: %s\n"
+ "%sRestart: %s\n"
+ "%sNotifyAccess: %s\n"
+ "%sNotifyState: %s\n"
+ "%sOOMPolicy: %s\n",
+ prefix, service_state_to_string(s->state),
+ prefix, service_result_to_string(s->result),
+ prefix, service_result_to_string(s->reload_result),
+ prefix, service_result_to_string(s->clean_result),
+ prefix, yes_no(s->permissions_start_only),
+ prefix, yes_no(s->root_directory_start_only),
+ prefix, yes_no(s->remain_after_exit),
+ prefix, yes_no(s->guess_main_pid),
+ prefix, service_type_to_string(s->type),
+ prefix, service_restart_to_string(s->restart),
+ prefix, notify_access_to_string(s->notify_access),
+ prefix, notify_state_to_string(s->notify_state),
+ prefix, oom_policy_to_string(s->oom_policy));
+
+ if (s->control_pid > 0)
+ fprintf(f,
+ "%sControl PID: "PID_FMT"\n",
+ prefix, s->control_pid);
+
+ if (s->main_pid > 0)
+ fprintf(f,
+ "%sMain PID: "PID_FMT"\n"
+ "%sMain PID Known: %s\n"
+ "%sMain PID Alien: %s\n",
+ prefix, s->main_pid,
+ prefix, yes_no(s->main_pid_known),
+ prefix, yes_no(s->main_pid_alien));
+
+ if (s->pid_file)
+ fprintf(f,
+ "%sPIDFile: %s\n",
+ prefix, s->pid_file);
+
+ if (s->bus_name)
+ fprintf(f,
+ "%sBusName: %s\n"
+ "%sBus Name Good: %s\n",
+ prefix, s->bus_name,
+ prefix, yes_no(s->bus_name_good));
+
+ if (UNIT_ISSET(s->accept_socket))
+ fprintf(f,
+ "%sAccept Socket: %s\n",
+ prefix, UNIT_DEREF(s->accept_socket)->id);
+
+ fprintf(f,
+ "%sRestartSec: %s\n"
+ "%sTimeoutStartSec: %s\n"
+ "%sTimeoutStopSec: %s\n"
+ "%sTimeoutStartFailureMode: %s\n"
+ "%sTimeoutStopFailureMode: %s\n",
+ prefix, format_timespan(buf_restart, sizeof(buf_restart), s->restart_usec, USEC_PER_SEC),
+ prefix, format_timespan(buf_start, sizeof(buf_start), s->timeout_start_usec, USEC_PER_SEC),
+ prefix, format_timespan(buf_stop, sizeof(buf_stop), s->timeout_stop_usec, USEC_PER_SEC),
+ prefix, service_timeout_failure_mode_to_string(s->timeout_start_failure_mode),
+ prefix, service_timeout_failure_mode_to_string(s->timeout_stop_failure_mode));
+
+ if (s->timeout_abort_set)
+ fprintf(f,
+ "%sTimeoutAbortSec: %s\n",
+ prefix, format_timespan(buf_abort, sizeof(buf_abort), s->timeout_abort_usec, USEC_PER_SEC));
+
+ fprintf(f,
+ "%sRuntimeMaxSec: %s\n"
+ "%sWatchdogSec: %s\n",
+ prefix, format_timespan(buf_runtime, sizeof(buf_runtime), s->runtime_max_usec, USEC_PER_SEC),
+ prefix, format_timespan(buf_watchdog, sizeof(buf_watchdog), s->watchdog_usec, USEC_PER_SEC));
+
+ kill_context_dump(&s->kill_context, f, prefix);
+ exec_context_dump(&s->exec_context, f, prefix);
+
+ for (c = 0; c < _SERVICE_EXEC_COMMAND_MAX; c++) {
+
+ if (!s->exec_command[c])
+ continue;
+
+ fprintf(f, "%s-> %s:\n",
+ prefix, service_exec_command_to_string(c));
+
+ exec_command_dump_list(s->exec_command[c], f, prefix2);
+ }
+
+ if (s->status_text)
+ fprintf(f, "%sStatus Text: %s\n",
+ prefix, s->status_text);
+
+ if (s->n_fd_store_max > 0)
+ fprintf(f,
+ "%sFile Descriptor Store Max: %u\n"
+ "%sFile Descriptor Store Current: %zu\n",
+ prefix, s->n_fd_store_max,
+ prefix, s->n_fd_store);
+
+ cgroup_context_dump(UNIT(s), f, prefix);
+}
+
+static int service_is_suitable_main_pid(Service *s, pid_t pid, int prio) {
+ Unit *owner;
+
+ assert(s);
+ assert(pid_is_valid(pid));
+
+ /* Checks whether the specified PID is suitable as main PID for this service. returns negative if not, 0 if the
+ * PID is questionnable but should be accepted if the source of configuration is trusted. > 0 if the PID is
+ * good */
+
+ if (pid == getpid_cached() || pid == 1) {
+ log_unit_full(UNIT(s), prio, "New main PID "PID_FMT" is the manager, refusing.", pid);
+ return -EPERM;
+ }
+
+ if (pid == s->control_pid) {
+ log_unit_full(UNIT(s), prio, "New main PID "PID_FMT" is the control process, refusing.", pid);
+ return -EPERM;
+ }
+
+ if (!pid_is_alive(pid)) {
+ log_unit_full(UNIT(s), prio, "New main PID "PID_FMT" does not exist or is a zombie.", pid);
+ return -ESRCH;
+ }
+
+ owner = manager_get_unit_by_pid(UNIT(s)->manager, pid);
+ if (owner == UNIT(s)) {
+ log_unit_debug(UNIT(s), "New main PID "PID_FMT" belongs to service, we are happy.", pid);
+ return 1; /* Yay, it's definitely a good PID */
+ }
+
+ return 0; /* Hmm it's a suspicious PID, let's accept it if configuration source is trusted */
+}
+
+static int service_load_pid_file(Service *s, bool may_warn) {
+ char procfs[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ bool questionable_pid_file = false;
+ _cleanup_free_ char *k = NULL;
+ _cleanup_close_ int fd = -1;
+ int r, prio;
+ pid_t pid;
+
+ assert(s);
+
+ if (!s->pid_file)
+ return -ENOENT;
+
+ prio = may_warn ? LOG_INFO : LOG_DEBUG;
+
+ r = chase_symlinks(s->pid_file, NULL, CHASE_SAFE, NULL, &fd);
+ if (r == -ENOLINK) {
+ log_unit_debug_errno(UNIT(s), r,
+ "Potentially unsafe symlink chain, will now retry with relaxed checks: %s", s->pid_file);
+
+ questionable_pid_file = true;
+
+ r = chase_symlinks(s->pid_file, NULL, 0, NULL, &fd);
+ }
+ if (r < 0)
+ return log_unit_full_errno(UNIT(s), prio, fd,
+ "Can't open PID file %s (yet?) after %s: %m", s->pid_file, service_state_to_string(s->state));
+
+ /* Let's read the PID file now that we chased it down. But we need to convert the O_PATH fd
+ * chase_symlinks() returned us into a proper fd first. */
+ xsprintf(procfs, "/proc/self/fd/%i", fd);
+ r = read_one_line_file(procfs, &k);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r,
+ "Can't convert PID files %s O_PATH file descriptor to proper file descriptor: %m",
+ s->pid_file);
+
+ r = parse_pid(k, &pid);
+ if (r < 0)
+ return log_unit_full_errno(UNIT(s), prio, r, "Failed to parse PID from file %s: %m", s->pid_file);
+
+ if (s->main_pid_known && pid == s->main_pid)
+ return 0;
+
+ r = service_is_suitable_main_pid(s, pid, prio);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ struct stat st;
+
+ if (questionable_pid_file) {
+ log_unit_error(UNIT(s), "Refusing to accept PID outside of service control group, acquired through unsafe symlink chain: %s", s->pid_file);
+ return -EPERM;
+ }
+
+ /* Hmm, it's not clear if the new main PID is safe. Let's allow this if the PID file is owned by root */
+
+ if (fstat(fd, &st) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to fstat() PID file O_PATH fd: %m");
+
+ if (st.st_uid != 0) {
+ log_unit_error(UNIT(s), "New main PID "PID_FMT" does not belong to service, and PID file is not owned by root. Refusing.", pid);
+ return -EPERM;
+ }
+
+ log_unit_debug(UNIT(s), "New main PID "PID_FMT" does not belong to service, but we'll accept it since PID file is owned by root.", pid);
+ }
+
+ if (s->main_pid_known) {
+ log_unit_debug(UNIT(s), "Main PID changing: "PID_FMT" -> "PID_FMT, s->main_pid, pid);
+
+ service_unwatch_main_pid(s);
+ s->main_pid_known = false;
+ } else
+ log_unit_debug(UNIT(s), "Main PID loaded: "PID_FMT, pid);
+
+ r = service_set_main_pid(s, pid);
+ if (r < 0)
+ return r;
+
+ r = unit_watch_pid(UNIT(s), pid, false);
+ if (r < 0) /* FIXME: we need to do something here */
+ return log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" for service: %m", pid);
+
+ return 1;
+}
+
+static void service_search_main_pid(Service *s) {
+ pid_t pid = 0;
+ int r;
+
+ assert(s);
+
+ /* If we know it anyway, don't ever fall back to unreliable
+ * heuristics */
+ if (s->main_pid_known)
+ return;
+
+ if (!s->guess_main_pid)
+ return;
+
+ assert(s->main_pid <= 0);
+
+ if (unit_search_main_pid(UNIT(s), &pid) < 0)
+ return;
+
+ log_unit_debug(UNIT(s), "Main PID guessed: "PID_FMT, pid);
+ if (service_set_main_pid(s, pid) < 0)
+ return;
+
+ r = unit_watch_pid(UNIT(s), pid, false);
+ if (r < 0)
+ /* FIXME: we need to do something here */
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" from: %m", pid);
+}
+
+static void service_set_state(Service *s, ServiceState state) {
+ ServiceState old_state;
+ const UnitActiveState *table;
+
+ assert(s);
+
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
+ table = s->type == SERVICE_IDLE ? state_translation_table_idle : state_translation_table;
+
+ old_state = s->state;
+ s->state = state;
+
+ service_unwatch_pid_file(s);
+
+ if (!IN_SET(state,
+ SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_AUTO_RESTART,
+ SERVICE_CLEANING))
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ if (!IN_SET(state,
+ SERVICE_START, SERVICE_START_POST,
+ SERVICE_RUNNING, SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
+ service_unwatch_main_pid(s);
+ s->main_command = NULL;
+ }
+
+ if (!IN_SET(state,
+ SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+ SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_CLEANING)) {
+ service_unwatch_control_pid(s);
+ s->control_command = NULL;
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+ }
+
+ if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) {
+ unit_unwatch_all_pids(UNIT(s));
+ unit_dequeue_rewatch_pids(UNIT(s));
+ }
+
+ if (!IN_SET(state,
+ SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+ SERVICE_RUNNING, SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
+ !(state == SERVICE_DEAD && UNIT(s)->job))
+ service_close_socket_fd(s);
+
+ if (state != SERVICE_START)
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
+ service_stop_watchdog(s);
+
+ /* For the inactive states unit_notify() will trim the cgroup,
+ * but for exit we have to do that ourselves... */
+ if (state == SERVICE_EXITED && !MANAGER_IS_RELOADING(UNIT(s)->manager))
+ unit_prune_cgroup(UNIT(s));
+
+ if (old_state != state)
+ log_unit_debug(UNIT(s), "Changed %s -> %s", service_state_to_string(old_state), service_state_to_string(state));
+
+ unit_notify(UNIT(s), table[old_state], table[state],
+ (s->reload_result == SERVICE_SUCCESS ? 0 : UNIT_NOTIFY_RELOAD_FAILURE) |
+ (s->will_auto_restart ? UNIT_NOTIFY_WILL_AUTO_RESTART : 0) |
+ (s->result == SERVICE_SKIP_CONDITION ? UNIT_NOTIFY_SKIP_CONDITION : 0));
+}
+
+static usec_t service_coldplug_timeout(Service *s) {
+ assert(s);
+
+ switch (s->deserialized_state) {
+
+ case SERVICE_CONDITION:
+ case SERVICE_START_PRE:
+ case SERVICE_START:
+ case SERVICE_START_POST:
+ case SERVICE_RELOAD:
+ return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_start_usec);
+
+ case SERVICE_RUNNING:
+ return usec_add(UNIT(s)->active_enter_timestamp.monotonic, s->runtime_max_usec);
+
+ case SERVICE_STOP:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+ return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_stop_usec);
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_FINAL_WATCHDOG:
+ return usec_add(UNIT(s)->state_change_timestamp.monotonic, service_timeout_abort_usec(s));
+
+ case SERVICE_AUTO_RESTART:
+ return usec_add(UNIT(s)->inactive_enter_timestamp.monotonic, s->restart_usec);
+
+ case SERVICE_CLEANING:
+ return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->exec_context.timeout_clean_usec);
+
+ default:
+ return USEC_INFINITY;
+ }
+}
+
+static int service_coldplug(Unit *u) {
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+ assert(s->state == SERVICE_DEAD);
+
+ if (s->deserialized_state == s->state)
+ return 0;
+
+ r = service_arm_timer(s, service_coldplug_timeout(s));
+ if (r < 0)
+ return r;
+
+ if (s->main_pid > 0 &&
+ pid_is_unwaited(s->main_pid) &&
+ (IN_SET(s->deserialized_state,
+ SERVICE_START, SERVICE_START_POST,
+ SERVICE_RUNNING, SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
+ r = unit_watch_pid(UNIT(s), s->main_pid, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->control_pid > 0 &&
+ pid_is_unwaited(s->control_pid) &&
+ IN_SET(s->deserialized_state,
+ SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+ SERVICE_RELOAD,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_CLEANING)) {
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART, SERVICE_CLEANING)) {
+ (void) unit_enqueue_rewatch_pids(u);
+ (void) unit_setup_dynamic_creds(u);
+ (void) unit_setup_exec_runtime(u);
+ }
+
+ if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
+ service_start_watchdog(s);
+
+ if (UNIT_ISSET(s->accept_socket)) {
+ Socket* socket = SOCKET(UNIT_DEREF(s->accept_socket));
+
+ if (socket->max_connections_per_source > 0) {
+ SocketPeer *peer;
+
+ /* Make a best-effort attempt at bumping the connection count */
+ if (socket_acquire_peer(socket, s->socket_fd, &peer) > 0) {
+ socket_peer_unref(s->peer);
+ s->peer = peer;
+ }
+ }
+ }
+
+ service_set_state(s, s->deserialized_state);
+ return 0;
+}
+
+static int service_collect_fds(
+ Service *s,
+ int **fds,
+ char ***fd_names,
+ size_t *n_socket_fds,
+ size_t *n_storage_fds) {
+
+ _cleanup_strv_free_ char **rfd_names = NULL;
+ _cleanup_free_ int *rfds = NULL;
+ size_t rn_socket_fds = 0, rn_storage_fds = 0;
+ int r;
+
+ assert(s);
+ assert(fds);
+ assert(fd_names);
+ assert(n_socket_fds);
+ assert(n_storage_fds);
+
+ if (s->socket_fd >= 0) {
+
+ /* Pass the per-connection socket */
+
+ rfds = new(int, 1);
+ if (!rfds)
+ return -ENOMEM;
+ rfds[0] = s->socket_fd;
+
+ rfd_names = strv_new("connection");
+ if (!rfd_names)
+ return -ENOMEM;
+
+ rn_socket_fds = 1;
+ } else {
+ void *v;
+ Unit *u;
+
+ /* Pass all our configured sockets for singleton services */
+
+ HASHMAP_FOREACH_KEY(v, u, UNIT(s)->dependencies[UNIT_TRIGGERED_BY]) {
+ _cleanup_free_ int *cfds = NULL;
+ Socket *sock;
+ int cn_fds;
+
+ if (u->type != UNIT_SOCKET)
+ continue;
+
+ sock = SOCKET(u);
+
+ cn_fds = socket_collect_fds(sock, &cfds);
+ if (cn_fds < 0)
+ return cn_fds;
+
+ if (cn_fds <= 0)
+ continue;
+
+ if (!rfds) {
+ rfds = TAKE_PTR(cfds);
+ rn_socket_fds = cn_fds;
+ } else {
+ int *t;
+
+ t = reallocarray(rfds, rn_socket_fds + cn_fds, sizeof(int));
+ if (!t)
+ return -ENOMEM;
+
+ memcpy(t + rn_socket_fds, cfds, cn_fds * sizeof(int));
+
+ rfds = t;
+ rn_socket_fds += cn_fds;
+ }
+
+ r = strv_extend_n(&rfd_names, socket_fdname(sock), cn_fds);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (s->n_fd_store > 0) {
+ ServiceFDStore *fs;
+ size_t n_fds;
+ char **nl;
+ int *t;
+
+ t = reallocarray(rfds, rn_socket_fds + s->n_fd_store, sizeof(int));
+ if (!t)
+ return -ENOMEM;
+
+ rfds = t;
+
+ nl = reallocarray(rfd_names, rn_socket_fds + s->n_fd_store + 1, sizeof(char *));
+ if (!nl)
+ return -ENOMEM;
+
+ rfd_names = nl;
+ n_fds = rn_socket_fds;
+
+ LIST_FOREACH(fd_store, fs, s->fd_store) {
+ rfds[n_fds] = fs->fd;
+ rfd_names[n_fds] = strdup(strempty(fs->fdname));
+ if (!rfd_names[n_fds])
+ return -ENOMEM;
+
+ rn_storage_fds++;
+ n_fds++;
+ }
+
+ rfd_names[n_fds] = NULL;
+ }
+
+ *fds = TAKE_PTR(rfds);
+ *fd_names = TAKE_PTR(rfd_names);
+ *n_socket_fds = rn_socket_fds;
+ *n_storage_fds = rn_storage_fds;
+
+ return 0;
+}
+
+static int service_allocate_exec_fd_event_source(
+ Service *s,
+ int fd,
+ sd_event_source **ret_event_source) {
+
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(ret_event_source);
+
+ r = sd_event_add_io(UNIT(s)->manager->event, &source, fd, 0, service_dispatch_exec_io, s);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to allocate exec_fd event source: %m");
+
+ /* This is a bit lower priority than SIGCHLD, as that carries a lot more interesting failure information */
+
+ r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_NORMAL-3);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to adjust priority of exec_fd event source: %m");
+
+ (void) sd_event_source_set_description(source, "service event_fd");
+
+ r = sd_event_source_set_io_fd_own(source, true);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to pass ownership of fd to event source: %m");
+
+ *ret_event_source = TAKE_PTR(source);
+ return 0;
+}
+
+static int service_allocate_exec_fd(
+ Service *s,
+ sd_event_source **ret_event_source,
+ int* ret_exec_fd) {
+
+ _cleanup_close_pair_ int p[2] = { -1, -1 };
+ int r;
+
+ assert(s);
+ assert(ret_event_source);
+ assert(ret_exec_fd);
+
+ if (pipe2(p, O_CLOEXEC|O_NONBLOCK) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to allocate exec_fd pipe: %m");
+
+ r = service_allocate_exec_fd_event_source(s, p[0], ret_event_source);
+ if (r < 0)
+ return r;
+
+ p[0] = -1;
+ *ret_exec_fd = TAKE_FD(p[1]);
+
+ return 0;
+}
+
+static bool service_exec_needs_notify_socket(Service *s, ExecFlags flags) {
+ assert(s);
+
+ /* Notifications are accepted depending on the process and
+ * the access setting of the service:
+ * process: \ access: NONE MAIN EXEC ALL
+ * main no yes yes yes
+ * control no no yes yes
+ * other (forked) no no no yes */
+
+ if (flags & EXEC_IS_CONTROL)
+ /* A control process */
+ return IN_SET(s->notify_access, NOTIFY_EXEC, NOTIFY_ALL);
+
+ /* We only spawn main processes and control processes, so any
+ * process that is not a control process is a main process */
+ return s->notify_access != NOTIFY_NONE;
+}
+
+static int service_spawn(
+ Service *s,
+ ExecCommand *c,
+ usec_t timeout,
+ ExecFlags flags,
+ pid_t *_pid) {
+
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = flags,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
+ };
+ _cleanup_(sd_event_source_unrefp) sd_event_source *exec_fd_source = NULL;
+ _cleanup_strv_free_ char **final_env = NULL, **our_env = NULL;
+ size_t n_env = 0;
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(c);
+ assert(_pid);
+
+ r = unit_prepare_exec(UNIT(s)); /* This realizes the cgroup, among other things */
+ if (r < 0)
+ return r;
+
+ if (flags & EXEC_IS_CONTROL) {
+ /* If this is a control process, mask the permissions/chroot application if this is requested. */
+ if (s->permissions_start_only)
+ exec_params.flags &= ~EXEC_APPLY_SANDBOXING;
+ if (s->root_directory_start_only)
+ exec_params.flags &= ~EXEC_APPLY_CHROOT;
+ }
+
+ if ((flags & EXEC_PASS_FDS) ||
+ s->exec_context.std_input == EXEC_INPUT_SOCKET ||
+ s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
+ s->exec_context.std_error == EXEC_OUTPUT_SOCKET) {
+
+ r = service_collect_fds(s,
+ &exec_params.fds,
+ &exec_params.fd_names,
+ &exec_params.n_socket_fds,
+ &exec_params.n_storage_fds);
+ if (r < 0)
+ return r;
+
+ log_unit_debug(UNIT(s), "Passing %zu fds to service", exec_params.n_socket_fds + exec_params.n_storage_fds);
+ }
+
+ if (!FLAGS_SET(flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) {
+ assert(!s->exec_fd_event_source);
+
+ r = service_allocate_exec_fd(s, &exec_fd_source, &exec_params.exec_fd);
+ if (r < 0)
+ return r;
+ }
+
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
+ if (r < 0)
+ return r;
+
+ our_env = new0(char*, 10);
+ if (!our_env)
+ return -ENOMEM;
+
+ if (service_exec_needs_notify_socket(s, flags))
+ if (asprintf(our_env + n_env++, "NOTIFY_SOCKET=%s", UNIT(s)->manager->notify_socket) < 0)
+ return -ENOMEM;
+
+ if (s->main_pid > 0)
+ if (asprintf(our_env + n_env++, "MAINPID="PID_FMT, s->main_pid) < 0)
+ return -ENOMEM;
+
+ if (MANAGER_IS_USER(UNIT(s)->manager))
+ if (asprintf(our_env + n_env++, "MANAGERPID="PID_FMT, getpid_cached()) < 0)
+ return -ENOMEM;
+
+ if (s->pid_file)
+ if (asprintf(our_env + n_env++, "PIDFILE=%s", s->pid_file) < 0)
+ return -ENOMEM;
+
+ if (s->socket_fd >= 0) {
+ union sockaddr_union sa;
+ socklen_t salen = sizeof(sa);
+
+ /* If this is a per-connection service instance, let's set $REMOTE_ADDR and $REMOTE_PORT to something
+ * useful. Note that we do this only when we are still connected at this point in time, which we might
+ * very well not be. Hence we ignore all errors when retrieving peer information (as that might result
+ * in ENOTCONN), and just use whate we can use. */
+
+ if (getpeername(s->socket_fd, &sa.sa, &salen) >= 0 &&
+ IN_SET(sa.sa.sa_family, AF_INET, AF_INET6, AF_VSOCK)) {
+ _cleanup_free_ char *addr = NULL;
+ char *t;
+ unsigned port;
+
+ r = sockaddr_pretty(&sa.sa, salen, true, false, &addr);
+ if (r < 0)
+ return r;
+
+ t = strjoin("REMOTE_ADDR=", addr);
+ if (!t)
+ return -ENOMEM;
+ our_env[n_env++] = t;
+
+ r = sockaddr_port(&sa.sa, &port);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&t, "REMOTE_PORT=%u", port) < 0)
+ return -ENOMEM;
+ our_env[n_env++] = t;
+ }
+ }
+
+ if (flags & EXEC_SETENV_RESULT) {
+ if (asprintf(our_env + n_env++, "SERVICE_RESULT=%s", service_result_to_string(s->result)) < 0)
+ return -ENOMEM;
+
+ if (s->main_exec_status.pid > 0 &&
+ dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
+ if (asprintf(our_env + n_env++, "EXIT_CODE=%s", sigchld_code_to_string(s->main_exec_status.code)) < 0)
+ return -ENOMEM;
+
+ if (s->main_exec_status.code == CLD_EXITED)
+ r = asprintf(our_env + n_env++, "EXIT_STATUS=%i", s->main_exec_status.status);
+ else
+ r = asprintf(our_env + n_env++, "EXIT_STATUS=%s", signal_to_string(s->main_exec_status.status));
+ if (r < 0)
+ return -ENOMEM;
+ }
+ }
+
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ return r;
+
+ final_env = strv_env_merge(2, exec_params.environment, our_env, NULL);
+ if (!final_env)
+ return -ENOMEM;
+
+ /* System D-Bus needs nss-systemd disabled, so that we don't deadlock */
+ SET_FLAG(exec_params.flags, EXEC_NSS_BYPASS_BUS,
+ MANAGER_IS_SYSTEM(UNIT(s)->manager) && unit_has_name(UNIT(s), SPECIAL_DBUS_SERVICE));
+
+ strv_free_and_replace(exec_params.environment, final_env);
+ exec_params.watchdog_usec = service_get_watchdog_usec(s);
+ exec_params.selinux_context_net = s->socket_fd_selinux_context_net;
+ if (s->type == SERVICE_IDLE)
+ exec_params.idle_pipe = UNIT(s)->manager->idle_pipe;
+ exec_params.stdin_fd = s->stdin_fd;
+ exec_params.stdout_fd = s->stdout_fd;
+ exec_params.stderr_fd = s->stderr_fd;
+
+ r = exec_spawn(UNIT(s),
+ c,
+ &s->exec_context,
+ &exec_params,
+ s->exec_runtime,
+ &s->dynamic_creds,
+ &pid);
+ if (r < 0)
+ return r;
+
+ s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
+ s->exec_fd_hot = false;
+
+ r = unit_watch_pid(UNIT(s), pid, true);
+ if (r < 0)
+ return r;
+
+ *_pid = pid;
+
+ return 0;
+}
+
+static int main_pid_good(Service *s) {
+ assert(s);
+
+ /* Returns 0 if the pid is dead, > 0 if it is good, < 0 if we don't know */
+
+ /* If we know the pid file, then let's just check if it is
+ * still valid */
+ if (s->main_pid_known) {
+
+ /* If it's an alien child let's check if it is still
+ * alive ... */
+ if (s->main_pid_alien && s->main_pid > 0)
+ return pid_is_alive(s->main_pid);
+
+ /* .. otherwise assume we'll get a SIGCHLD for it,
+ * which we really should wait for to collect exit
+ * status and code */
+ return s->main_pid > 0;
+ }
+
+ /* We don't know the pid */
+ return -EAGAIN;
+}
+
+static int control_pid_good(Service *s) {
+ assert(s);
+
+ /* Returns 0 if the control PID is dead, > 0 if it is good. We never actually return < 0 here, but in order to
+ * make this function as similar as possible to main_pid_good() and cgroup_good(), we pretend that < 0 also
+ * means: we can't figure it out. */
+
+ return s->control_pid > 0;
+}
+
+static int cgroup_good(Service *s) {
+ int r;
+
+ assert(s);
+
+ /* Returns 0 if the cgroup is empty or doesn't exist, > 0 if it is exists and is populated, < 0 if we can't
+ * figure it out */
+
+ if (!UNIT(s)->cgroup_path)
+ return 0;
+
+ r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path);
+ if (r < 0)
+ return r;
+
+ return r == 0;
+}
+
+static bool service_shall_restart(Service *s, const char **reason) {
+ assert(s);
+
+ /* Don't restart after manual stops */
+ if (s->forbid_restart) {
+ *reason = "manual stop";
+ return false;
+ }
+
+ /* Never restart if this is configured as special exception */
+ if (exit_status_set_test(&s->restart_prevent_status, s->main_exec_status.code, s->main_exec_status.status)) {
+ *reason = "prevented by exit status";
+ return false;
+ }
+
+ /* Restart if the exit code/status are configured as restart triggers */
+ if (exit_status_set_test(&s->restart_force_status, s->main_exec_status.code, s->main_exec_status.status)) {
+ *reason = "forced by exit status";
+ return true;
+ }
+
+ *reason = "restart setting";
+ switch (s->restart) {
+
+ case SERVICE_RESTART_NO:
+ return false;
+
+ case SERVICE_RESTART_ALWAYS:
+ return true;
+
+ case SERVICE_RESTART_ON_SUCCESS:
+ return s->result == SERVICE_SUCCESS;
+
+ case SERVICE_RESTART_ON_FAILURE:
+ return !IN_SET(s->result, SERVICE_SUCCESS, SERVICE_SKIP_CONDITION);
+
+ case SERVICE_RESTART_ON_ABNORMAL:
+ return !IN_SET(s->result, SERVICE_SUCCESS, SERVICE_FAILURE_EXIT_CODE, SERVICE_SKIP_CONDITION);
+
+ case SERVICE_RESTART_ON_WATCHDOG:
+ return s->result == SERVICE_FAILURE_WATCHDOG;
+
+ case SERVICE_RESTART_ON_ABORT:
+ return IN_SET(s->result, SERVICE_FAILURE_SIGNAL, SERVICE_FAILURE_CORE_DUMP);
+
+ default:
+ assert_not_reached("unknown restart setting");
+ }
+}
+
+static bool service_will_restart(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (s->will_auto_restart)
+ return true;
+ if (s->state == SERVICE_AUTO_RESTART)
+ return true;
+
+ return unit_will_restart_default(u);
+}
+
+static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) {
+ ServiceState end_state;
+ int r;
+
+ assert(s);
+
+ /* If there's a stop job queued before we enter the DEAD state, we shouldn't act on Restart=, in order to not
+ * undo what has already been enqueued. */
+ if (unit_stop_pending(UNIT(s)))
+ allow_restart = false;
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ if (s->result == SERVICE_SUCCESS) {
+ unit_log_success(UNIT(s));
+ end_state = SERVICE_DEAD;
+ } else if (s->result == SERVICE_SKIP_CONDITION) {
+ unit_log_skip(UNIT(s), service_result_to_string(s->result));
+ end_state = SERVICE_DEAD;
+ } else {
+ unit_log_failure(UNIT(s), service_result_to_string(s->result));
+ end_state = SERVICE_FAILED;
+ }
+ unit_warn_leftover_processes(UNIT(s), unit_log_leftover_process_stop);
+
+ if (!allow_restart)
+ log_unit_debug(UNIT(s), "Service restart not allowed.");
+ else {
+ const char *reason;
+ bool shall_restart;
+
+ shall_restart = service_shall_restart(s, &reason);
+ log_unit_debug(UNIT(s), "Service will %srestart (%s)",
+ shall_restart ? "" : "not ",
+ reason);
+ if (shall_restart)
+ s->will_auto_restart = true;
+ }
+
+ /* Make sure service_release_resources() doesn't destroy our FD store, while we are changing through
+ * SERVICE_FAILED/SERVICE_DEAD before entering into SERVICE_AUTO_RESTART. */
+ s->n_keep_fd_store ++;
+
+ service_set_state(s, end_state);
+
+ if (s->will_auto_restart) {
+ s->will_auto_restart = false;
+
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->restart_usec));
+ if (r < 0) {
+ s->n_keep_fd_store--;
+ goto fail;
+ }
+
+ service_set_state(s, SERVICE_AUTO_RESTART);
+ } else
+ /* If we shan't restart, then flush out the restart counter. But don't do that immediately, so that the
+ * user can still introspect the counter. Do so on the next start. */
+ s->flush_n_restarts = true;
+
+ /* The new state is in effect, let's decrease the fd store ref counter again. Let's also re-add us to the GC
+ * queue, so that the fd store is possibly gc'ed again */
+ s->n_keep_fd_store--;
+ unit_add_to_gc_queue(UNIT(s));
+
+ /* The next restart might not be a manual stop, hence reset the flag indicating manual stops */
+ s->forbid_restart = false;
+
+ /* We want fresh tmpdirs in case service is started again immediately */
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, true);
+
+ /* Also, remove the runtime directory */
+ unit_destroy_runtime_data(UNIT(s), &s->exec_context);
+
+ /* Get rid of the IPC bits of the user */
+ unit_unref_uid_gid(UNIT(s), true);
+
+ /* Release the user, and destroy it if we are the only remaining owner */
+ dynamic_creds_destroy(&s->dynamic_creds);
+
+ /* Try to delete the pid file. At this point it will be
+ * out-of-date, and some software might be confused by it, so
+ * let's remove it. */
+ if (s->pid_file)
+ (void) unlink(s->pid_file);
+
+ /* Reset TTY ownership if necessary */
+ exec_context_revert_tty(&s->exec_context);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run install restart timer: %m");
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, false);
+}
+
+static void service_enter_stop_post(Service *s, ServiceResult f) {
+ int r;
+ assert(s);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ service_unwatch_control_pid(s);
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST];
+ if (s->control_command) {
+ s->control_command_id = SERVICE_EXEC_STOP_POST;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_stop_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_STOP_POST);
+ } else
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop-post' task: %m");
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES);
+}
+
+static int state_to_kill_operation(Service *s, ServiceState state) {
+ switch (state) {
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_FINAL_WATCHDOG:
+ return KILL_WATCHDOG;
+
+ case SERVICE_STOP_SIGTERM:
+ if (unit_has_job_type(UNIT(s), JOB_RESTART))
+ return KILL_RESTART;
+ _fallthrough_;
+
+ case SERVICE_FINAL_SIGTERM:
+ return KILL_TERMINATE;
+
+ case SERVICE_STOP_SIGKILL:
+ case SERVICE_FINAL_SIGKILL:
+ return KILL_KILL;
+
+ default:
+ return _KILL_OPERATION_INVALID;
+ }
+}
+
+static void service_enter_signal(Service *s, ServiceState state, ServiceResult f) {
+ int kill_operation, r;
+
+ assert(s);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ /* Before sending any signal, make sure we track all members of this cgroup */
+ (void) unit_watch_all_pids(UNIT(s));
+
+ /* Also, enqueue a job that we recheck all our PIDs a bit later, given that it's likely some processes have
+ * died now */
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ kill_operation = state_to_kill_operation(s, state);
+ r = unit_kill_context(
+ UNIT(s),
+ &s->kill_context,
+ kill_operation,
+ s->main_pid,
+ s->control_pid,
+ s->main_pid_alien);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC),
+ kill_operation == KILL_WATCHDOG ? service_timeout_abort_usec(s) : s->timeout_stop_usec));
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, state);
+ } else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM) && s->kill_context.send_sigkill)
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS);
+ else if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
+ service_enter_stop_post(s, SERVICE_SUCCESS);
+ else if (IN_SET(state, SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM) && s->kill_context.send_sigkill)
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS);
+ else
+ service_enter_dead(s, SERVICE_SUCCESS, true);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
+
+ if (IN_SET(state, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL))
+ service_enter_stop_post(s, SERVICE_FAILURE_RESOURCES);
+ else
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
+}
+
+static void service_enter_stop_by_notify(Service *s) {
+ assert(s);
+
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_stop_usec));
+
+ /* The service told us it's stopping, so it's as if we SIGTERM'd it. */
+ service_set_state(s, SERVICE_STOP_SIGTERM);
+}
+
+static void service_enter_stop(Service *s, ServiceResult f) {
+ int r;
+
+ assert(s);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ service_unwatch_control_pid(s);
+ (void) unit_enqueue_rewatch_pids(UNIT(s));
+
+ s->control_command = s->exec_command[SERVICE_EXEC_STOP];
+ if (s->control_command) {
+ s->control_command_id = SERVICE_EXEC_STOP;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_stop_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_STOP);
+ } else
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop' task: %m");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
+}
+
+static bool service_good(Service *s) {
+ int main_pid_ok;
+ assert(s);
+
+ if (s->type == SERVICE_DBUS && !s->bus_name_good)
+ return false;
+
+ main_pid_ok = main_pid_good(s);
+ if (main_pid_ok > 0) /* It's alive */
+ return true;
+ if (main_pid_ok == 0) /* It's dead */
+ return false;
+
+ /* OK, we don't know anything about the main PID, maybe
+ * because there is none. Let's check the control group
+ * instead. */
+
+ return cgroup_good(s) != 0;
+}
+
+static void service_enter_running(Service *s, ServiceResult f) {
+ assert(s);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ service_unwatch_control_pid(s);
+
+ if (s->result != SERVICE_SUCCESS)
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ else if (service_good(s)) {
+
+ /* If there are any queued up sd_notify() notifications, process them now */
+ if (s->notify_state == NOTIFY_RELOADING)
+ service_enter_reload_by_notify(s);
+ else if (s->notify_state == NOTIFY_STOPPING)
+ service_enter_stop_by_notify(s);
+ else {
+ service_set_state(s, SERVICE_RUNNING);
+ service_arm_timer(s, usec_add(UNIT(s)->active_enter_timestamp.monotonic, s->runtime_max_usec));
+ }
+
+ } else if (s->remain_after_exit)
+ service_set_state(s, SERVICE_EXITED);
+ else
+ service_enter_stop(s, SERVICE_SUCCESS);
+}
+
+static void service_enter_start_post(Service *s) {
+ int r;
+ assert(s);
+
+ service_unwatch_control_pid(s);
+ service_reset_watchdog(s);
+
+ s->control_command = s->exec_command[SERVICE_EXEC_START_POST];
+ if (s->control_command) {
+ s->control_command_id = SERVICE_EXEC_START_POST;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_start_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_START_POST);
+ } else
+ service_enter_running(s, SERVICE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start-post' task: %m");
+ service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
+}
+
+static void service_kill_control_process(Service *s) {
+ int r;
+
+ assert(s);
+
+ if (s->control_pid <= 0)
+ return;
+
+ r = kill_and_sigcont(s->control_pid, SIGKILL);
+ if (r < 0) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(s->control_pid, &comm);
+
+ log_unit_debug_errno(UNIT(s), r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m",
+ s->control_pid, strna(comm));
+ }
+}
+
+static int service_adverse_to_leftover_processes(Service *s) {
+ assert(s);
+
+ /* KillMode=mixed and control group are used to indicate that all process should be killed off.
+ * SendSIGKILL= is used for services that require a clean shutdown. These are typically database
+ * service where a SigKilled process would result in a lengthy recovery and who's shutdown or startup
+ * time is quite variable (so Timeout settings aren't of use).
+ *
+ * Here we take these two factors and refuse to start a service if there are existing processes
+ * within a control group. Databases, while generally having some protection against multiple
+ * instances running, lets not stress the rigor of these. Also ExecStartPre= parts of the service
+ * aren't as rigoriously written to protect aganst against multiple use. */
+
+ if (unit_warn_leftover_processes(UNIT(s), unit_log_leftover_process_start) > 0 &&
+ IN_SET(s->kill_context.kill_mode, KILL_MIXED, KILL_CONTROL_GROUP) &&
+ !s->kill_context.send_sigkill)
+ return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(EBUSY),
+ "Will not start SendSIGKILL=no service of type KillMode=control-group or mixed while processes exist");
+
+ return 0;
+}
+
+static void service_enter_start(Service *s) {
+ ExecCommand *c;
+ usec_t timeout;
+ pid_t pid;
+ int r;
+
+ assert(s);
+
+ service_unwatch_control_pid(s);
+ service_unwatch_main_pid(s);
+
+ r = service_adverse_to_leftover_processes(s);
+ if (r < 0)
+ goto fail;
+
+ if (s->type == SERVICE_FORKING) {
+ s->control_command_id = SERVICE_EXEC_START;
+ c = s->control_command = s->exec_command[SERVICE_EXEC_START];
+
+ s->main_command = NULL;
+ } else {
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+ s->control_command = NULL;
+
+ c = s->main_command = s->exec_command[SERVICE_EXEC_START];
+ }
+
+ if (!c) {
+ if (s->type != SERVICE_ONESHOT) {
+ /* There's no command line configured for the main command? Hmm, that is strange.
+ * This can only happen if the configuration changes at runtime. In this case,
+ * let's enter a failure state. */
+ log_unit_error(UNIT(s), "There's no 'start' task anymore we could start.");
+ r = -ENXIO;
+ goto fail;
+ }
+
+ /* We force a fake state transition here. Otherwise, the unit would go directly from
+ * SERVICE_DEAD to SERVICE_DEAD without SERVICE_ACTIVATING or SERVICE_ACTIVE
+ * in between. This way we can later trigger actions that depend on the state
+ * transition, including SuccessAction=. */
+ service_set_state(s, SERVICE_START);
+
+ service_enter_start_post(s);
+ return;
+ }
+
+ if (IN_SET(s->type, SERVICE_SIMPLE, SERVICE_IDLE))
+ /* For simple + idle this is the main process. We don't apply any timeout here, but
+ * service_enter_running() will later apply the .runtime_max_usec timeout. */
+ timeout = USEC_INFINITY;
+ else
+ timeout = s->timeout_start_usec;
+
+ r = service_spawn(s,
+ c,
+ timeout,
+ EXEC_PASS_FDS|EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG|EXEC_WRITE_CREDENTIALS,
+ &pid);
+ if (r < 0)
+ goto fail;
+
+ if (IN_SET(s->type, SERVICE_SIMPLE, SERVICE_IDLE)) {
+ /* For simple services we immediately start
+ * the START_POST binaries. */
+
+ service_set_main_pid(s, pid);
+ service_enter_start_post(s);
+
+ } else if (s->type == SERVICE_FORKING) {
+
+ /* For forking services we wait until the start
+ * process exited. */
+
+ s->control_pid = pid;
+ service_set_state(s, SERVICE_START);
+
+ } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY, SERVICE_EXEC)) {
+
+ /* For oneshot services we wait until the start process exited, too, but it is our main process. */
+
+ /* For D-Bus services we know the main pid right away, but wait for the bus name to appear on the
+ * bus. 'notify' and 'exec' services are similar. */
+
+ service_set_main_pid(s, pid);
+ service_set_state(s, SERVICE_START);
+ } else
+ assert_not_reached("Unknown service type");
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start' task: %m");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
+}
+
+static void service_enter_start_pre(Service *s) {
+ int r;
+
+ assert(s);
+
+ service_unwatch_control_pid(s);
+
+ s->control_command = s->exec_command[SERVICE_EXEC_START_PRE];
+ if (s->control_command) {
+
+ r = service_adverse_to_leftover_processes(s);
+ if (r < 0)
+ goto fail;
+
+ s->control_command_id = SERVICE_EXEC_START_PRE;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_start_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_APPLY_TTY_STDIN,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_START_PRE);
+ } else
+ service_enter_start(s);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start-pre' task: %m");
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
+}
+
+static void service_enter_condition(Service *s) {
+ int r;
+
+ assert(s);
+
+ service_unwatch_control_pid(s);
+
+ s->control_command = s->exec_command[SERVICE_EXEC_CONDITION];
+ if (s->control_command) {
+
+ r = service_adverse_to_leftover_processes(s);
+ if (r < 0)
+ goto fail;
+
+ s->control_command_id = SERVICE_EXEC_CONDITION;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_start_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_APPLY_TTY_STDIN,
+ &s->control_pid);
+
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_CONDITION);
+ } else
+ service_enter_start_pre(s);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'exec-condition' task: %m");
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
+}
+
+static void service_enter_restart(Service *s) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(s);
+
+ if (unit_has_job_type(UNIT(s), JOB_STOP)) {
+ /* Don't restart things if we are going down anyway */
+ log_unit_info(UNIT(s), "Stop job pending for unit, delaying automatic restart.");
+
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->restart_usec));
+ if (r < 0)
+ goto fail;
+
+ return;
+ }
+
+ /* Any units that are bound to this service must also be
+ * restarted. We use JOB_RESTART (instead of the more obvious
+ * JOB_START) here so that those dependency jobs will be added
+ * as well. */
+ r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_REPLACE, NULL, &error, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't fully
+ * stopped, i.e. as long as it remains up or remains in auto-start states. The use can reset the counter
+ * explicitly however via the usual "systemctl reset-failure" logic. */
+ s->n_restarts ++;
+ s->flush_n_restarts = false;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
+ LOG_UNIT_ID(UNIT(s)),
+ LOG_UNIT_INVOCATION_ID(UNIT(s)),
+ LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
+ "N_RESTARTS=%u", s->n_restarts);
+
+ /* Notify clients about changed restart counter */
+ unit_add_to_dbus_queue(UNIT(s));
+
+ /* Note that we stay in the SERVICE_AUTO_RESTART state here,
+ * it will be canceled as part of the service_stop() call that
+ * is executed as part of JOB_RESTART. */
+
+ return;
+
+fail:
+ log_unit_warning(UNIT(s), "Failed to schedule restart job: %s", bus_error_message(&error, r));
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, false);
+}
+
+static void service_enter_reload_by_notify(Service *s) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(s);
+
+ service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_start_usec));
+ service_set_state(s, SERVICE_RELOAD);
+
+ /* service_enter_reload_by_notify is never called during a reload, thus no loops are possible. */
+ r = manager_propagate_reload(UNIT(s)->manager, UNIT(s), JOB_FAIL, &error);
+ if (r < 0)
+ log_unit_warning(UNIT(s), "Failed to schedule propagation of reload: %s", bus_error_message(&error, r));
+}
+
+static void service_enter_reload(Service *s) {
+ int r;
+
+ assert(s);
+
+ service_unwatch_control_pid(s);
+ s->reload_result = SERVICE_SUCCESS;
+
+ s->control_command = s->exec_command[SERVICE_EXEC_RELOAD];
+ if (s->control_command) {
+ s->control_command_id = SERVICE_EXEC_RELOAD;
+
+ r = service_spawn(s,
+ s->control_command,
+ s->timeout_start_usec,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_RELOAD);
+ } else
+ service_enter_running(s, SERVICE_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'reload' task: %m");
+ s->reload_result = SERVICE_FAILURE_RESOURCES;
+ service_enter_running(s, SERVICE_SUCCESS);
+}
+
+static void service_run_next_control(Service *s) {
+ usec_t timeout;
+ int r;
+
+ assert(s);
+ assert(s->control_command);
+ assert(s->control_command->command_next);
+
+ assert(s->control_command_id != SERVICE_EXEC_START);
+
+ s->control_command = s->control_command->command_next;
+ service_unwatch_control_pid(s);
+
+ if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
+ timeout = s->timeout_start_usec;
+ else
+ timeout = s->timeout_stop_usec;
+
+ r = service_spawn(s,
+ s->control_command,
+ timeout,
+ EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_CONDITION, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0),
+ &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run next control task: %m");
+
+ if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START_POST, SERVICE_STOP))
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
+ else if (s->state == SERVICE_STOP_POST)
+ service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true);
+ else if (s->state == SERVICE_RELOAD) {
+ s->reload_result = SERVICE_FAILURE_RESOURCES;
+ service_enter_running(s, SERVICE_SUCCESS);
+ } else
+ service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
+}
+
+static void service_run_next_main(Service *s) {
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(s->main_command);
+ assert(s->main_command->command_next);
+ assert(s->type == SERVICE_ONESHOT);
+
+ s->main_command = s->main_command->command_next;
+ service_unwatch_main_pid(s);
+
+ r = service_spawn(s,
+ s->main_command,
+ s->timeout_start_usec,
+ EXEC_PASS_FDS|EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG,
+ &pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_main_pid(s, pid);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run next main task: %m");
+ service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
+}
+
+static int service_start(Unit *u) {
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+
+ /* We cannot fulfill this request right now, try again later
+ * please! */
+ if (IN_SET(s->state,
+ SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING))
+ return -EAGAIN;
+
+ /* Already on it! */
+ if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST))
+ return 0;
+
+ /* A service that will be restarted must be stopped first to
+ * trigger BindsTo and/or OnFailure dependencies. If a user
+ * does not want to wait for the holdoff time to elapse, the
+ * service should be manually restarted, not started. We
+ * simply return EAGAIN here, so that any start jobs stay
+ * queued, and assume that the auto restart timer will
+ * eventually trigger the restart. */
+ if (s->state == SERVICE_AUTO_RESTART)
+ return -EAGAIN;
+
+ assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED));
+
+ /* Make sure we don't enter a busy loop of some kind. */
+ r = unit_test_start_limit(u);
+ if (r < 0) {
+ service_enter_dead(s, SERVICE_FAILURE_START_LIMIT_HIT, false);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ s->result = SERVICE_SUCCESS;
+ s->reload_result = SERVICE_SUCCESS;
+ s->main_pid_known = false;
+ s->main_pid_alien = false;
+ s->forbid_restart = false;
+
+ s->status_text = mfree(s->status_text);
+ s->status_errno = 0;
+
+ s->notify_state = NOTIFY_UNKNOWN;
+
+ s->watchdog_original_usec = s->watchdog_usec;
+ s->watchdog_override_enable = false;
+ s->watchdog_override_usec = USEC_INFINITY;
+
+ exec_command_reset_status_list_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
+ exec_status_reset(&s->main_exec_status);
+
+ /* This is not an automatic restart? Flush the restart counter then */
+ if (s->flush_n_restarts) {
+ s->n_restarts = 0;
+ s->flush_n_restarts = false;
+ }
+
+ u->reset_accounting = true;
+
+ service_enter_condition(s);
+ return 1;
+}
+
+static int service_stop(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ /* Don't create restart jobs from manual stops. */
+ s->forbid_restart = true;
+
+ /* Already on it */
+ if (IN_SET(s->state,
+ SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
+ return 0;
+
+ /* A restart will be scheduled or is in progress. */
+ if (s->state == SERVICE_AUTO_RESTART) {
+ service_set_state(s, SERVICE_DEAD);
+ return 0;
+ }
+
+ /* If there's already something running we go directly into
+ * kill mode. */
+ if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_STOP_WATCHDOG)) {
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_SUCCESS);
+ return 0;
+ }
+
+ /* If we are currently cleaning, then abort it, brutally. */
+ if (s->state == SERVICE_CLEANING) {
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS);
+ return 0;
+ }
+
+ assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));
+
+ service_enter_stop(s, SERVICE_SUCCESS);
+ return 1;
+}
+
+static int service_reload(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));
+
+ service_enter_reload(s);
+ return 1;
+}
+
+_pure_ static bool service_can_reload(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return !!s->exec_command[SERVICE_EXEC_RELOAD];
+}
+
+static unsigned service_exec_command_index(Unit *u, ServiceExecCommand id, ExecCommand *current) {
+ Service *s = SERVICE(u);
+ unsigned idx = 0;
+ ExecCommand *first, *c;
+
+ assert(s);
+ assert(id >= 0);
+ assert(id < _SERVICE_EXEC_COMMAND_MAX);
+
+ first = s->exec_command[id];
+
+ /* Figure out where we are in the list by walking back to the beginning */
+ for (c = current; c != first; c = c->command_prev)
+ idx++;
+
+ return idx;
+}
+
+static int service_serialize_exec_command(Unit *u, FILE *f, ExecCommand *command) {
+ _cleanup_free_ char *args = NULL, *p = NULL;
+ size_t allocated = 0, length = 0;
+ Service *s = SERVICE(u);
+ const char *type, *key;
+ ServiceExecCommand id;
+ unsigned idx;
+ char **arg;
+
+ assert(s);
+ assert(f);
+
+ if (!command)
+ return 0;
+
+ if (command == s->control_command) {
+ type = "control";
+ id = s->control_command_id;
+ } else {
+ type = "main";
+ id = SERVICE_EXEC_START;
+ }
+
+ idx = service_exec_command_index(u, id, command);
+
+ STRV_FOREACH(arg, command->argv) {
+ _cleanup_free_ char *e = NULL;
+ size_t n;
+
+ e = cescape(*arg);
+ if (!e)
+ return log_oom();
+
+ n = strlen(e);
+ if (!GREEDY_REALLOC(args, allocated, length + 2 + n + 2))
+ return log_oom();
+
+ if (length > 0)
+ args[length++] = ' ';
+
+ args[length++] = '"';
+ memcpy(args + length, e, n);
+ length += n;
+ args[length++] = '"';
+ }
+
+ if (!GREEDY_REALLOC(args, allocated, length + 1))
+ return log_oom();
+
+ args[length++] = 0;
+
+ p = cescape(command->path);
+ if (!p)
+ return log_oom();
+
+ key = strjoina(type, "-command");
+ (void) serialize_item_format(f, key, "%s %u %s %s", service_exec_command_to_string(id), idx, p, args);
+
+ return 0;
+}
+
+static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Service *s = SERVICE(u);
+ ServiceFDStore *fs;
+ int r;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", service_state_to_string(s->state));
+ (void) serialize_item(f, "result", service_result_to_string(s->result));
+ (void) serialize_item(f, "reload-result", service_result_to_string(s->reload_result));
+
+ if (s->control_pid > 0)
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
+
+ if (s->main_pid_known && s->main_pid > 0)
+ (void) serialize_item_format(f, "main-pid", PID_FMT, s->main_pid);
+
+ (void) serialize_bool(f, "main-pid-known", s->main_pid_known);
+ (void) serialize_bool(f, "bus-name-good", s->bus_name_good);
+ (void) serialize_bool(f, "bus-name-owner", s->bus_name_owner);
+
+ (void) serialize_item_format(f, "n-restarts", "%u", s->n_restarts);
+ (void) serialize_bool(f, "flush-n-restarts", s->flush_n_restarts);
+
+ r = serialize_item_escaped(f, "status-text", s->status_text);
+ if (r < 0)
+ return r;
+
+ service_serialize_exec_command(u, f, s->control_command);
+ service_serialize_exec_command(u, f, s->main_command);
+
+ r = serialize_fd(f, fds, "stdin-fd", s->stdin_fd);
+ if (r < 0)
+ return r;
+ r = serialize_fd(f, fds, "stdout-fd", s->stdout_fd);
+ if (r < 0)
+ return r;
+ r = serialize_fd(f, fds, "stderr-fd", s->stderr_fd);
+ if (r < 0)
+ return r;
+
+ if (s->exec_fd_event_source) {
+ r = serialize_fd(f, fds, "exec-fd", sd_event_source_get_io_fd(s->exec_fd_event_source));
+ if (r < 0)
+ return r;
+
+ (void) serialize_bool(f, "exec-fd-hot", s->exec_fd_hot);
+ }
+
+ if (UNIT_ISSET(s->accept_socket)) {
+ r = serialize_item(f, "accept-socket", UNIT_DEREF(s->accept_socket)->id);
+ if (r < 0)
+ return r;
+ }
+
+ r = serialize_fd(f, fds, "socket-fd", s->socket_fd);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(fd_store, fs, s->fd_store) {
+ _cleanup_free_ char *c = NULL;
+ int copy;
+
+ copy = fdset_put_dup(fds, fs->fd);
+ if (copy < 0)
+ return log_error_errno(copy, "Failed to copy file descriptor for serialization: %m");
+
+ c = cescape(fs->fdname);
+ if (!c)
+ return log_oom();
+
+ (void) serialize_item_format(f, "fd-store-fd", "%i \"%s\" %i", copy, c, fs->do_poll);
+ }
+
+ if (s->main_exec_status.pid > 0) {
+ (void) serialize_item_format(f, "main-exec-status-pid", PID_FMT, s->main_exec_status.pid);
+ (void) serialize_dual_timestamp(f, "main-exec-status-start", &s->main_exec_status.start_timestamp);
+ (void) serialize_dual_timestamp(f, "main-exec-status-exit", &s->main_exec_status.exit_timestamp);
+
+ if (dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
+ (void) serialize_item_format(f, "main-exec-status-code", "%i", s->main_exec_status.code);
+ (void) serialize_item_format(f, "main-exec-status-status", "%i", s->main_exec_status.status);
+ }
+ }
+
+ (void) serialize_dual_timestamp(f, "watchdog-timestamp", &s->watchdog_timestamp);
+ (void) serialize_bool(f, "forbid-restart", s->forbid_restart);
+
+ if (s->watchdog_override_enable)
+ (void) serialize_item_format(f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec);
+
+ if (s->watchdog_original_usec != USEC_INFINITY)
+ (void) serialize_item_format(f, "watchdog-original-usec", USEC_FMT, s->watchdog_original_usec);
+
+ return 0;
+}
+
+static int service_deserialize_exec_command(
+ Unit *u,
+ const char *key,
+ const char *value) {
+
+ Service *s = SERVICE(u);
+ int r;
+ unsigned idx = 0, i;
+ bool control, found = false;
+ ServiceExecCommand id = _SERVICE_EXEC_COMMAND_INVALID;
+ ExecCommand *command = NULL;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_strv_free_ char **argv = NULL;
+
+ enum ExecCommandState {
+ STATE_EXEC_COMMAND_TYPE,
+ STATE_EXEC_COMMAND_INDEX,
+ STATE_EXEC_COMMAND_PATH,
+ STATE_EXEC_COMMAND_ARGS,
+ _STATE_EXEC_COMMAND_MAX,
+ _STATE_EXEC_COMMAND_INVALID = -1,
+ } state;
+
+ assert(s);
+ assert(key);
+ assert(value);
+
+ control = streq(key, "control-command");
+
+ state = STATE_EXEC_COMMAND_TYPE;
+
+ for (;;) {
+ _cleanup_free_ char *arg = NULL;
+
+ r = extract_first_word(&value, &arg, NULL, EXTRACT_CUNESCAPE | EXTRACT_UNQUOTE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ switch (state) {
+ case STATE_EXEC_COMMAND_TYPE:
+ id = service_exec_command_from_string(arg);
+ if (id < 0)
+ return -EINVAL;
+
+ state = STATE_EXEC_COMMAND_INDEX;
+ break;
+ case STATE_EXEC_COMMAND_INDEX:
+ r = safe_atou(arg, &idx);
+ if (r < 0)
+ return -EINVAL;
+
+ state = STATE_EXEC_COMMAND_PATH;
+ break;
+ case STATE_EXEC_COMMAND_PATH:
+ path = TAKE_PTR(arg);
+ state = STATE_EXEC_COMMAND_ARGS;
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+ break;
+ case STATE_EXEC_COMMAND_ARGS:
+ r = strv_extend(&argv, arg);
+ if (r < 0)
+ return -ENOMEM;
+ break;
+ default:
+ assert_not_reached("Unknown error at deserialization of exec command");
+ break;
+ }
+ }
+
+ if (state != STATE_EXEC_COMMAND_ARGS)
+ return -EINVAL;
+
+ /* Let's check whether exec command on given offset matches data that we just deserialized */
+ for (command = s->exec_command[id], i = 0; command; command = command->command_next, i++) {
+ if (i != idx)
+ continue;
+
+ found = strv_equal(argv, command->argv) && streq(command->path, path);
+ break;
+ }
+
+ if (!found) {
+ /* Command at the index we serialized is different, let's look for command that exactly
+ * matches but is on different index. If there is no such command we will not resume execution. */
+ for (command = s->exec_command[id]; command; command = command->command_next)
+ if (strv_equal(command->argv, argv) && streq(command->path, path))
+ break;
+ }
+
+ if (command && control) {
+ s->control_command = command;
+ s->control_command_id = id;
+ } else if (command)
+ s->main_command = command;
+ else
+ log_unit_warning(u, "Current command vanished from the unit file, execution of the command list won't be resumed.");
+
+ return 0;
+}
+
+static int service_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ ServiceState state;
+
+ state = service_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ s->deserialized_state = state;
+ } else if (streq(key, "result")) {
+ ServiceResult f;
+
+ f = service_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != SERVICE_SUCCESS)
+ s->result = f;
+
+ } else if (streq(key, "reload-result")) {
+ ServiceResult f;
+
+ f = service_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse reload result value: %s", value);
+ else if (f != SERVICE_SUCCESS)
+ s->reload_result = f;
+
+ } else if (streq(key, "control-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse control-pid value: %s", value);
+ else
+ s->control_pid = pid;
+ } else if (streq(key, "main-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse main-pid value: %s", value);
+ else
+ (void) service_set_main_pid(s, pid);
+ } else if (streq(key, "main-pid-known")) {
+ int b;
+
+ b = parse_boolean(value);
+ if (b < 0)
+ log_unit_debug(u, "Failed to parse main-pid-known value: %s", value);
+ else
+ s->main_pid_known = b;
+ } else if (streq(key, "bus-name-good")) {
+ int b;
+
+ b = parse_boolean(value);
+ if (b < 0)
+ log_unit_debug(u, "Failed to parse bus-name-good value: %s", value);
+ else
+ s->bus_name_good = b;
+ } else if (streq(key, "bus-name-owner")) {
+ r = free_and_strdup(&s->bus_name_owner, value);
+ if (r < 0)
+ log_unit_error_errno(u, r, "Unable to deserialize current bus owner %s: %m", value);
+ } else if (streq(key, "status-text")) {
+ char *t;
+
+ r = cunescape(value, 0, &t);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to unescape status text '%s': %m", value);
+ else
+ free_and_replace(s->status_text, t);
+
+ } else if (streq(key, "accept-socket")) {
+ Unit *socket;
+
+ r = manager_load_unit(u->manager, value, NULL, NULL, &socket);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to load accept-socket unit '%s': %m", value);
+ else {
+ unit_ref_set(&s->accept_socket, u, socket);
+ SOCKET(socket)->n_connections++;
+ }
+
+ } else if (streq(key, "socket-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse socket-fd value: %s", value);
+ else {
+ asynchronous_close(s->socket_fd);
+ s->socket_fd = fdset_remove(fds, fd);
+ }
+ } else if (streq(key, "fd-store-fd")) {
+ _cleanup_free_ char *fdv = NULL, *fdn = NULL, *fdp = NULL;
+ int fd;
+ int do_poll;
+
+ r = extract_first_word(&value, &fdv, NULL, 0);
+ if (r <= 0 || safe_atoi(fdv, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) {
+ log_unit_debug(u, "Failed to parse fd-store-fd value: %s", value);
+ return 0;
+ }
+
+ r = extract_first_word(&value, &fdn, NULL, EXTRACT_CUNESCAPE | EXTRACT_UNQUOTE);
+ if (r <= 0) {
+ log_unit_debug_errno(u, r, "Failed to parse fd-store-fd value \"%s\": %m", value);
+ return 0;
+ }
+
+ r = extract_first_word(&value, &fdp, NULL, 0);
+ if (r == 0) {
+ /* If the value is not present, we assume the default */
+ do_poll = 1;
+ } else if (r < 0 || safe_atoi(fdp, &do_poll) < 0) {
+ log_unit_debug_errno(u, r, "Failed to parse fd-store-fd value \"%s\": %m", value);
+ return 0;
+ }
+
+ r = service_add_fd_store(s, fd, fdn, do_poll);
+ if (r < 0)
+ log_unit_error_errno(u, r, "Failed to add fd to store: %m");
+ else
+ fdset_remove(fds, fd);
+ } else if (streq(key, "main-exec-status-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse main-exec-status-pid value: %s", value);
+ else
+ s->main_exec_status.pid = pid;
+ } else if (streq(key, "main-exec-status-code")) {
+ int i;
+
+ if (safe_atoi(value, &i) < 0)
+ log_unit_debug(u, "Failed to parse main-exec-status-code value: %s", value);
+ else
+ s->main_exec_status.code = i;
+ } else if (streq(key, "main-exec-status-status")) {
+ int i;
+
+ if (safe_atoi(value, &i) < 0)
+ log_unit_debug(u, "Failed to parse main-exec-status-status value: %s", value);
+ else
+ s->main_exec_status.status = i;
+ } else if (streq(key, "main-exec-status-start"))
+ deserialize_dual_timestamp(value, &s->main_exec_status.start_timestamp);
+ else if (streq(key, "main-exec-status-exit"))
+ deserialize_dual_timestamp(value, &s->main_exec_status.exit_timestamp);
+ else if (streq(key, "watchdog-timestamp"))
+ deserialize_dual_timestamp(value, &s->watchdog_timestamp);
+ else if (streq(key, "forbid-restart")) {
+ int b;
+
+ b = parse_boolean(value);
+ if (b < 0)
+ log_unit_debug(u, "Failed to parse forbid-restart value: %s", value);
+ else
+ s->forbid_restart = b;
+ } else if (streq(key, "stdin-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse stdin-fd value: %s", value);
+ else {
+ asynchronous_close(s->stdin_fd);
+ s->stdin_fd = fdset_remove(fds, fd);
+ s->exec_context.stdio_as_fds = true;
+ }
+ } else if (streq(key, "stdout-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse stdout-fd value: %s", value);
+ else {
+ asynchronous_close(s->stdout_fd);
+ s->stdout_fd = fdset_remove(fds, fd);
+ s->exec_context.stdio_as_fds = true;
+ }
+ } else if (streq(key, "stderr-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse stderr-fd value: %s", value);
+ else {
+ asynchronous_close(s->stderr_fd);
+ s->stderr_fd = fdset_remove(fds, fd);
+ s->exec_context.stdio_as_fds = true;
+ }
+ } else if (streq(key, "exec-fd")) {
+ int fd;
+
+ if (safe_atoi(value, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse exec-fd value: %s", value);
+ else {
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ fd = fdset_remove(fds, fd);
+ if (service_allocate_exec_fd_event_source(s, fd, &s->exec_fd_event_source) < 0)
+ safe_close(fd);
+ }
+ } else if (streq(key, "watchdog-override-usec")) {
+ if (deserialize_usec(value, &s->watchdog_override_usec) < 0)
+ log_unit_debug(u, "Failed to parse watchdog_override_usec value: %s", value);
+ else
+ s->watchdog_override_enable = true;
+
+ } else if (streq(key, "watchdog-original-usec")) {
+ if (deserialize_usec(value, &s->watchdog_original_usec) < 0)
+ log_unit_debug(u, "Failed to parse watchdog_original_usec value: %s", value);
+
+ } else if (STR_IN_SET(key, "main-command", "control-command")) {
+ r = service_deserialize_exec_command(u, key, value);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to parse serialized command \"%s\": %m", value);
+
+ } else if (streq(key, "n-restarts")) {
+ r = safe_atou(value, &s->n_restarts);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to parse serialized restart counter '%s': %m", value);
+
+ } else if (streq(key, "flush-n-restarts")) {
+ r = parse_boolean(value);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to parse serialized flush restart counter setting '%s': %m", value);
+ else
+ s->flush_n_restarts = r;
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState service_active_state(Unit *u) {
+ const UnitActiveState *table;
+
+ assert(u);
+
+ table = SERVICE(u)->type == SERVICE_IDLE ? state_translation_table_idle : state_translation_table;
+
+ return table[SERVICE(u)->state];
+}
+
+static const char *service_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return service_state_to_string(SERVICE(u)->state);
+}
+
+static bool service_may_gc(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ /* Never clean up services that still have a process around, even if the service is formally dead. Note that
+ * unit_may_gc() already checked our cgroup for us, we just check our two additional PIDs, too, in case they
+ * have moved outside of the cgroup. */
+
+ if (main_pid_good(s) > 0 ||
+ control_pid_good(s) > 0)
+ return false;
+
+ return true;
+}
+
+static int service_retry_pid_file(Service *s) {
+ int r;
+
+ assert(s->pid_file);
+ assert(IN_SET(s->state, SERVICE_START, SERVICE_START_POST));
+
+ r = service_load_pid_file(s, false);
+ if (r < 0)
+ return r;
+
+ service_unwatch_pid_file(s);
+
+ service_enter_running(s, SERVICE_SUCCESS);
+ return 0;
+}
+
+static int service_watch_pid_file(Service *s) {
+ int r;
+
+ log_unit_debug(UNIT(s), "Setting watch for PID file %s", s->pid_file_pathspec->path);
+
+ r = path_spec_watch(s->pid_file_pathspec, service_dispatch_inotify_io);
+ if (r < 0)
+ goto fail;
+
+ /* the pidfile might have appeared just before we set the watch */
+ log_unit_debug(UNIT(s), "Trying to read PID file %s in case it changed", s->pid_file_pathspec->path);
+ service_retry_pid_file(s);
+
+ return 0;
+fail:
+ log_unit_error_errno(UNIT(s), r, "Failed to set a watch for PID file %s: %m", s->pid_file_pathspec->path);
+ service_unwatch_pid_file(s);
+ return r;
+}
+
+static int service_demand_pid_file(Service *s) {
+ PathSpec *ps;
+
+ assert(s->pid_file);
+ assert(!s->pid_file_pathspec);
+
+ ps = new0(PathSpec, 1);
+ if (!ps)
+ return -ENOMEM;
+
+ ps->unit = UNIT(s);
+ ps->path = strdup(s->pid_file);
+ if (!ps->path) {
+ free(ps);
+ return -ENOMEM;
+ }
+
+ path_simplify(ps->path, false);
+
+ /* PATH_CHANGED would not be enough. There are daemons (sendmail) that
+ * keep their PID file open all the time. */
+ ps->type = PATH_MODIFIED;
+ ps->inotify_fd = -1;
+
+ s->pid_file_pathspec = ps;
+
+ return service_watch_pid_file(s);
+}
+
+static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
+ PathSpec *p = userdata;
+ Service *s;
+
+ assert(p);
+
+ s = SERVICE(p->unit);
+
+ assert(s);
+ assert(fd >= 0);
+ assert(IN_SET(s->state, SERVICE_START, SERVICE_START_POST));
+ assert(s->pid_file_pathspec);
+ assert(path_spec_owns_inotify_fd(s->pid_file_pathspec, fd));
+
+ log_unit_debug(UNIT(s), "inotify event");
+
+ if (path_spec_fd_event(p, events) < 0)
+ goto fail;
+
+ if (service_retry_pid_file(s) == 0)
+ return 0;
+
+ if (service_watch_pid_file(s) < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ service_unwatch_pid_file(s);
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
+ return 0;
+}
+
+static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
+ Service *s = SERVICE(userdata);
+
+ assert(s);
+
+ log_unit_debug(UNIT(s), "got exec-fd event");
+
+ /* If Type=exec is set, we'll consider a service started successfully the instant we invoked execve()
+ * successfully for it. We implement this through a pipe() towards the child, which the kernel automatically
+ * closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on the pipe in the
+ * parent. We need to be careful however, as there are other reasons that we might cause the child's side of
+ * the pipe to be closed (for example, a simple exit()). To deal with that we'll ignore EOFs on the pipe unless
+ * the child signalled us first that it is about to call the execve(). It does so by sending us a simple
+ * non-zero byte via the pipe. We also provide the child with a way to inform us in case execve() failed: if it
+ * sends a zero byte we'll ignore POLLHUP on the fd again. */
+
+ for (;;) {
+ uint8_t x;
+ ssize_t n;
+
+ n = read(fd, &x, sizeof(x));
+ if (n < 0) {
+ if (errno == EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */
+ return 0;
+
+ return log_unit_error_errno(UNIT(s), errno, "Failed to read from exec_fd: %m");
+ }
+ if (n == 0) { /* EOF → the event we are waiting for */
+
+ s->exec_fd_event_source = sd_event_source_unref(s->exec_fd_event_source);
+
+ if (s->exec_fd_hot) { /* Did the child tell us to expect EOF now? */
+ log_unit_debug(UNIT(s), "Got EOF on exec-fd");
+
+ s->exec_fd_hot = false;
+
+ /* Nice! This is what we have been waiting for. Transition to next state. */
+ if (s->type == SERVICE_EXEC && s->state == SERVICE_START)
+ service_enter_start_post(s);
+ } else
+ log_unit_debug(UNIT(s), "Got EOF on exec-fd while it was disabled, ignoring.");
+
+ return 0;
+ }
+
+ /* A byte was read → this turns on/off the exec fd logic */
+ assert(n == sizeof(x));
+ s->exec_fd_hot = x;
+ }
+
+ return 0;
+}
+
+static void service_notify_cgroup_empty_event(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(u);
+
+ log_unit_debug(u, "Control group is empty.");
+
+ switch (s->state) {
+
+ /* Waiting for SIGCHLD is usually more interesting,
+ * because it includes return codes/signals. Which is
+ * why we ignore the cgroup events for most cases,
+ * except when we don't know pid which to expect the
+ * SIGCHLD for. */
+
+ case SERVICE_START:
+ if (s->type == SERVICE_NOTIFY &&
+ main_pid_good(s) == 0 &&
+ control_pid_good(s) == 0) {
+ /* No chance of getting a ready notification anymore */
+ service_enter_stop_post(s, SERVICE_FAILURE_PROTOCOL);
+ break;
+ }
+
+ _fallthrough_;
+ case SERVICE_START_POST:
+ if (s->pid_file_pathspec &&
+ main_pid_good(s) == 0 &&
+ control_pid_good(s) == 0) {
+
+ /* Give up hoping for the daemon to write its PID file */
+ log_unit_warning(u, "Daemon never wrote its PID file. Failing.");
+
+ service_unwatch_pid_file(s);
+ if (s->state == SERVICE_START)
+ service_enter_stop_post(s, SERVICE_FAILURE_PROTOCOL);
+ else
+ service_enter_stop(s, SERVICE_FAILURE_PROTOCOL);
+ }
+ break;
+
+ case SERVICE_RUNNING:
+ /* service_enter_running() will figure out what to do */
+ service_enter_running(s, SERVICE_SUCCESS);
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+
+ if (main_pid_good(s) <= 0 && control_pid_good(s) <= 0)
+ service_enter_stop_post(s, SERVICE_SUCCESS);
+
+ break;
+
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_WATCHDOG:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+ if (main_pid_good(s) <= 0 && control_pid_good(s) <= 0)
+ service_enter_dead(s, SERVICE_SUCCESS, true);
+
+ break;
+
+ /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
+ * up the cgroup earlier and should do it now. */
+ case SERVICE_DEAD:
+ case SERVICE_FAILED:
+ unit_prune_cgroup(u);
+ break;
+
+ default:
+ ;
+ }
+}
+
+static void service_notify_cgroup_oom_event(Unit *u) {
+ Service *s = SERVICE(u);
+
+ log_unit_debug(u, "Process of control group was killed by the OOM killer.");
+
+ if (s->oom_policy == OOM_CONTINUE)
+ return;
+
+ switch (s->state) {
+
+ case SERVICE_CONDITION:
+ case SERVICE_START_PRE:
+ case SERVICE_START:
+ case SERVICE_START_POST:
+ case SERVICE_STOP:
+ if (s->oom_policy == OOM_STOP)
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_OOM_KILL);
+ else if (s->oom_policy == OOM_KILL)
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL);
+
+ break;
+
+ case SERVICE_EXITED:
+ case SERVICE_RUNNING:
+ if (s->oom_policy == OOM_STOP)
+ service_enter_stop(s, SERVICE_FAILURE_OOM_KILL);
+ else if (s->oom_policy == OOM_KILL)
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL);
+
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL);
+ break;
+
+ case SERVICE_STOP_SIGKILL:
+ case SERVICE_FINAL_SIGKILL:
+ if (s->result == SERVICE_SUCCESS)
+ s->result = SERVICE_FAILURE_OOM_KILL;
+ break;
+
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_SIGTERM:
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_OOM_KILL);
+ break;
+
+ default:
+ ;
+ }
+}
+
+static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ bool notify_dbus = true;
+ Service *s = SERVICE(u);
+ ServiceResult f;
+ ExitClean clean_mode;
+
+ assert(s);
+ assert(pid >= 0);
+
+ /* Oneshot services and non-SERVICE_EXEC_START commands should not be
+ * considered daemons as they are typically not long running. */
+ if (s->type == SERVICE_ONESHOT || (s->control_pid == pid && s->control_command_id != SERVICE_EXEC_START))
+ clean_mode = EXIT_CLEAN_COMMAND;
+ else
+ clean_mode = EXIT_CLEAN_DAEMON;
+
+ if (is_clean_exit(code, status, clean_mode, &s->success_status))
+ f = SERVICE_SUCCESS;
+ else if (code == CLD_EXITED)
+ f = SERVICE_FAILURE_EXIT_CODE;
+ else if (code == CLD_KILLED)
+ f = SERVICE_FAILURE_SIGNAL;
+ else if (code == CLD_DUMPED)
+ f = SERVICE_FAILURE_CORE_DUMP;
+ else
+ assert_not_reached("Unknown code");
+
+ if (s->main_pid == pid) {
+ /* Forking services may occasionally move to a new PID.
+ * As long as they update the PID file before exiting the old
+ * PID, they're fine. */
+ if (service_load_pid_file(s, false) > 0)
+ return;
+
+ s->main_pid = 0;
+ exec_status_exit(&s->main_exec_status, &s->exec_context, pid, code, status);
+
+ if (s->main_command) {
+ /* If this is not a forking service than the
+ * main process got started and hence we copy
+ * the exit status so that it is recorded both
+ * as main and as control process exit
+ * status */
+
+ s->main_command->exec_status = s->main_exec_status;
+
+ if (s->main_command->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ f = SERVICE_SUCCESS;
+ } else if (s->exec_command[SERVICE_EXEC_START]) {
+
+ /* If this is a forked process, then we should
+ * ignore the return value if this was
+ * configured for the starter process */
+
+ if (s->exec_command[SERVICE_EXEC_START]->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ f = SERVICE_SUCCESS;
+ }
+
+ unit_log_process_exit(
+ u,
+ "Main process",
+ service_exec_command_to_string(SERVICE_EXEC_START),
+ f == SERVICE_SUCCESS,
+ code, status);
+
+ if (s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ if (s->main_command &&
+ s->main_command->command_next &&
+ s->type == SERVICE_ONESHOT &&
+ f == SERVICE_SUCCESS) {
+
+ /* There is another command to *
+ * execute, so let's do that. */
+
+ log_unit_debug(u, "Running next main command for state %s.", service_state_to_string(s->state));
+ service_run_next_main(s);
+
+ } else {
+
+ /* The service exited, so the service is officially
+ * gone. */
+ s->main_command = NULL;
+
+ switch (s->state) {
+
+ case SERVICE_START_POST:
+ case SERVICE_RELOAD:
+ case SERVICE_STOP:
+ /* Need to wait until the operation is
+ * done */
+ break;
+
+ case SERVICE_START:
+ if (s->type == SERVICE_ONESHOT) {
+ /* This was our main goal, so let's go on */
+ if (f == SERVICE_SUCCESS)
+ service_enter_start_post(s);
+ else
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+ } else if (s->type == SERVICE_NOTIFY) {
+ /* Only enter running through a notification, so that the
+ * SERVICE_START state signifies that no ready notification
+ * has been received */
+ if (f != SERVICE_SUCCESS)
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ else if (!s->remain_after_exit || s->notify_access == NOTIFY_MAIN)
+ /* The service has never been and will never be active */
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_PROTOCOL);
+ break;
+ }
+
+ _fallthrough_;
+ case SERVICE_RUNNING:
+ service_enter_running(s, f);
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+
+ if (control_pid_good(s) <= 0)
+ service_enter_stop_post(s, f);
+
+ /* If there is still a control process, wait for that first */
+ break;
+
+ case SERVICE_STOP_POST:
+
+ if (control_pid_good(s) <= 0)
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, f);
+
+ break;
+
+ case SERVICE_FINAL_WATCHDOG:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+
+ if (control_pid_good(s) <= 0)
+ service_enter_dead(s, f, true);
+ break;
+
+ default:
+ assert_not_reached("Uh, main process died at wrong time.");
+ }
+ }
+
+ } else if (s->control_pid == pid) {
+ s->control_pid = 0;
+
+ /* ExecCondition= calls that exit with (0, 254] should invoke skip-like behavior instead of failing */
+ if (f == SERVICE_FAILURE_EXIT_CODE && s->state == SERVICE_CONDITION && status < 255)
+ f = SERVICE_SKIP_CONDITION;
+
+ if (s->control_command) {
+ exec_status_exit(&s->control_command->exec_status, &s->exec_context, pid, code, status);
+
+ if (s->control_command->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ f = SERVICE_SUCCESS;
+ }
+
+ unit_log_process_exit(
+ u,
+ "Control process",
+ service_exec_command_to_string(s->control_command_id),
+ f == SERVICE_SUCCESS,
+ code, status);
+
+ if (s->state != SERVICE_RELOAD && s->result == SERVICE_SUCCESS)
+ s->result = f;
+
+ if (s->control_command &&
+ s->control_command->command_next &&
+ f == SERVICE_SUCCESS) {
+
+ /* There is another command to *
+ * execute, so let's do that. */
+
+ log_unit_debug(u, "Running next control command for state %s.", service_state_to_string(s->state));
+ service_run_next_control(s);
+
+ } else {
+ /* No further commands for this step, so let's
+ * figure out what to do next */
+
+ s->control_command = NULL;
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+
+ log_unit_debug(u, "Got final SIGCHLD for state %s.", service_state_to_string(s->state));
+
+ switch (s->state) {
+
+ case SERVICE_CONDITION:
+ if (f == SERVICE_SUCCESS)
+ service_enter_start_pre(s);
+ else
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+
+ case SERVICE_START_PRE:
+ if (f == SERVICE_SUCCESS)
+ service_enter_start(s);
+ else
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+
+ case SERVICE_START:
+ if (s->type != SERVICE_FORKING)
+ /* Maybe spurious event due to a reload that changed the type? */
+ break;
+
+ if (f != SERVICE_SUCCESS) {
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+ }
+
+ if (s->pid_file) {
+ bool has_start_post;
+ int r;
+
+ /* Let's try to load the pid file here if we can.
+ * The PID file might actually be created by a START_POST
+ * script. In that case don't worry if the loading fails. */
+
+ has_start_post = s->exec_command[SERVICE_EXEC_START_POST];
+ r = service_load_pid_file(s, !has_start_post);
+ if (!has_start_post && r < 0) {
+ r = service_demand_pid_file(s);
+ if (r < 0 || cgroup_good(s) == 0)
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_PROTOCOL);
+ break;
+ }
+ } else
+ service_search_main_pid(s);
+
+ service_enter_start_post(s);
+ break;
+
+ case SERVICE_START_POST:
+ if (f != SERVICE_SUCCESS) {
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+ }
+
+ if (s->pid_file) {
+ int r;
+
+ r = service_load_pid_file(s, true);
+ if (r < 0) {
+ r = service_demand_pid_file(s);
+ if (r < 0 || cgroup_good(s) == 0)
+ service_enter_stop(s, SERVICE_FAILURE_PROTOCOL);
+ break;
+ }
+ } else
+ service_search_main_pid(s);
+
+ service_enter_running(s, SERVICE_SUCCESS);
+ break;
+
+ case SERVICE_RELOAD:
+ if (f == SERVICE_SUCCESS)
+ if (service_load_pid_file(s, true) < 0)
+ service_search_main_pid(s);
+
+ s->reload_result = f;
+ service_enter_running(s, SERVICE_SUCCESS);
+ break;
+
+ case SERVICE_STOP:
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, f);
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+ if (main_pid_good(s) <= 0)
+ service_enter_stop_post(s, f);
+
+ /* If there is still a service process around, wait until
+ * that one quit, too */
+ break;
+
+ case SERVICE_STOP_POST:
+ if (main_pid_good(s) <= 0)
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, f);
+ break;
+
+ case SERVICE_FINAL_WATCHDOG:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+ if (main_pid_good(s) <= 0)
+ service_enter_dead(s, f, true);
+ break;
+
+ case SERVICE_CLEANING:
+
+ if (s->clean_result == SERVICE_SUCCESS)
+ s->clean_result = f;
+
+ service_enter_dead(s, SERVICE_SUCCESS, false);
+ break;
+
+ default:
+ assert_not_reached("Uh, control process died at wrong time.");
+ }
+ }
+ } else /* Neither control nor main PID? If so, don't notify about anything */
+ notify_dbus = false;
+
+ /* Notify clients about changed exit status */
+ if (notify_dbus)
+ unit_add_to_dbus_queue(u);
+
+ /* We watch the main/control process otherwise we can't retrieve the unit they
+ * belong to with cgroupv1. But if they are not our direct child, we won't get a
+ * SIGCHLD for them. Therefore we need to look for others to watch so we can
+ * detect when the cgroup becomes empty. Note that the control process is always
+ * our child so it's pointless to watch all other processes. */
+ if (!control_pid_good(s))
+ if (!s->main_pid_known || s->main_pid_alien)
+ (void) unit_enqueue_rewatch_pids(u);
+}
+
+static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Service *s = SERVICE(userdata);
+
+ assert(s);
+ assert(source == s->timer_event_source);
+
+ switch (s->state) {
+
+ case SERVICE_CONDITION:
+ case SERVICE_START_PRE:
+ case SERVICE_START:
+ case SERVICE_START_POST:
+ switch (s->timeout_start_failure_mode) {
+
+ case SERVICE_TIMEOUT_TERMINATE:
+ log_unit_warning(UNIT(s), "%s operation timed out. Terminating.", service_state_to_string(s->state));
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_ABORT:
+ log_unit_warning(UNIT(s), "%s operation timed out. Aborting.", service_state_to_string(s->state));
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_KILL:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "%s operation timed out. Killing.", service_state_to_string(s->state));
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "%s operation timed out. Skipping SIGKILL.", service_state_to_string(s->state));
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ }
+ break;
+
+ default:
+ assert_not_reached("unknown timeout mode");
+ }
+ break;
+
+ case SERVICE_RUNNING:
+ log_unit_warning(UNIT(s), "Service reached runtime time limit. Stopping.");
+ service_enter_stop(s, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_RELOAD:
+ log_unit_warning(UNIT(s), "Reload operation timed out. Killing reload process.");
+ service_kill_control_process(s);
+ s->reload_result = SERVICE_FAILURE_TIMEOUT;
+ service_enter_running(s, SERVICE_SUCCESS);
+ break;
+
+ case SERVICE_STOP:
+ switch (s->timeout_stop_failure_mode) {
+
+ case SERVICE_TIMEOUT_TERMINATE:
+ log_unit_warning(UNIT(s), "Stopping timed out. Terminating.");
+ service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_ABORT:
+ log_unit_warning(UNIT(s), "Stopping timed out. Aborting.");
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_KILL:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Stopping timed out. Killing.");
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Stopping timed out. Skipping SIGKILL.");
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ }
+ break;
+
+ default:
+ assert_not_reached("unknown timeout mode");
+ }
+ break;
+
+ case SERVICE_STOP_WATCHDOG:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Killing.");
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'stop-watchdog' timed out. Skipping SIGKILL.");
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case SERVICE_STOP_SIGTERM:
+ if (s->timeout_stop_failure_mode == SERVICE_TIMEOUT_ABORT) {
+ log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Aborting.");
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ } else if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Killing.");
+ service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Skipping SIGKILL.");
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ }
+
+ break;
+
+ case SERVICE_STOP_SIGKILL:
+ /* Uh, we sent a SIGKILL and it is still not gone?
+ * Must be something we cannot kill, so let's just be
+ * weirded out and continue */
+
+ log_unit_warning(UNIT(s), "Processes still around after SIGKILL. Ignoring.");
+ service_enter_stop_post(s, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_STOP_POST:
+ switch (s->timeout_stop_failure_mode) {
+
+ case SERVICE_TIMEOUT_TERMINATE:
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Terminating.");
+ service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_ABORT:
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Aborting.");
+ service_enter_signal(s, SERVICE_FINAL_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ break;
+
+ case SERVICE_TIMEOUT_KILL:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Killing.");
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'stop-post' timed out. Skipping SIGKILL. Entering failed mode.");
+ service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false);
+ }
+ break;
+
+ default:
+ assert_not_reached("unknown timeout mode");
+ }
+ break;
+
+ case SERVICE_FINAL_WATCHDOG:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'final-watchdog' timed out. Killing.");
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'final-watchdog' timed out. Skipping SIGKILL. Entering failed mode.");
+ service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false);
+ }
+ break;
+
+ case SERVICE_FINAL_SIGTERM:
+ if (s->timeout_stop_failure_mode == SERVICE_TIMEOUT_ABORT) {
+ log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Aborting.");
+ service_enter_signal(s, SERVICE_FINAL_WATCHDOG, SERVICE_FAILURE_TIMEOUT);
+ } else if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Killing.");
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "State 'final-sigterm' timed out. Skipping SIGKILL. Entering failed mode.");
+ service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, false);
+ }
+
+ break;
+
+ case SERVICE_FINAL_SIGKILL:
+ log_unit_warning(UNIT(s), "Processes still around after final SIGKILL. Entering failed mode.");
+ service_enter_dead(s, SERVICE_FAILURE_TIMEOUT, true);
+ break;
+
+ case SERVICE_AUTO_RESTART:
+ if (s->restart_usec > 0) {
+ char buf_restart[FORMAT_TIMESPAN_MAX];
+ log_unit_debug(UNIT(s),
+ "Service RestartSec=%s expired, scheduling restart.",
+ format_timespan(buf_restart, sizeof buf_restart, s->restart_usec, USEC_PER_SEC));
+ } else
+ log_unit_debug(UNIT(s),
+ "Service has no hold-off time (RestartSec=0), scheduling restart.");
+
+ service_enter_restart(s);
+ break;
+
+ case SERVICE_CLEANING:
+ log_unit_warning(UNIT(s), "Cleaning timed out. killing.");
+
+ if (s->clean_result == SERVICE_SUCCESS)
+ s->clean_result = SERVICE_FAILURE_TIMEOUT;
+
+ service_enter_signal(s, SERVICE_FINAL_SIGKILL, 0);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata) {
+ Service *s = SERVICE(userdata);
+ char t[FORMAT_TIMESPAN_MAX];
+ usec_t watchdog_usec;
+
+ assert(s);
+ assert(source == s->watchdog_event_source);
+
+ watchdog_usec = service_get_watchdog_usec(s);
+
+ if (UNIT(s)->manager->service_watchdogs) {
+ log_unit_error(UNIT(s), "Watchdog timeout (limit %s)!",
+ format_timespan(t, sizeof(t), watchdog_usec, 1));
+
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_WATCHDOG);
+ } else
+ log_unit_warning(UNIT(s), "Watchdog disabled! Ignoring watchdog timeout (limit %s)!",
+ format_timespan(t, sizeof(t), watchdog_usec, 1));
+
+ return 0;
+}
+
+static bool service_notify_message_authorized(Service *s, pid_t pid, FDSet *fds) {
+ assert(s);
+
+ if (s->notify_access == NOTIFY_NONE) {
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception is disabled.", pid);
+ return false;
+ }
+
+ if (s->notify_access == NOTIFY_MAIN && pid != s->main_pid) {
+ if (s->main_pid != 0)
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid);
+ else
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID which is currently not known", pid);
+
+ return false;
+ }
+
+ if (s->notify_access == NOTIFY_EXEC && pid != s->main_pid && pid != s->control_pid) {
+ if (s->main_pid != 0 && s->control_pid != 0)
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT" and control PID "PID_FMT,
+ pid, s->main_pid, s->control_pid);
+ else if (s->main_pid != 0)
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid);
+ else if (s->control_pid != 0)
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for control PID "PID_FMT, pid, s->control_pid);
+ else
+ log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID and control PID which are currently not known", pid);
+
+ return false;
+ }
+
+ return true;
+}
+
+static void service_force_watchdog(Service *s) {
+ if (!UNIT(s)->manager->service_watchdogs)
+ return;
+
+ log_unit_error(UNIT(s), "Watchdog request (last status: %s)!",
+ s->status_text ? s->status_text : "<unset>");
+
+ service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_WATCHDOG);
+}
+
+static void service_notify_message(
+ Unit *u,
+ const struct ucred *ucred,
+ char * const *tags,
+ FDSet *fds) {
+
+ Service *s = SERVICE(u);
+ bool notify_dbus = false;
+ const char *e;
+ char * const *i;
+ int r;
+
+ assert(u);
+ assert(ucred);
+
+ if (!service_notify_message_authorized(SERVICE(u), ucred->pid, fds))
+ return;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *cc = NULL;
+
+ cc = strv_join(tags, ", ");
+ log_unit_debug(u, "Got notification message from PID "PID_FMT" (%s)", ucred->pid, isempty(cc) ? "n/a" : cc);
+ }
+
+ /* Interpret MAINPID= */
+ e = strv_find_startswith(tags, "MAINPID=");
+ if (e && IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD)) {
+ pid_t new_main_pid;
+
+ if (parse_pid(e, &new_main_pid) < 0)
+ log_unit_warning(u, "Failed to parse MAINPID= field in notification message, ignoring: %s", e);
+ else if (!s->main_pid_known || new_main_pid != s->main_pid) {
+
+ r = service_is_suitable_main_pid(s, new_main_pid, LOG_WARNING);
+ if (r == 0) {
+ /* The new main PID is a bit suspicious, which is OK if the sender is privileged. */
+
+ if (ucred->uid == 0) {
+ log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, but we'll accept it as the request to change it came from a privileged process.", new_main_pid);
+ r = 1;
+ } else
+ log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid);
+ }
+ if (r > 0) {
+ service_set_main_pid(s, new_main_pid);
+
+ r = unit_watch_pid(UNIT(s), new_main_pid, false);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", new_main_pid);
+
+ notify_dbus = true;
+ }
+ }
+ }
+
+ /* Interpret READY=/STOPPING=/RELOADING=. Last one wins. */
+ STRV_FOREACH_BACKWARDS(i, tags) {
+
+ if (streq(*i, "READY=1")) {
+ s->notify_state = NOTIFY_READY;
+
+ /* Type=notify services inform us about completed
+ * initialization with READY=1 */
+ if (s->type == SERVICE_NOTIFY && s->state == SERVICE_START)
+ service_enter_start_post(s);
+
+ /* Sending READY=1 while we are reloading informs us
+ * that the reloading is complete */
+ if (s->state == SERVICE_RELOAD && s->control_pid == 0)
+ service_enter_running(s, SERVICE_SUCCESS);
+
+ notify_dbus = true;
+ break;
+
+ } else if (streq(*i, "RELOADING=1")) {
+ s->notify_state = NOTIFY_RELOADING;
+
+ if (s->state == SERVICE_RUNNING)
+ service_enter_reload_by_notify(s);
+
+ notify_dbus = true;
+ break;
+
+ } else if (streq(*i, "STOPPING=1")) {
+ s->notify_state = NOTIFY_STOPPING;
+
+ if (s->state == SERVICE_RUNNING)
+ service_enter_stop_by_notify(s);
+
+ notify_dbus = true;
+ break;
+ }
+ }
+
+ /* Interpret STATUS= */
+ e = strv_find_startswith(tags, "STATUS=");
+ if (e) {
+ _cleanup_free_ char *t = NULL;
+
+ if (!isempty(e)) {
+ /* Note that this size limit check is mostly paranoia: since the datagram size we are willing
+ * to process is already limited to NOTIFY_BUFFER_MAX, this limit here should never be hit. */
+ if (strlen(e) > STATUS_TEXT_MAX)
+ log_unit_warning(u, "Status message overly long (%zu > %u), ignoring.", strlen(e), STATUS_TEXT_MAX);
+ else if (!utf8_is_valid(e))
+ log_unit_warning(u, "Status message in notification message is not UTF-8 clean, ignoring.");
+ else {
+ t = strdup(e);
+ if (!t)
+ log_oom();
+ }
+ }
+
+ if (!streq_ptr(s->status_text, t)) {
+ free_and_replace(s->status_text, t);
+ notify_dbus = true;
+ }
+ }
+
+ /* Interpret ERRNO= */
+ e = strv_find_startswith(tags, "ERRNO=");
+ if (e) {
+ int status_errno;
+
+ status_errno = parse_errno(e);
+ if (status_errno < 0)
+ log_unit_warning_errno(u, status_errno,
+ "Failed to parse ERRNO= field value '%s' in notification message: %m", e);
+ else if (s->status_errno != status_errno) {
+ s->status_errno = status_errno;
+ notify_dbus = true;
+ }
+ }
+
+ /* Interpret EXTEND_TIMEOUT= */
+ e = strv_find_startswith(tags, "EXTEND_TIMEOUT_USEC=");
+ if (e) {
+ usec_t extend_timeout_usec;
+ if (safe_atou64(e, &extend_timeout_usec) < 0)
+ log_unit_warning(u, "Failed to parse EXTEND_TIMEOUT_USEC=%s", e);
+ else
+ service_extend_timeout(s, extend_timeout_usec);
+ }
+
+ /* Interpret WATCHDOG= */
+ e = strv_find_startswith(tags, "WATCHDOG=");
+ if (e) {
+ if (streq(e, "1"))
+ service_reset_watchdog(s);
+ else if (streq(e, "trigger"))
+ service_force_watchdog(s);
+ else
+ log_unit_warning(u, "Passed WATCHDOG= field is invalid, ignoring.");
+ }
+
+ e = strv_find_startswith(tags, "WATCHDOG_USEC=");
+ if (e) {
+ usec_t watchdog_override_usec;
+ if (safe_atou64(e, &watchdog_override_usec) < 0)
+ log_unit_warning(u, "Failed to parse WATCHDOG_USEC=%s", e);
+ else
+ service_override_watchdog_timeout(s, watchdog_override_usec);
+ }
+
+ /* Process FD store messages. Either FDSTOREREMOVE=1 for removal, or FDSTORE=1 for addition. In both cases,
+ * process FDNAME= for picking the file descriptor name to use. Note that FDNAME= is required when removing
+ * fds, but optional when pushing in new fds, for compatibility reasons. */
+ if (strv_find(tags, "FDSTOREREMOVE=1")) {
+ const char *name;
+
+ name = strv_find_startswith(tags, "FDNAME=");
+ if (!name || !fdname_is_valid(name))
+ log_unit_warning(u, "FDSTOREREMOVE=1 requested, but no valid file descriptor name passed, ignoring.");
+ else
+ service_remove_fd_store(s, name);
+
+ } else if (strv_find(tags, "FDSTORE=1")) {
+ const char *name;
+
+ name = strv_find_startswith(tags, "FDNAME=");
+ if (name && !fdname_is_valid(name)) {
+ log_unit_warning(u, "Passed FDNAME= name is invalid, ignoring.");
+ name = NULL;
+ }
+
+ (void) service_add_fd_store_set(s, fds, name, !strv_contains(tags, "FDPOLL=0"));
+ }
+
+ /* Notify clients about changed status or main pid */
+ if (notify_dbus)
+ unit_add_to_dbus_queue(u);
+}
+
+static int service_get_timeout(Unit *u, usec_t *timeout) {
+ Service *s = SERVICE(u);
+ uint64_t t;
+ int r;
+
+ if (!s->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(s->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+static void service_bus_name_owner_change(Unit *u, const char *new_owner) {
+
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+
+ if (new_owner)
+ log_unit_debug(u, "D-Bus name %s now owned by %s", s->bus_name, new_owner);
+ else
+ log_unit_debug(u, "D-Bus name %s now not owned by anyone.", s->bus_name);
+
+ s->bus_name_good = new_owner;
+
+ /* Track the current owner, so we can reconstruct changes after a daemon reload */
+ r = free_and_strdup(&s->bus_name_owner, new_owner);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Unable to set new bus name owner %s: %m", new_owner);
+ return;
+ }
+
+ if (s->type == SERVICE_DBUS) {
+
+ /* service_enter_running() will figure out what to
+ * do */
+ if (s->state == SERVICE_RUNNING)
+ service_enter_running(s, SERVICE_SUCCESS);
+ else if (s->state == SERVICE_START && new_owner)
+ service_enter_start_post(s);
+
+ } else if (new_owner &&
+ s->main_pid <= 0 &&
+ IN_SET(s->state,
+ SERVICE_START,
+ SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_RELOAD)) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ /* Try to acquire PID from bus service */
+
+ r = sd_bus_get_name_creds(u->manager->api_bus, s->bus_name, SD_BUS_CREDS_PID, &creds);
+ if (r >= 0)
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r >= 0) {
+ log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, s->bus_name, pid);
+
+ service_set_main_pid(s, pid);
+ unit_watch_pid(UNIT(s), pid, false);
+ }
+ }
+}
+
+int service_set_socket_fd(Service *s, int fd, Socket *sock, bool selinux_context_net) {
+ _cleanup_free_ char *peer = NULL;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ /* This is called by the socket code when instantiating a new service for a stream socket and the socket needs
+ * to be configured. We take ownership of the passed fd on success. */
+
+ if (UNIT(s)->load_state != UNIT_LOADED)
+ return -EINVAL;
+
+ if (s->socket_fd >= 0)
+ return -EBUSY;
+
+ if (s->state != SERVICE_DEAD)
+ return -EAGAIN;
+
+ if (getpeername_pretty(fd, true, &peer) >= 0) {
+
+ if (UNIT(s)->description) {
+ _cleanup_free_ char *a;
+
+ a = strjoin(UNIT(s)->description, " (", peer, ")");
+ if (!a)
+ return -ENOMEM;
+
+ r = unit_set_description(UNIT(s), a);
+ } else
+ r = unit_set_description(UNIT(s), peer);
+
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_add_two_dependencies(UNIT(sock), UNIT_BEFORE, UNIT_TRIGGERS, UNIT(s), false, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ return r;
+
+ s->socket_fd = fd;
+ s->socket_fd_selinux_context_net = selinux_context_net;
+
+ unit_ref_set(&s->accept_socket, UNIT(s), UNIT(sock));
+ return 0;
+}
+
+static void service_reset_failed(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ if (s->state == SERVICE_FAILED)
+ service_set_state(s, SERVICE_DEAD);
+
+ s->result = SERVICE_SUCCESS;
+ s->reload_result = SERVICE_SUCCESS;
+ s->clean_result = SERVICE_SUCCESS;
+ s->n_restarts = 0;
+ s->flush_n_restarts = false;
+}
+
+static int service_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return unit_kill_common(u, who, signo, s->main_pid, s->control_pid, error);
+}
+
+static int service_main_pid(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return s->main_pid;
+}
+
+static int service_control_pid(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return s->control_pid;
+}
+
+static bool service_needs_console(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ /* We provide our own implementation of this here, instead of relying of the generic implementation
+ * unit_needs_console() provides, since we want to return false if we are in SERVICE_EXITED state. */
+
+ if (!exec_context_may_touch_console(&s->exec_context))
+ return false;
+
+ return IN_SET(s->state,
+ SERVICE_CONDITION,
+ SERVICE_START_PRE,
+ SERVICE_START,
+ SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_RELOAD,
+ SERVICE_STOP,
+ SERVICE_STOP_WATCHDOG,
+ SERVICE_STOP_SIGTERM,
+ SERVICE_STOP_SIGKILL,
+ SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG,
+ SERVICE_FINAL_SIGTERM,
+ SERVICE_FINAL_SIGKILL);
+}
+
+static int service_exit_status(Unit *u) {
+ Service *s = SERVICE(u);
+
+ assert(u);
+
+ if (s->main_exec_status.pid <= 0 ||
+ !dual_timestamp_is_set(&s->main_exec_status.exit_timestamp))
+ return -ENODATA;
+
+ if (s->main_exec_status.code != CLD_EXITED)
+ return -EBADE;
+
+ return s->main_exec_status.status;
+}
+
+static int service_clean(Unit *u, ExecCleanMask mask) {
+ _cleanup_strv_free_ char **l = NULL;
+ Service *s = SERVICE(u);
+ int r;
+
+ assert(s);
+ assert(mask != 0);
+
+ if (s->state != SERVICE_DEAD)
+ return -EBUSY;
+
+ r = exec_context_get_clean_directories(&s->exec_context, u->manager->prefix, mask, &l);
+ if (r < 0)
+ return r;
+
+ if (strv_isempty(l))
+ return -EUNATCH;
+
+ service_unwatch_control_pid(s);
+ s->clean_result = SERVICE_SUCCESS;
+ s->control_command = NULL;
+ s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
+
+ r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->exec_context.timeout_clean_usec));
+ if (r < 0)
+ goto fail;
+
+ r = unit_fork_and_watch_rm_rf(u, l, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ service_set_state(s, SERVICE_CLEANING);
+
+ return 0;
+
+fail:
+ log_unit_warning_errno(u, r, "Failed to initiate cleaning: %m");
+ s->clean_result = SERVICE_FAILURE_RESOURCES;
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ return r;
+}
+
+static int service_can_clean(Unit *u, ExecCleanMask *ret) {
+ Service *s = SERVICE(u);
+
+ assert(s);
+
+ return exec_context_get_clean_mask(&s->exec_context, ret);
+}
+
+static const char *service_finished_job(Unit *u, JobType t, JobResult result) {
+ if (t == JOB_START && result == JOB_DONE) {
+ Service *s = SERVICE(u);
+
+ if (s->type == SERVICE_ONESHOT)
+ return "Finished %s.";
+ }
+
+ /* Fall back to generic */
+ return NULL;
+}
+
+static const char* const service_restart_table[_SERVICE_RESTART_MAX] = {
+ [SERVICE_RESTART_NO] = "no",
+ [SERVICE_RESTART_ON_SUCCESS] = "on-success",
+ [SERVICE_RESTART_ON_FAILURE] = "on-failure",
+ [SERVICE_RESTART_ON_ABNORMAL] = "on-abnormal",
+ [SERVICE_RESTART_ON_WATCHDOG] = "on-watchdog",
+ [SERVICE_RESTART_ON_ABORT] = "on-abort",
+ [SERVICE_RESTART_ALWAYS] = "always",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_restart, ServiceRestart);
+
+static const char* const service_type_table[_SERVICE_TYPE_MAX] = {
+ [SERVICE_SIMPLE] = "simple",
+ [SERVICE_FORKING] = "forking",
+ [SERVICE_ONESHOT] = "oneshot",
+ [SERVICE_DBUS] = "dbus",
+ [SERVICE_NOTIFY] = "notify",
+ [SERVICE_IDLE] = "idle",
+ [SERVICE_EXEC] = "exec",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_type, ServiceType);
+
+static const char* const service_exec_command_table[_SERVICE_EXEC_COMMAND_MAX] = {
+ [SERVICE_EXEC_CONDITION] = "ExecCondition",
+ [SERVICE_EXEC_START_PRE] = "ExecStartPre",
+ [SERVICE_EXEC_START] = "ExecStart",
+ [SERVICE_EXEC_START_POST] = "ExecStartPost",
+ [SERVICE_EXEC_RELOAD] = "ExecReload",
+ [SERVICE_EXEC_STOP] = "ExecStop",
+ [SERVICE_EXEC_STOP_POST] = "ExecStopPost",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_exec_command, ServiceExecCommand);
+
+static const char* const service_exec_ex_command_table[_SERVICE_EXEC_COMMAND_MAX] = {
+ [SERVICE_EXEC_CONDITION] = "ExecConditionEx",
+ [SERVICE_EXEC_START_PRE] = "ExecStartPreEx",
+ [SERVICE_EXEC_START] = "ExecStartEx",
+ [SERVICE_EXEC_START_POST] = "ExecStartPostEx",
+ [SERVICE_EXEC_RELOAD] = "ExecReloadEx",
+ [SERVICE_EXEC_STOP] = "ExecStopEx",
+ [SERVICE_EXEC_STOP_POST] = "ExecStopPostEx",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_exec_ex_command, ServiceExecCommand);
+
+static const char* const notify_state_table[_NOTIFY_STATE_MAX] = {
+ [NOTIFY_UNKNOWN] = "unknown",
+ [NOTIFY_READY] = "ready",
+ [NOTIFY_RELOADING] = "reloading",
+ [NOTIFY_STOPPING] = "stopping",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(notify_state, NotifyState);
+
+static const char* const service_result_table[_SERVICE_RESULT_MAX] = {
+ [SERVICE_SUCCESS] = "success",
+ [SERVICE_FAILURE_RESOURCES] = "resources",
+ [SERVICE_FAILURE_PROTOCOL] = "protocol",
+ [SERVICE_FAILURE_TIMEOUT] = "timeout",
+ [SERVICE_FAILURE_EXIT_CODE] = "exit-code",
+ [SERVICE_FAILURE_SIGNAL] = "signal",
+ [SERVICE_FAILURE_CORE_DUMP] = "core-dump",
+ [SERVICE_FAILURE_WATCHDOG] = "watchdog",
+ [SERVICE_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [SERVICE_FAILURE_OOM_KILL] = "oom-kill",
+ [SERVICE_SKIP_CONDITION] = "exec-condition",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult);
+
+static const char* const service_timeout_failure_mode_table[_SERVICE_TIMEOUT_FAILURE_MODE_MAX] = {
+ [SERVICE_TIMEOUT_TERMINATE] = "terminate",
+ [SERVICE_TIMEOUT_ABORT] = "abort",
+ [SERVICE_TIMEOUT_KILL] = "kill",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_timeout_failure_mode, ServiceTimeoutFailureMode);
+
+const UnitVTable service_vtable = {
+ .object_size = sizeof(Service),
+ .exec_context_offset = offsetof(Service, exec_context),
+ .cgroup_context_offset = offsetof(Service, cgroup_context),
+ .kill_context_offset = offsetof(Service, kill_context),
+ .exec_runtime_offset = offsetof(Service, exec_runtime),
+ .dynamic_creds_offset = offsetof(Service, dynamic_creds),
+
+ .sections =
+ "Unit\0"
+ "Service\0"
+ "Install\0",
+ .private_section = "Service",
+
+ .can_transient = true,
+ .can_delegate = true,
+ .can_fail = true,
+ .can_set_managed_oom = true,
+
+ .init = service_init,
+ .done = service_done,
+ .load = service_load,
+ .release_resources = service_release_resources,
+
+ .coldplug = service_coldplug,
+
+ .dump = service_dump,
+
+ .start = service_start,
+ .stop = service_stop,
+ .reload = service_reload,
+
+ .can_reload = service_can_reload,
+
+ .kill = service_kill,
+ .clean = service_clean,
+ .can_clean = service_can_clean,
+
+ .freeze = unit_freeze_vtable_common,
+ .thaw = unit_thaw_vtable_common,
+
+ .serialize = service_serialize,
+ .deserialize_item = service_deserialize_item,
+
+ .active_state = service_active_state,
+ .sub_state_to_string = service_sub_state_to_string,
+
+ .will_restart = service_will_restart,
+
+ .may_gc = service_may_gc,
+
+ .sigchld_event = service_sigchld_event,
+
+ .reset_failed = service_reset_failed,
+
+ .notify_cgroup_empty = service_notify_cgroup_empty_event,
+ .notify_cgroup_oom = service_notify_cgroup_oom_event,
+ .notify_message = service_notify_message,
+
+ .main_pid = service_main_pid,
+ .control_pid = service_control_pid,
+
+ .bus_name_owner_change = service_bus_name_owner_change,
+
+ .bus_set_property = bus_service_set_property,
+ .bus_commit_properties = bus_service_commit_properties,
+
+ .get_timeout = service_get_timeout,
+ .needs_console = service_needs_console,
+ .exit_status = service_exit_status,
+
+ .status_message_formats = {
+ .starting_stopping = {
+ [0] = "Starting %s...",
+ [1] = "Stopping %s...",
+ },
+ .finished_start_job = {
+ [JOB_FAILED] = "Failed to start %s.",
+ [JOB_SKIPPED] = "Skipped %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Stopped %s.",
+ [JOB_FAILED] = "Stopped (with error) %s.",
+ },
+ .finished_job = service_finished_job,
+ },
+};
diff --git a/src/core/service.h b/src/core/service.h
new file mode 100644
index 0000000..11c3d3f
--- /dev/null
+++ b/src/core/service.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Service Service;
+typedef struct ServiceFDStore ServiceFDStore;
+
+#include "exit-status.h"
+#include "kill.h"
+#include "path.h"
+#include "ratelimit.h"
+#include "socket.h"
+#include "unit.h"
+
+typedef enum ServiceRestart {
+ SERVICE_RESTART_NO,
+ SERVICE_RESTART_ON_SUCCESS,
+ SERVICE_RESTART_ON_FAILURE,
+ SERVICE_RESTART_ON_ABNORMAL,
+ SERVICE_RESTART_ON_WATCHDOG,
+ SERVICE_RESTART_ON_ABORT,
+ SERVICE_RESTART_ALWAYS,
+ _SERVICE_RESTART_MAX,
+ _SERVICE_RESTART_INVALID = -1
+} ServiceRestart;
+
+typedef enum ServiceType {
+ SERVICE_SIMPLE, /* we fork and go on right-away (i.e. modern socket activated daemons) */
+ SERVICE_FORKING, /* forks by itself (i.e. traditional daemons) */
+ SERVICE_ONESHOT, /* we fork and wait until the program finishes (i.e. programs like fsck which run and need to finish before we continue) */
+ SERVICE_DBUS, /* we fork and wait until a specific D-Bus name appears on the bus */
+ SERVICE_NOTIFY, /* we fork and wait until a daemon sends us a ready message with sd_notify() */
+ SERVICE_IDLE, /* much like simple, but delay exec() until all jobs are dispatched. */
+ SERVICE_EXEC, /* we fork and wait until we execute exec() (this means our own setup is waited for) */
+ _SERVICE_TYPE_MAX,
+ _SERVICE_TYPE_INVALID = -1
+} ServiceType;
+
+typedef enum ServiceExecCommand {
+ SERVICE_EXEC_CONDITION,
+ SERVICE_EXEC_START_PRE,
+ SERVICE_EXEC_START,
+ SERVICE_EXEC_START_POST,
+ SERVICE_EXEC_RELOAD,
+ SERVICE_EXEC_STOP,
+ SERVICE_EXEC_STOP_POST,
+ _SERVICE_EXEC_COMMAND_MAX,
+ _SERVICE_EXEC_COMMAND_INVALID = -1
+} ServiceExecCommand;
+
+typedef enum NotifyState {
+ NOTIFY_UNKNOWN,
+ NOTIFY_READY,
+ NOTIFY_RELOADING,
+ NOTIFY_STOPPING,
+ _NOTIFY_STATE_MAX,
+ _NOTIFY_STATE_INVALID = -1
+} NotifyState;
+
+/* The values of this enum are referenced in man/systemd.exec.xml and src/shared/bus-unit-util.c.
+ * Update those sources for each change to this enum. */
+typedef enum ServiceResult {
+ SERVICE_SUCCESS,
+ SERVICE_FAILURE_RESOURCES, /* a bit of a misnomer, just our catch-all error for errnos we didn't expect */
+ SERVICE_FAILURE_PROTOCOL,
+ SERVICE_FAILURE_TIMEOUT,
+ SERVICE_FAILURE_EXIT_CODE,
+ SERVICE_FAILURE_SIGNAL,
+ SERVICE_FAILURE_CORE_DUMP,
+ SERVICE_FAILURE_WATCHDOG,
+ SERVICE_FAILURE_START_LIMIT_HIT,
+ SERVICE_FAILURE_OOM_KILL,
+ SERVICE_SKIP_CONDITION,
+ _SERVICE_RESULT_MAX,
+ _SERVICE_RESULT_INVALID = -1
+} ServiceResult;
+
+typedef enum ServiceTimeoutFailureMode {
+ SERVICE_TIMEOUT_TERMINATE,
+ SERVICE_TIMEOUT_ABORT,
+ SERVICE_TIMEOUT_KILL,
+ _SERVICE_TIMEOUT_FAILURE_MODE_MAX,
+ _SERVICE_TIMEOUT_FAILURE_MODE_INVALID = -1
+} ServiceTimeoutFailureMode;
+
+struct ServiceFDStore {
+ Service *service;
+
+ int fd;
+ char *fdname;
+ sd_event_source *event_source;
+ bool do_poll;
+
+ LIST_FIELDS(ServiceFDStore, fd_store);
+};
+
+struct Service {
+ Unit meta;
+
+ ServiceType type;
+ ServiceRestart restart;
+ ExitStatusSet restart_prevent_status;
+ ExitStatusSet restart_force_status;
+ ExitStatusSet success_status;
+
+ /* If set we'll read the main daemon PID from this file */
+ char *pid_file;
+
+ usec_t restart_usec;
+ usec_t timeout_start_usec;
+ usec_t timeout_stop_usec;
+ usec_t timeout_abort_usec;
+ bool timeout_abort_set;
+ usec_t runtime_max_usec;
+ ServiceTimeoutFailureMode timeout_start_failure_mode;
+ ServiceTimeoutFailureMode timeout_stop_failure_mode;
+
+ dual_timestamp watchdog_timestamp;
+ usec_t watchdog_usec; /* the requested watchdog timeout in the unit file */
+ usec_t watchdog_original_usec; /* the watchdog timeout that was in effect when the unit was started, i.e. the timeout the forked off processes currently see */
+ usec_t watchdog_override_usec; /* the watchdog timeout requested by the service itself through sd_notify() */
+ bool watchdog_override_enable;
+ sd_event_source *watchdog_event_source;
+
+ ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
+
+ ExecContext exec_context;
+ KillContext kill_context;
+ CGroupContext cgroup_context;
+
+ ServiceState state, deserialized_state;
+
+ /* The exit status of the real main process */
+ ExecStatus main_exec_status;
+
+ /* The currently executed control process */
+ ExecCommand *control_command;
+
+ /* The currently executed main process, which may be NULL if
+ * the main process got started via forking mode and not by
+ * us */
+ ExecCommand *main_command;
+
+ /* The ID of the control command currently being executed */
+ ServiceExecCommand control_command_id;
+
+ /* Runtime data of the execution context */
+ ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
+
+ pid_t main_pid, control_pid;
+ int socket_fd;
+ SocketPeer *peer;
+ bool socket_fd_selinux_context_net;
+
+ bool permissions_start_only;
+ bool root_directory_start_only;
+ bool remain_after_exit;
+ bool guess_main_pid;
+
+ /* If we shut down, remember why */
+ ServiceResult result;
+ ServiceResult reload_result;
+ ServiceResult clean_result;
+
+ bool main_pid_known:1;
+ bool main_pid_alien:1;
+ bool bus_name_good:1;
+ bool forbid_restart:1;
+ /* Keep restart intention between UNIT_FAILED and UNIT_ACTIVATING */
+ bool will_auto_restart:1;
+ bool start_timeout_defined:1;
+ bool exec_fd_hot:1;
+
+ char *bus_name;
+ char *bus_name_owner; /* unique name of the current owner */
+
+ char *status_text;
+ int status_errno;
+
+ UnitRef accept_socket;
+
+ sd_event_source *timer_event_source;
+ PathSpec *pid_file_pathspec;
+
+ NotifyAccess notify_access;
+ NotifyState notify_state;
+
+ sd_event_source *exec_fd_event_source;
+
+ ServiceFDStore *fd_store;
+ size_t n_fd_store;
+ unsigned n_fd_store_max;
+ unsigned n_keep_fd_store;
+
+ char *usb_function_descriptors;
+ char *usb_function_strings;
+
+ int stdin_fd;
+ int stdout_fd;
+ int stderr_fd;
+
+ unsigned n_restarts;
+ bool flush_n_restarts;
+
+ OOMPolicy oom_policy;
+};
+
+static inline usec_t service_timeout_abort_usec(Service *s) {
+ assert(s);
+ return s->timeout_abort_set ? s->timeout_abort_usec : s->timeout_stop_usec;
+}
+
+static inline usec_t service_get_watchdog_usec(Service *s) {
+ assert(s);
+ return s->watchdog_override_enable ? s->watchdog_override_usec : s->watchdog_original_usec;
+}
+
+extern const UnitVTable service_vtable;
+
+int service_set_socket_fd(Service *s, int fd, struct Socket *socket, bool selinux_context_net);
+void service_close_socket_fd(Service *s);
+
+const char* service_restart_to_string(ServiceRestart i) _const_;
+ServiceRestart service_restart_from_string(const char *s) _pure_;
+
+const char* service_type_to_string(ServiceType i) _const_;
+ServiceType service_type_from_string(const char *s) _pure_;
+
+const char* service_exec_command_to_string(ServiceExecCommand i) _const_;
+ServiceExecCommand service_exec_command_from_string(const char *s) _pure_;
+
+const char* service_exec_ex_command_to_string(ServiceExecCommand i) _const_;
+ServiceExecCommand service_exec_ex_command_from_string(const char *s) _pure_;
+
+const char* notify_state_to_string(NotifyState i) _const_;
+NotifyState notify_state_from_string(const char *s) _pure_;
+
+const char* service_result_to_string(ServiceResult i) _const_;
+ServiceResult service_result_from_string(const char *s) _pure_;
+
+const char* service_timeout_failure_mode_to_string(ServiceTimeoutFailureMode i) _const_;
+ServiceTimeoutFailureMode service_timeout_failure_mode_from_string(const char *s) _pure_;
+
+DEFINE_CAST(SERVICE, Service);
+
+#define STATUS_TEXT_MAX (16U*1024U)
diff --git a/src/core/show-status.c b/src/core/show-status.c
new file mode 100644
index 0000000..a74423c
--- /dev/null
+++ b/src/core/show-status.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "show-status.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static const char* const show_status_table[_SHOW_STATUS_MAX] = {
+ [SHOW_STATUS_NO] = "no",
+ [SHOW_STATUS_ERROR] = "error",
+ [SHOW_STATUS_AUTO] = "auto",
+ [SHOW_STATUS_TEMPORARY] = "temporary",
+ [SHOW_STATUS_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(show_status, ShowStatus, SHOW_STATUS_YES);
+
+int parse_show_status(const char *v, ShowStatus *ret) {
+ ShowStatus s;
+
+ assert(ret);
+
+ s = show_status_from_string(v);
+ if (s < 0 || s == SHOW_STATUS_TEMPORARY)
+ return -EINVAL;
+
+ *ret = s;
+ return 0;
+}
+
+int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) {
+ static const char status_indent[] = " "; /* "[" STATUS "] " */
+ _cleanup_free_ char *s = NULL;
+ _cleanup_close_ int fd = -1;
+ struct iovec iovec[7] = {};
+ int n = 0;
+ static bool prev_ephemeral;
+
+ assert(format);
+
+ /* This is independent of logging, as status messages are
+ * optional and go exclusively to the console. */
+
+ if (vasprintf(&s, format, ap) < 0)
+ return log_oom();
+
+ /* Before you ask: yes, on purpose we open/close the console for each status line we write individually. This
+ * is a good strategy to avoid PID 1 getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+ * but minimizes the time window the kernel might end up killing PID 1 due to SAK). It also makes things easier
+ * for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
+ fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ if (FLAGS_SET(flags, SHOW_STATUS_ELLIPSIZE)) {
+ char *e;
+ size_t emax, sl;
+ int c;
+
+ c = fd_columns(fd);
+ if (c <= 0)
+ c = 80;
+
+ sl = status ? sizeof(status_indent)-1 : 0;
+
+ emax = c - sl - 1;
+ if (emax < 3)
+ emax = 3;
+
+ e = ellipsize(s, emax, 50);
+ if (e)
+ free_and_replace(s, e);
+ }
+
+ if (prev_ephemeral)
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_REVERSE_LINEFEED "\r" ANSI_ERASE_TO_END_OF_LINE);
+
+ if (status) {
+ if (!isempty(status)) {
+ iovec[n++] = IOVEC_MAKE_STRING("[");
+ iovec[n++] = IOVEC_MAKE_STRING(status);
+ iovec[n++] = IOVEC_MAKE_STRING("] ");
+ } else
+ iovec[n++] = IOVEC_MAKE_STRING(status_indent);
+ }
+
+ iovec[n++] = IOVEC_MAKE_STRING(s);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ if (prev_ephemeral && !FLAGS_SET(flags, SHOW_STATUS_EPHEMERAL))
+ iovec[n++] = IOVEC_MAKE_STRING(ANSI_ERASE_TO_END_OF_LINE);
+ prev_ephemeral = FLAGS_SET(flags, SHOW_STATUS_EPHEMERAL) ;
+
+ if (writev(fd, iovec, n) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int status_printf(const char *status, ShowStatusFlags flags, const char *format, ...) {
+ va_list ap;
+ int r;
+
+ assert(format);
+
+ va_start(ap, format);
+ r = status_vprintf(status, flags, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static const char* const status_unit_format_table[_STATUS_UNIT_FORMAT_MAX] = {
+ [STATUS_UNIT_FORMAT_NAME] = "name",
+ [STATUS_UNIT_FORMAT_DESCRIPTION] = "description",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(status_unit_format, StatusUnitFormat);
diff --git a/src/core/show-status.h b/src/core/show-status.h
new file mode 100644
index 0000000..c37ccd9
--- /dev/null
+++ b/src/core/show-status.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+/* Manager status */
+
+typedef enum ShowStatus {
+ SHOW_STATUS_NO, /* printing of status is disabled */
+ SHOW_STATUS_ERROR, /* only print errors */
+ SHOW_STATUS_AUTO, /* disabled but may flip to _TEMPORARY */
+ SHOW_STATUS_TEMPORARY, /* enabled temporarily, may flip back to _AUTO */
+ SHOW_STATUS_YES, /* printing of status is enabled */
+ _SHOW_STATUS_MAX,
+ _SHOW_STATUS_INVALID = -1,
+} ShowStatus;
+
+typedef enum ShowStatusFlags {
+ SHOW_STATUS_ELLIPSIZE = 1 << 0,
+ SHOW_STATUS_EPHEMERAL = 1 << 1,
+} ShowStatusFlags;
+
+typedef enum StatusUnitFormat {
+ STATUS_UNIT_FORMAT_NAME,
+ STATUS_UNIT_FORMAT_DESCRIPTION,
+ _STATUS_UNIT_FORMAT_MAX,
+ _STATUS_UNIT_FORMAT_INVALID = -1,
+} StatusUnitFormat;
+
+static inline bool show_status_on(ShowStatus s) {
+ return IN_SET(s, SHOW_STATUS_TEMPORARY, SHOW_STATUS_YES);
+}
+ShowStatus show_status_from_string(const char *v) _const_;
+const char* show_status_to_string(ShowStatus s) _pure_;
+int parse_show_status(const char *v, ShowStatus *ret);
+
+StatusUnitFormat status_unit_format_from_string(const char *v) _const_;
+const char* status_unit_format_to_string(StatusUnitFormat s) _pure_;
+
+int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) _printf_(3,0);
+int status_printf(const char *status, ShowStatusFlags flags, const char *format, ...) _printf_(3,4);
diff --git a/src/core/slice.c b/src/core/slice.c
new file mode 100644
index 0000000..ee5c259
--- /dev/null
+++ b/src/core/slice.c
@@ -0,0 +1,475 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "dbus-slice.h"
+#include "dbus-unit.h"
+#include "fd-util.h"
+#include "log.h"
+#include "serialize.h"
+#include "slice.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_SLICE_STATE_MAX] = {
+ [SLICE_DEAD] = UNIT_INACTIVE,
+ [SLICE_ACTIVE] = UNIT_ACTIVE
+};
+
+static void slice_init(Unit *u) {
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ u->ignore_on_isolate = true;
+}
+
+static void slice_set_state(Slice *t, SliceState state) {
+ SliceState old_state;
+ assert(t);
+
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
+ old_state = t->state;
+ t->state = state;
+
+ if (state != old_state)
+ log_debug("%s changed %s -> %s",
+ UNIT(t)->id,
+ slice_state_to_string(old_state),
+ slice_state_to_string(state));
+
+ unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int slice_add_parent_slice(Slice *s) {
+ Unit *u = UNIT(s), *parent;
+ _cleanup_free_ char *a = NULL;
+ int r;
+
+ assert(s);
+
+ if (UNIT_ISSET(u->slice))
+ return 0;
+
+ r = slice_build_parent_slice(u->id, &a);
+ if (r <= 0) /* 0 means root slice */
+ return r;
+
+ r = manager_load_unit(u->manager, a, NULL, NULL, &parent);
+ if (r < 0)
+ return r;
+
+ unit_ref_set(&u->slice, u, parent);
+ return 0;
+}
+
+static int slice_add_default_dependencies(Slice *s) {
+ int r;
+
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ /* Make sure slices are unloaded on shutdown */
+ r = unit_add_two_dependencies_by_name(
+ UNIT(s),
+ UNIT_BEFORE, UNIT_CONFLICTS,
+ SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int slice_verify(Slice *s) {
+ _cleanup_free_ char *parent = NULL;
+ int r;
+
+ assert(s);
+ assert(UNIT(s)->load_state == UNIT_LOADED);
+
+ if (!slice_name_is_valid(UNIT(s)->id)) {
+ log_unit_error(UNIT(s), "Slice name %s is not valid. Refusing.", UNIT(s)->id);
+ return -ENOEXEC;
+ }
+
+ r = slice_build_parent_slice(UNIT(s)->id, &parent);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to determine parent slice: %m");
+
+ if (parent ? !unit_has_name(UNIT_DEREF(UNIT(s)->slice), parent) : UNIT_ISSET(UNIT(s)->slice)) {
+ log_unit_error(UNIT(s), "Located outside of parent slice. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int slice_load_root_slice(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ u->perpetual = true;
+
+ /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
+ * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+
+ if (!u->description)
+ u->description = strdup("Root Slice");
+ if (!u->documentation)
+ u->documentation = strv_new("man:systemd.special(7)");
+
+ return 1;
+}
+
+static int slice_load_system_slice(Unit *u) {
+ assert(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return 0;
+ if (!unit_has_name(u, SPECIAL_SYSTEM_SLICE))
+ return 0;
+
+ u->perpetual = true;
+
+ /* The system slice is a bit special. For example it is always running and cannot be terminated. Because of its
+ * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+
+ if (!u->description)
+ u->description = strdup("System Slice");
+ if (!u->documentation)
+ u->documentation = strv_new("man:systemd.special(7)");
+
+ return 1;
+}
+
+static int slice_load(Unit *u) {
+ Slice *s = SLICE(u);
+ int r;
+
+ assert(s);
+ assert(u->load_state == UNIT_STUB);
+
+ r = slice_load_root_slice(u);
+ if (r < 0)
+ return r;
+ r = slice_load_system_slice(u);
+ if (r < 0)
+ return r;
+
+ r = unit_load_fragment_and_dropin(u, false);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ /* This is a new unit? Then let's add in some extras */
+ r = unit_patch_contexts(u);
+ if (r < 0)
+ return r;
+
+ r = slice_add_parent_slice(s);
+ if (r < 0)
+ return r;
+
+ r = slice_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+
+ return slice_verify(s);
+}
+
+static int slice_coldplug(Unit *u) {
+ Slice *t = SLICE(u);
+
+ assert(t);
+ assert(t->state == SLICE_DEAD);
+
+ if (t->deserialized_state != t->state)
+ slice_set_state(t, t->deserialized_state);
+
+ return 0;
+}
+
+static void slice_dump(Unit *u, FILE *f, const char *prefix) {
+ Slice *t = SLICE(u);
+
+ assert(t);
+ assert(f);
+
+ fprintf(f,
+ "%sSlice State: %s\n",
+ prefix, slice_state_to_string(t->state));
+
+ cgroup_context_dump(UNIT(t), f, prefix);
+}
+
+static int slice_start(Unit *u) {
+ Slice *t = SLICE(u);
+ int r;
+
+ assert(t);
+ assert(t->state == SLICE_DEAD);
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ (void) unit_realize_cgroup(u);
+ (void) unit_reset_accounting(u);
+
+ slice_set_state(t, SLICE_ACTIVE);
+ return 1;
+}
+
+static int slice_stop(Unit *u) {
+ Slice *t = SLICE(u);
+
+ assert(t);
+ assert(t->state == SLICE_ACTIVE);
+
+ /* We do not need to destroy the cgroup explicitly,
+ * unit_notify() will do that for us anyway. */
+
+ slice_set_state(t, SLICE_DEAD);
+ return 1;
+}
+
+static int slice_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ return unit_kill_common(u, who, signo, -1, -1, error);
+}
+
+static int slice_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Slice *s = SLICE(u);
+
+ assert(s);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", slice_state_to_string(s->state));
+
+ return 0;
+}
+
+static int slice_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Slice *s = SLICE(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ SliceState state;
+
+ state = slice_state_from_string(value);
+ if (state < 0)
+ log_debug("Failed to parse state value %s", value);
+ else
+ s->deserialized_state = state;
+
+ } else
+ log_debug("Unknown serialization key '%s'", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState slice_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[SLICE(u)->state];
+}
+
+_pure_ static const char *slice_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return slice_state_to_string(SLICE(u)->state);
+}
+
+static int slice_make_perpetual(Manager *m, const char *name, Unit **ret) {
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ u = manager_get_unit(m, name);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Slice), name, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate the special %s unit: %m", name);
+ }
+
+ u->perpetual = true;
+ SLICE(u)->deserialized_state = SLICE_ACTIVE;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+
+ if (ret)
+ *ret = u;
+
+ return 0;
+}
+
+static void slice_enumerate_perpetual(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ r = slice_make_perpetual(m, SPECIAL_ROOT_SLICE, &u);
+ if (r >= 0 && manager_owns_host_root_cgroup(m)) {
+ Slice *s = SLICE(u);
+
+ /* If we are managing the root cgroup then this means our root slice covers the whole system, which
+ * means the kernel will track CPU/tasks/memory for us anyway, and it is all available in /proc. Let's
+ * hence turn accounting on here, so that our APIs to query this data are available. */
+
+ s->cgroup_context.cpu_accounting = true;
+ s->cgroup_context.tasks_accounting = true;
+ s->cgroup_context.memory_accounting = true;
+ }
+
+ if (MANAGER_IS_SYSTEM(m))
+ (void) slice_make_perpetual(m, SPECIAL_SYSTEM_SLICE, NULL);
+}
+
+static bool slice_freezer_action_supported_by_children(Unit *s) {
+ Unit *member;
+ void *v;
+
+ assert(s);
+
+ HASHMAP_FOREACH_KEY(v, member, s->dependencies[UNIT_BEFORE]) {
+ int r;
+
+ if (UNIT_DEREF(member->slice) != s)
+ continue;
+
+ if (member->type == UNIT_SLICE) {
+ r = slice_freezer_action_supported_by_children(member);
+ if (!r)
+ return r;
+ }
+
+ if (!UNIT_VTABLE(member)->freeze)
+ return false;
+ }
+
+ return true;
+}
+
+static int slice_freezer_action(Unit *s, FreezerAction action) {
+ Unit *member;
+ void *v;
+ int r;
+
+ assert(s);
+ assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW));
+
+ if (!slice_freezer_action_supported_by_children(s)) {
+ log_unit_warning(s, "Requested freezer operation is not supported by all children of the slice");
+ return 0;
+ }
+
+ HASHMAP_FOREACH_KEY(v, member, s->dependencies[UNIT_BEFORE]) {
+ if (UNIT_DEREF(member->slice) != s)
+ continue;
+
+ if (action == FREEZER_FREEZE)
+ r = UNIT_VTABLE(member)->freeze(member);
+ else
+ r = UNIT_VTABLE(member)->thaw(member);
+
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_cgroup_freezer_action(s, action);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int slice_freeze(Unit *s) {
+ assert(s);
+
+ return slice_freezer_action(s, FREEZER_FREEZE);
+}
+
+static int slice_thaw(Unit *s) {
+ assert(s);
+
+ return slice_freezer_action(s, FREEZER_THAW);
+}
+
+static bool slice_can_freeze(Unit *s) {
+ assert(s);
+
+ return slice_freezer_action_supported_by_children(s);
+}
+
+const UnitVTable slice_vtable = {
+ .object_size = sizeof(Slice),
+ .cgroup_context_offset = offsetof(Slice, cgroup_context),
+
+ .sections =
+ "Unit\0"
+ "Slice\0"
+ "Install\0",
+ .private_section = "Slice",
+
+ .can_transient = true,
+ .can_set_managed_oom = true,
+
+ .init = slice_init,
+ .load = slice_load,
+
+ .coldplug = slice_coldplug,
+
+ .dump = slice_dump,
+
+ .start = slice_start,
+ .stop = slice_stop,
+
+ .kill = slice_kill,
+
+ .freeze = slice_freeze,
+ .thaw = slice_thaw,
+ .can_freeze = slice_can_freeze,
+
+ .serialize = slice_serialize,
+ .deserialize_item = slice_deserialize_item,
+
+ .active_state = slice_active_state,
+ .sub_state_to_string = slice_sub_state_to_string,
+
+ .bus_set_property = bus_slice_set_property,
+ .bus_commit_properties = bus_slice_commit_properties,
+
+ .enumerate_perpetual = slice_enumerate_perpetual,
+
+ .status_message_formats = {
+ .finished_start_job = {
+ [JOB_DONE] = "Created slice %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Removed slice %s.",
+ },
+ },
+};
diff --git a/src/core/slice.h b/src/core/slice.h
new file mode 100644
index 0000000..e2f9274
--- /dev/null
+++ b/src/core/slice.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "unit.h"
+
+typedef struct Slice Slice;
+
+struct Slice {
+ Unit meta;
+
+ SliceState state, deserialized_state;
+
+ CGroupContext cgroup_context;
+};
+
+extern const UnitVTable slice_vtable;
+
+DEFINE_CAST(SLICE, Slice);
diff --git a/src/core/smack-setup.c b/src/core/smack-setup.c
new file mode 100644
index 0000000..1fe592a
--- /dev/null
+++ b/src/core/smack-setup.c
@@ -0,0 +1,396 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation
+ Authors:
+ Nathaniel Chen <nathaniel.chen@intel.com>
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "smack-setup.h"
+#include "string-util.h"
+#include "util.h"
+
+#if ENABLE_SMACK
+
+static int fdopen_unlocked_at(int dfd, const char *dir, const char *name, int *status, FILE **ret_file) {
+ int fd, r;
+ FILE *f;
+
+ fd = openat(dfd, name, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (*status == 0)
+ *status = -errno;
+
+ return log_warning_errno(errno, "Failed to open \"%s/%s\": %m", dir, name);
+ }
+
+ r = fdopen_unlocked(fd, "r", &f);
+ if (r < 0) {
+ if (*status == 0)
+ *status = r;
+
+ safe_close(fd);
+ return log_error_errno(r, "Failed to open \"%s/%s\": %m", dir, name);
+ }
+
+ *ret_file = f;
+ return 0;
+}
+
+static int write_access2_rules(const char *srcdir) {
+ _cleanup_close_ int load2_fd = -1, change_fd = -1;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *entry;
+ int dfd = -1, r = 0;
+
+ load2_fd = open("/sys/fs/smackfs/load2", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (load2_fd < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/load2': %m");
+ return -errno; /* negative error */
+ }
+
+ change_fd = open("/sys/fs/smackfs/change-rule", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (change_fd < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/change-rule': %m");
+ return -errno; /* negative error */
+ }
+
+ /* write rules to load2 or change-rule from every file in the directory */
+ dir = opendir(srcdir);
+ if (!dir) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to opendir '%s': %m", srcdir);
+ return errno; /* positive on purpose */
+ }
+
+ dfd = dirfd(dir);
+ assert(dfd >= 0);
+
+ FOREACH_DIRENT(entry, dir, return 0) {
+ _cleanup_fclose_ FILE *policy = NULL;
+
+ dirent_ensure_type(dir, entry);
+ if (!dirent_is_file(entry))
+ continue;
+
+ if (fdopen_unlocked_at(dfd, srcdir, entry->d_name, &r, &policy) < 0)
+ continue;
+
+ /* load2 write rules in the kernel require a line buffered stream */
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *sbj = NULL, *obj = NULL, *acc1 = NULL, *acc2 = NULL;
+ int q;
+
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from '%s': %m", entry->d_name);
+ if (q == 0)
+ break;
+
+ if (isempty(buf) || strchr(COMMENTS, buf[0]))
+ continue;
+
+ /* if 3 args -> load rule : subject object access1 */
+ /* if 4 args -> change rule : subject object access1 access2 */
+ if (sscanf(buf, "%ms %ms %ms %ms", &sbj, &obj, &acc1, &acc2) < 3) {
+ log_error_errno(errno, "Failed to parse rule '%s' in '%s', ignoring.", buf, entry->d_name);
+ continue;
+ }
+
+ if (write(isempty(acc2) ? load2_fd : change_fd, buf, strlen(buf)) < 0) {
+ if (r == 0)
+ r = -errno;
+ log_error_errno(errno, "Failed to write '%s' to '%s' in '%s': %m",
+ buf, isempty(acc2) ? "/sys/fs/smackfs/load2" : "/sys/fs/smackfs/change-rule", entry->d_name);
+ }
+ }
+ }
+
+ return r;
+}
+
+static int write_cipso2_rules(const char *srcdir) {
+ _cleanup_close_ int cipso2_fd = -1;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *entry;
+ int dfd = -1, r = 0;
+
+ cipso2_fd = open("/sys/fs/smackfs/cipso2", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (cipso2_fd < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/cipso2': %m");
+ return -errno; /* negative error */
+ }
+
+ /* write rules to cipso2 from every file in the directory */
+ dir = opendir(srcdir);
+ if (!dir) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to opendir '%s': %m", srcdir);
+ return errno; /* positive on purpose */
+ }
+
+ dfd = dirfd(dir);
+ assert(dfd >= 0);
+
+ FOREACH_DIRENT(entry, dir, return 0) {
+ _cleanup_fclose_ FILE *policy = NULL;
+
+ dirent_ensure_type(dir, entry);
+ if (!dirent_is_file(entry))
+ continue;
+
+ if (fdopen_unlocked_at(dfd, srcdir, entry->d_name, &r, &policy) < 0)
+ continue;
+
+ /* cipso2 write rules in the kernel require a line buffered stream */
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ int q;
+
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from '%s': %m", entry->d_name);
+ if (q == 0)
+ break;
+
+ if (isempty(buf) || strchr(COMMENTS, buf[0]))
+ continue;
+
+ if (write(cipso2_fd, buf, strlen(buf)) < 0) {
+ if (r == 0)
+ r = -errno;
+ log_error_errno(errno, "Failed to write '%s' to '/sys/fs/smackfs/cipso2' in '%s': %m",
+ buf, entry->d_name);
+ break;
+ }
+ }
+ }
+
+ return r;
+}
+
+static int write_netlabel_rules(const char *srcdir) {
+ _cleanup_fclose_ FILE *dst = NULL;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *entry;
+ int dfd = -1, r = 0;
+
+ dst = fopen("/sys/fs/smackfs/netlabel", "we");
+ if (!dst) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open /sys/fs/smackfs/netlabel: %m");
+ return -errno; /* negative error */
+ }
+
+ /* write rules to dst from every file in the directory */
+ dir = opendir(srcdir);
+ if (!dir) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to opendir %s: %m", srcdir);
+ return errno; /* positive on purpose */
+ }
+
+ dfd = dirfd(dir);
+ assert(dfd >= 0);
+
+ FOREACH_DIRENT(entry, dir, return 0) {
+ _cleanup_fclose_ FILE *policy = NULL;
+
+ if (fdopen_unlocked_at(dfd, srcdir, entry->d_name, &r, &policy) < 0)
+ continue;
+
+ /* load2 write rules in the kernel require a line buffered stream */
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ int q;
+
+ q = read_line(policy, NAME_MAX, &buf);
+ if (q < 0)
+ return log_error_errno(q, "Failed to read line from %s: %m", entry->d_name);
+ if (q == 0)
+ break;
+
+ if (!fputs(buf, dst)) {
+ if (r == 0)
+ r = -EINVAL;
+ log_error_errno(errno, "Failed to write line to /sys/fs/smackfs/netlabel: %m");
+ break;
+ }
+ q = fflush_and_check(dst);
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ log_error_errno(q, "Failed to flush writes to /sys/fs/smackfs/netlabel: %m");
+ break;
+ }
+ }
+ }
+
+ return r;
+}
+
+static int write_onlycap_list(void) {
+ _cleanup_close_ int onlycap_fd = -1;
+ _cleanup_free_ char *list = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t len = 0, allocated = 0;
+ int r;
+
+ f = fopen("/etc/smack/onlycap", "re");
+ if (!f) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to read '/etc/smack/onlycap': %m");
+
+ return errno == ENOENT ? ENOENT : -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ size_t l;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read line from /etc/smack/onlycap: %m");
+ if (r == 0)
+ break;
+
+ if (isempty(buf) || strchr(COMMENTS, *buf))
+ continue;
+
+ l = strlen(buf);
+ if (!GREEDY_REALLOC(list, allocated, len + l + 1))
+ return log_oom();
+
+ stpcpy(list + len, buf)[0] = ' ';
+ len += l + 1;
+ }
+
+ if (len == 0)
+ return 0;
+
+ list[len - 1] = 0;
+
+ onlycap_fd = open("/sys/fs/smackfs/onlycap", O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (onlycap_fd < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open '/sys/fs/smackfs/onlycap': %m");
+ return -errno; /* negative error */
+ }
+
+ r = write(onlycap_fd, list, len);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to write onlycap list(%s) to '/sys/fs/smackfs/onlycap': %m", list);
+
+ return 0;
+}
+
+#endif
+
+int mac_smack_setup(bool *loaded_policy) {
+
+#if ENABLE_SMACK
+
+ int r;
+
+ assert(loaded_policy);
+
+ r = write_access2_rules("/etc/smack/accesses.d/");
+ switch(r) {
+ case -ENOENT:
+ log_debug("Smack is not enabled in the kernel.");
+ return 0;
+ case ENOENT:
+ log_debug("Smack access rules directory '/etc/smack/accesses.d/' not found");
+ return 0;
+ case 0:
+ log_info("Successfully loaded Smack policies.");
+ break;
+ default:
+ log_warning_errno(r, "Failed to load Smack access rules, ignoring: %m");
+ return 0;
+ }
+
+#ifdef SMACK_RUN_LABEL
+ r = write_string_file("/proc/self/attr/current", SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SMACK label \"" SMACK_RUN_LABEL "\" on self: %m");
+ r = write_string_file("/sys/fs/smackfs/ambient", SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SMACK ambient label \"" SMACK_RUN_LABEL "\": %m");
+ r = write_string_file("/sys/fs/smackfs/netlabel",
+ "0.0.0.0/0 " SMACK_RUN_LABEL, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SMACK netlabel rule \"0.0.0.0/0 " SMACK_RUN_LABEL "\": %m");
+ r = write_string_file("/sys/fs/smackfs/netlabel", "127.0.0.1 -CIPSO", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SMACK netlabel rule \"127.0.0.1 -CIPSO\": %m");
+#endif
+
+ r = write_cipso2_rules("/etc/smack/cipso.d/");
+ switch(r) {
+ case -ENOENT:
+ log_debug("Smack/CIPSO is not enabled in the kernel.");
+ return 0;
+ case ENOENT:
+ log_debug("Smack/CIPSO access rules directory '/etc/smack/cipso.d/' not found");
+ break;
+ case 0:
+ log_info("Successfully loaded Smack/CIPSO policies.");
+ break;
+ default:
+ log_warning_errno(r, "Failed to load Smack/CIPSO access rules, ignoring: %m");
+ break;
+ }
+
+ r = write_netlabel_rules("/etc/smack/netlabel.d/");
+ switch(r) {
+ case -ENOENT:
+ log_debug("Smack/CIPSO is not enabled in the kernel.");
+ return 0;
+ case ENOENT:
+ log_debug("Smack network host rules directory '/etc/smack/netlabel.d/' not found");
+ break;
+ case 0:
+ log_info("Successfully loaded Smack network host rules.");
+ break;
+ default:
+ log_warning_errno(r, "Failed to load Smack network host rules: %m, ignoring.");
+ break;
+ }
+
+ r = write_onlycap_list();
+ switch(r) {
+ case -ENOENT:
+ log_debug("Smack is not enabled in the kernel.");
+ break;
+ case ENOENT:
+ log_debug("Smack onlycap list file '/etc/smack/onlycap' not found");
+ break;
+ case 0:
+ log_info("Successfully wrote Smack onlycap list.");
+ break;
+ default:
+ log_emergency_errno(r, "Failed to write Smack onlycap list: %m");
+ return r;
+ }
+
+ *loaded_policy = true;
+
+#endif
+
+ return 0;
+}
diff --git a/src/core/smack-setup.h b/src/core/smack-setup.h
new file mode 100644
index 0000000..d29370d
--- /dev/null
+++ b/src/core/smack-setup.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation
+ Authors:
+ Nathaniel Chen <nathaniel.chen@intel.com>
+***/
+
+int mac_smack_setup(bool *loaded_policy);
diff --git a/src/core/socket.c b/src/core/socket.c
new file mode 100644
index 0000000..7f8ac4e
--- /dev/null
+++ b/src/core/socket.c
@@ -0,0 +1,3533 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <mqueue.h>
+#include <netinet/tcp.h>
+#include <sys/epoll.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/sctp.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "copy.h"
+#include "dbus-socket.h"
+#include "dbus-unit.h"
+#include "def.h"
+#include "errno-list.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "ip-protocol-list.h"
+#include "label.h"
+#include "log.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "serialize.h"
+#include "signal-util.h"
+#include "smack-util.h"
+#include "socket.h"
+#include "socket-netlink.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "user-util.h"
+
+struct SocketPeer {
+ unsigned n_ref;
+
+ Socket *socket;
+ union sockaddr_union peer;
+ socklen_t peer_salen;
+};
+
+static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = {
+ [SOCKET_DEAD] = UNIT_INACTIVE,
+ [SOCKET_START_PRE] = UNIT_ACTIVATING,
+ [SOCKET_START_CHOWN] = UNIT_ACTIVATING,
+ [SOCKET_START_POST] = UNIT_ACTIVATING,
+ [SOCKET_LISTENING] = UNIT_ACTIVE,
+ [SOCKET_RUNNING] = UNIT_ACTIVE,
+ [SOCKET_STOP_PRE] = UNIT_DEACTIVATING,
+ [SOCKET_STOP_PRE_SIGTERM] = UNIT_DEACTIVATING,
+ [SOCKET_STOP_PRE_SIGKILL] = UNIT_DEACTIVATING,
+ [SOCKET_STOP_POST] = UNIT_DEACTIVATING,
+ [SOCKET_FINAL_SIGTERM] = UNIT_DEACTIVATING,
+ [SOCKET_FINAL_SIGKILL] = UNIT_DEACTIVATING,
+ [SOCKET_FAILED] = UNIT_FAILED,
+ [SOCKET_CLEANING] = UNIT_MAINTENANCE,
+};
+
+static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int socket_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+static void flush_ports(Socket *s);
+
+static void socket_init(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ s->backlog = SOMAXCONN;
+ s->timeout_usec = u->manager->default_timeout_start_usec;
+ s->directory_mode = 0755;
+ s->socket_mode = 0666;
+
+ s->max_connections = 64;
+
+ s->priority = -1;
+ s->ip_tos = -1;
+ s->ip_ttl = -1;
+ s->mark = -1;
+
+ s->exec_context.std_output = u->manager->default_std_output;
+ s->exec_context.std_error = u->manager->default_std_error;
+
+ s->control_command_id = _SOCKET_EXEC_COMMAND_INVALID;
+
+ s->trigger_limit.interval = USEC_INFINITY;
+ s->trigger_limit.burst = (unsigned) -1;
+}
+
+static void socket_unwatch_control_pid(Socket *s) {
+ assert(s);
+
+ if (s->control_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(s), s->control_pid);
+ s->control_pid = 0;
+}
+
+static void socket_cleanup_fd_list(SocketPort *p) {
+ assert(p);
+
+ close_many(p->auxiliary_fds, p->n_auxiliary_fds);
+ p->auxiliary_fds = mfree(p->auxiliary_fds);
+ p->n_auxiliary_fds = 0;
+}
+
+void socket_free_ports(Socket *s) {
+ SocketPort *p;
+
+ assert(s);
+
+ while ((p = s->ports)) {
+ LIST_REMOVE(port, s->ports, p);
+
+ sd_event_source_unref(p->event_source);
+
+ socket_cleanup_fd_list(p);
+ safe_close(p->fd);
+ free(p->path);
+ free(p);
+ }
+}
+
+static void socket_done(Unit *u) {
+ Socket *s = SOCKET(u);
+ SocketPeer *p;
+
+ assert(s);
+
+ socket_free_ports(s);
+
+ while ((p = set_steal_first(s->peers_by_address)))
+ p->socket = NULL;
+
+ s->peers_by_address = set_free(s->peers_by_address);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, false);
+ exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
+ s->control_command = NULL;
+
+ dynamic_creds_unref(&s->dynamic_creds);
+
+ socket_unwatch_control_pid(s);
+
+ unit_ref_unset(&s->service);
+
+ s->tcp_congestion = mfree(s->tcp_congestion);
+ s->bind_to_device = mfree(s->bind_to_device);
+
+ s->smack = mfree(s->smack);
+ s->smack_ip_in = mfree(s->smack_ip_in);
+ s->smack_ip_out = mfree(s->smack_ip_out);
+
+ strv_free(s->symlinks);
+
+ s->user = mfree(s->user);
+ s->group = mfree(s->group);
+
+ s->fdname = mfree(s->fdname);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+}
+
+static int socket_arm_timer(Socket *s, usec_t usec) {
+ int r;
+
+ assert(s);
+
+ if (s->timer_event_source) {
+ r = sd_event_source_set_time(s->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(s->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ socket_dispatch_timer, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timer_event_source, "socket-timer");
+
+ return 0;
+}
+
+static bool have_non_accept_socket(Socket *s) {
+ SocketPort *p;
+
+ assert(s);
+
+ if (!s->accept)
+ return true;
+
+ LIST_FOREACH(port, p, s->ports) {
+
+ if (p->type != SOCKET_SOCKET)
+ return true;
+
+ if (!socket_address_can_accept(&p->address))
+ return true;
+ }
+
+ return false;
+}
+
+static int socket_add_mount_dependencies(Socket *s) {
+ SocketPort *p;
+ int r;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ const char *path = NULL;
+
+ if (p->type == SOCKET_SOCKET)
+ path = socket_address_get_path(&p->address);
+ else if (IN_SET(p->type, SOCKET_FIFO, SOCKET_SPECIAL, SOCKET_USB_FUNCTION))
+ path = p->path;
+
+ if (!path)
+ continue;
+
+ r = unit_require_mounts_for(UNIT(s), path, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int socket_add_device_dependencies(Socket *s) {
+ char *t;
+
+ assert(s);
+
+ if (!s->bind_to_device || streq(s->bind_to_device, "lo"))
+ return 0;
+
+ t = strjoina("/sys/subsystem/net/devices/", s->bind_to_device);
+ return unit_add_node_dependency(UNIT(s), t, UNIT_BINDS_TO, UNIT_DEPENDENCY_FILE);
+}
+
+static int socket_add_default_dependencies(Socket *s) {
+ int r;
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SOCKETS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(UNIT(s)->manager)) {
+ r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ }
+
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+_pure_ static bool socket_has_exec(Socket *s) {
+ unsigned i;
+ assert(s);
+
+ for (i = 0; i < _SOCKET_EXEC_COMMAND_MAX; i++)
+ if (s->exec_command[i])
+ return true;
+
+ return false;
+}
+
+static int socket_add_extras(Socket *s) {
+ Unit *u = UNIT(s);
+ int r;
+
+ assert(s);
+
+ /* Pick defaults for the trigger limit, if nothing was explicitly configured. We pick a relatively high limit
+ * in Accept=yes mode, and a lower limit for Accept=no. Reason: in Accept=yes mode we are invoking accept()
+ * ourselves before the trigger limit can hit, thus incoming connections are taken off the socket queue quickly
+ * and reliably. This is different for Accept=no, where the spawned service has to take the incoming traffic
+ * off the queues, which it might not necessarily do. Moreover, while Accept=no services are supposed to
+ * process whatever is queued in one go, and thus should normally never have to be started frequently. This is
+ * different for Accept=yes where each connection is processed by a new service instance, and thus frequent
+ * service starts are typical. */
+
+ if (s->trigger_limit.interval == USEC_INFINITY)
+ s->trigger_limit.interval = 2 * USEC_PER_SEC;
+
+ if (s->trigger_limit.burst == (unsigned) -1) {
+ if (s->accept)
+ s->trigger_limit.burst = 200;
+ else
+ s->trigger_limit.burst = 20;
+ }
+
+ if (have_non_accept_socket(s)) {
+
+ if (!UNIT_DEREF(s->service)) {
+ Unit *x;
+
+ r = unit_load_related_unit(u, ".service", &x);
+ if (r < 0)
+ return r;
+
+ unit_ref_set(&s->service, u, x);
+ }
+
+ r = unit_add_two_dependencies(u, UNIT_BEFORE, UNIT_TRIGGERS, UNIT_DEREF(s->service), true, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ return r;
+ }
+
+ r = socket_add_mount_dependencies(s);
+ if (r < 0)
+ return r;
+
+ r = socket_add_device_dependencies(s);
+ if (r < 0)
+ return r;
+
+ r = unit_patch_contexts(u);
+ if (r < 0)
+ return r;
+
+ if (socket_has_exec(s)) {
+ r = unit_add_exec_dependencies(u, &s->exec_context);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_set_default_slice(u);
+ if (r < 0)
+ return r;
+
+ r = socket_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static const char *socket_find_symlink_target(Socket *s) {
+ const char *found = NULL;
+ SocketPort *p;
+
+ LIST_FOREACH(port, p, s->ports) {
+ const char *f = NULL;
+
+ switch (p->type) {
+
+ case SOCKET_FIFO:
+ f = p->path;
+ break;
+
+ case SOCKET_SOCKET:
+ f = socket_address_get_path(&p->address);
+ break;
+
+ default:
+ break;
+ }
+
+ if (f) {
+ if (found)
+ return NULL;
+
+ found = f;
+ }
+ }
+
+ return found;
+}
+
+static int socket_verify(Socket *s) {
+ assert(s);
+ assert(UNIT(s)->load_state == UNIT_LOADED);
+
+ if (!s->ports) {
+ log_unit_error(UNIT(s), "Unit has no Listen setting (ListenStream=, ListenDatagram=, ListenFIFO=, ...). Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->accept && have_non_accept_socket(s)) {
+ log_unit_error(UNIT(s), "Unit configured for accepting sockets, but sockets are non-accepting. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->accept && s->max_connections <= 0) {
+ log_unit_error(UNIT(s), "MaxConnection= setting too small. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->accept && UNIT_DEREF(s->service)) {
+ log_unit_error(UNIT(s), "Explicit service configuration for accepting socket units not supported. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (s->exec_context.pam_name && s->kill_context.kill_mode != KILL_CONTROL_GROUP) {
+ log_unit_error(UNIT(s), "Unit has PAM enabled. Kill mode must be set to 'control-group'. Refusing.");
+ return -ENOEXEC;
+ }
+
+ if (!strv_isempty(s->symlinks) && !socket_find_symlink_target(s)) {
+ log_unit_error(UNIT(s), "Unit has symlinks set but none or more than one node in the file system. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static void peer_address_hash_func(const SocketPeer *s, struct siphash *state) {
+ assert(s);
+
+ if (s->peer.sa.sa_family == AF_INET)
+ siphash24_compress(&s->peer.in.sin_addr, sizeof(s->peer.in.sin_addr), state);
+ else if (s->peer.sa.sa_family == AF_INET6)
+ siphash24_compress(&s->peer.in6.sin6_addr, sizeof(s->peer.in6.sin6_addr), state);
+ else if (s->peer.sa.sa_family == AF_VSOCK)
+ siphash24_compress(&s->peer.vm.svm_cid, sizeof(s->peer.vm.svm_cid), state);
+ else
+ assert_not_reached("Unknown address family.");
+}
+
+static int peer_address_compare_func(const SocketPeer *x, const SocketPeer *y) {
+ int r;
+
+ r = CMP(x->peer.sa.sa_family, y->peer.sa.sa_family);
+ if (r != 0)
+ return r;
+
+ switch(x->peer.sa.sa_family) {
+ case AF_INET:
+ return memcmp(&x->peer.in.sin_addr, &y->peer.in.sin_addr, sizeof(x->peer.in.sin_addr));
+ case AF_INET6:
+ return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr));
+ case AF_VSOCK:
+ return CMP(x->peer.vm.svm_cid, y->peer.vm.svm_cid);
+ }
+ assert_not_reached("Black sheep in the family!");
+}
+
+DEFINE_PRIVATE_HASH_OPS(peer_address_hash_ops, SocketPeer, peer_address_hash_func, peer_address_compare_func);
+
+static int socket_load(Unit *u) {
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ r = set_ensure_allocated(&s->peers_by_address, &peer_address_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = unit_load_fragment_and_dropin(u, true);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ /* This is a new unit? Then let's add in some extras */
+ r = socket_add_extras(s);
+ if (r < 0)
+ return r;
+
+ return socket_verify(s);
+}
+
+static SocketPeer *socket_peer_new(void) {
+ SocketPeer *p;
+
+ p = new0(SocketPeer, 1);
+ if (!p)
+ return NULL;
+
+ p->n_ref = 1;
+
+ return p;
+}
+
+static SocketPeer *socket_peer_free(SocketPeer *p) {
+ assert(p);
+
+ if (p->socket)
+ set_remove(p->socket->peers_by_address, p);
+
+ return mfree(p);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(SocketPeer, socket_peer, socket_peer_free);
+
+int socket_acquire_peer(Socket *s, int fd, SocketPeer **p) {
+ _cleanup_(socket_peer_unrefp) SocketPeer *remote = NULL;
+ SocketPeer sa = {}, *i;
+ socklen_t salen = sizeof(sa.peer);
+ int r;
+
+ assert(fd >= 0);
+ assert(s);
+
+ if (getpeername(fd, &sa.peer.sa, &salen) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "getpeername failed: %m");
+
+ if (!IN_SET(sa.peer.sa.sa_family, AF_INET, AF_INET6, AF_VSOCK)) {
+ *p = NULL;
+ return 0;
+ }
+
+ i = set_get(s->peers_by_address, &sa);
+ if (i) {
+ *p = socket_peer_ref(i);
+ return 1;
+ }
+
+ remote = socket_peer_new();
+ if (!remote)
+ return log_oom();
+
+ remote->peer = sa.peer;
+ remote->peer_salen = salen;
+
+ r = set_put(s->peers_by_address, remote);
+ if (r < 0)
+ return r;
+
+ remote->socket = s;
+
+ *p = TAKE_PTR(remote);
+
+ return 1;
+}
+
+_const_ static const char* listen_lookup(int family, int type) {
+
+ if (family == AF_NETLINK)
+ return "ListenNetlink";
+
+ if (type == SOCK_STREAM)
+ return "ListenStream";
+ else if (type == SOCK_DGRAM)
+ return "ListenDatagram";
+ else if (type == SOCK_SEQPACKET)
+ return "ListenSequentialPacket";
+
+ assert_not_reached("Unknown socket type");
+ return NULL;
+}
+
+static void socket_dump(Unit *u, FILE *f, const char *prefix) {
+ char time_string[FORMAT_TIMESPAN_MAX];
+ SocketExecCommand c;
+ Socket *s = SOCKET(u);
+ SocketPort *p;
+ const char *prefix2, *str;
+
+ assert(s);
+ assert(f);
+
+ prefix = strempty(prefix);
+ prefix2 = strjoina(prefix, "\t");
+
+ fprintf(f,
+ "%sSocket State: %s\n"
+ "%sResult: %s\n"
+ "%sClean Result: %s\n"
+ "%sBindIPv6Only: %s\n"
+ "%sBacklog: %u\n"
+ "%sSocketMode: %04o\n"
+ "%sDirectoryMode: %04o\n"
+ "%sKeepAlive: %s\n"
+ "%sNoDelay: %s\n"
+ "%sFreeBind: %s\n"
+ "%sTransparent: %s\n"
+ "%sBroadcast: %s\n"
+ "%sPassCredentials: %s\n"
+ "%sPassSecurity: %s\n"
+ "%sPassPacketInfo: %s\n"
+ "%sTCPCongestion: %s\n"
+ "%sRemoveOnStop: %s\n"
+ "%sWritable: %s\n"
+ "%sFileDescriptorName: %s\n"
+ "%sSELinuxContextFromNet: %s\n",
+ prefix, socket_state_to_string(s->state),
+ prefix, socket_result_to_string(s->result),
+ prefix, socket_result_to_string(s->clean_result),
+ prefix, socket_address_bind_ipv6_only_to_string(s->bind_ipv6_only),
+ prefix, s->backlog,
+ prefix, s->socket_mode,
+ prefix, s->directory_mode,
+ prefix, yes_no(s->keep_alive),
+ prefix, yes_no(s->no_delay),
+ prefix, yes_no(s->free_bind),
+ prefix, yes_no(s->transparent),
+ prefix, yes_no(s->broadcast),
+ prefix, yes_no(s->pass_cred),
+ prefix, yes_no(s->pass_sec),
+ prefix, yes_no(s->pass_pktinfo),
+ prefix, strna(s->tcp_congestion),
+ prefix, yes_no(s->remove_on_stop),
+ prefix, yes_no(s->writable),
+ prefix, socket_fdname(s),
+ prefix, yes_no(s->selinux_context_from_net));
+
+ if (s->timestamping != SOCKET_TIMESTAMPING_OFF)
+ fprintf(f,
+ "%sTimestamping: %s\n",
+ prefix, socket_timestamping_to_string(s->timestamping));
+
+ if (s->control_pid > 0)
+ fprintf(f,
+ "%sControl PID: "PID_FMT"\n",
+ prefix, s->control_pid);
+
+ if (s->bind_to_device)
+ fprintf(f,
+ "%sBindToDevice: %s\n",
+ prefix, s->bind_to_device);
+
+ if (s->accept)
+ fprintf(f,
+ "%sAccepted: %u\n"
+ "%sNConnections: %u\n"
+ "%sMaxConnections: %u\n"
+ "%sMaxConnectionsPerSource: %u\n",
+ prefix, s->n_accepted,
+ prefix, s->n_connections,
+ prefix, s->max_connections,
+ prefix, s->max_connections_per_source);
+ else
+ fprintf(f,
+ "%sFlushPending: %s\n",
+ prefix, yes_no(s->flush_pending));
+
+
+ if (s->priority >= 0)
+ fprintf(f,
+ "%sPriority: %i\n",
+ prefix, s->priority);
+
+ if (s->receive_buffer > 0)
+ fprintf(f,
+ "%sReceiveBuffer: %zu\n",
+ prefix, s->receive_buffer);
+
+ if (s->send_buffer > 0)
+ fprintf(f,
+ "%sSendBuffer: %zu\n",
+ prefix, s->send_buffer);
+
+ if (s->ip_tos >= 0)
+ fprintf(f,
+ "%sIPTOS: %i\n",
+ prefix, s->ip_tos);
+
+ if (s->ip_ttl >= 0)
+ fprintf(f,
+ "%sIPTTL: %i\n",
+ prefix, s->ip_ttl);
+
+ if (s->pipe_size > 0)
+ fprintf(f,
+ "%sPipeSize: %zu\n",
+ prefix, s->pipe_size);
+
+ if (s->mark >= 0)
+ fprintf(f,
+ "%sMark: %i\n",
+ prefix, s->mark);
+
+ if (s->mq_maxmsg > 0)
+ fprintf(f,
+ "%sMessageQueueMaxMessages: %li\n",
+ prefix, s->mq_maxmsg);
+
+ if (s->mq_msgsize > 0)
+ fprintf(f,
+ "%sMessageQueueMessageSize: %li\n",
+ prefix, s->mq_msgsize);
+
+ if (s->reuse_port)
+ fprintf(f,
+ "%sReusePort: %s\n",
+ prefix, yes_no(s->reuse_port));
+
+ if (s->smack)
+ fprintf(f,
+ "%sSmackLabel: %s\n",
+ prefix, s->smack);
+
+ if (s->smack_ip_in)
+ fprintf(f,
+ "%sSmackLabelIPIn: %s\n",
+ prefix, s->smack_ip_in);
+
+ if (s->smack_ip_out)
+ fprintf(f,
+ "%sSmackLabelIPOut: %s\n",
+ prefix, s->smack_ip_out);
+
+ if (!isempty(s->user) || !isempty(s->group))
+ fprintf(f,
+ "%sSocketUser: %s\n"
+ "%sSocketGroup: %s\n",
+ prefix, strna(s->user),
+ prefix, strna(s->group));
+
+ if (s->keep_alive_time > 0)
+ fprintf(f,
+ "%sKeepAliveTimeSec: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->keep_alive_time, USEC_PER_SEC));
+
+ if (s->keep_alive_interval > 0)
+ fprintf(f,
+ "%sKeepAliveIntervalSec: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->keep_alive_interval, USEC_PER_SEC));
+
+ if (s->keep_alive_cnt > 0)
+ fprintf(f,
+ "%sKeepAliveProbes: %u\n",
+ prefix, s->keep_alive_cnt);
+
+ if (s->defer_accept > 0)
+ fprintf(f,
+ "%sDeferAcceptSec: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->defer_accept, USEC_PER_SEC));
+
+ LIST_FOREACH(port, p, s->ports) {
+
+ switch (p->type) {
+ case SOCKET_SOCKET: {
+ _cleanup_free_ char *k = NULL;
+ const char *t;
+ int r;
+
+ r = socket_address_print(&p->address, &k);
+ if (r < 0)
+ t = strerror_safe(r);
+ else
+ t = k;
+
+ fprintf(f, "%s%s: %s\n", prefix, listen_lookup(socket_address_family(&p->address), p->address.type), t);
+ break;
+ }
+ case SOCKET_SPECIAL:
+ fprintf(f, "%sListenSpecial: %s\n", prefix, p->path);
+ break;
+ case SOCKET_USB_FUNCTION:
+ fprintf(f, "%sListenUSBFunction: %s\n", prefix, p->path);
+ break;
+ case SOCKET_MQUEUE:
+ fprintf(f, "%sListenMessageQueue: %s\n", prefix, p->path);
+ break;
+ default:
+ fprintf(f, "%sListenFIFO: %s\n", prefix, p->path);
+ }
+ }
+
+ fprintf(f,
+ "%sTriggerLimitIntervalSec: %s\n"
+ "%sTriggerLimitBurst: %u\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->trigger_limit.interval, USEC_PER_SEC),
+ prefix, s->trigger_limit.burst);
+
+ str = ip_protocol_to_name(s->socket_protocol);
+ if (str)
+ fprintf(f, "%sSocketProtocol: %s\n", prefix, str);
+
+ if (!strv_isempty(s->symlinks)) {
+ char **q;
+
+ fprintf(f, "%sSymlinks:", prefix);
+ STRV_FOREACH(q, s->symlinks)
+ fprintf(f, " %s", *q);
+
+ fprintf(f, "\n");
+ }
+
+ fprintf(f,
+ "%sTimeoutSec: %s\n",
+ prefix, format_timespan(time_string, FORMAT_TIMESPAN_MAX, s->timeout_usec, USEC_PER_SEC));
+
+ exec_context_dump(&s->exec_context, f, prefix);
+ kill_context_dump(&s->kill_context, f, prefix);
+
+ for (c = 0; c < _SOCKET_EXEC_COMMAND_MAX; c++) {
+ if (!s->exec_command[c])
+ continue;
+
+ fprintf(f, "%s-> %s:\n",
+ prefix, socket_exec_command_to_string(c));
+
+ exec_command_dump_list(s->exec_command[c], f, prefix2);
+ }
+
+ cgroup_context_dump(UNIT(s), f, prefix);
+}
+
+static int instance_from_socket(int fd, unsigned nr, char **instance) {
+ socklen_t l;
+ char *r;
+ union sockaddr_union local, remote;
+
+ assert(fd >= 0);
+ assert(instance);
+
+ l = sizeof(local);
+ if (getsockname(fd, &local.sa, &l) < 0)
+ return -errno;
+
+ l = sizeof(remote);
+ if (getpeername(fd, &remote.sa, &l) < 0)
+ return -errno;
+
+ switch (local.sa.sa_family) {
+
+ case AF_INET: {
+ uint32_t
+ a = be32toh(local.in.sin_addr.s_addr),
+ b = be32toh(remote.in.sin_addr.s_addr);
+
+ if (asprintf(&r,
+ "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
+ nr,
+ a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
+ be16toh(local.in.sin_port),
+ b >> 24, (b >> 16) & 0xFF, (b >> 8) & 0xFF, b & 0xFF,
+ be16toh(remote.in.sin_port)) < 0)
+ return -ENOMEM;
+
+ break;
+ }
+
+ case AF_INET6: {
+ static const unsigned char ipv4_prefix[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF
+ };
+
+ if (memcmp(&local.in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0 &&
+ memcmp(&remote.in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0) {
+ const uint8_t
+ *a = local.in6.sin6_addr.s6_addr+12,
+ *b = remote.in6.sin6_addr.s6_addr+12;
+
+ if (asprintf(&r,
+ "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
+ nr,
+ a[0], a[1], a[2], a[3],
+ be16toh(local.in6.sin6_port),
+ b[0], b[1], b[2], b[3],
+ be16toh(remote.in6.sin6_port)) < 0)
+ return -ENOMEM;
+ } else {
+ char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];
+
+ if (asprintf(&r,
+ "%u-%s:%u-%s:%u",
+ nr,
+ inet_ntop(AF_INET6, &local.in6.sin6_addr, a, sizeof(a)),
+ be16toh(local.in6.sin6_port),
+ inet_ntop(AF_INET6, &remote.in6.sin6_addr, b, sizeof(b)),
+ be16toh(remote.in6.sin6_port)) < 0)
+ return -ENOMEM;
+ }
+
+ break;
+ }
+
+ case AF_UNIX: {
+ struct ucred ucred;
+ int k;
+
+ k = getpeercred(fd, &ucred);
+ if (k >= 0) {
+ if (asprintf(&r,
+ "%u-"PID_FMT"-"UID_FMT,
+ nr, ucred.pid, ucred.uid) < 0)
+ return -ENOMEM;
+ } else if (k == -ENODATA) {
+ /* This handles the case where somebody is
+ * connecting from another pid/uid namespace
+ * (e.g. from outside of our container). */
+ if (asprintf(&r,
+ "%u-unknown",
+ nr) < 0)
+ return -ENOMEM;
+ } else
+ return k;
+
+ break;
+ }
+
+ case AF_VSOCK:
+ if (asprintf(&r,
+ "%u-%u:%u-%u:%u",
+ nr,
+ local.vm.svm_cid, local.vm.svm_port,
+ remote.vm.svm_cid, remote.vm.svm_port) < 0)
+ return -ENOMEM;
+
+ break;
+
+ default:
+ assert_not_reached("Unhandled socket type.");
+ }
+
+ *instance = r;
+ return 0;
+}
+
+static void socket_close_fds(Socket *s) {
+ SocketPort *p;
+ char **i;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ bool was_open;
+
+ was_open = p->fd >= 0;
+
+ p->event_source = sd_event_source_unref(p->event_source);
+ p->fd = safe_close(p->fd);
+ socket_cleanup_fd_list(p);
+
+ /* One little note: we should normally not delete any sockets in the file system here! After all some
+ * other process we spawned might still have a reference of this fd and wants to continue to use
+ * it. Therefore we normally delete sockets in the file system before we create a new one, not after we
+ * stopped using one! That all said, if the user explicitly requested this, we'll delete them here
+ * anyway, but only then. */
+
+ if (!was_open || !s->remove_on_stop)
+ continue;
+
+ switch (p->type) {
+
+ case SOCKET_FIFO:
+ (void) unlink(p->path);
+ break;
+
+ case SOCKET_MQUEUE:
+ (void) mq_unlink(p->path);
+ break;
+
+ case SOCKET_SOCKET:
+ (void) socket_address_unlink(&p->address);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (s->remove_on_stop)
+ STRV_FOREACH(i, s->symlinks)
+ (void) unlink(*i);
+}
+
+static void socket_apply_socket_options(Socket *s, SocketPort *p, int fd) {
+ int r;
+
+ assert(s);
+ assert(p);
+ assert(fd >= 0);
+
+ if (s->keep_alive) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_KEEPALIVE, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_KEEPALIVE failed: %m");
+ }
+
+ if (s->keep_alive_time > 0) {
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPIDLE, s->keep_alive_time / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPIDLE failed: %m");
+ }
+
+ if (s->keep_alive_interval > 0) {
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPINTVL, s->keep_alive_interval / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPINTVL failed: %m");
+ }
+
+ if (s->keep_alive_cnt > 0) {
+ r = setsockopt_int(fd, SOL_TCP, TCP_KEEPCNT, s->keep_alive_cnt);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_KEEPCNT failed: %m");
+ }
+
+ if (s->defer_accept > 0) {
+ r = setsockopt_int(fd, SOL_TCP, TCP_DEFER_ACCEPT, s->defer_accept / USEC_PER_SEC);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_DEFER_ACCEPT failed: %m");
+ }
+
+ if (s->no_delay) {
+ if (s->socket_protocol == IPPROTO_SCTP) {
+ r = setsockopt_int(fd, SOL_SCTP, SCTP_NODELAY, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SCTP_NODELAY failed: %m");
+ } else {
+ r = setsockopt_int(fd, SOL_TCP, TCP_NODELAY, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "TCP_NODELAY failed: %m");
+ }
+ }
+
+ if (s->broadcast) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_BROADCAST, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_BROADCAST failed: %m");
+ }
+
+ if (s->pass_cred) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PASSCRED failed: %m");
+ }
+
+ if (s->pass_sec) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSSEC, true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PASSSEC failed: %m");
+ }
+
+ if (s->pass_pktinfo) {
+ r = socket_set_recvpktinfo(fd, socket_address_family(&p->address), true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to enable packet info socket option: %m");
+ }
+
+ if (s->timestamping != SOCKET_TIMESTAMPING_OFF) {
+ r = setsockopt_int(fd, SOL_SOCKET,
+ s->timestamping == SOCKET_TIMESTAMPING_NS ? SO_TIMESTAMPNS : SO_TIMESTAMP,
+ true);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to enable timestamping socket option, ignoring: %m");
+ }
+
+ if (s->priority >= 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PRIORITY, s->priority);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_PRIORITY failed: %m");
+ }
+
+ if (s->receive_buffer > 0) {
+ r = fd_set_rcvbuf(fd, s->receive_buffer, false);
+ if (r < 0)
+ log_unit_full_errno(UNIT(s), ERRNO_IS_PRIVILEGE(r) ? LOG_DEBUG : LOG_WARNING, r,
+ "SO_RCVBUF/SO_RCVBUFFORCE failed: %m");
+ }
+
+ if (s->send_buffer > 0) {
+ r = fd_set_sndbuf(fd, s->send_buffer, false);
+ if (r < 0)
+ log_unit_full_errno(UNIT(s), ERRNO_IS_PRIVILEGE(r) ? LOG_DEBUG : LOG_WARNING, r,
+ "SO_SNDBUF/SO_SNDBUFFORCE failed: %m");
+ }
+
+ if (s->mark >= 0) {
+ r = setsockopt_int(fd, SOL_SOCKET, SO_MARK, s->mark);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "SO_MARK failed: %m");
+ }
+
+ if (s->ip_tos >= 0) {
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TOS, s->ip_tos);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "IP_TOS failed: %m");
+ }
+
+ if (s->ip_ttl >= 0) {
+ r = socket_set_ttl(fd, socket_address_family(&p->address), s->ip_ttl);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "IP_TTL/IPV6_UNICAST_HOPS failed: %m");
+ }
+
+ if (s->tcp_congestion)
+ if (setsockopt(fd, SOL_TCP, TCP_CONGESTION, s->tcp_congestion, strlen(s->tcp_congestion)+1) < 0)
+ log_unit_warning_errno(UNIT(s), errno, "TCP_CONGESTION failed: %m");
+
+ if (s->smack_ip_in) {
+ r = mac_smack_apply_fd(fd, SMACK_ATTR_IPIN, s->smack_ip_in);
+ if (r < 0)
+ log_unit_error_errno(UNIT(s), r, "mac_smack_apply_ip_in_fd: %m");
+ }
+
+ if (s->smack_ip_out) {
+ r = mac_smack_apply_fd(fd, SMACK_ATTR_IPOUT, s->smack_ip_out);
+ if (r < 0)
+ log_unit_error_errno(UNIT(s), r, "mac_smack_apply_ip_out_fd: %m");
+ }
+}
+
+static void socket_apply_fifo_options(Socket *s, int fd) {
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ if (s->pipe_size > 0)
+ if (fcntl(fd, F_SETPIPE_SZ, s->pipe_size) < 0)
+ log_unit_warning_errno(UNIT(s), errno, "Setting pipe size failed, ignoring: %m");
+
+ if (s->smack) {
+ r = mac_smack_apply_fd(fd, SMACK_ATTR_ACCESS, s->smack);
+ if (r < 0)
+ log_unit_error_errno(UNIT(s), r, "SMACK relabelling failed, ignoring: %m");
+ }
+}
+
+static int fifo_address_create(
+ const char *path,
+ mode_t directory_mode,
+ mode_t socket_mode) {
+
+ _cleanup_close_ int fd = -1;
+ mode_t old_mask;
+ struct stat st;
+ int r;
+
+ assert(path);
+
+ (void) mkdir_parents_label(path, directory_mode);
+
+ r = mac_selinux_create_file_prepare(path, S_IFIFO);
+ if (r < 0)
+ return r;
+
+ /* Enforce the right access mode for the fifo */
+ old_mask = umask(~socket_mode);
+
+ /* Include the original umask in our mask */
+ (void) umask(~socket_mode | old_mask);
+
+ r = mkfifo(path, socket_mode);
+ (void) umask(old_mask);
+
+ if (r < 0 && errno != EEXIST) {
+ r = -errno;
+ goto fail;
+ }
+
+ fd = open(path, O_RDWR | O_CLOEXEC | O_NOCTTY | O_NONBLOCK | O_NOFOLLOW);
+ if (fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ mac_selinux_create_file_clear();
+
+ if (fstat(fd, &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (!S_ISFIFO(st.st_mode) ||
+ (st.st_mode & 0777) != (socket_mode & ~old_mask) ||
+ st.st_uid != getuid() ||
+ st.st_gid != getgid()) {
+ r = -EEXIST;
+ goto fail;
+ }
+
+ return TAKE_FD(fd);
+
+fail:
+ mac_selinux_create_file_clear();
+ return r;
+}
+
+static int special_address_create(const char *path, bool writable) {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ assert(path);
+
+ fd = open(path, (writable ? O_RDWR : O_RDONLY)|O_CLOEXEC|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Check whether this is a /proc, /sys or /dev file or char device */
+ if (!S_ISREG(st.st_mode) && !S_ISCHR(st.st_mode))
+ return -EEXIST;
+
+ return TAKE_FD(fd);
+}
+
+static int usbffs_address_create(const char *path) {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ assert(path);
+
+ fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Check whether this is a regular file (ffs endpoint) */
+ if (!S_ISREG(st.st_mode))
+ return -EEXIST;
+
+ return TAKE_FD(fd);
+}
+
+static int mq_address_create(
+ const char *path,
+ mode_t mq_mode,
+ long maxmsg,
+ long msgsize) {
+
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ mode_t old_mask;
+ struct mq_attr _attr, *attr = NULL;
+
+ assert(path);
+
+ if (maxmsg > 0 && msgsize > 0) {
+ _attr = (struct mq_attr) {
+ .mq_flags = O_NONBLOCK,
+ .mq_maxmsg = maxmsg,
+ .mq_msgsize = msgsize,
+ };
+ attr = &_attr;
+ }
+
+ /* Enforce the right access mode for the mq */
+ old_mask = umask(~mq_mode);
+
+ /* Include the original umask in our mask */
+ (void) umask(~mq_mode | old_mask);
+ fd = mq_open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_CREAT, mq_mode, attr);
+ (void) umask(old_mask);
+
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if ((st.st_mode & 0777) != (mq_mode & ~old_mask) ||
+ st.st_uid != getuid() ||
+ st.st_gid != getgid())
+ return -EEXIST;
+
+ return TAKE_FD(fd);
+}
+
+static int socket_symlink(Socket *s) {
+ const char *p;
+ char **i;
+ int r;
+
+ assert(s);
+
+ p = socket_find_symlink_target(s);
+ if (!p)
+ return 0;
+
+ STRV_FOREACH(i, s->symlinks) {
+ (void) mkdir_parents_label(*i, s->directory_mode);
+
+ r = symlink_idempotent(p, *i, false);
+
+ if (r == -EEXIST && s->remove_on_stop) {
+ /* If there's already something where we want to create the symlink, and the destructive
+ * RemoveOnStop= mode is set, then we might as well try to remove what already exists and try
+ * again. */
+
+ if (unlink(*i) >= 0)
+ r = symlink_idempotent(p, *i, false);
+ }
+
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to create symlink %s → %s, ignoring: %m", p, *i);
+ }
+
+ return 0;
+}
+
+static int usbffs_write_descs(int fd, Service *s) {
+ int r;
+
+ if (!s->usb_function_descriptors || !s->usb_function_strings)
+ return -EINVAL;
+
+ r = copy_file_fd(s->usb_function_descriptors, fd, 0);
+ if (r < 0)
+ return r;
+
+ return copy_file_fd(s->usb_function_strings, fd, 0);
+}
+
+static int usbffs_select_ep(const struct dirent *d) {
+ return d->d_name[0] != '.' && !streq(d->d_name, "ep0");
+}
+
+static int usbffs_dispatch_eps(SocketPort *p) {
+ _cleanup_free_ struct dirent **ent = NULL;
+ size_t n, k, i;
+ int r;
+
+ r = scandir(p->path, &ent, usbffs_select_ep, alphasort);
+ if (r < 0)
+ return -errno;
+
+ n = (size_t) r;
+ p->auxiliary_fds = new(int, n);
+ if (!p->auxiliary_fds) {
+ r = -ENOMEM;
+ goto clear;
+ }
+
+ p->n_auxiliary_fds = n;
+
+ k = 0;
+ for (i = 0; i < n; ++i) {
+ _cleanup_free_ char *ep = NULL;
+
+ ep = path_make_absolute(ent[i]->d_name, p->path);
+ if (!ep) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ path_simplify(ep, false);
+
+ r = usbffs_address_create(ep);
+ if (r < 0)
+ goto fail;
+
+ p->auxiliary_fds[k++] = r;
+ }
+
+ r = 0;
+ goto clear;
+
+fail:
+ close_many(p->auxiliary_fds, k);
+ p->auxiliary_fds = mfree(p->auxiliary_fds);
+ p->n_auxiliary_fds = 0;
+
+clear:
+ for (i = 0; i < n; ++i)
+ free(ent[i]);
+
+ return r;
+}
+
+int socket_load_service_unit(Socket *s, int cfd, Unit **ret) {
+ /* Figure out what the unit that will be used to handle the connections on the socket looks like.
+ *
+ * If cfd < 0, then we don't have a connection yet. In case of Accept=yes sockets, use a fake
+ * instance name.
+ */
+
+ if (UNIT_ISSET(s->service)) {
+ *ret = UNIT_DEREF(s->service);
+ return 0;
+ }
+
+ if (!s->accept)
+ return -ENODATA;
+
+ /* Build the instance name and load the unit */
+ _cleanup_free_ char *prefix = NULL, *instance = NULL, *name = NULL;
+ int r;
+
+ r = unit_name_to_prefix(UNIT(s)->id, &prefix);
+ if (r < 0)
+ return r;
+
+ if (cfd >= 0) {
+ r = instance_from_socket(cfd, s->n_accepted, &instance);
+ if (ERRNO_IS_DISCONNECT(r))
+ /* ENOTCONN is legitimate if TCP RST was received. Other socket families might return
+ * different errors. This connection is over, but the socket unit lives on. */
+ return log_unit_debug_errno(UNIT(s), r,
+ "Got %s on incoming socket, assuming aborted connection attempt, ignoring.",
+ errno_to_name(r));
+ if (r < 0)
+ return r;
+ }
+
+ /* For accepting sockets, we don't know how the instance will be called until we get a connection and
+ * can figure out what the peer name is. So let's use "internal" as the instance to make it clear
+ * that this is not an actual peer name. We use "unknown" when we cannot figure out the peer. */
+ r = unit_name_build(prefix, instance ?: "internal", ".service", &name);
+ if (r < 0)
+ return r;
+
+ return manager_load_unit(UNIT(s)->manager, name, NULL, NULL, ret);
+}
+
+static int socket_determine_selinux_label(Socket *s, char **ret) {
+ int r;
+
+ assert(s);
+ assert(ret);
+
+ if (s->selinux_context_from_net) {
+ /* If this is requested, get the label from the network label */
+
+ r = mac_selinux_get_our_label(ret);
+ if (r == -EOPNOTSUPP)
+ goto no_label;
+
+ } else {
+ /* Otherwise, get it from the executable we are about to start. */
+
+ Unit *service;
+ ExecCommand *c;
+ _cleanup_free_ char *path = NULL;
+
+ r = socket_load_service_unit(s, -1, &service);
+ if (r == -ENODATA)
+ goto no_label;
+ if (r < 0)
+ return r;
+
+ c = SERVICE(service)->exec_command[SERVICE_EXEC_START];
+ if (!c)
+ goto no_label;
+
+ r = chase_symlinks(c->path, SERVICE(service)->exec_context.root_directory, CHASE_PREFIX_ROOT, &path, NULL);
+ if (r < 0)
+ goto no_label;
+
+ r = mac_selinux_get_create_label_from_exe(path, ret);
+ if (IN_SET(r, -EPERM, -EOPNOTSUPP))
+ goto no_label;
+ }
+
+ return r;
+
+no_label:
+ *ret = NULL;
+ return 0;
+}
+
+static int socket_address_listen_do(
+ Socket *s,
+ const SocketAddress *address,
+ const char *label) {
+
+ assert(s);
+ assert(address);
+
+ return socket_address_listen(
+ address,
+ SOCK_CLOEXEC|SOCK_NONBLOCK,
+ s->backlog,
+ s->bind_ipv6_only,
+ s->bind_to_device,
+ s->reuse_port,
+ s->free_bind,
+ s->transparent,
+ s->directory_mode,
+ s->socket_mode,
+ label);
+}
+
+#define log_address_error_errno(u, address, error, fmt) \
+ ({ \
+ _cleanup_free_ char *_t = NULL; \
+ \
+ (void) socket_address_print(address, &_t); \
+ log_unit_error_errno(u, error, fmt, strna(_t)); \
+ })
+
+static int fork_needed(const SocketAddress *address, const ExecContext *context) {
+ int r;
+
+ assert(address);
+ assert(context);
+
+ /* Check if we need to do the cgroup or netns stuff. If not we can do things much simpler. */
+
+ if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6)) {
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r != BPF_FIREWALL_UNSUPPORTED) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
+ return true;
+ }
+
+ return context->private_network || context->network_namespace_path;
+}
+
+static int socket_address_listen_in_cgroup(
+ Socket *s,
+ const SocketAddress *address,
+ const char *label) {
+
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int fd, r;
+ pid_t pid;
+
+ assert(s);
+ assert(address);
+
+ /* This is a wrapper around socket_address_listen(), that forks off a helper process inside the
+ * socket's cgroup and network namespace in which the socket is actually created. This way we ensure
+ * the socket is actually properly attached to the unit's cgroup for the purpose of BPF filtering and
+ * such. */
+
+ r = fork_needed(address, &s->exec_context);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Shortcut things... */
+ fd = socket_address_listen_do(s, address, label);
+ if (fd < 0)
+ return log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
+
+ return fd;
+ }
+
+ r = unit_setup_exec_runtime(UNIT(s));
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed acquire runtime: %m");
+
+ if (s->exec_context.network_namespace_path &&
+ s->exec_runtime &&
+ s->exec_runtime->netns_storage_socket[0] >= 0) {
+ r = open_netns_path(s->exec_runtime->netns_storage_socket, s->exec_context.network_namespace_path);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to open network namespace path %s: %m", s->exec_context.network_namespace_path);
+ }
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+ r = unit_fork_helper_process(UNIT(s), "(sd-listen)", &pid);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to fork off listener stub process: %m");
+ if (r == 0) {
+ /* Child */
+
+ pair[0] = safe_close(pair[0]);
+
+ if ((s->exec_context.private_network || s->exec_context.network_namespace_path) &&
+ s->exec_runtime &&
+ s->exec_runtime->netns_storage_socket[0] >= 0) {
+
+ if (ns_type_supported(NAMESPACE_NET)) {
+ r = setup_netns(s->exec_runtime->netns_storage_socket);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to join network namespace: %m");
+ _exit(EXIT_NETWORK);
+ }
+ } else if (s->exec_context.network_namespace_path) {
+ log_unit_error(UNIT(s), "Network namespace path configured but network namespaces not supported.");
+ _exit(EXIT_NETWORK);
+ } else
+ log_unit_warning(UNIT(s), "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
+ }
+
+ fd = socket_address_listen_do(s, address, label);
+ if (fd < 0) {
+ log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = send_one_fd(pair[1], fd, 0);
+ if (r < 0) {
+ log_address_error_errno(UNIT(s), address, r, "Failed to send listening socket (%s) to parent: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+ fd = receive_one_fd(pair[0], 0);
+
+ /* We synchronously wait for the helper, as it shouldn't be slow */
+ r = wait_for_terminate_and_check("(sd-listen)", pid, WAIT_LOG_ABNORMAL);
+ if (r < 0) {
+ safe_close(fd);
+ return r;
+ }
+
+ if (fd < 0)
+ return log_address_error_errno(UNIT(s), address, fd, "Failed to receive listening socket (%s): %m");
+
+ return fd;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Socket *, socket_close_fds);
+
+static int socket_open_fds(Socket *_s) {
+ _cleanup_(socket_close_fdsp) Socket *s = _s;
+ _cleanup_(mac_selinux_freep) char *label = NULL;
+ bool know_label = false;
+ SocketPort *p;
+ int r;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+
+ if (p->fd >= 0)
+ continue;
+
+ switch (p->type) {
+
+ case SOCKET_SOCKET:
+
+ if (!know_label) {
+ /* Figure out the label, if we don't it know yet. We do it once for the first
+ * socket where we need this and remember it for the rest. */
+
+ r = socket_determine_selinux_label(s, &label);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to determine SELinux label: %m");
+
+ know_label = true;
+ }
+
+ /* Apply the socket protocol */
+ switch (p->address.type) {
+
+ case SOCK_STREAM:
+ case SOCK_SEQPACKET:
+ if (s->socket_protocol == IPPROTO_SCTP)
+ p->address.protocol = s->socket_protocol;
+ break;
+
+ case SOCK_DGRAM:
+ if (s->socket_protocol == IPPROTO_UDPLITE)
+ p->address.protocol = s->socket_protocol;
+ break;
+ }
+
+ p->fd = socket_address_listen_in_cgroup(s, &p->address, label);
+ if (p->fd < 0)
+ return p->fd;
+
+ socket_apply_socket_options(s, p, p->fd);
+ socket_symlink(s);
+ break;
+
+ case SOCKET_SPECIAL:
+
+ p->fd = special_address_create(p->path, s->writable);
+ if (p->fd < 0)
+ return log_unit_error_errno(UNIT(s), p->fd, "Failed to open special file %s: %m", p->path);
+ break;
+
+ case SOCKET_FIFO:
+
+ p->fd = fifo_address_create(
+ p->path,
+ s->directory_mode,
+ s->socket_mode);
+ if (p->fd < 0)
+ return log_unit_error_errno(UNIT(s), p->fd, "Failed to open FIFO %s: %m", p->path);
+
+ socket_apply_fifo_options(s, p->fd);
+ socket_symlink(s);
+ break;
+
+ case SOCKET_MQUEUE:
+
+ p->fd = mq_address_create(
+ p->path,
+ s->socket_mode,
+ s->mq_maxmsg,
+ s->mq_msgsize);
+ if (p->fd < 0)
+ return log_unit_error_errno(UNIT(s), p->fd, "Failed to open message queue %s: %m", p->path);
+ break;
+
+ case SOCKET_USB_FUNCTION: {
+ _cleanup_free_ char *ep = NULL;
+
+ ep = path_make_absolute("ep0", p->path);
+ if (!ep)
+ return -ENOMEM;
+
+ p->fd = usbffs_address_create(ep);
+ if (p->fd < 0)
+ return p->fd;
+
+ r = usbffs_write_descs(p->fd, SERVICE(UNIT_DEREF(s->service)));
+ if (r < 0)
+ return r;
+
+ r = usbffs_dispatch_eps(p);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+ default:
+ assert_not_reached("Unknown port type");
+ }
+ }
+
+ s = NULL;
+ return 0;
+}
+
+static void socket_unwatch_fds(Socket *s) {
+ SocketPort *p;
+ int r;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd < 0)
+ continue;
+
+ if (!p->event_source)
+ continue;
+
+ r = sd_event_source_set_enabled(p->event_source, SD_EVENT_OFF);
+ if (r < 0)
+ log_unit_debug_errno(UNIT(s), r, "Failed to disable event source: %m");
+ }
+}
+
+static int socket_watch_fds(Socket *s) {
+ SocketPort *p;
+ int r;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd < 0)
+ continue;
+
+ if (p->event_source) {
+ r = sd_event_source_set_enabled(p->event_source, SD_EVENT_ON);
+ if (r < 0)
+ goto fail;
+ } else {
+ r = sd_event_add_io(UNIT(s)->manager->event, &p->event_source, p->fd, EPOLLIN, socket_dispatch_io, p);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(p->event_source, "socket-port-io");
+ }
+ }
+
+ return 0;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch listening fds: %m");
+ socket_unwatch_fds(s);
+ return r;
+}
+
+enum {
+ SOCKET_OPEN_NONE,
+ SOCKET_OPEN_SOME,
+ SOCKET_OPEN_ALL,
+};
+
+static int socket_check_open(Socket *s) {
+ bool have_open = false, have_closed = false;
+ SocketPort *p;
+
+ assert(s);
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd < 0)
+ have_closed = true;
+ else
+ have_open = true;
+
+ if (have_open && have_closed)
+ return SOCKET_OPEN_SOME;
+ }
+
+ if (have_open)
+ return SOCKET_OPEN_ALL;
+
+ return SOCKET_OPEN_NONE;
+}
+
+static void socket_set_state(Socket *s, SocketState state) {
+ SocketState old_state;
+ assert(s);
+
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
+ old_state = s->state;
+ s->state = state;
+
+ if (!IN_SET(state,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL,
+ SOCKET_CLEANING)) {
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ socket_unwatch_control_pid(s);
+ s->control_command = NULL;
+ s->control_command_id = _SOCKET_EXEC_COMMAND_INVALID;
+ }
+
+ if (state != SOCKET_LISTENING)
+ socket_unwatch_fds(s);
+
+ if (!IN_SET(state,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_LISTENING,
+ SOCKET_RUNNING,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_CLEANING))
+ socket_close_fds(s);
+
+ if (state != old_state)
+ log_unit_debug(UNIT(s), "Changed %s -> %s", socket_state_to_string(old_state), socket_state_to_string(state));
+
+ unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int socket_coldplug(Unit *u) {
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(s);
+ assert(s->state == SOCKET_DEAD);
+
+ if (s->deserialized_state == s->state)
+ return 0;
+
+ if (s->control_pid > 0 &&
+ pid_is_unwaited(s->control_pid) &&
+ IN_SET(s->deserialized_state,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL,
+ SOCKET_CLEANING)) {
+
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
+ if (r < 0)
+ return r;
+
+ r = socket_arm_timer(s, usec_add(u->state_change_timestamp.monotonic, s->timeout_usec));
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(s->deserialized_state,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_LISTENING,
+ SOCKET_RUNNING)) {
+
+ /* Originally, we used to simply reopen all sockets here that we didn't have file descriptors
+ * for. However, this is problematic, as we won't traverse through the SOCKET_START_CHOWN state for
+ * them, and thus the UID/GID wouldn't be right. Hence, instead simply check if we have all fds open,
+ * and if there's a mismatch, warn loudly. */
+
+ r = socket_check_open(s);
+ if (r == SOCKET_OPEN_NONE)
+ log_unit_warning(UNIT(s),
+ "Socket unit configuration has changed while unit has been running, "
+ "no open socket file descriptor left. "
+ "The socket unit is not functional until restarted.");
+ else if (r == SOCKET_OPEN_SOME)
+ log_unit_warning(UNIT(s),
+ "Socket unit configuration has changed while unit has been running, "
+ "and some socket file descriptors have not been opened yet. "
+ "The socket unit is not fully functional until restarted.");
+ }
+
+ if (s->deserialized_state == SOCKET_LISTENING) {
+ r = socket_watch_fds(s);
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(s->deserialized_state, SOCKET_DEAD, SOCKET_FAILED, SOCKET_CLEANING)) {
+ (void) unit_setup_dynamic_creds(u);
+ (void) unit_setup_exec_runtime(u);
+ }
+
+ socket_set_state(s, s->deserialized_state);
+ return 0;
+}
+
+static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
+
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
+ };
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(c);
+ assert(_pid);
+
+ r = unit_prepare_exec(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ return r;
+
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ return r;
+
+ r = exec_spawn(UNIT(s),
+ c,
+ &s->exec_context,
+ &exec_params,
+ s->exec_runtime,
+ &s->dynamic_creds,
+ &pid);
+ if (r < 0)
+ return r;
+
+ r = unit_watch_pid(UNIT(s), pid, true);
+ if (r < 0)
+ return r;
+
+ *_pid = pid;
+
+ return 0;
+}
+
+static int socket_chown(Socket *s, pid_t *_pid) {
+ pid_t pid;
+ int r;
+
+ r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ /* We have to resolve the user names out-of-process, hence
+ * let's fork here. It's messy, but well, what can we do? */
+
+ r = unit_fork_helper_process(UNIT(s), "(sd-chown)", &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ uid_t uid = UID_INVALID;
+ gid_t gid = GID_INVALID;
+ SocketPort *p;
+
+ /* Child */
+
+ if (!isempty(s->user)) {
+ const char *user = s->user;
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user);
+ _exit(EXIT_USER);
+ }
+ }
+
+ if (!isempty(s->group)) {
+ const char *group = s->group;
+
+ r = get_group_creds(&group, &gid, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group);
+ _exit(EXIT_GROUP);
+ }
+ }
+
+ LIST_FOREACH(port, p, s->ports) {
+ const char *path = NULL;
+
+ if (p->type == SOCKET_SOCKET)
+ path = socket_address_get_path(&p->address);
+ else if (p->type == SOCKET_FIFO)
+ path = p->path;
+
+ if (!path)
+ continue;
+
+ if (chown(path, uid, gid) < 0) {
+ log_unit_error_errno(UNIT(s), errno, "Failed to chown(): %m");
+ _exit(EXIT_CHOWN);
+ }
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ r = unit_watch_pid(UNIT(s), pid, true);
+ if (r < 0)
+ goto fail;
+
+ *_pid = pid;
+ return 0;
+
+fail:
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ return r;
+}
+
+static void socket_enter_dead(Socket *s, SocketResult f) {
+ assert(s);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ if (s->result == SOCKET_SUCCESS)
+ unit_log_success(UNIT(s));
+ else
+ unit_log_failure(UNIT(s), socket_result_to_string(s->result));
+
+ unit_warn_leftover_processes(UNIT(s), unit_log_leftover_process_stop);
+
+ socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, true);
+
+ unit_destroy_runtime_data(UNIT(s), &s->exec_context);
+
+ unit_unref_uid_gid(UNIT(s), true);
+
+ dynamic_creds_destroy(&s->dynamic_creds);
+}
+
+static void socket_enter_signal(Socket *s, SocketState state, SocketResult f);
+
+static void socket_enter_stop_post(Socket *s, SocketResult f) {
+ int r;
+ assert(s);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ socket_unwatch_control_pid(s);
+ s->control_command_id = SOCKET_EXEC_STOP_POST;
+ s->control_command = s->exec_command[SOCKET_EXEC_STOP_POST];
+
+ if (s->control_command) {
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ socket_set_state(s, SOCKET_STOP_POST);
+ } else
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop-post' task: %m");
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_FAILURE_RESOURCES);
+}
+
+static int state_to_kill_operation(Socket *s, SocketState state) {
+ if (state == SOCKET_STOP_PRE_SIGTERM && unit_has_job_type(UNIT(s), JOB_RESTART))
+ return KILL_RESTART;
+
+ if (state == SOCKET_FINAL_SIGTERM)
+ return KILL_TERMINATE;
+
+ return KILL_KILL;
+}
+
+static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) {
+ int r;
+
+ assert(s);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ r = unit_kill_context(
+ UNIT(s),
+ &s->kill_context,
+ state_to_kill_operation(s, state),
+ -1,
+ s->control_pid,
+ false);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ socket_set_state(s, state);
+ } else if (state == SOCKET_STOP_PRE_SIGTERM)
+ socket_enter_signal(s, SOCKET_STOP_PRE_SIGKILL, SOCKET_SUCCESS);
+ else if (state == SOCKET_STOP_PRE_SIGKILL)
+ socket_enter_stop_post(s, SOCKET_SUCCESS);
+ else if (state == SOCKET_FINAL_SIGTERM)
+ socket_enter_signal(s, SOCKET_FINAL_SIGKILL, SOCKET_SUCCESS);
+ else
+ socket_enter_dead(s, SOCKET_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
+
+ if (IN_SET(state, SOCKET_STOP_PRE_SIGTERM, SOCKET_STOP_PRE_SIGKILL))
+ socket_enter_stop_post(s, SOCKET_FAILURE_RESOURCES);
+ else
+ socket_enter_dead(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_stop_pre(Socket *s, SocketResult f) {
+ int r;
+ assert(s);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ socket_unwatch_control_pid(s);
+ s->control_command_id = SOCKET_EXEC_STOP_PRE;
+ s->control_command = s->exec_command[SOCKET_EXEC_STOP_PRE];
+
+ if (s->control_command) {
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ socket_set_state(s, SOCKET_STOP_PRE);
+ } else
+ socket_enter_stop_post(s, SOCKET_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop-pre' task: %m");
+ socket_enter_stop_post(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_listening(Socket *s) {
+ int r;
+ assert(s);
+
+ if (!s->accept && s->flush_pending) {
+ log_unit_debug(UNIT(s), "Flushing socket before listening.");
+ flush_ports(s);
+ }
+
+ r = socket_watch_fds(s);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch sockets: %m");
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_LISTENING);
+ return;
+
+fail:
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_start_post(Socket *s) {
+ int r;
+ assert(s);
+
+ socket_unwatch_control_pid(s);
+ s->control_command_id = SOCKET_EXEC_START_POST;
+ s->control_command = s->exec_command[SOCKET_EXEC_START_POST];
+
+ if (s->control_command) {
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start-post' task: %m");
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_START_POST);
+ } else
+ socket_enter_listening(s);
+
+ return;
+
+fail:
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_start_chown(Socket *s) {
+ int r;
+
+ assert(s);
+
+ r = socket_open_fds(s);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to listen on sockets: %m");
+ goto fail;
+ }
+
+ if (!isempty(s->user) || !isempty(s->group)) {
+
+ socket_unwatch_control_pid(s);
+ s->control_command_id = SOCKET_EXEC_START_CHOWN;
+ s->control_command = NULL;
+
+ r = socket_chown(s, &s->control_pid);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to fork 'start-chown' task: %m");
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_START_CHOWN);
+ } else
+ socket_enter_start_post(s);
+
+ return;
+
+fail:
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_enter_start_pre(Socket *s) {
+ int r;
+ assert(s);
+
+ socket_unwatch_control_pid(s);
+
+ unit_warn_leftover_processes(UNIT(s), unit_log_leftover_process_start);
+
+ s->control_command_id = SOCKET_EXEC_START_PRE;
+ s->control_command = s->exec_command[SOCKET_EXEC_START_PRE];
+
+ if (s->control_command) {
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0) {
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'start-pre' task: %m");
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_START_PRE);
+ } else
+ socket_enter_start_chown(s);
+
+ return;
+
+fail:
+ socket_enter_dead(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void flush_ports(Socket *s) {
+ SocketPort *p;
+
+ /* Flush all incoming traffic, regardless if actual bytes or new connections, so that this socket isn't busy
+ * anymore */
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd < 0)
+ continue;
+
+ (void) flush_accept(p->fd);
+ (void) flush_fd(p->fd);
+ }
+}
+
+static void socket_enter_running(Socket *s, int cfd_in) {
+ /* Note that this call takes possession of the connection fd passed. It either has to assign it
+ * somewhere or close it. */
+ _cleanup_close_ int cfd = cfd_in;
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(s);
+
+ /* We don't take connections anymore if we are supposed to shut down anyway */
+ if (unit_stop_pending(UNIT(s))) {
+
+ log_unit_debug(UNIT(s), "Suppressing connection request since unit stop is scheduled.");
+
+ if (cfd >= 0)
+ goto refuse;
+
+ flush_ports(s);
+ return;
+ }
+
+ if (!ratelimit_below(&s->trigger_limit)) {
+ log_unit_warning(UNIT(s), "Trigger limit hit, refusing further activation.");
+ socket_enter_stop_pre(s, SOCKET_FAILURE_TRIGGER_LIMIT_HIT);
+ goto refuse;
+ }
+
+ if (cfd < 0) {
+ bool pending = false;
+ Unit *other;
+ void *v;
+
+ /* If there's already a start pending don't bother to
+ * do anything */
+ HASHMAP_FOREACH_KEY(v, other, UNIT(s)->dependencies[UNIT_TRIGGERS])
+ if (unit_active_or_pending(other)) {
+ pending = true;
+ break;
+ }
+
+ if (!pending) {
+ if (!UNIT_ISSET(s->service)) {
+ r = log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(ENOENT),
+ "Service to activate vanished, refusing activation.");
+ goto fail;
+ }
+
+ r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT_DEREF(s->service), JOB_REPLACE, NULL, &error, NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ socket_set_state(s, SOCKET_RUNNING);
+ } else {
+ _cleanup_(socket_peer_unrefp) SocketPeer *p = NULL;
+ Unit *service;
+
+ if (s->n_connections >= s->max_connections) {
+ log_unit_warning(UNIT(s), "Too many incoming connections (%u), dropping connection.",
+ s->n_connections);
+ goto refuse;
+ }
+
+ if (s->max_connections_per_source > 0) {
+ r = socket_acquire_peer(s, cfd, &p);
+ if (ERRNO_IS_DISCONNECT(r))
+ return;
+ if (r < 0) /* We didn't have enough resources to acquire peer information, let's fail. */
+ goto fail;
+ if (r > 0 && p->n_ref > s->max_connections_per_source) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) sockaddr_pretty(&p->peer.sa, p->peer_salen, true, false, &t);
+
+ log_unit_warning(UNIT(s),
+ "Too many incoming connections (%u) from source %s, dropping connection.",
+ p->n_ref, strnull(t));
+ goto refuse;
+ }
+ }
+
+ r = socket_load_service_unit(s, cfd, &service);
+ if (ERRNO_IS_DISCONNECT(r))
+ return;
+ if (r < 0)
+ goto fail;
+
+ r = unit_add_two_dependencies(UNIT(s), UNIT_BEFORE, UNIT_TRIGGERS, service,
+ false, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ goto fail;
+
+ s->n_accepted++;
+
+ r = service_set_socket_fd(SERVICE(service), cfd, s, s->selinux_context_from_net);
+ if (ERRNO_IS_DISCONNECT(r))
+ return;
+ if (r < 0)
+ goto fail;
+
+ TAKE_FD(cfd); /* We passed ownership of the fd to the service now. Forget it here. */
+ s->n_connections++;
+
+ SERVICE(service)->peer = TAKE_PTR(p); /* Pass ownership of the peer reference */
+
+ r = manager_add_job(UNIT(s)->manager, JOB_START, service, JOB_REPLACE, NULL, &error, NULL);
+ if (r < 0) {
+ /* We failed to activate the new service, but it still exists. Let's make sure the
+ * service closes and forgets the connection fd again, immediately. */
+ service_close_socket_fd(SERVICE(service));
+ goto fail;
+ }
+
+ /* Notify clients about changed counters */
+ unit_add_to_dbus_queue(UNIT(s));
+ }
+
+ TAKE_FD(cfd);
+ return;
+
+refuse:
+ s->n_refused++;
+ return;
+
+fail:
+ if (ERRNO_IS_RESOURCE(r))
+ log_unit_warning(UNIT(s), "Failed to queue service startup job: %s",
+ bus_error_message(&error, r));
+ else
+ log_unit_warning(UNIT(s), "Failed to queue service startup job (Maybe the service file is missing or not a %s unit?): %s",
+ cfd >= 0 ? "template" : "non-template",
+ bus_error_message(&error, r));
+
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+}
+
+static void socket_run_next(Socket *s) {
+ int r;
+
+ assert(s);
+ assert(s->control_command);
+ assert(s->control_command->command_next);
+
+ socket_unwatch_control_pid(s);
+
+ s->control_command = s->control_command->command_next;
+
+ r = socket_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run next task: %m");
+
+ if (s->state == SOCKET_START_POST)
+ socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
+ else if (s->state == SOCKET_STOP_POST)
+ socket_enter_dead(s, SOCKET_FAILURE_RESOURCES);
+ else
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_FAILURE_RESOURCES);
+}
+
+static int socket_start(Unit *u) {
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(s);
+
+ /* We cannot fulfill this request right now, try again later
+ * please! */
+ if (IN_SET(s->state,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL,
+ SOCKET_CLEANING))
+ return -EAGAIN;
+
+ /* Already on it! */
+ if (IN_SET(s->state,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST))
+ return 0;
+
+ /* Cannot run this without the service being around */
+ if (UNIT_ISSET(s->service)) {
+ Service *service;
+
+ service = SERVICE(UNIT_DEREF(s->service));
+
+ if (UNIT(service)->load_state != UNIT_LOADED) {
+ log_unit_error(u, "Socket service %s not loaded, refusing.", UNIT(service)->id);
+ return -ENOENT;
+ }
+
+ /* If the service is already active we cannot start the
+ * socket */
+ if (!IN_SET(service->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) {
+ log_unit_error(u, "Socket service %s already active, refusing.", UNIT(service)->id);
+ return -EBUSY;
+ }
+ }
+
+ assert(IN_SET(s->state, SOCKET_DEAD, SOCKET_FAILED));
+
+ r = unit_test_start_limit(u);
+ if (r < 0) {
+ socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ s->result = SOCKET_SUCCESS;
+ exec_command_reset_status_list_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
+
+ u->reset_accounting = true;
+
+ socket_enter_start_pre(s);
+ return 1;
+}
+
+static int socket_stop(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(s);
+
+ /* Already on it */
+ if (IN_SET(s->state,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL))
+ return 0;
+
+ /* If there's already something running we go directly into
+ * kill mode. */
+ if (IN_SET(s->state,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST)) {
+ socket_enter_signal(s, SOCKET_STOP_PRE_SIGTERM, SOCKET_SUCCESS);
+ return -EAGAIN;
+ }
+
+ /* If we are currently cleaning, then abort it, brutally. */
+ if (s->state == SOCKET_CLEANING) {
+ socket_enter_signal(s, SOCKET_FINAL_SIGKILL, SOCKET_SUCCESS);
+ return 0;
+ }
+
+ assert(IN_SET(s->state, SOCKET_LISTENING, SOCKET_RUNNING));
+
+ socket_enter_stop_pre(s, SOCKET_SUCCESS);
+ return 1;
+}
+
+static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Socket *s = SOCKET(u);
+ SocketPort *p;
+ int r;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", socket_state_to_string(s->state));
+ (void) serialize_item(f, "result", socket_result_to_string(s->result));
+ (void) serialize_item_format(f, "n-accepted", "%u", s->n_accepted);
+ (void) serialize_item_format(f, "n-refused", "%u", s->n_refused);
+
+ if (s->control_pid > 0)
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
+
+ if (s->control_command_id >= 0)
+ (void) serialize_item(f, "control-command", socket_exec_command_to_string(s->control_command_id));
+
+ LIST_FOREACH(port, p, s->ports) {
+ int copy;
+
+ if (p->fd < 0)
+ continue;
+
+ copy = fdset_put_dup(fds, p->fd);
+ if (copy < 0)
+ return log_unit_warning_errno(u, copy, "Failed to serialize socket fd: %m");
+
+ if (p->type == SOCKET_SOCKET) {
+ _cleanup_free_ char *t = NULL;
+
+ r = socket_address_print(&p->address, &t);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to format socket address: %m");
+
+ if (socket_address_family(&p->address) == AF_NETLINK)
+ (void) serialize_item_format(f, "netlink", "%i %s", copy, t);
+ else
+ (void) serialize_item_format(f, "socket", "%i %i %s", copy, p->address.type, t);
+ } else if (p->type == SOCKET_SPECIAL)
+ (void) serialize_item_format(f, "special", "%i %s", copy, p->path);
+ else if (p->type == SOCKET_MQUEUE)
+ (void) serialize_item_format(f, "mqueue", "%i %s", copy, p->path);
+ else if (p->type == SOCKET_USB_FUNCTION)
+ (void) serialize_item_format(f, "ffs", "%i %s", copy, p->path);
+ else {
+ assert(p->type == SOCKET_FIFO);
+ (void) serialize_item_format(f, "fifo", "%i %s", copy, p->path);
+ }
+ }
+
+ return 0;
+}
+
+static void socket_port_take_fd(SocketPort *p, FDSet *fds, int fd) {
+ assert(p);
+
+ safe_close(p->fd);
+ p->fd = fdset_remove(fds, fd);
+}
+
+static int socket_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Socket *s = SOCKET(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+
+ if (streq(key, "state")) {
+ SocketState state;
+
+ state = socket_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ s->deserialized_state = state;
+ } else if (streq(key, "result")) {
+ SocketResult f;
+
+ f = socket_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != SOCKET_SUCCESS)
+ s->result = f;
+
+ } else if (streq(key, "n-accepted")) {
+ unsigned k;
+
+ if (safe_atou(value, &k) < 0)
+ log_unit_debug(u, "Failed to parse n-accepted value: %s", value);
+ else
+ s->n_accepted += k;
+ } else if (streq(key, "n-refused")) {
+ unsigned k;
+
+ if (safe_atou(value, &k) < 0)
+ log_unit_debug(u, "Failed to parse n-refused value: %s", value);
+ else
+ s->n_refused += k;
+ } else if (streq(key, "control-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse control-pid value: %s", value);
+ else
+ s->control_pid = pid;
+ } else if (streq(key, "control-command")) {
+ SocketExecCommand id;
+
+ id = socket_exec_command_from_string(value);
+ if (id < 0)
+ log_unit_debug(u, "Failed to parse exec-command value: %s", value);
+ else {
+ s->control_command_id = id;
+ s->control_command = s->exec_command[id];
+ }
+ } else if (streq(key, "fifo")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse fifo value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (p->type == SOCKET_FIFO &&
+ path_equal_or_files_same(p->path, value+skip, 0)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "special")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse special value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (p->type == SOCKET_SPECIAL &&
+ path_equal_or_files_same(p->path, value+skip, 0)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "mqueue")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse mqueue value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (p->type == SOCKET_MQUEUE &&
+ streq(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "socket")) {
+ int fd, type, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %i %n", &fd, &type, &skip) < 2 || fd < 0 || type < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse socket value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (socket_address_is(&p->address, value+skip, type)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "netlink")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse socket value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (socket_address_is_netlink(&p->address, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else if (streq(key, "ffs")) {
+ int fd, skip = 0;
+ SocketPort *p;
+
+ if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
+ log_unit_debug(u, "Failed to parse ffs value: %s", value);
+ else
+ LIST_FOREACH(port, p, s->ports)
+ if (p->type == SOCKET_USB_FUNCTION &&
+ path_equal_or_files_same(p->path, value+skip, 0)) {
+ socket_port_take_fd(p, fds, fd);
+ break;
+ }
+
+ } else
+ log_unit_debug(UNIT(s), "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static void socket_distribute_fds(Unit *u, FDSet *fds) {
+ Socket *s = SOCKET(u);
+ SocketPort *p;
+
+ assert(u);
+
+ LIST_FOREACH(port, p, s->ports) {
+ int fd;
+
+ if (p->type != SOCKET_SOCKET)
+ continue;
+
+ if (p->fd >= 0)
+ continue;
+
+ FDSET_FOREACH(fd, fds) {
+ if (socket_address_matches_fd(&p->address, fd)) {
+ p->fd = fdset_remove(fds, fd);
+ s->deserialized_state = SOCKET_LISTENING;
+ break;
+ }
+ }
+ }
+}
+
+_pure_ static UnitActiveState socket_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[SOCKET(u)->state];
+}
+
+_pure_ static const char *socket_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return socket_state_to_string(SOCKET(u)->state);
+}
+
+const char* socket_port_type_to_string(SocketPort *p) {
+
+ assert(p);
+
+ switch (p->type) {
+
+ case SOCKET_SOCKET:
+
+ switch (p->address.type) {
+
+ case SOCK_STREAM:
+ return "Stream";
+
+ case SOCK_DGRAM:
+ return "Datagram";
+
+ case SOCK_SEQPACKET:
+ return "SequentialPacket";
+
+ case SOCK_RAW:
+ if (socket_address_family(&p->address) == AF_NETLINK)
+ return "Netlink";
+
+ _fallthrough_;
+ default:
+ return NULL;
+ }
+
+ case SOCKET_SPECIAL:
+ return "Special";
+
+ case SOCKET_MQUEUE:
+ return "MessageQueue";
+
+ case SOCKET_FIFO:
+ return "FIFO";
+
+ case SOCKET_USB_FUNCTION:
+ return "USBFunction";
+
+ default:
+ return NULL;
+ }
+}
+
+SocketType socket_port_type_from_string(const char *s) {
+ assert(s);
+
+ if (STR_IN_SET(s, "Stream", "Datagram", "SequentialPacket", "Netlink"))
+ return SOCKET_SOCKET;
+ else if (streq(s, "Special"))
+ return SOCKET_SPECIAL;
+ else if (streq(s, "MessageQueue"))
+ return SOCKET_MQUEUE;
+ else if (streq(s, "FIFO"))
+ return SOCKET_FIFO;
+ else if (streq(s, "USBFunction"))
+ return SOCKET_USB_FUNCTION;
+ else
+ return _SOCKET_TYPE_INVALID;
+}
+
+_pure_ static bool socket_may_gc(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(u);
+
+ return s->n_connections == 0;
+}
+
+static int socket_accept_do(Socket *s, int fd) {
+ int cfd;
+
+ assert(s);
+ assert(fd >= 0);
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0)
+ /* Convert transient network errors into clean and well-defined EAGAIN */
+ return ERRNO_IS_ACCEPT_AGAIN(errno) ? -EAGAIN : -errno;
+
+ return cfd;
+}
+
+static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int cfd, r;
+ pid_t pid;
+
+ assert(s);
+ assert(p);
+ assert(fd >= 0);
+
+ /* Similar to socket_address_listen_in_cgroup(), but for accept() rather than socket(): make sure that any
+ * connection socket is also properly associated with the cgroup. */
+
+ if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6))
+ goto shortcut;
+
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r == BPF_FIREWALL_UNSUPPORTED)
+ goto shortcut;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+ return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+ r = unit_fork_helper_process(UNIT(s), "(sd-accept)", &pid);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to fork off accept stub process: %m");
+ if (r == 0) {
+ /* Child */
+
+ pair[0] = safe_close(pair[0]);
+
+ cfd = socket_accept_do(s, fd);
+ if (cfd == -EAGAIN) /* spurious accept() */
+ _exit(EXIT_SUCCESS);
+ if (cfd < 0) {
+ log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = send_one_fd(pair[1], cfd, 0);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to send connection socket to parent: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+ cfd = receive_one_fd(pair[0], 0);
+
+ /* We synchronously wait for the helper, as it shouldn't be slow */
+ r = wait_for_terminate_and_check("(sd-accept)", pid, WAIT_LOG_ABNORMAL);
+ if (r < 0) {
+ safe_close(cfd);
+ return r;
+ }
+
+ /* If we received no fd, we got EIO here. If this happens with a process exit code of EXIT_SUCCESS
+ * this is a spurious accept(), let's convert that back to EAGAIN here. */
+ if (cfd == -EIO)
+ return -EAGAIN;
+ if (cfd < 0)
+ return log_unit_error_errno(UNIT(s), cfd, "Failed to receive connection socket: %m");
+
+ return cfd;
+
+shortcut:
+ cfd = socket_accept_do(s, fd);
+ if (cfd == -EAGAIN) /* spurious accept(), skip it silently */
+ return -EAGAIN;
+ if (cfd < 0)
+ return log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+
+ return cfd;
+}
+
+static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ SocketPort *p = userdata;
+ int cfd = -1;
+
+ assert(p);
+ assert(fd >= 0);
+
+ if (p->socket->state != SOCKET_LISTENING)
+ return 0;
+
+ log_unit_debug(UNIT(p->socket), "Incoming traffic");
+
+ if (revents != EPOLLIN) {
+ if (revents & EPOLLHUP)
+ log_unit_error(UNIT(p->socket), "Got POLLHUP on a listening socket. The service probably invoked shutdown() on it, and should better not do that.");
+ else
+ log_unit_error(UNIT(p->socket), "Got unexpected poll event (0x%x) on socket.", revents);
+ goto fail;
+ }
+
+ if (p->socket->accept &&
+ p->type == SOCKET_SOCKET &&
+ socket_address_can_accept(&p->address)) {
+
+ cfd = socket_accept_in_cgroup(p->socket, p, fd);
+ if (cfd == -EAGAIN) /* Spurious accept() */
+ return 0;
+ if (cfd < 0)
+ goto fail;
+
+ socket_apply_socket_options(p->socket, p, cfd);
+ }
+
+ socket_enter_running(p->socket, cfd);
+ return 0;
+
+fail:
+ socket_enter_stop_pre(p->socket, SOCKET_FAILURE_RESOURCES);
+ return 0;
+}
+
+static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ Socket *s = SOCKET(u);
+ SocketResult f;
+
+ assert(s);
+ assert(pid >= 0);
+
+ if (pid != s->control_pid)
+ return;
+
+ s->control_pid = 0;
+
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
+ f = SOCKET_SUCCESS;
+ else if (code == CLD_EXITED)
+ f = SOCKET_FAILURE_EXIT_CODE;
+ else if (code == CLD_KILLED)
+ f = SOCKET_FAILURE_SIGNAL;
+ else if (code == CLD_DUMPED)
+ f = SOCKET_FAILURE_CORE_DUMP;
+ else
+ assert_not_reached("Unknown sigchld code");
+
+ if (s->control_command) {
+ exec_status_exit(&s->control_command->exec_status, &s->exec_context, pid, code, status);
+
+ if (s->control_command->flags & EXEC_COMMAND_IGNORE_FAILURE)
+ f = SOCKET_SUCCESS;
+ }
+
+ unit_log_process_exit(
+ u,
+ "Control process",
+ socket_exec_command_to_string(s->control_command_id),
+ f == SOCKET_SUCCESS,
+ code, status);
+
+ if (s->result == SOCKET_SUCCESS)
+ s->result = f;
+
+ if (s->control_command &&
+ s->control_command->command_next &&
+ f == SOCKET_SUCCESS) {
+
+ log_unit_debug(u, "Running next command for state %s", socket_state_to_string(s->state));
+ socket_run_next(s);
+ } else {
+ s->control_command = NULL;
+ s->control_command_id = _SOCKET_EXEC_COMMAND_INVALID;
+
+ /* No further commands for this step, so let's figure
+ * out what to do next */
+
+ log_unit_debug(u, "Got final SIGCHLD for state %s", socket_state_to_string(s->state));
+
+ switch (s->state) {
+
+ case SOCKET_START_PRE:
+ if (f == SOCKET_SUCCESS)
+ socket_enter_start_chown(s);
+ else
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, f);
+ break;
+
+ case SOCKET_START_CHOWN:
+ if (f == SOCKET_SUCCESS)
+ socket_enter_start_post(s);
+ else
+ socket_enter_stop_pre(s, f);
+ break;
+
+ case SOCKET_START_POST:
+ if (f == SOCKET_SUCCESS)
+ socket_enter_listening(s);
+ else
+ socket_enter_stop_pre(s, f);
+ break;
+
+ case SOCKET_STOP_PRE:
+ case SOCKET_STOP_PRE_SIGTERM:
+ case SOCKET_STOP_PRE_SIGKILL:
+ socket_enter_stop_post(s, f);
+ break;
+
+ case SOCKET_STOP_POST:
+ case SOCKET_FINAL_SIGTERM:
+ case SOCKET_FINAL_SIGKILL:
+ socket_enter_dead(s, f);
+ break;
+
+ case SOCKET_CLEANING:
+
+ if (s->clean_result == SOCKET_SUCCESS)
+ s->clean_result = f;
+
+ socket_enter_dead(s, SOCKET_SUCCESS);
+ break;
+
+ default:
+ assert_not_reached("Uh, control process died at wrong time.");
+ }
+ }
+
+ /* Notify clients about changed exit status */
+ unit_add_to_dbus_queue(u);
+}
+
+static int socket_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Socket *s = SOCKET(userdata);
+
+ assert(s);
+ assert(s->timer_event_source == source);
+
+ switch (s->state) {
+
+ case SOCKET_START_PRE:
+ log_unit_warning(UNIT(s), "Starting timed out. Terminating.");
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_START_CHOWN:
+ case SOCKET_START_POST:
+ log_unit_warning(UNIT(s), "Starting timed out. Stopping.");
+ socket_enter_stop_pre(s, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_STOP_PRE:
+ log_unit_warning(UNIT(s), "Stopping timed out. Terminating.");
+ socket_enter_signal(s, SOCKET_STOP_PRE_SIGTERM, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_STOP_PRE_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Stopping timed out. Killing.");
+ socket_enter_signal(s, SOCKET_STOP_PRE_SIGKILL, SOCKET_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Stopping timed out. Skipping SIGKILL. Ignoring.");
+ socket_enter_stop_post(s, SOCKET_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case SOCKET_STOP_PRE_SIGKILL:
+ log_unit_warning(UNIT(s), "Processes still around after SIGKILL. Ignoring.");
+ socket_enter_stop_post(s, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_STOP_POST:
+ log_unit_warning(UNIT(s), "Stopping timed out (2). Terminating.");
+ socket_enter_signal(s, SOCKET_FINAL_SIGTERM, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_FINAL_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Stopping timed out (2). Killing.");
+ socket_enter_signal(s, SOCKET_FINAL_SIGKILL, SOCKET_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Stopping timed out (2). Skipping SIGKILL. Ignoring.");
+ socket_enter_dead(s, SOCKET_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case SOCKET_FINAL_SIGKILL:
+ log_unit_warning(UNIT(s), "Still around after SIGKILL (2). Entering failed mode.");
+ socket_enter_dead(s, SOCKET_FAILURE_TIMEOUT);
+ break;
+
+ case SOCKET_CLEANING:
+ log_unit_warning(UNIT(s), "Cleaning timed out. killing.");
+
+ if (s->clean_result == SOCKET_SUCCESS)
+ s->clean_result = SOCKET_FAILURE_TIMEOUT;
+
+ socket_enter_signal(s, SOCKET_FINAL_SIGKILL, 0);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+int socket_collect_fds(Socket *s, int **fds) {
+ size_t k = 0, n = 0;
+ SocketPort *p;
+ int *rfds;
+
+ assert(s);
+ assert(fds);
+
+ /* Called from the service code for requesting our fds */
+
+ LIST_FOREACH(port, p, s->ports) {
+ if (p->fd >= 0)
+ n++;
+ n += p->n_auxiliary_fds;
+ }
+
+ if (n <= 0) {
+ *fds = NULL;
+ return 0;
+ }
+
+ rfds = new(int, n);
+ if (!rfds)
+ return -ENOMEM;
+
+ LIST_FOREACH(port, p, s->ports) {
+ size_t i;
+
+ if (p->fd >= 0)
+ rfds[k++] = p->fd;
+ for (i = 0; i < p->n_auxiliary_fds; ++i)
+ rfds[k++] = p->auxiliary_fds[i];
+ }
+
+ assert(k == n);
+
+ *fds = rfds;
+ return (int) n;
+}
+
+static void socket_reset_failed(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(s);
+
+ if (s->state == SOCKET_FAILED)
+ socket_set_state(s, SOCKET_DEAD);
+
+ s->result = SOCKET_SUCCESS;
+ s->clean_result = SOCKET_SUCCESS;
+}
+
+void socket_connection_unref(Socket *s) {
+ assert(s);
+
+ /* The service is dead. Yay!
+ *
+ * This is strictly for one-instance-per-connection
+ * services. */
+
+ assert(s->n_connections > 0);
+ s->n_connections--;
+
+ log_unit_debug(UNIT(s), "One connection closed, %u left.", s->n_connections);
+}
+
+static void socket_trigger_notify(Unit *u, Unit *other) {
+ Socket *s = SOCKET(u);
+
+ assert(u);
+ assert(other);
+
+ /* Filter out invocations with bogus state */
+ assert(UNIT_IS_LOAD_COMPLETE(other->load_state));
+ assert(other->type == UNIT_SERVICE);
+
+ /* Don't propagate state changes from the service if we are already down */
+ if (!IN_SET(s->state, SOCKET_RUNNING, SOCKET_LISTENING))
+ return;
+
+ /* We don't care for the service state if we are in Accept=yes mode */
+ if (s->accept)
+ return;
+
+ /* Propagate start limit hit state */
+ if (other->start_limit_hit) {
+ socket_enter_stop_pre(s, SOCKET_FAILURE_SERVICE_START_LIMIT_HIT);
+ return;
+ }
+
+ /* Don't propagate anything if there's still a job queued */
+ if (other->job)
+ return;
+
+ if (IN_SET(SERVICE(other)->state,
+ SERVICE_DEAD, SERVICE_FAILED,
+ SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+ SERVICE_AUTO_RESTART))
+ socket_enter_listening(s);
+
+ if (SERVICE(other)->state == SERVICE_RUNNING)
+ socket_set_state(s, SOCKET_RUNNING);
+}
+
+static int socket_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ return unit_kill_common(u, who, signo, -1, SOCKET(u)->control_pid, error);
+}
+
+static int socket_get_timeout(Unit *u, usec_t *timeout) {
+ Socket *s = SOCKET(u);
+ usec_t t;
+ int r;
+
+ if (!s->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(s->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+char *socket_fdname(Socket *s) {
+ assert(s);
+
+ /* Returns the name to use for $LISTEN_NAMES. If the user
+ * didn't specify anything specifically, use the socket unit's
+ * name as fallback. */
+
+ return s->fdname ?: UNIT(s)->id;
+}
+
+static int socket_control_pid(Unit *u) {
+ Socket *s = SOCKET(u);
+
+ assert(s);
+
+ return s->control_pid;
+}
+
+static int socket_clean(Unit *u, ExecCleanMask mask) {
+ _cleanup_strv_free_ char **l = NULL;
+ Socket *s = SOCKET(u);
+ int r;
+
+ assert(s);
+ assert(mask != 0);
+
+ if (s->state != SOCKET_DEAD)
+ return -EBUSY;
+
+ r = exec_context_get_clean_directories(&s->exec_context, u->manager->prefix, mask, &l);
+ if (r < 0)
+ return r;
+
+ if (strv_isempty(l))
+ return -EUNATCH;
+
+ socket_unwatch_control_pid(s);
+ s->clean_result = SOCKET_SUCCESS;
+ s->control_command = NULL;
+ s->control_command_id = _SOCKET_EXEC_COMMAND_INVALID;
+
+ r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->exec_context.timeout_clean_usec));
+ if (r < 0)
+ goto fail;
+
+ r = unit_fork_and_watch_rm_rf(u, l, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ socket_set_state(s, SOCKET_CLEANING);
+
+ return 0;
+
+fail:
+ log_unit_warning_errno(u, r, "Failed to initiate cleaning: %m");
+ s->clean_result = SOCKET_FAILURE_RESOURCES;
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ return r;
+}
+
+static int socket_can_clean(Unit *u, ExecCleanMask *ret) {
+ Socket *s = SOCKET(u);
+
+ assert(s);
+
+ return exec_context_get_clean_mask(&s->exec_context, ret);
+}
+
+static const char* const socket_exec_command_table[_SOCKET_EXEC_COMMAND_MAX] = {
+ [SOCKET_EXEC_START_PRE] = "ExecStartPre",
+ [SOCKET_EXEC_START_CHOWN] = "ExecStartChown",
+ [SOCKET_EXEC_START_POST] = "ExecStartPost",
+ [SOCKET_EXEC_STOP_PRE] = "ExecStopPre",
+ [SOCKET_EXEC_STOP_POST] = "ExecStopPost"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_exec_command, SocketExecCommand);
+
+static const char* const socket_result_table[_SOCKET_RESULT_MAX] = {
+ [SOCKET_SUCCESS] = "success",
+ [SOCKET_FAILURE_RESOURCES] = "resources",
+ [SOCKET_FAILURE_TIMEOUT] = "timeout",
+ [SOCKET_FAILURE_EXIT_CODE] = "exit-code",
+ [SOCKET_FAILURE_SIGNAL] = "signal",
+ [SOCKET_FAILURE_CORE_DUMP] = "core-dump",
+ [SOCKET_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+ [SOCKET_FAILURE_TRIGGER_LIMIT_HIT] = "trigger-limit-hit",
+ [SOCKET_FAILURE_SERVICE_START_LIMIT_HIT] = "service-start-limit-hit"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_result, SocketResult);
+
+static const char* const socket_timestamping_table[_SOCKET_TIMESTAMPING_MAX] = {
+ [SOCKET_TIMESTAMPING_OFF] = "off",
+ [SOCKET_TIMESTAMPING_US] = "us",
+ [SOCKET_TIMESTAMPING_NS] = "ns",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_timestamping, SocketTimestamping);
+
+SocketTimestamping socket_timestamping_from_string_harder(const char *p) {
+ SocketTimestamping t;
+ int r;
+
+ if (!p)
+ return _SOCKET_TIMESTAMPING_INVALID;
+
+ t = socket_timestamping_from_string(p);
+ if (t >= 0)
+ return t;
+
+ /* Let's alternatively support the various other aliases parse_time() accepts for ns and µs here,
+ * too. */
+ if (streq(p, "nsec"))
+ return SOCKET_TIMESTAMPING_NS;
+ if (STR_IN_SET(p, "usec", "µs"))
+ return SOCKET_TIMESTAMPING_US;
+
+ r = parse_boolean(p);
+ if (r < 0)
+ return _SOCKET_TIMESTAMPING_INVALID;
+
+ return r ? SOCKET_TIMESTAMPING_NS : SOCKET_TIMESTAMPING_OFF; /* If boolean yes, default to ns accuracy */
+}
+
+const UnitVTable socket_vtable = {
+ .object_size = sizeof(Socket),
+ .exec_context_offset = offsetof(Socket, exec_context),
+ .cgroup_context_offset = offsetof(Socket, cgroup_context),
+ .kill_context_offset = offsetof(Socket, kill_context),
+ .exec_runtime_offset = offsetof(Socket, exec_runtime),
+ .dynamic_creds_offset = offsetof(Socket, dynamic_creds),
+
+ .sections =
+ "Unit\0"
+ "Socket\0"
+ "Install\0",
+ .private_section = "Socket",
+
+ .can_transient = true,
+ .can_trigger = true,
+ .can_fail = true,
+
+ .init = socket_init,
+ .done = socket_done,
+ .load = socket_load,
+
+ .coldplug = socket_coldplug,
+
+ .dump = socket_dump,
+
+ .start = socket_start,
+ .stop = socket_stop,
+
+ .kill = socket_kill,
+ .clean = socket_clean,
+ .can_clean = socket_can_clean,
+
+ .get_timeout = socket_get_timeout,
+
+ .serialize = socket_serialize,
+ .deserialize_item = socket_deserialize_item,
+ .distribute_fds = socket_distribute_fds,
+
+ .active_state = socket_active_state,
+ .sub_state_to_string = socket_sub_state_to_string,
+
+ .will_restart = unit_will_restart_default,
+
+ .may_gc = socket_may_gc,
+
+ .sigchld_event = socket_sigchld_event,
+
+ .trigger_notify = socket_trigger_notify,
+
+ .reset_failed = socket_reset_failed,
+
+ .control_pid = socket_control_pid,
+
+ .bus_set_property = bus_socket_set_property,
+ .bus_commit_properties = bus_socket_commit_properties,
+
+ .status_message_formats = {
+ /*.starting_stopping = {
+ [0] = "Starting socket %s...",
+ [1] = "Stopping socket %s...",
+ },*/
+ .finished_start_job = {
+ [JOB_DONE] = "Listening on %s.",
+ [JOB_FAILED] = "Failed to listen on %s.",
+ [JOB_TIMEOUT] = "Timed out starting %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Closed %s.",
+ [JOB_FAILED] = "Failed stopping %s.",
+ [JOB_TIMEOUT] = "Timed out stopping %s.",
+ },
+ },
+};
diff --git a/src/core/socket.h b/src/core/socket.h
new file mode 100644
index 0000000..ebe85c2
--- /dev/null
+++ b/src/core/socket.h
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Socket Socket;
+typedef struct SocketPeer SocketPeer;
+
+#include "mount.h"
+#include "service.h"
+#include "socket-util.h"
+#include "unit.h"
+
+typedef enum SocketExecCommand {
+ SOCKET_EXEC_START_PRE,
+ SOCKET_EXEC_START_CHOWN,
+ SOCKET_EXEC_START_POST,
+ SOCKET_EXEC_STOP_PRE,
+ SOCKET_EXEC_STOP_POST,
+ _SOCKET_EXEC_COMMAND_MAX,
+ _SOCKET_EXEC_COMMAND_INVALID = -1
+} SocketExecCommand;
+
+typedef enum SocketType {
+ SOCKET_SOCKET,
+ SOCKET_FIFO,
+ SOCKET_SPECIAL,
+ SOCKET_MQUEUE,
+ SOCKET_USB_FUNCTION,
+ _SOCKET_TYPE_MAX,
+ _SOCKET_TYPE_INVALID = -1
+} SocketType;
+
+typedef enum SocketResult {
+ SOCKET_SUCCESS,
+ SOCKET_FAILURE_RESOURCES,
+ SOCKET_FAILURE_TIMEOUT,
+ SOCKET_FAILURE_EXIT_CODE,
+ SOCKET_FAILURE_SIGNAL,
+ SOCKET_FAILURE_CORE_DUMP,
+ SOCKET_FAILURE_START_LIMIT_HIT,
+ SOCKET_FAILURE_TRIGGER_LIMIT_HIT,
+ SOCKET_FAILURE_SERVICE_START_LIMIT_HIT,
+ _SOCKET_RESULT_MAX,
+ _SOCKET_RESULT_INVALID = -1
+} SocketResult;
+
+typedef struct SocketPort {
+ Socket *socket;
+
+ SocketType type;
+ int fd;
+ int *auxiliary_fds;
+ size_t n_auxiliary_fds;
+
+ SocketAddress address;
+ char *path;
+ sd_event_source *event_source;
+
+ LIST_FIELDS(struct SocketPort, port);
+} SocketPort;
+
+typedef enum SocketTimestamping {
+ SOCKET_TIMESTAMPING_OFF,
+ SOCKET_TIMESTAMPING_US, /* SO_TIMESTAMP */
+ SOCKET_TIMESTAMPING_NS, /* SO_TIMESTAMPNS */
+ _SOCKET_TIMESTAMPING_MAX,
+ _SOCKET_TIMESTAMPING_INVALID = -1,
+} SocketTimestamping;
+
+struct Socket {
+ Unit meta;
+
+ LIST_HEAD(SocketPort, ports);
+
+ Set *peers_by_address;
+
+ unsigned n_accepted;
+ unsigned n_connections;
+ unsigned n_refused;
+ unsigned max_connections;
+ unsigned max_connections_per_source;
+
+ unsigned backlog;
+ unsigned keep_alive_cnt;
+ usec_t timeout_usec;
+ usec_t keep_alive_time;
+ usec_t keep_alive_interval;
+ usec_t defer_accept;
+
+ ExecCommand* exec_command[_SOCKET_EXEC_COMMAND_MAX];
+ ExecContext exec_context;
+ KillContext kill_context;
+ CGroupContext cgroup_context;
+
+ ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
+
+ /* For Accept=no sockets refers to the one service we'll
+ * activate. For Accept=yes sockets is either NULL, or filled
+ * to refer to the next service we spawn. */
+ UnitRef service;
+
+ SocketState state, deserialized_state;
+
+ sd_event_source *timer_event_source;
+
+ ExecCommand* control_command;
+ SocketExecCommand control_command_id;
+ pid_t control_pid;
+
+ mode_t directory_mode;
+ mode_t socket_mode;
+
+ SocketResult result;
+ SocketResult clean_result;
+
+ char **symlinks;
+
+ bool accept;
+ bool remove_on_stop;
+ bool writable;
+ bool flush_pending;
+
+ int socket_protocol;
+
+ /* Socket options */
+ bool keep_alive;
+ bool no_delay;
+ bool free_bind;
+ bool transparent;
+ bool broadcast;
+ bool pass_cred;
+ bool pass_sec;
+ bool pass_pktinfo;
+ SocketTimestamping timestamping;
+
+ /* Only for INET6 sockets: issue IPV6_V6ONLY sockopt */
+ SocketAddressBindIPv6Only bind_ipv6_only;
+
+ int priority;
+ int mark;
+ size_t receive_buffer;
+ size_t send_buffer;
+ int ip_tos;
+ int ip_ttl;
+ size_t pipe_size;
+ char *bind_to_device;
+ char *tcp_congestion;
+ bool reuse_port;
+ long mq_maxmsg;
+ long mq_msgsize;
+
+ char *smack;
+ char *smack_ip_in;
+ char *smack_ip_out;
+
+ bool selinux_context_from_net;
+
+ char *user, *group;
+
+ char *fdname;
+
+ RateLimit trigger_limit;
+};
+
+SocketPeer *socket_peer_ref(SocketPeer *p);
+SocketPeer *socket_peer_unref(SocketPeer *p);
+int socket_acquire_peer(Socket *s, int fd, SocketPeer **p);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SocketPeer*, socket_peer_unref);
+
+/* Called from the service code when collecting fds */
+int socket_collect_fds(Socket *s, int **fds);
+
+/* Called from the service code when a per-connection service ended */
+void socket_connection_unref(Socket *s);
+
+void socket_free_ports(Socket *s);
+
+int socket_load_service_unit(Socket *s, int cfd, Unit **ret);
+
+char *socket_fdname(Socket *s);
+
+extern const UnitVTable socket_vtable;
+
+const char* socket_exec_command_to_string(SocketExecCommand i) _const_;
+SocketExecCommand socket_exec_command_from_string(const char *s) _pure_;
+
+const char* socket_result_to_string(SocketResult i) _const_;
+SocketResult socket_result_from_string(const char *s) _pure_;
+
+const char* socket_port_type_to_string(SocketPort *p) _pure_;
+SocketType socket_port_type_from_string(const char *p) _pure_;
+
+const char* socket_timestamping_to_string(SocketTimestamping p) _const_;
+SocketTimestamping socket_timestamping_from_string(const char *p) _pure_;
+SocketTimestamping socket_timestamping_from_string_harder(const char *p) _pure_;
+
+DEFINE_CAST(SOCKET, Socket);
diff --git a/src/core/swap.c b/src/core/swap.c
new file mode 100644
index 0000000..76e491a
--- /dev/null
+++ b/src/core/swap.c
@@ -0,0 +1,1694 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/epoll.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "dbus-swap.h"
+#include "dbus-unit.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "device.h"
+#include "escape.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fstab-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "swap.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "virt.h"
+
+static const UnitActiveState state_translation_table[_SWAP_STATE_MAX] = {
+ [SWAP_DEAD] = UNIT_INACTIVE,
+ [SWAP_ACTIVATING] = UNIT_ACTIVATING,
+ [SWAP_ACTIVATING_DONE] = UNIT_ACTIVE,
+ [SWAP_ACTIVE] = UNIT_ACTIVE,
+ [SWAP_DEACTIVATING] = UNIT_DEACTIVATING,
+ [SWAP_DEACTIVATING_SIGTERM] = UNIT_DEACTIVATING,
+ [SWAP_DEACTIVATING_SIGKILL] = UNIT_DEACTIVATING,
+ [SWAP_FAILED] = UNIT_FAILED,
+ [SWAP_CLEANING] = UNIT_MAINTENANCE,
+};
+
+static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int swap_process_proc_swaps(Manager *m);
+
+static bool SWAP_STATE_WITH_PROCESS(SwapState state) {
+ return IN_SET(state,
+ SWAP_ACTIVATING,
+ SWAP_ACTIVATING_DONE,
+ SWAP_DEACTIVATING,
+ SWAP_DEACTIVATING_SIGTERM,
+ SWAP_DEACTIVATING_SIGKILL,
+ SWAP_CLEANING);
+}
+
+_pure_ static UnitActiveState swap_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[SWAP(u)->state];
+}
+
+_pure_ static const char *swap_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return swap_state_to_string(SWAP(u)->state);
+}
+
+_pure_ static bool swap_may_gc(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ if (s->from_proc_swaps)
+ return false;
+
+ return true;
+}
+
+_pure_ static bool swap_is_extrinsic(Unit *u) {
+ assert(SWAP(u));
+
+ return MANAGER_IS_USER(u->manager);
+}
+
+static void swap_unset_proc_swaps(Swap *s) {
+ assert(s);
+
+ if (!s->from_proc_swaps)
+ return;
+
+ s->parameters_proc_swaps.what = mfree(s->parameters_proc_swaps.what);
+ s->from_proc_swaps = false;
+}
+
+static int swap_set_devnode(Swap *s, const char *devnode) {
+ Hashmap *swaps;
+ Swap *first;
+ int r;
+
+ assert(s);
+
+ r = hashmap_ensure_allocated(&UNIT(s)->manager->swaps_by_devnode, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ swaps = UNIT(s)->manager->swaps_by_devnode;
+
+ if (s->devnode) {
+ first = hashmap_get(swaps, s->devnode);
+
+ LIST_REMOVE(same_devnode, first, s);
+ if (first)
+ hashmap_replace(swaps, first->devnode, first);
+ else
+ hashmap_remove(swaps, s->devnode);
+
+ s->devnode = mfree(s->devnode);
+ }
+
+ if (devnode) {
+ s->devnode = strdup(devnode);
+ if (!s->devnode)
+ return -ENOMEM;
+
+ first = hashmap_get(swaps, s->devnode);
+ LIST_PREPEND(same_devnode, first, s);
+
+ return hashmap_replace(swaps, first->devnode, first);
+ }
+
+ return 0;
+}
+
+static void swap_init(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+ assert(UNIT(s)->load_state == UNIT_STUB);
+
+ s->timeout_usec = u->manager->default_timeout_start_usec;
+
+ s->exec_context.std_output = u->manager->default_std_output;
+ s->exec_context.std_error = u->manager->default_std_error;
+
+ s->control_command_id = _SWAP_EXEC_COMMAND_INVALID;
+
+ u->ignore_on_isolate = true;
+}
+
+static void swap_unwatch_control_pid(Swap *s) {
+ assert(s);
+
+ if (s->control_pid <= 0)
+ return;
+
+ unit_unwatch_pid(UNIT(s), s->control_pid);
+ s->control_pid = 0;
+}
+
+static void swap_done(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ swap_unset_proc_swaps(s);
+ swap_set_devnode(s, NULL);
+
+ s->what = mfree(s->what);
+ s->parameters_fragment.what = mfree(s->parameters_fragment.what);
+ s->parameters_fragment.options = mfree(s->parameters_fragment.options);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, false);
+ exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
+ s->control_command = NULL;
+
+ dynamic_creds_unref(&s->dynamic_creds);
+
+ swap_unwatch_control_pid(s);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+}
+
+static int swap_arm_timer(Swap *s, usec_t usec) {
+ int r;
+
+ assert(s);
+
+ if (s->timer_event_source) {
+ r = sd_event_source_set_time(s->timer_event_source, usec);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(s->timer_event_source, SD_EVENT_ONESHOT);
+ }
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ r = sd_event_add_time(
+ UNIT(s)->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec, 0,
+ swap_dispatch_timer, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timer_event_source, "swap-timer");
+
+ return 0;
+}
+
+static SwapParameters* swap_get_parameters(Swap *s) {
+ assert(s);
+
+ if (s->from_proc_swaps)
+ return &s->parameters_proc_swaps;
+
+ if (s->from_fragment)
+ return &s->parameters_fragment;
+
+ return NULL;
+}
+
+static int swap_add_device_dependencies(Swap *s) {
+ UnitDependencyMask mask;
+ SwapParameters *p;
+ int r;
+
+ assert(s);
+
+ if (!s->what)
+ return 0;
+
+ p = swap_get_parameters(s);
+ if (!p || !p->what)
+ return 0;
+
+ mask = s->from_proc_swaps ? UNIT_DEPENDENCY_PROC_SWAP : UNIT_DEPENDENCY_FILE;
+
+ if (is_device_path(p->what)) {
+ r = unit_add_node_dependency(UNIT(s), p->what, UNIT_REQUIRES, mask);
+ if (r < 0)
+ return r;
+
+ return unit_add_blockdev_dependency(UNIT(s), p->what, mask);
+ }
+
+ /* File based swap devices need to be ordered after systemd-remount-fs.service, since they might need
+ * a writable file system. */
+ return unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_REMOUNT_FS_SERVICE, true, mask);
+}
+
+static int swap_add_default_dependencies(Swap *s) {
+ int r;
+
+ assert(s);
+
+ if (!UNIT(s)->default_dependencies)
+ return 0;
+
+ if (!MANAGER_IS_SYSTEM(UNIT(s)->manager))
+ return 0;
+
+ if (detect_container() > 0)
+ return 0;
+
+ /* swap units generated for the swap dev links are missing the
+ * ordering dep against the swap target. */
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_BEFORE, SPECIAL_SWAP_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int swap_verify(Swap *s) {
+ _cleanup_free_ char *e = NULL;
+ int r;
+
+ assert(UNIT(s)->load_state == UNIT_LOADED);
+
+ r = unit_name_from_path(s->what, ".swap", &e);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to generate unit name from path: %m");
+
+ if (!unit_has_name(UNIT(s), e)) {
+ log_unit_error(UNIT(s), "Value of What= and unit name do not match, not loading.");
+ return -ENOEXEC;
+ }
+
+ if (s->exec_context.pam_name && s->kill_context.kill_mode != KILL_CONTROL_GROUP) {
+ log_unit_error(UNIT(s), "Unit has PAM enabled. Kill mode must be set to 'control-group'. Refusing to load.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int swap_load_devnode(Swap *s) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ struct stat st;
+ const char *p;
+ int r;
+
+ assert(s);
+
+ if (stat(s->what, &st) < 0 || !S_ISBLK(st.st_mode))
+ return 0;
+
+ r = device_new_from_stat_rdev(&d, &st);
+ if (r < 0) {
+ log_unit_full_errno(UNIT(s), r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to allocate device for swap %s: %m", s->what);
+ return 0;
+ }
+
+ if (sd_device_get_devname(d, &p) < 0)
+ return 0;
+
+ return swap_set_devnode(s, p);
+}
+
+static int swap_add_extras(Swap *s) {
+ int r;
+
+ assert(s);
+
+ if (UNIT(s)->fragment_path)
+ s->from_fragment = true;
+
+ if (!s->what) {
+ if (s->parameters_fragment.what)
+ s->what = strdup(s->parameters_fragment.what);
+ else if (s->parameters_proc_swaps.what)
+ s->what = strdup(s->parameters_proc_swaps.what);
+ else {
+ r = unit_name_to_path(UNIT(s)->id, &s->what);
+ if (r < 0)
+ return r;
+ }
+
+ if (!s->what)
+ return -ENOMEM;
+ }
+
+ path_simplify(s->what, false);
+
+ if (!UNIT(s)->description) {
+ r = unit_set_description(UNIT(s), s->what);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_require_mounts_for(UNIT(s), s->what, UNIT_DEPENDENCY_IMPLICIT);
+ if (r < 0)
+ return r;
+
+ r = swap_add_device_dependencies(s);
+ if (r < 0)
+ return r;
+
+ r = swap_load_devnode(s);
+ if (r < 0)
+ return r;
+
+ r = unit_patch_contexts(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = unit_add_exec_dependencies(UNIT(s), &s->exec_context);
+ if (r < 0)
+ return r;
+
+ r = unit_set_default_slice(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = swap_add_default_dependencies(s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int swap_load(Unit *u) {
+ Swap *s = SWAP(u);
+ int r, q = 0;
+
+ assert(s);
+ assert(u->load_state == UNIT_STUB);
+
+ /* Load a .swap file */
+ bool fragment_optional = s->from_proc_swaps;
+ r = unit_load_fragment_and_dropin(u, !fragment_optional);
+
+ /* Add in some extras, and do so either when we successfully loaded something or when /proc/swaps is
+ * already active. */
+ if (u->load_state == UNIT_LOADED || s->from_proc_swaps)
+ q = swap_add_extras(s);
+
+ if (r < 0)
+ return r;
+ if (q < 0)
+ return q;
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ return swap_verify(s);
+}
+
+static int swap_setup_unit(
+ Manager *m,
+ const char *what,
+ const char *what_proc_swaps,
+ int priority,
+ bool set_flags) {
+
+ _cleanup_free_ char *e = NULL;
+ bool delete = false;
+ Unit *u = NULL;
+ int r;
+ SwapParameters *p;
+
+ assert(m);
+ assert(what);
+ assert(what_proc_swaps);
+
+ r = unit_name_from_path(what, ".swap", &e);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to generate unit name from path: %m");
+
+ u = manager_get_unit(m, e);
+ if (u &&
+ SWAP(u)->from_proc_swaps &&
+ !path_equal(SWAP(u)->parameters_proc_swaps.what, what_proc_swaps))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Swap %s appeared twice with different device paths %s and %s",
+ e, SWAP(u)->parameters_proc_swaps.what, what_proc_swaps);
+
+ if (!u) {
+ delete = true;
+
+ r = unit_new_for_name(m, sizeof(Swap), e, &u);
+ if (r < 0)
+ goto fail;
+
+ SWAP(u)->what = strdup(what);
+ if (!SWAP(u)->what) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ unit_add_to_load_queue(u);
+ } else
+ delete = false;
+
+ p = &SWAP(u)->parameters_proc_swaps;
+
+ if (!p->what) {
+ p->what = strdup(what_proc_swaps);
+ if (!p->what) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ /* The unit is definitely around now, mark it as loaded if it was previously referenced but could not be
+ * loaded. After all we can load it now, from the data in /proc/swaps. */
+ if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) {
+ u->load_state = UNIT_LOADED;
+ u->load_error = 0;
+ }
+
+ if (set_flags) {
+ SWAP(u)->is_active = true;
+ SWAP(u)->just_activated = !SWAP(u)->from_proc_swaps;
+ }
+
+ SWAP(u)->from_proc_swaps = true;
+
+ p->priority = priority;
+ p->priority_set = true;
+
+ unit_add_to_dbus_queue(u);
+ return 0;
+
+fail:
+ log_unit_warning_errno(u, r, "Failed to load swap unit: %m");
+
+ if (delete)
+ unit_free(u);
+
+ return r;
+}
+
+static int swap_process_new(Manager *m, const char *device, int prio, bool set_flags) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ const char *dn, *devlink;
+ struct stat st, st_link;
+ int r;
+
+ assert(m);
+
+ r = swap_setup_unit(m, device, device, prio, set_flags);
+ if (r < 0)
+ return r;
+
+ /* If this is a block device, then let's add duplicates for
+ * all other names of this block device */
+ if (stat(device, &st) < 0 || !S_ISBLK(st.st_mode))
+ return 0;
+
+ r = device_new_from_stat_rdev(&d, &st);
+ if (r < 0) {
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to allocate device for swap %s: %m", device);
+ return 0;
+ }
+
+ /* Add the main device node */
+ if (sd_device_get_devname(d, &dn) >= 0 && !streq(dn, device))
+ swap_setup_unit(m, dn, device, prio, set_flags);
+
+ /* Add additional units for all symlinks */
+ FOREACH_DEVICE_DEVLINK(d, devlink) {
+
+ /* Don't bother with the /dev/block links */
+ if (streq(devlink, device))
+ continue;
+
+ if (path_startswith(devlink, "/dev/block/"))
+ continue;
+
+ if (stat(devlink, &st_link) >= 0 &&
+ (!S_ISBLK(st_link.st_mode) ||
+ st_link.st_rdev != st.st_rdev))
+ continue;
+
+ swap_setup_unit(m, devlink, device, prio, set_flags);
+ }
+
+ return 0;
+}
+
+static void swap_set_state(Swap *s, SwapState state) {
+ SwapState old_state;
+ Swap *other;
+
+ assert(s);
+
+ if (s->state != state)
+ bus_unit_send_pending_change_signal(UNIT(s), false);
+
+ old_state = s->state;
+ s->state = state;
+
+ if (!SWAP_STATE_WITH_PROCESS(state)) {
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ swap_unwatch_control_pid(s);
+ s->control_command = NULL;
+ s->control_command_id = _SWAP_EXEC_COMMAND_INVALID;
+ }
+
+ if (state != old_state)
+ log_unit_debug(UNIT(s), "Changed %s -> %s", swap_state_to_string(old_state), swap_state_to_string(state));
+
+ unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], 0);
+
+ /* If there other units for the same device node have a job
+ queued it might be worth checking again if it is runnable
+ now. This is necessary, since swap_start() refuses
+ operation with EAGAIN if there's already another job for
+ the same device node queued. */
+ LIST_FOREACH_OTHERS(same_devnode, other, s)
+ if (UNIT(other)->job)
+ job_add_to_run_queue(UNIT(other)->job);
+}
+
+static int swap_coldplug(Unit *u) {
+ Swap *s = SWAP(u);
+ SwapState new_state = SWAP_DEAD;
+ int r;
+
+ assert(s);
+ assert(s->state == SWAP_DEAD);
+
+ if (s->deserialized_state != s->state)
+ new_state = s->deserialized_state;
+ else if (s->from_proc_swaps)
+ new_state = SWAP_ACTIVE;
+
+ if (new_state == s->state)
+ return 0;
+
+ if (s->control_pid > 0 &&
+ pid_is_unwaited(s->control_pid) &&
+ SWAP_STATE_WITH_PROCESS(new_state)) {
+
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
+ if (r < 0)
+ return r;
+
+ r = swap_arm_timer(s, usec_add(u->state_change_timestamp.monotonic, s->timeout_usec));
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED)) {
+ (void) unit_setup_dynamic_creds(u);
+ (void) unit_setup_exec_runtime(u);
+ }
+
+ swap_set_state(s, new_state);
+ return 0;
+}
+
+static void swap_dump(Unit *u, FILE *f, const char *prefix) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ Swap *s = SWAP(u);
+ SwapParameters *p;
+
+ assert(s);
+ assert(f);
+
+ if (s->from_proc_swaps)
+ p = &s->parameters_proc_swaps;
+ else if (s->from_fragment)
+ p = &s->parameters_fragment;
+ else
+ p = NULL;
+
+ fprintf(f,
+ "%sSwap State: %s\n"
+ "%sResult: %s\n"
+ "%sClean Result: %s\n"
+ "%sWhat: %s\n"
+ "%sFrom /proc/swaps: %s\n"
+ "%sFrom fragment: %s\n"
+ "%sExtrinsic: %s\n",
+ prefix, swap_state_to_string(s->state),
+ prefix, swap_result_to_string(s->result),
+ prefix, swap_result_to_string(s->clean_result),
+ prefix, s->what,
+ prefix, yes_no(s->from_proc_swaps),
+ prefix, yes_no(s->from_fragment),
+ prefix, yes_no(swap_is_extrinsic(u)));
+
+ if (s->devnode)
+ fprintf(f, "%sDevice Node: %s\n", prefix, s->devnode);
+
+ if (p)
+ fprintf(f,
+ "%sPriority: %i\n"
+ "%sOptions: %s\n",
+ prefix, p->priority,
+ prefix, strempty(p->options));
+
+ fprintf(f,
+ "%sTimeoutSec: %s\n",
+ prefix, format_timespan(buf, sizeof(buf), s->timeout_usec, USEC_PER_SEC));
+
+ if (s->control_pid > 0)
+ fprintf(f,
+ "%sControl PID: "PID_FMT"\n",
+ prefix, s->control_pid);
+
+ exec_context_dump(&s->exec_context, f, prefix);
+ kill_context_dump(&s->kill_context, f, prefix);
+ cgroup_context_dump(UNIT(s), f, prefix);
+}
+
+static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
+
+ _cleanup_(exec_params_clear) ExecParameters exec_params = {
+ .flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
+ .exec_fd = -1,
+ };
+ pid_t pid;
+ int r;
+
+ assert(s);
+ assert(c);
+ assert(_pid);
+
+ r = unit_prepare_exec(UNIT(s));
+ if (r < 0)
+ return r;
+
+ r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ r = unit_set_exec_params(UNIT(s), &exec_params);
+ if (r < 0)
+ goto fail;
+
+ r = exec_spawn(UNIT(s),
+ c,
+ &s->exec_context,
+ &exec_params,
+ s->exec_runtime,
+ &s->dynamic_creds,
+ &pid);
+ if (r < 0)
+ goto fail;
+
+ r = unit_watch_pid(UNIT(s), pid, true);
+ if (r < 0)
+ goto fail;
+
+ *_pid = pid;
+
+ return 0;
+
+fail:
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ return r;
+}
+
+static void swap_enter_dead(Swap *s, SwapResult f) {
+ assert(s);
+
+ if (s->result == SWAP_SUCCESS)
+ s->result = f;
+
+ unit_log_result(UNIT(s), s->result == SWAP_SUCCESS, swap_result_to_string(s->result));
+ unit_warn_leftover_processes(UNIT(s), unit_log_leftover_process_stop);
+ swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
+
+ s->exec_runtime = exec_runtime_unref(s->exec_runtime, true);
+
+ unit_destroy_runtime_data(UNIT(s), &s->exec_context);
+
+ unit_unref_uid_gid(UNIT(s), true);
+
+ dynamic_creds_destroy(&s->dynamic_creds);
+}
+
+static void swap_enter_active(Swap *s, SwapResult f) {
+ assert(s);
+
+ if (s->result == SWAP_SUCCESS)
+ s->result = f;
+
+ swap_set_state(s, SWAP_ACTIVE);
+}
+
+static void swap_enter_dead_or_active(Swap *s, SwapResult f) {
+ assert(s);
+
+ if (s->from_proc_swaps) {
+ Swap *other;
+
+ swap_enter_active(s, f);
+
+ LIST_FOREACH_OTHERS(same_devnode, other, s)
+ if (UNIT(other)->job)
+ swap_enter_dead_or_active(other, f);
+ } else
+ swap_enter_dead(s, f);
+}
+
+static int state_to_kill_operation(Swap *s, SwapState state) {
+ if (state == SWAP_DEACTIVATING_SIGTERM) {
+ if (unit_has_job_type(UNIT(s), JOB_RESTART))
+ return KILL_RESTART;
+ else
+ return KILL_TERMINATE;
+ }
+
+ return KILL_KILL;
+}
+
+static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) {
+ int r;
+
+ assert(s);
+
+ if (s->result == SWAP_SUCCESS)
+ s->result = f;
+
+ r = unit_kill_context(UNIT(s),
+ &s->kill_context,
+ state_to_kill_operation(s, state),
+ -1,
+ s->control_pid,
+ false);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
+ if (r < 0)
+ goto fail;
+
+ swap_set_state(s, state);
+ } else if (state == SWAP_DEACTIVATING_SIGTERM && s->kill_context.send_sigkill)
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGKILL, SWAP_SUCCESS);
+ else
+ swap_enter_dead_or_active(s, SWAP_SUCCESS);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_RESOURCES);
+}
+
+static void swap_enter_activating(Swap *s) {
+ _cleanup_free_ char *opts = NULL;
+ int r;
+
+ assert(s);
+
+ unit_warn_leftover_processes(UNIT(s), unit_log_leftover_process_start);
+
+ s->control_command_id = SWAP_EXEC_ACTIVATE;
+ s->control_command = s->exec_command + SWAP_EXEC_ACTIVATE;
+
+ if (s->from_fragment) {
+ int priority = 0;
+
+ r = fstab_find_pri(s->parameters_fragment.options, &priority);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to parse swap priority \"%s\", ignoring: %m", s->parameters_fragment.options);
+ else if (r > 0 && s->parameters_fragment.priority_set)
+ log_unit_warning(UNIT(s), "Duplicate swap priority configuration by Priority= and Options= fields.");
+
+ if (r <= 0 && s->parameters_fragment.priority_set) {
+ if (s->parameters_fragment.options)
+ r = asprintf(&opts, "%s,pri=%i", s->parameters_fragment.options, s->parameters_fragment.priority);
+ else
+ r = asprintf(&opts, "pri=%i", s->parameters_fragment.priority);
+ if (r < 0) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+ }
+
+ r = exec_command_set(s->control_command, "/sbin/swapon", NULL);
+ if (r < 0)
+ goto fail;
+
+ if (s->parameters_fragment.options || opts) {
+ r = exec_command_append(s->control_command, "-o",
+ opts ?: s->parameters_fragment.options, NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = exec_command_append(s->control_command, s->what, NULL);
+ if (r < 0)
+ goto fail;
+
+ swap_unwatch_control_pid(s);
+
+ r = swap_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ swap_set_state(s, SWAP_ACTIVATING);
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'swapon' task: %m");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_RESOURCES);
+}
+
+static void swap_enter_deactivating(Swap *s) {
+ int r;
+
+ assert(s);
+
+ s->control_command_id = SWAP_EXEC_DEACTIVATE;
+ s->control_command = s->exec_command + SWAP_EXEC_DEACTIVATE;
+
+ r = exec_command_set(s->control_command,
+ "/sbin/swapoff",
+ s->what,
+ NULL);
+ if (r < 0)
+ goto fail;
+
+ swap_unwatch_control_pid(s);
+
+ r = swap_spawn(s, s->control_command, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ swap_set_state(s, SWAP_DEACTIVATING);
+
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(s), r, "Failed to run 'swapoff' task: %m");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_RESOURCES);
+}
+
+static void swap_cycle_clear(Swap *s) {
+ assert(s);
+
+ s->result = SWAP_SUCCESS;
+ exec_command_reset_status_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
+ UNIT(s)->reset_accounting = true;
+}
+
+static int swap_start(Unit *u) {
+ Swap *s = SWAP(u), *other;
+ int r;
+
+ assert(s);
+
+ /* We cannot fulfill this request right now, try again later please! */
+ if (IN_SET(s->state,
+ SWAP_DEACTIVATING,
+ SWAP_DEACTIVATING_SIGTERM,
+ SWAP_DEACTIVATING_SIGKILL,
+ SWAP_CLEANING))
+ return -EAGAIN;
+
+ /* Already on it! */
+ if (s->state == SWAP_ACTIVATING)
+ return 0;
+
+ assert(IN_SET(s->state, SWAP_DEAD, SWAP_FAILED));
+
+ if (detect_container() > 0)
+ return -EPERM;
+
+ /* If there's a job for another swap unit for the same node
+ * running, then let's not dispatch this one for now, and wait
+ * until that other job has finished. */
+ LIST_FOREACH_OTHERS(same_devnode, other, s)
+ if (UNIT(other)->job && UNIT(other)->job->state == JOB_RUNNING)
+ return -EAGAIN;
+
+ r = unit_test_start_limit(u);
+ if (r < 0) {
+ swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ swap_cycle_clear(s);
+ swap_enter_activating(s);
+ return 1;
+}
+
+static int swap_stop(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ switch (s->state) {
+
+ case SWAP_DEACTIVATING:
+ case SWAP_DEACTIVATING_SIGTERM:
+ case SWAP_DEACTIVATING_SIGKILL:
+ /* Already on it */
+ return 0;
+
+ case SWAP_ACTIVATING:
+ case SWAP_ACTIVATING_DONE:
+ /* There's a control process pending, directly enter kill mode */
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGTERM, SWAP_SUCCESS);
+ return 0;
+
+ case SWAP_ACTIVE:
+ if (detect_container() > 0)
+ return -EPERM;
+
+ swap_enter_deactivating(s);
+ return 1;
+
+ case SWAP_CLEANING:
+ /* If we are currently cleaning, then abort it, brutally. */
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGKILL, SWAP_SUCCESS);
+ return 0;
+
+ default:
+ assert_not_reached("Unexpected state.");
+ }
+}
+
+static int swap_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", swap_state_to_string(s->state));
+ (void) serialize_item(f, "result", swap_result_to_string(s->result));
+
+ if (s->control_pid > 0)
+ (void) serialize_item_format(f, "control-pid", PID_FMT, s->control_pid);
+
+ if (s->control_command_id >= 0)
+ (void) serialize_item(f, "control-command", swap_exec_command_to_string(s->control_command_id));
+
+ return 0;
+}
+
+static int swap_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ SwapState state;
+
+ state = swap_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ s->deserialized_state = state;
+ } else if (streq(key, "result")) {
+ SwapResult f;
+
+ f = swap_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != SWAP_SUCCESS)
+ s->result = f;
+ } else if (streq(key, "control-pid")) {
+ pid_t pid;
+
+ if (parse_pid(value, &pid) < 0)
+ log_unit_debug(u, "Failed to parse control-pid value: %s", value);
+ else
+ s->control_pid = pid;
+
+ } else if (streq(key, "control-command")) {
+ SwapExecCommand id;
+
+ id = swap_exec_command_from_string(value);
+ if (id < 0)
+ log_unit_debug(u, "Failed to parse exec-command value: %s", value);
+ else {
+ s->control_command_id = id;
+ s->control_command = s->exec_command + id;
+ }
+ } else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+ Swap *s = SWAP(u);
+ SwapResult f;
+
+ assert(s);
+ assert(pid >= 0);
+
+ if (pid != s->control_pid)
+ return;
+
+ /* Let's scan /proc/swaps before we process SIGCHLD. For the reasoning see the similar code in
+ * mount.c */
+ (void) swap_process_proc_swaps(u->manager);
+
+ s->control_pid = 0;
+
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
+ f = SWAP_SUCCESS;
+ else if (code == CLD_EXITED)
+ f = SWAP_FAILURE_EXIT_CODE;
+ else if (code == CLD_KILLED)
+ f = SWAP_FAILURE_SIGNAL;
+ else if (code == CLD_DUMPED)
+ f = SWAP_FAILURE_CORE_DUMP;
+ else
+ assert_not_reached("Unknown code");
+
+ if (s->result == SWAP_SUCCESS)
+ s->result = f;
+
+ if (s->control_command) {
+ exec_status_exit(&s->control_command->exec_status, &s->exec_context, pid, code, status);
+
+ s->control_command = NULL;
+ s->control_command_id = _SWAP_EXEC_COMMAND_INVALID;
+ }
+
+ unit_log_process_exit(
+ u,
+ "Swap process",
+ swap_exec_command_to_string(s->control_command_id),
+ f == SWAP_SUCCESS,
+ code, status);
+
+ switch (s->state) {
+
+ case SWAP_ACTIVATING:
+ case SWAP_ACTIVATING_DONE:
+
+ if (f == SWAP_SUCCESS || s->from_proc_swaps)
+ swap_enter_active(s, f);
+ else
+ swap_enter_dead(s, f);
+ break;
+
+ case SWAP_DEACTIVATING:
+ case SWAP_DEACTIVATING_SIGKILL:
+ case SWAP_DEACTIVATING_SIGTERM:
+
+ swap_enter_dead_or_active(s, f);
+ break;
+
+ case SWAP_CLEANING:
+ if (s->clean_result == SWAP_SUCCESS)
+ s->clean_result = f;
+
+ swap_enter_dead(s, SWAP_SUCCESS);
+ break;
+
+ default:
+ assert_not_reached("Uh, control process died at wrong time.");
+ }
+
+ /* Notify clients about changed exit status */
+ unit_add_to_dbus_queue(u);
+}
+
+static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Swap *s = SWAP(userdata);
+
+ assert(s);
+ assert(s->timer_event_source == source);
+
+ switch (s->state) {
+
+ case SWAP_ACTIVATING:
+ case SWAP_ACTIVATING_DONE:
+ log_unit_warning(UNIT(s), "Activation timed out. Stopping.");
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGTERM, SWAP_FAILURE_TIMEOUT);
+ break;
+
+ case SWAP_DEACTIVATING:
+ log_unit_warning(UNIT(s), "Deactivation timed out. Stopping.");
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGTERM, SWAP_FAILURE_TIMEOUT);
+ break;
+
+ case SWAP_DEACTIVATING_SIGTERM:
+ if (s->kill_context.send_sigkill) {
+ log_unit_warning(UNIT(s), "Swap process timed out. Killing.");
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGKILL, SWAP_FAILURE_TIMEOUT);
+ } else {
+ log_unit_warning(UNIT(s), "Swap process timed out. Skipping SIGKILL. Ignoring.");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_TIMEOUT);
+ }
+ break;
+
+ case SWAP_DEACTIVATING_SIGKILL:
+ log_unit_warning(UNIT(s), "Swap process still around after SIGKILL. Ignoring.");
+ swap_enter_dead_or_active(s, SWAP_FAILURE_TIMEOUT);
+ break;
+
+ case SWAP_CLEANING:
+ log_unit_warning(UNIT(s), "Cleaning timed out. killing.");
+
+ if (s->clean_result == SWAP_SUCCESS)
+ s->clean_result = SWAP_FAILURE_TIMEOUT;
+
+ swap_enter_signal(s, SWAP_DEACTIVATING_SIGKILL, 0);
+ break;
+
+ default:
+ assert_not_reached("Timeout at wrong time.");
+ }
+
+ return 0;
+}
+
+static int swap_load_proc_swaps(Manager *m, bool set_flags) {
+ unsigned i;
+
+ assert(m);
+
+ rewind(m->proc_swaps);
+
+ (void) fscanf(m->proc_swaps, "%*s %*s %*s %*s %*s\n");
+
+ for (i = 1;; i++) {
+ _cleanup_free_ char *dev = NULL, *d = NULL;
+ int prio = 0, k;
+
+ k = fscanf(m->proc_swaps,
+ "%ms " /* device/file */
+ "%*s " /* type of swap */
+ "%*s " /* swap size */
+ "%*s " /* used */
+ "%i\n", /* priority */
+ &dev, &prio);
+ if (k != 2) {
+ if (k == EOF)
+ break;
+
+ log_warning("Failed to parse /proc/swaps:%u.", i);
+ continue;
+ }
+
+ if (cunescape(dev, UNESCAPE_RELAX, &d) < 0)
+ return log_oom();
+
+ device_found_node(m, d, DEVICE_FOUND_SWAP, DEVICE_FOUND_SWAP);
+
+ (void) swap_process_new(m, d, prio, set_flags);
+ }
+
+ return 0;
+}
+
+static int swap_process_proc_swaps(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ r = swap_load_proc_swaps(m, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to reread /proc/swaps: %m");
+
+ /* Reset flags, just in case, for late calls */
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_SWAP]) {
+ Swap *swap = SWAP(u);
+
+ swap->is_active = swap->just_activated = false;
+ }
+
+ return 0;
+ }
+
+ manager_dispatch_load_queue(m);
+
+ LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_SWAP]) {
+ Swap *swap = SWAP(u);
+
+ if (!swap->is_active) {
+
+ swap_unset_proc_swaps(swap);
+
+ switch (swap->state) {
+
+ case SWAP_ACTIVE:
+ /* This has just been deactivated */
+ swap_enter_dead(swap, SWAP_SUCCESS);
+ break;
+
+ default:
+ /* Fire again */
+ swap_set_state(swap, swap->state);
+ break;
+ }
+
+ if (swap->what)
+ device_found_node(m, swap->what, 0, DEVICE_FOUND_SWAP);
+
+ } else if (swap->just_activated) {
+
+ /* New swap entry */
+
+ switch (swap->state) {
+
+ case SWAP_DEAD:
+ case SWAP_FAILED:
+ (void) unit_acquire_invocation_id(u);
+ swap_cycle_clear(swap);
+ swap_enter_active(swap, SWAP_SUCCESS);
+ break;
+
+ case SWAP_ACTIVATING:
+ swap_set_state(swap, SWAP_ACTIVATING_DONE);
+ break;
+
+ default:
+ /* Nothing really changed, but let's
+ * issue an notification call
+ * nonetheless, in case somebody is
+ * waiting for this. */
+ swap_set_state(swap, swap->state);
+ break;
+ }
+ }
+
+ /* Reset the flags for later calls */
+ swap->is_active = swap->just_activated = false;
+ }
+
+ return 1;
+}
+
+static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(revents & EPOLLPRI);
+
+ return swap_process_proc_swaps(m);
+}
+
+static Unit *swap_following(Unit *u) {
+ Swap *s = SWAP(u);
+ Swap *other, *first = NULL;
+
+ assert(s);
+
+ /* If the user configured the swap through /etc/fstab or
+ * a device unit, follow that. */
+
+ if (s->from_fragment)
+ return NULL;
+
+ LIST_FOREACH_OTHERS(same_devnode, other, s)
+ if (other->from_fragment)
+ return UNIT(other);
+
+ /* Otherwise, make everybody follow the unit that's named after
+ * the swap device in the kernel */
+
+ if (streq_ptr(s->what, s->devnode))
+ return NULL;
+
+ LIST_FOREACH_AFTER(same_devnode, other, s)
+ if (streq_ptr(other->what, other->devnode))
+ return UNIT(other);
+
+ LIST_FOREACH_BEFORE(same_devnode, other, s) {
+ if (streq_ptr(other->what, other->devnode))
+ return UNIT(other);
+
+ first = other;
+ }
+
+ /* Fall back to the first on the list */
+ return UNIT(first);
+}
+
+static int swap_following_set(Unit *u, Set **_set) {
+ Swap *s = SWAP(u), *other;
+ _cleanup_set_free_ Set *set = NULL;
+ int r;
+
+ assert(s);
+ assert(_set);
+
+ if (LIST_JUST_US(same_devnode, s)) {
+ *_set = NULL;
+ return 0;
+ }
+
+ set = set_new(NULL);
+ if (!set)
+ return -ENOMEM;
+
+ LIST_FOREACH_OTHERS(same_devnode, other, s) {
+ r = set_put(set, other);
+ if (r < 0)
+ return r;
+ }
+
+ *_set = TAKE_PTR(set);
+ return 1;
+}
+
+static void swap_shutdown(Manager *m) {
+ assert(m);
+
+ m->swap_event_source = sd_event_source_unref(m->swap_event_source);
+ m->proc_swaps = safe_fclose(m->proc_swaps);
+ m->swaps_by_devnode = hashmap_free(m->swaps_by_devnode);
+}
+
+static void swap_enumerate(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (!m->proc_swaps) {
+ m->proc_swaps = fopen("/proc/swaps", "re");
+ if (!m->proc_swaps) {
+ if (errno == ENOENT)
+ log_debug_errno(errno, "Not swap enabled, skipping enumeration.");
+ else
+ log_warning_errno(errno, "Failed to open /proc/swaps, ignoring: %m");
+
+ return;
+ }
+
+ r = sd_event_add_io(m->event, &m->swap_event_source, fileno(m->proc_swaps), EPOLLPRI, swap_dispatch_io, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to watch /proc/swaps: %m");
+ goto fail;
+ }
+
+ /* Dispatch this before we dispatch SIGCHLD, so that
+ * we always get the events from /proc/swaps before
+ * the SIGCHLD of /sbin/swapon. */
+ r = sd_event_source_set_priority(m->swap_event_source, SD_EVENT_PRIORITY_NORMAL-10);
+ if (r < 0) {
+ log_error_errno(r, "Failed to change /proc/swaps priority: %m");
+ goto fail;
+ }
+
+ (void) sd_event_source_set_description(m->swap_event_source, "swap-proc");
+ }
+
+ r = swap_load_proc_swaps(m, false);
+ if (r < 0)
+ goto fail;
+
+ return;
+
+fail:
+ swap_shutdown(m);
+}
+
+int swap_process_device_new(Manager *m, sd_device *dev) {
+ _cleanup_free_ char *e = NULL;
+ const char *dn, *devlink;
+ Unit *u;
+ int r;
+
+ assert(m);
+ assert(dev);
+
+ r = sd_device_get_devname(dev, &dn);
+ if (r < 0)
+ return 0;
+
+ r = unit_name_from_path(dn, ".swap", &e);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit(m, e);
+ if (u)
+ r = swap_set_devnode(SWAP(u), dn);
+
+ FOREACH_DEVICE_DEVLINK(dev, devlink) {
+ _cleanup_free_ char *n = NULL;
+ int q;
+
+ q = unit_name_from_path(devlink, ".swap", &n);
+ if (q < 0)
+ return q;
+
+ u = manager_get_unit(m, n);
+ if (u) {
+ q = swap_set_devnode(SWAP(u), dn);
+ if (q < 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
+
+int swap_process_device_remove(Manager *m, sd_device *dev) {
+ const char *dn;
+ int r;
+ Swap *s;
+
+ r = sd_device_get_devname(dev, &dn);
+ if (r < 0)
+ return 0;
+
+ while ((s = hashmap_get(m->swaps_by_devnode, dn))) {
+ int q;
+
+ q = swap_set_devnode(s, NULL);
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static void swap_reset_failed(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ if (s->state == SWAP_FAILED)
+ swap_set_state(s, SWAP_DEAD);
+
+ s->result = SWAP_SUCCESS;
+ s->clean_result = SWAP_SUCCESS;
+}
+
+static int swap_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
+ return unit_kill_common(u, who, signo, -1, SWAP(u)->control_pid, error);
+}
+
+static int swap_get_timeout(Unit *u, usec_t *timeout) {
+ Swap *s = SWAP(u);
+ usec_t t;
+ int r;
+
+ if (!s->timer_event_source)
+ return 0;
+
+ r = sd_event_source_get_time(s->timer_event_source, &t);
+ if (r < 0)
+ return r;
+ if (t == USEC_INFINITY)
+ return 0;
+
+ *timeout = t;
+ return 1;
+}
+
+static bool swap_supported(void) {
+ static int supported = -1;
+
+ /* If swap support is not available in the kernel, or we are
+ * running in a container we don't support swap units, and any
+ * attempts to starting one should fail immediately. */
+
+ if (supported < 0)
+ supported =
+ access("/proc/swaps", F_OK) >= 0 &&
+ detect_container() <= 0;
+
+ return supported;
+}
+
+static int swap_control_pid(Unit *u) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ return s->control_pid;
+}
+
+static int swap_clean(Unit *u, ExecCleanMask mask) {
+ _cleanup_strv_free_ char **l = NULL;
+ Swap *s = SWAP(u);
+ int r;
+
+ assert(s);
+ assert(mask != 0);
+
+ if (s->state != SWAP_DEAD)
+ return -EBUSY;
+
+ r = exec_context_get_clean_directories(&s->exec_context, u->manager->prefix, mask, &l);
+ if (r < 0)
+ return r;
+
+ if (strv_isempty(l))
+ return -EUNATCH;
+
+ swap_unwatch_control_pid(s);
+ s->clean_result = SWAP_SUCCESS;
+ s->control_command = NULL;
+ s->control_command_id = _SWAP_EXEC_COMMAND_INVALID;
+
+ r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->exec_context.timeout_clean_usec));
+ if (r < 0)
+ goto fail;
+
+ r = unit_fork_and_watch_rm_rf(u, l, &s->control_pid);
+ if (r < 0)
+ goto fail;
+
+ swap_set_state(s, SWAP_CLEANING);
+
+ return 0;
+
+fail:
+ log_unit_warning_errno(u, r, "Failed to initiate cleaning: %m");
+ s->clean_result = SWAP_FAILURE_RESOURCES;
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+ return r;
+}
+
+static int swap_can_clean(Unit *u, ExecCleanMask *ret) {
+ Swap *s = SWAP(u);
+
+ assert(s);
+
+ return exec_context_get_clean_mask(&s->exec_context, ret);
+}
+
+static const char* const swap_exec_command_table[_SWAP_EXEC_COMMAND_MAX] = {
+ [SWAP_EXEC_ACTIVATE] = "ExecActivate",
+ [SWAP_EXEC_DEACTIVATE] = "ExecDeactivate",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(swap_exec_command, SwapExecCommand);
+
+static const char* const swap_result_table[_SWAP_RESULT_MAX] = {
+ [SWAP_SUCCESS] = "success",
+ [SWAP_FAILURE_RESOURCES] = "resources",
+ [SWAP_FAILURE_TIMEOUT] = "timeout",
+ [SWAP_FAILURE_EXIT_CODE] = "exit-code",
+ [SWAP_FAILURE_SIGNAL] = "signal",
+ [SWAP_FAILURE_CORE_DUMP] = "core-dump",
+ [SWAP_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(swap_result, SwapResult);
+
+const UnitVTable swap_vtable = {
+ .object_size = sizeof(Swap),
+ .exec_context_offset = offsetof(Swap, exec_context),
+ .cgroup_context_offset = offsetof(Swap, cgroup_context),
+ .kill_context_offset = offsetof(Swap, kill_context),
+ .exec_runtime_offset = offsetof(Swap, exec_runtime),
+ .dynamic_creds_offset = offsetof(Swap, dynamic_creds),
+
+ .sections =
+ "Unit\0"
+ "Swap\0"
+ "Install\0",
+ .private_section = "Swap",
+
+ .can_fail = true,
+
+ .init = swap_init,
+ .load = swap_load,
+ .done = swap_done,
+
+ .coldplug = swap_coldplug,
+
+ .dump = swap_dump,
+
+ .start = swap_start,
+ .stop = swap_stop,
+
+ .kill = swap_kill,
+ .clean = swap_clean,
+ .can_clean = swap_can_clean,
+
+ .get_timeout = swap_get_timeout,
+
+ .serialize = swap_serialize,
+ .deserialize_item = swap_deserialize_item,
+
+ .active_state = swap_active_state,
+ .sub_state_to_string = swap_sub_state_to_string,
+
+ .will_restart = unit_will_restart_default,
+
+ .may_gc = swap_may_gc,
+ .is_extrinsic = swap_is_extrinsic,
+
+ .sigchld_event = swap_sigchld_event,
+
+ .reset_failed = swap_reset_failed,
+
+ .control_pid = swap_control_pid,
+
+ .bus_set_property = bus_swap_set_property,
+ .bus_commit_properties = bus_swap_commit_properties,
+
+ .following = swap_following,
+ .following_set = swap_following_set,
+
+ .enumerate = swap_enumerate,
+ .shutdown = swap_shutdown,
+ .supported = swap_supported,
+
+ .status_message_formats = {
+ .starting_stopping = {
+ [0] = "Activating swap %s...",
+ [1] = "Deactivating swap %s...",
+ },
+ .finished_start_job = {
+ [JOB_DONE] = "Activated swap %s.",
+ [JOB_FAILED] = "Failed to activate swap %s.",
+ [JOB_TIMEOUT] = "Timed out activating swap %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Deactivated swap %s.",
+ [JOB_FAILED] = "Failed deactivating swap %s.",
+ [JOB_TIMEOUT] = "Timed out deactivating swap %s.",
+ },
+ },
+};
diff --git a/src/core/swap.h b/src/core/swap.h
new file mode 100644
index 0000000..6ce9bfd
--- /dev/null
+++ b/src/core/swap.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2010 Maarten Lankhorst
+***/
+
+#include "sd-device.h"
+#include "unit.h"
+
+typedef struct Swap Swap;
+
+typedef enum SwapExecCommand {
+ SWAP_EXEC_ACTIVATE,
+ SWAP_EXEC_DEACTIVATE,
+ _SWAP_EXEC_COMMAND_MAX,
+ _SWAP_EXEC_COMMAND_INVALID = -1
+} SwapExecCommand;
+
+typedef enum SwapResult {
+ SWAP_SUCCESS,
+ SWAP_FAILURE_RESOURCES,
+ SWAP_FAILURE_TIMEOUT,
+ SWAP_FAILURE_EXIT_CODE,
+ SWAP_FAILURE_SIGNAL,
+ SWAP_FAILURE_CORE_DUMP,
+ SWAP_FAILURE_START_LIMIT_HIT,
+ _SWAP_RESULT_MAX,
+ _SWAP_RESULT_INVALID = -1
+} SwapResult;
+
+typedef struct SwapParameters {
+ char *what;
+ char *options;
+ int priority;
+ bool priority_set;
+} SwapParameters;
+
+struct Swap {
+ Unit meta;
+
+ char *what;
+
+ /* If the device has already shown up, this is the device
+ * node, which might be different from what, due to
+ * symlinks */
+ char *devnode;
+
+ SwapParameters parameters_proc_swaps;
+ SwapParameters parameters_fragment;
+
+ bool from_proc_swaps:1;
+ bool from_fragment:1;
+
+ /* Used while looking for swaps that vanished or got added
+ * from/to /proc/swaps */
+ bool is_active:1;
+ bool just_activated:1;
+
+ SwapResult result;
+ SwapResult clean_result;
+
+ usec_t timeout_usec;
+
+ ExecCommand exec_command[_SWAP_EXEC_COMMAND_MAX];
+ ExecContext exec_context;
+ KillContext kill_context;
+ CGroupContext cgroup_context;
+
+ ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
+
+ SwapState state, deserialized_state;
+
+ ExecCommand* control_command;
+ SwapExecCommand control_command_id;
+ pid_t control_pid;
+
+ sd_event_source *timer_event_source;
+
+ /* In order to be able to distinguish dependencies on
+ different device nodes we might end up creating multiple
+ devices for the same swap. We chain them up here. */
+
+ LIST_FIELDS(struct Swap, same_devnode);
+};
+
+extern const UnitVTable swap_vtable;
+
+int swap_process_device_new(Manager *m, sd_device *dev);
+int swap_process_device_remove(Manager *m, sd_device *dev);
+
+const char* swap_exec_command_to_string(SwapExecCommand i) _const_;
+SwapExecCommand swap_exec_command_from_string(const char *s) _pure_;
+
+const char* swap_result_to_string(SwapResult i) _const_;
+SwapResult swap_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(SWAP, Swap);
diff --git a/src/core/system.conf.in b/src/core/system.conf.in
new file mode 100644
index 0000000..40bb548
--- /dev/null
+++ b/src/core/system.conf.in
@@ -0,0 +1,71 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See systemd-system.conf(5) for details.
+
+[Manager]
+#LogLevel=info
+#LogTarget=journal-or-kmsg
+#LogColor=yes
+#LogLocation=no
+#LogTime=no
+#DumpCore=yes
+#ShowStatus=yes
+#CrashChangeVT=no
+#CrashShell=no
+#CrashReboot=no
+#CtrlAltDelBurstAction=reboot-force
+#CPUAffinity=1 2
+#NUMAPolicy=default
+#NUMAMask=
+#RuntimeWatchdogSec=0
+#RebootWatchdogSec=10min
+#ShutdownWatchdogSec=10min
+#KExecWatchdogSec=0
+#WatchdogDevice=
+#CapabilityBoundingSet=
+#NoNewPrivileges=no
+#SystemCallArchitectures=
+#TimerSlackNSec=
+#StatusUnitFormat=@STATUS_UNIT_FORMAT_DEFAULT@
+#DefaultTimerAccuracySec=1min
+#DefaultStandardOutput=journal
+#DefaultStandardError=inherit
+#DefaultTimeoutStartSec=90s
+#DefaultTimeoutStopSec=90s
+#DefaultTimeoutAbortSec=
+#DefaultRestartSec=100ms
+#DefaultStartLimitIntervalSec=10s
+#DefaultStartLimitBurst=5
+#DefaultEnvironment=
+#DefaultCPUAccounting=no
+#DefaultIOAccounting=no
+#DefaultIPAccounting=no
+#DefaultBlockIOAccounting=no
+#DefaultMemoryAccounting=@MEMORY_ACCOUNTING_DEFAULT@
+#DefaultTasksAccounting=yes
+#DefaultTasksMax=15%
+#DefaultLimitCPU=
+#DefaultLimitFSIZE=
+#DefaultLimitDATA=
+#DefaultLimitSTACK=
+#DefaultLimitCORE=
+#DefaultLimitRSS=
+#DefaultLimitNOFILE=1024:@HIGH_RLIMIT_NOFILE@
+#DefaultLimitAS=
+#DefaultLimitNPROC=
+#DefaultLimitMEMLOCK=
+#DefaultLimitLOCKS=
+#DefaultLimitSIGPENDING=
+#DefaultLimitMSGQUEUE=
+#DefaultLimitNICE=
+#DefaultLimitRTPRIO=
+#DefaultLimitRTTIME=
diff --git a/src/core/systemd.pc.in b/src/core/systemd.pc.in
new file mode 100644
index 0000000..f2c0455
--- /dev/null
+++ b/src/core/systemd.pc.in
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+# Names with prefixes are preferred, and the run-together names should be
+# considered deprecated (though there is no plan to remove them). New names
+# shall have underscores.
+
+prefix=/usr
+root_prefix=@rootprefix_noslash@
+rootprefix=${root_prefix}
+sysconf_dir=@sysconfdir@
+sysconfdir=${sysconf_dir}
+
+systemd_util_dir=${root_prefix}/lib/systemd
+systemdutildir=${systemd_util_dir}
+
+systemd_system_unit_dir=${rootprefix}/lib/systemd/system
+systemdsystemunitdir=${systemd_system_unit_dir}
+
+systemd_system_preset_dir=${rootprefix}/lib/systemd/system-preset
+systemdsystempresetdir=${systemd_system_preset_dir}
+
+systemd_user_unit_dir=/usr/lib/systemd/user
+systemduserunitdir=${systemd_user_unit_dir}
+
+systemd_user_preset_dir=/usr/lib/systemd/user-preset
+systemduserpresetdir=${systemd_user_preset_dir}
+
+systemd_system_conf_dir=${sysconfdir}/systemd/system
+systemdsystemconfdir=${systemd_system_conf_dir}
+
+systemd_user_conf_dir=${sysconfdir}/systemd/user
+systemduserconfdir=${systemd_user_conf_dir}
+
+systemd_system_unit_path=${systemd_system_conf_dir}:/etc/systemd/system:/run/systemd/system:/usr/local/lib/systemd/system:${systemd_system_unit_dir}:/usr/lib/systemd/system:/lib/systemd/system
+systemdsystemunitpath=${systemd_system_unit_path}
+
+systemd_user_unit_path=${systemd_user_conf_dir}:/etc/systemd/user:/run/systemd/user:/usr/local/lib/systemd/user:/usr/local/share/systemd/user:${systemd_user_unit_dir}:/usr/lib/systemd/user:/usr/share/systemd/user
+systemduserunitpath=${systemd_user_unit_path}
+
+systemd_system_generator_dir=${root_prefix}/lib/systemd/system-generators
+systemdsystemgeneratordir=${systemd_system_generator_dir}
+
+systemd_user_generator_dir=/usr/lib/systemd/user-generators
+systemdusergeneratordir=${systemd_user_generator_dir}
+
+systemd_system_generator_path=/run/systemd/system-generators:/etc/systemd/system-generators:/usr/local/lib/systemd/system-generators:${systemd_system_generator_dir}
+systemdsystemgeneratorpath=${systemd_system_generator_path}
+
+systemd_user_generator_path=/run/systemd/user-generators:/etc/systemd/user-generators:/usr/local/lib/systemd/user-generators:${systemd_user_generator_dir}
+systemdusergeneratorpath=${systemd_user_generator_path}
+
+systemd_sleep_dir=${root_prefix}/lib/systemd/system-sleep
+systemdsleepdir=${systemd_sleep_dir}
+
+systemd_shutdown_dir=${root_prefix}/lib/systemd/system-shutdown
+systemdshutdowndir=${systemd_shutdown_dir}
+
+tmpfiles_dir=/usr/lib/tmpfiles.d
+tmpfilesdir=${tmpfiles_dir}
+
+sysusers_dir=${rootprefix}/lib/sysusers.d
+sysusersdir=${sysusers_dir}
+
+sysctl_dir=${rootprefix}/lib/sysctl.d
+sysctldir=${sysctl_dir}
+
+binfmt_dir=${rootprefix}/lib/binfmt.d
+binfmtdir=${binfmt_dir}
+
+modules_load_dir=${rootprefix}/lib/modules-load.d
+modulesloaddir=${modules_load_dir}
+
+catalog_dir=/usr/lib/systemd/catalog
+catalogdir=${catalog_dir}
+
+system_uid_max=@SYSTEM_UID_MAX@
+systemuidmax=${system_uid_max}
+system_gid_max=@SYSTEM_GID_MAX@
+systemgidmax=${system_gid_max}
+
+dynamic_uid_min=@dynamicuidmin@
+dynamicuidmin=${dynamic_uid_min}
+dynamic_uid_max=@dynamicuidmax@
+dynamicuidmax=${dynamic_uid_max}
+
+container_uid_base_min=@containeruidbasemin@
+containeruidbasemin=${container_uid_base_min}
+container_uid_base_max=@containeruidbasemax@
+containeruidbasemax=${container_uid_base_max}
+
+Name: systemd
+Description: systemd System and Service Manager
+URL: @PROJECT_URL@
+Version: @PROJECT_VERSION@
diff --git a/src/core/target.c b/src/core/target.c
new file mode 100644
index 0000000..a422056
--- /dev/null
+++ b/src/core/target.c
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dbus-target.h"
+#include "dbus-unit.h"
+#include "log.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-util.h"
+#include "target.h"
+#include "unit-name.h"
+#include "unit.h"
+
+static const UnitActiveState state_translation_table[_TARGET_STATE_MAX] = {
+ [TARGET_DEAD] = UNIT_INACTIVE,
+ [TARGET_ACTIVE] = UNIT_ACTIVE
+};
+
+static void target_set_state(Target *t, TargetState state) {
+ TargetState old_state;
+ assert(t);
+
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
+ old_state = t->state;
+ t->state = state;
+
+ if (state != old_state)
+ log_debug("%s changed %s -> %s",
+ UNIT(t)->id,
+ target_state_to_string(old_state),
+ target_state_to_string(state));
+
+ unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static int target_add_default_dependencies(Target *t) {
+
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ UNIT_PART_OF
+ };
+
+ int r;
+ unsigned k;
+
+ assert(t);
+
+ if (!UNIT(t)->default_dependencies)
+ return 0;
+
+ /* Imply ordering for requirement dependencies on target units. Note that when the user created a contradicting
+ * ordering manually we won't add anything in here to make sure we don't create a loop. */
+
+ for (k = 0; k < ELEMENTSOF(deps); k++) {
+ Unit *other;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, other, UNIT(t)->dependencies[deps[k]]) {
+ r = unit_add_default_target_dependency(other, UNIT(t));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (unit_has_name(UNIT(t), SPECIAL_SHUTDOWN_TARGET))
+ return 0;
+
+ /* Make sure targets are unloaded on shutdown */
+ return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int target_load(Unit *u) {
+ Target *t = TARGET(u);
+ int r;
+
+ assert(t);
+
+ r = unit_load_fragment_and_dropin(u, true);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ /* This is a new unit? Then let's add in some extras */
+ return target_add_default_dependencies(t);
+}
+
+static int target_coldplug(Unit *u) {
+ Target *t = TARGET(u);
+
+ assert(t);
+ assert(t->state == TARGET_DEAD);
+
+ if (t->deserialized_state != t->state)
+ target_set_state(t, t->deserialized_state);
+
+ return 0;
+}
+
+static void target_dump(Unit *u, FILE *f, const char *prefix) {
+ Target *t = TARGET(u);
+
+ assert(t);
+ assert(f);
+
+ fprintf(f,
+ "%sTarget State: %s\n",
+ prefix, target_state_to_string(t->state));
+}
+
+static int target_start(Unit *u) {
+ Target *t = TARGET(u);
+ int r;
+
+ assert(t);
+ assert(t->state == TARGET_DEAD);
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ target_set_state(t, TARGET_ACTIVE);
+ return 1;
+}
+
+static int target_stop(Unit *u) {
+ Target *t = TARGET(u);
+
+ assert(t);
+ assert(t->state == TARGET_ACTIVE);
+
+ target_set_state(t, TARGET_DEAD);
+ return 1;
+}
+
+static int target_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Target *s = TARGET(u);
+
+ assert(s);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", target_state_to_string(s->state));
+ return 0;
+}
+
+static int target_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Target *s = TARGET(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ TargetState state;
+
+ state = target_state_from_string(value);
+ if (state < 0)
+ log_debug("Failed to parse state value %s", value);
+ else
+ s->deserialized_state = state;
+
+ } else
+ log_debug("Unknown serialization key '%s'", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState target_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[TARGET(u)->state];
+}
+
+_pure_ static const char *target_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return target_state_to_string(TARGET(u)->state);
+}
+
+const UnitVTable target_vtable = {
+ .object_size = sizeof(Target),
+
+ .sections =
+ "Unit\0"
+ "Target\0"
+ "Install\0",
+
+ .can_fail = true,
+
+ .load = target_load,
+ .coldplug = target_coldplug,
+
+ .dump = target_dump,
+
+ .start = target_start,
+ .stop = target_stop,
+
+ .serialize = target_serialize,
+ .deserialize_item = target_deserialize_item,
+
+ .active_state = target_active_state,
+ .sub_state_to_string = target_sub_state_to_string,
+
+ .status_message_formats = {
+ .finished_start_job = {
+ [JOB_DONE] = "Reached target %s.",
+ },
+ .finished_stop_job = {
+ [JOB_DONE] = "Stopped target %s.",
+ },
+ },
+};
diff --git a/src/core/target.h b/src/core/target.h
new file mode 100644
index 0000000..bb909d6
--- /dev/null
+++ b/src/core/target.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "unit.h"
+
+typedef struct Target Target;
+
+struct Target {
+ Unit meta;
+
+ TargetState state, deserialized_state;
+};
+
+extern const UnitVTable target_vtable;
+
+DEFINE_CAST(TARGET, Target);
diff --git a/src/core/timer.c b/src/core/timer.c
new file mode 100644
index 0000000..651f18b
--- /dev/null
+++ b/src/core/timer.c
@@ -0,0 +1,963 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "dbus-timer.h"
+#include "dbus-unit.h"
+#include "fs-util.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "timer.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "user-util.h"
+#include "virt.h"
+
+static const UnitActiveState state_translation_table[_TIMER_STATE_MAX] = {
+ [TIMER_DEAD] = UNIT_INACTIVE,
+ [TIMER_WAITING] = UNIT_ACTIVE,
+ [TIMER_RUNNING] = UNIT_ACTIVE,
+ [TIMER_ELAPSED] = UNIT_ACTIVE,
+ [TIMER_FAILED] = UNIT_FAILED
+};
+
+static int timer_dispatch(sd_event_source *s, uint64_t usec, void *userdata);
+
+static void timer_init(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
+ t->next_elapse_realtime = USEC_INFINITY;
+ t->accuracy_usec = u->manager->default_timer_accuracy_usec;
+ t->remain_after_elapse = true;
+}
+
+void timer_free_values(Timer *t) {
+ TimerValue *v;
+
+ assert(t);
+
+ while ((v = t->values)) {
+ LIST_REMOVE(value, t->values, v);
+ calendar_spec_free(v->calendar_spec);
+ free(v);
+ }
+}
+
+static void timer_done(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+
+ timer_free_values(t);
+
+ t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source);
+ t->realtime_event_source = sd_event_source_unref(t->realtime_event_source);
+
+ free(t->stamp_path);
+}
+
+static int timer_verify(Timer *t) {
+ assert(t);
+ assert(UNIT(t)->load_state == UNIT_LOADED);
+
+ if (!t->values && !t->on_clock_change && !t->on_timezone_change) {
+ log_unit_error(UNIT(t), "Timer unit lacks value setting. Refusing.");
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int timer_add_default_dependencies(Timer *t) {
+ int r;
+ TimerValue *v;
+
+ assert(t);
+
+ if (!UNIT(t)->default_dependencies)
+ return 0;
+
+ r = unit_add_dependency_by_name(UNIT(t), UNIT_BEFORE, SPECIAL_TIMERS_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ if (MANAGER_IS_SYSTEM(UNIT(t)->manager)) {
+ r = unit_add_two_dependencies_by_name(UNIT(t), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(value, v, t->values) {
+ if (v->base == TIMER_CALENDAR) {
+ r = unit_add_dependency_by_name(UNIT(t), UNIT_AFTER, SPECIAL_TIME_SYNC_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+ if (r < 0)
+ return r;
+ break;
+ }
+ }
+ }
+
+ return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int timer_add_trigger_dependencies(Timer *t) {
+ Unit *x;
+ int r;
+
+ assert(t);
+
+ if (!hashmap_isempty(UNIT(t)->dependencies[UNIT_TRIGGERS]))
+ return 0;
+
+ r = unit_load_related_unit(UNIT(t), ".service", &x);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies(UNIT(t), UNIT_BEFORE, UNIT_TRIGGERS, x, true, UNIT_DEPENDENCY_IMPLICIT);
+}
+
+static int timer_setup_persistent(Timer *t) {
+ int r;
+
+ assert(t);
+
+ if (!t->persistent)
+ return 0;
+
+ if (MANAGER_IS_SYSTEM(UNIT(t)->manager)) {
+
+ r = unit_require_mounts_for(UNIT(t), "/var/lib/systemd/timers", UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+
+ t->stamp_path = strjoin("/var/lib/systemd/timers/stamp-", UNIT(t)->id);
+ } else {
+ const char *e;
+
+ e = getenv("XDG_DATA_HOME");
+ if (e)
+ t->stamp_path = strjoin(e, "/systemd/timers/stamp-", UNIT(t)->id);
+ else {
+
+ _cleanup_free_ char *h = NULL;
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(t), r, "Failed to determine home directory: %m");
+
+ t->stamp_path = strjoin(h, "/.local/share/systemd/timers/stamp-", UNIT(t)->id);
+ }
+ }
+
+ if (!t->stamp_path)
+ return log_oom();
+
+ return 0;
+}
+
+static uint64_t timer_get_fixed_delay_hash(Timer *t) {
+ static const uint8_t hash_key[] = {
+ 0x51, 0x0a, 0xdb, 0x76, 0x29, 0x51, 0x42, 0xc2,
+ 0x80, 0x35, 0xea, 0xe6, 0x8e, 0x3a, 0x37, 0xbd
+ };
+
+ struct siphash state;
+ sd_id128_t machine_id;
+ uid_t uid;
+ int r;
+
+ assert(t);
+
+ uid = getuid();
+ r = sd_id128_get_machine(&machine_id);
+ if (r < 0) {
+ log_unit_debug_errno(UNIT(t), r,
+ "Failed to get machine ID for the fixed delay calculation, proceeding with 0: %m");
+ machine_id = SD_ID128_NULL;
+ }
+
+ siphash24_init(&state, hash_key);
+ siphash24_compress(&machine_id, sizeof(sd_id128_t), &state);
+ siphash24_compress_boolean(MANAGER_IS_SYSTEM(UNIT(t)->manager), &state);
+ siphash24_compress(&uid, sizeof(uid_t), &state);
+ siphash24_compress_string(UNIT(t)->id, &state);
+
+ return siphash24_finalize(&state);
+}
+
+static int timer_load(Unit *u) {
+ Timer *t = TIMER(u);
+ int r;
+
+ assert(u);
+ assert(u->load_state == UNIT_STUB);
+
+ r = unit_load_fragment_and_dropin(u, true);
+ if (r < 0)
+ return r;
+
+ if (u->load_state != UNIT_LOADED)
+ return 0;
+
+ /* This is a new unit? Then let's add in some extras */
+ r = timer_add_trigger_dependencies(t);
+ if (r < 0)
+ return r;
+
+ r = timer_setup_persistent(t);
+ if (r < 0)
+ return r;
+
+ r = timer_add_default_dependencies(t);
+ if (r < 0)
+ return r;
+
+ return timer_verify(t);
+}
+
+static void timer_dump(Unit *u, FILE *f, const char *prefix) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ Timer *t = TIMER(u);
+ Unit *trigger;
+ TimerValue *v;
+
+ trigger = UNIT_TRIGGER(u);
+
+ fprintf(f,
+ "%sTimer State: %s\n"
+ "%sResult: %s\n"
+ "%sUnit: %s\n"
+ "%sPersistent: %s\n"
+ "%sWakeSystem: %s\n"
+ "%sAccuracy: %s\n"
+ "%sRemainAfterElapse: %s\n"
+ "%sFixedRandomDelay: %s\n"
+ "%sOnClockChange: %s\n"
+ "%sOnTimeZoneChange: %s\n",
+ prefix, timer_state_to_string(t->state),
+ prefix, timer_result_to_string(t->result),
+ prefix, trigger ? trigger->id : "n/a",
+ prefix, yes_no(t->persistent),
+ prefix, yes_no(t->wake_system),
+ prefix, format_timespan(buf, sizeof(buf), t->accuracy_usec, 1),
+ prefix, yes_no(t->remain_after_elapse),
+ prefix, yes_no(t->fixed_random_delay),
+ prefix, yes_no(t->on_clock_change),
+ prefix, yes_no(t->on_timezone_change));
+
+ LIST_FOREACH(value, v, t->values) {
+
+ if (v->base == TIMER_CALENDAR) {
+ _cleanup_free_ char *p = NULL;
+
+ (void) calendar_spec_to_string(v->calendar_spec, &p);
+
+ fprintf(f,
+ "%s%s: %s\n",
+ prefix,
+ timer_base_to_string(v->base),
+ strna(p));
+ } else {
+ char timespan1[FORMAT_TIMESPAN_MAX];
+
+ fprintf(f,
+ "%s%s: %s\n",
+ prefix,
+ timer_base_to_string(v->base),
+ format_timespan(timespan1, sizeof(timespan1), v->value, 0));
+ }
+ }
+}
+
+static void timer_set_state(Timer *t, TimerState state) {
+ TimerState old_state;
+ assert(t);
+
+ if (t->state != state)
+ bus_unit_send_pending_change_signal(UNIT(t), false);
+
+ old_state = t->state;
+ t->state = state;
+
+ if (state != TIMER_WAITING) {
+ t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source);
+ t->realtime_event_source = sd_event_source_unref(t->realtime_event_source);
+ t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
+ t->next_elapse_realtime = USEC_INFINITY;
+ }
+
+ if (state != old_state)
+ log_unit_debug(UNIT(t), "Changed %s -> %s", timer_state_to_string(old_state), timer_state_to_string(state));
+
+ unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], 0);
+}
+
+static void timer_enter_waiting(Timer *t, bool time_change);
+
+static int timer_coldplug(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+ assert(t->state == TIMER_DEAD);
+
+ if (t->deserialized_state == t->state)
+ return 0;
+
+ if (t->deserialized_state == TIMER_WAITING)
+ timer_enter_waiting(t, false);
+ else
+ timer_set_state(t, t->deserialized_state);
+
+ return 0;
+}
+
+static void timer_enter_dead(Timer *t, TimerResult f) {
+ assert(t);
+
+ if (t->result == TIMER_SUCCESS)
+ t->result = f;
+
+ unit_log_result(UNIT(t), t->result == TIMER_SUCCESS, timer_result_to_string(t->result));
+ timer_set_state(t, t->result != TIMER_SUCCESS ? TIMER_FAILED : TIMER_DEAD);
+}
+
+static void timer_enter_elapsed(Timer *t, bool leave_around) {
+ assert(t);
+
+ /* If a unit is marked with RemainAfterElapse=yes we leave it
+ * around even after it elapsed once, so that starting it
+ * later again does not necessarily mean immediate
+ * retriggering. We unconditionally leave units with
+ * TIMER_UNIT_ACTIVE or TIMER_UNIT_INACTIVE triggers around,
+ * since they might be restarted automatically at any time
+ * later on. */
+
+ if (t->remain_after_elapse || leave_around)
+ timer_set_state(t, TIMER_ELAPSED);
+ else
+ timer_enter_dead(t, TIMER_SUCCESS);
+}
+
+static void add_random(Timer *t, usec_t *v) {
+ char s[FORMAT_TIMESPAN_MAX];
+ usec_t add;
+
+ assert(t);
+ assert(v);
+
+ if (t->random_usec == 0)
+ return;
+ if (*v == USEC_INFINITY)
+ return;
+
+ add = (t->fixed_random_delay ? timer_get_fixed_delay_hash(t) : random_u64()) % t->random_usec;
+
+ if (*v + add < *v) /* overflow */
+ *v = (usec_t) -2; /* Highest possible value, that is not USEC_INFINITY */
+ else
+ *v += add;
+
+ log_unit_debug(UNIT(t), "Adding %s random time.", format_timespan(s, sizeof(s), add, 0));
+}
+
+static void timer_enter_waiting(Timer *t, bool time_change) {
+ bool found_monotonic = false, found_realtime = false;
+ bool leave_around = false;
+ triple_timestamp ts;
+ TimerValue *v;
+ Unit *trigger;
+ int r;
+
+ assert(t);
+
+ trigger = UNIT_TRIGGER(UNIT(t));
+ if (!trigger) {
+ log_unit_error(UNIT(t), "Unit to trigger vanished.");
+ timer_enter_dead(t, TIMER_FAILURE_RESOURCES);
+ return;
+ }
+
+ triple_timestamp_get(&ts);
+ t->next_elapse_monotonic_or_boottime = t->next_elapse_realtime = 0;
+
+ LIST_FOREACH(value, v, t->values) {
+ if (v->disabled)
+ continue;
+
+ if (v->base == TIMER_CALENDAR) {
+ usec_t b, rebased;
+
+ /* If we know the last time this was
+ * triggered, schedule the job based relative
+ * to that. If we don't, just start from
+ * the activation time. */
+
+ if (t->last_trigger.realtime > 0)
+ b = t->last_trigger.realtime;
+ else {
+ if (state_translation_table[t->state] == UNIT_ACTIVE)
+ b = UNIT(t)->inactive_exit_timestamp.realtime;
+ else
+ b = ts.realtime;
+ }
+
+ r = calendar_spec_next_usec(v->calendar_spec, b, &v->next_elapse);
+ if (r < 0)
+ continue;
+
+ /* To make the delay due to RandomizedDelaySec= work even at boot, if the scheduled
+ * time has already passed, set the time when systemd first started as the scheduled
+ * time. Note that we base this on the monotonic timestamp of the boot, not the
+ * realtime one, since the wallclock might have been off during boot. */
+ rebased = map_clock_usec(UNIT(t)->manager->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic,
+ CLOCK_MONOTONIC, CLOCK_REALTIME);
+ if (v->next_elapse < rebased)
+ v->next_elapse = rebased;
+
+ if (!found_realtime)
+ t->next_elapse_realtime = v->next_elapse;
+ else
+ t->next_elapse_realtime = MIN(t->next_elapse_realtime, v->next_elapse);
+
+ found_realtime = true;
+
+ } else {
+ usec_t base;
+
+ switch (v->base) {
+
+ case TIMER_ACTIVE:
+ if (state_translation_table[t->state] == UNIT_ACTIVE)
+ base = UNIT(t)->inactive_exit_timestamp.monotonic;
+ else
+ base = ts.monotonic;
+ break;
+
+ case TIMER_BOOT:
+ if (detect_container() <= 0) {
+ /* CLOCK_MONOTONIC equals the uptime on Linux */
+ base = 0;
+ break;
+ }
+ /* In a container we don't want to include the time the host
+ * was already up when the container started, so count from
+ * our own startup. */
+ _fallthrough_;
+ case TIMER_STARTUP:
+ base = UNIT(t)->manager->timestamps[MANAGER_TIMESTAMP_USERSPACE].monotonic;
+ break;
+
+ case TIMER_UNIT_ACTIVE:
+ leave_around = true;
+ base = MAX(trigger->inactive_exit_timestamp.monotonic, t->last_trigger.monotonic);
+ if (base <= 0)
+ continue;
+ break;
+
+ case TIMER_UNIT_INACTIVE:
+ leave_around = true;
+ base = MAX(trigger->inactive_enter_timestamp.monotonic, t->last_trigger.monotonic);
+ if (base <= 0)
+ continue;
+ break;
+
+ default:
+ assert_not_reached("Unknown timer base");
+ }
+
+ v->next_elapse = usec_add(usec_shift_clock(base, CLOCK_MONOTONIC, TIMER_MONOTONIC_CLOCK(t)), v->value);
+
+ if (dual_timestamp_is_set(&t->last_trigger) &&
+ !time_change &&
+ v->next_elapse < triple_timestamp_by_clock(&ts, TIMER_MONOTONIC_CLOCK(t)) &&
+ IN_SET(v->base, TIMER_ACTIVE, TIMER_BOOT, TIMER_STARTUP)) {
+ /* This is a one time trigger, disable it now */
+ v->disabled = true;
+ continue;
+ }
+
+ if (!found_monotonic)
+ t->next_elapse_monotonic_or_boottime = v->next_elapse;
+ else
+ t->next_elapse_monotonic_or_boottime = MIN(t->next_elapse_monotonic_or_boottime, v->next_elapse);
+
+ found_monotonic = true;
+ }
+ }
+
+ if (!found_monotonic && !found_realtime && !t->on_timezone_change && !t->on_clock_change) {
+ log_unit_debug(UNIT(t), "Timer is elapsed.");
+ timer_enter_elapsed(t, leave_around);
+ return;
+ }
+
+ if (found_monotonic) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ usec_t left;
+
+ add_random(t, &t->next_elapse_monotonic_or_boottime);
+
+ left = usec_sub_unsigned(t->next_elapse_monotonic_or_boottime, triple_timestamp_by_clock(&ts, TIMER_MONOTONIC_CLOCK(t)));
+ log_unit_debug(UNIT(t), "Monotonic timer elapses in %s.", format_timespan(buf, sizeof(buf), left, 0));
+
+ if (t->monotonic_event_source) {
+ r = sd_event_source_set_time(t->monotonic_event_source, t->next_elapse_monotonic_or_boottime);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_enabled(t->monotonic_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ goto fail;
+ } else {
+
+ r = sd_event_add_time(
+ UNIT(t)->manager->event,
+ &t->monotonic_event_source,
+ t->wake_system ? CLOCK_BOOTTIME_ALARM : CLOCK_MONOTONIC,
+ t->next_elapse_monotonic_or_boottime, t->accuracy_usec,
+ timer_dispatch, t);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(t->monotonic_event_source, "timer-monotonic");
+ }
+
+ } else if (t->monotonic_event_source) {
+
+ r = sd_event_source_set_enabled(t->monotonic_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (found_realtime) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+
+ add_random(t, &t->next_elapse_realtime);
+
+ log_unit_debug(UNIT(t), "Realtime timer elapses at %s.", format_timestamp(buf, sizeof(buf), t->next_elapse_realtime));
+
+ if (t->realtime_event_source) {
+ r = sd_event_source_set_time(t->realtime_event_source, t->next_elapse_realtime);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_enabled(t->realtime_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ goto fail;
+ } else {
+ r = sd_event_add_time(
+ UNIT(t)->manager->event,
+ &t->realtime_event_source,
+ t->wake_system ? CLOCK_REALTIME_ALARM : CLOCK_REALTIME,
+ t->next_elapse_realtime, t->accuracy_usec,
+ timer_dispatch, t);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(t->realtime_event_source, "timer-realtime");
+ }
+
+ } else if (t->realtime_event_source) {
+
+ r = sd_event_source_set_enabled(t->realtime_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ goto fail;
+ }
+
+ timer_set_state(t, TIMER_WAITING);
+ return;
+
+fail:
+ log_unit_warning_errno(UNIT(t), r, "Failed to enter waiting state: %m");
+ timer_enter_dead(t, TIMER_FAILURE_RESOURCES);
+}
+
+static void timer_enter_running(Timer *t) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Unit *trigger;
+ int r;
+
+ assert(t);
+
+ /* Don't start job if we are supposed to go down */
+ if (unit_stop_pending(UNIT(t)))
+ return;
+
+ trigger = UNIT_TRIGGER(UNIT(t));
+ if (!trigger) {
+ log_unit_error(UNIT(t), "Unit to trigger vanished.");
+ timer_enter_dead(t, TIMER_FAILURE_RESOURCES);
+ return;
+ }
+
+ r = manager_add_job(UNIT(t)->manager, JOB_START, trigger, JOB_REPLACE, NULL, &error, NULL);
+ if (r < 0)
+ goto fail;
+
+ dual_timestamp_get(&t->last_trigger);
+
+ if (t->stamp_path)
+ touch_file(t->stamp_path, true, t->last_trigger.realtime, UID_INVALID, GID_INVALID, MODE_INVALID);
+
+ timer_set_state(t, TIMER_RUNNING);
+ return;
+
+fail:
+ log_unit_warning(UNIT(t), "Failed to queue unit startup job: %s", bus_error_message(&error, r));
+ timer_enter_dead(t, TIMER_FAILURE_RESOURCES);
+}
+
+static int timer_start(Unit *u) {
+ Timer *t = TIMER(u);
+ TimerValue *v;
+ int r;
+
+ assert(t);
+ assert(IN_SET(t->state, TIMER_DEAD, TIMER_FAILED));
+
+ r = unit_test_trigger_loaded(u);
+ if (r < 0)
+ return r;
+
+ r = unit_test_start_limit(u);
+ if (r < 0) {
+ timer_enter_dead(t, TIMER_FAILURE_START_LIMIT_HIT);
+ return r;
+ }
+
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
+ t->last_trigger = DUAL_TIMESTAMP_NULL;
+
+ /* Reenable all timers that depend on unit activation time */
+ LIST_FOREACH(value, v, t->values)
+ if (v->base == TIMER_ACTIVE)
+ v->disabled = false;
+
+ if (t->stamp_path) {
+ struct stat st;
+
+ if (stat(t->stamp_path, &st) >= 0) {
+ usec_t ft;
+
+ /* Load the file timestamp, but only if it is actually in the past. If it is in the future,
+ * something is wrong with the system clock. */
+
+ ft = timespec_load(&st.st_mtim);
+ if (ft < now(CLOCK_REALTIME))
+ t->last_trigger.realtime = ft;
+ else {
+ char z[FORMAT_TIMESTAMP_MAX];
+
+ log_unit_warning(u, "Not using persistent file timestamp %s as it is in the future.",
+ format_timestamp(z, sizeof(z), ft));
+ }
+
+ } else if (errno == ENOENT)
+ /* The timer has never run before,
+ * make sure a stamp file exists.
+ */
+ (void) touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+ }
+
+ t->result = TIMER_SUCCESS;
+ timer_enter_waiting(t, false);
+ return 1;
+}
+
+static int timer_stop(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+ assert(IN_SET(t->state, TIMER_WAITING, TIMER_RUNNING, TIMER_ELAPSED));
+
+ timer_enter_dead(t, TIMER_SUCCESS);
+ return 1;
+}
+
+static int timer_serialize(Unit *u, FILE *f, FDSet *fds) {
+ Timer *t = TIMER(u);
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ (void) serialize_item(f, "state", timer_state_to_string(t->state));
+ (void) serialize_item(f, "result", timer_result_to_string(t->result));
+
+ if (t->last_trigger.realtime > 0)
+ (void) serialize_usec(f, "last-trigger-realtime", t->last_trigger.realtime);
+
+ if (t->last_trigger.monotonic > 0)
+ (void) serialize_usec(f, "last-trigger-monotonic", t->last_trigger.monotonic);
+
+ return 0;
+}
+
+static int timer_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
+ Timer *t = TIMER(u);
+
+ assert(u);
+ assert(key);
+ assert(value);
+ assert(fds);
+
+ if (streq(key, "state")) {
+ TimerState state;
+
+ state = timer_state_from_string(value);
+ if (state < 0)
+ log_unit_debug(u, "Failed to parse state value: %s", value);
+ else
+ t->deserialized_state = state;
+
+ } else if (streq(key, "result")) {
+ TimerResult f;
+
+ f = timer_result_from_string(value);
+ if (f < 0)
+ log_unit_debug(u, "Failed to parse result value: %s", value);
+ else if (f != TIMER_SUCCESS)
+ t->result = f;
+
+ } else if (streq(key, "last-trigger-realtime"))
+ (void) deserialize_usec(value, &t->last_trigger.realtime);
+ else if (streq(key, "last-trigger-monotonic"))
+ (void) deserialize_usec(value, &t->last_trigger.monotonic);
+ else
+ log_unit_debug(u, "Unknown serialization key: %s", key);
+
+ return 0;
+}
+
+_pure_ static UnitActiveState timer_active_state(Unit *u) {
+ assert(u);
+
+ return state_translation_table[TIMER(u)->state];
+}
+
+_pure_ static const char *timer_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return timer_state_to_string(TIMER(u)->state);
+}
+
+static int timer_dispatch(sd_event_source *s, uint64_t usec, void *userdata) {
+ Timer *t = TIMER(userdata);
+
+ assert(t);
+
+ if (t->state != TIMER_WAITING)
+ return 0;
+
+ log_unit_debug(UNIT(t), "Timer elapsed.");
+ timer_enter_running(t);
+ return 0;
+}
+
+static void timer_trigger_notify(Unit *u, Unit *other) {
+ Timer *t = TIMER(u);
+ TimerValue *v;
+
+ assert(u);
+ assert(other);
+
+ /* Filter out invocations with bogus state */
+ assert(UNIT_IS_LOAD_COMPLETE(other->load_state));
+
+ /* Reenable all timers that depend on unit state */
+ LIST_FOREACH(value, v, t->values)
+ if (IN_SET(v->base, TIMER_UNIT_ACTIVE, TIMER_UNIT_INACTIVE))
+ v->disabled = false;
+
+ switch (t->state) {
+
+ case TIMER_WAITING:
+ case TIMER_ELAPSED:
+
+ /* Recalculate sleep time */
+ timer_enter_waiting(t, false);
+ break;
+
+ case TIMER_RUNNING:
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other))) {
+ log_unit_debug(UNIT(t), "Got notified about unit deactivation.");
+ timer_enter_waiting(t, false);
+ }
+ break;
+
+ case TIMER_DEAD:
+ case TIMER_FAILED:
+ break;
+
+ default:
+ assert_not_reached("Unknown timer state");
+ }
+}
+
+static void timer_reset_failed(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+
+ if (t->state == TIMER_FAILED)
+ timer_set_state(t, TIMER_DEAD);
+
+ t->result = TIMER_SUCCESS;
+}
+
+static void timer_time_change(Unit *u) {
+ Timer *t = TIMER(u);
+ usec_t ts;
+
+ assert(u);
+
+ if (t->state != TIMER_WAITING)
+ return;
+
+ /* If we appear to have triggered in the future, the system clock must
+ * have been set backwards. So let's rewind our own clock and allow
+ * the future trigger(s) to happen again :). Exactly the same as when
+ * you start a timer unit with Persistent=yes. */
+ ts = now(CLOCK_REALTIME);
+ if (t->last_trigger.realtime > ts)
+ t->last_trigger.realtime = ts;
+
+ if (t->on_clock_change) {
+ log_unit_debug(u, "Time change, triggering activation.");
+ timer_enter_running(t);
+ } else {
+ log_unit_debug(u, "Time change, recalculating next elapse.");
+ timer_enter_waiting(t, true);
+ }
+}
+
+static void timer_timezone_change(Unit *u) {
+ Timer *t = TIMER(u);
+
+ assert(u);
+
+ if (t->state != TIMER_WAITING)
+ return;
+
+ if (t->on_timezone_change) {
+ log_unit_debug(u, "Timezone change, triggering activation.");
+ timer_enter_running(t);
+ } else {
+ log_unit_debug(u, "Timezone change, recalculating next elapse.");
+ timer_enter_waiting(t, false);
+ }
+}
+
+static int timer_clean(Unit *u, ExecCleanMask mask) {
+ Timer *t = TIMER(u);
+ int r;
+
+ assert(t);
+ assert(mask != 0);
+
+ if (t->state != TIMER_DEAD)
+ return -EBUSY;
+
+ if (!IN_SET(mask, EXEC_CLEAN_STATE))
+ return -EUNATCH;
+
+ r = timer_setup_persistent(t);
+ if (r < 0)
+ return r;
+
+ if (!t->stamp_path)
+ return -EUNATCH;
+
+ if (unlink(t->stamp_path) && errno != ENOENT)
+ return log_unit_error_errno(u, errno, "Failed to clean stamp file of timer: %m");
+
+ return 0;
+}
+
+static int timer_can_clean(Unit *u, ExecCleanMask *ret) {
+ Timer *t = TIMER(u);
+
+ assert(t);
+
+ *ret = t->persistent ? EXEC_CLEAN_STATE : 0;
+ return 0;
+}
+
+static const char* const timer_base_table[_TIMER_BASE_MAX] = {
+ [TIMER_ACTIVE] = "OnActiveSec",
+ [TIMER_BOOT] = "OnBootSec",
+ [TIMER_STARTUP] = "OnStartupSec",
+ [TIMER_UNIT_ACTIVE] = "OnUnitActiveSec",
+ [TIMER_UNIT_INACTIVE] = "OnUnitInactiveSec",
+ [TIMER_CALENDAR] = "OnCalendar"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(timer_base, TimerBase);
+
+static const char* const timer_result_table[_TIMER_RESULT_MAX] = {
+ [TIMER_SUCCESS] = "success",
+ [TIMER_FAILURE_RESOURCES] = "resources",
+ [TIMER_FAILURE_START_LIMIT_HIT] = "start-limit-hit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(timer_result, TimerResult);
+
+const UnitVTable timer_vtable = {
+ .object_size = sizeof(Timer),
+
+ .sections =
+ "Unit\0"
+ "Timer\0"
+ "Install\0",
+ .private_section = "Timer",
+
+ .can_transient = true,
+ .can_fail = true,
+ .can_trigger = true,
+
+ .init = timer_init,
+ .done = timer_done,
+ .load = timer_load,
+
+ .coldplug = timer_coldplug,
+
+ .dump = timer_dump,
+
+ .start = timer_start,
+ .stop = timer_stop,
+
+ .clean = timer_clean,
+ .can_clean = timer_can_clean,
+
+ .serialize = timer_serialize,
+ .deserialize_item = timer_deserialize_item,
+
+ .active_state = timer_active_state,
+ .sub_state_to_string = timer_sub_state_to_string,
+
+ .trigger_notify = timer_trigger_notify,
+
+ .reset_failed = timer_reset_failed,
+ .time_change = timer_time_change,
+ .timezone_change = timer_timezone_change,
+
+ .bus_set_property = bus_timer_set_property,
+};
diff --git a/src/core/timer.h b/src/core/timer.h
new file mode 100644
index 0000000..14fa317
--- /dev/null
+++ b/src/core/timer.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Timer Timer;
+
+#include "calendarspec.h"
+#include "unit.h"
+
+typedef enum TimerBase {
+ TIMER_ACTIVE,
+ TIMER_BOOT,
+ TIMER_STARTUP,
+ TIMER_UNIT_ACTIVE,
+ TIMER_UNIT_INACTIVE,
+ TIMER_CALENDAR,
+ _TIMER_BASE_MAX,
+ _TIMER_BASE_INVALID = -1
+} TimerBase;
+
+typedef struct TimerValue {
+ TimerBase base;
+ bool disabled;
+
+ usec_t value; /* only for monotonic events */
+ CalendarSpec *calendar_spec; /* only for calendar events */
+ usec_t next_elapse;
+
+ LIST_FIELDS(struct TimerValue, value);
+} TimerValue;
+
+typedef enum TimerResult {
+ TIMER_SUCCESS,
+ TIMER_FAILURE_RESOURCES,
+ TIMER_FAILURE_START_LIMIT_HIT,
+ _TIMER_RESULT_MAX,
+ _TIMER_RESULT_INVALID = -1
+} TimerResult;
+
+struct Timer {
+ Unit meta;
+
+ usec_t accuracy_usec;
+ usec_t random_usec;
+
+ LIST_HEAD(TimerValue, values);
+ usec_t next_elapse_realtime;
+ usec_t next_elapse_monotonic_or_boottime;
+ dual_timestamp last_trigger;
+
+ TimerState state, deserialized_state;
+
+ sd_event_source *monotonic_event_source;
+ sd_event_source *realtime_event_source;
+
+ TimerResult result;
+
+ bool persistent;
+ bool wake_system;
+ bool remain_after_elapse;
+ bool on_clock_change;
+ bool on_timezone_change;
+ bool fixed_random_delay;
+
+ char *stamp_path;
+};
+
+#define TIMER_MONOTONIC_CLOCK(t) ((t)->wake_system && clock_boottime_supported() ? CLOCK_BOOTTIME_ALARM : CLOCK_MONOTONIC)
+
+void timer_free_values(Timer *t);
+
+extern const UnitVTable timer_vtable;
+
+const char *timer_base_to_string(TimerBase i) _const_;
+TimerBase timer_base_from_string(const char *s) _pure_;
+
+const char* timer_result_to_string(TimerResult i) _const_;
+TimerResult timer_result_from_string(const char *s) _pure_;
+
+DEFINE_CAST(TIMER, Timer);
diff --git a/src/core/transaction.c b/src/core/transaction.c
new file mode 100644
index 0000000..ae77bae
--- /dev/null
+++ b/src/core/transaction.c
@@ -0,0 +1,1201 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "dbus-unit.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "transaction.h"
+
+static void transaction_unlink_job(Transaction *tr, Job *j, bool delete_dependencies);
+
+static void transaction_delete_job(Transaction *tr, Job *j, bool delete_dependencies) {
+ assert(tr);
+ assert(j);
+
+ /* Deletes one job from the transaction */
+
+ transaction_unlink_job(tr, j, delete_dependencies);
+
+ job_free(j);
+}
+
+static void transaction_delete_unit(Transaction *tr, Unit *u) {
+ Job *j;
+
+ /* Deletes all jobs associated with a certain unit from the
+ * transaction */
+
+ while ((j = hashmap_get(tr->jobs, u)))
+ transaction_delete_job(tr, j, true);
+}
+
+void transaction_abort(Transaction *tr) {
+ Job *j;
+
+ assert(tr);
+
+ while ((j = hashmap_first(tr->jobs)))
+ transaction_delete_job(tr, j, false);
+
+ assert(hashmap_isempty(tr->jobs));
+}
+
+static void transaction_find_jobs_that_matter_to_anchor(Job *j, unsigned generation) {
+ JobDependency *l;
+
+ /* A recursive sweep through the graph that marks all units
+ * that matter to the anchor job, i.e. are directly or
+ * indirectly a dependency of the anchor job via paths that
+ * are fully marked as mattering. */
+
+ j->matters_to_anchor = true;
+ j->generation = generation;
+
+ LIST_FOREACH(subject, l, j->subject_list) {
+
+ /* This link does not matter */
+ if (!l->matters)
+ continue;
+
+ /* This unit has already been marked */
+ if (l->object->generation == generation)
+ continue;
+
+ transaction_find_jobs_that_matter_to_anchor(l->object, generation);
+ }
+}
+
+static void transaction_merge_and_delete_job(Transaction *tr, Job *j, Job *other, JobType t) {
+ JobDependency *l, *last;
+
+ assert(j);
+ assert(other);
+ assert(j->unit == other->unit);
+ assert(!j->installed);
+
+ /* Merges 'other' into 'j' and then deletes 'other'. */
+
+ j->type = t;
+ j->state = JOB_WAITING;
+ j->irreversible = j->irreversible || other->irreversible;
+ j->matters_to_anchor = j->matters_to_anchor || other->matters_to_anchor;
+
+ /* Patch us in as new owner of the JobDependency objects */
+ last = NULL;
+ LIST_FOREACH(subject, l, other->subject_list) {
+ assert(l->subject == other);
+ l->subject = j;
+ last = l;
+ }
+
+ /* Merge both lists */
+ if (last) {
+ last->subject_next = j->subject_list;
+ if (j->subject_list)
+ j->subject_list->subject_prev = last;
+ j->subject_list = other->subject_list;
+ }
+
+ /* Patch us in as new owner of the JobDependency objects */
+ last = NULL;
+ LIST_FOREACH(object, l, other->object_list) {
+ assert(l->object == other);
+ l->object = j;
+ last = l;
+ }
+
+ /* Merge both lists */
+ if (last) {
+ last->object_next = j->object_list;
+ if (j->object_list)
+ j->object_list->object_prev = last;
+ j->object_list = other->object_list;
+ }
+
+ /* Kill the other job */
+ other->subject_list = NULL;
+ other->object_list = NULL;
+ transaction_delete_job(tr, other, true);
+}
+
+_pure_ static bool job_is_conflicted_by(Job *j) {
+ JobDependency *l;
+
+ assert(j);
+
+ /* Returns true if this job is pulled in by a least one
+ * ConflictedBy dependency. */
+
+ LIST_FOREACH(object, l, j->object_list)
+ if (l->conflicts)
+ return true;
+
+ return false;
+}
+
+static int delete_one_unmergeable_job(Transaction *tr, Job *j) {
+ Job *k;
+
+ assert(j);
+
+ /* Tries to delete one item in the linked list
+ * j->transaction_next->transaction_next->... that conflicts
+ * with another one, in an attempt to make an inconsistent
+ * transaction work. */
+
+ /* We rely here on the fact that if a merged with b does not
+ * merge with c, either a or b merge with c neither */
+ LIST_FOREACH(transaction, j, j)
+ LIST_FOREACH(transaction, k, j->transaction_next) {
+ Job *d;
+
+ /* Is this one mergeable? Then skip it */
+ if (job_type_is_mergeable(j->type, k->type))
+ continue;
+
+ /* Ok, we found two that conflict, let's see if we can
+ * drop one of them */
+ if (!j->matters_to_anchor && !k->matters_to_anchor) {
+
+ /* Both jobs don't matter, so let's
+ * find the one that is smarter to
+ * remove. Let's think positive and
+ * rather remove stops then starts --
+ * except if something is being
+ * stopped because it is conflicted by
+ * another unit in which case we
+ * rather remove the start. */
+
+ log_unit_debug(j->unit,
+ "Looking at job %s/%s conflicted_by=%s",
+ j->unit->id, job_type_to_string(j->type),
+ yes_no(j->type == JOB_STOP && job_is_conflicted_by(j)));
+ log_unit_debug(k->unit,
+ "Looking at job %s/%s conflicted_by=%s",
+ k->unit->id, job_type_to_string(k->type),
+ yes_no(k->type == JOB_STOP && job_is_conflicted_by(k)));
+
+ if (j->type == JOB_STOP) {
+
+ if (job_is_conflicted_by(j))
+ d = k;
+ else
+ d = j;
+
+ } else if (k->type == JOB_STOP) {
+
+ if (job_is_conflicted_by(k))
+ d = j;
+ else
+ d = k;
+ } else
+ d = j;
+
+ } else if (!j->matters_to_anchor)
+ d = j;
+ else if (!k->matters_to_anchor)
+ d = k;
+ else
+ return -ENOEXEC;
+
+ /* Ok, we can drop one, so let's do so. */
+ log_unit_debug(d->unit,
+ "Fixing conflicting jobs %s/%s,%s/%s by deleting job %s/%s",
+ j->unit->id, job_type_to_string(j->type),
+ k->unit->id, job_type_to_string(k->type),
+ d->unit->id, job_type_to_string(d->type));
+ transaction_delete_job(tr, d, true);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int transaction_merge_jobs(Transaction *tr, sd_bus_error *e) {
+ Job *j;
+ int r;
+
+ assert(tr);
+
+ /* First step, check whether any of the jobs for one specific
+ * task conflict. If so, try to drop one of them. */
+ HASHMAP_FOREACH(j, tr->jobs) {
+ JobType t;
+ Job *k;
+
+ t = j->type;
+ LIST_FOREACH(transaction, k, j->transaction_next) {
+ if (job_type_merge_and_collapse(&t, k->type, j->unit) >= 0)
+ continue;
+
+ /* OK, we could not merge all jobs for this
+ * action. Let's see if we can get rid of one
+ * of them */
+
+ r = delete_one_unmergeable_job(tr, j);
+ if (r >= 0)
+ /* Ok, we managed to drop one, now
+ * let's ask our callers to call us
+ * again after garbage collecting */
+ return -EAGAIN;
+
+ /* We couldn't merge anything. Failure */
+ return sd_bus_error_setf(e, BUS_ERROR_TRANSACTION_JOBS_CONFLICTING,
+ "Transaction contains conflicting jobs '%s' and '%s' for %s. "
+ "Probably contradicting requirement dependencies configured.",
+ job_type_to_string(t),
+ job_type_to_string(k->type),
+ k->unit->id);
+ }
+ }
+
+ /* Second step, merge the jobs. */
+ HASHMAP_FOREACH(j, tr->jobs) {
+ JobType t = j->type;
+ Job *k;
+
+ /* Merge all transaction jobs for j->unit */
+ LIST_FOREACH(transaction, k, j->transaction_next)
+ assert_se(job_type_merge_and_collapse(&t, k->type, j->unit) == 0);
+
+ while ((k = j->transaction_next)) {
+ if (tr->anchor_job == k) {
+ transaction_merge_and_delete_job(tr, k, j, t);
+ j = k;
+ } else
+ transaction_merge_and_delete_job(tr, j, k, t);
+ }
+
+ assert(!j->transaction_next);
+ assert(!j->transaction_prev);
+ }
+
+ return 0;
+}
+
+static void transaction_drop_redundant(Transaction *tr) {
+ bool again;
+
+ /* Goes through the transaction and removes all jobs of the units whose jobs are all noops. If not
+ * all of a unit's jobs are redundant, they are kept. */
+
+ assert(tr);
+
+ do {
+ Job *j;
+
+ again = false;
+
+ HASHMAP_FOREACH(j, tr->jobs) {
+ bool keep = false;
+ Job *k;
+
+ LIST_FOREACH(transaction, k, j)
+ if (tr->anchor_job == k ||
+ !job_type_is_redundant(k->type, unit_active_state(k->unit)) ||
+ (k->unit->job && job_type_is_conflicting(k->type, k->unit->job->type))) {
+ keep = true;
+ break;
+ }
+
+ if (!keep) {
+ log_trace("Found redundant job %s/%s, dropping from transaction.",
+ j->unit->id, job_type_to_string(j->type));
+ transaction_delete_job(tr, j, false);
+ again = true;
+ break;
+ }
+ }
+ } while (again);
+}
+
+_pure_ static bool unit_matters_to_anchor(Unit *u, Job *j) {
+ assert(u);
+ assert(!j->transaction_prev);
+
+ /* Checks whether at least one of the jobs for this unit
+ * matters to the anchor. */
+
+ LIST_FOREACH(transaction, j, j)
+ if (j->matters_to_anchor)
+ return true;
+
+ return false;
+}
+
+static char* merge_unit_ids(const char* unit_log_field, char **pairs) {
+ char **unit_id, **job_type, *ans = NULL;
+ size_t alloc = 0, size = 0, next;
+
+ STRV_FOREACH_PAIR(unit_id, job_type, pairs) {
+ next = strlen(unit_log_field) + strlen(*unit_id);
+ if (!GREEDY_REALLOC(ans, alloc, size + next + 1))
+ return mfree(ans);
+
+ sprintf(ans + size, "%s%s", unit_log_field, *unit_id);
+ if (*(unit_id+1))
+ ans[size + next] = '\n';
+ size += next + 1;
+ }
+
+ return ans;
+}
+
+static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsigned generation, sd_bus_error *e) {
+ Unit *u;
+ void *v;
+ int r;
+ static const UnitDependency directions[] = {
+ UNIT_BEFORE,
+ UNIT_AFTER,
+ };
+ size_t d;
+
+ assert(tr);
+ assert(j);
+ assert(!j->transaction_prev);
+
+ /* Does a recursive sweep through the ordering graph, looking
+ * for a cycle. If we find a cycle we try to break it. */
+
+ /* Have we seen this before? */
+ if (j->generation == generation) {
+ Job *k, *delete = NULL;
+ _cleanup_free_ char **array = NULL, *unit_ids = NULL;
+ char **unit_id, **job_type;
+
+ /* If the marker is NULL we have been here already and
+ * decided the job was loop-free from here. Hence
+ * shortcut things and return right-away. */
+ if (!j->marker)
+ return 0;
+
+ /* So, the marker is not NULL and we already have been here. We have
+ * a cycle. Let's try to break it. We go backwards in our path and
+ * try to find a suitable job to remove. We use the marker to find
+ * our way back, since smart how we are we stored our way back in
+ * there. */
+
+ for (k = from; k; k = ((k->generation == generation && k->marker != k) ? k->marker : NULL)) {
+
+ /* For logging below */
+ if (strv_push_pair(&array, k->unit->id, (char*) job_type_to_string(k->type)) < 0)
+ log_oom();
+
+ if (!delete && hashmap_get(tr->jobs, k->unit) && !unit_matters_to_anchor(k->unit, k))
+ /* Ok, we can drop this one, so let's do so. */
+ delete = k;
+
+ /* Check if this in fact was the beginning of the cycle */
+ if (k == j)
+ break;
+ }
+
+ unit_ids = merge_unit_ids(j->manager->unit_log_field, array); /* ignore error */
+
+ STRV_FOREACH_PAIR(unit_id, job_type, array)
+ /* logging for j not k here to provide a consistent narrative */
+ log_struct(LOG_WARNING,
+ "MESSAGE=%s: Found %s on %s/%s",
+ j->unit->id,
+ unit_id == array ? "ordering cycle" : "dependency",
+ *unit_id, *job_type,
+ unit_ids);
+
+ if (delete) {
+ const char *status;
+ /* logging for j not k here to provide a consistent narrative */
+ log_struct(LOG_ERR,
+ "MESSAGE=%s: Job %s/%s deleted to break ordering cycle starting with %s/%s",
+ j->unit->id, delete->unit->id, job_type_to_string(delete->type),
+ j->unit->id, job_type_to_string(j->type),
+ unit_ids);
+
+ if (log_get_show_color())
+ status = ANSI_HIGHLIGHT_RED " SKIP " ANSI_NORMAL;
+ else
+ status = " SKIP ";
+
+ unit_status_printf(delete->unit,
+ STATUS_TYPE_NOTICE,
+ status,
+ "Ordering cycle found, skipping %s");
+ transaction_delete_unit(tr, delete->unit);
+ return -EAGAIN;
+ }
+
+ log_struct(LOG_ERR,
+ "MESSAGE=%s: Unable to break cycle starting with %s/%s",
+ j->unit->id, j->unit->id, job_type_to_string(j->type),
+ unit_ids);
+
+ return sd_bus_error_setf(e, BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC,
+ "Transaction order is cyclic. See system logs for details.");
+ }
+
+ /* Make the marker point to where we come from, so that we can
+ * find our way backwards if we want to break a cycle. We use
+ * a special marker for the beginning: we point to
+ * ourselves. */
+ j->marker = from ? from : j;
+ j->generation = generation;
+
+ /* Actual ordering of jobs depends on the unit ordering dependency and job types. We need to traverse
+ * the graph over 'before' edges in the actual job execution order. We traverse over both unit
+ * ordering dependencies and we test with job_compare() whether it is the 'before' edge in the job
+ * execution ordering. */
+ for (d = 0; d < ELEMENTSOF(directions); d++) {
+ HASHMAP_FOREACH_KEY(v, u, j->unit->dependencies[directions[d]]) {
+ Job *o;
+
+ /* Is there a job for this unit? */
+ o = hashmap_get(tr->jobs, u);
+ if (!o) {
+ /* Ok, there is no job for this in the
+ * transaction, but maybe there is already one
+ * running? */
+ o = u->job;
+ if (!o)
+ continue;
+ }
+
+ /* Cut traversing if the job j is not really *before* o. */
+ if (job_compare(j, o, directions[d]) >= 0)
+ continue;
+
+ r = transaction_verify_order_one(tr, o, j, generation, e);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Ok, let's backtrack, and remember that this entry is not on
+ * our path anymore. */
+ j->marker = NULL;
+
+ return 0;
+}
+
+static int transaction_verify_order(Transaction *tr, unsigned *generation, sd_bus_error *e) {
+ Job *j;
+ int r;
+ unsigned g;
+
+ assert(tr);
+ assert(generation);
+
+ /* Check if the ordering graph is cyclic. If it is, try to fix
+ * that up by dropping one of the jobs. */
+
+ g = (*generation)++;
+
+ HASHMAP_FOREACH(j, tr->jobs) {
+ r = transaction_verify_order_one(tr, j, NULL, g, e);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void transaction_collect_garbage(Transaction *tr) {
+ bool again;
+
+ assert(tr);
+
+ /* Drop jobs that are not required by any other job */
+
+ do {
+ Job *j;
+
+ again = false;
+
+ HASHMAP_FOREACH(j, tr->jobs) {
+ if (tr->anchor_job == j)
+ continue;
+
+ if (!j->object_list) {
+ log_trace("Garbage collecting job %s/%s", j->unit->id, job_type_to_string(j->type));
+ transaction_delete_job(tr, j, true);
+ again = true;
+ break;
+ }
+
+ log_trace("Keeping job %s/%s because of %s/%s",
+ j->unit->id, job_type_to_string(j->type),
+ j->object_list->subject ? j->object_list->subject->unit->id : "root",
+ j->object_list->subject ? job_type_to_string(j->object_list->subject->type) : "root");
+ }
+
+ } while (again);
+}
+
+static int transaction_is_destructive(Transaction *tr, JobMode mode, sd_bus_error *e) {
+ Job *j;
+
+ assert(tr);
+
+ /* Checks whether applying this transaction means that
+ * existing jobs would be replaced */
+
+ HASHMAP_FOREACH(j, tr->jobs) {
+
+ /* Assume merged */
+ assert(!j->transaction_prev);
+ assert(!j->transaction_next);
+
+ if (j->unit->job && (mode == JOB_FAIL || j->unit->job->irreversible) &&
+ job_type_is_conflicting(j->unit->job->type, j->type))
+ return sd_bus_error_setf(e, BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE,
+ "Transaction for %s/%s is destructive (%s has '%s' job queued, but '%s' is included in transaction).",
+ tr->anchor_job->unit->id, job_type_to_string(tr->anchor_job->type),
+ j->unit->id, job_type_to_string(j->unit->job->type), job_type_to_string(j->type));
+ }
+
+ return 0;
+}
+
+static void transaction_minimize_impact(Transaction *tr) {
+ Job *j;
+
+ assert(tr);
+
+ /* Drops all unnecessary jobs that reverse already active jobs
+ * or that stop a running service. */
+
+rescan:
+ HASHMAP_FOREACH(j, tr->jobs) {
+ LIST_FOREACH(transaction, j, j) {
+ bool stops_running_service, changes_existing_job;
+
+ /* If it matters, we shouldn't drop it */
+ if (j->matters_to_anchor)
+ continue;
+
+ /* Would this stop a running service?
+ * Would this change an existing job?
+ * If so, let's drop this entry */
+
+ stops_running_service =
+ j->type == JOB_STOP && UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(j->unit));
+
+ changes_existing_job =
+ j->unit->job &&
+ job_type_is_conflicting(j->type, j->unit->job->type);
+
+ if (!stops_running_service && !changes_existing_job)
+ continue;
+
+ if (stops_running_service)
+ log_unit_debug(j->unit,
+ "%s/%s would stop a running service.",
+ j->unit->id, job_type_to_string(j->type));
+
+ if (changes_existing_job)
+ log_unit_debug(j->unit,
+ "%s/%s would change existing job.",
+ j->unit->id, job_type_to_string(j->type));
+
+ /* Ok, let's get rid of this */
+ log_unit_debug(j->unit,
+ "Deleting %s/%s to minimize impact.",
+ j->unit->id, job_type_to_string(j->type));
+
+ transaction_delete_job(tr, j, true);
+ goto rescan;
+ }
+ }
+}
+
+static int transaction_apply(
+ Transaction *tr,
+ Manager *m,
+ JobMode mode,
+ Set *affected_jobs) {
+
+ Job *j;
+ int r;
+
+ /* Moves the transaction jobs to the set of active jobs */
+
+ if (IN_SET(mode, JOB_ISOLATE, JOB_FLUSH)) {
+
+ /* When isolating first kill all installed jobs which
+ * aren't part of the new transaction */
+ HASHMAP_FOREACH(j, m->jobs) {
+ assert(j->installed);
+
+ if (j->unit->ignore_on_isolate)
+ continue;
+
+ if (hashmap_get(tr->jobs, j->unit))
+ continue;
+
+ /* Not invalidating recursively. Avoids triggering
+ * OnFailure= actions of dependent jobs. Also avoids
+ * invalidating our iterator. */
+ job_finish_and_invalidate(j, JOB_CANCELED, false, false);
+ }
+ }
+
+ HASHMAP_FOREACH(j, tr->jobs) {
+ /* Assume merged */
+ assert(!j->transaction_prev);
+ assert(!j->transaction_next);
+
+ r = hashmap_ensure_allocated(&m->jobs, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->jobs, UINT32_TO_PTR(j->id), j);
+ if (r < 0)
+ goto rollback;
+ }
+
+ while ((j = hashmap_steal_first(tr->jobs))) {
+ Job *installed_job;
+
+ /* Clean the job dependencies */
+ transaction_unlink_job(tr, j, false);
+
+ installed_job = job_install(j);
+ if (installed_job != j) {
+ /* j has been merged into a previously installed job */
+ if (tr->anchor_job == j)
+ tr->anchor_job = installed_job;
+ hashmap_remove(m->jobs, UINT32_TO_PTR(j->id));
+ job_free(j);
+ j = installed_job;
+ }
+
+ job_add_to_run_queue(j);
+ job_add_to_dbus_queue(j);
+ job_start_timer(j, false);
+ job_shutdown_magic(j);
+
+ /* When 'affected' is specified, let's track all in it all jobs that were touched because of
+ * this transaction. */
+ if (affected_jobs)
+ (void) set_put(affected_jobs, j);
+ }
+
+ return 0;
+
+rollback:
+
+ HASHMAP_FOREACH(j, tr->jobs)
+ hashmap_remove(m->jobs, UINT32_TO_PTR(j->id));
+
+ return r;
+}
+
+int transaction_activate(
+ Transaction *tr,
+ Manager *m,
+ JobMode mode,
+ Set *affected_jobs,
+ sd_bus_error *e) {
+
+ Job *j;
+ int r;
+ unsigned generation = 1;
+
+ assert(tr);
+
+ /* This applies the changes recorded in tr->jobs to
+ * the actual list of jobs, if possible. */
+
+ /* Reset the generation counter of all installed jobs. The detection of cycles
+ * looks at installed jobs. If they had a non-zero generation from some previous
+ * walk of the graph, the algorithm would break. */
+ HASHMAP_FOREACH(j, m->jobs)
+ j->generation = 0;
+
+ /* First step: figure out which jobs matter */
+ transaction_find_jobs_that_matter_to_anchor(tr->anchor_job, generation++);
+
+ /* Second step: Try not to stop any running services if
+ * we don't have to. Don't try to reverse running
+ * jobs if we don't have to. */
+ if (mode == JOB_FAIL)
+ transaction_minimize_impact(tr);
+
+ /* Third step: Drop redundant jobs */
+ transaction_drop_redundant(tr);
+
+ for (;;) {
+ /* Fourth step: Let's remove unneeded jobs that might
+ * be lurking. */
+ if (mode != JOB_ISOLATE)
+ transaction_collect_garbage(tr);
+
+ /* Fifth step: verify order makes sense and correct
+ * cycles if necessary and possible */
+ r = transaction_verify_order(tr, &generation, e);
+ if (r >= 0)
+ break;
+
+ if (r != -EAGAIN)
+ return log_warning_errno(r, "Requested transaction contains an unfixable cyclic ordering dependency: %s", bus_error_message(e, r));
+
+ /* Let's see if the resulting transaction ordering
+ * graph is still cyclic... */
+ }
+
+ for (;;) {
+ /* Sixth step: let's drop unmergeable entries if
+ * necessary and possible, merge entries we can
+ * merge */
+ r = transaction_merge_jobs(tr, e);
+ if (r >= 0)
+ break;
+
+ if (r != -EAGAIN)
+ return log_warning_errno(r, "Requested transaction contains unmergeable jobs: %s", bus_error_message(e, r));
+
+ /* Seventh step: an entry got dropped, let's garbage
+ * collect its dependencies. */
+ if (mode != JOB_ISOLATE)
+ transaction_collect_garbage(tr);
+
+ /* Let's see if the resulting transaction still has
+ * unmergeable entries ... */
+ }
+
+ /* Eights step: Drop redundant jobs again, if the merging now allows us to drop more. */
+ transaction_drop_redundant(tr);
+
+ /* Ninth step: check whether we can actually apply this */
+ r = transaction_is_destructive(tr, mode, e);
+ if (r < 0)
+ return log_notice_errno(r, "Requested transaction contradicts existing jobs: %s", bus_error_message(e, r));
+
+ /* Tenth step: apply changes */
+ r = transaction_apply(tr, m, mode, affected_jobs);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to apply transaction: %m");
+
+ assert(hashmap_isempty(tr->jobs));
+
+ if (!hashmap_isempty(m->jobs)) {
+ /* Are there any jobs now? Then make sure we have the
+ * idle pipe around. We don't really care too much
+ * whether this works or not, as the idle pipe is a
+ * feature for cosmetics, not actually useful for
+ * anything beyond that. */
+
+ if (m->idle_pipe[0] < 0 && m->idle_pipe[1] < 0 &&
+ m->idle_pipe[2] < 0 && m->idle_pipe[3] < 0) {
+ (void) pipe2(m->idle_pipe, O_NONBLOCK|O_CLOEXEC);
+ (void) pipe2(m->idle_pipe + 2, O_NONBLOCK|O_CLOEXEC);
+ }
+ }
+
+ return 0;
+}
+
+static Job* transaction_add_one_job(Transaction *tr, JobType type, Unit *unit, bool *is_new) {
+ Job *j, *f;
+
+ assert(tr);
+ assert(unit);
+
+ /* Looks for an existing prospective job and returns that. If
+ * it doesn't exist it is created and added to the prospective
+ * jobs list. */
+
+ f = hashmap_get(tr->jobs, unit);
+
+ LIST_FOREACH(transaction, j, f) {
+ assert(j->unit == unit);
+
+ if (j->type == type) {
+ if (is_new)
+ *is_new = false;
+ return j;
+ }
+ }
+
+ j = job_new(unit, type);
+ if (!j)
+ return NULL;
+
+ j->generation = 0;
+ j->marker = NULL;
+ j->matters_to_anchor = false;
+ j->irreversible = tr->irreversible;
+
+ LIST_PREPEND(transaction, f, j);
+
+ if (hashmap_replace(tr->jobs, unit, f) < 0) {
+ LIST_REMOVE(transaction, f, j);
+ job_free(j);
+ return NULL;
+ }
+
+ if (is_new)
+ *is_new = true;
+
+ log_trace("Added job %s/%s to transaction.", unit->id, job_type_to_string(type));
+
+ return j;
+}
+
+static void transaction_unlink_job(Transaction *tr, Job *j, bool delete_dependencies) {
+ assert(tr);
+ assert(j);
+
+ if (j->transaction_prev)
+ j->transaction_prev->transaction_next = j->transaction_next;
+ else if (j->transaction_next)
+ hashmap_replace(tr->jobs, j->unit, j->transaction_next);
+ else
+ hashmap_remove_value(tr->jobs, j->unit, j);
+
+ if (j->transaction_next)
+ j->transaction_next->transaction_prev = j->transaction_prev;
+
+ j->transaction_prev = j->transaction_next = NULL;
+
+ while (j->subject_list)
+ job_dependency_free(j->subject_list);
+
+ while (j->object_list) {
+ Job *other = j->object_list->matters ? j->object_list->subject : NULL;
+
+ job_dependency_free(j->object_list);
+
+ if (other && delete_dependencies) {
+ log_unit_debug(other->unit,
+ "Deleting job %s/%s as dependency of job %s/%s",
+ other->unit->id, job_type_to_string(other->type),
+ j->unit->id, job_type_to_string(j->type));
+ transaction_delete_job(tr, other, delete_dependencies);
+ }
+ }
+}
+
+void transaction_add_propagate_reload_jobs(Transaction *tr, Unit *unit, Job *by, bool ignore_order, sd_bus_error *e) {
+ JobType nt;
+ Unit *dep;
+ void *v;
+ int r;
+
+ assert(tr);
+ assert(unit);
+
+ HASHMAP_FOREACH_KEY(v, dep, unit->dependencies[UNIT_PROPAGATES_RELOAD_TO]) {
+ nt = job_type_collapse(JOB_TRY_RELOAD, dep);
+ if (nt == JOB_NOP)
+ continue;
+
+ r = transaction_add_job_and_dependencies(tr, nt, dep, by, false, false, false, ignore_order, e);
+ if (r < 0) {
+ log_unit_warning(dep,
+ "Cannot add dependency reload job, ignoring: %s",
+ bus_error_message(e, r));
+ sd_bus_error_free(e);
+ }
+ }
+}
+
+int transaction_add_job_and_dependencies(
+ Transaction *tr,
+ JobType type,
+ Unit *unit,
+ Job *by,
+ bool matters,
+ bool conflicts,
+ bool ignore_requirements,
+ bool ignore_order,
+ sd_bus_error *e) {
+
+ bool is_new;
+ Unit *dep;
+ Job *ret;
+ void *v;
+ int r;
+
+ assert(tr);
+ assert(type < _JOB_TYPE_MAX);
+ assert(type < _JOB_TYPE_MAX_IN_TRANSACTION);
+ assert(unit);
+
+ /* Before adding jobs for this unit, let's ensure that its state has been loaded
+ * This matters when jobs are spawned as part of coldplugging itself (see e. g. path_coldplug()).
+ * This way, we "recursively" coldplug units, ensuring that we do not look at state of
+ * not-yet-coldplugged units. */
+ if (MANAGER_IS_RELOADING(unit->manager))
+ unit_coldplug(unit);
+
+ if (by)
+ log_trace("Pulling in %s/%s from %s/%s", unit->id, job_type_to_string(type), by->unit->id, job_type_to_string(by->type));
+
+ /* Safety check that the unit is a valid state, i.e. not in UNIT_STUB or UNIT_MERGED which should only be set
+ * temporarily. */
+ if (!UNIT_IS_LOAD_COMPLETE(unit->load_state))
+ return sd_bus_error_setf(e, BUS_ERROR_LOAD_FAILED, "Unit %s is not loaded properly.", unit->id);
+
+ if (type != JOB_STOP) {
+ r = bus_unit_validate_load_state(unit, e);
+ /* The time-based cache allows to start new units without daemon-reload,
+ * but if they are already referenced (because of dependencies or ordering)
+ * then we have to force a load of the fragment. As an optimization, check
+ * first if anything in the usual paths was modified since the last time
+ * the cache was loaded. Also check if the last time an attempt to load the
+ * unit was made was before the most recent cache refresh, so that we know
+ * we need to try again — even if the cache is current, it might have been
+ * updated in a different context before we had a chance to retry loading
+ * this particular unit.
+ *
+ * Given building up the transaction is a synchronous operation, attempt
+ * to load the unit immediately. */
+ if (r < 0 && manager_unit_cache_should_retry_load(unit)) {
+ sd_bus_error_free(e);
+ unit->load_state = UNIT_STUB;
+ r = unit_load(unit);
+ if (r < 0 || unit->load_state == UNIT_STUB)
+ unit->load_state = UNIT_NOT_FOUND;
+ r = bus_unit_validate_load_state(unit, e);
+ }
+ if (r < 0)
+ return r;
+ }
+
+ if (!unit_job_is_applicable(unit, type))
+ return sd_bus_error_setf(e, BUS_ERROR_JOB_TYPE_NOT_APPLICABLE,
+ "Job type %s is not applicable for unit %s.",
+ job_type_to_string(type), unit->id);
+
+ /* First add the job. */
+ ret = transaction_add_one_job(tr, type, unit, &is_new);
+ if (!ret)
+ return -ENOMEM;
+
+ ret->ignore_order = ret->ignore_order || ignore_order;
+
+ /* Then, add a link to the job. */
+ if (by) {
+ if (!job_dependency_new(by, ret, matters, conflicts))
+ return -ENOMEM;
+ } else {
+ /* If the job has no parent job, it is the anchor job. */
+ assert(!tr->anchor_job);
+ tr->anchor_job = ret;
+ }
+
+ if (is_new && !ignore_requirements && type != JOB_NOP) {
+ Set *following;
+
+ /* If we are following some other unit, make sure we
+ * add all dependencies of everybody following. */
+ if (unit_following_set(ret->unit, &following) > 0) {
+ SET_FOREACH(dep, following) {
+ r = transaction_add_job_and_dependencies(tr, type, dep, ret, false, false, false, ignore_order, e);
+ if (r < 0) {
+ log_unit_full_errno(dep, r == -ERFKILL ? LOG_INFO : LOG_WARNING, r,
+ "Cannot add dependency job, ignoring: %s",
+ bus_error_message(e, r));
+ sd_bus_error_free(e);
+ }
+ }
+
+ set_free(following);
+ }
+
+ /* Finally, recursively add in all dependencies. */
+ if (IN_SET(type, JOB_START, JOB_RESTART)) {
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_REQUIRES]) {
+ r = transaction_add_job_and_dependencies(tr, JOB_START, dep, ret, true, false, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_BINDS_TO]) {
+ r = transaction_add_job_and_dependencies(tr, JOB_START, dep, ret, true, false, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_WANTS]) {
+ r = transaction_add_job_and_dependencies(tr, JOB_START, dep, ret, false, false, false, ignore_order, e);
+ if (r < 0) {
+ /* unit masked, job type not applicable and unit not found are not considered as errors. */
+ log_unit_full_errno(dep,
+ IN_SET(r, -ERFKILL, -EBADR, -ENOENT) ? LOG_DEBUG : LOG_WARNING,
+ r, "Cannot add dependency job, ignoring: %s",
+ bus_error_message(e, r));
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_REQUISITE]) {
+ r = transaction_add_job_and_dependencies(tr, JOB_VERIFY_ACTIVE, dep, ret, true, false, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_CONFLICTS]) {
+ r = transaction_add_job_and_dependencies(tr, JOB_STOP, dep, ret, true, true, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[UNIT_CONFLICTED_BY]) {
+ r = transaction_add_job_and_dependencies(tr, JOB_STOP, dep, ret, false, false, false, ignore_order, e);
+ if (r < 0) {
+ log_unit_warning(dep,
+ "Cannot add dependency job, ignoring: %s",
+ bus_error_message(e, r));
+ sd_bus_error_free(e);
+ }
+ }
+
+ }
+
+ if (IN_SET(type, JOB_STOP, JOB_RESTART)) {
+ static const UnitDependency propagate_deps[] = {
+ UNIT_REQUIRED_BY,
+ UNIT_REQUISITE_OF,
+ UNIT_BOUND_BY,
+ UNIT_CONSISTS_OF,
+ };
+
+ JobType ptype;
+ unsigned j;
+
+ /* We propagate STOP as STOP, but RESTART only
+ * as TRY_RESTART, in order not to start
+ * dependencies that are not around. */
+ ptype = type == JOB_RESTART ? JOB_TRY_RESTART : type;
+
+ for (j = 0; j < ELEMENTSOF(propagate_deps); j++)
+ HASHMAP_FOREACH_KEY(v, dep, ret->unit->dependencies[propagate_deps[j]]) {
+ JobType nt;
+
+ nt = job_type_collapse(ptype, dep);
+ if (nt == JOB_NOP)
+ continue;
+
+ r = transaction_add_job_and_dependencies(tr, nt, dep, ret, true, false, false, ignore_order, e);
+ if (r < 0) {
+ if (r != -EBADR) /* job type not applicable */
+ goto fail;
+
+ sd_bus_error_free(e);
+ }
+ }
+ }
+
+ if (type == JOB_RELOAD)
+ transaction_add_propagate_reload_jobs(tr, ret->unit, ret, ignore_order, e);
+
+ /* JOB_VERIFY_ACTIVE requires no dependency handling */
+ }
+
+ return 0;
+
+fail:
+ return r;
+}
+
+int transaction_add_isolate_jobs(Transaction *tr, Manager *m) {
+ Unit *u;
+ char *k;
+ int r;
+
+ assert(tr);
+ assert(m);
+
+ HASHMAP_FOREACH_KEY(u, k, m->units) {
+
+ /* ignore aliases */
+ if (u->id != k)
+ continue;
+
+ if (u->ignore_on_isolate)
+ continue;
+
+ /* No need to stop inactive jobs */
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)) && !u->job)
+ continue;
+
+ /* Is there already something listed for this? */
+ if (hashmap_get(tr->jobs, u))
+ continue;
+
+ r = transaction_add_job_and_dependencies(tr, JOB_STOP, u, tr->anchor_job, true, false, false, false, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Cannot add isolate job, ignoring: %m");
+ }
+
+ return 0;
+}
+
+int transaction_add_triggering_jobs(Transaction *tr, Unit *u) {
+ void *v;
+ Unit *trigger;
+ int r;
+
+ assert(tr);
+ assert(u);
+
+ HASHMAP_FOREACH_KEY(v, trigger, u->dependencies[UNIT_TRIGGERED_BY]) {
+ /* No need to stop inactive jobs */
+ if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(trigger)) && !trigger->job)
+ continue;
+
+ /* Is there already something listed for this? */
+ if (hashmap_get(tr->jobs, trigger))
+ continue;
+
+ r = transaction_add_job_and_dependencies(tr, JOB_STOP, trigger, tr->anchor_job, true, false, false, false, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Cannot add triggered by job, ignoring: %m");
+ }
+
+ return 0;
+}
+
+Transaction *transaction_new(bool irreversible) {
+ Transaction *tr;
+
+ tr = new0(Transaction, 1);
+ if (!tr)
+ return NULL;
+
+ tr->jobs = hashmap_new(NULL);
+ if (!tr->jobs)
+ return mfree(tr);
+
+ tr->irreversible = irreversible;
+
+ return tr;
+}
+
+void transaction_free(Transaction *tr) {
+ assert(hashmap_isempty(tr->jobs));
+ hashmap_free(tr->jobs);
+ free(tr);
+}
diff --git a/src/core/transaction.h b/src/core/transaction.h
new file mode 100644
index 0000000..c431271
--- /dev/null
+++ b/src/core/transaction.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Transaction Transaction;
+
+#include "hashmap.h"
+#include "job.h"
+#include "manager.h"
+#include "unit.h"
+
+struct Transaction {
+ /* Jobs to be added */
+ Hashmap *jobs; /* Unit object => Job object list 1:1 */
+ Job *anchor_job; /* the job the user asked for */
+ bool irreversible;
+};
+
+Transaction *transaction_new(bool irreversible);
+void transaction_free(Transaction *tr);
+
+void transaction_add_propagate_reload_jobs(Transaction *tr, Unit *unit, Job *by, bool ignore_order, sd_bus_error *e);
+int transaction_add_job_and_dependencies(
+ Transaction *tr,
+ JobType type,
+ Unit *unit,
+ Job *by,
+ bool matters,
+ bool conflicts,
+ bool ignore_requirements,
+ bool ignore_order,
+ sd_bus_error *e);
+int transaction_activate(Transaction *tr, Manager *m, JobMode mode, Set *affected, sd_bus_error *e);
+int transaction_add_isolate_jobs(Transaction *tr, Manager *m);
+int transaction_add_triggering_jobs(Transaction *tr, Unit *u);
+void transaction_abort(Transaction *tr);
diff --git a/src/core/triggers.systemd.in b/src/core/triggers.systemd.in
new file mode 100644
index 0000000..2d25db3
--- /dev/null
+++ b/src/core/triggers.systemd.in
@@ -0,0 +1,143 @@
+# -*- Mode: rpm-spec; indent-tabs-mode: nil -*- */
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+# Copyright © 2018 Neal Gompa
+
+# The contents of this are an example to be copied into systemd.spec.
+#
+# Minimum rpm version supported: 4.13.0
+
+%transfiletriggerin -P 900900 -p <lua> -- @systemunitdir@ /etc/systemd/system
+-- This script will run after any package is initially installed or
+-- upgraded. We care about the case where a package is initially
+-- installed, because other cases are covered by the *un scriptlets,
+-- so sometimes we will reload needlessly.
+
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemctl", "daemon-reload"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerun -p <lua> -- @systemunitdir@ /etc/systemd/system
+-- On removal, we need to run daemon-reload after any units have been
+-- removed. %transfiletriggerpostun would be ideal, but it does not get
+-- executed for some reason.
+-- On upgrade, we need to run daemon-reload after any new unit files
+-- have been installed, but before %postun scripts in packages get
+-- executed. %transfiletriggerun gets the right list of files
+-- but it is invoked too early (before changes happen).
+-- %filetriggerpostun happens at the right time, but it fires for
+-- every package.
+-- To execute the reload at the right time, we create a state
+-- file in %transfiletriggerun and execute the daemon-reload in
+-- the first %filetriggerpostun.
+
+if posix.access("/run/systemd/system") then
+ posix.mkdir("%{_localstatedir}/lib")
+ posix.mkdir("%{_localstatedir}/lib/rpm-state")
+ posix.mkdir("%{_localstatedir}/lib/rpm-state/systemd")
+ io.open("%{_localstatedir}/lib/rpm-state/systemd/needs-reload", "w")
+end
+
+%filetriggerpostun -P 1000100 -p <lua> -- @systemunitdir@ /etc/systemd/system
+if posix.access("%{_localstatedir}/lib/rpm-state/systemd/needs-reload") then
+ posix.unlink("%{_localstatedir}/lib/rpm-state/systemd/needs-reload")
+ posix.rmdir("%{_localstatedir}/lib/rpm-state/systemd")
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemctl", "daemon-reload"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -P 100700 -p <lua> -- @sysusersdir@
+-- This script will process files installed in @sysusersdir@ to create
+-- specified users automatically. The priority is set such that it
+-- will run before the tmpfiles file trigger.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemd-sysusers"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -P 100500 -p <lua> -- @tmpfilesdir@
+-- This script will process files installed in @tmpfilesdir@ to create
+-- tmpfiles automatically. The priority is set such that it will run
+-- after the sysusers file trigger, but before any other triggers.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemd-tmpfiles", "--create"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @udevhwdbdir@
+-- This script will automatically invoke hwdb update if files have been
+-- installed or updated in @udevhwdbdir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/systemd-hwdb", "update"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @catalogdir@
+-- This script will automatically invoke journal catalog update if files
+-- have been installed or updated in @catalogdir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/journalctl", "--update-catalog"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @udevrulesdir@
+-- This script will automatically update udev with new rules if files
+-- have been installed or updated in @udevrulesdir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("%{_bindir}/udevadm", "control", "--reload"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @sysctldir@
+-- This script will automatically apply sysctl rules if files have been
+-- installed or updated in @sysctldir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("@rootlibexecdir@/systemd-sysctl"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
+
+%transfiletriggerin -p <lua> -- @binfmtdir@
+-- This script will automatically apply binfmt rules if files have been
+-- installed or updated in @binfmtdir@.
+if posix.access("/run/systemd/system") then
+ pid = posix.fork()
+ if pid == 0 then
+ assert(posix.exec("@rootlibexecdir@/systemd-binfmt"))
+ elseif pid > 0 then
+ posix.wait(pid)
+ end
+end
diff --git a/src/core/unit-printf.c b/src/core/unit-printf.c
new file mode 100644
index 0000000..0c1e20d
--- /dev/null
+++ b/src/core/unit-printf.c
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "unit-printf.h"
+#include "unit.h"
+#include "user-util.h"
+
+static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+
+ assert(u);
+
+ return unit_name_to_prefix_and_instance(u->id, ret);
+}
+
+static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+
+ assert(u);
+
+ return unit_name_to_prefix(u->id, ret);
+}
+
+static int specifier_prefix_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ const Unit *u = userdata;
+ int r;
+
+ assert(u);
+
+ r = unit_name_to_prefix(u->id, &p);
+ if (r < 0)
+ return r;
+
+ return unit_name_unescape(p, ret);
+}
+
+static int specifier_instance_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+
+ assert(u);
+
+ return unit_name_unescape(strempty(u->instance), ret);
+}
+
+static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ _cleanup_free_ char *prefix = NULL;
+ char *dash;
+ int r;
+
+ assert(u);
+
+ r = unit_name_to_prefix(u->id, &prefix);
+ if (r < 0)
+ return r;
+
+ dash = strrchr(prefix, '-');
+ if (dash)
+ return specifier_string(specifier, dash + 1, userdata, ret);
+
+ *ret = TAKE_PTR(prefix);
+ return 0;
+}
+
+static int specifier_last_component_unescaped(char specifier, const void *data, const void *userdata, char **ret) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = specifier_last_component(specifier, data, userdata, &p);
+ if (r < 0)
+ return r;
+
+ return unit_name_unescape(p, ret);
+}
+
+static int specifier_filename(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+
+ assert(u);
+
+ if (u->instance)
+ return unit_name_path_unescape(u->instance, ret);
+ else
+ return unit_name_to_path(u->id, ret);
+}
+
+static void bad_specifier(const Unit *u, char specifier) {
+ log_unit_warning(u, "Specifier '%%%c' used in unit configuration, which is deprecated. Please update your unit file, as it does not work as intended.", specifier);
+}
+
+static int specifier_cgroup(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ char *n;
+
+ assert(u);
+
+ bad_specifier(u, specifier);
+
+ if (u->cgroup_path)
+ n = strdup(u->cgroup_path);
+ else
+ n = unit_default_cgroup_path(u);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+static int specifier_cgroup_root(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ char *n;
+
+ assert(u);
+
+ bad_specifier(u, specifier);
+
+ n = strdup(u->manager->cgroup_root);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+static int specifier_cgroup_slice(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ char *n;
+
+ assert(u);
+
+ bad_specifier(u, specifier);
+
+ if (UNIT_ISSET(u->slice)) {
+ const Unit *slice;
+
+ slice = UNIT_DEREF(u->slice);
+
+ if (slice->cgroup_path)
+ n = strdup(slice->cgroup_path);
+ else
+ n = unit_default_cgroup_path(slice);
+ } else
+ n = strdup(u->manager->cgroup_root);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+static int specifier_special_directory(char specifier, const void *data, const void *userdata, char **ret) {
+ const Unit *u = userdata;
+ char *n = NULL;
+
+ assert(u);
+
+ n = strdup(u->manager->prefix[PTR_TO_UINT(data)]);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int unit_name_printf(const Unit *u, const char* format, char **ret) {
+
+ /*
+ * This will use the passed string as format string and replace the following specifiers (which should all be
+ * safe for inclusion in unit names):
+ *
+ * %n: the full id of the unit (foo-aaa@bar.waldo)
+ * %N: the id of the unit without the suffix (foo-aaa@bar)
+ * %p: the prefix (foo-aaa)
+ * %i: the instance (bar)
+ * %j: the last componet of the prefix (aaa)
+ */
+
+ const Specifier table[] = {
+ { 'i', specifier_string, u->instance },
+ { 'j', specifier_last_component, NULL },
+ { 'n', specifier_string, u->id },
+ { 'N', specifier_prefix_and_instance, NULL },
+ { 'p', specifier_prefix, NULL },
+
+ COMMON_SYSTEM_SPECIFIERS,
+
+ COMMON_CREDS_SPECIFIERS,
+ {}
+ };
+
+ assert(u);
+ assert(format);
+ assert(ret);
+
+ return specifier_printf(format, table, u, ret);
+}
+
+int unit_full_printf(const Unit *u, const char *format, char **ret) {
+ /* This is similar to unit_name_printf() but also supports unescaping. Also, adds a couple of additional codes
+ * (which are likely not suitable for unescaped inclusion in unit names):
+ *
+ * %f: the unescaped instance if set, otherwise the id unescaped as path
+ *
+ * %c: cgroup path of unit (deprecated)
+ * %r: where units in this slice are placed in the cgroup tree (deprecated)
+ * %R: the root of this systemd's instance tree (deprecated)
+ *
+ * %C: the cache directory root (e.g. /var/cache or $XDG_CACHE_HOME)
+ * %E: the configuration directory root (e.g. /etc or $XDG_CONFIG_HOME)
+ * %L: the log directory root (e.g. /var/log or $XDG_CONFIG_HOME/log)
+ * %S: the state directory root (e.g. /var/lib or $XDG_CONFIG_HOME)
+ * %t: the runtime directory root (e.g. /run or $XDG_RUNTIME_DIR)
+ *
+ * %h: the homedir of the running user
+ * %s: the shell of the running user
+ *
+ * NOTICE: When you add new entries here, please be careful: specifiers which depend on settings of the unit
+ * file itself are broken by design, as they would resolve differently depending on whether they are used
+ * before or after the relevant configuration setting. Hence: don't add them.
+ */
+
+ assert(u);
+ assert(format);
+ assert(ret);
+
+ const Specifier table[] = {
+ { 'i', specifier_string, u->instance },
+ { 'I', specifier_instance_unescaped, NULL },
+ { 'j', specifier_last_component, NULL },
+ { 'J', specifier_last_component_unescaped, NULL },
+ { 'n', specifier_string, u->id },
+ { 'N', specifier_prefix_and_instance, NULL },
+ { 'p', specifier_prefix, NULL },
+ { 'P', specifier_prefix_unescaped, NULL },
+
+ { 'f', specifier_filename, NULL },
+
+ { 'c', specifier_cgroup, NULL },
+ { 'r', specifier_cgroup_slice, NULL },
+ { 'R', specifier_cgroup_root, NULL },
+
+ { 'C', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_CACHE) },
+ { 'E', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_CONFIGURATION) },
+ { 'L', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_LOGS) },
+ { 'S', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_STATE) },
+ { 't', specifier_special_directory, UINT_TO_PTR(EXEC_DIRECTORY_RUNTIME) },
+
+ { 'h', specifier_user_home, NULL },
+ { 's', specifier_user_shell, NULL },
+
+ COMMON_SYSTEM_SPECIFIERS,
+
+ COMMON_CREDS_SPECIFIERS,
+
+ COMMON_TMP_SPECIFIERS,
+ {}
+ };
+
+ return specifier_printf(format, table, u, ret);
+}
diff --git a/src/core/unit-printf.h b/src/core/unit-printf.h
new file mode 100644
index 0000000..de5183c
--- /dev/null
+++ b/src/core/unit-printf.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "unit.h"
+
+int unit_name_printf(const Unit *u, const char* text, char **ret);
+int unit_full_printf(const Unit *u, const char *text, char **ret);
diff --git a/src/core/unit.c b/src/core/unit.c
new file mode 100644
index 0000000..45a417a
--- /dev/null
+++ b/src/core/unit.c
@@ -0,0 +1,6368 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+#include "sd-messages.h"
+
+#include "all-units.h"
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "core-varlink.h"
+#include "dbus-unit.h"
+#include "dbus.h"
+#include "dropin.h"
+#include "escape.h"
+#include "execute.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "install.h"
+#include "label.h"
+#include "load-dropin.h"
+#include "load-fragment.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_audit.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rm-rf.h"
+#include "serialize.h"
+#include "set.h"
+#include "signal-util.h"
+#include "sparse-endian.h"
+#include "special.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "user-util.h"
+#include "virt.h"
+
+/* Thresholds for logging at INFO level about resource consumption */
+#define MENTIONWORTHY_CPU_NSEC (1 * NSEC_PER_SEC)
+#define MENTIONWORTHY_IO_BYTES (1024 * 1024ULL)
+#define MENTIONWORTHY_IP_BYTES (0ULL)
+
+/* Thresholds for logging at INFO level about resource consumption */
+#define NOTICEWORTHY_CPU_NSEC (10*60 * NSEC_PER_SEC) /* 10 minutes */
+#define NOTICEWORTHY_IO_BYTES (10 * 1024 * 1024ULL) /* 10 MB */
+#define NOTICEWORTHY_IP_BYTES (128 * 1024 * 1024ULL) /* 128 MB */
+
+const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX] = {
+ [UNIT_SERVICE] = &service_vtable,
+ [UNIT_SOCKET] = &socket_vtable,
+ [UNIT_TARGET] = &target_vtable,
+ [UNIT_DEVICE] = &device_vtable,
+ [UNIT_MOUNT] = &mount_vtable,
+ [UNIT_AUTOMOUNT] = &automount_vtable,
+ [UNIT_SWAP] = &swap_vtable,
+ [UNIT_TIMER] = &timer_vtable,
+ [UNIT_PATH] = &path_vtable,
+ [UNIT_SLICE] = &slice_vtable,
+ [UNIT_SCOPE] = &scope_vtable,
+};
+
+static void maybe_warn_about_dependency(Unit *u, const char *other, UnitDependency dependency);
+
+Unit *unit_new(Manager *m, size_t size) {
+ Unit *u;
+
+ assert(m);
+ assert(size >= sizeof(Unit));
+
+ u = malloc0(size);
+ if (!u)
+ return NULL;
+
+ u->manager = m;
+ u->type = _UNIT_TYPE_INVALID;
+ u->default_dependencies = true;
+ u->unit_file_state = _UNIT_FILE_STATE_INVALID;
+ u->unit_file_preset = -1;
+ u->on_failure_job_mode = JOB_REPLACE;
+ u->cgroup_control_inotify_wd = -1;
+ u->cgroup_memory_inotify_wd = -1;
+ u->job_timeout = USEC_INFINITY;
+ u->job_running_timeout = USEC_INFINITY;
+ u->ref_uid = UID_INVALID;
+ u->ref_gid = GID_INVALID;
+ u->cpu_usage_last = NSEC_INFINITY;
+ u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
+ u->failure_action_exit_status = u->success_action_exit_status = -1;
+
+ u->ip_accounting_ingress_map_fd = -1;
+ u->ip_accounting_egress_map_fd = -1;
+ u->ipv4_allow_map_fd = -1;
+ u->ipv6_allow_map_fd = -1;
+ u->ipv4_deny_map_fd = -1;
+ u->ipv6_deny_map_fd = -1;
+
+ u->last_section_private = -1;
+
+ u->start_ratelimit = (RateLimit) { m->default_start_limit_interval, m->default_start_limit_burst };
+ u->auto_stop_ratelimit = (RateLimit) { 10 * USEC_PER_SEC, 16 };
+
+ for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++)
+ u->io_accounting_last[i] = UINT64_MAX;
+
+ return u;
+}
+
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) {
+ _cleanup_(unit_freep) Unit *u = NULL;
+ int r;
+
+ u = unit_new(m, size);
+ if (!u)
+ return -ENOMEM;
+
+ r = unit_add_name(u, name);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(u);
+
+ return r;
+}
+
+bool unit_has_name(const Unit *u, const char *name) {
+ assert(u);
+ assert(name);
+
+ return streq_ptr(name, u->id) ||
+ set_contains(u->aliases, name);
+}
+
+static void unit_init(Unit *u) {
+ CGroupContext *cc;
+ ExecContext *ec;
+ KillContext *kc;
+
+ assert(u);
+ assert(u->manager);
+ assert(u->type >= 0);
+
+ cc = unit_get_cgroup_context(u);
+ if (cc) {
+ cgroup_context_init(cc);
+
+ /* Copy in the manager defaults into the cgroup
+ * context, _before_ the rest of the settings have
+ * been initialized */
+
+ cc->cpu_accounting = u->manager->default_cpu_accounting;
+ cc->io_accounting = u->manager->default_io_accounting;
+ cc->blockio_accounting = u->manager->default_blockio_accounting;
+ cc->memory_accounting = u->manager->default_memory_accounting;
+ cc->tasks_accounting = u->manager->default_tasks_accounting;
+ cc->ip_accounting = u->manager->default_ip_accounting;
+
+ if (u->type != UNIT_SLICE)
+ cc->tasks_max = u->manager->default_tasks_max;
+ }
+
+ ec = unit_get_exec_context(u);
+ if (ec) {
+ exec_context_init(ec);
+
+ if (MANAGER_IS_SYSTEM(u->manager))
+ ec->keyring_mode = EXEC_KEYRING_SHARED;
+ else {
+ ec->keyring_mode = EXEC_KEYRING_INHERIT;
+
+ /* User manager might have its umask redefined by PAM or UMask=. In this
+ * case let the units it manages inherit this value by default. They can
+ * still tune this value through their own unit file */
+ (void) get_process_umask(getpid_cached(), &ec->umask);
+ }
+ }
+
+ kc = unit_get_kill_context(u);
+ if (kc)
+ kill_context_init(kc);
+
+ if (UNIT_VTABLE(u)->init)
+ UNIT_VTABLE(u)->init(u);
+}
+
+static int unit_add_alias(Unit *u, char *donated_name) {
+ int r;
+
+ /* Make sure that u->names is allocated. We may leave u->names
+ * empty if we fail later, but this is not a problem. */
+ r = set_ensure_put(&u->aliases, &string_hash_ops, donated_name);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ return 0;
+}
+
+int unit_add_name(Unit *u, const char *text) {
+ _cleanup_free_ char *name = NULL, *instance = NULL;
+ UnitType t;
+ int r;
+
+ assert(u);
+ assert(text);
+
+ if (unit_name_is_valid(text, UNIT_NAME_TEMPLATE)) {
+ if (!u->instance)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "instance is not set when adding name '%s': %m", text);
+
+ r = unit_name_replace_instance(text, u->instance, &name);
+ if (r < 0)
+ return log_unit_debug_errno(u, r,
+ "failed to build instance name from '%s': %m", text);
+ } else {
+ name = strdup(text);
+ if (!name)
+ return -ENOMEM;
+ }
+
+ if (unit_has_name(u, name))
+ return 0;
+
+ if (hashmap_contains(u->manager->units, name))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EEXIST),
+ "unit already exist when adding name '%s': %m", name);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "name '%s' is invalid: %m", name);
+
+ t = unit_name_to_type(name);
+ if (t < 0)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "failed to derive unit type from name '%s': %m", name);
+
+ if (u->type != _UNIT_TYPE_INVALID && t != u->type)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "unit type is illegal: u->type(%d) and t(%d) for name '%s': %m",
+ u->type, t, name);
+
+ r = unit_name_to_instance(name, &instance);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "failed to extract instance from name '%s': %m", name);
+
+ if (instance && !unit_type_may_template(t))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL), "templates are not allowed for name '%s': %m", name);
+
+ /* Ensure that this unit either has no instance, or that the instance matches. */
+ if (u->type != _UNIT_TYPE_INVALID && !streq_ptr(u->instance, instance))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "cannot add name %s, the instances don't match (\"%s\" != \"%s\").",
+ name, instance, u->instance);
+
+ if (u->id && !unit_type_may_alias(t))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EEXIST),
+ "cannot add name %s, aliases are not allowed for %s units.",
+ name, unit_type_to_string(t));
+
+ if (hashmap_size(u->manager->units) >= MANAGER_MAX_NAMES)
+ return log_unit_warning_errno(u, SYNTHETIC_ERRNO(E2BIG), "cannot add name, manager has too many units: %m");
+
+ /* Add name to the global hashmap first, because that's easier to undo */
+ r = hashmap_put(u->manager->units, name, u);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "add unit to hashmap failed for name '%s': %m", text);
+
+ if (u->id) {
+ r = unit_add_alias(u, name); /* unit_add_alias() takes ownership of the name on success */
+ if (r < 0) {
+ hashmap_remove(u->manager->units, name);
+ return r;
+ }
+ TAKE_PTR(name);
+
+ } else {
+ /* A new name, we don't need the set yet. */
+ assert(u->type == _UNIT_TYPE_INVALID);
+ assert(!u->instance);
+
+ u->type = t;
+ u->id = TAKE_PTR(name);
+ u->instance = TAKE_PTR(instance);
+
+ LIST_PREPEND(units_by_type, u->manager->units_by_type[t], u);
+ unit_init(u);
+ }
+
+ unit_add_to_dbus_queue(u);
+ return 0;
+}
+
+int unit_choose_id(Unit *u, const char *name) {
+ _cleanup_free_ char *t = NULL;
+ char *s;
+ int r;
+
+ assert(u);
+ assert(name);
+
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ if (!u->instance)
+ return -EINVAL;
+
+ r = unit_name_replace_instance(name, u->instance, &t);
+ if (r < 0)
+ return r;
+
+ name = t;
+ }
+
+ if (streq_ptr(u->id, name))
+ return 0; /* Nothing to do. */
+
+ /* Selects one of the aliases of this unit as the id */
+ s = set_get(u->aliases, (char*) name);
+ if (!s)
+ return -ENOENT;
+
+ if (u->id) {
+ r = set_remove_and_put(u->aliases, name, u->id);
+ if (r < 0)
+ return r;
+ } else
+ assert_se(set_remove(u->aliases, name)); /* see set_get() above… */
+
+ u->id = s; /* Old u->id is now stored in the set, and s is not stored anywhere */
+ unit_add_to_dbus_queue(u);
+
+ return 0;
+}
+
+int unit_set_description(Unit *u, const char *description) {
+ int r;
+
+ assert(u);
+
+ r = free_and_strdup(&u->description, empty_to_null(description));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ unit_add_to_dbus_queue(u);
+
+ return 0;
+}
+
+bool unit_may_gc(Unit *u) {
+ UnitActiveState state;
+ int r;
+
+ assert(u);
+
+ /* Checks whether the unit is ready to be unloaded for garbage collection.
+ * Returns true when the unit may be collected, and false if there's some
+ * reason to keep it loaded.
+ *
+ * References from other units are *not* checked here. Instead, this is done
+ * in unit_gc_sweep(), but using markers to properly collect dependency loops.
+ */
+
+ if (u->job)
+ return false;
+
+ if (u->nop_job)
+ return false;
+
+ state = unit_active_state(u);
+
+ /* If the unit is inactive and failed and no job is queued for it, then release its runtime resources */
+ if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
+ UNIT_VTABLE(u)->release_resources)
+ UNIT_VTABLE(u)->release_resources(u);
+
+ if (u->perpetual)
+ return false;
+
+ if (sd_bus_track_count(u->bus_track) > 0)
+ return false;
+
+ /* But we keep the unit object around for longer when it is referenced or configured to not be gc'ed */
+ switch (u->collect_mode) {
+
+ case COLLECT_INACTIVE:
+ if (state != UNIT_INACTIVE)
+ return false;
+
+ break;
+
+ case COLLECT_INACTIVE_OR_FAILED:
+ if (!IN_SET(state, UNIT_INACTIVE, UNIT_FAILED))
+ return false;
+
+ break;
+
+ default:
+ assert_not_reached("Unknown garbage collection mode");
+ }
+
+ if (u->cgroup_path) {
+ /* If the unit has a cgroup, then check whether there's anything in it. If so, we should stay
+ * around. Units with active processes should never be collected. */
+
+ r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", u->cgroup_path);
+ if (r <= 0)
+ return false;
+ }
+
+ if (UNIT_VTABLE(u)->may_gc && !UNIT_VTABLE(u)->may_gc(u))
+ return false;
+
+ return true;
+}
+
+void unit_add_to_load_queue(Unit *u) {
+ assert(u);
+ assert(u->type != _UNIT_TYPE_INVALID);
+
+ if (u->load_state != UNIT_STUB || u->in_load_queue)
+ return;
+
+ LIST_PREPEND(load_queue, u->manager->load_queue, u);
+ u->in_load_queue = true;
+}
+
+void unit_add_to_cleanup_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_cleanup_queue)
+ return;
+
+ LIST_PREPEND(cleanup_queue, u->manager->cleanup_queue, u);
+ u->in_cleanup_queue = true;
+}
+
+void unit_add_to_gc_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_gc_queue || u->in_cleanup_queue)
+ return;
+
+ if (!unit_may_gc(u))
+ return;
+
+ LIST_PREPEND(gc_queue, u->manager->gc_unit_queue, u);
+ u->in_gc_queue = true;
+}
+
+void unit_add_to_dbus_queue(Unit *u) {
+ assert(u);
+ assert(u->type != _UNIT_TYPE_INVALID);
+
+ if (u->load_state == UNIT_STUB || u->in_dbus_queue)
+ return;
+
+ /* Shortcut things if nobody cares */
+ if (sd_bus_track_count(u->manager->subscribed) <= 0 &&
+ sd_bus_track_count(u->bus_track) <= 0 &&
+ set_isempty(u->manager->private_buses)) {
+ u->sent_dbus_new_signal = true;
+ return;
+ }
+
+ LIST_PREPEND(dbus_queue, u->manager->dbus_unit_queue, u);
+ u->in_dbus_queue = true;
+}
+
+void unit_submit_to_stop_when_unneeded_queue(Unit *u) {
+ assert(u);
+
+ if (u->in_stop_when_unneeded_queue)
+ return;
+
+ if (!u->stop_when_unneeded)
+ return;
+
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return;
+
+ LIST_PREPEND(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
+ u->in_stop_when_unneeded_queue = true;
+}
+
+static void bidi_set_free(Unit *u, Hashmap *h) {
+ Unit *other;
+ void *v;
+
+ assert(u);
+
+ /* Frees the hashmap and makes sure we are dropped from the inverse pointers */
+
+ HASHMAP_FOREACH_KEY(v, other, h) {
+ for (UnitDependency d = 0; d < _UNIT_DEPENDENCY_MAX; d++)
+ hashmap_remove(other->dependencies[d], u);
+
+ unit_add_to_gc_queue(other);
+ }
+
+ hashmap_free(h);
+}
+
+static void unit_remove_transient(Unit *u) {
+ char **i;
+
+ assert(u);
+
+ if (!u->transient)
+ return;
+
+ if (u->fragment_path)
+ (void) unlink(u->fragment_path);
+
+ STRV_FOREACH(i, u->dropin_paths) {
+ _cleanup_free_ char *p = NULL, *pp = NULL;
+
+ p = dirname_malloc(*i); /* Get the drop-in directory from the drop-in file */
+ if (!p)
+ continue;
+
+ pp = dirname_malloc(p); /* Get the config directory from the drop-in directory */
+ if (!pp)
+ continue;
+
+ /* Only drop transient drop-ins */
+ if (!path_equal(u->manager->lookup_paths.transient, pp))
+ continue;
+
+ (void) unlink(*i);
+ (void) rmdir(p);
+ }
+}
+
+static void unit_free_requires_mounts_for(Unit *u) {
+ assert(u);
+
+ for (;;) {
+ _cleanup_free_ char *path;
+
+ path = hashmap_steal_first_key(u->requires_mounts_for);
+ if (!path)
+ break;
+ else {
+ char s[strlen(path) + 1];
+
+ PATH_FOREACH_PREFIX_MORE(s, path) {
+ char *y;
+ Set *x;
+
+ x = hashmap_get2(u->manager->units_requiring_mounts_for, s, (void**) &y);
+ if (!x)
+ continue;
+
+ (void) set_remove(x, u);
+
+ if (set_isempty(x)) {
+ (void) hashmap_remove(u->manager->units_requiring_mounts_for, y);
+ free(y);
+ set_free(x);
+ }
+ }
+ }
+ }
+
+ u->requires_mounts_for = hashmap_free(u->requires_mounts_for);
+}
+
+static void unit_done(Unit *u) {
+ ExecContext *ec;
+ CGroupContext *cc;
+
+ assert(u);
+
+ if (u->type < 0)
+ return;
+
+ if (UNIT_VTABLE(u)->done)
+ UNIT_VTABLE(u)->done(u);
+
+ ec = unit_get_exec_context(u);
+ if (ec)
+ exec_context_done(ec);
+
+ cc = unit_get_cgroup_context(u);
+ if (cc)
+ cgroup_context_done(cc);
+}
+
+void unit_free(Unit *u) {
+ char *t;
+
+ if (!u)
+ return;
+
+ u->transient_file = safe_fclose(u->transient_file);
+
+ if (!MANAGER_IS_RELOADING(u->manager))
+ unit_remove_transient(u);
+
+ bus_unit_send_removed_signal(u);
+
+ unit_done(u);
+
+ unit_dequeue_rewatch_pids(u);
+
+ sd_bus_slot_unref(u->match_bus_slot);
+ sd_bus_track_unref(u->bus_track);
+ u->deserialized_refs = strv_free(u->deserialized_refs);
+ u->pending_freezer_message = sd_bus_message_unref(u->pending_freezer_message);
+
+ unit_free_requires_mounts_for(u);
+
+ SET_FOREACH(t, u->aliases)
+ hashmap_remove_value(u->manager->units, t, u);
+ if (u->id)
+ hashmap_remove_value(u->manager->units, u->id, u);
+
+ if (!sd_id128_is_null(u->invocation_id))
+ hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
+ if (u->job) {
+ Job *j = u->job;
+ job_uninstall(j);
+ job_free(j);
+ }
+
+ if (u->nop_job) {
+ Job *j = u->nop_job;
+ job_uninstall(j);
+ job_free(j);
+ }
+
+ for (UnitDependency d = 0; d < _UNIT_DEPENDENCY_MAX; d++)
+ bidi_set_free(u, u->dependencies[d]);
+
+ /* A unit is being dropped from the tree, make sure our family is realized properly. Do this after we
+ * detach the unit from slice tree in order to eliminate its effect on controller masks. */
+ if (UNIT_ISSET(u->slice))
+ unit_add_family_to_cgroup_realize_queue(UNIT_DEREF(u->slice));
+
+ if (u->on_console)
+ manager_unref_console(u->manager);
+
+ unit_release_cgroup(u);
+
+ if (!MANAGER_IS_RELOADING(u->manager))
+ unit_unlink_state_files(u);
+
+ unit_unref_uid_gid(u, false);
+
+ (void) manager_update_failed_units(u->manager, u, false);
+ set_remove(u->manager->startup_units, u);
+
+ unit_unwatch_all_pids(u);
+
+ unit_ref_unset(&u->slice);
+ while (u->refs_by_target)
+ unit_ref_unset(u->refs_by_target);
+
+ if (u->type != _UNIT_TYPE_INVALID)
+ LIST_REMOVE(units_by_type, u->manager->units_by_type[u->type], u);
+
+ if (u->in_load_queue)
+ LIST_REMOVE(load_queue, u->manager->load_queue, u);
+
+ if (u->in_dbus_queue)
+ LIST_REMOVE(dbus_queue, u->manager->dbus_unit_queue, u);
+
+ if (u->in_gc_queue)
+ LIST_REMOVE(gc_queue, u->manager->gc_unit_queue, u);
+
+ if (u->in_cgroup_realize_queue)
+ LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
+
+ if (u->in_cgroup_empty_queue)
+ LIST_REMOVE(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
+
+ if (u->in_cleanup_queue)
+ LIST_REMOVE(cleanup_queue, u->manager->cleanup_queue, u);
+
+ if (u->in_target_deps_queue)
+ LIST_REMOVE(target_deps_queue, u->manager->target_deps_queue, u);
+
+ if (u->in_stop_when_unneeded_queue)
+ LIST_REMOVE(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
+
+ safe_close(u->ip_accounting_ingress_map_fd);
+ safe_close(u->ip_accounting_egress_map_fd);
+
+ safe_close(u->ipv4_allow_map_fd);
+ safe_close(u->ipv6_allow_map_fd);
+ safe_close(u->ipv4_deny_map_fd);
+ safe_close(u->ipv6_deny_map_fd);
+
+ bpf_program_unref(u->ip_bpf_ingress);
+ bpf_program_unref(u->ip_bpf_ingress_installed);
+ bpf_program_unref(u->ip_bpf_egress);
+ bpf_program_unref(u->ip_bpf_egress_installed);
+
+ set_free(u->ip_bpf_custom_ingress);
+ set_free(u->ip_bpf_custom_egress);
+ set_free(u->ip_bpf_custom_ingress_installed);
+ set_free(u->ip_bpf_custom_egress_installed);
+
+ bpf_program_unref(u->bpf_device_control_installed);
+
+ condition_free_list(u->conditions);
+ condition_free_list(u->asserts);
+
+ free(u->description);
+ strv_free(u->documentation);
+ free(u->fragment_path);
+ free(u->source_path);
+ strv_free(u->dropin_paths);
+ free(u->instance);
+
+ free(u->job_timeout_reboot_arg);
+ free(u->reboot_arg);
+
+ set_free_free(u->aliases);
+ free(u->id);
+
+ free(u);
+}
+
+FreezerState unit_freezer_state(Unit *u) {
+ assert(u);
+
+ return u->freezer_state;
+}
+
+int unit_freezer_state_kernel(Unit *u, FreezerState *ret) {
+ char *values[1] = {};
+ int r;
+
+ assert(u);
+
+ r = cg_get_keyed_attribute(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events",
+ STRV_MAKE("frozen"), values);
+ if (r < 0)
+ return r;
+
+ r = _FREEZER_STATE_INVALID;
+
+ if (values[0]) {
+ if (streq(values[0], "0"))
+ r = FREEZER_RUNNING;
+ else if (streq(values[0], "1"))
+ r = FREEZER_FROZEN;
+ }
+
+ free(values[0]);
+ *ret = r;
+
+ return 0;
+}
+
+UnitActiveState unit_active_state(Unit *u) {
+ assert(u);
+
+ if (u->load_state == UNIT_MERGED)
+ return unit_active_state(unit_follow_merge(u));
+
+ /* After a reload it might happen that a unit is not correctly
+ * loaded but still has a process around. That's why we won't
+ * shortcut failed loading to UNIT_INACTIVE_FAILED. */
+
+ return UNIT_VTABLE(u)->active_state(u);
+}
+
+const char* unit_sub_state_to_string(Unit *u) {
+ assert(u);
+
+ return UNIT_VTABLE(u)->sub_state_to_string(u);
+}
+
+static int hashmap_complete_move(Hashmap **s, Hashmap **other) {
+ assert(s);
+ assert(other);
+
+ if (!*other)
+ return 0;
+
+ if (*s)
+ return hashmap_move(*s, *other);
+ else
+ *s = TAKE_PTR(*other);
+
+ return 0;
+}
+
+static int merge_names(Unit *u, Unit *other) {
+ char *name;
+ int r;
+
+ assert(u);
+ assert(other);
+
+ r = unit_add_alias(u, other->id);
+ if (r < 0)
+ return r;
+
+ r = set_move(u->aliases, other->aliases);
+ if (r < 0) {
+ set_remove(u->aliases, other->id);
+ return r;
+ }
+
+ TAKE_PTR(other->id);
+ other->aliases = set_free_free(other->aliases);
+
+ SET_FOREACH(name, u->aliases)
+ assert_se(hashmap_replace(u->manager->units, name, u) == 0);
+
+ return 0;
+}
+
+static int reserve_dependencies(Unit *u, Unit *other, UnitDependency d) {
+ unsigned n_reserve;
+
+ assert(u);
+ assert(other);
+ assert(d < _UNIT_DEPENDENCY_MAX);
+
+ /*
+ * If u does not have this dependency set allocated, there is no need
+ * to reserve anything. In that case other's set will be transferred
+ * as a whole to u by complete_move().
+ */
+ if (!u->dependencies[d])
+ return 0;
+
+ /* merge_dependencies() will skip a u-on-u dependency */
+ n_reserve = hashmap_size(other->dependencies[d]) - !!hashmap_get(other->dependencies[d], u);
+
+ return hashmap_reserve(u->dependencies[d], n_reserve);
+}
+
+static void merge_dependencies(Unit *u, Unit *other, const char *other_id, UnitDependency d) {
+ Unit *back;
+ void *v;
+ int r;
+
+ /* Merges all dependencies of type 'd' of the unit 'other' into the deps of the unit 'u' */
+
+ assert(u);
+ assert(other);
+ assert(d < _UNIT_DEPENDENCY_MAX);
+
+ /* Fix backwards pointers. Let's iterate through all dependent units of the other unit. */
+ HASHMAP_FOREACH_KEY(v, back, other->dependencies[d])
+
+ /* Let's now iterate through the dependencies of that dependencies of the other units,
+ * looking for pointers back, and let's fix them up, to instead point to 'u'. */
+ for (UnitDependency k = 0; k < _UNIT_DEPENDENCY_MAX; k++)
+ if (back == u) {
+ /* Do not add dependencies between u and itself. */
+ if (hashmap_remove(back->dependencies[k], other))
+ maybe_warn_about_dependency(u, other_id, k);
+ } else {
+ UnitDependencyInfo di_u, di_other;
+
+ /* Let's drop this dependency between "back" and "other", and let's create it between
+ * "back" and "u" instead. Let's merge the bit masks of the dependency we are moving,
+ * and any such dependency which might already exist */
+
+ di_other.data = hashmap_get(back->dependencies[k], other);
+ if (!di_other.data)
+ continue; /* dependency isn't set, let's try the next one */
+
+ di_u.data = hashmap_get(back->dependencies[k], u);
+
+ UnitDependencyInfo di_merged = {
+ .origin_mask = di_u.origin_mask | di_other.origin_mask,
+ .destination_mask = di_u.destination_mask | di_other.destination_mask,
+ };
+
+ r = hashmap_remove_and_replace(back->dependencies[k], other, u, di_merged.data);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove/replace: back=%s other=%s u=%s: %m", back->id, other_id, u->id);
+ assert(r >= 0);
+
+ /* assert_se(hashmap_remove_and_replace(back->dependencies[k], other, u, di_merged.data) >= 0); */
+ }
+
+ /* Also do not move dependencies on u to itself */
+ back = hashmap_remove(other->dependencies[d], u);
+ if (back)
+ maybe_warn_about_dependency(u, other_id, d);
+
+ /* The move cannot fail. The caller must have performed a reservation. */
+ assert_se(hashmap_complete_move(&u->dependencies[d], &other->dependencies[d]) == 0);
+
+ other->dependencies[d] = hashmap_free(other->dependencies[d]);
+}
+
+int unit_merge(Unit *u, Unit *other) {
+ const char *other_id = NULL;
+ int r;
+
+ assert(u);
+ assert(other);
+ assert(u->manager == other->manager);
+ assert(u->type != _UNIT_TYPE_INVALID);
+
+ other = unit_follow_merge(other);
+
+ if (other == u)
+ return 0;
+
+ if (u->type != other->type)
+ return -EINVAL;
+
+ if (!unit_type_may_alias(u->type)) /* Merging only applies to unit names that support aliases */
+ return -EEXIST;
+
+ if (!IN_SET(other->load_state, UNIT_STUB, UNIT_NOT_FOUND))
+ return -EEXIST;
+
+ if (!streq_ptr(u->instance, other->instance))
+ return -EINVAL;
+
+ if (other->job)
+ return -EEXIST;
+
+ if (other->nop_job)
+ return -EEXIST;
+
+ if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
+ return -EEXIST;
+
+ if (other->id)
+ other_id = strdupa(other->id);
+
+ /* Make reservations to ensure merge_dependencies() won't fail */
+ for (UnitDependency d = 0; d < _UNIT_DEPENDENCY_MAX; d++) {
+ r = reserve_dependencies(u, other, d);
+ /*
+ * We don't rollback reservations if we fail. We don't have
+ * a way to undo reservations. A reservation is not a leak.
+ */
+ if (r < 0)
+ return r;
+ }
+
+ /* Merge names */
+ r = merge_names(u, other);
+ if (r < 0)
+ return r;
+
+ /* Redirect all references */
+ while (other->refs_by_target)
+ unit_ref_set(other->refs_by_target, other->refs_by_target->source, u);
+
+ /* Merge dependencies */
+ for (UnitDependency d = 0; d < _UNIT_DEPENDENCY_MAX; d++)
+ merge_dependencies(u, other, other_id, d);
+
+ other->load_state = UNIT_MERGED;
+ other->merged_into = u;
+
+ /* If there is still some data attached to the other node, we
+ * don't need it anymore, and can free it. */
+ if (other->load_state != UNIT_STUB)
+ if (UNIT_VTABLE(other)->done)
+ UNIT_VTABLE(other)->done(other);
+
+ unit_add_to_dbus_queue(u);
+ unit_add_to_cleanup_queue(other);
+
+ return 0;
+}
+
+int unit_merge_by_name(Unit *u, const char *name) {
+ _cleanup_free_ char *s = NULL;
+ Unit *other;
+ int r;
+
+ /* Either add name to u, or if a unit with name already exists, merge it with u.
+ * If name is a template, do the same for name@instance, where instance is u's instance. */
+
+ assert(u);
+ assert(name);
+
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ if (!u->instance)
+ return -EINVAL;
+
+ r = unit_name_replace_instance(name, u->instance, &s);
+ if (r < 0)
+ return r;
+
+ name = s;
+ }
+
+ other = manager_get_unit(u->manager, name);
+ if (other)
+ return unit_merge(u, other);
+
+ return unit_add_name(u, name);
+}
+
+Unit* unit_follow_merge(Unit *u) {
+ assert(u);
+
+ while (u->load_state == UNIT_MERGED)
+ assert_se(u = u->merged_into);
+
+ return u;
+}
+
+int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (c->working_directory && !c->working_directory_missing_ok) {
+ r = unit_require_mounts_for(u, c->working_directory, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->root_directory) {
+ r = unit_require_mounts_for(u, c->root_directory, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->root_image) {
+ r = unit_require_mounts_for(u, c->root_image, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
+ if (!u->manager->prefix[dt])
+ continue;
+
+ char **dp;
+ STRV_FOREACH(dp, c->directories[dt].paths) {
+ _cleanup_free_ char *p;
+
+ p = path_join(u->manager->prefix[dt], *dp);
+ if (!p)
+ return -ENOMEM;
+
+ r = unit_require_mounts_for(u, p, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return 0;
+
+ /* For the following three directory types we need write access, and /var/ is possibly on the root
+ * fs. Hence order after systemd-remount-fs.service, to ensure things are writable. */
+ if (!strv_isempty(c->directories[EXEC_DIRECTORY_STATE].paths) ||
+ !strv_isempty(c->directories[EXEC_DIRECTORY_CACHE].paths) ||
+ !strv_isempty(c->directories[EXEC_DIRECTORY_LOGS].paths)) {
+ r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_REMOUNT_FS_SERVICE, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->private_tmp) {
+ const char *p;
+
+ FOREACH_STRING(p, "/tmp", "/var/tmp") {
+ r = unit_require_mounts_for(u, p, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_TMPFILES_SETUP_SERVICE, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->root_image) {
+ /* We need to wait for /dev/loopX to appear when doing RootImage=, hence let's add an
+ * implicit dependency on udev */
+
+ r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_UDEVD_SERVICE, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+
+ if (!IN_SET(c->std_output,
+ EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE) &&
+ !IN_SET(c->std_error,
+ EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE) &&
+ !c->log_namespace)
+ return 0;
+
+ /* If syslog or kernel logging is requested (or log namespacing is), make sure our own logging daemon
+ * is run first. */
+
+ if (c->log_namespace) {
+ _cleanup_free_ char *socket_unit = NULL, *varlink_socket_unit = NULL;
+
+ r = unit_name_build_from_type("systemd-journald", c->log_namespace, UNIT_SOCKET, &socket_unit);
+ if (r < 0)
+ return r;
+
+ r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, socket_unit, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+
+ r = unit_name_build_from_type("systemd-journald-varlink", c->log_namespace, UNIT_SOCKET, &varlink_socket_unit);
+ if (r < 0)
+ return r;
+
+ r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, varlink_socket_unit, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ } else
+ r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_JOURNALD_SOCKET, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+const char *unit_description(Unit *u) {
+ assert(u);
+
+ if (u->description)
+ return u->description;
+
+ return strna(u->id);
+}
+
+const char *unit_status_string(Unit *u) {
+ assert(u);
+
+ if (u->manager->status_unit_format == STATUS_UNIT_FORMAT_NAME && u->id)
+ return u->id;
+
+ return unit_description(u);
+}
+
+static void print_unit_dependency_mask(FILE *f, const char *kind, UnitDependencyMask mask, bool *space) {
+ const struct {
+ UnitDependencyMask mask;
+ const char *name;
+ } table[] = {
+ { UNIT_DEPENDENCY_FILE, "file" },
+ { UNIT_DEPENDENCY_IMPLICIT, "implicit" },
+ { UNIT_DEPENDENCY_DEFAULT, "default" },
+ { UNIT_DEPENDENCY_UDEV, "udev" },
+ { UNIT_DEPENDENCY_PATH, "path" },
+ { UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT, "mountinfo-implicit" },
+ { UNIT_DEPENDENCY_MOUNTINFO_DEFAULT, "mountinfo-default" },
+ { UNIT_DEPENDENCY_PROC_SWAP, "proc-swap" },
+ };
+
+ assert(f);
+ assert(kind);
+ assert(space);
+
+ for (size_t i = 0; i < ELEMENTSOF(table); i++) {
+
+ if (mask == 0)
+ break;
+
+ if (FLAGS_SET(mask, table[i].mask)) {
+ if (*space)
+ fputc(' ', f);
+ else
+ *space = true;
+
+ fputs(kind, f);
+ fputs("-", f);
+ fputs(table[i].name, f);
+
+ mask &= ~table[i].mask;
+ }
+ }
+
+ assert(mask == 0);
+}
+
+void unit_dump(Unit *u, FILE *f, const char *prefix) {
+ char *t, **j;
+ const char *prefix2;
+ char timestamp[5][FORMAT_TIMESTAMP_MAX], timespan[FORMAT_TIMESPAN_MAX];
+ Unit *following;
+ _cleanup_set_free_ Set *following_set = NULL;
+ CGroupMask m;
+ int r;
+
+ assert(u);
+ assert(u->type >= 0);
+
+ prefix = strempty(prefix);
+ prefix2 = strjoina(prefix, "\t");
+
+ fprintf(f,
+ "%s-> Unit %s:\n",
+ prefix, u->id);
+
+ SET_FOREACH(t, u->aliases)
+ fprintf(f, "%s\tAlias: %s\n", prefix, t);
+
+ fprintf(f,
+ "%s\tDescription: %s\n"
+ "%s\tInstance: %s\n"
+ "%s\tUnit Load State: %s\n"
+ "%s\tUnit Active State: %s\n"
+ "%s\tState Change Timestamp: %s\n"
+ "%s\tInactive Exit Timestamp: %s\n"
+ "%s\tActive Enter Timestamp: %s\n"
+ "%s\tActive Exit Timestamp: %s\n"
+ "%s\tInactive Enter Timestamp: %s\n"
+ "%s\tMay GC: %s\n"
+ "%s\tNeed Daemon Reload: %s\n"
+ "%s\tTransient: %s\n"
+ "%s\tPerpetual: %s\n"
+ "%s\tGarbage Collection Mode: %s\n"
+ "%s\tSlice: %s\n"
+ "%s\tCGroup: %s\n"
+ "%s\tCGroup realized: %s\n",
+ prefix, unit_description(u),
+ prefix, strna(u->instance),
+ prefix, unit_load_state_to_string(u->load_state),
+ prefix, unit_active_state_to_string(unit_active_state(u)),
+ prefix, strna(format_timestamp(timestamp[0], sizeof(timestamp[0]), u->state_change_timestamp.realtime)),
+ prefix, strna(format_timestamp(timestamp[1], sizeof(timestamp[1]), u->inactive_exit_timestamp.realtime)),
+ prefix, strna(format_timestamp(timestamp[2], sizeof(timestamp[2]), u->active_enter_timestamp.realtime)),
+ prefix, strna(format_timestamp(timestamp[3], sizeof(timestamp[3]), u->active_exit_timestamp.realtime)),
+ prefix, strna(format_timestamp(timestamp[4], sizeof(timestamp[4]), u->inactive_enter_timestamp.realtime)),
+ prefix, yes_no(unit_may_gc(u)),
+ prefix, yes_no(unit_need_daemon_reload(u)),
+ prefix, yes_no(u->transient),
+ prefix, yes_no(u->perpetual),
+ prefix, collect_mode_to_string(u->collect_mode),
+ prefix, strna(unit_slice_name(u)),
+ prefix, strna(u->cgroup_path),
+ prefix, yes_no(u->cgroup_realized));
+
+ if (u->cgroup_realized_mask != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(u->cgroup_realized_mask, &s);
+ fprintf(f, "%s\tCGroup realized mask: %s\n", prefix, strnull(s));
+ }
+
+ if (u->cgroup_enabled_mask != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(u->cgroup_enabled_mask, &s);
+ fprintf(f, "%s\tCGroup enabled mask: %s\n", prefix, strnull(s));
+ }
+
+ m = unit_get_own_mask(u);
+ if (m != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(m, &s);
+ fprintf(f, "%s\tCGroup own mask: %s\n", prefix, strnull(s));
+ }
+
+ m = unit_get_members_mask(u);
+ if (m != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(m, &s);
+ fprintf(f, "%s\tCGroup members mask: %s\n", prefix, strnull(s));
+ }
+
+ m = unit_get_delegate_mask(u);
+ if (m != 0) {
+ _cleanup_free_ char *s = NULL;
+ (void) cg_mask_to_string(m, &s);
+ fprintf(f, "%s\tCGroup delegate mask: %s\n", prefix, strnull(s));
+ }
+
+ if (!sd_id128_is_null(u->invocation_id))
+ fprintf(f, "%s\tInvocation ID: " SD_ID128_FORMAT_STR "\n",
+ prefix, SD_ID128_FORMAT_VAL(u->invocation_id));
+
+ STRV_FOREACH(j, u->documentation)
+ fprintf(f, "%s\tDocumentation: %s\n", prefix, *j);
+
+ following = unit_following(u);
+ if (following)
+ fprintf(f, "%s\tFollowing: %s\n", prefix, following->id);
+
+ r = unit_following_set(u, &following_set);
+ if (r >= 0) {
+ Unit *other;
+
+ SET_FOREACH(other, following_set)
+ fprintf(f, "%s\tFollowing Set Member: %s\n", prefix, other->id);
+ }
+
+ if (u->fragment_path)
+ fprintf(f, "%s\tFragment Path: %s\n", prefix, u->fragment_path);
+
+ if (u->source_path)
+ fprintf(f, "%s\tSource Path: %s\n", prefix, u->source_path);
+
+ STRV_FOREACH(j, u->dropin_paths)
+ fprintf(f, "%s\tDropIn Path: %s\n", prefix, *j);
+
+ if (u->failure_action != EMERGENCY_ACTION_NONE)
+ fprintf(f, "%s\tFailure Action: %s\n", prefix, emergency_action_to_string(u->failure_action));
+ if (u->failure_action_exit_status >= 0)
+ fprintf(f, "%s\tFailure Action Exit Status: %i\n", prefix, u->failure_action_exit_status);
+ if (u->success_action != EMERGENCY_ACTION_NONE)
+ fprintf(f, "%s\tSuccess Action: %s\n", prefix, emergency_action_to_string(u->success_action));
+ if (u->success_action_exit_status >= 0)
+ fprintf(f, "%s\tSuccess Action Exit Status: %i\n", prefix, u->success_action_exit_status);
+
+ if (u->job_timeout != USEC_INFINITY)
+ fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
+
+ if (u->job_timeout_action != EMERGENCY_ACTION_NONE)
+ fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, emergency_action_to_string(u->job_timeout_action));
+
+ if (u->job_timeout_reboot_arg)
+ fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg);
+
+ condition_dump_list(u->conditions, f, prefix, condition_type_to_string);
+ condition_dump_list(u->asserts, f, prefix, assert_type_to_string);
+
+ if (dual_timestamp_is_set(&u->condition_timestamp))
+ fprintf(f,
+ "%s\tCondition Timestamp: %s\n"
+ "%s\tCondition Result: %s\n",
+ prefix, strna(format_timestamp(timestamp[0], sizeof(timestamp[0]), u->condition_timestamp.realtime)),
+ prefix, yes_no(u->condition_result));
+
+ if (dual_timestamp_is_set(&u->assert_timestamp))
+ fprintf(f,
+ "%s\tAssert Timestamp: %s\n"
+ "%s\tAssert Result: %s\n",
+ prefix, strna(format_timestamp(timestamp[0], sizeof(timestamp[0]), u->assert_timestamp.realtime)),
+ prefix, yes_no(u->assert_result));
+
+ for (UnitDependency d = 0; d < _UNIT_DEPENDENCY_MAX; d++) {
+ UnitDependencyInfo di;
+ Unit *other;
+
+ HASHMAP_FOREACH_KEY(di.data, other, u->dependencies[d]) {
+ bool space = false;
+
+ fprintf(f, "%s\t%s: %s (", prefix, unit_dependency_to_string(d), other->id);
+
+ print_unit_dependency_mask(f, "origin", di.origin_mask, &space);
+ print_unit_dependency_mask(f, "destination", di.destination_mask, &space);
+
+ fputs(")\n", f);
+ }
+ }
+
+ if (!hashmap_isempty(u->requires_mounts_for)) {
+ UnitDependencyInfo di;
+ const char *path;
+
+ HASHMAP_FOREACH_KEY(di.data, path, u->requires_mounts_for) {
+ bool space = false;
+
+ fprintf(f, "%s\tRequiresMountsFor: %s (", prefix, path);
+
+ print_unit_dependency_mask(f, "origin", di.origin_mask, &space);
+ print_unit_dependency_mask(f, "destination", di.destination_mask, &space);
+
+ fputs(")\n", f);
+ }
+ }
+
+ if (u->load_state == UNIT_LOADED) {
+
+ fprintf(f,
+ "%s\tStopWhenUnneeded: %s\n"
+ "%s\tRefuseManualStart: %s\n"
+ "%s\tRefuseManualStop: %s\n"
+ "%s\tDefaultDependencies: %s\n"
+ "%s\tOnFailureJobMode: %s\n"
+ "%s\tIgnoreOnIsolate: %s\n",
+ prefix, yes_no(u->stop_when_unneeded),
+ prefix, yes_no(u->refuse_manual_start),
+ prefix, yes_no(u->refuse_manual_stop),
+ prefix, yes_no(u->default_dependencies),
+ prefix, job_mode_to_string(u->on_failure_job_mode),
+ prefix, yes_no(u->ignore_on_isolate));
+
+ if (UNIT_VTABLE(u)->dump)
+ UNIT_VTABLE(u)->dump(u, f, prefix2);
+
+ } else if (u->load_state == UNIT_MERGED)
+ fprintf(f,
+ "%s\tMerged into: %s\n",
+ prefix, u->merged_into->id);
+ else if (u->load_state == UNIT_ERROR)
+ fprintf(f, "%s\tLoad Error Code: %s\n", prefix, strerror_safe(u->load_error));
+
+ for (const char *n = sd_bus_track_first(u->bus_track); n; n = sd_bus_track_next(u->bus_track))
+ fprintf(f, "%s\tBus Ref: %s\n", prefix, n);
+
+ if (u->job)
+ job_dump(u->job, f, prefix2);
+
+ if (u->nop_job)
+ job_dump(u->nop_job, f, prefix2);
+}
+
+/* Common implementation for multiple backends */
+int unit_load_fragment_and_dropin(Unit *u, bool fragment_required) {
+ int r;
+
+ assert(u);
+
+ /* Load a .{service,socket,...} file */
+ r = unit_load_fragment(u);
+ if (r < 0)
+ return r;
+
+ if (u->load_state == UNIT_STUB) {
+ if (fragment_required)
+ return -ENOENT;
+
+ u->load_state = UNIT_LOADED;
+ }
+
+ /* Load drop-in directory data. If u is an alias, we might be reloading the
+ * target unit needlessly. But we cannot be sure which drops-ins have already
+ * been loaded and which not, at least without doing complicated book-keeping,
+ * so let's always reread all drop-ins. */
+ r = unit_load_dropin(unit_follow_merge(u));
+ if (r < 0)
+ return r;
+
+ if (u->source_path) {
+ struct stat st;
+
+ if (stat(u->source_path, &st) >= 0)
+ u->source_mtime = timespec_load(&st.st_mtim);
+ else
+ u->source_mtime = 0;
+ }
+
+ return 0;
+}
+
+void unit_add_to_target_deps_queue(Unit *u) {
+ Manager *m = u->manager;
+
+ assert(u);
+
+ if (u->in_target_deps_queue)
+ return;
+
+ LIST_PREPEND(target_deps_queue, m->target_deps_queue, u);
+ u->in_target_deps_queue = true;
+}
+
+int unit_add_default_target_dependency(Unit *u, Unit *target) {
+ assert(u);
+ assert(target);
+
+ if (target->type != UNIT_TARGET)
+ return 0;
+
+ /* Only add the dependency if both units are loaded, so that
+ * that loop check below is reliable */
+ if (u->load_state != UNIT_LOADED ||
+ target->load_state != UNIT_LOADED)
+ return 0;
+
+ /* If either side wants no automatic dependencies, then let's
+ * skip this */
+ if (!u->default_dependencies ||
+ !target->default_dependencies)
+ return 0;
+
+ /* Don't create loops */
+ if (hashmap_get(target->dependencies[UNIT_BEFORE], u))
+ return 0;
+
+ return unit_add_dependency(target, UNIT_AFTER, u, true, UNIT_DEPENDENCY_DEFAULT);
+}
+
+static int unit_add_slice_dependencies(Unit *u) {
+ assert(u);
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return 0;
+
+ /* Slice units are implicitly ordered against their parent slices (as this relationship is encoded in the
+ name), while all other units are ordered based on configuration (as in their case Slice= configures the
+ relationship). */
+ UnitDependencyMask mask = u->type == UNIT_SLICE ? UNIT_DEPENDENCY_IMPLICIT : UNIT_DEPENDENCY_FILE;
+
+ if (UNIT_ISSET(u->slice))
+ return unit_add_two_dependencies(u, UNIT_AFTER, UNIT_REQUIRES, UNIT_DEREF(u->slice), true, mask);
+
+ if (unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ return unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, SPECIAL_ROOT_SLICE, true, mask);
+}
+
+static int unit_add_mount_dependencies(Unit *u) {
+ UnitDependencyInfo di;
+ const char *path;
+ int r;
+
+ assert(u);
+
+ HASHMAP_FOREACH_KEY(di.data, path, u->requires_mounts_for) {
+ char prefix[strlen(path) + 1];
+
+ PATH_FOREACH_PREFIX_MORE(prefix, path) {
+ _cleanup_free_ char *p = NULL;
+ Unit *m;
+
+ r = unit_name_from_path(prefix, ".mount", &p);
+ if (r < 0)
+ return r;
+
+ m = manager_get_unit(u->manager, p);
+ if (!m) {
+ /* Make sure to load the mount unit if
+ * it exists. If so the dependencies
+ * on this unit will be added later
+ * during the loading of the mount
+ * unit. */
+ (void) manager_load_unit_prepare(u->manager, p, NULL, NULL, &m);
+ continue;
+ }
+ if (m == u)
+ continue;
+
+ if (m->load_state != UNIT_LOADED)
+ continue;
+
+ r = unit_add_dependency(u, UNIT_AFTER, m, true, di.origin_mask);
+ if (r < 0)
+ return r;
+
+ if (m->fragment_path) {
+ r = unit_add_dependency(u, UNIT_REQUIRES, m, true, di.origin_mask);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int unit_add_oomd_dependencies(Unit *u) {
+ CGroupContext *c;
+ bool wants_oomd;
+ int r;
+
+ assert(u);
+
+ if (!u->default_dependencies)
+ return 0;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ wants_oomd = (c->moom_swap == MANAGED_OOM_KILL || c->moom_mem_pressure == MANAGED_OOM_KILL);
+ if (!wants_oomd)
+ return 0;
+
+ r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_WANTS, "systemd-oomd.service", true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int unit_add_startup_units(Unit *u) {
+ CGroupContext *c;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ if (c->startup_cpu_shares == CGROUP_CPU_SHARES_INVALID &&
+ c->startup_io_weight == CGROUP_WEIGHT_INVALID &&
+ c->startup_blockio_weight == CGROUP_BLKIO_WEIGHT_INVALID)
+ return 0;
+
+ return set_ensure_put(&u->manager->startup_units, NULL, u);
+}
+
+int unit_load(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->in_load_queue) {
+ LIST_REMOVE(load_queue, u->manager->load_queue, u);
+ u->in_load_queue = false;
+ }
+
+ if (u->type == _UNIT_TYPE_INVALID)
+ return -EINVAL;
+
+ if (u->load_state != UNIT_STUB)
+ return 0;
+
+ if (u->transient_file) {
+ /* Finalize transient file: if this is a transient unit file, as soon as we reach unit_load() the setup
+ * is complete, hence let's synchronize the unit file we just wrote to disk. */
+
+ r = fflush_and_check(u->transient_file);
+ if (r < 0)
+ goto fail;
+
+ u->transient_file = safe_fclose(u->transient_file);
+ u->fragment_mtime = now(CLOCK_REALTIME);
+ }
+
+ r = UNIT_VTABLE(u)->load(u);
+ if (r < 0)
+ goto fail;
+
+ assert(u->load_state != UNIT_STUB);
+
+ if (u->load_state == UNIT_LOADED) {
+ unit_add_to_target_deps_queue(u);
+
+ r = unit_add_slice_dependencies(u);
+ if (r < 0)
+ goto fail;
+
+ r = unit_add_mount_dependencies(u);
+ if (r < 0)
+ goto fail;
+
+ r = unit_add_oomd_dependencies(u);
+ if (r < 0)
+ goto fail;
+
+ r = unit_add_startup_units(u);
+ if (r < 0)
+ goto fail;
+
+ if (u->on_failure_job_mode == JOB_ISOLATE && hashmap_size(u->dependencies[UNIT_ON_FAILURE]) > 1) {
+ log_unit_error(u, "More than one OnFailure= dependencies specified but OnFailureJobMode=isolate set. Refusing.");
+ r = -ENOEXEC;
+ goto fail;
+ }
+
+ if (u->job_running_timeout != USEC_INFINITY && u->job_running_timeout > u->job_timeout)
+ log_unit_warning(u, "JobRunningTimeoutSec= is greater than JobTimeoutSec=, it has no effect.");
+
+ /* We finished loading, let's ensure our parents recalculate the members mask */
+ unit_invalidate_cgroup_members_masks(u);
+ }
+
+ assert((u->load_state != UNIT_MERGED) == !u->merged_into);
+
+ unit_add_to_dbus_queue(unit_follow_merge(u));
+ unit_add_to_gc_queue(u);
+ (void) manager_varlink_send_managed_oom_update(u);
+
+ return 0;
+
+fail:
+ /* We convert ENOEXEC errors to the UNIT_BAD_SETTING load state here. Configuration parsing code
+ * should hence return ENOEXEC to ensure units are placed in this state after loading. */
+
+ u->load_state = u->load_state == UNIT_STUB ? UNIT_NOT_FOUND :
+ r == -ENOEXEC ? UNIT_BAD_SETTING :
+ UNIT_ERROR;
+ u->load_error = r;
+
+ /* Record the timestamp on the cache, so that if the cache gets updated between now and the next time
+ * an attempt is made to load this unit, we know we need to check again. */
+ if (u->load_state == UNIT_NOT_FOUND)
+ u->fragment_not_found_timestamp_hash = u->manager->unit_cache_timestamp_hash;
+
+ unit_add_to_dbus_queue(u);
+ unit_add_to_gc_queue(u);
+
+ return log_unit_debug_errno(u, r, "Failed to load configuration: %m");
+}
+
+_printf_(7, 8)
+static int log_unit_internal(void *userdata, int level, int error, const char *file, int line, const char *func, const char *format, ...) {
+ Unit *u = userdata;
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ if (u)
+ r = log_object_internalv(level, error, file, line, func,
+ u->manager->unit_log_field,
+ u->id,
+ u->manager->invocation_log_field,
+ u->invocation_id_string,
+ format, ap);
+ else
+ r = log_internalv(level, error, file, line, func, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+static bool unit_test_condition(Unit *u) {
+ _cleanup_strv_free_ char **env = NULL;
+ int r;
+
+ assert(u);
+
+ dual_timestamp_get(&u->condition_timestamp);
+
+ r = manager_get_effective_environment(u->manager, &env);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Failed to determine effective environment: %m");
+ u->condition_result = CONDITION_ERROR;
+ } else
+ u->condition_result = condition_test_list(
+ u->conditions,
+ env,
+ condition_type_to_string,
+ log_unit_internal,
+ u);
+
+ unit_add_to_dbus_queue(u);
+ return u->condition_result;
+}
+
+static bool unit_test_assert(Unit *u) {
+ _cleanup_strv_free_ char **env = NULL;
+ int r;
+
+ assert(u);
+
+ dual_timestamp_get(&u->assert_timestamp);
+
+ r = manager_get_effective_environment(u->manager, &env);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Failed to determine effective environment: %m");
+ u->assert_result = CONDITION_ERROR;
+ } else
+ u->assert_result = condition_test_list(
+ u->asserts,
+ env,
+ assert_type_to_string,
+ log_unit_internal,
+ u);
+
+ unit_add_to_dbus_queue(u);
+ return u->assert_result;
+}
+
+void unit_status_printf(Unit *u, StatusType status_type, const char *status, const char *unit_status_msg_format) {
+ const char *d;
+
+ d = unit_status_string(u);
+ if (log_get_show_color())
+ d = strjoina(ANSI_HIGHLIGHT, d, ANSI_NORMAL);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ manager_status_printf(u->manager, status_type, status, unit_status_msg_format, d);
+ REENABLE_WARNING;
+}
+
+int unit_test_start_limit(Unit *u) {
+ const char *reason;
+
+ assert(u);
+
+ if (ratelimit_below(&u->start_ratelimit)) {
+ u->start_limit_hit = false;
+ return 0;
+ }
+
+ log_unit_warning(u, "Start request repeated too quickly.");
+ u->start_limit_hit = true;
+
+ reason = strjoina("unit ", u->id, " failed");
+
+ emergency_action(u->manager, u->start_limit_action,
+ EMERGENCY_ACTION_IS_WATCHDOG|EMERGENCY_ACTION_WARN,
+ u->reboot_arg, -1, reason);
+
+ return -ECANCELED;
+}
+
+bool unit_shall_confirm_spawn(Unit *u) {
+ assert(u);
+
+ if (manager_is_confirm_spawn_disabled(u->manager))
+ return false;
+
+ /* For some reasons units remaining in the same process group
+ * as PID 1 fail to acquire the console even if it's not used
+ * by any process. So skip the confirmation question for them. */
+ return !unit_get_exec_context(u)->same_pgrp;
+}
+
+static bool unit_verify_deps(Unit *u) {
+ Unit *other;
+ void *v;
+
+ assert(u);
+
+ /* Checks whether all BindsTo= dependencies of this unit are fulfilled — if they are also combined with
+ * After=. We do not check Requires= or Requisite= here as they only should have an effect on the job
+ * processing, but do not have any effect afterwards. We don't check BindsTo= dependencies that are not used in
+ * conjunction with After= as for them any such check would make things entirely racy. */
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BINDS_TO]) {
+
+ if (!hashmap_contains(u->dependencies[UNIT_AFTER], other))
+ continue;
+
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(other))) {
+ log_unit_notice(u, "Bound to unit %s, but unit isn't active.", other->id);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* Errors that aren't really errors:
+ * -EALREADY: Unit is already started.
+ * -ECOMM: Condition failed
+ * -EAGAIN: An operation is already in progress. Retry later.
+ *
+ * Errors that are real errors:
+ * -EBADR: This unit type does not support starting.
+ * -ECANCELED: Start limit hit, too many requests for now
+ * -EPROTO: Assert failed
+ * -EINVAL: Unit not loaded
+ * -EOPNOTSUPP: Unit type not supported
+ * -ENOLINK: The necessary dependencies are not fulfilled.
+ * -ESTALE: This unit has been started before and can't be started a second time
+ * -ENOENT: This is a triggering unit and unit to trigger is not loaded
+ */
+int unit_start(Unit *u) {
+ UnitActiveState state;
+ Unit *following;
+
+ assert(u);
+
+ /* If this is already started, then this will succeed. Note that this will even succeed if this unit
+ * is not startable by the user. This is relied on to detect when we need to wait for units and when
+ * waiting is finished. */
+ state = unit_active_state(u);
+ if (UNIT_IS_ACTIVE_OR_RELOADING(state))
+ return -EALREADY;
+ if (state == UNIT_MAINTENANCE)
+ return -EAGAIN;
+
+ /* Units that aren't loaded cannot be started */
+ if (u->load_state != UNIT_LOADED)
+ return -EINVAL;
+
+ /* Refuse starting scope units more than once */
+ if (UNIT_VTABLE(u)->once_only && dual_timestamp_is_set(&u->inactive_enter_timestamp))
+ return -ESTALE;
+
+ /* If the conditions failed, don't do anything at all. If we already are activating this call might
+ * still be useful to speed up activation in case there is some hold-off time, but we don't want to
+ * recheck the condition in that case. */
+ if (state != UNIT_ACTIVATING &&
+ !unit_test_condition(u))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(ECOMM), "Starting requested but condition failed. Not starting unit.");
+
+ /* If the asserts failed, fail the entire job */
+ if (state != UNIT_ACTIVATING &&
+ !unit_test_assert(u))
+ return log_unit_notice_errno(u, SYNTHETIC_ERRNO(EPROTO), "Starting requested but asserts failed.");
+
+ /* Units of types that aren't supported cannot be started. Note that we do this test only after the
+ * condition checks, so that we rather return condition check errors (which are usually not
+ * considered a true failure) than "not supported" errors (which are considered a failure).
+ */
+ if (!unit_type_supported(u->type))
+ return -EOPNOTSUPP;
+
+ /* Let's make sure that the deps really are in order before we start this. Normally the job engine
+ * should have taken care of this already, but let's check this here again. After all, our
+ * dependencies might not be in effect anymore, due to a reload or due to a failed condition. */
+ if (!unit_verify_deps(u))
+ return -ENOLINK;
+
+ /* Forward to the main object, if we aren't it. */
+ following = unit_following(u);
+ if (following) {
+ log_unit_debug(u, "Redirecting start request from %s to %s.", u->id, following->id);
+ return unit_start(following);
+ }
+
+ /* If it is stopped, but we cannot start it, then fail */
+ if (!UNIT_VTABLE(u)->start)
+ return -EBADR;
+
+ /* We don't suppress calls to ->start() here when we are already starting, to allow this request to
+ * be used as a "hurry up" call, for example when the unit is in some "auto restart" state where it
+ * waits for a holdoff timer to elapse before it will start again. */
+
+ unit_add_to_dbus_queue(u);
+ unit_cgroup_freezer_action(u, FREEZER_THAW);
+
+ return UNIT_VTABLE(u)->start(u);
+}
+
+bool unit_can_start(Unit *u) {
+ assert(u);
+
+ if (u->load_state != UNIT_LOADED)
+ return false;
+
+ if (!unit_type_supported(u->type))
+ return false;
+
+ /* Scope units may be started only once */
+ if (UNIT_VTABLE(u)->once_only && dual_timestamp_is_set(&u->inactive_exit_timestamp))
+ return false;
+
+ return !!UNIT_VTABLE(u)->start;
+}
+
+bool unit_can_isolate(Unit *u) {
+ assert(u);
+
+ return unit_can_start(u) &&
+ u->allow_isolate;
+}
+
+/* Errors:
+ * -EBADR: This unit type does not support stopping.
+ * -EALREADY: Unit is already stopped.
+ * -EAGAIN: An operation is already in progress. Retry later.
+ */
+int unit_stop(Unit *u) {
+ UnitActiveState state;
+ Unit *following;
+
+ assert(u);
+
+ state = unit_active_state(u);
+ if (UNIT_IS_INACTIVE_OR_FAILED(state))
+ return -EALREADY;
+
+ following = unit_following(u);
+ if (following) {
+ log_unit_debug(u, "Redirecting stop request from %s to %s.", u->id, following->id);
+ return unit_stop(following);
+ }
+
+ if (!UNIT_VTABLE(u)->stop)
+ return -EBADR;
+
+ unit_add_to_dbus_queue(u);
+ unit_cgroup_freezer_action(u, FREEZER_THAW);
+
+ return UNIT_VTABLE(u)->stop(u);
+}
+
+bool unit_can_stop(Unit *u) {
+ assert(u);
+
+ /* Note: if we return true here, it does not mean that the unit may be successfully stopped.
+ * Extrinsic units follow external state and they may stop following external state changes
+ * (hence we return true here), but an attempt to do this through the manager will fail. */
+
+ if (!unit_type_supported(u->type))
+ return false;
+
+ if (u->perpetual)
+ return false;
+
+ return !!UNIT_VTABLE(u)->stop;
+}
+
+/* Errors:
+ * -EBADR: This unit type does not support reloading.
+ * -ENOEXEC: Unit is not started.
+ * -EAGAIN: An operation is already in progress. Retry later.
+ */
+int unit_reload(Unit *u) {
+ UnitActiveState state;
+ Unit *following;
+
+ assert(u);
+
+ if (u->load_state != UNIT_LOADED)
+ return -EINVAL;
+
+ if (!unit_can_reload(u))
+ return -EBADR;
+
+ state = unit_active_state(u);
+ if (state == UNIT_RELOADING)
+ return -EAGAIN;
+
+ if (state != UNIT_ACTIVE) {
+ log_unit_warning(u, "Unit cannot be reloaded because it is inactive.");
+ return -ENOEXEC;
+ }
+
+ following = unit_following(u);
+ if (following) {
+ log_unit_debug(u, "Redirecting reload request from %s to %s.", u->id, following->id);
+ return unit_reload(following);
+ }
+
+ unit_add_to_dbus_queue(u);
+
+ if (!UNIT_VTABLE(u)->reload) {
+ /* Unit doesn't have a reload function, but we need to propagate the reload anyway */
+ unit_notify(u, unit_active_state(u), unit_active_state(u), 0);
+ return 0;
+ }
+
+ unit_cgroup_freezer_action(u, FREEZER_THAW);
+
+ return UNIT_VTABLE(u)->reload(u);
+}
+
+bool unit_can_reload(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->can_reload)
+ return UNIT_VTABLE(u)->can_reload(u);
+
+ if (!hashmap_isempty(u->dependencies[UNIT_PROPAGATES_RELOAD_TO]))
+ return true;
+
+ return UNIT_VTABLE(u)->reload;
+}
+
+bool unit_is_unneeded(Unit *u) {
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRED_BY,
+ UNIT_REQUISITE_OF,
+ UNIT_WANTED_BY,
+ UNIT_BOUND_BY,
+ };
+
+ assert(u);
+
+ if (!u->stop_when_unneeded)
+ return false;
+
+ /* Don't clean up while the unit is transitioning or is even inactive. */
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
+ return false;
+ if (u->job)
+ return false;
+
+ for (size_t j = 0; j < ELEMENTSOF(deps); j++) {
+ Unit *other;
+ void *v;
+
+ /* If a dependent unit has a job queued, is active or transitioning, or is marked for
+ * restart, then don't clean this one up. */
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[deps[j]]) {
+ if (other->job)
+ return false;
+
+ if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
+ return false;
+
+ if (unit_will_restart(other))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void check_unneeded_dependencies(Unit *u) {
+
+ static const UnitDependency deps[] = {
+ UNIT_REQUIRES,
+ UNIT_REQUISITE,
+ UNIT_WANTS,
+ UNIT_BINDS_TO,
+ };
+
+ assert(u);
+
+ /* Add all units this unit depends on to the queue that processes StopWhenUnneeded= behaviour. */
+
+ for (size_t j = 0; j < ELEMENTSOF(deps); j++) {
+ Unit *other;
+ void *v;
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[deps[j]])
+ unit_submit_to_stop_when_unneeded_queue(other);
+ }
+}
+
+static void unit_check_binds_to(Unit *u) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool stop = false;
+ Unit *other;
+ void *v;
+ int r;
+
+ assert(u);
+
+ if (u->job)
+ return;
+
+ if (unit_active_state(u) != UNIT_ACTIVE)
+ return;
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BINDS_TO]) {
+ if (other->job)
+ continue;
+
+ if (!other->coldplugged)
+ /* We might yet create a job for the other unit… */
+ continue;
+
+ if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
+ continue;
+
+ stop = true;
+ break;
+ }
+
+ if (!stop)
+ return;
+
+ /* If stopping a unit fails continuously we might enter a stop
+ * loop here, hence stop acting on the service being
+ * unnecessary after a while. */
+ if (!ratelimit_below(&u->auto_stop_ratelimit)) {
+ log_unit_warning(u, "Unit is bound to inactive unit %s, but not stopping since we tried this too often recently.", other->id);
+ return;
+ }
+
+ assert(other);
+ log_unit_info(u, "Unit is bound to inactive unit %s. Stopping, too.", other->id);
+
+ /* A unit we need to run is gone. Sniff. Let's stop this. */
+ r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, NULL, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
+}
+
+static void retroactively_start_dependencies(Unit *u) {
+ Unit *other;
+ void *v;
+
+ assert(u);
+ assert(UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u)));
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_REQUIRES])
+ if (!hashmap_get(u->dependencies[UNIT_AFTER], other) &&
+ !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_START, other, JOB_REPLACE, NULL, NULL, NULL);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BINDS_TO])
+ if (!hashmap_get(u->dependencies[UNIT_AFTER], other) &&
+ !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_START, other, JOB_REPLACE, NULL, NULL, NULL);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_WANTS])
+ if (!hashmap_get(u->dependencies[UNIT_AFTER], other) &&
+ !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_START, other, JOB_FAIL, NULL, NULL, NULL);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_CONFLICTS])
+ if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL, NULL);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_CONFLICTED_BY])
+ if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL, NULL);
+}
+
+static void retroactively_stop_dependencies(Unit *u) {
+ Unit *other;
+ void *v;
+
+ assert(u);
+ assert(UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(u)));
+
+ /* Pull down units which are bound to us recursively if enabled */
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_BOUND_BY])
+ if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
+ manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL, NULL);
+}
+
+void unit_start_on_failure(Unit *u) {
+ Unit *other;
+ void *v;
+ int r;
+
+ assert(u);
+
+ if (hashmap_size(u->dependencies[UNIT_ON_FAILURE]) <= 0)
+ return;
+
+ log_unit_info(u, "Triggering OnFailure= dependencies.");
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_ON_FAILURE]) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_add_job(u->manager, JOB_START, other, u->on_failure_job_mode, NULL, &error, NULL);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to enqueue OnFailure= job, ignoring: %s", bus_error_message(&error, r));
+ }
+}
+
+void unit_trigger_notify(Unit *u) {
+ Unit *other;
+ void *v;
+
+ assert(u);
+
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_TRIGGERED_BY])
+ if (UNIT_VTABLE(other)->trigger_notify)
+ UNIT_VTABLE(other)->trigger_notify(other, u);
+}
+
+static int raise_level(int log_level, bool condition_info, bool condition_notice) {
+ if (condition_notice && log_level > LOG_NOTICE)
+ return LOG_NOTICE;
+ if (condition_info && log_level > LOG_INFO)
+ return LOG_INFO;
+ return log_level;
+}
+
+static int unit_log_resources(Unit *u) {
+ struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + _CGROUP_IO_ACCOUNTING_METRIC_MAX + 4];
+ bool any_traffic = false, have_ip_accounting = false, any_io = false, have_io_accounting = false;
+ _cleanup_free_ char *igress = NULL, *egress = NULL, *rr = NULL, *wr = NULL;
+ int log_level = LOG_DEBUG; /* May be raised if resources consumed over a threshold */
+ size_t n_message_parts = 0, n_iovec = 0;
+ char* message_parts[1 + 2 + 2 + 1], *t;
+ nsec_t nsec = NSEC_INFINITY;
+ int r;
+ const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "IP_METRIC_INGRESS_BYTES",
+ [CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
+ [CGROUP_IP_EGRESS_BYTES] = "IP_METRIC_EGRESS_BYTES",
+ [CGROUP_IP_EGRESS_PACKETS] = "IP_METRIC_EGRESS_PACKETS",
+ };
+ const char* const io_fields[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IO_READ_BYTES] = "IO_METRIC_READ_BYTES",
+ [CGROUP_IO_WRITE_BYTES] = "IO_METRIC_WRITE_BYTES",
+ [CGROUP_IO_READ_OPERATIONS] = "IO_METRIC_READ_OPERATIONS",
+ [CGROUP_IO_WRITE_OPERATIONS] = "IO_METRIC_WRITE_OPERATIONS",
+ };
+
+ assert(u);
+
+ /* Invoked whenever a unit enters failed or dead state. Logs information about consumed resources if resource
+ * accounting was enabled for a unit. It does this in two ways: a friendly human readable string with reduced
+ * information and the complete data in structured fields. */
+
+ (void) unit_get_cpu_usage(u, &nsec);
+ if (nsec != NSEC_INFINITY) {
+ char buf[FORMAT_TIMESPAN_MAX] = "";
+
+ /* Format the CPU time for inclusion in the structured log message */
+ if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, nsec) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the CPU time for inclusion in the human language message string */
+ format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC);
+ t = strjoin("consumed ", buf, " CPU time");
+ if (!t) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = t;
+
+ log_level = raise_level(log_level,
+ nsec > NOTICEWORTHY_CPU_NSEC,
+ nsec > MENTIONWORTHY_CPU_NSEC);
+ }
+
+ for (CGroupIOAccountingMetric k = 0; k < _CGROUP_IO_ACCOUNTING_METRIC_MAX; k++) {
+ char buf[FORMAT_BYTES_MAX] = "";
+ uint64_t value = UINT64_MAX;
+
+ assert(io_fields[k]);
+
+ (void) unit_get_io_accounting(u, k, k > 0, &value);
+ if (value == UINT64_MAX)
+ continue;
+
+ have_io_accounting = true;
+ if (value > 0)
+ any_io = true;
+
+ /* Format IO accounting data for inclusion in the structured log message */
+ if (asprintf(&t, "%s=%" PRIu64, io_fields[k], value) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the IO accounting data for inclusion in the human language message string, but only
+ * for the bytes counters (and not for the operations counters) */
+ if (k == CGROUP_IO_READ_BYTES) {
+ assert(!rr);
+ rr = strjoin("read ", format_bytes(buf, sizeof(buf), value), " from disk");
+ if (!rr) {
+ r = log_oom();
+ goto finish;
+ }
+ } else if (k == CGROUP_IO_WRITE_BYTES) {
+ assert(!wr);
+ wr = strjoin("written ", format_bytes(buf, sizeof(buf), value), " to disk");
+ if (!wr) {
+ r = log_oom();
+ goto finish;
+ }
+ }
+
+ if (IN_SET(k, CGROUP_IO_READ_BYTES, CGROUP_IO_WRITE_BYTES))
+ log_level = raise_level(log_level,
+ value > MENTIONWORTHY_IO_BYTES,
+ value > NOTICEWORTHY_IO_BYTES);
+ }
+
+ if (have_io_accounting) {
+ if (any_io) {
+ if (rr)
+ message_parts[n_message_parts++] = TAKE_PTR(rr);
+ if (wr)
+ message_parts[n_message_parts++] = TAKE_PTR(wr);
+
+ } else {
+ char *k;
+
+ k = strdup("no IO");
+ if (!k) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = k;
+ }
+ }
+
+ for (CGroupIPAccountingMetric m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+ char buf[FORMAT_BYTES_MAX] = "";
+ uint64_t value = UINT64_MAX;
+
+ assert(ip_fields[m]);
+
+ (void) unit_get_ip_accounting(u, m, &value);
+ if (value == UINT64_MAX)
+ continue;
+
+ have_ip_accounting = true;
+ if (value > 0)
+ any_traffic = true;
+
+ /* Format IP accounting data for inclusion in the structured log message */
+ if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
+ r = log_oom();
+ goto finish;
+ }
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+ /* Format the IP accounting data for inclusion in the human language message string, but only for the
+ * bytes counters (and not for the packets counters) */
+ if (m == CGROUP_IP_INGRESS_BYTES) {
+ assert(!igress);
+ igress = strjoin("received ", format_bytes(buf, sizeof(buf), value), " IP traffic");
+ if (!igress) {
+ r = log_oom();
+ goto finish;
+ }
+ } else if (m == CGROUP_IP_EGRESS_BYTES) {
+ assert(!egress);
+ egress = strjoin("sent ", format_bytes(buf, sizeof(buf), value), " IP traffic");
+ if (!egress) {
+ r = log_oom();
+ goto finish;
+ }
+ }
+
+ if (IN_SET(m, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES))
+ log_level = raise_level(log_level,
+ value > MENTIONWORTHY_IP_BYTES,
+ value > NOTICEWORTHY_IP_BYTES);
+ }
+
+ if (have_ip_accounting) {
+ if (any_traffic) {
+ if (igress)
+ message_parts[n_message_parts++] = TAKE_PTR(igress);
+ if (egress)
+ message_parts[n_message_parts++] = TAKE_PTR(egress);
+
+ } else {
+ char *k;
+
+ k = strdup("no IP traffic");
+ if (!k) {
+ r = log_oom();
+ goto finish;
+ }
+
+ message_parts[n_message_parts++] = k;
+ }
+ }
+
+ /* Is there any accounting data available at all? */
+ if (n_iovec == 0) {
+ r = 0;
+ goto finish;
+ }
+
+ if (n_message_parts == 0)
+ t = strjoina("MESSAGE=", u->id, ": Completed.");
+ else {
+ _cleanup_free_ char *joined;
+
+ message_parts[n_message_parts] = NULL;
+
+ joined = strv_join(message_parts, ", ");
+ if (!joined) {
+ r = log_oom();
+ goto finish;
+ }
+
+ joined[0] = ascii_toupper(joined[0]);
+ t = strjoina("MESSAGE=", u->id, ": ", joined, ".");
+ }
+
+ /* The following four fields we allocate on the stack or are static strings, we hence don't want to free them,
+ * and hence don't increase n_iovec for them */
+ iovec[n_iovec] = IOVEC_MAKE_STRING(t);
+ iovec[n_iovec + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);
+
+ t = strjoina(u->manager->unit_log_field, u->id);
+ iovec[n_iovec + 2] = IOVEC_MAKE_STRING(t);
+
+ t = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
+ iovec[n_iovec + 3] = IOVEC_MAKE_STRING(t);
+
+ log_struct_iovec(log_level, iovec, n_iovec + 4);
+ r = 0;
+
+finish:
+ for (size_t i = 0; i < n_message_parts; i++)
+ free(message_parts[i]);
+
+ for (size_t i = 0; i < n_iovec; i++)
+ free(iovec[i].iov_base);
+
+ return r;
+
+}
+
+static void unit_update_on_console(Unit *u) {
+ bool b;
+
+ assert(u);
+
+ b = unit_needs_console(u);
+ if (u->on_console == b)
+ return;
+
+ u->on_console = b;
+ if (b)
+ manager_ref_console(u->manager);
+ else
+ manager_unref_console(u->manager);
+}
+
+static void unit_emit_audit_start(Unit *u) {
+ assert(u);
+
+ if (u->type != UNIT_SERVICE)
+ return;
+
+ /* Write audit record if we have just finished starting up */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_START, true);
+ u->in_audit = true;
+}
+
+static void unit_emit_audit_stop(Unit *u, UnitActiveState state) {
+ assert(u);
+
+ if (u->type != UNIT_SERVICE)
+ return;
+
+ if (u->in_audit) {
+ /* Write audit record if we have just finished shutting down */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_STOP, state == UNIT_INACTIVE);
+ u->in_audit = false;
+ } else {
+ /* Hmm, if there was no start record written write it now, so that we always have a nice pair */
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_START, state == UNIT_INACTIVE);
+
+ if (state == UNIT_INACTIVE)
+ manager_send_unit_audit(u->manager, u, AUDIT_SERVICE_STOP, true);
+ }
+}
+
+static bool unit_process_job(Job *j, UnitActiveState ns, UnitNotifyFlags flags) {
+ bool unexpected = false;
+ JobResult result;
+
+ assert(j);
+
+ if (j->state == JOB_WAITING)
+
+ /* So we reached a different state for this job. Let's see if we can run it now if it failed previously
+ * due to EAGAIN. */
+ job_add_to_run_queue(j);
+
+ /* Let's check whether the unit's new state constitutes a finished job, or maybe contradicts a running job and
+ * hence needs to invalidate jobs. */
+
+ switch (j->type) {
+
+ case JOB_START:
+ case JOB_VERIFY_ACTIVE:
+
+ if (UNIT_IS_ACTIVE_OR_RELOADING(ns))
+ job_finish_and_invalidate(j, JOB_DONE, true, false);
+ else if (j->state == JOB_RUNNING && ns != UNIT_ACTIVATING) {
+ unexpected = true;
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
+ if (ns == UNIT_FAILED)
+ result = JOB_FAILED;
+ else if (FLAGS_SET(flags, UNIT_NOTIFY_SKIP_CONDITION))
+ result = JOB_SKIPPED;
+ else
+ result = JOB_DONE;
+
+ job_finish_and_invalidate(j, result, true, false);
+ }
+ }
+
+ break;
+
+ case JOB_RELOAD:
+ case JOB_RELOAD_OR_START:
+ case JOB_TRY_RELOAD:
+
+ if (j->state == JOB_RUNNING) {
+ if (ns == UNIT_ACTIVE)
+ job_finish_and_invalidate(j, (flags & UNIT_NOTIFY_RELOAD_FAILURE) ? JOB_FAILED : JOB_DONE, true, false);
+ else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING)) {
+ unexpected = true;
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ job_finish_and_invalidate(j, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
+ }
+ }
+
+ break;
+
+ case JOB_STOP:
+ case JOB_RESTART:
+ case JOB_TRY_RESTART:
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ job_finish_and_invalidate(j, JOB_DONE, true, false);
+ else if (j->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) {
+ unexpected = true;
+ job_finish_and_invalidate(j, JOB_FAILED, true, false);
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Job type unknown");
+ }
+
+ return unexpected;
+}
+
+void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags) {
+ const char *reason;
+ Manager *m;
+
+ assert(u);
+ assert(os < _UNIT_ACTIVE_STATE_MAX);
+ assert(ns < _UNIT_ACTIVE_STATE_MAX);
+
+ /* Note that this is called for all low-level state changes, even if they might map to the same high-level
+ * UnitActiveState! That means that ns == os is an expected behavior here. For example: if a mount point is
+ * remounted this function will be called too! */
+
+ m = u->manager;
+
+ /* Let's enqueue the change signal early. In case this unit has a job associated we want that this unit is in
+ * the bus queue, so that any job change signal queued will force out the unit change signal first. */
+ unit_add_to_dbus_queue(u);
+
+ /* Update systemd-oomd on the property/state change */
+ if (os != ns) {
+ /* Always send an update if the unit is going into an inactive state so systemd-oomd knows to stop
+ * monitoring.
+ * Also send an update whenever the unit goes active; this is to handle a case where an override file
+ * sets one of the ManagedOOM*= properties to "kill", then later removes it. systemd-oomd needs to
+ * know to stop monitoring when the unit changes from "kill" -> "auto" on daemon-reload, but we don't
+ * have the information on the property. Thus, indiscriminately send an update. */
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns) || UNIT_IS_ACTIVE_OR_RELOADING(ns))
+ (void) manager_varlink_send_managed_oom_update(u);
+ }
+
+ /* Update timestamps for state changes */
+ if (!MANAGER_IS_RELOADING(m)) {
+ dual_timestamp_get(&u->state_change_timestamp);
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(os) && !UNIT_IS_INACTIVE_OR_FAILED(ns))
+ u->inactive_exit_timestamp = u->state_change_timestamp;
+ else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && UNIT_IS_INACTIVE_OR_FAILED(ns))
+ u->inactive_enter_timestamp = u->state_change_timestamp;
+
+ if (!UNIT_IS_ACTIVE_OR_RELOADING(os) && UNIT_IS_ACTIVE_OR_RELOADING(ns))
+ u->active_enter_timestamp = u->state_change_timestamp;
+ else if (UNIT_IS_ACTIVE_OR_RELOADING(os) && !UNIT_IS_ACTIVE_OR_RELOADING(ns))
+ u->active_exit_timestamp = u->state_change_timestamp;
+ }
+
+ /* Keep track of failed units */
+ (void) manager_update_failed_units(m, u, ns == UNIT_FAILED);
+
+ /* Make sure the cgroup and state files are always removed when we become inactive */
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
+ unit_prune_cgroup(u);
+ unit_unlink_state_files(u);
+ }
+
+ unit_update_on_console(u);
+
+ if (!MANAGER_IS_RELOADING(m)) {
+ bool unexpected;
+
+ /* Let's propagate state changes to the job */
+ if (u->job)
+ unexpected = unit_process_job(u->job, ns, flags);
+ else
+ unexpected = true;
+
+ /* If this state change happened without being requested by a job, then let's retroactively start or
+ * stop dependencies. We skip that step when deserializing, since we don't want to create any
+ * additional jobs just because something is already activated. */
+
+ if (unexpected) {
+ if (UNIT_IS_INACTIVE_OR_FAILED(os) && UNIT_IS_ACTIVE_OR_ACTIVATING(ns))
+ retroactively_start_dependencies(u);
+ else if (UNIT_IS_ACTIVE_OR_ACTIVATING(os) && UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
+ retroactively_stop_dependencies(u);
+ }
+
+ /* stop unneeded units regardless if going down was expected or not */
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns))
+ check_unneeded_dependencies(u);
+
+ if (ns != os && ns == UNIT_FAILED) {
+ log_unit_debug(u, "Unit entered failed state.");
+
+ if (!(flags & UNIT_NOTIFY_WILL_AUTO_RESTART))
+ unit_start_on_failure(u);
+ }
+
+ if (UNIT_IS_ACTIVE_OR_RELOADING(ns) && !UNIT_IS_ACTIVE_OR_RELOADING(os)) {
+ /* This unit just finished starting up */
+
+ unit_emit_audit_start(u);
+ manager_send_unit_plymouth(m, u);
+ }
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(ns) && !UNIT_IS_INACTIVE_OR_FAILED(os)) {
+ /* This unit just stopped/failed. */
+
+ unit_emit_audit_stop(u, ns);
+ unit_log_resources(u);
+ }
+ }
+
+ manager_recheck_journal(m);
+ manager_recheck_dbus(m);
+
+ unit_trigger_notify(u);
+
+ if (!MANAGER_IS_RELOADING(m)) {
+ /* Maybe we finished startup and are now ready for being stopped because unneeded? */
+ unit_submit_to_stop_when_unneeded_queue(u);
+
+ /* Maybe we finished startup, but something we needed has vanished? Let's die then. (This happens when
+ * something BindsTo= to a Type=oneshot unit, as these units go directly from starting to inactive,
+ * without ever entering started.) */
+ unit_check_binds_to(u);
+
+ if (os != UNIT_FAILED && ns == UNIT_FAILED) {
+ reason = strjoina("unit ", u->id, " failed");
+ emergency_action(m, u->failure_action, 0, u->reboot_arg, unit_failure_action_exit_status(u), reason);
+ } else if (!UNIT_IS_INACTIVE_OR_FAILED(os) && ns == UNIT_INACTIVE) {
+ reason = strjoina("unit ", u->id, " succeeded");
+ emergency_action(m, u->success_action, 0, u->reboot_arg, unit_success_action_exit_status(u), reason);
+ }
+ }
+
+ unit_add_to_gc_queue(u);
+}
+
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive) {
+ int r;
+
+ assert(u);
+ assert(pid_is_valid(pid));
+
+ /* Watch a specific PID */
+
+ /* Caller might be sure that this PID belongs to this unit only. Let's take this
+ * opportunity to remove any stalled references to this PID as they can be created
+ * easily (when watching a process which is not our direct child). */
+ if (exclusive)
+ manager_unwatch_pid(u->manager, pid);
+
+ r = set_ensure_allocated(&u->pids, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&u->manager->watch_pids, NULL);
+ if (r < 0)
+ return r;
+
+ /* First try, let's add the unit keyed by "pid". */
+ r = hashmap_put(u->manager->watch_pids, PID_TO_PTR(pid), u);
+ if (r == -EEXIST) {
+ Unit **array;
+ bool found = false;
+ size_t n = 0;
+
+ /* OK, the "pid" key is already assigned to a different unit. Let's see if the "-pid" key (which points
+ * to an array of Units rather than just a Unit), lists us already. */
+
+ array = hashmap_get(u->manager->watch_pids, PID_TO_PTR(-pid));
+ if (array)
+ for (; array[n]; n++)
+ if (array[n] == u)
+ found = true;
+
+ if (found) /* Found it already? if so, do nothing */
+ r = 0;
+ else {
+ Unit **new_array;
+
+ /* Allocate a new array */
+ new_array = new(Unit*, n + 2);
+ if (!new_array)
+ return -ENOMEM;
+
+ memcpy_safe(new_array, array, sizeof(Unit*) * n);
+ new_array[n] = u;
+ new_array[n+1] = NULL;
+
+ /* Add or replace the old array */
+ r = hashmap_replace(u->manager->watch_pids, PID_TO_PTR(-pid), new_array);
+ if (r < 0) {
+ free(new_array);
+ return r;
+ }
+
+ free(array);
+ }
+ } else if (r < 0)
+ return r;
+
+ r = set_put(u->pids, PID_TO_PTR(pid));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+void unit_unwatch_pid(Unit *u, pid_t pid) {
+ Unit **array;
+
+ assert(u);
+ assert(pid_is_valid(pid));
+
+ /* First let's drop the unit in case it's keyed as "pid". */
+ (void) hashmap_remove_value(u->manager->watch_pids, PID_TO_PTR(pid), u);
+
+ /* Then, let's also drop the unit, in case it's in the array keyed by -pid */
+ array = hashmap_get(u->manager->watch_pids, PID_TO_PTR(-pid));
+ if (array) {
+ /* Let's iterate through the array, dropping our own entry */
+
+ size_t m = 0;
+ for (size_t n = 0; array[n]; n++)
+ if (array[n] != u)
+ array[m++] = array[n];
+ array[m] = NULL;
+
+ if (m == 0) {
+ /* The array is now empty, remove the entire entry */
+ assert_se(hashmap_remove(u->manager->watch_pids, PID_TO_PTR(-pid)) == array);
+ free(array);
+ }
+ }
+
+ (void) set_remove(u->pids, PID_TO_PTR(pid));
+}
+
+void unit_unwatch_all_pids(Unit *u) {
+ assert(u);
+
+ while (!set_isempty(u->pids))
+ unit_unwatch_pid(u, PTR_TO_PID(set_first(u->pids)));
+
+ u->pids = set_free(u->pids);
+}
+
+static void unit_tidy_watch_pids(Unit *u) {
+ pid_t except1, except2;
+ void *e;
+
+ assert(u);
+
+ /* Cleans dead PIDs from our list */
+
+ except1 = unit_main_pid(u);
+ except2 = unit_control_pid(u);
+
+ SET_FOREACH(e, u->pids) {
+ pid_t pid = PTR_TO_PID(e);
+
+ if (pid == except1 || pid == except2)
+ continue;
+
+ if (!pid_is_unwaited(pid))
+ unit_unwatch_pid(u, pid);
+ }
+}
+
+static int on_rewatch_pids_event(sd_event_source *s, void *userdata) {
+ Unit *u = userdata;
+
+ assert(s);
+ assert(u);
+
+ unit_tidy_watch_pids(u);
+ unit_watch_all_pids(u);
+
+ /* If the PID set is empty now, then let's finish this off. */
+ unit_synthesize_cgroup_empty_event(u);
+
+ return 0;
+}
+
+int unit_enqueue_rewatch_pids(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (!u->cgroup_path)
+ return -ENOENT;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return r;
+ if (r > 0) /* On unified we can use proper notifications */
+ return 0;
+
+ /* Enqueues a low-priority job that will clean up dead PIDs from our list of PIDs to watch and subscribe to new
+ * PIDs that might have appeared. We do this in a delayed job because the work might be quite slow, as it
+ * involves issuing kill(pid, 0) on all processes we watch. */
+
+ if (!u->rewatch_pids_event_source) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+
+ r = sd_event_add_defer(u->manager->event, &s, on_rewatch_pids_event, u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event source for tidying watched PIDs: %m");
+
+ r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust priority of event source for tidying watched PIDs: %m");
+
+ (void) sd_event_source_set_description(s, "tidy-watch-pids");
+
+ u->rewatch_pids_event_source = TAKE_PTR(s);
+ }
+
+ r = sd_event_source_set_enabled(u->rewatch_pids_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable event source for tidying watched PIDs: %m");
+
+ return 0;
+}
+
+void unit_dequeue_rewatch_pids(Unit *u) {
+ int r;
+ assert(u);
+
+ if (!u->rewatch_pids_event_source)
+ return;
+
+ r = sd_event_source_set_enabled(u->rewatch_pids_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ log_warning_errno(r, "Failed to disable event source for tidying watched PIDs, ignoring: %m");
+
+ u->rewatch_pids_event_source = sd_event_source_unref(u->rewatch_pids_event_source);
+}
+
+bool unit_job_is_applicable(Unit *u, JobType j) {
+ assert(u);
+ assert(j >= 0 && j < _JOB_TYPE_MAX);
+
+ switch (j) {
+
+ case JOB_VERIFY_ACTIVE:
+ case JOB_START:
+ case JOB_NOP:
+ /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not
+ * startable by us but may appear due to external events, and it thus makes sense to permit enqueuing
+ * jobs for it. */
+ return true;
+
+ case JOB_STOP:
+ /* Similar as above. However, perpetual units can never be stopped (neither explicitly nor due to
+ * external events), hence it makes no sense to permit enqueuing such a request either. */
+ return !u->perpetual;
+
+ case JOB_RESTART:
+ case JOB_TRY_RESTART:
+ return unit_can_stop(u) && unit_can_start(u);
+
+ case JOB_RELOAD:
+ case JOB_TRY_RELOAD:
+ return unit_can_reload(u);
+
+ case JOB_RELOAD_OR_START:
+ return unit_can_reload(u) && unit_can_start(u);
+
+ default:
+ assert_not_reached("Invalid job type");
+ }
+}
+
+static void maybe_warn_about_dependency(Unit *u, const char *other, UnitDependency dependency) {
+ assert(u);
+
+ /* Only warn about some unit types */
+ if (!IN_SET(dependency, UNIT_CONFLICTS, UNIT_CONFLICTED_BY, UNIT_BEFORE, UNIT_AFTER, UNIT_ON_FAILURE, UNIT_TRIGGERS, UNIT_TRIGGERED_BY))
+ return;
+
+ if (streq_ptr(u->id, other))
+ log_unit_warning(u, "Dependency %s=%s dropped", unit_dependency_to_string(dependency), u->id);
+ else
+ log_unit_warning(u, "Dependency %s=%s dropped, merged into %s", unit_dependency_to_string(dependency), strna(other), u->id);
+}
+
+static int unit_add_dependency_hashmap(
+ Hashmap **h,
+ Unit *other,
+ UnitDependencyMask origin_mask,
+ UnitDependencyMask destination_mask) {
+
+ UnitDependencyInfo info;
+ int r;
+
+ assert(h);
+ assert(other);
+ assert(origin_mask < _UNIT_DEPENDENCY_MASK_FULL);
+ assert(destination_mask < _UNIT_DEPENDENCY_MASK_FULL);
+ assert(origin_mask > 0 || destination_mask > 0);
+
+ r = hashmap_ensure_allocated(h, NULL);
+ if (r < 0)
+ return r;
+
+ assert_cc(sizeof(void*) == sizeof(info));
+
+ info.data = hashmap_get(*h, other);
+ if (info.data) {
+ /* Entry already exists. Add in our mask. */
+
+ if (FLAGS_SET(origin_mask, info.origin_mask) &&
+ FLAGS_SET(destination_mask, info.destination_mask))
+ return 0; /* NOP */
+
+ info.origin_mask |= origin_mask;
+ info.destination_mask |= destination_mask;
+
+ r = hashmap_update(*h, other, info.data);
+ } else {
+ info = (UnitDependencyInfo) {
+ .origin_mask = origin_mask,
+ .destination_mask = destination_mask,
+ };
+
+ r = hashmap_put(*h, other, info.data);
+ }
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int unit_add_dependency(
+ Unit *u,
+ UnitDependency d,
+ Unit *other,
+ bool add_reference,
+ UnitDependencyMask mask) {
+
+ static const UnitDependency inverse_table[_UNIT_DEPENDENCY_MAX] = {
+ [UNIT_REQUIRES] = UNIT_REQUIRED_BY,
+ [UNIT_WANTS] = UNIT_WANTED_BY,
+ [UNIT_REQUISITE] = UNIT_REQUISITE_OF,
+ [UNIT_BINDS_TO] = UNIT_BOUND_BY,
+ [UNIT_PART_OF] = UNIT_CONSISTS_OF,
+ [UNIT_REQUIRED_BY] = UNIT_REQUIRES,
+ [UNIT_REQUISITE_OF] = UNIT_REQUISITE,
+ [UNIT_WANTED_BY] = UNIT_WANTS,
+ [UNIT_BOUND_BY] = UNIT_BINDS_TO,
+ [UNIT_CONSISTS_OF] = UNIT_PART_OF,
+ [UNIT_CONFLICTS] = UNIT_CONFLICTED_BY,
+ [UNIT_CONFLICTED_BY] = UNIT_CONFLICTS,
+ [UNIT_BEFORE] = UNIT_AFTER,
+ [UNIT_AFTER] = UNIT_BEFORE,
+ [UNIT_ON_FAILURE] = _UNIT_DEPENDENCY_INVALID,
+ [UNIT_REFERENCES] = UNIT_REFERENCED_BY,
+ [UNIT_REFERENCED_BY] = UNIT_REFERENCES,
+ [UNIT_TRIGGERS] = UNIT_TRIGGERED_BY,
+ [UNIT_TRIGGERED_BY] = UNIT_TRIGGERS,
+ [UNIT_PROPAGATES_RELOAD_TO] = UNIT_RELOAD_PROPAGATED_FROM,
+ [UNIT_RELOAD_PROPAGATED_FROM] = UNIT_PROPAGATES_RELOAD_TO,
+ [UNIT_JOINS_NAMESPACE_OF] = UNIT_JOINS_NAMESPACE_OF,
+ };
+ Unit *original_u = u, *original_other = other;
+ int r;
+ /* Helper to know whether sending a notification is necessary or not:
+ * if the dependency is already there, no need to notify! */
+ bool noop = true;
+
+ assert(u);
+ assert(d >= 0 && d < _UNIT_DEPENDENCY_MAX);
+ assert(other);
+
+ u = unit_follow_merge(u);
+ other = unit_follow_merge(other);
+
+ /* We won't allow dependencies on ourselves. We will not
+ * consider them an error however. */
+ if (u == other) {
+ maybe_warn_about_dependency(original_u, original_other->id, d);
+ return 0;
+ }
+
+ /* Note that ordering a device unit after a unit is permitted since it
+ * allows to start its job running timeout at a specific time. */
+ if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) {
+ log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id);
+ return 0;
+ }
+
+ if (d == UNIT_ON_FAILURE && !UNIT_VTABLE(u)->can_fail) {
+ log_unit_warning(u, "Requested dependency OnFailure=%s ignored (%s units cannot fail).", other->id, unit_type_to_string(u->type));
+ return 0;
+ }
+
+ if (d == UNIT_TRIGGERS && !UNIT_VTABLE(u)->can_trigger)
+ return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "Requested dependency Triggers=%s refused (%s units cannot trigger other units).", other->id, unit_type_to_string(u->type));
+ if (d == UNIT_TRIGGERED_BY && !UNIT_VTABLE(other)->can_trigger)
+ return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "Requested dependency TriggeredBy=%s refused (%s units cannot trigger other units).", other->id, unit_type_to_string(other->type));
+
+ r = unit_add_dependency_hashmap(u->dependencies + d, other, mask, 0);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ noop = false;
+
+ if (inverse_table[d] != _UNIT_DEPENDENCY_INVALID && inverse_table[d] != d) {
+ r = unit_add_dependency_hashmap(other->dependencies + inverse_table[d], u, 0, mask);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ noop = false;
+ }
+
+ if (add_reference) {
+ r = unit_add_dependency_hashmap(u->dependencies + UNIT_REFERENCES, other, mask, 0);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ noop = false;
+
+ r = unit_add_dependency_hashmap(other->dependencies + UNIT_REFERENCED_BY, u, 0, mask);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ noop = false;
+ }
+
+ if (!noop)
+ unit_add_to_dbus_queue(u);
+ return 0;
+}
+
+int unit_add_two_dependencies(Unit *u, UnitDependency d, UnitDependency e, Unit *other, bool add_reference, UnitDependencyMask mask) {
+ int r;
+
+ assert(u);
+
+ r = unit_add_dependency(u, d, other, add_reference, mask);
+ if (r < 0)
+ return r;
+
+ return unit_add_dependency(u, e, other, add_reference, mask);
+}
+
+static int resolve_template(Unit *u, const char *name, char **buf, const char **ret) {
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(buf);
+ assert(ret);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ *buf = NULL;
+ *ret = name;
+ return 0;
+ }
+
+ if (u->instance)
+ r = unit_name_replace_instance(name, u->instance, buf);
+ else {
+ _cleanup_free_ char *i = NULL;
+
+ r = unit_name_to_prefix(u->id, &i);
+ if (r < 0)
+ return r;
+
+ r = unit_name_replace_instance(name, i, buf);
+ }
+ if (r < 0)
+ return r;
+
+ *ret = *buf;
+ return 0;
+}
+
+int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, bool add_reference, UnitDependencyMask mask) {
+ _cleanup_free_ char *buf = NULL;
+ Unit *other;
+ int r;
+
+ assert(u);
+ assert(name);
+
+ r = resolve_template(u, name, &buf, &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(u->manager, name, NULL, NULL, &other);
+ if (r < 0)
+ return r;
+
+ return unit_add_dependency(u, d, other, add_reference, mask);
+}
+
+int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, bool add_reference, UnitDependencyMask mask) {
+ _cleanup_free_ char *buf = NULL;
+ Unit *other;
+ int r;
+
+ assert(u);
+ assert(name);
+
+ r = resolve_template(u, name, &buf, &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(u->manager, name, NULL, NULL, &other);
+ if (r < 0)
+ return r;
+
+ return unit_add_two_dependencies(u, d, e, other, add_reference, mask);
+}
+
+int set_unit_path(const char *p) {
+ /* This is mostly for debug purposes */
+ if (setenv("SYSTEMD_UNIT_PATH", p, 1) < 0)
+ return -errno;
+
+ return 0;
+}
+
+char *unit_dbus_path(Unit *u) {
+ assert(u);
+
+ if (!u->id)
+ return NULL;
+
+ return unit_dbus_path_from_name(u->id);
+}
+
+char *unit_dbus_path_invocation_id(Unit *u) {
+ assert(u);
+
+ if (sd_id128_is_null(u->invocation_id))
+ return NULL;
+
+ return unit_dbus_path_from_name(u->invocation_id_string);
+}
+
+static int unit_set_invocation_id(Unit *u, sd_id128_t id) {
+ int r;
+
+ assert(u);
+
+ /* Set the invocation ID for this unit. If we cannot, this will not roll back, but reset the whole thing. */
+
+ if (sd_id128_equal(u->invocation_id, id))
+ return 0;
+
+ if (!sd_id128_is_null(u->invocation_id))
+ (void) hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
+ if (sd_id128_is_null(id)) {
+ r = 0;
+ goto reset;
+ }
+
+ r = hashmap_ensure_allocated(&u->manager->units_by_invocation_id, &id128_hash_ops);
+ if (r < 0)
+ goto reset;
+
+ u->invocation_id = id;
+ sd_id128_to_string(id, u->invocation_id_string);
+
+ r = hashmap_put(u->manager->units_by_invocation_id, &u->invocation_id, u);
+ if (r < 0)
+ goto reset;
+
+ return 0;
+
+reset:
+ u->invocation_id = SD_ID128_NULL;
+ u->invocation_id_string[0] = 0;
+ return r;
+}
+
+int unit_set_slice(Unit *u, Unit *slice) {
+ assert(u);
+ assert(slice);
+
+ /* Sets the unit slice if it has not been set before. Is extra
+ * careful, to only allow this for units that actually have a
+ * cgroup context. Also, we don't allow to set this for slices
+ * (since the parent slice is derived from the name). Make
+ * sure the unit we set is actually a slice. */
+
+ if (!UNIT_HAS_CGROUP_CONTEXT(u))
+ return -EOPNOTSUPP;
+
+ if (u->type == UNIT_SLICE)
+ return -EINVAL;
+
+ if (unit_active_state(u) != UNIT_INACTIVE)
+ return -EBUSY;
+
+ if (slice->type != UNIT_SLICE)
+ return -EINVAL;
+
+ if (unit_has_name(u, SPECIAL_INIT_SCOPE) &&
+ !unit_has_name(slice, SPECIAL_ROOT_SLICE))
+ return -EPERM;
+
+ if (UNIT_DEREF(u->slice) == slice)
+ return 0;
+
+ /* Disallow slice changes if @u is already bound to cgroups */
+ if (UNIT_ISSET(u->slice) && u->cgroup_realized)
+ return -EBUSY;
+
+ unit_ref_set(&u->slice, u, slice);
+ return 1;
+}
+
+int unit_set_default_slice(Unit *u) {
+ const char *slice_name;
+ Unit *slice;
+ int r;
+
+ assert(u);
+
+ if (UNIT_ISSET(u->slice))
+ return 0;
+
+ if (u->instance) {
+ _cleanup_free_ char *prefix = NULL, *escaped = NULL;
+
+ /* Implicitly place all instantiated units in their
+ * own per-template slice */
+
+ r = unit_name_to_prefix(u->id, &prefix);
+ if (r < 0)
+ return r;
+
+ /* The prefix is already escaped, but it might include
+ * "-" which has a special meaning for slice units,
+ * hence escape it here extra. */
+ escaped = unit_name_escape(prefix);
+ if (!escaped)
+ return -ENOMEM;
+
+ if (MANAGER_IS_SYSTEM(u->manager))
+ slice_name = strjoina("system-", escaped, ".slice");
+ else
+ slice_name = strjoina("app-", escaped, ".slice");
+
+ } else if (unit_is_extrinsic(u))
+ /* Keep all extrinsic units (e.g. perpetual units and swap and mount units in user mode) in
+ * the root slice. They don't really belong in one of the subslices. */
+ slice_name = SPECIAL_ROOT_SLICE;
+
+ else if (MANAGER_IS_SYSTEM(u->manager))
+ slice_name = SPECIAL_SYSTEM_SLICE;
+ else
+ slice_name = SPECIAL_APP_SLICE;
+
+ r = manager_load_unit(u->manager, slice_name, NULL, NULL, &slice);
+ if (r < 0)
+ return r;
+
+ return unit_set_slice(u, slice);
+}
+
+const char *unit_slice_name(Unit *u) {
+ assert(u);
+
+ if (!UNIT_ISSET(u->slice))
+ return NULL;
+
+ return UNIT_DEREF(u->slice)->id;
+}
+
+int unit_load_related_unit(Unit *u, const char *type, Unit **_found) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(u);
+ assert(type);
+ assert(_found);
+
+ r = unit_name_change_suffix(u->id, type, &t);
+ if (r < 0)
+ return r;
+ if (unit_has_name(u, t))
+ return -EINVAL;
+
+ r = manager_load_unit(u->manager, t, NULL, NULL, _found);
+ assert(r < 0 || *_found != u);
+ return r;
+}
+
+static int signal_name_owner_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *new_owner;
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = sd_bus_message_read(message, "sss", NULL, NULL, &new_owner);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (UNIT_VTABLE(u)->bus_name_owner_change)
+ UNIT_VTABLE(u)->bus_name_owner_change(u, empty_to_null(new_owner));
+
+ return 0;
+}
+
+static int get_name_owner_handler(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const sd_bus_error *e;
+ const char *new_owner;
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ u->get_name_owner_slot = sd_bus_slot_unref(u->get_name_owner_slot);
+
+ e = sd_bus_message_get_error(message);
+ if (e) {
+ if (!sd_bus_error_has_name(e, "org.freedesktop.DBus.Error.NameHasNoOwner"))
+ log_unit_error(u, "Unexpected error response from GetNameOwner(): %s", e->message);
+
+ new_owner = NULL;
+ } else {
+ r = sd_bus_message_read(message, "s", &new_owner);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ assert(!isempty(new_owner));
+ }
+
+ if (UNIT_VTABLE(u)->bus_name_owner_change)
+ UNIT_VTABLE(u)->bus_name_owner_change(u, new_owner);
+
+ return 0;
+}
+
+int unit_install_bus_match(Unit *u, sd_bus *bus, const char *name) {
+ const char *match;
+ int r;
+
+ assert(u);
+ assert(bus);
+ assert(name);
+
+ if (u->match_bus_slot || u->get_name_owner_slot)
+ return -EBUSY;
+
+ match = strjoina("type='signal',"
+ "sender='org.freedesktop.DBus',"
+ "path='/org/freedesktop/DBus',"
+ "interface='org.freedesktop.DBus',"
+ "member='NameOwnerChanged',"
+ "arg0='", name, "'");
+
+ r = sd_bus_add_match_async(bus, &u->match_bus_slot, match, signal_name_owner_changed, NULL, u);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method_async(
+ bus,
+ &u->get_name_owner_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetNameOwner",
+ get_name_owner_handler,
+ u,
+ "s", name);
+ if (r < 0) {
+ u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
+ return r;
+ }
+
+ log_unit_debug(u, "Watching D-Bus name '%s'.", name);
+ return 0;
+}
+
+int unit_watch_bus_name(Unit *u, const char *name) {
+ int r;
+
+ assert(u);
+ assert(name);
+
+ /* Watch a specific name on the bus. We only support one unit
+ * watching each name for now. */
+
+ if (u->manager->api_bus) {
+ /* If the bus is already available, install the match directly.
+ * Otherwise, just put the name in the list. bus_setup_api() will take care later. */
+ r = unit_install_bus_match(u, u->manager->api_bus, name);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to subscribe to NameOwnerChanged signal for '%s': %m", name);
+ }
+
+ r = hashmap_put(u->manager->watch_bus, name, u);
+ if (r < 0) {
+ u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
+ u->get_name_owner_slot = sd_bus_slot_unref(u->get_name_owner_slot);
+ return log_warning_errno(r, "Failed to put bus name to hashmap: %m");
+ }
+
+ return 0;
+}
+
+void unit_unwatch_bus_name(Unit *u, const char *name) {
+ assert(u);
+ assert(name);
+
+ (void) hashmap_remove_value(u->manager->watch_bus, name, u);
+ u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot);
+ u->get_name_owner_slot = sd_bus_slot_unref(u->get_name_owner_slot);
+}
+
+bool unit_can_serialize(Unit *u) {
+ assert(u);
+
+ return UNIT_VTABLE(u)->serialize && UNIT_VTABLE(u)->deserialize_item;
+}
+
+static int serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert(f);
+ assert(key);
+
+ if (mask == 0)
+ return 0;
+
+ r = cg_mask_to_string(mask, &s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format cgroup mask: %m");
+
+ return serialize_item(f, key, s);
+}
+
+static const char *const ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IP_INGRESS_BYTES] = "ip-accounting-ingress-bytes",
+ [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
+ [CGROUP_IP_EGRESS_BYTES] = "ip-accounting-egress-bytes",
+ [CGROUP_IP_EGRESS_PACKETS] = "ip-accounting-egress-packets",
+};
+
+static const char *const io_accounting_metric_field_base[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IO_READ_BYTES] = "io-accounting-read-bytes-base",
+ [CGROUP_IO_WRITE_BYTES] = "io-accounting-write-bytes-base",
+ [CGROUP_IO_READ_OPERATIONS] = "io-accounting-read-operations-base",
+ [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-base",
+};
+
+static const char *const io_accounting_metric_field_last[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+ [CGROUP_IO_READ_BYTES] = "io-accounting-read-bytes-last",
+ [CGROUP_IO_WRITE_BYTES] = "io-accounting-write-bytes-last",
+ [CGROUP_IO_READ_OPERATIONS] = "io-accounting-read-operations-last",
+ [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-last",
+};
+
+int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
+ int r;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ if (unit_can_serialize(u)) {
+ r = UNIT_VTABLE(u)->serialize(u, f, fds);
+ if (r < 0)
+ return r;
+ }
+
+ (void) serialize_dual_timestamp(f, "state-change-timestamp", &u->state_change_timestamp);
+
+ (void) serialize_dual_timestamp(f, "inactive-exit-timestamp", &u->inactive_exit_timestamp);
+ (void) serialize_dual_timestamp(f, "active-enter-timestamp", &u->active_enter_timestamp);
+ (void) serialize_dual_timestamp(f, "active-exit-timestamp", &u->active_exit_timestamp);
+ (void) serialize_dual_timestamp(f, "inactive-enter-timestamp", &u->inactive_enter_timestamp);
+
+ (void) serialize_dual_timestamp(f, "condition-timestamp", &u->condition_timestamp);
+ (void) serialize_dual_timestamp(f, "assert-timestamp", &u->assert_timestamp);
+
+ if (dual_timestamp_is_set(&u->condition_timestamp))
+ (void) serialize_bool(f, "condition-result", u->condition_result);
+
+ if (dual_timestamp_is_set(&u->assert_timestamp))
+ (void) serialize_bool(f, "assert-result", u->assert_result);
+
+ (void) serialize_bool(f, "transient", u->transient);
+ (void) serialize_bool(f, "in-audit", u->in_audit);
+
+ (void) serialize_bool(f, "exported-invocation-id", u->exported_invocation_id);
+ (void) serialize_bool(f, "exported-log-level-max", u->exported_log_level_max);
+ (void) serialize_bool(f, "exported-log-extra-fields", u->exported_log_extra_fields);
+ (void) serialize_bool(f, "exported-log-rate-limit-interval", u->exported_log_ratelimit_interval);
+ (void) serialize_bool(f, "exported-log-rate-limit-burst", u->exported_log_ratelimit_burst);
+
+ (void) serialize_item_format(f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
+ if (u->cpu_usage_last != NSEC_INFINITY)
+ (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
+
+ if (u->managed_oom_kill_last > 0)
+ (void) serialize_item_format(f, "managed-oom-kill-last", "%" PRIu64, u->managed_oom_kill_last);
+
+ if (u->oom_kill_last > 0)
+ (void) serialize_item_format(f, "oom-kill-last", "%" PRIu64, u->oom_kill_last);
+
+ for (CGroupIOAccountingMetric im = 0; im < _CGROUP_IO_ACCOUNTING_METRIC_MAX; im++) {
+ (void) serialize_item_format(f, io_accounting_metric_field_base[im], "%" PRIu64, u->io_accounting_base[im]);
+
+ if (u->io_accounting_last[im] != UINT64_MAX)
+ (void) serialize_item_format(f, io_accounting_metric_field_last[im], "%" PRIu64, u->io_accounting_last[im]);
+ }
+
+ if (u->cgroup_path)
+ (void) serialize_item(f, "cgroup", u->cgroup_path);
+
+ (void) serialize_bool(f, "cgroup-realized", u->cgroup_realized);
+ (void) serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
+ (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
+ (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
+
+ if (uid_is_valid(u->ref_uid))
+ (void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid);
+ if (gid_is_valid(u->ref_gid))
+ (void) serialize_item_format(f, "ref-gid", GID_FMT, u->ref_gid);
+
+ if (!sd_id128_is_null(u->invocation_id))
+ (void) serialize_item_format(f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id));
+
+ (void) serialize_item_format(f, "freezer-state", "%s", freezer_state_to_string(unit_freezer_state(u)));
+
+ bus_track_serialize(u->bus_track, f, "ref");
+
+ for (CGroupIPAccountingMetric m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+ uint64_t v;
+
+ r = unit_get_ip_accounting(u, m, &v);
+ if (r >= 0)
+ (void) serialize_item_format(f, ip_accounting_metric_field[m], "%" PRIu64, v);
+ }
+
+ if (serialize_jobs) {
+ if (u->job) {
+ fputs("job\n", f);
+ job_serialize(u->job, f);
+ }
+
+ if (u->nop_job) {
+ fputs("job\n", f);
+ job_serialize(u->nop_job, f);
+ }
+ }
+
+ /* End marker */
+ fputc('\n', f);
+ return 0;
+}
+
+static int unit_deserialize_job(Unit *u, FILE *f) {
+ _cleanup_(job_freep) Job *j = NULL;
+ int r;
+
+ assert(u);
+ assert(f);
+
+ j = job_new_raw(u);
+ if (!j)
+ return log_oom();
+
+ r = job_deserialize(j, f);
+ if (r < 0)
+ return r;
+
+ r = job_install_deserialized(j);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(j);
+ return 0;
+}
+
+int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
+ int r;
+
+ assert(u);
+ assert(f);
+ assert(fds);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l, *v;
+ ssize_t m;
+ size_t k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0) /* eof */
+ break;
+
+ l = strstrip(line);
+ if (isempty(l)) /* End marker */
+ break;
+
+ k = strcspn(l, "=");
+
+ if (l[k] == '=') {
+ l[k] = 0;
+ v = l+k+1;
+ } else
+ v = l+k;
+
+ if (streq(l, "job")) {
+ if (v[0] == '\0') {
+ /* New-style serialized job */
+ r = unit_deserialize_job(u, f);
+ if (r < 0)
+ return r;
+ } else /* Legacy for pre-44 */
+ log_unit_warning(u, "Update from too old systemd versions are unsupported, cannot deserialize job: %s", v);
+ continue;
+ } else if (streq(l, "state-change-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->state_change_timestamp);
+ continue;
+ } else if (streq(l, "inactive-exit-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->inactive_exit_timestamp);
+ continue;
+ } else if (streq(l, "active-enter-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->active_enter_timestamp);
+ continue;
+ } else if (streq(l, "active-exit-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->active_exit_timestamp);
+ continue;
+ } else if (streq(l, "inactive-enter-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->inactive_enter_timestamp);
+ continue;
+ } else if (streq(l, "condition-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->condition_timestamp);
+ continue;
+ } else if (streq(l, "assert-timestamp")) {
+ (void) deserialize_dual_timestamp(v, &u->assert_timestamp);
+ continue;
+ } else if (streq(l, "condition-result")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse condition result value %s, ignoring.", v);
+ else
+ u->condition_result = r;
+
+ continue;
+
+ } else if (streq(l, "assert-result")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse assert result value %s, ignoring.", v);
+ else
+ u->assert_result = r;
+
+ continue;
+
+ } else if (streq(l, "transient")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse transient bool %s, ignoring.", v);
+ else
+ u->transient = r;
+
+ continue;
+
+ } else if (streq(l, "in-audit")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse in-audit bool %s, ignoring.", v);
+ else
+ u->in_audit = r;
+
+ continue;
+
+ } else if (streq(l, "exported-invocation-id")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported invocation ID bool %s, ignoring.", v);
+ else
+ u->exported_invocation_id = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-level-max")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log level max bool %s, ignoring.", v);
+ else
+ u->exported_log_level_max = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-extra-fields")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log extra fields bool %s, ignoring.", v);
+ else
+ u->exported_log_extra_fields = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-rate-limit-interval")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log rate limit interval %s, ignoring.", v);
+ else
+ u->exported_log_ratelimit_interval = r;
+
+ continue;
+
+ } else if (streq(l, "exported-log-rate-limit-burst")) {
+
+ r = parse_boolean(v);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse exported log rate limit burst %s, ignoring.", v);
+ else
+ u->exported_log_ratelimit_burst = r;
+
+ continue;
+
+ } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) {
+
+ r = safe_atou64(v, &u->cpu_usage_base);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse CPU usage base %s, ignoring.", v);
+
+ continue;
+
+ } else if (streq(l, "cpu-usage-last")) {
+
+ r = safe_atou64(v, &u->cpu_usage_last);
+ if (r < 0)
+ log_unit_debug(u, "Failed to read CPU usage last %s, ignoring.", v);
+
+ continue;
+
+ } else if (streq(l, "managed-oom-kill-last")) {
+
+ r = safe_atou64(v, &u->managed_oom_kill_last);
+ if (r < 0)
+ log_unit_debug(u, "Failed to read managed OOM kill last %s, ignoring.", v);
+
+ continue;
+
+ } else if (streq(l, "oom-kill-last")) {
+
+ r = safe_atou64(v, &u->oom_kill_last);
+ if (r < 0)
+ log_unit_debug(u, "Failed to read OOM kill last %s, ignoring.", v);
+
+ continue;
+
+ } else if (streq(l, "cgroup")) {
+
+ r = unit_set_cgroup_path(u, v);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to set cgroup path %s, ignoring: %m", v);
+
+ (void) unit_watch_cgroup(u);
+ (void) unit_watch_cgroup_memory(u);
+
+ continue;
+ } else if (streq(l, "cgroup-realized")) {
+ int b;
+
+ b = parse_boolean(v);
+ if (b < 0)
+ log_unit_debug(u, "Failed to parse cgroup-realized bool %s, ignoring.", v);
+ else
+ u->cgroup_realized = b;
+
+ continue;
+
+ } else if (streq(l, "cgroup-realized-mask")) {
+
+ r = cg_mask_from_string(v, &u->cgroup_realized_mask);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse cgroup-realized-mask %s, ignoring.", v);
+ continue;
+
+ } else if (streq(l, "cgroup-enabled-mask")) {
+
+ r = cg_mask_from_string(v, &u->cgroup_enabled_mask);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
+ continue;
+
+ } else if (streq(l, "cgroup-invalidated-mask")) {
+
+ r = cg_mask_from_string(v, &u->cgroup_invalidated_mask);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse cgroup-invalidated-mask %s, ignoring.", v);
+ continue;
+
+ } else if (streq(l, "ref-uid")) {
+ uid_t uid;
+
+ r = parse_uid(v, &uid);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse referenced UID %s, ignoring.", v);
+ else
+ unit_ref_uid_gid(u, uid, GID_INVALID);
+
+ continue;
+
+ } else if (streq(l, "ref-gid")) {
+ gid_t gid;
+
+ r = parse_gid(v, &gid);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse referenced GID %s, ignoring.", v);
+ else
+ unit_ref_uid_gid(u, UID_INVALID, gid);
+
+ continue;
+
+ } else if (streq(l, "ref")) {
+
+ r = strv_extend(&u->deserialized_refs, v);
+ if (r < 0)
+ return log_oom();
+
+ continue;
+ } else if (streq(l, "invocation-id")) {
+ sd_id128_t id;
+
+ r = sd_id128_from_string(v, &id);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse invocation id %s, ignoring.", v);
+ else {
+ r = unit_set_invocation_id(u, id);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to set invocation ID for unit: %m");
+ }
+
+ continue;
+ } else if (streq(l, "freezer-state")) {
+ FreezerState s;
+
+ s = freezer_state_from_string(v);
+ if (s < 0)
+ log_unit_debug(u, "Failed to deserialize freezer-state '%s', ignoring.", v);
+ else
+ u->freezer_state = s;
+
+ continue;
+ }
+
+ /* Check if this is an IP accounting metric serialization field */
+ m = string_table_lookup(ip_accounting_metric_field, ELEMENTSOF(ip_accounting_metric_field), l);
+ if (m >= 0) {
+ uint64_t c;
+
+ r = safe_atou64(v, &c);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
+ else
+ u->ip_accounting_extra[m] = c;
+ continue;
+ }
+
+ m = string_table_lookup(io_accounting_metric_field_base, ELEMENTSOF(io_accounting_metric_field_base), l);
+ if (m >= 0) {
+ uint64_t c;
+
+ r = safe_atou64(v, &c);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse IO accounting base value %s, ignoring.", v);
+ else
+ u->io_accounting_base[m] = c;
+ continue;
+ }
+
+ m = string_table_lookup(io_accounting_metric_field_last, ELEMENTSOF(io_accounting_metric_field_last), l);
+ if (m >= 0) {
+ uint64_t c;
+
+ r = safe_atou64(v, &c);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse IO accounting last value %s, ignoring.", v);
+ else
+ u->io_accounting_last[m] = c;
+ continue;
+ }
+
+ if (unit_can_serialize(u)) {
+ r = exec_runtime_deserialize_compat(u, l, v, fds);
+ if (r < 0) {
+ log_unit_warning(u, "Failed to deserialize runtime parameter '%s', ignoring.", l);
+ continue;
+ }
+
+ /* Returns positive if key was handled by the call */
+ if (r > 0)
+ continue;
+
+ r = UNIT_VTABLE(u)->deserialize_item(u, l, v, fds);
+ if (r < 0)
+ log_unit_warning(u, "Failed to deserialize unit parameter '%s', ignoring.", l);
+ }
+ }
+
+ /* Versions before 228 did not carry a state change timestamp. In this case, take the current time. This is
+ * useful, so that timeouts based on this timestamp don't trigger too early, and is in-line with the logic from
+ * before 228 where the base for timeouts was not persistent across reboots. */
+
+ if (!dual_timestamp_is_set(&u->state_change_timestamp))
+ dual_timestamp_get(&u->state_change_timestamp);
+
+ /* Let's make sure that everything that is deserialized also gets any potential new cgroup settings applied
+ * after we are done. For that we invalidate anything already realized, so that we can realize it again. */
+ unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
+ unit_invalidate_cgroup_bpf(u);
+
+ return 0;
+}
+
+int unit_deserialize_skip(FILE *f) {
+ int r;
+ assert(f);
+
+ /* Skip serialized data for this unit. We don't know what it is. */
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read serialization line: %m");
+ if (r == 0)
+ return 0;
+
+ l = strstrip(line);
+
+ /* End marker */
+ if (isempty(l))
+ return 1;
+ }
+}
+
+int unit_add_node_dependency(Unit *u, const char *what, UnitDependency dep, UnitDependencyMask mask) {
+ _cleanup_free_ char *e = NULL;
+ Unit *device;
+ int r;
+
+ assert(u);
+
+ /* Adds in links to the device node that this unit is based on */
+ if (isempty(what))
+ return 0;
+
+ if (!is_device_path(what))
+ return 0;
+
+ /* When device units aren't supported (such as in a container), don't create dependencies on them. */
+ if (!unit_type_supported(UNIT_DEVICE))
+ return 0;
+
+ r = unit_name_from_path(what, ".device", &e);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(u->manager, e, NULL, NULL, &device);
+ if (r < 0)
+ return r;
+
+ if (dep == UNIT_REQUIRES && device_shall_be_bound_by(device, u))
+ dep = UNIT_BINDS_TO;
+
+ return unit_add_two_dependencies(u, UNIT_AFTER,
+ MANAGER_IS_SYSTEM(u->manager) ? dep : UNIT_WANTS,
+ device, true, mask);
+}
+
+int unit_add_blockdev_dependency(Unit *u, const char *what, UnitDependencyMask mask) {
+ _cleanup_free_ char *escaped = NULL, *target = NULL;
+ int r;
+
+ assert(u);
+
+ if (isempty(what))
+ return 0;
+
+ if (!path_startswith(what, "/dev/"))
+ return 0;
+
+ /* If we don't support devices, then also don't bother with blockdev@.target */
+ if (!unit_type_supported(UNIT_DEVICE))
+ return 0;
+
+ r = unit_name_path_escape(what, &escaped);
+ if (r < 0)
+ return r;
+
+ r = unit_name_build("blockdev", escaped, ".target", &target);
+ if (r < 0)
+ return r;
+
+ return unit_add_dependency_by_name(u, UNIT_AFTER, target, true, mask);
+}
+
+int unit_coldplug(Unit *u) {
+ int r = 0, q;
+ char **i;
+ Job *uj;
+
+ assert(u);
+
+ /* Make sure we don't enter a loop, when coldplugging recursively. */
+ if (u->coldplugged)
+ return 0;
+
+ u->coldplugged = true;
+
+ STRV_FOREACH(i, u->deserialized_refs) {
+ q = bus_unit_track_add_name(u, *i);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+ u->deserialized_refs = strv_free(u->deserialized_refs);
+
+ if (UNIT_VTABLE(u)->coldplug) {
+ q = UNIT_VTABLE(u)->coldplug(u);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ uj = u->job ?: u->nop_job;
+ if (uj) {
+ q = job_coldplug(uj);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+void unit_catchup(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->catchup)
+ UNIT_VTABLE(u)->catchup(u);
+}
+
+static bool fragment_mtime_newer(const char *path, usec_t mtime, bool path_masked) {
+ struct stat st;
+
+ if (!path)
+ return false;
+
+ /* If the source is some virtual kernel file system, then we assume we watch it anyway, and hence pretend we
+ * are never out-of-date. */
+ if (PATH_STARTSWITH_SET(path, "/proc", "/sys"))
+ return false;
+
+ if (stat(path, &st) < 0)
+ /* What, cannot access this anymore? */
+ return true;
+
+ if (path_masked)
+ /* For masked files check if they are still so */
+ return !null_or_empty(&st);
+ else
+ /* For non-empty files check the mtime */
+ return timespec_load(&st.st_mtim) > mtime;
+
+ return false;
+}
+
+bool unit_need_daemon_reload(Unit *u) {
+ _cleanup_strv_free_ char **t = NULL;
+ char **path;
+
+ assert(u);
+
+ /* For unit files, we allow masking… */
+ if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime,
+ u->load_state == UNIT_MASKED))
+ return true;
+
+ /* Source paths should not be masked… */
+ if (fragment_mtime_newer(u->source_path, u->source_mtime, false))
+ return true;
+
+ if (u->load_state == UNIT_LOADED)
+ (void) unit_find_dropin_paths(u, &t);
+ if (!strv_equal(u->dropin_paths, t))
+ return true;
+
+ /* … any drop-ins that are masked are simply omitted from the list. */
+ STRV_FOREACH(path, u->dropin_paths)
+ if (fragment_mtime_newer(*path, u->dropin_mtime, false))
+ return true;
+
+ return false;
+}
+
+void unit_reset_failed(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->reset_failed)
+ UNIT_VTABLE(u)->reset_failed(u);
+
+ ratelimit_reset(&u->start_ratelimit);
+ u->start_limit_hit = false;
+}
+
+Unit *unit_following(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->following)
+ return UNIT_VTABLE(u)->following(u);
+
+ return NULL;
+}
+
+bool unit_stop_pending(Unit *u) {
+ assert(u);
+
+ /* This call does check the current state of the unit. It's
+ * hence useful to be called from state change calls of the
+ * unit itself, where the state isn't updated yet. This is
+ * different from unit_inactive_or_pending() which checks both
+ * the current state and for a queued job. */
+
+ return unit_has_job_type(u, JOB_STOP);
+}
+
+bool unit_inactive_or_pending(Unit *u) {
+ assert(u);
+
+ /* Returns true if the unit is inactive or going down */
+
+ if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(u)))
+ return true;
+
+ if (unit_stop_pending(u))
+ return true;
+
+ return false;
+}
+
+bool unit_active_or_pending(Unit *u) {
+ assert(u);
+
+ /* Returns true if the unit is active or going up */
+
+ if (UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u)))
+ return true;
+
+ if (u->job &&
+ IN_SET(u->job->type, JOB_START, JOB_RELOAD_OR_START, JOB_RESTART))
+ return true;
+
+ return false;
+}
+
+bool unit_will_restart_default(Unit *u) {
+ assert(u);
+
+ return unit_has_job_type(u, JOB_START);
+}
+
+bool unit_will_restart(Unit *u) {
+ assert(u);
+
+ if (!UNIT_VTABLE(u)->will_restart)
+ return false;
+
+ return UNIT_VTABLE(u)->will_restart(u);
+}
+
+int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error) {
+ assert(u);
+ assert(w >= 0 && w < _KILL_WHO_MAX);
+ assert(SIGNAL_VALID(signo));
+
+ if (!UNIT_VTABLE(u)->kill)
+ return -EOPNOTSUPP;
+
+ return UNIT_VTABLE(u)->kill(u, w, signo, error);
+}
+
+static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) {
+ _cleanup_set_free_ Set *pid_set = NULL;
+ int r;
+
+ pid_set = set_new(NULL);
+ if (!pid_set)
+ return NULL;
+
+ /* Exclude the main/control pids from being killed via the cgroup */
+ if (main_pid > 0) {
+ r = set_put(pid_set, PID_TO_PTR(main_pid));
+ if (r < 0)
+ return NULL;
+ }
+
+ if (control_pid > 0) {
+ r = set_put(pid_set, PID_TO_PTR(control_pid));
+ if (r < 0)
+ return NULL;
+ }
+
+ return TAKE_PTR(pid_set);
+}
+
+static int kill_common_log(pid_t pid, int signo, void *userdata) {
+ _cleanup_free_ char *comm = NULL;
+ Unit *u = userdata;
+
+ assert(u);
+
+ (void) get_process_comm(pid, &comm);
+ log_unit_info(u, "Sending signal SIG%s to process " PID_FMT " (%s) on client request.",
+ signal_to_string(signo), pid, strna(comm));
+
+ return 1;
+}
+
+int unit_kill_common(
+ Unit *u,
+ KillWho who,
+ int signo,
+ pid_t main_pid,
+ pid_t control_pid,
+ sd_bus_error *error) {
+
+ int r = 0;
+ bool killed = false;
+
+ /* This is the common implementation for explicit user-requested killing of unit processes, shared by
+ * various unit types. Do not confuse with unit_kill_context(), which is what we use when we want to
+ * stop a service ourselves. */
+
+ if (IN_SET(who, KILL_MAIN, KILL_MAIN_FAIL)) {
+ if (main_pid < 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_PROCESS, "%s units have no main processes", unit_type_to_string(u->type));
+ if (main_pid == 0)
+ return sd_bus_error_set_const(error, BUS_ERROR_NO_SUCH_PROCESS, "No main process to kill");
+ }
+
+ if (IN_SET(who, KILL_CONTROL, KILL_CONTROL_FAIL)) {
+ if (control_pid < 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_PROCESS, "%s units have no control processes", unit_type_to_string(u->type));
+ if (control_pid == 0)
+ return sd_bus_error_set_const(error, BUS_ERROR_NO_SUCH_PROCESS, "No control process to kill");
+ }
+
+ if (IN_SET(who, KILL_CONTROL, KILL_CONTROL_FAIL, KILL_ALL, KILL_ALL_FAIL))
+ if (control_pid > 0) {
+ _cleanup_free_ char *comm = NULL;
+ (void) get_process_comm(control_pid, &comm);
+
+ if (kill(control_pid, signo) < 0) {
+ /* Report this failure both to the logs and to the client */
+ sd_bus_error_set_errnof(
+ error, errno,
+ "Failed to send signal SIG%s to control process " PID_FMT " (%s): %m",
+ signal_to_string(signo), control_pid, strna(comm));
+ r = log_unit_warning_errno(
+ u, errno,
+ "Failed to send signal SIG%s to control process " PID_FMT " (%s) on client request: %m",
+ signal_to_string(signo), control_pid, strna(comm));
+ } else {
+ log_unit_info(u, "Sent signal SIG%s to control process " PID_FMT " (%s) on client request.",
+ signal_to_string(signo), control_pid, strna(comm));
+ killed = true;
+ }
+ }
+
+ if (IN_SET(who, KILL_MAIN, KILL_MAIN_FAIL, KILL_ALL, KILL_ALL_FAIL))
+ if (main_pid > 0) {
+ _cleanup_free_ char *comm = NULL;
+ (void) get_process_comm(main_pid, &comm);
+
+ if (kill(main_pid, signo) < 0) {
+ if (r == 0)
+ sd_bus_error_set_errnof(
+ error, errno,
+ "Failed to send signal SIG%s to main process " PID_FMT " (%s): %m",
+ signal_to_string(signo), main_pid, strna(comm));
+
+ r = log_unit_warning_errno(
+ u, errno,
+ "Failed to send signal SIG%s to main process " PID_FMT " (%s) on client request: %m",
+ signal_to_string(signo), main_pid, strna(comm));
+ } else {
+ log_unit_info(u, "Sent signal SIG%s to main process " PID_FMT " (%s) on client request.",
+ signal_to_string(signo), main_pid, strna(comm));
+ killed = true;
+ }
+ }
+
+ if (IN_SET(who, KILL_ALL, KILL_ALL_FAIL) && u->cgroup_path) {
+ _cleanup_set_free_ Set *pid_set = NULL;
+ int q;
+
+ /* Exclude the main/control pids from being killed via the cgroup */
+ pid_set = unit_pid_set(main_pid, control_pid);
+ if (!pid_set)
+ return log_oom();
+
+ q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, 0, pid_set, kill_common_log, u);
+ if (q < 0) {
+ if (!IN_SET(q, -ESRCH, -ENOENT)) {
+ if (r == 0)
+ sd_bus_error_set_errnof(
+ error, q,
+ "Failed to send signal SIG%s to auxiliary processes: %m",
+ signal_to_string(signo));
+
+ r = log_unit_warning_errno(
+ u, q,
+ "Failed to send signal SIG%s to auxiliary processes on client request: %m",
+ signal_to_string(signo));
+ }
+ } else
+ killed = true;
+ }
+
+ /* If the "fail" versions of the operation are requested, then complain if the set of processes we killed is empty */
+ if (r == 0 && !killed && IN_SET(who, KILL_ALL_FAIL, KILL_CONTROL_FAIL, KILL_MAIN_FAIL))
+ return sd_bus_error_set_const(error, BUS_ERROR_NO_SUCH_PROCESS, "No matching processes to kill");
+
+ return r;
+}
+
+int unit_following_set(Unit *u, Set **s) {
+ assert(u);
+ assert(s);
+
+ if (UNIT_VTABLE(u)->following_set)
+ return UNIT_VTABLE(u)->following_set(u, s);
+
+ *s = NULL;
+ return 0;
+}
+
+UnitFileState unit_get_unit_file_state(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->unit_file_state < 0 && u->fragment_path) {
+ r = unit_file_get_state(
+ u->manager->unit_file_scope,
+ NULL,
+ u->id,
+ &u->unit_file_state);
+ if (r < 0)
+ u->unit_file_state = UNIT_FILE_BAD;
+ }
+
+ return u->unit_file_state;
+}
+
+int unit_get_unit_file_preset(Unit *u) {
+ assert(u);
+
+ if (u->unit_file_preset < 0 && u->fragment_path)
+ u->unit_file_preset = unit_file_query_preset(
+ u->manager->unit_file_scope,
+ NULL,
+ basename(u->fragment_path),
+ NULL);
+
+ return u->unit_file_preset;
+}
+
+Unit* unit_ref_set(UnitRef *ref, Unit *source, Unit *target) {
+ assert(ref);
+ assert(source);
+ assert(target);
+
+ if (ref->target)
+ unit_ref_unset(ref);
+
+ ref->source = source;
+ ref->target = target;
+ LIST_PREPEND(refs_by_target, target->refs_by_target, ref);
+ return target;
+}
+
+void unit_ref_unset(UnitRef *ref) {
+ assert(ref);
+
+ if (!ref->target)
+ return;
+
+ /* We are about to drop a reference to the unit, make sure the garbage collection has a look at it as it might
+ * be unreferenced now. */
+ unit_add_to_gc_queue(ref->target);
+
+ LIST_REMOVE(refs_by_target, ref->target->refs_by_target, ref);
+ ref->source = ref->target = NULL;
+}
+
+static int user_from_unit_name(Unit *u, char **ret) {
+
+ static const uint8_t hash_key[] = {
+ 0x58, 0x1a, 0xaf, 0xe6, 0x28, 0x58, 0x4e, 0x96,
+ 0xb4, 0x4e, 0xf5, 0x3b, 0x8c, 0x92, 0x07, 0xec
+ };
+
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ r = unit_name_to_prefix(u->id, &n);
+ if (r < 0)
+ return r;
+
+ if (valid_user_group_name(n, 0)) {
+ *ret = TAKE_PTR(n);
+ return 0;
+ }
+
+ /* If we can't use the unit name as a user name, then let's hash it and use that */
+ if (asprintf(ret, "_du%016" PRIx64, siphash24(n, strlen(n), hash_key)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int unit_patch_contexts(Unit *u) {
+ CGroupContext *cc;
+ ExecContext *ec;
+ int r;
+
+ assert(u);
+
+ /* Patch in the manager defaults into the exec and cgroup
+ * contexts, _after_ the rest of the settings have been
+ * initialized */
+
+ ec = unit_get_exec_context(u);
+ if (ec) {
+ /* This only copies in the ones that need memory */
+ for (unsigned i = 0; i < _RLIMIT_MAX; i++)
+ if (u->manager->rlimit[i] && !ec->rlimit[i]) {
+ ec->rlimit[i] = newdup(struct rlimit, u->manager->rlimit[i], 1);
+ if (!ec->rlimit[i])
+ return -ENOMEM;
+ }
+
+ if (MANAGER_IS_USER(u->manager) &&
+ !ec->working_directory) {
+
+ r = get_home_dir(&ec->working_directory);
+ if (r < 0)
+ return r;
+
+ /* Allow user services to run, even if the
+ * home directory is missing */
+ ec->working_directory_missing_ok = true;
+ }
+
+ if (ec->private_devices)
+ ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO));
+
+ if (ec->protect_kernel_modules)
+ ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
+
+ if (ec->protect_kernel_logs)
+ ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYSLOG);
+
+ if (ec->protect_clock)
+ ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_SYS_TIME) | (UINT64_C(1) << CAP_WAKE_ALARM));
+
+ if (ec->dynamic_user) {
+ if (!ec->user) {
+ r = user_from_unit_name(u, &ec->user);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ec->group) {
+ ec->group = strdup(ec->user);
+ if (!ec->group)
+ return -ENOMEM;
+ }
+
+ /* If the dynamic user option is on, let's make sure that the unit can't leave its
+ * UID/GID around in the file system or on IPC objects. Hence enforce a strict
+ * sandbox. */
+
+ ec->private_tmp = true;
+ ec->remove_ipc = true;
+ ec->protect_system = PROTECT_SYSTEM_STRICT;
+ if (ec->protect_home == PROTECT_HOME_NO)
+ ec->protect_home = PROTECT_HOME_READ_ONLY;
+
+ /* Make sure this service can neither benefit from SUID/SGID binaries nor create
+ * them. */
+ ec->no_new_privileges = true;
+ ec->restrict_suid_sgid = true;
+ }
+ }
+
+ cc = unit_get_cgroup_context(u);
+ if (cc && ec) {
+
+ if (ec->private_devices &&
+ cc->device_policy == CGROUP_DEVICE_POLICY_AUTO)
+ cc->device_policy = CGROUP_DEVICE_POLICY_CLOSED;
+
+ if ((ec->root_image || !LIST_IS_EMPTY(ec->mount_images)) &&
+ (cc->device_policy != CGROUP_DEVICE_POLICY_AUTO || cc->device_allow)) {
+ const char *p;
+
+ /* When RootImage= or MountImages= is specified, the following devices are touched. */
+ FOREACH_STRING(p, "/dev/loop-control", "/dev/mapper/control") {
+ r = cgroup_add_device_allow(cc, p, "rw");
+ if (r < 0)
+ return r;
+ }
+ FOREACH_STRING(p, "block-loop", "block-blkext", "block-device-mapper") {
+ r = cgroup_add_device_allow(cc, p, "rwm");
+ if (r < 0)
+ return r;
+ }
+
+ /* Make sure "block-loop" can be resolved, i.e. make sure "loop" shows up in /proc/devices.
+ * Same for mapper and verity. */
+ FOREACH_STRING(p, "modprobe@loop.service", "modprobe@dm_mod.service", "modprobe@dm_verity.service") {
+ r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_WANTS, p, true, UNIT_DEPENDENCY_FILE);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (ec->protect_clock) {
+ r = cgroup_add_device_allow(cc, "char-rtc", "r");
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+ExecContext *unit_get_exec_context(Unit *u) {
+ size_t offset;
+ assert(u);
+
+ if (u->type < 0)
+ return NULL;
+
+ offset = UNIT_VTABLE(u)->exec_context_offset;
+ if (offset <= 0)
+ return NULL;
+
+ return (ExecContext*) ((uint8_t*) u + offset);
+}
+
+KillContext *unit_get_kill_context(Unit *u) {
+ size_t offset;
+ assert(u);
+
+ if (u->type < 0)
+ return NULL;
+
+ offset = UNIT_VTABLE(u)->kill_context_offset;
+ if (offset <= 0)
+ return NULL;
+
+ return (KillContext*) ((uint8_t*) u + offset);
+}
+
+CGroupContext *unit_get_cgroup_context(Unit *u) {
+ size_t offset;
+
+ if (u->type < 0)
+ return NULL;
+
+ offset = UNIT_VTABLE(u)->cgroup_context_offset;
+ if (offset <= 0)
+ return NULL;
+
+ return (CGroupContext*) ((uint8_t*) u + offset);
+}
+
+ExecRuntime *unit_get_exec_runtime(Unit *u) {
+ size_t offset;
+
+ if (u->type < 0)
+ return NULL;
+
+ offset = UNIT_VTABLE(u)->exec_runtime_offset;
+ if (offset <= 0)
+ return NULL;
+
+ return *(ExecRuntime**) ((uint8_t*) u + offset);
+}
+
+static const char* unit_drop_in_dir(Unit *u, UnitWriteFlags flags) {
+ assert(u);
+
+ if (UNIT_WRITE_FLAGS_NOOP(flags))
+ return NULL;
+
+ if (u->transient) /* Redirect drop-ins for transient units always into the transient directory. */
+ return u->manager->lookup_paths.transient;
+
+ if (flags & UNIT_PERSISTENT)
+ return u->manager->lookup_paths.persistent_control;
+
+ if (flags & UNIT_RUNTIME)
+ return u->manager->lookup_paths.runtime_control;
+
+ return NULL;
+}
+
+char* unit_escape_setting(const char *s, UnitWriteFlags flags, char **buf) {
+ char *ret = NULL;
+
+ if (!s)
+ return NULL;
+
+ /* Escapes the input string as requested. Returns the escaped string. If 'buf' is specified then the allocated
+ * return buffer pointer is also written to *buf, except if no escaping was necessary, in which case *buf is
+ * set to NULL, and the input pointer is returned as-is. This means the return value always contains a properly
+ * escaped version, but *buf when passed only contains a pointer if an allocation was necessary. If *buf is
+ * not specified, then the return value always needs to be freed. Callers can use this to optimize memory
+ * allocations. */
+
+ if (flags & UNIT_ESCAPE_SPECIFIERS) {
+ ret = specifier_escape(s);
+ if (!ret)
+ return NULL;
+
+ s = ret;
+ }
+
+ if (flags & UNIT_ESCAPE_C) {
+ char *a;
+
+ a = cescape(s);
+ free(ret);
+ if (!a)
+ return NULL;
+
+ ret = a;
+ }
+
+ if (buf) {
+ *buf = ret;
+ return ret ?: (char*) s;
+ }
+
+ return ret ?: strdup(s);
+}
+
+char* unit_concat_strv(char **l, UnitWriteFlags flags) {
+ _cleanup_free_ char *result = NULL;
+ size_t n = 0, allocated = 0;
+ char **i;
+
+ /* Takes a list of strings, escapes them, and concatenates them. This may be used to format command lines in a
+ * way suitable for ExecStart= stanzas */
+
+ STRV_FOREACH(i, l) {
+ _cleanup_free_ char *buf = NULL;
+ const char *p;
+ size_t a;
+ char *q;
+
+ p = unit_escape_setting(*i, flags, &buf);
+ if (!p)
+ return NULL;
+
+ a = (n > 0) + 1 + strlen(p) + 1; /* separating space + " + entry + " */
+ if (!GREEDY_REALLOC(result, allocated, n + a + 1))
+ return NULL;
+
+ q = result + n;
+ if (n > 0)
+ *(q++) = ' ';
+
+ *(q++) = '"';
+ q = stpcpy(q, p);
+ *(q++) = '"';
+
+ n += a;
+ }
+
+ if (!GREEDY_REALLOC(result, allocated, n + 1))
+ return NULL;
+
+ result[n] = 0;
+
+ return TAKE_PTR(result);
+}
+
+int unit_write_setting(Unit *u, UnitWriteFlags flags, const char *name, const char *data) {
+ _cleanup_free_ char *p = NULL, *q = NULL, *escaped = NULL;
+ const char *dir, *wrapped;
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(data);
+
+ if (UNIT_WRITE_FLAGS_NOOP(flags))
+ return 0;
+
+ data = unit_escape_setting(data, flags, &escaped);
+ if (!data)
+ return -ENOMEM;
+
+ /* Prefix the section header. If we are writing this out as transient file, then let's suppress this if the
+ * previous section header is the same */
+
+ if (flags & UNIT_PRIVATE) {
+ if (!UNIT_VTABLE(u)->private_section)
+ return -EINVAL;
+
+ if (!u->transient_file || u->last_section_private < 0)
+ data = strjoina("[", UNIT_VTABLE(u)->private_section, "]\n", data);
+ else if (u->last_section_private == 0)
+ data = strjoina("\n[", UNIT_VTABLE(u)->private_section, "]\n", data);
+ } else {
+ if (!u->transient_file || u->last_section_private < 0)
+ data = strjoina("[Unit]\n", data);
+ else if (u->last_section_private > 0)
+ data = strjoina("\n[Unit]\n", data);
+ }
+
+ if (u->transient_file) {
+ /* When this is a transient unit file in creation, then let's not create a new drop-in but instead
+ * write to the transient unit file. */
+ fputs(data, u->transient_file);
+
+ if (!endswith(data, "\n"))
+ fputc('\n', u->transient_file);
+
+ /* Remember which section we wrote this entry to */
+ u->last_section_private = !!(flags & UNIT_PRIVATE);
+ return 0;
+ }
+
+ dir = unit_drop_in_dir(u, flags);
+ if (!dir)
+ return -EINVAL;
+
+ wrapped = strjoina("# This is a drop-in unit file extension, created via \"systemctl set-property\"\n"
+ "# or an equivalent operation. Do not edit.\n",
+ data,
+ "\n");
+
+ r = drop_in_file(dir, u->id, 50, name, &p, &q);
+ if (r < 0)
+ return r;
+
+ (void) mkdir_p_label(p, 0755);
+
+ /* Make sure the drop-in dir is registered in our path cache. This way we don't need to stupidly
+ * recreate the cache after every drop-in we write. */
+ if (u->manager->unit_path_cache) {
+ r = set_put_strdup(&u->manager->unit_path_cache, p);
+ if (r < 0)
+ return r;
+ }
+
+ r = write_string_file_atomic_label(q, wrapped);
+ if (r < 0)
+ return r;
+
+ r = strv_push(&u->dropin_paths, q);
+ if (r < 0)
+ return r;
+ q = NULL;
+
+ strv_uniq(u->dropin_paths);
+
+ u->dropin_mtime = now(CLOCK_REALTIME);
+
+ return 0;
+}
+
+int unit_write_settingf(Unit *u, UnitWriteFlags flags, const char *name, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ assert(u);
+ assert(name);
+ assert(format);
+
+ if (UNIT_WRITE_FLAGS_NOOP(flags))
+ return 0;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return unit_write_setting(u, flags, name, p);
+}
+
+int unit_make_transient(Unit *u) {
+ _cleanup_free_ char *path = NULL;
+ FILE *f;
+
+ assert(u);
+
+ if (!UNIT_VTABLE(u)->can_transient)
+ return -EOPNOTSUPP;
+
+ (void) mkdir_p_label(u->manager->lookup_paths.transient, 0755);
+
+ path = path_join(u->manager->lookup_paths.transient, u->id);
+ if (!path)
+ return -ENOMEM;
+
+ /* Let's open the file we'll write the transient settings into. This file is kept open as long as we are
+ * creating the transient, and is closed in unit_load(), as soon as we start loading the file. */
+
+ RUN_WITH_UMASK(0022) {
+ f = fopen(path, "we");
+ if (!f)
+ return -errno;
+ }
+
+ safe_fclose(u->transient_file);
+ u->transient_file = f;
+
+ free_and_replace(u->fragment_path, path);
+
+ u->source_path = mfree(u->source_path);
+ u->dropin_paths = strv_free(u->dropin_paths);
+ u->fragment_mtime = u->source_mtime = u->dropin_mtime = 0;
+
+ u->load_state = UNIT_STUB;
+ u->load_error = 0;
+ u->transient = true;
+
+ unit_add_to_dbus_queue(u);
+ unit_add_to_gc_queue(u);
+
+ fputs("# This is a transient unit file, created programmatically via the systemd API. Do not edit.\n",
+ u->transient_file);
+
+ return 0;
+}
+
+static int log_kill(pid_t pid, int sig, void *userdata) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ /* Don't log about processes marked with brackets, under the assumption that these are temporary processes
+ only, like for example systemd's own PAM stub process. */
+ if (comm && comm[0] == '(')
+ return 0;
+
+ log_unit_notice(userdata,
+ "Killing process " PID_FMT " (%s) with signal SIG%s.",
+ pid,
+ strna(comm),
+ signal_to_string(sig));
+
+ return 1;
+}
+
+static int operation_to_signal(const KillContext *c, KillOperation k, bool *noteworthy) {
+ assert(c);
+
+ switch (k) {
+
+ case KILL_TERMINATE:
+ case KILL_TERMINATE_AND_LOG:
+ *noteworthy = false;
+ return c->kill_signal;
+
+ case KILL_RESTART:
+ *noteworthy = false;
+ return restart_kill_signal(c);
+
+ case KILL_KILL:
+ *noteworthy = true;
+ return c->final_kill_signal;
+
+ case KILL_WATCHDOG:
+ *noteworthy = true;
+ return c->watchdog_signal;
+
+ default:
+ assert_not_reached("KillOperation unknown");
+ }
+}
+
+int unit_kill_context(
+ Unit *u,
+ KillContext *c,
+ KillOperation k,
+ pid_t main_pid,
+ pid_t control_pid,
+ bool main_pid_alien) {
+
+ bool wait_for_exit = false, send_sighup;
+ cg_kill_log_func_t log_func = NULL;
+ int sig, r;
+
+ assert(u);
+ assert(c);
+
+ /* Kill the processes belonging to this unit, in preparation for shutting the unit down. Returns > 0
+ * if we killed something worth waiting for, 0 otherwise. Do not confuse with unit_kill_common()
+ * which is used for user-requested killing of unit processes. */
+
+ if (c->kill_mode == KILL_NONE)
+ return 0;
+
+ bool noteworthy;
+ sig = operation_to_signal(c, k, &noteworthy);
+ if (noteworthy)
+ log_func = log_kill;
+
+ send_sighup =
+ c->send_sighup &&
+ IN_SET(k, KILL_TERMINATE, KILL_TERMINATE_AND_LOG) &&
+ sig != SIGHUP;
+
+ if (main_pid > 0) {
+ if (log_func)
+ log_func(main_pid, sig, u);
+
+ r = kill_and_sigcont(main_pid, sig);
+ if (r < 0 && r != -ESRCH) {
+ _cleanup_free_ char *comm = NULL;
+ (void) get_process_comm(main_pid, &comm);
+
+ log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid, strna(comm));
+ } else {
+ if (!main_pid_alien)
+ wait_for_exit = true;
+
+ if (r != -ESRCH && send_sighup)
+ (void) kill(main_pid, SIGHUP);
+ }
+ }
+
+ if (control_pid > 0) {
+ if (log_func)
+ log_func(control_pid, sig, u);
+
+ r = kill_and_sigcont(control_pid, sig);
+ if (r < 0 && r != -ESRCH) {
+ _cleanup_free_ char *comm = NULL;
+ (void) get_process_comm(control_pid, &comm);
+
+ log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid, strna(comm));
+ } else {
+ wait_for_exit = true;
+
+ if (r != -ESRCH && send_sighup)
+ (void) kill(control_pid, SIGHUP);
+ }
+ }
+
+ if (u->cgroup_path &&
+ (c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && k == KILL_KILL))) {
+ _cleanup_set_free_ Set *pid_set = NULL;
+
+ /* Exclude the main/control pids from being killed via the cgroup */
+ pid_set = unit_pid_set(main_pid, control_pid);
+ if (!pid_set)
+ return -ENOMEM;
+
+ r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ sig,
+ CGROUP_SIGCONT|CGROUP_IGNORE_SELF,
+ pid_set,
+ log_func, u);
+ if (r < 0) {
+ if (!IN_SET(r, -EAGAIN, -ESRCH, -ENOENT))
+ log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", u->cgroup_path);
+
+ } else if (r > 0) {
+
+ /* FIXME: For now, on the legacy hierarchy, we will not wait for the cgroup members to die if
+ * we are running in a container or if this is a delegation unit, simply because cgroup
+ * notification is unreliable in these cases. It doesn't work at all in containers, and outside
+ * of containers it can be confused easily by left-over directories in the cgroup — which
+ * however should not exist in non-delegated units. On the unified hierarchy that's different,
+ * there we get proper events. Hence rely on them. */
+
+ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
+ (detect_container() == 0 && !unit_cgroup_delegate(u)))
+ wait_for_exit = true;
+
+ if (send_sighup) {
+ set_free(pid_set);
+
+ pid_set = unit_pid_set(main_pid, control_pid);
+ if (!pid_set)
+ return -ENOMEM;
+
+ (void) cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ SIGHUP,
+ CGROUP_IGNORE_SELF,
+ pid_set,
+ NULL, NULL);
+ }
+ }
+ }
+
+ return wait_for_exit;
+}
+
+int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask) {
+ _cleanup_free_ char *p = NULL;
+ UnitDependencyInfo di;
+ int r;
+
+ assert(u);
+ assert(path);
+
+ /* Registers a unit for requiring a certain path and all its prefixes. We keep a hashtable of these paths in
+ * the unit (from the path to the UnitDependencyInfo structure indicating how to the dependency came to
+ * be). However, we build a prefix table for all possible prefixes so that new appearing mount units can easily
+ * determine which units to make themselves a dependency of. */
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ r = hashmap_ensure_allocated(&u->requires_mounts_for, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ p = strdup(path);
+ if (!p)
+ return -ENOMEM;
+
+ path = path_simplify(p, true);
+
+ if (!path_is_normalized(path))
+ return -EPERM;
+
+ if (hashmap_contains(u->requires_mounts_for, path))
+ return 0;
+
+ di = (UnitDependencyInfo) {
+ .origin_mask = mask
+ };
+
+ r = hashmap_put(u->requires_mounts_for, path, di.data);
+ if (r < 0)
+ return r;
+ p = NULL;
+
+ char prefix[strlen(path) + 1];
+ PATH_FOREACH_PREFIX_MORE(prefix, path) {
+ Set *x;
+
+ x = hashmap_get(u->manager->units_requiring_mounts_for, prefix);
+ if (!x) {
+ _cleanup_free_ char *q = NULL;
+
+ r = hashmap_ensure_allocated(&u->manager->units_requiring_mounts_for, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ q = strdup(prefix);
+ if (!q)
+ return -ENOMEM;
+
+ x = set_new(NULL);
+ if (!x)
+ return -ENOMEM;
+
+ r = hashmap_put(u->manager->units_requiring_mounts_for, q, x);
+ if (r < 0) {
+ set_free(x);
+ return r;
+ }
+ q = NULL;
+ }
+
+ r = set_put(x, u);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int unit_setup_exec_runtime(Unit *u) {
+ ExecRuntime **rt;
+ size_t offset;
+ Unit *other;
+ void *v;
+ int r;
+
+ offset = UNIT_VTABLE(u)->exec_runtime_offset;
+ assert(offset > 0);
+
+ /* Check if there already is an ExecRuntime for this unit? */
+ rt = (ExecRuntime**) ((uint8_t*) u + offset);
+ if (*rt)
+ return 0;
+
+ /* Try to get it from somebody else */
+ HASHMAP_FOREACH_KEY(v, other, u->dependencies[UNIT_JOINS_NAMESPACE_OF]) {
+ r = exec_runtime_acquire(u->manager, NULL, other->id, false, rt);
+ if (r == 1)
+ return 1;
+ }
+
+ return exec_runtime_acquire(u->manager, unit_get_exec_context(u), u->id, true, rt);
+}
+
+int unit_setup_dynamic_creds(Unit *u) {
+ ExecContext *ec;
+ DynamicCreds *dcreds;
+ size_t offset;
+
+ assert(u);
+
+ offset = UNIT_VTABLE(u)->dynamic_creds_offset;
+ assert(offset > 0);
+ dcreds = (DynamicCreds*) ((uint8_t*) u + offset);
+
+ ec = unit_get_exec_context(u);
+ assert(ec);
+
+ if (!ec->dynamic_user)
+ return 0;
+
+ return dynamic_creds_acquire(dcreds, u->manager, ec->user, ec->group);
+}
+
+bool unit_type_supported(UnitType t) {
+ if (_unlikely_(t < 0))
+ return false;
+ if (_unlikely_(t >= _UNIT_TYPE_MAX))
+ return false;
+
+ if (!unit_vtable[t]->supported)
+ return true;
+
+ return unit_vtable[t]->supported();
+}
+
+void unit_warn_if_dir_nonempty(Unit *u, const char* where) {
+ int r;
+
+ assert(u);
+ assert(where);
+
+ r = dir_is_empty(where);
+ if (r > 0 || r == -ENOTDIR)
+ return;
+ if (r < 0) {
+ log_unit_warning_errno(u, r, "Failed to check directory %s: %m", where);
+ return;
+ }
+
+ log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Directory %s to mount over is not empty, mounting anyway.", where),
+ "WHERE=%s", where);
+}
+
+int unit_fail_if_noncanonical(Unit *u, const char* where) {
+ _cleanup_free_ char *canonical_where = NULL;
+ int r;
+
+ assert(u);
+ assert(where);
+
+ r = chase_symlinks(where, NULL, CHASE_NONEXISTENT, &canonical_where, NULL);
+ if (r < 0) {
+ log_unit_debug_errno(u, r, "Failed to check %s for symlinks, ignoring: %m", where);
+ return 0;
+ }
+
+ /* We will happily ignore a trailing slash (or any redundant slashes) */
+ if (path_equal(where, canonical_where))
+ return 0;
+
+ /* No need to mention "." or "..", they would already have been rejected by unit_name_from_path() */
+ log_struct(LOG_ERR,
+ "MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Mount path %s is not canonical (contains a symlink).", where),
+ "WHERE=%s", where);
+
+ return -ELOOP;
+}
+
+bool unit_is_pristine(Unit *u) {
+ assert(u);
+
+ /* Check if the unit already exists or is already around,
+ * in a number of different ways. Note that to cater for unit
+ * types such as slice, we are generally fine with units that
+ * are marked UNIT_LOADED even though nothing was actually
+ * loaded, as those unit types don't require a file on disk. */
+
+ return !(!IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_LOADED) ||
+ u->fragment_path ||
+ u->source_path ||
+ !strv_isempty(u->dropin_paths) ||
+ u->job ||
+ u->merged_into);
+}
+
+pid_t unit_control_pid(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->control_pid)
+ return UNIT_VTABLE(u)->control_pid(u);
+
+ return 0;
+}
+
+pid_t unit_main_pid(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->main_pid)
+ return UNIT_VTABLE(u)->main_pid(u);
+
+ return 0;
+}
+
+static void unit_unref_uid_internal(
+ Unit *u,
+ uid_t *ref_uid,
+ bool destroy_now,
+ void (*_manager_unref_uid)(Manager *m, uid_t uid, bool destroy_now)) {
+
+ assert(u);
+ assert(ref_uid);
+ assert(_manager_unref_uid);
+
+ /* Generic implementation of both unit_unref_uid() and unit_unref_gid(), under the assumption that uid_t and
+ * gid_t are actually the same time, with the same validity rules.
+ *
+ * Drops a reference to UID/GID from a unit. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (!uid_is_valid(*ref_uid))
+ return;
+
+ _manager_unref_uid(u->manager, *ref_uid, destroy_now);
+ *ref_uid = UID_INVALID;
+}
+
+static void unit_unref_uid(Unit *u, bool destroy_now) {
+ unit_unref_uid_internal(u, &u->ref_uid, destroy_now, manager_unref_uid);
+}
+
+static void unit_unref_gid(Unit *u, bool destroy_now) {
+ unit_unref_uid_internal(u, (uid_t*) &u->ref_gid, destroy_now, manager_unref_gid);
+}
+
+void unit_unref_uid_gid(Unit *u, bool destroy_now) {
+ assert(u);
+
+ unit_unref_uid(u, destroy_now);
+ unit_unref_gid(u, destroy_now);
+}
+
+static int unit_ref_uid_internal(
+ Unit *u,
+ uid_t *ref_uid,
+ uid_t uid,
+ bool clean_ipc,
+ int (*_manager_ref_uid)(Manager *m, uid_t uid, bool clean_ipc)) {
+
+ int r;
+
+ assert(u);
+ assert(ref_uid);
+ assert(uid_is_valid(uid));
+ assert(_manager_ref_uid);
+
+ /* Generic implementation of both unit_ref_uid() and unit_ref_guid(), under the assumption that uid_t and gid_t
+ * are actually the same type, and have the same validity rules.
+ *
+ * Adds a reference on a specific UID/GID to this unit. Each unit referencing the same UID/GID maintains a
+ * reference so that we can destroy the UID/GID's IPC resources as soon as this is requested and the counter
+ * drops to zero. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (*ref_uid == uid)
+ return 0;
+
+ if (uid_is_valid(*ref_uid)) /* Already set? */
+ return -EBUSY;
+
+ r = _manager_ref_uid(u->manager, uid, clean_ipc);
+ if (r < 0)
+ return r;
+
+ *ref_uid = uid;
+ return 1;
+}
+
+static int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc) {
+ return unit_ref_uid_internal(u, &u->ref_uid, uid, clean_ipc, manager_ref_uid);
+}
+
+static int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc) {
+ return unit_ref_uid_internal(u, (uid_t*) &u->ref_gid, (uid_t) gid, clean_ipc, manager_ref_gid);
+}
+
+static int unit_ref_uid_gid_internal(Unit *u, uid_t uid, gid_t gid, bool clean_ipc) {
+ int r = 0, q = 0;
+
+ assert(u);
+
+ /* Reference both a UID and a GID in one go. Either references both, or neither. */
+
+ if (uid_is_valid(uid)) {
+ r = unit_ref_uid(u, uid, clean_ipc);
+ if (r < 0)
+ return r;
+ }
+
+ if (gid_is_valid(gid)) {
+ q = unit_ref_gid(u, gid, clean_ipc);
+ if (q < 0) {
+ if (r > 0)
+ unit_unref_uid(u, false);
+
+ return q;
+ }
+ }
+
+ return r > 0 || q > 0;
+}
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid) {
+ ExecContext *c;
+ int r;
+
+ assert(u);
+
+ c = unit_get_exec_context(u);
+
+ r = unit_ref_uid_gid_internal(u, uid, gid, c ? c->remove_ipc : false);
+ if (r < 0)
+ return log_unit_warning_errno(u, r, "Couldn't add UID/GID reference to unit, proceeding without: %m");
+
+ return r;
+}
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) {
+ int r;
+
+ assert(u);
+
+ /* This is invoked whenever one of the forked off processes let's us know the UID/GID its user name/group names
+ * resolved to. We keep track of which UID/GID is currently assigned in order to be able to destroy its IPC
+ * objects when no service references the UID/GID anymore. */
+
+ r = unit_ref_uid_gid(u, uid, gid);
+ if (r > 0)
+ unit_add_to_dbus_queue(u);
+}
+
+int unit_acquire_invocation_id(Unit *u) {
+ sd_id128_t id;
+ int r;
+
+ assert(u);
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to generate invocation ID for unit: %m");
+
+ r = unit_set_invocation_id(u, id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m");
+
+ unit_add_to_dbus_queue(u);
+ return 0;
+}
+
+int unit_set_exec_params(Unit *u, ExecParameters *p) {
+ int r;
+
+ assert(u);
+ assert(p);
+
+ /* Copy parameters from manager */
+ r = manager_get_effective_environment(u->manager, &p->environment);
+ if (r < 0)
+ return r;
+
+ p->confirm_spawn = manager_get_confirm_spawn(u->manager);
+ p->cgroup_supported = u->manager->cgroup_supported;
+ p->prefix = u->manager->prefix;
+ SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(u->manager));
+
+ /* Copy parameters from unit */
+ p->cgroup_path = u->cgroup_path;
+ SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
+
+ p->received_credentials = u->manager->received_credentials;
+
+ return 0;
+}
+
+int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) {
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ /* Forks off a helper process and makes sure it is a member of the unit's cgroup. Returns == 0 in the child,
+ * and > 0 in the parent. The pid parameter is always filled in with the child's PID. */
+
+ (void) unit_realize_cgroup(u);
+
+ r = safe_fork(name, FORK_REOPEN_LOG, ret);
+ if (r != 0)
+ return r;
+
+ (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
+ (void) ignore_signals(SIGPIPE, -1);
+
+ (void) prctl(PR_SET_PDEATHSIG, SIGTERM);
+
+ if (u->cgroup_path) {
+ r = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", u->cgroup_path);
+ _exit(EXIT_CGROUP);
+ }
+ }
+
+ return 0;
+}
+
+int unit_fork_and_watch_rm_rf(Unit *u, char **paths, pid_t *ret_pid) {
+ pid_t pid;
+ int r;
+
+ assert(u);
+ assert(ret_pid);
+
+ r = unit_fork_helper_process(u, "(sd-rmrf)", &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ int ret = EXIT_SUCCESS;
+ char **i;
+
+ STRV_FOREACH(i, paths) {
+ r = rm_rf(*i, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_MISSING_OK);
+ if (r < 0) {
+ log_error_errno(r, "Failed to remove '%s': %m", *i);
+ ret = EXIT_FAILURE;
+ }
+ }
+
+ _exit(ret);
+ }
+
+ r = unit_watch_pid(u, pid, true);
+ if (r < 0)
+ return r;
+
+ *ret_pid = pid;
+ return 0;
+}
+
+static void unit_update_dependency_mask(Unit *u, UnitDependency d, Unit *other, UnitDependencyInfo di) {
+ assert(u);
+ assert(d >= 0);
+ assert(d < _UNIT_DEPENDENCY_MAX);
+ assert(other);
+
+ if (di.origin_mask == 0 && di.destination_mask == 0) {
+ /* No bit set anymore, let's drop the whole entry */
+ assert_se(hashmap_remove(u->dependencies[d], other));
+ log_unit_debug(u, "lost dependency %s=%s", unit_dependency_to_string(d), other->id);
+ } else
+ /* Mask was reduced, let's update the entry */
+ assert_se(hashmap_update(u->dependencies[d], other, di.data) == 0);
+}
+
+void unit_remove_dependencies(Unit *u, UnitDependencyMask mask) {
+ assert(u);
+
+ /* Removes all dependencies u has on other units marked for ownership by 'mask'. */
+
+ if (mask == 0)
+ return;
+
+ for (UnitDependency d = 0; d < _UNIT_DEPENDENCY_MAX; d++) {
+ bool done;
+
+ do {
+ UnitDependencyInfo di;
+ Unit *other;
+
+ done = true;
+
+ HASHMAP_FOREACH_KEY(di.data, other, u->dependencies[d]) {
+ if (FLAGS_SET(~mask, di.origin_mask))
+ continue;
+ di.origin_mask &= ~mask;
+ unit_update_dependency_mask(u, d, other, di);
+
+ /* We updated the dependency from our unit to the other unit now. But most dependencies
+ * imply a reverse dependency. Hence, let's delete that one too. For that we go through
+ * all dependency types on the other unit and delete all those which point to us and
+ * have the right mask set. */
+
+ for (UnitDependency q = 0; q < _UNIT_DEPENDENCY_MAX; q++) {
+ UnitDependencyInfo dj;
+
+ dj.data = hashmap_get(other->dependencies[q], u);
+ if (FLAGS_SET(~mask, dj.destination_mask))
+ continue;
+ dj.destination_mask &= ~mask;
+
+ unit_update_dependency_mask(other, q, u, dj);
+ }
+
+ unit_add_to_gc_queue(other);
+
+ done = false;
+ break;
+ }
+
+ } while (!done);
+ }
+}
+
+static int unit_get_invocation_path(Unit *u, char **ret) {
+ char *p;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ if (MANAGER_IS_SYSTEM(u->manager))
+ p = strjoin("/run/systemd/units/invocation:", u->id);
+ else {
+ _cleanup_free_ char *user_path = NULL;
+ r = xdg_user_runtime_dir(&user_path, "/systemd/units/invocation:");
+ if (r < 0)
+ return r;
+ p = strjoin(user_path, u->id);
+ }
+
+ if (!p)
+ return -ENOMEM;
+
+ *ret = p;
+ return 0;
+}
+
+static int unit_export_invocation_id(Unit *u) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(u);
+
+ if (u->exported_invocation_id)
+ return 0;
+
+ if (sd_id128_is_null(u->invocation_id))
+ return 0;
+
+ r = unit_get_invocation_path(u, &p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to get invocation path: %m");
+
+ r = symlink_atomic_label(u->invocation_id_string, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create invocation ID symlink %s: %m", p);
+
+ u->exported_invocation_id = true;
+ return 0;
+}
+
+static int unit_export_log_level_max(Unit *u, const ExecContext *c) {
+ const char *p;
+ char buf[2];
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (u->exported_log_level_max)
+ return 0;
+
+ if (c->log_level_max < 0)
+ return 0;
+
+ assert(c->log_level_max <= 7);
+
+ buf[0] = '0' + c->log_level_max;
+ buf[1] = 0;
+
+ p = strjoina("/run/systemd/units/log-level-max:", u->id);
+ r = symlink_atomic(buf, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create maximum log level symlink %s: %m", p);
+
+ u->exported_log_level_max = true;
+ return 0;
+}
+
+static int unit_export_log_extra_fields(Unit *u, const ExecContext *c) {
+ _cleanup_close_ int fd = -1;
+ struct iovec *iovec;
+ const char *p;
+ char *pattern;
+ le64_t *sizes;
+ ssize_t n;
+ int r;
+
+ if (u->exported_log_extra_fields)
+ return 0;
+
+ if (c->n_log_extra_fields <= 0)
+ return 0;
+
+ sizes = newa(le64_t, c->n_log_extra_fields);
+ iovec = newa(struct iovec, c->n_log_extra_fields * 2);
+
+ for (size_t i = 0; i < c->n_log_extra_fields; i++) {
+ sizes[i] = htole64(c->log_extra_fields[i].iov_len);
+
+ iovec[i*2] = IOVEC_MAKE(sizes + i, sizeof(le64_t));
+ iovec[i*2+1] = c->log_extra_fields[i];
+ }
+
+ p = strjoina("/run/systemd/units/log-extra-fields:", u->id);
+ pattern = strjoina(p, ".XXXXXX");
+
+ fd = mkostemp_safe(pattern);
+ if (fd < 0)
+ return log_unit_debug_errno(u, fd, "Failed to create extra fields file %s: %m", p);
+
+ n = writev(fd, iovec, c->n_log_extra_fields*2);
+ if (n < 0) {
+ r = log_unit_debug_errno(u, errno, "Failed to write extra fields: %m");
+ goto fail;
+ }
+
+ (void) fchmod(fd, 0644);
+
+ if (rename(pattern, p) < 0) {
+ r = log_unit_debug_errno(u, errno, "Failed to rename extra fields file: %m");
+ goto fail;
+ }
+
+ u->exported_log_extra_fields = true;
+ return 0;
+
+fail:
+ (void) unlink(pattern);
+ return r;
+}
+
+static int unit_export_log_ratelimit_interval(Unit *u, const ExecContext *c) {
+ _cleanup_free_ char *buf = NULL;
+ const char *p;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (u->exported_log_ratelimit_interval)
+ return 0;
+
+ if (c->log_ratelimit_interval_usec == 0)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-interval:", u->id);
+
+ if (asprintf(&buf, "%" PRIu64, c->log_ratelimit_interval_usec) < 0)
+ return log_oom();
+
+ r = symlink_atomic(buf, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create log rate limit interval symlink %s: %m", p);
+
+ u->exported_log_ratelimit_interval = true;
+ return 0;
+}
+
+static int unit_export_log_ratelimit_burst(Unit *u, const ExecContext *c) {
+ _cleanup_free_ char *buf = NULL;
+ const char *p;
+ int r;
+
+ assert(u);
+ assert(c);
+
+ if (u->exported_log_ratelimit_burst)
+ return 0;
+
+ if (c->log_ratelimit_burst == 0)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-burst:", u->id);
+
+ if (asprintf(&buf, "%u", c->log_ratelimit_burst) < 0)
+ return log_oom();
+
+ r = symlink_atomic(buf, p);
+ if (r < 0)
+ return log_unit_debug_errno(u, r, "Failed to create log rate limit burst symlink %s: %m", p);
+
+ u->exported_log_ratelimit_burst = true;
+ return 0;
+}
+
+void unit_export_state_files(Unit *u) {
+ const ExecContext *c;
+
+ assert(u);
+
+ if (!u->id)
+ return;
+
+ if (MANAGER_IS_TEST_RUN(u->manager))
+ return;
+
+ /* Exports a couple of unit properties to /run/systemd/units/, so that journald can quickly query this data
+ * from there. Ideally, journald would use IPC to query this, like everybody else, but that's hard, as long as
+ * the IPC system itself and PID 1 also log to the journal.
+ *
+ * Note that these files really shouldn't be considered API for anyone else, as use a runtime file system as
+ * IPC replacement is not compatible with today's world of file system namespaces. However, this doesn't really
+ * apply to communication between the journal and systemd, as we assume that these two daemons live in the same
+ * namespace at least.
+ *
+ * Note that some of the "files" exported here are actually symlinks and not regular files. Symlinks work
+ * better for storing small bits of data, in particular as we can write them with two system calls, and read
+ * them with one. */
+
+ (void) unit_export_invocation_id(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ c = unit_get_exec_context(u);
+ if (c) {
+ (void) unit_export_log_level_max(u, c);
+ (void) unit_export_log_extra_fields(u, c);
+ (void) unit_export_log_ratelimit_interval(u, c);
+ (void) unit_export_log_ratelimit_burst(u, c);
+ }
+}
+
+void unit_unlink_state_files(Unit *u) {
+ const char *p;
+
+ assert(u);
+
+ if (!u->id)
+ return;
+
+ /* Undoes the effect of unit_export_state() */
+
+ if (u->exported_invocation_id) {
+ _cleanup_free_ char *invocation_path = NULL;
+ int r = unit_get_invocation_path(u, &invocation_path);
+ if (r >= 0) {
+ (void) unlink(invocation_path);
+ u->exported_invocation_id = false;
+ }
+ }
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ if (u->exported_log_level_max) {
+ p = strjoina("/run/systemd/units/log-level-max:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_level_max = false;
+ }
+
+ if (u->exported_log_extra_fields) {
+ p = strjoina("/run/systemd/units/extra-fields:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_extra_fields = false;
+ }
+
+ if (u->exported_log_ratelimit_interval) {
+ p = strjoina("/run/systemd/units/log-rate-limit-interval:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_ratelimit_interval = false;
+ }
+
+ if (u->exported_log_ratelimit_burst) {
+ p = strjoina("/run/systemd/units/log-rate-limit-burst:", u->id);
+ (void) unlink(p);
+
+ u->exported_log_ratelimit_burst = false;
+ }
+}
+
+int unit_prepare_exec(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Load any custom firewall BPF programs here once to test if they are existing and actually loadable.
+ * Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */
+ r = bpf_firewall_load_custom(u);
+ if (r < 0)
+ return r;
+
+ /* Prepares everything so that we can fork of a process for this unit */
+
+ (void) unit_realize_cgroup(u);
+
+ if (u->reset_accounting) {
+ (void) unit_reset_accounting(u);
+ u->reset_accounting = false;
+ }
+
+ unit_export_state_files(u);
+
+ r = unit_setup_exec_runtime(u);
+ if (r < 0)
+ return r;
+
+ r = unit_setup_dynamic_creds(u);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool ignore_leftover_process(const char *comm) {
+ return comm && comm[0] == '('; /* Most likely our own helper process (PAM?), ignore */
+}
+
+int unit_log_leftover_process_start(pid_t pid, int sig, void *userdata) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ if (ignore_leftover_process(comm))
+ return 0;
+
+ /* During start we print a warning */
+
+ log_unit_warning(userdata,
+ "Found left-over process " PID_FMT " (%s) in control group while starting unit. Ignoring.\n"
+ "This usually indicates unclean termination of a previous run, or service implementation deficiencies.",
+ pid, strna(comm));
+
+ return 1;
+}
+
+int unit_log_leftover_process_stop(pid_t pid, int sig, void *userdata) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ if (ignore_leftover_process(comm))
+ return 0;
+
+ /* During stop we only print an informational message */
+
+ log_unit_info(userdata,
+ "Unit process " PID_FMT " (%s) remains running after unit stopped.",
+ pid, strna(comm));
+
+ return 1;
+}
+
+int unit_warn_leftover_processes(Unit *u, cg_kill_log_func_t log_func) {
+ assert(u);
+
+ (void) unit_pick_cgroup_path(u);
+
+ if (!u->cgroup_path)
+ return 0;
+
+ return cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, 0, 0, NULL, log_func, u);
+}
+
+bool unit_needs_console(Unit *u) {
+ ExecContext *ec;
+ UnitActiveState state;
+
+ assert(u);
+
+ state = unit_active_state(u);
+
+ if (UNIT_IS_INACTIVE_OR_FAILED(state))
+ return false;
+
+ if (UNIT_VTABLE(u)->needs_console)
+ return UNIT_VTABLE(u)->needs_console(u);
+
+ /* If this unit type doesn't implement this call, let's use a generic fallback implementation: */
+ ec = unit_get_exec_context(u);
+ if (!ec)
+ return false;
+
+ return exec_context_may_touch_console(ec);
+}
+
+const char *unit_label_path(const Unit *u) {
+ const char *p;
+
+ assert(u);
+
+ /* Returns the file system path to use for MAC access decisions, i.e. the file to read the SELinux label off
+ * when validating access checks. */
+
+ p = u->source_path ?: u->fragment_path;
+ if (!p)
+ return NULL;
+
+ /* If a unit is masked, then don't read the SELinux label of /dev/null, as that really makes no sense */
+ if (null_or_empty_path(p) > 0)
+ return NULL;
+
+ return p;
+}
+
+int unit_pid_attachable(Unit *u, pid_t pid, sd_bus_error *error) {
+ int r;
+
+ assert(u);
+
+ /* Checks whether the specified PID is generally good for attaching, i.e. a valid PID, not our manager itself,
+ * and not a kernel thread either */
+
+ /* First, a simple range check */
+ if (!pid_is_valid(pid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process identifier " PID_FMT " is not valid.", pid);
+
+ /* Some extra safety check */
+ if (pid == 1 || pid == getpid_cached())
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process " PID_FMT " is a manager process, refusing.", pid);
+
+ /* Don't even begin to bother with kernel threads */
+ r = is_kernel_thread(pid);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, "Process with ID " PID_FMT " does not exist.", pid);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether process " PID_FMT " is a kernel thread: %m", pid);
+ if (r > 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Process " PID_FMT " is a kernel thread, refusing.", pid);
+
+ return 0;
+}
+
+void unit_log_success(Unit *u) {
+ assert(u);
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_SUCCESS_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Succeeded."));
+}
+
+void unit_log_failure(Unit *u, const char *result) {
+ assert(u);
+ assert(result);
+
+ log_struct(LOG_WARNING,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_FAILURE_RESULT_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Failed with result '%s'.", result),
+ "UNIT_RESULT=%s", result);
+}
+
+void unit_log_skip(Unit *u, const char *result) {
+ assert(u);
+ assert(result);
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_SKIPPED_STR,
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u),
+ LOG_UNIT_MESSAGE(u, "Skipped due to '%s'.", result),
+ "UNIT_RESULT=%s", result);
+}
+
+void unit_log_process_exit(
+ Unit *u,
+ const char *kind,
+ const char *command,
+ bool success,
+ int code,
+ int status) {
+
+ int level;
+
+ assert(u);
+ assert(kind);
+
+ /* If this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure
+ * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption
+ * that the service already logged the reason at a higher log level on its own. Otherwise, make it a
+ * WARNING. */
+ if (success)
+ level = LOG_DEBUG;
+ else if (code == CLD_EXITED)
+ level = LOG_NOTICE;
+ else
+ level = LOG_WARNING;
+
+ log_struct(level,
+ "MESSAGE_ID=" SD_MESSAGE_UNIT_PROCESS_EXIT_STR,
+ LOG_UNIT_MESSAGE(u, "%s exited, code=%s, status=%i/%s",
+ kind,
+ sigchld_code_to_string(code), status,
+ strna(code == CLD_EXITED
+ ? exit_status_to_string(status, EXIT_STATUS_FULL)
+ : signal_to_string(status))),
+ "EXIT_CODE=%s", sigchld_code_to_string(code),
+ "EXIT_STATUS=%i", status,
+ "COMMAND=%s", strna(command),
+ LOG_UNIT_ID(u),
+ LOG_UNIT_INVOCATION_ID(u));
+}
+
+int unit_exit_status(Unit *u) {
+ assert(u);
+
+ /* Returns the exit status to propagate for the most recent cycle of this unit. Returns a value in the range
+ * 0…255 if there's something to propagate. EOPNOTSUPP if the concept does not apply to this unit type, ENODATA
+ * if no data is currently known (for example because the unit hasn't deactivated yet) and EBADE if the main
+ * service process has exited abnormally (signal/coredump). */
+
+ if (!UNIT_VTABLE(u)->exit_status)
+ return -EOPNOTSUPP;
+
+ return UNIT_VTABLE(u)->exit_status(u);
+}
+
+int unit_failure_action_exit_status(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Returns the exit status to propagate on failure, or an error if there's nothing to propagate */
+
+ if (u->failure_action_exit_status >= 0)
+ return u->failure_action_exit_status;
+
+ r = unit_exit_status(u);
+ if (r == -EBADE) /* Exited, but not cleanly (i.e. by signal or such) */
+ return 255;
+
+ return r;
+}
+
+int unit_success_action_exit_status(Unit *u) {
+ int r;
+
+ assert(u);
+
+ /* Returns the exit status to propagate on success, or an error if there's nothing to propagate */
+
+ if (u->success_action_exit_status >= 0)
+ return u->success_action_exit_status;
+
+ r = unit_exit_status(u);
+ if (r == -EBADE) /* Exited, but not cleanly (i.e. by signal or such) */
+ return 255;
+
+ return r;
+}
+
+int unit_test_trigger_loaded(Unit *u) {
+ Unit *trigger;
+
+ /* Tests whether the unit to trigger is loaded */
+
+ trigger = UNIT_TRIGGER(u);
+ if (!trigger)
+ return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOENT),
+ "Refusing to start, no unit to trigger.");
+ if (trigger->load_state != UNIT_LOADED)
+ return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOENT),
+ "Refusing to start, unit %s to trigger not loaded.", trigger->id);
+
+ return 0;
+}
+
+void unit_destroy_runtime_data(Unit *u, const ExecContext *context) {
+ assert(u);
+ assert(context);
+
+ if (context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO ||
+ (context->runtime_directory_preserve_mode == EXEC_PRESERVE_RESTART && !unit_will_restart(u)))
+ exec_context_destroy_runtime_directory(context, u->manager->prefix[EXEC_DIRECTORY_RUNTIME]);
+
+ exec_context_destroy_credentials(context, u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id);
+}
+
+int unit_clean(Unit *u, ExecCleanMask mask) {
+ UnitActiveState state;
+
+ assert(u);
+
+ /* Special return values:
+ *
+ * -EOPNOTSUPP → cleaning not supported for this unit type
+ * -EUNATCH → cleaning not defined for this resource type
+ * -EBUSY → unit currently can't be cleaned since it's running or not properly loaded, or has
+ * a job queued or similar
+ */
+
+ if (!UNIT_VTABLE(u)->clean)
+ return -EOPNOTSUPP;
+
+ if (mask == 0)
+ return -EUNATCH;
+
+ if (u->load_state != UNIT_LOADED)
+ return -EBUSY;
+
+ if (u->job)
+ return -EBUSY;
+
+ state = unit_active_state(u);
+ if (!IN_SET(state, UNIT_INACTIVE))
+ return -EBUSY;
+
+ return UNIT_VTABLE(u)->clean(u, mask);
+}
+
+int unit_can_clean(Unit *u, ExecCleanMask *ret) {
+ assert(u);
+
+ if (!UNIT_VTABLE(u)->clean ||
+ u->load_state != UNIT_LOADED) {
+ *ret = 0;
+ return 0;
+ }
+
+ /* When the clean() method is set, can_clean() really should be set too */
+ assert(UNIT_VTABLE(u)->can_clean);
+
+ return UNIT_VTABLE(u)->can_clean(u, ret);
+}
+
+bool unit_can_freeze(Unit *u) {
+ assert(u);
+
+ if (UNIT_VTABLE(u)->can_freeze)
+ return UNIT_VTABLE(u)->can_freeze(u);
+
+ return UNIT_VTABLE(u)->freeze;
+}
+
+void unit_frozen(Unit *u) {
+ assert(u);
+
+ u->freezer_state = FREEZER_FROZEN;
+
+ bus_unit_send_pending_freezer_message(u);
+}
+
+void unit_thawed(Unit *u) {
+ assert(u);
+
+ u->freezer_state = FREEZER_RUNNING;
+
+ bus_unit_send_pending_freezer_message(u);
+}
+
+static int unit_freezer_action(Unit *u, FreezerAction action) {
+ UnitActiveState s;
+ int (*method)(Unit*);
+ int r;
+
+ assert(u);
+ assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW));
+
+ method = action == FREEZER_FREEZE ? UNIT_VTABLE(u)->freeze : UNIT_VTABLE(u)->thaw;
+ if (!method || !cg_freezer_supported())
+ return -EOPNOTSUPP;
+
+ if (u->job)
+ return -EBUSY;
+
+ if (u->load_state != UNIT_LOADED)
+ return -EHOSTDOWN;
+
+ s = unit_active_state(u);
+ if (s != UNIT_ACTIVE)
+ return -EHOSTDOWN;
+
+ if (IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_THAWING))
+ return -EALREADY;
+
+ r = method(u);
+ if (r <= 0)
+ return r;
+
+ return 1;
+}
+
+int unit_freeze(Unit *u) {
+ return unit_freezer_action(u, FREEZER_FREEZE);
+}
+
+int unit_thaw(Unit *u) {
+ return unit_freezer_action(u, FREEZER_THAW);
+}
+
+/* Wrappers around low-level cgroup freezer operations common for service and scope units */
+int unit_freeze_vtable_common(Unit *u) {
+ return unit_cgroup_freezer_action(u, FREEZER_FREEZE);
+}
+
+int unit_thaw_vtable_common(Unit *u) {
+ return unit_cgroup_freezer_action(u, FREEZER_THAW);
+}
+
+static const char* const collect_mode_table[_COLLECT_MODE_MAX] = {
+ [COLLECT_INACTIVE] = "inactive",
+ [COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(collect_mode, CollectMode);
diff --git a/src/core/unit.h b/src/core/unit.h
new file mode 100644
index 0000000..02b2b24
--- /dev/null
+++ b/src/core/unit.h
@@ -0,0 +1,944 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "bpf-program.h"
+#include "condition.h"
+#include "emergency-action.h"
+#include "list.h"
+#include "show-status.h"
+#include "set.h"
+#include "unit-file.h"
+#include "cgroup.h"
+
+typedef struct UnitRef UnitRef;
+
+typedef enum KillOperation {
+ KILL_TERMINATE,
+ KILL_TERMINATE_AND_LOG,
+ KILL_RESTART,
+ KILL_KILL,
+ KILL_WATCHDOG,
+ _KILL_OPERATION_MAX,
+ _KILL_OPERATION_INVALID = -1
+} KillOperation;
+
+typedef enum CollectMode {
+ COLLECT_INACTIVE,
+ COLLECT_INACTIVE_OR_FAILED,
+ _COLLECT_MODE_MAX,
+ _COLLECT_MODE_INVALID = -1,
+} CollectMode;
+
+static inline bool UNIT_IS_ACTIVE_OR_RELOADING(UnitActiveState t) {
+ return IN_SET(t, UNIT_ACTIVE, UNIT_RELOADING);
+}
+
+static inline bool UNIT_IS_ACTIVE_OR_ACTIVATING(UnitActiveState t) {
+ return IN_SET(t, UNIT_ACTIVE, UNIT_ACTIVATING, UNIT_RELOADING);
+}
+
+static inline bool UNIT_IS_INACTIVE_OR_DEACTIVATING(UnitActiveState t) {
+ return IN_SET(t, UNIT_INACTIVE, UNIT_FAILED, UNIT_DEACTIVATING);
+}
+
+static inline bool UNIT_IS_INACTIVE_OR_FAILED(UnitActiveState t) {
+ return IN_SET(t, UNIT_INACTIVE, UNIT_FAILED);
+}
+
+static inline bool UNIT_IS_LOAD_COMPLETE(UnitLoadState t) {
+ return t >= 0 && t < _UNIT_LOAD_STATE_MAX && t != UNIT_STUB && t != UNIT_MERGED;
+}
+
+/* Stores the 'reason' a dependency was created as a bit mask, i.e. due to which configuration source it came to be. We
+ * use this so that we can selectively flush out parts of dependencies again. Note that the same dependency might be
+ * created as a result of multiple "reasons", hence the bitmask. */
+typedef enum UnitDependencyMask {
+ /* Configured directly by the unit file, .wants/.requires symlink or drop-in, or as an immediate result of a
+ * non-dependency option configured that way. */
+ UNIT_DEPENDENCY_FILE = 1 << 0,
+
+ /* As unconditional implicit dependency (not affected by unit configuration — except by the unit name and
+ * type) */
+ UNIT_DEPENDENCY_IMPLICIT = 1 << 1,
+
+ /* A dependency effected by DefaultDependencies=yes. Note that dependencies marked this way are conceptually
+ * just a subset of UNIT_DEPENDENCY_FILE, as DefaultDependencies= is itself a unit file setting that can only
+ * be set in unit files. We make this two separate bits only to help debugging how dependencies came to be. */
+ UNIT_DEPENDENCY_DEFAULT = 1 << 2,
+
+ /* A dependency created from udev rules */
+ UNIT_DEPENDENCY_UDEV = 1 << 3,
+
+ /* A dependency created because of some unit's RequiresMountsFor= setting */
+ UNIT_DEPENDENCY_PATH = 1 << 4,
+
+ /* A dependency created because of data read from /proc/self/mountinfo and no other configuration source */
+ UNIT_DEPENDENCY_MOUNTINFO_IMPLICIT = 1 << 5,
+
+ /* A dependency created because of data read from /proc/self/mountinfo, but conditionalized by
+ * DefaultDependencies= and thus also involving configuration from UNIT_DEPENDENCY_FILE sources */
+ UNIT_DEPENDENCY_MOUNTINFO_DEFAULT = 1 << 6,
+
+ /* A dependency created because of data read from /proc/swaps and no other configuration source */
+ UNIT_DEPENDENCY_PROC_SWAP = 1 << 7,
+
+ _UNIT_DEPENDENCY_MASK_FULL = (1 << 8) - 1,
+} UnitDependencyMask;
+
+/* The Unit's dependencies[] hashmaps use this structure as value. It has the same size as a void pointer, and thus can
+ * be stored directly as hashmap value, without any indirection. Note that this stores two masks, as both the origin
+ * and the destination of a dependency might have created it. */
+typedef union UnitDependencyInfo {
+ void *data;
+ struct {
+ UnitDependencyMask origin_mask:16;
+ UnitDependencyMask destination_mask:16;
+ } _packed_;
+} UnitDependencyInfo;
+
+#include "job.h"
+
+struct UnitRef {
+ /* Keeps tracks of references to a unit. This is useful so
+ * that we can merge two units if necessary and correct all
+ * references to them */
+
+ Unit *source, *target;
+ LIST_FIELDS(UnitRef, refs_by_target);
+};
+
+typedef struct Unit {
+ Manager *manager;
+
+ UnitType type;
+ UnitLoadState load_state;
+ Unit *merged_into;
+
+ FreezerState freezer_state;
+ sd_bus_message *pending_freezer_message;
+
+ char *id; /* The one special name that we use for identification */
+ char *instance;
+
+ Set *aliases; /* All the other names. */
+
+ /* For each dependency type we maintain a Hashmap whose key is the Unit* object, and the value encodes why the
+ * dependency exists, using the UnitDependencyInfo type */
+ Hashmap *dependencies[_UNIT_DEPENDENCY_MAX];
+
+ /* Similar, for RequiresMountsFor= path dependencies. The key is the path, the value the UnitDependencyInfo type */
+ Hashmap *requires_mounts_for;
+
+ char *description;
+ char **documentation;
+
+ char *fragment_path; /* if loaded from a config file this is the primary path to it */
+ char *source_path; /* if converted, the source file */
+ char **dropin_paths;
+
+ usec_t fragment_not_found_timestamp_hash;
+ usec_t fragment_mtime;
+ usec_t source_mtime;
+ usec_t dropin_mtime;
+
+ /* If this is a transient unit we are currently writing, this is where we are writing it to */
+ FILE *transient_file;
+
+ /* If there is something to do with this unit, then this is the installed job for it */
+ Job *job;
+
+ /* JOB_NOP jobs are special and can be installed without disturbing the real job. */
+ Job *nop_job;
+
+ /* The slot used for watching NameOwnerChanged signals */
+ sd_bus_slot *match_bus_slot;
+ sd_bus_slot *get_name_owner_slot;
+
+ /* References to this unit from clients */
+ sd_bus_track *bus_track;
+ char **deserialized_refs;
+
+ /* Job timeout and action to take */
+ usec_t job_timeout;
+ usec_t job_running_timeout;
+ bool job_running_timeout_set:1;
+ EmergencyAction job_timeout_action;
+ char *job_timeout_reboot_arg;
+
+ /* References to this */
+ LIST_HEAD(UnitRef, refs_by_target);
+
+ /* Conditions to check */
+ LIST_HEAD(Condition, conditions);
+ LIST_HEAD(Condition, asserts);
+
+ dual_timestamp condition_timestamp;
+ dual_timestamp assert_timestamp;
+
+ /* Updated whenever the low-level state changes */
+ dual_timestamp state_change_timestamp;
+
+ /* Updated whenever the (high-level) active state enters or leaves the active or inactive states */
+ dual_timestamp inactive_exit_timestamp;
+ dual_timestamp active_enter_timestamp;
+ dual_timestamp active_exit_timestamp;
+ dual_timestamp inactive_enter_timestamp;
+
+ UnitRef slice;
+
+ /* Per type list */
+ LIST_FIELDS(Unit, units_by_type);
+
+ /* Load queue */
+ LIST_FIELDS(Unit, load_queue);
+
+ /* D-Bus queue */
+ LIST_FIELDS(Unit, dbus_queue);
+
+ /* Cleanup queue */
+ LIST_FIELDS(Unit, cleanup_queue);
+
+ /* GC queue */
+ LIST_FIELDS(Unit, gc_queue);
+
+ /* CGroup realize members queue */
+ LIST_FIELDS(Unit, cgroup_realize_queue);
+
+ /* cgroup empty queue */
+ LIST_FIELDS(Unit, cgroup_empty_queue);
+
+ /* cgroup OOM queue */
+ LIST_FIELDS(Unit, cgroup_oom_queue);
+
+ /* Target dependencies queue */
+ LIST_FIELDS(Unit, target_deps_queue);
+
+ /* Queue of units with StopWhenUnneeded set that shell be checked for clean-up. */
+ LIST_FIELDS(Unit, stop_when_unneeded_queue);
+
+ /* PIDs we keep an eye on. Note that a unit might have many
+ * more, but these are the ones we care enough about to
+ * process SIGCHLD for */
+ Set *pids;
+
+ /* Used in SIGCHLD and sd_notify() message event invocation logic to avoid that we dispatch the same event
+ * multiple times on the same unit. */
+ unsigned sigchldgen;
+ unsigned notifygen;
+
+ /* Used during GC sweeps */
+ unsigned gc_marker;
+
+ /* Error code when we didn't manage to load the unit (negative) */
+ int load_error;
+
+ /* Put a ratelimit on unit starting */
+ RateLimit start_ratelimit;
+ EmergencyAction start_limit_action;
+
+ /* What to do on failure or success */
+ EmergencyAction success_action, failure_action;
+ int success_action_exit_status, failure_action_exit_status;
+ char *reboot_arg;
+
+ /* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
+ RateLimit auto_stop_ratelimit;
+
+ /* Reference to a specific UID/GID */
+ uid_t ref_uid;
+ gid_t ref_gid;
+
+ /* Cached unit file state and preset */
+ UnitFileState unit_file_state;
+ int unit_file_preset;
+
+ /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+ nsec_t cpu_usage_base;
+ nsec_t cpu_usage_last; /* the most recently read value */
+
+ /* The current counter of processes sent SIGKILL by systemd-oomd */
+ uint64_t managed_oom_kill_last;
+
+ /* The current counter of the oom_kill field in the memory.events cgroup attribute */
+ uint64_t oom_kill_last;
+
+ /* Where the io.stat data was at the time the unit was started */
+ uint64_t io_accounting_base[_CGROUP_IO_ACCOUNTING_METRIC_MAX];
+ uint64_t io_accounting_last[_CGROUP_IO_ACCOUNTING_METRIC_MAX]; /* the most recently read value */
+
+ /* Counterparts in the cgroup filesystem */
+ char *cgroup_path;
+ CGroupMask cgroup_realized_mask; /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */
+ CGroupMask cgroup_enabled_mask; /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */
+ CGroupMask cgroup_invalidated_mask; /* A mask specifying controllers which shall be considered invalidated, and require re-realization */
+ CGroupMask cgroup_members_mask; /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */
+
+ /* Inotify watch descriptors for watching cgroup.events and memory.events on cgroupv2 */
+ int cgroup_control_inotify_wd;
+ int cgroup_memory_inotify_wd;
+
+ /* Device Controller BPF program */
+ BPFProgram *bpf_device_control_installed;
+
+ /* IP BPF Firewalling/accounting */
+ int ip_accounting_ingress_map_fd;
+ int ip_accounting_egress_map_fd;
+
+ int ipv4_allow_map_fd;
+ int ipv6_allow_map_fd;
+ int ipv4_deny_map_fd;
+ int ipv6_deny_map_fd;
+
+ BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
+ BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+ Set *ip_bpf_custom_ingress;
+ Set *ip_bpf_custom_ingress_installed;
+ Set *ip_bpf_custom_egress;
+ Set *ip_bpf_custom_egress_installed;
+
+ uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
+
+ /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
+ * ones which might have appeared. */
+ sd_event_source *rewatch_pids_event_source;
+
+ /* How to start OnFailure units */
+ JobMode on_failure_job_mode;
+
+ /* Tweaking the GC logic */
+ CollectMode collect_mode;
+
+ /* The current invocation ID */
+ sd_id128_t invocation_id;
+ char invocation_id_string[SD_ID128_STRING_MAX]; /* useful when logging */
+
+ /* Garbage collect us we nobody wants or requires us anymore */
+ bool stop_when_unneeded;
+
+ /* Create default dependencies */
+ bool default_dependencies;
+
+ /* Refuse manual starting, allow starting only indirectly via dependency. */
+ bool refuse_manual_start;
+
+ /* Don't allow the user to stop this unit manually, allow stopping only indirectly via dependency. */
+ bool refuse_manual_stop;
+
+ /* Allow isolation requests */
+ bool allow_isolate;
+
+ /* Ignore this unit when isolating */
+ bool ignore_on_isolate;
+
+ /* Did the last condition check succeed? */
+ bool condition_result;
+ bool assert_result;
+
+ /* Is this a transient unit? */
+ bool transient;
+
+ /* Is this a unit that is always running and cannot be stopped? */
+ bool perpetual;
+
+ /* Booleans indicating membership of this unit in the various queues */
+ bool in_load_queue:1;
+ bool in_dbus_queue:1;
+ bool in_cleanup_queue:1;
+ bool in_gc_queue:1;
+ bool in_cgroup_realize_queue:1;
+ bool in_cgroup_empty_queue:1;
+ bool in_cgroup_oom_queue:1;
+ bool in_target_deps_queue:1;
+ bool in_stop_when_unneeded_queue:1;
+
+ bool sent_dbus_new_signal:1;
+
+ bool in_audit:1;
+ bool on_console:1;
+
+ bool cgroup_realized:1;
+ bool cgroup_members_mask_valid:1;
+
+ /* Reset cgroup accounting next time we fork something off */
+ bool reset_accounting:1;
+
+ bool start_limit_hit:1;
+
+ /* Did we already invoke unit_coldplug() for this unit? */
+ bool coldplugged:1;
+
+ /* For transient units: whether to add a bus track reference after creating the unit */
+ bool bus_track_add:1;
+
+ /* Remember which unit state files we created */
+ bool exported_invocation_id:1;
+ bool exported_log_level_max:1;
+ bool exported_log_extra_fields:1;
+ bool exported_log_ratelimit_interval:1;
+ bool exported_log_ratelimit_burst:1;
+
+ /* Whether we warned about clamping the CPU quota period */
+ bool warned_clamping_cpu_quota_period:1;
+
+ /* When writing transient unit files, stores which section we stored last. If < 0, we didn't write any yet. If
+ * == 0 we are in the [Unit] section, if > 0 we are in the unit type-specific section. */
+ signed int last_section_private:2;
+} Unit;
+
+typedef struct UnitStatusMessageFormats {
+ const char *starting_stopping[2];
+ const char *finished_start_job[_JOB_RESULT_MAX];
+ const char *finished_stop_job[_JOB_RESULT_MAX];
+ /* If this entry is present, it'll be called to provide a context-dependent format string,
+ * or NULL to fall back to finished_{start,stop}_job; if those are NULL too, fall back to generic. */
+ const char *(*finished_job)(Unit *u, JobType t, JobResult result);
+} UnitStatusMessageFormats;
+
+/* Flags used when writing drop-in files or transient unit files */
+typedef enum UnitWriteFlags {
+ /* Write a runtime unit file or drop-in (i.e. one below /run) */
+ UNIT_RUNTIME = 1 << 0,
+
+ /* Write a persistent drop-in (i.e. one below /etc) */
+ UNIT_PERSISTENT = 1 << 1,
+
+ /* Place this item in the per-unit-type private section, instead of [Unit] */
+ UNIT_PRIVATE = 1 << 2,
+
+ /* Apply specifier escaping before writing */
+ UNIT_ESCAPE_SPECIFIERS = 1 << 3,
+
+ /* Apply C escaping before writing */
+ UNIT_ESCAPE_C = 1 << 4,
+} UnitWriteFlags;
+
+/* Returns true if neither persistent, nor runtime storage is requested, i.e. this is a check invocation only */
+static inline bool UNIT_WRITE_FLAGS_NOOP(UnitWriteFlags flags) {
+ return (flags & (UNIT_RUNTIME|UNIT_PERSISTENT)) == 0;
+}
+
+#include "kill.h"
+
+typedef struct UnitVTable {
+ /* How much memory does an object of this unit type need */
+ size_t object_size;
+
+ /* If greater than 0, the offset into the object where
+ * ExecContext is found, if the unit type has that */
+ size_t exec_context_offset;
+
+ /* If greater than 0, the offset into the object where
+ * CGroupContext is found, if the unit type has that */
+ size_t cgroup_context_offset;
+
+ /* If greater than 0, the offset into the object where
+ * KillContext is found, if the unit type has that */
+ size_t kill_context_offset;
+
+ /* If greater than 0, the offset into the object where the
+ * pointer to ExecRuntime is found, if the unit type has
+ * that */
+ size_t exec_runtime_offset;
+
+ /* If greater than 0, the offset into the object where the pointer to DynamicCreds is found, if the unit type
+ * has that. */
+ size_t dynamic_creds_offset;
+
+ /* The name of the configuration file section with the private settings of this unit */
+ const char *private_section;
+
+ /* Config file sections this unit type understands, separated
+ * by NUL chars */
+ const char *sections;
+
+ /* This should reset all type-specific variables. This should
+ * not allocate memory, and is called with zero-initialized
+ * data. It should hence only initialize variables that need
+ * to be set != 0. */
+ void (*init)(Unit *u);
+
+ /* This should free all type-specific variables. It should be
+ * idempotent. */
+ void (*done)(Unit *u);
+
+ /* Actually load data from disk. This may fail, and should set
+ * load_state to UNIT_LOADED, UNIT_MERGED or leave it at
+ * UNIT_STUB if no configuration could be found. */
+ int (*load)(Unit *u);
+
+ /* During deserialization we only record the intended state to return to. With coldplug() we actually put the
+ * deserialized state in effect. This is where unit_notify() should be called to start things up. Note that
+ * this callback is invoked *before* we leave the reloading state of the manager, i.e. *before* we consider the
+ * reloading to be complete. Thus, this callback should just restore the exact same state for any unit that was
+ * in effect before the reload, i.e. units should not catch up with changes happened during the reload. That's
+ * what catchup() below is for. */
+ int (*coldplug)(Unit *u);
+
+ /* This is called shortly after all units' coldplug() call was invoked, and *after* the manager left the
+ * reloading state. It's supposed to catch up with state changes due to external events we missed so far (for
+ * example because they took place while we were reloading/reexecing) */
+ void (*catchup)(Unit *u);
+
+ void (*dump)(Unit *u, FILE *f, const char *prefix);
+
+ int (*start)(Unit *u);
+ int (*stop)(Unit *u);
+ int (*reload)(Unit *u);
+
+ int (*kill)(Unit *u, KillWho w, int signo, sd_bus_error *error);
+
+ /* Clear out the various runtime/state/cache/logs/configuration data */
+ int (*clean)(Unit *u, ExecCleanMask m);
+
+ /* Freeze the unit */
+ int (*freeze)(Unit *u);
+ int (*thaw)(Unit *u);
+ bool (*can_freeze)(Unit *u);
+
+ /* Return which kind of data can be cleaned */
+ int (*can_clean)(Unit *u, ExecCleanMask *ret);
+
+ bool (*can_reload)(Unit *u);
+
+ /* Write all data that cannot be restored from other sources
+ * away using unit_serialize_item() */
+ int (*serialize)(Unit *u, FILE *f, FDSet *fds);
+
+ /* Restore one item from the serialization */
+ int (*deserialize_item)(Unit *u, const char *key, const char *data, FDSet *fds);
+
+ /* Try to match up fds with what we need for this unit */
+ void (*distribute_fds)(Unit *u, FDSet *fds);
+
+ /* Boils down the more complex internal state of this unit to
+ * a simpler one that the engine can understand */
+ UnitActiveState (*active_state)(Unit *u);
+
+ /* Returns the substate specific to this unit type as
+ * string. This is purely information so that we can give the
+ * user a more fine grained explanation in which actual state a
+ * unit is in. */
+ const char* (*sub_state_to_string)(Unit *u);
+
+ /* Additionally to UnitActiveState determine whether unit is to be restarted. */
+ bool (*will_restart)(Unit *u);
+
+ /* Return false when there is a reason to prevent this unit from being gc'ed
+ * even though nothing references it and it isn't active in any way. */
+ bool (*may_gc)(Unit *u);
+
+ /* Return true when the unit is not controlled by the manager (e.g. extrinsic mounts). */
+ bool (*is_extrinsic)(Unit *u);
+
+ /* When the unit is not running and no job for it queued we shall release its runtime resources */
+ void (*release_resources)(Unit *u);
+
+ /* Invoked on every child that died */
+ void (*sigchld_event)(Unit *u, pid_t pid, int code, int status);
+
+ /* Reset failed state if we are in failed state */
+ void (*reset_failed)(Unit *u);
+
+ /* Called whenever any of the cgroups this unit watches for ran empty */
+ void (*notify_cgroup_empty)(Unit *u);
+
+ /* Called whenever an OOM kill event on this unit was seen */
+ void (*notify_cgroup_oom)(Unit *u);
+
+ /* Called whenever a process of this unit sends us a message */
+ void (*notify_message)(Unit *u, const struct ucred *ucred, char * const *tags, FDSet *fds);
+
+ /* Called whenever a name this Unit registered for comes or goes away. */
+ void (*bus_name_owner_change)(Unit *u, const char *new_owner);
+
+ /* Called for each property that is being set */
+ int (*bus_set_property)(Unit *u, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+
+ /* Called after at least one property got changed to apply the necessary change */
+ int (*bus_commit_properties)(Unit *u);
+
+ /* Return the unit this unit is following */
+ Unit *(*following)(Unit *u);
+
+ /* Return the set of units that are following each other */
+ int (*following_set)(Unit *u, Set **s);
+
+ /* Invoked each time a unit this unit is triggering changes
+ * state or gains/loses a job */
+ void (*trigger_notify)(Unit *u, Unit *trigger);
+
+ /* Called whenever CLOCK_REALTIME made a jump */
+ void (*time_change)(Unit *u);
+
+ /* Called whenever /etc/localtime was modified */
+ void (*timezone_change)(Unit *u);
+
+ /* Returns the next timeout of a unit */
+ int (*get_timeout)(Unit *u, usec_t *timeout);
+
+ /* Returns the main PID if there is any defined, or 0. */
+ pid_t (*main_pid)(Unit *u);
+
+ /* Returns the main PID if there is any defined, or 0. */
+ pid_t (*control_pid)(Unit *u);
+
+ /* Returns true if the unit currently needs access to the console */
+ bool (*needs_console)(Unit *u);
+
+ /* Returns the exit status to propagate in case of FailureAction=exit/SuccessAction=exit; usually returns the
+ * exit code of the "main" process of the service or similar. */
+ int (*exit_status)(Unit *u);
+
+ /* Like the enumerate() callback further down, but only enumerates the perpetual units, i.e. all units that
+ * unconditionally exist and are always active. The main reason to keep both enumeration functions separate is
+ * philosophical: the state of perpetual units should be put in place by coldplug(), while the state of those
+ * discovered through regular enumeration should be put in place by catchup(), see below. */
+ void (*enumerate_perpetual)(Manager *m);
+
+ /* This is called for each unit type and should be used to enumerate units already existing in the system
+ * internally and load them. However, everything that is loaded here should still stay in inactive state. It is
+ * the job of the catchup() call above to put the units into the discovered state. */
+ void (*enumerate)(Manager *m);
+
+ /* Type specific cleanups. */
+ void (*shutdown)(Manager *m);
+
+ /* If this function is set and return false all jobs for units
+ * of this type will immediately fail. */
+ bool (*supported)(void);
+
+ /* The strings to print in status messages */
+ UnitStatusMessageFormats status_message_formats;
+
+ /* True if transient units of this type are OK */
+ bool can_transient:1;
+
+ /* True if cgroup delegation is permissible */
+ bool can_delegate:1;
+
+ /* True if the unit type triggers other units, i.e. can have a UNIT_TRIGGERS dependency */
+ bool can_trigger:1;
+
+ /* True if the unit type knows a failure state, and thus can be source of an OnFailure= dependency */
+ bool can_fail:1;
+
+ /* True if units of this type shall be startable only once and then never again */
+ bool once_only:1;
+
+ /* True if queued jobs of this type should be GC'ed if no other job needs them anymore */
+ bool gc_jobs:1;
+
+ /* True if systemd-oomd can monitor and act on this unit's recursive children's cgroup(s) */
+ bool can_set_managed_oom:1;
+} UnitVTable;
+
+extern const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX];
+
+static inline const UnitVTable* UNIT_VTABLE(Unit *u) {
+ return unit_vtable[u->type];
+}
+
+/* For casting a unit into the various unit types */
+#define DEFINE_CAST(UPPERCASE, MixedCase) \
+ static inline MixedCase* UPPERCASE(Unit *u) { \
+ if (_unlikely_(!u || u->type != UNIT_##UPPERCASE)) \
+ return NULL; \
+ \
+ return (MixedCase*) u; \
+ }
+
+/* For casting the various unit types into a unit */
+#define UNIT(u) \
+ ({ \
+ typeof(u) _u_ = (u); \
+ Unit *_w_ = _u_ ? &(_u_)->meta : NULL; \
+ _w_; \
+ })
+
+#define UNIT_HAS_EXEC_CONTEXT(u) (UNIT_VTABLE(u)->exec_context_offset > 0)
+#define UNIT_HAS_CGROUP_CONTEXT(u) (UNIT_VTABLE(u)->cgroup_context_offset > 0)
+#define UNIT_HAS_KILL_CONTEXT(u) (UNIT_VTABLE(u)->kill_context_offset > 0)
+
+static inline Unit* UNIT_TRIGGER(Unit *u) {
+ return hashmap_first_key(u->dependencies[UNIT_TRIGGERS]);
+}
+
+Unit *unit_new(Manager *m, size_t size);
+void unit_free(Unit *u);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Unit *, unit_free);
+
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret);
+int unit_add_name(Unit *u, const char *name);
+
+int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference, UnitDependencyMask mask);
+int unit_add_two_dependencies(Unit *u, UnitDependency d, UnitDependency e, Unit *other, bool add_reference, UnitDependencyMask mask);
+
+int unit_add_dependency_by_name(Unit *u, UnitDependency d, const char *name, bool add_reference, UnitDependencyMask mask);
+int unit_add_two_dependencies_by_name(Unit *u, UnitDependency d, UnitDependency e, const char *name, bool add_reference, UnitDependencyMask mask);
+
+int unit_add_exec_dependencies(Unit *u, ExecContext *c);
+
+int unit_choose_id(Unit *u, const char *name);
+int unit_set_description(Unit *u, const char *description);
+
+bool unit_may_gc(Unit *u);
+
+static inline bool unit_is_extrinsic(Unit *u) {
+ return u->perpetual ||
+ (UNIT_VTABLE(u)->is_extrinsic && UNIT_VTABLE(u)->is_extrinsic(u));
+}
+
+void unit_add_to_load_queue(Unit *u);
+void unit_add_to_dbus_queue(Unit *u);
+void unit_add_to_cleanup_queue(Unit *u);
+void unit_add_to_gc_queue(Unit *u);
+void unit_add_to_target_deps_queue(Unit *u);
+void unit_submit_to_stop_when_unneeded_queue(Unit *u);
+
+int unit_merge(Unit *u, Unit *other);
+int unit_merge_by_name(Unit *u, const char *other);
+
+Unit *unit_follow_merge(Unit *u) _pure_;
+
+int unit_load_fragment_and_dropin(Unit *u, bool fragment_required);
+int unit_load(Unit *unit);
+
+int unit_set_slice(Unit *u, Unit *slice);
+int unit_set_default_slice(Unit *u);
+
+const char *unit_description(Unit *u) _pure_;
+const char *unit_status_string(Unit *u) _pure_;
+
+bool unit_has_name(const Unit *u, const char *name);
+
+UnitActiveState unit_active_state(Unit *u);
+FreezerState unit_freezer_state(Unit *u);
+int unit_freezer_state_kernel(Unit *u, FreezerState *ret);
+
+const char* unit_sub_state_to_string(Unit *u);
+
+void unit_dump(Unit *u, FILE *f, const char *prefix);
+
+bool unit_can_reload(Unit *u) _pure_;
+bool unit_can_start(Unit *u) _pure_;
+bool unit_can_stop(Unit *u) _pure_;
+bool unit_can_isolate(Unit *u) _pure_;
+
+int unit_start(Unit *u);
+int unit_stop(Unit *u);
+int unit_reload(Unit *u);
+
+int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error);
+int unit_kill_common(Unit *u, KillWho who, int signo, pid_t main_pid, pid_t control_pid, sd_bus_error *error);
+
+typedef enum UnitNotifyFlags {
+ UNIT_NOTIFY_RELOAD_FAILURE = 1 << 0,
+ UNIT_NOTIFY_WILL_AUTO_RESTART = 1 << 1,
+ UNIT_NOTIFY_SKIP_CONDITION = 1 << 2,
+} UnitNotifyFlags;
+
+void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags);
+
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive);
+void unit_unwatch_pid(Unit *u, pid_t pid);
+void unit_unwatch_all_pids(Unit *u);
+
+int unit_enqueue_rewatch_pids(Unit *u);
+void unit_dequeue_rewatch_pids(Unit *u);
+
+int unit_install_bus_match(Unit *u, sd_bus *bus, const char *name);
+int unit_watch_bus_name(Unit *u, const char *name);
+void unit_unwatch_bus_name(Unit *u, const char *name);
+
+bool unit_job_is_applicable(Unit *u, JobType j);
+
+int set_unit_path(const char *p);
+
+char *unit_dbus_path(Unit *u);
+char *unit_dbus_path_invocation_id(Unit *u);
+
+int unit_load_related_unit(Unit *u, const char *type, Unit **_found);
+
+bool unit_can_serialize(Unit *u) _pure_;
+
+int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs);
+int unit_deserialize(Unit *u, FILE *f, FDSet *fds);
+int unit_deserialize_skip(FILE *f);
+
+int unit_add_node_dependency(Unit *u, const char *what, UnitDependency d, UnitDependencyMask mask);
+int unit_add_blockdev_dependency(Unit *u, const char *what, UnitDependencyMask mask);
+
+int unit_coldplug(Unit *u);
+void unit_catchup(Unit *u);
+
+void unit_status_printf(Unit *u, StatusType status_type, const char *status, const char *unit_status_msg_format) _printf_(4, 0);
+
+bool unit_need_daemon_reload(Unit *u);
+
+void unit_reset_failed(Unit *u);
+
+Unit *unit_following(Unit *u);
+int unit_following_set(Unit *u, Set **s);
+
+const char *unit_slice_name(Unit *u);
+
+bool unit_stop_pending(Unit *u) _pure_;
+bool unit_inactive_or_pending(Unit *u) _pure_;
+bool unit_active_or_pending(Unit *u);
+bool unit_will_restart_default(Unit *u);
+bool unit_will_restart(Unit *u);
+
+int unit_add_default_target_dependency(Unit *u, Unit *target);
+
+void unit_start_on_failure(Unit *u);
+void unit_trigger_notify(Unit *u);
+
+UnitFileState unit_get_unit_file_state(Unit *u);
+int unit_get_unit_file_preset(Unit *u);
+
+Unit* unit_ref_set(UnitRef *ref, Unit *source, Unit *target);
+void unit_ref_unset(UnitRef *ref);
+
+#define UNIT_DEREF(ref) ((ref).target)
+#define UNIT_ISSET(ref) (!!(ref).target)
+
+int unit_patch_contexts(Unit *u);
+
+ExecContext *unit_get_exec_context(Unit *u) _pure_;
+KillContext *unit_get_kill_context(Unit *u) _pure_;
+CGroupContext *unit_get_cgroup_context(Unit *u) _pure_;
+
+ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_;
+
+int unit_setup_exec_runtime(Unit *u);
+int unit_setup_dynamic_creds(Unit *u);
+
+char* unit_escape_setting(const char *s, UnitWriteFlags flags, char **buf);
+char* unit_concat_strv(char **l, UnitWriteFlags flags);
+
+int unit_write_setting(Unit *u, UnitWriteFlags flags, const char *name, const char *data);
+int unit_write_settingf(Unit *u, UnitWriteFlags mode, const char *name, const char *format, ...) _printf_(4,5);
+
+int unit_kill_context(Unit *u, KillContext *c, KillOperation k, pid_t main_pid, pid_t control_pid, bool main_pid_alien);
+
+int unit_make_transient(Unit *u);
+
+int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask);
+
+bool unit_type_supported(UnitType t);
+
+bool unit_is_pristine(Unit *u);
+
+bool unit_is_unneeded(Unit *u);
+
+pid_t unit_control_pid(Unit *u);
+pid_t unit_main_pid(Unit *u);
+
+void unit_warn_if_dir_nonempty(Unit *u, const char* where);
+int unit_fail_if_noncanonical(Unit *u, const char* where);
+
+int unit_test_start_limit(Unit *u);
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid);
+void unit_unref_uid_gid(Unit *u, bool destroy_now);
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
+
+int unit_acquire_invocation_id(Unit *u);
+
+bool unit_shall_confirm_spawn(Unit *u);
+
+int unit_set_exec_params(Unit *s, ExecParameters *p);
+
+int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret);
+int unit_fork_and_watch_rm_rf(Unit *u, char **paths, pid_t *ret_pid);
+
+void unit_remove_dependencies(Unit *u, UnitDependencyMask mask);
+
+void unit_export_state_files(Unit *u);
+void unit_unlink_state_files(Unit *u);
+
+int unit_prepare_exec(Unit *u);
+
+int unit_log_leftover_process_start(pid_t pid, int sig, void *userdata);
+int unit_log_leftover_process_stop(pid_t pid, int sig, void *userdata);
+int unit_warn_leftover_processes(Unit *u, cg_kill_log_func_t log_func);
+
+bool unit_needs_console(Unit *u);
+
+const char *unit_label_path(const Unit *u);
+
+int unit_pid_attachable(Unit *unit, pid_t pid, sd_bus_error *error);
+
+static inline bool unit_has_job_type(Unit *u, JobType type) {
+ return u && u->job && u->job->type == type;
+}
+
+/* unit_log_skip is for cases like ExecCondition= where a unit is considered "done"
+ * after some execution, rather than succeeded or failed. */
+void unit_log_skip(Unit *u, const char *result);
+void unit_log_success(Unit *u);
+void unit_log_failure(Unit *u, const char *result);
+static inline void unit_log_result(Unit *u, bool success, const char *result) {
+ if (success)
+ unit_log_success(u);
+ else
+ unit_log_failure(u, result);
+}
+
+void unit_log_process_exit(Unit *u, const char *kind, const char *command, bool success, int code, int status);
+
+int unit_exit_status(Unit *u);
+int unit_success_action_exit_status(Unit *u);
+int unit_failure_action_exit_status(Unit *u);
+
+int unit_test_trigger_loaded(Unit *u);
+
+void unit_destroy_runtime_data(Unit *u, const ExecContext *context);
+int unit_clean(Unit *u, ExecCleanMask mask);
+int unit_can_clean(Unit *u, ExecCleanMask *ret_mask);
+
+bool unit_can_freeze(Unit *u);
+int unit_freeze(Unit *u);
+void unit_frozen(Unit *u);
+
+int unit_thaw(Unit *u);
+void unit_thawed(Unit *u);
+
+int unit_freeze_vtable_common(Unit *u);
+int unit_thaw_vtable_common(Unit *u);
+
+/* Macros which append UNIT= or USER_UNIT= to the message */
+
+#define log_unit_full_errno(unit, level, error, ...) \
+ ({ \
+ const Unit *_u = (unit); \
+ (log_get_max_level() < LOG_PRI(level)) ? -ERRNO_VALUE(error) : \
+ _u ? log_object_internal(level, error, PROJECT_FILE, __LINE__, __func__, _u->manager->unit_log_field, _u->id, _u->manager->invocation_log_field, _u->invocation_id_string, ##__VA_ARGS__) : \
+ log_internal(level, error, PROJECT_FILE, __LINE__, __func__, ##__VA_ARGS__); \
+ })
+
+#define log_unit_full(unit, level, ...) (void) log_unit_full_errno(unit, level, 0, __VA_ARGS__)
+
+#define log_unit_debug(unit, ...) log_unit_full_errno(unit, LOG_DEBUG, 0, __VA_ARGS__)
+#define log_unit_info(unit, ...) log_unit_full(unit, LOG_INFO, __VA_ARGS__)
+#define log_unit_notice(unit, ...) log_unit_full(unit, LOG_NOTICE, __VA_ARGS__)
+#define log_unit_warning(unit, ...) log_unit_full(unit, LOG_WARNING, __VA_ARGS__)
+#define log_unit_error(unit, ...) log_unit_full(unit, LOG_ERR, __VA_ARGS__)
+
+#define log_unit_debug_errno(unit, error, ...) log_unit_full_errno(unit, LOG_DEBUG, error, __VA_ARGS__)
+#define log_unit_info_errno(unit, error, ...) log_unit_full_errno(unit, LOG_INFO, error, __VA_ARGS__)
+#define log_unit_notice_errno(unit, error, ...) log_unit_full_errno(unit, LOG_NOTICE, error, __VA_ARGS__)
+#define log_unit_warning_errno(unit, error, ...) log_unit_full_errno(unit, LOG_WARNING, error, __VA_ARGS__)
+#define log_unit_error_errno(unit, error, ...) log_unit_full_errno(unit, LOG_ERR, error, __VA_ARGS__)
+
+#define LOG_UNIT_MESSAGE(unit, fmt, ...) "MESSAGE=%s: " fmt, (unit)->id, ##__VA_ARGS__
+#define LOG_UNIT_ID(unit) (unit)->manager->unit_log_format_string, (unit)->id
+#define LOG_UNIT_INVOCATION_ID(unit) (unit)->manager->invocation_log_format_string, (unit)->invocation_id_string
+
+const char* collect_mode_to_string(CollectMode m) _const_;
+CollectMode collect_mode_from_string(const char *s) _pure_;
diff --git a/src/core/user.conf.in b/src/core/user.conf.in
new file mode 100644
index 0000000..bbe0631
--- /dev/null
+++ b/src/core/user.conf.in
@@ -0,0 +1,47 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# You can override the directives in this file by creating files in
+# /etc/systemd/user.conf.d/*.conf.
+#
+# See systemd-user.conf(5) for details
+
+[Manager]
+#LogLevel=info
+#LogTarget=console
+#LogColor=yes
+#LogLocation=no
+#LogTime=no
+#SystemCallArchitectures=
+#TimerSlackNSec=
+#StatusUnitFormat=@STATUS_UNIT_FORMAT_DEFAULT@
+#DefaultTimerAccuracySec=1min
+#DefaultStandardOutput=inherit
+#DefaultStandardError=inherit
+#DefaultTimeoutStartSec=90s
+#DefaultTimeoutStopSec=90s
+#DefaultTimeoutAbortSec=
+#DefaultRestartSec=100ms
+#DefaultStartLimitIntervalSec=10s
+#DefaultStartLimitBurst=5
+#DefaultEnvironment=
+#DefaultLimitCPU=
+#DefaultLimitFSIZE=
+#DefaultLimitDATA=
+#DefaultLimitSTACK=
+#DefaultLimitCORE=
+#DefaultLimitRSS=
+#DefaultLimitNOFILE=
+#DefaultLimitAS=
+#DefaultLimitNPROC=
+#DefaultLimitMEMLOCK=
+#DefaultLimitLOCKS=
+#DefaultLimitSIGPENDING=
+#DefaultLimitMSGQUEUE=
+#DefaultLimitNICE=
+#DefaultLimitRTPRIO=
+#DefaultLimitRTTIME=
diff --git a/src/coredump/coredump-vacuum.c b/src/coredump/coredump-vacuum.c
new file mode 100644
index 0000000..30c67ff
--- /dev/null
+++ b/src/coredump/coredump-vacuum.c
@@ -0,0 +1,248 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "coredump-vacuum.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "user-util.h"
+
+#define DEFAULT_MAX_USE_LOWER (uint64_t) (1ULL*1024ULL*1024ULL) /* 1 MiB */
+#define DEFAULT_MAX_USE_UPPER (uint64_t) (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define DEFAULT_KEEP_FREE_UPPER (uint64_t) (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define DEFAULT_KEEP_FREE (uint64_t) (1024ULL*1024ULL) /* 1 MB */
+
+struct vacuum_candidate {
+ unsigned n_files;
+ char *oldest_file;
+ usec_t oldest_mtime;
+};
+
+static void vacuum_candidate_free(struct vacuum_candidate *c) {
+ if (!c)
+ return;
+
+ free(c->oldest_file);
+ free(c);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct vacuum_candidate*, vacuum_candidate_free);
+
+static void vacuum_candidate_hashmap_free(Hashmap *h) {
+ hashmap_free_with_destructor(h, vacuum_candidate_free);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, vacuum_candidate_hashmap_free);
+
+static int uid_from_file_name(const char *filename, uid_t *uid) {
+ const char *p, *e, *u;
+
+ p = startswith(filename, "core.");
+ if (!p)
+ return -EINVAL;
+
+ /* Skip the comm field */
+ p = strchr(p, '.');
+ if (!p)
+ return -EINVAL;
+ p++;
+
+ /* Find end up UID */
+ e = strchr(p, '.');
+ if (!e)
+ return -EINVAL;
+
+ u = strndupa(p, e-p);
+ return parse_uid(u, uid);
+}
+
+static bool vacuum_necessary(int fd, uint64_t sum, uint64_t keep_free, uint64_t max_use) {
+ uint64_t fs_size = 0, fs_free = (uint64_t) -1;
+ struct statvfs sv;
+
+ assert(fd >= 0);
+
+ if (fstatvfs(fd, &sv) >= 0) {
+ fs_size = sv.f_frsize * sv.f_blocks;
+ fs_free = sv.f_frsize * sv.f_bfree;
+ }
+
+ if (max_use == (uint64_t) -1) {
+
+ if (fs_size > 0) {
+ max_use = PAGE_ALIGN(fs_size / 10); /* 10% */
+
+ if (max_use > DEFAULT_MAX_USE_UPPER)
+ max_use = DEFAULT_MAX_USE_UPPER;
+
+ if (max_use < DEFAULT_MAX_USE_LOWER)
+ max_use = DEFAULT_MAX_USE_LOWER;
+ } else
+ max_use = DEFAULT_MAX_USE_LOWER;
+ } else
+ max_use = PAGE_ALIGN(max_use);
+
+ if (max_use > 0 && sum > max_use)
+ return true;
+
+ if (keep_free == (uint64_t) -1) {
+
+ if (fs_size > 0) {
+ keep_free = PAGE_ALIGN((fs_size * 3) / 20); /* 15% */
+
+ if (keep_free > DEFAULT_KEEP_FREE_UPPER)
+ keep_free = DEFAULT_KEEP_FREE_UPPER;
+ } else
+ keep_free = DEFAULT_KEEP_FREE;
+ } else
+ keep_free = PAGE_ALIGN(keep_free);
+
+ if (keep_free > 0 && fs_free < keep_free)
+ return true;
+
+ return false;
+}
+
+int coredump_vacuum(int exclude_fd, uint64_t keep_free, uint64_t max_use) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct stat exclude_st;
+ int r;
+
+ if (keep_free == 0 && max_use == 0)
+ return 0;
+
+ if (exclude_fd >= 0) {
+ if (fstat(exclude_fd, &exclude_st) < 0)
+ return log_error_errno(errno, "Failed to fstat(): %m");
+ }
+
+ /* This algorithm will keep deleting the oldest file of the
+ * user with the most coredumps until we are back in the size
+ * limits. Note that vacuuming for journal files is different,
+ * because we rely on rate-limiting of the messages there,
+ * to avoid being flooded. */
+
+ d = opendir("/var/lib/systemd/coredump");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Can't open coredump directory: %m");
+ }
+
+ for (;;) {
+ _cleanup_(vacuum_candidate_hashmap_freep) Hashmap *h = NULL;
+ struct vacuum_candidate *worst = NULL;
+ struct dirent *de;
+ uint64_t sum = 0;
+
+ rewinddir(d);
+
+ FOREACH_DIRENT(de, d, goto fail) {
+ struct vacuum_candidate *c;
+ struct stat st;
+ uid_t uid;
+ usec_t t;
+
+ r = uid_from_file_name(de->d_name, &uid);
+ if (r < 0)
+ continue;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_NO_AUTOMOUNT|AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ log_warning_errno(errno, "Failed to stat /var/lib/systemd/coredump/%s: %m", de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ if (exclude_fd >= 0 &&
+ exclude_st.st_dev == st.st_dev &&
+ exclude_st.st_ino == st.st_ino)
+ continue;
+
+ r = hashmap_ensure_allocated(&h, NULL);
+ if (r < 0)
+ return log_oom();
+
+ t = timespec_load(&st.st_mtim);
+
+ c = hashmap_get(h, UID_TO_PTR(uid));
+ if (c) {
+
+ if (t < c->oldest_mtime) {
+ char *n;
+
+ n = strdup(de->d_name);
+ if (!n)
+ return log_oom();
+
+ free(c->oldest_file);
+ c->oldest_file = n;
+ c->oldest_mtime = t;
+ }
+
+ } else {
+ _cleanup_(vacuum_candidate_freep) struct vacuum_candidate *n = NULL;
+
+ n = new0(struct vacuum_candidate, 1);
+ if (!n)
+ return log_oom();
+
+ n->oldest_file = strdup(de->d_name);
+ if (!n->oldest_file)
+ return log_oom();
+
+ n->oldest_mtime = t;
+
+ r = hashmap_put(h, UID_TO_PTR(uid), n);
+ if (r < 0)
+ return log_oom();
+
+ c = TAKE_PTR(n);
+ }
+
+ c->n_files++;
+
+ if (!worst ||
+ worst->n_files < c->n_files ||
+ (worst->n_files == c->n_files && c->oldest_mtime < worst->oldest_mtime))
+ worst = c;
+
+ sum += st.st_blocks * 512;
+ }
+
+ if (!worst)
+ break;
+
+ r = vacuum_necessary(dirfd(d), sum, keep_free, max_use);
+ if (r <= 0)
+ return r;
+
+ r = unlinkat_deallocate(dirfd(d), worst->oldest_file, 0);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return log_error_errno(r, "Failed to remove file %s: %m", worst->oldest_file);
+
+ log_info("Removed old coredump %s.", worst->oldest_file);
+ }
+
+ return 0;
+
+fail:
+ return log_error_errno(errno, "Failed to read directory: %m");
+}
diff --git a/src/coredump/coredump-vacuum.h b/src/coredump/coredump-vacuum.h
new file mode 100644
index 0000000..8ad5baf
--- /dev/null
+++ b/src/coredump/coredump-vacuum.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+int coredump_vacuum(int exclude_fd, uint64_t keep_free, uint64_t max_use);
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c
new file mode 100644
index 0000000..0a1cb91
--- /dev/null
+++ b/src/coredump/coredump.c
@@ -0,0 +1,1330 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#if HAVE_ELFUTILS
+#include <dwarf.h>
+#include <elfutils/libdwfl.h>
+#endif
+
+#include "sd-daemon.h"
+#include "sd-journal.h"
+#include "sd-login.h"
+#include "sd-messages.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "compress.h"
+#include "conf-parser.h"
+#include "copy.h"
+#include "coredump-vacuum.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "journal-importer.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "stacktrace.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-record.h"
+#include "user-util.h"
+
+/* The maximum size up to which we process coredumps */
+#define PROCESS_SIZE_MAX ((uint64_t) (2LLU*1024LLU*1024LLU*1024LLU))
+
+/* The maximum size up to which we leave the coredump around on disk */
+#define EXTERNAL_SIZE_MAX PROCESS_SIZE_MAX
+
+/* The maximum size up to which we store the coredump in the journal */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#define JOURNAL_SIZE_MAX ((size_t) (767LU*1024LU*1024LU))
+#else
+/* oss-fuzz limits memory usage. */
+#define JOURNAL_SIZE_MAX ((size_t) (10LU*1024LU*1024LU))
+#endif
+
+/* Make sure to not make this larger than the maximum journal entry
+ * size. See DATA_SIZE_MAX in journal-importer.h. */
+assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX);
+
+enum {
+ /* We use these as array indexes for our process metadata cache.
+ *
+ * The first indices of the cache stores the same metadata as the ones passed by
+ * the kernel via argv[], ie the strings array passed by the kernel according to
+ * our pattern defined in /proc/sys/kernel/core_pattern (see man:core(5)). */
+
+ META_ARGV_PID, /* %P: as seen in the initial pid namespace */
+ META_ARGV_UID, /* %u: as seen in the initial user namespace */
+ META_ARGV_GID, /* %g: as seen in the initial user namespace */
+ META_ARGV_SIGNAL, /* %s: number of signal causing dump */
+ META_ARGV_TIMESTAMP, /* %t: time of dump, expressed as seconds since the Epoch (we expand this to µs granularity) */
+ META_ARGV_RLIMIT, /* %c: core file size soft resource limit */
+ META_ARGV_HOSTNAME, /* %h: hostname */
+ _META_ARGV_MAX,
+
+ /* The following indexes are cached for a couple of special fields we use (and
+ * thereby need to be retrieved quickly) for naming coredump files, and attaching
+ * xattrs. Unlike the previous ones they are retrieved from the runtime
+ * environment. */
+
+ META_COMM = _META_ARGV_MAX,
+ _META_MANDATORY_MAX,
+
+ /* The rest are similar to the previous ones except that we won't fail if one of
+ * them is missing. */
+
+ META_EXE = _META_MANDATORY_MAX,
+ META_UNIT,
+ _META_MAX
+};
+
+static const char * const meta_field_names[_META_MAX] = {
+ [META_ARGV_PID] = "COREDUMP_PID=",
+ [META_ARGV_UID] = "COREDUMP_UID=",
+ [META_ARGV_GID] = "COREDUMP_GID=",
+ [META_ARGV_SIGNAL] = "COREDUMP_SIGNAL=",
+ [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=",
+ [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=",
+ [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=",
+ [META_COMM] = "COREDUMP_COMM=",
+ [META_EXE] = "COREDUMP_EXE=",
+ [META_UNIT] = "COREDUMP_UNIT=",
+};
+
+typedef struct Context {
+ const char *meta[_META_MAX];
+ pid_t pid;
+ bool is_pid1;
+ bool is_journald;
+} Context;
+
+typedef enum CoredumpStorage {
+ COREDUMP_STORAGE_NONE,
+ COREDUMP_STORAGE_EXTERNAL,
+ COREDUMP_STORAGE_JOURNAL,
+ _COREDUMP_STORAGE_MAX,
+ _COREDUMP_STORAGE_INVALID = -1
+} CoredumpStorage;
+
+static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = {
+ [COREDUMP_STORAGE_NONE] = "none",
+ [COREDUMP_STORAGE_EXTERNAL] = "external",
+ [COREDUMP_STORAGE_JOURNAL] = "journal",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage);
+static DEFINE_CONFIG_PARSE_ENUM(config_parse_coredump_storage, coredump_storage, CoredumpStorage, "Failed to parse storage setting");
+
+static CoredumpStorage arg_storage = COREDUMP_STORAGE_EXTERNAL;
+static bool arg_compress = true;
+static uint64_t arg_process_size_max = PROCESS_SIZE_MAX;
+static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX;
+static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX;
+static uint64_t arg_keep_free = (uint64_t) -1;
+static uint64_t arg_max_use = (uint64_t) -1;
+
+static int parse_config(void) {
+ static const ConfigTableItem items[] = {
+ { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
+ { "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
+ { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
+ { "Coredump", "ExternalSizeMax", config_parse_iec_uint64, 0, &arg_external_size_max },
+ { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
+ { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
+ { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
+ {}
+ };
+
+ return config_parse_many_nulstr(
+ PKGSYSCONFDIR "/coredump.conf",
+ CONF_PATHS_NULSTR("systemd/coredump.conf.d"),
+ "Coredump\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+}
+
+static uint64_t storage_size_max(void) {
+ if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
+ return arg_external_size_max;
+ if (arg_storage == COREDUMP_STORAGE_JOURNAL)
+ return arg_journal_size_max;
+ assert(arg_storage == COREDUMP_STORAGE_NONE);
+ return 0;
+}
+
+static int fix_acl(int fd, uid_t uid) {
+
+#if HAVE_ACL
+ int r;
+
+ assert(fd >= 0);
+ assert(uid_is_valid(uid));
+
+ if (uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY)
+ return 0;
+
+ /* Make sure normal users can read (but not write or delete) their own coredumps */
+ r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust ACL of the coredump: %m");
+#endif
+
+ return 0;
+}
+
+static int fix_xattr(int fd, const Context *context) {
+
+ static const char * const xattrs[_META_MAX] = {
+ [META_ARGV_PID] = "user.coredump.pid",
+ [META_ARGV_UID] = "user.coredump.uid",
+ [META_ARGV_GID] = "user.coredump.gid",
+ [META_ARGV_SIGNAL] = "user.coredump.signal",
+ [META_ARGV_TIMESTAMP] = "user.coredump.timestamp",
+ [META_ARGV_RLIMIT] = "user.coredump.rlimit",
+ [META_ARGV_HOSTNAME] = "user.coredump.hostname",
+ [META_COMM] = "user.coredump.comm",
+ [META_EXE] = "user.coredump.exe",
+ };
+
+ int r = 0;
+ unsigned i;
+
+ assert(fd >= 0);
+
+ /* Attach some metadata to coredumps via extended
+ * attributes. Just because we can. */
+
+ for (i = 0; i < _META_MAX; i++) {
+ int k;
+
+ if (isempty(context->meta[i]) || !xattrs[i])
+ continue;
+
+ k = fsetxattr(fd, xattrs[i], context->meta[i], strlen(context->meta[i]), XATTR_CREATE);
+ if (k < 0 && r == 0)
+ r = -errno;
+ }
+
+ return r;
+}
+
+#define filename_escape(s) xescape((s), "./ ")
+
+static const char *coredump_tmpfile_name(const char *s) {
+ return s ? s : "(unnamed temporary file)";
+}
+
+static int fix_permissions(
+ int fd,
+ const char *filename,
+ const char *target,
+ const Context *context,
+ uid_t uid) {
+
+ int r;
+
+ assert(fd >= 0);
+ assert(target);
+ assert(context);
+
+ /* Ignore errors on these */
+ (void) fchmod(fd, 0640);
+ (void) fix_acl(fd, uid);
+ (void) fix_xattr(fd, context);
+
+ if (fsync(fd) < 0)
+ return log_error_errno(errno, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename));
+
+ (void) fsync_directory_of_file(fd);
+
+ r = link_tmpfile(fd, filename, target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move coredump %s into place: %m", target);
+
+ return 0;
+}
+
+static int maybe_remove_external_coredump(const char *filename, uint64_t size) {
+
+ /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */
+
+ if (arg_storage == COREDUMP_STORAGE_EXTERNAL &&
+ size <= arg_external_size_max)
+ return 0;
+
+ if (!filename)
+ return 1;
+
+ if (unlink(filename) < 0 && errno != ENOENT)
+ return log_error_errno(errno, "Failed to unlink %s: %m", filename);
+
+ return 1;
+}
+
+static int make_filename(const Context *context, char **ret) {
+ _cleanup_free_ char *c = NULL, *u = NULL, *p = NULL, *t = NULL;
+ sd_id128_t boot = {};
+ int r;
+
+ assert(context);
+
+ c = filename_escape(context->meta[META_COMM]);
+ if (!c)
+ return -ENOMEM;
+
+ u = filename_escape(context->meta[META_ARGV_UID]);
+ if (!u)
+ return -ENOMEM;
+
+ r = sd_id128_get_boot(&boot);
+ if (r < 0)
+ return r;
+
+ p = filename_escape(context->meta[META_ARGV_PID]);
+ if (!p)
+ return -ENOMEM;
+
+ t = filename_escape(context->meta[META_ARGV_TIMESTAMP]);
+ if (!t)
+ return -ENOMEM;
+
+ if (asprintf(ret,
+ "/var/lib/systemd/coredump/core.%s.%s." SD_ID128_FORMAT_STR ".%s.%s",
+ c,
+ u,
+ SD_ID128_FORMAT_VAL(boot),
+ p,
+ t) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int save_external_coredump(
+ const Context *context,
+ int input_fd,
+ char **ret_filename,
+ int *ret_node_fd,
+ int *ret_data_fd,
+ uint64_t *ret_size,
+ bool *ret_truncated) {
+
+ _cleanup_free_ char *fn = NULL, *tmp = NULL;
+ _cleanup_close_ int fd = -1;
+ uint64_t rlimit, process_limit, max_size;
+ struct stat st;
+ uid_t uid;
+ int r;
+
+ assert(context);
+ assert(ret_filename);
+ assert(ret_node_fd);
+ assert(ret_data_fd);
+ assert(ret_size);
+
+ r = parse_uid(context->meta[META_ARGV_UID], &uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID: %m");
+
+ r = safe_atou64(context->meta[META_ARGV_RLIMIT], &rlimit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse resource limit '%s': %m",
+ context->meta[META_ARGV_RLIMIT]);
+ if (rlimit < page_size())
+ /* Is coredumping disabled? Then don't bother saving/processing the
+ * coredump. Anything below PAGE_SIZE cannot give a readable coredump
+ * (the kernel uses ELF_EXEC_PAGESIZE which is not easily accessible, but
+ * is usually the same as PAGE_SIZE. */
+ return log_info_errno(SYNTHETIC_ERRNO(EBADSLT),
+ "Resource limits disable core dumping for process %s (%s).",
+ context->meta[META_ARGV_PID], context->meta[META_COMM]);
+
+ process_limit = MAX(arg_process_size_max, storage_size_max());
+ if (process_limit == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADSLT),
+ "Limits for coredump processing and storage are both 0, not dumping core.");
+
+ /* Never store more than the process configured, or than we actually shall keep or process */
+ max_size = MIN(rlimit, process_limit);
+
+ r = make_filename(context, &fn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine coredump file name: %m");
+
+ (void) mkdir_p_label("/var/lib/systemd/coredump", 0755);
+
+ fd = open_tmpfile_linkable(fn, O_RDWR|O_CLOEXEC, &tmp);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn);
+
+ r = copy_bytes(input_fd, fd, max_size, 0);
+ if (r < 0) {
+ log_error_errno(r, "Cannot store coredump of %s (%s): %m",
+ context->meta[META_ARGV_PID], context->meta[META_COMM]);
+ goto fail;
+ }
+ *ret_truncated = r == 1;
+ if (*ret_truncated)
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Core file was truncated to %zu bytes.", max_size),
+ "SIZE_LIMIT=%zu", max_size,
+ "MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR);
+
+ if (fstat(fd, &st) < 0) {
+ log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp));
+ goto fail;
+ }
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1) {
+ log_error_errno(errno, "Failed to seek on %s: %m", coredump_tmpfile_name(tmp));
+ goto fail;
+ }
+
+#if HAVE_COMPRESSION
+ /* If we will remove the coredump anyway, do not compress. */
+ if (arg_compress && !maybe_remove_external_coredump(NULL, st.st_size)) {
+
+ _cleanup_free_ char *fn_compressed = NULL, *tmp_compressed = NULL;
+ _cleanup_close_ int fd_compressed = -1;
+
+ fn_compressed = strjoin(fn, COMPRESSED_EXT);
+ if (!fn_compressed) {
+ log_oom();
+ goto uncompressed;
+ }
+
+ fd_compressed = open_tmpfile_linkable(fn_compressed, O_RDWR|O_CLOEXEC, &tmp_compressed);
+ if (fd_compressed < 0) {
+ log_error_errno(fd_compressed, "Failed to create temporary file for coredump %s: %m", fn_compressed);
+ goto uncompressed;
+ }
+
+ r = compress_stream(fd, fd_compressed, -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to compress %s: %m", coredump_tmpfile_name(tmp_compressed));
+ goto fail_compressed;
+ }
+
+ r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid);
+ if (r < 0)
+ goto fail_compressed;
+
+ /* OK, this worked, we can get rid of the uncompressed version now */
+ if (tmp)
+ unlink_noerrno(tmp);
+
+ *ret_filename = TAKE_PTR(fn_compressed); /* compressed */
+ *ret_node_fd = TAKE_FD(fd_compressed); /* compressed */
+ *ret_data_fd = TAKE_FD(fd); /* uncompressed */
+ *ret_size = (uint64_t) st.st_size; /* uncompressed */
+
+ return 0;
+
+ fail_compressed:
+ if (tmp_compressed)
+ (void) unlink(tmp_compressed);
+ }
+
+uncompressed:
+#endif
+
+ r = fix_permissions(fd, tmp, fn, context, uid);
+ if (r < 0)
+ goto fail;
+
+ *ret_filename = TAKE_PTR(fn);
+ *ret_data_fd = TAKE_FD(fd);
+ *ret_node_fd = -1;
+ *ret_size = (uint64_t) st.st_size;
+
+ return 0;
+
+fail:
+ if (tmp)
+ (void) unlink(tmp);
+ return r;
+}
+
+static int allocate_journal_field(int fd, size_t size, char **ret, size_t *ret_size) {
+ _cleanup_free_ char *field = NULL;
+ ssize_t n;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(ret_size);
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return log_warning_errno(errno, "Failed to seek: %m");
+
+ field = malloc(9 + size);
+ if (!field) {
+ log_warning("Failed to allocate memory for coredump, coredump will not be stored.");
+ return -ENOMEM;
+ }
+
+ memcpy(field, "COREDUMP=", 9);
+
+ n = read(fd, field + 9, size);
+ if (n < 0)
+ return log_error_errno((int) n, "Failed to read core data: %m");
+ if ((size_t) n < size)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Core data too short.");
+
+ *ret = TAKE_PTR(field);
+ *ret_size = size + 9;
+
+ return 0;
+}
+
+/* Joins /proc/[pid]/fd/ and /proc/[pid]/fdinfo/ into the following lines:
+ * 0:/dev/pts/23
+ * pos: 0
+ * flags: 0100002
+ *
+ * 1:/dev/pts/23
+ * pos: 0
+ * flags: 0100002
+ *
+ * 2:/dev/pts/23
+ * pos: 0
+ * flags: 0100002
+ * EOF
+ */
+static int compose_open_fds(pid_t pid, char **open_fds) {
+ _cleanup_closedir_ DIR *proc_fd_dir = NULL;
+ _cleanup_close_ int proc_fdinfo_fd = -1;
+ _cleanup_free_ char *buffer = NULL;
+ _cleanup_fclose_ FILE *stream = NULL;
+ const char *fddelim = "", *path;
+ struct dirent *dent = NULL;
+ size_t size = 0;
+ int r;
+
+ assert(pid >= 0);
+ assert(open_fds != NULL);
+
+ path = procfs_file_alloca(pid, "fd");
+ proc_fd_dir = opendir(path);
+ if (!proc_fd_dir)
+ return -errno;
+
+ proc_fdinfo_fd = openat(dirfd(proc_fd_dir), "../fdinfo", O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (proc_fdinfo_fd < 0)
+ return -errno;
+
+ stream = open_memstream_unlocked(&buffer, &size);
+ if (!stream)
+ return -ENOMEM;
+
+ FOREACH_DIRENT(dent, proc_fd_dir, return -errno) {
+ _cleanup_fclose_ FILE *fdinfo = NULL;
+ _cleanup_free_ char *fdname = NULL;
+ _cleanup_close_ int fd = -1;
+
+ r = readlinkat_malloc(dirfd(proc_fd_dir), dent->d_name, &fdname);
+ if (r < 0)
+ return r;
+
+ fprintf(stream, "%s%s:%s\n", fddelim, dent->d_name, fdname);
+ fddelim = "\n";
+
+ /* Use the directory entry from /proc/[pid]/fd with /proc/[pid]/fdinfo */
+ fd = openat(proc_fdinfo_fd, dent->d_name, O_NOFOLLOW|O_CLOEXEC|O_RDONLY);
+ if (fd < 0)
+ continue;
+
+ fdinfo = take_fdopen(&fd, "r");
+ if (!fdinfo)
+ continue;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(fdinfo, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ fputs(line, stream);
+ fputc('\n', stream);
+ }
+ }
+
+ errno = 0;
+ stream = safe_fclose(stream);
+
+ if (errno > 0)
+ return -errno;
+
+ *open_fds = TAKE_PTR(buffer);
+
+ return 0;
+}
+
+static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) {
+ const char *p;
+ struct stat stbuf;
+ _cleanup_close_ int proc_ns_dir_fd;
+
+ p = procfs_file_alloca(pid, "ns");
+
+ proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY);
+ if (proc_ns_dir_fd < 0)
+ return -errno;
+
+ if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0)
+ return -errno;
+
+ *ns = stbuf.st_ino;
+ return 0;
+}
+
+static int get_mount_namespace_leader(pid_t pid, pid_t *container_pid) {
+ pid_t cpid = pid, ppid = 0;
+ ino_t proc_mntns;
+ int r;
+
+ r = get_process_ns(pid, "mnt", &proc_mntns);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ ino_t parent_mntns;
+
+ r = get_process_ppid(cpid, &ppid);
+ if (r < 0)
+ return r;
+
+ r = get_process_ns(ppid, "mnt", &parent_mntns);
+ if (r < 0)
+ return r;
+
+ if (proc_mntns != parent_mntns)
+ break;
+
+ if (ppid == 1)
+ return -ENOENT;
+
+ cpid = ppid;
+ }
+
+ *container_pid = ppid;
+ return 0;
+}
+
+/* Returns 1 if the parent was found.
+ * Returns 0 if there is not a process we can call the pid's
+ * container parent (the pid's process isn't 'containerized').
+ * Returns a negative number on errors.
+ */
+static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) {
+ int r = 0;
+ pid_t container_pid;
+ const char *proc_root_path;
+ struct stat root_stat, proc_root_stat;
+
+ /* To compare inodes of / and /proc/[pid]/root */
+ if (stat("/", &root_stat) < 0)
+ return -errno;
+
+ proc_root_path = procfs_file_alloca(pid, "root");
+ if (stat(proc_root_path, &proc_root_stat) < 0)
+ return -errno;
+
+ /* The process uses system root. */
+ if (proc_root_stat.st_ino == root_stat.st_ino) {
+ *cmdline = NULL;
+ return 0;
+ }
+
+ r = get_mount_namespace_leader(pid, &container_pid);
+ if (r < 0)
+ return r;
+
+ r = get_process_cmdline(container_pid, SIZE_MAX, 0, cmdline);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int change_uid_gid(const Context *context) {
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ r = parse_uid(context->meta[META_ARGV_UID], &uid);
+ if (r < 0)
+ return r;
+
+ if (uid_is_system(uid)) {
+ const char *user = "systemd-coredump";
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0) {
+ log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user);
+ uid = gid = 0;
+ }
+ } else {
+ r = parse_gid(context->meta[META_ARGV_GID], &gid);
+ if (r < 0)
+ return r;
+ }
+
+ return drop_privileges(uid, gid, 0);
+}
+
+static int submit_coredump(
+ Context *context,
+ struct iovec_wrapper *iovw,
+ int input_fd) {
+
+ _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
+ _cleanup_free_ char *filename = NULL, *coredump_data = NULL;
+ _cleanup_free_ char *stacktrace = NULL;
+ char *core_message;
+ uint64_t coredump_size = UINT64_MAX;
+ bool truncated = false;
+ int r;
+
+ assert(context);
+ assert(iovw);
+ assert(input_fd >= 0);
+
+ /* Vacuum before we write anything again */
+ (void) coredump_vacuum(-1, arg_keep_free, arg_max_use);
+
+ /* Always stream the coredump to disk, if that's possible */
+ r = save_external_coredump(context, input_fd,
+ &filename, &coredump_node_fd, &coredump_fd, &coredump_size, &truncated);
+ if (r < 0)
+ /* Skip whole core dumping part */
+ goto log;
+
+ /* If we don't want to keep the coredump on disk, remove it now, as later on we
+ * will lack the privileges for it. However, we keep the fd to it, so that we can
+ * still process it and log it. */
+ r = maybe_remove_external_coredump(filename, coredump_size);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ (void) iovw_put_string_field(iovw, "COREDUMP_FILENAME=", filename);
+
+ } else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
+ log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
+ coredump_size, arg_external_size_max);
+
+ /* Vacuum again, but exclude the coredump we just created */
+ (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use);
+
+ /* Now, let's drop privileges to become the user who owns the segfaulted process
+ * and allocate the coredump memory under the user's uid. This also ensures that
+ * the credentials journald will see are the ones of the coredumping user, thus
+ * making sure the user gets access to the core dump. Let's also get rid of all
+ * capabilities, if we run as root, we won't need them anymore. */
+ r = change_uid_gid(context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileges: %m");
+
+#if HAVE_ELFUTILS
+ /* Try to get a stack trace if we can */
+ if (coredump_size > arg_process_size_max) {
+ log_debug("Not generating stack trace: core size %"PRIu64" is greater "
+ "than %"PRIu64" (the configured maximum)",
+ coredump_size, arg_process_size_max);
+ } else
+ coredump_make_stack_trace(coredump_fd, context->meta[META_EXE], &stacktrace);
+#endif
+
+log:
+ core_message = strjoina("Process ", context->meta[META_ARGV_PID],
+ " (", context->meta[META_COMM], ") of user ",
+ context->meta[META_ARGV_UID], " dumped core.",
+ context->is_journald && filename ? "\nCoredump diverted to " : NULL,
+ context->is_journald && filename ? filename : NULL);
+
+ core_message = strjoina(core_message, stacktrace ? "\n\n" : NULL, stacktrace);
+
+ if (context->is_journald) {
+ /* We cannot log to the journal, so just print the message.
+ * The target was set previously to something safe. */
+ log_dispatch(LOG_ERR, 0, core_message);
+ return 0;
+ }
+
+ (void) iovw_put_string_field(iovw, "MESSAGE=", core_message);
+
+ if (truncated)
+ (void) iovw_put_string_field(iovw, "COREDUMP_TRUNCATED=", "1");
+
+ /* Optionally store the entire coredump in the journal */
+ if (arg_storage == COREDUMP_STORAGE_JOURNAL) {
+ if (coredump_size <= arg_journal_size_max) {
+ size_t sz = 0;
+
+ /* Store the coredump itself in the journal */
+
+ r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
+ if (r >= 0) {
+ if (iovw_put(iovw, coredump_data, sz) >= 0)
+ TAKE_PTR(coredump_data);
+ } else
+ log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
+ } else
+ log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
+ coredump_size, arg_journal_size_max);
+ }
+
+ r = sd_journal_sendv(iovw->iovec, iovw->count);
+ if (r < 0)
+ return log_error_errno(r, "Failed to log coredump: %m");
+
+ return 0;
+}
+
+static int save_context(Context *context, const struct iovec_wrapper *iovw) {
+ unsigned n, i, count = 0;
+ const char *unit;
+ int r;
+
+ assert(context);
+ assert(iovw);
+ assert(iovw->count >= _META_ARGV_MAX);
+
+ /* The context does not allocate any memory on its own */
+
+ for (n = 0; n < iovw->count; n++) {
+ struct iovec *iovec = iovw->iovec + n;
+
+ for (i = 0; i < ELEMENTSOF(meta_field_names); i++) {
+ char *p;
+
+ /* Note that these strings are NUL terminated, because we made sure that a
+ * trailing NUL byte is in the buffer, though not included in the iov_len
+ * count (see process_socket() and gather_pid_metadata_*()) */
+ assert(((char*) iovec->iov_base)[iovec->iov_len] == 0);
+
+ p = startswith(iovec->iov_base, meta_field_names[i]);
+ if (p) {
+ context->meta[i] = p;
+ count++;
+ break;
+ }
+ }
+ }
+
+ if (!context->meta[META_ARGV_PID])
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to find the PID of crashing process");
+
+ r = parse_pid(context->meta[META_ARGV_PID], &context->pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]);
+
+ unit = context->meta[META_UNIT];
+ context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE);
+ context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE);
+
+ return 0;
+}
+
+static int process_socket(int fd) {
+ _cleanup_close_ int input_fd = -1;
+ Context context = {};
+ struct iovec_wrapper iovw = {};
+ struct iovec iovec;
+ int i, r;
+
+ assert(fd >= 0);
+
+ log_setup_service();
+
+ log_debug("Processing coredump received on stdin...");
+
+ for (;;) {
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control;
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iovlen = 1,
+ };
+ ssize_t n;
+ ssize_t l;
+
+ l = next_datagram_size_fd(fd);
+ if (l < 0) {
+ r = log_error_errno(l, "Failed to determine datagram size to read: %m");
+ goto finish;
+ }
+
+ iovec.iov_len = l;
+ iovec.iov_base = malloc(l + 1);
+ if (!iovec.iov_base) {
+ r = log_oom();
+ goto finish;
+ }
+
+ mh.msg_iov = &iovec;
+
+ n = recvmsg_safe(fd, &mh, MSG_CMSG_CLOEXEC);
+ if (n < 0) {
+ free(iovec.iov_base);
+ r = log_error_errno(n, "Failed to receive datagram: %m");
+ goto finish;
+ }
+
+ /* The final zero-length datagram carries the file descriptor and tells us
+ * that we're done. */
+ if (n == 0) {
+ struct cmsghdr *found;
+
+ free(iovec.iov_base);
+
+ found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
+ if (!found) {
+ cmsg_close_all(&mh);
+ r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Coredump file descriptor missing.");
+ goto finish;
+ }
+
+ assert(input_fd < 0);
+ input_fd = *(int*) CMSG_DATA(found);
+ break;
+ } else
+ cmsg_close_all(&mh);
+
+ /* Add trailing NUL byte, in case these are strings */
+ ((char*) iovec.iov_base)[n] = 0;
+ iovec.iov_len = (size_t) n;
+
+ r = iovw_put(&iovw, iovec.iov_base, iovec.iov_len);
+ if (r < 0)
+ goto finish;
+ }
+
+ /* Make sure we got all data we really need */
+ assert(input_fd >= 0);
+
+ r = save_context(&context, &iovw);
+ if (r < 0)
+ goto finish;
+
+ /* Make sure we received at least all fields we need. */
+ for (i = 0; i < _META_MANDATORY_MAX; i++)
+ if (!context.meta[i]) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "A mandatory argument (%i) has not been sent, aborting.",
+ i);
+ goto finish;
+ }
+
+ r = submit_coredump(&context, &iovw, input_fd);
+
+finish:
+ iovw_free_contents(&iovw, true);
+ return r;
+}
+
+static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/coredump",
+ };
+ _cleanup_close_ int fd = -1;
+ size_t i;
+ int r;
+
+ assert(iovw);
+ assert(input_fd >= 0);
+
+ fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to create coredump socket: %m");
+
+ if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+ return log_error_errno(errno, "Failed to connect to coredump service: %m");
+
+ for (i = 0; i < iovw->count; i++) {
+ struct msghdr mh = {
+ .msg_iov = iovw->iovec + i,
+ .msg_iovlen = 1,
+ };
+ struct iovec copy[2];
+
+ for (;;) {
+ if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0)
+ break;
+
+ if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) {
+ /* This field didn't fit? That's a pity. Given that this is
+ * just metadata, let's truncate the field at half, and try
+ * again. We append three dots, in order to show that this is
+ * truncated. */
+
+ if (mh.msg_iov != copy) {
+ /* We don't want to modify the caller's iovec, hence
+ * let's create our own array, consisting of two new
+ * iovecs, where the first is a (truncated) copy of
+ * what we want to send, and the second one contains
+ * the trailing dots. */
+ copy[0] = iovw->iovec[i];
+ copy[1] = IOVEC_MAKE(((char[]){'.', '.', '.'}), 3);
+
+ mh.msg_iov = copy;
+ mh.msg_iovlen = 2;
+ }
+
+ copy[0].iov_len /= 2; /* halve it, and try again */
+ continue;
+ }
+
+ return log_error_errno(errno, "Failed to send coredump datagram: %m");
+ }
+ }
+
+ r = send_one_fd(fd, input_fd, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send coredump fd: %m");
+
+ return 0;
+}
+
+static int gather_pid_metadata_from_argv(
+ struct iovec_wrapper *iovw,
+ Context *context,
+ int argc, char **argv) {
+
+ _cleanup_free_ char *free_timestamp = NULL;
+ int i, r, signo;
+ char *t;
+
+ /* We gather all metadata that were passed via argv[] into an array of iovecs that
+ * we'll forward to the socket unit */
+
+ if (argc < _META_ARGV_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough arguments passed by the kernel (%i, expected %i).",
+ argc, _META_ARGV_MAX);
+
+ for (i = 0; i < _META_ARGV_MAX; i++) {
+
+ t = argv[i];
+
+ switch (i) {
+
+ case META_ARGV_TIMESTAMP:
+ /* The journal fields contain the timestamp padded with six
+ * zeroes, so that the kernel-supplied 1s granularity timestamps
+ * becomes 1µs granularity, i.e. the granularity systemd usually
+ * operates in. */
+ t = free_timestamp = strjoin(argv[i], "000000");
+ if (!t)
+ return log_oom();
+ break;
+
+ case META_ARGV_SIGNAL:
+ /* For signal, record its pretty name too */
+ if (safe_atoi(argv[i], &signo) >= 0 && SIGNAL_VALID(signo))
+ (void) iovw_put_string_field(iovw, "COREDUMP_SIGNAL_NAME=SIG",
+ signal_to_string(signo));
+ break;
+
+ default:
+ break;
+ }
+
+ r = iovw_put_string_field(iovw, meta_field_names[i], t);
+ if (r < 0)
+ return r;
+ }
+
+ /* Cache some of the process metadata we collected so far and that we'll need to
+ * access soon */
+ return save_context(context, iovw);
+}
+
+static int gather_pid_metadata(struct iovec_wrapper *iovw, Context *context) {
+ uid_t owner_uid;
+ pid_t pid;
+ char *t;
+ const char *p;
+ int r;
+
+ /* Note that if we fail on oom later on, we do not roll-back changes to the iovec
+ * structure. (It remains valid, with the first iovec fields initialized.) */
+
+ pid = context->pid;
+
+ /* The following is mandatory */
+ r = get_process_comm(pid, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get COMM: %m");
+
+ r = iovw_put_string_field_free(iovw, "COREDUMP_COMM=", t);
+ if (r < 0)
+ return r;
+
+ /* The following are optional but we used them if present */
+ r = get_process_exe(pid, &t);
+ if (r >= 0)
+ r = iovw_put_string_field_free(iovw, "COREDUMP_EXE=", t);
+ if (r < 0)
+ log_warning_errno(r, "Failed to get EXE, ignoring: %m");
+
+ if (cg_pid_get_unit(pid, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_UNIT=", t);
+
+ /* The next are optional */
+ if (cg_pid_get_user_unit(pid, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_USER_UNIT=", t);
+
+ if (sd_pid_get_session(pid, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_SESSION=", t);
+
+ if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
+ r = asprintf(&t, UID_FMT, owner_uid);
+ if (r > 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_OWNER_UID=", t);
+ }
+
+ if (sd_pid_get_slice(pid, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_SLICE=", t);
+
+ if (get_process_cmdline(pid, SIZE_MAX, 0, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_CMDLINE=", t);
+
+ if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_CGROUP=", t);
+
+ if (compose_open_fds(pid, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_OPEN_FDS=", t);
+
+ p = procfs_file_alloca(pid, "status");
+ if (read_full_file(p, &t, NULL) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_STATUS=", t);
+
+ p = procfs_file_alloca(pid, "maps");
+ if (read_full_file(p, &t, NULL) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MAPS=", t);
+
+ p = procfs_file_alloca(pid, "limits");
+ if (read_full_file(p, &t, NULL) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_LIMITS=", t);
+
+ p = procfs_file_alloca(pid, "cgroup");
+ if (read_full_file(p, &t, NULL) >=0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_CGROUP=", t);
+
+ p = procfs_file_alloca(pid, "mountinfo");
+ if (read_full_file(p, &t, NULL) >=0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_PROC_MOUNTINFO=", t);
+
+ if (get_process_cwd(pid, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_CWD=", t);
+
+ if (get_process_root(pid, &t) >= 0) {
+ bool proc_self_root_is_slash;
+
+ proc_self_root_is_slash = strcmp(t, "/") == 0;
+
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_ROOT=", t);
+
+ /* If the process' root is "/", then there is a chance it has
+ * mounted own root and hence being containerized. */
+ if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_CONTAINER_CMDLINE=", t);
+ }
+
+ if (get_process_environ(pid, &t) >= 0)
+ (void) iovw_put_string_field_free(iovw, "COREDUMP_ENVIRON=", t);
+
+ /* we successfully acquired all metadata */
+ return save_context(context, iovw);
+}
+
+static int process_kernel(int argc, char* argv[]) {
+ Context context = {};
+ struct iovec_wrapper *iovw;
+ int r;
+
+ log_debug("Processing coredump received from the kernel...");
+
+ iovw = iovw_new();
+ if (!iovw)
+ return log_oom();
+
+ (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR);
+ (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
+
+ /* Collect all process metadata passed by the kernel through argv[] */
+ r = gather_pid_metadata_from_argv(iovw, &context, argc - 1, argv + 1);
+ if (r < 0)
+ goto finish;
+
+ /* Collect the rest of the process metadata retrieved from the runtime */
+ r = gather_pid_metadata(iovw, &context);
+ if (r < 0)
+ goto finish;
+
+ if (!context.is_journald) {
+ /* OK, now we know it's not the journal, hence we can make use of it now. */
+ log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
+ log_open();
+ }
+
+ /* If this is PID 1 disable coredump collection, we'll unlikely be able to process
+ * it later on.
+ *
+ * FIXME: maybe we should disable coredumps generation from the beginning and
+ * re-enable it only when we know it's either safe (ie we're not running OOM) or
+ * it's not pid1 ? */
+ if (context.is_pid1) {
+ log_notice("Due to PID 1 having crashed coredump collection will now be turned off.");
+ disable_coredumps();
+ }
+
+ if (context.is_journald || context.is_pid1)
+ r = submit_coredump(&context, iovw, STDIN_FILENO);
+ else
+ r = send_iovec(iovw, STDIN_FILENO);
+
+ finish:
+ iovw = iovw_free_free(iovw);
+ return r;
+}
+
+static int process_backtrace(int argc, char *argv[]) {
+ Context context = {};
+ struct iovec_wrapper *iovw;
+ char *message;
+ size_t i;
+ int r;
+ _cleanup_(journal_importer_cleanup) JournalImporter importer = JOURNAL_IMPORTER_INIT(STDIN_FILENO);
+
+ log_debug("Processing backtrace on stdin...");
+
+ iovw = iovw_new();
+ if (!iovw)
+ return log_oom();
+
+ (void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_BACKTRACE_STR);
+ (void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
+
+ /* Collect all process metadata from argv[] by making sure to skip the
+ * '--backtrace' option */
+ r = gather_pid_metadata_from_argv(iovw, &context, argc - 2, argv + 2);
+ if (r < 0)
+ goto finish;
+
+ /* Collect the rest of the process metadata retrieved from the runtime */
+ r = gather_pid_metadata(iovw, &context);
+ if (r < 0)
+ goto finish;
+
+ for (;;) {
+ r = journal_importer_process_data(&importer);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse journal entry on stdin: %m");
+ goto finish;
+ }
+ if (r == 1 || /* complete entry */
+ journal_importer_eof(&importer)) /* end of data */
+ break;
+ }
+
+ if (journal_importer_eof(&importer)) {
+ log_warning("Did not receive a full journal entry on stdin, ignoring message sent by reporter");
+
+ message = strjoina("Process ", context.meta[META_ARGV_PID],
+ " (", context.meta[META_COMM], ")"
+ " of user ", context.meta[META_ARGV_UID],
+ " failed with ", context.meta[META_ARGV_SIGNAL]);
+
+ r = iovw_put_string_field(iovw, "MESSAGE=", message);
+ if (r < 0)
+ return r;
+ } else {
+ /* The imported iovecs are not supposed to be freed by us so let's store
+ * them at the end of the array so we can skip them while freeing the
+ * rest. */
+ for (i = 0; i < importer.iovw.count; i++) {
+ struct iovec *iovec = importer.iovw.iovec + i;
+
+ iovw_put(iovw, iovec->iov_base, iovec->iov_len);
+ }
+ }
+
+ r = sd_journal_sendv(iovw->iovec, iovw->count);
+ if (r < 0)
+ log_error_errno(r, "Failed to log backtrace: %m");
+
+ finish:
+ iovw->count -= importer.iovw.count;
+ iovw = iovw_free_free(iovw);
+ return r;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ /* First, log to a safe place, since we don't know what crashed and it might
+ * be journald which we'd rather not log to then. */
+
+ log_set_target(LOG_TARGET_KMSG);
+ log_open();
+
+ /* Make sure we never enter a loop */
+ (void) prctl(PR_SET_DUMPABLE, 0);
+
+ /* Ignore all parse errors */
+ (void) parse_config();
+
+ log_debug("Selected storage '%s'.", coredump_storage_to_string(arg_storage));
+ log_debug("Selected compression %s.", yes_no(arg_compress));
+
+ r = sd_listen_fds(false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine the number of file descriptors: %m");
+
+ /* If we got an fd passed, we are running in coredumpd mode. Otherwise we
+ * are invoked from the kernel as coredump handler. */
+ if (r == 0) {
+ if (streq_ptr(argv[1], "--backtrace"))
+ return process_backtrace(argc, argv);
+ else
+ return process_kernel(argc, argv);
+ } else if (r == 1)
+ return process_socket(SD_LISTEN_FDS_START);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Received unexpected number of file descriptors.");
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/coredump/coredump.conf b/src/coredump/coredump.conf
new file mode 100644
index 0000000..c2f0643
--- /dev/null
+++ b/src/coredump/coredump.conf
@@ -0,0 +1,21 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See coredump.conf(5) for details.
+
+[Coredump]
+#Storage=external
+#Compress=yes
+#ProcessSizeMax=2G
+#ExternalSizeMax=2G
+#JournalSizeMax=767M
+#MaxUse=
+#KeepFree=
diff --git a/src/coredump/coredumpctl.c b/src/coredump/coredumpctl.c
new file mode 100644
index 0000000..91356ad
--- /dev/null
+++ b/src/coredump/coredumpctl.c
@@ -0,0 +1,1114 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <locale.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-journal.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "compress.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "verbs.h"
+
+#define SHORT_BUS_CALL_TIMEOUT_USEC (3 * USEC_PER_SEC)
+
+static usec_t arg_since = USEC_INFINITY, arg_until = USEC_INFINITY;
+static const char* arg_field = NULL;
+static const char *arg_debugger = NULL;
+static const char *arg_directory = NULL;
+static char **arg_file = NULL;
+static PagerFlags arg_pager_flags = 0;
+static int arg_no_legend = false;
+static int arg_one = false;
+static const char* arg_output = NULL;
+static bool arg_reverse = false;
+static bool arg_quiet = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_file, strv_freep);
+
+static int add_match(sd_journal *j, const char *match) {
+ _cleanup_free_ char *p = NULL;
+ const char* prefix, *pattern;
+ pid_t pid;
+ int r;
+
+ if (strchr(match, '='))
+ prefix = "";
+ else if (strchr(match, '/')) {
+ r = path_make_absolute_cwd(match, &p);
+ if (r < 0)
+ return log_error_errno(r, "path_make_absolute_cwd(\"%s\"): %m", match);
+
+ match = p;
+ prefix = "COREDUMP_EXE=";
+ } else if (parse_pid(match, &pid) >= 0)
+ prefix = "COREDUMP_PID=";
+ else
+ prefix = "COREDUMP_COMM=";
+
+ pattern = strjoina(prefix, match);
+ log_debug("Adding match: %s", pattern);
+ r = sd_journal_add_match(j, pattern, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match \"%s\": %m", match);
+
+ return 0;
+}
+
+static int add_matches(sd_journal *j, char **matches) {
+ char **match;
+ int r;
+
+ r = sd_journal_add_match(j, "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match \"%s\": %m", "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
+
+ r = sd_journal_add_match(j, "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match \"%s\": %m", "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
+
+ STRV_FOREACH(match, matches) {
+ r = add_match(j, *match);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int acquire_journal(sd_journal **ret, char **matches) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert(ret);
+
+ if (arg_directory) {
+ r = sd_journal_open_directory(&j, arg_directory, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journals in directory: %s: %m", arg_directory);
+ } else if (arg_file) {
+ r = sd_journal_open_files(&j, (const char**)arg_file, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journal files: %m");
+ } else {
+ r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journal: %m");
+ }
+
+ r = journal_access_check_and_warn(j, arg_quiet, true);
+ if (r < 0)
+ return r;
+
+ r = add_matches(j, matches);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *filter;
+
+ filter = journal_make_match_string(j);
+ log_debug("Journal filter: %s", filter);
+ }
+
+ *ret = TAKE_PTR(j);
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("coredumpctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sList or retrieve coredumps from the journal.%s\n"
+ "\nCommands:\n"
+ " list [MATCHES...] List available coredumps (default)\n"
+ " info [MATCHES...] Show detailed information about one or more coredumps\n"
+ " dump [MATCHES...] Print first matching coredump to stdout\n"
+ " debug [MATCHES...] Start a debugger for the first matching coredump\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Print version string\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not print the column headers\n"
+ " --debugger=DEBUGGER Use the given debugger\n"
+ " -1 Show information about most recent entry only\n"
+ " -S --since=DATE Only print coredumps since the date\n"
+ " -U --until=DATE Only print coredumps until the date\n"
+ " -r --reverse Show the newest entries first\n"
+ " -F --field=FIELD List all values a certain field takes\n"
+ " -o --output=FILE Write output to FILE\n"
+ " --file=PATH Use journal file\n"
+ " -D --directory=DIR Use journal files from directory\n\n"
+ " -q --quiet Do not show info messages and privilege warning\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_DEBUGGER,
+ ARG_FILE,
+ };
+
+ int c, r;
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version" , no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "debugger", required_argument, NULL, ARG_DEBUGGER },
+ { "output", required_argument, NULL, 'o' },
+ { "field", required_argument, NULL, 'F' },
+ { "file", required_argument, NULL, ARG_FILE },
+ { "directory", required_argument, NULL, 'D' },
+ { "reverse", no_argument, NULL, 'r' },
+ { "since", required_argument, NULL, 'S' },
+ { "until", required_argument, NULL, 'U' },
+ { "quiet", no_argument, NULL, 'q' },
+ {}
+ };
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ho:F:1D:rS:U:q", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_no_legend = true;
+ break;
+
+ case ARG_DEBUGGER:
+ arg_debugger = optarg;
+ break;
+
+ case ARG_FILE:
+ r = glob_extend(&arg_file, optarg, GLOB_NOCHECK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add paths: %m");
+ break;
+
+ case 'o':
+ if (arg_output)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot set output more than once.");
+
+ arg_output = optarg;
+ break;
+
+ case 'S':
+ r = parse_timestamp(optarg, &arg_since);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timestamp '%s': %m", optarg);
+ break;
+
+ case 'U':
+ r = parse_timestamp(optarg, &arg_until);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timestamp '%s': %m", optarg);
+ break;
+
+ case 'F':
+ if (arg_field)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot use --field/-F more than once.");
+ arg_field = optarg;
+ break;
+
+ case '1':
+ arg_one = true;
+ break;
+
+ case 'D':
+ arg_directory = optarg;
+ break;
+
+ case 'r':
+ arg_reverse = true;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_since != USEC_INFINITY && arg_until != USEC_INFINITY &&
+ arg_since > arg_until)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--since= must be before --until=.");
+
+ return 1;
+}
+
+static int retrieve(const void *data,
+ size_t len,
+ const char *name,
+ char **var) {
+
+ size_t ident;
+ char *v;
+
+ ident = strlen(name) + 1; /* name + "=" */
+
+ if (len < ident)
+ return 0;
+
+ if (memcmp(data, name, ident - 1) != 0)
+ return 0;
+
+ if (((const char*) data)[ident - 1] != '=')
+ return 0;
+
+ v = strndup((const char*)data + ident, len - ident);
+ if (!v)
+ return log_oom();
+
+ free_and_replace(*var, v);
+ return 1;
+}
+
+static int print_field(FILE* file, sd_journal *j) {
+ const void *d;
+ size_t l;
+
+ assert(file);
+ assert(j);
+
+ assert(arg_field);
+
+ /* A (user-specified) field may appear more than once for a given entry.
+ * We will print all of the occurrences.
+ * This is different below for fields that systemd-coredump uses,
+ * because they cannot meaningfully appear more than once.
+ */
+ SD_JOURNAL_FOREACH_DATA(j, d, l) {
+ _cleanup_free_ char *value = NULL;
+ int r;
+
+ r = retrieve(d, l, arg_field, &value);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ fprintf(file, "%s\n", value);
+ }
+
+ return 0;
+}
+
+#define RETRIEVE(d, l, name, arg) \
+ { \
+ int _r = retrieve(d, l, name, &arg); \
+ if (_r < 0) \
+ return _r; \
+ if (_r > 0) \
+ continue; \
+ }
+
+static int print_list(FILE* file, sd_journal *j, int had_legend) {
+ _cleanup_free_ char
+ *mid = NULL, *pid = NULL, *uid = NULL, *gid = NULL,
+ *sgnl = NULL, *exe = NULL, *comm = NULL, *cmdline = NULL,
+ *filename = NULL, *truncated = NULL, *coredump = NULL;
+ const void *d;
+ size_t l;
+ usec_t t;
+ char buf[FORMAT_TIMESTAMP_MAX];
+ int r;
+ const char *present;
+ bool normal_coredump;
+
+ assert(file);
+ assert(j);
+
+ SD_JOURNAL_FOREACH_DATA(j, d, l) {
+ RETRIEVE(d, l, "MESSAGE_ID", mid);
+ RETRIEVE(d, l, "COREDUMP_PID", pid);
+ RETRIEVE(d, l, "COREDUMP_UID", uid);
+ RETRIEVE(d, l, "COREDUMP_GID", gid);
+ RETRIEVE(d, l, "COREDUMP_SIGNAL", sgnl);
+ RETRIEVE(d, l, "COREDUMP_EXE", exe);
+ RETRIEVE(d, l, "COREDUMP_COMM", comm);
+ RETRIEVE(d, l, "COREDUMP_CMDLINE", cmdline);
+ RETRIEVE(d, l, "COREDUMP_FILENAME", filename);
+ RETRIEVE(d, l, "COREDUMP_TRUNCATED", truncated);
+ RETRIEVE(d, l, "COREDUMP", coredump);
+ }
+
+ if (!pid && !uid && !gid && !sgnl && !exe && !comm && !cmdline && !filename) {
+ log_warning("Empty coredump log entry");
+ return -EINVAL;
+ }
+
+ r = sd_journal_get_realtime_usec(j, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ format_timestamp(buf, sizeof(buf), t);
+
+ if (!had_legend && !arg_no_legend)
+ fprintf(file, "%-*s %*s %*s %*s %*s %-*s %s\n",
+ FORMAT_TIMESTAMP_WIDTH, "TIME",
+ 6, "PID",
+ 5, "UID",
+ 5, "GID",
+ 3, "SIG",
+ 9, "COREFILE",
+ "EXE");
+
+ normal_coredump = streq_ptr(mid, SD_MESSAGE_COREDUMP_STR);
+
+ if (filename)
+ if (access(filename, R_OK) == 0)
+ present = "present";
+ else if (errno == ENOENT)
+ present = "missing";
+ else
+ present = "error";
+ else if (coredump)
+ present = "journal";
+ else if (normal_coredump)
+ present = "none";
+ else
+ present = "-";
+
+ if (STR_IN_SET(present, "present", "journal") && truncated && parse_boolean(truncated) > 0)
+ present = "truncated";
+
+ fprintf(file, "%-*s %*s %*s %*s %*s %-*s %s\n",
+ FORMAT_TIMESTAMP_WIDTH, buf,
+ 6, strna(pid),
+ 5, strna(uid),
+ 5, strna(gid),
+ 3, normal_coredump ? strna(sgnl) : "-",
+ 9, present,
+ strna(exe ?: (comm ?: cmdline)));
+
+ return 0;
+}
+
+static int print_info(FILE *file, sd_journal *j, bool need_space) {
+ _cleanup_free_ char
+ *mid = NULL, *pid = NULL, *uid = NULL, *gid = NULL,
+ *sgnl = NULL, *exe = NULL, *comm = NULL, *cmdline = NULL,
+ *unit = NULL, *user_unit = NULL, *session = NULL,
+ *boot_id = NULL, *machine_id = NULL, *hostname = NULL,
+ *slice = NULL, *cgroup = NULL, *owner_uid = NULL,
+ *message = NULL, *timestamp = NULL, *filename = NULL,
+ *truncated = NULL, *coredump = NULL;
+ const void *d;
+ size_t l;
+ bool normal_coredump;
+ int r;
+
+ assert(file);
+ assert(j);
+
+ SD_JOURNAL_FOREACH_DATA(j, d, l) {
+ RETRIEVE(d, l, "MESSAGE_ID", mid);
+ RETRIEVE(d, l, "COREDUMP_PID", pid);
+ RETRIEVE(d, l, "COREDUMP_UID", uid);
+ RETRIEVE(d, l, "COREDUMP_GID", gid);
+ RETRIEVE(d, l, "COREDUMP_SIGNAL", sgnl);
+ RETRIEVE(d, l, "COREDUMP_EXE", exe);
+ RETRIEVE(d, l, "COREDUMP_COMM", comm);
+ RETRIEVE(d, l, "COREDUMP_CMDLINE", cmdline);
+ RETRIEVE(d, l, "COREDUMP_UNIT", unit);
+ RETRIEVE(d, l, "COREDUMP_USER_UNIT", user_unit);
+ RETRIEVE(d, l, "COREDUMP_SESSION", session);
+ RETRIEVE(d, l, "COREDUMP_OWNER_UID", owner_uid);
+ RETRIEVE(d, l, "COREDUMP_SLICE", slice);
+ RETRIEVE(d, l, "COREDUMP_CGROUP", cgroup);
+ RETRIEVE(d, l, "COREDUMP_TIMESTAMP", timestamp);
+ RETRIEVE(d, l, "COREDUMP_FILENAME", filename);
+ RETRIEVE(d, l, "COREDUMP_TRUNCATED", truncated);
+ RETRIEVE(d, l, "COREDUMP", coredump);
+ RETRIEVE(d, l, "_BOOT_ID", boot_id);
+ RETRIEVE(d, l, "_MACHINE_ID", machine_id);
+ RETRIEVE(d, l, "_HOSTNAME", hostname);
+ RETRIEVE(d, l, "MESSAGE", message);
+ }
+
+ if (need_space)
+ fputs("\n", file);
+
+ normal_coredump = streq_ptr(mid, SD_MESSAGE_COREDUMP_STR);
+
+ if (comm)
+ fprintf(file,
+ " PID: %s%s%s (%s)\n",
+ ansi_highlight(), strna(pid), ansi_normal(), comm);
+ else
+ fprintf(file,
+ " PID: %s%s%s\n",
+ ansi_highlight(), strna(pid), ansi_normal());
+
+ if (uid) {
+ uid_t n;
+
+ if (parse_uid(uid, &n) >= 0) {
+ _cleanup_free_ char *u = NULL;
+
+ u = uid_to_name(n);
+ fprintf(file,
+ " UID: %s (%s)\n",
+ uid, u);
+ } else {
+ fprintf(file,
+ " UID: %s\n",
+ uid);
+ }
+ }
+
+ if (gid) {
+ gid_t n;
+
+ if (parse_gid(gid, &n) >= 0) {
+ _cleanup_free_ char *g = NULL;
+
+ g = gid_to_name(n);
+ fprintf(file,
+ " GID: %s (%s)\n",
+ gid, g);
+ } else {
+ fprintf(file,
+ " GID: %s\n",
+ gid);
+ }
+ }
+
+ if (sgnl) {
+ int sig;
+ const char *name = normal_coredump ? "Signal" : "Reason";
+
+ if (normal_coredump && safe_atoi(sgnl, &sig) >= 0)
+ fprintf(file, " %s: %s (%s)\n", name, sgnl, signal_to_string(sig));
+ else
+ fprintf(file, " %s: %s\n", name, sgnl);
+ }
+
+ if (timestamp) {
+ usec_t u;
+
+ r = safe_atou64(timestamp, &u);
+ if (r >= 0) {
+ char absolute[FORMAT_TIMESTAMP_MAX], relative[FORMAT_TIMESPAN_MAX];
+
+ fprintf(file,
+ " Timestamp: %s (%s)\n",
+ format_timestamp(absolute, sizeof(absolute), u),
+ format_timestamp_relative(relative, sizeof(relative), u));
+
+ } else
+ fprintf(file, " Timestamp: %s\n", timestamp);
+ }
+
+ if (cmdline)
+ fprintf(file, " Command Line: %s\n", cmdline);
+ if (exe)
+ fprintf(file, " Executable: %s%s%s\n", ansi_highlight(), exe, ansi_normal());
+ if (cgroup)
+ fprintf(file, " Control Group: %s\n", cgroup);
+ if (unit)
+ fprintf(file, " Unit: %s\n", unit);
+ if (user_unit)
+ fprintf(file, " User Unit: %s\n", user_unit);
+ if (slice)
+ fprintf(file, " Slice: %s\n", slice);
+ if (session)
+ fprintf(file, " Session: %s\n", session);
+ if (owner_uid) {
+ uid_t n;
+
+ if (parse_uid(owner_uid, &n) >= 0) {
+ _cleanup_free_ char *u = NULL;
+
+ u = uid_to_name(n);
+ fprintf(file,
+ " Owner UID: %s (%s)\n",
+ owner_uid, u);
+ } else {
+ fprintf(file,
+ " Owner UID: %s\n",
+ owner_uid);
+ }
+ }
+ if (boot_id)
+ fprintf(file, " Boot ID: %s\n", boot_id);
+ if (machine_id)
+ fprintf(file, " Machine ID: %s\n", machine_id);
+ if (hostname)
+ fprintf(file, " Hostname: %s\n", hostname);
+
+ if (filename) {
+ bool inacc, trunc;
+
+ inacc = access(filename, R_OK) < 0;
+ trunc = truncated && parse_boolean(truncated) > 0;
+
+ if (inacc || trunc)
+ fprintf(file, " Storage: %s%s (%s%s%s)%s\n",
+ ansi_highlight_red(),
+ filename,
+ inacc ? "inaccessible" : "",
+ inacc && trunc ? ", " : "",
+ trunc ? "truncated" : "",
+ ansi_normal());
+ else
+ fprintf(file, " Storage: %s\n", filename);
+ }
+
+ else if (coredump)
+ fprintf(file, " Storage: journal\n");
+ else
+ fprintf(file, " Storage: none\n");
+
+ if (message) {
+ _cleanup_free_ char *m = NULL;
+
+ m = strreplace(message, "\n", "\n ");
+
+ fprintf(file, " Message: %s\n", strstrip(m ?: message));
+ }
+
+ return 0;
+}
+
+static int focus(sd_journal *j) {
+ int r;
+
+ r = sd_journal_seek_tail(j);
+ if (r == 0)
+ r = sd_journal_previous(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to search journal: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "No match found.");
+ return r;
+}
+
+static int print_entry(sd_journal *j, unsigned n_found, bool verb_is_info) {
+ assert(j);
+
+ if (verb_is_info)
+ return print_info(stdout, j, n_found);
+ else if (arg_field)
+ return print_field(stdout, j);
+ else
+ return print_list(stdout, j, n_found);
+}
+
+static int dump_list(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ unsigned n_found = 0;
+ bool verb_is_info;
+ int r;
+
+ verb_is_info = (argc >= 1 && streq(argv[0], "info"));
+
+ r = acquire_journal(&j, argv + 1);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ /* The coredumps are likely to compressed, and for just
+ * listing them we don't need to decompress them, so let's
+ * pick a fairly low data threshold here */
+ sd_journal_set_data_threshold(j, 4096);
+
+ /* "info" without pattern implies "-1" */
+ if (arg_one || (verb_is_info && argc == 1)) {
+ r = focus(j);
+ if (r < 0)
+ return r;
+
+ return print_entry(j, 0, verb_is_info);
+ } else {
+ if (arg_since != USEC_INFINITY && !arg_reverse)
+ r = sd_journal_seek_realtime_usec(j, arg_since);
+ else if (arg_until != USEC_INFINITY && arg_reverse)
+ r = sd_journal_seek_realtime_usec(j, arg_until);
+ else if (arg_reverse)
+ r = sd_journal_seek_tail(j);
+ else
+ r = sd_journal_seek_head(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to seek to date: %m");
+
+ for (;;) {
+ if (!arg_reverse)
+ r = sd_journal_next(j);
+ else
+ r = sd_journal_previous(j);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to iterate through journal: %m");
+
+ if (r == 0)
+ break;
+
+ if (arg_until != USEC_INFINITY && !arg_reverse) {
+ usec_t usec;
+
+ r = sd_journal_get_realtime_usec(j, &usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine timestamp: %m");
+ if (usec > arg_until)
+ continue;
+ }
+
+ if (arg_since != USEC_INFINITY && arg_reverse) {
+ usec_t usec;
+
+ r = sd_journal_get_realtime_usec(j, &usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine timestamp: %m");
+ if (usec < arg_since)
+ continue;
+ }
+
+ r = print_entry(j, n_found++, verb_is_info);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_field && n_found <= 0) {
+ if (!arg_quiet)
+ log_notice("No coredumps found.");
+ return -ESRCH;
+ }
+ }
+
+ return 0;
+}
+
+static int save_core(sd_journal *j, FILE *file, char **path, bool *unlink_temp) {
+ const char *data;
+ _cleanup_free_ char *filename = NULL;
+ size_t len;
+ int r, fd;
+ _cleanup_close_ int fdt = -1;
+ char *temp = NULL;
+
+ assert(!(file && path)); /* At most one can be specified */
+ assert(!!path == !!unlink_temp); /* Those must be specified together */
+
+ /* Look for a coredump on disk first. */
+ r = sd_journal_get_data(j, "COREDUMP_FILENAME", (const void**) &data, &len);
+ if (r == 0) {
+ r = retrieve(data, len, "COREDUMP_FILENAME", &filename);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ if (access(filename, R_OK) < 0)
+ return log_error_errno(errno, "File \"%s\" is not readable: %m", filename);
+
+ if (path && !ENDSWITH_SET(filename, ".xz", ".lz4", ".zst")) {
+ *path = TAKE_PTR(filename);
+
+ return 0;
+ }
+
+ } else {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to retrieve COREDUMP_FILENAME field: %m");
+ /* Check that we can have a COREDUMP field. We still haven't set a high
+ * data threshold, so we'll get a few kilobytes at most.
+ */
+
+ r = sd_journal_get_data(j, "COREDUMP", (const void**) &data, &len);
+ if (r == -ENOENT)
+ return log_error_errno(r, "Coredump entry has no core attached (neither internally in the journal nor externally on disk).");
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve COREDUMP field: %m");
+ }
+
+ if (path) {
+ const char *vt;
+
+ /* Create a temporary file to write the uncompressed core to. */
+
+ r = var_tmp_dir(&vt);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire temporary directory path: %m");
+
+ temp = path_join(vt, "coredump-XXXXXX");
+ if (!temp)
+ return log_oom();
+
+ fdt = mkostemp_safe(temp);
+ if (fdt < 0)
+ return log_error_errno(fdt, "Failed to create temporary file: %m");
+ log_debug("Created temporary file %s", temp);
+
+ fd = fdt;
+ } else {
+ /* If neither path or file are specified, we will write to stdout. Let's now check
+ * if stdout is connected to a tty. We checked that the file exists, or that the
+ * core might be stored in the journal. In this second case, if we found the entry,
+ * in all likelihood we will be able to access the COREDUMP= field. In either case,
+ * we stop before doing any "real" work, i.e. before starting decompression or
+ * reading from the file or creating temporary files.
+ */
+ if (!file) {
+ if (on_tty())
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY),
+ "Refusing to dump core to tty"
+ " (use shell redirection or specify --output).");
+ file = stdout;
+ }
+
+ fd = fileno(file);
+ }
+
+ if (filename) {
+#if HAVE_COMPRESSION
+ _cleanup_close_ int fdf;
+
+ fdf = open(filename, O_RDONLY | O_CLOEXEC);
+ if (fdf < 0) {
+ r = log_error_errno(errno, "Failed to open %s: %m", filename);
+ goto error;
+ }
+
+ r = decompress_stream(filename, fdf, fd, -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to decompress %s: %m", filename);
+ goto error;
+ }
+#else
+ r = log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Cannot decompress file. Compiled without compression support.");
+ goto error;
+#endif
+ } else {
+ ssize_t sz;
+
+ /* We want full data, nothing truncated. */
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_data(j, "COREDUMP", (const void**) &data, &len);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve COREDUMP field: %m");
+
+ assert(len >= 9);
+ data += 9;
+ len -= 9;
+
+ sz = write(fd, data, len);
+ if (sz < 0) {
+ r = log_error_errno(errno, "Failed to write output: %m");
+ goto error;
+ }
+ if (sz != (ssize_t) len) {
+ log_error("Short write to output.");
+ r = -EIO;
+ goto error;
+ }
+ }
+
+ if (temp) {
+ *path = temp;
+ *unlink_temp = true;
+ }
+ return 0;
+
+error:
+ if (temp) {
+ (void) unlink(temp);
+ log_debug("Removed temporary file %s", temp);
+ }
+ return r;
+}
+
+static int dump_core(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (arg_field)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --field/-F only makes sense with list");
+
+ r = acquire_journal(&j, argv + 1);
+ if (r < 0)
+ return r;
+
+ r = focus(j);
+ if (r < 0)
+ return r;
+
+ if (arg_output) {
+ f = fopen(arg_output, "we");
+ if (!f)
+ return log_error_errno(errno, "Failed to open \"%s\" for writing: %m", arg_output);
+ }
+
+ print_info(f ? stdout : stderr, j, false);
+
+ r = save_core(j, f, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_journal_previous(j);
+ if (r > 0 && !arg_quiet)
+ log_notice("More than one entry matches, ignoring rest.");
+
+ return 0;
+}
+
+static int run_debug(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ _cleanup_free_ char *exe = NULL, *path = NULL, *debugger = NULL;
+ bool unlink_path = false;
+ const char *data, *fork_name;
+ size_t len;
+ pid_t pid;
+ int r;
+
+ if (!arg_debugger) {
+ char *env_debugger;
+
+ env_debugger = getenv("SYSTEMD_DEBUGGER");
+ if (env_debugger)
+ arg_debugger = env_debugger;
+ else
+ arg_debugger = "gdb";
+ }
+
+ debugger = strdup(arg_debugger);
+ if (!debugger)
+ return -ENOMEM;
+
+ if (arg_field)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --field/-F only makes sense with list");
+
+ r = acquire_journal(&j, argv + 1);
+ if (r < 0)
+ return r;
+
+ r = focus(j);
+ if (r < 0)
+ return r;
+
+ print_info(stdout, j, false);
+ fputs("\n", stdout);
+
+ r = sd_journal_get_data(j, "COREDUMP_EXE", (const void**) &data, &len);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve COREDUMP_EXE field: %m");
+
+ assert(len > STRLEN("COREDUMP_EXE="));
+ data += STRLEN("COREDUMP_EXE=");
+ len -= STRLEN("COREDUMP_EXE=");
+
+ exe = strndup(data, len);
+ if (!exe)
+ return log_oom();
+
+ if (endswith(exe, " (deleted)"))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Binary already deleted.");
+
+ if (!path_is_absolute(exe))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Binary is not an absolute path.");
+
+ r = save_core(j, NULL, &path, &unlink_path);
+ if (r < 0)
+ return r;
+
+ /* Don't interfere with gdb and its handling of SIGINT. */
+ (void) ignore_signals(SIGINT, -1);
+
+ fork_name = strjoina("(", debugger, ")");
+
+ r = safe_fork(fork_name, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ execlp(debugger, debugger, exe, "-c", path, NULL);
+ log_open();
+ log_error_errno(errno, "Failed to invoke %s: %m", debugger);
+ _exit(EXIT_FAILURE);
+ }
+
+ r = wait_for_terminate_and_check(debugger, pid, WAIT_LOG_ABNORMAL);
+
+finish:
+ (void) default_signals(SIGINT, -1);
+
+ if (unlink_path) {
+ log_debug("Removed temporary file %s", path);
+ (void) unlink(path);
+ }
+
+ return r;
+}
+
+static int check_units_active(void) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int c = 0, r;
+ const char *id, *state, *substate;
+
+ if (arg_quiet)
+ return false;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire bus: %m");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnitsByPatterns");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, NULL);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, STRV_MAKE("systemd-coredump@*.service"));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, SHORT_BUS_CALL_TIMEOUT_USEC, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if any systemd-coredump@.service units are running: %s",
+ bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(
+ reply, "(ssssssouso)",
+ &id, NULL, NULL, &state, &substate,
+ NULL, NULL, NULL, NULL, NULL)) > 0) {
+ bool found = !STR_IN_SET(state, "inactive", "dead", "failed");
+ log_debug("Unit %s is %s/%s, %scounting it.", id, state, substate, found ? "" : "not ");
+ c += found;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return c;
+}
+
+static int coredumpctl_main(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "list", VERB_ANY, VERB_ANY, VERB_DEFAULT, dump_list },
+ { "info", VERB_ANY, VERB_ANY, 0, dump_list },
+ { "dump", VERB_ANY, VERB_ANY, 0, dump_core },
+ { "debug", VERB_ANY, VERB_ANY, 0, run_debug },
+ { "gdb", VERB_ANY, VERB_ANY, 0, run_debug },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r, units_active;
+
+ setlocale(LC_ALL, "");
+ log_setup_cli();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ sigbus_install();
+
+ units_active = check_units_active(); /* error is treated the same as 0 */
+
+ r = coredumpctl_main(argc, argv);
+
+ if (units_active > 0)
+ printf("%s-- Notice: %d systemd-coredump@.service %s, output may be incomplete.%s\n",
+ ansi_highlight_red(),
+ units_active, units_active == 1 ? "unit is running" : "units are running",
+ ansi_normal());
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/coredump/meson.build b/src/coredump/meson.build
new file mode 100644
index 0000000..ebd99bd
--- /dev/null
+++ b/src/coredump/meson.build
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_coredump_sources = files('''
+ coredump.c
+ coredump-vacuum.c
+ coredump-vacuum.h
+'''.split())
+
+if conf.get('HAVE_ELFUTILS') == 1
+ systemd_coredump_sources += files(['stacktrace.c',
+ 'stacktrace.h'])
+endif
+
+coredumpctl_sources = files('coredumpctl.c')
+
+if conf.get('ENABLE_COREDUMP') == 1 and install_sysconfdir
+ install_data('coredump.conf',
+ install_dir : pkgsysconfdir)
+endif
+
+tests += [
+ [['src/coredump/test-coredump-vacuum.c',
+ 'src/coredump/coredump-vacuum.c',
+ 'src/coredump/coredump-vacuum.h'],
+ [],
+ [],
+ 'ENABLE_COREDUMP', 'manual'],
+]
diff --git a/src/coredump/stacktrace.c b/src/coredump/stacktrace.c
new file mode 100644
index 0000000..a29ab12
--- /dev/null
+++ b/src/coredump/stacktrace.c
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <dwarf.h>
+#include <elfutils/libdwfl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "stacktrace.h"
+#include "string-util.h"
+#include "util.h"
+
+#define FRAMES_MAX 64
+#define THREADS_MAX 64
+
+struct stack_context {
+ FILE *f;
+ Dwfl *dwfl;
+ Elf *elf;
+ unsigned n_thread;
+ unsigned n_frame;
+};
+
+static int frame_callback(Dwfl_Frame *frame, void *userdata) {
+ struct stack_context *c = userdata;
+ Dwarf_Addr pc, pc_adjusted, bias = 0;
+ _cleanup_free_ Dwarf_Die *scopes = NULL;
+ const char *fname = NULL, *symbol = NULL;
+ Dwfl_Module *module;
+ bool is_activation;
+ uint64_t module_offset = 0;
+
+ assert(frame);
+ assert(c);
+
+ if (c->n_frame >= FRAMES_MAX)
+ return DWARF_CB_ABORT;
+
+ if (!dwfl_frame_pc(frame, &pc, &is_activation))
+ return DWARF_CB_ABORT;
+
+ pc_adjusted = pc - (is_activation ? 0 : 1);
+
+ module = dwfl_addrmodule(c->dwfl, pc_adjusted);
+ if (module) {
+ Dwarf_Die *s, *cudie;
+ int n;
+ Dwarf_Addr start;
+
+ cudie = dwfl_module_addrdie(module, pc_adjusted, &bias);
+ if (cudie) {
+ n = dwarf_getscopes(cudie, pc_adjusted - bias, &scopes);
+ for (s = scopes; s < scopes + n; s++) {
+ if (IN_SET(dwarf_tag(s), DW_TAG_subprogram, DW_TAG_inlined_subroutine, DW_TAG_entry_point)) {
+ Dwarf_Attribute *a, space;
+
+ a = dwarf_attr_integrate(s, DW_AT_MIPS_linkage_name, &space);
+ if (!a)
+ a = dwarf_attr_integrate(s, DW_AT_linkage_name, &space);
+ if (a)
+ symbol = dwarf_formstring(a);
+ if (!symbol)
+ symbol = dwarf_diename(s);
+
+ if (symbol)
+ break;
+ }
+ }
+ }
+
+ if (!symbol)
+ symbol = dwfl_module_addrname(module, pc_adjusted);
+
+ fname = dwfl_module_info(module, NULL, &start, NULL, NULL, NULL, NULL, NULL);
+ module_offset = pc - start;
+ }
+
+ fprintf(c->f, "#%-2u 0x%016" PRIx64 " %s (%s + 0x%" PRIx64 ")\n", c->n_frame, (uint64_t) pc, strna(symbol), strna(fname), module_offset);
+ c->n_frame++;
+
+ return DWARF_CB_OK;
+}
+
+static int thread_callback(Dwfl_Thread *thread, void *userdata) {
+ struct stack_context *c = userdata;
+ pid_t tid;
+
+ assert(thread);
+ assert(c);
+
+ if (c->n_thread >= THREADS_MAX)
+ return DWARF_CB_ABORT;
+
+ if (c->n_thread != 0)
+ fputc('\n', c->f);
+
+ c->n_frame = 0;
+
+ tid = dwfl_thread_tid(thread);
+ fprintf(c->f, "Stack trace of thread " PID_FMT ":\n", tid);
+
+ if (dwfl_thread_getframes(thread, frame_callback, c) < 0)
+ return DWARF_CB_ABORT;
+
+ c->n_thread++;
+
+ return DWARF_CB_OK;
+}
+
+static int make_stack_trace(int fd, const char *executable, char **ret) {
+
+ static const Dwfl_Callbacks callbacks = {
+ .find_elf = dwfl_build_id_find_elf,
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ };
+
+ struct stack_context c = {};
+ char *buf = NULL;
+ size_t sz = 0;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return -errno;
+
+ c.f = open_memstream_unlocked(&buf, &sz);
+ if (!c.f)
+ return -ENOMEM;
+
+ elf_version(EV_CURRENT);
+
+ c.elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!c.elf) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ c.dwfl = dwfl_begin(&callbacks);
+ if (!c.dwfl) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (dwfl_core_file_report(c.dwfl, c.elf, executable) < 0) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (dwfl_report_end(c.dwfl, NULL, NULL) != 0) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (dwfl_core_file_attach(c.dwfl, c.elf) < 0) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (dwfl_getthreads(c.dwfl, thread_callback, &c) < 0) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ c.f = safe_fclose(c.f);
+
+ *ret = TAKE_PTR(buf);
+
+ r = 0;
+
+finish:
+ if (c.dwfl)
+ dwfl_end(c.dwfl);
+
+ if (c.elf)
+ elf_end(c.elf);
+
+ safe_fclose(c.f);
+
+ free(buf);
+
+ return r;
+}
+
+void coredump_make_stack_trace(int fd, const char *executable, char **ret) {
+ int r;
+
+ r = make_stack_trace(fd, executable, ret);
+ if (r == -EINVAL)
+ log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno()));
+ else if (r < 0)
+ log_warning_errno(r, "Failed to generate stack trace: %m");
+}
diff --git a/src/coredump/stacktrace.h b/src/coredump/stacktrace.h
new file mode 100644
index 0000000..b935748
--- /dev/null
+++ b/src/coredump/stacktrace.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+void coredump_make_stack_trace(int fd, const char *executable, char **ret);
diff --git a/src/coredump/test-coredump-vacuum.c b/src/coredump/test-coredump-vacuum.c
new file mode 100644
index 0000000..ac212ea
--- /dev/null
+++ b/src/coredump/test-coredump-vacuum.c
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+
+#include "coredump-vacuum.h"
+
+int main(int argc, char *argv[]) {
+
+ if (coredump_vacuum(-1, (uint64_t) -1, 70 * 1024) < 0)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/cryptsetup/cryptsetup-generator.c b/src/cryptsetup/cryptsetup-generator.c
new file mode 100644
index 0000000..68c7349
--- /dev/null
+++ b/src/cryptsetup/cryptsetup-generator.c
@@ -0,0 +1,917 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "hashmap.h"
+#include "id128-util.h"
+#include "log.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+
+typedef struct crypto_device {
+ char *uuid;
+ char *keyfile;
+ char *keydev;
+ char *headerdev;
+ char *datadev;
+ char *name;
+ char *options;
+ bool create;
+} crypto_device;
+
+static const char *arg_dest = NULL;
+static bool arg_enabled = true;
+static bool arg_read_crypttab = true;
+static const char *arg_crypttab = NULL;
+static const char *arg_runtime_directory = NULL;
+static bool arg_allow_list = false;
+static Hashmap *arg_disks = NULL;
+static char *arg_default_options = NULL;
+static char *arg_default_keyfile = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_disks, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_default_options, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_default_keyfile, freep);
+
+static int split_locationspec(const char *locationspec, char **ret_file, char **ret_device) {
+ _cleanup_free_ char *file = NULL, *device = NULL;
+ const char *c;
+
+ assert(ret_file);
+ assert(ret_device);
+
+ if (!locationspec) {
+ *ret_file = *ret_device = NULL;
+ return 0;
+ }
+
+ c = strrchr(locationspec, ':');
+ if (c) {
+ /* The device part has to be either an absolute path to device node (/dev/something,
+ * /dev/foo/something, or even possibly /dev/foo/something:part), or a fstab device
+ * specification starting with LABEL= or similar. The file part has the same syntax.
+ *
+ * Let's try to guess if the second part looks like a device specification, or just part of a
+ * filename with a colon. fstab_node_to_udev_node() will convert the fstab device syntax to
+ * an absolute path. If we didn't get an absolute path, assume that it is just part of the
+ * first file argument. */
+
+ device = fstab_node_to_udev_node(c + 1);
+ if (!device)
+ return log_oom();
+
+ if (path_is_absolute(device))
+ file = strndup(locationspec, c-locationspec);
+ else {
+ log_debug("Location specification argument contains a colon, but \"%s\" doesn't look like a device specification.\n"
+ "Assuming that \"%s\" is a single device specification.",
+ c + 1, locationspec);
+ device = mfree(device);
+ c = NULL;
+ }
+ }
+
+ if (!c)
+ /* No device specified */
+ file = strdup(locationspec);
+
+ if (!file)
+ return log_oom();
+
+ *ret_file = TAKE_PTR(file);
+ *ret_device = TAKE_PTR(device);
+
+ return 0;
+}
+
+static int generate_device_mount(
+ const char *name,
+ const char *device,
+ const char *type_prefix, /* "keydev" or "headerdev" */
+ const char *device_timeout,
+ bool canfail,
+ bool readonly,
+ char **unit,
+ char **mount) {
+
+ _cleanup_free_ char *u = NULL, *where = NULL, *name_escaped = NULL, *device_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+ usec_t timeout_us;
+
+ assert(name);
+ assert(device);
+ assert(unit);
+ assert(mount);
+
+ r = mkdir_parents(arg_runtime_directory, 0755);
+ if (r < 0)
+ return r;
+
+ r = mkdir(arg_runtime_directory, 0700);
+ if (r < 0 && errno != EEXIST)
+ return -errno;
+
+ name_escaped = cescape(name);
+ if (!name_escaped)
+ return -ENOMEM;
+
+ where = strjoin(arg_runtime_directory, "/", type_prefix, "-", name_escaped);
+ if (!where)
+ return -ENOMEM;
+
+ r = mkdir(where, 0700);
+ if (r < 0 && errno != EEXIST)
+ return -errno;
+
+ r = unit_name_from_path(where, ".mount", &u);
+ if (r < 0)
+ return r;
+
+ r = generator_open_unit_file(arg_dest, NULL, u, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "DefaultDependencies=no\n\n"
+ "[Mount]\n"
+ "What=%s\n"
+ "Where=%s\n"
+ "Options=%s%s\n", device, where, readonly ? "ro" : "rw", canfail ? ",nofail" : "");
+
+ if (device_timeout) {
+ r = parse_sec_fix_0(device_timeout, &timeout_us);
+ if (r >= 0) {
+ r = unit_name_from_path(device, ".device", &device_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ r = write_drop_in_format(arg_dest, device_unit, 90, "device-timeout",
+ "# Automatically generated by systemd-cryptsetup-generator \n\n"
+ "[Unit]\nJobRunningTimeoutSec=%s", device_timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write device drop-in: %m");
+
+ } else
+ log_warning_errno(r, "Failed to parse %s, ignoring: %m", device_timeout);
+
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ *unit = TAKE_PTR(u);
+ *mount = TAKE_PTR(where);
+
+ return 0;
+}
+
+static int generate_device_umount(const char *name,
+ const char *device_mount,
+ const char *type_prefix, /* "keydev" or "headerdev" */
+ char **ret_umount_unit) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *u = NULL, *name_escaped = NULL, *mount = NULL;
+ int r;
+
+ assert(name);
+ assert(ret_umount_unit);
+
+ name_escaped = cescape(name);
+ if (!name_escaped)
+ return -ENOMEM;
+
+ u = strjoin(type_prefix, "-", name_escaped, "-umount.service");
+ if (!u)
+ return -ENOMEM;
+
+ r = unit_name_from_path(device_mount, ".mount", &mount);
+ if (r < 0)
+ return r;
+
+ r = generator_open_unit_file(arg_dest, NULL, u, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "DefaultDependencies=no\n"
+ "After=%s\n\n"
+ "[Service]\n"
+ "ExecStart=-" UMOUNT_PATH " %s\n\n", mount, device_mount);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ *ret_umount_unit = TAKE_PTR(u);
+ return 0;
+}
+
+static int print_dependencies(FILE *f, const char* device_path) {
+ int r;
+
+ if (STR_IN_SET(device_path, "-", "none"))
+ /* None, nothing to do */
+ return 0;
+
+ if (PATH_IN_SET(device_path,
+ "/dev/urandom",
+ "/dev/random",
+ "/dev/hw_random",
+ "/dev/hwrng")) {
+ /* RNG device, add random dep */
+ fputs("After=systemd-random-seed.service\n", f);
+ return 0;
+ }
+
+ _cleanup_free_ char *udev_node = fstab_node_to_udev_node(device_path);
+ if (!udev_node)
+ return log_oom();
+
+ if (path_equal(udev_node, "/dev/null"))
+ return 0;
+
+ if (path_startswith(udev_node, "/dev/")) {
+ /* We are dealing with a block device, add dependency for corresponding unit */
+ _cleanup_free_ char *unit = NULL;
+
+ r = unit_name_from_path(udev_node, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ fprintf(f,
+ "After=%1$s\n"
+ "Requires=%1$s\n", unit);
+ } else {
+ /* Regular file, add mount dependency */
+ _cleanup_free_ char *escaped_path = specifier_escape(device_path);
+ if (!escaped_path)
+ return log_oom();
+
+ fprintf(f, "RequiresMountsFor=%s\n", escaped_path);
+ }
+
+ return 0;
+}
+
+static int create_disk(
+ const char *name,
+ const char *device,
+ const char *password,
+ const char *keydev,
+ const char *headerdev,
+ const char *options,
+ const char *source) {
+
+ _cleanup_free_ char *n = NULL, *d = NULL, *u = NULL, *e = NULL,
+ *keydev_mount = NULL, *keyfile_timeout_value = NULL,
+ *filtered = NULL, *u_escaped = NULL, *name_escaped = NULL, *header_path = NULL, *password_buffer = NULL,
+ *tmp_fstype = NULL, *filtered_header = NULL, *headerdev_mount = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *dmname;
+ bool noauto, nofail, swap, netdev, attach_in_initrd;
+ int r, detached_header, keyfile_can_timeout, tmp;
+
+ assert(name);
+ assert(device);
+
+ noauto = fstab_test_yes_no_option(options, "noauto\0" "auto\0");
+ nofail = fstab_test_yes_no_option(options, "nofail\0" "fail\0");
+ swap = fstab_test_option(options, "swap\0");
+ netdev = fstab_test_option(options, "_netdev\0");
+ attach_in_initrd = fstab_test_option(options, "x-initrd.attach\0");
+
+ keyfile_can_timeout = fstab_filter_options(options, "keyfile-timeout\0", NULL, &keyfile_timeout_value, NULL);
+ if (keyfile_can_timeout < 0)
+ return log_error_errno(keyfile_can_timeout, "Failed to parse keyfile-timeout= option value: %m");
+
+ detached_header = fstab_filter_options(
+ options,
+ "header\0",
+ NULL,
+ &header_path,
+ headerdev ? &filtered_header : NULL);
+ if (detached_header < 0)
+ return log_error_errno(detached_header, "Failed to parse header= option value: %m");
+
+ tmp = fstab_filter_options(options, "tmp\0", NULL, &tmp_fstype, NULL);
+ if (tmp < 0)
+ return log_error_errno(tmp, "Failed to parse tmp= option value: %m");
+
+ if (tmp && swap)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Device '%s' cannot be both 'tmp' and 'swap'. Ignoring.",
+ name);
+
+ name_escaped = specifier_escape(name);
+ if (!name_escaped)
+ return log_oom();
+
+ e = unit_name_escape(name);
+ if (!e)
+ return log_oom();
+
+ u = fstab_node_to_udev_node(device);
+ if (!u)
+ return log_oom();
+
+ r = unit_name_build("systemd-cryptsetup", e, ".service", &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ u_escaped = specifier_escape(u);
+ if (!u_escaped)
+ return log_oom();
+
+ r = unit_name_from_path(u, ".device", &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ if (keydev && !password)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Key device is specified, but path to the password file is missing.");
+
+ r = generator_open_unit_file(arg_dest, NULL, n, &f);
+ if (r < 0)
+ return r;
+
+ r = generator_write_cryptsetup_unit_section(f, source);
+ if (r < 0)
+ return r;
+
+ if (netdev)
+ fprintf(f, "After=remote-fs-pre.target\n");
+
+ /* If initrd takes care of attaching the disk then it should also detach it during shutdown. */
+ if (!attach_in_initrd)
+ fprintf(f, "Conflicts=umount.target\n");
+
+ if (keydev) {
+ _cleanup_free_ char *unit = NULL, *umount_unit = NULL;
+
+ r = generate_device_mount(
+ name,
+ keydev,
+ "keydev",
+ keyfile_timeout_value,
+ /* canfail = */ keyfile_can_timeout > 0,
+ /* readonly= */ true,
+ &unit,
+ &keydev_mount);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate keydev mount unit: %m");
+
+ r = generate_device_umount(name, keydev_mount, "keydev", &umount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate keydev umount unit: %m");
+
+ password_buffer = path_join(keydev_mount, password);
+ if (!password_buffer)
+ return log_oom();
+
+ password = password_buffer;
+
+ fprintf(f, "After=%s\n", unit);
+ if (keyfile_can_timeout > 0)
+ fprintf(f, "Wants=%s\n", unit);
+ else
+ fprintf(f, "Requires=%s\n", unit);
+
+ if (umount_unit)
+ fprintf(f,
+ "Wants=%s\n"
+ "Before=%s\n",
+ umount_unit,
+ umount_unit
+ );
+ }
+
+ if (headerdev) {
+ _cleanup_free_ char *unit = NULL, *umount_unit = NULL, *p = NULL;
+
+ r = generate_device_mount(
+ name,
+ headerdev,
+ "headerdev",
+ NULL,
+ /* canfail= */ false, /* header is always necessary */
+ /* readonly= */ false, /* LUKS2 recovery requires rw header access */
+ &unit,
+ &headerdev_mount);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate header device mount unit: %m");
+
+ r = generate_device_umount(name, headerdev_mount, "headerdev", &umount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate header device umount unit: %m");
+
+ p = path_join(headerdev_mount, header_path);
+ if (!p)
+ return log_oom();
+
+ free_and_replace(header_path, p);
+
+ if (isempty(filtered_header))
+ p = strjoin("header=", header_path);
+ else
+ p = strjoin(filtered_header, ",header=", header_path);
+
+ if (!p)
+ return log_oom();
+
+ free_and_replace(filtered_header, p);
+ options = filtered_header;
+
+ fprintf(f, "After=%s\n"
+ "Requires=%s\n", unit, unit);
+
+ if (umount_unit) {
+ fprintf(f,
+ "Wants=%s\n"
+ "Before=%s\n",
+ umount_unit,
+ umount_unit
+ );
+ }
+ }
+
+ if (!nofail)
+ fprintf(f,
+ "Before=%s\n",
+ netdev ? "remote-cryptsetup.target" : "cryptsetup.target");
+
+ if (password && !keydev) {
+ r = print_dependencies(f, password);
+ if (r < 0)
+ return r;
+ }
+
+ /* Check if a header option was specified */
+ if (detached_header > 0 && !headerdev) {
+ r = print_dependencies(f, header_path);
+ if (r < 0)
+ return r;
+ }
+
+ if (path_startswith(u, "/dev/"))
+ fprintf(f,
+ "BindsTo=%s\n"
+ "After=%s\n"
+ "Before=umount.target\n",
+ d, d);
+ else
+ /* For loopback devices, add systemd-tmpfiles-setup-dev.service
+ dependency to ensure that loopback support is available in
+ the kernel (/dev/loop-control needs to exist) */
+ fprintf(f,
+ "RequiresMountsFor=%s\n"
+ "Requires=systemd-tmpfiles-setup-dev.service\n"
+ "After=systemd-tmpfiles-setup-dev.service\n",
+ u_escaped);
+
+ r = generator_write_timeouts(arg_dest, device, name, options, &filtered);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write device timeout drop-in: %m");
+
+ r = generator_write_cryptsetup_service_section(f, name, u, password, filtered);
+ if (r < 0)
+ return r;
+
+ if (tmp) {
+ _cleanup_free_ char *tmp_fstype_escaped = NULL;
+
+ if (tmp_fstype) {
+ tmp_fstype_escaped = specifier_escape(tmp_fstype);
+ if (!tmp_fstype_escaped)
+ return log_oom();
+ }
+
+ fprintf(f,
+ "ExecStartPost=" ROOTLIBEXECDIR "/systemd-makefs '%s' '/dev/mapper/%s'\n",
+ tmp_fstype_escaped ?: "ext4", name_escaped);
+ }
+
+ if (swap)
+ fprintf(f,
+ "ExecStartPost=" ROOTLIBEXECDIR "/systemd-makefs swap '/dev/mapper/%s'\n",
+ name_escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", n);
+
+ if (!noauto) {
+ r = generator_add_symlink(arg_dest,
+ netdev ? "remote-cryptsetup.target" : "cryptsetup.target",
+ nofail ? "wants" : "requires", n);
+ if (r < 0)
+ return r;
+ }
+
+ dmname = strjoina("dev-mapper-", e, ".device");
+ r = generator_add_symlink(arg_dest, dmname, "requires", n);
+ if (r < 0)
+ return r;
+
+ if (!noauto && !nofail) {
+ r = write_drop_in(arg_dest, dmname, 40, "device-timeout",
+ "# Automatically generated by systemd-cryptsetup-generator\n\n"
+ "[Unit]\nJobTimeoutSec=0");
+ if (r < 0)
+ log_warning_errno(r, "Failed to write device timeout drop-in: %m");
+ }
+
+ return 0;
+}
+
+static crypto_device* crypt_device_free(crypto_device *d) {
+ if (!d)
+ return NULL;
+
+ free(d->uuid);
+ free(d->keyfile);
+ free(d->keydev);
+ free(d->name);
+ free(d->options);
+ return mfree(d);
+}
+
+static crypto_device *get_crypto_device(const char *uuid) {
+ int r;
+ crypto_device *d;
+
+ assert(uuid);
+
+ d = hashmap_get(arg_disks, uuid);
+ if (!d) {
+ d = new0(struct crypto_device, 1);
+ if (!d)
+ return NULL;
+
+ d->uuid = strdup(uuid);
+ if (!d->uuid)
+ return mfree(d);
+
+ r = hashmap_put(arg_disks, d->uuid, d);
+ if (r < 0) {
+ free(d->uuid);
+ return mfree(d);
+ }
+ }
+
+ return d;
+}
+
+static bool warn_uuid_invalid(const char *uuid, const char *key) {
+ assert(key);
+
+ if (!id128_is_valid(uuid)) {
+ log_warning("Failed to parse %s= kernel command line switch. UUID is invalid, ignoring.", key);
+ return true;
+ }
+
+ return false;
+}
+
+static int filter_header_device(const char *options,
+ char **ret_headerdev,
+ char **ret_filtered_headerdev_options) {
+ int r;
+ _cleanup_free_ char *headerfile = NULL, *headerdev = NULL, *headerspec = NULL,
+ *filtered_headerdev = NULL, *filtered_headerspec = NULL;
+
+ assert(ret_headerdev);
+ assert(ret_filtered_headerdev_options);
+
+ r = fstab_filter_options(options, "header\0", NULL, &headerspec, &filtered_headerspec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse header= option value: %m");
+
+ if (r > 0) {
+ r = split_locationspec(headerspec, &headerfile, &headerdev);
+ if (r < 0)
+ return r;
+
+ if (isempty(filtered_headerspec))
+ filtered_headerdev = strjoin("header=", headerfile);
+ else
+ filtered_headerdev = strjoin(filtered_headerspec, ",header=", headerfile);
+
+ if (!filtered_headerdev)
+ return log_oom();
+ } else
+ filtered_headerdev = TAKE_PTR(filtered_headerspec);
+
+ *ret_filtered_headerdev_options = TAKE_PTR(filtered_headerdev);
+ *ret_headerdev = TAKE_PTR(headerdev);
+
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ _cleanup_free_ char *uuid = NULL, *uuid_value = NULL;
+ crypto_device *d;
+ int r;
+
+ if (streq(key, "luks")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse luks= kernel command line switch %s. Ignoring.", value);
+ else
+ arg_enabled = r;
+
+ } else if (streq(key, "luks.crypttab")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse luks.crypttab= kernel command line switch %s. Ignoring.", value);
+ else
+ arg_read_crypttab = r;
+
+ } else if (streq(key, "luks.uuid")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ d = get_crypto_device(startswith(value, "luks-") ?: value);
+ if (!d)
+ return log_oom();
+
+ d->create = arg_allow_list = true;
+
+ } else if (streq(key, "luks.options")) {
+ _cleanup_free_ char *headerdev = NULL, *filtered_headerdev_options = NULL;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = sscanf(value, "%m[0-9a-fA-F-]=%ms", &uuid, &uuid_value);
+ if (r != 2)
+ return free_and_strdup(&arg_default_options, value) < 0 ? log_oom() : 0;
+
+ if (warn_uuid_invalid(uuid, key))
+ return 0;
+
+ d = get_crypto_device(uuid);
+ if (!d)
+ return log_oom();
+
+ r = filter_header_device(uuid_value, &headerdev, &filtered_headerdev_options);
+ if (r < 0)
+ return r;
+
+ free_and_replace(d->options, filtered_headerdev_options);
+ free_and_replace(d->headerdev, headerdev);
+ } else if (streq(key, "luks.key")) {
+ size_t n;
+ _cleanup_free_ char *keyfile = NULL, *keydev = NULL;
+ const char *keyspec;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ n = strspn(value, ALPHANUMERICAL "-");
+ if (value[n] != '=') {
+ if (free_and_strdup(&arg_default_keyfile, value) < 0)
+ return log_oom();
+ return 0;
+ }
+
+ uuid = strndup(value, n);
+ if (!uuid)
+ return log_oom();
+
+ if (warn_uuid_invalid(uuid, key))
+ return 0;
+
+ d = get_crypto_device(uuid);
+ if (!d)
+ return log_oom();
+
+ keyspec = value + n + 1;
+ r = split_locationspec(keyspec, &keyfile, &keydev);
+ if (r < 0)
+ return r;
+
+ free_and_replace(d->keyfile, keyfile);
+ free_and_replace(d->keydev, keydev);
+ } else if (streq(key, "luks.data")) {
+ size_t n;
+ _cleanup_free_ char *datadev = NULL;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ n = strspn(value, ALPHANUMERICAL "-");
+ if (value[n] != '=') {
+ log_warning("Failed to parse luks.data= kernel command line switch. UUID is invalid, ignoring.");
+ return 0;
+ }
+
+ uuid = strndup(value, n);
+ if (!uuid)
+ return log_oom();
+
+ if (warn_uuid_invalid(uuid, key))
+ return 0;
+
+ d = get_crypto_device(uuid);
+ if (!d)
+ return log_oom();
+
+ datadev = fstab_node_to_udev_node(value + n + 1);
+ if (!datadev)
+ return log_oom();
+
+ free_and_replace(d->datadev, datadev);
+ } else if (streq(key, "luks.name")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = sscanf(value, "%m[0-9a-fA-F-]=%ms", &uuid, &uuid_value);
+ if (r == 2) {
+ d = get_crypto_device(uuid);
+ if (!d)
+ return log_oom();
+
+ d->create = arg_allow_list = true;
+
+ free_and_replace(d->name, uuid_value);
+ } else
+ log_warning("Failed to parse luks name switch %s. Ignoring.", value);
+ }
+
+ return 0;
+}
+
+static int add_crypttab_devices(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned crypttab_line = 0;
+ int r;
+
+ if (!arg_read_crypttab)
+ return 0;
+
+ r = fopen_unlocked(arg_crypttab, "re", &f);
+ if (r < 0) {
+ if (errno != ENOENT)
+ log_error_errno(errno, "Failed to open %s: %m", arg_crypttab);
+ return 0;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL, *name = NULL, *device = NULL, *keyspec = NULL, *options = NULL,
+ *keyfile = NULL, *keydev = NULL, *headerdev = NULL, *filtered_header = NULL;
+ crypto_device *d = NULL;
+ char *l, *uuid;
+ int k;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", arg_crypttab);
+ if (r == 0)
+ break;
+
+ crypttab_line++;
+
+ l = strstrip(line);
+ if (IN_SET(l[0], 0, '#'))
+ continue;
+
+ k = sscanf(l, "%ms %ms %ms %ms", &name, &device, &keyspec, &options);
+ if (k < 2 || k > 4) {
+ log_error("Failed to parse %s:%u, ignoring.", arg_crypttab, crypttab_line);
+ continue;
+ }
+
+ uuid = startswith(device, "UUID=");
+ if (!uuid)
+ uuid = path_startswith(device, "/dev/disk/by-uuid/");
+ if (!uuid)
+ uuid = startswith(name, "luks-");
+ if (uuid)
+ d = hashmap_get(arg_disks, uuid);
+
+ if (arg_allow_list && !d) {
+ log_info("Not creating device '%s' because it was not specified on the kernel command line.", name);
+ continue;
+ }
+
+ r = split_locationspec(keyspec, &keyfile, &keydev);
+ if (r < 0)
+ return r;
+
+ if (options && (!d || !d->options)) {
+ r = filter_header_device(options, &headerdev, &filtered_header);
+ if (r < 0)
+ return r;
+ free_and_replace(options, filtered_header);
+ }
+
+ r = create_disk(name,
+ device,
+ keyfile,
+ keydev,
+ (d && d->options) ? d->headerdev : headerdev,
+ (d && d->options) ? d->options : options,
+ arg_crypttab);
+ if (r < 0)
+ return r;
+
+ if (d)
+ d->create = false;
+ }
+
+ return 0;
+}
+
+static int add_proc_cmdline_devices(void) {
+ int r;
+ crypto_device *d;
+
+ HASHMAP_FOREACH(d, arg_disks) {
+ _cleanup_free_ char *device = NULL;
+
+ if (!d->create)
+ continue;
+
+ if (!d->name) {
+ d->name = strjoin("luks-", d->uuid);
+ if (!d->name)
+ return log_oom();
+ }
+
+ device = strjoin("UUID=", d->uuid);
+ if (!device)
+ return log_oom();
+
+ r = create_disk(d->name,
+ d->datadev ?: device,
+ d->keyfile ?: arg_default_keyfile,
+ d->keydev,
+ d->headerdev,
+ d->options ?: arg_default_options,
+ "/proc/cmdline");
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(crypt_device_hash_ops, char, string_hash_func, string_compare_func,
+ crypto_device, crypt_device_free);
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest);
+
+ arg_crypttab = getenv("SYSTEMD_CRYPTTAB") ?: "/etc/crypttab";
+ arg_runtime_directory = getenv("RUNTIME_DIRECTORY") ?: "/run/systemd/cryptsetup";
+
+ arg_disks = hashmap_new(&crypt_device_hash_ops);
+ if (!arg_disks)
+ return log_oom();
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse kernel command line: %m");
+
+ if (!arg_enabled)
+ return 0;
+
+ r = add_crypttab_devices();
+ if (r < 0)
+ return r;
+
+ r = add_proc_cmdline_devices();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/cryptsetup/cryptsetup-keyfile.c b/src/cryptsetup/cryptsetup-keyfile.c
new file mode 100644
index 0000000..f849123
--- /dev/null
+++ b/src/cryptsetup/cryptsetup-keyfile.c
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "cryptsetup-keyfile.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "strv.h"
+
+#define KEY_FILE_SIZE_MAX (16U*1024U*1024U) /* 16 MiB */
+
+int load_key_file(
+ const char *key_file,
+ char **search_path,
+ size_t key_file_size,
+ uint64_t key_file_offset,
+ void **ret_key,
+ size_t *ret_key_size) {
+
+ _cleanup_(erase_and_freep) char *buffer = NULL;
+ _cleanup_free_ char *discovered_path = NULL;
+ _cleanup_close_ int fd = -1;
+ ssize_t n;
+ int r;
+
+ assert(key_file);
+ assert(ret_key);
+ assert(ret_key_size);
+
+ if (strv_isempty(search_path) || path_is_absolute(key_file)) {
+ fd = open(key_file, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to load key file '%s': %m", key_file);
+ } else {
+ char **i;
+
+ STRV_FOREACH(i, search_path) {
+ _cleanup_free_ char *joined;
+
+ joined = path_join(*i, key_file);
+ if (!joined)
+ return log_oom();
+
+ fd = open(joined, O_RDONLY|O_CLOEXEC);
+ if (fd >= 0) {
+ discovered_path = TAKE_PTR(joined);
+ break;
+ }
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to load key file '%s': %m", joined);
+ }
+
+ if (!discovered_path) {
+ /* Search path supplied, but file not found, report by returning NULL, but not failing */
+ *ret_key = NULL;
+ *ret_key_size = 0;
+ return 0;
+ }
+
+ assert(fd >= 0);
+ key_file = discovered_path;
+ }
+
+ if (key_file_size == 0) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat key file '%s': %m", key_file);
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return log_error_errno(r, "Key file is not a regular file: %m");
+
+ if (st.st_size == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Key file is empty, refusing.");
+ if ((uint64_t) st.st_size > KEY_FILE_SIZE_MAX) {
+ char buf1[FORMAT_BYTES_MAX], buf2[FORMAT_BYTES_MAX];
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
+ "Key file larger (%s) than allowed maximum size (%s), refusing.",
+ format_bytes(buf1, sizeof(buf1), st.st_size),
+ format_bytes(buf2, sizeof(buf2), KEY_FILE_SIZE_MAX));
+ }
+
+ if (key_file_offset >= (uint64_t) st.st_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Key file offset too large for file, refusing.");
+
+ key_file_size = st.st_size - key_file_offset;
+ }
+
+ buffer = malloc(key_file_size);
+ if (!buffer)
+ return log_oom();
+
+ if (key_file_offset > 0)
+ n = pread(fd, buffer, key_file_size, key_file_offset);
+ else
+ n = read(fd, buffer, key_file_size);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read key file '%s': %m", key_file);
+ if (n == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Empty encrypted key found, refusing.");
+
+ *ret_key = TAKE_PTR(buffer);
+ *ret_key_size = (size_t) n;
+
+ return 1;
+}
diff --git a/src/cryptsetup/cryptsetup-keyfile.h b/src/cryptsetup/cryptsetup-keyfile.h
new file mode 100644
index 0000000..308f5eb
--- /dev/null
+++ b/src/cryptsetup/cryptsetup-keyfile.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+int load_key_file(
+ const char *key_file,
+ char **search_path,
+ size_t key_file_size,
+ uint64_t key_file_offset,
+ void **ret_key,
+ size_t *ret_key_size);
diff --git a/src/cryptsetup/cryptsetup-pkcs11.c b/src/cryptsetup/cryptsetup-pkcs11.c
new file mode 100644
index 0000000..50db46f
--- /dev/null
+++ b/src/cryptsetup/cryptsetup-pkcs11.c
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include <p11-kit/p11-kit.h>
+#include <p11-kit/uri.h>
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "cryptsetup-pkcs11.h"
+#include "cryptsetup-keyfile.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "pkcs11-util.h"
+#include "stat-util.h"
+#include "strv.h"
+
+struct pkcs11_callback_data {
+ const char *friendly_name;
+ usec_t until;
+ void *encrypted_key;
+ size_t encrypted_key_size;
+ void *decrypted_key;
+ size_t decrypted_key_size;
+ bool free_encrypted_key;
+};
+
+static void pkcs11_callback_data_release(struct pkcs11_callback_data *data) {
+ free(data->decrypted_key);
+
+ if (data->free_encrypted_key)
+ free(data->encrypted_key);
+}
+
+static int pkcs11_callback(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_SLOT_ID slot_id,
+ const CK_SLOT_INFO *slot_info,
+ const CK_TOKEN_INFO *token_info,
+ P11KitUri *uri,
+ void *userdata) {
+
+ struct pkcs11_callback_data *data = userdata;
+ CK_OBJECT_HANDLE object;
+ int r;
+
+ assert(m);
+ assert(slot_info);
+ assert(token_info);
+ assert(uri);
+ assert(data);
+
+ /* Called for every token matching our URI */
+
+ r = pkcs11_token_login(
+ m,
+ session,
+ slot_id,
+ token_info,
+ data->friendly_name,
+ "drive-harddisk",
+ "pkcs11-pin",
+ data->until,
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* We are likely called during early boot, where entropy is scarce. Mix some data from the PKCS#11
+ * token, if it supports that. It should be cheap, given that we already are talking to it anyway and
+ * shouldn't hurt. */
+ (void) pkcs11_token_acquire_rng(m, session);
+
+ r = pkcs11_token_find_private_key(m, session, uri, &object);
+ if (r < 0)
+ return r;
+
+ r = pkcs11_token_decrypt_data(
+ m,
+ session,
+ object,
+ data->encrypted_key,
+ data->encrypted_key_size,
+ &data->decrypted_key,
+ &data->decrypted_key_size);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int decrypt_pkcs11_key(
+ const char *friendly_name,
+ const char *pkcs11_uri,
+ const char *key_file, /* We either expect key_file and associated parameters to be set (for file keys) … */
+ size_t key_file_size,
+ uint64_t key_file_offset,
+ const void *key_data, /* … or key_data and key_data_size (for literal keys) */
+ size_t key_data_size,
+ usec_t until,
+ void **ret_decrypted_key,
+ size_t *ret_decrypted_key_size) {
+
+ _cleanup_(pkcs11_callback_data_release) struct pkcs11_callback_data data = {
+ .friendly_name = friendly_name,
+ .until = until,
+ };
+ int r;
+
+ assert(friendly_name);
+ assert(pkcs11_uri);
+ assert(key_file || key_data);
+ assert(ret_decrypted_key);
+ assert(ret_decrypted_key_size);
+
+ /* The functions called here log about all errors, except for EAGAIN which means "token not found right now" */
+
+ if (key_data) {
+ data.encrypted_key = (void*) key_data;
+ data.encrypted_key_size = key_data_size;
+
+ data.free_encrypted_key = false;
+ } else {
+ r = load_key_file(key_file, NULL, key_file_size, key_file_offset, &data.encrypted_key, &data.encrypted_key_size);
+ if (r < 0)
+ return r;
+
+ data.free_encrypted_key = true;
+ }
+
+ r = pkcs11_find_token(pkcs11_uri, pkcs11_callback, &data);
+ if (r < 0)
+ return r;
+
+ *ret_decrypted_key = TAKE_PTR(data.decrypted_key);
+ *ret_decrypted_key_size = data.decrypted_key_size;
+
+ return 0;
+}
diff --git a/src/cryptsetup/cryptsetup-pkcs11.h b/src/cryptsetup/cryptsetup-pkcs11.h
new file mode 100644
index 0000000..266c8e1
--- /dev/null
+++ b/src/cryptsetup/cryptsetup-pkcs11.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "log.h"
+#include "time-util.h"
+
+#if HAVE_P11KIT
+
+int decrypt_pkcs11_key(
+ const char *friendly_name,
+ const char *pkcs11_uri,
+ const char *key_file,
+ size_t key_file_size,
+ uint64_t key_file_offset,
+ const void *key_data,
+ size_t key_data_size,
+ usec_t until,
+ void **ret_decrypted_key,
+ size_t *ret_decrypted_key_size);
+
+#else
+
+static inline int decrypt_pkcs11_key(
+ const char *friendly_name,
+ const char *pkcs11_uri,
+ const char *key_file,
+ size_t key_file_size,
+ uint64_t key_file_offset,
+ const void *key_data,
+ size_t key_data_size,
+ usec_t until,
+ void **ret_decrypted_key,
+ size_t *ret_decrypted_key_size) {
+
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "PKCS#11 Token support not available.");
+}
+
+#endif
diff --git a/src/cryptsetup/cryptsetup.c b/src/cryptsetup/cryptsetup.c
new file mode 100644
index 0000000..129f5fc
--- /dev/null
+++ b/src/cryptsetup/cryptsetup.c
@@ -0,0 +1,1058 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <mntent.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "cryptsetup-keyfile.h"
+#include "cryptsetup-pkcs11.h"
+#include "cryptsetup-util.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fstab-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "mount-util.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pkcs11-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+
+/* internal helper */
+#define ANY_LUKS "LUKS"
+/* as in src/cryptsetup.h */
+#define CRYPT_SECTOR_SIZE 512
+#define CRYPT_MAX_SECTOR_SIZE 4096
+
+static const char *arg_type = NULL; /* ANY_LUKS, CRYPT_LUKS1, CRYPT_LUKS2, CRYPT_TCRYPT, CRYPT_BITLK or CRYPT_PLAIN */
+static char *arg_cipher = NULL;
+static unsigned arg_key_size = 0;
+static unsigned arg_sector_size = CRYPT_SECTOR_SIZE;
+static int arg_key_slot = CRYPT_ANY_SLOT;
+static unsigned arg_keyfile_size = 0;
+static uint64_t arg_keyfile_offset = 0;
+static bool arg_keyfile_erase = false;
+static bool arg_try_empty_password = false;
+static char *arg_hash = NULL;
+static char *arg_header = NULL;
+static unsigned arg_tries = 3;
+static bool arg_readonly = false;
+static bool arg_verify = false;
+static bool arg_discards = false;
+static bool arg_same_cpu_crypt = false;
+static bool arg_submit_from_crypt_cpus = false;
+static bool arg_no_read_workqueue = false;
+static bool arg_no_write_workqueue = false;
+static bool arg_tcrypt_hidden = false;
+static bool arg_tcrypt_system = false;
+static bool arg_tcrypt_veracrypt = false;
+static char **arg_tcrypt_keyfiles = NULL;
+static uint64_t arg_offset = 0;
+static uint64_t arg_skip = 0;
+static usec_t arg_timeout = USEC_INFINITY;
+static char *arg_pkcs11_uri = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_cipher, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hash, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_header, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_tcrypt_keyfiles, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_pkcs11_uri, freep);
+
+/* Options Debian's crypttab knows we don't:
+
+ check=
+ checkargs=
+ noearly
+ loud
+ quiet
+ keyscript=
+ initramfs
+*/
+
+static int parse_one_option(const char *option) {
+ const char *val;
+ int r;
+
+ assert(option);
+
+ /* Handled outside of this tool */
+ if (STR_IN_SET(option, "noauto", "auto", "nofail", "fail", "_netdev", "keyfile-timeout"))
+ return 0;
+
+ if (startswith(option, "keyfile-timeout="))
+ return 0;
+
+ if ((val = startswith(option, "cipher="))) {
+ r = free_and_strdup(&arg_cipher, val);
+ if (r < 0)
+ return log_oom();
+
+ } else if ((val = startswith(option, "size="))) {
+
+ r = safe_atou(val, &arg_key_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ if (arg_key_size % 8) {
+ log_error("size= not a multiple of 8, ignoring.");
+ return 0;
+ }
+
+ arg_key_size /= 8;
+
+ } else if ((val = startswith(option, "sector-size="))) {
+
+ r = safe_atou(val, &arg_sector_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ if (arg_sector_size % 2) {
+ log_error("sector-size= not a multiple of 2, ignoring.");
+ return 0;
+ }
+
+ if (arg_sector_size < CRYPT_SECTOR_SIZE || arg_sector_size > CRYPT_MAX_SECTOR_SIZE) {
+ log_error("sector-size= is outside of %u and %u, ignoring.", CRYPT_SECTOR_SIZE, CRYPT_MAX_SECTOR_SIZE);
+ return 0;
+ }
+
+ } else if ((val = startswith(option, "key-slot=")) ||
+ (val = startswith(option, "keyslot="))) {
+
+ arg_type = ANY_LUKS;
+ r = safe_atoi(val, &arg_key_slot);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if ((val = startswith(option, "tcrypt-keyfile="))) {
+
+ arg_type = CRYPT_TCRYPT;
+ if (path_is_absolute(val)) {
+ if (strv_extend(&arg_tcrypt_keyfiles, val) < 0)
+ return log_oom();
+ } else
+ log_error("Key file path \"%s\" is not absolute. Ignoring.", val);
+
+ } else if ((val = startswith(option, "keyfile-size="))) {
+
+ r = safe_atou(val, &arg_keyfile_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if ((val = startswith(option, "keyfile-offset="))) {
+
+ r = safe_atou64(val, &arg_keyfile_offset);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if ((val = startswith(option, "keyfile-erase="))) {
+
+ r = parse_boolean(val);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ arg_keyfile_erase = r;
+
+ } else if (streq(option, "keyfile-erase"))
+ arg_keyfile_erase = true;
+
+ else if ((val = startswith(option, "hash="))) {
+ r = free_and_strdup(&arg_hash, val);
+ if (r < 0)
+ return log_oom();
+
+ } else if ((val = startswith(option, "header="))) {
+ arg_type = ANY_LUKS;
+
+ if (!path_is_absolute(val))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Header path \"%s\" is not absolute, refusing.", val);
+
+ if (arg_header)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Duplicate header= option, refusing.");
+
+ arg_header = strdup(val);
+ if (!arg_header)
+ return log_oom();
+
+ } else if ((val = startswith(option, "tries="))) {
+
+ r = safe_atou(val, &arg_tries);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if (STR_IN_SET(option, "readonly", "read-only"))
+ arg_readonly = true;
+ else if (streq(option, "verify"))
+ arg_verify = true;
+ else if (STR_IN_SET(option, "allow-discards", "discard"))
+ arg_discards = true;
+ else if (streq(option, "same-cpu-crypt"))
+ arg_same_cpu_crypt = true;
+ else if (streq(option, "submit-from-crypt-cpus"))
+ arg_submit_from_crypt_cpus = true;
+ else if (streq(option, "no-read-workqueue"))
+ arg_no_read_workqueue = true;
+ else if (streq(option, "no-write-workqueue"))
+ arg_no_write_workqueue = true;
+ else if (streq(option, "luks"))
+ arg_type = ANY_LUKS;
+/* since cryptsetup 2.3.0 (Feb 2020) */
+#ifdef CRYPT_BITLK
+ else if (streq(option, "bitlk"))
+ arg_type = CRYPT_BITLK;
+#endif
+ else if (streq(option, "tcrypt"))
+ arg_type = CRYPT_TCRYPT;
+ else if (STR_IN_SET(option, "tcrypt-hidden", "tcrypthidden")) {
+ arg_type = CRYPT_TCRYPT;
+ arg_tcrypt_hidden = true;
+ } else if (streq(option, "tcrypt-system")) {
+ arg_type = CRYPT_TCRYPT;
+ arg_tcrypt_system = true;
+ } else if (STR_IN_SET(option, "tcrypt-veracrypt", "veracrypt")) {
+ arg_type = CRYPT_TCRYPT;
+ arg_tcrypt_veracrypt = true;
+ } else if (STR_IN_SET(option, "plain", "swap", "tmp") ||
+ startswith(option, "tmp="))
+ arg_type = CRYPT_PLAIN;
+ else if ((val = startswith(option, "timeout="))) {
+
+ r = parse_sec_fix_0(val, &arg_timeout);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ } else if ((val = startswith(option, "offset="))) {
+
+ r = safe_atou64(val, &arg_offset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %m", option);
+
+ } else if ((val = startswith(option, "skip="))) {
+
+ r = safe_atou64(val, &arg_skip);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %m", option);
+
+ } else if ((val = startswith(option, "pkcs11-uri="))) {
+
+ if (!pkcs11_uri_valid(val))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "pkcs11-uri= parameter expects a PKCS#11 URI, refusing");
+
+ r = free_and_strdup(&arg_pkcs11_uri, val);
+ if (r < 0)
+ return log_oom();
+
+ } else if ((val = startswith(option, "try-empty-password="))) {
+
+ r = parse_boolean(val);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse %s, ignoring: %m", option);
+ return 0;
+ }
+
+ arg_try_empty_password = r;
+
+ } else if (streq(option, "try-empty-password"))
+ arg_try_empty_password = true;
+
+ else if (!streq(option, "x-initrd.attach"))
+ log_warning("Encountered unknown /etc/crypttab option '%s', ignoring.", option);
+
+ return 0;
+}
+
+static int parse_options(const char *options) {
+ assert(options);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ int r;
+
+ r = extract_first_word(&options, &word, ",", EXTRACT_DONT_COALESCE_SEPARATORS | EXTRACT_UNESCAPE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse options: %m");
+ if (r == 0)
+ break;
+
+ r = parse_one_option(word);
+ if (r < 0)
+ return r;
+ }
+
+ /* sanity-check options */
+ if (arg_type && !streq(arg_type, CRYPT_PLAIN)) {
+ if (arg_offset != 0)
+ log_warning("offset= ignored with type %s", arg_type);
+ if (arg_skip != 0)
+ log_warning("skip= ignored with type %s", arg_type);
+ }
+
+ return 0;
+}
+
+static char* disk_description(const char *path) {
+ static const char name_fields[] =
+ "ID_PART_ENTRY_NAME\0"
+ "DM_NAME\0"
+ "ID_MODEL_FROM_DATABASE\0"
+ "ID_MODEL\0";
+
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *i, *name;
+ struct stat st;
+
+ assert(path);
+
+ if (stat(path, &st) < 0)
+ return NULL;
+
+ if (!S_ISBLK(st.st_mode))
+ return NULL;
+
+ if (sd_device_new_from_devnum(&device, 'b', st.st_rdev) < 0)
+ return NULL;
+
+ NULSTR_FOREACH(i, name_fields)
+ if (sd_device_get_property_value(device, i, &name) >= 0 &&
+ !isempty(name))
+ return strdup(name);
+
+ return NULL;
+}
+
+static char *disk_mount_point(const char *label) {
+ _cleanup_free_ char *device = NULL;
+ _cleanup_endmntent_ FILE *f = NULL;
+ struct mntent *m;
+
+ /* Yeah, we don't support native systemd unit files here for now */
+
+ device = strjoin("/dev/mapper/", label);
+ if (!device)
+ return NULL;
+
+ f = setmntent(fstab_path(), "re");
+ if (!f)
+ return NULL;
+
+ while ((m = getmntent(f)))
+ if (path_equal(m->mnt_fsname, device))
+ return strdup(m->mnt_dir);
+
+ return NULL;
+}
+
+static char *friendly_disk_name(const char *src, const char *vol) {
+ _cleanup_free_ char *description = NULL, *mount_point = NULL;
+ char *name_buffer = NULL;
+ int r;
+
+ assert(src);
+ assert(vol);
+
+ description = disk_description(src);
+ mount_point = disk_mount_point(vol);
+
+ /* If the description string is simply the volume name, then let's not show this twice */
+ if (description && streq(vol, description))
+ description = mfree(description);
+
+ if (mount_point && description)
+ r = asprintf(&name_buffer, "%s (%s) on %s", description, vol, mount_point);
+ else if (mount_point)
+ r = asprintf(&name_buffer, "%s on %s", vol, mount_point);
+ else if (description)
+ r = asprintf(&name_buffer, "%s (%s)", description, vol);
+ else
+ return strdup(vol);
+ if (r < 0)
+ return NULL;
+
+ return name_buffer;
+}
+
+static int get_password(
+ const char *vol,
+ const char *src,
+ usec_t until,
+ bool accept_cached,
+ char ***ret) {
+
+ _cleanup_free_ char *friendly = NULL, *text = NULL, *disk_path = NULL;
+ _cleanup_strv_free_erase_ char **passwords = NULL;
+ char **p, *id;
+ int r = 0;
+
+ assert(vol);
+ assert(src);
+ assert(ret);
+
+ friendly = friendly_disk_name(src, vol);
+ if (!friendly)
+ return log_oom();
+
+ if (asprintf(&text, "Please enter passphrase for disk %s:", friendly) < 0)
+ return log_oom();
+
+ disk_path = cescape(src);
+ if (!disk_path)
+ return log_oom();
+
+ id = strjoina("cryptsetup:", disk_path);
+
+ r = ask_password_auto(text, "drive-harddisk", id, "cryptsetup", until,
+ ASK_PASSWORD_PUSH_CACHE | (accept_cached*ASK_PASSWORD_ACCEPT_CACHED),
+ &passwords);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query password: %m");
+
+ if (arg_verify) {
+ _cleanup_strv_free_erase_ char **passwords2 = NULL;
+
+ assert(strv_length(passwords) == 1);
+
+ if (asprintf(&text, "Please enter passphrase for disk %s (verification):", friendly) < 0)
+ return log_oom();
+
+ id = strjoina("cryptsetup-verification:", disk_path);
+
+ r = ask_password_auto(text, "drive-harddisk", id, "cryptsetup", until, ASK_PASSWORD_PUSH_CACHE, &passwords2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query verification password: %m");
+
+ assert(strv_length(passwords2) == 1);
+
+ if (!streq(passwords[0], passwords2[0]))
+ return log_warning_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "Passwords did not match, retrying.");
+ }
+
+ strv_uniq(passwords);
+
+ STRV_FOREACH(p, passwords) {
+ char *c;
+
+ if (strlen(*p)+1 >= arg_key_size)
+ continue;
+
+ /* Pad password if necessary */
+ c = new(char, arg_key_size);
+ if (!c)
+ return log_oom();
+
+ strncpy(c, *p, arg_key_size);
+ free_and_replace(*p, c);
+ }
+
+ *ret = TAKE_PTR(passwords);
+
+ return 0;
+}
+
+static int attach_tcrypt(
+ struct crypt_device *cd,
+ const char *name,
+ const char *key_file,
+ const void *key_data,
+ size_t key_data_size,
+ char **passwords,
+ uint32_t flags) {
+
+ int r = 0;
+ _cleanup_free_ char *passphrase = NULL;
+ struct crypt_params_tcrypt params = {
+ .flags = CRYPT_TCRYPT_LEGACY_MODES,
+ .keyfiles = (const char **)arg_tcrypt_keyfiles,
+ .keyfiles_count = strv_length(arg_tcrypt_keyfiles)
+ };
+
+ assert(cd);
+ assert(name);
+ assert(key_file || key_data || !strv_isempty(passwords));
+
+ if (arg_pkcs11_uri)
+ /* Ask for a regular password */
+ return log_error_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "Sorry, but tcrypt devices are currently not supported in conjunction with pkcs11 support.");
+
+ if (arg_tcrypt_hidden)
+ params.flags |= CRYPT_TCRYPT_HIDDEN_HEADER;
+
+ if (arg_tcrypt_system)
+ params.flags |= CRYPT_TCRYPT_SYSTEM_HEADER;
+
+ if (arg_tcrypt_veracrypt)
+ params.flags |= CRYPT_TCRYPT_VERA_MODES;
+
+ if (key_data) {
+ params.passphrase = key_data;
+ params.passphrase_size = key_data_size;
+ } else {
+ if (key_file) {
+ r = read_one_line_file(key_file, &passphrase);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read password file '%s': %m", key_file);
+ return -EAGAIN; /* log with the actual error, but return EAGAIN */
+ }
+
+ params.passphrase = passphrase;
+ } else
+ params.passphrase = passwords[0];
+
+ params.passphrase_size = strlen(params.passphrase);
+ }
+
+ r = crypt_load(cd, CRYPT_TCRYPT, &params);
+ if (r < 0) {
+ if (r == -EPERM) {
+ if (key_data)
+ log_error_errno(r, "Failed to activate using discovered key. (Key not correct?)");
+
+ if (key_file)
+ log_error_errno(r, "Failed to activate using password file '%s'. (Key data not correct?)", key_file);
+
+ return -EAGAIN; /* log the actual error, but return EAGAIN */
+ }
+
+ return log_error_errno(r, "Failed to load tcrypt superblock on device %s: %m", crypt_get_device_name(cd));
+ }
+
+ r = crypt_activate_by_volume_key(cd, name, NULL, 0, flags);
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate tcrypt device %s: %m", crypt_get_device_name(cd));
+
+ return 0;
+}
+
+static int attach_luks_or_plain_or_bitlk(
+ struct crypt_device *cd,
+ const char *name,
+ const char *key_file,
+ const void *key_data,
+ size_t key_data_size,
+ char **passwords,
+ uint32_t flags,
+ usec_t until) {
+
+ int r = 0;
+ bool pass_volume_key = false;
+
+ assert(cd);
+ assert(name);
+
+ if ((!arg_type && !crypt_get_type(cd)) || streq_ptr(arg_type, CRYPT_PLAIN)) {
+ struct crypt_params_plain params = {
+ .offset = arg_offset,
+ .skip = arg_skip,
+ .sector_size = arg_sector_size,
+ };
+ const char *cipher, *cipher_mode;
+ _cleanup_free_ char *truncated_cipher = NULL;
+
+ if (arg_hash) {
+ /* plain isn't a real hash type. it just means "use no hash" */
+ if (!streq(arg_hash, "plain"))
+ params.hash = arg_hash;
+ } else if (!key_file)
+ /* for CRYPT_PLAIN, the behaviour of cryptsetup
+ * package is to not hash when a key file is provided */
+ params.hash = "ripemd160";
+
+ if (arg_cipher) {
+ size_t l;
+
+ l = strcspn(arg_cipher, "-");
+ truncated_cipher = strndup(arg_cipher, l);
+ if (!truncated_cipher)
+ return log_oom();
+
+ cipher = truncated_cipher;
+ cipher_mode = arg_cipher[l] ? arg_cipher+l+1 : "plain";
+ } else {
+ cipher = "aes";
+ cipher_mode = "cbc-essiv:sha256";
+ }
+
+ /* for CRYPT_PLAIN limit reads from keyfile to key length, and ignore keyfile-size */
+ arg_keyfile_size = arg_key_size;
+
+ /* In contrast to what the name crypt_format() might suggest this doesn't actually format
+ * anything, it just configures encryption parameters when used for plain mode. */
+ r = crypt_format(cd, CRYPT_PLAIN, cipher, cipher_mode, NULL, NULL, arg_keyfile_size, &params);
+ if (r < 0)
+ return log_error_errno(r, "Loading of cryptographic parameters failed: %m");
+
+ /* hash == NULL implies the user passed "plain" */
+ pass_volume_key = (params.hash == NULL);
+ }
+
+ log_info("Set cipher %s, mode %s, key size %i bits for device %s.",
+ crypt_get_cipher(cd),
+ crypt_get_cipher_mode(cd),
+ crypt_get_volume_key_size(cd)*8,
+ crypt_get_device_name(cd));
+
+ if (arg_pkcs11_uri) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_free_ void *decrypted_key = NULL;
+ _cleanup_free_ char *friendly = NULL;
+ size_t decrypted_key_size = 0;
+
+ if (!key_file && !key_data)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "PKCS#11 mode selected but no key file specified, refusing.");
+
+ friendly = friendly_disk_name(crypt_get_device_name(cd), name);
+ if (!friendly)
+ return log_oom();
+
+ for (;;) {
+ bool processed = false;
+
+ r = decrypt_pkcs11_key(
+ friendly,
+ arg_pkcs11_uri,
+ key_file, arg_keyfile_size, arg_keyfile_offset,
+ key_data, key_data_size,
+ until,
+ &decrypted_key, &decrypted_key_size);
+ if (r >= 0)
+ break;
+ if (r != -EAGAIN) /* EAGAIN means: token not found */
+ return r;
+
+ if (!monitor) {
+ /* We didn't find the token. In this case, watch for it via udev. Let's
+ * create an event loop and monitor first. */
+
+ assert(!event);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_device_monitor_new(&monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate device monitor: %m");
+
+ r = sd_device_monitor_filter_add_match_tag(monitor, "security-device");
+ if (r < 0)
+ return log_error_errno(r, "Failed to configure device monitor: %m");
+
+ r = sd_device_monitor_attach_event(monitor, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach device monitor: %m");
+
+ r = sd_device_monitor_start(monitor, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ log_notice("Security token %s not present for unlocking volume %s, please plug it in.",
+ arg_pkcs11_uri, friendly);
+
+ /* Let's immediately rescan in case the token appeared in the time we needed
+ * to create and configure the monitor */
+ continue;
+ }
+
+ for (;;) {
+ /* Wait for one event, and then eat all subsequent events until there are no
+ * further ones */
+ r = sd_event_run(event, processed ? 0 : UINT64_MAX);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+ if (r == 0)
+ break;
+
+ processed = true;
+ }
+
+ log_debug("Got one or more potentially relevant udev events, rescanning PKCS#11...");
+ }
+
+ if (pass_volume_key)
+ r = crypt_activate_by_volume_key(cd, name, decrypted_key, decrypted_key_size, flags);
+ else {
+ _cleanup_free_ char *base64_encoded = NULL;
+
+ /* Before using this key as passphrase we base64 encode it. Why? For compatibility
+ * with homed's PKCS#11 hookup: there we want to use the key we acquired through
+ * PKCS#11 for other authentication/decryption mechanisms too, and some of them do
+ * not not take arbitrary binary blobs, but require NUL-terminated strings — most
+ * importantly UNIX password hashes. Hence, for compatibility we want to use a string
+ * without embedded NUL here too, and that's easiest to generate from a binary blob
+ * via base64 encoding. */
+
+ r = base64mem(decrypted_key, decrypted_key_size, &base64_encoded);
+ if (r < 0)
+ return log_oom();
+
+ r = crypt_activate_by_passphrase(cd, name, arg_key_slot, base64_encoded, strlen(base64_encoded), flags);
+ }
+ if (r == -EPERM) {
+ log_error_errno(r, "Failed to activate with PKCS#11 decrypted key. (Key incorrect?)");
+ return -EAGAIN; /* log actual error, but return EAGAIN */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate with PKCS#11 acquired key: %m");
+
+ } else if (key_data) {
+ if (pass_volume_key)
+ r = crypt_activate_by_volume_key(cd, name, key_data, key_data_size, flags);
+ else
+ r = crypt_activate_by_passphrase(cd, name, arg_key_slot, key_data, key_data_size, flags);
+ if (r == -EPERM) {
+ log_error_errno(r, "Failed to activate. (Key incorrect?)");
+ return -EAGAIN; /* Log actual error, but return EAGAIN */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate: %m");
+
+ } else if (key_file) {
+ r = crypt_activate_by_keyfile_device_offset(cd, name, arg_key_slot, key_file, arg_keyfile_size, arg_keyfile_offset, flags);
+ if (r == -EPERM) {
+ log_error_errno(r, "Failed to activate with key file '%s'. (Key data incorrect?)", key_file);
+ return -EAGAIN; /* Log actual error, but return EAGAIN */
+ }
+ if (r == -EINVAL) {
+ log_error_errno(r, "Failed to activate with key file '%s'. (Key file missing?)", key_file);
+ return -EAGAIN; /* Log actual error, but return EAGAIN */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate with key file '%s': %m", key_file);
+
+ } else {
+ char **p;
+
+ r = -EINVAL;
+ STRV_FOREACH(p, passwords) {
+ if (pass_volume_key)
+ r = crypt_activate_by_volume_key(cd, name, *p, arg_key_size, flags);
+ else
+ r = crypt_activate_by_passphrase(cd, name, arg_key_slot, *p, strlen(*p), flags);
+ if (r >= 0)
+ break;
+ }
+ if (r == -EPERM) {
+ log_error_errno(r, "Failed to activate with specified passphrase. (Passphrase incorrect?)");
+ return -EAGAIN; /* log actual error, but return EAGAIN */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate with specified passphrase: %m");
+ }
+
+ return r;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-cryptsetup@.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s attach VOLUME SOURCEDEVICE [PASSWORD] [OPTIONS]\n"
+ "%s detach VOLUME\n\n"
+ "Attaches or detaches an encrypted block device.\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static uint32_t determine_flags(void) {
+ uint32_t flags = 0;
+
+ if (arg_readonly)
+ flags |= CRYPT_ACTIVATE_READONLY;
+
+ if (arg_discards)
+ flags |= CRYPT_ACTIVATE_ALLOW_DISCARDS;
+
+ if (arg_same_cpu_crypt)
+ flags |= CRYPT_ACTIVATE_SAME_CPU_CRYPT;
+
+ if (arg_submit_from_crypt_cpus)
+ flags |= CRYPT_ACTIVATE_SUBMIT_FROM_CRYPT_CPUS;
+
+ if (arg_no_read_workqueue)
+ flags |= CRYPT_ACTIVATE_NO_READ_WORKQUEUE;
+
+ if (arg_no_write_workqueue)
+ flags |= CRYPT_ACTIVATE_NO_WRITE_WORKQUEUE;
+
+#ifdef CRYPT_ACTIVATE_SERIALIZE_MEMORY_HARD_PBKDF
+ /* Try to decrease the risk of OOM event if memory hard key derivation function is in use */
+ /* https://gitlab.com/cryptsetup/cryptsetup/issues/446/ */
+ flags |= CRYPT_ACTIVATE_SERIALIZE_MEMORY_HARD_PBKDF;
+#endif
+
+ return flags;
+}
+
+static void remove_and_erasep(const char **p) {
+ int r;
+
+ if (!*p)
+ return;
+
+ r = unlinkat_deallocate(AT_FDCWD, *p, UNLINK_ERASE);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Unable to erase key file '%s', ignoring: %m", *p);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ int r;
+
+ if (argc <= 1)
+ return help();
+
+ if (argc < 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program requires at least two arguments.");
+
+ log_setup_service();
+
+ cryptsetup_enable_logging(cd);
+
+ umask(0022);
+
+ if (streq(argv[1], "attach")) {
+ uint32_t flags = 0;
+ unsigned tries;
+ usec_t until;
+ crypt_status_info status;
+ _cleanup_(remove_and_erasep) const char *destroy_key_file = NULL;
+ const char *key_file = NULL;
+ _cleanup_(erase_and_freep) void *key_data = NULL;
+ size_t key_data_size = 0;
+
+ /* Arguments: systemd-cryptsetup attach VOLUME SOURCE-DEVICE [PASSWORD] [OPTIONS] */
+
+ if (argc < 4)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "attach requires at least two arguments.");
+
+ if (!filename_is_valid(argv[2]))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Volume name '%s' is not valid.", argv[2]);
+
+ if (argc >= 5 && !STR_IN_SET(argv[4], "", "-", "none")) {
+ if (path_is_absolute(argv[4]))
+ key_file = argv[4];
+ else
+ log_warning("Password file path '%s' is not absolute. Ignoring.", argv[4]);
+ }
+
+ if (argc >= 6 && !STR_IN_SET(argv[5], "", "-", "none")) {
+ r = parse_options(argv[5]);
+ if (r < 0)
+ return r;
+ }
+
+ log_debug("%s %s ← %s type=%s cipher=%s", __func__,
+ argv[2], argv[3], strempty(arg_type), strempty(arg_cipher));
+
+ /* A delicious drop of snake oil */
+ (void) mlockall(MCL_FUTURE);
+
+ if (!key_file) {
+ const char *fn;
+
+ /* If a key file is not explicitly specified, search for a key in a well defined
+ * search path, and load it. */
+
+ fn = strjoina(argv[2], ".key");
+ r = load_key_file(fn,
+ STRV_MAKE("/etc/cryptsetup-keys.d", "/run/cryptsetup-keys.d"),
+ 0, 0, /* Note we leave arg_keyfile_offset/arg_keyfile_size as something that only applies to arg_keyfile! */
+ &key_data, &key_data_size);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ log_debug("Automatically discovered key for volume '%s'.", argv[2]);
+ } else if (arg_keyfile_erase)
+ destroy_key_file = key_file; /* let's get this baby erased when we leave */
+
+ if (arg_header) {
+ log_debug("LUKS header: %s", arg_header);
+ r = crypt_init(&cd, arg_header);
+ } else
+ r = crypt_init(&cd, argv[3]);
+ if (r < 0)
+ return log_error_errno(r, "crypt_init() failed: %m");
+
+ cryptsetup_enable_logging(cd);
+
+ status = crypt_status(cd, argv[2]);
+ if (IN_SET(status, CRYPT_ACTIVE, CRYPT_BUSY)) {
+ log_info("Volume %s already active.", argv[2]);
+ return 0;
+ }
+
+ flags = determine_flags();
+
+ if (arg_timeout == USEC_INFINITY)
+ until = 0;
+ else
+ until = now(CLOCK_MONOTONIC) + arg_timeout;
+
+ arg_key_size = (arg_key_size > 0 ? arg_key_size : (256 / 8));
+
+ if (key_file) {
+ struct stat st;
+
+ /* Ideally we'd do this on the open fd, but since this is just a
+ * warning it's OK to do this in two steps. */
+ if (stat(key_file, &st) >= 0 && S_ISREG(st.st_mode) && (st.st_mode & 0005))
+ log_warning("Key file %s is world-readable. This is not a good idea!", key_file);
+ }
+
+ if (!arg_type || STR_IN_SET(arg_type, ANY_LUKS, CRYPT_LUKS1, CRYPT_LUKS2)) {
+ r = crypt_load(cd, !arg_type || streq(arg_type, ANY_LUKS) ? CRYPT_LUKS : arg_type, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load LUKS superblock on device %s: %m", crypt_get_device_name(cd));
+
+ if (arg_header) {
+ r = crypt_set_data_device(cd, argv[3]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set LUKS data device %s: %m", argv[3]);
+ }
+
+ /* Tokens are available in LUKS2 only, but it is ok to call (and fail) with LUKS1. */
+ if (!key_file && !key_data) {
+ r = crypt_activate_by_token(cd, argv[2], CRYPT_ANY_TOKEN, NULL, flags);
+ if (r >= 0) {
+ log_debug("Volume %s activated with LUKS token id %i.", argv[2], r);
+ return 0;
+ }
+
+ log_debug_errno(r, "Token activation unsuccessful for device %s: %m", crypt_get_device_name(cd));
+ }
+ }
+
+/* since cryptsetup 2.3.0 (Feb 2020) */
+#ifdef CRYPT_BITLK
+ if (streq_ptr(arg_type, CRYPT_BITLK)) {
+ r = crypt_load(cd, CRYPT_BITLK, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load Bitlocker superblock on device %s: %m", crypt_get_device_name(cd));
+ }
+#endif
+
+ for (tries = 0; arg_tries == 0 || tries < arg_tries; tries++) {
+ _cleanup_strv_free_erase_ char **passwords = NULL;
+
+ /* When we were able to acquire multiple keys, let's always process them in this order:
+ *
+ * 1. A key acquired via PKCS#11 token
+ * 2. The discovered key: i.e. key_data + key_data_size
+ * 3. The configured key: i.e. key_file + arg_keyfile_offset + arg_keyfile_size
+ * 4. The empty password, in case arg_try_empty_password is set
+ * 5. We enquire the user for a password
+ */
+
+ if (!key_file && !key_data && !arg_pkcs11_uri) {
+
+ if (arg_try_empty_password) {
+ /* Hmm, let's try an empty password now, but only once */
+ arg_try_empty_password = false;
+
+ key_data = strdup("");
+ if (!key_data)
+ return log_oom();
+
+ key_data_size = 0;
+ } else {
+ /* Ask the user for a passphrase only as last resort, if we have
+ * nothing else to check for */
+
+ r = get_password(argv[2], argv[3], until, tries == 0 && !arg_verify, &passwords);
+ if (r == -EAGAIN)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (streq_ptr(arg_type, CRYPT_TCRYPT))
+ r = attach_tcrypt(cd, argv[2], key_file, key_data, key_data_size, passwords, flags);
+ else
+ r = attach_luks_or_plain_or_bitlk(cd, argv[2], key_file, key_data, key_data_size, passwords, flags, until);
+ if (r >= 0)
+ break;
+ if (r != -EAGAIN)
+ return r;
+
+ /* Key not correct? Let's try again! */
+
+ key_file = NULL;
+ key_data = erase_and_free(key_data);
+ key_data_size = 0;
+ arg_pkcs11_uri = mfree(arg_pkcs11_uri);
+ }
+
+ if (arg_tries != 0 && tries >= arg_tries)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Too many attempts to activate; giving up.");
+
+ } else if (streq(argv[1], "detach")) {
+
+ if (!filename_is_valid(argv[2]))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Volume name '%s' is not valid.", argv[2]);
+
+ r = crypt_init_by_name(&cd, argv[2]);
+ if (r == -ENODEV) {
+ log_info("Volume %s already inactive.", argv[2]);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "crypt_init_by_name() failed: %m");
+
+ cryptsetup_enable_logging(cd);
+
+ r = crypt_deactivate(cd, argv[2]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to deactivate: %m");
+
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown verb %s.", argv[1]);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/debug-generator/debug-generator.c b/src/debug-generator/debug-generator.c
new file mode 100644
index 0000000..c6e4d79
--- /dev/null
+++ b/src/debug-generator/debug-generator.c
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dropin.h"
+#include "generator.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-file.h"
+#include "unit-name.h"
+
+static const char *arg_dest = NULL;
+static char *arg_default_unit = NULL;
+static char **arg_mask = NULL;
+static char **arg_wants = NULL;
+static char *arg_debug_shell = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_default_unit, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_mask, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_wants, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_debug_shell, freep);
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ assert(key);
+
+ if (streq(key, "systemd.mask")) {
+ char *n;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = unit_name_mangle(value, UNIT_NAME_MANGLE_WARN, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to glob unit name: %m");
+
+ r = strv_consume(&arg_mask, n);
+ if (r < 0)
+ return log_oom();
+
+ } else if (streq(key, "systemd.wants")) {
+ char *n;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = unit_name_mangle(value, UNIT_NAME_MANGLE_WARN, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to glob unit name: %m");
+
+ r = strv_consume(&arg_wants, n);
+ if (r < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.debug_shell")) {
+ const char *t = NULL;
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ t = skip_dev_prefix(value);
+ else if (r > 0)
+ t = skip_dev_prefix(DEBUGTTY);
+
+ if (free_and_strdup(&arg_debug_shell, t) < 0)
+ return log_oom();
+
+ } else if (streq(key, "systemd.unit")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = free_and_strdup(&arg_default_unit, value);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set default unit %s: %m", value);
+
+ } else if (!value) {
+ const char *target;
+
+ target = runlevel_to_target(key);
+ if (target) {
+ r = free_and_strdup(&arg_default_unit, target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set default unit %s: %m", target);
+ }
+ }
+
+ return 0;
+}
+
+static int generate_mask_symlinks(void) {
+ char **u;
+ int r = 0;
+
+ if (strv_isempty(arg_mask))
+ return 0;
+
+ STRV_FOREACH(u, arg_mask) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(empty_to_root(arg_dest), *u);
+ if (!p)
+ return log_oom();
+
+ if (symlink("/dev/null", p) < 0)
+ r = log_error_errno(errno,
+ "Failed to create mask symlink %s: %m",
+ p);
+ }
+
+ return r;
+}
+
+static int generate_wants_symlinks(void) {
+ char **u;
+ int r = 0;
+
+ if (strv_isempty(arg_wants))
+ return 0;
+
+ STRV_FOREACH(u, arg_wants) {
+ _cleanup_free_ char *p = NULL, *f = NULL;
+ const char *target;
+
+ /* This should match what do_queue_default_job() in core/main.c does. */
+ if (arg_default_unit)
+ target = arg_default_unit;
+ else if (in_initrd())
+ target = SPECIAL_INITRD_TARGET;
+ else
+ target = SPECIAL_DEFAULT_TARGET;
+
+ p = strjoin(arg_dest, "/", target, ".wants/", *u);
+ if (!p)
+ return log_oom();
+
+ f = path_join(SYSTEM_DATA_UNIT_PATH, *u);
+ if (!f)
+ return log_oom();
+
+ mkdir_parents_label(p, 0755);
+
+ if (symlink(f, p) < 0)
+ r = log_error_errno(errno,
+ "Failed to create wants symlink %s: %m",
+ p);
+ }
+
+ return r;
+}
+
+static void install_debug_shell_dropin(const char *dir) {
+ int r;
+
+ if (streq(arg_debug_shell, skip_dev_prefix(DEBUGTTY)))
+ return;
+
+ r = write_drop_in_format(dir, "debug-shell.service", 50, "tty",
+ "[Unit]\n"
+ "Description=Early root shell on /dev/%s FOR DEBUGGING ONLY\n"
+ "ConditionPathExists=\n"
+ "[Service]\n"
+ "TTYPath=/dev/%s",
+ arg_debug_shell, arg_debug_shell);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write drop-in for debug-shell.service, ignoring: %m");
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r, q;
+
+ assert_se(arg_dest = dest_early);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_RD_STRICT | PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (arg_debug_shell) {
+ r = strv_extend(&arg_wants, "debug-shell.service");
+ if (r < 0)
+ return log_oom();
+
+ install_debug_shell_dropin(arg_dest);
+ }
+
+ r = generate_mask_symlinks();
+ q = generate_wants_symlinks();
+
+ return r < 0 ? r : q;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/delta/delta.c b/src/delta/delta.c
new file mode 100644
index 0000000..4295abd
--- /dev/null
+++ b/src/delta/delta.c
@@ -0,0 +1,689 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "locale-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "nulstr-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static const char prefixes[] =
+ "/etc\0"
+ "/run\0"
+ "/usr/local/lib\0"
+ "/usr/local/share\0"
+ "/usr/lib\0"
+ "/usr/share\0"
+#if HAVE_SPLIT_USR
+ "/lib\0"
+#endif
+ ;
+
+static const char suffixes[] =
+ "sysctl.d\0"
+ "tmpfiles.d\0"
+ "modules-load.d\0"
+ "binfmt.d\0"
+ "systemd/system\0"
+ "systemd/user\0"
+ "systemd/system-preset\0"
+ "systemd/user-preset\0"
+ "udev/rules.d\0"
+ "modprobe.d\0";
+
+static const char have_dropins[] =
+ "systemd/system\0"
+ "systemd/user\0";
+
+static PagerFlags arg_pager_flags = 0;
+static int arg_diff = -1;
+
+static enum {
+ SHOW_MASKED = 1 << 0,
+ SHOW_EQUIVALENT = 1 << 1,
+ SHOW_REDIRECTED = 1 << 2,
+ SHOW_OVERRIDDEN = 1 << 3,
+ SHOW_UNCHANGED = 1 << 4,
+ SHOW_EXTENDED = 1 << 5,
+
+ SHOW_DEFAULTS =
+ (SHOW_MASKED | SHOW_EQUIVALENT | SHOW_REDIRECTED | SHOW_OVERRIDDEN | SHOW_EXTENDED)
+} arg_flags = 0;
+
+static int equivalent(const char *a, const char *b) {
+ _cleanup_free_ char *x = NULL, *y = NULL;
+ int r;
+
+ r = chase_symlinks(a, NULL, CHASE_TRAIL_SLASH, &x, NULL);
+ if (r < 0)
+ return r;
+
+ r = chase_symlinks(b, NULL, CHASE_TRAIL_SLASH, &y, NULL);
+ if (r < 0)
+ return r;
+
+ return path_equal(x, y);
+}
+
+static int notify_override_masked(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_MASKED))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight_red(), "[MASKED]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_equivalent(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_EQUIVALENT))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight_green(), "[EQUIVALENT]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_redirected(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_REDIRECTED))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight(), "[REDIRECTED]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_overridden(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_OVERRIDDEN))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight(), "[OVERRIDDEN]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_extended(const char *top, const char *bottom) {
+ if (!(arg_flags & SHOW_EXTENDED))
+ return 0;
+
+ printf("%s%s%s %s %s %s\n",
+ ansi_highlight(), "[EXTENDED]", ansi_normal(),
+ top, special_glyph(SPECIAL_GLYPH_ARROW), bottom);
+ return 1;
+}
+
+static int notify_override_unchanged(const char *f) {
+ if (!(arg_flags & SHOW_UNCHANGED))
+ return 0;
+
+ printf("[UNCHANGED] %s\n", f);
+ return 1;
+}
+
+static int found_override(const char *top, const char *bottom) {
+ _cleanup_free_ char *dest = NULL;
+ pid_t pid;
+ int r;
+
+ assert(top);
+ assert(bottom);
+
+ if (null_or_empty_path(top) > 0)
+ return notify_override_masked(top, bottom);
+
+ r = readlink_malloc(top, &dest);
+ if (r >= 0) {
+ if (equivalent(dest, bottom) > 0)
+ return notify_override_equivalent(top, bottom);
+ else
+ return notify_override_redirected(top, bottom);
+ }
+
+ r = notify_override_overridden(top, bottom);
+ if (!arg_diff)
+ return r;
+
+ putchar('\n');
+
+ fflush(stdout);
+
+ r = safe_fork("(diff)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ execlp("diff", "diff", "-us", "--", bottom, top, NULL);
+ log_open();
+ log_error_errno(errno, "Failed to execute diff: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) wait_for_terminate_and_check("diff", pid, WAIT_LOG_ABNORMAL);
+ putchar('\n');
+
+ return r;
+}
+
+static int enumerate_dir_d(
+ OrderedHashmap *top,
+ OrderedHashmap *bottom,
+ OrderedHashmap *drops,
+ const char *toppath, const char *drop) {
+
+ _cleanup_free_ char *unit = NULL;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ char **file;
+ char *c;
+ int r;
+
+ assert(!endswith(drop, "/"));
+
+ path = path_join(toppath, drop);
+ if (!path)
+ return -ENOMEM;
+
+ log_debug("Looking at %s", path);
+
+ unit = strdup(drop);
+ if (!unit)
+ return -ENOMEM;
+
+ c = strrchr(unit, '.');
+ if (!c)
+ return -EINVAL;
+ *c = 0;
+
+ r = get_files_in_directory(path, &list);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate %s: %m", path);
+
+ strv_sort(list);
+
+ STRV_FOREACH(file, list) {
+ OrderedHashmap *h;
+ int k;
+ char *p;
+ char *d;
+
+ if (!endswith(*file, ".conf"))
+ continue;
+
+ p = path_join(path, *file);
+ if (!p)
+ return -ENOMEM;
+ d = p + strlen(toppath) + 1;
+
+ log_debug("Adding at top: %s %s %s", d, special_glyph(SPECIAL_GLYPH_ARROW), p);
+ k = ordered_hashmap_put(top, d, p);
+ if (k >= 0) {
+ p = strdup(p);
+ if (!p)
+ return -ENOMEM;
+ d = p + strlen(toppath) + 1;
+ } else if (k != -EEXIST) {
+ free(p);
+ return k;
+ }
+
+ log_debug("Adding at bottom: %s %s %s", d, special_glyph(SPECIAL_GLYPH_ARROW), p);
+ free(ordered_hashmap_remove(bottom, d));
+ k = ordered_hashmap_put(bottom, d, p);
+ if (k < 0) {
+ free(p);
+ return k;
+ }
+
+ h = ordered_hashmap_get(drops, unit);
+ if (!h) {
+ h = ordered_hashmap_new(&string_hash_ops);
+ if (!h)
+ return -ENOMEM;
+ ordered_hashmap_put(drops, unit, h);
+ unit = strdup(unit);
+ if (!unit)
+ return -ENOMEM;
+ }
+
+ p = strdup(p);
+ if (!p)
+ return -ENOMEM;
+
+ log_debug("Adding to drops: %s %s %s %s %s",
+ unit, special_glyph(SPECIAL_GLYPH_ARROW), basename(p), special_glyph(SPECIAL_GLYPH_ARROW), p);
+ k = ordered_hashmap_put(h, basename(p), p);
+ if (k < 0) {
+ free(p);
+ if (k != -EEXIST)
+ return k;
+ }
+ }
+ return 0;
+}
+
+static int enumerate_dir(
+ OrderedHashmap *top,
+ OrderedHashmap *bottom,
+ OrderedHashmap *drops,
+ const char *path, bool dropins) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
+ size_t n_files = 0, allocated_files = 0, n_dirs = 0, allocated_dirs = 0;
+ char **t;
+ int r;
+
+ assert(top);
+ assert(bottom);
+ assert(drops);
+ assert(path);
+
+ log_debug("Looking at %s", path);
+
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ dirent_ensure_type(d, de);
+
+ if (dropins && de->d_type == DT_DIR && endswith(de->d_name, ".d")) {
+ if (!GREEDY_REALLOC0(dirs, allocated_dirs, n_dirs + 2))
+ return -ENOMEM;
+
+ dirs[n_dirs] = strdup(de->d_name);
+ if (!dirs[n_dirs])
+ return -ENOMEM;
+ n_dirs ++;
+ }
+
+ if (!dirent_is_file(de))
+ continue;
+
+ if (!GREEDY_REALLOC0(files, allocated_files, n_files + 2))
+ return -ENOMEM;
+
+ files[n_files] = strdup(de->d_name);
+ if (!files[n_files])
+ return -ENOMEM;
+ n_files ++;
+ }
+
+ strv_sort(dirs);
+ strv_sort(files);
+
+ STRV_FOREACH(t, dirs) {
+ r = enumerate_dir_d(top, bottom, drops, path, *t);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(t, files) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(path, *t);
+ if (!p)
+ return -ENOMEM;
+
+ log_debug("Adding at top: %s %s %s", basename(p), special_glyph(SPECIAL_GLYPH_ARROW), p);
+ r = ordered_hashmap_put(top, basename(p), p);
+ if (r >= 0) {
+ p = strdup(p);
+ if (!p)
+ return -ENOMEM;
+ } else if (r != -EEXIST)
+ return r;
+
+ log_debug("Adding at bottom: %s %s %s", basename(p), special_glyph(SPECIAL_GLYPH_ARROW), p);
+ free(ordered_hashmap_remove(bottom, basename(p)));
+ r = ordered_hashmap_put(bottom, basename(p), p);
+ if (r < 0)
+ return r;
+ p = NULL;
+ }
+
+ return 0;
+}
+
+static int should_skip_path(const char *prefix, const char *suffix) {
+#if HAVE_SPLIT_USR
+ _cleanup_free_ char *target = NULL;
+ const char *dirname, *p;
+
+ dirname = prefix_roota(prefix, suffix);
+
+ if (chase_symlinks(dirname, NULL, 0, &target, NULL) < 0)
+ return false;
+
+ NULSTR_FOREACH(p, prefixes) {
+ _cleanup_free_ char *tmp = NULL;
+
+ if (path_startswith(dirname, p))
+ continue;
+
+ tmp = path_join(p, suffix);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (path_equal(target, tmp)) {
+ log_debug("%s redirects to %s, skipping.", dirname, target);
+ return true;
+ }
+ }
+#endif
+ return false;
+}
+
+static int process_suffix(const char *suffix, const char *onlyprefix) {
+ const char *p;
+ char *f, *key;
+ OrderedHashmap *top, *bottom, *drops, *h;
+ int r = 0, k, n_found = 0;
+ bool dropins;
+
+ assert(suffix);
+ assert(!startswith(suffix, "/"));
+ assert(!strstr(suffix, "//"));
+
+ dropins = nulstr_contains(have_dropins, suffix);
+
+ top = ordered_hashmap_new(&string_hash_ops);
+ bottom = ordered_hashmap_new(&string_hash_ops);
+ drops = ordered_hashmap_new(&string_hash_ops);
+ if (!top || !bottom || !drops) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ NULSTR_FOREACH(p, prefixes) {
+ _cleanup_free_ char *t = NULL;
+
+ if (should_skip_path(p, suffix) > 0)
+ continue;
+
+ t = path_join(p, suffix);
+ if (!t) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ k = enumerate_dir(top, bottom, drops, t, dropins);
+ if (r == 0)
+ r = k;
+ }
+
+ ORDERED_HASHMAP_FOREACH_KEY(f, key, top) {
+ char *o;
+
+ o = ordered_hashmap_get(bottom, key);
+ assert(o);
+
+ if (!onlyprefix || startswith(o, onlyprefix)) {
+ if (path_equal(o, f)) {
+ notify_override_unchanged(f);
+ } else {
+ k = found_override(f, o);
+ if (k < 0)
+ r = k;
+ else
+ n_found += k;
+ }
+ }
+
+ h = ordered_hashmap_get(drops, key);
+ if (h)
+ ORDERED_HASHMAP_FOREACH(o, h)
+ if (!onlyprefix || startswith(o, onlyprefix))
+ n_found += notify_override_extended(f, o);
+ }
+
+finish:
+ ordered_hashmap_free_free(top);
+ ordered_hashmap_free_free(bottom);
+
+ ORDERED_HASHMAP_FOREACH_KEY(h, key, drops) {
+ ordered_hashmap_free_free(ordered_hashmap_remove(drops, key));
+ ordered_hashmap_remove(drops, key);
+ free(key);
+ }
+ ordered_hashmap_free(drops);
+
+ return r < 0 ? r : n_found;
+}
+
+static int process_suffixes(const char *onlyprefix) {
+ const char *n;
+ int n_found = 0, r;
+
+ NULSTR_FOREACH(n, suffixes) {
+ r = process_suffix(n, onlyprefix);
+ if (r < 0)
+ return r;
+
+ n_found += r;
+ }
+
+ return n_found;
+}
+
+static int process_suffix_chop(const char *arg) {
+ const char *p;
+
+ assert(arg);
+
+ if (!path_is_absolute(arg))
+ return process_suffix(arg, NULL);
+
+ /* Strip prefix from the suffix */
+ NULSTR_FOREACH(p, prefixes) {
+ const char *suffix;
+
+ suffix = startswith(arg, p);
+ if (suffix) {
+ suffix += strspn(suffix, "/");
+ if (*suffix)
+ return process_suffix(suffix, p);
+ else
+ return process_suffixes(arg);
+ }
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid suffix specification %s.", arg);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-delta", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [SUFFIX...]\n\n"
+ "Find overridden configuration files.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --diff[=1|0] Show a diff when overridden files differ\n"
+ " -t --type=LIST... Only display a selected set of override types\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_flags(const char *flag_str, int flags) {
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ int r;
+
+ r = extract_first_word(&flag_str, &word, ",", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return flags;
+
+ if (streq(word, "masked"))
+ flags |= SHOW_MASKED;
+ else if (streq(word, "equivalent"))
+ flags |= SHOW_EQUIVALENT;
+ else if (streq(word, "redirected"))
+ flags |= SHOW_REDIRECTED;
+ else if (streq(word, "overridden"))
+ flags |= SHOW_OVERRIDDEN;
+ else if (streq(word, "unchanged"))
+ flags |= SHOW_UNCHANGED;
+ else if (streq(word, "extended"))
+ flags |= SHOW_EXTENDED;
+ else if (streq(word, "default"))
+ flags |= SHOW_DEFAULTS;
+ else
+ return -EINVAL;
+ }
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_NO_PAGER = 0x100,
+ ARG_DIFF,
+ ARG_VERSION
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "diff", optional_argument, NULL, ARG_DIFF },
+ { "type", required_argument, NULL, 't' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ht:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case 't': {
+ int f;
+ f = parse_flags(optarg, arg_flags);
+ if (f < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse flags field.");
+ arg_flags = f;
+ break;
+ }
+
+ case ARG_DIFF:
+ if (!optarg)
+ arg_diff = 1;
+ else {
+ int b;
+
+ b = parse_boolean(optarg);
+ if (b < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse diff boolean.");
+
+ arg_diff = b;
+ }
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ int r, k, n_found = 0;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_flags == 0)
+ arg_flags = SHOW_DEFAULTS;
+
+ if (arg_diff < 0)
+ arg_diff = !!(arg_flags & SHOW_OVERRIDDEN);
+ else if (arg_diff)
+ arg_flags |= SHOW_OVERRIDDEN;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (optind < argc) {
+ int i;
+
+ for (i = optind; i < argc; i++) {
+ path_simplify(argv[i], false);
+
+ k = process_suffix_chop(argv[i]);
+ if (k < 0)
+ r = k;
+ else
+ n_found += k;
+ }
+
+ } else {
+ k = process_suffixes(NULL);
+ if (k < 0)
+ r = k;
+ else
+ n_found += k;
+ }
+
+ if (r >= 0)
+ printf("%s%i overridden configuration files found.\n", n_found ? "\n" : "", n_found);
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/detect-virt/detect-virt.c b/src/detect-virt/detect-virt.c
new file mode 100644
index 0000000..14d649c
--- /dev/null
+++ b/src/detect-virt/detect-virt.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "string-table.h"
+#include "util.h"
+#include "virt.h"
+
+static bool arg_quiet = false;
+static enum {
+ ANY_VIRTUALIZATION,
+ ONLY_VM,
+ ONLY_CONTAINER,
+ ONLY_CHROOT,
+ ONLY_PRIVATE_USERS,
+} arg_mode = ANY_VIRTUALIZATION;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-detect-virt", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Detect execution in a virtualized environment.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -c --container Only detect whether we are run in a container\n"
+ " -v --vm Only detect whether we are run in a VM\n"
+ " -r --chroot Detect whether we are run in a chroot() environment\n"
+ " --private-users Only detect whether we are running in a user namespace\n"
+ " -q --quiet Don't output anything, just set return value\n"
+ " --list List all known and detectable types of virtualization\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_PRIVATE_USERS,
+ ARG_LIST,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "container", no_argument, NULL, 'c' },
+ { "vm", no_argument, NULL, 'v' },
+ { "chroot", no_argument, NULL, 'r' },
+ { "private-users", no_argument, NULL, ARG_PRIVATE_USERS },
+ { "quiet", no_argument, NULL, 'q' },
+ { "list", no_argument, NULL, ARG_LIST },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hqcvr", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'c':
+ arg_mode = ONLY_CONTAINER;
+ break;
+
+ case ARG_PRIVATE_USERS:
+ arg_mode = ONLY_PRIVATE_USERS;
+ break;
+
+ case 'v':
+ arg_mode = ONLY_VM;
+ break;
+
+ case 'r':
+ arg_mode = ONLY_CHROOT;
+ break;
+
+ case ARG_LIST:
+ DUMP_STRING_TABLE(virtualization, int, _VIRTUALIZATION_MAX);
+ return 0;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s takes no arguments.",
+ program_invocation_short_name);
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ /* This is mostly intended to be used for scripts which want
+ * to detect whether we are being run in a virtualized
+ * environment or not */
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ switch (arg_mode) {
+ case ONLY_VM:
+ r = detect_vm();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for VM: %m");
+ break;
+
+ case ONLY_CONTAINER:
+ r = detect_container();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for container: %m");
+ break;
+
+ case ONLY_CHROOT:
+ r = running_in_chroot();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for chroot() environment: %m");
+ return !r;
+
+ case ONLY_PRIVATE_USERS:
+ r = running_in_userns();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for user namespace: %m");
+ return !r;
+
+ case ANY_VIRTUALIZATION:
+ default:
+ r = detect_virtualization();
+ if (r < 0)
+ return log_error_errno(r, "Failed to check for virtualization: %m");
+ break;
+ }
+
+ if (!arg_quiet)
+ puts(virtualization_to_string(r));
+
+ return r == VIRTUALIZATION_NONE;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c
new file mode 100644
index 0000000..dc7e9dc
--- /dev/null
+++ b/src/dissect/dissect.c
@@ -0,0 +1,776 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/loop.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+
+#include "architecture.h"
+#include "copy.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "loop-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "namespace-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+static enum {
+ ACTION_DISSECT,
+ ACTION_MOUNT,
+ ACTION_COPY_FROM,
+ ACTION_COPY_TO,
+} arg_action = ACTION_DISSECT;
+static const char *arg_image = NULL;
+static const char *arg_path = NULL;
+static const char *arg_source = NULL;
+static const char *arg_target = NULL;
+static DissectImageFlags arg_flags = DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK;
+static VeritySettings arg_verity_settings = VERITY_SETTINGS_DEFAULT;
+static bool arg_json = false;
+static JsonFormatFlags arg_json_format_flags = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_verity_settings, verity_settings_done);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-dissect", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] IMAGE\n"
+ "%1$s [OPTIONS...] --mount IMAGE PATH\n"
+ "%1$s [OPTIONS...] --copy-from IMAGE PATH [TARGET]\n"
+ "%1$s [OPTIONS...] --copy-to IMAGE [SOURCE] PATH\n\n"
+ "%5$sDissect a file system OS image.%6$s\n\n"
+ "%3$sOptions:%4$s\n"
+ " -r --read-only Mount read-only\n"
+ " --fsck=BOOL Run fsck before mounting\n"
+ " --mkdir Make mount directory before mounting, if missing\n"
+ " --discard=MODE Choose 'discard' mode (disabled, loop, all, crypto)\n"
+ " --root-hash=HASH Specify root hash for verity\n"
+ " --root-hash-sig=SIG Specify pkcs7 signature of root hash for verity\n"
+ " as a DER encoded PKCS7, either as a path to a file\n"
+ " or as an ASCII base64 encoded string prefixed by\n"
+ " 'base64:'\n"
+ " --verity-data=PATH Specify data file with hash tree for verity if it is\n"
+ " not embedded in IMAGE\n"
+ " --json=pretty|short|off\n"
+ " Generate JSON output\n"
+ "\n%3$sCommands:%4$s\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -m --mount Mount the image to the specified directory\n"
+ " -M Shortcut for --mount --mkdir\n"
+ " -x --copy-from Copy files from image to host\n"
+ " -a --copy-to Copy files from host to image\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ , ansi_underline(), ansi_normal()
+ , ansi_highlight(), ansi_normal());
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_DISCARD,
+ ARG_FSCK,
+ ARG_ROOT_HASH,
+ ARG_ROOT_HASH_SIG,
+ ARG_VERITY_DATA,
+ ARG_MKDIR,
+ ARG_JSON,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "mount", no_argument, NULL, 'm' },
+ { "read-only", no_argument, NULL, 'r' },
+ { "discard", required_argument, NULL, ARG_DISCARD },
+ { "fsck", required_argument, NULL, ARG_FSCK },
+ { "root-hash", required_argument, NULL, ARG_ROOT_HASH },
+ { "root-hash-sig", required_argument, NULL, ARG_ROOT_HASH_SIG },
+ { "verity-data", required_argument, NULL, ARG_VERITY_DATA },
+ { "mkdir", no_argument, NULL, ARG_MKDIR },
+ { "copy-from", no_argument, NULL, 'x' },
+ { "copy-to", no_argument, NULL, 'a' },
+ { "json", required_argument, NULL, ARG_JSON },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hmrMxa", options, NULL)) >= 0) {
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'm':
+ arg_action = ACTION_MOUNT;
+ break;
+
+ case ARG_MKDIR:
+ arg_flags |= DISSECT_IMAGE_MKDIR;
+ break;
+
+ case 'M':
+ /* Shortcut combination of the above two */
+ arg_action = ACTION_MOUNT;
+ arg_flags |= DISSECT_IMAGE_MKDIR;
+ break;
+
+ case 'x':
+ arg_action = ACTION_COPY_FROM;
+ arg_flags |= DISSECT_IMAGE_READ_ONLY;
+ break;
+
+ case 'a':
+ arg_action = ACTION_COPY_TO;
+ break;
+
+ case 'r':
+ arg_flags |= DISSECT_IMAGE_READ_ONLY;
+ break;
+
+ case ARG_DISCARD: {
+ DissectImageFlags flags;
+
+ if (streq(optarg, "disabled"))
+ flags = 0;
+ else if (streq(optarg, "loop"))
+ flags = DISSECT_IMAGE_DISCARD_ON_LOOP;
+ else if (streq(optarg, "all"))
+ flags = DISSECT_IMAGE_DISCARD_ON_LOOP | DISSECT_IMAGE_DISCARD;
+ else if (streq(optarg, "crypt"))
+ flags = DISSECT_IMAGE_DISCARD_ANY;
+ else if (streq(optarg, "list")) {
+ puts("disabled\n"
+ "all\n"
+ "crypt\n"
+ "loop");
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown --discard= parameter: %s",
+ optarg);
+ arg_flags = (arg_flags & ~DISSECT_IMAGE_DISCARD_ANY) | flags;
+
+ break;
+ }
+
+ case ARG_ROOT_HASH: {
+ _cleanup_free_ void *p = NULL;
+ size_t l;
+
+ r = unhexmem(optarg, strlen(optarg), &p, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash '%s': %m", optarg);
+ if (l < sizeof(sd_id128_t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Root hash must be at least 128bit long: %s", optarg);
+
+ free_and_replace(arg_verity_settings.root_hash, p);
+ arg_verity_settings.root_hash_size = l;
+ break;
+ }
+
+ case ARG_ROOT_HASH_SIG: {
+ char *value;
+ size_t l;
+ void *p;
+
+ if ((value = startswith(optarg, "base64:"))) {
+ r = unbase64mem(value, strlen(value), &p, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash signature '%s': %m", optarg);
+ } else {
+ r = read_full_file(optarg, (char**) &p, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read root hash signature file '%s': %m", optarg);
+ }
+
+ free_and_replace(arg_verity_settings.root_hash_sig, p);
+ arg_verity_settings.root_hash_sig_size = l;
+ break;
+ }
+
+ case ARG_VERITY_DATA:
+ r = parse_path_argument_and_warn(optarg, false, &arg_verity_settings.data_path);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_FSCK:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --fsck= parameter: %s", optarg);
+
+ SET_FLAG(arg_flags, DISSECT_IMAGE_FSCK, r);
+ break;
+
+ case ARG_JSON:
+ if (streq(optarg, "pretty")) {
+ arg_json = true;
+ arg_json_format_flags = JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR_AUTO;
+ } else if (streq(optarg, "short")) {
+ arg_json = true;
+ arg_json_format_flags = JSON_FORMAT_NEWLINE;
+ } else if (streq(optarg, "off")) {
+ arg_json = false;
+ arg_json_format_flags = 0;
+ } else if (streq(optarg, "help")) {
+ puts("pretty\n"
+ "short\n"
+ "off");
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown argument to --json=: %s", optarg);
+
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ }
+
+ switch (arg_action) {
+
+ case ACTION_DISSECT:
+ if (optind + 1 != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected an image file path as only argument.");
+
+ arg_image = argv[optind];
+ arg_flags |= DISSECT_IMAGE_READ_ONLY;
+ break;
+
+ case ACTION_MOUNT:
+ if (optind + 2 != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected an image file path and mount point path as only arguments.");
+
+ arg_image = argv[optind];
+ arg_path = argv[optind + 1];
+ break;
+
+ case ACTION_COPY_FROM:
+ if (argc < optind + 2 || argc > optind + 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected an image file path, a source path and an optional destination path as only arguments.");
+
+ arg_image = argv[optind];
+ arg_source = argv[optind + 1];
+ arg_target = argc > optind + 2 ? argv[optind + 2] : "-" /* this means stdout */ ;
+
+ arg_flags |= DISSECT_IMAGE_READ_ONLY;
+ break;
+
+ case ACTION_COPY_TO:
+ if (argc < optind + 2 || argc > optind + 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected an image file path, an optional source path and a destination path as only arguments.");
+
+ arg_image = argv[optind];
+
+ if (argc > optind + 2) {
+ arg_source = argv[optind + 1];
+ arg_target = argv[optind + 2];
+ } else {
+ arg_source = "-"; /* this means stdin */
+ arg_target = argv[optind + 1];
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Unknown action.");
+ }
+
+ return 1;
+}
+
+static int strv_pair_to_json(char **l, JsonVariant **ret) {
+ _cleanup_strv_free_ char **jl = NULL;
+ char **a, **b;
+
+ STRV_FOREACH_PAIR(a, b, l) {
+ char *j;
+
+ j = strjoin(*a, "=", *b);
+ if (!j)
+ return log_oom();
+
+ if (strv_consume(&jl, j) < 0)
+ return log_oom();
+ }
+
+ return json_variant_new_array_strv(ret, jl);
+}
+
+static int action_dissect(DissectedImage *m, LoopDevice *d) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(table_unrefp) Table *t = NULL;
+ uint64_t size = UINT64_MAX;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ if (!arg_json)
+ printf(" Name: %s\n", basename(arg_image));
+
+ if (ioctl(d->fd, BLKGETSIZE64, &size) < 0)
+ log_debug_errno(errno, "Failed to query size of loopback device: %m");
+ else if (!arg_json) {
+ char s[FORMAT_BYTES_MAX];
+ printf(" Size: %s\n", format_bytes(s, sizeof(s), size));
+ }
+
+ if (!arg_json)
+ putc('\n', stdout);
+
+ r = dissected_image_acquire_metadata(m);
+ if (r == -ENXIO)
+ return log_error_errno(r, "No root partition discovered.");
+ if (r == -EUCLEAN)
+ return log_error_errno(r, "File system check of image failed.");
+ if (r == -EMEDIUMTYPE)
+ log_warning_errno(r, "Not a valid OS image, no os-release file included. Proceeding anyway.");
+ else if (r == -EUNATCH)
+ log_warning_errno(r, "OS image is encrypted, proceeding without showing OS image metadata.");
+ else if (r == -EBUSY)
+ log_warning_errno(r, "OS image is currently in use, proceeding without showing OS image metadata.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to acquire image metadata: %m");
+ else if (!arg_json) {
+ if (m->hostname)
+ printf(" Hostname: %s\n", m->hostname);
+
+ if (!sd_id128_is_null(m->machine_id))
+ printf("Machine ID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(m->machine_id));
+
+ if (!strv_isempty(m->machine_info)) {
+ char **p, **q;
+
+ STRV_FOREACH_PAIR(p, q, m->machine_info)
+ printf("%s %s=%s\n",
+ p == m->machine_info ? "Mach. Info:" : " ",
+ *p, *q);
+ }
+
+ if (!strv_isempty(m->os_release)) {
+ char **p, **q;
+
+ STRV_FOREACH_PAIR(p, q, m->os_release)
+ printf("%s %s=%s\n",
+ p == m->os_release ? "OS Release:" : " ",
+ *p, *q);
+ }
+
+ if (m->hostname ||
+ !sd_id128_is_null(m->machine_id) ||
+ !strv_isempty(m->machine_info) ||
+ !strv_isempty(m->os_release))
+ putc('\n', stdout);
+ } else {
+ _cleanup_(json_variant_unrefp) JsonVariant *mi = NULL, *osr = NULL;
+
+ if (!strv_isempty(m->machine_info)) {
+ r = strv_pair_to_json(m->machine_info, &mi);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (!strv_isempty(m->os_release)) {
+ r = strv_pair_to_json(m->os_release, &osr);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = json_build(&v, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("name", JSON_BUILD_STRING(basename(arg_image))),
+ JSON_BUILD_PAIR("size", JSON_BUILD_INTEGER(size)),
+ JSON_BUILD_PAIR_CONDITION(m->hostname, "hostname", JSON_BUILD_STRING(m->hostname)),
+ JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(m->machine_id), "machineId", JSON_BUILD_ID128(m->machine_id)),
+ JSON_BUILD_PAIR_CONDITION(mi, "machineInfo", JSON_BUILD_VARIANT(mi)),
+ JSON_BUILD_PAIR_CONDITION(osr, "osRelease", JSON_BUILD_VARIANT(osr))));
+ if (r < 0)
+ return log_oom();
+ }
+
+ t = table_new("rw", "designator", "partition uuid", "fstype", "architecture", "verity", "node", "partno");
+ if (!t)
+ return log_oom();
+
+ (void) table_set_empty_string(t, "-");
+ (void) table_set_align_percent(t, table_get_cell(t, 0, 7), 100);
+
+ for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+
+ if (!p->found)
+ continue;
+
+ r = table_add_many(
+ t,
+ TABLE_STRING, p->rw ? "rw" : "ro",
+ TABLE_STRING, partition_designator_to_string(i));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (sd_id128_is_null(p->uuid))
+ r = table_add_cell(t, NULL, TABLE_EMPTY, NULL);
+ else
+ r = table_add_cell(t, NULL, TABLE_UUID, &p->uuid);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(
+ t,
+ TABLE_STRING, p->fstype,
+ TABLE_STRING, architecture_to_string(p->architecture));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (arg_verity_settings.data_path)
+ r = table_add_cell(t, NULL, TABLE_STRING, "external");
+ else if (dissected_image_can_do_verity(m, i))
+ r = table_add_cell(t, NULL, TABLE_STRING, yes_no(dissected_image_has_verity(m, i)));
+ else
+ r = table_add_cell(t, NULL, TABLE_EMPTY, NULL);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (p->partno < 0) /* no partition table, naked file system */ {
+ r = table_add_cell(t, NULL, TABLE_STRING, arg_image);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell(t, NULL, TABLE_EMPTY, NULL);
+ } else {
+ r = table_add_cell(t, NULL, TABLE_STRING, p->node);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell(t, NULL, TABLE_INT, &p->partno);
+ }
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (arg_json) {
+ _cleanup_(json_variant_unrefp) JsonVariant *jt = NULL;
+
+ r = table_to_json(t, &jt);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert table to JSON: %m");
+
+ r = json_variant_set_field(&v, "mounts", jt);
+ if (r < 0)
+ return log_oom();
+
+ json_variant_dump(v, arg_json_format_flags, stdout, NULL);
+ } else {
+ r = table_print(t, stdout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to dump table: %m");
+ }
+
+ return 0;
+}
+
+static int action_mount(DissectedImage *m, LoopDevice *d) {
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *di = NULL;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ r = dissected_image_decrypt_interactively(
+ m, NULL,
+ &arg_verity_settings,
+ arg_flags,
+ &di);
+ if (r < 0)
+ return r;
+
+ r = dissected_image_mount_and_warn(m, arg_path, UID_INVALID, arg_flags);
+ if (r < 0)
+ return r;
+
+ if (di) {
+ r = decrypted_image_relinquish(di);
+ if (r < 0)
+ return log_error_errno(r, "Failed to relinquish DM devices: %m");
+ }
+
+ loop_device_relinquish(d);
+ return 0;
+}
+
+static int action_copy(DissectedImage *m, LoopDevice *d) {
+ _cleanup_(umount_and_rmdir_and_freep) char *mounted_dir = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *di = NULL;
+ _cleanup_(rmdir_and_freep) char *created_dir = NULL;
+ _cleanup_free_ char *temp = NULL;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ r = dissected_image_decrypt_interactively(
+ m, NULL,
+ &arg_verity_settings,
+ arg_flags,
+ &di);
+ if (r < 0)
+ return r;
+
+ r = detach_mount_namespace();
+ if (r < 0)
+ return log_error_errno(r, "Failed to detach mount namespace: %m");
+
+ r = tempfn_random_child(NULL, program_invocation_short_name, &temp);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate temporary mount directory: %m");
+
+ r = mkdir_p(temp, 0700);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create mount point: %m");
+
+ created_dir = TAKE_PTR(temp);
+
+ r = dissected_image_mount_and_warn(m, created_dir, UID_INVALID, arg_flags);
+ if (r < 0)
+ return r;
+
+ mounted_dir = TAKE_PTR(created_dir);
+
+ if (di) {
+ r = decrypted_image_relinquish(di);
+ if (r < 0)
+ return log_error_errno(r, "Failed to relinquish DM devices: %m");
+ }
+
+ loop_device_relinquish(d);
+
+ if (arg_action == ACTION_COPY_FROM) {
+ _cleanup_close_ int source_fd = -1, target_fd = -1;
+
+ source_fd = chase_symlinks_and_open(arg_source, mounted_dir, CHASE_PREFIX_ROOT|CHASE_WARN, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (source_fd < 0)
+ return log_error_errno(source_fd, "Failed to open source path '%s' in image '%s': %m", arg_source, arg_image);
+
+ /* Copying to stdout? */
+ if (streq(arg_target, "-")) {
+ r = copy_bytes(source_fd, STDOUT_FILENO, (uint64_t) -1, COPY_REFLINK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy bytes from %s in mage '%s' to stdout: %m", arg_source, arg_image);
+
+ /* When we copy to stdou we don't copy any attributes (i.e. no access mode, no ownership, no xattr, no times) */
+ return 0;
+ }
+
+ /* Try to copy as directory? */
+ r = copy_directory_fd(source_fd, arg_target, COPY_REFLINK|COPY_MERGE_EMPTY|COPY_SIGINT|COPY_HARDLINKS);
+ if (r >= 0)
+ return 0;
+ if (r != -ENOTDIR)
+ return log_error_errno(r, "Failed to copy %s in image '%s' to '%s': %m", arg_source, arg_image, arg_target);
+
+ r = fd_verify_regular(source_fd);
+ if (r == -EISDIR)
+ return log_error_errno(r, "Target '%s' exists already and is not a directory.", arg_target);
+ if (r < 0)
+ return log_error_errno(r, "Source path %s in image '%s' is neither regular file nor directory, refusing: %m", arg_source, arg_image);
+
+ /* Nah, it's a plain file! */
+ target_fd = open(arg_target, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
+ if (target_fd < 0)
+ return log_error_errno(errno, "Failed to create regular file at target path '%s': %m", arg_target);
+
+ r = copy_bytes(source_fd, target_fd, (uint64_t) -1, COPY_REFLINK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy bytes from %s in mage '%s' to '%s': %m", arg_source, arg_image, arg_target);
+
+ (void) copy_xattr(source_fd, target_fd);
+ (void) copy_access(source_fd, target_fd);
+ (void) copy_times(source_fd, target_fd, 0);
+
+ /* When this is a regular file we don't copy ownership! */
+
+ } else {
+ _cleanup_close_ int source_fd = -1, target_fd = -1;
+ _cleanup_close_ int dfd = -1;
+ _cleanup_free_ char *dn = NULL;
+
+ assert(arg_action == ACTION_COPY_TO);
+
+ dn = dirname_malloc(arg_target);
+ if (!dn)
+ return log_oom();
+
+ r = chase_symlinks(dn, mounted_dir, CHASE_PREFIX_ROOT|CHASE_WARN, NULL, &dfd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open '%s': %m", dn);
+
+ /* Are we reading from stdin? */
+ if (streq(arg_source, "-")) {
+ target_fd = openat(dfd, basename(arg_target), O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_EXCL, 0644);
+ if (target_fd < 0)
+ return log_error_errno(errno, "Failed to open target file '%s': %m", arg_target);
+
+ r = copy_bytes(STDIN_FILENO, target_fd, (uint64_t) -1, COPY_REFLINK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy bytes from stdin to '%s' in image '%s': %m", arg_target, arg_image);
+
+ /* When we copy from stdin we don't copy any attributes (i.e. no access mode, no ownership, no xattr, no times) */
+ return 0;
+ }
+
+ source_fd = open(arg_source, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (source_fd < 0)
+ return log_error_errno(source_fd, "Failed to open source path '%s': %m", arg_source);
+
+ r = fd_verify_regular(source_fd);
+ if (r < 0) {
+ if (r != -EISDIR)
+ return log_error_errno(r, "Source '%s' is neither regular file nor directory: %m", arg_source);
+
+ /* We are looking at a directory. */
+
+ target_fd = openat(dfd, basename(arg_target), O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (target_fd < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open destination '%s': %m", arg_target);
+
+ r = copy_tree_at(source_fd, ".", dfd, basename(arg_target), UID_INVALID, GID_INVALID, COPY_REFLINK|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS);
+ } else
+ r = copy_tree_at(source_fd, ".", target_fd, ".", UID_INVALID, GID_INVALID, COPY_REFLINK|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy '%s' to '%s' in image '%s': %m", arg_source, arg_target, arg_image);
+
+ return 0;
+ }
+
+ /* We area looking at a regular file */
+ target_fd = openat(dfd, basename(arg_target), O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_EXCL, 0600);
+ if (target_fd < 0)
+ return log_error_errno(errno, "Failed to open target file '%s': %m", arg_target);
+
+ r = copy_bytes(source_fd, target_fd, (uint64_t) -1, COPY_REFLINK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy bytes from '%s' to '%s' in image '%s': %m", arg_source, arg_target, arg_image);
+
+ (void) copy_xattr(source_fd, target_fd);
+ (void) copy_access(source_fd, target_fd);
+ (void) copy_times(source_fd, target_fd, 0);
+
+ /* When this is a regular file we don't copy ownership! */
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = verity_settings_load(
+ &arg_verity_settings,
+ arg_image, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read verity artifacts for %s: %m", arg_image);
+
+ if (arg_verity_settings.data_path)
+ arg_flags |= DISSECT_IMAGE_NO_PARTITION_TABLE; /* We only support Verity per file system,
+ * hence if there's external Verity data
+ * available we turn off partition table
+ * support */
+
+ r = loop_device_make_by_path(
+ arg_image,
+ FLAGS_SET(arg_flags, DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR,
+ FLAGS_SET(arg_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
+ &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up loopback device: %m");
+
+ r = dissect_image_and_warn(
+ d->fd,
+ arg_image,
+ &arg_verity_settings,
+ NULL,
+ arg_flags,
+ &m);
+ if (r < 0)
+ return r;
+
+ switch (arg_action) {
+
+ case ACTION_DISSECT:
+ r = action_dissect(m, d);
+ break;
+
+ case ACTION_MOUNT:
+ r = action_mount(m, d);
+ break;
+
+ case ACTION_COPY_FROM:
+ case ACTION_COPY_TO:
+ r = action_copy(m, d);
+ break;
+
+ default:
+ assert_not_reached("Unknown action.");
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/environment-d-generator/environment-d-generator.c b/src/environment-d-generator/environment-d-generator.c
new file mode 100644
index 0000000..1c51cf6
--- /dev/null
+++ b/src/environment-d-generator/environment-d-generator.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-path.h"
+
+#include "conf-files.h"
+#include "def.h"
+#include "env-file.h"
+#include "escape.h"
+#include "log.h"
+#include "path-lookup.h"
+#include "strv.h"
+
+static int environment_dirs(char ***ret) {
+ _cleanup_strv_free_ char **dirs = NULL;
+ _cleanup_free_ char *c = NULL;
+ int r;
+
+ dirs = strv_new(CONF_PATHS_USR("environment.d"), NULL);
+ if (!dirs)
+ return -ENOMEM;
+
+ /* ~/.config/systemd/environment.d */
+ r = sd_path_lookup(SD_PATH_USER_CONFIGURATION, "environment.d", &c);
+ if (r < 0)
+ return r;
+
+ r = strv_extend_front(&dirs, c);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t;
+
+ t = strv_join(dirs, "\n\t");
+ log_debug("Looking for environment.d files in (higher priority first):\n\t%s", strna(t));
+ }
+
+ *ret = TAKE_PTR(dirs);
+ return 0;
+}
+
+static int load_and_print(void) {
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL, **env = NULL;
+ char **i;
+ int r;
+
+ r = environment_dirs(&dirs);
+ if (r < 0)
+ return r;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char **) dirs);
+ if (r < 0)
+ return r;
+
+ /* This will mutate the existing environment, based on the presumption
+ * that in case of failure, a partial update is better than none. */
+
+ STRV_FOREACH(i, files) {
+ log_debug("Reading %s…", *i);
+
+ r = merge_env_file(&env, NULL, *i);
+ if (r == -ENOMEM)
+ return r;
+ }
+
+ STRV_FOREACH(i, env) {
+ char *t;
+ _cleanup_free_ char *q = NULL;
+
+ t = strchr(*i, '=');
+ assert(t);
+
+ q = shell_maybe_quote(t + 1, ESCAPE_BACKSLASH);
+ if (!q)
+ return log_oom();
+
+ printf("%.*s=%s\n", (int) (t - *i), *i, q);
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ if (argc > 1) {
+ log_error("This program takes no arguments.");
+ return EXIT_FAILURE;
+ }
+
+ r = load_and_print();
+ if (r < 0)
+ log_error_errno(r, "Failed to load environment.d: %m");
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/escape/escape.c b/src/escape/escape.c
new file mode 100644
index 0000000..05d03ad
--- /dev/null
+++ b/src/escape/escape.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+static enum {
+ ACTION_ESCAPE,
+ ACTION_UNESCAPE,
+ ACTION_MANGLE
+} arg_action = ACTION_ESCAPE;
+static const char *arg_suffix = NULL;
+static const char *arg_template = NULL;
+static bool arg_path = false;
+static bool arg_instance = false;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-escape", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [NAME...]\n\n"
+ "Escape strings for usage in systemd unit names.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --suffix=SUFFIX Unit suffix to append to escaped strings\n"
+ " --template=TEMPLATE Insert strings as instance into template\n"
+ " --instance With --unescape, show just the instance part\n"
+ " -u --unescape Unescape strings\n"
+ " -m --mangle Mangle strings\n"
+ " -p --path When escaping/unescaping assume the string is a path\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_SUFFIX,
+ ARG_TEMPLATE
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "suffix", required_argument, NULL, ARG_SUFFIX },
+ { "template", required_argument, NULL, ARG_TEMPLATE },
+ { "unescape", no_argument, NULL, 'u' },
+ { "mangle", no_argument, NULL, 'm' },
+ { "path", no_argument, NULL, 'p' },
+ { "instance", no_argument, NULL, 'i' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hump", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_SUFFIX:
+
+ if (unit_type_from_string(optarg) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid unit suffix type %s.", optarg);
+
+ arg_suffix = optarg;
+ break;
+
+ case ARG_TEMPLATE:
+
+ if (!unit_name_is_valid(optarg, UNIT_NAME_TEMPLATE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Template name %s is not valid.", optarg);
+
+ arg_template = optarg;
+ break;
+
+ case 'u':
+ arg_action = ACTION_UNESCAPE;
+ break;
+
+ case 'm':
+ arg_action = ACTION_MANGLE;
+ break;
+
+ case 'p':
+ arg_path = true;
+ break;
+
+ case 'i':
+ arg_instance = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough arguments.");
+
+ if (arg_template && arg_suffix)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--suffix= and --template= may not be combined.");
+
+ if ((arg_template || arg_suffix) && arg_action == ACTION_MANGLE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--suffix= and --template= are not compatible with --mangle.");
+
+ if (arg_suffix && arg_action == ACTION_UNESCAPE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--suffix is not compatible with --unescape.");
+
+ if (arg_path && !IN_SET(arg_action, ACTION_ESCAPE, ACTION_UNESCAPE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--path may not be combined with --mangle.");
+
+ if (arg_instance && arg_action != ACTION_UNESCAPE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--instance must be used in conjunction with --unescape.");
+
+ if (arg_instance && arg_template)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--instance may not be combined with --template.");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ char **i;
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ STRV_FOREACH(i, argv + optind) {
+ _cleanup_free_ char *e = NULL;
+
+ switch (arg_action) {
+
+ case ACTION_ESCAPE:
+ if (arg_path) {
+ r = unit_name_path_escape(*i, &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape string: %m");
+ } else {
+ e = unit_name_escape(*i);
+ if (!e)
+ return log_oom();
+ }
+
+ if (arg_template) {
+ char *x;
+
+ r = unit_name_replace_instance(arg_template, e, &x);
+ if (r < 0)
+ return log_error_errno(r, "Failed to replace instance: %m");
+
+ free_and_replace(e, x);
+ } else if (arg_suffix) {
+ char *x;
+
+ x = strjoin(e, ".", arg_suffix);
+ if (!x)
+ return log_oom();
+
+ free_and_replace(e, x);
+ }
+
+ break;
+
+ case ACTION_UNESCAPE: {
+ _cleanup_free_ char *name = NULL;
+
+ if (arg_template || arg_instance) {
+ _cleanup_free_ char *template = NULL;
+
+ r = unit_name_to_instance(*i, &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract instance: %m");
+ if (isempty(name))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unit %s is missing the instance name.", *i);
+
+ r = unit_name_template(*i, &template);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract template: %m");
+ if (arg_template && !streq(arg_template, template))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unit %s template %s does not match specified template %s.",
+ *i, template, arg_template);
+ } else {
+ name = strdup(*i);
+ if (!name)
+ return log_oom();
+ }
+
+ if (arg_path)
+ r = unit_name_path_unescape(name, &e);
+ else
+ r = unit_name_unescape(name, &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape string: %m");
+
+ break;
+ }
+
+ case ACTION_MANGLE:
+ r = unit_name_mangle(*i, 0, &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle name: %m");
+
+ break;
+ }
+
+ if (i != argv + optind)
+ fputc(' ', stdout);
+
+ fputs(e, stdout);
+ }
+
+ fputc('\n', stdout);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c
new file mode 100644
index 0000000..742b43f
--- /dev/null
+++ b/src/firstboot/firstboot.c
@@ -0,0 +1,1341 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/loop.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "copy.h"
+#include "dissect-image.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "kbd-util.h"
+#include "libcrypt-util.h"
+#include "locale-util.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "os-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "pwquality-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "tmpfile-util-label.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+
+static char *arg_root = NULL;
+static char *arg_image = NULL;
+static char *arg_locale = NULL; /* $LANG */
+static char *arg_keymap = NULL;
+static char *arg_locale_messages = NULL; /* $LC_MESSAGES */
+static char *arg_timezone = NULL;
+static char *arg_hostname = NULL;
+static sd_id128_t arg_machine_id = {};
+static char *arg_root_password = NULL;
+static char *arg_root_shell = NULL;
+static char *arg_kernel_cmdline = NULL;
+static bool arg_prompt_locale = false;
+static bool arg_prompt_keymap = false;
+static bool arg_prompt_timezone = false;
+static bool arg_prompt_hostname = false;
+static bool arg_prompt_root_password = false;
+static bool arg_prompt_root_shell = false;
+static bool arg_copy_locale = false;
+static bool arg_copy_keymap = false;
+static bool arg_copy_timezone = false;
+static bool arg_copy_root_password = false;
+static bool arg_copy_root_shell = false;
+static bool arg_force = false;
+static bool arg_delete_root_password = false;
+static bool arg_root_password_is_hashed = false;
+static bool arg_welcome = true;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_locale, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_locale_messages, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_keymap, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_timezone, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hostname, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_password, erase_and_freep);
+
+static bool press_any_key(void) {
+ char k = 0;
+ bool need_nl = true;
+
+ printf("-- Press any key to proceed --");
+ fflush(stdout);
+
+ (void) read_one_char(stdin, &k, USEC_INFINITY, &need_nl);
+
+ if (need_nl)
+ putchar('\n');
+
+ return k != 'q';
+}
+
+static void print_welcome(void) {
+ _cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
+ static bool done = false;
+ const char *pn, *ac;
+ int r;
+
+ if (!arg_welcome)
+ return;
+
+ if (done)
+ return;
+
+ r = parse_os_release(
+ arg_root,
+ "PRETTY_NAME", &pretty_name,
+ "ANSI_COLOR", &ansi_color,
+ NULL);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to read os-release file, ignoring: %m");
+
+ pn = isempty(pretty_name) ? "Linux" : pretty_name;
+ ac = isempty(ansi_color) ? "0" : ansi_color;
+
+ if (colors_enabled())
+ printf("\nWelcome to your new installation of \x1B[%sm%s\x1B[0m!\n", ac, pn);
+ else
+ printf("\nWelcome to your new installation of %s!\n", pn);
+
+ printf("\nPlease configure your system!\n\n");
+
+ press_any_key();
+
+ done = true;
+}
+
+static int show_menu(char **x, unsigned n_columns, unsigned width, unsigned percentage) {
+ unsigned break_lines, break_modulo;
+ size_t n, per_column, i, j;
+
+ assert(n_columns > 0);
+
+ n = strv_length(x);
+ per_column = DIV_ROUND_UP(n, n_columns);
+
+ break_lines = lines();
+ if (break_lines > 2)
+ break_lines--;
+
+ /* The first page gets two extra lines, since we want to show
+ * a title */
+ break_modulo = break_lines;
+ if (break_modulo > 3)
+ break_modulo -= 3;
+
+ for (i = 0; i < per_column; i++) {
+
+ for (j = 0; j < n_columns; j ++) {
+ _cleanup_free_ char *e = NULL;
+
+ if (j * per_column + i >= n)
+ break;
+
+ e = ellipsize(x[j * per_column + i], width, percentage);
+ if (!e)
+ return log_oom();
+
+ printf("%4zu) %-*s", j * per_column + i + 1, width, e);
+ }
+
+ putchar('\n');
+
+ /* on the first screen we reserve 2 extra lines for the title */
+ if (i % break_lines == break_modulo) {
+ if (!press_any_key())
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int prompt_loop(const char *text, char **l, unsigned percentage, bool (*is_valid)(const char *name), char **ret) {
+ int r;
+
+ assert(text);
+ assert(is_valid);
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *p = NULL;
+ unsigned u;
+
+ r = ask_string(&p, "%s %s (empty to skip, \"list\" to list options): ",
+ special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), text);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query user: %m");
+
+ if (isempty(p)) {
+ log_warning("No data entered, skipping.");
+ return 0;
+ }
+
+ if (streq(p, "list")) {
+ r = show_menu(l, 3, 22, percentage);
+ if (r < 0)
+ return r;
+
+ putchar('\n');
+ continue;
+ };
+
+ r = safe_atou(p, &u);
+ if (r >= 0) {
+ if (u <= 0 || u > strv_length(l)) {
+ log_error("Specified entry number out of range.");
+ continue;
+ }
+
+ log_info("Selected '%s'.", l[u-1]);
+ if (free_and_strdup(ret, l[u-1]) < 0)
+ return log_oom();
+
+ return 0;
+ }
+
+ if (!is_valid(p)) {
+ log_error("Entered data invalid.");
+ continue;
+ }
+
+ return free_and_replace(*ret, p);
+ }
+}
+
+static bool locale_is_ok(const char *name) {
+
+ if (arg_root)
+ return locale_is_valid(name);
+
+ return locale_is_installed(name) > 0;
+}
+
+static int prompt_locale(void) {
+ _cleanup_strv_free_ char **locales = NULL;
+ int r;
+
+ if (arg_locale || arg_locale_messages)
+ return 0;
+
+ if (!arg_prompt_locale)
+ return 0;
+
+ r = get_locales(&locales);
+ if (r < 0)
+ return log_error_errno(r, "Cannot query locales list: %m");
+
+ if (strv_isempty(locales))
+ log_debug("No locales found, skipping locale selection.");
+ else if (strv_length(locales) == 1) {
+
+ if (streq(locales[0], SYSTEMD_DEFAULT_LOCALE))
+ log_debug("Only installed locale is default locale anyway, not setting locale explicitly.");
+ else {
+ log_debug("Only a single locale available (%s), selecting it as default.", locales[0]);
+
+ arg_locale = strdup(locales[0]);
+ if (!arg_locale)
+ return log_oom();
+
+ /* Not setting arg_locale_message here, since it defaults to LANG anyway */
+ }
+ } else {
+ print_welcome();
+
+ r = prompt_loop("Please enter system locale name or number",
+ locales, 60, locale_is_ok, &arg_locale);
+ if (r < 0)
+ return r;
+
+ if (isempty(arg_locale))
+ return 0;
+
+ r = prompt_loop("Please enter system message locale name or number",
+ locales, 60, locale_is_ok, &arg_locale_messages);
+ if (r < 0)
+ return r;
+
+ /* Suppress the messages setting if it's the same as the main locale anyway */
+ if (streq_ptr(arg_locale, arg_locale_messages))
+ arg_locale_messages = mfree(arg_locale_messages);
+ }
+
+ return 0;
+}
+
+static int process_locale(void) {
+ const char *etc_localeconf;
+ char* locales[3];
+ unsigned i = 0;
+ int r;
+
+ etc_localeconf = prefix_roota(arg_root, "/etc/locale.conf");
+ if (laccess(etc_localeconf, F_OK) >= 0 && !arg_force)
+ return 0;
+
+ if (arg_copy_locale && arg_root) {
+
+ (void) mkdir_parents(etc_localeconf, 0755);
+ r = copy_file("/etc/locale.conf", etc_localeconf, 0, 0644, 0, 0, COPY_REFLINK);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy %s: %m", etc_localeconf);
+
+ log_info("%s copied.", etc_localeconf);
+ return 0;
+ }
+ }
+
+ r = prompt_locale();
+ if (r < 0)
+ return r;
+
+ if (!isempty(arg_locale))
+ locales[i++] = strjoina("LANG=", arg_locale);
+ if (!isempty(arg_locale_messages) && !streq(arg_locale_messages, arg_locale))
+ locales[i++] = strjoina("LC_MESSAGES=", arg_locale_messages);
+
+ if (i == 0)
+ return 0;
+
+ locales[i] = NULL;
+
+ (void) mkdir_parents(etc_localeconf, 0755);
+ r = write_env_file(etc_localeconf, locales);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_localeconf);
+
+ log_info("%s written.", etc_localeconf);
+ return 0;
+}
+
+static int prompt_keymap(void) {
+ _cleanup_strv_free_ char **kmaps = NULL;
+ int r;
+
+ if (arg_keymap)
+ return 0;
+
+ if (!arg_prompt_keymap)
+ return 0;
+
+ r = get_keymaps(&kmaps);
+ if (r == -ENOENT) /* no keymaps installed */
+ return r;
+ if (r < 0)
+ return log_error_errno(r, "Failed to read keymaps: %m");
+
+ print_welcome();
+
+ return prompt_loop("Please enter system keymap name or number",
+ kmaps, 60, keymap_is_valid, &arg_keymap);
+}
+
+static int process_keymap(void) {
+ const char *etc_vconsoleconf;
+ char **keymap;
+ int r;
+
+ etc_vconsoleconf = prefix_roota(arg_root, "/etc/vconsole.conf");
+ if (laccess(etc_vconsoleconf, F_OK) >= 0 && !arg_force)
+ return 0;
+
+ if (arg_copy_keymap && arg_root) {
+
+ (void) mkdir_parents(etc_vconsoleconf, 0755);
+ r = copy_file("/etc/vconsole.conf", etc_vconsoleconf, 0, 0644, 0, 0, COPY_REFLINK);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy %s: %m", etc_vconsoleconf);
+
+ log_info("%s copied.", etc_vconsoleconf);
+ return 0;
+ }
+ }
+
+ r = prompt_keymap();
+ if (r == -ENOENT)
+ return 0; /* don't fail if no keymaps are installed */
+ if (r < 0)
+ return r;
+
+ if (isempty(arg_keymap))
+ return 0;
+
+ keymap = STRV_MAKE(strjoina("KEYMAP=", arg_keymap));
+
+ r = mkdir_parents(etc_vconsoleconf, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create the parent directory of %s: %m", etc_vconsoleconf);
+
+ r = write_env_file(etc_vconsoleconf, keymap);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_vconsoleconf);
+
+ log_info("%s written.", etc_vconsoleconf);
+ return 0;
+}
+
+static bool timezone_is_valid_log_error(const char *name) {
+ return timezone_is_valid(name, LOG_ERR);
+}
+
+static int prompt_timezone(void) {
+ _cleanup_strv_free_ char **zones = NULL;
+ int r;
+
+ if (arg_timezone)
+ return 0;
+
+ if (!arg_prompt_timezone)
+ return 0;
+
+ r = get_timezones(&zones);
+ if (r < 0)
+ return log_error_errno(r, "Cannot query timezone list: %m");
+
+ print_welcome();
+
+ r = prompt_loop("Please enter timezone name or number",
+ zones, 30, timezone_is_valid_log_error, &arg_timezone);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int process_timezone(void) {
+ const char *etc_localtime, *e;
+ int r;
+
+ etc_localtime = prefix_roota(arg_root, "/etc/localtime");
+ if (laccess(etc_localtime, F_OK) >= 0 && !arg_force)
+ return 0;
+
+ if (arg_copy_timezone && arg_root) {
+ _cleanup_free_ char *p = NULL;
+
+ r = readlink_malloc("/etc/localtime", &p);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return log_error_errno(r, "Failed to read host timezone: %m");
+
+ (void) mkdir_parents(etc_localtime, 0755);
+ if (symlink(p, etc_localtime) < 0)
+ return log_error_errno(errno, "Failed to create %s symlink: %m", etc_localtime);
+
+ log_info("%s copied.", etc_localtime);
+ return 0;
+ }
+ }
+
+ r = prompt_timezone();
+ if (r < 0)
+ return r;
+
+ if (isempty(arg_timezone))
+ return 0;
+
+ e = strjoina("../usr/share/zoneinfo/", arg_timezone);
+
+ (void) mkdir_parents(etc_localtime, 0755);
+ if (symlink(e, etc_localtime) < 0)
+ return log_error_errno(errno, "Failed to create %s symlink: %m", etc_localtime);
+
+ log_info("%s written", etc_localtime);
+ return 0;
+}
+
+static int prompt_hostname(void) {
+ int r;
+
+ if (arg_hostname)
+ return 0;
+
+ if (!arg_prompt_hostname)
+ return 0;
+
+ print_welcome();
+ putchar('\n');
+
+ for (;;) {
+ _cleanup_free_ char *h = NULL;
+
+ r = ask_string(&h, "%s Please enter hostname for new system (empty to skip): ", special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET));
+ if (r < 0)
+ return log_error_errno(r, "Failed to query hostname: %m");
+
+ if (isempty(h)) {
+ log_warning("No hostname entered, skipping.");
+ break;
+ }
+
+ if (!hostname_is_valid(h, true)) {
+ log_error("Specified hostname invalid.");
+ continue;
+ }
+
+ /* Get rid of the trailing dot that we allow, but don't want to see */
+ arg_hostname = hostname_cleanup(h);
+ h = NULL;
+ break;
+ }
+
+ return 0;
+}
+
+static int process_hostname(void) {
+ const char *etc_hostname;
+ int r;
+
+ etc_hostname = prefix_roota(arg_root, "/etc/hostname");
+ if (laccess(etc_hostname, F_OK) >= 0 && !arg_force)
+ return 0;
+
+ r = prompt_hostname();
+ if (r < 0)
+ return r;
+
+ if (isempty(arg_hostname))
+ return 0;
+
+ r = write_string_file(etc_hostname, arg_hostname,
+ WRITE_STRING_FILE_CREATE | WRITE_STRING_FILE_SYNC | WRITE_STRING_FILE_MKDIR_0755 |
+ (arg_force ? WRITE_STRING_FILE_ATOMIC : 0));
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_hostname);
+
+ log_info("%s written.", etc_hostname);
+ return 0;
+}
+
+static int process_machine_id(void) {
+ const char *etc_machine_id;
+ char id[SD_ID128_STRING_MAX];
+ int r;
+
+ etc_machine_id = prefix_roota(arg_root, "/etc/machine-id");
+ if (laccess(etc_machine_id, F_OK) >= 0 && !arg_force)
+ return 0;
+
+ if (sd_id128_is_null(arg_machine_id))
+ return 0;
+
+ r = write_string_file(etc_machine_id, sd_id128_to_string(arg_machine_id, id),
+ WRITE_STRING_FILE_CREATE | WRITE_STRING_FILE_SYNC | WRITE_STRING_FILE_MKDIR_0755 |
+ (arg_force ? WRITE_STRING_FILE_ATOMIC : 0));
+ if (r < 0)
+ return log_error_errno(r, "Failed to write machine id: %m");
+
+ log_info("%s written.", etc_machine_id);
+ return 0;
+}
+
+static int prompt_root_password(void) {
+ const char *msg1, *msg2;
+ int r;
+
+ if (arg_root_password)
+ return 0;
+
+ if (!arg_prompt_root_password)
+ return 0;
+
+ print_welcome();
+ putchar('\n');
+
+ msg1 = strjoina(special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), " Please enter a new root password (empty to skip):");
+ msg2 = strjoina(special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), " Please enter new root password again:");
+
+ suggest_passwords();
+
+ for (;;) {
+ _cleanup_strv_free_erase_ char **a = NULL, **b = NULL;
+ _cleanup_free_ char *error = NULL;
+
+ r = ask_password_tty(-1, msg1, NULL, 0, 0, NULL, &a);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query root password: %m");
+ if (strv_length(a) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Received multiple passwords, where we expected one.");
+
+ if (isempty(*a)) {
+ log_warning("No password entered, skipping.");
+ break;
+ }
+
+ r = quality_check_password(*a, "root", &error);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check quality of password: %m");
+ if (r == 0)
+ log_warning("Password is weak, accepting anyway: %s", error);
+
+ r = ask_password_tty(-1, msg2, NULL, 0, 0, NULL, &b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query root password: %m");
+ if (strv_length(b) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Received multiple passwords, where we expected one.");
+
+ if (!streq(*a, *b)) {
+ log_error("Entered passwords did not match, please try again.");
+ continue;
+ }
+
+ arg_root_password = TAKE_PTR(*a);
+ break;
+ }
+
+ return 0;
+}
+
+static int find_shell(const char *path, const char *root) {
+ int r;
+
+ assert(path);
+
+ if (!valid_shell(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "%s is not a valid shell", path);
+
+ r = chase_symlinks(path, root, CHASE_PREFIX_ROOT, NULL, NULL);
+ if (r < 0) {
+ const char *p;
+ p = prefix_roota(root, path);
+ return log_error_errno(r, "Failed to resolve shell %s: %m", p);
+ }
+
+ return 0;
+}
+
+static int prompt_root_shell(void) {
+ int r;
+
+ if (arg_root_shell || !arg_prompt_root_shell)
+ return 0;
+
+ print_welcome();
+ putchar('\n');
+
+ for (;;) {
+ _cleanup_free_ char *s = NULL;
+
+ r = ask_string(&s, "%s Please enter root shell for new system (empty to skip): ", special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET));
+ if (r < 0)
+ return log_error_errno(r, "Failed to query root shell: %m");
+
+ if (isempty(s)) {
+ log_warning("No shell entered, skipping.");
+ break;
+ }
+
+ r = find_shell(s, arg_root);
+ if (r < 0)
+ continue;
+
+ arg_root_shell = TAKE_PTR(s);
+ break;
+ }
+
+ return 0;
+}
+
+static int write_root_passwd(const char *passwd_path, const char *password, const char *shell) {
+ _cleanup_fclose_ FILE *original = NULL, *passwd = NULL;
+ _cleanup_(unlink_and_freep) char *passwd_tmp = NULL;
+ int r;
+
+ assert(password);
+
+ r = fopen_temporary_label("/etc/passwd", passwd_path, &passwd, &passwd_tmp);
+ if (r < 0)
+ return r;
+
+ original = fopen(passwd_path, "re");
+ if (original) {
+ struct passwd *i;
+
+ r = sync_rights(fileno(original), fileno(passwd));
+ if (r < 0)
+ return r;
+
+ while ((r = fgetpwent_sane(original, &i)) > 0) {
+
+ if (streq(i->pw_name, "root")) {
+ i->pw_passwd = (char *) password;
+ if (shell)
+ i->pw_shell = (char *) shell;
+ }
+
+ r = putpwent_sane(i, passwd);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ struct passwd root = {
+ .pw_name = (char *) "root",
+ .pw_passwd = (char *) password,
+ .pw_uid = 0,
+ .pw_gid = 0,
+ .pw_gecos = (char *) "Super User",
+ .pw_dir = (char *) "/root",
+ .pw_shell = (char *) (shell ?: "/bin/sh"),
+ };
+
+ if (errno != ENOENT)
+ return -errno;
+
+ r = fchmod(fileno(passwd), 0644);
+ if (r < 0)
+ return -errno;
+
+ r = putpwent_sane(&root, passwd);
+ if (r < 0)
+ return r;
+ }
+
+ r = fflush_sync_and_check(passwd);
+ if (r < 0)
+ return r;
+
+ r = rename_and_apply_smack_floor_label(passwd_tmp, passwd_path);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int write_root_shadow(const char *shadow_path, const char *hashed_password) {
+ _cleanup_fclose_ FILE *original = NULL, *shadow = NULL;
+ _cleanup_(unlink_and_freep) char *shadow_tmp = NULL;
+ int r;
+
+ assert(hashed_password);
+
+ r = fopen_temporary_label("/etc/shadow", shadow_path, &shadow, &shadow_tmp);
+ if (r < 0)
+ return r;
+
+ original = fopen(shadow_path, "re");
+ if (original) {
+ struct spwd *i;
+
+ r = sync_rights(fileno(original), fileno(shadow));
+ if (r < 0)
+ return r;
+
+ while ((r = fgetspent_sane(original, &i)) > 0) {
+
+ if (streq(i->sp_namp, "root")) {
+ i->sp_pwdp = (char *) hashed_password;
+ i->sp_lstchg = (long) (now(CLOCK_REALTIME) / USEC_PER_DAY);
+ }
+
+ r = putspent_sane(i, shadow);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ struct spwd root = {
+ .sp_namp = (char*) "root",
+ .sp_pwdp = (char *) hashed_password,
+ .sp_lstchg = (long) (now(CLOCK_REALTIME) / USEC_PER_DAY),
+ .sp_min = -1,
+ .sp_max = -1,
+ .sp_warn = -1,
+ .sp_inact = -1,
+ .sp_expire = -1,
+ .sp_flag = (unsigned long) -1, /* this appears to be what everybody does ... */
+ };
+
+ if (errno != ENOENT)
+ return -errno;
+
+ r = fchmod(fileno(shadow), 0000);
+ if (r < 0)
+ return -errno;
+
+ r = putspent_sane(&root, shadow);
+ if (r < 0)
+ return r;
+ }
+
+ r = fflush_sync_and_check(shadow);
+ if (r < 0)
+ return r;
+
+ r = rename_and_apply_smack_floor_label(shadow_tmp, shadow_path);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int process_root_args(void) {
+ _cleanup_close_ int lock = -1;
+ _cleanup_(erase_and_freep) char *_hashed_password = NULL;
+ const char *password, *hashed_password;
+ const char *etc_passwd, *etc_shadow;
+ int r;
+
+ etc_passwd = prefix_roota(arg_root, "/etc/passwd");
+ etc_shadow = prefix_roota(arg_root, "/etc/shadow");
+
+ /* We only mess with passwd and shadow if both do not exist or --force is specified. These files are
+ * tightly coupled and hence we make sure we have permission from the user to create/modify both
+ * files. */
+ if ((laccess(etc_passwd, F_OK) >= 0 || laccess(etc_shadow, F_OK) >= 0) && !arg_force)
+ return 0;
+ /* Don't create/modify passwd and shadow if not asked */
+ if (!(arg_root_password || arg_prompt_root_password || arg_copy_root_password || arg_delete_root_password ||
+ arg_root_shell || arg_prompt_root_shell || arg_copy_root_shell))
+ return 0;
+
+ (void) mkdir_parents(etc_passwd, 0755);
+
+ lock = take_etc_passwd_lock(arg_root);
+ if (lock < 0)
+ return log_error_errno(lock, "Failed to take a lock on %s: %m", etc_passwd);
+
+ if (arg_copy_root_shell && arg_root) {
+ struct passwd *p;
+
+ errno = 0;
+ p = getpwnam("root");
+ if (!p)
+ return log_error_errno(errno_or_else(EIO), "Failed to find passwd entry for root: %m");
+
+ r = free_and_strdup(&arg_root_shell, p->pw_shell);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = prompt_root_shell();
+ if (r < 0)
+ return r;
+
+ if (arg_copy_root_password && arg_root) {
+ struct spwd *p;
+
+ errno = 0;
+ p = getspnam("root");
+ if (!p)
+ return log_error_errno(errno_or_else(EIO), "Failed to find shadow entry for root: %m");
+
+ r = free_and_strdup(&arg_root_password, p->sp_pwdp);
+ if (r < 0)
+ return log_oom();
+
+ arg_root_password_is_hashed = true;
+ }
+
+ r = prompt_root_password();
+ if (r < 0)
+ return r;
+
+ if (arg_root_password && arg_root_password_is_hashed) {
+ password = "x";
+ hashed_password = arg_root_password;
+ } else if (arg_root_password) {
+ r = hash_password(arg_root_password, &_hashed_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to hash password: %m");
+
+ password = "x";
+ hashed_password = _hashed_password;
+
+ } else if (arg_delete_root_password)
+ password = hashed_password = "";
+ else
+ password = hashed_password = "!";
+
+ r = write_root_passwd(etc_passwd, password, arg_root_shell);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_passwd);
+
+ log_info("%s written", etc_passwd);
+
+ r = write_root_shadow(etc_shadow, hashed_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_shadow);
+
+ log_info("%s written.", etc_shadow);
+ return 0;
+}
+
+static int process_kernel_cmdline(void) {
+ const char *etc_kernel_cmdline;
+ int r;
+
+ etc_kernel_cmdline = prefix_roota(arg_root, "/etc/kernel/cmdline");
+ if (laccess(etc_kernel_cmdline, F_OK) >= 0 && !arg_force)
+ return 0;
+
+ if (!arg_kernel_cmdline)
+ return 0;
+
+ r = write_string_file(etc_kernel_cmdline, arg_kernel_cmdline,
+ WRITE_STRING_FILE_CREATE | WRITE_STRING_FILE_SYNC | WRITE_STRING_FILE_MKDIR_0755 |
+ (arg_force ? WRITE_STRING_FILE_ATOMIC : 0));
+ if (r < 0)
+ return log_error_errno(r, "Failed to write %s: %m", etc_kernel_cmdline);
+
+ log_info("%s written.", etc_kernel_cmdline);
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-firstboot", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Configures basic settings of the system.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --root=PATH Operate on an alternate filesystem root\n"
+ " --image=PATH Operate on an alternate filesystem image\n"
+ " --locale=LOCALE Set primary locale (LANG=)\n"
+ " --locale-messages=LOCALE Set message locale (LC_MESSAGES=)\n"
+ " --keymap=KEYMAP Set keymap\n"
+ " --timezone=TIMEZONE Set timezone\n"
+ " --hostname=NAME Set hostname\n"
+ " --machine-ID=ID Set machine ID\n"
+ " --root-password=PASSWORD Set root password from plaintext password\n"
+ " --root-password-file=FILE Set root password from file\n"
+ " --root-password-hashed=HASHED_PASSWORD Set root password from hashed password\n"
+ " --root-shell=SHELL Set root shell\n"
+ " --prompt-locale Prompt the user for locale settings\n"
+ " --prompt-keymap Prompt the user for keymap settings\n"
+ " --prompt-timezone Prompt the user for timezone\n"
+ " --prompt-hostname Prompt the user for hostname\n"
+ " --prompt-root-password Prompt the user for root password\n"
+ " --prompt-root-shell Prompt the user for root shell\n"
+ " --prompt Prompt for all of the above\n"
+ " --copy-locale Copy locale from host\n"
+ " --copy-keymap Copy keymap from host\n"
+ " --copy-timezone Copy timezone from host\n"
+ " --copy-root-password Copy root password from host\n"
+ " --copy-root-shell Copy root shell from host\n"
+ " --copy Copy locale, keymap, timezone, root password\n"
+ " --setup-machine-id Generate a new random machine ID\n"
+ " --force Overwrite existing files\n"
+ " --delete-root-password Delete root password\n"
+ " --welcome=no Disable the welcome text\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ROOT,
+ ARG_IMAGE,
+ ARG_LOCALE,
+ ARG_LOCALE_MESSAGES,
+ ARG_KEYMAP,
+ ARG_TIMEZONE,
+ ARG_HOSTNAME,
+ ARG_MACHINE_ID,
+ ARG_ROOT_PASSWORD,
+ ARG_ROOT_PASSWORD_FILE,
+ ARG_ROOT_PASSWORD_HASHED,
+ ARG_ROOT_SHELL,
+ ARG_KERNEL_COMMAND_LINE,
+ ARG_PROMPT,
+ ARG_PROMPT_LOCALE,
+ ARG_PROMPT_KEYMAP,
+ ARG_PROMPT_TIMEZONE,
+ ARG_PROMPT_HOSTNAME,
+ ARG_PROMPT_ROOT_PASSWORD,
+ ARG_PROMPT_ROOT_SHELL,
+ ARG_COPY,
+ ARG_COPY_LOCALE,
+ ARG_COPY_KEYMAP,
+ ARG_COPY_TIMEZONE,
+ ARG_COPY_ROOT_PASSWORD,
+ ARG_COPY_ROOT_SHELL,
+ ARG_SETUP_MACHINE_ID,
+ ARG_FORCE,
+ ARG_DELETE_ROOT_PASSWORD,
+ ARG_WELCOME,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "image", required_argument, NULL, ARG_IMAGE },
+ { "locale", required_argument, NULL, ARG_LOCALE },
+ { "locale-messages", required_argument, NULL, ARG_LOCALE_MESSAGES },
+ { "keymap", required_argument, NULL, ARG_KEYMAP },
+ { "timezone", required_argument, NULL, ARG_TIMEZONE },
+ { "hostname", required_argument, NULL, ARG_HOSTNAME },
+ { "machine-id", required_argument, NULL, ARG_MACHINE_ID },
+ { "root-password", required_argument, NULL, ARG_ROOT_PASSWORD },
+ { "root-password-file", required_argument, NULL, ARG_ROOT_PASSWORD_FILE },
+ { "root-password-hashed", required_argument, NULL, ARG_ROOT_PASSWORD_HASHED },
+ { "root-shell", required_argument, NULL, ARG_ROOT_SHELL },
+ { "kernel-command-line", required_argument, NULL, ARG_KERNEL_COMMAND_LINE },
+ { "prompt", no_argument, NULL, ARG_PROMPT },
+ { "prompt-locale", no_argument, NULL, ARG_PROMPT_LOCALE },
+ { "prompt-keymap", no_argument, NULL, ARG_PROMPT_KEYMAP },
+ { "prompt-timezone", no_argument, NULL, ARG_PROMPT_TIMEZONE },
+ { "prompt-hostname", no_argument, NULL, ARG_PROMPT_HOSTNAME },
+ { "prompt-root-password", no_argument, NULL, ARG_PROMPT_ROOT_PASSWORD },
+ { "prompt-root-shell", no_argument, NULL, ARG_PROMPT_ROOT_SHELL },
+ { "copy", no_argument, NULL, ARG_COPY },
+ { "copy-locale", no_argument, NULL, ARG_COPY_LOCALE },
+ { "copy-keymap", no_argument, NULL, ARG_COPY_KEYMAP },
+ { "copy-timezone", no_argument, NULL, ARG_COPY_TIMEZONE },
+ { "copy-root-password", no_argument, NULL, ARG_COPY_ROOT_PASSWORD },
+ { "copy-root-shell", no_argument, NULL, ARG_COPY_ROOT_SHELL },
+ { "setup-machine-id", no_argument, NULL, ARG_SETUP_MACHINE_ID },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "delete-root-password", no_argument, NULL, ARG_DELETE_ROOT_PASSWORD },
+ { "welcome", required_argument, NULL, ARG_WELCOME },
+ {}
+ };
+
+ int r, c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, true, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_IMAGE:
+ r = parse_path_argument_and_warn(optarg, false, &arg_image);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_LOCALE:
+ r = free_and_strdup(&arg_locale, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_LOCALE_MESSAGES:
+ r = free_and_strdup(&arg_locale_messages, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_KEYMAP:
+ if (!keymap_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Keymap %s is not valid.", optarg);
+
+ r = free_and_strdup(&arg_keymap, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_TIMEZONE:
+ if (!timezone_is_valid(optarg, LOG_ERR))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Timezone %s is not valid.", optarg);
+
+ r = free_and_strdup(&arg_timezone, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_ROOT_PASSWORD:
+ r = free_and_strdup(&arg_root_password, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_root_password_is_hashed = false;
+ break;
+
+ case ARG_ROOT_PASSWORD_FILE:
+ arg_root_password = mfree(arg_root_password);
+
+ r = read_one_line_file(optarg, &arg_root_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", optarg);
+
+ arg_root_password_is_hashed = false;
+ break;
+
+ case ARG_ROOT_PASSWORD_HASHED:
+ r = free_and_strdup(&arg_root_password, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_root_password_is_hashed = true;
+ break;
+
+ case ARG_ROOT_SHELL:
+ r = find_shell(optarg, arg_root);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&arg_root_shell, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_HOSTNAME:
+ if (!hostname_is_valid(optarg, true))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Host name %s is not valid.", optarg);
+
+ hostname_cleanup(optarg);
+ r = free_and_strdup(&arg_hostname, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_MACHINE_ID:
+ if (sd_id128_from_string(optarg, &arg_machine_id) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse machine id %s.", optarg);
+
+ break;
+
+ case ARG_KERNEL_COMMAND_LINE:
+ r = free_and_strdup(&arg_kernel_cmdline, optarg);
+ if (r < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_PROMPT:
+ arg_prompt_locale = arg_prompt_keymap = arg_prompt_timezone = arg_prompt_hostname =
+ arg_prompt_root_password = arg_prompt_root_shell = true;
+ break;
+
+ case ARG_PROMPT_LOCALE:
+ arg_prompt_locale = true;
+ break;
+
+ case ARG_PROMPT_KEYMAP:
+ arg_prompt_keymap = true;
+ break;
+
+ case ARG_PROMPT_TIMEZONE:
+ arg_prompt_timezone = true;
+ break;
+
+ case ARG_PROMPT_HOSTNAME:
+ arg_prompt_hostname = true;
+ break;
+
+ case ARG_PROMPT_ROOT_PASSWORD:
+ arg_prompt_root_password = true;
+ break;
+
+ case ARG_PROMPT_ROOT_SHELL:
+ arg_prompt_root_shell = true;
+ break;
+
+ case ARG_COPY:
+ arg_copy_locale = arg_copy_keymap = arg_copy_timezone = arg_copy_root_password =
+ arg_copy_root_shell = true;
+ break;
+
+ case ARG_COPY_LOCALE:
+ arg_copy_locale = true;
+ break;
+
+ case ARG_COPY_KEYMAP:
+ arg_copy_keymap = true;
+ break;
+
+ case ARG_COPY_TIMEZONE:
+ arg_copy_timezone = true;
+ break;
+
+ case ARG_COPY_ROOT_PASSWORD:
+ arg_copy_root_password = true;
+ break;
+
+ case ARG_COPY_ROOT_SHELL:
+ arg_copy_root_shell = true;
+ break;
+
+ case ARG_SETUP_MACHINE_ID:
+ r = sd_id128_randomize(&arg_machine_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate randomized machine ID: %m");
+
+ break;
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_DELETE_ROOT_PASSWORD:
+ arg_delete_root_password = true;
+ break;
+
+ case ARG_WELCOME:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --welcome= argument: %s", optarg);
+
+ arg_welcome = r;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ /* We check if the specified locale strings are valid down here, so that we can take --root= into
+ * account when looking for the locale files. */
+
+ if (arg_locale && !locale_is_ok(arg_locale))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Locale %s is not installed.", arg_locale);
+ if (arg_locale_messages && !locale_is_ok(arg_locale_messages))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Locale %s is not installed.", arg_locale_messages);
+
+ if (arg_delete_root_password && (arg_copy_root_password || arg_root_password || arg_prompt_root_password))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--delete-root-password cannot be combined with other root password options");
+
+ if (arg_image && arg_root)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Please specify either --root= or --image=, the combination of both is not supported.");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(umount_and_rmdir_and_freep) char *unlink_dir = NULL;
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (!arg_root && !arg_image) {
+ bool enabled;
+
+ /* If we are called without --root=/--image= let's honour the systemd.firstboot kernel
+ * command line option, because we are called to provision the host with basic settings (as
+ * opposed to some other file system tree/image) */
+
+ r = proc_cmdline_get_bool("systemd.firstboot", &enabled);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse systemd.firstboot= kernel command line argument, ignoring: %m");
+ if (r > 0 && !enabled)
+ return 0; /* disabled */
+ }
+
+ if (arg_image) {
+ assert(!arg_root);
+
+ r = mount_image_privately_interactively(
+ arg_image,
+ DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
+ &unlink_dir,
+ &loop_device,
+ &decrypted_image);
+ if (r < 0)
+ return r;
+
+ arg_root = strdup(unlink_dir);
+ if (!arg_root)
+ return log_oom();
+ }
+
+ r = process_locale();
+ if (r < 0)
+ return r;
+
+ r = process_keymap();
+ if (r < 0)
+ return r;
+
+ r = process_timezone();
+ if (r < 0)
+ return r;
+
+ r = process_hostname();
+ if (r < 0)
+ return r;
+
+ r = process_machine_id();
+ if (r < 0)
+ return r;
+
+ r = process_root_args();
+ if (r < 0)
+ return r;
+
+ r = process_kernel_cmdline();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/fsck/fsck.c b/src/fsck/fsck.c
new file mode 100644
index 0000000..04752fe
--- /dev/null
+++ b/src/fsck/fsck.c
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Holger Hans Peter Freyther
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <sys/file.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fsck-util.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "util.h"
+
+static bool arg_skip = false;
+static bool arg_force = false;
+static bool arg_show_progress = false;
+static const char *arg_repair = "-a";
+
+static void start_target(const char *target, const char *mode) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(target);
+
+ r = bus_connect_system_systemd(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get D-Bus connection: %m");
+ return;
+ }
+
+ log_info("Running request %s/start/replace", target);
+
+ /* Start these units only if we can replace base.target with it */
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnitReplace",
+ &error,
+ NULL,
+ "sss", "basic.target", target, mode);
+
+ /* Don't print a warning if we aren't called during startup */
+ if (r < 0 && !sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_JOB))
+ log_error("Failed to start unit: %s", bus_error_message(&error, r));
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ assert(key);
+
+ if (streq(key, "fsck.mode")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (streq(value, "auto"))
+ arg_force = arg_skip = false;
+ else if (streq(value, "force"))
+ arg_force = true;
+ else if (streq(value, "skip"))
+ arg_skip = true;
+ else
+ log_warning("Invalid fsck.mode= parameter '%s'. Ignoring.", value);
+
+ } else if (streq(key, "fsck.repair")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (streq(value, "preen"))
+ arg_repair = "-a";
+ else {
+ r = parse_boolean(value);
+ if (r > 0)
+ arg_repair = "-y";
+ else if (r == 0)
+ arg_repair = "-n";
+ else
+ log_warning("Invalid fsck.repair= parameter '%s'. Ignoring.", value);
+ }
+ }
+
+#if HAVE_SYSV_COMPAT
+ else if (streq(key, "fastboot") && !value) {
+ log_warning("Please pass 'fsck.mode=skip' rather than 'fastboot' on the kernel command line.");
+ arg_skip = true;
+
+ } else if (streq(key, "forcefsck") && !value) {
+ log_warning("Please pass 'fsck.mode=force' rather than 'forcefsck' on the kernel command line.");
+ arg_force = true;
+ }
+#endif
+
+ return 0;
+}
+
+static void test_files(void) {
+
+#if HAVE_SYSV_COMPAT
+ if (access("/fastboot", F_OK) >= 0) {
+ log_error("Please pass 'fsck.mode=skip' on the kernel command line rather than creating /fastboot on the root file system.");
+ arg_skip = true;
+ }
+
+ if (access("/forcefsck", F_OK) >= 0) {
+ log_error("Please pass 'fsck.mode=force' on the kernel command line rather than creating /forcefsck on the root file system.");
+ arg_force = true;
+ }
+#endif
+
+ arg_show_progress = access("/run/systemd/show-status", F_OK) >= 0;
+}
+
+static double percent(int pass, unsigned long cur, unsigned long max) {
+ /* Values stolen from e2fsck */
+
+ static const int pass_table[] = {
+ 0, 70, 90, 92, 95, 100
+ };
+
+ if (pass <= 0)
+ return 0.0;
+
+ if ((unsigned) pass >= ELEMENTSOF(pass_table) || max == 0)
+ return 100.0;
+
+ return (double) pass_table[pass-1] +
+ ((double) pass_table[pass] - (double) pass_table[pass-1]) *
+ (double) cur / (double) max;
+}
+
+static int process_progress(int fd, FILE* console) {
+ _cleanup_fclose_ FILE *f = NULL;
+ usec_t last = 0;
+ bool locked = false;
+ int clear = 0, r;
+
+ /* No progress pipe to process? Then we are a NOP. */
+ if (fd < 0)
+ return 0;
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return log_debug_errno(errno, "Failed to use pipe: %m");
+ }
+
+ for (;;) {
+ int pass, m;
+ unsigned long cur, max;
+ _cleanup_free_ char *device = NULL;
+ double p;
+ usec_t t;
+
+ if (fscanf(f, "%i %lu %lu %ms", &pass, &cur, &max, &device) != 4) {
+
+ if (ferror(f))
+ r = log_warning_errno(errno, "Failed to read from progress pipe: %m");
+ else if (feof(f))
+ r = 0;
+ else
+ r = log_warning_errno(SYNTHETIC_ERRNO(errno), "Failed to parse progress pipe data");
+
+ break;
+ }
+
+ /* Only show one progress counter at max */
+ if (!locked) {
+ if (flock(fileno(console), LOCK_EX|LOCK_NB) < 0)
+ continue;
+
+ locked = true;
+ }
+
+ /* Only update once every 50ms */
+ t = now(CLOCK_MONOTONIC);
+ if (last + 50 * USEC_PER_MSEC > t)
+ continue;
+
+ last = t;
+
+ p = percent(pass, cur, max);
+ fprintf(console, "\r%s: fsck %3.1f%% complete...\r%n", device, p, &m);
+ fflush(console);
+
+ if (m > clear)
+ clear = m;
+ }
+
+ if (clear > 0) {
+ unsigned j;
+
+ fputc('\r', console);
+ for (j = 0; j < (unsigned) clear; j++)
+ fputc(' ', console);
+ fputc('\r', console);
+ fflush(console);
+ }
+
+ return r;
+}
+
+static int fsck_progress_socket(void) {
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/fsck.progress",
+ };
+
+ _cleanup_close_ int fd = -1;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ return log_warning_errno(errno, "socket(): %m");
+
+ if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+ return log_full_errno(IN_SET(errno, ECONNREFUSED, ENOENT) ? LOG_DEBUG : LOG_WARNING,
+ errno, "Failed to connect to progress socket %s, ignoring: %m", sa.un.sun_path);
+
+ return TAKE_FD(fd);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_close_pair_ int progress_pipe[2] = { -1, -1 };
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ _cleanup_free_ char *dpath = NULL;
+ _cleanup_fclose_ FILE *console = NULL;
+ const char *device, *type;
+ bool root_directory;
+ struct stat st;
+ int r, exit_status;
+ pid_t pid;
+
+ log_setup_service();
+
+ if (argc > 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program expects one or no arguments.");
+
+ umask(0022);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ test_files();
+
+ if (!arg_force && arg_skip)
+ return 0;
+
+ if (argc > 1) {
+ dpath = strdup(argv[1]);
+ if (!dpath)
+ return log_oom();
+
+ device = dpath;
+
+ if (stat(device, &st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", device);
+
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s is not a block device.",
+ device);
+
+ r = sd_device_new_from_devnum(&dev, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect device %s: %m", device);
+
+ root_directory = false;
+ } else {
+ struct timespec times[2];
+
+ /* Find root device */
+
+ if (stat("/", &st) < 0)
+ return log_error_errno(errno, "Failed to stat() the root directory: %m");
+
+ /* Virtual root devices don't need an fsck */
+ if (major(st.st_dev) == 0) {
+ log_debug("Root directory is virtual or btrfs, skipping check.");
+ return 0;
+ }
+
+ /* check if we are already writable */
+ times[0] = st.st_atim;
+ times[1] = st.st_mtim;
+
+ if (utimensat(AT_FDCWD, "/", times, 0) == 0) {
+ log_info("Root directory is writable, skipping check.");
+ return 0;
+ }
+
+ r = sd_device_new_from_devnum(&dev, 'b', st.st_dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect root device: %m");
+
+ r = sd_device_get_devname(dev, &device);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to detect device node of root directory: %m");
+
+ root_directory = true;
+ }
+
+ if (sd_device_get_property_value(dev, "ID_FS_TYPE", &type) >= 0) {
+ r = fsck_exists(type);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Couldn't detect if fsck.%s may be used, proceeding: %m", type);
+ else if (r == 0) {
+ log_device_info(dev, "fsck.%s doesn't exist, not checking file system.", type);
+ return 0;
+ }
+ }
+
+ console = fopen("/dev/console", "we");
+ if (console &&
+ arg_show_progress &&
+ pipe(progress_pipe) < 0)
+ return log_error_errno(errno, "pipe(): %m");
+
+ r = safe_fork("(fsck)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char dash_c[STRLEN("-C") + DECIMAL_STR_MAX(int) + 1];
+ int progress_socket = -1;
+ const char *cmdline[9];
+ int i = 0;
+
+ /* Child */
+
+ /* Close the reading side of the progress pipe */
+ progress_pipe[0] = safe_close(progress_pipe[0]);
+
+ /* Try to connect to a progress management daemon, if there is one */
+ progress_socket = fsck_progress_socket();
+ if (progress_socket >= 0) {
+ /* If this worked we close the progress pipe early, and just use the socket */
+ progress_pipe[1] = safe_close(progress_pipe[1]);
+ xsprintf(dash_c, "-C%i", progress_socket);
+ } else if (progress_pipe[1] >= 0) {
+ /* Otherwise if we have the progress pipe to our own local handle, we use it */
+ xsprintf(dash_c, "-C%i", progress_pipe[1]);
+ } else
+ dash_c[0] = 0;
+
+ cmdline[i++] = "/sbin/fsck";
+ cmdline[i++] = arg_repair;
+ cmdline[i++] = "-T";
+
+ /*
+ * Since util-linux v2.25 fsck uses /run/fsck/<diskname>.lock files.
+ * The previous versions use flock for the device and conflict with
+ * udevd, see https://bugs.freedesktop.org/show_bug.cgi?id=79576#c5
+ */
+ cmdline[i++] = "-l";
+
+ if (!root_directory)
+ cmdline[i++] = "-M";
+
+ if (arg_force)
+ cmdline[i++] = "-f";
+
+ if (!isempty(dash_c))
+ cmdline[i++] = dash_c;
+
+ cmdline[i++] = device;
+ cmdline[i++] = NULL;
+
+ (void) rlimit_nofile_safe();
+
+ execv(cmdline[0], (char**) cmdline);
+ _exit(FSCK_OPERATIONAL_ERROR);
+ }
+
+ if (console) {
+ progress_pipe[1] = safe_close(progress_pipe[1]);
+ (void) process_progress(TAKE_FD(progress_pipe[0]), console);
+ }
+
+ exit_status = wait_for_terminate_and_check("fsck", pid, WAIT_LOG_ABNORMAL);
+ if (exit_status < 0)
+ return exit_status;
+ if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) {
+ log_error("fsck failed with exit status %i.", exit_status);
+
+ if ((exit_status & FSCK_SYSTEM_SHOULD_REBOOT) && root_directory) {
+ /* System should be rebooted. */
+ start_target(SPECIAL_REBOOT_TARGET, "replace-irreversibly");
+ return -EINVAL;
+ } else if (exit_status & (FSCK_SYSTEM_SHOULD_REBOOT | FSCK_ERRORS_LEFT_UNCORRECTED))
+ /* Some other problem */
+ start_target(SPECIAL_EMERGENCY_TARGET, "replace");
+ else
+ log_warning("Ignoring error.");
+ }
+
+ if (exit_status & FSCK_ERROR_CORRECTED)
+ (void) touch("/run/systemd/quotacheck");
+
+ return !!(exit_status & (FSCK_SYSTEM_SHOULD_REBOOT | FSCK_ERRORS_LEFT_UNCORRECTED));
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c
new file mode 100644
index 0000000..15f5892
--- /dev/null
+++ b/src/fstab-generator/fstab-generator.c
@@ -0,0 +1,964 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mount-setup.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+#include "virt.h"
+#include "volatile-util.h"
+
+typedef enum MountpointFlags {
+ NOAUTO = 1 << 0,
+ NOFAIL = 1 << 1,
+ AUTOMOUNT = 1 << 2,
+ MAKEFS = 1 << 3,
+ GROWFS = 1 << 4,
+ RWONLY = 1 << 5,
+} MountpointFlags;
+
+static const char *arg_dest = NULL;
+static const char *arg_dest_late = NULL;
+static bool arg_fstab_enabled = true;
+static bool arg_swap_enabled = true;
+static char *arg_root_what = NULL;
+static char *arg_root_fstype = NULL;
+static char *arg_root_options = NULL;
+static char *arg_root_hash = NULL;
+static int arg_root_rw = -1;
+static char *arg_usr_what = NULL;
+static char *arg_usr_fstype = NULL;
+static char *arg_usr_options = NULL;
+static VolatileMode arg_volatile_mode = _VOLATILE_MODE_INVALID;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_fstype, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_options, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_hash, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_usr_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_usr_fstype, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_usr_options, freep);
+
+static int write_options(FILE *f, const char *options) {
+ _cleanup_free_ char *o = NULL;
+
+ if (isempty(options))
+ return 0;
+
+ if (streq(options, "defaults"))
+ return 0;
+
+ o = specifier_escape(options);
+ if (!o)
+ return log_oom();
+
+ fprintf(f, "Options=%s\n", o);
+ return 1;
+}
+
+static int write_what(FILE *f, const char *what) {
+ _cleanup_free_ char *w = NULL;
+
+ w = specifier_escape(what);
+ if (!w)
+ return log_oom();
+
+ fprintf(f, "What=%s\n", w);
+ return 1;
+}
+
+static int add_swap(
+ const char *what,
+ struct mntent *me,
+ MountpointFlags flags) {
+
+ _cleanup_free_ char *name = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(what);
+ assert(me);
+
+ if (!arg_swap_enabled) {
+ log_info("Swap unit generation disabled on kernel command line, ignoring fstab swap entry for %s.", what);
+ return 0;
+ }
+
+ if (access("/proc/swaps", F_OK) < 0) {
+ log_info("Swap not supported, ignoring fstab swap entry for %s.", what);
+ return 0;
+ }
+
+ if (detect_container() > 0) {
+ log_info("Running in a container, ignoring fstab swap entry for %s.", what);
+ return 0;
+ }
+
+ r = unit_name_from_path(what, ".swap", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ r = generator_open_unit_file(arg_dest, fstab_path(), name, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "Documentation=man:fstab(5) man:systemd-fstab-generator(8)\n"
+ "SourcePath=%s\n",
+ fstab_path());
+
+ r = generator_write_blockdev_dependency(f, what);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "\n"
+ "[Swap]\n");
+
+ r = write_what(f, what);
+ if (r < 0)
+ return r;
+
+ r = write_options(f, me->mnt_opts);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", name);
+
+ /* use what as where, to have a nicer error message */
+ r = generator_write_timeouts(arg_dest, what, what, me->mnt_opts, NULL);
+ if (r < 0)
+ return r;
+
+ if (flags & MAKEFS) {
+ r = generator_hook_up_mkswap(arg_dest, what);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & GROWFS)
+ /* TODO: swap devices must be wiped and recreated */
+ log_warning("%s: growing swap devices is currently unsupported.", what);
+
+ if (!(flags & NOAUTO)) {
+ r = generator_add_symlink(arg_dest, SPECIAL_SWAP_TARGET,
+ (flags & NOFAIL) ? "wants" : "requires", name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static bool mount_is_network(struct mntent *me) {
+ assert(me);
+
+ return fstab_test_option(me->mnt_opts, "_netdev\0") ||
+ fstype_is_network(me->mnt_type);
+}
+
+static bool mount_in_initrd(struct mntent *me) {
+ assert(me);
+
+ return fstab_test_option(me->mnt_opts, "x-initrd.mount\0") ||
+ streq(me->mnt_dir, "/usr");
+}
+
+static int write_timeout(
+ FILE *f,
+ const char *where,
+ const char *opts,
+ const char *filter,
+ const char *variable) {
+
+ _cleanup_free_ char *timeout = NULL;
+ char timespan[FORMAT_TIMESPAN_MAX];
+ usec_t u;
+ int r;
+
+ r = fstab_filter_options(opts, filter, NULL, &timeout, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse options: %m");
+ if (r == 0)
+ return 0;
+
+ r = parse_sec_fix_0(timeout, &u);
+ if (r < 0) {
+ log_warning("Failed to parse timeout for %s, ignoring: %s", where, timeout);
+ return 0;
+ }
+
+ fprintf(f, "%s=%s\n", variable, format_timespan(timespan, sizeof(timespan), u, 0));
+
+ return 0;
+}
+
+static int write_idle_timeout(FILE *f, const char *where, const char *opts) {
+ return write_timeout(f, where, opts,
+ "x-systemd.idle-timeout\0", "TimeoutIdleSec");
+}
+
+static int write_mount_timeout(FILE *f, const char *where, const char *opts) {
+ return write_timeout(f, where, opts,
+ "x-systemd.mount-timeout\0", "TimeoutSec");
+}
+
+static int write_dependency(
+ FILE *f,
+ const char *opts,
+ const char *filter,
+ const char *format) {
+
+ _cleanup_strv_free_ char **names = NULL, **units = NULL;
+ _cleanup_free_ char *res = NULL;
+ char **s;
+ int r;
+
+ assert(f);
+ assert(opts);
+
+ r = fstab_extract_values(opts, filter, &names);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse options: %m");
+ if (r == 0)
+ return 0;
+
+ STRV_FOREACH(s, names) {
+ char *x;
+
+ r = unit_name_mangle_with_suffix(*s, "as dependency", 0, ".mount", &x);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ r = strv_consume(&units, x);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (units) {
+ res = strv_join(units, " ");
+ if (!res)
+ return log_oom();
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ fprintf(f, format, res);
+ REENABLE_WARNING;
+ }
+
+ return 0;
+}
+
+static int write_after(FILE *f, const char *opts) {
+ return write_dependency(f, opts,
+ "x-systemd.after", "After=%1$s\n");
+}
+
+static int write_requires_after(FILE *f, const char *opts) {
+ return write_dependency(f, opts,
+ "x-systemd.requires", "After=%1$s\nRequires=%1$s\n");
+}
+
+static int write_before(FILE *f, const char *opts) {
+ return write_dependency(f, opts,
+ "x-systemd.before", "Before=%1$s\n");
+}
+
+static int write_requires_mounts_for(FILE *f, const char *opts) {
+ _cleanup_strv_free_ char **paths = NULL, **paths_escaped = NULL;
+ _cleanup_free_ char *res = NULL;
+ int r;
+
+ assert(f);
+ assert(opts);
+
+ r = fstab_extract_values(opts, "x-systemd.requires-mounts-for", &paths);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse options: %m");
+ if (r == 0)
+ return 0;
+
+ r = specifier_escape_strv(paths, &paths_escaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape paths: %m");
+
+ res = strv_join(paths_escaped, " ");
+ if (!res)
+ return log_oom();
+
+ fprintf(f, "RequiresMountsFor=%s\n", res);
+
+ return 0;
+}
+
+static int write_extra_dependencies(FILE *f, const char *opts) {
+ int r;
+
+ assert(f);
+
+ if (opts) {
+ r = write_after(f, opts);
+ if (r < 0)
+ return r;
+ r = write_requires_after(f, opts);
+ if (r < 0)
+ return r;
+ r = write_before(f, opts);
+ if (r < 0)
+ return r;
+ r = write_requires_mounts_for(f, opts);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int add_mount(
+ const char *dest,
+ const char *what,
+ const char *where,
+ const char *original_where,
+ const char *fstype,
+ const char *opts,
+ int passno,
+ MountpointFlags flags,
+ const char *post,
+ const char *source) {
+
+ _cleanup_free_ char
+ *name = NULL,
+ *automount_name = NULL,
+ *filtered = NULL,
+ *where_escaped = NULL;
+ _cleanup_strv_free_ char **wanted_by = NULL, **required_by = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(what);
+ assert(where);
+ assert(opts);
+ assert(post);
+ assert(source);
+
+ if (streq_ptr(fstype, "autofs"))
+ return 0;
+
+ if (!is_path(where)) {
+ log_warning("Mount point %s is not a valid path, ignoring.", where);
+ return 0;
+ }
+
+ if (mount_point_is_api(where) ||
+ mount_point_ignore(where))
+ return 0;
+
+ r = fstab_extract_values(opts, "x-systemd.wanted-by", &wanted_by);
+ if (r < 0)
+ return r;
+
+ r = fstab_extract_values(opts, "x-systemd.required-by", &required_by);
+ if (r < 0)
+ return r;
+
+ if (path_equal(where, "/")) {
+ if (flags & NOAUTO)
+ log_warning("Ignoring \"noauto\" for root device");
+ if (flags & NOFAIL)
+ log_warning("Ignoring \"nofail\" for root device");
+ if (flags & AUTOMOUNT)
+ log_warning("Ignoring automount option for root device");
+ if (!strv_isempty(wanted_by))
+ log_warning("Ignoring \"x-systemd.wanted-by=\" for root device");
+ if (!strv_isempty(required_by))
+ log_warning("Ignoring \"x-systemd.required-by=\" for root device");
+
+ required_by = strv_free(required_by);
+ wanted_by = strv_free(wanted_by);
+ SET_FLAG(flags, NOAUTO | NOFAIL | AUTOMOUNT, false);
+ }
+
+ r = unit_name_from_path(where, ".mount", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ r = generator_open_unit_file(dest, fstab_path(), name, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "Documentation=man:fstab(5) man:systemd-fstab-generator(8)\n"
+ "SourcePath=%s\n",
+ source);
+
+ if (STRPTR_IN_SET(fstype, "nfs", "nfs4") && !(flags & AUTOMOUNT) &&
+ fstab_test_yes_no_option(opts, "bg\0" "fg\0")) {
+ /* The default retry timeout that mount.nfs uses for 'bg' mounts
+ * is 10000 minutes, where as it uses 2 minutes for 'fg' mounts.
+ * As we are making 'bg' mounts look like an 'fg' mount to
+ * mount.nfs (so systemd can manage the job-control aspects of 'bg'),
+ * we need to explicitly preserve that default, and also ensure
+ * the systemd mount-timeout doesn't interfere.
+ * By placing these options first, they can be over-ridden by
+ * settings in /etc/fstab. */
+ opts = strjoina("x-systemd.mount-timeout=infinity,retry=10000,nofail,", opts, ",fg");
+ SET_FLAG(flags, NOFAIL, true);
+ }
+
+ r = write_extra_dependencies(f, opts);
+ if (r < 0)
+ return r;
+
+ if (passno != 0) {
+ r = generator_write_fsck_deps(f, dest, what, where, fstype);
+ if (r < 0)
+ return r;
+ }
+
+ r = generator_write_blockdev_dependency(f, what);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "\n"
+ "[Mount]\n");
+
+ if (original_where)
+ fprintf(f, "# Canonicalized from %s\n", original_where);
+
+ where_escaped = specifier_escape(where);
+ if (!where_escaped)
+ return log_oom();
+ fprintf(f, "Where=%s\n", where_escaped);
+
+ r = write_what(f, what);
+ if (r < 0)
+ return r;
+
+ if (!isempty(fstype) && !streq(fstype, "auto")) {
+ _cleanup_free_ char *t;
+
+ t = specifier_escape(fstype);
+ if (!t)
+ return -ENOMEM;
+
+ fprintf(f, "Type=%s\n", t);
+ }
+
+ r = generator_write_timeouts(dest, what, where, opts, &filtered);
+ if (r < 0)
+ return r;
+
+ r = generator_write_device_deps(dest, what, where, opts);
+ if (r < 0)
+ return r;
+
+ r = write_mount_timeout(f, where, opts);
+ if (r < 0)
+ return r;
+
+ r = write_options(f, filtered);
+ if (r < 0)
+ return r;
+
+ if (flags & RWONLY)
+ fprintf(f, "ReadWriteOnly=yes\n");
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", name);
+
+ if (flags & MAKEFS) {
+ r = generator_hook_up_mkfs(dest, what, where, fstype);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & GROWFS) {
+ r = generator_hook_up_growfs(dest, where, post);
+ if (r < 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, AUTOMOUNT)) {
+ if (!FLAGS_SET(flags, NOAUTO) && strv_isempty(wanted_by) && strv_isempty(required_by)) {
+ r = generator_add_symlink(dest, post,
+ (flags & NOFAIL) ? "wants" : "requires", name);
+ if (r < 0)
+ return r;
+ } else {
+ char **s;
+
+ STRV_FOREACH(s, wanted_by) {
+ r = generator_add_symlink(dest, *s, "wants", name);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(s, required_by) {
+ r = generator_add_symlink(dest, *s, "requires", name);
+ if (r < 0)
+ return r;
+ }
+ }
+ } else {
+ r = unit_name_from_path(where, ".automount", &automount_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ f = safe_fclose(f);
+
+ r = generator_open_unit_file(dest, fstab_path(), automount_name, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "SourcePath=%s\n"
+ "Documentation=man:fstab(5) man:systemd-fstab-generator(8)\n",
+ source);
+
+ fprintf(f,
+ "\n"
+ "[Automount]\n"
+ "Where=%s\n",
+ where_escaped);
+
+ r = write_idle_timeout(f, where, opts);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", automount_name);
+
+ r = generator_add_symlink(dest, post,
+ (flags & NOFAIL) ? "wants" : "requires", automount_name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int parse_fstab(bool initrd) {
+ _cleanup_endmntent_ FILE *f = NULL;
+ const char *fstab;
+ struct mntent *me;
+ int r = 0;
+
+ fstab = initrd ? "/sysroot/etc/fstab" : fstab_path();
+ log_debug("Parsing %s...", fstab);
+
+ f = setmntent(fstab, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", fstab);
+ }
+
+ while ((me = getmntent(f))) {
+ _cleanup_free_ char *where = NULL, *what = NULL, *canonical_where = NULL;
+ bool makefs, growfs, noauto, nofail, rwonly;
+ int k;
+
+ if (initrd && !mount_in_initrd(me))
+ continue;
+
+ what = fstab_node_to_udev_node(me->mnt_fsname);
+ if (!what)
+ return log_oom();
+
+ if (is_device_path(what) && path_is_read_only_fs("/sys") > 0) {
+ log_info("Running in a container, ignoring fstab device entry for %s.", what);
+ continue;
+ }
+
+ where = strdup(me->mnt_dir);
+ if (!where)
+ return log_oom();
+
+ if (is_path(where)) {
+ path_simplify(where, false);
+
+ /* Follow symlinks here; see 5261ba901845c084de5a8fd06500ed09bfb0bd80 which makes sense for
+ * mount units, but causes problems since it historically worked to have symlinks in e.g.
+ * /etc/fstab. So we canonicalize here. Note that we use CHASE_NONEXISTENT to handle the case
+ * where a symlink refers to another mount target; this works assuming the sub-mountpoint
+ * target is the final directory. */
+ r = chase_symlinks(where, initrd ? "/sysroot" : NULL,
+ CHASE_PREFIX_ROOT | CHASE_NONEXISTENT,
+ &canonical_where, NULL);
+ if (r < 0) /* If we can't canonicalize we continue on as if it wasn't a symlink */
+ log_debug_errno(r, "Failed to read symlink target for %s, ignoring: %m", where);
+ else if (streq(canonical_where, where)) /* If it was fully canonicalized, suppress the change */
+ canonical_where = mfree(canonical_where);
+ else
+ log_debug("Canonicalized what=%s where=%s to %s", what, where, canonical_where);
+ }
+
+ makefs = fstab_test_option(me->mnt_opts, "x-systemd.makefs\0");
+ growfs = fstab_test_option(me->mnt_opts, "x-systemd.growfs\0");
+ rwonly = fstab_test_option(me->mnt_opts, "x-systemd.rw-only\0");
+ noauto = fstab_test_yes_no_option(me->mnt_opts, "noauto\0" "auto\0");
+ nofail = fstab_test_yes_no_option(me->mnt_opts, "nofail\0" "fail\0");
+
+ log_debug("Found entry what=%s where=%s type=%s makefs=%s growfs=%s noauto=%s nofail=%s",
+ what, where, me->mnt_type,
+ yes_no(makefs), yes_no(growfs),
+ yes_no(noauto), yes_no(nofail));
+
+ if (streq(me->mnt_type, "swap"))
+ k = add_swap(what, me,
+ makefs*MAKEFS | growfs*GROWFS | noauto*NOAUTO | nofail*NOFAIL);
+ else {
+ bool automount;
+ const char *post;
+
+ automount = fstab_test_option(me->mnt_opts,
+ "comment=systemd.automount\0"
+ "x-systemd.automount\0");
+ if (initrd)
+ post = SPECIAL_INITRD_FS_TARGET;
+ else if (mount_is_network(me))
+ post = SPECIAL_REMOTE_FS_TARGET;
+ else
+ post = SPECIAL_LOCAL_FS_TARGET;
+
+ k = add_mount(arg_dest,
+ what,
+ canonical_where ?: where,
+ canonical_where ? where: NULL,
+ me->mnt_type,
+ me->mnt_opts,
+ me->mnt_passno,
+ makefs*MAKEFS | growfs*GROWFS | noauto*NOAUTO | nofail*NOFAIL | automount*AUTOMOUNT | rwonly*RWONLY,
+ post,
+ fstab);
+ }
+
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int add_sysroot_mount(void) {
+ _cleanup_free_ char *what = NULL;
+ const char *opts;
+ int r;
+
+ if (isempty(arg_root_what)) {
+ log_debug("Could not find a root= entry on the kernel command line.");
+ return 0;
+ }
+
+ if (streq(arg_root_what, "gpt-auto")) {
+ /* This is handled by the gpt-auto generator */
+ log_debug("Skipping root directory handling, as gpt-auto was requested.");
+ return 0;
+ }
+
+ if (path_equal(arg_root_what, "/dev/nfs")) {
+ /* This is handled by the kernel or the initrd */
+ log_debug("Skipping root directory handling, as /dev/nfs was requested.");
+ return 0;
+ }
+
+ what = fstab_node_to_udev_node(arg_root_what);
+ if (!what)
+ return log_oom();
+
+ if (!arg_root_options)
+ opts = arg_root_rw > 0 ? "rw" : "ro";
+ else if (arg_root_rw >= 0 ||
+ !fstab_test_option(arg_root_options, "ro\0" "rw\0"))
+ opts = strjoina(arg_root_options, ",", arg_root_rw > 0 ? "rw" : "ro");
+ else
+ opts = arg_root_options;
+
+ log_debug("Found entry what=%s where=/sysroot type=%s", what, strna(arg_root_fstype));
+
+ if (is_device_path(what)) {
+ r = generator_write_initrd_root_device_deps(arg_dest, what);
+ if (r < 0)
+ return r;
+ }
+
+ return add_mount(arg_dest,
+ what,
+ "/sysroot",
+ NULL,
+ arg_root_fstype,
+ opts,
+ is_device_path(what) ? 1 : 0, /* passno */
+ 0, /* makefs off, growfs off, noauto off, nofail off, automount off */
+ SPECIAL_INITRD_ROOT_FS_TARGET,
+ "/proc/cmdline");
+}
+
+static int add_sysroot_usr_mount(void) {
+ _cleanup_free_ char *what = NULL;
+ const char *opts;
+
+ if (!arg_usr_what && !arg_usr_fstype && !arg_usr_options)
+ return 0;
+
+ if (arg_root_what && !arg_usr_what) {
+ /* Copy over the root device, in case the /usr mount just differs in a mount option (consider btrfs subvolumes) */
+ arg_usr_what = strdup(arg_root_what);
+ if (!arg_usr_what)
+ return log_oom();
+ }
+
+ if (arg_root_fstype && !arg_usr_fstype) {
+ arg_usr_fstype = strdup(arg_root_fstype);
+ if (!arg_usr_fstype)
+ return log_oom();
+ }
+
+ if (arg_root_options && !arg_usr_options) {
+ arg_usr_options = strdup(arg_root_options);
+ if (!arg_usr_options)
+ return log_oom();
+ }
+
+ if (!arg_usr_what)
+ return 0;
+
+ what = fstab_node_to_udev_node(arg_usr_what);
+ if (!what)
+ return log_oom();
+
+ if (!arg_usr_options)
+ opts = arg_root_rw > 0 ? "rw" : "ro";
+ else if (!fstab_test_option(arg_usr_options, "ro\0" "rw\0"))
+ opts = strjoina(arg_usr_options, ",", arg_root_rw > 0 ? "rw" : "ro");
+ else
+ opts = arg_usr_options;
+
+ log_debug("Found entry what=%s where=/sysroot/usr type=%s", what, strna(arg_usr_fstype));
+ return add_mount(arg_dest,
+ what,
+ "/sysroot/usr",
+ NULL,
+ arg_usr_fstype,
+ opts,
+ is_device_path(what) ? 1 : 0, /* passno */
+ 0,
+ SPECIAL_INITRD_FS_TARGET,
+ "/proc/cmdline");
+}
+
+static int add_volatile_root(void) {
+
+ /* Let's add in systemd-remount-volatile.service which will remount the root device to tmpfs if this is
+ * requested (or as an overlayfs), leaving only /usr from the root mount inside. */
+
+ if (!IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_OVERLAY))
+ return 0;
+
+ return generator_add_symlink(arg_dest, SPECIAL_INITRD_ROOT_FS_TARGET, "requires",
+ SYSTEM_DATA_UNIT_PATH "/" SPECIAL_VOLATILE_ROOT_SERVICE);
+}
+
+static int add_volatile_var(void) {
+
+ if (arg_volatile_mode != VOLATILE_STATE)
+ return 0;
+
+ /* If requested, mount /var as tmpfs, but do so only if there's nothing else defined for this. */
+
+ return add_mount(arg_dest_late,
+ "tmpfs",
+ "/var",
+ NULL,
+ "tmpfs",
+ "mode=0755" TMPFS_LIMITS_VAR,
+ 0,
+ 0,
+ SPECIAL_LOCAL_FS_TARGET,
+ "/proc/cmdline");
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ /* root=, usr=, usrfstype= and roofstype= may occur more than once, the last
+ * instance should take precedence. In the case of multiple rootflags=
+ * or usrflags= the arguments should be concatenated */
+
+ if (STR_IN_SET(key, "fstab", "rd.fstab")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse fstab switch %s. Ignoring.", value);
+ else
+ arg_fstab_enabled = r;
+
+ } else if (streq(key, "root")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_root_what, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "rootfstype")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_root_fstype, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "rootflags")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (!strextend_with_separator(&arg_root_options, ",", value, NULL))
+ return log_oom();
+
+ } else if (streq(key, "roothash")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_root_hash, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "mount.usr")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_usr_what, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "mount.usrfstype")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_usr_fstype, value) < 0)
+ return log_oom();
+
+ } else if (streq(key, "mount.usrflags")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (!strextend_with_separator(&arg_usr_options, ",", value, NULL))
+ return log_oom();
+
+ } else if (streq(key, "rw") && !value)
+ arg_root_rw = true;
+ else if (streq(key, "ro") && !value)
+ arg_root_rw = false;
+ else if (streq(key, "systemd.volatile")) {
+ VolatileMode m;
+
+ if (value) {
+ m = volatile_mode_from_string(value);
+ if (m < 0)
+ log_warning("Failed to parse systemd.volatile= argument: %s", value);
+ else
+ arg_volatile_mode = m;
+ } else
+ arg_volatile_mode = VOLATILE_YES;
+
+ } else if (streq(key, "systemd.swap")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse systemd.swap switch %s. Ignoring.", value);
+ else
+ arg_swap_enabled = r;
+ }
+
+ return 0;
+}
+
+static int determine_root(void) {
+ /* If we have a root hash but no root device then Verity is used, and we use the "root" DM device as root. */
+
+ if (arg_root_what)
+ return 0;
+
+ if (!arg_root_hash)
+ return 0;
+
+ arg_root_what = strdup("/dev/mapper/root");
+ if (!arg_root_what)
+ return log_oom();
+
+ log_info("Using verity root device %s.", arg_root_what);
+
+ return 1;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r, r2 = 0, r3 = 0;
+
+ assert_se(arg_dest = dest);
+ assert_se(arg_dest_late = dest_late);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ (void) determine_root();
+
+ /* Always honour root= and usr= in the kernel command line if we are in an initrd */
+ if (in_initrd()) {
+ r = add_sysroot_mount();
+
+ r2 = add_sysroot_usr_mount();
+
+ r3 = add_volatile_root();
+ } else
+ r = add_volatile_var();
+
+ /* Honour /etc/fstab only when that's enabled */
+ if (arg_fstab_enabled) {
+ /* Parse the local /etc/fstab, possibly from the initrd */
+ r2 = parse_fstab(false);
+
+ /* If running in the initrd also parse the /etc/fstab from the host */
+ if (in_initrd())
+ r3 = parse_fstab(true);
+ else
+ r3 = generator_enable_remount_fs_service(arg_dest);
+ }
+
+ return r < 0 ? r : r2 < 0 ? r2 : r3;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/fuzz/fuzz-bus-label.c b/src/fuzz/fuzz-bus-label.c
new file mode 100644
index 0000000..93bac9a
--- /dev/null
+++ b/src/fuzz/fuzz-bus-label.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "fuzz.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *unescaped = NULL, *escaped = NULL;
+
+ unescaped = bus_label_unescape_n((const char*)data, size);
+ assert_se(unescaped != NULL);
+ escaped = bus_label_escape(unescaped);
+ assert_se(escaped != NULL);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-bus-message.c b/src/fuzz/fuzz-bus-message.c
new file mode 100644
index 0000000..af3dbf4
--- /dev/null
+++ b/src/fuzz/fuzz-bus-message.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-message.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fuzz.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *out = NULL; /* out should be freed after g */
+ size_t out_size;
+ _cleanup_fclose_ FILE *g = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ void *buffer = NULL;
+ int r;
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ r = sd_bus_new(&bus);
+ assert_se(r >= 0);
+
+ assert_se(buffer = memdup(data, size));
+
+ r = bus_message_from_malloc(bus, buffer, size, NULL, 0, NULL, &m);
+ if (r == -EBADMSG)
+ return 0;
+ assert_se(r >= 0);
+ TAKE_PTR(buffer);
+
+ if (getenv_bool("SYSTEMD_FUZZ_OUTPUT") <= 0)
+ assert_se(g = open_memstream_unlocked(&out, &out_size));
+
+ sd_bus_message_dump(m, g ?: stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-calendarspec.c b/src/fuzz/fuzz-calendarspec.c
new file mode 100644
index 0000000..8080172
--- /dev/null
+++ b/src/fuzz/fuzz-calendarspec.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "calendarspec.h"
+#include "fd-util.h"
+#include "fuzz.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(calendar_spec_freep) CalendarSpec *cspec = NULL;
+ _cleanup_free_ char *str = NULL, *p = NULL;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ str = memdup_suffix0(data, size);
+
+ if (calendar_spec_from_string(str, &cspec) >= 0) {
+ (void) calendar_spec_valid(cspec);
+ (void) calendar_spec_normalize(cspec);
+ (void) calendar_spec_to_string(cspec, &p);
+ }
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-catalog.c b/src/fuzz/fuzz-catalog.c
new file mode 100644
index 0000000..f013455
--- /dev/null
+++ b/src/fuzz/fuzz-catalog.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "catalog.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/fuzz-catalog.XXXXXX";
+ _cleanup_close_ int fd = -1;
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(h = ordered_hashmap_new(&catalog_hash_ops));
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, data, size) == (ssize_t) size);
+
+ (void) catalog_import_file(h, name);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-compress.c b/src/fuzz/fuzz-compress.c
new file mode 100644
index 0000000..f94fd06
--- /dev/null
+++ b/src/fuzz/fuzz-compress.c
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fuzz.h"
+
+static int compress(int alg,
+ const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+
+ if (alg == OBJECT_COMPRESSED_LZ4)
+ return compress_blob_lz4(src, src_size, dst, dst_alloc_size, dst_size);
+ if (alg == OBJECT_COMPRESSED_XZ)
+ return compress_blob_xz(src, src_size, dst, dst_alloc_size, dst_size);
+ return -EOPNOTSUPP;
+}
+
+typedef struct header {
+ uint32_t alg:2; /* We have only two compression algorithms so far, but we might add
+ * more in the future. Let's make this a bit wider so our fuzzer
+ * cases remain stable in the future. */
+ uint32_t sw_len;
+ uint32_t sw_alloc;
+ uint32_t reserved[3]; /* Extra space to keep fuzz cases stable in case we need to
+ * add stuff in the future. */
+ uint8_t data[];
+} header;
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ void *buf = NULL, *buf2 = NULL;
+ int r;
+
+ if (size < offsetof(header, data) + 1)
+ return 0;
+
+ const header *h = (struct header*) data;
+ const size_t data_len = size - offsetof(header, data);
+
+ int alg = h->alg;
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ log_info("Using compression %s, data size=%zu",
+ object_compressed_to_string(alg) ?: "(none)",
+ data_len);
+
+ buf = malloc(MAX(size, 128u)); /* Make the buffer a bit larger for very small data */
+ if (!buf) {
+ log_oom();
+ return 0;
+ }
+
+ size_t csize;
+ r = compress(alg, h->data, data_len, buf, size, &csize);
+ if (r < 0) {
+ log_error_errno(r, "Compression failed: %m");
+ return 0;
+ }
+
+ log_debug("Compressed %zu bytes to → %zu bytes", data_len, csize);
+
+ size_t sw_alloc = MAX(h->sw_alloc, 1u);
+ buf2 = malloc(sw_alloc);
+ if (!buf) {
+ log_oom();
+ return 0;
+ }
+
+ size_t sw_len = MIN(data_len - 1, h->sw_len);
+
+ r = decompress_startswith(alg, buf, csize, &buf2, &sw_alloc, h->data, sw_len, h->data[sw_len]);
+ assert_se(r > 0);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-dhcp-server.c b/src/fuzz/fuzz-dhcp-server.c
new file mode 100644
index 0000000..c854d92
--- /dev/null
+++ b/src/fuzz/fuzz-dhcp-server.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "fuzz.h"
+
+#include "sd-dhcp-server.c"
+
+/* stub out network so that the server doesn't send */
+ssize_t sendto(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen) {
+ return len;
+}
+
+ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags) {
+ return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+ struct in_addr address = {.s_addr = htobe32(UINT32_C(10) << 24 | UINT32_C(1))};
+ static const uint8_t chaddr[] = {3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3};
+ uint8_t *client_id;
+ DHCPLease *lease;
+ int pool_offset;
+
+ if (size < sizeof(DHCPMessage))
+ return 0;
+
+ assert_se(sd_dhcp_server_new(&server, 1) >= 0);
+ server->fd = open("/dev/null", O_RDWR|O_CLOEXEC|O_NOCTTY);
+ assert_se(server->fd >= 0);
+ assert_se(sd_dhcp_server_configure_pool(server, &address, 24, 0, 0) >= 0);
+
+ /* add a lease to the pool to expose additional code paths */
+ client_id = malloc(2);
+ assert_se(client_id);
+ client_id[0] = 2;
+ client_id[1] = 2;
+ lease = new0(DHCPLease, 1);
+ assert_se(lease);
+ lease->client_id.length = 2;
+ lease->client_id.data = client_id;
+ lease->address = htobe32(UINT32_C(10) << 24 | UINT32_C(2));
+ lease->gateway = htobe32(UINT32_C(10) << 24 | UINT32_C(1));
+ lease->expiration = UINT64_MAX;
+ memcpy(lease->chaddr, chaddr, 16);
+ pool_offset = get_pool_offset(server, lease->address);
+ server->bound_leases[pool_offset] = lease;
+ assert_se(hashmap_put(server->leases_by_client_id, &lease->client_id, lease) >= 0);
+
+ (void) dhcp_server_handle_message(server, (DHCPMessage*)data, size);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-dhcp-server.options b/src/fuzz/fuzz-dhcp-server.options
new file mode 100644
index 0000000..5c330e5
--- /dev/null
+++ b/src/fuzz/fuzz-dhcp-server.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 600
diff --git a/src/fuzz/fuzz-dhcp6-client.c b/src/fuzz/fuzz-dhcp6-client.c
new file mode 100644
index 0000000..e5e70dd
--- /dev/null
+++ b/src/fuzz/fuzz-dhcp6-client.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "sd-dhcp6-client.h"
+#include "sd-event.h"
+
+#include "dhcp6-internal.h"
+#include "dhcp6-protocol.h"
+#include "fd-util.h"
+#include "fuzz.h"
+
+static int test_dhcp_fd[2] = { -1, -1 };
+
+int dhcp6_network_send_udp_socket(int s, struct in6_addr *server_address,
+ const void *packet, size_t len) {
+ return len;
+}
+
+int dhcp6_network_bind_udp_socket(int index, struct in6_addr *local_address) {
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_dhcp_fd) >= 0);
+ return test_dhcp_fd[0];
+}
+
+static void fuzz_client(const uint8_t *data, size_t size, bool is_information_request_enabled) {
+ _cleanup_(sd_event_unrefp) sd_event *e;
+ _cleanup_(sd_dhcp6_client_unrefp) sd_dhcp6_client *client = NULL;
+ struct in6_addr address = { { { 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01 } } };
+
+ assert_se(sd_event_new(&e) >= 0);
+ assert_se(sd_dhcp6_client_new(&client) >= 0);
+ assert_se(sd_dhcp6_client_attach_event(client, e, 0) >= 0);
+ assert_se(sd_dhcp6_client_set_ifindex(client, 42) == 0);
+ assert_se(sd_dhcp6_client_set_local_address(client, &address) >= 0);
+ assert_se(sd_dhcp6_client_set_information_request(client, is_information_request_enabled) == 0);
+
+ assert_se(sd_dhcp6_client_start(client) >= 0);
+
+ if (size >= sizeof(DHCP6Message))
+ assert_se(sd_dhcp6_client_set_transaction_id(client, htobe32(0x00ffffff) & ((const DHCP6Message *) data)->transaction_id) == 0);
+
+ assert_se(write(test_dhcp_fd[1], data, size) == (ssize_t) size);
+
+ sd_event_run(e, (uint64_t) -1);
+
+ assert_se(sd_dhcp6_client_stop(client) >= 0);
+
+ test_dhcp_fd[1] = safe_close(test_dhcp_fd[1]);
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ if (size > 65536)
+ return 0;
+
+ /* This triggers client_receive_advertise */
+ fuzz_client(data, size, false);
+
+ /* This triggers client_receive_reply */
+ fuzz_client(data, size, true);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-dhcp6-client.options b/src/fuzz/fuzz-dhcp6-client.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/fuzz/fuzz-dhcp6-client.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/fuzz/fuzz-dns-packet.c b/src/fuzz/fuzz-dns-packet.c
new file mode 100644
index 0000000..b9a0aa1
--- /dev/null
+++ b/src/fuzz/fuzz-dns-packet.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fuzz.h"
+#include "memory-util.h"
+#include "resolved-dns-packet.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+
+ if (size > DNS_PACKET_SIZE_MAX)
+ return 0;
+
+ assert_se(dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX) >= 0);
+ p->size = 0; /* by default append starts after the header, undo that */
+ assert_se(dns_packet_append_blob(p, data, size, NULL) >= 0);
+ if (size < DNS_PACKET_HEADER_SIZE) {
+ /* make sure we pad the packet back up to the minimum header size */
+ assert_se(p->allocated >= DNS_PACKET_HEADER_SIZE);
+ memzero(DNS_PACKET_DATA(p) + size, DNS_PACKET_HEADER_SIZE - size);
+ p->size = DNS_PACKET_HEADER_SIZE;
+ }
+ (void) dns_packet_extract(p);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-dns-packet.options b/src/fuzz/fuzz-dns-packet.options
new file mode 100644
index 0000000..0824b19
--- /dev/null
+++ b/src/fuzz/fuzz-dns-packet.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65535
diff --git a/src/fuzz/fuzz-env-file.c b/src/fuzz/fuzz-env-file.c
new file mode 100644
index 0000000..e0dac26
--- /dev/null
+++ b/src/fuzz/fuzz-env-file.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fileio.h"
+#include "fd-util.h"
+#include "fuzz.h"
+#include "strv.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **rl = NULL, **rlp = NULL;
+
+ if (size == 0 || size > 65535)
+ return 0;
+
+ f = fmemopen_unlocked((char*) data, size, "re");
+ assert_se(f);
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ (void) load_env_file(f, NULL, &rl);
+ assert_se(fseek(f, 0, SEEK_SET) == 0);
+ (void) load_env_file_pairs(f, NULL, &rlp);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-env-file.options b/src/fuzz/fuzz-env-file.options
new file mode 100644
index 0000000..0824b19
--- /dev/null
+++ b/src/fuzz/fuzz-env-file.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65535
diff --git a/src/fuzz/fuzz-fido-id-desc.dict b/src/fuzz/fuzz-fido-id-desc.dict
new file mode 100644
index 0000000..d2d2679
--- /dev/null
+++ b/src/fuzz/fuzz-fido-id-desc.dict
@@ -0,0 +1,6 @@
+"\xfe"
+"\x00"
+"\x01"
+"\xf1"
+"\xd0"
+"\xf1\xd0\x00\x01"
diff --git a/src/fuzz/fuzz-hostname-util.c b/src/fuzz/fuzz-hostname-util.c
new file mode 100644
index 0000000..0a81e74
--- /dev/null
+++ b/src/fuzz/fuzz-hostname-util.c
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "hostname-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *ret = NULL;
+
+ if (size == 0)
+ return 0;
+
+ f = fmemopen_unlocked((char*) data, size, "re");
+ assert_se(f);
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ (void) read_etc_hostname_stream(f, &ret);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journal-remote.c b/src/fuzz/fuzz-journal-remote.c
new file mode 100644
index 0000000..9adbd43
--- /dev/null
+++ b/src/fuzz/fuzz-journal-remote.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fuzz.h"
+
+#include <sys/mman.h>
+
+#include "sd-journal.h"
+
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "journal-remote.h"
+#include "logs-show.h"
+#include "memfd-util.h"
+#include "strv.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_fclose_ FILE *dev_null = NULL;
+ RemoteServer s = {};
+ char name[] = "/tmp/fuzz-journal-remote.XXXXXX.journal";
+ void *mem;
+ int fdin; /* will be closed by journal_remote handler after EOF */
+ _cleanup_close_ int fdout = -1;
+ sd_journal *j;
+ OutputMode mode;
+ int r;
+
+ if (size <= 2)
+ return 0;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se((fdin = memfd_new_and_map("fuzz-journal-remote", size, &mem)) >= 0);
+ memcpy(mem, data, size);
+ assert_se(munmap(mem, size) == 0);
+
+ fdout = mkostemps(name, STRLEN(".journal"), O_CLOEXEC);
+ assert_se(fdout >= 0);
+
+ /* In */
+
+ assert_se(journal_remote_server_init(&s, name, JOURNAL_WRITE_SPLIT_NONE, false, false) >= 0);
+
+ assert_se(journal_remote_add_source(&s, fdin, (char*) "fuzz-data", false) > 0);
+
+ while (s.active) {
+ r = journal_remote_handle_raw_source(NULL, fdin, 0, &s);
+ assert_se(r >= 0);
+ }
+
+ journal_remote_server_destroy(&s);
+ assert_se(close(fdin) < 0 && errno == EBADF); /* Check that the fd is closed already */
+
+ /* Out */
+
+ r = sd_journal_open_files(&j, (const char**) STRV_MAKE(name), 0);
+ assert_se(r >= 0);
+
+ if (getenv_bool("SYSTEMD_FUZZ_OUTPUT") <= 0)
+ assert_se(dev_null = fopen("/dev/null", "we"));
+
+ for (mode = 0; mode < _OUTPUT_MODE_MAX; mode++) {
+ if (!dev_null)
+ log_info("/* %s */", output_mode_to_string(mode));
+ r = show_journal(dev_null ?: stdout, j, mode, 0, 0, -1, 0, NULL);
+ assert_se(r >= 0);
+
+ r = sd_journal_seek_head(j);
+ assert_se(r >= 0);
+ }
+
+ sd_journal_close(j);
+ unlink(name);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journal-remote.options b/src/fuzz/fuzz-journal-remote.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/fuzz/fuzz-journal-remote.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/fuzz/fuzz-journald-audit.c b/src/fuzz/fuzz-journald-audit.c
new file mode 100644
index 0000000..6e8e180
--- /dev/null
+++ b/src/fuzz/fuzz-journald-audit.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-audit.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ Server s;
+
+ dummy_server_init(&s, data, size);
+ process_audit_string(&s, 0, s.buffer, size);
+ server_done(&s);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-kmsg.c b/src/fuzz/fuzz-journald-kmsg.c
new file mode 100644
index 0000000..1b423d5
--- /dev/null
+++ b/src/fuzz/fuzz-journald-kmsg.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-kmsg.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ Server s;
+
+ if (size == 0)
+ return 0;
+
+ dummy_server_init(&s, data, size);
+ dev_kmsg_record(&s, s.buffer, size);
+ server_done(&s);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-native-fd.c b/src/fuzz/fuzz-journald-native-fd.c
new file mode 100644
index 0000000..fcfc5df
--- /dev/null
+++ b/src/fuzz/fuzz-journald-native-fd.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz-journald.h"
+#include "fuzz.h"
+#include "journald-native.h"
+#include "memfd-util.h"
+#include "process-util.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ Server s;
+ _cleanup_close_ int sealed_fd = -1, unsealed_fd = -1;
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/fuzz-journald-native-fd.XXXXXX";
+ char *label = NULL;
+ size_t label_len = 0;
+ struct ucred ucred;
+ struct timeval *tv = NULL;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ dummy_server_init(&s, NULL, 0);
+
+ sealed_fd = memfd_new(NULL);
+ assert_se(sealed_fd >= 0);
+ assert_se(write(sealed_fd, data, size) == (ssize_t) size);
+ assert_se(memfd_set_sealed(sealed_fd) >= 0);
+ assert_se(lseek(sealed_fd, 0, SEEK_SET) == 0);
+ ucred = (struct ucred) {
+ .pid = getpid_cached(),
+ .uid = geteuid(),
+ .gid = getegid(),
+ };
+ server_process_native_file(&s, sealed_fd, &ucred, tv, label, label_len);
+
+ unsealed_fd = mkostemp_safe(name);
+ assert_se(unsealed_fd >= 0);
+ assert_se(write(unsealed_fd, data, size) == (ssize_t) size);
+ assert_se(lseek(unsealed_fd, 0, SEEK_SET) == 0);
+ server_process_native_file(&s, unsealed_fd, &ucred, tv, label, label_len);
+
+ server_done(&s);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-native.c b/src/fuzz/fuzz-journald-native.c
new file mode 100644
index 0000000..6531c4f
--- /dev/null
+++ b/src/fuzz/fuzz-journald-native.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-native.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ fuzz_journald_processing_function(data, size, server_process_native_message);
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-stream.c b/src/fuzz/fuzz-journald-stream.c
new file mode 100644
index 0000000..038b335
--- /dev/null
+++ b/src/fuzz/fuzz-journald-stream.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-stream.h"
+
+static int stream_fds[2] = { -1, -1 };
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ Server s;
+ StdoutStream *stream;
+ int v;
+
+ if (size == 0 || size > 65536)
+ return 0;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0, stream_fds) >= 0);
+ dummy_server_init(&s, NULL, 0);
+ assert_se(stdout_stream_install(&s, stream_fds[0], &stream) >= 0);
+ assert_se(write(stream_fds[1], data, size) == (ssize_t) size);
+ while (ioctl(stream_fds[0], SIOCINQ, &v) == 0 && v)
+ sd_event_run(s.event, (uint64_t) -1);
+ if (s.n_stdout_streams)
+ stdout_stream_destroy(stream);
+ server_done(&s);
+ stream_fds[1] = safe_close(stream_fds[1]);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald-stream.options b/src/fuzz/fuzz-journald-stream.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/fuzz/fuzz-journald-stream.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/fuzz/fuzz-journald-syslog.c b/src/fuzz/fuzz-journald-syslog.c
new file mode 100644
index 0000000..72ec610
--- /dev/null
+++ b/src/fuzz/fuzz-journald-syslog.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fuzz.h"
+#include "fuzz-journald.h"
+#include "journald-syslog.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ fuzz_journald_processing_function(data, size, server_process_syslog_message);
+ return 0;
+}
diff --git a/src/fuzz/fuzz-journald.c b/src/fuzz/fuzz-journald.c
new file mode 100644
index 0000000..e2f73ff
--- /dev/null
+++ b/src/fuzz/fuzz-journald.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fuzz-journald.h"
+#include "journald-server.h"
+#include "sd-event.h"
+
+void dummy_server_init(Server *s, const uint8_t *buffer, size_t size) {
+ *s = (Server) {
+ .syslog_fd = -1,
+ .native_fd = -1,
+ .stdout_fd = -1,
+ .dev_kmsg_fd = -1,
+ .audit_fd = -1,
+ .hostname_fd = -1,
+ .notify_fd = -1,
+ .storage = STORAGE_NONE,
+ .line_max = 64,
+ };
+ assert_se(sd_event_default(&s->event) >= 0);
+
+ if (buffer) {
+ s->buffer = memdup_suffix0(buffer, size);
+ assert_se(s->buffer);
+ s->buffer_size = size + 1;
+ }
+}
+
+void fuzz_journald_processing_function(
+ const uint8_t *data,
+ size_t size,
+ void (*f)(Server *s, const char *buf, size_t raw_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len)
+ ) {
+ Server s;
+ char *label = NULL;
+ size_t label_len = 0;
+ struct ucred *ucred = NULL;
+ struct timeval *tv = NULL;
+
+ if (size == 0)
+ return;
+
+ dummy_server_init(&s, data, size);
+ (*f)(&s, s.buffer, size, ucred, tv, label, label_len);
+ server_done(&s);
+}
diff --git a/src/fuzz/fuzz-journald.h b/src/fuzz/fuzz-journald.h
new file mode 100644
index 0000000..4abb100
--- /dev/null
+++ b/src/fuzz/fuzz-journald.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journald-server.h"
+
+void dummy_server_init(Server *s, const uint8_t *buffer, size_t size);
+
+void fuzz_journald_processing_function(
+ const uint8_t *data,
+ size_t size,
+ void (*f)(Server *s, const char *buf, size_t raw_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len)
+);
diff --git a/src/fuzz/fuzz-json.c b/src/fuzz/fuzz-json.c
new file mode 100644
index 0000000..f9a0e81
--- /dev/null
+++ b/src/fuzz/fuzz-json.c
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "fd-util.h"
+#include "fuzz.h"
+#include "json.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *out = NULL; /* out should be freed after g */
+ size_t out_size;
+ _cleanup_fclose_ FILE *f = NULL, *g = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ if (size == 0)
+ return 0;
+
+ f = fmemopen_unlocked((char*) data, size, "re");
+ assert_se(f);
+
+ if (json_parse_file(f, NULL, 0, &v, NULL, NULL) < 0)
+ return 0;
+
+ g = open_memstream_unlocked(&out, &out_size);
+ assert_se(g);
+
+ json_variant_dump(v, 0, g, NULL);
+ json_variant_dump(v, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR|JSON_FORMAT_SOURCE, g, NULL);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-lldp.c b/src/fuzz/fuzz-lldp.c
new file mode 100644
index 0000000..5747135
--- /dev/null
+++ b/src/fuzz/fuzz-lldp.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+#include "sd-lldp.h"
+
+#include "fd-util.h"
+#include "fuzz.h"
+#include "lldp-network.h"
+
+static int test_fd[2] = { -1, -1 };
+
+int lldp_network_bind_raw_socket(int ifindex) {
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_lldp_unrefp) sd_lldp *lldp = NULL;
+
+ if (size > 2048)
+ return 0;
+
+ assert_se(sd_event_new(&e) == 0);
+ assert_se(sd_lldp_new(&lldp) >= 0);
+ assert_se(sd_lldp_set_ifindex(lldp, 42) >= 0);
+ assert_se(sd_lldp_attach_event(lldp, e, 0) >= 0);
+ assert_se(sd_lldp_start(lldp) >= 0);
+
+ assert_se(write(test_fd[1], data, size) == (ssize_t) size);
+ assert_se(sd_event_run(e, 0) >= 0);
+
+ assert_se(sd_lldp_stop(lldp) >= 0);
+ assert_se(sd_lldp_detach_event(lldp) >= 0);
+ test_fd[1] = safe_close(test_fd[1]);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-lldp.options b/src/fuzz/fuzz-lldp.options
new file mode 100644
index 0000000..60bd9b0
--- /dev/null
+++ b/src/fuzz/fuzz-lldp.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 2048
diff --git a/src/fuzz/fuzz-main.c b/src/fuzz/fuzz-main.c
new file mode 100644
index 0000000..2df2993
--- /dev/null
+++ b/src/fuzz/fuzz-main.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "log.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "tests.h"
+
+/* This is a test driver for the systemd fuzzers that provides main function
+ * for regression testing outside of oss-fuzz (https://github.com/google/oss-fuzz)
+ *
+ * It reads files named on the command line and passes them one by one into the
+ * fuzzer that it is compiled into. */
+
+/* This one was borrowed from
+ * https://github.com/google/oss-fuzz/blob/646fca1b506b056db3a60d32c4a1a7398f171c94/infra/base-images/base-runner/bad_build_check#L19
+ */
+#define MIN_NUMBER_OF_RUNS 4
+
+int main(int argc, char **argv) {
+ int i, r;
+ size_t size;
+ char *name;
+
+ test_setup_logging(LOG_DEBUG);
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_free_ char *buf = NULL;
+
+ name = argv[i];
+ r = read_full_file(name, &buf, &size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open '%s': %m", name);
+ return EXIT_FAILURE;
+ }
+ printf("%s... ", name);
+ fflush(stdout);
+ for (int j = 0; j < MIN_NUMBER_OF_RUNS; j++)
+ if (LLVMFuzzerTestOneInput((uint8_t*)buf, size) == EXIT_TEST_SKIP)
+ return EXIT_TEST_SKIP;
+ printf("ok\n");
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/fuzz/fuzz-ndisc-rs.c b/src/fuzz/fuzz-ndisc-rs.c
new file mode 100644
index 0000000..d74cd2f
--- /dev/null
+++ b/src/fuzz/fuzz-ndisc-rs.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <netinet/icmp6.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "icmp6-util.h"
+#include "fuzz.h"
+#include "sd-ndisc.h"
+#include "socket-util.h"
+#include "ndisc-internal.h"
+
+static int test_fd[2] = { -1, -1 };
+
+int icmp6_bind_router_solicitation(int index) {
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) >= 0);
+ return test_fd[0];
+}
+
+int icmp6_bind_router_advertisement(int index) {
+ return -ENOSYS;
+}
+
+int icmp6_receive(int fd, void *iov_base, size_t iov_len,
+ struct in6_addr *dst, triple_timestamp *timestamp) {
+ assert_se(read(fd, iov_base, iov_len) == (ssize_t) iov_len);
+
+ if (timestamp)
+ triple_timestamp_get(timestamp);
+
+ return 0;
+}
+
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr) {
+ return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}
+ };
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_ndisc_unrefp) sd_ndisc *nd = NULL;
+
+ if (size > 2048)
+ return 0;
+
+ assert_se(sd_event_new(&e) >= 0);
+ assert_se(sd_ndisc_new(&nd) >= 0);
+ assert_se(sd_ndisc_attach_event(nd, e, 0) >= 0);
+ assert_se(sd_ndisc_set_ifindex(nd, 42) >= 0);
+ assert_se(sd_ndisc_set_mac(nd, &mac_addr) >= 0);
+ assert_se(sd_ndisc_start(nd) >= 0);
+ assert_se(write(test_fd[1], data, size) == (ssize_t) size);
+ (void) sd_event_run(e, (uint64_t) -1);
+ assert_se(sd_ndisc_stop(nd) >= 0);
+ close(test_fd[1]);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-ndisc-rs.options b/src/fuzz/fuzz-ndisc-rs.options
new file mode 100644
index 0000000..60bd9b0
--- /dev/null
+++ b/src/fuzz/fuzz-ndisc-rs.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 2048
diff --git a/src/fuzz/fuzz-nspawn-oci.c b/src/fuzz/fuzz-nspawn-oci.c
new file mode 100644
index 0000000..cfebf65
--- /dev/null
+++ b/src/fuzz/fuzz-nspawn-oci.c
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "nspawn-oci.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(settings_freep) Settings *s = NULL;
+
+ if (size == 0)
+ return 0;
+
+ f = fmemopen_unlocked((char*) data, size, "re");
+ assert_se(f);
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ (void) oci_load(f, "/dev/null", &s);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-nspawn-oci.options b/src/fuzz/fuzz-nspawn-oci.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/fuzz/fuzz-nspawn-oci.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/fuzz/fuzz-nspawn-settings.c b/src/fuzz/fuzz-nspawn-settings.c
new file mode 100644
index 0000000..bd98ed2
--- /dev/null
+++ b/src/fuzz/fuzz-nspawn-settings.c
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "nspawn-settings.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(settings_freep) Settings *s = NULL;
+
+ if (size == 0)
+ return 0;
+
+ f = fmemopen_unlocked((char*) data, size, "re");
+ assert_se(f);
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ (void) settings_load(f, "/dev/null", &s);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-nspawn-settings.options b/src/fuzz/fuzz-nspawn-settings.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/fuzz/fuzz-nspawn-settings.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/fuzz/fuzz-time-util.c b/src/fuzz/fuzz-time-util.c
new file mode 100644
index 0000000..bf2a663
--- /dev/null
+++ b/src/fuzz/fuzz-time-util.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fuzz.h"
+#include "time-util.h"
+#include "util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *str = NULL;
+ usec_t usec;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ str = memdup_suffix0(data, size);
+
+ (void) parse_timestamp(str, &usec);
+ (void) parse_sec(str, &usec);
+ (void) parse_sec_fix_0(str, &usec);
+ (void) parse_sec_def_infinity(str, &usec);
+ (void) parse_time(str, &usec, USEC_PER_SEC);
+ (void) parse_nsec(str, &usec);
+
+ (void) timezone_is_valid(str, LOG_DEBUG);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-udev-database.c b/src/fuzz/fuzz-udev-database.c
new file mode 100644
index 0000000..2a48c14
--- /dev/null
+++ b/src/fuzz/fuzz-udev-database.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "device-internal.h"
+#include "device-private.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ _cleanup_(unlink_tempfilep) char filename[] = "/tmp/fuzz-udev-database.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(fmkostemp_safe(filename, "r+", &f) == 0);
+ if (size != 0)
+ assert_se(fwrite(data, size, 1, f) == 1);
+
+ fflush(f);
+ assert_se(device_new_aux(&dev) >= 0);
+ (void) device_read_db_internal_filename(dev, filename);
+ return 0;
+}
diff --git a/src/fuzz/fuzz-udev-rule-parse-value.c b/src/fuzz/fuzz-udev-rule-parse-value.c
new file mode 100644
index 0000000..404d0cd
--- /dev/null
+++ b/src/fuzz/fuzz-udev-rule-parse-value.c
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <string.h>
+
+#include "alloc-util.h"
+#include "fuzz.h"
+#include "udev-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *str = NULL;
+ int r;
+ char *value = UINT_TO_PTR(0x12345678U);
+ char *endpos = UINT_TO_PTR(0x87654321U);
+
+ assert_se(str = malloc(size + 1));
+ memcpy(str, data, size);
+ str[size] = '\0';
+
+ r = udev_rule_parse_value(str, &value, &endpos);
+
+ if (r < 0) {
+ /* not modified on failure */
+ assert_se(value == UINT_TO_PTR(0x12345678U));
+ assert_se(endpos == UINT_TO_PTR(0x87654321U));
+ } else {
+ assert_se(endpos <= str + size);
+ assert_se(endpos > str + 1);
+ }
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-udev-rules.c b/src/fuzz/fuzz-udev-rules.c
new file mode 100644
index 0000000..e1140bc
--- /dev/null
+++ b/src/fuzz/fuzz-udev-rules.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "udev-rules.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(unlink_tempfilep) char filename[] = "/tmp/fuzz-udev-rules.XXXXXX";
+ int r;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL")) {
+ log_set_max_level_realm(LOG_REALM_UDEV, LOG_CRIT);
+ log_set_max_level_realm(LOG_REALM_SYSTEMD, LOG_CRIT);
+ }
+
+ assert_se(fmkostemp_safe(filename, "r+", &f) == 0);
+ if (size != 0)
+ assert_se(fwrite(data, size, 1, f) == 1);
+ fflush(f);
+
+ assert_se(rules = udev_rules_new(RESOLVE_NAME_EARLY));
+ r = udev_rules_parse_file(rules, filename);
+ log_info_errno(r, "Parsing %s: %m", filename);
+ assert_se(IN_SET(r,
+ 0, /* OK */
+ -ENOBUFS /* line length exceeded */));
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-udev-rules.options b/src/fuzz/fuzz-udev-rules.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/fuzz/fuzz-udev-rules.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/fuzz/fuzz-unit-file.c b/src/fuzz/fuzz-unit-file.c
new file mode 100644
index 0000000..e67f6e9
--- /dev/null
+++ b/src/fuzz/fuzz-unit-file.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "install.h"
+#include "load-fragment.h"
+#include "string-util.h"
+#include "unit.h"
+#include "utf8.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_free_ char *out = NULL; /* out should be freed after g */
+ size_t out_size;
+ _cleanup_fclose_ FILE *f = NULL, *g = NULL;
+ _cleanup_free_ char *p = NULL;
+ UnitType t;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *u;
+ const char *name;
+ long offset;
+
+ if (size == 0)
+ return 0;
+
+ f = fmemopen_unlocked((char*) data, size, "re");
+ assert_se(f);
+
+ if (read_line(f, LINE_MAX, &p) < 0)
+ return 0;
+
+ t = unit_type_from_string(p);
+ if (t < 0)
+ return 0;
+
+ if (!unit_vtable[t]->load)
+ return 0;
+
+ offset = ftell(f);
+ assert_se(offset >= 0);
+
+ for (;;) {
+ _cleanup_free_ char *l = NULL;
+ const char *ll;
+
+ if (read_line(f, LONG_LINE_MAX, &l) <= 0)
+ break;
+
+ ll = startswith(l, UTF8_BYTE_ORDER_MARK) ?: l;
+ ll = ll + strspn(ll, WHITESPACE);
+
+ if (HAS_FEATURE_MEMORY_SANITIZER && startswith(ll, "ListenNetlink")) {
+ /* ListenNetlink causes a false positive in msan,
+ * let's skip this for now. */
+ log_notice("Skipping test because ListenNetlink= is present");
+ return 0;
+ }
+ }
+
+ assert_se(fseek(f, offset, SEEK_SET) == 0);
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(manager_new(UNIT_FILE_SYSTEM, MANAGER_TEST_RUN_MINIMAL, &m) >= 0);
+
+ name = strjoina("a.", unit_type_to_string(t));
+ assert_se(unit_new_for_name(m, unit_vtable[t]->object_size, name, &u) >= 0);
+
+ (void) config_parse(
+ name, name, f,
+ UNIT_VTABLE(u)->sections,
+ config_item_perf_lookup, load_fragment_gperf_lookup,
+ 0,
+ u,
+ NULL);
+
+ g = open_memstream_unlocked(&out, &out_size);
+ assert_se(g);
+
+ unit_dump(u, g, "");
+ manager_dump(m, g, ">>>");
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-unit-file.options b/src/fuzz/fuzz-unit-file.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/fuzz/fuzz-unit-file.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/fuzz/fuzz-varlink.c b/src/fuzz/fuzz-varlink.c
new file mode 100644
index 0000000..f26050c
--- /dev/null
+++ b/src/fuzz/fuzz-varlink.c
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fuzz.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "varlink.h"
+#include "log.h"
+
+static FILE *null = NULL;
+
+static int method_something(Varlink *v, JsonVariant *p, VarlinkMethodFlags flags, void *userdata) {
+ json_variant_dump(p, JSON_FORMAT_NEWLINE|JSON_FORMAT_PRETTY, null, NULL);
+ return 0;
+}
+
+static int reply_callback(Varlink *v, JsonVariant *p, const char *error_id, VarlinkReplyFlags flags, void *userdata) {
+ json_variant_dump(p, JSON_FORMAT_NEWLINE|JSON_FORMAT_PRETTY, null, NULL);
+ return 0;
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ struct iovec *iov = userdata;
+ bool write_eof = false, read_eof = false;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(iov);
+
+ if ((revents & (EPOLLOUT|EPOLLHUP|EPOLLERR)) && iov->iov_len > 0) {
+ ssize_t n;
+
+ /* never write more than 143 bytes a time, to make broken up recv()s on the other side more
+ * likely, and thus test some additional code paths. */
+ n = send(fd, iov->iov_base, MIN(iov->iov_len, 143U), MSG_NOSIGNAL|MSG_DONTWAIT);
+ if (n < 0) {
+ if (ERRNO_IS_DISCONNECT(errno))
+ write_eof = true;
+ else
+ assert_se(errno == EAGAIN);
+ } else
+ IOVEC_INCREMENT(iov, 1, n);
+ }
+
+ if (revents & EPOLLIN) {
+ char c[137];
+ ssize_t n;
+
+ n = recv(fd, c, sizeof(c), MSG_DONTWAIT);
+ if (n < 0) {
+ if (ERRNO_IS_DISCONNECT(errno))
+ read_eof = true;
+ else
+ assert_se(errno == EAGAIN);
+ } else if (n == 0)
+ read_eof = true;
+ else
+ hexdump(null, c, (size_t) n);
+ }
+
+ /* After we wrote everything we could turn off EPOLLOUT. And if we reached read EOF too turn off the
+ * whole thing. */
+ if (write_eof || iov->iov_len == 0) {
+
+ if (read_eof)
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ else
+ assert_se(sd_event_source_set_io_events(s, EPOLLIN) >= 0);
+ }
+
+ return 0;
+}
+
+static int idle_callback(sd_event_source *s, void *userdata) {
+ assert(s);
+
+ /* Called as idle callback when there's nothing else to do anymore */
+ sd_event_exit(sd_event_source_get_event(s), 0);
+ return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ struct iovec server_iov = IOVEC_MAKE((void*) data, size), client_iov = IOVEC_MAKE((void*) data, size);
+ /* Important: the declaration order matters here! we want that the fds are closed on return after the
+ * event sources, hence we declare the fds first, the event sources second */
+ _cleanup_close_pair_ int server_pair[2] = { -1, -1 }, client_pair[2] = { -1, -1 };
+ _cleanup_(sd_event_source_unrefp) sd_event_source *idle_event_source = NULL,
+ *server_event_source = NULL, *client_event_source = NULL;
+ _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
+ _cleanup_(varlink_flush_close_unrefp) Varlink *c = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+
+ log_set_max_level(LOG_CRIT);
+ log_parse_environment();
+
+ assert_se(null = fopen("/dev/null", "we"));
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ /* Test one: write the data as method call to a server */
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, server_pair) >= 0);
+ assert_se(varlink_server_new(&s, 0) >= 0);
+ assert_se(varlink_server_set_description(s, "myserver") >= 0);
+ assert_se(varlink_server_attach_event(s, e, 0) >= 0);
+ assert_se(varlink_server_add_connection(s, server_pair[0], NULL) >= 0);
+ TAKE_FD(server_pair[0]);
+ assert_se(varlink_server_bind_method(s, "io.test.DoSomething", method_something) >= 0);
+ assert_se(sd_event_add_io(e, &server_event_source, server_pair[1], EPOLLIN|EPOLLOUT, io_callback, &server_iov) >= 0);
+
+ /* Test two: write the data as method response to a client */
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, client_pair) >= 0);
+ assert_se(varlink_connect_fd(&c, client_pair[0]) >= 0);
+ TAKE_FD(client_pair[0]);
+ assert_se(varlink_set_description(c, "myclient") >= 0);
+ assert_se(varlink_attach_event(c, e, 0) >= 0);
+ assert_se(varlink_bind_reply(c, reply_callback) >= 0);
+ assert_se(varlink_invoke(c, "io.test.DoSomething", NULL) >= 0);
+ assert_se(sd_event_add_io(e, &client_event_source, client_pair[1], EPOLLIN|EPOLLOUT, io_callback, &client_iov) >= 0);
+
+ assert_se(sd_event_add_defer(e, &idle_event_source, idle_callback, NULL) >= 0);
+ assert_se(sd_event_source_set_priority(idle_event_source, SD_EVENT_PRIORITY_IDLE) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ null = safe_fclose(null);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz-xdg-desktop.c b/src/fuzz/fuzz-xdg-desktop.c
new file mode 100644
index 0000000..23077e4
--- /dev/null
+++ b/src/fuzz/fuzz-xdg-desktop.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "fuzz.h"
+#include "xdg-autostart-service.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/fuzz-xdg-desktop.XXXXXX";
+ _cleanup_close_ int fd = -1;
+ _cleanup_(xdg_autostart_service_freep) XdgAutostartService *service = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL;
+
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(mkdtemp_malloc("/tmp/fuzz-xdg-desktop-XXXXXX", &tmpdir) >= 0);
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, data, size) == (ssize_t) size);
+
+ assert_se(service = xdg_autostart_service_parse_desktop(name));
+ assert_se(service->name = strdup("fuzz-xdg-desktop.service"));
+ (void) xdg_autostart_service_generate_unit(service, tmpdir);
+
+ return 0;
+}
diff --git a/src/fuzz/fuzz.h b/src/fuzz/fuzz.h
new file mode 100644
index 0000000..579b0ee
--- /dev/null
+++ b/src/fuzz/fuzz.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* The entry point into the fuzzer */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
diff --git a/src/fuzz/meson.build b/src/fuzz/meson.build
new file mode 100644
index 0000000..a5fac59
--- /dev/null
+++ b/src/fuzz/meson.build
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+fuzzers += [
+ [['src/fuzz/fuzz-bus-message.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-catalog.c'],
+ [libjournal_core,
+ libshared],
+ []],
+
+ [['src/fuzz/fuzz-dns-packet.c',
+ dns_type_headers],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm]],
+
+ [['src/fuzz/fuzz-dhcp6-client.c',
+ 'src/libsystemd-network/dhcp-identifier.h',
+ 'src/libsystemd-network/dhcp-identifier.c',
+ 'src/libsystemd-network/dhcp6-internal.h',
+ 'src/systemd/sd-dhcp6-client.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/fuzz/fuzz-dhcp-server.c'],
+ [libsystemd_network,
+ libshared],
+ []],
+
+ [['src/fuzz/fuzz-lldp.c'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/fuzz/fuzz-ndisc-rs.c',
+ 'src/libsystemd-network/dhcp-identifier.h',
+ 'src/libsystemd-network/dhcp-identifier.c',
+ 'src/libsystemd-network/icmp6-util.h',
+ 'src/systemd/sd-dhcp6-client.h',
+ 'src/systemd/sd-ndisc.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/fuzz/fuzz-json.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-varlink.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-unit-file.c'],
+ [libcore,
+ libshared],
+ [libmount]],
+
+ [['src/fuzz/fuzz-journald-audit.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-kmsg.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-native.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-native-fd.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-stream.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journald-syslog.c',
+ 'src/fuzz/fuzz-journald.c'],
+ [libjournal_core,
+ libshared],
+ [libselinux]],
+
+ [['src/fuzz/fuzz-journal-remote.c'],
+ [libsystemd_journal_remote,
+ libshared],
+ []],
+
+ [['src/fuzz/fuzz-udev-database.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-udev-rules.c'],
+ [libudev_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads,
+ libacl]],
+
+ [['src/fuzz/fuzz-compress.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-bus-label.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-env-file.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-hostname-util.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-nspawn-settings.c'],
+ [libshared,
+ libnspawn_core],
+ [libseccomp]],
+
+ [['src/fuzz/fuzz-nspawn-oci.c'],
+ [libshared,
+ libnspawn_core],
+ [libseccomp]],
+
+ [['src/fuzz/fuzz-calendarspec.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-time-util.c'],
+ [libshared],
+ []],
+
+ [['src/fuzz/fuzz-xdg-desktop.c',
+ 'src/xdg-autostart-generator/xdg-autostart-service.h',
+ 'src/xdg-autostart-generator/xdg-autostart-service.c'],
+ [],
+ []],
+
+ [['src/fuzz/fuzz-udev-rule-parse-value.c'],
+ [libshared],
+ []],
+]
diff --git a/src/getty-generator/getty-generator.c b/src/getty-generator/getty-generator.c
new file mode 100644
index 0000000..2f26214
--- /dev/null
+++ b/src/getty-generator/getty-generator.c
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "generator.h"
+#include "log.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "util.h"
+#include "virt.h"
+
+static const char *arg_dest = NULL;
+
+static int add_symlink(const char *fservice, const char *tservice) {
+ char *from, *to;
+ int r;
+
+ assert(fservice);
+ assert(tservice);
+
+ from = strjoina(SYSTEM_DATA_UNIT_PATH "/", fservice);
+ to = strjoina(arg_dest, "/getty.target.wants/", tservice);
+
+ mkdir_parents_label(to, 0755);
+
+ r = symlink(from, to);
+ if (r < 0) {
+ /* In case console=hvc0 is passed this will very likely result in EEXIST */
+ if (errno == EEXIST)
+ return 0;
+
+ return log_error_errno(errno, "Failed to create symlink %s: %m", to);
+ }
+
+ return 0;
+}
+
+static int add_serial_getty(const char *tty) {
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(tty);
+
+ log_debug("Automatically adding serial getty for /dev/%s.", tty);
+
+ r = unit_name_from_path_instance("serial-getty", tty, ".service", &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate service name: %m");
+
+ return add_symlink("serial-getty@.service", n);
+}
+
+static int add_container_getty(const char *tty) {
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(tty);
+
+ log_debug("Automatically adding container getty for /dev/pts/%s.", tty);
+
+ r = unit_name_from_path_instance("container-getty", tty, ".service", &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate service name: %m");
+
+ return add_symlink("container-getty@.service", n);
+}
+
+static int verify_tty(const char *name) {
+ _cleanup_close_ int fd = -1;
+ const char *p;
+
+ /* Some TTYs are weird and have been enumerated but don't work
+ * when you try to use them, such as classic ttyS0 and
+ * friends. Let's check that and open the device and run
+ * isatty() on it. */
+
+ p = strjoina("/dev/", name);
+
+ /* O_NONBLOCK is essential here, to make sure we don't wait
+ * for DCD */
+ fd = open(p, O_RDWR|O_NONBLOCK|O_NOCTTY|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ errno = 0;
+ if (isatty(fd) <= 0)
+ return errno_or_else(EIO);
+
+ return 0;
+}
+
+static int run_container(void) {
+ _cleanup_free_ char *container_ttys = NULL;
+ int r;
+
+ log_debug("Automatically adding console shell.");
+
+ r = add_symlink("console-getty.service", "console-getty.service");
+ if (r < 0)
+ return r;
+
+ /* When $container_ttys is set for PID 1, spawn gettys on all ptys named therein.
+ * Note that despite the variable name we only support ptys here. */
+
+ (void) getenv_for_pid(1, "container_ttys", &container_ttys);
+
+ for (const char *p = container_ttys;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse $container_ttys: %m");
+ if (r == 0)
+ return 0;
+
+ const char *tty = word;
+
+ /* First strip off /dev/ if it is specified */
+ tty = path_startswith(tty, "/dev/") ?: tty;
+
+ /* Then, make sure it's actually a pty */
+ tty = path_startswith(tty, "pts/");
+ if (!tty)
+ continue;
+
+ r = add_container_getty(tty);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest);
+
+ if (detect_container() > 0)
+ /* Add console shell and look at $container_ttys, but don't do add any
+ * further magic if we are in a container. */
+ return run_container();
+
+ /* Automatically add in a serial getty on all active kernel consoles */
+ _cleanup_free_ char *active = NULL;
+ (void) read_one_line_file("/sys/class/tty/console/active", &active);
+ for (const char *p = active;;) {
+ _cleanup_free_ char *tty = NULL;
+
+ r = extract_first_word(&p, &tty, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse /sys/class/tty/console/active: %m");
+ if (r == 0)
+ break;
+
+ /* We assume that gettys on virtual terminals are started via manual configuration and do
+ * this magic only for non-VC terminals. */
+
+ if (isempty(tty) || tty_is_vc(tty))
+ continue;
+
+ if (verify_tty(tty) < 0)
+ continue;
+
+ r = add_serial_getty(tty);
+ if (r < 0)
+ return r;
+ }
+
+ /* Automatically add in a serial getty on the first
+ * virtualizer console */
+ const char *j;
+ FOREACH_STRING(j,
+ "hvc0",
+ "xvc0",
+ "hvsi0",
+ "sclp_line0",
+ "ttysclp0",
+ "3270!tty1") {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join("/sys/class/tty", j);
+ if (!p)
+ return -ENOMEM;
+ if (access(p, F_OK) < 0)
+ continue;
+
+ r = add_serial_getty(j);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c
new file mode 100644
index 0000000..f9d0ca5
--- /dev/null
+++ b/src/gpt-auto-generator/gpt-auto-generator.c
@@ -0,0 +1,843 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "dropin.h"
+#include "efi-loader.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "gpt.h"
+#include "mkdir.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+#include "virt.h"
+
+static const char *arg_dest = NULL;
+static bool arg_enabled = true;
+static bool arg_root_enabled = true;
+static int arg_root_rw = -1;
+
+static int open_parent_block_device(dev_t devnum, int *ret_fd) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ const char *name, *devtype, *node;
+ sd_device *parent;
+ dev_t pn;
+ int fd, r;
+
+ assert(ret_fd);
+
+ r = sd_device_new_from_devnum(&d, 'b', devnum);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open device: %m");
+
+ if (sd_device_get_devname(d, &name) < 0) {
+ r = sd_device_get_syspath(d, &name);
+ if (r < 0) {
+ log_device_debug_errno(d, r, "Device %u:%u does not have a name, ignoring: %m",
+ major(devnum), minor(devnum));
+ return 0;
+ }
+ }
+
+ r = sd_device_get_parent(d, &parent);
+ if (r < 0) {
+ log_device_debug_errno(d, r, "Not a partitioned device, ignoring: %m");
+ return 0;
+ }
+
+ /* Does it have a devtype? */
+ r = sd_device_get_devtype(parent, &devtype);
+ if (r < 0) {
+ log_device_debug_errno(parent, r, "Parent doesn't have a device type, ignoring: %m");
+ return 0;
+ }
+
+ /* Is this a disk or a partition? We only care for disks... */
+ if (!streq(devtype, "disk")) {
+ log_device_debug(parent, "Parent isn't a raw disk, ignoring.");
+ return 0;
+ }
+
+ /* Does it have a device node? */
+ r = sd_device_get_devname(parent, &node);
+ if (r < 0) {
+ log_device_debug_errno(parent, r, "Parent device does not have device node, ignoring: %m");
+ return 0;
+ }
+
+ log_device_debug(d, "Root device %s.", node);
+
+ r = sd_device_get_devnum(parent, &pn);
+ if (r < 0) {
+ log_device_debug_errno(parent, r, "Parent device is not a proper block device, ignoring: %m");
+ return 0;
+ }
+
+ fd = open(node, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", node);
+
+ *ret_fd = fd;
+ return 1;
+}
+
+static int add_cryptsetup(const char *id, const char *what, bool rw, bool require, char **device) {
+ _cleanup_free_ char *e = NULL, *n = NULL, *d = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(id);
+ assert(what);
+
+ r = unit_name_from_path(what, ".device", &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ e = unit_name_escape(id);
+ if (!e)
+ return log_oom();
+
+ r = unit_name_build("systemd-cryptsetup", e, ".service", &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ r = generator_open_unit_file(arg_dest, NULL, n, &f);
+ if (r < 0)
+ return r;
+
+ r = generator_write_cryptsetup_unit_section(f, NULL);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "Before=umount.target cryptsetup.target\n"
+ "Conflicts=umount.target\n"
+ "BindsTo=%s\n"
+ "After=%s\n",
+ d, d);
+
+ r = generator_write_cryptsetup_service_section(f, id, what, NULL, rw ? NULL : "read-only");
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write file %s: %m", n);
+
+ r = generator_add_symlink(arg_dest, d, "wants", n);
+ if (r < 0)
+ return r;
+
+ const char *dmname;
+ dmname = strjoina("dev-mapper-", e, ".device");
+
+ if (require) {
+ r = generator_add_symlink(arg_dest, "cryptsetup.target", "requires", n);
+ if (r < 0)
+ return r;
+
+ r = generator_add_symlink(arg_dest, dmname, "requires", n);
+ if (r < 0)
+ return r;
+ }
+
+ r = write_drop_in_format(arg_dest, dmname, 50, "job-timeout",
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "JobTimeoutSec=0"); /* the binary handles timeouts anyway */
+ if (r < 0)
+ log_warning_errno(r, "Failed to write device timeout drop-in, ignoring: %m");
+
+ if (device) {
+ char *ret;
+
+ ret = path_join("/dev/mapper", id);
+ if (!ret)
+ return log_oom();
+
+ *device = ret;
+ }
+
+ return 0;
+}
+
+static int add_mount(
+ const char *id,
+ const char *what,
+ const char *where,
+ const char *fstype,
+ bool rw,
+ const char *options,
+ const char *description,
+ const char *post) {
+
+ _cleanup_free_ char *unit = NULL, *crypto_what = NULL, *p = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ /* Note that we don't apply specifier escaping on the input strings here, since we know they are not configured
+ * externally, but all originate from our own sources here, and hence we know they contain no % characters that
+ * could potentially be understood as specifiers. */
+
+ assert(id);
+ assert(what);
+ assert(where);
+ assert(description);
+
+ log_debug("Adding %s: %s fstype=%s", where, what, fstype ?: "(any)");
+
+ if (streq_ptr(fstype, "crypto_LUKS")) {
+ r = add_cryptsetup(id, what, rw, true, &crypto_what);
+ if (r < 0)
+ return r;
+
+ what = crypto_what;
+ fstype = NULL;
+ }
+
+ r = unit_name_from_path(where, ".mount", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ p = path_join(empty_to_root(arg_dest), unit);
+ if (!p)
+ return log_oom();
+
+ f = fopen(p, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "Description=%s\n"
+ "Documentation=man:systemd-gpt-auto-generator(8)\n",
+ description);
+
+ if (post)
+ fprintf(f, "Before=%s\n", post);
+
+ r = generator_write_fsck_deps(f, arg_dest, what, where, fstype);
+ if (r < 0)
+ return r;
+
+ r = generator_write_blockdev_dependency(f, what);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "\n"
+ "[Mount]\n"
+ "What=%s\n"
+ "Where=%s\n",
+ what, where);
+
+ if (fstype)
+ fprintf(f, "Type=%s\n", fstype);
+
+ if (options)
+ fprintf(f, "Options=%s,%s\n", options, rw ? "rw" : "ro");
+ else
+ fprintf(f, "Options=%s\n", rw ? "rw" : "ro");
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", p);
+
+ if (post)
+ return generator_add_symlink(arg_dest, post, "requires", unit);
+ return 0;
+}
+
+static int path_is_busy(const char *where) {
+ int r;
+
+ /* already a mountpoint; generators run during reload */
+ r = path_is_mount_point(where, NULL, AT_SYMLINK_FOLLOW);
+ if (r > 0)
+ return false;
+
+ /* the directory might not exist on a stateless system */
+ if (r == -ENOENT)
+ return false;
+
+ if (r < 0)
+ return log_warning_errno(r, "Cannot check if \"%s\" is a mount point: %m", where);
+
+ /* not a mountpoint but it contains files */
+ r = dir_is_empty(where);
+ if (r < 0)
+ return log_warning_errno(r, "Cannot check if \"%s\" is empty: %m", where);
+ if (r > 0)
+ return false;
+
+ log_debug("\"%s\" already populated, ignoring.", where);
+ return true;
+}
+
+static int add_partition_mount(
+ DissectedPartition *p,
+ const char *id,
+ const char *where,
+ const char *description) {
+
+ int r;
+ assert(p);
+
+ r = path_is_busy(where);
+ if (r != 0)
+ return r < 0 ? r : 0;
+
+ return add_mount(
+ id,
+ p->node,
+ where,
+ p->fstype,
+ p->rw,
+ NULL,
+ description,
+ SPECIAL_LOCAL_FS_TARGET);
+}
+
+static int add_swap(const char *path) {
+ _cleanup_free_ char *name = NULL, *unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(path);
+
+ /* Disable the swap auto logic if at least one swap is defined in /etc/fstab, see #6192. */
+ r = fstab_has_fstype("swap");
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse fstab: %m");
+ if (r > 0) {
+ log_debug("swap specified in fstab, ignoring.");
+ return 0;
+ }
+
+ log_debug("Adding swap: %s", path);
+
+ r = unit_name_from_path(path, ".swap", &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ unit = path_join(empty_to_root(arg_dest), name);
+ if (!unit)
+ return log_oom();
+
+ f = fopen(unit, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "Description=Swap Partition\n"
+ "Documentation=man:systemd-gpt-auto-generator(8)\n");
+
+ r = generator_write_blockdev_dependency(f, path);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "\n"
+ "[Swap]\n"
+ "What=%s\n",
+ path);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit);
+
+ return generator_add_symlink(arg_dest, SPECIAL_SWAP_TARGET, "wants", name);
+}
+
+static int add_automount(
+ const char *id,
+ const char *what,
+ const char *where,
+ const char *fstype,
+ bool rw,
+ const char *options,
+ const char *description,
+ usec_t timeout) {
+
+ _cleanup_free_ char *unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *opt = "noauto", *p;
+ int r;
+
+ assert(id);
+ assert(where);
+ assert(description);
+
+ if (options)
+ opt = strjoina(options, ",", opt);
+
+ r = add_mount(id,
+ what,
+ where,
+ fstype,
+ rw,
+ opt,
+ description,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = unit_name_from_path(where, ".automount", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ p = prefix_roota(arg_dest, unit);
+ f = fopen(p, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-gpt-auto-generator\n\n"
+ "[Unit]\n"
+ "Description=%s\n"
+ "Documentation=man:systemd-gpt-auto-generator(8)\n"
+ "[Automount]\n"
+ "Where=%s\n"
+ "TimeoutIdleSec="USEC_FMT"\n",
+ description,
+ where,
+ timeout / USEC_PER_SEC);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", p);
+
+ return generator_add_symlink(arg_dest, SPECIAL_LOCAL_FS_TARGET, "wants", unit);
+}
+
+static const char *esp_or_xbootldr_options(const DissectedPartition *p) {
+ assert(p);
+
+ /* if we probed vfat or have no idea about the file system then assume these file systems are vfat
+ * and thus understand "umask=0077". If we detected something else then don't specify any options and
+ * use kernel defaults. */
+
+ if (!p->fstype || streq(p->fstype, "vfat"))
+ return "umask=0077";
+
+ return NULL;
+}
+
+static int add_xbootldr(DissectedPartition *p) {
+ int r;
+
+ assert(p);
+
+ if (in_initrd()) {
+ log_debug("In initrd, ignoring the XBOOTLDR partition.");
+ return 0;
+ }
+
+ r = fstab_is_mount_point("/boot");
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse fstab: %m");
+ if (r > 0) {
+ log_debug("/boot specified in fstab, ignoring XBOOTLDR partition.");
+ return 0;
+ }
+
+ r = path_is_busy("/boot");
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ return add_automount("boot",
+ p->node,
+ "/boot",
+ p->fstype,
+ true,
+ esp_or_xbootldr_options(p),
+ "Boot Loader Partition",
+ 120 * USEC_PER_SEC);
+}
+
+#if ENABLE_EFI
+static int add_esp(DissectedPartition *p, bool has_xbootldr) {
+ const char *esp_path = NULL, *id = NULL;
+ int r;
+
+ assert(p);
+
+ if (in_initrd()) {
+ log_debug("In initrd, ignoring the ESP.");
+ return 0;
+ }
+
+ /* If /efi exists we'll use that. Otherwise we'll use /boot, as that's usually the better choice, but
+ * only if there's no explicit XBOOTLDR partition around. */
+ if (access("/efi", F_OK) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine whether /efi exists: %m");
+
+ /* Use /boot as fallback, but only if there's no XBOOTLDR partition */
+ if (!has_xbootldr) {
+ esp_path = "/boot";
+ id = "boot";
+ }
+ }
+ if (!esp_path)
+ esp_path = "/efi";
+ if (!id)
+ id = "efi";
+
+ /* We create an .automount which is not overridden by the .mount from the fstab generator. */
+ r = fstab_is_mount_point(esp_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse fstab: %m");
+ if (r > 0) {
+ log_debug("%s specified in fstab, ignoring.", esp_path);
+ return 0;
+ }
+
+ r = path_is_busy(esp_path);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ if (is_efi_boot()) {
+ sd_id128_t loader_uuid;
+
+ /* If this is an EFI boot, be extra careful, and only mount the ESP if it was the ESP used for booting. */
+
+ r = efi_loader_get_device_part_uuid(&loader_uuid);
+ if (r == -ENOENT) {
+ log_debug("EFI loader partition unknown.");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to read ESP partition UUID: %m");
+
+ if (!sd_id128_equal(p->uuid, loader_uuid)) {
+ log_debug("Partition for %s does not appear to be the partition we are booted from.", p->node);
+ return 0;
+ }
+ } else
+ log_debug("Not an EFI boot, skipping ESP check.");
+
+ return add_automount(id,
+ p->node,
+ esp_path,
+ p->fstype,
+ true,
+ esp_or_xbootldr_options(p),
+ "EFI System Partition Automount",
+ 120 * USEC_PER_SEC);
+}
+#else
+static int add_esp(DissectedPartition *p, bool has_xbootldr) {
+ return 0;
+}
+#endif
+
+static int add_root_rw(DissectedPartition *p) {
+ const char *path;
+ int r;
+
+ assert(p);
+
+ if (in_initrd()) {
+ log_debug("In initrd, not generating drop-in for systemd-remount-fs.service.");
+ return 0;
+ }
+
+ if (arg_root_rw >= 0) {
+ log_debug("Parameter ro/rw specified on kernel command line, not generating drop-in for systemd-remount-fs.service.");
+ return 0;
+ }
+
+ if (!p->rw) {
+ log_debug("Root partition marked read-only in GPT partition table, not generating drop-in for systemd-remount-fs.service.");
+ return 0;
+ }
+
+ (void) generator_enable_remount_fs_service(arg_dest);
+
+ path = strjoina(arg_dest, "/systemd-remount-fs.service.d/50-remount-rw.conf");
+
+ r = write_string_file(path,
+ "# Automatically generated by systemd-gpt-generator\n\n"
+ "[Service]\n"
+ "Environment=SYSTEMD_REMOUNT_ROOT_RW=1\n",
+ WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_NOFOLLOW|WRITE_STRING_FILE_MKDIR_0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write drop-in file %s: %m", path);
+
+ return 0;
+}
+
+#if ENABLE_EFI
+static int add_root_cryptsetup(void) {
+
+ /* If a device /dev/gpt-auto-root-luks appears, then make it pull in systemd-cryptsetup-root.service, which
+ * sets it up, and causes /dev/gpt-auto-root to appear which is all we are looking for. */
+
+ return add_cryptsetup("root", "/dev/gpt-auto-root-luks", true, false, NULL);
+}
+#endif
+
+static int add_root_mount(void) {
+#if ENABLE_EFI
+ int r;
+
+ if (!is_efi_boot()) {
+ log_debug("Not a EFI boot, not creating root mount.");
+ return 0;
+ }
+
+ r = efi_loader_get_device_part_uuid(NULL);
+ if (r == -ENOENT) {
+ log_notice("EFI loader partition unknown, exiting.\n"
+ "(The boot loader did not set EFI variable LoaderDevicePartUUID.)");
+ return 0;
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to read ESP partition UUID: %m");
+
+ /* OK, we have an ESP partition, this is fantastic, so let's
+ * wait for a root device to show up. A udev rule will create
+ * the link for us under the right name. */
+
+ if (in_initrd()) {
+ r = generator_write_initrd_root_device_deps(arg_dest, "/dev/gpt-auto-root");
+ if (r < 0)
+ return 0;
+
+ r = add_root_cryptsetup();
+ if (r < 0)
+ return r;
+ }
+
+ /* Note that we do not need to enable systemd-remount-fs.service here. If
+ * /etc/fstab exists, systemd-fstab-generator will pull it in for us. */
+
+ return add_mount(
+ "root",
+ "/dev/gpt-auto-root",
+ in_initrd() ? "/sysroot" : "/",
+ NULL,
+ arg_root_rw > 0,
+ NULL,
+ "Root Partition",
+ in_initrd() ? SPECIAL_INITRD_ROOT_FS_TARGET : SPECIAL_LOCAL_FS_TARGET);
+#else
+ return 0;
+#endif
+}
+
+static int enumerate_partitions(dev_t devnum) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ int r, k;
+
+ r = open_parent_block_device(devnum, &fd);
+ if (r <= 0)
+ return r;
+
+ r = dissect_image(fd, NULL, NULL, DISSECT_IMAGE_GPT_ONLY|DISSECT_IMAGE_NO_UDEV, &m);
+ if (r == -ENOPKG) {
+ log_debug_errno(r, "No suitable partition table found, ignoring.");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to dissect: %m");
+
+ if (m->partitions[PARTITION_SWAP].found) {
+ k = add_swap(m->partitions[PARTITION_SWAP].node);
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_XBOOTLDR].found) {
+ k = add_xbootldr(m->partitions + PARTITION_XBOOTLDR);
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_ESP].found) {
+ k = add_esp(m->partitions + PARTITION_ESP, m->partitions[PARTITION_XBOOTLDR].found);
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_HOME].found) {
+ k = add_partition_mount(m->partitions + PARTITION_HOME, "home", "/home", "Home Partition");
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_SRV].found) {
+ k = add_partition_mount(m->partitions + PARTITION_SRV, "srv", "/srv", "Server Data Partition");
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_VAR].found) {
+ k = add_partition_mount(m->partitions + PARTITION_VAR, "var", "/var", "Variable Data Partition");
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_TMP].found) {
+ k = add_partition_mount(m->partitions + PARTITION_TMP, "var-tmp", "/var/tmp", "Temporary Data Partition");
+ if (k < 0)
+ r = k;
+ }
+
+ if (m->partitions[PARTITION_ROOT].found) {
+ k = add_root_rw(m->partitions + PARTITION_ROOT);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int add_mounts(void) {
+ dev_t devno;
+ int r;
+
+ r = get_block_device_harder("/", &devno);
+ if (r == -EUCLEAN)
+ return btrfs_log_dev_root(LOG_ERR, r, "root file system");
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine block device of root file system: %m");
+ if (r == 0) { /* Not backed by block device */
+ r = get_block_device_harder("/usr", &devno);
+ if (r == -EUCLEAN)
+ return btrfs_log_dev_root(LOG_ERR, r, "/usr");
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine block device of /usr file system: %m");
+ if (r == 0) {
+ _cleanup_free_ char *p = NULL;
+ mode_t m;
+
+ /* If the root mount has been replaced by some form of volatile file system (overlayfs), the
+ * original root block device node is symlinked in /run/systemd/volatile-root. Let's read that
+ * here. */
+ r = readlink_malloc("/run/systemd/volatile-root", &p);
+ if (r == -ENOENT) {
+ log_debug("Neither root nor /usr file system are on a (single) block device.");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to read symlink /run/systemd/volatile-root: %m");
+
+ r = device_path_parse_major_minor(p, &m, &devno);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse major/minor device node: %m");
+ if (!S_ISBLK(m))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Volatile root device is of wrong type.");
+ }
+ }
+
+ return enumerate_partitions(devno);
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ assert(key);
+
+ if (proc_cmdline_key_streq(key, "systemd.gpt_auto") ||
+ proc_cmdline_key_streq(key, "rd.systemd.gpt_auto")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse gpt-auto switch \"%s\", ignoring: %m", value);
+ else
+ arg_enabled = r;
+
+ } else if (proc_cmdline_key_streq(key, "root")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ /* Disable root disk logic if there's a root= value
+ * specified (unless it happens to be "gpt-auto") */
+
+ if (!streq(value, "gpt-auto")) {
+ arg_root_enabled = false;
+ log_debug("Disabling root partition auto-detection, root= is defined.");
+ }
+
+ } else if (proc_cmdline_key_streq(key, "roothash")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ /* Disable root disk logic if there's roothash= defined (i.e. verity enabled) */
+
+ arg_root_enabled = false;
+
+ } else if (proc_cmdline_key_streq(key, "rw") && !value)
+ arg_root_rw = true;
+ else if (proc_cmdline_key_streq(key, "ro") && !value)
+ arg_root_rw = false;
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r, k;
+
+ assert_se(arg_dest = dest_late);
+
+ if (detect_container() > 0) {
+ log_debug("In a container, exiting.");
+ return 0;
+ }
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (!arg_enabled) {
+ log_debug("Disabled, exiting.");
+ return 0;
+ }
+
+ if (arg_root_enabled)
+ r = add_root_mount();
+
+ if (!in_initrd()) {
+ k = add_mounts();
+ if (r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/hibernate-resume/hibernate-resume-generator.c b/src/hibernate-resume/hibernate-resume-generator.c
new file mode 100644
index 0000000..04a28c9
--- /dev/null
+++ b/src/hibernate-resume/hibernate-resume-generator.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dropin.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "string-util.h"
+#include "unit-name.h"
+
+static const char *arg_dest = "/tmp";
+static char *arg_resume_device = NULL;
+static char *arg_resume_options = NULL;
+static char *arg_root_options = NULL;
+static bool arg_noresume = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_resume_device, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_resume_options, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root_options, freep);
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+
+ if (streq(key, "resume")) {
+ char *s;
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ s = fstab_node_to_udev_node(value);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(arg_resume_device, s);
+
+ } else if (streq(key, "resumeflags")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (!strextend_with_separator(&arg_resume_options, ",", value, NULL))
+ return log_oom();
+
+ } else if (streq(key, "rootflags")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (!strextend_with_separator(&arg_root_options, ",", value, NULL))
+ return log_oom();
+
+ } else if (streq(key, "noresume")) {
+ if (value) {
+ log_warning("\"noresume\" kernel command line switch specified with an argument, ignoring.");
+ return 0;
+ }
+
+ arg_noresume = true;
+ }
+
+ return 0;
+}
+
+static int process_resume(void) {
+ _cleanup_free_ char *service_unit = NULL, *device_unit = NULL, *lnk = NULL;
+ int r;
+
+ if (!arg_resume_device)
+ return 0;
+
+ r = unit_name_from_path_instance("systemd-hibernate-resume", arg_resume_device, ".service",
+ &service_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ lnk = strjoin(arg_dest, "/" SPECIAL_SYSINIT_TARGET ".wants/", service_unit);
+ if (!lnk)
+ return log_oom();
+
+ mkdir_parents_label(lnk, 0755);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/systemd-hibernate-resume@.service", lnk) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", lnk);
+
+ r = unit_name_from_path(arg_resume_device, ".device", &device_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ r = write_drop_in(arg_dest, device_unit, 40, "device-timeout",
+ "# Automatically generated by systemd-hibernate-resume-generator\n\n"
+ "[Unit]\nJobTimeoutSec=0");
+ if (r < 0)
+ log_warning_errno(r, "Failed to write device timeout drop-in: %m");
+
+ r = generator_write_timeouts(arg_dest,
+ arg_resume_device,
+ arg_resume_device,
+ arg_resume_options ?: arg_root_options,
+ NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ int r = 0;
+
+ log_setup_generator();
+
+ if (argc > 1 && argc != 4)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes three or no arguments.");
+
+ if (argc > 1)
+ arg_dest = argv[1];
+
+ /* Don't even consider resuming outside of initramfs. */
+ if (!in_initrd()) {
+ log_debug("Not running in an initrd, quitting.");
+ return 0;
+ }
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (arg_noresume) {
+ log_notice("Found \"noresume\" on the kernel command line, quitting.");
+ return 0;
+ }
+
+ return process_resume();
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/hibernate-resume/hibernate-resume.c b/src/hibernate-resume/hibernate-resume.c
new file mode 100644
index 0000000..d8f91f4
--- /dev/null
+++ b/src/hibernate-resume/hibernate-resume.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ struct stat st;
+ const char *device;
+ _cleanup_free_ char *major_minor = NULL;
+ int r;
+
+ if (argc != 2) {
+ log_error("This program expects one argument.");
+ return EXIT_FAILURE;
+ }
+
+ log_setup_service();
+
+ umask(0022);
+
+ /* Refuse to run unless we are in an initrd() */
+ if (!in_initrd())
+ return EXIT_SUCCESS;
+
+ device = argv[1];
+
+ if (stat(device, &st) < 0) {
+ log_error_errno(errno, "Failed to stat '%s': %m", device);
+ return EXIT_FAILURE;
+ }
+
+ if (!S_ISBLK(st.st_mode)) {
+ log_error("Resume device '%s' is not a block device.", device);
+ return EXIT_FAILURE;
+ }
+
+ if (asprintf(&major_minor, "%d:%d", major(st.st_rdev), minor(st.st_rdev)) < 0) {
+ log_oom();
+ return EXIT_FAILURE;
+ }
+
+ r = write_string_file("/sys/power/resume", major_minor, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write '%s' to /sys/power/resume: %m", major_minor);
+ return EXIT_FAILURE;
+ }
+
+ /*
+ * The write above shall not return.
+ *
+ * However, failed resume is a normal condition (may mean that there is
+ * no hibernation image).
+ */
+
+ log_info("Could not resume from '%s' (%s).", device, major_minor);
+ return EXIT_SUCCESS;
+}
diff --git a/src/home/home-util.c b/src/home/home-util.c
new file mode 100644
index 0000000..cd971b7
--- /dev/null
+++ b/src/home/home-util.c
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dns-domain.h"
+#include "home-util.h"
+#include "libcrypt-util.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+bool suitable_user_name(const char *name) {
+
+ /* Checks whether the specified name is suitable for management via homed. Note that client-side
+ * we usually validate with the simple valid_user_group_name(), while server-side we are a bit more
+ * restrictive, so that we can change the rules server-side without having to update things
+ * client-side too. */
+
+ if (!valid_user_group_name(name, 0))
+ return false;
+
+ /* We generally rely on NSS to tell us which users not to care for, but let's filter out some
+ * particularly well-known users. */
+ if (STR_IN_SET(name,
+ "root",
+ "nobody",
+ NOBODY_USER_NAME, NOBODY_GROUP_NAME))
+ return false;
+
+ /* Let's also defend our own namespace, as well as Debian's (unwritten?) logic of prefixing system
+ * users with underscores. */
+ if (STARTSWITH_SET(name, "systemd-", "_"))
+ return false;
+
+ return true;
+}
+
+int suitable_realm(const char *realm) {
+ _cleanup_free_ char *normalized = NULL;
+ int r;
+
+ /* Similar to the above: let's validate the realm a bit stricter server-side than client side */
+
+ r = dns_name_normalize(realm, 0, &normalized); /* this also checks general validity */
+ if (r == -EINVAL)
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (!streq(realm, normalized)) /* is this normalized? */
+ return false;
+
+ if (dns_name_is_root(realm)) /* Don't allow top level domain */
+ return false;
+
+ return true;
+}
+
+int suitable_image_path(const char *path) {
+
+ return !empty_or_root(path) &&
+ path_is_valid(path) &&
+ path_is_absolute(path);
+}
+
+bool supported_fstype(const char *fstype) {
+ /* Limit the set of supported file systems a bit, as protection against little tested kernel file
+ * systems. Also, we only support the resize ioctls for these file systems. */
+ return STR_IN_SET(fstype, "ext4", "btrfs", "xfs");
+}
+
+int split_user_name_realm(const char *t, char **ret_user_name, char **ret_realm) {
+ _cleanup_free_ char *user_name = NULL, *realm = NULL;
+ const char *c;
+ int r;
+
+ assert(t);
+ assert(ret_user_name);
+ assert(ret_realm);
+
+ c = strchr(t, '@');
+ if (!c) {
+ user_name = strdup(t);
+ if (!user_name)
+ return -ENOMEM;
+ } else {
+ user_name = strndup(t, c - t);
+ if (!user_name)
+ return -ENOMEM;
+
+ realm = strdup(c + 1);
+ if (!realm)
+ return -ENOMEM;
+ }
+
+ if (!suitable_user_name(user_name))
+ return -EINVAL;
+
+ if (realm) {
+ r = suitable_realm(realm);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ }
+
+ *ret_user_name = TAKE_PTR(user_name);
+ *ret_realm = TAKE_PTR(realm);
+
+ return 0;
+}
+
+int bus_message_append_secret(sd_bus_message *m, UserRecord *secret) {
+ _cleanup_(erase_and_freep) char *formatted = NULL;
+ JsonVariant *v;
+ int r;
+
+ assert(m);
+ assert(secret);
+
+ if (!FLAGS_SET(secret->mask, USER_RECORD_SECRET))
+ return sd_bus_message_append(m, "s", "{}");
+
+ v = json_variant_by_key(secret->json, "secret");
+ if (!v)
+ return -EINVAL;
+
+ r = json_variant_format(v, 0, &formatted);
+ if (r < 0)
+ return r;
+
+ (void) sd_bus_message_sensitive(m);
+
+ return sd_bus_message_append(m, "s", formatted);
+}
diff --git a/src/home/home-util.h b/src/home/home-util.h
new file mode 100644
index 0000000..fba1c7d
--- /dev/null
+++ b/src/home/home-util.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+#include "time-util.h"
+#include "user-record.h"
+
+bool suitable_user_name(const char *name);
+int suitable_realm(const char *realm);
+int suitable_image_path(const char *path);
+
+bool supported_fstype(const char *fstype);
+
+int split_user_name_realm(const char *t, char **ret_user_name, char **ret_realm);
+
+int bus_message_append_secret(sd_bus_message *m, UserRecord *secret);
+
+/* Many of our operations might be slow due to crypto, fsck, recursive chown() and so on. For these
+ * operations permit a *very* long timeout */
+#define HOME_SLOW_BUS_CALL_TIMEOUT_USEC (2*USEC_PER_MINUTE)
diff --git a/src/home/homectl-fido2.c b/src/home/homectl-fido2.c
new file mode 100644
index 0000000..5557b70
--- /dev/null
+++ b/src/home/homectl-fido2.c
@@ -0,0 +1,534 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_LIBFIDO2
+#include <fido.h>
+#endif
+
+#include "ask-password-api.h"
+#include "errno-util.h"
+#include "format-table.h"
+#include "hexdecoct.h"
+#include "homectl-fido2.h"
+#include "homectl-pkcs11.h"
+#include "libcrypt-util.h"
+#include "locale-util.h"
+#include "memory-util.h"
+#include "random-util.h"
+#include "strv.h"
+
+#if HAVE_LIBFIDO2
+static int add_fido2_credential_id(
+ JsonVariant **v,
+ const void *cid,
+ size_t cid_size) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ char *escaped = NULL;
+ int r;
+
+ assert(v);
+ assert(cid);
+
+ r = base64mem(cid, cid_size, &escaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to base64 encode FIDO2 credential ID: %m");
+
+ w = json_variant_ref(json_variant_by_key(*v, "fido2HmacCredential"));
+ if (w) {
+ r = json_variant_strv(w, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse FIDO2 credential ID list: %m");
+
+ if (strv_contains(l, escaped))
+ return 0;
+ }
+
+ r = strv_extend(&l, escaped);
+ if (r < 0)
+ return log_oom();
+
+ w = json_variant_unref(w);
+ r = json_variant_new_array_strv(&w, l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create FIDO2 credential ID JSON: %m");
+
+ r = json_variant_set_field(v, "fido2HmacCredential", w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update FIDO2 credential ID: %m");
+
+ return 0;
+}
+
+static int add_fido2_salt(
+ JsonVariant **v,
+ const void *cid,
+ size_t cid_size,
+ const void *fido2_salt,
+ size_t fido2_salt_size,
+ const void *secret,
+ size_t secret_size) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *l = NULL, *w = NULL, *e = NULL;
+ _cleanup_(erase_and_freep) char *base64_encoded = NULL, *hashed = NULL;
+ int r;
+
+ /* Before using UNIX hashing on the supplied key we base64 encode it, since crypt_r() and friends
+ * expect a NUL terminated string, and we use a binary key */
+ r = base64mem(secret, secret_size, &base64_encoded);
+ if (r < 0)
+ return log_error_errno(r, "Failed to base64 encode secret key: %m");
+
+ r = hash_password(base64_encoded, &hashed);
+ if (r < 0)
+ return log_error_errno(errno_or_else(EINVAL), "Failed to UNIX hash secret key: %m");
+
+ r = json_build(&e, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("credential", JSON_BUILD_BASE64(cid, cid_size)),
+ JSON_BUILD_PAIR("salt", JSON_BUILD_BASE64(fido2_salt, fido2_salt_size)),
+ JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRING(hashed))));
+ if (r < 0)
+ return log_error_errno(r, "Failed to build FIDO2 salt JSON key object: %m");
+
+ w = json_variant_ref(json_variant_by_key(*v, "privileged"));
+ l = json_variant_ref(json_variant_by_key(w, "fido2HmacSalt"));
+
+ r = json_variant_append_array(&l, e);
+ if (r < 0)
+ return log_error_errno(r, "Failed append FIDO2 salt: %m");
+
+ r = json_variant_set_field(&w, "fido2HmacSalt", l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set FDO2 salt: %m");
+
+ r = json_variant_set_field(v, "privileged", w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update privileged field: %m");
+
+ return 0;
+}
+#endif
+
+#define FIDO2_SALT_SIZE 32
+
+int identity_add_fido2_parameters(
+ JsonVariant **v,
+ const char *device) {
+
+#if HAVE_LIBFIDO2
+ _cleanup_(fido_cbor_info_free) fido_cbor_info_t *di = NULL;
+ _cleanup_(fido_assert_free) fido_assert_t *a = NULL;
+ _cleanup_(erase_and_freep) char *used_pin = NULL;
+ _cleanup_(fido_cred_free) fido_cred_t *c = NULL;
+ _cleanup_(fido_dev_free) fido_dev_t *d = NULL;
+ _cleanup_(erase_and_freep) void *salt = NULL;
+ JsonVariant *un, *realm, *rn;
+ bool found_extension = false;
+ const void *cid, *secret;
+ const char *fido_un;
+ size_t n, cid_size, secret_size;
+ char **e;
+ int r;
+
+ /* Construction is like this: we generate a salt of 32 bytes. We then ask the FIDO2 device to
+ * HMAC-SHA256 it for us with its internal key. The result is the key used by LUKS and account
+ * authentication. LUKS and UNIX password auth all do their own salting before hashing, so that FIDO2
+ * device never sees the volume key.
+ *
+ * S = HMAC-SHA256(I, D)
+ *
+ * with: S → LUKS/account authentication key (never stored)
+ * I → internal key on FIDO2 device (stored in the FIDO2 device)
+ * D → salt we generate here (stored in the privileged part of the JSON record)
+ *
+ */
+
+ assert(v);
+ assert(device);
+
+ salt = malloc(FIDO2_SALT_SIZE);
+ if (!salt)
+ return log_oom();
+
+ r = genuine_random_bytes(salt, FIDO2_SALT_SIZE, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate salt: %m");
+
+ d = fido_dev_new();
+ if (!d)
+ return log_oom();
+
+ r = fido_dev_open(d, device);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to open FIDO2 device %s: %s", device, fido_strerr(r));
+
+ if (!fido_dev_is_fido2(d))
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
+ "Specified device %s is not a FIDO2 device.", device);
+
+ di = fido_cbor_info_new();
+ if (!di)
+ return log_oom();
+
+ r = fido_dev_get_cbor_info(d, di);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to get CBOR device info for %s: %s", device, fido_strerr(r));
+
+ e = fido_cbor_info_extensions_ptr(di);
+ n = fido_cbor_info_extensions_len(di);
+
+ for (size_t i = 0; i < n; i++)
+ if (streq(e[i], "hmac-secret")) {
+ found_extension = true;
+ break;
+ }
+
+ if (!found_extension)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
+ "Specified device %s is a FIDO2 device, but does not support the required HMAC-SECRET extension.", device);
+
+ c = fido_cred_new();
+ if (!c)
+ return log_oom();
+
+ r = fido_cred_set_extensions(c, FIDO_EXT_HMAC_SECRET);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to enable HMAC-SECRET extension on FIDO2 credential: %s", fido_strerr(r));
+
+ r = fido_cred_set_rp(c, "io.systemd.home", "Home Directory");
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 credential relying party ID/name: %s", fido_strerr(r));
+
+ r = fido_cred_set_type(c, COSE_ES256);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 credential type to ES256: %s", fido_strerr(r));
+
+ un = json_variant_by_key(*v, "userName");
+ if (!un)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "userName field of user record is missing");
+ if (!json_variant_is_string(un))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "userName field of user record is not a string");
+
+ realm = json_variant_by_key(*v, "realm");
+ if (realm) {
+ if (!json_variant_is_string(realm))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "realm field of user record is not a string");
+
+ fido_un = strjoina(json_variant_string(un), json_variant_string(realm));
+ } else
+ fido_un = json_variant_string(un);
+
+ rn = json_variant_by_key(*v, "realName");
+ if (rn && !json_variant_is_string(rn))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "realName field of user record is not a string");
+
+ r = fido_cred_set_user(c,
+ (const unsigned char*) fido_un, strlen(fido_un), /* We pass the user ID and name as the same */
+ fido_un,
+ rn ? json_variant_string(rn) : NULL,
+ NULL /* icon URL */);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 credential user data: %s", fido_strerr(r));
+
+ r = fido_cred_set_clientdata_hash(c, (const unsigned char[32]) {}, 32);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 client data hash: %s", fido_strerr(r));
+
+ r = fido_cred_set_rk(c, FIDO_OPT_FALSE);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to turn off FIDO2 resident key option of credential: %s", fido_strerr(r));
+
+ r = fido_cred_set_uv(c, FIDO_OPT_FALSE);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to turn off FIDO2 user verification option of credential: %s", fido_strerr(r));
+
+ log_info("Initializing FIDO2 credential on security token.");
+
+ log_notice("%s%s(Hint: This might require verification of user presence on security token.)",
+ emoji_enabled() ? special_glyph(SPECIAL_GLYPH_TOUCH) : "",
+ emoji_enabled() ? " " : "");
+
+ r = fido_dev_make_cred(d, c, NULL);
+ if (r == FIDO_ERR_PIN_REQUIRED) {
+ _cleanup_free_ char *text = NULL;
+
+ if (asprintf(&text, "Please enter security token PIN:") < 0)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_(strv_free_erasep) char **pin = NULL;
+ char **i;
+
+ r = ask_password_auto(text, "user-home", NULL, "fido2-pin", USEC_INFINITY, 0, &pin);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire user PIN: %m");
+
+ r = FIDO_ERR_PIN_INVALID;
+ STRV_FOREACH(i, pin) {
+ if (isempty(*i)) {
+ log_info("PIN may not be empty.");
+ continue;
+ }
+
+ r = fido_dev_make_cred(d, c, *i);
+ if (r == FIDO_OK) {
+ used_pin = strdup(*i);
+ if (!used_pin)
+ return log_oom();
+ break;
+ }
+ if (r != FIDO_ERR_PIN_INVALID)
+ break;
+ }
+
+ if (r != FIDO_ERR_PIN_INVALID)
+ break;
+
+ log_notice("PIN incorrect, please try again.");
+ }
+ }
+ if (r == FIDO_ERR_PIN_AUTH_BLOCKED)
+ return log_notice_errno(SYNTHETIC_ERRNO(EPERM),
+ "Token PIN is currently blocked, please remove and reinsert token.");
+ if (r == FIDO_ERR_ACTION_TIMEOUT)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSTR),
+ "Token action timeout. (User didn't interact with token quickly enough.)");
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to generate FIDO2 credential: %s", fido_strerr(r));
+
+ cid = fido_cred_id_ptr(c);
+ if (!cid)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to get FIDO2 credential ID.");
+
+ cid_size = fido_cred_id_len(c);
+
+ a = fido_assert_new();
+ if (!a)
+ return log_oom();
+
+ r = fido_assert_set_extensions(a, FIDO_EXT_HMAC_SECRET);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to enable HMAC-SECRET extension on FIDO2 assertion: %s", fido_strerr(r));
+
+ r = fido_assert_set_hmac_salt(a, salt, FIDO2_SALT_SIZE);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set salt on FIDO2 assertion: %s", fido_strerr(r));
+
+ r = fido_assert_set_rp(a, "io.systemd.home");
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 assertion ID: %s", fido_strerr(r));
+
+ r = fido_assert_set_clientdata_hash(a, (const unsigned char[32]) {}, 32);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 assertion client data hash: %s", fido_strerr(r));
+
+ r = fido_assert_allow_cred(a, cid, cid_size);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to add FIDO2 assertion credential ID: %s", fido_strerr(r));
+
+ r = fido_assert_set_up(a, FIDO_OPT_FALSE);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to turn off FIDO2 assertion user presence: %s", fido_strerr(r));
+
+ log_info("Generating secret key on FIDO2 security token.");
+
+ r = fido_dev_get_assert(d, a, used_pin);
+ if (r == FIDO_ERR_UP_REQUIRED) {
+ r = fido_assert_set_up(a, FIDO_OPT_TRUE);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to turn on FIDO2 assertion user presence: %s", fido_strerr(r));
+
+ log_notice("%s%sIn order to allow secret key generation, please verify presence on security token.",
+ emoji_enabled() ? special_glyph(SPECIAL_GLYPH_TOUCH) : "",
+ emoji_enabled() ? " " : "");
+
+ r = fido_dev_get_assert(d, a, used_pin);
+ }
+ if (r == FIDO_ERR_ACTION_TIMEOUT)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSTR),
+ "Token action timeout. (User didn't interact with token quickly enough.)");
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to ask token for assertion: %s", fido_strerr(r));
+
+ secret = fido_assert_hmac_secret_ptr(a, 0);
+ if (!secret)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve HMAC secret.");
+
+ secret_size = fido_assert_hmac_secret_len(a, 0);
+
+ r = add_fido2_credential_id(v, cid, cid_size);
+ if (r < 0)
+ return r;
+
+ r = add_fido2_salt(v,
+ cid,
+ cid_size,
+ salt,
+ FIDO2_SALT_SIZE,
+ secret,
+ secret_size);
+ if (r < 0)
+ return r;
+
+ /* If we acquired the PIN also include it in the secret section of the record, so that systemd-homed
+ * can use it if it needs to, given that it likely needs to decrypt the key again to pass to LUKS or
+ * fscrypt. */
+ r = identity_add_token_pin(v, used_pin);
+ if (r < 0)
+ return r;
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "FIDO2 tokens not supported on this build.");
+#endif
+}
+
+int list_fido2_devices(void) {
+#if HAVE_LIBFIDO2
+ _cleanup_(table_unrefp) Table *t = NULL;
+ size_t allocated = 64, found = 0;
+ fido_dev_info_t *di = NULL;
+ int r;
+
+ di = fido_dev_info_new(allocated);
+ if (!di)
+ return log_oom();
+
+ r = fido_dev_info_manifest(di, allocated, &found);
+ if (r == FIDO_ERR_INTERNAL || (r == FIDO_OK && found == 0)) {
+ /* The library returns FIDO_ERR_INTERNAL when no devices are found. I wish it wouldn't. */
+ log_info("No FIDO2 devices found.");
+ r = 0;
+ goto finish;
+ }
+ if (r != FIDO_OK) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to enumerate FIDO2 devices: %s", fido_strerr(r));
+ goto finish;
+ }
+
+ t = table_new("path", "manufacturer", "product");
+ if (!t) {
+ r = log_oom();
+ goto finish;
+ }
+
+ for (size_t i = 0; i < found; i++) {
+ const fido_dev_info_t *entry;
+
+ entry = fido_dev_info_ptr(di, i);
+ if (!entry) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to get device information for FIDO device %zu.", i);
+ goto finish;
+ }
+
+ r = table_add_many(
+ t,
+ TABLE_PATH, fido_dev_info_path(entry),
+ TABLE_STRING, fido_dev_info_manufacturer_string(entry),
+ TABLE_STRING, fido_dev_info_product_string(entry));
+ if (r < 0) {
+ table_log_add_error(r);
+ goto finish;
+ }
+ }
+
+ r = table_print(t, stdout);
+ if (r < 0) {
+ log_error_errno(r, "Failed to show device table: %m");
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ fido_dev_info_free(&di, allocated);
+ return r;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "FIDO2 tokens not supported on this build.");
+#endif
+}
+
+int find_fido2_auto(char **ret) {
+#if HAVE_LIBFIDO2
+ _cleanup_free_ char *copy = NULL;
+ size_t di_size = 64, found = 0;
+ const fido_dev_info_t *entry;
+ fido_dev_info_t *di = NULL;
+ const char *path;
+ int r;
+
+ di = fido_dev_info_new(di_size);
+ if (!di)
+ return log_oom();
+
+ r = fido_dev_info_manifest(di, di_size, &found);
+ if (r == FIDO_ERR_INTERNAL || (r == FIDO_OK && found == 0)) {
+ /* The library returns FIDO_ERR_INTERNAL when no devices are found. I wish it wouldn't. */
+ r = log_error_errno(SYNTHETIC_ERRNO(ENODEV), "No FIDO2 devices found.");
+ goto finish;
+ }
+ if (r != FIDO_OK) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to enumerate FIDO2 devices: %s", fido_strerr(r));
+ goto finish;
+ }
+ if (found > 1) {
+ r = log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), "More than one FIDO2 device found.");
+ goto finish;
+ }
+
+ entry = fido_dev_info_ptr(di, 0);
+ if (!entry) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to get device information for FIDO device 0.");
+ goto finish;
+ }
+
+ path = fido_dev_info_path(entry);
+ if (!path) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to query FIDO device path.");
+ goto finish;
+ }
+
+ copy = strdup(path);
+ if (!copy) {
+ r = log_oom();
+ goto finish;
+ }
+
+ *ret = TAKE_PTR(copy);
+ r = 0;
+
+finish:
+ fido_dev_info_free(&di, di_size);
+ return r;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "FIDO2 tokens not supported on this build.");
+#endif
+}
diff --git a/src/home/homectl-fido2.h b/src/home/homectl-fido2.h
new file mode 100644
index 0000000..d0349f5
--- /dev/null
+++ b/src/home/homectl-fido2.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "json.h"
+
+int identity_add_fido2_parameters(JsonVariant **v, const char *device);
+
+int list_fido2_devices(void);
+
+int find_fido2_auto(char **ret);
diff --git a/src/home/homectl-pkcs11.c b/src/home/homectl-pkcs11.c
new file mode 100644
index 0000000..4b7f833
--- /dev/null
+++ b/src/home/homectl-pkcs11.c
@@ -0,0 +1,477 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "errno-util.h"
+#include "format-table.h"
+#include "hexdecoct.h"
+#include "homectl-pkcs11.h"
+#include "libcrypt-util.h"
+#include "memory-util.h"
+#include "openssl-util.h"
+#include "pkcs11-util.h"
+#include "random-util.h"
+#include "strv.h"
+
+struct pkcs11_callback_data {
+ char *pin_used;
+ X509 *cert;
+};
+
+#if HAVE_P11KIT
+static void pkcs11_callback_data_release(struct pkcs11_callback_data *data) {
+ erase_and_free(data->pin_used);
+ X509_free(data->cert);
+}
+
+static int pkcs11_callback(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_SLOT_ID slot_id,
+ const CK_SLOT_INFO *slot_info,
+ const CK_TOKEN_INFO *token_info,
+ P11KitUri *uri,
+ void *userdata) {
+
+ _cleanup_(erase_and_freep) char *pin_used = NULL;
+ struct pkcs11_callback_data *data = userdata;
+ CK_OBJECT_HANDLE object;
+ int r;
+
+ assert(m);
+ assert(slot_info);
+ assert(token_info);
+ assert(uri);
+ assert(data);
+
+ /* Called for every token matching our URI */
+
+ r = pkcs11_token_login(m, session, slot_id, token_info, "home directory operation", "user-home", "pkcs11-pin", UINT64_MAX, &pin_used);
+ if (r < 0)
+ return r;
+
+ r = pkcs11_token_find_x509_certificate(m, session, uri, &object);
+ if (r < 0)
+ return r;
+
+ r = pkcs11_token_read_x509_certificate(m, session, object, &data->cert);
+ if (r < 0)
+ return r;
+
+ /* Let's read some random data off the token and write it to the kernel pool before we generate our
+ * random key from it. This way we can claim the quality of the RNG is at least as good as the
+ * kernel's and the token's pool */
+ (void) pkcs11_token_acquire_rng(m, session);
+
+ data->pin_used = TAKE_PTR(pin_used);
+ return 1;
+}
+#endif
+
+static int acquire_pkcs11_certificate(
+ const char *uri,
+ X509 **ret_cert,
+ char **ret_pin_used) {
+
+#if HAVE_P11KIT
+ _cleanup_(pkcs11_callback_data_release) struct pkcs11_callback_data data = {};
+ int r;
+
+ r = pkcs11_find_token(uri, pkcs11_callback, &data);
+ if (r == -EAGAIN) /* pkcs11_find_token() doesn't log about this error, but all others */
+ return log_error_errno(SYNTHETIC_ERRNO(ENXIO),
+ "Specified PKCS#11 token with URI '%s' not found.",
+ uri);
+ if (r < 0)
+ return r;
+
+ *ret_cert = TAKE_PTR(data.cert);
+ *ret_pin_used = TAKE_PTR(data.pin_used);
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "PKCS#11 tokens not supported on this build.");
+#endif
+}
+
+static int encrypt_bytes(
+ EVP_PKEY *pkey,
+ const void *decrypted_key,
+ size_t decrypted_key_size,
+ void **ret_encrypt_key,
+ size_t *ret_encrypt_key_size) {
+
+ _cleanup_(EVP_PKEY_CTX_freep) EVP_PKEY_CTX *ctx = NULL;
+ _cleanup_free_ void *b = NULL;
+ size_t l;
+
+ ctx = EVP_PKEY_CTX_new(pkey, NULL);
+ if (!ctx)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to allocate public key context");
+
+ if (EVP_PKEY_encrypt_init(ctx) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize public key context");
+
+ if (EVP_PKEY_CTX_set_rsa_padding(ctx, RSA_PKCS1_PADDING) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to configure PKCS#1 padding");
+
+ if (EVP_PKEY_encrypt(ctx, NULL, &l, decrypted_key, decrypted_key_size) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to determine encrypted key size");
+
+ b = malloc(l);
+ if (!b)
+ return log_oom();
+
+ if (EVP_PKEY_encrypt(ctx, b, &l, decrypted_key, decrypted_key_size) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to determine encrypted key size");
+
+ *ret_encrypt_key = TAKE_PTR(b);
+ *ret_encrypt_key_size = l;
+
+ return 0;
+}
+
+static int add_pkcs11_encrypted_key(
+ JsonVariant **v,
+ const char *uri,
+ const void *encrypted_key, size_t encrypted_key_size,
+ const void *decrypted_key, size_t decrypted_key_size) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *l = NULL, *w = NULL, *e = NULL;
+ _cleanup_(erase_and_freep) char *base64_encoded = NULL, *hashed = NULL;
+ int r;
+
+ assert(v);
+ assert(uri);
+ assert(encrypted_key);
+ assert(encrypted_key_size > 0);
+ assert(decrypted_key);
+ assert(decrypted_key_size > 0);
+
+ /* Before using UNIX hashing on the supplied key we base64 encode it, since crypt_r() and friends
+ * expect a NUL terminated string, and we use a binary key */
+ r = base64mem(decrypted_key, decrypted_key_size, &base64_encoded);
+ if (r < 0)
+ return log_error_errno(r, "Failed to base64 encode secret key: %m");
+
+ r = hash_password(base64_encoded, &hashed);
+ if (r < 0)
+ return log_error_errno(errno_or_else(EINVAL), "Failed to UNIX hash secret key: %m");
+
+ r = json_build(&e, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("uri", JSON_BUILD_STRING(uri)),
+ JSON_BUILD_PAIR("data", JSON_BUILD_BASE64(encrypted_key, encrypted_key_size)),
+ JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRING(hashed))));
+ if (r < 0)
+ return log_error_errno(r, "Failed to build encrypted JSON key object: %m");
+
+ w = json_variant_ref(json_variant_by_key(*v, "privileged"));
+ l = json_variant_ref(json_variant_by_key(w, "pkcs11EncryptedKey"));
+
+ r = json_variant_append_array(&l, e);
+ if (r < 0)
+ return log_error_errno(r, "Failed append PKCS#11 encrypted key: %m");
+
+ r = json_variant_set_field(&w, "pkcs11EncryptedKey", l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set PKCS#11 encrypted key: %m");
+
+ r = json_variant_set_field(v, "privileged", w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update privileged field: %m");
+
+ return 0;
+}
+
+static int add_pkcs11_token_uri(JsonVariant **v, const char *uri) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ assert(v);
+ assert(uri);
+
+ w = json_variant_ref(json_variant_by_key(*v, "pkcs11TokenUri"));
+ if (w) {
+ r = json_variant_strv(w, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PKCS#11 token list: %m");
+
+ if (strv_contains(l, uri))
+ return 0;
+ }
+
+ r = strv_extend(&l, uri);
+ if (r < 0)
+ return log_oom();
+
+ w = json_variant_unref(w);
+ r = json_variant_new_array_strv(&w, l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PKCS#11 token URI JSON: %m");
+
+ r = json_variant_set_field(v, "pkcs11TokenUri", w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update PKCS#11 token URI list: %m");
+
+ return 0;
+}
+
+int identity_add_token_pin(JsonVariant **v, const char *pin) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL, *l = NULL;
+ _cleanup_(strv_free_erasep) char **pins = NULL;
+ int r;
+
+ assert(v);
+
+ if (isempty(pin))
+ return 0;
+
+ w = json_variant_ref(json_variant_by_key(*v, "secret"));
+ l = json_variant_ref(json_variant_by_key(w, "tokenPin"));
+
+ r = json_variant_strv(l, &pins);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert PIN array: %m");
+
+ if (strv_find(pins, pin))
+ return 0;
+
+ r = strv_extend(&pins, pin);
+ if (r < 0)
+ return log_oom();
+
+ strv_uniq(pins);
+
+ l = json_variant_unref(l);
+
+ r = json_variant_new_array_strv(&l, pins);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate new PIN array JSON: %m");
+
+ json_variant_sensitive(l);
+
+ r = json_variant_set_field(&w, "tokenPin", l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update PIN field: %m");
+
+ r = json_variant_set_field(v, "secret", w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update secret object: %m");
+
+ return 1;
+}
+
+int identity_add_pkcs11_key_data(JsonVariant **v, const char *uri) {
+ _cleanup_(erase_and_freep) void *decrypted_key = NULL, *encrypted_key = NULL;
+ _cleanup_(erase_and_freep) char *pin = NULL;
+ size_t decrypted_key_size, encrypted_key_size;
+ _cleanup_(X509_freep) X509 *cert = NULL;
+ EVP_PKEY *pkey;
+ RSA *rsa;
+ int bits;
+ int r;
+
+ assert(v);
+
+ r = acquire_pkcs11_certificate(uri, &cert, &pin);
+ if (r < 0)
+ return r;
+
+ pkey = X509_get0_pubkey(cert);
+ if (!pkey)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to extract public key from X.509 certificate.");
+
+ if (EVP_PKEY_base_id(pkey) != EVP_PKEY_RSA)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "X.509 certificate does not refer to RSA key.");
+
+ rsa = EVP_PKEY_get0_RSA(pkey);
+ if (!rsa)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire RSA public key from X.509 certificate.");
+
+ bits = RSA_bits(rsa);
+ log_debug("Bits in RSA key: %i", bits);
+
+ /* We use PKCS#1 padding for the RSA cleartext, hence let's leave some extra space for it, hence only
+ * generate a random key half the size of the RSA length */
+ decrypted_key_size = bits / 8 / 2;
+
+ if (decrypted_key_size < 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Uh, RSA key size too short?");
+
+ log_debug("Generating %zu bytes random key.", decrypted_key_size);
+
+ decrypted_key = malloc(decrypted_key_size);
+ if (!decrypted_key)
+ return log_oom();
+
+ r = genuine_random_bytes(decrypted_key, decrypted_key_size, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate random key: %m");
+
+ r = encrypt_bytes(pkey, decrypted_key, decrypted_key_size, &encrypted_key, &encrypted_key_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to encrypt key: %m");
+
+ /* Add the token URI to the public part of the record. */
+ r = add_pkcs11_token_uri(v, uri);
+ if (r < 0)
+ return r;
+
+ /* Include the encrypted version of the random key we just generated in the privileged part of the record */
+ r = add_pkcs11_encrypted_key(
+ v,
+ uri,
+ encrypted_key, encrypted_key_size,
+ decrypted_key, decrypted_key_size);
+ if (r < 0)
+ return r;
+
+ /* If we acquired the PIN also include it in the secret section of the record, so that systemd-homed
+ * can use it if it needs to, given that it likely needs to decrypt the key again to pass to LUKS or
+ * fscrypt. */
+ r = identity_add_token_pin(v, pin);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+#if HAVE_P11KIT
+static int list_callback(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_SLOT_ID slot_id,
+ const CK_SLOT_INFO *slot_info,
+ const CK_TOKEN_INFO *token_info,
+ P11KitUri *uri,
+ void *userdata) {
+
+ _cleanup_free_ char *token_uri_string = NULL, *token_label = NULL, *token_manufacturer_id = NULL, *token_model = NULL;
+ _cleanup_(p11_kit_uri_freep) P11KitUri *token_uri = NULL;
+ Table *t = userdata;
+ int uri_result, r;
+
+ assert(slot_info);
+ assert(token_info);
+
+ /* We only care about hardware devices here with a token inserted. Let's filter everything else
+ * out. (Note that the user can explicitly specify non-hardware tokens if they like, but during
+ * enumeration we'll filter those, since software tokens are typically the system certificate store
+ * and such, and it's typically not what people want to bind their home directories to.) */
+ if (!FLAGS_SET(token_info->flags, CKF_HW_SLOT|CKF_TOKEN_PRESENT))
+ return -EAGAIN;
+
+ token_label = pkcs11_token_label(token_info);
+ if (!token_label)
+ return log_oom();
+
+ token_manufacturer_id = pkcs11_token_manufacturer_id(token_info);
+ if (!token_manufacturer_id)
+ return log_oom();
+
+ token_model = pkcs11_token_model(token_info);
+ if (!token_model)
+ return log_oom();
+
+ token_uri = uri_from_token_info(token_info);
+ if (!token_uri)
+ return log_oom();
+
+ uri_result = p11_kit_uri_format(token_uri, P11_KIT_URI_FOR_ANY, &token_uri_string);
+ if (uri_result != P11_KIT_URI_OK)
+ return log_warning_errno(SYNTHETIC_ERRNO(EAGAIN), "Failed to format slot URI: %s", p11_kit_uri_message(uri_result));
+
+ r = table_add_many(
+ t,
+ TABLE_STRING, token_uri_string,
+ TABLE_STRING, token_label,
+ TABLE_STRING, token_manufacturer_id,
+ TABLE_STRING, token_model);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ return -EAGAIN; /* keep scanning */
+}
+#endif
+
+int list_pkcs11_tokens(void) {
+#if HAVE_P11KIT
+ _cleanup_(table_unrefp) Table *t = NULL;
+ int r;
+
+ t = table_new("uri", "label", "manufacturer", "model");
+ if (!t)
+ return log_oom();
+
+ r = pkcs11_find_token(NULL, list_callback, t);
+ if (r < 0 && r != -EAGAIN)
+ return r;
+
+ if (table_get_rows(t) <= 1) {
+ log_info("No suitable PKCS#11 tokens found.");
+ return 0;
+ }
+
+ r = table_print(t, stdout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to show device table: %m");
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "PKCS#11 tokens not supported on this build.");
+#endif
+}
+
+#if HAVE_P11KIT
+static int auto_callback(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_SLOT_ID slot_id,
+ const CK_SLOT_INFO *slot_info,
+ const CK_TOKEN_INFO *token_info,
+ P11KitUri *uri,
+ void *userdata) {
+
+ _cleanup_(p11_kit_uri_freep) P11KitUri *token_uri = NULL;
+ char **t = userdata;
+ int uri_result;
+
+ assert(slot_info);
+ assert(token_info);
+
+ if (!FLAGS_SET(token_info->flags, CKF_HW_SLOT|CKF_TOKEN_PRESENT))
+ return -EAGAIN;
+
+ if (*t)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
+ "More than one suitable PKCS#11 token found.");
+
+ token_uri = uri_from_token_info(token_info);
+ if (!token_uri)
+ return log_oom();
+
+ uri_result = p11_kit_uri_format(token_uri, P11_KIT_URI_FOR_ANY, t);
+ if (uri_result != P11_KIT_URI_OK)
+ return log_warning_errno(SYNTHETIC_ERRNO(EAGAIN), "Failed to format slot URI: %s", p11_kit_uri_message(uri_result));
+
+ return 0;
+}
+#endif
+
+int find_pkcs11_token_auto(char **ret) {
+#if HAVE_P11KIT
+ int r;
+
+ r = pkcs11_find_token(NULL, auto_callback, ret);
+ if (r == -EAGAIN)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "No suitable PKCS#11 tokens found.");
+ if (r < 0)
+ return r;
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "PKCS#11 tokens not supported on this build.");
+#endif
+}
diff --git a/src/home/homectl-pkcs11.h b/src/home/homectl-pkcs11.h
new file mode 100644
index 0000000..5c30fee
--- /dev/null
+++ b/src/home/homectl-pkcs11.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "json.h"
+
+int identity_add_token_pin(JsonVariant **v, const char *pin);
+
+int identity_add_pkcs11_key_data(JsonVariant **v, const char *token_uri);
+
+int list_pkcs11_tokens(void);
+int find_pkcs11_token_auto(char **ret);
diff --git a/src/home/homectl-recovery-key.c b/src/home/homectl-recovery-key.c
new file mode 100644
index 0000000..4a6649d
--- /dev/null
+++ b/src/home/homectl-recovery-key.c
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "errno-util.h"
+#include "homectl-recovery-key.h"
+#include "libcrypt-util.h"
+#include "locale-util.h"
+#include "memory-util.h"
+#include "modhex.h"
+#include "qrcode-util.h"
+#include "random-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static int make_recovery_key(char **ret) {
+ _cleanup_(erase_and_freep) char *formatted = NULL;
+ _cleanup_(erase_and_freep) uint8_t *key = NULL;
+ int r;
+
+ assert(ret);
+
+ key = new(uint8_t, MODHEX_RAW_LENGTH);
+ if (!key)
+ return log_oom();
+
+ r = genuine_random_bytes(key, MODHEX_RAW_LENGTH, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to gather entropy for recovery key: %m");
+
+ /* Let's now format it as 64 modhex chars, and after each 8 chars insert a dash */
+ formatted = new(char, MODHEX_FORMATTED_LENGTH);
+ if (!formatted)
+ return log_oom();
+
+ for (size_t i = 0, j = 0; i < MODHEX_RAW_LENGTH; i++) {
+ formatted[j++] = modhex_alphabet[key[i] >> 4];
+ formatted[j++] = modhex_alphabet[key[i] & 0xF];
+
+ if (i % 4 == 3)
+ formatted[j++] = '-';
+ }
+
+ formatted[MODHEX_FORMATTED_LENGTH-1] = 0;
+
+ *ret = TAKE_PTR(formatted);
+ return 0;
+}
+
+static int add_privileged(JsonVariant **v, const char *hashed) {
+ _cleanup_(json_variant_unrefp) JsonVariant *e = NULL, *w = NULL, *l = NULL;
+ int r;
+
+ assert(v);
+ assert(hashed);
+
+ r = json_build(&e, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("type", JSON_BUILD_STRING("modhex64")),
+ JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRING(hashed))));
+ if (r < 0)
+ return log_error_errno(r, "Failed to build recover key JSON object: %m");
+
+ json_variant_sensitive(e);
+
+ w = json_variant_ref(json_variant_by_key(*v, "privileged"));
+ l = json_variant_ref(json_variant_by_key(w, "recoveryKey"));
+
+ r = json_variant_append_array(&l, e);
+ if (r < 0)
+ return log_error_errno(r, "Failed append recovery key: %m");
+
+ r = json_variant_set_field(&w, "recoveryKey", l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set recovery key array: %m");
+
+ r = json_variant_set_field(v, "privileged", w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update privileged field: %m");
+
+ return 0;
+}
+
+static int add_public(JsonVariant **v) {
+ _cleanup_strv_free_ char **types = NULL;
+ int r;
+
+ assert(v);
+
+ r = json_variant_strv(json_variant_by_key(*v, "recoveryKeyType"), &types);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse recovery key type list: %m");
+
+ r = strv_extend(&types, "modhex64");
+ if (r < 0)
+ return log_oom();
+
+ r = json_variant_set_field_strv(v, "recoveryKeyType", types);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update recovery key types: %m");
+
+ return 0;
+}
+
+static int add_secret(JsonVariant **v, const char *password) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL, *l = NULL;
+ _cleanup_(strv_free_erasep) char **passwords = NULL;
+ int r;
+
+ assert(v);
+ assert(password);
+
+ w = json_variant_ref(json_variant_by_key(*v, "secret"));
+ l = json_variant_ref(json_variant_by_key(w, "password"));
+
+ r = json_variant_strv(l, &passwords);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert password array: %m");
+
+ r = strv_extend(&passwords, password);
+ if (r < 0)
+ return log_oom();
+
+ r = json_variant_new_array_strv(&l, passwords);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate new password array JSON: %m");
+
+ json_variant_sensitive(l);
+
+ r = json_variant_set_field(&w, "password", l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update password field: %m");
+
+ r = json_variant_set_field(v, "secret", w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update secret object: %m");
+
+ return 0;
+}
+
+int identity_add_recovery_key(JsonVariant **v) {
+ _cleanup_(erase_and_freep) char *password = NULL, *hashed = NULL;
+ int r;
+
+ assert(v);
+
+ /* First, let's generate a secret key */
+ r = make_recovery_key(&password);
+ if (r < 0)
+ return r;
+
+ /* Let's UNIX hash it */
+ r = hash_password(password, &hashed);
+ if (r < 0)
+ return log_error_errno(errno_or_else(EINVAL), "Failed to UNIX hash secret key: %m");
+
+ /* Let's now add the "privileged" version of the recovery key */
+ r = add_privileged(v, hashed);
+ if (r < 0)
+ return r;
+
+ /* Let's then add the public information about the recovery key */
+ r = add_public(v);
+ if (r < 0)
+ return r;
+
+ /* Finally, let's add the new key to the secret part, too */
+ r = add_secret(v, password);
+ if (r < 0)
+ return r;
+
+ /* We output the key itself with a trailing newline to stdout and the decoration around it to stderr
+ * instead. */
+
+ fflush(stdout);
+ fprintf(stderr,
+ "A secret recovery key has been generated for this account:\n\n"
+ " %s%s%s",
+ emoji_enabled() ? special_glyph(SPECIAL_GLYPH_LOCK_AND_KEY) : "",
+ emoji_enabled() ? " " : "",
+ ansi_highlight());
+ fflush(stderr);
+
+ fputs(password, stdout);
+ fflush(stdout);
+
+ fputs(ansi_normal(), stderr);
+ fflush(stderr);
+
+ fputc('\n', stdout);
+ fflush(stdout);
+
+ fputs("\nPlease save this secret recovery key at a secure location. It may be used to\n"
+ "regain access to the account if the other configured access credentials have\n"
+ "been lost or forgotten. The recovery key may be entered in place of a password\n"
+ "whenever authentication is requested.\n", stderr);
+ fflush(stderr);
+
+ (void) print_qrcode(stderr, "You may optionally scan the recovery key off screen", password);
+
+ return 0;
+}
diff --git a/src/home/homectl-recovery-key.h b/src/home/homectl-recovery-key.h
new file mode 100644
index 0000000..ab195f9
--- /dev/null
+++ b/src/home/homectl-recovery-key.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "json.h"
+
+int identity_add_recovery_key(JsonVariant **v);
diff --git a/src/home/homectl.c b/src/home/homectl.c
new file mode 100644
index 0000000..7cfda7e
--- /dev/null
+++ b/src/home/homectl.c
@@ -0,0 +1,3381 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "sd-bus.h"
+
+#include "ask-password-api.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "cgroup-util.h"
+#include "dns-domain.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "home-util.h"
+#include "homectl-fido2.h"
+#include "homectl-pkcs11.h"
+#include "homectl-recovery-key.h"
+#include "locale-util.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pkcs11-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "pwquality-util.h"
+#include "rlimit-util.h"
+#include "spawn-polkit-agent.h"
+#include "terminal-util.h"
+#include "user-record-pwquality.h"
+#include "user-record-show.h"
+#include "user-record-util.h"
+#include "user-record.h"
+#include "user-util.h"
+#include "verbs.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static bool arg_ask_password = true;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static const char *arg_identity = NULL;
+static JsonVariant *arg_identity_extra = NULL;
+static JsonVariant *arg_identity_extra_privileged = NULL;
+static JsonVariant *arg_identity_extra_this_machine = NULL;
+static JsonVariant *arg_identity_extra_rlimits = NULL;
+static char **arg_identity_filter = NULL; /* this one is also applied to 'privileged' and 'thisMachine' subobjects */
+static char **arg_identity_filter_rlimits = NULL;
+static uint64_t arg_disk_size = UINT64_MAX;
+static uint64_t arg_disk_size_relative = UINT64_MAX;
+static char **arg_pkcs11_token_uri = NULL;
+static char **arg_fido2_device = NULL;
+static bool arg_recovery_key = false;
+static bool arg_json = false;
+static JsonFormatFlags arg_json_format_flags = 0;
+static bool arg_and_resize = false;
+static bool arg_and_change_password = false;
+static enum {
+ EXPORT_FORMAT_FULL, /* export the full record */
+ EXPORT_FORMAT_STRIPPED, /* strip "state" + "binding", but leave signature in place */
+ EXPORT_FORMAT_MINIMAL, /* also strip signature */
+} arg_export_format = EXPORT_FORMAT_FULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_identity_extra, json_variant_unrefp);
+STATIC_DESTRUCTOR_REGISTER(arg_identity_extra_this_machine, json_variant_unrefp);
+STATIC_DESTRUCTOR_REGISTER(arg_identity_extra_privileged, json_variant_unrefp);
+STATIC_DESTRUCTOR_REGISTER(arg_identity_extra_rlimits, json_variant_unrefp);
+STATIC_DESTRUCTOR_REGISTER(arg_identity_filter, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_identity_filter_rlimits, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_pkcs11_token_uri, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_fido2_device, strv_freep);
+
+static const BusLocator *bus_mgr;
+
+static bool identity_properties_specified(void) {
+ return
+ arg_identity ||
+ !json_variant_is_blank_object(arg_identity_extra) ||
+ !json_variant_is_blank_object(arg_identity_extra_privileged) ||
+ !json_variant_is_blank_object(arg_identity_extra_this_machine) ||
+ !json_variant_is_blank_object(arg_identity_extra_rlimits) ||
+ !strv_isempty(arg_identity_filter) ||
+ !strv_isempty(arg_identity_filter_rlimits) ||
+ !strv_isempty(arg_pkcs11_token_uri) ||
+ !strv_isempty(arg_fido2_device);
+}
+
+static int acquire_bus(sd_bus **bus) {
+ int r;
+
+ assert(bus);
+
+ if (*bus)
+ return 0;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ (void) sd_bus_set_allow_interactive_authorization(*bus, arg_ask_password);
+
+ return 0;
+}
+
+static int list_homes(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_mgr, "ListHomes", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list homes: %s", bus_error_message(&error, r));
+
+ table = table_new("name", "uid", "gid", "state", "realname", "home", "shell");
+ if (!table)
+ return log_oom();
+
+ r = sd_bus_message_enter_container(reply, 'a', "(susussso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *name, *state, *realname, *home, *shell, *color;
+ TableCell *cell;
+ uint32_t uid, gid;
+
+ r = sd_bus_message_read(reply, "(susussso)", &name, &uid, &state, &gid, &realname, &home, &shell, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = table_add_many(table,
+ TABLE_STRING, name,
+ TABLE_UID, uid,
+ TABLE_GID, gid);
+ if (r < 0)
+ return table_log_add_error(r);
+
+
+ r = table_add_cell(table, &cell, TABLE_STRING, state);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ color = user_record_state_color(state);
+ if (color)
+ (void) table_set_color(table, cell, color);
+
+ r = table_add_many(table,
+ TABLE_STRING, strna(empty_to_null(realname)),
+ TABLE_STRING, home,
+ TABLE_STRING, strna(empty_to_null(shell)));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (table_get_rows(table) > 1 || arg_json) {
+ r = table_set_sort(table, (size_t) 0, (size_t) -1);
+ if (r < 0)
+ return table_log_sort_error(r);
+
+ table_set_header(table, arg_legend);
+
+ if (arg_json)
+ r = table_print_json(table, stdout, arg_json_format_flags);
+ else
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ if (arg_legend && !arg_json) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu home areas listed.\n", table_get_rows(table) - 1);
+ else
+ printf("No home areas.\n");
+ }
+
+ return 0;
+}
+
+static int acquire_existing_password(const char *user_name, UserRecord *hr, bool emphasize_current) {
+ _cleanup_(strv_free_erasep) char **password = NULL;
+ _cleanup_free_ char *question = NULL;
+ char *e;
+ int r;
+
+ assert(user_name);
+ assert(hr);
+
+ e = getenv("PASSWORD");
+ if (e) {
+ /* People really shouldn't use environment variables for passing passwords. We support this
+ * only for testing purposes, and do not document the behaviour, so that people won't
+ * actually use this outside of testing. */
+
+ r = user_record_set_password(hr, STRV_MAKE(e), true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to store password: %m");
+
+ string_erase(e);
+ assert_se(unsetenv("PASSWORD") == 0);
+
+ return 0;
+ }
+
+ if (asprintf(&question, emphasize_current ?
+ "Please enter current password for user %s:" :
+ "Please enter password for user %s:",
+ user_name) < 0)
+ return log_oom();
+
+ r = ask_password_auto(question, "user-home", NULL, "home-password", USEC_INFINITY, ASK_PASSWORD_ACCEPT_CACHED|ASK_PASSWORD_PUSH_CACHE, &password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire password: %m");
+
+ r = user_record_set_password(hr, password, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to store password: %m");
+
+ return 0;
+}
+
+static int acquire_token_pin(const char *user_name, UserRecord *hr) {
+ _cleanup_(strv_free_erasep) char **pin = NULL;
+ _cleanup_free_ char *question = NULL;
+ char *e;
+ int r;
+
+ assert(user_name);
+ assert(hr);
+
+ e = getenv("PIN");
+ if (e) {
+ r = user_record_set_token_pin(hr, STRV_MAKE(e), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to store token PIN: %m");
+
+ string_erase(e);
+ assert_se(unsetenv("PIN") == 0);
+
+ return 0;
+ }
+
+ if (asprintf(&question, "Please enter security token PIN for user %s:", user_name) < 0)
+ return log_oom();
+
+ /* We never cache or use cached PINs, since usually there are only very few attempts allowed before the PIN is blocked */
+ r = ask_password_auto(question, "user-home", NULL, "token-pin", USEC_INFINITY, 0, &pin);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire security token PIN: %m");
+
+ r = user_record_set_token_pin(hr, pin, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to store security token PIN: %m");
+
+ return 0;
+}
+
+static int handle_generic_user_record_error(
+ const char *user_name,
+ UserRecord *hr,
+ const sd_bus_error *error,
+ int ret,
+ bool emphasize_current_password) {
+ int r;
+
+ assert(user_name);
+ assert(hr);
+
+ if (sd_bus_error_has_name(error, BUS_ERROR_HOME_ABSENT))
+ return log_error_errno(SYNTHETIC_ERRNO(EREMOTE),
+ "Home of user %s is currently absent, please plug in the necessary storage device or backing file system.", user_name);
+
+ else if (sd_bus_error_has_name(error, BUS_ERROR_AUTHENTICATION_LIMIT_HIT))
+ return log_error_errno(SYNTHETIC_ERRNO(ETOOMANYREFS),
+ "Too frequent unsuccessful login attempts for user %s, try again later.", user_name);
+
+ else if (sd_bus_error_has_name(error, BUS_ERROR_BAD_PASSWORD)) {
+
+ if (!strv_isempty(hr->password))
+ log_notice("Password incorrect or not sufficient, please try again.");
+
+ r = acquire_existing_password(user_name, hr, emphasize_current_password);
+ if (r < 0)
+ return r;
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN)) {
+
+ if (strv_isempty(hr->password))
+ log_notice("Security token not inserted, please enter password.");
+ else
+ log_notice("Password incorrect or not sufficient, and configured security token not inserted, please try again.");
+
+ r = acquire_existing_password(user_name, hr, emphasize_current_password);
+ if (r < 0)
+ return r;
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_PIN_NEEDED)) {
+
+ r = acquire_token_pin(user_name, hr);
+ if (r < 0)
+ return r;
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED)) {
+
+ log_notice("%s%sPlease authenticate physically on security token.",
+ emoji_enabled() ? special_glyph(SPECIAL_GLYPH_TOUCH) : "",
+ emoji_enabled() ? " " : "");
+
+ r = user_record_set_pkcs11_protected_authentication_path_permitted(hr, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set PKCS#11 protected authentication path permitted flag: %m");
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED)) {
+
+ log_notice("%s%sAuthentication requires presence verification on security token.",
+ emoji_enabled() ? special_glyph(SPECIAL_GLYPH_TOUCH) : "",
+ emoji_enabled() ? " " : "");
+
+ r = user_record_set_fido2_user_presence_permitted(hr, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set FIDO2 user presence permitted flag: %m");
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_PIN_LOCKED))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Security token PIN is locked, please unlock it first. (Hint: Removal and re-insertion might suffice.)");
+
+ else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_BAD_PIN)) {
+
+ log_notice("Security token PIN incorrect, please try again.");
+
+ r = acquire_token_pin(user_name, hr);
+ if (r < 0)
+ return r;
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT)) {
+
+ log_notice("Security token PIN incorrect, please try again (only a few tries left!).");
+
+ r = acquire_token_pin(user_name, hr);
+ if (r < 0)
+ return r;
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT)) {
+
+ log_notice("Security token PIN incorrect, please try again (only one try left!).");
+
+ r = acquire_token_pin(user_name, hr);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(ret, "Operation on home %s failed: %s", user_name, bus_error_message(error, ret));
+
+ return 0;
+}
+
+static int activate_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r, ret = 0;
+ char **i;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, strv_skip(argv, 1)) {
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+
+ secret = user_record_new();
+ if (!secret)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "ActivateHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", *i);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, secret);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ r = handle_generic_user_record_error(*i, secret, &error, r, false);
+ if (r < 0) {
+ if (ret == 0)
+ ret = r;
+
+ break;
+ }
+ } else
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int deactivate_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r, ret = 0;
+ char **i;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, strv_skip(argv, 1)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "DeactivateHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", *i);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to deactivate user home: %s", bus_error_message(&error, r));
+ if (ret == 0)
+ ret = r;
+ }
+ }
+
+ return ret;
+}
+
+static void dump_home_record(UserRecord *hr) {
+ int r;
+
+ assert(hr);
+
+ if (hr->incomplete) {
+ fflush(stdout);
+ log_warning("Warning: lacking rights to acquire privileged fields of user record of '%s', output incomplete.", hr->user_name);
+ }
+
+ if (arg_json) {
+ _cleanup_(user_record_unrefp) UserRecord *stripped = NULL;
+
+ if (arg_export_format == EXPORT_FORMAT_STRIPPED)
+ r = user_record_clone(hr, USER_RECORD_EXTRACT_EMBEDDED, &stripped);
+ else if (arg_export_format == EXPORT_FORMAT_MINIMAL)
+ r = user_record_clone(hr, USER_RECORD_EXTRACT_SIGNABLE, &stripped);
+ else
+ r = 0;
+ if (r < 0)
+ log_warning_errno(r, "Failed to strip user record, ignoring: %m");
+ if (stripped)
+ hr = stripped;
+
+ json_variant_dump(hr->json, arg_json_format_flags, stdout, NULL);
+ } else
+ user_record_show(hr, true);
+}
+
+static char **mangle_user_list(char **list, char ***ret_allocated) {
+ _cleanup_free_ char *myself = NULL;
+ char **l;
+
+ if (!strv_isempty(list)) {
+ *ret_allocated = NULL;
+ return list;
+ }
+
+ myself = getusername_malloc();
+ if (!myself)
+ return NULL;
+
+ l = new(char*, 2);
+ if (!l)
+ return NULL;
+
+ l[0] = TAKE_PTR(myself);
+ l[1] = NULL;
+
+ *ret_allocated = l;
+ return l;
+}
+
+static int inspect_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(strv_freep) char **mangled_list = NULL;
+ int r, ret = 0;
+ char **items, **i;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ items = mangle_user_list(strv_skip(argv, 1), &mangled_list);
+ if (!items)
+ return log_oom();
+
+ STRV_FOREACH(i, items) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ const char *json;
+ int incomplete;
+ uid_t uid;
+
+ r = parse_uid(*i, &uid);
+ if (r < 0) {
+ if (!valid_user_group_name(*i, 0)) {
+ log_error("Invalid user name '%s'.", *i);
+ if (ret == 0)
+ ret = -EINVAL;
+
+ continue;
+ }
+
+ r = bus_call_method(bus, bus_mgr, "GetUserRecordByName", &error, &reply, "s", *i);
+ } else
+ r = bus_call_method(bus, bus_mgr, "GetUserRecordByUID", &error, &reply, "u", (uint32_t) uid);
+
+ if (r < 0) {
+ log_error_errno(r, "Failed to inspect home: %s", bus_error_message(&error, r));
+ if (ret == 0)
+ ret = r;
+
+ continue;
+ }
+
+ r = sd_bus_message_read(reply, "sbo", &json, &incomplete, NULL);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ if (ret == 0)
+ ret = r;
+
+ continue;
+ }
+
+ r = json_parse(json, JSON_PARSE_SENSITIVE, &v, NULL, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse JSON identity: %m");
+ if (ret == 0)
+ ret = r;
+
+ continue;
+ }
+
+ hr = user_record_new();
+ if (!hr)
+ return log_oom();
+
+ r = user_record_load(hr, v, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_LOG);
+ if (r < 0) {
+ if (ret == 0)
+ ret = r;
+
+ continue;
+ }
+
+ hr->incomplete = incomplete;
+ dump_home_record(hr);
+ }
+
+ return ret;
+}
+
+static int authenticate_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(strv_freep) char **mangled_list = NULL;
+ int r, ret = 0;
+ char **i, **items;
+
+ items = mangle_user_list(strv_skip(argv, 1), &mangled_list);
+ if (!items)
+ return log_oom();
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ STRV_FOREACH(i, items) {
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+
+ secret = user_record_new();
+ if (!secret)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "AuthenticateHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", *i);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, secret);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ r = handle_generic_user_record_error(*i, secret, &error, r, false);
+ if (r < 0) {
+ if (ret == 0)
+ ret = r;
+
+ break;
+ }
+ } else
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int update_last_change(JsonVariant **v, bool with_password, bool override) {
+ JsonVariant *c;
+ usec_t n;
+ int r;
+
+ assert(v);
+
+ n = now(CLOCK_REALTIME);
+
+ c = json_variant_by_key(*v, "lastChangeUSec");
+ if (c) {
+ uintmax_t u;
+
+ if (!override)
+ goto update_password;
+
+ if (!json_variant_is_unsigned(c))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "lastChangeUSec field is not an unsigned integer, refusing.");
+
+ u = json_variant_unsigned(c);
+ if (u >= n)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "lastChangeUSec is from the future, can't update.");
+ }
+
+ r = json_variant_set_field_unsigned(v, "lastChangeUSec", n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update lastChangeUSec: %m");
+
+update_password:
+ if (!with_password)
+ return 0;
+
+ c = json_variant_by_key(*v, "lastPasswordChangeUSec");
+ if (c) {
+ uintmax_t u;
+
+ if (!override)
+ return 0;
+
+ if (!json_variant_is_unsigned(c))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "lastPasswordChangeUSec field is not an unsigned integer, refusing.");
+
+ u = json_variant_unsigned(c);
+ if (u >= n)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "lastPasswordChangeUSec is from the future, can't update.");
+ }
+
+ r = json_variant_set_field_unsigned(v, "lastPasswordChangeUSec", n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update lastPasswordChangeUSec: %m");
+
+ return 1;
+}
+
+static int apply_identity_changes(JsonVariant **_v) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ int r;
+
+ assert(_v);
+
+ v = json_variant_ref(*_v);
+
+ r = json_variant_filter(&v, arg_identity_filter);
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter identity: %m");
+
+ r = json_variant_merge(&v, arg_identity_extra);
+ if (r < 0)
+ return log_error_errno(r, "Failed to merge identities: %m");
+
+ if (arg_identity_extra_this_machine || !strv_isempty(arg_identity_filter)) {
+ _cleanup_(json_variant_unrefp) JsonVariant *per_machine = NULL, *mmid = NULL;
+ char mids[SD_ID128_STRING_MAX];
+ sd_id128_t mid;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire machine ID: %m");
+
+ r = json_variant_new_string(&mmid, sd_id128_to_string(mid, mids));
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate matchMachineId object: %m");
+
+ per_machine = json_variant_ref(json_variant_by_key(v, "perMachine"));
+ if (per_machine) {
+ _cleanup_(json_variant_unrefp) JsonVariant *npm = NULL, *add = NULL;
+ _cleanup_free_ JsonVariant **array = NULL;
+ JsonVariant *z;
+ size_t i = 0;
+
+ if (!json_variant_is_array(per_machine))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "perMachine field is not an array, refusing.");
+
+ array = new(JsonVariant*, json_variant_elements(per_machine) + 1);
+ if (!array)
+ return log_oom();
+
+ JSON_VARIANT_ARRAY_FOREACH(z, per_machine) {
+ JsonVariant *u;
+
+ if (!json_variant_is_object(z))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "perMachine entry is not an object, refusing.");
+
+ array[i++] = z;
+
+ u = json_variant_by_key(z, "matchMachineId");
+ if (!u)
+ continue;
+
+ if (!json_variant_equal(u, mmid))
+ continue;
+
+ r = json_variant_merge(&add, z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to merge perMachine entry: %m");
+
+ i--;
+ }
+
+ r = json_variant_filter(&add, arg_identity_filter);
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter perMachine: %m");
+
+ r = json_variant_merge(&add, arg_identity_extra_this_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to merge in perMachine fields: %m");
+
+ if (arg_identity_filter_rlimits || arg_identity_extra_rlimits) {
+ _cleanup_(json_variant_unrefp) JsonVariant *rlv = NULL;
+
+ rlv = json_variant_ref(json_variant_by_key(add, "resourceLimits"));
+
+ r = json_variant_filter(&rlv, arg_identity_filter_rlimits);
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter resource limits: %m");
+
+ r = json_variant_merge(&rlv, arg_identity_extra_rlimits);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set resource limits: %m");
+
+ if (json_variant_is_blank_object(rlv)) {
+ r = json_variant_filter(&add, STRV_MAKE("resourceLimits"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop resource limits field from identity: %m");
+ } else {
+ r = json_variant_set_field(&add, "resourceLimits", rlv);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update resource limits of identity: %m");
+ }
+ }
+
+ if (!json_variant_is_blank_object(add)) {
+ r = json_variant_set_field(&add, "matchMachineId", mmid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set matchMachineId field: %m");
+
+ array[i++] = add;
+ }
+
+ r = json_variant_new_array(&npm, array, i);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate new perMachine array: %m");
+
+ json_variant_unref(per_machine);
+ per_machine = TAKE_PTR(npm);
+ } else {
+ _cleanup_(json_variant_unrefp) JsonVariant *item = json_variant_ref(arg_identity_extra_this_machine);
+
+ if (arg_identity_extra_rlimits) {
+ r = json_variant_set_field(&item, "resourceLimits", arg_identity_extra_rlimits);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update resource limits of identity: %m");
+ }
+
+ r = json_variant_set_field(&item, "matchMachineId", mmid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set matchMachineId field: %m");
+
+ r = json_variant_append_array(&per_machine, item);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append to perMachine array: %m");
+ }
+
+ r = json_variant_set_field(&v, "perMachine", per_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update per machine record: %m");
+ }
+
+ if (arg_identity_extra_privileged || arg_identity_filter) {
+ _cleanup_(json_variant_unrefp) JsonVariant *privileged = NULL;
+
+ privileged = json_variant_ref(json_variant_by_key(v, "privileged"));
+
+ r = json_variant_filter(&privileged, arg_identity_filter);
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter identity (privileged part): %m");
+
+ r = json_variant_merge(&privileged, arg_identity_extra_privileged);
+ if (r < 0)
+ return log_error_errno(r, "Failed to merge identities (privileged part): %m");
+
+ if (json_variant_is_blank_object(privileged)) {
+ r = json_variant_filter(&v, STRV_MAKE("privileged"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileged part from identity: %m");
+ } else {
+ r = json_variant_set_field(&v, "privileged", privileged);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update privileged part of identity: %m");
+ }
+ }
+
+ if (arg_identity_filter_rlimits) {
+ _cleanup_(json_variant_unrefp) JsonVariant *rlv = NULL;
+
+ rlv = json_variant_ref(json_variant_by_key(v, "resourceLimits"));
+
+ r = json_variant_filter(&rlv, arg_identity_filter_rlimits);
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter resource limits: %m");
+
+ /* Note that we only filter resource limits here, but don't apply them. We do that in the perMachine section */
+
+ if (json_variant_is_blank_object(rlv)) {
+ r = json_variant_filter(&v, STRV_MAKE("resourceLimits"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop resource limits field from identity: %m");
+ } else {
+ r = json_variant_set_field(&v, "resourceLimits", rlv);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update resource limits of identity: %m");
+ }
+ }
+
+ json_variant_unref(*_v);
+ *_v = TAKE_PTR(v);
+
+ return 0;
+}
+
+static int add_disposition(JsonVariant **v) {
+ int r;
+
+ assert(v);
+
+ if (json_variant_by_key(*v, "disposition"))
+ return 0;
+
+ /* Set the disposition to regular, if not configured explicitly */
+ r = json_variant_set_field_string(v, "disposition", "regular");
+ if (r < 0)
+ return log_error_errno(r, "Failed to set disposition field: %m");
+
+ return 1;
+}
+
+static int acquire_new_home_record(UserRecord **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ char **i;
+ int r;
+
+ assert(ret);
+
+ if (arg_identity) {
+ unsigned line, column;
+
+ r = json_parse_file(
+ streq(arg_identity, "-") ? stdin : NULL,
+ streq(arg_identity, "-") ? "<stdin>" : arg_identity, JSON_PARSE_SENSITIVE, &v, &line, &column);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse identity at %u:%u: %m", line, column);
+ }
+
+ r = apply_identity_changes(&v);
+ if (r < 0)
+ return r;
+
+ r = add_disposition(&v);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, arg_pkcs11_token_uri) {
+ r = identity_add_pkcs11_key_data(&v, *i);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(i, arg_fido2_device) {
+ r = identity_add_fido2_parameters(&v, *i);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_recovery_key) {
+ r = identity_add_recovery_key(&v);
+ if (r < 0)
+ return r;
+ }
+
+ r = update_last_change(&v, true, false);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING)
+ json_variant_dump(v, JSON_FORMAT_PRETTY, NULL, NULL);
+
+ hr = user_record_new();
+ if (!hr)
+ return log_oom();
+
+ r = user_record_load(hr, v, USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_SECRET|USER_RECORD_ALLOW_PRIVILEGED|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_SIGNATURE|USER_RECORD_LOG);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(hr);
+ return 0;
+}
+
+static int acquire_new_password(
+ const char *user_name,
+ UserRecord *hr,
+ bool suggest,
+ char **ret) {
+
+ unsigned i = 5;
+ char *e;
+ int r;
+
+ assert(user_name);
+ assert(hr);
+
+ e = getenv("NEWPASSWORD");
+ if (e) {
+ _cleanup_(erase_and_freep) char *copy = NULL;
+
+ /* As above, this is not for use, just for testing */
+
+ if (ret) {
+ copy = strdup(e);
+ if (!copy)
+ return log_oom();
+ }
+
+ r = user_record_set_password(hr, STRV_MAKE(e), /* prepend = */ true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to store password: %m");
+
+ string_erase(e);
+ assert_se(unsetenv("NEWPASSWORD") == 0);
+
+ if (ret)
+ *ret = TAKE_PTR(copy);
+
+ return 0;
+ }
+
+ if (suggest)
+ (void) suggest_passwords();
+
+ for (;;) {
+ _cleanup_(strv_free_erasep) char **first = NULL, **second = NULL;
+ _cleanup_free_ char *question = NULL;
+
+ if (--i == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Too many attempts, giving up:");
+
+ if (asprintf(&question, "Please enter new password for user %s:", user_name) < 0)
+ return log_oom();
+
+ r = ask_password_auto(question, "user-home", NULL, "home-password", USEC_INFINITY, 0, &first);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire password: %m");
+
+ question = mfree(question);
+ if (asprintf(&question, "Please enter new password for user %s (repeat):", user_name) < 0)
+ return log_oom();
+
+ r = ask_password_auto(question, "user-home", NULL, "home-password", USEC_INFINITY, 0, &second);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire password: %m");
+
+ if (strv_equal(first, second)) {
+ _cleanup_(erase_and_freep) char *copy = NULL;
+
+ if (ret) {
+ copy = strdup(first[0]);
+ if (!copy)
+ return log_oom();
+ }
+
+ r = user_record_set_password(hr, first, /* prepend = */ true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to store password: %m");
+
+ if (ret)
+ *ret = TAKE_PTR(copy);
+
+ return 0;
+ }
+
+ log_error("Password didn't match, try again.");
+ }
+}
+
+static int create_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (argc >= 2) {
+ /* If a username was specified, use it */
+
+ if (valid_user_group_name(argv[1], 0))
+ r = json_variant_set_field_string(&arg_identity_extra, "userName", argv[1]);
+ else {
+ _cleanup_free_ char *un = NULL, *rr = NULL;
+
+ /* Before we consider the user name invalid, let's check if we can split it? */
+ r = split_user_name_realm(argv[1], &un, &rr);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User name '%s' is not valid: %m", argv[1]);
+
+ if (rr) {
+ r = json_variant_set_field_string(&arg_identity_extra, "realm", rr);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set realm field: %m");
+ }
+
+ r = json_variant_set_field_string(&arg_identity_extra, "userName", un);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to set userName field: %m");
+ } else {
+ /* If neither a username nor an identity have been specified we cannot operate. */
+ if (!arg_identity)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User name required.");
+ }
+
+ r = acquire_new_home_record(&hr);
+ if (r < 0)
+ return r;
+
+ /* If the JSON record carries no plain text password (besides the recovery key), then let's query it
+ * manually. */
+ if (strv_length(hr->password) <= arg_recovery_key) {
+
+ if (strv_isempty(hr->hashed_password)) {
+ _cleanup_(erase_and_freep) char *new_password = NULL;
+
+ /* No regular (i.e. non-PKCS#11) hashed passwords set in the record, let's fix that. */
+ r = acquire_new_password(hr->user_name, hr, /* suggest = */ true, &new_password);
+ if (r < 0)
+ return r;
+
+ r = user_record_make_hashed_password(hr, STRV_MAKE(new_password), /* extend = */ false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to hash password: %m");
+ } else {
+ /* There's a hash password set in the record, acquire the unhashed version of it. */
+ r = acquire_existing_password(hr->user_name, hr, /* emphasize_current= */ false);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (hr->enforce_password_policy == 0) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ /* If password quality enforcement is disabled, let's at least warn client side */
+
+ r = user_record_quality_check_password(hr, hr, &error);
+ if (r < 0)
+ log_warning_errno(r, "Specified password does not pass quality checks (%s), proceeding anyway.", bus_error_message(&error, r));
+ }
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(erase_and_freep) char *formatted = NULL;
+
+ r = json_variant_format(hr->json, 0, &formatted);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format user record: %m");
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "CreateHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ (void) sd_bus_message_sensitive(m);
+
+ r = sd_bus_message_append(m, "s", formatted);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LOW_PASSWORD_QUALITY)) {
+ _cleanup_(erase_and_freep) char *new_password = NULL;
+
+ log_error_errno(r, "%s", bus_error_message(&error, r));
+ log_info("(Use --enforce-password-policy=no to turn off password quality checks for this account.)");
+
+ r = acquire_new_password(hr->user_name, hr, /* suggest = */ false, &new_password);
+ if (r < 0)
+ return r;
+
+ r = user_record_make_hashed_password(hr, STRV_MAKE(new_password), /* extend = */ false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to hash passwords: %m");
+ } else {
+ r = handle_generic_user_record_error(hr->user_name, hr, &error, r, false);
+ if (r < 0)
+ return r;
+ }
+ } else
+ break; /* done */
+ }
+
+ return 0;
+}
+
+static int remove_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r, ret = 0;
+ char **i;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ STRV_FOREACH(i, strv_skip(argv, 1)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "RemoveHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", *i);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to remove home: %s", bus_error_message(&error, r));
+ if (ret == 0)
+ ret = r;
+ }
+ }
+
+ return ret;
+}
+
+static int acquire_updated_home_record(
+ sd_bus *bus,
+ const char *username,
+ UserRecord **ret) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *json = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ char **i;
+ int r;
+
+ assert(ret);
+
+ if (arg_identity) {
+ unsigned line, column;
+ JsonVariant *un;
+
+ r = json_parse_file(
+ streq(arg_identity, "-") ? stdin : NULL,
+ streq(arg_identity, "-") ? "<stdin>" : arg_identity, JSON_PARSE_SENSITIVE, &json, &line, &column);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse identity at %u:%u: %m", line, column);
+
+ un = json_variant_by_key(json, "userName");
+ if (un) {
+ if (!json_variant_is_string(un) || (username && !streq(json_variant_string(un), username)))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User name specified on command line and in JSON record do not match.");
+ } else {
+ if (!username)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No username specified.");
+
+ r = json_variant_set_field_string(&arg_identity_extra, "userName", username);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set userName field: %m");
+ }
+
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int incomplete;
+ const char *text;
+
+ if (!identity_properties_specified())
+ return log_error_errno(SYNTHETIC_ERRNO(EALREADY), "No field to change specified.");
+
+ r = bus_call_method(bus, bus_mgr, "GetUserRecordByName", &error, &reply, "s", username);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire user home record: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "sbo", &text, &incomplete, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (incomplete)
+ return log_error_errno(SYNTHETIC_ERRNO(EACCES), "Lacking rights to acquire user record including privileged metadata, can't update record.");
+
+ r = json_parse(text, JSON_PARSE_SENSITIVE, &json, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse JSON identity: %m");
+
+ reply = sd_bus_message_unref(reply);
+
+ r = json_variant_filter(&json, STRV_MAKE("binding", "status", "signature"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to strip binding and status from record to update: %m");
+ }
+
+ r = apply_identity_changes(&json);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, arg_pkcs11_token_uri) {
+ r = identity_add_pkcs11_key_data(&json, *i);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(i, arg_fido2_device) {
+ r = identity_add_fido2_parameters(&json, *i);
+ if (r < 0)
+ return r;
+ }
+
+ /* If the user supplied a full record, then add in lastChange, but do not override. Otherwise always
+ * override. */
+ r = update_last_change(&json, arg_pkcs11_token_uri || arg_fido2_device, !arg_identity);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING)
+ json_variant_dump(json, JSON_FORMAT_PRETTY, NULL, NULL);
+
+ hr = user_record_new();
+ if (!hr)
+ return log_oom();
+
+ r = user_record_load(hr, json, USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_PRIVILEGED|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_SECRET|USER_RECORD_ALLOW_SIGNATURE|USER_RECORD_LOG);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(hr);
+ return 0;
+}
+
+static int home_record_reset_human_interaction_permission(UserRecord *hr) {
+ int r;
+
+ assert(hr);
+
+ /* When we execute multiple operations one after the other, let's reset the permission to ask the
+ * user each time, so that if interaction is necessary we will be told so again and thus can print a
+ * nice message to the user, telling the user so. */
+
+ r = user_record_set_pkcs11_protected_authentication_path_permitted(hr, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset PKCS#11 protected authentication path permission flag: %m");
+
+ r = user_record_set_fido2_user_presence_permitted(hr, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset FIDO2 user presence permission flag: %m");
+
+ return 0;
+}
+
+static int update_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ _cleanup_free_ char *buffer = NULL;
+ const char *username;
+ int r;
+
+ if (argc >= 2)
+ username = argv[1];
+ else if (!arg_identity) {
+ buffer = getusername_malloc();
+ if (!buffer)
+ return log_oom();
+
+ username = buffer;
+ } else
+ username = NULL;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = acquire_updated_home_record(bus, username, &hr);
+ if (r < 0)
+ return r;
+
+ /* If we do multiple operations, let's output things more verbosely, since otherwise the repeated
+ * authentication might be confusing. */
+
+ if (arg_and_resize || arg_and_change_password)
+ log_info("Updating home directory.");
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *formatted = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "UpdateHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = json_variant_format(hr->json, 0, &formatted);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format user record: %m");
+
+ (void) sd_bus_message_sensitive(m);
+
+ r = sd_bus_message_append(m, "s", formatted);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ if (arg_and_change_password &&
+ sd_bus_error_has_name(&error, BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN))
+ /* In the generic handler we'd ask for a password in this case, but when
+ * changing passwords that's not sufficient, as we need to acquire all keys
+ * first. */
+ return log_error_errno(r, "Security token not inserted, refusing.");
+
+ r = handle_generic_user_record_error(hr->user_name, hr, &error, r, false);
+ if (r < 0)
+ return r;
+ } else
+ break;
+ }
+
+ if (arg_and_resize)
+ log_info("Resizing home.");
+
+ (void) home_record_reset_human_interaction_permission(hr);
+
+ /* Also sync down disk size to underlying LUKS/fscrypt/quota */
+ while (arg_and_resize) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "ResizeHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Specify UINT64_MAX as size, in which case the underlying disk size will just be synced */
+ r = sd_bus_message_append(m, "st", hr->user_name, UINT64_MAX);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, hr);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ if (arg_and_change_password &&
+ sd_bus_error_has_name(&error, BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN))
+ return log_error_errno(r, "Security token not inserted, refusing.");
+
+ r = handle_generic_user_record_error(hr->user_name, hr, &error, r, false);
+ if (r < 0)
+ return r;
+ } else
+ break;
+ }
+
+ if (arg_and_change_password)
+ log_info("Synchronizing passwords and encryption keys.");
+
+ (void) home_record_reset_human_interaction_permission(hr);
+
+ /* Also sync down passwords to underlying LUKS/fscrypt */
+ while (arg_and_change_password) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "ChangePasswordHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Specify an empty new secret, in which case the underlying LUKS/fscrypt password will just be synced */
+ r = sd_bus_message_append(m, "ss", hr->user_name, "{}");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, hr);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN))
+ return log_error_errno(r, "Security token not inserted, refusing.");
+
+ r = handle_generic_user_record_error(hr->user_name, hr, &error, r, false);
+ if (r < 0)
+ return r;
+ } else
+ break;
+ }
+
+ return 0;
+}
+
+static int passwd_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(user_record_unrefp) UserRecord *old_secret = NULL, *new_secret = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *buffer = NULL;
+ const char *username;
+ int r;
+
+ if (arg_pkcs11_token_uri)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "To change the PKCS#11 security token use 'homectl update --pkcs11-token-uri=…'.");
+ if (arg_fido2_device)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "To change the FIDO2 security token use 'homectl update --fido2-device=…'.");
+ if (identity_properties_specified())
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "The 'passwd' verb does not permit changing other record properties at the same time.");
+
+ if (argc >= 2)
+ username = argv[1];
+ else {
+ buffer = getusername_malloc();
+ if (!buffer)
+ return log_oom();
+
+ username = buffer;
+ }
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ old_secret = user_record_new();
+ if (!old_secret)
+ return log_oom();
+
+ new_secret = user_record_new();
+ if (!new_secret)
+ return log_oom();
+
+ r = acquire_new_password(username, new_secret, /* suggest = */ true, NULL);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "ChangePasswordHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", username);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, new_secret);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, old_secret);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_LOW_PASSWORD_QUALITY)) {
+
+ log_error_errno(r, "%s", bus_error_message(&error, r));
+
+ r = acquire_new_password(username, new_secret, /* suggest = */ false, NULL);
+
+ } else if (sd_bus_error_has_name(&error, BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN))
+
+ /* In the generic handler we'd ask for a password in this case, but when
+ * changing passwords that's not sufficeint, as we need to acquire all keys
+ * first. */
+ return log_error_errno(r, "Security token not inserted, refusing.");
+ else
+ r = handle_generic_user_record_error(username, old_secret, &error, r, true);
+ if (r < 0)
+ return r;
+ } else
+ break;
+ }
+
+ return 0;
+}
+
+static int resize_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ uint64_t ds = UINT64_MAX;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (arg_disk_size_relative != UINT64_MAX ||
+ (argc > 2 && parse_percent(argv[2]) >= 0))
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Relative disk size specification currently not supported when resizing.");
+
+ if (argc > 2) {
+ r = parse_size(argv[2], 1024, &ds);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse disk size parameter: %s", argv[2]);
+ }
+
+ if (arg_disk_size != UINT64_MAX) {
+ if (ds != UINT64_MAX && ds != arg_disk_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Disk size specified twice and doesn't match, refusing.");
+
+ ds = arg_disk_size;
+ }
+
+ secret = user_record_new();
+ if (!secret)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "ResizeHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "st", argv[1], ds);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, secret);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ r = handle_generic_user_record_error(argv[1], secret, &error, r, false);
+ if (r < 0)
+ return r;
+ } else
+ break;
+ }
+
+ return 0;
+}
+
+static int lock_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r, ret = 0;
+ char **i;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, strv_skip(argv, 1)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "LockHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", *i);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to lock home: %s", bus_error_message(&error, r));
+ if (ret == 0)
+ ret = r;
+ }
+ }
+
+ return ret;
+}
+
+static int unlock_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r, ret = 0;
+ char **i;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, strv_skip(argv, 1)) {
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+
+ secret = user_record_new();
+ if (!secret)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "UnlockHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", *i);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, secret);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ r = handle_generic_user_record_error(argv[1], secret, &error, r, false);
+ if (r < 0) {
+ if (ret == 0)
+ ret = r;
+
+ break;
+ }
+ } else
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int with_home(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ _cleanup_close_ int acquired_fd = -1;
+ _cleanup_strv_free_ char **cmdline = NULL;
+ const char *home;
+ int r, ret;
+ pid_t pid;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ if (argc < 3) {
+ _cleanup_free_ char *shell = NULL;
+
+ /* If no command is specified, spawn a shell */
+ r = get_shell(&shell);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire shell: %m");
+
+ cmdline = strv_new(shell);
+ } else
+ cmdline = strv_copy(argv + 2);
+ if (!cmdline)
+ return log_oom();
+
+ secret = user_record_new();
+ if (!secret)
+ return log_oom();
+
+ for (;;) {
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "AcquireHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", argv[1]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_message_append_secret(m, secret);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "b", /* please_suspend = */ getenv_bool("SYSTEMD_PLEASE_SUSPEND_HOME") > 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, &reply);
+ m = sd_bus_message_unref(m);
+ if (r < 0) {
+ r = handle_generic_user_record_error(argv[1], secret, &error, r, false);
+ if (r < 0)
+ return r;
+
+ sd_bus_error_free(&error);
+ } else {
+ int fd;
+
+ r = sd_bus_message_read(reply, "h", &fd);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ acquired_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (acquired_fd < 0)
+ return log_error_errno(errno, "Failed to duplicate acquired fd: %m");
+
+ reply = sd_bus_message_unref(reply);
+ break;
+ }
+ }
+
+ r = bus_call_method(bus, bus_mgr, "GetHomeByName", &error, &reply, "s", argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to inspect home: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "usussso", NULL, NULL, NULL, NULL, &home, NULL, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = safe_fork("(with)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_LOG|FORK_RLIMIT_NOFILE_SAFE|FORK_REOPEN_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (chdir(home) < 0) {
+ log_error_errno(errno, "Failed to change to directory %s: %m", home);
+ _exit(255);
+ }
+
+ execvp(cmdline[0], cmdline);
+ log_error_errno(errno, "Failed to execute %s: %m", cmdline[0]);
+ _exit(255);
+ }
+
+ ret = wait_for_terminate_and_check(cmdline[0], pid, WAIT_LOG_ABNORMAL);
+
+ /* Close the fd that pings the home now. */
+ acquired_fd = safe_close(acquired_fd);
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "ReleaseHome");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", argv[1]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_HOME_BUSY))
+ log_notice("Not deactivating home directory of %s, as it is still used.", argv[1]);
+ else
+ return log_error_errno(r, "Failed to release user home: %s", bus_error_message(&error, r));
+ }
+
+ return ret;
+}
+
+static int lock_all_homes(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "LockAllHomes");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to lock all homes: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int deactivate_all_homes(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = bus_message_new_method_call(bus, &m, bus_mgr, "DeactivateAllHomes");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to deactivate all homes: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int drop_from_identity(const char *field) {
+ int r;
+
+ assert(field);
+
+ /* If we are called to update an identity record and drop some field, let's keep track of what to
+ * remove from the old record */
+ r = strv_extend(&arg_identity_filter, field);
+ if (r < 0)
+ return log_oom();
+
+ /* Let's also drop the field if it was previously set to a new value on the same command line */
+ r = json_variant_filter(&arg_identity_extra, STRV_MAKE(field));
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter JSON identity data: %m");
+
+ r = json_variant_filter(&arg_identity_extra_this_machine, STRV_MAKE(field));
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter JSON identity data: %m");
+
+ r = json_variant_filter(&arg_identity_extra_privileged, STRV_MAKE(field));
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter JSON identity data: %m");
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("homectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] COMMAND ...\n\n"
+ "%2$sCreate, manipulate or inspect home directories.%3$s\n"
+ "\n%4$sCommands:%5$s\n"
+ " list List home areas\n"
+ " activate USER… Activate a home area\n"
+ " deactivate USER… Deactivate a home area\n"
+ " inspect USER… Inspect a home area\n"
+ " authenticate USER… Authenticate a home area\n"
+ " create USER Create a home area\n"
+ " remove USER… Remove a home area\n"
+ " update USER Update a home area\n"
+ " passwd USER Change password of a home area\n"
+ " resize USER SIZE Resize a home area\n"
+ " lock USER… Temporarily lock an active home area\n"
+ " unlock USER… Unlock a temporarily locked home area\n"
+ " lock-all Lock all suitable home areas\n"
+ " deactivate-all Deactivate all active home areas\n"
+ " with USER [COMMAND…] Run shell or command with access to a home area\n"
+ "\n%4$sOptions:%5$s\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --no-ask-password Do not ask for system passwords\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --identity=PATH Read JSON identity from file\n"
+ " --json=FORMAT Output inspection data in JSON (takes one of\n"
+ " pretty, short, off)\n"
+ " -j Equivalent to --json=pretty (on TTY) or\n"
+ " --json=short (otherwise)\n"
+ " --export-format= Strip JSON inspection data (full, stripped,\n"
+ " minimal)\n"
+ " -E When specified once equals -j --export-format=\n"
+ " stripped, when specified twice equals\n"
+ " -j --export-format=minimal\n"
+ "\n%4$sGeneral User Record Properties:%5$s\n"
+ " -c --real-name=REALNAME Real name for user\n"
+ " --realm=REALM Realm to create user in\n"
+ " --email-address=EMAIL Email address for user\n"
+ " --location=LOCATION Set location of user on earth\n"
+ " --icon-name=NAME Icon name for user\n"
+ " -d --home-dir=PATH Home directory\n"
+ " -u --uid=UID Numeric UID for user\n"
+ " -G --member-of=GROUP Add user to group\n"
+ " --skel=PATH Skeleton directory to use\n"
+ " --shell=PATH Shell for account\n"
+ " --setenv=VARIABLE=VALUE Set an environment variable at log-in\n"
+ " --timezone=TIMEZONE Set a time-zone\n"
+ " --language=LOCALE Set preferred language\n"
+ " --ssh-authorized-keys=KEYS\n"
+ " Specify SSH public keys\n"
+ " --pkcs11-token-uri=URI URI to PKCS#11 security token containing\n"
+ " private key and matching X.509 certificate\n"
+ " --fido2-device=PATH Path to FIDO2 hidraw device with hmac-secret\n"
+ " extension\n"
+ " --recovery-key=BOOL Add a recovery key\n"
+ "\n%4$sAccount Management User Record Properties:%5$s\n"
+ " --locked=BOOL Set locked account state\n"
+ " --not-before=TIMESTAMP Do not allow logins before\n"
+ " --not-after=TIMESTAMP Do not allow logins after\n"
+ " --rate-limit-interval=SECS\n"
+ " Login rate-limit interval in seconds\n"
+ " --rate-limit-burst=NUMBER\n"
+ " Login rate-limit attempts per interval\n"
+ "\n%4$sPassword Policy User Record Properties:%5$s\n"
+ " --password-hint=HINT Set Password hint\n"
+ " --enforce-password-policy=BOOL\n"
+ " Control whether to enforce system's password\n"
+ " policy for this user\n"
+ " -P Equivalent to --enforce-password-password=no\n"
+ " --password-change-now=BOOL\n"
+ " Require the password to be changed on next login\n"
+ " --password-change-min=TIME\n"
+ " Require minimum time between password changes\n"
+ " --password-change-max=TIME\n"
+ " Require maximum time between password changes\n"
+ " --password-change-warn=TIME\n"
+ " How much time to warn before password expiry\n"
+ " --password-change-inactive=TIME\n"
+ " How much time to block password after expiry\n"
+ "\n%4$sResource Management User Record Properties:%5$s\n"
+ " --disk-size=BYTES Size to assign the user on disk\n"
+ " --access-mode=MODE User home directory access mode\n"
+ " --umask=MODE Umask for user when logging in\n"
+ " --nice=NICE Nice level for user\n"
+ " --rlimit=LIMIT=VALUE[:VALUE]\n"
+ " Set resource limits\n"
+ " --tasks-max=MAX Set maximum number of per-user tasks\n"
+ " --memory-high=BYTES Set high memory threshold in bytes\n"
+ " --memory-max=BYTES Set maximum memory limit\n"
+ " --cpu-weight=WEIGHT Set CPU weight\n"
+ " --io-weight=WEIGHT Set IO weight\n"
+ "\n%4$sStorage User Record Properties:%5$s\n"
+ " --storage=STORAGE Storage type to use (luks, fscrypt, directory,\n"
+ " subvolume, cifs)\n"
+ " --image-path=PATH Path to image file/directory\n"
+ "\n%4$sLUKS Storage User Record Properties:%5$s\n"
+ " --fs-type=TYPE File system type to use in case of luks\n"
+ " storage (btrfs, ext4, xfs)\n"
+ " --luks-discard=BOOL Whether to use 'discard' feature of file system\n"
+ " when activated (mounted)\n"
+ " --luks-offline-discard=BOOL\n"
+ " Whether to trim file on logout\n"
+ " --luks-cipher=CIPHER Cipher to use for LUKS encryption\n"
+ " --luks-cipher-mode=MODE Cipher mode to use for LUKS encryption\n"
+ " --luks-volume-key-size=BITS\n"
+ " Volume key size to use for LUKS encryption\n"
+ " --luks-pbkdf-type=TYPE Password-based Key Derivation Function to use\n"
+ " --luks-pbkdf-hash-algorithm=ALGORITHM\n"
+ " PBKDF hash algorithm to use\n"
+ " --luks-pbkdf-time-cost=SECS\n"
+ " Time cost for PBKDF in seconds\n"
+ " --luks-pbkdf-memory-cost=BYTES\n"
+ " Memory cost for PBKDF in bytes\n"
+ " --luks-pbkdf-parallel-threads=NUMBER\n"
+ " Number of parallel threads for PKBDF\n"
+ "\n%4$sMounting User Record Properties:%5$s\n"
+ " --nosuid=BOOL Control the 'nosuid' flag of the home mount\n"
+ " --nodev=BOOL Control the 'nodev' flag of the home mount\n"
+ " --noexec=BOOL Control the 'noexec' flag of the home mount\n"
+ "\n%4$sCIFS User Record Properties:%5$s\n"
+ " --cifs-domain=DOMAIN CIFS (Windows) domain\n"
+ " --cifs-user-name=USER CIFS (Windows) user name\n"
+ " --cifs-service=SERVICE CIFS (Windows) service to mount as home area\n"
+ "\n%4$sLogin Behaviour User Record Properties:%5$s\n"
+ " --stop-delay=SECS How long to leave user services running after\n"
+ " logout\n"
+ " --kill-processes=BOOL Whether to kill user processes when sessions\n"
+ " terminate\n"
+ " --auto-login=BOOL Try to log this user in automatically\n"
+ "\nSee the %6$s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , ansi_underline(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_NO_ASK_PASSWORD,
+ ARG_REALM,
+ ARG_EMAIL_ADDRESS,
+ ARG_DISK_SIZE,
+ ARG_ACCESS_MODE,
+ ARG_STORAGE,
+ ARG_FS_TYPE,
+ ARG_IMAGE_PATH,
+ ARG_UMASK,
+ ARG_LUKS_DISCARD,
+ ARG_LUKS_OFFLINE_DISCARD,
+ ARG_JSON,
+ ARG_SETENV,
+ ARG_TIMEZONE,
+ ARG_LANGUAGE,
+ ARG_LOCKED,
+ ARG_SSH_AUTHORIZED_KEYS,
+ ARG_LOCATION,
+ ARG_ICON_NAME,
+ ARG_PASSWORD_HINT,
+ ARG_NICE,
+ ARG_RLIMIT,
+ ARG_NOT_BEFORE,
+ ARG_NOT_AFTER,
+ ARG_LUKS_CIPHER,
+ ARG_LUKS_CIPHER_MODE,
+ ARG_LUKS_VOLUME_KEY_SIZE,
+ ARG_NOSUID,
+ ARG_NODEV,
+ ARG_NOEXEC,
+ ARG_CIFS_DOMAIN,
+ ARG_CIFS_USER_NAME,
+ ARG_CIFS_SERVICE,
+ ARG_TASKS_MAX,
+ ARG_MEMORY_HIGH,
+ ARG_MEMORY_MAX,
+ ARG_CPU_WEIGHT,
+ ARG_IO_WEIGHT,
+ ARG_LUKS_PBKDF_TYPE,
+ ARG_LUKS_PBKDF_HASH_ALGORITHM,
+ ARG_LUKS_PBKDF_TIME_COST,
+ ARG_LUKS_PBKDF_MEMORY_COST,
+ ARG_LUKS_PBKDF_PARALLEL_THREADS,
+ ARG_RATE_LIMIT_INTERVAL,
+ ARG_RATE_LIMIT_BURST,
+ ARG_STOP_DELAY,
+ ARG_KILL_PROCESSES,
+ ARG_ENFORCE_PASSWORD_POLICY,
+ ARG_PASSWORD_CHANGE_NOW,
+ ARG_PASSWORD_CHANGE_MIN,
+ ARG_PASSWORD_CHANGE_MAX,
+ ARG_PASSWORD_CHANGE_WARN,
+ ARG_PASSWORD_CHANGE_INACTIVE,
+ ARG_EXPORT_FORMAT,
+ ARG_AUTO_LOGIN,
+ ARG_PKCS11_TOKEN_URI,
+ ARG_FIDO2_DEVICE,
+ ARG_RECOVERY_KEY,
+ ARG_AND_RESIZE,
+ ARG_AND_CHANGE_PASSWORD,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "identity", required_argument, NULL, 'I' },
+ { "real-name", required_argument, NULL, 'c' },
+ { "comment", required_argument, NULL, 'c' }, /* Compat alias to keep thing in sync with useradd(8) */
+ { "realm", required_argument, NULL, ARG_REALM },
+ { "email-address", required_argument, NULL, ARG_EMAIL_ADDRESS },
+ { "location", required_argument, NULL, ARG_LOCATION },
+ { "password-hint", required_argument, NULL, ARG_PASSWORD_HINT },
+ { "icon-name", required_argument, NULL, ARG_ICON_NAME },
+ { "home-dir", required_argument, NULL, 'd' }, /* Compatible with useradd(8) */
+ { "uid", required_argument, NULL, 'u' }, /* Compatible with useradd(8) */
+ { "member-of", required_argument, NULL, 'G' },
+ { "groups", required_argument, NULL, 'G' }, /* Compat alias to keep thing in sync with useradd(8) */
+ { "skel", required_argument, NULL, 'k' }, /* Compatible with useradd(8) */
+ { "shell", required_argument, NULL, 's' }, /* Compatible with useradd(8) */
+ { "setenv", required_argument, NULL, ARG_SETENV },
+ { "timezone", required_argument, NULL, ARG_TIMEZONE },
+ { "language", required_argument, NULL, ARG_LANGUAGE },
+ { "locked", required_argument, NULL, ARG_LOCKED },
+ { "not-before", required_argument, NULL, ARG_NOT_BEFORE },
+ { "not-after", required_argument, NULL, ARG_NOT_AFTER },
+ { "expiredate", required_argument, NULL, 'e' }, /* Compat alias to keep thing in sync with useradd(8) */
+ { "ssh-authorized-keys", required_argument, NULL, ARG_SSH_AUTHORIZED_KEYS },
+ { "disk-size", required_argument, NULL, ARG_DISK_SIZE },
+ { "access-mode", required_argument, NULL, ARG_ACCESS_MODE },
+ { "umask", required_argument, NULL, ARG_UMASK },
+ { "nice", required_argument, NULL, ARG_NICE },
+ { "rlimit", required_argument, NULL, ARG_RLIMIT },
+ { "tasks-max", required_argument, NULL, ARG_TASKS_MAX },
+ { "memory-high", required_argument, NULL, ARG_MEMORY_HIGH },
+ { "memory-max", required_argument, NULL, ARG_MEMORY_MAX },
+ { "cpu-weight", required_argument, NULL, ARG_CPU_WEIGHT },
+ { "io-weight", required_argument, NULL, ARG_IO_WEIGHT },
+ { "storage", required_argument, NULL, ARG_STORAGE },
+ { "image-path", required_argument, NULL, ARG_IMAGE_PATH },
+ { "fs-type", required_argument, NULL, ARG_FS_TYPE },
+ { "luks-discard", required_argument, NULL, ARG_LUKS_DISCARD },
+ { "luks-offline-discard", required_argument, NULL, ARG_LUKS_OFFLINE_DISCARD },
+ { "luks-cipher", required_argument, NULL, ARG_LUKS_CIPHER },
+ { "luks-cipher-mode", required_argument, NULL, ARG_LUKS_CIPHER_MODE },
+ { "luks-volume-key-size", required_argument, NULL, ARG_LUKS_VOLUME_KEY_SIZE },
+ { "luks-pbkdf-type", required_argument, NULL, ARG_LUKS_PBKDF_TYPE },
+ { "luks-pbkdf-hash-algorithm", required_argument, NULL, ARG_LUKS_PBKDF_HASH_ALGORITHM },
+ { "luks-pbkdf-time-cost", required_argument, NULL, ARG_LUKS_PBKDF_TIME_COST },
+ { "luks-pbkdf-memory-cost", required_argument, NULL, ARG_LUKS_PBKDF_MEMORY_COST },
+ { "luks-pbkdf-parallel-threads", required_argument, NULL, ARG_LUKS_PBKDF_PARALLEL_THREADS },
+ { "nosuid", required_argument, NULL, ARG_NOSUID },
+ { "nodev", required_argument, NULL, ARG_NODEV },
+ { "noexec", required_argument, NULL, ARG_NOEXEC },
+ { "cifs-user-name", required_argument, NULL, ARG_CIFS_USER_NAME },
+ { "cifs-domain", required_argument, NULL, ARG_CIFS_DOMAIN },
+ { "cifs-service", required_argument, NULL, ARG_CIFS_SERVICE },
+ { "rate-limit-interval", required_argument, NULL, ARG_RATE_LIMIT_INTERVAL },
+ { "rate-limit-burst", required_argument, NULL, ARG_RATE_LIMIT_BURST },
+ { "stop-delay", required_argument, NULL, ARG_STOP_DELAY },
+ { "kill-processes", required_argument, NULL, ARG_KILL_PROCESSES },
+ { "enforce-password-policy", required_argument, NULL, ARG_ENFORCE_PASSWORD_POLICY },
+ { "password-change-now", required_argument, NULL, ARG_PASSWORD_CHANGE_NOW },
+ { "password-change-min", required_argument, NULL, ARG_PASSWORD_CHANGE_MIN },
+ { "password-change-max", required_argument, NULL, ARG_PASSWORD_CHANGE_MAX },
+ { "password-change-warn", required_argument, NULL, ARG_PASSWORD_CHANGE_WARN },
+ { "password-change-inactive", required_argument, NULL, ARG_PASSWORD_CHANGE_INACTIVE },
+ { "auto-login", required_argument, NULL, ARG_AUTO_LOGIN },
+ { "json", required_argument, NULL, ARG_JSON },
+ { "export-format", required_argument, NULL, ARG_EXPORT_FORMAT },
+ { "pkcs11-token-uri", required_argument, NULL, ARG_PKCS11_TOKEN_URI },
+ { "fido2-device", required_argument, NULL, ARG_FIDO2_DEVICE },
+ { "recovery-key", required_argument, NULL, ARG_RECOVERY_KEY },
+ { "and-resize", required_argument, NULL, ARG_AND_RESIZE },
+ { "and-change-password", required_argument, NULL, ARG_AND_CHANGE_PASSWORD },
+ {}
+ };
+
+ int r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ for (;;) {
+ int c;
+
+ c = getopt_long(argc, argv, "hH:M:I:c:d:u:k:s:e:G:jPE", options, NULL);
+ if (c < 0)
+ break;
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case 'I':
+ arg_identity = optarg;
+ break;
+
+ case 'c':
+ if (isempty(optarg)) {
+ r = drop_from_identity("realName");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ if (!valid_gecos(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Real name '%s' not a valid GECOS field.", optarg);
+
+ r = json_variant_set_field_string(&arg_identity_extra, "realName", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set realName field: %m");
+
+ break;
+
+ case 'd': {
+ _cleanup_free_ char *hd = NULL;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("homeDirectory");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_path_argument_and_warn(optarg, false, &hd);
+ if (r < 0)
+ return r;
+
+ if (!valid_home(hd))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Home directory '%s' not valid.", hd);
+
+ r = json_variant_set_field_string(&arg_identity_extra, "homeDirectory", hd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set homeDirectory field: %m");
+
+ break;
+ }
+
+ case ARG_REALM:
+ if (isempty(optarg)) {
+ r = drop_from_identity("realm");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = dns_name_is_valid(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether realm '%s' is a valid DNS domain: %m", optarg);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Realm '%s' is not a valid DNS domain: %m", optarg);
+
+ r = json_variant_set_field_string(&arg_identity_extra, "realm", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set realm field: %m");
+ break;
+
+ case ARG_EMAIL_ADDRESS:
+ case ARG_LOCATION:
+ case ARG_ICON_NAME:
+ case ARG_CIFS_USER_NAME:
+ case ARG_CIFS_DOMAIN:
+ case ARG_CIFS_SERVICE: {
+
+ const char *field =
+ c == ARG_EMAIL_ADDRESS ? "emailAddress" :
+ c == ARG_LOCATION ? "location" :
+ c == ARG_ICON_NAME ? "iconName" :
+ c == ARG_CIFS_USER_NAME ? "cifsUserName" :
+ c == ARG_CIFS_DOMAIN ? "cifsDomain" :
+ c == ARG_CIFS_SERVICE ? "cifsService" :
+ NULL;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = json_variant_set_field_string(&arg_identity_extra, field, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+
+ break;
+ }
+
+ case ARG_PASSWORD_HINT:
+ if (isempty(optarg)) {
+ r = drop_from_identity("passwordHint");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = json_variant_set_field_string(&arg_identity_extra_privileged, "passwordHint", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set passwordHint field: %m");
+
+ string_erase(optarg);
+ break;
+
+ case ARG_NICE: {
+ int nc;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("niceLevel");
+ if (r < 0)
+ return r;
+ break;
+ }
+
+ r = parse_nice(optarg, &nc);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse nice level: %s", optarg);
+
+ r = json_variant_set_field_integer(&arg_identity_extra, "niceLevel", nc);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set niceLevel field: %m");
+
+ break;
+ }
+
+ case ARG_RLIMIT: {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *jcur = NULL, *jmax = NULL;
+ _cleanup_free_ char *field = NULL, *t = NULL;
+ const char *eq;
+ struct rlimit rl;
+ int l;
+
+ if (isempty(optarg)) {
+ /* Remove all resource limits */
+
+ r = drop_from_identity("resourceLimits");
+ if (r < 0)
+ return r;
+
+ arg_identity_filter_rlimits = strv_free(arg_identity_filter_rlimits);
+ arg_identity_extra_rlimits = json_variant_unref(arg_identity_extra_rlimits);
+ break;
+ }
+
+ eq = strchr(optarg, '=');
+ if (!eq)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Can't parse resource limit assignment: %s", optarg);
+
+ field = strndup(optarg, eq - optarg);
+ if (!field)
+ return log_oom();
+
+ l = rlimit_from_string_harder(field);
+ if (l < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown resource limit type: %s", field);
+
+ if (isempty(eq + 1)) {
+ /* Remove only the specific rlimit */
+
+ r = strv_extend(&arg_identity_filter_rlimits, rlimit_to_string(l));
+ if (r < 0)
+ return r;
+
+ r = json_variant_filter(&arg_identity_extra_rlimits, STRV_MAKE(field));
+ if (r < 0)
+ return log_error_errno(r, "Failed to filter JSON identity data: %m");
+
+ break;
+ }
+
+ r = rlimit_parse(l, eq + 1, &rl);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse resource limit value: %s", eq + 1);
+
+ r = rl.rlim_cur == RLIM_INFINITY ? json_variant_new_null(&jcur) : json_variant_new_unsigned(&jcur, rl.rlim_cur);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to allocate current integer: %m");
+
+ r = rl.rlim_max == RLIM_INFINITY ? json_variant_new_null(&jmax) : json_variant_new_unsigned(&jmax, rl.rlim_max);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to allocate maximum integer: %m");
+
+ r = json_build(&v,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("cur", JSON_BUILD_VARIANT(jcur)),
+ JSON_BUILD_PAIR("max", JSON_BUILD_VARIANT(jmax))));
+ if (r < 0)
+ return log_error_errno(r, "Failed to build resource limit: %m");
+
+ t = strjoin("RLIMIT_", rlimit_to_string(l));
+ if (!t)
+ return log_oom();
+
+ r = json_variant_set_field(&arg_identity_extra_rlimits, t, v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", rlimit_to_string(l));
+
+ break;
+ }
+
+ case 'u': {
+ uid_t uid;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("uid");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_uid(optarg, &uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID '%s'.", optarg);
+
+ if (uid_is_system(uid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID " UID_FMT " is in system range, refusing.", uid);
+ if (uid_is_dynamic(uid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID " UID_FMT " is in dynamic range, refusing.", uid);
+ if (uid == UID_NOBODY)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID " UID_FMT " is nobody UID, refusing.", uid);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra, "uid", uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set realm field: %m");
+
+ break;
+ }
+
+ case 'k':
+ case ARG_IMAGE_PATH: {
+ const char *field = c == 'k' ? "skeletonDirectory" : "imagePath";
+ _cleanup_free_ char *v = NULL;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_path_argument_and_warn(optarg, false, &v);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field_string(&arg_identity_extra_this_machine, field, v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", v);
+
+ break;
+ }
+
+ case 's':
+ if (isempty(optarg)) {
+ r = drop_from_identity("shell");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ if (!valid_shell(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Shell '%s' not valid.", optarg);
+
+ r = json_variant_set_field_string(&arg_identity_extra, "shell", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set shell field: %m");
+
+ break;
+
+ case ARG_SETENV: {
+ _cleanup_free_ char **l = NULL, **k = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *ne = NULL;
+ JsonVariant *e;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("environment");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ if (!env_assignment_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Environment assignment '%s' not valid.", optarg);
+
+ e = json_variant_by_key(arg_identity_extra, "environment");
+ if (e) {
+ r = json_variant_strv(e, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse JSON environment field: %m");
+ }
+
+ k = strv_env_set(l, optarg);
+ if (!k)
+ return log_oom();
+
+ strv_sort(k);
+
+ r = json_variant_new_array_strv(&ne, k);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate environment list JSON: %m");
+
+ r = json_variant_set_field(&arg_identity_extra, "environment", ne);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set environment list: %m");
+
+ break;
+ }
+
+ case ARG_TIMEZONE:
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("timeZone");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ if (!timezone_is_valid(optarg, LOG_DEBUG))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Timezone '%s' is not valid.", optarg);
+
+ r = json_variant_set_field_string(&arg_identity_extra, "timeZone", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set timezone field: %m");
+
+ break;
+
+ case ARG_LANGUAGE:
+ if (isempty(optarg)) {
+ r = drop_from_identity("language");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ if (!locale_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Locale '%s' is not valid.", optarg);
+
+ if (locale_is_installed(optarg) <= 0)
+ log_warning("Locale '%s' is not installed, accepting anyway.", optarg);
+
+ r = json_variant_set_field_string(&arg_identity_extra, "preferredLanguage", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set preferredLanguage field: %m");
+
+ break;
+
+ case ARG_NOSUID:
+ case ARG_NODEV:
+ case ARG_NOEXEC:
+ case ARG_LOCKED:
+ case ARG_KILL_PROCESSES:
+ case ARG_ENFORCE_PASSWORD_POLICY:
+ case ARG_AUTO_LOGIN:
+ case ARG_PASSWORD_CHANGE_NOW: {
+ const char *field =
+ c == ARG_LOCKED ? "locked" :
+ c == ARG_NOSUID ? "mountNoSuid" :
+ c == ARG_NODEV ? "mountNoDevices" :
+ c == ARG_NOEXEC ? "mountNoExecute" :
+ c == ARG_KILL_PROCESSES ? "killProcesses" :
+ c == ARG_ENFORCE_PASSWORD_POLICY ? "enforcePasswordPolicy" :
+ c == ARG_AUTO_LOGIN ? "autoLogin" :
+ c == ARG_PASSWORD_CHANGE_NOW ? "passwordChangeNow" :
+ NULL;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s boolean: %m", field);
+
+ r = json_variant_set_field_boolean(&arg_identity_extra, field, r > 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+
+ break;
+ }
+
+ case 'P':
+ r = json_variant_set_field_boolean(&arg_identity_extra, "enforcePasswordPolicy", false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set enforcePasswordPolicy field: %m");
+
+ break;
+
+ case ARG_DISK_SIZE:
+ if (isempty(optarg)) {
+ r = drop_from_identity("diskSize");
+ if (r < 0)
+ return r;
+
+ r = drop_from_identity("diskSizeRelative");
+ if (r < 0)
+ return r;
+
+ arg_disk_size = arg_disk_size_relative = UINT64_MAX;
+ break;
+ }
+
+ r = parse_permille(optarg);
+ if (r < 0) {
+ r = parse_size(optarg, 1024, &arg_disk_size);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Disk size '%s' not valid.", optarg);
+
+ r = drop_from_identity("diskSizeRelative");
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra_this_machine, "diskSize", arg_disk_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set diskSize field: %m");
+
+ arg_disk_size_relative = UINT64_MAX;
+ } else {
+ /* Normalize to UINT32_MAX == 100% */
+ arg_disk_size_relative = (uint64_t) r * UINT32_MAX / 1000U;
+
+ r = drop_from_identity("diskSize");
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra_this_machine, "diskSizeRelative", arg_disk_size_relative);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set diskSizeRelative field: %m");
+
+ arg_disk_size = UINT64_MAX;
+ }
+
+ break;
+
+ case ARG_ACCESS_MODE: {
+ mode_t mode;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("accessMode");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_mode(optarg, &mode);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Access mode '%s' not valid.", optarg);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra, "accessMode", mode);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set access mode field: %m");
+
+ break;
+ }
+
+ case ARG_LUKS_DISCARD:
+ if (isempty(optarg)) {
+ r = drop_from_identity("luksDiscard");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --luks-discard= parameter: %s", optarg);
+
+ r = json_variant_set_field_boolean(&arg_identity_extra, "luksDiscard", r);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set discard field: %m");
+
+ break;
+
+ case ARG_LUKS_OFFLINE_DISCARD:
+ if (isempty(optarg)) {
+ r = drop_from_identity("luksOfflineDiscard");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --luks-offline-discard= parameter: %s", optarg);
+
+ r = json_variant_set_field_boolean(&arg_identity_extra, "luksOfflineDiscard", r);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set offline discard field: %m");
+
+ break;
+
+ case ARG_LUKS_VOLUME_KEY_SIZE:
+ case ARG_LUKS_PBKDF_PARALLEL_THREADS:
+ case ARG_RATE_LIMIT_BURST: {
+ const char *field =
+ c == ARG_LUKS_VOLUME_KEY_SIZE ? "luksVolumeKeySize" :
+ c == ARG_LUKS_PBKDF_PARALLEL_THREADS ? "luksPbkdfParallelThreads" :
+ c == ARG_RATE_LIMIT_BURST ? "rateLimitBurst" : NULL;
+ unsigned n;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+ }
+
+ r = safe_atou(optarg, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s parameter: %s", field, optarg);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra, field, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+
+ break;
+ }
+
+ case ARG_UMASK: {
+ mode_t m;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("umask");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_mode(optarg, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse umask: %m");
+
+ r = json_variant_set_field_integer(&arg_identity_extra, "umask", m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set umask field: %m");
+
+ break;
+ }
+
+ case ARG_SSH_AUTHORIZED_KEYS: {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(strv_freep) char **l = NULL, **add = NULL;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("sshAuthorizedKeys");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ if (optarg[0] == '@') {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ /* If prefixed with '@' read from a file */
+
+ f = fopen(optarg+1, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open '%s': %m", optarg+1);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from '%s': %m", optarg+1);
+ if (r == 0)
+ break;
+
+ if (isempty(line))
+ continue;
+
+ if (line[0] == '#')
+ continue;
+
+ r = strv_consume(&add, TAKE_PTR(line));
+ if (r < 0)
+ return log_oom();
+ }
+ } else {
+ /* Otherwise, assume it's a literal key. Let's do some superficial checks
+ * before accept it though. */
+
+ if (string_has_cc(optarg, NULL))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Authorized key contains control characters, refusing.");
+ if (optarg[0] == '#')
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified key is a comment?");
+
+ add = strv_new(optarg);
+ if (!add)
+ return log_oom();
+ }
+
+ v = json_variant_ref(json_variant_by_key(arg_identity_extra_privileged, "sshAuthorizedKeys"));
+ if (v) {
+ r = json_variant_strv(v, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse SSH authorized keys list: %m");
+ }
+
+ r = strv_extend_strv(&l, add, true);
+ if (r < 0)
+ return log_oom();
+
+ v = json_variant_unref(v);
+
+ r = json_variant_new_array_strv(&v, l);
+ if (r < 0)
+ return log_oom();
+
+ r = json_variant_set_field(&arg_identity_extra_privileged, "sshAuthorizedKeys", v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set authorized keys: %m");
+
+ break;
+ }
+
+ case ARG_NOT_BEFORE:
+ case ARG_NOT_AFTER:
+ case 'e': {
+ const char *field;
+ usec_t n;
+
+ field = c == ARG_NOT_BEFORE ? "notBeforeUSec" :
+ IN_SET(c, ARG_NOT_AFTER, 'e') ? "notAfterUSec" : NULL;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ /* Note the minor discrepancy regarding -e parsing here: we support that for compat
+ * reasons, and in the original useradd(8) implementation it accepts dates in the
+ * format YYYY-MM-DD. Coincidentally, we accept dates formatted like that too, but
+ * with greater precision. */
+ r = parse_timestamp(optarg, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s parameter: %m", field);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra, field, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+ break;
+ }
+
+ case ARG_PASSWORD_CHANGE_MIN:
+ case ARG_PASSWORD_CHANGE_MAX:
+ case ARG_PASSWORD_CHANGE_WARN:
+ case ARG_PASSWORD_CHANGE_INACTIVE: {
+ const char *field;
+ usec_t n;
+
+ field = c == ARG_PASSWORD_CHANGE_MIN ? "passwordChangeMinUSec" :
+ c == ARG_PASSWORD_CHANGE_MAX ? "passwordChangeMaxUSec" :
+ c == ARG_PASSWORD_CHANGE_WARN ? "passwordChangeWarnUSec" :
+ c == ARG_PASSWORD_CHANGE_INACTIVE ? "passwordChangeInactiveUSec" :
+ NULL;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_sec(optarg, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s parameter: %m", field);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra, field, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+ break;
+ }
+
+ case ARG_STORAGE:
+ case ARG_FS_TYPE:
+ case ARG_LUKS_CIPHER:
+ case ARG_LUKS_CIPHER_MODE:
+ case ARG_LUKS_PBKDF_TYPE:
+ case ARG_LUKS_PBKDF_HASH_ALGORITHM: {
+
+ const char *field =
+ c == ARG_STORAGE ? "storage" :
+ c == ARG_FS_TYPE ? "fileSystemType" :
+ c == ARG_LUKS_CIPHER ? "luksCipher" :
+ c == ARG_LUKS_CIPHER_MODE ? "luksCipherMode" :
+ c == ARG_LUKS_PBKDF_TYPE ? "luksPbkdfType" :
+ c == ARG_LUKS_PBKDF_HASH_ALGORITHM ? "luksPbkdfHashAlgorithm" : NULL;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ if (!string_is_safe(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Parameter for %s field not valid: %s", field, optarg);
+
+ r = json_variant_set_field_string(
+ IN_SET(c, ARG_STORAGE, ARG_FS_TYPE) ?
+ &arg_identity_extra_this_machine :
+ &arg_identity_extra, field, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+
+ break;
+ }
+
+ case ARG_LUKS_PBKDF_TIME_COST:
+ case ARG_RATE_LIMIT_INTERVAL:
+ case ARG_STOP_DELAY: {
+ const char *field =
+ c == ARG_LUKS_PBKDF_TIME_COST ? "luksPbkdfTimeCostUSec" :
+ c == ARG_RATE_LIMIT_INTERVAL ? "rateLimitIntervalUSec" :
+ c == ARG_STOP_DELAY ? "stopDelayUSec" :
+ NULL;
+ usec_t t;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ r = parse_sec(optarg, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s field: %s", field, optarg);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra, field, t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+
+ break;
+ }
+
+ case 'G': {
+ const char *p = optarg;
+
+ if (isempty(p)) {
+ r = drop_from_identity("memberOf");
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ for (;;) {
+ _cleanup_(json_variant_unrefp) JsonVariant *mo = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse group list: %m");
+ if (r == 0)
+ break;
+
+ if (!valid_user_group_name(word, 0))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid group name %s.", word);
+
+ mo = json_variant_ref(json_variant_by_key(arg_identity_extra, "memberOf"));
+
+ r = json_variant_strv(mo, &list);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse group list: %m");
+
+ r = strv_extend(&list, word);
+ if (r < 0)
+ return log_oom();
+
+ strv_sort(list);
+ strv_uniq(list);
+
+ mo = json_variant_unref(mo);
+ r = json_variant_new_array_strv(&mo, list);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create group list JSON: %m");
+
+ r = json_variant_set_field(&arg_identity_extra, "memberOf", mo);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update group list: %m");
+ }
+
+ break;
+ }
+
+ case ARG_TASKS_MAX: {
+ uint64_t u;
+
+ if (isempty(optarg)) {
+ r = drop_from_identity("tasksMax");
+ if (r < 0)
+ return r;
+ break;
+ }
+
+ r = safe_atou64(optarg, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --tasks-max= parameter: %s", optarg);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra, "tasksMax", u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set tasksMax field: %m");
+
+ break;
+ }
+
+ case ARG_MEMORY_MAX:
+ case ARG_MEMORY_HIGH:
+ case ARG_LUKS_PBKDF_MEMORY_COST: {
+ const char *field =
+ c == ARG_MEMORY_MAX ? "memoryMax" :
+ c == ARG_MEMORY_HIGH ? "memoryHigh" :
+ c == ARG_LUKS_PBKDF_MEMORY_COST ? "luksPbkdfMemoryCost" : NULL;
+
+ uint64_t u;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+ break;
+ }
+
+ r = parse_size(optarg, 1024, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s parameter: %s", field, optarg);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra_this_machine, field, u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+
+ break;
+ }
+
+ case ARG_CPU_WEIGHT:
+ case ARG_IO_WEIGHT: {
+ const char *field = c == ARG_CPU_WEIGHT ? "cpuWeight" :
+ c == ARG_IO_WEIGHT ? "ioWeight" : NULL;
+ uint64_t u;
+
+ assert(field);
+
+ if (isempty(optarg)) {
+ r = drop_from_identity(field);
+ if (r < 0)
+ return r;
+ break;
+ }
+
+ r = safe_atou64(optarg, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --cpu-weight=/--io-weight= parameter: %s", optarg);
+
+ if (!CGROUP_WEIGHT_IS_OK(u))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Weight %" PRIu64 " is out of valid weight range.", u);
+
+ r = json_variant_set_field_unsigned(&arg_identity_extra, field, u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set %s field: %m", field);
+
+ break;
+ }
+
+ case ARG_PKCS11_TOKEN_URI: {
+ const char *p;
+
+ if (streq(optarg, "list"))
+ return list_pkcs11_tokens();
+
+ /* If --pkcs11-token-uri= is specified we always drop everything old */
+ FOREACH_STRING(p, "pkcs11TokenUri", "pkcs11EncryptedKey") {
+ r = drop_from_identity(p);
+ if (r < 0)
+ return r;
+ }
+
+ if (isempty(optarg)) {
+ arg_pkcs11_token_uri = strv_free(arg_pkcs11_token_uri);
+ break;
+ }
+
+ if (streq(optarg, "auto")) {
+ _cleanup_free_ char *found = NULL;
+
+ r = find_pkcs11_token_auto(&found);
+ if (r < 0)
+ return r;
+ r = strv_consume(&arg_pkcs11_token_uri, TAKE_PTR(found));
+ } else {
+ if (!pkcs11_uri_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Not a valid PKCS#11 URI: %s", optarg);
+
+ r = strv_extend(&arg_pkcs11_token_uri, optarg);
+ }
+ if (r < 0)
+ return r;
+
+ strv_uniq(arg_pkcs11_token_uri);
+ break;
+ }
+
+ case ARG_FIDO2_DEVICE: {
+ const char *p;
+
+ if (streq(optarg, "list"))
+ return list_fido2_devices();
+
+ FOREACH_STRING(p, "fido2HmacCredential", "fido2HmacSalt") {
+ r = drop_from_identity(p);
+ if (r < 0)
+ return r;
+ }
+
+ if (isempty(optarg)) {
+ arg_fido2_device = strv_free(arg_fido2_device);
+ break;
+ }
+
+ if (streq(optarg, "auto")) {
+ _cleanup_free_ char *found = NULL;
+
+ r = find_fido2_auto(&found);
+ if (r < 0)
+ return r;
+
+ r = strv_consume(&arg_fido2_device, TAKE_PTR(found));
+ } else
+ r = strv_extend(&arg_fido2_device, optarg);
+
+ if (r < 0)
+ return r;
+
+ strv_uniq(arg_fido2_device);
+ break;
+ }
+
+ case ARG_RECOVERY_KEY: {
+ const char *p;
+
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --recovery-key= argument: %s", optarg);
+
+ arg_recovery_key = r;
+
+ FOREACH_STRING(p, "recoveryKey", "recoveryKeyType") {
+ r = drop_from_identity(p);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+ }
+
+ case 'j':
+ arg_json = true;
+ arg_json_format_flags = JSON_FORMAT_PRETTY_AUTO|JSON_FORMAT_COLOR_AUTO;
+ break;
+
+ case ARG_JSON:
+ if (streq(optarg, "pretty")) {
+ arg_json = true;
+ arg_json_format_flags = JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR_AUTO;
+ } else if (streq(optarg, "short")) {
+ arg_json = true;
+ arg_json_format_flags = JSON_FORMAT_NEWLINE;
+ } else if (streq(optarg, "off")) {
+ arg_json = false;
+ arg_json_format_flags = 0;
+ } else if (streq(optarg, "help")) {
+ puts("pretty\n"
+ "short\n"
+ "off");
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown argument to --json=: %s", optarg);
+
+ break;
+
+ case 'E':
+ if (arg_export_format == EXPORT_FORMAT_FULL)
+ arg_export_format = EXPORT_FORMAT_STRIPPED;
+ else if (arg_export_format == EXPORT_FORMAT_STRIPPED)
+ arg_export_format = EXPORT_FORMAT_MINIMAL;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specifying -E more than twice is not supported.");
+
+ arg_json = true;
+ if (arg_json_format_flags == 0)
+ arg_json_format_flags = JSON_FORMAT_PRETTY_AUTO|JSON_FORMAT_COLOR_AUTO;
+ break;
+
+ case ARG_EXPORT_FORMAT:
+ if (streq(optarg, "full"))
+ arg_export_format = EXPORT_FORMAT_FULL;
+ else if (streq(optarg, "stripped"))
+ arg_export_format = EXPORT_FORMAT_STRIPPED;
+ else if (streq(optarg, "minimal"))
+ arg_export_format = EXPORT_FORMAT_MINIMAL;
+ else if (streq(optarg, "help")) {
+ puts("full\n"
+ "stripped\n"
+ "minimal");
+ return 0;
+ }
+
+ break;
+
+ case ARG_AND_RESIZE:
+ arg_and_resize = true;
+ break;
+
+ case ARG_AND_CHANGE_PASSWORD:
+ arg_and_change_password = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+ if (!strv_isempty(arg_pkcs11_token_uri) || !strv_isempty(arg_fido2_device))
+ arg_and_change_password = true;
+
+ if (arg_disk_size != UINT64_MAX || arg_disk_size_relative != UINT64_MAX)
+ arg_and_resize = true;
+
+ return 1;
+}
+
+static int redirect_bus_mgr(void) {
+ const char *suffix;
+
+ /* Talk to a different service if that's requested. (The same env var is also understood by homed, so
+ * that it is relatively easily possible to invoke a second instance of homed for debug purposes and
+ * have homectl talk to it, without colliding with the host version. This is handy when operating
+ * from a homed-managed account.) */
+
+ suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
+ if (suffix) {
+ static BusLocator locator = {
+ .path = "/org/freedesktop/home1",
+ .interface = "org.freedesktop.home1.Manager",
+ };
+
+ /* Yes, we leak this memory, but there's little point to collect this, given that we only do
+ * this in a debug environment, do it only once, and the string shall live for out entire
+ * process runtime. */
+
+ locator.destination = strjoin("org.freedesktop.home1.", suffix);
+ if (!locator.destination)
+ return log_oom();
+
+ bus_mgr = &locator;
+ } else
+ bus_mgr = bus_home_mgr;
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "list", VERB_ANY, 1, VERB_DEFAULT, list_homes },
+ { "activate", 2, VERB_ANY, 0, activate_home },
+ { "deactivate", 2, VERB_ANY, 0, deactivate_home },
+ { "inspect", VERB_ANY, VERB_ANY, 0, inspect_home },
+ { "authenticate", VERB_ANY, VERB_ANY, 0, authenticate_home },
+ { "create", VERB_ANY, 2, 0, create_home },
+ { "remove", 2, VERB_ANY, 0, remove_home },
+ { "update", VERB_ANY, 2, 0, update_home },
+ { "passwd", VERB_ANY, 2, 0, passwd_home },
+ { "resize", 2, 3, 0, resize_home },
+ { "lock", 2, VERB_ANY, 0, lock_home },
+ { "unlock", 2, VERB_ANY, 0, unlock_home },
+ { "with", 2, VERB_ANY, 0, with_home },
+ { "lock-all", VERB_ANY, 1, 0, lock_all_homes },
+ { "deactivate-all", VERB_ANY, 1, 0, deactivate_all_homes },
+ {}
+ };
+
+ int r;
+
+ log_setup_cli();
+
+ r = redirect_bus_mgr();
+ if (r < 0)
+ return r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/home/homed-bus.c b/src/home/homed-bus.c
new file mode 100644
index 0000000..d70fda5
--- /dev/null
+++ b/src/home/homed-bus.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "homed-bus.h"
+#include "strv.h"
+
+int bus_message_read_secret(sd_bus_message *m, UserRecord **ret, sd_bus_error *error) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *full = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ unsigned line = 0, column = 0;
+ const char *json;
+ int r;
+
+ assert(ret);
+
+ r = sd_bus_message_read(m, "s", &json);
+ if (r < 0)
+ return r;
+
+ r = json_parse(json, JSON_PARSE_SENSITIVE, &v, &line, &column);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Failed to parse JSON secret record at %u:%u: %m", line, column);
+
+ r = json_build(&full, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("secret", JSON_BUILD_VARIANT(v))));
+ if (r < 0)
+ return r;
+
+ hr = user_record_new();
+ if (!hr)
+ return -ENOMEM;
+
+ r = user_record_load(hr, full, USER_RECORD_REQUIRE_SECRET);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(hr);
+ return 0;
+}
+
+int bus_message_read_home_record(sd_bus_message *m, UserRecordLoadFlags flags, UserRecord **ret, sd_bus_error *error) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ unsigned line = 0, column = 0;
+ const char *json;
+ int r;
+
+ assert(ret);
+
+ r = sd_bus_message_read(m, "s", &json);
+ if (r < 0)
+ return r;
+
+ r = json_parse(json, JSON_PARSE_SENSITIVE, &v, &line, &column);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Failed to parse JSON identity record at %u:%u: %m", line, column);
+
+ hr = user_record_new();
+ if (!hr)
+ return -ENOMEM;
+
+ r = user_record_load(hr, v, flags);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "JSON data is not a valid identity record");
+
+ *ret = TAKE_PTR(hr);
+ return 0;
+}
diff --git a/src/home/homed-bus.h b/src/home/homed-bus.h
new file mode 100644
index 0000000..977679b
--- /dev/null
+++ b/src/home/homed-bus.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "user-record.h"
+#include "json.h"
+
+int bus_message_read_secret(sd_bus_message *m, UserRecord **ret, sd_bus_error *error);
+int bus_message_read_home_record(sd_bus_message *m, UserRecordLoadFlags flags, UserRecord **ret, sd_bus_error *error);
diff --git a/src/home/homed-conf.c b/src/home/homed-conf.c
new file mode 100644
index 0000000..4f46b0c
--- /dev/null
+++ b/src/home/homed-conf.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "def.h"
+#include "home-util.h"
+#include "homed-conf.h"
+
+int manager_parse_config_file(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = config_parse_many_nulstr(
+ PKGSYSCONFDIR "/homed.conf",
+ CONF_PATHS_NULSTR("systemd/homed.conf.d"),
+ "Home\0",
+ config_item_perf_lookup, homed_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ m,
+ NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_default_storage, user_storage, UserStorage, "Failed to parse default storage setting");
+
+int config_parse_default_file_system_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+
+ assert(rvalue);
+ assert(s);
+
+ if (!isempty(rvalue) && !supported_fstype(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unsupported file system, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return free_and_strdup_warn(s, empty_to_null(rvalue));
+
+}
diff --git a/src/home/homed-conf.h b/src/home/homed-conf.h
new file mode 100644
index 0000000..1defaa9
--- /dev/null
+++ b/src/home/homed-conf.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "homed-manager.h"
+
+int manager_parse_config_file(Manager *m);
+
+const struct ConfigPerfItem* homed_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_default_storage);
+CONFIG_PARSER_PROTOTYPE(config_parse_default_file_system_type);
diff --git a/src/home/homed-gperf.gperf b/src/home/homed-gperf.gperf
new file mode 100644
index 0000000..970da5f
--- /dev/null
+++ b/src/home/homed-gperf.gperf
@@ -0,0 +1,21 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "homed-conf.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name homed_gperf_hash
+%define lookup-function-name homed_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Home.DefaultStorage, config_parse_default_storage, 0, offsetof(Manager, default_storage)
+Home.DefaultFileSystemType, config_parse_default_file_system_type, 0, offsetof(Manager, default_file_system_type)
diff --git a/src/home/homed-home-bus.c b/src/home/homed-home-bus.c
new file mode 100644
index 0000000..5643a9a
--- /dev/null
+++ b/src/home/homed-home-bus.c
@@ -0,0 +1,953 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/capability.h>
+
+#include "bus-common-errors.h"
+#include "bus-polkit.h"
+#include "fd-util.h"
+#include "homed-bus.h"
+#include "homed-home-bus.h"
+#include "homed-home.h"
+#include "strv.h"
+#include "user-record-util.h"
+#include "user-util.h"
+
+static int property_get_unix_record(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Home *h = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(h);
+
+ return sd_bus_message_append(
+ reply, "(suusss)",
+ h->user_name,
+ (uint32_t) h->uid,
+ h->record ? (uint32_t) user_record_gid(h->record) : GID_INVALID,
+ h->record ? user_record_real_name(h->record) : NULL,
+ h->record ? user_record_home_directory(h->record) : NULL,
+ h->record ? user_record_shell(h->record) : NULL);
+}
+
+static int property_get_state(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Home *h = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(h);
+
+ return sd_bus_message_append(reply, "s", home_state_to_string(home_get_state(h)));
+}
+
+int bus_home_client_is_trusted(Home *h, sd_bus_message *message) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t euid;
+ int r;
+
+ assert(h);
+
+ if (!message)
+ return -EINVAL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &euid);
+ if (r < 0)
+ return r;
+
+ return euid == 0 || h->uid == euid;
+}
+
+int bus_home_get_record_json(
+ Home *h,
+ sd_bus_message *message,
+ char **ret,
+ bool *ret_incomplete) {
+
+ _cleanup_(user_record_unrefp) UserRecord *augmented = NULL;
+ UserRecordLoadFlags flags;
+ int r, trusted;
+
+ assert(h);
+ assert(ret);
+
+ trusted = bus_home_client_is_trusted(h, message);
+ if (trusted < 0) {
+ log_warning_errno(trusted, "Failed to determine whether client is trusted, assuming untrusted.");
+ trusted = false;
+ }
+
+ flags = USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_BINDING|USER_RECORD_STRIP_SECRET|USER_RECORD_ALLOW_STATUS|USER_RECORD_ALLOW_SIGNATURE;
+ if (trusted)
+ flags |= USER_RECORD_ALLOW_PRIVILEGED;
+ else
+ flags |= USER_RECORD_STRIP_PRIVILEGED;
+
+ r = home_augment_status(h, flags, &augmented);
+ if (r < 0)
+ return r;
+
+ r = json_variant_format(augmented->json, 0, ret);
+ if (r < 0)
+ return r;
+
+ if (ret_incomplete)
+ *ret_incomplete = augmented->incomplete;
+
+ return 0;
+}
+
+static int property_get_user_record(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *json = NULL;
+ Home *h = userdata;
+ bool incomplete;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(h);
+
+ r = bus_home_get_record_json(h, sd_bus_get_current_message(bus), &json, &incomplete);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "(sb)", json, incomplete);
+}
+
+int bus_home_method_activate(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_message_read_secret(message, &secret, error);
+ if (r < 0)
+ return r;
+
+ r = home_activate(h, secret, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ /* The operation is now in process, keep track of this message so that we can later reply to it. */
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_deactivate(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = home_deactivate(h, false, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_unregister(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.remove-home",
+ NULL,
+ true,
+ UID_INVALID,
+ &h->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = home_unregister(h, error);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+
+ /* Note that home_unregister() destroyed 'h' here, so no more accesses */
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_home_method_realize(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_message_read_secret(message, &secret, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.create-home",
+ NULL,
+ true,
+ UID_INVALID,
+ &h->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = home_create(h, secret, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ h->unregister_on_failure = false;
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_remove(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.remove-home",
+ NULL,
+ true,
+ UID_INVALID,
+ &h->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = home_remove(h, error);
+ if (r < 0)
+ return r;
+ if (r > 0) /* Done already. Note that home_remove() destroyed 'h' here, so no more accesses */
+ return sd_bus_reply_method_return(message, NULL);
+
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_fixate(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_message_read_secret(message, &secret, error);
+ if (r < 0)
+ return r;
+
+ r = home_fixate(h, secret, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_authenticate(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_message_read_secret(message, &secret, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.authenticate-home",
+ NULL,
+ true,
+ h->uid,
+ &h->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = home_authenticate(h, secret, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_update_record(Home *h, sd_bus_message *message, UserRecord *hr, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+ assert(message);
+ assert(hr);
+
+ r = user_record_is_supported(hr, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.update-home",
+ NULL,
+ true,
+ UID_INVALID,
+ &h->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = home_update(h, hr, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_update(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_message_read_home_record(message, USER_RECORD_REQUIRE_REGULAR|USER_RECORD_REQUIRE_SECRET|USER_RECORD_ALLOW_PRIVILEGED|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_SIGNATURE, &hr, error);
+ if (r < 0)
+ return r;
+
+ return bus_home_method_update_record(h, message, hr, error);
+}
+
+int bus_home_method_resize(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ Home *h = userdata;
+ uint64_t sz;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = sd_bus_message_read(message, "t", &sz);
+ if (r < 0)
+ return r;
+
+ r = bus_message_read_secret(message, &secret, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.resize-home",
+ NULL,
+ true,
+ UID_INVALID,
+ &h->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = home_resize(h, sz, secret, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_change_password(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *new_secret = NULL, *old_secret = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_message_read_secret(message, &new_secret, error);
+ if (r < 0)
+ return r;
+
+ r = bus_message_read_secret(message, &old_secret, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.passwd-home",
+ NULL,
+ true,
+ h->uid,
+ &h->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = home_passwd(h, new_secret, old_secret, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_lock(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = home_lock(h, error);
+ if (r < 0)
+ return r;
+ if (r > 0) /* Done */
+ return sd_bus_reply_method_return(message, NULL);
+
+ /* The operation is now in process, keep track of this message so that we can later reply to it. */
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_unlock(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ r = bus_message_read_secret(message, &secret, error);
+ if (r < 0)
+ return r;
+
+ r = home_unlock(h, secret, error);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ assert(!h->current_operation);
+
+ /* The operation is now in process, keep track of this message so that we can later reply to it. */
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_acquire(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ _cleanup_(operation_unrefp) Operation *o = NULL;
+ _cleanup_close_ int fd = -1;
+ int r, please_suspend;
+ Home *h = userdata;
+
+ assert(message);
+ assert(h);
+
+ r = bus_message_read_secret(message, &secret, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &please_suspend);
+ if (r < 0)
+ return r;
+
+ /* This operation might not be something we can executed immediately, hence queue it */
+ fd = home_create_fifo(h, please_suspend);
+ if (fd < 0)
+ return sd_bus_reply_method_errnof(message, fd, "Failed to allocate FIFO for %s: %m", h->user_name);
+
+ o = operation_new(OPERATION_ACQUIRE, message);
+ if (!o)
+ return -ENOMEM;
+
+ o->secret = TAKE_PTR(secret);
+ o->send_fd = TAKE_FD(fd);
+
+ r = home_schedule_operation(h, o, error);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int bus_home_method_ref(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ Home *h = userdata;
+ HomeState state;
+ int please_suspend, r;
+
+ assert(message);
+ assert(h);
+
+ r = sd_bus_message_read(message, "b", &please_suspend);
+ if (r < 0)
+ return r;
+
+ state = home_get_state(h);
+ switch (state) {
+ case HOME_ABSENT:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ABSENT, "Home %s is currently missing or not plugged in.", h->user_name);
+ case HOME_UNFIXATED:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_NOT_ACTIVE, "Home %s not active.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ default:
+ if (HOME_STATE_IS_ACTIVE(state))
+ break;
+
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ fd = home_create_fifo(h, please_suspend);
+ if (fd < 0)
+ return sd_bus_reply_method_errnof(message, fd, "Failed to allocate FIFO for %s: %m", h->user_name);
+
+ return sd_bus_reply_method_return(message, "h", fd);
+}
+
+int bus_home_method_release(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(operation_unrefp) Operation *o = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(message);
+ assert(h);
+
+ o = operation_new(OPERATION_RELEASE, message);
+ if (!o)
+ return -ENOMEM;
+
+ r = home_schedule_operation(h, o, error);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+/* We map a uid_t as uint32_t bus property, let's ensure this is safe. */
+assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+
+int bus_home_path(Home *h, char **ret) {
+ assert(ret);
+
+ return sd_bus_path_encode("/org/freedesktop/home1/home", h->user_name, ret);
+}
+
+static int bus_home_object_find(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ void *userdata,
+ void **found,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *e = NULL;
+ Manager *m = userdata;
+ uid_t uid;
+ Home *h;
+ int r;
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/home1/home", &e);
+ if (r <= 0)
+ return 0;
+
+ if (parse_uid(e, &uid) >= 0)
+ h = hashmap_get(m->homes_by_uid, UID_TO_PTR(uid));
+ else
+ h = hashmap_get(m->homes_by_name, e);
+ if (!h)
+ return 0;
+
+ *found = h;
+ return 1;
+}
+
+static int bus_home_node_enumerator(
+ sd_bus *bus,
+ const char *path,
+ void *userdata,
+ char ***nodes,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ size_t k = 0;
+ Home *h;
+ int r;
+
+ assert(nodes);
+
+ l = new0(char*, hashmap_size(m->homes_by_uid) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(h, m->homes_by_uid) {
+ r = bus_home_path(h, l + k);
+ if (r < 0)
+ return r;
+ }
+
+ *nodes = TAKE_PTR(l);
+ return 1;
+}
+
+const sd_bus_vtable home_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("UserName", "s",
+ NULL, offsetof(Home, user_name),
+ SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UID", "u",
+ NULL, offsetof(Home, uid),
+ SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UnixRecord", "(suusss)",
+ property_get_unix_record, 0,
+ SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("State", "s",
+ property_get_state, 0,
+ 0),
+ SD_BUS_PROPERTY("UserRecord", "(sb)",
+ property_get_user_record, 0,
+ SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION|SD_BUS_VTABLE_SENSITIVE),
+
+ SD_BUS_METHOD_WITH_NAMES("Activate",
+ "s",
+ SD_BUS_PARAM(secret),
+ NULL,,
+ bus_home_method_activate,
+ SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD("Deactivate", NULL, NULL, bus_home_method_deactivate, 0),
+ SD_BUS_METHOD("Unregister", NULL, NULL, bus_home_method_unregister, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Realize",
+ "s",
+ SD_BUS_PARAM(secret),
+ NULL,,
+ bus_home_method_realize,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+
+ SD_BUS_METHOD("Remove", NULL, NULL, bus_home_method_remove, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Fixate",
+ "s",
+ SD_BUS_PARAM(secret),
+ NULL,,
+ bus_home_method_fixate,
+ SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("Authenticate",
+ "s",
+ SD_BUS_PARAM(secret),
+ NULL,,
+ bus_home_method_authenticate,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("Update",
+ "s",
+ SD_BUS_PARAM(user_record),
+ NULL,,
+ bus_home_method_update,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("Resize",
+ "ts",
+ SD_BUS_PARAM(size)
+ SD_BUS_PARAM(secret),
+ NULL,,
+ bus_home_method_resize,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("ChangePassword",
+ "ss",
+ SD_BUS_PARAM(new_secret)
+ SD_BUS_PARAM(old_secret),
+ NULL,,
+ bus_home_method_change_password,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD("Lock", NULL, NULL, bus_home_method_lock, 0),
+ SD_BUS_METHOD_WITH_NAMES("Unlock",
+ "s",
+ SD_BUS_PARAM(secret),
+ NULL,,
+ bus_home_method_unlock,
+ SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("Acquire",
+ "sb",
+ SD_BUS_PARAM(secret)
+ SD_BUS_PARAM(please_suspend),
+ "h",
+ SD_BUS_PARAM(send_fd),
+ bus_home_method_acquire,
+ SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("Ref",
+ "b",
+ SD_BUS_PARAM(please_suspend),
+ "h",
+ SD_BUS_PARAM(send_fd),
+ bus_home_method_ref,
+ 0),
+ SD_BUS_METHOD("Release", NULL, NULL, bus_home_method_release, 0),
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation home_object = {
+ "/org/freedesktop/home1/home",
+ "org.freedesktop.home1.Home",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({home_vtable, bus_home_object_find}),
+ .node_enumerator = bus_home_node_enumerator,
+ .manager = true,
+};
+
+static int on_deferred_change(sd_event_source *s, void *userdata) {
+ _cleanup_free_ char *path = NULL;
+ Home *h = userdata;
+ int r;
+
+ assert(h);
+
+ h->deferred_change_event_source = sd_event_source_unref(h->deferred_change_event_source);
+
+ r = bus_home_path(h, &path);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to generate home bus path, ignoring: %m");
+ return 0;
+ }
+
+ if (h->announced)
+ r = sd_bus_emit_properties_changed_strv(h->manager->bus, path, "org.freedesktop.home1.Home", NULL);
+ else
+ r = sd_bus_emit_object_added(h->manager->bus, path);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send home change event, ignoring: %m");
+ else
+ h->announced = true;
+
+ return 0;
+}
+
+int bus_home_emit_change(Home *h) {
+ int r;
+
+ assert(h);
+
+ if (h->deferred_change_event_source)
+ return 1;
+
+ if (!h->manager->event)
+ return 0;
+
+ if (IN_SET(sd_event_get_state(h->manager->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
+ return 0;
+
+ r = sd_event_add_defer(h->manager->event, &h->deferred_change_event_source, on_deferred_change, h);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate deferred change event source: %m");
+
+ r = sd_event_source_set_priority(h->deferred_change_event_source, SD_EVENT_PRIORITY_IDLE+5);
+ if (r < 0)
+ log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
+
+ (void) sd_event_source_set_description(h->deferred_change_event_source, "deferred-change-event");
+ return 1;
+}
+
+int bus_home_emit_remove(Home *h) {
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(h);
+
+ if (!h->announced)
+ return 0;
+
+ r = bus_home_path(h, &path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_emit_object_removed(h->manager->bus, path);
+ if (r < 0)
+ return r;
+
+ h->announced = false;
+ return 1;
+}
diff --git a/src/home/homed-home-bus.h b/src/home/homed-home-bus.h
new file mode 100644
index 0000000..5522178
--- /dev/null
+++ b/src/home/homed-home-bus.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-object.h"
+#include "homed-home.h"
+
+int bus_home_client_is_trusted(Home *h, sd_bus_message *message);
+int bus_home_get_record_json(Home *h, sd_bus_message *message, char **ret, bool *ret_incomplete);
+
+int bus_home_method_activate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_deactivate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_unregister(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_realize(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_remove(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_fixate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_authenticate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_update(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_update_record(Home *home, sd_bus_message *message, UserRecord *hr, sd_bus_error *error);
+int bus_home_method_resize(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_change_password(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_lock(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_unlock(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_acquire(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_home_method_release(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+extern const BusObjectImplementation home_object;
+
+int bus_home_path(Home *h, char **ret);
+
+int bus_home_emit_change(Home *h);
+int bus_home_emit_remove(Home *h);
diff --git a/src/home/homed-home.c b/src/home/homed-home.c
new file mode 100644
index 0000000..7f4532e
--- /dev/null
+++ b/src/home/homed-home.c
@@ -0,0 +1,2836 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_LINUX_MEMFD_H
+#include <linux/memfd.h>
+#endif
+
+#include <sys/mman.h>
+#include <sys/quota.h>
+#include <sys/vfs.h>
+
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "bus-common-errors.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "home-util.h"
+#include "homed-home-bus.h"
+#include "homed-home.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "pwquality-util.h"
+#include "quota-util.h"
+#include "resize-fs.h"
+#include "set.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "user-record-sign.h"
+#include "user-record-util.h"
+#include "user-record-pwquality.h"
+#include "user-record.h"
+#include "user-util.h"
+
+#define HOME_USERS_MAX 500
+#define PENDING_OPERATIONS_MAX 100
+
+assert_cc(HOME_UID_MIN <= HOME_UID_MAX);
+assert_cc(HOME_USERS_MAX <= (HOME_UID_MAX - HOME_UID_MIN + 1));
+
+static int home_start_work(Home *h, const char *verb, UserRecord *hr, UserRecord *secret);
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(operation_hash_ops, void, trivial_hash_func, trivial_compare_func, Operation, operation_unref);
+
+static int suitable_home_record(UserRecord *hr) {
+ int r;
+
+ assert(hr);
+
+ if (!hr->user_name)
+ return -EUNATCH;
+
+ /* We are a bit more restrictive with what we accept as homed-managed user than what we accept in
+ * home records in general. Let's enforce the stricter rule here. */
+ if (!suitable_user_name(hr->user_name))
+ return -EINVAL;
+ if (!uid_is_valid(hr->uid))
+ return -EINVAL;
+
+ /* Insist we are outside of the dynamic and system range */
+ if (uid_is_system(hr->uid) || gid_is_system(user_record_gid(hr)) ||
+ uid_is_dynamic(hr->uid) || gid_is_dynamic(user_record_gid(hr)))
+ return -EADDRNOTAVAIL;
+
+ /* Insist that GID and UID match */
+ if (user_record_gid(hr) != (gid_t) hr->uid)
+ return -EBADSLT;
+
+ /* Similar for the realm */
+ if (hr->realm) {
+ r = suitable_realm(hr->realm);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int home_new(Manager *m, UserRecord *hr, const char *sysfs, Home **ret) {
+ _cleanup_(home_freep) Home *home = NULL;
+ _cleanup_free_ char *nm = NULL, *ns = NULL;
+ int r;
+
+ assert(m);
+ assert(hr);
+
+ r = suitable_home_record(hr);
+ if (r < 0)
+ return r;
+
+ if (hashmap_contains(m->homes_by_name, hr->user_name))
+ return -EBUSY;
+
+ if (hashmap_contains(m->homes_by_uid, UID_TO_PTR(hr->uid)))
+ return -EBUSY;
+
+ if (sysfs && hashmap_contains(m->homes_by_sysfs, sysfs))
+ return -EBUSY;
+
+ if (hashmap_size(m->homes_by_name) >= HOME_USERS_MAX)
+ return -EUSERS;
+
+ nm = strdup(hr->user_name);
+ if (!nm)
+ return -ENOMEM;
+
+ if (sysfs) {
+ ns = strdup(sysfs);
+ if (!ns)
+ return -ENOMEM;
+ }
+
+ home = new(Home, 1);
+ if (!home)
+ return -ENOMEM;
+
+ *home = (Home) {
+ .manager = m,
+ .user_name = TAKE_PTR(nm),
+ .uid = hr->uid,
+ .state = _HOME_STATE_INVALID,
+ .worker_stdout_fd = -1,
+ .sysfs = TAKE_PTR(ns),
+ .signed_locally = -1,
+ };
+
+ r = hashmap_put(m->homes_by_name, home->user_name, home);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->homes_by_uid, UID_TO_PTR(home->uid), home);
+ if (r < 0)
+ return r;
+
+ if (home->sysfs) {
+ r = hashmap_put(m->homes_by_sysfs, home->sysfs, home);
+ if (r < 0)
+ return r;
+ }
+
+ r = user_record_clone(hr, USER_RECORD_LOAD_MASK_SECRET, &home->record);
+ if (r < 0)
+ return r;
+
+ (void) bus_manager_emit_auto_login_changed(m);
+ (void) bus_home_emit_change(home);
+
+ if (ret)
+ *ret = TAKE_PTR(home);
+ else
+ TAKE_PTR(home);
+
+ return 0;
+}
+
+Home *home_free(Home *h) {
+
+ if (!h)
+ return NULL;
+
+ if (h->manager) {
+ (void) bus_home_emit_remove(h);
+ (void) bus_manager_emit_auto_login_changed(h->manager);
+
+ if (h->user_name)
+ (void) hashmap_remove_value(h->manager->homes_by_name, h->user_name, h);
+
+ if (uid_is_valid(h->uid))
+ (void) hashmap_remove_value(h->manager->homes_by_uid, UID_TO_PTR(h->uid), h);
+
+ if (h->sysfs)
+ (void) hashmap_remove_value(h->manager->homes_by_sysfs, h->sysfs, h);
+
+ if (h->worker_pid > 0)
+ (void) hashmap_remove_value(h->manager->homes_by_worker_pid, PID_TO_PTR(h->worker_pid), h);
+
+ if (h->manager->gc_focus == h)
+ h->manager->gc_focus = NULL;
+ }
+
+ user_record_unref(h->record);
+ user_record_unref(h->secret);
+
+ h->worker_event_source = sd_event_source_unref(h->worker_event_source);
+ safe_close(h->worker_stdout_fd);
+ free(h->user_name);
+ free(h->sysfs);
+
+ h->ref_event_source_please_suspend = sd_event_source_unref(h->ref_event_source_please_suspend);
+ h->ref_event_source_dont_suspend = sd_event_source_unref(h->ref_event_source_dont_suspend);
+
+ h->pending_operations = ordered_set_free(h->pending_operations);
+ h->pending_event_source = sd_event_source_unref(h->pending_event_source);
+ h->deferred_change_event_source = sd_event_source_unref(h->deferred_change_event_source);
+
+ h->current_operation = operation_unref(h->current_operation);
+
+ return mfree(h);
+}
+
+int home_set_record(Home *h, UserRecord *hr) {
+ _cleanup_(user_record_unrefp) UserRecord *new_hr = NULL;
+ Home *other;
+ int r;
+
+ assert(h);
+ assert(h->user_name);
+ assert(h->record);
+ assert(hr);
+
+ if (user_record_equal(h->record, hr))
+ return 0;
+
+ r = suitable_home_record(hr);
+ if (r < 0)
+ return r;
+
+ if (!user_record_compatible(h->record, hr))
+ return -EREMCHG;
+
+ if (!FLAGS_SET(hr->mask, USER_RECORD_REGULAR) ||
+ FLAGS_SET(hr->mask, USER_RECORD_SECRET))
+ return -EINVAL;
+
+ if (FLAGS_SET(h->record->mask, USER_RECORD_STATUS)) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ /* Hmm, the existing record has status fields? If so, copy them over */
+
+ v = json_variant_ref(hr->json);
+ r = json_variant_set_field(&v, "status", json_variant_by_key(h->record->json, "status"));
+ if (r < 0)
+ return r;
+
+ new_hr = user_record_new();
+ if (!new_hr)
+ return -ENOMEM;
+
+ r = user_record_load(new_hr, v, USER_RECORD_LOAD_REFUSE_SECRET);
+ if (r < 0)
+ return r;
+
+ hr = new_hr;
+ }
+
+ other = hashmap_get(h->manager->homes_by_uid, UID_TO_PTR(hr->uid));
+ if (other && other != h)
+ return -EBUSY;
+
+ if (h->uid != hr->uid) {
+ r = hashmap_remove_and_replace(h->manager->homes_by_uid, UID_TO_PTR(h->uid), UID_TO_PTR(hr->uid), h);
+ if (r < 0)
+ return r;
+ }
+
+ user_record_unref(h->record);
+ h->record = user_record_ref(hr);
+ h->uid = h->record->uid;
+
+ /* The updated record might have a different autologin setting, trigger a PropertiesChanged event for it */
+ (void) bus_manager_emit_auto_login_changed(h->manager);
+ (void) bus_home_emit_change(h);
+
+ return 0;
+}
+
+int home_save_record(Home *h) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_free_ char *text = NULL;
+ const char *fn;
+ int r;
+
+ assert(h);
+
+ v = json_variant_ref(h->record->json);
+ r = json_variant_normalize(&v);
+ if (r < 0)
+ log_warning_errno(r, "User record could not be normalized.");
+
+ r = json_variant_format(v, JSON_FORMAT_PRETTY|JSON_FORMAT_NEWLINE, &text);
+ if (r < 0)
+ return r;
+
+ (void) mkdir("/var/lib/systemd/", 0755);
+ (void) mkdir("/var/lib/systemd/home/", 0700);
+
+ fn = strjoina("/var/lib/systemd/home/", h->user_name, ".identity");
+
+ r = write_string_file(fn, text, WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_MODE_0600|WRITE_STRING_FILE_SYNC);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int home_unlink_record(Home *h) {
+ const char *fn;
+
+ assert(h);
+
+ fn = strjoina("/var/lib/systemd/home/", h->user_name, ".identity");
+ if (unlink(fn) < 0 && errno != ENOENT)
+ return -errno;
+
+ fn = strjoina("/run/systemd/home/", h->user_name, ".ref");
+ if (unlink(fn) < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+}
+
+static void home_set_state(Home *h, HomeState state) {
+ HomeState old_state, new_state;
+
+ assert(h);
+
+ old_state = home_get_state(h);
+ h->state = state;
+ new_state = home_get_state(h); /* Query the new state, since the 'state' variable might be set to -1,
+ * in which case we synthesize an high-level state on demand */
+
+ log_info("%s: changing state %s → %s", h->user_name,
+ home_state_to_string(old_state),
+ home_state_to_string(new_state));
+
+ if (HOME_STATE_IS_EXECUTING_OPERATION(old_state) && !HOME_STATE_IS_EXECUTING_OPERATION(new_state)) {
+ /* If we just finished executing some operation, process the queue of pending operations. And
+ * enqueue it for GC too. */
+
+ home_schedule_operation(h, NULL, NULL);
+ manager_enqueue_gc(h->manager, h);
+ }
+}
+
+static int home_parse_worker_stdout(int _fd, UserRecord **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_close_ int fd = _fd; /* take possession, even on failure */
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line, column;
+ struct stat st;
+ int r;
+
+ if (fstat(fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat stdout fd: %m");
+
+ assert(S_ISREG(st.st_mode));
+
+ if (st.st_size == 0) { /* empty record */
+ *ret = NULL;
+ return 0;
+ }
+
+ if (lseek(fd, SEEK_SET, 0) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek to beginning of memfd: %m");
+
+ f = take_fdopen(&fd, "r");
+ if (!f)
+ return log_error_errno(errno, "Failed to reopen memfd: %m");
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *text = NULL;
+
+ r = read_full_stream(f, &text, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from client: %m");
+
+ log_debug("Got from worker: %s", text);
+ rewind(f);
+ }
+
+ r = json_parse_file(f, "stdout", JSON_PARSE_SENSITIVE, &v, &line, &column);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse identity at %u:%u: %m", line, column);
+
+ hr = user_record_new();
+ if (!hr)
+ return log_oom();
+
+ r = user_record_load(hr, v, USER_RECORD_LOAD_REFUSE_SECRET);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load home record identity: %m");
+
+ *ret = TAKE_PTR(hr);
+ return 1;
+}
+
+static int home_verify_user_record(Home *h, UserRecord *hr, bool *ret_signed_locally, sd_bus_error *ret_error) {
+ int is_signed;
+
+ assert(h);
+ assert(hr);
+ assert(ret_signed_locally);
+
+ is_signed = manager_verify_user_record(h->manager, hr);
+ switch (is_signed) {
+
+ case USER_RECORD_SIGNED_EXCLUSIVE:
+ log_info("Home %s is signed exclusively by our key, accepting.", hr->user_name);
+ *ret_signed_locally = true;
+ return 0;
+
+ case USER_RECORD_SIGNED:
+ log_info("Home %s is signed by our key (and others), accepting.", hr->user_name);
+ *ret_signed_locally = false;
+ return 0;
+
+ case USER_RECORD_FOREIGN:
+ log_info("Home %s is signed by foreign key we like, accepting.", hr->user_name);
+ *ret_signed_locally = false;
+ return 0;
+
+ case USER_RECORD_UNSIGNED:
+ sd_bus_error_setf(ret_error, BUS_ERROR_BAD_SIGNATURE, "User record %s is not signed at all, refusing.", hr->user_name);
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Home %s contains user record that is not signed at all, refusing.", hr->user_name);
+
+ case -ENOKEY:
+ sd_bus_error_setf(ret_error, BUS_ERROR_BAD_SIGNATURE, "User record %s is not signed by any known key, refusing.", hr->user_name);
+ return log_error_errno(is_signed, "Home %s contains user record that is not signed by any known key, refusing.", hr->user_name);
+
+ default:
+ assert(is_signed < 0);
+ return log_error_errno(is_signed, "Failed to verify signature on user record for %s, refusing fixation: %m", hr->user_name);
+ }
+}
+
+static int convert_worker_errno(Home *h, int e, sd_bus_error *error) {
+ /* Converts the error numbers the worker process returned into somewhat sensible dbus errors */
+
+ switch (e) {
+
+ case -EMSGSIZE:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_HOME_SIZE, "File systems of this type cannot be shrunk");
+ case -ETXTBSY:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_HOME_SIZE, "File systems of this type can only be shrunk offline");
+ case -ERANGE:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_HOME_SIZE, "File system size too small");
+ case -ENOLINK:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "System does not support selected storage backend");
+ case -EPROTONOSUPPORT:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "System does not support selected file system");
+ case -ENOTTY:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Operation not supported on storage backend");
+ case -ESOCKTNOSUPPORT:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Operation not supported on file system");
+ case -ENOKEY:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_PASSWORD, "Password for home %s is incorrect or not sufficient for authentication.", h->user_name);
+ case -EBADSLT:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN, "Password for home %s is incorrect or not sufficient, and configured security token not found either.", h->user_name);
+ case -EREMOTEIO:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_RECOVERY_KEY, "Recovery key for home %s is incorrect or not sufficient for authentication.", h->user_name);
+ case -ENOANO:
+ return sd_bus_error_setf(error, BUS_ERROR_TOKEN_PIN_NEEDED, "PIN for security token required.");
+ case -ERFKILL:
+ return sd_bus_error_setf(error, BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED, "Security token requires protected authentication path.");
+ case -EMEDIUMTYPE:
+ return sd_bus_error_setf(error, BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED, "Security token requires user presence.");
+ case -ENOSTR:
+ return sd_bus_error_setf(error, BUS_ERROR_TOKEN_ACTION_TIMEOUT, "Token action timeout. (User was supposed to verify presence or similar, by interacting with the token, and didn't do that in time.)");
+ case -EOWNERDEAD:
+ return sd_bus_error_setf(error, BUS_ERROR_TOKEN_PIN_LOCKED, "PIN of security token locked.");
+ case -ENOLCK:
+ return sd_bus_error_setf(error, BUS_ERROR_TOKEN_BAD_PIN, "Bad PIN of security token.");
+ case -ETOOMANYREFS:
+ return sd_bus_error_setf(error, BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT, "Bad PIN of security token, and only a few tries left.");
+ case -EUCLEAN:
+ return sd_bus_error_setf(error, BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT, "Bad PIN of security token, and only one try left.");
+ case -EBUSY:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "Home %s is currently being used, or an operation on home %s is currently being executed.", h->user_name, h->user_name);
+ case -ENOEXEC:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_NOT_ACTIVE, "Home %s is currently not active", h->user_name);
+ case -ENOSPC:
+ return sd_bus_error_setf(error, BUS_ERROR_NO_DISK_SPACE, "Not enough disk space for home %s", h->user_name);
+ case -EKEYREVOKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_CANT_AUTHENTICATE, "Home %s has no password or other authentication mechanism defined.", h->user_name);
+ }
+
+ return 0;
+}
+
+static void home_count_bad_authentication(Home *h, bool save) {
+ int r;
+
+ assert(h);
+
+ r = user_record_bad_authentication(h->record);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to increase bad authentication counter, ignoring: %m");
+ return;
+ }
+
+ if (save) {
+ r = home_save_record(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write home record to disk, ignoring: %m");
+ }
+}
+
+static void home_fixate_finish(Home *h, int ret, UserRecord *hr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(user_record_unrefp) UserRecord *secret = NULL;
+ bool signed_locally;
+ int r;
+
+ assert(h);
+ assert(IN_SET(h->state, HOME_FIXATING, HOME_FIXATING_FOR_ACTIVATION, HOME_FIXATING_FOR_ACQUIRE));
+
+ secret = TAKE_PTR(h->secret); /* Take possession */
+
+ if (ret < 0) {
+ if (ret == -ENOKEY)
+ (void) home_count_bad_authentication(h, false);
+
+ (void) convert_worker_errno(h, ret, &error);
+ r = log_error_errno(ret, "Fixation failed: %m");
+ goto fail;
+ }
+ if (!hr) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Did not receive user record from worker process, fixation failed.");
+ goto fail;
+ }
+
+ r = home_verify_user_record(h, hr, &signed_locally, &error);
+ if (r < 0)
+ goto fail;
+
+ r = home_set_record(h, hr);
+ if (r < 0) {
+ log_error_errno(r, "Failed to update home record: %m");
+ goto fail;
+ }
+
+ h->signed_locally = signed_locally;
+
+ /* When we finished fixating (and don't follow-up with activation), let's count this as good authentication */
+ if (h->state == HOME_FIXATING) {
+ r = user_record_good_authentication(h->record);
+ if (r < 0)
+ log_warning_errno(r, "Failed to increase good authentication counter, ignoring: %m");
+ }
+
+ r = home_save_record(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write home record to disk, ignoring: %m");
+
+ if (IN_SET(h->state, HOME_FIXATING_FOR_ACTIVATION, HOME_FIXATING_FOR_ACQUIRE)) {
+
+ r = home_start_work(h, "activate", h->record, secret);
+ if (r < 0) {
+ h->current_operation = operation_result_unref(h->current_operation, r, NULL);
+ home_set_state(h, _HOME_STATE_INVALID);
+ } else
+ home_set_state(h, h->state == HOME_FIXATING_FOR_ACTIVATION ? HOME_ACTIVATING : HOME_ACTIVATING_FOR_ACQUIRE);
+
+ return;
+ }
+
+ log_debug("Fixation of %s completed.", h->user_name);
+
+ h->current_operation = operation_result_unref(h->current_operation, 0, NULL);
+
+ /* Reset the state to "invalid", which makes home_get_state() test if the image exists and returns
+ * HOME_ABSENT vs. HOME_INACTIVE as necessary. */
+ home_set_state(h, _HOME_STATE_INVALID);
+ return;
+
+fail:
+ /* If fixation fails, we stay in unfixated state! */
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+ home_set_state(h, HOME_UNFIXATED);
+}
+
+static void home_activate_finish(Home *h, int ret, UserRecord *hr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(IN_SET(h->state, HOME_ACTIVATING, HOME_ACTIVATING_FOR_ACQUIRE));
+
+ if (ret < 0) {
+ if (ret == -ENOKEY)
+ home_count_bad_authentication(h, true);
+
+ (void) convert_worker_errno(h, ret, &error);
+ r = log_error_errno(ret, "Activation failed: %m");
+ goto finish;
+ }
+
+ if (hr) {
+ bool signed_locally;
+
+ r = home_verify_user_record(h, hr, &signed_locally, &error);
+ if (r < 0)
+ goto finish;
+
+ r = home_set_record(h, hr);
+ if (r < 0) {
+ log_error_errno(r, "Failed to update home record, ignoring: %m");
+ goto finish;
+ }
+
+ h->signed_locally = signed_locally;
+
+ r = user_record_good_authentication(h->record);
+ if (r < 0)
+ log_warning_errno(r, "Failed to increase good authentication counter, ignoring: %m");
+
+ r = home_save_record(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write home record to disk, ignoring: %m");
+ }
+
+ log_debug("Activation of %s completed.", h->user_name);
+ r = 0;
+
+finish:
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+ home_set_state(h, _HOME_STATE_INVALID);
+}
+
+static void home_deactivate_finish(Home *h, int ret, UserRecord *hr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(h->state == HOME_DEACTIVATING);
+ assert(!hr); /* We don't expect a record on this operation */
+
+ if (ret < 0) {
+ (void) convert_worker_errno(h, ret, &error);
+ r = log_error_errno(ret, "Deactivation of %s failed: %m", h->user_name);
+ goto finish;
+ }
+
+ log_debug("Deactivation of %s completed.", h->user_name);
+ r = 0;
+
+finish:
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+ home_set_state(h, _HOME_STATE_INVALID);
+}
+
+static void home_remove_finish(Home *h, int ret, UserRecord *hr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Manager *m;
+ int r;
+
+ assert(h);
+ assert(h->state == HOME_REMOVING);
+ assert(!hr); /* We don't expect a record on this operation */
+
+ m = h->manager;
+
+ if (ret < 0 && ret != -EALREADY) {
+ (void) convert_worker_errno(h, ret, &error);
+ r = log_error_errno(ret, "Removing %s failed: %m", h->user_name);
+ goto fail;
+ }
+
+ /* For a couple of storage types we can't delete the actual data storage when called (such as LUKS on
+ * partitions like USB sticks, or so). Sometimes these storage locations are among those we normally
+ * automatically discover in /home or in udev. When such a home is deleted let's hence issue a rescan
+ * after completion, so that "unfixated" entries are rediscovered. */
+ if (!IN_SET(user_record_test_image_path(h->record), USER_TEST_UNDEFINED, USER_TEST_ABSENT))
+ manager_enqueue_rescan(m);
+
+ /* The image is now removed from disk. Now also remove our stored record */
+ r = home_unlink_record(h);
+ if (r < 0) {
+ log_error_errno(r, "Removing record file failed: %m");
+ goto fail;
+ }
+
+ log_debug("Removal of %s completed.", h->user_name);
+ h->current_operation = operation_result_unref(h->current_operation, 0, NULL);
+
+ /* Unload this record from memory too now. */
+ h = home_free(h);
+ return;
+
+fail:
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+ home_set_state(h, _HOME_STATE_INVALID);
+}
+
+static void home_create_finish(Home *h, int ret, UserRecord *hr) {
+ int r;
+
+ assert(h);
+ assert(h->state == HOME_CREATING);
+
+ if (ret < 0) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ (void) convert_worker_errno(h, ret, &error);
+ log_error_errno(ret, "Operation on %s failed: %m", h->user_name);
+ h->current_operation = operation_result_unref(h->current_operation, ret, &error);
+
+ if (h->unregister_on_failure) {
+ (void) home_unlink_record(h);
+ h = home_free(h);
+ return;
+ }
+
+ home_set_state(h, _HOME_STATE_INVALID);
+ return;
+ }
+
+ if (hr) {
+ r = home_set_record(h, hr);
+ if (r < 0)
+ log_warning_errno(r, "Failed to update home record, ignoring: %m");
+ }
+
+ r = home_save_record(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to save record to disk, ignoring: %m");
+
+ log_debug("Creation of %s completed.", h->user_name);
+
+ h->current_operation = operation_result_unref(h->current_operation, 0, NULL);
+ home_set_state(h, _HOME_STATE_INVALID);
+}
+
+static void home_change_finish(Home *h, int ret, UserRecord *hr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+
+ if (ret < 0) {
+ if (ret == -ENOKEY)
+ (void) home_count_bad_authentication(h, true);
+
+ (void) convert_worker_errno(h, ret, &error);
+ r = log_error_errno(ret, "Change operation failed: %m");
+ goto finish;
+ }
+
+ if (hr) {
+ r = home_set_record(h, hr);
+ if (r < 0)
+ log_warning_errno(r, "Failed to update home record, ignoring: %m");
+ else {
+ r = user_record_good_authentication(h->record);
+ if (r < 0)
+ log_warning_errno(r, "Failed to increase good authentication counter, ignoring: %m");
+
+ r = home_save_record(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write home record to disk, ignoring: %m");
+ }
+ }
+
+ log_debug("Change operation of %s completed.", h->user_name);
+ r = 0;
+
+finish:
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+ home_set_state(h, _HOME_STATE_INVALID);
+}
+
+static void home_locking_finish(Home *h, int ret, UserRecord *hr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(h->state == HOME_LOCKING);
+
+ if (ret < 0) {
+ (void) convert_worker_errno(h, ret, &error);
+ r = log_error_errno(ret, "Locking operation failed: %m");
+ goto finish;
+ }
+
+ log_debug("Locking operation of %s completed.", h->user_name);
+ h->current_operation = operation_result_unref(h->current_operation, 0, NULL);
+ home_set_state(h, HOME_LOCKED);
+ return;
+
+finish:
+ /* If a specific home doesn't know the concept of locking, then that's totally OK, don't propagate
+ * the error if we are executing a LockAllHomes() operation. */
+
+ if (h->current_operation->type == OPERATION_LOCK_ALL && r == -ENOTTY)
+ h->current_operation = operation_result_unref(h->current_operation, 0, NULL);
+ else
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+
+ home_set_state(h, _HOME_STATE_INVALID);
+}
+
+static void home_unlocking_finish(Home *h, int ret, UserRecord *hr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(IN_SET(h->state, HOME_UNLOCKING, HOME_UNLOCKING_FOR_ACQUIRE));
+
+ if (ret < 0) {
+ if (ret == -ENOKEY)
+ (void) home_count_bad_authentication(h, true);
+
+ (void) convert_worker_errno(h, ret, &error);
+ r = log_error_errno(ret, "Unlocking operation failed: %m");
+
+ /* Revert to locked state */
+ home_set_state(h, HOME_LOCKED);
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+ return;
+ }
+
+ r = user_record_good_authentication(h->record);
+ if (r < 0)
+ log_warning_errno(r, "Failed to increase good authentication counter, ignoring: %m");
+ else {
+ r = home_save_record(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write home record to disk, ignoring: %m");
+ }
+
+ log_debug("Unlocking operation of %s completed.", h->user_name);
+
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+ home_set_state(h, _HOME_STATE_INVALID);
+ return;
+}
+
+static void home_authenticating_finish(Home *h, int ret, UserRecord *hr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(IN_SET(h->state, HOME_AUTHENTICATING, HOME_AUTHENTICATING_WHILE_ACTIVE, HOME_AUTHENTICATING_FOR_ACQUIRE));
+
+ if (ret < 0) {
+ if (ret == -ENOKEY)
+ (void) home_count_bad_authentication(h, true);
+
+ (void) convert_worker_errno(h, ret, &error);
+ r = log_error_errno(ret, "Authentication failed: %m");
+ goto finish;
+ }
+
+ if (hr) {
+ r = home_set_record(h, hr);
+ if (r < 0)
+ log_warning_errno(r, "Failed to update home record, ignoring: %m");
+ else {
+ r = user_record_good_authentication(h->record);
+ if (r < 0)
+ log_warning_errno(r, "Failed to increase good authentication counter, ignoring: %m");
+
+ r = home_save_record(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write home record to disk, ignoring: %m");
+ }
+ }
+
+ log_debug("Authentication of %s completed.", h->user_name);
+ r = 0;
+
+finish:
+ h->current_operation = operation_result_unref(h->current_operation, r, &error);
+ home_set_state(h, _HOME_STATE_INVALID);
+}
+
+static int home_on_worker_process(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ Home *h = userdata;
+ int ret;
+
+ assert(s);
+ assert(si);
+ assert(h);
+
+ assert(h->worker_pid == si->si_pid);
+ assert(h->worker_event_source);
+ assert(h->worker_stdout_fd >= 0);
+
+ (void) hashmap_remove_value(h->manager->homes_by_worker_pid, PID_TO_PTR(h->worker_pid), h);
+
+ h->worker_pid = 0;
+ h->worker_event_source = sd_event_source_unref(h->worker_event_source);
+
+ if (si->si_code != CLD_EXITED) {
+ assert(IN_SET(si->si_code, CLD_KILLED, CLD_DUMPED));
+ ret = log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Worker process died abnormally with signal %s.", signal_to_string(si->si_status));
+ } else if (si->si_status != EXIT_SUCCESS) {
+ /* If we received an error code via sd_notify(), use it */
+ if (h->worker_error_code != 0)
+ ret = log_debug_errno(h->worker_error_code, "Worker reported error code %s.", errno_to_name(h->worker_error_code));
+ else
+ ret = log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Worker exited with exit code %i.", si->si_status);
+ } else
+ ret = home_parse_worker_stdout(TAKE_FD(h->worker_stdout_fd), &hr);
+
+ h->worker_stdout_fd = safe_close(h->worker_stdout_fd);
+
+ switch (h->state) {
+
+ case HOME_FIXATING:
+ case HOME_FIXATING_FOR_ACTIVATION:
+ case HOME_FIXATING_FOR_ACQUIRE:
+ home_fixate_finish(h, ret, hr);
+ break;
+
+ case HOME_ACTIVATING:
+ case HOME_ACTIVATING_FOR_ACQUIRE:
+ home_activate_finish(h, ret, hr);
+ break;
+
+ case HOME_DEACTIVATING:
+ home_deactivate_finish(h, ret, hr);
+ break;
+
+ case HOME_LOCKING:
+ home_locking_finish(h, ret, hr);
+ break;
+
+ case HOME_UNLOCKING:
+ case HOME_UNLOCKING_FOR_ACQUIRE:
+ home_unlocking_finish(h, ret, hr);
+ break;
+
+ case HOME_CREATING:
+ home_create_finish(h, ret, hr);
+ break;
+
+ case HOME_REMOVING:
+ home_remove_finish(h, ret, hr);
+ break;
+
+ case HOME_UPDATING:
+ case HOME_UPDATING_WHILE_ACTIVE:
+ case HOME_RESIZING:
+ case HOME_RESIZING_WHILE_ACTIVE:
+ case HOME_PASSWD:
+ case HOME_PASSWD_WHILE_ACTIVE:
+ home_change_finish(h, ret, hr);
+ break;
+
+ case HOME_AUTHENTICATING:
+ case HOME_AUTHENTICATING_WHILE_ACTIVE:
+ case HOME_AUTHENTICATING_FOR_ACQUIRE:
+ home_authenticating_finish(h, ret, hr);
+ break;
+
+ default:
+ assert_not_reached("Unexpected state after worker exited");
+ }
+
+ return 0;
+}
+
+static int home_start_work(Home *h, const char *verb, UserRecord *hr, UserRecord *secret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(erase_and_freep) char *formatted = NULL;
+ _cleanup_close_ int stdin_fd = -1, stdout_fd = -1;
+ pid_t pid = 0;
+ int r;
+
+ assert(h);
+ assert(verb);
+ assert(hr);
+
+ if (h->worker_pid != 0)
+ return -EBUSY;
+
+ assert(h->worker_stdout_fd < 0);
+ assert(!h->worker_event_source);
+
+ v = json_variant_ref(hr->json);
+
+ if (secret) {
+ JsonVariant *sub = NULL;
+
+ sub = json_variant_by_key(secret->json, "secret");
+ if (!sub)
+ return -ENOKEY;
+
+ r = json_variant_set_field(&v, "secret", sub);
+ if (r < 0)
+ return r;
+ }
+
+ r = json_variant_format(v, 0, &formatted);
+ if (r < 0)
+ return r;
+
+ stdin_fd = acquire_data_fd(formatted, strlen(formatted), 0);
+ if (stdin_fd < 0)
+ return stdin_fd;
+
+ log_debug("Sending to worker: %s", formatted);
+
+ stdout_fd = memfd_create("homework-stdout", MFD_CLOEXEC);
+ if (stdout_fd < 0)
+ return -errno;
+
+ r = safe_fork_full("(sd-homework)",
+ (int[]) { stdin_fd, stdout_fd }, 2,
+ FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *homework, *suffix, *unix_path;
+
+ /* Child */
+
+ suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
+ if (suffix)
+ unix_path = strjoina("/run/systemd/home/notify.", suffix);
+ else
+ unix_path = "/run/systemd/home/notify";
+
+ if (setenv("NOTIFY_SOCKET", unix_path, 1) < 0) {
+ log_error_errno(errno, "Failed to set $NOTIFY_SOCKET: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (h->manager->default_storage >= 0)
+ if (setenv("SYSTEMD_HOME_DEFAULT_STORAGE", user_storage_to_string(h->manager->default_storage), 1) < 0) {
+ log_error_errno(errno, "Failed to set $SYSTEMD_HOME_DEFAULT_STORAGE: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (h->manager->default_file_system_type)
+ if (setenv("SYSTEMD_HOME_DEFAULT_FILE_SYSTEM_TYPE", h->manager->default_file_system_type, 1) < 0) {
+ log_error_errno(errno, "Failed to set $SYSTEMD_HOME_DEFAULT_FILE_SYSTEM_TYPE: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = rearrange_stdio(stdin_fd, stdout_fd, STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rearrange stdin/stdout/stderr: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ stdin_fd = stdout_fd = -1; /* have been invalidated by rearrange_stdio() */
+
+ /* Allow overriding the homework path via an environment variable, to make debugging
+ * easier. */
+ homework = getenv("SYSTEMD_HOMEWORK_PATH") ?: SYSTEMD_HOMEWORK_PATH;
+
+ execl(homework, homework, verb, NULL);
+ log_error_errno(errno, "Failed to invoke %s: %m", homework);
+ _exit(EXIT_FAILURE);
+ }
+
+ r = sd_event_add_child(h->manager->event, &h->worker_event_source, pid, WEXITED, home_on_worker_process, h);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(h->worker_event_source, "worker");
+
+ r = hashmap_put(h->manager->homes_by_worker_pid, PID_TO_PTR(pid), h);
+ if (r < 0) {
+ h->worker_event_source = sd_event_source_unref(h->worker_event_source);
+ return r;
+ }
+
+ h->worker_stdout_fd = TAKE_FD(stdout_fd);
+ h->worker_pid = pid;
+ h->worker_error_code = 0;
+
+ return 0;
+}
+
+static int home_ratelimit(Home *h, sd_bus_error *error) {
+ int r, ret;
+
+ assert(h);
+
+ ret = user_record_ratelimit(h->record);
+ if (ret < 0)
+ return ret;
+
+ if (h->state != HOME_UNFIXATED) {
+ r = home_save_record(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to save updated record, ignoring: %m");
+ }
+
+ if (ret == 0) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ usec_t t, n;
+
+ n = now(CLOCK_REALTIME);
+ t = user_record_ratelimit_next_try(h->record);
+
+ if (t != USEC_INFINITY && t > n)
+ return sd_bus_error_setf(error, BUS_ERROR_AUTHENTICATION_LIMIT_HIT, "Too many login attempts, please try again in %s!",
+ format_timespan(buf, sizeof(buf), t - n, USEC_PER_SEC));
+
+ return sd_bus_error_setf(error, BUS_ERROR_AUTHENTICATION_LIMIT_HIT, "Too many login attempts, please try again later.");
+ }
+
+ return 0;
+}
+
+static int home_fixate_internal(
+ Home *h,
+ UserRecord *secret,
+ HomeState for_state,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(h);
+ assert(IN_SET(for_state, HOME_FIXATING, HOME_FIXATING_FOR_ACTIVATION, HOME_FIXATING_FOR_ACQUIRE));
+
+ r = home_start_work(h, "inspect", h->record, secret);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(for_state, HOME_FIXATING_FOR_ACTIVATION, HOME_FIXATING_FOR_ACQUIRE)) {
+ /* Remember the secret data, since we need it for the activation again, later on. */
+ user_record_unref(h->secret);
+ h->secret = user_record_ref(secret);
+ }
+
+ home_set_state(h, for_state);
+ return 0;
+}
+
+int home_fixate(Home *h, UserRecord *secret, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+
+ switch (home_get_state(h)) {
+ case HOME_ABSENT:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ABSENT, "Home %s is currently missing or not plugged in.", h->user_name);
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ case HOME_ACTIVE:
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ALREADY_FIXATED, "Home %s is already fixated.", h->user_name);
+ case HOME_UNFIXATED:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ r = home_ratelimit(h, error);
+ if (r < 0)
+ return r;
+
+ return home_fixate_internal(h, secret, HOME_FIXATING, error);
+}
+
+static int home_activate_internal(Home *h, UserRecord *secret, HomeState for_state, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+ assert(IN_SET(for_state, HOME_ACTIVATING, HOME_ACTIVATING_FOR_ACQUIRE));
+
+ r = home_start_work(h, "activate", h->record, secret);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, for_state);
+ return 0;
+}
+
+int home_activate(Home *h, UserRecord *secret, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+
+ switch (home_get_state(h)) {
+ case HOME_UNFIXATED:
+ return home_fixate_internal(h, secret, HOME_FIXATING_FOR_ACTIVATION, error);
+ case HOME_ABSENT:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ABSENT, "Home %s is currently missing or not plugged in.", h->user_name);
+ case HOME_ACTIVE:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ALREADY_ACTIVE, "Home %s is already active.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ r = home_ratelimit(h, error);
+ if (r < 0)
+ return r;
+
+ return home_activate_internal(h, secret, HOME_ACTIVATING, error);
+}
+
+static int home_authenticate_internal(Home *h, UserRecord *secret, HomeState for_state, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+ assert(IN_SET(for_state, HOME_AUTHENTICATING, HOME_AUTHENTICATING_WHILE_ACTIVE, HOME_AUTHENTICATING_FOR_ACQUIRE));
+
+ r = home_start_work(h, "inspect", h->record, secret);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, for_state);
+ return 0;
+}
+
+int home_authenticate(Home *h, UserRecord *secret, sd_bus_error *error) {
+ HomeState state;
+ int r;
+
+ assert(h);
+
+ state = home_get_state(h);
+ switch (state) {
+ case HOME_ABSENT:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ABSENT, "Home %s is currently missing or not plugged in.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ case HOME_UNFIXATED:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ case HOME_ACTIVE:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ r = home_ratelimit(h, error);
+ if (r < 0)
+ return r;
+
+ return home_authenticate_internal(h, secret, state == HOME_ACTIVE ? HOME_AUTHENTICATING_WHILE_ACTIVE : HOME_AUTHENTICATING, error);
+}
+
+static int home_deactivate_internal(Home *h, bool force, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+
+ r = home_start_work(h, force ? "deactivate-force" : "deactivate", h->record, NULL);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, HOME_DEACTIVATING);
+ return 0;
+}
+
+int home_deactivate(Home *h, bool force, sd_bus_error *error) {
+ assert(h);
+
+ switch (home_get_state(h)) {
+ case HOME_UNFIXATED:
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_NOT_ACTIVE, "Home %s not active.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ case HOME_ACTIVE:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ return home_deactivate_internal(h, force, error);
+}
+
+int home_create(Home *h, UserRecord *secret, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+
+ switch (home_get_state(h)) {
+ case HOME_INACTIVE: {
+ int t;
+
+ if (h->record->storage < 0)
+ break; /* if no storage is defined we don't know what precisely to look for, hence
+ * HOME_INACTIVE is OK in that case too. */
+
+ t = user_record_test_image_path(h->record);
+ if (IN_SET(t, USER_TEST_MAYBE, USER_TEST_UNDEFINED))
+ break; /* And if the image path test isn't conclusive, let's also go on */
+
+ if (IN_SET(t, -EBADFD, -ENOTDIR))
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_EXISTS, "Selected home image of user %s already exists or has wrong inode type.", h->user_name);
+
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_EXISTS, "Selected home image of user %s already exists.", h->user_name);
+ }
+ case HOME_UNFIXATED:
+ case HOME_DIRTY:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_EXISTS, "Home of user %s already exists.", h->user_name);
+ case HOME_ABSENT:
+ break;
+ case HOME_ACTIVE:
+ case HOME_LOCKED:
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "Home %s is currently being used, or an operation on home %s is currently being executed.", h->user_name, h->user_name);
+ }
+
+ if (h->record->enforce_password_policy == false)
+ log_debug("Password quality check turned off for account, skipping.");
+ else {
+ r = user_record_quality_check_password(h->record, secret, error);
+ if (r < 0)
+ return r;
+ }
+
+ r = home_start_work(h, "create", h->record, secret);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, HOME_CREATING);
+ return 0;
+}
+
+int home_remove(Home *h, sd_bus_error *error) {
+ HomeState state;
+ int r;
+
+ assert(h);
+
+ state = home_get_state(h);
+ switch (state) {
+ case HOME_ABSENT: /* If the home directory is absent, then this is just like unregistering */
+ return home_unregister(h, error);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ case HOME_UNFIXATED:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ break;
+ case HOME_ACTIVE:
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "Home %s is currently being used, or an operation on home %s is currently being executed.", h->user_name, h->user_name);
+ }
+
+ r = home_start_work(h, "remove", h->record, NULL);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, HOME_REMOVING);
+ return 0;
+}
+
+static int user_record_extend_with_binding(UserRecord *hr, UserRecord *with_binding, UserRecordLoadFlags flags, UserRecord **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *nr = NULL;
+ JsonVariant *binding;
+ int r;
+
+ assert(hr);
+ assert(with_binding);
+ assert(ret);
+
+ assert_se(v = json_variant_ref(hr->json));
+
+ binding = json_variant_by_key(with_binding->json, "binding");
+ if (binding) {
+ r = json_variant_set_field(&v, "binding", binding);
+ if (r < 0)
+ return r;
+ }
+
+ nr = user_record_new();
+ if (!nr)
+ return -ENOMEM;
+
+ r = user_record_load(nr, v, flags);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(nr);
+ return 0;
+}
+
+static int home_update_internal(
+ Home *h,
+ const char *verb,
+ UserRecord *hr,
+ UserRecord *secret,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *new_hr = NULL, *saved_secret = NULL, *signed_hr = NULL;
+ int r, c;
+
+ assert(h);
+ assert(verb);
+ assert(hr);
+
+ if (!user_record_compatible(hr, h->record))
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_RECORD_MISMATCH, "Updated user record is not compatible with existing one.");
+ c = user_record_compare_last_change(hr, h->record); /* refuse downgrades */
+ if (c < 0)
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_RECORD_DOWNGRADE, "Refusing to update to older home record.");
+
+ if (!secret && FLAGS_SET(hr->mask, USER_RECORD_SECRET)) {
+ r = user_record_clone(hr, USER_RECORD_EXTRACT_SECRET, &saved_secret);
+ if (r < 0)
+ return r;
+
+ secret = saved_secret;
+ }
+
+ r = manager_verify_user_record(h->manager, hr);
+ switch (r) {
+
+ case USER_RECORD_UNSIGNED:
+ if (h->signed_locally <= 0) /* If the existing record is not owned by us, don't accept an
+ * unsigned new record. i.e. only implicitly sign new records
+ * that where previously signed by us too. */
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_RECORD_SIGNED, "Home %s is signed and cannot be modified locally.", h->user_name);
+
+ /* The updated record is not signed, then do so now */
+ r = manager_sign_user_record(h->manager, hr, &signed_hr, error);
+ if (r < 0)
+ return r;
+
+ hr = signed_hr;
+ break;
+
+ case USER_RECORD_SIGNED_EXCLUSIVE:
+ case USER_RECORD_SIGNED:
+ case USER_RECORD_FOREIGN:
+ /* Has already been signed. Great! */
+ break;
+
+ case -ENOKEY:
+ default:
+ return r;
+ }
+
+ r = user_record_extend_with_binding(hr, h->record, USER_RECORD_LOAD_MASK_SECRET, &new_hr);
+ if (r < 0)
+ return r;
+
+ if (c == 0) {
+ /* different payload but same lastChangeUSec field? That's not cool! */
+
+ r = user_record_masked_equal(new_hr, h->record, USER_RECORD_REGULAR|USER_RECORD_PRIVILEGED|USER_RECORD_PER_MACHINE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_RECORD_MISMATCH, "Home record different but timestamp remained the same, refusing.");
+ }
+
+ r = home_start_work(h, verb, new_hr, secret);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int home_update(Home *h, UserRecord *hr, sd_bus_error *error) {
+ HomeState state;
+ int r;
+
+ assert(h);
+ assert(hr);
+
+ state = home_get_state(h);
+ switch (state) {
+ case HOME_UNFIXATED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_UNFIXATED, "Home %s has not been fixated yet.", h->user_name);
+ case HOME_ABSENT:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ABSENT, "Home %s is currently missing or not plugged in.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ case HOME_ACTIVE:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ r = home_ratelimit(h, error);
+ if (r < 0)
+ return r;
+
+ r = home_update_internal(h, "update", hr, NULL, error);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, state == HOME_ACTIVE ? HOME_UPDATING_WHILE_ACTIVE : HOME_UPDATING);
+ return 0;
+}
+
+int home_resize(Home *h, uint64_t disk_size, UserRecord *secret, sd_bus_error *error) {
+ _cleanup_(user_record_unrefp) UserRecord *c = NULL;
+ HomeState state;
+ int r;
+
+ assert(h);
+
+ state = home_get_state(h);
+ switch (state) {
+ case HOME_UNFIXATED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_UNFIXATED, "Home %s has not been fixated yet.", h->user_name);
+ case HOME_ABSENT:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ABSENT, "Home %s is currently missing or not plugged in.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ case HOME_ACTIVE:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ r = home_ratelimit(h, error);
+ if (r < 0)
+ return r;
+
+ if (disk_size == UINT64_MAX || disk_size == h->record->disk_size) {
+ if (h->record->disk_size == UINT64_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "No disk size to resize to specified.");
+
+ c = user_record_ref(h->record); /* Shortcut if size is unspecified or matches the record */
+ } else {
+ _cleanup_(user_record_unrefp) UserRecord *signed_c = NULL;
+
+ if (h->signed_locally <= 0) /* Don't allow changing of records not signed only by us */
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_RECORD_SIGNED, "Home %s is signed and cannot be modified locally.", h->user_name);
+
+ r = user_record_clone(h->record, USER_RECORD_LOAD_REFUSE_SECRET, &c);
+ if (r < 0)
+ return r;
+
+ r = user_record_set_disk_size(c, disk_size);
+ if (r == -ERANGE)
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_HOME_SIZE, "Requested size for home %s out of acceptable range.", h->user_name);
+ if (r < 0)
+ return r;
+
+ r = user_record_update_last_changed(c, false);
+ if (r == -ECHRNG)
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_RECORD_MISMATCH, "Record last change time of %s is newer than current time, cannot update.", h->user_name);
+ if (r < 0)
+ return r;
+
+ r = manager_sign_user_record(h->manager, c, &signed_c, error);
+ if (r < 0)
+ return r;
+
+ user_record_unref(c);
+ c = TAKE_PTR(signed_c);
+ }
+
+ r = home_update_internal(h, "resize", c, secret, error);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, state == HOME_ACTIVE ? HOME_RESIZING_WHILE_ACTIVE : HOME_RESIZING);
+ return 0;
+}
+
+static int home_may_change_password(
+ Home *h,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(h);
+
+ r = user_record_test_password_change_required(h->record);
+ if (IN_SET(r, -EKEYREVOKED, -EOWNERDEAD, -EKEYEXPIRED, -ESTALE))
+ return 0; /* expired in some form, but changing is allowed */
+ if (IN_SET(r, -EKEYREJECTED, -EROFS))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Expiration settings of account %s do not allow changing of password.", h->user_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to test password expiry: %m");
+
+ return 0; /* not expired */
+}
+
+int home_passwd(Home *h,
+ UserRecord *new_secret,
+ UserRecord *old_secret,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *c = NULL, *merged_secret = NULL, *signed_c = NULL;
+ HomeState state;
+ int r;
+
+ assert(h);
+
+ if (h->signed_locally <= 0) /* Don't allow changing of records not signed only by us */
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_RECORD_SIGNED, "Home %s is signed and cannot be modified locally.", h->user_name);
+
+ state = home_get_state(h);
+ switch (state) {
+ case HOME_UNFIXATED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_UNFIXATED, "Home %s has not been fixated yet.", h->user_name);
+ case HOME_ABSENT:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_ABSENT, "Home %s is currently missing or not plugged in.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ case HOME_ACTIVE:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ r = home_ratelimit(h, error);
+ if (r < 0)
+ return r;
+
+ r = home_may_change_password(h, error);
+ if (r < 0)
+ return r;
+
+ r = user_record_clone(h->record, USER_RECORD_LOAD_REFUSE_SECRET, &c);
+ if (r < 0)
+ return r;
+
+ merged_secret = user_record_new();
+ if (!merged_secret)
+ return -ENOMEM;
+
+ r = user_record_merge_secret(merged_secret, old_secret);
+ if (r < 0)
+ return r;
+
+ r = user_record_merge_secret(merged_secret, new_secret);
+ if (r < 0)
+ return r;
+
+ if (!strv_isempty(new_secret->password)) {
+ /* Update the password only if one is specified, otherwise let's just reuse the old password
+ * data. This is useful as a way to propagate updated user records into the LUKS backends
+ * properly. */
+
+ r = user_record_make_hashed_password(c, new_secret->password, /* extend = */ false);
+ if (r < 0)
+ return r;
+
+ r = user_record_set_password_change_now(c, -1 /* remove */);
+ if (r < 0)
+ return r;
+ }
+
+ r = user_record_update_last_changed(c, true);
+ if (r == -ECHRNG)
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_RECORD_MISMATCH, "Record last change time of %s is newer than current time, cannot update.", h->user_name);
+ if (r < 0)
+ return r;
+
+ r = manager_sign_user_record(h->manager, c, &signed_c, error);
+ if (r < 0)
+ return r;
+
+ if (c->enforce_password_policy == false)
+ log_debug("Password quality check turned off for account, skipping.");
+ else {
+ r = user_record_quality_check_password(c, merged_secret, error);
+ if (r < 0)
+ return r;
+ }
+
+ r = home_update_internal(h, "passwd", signed_c, merged_secret, error);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, state == HOME_ACTIVE ? HOME_PASSWD_WHILE_ACTIVE : HOME_PASSWD);
+ return 0;
+}
+
+int home_unregister(Home *h, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+
+ switch (home_get_state(h)) {
+ case HOME_UNFIXATED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_UNFIXATED, "Home %s is not registered.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ break;
+ case HOME_ACTIVE:
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "Home %s is currently being used, or an operation on home %s is currently being executed.", h->user_name, h->user_name);
+ }
+
+ r = home_unlink_record(h);
+ if (r < 0)
+ return r;
+
+ /* And destroy the whole entry. The caller needs to be prepared for that. */
+ h = home_free(h);
+ return 1;
+}
+
+int home_lock(Home *h, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+
+ switch (home_get_state(h)) {
+ case HOME_UNFIXATED:
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_NOT_ACTIVE, "Home %s is not active.", h->user_name);
+ case HOME_LOCKED:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_LOCKED, "Home %s is already locked.", h->user_name);
+ case HOME_ACTIVE:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ r = home_start_work(h, "lock", h->record, NULL);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, HOME_LOCKING);
+ return 0;
+}
+
+static int home_unlock_internal(Home *h, UserRecord *secret, HomeState for_state, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+ assert(IN_SET(for_state, HOME_UNLOCKING, HOME_UNLOCKING_FOR_ACQUIRE));
+
+ r = home_start_work(h, "unlock", h->record, secret);
+ if (r < 0)
+ return r;
+
+ home_set_state(h, for_state);
+ return 0;
+}
+
+int home_unlock(Home *h, UserRecord *secret, sd_bus_error *error) {
+ int r;
+ assert(h);
+
+ r = home_ratelimit(h, error);
+ if (r < 0)
+ return r;
+
+ switch (home_get_state(h)) {
+ case HOME_UNFIXATED:
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_ACTIVE:
+ case HOME_DIRTY:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_NOT_LOCKED, "Home %s is not locked.", h->user_name);
+ case HOME_LOCKED:
+ break;
+ default:
+ return sd_bus_error_setf(error, BUS_ERROR_HOME_BUSY, "An operation on home %s is currently being executed.", h->user_name);
+ }
+
+ return home_unlock_internal(h, secret, HOME_UNLOCKING, error);
+}
+
+HomeState home_get_state(Home *h) {
+ int r;
+ assert(h);
+
+ /* When the state field is initialized, it counts. */
+ if (h->state >= 0)
+ return h->state;
+
+ /* Otherwise, let's see if the home directory is mounted. If so, we assume for sure the home
+ * directory is active */
+ if (user_record_test_home_directory(h->record) == USER_TEST_MOUNTED)
+ return HOME_ACTIVE;
+
+ /* And if we see the image being gone, we report this as absent */
+ r = user_record_test_image_path(h->record);
+ if (r == USER_TEST_ABSENT)
+ return HOME_ABSENT;
+ if (r == USER_TEST_DIRTY)
+ return HOME_DIRTY;
+
+ /* And for all other cases we return "inactive". */
+ return HOME_INACTIVE;
+}
+
+void home_process_notify(Home *h, char **l) {
+ const char *e;
+ int error;
+ int r;
+
+ assert(h);
+
+ e = strv_env_get(l, "ERRNO");
+ if (!e) {
+ log_debug("Got notify message lacking ERRNO= field, ignoring.");
+ return;
+ }
+
+ r = safe_atoi(e, &error);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse received error number, ignoring: %s", e);
+ return;
+ }
+ if (error <= 0) {
+ log_debug("Error number is out of range: %i", error);
+ return;
+ }
+
+ h->worker_error_code = error;
+}
+
+int home_killall(Home *h) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ assert(h);
+
+ if (!uid_is_valid(h->uid))
+ return 0;
+
+ assert(h->uid > 0); /* We never should be UID 0 */
+
+ /* Let's kill everything matching the specified UID */
+ r = safe_fork("(sd-killer)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT|FORK_LOG, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ gid_t gid;
+
+ /* Child */
+
+ gid = user_record_gid(h->record);
+ if (setresgid(gid, gid, gid) < 0) {
+ log_error_errno(errno, "Failed to change GID to " GID_FMT ": %m", gid);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (setgroups(0, NULL) < 0) {
+ log_error_errno(errno, "Failed to reset auxiliary groups list: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (setresuid(h->uid, h->uid, h->uid) < 0) {
+ log_error_errno(errno, "Failed to change UID to " UID_FMT ": %m", h->uid);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (kill(-1, SIGKILL) < 0) {
+ log_error_errno(errno, "Failed to kill all processes of UID " UID_FMT ": %m", h->uid);
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Let's also kill everything in the user's slice */
+ if (asprintf(&unit, "user-" UID_FMT ".slice", h->uid) < 0)
+ return log_oom();
+
+ r = sd_bus_call_method(
+ h->manager->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "KillUnit",
+ &error,
+ NULL,
+ "ssi", unit, "all", SIGKILL);
+ if (r < 0)
+ log_full_errno(sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_UNIT) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to kill login processes of user, ignoring: %s", bus_error_message(&error, r));
+
+ return 1;
+}
+
+static int home_get_disk_status_luks(
+ Home *h,
+ HomeState state,
+ uint64_t *ret_disk_size,
+ uint64_t *ret_disk_usage,
+ uint64_t *ret_disk_free,
+ uint64_t *ret_disk_ceiling,
+ uint64_t *ret_disk_floor) {
+
+ uint64_t disk_size = UINT64_MAX, disk_usage = UINT64_MAX, disk_free = UINT64_MAX,
+ disk_ceiling = UINT64_MAX, disk_floor = UINT64_MAX,
+ stat_used = UINT64_MAX, fs_size = UINT64_MAX, header_size = 0;
+
+ struct statfs sfs;
+ const char *hd;
+ int r;
+
+ assert(h);
+ assert(ret_disk_size);
+ assert(ret_disk_usage);
+ assert(ret_disk_free);
+ assert(ret_disk_ceiling);
+
+ if (state != HOME_ABSENT) {
+ const char *ip;
+
+ ip = user_record_image_path(h->record);
+ if (ip) {
+ struct stat st;
+
+ if (stat(ip, &st) < 0)
+ log_debug_errno(errno, "Failed to stat() %s, ignoring: %m", ip);
+ else if (S_ISREG(st.st_mode)) {
+ _cleanup_free_ char *parent = NULL;
+
+ disk_size = st.st_size;
+ stat_used = st.st_blocks * 512;
+
+ parent = dirname_malloc(ip);
+ if (!parent)
+ return log_oom();
+
+ if (statfs(parent, &sfs) < 0)
+ log_debug_errno(errno, "Failed to statfs() %s, ignoring: %m", parent);
+ else
+ disk_ceiling = stat_used + sfs.f_bsize * sfs.f_bavail;
+
+ } else if (S_ISBLK(st.st_mode)) {
+ _cleanup_free_ char *szbuf = NULL;
+ char p[SYS_BLOCK_PATH_MAX("/size")];
+
+ /* Let's read the size off sysfs, so that we don't have to open the device */
+ xsprintf_sys_block_path(p, "/size", st.st_rdev);
+ r = read_one_line_file(p, &szbuf);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read %s, ignoring: %m", p);
+ else {
+ uint64_t sz;
+
+ r = safe_atou64(szbuf, &sz);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse %s, ignoring: %s", p, szbuf);
+ else
+ disk_size = sz * 512;
+ }
+ } else
+ log_debug("Image path is not a block device or regular file, not able to acquire size.");
+ }
+ }
+
+ if (!HOME_STATE_IS_ACTIVE(state))
+ goto finish;
+
+ hd = user_record_home_directory(h->record);
+ if (!hd)
+ goto finish;
+
+ if (statfs(hd, &sfs) < 0) {
+ log_debug_errno(errno, "Failed to statfs() %s, ignoring: %m", hd);
+ goto finish;
+ }
+
+ disk_free = sfs.f_bsize * sfs.f_bavail;
+ fs_size = sfs.f_bsize * sfs.f_blocks;
+ if (disk_size != UINT64_MAX && disk_size > fs_size)
+ header_size = disk_size - fs_size;
+
+ /* We take a perspective from the user here (as opposed to from the host): the used disk space is the
+ * difference from the limit and what's free. This makes a difference if sparse mode is not used: in
+ * that case the image is pre-allocated and thus appears all used from the host PoV but is not used
+ * up at all yet from the user's PoV.
+ *
+ * That said, we use use the stat() reported loopback file size as upper boundary: our footprint can
+ * never be larger than what we take up on the lowest layers. */
+
+ if (disk_size != UINT64_MAX && disk_size > disk_free) {
+ disk_usage = disk_size - disk_free;
+
+ if (stat_used != UINT64_MAX && disk_usage > stat_used)
+ disk_usage = stat_used;
+ } else
+ disk_usage = stat_used;
+
+ /* If we have the magic, determine floor preferably by magic */
+ disk_floor = minimal_size_by_fs_magic(sfs.f_type) + header_size;
+
+finish:
+ /* If we don't know the magic, go by file system name */
+ if (disk_floor == UINT64_MAX)
+ disk_floor = minimal_size_by_fs_name(user_record_file_system_type(h->record));
+
+ *ret_disk_size = disk_size;
+ *ret_disk_usage = disk_usage;
+ *ret_disk_free = disk_free;
+ *ret_disk_ceiling = disk_ceiling;
+ *ret_disk_floor = disk_floor;
+
+ return 0;
+}
+
+static int home_get_disk_status_directory(
+ Home *h,
+ HomeState state,
+ uint64_t *ret_disk_size,
+ uint64_t *ret_disk_usage,
+ uint64_t *ret_disk_free,
+ uint64_t *ret_disk_ceiling,
+ uint64_t *ret_disk_floor) {
+
+ uint64_t disk_size = UINT64_MAX, disk_usage = UINT64_MAX, disk_free = UINT64_MAX,
+ disk_ceiling = UINT64_MAX, disk_floor = UINT64_MAX;
+ struct statfs sfs;
+ struct dqblk req;
+ const char *path = NULL;
+ int r;
+
+ assert(ret_disk_size);
+ assert(ret_disk_usage);
+ assert(ret_disk_free);
+ assert(ret_disk_ceiling);
+ assert(ret_disk_floor);
+
+ if (HOME_STATE_IS_ACTIVE(state))
+ path = user_record_home_directory(h->record);
+
+ if (!path) {
+ if (state == HOME_ABSENT)
+ goto finish;
+
+ path = user_record_image_path(h->record);
+ }
+
+ if (!path)
+ goto finish;
+
+ if (statfs(path, &sfs) < 0)
+ log_debug_errno(errno, "Failed to statfs() %s, ignoring: %m", path);
+ else {
+ disk_free = sfs.f_bsize * sfs.f_bavail;
+ disk_size = sfs.f_bsize * sfs.f_blocks;
+
+ /* We don't initialize disk_usage from statfs() data here, since the device is likely not used
+ * by us alone, and disk_usage should only reflect our own use. */
+ }
+
+ if (IN_SET(h->record->storage, USER_CLASSIC, USER_DIRECTORY, USER_SUBVOLUME)) {
+
+ r = btrfs_is_subvol(path);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether %s is a btrfs subvolume: %m", path);
+ else if (r > 0) {
+ BtrfsQuotaInfo qi;
+
+ r = btrfs_subvol_get_subtree_quota(path, 0, &qi);
+ if (r < 0)
+ log_debug_errno(r, "Failed to query btrfs subtree quota, ignoring: %m");
+ else {
+ disk_usage = qi.referenced;
+
+ if (disk_free != UINT64_MAX) {
+ disk_ceiling = qi.referenced + disk_free;
+
+ if (disk_size != UINT64_MAX && disk_ceiling > disk_size)
+ disk_ceiling = disk_size;
+ }
+
+ if (qi.referenced_max != UINT64_MAX) {
+ if (disk_size != UINT64_MAX)
+ disk_size = MIN(qi.referenced_max, disk_size);
+ else
+ disk_size = qi.referenced_max;
+ }
+
+ if (disk_size != UINT64_MAX) {
+ if (disk_size > disk_usage)
+ disk_free = disk_size - disk_usage;
+ else
+ disk_free = 0;
+ }
+ }
+
+ goto finish;
+ }
+ }
+
+ if (IN_SET(h->record->storage, USER_CLASSIC, USER_DIRECTORY, USER_FSCRYPT)) {
+ r = quotactl_path(QCMD_FIXED(Q_GETQUOTA, USRQUOTA), path, h->uid, &req);
+ if (r < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(r)) {
+ log_debug_errno(r, "No UID quota support on %s.", path);
+ goto finish;
+ }
+
+ if (r != -ESRCH) {
+ log_debug_errno(r, "Failed to query disk quota for UID " UID_FMT ": %m", h->uid);
+ goto finish;
+ }
+
+ disk_usage = 0; /* No record of this user? then nothing was used */
+ } else {
+ if (FLAGS_SET(req.dqb_valid, QIF_SPACE) && disk_free != UINT64_MAX) {
+ disk_ceiling = req.dqb_curspace + disk_free;
+
+ if (disk_size != UINT64_MAX && disk_ceiling > disk_size)
+ disk_ceiling = disk_size;
+ }
+
+ if (FLAGS_SET(req.dqb_valid, QIF_BLIMITS)) {
+ uint64_t q;
+
+ /* Take the minimum of the quota and the available disk space here */
+ q = req.dqb_bhardlimit * QIF_DQBLKSIZE;
+ if (disk_size != UINT64_MAX)
+ disk_size = MIN(disk_size, q);
+ else
+ disk_size = q;
+ }
+ if (FLAGS_SET(req.dqb_valid, QIF_SPACE)) {
+ disk_usage = req.dqb_curspace;
+
+ if (disk_size != UINT64_MAX) {
+ if (disk_size > disk_usage)
+ disk_free = disk_size - disk_usage;
+ else
+ disk_free = 0;
+ }
+ }
+ }
+ }
+
+finish:
+ *ret_disk_size = disk_size;
+ *ret_disk_usage = disk_usage;
+ *ret_disk_free = disk_free;
+ *ret_disk_ceiling = disk_ceiling;
+ *ret_disk_floor = disk_floor;
+
+ return 0;
+}
+
+int home_augment_status(
+ Home *h,
+ UserRecordLoadFlags flags,
+ UserRecord **ret) {
+
+ uint64_t disk_size = UINT64_MAX, disk_usage = UINT64_MAX, disk_free = UINT64_MAX, disk_ceiling = UINT64_MAX, disk_floor = UINT64_MAX;
+ _cleanup_(json_variant_unrefp) JsonVariant *j = NULL, *v = NULL, *m = NULL, *status = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ char ids[SD_ID128_STRING_MAX];
+ HomeState state;
+ sd_id128_t id;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ /* We are supposed to add this, this can't be on hence. */
+ assert(!FLAGS_SET(flags, USER_RECORD_STRIP_STATUS));
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ state = home_get_state(h);
+
+ switch (h->record->storage) {
+
+ case USER_LUKS:
+ r = home_get_disk_status_luks(h, state, &disk_size, &disk_usage, &disk_free, &disk_ceiling, &disk_floor);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case USER_CLASSIC:
+ case USER_DIRECTORY:
+ case USER_SUBVOLUME:
+ case USER_FSCRYPT:
+ case USER_CIFS:
+ r = home_get_disk_status_directory(h, state, &disk_size, &disk_usage, &disk_free, &disk_ceiling, &disk_floor);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ ; /* unset */
+ }
+
+ if (disk_floor == UINT64_MAX || (disk_usage != UINT64_MAX && disk_floor < disk_usage))
+ disk_floor = disk_usage;
+ if (disk_floor == UINT64_MAX || disk_floor < USER_DISK_SIZE_MIN)
+ disk_floor = USER_DISK_SIZE_MIN;
+ if (disk_ceiling == UINT64_MAX || disk_ceiling > USER_DISK_SIZE_MAX)
+ disk_ceiling = USER_DISK_SIZE_MAX;
+
+ r = json_build(&status,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("state", JSON_BUILD_STRING(home_state_to_string(state))),
+ JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.Home")),
+ JSON_BUILD_PAIR_CONDITION(disk_size != UINT64_MAX, "diskSize", JSON_BUILD_UNSIGNED(disk_size)),
+ JSON_BUILD_PAIR_CONDITION(disk_usage != UINT64_MAX, "diskUsage", JSON_BUILD_UNSIGNED(disk_usage)),
+ JSON_BUILD_PAIR_CONDITION(disk_free != UINT64_MAX, "diskFree", JSON_BUILD_UNSIGNED(disk_free)),
+ JSON_BUILD_PAIR_CONDITION(disk_ceiling != UINT64_MAX, "diskCeiling", JSON_BUILD_UNSIGNED(disk_ceiling)),
+ JSON_BUILD_PAIR_CONDITION(disk_floor != UINT64_MAX, "diskFloor", JSON_BUILD_UNSIGNED(disk_floor)),
+ JSON_BUILD_PAIR_CONDITION(h->signed_locally >= 0, "signedLocally", JSON_BUILD_BOOLEAN(h->signed_locally))
+ ));
+ if (r < 0)
+ return r;
+
+ j = json_variant_ref(h->record->json);
+ v = json_variant_ref(json_variant_by_key(j, "status"));
+ m = json_variant_ref(json_variant_by_key(v, sd_id128_to_string(id, ids)));
+
+ r = json_variant_filter(&m, STRV_MAKE("diskSize", "diskUsage", "diskFree", "diskCeiling", "diskFloor", "signedLocally"));
+ if (r < 0)
+ return r;
+
+ r = json_variant_merge(&m, status);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&v, ids, m);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&j, "status", v);
+ if (r < 0)
+ return r;
+
+ ur = user_record_new();
+ if (!ur)
+ return -ENOMEM;
+
+ r = user_record_load(ur, j, flags);
+ if (r < 0)
+ return r;
+
+ ur->incomplete =
+ FLAGS_SET(h->record->mask, USER_RECORD_PRIVILEGED) &&
+ !FLAGS_SET(ur->mask, USER_RECORD_PRIVILEGED);
+
+ *ret = TAKE_PTR(ur);
+ return 0;
+}
+
+static int on_home_ref_eof(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(operation_unrefp) Operation *o = NULL;
+ Home *h = userdata;
+
+ assert(s);
+ assert(h);
+
+ if (h->ref_event_source_please_suspend == s)
+ h->ref_event_source_please_suspend = sd_event_source_disable_unref(h->ref_event_source_please_suspend);
+
+ if (h->ref_event_source_dont_suspend == s)
+ h->ref_event_source_dont_suspend = sd_event_source_disable_unref(h->ref_event_source_dont_suspend);
+
+ if (h->ref_event_source_dont_suspend || h->ref_event_source_please_suspend)
+ return 0;
+
+ log_info("Got notification that all sessions of user %s ended, deactivating automatically.", h->user_name);
+
+ o = operation_new(OPERATION_PIPE_EOF, NULL);
+ if (!o) {
+ log_oom();
+ return 0;
+ }
+
+ home_schedule_operation(h, o, NULL);
+ return 0;
+}
+
+int home_create_fifo(Home *h, bool please_suspend) {
+ _cleanup_close_ int ret_fd = -1;
+ sd_event_source **ss;
+ const char *fn, *suffix;
+ int r;
+
+ assert(h);
+
+ if (please_suspend) {
+ suffix = ".please-suspend";
+ ss = &h->ref_event_source_please_suspend;
+ } else {
+ suffix = ".dont-suspend";
+ ss = &h->ref_event_source_dont_suspend;
+ }
+
+ fn = strjoina("/run/systemd/home/", h->user_name, suffix);
+
+ if (!*ss) {
+ _cleanup_close_ int ref_fd = -1;
+
+ (void) mkdir("/run/systemd/home/", 0755);
+ if (mkfifo(fn, 0600) < 0 && errno != EEXIST)
+ return log_error_errno(errno, "Failed to create FIFO %s: %m", fn);
+
+ ref_fd = open(fn, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (ref_fd < 0)
+ return log_error_errno(errno, "Failed to open FIFO %s for reading: %m", fn);
+
+ r = sd_event_add_io(h->manager->event, ss, ref_fd, 0, on_home_ref_eof, h);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate reference FIFO event source: %m");
+
+ (void) sd_event_source_set_description(*ss, "acquire-ref");
+
+ r = sd_event_source_set_priority(*ss, SD_EVENT_PRIORITY_IDLE-1);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_io_fd_own(*ss, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to pass ownership of FIFO event fd to event source: %m");
+
+ TAKE_FD(ref_fd);
+ }
+
+ ret_fd = open(fn, O_WRONLY|O_CLOEXEC|O_NONBLOCK);
+ if (ret_fd < 0)
+ return log_error_errno(errno, "Failed to open FIFO %s for writing: %m", fn);
+
+ return TAKE_FD(ret_fd);
+}
+
+static int home_dispatch_acquire(Home *h, Operation *o) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int (*call)(Home *h, UserRecord *secret, HomeState for_state, sd_bus_error *error) = NULL;
+ HomeState for_state;
+ int r;
+
+ assert(h);
+ assert(o);
+ assert(o->type == OPERATION_ACQUIRE);
+
+ switch (home_get_state(h)) {
+
+ case HOME_UNFIXATED:
+ for_state = HOME_FIXATING_FOR_ACQUIRE;
+ call = home_fixate_internal;
+ break;
+
+ case HOME_ABSENT:
+ r = sd_bus_error_setf(&error, BUS_ERROR_HOME_ABSENT, "Home %s is currently missing or not plugged in.", h->user_name);
+ break;
+
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ for_state = HOME_ACTIVATING_FOR_ACQUIRE;
+ call = home_activate_internal;
+ break;
+
+ case HOME_ACTIVE:
+ for_state = HOME_AUTHENTICATING_FOR_ACQUIRE;
+ call = home_authenticate_internal;
+ break;
+
+ case HOME_LOCKED:
+ for_state = HOME_UNLOCKING_FOR_ACQUIRE;
+ call = home_unlock_internal;
+ break;
+
+ default:
+ /* All other cases means we are currently executing an operation, which means the job remains
+ * pending. */
+ return 0;
+ }
+
+ assert(!h->current_operation);
+
+ if (call) {
+ r = home_ratelimit(h, &error);
+ if (r >= 0)
+ r = call(h, o->secret, for_state, &error);
+ }
+
+ if (r != 0) /* failure or completed */
+ operation_result(o, r, &error);
+ else /* ongoing */
+ h->current_operation = operation_ref(o);
+
+ return 1;
+}
+
+static int home_dispatch_release(Home *h, Operation *o) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(o);
+ assert(o->type == OPERATION_RELEASE);
+
+ if (h->ref_event_source_dont_suspend || h->ref_event_source_please_suspend)
+ /* If there's now a reference again, then let's abort the release attempt */
+ r = sd_bus_error_setf(&error, BUS_ERROR_HOME_BUSY, "Home %s is currently referenced.", h->user_name);
+ else {
+ switch (home_get_state(h)) {
+
+ case HOME_UNFIXATED:
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ r = 1; /* done */
+ break;
+
+ case HOME_LOCKED:
+ r = sd_bus_error_setf(&error, BUS_ERROR_HOME_LOCKED, "Home %s is currently locked.", h->user_name);
+ break;
+
+ case HOME_ACTIVE:
+ r = home_deactivate_internal(h, false, &error);
+ break;
+
+ default:
+ /* All other cases means we are currently executing an operation, which means the job remains
+ * pending. */
+ return 0;
+ }
+ }
+
+ assert(!h->current_operation);
+
+ if (r != 0) /* failure or completed */
+ operation_result(o, r, &error);
+ else /* ongoing */
+ h->current_operation = operation_ref(o);
+
+ return 1;
+}
+
+static int home_dispatch_lock_all(Home *h, Operation *o) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(o);
+ assert(o->type == OPERATION_LOCK_ALL);
+
+ switch (home_get_state(h)) {
+
+ case HOME_UNFIXATED:
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ log_info("Home %s is not active, no locking necessary.", h->user_name);
+ r = 1; /* done */
+ break;
+
+ case HOME_LOCKED:
+ log_info("Home %s is already locked.", h->user_name);
+ r = 1; /* done */
+ break;
+
+ case HOME_ACTIVE:
+ log_info("Locking home %s.", h->user_name);
+ r = home_lock(h, &error);
+ break;
+
+ default:
+ /* All other cases means we are currently executing an operation, which means the job remains
+ * pending. */
+ return 0;
+ }
+
+ assert(!h->current_operation);
+
+ if (r != 0) /* failure or completed */
+ operation_result(o, r, &error);
+ else /* ongoing */
+ h->current_operation = operation_ref(o);
+
+ return 1;
+}
+
+static int home_dispatch_deactivate_all(Home *h, Operation *o) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(o);
+ assert(o->type == OPERATION_DEACTIVATE_ALL);
+
+ switch (home_get_state(h)) {
+
+ case HOME_UNFIXATED:
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ log_info("Home %s is already deactivated.", h->user_name);
+ r = 1; /* done */
+ break;
+
+ case HOME_LOCKED:
+ log_info("Home %s is currently locked, not deactivating.", h->user_name);
+ r = 1; /* done */
+ break;
+
+ case HOME_ACTIVE:
+ log_info("Deactivating home %s.", h->user_name);
+ r = home_deactivate_internal(h, false, &error);
+ break;
+
+ default:
+ /* All other cases means we are currently executing an operation, which means the job remains
+ * pending. */
+ return 0;
+ }
+
+ assert(!h->current_operation);
+
+ if (r != 0) /* failure or completed */
+ operation_result(o, r, &error);
+ else /* ongoing */
+ h->current_operation = operation_ref(o);
+
+ return 1;
+}
+
+static int home_dispatch_pipe_eof(Home *h, Operation *o) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(o);
+ assert(o->type == OPERATION_PIPE_EOF);
+
+ if (h->ref_event_source_please_suspend || h->ref_event_source_dont_suspend)
+ return 1; /* Hmm, there's a reference again, let's cancel this */
+
+ switch (home_get_state(h)) {
+
+ case HOME_UNFIXATED:
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ log_info("Home %s already deactivated, no automatic deactivation needed.", h->user_name);
+ break;
+
+ case HOME_DEACTIVATING:
+ log_info("Home %s is already being deactivated, automatic deactivated unnecessary.", h->user_name);
+ break;
+
+ case HOME_ACTIVE:
+ r = home_deactivate_internal(h, false, &error);
+ if (r < 0)
+ log_warning_errno(r, "Failed to deactivate %s, ignoring: %s", h->user_name, bus_error_message(&error, r));
+ break;
+
+ case HOME_LOCKED:
+ default:
+ /* If the device is locked or any operation is being executed, let's leave this pending */
+ return 0;
+ }
+
+ /* Note that we don't call operation_fail() or operation_success() here, because this kind of
+ * operation has no message associated with it, and thus there's no need to propagate success. */
+
+ assert(!o->message);
+ return 1;
+}
+
+static int home_dispatch_deactivate_force(Home *h, Operation *o) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(h);
+ assert(o);
+ assert(o->type == OPERATION_DEACTIVATE_FORCE);
+
+ switch (home_get_state(h)) {
+
+ case HOME_UNFIXATED:
+ case HOME_ABSENT:
+ case HOME_INACTIVE:
+ case HOME_DIRTY:
+ log_debug("Home %s already deactivated, no forced deactivation due to unplug needed.", h->user_name);
+ break;
+
+ case HOME_DEACTIVATING:
+ log_debug("Home %s is already being deactivated, forced deactivation due to unplug unnecessary.", h->user_name);
+ break;
+
+ case HOME_ACTIVE:
+ case HOME_LOCKED:
+ r = home_deactivate_internal(h, true, &error);
+ if (r < 0)
+ log_warning_errno(r, "Failed to forcibly deactivate %s, ignoring: %s", h->user_name, bus_error_message(&error, r));
+ break;
+
+ default:
+ /* If any operation is being executed, let's leave this pending */
+ return 0;
+ }
+
+ /* Note that we don't call operation_fail() or operation_success() here, because this kind of
+ * operation has no message associated with it, and thus there's no need to propagate success. */
+
+ assert(!o->message);
+ return 1;
+}
+
+static int on_pending(sd_event_source *s, void *userdata) {
+ Home *h = userdata;
+ Operation *o;
+ int r;
+
+ assert(s);
+ assert(h);
+
+ o = ordered_set_first(h->pending_operations);
+ if (o) {
+ static int (* const operation_table[_OPERATION_MAX])(Home *h, Operation *o) = {
+ [OPERATION_ACQUIRE] = home_dispatch_acquire,
+ [OPERATION_RELEASE] = home_dispatch_release,
+ [OPERATION_LOCK_ALL] = home_dispatch_lock_all,
+ [OPERATION_DEACTIVATE_ALL] = home_dispatch_deactivate_all,
+ [OPERATION_PIPE_EOF] = home_dispatch_pipe_eof,
+ [OPERATION_DEACTIVATE_FORCE] = home_dispatch_deactivate_force,
+ };
+
+ assert(operation_table[o->type]);
+ r = operation_table[o->type](h, o);
+ if (r != 0) {
+ /* The operation completed, let's remove it from the pending list, and exit while
+ * leaving the event source enabled as it is. */
+ assert_se(ordered_set_remove(h->pending_operations, o) == o);
+ operation_unref(o);
+ return 0;
+ }
+ }
+
+ /* Nothing to do anymore, let's turn off this event source */
+ r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable event source: %m");
+
+ return 0;
+}
+
+int home_schedule_operation(Home *h, Operation *o, sd_bus_error *error) {
+ int r;
+
+ assert(h);
+
+ if (o) {
+ if (ordered_set_size(h->pending_operations) >= PENDING_OPERATIONS_MAX)
+ return sd_bus_error_setf(error, BUS_ERROR_TOO_MANY_OPERATIONS, "Too many client operations requested");
+
+ r = ordered_set_ensure_allocated(&h->pending_operations, &operation_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_put(h->pending_operations, o);
+ if (r < 0)
+ return r;
+
+ operation_ref(o);
+ }
+
+ if (!h->pending_event_source) {
+ r = sd_event_add_defer(h->manager->event, &h->pending_event_source, on_pending, h);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate pending defer event source: %m");
+
+ (void) sd_event_source_set_description(h->pending_event_source, "pending");
+
+ r = sd_event_source_set_priority(h->pending_event_source, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_enabled(h->pending_event_source, SD_EVENT_ON);
+ if (r < 0)
+ return log_error_errno(r, "Failed to trigger pending event source: %m");
+
+ return 0;
+}
+
+static int home_get_image_path_seat(Home *h, char **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *c = NULL;
+ const char *ip, *seat;
+ struct stat st;
+ int r;
+
+ assert(h);
+
+ if (user_record_storage(h->record) != USER_LUKS)
+ return -ENXIO;
+
+ ip = user_record_image_path(h->record);
+ if (!ip)
+ return -ENXIO;
+
+ if (!path_startswith(ip, "/dev/"))
+ return -ENXIO;
+
+ if (stat(ip, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_property_value(d, "ID_SEAT", &seat);
+ if (r == -ENOENT) /* no property means seat0 */
+ seat = "seat0";
+ else if (r < 0)
+ return r;
+
+ c = strdup(seat);
+ if (!c)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(c);
+ return 0;
+}
+
+int home_auto_login(Home *h, char ***ret_seats) {
+ _cleanup_free_ char *seat = NULL, *seat2 = NULL;
+
+ assert(h);
+ assert(ret_seats);
+
+ (void) home_get_image_path_seat(h, &seat);
+
+ if (h->record->auto_login > 0 && !streq_ptr(seat, "seat0")) {
+ /* For now, when the auto-login boolean is set for a user, let's make it mean
+ * "seat0". Eventually we can extend the concept and allow configuration of any kind of seat,
+ * but let's keep simple initially, most likely the feature is interesting on single-user
+ * systems anyway, only.
+ *
+ * We filter out users marked for auto-login in we know for sure their home directory is
+ * absent. */
+
+ if (user_record_test_image_path(h->record) != USER_TEST_ABSENT) {
+ seat2 = strdup("seat0");
+ if (!seat2)
+ return -ENOMEM;
+ }
+ }
+
+ if (seat || seat2) {
+ _cleanup_strv_free_ char **list = NULL;
+ size_t i = 0;
+
+ list = new(char*, 3);
+ if (!list)
+ return -ENOMEM;
+
+ if (seat)
+ list[i++] = TAKE_PTR(seat);
+ if (seat2)
+ list[i++] = TAKE_PTR(seat2);
+
+ list[i] = NULL;
+ *ret_seats = TAKE_PTR(list);
+ return 1;
+ }
+
+ *ret_seats = NULL;
+ return 0;
+}
+
+int home_set_current_message(Home *h, sd_bus_message *m) {
+ assert(h);
+
+ if (!m)
+ return 0;
+
+ if (h->current_operation)
+ return -EBUSY;
+
+ h->current_operation = operation_new(OPERATION_IMMEDIATE, m);
+ if (!h->current_operation)
+ return -ENOMEM;
+
+ return 1;
+}
+
+int home_wait_for_worker(Home *h) {
+ assert(h);
+
+ if (h->worker_pid <= 0)
+ return 0;
+
+ log_info("Worker process for home %s is still running while exiting. Waiting for it to finish.", h->user_name);
+ (void) wait_for_terminate(h->worker_pid, NULL);
+ (void) hashmap_remove_value(h->manager->homes_by_worker_pid, PID_TO_PTR(h->worker_pid), h);
+ h->worker_pid = 0;
+ return 1;
+}
+
+static const char* const home_state_table[_HOME_STATE_MAX] = {
+ [HOME_UNFIXATED] = "unfixated",
+ [HOME_ABSENT] = "absent",
+ [HOME_INACTIVE] = "inactive",
+ [HOME_DIRTY] = "dirty",
+ [HOME_FIXATING] = "fixating",
+ [HOME_FIXATING_FOR_ACTIVATION] = "fixating-for-activation",
+ [HOME_FIXATING_FOR_ACQUIRE] = "fixating-for-acquire",
+ [HOME_ACTIVATING] = "activating",
+ [HOME_ACTIVATING_FOR_ACQUIRE] = "activating-for-acquire",
+ [HOME_DEACTIVATING] = "deactivating",
+ [HOME_ACTIVE] = "active",
+ [HOME_LOCKING] = "locking",
+ [HOME_LOCKED] = "locked",
+ [HOME_UNLOCKING] = "unlocking",
+ [HOME_UNLOCKING_FOR_ACQUIRE] = "unlocking-for-acquire",
+ [HOME_CREATING] = "creating",
+ [HOME_REMOVING] = "removing",
+ [HOME_UPDATING] = "updating",
+ [HOME_UPDATING_WHILE_ACTIVE] = "updating-while-active",
+ [HOME_RESIZING] = "resizing",
+ [HOME_RESIZING_WHILE_ACTIVE] = "resizing-while-active",
+ [HOME_PASSWD] = "passwd",
+ [HOME_PASSWD_WHILE_ACTIVE] = "passwd-while-active",
+ [HOME_AUTHENTICATING] = "authenticating",
+ [HOME_AUTHENTICATING_WHILE_ACTIVE] = "authenticating-while-active",
+ [HOME_AUTHENTICATING_FOR_ACQUIRE] = "authenticating-for-acquire",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(home_state, HomeState);
diff --git a/src/home/homed-home.h b/src/home/homed-home.h
new file mode 100644
index 0000000..4c24ee7
--- /dev/null
+++ b/src/home/homed-home.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Home Home;
+
+#include "homed-manager.h"
+#include "homed-operation.h"
+#include "list.h"
+#include "ordered-set.h"
+#include "user-record.h"
+
+typedef enum HomeState {
+ HOME_UNFIXATED, /* home exists, but local record does not */
+ HOME_ABSENT, /* local record exists, but home does not */
+ HOME_INACTIVE, /* record and home exist, but is not logged in */
+ HOME_DIRTY, /* like HOME_INACTIVE, but the home directory wasn't cleanly deactivated */
+ HOME_FIXATING, /* generating local record from home */
+ HOME_FIXATING_FOR_ACTIVATION, /* fixating in order to activate soon */
+ HOME_FIXATING_FOR_ACQUIRE, /* fixating because Acquire() was called */
+ HOME_ACTIVATING,
+ HOME_ACTIVATING_FOR_ACQUIRE, /* activating because Acquire() was called */
+ HOME_DEACTIVATING,
+ HOME_ACTIVE, /* logged in right now */
+ HOME_LOCKING,
+ HOME_LOCKED,
+ HOME_UNLOCKING,
+ HOME_UNLOCKING_FOR_ACQUIRE, /* unlocking because Acquire() was called */
+ HOME_CREATING,
+ HOME_REMOVING,
+ HOME_UPDATING,
+ HOME_UPDATING_WHILE_ACTIVE,
+ HOME_RESIZING,
+ HOME_RESIZING_WHILE_ACTIVE,
+ HOME_PASSWD,
+ HOME_PASSWD_WHILE_ACTIVE,
+ HOME_AUTHENTICATING,
+ HOME_AUTHENTICATING_WHILE_ACTIVE,
+ HOME_AUTHENTICATING_FOR_ACQUIRE, /* authenticating because Acquire() was called */
+ _HOME_STATE_MAX,
+ _HOME_STATE_INVALID = -1
+} HomeState;
+
+static inline bool HOME_STATE_IS_ACTIVE(HomeState state) {
+ return IN_SET(state,
+ HOME_ACTIVE,
+ HOME_UPDATING_WHILE_ACTIVE,
+ HOME_RESIZING_WHILE_ACTIVE,
+ HOME_PASSWD_WHILE_ACTIVE,
+ HOME_AUTHENTICATING_WHILE_ACTIVE,
+ HOME_AUTHENTICATING_FOR_ACQUIRE);
+}
+
+static inline bool HOME_STATE_IS_EXECUTING_OPERATION(HomeState state) {
+ return IN_SET(state,
+ HOME_FIXATING,
+ HOME_FIXATING_FOR_ACTIVATION,
+ HOME_FIXATING_FOR_ACQUIRE,
+ HOME_ACTIVATING,
+ HOME_ACTIVATING_FOR_ACQUIRE,
+ HOME_DEACTIVATING,
+ HOME_LOCKING,
+ HOME_UNLOCKING,
+ HOME_UNLOCKING_FOR_ACQUIRE,
+ HOME_CREATING,
+ HOME_REMOVING,
+ HOME_UPDATING,
+ HOME_UPDATING_WHILE_ACTIVE,
+ HOME_RESIZING,
+ HOME_RESIZING_WHILE_ACTIVE,
+ HOME_PASSWD,
+ HOME_PASSWD_WHILE_ACTIVE,
+ HOME_AUTHENTICATING,
+ HOME_AUTHENTICATING_WHILE_ACTIVE,
+ HOME_AUTHENTICATING_FOR_ACQUIRE);
+}
+
+struct Home {
+ Manager *manager;
+ char *user_name;
+ uid_t uid;
+
+ char *sysfs; /* When found via plugged in device, the sysfs path to it */
+
+ /* Note that the 'state' field is only set to a state while we are doing something (i.e. activating,
+ * deactivating, creating, removing, and such), or when the home is an "unfixated" one. When we are
+ * done with an operation we invalidate the state. This is hint for home_get_state() to check the
+ * state on request as needed from the mount table and similar.*/
+ HomeState state;
+ int signed_locally; /* signed only by us */
+
+ UserRecord *record;
+
+ pid_t worker_pid;
+ int worker_stdout_fd;
+ sd_event_source *worker_event_source;
+ int worker_error_code;
+
+ /* The message we are currently processing, and thus need to reply to on completion */
+ Operation *current_operation;
+
+ /* Stores the raw, plaintext passwords, but only for short periods of time */
+ UserRecord *secret;
+
+ /* When we create a home area and that fails, we should possibly unregister the record altogether
+ * again, which is remembered in this boolean. */
+ bool unregister_on_failure;
+
+ /* The reading side of a FIFO stored in /run/systemd/home/, the writing side being used for reference
+ * counting. The references dropped to zero as soon as we see EOF. This concept exists twice: once
+ * for clients that are fine if we suspend the home directory on system suspend, and once for cliets
+ * that are not ok with that. This allows us to determine for each home whether there are any clients
+ * that support unsuspend. */
+ sd_event_source *ref_event_source_please_suspend;
+ sd_event_source *ref_event_source_dont_suspend;
+
+ /* Any pending operations we still need to execute. These are for operations we want to queue if we
+ * can't execute them right-away. */
+ OrderedSet *pending_operations;
+
+ /* A defer event source that processes pending acquire/release/eof events. We have a common
+ * dispatcher that processes all three kinds of events. */
+ sd_event_source *pending_event_source;
+
+ /* Did we send out a D-Bus notification about this entry? */
+ bool announced;
+
+ /* Used to coalesce bus PropertiesChanged events */
+ sd_event_source *deferred_change_event_source;
+};
+
+int home_new(Manager *m, UserRecord *hr, const char *sysfs, Home **ret);
+Home *home_free(Home *h);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Home*, home_free);
+
+int home_set_record(Home *h, UserRecord *hr);
+int home_save_record(Home *h);
+int home_unlink_record(Home *h);
+
+int home_fixate(Home *h, UserRecord *secret, sd_bus_error *error);
+int home_activate(Home *h, UserRecord *secret, sd_bus_error *error);
+int home_authenticate(Home *h, UserRecord *secret, sd_bus_error *error);
+int home_deactivate(Home *h, bool force, sd_bus_error *error);
+int home_create(Home *h, UserRecord *secret, sd_bus_error *error);
+int home_remove(Home *h, sd_bus_error *error);
+int home_update(Home *h, UserRecord *new_record, sd_bus_error *error);
+int home_resize(Home *h, uint64_t disk_size, UserRecord *secret, sd_bus_error *error);
+int home_passwd(Home *h, UserRecord *new_secret, UserRecord *old_secret, sd_bus_error *error);
+int home_unregister(Home *h, sd_bus_error *error);
+int home_lock(Home *h, sd_bus_error *error);
+int home_unlock(Home *h, UserRecord *secret, sd_bus_error *error);
+
+HomeState home_get_state(Home *h);
+
+void home_process_notify(Home *h, char **l);
+
+int home_killall(Home *h);
+
+int home_augment_status(Home *h, UserRecordLoadFlags flags, UserRecord **ret);
+
+int home_create_fifo(Home *h, bool please_suspend);
+int home_schedule_operation(Home *h, Operation *o, sd_bus_error *error);
+
+int home_auto_login(Home *h, char ***ret_seats);
+
+int home_set_current_message(Home *h, sd_bus_message *m);
+
+int home_wait_for_worker(Home *h);
+
+const char *home_state_to_string(HomeState state);
+HomeState home_state_from_string(const char *s);
diff --git a/src/home/homed-manager-bus.c b/src/home/homed-manager-bus.c
new file mode 100644
index 0000000..d3ac98f
--- /dev/null
+++ b/src/home/homed-manager-bus.c
@@ -0,0 +1,899 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/capability.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-polkit.h"
+#include "format-util.h"
+#include "homed-bus.h"
+#include "homed-home-bus.h"
+#include "homed-manager-bus.h"
+#include "homed-manager.h"
+#include "strv.h"
+#include "user-record-sign.h"
+#include "user-record-util.h"
+#include "user-util.h"
+
+static int property_get_auto_login(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(h, m->homes_by_name) {
+ _cleanup_(strv_freep) char **seats = NULL;
+ _cleanup_free_ char *home_path = NULL;
+ char **s;
+
+ r = home_auto_login(h, &seats);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine whether home '%s' is candidate for auto-login, ignoring: %m", h->user_name);
+ continue;
+ }
+ if (!r)
+ continue;
+
+ r = bus_home_path(h, &home_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate home bus path: %m");
+
+ STRV_FOREACH(s, seats) {
+ r = sd_bus_message_append(reply, "(sso)", h->user_name, *s, home_path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int method_get_home_by_name(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *path = NULL;
+ const char *user_name;
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &user_name);
+ if (r < 0)
+ return r;
+ if (!valid_user_group_name(user_name, 0))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name %s is not valid", user_name);
+
+ h = hashmap_get(m->homes_by_name, user_name);
+ if (!h)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_HOME, "No home for user %s known", user_name);
+
+ r = bus_home_path(h, &path);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(
+ message, "usussso",
+ (uint32_t) h->uid,
+ home_state_to_string(home_get_state(h)),
+ h->record ? (uint32_t) user_record_gid(h->record) : GID_INVALID,
+ h->record ? user_record_real_name(h->record) : NULL,
+ h->record ? user_record_home_directory(h->record) : NULL,
+ h->record ? user_record_shell(h->record) : NULL,
+ path);
+}
+
+static int method_get_home_by_uid(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ uint32_t uid;
+ int r;
+ Home *h;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "UID " UID_FMT " is not valid", uid);
+
+ h = hashmap_get(m->homes_by_uid, UID_TO_PTR(uid));
+ if (!h)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_HOME, "No home for UID " UID_FMT " known", uid);
+
+ /* Note that we don't use bus_home_path() here, but build the path manually, since if we are queried
+ * for a UID we should also generate the bus path with a UID, and bus_home_path() uses our more
+ * typical bus path by name. */
+ if (asprintf(&path, "/org/freedesktop/home1/home/" UID_FMT, h->uid) < 0)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(
+ message, "ssussso",
+ h->user_name,
+ home_state_to_string(home_get_state(h)),
+ h->record ? (uint32_t) user_record_gid(h->record) : GID_INVALID,
+ h->record ? user_record_real_name(h->record) : NULL,
+ h->record ? user_record_home_directory(h->record) : NULL,
+ h->record ? user_record_shell(h->record) : NULL,
+ path);
+}
+
+static int method_list_homes(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(susussso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(h, m->homes_by_uid) {
+ _cleanup_free_ char *path = NULL;
+
+ r = bus_home_path(h, &path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ reply, "(susussso)",
+ h->user_name,
+ (uint32_t) h->uid,
+ home_state_to_string(home_get_state(h)),
+ h->record ? (uint32_t) user_record_gid(h->record) : GID_INVALID,
+ h->record ? user_record_real_name(h->record) : NULL,
+ h->record ? user_record_home_directory(h->record) : NULL,
+ h->record ? user_record_shell(h->record) : NULL,
+ path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_get_user_record_by_name(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *json = NULL, *path = NULL;
+ Manager *m = userdata;
+ const char *user_name;
+ bool incomplete;
+ Home *h;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &user_name);
+ if (r < 0)
+ return r;
+ if (!valid_user_group_name(user_name, 0))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name %s is not valid", user_name);
+
+ h = hashmap_get(m->homes_by_name, user_name);
+ if (!h)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_HOME, "No home for user %s known", user_name);
+
+ r = bus_home_get_record_json(h, message, &json, &incomplete);
+ if (r < 0)
+ return r;
+
+ r = bus_home_path(h, &path);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(
+ message, "sbo",
+ json,
+ incomplete,
+ path);
+}
+
+static int method_get_user_record_by_uid(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *json = NULL, *path = NULL;
+ Manager *m = userdata;
+ bool incomplete;
+ uint32_t uid;
+ Home *h;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "UID " UID_FMT " is not valid", uid);
+
+ h = hashmap_get(m->homes_by_uid, UID_TO_PTR(uid));
+ if (!h)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_HOME, "No home for UID " UID_FMT " known", uid);
+
+ r = bus_home_get_record_json(h, message, &json, &incomplete);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&path, "/org/freedesktop/home1/home/" UID_FMT, h->uid) < 0)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(
+ message, "sbo",
+ json,
+ incomplete,
+ path);
+}
+
+static int generic_home_method(
+ Manager *m,
+ sd_bus_message *message,
+ sd_bus_message_handler_t handler,
+ sd_bus_error *error) {
+
+ const char *user_name;
+ Home *h;
+ int r;
+
+ r = sd_bus_message_read(message, "s", &user_name);
+ if (r < 0)
+ return r;
+
+ if (!valid_user_group_name(user_name, 0))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name %s is not valid", user_name);
+
+ h = hashmap_get(m->homes_by_name, user_name);
+ if (!h)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_HOME, "No home for user %s known", user_name);
+
+ return handler(message, h, error);
+}
+
+static int method_activate_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_activate, error);
+}
+
+static int method_deactivate_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_deactivate, error);
+}
+
+static int validate_and_allocate_home(Manager *m, UserRecord *hr, Home **ret, sd_bus_error *error) {
+ _cleanup_(user_record_unrefp) UserRecord *signed_hr = NULL;
+ struct passwd *pw;
+ struct group *gr;
+ bool signed_locally;
+ Home *other;
+ int r;
+
+ assert(m);
+ assert(hr);
+ assert(ret);
+
+ r = user_record_is_supported(hr, error);
+ if (r < 0)
+ return r;
+
+ other = hashmap_get(m->homes_by_name, hr->user_name);
+ if (other)
+ return sd_bus_error_setf(error, BUS_ERROR_USER_NAME_EXISTS, "Specified user name %s exists already, refusing.", hr->user_name);
+
+ pw = getpwnam(hr->user_name);
+ if (pw)
+ return sd_bus_error_setf(error, BUS_ERROR_USER_NAME_EXISTS, "Specified user name %s exists in the NSS user database, refusing.", hr->user_name);
+
+ gr = getgrnam(hr->user_name);
+ if (gr)
+ return sd_bus_error_setf(error, BUS_ERROR_USER_NAME_EXISTS, "Specified user name %s conflicts with an NSS group by the same name, refusing.", hr->user_name);
+
+ r = manager_verify_user_record(m, hr);
+ switch (r) {
+
+ case USER_RECORD_UNSIGNED:
+ /* If the record is unsigned, then let's sign it with our own key */
+ r = manager_sign_user_record(m, hr, &signed_hr, error);
+ if (r < 0)
+ return r;
+
+ hr = signed_hr;
+ _fallthrough_;
+
+ case USER_RECORD_SIGNED_EXCLUSIVE:
+ signed_locally = true;
+ break;
+
+ case USER_RECORD_SIGNED:
+ case USER_RECORD_FOREIGN:
+ signed_locally = false;
+ break;
+
+ case -ENOKEY:
+ return sd_bus_error_setf(error, BUS_ERROR_BAD_SIGNATURE, "Specified user record for %s is signed by a key we don't recognize, refusing.", hr->user_name);
+
+ default:
+ return sd_bus_error_set_errnof(error, r, "Failed to validate signature for '%s': %m", hr->user_name);
+ }
+
+ if (uid_is_valid(hr->uid)) {
+ other = hashmap_get(m->homes_by_uid, UID_TO_PTR(hr->uid));
+ if (other)
+ return sd_bus_error_setf(error, BUS_ERROR_UID_IN_USE, "Specified UID " UID_FMT " already in use by home %s, refusing.", hr->uid, other->user_name);
+
+ pw = getpwuid(hr->uid);
+ if (pw)
+ return sd_bus_error_setf(error, BUS_ERROR_UID_IN_USE, "Specified UID " UID_FMT " already in use by NSS user %s, refusing.", hr->uid, pw->pw_name);
+
+ gr = getgrgid(hr->uid);
+ if (gr)
+ return sd_bus_error_setf(error, BUS_ERROR_UID_IN_USE, "Specified UID " UID_FMT " already in use as GID by NSS group %s, refusing.", hr->uid, gr->gr_name);
+ } else {
+ r = manager_augment_record_with_uid(m, hr);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to acquire UID for '%s': %m", hr->user_name);
+ }
+
+ r = home_new(m, hr, NULL, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->signed_locally = signed_locally;
+ return r;
+}
+
+static int method_register_home(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_message_read_home_record(message, USER_RECORD_LOAD_EMBEDDED, &hr, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.create-home",
+ NULL,
+ true,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = validate_and_allocate_home(m, hr, &h, error);
+ if (r < 0)
+ return r;
+
+ r = home_save_record(h);
+ if (r < 0) {
+ home_free(h);
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_unregister_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_unregister, error);
+}
+
+static int method_create_home(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_message_read_home_record(message, USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_SECRET|USER_RECORD_ALLOW_PRIVILEGED|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_SIGNATURE, &hr, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.home1.create-home",
+ NULL,
+ true,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = validate_and_allocate_home(m, hr, &h, error);
+ if (r < 0)
+ return r;
+
+ r = home_create(h, hr, error);
+ if (r < 0)
+ goto fail;
+
+ assert(r == 0);
+ h->unregister_on_failure = true;
+ assert(!h->current_operation);
+
+ r = home_set_current_message(h, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+fail:
+ (void) home_unlink_record(h);
+ h = home_free(h);
+ return r;
+}
+
+static int method_realize_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_realize, error);
+}
+
+static int method_remove_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_remove, error);
+}
+
+static int method_fixate_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_fixate, error);
+}
+
+static int method_authenticate_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_authenticate, error);
+}
+
+static int method_update_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_message_read_home_record(message, USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_SECRET|USER_RECORD_ALLOW_PRIVILEGED|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_SIGNATURE, &hr, error);
+ if (r < 0)
+ return r;
+
+ assert(hr->user_name);
+
+ h = hashmap_get(m->homes_by_name, hr->user_name);
+ if (!h)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_HOME, "No home for user %s known", hr->user_name);
+
+ return bus_home_method_update_record(h, message, hr, error);
+}
+
+static int method_resize_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_resize, error);
+}
+
+static int method_change_password_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_change_password, error);
+}
+
+static int method_lock_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_lock, error);
+}
+
+static int method_unlock_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_unlock, error);
+}
+
+static int method_acquire_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_acquire, error);
+}
+
+static int method_ref_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_ref, error);
+}
+
+static int method_release_home(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return generic_home_method(userdata, message, bus_home_method_release, error);
+}
+
+static int method_lock_all_homes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(operation_unrefp) Operation *o = NULL;
+ bool waiting = false;
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(m);
+
+ /* This is called from logind when we are preparing for system suspend. We enqueue a lock operation
+ * for every suitable home we have and only when all of them completed we send a reply indicating
+ * completion. */
+
+ HASHMAP_FOREACH(h, m->homes_by_name) {
+
+ /* Automatically suspend all homes that have at least one client referencing it that asked
+ * for "please suspend", and no client that asked for "please do not suspend". */
+ if (h->ref_event_source_dont_suspend ||
+ !h->ref_event_source_please_suspend)
+ continue;
+
+ if (!o) {
+ o = operation_new(OPERATION_LOCK_ALL, message);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ log_info("Automatically locking home of user %s.", h->user_name);
+
+ r = home_schedule_operation(h, o, error);
+ if (r < 0)
+ return r;
+
+ waiting = true;
+ }
+
+ if (waiting) /* At least one lock operation was enqeued, let's leave here without a reply: it will
+ * be sent as soon as the last of the lock operations completed. */
+ return 1;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_deactivate_all_homes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(operation_unrefp) Operation *o = NULL;
+ bool waiting = false;
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(m);
+
+ /* This is called from systemd-homed-activate.service's ExecStop= command to ensure that all home
+ * directories are shutdown before the system goes down. Note that we don't do this from
+ * systemd-homed.service itself since we want to allow restarting of it without tearing down all home
+ * directories. */
+
+ HASHMAP_FOREACH(h, m->homes_by_name) {
+
+ if (!o) {
+ o = operation_new(OPERATION_DEACTIVATE_ALL, message);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ log_info("Automatically deactivating home of user %s.", h->user_name);
+
+ r = home_schedule_operation(h, o, error);
+ if (r < 0)
+ return r;
+
+ waiting = true;
+ }
+
+ if (waiting) /* At least one lock operation was enqeued, let's leave here without a reply: it will be
+ * sent as soon as the last of the deactivation operations completed. */
+ return 1;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("AutoLogin", "a(sso)", property_get_auto_login, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_METHOD_WITH_NAMES("GetHomeByName",
+ "s",
+ SD_BUS_PARAM(user_name),
+ "usussso",
+ SD_BUS_PARAM(uid)
+ SD_BUS_PARAM(home_state)
+ SD_BUS_PARAM(gid)
+ SD_BUS_PARAM(real_name)
+ SD_BUS_PARAM(home_directory)
+ SD_BUS_PARAM(shell)
+ SD_BUS_PARAM(bus_path),
+ method_get_home_by_name,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetHomeByUID",
+ "u",
+ SD_BUS_PARAM(uid),
+ "ssussso",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(home_state)
+ SD_BUS_PARAM(gid)
+ SD_BUS_PARAM(real_name)
+ SD_BUS_PARAM(home_directory)
+ SD_BUS_PARAM(shell)
+ SD_BUS_PARAM(bus_path),
+ method_get_home_by_uid,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUserRecordByName",
+ "s",
+ SD_BUS_PARAM(user_name),
+ "sbo",
+ SD_BUS_PARAM(user_record)
+ SD_BUS_PARAM(incomplete)
+ SD_BUS_PARAM(bus_path),
+ method_get_user_record_by_name,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("GetUserRecordByUID",
+ "u",
+ SD_BUS_PARAM(uid),
+ "sbo",
+ SD_BUS_PARAM(user_record)
+ SD_BUS_PARAM(incomplete)
+ SD_BUS_PARAM(bus_path),
+ method_get_user_record_by_uid,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("ListHomes",
+ NULL,,
+ "a(susussso)",
+ SD_BUS_PARAM(home_areas),
+ method_list_homes,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ /* The following methods directly execute an operation on a home area, without ref-counting, queueing
+ * or anything, and are accessible through homectl. */
+ SD_BUS_METHOD_WITH_NAMES("ActivateHome",
+ "ss",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(secret),
+ NULL,,
+ method_activate_home,
+ SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("DeactivateHome",
+ "s",
+ SD_BUS_PARAM(user_name),
+ NULL,,
+ method_deactivate_home,
+ 0),
+
+ /* Add the JSON record to homed, but don't create actual $HOME */
+ SD_BUS_METHOD_WITH_NAMES("RegisterHome",
+ "s",
+ SD_BUS_PARAM(user_record),
+ NULL,,
+ method_register_home,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ /* Remove the JSON record from homed, but don't remove actual $HOME */
+ SD_BUS_METHOD_WITH_NAMES("UnregisterHome",
+ "s",
+ SD_BUS_PARAM(user_name),
+ NULL,,
+ method_unregister_home,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ /* Add JSON record, and create $HOME for it */
+ SD_BUS_METHOD_WITH_NAMES("CreateHome",
+ "s",
+ SD_BUS_PARAM(user_record),
+ NULL,,
+ method_create_home,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+
+ /* Create $HOME for already registered JSON entry */
+ SD_BUS_METHOD_WITH_NAMES("RealizeHome",
+ "ss",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(secret),
+ NULL,,
+ method_realize_home,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+
+ /* Remove the JSON record and remove $HOME */
+ SD_BUS_METHOD_WITH_NAMES("RemoveHome",
+ "s",
+ SD_BUS_PARAM(user_name),
+ NULL,,
+ method_remove_home,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ /* Investigate $HOME and propagate contained JSON record into our database */
+ SD_BUS_METHOD_WITH_NAMES("FixateHome",
+ "ss",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(secret),
+ NULL,,
+ method_fixate_home,
+ SD_BUS_VTABLE_SENSITIVE),
+
+ /* Just check credentials */
+ SD_BUS_METHOD_WITH_NAMES("AuthenticateHome",
+ "ss",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(secret),
+ NULL,,
+ method_authenticate_home,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+
+ /* Update the JSON record of existing user */
+ SD_BUS_METHOD_WITH_NAMES("UpdateHome",
+ "s",
+ SD_BUS_PARAM(user_record),
+ NULL,,
+ method_update_home,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+
+ SD_BUS_METHOD_WITH_NAMES("ResizeHome",
+ "sts",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(size)
+ SD_BUS_PARAM(secret),
+ NULL,,
+ method_resize_home,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+
+ SD_BUS_METHOD_WITH_NAMES("ChangePasswordHome",
+ "sss",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(new_secret)
+ SD_BUS_PARAM(old_secret),
+ NULL,,
+ method_change_password_home,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+
+ /* Prepare active home for system suspend: flush out passwords, suspend access */
+ SD_BUS_METHOD_WITH_NAMES("LockHome",
+ "s",
+ SD_BUS_PARAM(user_name),
+ NULL,,
+ method_lock_home,
+ 0),
+
+ /* Make $HOME usable after system resume again */
+ SD_BUS_METHOD_WITH_NAMES("UnlockHome",
+ "ss",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(secret),
+ NULL,,
+ method_unlock_home,
+ SD_BUS_VTABLE_SENSITIVE),
+
+ /* The following methods implement ref-counted activation, and are what the PAM module and "homectl
+ * with" use. In contrast to the methods above which fail if an operation is already being executed
+ * on a home directory, these ones will queue the request, and are thus more reliable. Moreover,
+ * they are a bit smarter: AcquireHome() will fixate, activate, unlock, or authenticate depending on
+ * the state of the home area, so that the end result is always the same (i.e. the home directory is
+ * accessible), and we always validate the specified passwords. RefHome() will not authenticate, and
+ * thus only works if the home area is already active. */
+ SD_BUS_METHOD_WITH_NAMES("AcquireHome",
+ "ssb",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(secret)
+ SD_BUS_PARAM(please_suspend),
+ "h",
+ SD_BUS_PARAM(send_fd),
+ method_acquire_home,
+ SD_BUS_VTABLE_UNPRIVILEGED|SD_BUS_VTABLE_SENSITIVE),
+ SD_BUS_METHOD_WITH_NAMES("RefHome",
+ "sb",
+ SD_BUS_PARAM(user_name)
+ SD_BUS_PARAM(please_suspend),
+ "h",
+ SD_BUS_PARAM(send_fd),
+ method_ref_home,
+ 0),
+ SD_BUS_METHOD_WITH_NAMES("ReleaseHome",
+ "s",
+ SD_BUS_PARAM(user_name),
+ NULL,,
+ method_release_home,
+ 0),
+
+ /* An operation that acts on all homes that allow it */
+ SD_BUS_METHOD("LockAllHomes", NULL, NULL, method_lock_all_homes, 0),
+ SD_BUS_METHOD("DeactivateAllHomes", NULL, NULL, method_deactivate_all_homes, 0),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation manager_object = {
+ "/org/freedesktop/home1",
+ "org.freedesktop.home1.Manager",
+ .vtables = BUS_VTABLES(manager_vtable),
+ .children = BUS_IMPLEMENTATIONS(&home_object),
+};
+
+static int on_deferred_auto_login(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+ int r;
+
+ assert(m);
+
+ m->deferred_auto_login_event_source = sd_event_source_unref(m->deferred_auto_login_event_source);
+
+ r = sd_bus_emit_properties_changed(
+ m->bus,
+ "/org/freedesktop/home1",
+ "org.freedesktop.home1.Manager",
+ "AutoLogin", NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send AutoLogin property change event, ignoring: %m");
+
+ return 0;
+}
+
+int bus_manager_emit_auto_login_changed(Manager *m) {
+ int r;
+ assert(m);
+
+ if (m->deferred_auto_login_event_source)
+ return 0;
+
+ if (!m->event)
+ return 0;
+
+ if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
+ return 0;
+
+ r = sd_event_add_defer(m->event, &m->deferred_auto_login_event_source, on_deferred_auto_login, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate auto login event source: %m");
+
+ r = sd_event_source_set_priority(m->deferred_auto_login_event_source, SD_EVENT_PRIORITY_IDLE+10);
+ if (r < 0)
+ log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
+
+ (void) sd_event_source_set_description(m->deferred_auto_login_event_source, "deferred-auto-login");
+ return 1;
+}
diff --git a/src/home/homed-manager-bus.h b/src/home/homed-manager-bus.h
new file mode 100644
index 0000000..7db29fa
--- /dev/null
+++ b/src/home/homed-manager-bus.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-util.h"
+
+extern const BusObjectImplementation manager_object;
diff --git a/src/home/homed-manager.c b/src/home/homed-manager.c
new file mode 100644
index 0000000..365ea4d
--- /dev/null
+++ b/src/home/homed-manager.c
@@ -0,0 +1,1742 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <grp.h>
+#include <linux/fs.h>
+#include <linux/magic.h>
+#include <openssl/pem.h>
+#include <pwd.h>
+#include <sys/ioctl.h>
+#include <sys/quota.h>
+#include <sys/stat.h>
+
+#include "btrfs-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-log-control-api.h"
+#include "bus-polkit.h"
+#include "clean-ipc.h"
+#include "conf-files.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "gpt.h"
+#include "home-util.h"
+#include "homed-conf.h"
+#include "homed-home-bus.h"
+#include "homed-home.h"
+#include "homed-manager-bus.h"
+#include "homed-manager.h"
+#include "homed-varlink.h"
+#include "io-util.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "quota-util.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "udev-util.h"
+#include "user-record-sign.h"
+#include "user-record-util.h"
+#include "user-record.h"
+#include "user-util.h"
+
+/* Where to look for private/public keys that are used to sign the user records. We are not using
+ * CONF_PATHS_NULSTR() here since we want to insert /var/lib/systemd/home/ in the middle. And we insert that
+ * since we want to auto-generate a persistent private/public key pair if we need to. */
+#define KEY_PATHS_NULSTR \
+ "/etc/systemd/home/\0" \
+ "/run/systemd/home/\0" \
+ "/var/lib/systemd/home/\0" \
+ "/usr/local/lib/systemd/home/\0" \
+ "/usr/lib/systemd/home/\0"
+
+static bool uid_is_home(uid_t uid) {
+ return uid >= HOME_UID_MIN && uid <= HOME_UID_MAX;
+}
+/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
+
+#define UID_CLAMP_INTO_HOME_RANGE(rnd) (((uid_t) (rnd) % (HOME_UID_MAX - HOME_UID_MIN + 1)) + HOME_UID_MIN)
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_uid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_name_hash_ops, char, string_hash_func, string_compare_func, Home, home_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_worker_pid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_sysfs_hash_ops, char, path_hash_func, path_compare, Home, home_free);
+
+static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata);
+static int manager_gc_images(Manager *m);
+static int manager_enumerate_images(Manager *m);
+static int manager_assess_image(Manager *m, int dir_fd, const char *dir_path, const char *dentry_name);
+static void manager_revalidate_image(Manager *m, Home *h);
+
+static void manager_watch_home(Manager *m) {
+ struct statfs sfs;
+ int r;
+
+ assert(m);
+
+ m->inotify_event_source = sd_event_source_unref(m->inotify_event_source);
+ m->scan_slash_home = false;
+
+ if (statfs("/home/", &sfs) < 0) {
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to statfs() /home/ directory, disabling automatic scanning.");
+ return;
+ }
+
+ if (is_network_fs(&sfs)) {
+ log_info("/home/ is a network file system, disabling automatic scanning.");
+ return;
+ }
+
+ if (is_fs_type(&sfs, AUTOFS_SUPER_MAGIC)) {
+ log_info("/home/ is on autofs, disabling automatic scanning.");
+ return;
+ }
+
+ m->scan_slash_home = true;
+
+ r = sd_event_add_inotify(m->event, &m->inotify_event_source, "/home/", IN_CREATE|IN_CLOSE_WRITE|IN_DELETE_SELF|IN_MOVE_SELF|IN_ONLYDIR|IN_MOVED_TO|IN_MOVED_FROM|IN_DELETE, on_home_inotify, m);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to create inotify watch on /home/, ignoring.");
+
+ (void) sd_event_source_set_description(m->inotify_event_source, "home-inotify");
+}
+
+static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) {
+ Manager *m = userdata;
+ const char *e, *n;
+
+ assert(m);
+ assert(event);
+
+ if ((event->mask & (IN_Q_OVERFLOW|IN_MOVE_SELF|IN_DELETE_SELF|IN_IGNORED|IN_UNMOUNT)) != 0) {
+
+ if (FLAGS_SET(event->mask, IN_Q_OVERFLOW))
+ log_debug("/home/ inotify queue overflow, rescanning.");
+ else if (FLAGS_SET(event->mask, IN_MOVE_SELF))
+ log_info("/home/ moved or renamed, recreating watch and rescanning.");
+ else if (FLAGS_SET(event->mask, IN_DELETE_SELF))
+ log_info("/home/ deleted, recreating watch and rescanning.");
+ else if (FLAGS_SET(event->mask, IN_UNMOUNT))
+ log_info("/home/ unmounted, recreating watch and rescanning.");
+ else if (FLAGS_SET(event->mask, IN_IGNORED))
+ log_info("/home/ watch invalidated, recreating watch and rescanning.");
+
+ manager_watch_home(m);
+ (void) manager_gc_images(m);
+ (void) manager_enumerate_images(m);
+ (void) bus_manager_emit_auto_login_changed(m);
+ return 0;
+ }
+
+ /* For the other inotify events, let's ignore all events for file names that don't match our
+ * expectations */
+ if (isempty(event->name))
+ return 0;
+ e = endswith(event->name, FLAGS_SET(event->mask, IN_ISDIR) ? ".homedir" : ".home");
+ if (!e)
+ return 0;
+
+ n = strndupa(event->name, e - event->name);
+ if (!suitable_user_name(n))
+ return 0;
+
+ if ((event->mask & (IN_CREATE|IN_CLOSE_WRITE|IN_MOVED_TO)) != 0) {
+ if (FLAGS_SET(event->mask, IN_CREATE))
+ log_debug("/home/%s has been created, having a look.", event->name);
+ else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
+ log_debug("/home/%s has been modified, having a look.", event->name);
+ else if (FLAGS_SET(event->mask, IN_MOVED_TO))
+ log_debug("/home/%s has been moved in, having a look.", event->name);
+
+ (void) manager_assess_image(m, -1, "/home/", event->name);
+ (void) bus_manager_emit_auto_login_changed(m);
+ }
+
+ if ((event->mask & (IN_DELETE | IN_CLOSE_WRITE | IN_MOVED_FROM)) != 0) {
+ Home *h;
+
+ if (FLAGS_SET(event->mask, IN_DELETE))
+ log_debug("/home/%s has been deleted, revalidating.", event->name);
+ else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
+ log_debug("/home/%s has been closed after writing, revalidating.", event->name);
+ else if (FLAGS_SET(event->mask, IN_MOVED_FROM))
+ log_debug("/home/%s has been moved away, revalidating.", event->name);
+
+ h = hashmap_get(m->homes_by_name, n);
+ if (h) {
+ manager_revalidate_image(m, h);
+ (void) bus_manager_emit_auto_login_changed(m);
+ }
+ }
+
+ return 0;
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .default_storage = _USER_STORAGE_INVALID,
+ };
+
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ m->homes_by_uid = hashmap_new(&homes_by_uid_hash_ops);
+ if (!m->homes_by_uid)
+ return -ENOMEM;
+
+ m->homes_by_name = hashmap_new(&homes_by_name_hash_ops);
+ if (!m->homes_by_name)
+ return -ENOMEM;
+
+ m->homes_by_worker_pid = hashmap_new(&homes_by_worker_pid_hash_ops);
+ if (!m->homes_by_worker_pid)
+ return -ENOMEM;
+
+ m->homes_by_sysfs = hashmap_new(&homes_by_sysfs_hash_ops);
+ if (!m->homes_by_sysfs)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+Manager* manager_free(Manager *m) {
+ Home *h;
+
+ assert(m);
+
+ HASHMAP_FOREACH(h, m->homes_by_worker_pid)
+ (void) home_wait_for_worker(h);
+
+ hashmap_free(m->homes_by_uid);
+ hashmap_free(m->homes_by_name);
+ hashmap_free(m->homes_by_worker_pid);
+ hashmap_free(m->homes_by_sysfs);
+
+ m->inotify_event_source = sd_event_source_unref(m->inotify_event_source);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ m->notify_socket_event_source = sd_event_source_unref(m->notify_socket_event_source);
+ m->device_monitor = sd_device_monitor_unref(m->device_monitor);
+
+ m->deferred_rescan_event_source = sd_event_source_unref(m->deferred_rescan_event_source);
+ m->deferred_gc_event_source = sd_event_source_unref(m->deferred_gc_event_source);
+ m->deferred_auto_login_event_source = sd_event_source_unref(m->deferred_auto_login_event_source);
+
+ if (m->private_key)
+ EVP_PKEY_free(m->private_key);
+
+ hashmap_free(m->public_keys);
+
+ varlink_server_unref(m->varlink_server);
+ free(m->userdb_service);
+
+ free(m->default_file_system_type);
+
+ return mfree(m);
+}
+
+int manager_verify_user_record(Manager *m, UserRecord *hr) {
+ EVP_PKEY *pkey;
+ int r;
+
+ assert(m);
+ assert(hr);
+
+ if (!m->private_key && hashmap_isempty(m->public_keys)) {
+ r = user_record_has_signature(hr);
+ if (r < 0)
+ return r;
+
+ return r ? -ENOKEY : USER_RECORD_UNSIGNED;
+ }
+
+ /* Is it our own? */
+ if (m->private_key) {
+ r = user_record_verify(hr, m->private_key);
+ switch (r) {
+
+ case USER_RECORD_FOREIGN:
+ /* This record is not signed by this key, but let's see below */
+ break;
+
+ case USER_RECORD_SIGNED: /* Signed by us, but also by others, let's propagate that */
+ case USER_RECORD_SIGNED_EXCLUSIVE: /* Signed by us, and nothing else, ditto */
+ case USER_RECORD_UNSIGNED: /* Not signed at all, ditto */
+ default:
+ return r;
+ }
+ }
+
+ HASHMAP_FOREACH(pkey, m->public_keys) {
+ r = user_record_verify(hr, pkey);
+ switch (r) {
+
+ case USER_RECORD_FOREIGN:
+ /* This record is not signed by this key, but let's see our other keys */
+ break;
+
+ case USER_RECORD_SIGNED: /* It's signed by this key we are happy with, but which is not our own. */
+ case USER_RECORD_SIGNED_EXCLUSIVE:
+ return USER_RECORD_FOREIGN;
+
+ case USER_RECORD_UNSIGNED: /* It's not signed at all */
+ default:
+ return r;
+ }
+ }
+
+ return -ENOKEY;
+}
+
+static int manager_add_home_by_record(
+ Manager *m,
+ const char *name,
+ int dir_fd,
+ const char *fname) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ unsigned line, column;
+ int r, is_signed;
+ struct stat st;
+ Home *h;
+
+ assert(m);
+ assert(name);
+ assert(fname);
+
+ if (fstatat(dir_fd, fname, &st, 0) < 0)
+ return log_error_errno(errno, "Failed to stat identity record %s: %m", fname);
+
+ if (!S_ISREG(st.st_mode)) {
+ log_debug("Identity record file %s is not a regular file, ignoring.", fname);
+ return 0;
+ }
+
+ if (st.st_size == 0)
+ goto unlink_this_file;
+
+ r = json_parse_file_at(NULL, dir_fd, fname, JSON_PARSE_SENSITIVE, &v, &line, &column);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse identity record at %s:%u%u: %m", fname, line, column);
+
+ if (json_variant_is_blank_object(v))
+ goto unlink_this_file;
+
+ hr = user_record_new();
+ if (!hr)
+ return log_oom();
+
+ r = user_record_load(hr, v, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_LOG);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(hr->user_name, name))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Identity's user name %s does not match file name %s, refusing.", hr->user_name, name);
+
+ is_signed = manager_verify_user_record(m, hr);
+ switch (is_signed) {
+
+ case -ENOKEY:
+ return log_warning_errno(is_signed, "User record %s is not signed by any accepted key, ignoring.", fname);
+ case USER_RECORD_UNSIGNED:
+ return log_warning_errno(SYNTHETIC_ERRNO(EPERM), "User record %s is not signed at all, ignoring.", fname);
+ case USER_RECORD_SIGNED:
+ log_info("User record %s is signed by us (and others), accepting.", fname);
+ break;
+ case USER_RECORD_SIGNED_EXCLUSIVE:
+ log_info("User record %s is signed only by us, accepting.", fname);
+ break;
+ case USER_RECORD_FOREIGN:
+ log_info("User record %s is signed by registered key from others, accepting.", fname);
+ break;
+ default:
+ assert(is_signed < 0);
+ return log_error_errno(is_signed, "Failed to verify signature of user record in %s: %m", fname);
+ }
+
+ h = hashmap_get(m->homes_by_name, name);
+ if (h) {
+ r = home_set_record(h, hr);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update home record for %s: %m", name);
+
+ /* If we acquired a record now for a previously unallocated entry, then reset the state. This
+ * makes sure home_get_state() will check for the availability of the image file dynamically
+ * in order to detect to distinguish HOME_INACTIVE and HOME_ABSENT. */
+ if (h->state == HOME_UNFIXATED)
+ h->state = _HOME_STATE_INVALID;
+ } else {
+ r = home_new(m, hr, NULL, &h);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate new home object: %m");
+
+ log_info("Added registered home for user %s.", hr->user_name);
+ }
+
+ /* Only entries we exclusively signed are writable to us, hence remember the result */
+ h->signed_locally = is_signed == USER_RECORD_SIGNED_EXCLUSIVE;
+
+ return 1;
+
+unlink_this_file:
+ /* If this is an empty file, then let's just remove it. An empty file is not useful in any case, and
+ * apparently xfs likes to leave empty files around when not unmounted cleanly (see
+ * https://github.com/systemd/systemd/issues/15178 for example). Note that we don't delete non-empty
+ * files even if they are invalid, because that's just too risky, we might delete data the user still
+ * needs. But empty files are never useful, hence let's just remove them. */
+
+ if (unlinkat(dir_fd, fname, 0) < 0)
+ return log_error_errno(errno, "Failed to remove empty user record file %s: %m", fname);
+
+ log_notice("Discovered empty user record file /var/lib/systemd/home/%s, removed automatically.", fname);
+ return 0;
+}
+
+static int manager_enumerate_records(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ assert(m);
+
+ d = opendir("/var/lib/systemd/home/");
+ if (!d)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to open /var/lib/systemd/home/: %m");
+
+ FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read record directory: %m")) {
+ _cleanup_free_ char *n = NULL;
+ const char *e;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ e = endswith(de->d_name, ".identity");
+ if (!e)
+ continue;
+
+ n = strndup(de->d_name, e - de->d_name);
+ if (!n)
+ return log_oom();
+
+ if (!suitable_user_name(n))
+ continue;
+
+ (void) manager_add_home_by_record(m, n, dirfd(d), de->d_name);
+ }
+
+ return 0;
+}
+
+static int search_quota(uid_t uid, const char *exclude_quota_path) {
+ struct stat exclude_st = {};
+ dev_t previous_devno = 0;
+ const char *where;
+ int r;
+
+ /* Checks whether the specified UID owns any files on the files system, but ignore any file system
+ * backing the specified file. The file is used when operating on home directories, where it's OK if
+ * the UID of them already owns files. */
+
+ if (exclude_quota_path && stat(exclude_quota_path, &exclude_st) < 0) {
+ if (errno != ENOENT)
+ return log_warning_errno(errno, "Failed to stat %s, ignoring: %m", exclude_quota_path);
+ }
+
+ /* Check a few usual suspects where regular users might own files. Note that this is by no means
+ * comprehensive, but should cover most cases. Note that in an ideal world every user would be
+ * registered in NSS and avoid our own UID range, but for all other cases, it's a good idea to be
+ * paranoid and check quota if we can. */
+ FOREACH_STRING(where, "/home/", "/tmp/", "/var/", "/var/mail/", "/var/tmp/", "/var/spool/") {
+ struct dqblk req;
+ struct stat st;
+
+ if (stat(where, &st) < 0) {
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to stat %s, ignoring: %m", where);
+ continue;
+ }
+
+ if (major(st.st_dev) == 0) {
+ log_debug("Directory %s is not on a real block device, not checking quota for UID use.", where);
+ continue;
+ }
+
+ if (st.st_dev == exclude_st.st_dev) { /* If an exclude path is specified, then ignore quota
+ * reported on the same block device as that path. */
+ log_debug("Directory %s is where the home directory is located, not checking quota for UID use.", where);
+ continue;
+ }
+
+ if (st.st_dev == previous_devno) { /* Does this directory have the same devno as the previous
+ * one we tested? If so, there's no point in testing this
+ * again. */
+ log_debug("Directory %s is on same device as previous tested directory, not checking quota for UID use a second time.", where);
+ continue;
+ }
+
+ previous_devno = st.st_dev;
+
+ r = quotactl_devno(QCMD_FIXED(Q_GETQUOTA, USRQUOTA), st.st_dev, uid, &req);
+ if (r < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ log_debug_errno(r, "No UID quota support on %s, ignoring.", where);
+ else if (ERRNO_IS_PRIVILEGE(r))
+ log_debug_errno(r, "UID quota support for %s prohibited, ignoring.", where);
+ else
+ log_warning_errno(r, "Failed to query quota on %s, ignoring: %m", where);
+
+ continue;
+ }
+
+ if ((FLAGS_SET(req.dqb_valid, QIF_SPACE) && req.dqb_curspace > 0) ||
+ (FLAGS_SET(req.dqb_valid, QIF_INODES) && req.dqb_curinodes > 0)) {
+ log_debug_errno(errno, "Quota reports UID " UID_FMT " occupies disk space on %s.", uid, where);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int manager_acquire_uid(
+ Manager *m,
+ uid_t start_uid,
+ const char *user_name,
+ const char *exclude_quota_path,
+ uid_t *ret) {
+
+ static const uint8_t hash_key[] = {
+ 0xa3, 0xb8, 0x82, 0x69, 0x9a, 0x71, 0xf7, 0xa9,
+ 0xe0, 0x7c, 0xf6, 0xf1, 0x21, 0x69, 0xd2, 0x1e
+ };
+
+ enum {
+ PHASE_SUGGESTED,
+ PHASE_HASHED,
+ PHASE_RANDOM
+ } phase = PHASE_SUGGESTED;
+
+ unsigned n_tries = 100;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ for (;;) {
+ struct passwd *pw;
+ struct group *gr;
+ uid_t candidate;
+ Home *other;
+
+ if (--n_tries <= 0)
+ return -EBUSY;
+
+ switch (phase) {
+
+ case PHASE_SUGGESTED:
+ phase = PHASE_HASHED;
+
+ if (!uid_is_home(start_uid))
+ continue;
+
+ candidate = start_uid;
+ break;
+
+ case PHASE_HASHED:
+ phase = PHASE_RANDOM;
+
+ if (!user_name)
+ continue;
+
+ candidate = UID_CLAMP_INTO_HOME_RANGE(siphash24(user_name, strlen(user_name), hash_key));
+ break;
+
+ case PHASE_RANDOM:
+ random_bytes(&candidate, sizeof(candidate));
+ candidate = UID_CLAMP_INTO_HOME_RANGE(candidate);
+ break;
+
+ default:
+ assert_not_reached("unknown phase");
+ }
+
+ other = hashmap_get(m->homes_by_uid, UID_TO_PTR(candidate));
+ if (other) {
+ log_debug("Candidate UID " UID_FMT " already used by another home directory (%s), let's try another.", candidate, other->user_name);
+ continue;
+ }
+
+ pw = getpwuid(candidate);
+ if (pw) {
+ log_debug("Candidate UID " UID_FMT " already registered by another user in NSS (%s), let's try another.", candidate, pw->pw_name);
+ continue;
+ }
+
+ gr = getgrgid((gid_t) candidate);
+ if (gr) {
+ log_debug("Candidate UID " UID_FMT " already registered by another group in NSS (%s), let's try another.", candidate, gr->gr_name);
+ continue;
+ }
+
+ r = search_ipc(candidate, (gid_t) candidate);
+ if (r < 0)
+ continue;
+ if (r > 0) {
+ log_debug_errno(r, "Candidate UID " UID_FMT " already owns IPC objects, let's try another: %m", candidate);
+ continue;
+ }
+
+ r = search_quota(candidate, exclude_quota_path);
+ if (r != 0)
+ continue;
+
+ *ret = candidate;
+ return 0;
+ }
+}
+
+static int manager_add_home_by_image(
+ Manager *m,
+ const char *user_name,
+ const char *realm,
+ const char *image_path,
+ const char *sysfs,
+ UserStorage storage,
+ uid_t start_uid) {
+
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ uid_t uid;
+ Home *h;
+ int r;
+
+ assert(m);
+
+ assert(m);
+ assert(user_name);
+ assert(image_path);
+ assert(storage >= 0);
+ assert(storage < _USER_STORAGE_MAX);
+
+ h = hashmap_get(m->homes_by_name, user_name);
+ if (h) {
+ bool same;
+
+ if (h->state != HOME_UNFIXATED) {
+ log_debug("Found an image for user %s which already has a record, skipping.", user_name);
+ return 0; /* ignore images that synthesize a user we already have a record for */
+ }
+
+ same = user_record_storage(h->record) == storage;
+ if (same) {
+ if (h->sysfs && sysfs)
+ same = path_equal(h->sysfs, sysfs);
+ else if (!!h->sysfs != !!sysfs)
+ same = false;
+ else {
+ const char *p;
+
+ p = user_record_image_path(h->record);
+ same = p && path_equal(p, image_path);
+ }
+ }
+
+ if (!same) {
+ log_debug("Found multiple images for user '%s', ignoring image '%s'.", user_name, image_path);
+ return 0;
+ }
+ } else {
+ /* Check NSS, in case there's another user or group by this name */
+ if (getpwnam(user_name) || getgrnam(user_name)) {
+ log_debug("Found an existing user or group by name '%s', ignoring image '%s'.", user_name, image_path);
+ return 0;
+ }
+ }
+
+ if (h && uid_is_valid(h->uid))
+ uid = h->uid;
+ else {
+ r = manager_acquire_uid(m, start_uid, user_name, IN_SET(storage, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT) ? image_path : NULL, &uid);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to acquire unused UID for %s: %m", user_name);
+ }
+
+ hr = user_record_new();
+ if (!hr)
+ return log_oom();
+
+ r = user_record_synthesize(hr, user_name, realm, image_path, storage, uid, (gid_t) uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize home record for %s (image %s): %m", user_name, image_path);
+
+ if (h) {
+ r = home_set_record(h, hr);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update home record for %s: %m", user_name);
+ } else {
+ r = home_new(m, hr, sysfs, &h);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate new home object: %m");
+
+ h->state = HOME_UNFIXATED;
+
+ log_info("Discovered new home for user %s through image %s.", user_name, image_path);
+ }
+
+ return 1;
+}
+
+int manager_augment_record_with_uid(
+ Manager *m,
+ UserRecord *hr) {
+
+ const char *exclude_quota_path = NULL;
+ uid_t start_uid = UID_INVALID, uid;
+ int r;
+
+ assert(m);
+ assert(hr);
+
+ if (uid_is_valid(hr->uid))
+ return 0;
+
+ if (IN_SET(hr->storage, USER_CLASSIC, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT)) {
+ const char * ip;
+
+ ip = user_record_image_path(hr);
+ if (ip) {
+ struct stat st;
+
+ if (stat(ip, &st) < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to stat(%s): %m", ip);
+ } else if (uid_is_home(st.st_uid)) {
+ start_uid = st.st_uid;
+ exclude_quota_path = ip;
+ }
+ }
+ }
+
+ r = manager_acquire_uid(m, start_uid, hr->user_name, exclude_quota_path, &uid);
+ if (r < 0)
+ return r;
+
+ log_debug("Acquired new UID " UID_FMT " for %s.", uid, hr->user_name);
+
+ r = user_record_add_binding(
+ hr,
+ _USER_STORAGE_INVALID,
+ NULL,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ NULL,
+ NULL,
+ UINT64_MAX,
+ NULL,
+ NULL,
+ uid,
+ (gid_t) uid);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int manager_assess_image(
+ Manager *m,
+ int dir_fd,
+ const char *dir_path,
+ const char *dentry_name) {
+
+ char *luks_suffix, *directory_suffix;
+ _cleanup_free_ char *path = NULL;
+ struct stat st;
+ int r;
+
+ assert(m);
+ assert(dir_path);
+ assert(dentry_name);
+
+ luks_suffix = endswith(dentry_name, ".home");
+ if (luks_suffix)
+ directory_suffix = NULL;
+ else
+ directory_suffix = endswith(dentry_name, ".homedir");
+
+ /* Early filter out: by name */
+ if (!luks_suffix && !directory_suffix)
+ return 0;
+
+ path = path_join(dir_path, dentry_name);
+ if (!path)
+ return log_oom();
+
+ /* Follow symlinks here, to allow people to link in stuff to make them available locally. */
+ if (dir_fd >= 0)
+ r = fstatat(dir_fd, dentry_name, &st, 0);
+ else
+ r = stat(path, &st);
+ if (r < 0)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to stat() directory entry '%s', ignoring: %m", dentry_name);
+
+ if (S_ISREG(st.st_mode)) {
+ _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
+
+ if (!luks_suffix)
+ return 0;
+
+ n = strndup(dentry_name, luks_suffix - dentry_name);
+ if (!n)
+ return log_oom();
+
+ r = split_user_name_realm(n, &user_name, &realm);
+ if (r == -EINVAL) /* Not the right format: ignore */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to split image name into user name/realm: %m");
+
+ return manager_add_home_by_image(m, user_name, realm, path, NULL, USER_LUKS, UID_INVALID);
+ }
+
+ if (S_ISDIR(st.st_mode)) {
+ _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
+ _cleanup_close_ int fd = -1;
+ UserStorage storage;
+
+ if (!directory_suffix)
+ return 0;
+
+ n = strndup(dentry_name, directory_suffix - dentry_name);
+ if (!n)
+ return log_oom();
+
+ r = split_user_name_realm(n, &user_name, &realm);
+ if (r == -EINVAL) /* Not the right format: ignore */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to split image name into user name/realm: %m");
+
+ if (dir_fd >= 0)
+ fd = openat(dir_fd, dentry_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ else
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to open directory '%s', ignoring: %m", path);
+
+ if (fstat(fd, &st) < 0)
+ return log_warning_errno(errno, "Failed to fstat() %s, ignoring: %m", path);
+
+ assert(S_ISDIR(st.st_mode)); /* Must hold, we used O_DIRECTORY above */
+
+ r = btrfs_is_subvol_fd(fd);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to determine whether %s is a btrfs subvolume: %m", path);
+ if (r > 0)
+ storage = USER_SUBVOLUME;
+ else {
+ struct fscrypt_policy policy;
+
+ if (ioctl(fd, FS_IOC_GET_ENCRYPTION_POLICY, &policy) < 0) {
+
+ if (errno == ENODATA)
+ log_debug_errno(errno, "Determined %s is not fscrypt encrypted.", path);
+ else if (ERRNO_IS_NOT_SUPPORTED(errno))
+ log_debug_errno(errno, "Determined %s is not fscrypt encrypted because kernel or file system doesn't support it.", path);
+ else
+ log_debug_errno(errno, "FS_IOC_GET_ENCRYPTION_POLICY failed with unexpected error code on %s, ignoring: %m", path);
+
+ storage = USER_DIRECTORY;
+ } else
+ storage = USER_FSCRYPT;
+ }
+
+ return manager_add_home_by_image(m, user_name, realm, path, NULL, storage, st.st_uid);
+ }
+
+ return 0;
+}
+
+int manager_enumerate_images(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ assert(m);
+
+ if (!m->scan_slash_home)
+ return 0;
+
+ d = opendir("/home/");
+ if (!d)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to open /home/: %m");
+
+ FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read /home/ directory: %m"))
+ (void) manager_assess_image(m, dirfd(d), "/home", de->d_name);
+
+ return 0;
+}
+
+static int manager_connect_bus(Manager *m) {
+ const char *suffix, *busname;
+ int r;
+
+ assert(m);
+ assert(!m->bus);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = bus_add_implementation(m->bus, &manager_object, m);
+ if (r < 0)
+ return r;
+
+ suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
+ if (suffix)
+ busname = strjoina("org.freedesktop.home1.", suffix);
+ else
+ busname = "org.freedesktop.home1";
+
+ r = sd_bus_request_name_async(m->bus, NULL, busname, 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ (void) sd_bus_set_exit_on_disconnect(m->bus, true);
+
+ return 0;
+}
+
+static int manager_bind_varlink(Manager *m) {
+ const char *suffix, *socket_path;
+ int r;
+
+ assert(m);
+ assert(!m->varlink_server);
+
+ r = varlink_server_new(&m->varlink_server, VARLINK_SERVER_ACCOUNT_UID);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate varlink server object: %m");
+
+ varlink_server_set_userdata(m->varlink_server, m);
+
+ r = varlink_server_bind_method_many(
+ m->varlink_server,
+ "io.systemd.UserDatabase.GetUserRecord", vl_method_get_user_record,
+ "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
+ "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register varlink methods: %m");
+
+ (void) mkdir_p("/run/systemd/userdb", 0755);
+
+ /* To make things easier to debug, when working from a homed managed home directory, let's optionally
+ * use a different varlink socket name */
+ suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
+ if (suffix)
+ socket_path = strjoina("/run/systemd/userdb/io.systemd.Home.", suffix);
+ else
+ socket_path = "/run/systemd/userdb/io.systemd.Home";
+
+ r = varlink_server_listen_address(m->varlink_server, socket_path, 0666);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind to varlink socket: %m");
+
+ r = varlink_server_attach_event(m->varlink_server, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ assert(!m->userdb_service);
+ m->userdb_service = strdup(basename(socket_path));
+ if (!m->userdb_service)
+ return log_oom();
+
+ /* Avoid recursion */
+ if (setenv("SYSTEMD_BYPASS_USERDB", m->userdb_service, 1) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to set $SYSTEMD_BYPASS_USERDB: %m");
+
+ return 0;
+}
+
+static ssize_t read_datagram(int fd, struct ucred *ret_sender, void **ret) {
+ _cleanup_free_ void *buffer = NULL;
+ ssize_t n, m;
+
+ assert(fd >= 0);
+ assert(ret_sender);
+ assert(ret);
+
+ n = next_datagram_size_fd(fd);
+ if (n < 0)
+ return n;
+
+ buffer = malloc(n + 2);
+ if (!buffer)
+ return -ENOMEM;
+
+ if (ret_sender) {
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ bool found_ucred = false;
+ struct cmsghdr *cmsg;
+ struct msghdr mh;
+ struct iovec iov;
+
+ /* Pass one extra byte, as a size check */
+ iov = IOVEC_MAKE(buffer, n + 1);
+
+ mh = (struct msghdr) {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ m = recvmsg_safe(fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (m < 0)
+ return m;
+
+ cmsg_close_all(&mh);
+
+ /* Ensure the size matches what we determined before */
+ if (m != n)
+ return -EMSGSIZE;
+
+ CMSG_FOREACH(cmsg, &mh)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
+
+ memcpy(ret_sender, CMSG_DATA(cmsg), sizeof(struct ucred));
+ found_ucred = true;
+ }
+
+ if (!found_ucred)
+ *ret_sender = (struct ucred) {
+ .pid = 0,
+ .uid = UID_INVALID,
+ .gid = GID_INVALID,
+ };
+ } else {
+ m = recv(fd, buffer, n + 1, MSG_DONTWAIT);
+ if (m < 0)
+ return -errno;
+
+ /* Ensure the size matches what we determined before */
+ if (m != n)
+ return -EMSGSIZE;
+ }
+
+ /* For safety reasons: let's always NUL terminate. */
+ ((char*) buffer)[n] = 0;
+ *ret = TAKE_PTR(buffer);
+
+ return 0;
+}
+
+static int on_notify_socket(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ void *datagram = NULL;
+ struct ucred sender;
+ Manager *m = userdata;
+ ssize_t n;
+ Home *h;
+
+ assert(s);
+ assert(m);
+
+ n = read_datagram(fd, &sender, &datagram);
+ if (IN_SET(n, -EAGAIN, -EINTR))
+ return 0;
+ if (n < 0)
+ return log_error_errno(n, "Failed to read notify datagram: %m");
+
+ if (sender.pid <= 0) {
+ log_warning("Received notify datagram without valid sender PID, ignoring.");
+ return 0;
+ }
+
+ h = hashmap_get(m->homes_by_worker_pid, PID_TO_PTR(sender.pid));
+ if (!h) {
+ log_warning("Received notify datagram of unknown process, ignoring.");
+ return 0;
+ }
+
+ l = strv_split(datagram, "\n");
+ if (!l)
+ return log_oom();
+
+ home_process_notify(h, l);
+ return 0;
+}
+
+static int manager_listen_notify(Manager *m) {
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/home/notify",
+ };
+ const char *suffix;
+ int r;
+
+ assert(m);
+ assert(!m->notify_socket_event_source);
+
+ suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
+ if (suffix) {
+ const char *unix_path;
+
+ unix_path = strjoina("/run/systemd/home/notify.", suffix);
+ r = sockaddr_un_set_path(&sa.un, unix_path);
+ if (r < 0)
+ return log_error_errno(r, "Socket path %s does not fit in sockaddr_un: %m", unix_path);
+ }
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to create listening socket: %m");
+
+ (void) mkdir_parents(sa.un.sun_path, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ if (bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+ return log_error_errno(errno, "Failed to bind to socket: %m");
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(m->event, &m->notify_socket_event_source, fd, EPOLLIN, on_notify_socket, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event source for notify socket: %m");
+
+ (void) sd_event_source_set_description(m->notify_socket_event_source, "notify-socket");
+
+ /* Make sure we process sd_notify() before SIGCHLD for any worker, so that we always know the error
+ * number of a client before it exits. */
+ r = sd_event_source_set_priority(m->notify_socket_event_source, SD_EVENT_PRIORITY_NORMAL - 5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to alter priority of NOTIFY_SOCKET event source: %m");
+
+ r = sd_event_source_set_io_fd_own(m->notify_socket_event_source, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to pass ownership of notify socket: %m");
+
+ return TAKE_FD(fd);
+}
+
+static int manager_add_device(Manager *m, sd_device *d) {
+ _cleanup_free_ char *user_name = NULL, *realm = NULL, *node = NULL;
+ const char *tabletype, *parttype, *partname, *partuuid, *sysfs;
+ sd_id128_t id;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ r = sd_device_get_syspath(d, &sysfs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire sysfs path of device: %m");
+
+ r = sd_device_get_property_value(d, "ID_PART_TABLE_TYPE", &tabletype);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire ID_PART_TABLE_TYPE device property, ignoring: %m");
+
+ if (!streq(tabletype, "gpt")) {
+ log_debug("Found partition (%s) on non-GPT table, ignoring.", sysfs);
+ return 0;
+ }
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &parttype);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire ID_PART_ENTRY_TYPE device property, ignoring: %m");
+ r = sd_id128_from_string(parttype, &id);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse ID_PART_ENTRY_TYPE field '%s', ignoring: %m", parttype);
+ if (!sd_id128_equal(id, GPT_USER_HOME)) {
+ log_debug("Found partition (%s) we don't care about, ignoring.", sysfs);
+ return 0;
+ }
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_NAME", &partname);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to acquire ID_PART_ENTRY_NAME device property, ignoring: %m");
+
+ r = split_user_name_realm(partname, &user_name, &realm);
+ if (r == -EINVAL)
+ return log_warning_errno(r, "Found partition with correct partition type but a non-parsable partition name '%s', ignoring.", partname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to validate partition name '%s': %m", partname);
+
+ r = sd_device_get_property_value(d, "ID_FS_UUID", &partuuid);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to acquire ID_FS_UUID device property, ignoring: %m");
+
+ r = sd_id128_from_string(partuuid, &id);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse ID_FS_UUID field '%s', ignoring: %m", partuuid);
+
+ if (asprintf(&node, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(id)) < 0)
+ return log_oom();
+
+ return manager_add_home_by_image(m, user_name, realm, node, sysfs, USER_LUKS, UID_INVALID);
+}
+
+static int manager_on_device(sd_device_monitor *monitor, sd_device *d, void *userdata) {
+ Manager *m = userdata;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ if (device_for_action(d, DEVICE_ACTION_REMOVE)) {
+ const char *sysfs;
+ Home *h;
+
+ r = sd_device_get_syspath(d, &sysfs);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to acquire sysfs path from device: %m");
+ return 0;
+ }
+
+ log_info("block device %s has been removed.", sysfs);
+
+ /* Let's see if we previously synthesized a home record from this device, if so, let's just
+ * revalidate that. Otherwise let's revalidate them all, but asynchronously. */
+ h = hashmap_get(m->homes_by_sysfs, sysfs);
+ if (h)
+ manager_revalidate_image(m, h);
+ else
+ manager_enqueue_gc(m, NULL);
+ } else
+ (void) manager_add_device(m, d);
+
+ (void) bus_manager_emit_auto_login_changed(m);
+ return 0;
+}
+
+static int manager_watch_devices(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->device_monitor);
+
+ r = sd_device_monitor_new(&m->device_monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate device monitor: %m");
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "block", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to configure device monitor match: %m");
+
+ r = sd_device_monitor_attach_event(m->device_monitor, m->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach device monitor to event loop: %m");
+
+ r = sd_device_monitor_start(m->device_monitor, manager_on_device, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ return 0;
+}
+
+static int manager_enumerate_devices(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(m);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d)
+ (void) manager_add_device(m, d);
+
+ return 0;
+}
+
+static int manager_load_key_pair(Manager *m) {
+ _cleanup_(fclosep) FILE *f = NULL;
+ struct stat st;
+ int r;
+
+ assert(m);
+
+ if (m->private_key) {
+ EVP_PKEY_free(m->private_key);
+ m->private_key = NULL;
+ }
+
+ r = search_and_fopen_nulstr("local.private", "re", NULL, KEY_PATHS_NULSTR, &f);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to read private key file: %m");
+
+ if (fstat(fileno(f), &st) < 0)
+ return log_error_errno(errno, "Failed to stat private key file: %m");
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return log_error_errno(r, "Private key file is not regular: %m");
+
+ if (st.st_uid != 0 || (st.st_mode & 0077) != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Private key file is readable by more than the root user");
+
+ m->private_key = PEM_read_PrivateKey(f, NULL, NULL, NULL);
+ if (!m->private_key)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to load private key pair");
+
+ log_info("Successfully loaded private key pair.");
+
+ return 1;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_PKEY_CTX*, EVP_PKEY_CTX_free);
+
+static int manager_generate_key_pair(Manager *m) {
+ _cleanup_(EVP_PKEY_CTX_freep) EVP_PKEY_CTX *ctx = NULL;
+ _cleanup_(unlink_and_freep) char *temp_public = NULL, *temp_private = NULL;
+ _cleanup_fclose_ FILE *fpublic = NULL, *fprivate = NULL;
+ int r;
+
+ if (m->private_key) {
+ EVP_PKEY_free(m->private_key);
+ m->private_key = NULL;
+ }
+
+ ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_ED25519, NULL);
+ if (!ctx)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to allocate Ed25519 key generation context.");
+
+ if (EVP_PKEY_keygen_init(ctx) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize Ed25519 key generation context.");
+
+ log_info("Generating key pair for signing local user identity records.");
+
+ if (EVP_PKEY_keygen(ctx, &m->private_key) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to generate Ed25519 key pair");
+
+ log_info("Successfully created Ed25519 key pair.");
+
+ (void) mkdir_p("/var/lib/systemd/home", 0755);
+
+ /* Write out public key (note that we only do that as a help to the user, we don't make use of this ever */
+ r = fopen_temporary("/var/lib/systemd/home/local.public", &fpublic, &temp_public);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to open key file for writing: %m");
+
+ if (PEM_write_PUBKEY(fpublic, m->private_key) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write public key.");
+
+ r = fflush_sync_and_check(fpublic);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write private key: %m");
+
+ fpublic = safe_fclose(fpublic);
+
+ /* Write out the private key (this actually writes out both private and public, OpenSSL is confusing) */
+ r = fopen_temporary("/var/lib/systemd/home/local.private", &fprivate, &temp_private);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to open key file for writing: %m");
+
+ if (PEM_write_PrivateKey(fprivate, m->private_key, NULL, NULL, 0, NULL, 0) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write private key pair.");
+
+ r = fflush_sync_and_check(fprivate);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write private key: %m");
+
+ fprivate = safe_fclose(fprivate);
+
+ /* Both are written now, move them into place */
+
+ if (rename(temp_public, "/var/lib/systemd/home/local.public") < 0)
+ return log_error_errno(errno, "Failed to move public key file into place: %m");
+ temp_public = mfree(temp_public);
+
+ if (rename(temp_private, "/var/lib/systemd/home/local.private") < 0) {
+ (void) unlink_noerrno("/var/lib/systemd/home/local.public"); /* try to remove the file we already created */
+ return log_error_errno(errno, "Failed to move private key file into place: %m");
+ }
+ temp_private = mfree(temp_private);
+
+ r = fsync_path_at(AT_FDCWD, "/var/lib/systemd/home/");
+ if (r < 0)
+ log_warning_errno(r, "Failed to sync /var/lib/systemd/home/, ignoring: %m");
+
+ return 1;
+}
+
+int manager_acquire_key_pair(Manager *m) {
+ int r;
+
+ assert(m);
+
+ /* Already there? */
+ if (m->private_key)
+ return 1;
+
+ /* First try to load key off disk */
+ r = manager_load_key_pair(m);
+ if (r != 0)
+ return r;
+
+ /* Didn't work, generate a new one */
+ return manager_generate_key_pair(m);
+}
+
+int manager_sign_user_record(Manager *m, UserRecord *u, UserRecord **ret, sd_bus_error *error) {
+ int r;
+
+ assert(m);
+ assert(u);
+ assert(ret);
+
+ r = manager_acquire_key_pair(m);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_PRIVATE_KEY, "Can't sign without local key.");
+
+ return user_record_sign(u, m->private_key, ret);
+}
+
+DEFINE_PRIVATE_HASH_OPS_FULL(public_key_hash_ops, char, string_hash_func, string_compare_func, free, EVP_PKEY, EVP_PKEY_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_PKEY*, EVP_PKEY_free);
+
+static int manager_load_public_key_one(Manager *m, const char *path) {
+ _cleanup_(EVP_PKEY_freep) EVP_PKEY *pkey = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *fn = NULL;
+ struct stat st;
+ int r;
+
+ assert(m);
+
+ if (streq(basename(path), "local.public")) /* we already loaded the private key, which includes the public one */
+ return 0;
+
+ f = fopen(path, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open public key %s: %m", path);
+ }
+
+ if (fstat(fileno(f), &st) < 0)
+ return log_error_errno(errno, "Failed to stat public key %s: %m", path);
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return log_error_errno(r, "Public key file %s is not a regular file: %m", path);
+
+ if (st.st_uid != 0 || (st.st_mode & 0022) != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Public key file %s is writable by more than the root user, refusing.", path);
+
+ r = hashmap_ensure_allocated(&m->public_keys, &public_key_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ pkey = PEM_read_PUBKEY(f, &pkey, NULL, NULL);
+ if (!pkey)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse public key file %s.", path);
+
+ fn = strdup(basename(path));
+ if (!fn)
+ return log_oom();
+
+ r = hashmap_put(m->public_keys, fn, pkey);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add public key to set: %m");
+
+ TAKE_PTR(fn);
+ TAKE_PTR(pkey);
+
+ return 0;
+}
+
+static int manager_load_public_keys(Manager *m) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **i;
+ int r;
+
+ assert(m);
+
+ m->public_keys = hashmap_free(m->public_keys);
+
+ r = conf_files_list_nulstr(
+ &files,
+ ".public",
+ NULL,
+ CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED,
+ KEY_PATHS_NULSTR);
+ if (r < 0)
+ return log_error_errno(r, "Failed to assemble list of public key directories: %m");
+
+ STRV_FOREACH(i, files)
+ (void) manager_load_public_key_one(m, *i);
+
+ return 0;
+}
+
+int manager_startup(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = manager_listen_notify(m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ r = manager_bind_varlink(m);
+ if (r < 0)
+ return r;
+
+ r = manager_load_key_pair(m); /* only try to load it, don't generate any */
+ if (r < 0)
+ return r;
+
+ r = manager_load_public_keys(m);
+ if (r < 0)
+ return r;
+
+ manager_watch_home(m);
+ (void) manager_watch_devices(m);
+
+ (void) manager_enumerate_records(m);
+ (void) manager_enumerate_images(m);
+ (void) manager_enumerate_devices(m);
+
+ /* Let's clean up home directories whose devices got removed while we were not running */
+ (void) manager_enqueue_gc(m, NULL);
+
+ return 0;
+}
+
+void manager_revalidate_image(Manager *m, Home *h) {
+ int r;
+
+ assert(m);
+ assert(h);
+
+ /* Frees an automatically discovered image, if it's synthetic and its image disappeared. Unmounts any
+ * image if it's mounted but it's image vanished. */
+
+ if (h->current_operation || !ordered_set_isempty(h->pending_operations))
+ return;
+
+ if (h->state == HOME_UNFIXATED) {
+ r = user_record_test_image_path(h->record);
+ if (r < 0)
+ log_warning_errno(r, "Can't determine if image of %s exists, freeing unfixated user: %m", h->user_name);
+ else if (r == USER_TEST_ABSENT)
+ log_info("Image for %s disappeared, freeing unfixated user.", h->user_name);
+ else
+ return;
+
+ home_free(h);
+
+ } else if (h->state < 0) {
+
+ r = user_record_test_home_directory(h->record);
+ if (r < 0) {
+ log_warning_errno(r, "Unable to determine state of home directory, ignoring: %m");
+ return;
+ }
+
+ if (r == USER_TEST_MOUNTED) {
+ r = user_record_test_image_path(h->record);
+ if (r < 0) {
+ log_warning_errno(r, "Unable to determine state of image path, ignoring: %m");
+ return;
+ }
+
+ if (r == USER_TEST_ABSENT) {
+ _cleanup_(operation_unrefp) Operation *o = NULL;
+
+ log_notice("Backing image disappeared while home directory %s was mounted, unmounting it forcibly.", h->user_name);
+ /* Wowza, the thing is mounted, but the device is gone? Act on it. */
+
+ r = home_killall(h);
+ if (r < 0)
+ log_warning_errno(r, "Failed to kill processes of user %s, ignoring: %m", h->user_name);
+
+ /* We enqueue the operation here, after all the home directory might
+ * currently already run some operation, and we can deactivate it only after
+ * that's complete. */
+ o = operation_new(OPERATION_DEACTIVATE_FORCE, NULL);
+ if (!o) {
+ log_oom();
+ return;
+ }
+
+ r = home_schedule_operation(h, o, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enqueue forced home directory %s deactivation, ignoring: %m", h->user_name);
+ }
+ }
+ }
+}
+
+int manager_gc_images(Manager *m) {
+ Home *h;
+
+ assert_se(m);
+
+ if (m->gc_focus) {
+ /* Focus on a specific home */
+
+ h = TAKE_PTR(m->gc_focus);
+ manager_revalidate_image(m, h);
+ } else {
+ /* Gc all */
+
+ HASHMAP_FOREACH(h, m->homes_by_name)
+ manager_revalidate_image(m, h);
+ }
+
+ return 0;
+}
+
+static int on_deferred_rescan(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ m->deferred_rescan_event_source = sd_event_source_unref(m->deferred_rescan_event_source);
+
+ manager_enumerate_devices(m);
+ manager_enumerate_images(m);
+ return 0;
+}
+
+int manager_enqueue_rescan(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->deferred_rescan_event_source)
+ return 0;
+
+ if (!m->event)
+ return 0;
+
+ if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
+ return 0;
+
+ r = sd_event_add_defer(m->event, &m->deferred_rescan_event_source, on_deferred_rescan, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate rescan event source: %m");
+
+ r = sd_event_source_set_priority(m->deferred_rescan_event_source, SD_EVENT_PRIORITY_IDLE+1);
+ if (r < 0)
+ log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
+
+ (void) sd_event_source_set_description(m->deferred_rescan_event_source, "deferred-rescan");
+ return 1;
+}
+
+static int on_deferred_gc(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ m->deferred_gc_event_source = sd_event_source_unref(m->deferred_gc_event_source);
+
+ manager_gc_images(m);
+ return 0;
+}
+
+int manager_enqueue_gc(Manager *m, Home *focus) {
+ int r;
+
+ assert(m);
+
+ /* This enqueues a request to GC dead homes. It may be called with focus=NULL in which case all homes
+ * will be scanned, or with the parameter set, in which case only that home is checked. */
+
+ if (!m->event)
+ return 0;
+
+ if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
+ return 0;
+
+ /* If a focus home is specified, then remember to focus just on this home. Otherwise invalidate any
+ * focus that might be set to look at all homes. */
+
+ if (m->deferred_gc_event_source) {
+ if (m->gc_focus != focus) /* not the same focus, then look at everything */
+ m->gc_focus = NULL;
+
+ return 0;
+ } else
+ m->gc_focus = focus; /* start focused */
+
+ r = sd_event_add_defer(m->event, &m->deferred_gc_event_source, on_deferred_gc, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate GC event source: %m");
+
+ r = sd_event_source_set_priority(m->deferred_gc_event_source, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");
+
+ (void) sd_event_source_set_description(m->deferred_gc_event_source, "deferred-gc");
+ return 1;
+}
diff --git a/src/home/homed-manager.h b/src/home/homed-manager.h
new file mode 100644
index 0000000..851b302
--- /dev/null
+++ b/src/home/homed-manager.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <openssl/evp.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-event.h"
+
+typedef struct Manager Manager;
+
+#include "hashmap.h"
+#include "homed-home.h"
+#include "varlink.h"
+
+#define HOME_UID_MIN 60001
+#define HOME_UID_MAX 60513
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ Hashmap *polkit_registry;
+
+ Hashmap *homes_by_uid;
+ Hashmap *homes_by_name;
+ Hashmap *homes_by_worker_pid;
+ Hashmap *homes_by_sysfs;
+
+ bool scan_slash_home;
+ UserStorage default_storage;
+ char *default_file_system_type;
+
+ sd_event_source *inotify_event_source;
+
+ /* An event source we receive sd_notify() messages from our worker from */
+ sd_event_source *notify_socket_event_source;
+
+ sd_device_monitor *device_monitor;
+
+ sd_event_source *deferred_rescan_event_source;
+ sd_event_source *deferred_gc_event_source;
+ sd_event_source *deferred_auto_login_event_source;
+
+ Home *gc_focus;
+
+ VarlinkServer *varlink_server;
+ char *userdb_service;
+
+ EVP_PKEY *private_key; /* actually a pair of private and public key */
+ Hashmap *public_keys; /* key name [char*] → publick key [EVP_PKEY*] */
+};
+
+int manager_new(Manager **ret);
+Manager* manager_free(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+int manager_startup(Manager *m);
+
+int manager_augment_record_with_uid(Manager *m, UserRecord *hr);
+
+int manager_enqueue_rescan(Manager *m);
+int manager_enqueue_gc(Manager *m, Home *focus);
+
+int manager_verify_user_record(Manager *m, UserRecord *hr);
+
+int manager_acquire_key_pair(Manager *m);
+int manager_sign_user_record(Manager *m, UserRecord *u, UserRecord **ret, sd_bus_error *error);
+
+int bus_manager_emit_auto_login_changed(Manager *m);
diff --git a/src/home/homed-operation.c b/src/home/homed-operation.c
new file mode 100644
index 0000000..3847fc5
--- /dev/null
+++ b/src/home/homed-operation.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "homed-operation.h"
+
+Operation *operation_new(OperationType type, sd_bus_message *m) {
+ Operation *o;
+
+ assert(type >= 0);
+ assert(type < _OPERATION_MAX);
+
+ o = new(Operation, 1);
+ if (!o)
+ return NULL;
+
+ *o = (Operation) {
+ .type = type,
+ .n_ref = 1,
+ .message = sd_bus_message_ref(m),
+ .send_fd = -1,
+ .result = -1,
+ };
+
+ return o;
+}
+
+static Operation *operation_free(Operation *o) {
+ int r;
+
+ if (!o)
+ return NULL;
+
+ if (o->message && o->result >= 0) {
+
+ if (o->result) {
+ /* Propagate success */
+ if (o->send_fd < 0)
+ r = sd_bus_reply_method_return(o->message, NULL);
+ else
+ r = sd_bus_reply_method_return(o->message, "h", o->send_fd);
+
+ } else {
+ /* Propagate failure */
+ if (sd_bus_error_is_set(&o->error))
+ r = sd_bus_reply_method_error(o->message, &o->error);
+ else
+ r = sd_bus_reply_method_errnof(o->message, o->ret, "Failed to execute operation: %m");
+ }
+ if (r < 0)
+ log_warning_errno(r, "Failed to reply to %s method call, ignoring: %m", sd_bus_message_get_member(o->message));
+ }
+
+ sd_bus_message_unref(o->message);
+ user_record_unref(o->secret);
+ safe_close(o->send_fd);
+ sd_bus_error_free(&o->error);
+
+ return mfree(o);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Operation, operation, operation_free);
+
+void operation_result(Operation *o, int ret, const sd_bus_error *error) {
+ assert(o);
+
+ if (ret >= 0)
+ o->result = true;
+ else {
+ o->ret = ret;
+
+ sd_bus_error_free(&o->error);
+ sd_bus_error_copy(&o->error, error);
+
+ o->result = false;
+ }
+}
diff --git a/src/home/homed-operation.h b/src/home/homed-operation.h
new file mode 100644
index 0000000..6721363
--- /dev/null
+++ b/src/home/homed-operation.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sd-bus.h>
+
+#include "user-record.h"
+
+typedef enum OperationType {
+ OPERATION_ACQUIRE, /* enqueued on AcquireHome() */
+ OPERATION_RELEASE, /* enqueued on ReleaseHome() */
+ OPERATION_LOCK_ALL, /* enqueued on LockAllHomes() */
+ OPERATION_DEACTIVATE_ALL, /* enqueued on DeactivateAllHomes() */
+ OPERATION_PIPE_EOF, /* enqueued when we see EOF on the per-home reference pipes */
+ OPERATION_DEACTIVATE_FORCE, /* enqueued on hard $HOME unplug */
+ OPERATION_IMMEDIATE, /* this is never enqueued, it's just a marker we immediately started executing an operation without enqueuing anything first. */
+ _OPERATION_MAX,
+ _OPERATION_INVALID = -1,
+} OperationType;
+
+/* Encapsulates an operation on one or more home directories. This has two uses:
+ *
+ * 1) For queuing an operation when we need to execute one for some reason but there's already one being
+ * executed.
+ *
+ * 2) When executing an operation without enqueuing it first (OPERATION_IMMEDIATE)
+ *
+ * Note that a single operation object can encapsulate operations on multiple home directories. This is used
+ * for the LockAllHomes() operation, which is one operation but applies to all homes at once. In case the
+ * operation applies to multiple homes the reference counter is increased once for each, and thus the
+ * operation is fully completed only after it reached zero again.
+ *
+ * The object (optionally) contains a reference of the D-Bus message triggering the operation, which is
+ * replied to when the operation is fully completed, i.e. when n_ref reaches zero.
+ */
+
+typedef struct Operation {
+ unsigned n_ref;
+ OperationType type;
+ sd_bus_message *message;
+
+ UserRecord *secret;
+ int send_fd; /* pipe fd for AcquireHome() which is taken already when we start the operation */
+
+ int result; /* < 0 if not completed yet, == 0 on failure, > 0 on success */
+ sd_bus_error error;
+ int ret;
+} Operation;
+
+Operation *operation_new(OperationType type, sd_bus_message *m);
+Operation *operation_ref(Operation *operation);
+Operation *operation_unref(Operation *operation);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Operation*, operation_unref);
+
+void operation_result(Operation *o, int ret, const sd_bus_error *error);
+
+static inline Operation* operation_result_unref(Operation *o, int ret, const sd_bus_error *error) {
+ if (!o)
+ return NULL;
+
+ operation_result(o, ret, error);
+ return operation_unref(o);
+}
diff --git a/src/home/homed-varlink.c b/src/home/homed-varlink.c
new file mode 100644
index 0000000..c429083
--- /dev/null
+++ b/src/home/homed-varlink.c
@@ -0,0 +1,366 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "group-record.h"
+#include "homed-varlink.h"
+#include "strv.h"
+#include "user-record-util.h"
+#include "user-record.h"
+#include "user-util.h"
+#include "format-util.h"
+
+typedef struct LookupParameters {
+ const char *user_name;
+ const char *group_name;
+ union {
+ uid_t uid;
+ gid_t gid;
+ };
+ const char *service;
+} LookupParameters;
+
+static bool client_is_trusted(Varlink *link, Home *h) {
+ uid_t peer_uid;
+ int r;
+
+ assert(link);
+ assert(h);
+
+ r = varlink_get_peer_uid(link, &peer_uid);
+ if (r < 0) {
+ log_debug_errno(r, "Unable to query peer UID, ignoring: %m");
+ return false;
+ }
+
+ return peer_uid == 0 || peer_uid == h->uid;
+}
+
+static int build_user_json(Home *h, bool trusted, JsonVariant **ret) {
+ _cleanup_(user_record_unrefp) UserRecord *augmented = NULL;
+ UserRecordLoadFlags flags;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ flags = USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_BINDING|USER_RECORD_STRIP_SECRET|USER_RECORD_ALLOW_STATUS|USER_RECORD_ALLOW_SIGNATURE;
+ if (trusted)
+ flags |= USER_RECORD_ALLOW_PRIVILEGED;
+ else
+ flags |= USER_RECORD_STRIP_PRIVILEGED;
+
+ r = home_augment_status(h, flags, &augmented);
+ if (r < 0)
+ return r;
+
+ return json_build(ret, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("record", JSON_BUILD_VARIANT(augmented->json)),
+ JSON_BUILD_PAIR("incomplete", JSON_BUILD_BOOLEAN(augmented->incomplete))));
+}
+
+static bool home_user_match_lookup_parameters(LookupParameters *p, Home *h) {
+ assert(p);
+ assert(h);
+
+ if (p->user_name && !streq(p->user_name, h->user_name))
+ return false;
+
+ if (uid_is_valid(p->uid) && h->uid != p->uid)
+ return false;
+
+ return true;
+}
+
+int vl_method_get_user_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(LookupParameters, uid), 0 },
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, user_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ LookupParameters p = {
+ .uid = UID_INVALID,
+ };
+ Manager *m = userdata;
+ bool trusted;
+ Home *h;
+ int r;
+
+ assert(parameters);
+ assert(m);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, m->userdb_service))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ if (uid_is_valid(p.uid))
+ h = hashmap_get(m->homes_by_uid, UID_TO_PTR(p.uid));
+ else if (p.user_name)
+ h = hashmap_get(m->homes_by_name, p.user_name);
+ else {
+
+ /* If neither UID nor name was specified, then dump all homes. Do so with varlink_notify()
+ * for all entries but the last, so that clients can stream the results, and easily process
+ * them piecemeal. */
+
+ HASHMAP_FOREACH(h, m->homes_by_name) {
+
+ if (!home_user_match_lookup_parameters(&p, h))
+ continue;
+
+ if (v) {
+ /* An entry set from the previous iteration? Then send it now */
+ r = varlink_notify(link, v);
+ if (r < 0)
+ return r;
+
+ v = json_variant_unref(v);
+ }
+
+ trusted = client_is_trusted(link, h);
+
+ r = build_user_json(h, trusted, &v);
+ if (r < 0)
+ return r;
+ }
+
+ if (!v)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_reply(link, v);
+ }
+
+ if (!h)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ if (!home_user_match_lookup_parameters(&p, h))
+ return varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
+
+ trusted = client_is_trusted(link, h);
+
+ r = build_user_json(h, trusted, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, v);
+}
+
+static int build_group_json(Home *h, JsonVariant **ret) {
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ g = group_record_new();
+ if (!g)
+ return -ENOMEM;
+
+ r = group_record_synthesize(g, h->record);
+ if (r < 0)
+ return r;
+
+ assert(!FLAGS_SET(g->mask, USER_RECORD_SECRET));
+ assert(!FLAGS_SET(g->mask, USER_RECORD_PRIVILEGED));
+
+ return json_build(ret,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("record", JSON_BUILD_VARIANT(g->json))));
+}
+
+static bool home_group_match_lookup_parameters(LookupParameters *p, Home *h) {
+ assert(p);
+ assert(h);
+
+ if (p->group_name && !streq(h->user_name, p->group_name))
+ return false;
+
+ if (gid_is_valid(p->gid) && h->uid != (uid_t) p->gid)
+ return false;
+
+ return true;
+}
+
+int vl_method_get_group_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(LookupParameters, gid), 0 },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, group_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ LookupParameters p = {
+ .gid = GID_INVALID,
+ };
+ Manager *m = userdata;
+ Home *h;
+ int r;
+
+ assert(parameters);
+ assert(m);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, m->userdb_service))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ if (gid_is_valid(p.gid))
+ h = hashmap_get(m->homes_by_uid, UID_TO_PTR((uid_t) p.gid));
+ else if (p.group_name)
+ h = hashmap_get(m->homes_by_name, p.group_name);
+ else {
+
+ HASHMAP_FOREACH(h, m->homes_by_name) {
+
+ if (!home_group_match_lookup_parameters(&p, h))
+ continue;
+
+ if (v) {
+ r = varlink_notify(link, v);
+ if (r < 0)
+ return r;
+
+ v = json_variant_unref(v);
+ }
+
+ r = build_group_json(h, &v);
+ if (r < 0)
+ return r;
+ }
+
+ if (!v)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_reply(link, v);
+ }
+
+ if (!h)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ if (!home_group_match_lookup_parameters(&p, h))
+ return varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
+
+ r = build_group_json(h, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, v);
+}
+
+int vl_method_get_memberships(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, user_name), JSON_SAFE },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, group_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ Manager *m = userdata;
+ LookupParameters p = {};
+ Home *h;
+ int r;
+
+ assert(parameters);
+ assert(m);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, m->userdb_service))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ if (p.user_name) {
+ const char *last = NULL;
+ char **i;
+
+ h = hashmap_get(m->homes_by_name, p.user_name);
+ if (!h)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ if (p.group_name) {
+ if (!strv_contains(h->record->member_of, p.group_name))
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_replyb(link, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(h->user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(p.group_name))));
+ }
+
+ STRV_FOREACH(i, h->record->member_of) {
+ if (last) {
+ r = varlink_notifyb(link, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(h->user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(last))));
+ if (r < 0)
+ return r;
+ }
+
+ last = *i;
+ }
+
+ if (last)
+ return varlink_replyb(link, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(h->user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(last))));
+
+ } else if (p.group_name) {
+ const char *last = NULL;
+
+ HASHMAP_FOREACH(h, m->homes_by_name) {
+
+ if (!strv_contains(h->record->member_of, p.group_name))
+ continue;
+
+ if (last) {
+ r = varlink_notifyb(link, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(p.group_name))));
+ if (r < 0)
+ return r;
+ }
+
+ last = h->user_name;
+ }
+
+ if (last)
+ return varlink_replyb(link, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(p.group_name))));
+ } else {
+ const char *last_user_name = NULL, *last_group_name = NULL;
+
+ HASHMAP_FOREACH(h, m->homes_by_name) {
+ char **j;
+
+ STRV_FOREACH(j, h->record->member_of) {
+
+ if (last_user_name) {
+ assert(last_group_name);
+
+ r = varlink_notifyb(link, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last_user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(last_group_name))));
+
+ if (r < 0)
+ return r;
+ }
+
+ last_user_name = h->user_name;
+ last_group_name = *j;
+ }
+ }
+
+ if (last_user_name) {
+ assert(last_group_name);
+ return varlink_replyb(link, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last_user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(last_group_name))));
+ }
+ }
+
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+}
diff --git a/src/home/homed-varlink.h b/src/home/homed-varlink.h
new file mode 100644
index 0000000..2e404f0
--- /dev/null
+++ b/src/home/homed-varlink.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "homed-manager.h"
+
+int vl_method_get_user_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata);
+int vl_method_get_group_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata);
+int vl_method_get_memberships(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata);
diff --git a/src/home/homed.c b/src/home/homed.c
new file mode 100644
index 0000000..e4d64bd
--- /dev/null
+++ b/src/home/homed.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "bus-log-control-api.h"
+#include "daemon-util.h"
+#include "homed-manager.h"
+#include "homed-manager-bus.h"
+#include "log.h"
+#include "main-func.h"
+#include "service-util.h"
+#include "signal-util.h"
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(notify_on_cleanup) const char *notify_stop = NULL;
+ int r;
+
+ log_setup_service();
+
+ r = service_parse_argv("systemd-homed.service",
+ "A service to create, remove, change or inspect home areas.",
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object),
+ argc, argv);
+ if (r <= 0)
+ return r;
+
+ umask(0022);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Could not create manager: %m");
+
+ r = manager_startup(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start up daemon: %m");
+
+ notify_stop = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/home/homed.conf b/src/home/homed.conf
new file mode 100644
index 0000000..1b5dbed
--- /dev/null
+++ b/src/home/homed.conf
@@ -0,0 +1,16 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See homed.conf(5) for details
+
+[Home]
+#DefaultStorage=
+#DefaultFileSystemType=btrfs
diff --git a/src/home/homework-cifs.c b/src/home/homework-cifs.c
new file mode 100644
index 0000000..2736095
--- /dev/null
+++ b/src/home/homework-cifs.c
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "homework-cifs.h"
+#include "homework-mount.h"
+#include "mount-util.h"
+#include "process-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+int home_prepare_cifs(
+ UserRecord *h,
+ bool already_activated,
+ HomeSetup *setup) {
+
+ assert(h);
+ assert(setup);
+ assert(user_record_storage(h) == USER_CIFS);
+
+ if (already_activated)
+ setup->root_fd = open(user_record_home_directory(h), O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ else {
+ bool mounted = false;
+ char **pw;
+ int r;
+
+ r = home_unshare_and_mount(NULL, NULL, false, user_record_mount_flags(h));
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(pw, h->password) {
+ _cleanup_(unlink_and_freep) char *p = NULL;
+ _cleanup_free_ char *options = NULL;
+ _cleanup_(fclosep) FILE *f = NULL;
+ pid_t mount_pid;
+ int exit_status;
+
+ r = fopen_temporary(NULL, &f, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create temporary credentials file: %m");
+
+ fprintf(f,
+ "username=%s\n"
+ "password=%s\n",
+ user_record_cifs_user_name(h),
+ *pw);
+
+ if (h->cifs_domain)
+ fprintf(f, "domain=%s\n", h->cifs_domain);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write temporary credentials file: %m");
+
+ f = safe_fclose(f);
+
+ if (asprintf(&options, "credentials=%s,uid=" UID_FMT ",forceuid,gid=" UID_FMT ",forcegid,file_mode=0%3o,dir_mode=0%3o",
+ p, h->uid, h->uid, h->access_mode, h->access_mode) < 0)
+ return log_oom();
+
+ r = safe_fork("(mount)", FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR, &mount_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execl("/bin/mount", "/bin/mount", "-n", "-t", "cifs",
+ h->cifs_service, "/run/systemd/user-home-mount",
+ "-o", options, NULL);
+
+ log_error_errno(errno, "Failed to execute fsck: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ exit_status = wait_for_terminate_and_check("mount", mount_pid, WAIT_LOG_ABNORMAL|WAIT_LOG_NON_ZERO_EXIT_STATUS);
+ if (exit_status < 0)
+ return exit_status;
+ if (exit_status != EXIT_SUCCESS)
+ return -EPROTO;
+
+ mounted = true;
+ break;
+ }
+
+ if (!mounted)
+ return log_error_errno(ENOKEY, "Failed to mount home directory with supplied password.");
+
+ setup->root_fd = open("/run/systemd/user-home-mount", O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ }
+ if (setup->root_fd < 0)
+ return log_error_errno(errno, "Failed to open home directory: %m");
+
+ return 0;
+}
+
+int home_activate_cifs(
+ UserRecord *h,
+ PasswordCache *cache,
+ UserRecord **ret_home) {
+
+ _cleanup_(home_setup_undo) HomeSetup setup = HOME_SETUP_INIT;
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
+ const char *hdo, *hd;
+ int r;
+
+ assert(h);
+ assert(user_record_storage(h) == USER_CIFS);
+ assert(ret_home);
+
+ if (!h->cifs_service)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks CIFS service, refusing.");
+
+ assert_se(hdo = user_record_home_directory(h));
+ hd = strdupa(hdo); /* copy the string out, since it might change later in the home record object */
+
+ r = home_prepare_cifs(h, false, &setup);
+ if (r < 0)
+ return r;
+
+ r = home_refresh(h, &setup, NULL, cache, NULL, &new_home);
+ if (r < 0)
+ return r;
+
+ setup.root_fd = safe_close(setup.root_fd);
+
+ r = home_move_mount(NULL, hd);
+ if (r < 0)
+ return r;
+
+ setup.undo_mount = false;
+
+ log_info("Everything completed.");
+
+ *ret_home = TAKE_PTR(new_home);
+ return 1;
+}
+
+int home_create_cifs(UserRecord *h, UserRecord **ret_home) {
+ _cleanup_(home_setup_undo) HomeSetup setup = HOME_SETUP_INIT;
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
+ _cleanup_(closedirp) DIR *d = NULL;
+ _cleanup_close_ int copy = -1;
+ int r;
+
+ assert(h);
+ assert(user_record_storage(h) == USER_CIFS);
+ assert(ret_home);
+
+ if (!h->cifs_service)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks CIFS service, refusing.");
+
+ if (access("/sbin/mount.cifs", F_OK) < 0) {
+ if (errno == ENOENT)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOLINK), "/sbin/mount.cifs is missing.");
+
+ return log_error_errno(errno, "Unable to detect whether /sbin/mount.cifs exists: %m");
+ }
+
+ r = home_prepare_cifs(h, false, &setup);
+ if (r < 0)
+ return r;
+
+ copy = fcntl(setup.root_fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ d = take_fdopendir(&copy);
+ if (!d)
+ return -errno;
+
+ errno = 0;
+ if (readdir_no_dot(d))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTEMPTY), "Selected CIFS directory not empty, refusing.");
+ if (errno != 0)
+ return log_error_errno(errno, "Failed to detect if CIFS directory is empty: %m");
+
+ r = home_populate(h, setup.root_fd);
+ if (r < 0)
+ return r;
+
+ r = home_sync_and_statfs(setup.root_fd, NULL);
+ if (r < 0)
+ return r;
+
+ r = user_record_clone(h, USER_RECORD_LOAD_MASK_SECRET, &new_home);
+ if (r < 0)
+ return log_error_errno(r, "Failed to clone record: %m");
+
+ r = user_record_add_binding(
+ new_home,
+ USER_CIFS,
+ NULL,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ NULL,
+ NULL,
+ UINT64_MAX,
+ NULL,
+ NULL,
+ h->uid,
+ (gid_t) h->uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add binding to record: %m");
+
+ log_info("Everything completed.");
+
+ *ret_home = TAKE_PTR(new_home);
+ return 0;
+}
diff --git a/src/home/homework-cifs.h b/src/home/homework-cifs.h
new file mode 100644
index 0000000..da2e50a
--- /dev/null
+++ b/src/home/homework-cifs.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "homework.h"
+#include "user-record.h"
+
+int home_prepare_cifs(UserRecord *h, bool already_activated, HomeSetup *setup);
+
+int home_activate_cifs(UserRecord *h, PasswordCache *cache, UserRecord **ret_home);
+
+int home_create_cifs(UserRecord *h, UserRecord **ret_home);
diff --git a/src/home/homework-directory.c b/src/home/homework-directory.c
new file mode 100644
index 0000000..2d80003
--- /dev/null
+++ b/src/home/homework-directory.c
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "btrfs-util.h"
+#include "fd-util.h"
+#include "homework-directory.h"
+#include "homework-quota.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+
+int home_prepare_directory(UserRecord *h, bool already_activated, HomeSetup *setup) {
+ assert(h);
+ assert(setup);
+
+ setup->root_fd = open(user_record_image_path(h), O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (setup->root_fd < 0)
+ return log_error_errno(errno, "Failed to open home directory: %m");
+
+ return 0;
+}
+
+int home_activate_directory(
+ UserRecord *h,
+ PasswordCache *cache,
+ UserRecord **ret_home) {
+
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL, *header_home = NULL;
+ _cleanup_(home_setup_undo) HomeSetup setup = HOME_SETUP_INIT;
+ const char *hdo, *hd, *ipo, *ip;
+ int r;
+
+ assert(h);
+ assert(IN_SET(user_record_storage(h), USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT));
+ assert(ret_home);
+
+ assert_se(ipo = user_record_image_path(h));
+ ip = strdupa(ipo); /* copy out, since reconciliation might cause changing of the field */
+
+ assert_se(hdo = user_record_home_directory(h));
+ hd = strdupa(hdo);
+
+ r = home_prepare(h, false, cache, &setup, &header_home);
+ if (r < 0)
+ return r;
+
+ r = home_refresh(h, &setup, header_home, cache, NULL, &new_home);
+ if (r < 0)
+ return r;
+
+ setup.root_fd = safe_close(setup.root_fd);
+
+ /* Create mount point to mount over if necessary */
+ if (!path_equal(ip, hd))
+ (void) mkdir_p(hd, 0700);
+
+ /* Create a mount point (even if the directory is already placed correctly), as a way to indicate
+ * this mount point is now "activated". Moreover, we want to set per-user
+ * MS_NOSUID/MS_NOEXEC/MS_NODEV. */
+ r = mount_nofollow_verbose(LOG_ERR, ip, hd, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ r = mount_nofollow_verbose(LOG_ERR, NULL, hd, NULL, MS_BIND|MS_REMOUNT|user_record_mount_flags(h), NULL);
+ if (r < 0) {
+ (void) umount_verbose(LOG_ERR, hd, UMOUNT_NOFOLLOW);
+ return r;
+ }
+
+ log_info("Everything completed.");
+
+ *ret_home = TAKE_PTR(new_home);
+ return 0;
+}
+
+int home_create_directory_or_subvolume(UserRecord *h, UserRecord **ret_home) {
+ _cleanup_(rm_rf_subvolume_and_freep) char *temporary = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
+ _cleanup_close_ int root_fd = -1;
+ _cleanup_free_ char *d = NULL;
+ const char *ip;
+ int r;
+
+ assert(h);
+ assert(IN_SET(user_record_storage(h), USER_DIRECTORY, USER_SUBVOLUME));
+ assert(ret_home);
+
+ assert_se(ip = user_record_image_path(h));
+
+ r = tempfn_random(ip, "homework", &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate temporary directory: %m");
+
+ (void) mkdir_parents(d, 0755);
+
+ switch (user_record_storage(h)) {
+
+ case USER_SUBVOLUME:
+ RUN_WITH_UMASK(0077)
+ r = btrfs_subvol_make(d);
+
+ if (r >= 0) {
+ log_info("Subvolume created.");
+
+ if (h->disk_size != UINT64_MAX) {
+
+ /* Enable quota for the subvolume we just created. Note we don't check for
+ * errors here and only log about debug level about this. */
+ r = btrfs_quota_enable(d, true);
+ if (r < 0)
+ log_debug_errno(r, "Failed to enable quota on %s, ignoring: %m", d);
+
+ r = btrfs_subvol_auto_qgroup(d, 0, false);
+ if (r < 0)
+ log_debug_errno(r, "Failed to set up automatic quota group on %s, ignoring: %m", d);
+
+ /* Actually configure the quota. We also ignore errors here, but we do log
+ * about them loudly, to keep things discoverable even though we don't
+ * consider lacking quota support in kernel fatal. */
+ (void) home_update_quota_btrfs(h, d);
+ }
+
+ break;
+ }
+ if (r != -ENOTTY)
+ return log_error_errno(r, "Failed to create temporary home directory subvolume %s: %m", d);
+
+ log_info("Creating subvolume %s is not supported, as file system does not support subvolumes. Falling back to regular directory.", d);
+ _fallthrough_;
+
+ case USER_DIRECTORY:
+
+ if (mkdir(d, 0700) < 0)
+ return log_error_errno(errno, "Failed to create temporary home directory %s: %m", d);
+
+ (void) home_update_quota_classic(h, d);
+ break;
+
+ default:
+ assert_not_reached("unexpected storage");
+ }
+
+ temporary = TAKE_PTR(d); /* Needs to be destroyed now */
+
+ root_fd = open(temporary, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (root_fd < 0)
+ return log_error_errno(errno, "Failed to open temporary home directory: %m");
+
+ r = home_populate(h, root_fd);
+ if (r < 0)
+ return r;
+
+ r = home_sync_and_statfs(root_fd, NULL);
+ if (r < 0)
+ return r;
+
+ r = user_record_clone(h, USER_RECORD_LOAD_MASK_SECRET, &new_home);
+ if (r < 0)
+ return log_error_errno(r, "Failed to clone record: %m");
+
+ r = user_record_add_binding(
+ new_home,
+ user_record_storage(h),
+ ip,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ NULL,
+ NULL,
+ UINT64_MAX,
+ NULL,
+ NULL,
+ h->uid,
+ (gid_t) h->uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add binding to record: %m");
+
+ if (rename(temporary, ip) < 0)
+ return log_error_errno(errno, "Failed to rename %s to %s: %m", temporary, ip);
+
+ temporary = mfree(temporary);
+
+ log_info("Everything completed.");
+
+ *ret_home = TAKE_PTR(new_home);
+ return 0;
+}
+
+int home_resize_directory(
+ UserRecord *h,
+ bool already_activated,
+ PasswordCache *cache,
+ HomeSetup *setup,
+ UserRecord **ret_home) {
+
+ _cleanup_(user_record_unrefp) UserRecord *embedded_home = NULL, *new_home = NULL;
+ int r;
+
+ assert(h);
+ assert(setup);
+ assert(ret_home);
+ assert(IN_SET(user_record_storage(h), USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT));
+
+ r = home_prepare(h, already_activated, cache, setup, NULL);
+ if (r < 0)
+ return r;
+
+ r = home_load_embedded_identity(h, setup->root_fd, NULL, USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL, cache, &embedded_home, &new_home);
+ if (r < 0)
+ return r;
+
+ r = home_update_quota_auto(h, NULL);
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ return -ESOCKTNOSUPPORT; /* make recognizable */
+ if (r < 0)
+ return r;
+
+ r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
+ if (r < 0)
+ return r;
+
+ r = home_extend_embedded_identity(new_home, h, setup);
+ if (r < 0)
+ return r;
+
+ r = home_sync_and_statfs(setup->root_fd, NULL);
+ if (r < 0)
+ return r;
+
+ r = home_setup_undo(setup);
+ if (r < 0)
+ return r;
+
+ log_info("Everything completed.");
+
+ *ret_home = TAKE_PTR(new_home);
+ return 0;
+}
diff --git a/src/home/homework-directory.h b/src/home/homework-directory.h
new file mode 100644
index 0000000..27d640f
--- /dev/null
+++ b/src/home/homework-directory.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "homework.h"
+#include "user-record.h"
+
+int home_prepare_directory(UserRecord *h, bool already_activated, HomeSetup *setup);
+int home_activate_directory(UserRecord *h, PasswordCache *cache, UserRecord **ret_home);
+int home_create_directory_or_subvolume(UserRecord *h, UserRecord **ret_home);
+int home_resize_directory(UserRecord *h, bool already_activated, PasswordCache *cache, HomeSetup *setup, UserRecord **ret_home);
diff --git a/src/home/homework-fido2.c b/src/home/homework-fido2.c
new file mode 100644
index 0000000..2f717a5
--- /dev/null
+++ b/src/home/homework-fido2.c
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fido.h>
+
+#include "hexdecoct.h"
+#include "homework-fido2.h"
+#include "strv.h"
+
+static int fido2_use_specific_token(
+ const char *path,
+ UserRecord *h,
+ UserRecord *secret,
+ const Fido2HmacSalt *salt,
+ char **ret) {
+
+ _cleanup_(fido_cbor_info_free) fido_cbor_info_t *di = NULL;
+ _cleanup_(fido_assert_free) fido_assert_t *a = NULL;
+ _cleanup_(fido_dev_free) fido_dev_t *d = NULL;
+ bool found_extension = false;
+ size_t n, hmac_size;
+ const void *hmac;
+ char **e;
+ int r;
+
+ d = fido_dev_new();
+ if (!d)
+ return log_oom();
+
+ r = fido_dev_open(d, path);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to open FIDO2 device %s: %s", path, fido_strerr(r));
+
+ if (!fido_dev_is_fido2(d))
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
+ "Specified device %s is not a FIDO2 device.", path);
+
+ di = fido_cbor_info_new();
+ if (!di)
+ return log_oom();
+
+ r = fido_dev_get_cbor_info(d, di);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to get CBOR device info for %s: %s", path, fido_strerr(r));
+
+ e = fido_cbor_info_extensions_ptr(di);
+ n = fido_cbor_info_extensions_len(di);
+
+ for (size_t i = 0; i < n; i++)
+ if (streq(e[i], "hmac-secret")) {
+ found_extension = true;
+ break;
+ }
+
+ if (!found_extension)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
+ "Specified device %s is a FIDO2 device, but does not support the required HMAC-SECRET extension.", path);
+
+ a = fido_assert_new();
+ if (!a)
+ return log_oom();
+
+ r = fido_assert_set_extensions(a, FIDO_EXT_HMAC_SECRET);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to enable HMAC-SECRET extension on FIDO2 assertion: %s", fido_strerr(r));
+
+ r = fido_assert_set_hmac_salt(a, salt->salt, salt->salt_size);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set salt on FIDO2 assertion: %s", fido_strerr(r));
+
+ r = fido_assert_set_rp(a, "io.systemd.home");
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 assertion ID: %s", fido_strerr(r));
+
+ r = fido_assert_set_clientdata_hash(a, (const unsigned char[32]) {}, 32);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 assertion client data hash: %s", fido_strerr(r));
+
+ r = fido_assert_allow_cred(a, salt->credential.id, salt->credential.size);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to add FIDO2 assertion credential ID: %s", fido_strerr(r));
+
+ r = fido_assert_set_up(a, h->fido2_user_presence_permitted <= 0 ? FIDO_OPT_FALSE : FIDO_OPT_TRUE);
+ if (r != FIDO_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to set FIDO2 assertion user presence: %s", fido_strerr(r));
+
+ log_info("Asking FIDO2 token for authentication.");
+
+ r = fido_dev_get_assert(d, a, NULL); /* try without pin first */
+ if (r == FIDO_ERR_PIN_REQUIRED) {
+ char **i;
+
+ /* OK, we needed a pin, try with all pins in turn */
+ STRV_FOREACH(i, secret->token_pin) {
+ r = fido_dev_get_assert(d, a, *i);
+ if (r != FIDO_ERR_PIN_INVALID)
+ break;
+ }
+ }
+
+ switch (r) {
+ case FIDO_OK:
+ break;
+ case FIDO_ERR_NO_CREDENTIALS:
+ return log_error_errno(SYNTHETIC_ERRNO(EBADSLT),
+ "Wrong security token; needed credentials not present on token.");
+ case FIDO_ERR_PIN_REQUIRED:
+ return log_error_errno(SYNTHETIC_ERRNO(ENOANO),
+ "Security token requires PIN.");
+ case FIDO_ERR_PIN_AUTH_BLOCKED:
+ return log_error_errno(SYNTHETIC_ERRNO(EOWNERDEAD),
+ "PIN of security token is blocked, please remove/reinsert token.");
+ case FIDO_ERR_PIN_INVALID:
+ return log_error_errno(SYNTHETIC_ERRNO(ENOLCK),
+ "PIN of security token incorrect.");
+ case FIDO_ERR_UP_REQUIRED:
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE),
+ "User presence required.");
+ case FIDO_ERR_ACTION_TIMEOUT:
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSTR),
+ "Token action timeout. (User didn't interact with token quickly enough.)");
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to ask token for assertion: %s", fido_strerr(r));
+ }
+
+ hmac = fido_assert_hmac_secret_ptr(a, 0);
+ if (!hmac)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to retrieve HMAC secret.");
+
+ hmac_size = fido_assert_hmac_secret_len(a, 0);
+
+ r = base64mem(hmac, hmac_size, ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to base64 encode HMAC secret: %m");
+
+ return 0;
+}
+
+int fido2_use_token(UserRecord *h, UserRecord *secret, const Fido2HmacSalt *salt, char **ret) {
+ size_t allocated = 64, found = 0;
+ fido_dev_info_t *di = NULL;
+ int r;
+
+ di = fido_dev_info_new(allocated);
+ if (!di)
+ return log_oom();
+
+ r = fido_dev_info_manifest(di, allocated, &found);
+ if (r == FIDO_ERR_INTERNAL) {
+ /* The library returns FIDO_ERR_INTERNAL when no devices are found. I wish it wouldn't. */
+ r = log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), "Got FIDO_ERR_INTERNAL, assuming no devices.");
+ goto finish;
+ }
+ if (r != FIDO_OK) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to enumerate FIDO2 devices: %s", fido_strerr(r));
+ goto finish;
+ }
+
+ for (size_t i = 0; i < found; i++) {
+ const fido_dev_info_t *entry;
+ const char *path;
+
+ entry = fido_dev_info_ptr(di, i);
+ if (!entry) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to get device information for FIDO device %zu.", i);
+ goto finish;
+ }
+
+ path = fido_dev_info_path(entry);
+ if (!path) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to query FIDO device path.");
+ goto finish;
+ }
+
+ r = fido2_use_specific_token(path, h, secret, salt, ret);
+ if (!IN_SET(r,
+ -EBADSLT, /* device doesn't understand our credential hash */
+ -ENODEV /* device is not a FIDO2 device with HMAC-SECRET */))
+ goto finish;
+ }
+
+ r = -EAGAIN;
+
+finish:
+ fido_dev_info_free(&di, allocated);
+ return r;
+}
diff --git a/src/home/homework-fido2.h b/src/home/homework-fido2.h
new file mode 100644
index 0000000..a1dcba2
--- /dev/null
+++ b/src/home/homework-fido2.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "user-record.h"
+
+int fido2_use_token(UserRecord *h, UserRecord *secret, const Fido2HmacSalt *salt, char **ret);
diff --git a/src/home/homework-fscrypt.c b/src/home/homework-fscrypt.c
new file mode 100644
index 0000000..d0676f8
--- /dev/null
+++ b/src/home/homework-fscrypt.c
@@ -0,0 +1,643 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/fs.h>
+#include <openssl/evp.h>
+#include <openssl/sha.h>
+#include <sys/ioctl.h>
+#include <sys/xattr.h>
+
+#include "errno-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "homework-fscrypt.h"
+#include "homework-quota.h"
+#include "memory-util.h"
+#include "missing_keyctl.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "openssl-util.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+static int fscrypt_upload_volume_key(
+ const uint8_t key_descriptor[static FS_KEY_DESCRIPTOR_SIZE],
+ const void *volume_key,
+ size_t volume_key_size,
+ key_serial_t where) {
+
+ _cleanup_free_ char *hex = NULL;
+ const char *description;
+ struct fscrypt_key key;
+ key_serial_t serial;
+
+ assert(key_descriptor);
+ assert(volume_key);
+ assert(volume_key_size > 0);
+
+ if (volume_key_size > sizeof(key.raw))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Volume key too long.");
+
+ hex = hexmem(key_descriptor, FS_KEY_DESCRIPTOR_SIZE);
+ if (!hex)
+ return log_oom();
+
+ description = strjoina("fscrypt:", hex);
+
+ key = (struct fscrypt_key) {
+ .size = volume_key_size,
+ };
+ memcpy(key.raw, volume_key, volume_key_size);
+
+ /* Upload to the kernel */
+ serial = add_key("logon", description, &key, sizeof(key), where);
+ explicit_bzero_safe(&key, sizeof(key));
+
+ if (serial < 0)
+ return log_error_errno(errno, "Failed to install master key in keyring: %m");
+
+ log_info("Uploaded encryption key to kernel.");
+
+ return 0;
+}
+
+static void calculate_key_descriptor(
+ const void *key,
+ size_t key_size,
+ uint8_t ret_key_descriptor[static FS_KEY_DESCRIPTOR_SIZE]) {
+
+ uint8_t hashed[512 / 8] = {}, hashed2[512 / 8] = {};
+
+ /* Derive the key descriptor from the volume key via double SHA512, in order to be compatible with e4crypt */
+
+ assert_se(SHA512(key, key_size, hashed) == hashed);
+ assert_se(SHA512(hashed, sizeof(hashed), hashed2) == hashed2);
+
+ assert_cc(sizeof(hashed2) >= FS_KEY_DESCRIPTOR_SIZE);
+
+ memcpy(ret_key_descriptor, hashed2, FS_KEY_DESCRIPTOR_SIZE);
+}
+
+static int fscrypt_slot_try_one(
+ const char *password,
+ const void *salt, size_t salt_size,
+ const void *encrypted, size_t encrypted_size,
+ const uint8_t match_key_descriptor[static FS_KEY_DESCRIPTOR_SIZE],
+ void **ret_decrypted, size_t *ret_decrypted_size) {
+
+
+ _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
+ _cleanup_(erase_and_freep) void *decrypted = NULL;
+ uint8_t key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+ int decrypted_size_out1, decrypted_size_out2;
+ uint8_t derived[512 / 8] = {};
+ size_t decrypted_size;
+ const EVP_CIPHER *cc;
+ int r;
+
+ assert(password);
+ assert(salt);
+ assert(salt_size > 0);
+ assert(encrypted);
+ assert(encrypted_size > 0);
+ assert(match_key_descriptor);
+
+ /* Our construction is like this:
+ *
+ * 1. In each key slot we store a salt value plus the encrypted volume key
+ *
+ * 2. Unlocking is via calculating PBKDF2-HMAC-SHA512 of the supplied password (in combination with
+ * the salt), then using the first 256 bit of the hash as key for decrypting the encrypted
+ * volume key in AES256 counter mode.
+ *
+ * 3. Writing a password is similar: calculate PBKDF2-HMAC-SHA512 of the supplied password (in
+ * combination with the salt), then encrypt the volume key in AES256 counter mode with the
+ * resulting hash.
+ */
+
+ if (PKCS5_PBKDF2_HMAC(
+ password, strlen(password),
+ salt, salt_size,
+ 0xFFFF, EVP_sha512(),
+ sizeof(derived), derived) != 1) {
+ r = log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "PBKDF2 failed");
+ goto finish;
+ }
+
+ context = EVP_CIPHER_CTX_new();
+ if (!context) {
+ r = log_oom();
+ goto finish;
+ }
+
+ /* We use AES256 in counter mode */
+ assert_se(cc = EVP_aes_256_ctr());
+
+ /* We only use the first half of the derived key */
+ assert(sizeof(derived) >= (size_t) EVP_CIPHER_key_length(cc));
+
+ if (EVP_DecryptInit_ex(context, cc, NULL, derived, NULL) != 1) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize decryption context.");
+ goto finish;
+ }
+
+ /* Flush out the derived key now, we don't need it anymore */
+ explicit_bzero_safe(derived, sizeof(derived));
+
+ decrypted_size = encrypted_size + EVP_CIPHER_key_length(cc) * 2;
+ decrypted = malloc(decrypted_size);
+ if (!decrypted)
+ return log_oom();
+
+ if (EVP_DecryptUpdate(context, (uint8_t*) decrypted, &decrypted_size_out1, encrypted, encrypted_size) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decrypt volume key.");
+
+ assert((size_t) decrypted_size_out1 <= decrypted_size);
+
+ if (EVP_DecryptFinal_ex(context, (uint8_t*) decrypted_size + decrypted_size_out1, &decrypted_size_out2) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish decryption of volume key.");
+
+ assert((size_t) decrypted_size_out1 + (size_t) decrypted_size_out2 < decrypted_size);
+ decrypted_size = (size_t) decrypted_size_out1 + (size_t) decrypted_size_out2;
+
+ calculate_key_descriptor(decrypted, decrypted_size, key_descriptor);
+
+ if (memcmp(key_descriptor, match_key_descriptor, FS_KEY_DESCRIPTOR_SIZE) != 0)
+ return -ENOANO; /* don't log here */
+
+ r = fscrypt_upload_volume_key(key_descriptor, decrypted, decrypted_size, KEY_SPEC_THREAD_KEYRING);
+ if (r < 0)
+ return r;
+
+ if (ret_decrypted)
+ *ret_decrypted = TAKE_PTR(decrypted);
+ if (ret_decrypted_size)
+ *ret_decrypted_size = decrypted_size;
+
+ return 0;
+
+finish:
+ explicit_bzero_safe(derived, sizeof(derived));
+ return r;
+}
+
+static int fscrypt_slot_try_many(
+ char **passwords,
+ const void *salt, size_t salt_size,
+ const void *encrypted, size_t encrypted_size,
+ const uint8_t match_key_descriptor[static FS_KEY_DESCRIPTOR_SIZE],
+ void **ret_decrypted, size_t *ret_decrypted_size) {
+
+ char **i;
+ int r;
+
+ STRV_FOREACH(i, passwords) {
+ r = fscrypt_slot_try_one(*i, salt, salt_size, encrypted, encrypted_size, match_key_descriptor, ret_decrypted, ret_decrypted_size);
+ if (r != -ENOANO)
+ return r;
+ }
+
+ return -ENOANO;
+}
+
+static int fscrypt_setup(
+ const PasswordCache *cache,
+ char **password,
+ HomeSetup *setup,
+ void **ret_volume_key,
+ size_t *ret_volume_key_size) {
+
+ _cleanup_free_ char *xattr_buf = NULL;
+ const char *xa;
+ int r;
+
+ assert(setup);
+ assert(setup->root_fd >= 0);
+
+ r = flistxattr_malloc(setup->root_fd, &xattr_buf);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to retrieve xattr list: %m");
+
+ NULSTR_FOREACH(xa, xattr_buf) {
+ _cleanup_free_ void *salt = NULL, *encrypted = NULL;
+ _cleanup_free_ char *value = NULL;
+ size_t salt_size, encrypted_size;
+ const char *nr, *e;
+ char **list;
+ int n;
+
+ /* Check if this xattr has the format 'trusted.fscrypt_slot<nr>' where '<nr>' is a 32bit unsigned integer */
+ nr = startswith(xa, "trusted.fscrypt_slot");
+ if (!nr)
+ continue;
+ if (safe_atou32(nr, NULL) < 0)
+ continue;
+
+ n = fgetxattr_malloc(setup->root_fd, xa, &value);
+ if (n == -ENODATA) /* deleted by now? */
+ continue;
+ if (n < 0)
+ return log_error_errno(n, "Failed to read %s xattr: %m", xa);
+
+ e = memchr(value, ':', n);
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "xattr %s lacks ':' separator: %m", xa);
+
+ r = unbase64mem(value, e - value, &salt, &salt_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode salt of %s: %m", xa);
+ r = unbase64mem(e+1, n - (e - value) - 1, &encrypted, &encrypted_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode encrypted key of %s: %m", xa);
+
+ r = -ENOANO;
+ FOREACH_POINTER(list, cache->pkcs11_passwords, cache->fido2_passwords, password) {
+ r = fscrypt_slot_try_many(
+ list,
+ salt, salt_size,
+ encrypted, encrypted_size,
+ setup->fscrypt_key_descriptor,
+ ret_volume_key, ret_volume_key_size);
+ if (r != -ENOANO)
+ break;
+ }
+ if (r < 0) {
+ if (r != -ENOANO)
+ return r;
+ } else
+ return 0;
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Failed to set up home directory with provided passwords.");
+}
+
+int home_prepare_fscrypt(
+ UserRecord *h,
+ bool already_activated,
+ PasswordCache *cache,
+ HomeSetup *setup) {
+
+ _cleanup_(erase_and_freep) void *volume_key = NULL;
+ struct fscrypt_policy policy = {};
+ size_t volume_key_size = 0;
+ const char *ip;
+ int r;
+
+ assert(h);
+ assert(setup);
+ assert(user_record_storage(h) == USER_FSCRYPT);
+
+ assert_se(ip = user_record_image_path(h));
+
+ setup->root_fd = open(ip, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (setup->root_fd < 0)
+ return log_error_errno(errno, "Failed to open home directory: %m");
+
+ if (ioctl(setup->root_fd, FS_IOC_GET_ENCRYPTION_POLICY, &policy) < 0) {
+ if (errno == ENODATA)
+ return log_error_errno(errno, "Home directory %s is not encrypted.", ip);
+ if (ERRNO_IS_NOT_SUPPORTED(errno)) {
+ log_error_errno(errno, "File system does not support fscrypt: %m");
+ return -ENOLINK; /* make recognizable */
+ }
+ return log_error_errno(errno, "Failed to acquire encryption policy of %s: %m", ip);
+ }
+
+ memcpy(setup->fscrypt_key_descriptor, policy.master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE);
+
+ r = fscrypt_setup(
+ cache,
+ h->password,
+ setup,
+ &volume_key,
+ &volume_key_size);
+ if (r < 0)
+ return r;
+
+ /* Also install the access key in the user's own keyring */
+
+ if (uid_is_valid(h->uid)) {
+ r = safe_fork("(sd-addkey)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed install encryption key in user's keyring: %m");
+ if (r == 0) {
+ gid_t gid;
+
+ /* Child */
+
+ gid = user_record_gid(h);
+ if (setresgid(gid, gid, gid) < 0) {
+ log_error_errno(errno, "Failed to change GID to " GID_FMT ": %m", gid);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (setgroups(0, NULL) < 0) {
+ log_error_errno(errno, "Failed to reset auxiliary groups list: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (setresuid(h->uid, h->uid, h->uid) < 0) {
+ log_error_errno(errno, "Failed to change UID to " UID_FMT ": %m", h->uid);
+ _exit(EXIT_FAILURE);
+ }
+
+ r = fscrypt_upload_volume_key(
+ setup->fscrypt_key_descriptor,
+ volume_key,
+ volume_key_size,
+ KEY_SPEC_USER_KEYRING);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+ }
+
+ return 0;
+}
+
+static int fscrypt_slot_set(
+ int root_fd,
+ const void *volume_key,
+ size_t volume_key_size,
+ const char *password,
+ uint32_t nr) {
+
+ _cleanup_free_ char *salt_base64 = NULL, *encrypted_base64 = NULL, *joined = NULL;
+ char label[STRLEN("trusted.fscrypt_slot") + DECIMAL_STR_MAX(nr) + 1];
+ _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
+ int r, encrypted_size_out1, encrypted_size_out2;
+ uint8_t salt[64], derived[512 / 8] = {};
+ _cleanup_free_ void *encrypted = NULL;
+ const EVP_CIPHER *cc;
+ size_t encrypted_size;
+
+ r = genuine_random_bytes(salt, sizeof(salt), RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate salt: %m");
+
+ if (PKCS5_PBKDF2_HMAC(
+ password, strlen(password),
+ salt, sizeof(salt),
+ 0xFFFF, EVP_sha512(),
+ sizeof(derived), derived) != 1) {
+ r = log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "PBKDF2 failed");
+ goto finish;
+ }
+
+ context = EVP_CIPHER_CTX_new();
+ if (!context) {
+ r = log_oom();
+ goto finish;
+ }
+
+ /* We use AES256 in counter mode */
+ cc = EVP_aes_256_ctr();
+
+ /* We only use the first half of the derived key */
+ assert(sizeof(derived) >= (size_t) EVP_CIPHER_key_length(cc));
+
+ if (EVP_EncryptInit_ex(context, cc, NULL, derived, NULL) != 1) {
+ r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize encryption context.");
+ goto finish;
+ }
+
+ /* Flush out the derived key now, we don't need it anymore */
+ explicit_bzero_safe(derived, sizeof(derived));
+
+ encrypted_size = volume_key_size + EVP_CIPHER_key_length(cc) * 2;
+ encrypted = malloc(encrypted_size);
+ if (!encrypted)
+ return log_oom();
+
+ if (EVP_EncryptUpdate(context, (uint8_t*) encrypted, &encrypted_size_out1, volume_key, volume_key_size) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to encrypt volume key.");
+
+ assert((size_t) encrypted_size_out1 <= encrypted_size);
+
+ if (EVP_EncryptFinal_ex(context, (uint8_t*) encrypted_size + encrypted_size_out1, &encrypted_size_out2) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish encryption of volume key.");
+
+ assert((size_t) encrypted_size_out1 + (size_t) encrypted_size_out2 < encrypted_size);
+ encrypted_size = (size_t) encrypted_size_out1 + (size_t) encrypted_size_out2;
+
+ r = base64mem(salt, sizeof(salt), &salt_base64);
+ if (r < 0)
+ return log_oom();
+
+ r = base64mem(encrypted, encrypted_size, &encrypted_base64);
+ if (r < 0)
+ return log_oom();
+
+ joined = strjoin(salt_base64, ":", encrypted_base64);
+ if (!joined)
+ return log_oom();
+
+ xsprintf(label, "trusted.fscrypt_slot%" PRIu32, nr);
+ if (fsetxattr(root_fd, label, joined, strlen(joined), 0) < 0)
+ return log_error_errno(errno, "Failed to write xattr %s: %m", label);
+
+ log_info("Written key slot %s.", label);
+
+ return 0;
+
+finish:
+ explicit_bzero_safe(derived, sizeof(derived));
+ return r;
+}
+
+int home_create_fscrypt(
+ UserRecord *h,
+ char **effective_passwords,
+ UserRecord **ret_home) {
+
+ _cleanup_(rm_rf_physical_and_freep) char *temporary = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
+ _cleanup_(erase_and_freep) void *volume_key = NULL;
+ struct fscrypt_policy policy = {};
+ size_t volume_key_size = 512 / 8;
+ _cleanup_close_ int root_fd = -1;
+ _cleanup_free_ char *d = NULL;
+ uint32_t nr = 0;
+ const char *ip;
+ char **i;
+ int r;
+
+ assert(h);
+ assert(user_record_storage(h) == USER_FSCRYPT);
+ assert(ret_home);
+
+ assert_se(ip = user_record_image_path(h));
+
+ r = tempfn_random(ip, "homework", &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate temporary directory: %m");
+
+ (void) mkdir_parents(d, 0755);
+
+ if (mkdir(d, 0700) < 0)
+ return log_error_errno(errno, "Failed to create temporary home directory %s: %m", d);
+
+ temporary = TAKE_PTR(d); /* Needs to be destroyed now */
+
+ root_fd = open(temporary, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (root_fd < 0)
+ return log_error_errno(errno, "Failed to open temporary home directory: %m");
+
+ if (ioctl(root_fd, FS_IOC_GET_ENCRYPTION_POLICY, &policy) < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(errno)) {
+ log_error_errno(errno, "File system does not support fscrypt: %m");
+ return -ENOLINK; /* make recognizable */
+ }
+ if (errno != ENODATA)
+ return log_error_errno(errno, "Failed to get fscrypt policy of directory: %m");
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Parent of %s already encrypted, refusing.", d);
+
+ volume_key = malloc(volume_key_size);
+ if (!volume_key)
+ return log_oom();
+
+ r = genuine_random_bytes(volume_key, volume_key_size, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire volume key: %m");
+
+ log_info("Generated volume key of size %zu.", volume_key_size);
+
+ policy = (struct fscrypt_policy) {
+ .contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS,
+ .filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS,
+ .flags = FS_POLICY_FLAGS_PAD_32,
+ };
+
+ calculate_key_descriptor(volume_key, volume_key_size, policy.master_key_descriptor);
+
+ r = fscrypt_upload_volume_key(policy.master_key_descriptor, volume_key, volume_key_size, KEY_SPEC_THREAD_KEYRING);
+ if (r < 0)
+ return r;
+
+ log_info("Uploaded volume key to kernel.");
+
+ if (ioctl(root_fd, FS_IOC_SET_ENCRYPTION_POLICY, &policy) < 0)
+ return log_error_errno(errno, "Failed to set fscrypt policy on directory: %m");
+
+ log_info("Encryption policy set.");
+
+ STRV_FOREACH(i, effective_passwords) {
+ r = fscrypt_slot_set(root_fd, volume_key, volume_key_size, *i, nr);
+ if (r < 0)
+ return r;
+
+ nr++;
+ }
+
+ (void) home_update_quota_classic(h, temporary);
+
+ r = home_populate(h, root_fd);
+ if (r < 0)
+ return r;
+
+ r = home_sync_and_statfs(root_fd, NULL);
+ if (r < 0)
+ return r;
+
+ r = user_record_clone(h, USER_RECORD_LOAD_MASK_SECRET, &new_home);
+ if (r < 0)
+ return log_error_errno(r, "Failed to clone record: %m");
+
+ r = user_record_add_binding(
+ new_home,
+ USER_FSCRYPT,
+ ip,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ NULL,
+ NULL,
+ UINT64_MAX,
+ NULL,
+ NULL,
+ h->uid,
+ (gid_t) h->uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add binding to record: %m");
+
+ if (rename(temporary, ip) < 0)
+ return log_error_errno(errno, "Failed to rename %s to %s: %m", temporary, ip);
+
+ temporary = mfree(temporary);
+
+ log_info("Everything completed.");
+
+ *ret_home = TAKE_PTR(new_home);
+ return 0;
+}
+
+int home_passwd_fscrypt(
+ UserRecord *h,
+ HomeSetup *setup,
+ PasswordCache *cache, /* the passwords acquired via PKCS#11/FIDO2 security tokens */
+ char **effective_passwords /* new passwords */) {
+
+ _cleanup_(erase_and_freep) void *volume_key = NULL;
+ _cleanup_free_ char *xattr_buf = NULL;
+ size_t volume_key_size = 0;
+ uint32_t slot = 0;
+ const char *xa;
+ char **p;
+ int r;
+
+ assert(h);
+ assert(user_record_storage(h) == USER_FSCRYPT);
+ assert(setup);
+
+ r = fscrypt_setup(
+ cache,
+ h->password,
+ setup,
+ &volume_key,
+ &volume_key_size);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, effective_passwords) {
+ r = fscrypt_slot_set(setup->root_fd, volume_key, volume_key_size, *p, slot);
+ if (r < 0)
+ return r;
+
+ slot++;
+ }
+
+ r = flistxattr_malloc(setup->root_fd, &xattr_buf);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to retrieve xattr list: %m");
+
+ NULSTR_FOREACH(xa, xattr_buf) {
+ const char *nr;
+ uint32_t z;
+
+ /* Check if this xattr has the format 'trusted.fscrypt_slot<nr>' where '<nr>' is a 32bit unsigned integer */
+ nr = startswith(xa, "trusted.fscrypt_slot");
+ if (!nr)
+ continue;
+ if (safe_atou32(nr, &z) < 0)
+ continue;
+
+ if (z < slot)
+ continue;
+
+ if (fremovexattr(setup->root_fd, xa) < 0)
+
+ if (errno != ENODATA)
+ log_warning_errno(errno, "Failed to remove xattr %s: %m", xa);
+ }
+
+ return 0;
+}
diff --git a/src/home/homework-fscrypt.h b/src/home/homework-fscrypt.h
new file mode 100644
index 0000000..50b0399
--- /dev/null
+++ b/src/home/homework-fscrypt.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "homework.h"
+#include "user-record.h"
+
+int home_prepare_fscrypt(UserRecord *h, bool already_activated, PasswordCache *cache, HomeSetup *setup);
+int home_create_fscrypt(UserRecord *h, char **effective_passwords, UserRecord **ret_home);
+
+int home_passwd_fscrypt(UserRecord *h, HomeSetup *setup, PasswordCache *cache, char **effective_passwords);
diff --git a/src/home/homework-luks.c b/src/home/homework-luks.c
new file mode 100644
index 0000000..b0b2d80
--- /dev/null
+++ b/src/home/homework-luks.c
@@ -0,0 +1,3087 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <libfdisk.h>
+#include <linux/loop.h>
+#include <poll.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/xattr.h>
+
+#include "blkid-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "dm-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fsck-util.h"
+#include "home-util.h"
+#include "homework-luks.h"
+#include "homework-mount.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "memory-util.h"
+#include "missing_magic.h"
+#include "mkdir.h"
+#include "mkfs-util.h"
+#include "mount-util.h"
+#include "openssl-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "resize-fs.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+/* Round down to the nearest 1K size. Note that Linux generally handles block devices with 512 blocks only,
+ * but actually doesn't accept uneven numbers in many cases. To avoid any confusion around this we'll
+ * strictly round disk sizes down to the next 1K boundary.*/
+#define DISK_SIZE_ROUND_DOWN(x) ((x) & ~UINT64_C(1023))
+
+int run_mark_dirty(int fd, bool b) {
+ char x = '1';
+ int r, ret;
+
+ /* Sets or removes the 'user.home-dirty' xattr on the specified file. We use this to detect when a
+ * home directory was not properly unmounted. */
+
+ assert(fd >= 0);
+
+ r = fd_verify_regular(fd);
+ if (r < 0)
+ return r;
+
+ if (b) {
+ ret = fsetxattr(fd, "user.home-dirty", &x, 1, XATTR_CREATE);
+ if (ret < 0 && errno != EEXIST)
+ return log_debug_errno(errno, "Could not mark home directory as dirty: %m");
+
+ } else {
+ r = fsync_full(fd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to synchronize image before marking it clean: %m");
+
+ ret = fremovexattr(fd, "user.home-dirty");
+ if (ret < 0 && errno != ENODATA)
+ return log_debug_errno(errno, "Could not mark home directory as clean: %m");
+ }
+
+ r = fsync_full(fd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to synchronize dirty flag to disk: %m");
+
+ return ret >= 0;
+}
+
+int run_mark_dirty_by_path(const char *path, bool b) {
+ _cleanup_close_ int fd = -1;
+
+ assert(path);
+
+ fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to open %s to mark dirty or clean: %m", path);
+
+ return run_mark_dirty(fd, b);
+}
+
+static int probe_file_system_by_fd(
+ int fd,
+ char **ret_fstype,
+ sd_id128_t *ret_uuid) {
+
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *s = NULL;
+ const char *fstype = NULL, *uuid = NULL;
+ sd_id128_t id;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret_fstype);
+ assert(ret_uuid);
+
+ b = blkid_new_probe();
+ if (!b)
+ return -ENOMEM;
+
+ errno = 0;
+ r = blkid_probe_set_device(b, fd, 0, 0);
+ if (r != 0)
+ return errno > 0 ? -errno : -ENOMEM;
+
+ (void) blkid_probe_enable_superblocks(b, 1);
+ (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_UUID);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (IN_SET(r, -2, 1)) /* nothing found or ambiguous result */
+ return -ENOPKG;
+ if (r != 0)
+ return errno > 0 ? -errno : -EIO;
+
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+ if (!fstype)
+ return -ENOPKG;
+
+ (void) blkid_probe_lookup_value(b, "UUID", &uuid, NULL);
+ if (!uuid)
+ return -ENOPKG;
+
+ r = sd_id128_from_string(uuid, &id);
+ if (r < 0)
+ return r;
+
+ s = strdup(fstype);
+ if (!s)
+ return -ENOMEM;
+
+ *ret_fstype = TAKE_PTR(s);
+ *ret_uuid = id;
+
+ return 0;
+}
+
+static int probe_file_system_by_path(const char *path, char **ret_fstype, sd_id128_t *ret_uuid) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ return probe_file_system_by_fd(fd, ret_fstype, ret_uuid);
+}
+
+static int block_get_size_by_fd(int fd, uint64_t *ret) {
+ struct stat st;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ if (ioctl(fd, BLKGETSIZE64, ret) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int block_get_size_by_path(const char *path, uint64_t *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ return block_get_size_by_fd(fd, ret);
+}
+
+static int run_fsck(const char *node, const char *fstype) {
+ int r, exit_status;
+ pid_t fsck_pid;
+
+ assert(node);
+ assert(fstype);
+
+ r = fsck_exists(fstype);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if fsck for file system %s exists: %m", fstype);
+ if (r == 0) {
+ log_warning("No fsck for file system %s installed, ignoring.", fstype);
+ return 0;
+ }
+
+ r = safe_fork("(fsck)", FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR, &fsck_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execl("/sbin/fsck", "/sbin/fsck", "-aTl", node, NULL);
+ log_error_errno(errno, "Failed to execute fsck: %m");
+ _exit(FSCK_OPERATIONAL_ERROR);
+ }
+
+ exit_status = wait_for_terminate_and_check("fsck", fsck_pid, WAIT_LOG_ABNORMAL);
+ if (exit_status < 0)
+ return exit_status;
+ if ((exit_status & ~FSCK_ERROR_CORRECTED) != 0) {
+ log_warning("fsck failed with exit status %i.", exit_status);
+
+ if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");
+
+ log_warning("Ignoring fsck error.");
+ }
+
+ log_info("File system check completed.");
+
+ return 1;
+}
+
+static int luks_try_passwords(
+ struct crypt_device *cd,
+ char **passwords,
+ void *volume_key,
+ size_t *volume_key_size) {
+
+ char **pp;
+ int r;
+
+ assert(cd);
+
+ STRV_FOREACH(pp, passwords) {
+ size_t vks = *volume_key_size;
+
+ r = crypt_volume_key_get(
+ cd,
+ CRYPT_ANY_SLOT,
+ volume_key,
+ &vks,
+ *pp,
+ strlen(*pp));
+ if (r >= 0) {
+ *volume_key_size = vks;
+ return 0;
+ }
+
+ log_debug_errno(r, "Password %zu didn't work for unlocking LUKS superblock: %m", (size_t) (pp - passwords));
+ }
+
+ return -ENOKEY;
+}
+
+static int luks_setup(
+ const char *node,
+ const char *dm_name,
+ sd_id128_t uuid,
+ const char *cipher,
+ const char *cipher_mode,
+ uint64_t volume_key_size,
+ char **passwords,
+ const PasswordCache *cache,
+ bool discard,
+ struct crypt_device **ret,
+ sd_id128_t *ret_found_uuid,
+ void **ret_volume_key,
+ size_t *ret_volume_key_size) {
+
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(erase_and_freep) void *vk = NULL;
+ sd_id128_t p;
+ size_t vks;
+ char **list;
+ int r;
+
+ assert(node);
+ assert(dm_name);
+ assert(ret);
+
+ r = crypt_init(&cd, node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");
+
+ cryptsetup_enable_logging(cd);
+
+ r = crypt_load(cd, CRYPT_LUKS2, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load LUKS superblock: %m");
+
+ r = crypt_get_volume_key_size(cd);
+ if (r <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
+ vks = (size_t) r;
+
+ if (!sd_id128_is_null(uuid) || ret_found_uuid) {
+ const char *s;
+
+ s = crypt_get_uuid(cd);
+ if (!s)
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");
+
+ r = sd_id128_from_string(s, &p);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");
+
+ /* Check that the UUID matches, if specified */
+ if (!sd_id128_is_null(uuid) &&
+ !sd_id128_equal(uuid, p))
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has wrong UUID.");
+ }
+
+ if (cipher && !streq_ptr(cipher, crypt_get_cipher(cd)))
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher.");
+
+ if (cipher_mode && !streq_ptr(cipher_mode, crypt_get_cipher_mode(cd)))
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher mode.");
+
+ if (volume_key_size != UINT64_MAX && vks != volume_key_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong volume key size.");
+
+ vk = malloc(vks);
+ if (!vk)
+ return log_oom();
+
+ r = -ENOKEY;
+ FOREACH_POINTER(list, cache->pkcs11_passwords, cache->fido2_passwords, passwords) {
+ r = luks_try_passwords(cd, list, vk, &vks);
+ if (r != -ENOKEY)
+ break;
+ }
+ if (r == -ENOKEY)
+ return log_error_errno(r, "No valid password for LUKS superblock.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to unlocks LUKS superblock: %m");
+
+ r = crypt_activate_by_volume_key(
+ cd,
+ dm_name,
+ vk, vks,
+ discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unlock LUKS superblock: %m");
+
+ log_info("Setting up LUKS device /dev/mapper/%s completed.", dm_name);
+
+ *ret = TAKE_PTR(cd);
+
+ if (ret_found_uuid) /* Return the UUID actually found if the caller wants to know */
+ *ret_found_uuid = p;
+ if (ret_volume_key)
+ *ret_volume_key = TAKE_PTR(vk);
+ if (ret_volume_key_size)
+ *ret_volume_key_size = vks;
+
+ return 0;
+}
+
+static int luks_open(
+ const char *dm_name,
+ char **passwords,
+ PasswordCache *cache,
+ struct crypt_device **ret,
+ sd_id128_t *ret_found_uuid,
+ void **ret_volume_key,
+ size_t *ret_volume_key_size) {
+
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(erase_and_freep) void *vk = NULL;
+ sd_id128_t p;
+ char **list;
+ size_t vks;
+ int r;
+
+ assert(dm_name);
+ assert(ret);
+
+ /* Opens a LUKS device that is already set up. Re-validates the password while doing so (which also
+ * provides us with the volume key, which we want). */
+
+ r = crypt_init_by_name(&cd, dm_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", dm_name);
+
+ cryptsetup_enable_logging(cd);
+
+ r = crypt_load(cd, CRYPT_LUKS2, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load LUKS superblock: %m");
+
+ r = crypt_get_volume_key_size(cd);
+ if (r <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
+ vks = (size_t) r;
+
+ if (ret_found_uuid) {
+ const char *s;
+
+ s = crypt_get_uuid(cd);
+ if (!s)
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");
+
+ r = sd_id128_from_string(s, &p);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");
+ }
+
+ vk = malloc(vks);
+ if (!vk)
+ return log_oom();
+
+ r = -ENOKEY;
+ FOREACH_POINTER(list, cache->pkcs11_passwords, cache->fido2_passwords, passwords) {
+ r = luks_try_passwords(cd, list, vk, &vks);
+ if (r != -ENOKEY)
+ break;
+ }
+ if (r == -ENOKEY)
+ return log_error_errno(r, "No valid password for LUKS superblock.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to unlocks LUKS superblock: %m");
+
+ log_info("Discovered used LUKS device /dev/mapper/%s, and validated password.", dm_name);
+
+ /* This is needed so that crypt_resize() can operate correctly for pre-existing LUKS devices. We need
+ * to tell libcryptsetup the volume key explicitly, so that it is in the kernel keyring. */
+ r = crypt_activate_by_volume_key(cd, NULL, vk, vks, CRYPT_ACTIVATE_KEYRING_KEY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to upload volume key again: %m");
+
+ log_info("Successfully re-activated LUKS device.");
+
+ *ret = TAKE_PTR(cd);
+
+ if (ret_found_uuid)
+ *ret_found_uuid = p;
+ if (ret_volume_key)
+ *ret_volume_key = TAKE_PTR(vk);
+ if (ret_volume_key_size)
+ *ret_volume_key_size = vks;
+
+ return 0;
+}
+
+static int fs_validate(
+ const char *dm_node,
+ sd_id128_t uuid,
+ char **ret_fstype,
+ sd_id128_t *ret_found_uuid) {
+
+ _cleanup_free_ char *fstype = NULL;
+ sd_id128_t u;
+ int r;
+
+ assert(dm_node);
+ assert(ret_fstype);
+
+ r = probe_file_system_by_path(dm_node, &fstype, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to probe file system: %m");
+
+ /* Limit the set of supported file systems a bit, as protection against little tested kernel file
+ * systems. Also, we only support the resize ioctls for these file systems. */
+ if (!supported_fstype(fstype))
+ return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Image contains unsupported file system: %s", strna(fstype));
+
+ if (!sd_id128_is_null(uuid) &&
+ !sd_id128_equal(uuid, u))
+ return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "File system has wrong UUID.");
+
+ log_info("Probing file system completed (found %s).", fstype);
+
+ *ret_fstype = TAKE_PTR(fstype);
+
+ if (ret_found_uuid) /* Return the UUID actually found if the caller wants to know */
+ *ret_found_uuid = u;
+
+ return 0;
+}
+
+static int make_dm_names(const char *user_name, char **ret_dm_name, char **ret_dm_node) {
+ _cleanup_free_ char *name = NULL, *node = NULL;
+
+ assert(user_name);
+ assert(ret_dm_name);
+ assert(ret_dm_node);
+
+ name = strjoin("home-", user_name);
+ if (!name)
+ return log_oom();
+
+ node = path_join("/dev/mapper/", name);
+ if (!node)
+ return log_oom();
+
+ *ret_dm_name = TAKE_PTR(name);
+ *ret_dm_node = TAKE_PTR(node);
+ return 0;
+}
+
+static int luks_validate(
+ int fd,
+ const char *label,
+ sd_id128_t partition_uuid,
+ sd_id128_t *ret_partition_uuid,
+ uint64_t *ret_offset,
+ uint64_t *ret_size) {
+
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ sd_id128_t found_partition_uuid = SD_ID128_NULL;
+ const char *fstype = NULL, *pttype = NULL;
+ blkid_loff_t offset = 0, size = 0;
+ blkid_partlist pl;
+ bool found = false;
+ int r, i, n;
+
+ assert(fd >= 0);
+ assert(label);
+ assert(ret_offset);
+ assert(ret_size);
+
+ b = blkid_new_probe();
+ if (!b)
+ return -ENOMEM;
+
+ errno = 0;
+ r = blkid_probe_set_device(b, fd, 0, 0);
+ if (r != 0)
+ return errno > 0 ? -errno : -ENOMEM;
+
+ (void) blkid_probe_enable_superblocks(b, 1);
+ (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+ (void) blkid_probe_enable_partitions(b, 1);
+ (void) blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (IN_SET(r, -2, 1)) /* nothing found or ambiguous result */
+ return -ENOPKG;
+ if (r != 0)
+ return errno > 0 ? -errno : -EIO;
+
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+ if (streq_ptr(fstype, "crypto_LUKS")) {
+ /* Directly a LUKS image */
+ *ret_offset = 0;
+ *ret_size = UINT64_MAX; /* full disk */
+ *ret_partition_uuid = SD_ID128_NULL;
+ return 0;
+ } else if (fstype)
+ return -ENOPKG;
+
+ (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
+ if (!streq_ptr(pttype, "gpt"))
+ return -ENOPKG;
+
+ errno = 0;
+ pl = blkid_probe_get_partitions(b);
+ if (!pl)
+ return errno > 0 ? -errno : -ENOMEM;
+
+ errno = 0;
+ n = blkid_partlist_numof_partitions(pl);
+ if (n < 0)
+ return errno > 0 ? -errno : -EIO;
+
+ for (i = 0; i < n; i++) {
+ blkid_partition pp;
+ sd_id128_t id;
+ const char *sid;
+
+ errno = 0;
+ pp = blkid_partlist_get_partition(pl, i);
+ if (!pp)
+ return errno > 0 ? -errno : -EIO;
+
+ if (!streq_ptr(blkid_partition_get_type_string(pp), "773f91ef-66d4-49b5-bd83-d683bf40ad16"))
+ continue;
+
+ if (!streq_ptr(blkid_partition_get_name(pp), label))
+ continue;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (sid) {
+ r = sd_id128_from_string(sid, &id);
+ if (r < 0)
+ log_debug_errno(r, "Couldn't parse partition UUID %s, weird: %m", sid);
+
+ if (!sd_id128_is_null(partition_uuid) && !sd_id128_equal(id, partition_uuid))
+ continue;
+ }
+
+ if (found)
+ return -ENOPKG;
+
+ offset = blkid_partition_get_start(pp);
+ size = blkid_partition_get_size(pp);
+ found_partition_uuid = id;
+
+ found = true;
+ }
+
+ if (!found)
+ return -ENOPKG;
+
+ if (offset < 0)
+ return -EINVAL;
+ if ((uint64_t) offset > UINT64_MAX / 512U)
+ return -EINVAL;
+ if (size <= 0)
+ return -EINVAL;
+ if ((uint64_t) size > UINT64_MAX / 512U)
+ return -EINVAL;
+
+ *ret_offset = offset * 512U;
+ *ret_size = size * 512U;
+ *ret_partition_uuid = found_partition_uuid;
+
+ return 0;
+}
+
+static int crypt_device_to_evp_cipher(struct crypt_device *cd, const EVP_CIPHER **ret) {
+ _cleanup_free_ char *cipher_name = NULL;
+ const char *cipher, *cipher_mode, *e;
+ size_t key_size, key_bits;
+ const EVP_CIPHER *cc;
+ int r;
+
+ assert(cd);
+
+ /* Let's find the right OpenSSL EVP_CIPHER object that matches the encryption settings of the LUKS
+ * device */
+
+ cipher = crypt_get_cipher(cd);
+ if (!cipher)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher from LUKS device.");
+
+ cipher_mode = crypt_get_cipher_mode(cd);
+ if (!cipher_mode)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher mode from LUKS device.");
+
+ e = strchr(cipher_mode, '-');
+ if (e)
+ cipher_mode = strndupa(cipher_mode, e - cipher_mode);
+
+ r = crypt_get_volume_key_size(cd);
+ if (r <= 0)
+ return log_error_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), "Cannot get volume key size from LUKS device.");
+
+ key_size = r;
+ key_bits = key_size * 8;
+ if (streq(cipher_mode, "xts"))
+ key_bits /= 2;
+
+ if (asprintf(&cipher_name, "%s-%zu-%s", cipher, key_bits, cipher_mode) < 0)
+ return log_oom();
+
+ cc = EVP_get_cipherbyname(cipher_name);
+ if (!cc)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Selected cipher mode '%s' not supported, can't encrypt JSON record.", cipher_name);
+
+ /* Verify that our key length calculations match what OpenSSL thinks */
+ r = EVP_CIPHER_key_length(cc);
+ if (r < 0 || (uint64_t) r != key_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Key size of selected cipher doesn't meet our expectations.");
+
+ *ret = cc;
+ return 0;
+}
+
+static int luks_validate_home_record(
+ struct crypt_device *cd,
+ UserRecord *h,
+ const void *volume_key,
+ PasswordCache *cache,
+ UserRecord **ret_luks_home_record) {
+
+ int r, token;
+
+ assert(cd);
+ assert(h);
+
+ for (token = 0;; token++) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *rr = NULL;
+ _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *lhr = NULL;
+ _cleanup_free_ void *encrypted = NULL, *iv = NULL;
+ size_t decrypted_size, encrypted_size, iv_size;
+ int decrypted_size_out1, decrypted_size_out2;
+ _cleanup_free_ char *decrypted = NULL;
+ const char *text, *type;
+ crypt_token_info state;
+ JsonVariant *jr, *jiv;
+ unsigned line, column;
+ const EVP_CIPHER *cc;
+
+ state = crypt_token_status(cd, token, &type);
+ if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, give up */
+ break;
+ if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
+ continue;
+ if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);
+
+ if (!streq(type, "systemd-homed"))
+ continue;
+
+ r = crypt_token_json_get(cd, token, &text);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read LUKS token %i: %m", token);
+
+ r = json_parse(text, JSON_PARSE_SENSITIVE, &v, &line, &column);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse LUKS token JSON data %u:%u: %m", line, column);
+
+ jr = json_variant_by_key(v, "record");
+ if (!jr)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'record' field.");
+ jiv = json_variant_by_key(v, "iv");
+ if (!jiv)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'iv' field.");
+
+ r = json_variant_unbase64(jr, &encrypted, &encrypted_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to base64 decode record: %m");
+
+ r = json_variant_unbase64(jiv, &iv, &iv_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to base64 decode IV: %m");
+
+ r = crypt_device_to_evp_cipher(cd, &cc);
+ if (r < 0)
+ return r;
+ if (iv_size > INT_MAX || EVP_CIPHER_iv_length(cc) != (int) iv_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "IV size doesn't match.");
+
+ context = EVP_CIPHER_CTX_new();
+ if (!context)
+ return log_oom();
+
+ if (EVP_DecryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize decryption context.");
+
+ decrypted_size = encrypted_size + EVP_CIPHER_key_length(cc) * 2;
+ decrypted = new(char, decrypted_size);
+ if (!decrypted)
+ return log_oom();
+
+ if (EVP_DecryptUpdate(context, (uint8_t*) decrypted, &decrypted_size_out1, encrypted, encrypted_size) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decrypt JSON record.");
+
+ assert((size_t) decrypted_size_out1 <= decrypted_size);
+
+ if (EVP_DecryptFinal_ex(context, (uint8_t*) decrypted + decrypted_size_out1, &decrypted_size_out2) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish decryption of JSON record.");
+
+ assert((size_t) decrypted_size_out1 + (size_t) decrypted_size_out2 < decrypted_size);
+ decrypted_size = (size_t) decrypted_size_out1 + (size_t) decrypted_size_out2;
+
+ if (memchr(decrypted, 0, decrypted_size))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Inner NUL byte in JSON record, refusing.");
+
+ decrypted[decrypted_size] = 0;
+
+ r = json_parse(decrypted, JSON_PARSE_SENSITIVE, &rr, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse decrypted JSON record, refusing.");
+
+ lhr = user_record_new();
+ if (!lhr)
+ return log_oom();
+
+ r = user_record_load(lhr, rr, USER_RECORD_LOAD_EMBEDDED);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse user record: %m");
+
+ if (!user_record_compatible(h, lhr))
+ return log_error_errno(SYNTHETIC_ERRNO(EREMCHG), "LUKS home record not compatible with host record, refusing.");
+
+ r = user_record_authenticate(lhr, h, cache, /* strict_verify= */ true);
+ if (r < 0)
+ return r;
+ assert(r > 0); /* Insist that a password was verified */
+
+ *ret_luks_home_record = TAKE_PTR(lhr);
+ return 0;
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Couldn't find home record in LUKS2 header, refusing.");
+}
+
+static int format_luks_token_text(
+ struct crypt_device *cd,
+ UserRecord *hr,
+ const void *volume_key,
+ char **ret) {
+
+ int r, encrypted_size_out1 = 0, encrypted_size_out2 = 0, iv_size, key_size;
+ _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_free_ void *iv = NULL, *encrypted = NULL;
+ size_t text_length, encrypted_size;
+ _cleanup_free_ char *text = NULL;
+ const EVP_CIPHER *cc;
+
+ assert(cd);
+ assert(hr);
+ assert(volume_key);
+ assert(ret);
+
+ r = crypt_device_to_evp_cipher(cd, &cc);
+ if (r < 0)
+ return r;
+
+ key_size = EVP_CIPHER_key_length(cc);
+ iv_size = EVP_CIPHER_iv_length(cc);
+
+ if (iv_size > 0) {
+ iv = malloc(iv_size);
+ if (!iv)
+ return log_oom();
+
+ r = genuine_random_bytes(iv, iv_size, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate IV: %m");
+ }
+
+ context = EVP_CIPHER_CTX_new();
+ if (!context)
+ return log_oom();
+
+ if (EVP_EncryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize encryption context.");
+
+ r = json_variant_format(hr->json, 0, &text);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format user record for LUKS: %m");
+
+ text_length = strlen(text);
+ encrypted_size = text_length + 2*key_size - 1;
+
+ encrypted = malloc(encrypted_size);
+ if (!encrypted)
+ return log_oom();
+
+ if (EVP_EncryptUpdate(context, encrypted, &encrypted_size_out1, (uint8_t*) text, text_length) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to encrypt JSON record.");
+
+ assert((size_t) encrypted_size_out1 <= encrypted_size);
+
+ if (EVP_EncryptFinal_ex(context, (uint8_t*) encrypted + encrypted_size_out1, &encrypted_size_out2) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish encryption of JSON record. ");
+
+ assert((size_t) encrypted_size_out1 + (size_t) encrypted_size_out2 <= encrypted_size);
+
+ r = json_build(&v,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("type", JSON_BUILD_STRING("systemd-homed")),
+ JSON_BUILD_PAIR("keyslots", JSON_BUILD_EMPTY_ARRAY),
+ JSON_BUILD_PAIR("record", JSON_BUILD_BASE64(encrypted, encrypted_size_out1 + encrypted_size_out2)),
+ JSON_BUILD_PAIR("iv", JSON_BUILD_BASE64(iv, iv_size))));
+ if (r < 0)
+ return log_error_errno(r, "Failed to prepare LUKS JSON token object: %m");
+
+ r = json_variant_format(v, 0, ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format encrypted user record for LUKS: %m");
+
+ return 0;
+}
+
+int home_store_header_identity_luks(
+ UserRecord *h,
+ HomeSetup *setup,
+ UserRecord *old_home) {
+
+ _cleanup_(user_record_unrefp) UserRecord *header_home = NULL;
+ _cleanup_free_ char *text = NULL;
+ int token = 0, r;
+
+ assert(h);
+
+ if (!setup->crypt_device)
+ return 0;
+
+ assert(setup->volume_key);
+
+ /* Let's store the user's identity record in the LUKS2 "token" header data fields, in an encrypted
+ * fashion. Why that? If we'd rely on the record being embedded in the payload file system itself we
+ * would have to mount the file system before we can validate the JSON record, its signatures and
+ * whether it matches what we are looking for. However, kernel file system implementations are
+ * generally not ready to be used on untrusted media. Hence let's store the record independently of
+ * the file system, so that we can validate it first, and only then mount the file system. To keep
+ * things simple we use the same encryption settings for this record as for the file system itself. */
+
+ r = user_record_clone(h, USER_RECORD_EXTRACT_EMBEDDED, &header_home);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine new header record: %m");
+
+ if (old_home && user_record_equal(old_home, header_home)) {
+ log_debug("Not updating header home record.");
+ return 0;
+ }
+
+ r = format_luks_token_text(setup->crypt_device, header_home, setup->volume_key, &text);
+ if (r < 0)
+ return r;
+
+ for (;; token++) {
+ crypt_token_info state;
+ const char *type;
+
+ state = crypt_token_status(setup->crypt_device, token, &type);
+ if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, we are done */
+ break;
+ if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
+ continue; /* Not ours */
+ if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);
+
+ if (!streq(type, "systemd-homed"))
+ continue;
+
+ r = crypt_token_json_set(setup->crypt_device, token, text);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set JSON token for slot %i: %m", token);
+
+ /* Now, let's free the text so that for all further matching tokens we all crypt_json_token_set()
+ * with a NULL text in order to invalidate the tokens. */
+ text = mfree(text);
+ token++;
+ }
+
+ if (text)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Didn't find any record token to update.");
+
+ log_info("Wrote LUKS header user record.");
+
+ return 1;
+}
+
+int run_fitrim(int root_fd) {
+ char buf[FORMAT_BYTES_MAX];
+ struct fstrim_range range = {
+ .len = UINT64_MAX,
+ };
+
+ /* If discarding is on, discard everything right after mounting, so that the discard setting takes
+ * effect on activation. (Also, optionally, trim on logout) */
+
+ assert(root_fd >= 0);
+
+ if (ioctl(root_fd, FITRIM, &range) < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EBADF) {
+ log_debug_errno(errno, "File system does not support FITRIM, not trimming.");
+ return 0;
+ }
+
+ return log_warning_errno(errno, "Failed to invoke FITRIM, ignoring: %m");
+ }
+
+ log_info("Discarded unused %s.",
+ format_bytes(buf, sizeof(buf), range.len));
+ return 1;
+}
+
+int run_fitrim_by_path(const char *root_path) {
+ _cleanup_close_ int root_fd = -1;
+
+ root_fd = open(root_path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (root_fd < 0)
+ return log_error_errno(errno, "Failed to open file system '%s' for trimming: %m", root_path);
+
+ return run_fitrim(root_fd);
+}
+
+int run_fallocate(int backing_fd, const struct stat *st) {
+ char buf[FORMAT_BYTES_MAX];
+ struct stat stbuf;
+
+ assert(backing_fd >= 0);
+
+ /* If discarding is off, let's allocate the whole image before mounting, so that the setting takes
+ * effect on activation */
+
+ if (!st) {
+ if (fstat(backing_fd, &stbuf) < 0)
+ return log_error_errno(errno, "Failed to fstat(): %m");
+
+ st = &stbuf;
+ }
+
+ if (!S_ISREG(st->st_mode))
+ return 0;
+
+ if (st->st_blocks >= DIV_ROUND_UP(st->st_size, 512)) {
+ log_info("Backing file is fully allocated already.");
+ return 0;
+ }
+
+ if (fallocate(backing_fd, FALLOC_FL_KEEP_SIZE, 0, st->st_size) < 0) {
+
+ if (ERRNO_IS_NOT_SUPPORTED(errno)) {
+ log_debug_errno(errno, "fallocate() not supported on file system, ignoring.");
+ return 0;
+ }
+
+ if (ERRNO_IS_DISK_SPACE(errno)) {
+ log_debug_errno(errno, "Not enough disk space to fully allocate home.");
+ return -ENOSPC; /* make recognizable */
+ }
+
+ return log_error_errno(errno, "Failed to allocate backing file blocks: %m");
+ }
+
+ log_info("Allocated additional %s.",
+ format_bytes(buf, sizeof(buf), (DIV_ROUND_UP(st->st_size, 512) - st->st_blocks) * 512));
+ return 1;
+}
+
+int run_fallocate_by_path(const char *backing_path) {
+ _cleanup_close_ int backing_fd = -1;
+
+ backing_fd = open(backing_path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (backing_fd < 0)
+ return log_error_errno(errno, "Failed to open '%s' for fallocate(): %m", backing_path);
+
+ return run_fallocate(backing_fd, NULL);
+}
+
+int home_prepare_luks(
+ UserRecord *h,
+ bool already_activated,
+ const char *force_image_path,
+ PasswordCache *cache,
+ HomeSetup *setup,
+ UserRecord **ret_luks_home) {
+
+ sd_id128_t found_partition_uuid, found_luks_uuid, found_fs_uuid;
+ _cleanup_(user_record_unrefp) UserRecord *luks_home = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(erase_and_freep) void *volume_key = NULL;
+ _cleanup_close_ int root_fd = -1, image_fd = -1;
+ bool dm_activated = false, mounted = false;
+ size_t volume_key_size = 0;
+ bool marked_dirty = false;
+ uint64_t offset, size;
+ int r;
+
+ assert(h);
+ assert(setup);
+ assert(setup->dm_name);
+ assert(setup->dm_node);
+
+ assert(user_record_storage(h) == USER_LUKS);
+
+ if (already_activated) {
+ struct loop_info64 info;
+ const char *n;
+
+ r = luks_open(setup->dm_name,
+ h->password,
+ cache,
+ &cd,
+ &found_luks_uuid,
+ &volume_key,
+ &volume_key_size);
+ if (r < 0)
+ return r;
+
+ r = luks_validate_home_record(cd, h, volume_key, cache, &luks_home);
+ if (r < 0)
+ return r;
+
+ n = crypt_get_device_name(cd);
+ if (!n)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine backing device for DM %s.", setup->dm_name);
+
+ r = loop_device_open(n, O_RDWR, &loop);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open loopback device %s: %m", n);
+
+ if (ioctl(loop->fd, LOOP_GET_STATUS64, &info) < 0) {
+ _cleanup_free_ char *sysfs = NULL;
+ struct stat st;
+
+ if (!IN_SET(errno, ENOTTY, EINVAL))
+ return log_error_errno(errno, "Failed to get block device metrics of %s: %m", n);
+
+ if (ioctl(loop->fd, BLKGETSIZE64, &size) < 0)
+ return log_error_errno(r, "Failed to read block device size of %s: %m", n);
+
+ if (fstat(loop->fd, &st) < 0)
+ return log_error_errno(r, "Failed to stat block device %s: %m", n);
+ assert(S_ISBLK(st.st_mode));
+
+ if (asprintf(&sysfs, "/sys/dev/block/%u:%u/partition", major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return log_oom();
+
+ if (access(sysfs, F_OK) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine whether %s exists: %m", sysfs);
+
+ offset = 0;
+ } else {
+ _cleanup_free_ char *buffer = NULL;
+
+ if (asprintf(&sysfs, "/sys/dev/block/%u:%u/start", major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return log_oom();
+
+ r = read_one_line_file(sysfs, &buffer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read partition start offset: %m");
+
+ r = safe_atou64(buffer, &offset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse partition start offset: %m");
+
+ if (offset > UINT64_MAX / 512U)
+ return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Offset too large for 64 byte range, refusing.");
+
+ offset *= 512U;
+ }
+ } else {
+ offset = info.lo_offset;
+ size = info.lo_sizelimit;
+ }
+
+ found_partition_uuid = found_fs_uuid = SD_ID128_NULL;
+
+ log_info("Discovered used loopback device %s.", loop->node);
+
+ root_fd = open(user_record_home_directory(h), O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (root_fd < 0) {
+ r = log_error_errno(r, "Failed to open home directory: %m");
+ goto fail;
+ }
+ } else {
+ _cleanup_free_ char *fstype = NULL, *subdir = NULL;
+ const char *ip;
+ struct stat st;
+
+ ip = force_image_path ?: user_record_image_path(h);
+
+ subdir = path_join("/run/systemd/user-home-mount/", user_record_user_name_and_realm(h));
+ if (!subdir)
+ return log_oom();
+
+ image_fd = open(ip, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (image_fd < 0)
+ return log_error_errno(errno, "Failed to open image file %s: %m", ip);
+
+ if (fstat(image_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to fstat() image file: %m");
+ if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
+ return log_error_errno(
+ S_ISDIR(st.st_mode) ? SYNTHETIC_ERRNO(EISDIR) : SYNTHETIC_ERRNO(EBADFD),
+ "Image file %s is not a regular file or block device: %m", ip);
+
+ r = luks_validate(image_fd, user_record_user_name_and_realm(h), h->partition_uuid, &found_partition_uuid, &offset, &size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to validate disk label: %m");
+
+ /* Everything before this point left the image untouched. We are now starting to make
+ * changes, hence mark the image dirty */
+ marked_dirty = run_mark_dirty(image_fd, true) > 0;
+
+ if (!user_record_luks_discard(h)) {
+ r = run_fallocate(image_fd, &st);
+ if (r < 0)
+ return r;
+ }
+
+ r = loop_device_make(image_fd, O_RDWR, offset, size, 0, &loop);
+ if (r == -ENOENT) {
+ log_error_errno(r, "Loopback block device support is not available on this system.");
+ return -ENOLINK; /* make recognizable */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate loopback context: %m");
+
+ log_info("Setting up loopback device %s completed.", loop->node ?: ip);
+
+ r = luks_setup(loop->node ?: ip,
+ setup->dm_name,
+ h->luks_uuid,
+ h->luks_cipher,
+ h->luks_cipher_mode,
+ h->luks_volume_key_size,
+ h->password,
+ cache,
+ user_record_luks_discard(h) || user_record_luks_offline_discard(h),
+ &cd,
+ &found_luks_uuid,
+ &volume_key,
+ &volume_key_size);
+ if (r < 0)
+ return r;
+
+ dm_activated = true;
+
+ r = luks_validate_home_record(cd, h, volume_key, cache, &luks_home);
+ if (r < 0)
+ goto fail;
+
+ r = fs_validate(setup->dm_node, h->file_system_uuid, &fstype, &found_fs_uuid);
+ if (r < 0)
+ goto fail;
+
+ r = run_fsck(setup->dm_node, fstype);
+ if (r < 0)
+ goto fail;
+
+ r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h));
+ if (r < 0)
+ goto fail;
+
+ mounted = true;
+
+ root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (root_fd < 0) {
+ r = log_error_errno(r, "Failed to open home directory: %m");
+ goto fail;
+ }
+
+ if (user_record_luks_discard(h))
+ (void) run_fitrim(root_fd);
+
+ setup->image_fd = TAKE_FD(image_fd);
+ setup->do_offline_fallocate = !(setup->do_offline_fitrim = user_record_luks_offline_discard(h));
+ setup->do_mark_clean = marked_dirty;
+ }
+
+ setup->loop = TAKE_PTR(loop);
+ setup->crypt_device = TAKE_PTR(cd);
+ setup->root_fd = TAKE_FD(root_fd);
+ setup->found_partition_uuid = found_partition_uuid;
+ setup->found_luks_uuid = found_luks_uuid;
+ setup->found_fs_uuid = found_fs_uuid;
+ setup->partition_offset = offset;
+ setup->partition_size = size;
+ setup->volume_key = TAKE_PTR(volume_key);
+ setup->volume_key_size = volume_key_size;
+
+ setup->undo_mount = mounted;
+ setup->undo_dm = dm_activated;
+
+ if (ret_luks_home)
+ *ret_luks_home = TAKE_PTR(luks_home);
+
+ return 0;
+
+fail:
+ if (mounted)
+ (void) umount_verbose(LOG_ERR, "/run/systemd/user-home-mount", UMOUNT_NOFOLLOW);
+
+ if (dm_activated)
+ (void) crypt_deactivate(cd, setup->dm_name);
+
+ if (image_fd >= 0 && marked_dirty)
+ (void) run_mark_dirty(image_fd, false);
+
+ return r;
+}
+
+static void print_size_summary(uint64_t host_size, uint64_t encrypted_size, struct statfs *sfs) {
+ char buffer1[FORMAT_BYTES_MAX], buffer2[FORMAT_BYTES_MAX], buffer3[FORMAT_BYTES_MAX], buffer4[FORMAT_BYTES_MAX];
+
+ assert(sfs);
+
+ log_info("Image size is %s, file system size is %s, file system payload size is %s, file system free is %s.",
+ format_bytes(buffer1, sizeof(buffer1), host_size),
+ format_bytes(buffer2, sizeof(buffer2), encrypted_size),
+ format_bytes(buffer3, sizeof(buffer3), (uint64_t) sfs->f_blocks * (uint64_t) sfs->f_frsize),
+ format_bytes(buffer4, sizeof(buffer4), (uint64_t) sfs->f_bfree * (uint64_t) sfs->f_frsize));
+}
+
+int home_activate_luks(
+ UserRecord *h,
+ PasswordCache *cache,
+ UserRecord **ret_home) {
+
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL, *luks_home_record = NULL;
+ _cleanup_(home_setup_undo) HomeSetup setup = HOME_SETUP_INIT;
+ uint64_t host_size, encrypted_size;
+ const char *hdo, *hd;
+ struct statfs sfs;
+ int r;
+
+ assert(h);
+ assert(user_record_storage(h) == USER_LUKS);
+ assert(ret_home);
+
+ assert_se(hdo = user_record_home_directory(h));
+ hd = strdupa(hdo); /* copy the string out, since it might change later in the home record object */
+
+ r = make_dm_names(h->user_name, &setup.dm_name, &setup.dm_node);
+ if (r < 0)
+ return r;
+
+ r = access(setup.dm_node, F_OK);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup.dm_node);
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup.dm_node);
+
+ r = home_prepare_luks(
+ h,
+ false,
+ NULL,
+ cache,
+ &setup,
+ &luks_home_record);
+ if (r < 0)
+ return r;
+
+ r = block_get_size_by_fd(setup.loop->fd, &host_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get loopback block device size: %m");
+
+ r = block_get_size_by_path(setup.dm_node, &encrypted_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get LUKS block device size: %m");
+
+ r = home_refresh(
+ h,
+ &setup,
+ luks_home_record,
+ cache,
+ &sfs,
+ &new_home);
+ if (r < 0)
+ return r;
+
+ r = home_extend_embedded_identity(new_home, h, &setup);
+ if (r < 0)
+ return r;
+
+ setup.root_fd = safe_close(setup.root_fd);
+
+ r = home_move_mount(user_record_user_name_and_realm(h), hd);
+ if (r < 0)
+ return r;
+
+ setup.undo_mount = false;
+ setup.do_offline_fitrim = false;
+
+ loop_device_relinquish(setup.loop);
+
+ r = crypt_deactivate_by_name(NULL, setup.dm_name, CRYPT_DEACTIVATE_DEFERRED);
+ if (r < 0)
+ log_warning_errno(r, "Failed to relinquish DM device, ignoring: %m");
+
+ setup.undo_dm = false;
+ setup.do_offline_fallocate = false;
+ setup.do_mark_clean = false;
+
+ log_info("Everything completed.");
+
+ print_size_summary(host_size, encrypted_size, &sfs);
+
+ *ret_home = TAKE_PTR(new_home);
+ return 1;
+}
+
+int home_deactivate_luks(UserRecord *h) {
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_free_ char *dm_name = NULL, *dm_node = NULL;
+ bool we_detached;
+ int r;
+
+ /* Note that the DM device and loopback device are set to auto-detach, hence strictly speaking we
+ * don't have to explicitly have to detach them. However, we do that nonetheless (in case of the DM
+ * device), to avoid races: by explicitly detaching them we know when the detaching is complete. We
+ * don't bother about the loopback device because unlike the DM device it doesn't have a fixed
+ * name. */
+
+ r = make_dm_names(h->user_name, &dm_name, &dm_node);
+ if (r < 0)
+ return r;
+
+ r = crypt_init_by_name(&cd, dm_name);
+ if (IN_SET(r, -ENODEV, -EINVAL, -ENOENT)) {
+ log_debug_errno(r, "LUKS device %s has already been detached.", dm_name);
+ we_detached = false;
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", dm_name);
+ else {
+ log_info("Discovered used LUKS device %s.", dm_node);
+
+ cryptsetup_enable_logging(cd);
+
+ r = crypt_deactivate(cd, dm_name);
+ if (IN_SET(r, -ENODEV, -EINVAL, -ENOENT)) {
+ log_debug_errno(r, "LUKS device %s is already detached.", dm_node);
+ we_detached = false;
+ } else if (r < 0)
+ return log_info_errno(r, "LUKS device %s couldn't be deactivated: %m", dm_node);
+ else {
+ log_info("LUKS device detaching completed.");
+ we_detached = true;
+ }
+ }
+
+ if (user_record_luks_offline_discard(h))
+ log_debug("Not allocating on logout.");
+ else
+ (void) run_fallocate_by_path(user_record_image_path(h));
+
+ run_mark_dirty_by_path(user_record_image_path(h), false);
+ return we_detached;
+}
+
+int home_trim_luks(UserRecord *h) {
+ assert(h);
+
+ if (!user_record_luks_offline_discard(h)) {
+ log_debug("Not trimming on logout.");
+ return 0;
+ }
+
+ (void) run_fitrim_by_path(user_record_home_directory(h));
+ return 0;
+}
+
+static struct crypt_pbkdf_type* build_good_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
+ assert(buffer);
+ assert(hr);
+
+ *buffer = (struct crypt_pbkdf_type) {
+ .hash = user_record_luks_pbkdf_hash_algorithm(hr),
+ .type = user_record_luks_pbkdf_type(hr),
+ .time_ms = user_record_luks_pbkdf_time_cost_usec(hr) / USEC_PER_MSEC,
+ .max_memory_kb = user_record_luks_pbkdf_memory_cost(hr) / 1024,
+ .parallel_threads = user_record_luks_pbkdf_parallel_threads(hr),
+ };
+
+ return buffer;
+}
+
+static struct crypt_pbkdf_type* build_minimal_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
+ assert(buffer);
+ assert(hr);
+
+ /* For PKCS#11 derived keys (which are generated randomly and are of high quality already) we use a
+ * minimal PBKDF */
+ *buffer = (struct crypt_pbkdf_type) {
+ .hash = user_record_luks_pbkdf_hash_algorithm(hr),
+ .type = CRYPT_KDF_PBKDF2,
+ .iterations = 1,
+ .time_ms = 1,
+ };
+
+ return buffer;
+}
+
+static int luks_format(
+ const char *node,
+ const char *dm_name,
+ sd_id128_t uuid,
+ const char *label,
+ const PasswordCache *cache,
+ char **effective_passwords,
+ bool discard,
+ UserRecord *hr,
+ struct crypt_device **ret) {
+
+ _cleanup_(user_record_unrefp) UserRecord *reduced = NULL;
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(erase_and_freep) void *volume_key = NULL;
+ struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
+ char suuid[ID128_UUID_STRING_MAX], **pp;
+ _cleanup_free_ char *text = NULL;
+ size_t volume_key_size;
+ int slot = 0, r;
+
+ assert(node);
+ assert(dm_name);
+ assert(hr);
+ assert(ret);
+
+ r = crypt_init(&cd, node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");
+
+ cryptsetup_enable_logging(cd);
+
+ /* Normally we'd, just leave volume key generation to libcryptsetup. However, we can't, since we
+ * can't extract the volume key from the library again, but we need it in order to encrypt the JSON
+ * record. Hence, let's generate it on our own, so that we can keep track of it. */
+
+ volume_key_size = user_record_luks_volume_key_size(hr);
+ volume_key = malloc(volume_key_size);
+ if (!volume_key)
+ return log_oom();
+
+ r = genuine_random_bytes(volume_key, volume_key_size, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate volume key: %m");
+
+#if HAVE_CRYPT_SET_METADATA_SIZE
+ /* Increase the metadata space to 4M, the largest LUKS2 supports */
+ r = crypt_set_metadata_size(cd, 4096U*1024U, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change LUKS2 metadata size: %m");
+#endif
+
+ build_good_pbkdf(&good_pbkdf, hr);
+ build_minimal_pbkdf(&minimal_pbkdf, hr);
+
+ r = crypt_format(cd,
+ CRYPT_LUKS2,
+ user_record_luks_cipher(hr),
+ user_record_luks_cipher_mode(hr),
+ id128_to_uuid_string(uuid, suuid),
+ volume_key,
+ volume_key_size,
+ &(struct crypt_params_luks2) {
+ .label = label,
+ .subsystem = "systemd-home",
+ .sector_size = 512U,
+ .pbkdf = &good_pbkdf,
+ });
+ if (r < 0)
+ return log_error_errno(r, "Failed to format LUKS image: %m");
+
+ log_info("LUKS formatting completed.");
+
+ STRV_FOREACH(pp, effective_passwords) {
+
+ if (strv_contains(cache->pkcs11_passwords, *pp) ||
+ strv_contains(cache->fido2_passwords, *pp)) {
+ log_debug("Using minimal PBKDF for slot %i", slot);
+ r = crypt_set_pbkdf_type(cd, &minimal_pbkdf);
+ } else {
+ log_debug("Using good PBKDF for slot %i", slot);
+ r = crypt_set_pbkdf_type(cd, &good_pbkdf);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to tweak PBKDF for slot %i: %m", slot);
+
+ r = crypt_keyslot_add_by_volume_key(
+ cd,
+ slot,
+ volume_key,
+ volume_key_size,
+ *pp,
+ strlen(*pp));
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up LUKS password for slot %i: %m", slot);
+
+ log_info("Writing password to LUKS keyslot %i completed.", slot);
+ slot++;
+ }
+
+ r = crypt_activate_by_volume_key(
+ cd,
+ dm_name,
+ volume_key,
+ volume_key_size,
+ discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate LUKS superblock: %m");
+
+ log_info("LUKS activation by volume key succeeded.");
+
+ r = user_record_clone(hr, USER_RECORD_EXTRACT_EMBEDDED, &reduced);
+ if (r < 0)
+ return log_error_errno(r, "Failed to prepare home record for LUKS: %m");
+
+ r = format_luks_token_text(cd, reduced, volume_key, &text);
+ if (r < 0)
+ return r;
+
+ r = crypt_token_json_set(cd, CRYPT_ANY_TOKEN, text);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set LUKS JSON token: %m");
+
+ log_info("Writing user record as LUKS token completed.");
+
+ if (ret)
+ *ret = TAKE_PTR(cd);
+
+ return 0;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_context*, fdisk_unref_context);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_partition*, fdisk_unref_partition);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_parttype*, fdisk_unref_parttype);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_table*, fdisk_unref_table);
+
+static int make_partition_table(
+ int fd,
+ const char *label,
+ sd_id128_t uuid,
+ uint64_t *ret_offset,
+ uint64_t *ret_size,
+ sd_id128_t *ret_disk_uuid) {
+
+ _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *p = NULL, *q = NULL;
+ _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
+ _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
+ _cleanup_free_ char *path = NULL, *disk_uuid_as_string = NULL;
+ uint64_t offset, size;
+ sd_id128_t disk_uuid;
+ char uuids[ID128_UUID_STRING_MAX];
+ int r;
+
+ assert(fd >= 0);
+ assert(label);
+ assert(ret_offset);
+ assert(ret_size);
+
+ t = fdisk_new_parttype();
+ if (!t)
+ return log_oom();
+
+ r = fdisk_parttype_set_typestr(t, "773f91ef-66d4-49b5-bd83-d683bf40ad16");
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize partition type: %m");
+
+ c = fdisk_new_context();
+ if (!c)
+ return log_oom();
+
+ if (asprintf(&path, "/proc/self/fd/%i", fd) < 0)
+ return log_oom();
+
+ r = fdisk_assign_device(c, path, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open device: %m");
+
+ r = fdisk_create_disklabel(c, "gpt");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create GPT disk label: %m");
+
+ p = fdisk_new_partition();
+ if (!p)
+ return log_oom();
+
+ r = fdisk_partition_set_type(p, t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition type: %m");
+
+ r = fdisk_partition_start_follow_default(p, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to place partition at beginning of space: %m");
+
+ r = fdisk_partition_partno_follow_default(p, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to place partition at first free partition index: %m");
+
+ r = fdisk_partition_end_follow_default(p, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make partition cover all free space: %m");
+
+ r = fdisk_partition_set_name(p, label);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition name: %m");
+
+ r = fdisk_partition_set_uuid(p, id128_to_uuid_string(uuid, uuids));
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition UUID: %m");
+
+ r = fdisk_add_partition(c, p, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add partition: %m");
+
+ r = fdisk_write_disklabel(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write disk label: %m");
+
+ r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine disk label UUID: %m");
+
+ r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse disk label UUID: %m");
+
+ r = fdisk_get_partition(c, 0, &q);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read created partition metadata: %m");
+
+ assert(fdisk_partition_has_start(q));
+ offset = fdisk_partition_get_start(q);
+ if (offset > UINT64_MAX / 512U)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition offset too large.");
+
+ assert(fdisk_partition_has_size(q));
+ size = fdisk_partition_get_size(q);
+ if (size > UINT64_MAX / 512U)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition size too large.");
+
+ *ret_offset = offset * 512U;
+ *ret_size = size * 512U;
+ *ret_disk_uuid = disk_uuid;
+
+ return 0;
+}
+
+static bool supported_fs_size(const char *fstype, uint64_t host_size) {
+ uint64_t m;
+
+ m = minimal_size_by_fs_name(fstype);
+ if (m == UINT64_MAX)
+ return false;
+
+ return host_size >= m;
+}
+
+static int wait_for_devlink(const char *path) {
+ _cleanup_close_ int inotify_fd = -1;
+ usec_t until;
+ int r;
+
+ /* let's wait for a device link to show up in /dev, with a timeout. This is good to do since we
+ * return a /dev/disk/by-uuid/… link to our callers and they likely want to access it right-away,
+ * hence let's wait until udev has caught up with our changes, and wait for the symlink to be
+ * created. */
+
+ until = usec_add(now(CLOCK_MONOTONIC), 45 * USEC_PER_SEC);
+
+ for (;;) {
+ _cleanup_free_ char *dn = NULL;
+ usec_t w;
+
+ if (laccess(path, F_OK) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine whether %s exists: %m", path);
+ } else
+ return 0; /* Found it */
+
+ if (inotify_fd < 0) {
+ /* We need to wait for the device symlink to show up, let's create an inotify watch for it */
+ inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (inotify_fd < 0)
+ return log_error_errno(errno, "Failed to allocate inotify fd: %m");
+ }
+
+ dn = dirname_malloc(path);
+ for (;;) {
+ if (!dn)
+ return log_oom();
+
+ log_info("Watching %s", dn);
+
+ if (inotify_add_watch(inotify_fd, dn, IN_CREATE|IN_MOVED_TO|IN_ONLYDIR|IN_DELETE_SELF|IN_MOVE_SELF) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to add watch on %s: %m", dn);
+ } else
+ break;
+
+ if (empty_or_root(dn))
+ break;
+
+ dn = dirname_malloc(dn);
+ }
+
+ w = now(CLOCK_MONOTONIC);
+ if (w >= until)
+ return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT), "Device link %s still hasn't shown up, giving up.", path);
+
+ r = fd_wait_for_event(inotify_fd, POLLIN, usec_sub_unsigned(until, w));
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch inotify: %m");
+
+ (void) flush_fd(inotify_fd);
+ }
+}
+
+static int calculate_disk_size(UserRecord *h, const char *parent_dir, uint64_t *ret) {
+ char buf[FORMAT_BYTES_MAX];
+ struct statfs sfs;
+ uint64_t m;
+
+ assert(h);
+ assert(parent_dir);
+ assert(ret);
+
+ if (h->disk_size != UINT64_MAX) {
+ *ret = DISK_SIZE_ROUND_DOWN(h->disk_size);
+ return 0;
+ }
+
+ if (statfs(parent_dir, &sfs) < 0)
+ return log_error_errno(errno, "statfs() on %s failed: %m", parent_dir);
+
+ m = sfs.f_bsize * sfs.f_bavail;
+
+ if (h->disk_size_relative == UINT64_MAX) {
+
+ if (m > UINT64_MAX / USER_DISK_SIZE_DEFAULT_PERCENT)
+ return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Disk size too large.");
+
+ *ret = DISK_SIZE_ROUND_DOWN(m * USER_DISK_SIZE_DEFAULT_PERCENT / 100);
+
+ log_info("Sizing home to %u%% of available disk space, which is %s.",
+ USER_DISK_SIZE_DEFAULT_PERCENT,
+ format_bytes(buf, sizeof(buf), *ret));
+ } else {
+ *ret = DISK_SIZE_ROUND_DOWN((uint64_t) ((double) m * (double) h->disk_size_relative / (double) UINT32_MAX));
+
+ log_info("Sizing home to %" PRIu64 ".%01" PRIu64 "%% of available disk space, which is %s.",
+ (h->disk_size_relative * 100) / UINT32_MAX,
+ ((h->disk_size_relative * 1000) / UINT32_MAX) % 10,
+ format_bytes(buf, sizeof(buf), *ret));
+ }
+
+ if (*ret < USER_DISK_SIZE_MIN)
+ *ret = USER_DISK_SIZE_MIN;
+
+ return 0;
+}
+
+static int home_truncate(
+ UserRecord *h,
+ int fd,
+ const char *path,
+ uint64_t size) {
+
+ bool trunc;
+ int r;
+
+ assert(h);
+ assert(fd >= 0);
+ assert(path);
+
+ trunc = user_record_luks_discard(h);
+ if (!trunc) {
+ r = fallocate(fd, 0, 0, size);
+ if (r < 0 && ERRNO_IS_NOT_SUPPORTED(errno)) {
+ /* Some file systems do not support fallocate(), let's gracefully degrade
+ * (ZFS, reiserfs, …) and fall back to truncation */
+ log_notice_errno(errno, "Backing file system does not support fallocate(), falling back to ftruncate(), i.e. implicitly using non-discard mode.");
+ trunc = true;
+ }
+ }
+
+ if (trunc)
+ r = ftruncate(fd, size);
+
+ if (r < 0) {
+ if (ERRNO_IS_DISK_SPACE(errno)) {
+ log_error_errno(errno, "Not enough disk space to allocate home.");
+ return -ENOSPC; /* make recognizable */
+ }
+
+ return log_error_errno(errno, "Failed to truncate home image %s: %m", path);
+ }
+
+ return 0;
+}
+
+int home_create_luks(
+ UserRecord *h,
+ PasswordCache *cache,
+ char **effective_passwords,
+ UserRecord **ret_home) {
+
+ _cleanup_free_ char *dm_name = NULL, *dm_node = NULL, *subdir = NULL, *disk_uuid_path = NULL, *temporary_image_path = NULL;
+ uint64_t host_size, encrypted_size, partition_offset, partition_size;
+ bool image_created = false, dm_activated = false, mounted = false;
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
+ sd_id128_t partition_uuid, fs_uuid, luks_uuid, disk_uuid;
+ _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_close_ int image_fd = -1, root_fd = -1;
+ const char *fstype, *ip;
+ struct statfs sfs;
+ int r;
+
+ assert(h);
+ assert(h->storage < 0 || h->storage == USER_LUKS);
+ assert(ret_home);
+
+ assert_se(ip = user_record_image_path(h));
+
+ fstype = user_record_file_system_type(h);
+ if (!supported_fstype(fstype))
+ return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Unsupported file system type: %s", fstype);
+
+ r = mkfs_exists(fstype);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if mkfs binary for %s exists: %m", fstype);
+ if (r == 0) {
+ if (h->file_system_type || streq(fstype, "ext4") || !supported_fstype("ext4"))
+ return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkfs binary for file system type %s does not exist.", fstype);
+
+ /* If the record does not explicitly declare a file system to use, and the compiled-in
+ * default does not actually exist, than do an automatic fallback onto ext4, as the baseline
+ * fs of Linux. We won't search for a working fs type here beyond ext4, i.e. nothing fancier
+ * than a single, conservative fallback to baseline. This should be useful in minimal
+ * environments where mkfs.btrfs or so are not made available, but mkfs.ext4 as Linux' most
+ * boring, most basic fs is. */
+ log_info("Formatting tool for compiled-in default file system %s not available, falling back to ext4 instead.", fstype);
+ fstype = "ext4";
+ }
+
+ if (sd_id128_is_null(h->partition_uuid)) {
+ r = sd_id128_randomize(&partition_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire partition UUID: %m");
+ } else
+ partition_uuid = h->partition_uuid;
+
+ if (sd_id128_is_null(h->luks_uuid)) {
+ r = sd_id128_randomize(&luks_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire LUKS UUID: %m");
+ } else
+ luks_uuid = h->luks_uuid;
+
+ if (sd_id128_is_null(h->file_system_uuid)) {
+ r = sd_id128_randomize(&fs_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire file system UUID: %m");
+ } else
+ fs_uuid = h->file_system_uuid;
+
+ r = make_dm_names(h->user_name, &dm_name, &dm_node);
+ if (r < 0)
+ return r;
+
+ r = access(dm_node, F_OK);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine whether %s exists: %m", dm_node);
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", dm_node);
+
+ if (path_startswith(ip, "/dev/")) {
+ _cleanup_free_ char *sysfs = NULL;
+ uint64_t block_device_size;
+ struct stat st;
+
+ /* Let's place the home directory on a real device, i.e. an USB stick or such */
+
+ image_fd = open(ip, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (image_fd < 0)
+ return log_error_errno(errno, "Failed to open device %s: %m", ip);
+
+ if (fstat(image_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat device %s: %m", ip);
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Device is not a block device, refusing.");
+
+ if (asprintf(&sysfs, "/sys/dev/block/%u:%u/partition", major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return log_oom();
+ if (access(sysfs, F_OK) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to check whether %s exists: %m", sysfs);
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Operating on partitions is currently not supported, sorry. Please specify a top-level block device.");
+
+ if (flock(image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
+ return log_error_errno(errno, "Failed to lock block device %s: %m", ip);
+
+ if (ioctl(image_fd, BLKGETSIZE64, &block_device_size) < 0)
+ return log_error_errno(errno, "Failed to read block device size: %m");
+
+ if (h->disk_size == UINT64_MAX) {
+
+ /* If a relative disk size is requested, apply it relative to the block device size */
+ if (h->disk_size_relative < UINT32_MAX)
+ host_size = CLAMP(DISK_SIZE_ROUND_DOWN(block_device_size * h->disk_size_relative / UINT32_MAX),
+ USER_DISK_SIZE_MIN, USER_DISK_SIZE_MAX);
+ else
+ host_size = block_device_size; /* Otherwise, take the full device */
+
+ } else if (h->disk_size > block_device_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Selected disk size larger than backing block device, refusing.");
+ else
+ host_size = DISK_SIZE_ROUND_DOWN(h->disk_size);
+
+ if (!supported_fs_size(fstype, host_size))
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
+ "Selected file system size too small for %s.", fstype);
+
+ /* After creation we should reference this partition by its UUID instead of the block
+ * device. That's preferable since the user might have specified a device node such as
+ * /dev/sdb to us, which might look very different when replugged. */
+ if (asprintf(&disk_uuid_path, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(luks_uuid)) < 0)
+ return log_oom();
+
+ if (user_record_luks_discard(h) || user_record_luks_offline_discard(h)) {
+ /* If we want online or offline discard, discard once before we start using things. */
+
+ if (ioctl(image_fd, BLKDISCARD, (uint64_t[]) { 0, block_device_size }) < 0)
+ log_full_errno(errno == EOPNOTSUPP ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to issue full-device BLKDISCARD on device, ignoring: %m");
+ else
+ log_info("Full device discard completed.");
+ }
+ } else {
+ _cleanup_free_ char *parent = NULL;
+
+ parent = dirname_malloc(ip);
+ if (!parent)
+ return log_oom();
+
+ r = mkdir_p(parent, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create parent directory %s: %m", parent);
+
+ r = calculate_disk_size(h, parent, &host_size);
+ if (r < 0)
+ return r;
+
+ if (!supported_fs_size(fstype, host_size))
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Selected file system size too small for %s.", fstype);
+
+ r = tempfn_random(ip, "homework", &temporary_image_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to derive temporary file name for %s: %m", ip);
+
+ image_fd = open(temporary_image_path, O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
+ if (image_fd < 0)
+ return log_error_errno(errno, "Failed to create home image %s: %m", temporary_image_path);
+
+ image_created = true;
+
+ r = chattr_fd(image_fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+ if (r < 0)
+ log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set file attributes on %s, ignoring: %m", temporary_image_path);
+
+ r = home_truncate(h, image_fd, temporary_image_path, host_size);
+ if (r < 0)
+ goto fail;
+
+ log_info("Allocating image file completed.");
+ }
+
+ r = make_partition_table(
+ image_fd,
+ user_record_user_name_and_realm(h),
+ partition_uuid,
+ &partition_offset,
+ &partition_size,
+ &disk_uuid);
+ if (r < 0)
+ goto fail;
+
+ log_info("Writing of partition table completed.");
+
+ r = loop_device_make(image_fd, O_RDWR, partition_offset, partition_size, 0, &loop);
+ if (r < 0) {
+ if (r == -ENOENT) { /* this means /dev/loop-control doesn't exist, i.e. we are in a container
+ * or similar and loopback bock devices are not available, return a
+ * recognizable error in this case. */
+ log_error_errno(r, "Loopback block device support is not available on this system.");
+ r = -ENOLINK;
+ goto fail;
+ }
+
+ log_error_errno(r, "Failed to set up loopback device for %s: %m", temporary_image_path);
+ goto fail;
+ }
+
+ r = loop_device_flock(loop, LOCK_EX); /* make sure udev won't read before we are done */
+ if (r < 0) {
+ log_error_errno(r, "Failed to take lock on loop device: %m");
+ goto fail;
+ }
+
+ log_info("Setting up loopback device %s completed.", loop->node ?: ip);
+
+ r = luks_format(loop->node,
+ dm_name,
+ luks_uuid,
+ user_record_user_name_and_realm(h),
+ cache,
+ effective_passwords,
+ user_record_luks_discard(h) || user_record_luks_offline_discard(h),
+ h,
+ &cd);
+ if (r < 0)
+ goto fail;
+
+ dm_activated = true;
+
+ r = block_get_size_by_path(dm_node, &encrypted_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get encrypted block device size: %m");
+ goto fail;
+ }
+
+ log_info("Setting up LUKS device %s completed.", dm_node);
+
+ r = make_filesystem(dm_node, fstype, user_record_user_name_and_realm(h), fs_uuid, user_record_luks_discard(h));
+ if (r < 0)
+ goto fail;
+
+ log_info("Formatting file system completed.");
+
+ r = home_unshare_and_mount(dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h));
+ if (r < 0)
+ goto fail;
+
+ mounted = true;
+
+ subdir = path_join("/run/systemd/user-home-mount/", user_record_user_name_and_realm(h));
+ if (!subdir) {
+ r = log_oom();
+ goto fail;
+ }
+
+ /* Prefer using a btrfs subvolume if we can, fall back to directory otherwise */
+ r = btrfs_subvol_make_fallback(subdir, 0700);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create user directory in mounted image file: %m");
+ goto fail;
+ }
+
+ root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (root_fd < 0) {
+ r = log_error_errno(errno, "Failed to open user directory in mounted image file: %m");
+ goto fail;
+ }
+
+ r = home_populate(h, root_fd);
+ if (r < 0)
+ goto fail;
+
+ r = home_sync_and_statfs(root_fd, &sfs);
+ if (r < 0)
+ goto fail;
+
+ r = user_record_clone(h, USER_RECORD_LOAD_MASK_SECRET|USER_RECORD_LOG, &new_home);
+ if (r < 0) {
+ log_error_errno(r, "Failed to clone record: %m");
+ goto fail;
+ }
+
+ r = user_record_add_binding(
+ new_home,
+ USER_LUKS,
+ disk_uuid_path ?: ip,
+ partition_uuid,
+ luks_uuid,
+ fs_uuid,
+ crypt_get_cipher(cd),
+ crypt_get_cipher_mode(cd),
+ luks_volume_key_size_convert(cd),
+ fstype,
+ NULL,
+ h->uid,
+ (gid_t) h->uid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add binding to record: %m");
+ goto fail;
+ }
+
+ if (user_record_luks_offline_discard(h)) {
+ r = run_fitrim(root_fd);
+ if (r < 0)
+ goto fail;
+ }
+
+ root_fd = safe_close(root_fd);
+
+ r = umount_verbose(LOG_ERR, "/run/systemd/user-home-mount", UMOUNT_NOFOLLOW);
+ if (r < 0)
+ goto fail;
+
+ mounted = false;
+
+ r = crypt_deactivate(cd, dm_name);
+ if (r < 0) {
+ log_error_errno(r, "Failed to deactivate LUKS device: %m");
+ goto fail;
+ }
+
+ crypt_free(cd);
+ cd = NULL;
+
+ dm_activated = false;
+
+ loop = loop_device_unref(loop);
+
+ if (!user_record_luks_offline_discard(h)) {
+ r = run_fallocate(image_fd, NULL /* refresh stat() data */);
+ if (r < 0)
+ goto fail;
+ }
+
+ /* Sync everything to disk before we move things into place under the final name. */
+ if (fsync(image_fd) < 0) {
+ r = log_error_errno(r, "Failed to synchronize image to disk: %m");
+ goto fail;
+ }
+
+ if (disk_uuid_path)
+ (void) ioctl(image_fd, BLKRRPART, 0);
+ else {
+ /* If we operate on a file, sync the containing directory too. */
+ r = fsync_directory_of_file(image_fd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to synchronize directory of image file to disk: %m");
+ goto fail;
+ }
+ }
+
+ /* Let's close the image fd now. If we are operating on a real block device this will release the BSD
+ * lock that ensures udev doesn't interfere with what we are doing */
+ image_fd = safe_close(image_fd);
+
+ if (temporary_image_path) {
+ if (rename(temporary_image_path, ip) < 0) {
+ log_error_errno(errno, "Failed to rename image file: %m");
+ goto fail;
+ }
+
+ log_info("Moved image file into place.");
+ }
+
+ if (disk_uuid_path)
+ (void) wait_for_devlink(disk_uuid_path);
+
+ log_info("Everything completed.");
+
+ print_size_summary(host_size, encrypted_size, &sfs);
+
+ *ret_home = TAKE_PTR(new_home);
+ return 0;
+
+fail:
+ /* Let's close all files before we unmount the file system, to avoid EBUSY */
+ root_fd = safe_close(root_fd);
+
+ if (mounted)
+ (void) umount_verbose(LOG_WARNING, "/run/systemd/user-home-mount", UMOUNT_NOFOLLOW);
+
+ if (dm_activated)
+ (void) crypt_deactivate(cd, dm_name);
+
+ loop = loop_device_unref(loop);
+
+ if (image_created)
+ (void) unlink(temporary_image_path);
+
+ return r;
+}
+
+int home_validate_update_luks(UserRecord *h, HomeSetup *setup) {
+ _cleanup_free_ char *dm_name = NULL, *dm_node = NULL;
+ int r;
+
+ assert(h);
+ assert(setup);
+
+ r = make_dm_names(h->user_name, &dm_name, &dm_node);
+ if (r < 0)
+ return r;
+
+ r = access(dm_node, F_OK);
+ if (r < 0 && errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine whether %s exists: %m", dm_node);
+
+ free_and_replace(setup->dm_name, dm_name);
+ free_and_replace(setup->dm_node, dm_node);
+
+ return r >= 0;
+}
+
+enum {
+ CAN_RESIZE_ONLINE,
+ CAN_RESIZE_OFFLINE,
+};
+
+static int can_resize_fs(int fd, uint64_t old_size, uint64_t new_size) {
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ /* Filter out bogus requests early */
+ if (old_size == 0 || old_size == UINT64_MAX ||
+ new_size == 0 || new_size == UINT64_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid resize parameters.");
+
+ if ((old_size & 511) != 0 || (new_size & 511) != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Resize parameters not multiple of 512.");
+
+ if (fstatfs(fd, &sfs) < 0)
+ return log_error_errno(errno, "Failed to fstatfs() file system: %m");
+
+ if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) {
+
+ if (new_size < BTRFS_MINIMAL_SIZE)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for btrfs (needs to be 256M at least.");
+
+ /* btrfs can grow and shrink online */
+
+ } else if (is_fs_type(&sfs, XFS_SB_MAGIC)) {
+
+ if (new_size < XFS_MINIMAL_SIZE)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for xfs (needs to be 14M at least).");
+
+ /* XFS can grow, but not shrink */
+ if (new_size < old_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Shrinking this type of file system is not supported.");
+
+ } else if (is_fs_type(&sfs, EXT4_SUPER_MAGIC)) {
+
+ if (new_size < EXT4_MINIMAL_SIZE)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for ext4 (needs to be 1M at least).");
+
+ /* ext4 can grow online, and shrink offline */
+ if (new_size < old_size)
+ return CAN_RESIZE_OFFLINE;
+
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(ESOCKTNOSUPPORT), "Resizing this type of file system is not supported.");
+
+ return CAN_RESIZE_ONLINE;
+}
+
+static int ext4_offline_resize_fs(HomeSetup *setup, uint64_t new_size, bool discard, unsigned long flags) {
+ _cleanup_free_ char *size_str = NULL;
+ bool re_open = false, re_mount = false;
+ pid_t resize_pid, fsck_pid;
+ int r, exit_status;
+
+ assert(setup);
+ assert(setup->dm_node);
+
+ /* First, unmount the file system */
+ if (setup->root_fd >= 0) {
+ setup->root_fd = safe_close(setup->root_fd);
+ re_open = true;
+ }
+
+ if (setup->undo_mount) {
+ r = umount_verbose(LOG_ERR, "/run/systemd/user-home-mount", UMOUNT_NOFOLLOW);
+ if (r < 0)
+ return r;
+
+ setup->undo_mount = false;
+ re_mount = true;
+ }
+
+ log_info("Temporary unmounting of file system completed.");
+
+ /* resize2fs requires that the file system is force checked first, do so. */
+ r = safe_fork("(e2fsck)", FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR, &fsck_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execlp("e2fsck" ,"e2fsck", "-fp", setup->dm_node, NULL);
+ log_error_errno(errno, "Failed to execute e2fsck: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ exit_status = wait_for_terminate_and_check("e2fsck", fsck_pid, WAIT_LOG_ABNORMAL);
+ if (exit_status < 0)
+ return exit_status;
+ if ((exit_status & ~FSCK_ERROR_CORRECTED) != 0) {
+ log_warning("e2fsck failed with exit status %i.", exit_status);
+
+ if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");
+
+ log_warning("Ignoring fsck error.");
+ }
+
+ log_info("Forced file system check completed.");
+
+ /* We use 512 sectors here, because resize2fs doesn't do byte sizes */
+ if (asprintf(&size_str, "%" PRIu64 "s", new_size / 512) < 0)
+ return log_oom();
+
+ /* Resize the thing */
+ r = safe_fork("(e2resize)", FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_STDOUT_TO_STDERR, &resize_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execlp("resize2fs" ,"resize2fs", setup->dm_node, size_str, NULL);
+ log_error_errno(errno, "Failed to execute resize2fs: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ log_info("Offline file system resize completed.");
+
+ /* Re-establish mounts and reopen the directory */
+ if (re_mount) {
+ r = home_mount_node(setup->dm_node, "ext4", discard, flags);
+ if (r < 0)
+ return r;
+
+ setup->undo_mount = true;
+ }
+
+ if (re_open) {
+ setup->root_fd = open("/run/systemd/user-home-mount", O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (setup->root_fd < 0)
+ return log_error_errno(errno, "Failed to reopen file system: %m");
+ }
+
+ log_info("File system mounted again.");
+
+ return 0;
+}
+
+static int prepare_resize_partition(
+ int fd,
+ uint64_t partition_offset,
+ uint64_t old_partition_size,
+ uint64_t new_partition_size,
+ sd_id128_t *ret_disk_uuid,
+ struct fdisk_table **ret_table) {
+
+ _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
+ _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
+ _cleanup_free_ char *path = NULL, *disk_uuid_as_string = NULL;
+ size_t n_partitions, i;
+ sd_id128_t disk_uuid;
+ bool found = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(ret_disk_uuid);
+ assert(ret_table);
+
+ assert((partition_offset & 511) == 0);
+ assert((old_partition_size & 511) == 0);
+ assert((new_partition_size & 511) == 0);
+ assert(UINT64_MAX - old_partition_size >= partition_offset);
+ assert(UINT64_MAX - new_partition_size >= partition_offset);
+
+ if (partition_offset == 0) {
+ /* If the offset is at the beginning we assume no partition table, let's exit early. */
+ log_debug("Not rewriting partition table, operating on naked device.");
+ *ret_disk_uuid = SD_ID128_NULL;
+ *ret_table = NULL;
+ return 0;
+ }
+
+ c = fdisk_new_context();
+ if (!c)
+ return log_oom();
+
+ if (asprintf(&path, "/proc/self/fd/%i", fd) < 0)
+ return log_oom();
+
+ r = fdisk_assign_device(c, path, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open device: %m");
+
+ if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM), "Disk has no GPT partition table.");
+
+ r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire disk UUID: %m");
+
+ r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed parse disk UUID: %m");
+
+ r = fdisk_get_partitions(c, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire partition table: %m");
+
+ n_partitions = fdisk_table_get_nents(t);
+ for (i = 0; i < n_partitions; i++) {
+ struct fdisk_partition *p;
+
+ p = fdisk_table_get_partition(t, i);
+ if (!p)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
+
+ if (fdisk_partition_is_used(p) <= 0)
+ continue;
+ if (fdisk_partition_has_start(p) <= 0 || fdisk_partition_has_size(p) <= 0 || fdisk_partition_has_end(p) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found partition without a size.");
+
+ if (fdisk_partition_get_start(p) == partition_offset / 512U &&
+ fdisk_partition_get_size(p) == old_partition_size / 512U) {
+
+ if (found)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), "Partition found twice, refusing.");
+
+ /* Found our partition, now patch it */
+ r = fdisk_partition_size_explicit(p, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable explicit partition size: %m");
+
+ r = fdisk_partition_set_size(p, new_partition_size / 512U);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change partition size: %m");
+
+ found = true;
+ continue;
+
+ } else {
+ if (fdisk_partition_get_start(p) < partition_offset + new_partition_size / 512U &&
+ fdisk_partition_get_end(p) >= partition_offset / 512)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Can't extend, conflicting partition found.");
+ }
+ }
+
+ if (!found)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to find matching partition to resize.");
+
+ *ret_table = TAKE_PTR(t);
+ *ret_disk_uuid = disk_uuid;
+
+ return 1;
+}
+
+static int ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *userdata) {
+ char *result;
+
+ assert(c);
+
+ switch (fdisk_ask_get_type(ask)) {
+
+ case FDISK_ASKTYPE_STRING:
+ result = new(char, 37);
+ if (!result)
+ return log_oom();
+
+ fdisk_ask_string_set_result(ask, id128_to_uuid_string(*(sd_id128_t*) userdata, result));
+ break;
+
+ default:
+ log_debug("Unexpected question from libfdisk, ignoring.");
+ }
+
+ return 0;
+}
+
+static int apply_resize_partition(int fd, sd_id128_t disk_uuids, struct fdisk_table *t) {
+ _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
+ _cleanup_free_ void *two_zero_lbas = NULL;
+ _cleanup_free_ char *path = NULL;
+ ssize_t n;
+ int r;
+
+ assert(fd >= 0);
+
+ if (!t) /* no partition table to apply, exit early */
+ return 0;
+
+ two_zero_lbas = malloc0(1024U);
+ if (!two_zero_lbas)
+ return log_oom();
+
+ /* libfdisk appears to get confused by the existing PMBR. Let's explicitly flush it out. */
+ n = pwrite(fd, two_zero_lbas, 1024U, 0);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to wipe partition table: %m");
+ if (n != 1024)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while wiping partition table.");
+
+ c = fdisk_new_context();
+ if (!c)
+ return log_oom();
+
+ if (asprintf(&path, "/proc/self/fd/%i", fd) < 0)
+ return log_oom();
+
+ r = fdisk_assign_device(c, path, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open device: %m");
+
+ r = fdisk_create_disklabel(c, "gpt");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create GPT disk label: %m");
+
+ r = fdisk_apply_table(c, t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply partition table: %m");
+
+ r = fdisk_set_ask(c, ask_cb, &disk_uuids);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set libfdisk query function: %m");
+
+ r = fdisk_set_disklabel_id(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change disklabel ID: %m");
+
+ r = fdisk_write_disklabel(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write disk label: %m");
+
+ return 1;
+}
+
+int home_resize_luks(
+ UserRecord *h,
+ bool already_activated,
+ PasswordCache *cache,
+ HomeSetup *setup,
+ UserRecord **ret_home) {
+
+ char buffer1[FORMAT_BYTES_MAX], buffer2[FORMAT_BYTES_MAX], buffer3[FORMAT_BYTES_MAX],
+ buffer4[FORMAT_BYTES_MAX], buffer5[FORMAT_BYTES_MAX], buffer6[FORMAT_BYTES_MAX];
+ uint64_t old_image_size, new_image_size, old_fs_size, new_fs_size, crypto_offset, new_partition_size;
+ _cleanup_(user_record_unrefp) UserRecord *header_home = NULL, *embedded_home = NULL, *new_home = NULL;
+ _cleanup_(fdisk_unref_tablep) struct fdisk_table *table = NULL;
+ _cleanup_free_ char *whole_disk = NULL;
+ _cleanup_close_ int image_fd = -1;
+ sd_id128_t disk_uuid;
+ const char *ip, *ipo;
+ struct statfs sfs;
+ struct stat st;
+ int r, resize_type;
+
+ assert(h);
+ assert(user_record_storage(h) == USER_LUKS);
+ assert(setup);
+ assert(ret_home);
+
+ assert_se(ipo = user_record_image_path(h));
+ ip = strdupa(ipo); /* copy out since original might change later in home record object */
+
+ image_fd = open(ip, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (image_fd < 0)
+ return log_error_errno(errno, "Failed to open image file %s: %m", ip);
+
+ if (fstat(image_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat image file %s: %m", ip);
+ if (S_ISBLK(st.st_mode)) {
+ dev_t parent;
+
+ r = block_get_whole_disk(st.st_rdev, &parent);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire whole block device for %s: %m", ip);
+ if (r > 0) {
+ /* If we shall resize a file system on a partition device, then let's figure out the
+ * whole disk device and operate on that instead, since we need to rewrite the
+ * partition table to resize the partition. */
+
+ log_info("Operating on partition device %s, using parent device.", ip);
+
+ r = device_path_make_major_minor(st.st_mode, parent, &whole_disk);
+ if (r < 0)
+ return log_error_errno(r, "Failed to derive whole disk path for %s: %m", ip);
+
+ safe_close(image_fd);
+
+ image_fd = open(whole_disk, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (image_fd < 0)
+ return log_error_errno(errno, "Failed to open whole block device %s: %m", whole_disk);
+
+ if (fstat(image_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat whole block device %s: %m", whole_disk);
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Whole block device %s is not actually a block device, refusing.", whole_disk);
+ } else
+ log_info("Operating on whole block device %s.", ip);
+
+ if (ioctl(image_fd, BLKGETSIZE64, &old_image_size) < 0)
+ return log_error_errno(errno, "Failed to determine size of original block device: %m");
+
+ if (flock(image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
+ return log_error_errno(errno, "Failed to lock block device %s: %m", ip);
+
+ new_image_size = old_image_size; /* we can't resize physical block devices */
+ } else {
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return log_error_errno(r, "Image %s is not a block device nor regular file: %m", ip);
+
+ old_image_size = st.st_size;
+
+ /* Note an asymetry here: when we operate on loopback files the specified disk size we get we
+ * apply onto the loopback file as a whole. When we operate on block devices we instead apply
+ * to the partition itself only. */
+
+ new_image_size = DISK_SIZE_ROUND_DOWN(h->disk_size);
+ if (new_image_size == old_image_size) {
+ log_info("Image size already matching, skipping operation.");
+ return 0;
+ }
+ }
+
+ r = home_prepare_luks(h, already_activated, whole_disk, cache, setup, &header_home);
+ if (r < 0)
+ return r;
+
+ r = home_load_embedded_identity(h, setup->root_fd, header_home, USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL, cache, &embedded_home, &new_home);
+ if (r < 0)
+ return r;
+
+ log_info("offset = %" PRIu64 ", size = %" PRIu64 ", image = %" PRIu64, setup->partition_offset, setup->partition_size, old_image_size);
+
+ if ((UINT64_MAX - setup->partition_offset) < setup->partition_size ||
+ setup->partition_offset + setup->partition_size > old_image_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Old partition doesn't fit in backing storage, refusing.");
+
+ if (S_ISREG(st.st_mode)) {
+ uint64_t partition_table_extra;
+
+ partition_table_extra = old_image_size - setup->partition_size;
+ if (new_image_size <= partition_table_extra)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "New size smaller than partition table metadata.");
+
+ new_partition_size = new_image_size - partition_table_extra;
+ } else {
+ assert(S_ISBLK(st.st_mode));
+
+ new_partition_size = DISK_SIZE_ROUND_DOWN(h->disk_size);
+ if (new_partition_size == setup->partition_size) {
+ log_info("Partition size already matching, skipping operation.");
+ return 0;
+ }
+ }
+
+ if ((UINT64_MAX - setup->partition_offset) < new_partition_size ||
+ setup->partition_offset + new_partition_size > new_image_size)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "New partition doesn't fit into backing storage, refusing.");
+
+ crypto_offset = crypt_get_data_offset(setup->crypt_device);
+ if (setup->partition_size / 512U <= crypto_offset)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Weird, old crypto payload offset doesn't actually fit in partition size?");
+ if (new_partition_size / 512U <= crypto_offset)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "New size smaller than crypto payload offset?");
+
+ old_fs_size = (setup->partition_size / 512U - crypto_offset) * 512U;
+ new_fs_size = (new_partition_size / 512U - crypto_offset) * 512U;
+
+ /* Before we start doing anything, let's figure out if we actually can */
+ resize_type = can_resize_fs(setup->root_fd, old_fs_size, new_fs_size);
+ if (resize_type < 0)
+ return resize_type;
+ if (resize_type == CAN_RESIZE_OFFLINE && already_activated)
+ return log_error_errno(SYNTHETIC_ERRNO(ETXTBSY), "File systems of this type can only be resized offline, but is currently online.");
+
+ log_info("Ready to resize image size %s → %s, partition size %s → %s, file system size %s → %s.",
+ format_bytes(buffer1, sizeof(buffer1), old_image_size),
+ format_bytes(buffer2, sizeof(buffer2), new_image_size),
+ format_bytes(buffer3, sizeof(buffer3), setup->partition_size),
+ format_bytes(buffer4, sizeof(buffer4), new_partition_size),
+ format_bytes(buffer5, sizeof(buffer5), old_fs_size),
+ format_bytes(buffer6, sizeof(buffer6), new_fs_size));
+
+ r = prepare_resize_partition(
+ image_fd,
+ setup->partition_offset,
+ setup->partition_size,
+ new_partition_size,
+ &disk_uuid,
+ &table);
+ if (r < 0)
+ return r;
+
+ if (new_fs_size > old_fs_size) {
+
+ if (S_ISREG(st.st_mode)) {
+ /* Grow file size */
+ r = home_truncate(h, image_fd, ip, new_image_size);
+ if (r < 0)
+ return r;
+
+ log_info("Growing of image file completed.");
+ }
+
+ /* Make sure loopback device sees the new bigger size */
+ r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
+ if (r == -ENOTTY)
+ log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to refresh loopback device size: %m");
+ else
+ log_info("Refreshing loop device size completed.");
+
+ r = apply_resize_partition(image_fd, disk_uuid, table);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ log_info("Growing of partition completed.");
+
+ if (ioctl(image_fd, BLKRRPART, 0) < 0)
+ log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");
+
+ /* Tell LUKS about the new bigger size too */
+ r = crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512U);
+ if (r < 0)
+ return log_error_errno(r, "Failed to grow LUKS device: %m");
+
+ log_info("LUKS device growing completed.");
+ } else {
+ r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
+ if (r < 0)
+ return r;
+
+ if (S_ISREG(st.st_mode)) {
+ if (user_record_luks_discard(h))
+ /* Before we shrink, let's trim the file system, so that we need less space on disk during the shrinking */
+ (void) run_fitrim(setup->root_fd);
+ else {
+ /* If discard is off, let's ensure all backing blocks are allocated, so that our resize operation doesn't fail half-way */
+ r = run_fallocate(image_fd, &st);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ /* Now resize the file system */
+ if (resize_type == CAN_RESIZE_ONLINE)
+ r = resize_fs(setup->root_fd, new_fs_size, NULL);
+ else
+ r = ext4_offline_resize_fs(setup, new_fs_size, user_record_luks_discard(h), user_record_mount_flags(h));
+ if (r < 0)
+ return log_error_errno(r, "Failed to resize file system: %m");
+
+ log_info("File system resizing completed.");
+
+ /* Immediately sync afterwards */
+ r = home_sync_and_statfs(setup->root_fd, NULL);
+ if (r < 0)
+ return r;
+
+ if (new_fs_size < old_fs_size) {
+
+ /* Shrink the LUKS device now, matching the new file system size */
+ r = crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512);
+ if (r < 0)
+ return log_error_errno(r, "Failed to shrink LUKS device: %m");
+
+ log_info("LUKS device shrinking completed.");
+
+ if (S_ISREG(st.st_mode)) {
+ /* Shrink the image file */
+ if (ftruncate(image_fd, new_image_size) < 0)
+ return log_error_errno(errno, "Failed to shrink image file %s: %m", ip);
+
+ log_info("Shrinking of image file completed.");
+ }
+
+ /* Refresh the loop devices size */
+ r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
+ if (r == -ENOTTY)
+ log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to refresh loopback device size: %m");
+ else
+ log_info("Refreshing loop device size completed.");
+
+ r = apply_resize_partition(image_fd, disk_uuid, table);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ log_info("Shrinking of partition completed.");
+
+ if (ioctl(image_fd, BLKRRPART, 0) < 0)
+ log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");
+ } else {
+ r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
+ if (r < 0)
+ return r;
+ }
+
+ r = home_store_header_identity_luks(new_home, setup, header_home);
+ if (r < 0)
+ return r;
+
+ r = home_extend_embedded_identity(new_home, h, setup);
+ if (r < 0)
+ return r;
+
+ if (user_record_luks_discard(h))
+ (void) run_fitrim(setup->root_fd);
+
+ r = home_sync_and_statfs(setup->root_fd, &sfs);
+ if (r < 0)
+ return r;
+
+ r = home_setup_undo(setup);
+ if (r < 0)
+ return r;
+
+ log_info("Everything completed.");
+
+ print_size_summary(new_image_size, new_fs_size, &sfs);
+
+ *ret_home = TAKE_PTR(new_home);
+ return 0;
+}
+
+int home_passwd_luks(
+ UserRecord *h,
+ HomeSetup *setup,
+ PasswordCache *cache, /* the passwords acquired via PKCS#11/FIDO2 security tokens */
+ char **effective_passwords /* new passwords */) {
+
+ size_t volume_key_size, i, max_key_slots, n_effective;
+ _cleanup_(erase_and_freep) void *volume_key = NULL;
+ struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
+ const char *type;
+ char **list;
+ int r;
+
+ assert(h);
+ assert(user_record_storage(h) == USER_LUKS);
+ assert(setup);
+
+ type = crypt_get_type(setup->crypt_device);
+ if (!type)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine crypto device type.");
+
+ r = crypt_keyslot_max(type);
+ if (r <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine number of key slots.");
+ max_key_slots = r;
+
+ r = crypt_get_volume_key_size(setup->crypt_device);
+ if (r <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine volume key size.");
+ volume_key_size = (size_t) r;
+
+ volume_key = malloc(volume_key_size);
+ if (!volume_key)
+ return log_oom();
+
+ r = -ENOKEY;
+ FOREACH_POINTER(list, cache->pkcs11_passwords, cache->fido2_passwords, h->password) {
+ r = luks_try_passwords(setup->crypt_device, list, volume_key, &volume_key_size);
+ if (r != -ENOKEY)
+ break;
+ }
+ if (r == -ENOKEY)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Failed to unlock LUKS superblock with supplied passwords.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to unlocks LUKS superblock: %m");
+
+ n_effective = strv_length(effective_passwords);
+
+ build_good_pbkdf(&good_pbkdf, h);
+ build_minimal_pbkdf(&minimal_pbkdf, h);
+
+ for (i = 0; i < max_key_slots; i++) {
+ r = crypt_keyslot_destroy(setup->crypt_device, i);
+ if (r < 0 && !IN_SET(r, -ENOENT, -EINVAL)) /* Returns EINVAL or ENOENT if there's no key in this slot already */
+ return log_error_errno(r, "Failed to destroy LUKS password: %m");
+
+ if (i >= n_effective) {
+ if (r >= 0)
+ log_info("Destroyed LUKS key slot %zu.", i);
+ continue;
+ }
+
+ if (strv_contains(cache->pkcs11_passwords, effective_passwords[i]) ||
+ strv_contains(cache->fido2_passwords, effective_passwords[i])) {
+ log_debug("Using minimal PBKDF for slot %zu", i);
+ r = crypt_set_pbkdf_type(setup->crypt_device, &minimal_pbkdf);
+ } else {
+ log_debug("Using good PBKDF for slot %zu", i);
+ r = crypt_set_pbkdf_type(setup->crypt_device, &good_pbkdf);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to tweak PBKDF for slot %zu: %m", i);
+
+ r = crypt_keyslot_add_by_volume_key(
+ setup->crypt_device,
+ i,
+ volume_key,
+ volume_key_size,
+ effective_passwords[i],
+ strlen(effective_passwords[i]));
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up LUKS password: %m");
+
+ log_info("Updated LUKS key slot %zu.", i);
+ }
+
+ return 1;
+}
+
+int home_lock_luks(UserRecord *h) {
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_free_ char *dm_name = NULL, *dm_node = NULL;
+ _cleanup_close_ int root_fd = -1;
+ const char *p;
+ int r;
+
+ assert(h);
+
+ assert_se(p = user_record_home_directory(h));
+ root_fd = open(p, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
+ if (root_fd < 0)
+ return log_error_errno(errno, "Failed to open home directory: %m");
+
+ r = make_dm_names(h->user_name, &dm_name, &dm_node);
+ if (r < 0)
+ return r;
+
+ r = crypt_init_by_name(&cd, dm_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", dm_name);
+
+ log_info("Discovered used LUKS device %s.", dm_node);
+ cryptsetup_enable_logging(cd);
+
+ if (syncfs(root_fd) < 0) /* Snake oil, but let's better be safe than sorry */
+ return log_error_errno(errno, "Failed to synchronize file system %s: %m", p);
+
+ root_fd = safe_close(root_fd);
+
+ log_info("File system synchronized.");
+
+ /* Note that we don't invoke FIFREEZE here, it appears libcryptsetup/device-mapper already does that on its own for us */
+
+ r = crypt_suspend(cd, dm_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to suspend cryptsetup device: %s: %m", dm_node);
+
+ log_info("LUKS device suspended.");
+ return 0;
+}
+
+static int luks_try_resume(
+ struct crypt_device *cd,
+ const char *dm_name,
+ char **password) {
+
+ char **pp;
+ int r;
+
+ assert(cd);
+ assert(dm_name);
+
+ STRV_FOREACH(pp, password) {
+ r = crypt_resume_by_passphrase(
+ cd,
+ dm_name,
+ CRYPT_ANY_SLOT,
+ *pp,
+ strlen(*pp));
+ if (r >= 0) {
+ log_info("Resumed LUKS device %s.", dm_name);
+ return 0;
+ }
+
+ log_debug_errno(r, "Password %zu didn't work for resuming device: %m", (size_t) (pp - password));
+ }
+
+ return -ENOKEY;
+}
+
+int home_unlock_luks(UserRecord *h, PasswordCache *cache) {
+ _cleanup_free_ char *dm_name = NULL, *dm_node = NULL;
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ char **list;
+ int r;
+
+ assert(h);
+
+ r = make_dm_names(h->user_name, &dm_name, &dm_node);
+ if (r < 0)
+ return r;
+
+ r = crypt_init_by_name(&cd, dm_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", dm_name);
+
+ log_info("Discovered used LUKS device %s.", dm_node);
+ cryptsetup_enable_logging(cd);
+
+ r = -ENOKEY;
+ FOREACH_POINTER(list, cache->pkcs11_passwords, cache->fido2_passwords, h->password) {
+ r = luks_try_resume(cd, dm_name, list);
+ if (r != -ENOKEY)
+ break;
+ }
+ if (r == -ENOKEY)
+ return log_error_errno(r, "No valid password for LUKS superblock.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to resume LUKS superblock: %m");
+
+ log_info("LUKS device resumed.");
+ return 0;
+}
diff --git a/src/home/homework-luks.h b/src/home/homework-luks.h
new file mode 100644
index 0000000..c43bdfc
--- /dev/null
+++ b/src/home/homework-luks.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "cryptsetup-util.h"
+#include "homework.h"
+#include "user-record.h"
+
+int home_prepare_luks(UserRecord *h, bool already_activated, const char *force_image_path, PasswordCache *cache, HomeSetup *setup, UserRecord **ret_luks_home);
+
+int home_activate_luks(UserRecord *h, PasswordCache *cache, UserRecord **ret_home);
+int home_deactivate_luks(UserRecord *h);
+int home_trim_luks(UserRecord *h);
+
+int home_store_header_identity_luks(UserRecord *h, HomeSetup *setup, UserRecord *old_home);
+
+int home_create_luks(UserRecord *h, PasswordCache *cache, char **effective_passwords, UserRecord **ret_home);
+
+int home_validate_update_luks(UserRecord *h, HomeSetup *setup);
+
+int home_resize_luks(UserRecord *h, bool already_activated, PasswordCache *cache, HomeSetup *setup, UserRecord **ret_home);
+
+int home_passwd_luks(UserRecord *h, HomeSetup *setup, PasswordCache *cache, char **effective_passwords);
+
+int home_lock_luks(UserRecord *h);
+int home_unlock_luks(UserRecord *h, PasswordCache *cache);
+
+static inline uint64_t luks_volume_key_size_convert(struct crypt_device *cd) {
+ int k;
+
+ assert(cd);
+
+ /* Convert the "int" to uint64_t, which we usually use for byte sizes stored on disk. */
+
+ k = crypt_get_volume_key_size(cd);
+ if (k <= 0)
+ return UINT64_MAX;
+
+ return (uint64_t) k;
+}
+
+int run_fitrim(int root_fd);
+int run_fitrim_by_path(const char *root_path);
+int run_fallocate(int backing_fd, const struct stat *st);
+int run_fallocate_by_path(const char *backing_path);
+int run_mark_dirty(int fd, bool b);
+int run_mark_dirty_by_path(const char *path, bool b);
diff --git a/src/home/homework-mount.c b/src/home/homework-mount.c
new file mode 100644
index 0000000..5e73768
--- /dev/null
+++ b/src/home/homework-mount.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sched.h>
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "homework-mount.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "path-util.h"
+#include "string-util.h"
+
+static const char *mount_options_for_fstype(const char *fstype) {
+ if (streq(fstype, "ext4"))
+ return "noquota,user_xattr";
+ if (streq(fstype, "xfs"))
+ return "noquota";
+ if (streq(fstype, "btrfs"))
+ return "noacl";
+ return NULL;
+}
+
+int home_mount_node(const char *node, const char *fstype, bool discard, unsigned long flags) {
+ _cleanup_free_ char *joined = NULL;
+ const char *options, *discard_option;
+ int r;
+
+ options = mount_options_for_fstype(fstype);
+
+ discard_option = discard ? "discard" : "nodiscard";
+
+ if (options) {
+ joined = strjoin(options, ",", discard_option);
+ if (!joined)
+ return log_oom();
+
+ options = joined;
+ } else
+ options = discard_option;
+
+ r = mount_nofollow_verbose(LOG_ERR, node, "/run/systemd/user-home-mount", fstype, flags|MS_RELATIME, strempty(options));
+ if (r < 0)
+ return r;
+
+ log_info("Mounting file system completed.");
+ return 0;
+}
+
+int home_unshare_and_mount(const char *node, const char *fstype, bool discard, unsigned long flags) {
+ int r;
+
+ if (unshare(CLONE_NEWNS) < 0)
+ return log_error_errno(errno, "Couldn't unshare file system namespace: %m");
+
+ r = mount_nofollow_verbose(LOG_ERR, "/run", "/run", NULL, MS_SLAVE|MS_REC, NULL); /* Mark /run as MS_SLAVE in our new namespace */
+ if (r < 0)
+ return r;
+
+ (void) mkdir_p("/run/systemd/user-home-mount", 0700);
+
+ if (node)
+ return home_mount_node(node, fstype, discard, flags);
+
+ return 0;
+}
+
+int home_move_mount(const char *user_name_and_realm, const char *target) {
+ _cleanup_free_ char *subdir = NULL;
+ const char *d;
+ int r;
+
+ assert(user_name_and_realm);
+ assert(target);
+
+ if (user_name_and_realm) {
+ subdir = path_join("/run/systemd/user-home-mount/", user_name_and_realm);
+ if (!subdir)
+ return log_oom();
+
+ d = subdir;
+ } else
+ d = "/run/systemd/user-home-mount/";
+
+ (void) mkdir_p(target, 0700);
+
+ r = mount_nofollow_verbose(LOG_ERR, d, target, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ r = umount_verbose(LOG_ERR, "/run/systemd/user-home-mount", UMOUNT_NOFOLLOW);
+ if (r < 0)
+ return r;
+
+ log_info("Moving to final mount point %s completed.", target);
+ return 0;
+}
diff --git a/src/home/homework-mount.h b/src/home/homework-mount.h
new file mode 100644
index 0000000..2a4591c
--- /dev/null
+++ b/src/home/homework-mount.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int home_mount_node(const char *node, const char *fstype, bool discard, unsigned long flags);
+int home_unshare_and_mount(const char *node, const char *fstype, bool discard, unsigned long flags);
+int home_move_mount(const char *user_name_and_realm, const char *target);
diff --git a/src/home/homework-pkcs11.c b/src/home/homework-pkcs11.c
new file mode 100644
index 0000000..15402b1
--- /dev/null
+++ b/src/home/homework-pkcs11.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "hexdecoct.h"
+#include "homework-pkcs11.h"
+#include "pkcs11-util.h"
+#include "strv.h"
+
+int pkcs11_callback(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_SLOT_ID slot_id,
+ const CK_SLOT_INFO *slot_info,
+ const CK_TOKEN_INFO *token_info,
+ P11KitUri *uri,
+ void *userdata) {
+
+ _cleanup_(erase_and_freep) void *decrypted_key = NULL;
+ struct pkcs11_callback_data *data = userdata;
+ _cleanup_free_ char *token_label = NULL;
+ CK_TOKEN_INFO updated_token_info;
+ size_t decrypted_key_size;
+ CK_OBJECT_HANDLE object;
+ char **i;
+ CK_RV rv;
+ int r;
+
+ assert(m);
+ assert(slot_info);
+ assert(token_info);
+ assert(uri);
+ assert(data);
+
+ /* Special return values:
+ *
+ * -ENOANO → if we need a PIN but have none
+ * -ERFKILL → if a "protected authentication path" is needed but we have no OK to use it
+ * -EOWNERDEAD → if the PIN is locked
+ * -ENOLCK → if the supplied PIN is incorrect
+ * -ETOOMANYREFS → ditto, but only a few tries left
+ * -EUCLEAN → ditto, but only a single try left
+ */
+
+ token_label = pkcs11_token_label(token_info);
+ if (!token_label)
+ return log_oom();
+
+ if (FLAGS_SET(token_info->flags, CKF_PROTECTED_AUTHENTICATION_PATH)) {
+
+ if (data->secret->pkcs11_protected_authentication_path_permitted <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ERFKILL), "Security token requires authentication through protected authentication path.");
+
+ rv = m->C_Login(session, CKU_USER, NULL, 0);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to log into security token '%s': %s", token_label, p11_kit_strerror(rv));
+
+ log_info("Successfully logged into security token '%s' via protected authentication path.", token_label);
+ goto decrypt;
+ }
+
+ if (!FLAGS_SET(token_info->flags, CKF_LOGIN_REQUIRED)) {
+ log_info("No login into security token '%s' required.", token_label);
+ goto decrypt;
+ }
+
+ if (strv_isempty(data->secret->token_pin))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOANO), "Security token requires PIN.");
+
+ STRV_FOREACH(i, data->secret->token_pin) {
+ rv = m->C_Login(session, CKU_USER, (CK_UTF8CHAR*) *i, strlen(*i));
+ if (rv == CKR_OK) {
+ log_info("Successfully logged into security token '%s' with PIN.", token_label);
+ goto decrypt;
+ }
+ if (rv == CKR_PIN_LOCKED)
+ return log_error_errno(SYNTHETIC_ERRNO(EOWNERDEAD), "PIN of security token is blocked. Please unblock it first.");
+ if (!IN_SET(rv, CKR_PIN_INCORRECT, CKR_PIN_LEN_RANGE))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to log into security token '%s': %s", token_label, p11_kit_strerror(rv));
+ }
+
+ rv = m->C_GetTokenInfo(slot_id, &updated_token_info);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire updated security token information for slot %lu: %s", slot_id, p11_kit_strerror(rv));
+
+ if (FLAGS_SET(updated_token_info.flags, CKF_USER_PIN_FINAL_TRY))
+ return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN), "PIN of security token incorrect, only a single try left.");
+ if (FLAGS_SET(updated_token_info.flags, CKF_USER_PIN_COUNT_LOW))
+ return log_error_errno(SYNTHETIC_ERRNO(ETOOMANYREFS), "PIN of security token incorrect, only a few tries left.");
+
+ return log_error_errno(SYNTHETIC_ERRNO(ENOLCK), "PIN of security token incorrect.");
+
+decrypt:
+ r = pkcs11_token_find_private_key(m, session, uri, &object);
+ if (r < 0)
+ return r;
+
+ r = pkcs11_token_decrypt_data(m, session, object, data->encrypted_key->data, data->encrypted_key->size, &decrypted_key, &decrypted_key_size);
+ if (r < 0)
+ return r;
+
+ if (base64mem(decrypted_key, decrypted_key_size, &data->decrypted_password) < 0)
+ return log_oom();
+
+ return 1;
+}
diff --git a/src/home/homework-pkcs11.h b/src/home/homework-pkcs11.h
new file mode 100644
index 0000000..c8674e0
--- /dev/null
+++ b/src/home/homework-pkcs11.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_P11KIT
+#include "memory-util.h"
+#include "user-record.h"
+#include "pkcs11-util.h"
+
+struct pkcs11_callback_data {
+ UserRecord *user_record;
+ UserRecord *secret;
+ Pkcs11EncryptedKey *encrypted_key;
+ char *decrypted_password;
+};
+
+static inline void pkcs11_callback_data_release(struct pkcs11_callback_data *data) {
+ erase_and_free(data->decrypted_password);
+}
+
+int pkcs11_callback(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_SLOT_ID slot_id, const CK_SLOT_INFO *slot_info, const CK_TOKEN_INFO *token_info, P11KitUri *uri, void *userdata);
+#endif
diff --git a/src/home/homework-quota.c b/src/home/homework-quota.c
new file mode 100644
index 0000000..7001870
--- /dev/null
+++ b/src/home/homework-quota.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include <sys/quota.h>
+
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "errno-util.h"
+#include "format-util.h"
+#include "homework-quota.h"
+#include "missing_magic.h"
+#include "quota-util.h"
+#include "stat-util.h"
+#include "user-util.h"
+
+int home_update_quota_btrfs(UserRecord *h, const char *path) {
+ int r;
+
+ assert(h);
+ assert(path);
+
+ if (h->disk_size == UINT64_MAX)
+ return 0;
+
+ /* If the user wants quota, enable it */
+ r = btrfs_quota_enable(path, true);
+ if (r == -ENOTTY)
+ return log_error_errno(r, "No btrfs quota support on subvolume %s.", path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable btrfs quota support on %s.", path);
+
+ r = btrfs_qgroup_set_limit(path, 0, h->disk_size);
+ if (r < 0)
+ return log_error_errno(r, "Faled to set disk quota on subvolume %s: %m", path);
+
+ log_info("Set btrfs quota.");
+
+ return 0;
+}
+
+int home_update_quota_classic(UserRecord *h, const char *path) {
+ struct dqblk req;
+ dev_t devno;
+ int r;
+
+ assert(h);
+ assert(uid_is_valid(h->uid));
+ assert(path);
+
+ if (h->disk_size == UINT64_MAX)
+ return 0;
+
+ r = get_block_device(path, &devno);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine block device of %s: %m", path);
+ if (devno == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system %s not backed by a block device.", path);
+
+ r = quotactl_devno(QCMD_FIXED(Q_GETQUOTA, USRQUOTA), devno, h->uid, &req);
+ if (r < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ return log_error_errno(r, "No UID quota support on %s.", path);
+
+ if (r != -ESRCH)
+ return log_error_errno(r, "Failed to query disk quota for UID " UID_FMT ": %m", h->uid);
+
+ zero(req);
+ } else {
+ /* Shortcut things if everything is set up properly already */
+ if (FLAGS_SET(req.dqb_valid, QIF_BLIMITS) && h->disk_size / QIF_DQBLKSIZE == req.dqb_bhardlimit) {
+ log_info("Configured quota already matches the intended setting, not updating quota.");
+ return 0;
+ }
+ }
+
+ req.dqb_valid = QIF_BLIMITS;
+ req.dqb_bsoftlimit = req.dqb_bhardlimit = h->disk_size / QIF_DQBLKSIZE;
+
+ r = quotactl_devno(QCMD_FIXED(Q_SETQUOTA, USRQUOTA), devno, h->uid, &req);
+ if (r < 0) {
+ if (r == -ESRCH)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "UID quota not available on %s.", path);
+
+ return log_error_errno(r, "Failed to set disk quota for UID " UID_FMT ": %m", h->uid);
+ }
+
+ log_info("Updated per-UID quota.");
+
+ return 0;
+}
+
+int home_update_quota_auto(UserRecord *h, const char *path) {
+ struct statfs sfs;
+ int r;
+
+ assert(h);
+
+ if (h->disk_size == UINT64_MAX)
+ return 0;
+
+ if (!path) {
+ path = user_record_image_path(h);
+ if (!path)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Home record lacks image path.");
+ }
+
+ if (statfs(path, &sfs) < 0)
+ return log_error_errno(errno, "Failed to statfs() file system: %m");
+
+ if (is_fs_type(&sfs, XFS_SB_MAGIC) ||
+ is_fs_type(&sfs, EXT4_SUPER_MAGIC))
+ return home_update_quota_classic(h, path);
+
+ if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) {
+
+ r = btrfs_is_subvol(path);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to test if %s is a subvolume: %m", path);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Directory %s is not a subvolume, cannot apply quota.", path);
+
+ return home_update_quota_btrfs(h, path);
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Type of directory %s not known, cannot apply quota.", path);
+}
diff --git a/src/home/homework-quota.h b/src/home/homework-quota.h
new file mode 100644
index 0000000..a21c9ba
--- /dev/null
+++ b/src/home/homework-quota.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "user-record.h"
+
+int home_update_quota_btrfs(UserRecord *h, const char *path);
+int home_update_quota_classic(UserRecord *h, const char *path);
+int home_update_quota_auto(UserRecord *h, const char *path);
diff --git a/src/home/homework.c b/src/home/homework.c
new file mode 100644
index 0000000..b61f650
--- /dev/null
+++ b/src/home/homework.c
@@ -0,0 +1,1747 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+#include <sys/mount.h>
+
+#include "chown-recursive.h"
+#include "copy.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "home-util.h"
+#include "homework-cifs.h"
+#include "homework-directory.h"
+#include "homework-fido2.h"
+#include "homework-fscrypt.h"
+#include "homework-luks.h"
+#include "homework-mount.h"
+#include "homework-pkcs11.h"
+#include "homework.h"
+#include "libcrypt-util.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "missing_magic.h"
+#include "modhex.h"
+#include "mount-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "virt.h"
+
+/* Make sure a bad password always results in a 3s delay, no matter what */
+#define BAD_PASSWORD_DELAY_USEC (3 * USEC_PER_SEC)
+
+void password_cache_free(PasswordCache *cache) {
+ if (!cache)
+ return;
+
+ cache->pkcs11_passwords = strv_free_erase(cache->pkcs11_passwords);
+ cache->fido2_passwords = strv_free_erase(cache->fido2_passwords);
+}
+
+int user_record_authenticate(
+ UserRecord *h,
+ UserRecord *secret,
+ PasswordCache *cache,
+ bool strict_verify) {
+
+ bool need_password = false, need_recovery_key = false, need_token = false, need_pin = false, need_protected_authentication_path_permitted = false, need_user_presence_permitted = false,
+ pin_locked = false, pin_incorrect = false, pin_incorrect_few_tries_left = false, pin_incorrect_one_try_left = false, token_action_timeout = false;
+ int r;
+
+ assert(h);
+ assert(secret);
+
+ /* Tries to authenticate a user record with the supplied secrets. i.e. checks whether at least one
+ * supplied plaintext passwords matches a hashed password field of the user record. Or if a
+ * configured PKCS#11 or FIDO2 token is around and can unlock the record.
+ *
+ * Note that the 'cache' parameter is both an input and output parameter: it contains lists of
+ * configured, decrypted PKCS#11/FIDO2 passwords. We typically have to call this function multiple
+ * times over the course of an operation (think: on login we authenticate the host user record, the
+ * record embedded in the LUKS record and the one embedded in $HOME). Hence we keep a list of
+ * passwords we already decrypted, so that we don't have to do the (slow and potentially interactive)
+ * PKCS#11/FIDO2 dance for the relevant token again and again. */
+
+ /* First, let's see if the supplied plain-text passwords work? */
+ r = user_record_test_password(h, secret);
+ if (r == -ENOKEY)
+ need_password = true;
+ else if (r == -ENXIO)
+ log_debug_errno(r, "User record has no hashed passwords, plaintext passwords not tested.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to validate password of record: %m");
+ else {
+ log_info("Provided password unlocks user record.");
+ return 1;
+ }
+
+ /* Similar, but test against the recovery keys */
+ r = user_record_test_recovery_key(h, secret);
+ if (r == -ENOKEY)
+ need_recovery_key = true;
+ else if (r == -ENXIO)
+ log_debug_errno(r, "User record has no recovery keys, plaintext passwords not tested against it.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to validate the recovery key of the record: %m");
+ else {
+ log_info("Provided password is a recovery key that unlocks the user record.");
+ return 1;
+ }
+
+ if (need_password && need_recovery_key)
+ log_info("None of the supplied plaintext passwords unlock the user record's hashed passwords or recovery keys.");
+ else if (need_password)
+ log_info("None of the supplied plaintext passwords unlock the user record's hashed passwords.");
+ else
+ log_info("None of the supplied plaintext passwords unlock the user record's hashed recovery keys.");
+
+ /* Second, test cached PKCS#11 passwords */
+ for (size_t n = 0; n < h->n_pkcs11_encrypted_key; n++) {
+ char **pp;
+
+ STRV_FOREACH(pp, cache->pkcs11_passwords) {
+ r = test_password_one(h->pkcs11_encrypted_key[n].hashed_password, *pp);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check supplied PKCS#11 password: %m");
+ if (r > 0) {
+ log_info("Previously acquired PKCS#11 password unlocks user record.");
+ return 1;
+ }
+ }
+ }
+
+ /* Third, test cached FIDO2 passwords */
+ for (size_t n = 0; n < h->n_fido2_hmac_salt; n++) {
+ char **pp;
+
+ /* See if any of the previously calculated passwords work */
+ STRV_FOREACH(pp, cache->fido2_passwords) {
+ r = test_password_one(h->fido2_hmac_salt[n].hashed_password, *pp);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check supplied FIDO2 password: %m");
+ if (r > 0) {
+ log_info("Previously acquired FIDO2 password unlocks user record.");
+ return 0;
+ }
+ }
+ }
+
+ /* Fourth, let's see if any of the PKCS#11 security tokens are plugged in and help us */
+ for (size_t n = 0; n < h->n_pkcs11_encrypted_key; n++) {
+#if HAVE_P11KIT
+ _cleanup_(pkcs11_callback_data_release) struct pkcs11_callback_data data = {
+ .user_record = h,
+ .secret = secret,
+ .encrypted_key = h->pkcs11_encrypted_key + n,
+ };
+
+ r = pkcs11_find_token(data.encrypted_key->uri, pkcs11_callback, &data);
+ switch (r) {
+ case -EAGAIN:
+ need_token = true;
+ break;
+ case -ENOANO:
+ need_pin = true;
+ break;
+ case -ERFKILL:
+ need_protected_authentication_path_permitted = true;
+ break;
+ case -EOWNERDEAD:
+ pin_locked = true;
+ break;
+ case -ENOLCK:
+ pin_incorrect = true;
+ break;
+ case -ETOOMANYREFS:
+ pin_incorrect = pin_incorrect_few_tries_left = true;
+ break;
+ case -EUCLEAN:
+ pin_incorrect = pin_incorrect_few_tries_left = pin_incorrect_one_try_left = true;
+ break;
+ default:
+ if (r < 0)
+ return r;
+
+ r = test_password_one(data.encrypted_key->hashed_password, data.decrypted_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to test PKCS#11 password: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Configured PKCS#11 security token %s does not decrypt encrypted key correctly.", data.encrypted_key->uri);
+
+ log_info("Decrypted password from PKCS#11 security token %s unlocks user record.", data.encrypted_key->uri);
+
+ r = strv_extend(&cache->pkcs11_passwords, data.decrypted_password);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+ }
+#else
+ need_token = true;
+ break;
+#endif
+ }
+
+ /* Fifth, let's see if any of the FIDO2 security tokens are plugged in and help us */
+ for (size_t n = 0; n < h->n_fido2_hmac_salt; n++) {
+#if HAVE_LIBFIDO2
+ _cleanup_(erase_and_freep) char *decrypted_password = NULL;
+
+ r = fido2_use_token(h, secret, h->fido2_hmac_salt + n, &decrypted_password);
+ switch (r) {
+ case -EAGAIN:
+ need_token = true;
+ break;
+ case -ENOANO:
+ need_pin = true;
+ break;
+ case -EOWNERDEAD:
+ pin_locked = true;
+ break;
+ case -ENOLCK:
+ pin_incorrect = true;
+ break;
+ case -EMEDIUMTYPE:
+ need_user_presence_permitted = true;
+ break;
+ case -ENOSTR:
+ token_action_timeout = true;
+ break;
+ default:
+ if (r < 0)
+ return r;
+
+ r = test_password_one(h->fido2_hmac_salt[n].hashed_password, decrypted_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to test FIDO2 password: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Configured FIDO2 security token does not decrypt encrypted key correctly.");
+
+ log_info("Decrypted password from FIDO2 security token unlocks user record.");
+
+ r = strv_extend(&cache->fido2_passwords, decrypted_password);
+ if (r < 0)
+ return log_oom();
+
+ return 1;
+ }
+#else
+ need_token = true;
+ break;
+#endif
+ }
+
+ /* Ordered by "relevance", i.e. the most "important" or "interesting" error condition is returned. */
+ if (pin_incorrect_one_try_left)
+ return -EUCLEAN;
+ if (pin_incorrect_few_tries_left)
+ return -ETOOMANYREFS;
+ if (pin_incorrect)
+ return -ENOLCK;
+ if (pin_locked)
+ return -EOWNERDEAD;
+ if (token_action_timeout)
+ return -ENOSTR;
+ if (need_protected_authentication_path_permitted)
+ return -ERFKILL;
+ if (need_user_presence_permitted)
+ return -EMEDIUMTYPE;
+ if (need_pin)
+ return -ENOANO;
+ if (need_token)
+ return -EBADSLT;
+ if (need_password)
+ return -ENOKEY;
+ if (need_recovery_key)
+ return -EREMOTEIO;
+
+ /* Hmm, this means neither PCKS#11/FIDO2 nor classic hashed passwords or recovery keys were supplied,
+ * we cannot authenticate this reasonably */
+ if (strict_verify)
+ return log_debug_errno(SYNTHETIC_ERRNO(EKEYREVOKED),
+ "No hashed passwords, no recovery keys and no PKCS#11/FIDO2 tokens defined, cannot authenticate user record, refusing.");
+
+ /* If strict verification is off this means we are possibly in the case where we encountered an
+ * unfixated record, i.e. a synthetic one that accordingly lacks any authentication data. In this
+ * case, allow the authentication to pass for now, so that the second (or third) authentication level
+ * (the ones of the user record in the LUKS header or inside the home directory) will then catch
+ * invalid passwords. The second/third authentication always runs in strict verification mode. */
+ log_debug("No hashed passwords, not recovery keys and no PKCS#11 tokens defined in record, cannot authenticate user record. "
+ "Deferring to embedded user record.");
+ return 0;
+}
+
+int home_setup_undo(HomeSetup *setup) {
+ int r = 0, q;
+
+ assert(setup);
+
+ if (setup->root_fd >= 0) {
+ if (setup->do_offline_fitrim) {
+ q = run_fitrim(setup->root_fd);
+ if (q < 0)
+ r = q;
+ }
+
+ setup->root_fd = safe_close(setup->root_fd);
+ }
+
+ if (setup->undo_mount) {
+ q = umount_verbose(LOG_DEBUG, "/run/systemd/user-home-mount", UMOUNT_NOFOLLOW);
+ if (q < 0)
+ r = q;
+ }
+
+ if (setup->undo_dm && setup->crypt_device && setup->dm_name) {
+ q = crypt_deactivate(setup->crypt_device, setup->dm_name);
+ if (q < 0)
+ r = q;
+ }
+
+ if (setup->image_fd >= 0) {
+ if (setup->do_offline_fallocate) {
+ q = run_fallocate(setup->image_fd, NULL);
+ if (q < 0)
+ r = q;
+ }
+
+ if (setup->do_mark_clean) {
+ q = run_mark_dirty(setup->image_fd, false);
+ if (q < 0)
+ r = q;
+ }
+
+ setup->image_fd = safe_close(setup->image_fd);
+ }
+
+ setup->undo_mount = false;
+ setup->undo_dm = false;
+ setup->do_offline_fitrim = false;
+ setup->do_offline_fallocate = false;
+ setup->do_mark_clean = false;
+
+ setup->dm_name = mfree(setup->dm_name);
+ setup->dm_node = mfree(setup->dm_node);
+
+ setup->loop = loop_device_unref(setup->loop);
+ crypt_free(setup->crypt_device);
+ setup->crypt_device = NULL;
+
+ explicit_bzero_safe(setup->volume_key, setup->volume_key_size);
+ setup->volume_key = mfree(setup->volume_key);
+ setup->volume_key_size = 0;
+
+ return r;
+}
+
+int home_prepare(
+ UserRecord *h,
+ bool already_activated,
+ PasswordCache *cache,
+ HomeSetup *setup,
+ UserRecord **ret_header_home) {
+
+ int r;
+
+ assert(h);
+ assert(setup);
+ assert(!setup->loop);
+ assert(!setup->crypt_device);
+ assert(setup->root_fd < 0);
+ assert(!setup->undo_dm);
+ assert(!setup->undo_mount);
+
+ /* Makes a home directory accessible (through the root_fd file descriptor, not by path!). */
+
+ switch (user_record_storage(h)) {
+
+ case USER_LUKS:
+ return home_prepare_luks(h, already_activated, NULL, cache, setup, ret_header_home);
+
+ case USER_SUBVOLUME:
+ case USER_DIRECTORY:
+ r = home_prepare_directory(h, already_activated, setup);
+ break;
+
+ case USER_FSCRYPT:
+ r = home_prepare_fscrypt(h, already_activated, cache, setup);
+ break;
+
+ case USER_CIFS:
+ r = home_prepare_cifs(h, already_activated, setup);
+ break;
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(ENOLINK), "Processing home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+ }
+
+ if (r < 0)
+ return r;
+
+ if (ret_header_home)
+ *ret_header_home = NULL;
+
+ return r;
+}
+
+int home_sync_and_statfs(int root_fd, struct statfs *ret) {
+ assert(root_fd >= 0);
+
+ /* Let's sync this to disk, so that the disk space reported by fstatfs() below is accurate (for file
+ * systems such as btrfs where this is determined lazily). */
+
+ if (syncfs(root_fd) < 0)
+ return log_error_errno(errno, "Failed to synchronize file system: %m");
+
+ if (ret)
+ if (fstatfs(root_fd, ret) < 0)
+ return log_error_errno(errno, "Failed to statfs() file system: %m");
+
+ log_info("Synchronized disk.");
+
+ return 0;
+}
+
+static int read_identity_file(int root_fd, JsonVariant **ret) {
+ _cleanup_(fclosep) FILE *identity_file = NULL;
+ _cleanup_close_ int identity_fd = -1;
+ unsigned line, column;
+ int r;
+
+ assert(root_fd >= 0);
+ assert(ret);
+
+ identity_fd = openat(root_fd, ".identity", O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
+ if (identity_fd < 0)
+ return log_error_errno(errno, "Failed to open .identity file in home directory: %m");
+
+ r = fd_verify_regular(identity_fd);
+ if (r < 0)
+ return log_error_errno(r, "Embedded identity file is not a regular file, refusing: %m");
+
+ identity_file = take_fdopen(&identity_fd, "r");
+ if (!identity_file)
+ return log_oom();
+
+ r = json_parse_file(identity_file, ".identity", JSON_PARSE_SENSITIVE, ret, &line, &column);
+ if (r < 0)
+ return log_error_errno(r, "[.identity:%u:%u] Failed to parse JSON data: %m", line, column);
+
+ log_info("Read embedded .identity file.");
+
+ return 0;
+}
+
+static int write_identity_file(int root_fd, JsonVariant *v, uid_t uid) {
+ _cleanup_(json_variant_unrefp) JsonVariant *normalized = NULL;
+ _cleanup_(fclosep) FILE *identity_file = NULL;
+ _cleanup_close_ int identity_fd = -1;
+ _cleanup_free_ char *fn = NULL;
+ int r;
+
+ assert(root_fd >= 0);
+ assert(v);
+
+ normalized = json_variant_ref(v);
+
+ r = json_variant_normalize(&normalized);
+ if (r < 0)
+ log_warning_errno(r, "Failed to normalize user record, ignoring: %m");
+
+ r = tempfn_random(".identity", NULL, &fn);
+ if (r < 0)
+ return r;
+
+ identity_fd = openat(root_fd, fn, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
+ if (identity_fd < 0)
+ return log_error_errno(errno, "Failed to create .identity file in home directory: %m");
+
+ identity_file = take_fdopen(&identity_fd, "w");
+ if (!identity_file) {
+ r = log_oom();
+ goto fail;
+ }
+
+ json_variant_dump(normalized, JSON_FORMAT_PRETTY, identity_file, NULL);
+
+ r = fflush_and_check(identity_file);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write .identity file: %m");
+ goto fail;
+ }
+
+ if (fchown(fileno(identity_file), uid, uid) < 0) {
+ log_error_errno(r, "Failed to change ownership of identity file: %m");
+ goto fail;
+ }
+
+ if (renameat(root_fd, fn, root_fd, ".identity") < 0) {
+ r = log_error_errno(errno, "Failed to move identity file into place: %m");
+ goto fail;
+ }
+
+ log_info("Wrote embedded .identity file.");
+
+ return 0;
+
+fail:
+ (void) unlinkat(root_fd, fn, 0);
+ return r;
+}
+
+int home_load_embedded_identity(
+ UserRecord *h,
+ int root_fd,
+ UserRecord *header_home,
+ UserReconcileMode mode,
+ PasswordCache *cache,
+ UserRecord **ret_embedded_home,
+ UserRecord **ret_new_home) {
+
+ _cleanup_(user_record_unrefp) UserRecord *embedded_home = NULL, *intermediate_home = NULL, *new_home = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ int r;
+
+ assert(h);
+ assert(root_fd >= 0);
+
+ r = read_identity_file(root_fd, &v);
+ if (r < 0)
+ return r;
+
+ embedded_home = user_record_new();
+ if (!embedded_home)
+ return log_oom();
+
+ r = user_record_load(embedded_home, v, USER_RECORD_LOAD_EMBEDDED);
+ if (r < 0)
+ return r;
+
+ if (!user_record_compatible(h, embedded_home))
+ return log_error_errno(SYNTHETIC_ERRNO(EREMCHG), "Embedded home record not compatible with host record, refusing.");
+
+ /* Insist that credentials the user supplies also unlocks any embedded records. */
+ r = user_record_authenticate(embedded_home, h, cache, /* strict_verify= */ true);
+ if (r < 0)
+ return r;
+ assert(r > 0); /* Insist that a password was verified */
+
+ /* At this point we have three records to deal with:
+ *
+ * · The record we got passed from the host
+ * · The record included in the LUKS header (only if LUKS is used)
+ * · The record in the home directory itself (~.identity)
+ *
+ * Now we have to reconcile all three, and let the newest one win. */
+
+ if (header_home) {
+ /* Note we relax the requirements here. Instead of insisting that the host record is strictly
+ * newer, let's also be OK if its equally new. If it is, we'll however insist that the
+ * embedded record must be newer, so that we update at least one of the two. */
+
+ r = user_record_reconcile(h, header_home, mode == USER_RECONCILE_REQUIRE_NEWER ? USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL : mode, &intermediate_home);
+ if (r == -EREMCHG) /* this was supposed to be checked earlier already, but let's check this again */
+ return log_error_errno(r, "Identity stored on host and in header don't match, refusing.");
+ if (r == -ESTALE)
+ return log_error_errno(r, "Embedded identity record is newer than supplied record, refusing.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to reconcile host and header identities: %m");
+ if (r == USER_RECONCILE_EMBEDDED_WON)
+ log_info("Reconciling header user identity completed (header version was newer).");
+ else if (r == USER_RECONCILE_HOST_WON) {
+ log_info("Reconciling header user identity completed (host version was newer).");
+
+ if (mode == USER_RECONCILE_REQUIRE_NEWER) /* Host version is newer than the header
+ * version, hence we'll update
+ * something. This means we can relax the
+ * requirements on the embedded
+ * identity. */
+ mode = USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL;
+ } else {
+ assert(r == USER_RECONCILE_IDENTICAL);
+ log_info("Reconciling user identities completed (host and header version were identical).");
+ }
+
+ h = intermediate_home;
+ }
+
+ r = user_record_reconcile(h, embedded_home, mode, &new_home);
+ if (r == -EREMCHG)
+ return log_error_errno(r, "Identity stored on host and in home don't match, refusing.");
+ if (r == -ESTALE)
+ return log_error_errno(r, "Embedded identity record is equally new or newer than supplied record, refusing.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to reconcile host and embedded identities: %m");
+ if (r == USER_RECONCILE_EMBEDDED_WON)
+ log_info("Reconciling embedded user identity completed (embedded version was newer).");
+ else if (r == USER_RECONCILE_HOST_WON)
+ log_info("Reconciling embedded user identity completed (host version was newer).");
+ else {
+ assert(r == USER_RECONCILE_IDENTICAL);
+ log_info("Reconciling embedded user identity completed (host and embedded version were identical).");
+ }
+
+ if (ret_embedded_home)
+ *ret_embedded_home = TAKE_PTR(embedded_home);
+
+ if (ret_new_home)
+ *ret_new_home = TAKE_PTR(new_home);
+
+ return 0;
+}
+
+int home_store_embedded_identity(UserRecord *h, int root_fd, uid_t uid, UserRecord *old_home) {
+ _cleanup_(user_record_unrefp) UserRecord *embedded = NULL;
+ int r;
+
+ assert(h);
+ assert(root_fd >= 0);
+ assert(uid_is_valid(uid));
+
+ r = user_record_clone(h, USER_RECORD_EXTRACT_EMBEDDED, &embedded);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine new embedded record: %m");
+
+ if (old_home && user_record_equal(old_home, embedded)) {
+ log_debug("Not updating embedded home record.");
+ return 0;
+ }
+
+ /* The identity has changed, let's update it in the image */
+ r = write_identity_file(root_fd, embedded->json, h->uid);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static const char *file_system_type_fd(int fd) {
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ if (fstatfs(fd, &sfs) < 0) {
+ log_debug_errno(errno, "Failed to statfs(): %m");
+ return NULL;
+ }
+
+ if (is_fs_type(&sfs, XFS_SB_MAGIC))
+ return "xfs";
+ if (is_fs_type(&sfs, EXT4_SUPER_MAGIC))
+ return "ext4";
+ if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC))
+ return "btrfs";
+
+ return NULL;
+}
+
+int home_extend_embedded_identity(UserRecord *h, UserRecord *used, HomeSetup *setup) {
+ int r;
+
+ assert(h);
+ assert(used);
+ assert(setup);
+
+ r = user_record_add_binding(
+ h,
+ user_record_storage(used),
+ user_record_image_path(used),
+ setup->found_partition_uuid,
+ setup->found_luks_uuid,
+ setup->found_fs_uuid,
+ setup->crypt_device ? crypt_get_cipher(setup->crypt_device) : NULL,
+ setup->crypt_device ? crypt_get_cipher_mode(setup->crypt_device) : NULL,
+ setup->crypt_device ? luks_volume_key_size_convert(setup->crypt_device) : UINT64_MAX,
+ file_system_type_fd(setup->root_fd),
+ user_record_home_directory(used),
+ used->uid,
+ (gid_t) used->uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update binding in record: %m");
+
+ return 0;
+}
+
+static int chown_recursive_directory(int root_fd, uid_t uid) {
+ int r;
+
+ assert(root_fd >= 0);
+ assert(uid_is_valid(uid));
+
+ r = fd_chown_recursive(root_fd, uid, (gid_t) uid, 0777);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change ownership of files and directories: %m");
+ if (r == 0)
+ log_info("Recursive changing of ownership not necessary, skipped.");
+ else
+ log_info("Recursive changing of ownership completed.");
+
+ return 0;
+}
+
+int home_refresh(
+ UserRecord *h,
+ HomeSetup *setup,
+ UserRecord *header_home,
+ PasswordCache *cache,
+ struct statfs *ret_statfs,
+ UserRecord **ret_new_home) {
+
+ _cleanup_(user_record_unrefp) UserRecord *embedded_home = NULL, *new_home = NULL;
+ int r;
+
+ assert(h);
+ assert(setup);
+ assert(ret_new_home);
+
+ /* When activating a home directory, does the identity work: loads the identity from the $HOME
+ * directory, reconciles it with our idea, chown()s everything. */
+
+ r = home_load_embedded_identity(h, setup->root_fd, header_home, USER_RECONCILE_ANY, cache, &embedded_home, &new_home);
+ if (r < 0)
+ return r;
+
+ r = home_store_header_identity_luks(new_home, setup, header_home);
+ if (r < 0)
+ return r;
+
+ r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
+ if (r < 0)
+ return r;
+
+ r = chown_recursive_directory(setup->root_fd, h->uid);
+ if (r < 0)
+ return r;
+
+ r = home_sync_and_statfs(setup->root_fd, ret_statfs);
+ if (r < 0)
+ return r;
+
+ *ret_new_home = TAKE_PTR(new_home);
+ return 0;
+}
+
+static int home_activate(UserRecord *h, UserRecord **ret_home) {
+ _cleanup_(password_cache_free) PasswordCache cache = {};
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
+ int r;
+
+ assert(h);
+
+ if (!h->user_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks user name, refusing.");
+ if (!uid_is_valid(h->uid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks UID, refusing.");
+ if (!IN_SET(user_record_storage(h), USER_LUKS, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT, USER_CIFS))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Activating home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+
+ r = user_record_authenticate(h, h, &cache, /* strict_verify= */ false);
+ if (r < 0)
+ return r;
+
+ r = user_record_test_home_directory_and_warn(h);
+ if (r < 0)
+ return r;
+ if (r == USER_TEST_MOUNTED)
+ return log_error_errno(SYNTHETIC_ERRNO(EALREADY), "Home directory %s is already mounted, refusing.", user_record_home_directory(h));
+
+ r = user_record_test_image_path_and_warn(h);
+ if (r < 0)
+ return r;
+ if (r == USER_TEST_ABSENT)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Image path %s is missing, refusing.", user_record_image_path(h));
+
+ switch (user_record_storage(h)) {
+
+ case USER_LUKS:
+ r = home_activate_luks(h, &cache, &new_home);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case USER_SUBVOLUME:
+ case USER_DIRECTORY:
+ case USER_FSCRYPT:
+ r = home_activate_directory(h, &cache, &new_home);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case USER_CIFS:
+ r = home_activate_cifs(h, &cache, &new_home);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ assert_not_reached("unexpected type");
+ }
+
+ /* Note that the returned object might either be a reference to an updated version of the existing
+ * home object, or a reference to a newly allocated home object. The caller has to be able to deal
+ * with both, and consider the old object out-of-date. */
+ if (user_record_equal(h, new_home)) {
+ *ret_home = NULL;
+ return 0; /* no identity change */
+ }
+
+ *ret_home = TAKE_PTR(new_home);
+ return 1; /* identity updated */
+}
+
+static int home_deactivate(UserRecord *h, bool force) {
+ bool done = false;
+ int r;
+
+ assert(h);
+
+ if (!h->user_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record incomplete, refusing.");
+ if (!IN_SET(user_record_storage(h), USER_LUKS, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT, USER_CIFS))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Deactivating home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+
+ r = user_record_test_home_directory_and_warn(h);
+ if (r < 0)
+ return r;
+ if (r == USER_TEST_MOUNTED) {
+ if (user_record_storage(h) == USER_LUKS) {
+ r = home_trim_luks(h);
+ if (r < 0)
+ return r;
+ }
+
+ if (umount2(user_record_home_directory(h), UMOUNT_NOFOLLOW | (force ? MNT_FORCE|MNT_DETACH : 0)) < 0)
+ return log_error_errno(errno, "Failed to unmount %s: %m", user_record_home_directory(h));
+
+ log_info("Unmounting completed.");
+ done = true;
+ } else
+ log_info("Directory %s is already unmounted.", user_record_home_directory(h));
+
+ if (user_record_storage(h) == USER_LUKS) {
+ r = home_deactivate_luks(h);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ done = true;
+ }
+
+ if (!done)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOEXEC), "Home is not active.");
+
+ log_info("Everything completed.");
+ return 0;
+}
+
+static int copy_skel(int root_fd, const char *skel) {
+ int r;
+
+ assert(root_fd >= 0);
+
+ r = copy_tree_at(AT_FDCWD, skel, root_fd, ".", UID_INVALID, GID_INVALID, COPY_MERGE|COPY_REPLACE);
+ if (r == -ENOENT) {
+ log_info("Skeleton directory %s missing, ignoring.", skel);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy in %s: %m", skel);
+
+ log_info("Copying in %s completed.", skel);
+ return 0;
+}
+
+static int change_access_mode(int root_fd, mode_t m) {
+ assert(root_fd >= 0);
+
+ if (fchmod(root_fd, m) < 0)
+ return log_error_errno(errno, "Failed to change access mode of top-level directory: %m");
+
+ log_info("Changed top-level directory access mode to 0%o.", m);
+ return 0;
+}
+
+int home_populate(UserRecord *h, int dir_fd) {
+ int r;
+
+ assert(h);
+ assert(dir_fd >= 0);
+
+ r = copy_skel(dir_fd, user_record_skeleton_directory(h));
+ if (r < 0)
+ return r;
+
+ r = home_store_embedded_identity(h, dir_fd, h->uid, NULL);
+ if (r < 0)
+ return r;
+
+ r = chown_recursive_directory(dir_fd, h->uid);
+ if (r < 0)
+ return r;
+
+ r = change_access_mode(dir_fd, user_record_access_mode(h));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int user_record_compile_effective_passwords(
+ UserRecord *h,
+ PasswordCache *cache,
+ char ***ret_effective_passwords) {
+
+ _cleanup_(strv_free_erasep) char **effective = NULL;
+ size_t n;
+ char **i;
+ int r;
+
+ assert(h);
+ assert(cache);
+
+ /* We insist on at least one classic hashed password to be defined in addition to any PKCS#11 one, as
+ * a safe fallback, but also to simplify the password changing algorithm: there we require providing
+ * the old literal password only (and do not care for the old PKCS#11 token) */
+
+ if (strv_isempty(h->hashed_password))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "User record has no hashed passwords, refusing.");
+
+ /* Generates the list of plaintext passwords to propagate to LUKS/fscrypt devices, and checks whether
+ * we have a plaintext password for each hashed one. If we are missing one we'll fail, since we
+ * couldn't sync fscrypt/LUKS to the login account properly. */
+
+ STRV_FOREACH(i, h->hashed_password) {
+ bool found = false;
+ char **j;
+
+ log_debug("Looking for plaintext password for: %s", *i);
+
+ /* Let's scan all provided plaintext passwords */
+ STRV_FOREACH(j, h->password) {
+ r = test_password_one(*i, *j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to test plaintext password: %m");
+ if (r > 0) {
+ if (ret_effective_passwords) {
+ r = strv_extend(&effective, *j);
+ if (r < 0)
+ return log_oom();
+ }
+
+ log_debug("Found literal plaintext password.");
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Missing plaintext password for defined hashed password");
+ }
+
+ for (n = 0; n < h->n_recovery_key; n++) {
+ bool found = false;
+ char **j;
+
+ log_debug("Looking for plaintext recovery key for: %s", h->recovery_key[n].hashed_password);
+
+ STRV_FOREACH(j, h->password) {
+ _cleanup_(erase_and_freep) char *mangled = NULL;
+ const char *p;
+
+ if (streq(h->recovery_key[n].type, "modhex64")) {
+
+ r = normalize_recovery_key(*j, &mangled);
+ if (r == -EINVAL) /* Not properly formatted, probably a regular password. */
+ continue;
+ if (r < 0)
+ return log_error_errno(r, "Failed to normalize recovery key: %m");
+
+ p = mangled;
+ } else
+ p = *j;
+
+ r = test_password_one(h->recovery_key[n].hashed_password, p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to test plaintext recovery key: %m");
+ if (r > 0) {
+ if (ret_effective_passwords) {
+ r = strv_extend(&effective, p);
+ if (r < 0)
+ return log_oom();
+ }
+
+ log_debug("Found plaintext recovery key.");
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return log_error_errno(SYNTHETIC_ERRNO(EREMOTEIO), "Missing plaintext recovery key for defined recovery key");
+ }
+
+ for (n = 0; n < h->n_pkcs11_encrypted_key; n++) {
+#if HAVE_P11KIT
+ _cleanup_(pkcs11_callback_data_release) struct pkcs11_callback_data data = {
+ .user_record = h,
+ .secret = h,
+ .encrypted_key = h->pkcs11_encrypted_key + n,
+ };
+
+ r = pkcs11_find_token(data.encrypted_key->uri, pkcs11_callback, &data);
+ if (r == -EAGAIN)
+ return -EBADSLT;
+ if (r < 0)
+ return r;
+
+ r = test_password_one(data.encrypted_key->hashed_password, data.decrypted_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to test PKCS#11 password: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Decrypted password from token is not correct, refusing.");
+
+ if (ret_effective_passwords) {
+ r = strv_extend(&effective, data.decrypted_password);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = strv_extend(&cache->pkcs11_passwords, data.decrypted_password);
+ if (r < 0)
+ return log_oom();
+#else
+ return -EBADSLT;
+#endif
+ }
+
+ for (n = 0; n < h->n_fido2_hmac_salt; n++) {
+#if HAVE_LIBFIDO2
+ _cleanup_(erase_and_freep) char *decrypted_password = NULL;
+
+ r = fido2_use_token(h, h, h->fido2_hmac_salt + n, &decrypted_password);
+ if (r < 0)
+ return r;
+
+ r = test_password_one(h->fido2_hmac_salt[n].hashed_password, decrypted_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to test FIDO2 password: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Decrypted password from token is not correct, refusing.");
+
+ if (ret_effective_passwords) {
+ r = strv_extend(&effective, decrypted_password);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = strv_extend(&cache->fido2_passwords, decrypted_password);
+ if (r < 0)
+ return log_oom();
+#else
+ return -EBADSLT;
+#endif
+ }
+
+ if (ret_effective_passwords)
+ *ret_effective_passwords = TAKE_PTR(effective);
+
+ return 0;
+}
+
+static int determine_default_storage(UserStorage *ret) {
+ UserStorage storage = _USER_STORAGE_INVALID;
+ const char *e;
+ int r;
+
+ assert(ret);
+
+ /* homed tells us via an environment variable which default storage to use */
+ e = getenv("SYSTEMD_HOME_DEFAULT_STORAGE");
+ if (e) {
+ storage = user_storage_from_string(e);
+ if (storage < 0)
+ log_warning("$SYSTEMD_HOME_DEFAULT_STORAGE set to invalid storage type, ignoring: %s", e);
+ else {
+ log_info("Using configured default storage '%s'.", user_storage_to_string(storage));
+ *ret = storage;
+ return 0;
+ }
+ }
+
+ /* When neither user nor admin specified the storage type to use, fix it to be LUKS — unless we run
+ * in a container where loopback devices and LUKS/DM are not available. Also, if /home is encrypted
+ * anyway, let's avoid duplicate encryption. Note that we typically default to the assumption of
+ * "classic" storage for most operations. However, if we create a new home, then let's user LUKS if
+ * nothing is specified. */
+
+ r = detect_container();
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether we are in a container: %m");
+ if (r == 0) {
+ r = path_is_encrypted("/home");
+ if (r < 0)
+ log_warning_errno(r, "Failed to determine if /home is encrypted, ignoring: %m");
+ if (r <= 0) {
+ log_info("Using automatic default storage of '%s'.", user_storage_to_string(USER_LUKS));
+ *ret = USER_LUKS;
+ return 0;
+ }
+
+ log_info("/home is encrypted, not using '%s' storage, in order to avoid double encryption.", user_storage_to_string(USER_LUKS));
+ } else
+ log_info("Running in container, not using '%s' storage.", user_storage_to_string(USER_LUKS));
+
+ r = path_is_fs_type("/home", BTRFS_SUPER_MAGIC);
+ if (r < 0)
+ log_warning_errno(r, "Failed to determine file system of /home, ignoring: %m");
+ if (r > 0) {
+ log_info("/home is on btrfs, using '%s' as storage.", user_storage_to_string(USER_SUBVOLUME));
+ *ret = USER_SUBVOLUME;
+ } else {
+ log_info("/home is on simple file system, using '%s' as storage.", user_storage_to_string(USER_DIRECTORY));
+ *ret = USER_DIRECTORY;
+ }
+
+ return 0;
+}
+
+static int home_create(UserRecord *h, UserRecord **ret_home) {
+ _cleanup_(strv_free_erasep) char **effective_passwords = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
+ _cleanup_(password_cache_free) PasswordCache cache = {};
+ UserStorage new_storage = _USER_STORAGE_INVALID;
+ const char *new_fs = NULL;
+ int r;
+
+ assert(h);
+
+ if (!h->user_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks name, refusing.");
+ if (!uid_is_valid(h->uid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks UID, refusing.");
+
+ r = user_record_compile_effective_passwords(h, &cache, &effective_passwords);
+ if (r < 0)
+ return r;
+
+ r = user_record_test_home_directory_and_warn(h);
+ if (r < 0)
+ return r;
+ if (r != USER_TEST_ABSENT)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Home directory %s already exists, refusing.", user_record_home_directory(h));
+
+ if (h->storage < 0) {
+ r = determine_default_storage(&new_storage);
+ if (r < 0)
+ return r;
+ }
+
+ if ((h->storage == USER_LUKS ||
+ (h->storage < 0 && new_storage == USER_LUKS)) &&
+ !h->file_system_type)
+ new_fs = getenv("SYSTEMD_HOME_DEFAULT_FILE_SYSTEM_TYPE");
+
+ if (new_storage >= 0 || new_fs) {
+ r = user_record_add_binding(
+ h,
+ new_storage,
+ NULL,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ SD_ID128_NULL,
+ NULL,
+ NULL,
+ UINT64_MAX,
+ new_fs,
+ NULL,
+ UID_INVALID,
+ GID_INVALID);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change storage type to LUKS: %m");
+ }
+
+ r = user_record_test_image_path_and_warn(h);
+ if (r < 0)
+ return r;
+ if (!IN_SET(r, USER_TEST_ABSENT, USER_TEST_UNDEFINED, USER_TEST_MAYBE))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Image path %s already exists, refusing.", user_record_image_path(h));
+
+ switch (user_record_storage(h)) {
+
+ case USER_LUKS:
+ r = home_create_luks(h, &cache, effective_passwords, &new_home);
+ break;
+
+ case USER_DIRECTORY:
+ case USER_SUBVOLUME:
+ r = home_create_directory_or_subvolume(h, &new_home);
+ break;
+
+ case USER_FSCRYPT:
+ r = home_create_fscrypt(h, effective_passwords, &new_home);
+ break;
+
+ case USER_CIFS:
+ r = home_create_cifs(h, &new_home);
+ break;
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY),
+ "Creating home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+ }
+ if (r < 0)
+ return r;
+
+ if (user_record_equal(h, new_home)) {
+ *ret_home = NULL;
+ return 0;
+ }
+
+ *ret_home = TAKE_PTR(new_home);
+ return 1;
+}
+
+static int home_remove(UserRecord *h) {
+ bool deleted = false;
+ const char *ip, *hd;
+ int r;
+
+ assert(h);
+
+ if (!h->user_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks user name, refusing.");
+ if (!IN_SET(user_record_storage(h), USER_LUKS, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT, USER_CIFS))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Removing home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+
+ hd = user_record_home_directory(h);
+
+ r = user_record_test_home_directory_and_warn(h);
+ if (r < 0)
+ return r;
+ if (r == USER_TEST_MOUNTED)
+ return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Directory %s is still mounted, refusing.", hd);
+
+ assert(hd);
+
+ r = user_record_test_image_path_and_warn(h);
+ if (r < 0)
+ return r;
+
+ ip = user_record_image_path(h);
+
+ switch (user_record_storage(h)) {
+
+ case USER_LUKS: {
+ struct stat st;
+
+ assert(ip);
+
+ if (stat(ip, &st) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat() %s: %m", ip);
+
+ } else {
+ if (S_ISREG(st.st_mode)) {
+ if (unlink(ip) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to remove %s: %m", ip);
+ } else
+ deleted = true;
+
+ } else if (S_ISBLK(st.st_mode))
+ log_info("Not removing file system on block device %s.", ip);
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Image file %s is neither block device, nor regular, refusing removal.", ip);
+ }
+
+ break;
+ }
+
+ case USER_SUBVOLUME:
+ case USER_DIRECTORY:
+ case USER_FSCRYPT:
+ assert(ip);
+
+ r = rm_rf(ip, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_warning_errno(r, "Failed to remove %s: %m", ip);
+ } else
+ deleted = true;
+
+ /* If the image path and the home directory are the same invalidate the home directory, so
+ * that we don't remove it anymore */
+ if (path_equal(ip, hd))
+ hd = NULL;
+
+ break;
+
+ case USER_CIFS:
+ /* Nothing else to do here: we won't remove remote stuff. */
+ log_info("Not removing home directory on remote server.");
+ break;
+
+ default:
+ assert_not_reached("unknown storage type");
+ }
+
+ if (hd) {
+ if (rmdir(hd) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to remove %s, ignoring: %m", hd);
+ } else
+ deleted = true;
+ }
+
+ if (deleted)
+ log_info("Everything completed.");
+ else
+ return log_notice_errno(SYNTHETIC_ERRNO(EALREADY),
+ "Nothing to remove.");
+
+ return 0;
+}
+
+static int home_validate_update(UserRecord *h, HomeSetup *setup) {
+ bool has_mount = false;
+ int r;
+
+ assert(h);
+ assert(setup);
+
+ if (!h->user_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks user name, refusing.");
+ if (!uid_is_valid(h->uid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record lacks UID, refusing.");
+ if (!IN_SET(user_record_storage(h), USER_LUKS, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT, USER_CIFS))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Processing home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+
+ r = user_record_test_home_directory_and_warn(h);
+ if (r < 0)
+ return r;
+
+ has_mount = r == USER_TEST_MOUNTED;
+
+ r = user_record_test_image_path_and_warn(h);
+ if (r < 0)
+ return r;
+ if (r == USER_TEST_ABSENT)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Image path %s does not exist", user_record_image_path(h));
+
+ switch (user_record_storage(h)) {
+
+ case USER_DIRECTORY:
+ case USER_SUBVOLUME:
+ case USER_FSCRYPT:
+ case USER_CIFS:
+ break;
+
+ case USER_LUKS: {
+ r = home_validate_update_luks(h, setup);
+ if (r < 0)
+ return r;
+ if ((r > 0) != has_mount)
+ return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Home mount incompletely set up.");
+
+ break;
+ }
+
+ default:
+ assert_not_reached("unexpected storage type");
+ }
+
+ return has_mount; /* return true if the home record is already active */
+}
+
+static int home_update(UserRecord *h, UserRecord **ret) {
+ _cleanup_(user_record_unrefp) UserRecord *new_home = NULL, *header_home = NULL, *embedded_home = NULL;
+ _cleanup_(home_setup_undo) HomeSetup setup = HOME_SETUP_INIT;
+ _cleanup_(password_cache_free) PasswordCache cache = {};
+ bool already_activated = false;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ r = user_record_authenticate(h, h, &cache, /* strict_verify= */ true);
+ if (r < 0)
+ return r;
+ assert(r > 0); /* Insist that a password was verified */
+
+ r = home_validate_update(h, &setup);
+ if (r < 0)
+ return r;
+
+ already_activated = r > 0;
+
+ r = home_prepare(h, already_activated, &cache, &setup, &header_home);
+ if (r < 0)
+ return r;
+
+ r = home_load_embedded_identity(h, setup.root_fd, header_home, USER_RECONCILE_REQUIRE_NEWER, &cache, &embedded_home, &new_home);
+ if (r < 0)
+ return r;
+
+ r = home_store_header_identity_luks(new_home, &setup, header_home);
+ if (r < 0)
+ return r;
+
+ r = home_store_embedded_identity(new_home, setup.root_fd, h->uid, embedded_home);
+ if (r < 0)
+ return r;
+
+ r = home_extend_embedded_identity(new_home, h, &setup);
+ if (r < 0)
+ return r;
+
+ r = home_sync_and_statfs(setup.root_fd, NULL);
+ if (r < 0)
+ return r;
+
+ r = home_setup_undo(&setup);
+ if (r < 0)
+ return r;
+
+ log_info("Everything completed.");
+
+ *ret = TAKE_PTR(new_home);
+ return 0;
+}
+
+static int home_resize(UserRecord *h, UserRecord **ret) {
+ _cleanup_(home_setup_undo) HomeSetup setup = HOME_SETUP_INIT;
+ _cleanup_(password_cache_free) PasswordCache cache = {};
+ bool already_activated = false;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ if (h->disk_size == UINT64_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No target size specified, refusing.");
+
+ r = user_record_authenticate(h, h, &cache, /* strict_verify= */ true);
+ if (r < 0)
+ return r;
+ assert(r > 0); /* Insist that a password was verified */
+
+ r = home_validate_update(h, &setup);
+ if (r < 0)
+ return r;
+
+ already_activated = r > 0;
+
+ switch (user_record_storage(h)) {
+
+ case USER_LUKS:
+ return home_resize_luks(h, already_activated, &cache, &setup, ret);
+
+ case USER_DIRECTORY:
+ case USER_SUBVOLUME:
+ case USER_FSCRYPT:
+ return home_resize_directory(h, already_activated, &cache, &setup, ret);
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Resizing home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+ }
+}
+
+static int home_passwd(UserRecord *h, UserRecord **ret_home) {
+ _cleanup_(user_record_unrefp) UserRecord *header_home = NULL, *embedded_home = NULL, *new_home = NULL;
+ _cleanup_(strv_free_erasep) char **effective_passwords = NULL;
+ _cleanup_(home_setup_undo) HomeSetup setup = HOME_SETUP_INIT;
+ _cleanup_(password_cache_free) PasswordCache cache = {};
+ bool already_activated = false;
+ int r;
+
+ assert(h);
+ assert(ret_home);
+
+ if (!IN_SET(user_record_storage(h), USER_LUKS, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Changing password of home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+
+ r = user_record_compile_effective_passwords(h, &cache, &effective_passwords);
+ if (r < 0)
+ return r;
+
+ r = home_validate_update(h, &setup);
+ if (r < 0)
+ return r;
+
+ already_activated = r > 0;
+
+ r = home_prepare(h, already_activated, &cache, &setup, &header_home);
+ if (r < 0)
+ return r;
+
+ r = home_load_embedded_identity(h, setup.root_fd, header_home, USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL, &cache, &embedded_home, &new_home);
+ if (r < 0)
+ return r;
+
+ switch (user_record_storage(h)) {
+
+ case USER_LUKS:
+ r = home_passwd_luks(h, &setup, &cache, effective_passwords);
+ if (r < 0)
+ return r;
+ break;
+
+ case USER_FSCRYPT:
+ r = home_passwd_fscrypt(h, &setup, &cache, effective_passwords);
+ if (r < 0)
+ return r;
+ break;
+
+ default:
+ break;
+ }
+
+ r = home_store_header_identity_luks(new_home, &setup, header_home);
+ if (r < 0)
+ return r;
+
+ r = home_store_embedded_identity(new_home, setup.root_fd, h->uid, embedded_home);
+ if (r < 0)
+ return r;
+
+ r = home_extend_embedded_identity(new_home, h, &setup);
+ if (r < 0)
+ return r;
+
+ r = home_sync_and_statfs(setup.root_fd, NULL);
+ if (r < 0)
+ return r;
+
+ r = home_setup_undo(&setup);
+ if (r < 0)
+ return r;
+
+ log_info("Everything completed.");
+
+ *ret_home = TAKE_PTR(new_home);
+ return 1;
+}
+
+static int home_inspect(UserRecord *h, UserRecord **ret_home) {
+ _cleanup_(user_record_unrefp) UserRecord *header_home = NULL, *new_home = NULL;
+ _cleanup_(home_setup_undo) HomeSetup setup = HOME_SETUP_INIT;
+ _cleanup_(password_cache_free) PasswordCache cache = {};
+ bool already_activated = false;
+ int r;
+
+ assert(h);
+ assert(ret_home);
+
+ r = user_record_authenticate(h, h, &cache, /* strict_verify= */ false);
+ if (r < 0)
+ return r;
+
+ r = home_validate_update(h, &setup);
+ if (r < 0)
+ return r;
+
+ already_activated = r > 0;
+
+ r = home_prepare(h, already_activated, &cache, &setup, &header_home);
+ if (r < 0)
+ return r;
+
+ r = home_load_embedded_identity(h, setup.root_fd, header_home, USER_RECONCILE_ANY, &cache, NULL, &new_home);
+ if (r < 0)
+ return r;
+
+ r = home_extend_embedded_identity(new_home, h, &setup);
+ if (r < 0)
+ return r;
+
+ r = home_setup_undo(&setup);
+ if (r < 0)
+ return r;
+
+ log_info("Everything completed.");
+
+ *ret_home = TAKE_PTR(new_home);
+ return 1;
+}
+
+static int home_lock(UserRecord *h) {
+ int r;
+
+ assert(h);
+
+ if (!h->user_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record incomplete, refusing.");
+ if (user_record_storage(h) != USER_LUKS)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Locking home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+
+ r = user_record_test_home_directory_and_warn(h);
+ if (r < 0)
+ return r;
+ if (r != USER_TEST_MOUNTED)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOEXEC), "Home directory of %s is not mounted, can't lock.", h->user_name);
+
+ r = home_lock_luks(h);
+ if (r < 0)
+ return r;
+
+ log_info("Everything completed.");
+ return 1;
+}
+
+static int home_unlock(UserRecord *h) {
+ _cleanup_(password_cache_free) PasswordCache cache = {};
+ int r;
+
+ assert(h);
+
+ if (!h->user_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "User record incomplete, refusing.");
+ if (user_record_storage(h) != USER_LUKS)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY), "Unlocking home directories of type '%s' currently not supported.", user_storage_to_string(user_record_storage(h)));
+
+ /* Note that we don't check if $HOME is actually mounted, since we want to avoid disk accesses on
+ * that mount until we have resumed the device. */
+
+ r = user_record_authenticate(h, h, &cache, /* strict_verify= */ false);
+ if (r < 0)
+ return r;
+
+ r = home_unlock_luks(h, &cache);
+ if (r < 0)
+ return r;
+
+ log_info("Everything completed.");
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(user_record_unrefp) UserRecord *home = NULL, *new_home = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(fclosep) FILE *opened_file = NULL;
+ unsigned line = 0, column = 0;
+ const char *json_path = NULL;
+ FILE *json_file;
+ usec_t start;
+ int r;
+
+ start = now(CLOCK_MONOTONIC);
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc < 2 || argc > 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes one or two arguments.");
+
+ if (argc > 2) {
+ json_path = argv[2];
+
+ opened_file = fopen(json_path, "re");
+ if (!opened_file)
+ return log_error_errno(errno, "Failed to open %s: %m", json_path);
+
+ json_file = opened_file;
+ } else {
+ json_path = "<stdin>";
+ json_file = stdin;
+ }
+
+ r = json_parse_file(json_file, json_path, JSON_PARSE_SENSITIVE, &v, &line, &column);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u:%u] Failed to parse JSON data: %m", json_path, line, column);
+
+ home = user_record_new();
+ if (!home)
+ return log_oom();
+
+ r = user_record_load(home, v, USER_RECORD_LOAD_FULL|USER_RECORD_LOG);
+ if (r < 0)
+ return r;
+
+ /* Well known return values of these operations, that systemd-homed knows and converts to proper D-Bus errors:
+ *
+ * EMSGSIZE → file systems of this type cannot be shrunk
+ * ETXTBSY → file systems of this type can only be shrunk offline
+ * ERANGE → file system size too small
+ * ENOLINK → system does not support selected storage backend
+ * EPROTONOSUPPORT → system does not support selected file system
+ * ENOTTY → operation not support on this storage
+ * ESOCKTNOSUPPORT → operation not support on this file system
+ * ENOKEY → password incorrect (or not sufficient, or not supplied)
+ * EREMOTEIO → recovery key incorrect (or not sufficeint, or not supplied — only if no passwords defined)
+ * EBADSLT → similar, but PKCS#11 device is defined and might be able to provide password, if it was plugged in which it is not
+ * ENOANO → suitable PKCS#11/FIDO2 device found, but PIN is missing to unlock it
+ * ERFKILL → suitable PKCS#11 device found, but OK to ask for on-device interactive authentication not given
+ * EMEDIUMTYPE → suitable FIDO2 device found, but OK to ask for user presence not given
+ * ENOSTR → suitable FIDO2 device found, but user didn't react to action request on token quickly enough
+ * EOWNERDEAD → suitable PKCS#11/FIDO2 device found, but its PIN is locked
+ * ENOLCK → suitable PKCS#11/FIDO2 device found, but PIN incorrect
+ * ETOOMANYREFS → suitable PKCS#11 device found, but PIN incorrect, and only few tries left
+ * EUCLEAN → suitable PKCS#11 device found, but PIN incorrect, and only one try left
+ * EBUSY → file system is currently active
+ * ENOEXEC → file system is currently not active
+ * ENOSPC → not enough disk space for operation
+ * EKEYREVOKED → user record has not suitable hashed password or pkcs#11 entry, we cannot authenticate
+ */
+
+ if (streq(argv[1], "activate"))
+ r = home_activate(home, &new_home);
+ else if (streq(argv[1], "deactivate"))
+ r = home_deactivate(home, false);
+ else if (streq(argv[1], "deactivate-force"))
+ r = home_deactivate(home, true);
+ else if (streq(argv[1], "create"))
+ r = home_create(home, &new_home);
+ else if (streq(argv[1], "remove"))
+ r = home_remove(home);
+ else if (streq(argv[1], "update"))
+ r = home_update(home, &new_home);
+ else if (streq(argv[1], "resize"))
+ r = home_resize(home, &new_home);
+ else if (streq(argv[1], "passwd"))
+ r = home_passwd(home, &new_home);
+ else if (streq(argv[1], "inspect"))
+ r = home_inspect(home, &new_home);
+ else if (streq(argv[1], "lock"))
+ r = home_lock(home);
+ else if (streq(argv[1], "unlock"))
+ r = home_unlock(home);
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown verb '%s'.", argv[1]);
+ if (IN_SET(r, -ENOKEY, -EREMOTEIO) && !strv_isempty(home->password) ) { /* There were passwords specified but they were incorrect */
+ usec_t end, n, d;
+
+ /* Make sure bad password replies always take at least 3s, and if longer multiples of 3s, so
+ * that it's not clear how long we actually needed for our calculations. */
+ n = now(CLOCK_MONOTONIC);
+ assert(n >= start);
+
+ d = usec_sub_unsigned(n, start);
+ if (d > BAD_PASSWORD_DELAY_USEC)
+ end = start + DIV_ROUND_UP(d, BAD_PASSWORD_DELAY_USEC) * BAD_PASSWORD_DELAY_USEC;
+ else
+ end = start + BAD_PASSWORD_DELAY_USEC;
+
+ if (n < end)
+ (void) usleep(usec_sub_unsigned(end, n));
+ }
+ if (r < 0)
+ return r;
+
+ /* We always pass the new record back, regardless if it changed or not. This allows our caller to
+ * prepare a fresh record, send to us, and only if it works use it without having to keep a local
+ * copy. */
+ if (new_home)
+ json_variant_dump(new_home->json, JSON_FORMAT_NEWLINE, stdout, NULL);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/home/homework.h b/src/home/homework.h
new file mode 100644
index 0000000..fb53fd4
--- /dev/null
+++ b/src/home/homework.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/fs.h>
+#include <sys/vfs.h>
+
+#include "sd-id128.h"
+
+#include "loop-util.h"
+#include "user-record.h"
+#include "user-record-util.h"
+
+typedef struct HomeSetup {
+ char *dm_name;
+ char *dm_node;
+
+ LoopDevice *loop;
+ struct crypt_device *crypt_device;
+ int root_fd;
+ int image_fd;
+ sd_id128_t found_partition_uuid;
+ sd_id128_t found_luks_uuid;
+ sd_id128_t found_fs_uuid;
+
+ uint8_t fscrypt_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+
+ void *volume_key;
+ size_t volume_key_size;
+
+ bool undo_dm;
+ bool undo_mount;
+ bool do_offline_fitrim;
+ bool do_offline_fallocate;
+ bool do_mark_clean;
+
+ uint64_t partition_offset;
+ uint64_t partition_size;
+} HomeSetup;
+
+typedef struct PasswordCache {
+ /* Decoding passwords from security tokens is expensive and typically requires user interaction, hence cache any we already figured out. */
+ char **pkcs11_passwords;
+ char **fido2_passwords;
+} PasswordCache;
+
+void password_cache_free(PasswordCache *cache);
+
+#define HOME_SETUP_INIT \
+ { \
+ .root_fd = -1, \
+ .image_fd = -1, \
+ .partition_offset = UINT64_MAX, \
+ .partition_size = UINT64_MAX, \
+ }
+
+int home_setup_undo(HomeSetup *setup);
+
+int home_prepare(UserRecord *h, bool already_activated, PasswordCache *cache, HomeSetup *setup, UserRecord **ret_header_home);
+
+int home_refresh(UserRecord *h, HomeSetup *setup, UserRecord *header_home, PasswordCache *cache, struct statfs *ret_statfs, UserRecord **ret_new_home);
+
+int home_populate(UserRecord *h, int dir_fd);
+
+int home_load_embedded_identity(UserRecord *h, int root_fd, UserRecord *header_home, UserReconcileMode mode, PasswordCache *cache, UserRecord **ret_embedded_home, UserRecord **ret_new_home);
+int home_store_embedded_identity(UserRecord *h, int root_fd, uid_t uid, UserRecord *old_home);
+int home_extend_embedded_identity(UserRecord *h, UserRecord *used, HomeSetup *setup);
+
+int user_record_authenticate(UserRecord *h, UserRecord *secret, PasswordCache *cache, bool strict_verify);
+
+int home_sync_and_statfs(int root_fd, struct statfs *ret);
diff --git a/src/home/meson.build b/src/home/meson.build
new file mode 100644
index 0000000..444dc47
--- /dev/null
+++ b/src/home/meson.build
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_homework_sources = files('''
+ home-util.c
+ home-util.h
+ homework-cifs.c
+ homework-cifs.h
+ homework-directory.c
+ homework-directory.h
+ homework-fido2.h
+ homework-fscrypt.c
+ homework-fscrypt.h
+ homework-luks.c
+ homework-luks.h
+ homework-mount.c
+ homework-mount.h
+ homework-pkcs11.h
+ homework-quota.c
+ homework-quota.h
+ homework.c
+ homework.h
+ modhex.c
+ modhex.h
+ user-record-util.c
+ user-record-util.h
+'''.split())
+
+if conf.get('HAVE_P11KIT') == 1
+ systemd_homework_sources += files('homework-pkcs11.c')
+endif
+if conf.get('HAVE_LIBFIDO2') == 1
+ systemd_homework_sources += files('homework-fido2.c')
+endif
+
+systemd_homed_sources = files('''
+ home-util.c
+ home-util.h
+ homed-bus.c
+ homed-bus.h
+ homed-conf.c
+ homed-conf.h
+ homed-home-bus.c
+ homed-home-bus.h
+ homed-home.c
+ homed-home.h
+ homed-manager-bus.c
+ homed-manager-bus.h
+ homed-manager.c
+ homed-manager.h
+ homed-operation.c
+ homed-operation.h
+ homed-varlink.c
+ homed-varlink.h
+ homed.c
+ modhex.c
+ modhex.h
+ user-record-pwquality.c
+ user-record-pwquality.h
+ user-record-sign.c
+ user-record-sign.h
+ user-record-util.c
+ user-record-util.h
+'''.split())
+
+homed_gperf_c = custom_target(
+ 'homed_gperf.c',
+ input : 'homed-gperf.gperf',
+ output : 'homed-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+systemd_homed_sources += [homed_gperf_c]
+
+homectl_sources = files('''
+ home-util.c
+ home-util.h
+ homectl-fido2.c
+ homectl-fido2.h
+ homectl-pkcs11.c
+ homectl-pkcs11.h
+ homectl-recovery-key.c
+ homectl-recovery-key.h
+ homectl.c
+ modhex.c
+ modhex.h
+ user-record-pwquality.c
+ user-record-pwquality.h
+ user-record-util.c
+ user-record-util.h
+'''.split())
+
+pam_systemd_home_sym = 'src/home/pam_systemd_home.sym'
+pam_systemd_home_c = files('''
+ home-util.c
+ home-util.h
+ modhex.c
+ modhex.h
+ pam_systemd_home.c
+ user-record-util.c
+ user-record-util.h
+'''.split())
+
+if conf.get('ENABLE_HOMED') == 1
+ install_data('org.freedesktop.home1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.home1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.home1.policy',
+ install_dir : polkitpolicydir)
+
+ if install_sysconfdir
+ install_data('homed.conf',
+ install_dir : pkgsysconfdir)
+ endif
+endif
+
+tests += [
+ [['src/home/test-modhex.c',
+ 'src/home/modhex.c',
+ 'src/home/modhex.h'],
+ [],
+ []],
+]
diff --git a/src/home/modhex.c b/src/home/modhex.c
new file mode 100644
index 0000000..ae5f895
--- /dev/null
+++ b/src/home/modhex.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "modhex.h"
+#include "macro.h"
+#include "memory-util.h"
+
+const char modhex_alphabet[16] = {
+ 'c', 'b', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'r', 't', 'u', 'v'
+};
+
+int decode_modhex_char(char x) {
+
+ for (size_t i = 0; i < ELEMENTSOF(modhex_alphabet); i++)
+ /* Check both upper and lowercase */
+ if (modhex_alphabet[i] == x || (modhex_alphabet[i] - 32) == x)
+ return i;
+
+ return -EINVAL;
+}
+
+int normalize_recovery_key(const char *password, char **ret) {
+ _cleanup_(erase_and_freep) char *mangled = NULL;
+ size_t l;
+
+ assert(password);
+ assert(ret);
+
+ l = strlen(password);
+ if (!IN_SET(l,
+ MODHEX_RAW_LENGTH*2, /* syntax without dashes */
+ MODHEX_FORMATTED_LENGTH-1)) /* syntax with dashes */
+ return -EINVAL;
+
+ mangled = new(char, MODHEX_FORMATTED_LENGTH);
+ if (!mangled)
+ return -ENOMEM;
+
+ for (size_t i = 0, j = 0; i < MODHEX_RAW_LENGTH; i++) {
+ size_t k;
+ int a, b;
+
+ if (l == MODHEX_RAW_LENGTH*2)
+ /* Syntax without dashes */
+ k = i * 2;
+ else {
+ /* Syntax with dashes */
+ assert(l == MODHEX_FORMATTED_LENGTH-1);
+ k = i * 2 + i / 4;
+
+ if (i > 0 && i % 4 == 0 && password[k-1] != '-')
+ return -EINVAL;
+ }
+
+ a = decode_modhex_char(password[k]);
+ if (a < 0)
+ return -EINVAL;
+ b = decode_modhex_char(password[k+1]);
+ if (b < 0)
+ return -EINVAL;
+
+ mangled[j++] = modhex_alphabet[a];
+ mangled[j++] = modhex_alphabet[b];
+
+ if (i % 4 == 3)
+ mangled[j++] = '-';
+ }
+
+ mangled[MODHEX_FORMATTED_LENGTH-1] = 0;
+
+ *ret = TAKE_PTR(mangled);
+ return 0;
+}
diff --git a/src/home/modhex.h b/src/home/modhex.h
new file mode 100644
index 0000000..7776ed0
--- /dev/null
+++ b/src/home/modhex.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* 256 bit keys = 32 bytes */
+#define MODHEX_RAW_LENGTH 32
+
+/* Formatted as sequences of 64 modhex characters, with dashes inserted after multiples of 8 chars (incl. trailing NUL) */
+#define MODHEX_FORMATTED_LENGTH (MODHEX_RAW_LENGTH*2/8*9)
+
+extern const char modhex_alphabet[16];
+
+int decode_modhex_char(char x);
+
+int normalize_recovery_key(const char *password, char **ret);
diff --git a/src/home/org.freedesktop.home1.conf b/src/home/org.freedesktop.home1.conf
new file mode 100644
index 0000000..1975d5f
--- /dev/null
+++ b/src/home/org.freedesktop.home1.conf
@@ -0,0 +1,193 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.home1"/>
+ <allow send_destination="org.freedesktop.home1"/>
+ <allow receive_sender="org.freedesktop.home1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.home1"/>
+
+ <!-- generic interfaces -->
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <!-- Manager object -->
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="GetHomeByName"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="GetHomeByUID"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="GetUserRecordByName"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="GetUserRecordByUID"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="ListHomes"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="ActivateHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="DeactivateHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="RegisterHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="UnregisterHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="CreateHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="RealizeHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="RemoveHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="FixateHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="AuthenticateHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="UpdateHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="ResizeHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="ChangePasswordHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="LockHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="UnlockHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="AcquireHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="RefHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="ReleaseHome"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Manager"
+ send_member="LockAllHomes"/>
+
+ <!-- Home object -->
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Activate"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Deactivate"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Unregister"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Realize"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Remove"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Fixate"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Authenticate"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Update"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Resize"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="ChangePassword"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Lock"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Unlock"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Acquire"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Ref"/>
+
+ <allow send_destination="org.freedesktop.home1"
+ send_interface="org.freedesktop.home1.Home"
+ send_member="Release"/>
+
+ <allow receive_sender="org.freedesktop.home1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/home/org.freedesktop.home1.policy b/src/home/org.freedesktop.home1.policy
new file mode 100644
index 0000000..10ad7c2
--- /dev/null
+++ b/src/home/org.freedesktop.home1.policy
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.home1.create-home">
+ <description gettext-domain="systemd">Create a home area</description>
+ <message gettext-domain="systemd">Authentication is required to create a user's home area.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.home1.remove-home">
+ <description gettext-domain="systemd">Remove a home area</description>
+ <message gettext-domain="systemd">Authentication is required to remove a user's home area.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.home1.authenticate-home">
+ <description gettext-domain="systemd">Check credentials of a home area</description>
+ <message gettext-domain="systemd">Authentication is required to check credentials against a user's home area.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.home1.update-home">
+ <description gettext-domain="systemd">Update a home area</description>
+ <message gettext-domain="systemd">Authentication is required to update a user's home area.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.home1.resize-home">
+ <description gettext-domain="systemd">Resize a home area</description>
+ <message gettext-domain="systemd">Authentication is required to resize a user's home area.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.home1.passwd-home">
+ <description gettext-domain="systemd">Change password of a home area</description>
+ <message gettext-domain="systemd">Authentication is required to change the password of a user's home area.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/home/org.freedesktop.home1.service b/src/home/org.freedesktop.home1.service
new file mode 100644
index 0000000..fb03914
--- /dev/null
+++ b/src/home/org.freedesktop.home1.service
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+[D-BUS Service]
+Name=org.freedesktop.home1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.home1.service
diff --git a/src/home/pam_systemd_home.c b/src/home/pam_systemd_home.c
new file mode 100644
index 0000000..a91df91
--- /dev/null
+++ b/src/home/pam_systemd_home.c
@@ -0,0 +1,1070 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <security/pam_ext.h>
+#include <security/pam_modules.h>
+
+#include "sd-bus.h"
+
+#include "bus-common-errors.h"
+#include "bus-locator.h"
+#include "bus-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "home-util.h"
+#include "memory-util.h"
+#include "pam-util.h"
+#include "parse-util.h"
+#include "strv.h"
+#include "user-record-util.h"
+#include "user-record.h"
+#include "user-util.h"
+
+static int parse_argv(
+ pam_handle_t *handle,
+ int argc, const char **argv,
+ bool *please_suspend,
+ bool *debug) {
+
+ int i;
+
+ assert(argc >= 0);
+ assert(argc == 0 || argv);
+
+ for (i = 0; i < argc; i++) {
+ const char *v;
+
+ if ((v = startswith(argv[i], "suspend="))) {
+ int k;
+
+ k = parse_boolean(v);
+ if (k < 0)
+ pam_syslog(handle, LOG_WARNING, "Failed to parse suspend= argument, ignoring: %s", v);
+ else if (please_suspend)
+ *please_suspend = k;
+
+ } else if (streq(argv[i], "debug")) {
+ if (debug)
+ *debug = true;
+
+ } else if ((v = startswith(argv[i], "debug="))) {
+ int k;
+ k = parse_boolean(v);
+ if (k < 0)
+ pam_syslog(handle, LOG_WARNING, "Failed to parse debug= argument, ignoring: %s", v);
+ else if (debug)
+ *debug = k;
+
+ } else
+ pam_syslog(handle, LOG_WARNING, "Unknown parameter '%s', ignoring", argv[i]);
+ }
+
+ return 0;
+}
+
+static int parse_env(
+ pam_handle_t *handle,
+ bool *please_suspend) {
+
+ const char *v;
+ int r;
+
+ /* Let's read the suspend setting from an env var in addition to the PAM command line. That makes it
+ * easy to declare the features of a display manager in code rather than configuration, and this is
+ * really a feature of code */
+
+ v = pam_getenv(handle, "SYSTEMD_HOME_SUSPEND");
+ if (!v) {
+ /* Also check the process env block, so that people can control this via an env var from the
+ * outside of our process. */
+ v = secure_getenv("SYSTEMD_HOME_SUSPEND");
+ if (!v)
+ return 0;
+ }
+
+ r = parse_boolean(v);
+ if (r < 0)
+ pam_syslog(handle, LOG_WARNING, "Failed to parse $SYSTEMD_HOME_SUSPEND argument, ignoring: %s", v);
+ else if (please_suspend)
+ *please_suspend = r;
+
+ return 0;
+}
+
+static int acquire_user_record(
+ pam_handle_t *handle,
+ const char *username,
+ UserRecord **ret_record) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *homed_field = NULL;
+ const char *json = NULL;
+ int r;
+
+ assert(handle);
+
+ if (!username) {
+ r = pam_get_user(handle, &username, NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to get user name: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ if (isempty(username)) {
+ pam_syslog(handle, LOG_ERR, "User name not set.");
+ return PAM_SERVICE_ERR;
+ }
+ }
+
+ /* Let's bypass all IPC complexity for the two user names we know for sure we don't manage, and for
+ * user names we don't consider valid. */
+ if (STR_IN_SET(username, "root", NOBODY_USER_NAME) || !valid_user_group_name(username, 0))
+ return PAM_USER_UNKNOWN;
+
+ /* We cache the user record in the PAM context. We use a field name that includes the username, since
+ * clients might change the user name associated with a PAM context underneath us. Notably, 'sudo'
+ * creates a single PAM context and first authenticates it with the user set to the originating user,
+ * then updates the user for the destination user and issues the session stack with the same PAM
+ * context. We thus must be prepared that the user record changes between calls and we keep any
+ * caching separate. */
+ homed_field = strjoin("systemd-home-user-record-", username);
+ if (!homed_field)
+ return pam_log_oom(handle);
+
+ /* Let's use the cache, so that we can share it between the session and the authentication hooks */
+ r = pam_get_data(handle, homed_field, (const void**) &json);
+ if (!IN_SET(r, PAM_SUCCESS, PAM_NO_MODULE_DATA)) {
+ pam_syslog(handle, LOG_ERR, "Failed to get PAM user record data: %s", pam_strerror(handle, r));
+ return r;
+ }
+ if (r == PAM_SUCCESS && json) {
+ /* We determined earlier that this is not a homed user? Then exit early. (We use -1 as
+ * negative cache indicator) */
+ if (json == POINTER_MAX)
+ return PAM_USER_UNKNOWN;
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *generic_field = NULL, *json_copy = NULL;
+
+ r = pam_acquire_bus_connection(handle, &bus);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = bus_call_method(bus, bus_home_mgr, "GetUserRecordByName", &error, &reply, "s", username);
+ if (r < 0) {
+ if (bus_error_is_unknown_service(&error)) {
+ pam_syslog(handle, LOG_DEBUG, "systemd-homed is not available: %s", bus_error_message(&error, r));
+ goto user_unknown;
+ }
+
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_HOME)) {
+ pam_syslog(handle, LOG_DEBUG, "Not a user managed by systemd-homed: %s", bus_error_message(&error, r));
+ goto user_unknown;
+ }
+
+ pam_syslog(handle, LOG_ERR, "Failed to query user record: %s", bus_error_message(&error, r));
+ return PAM_SERVICE_ERR;
+ }
+
+ r = sd_bus_message_read(reply, "sbo", &json, NULL, NULL);
+ if (r < 0)
+ return pam_bus_log_parse_error(handle, r);
+
+ /* First copy: for the homed-specific data field, i.e. where we know the user record is from
+ * homed */
+ json_copy = strdup(json);
+ if (!json_copy)
+ return pam_log_oom(handle);
+
+ r = pam_set_data(handle, homed_field, json_copy, pam_cleanup_free);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM user record data '%s': %s",
+ homed_field, pam_strerror(handle, r));
+ return r;
+ }
+
+ /* Take a second copy: for the generic data field, the one which we share with
+ * pam_systemd. While we insist on only reusing homed records, pam_systemd is fine with homed
+ * and non-homed user records. */
+ json_copy = strdup(json);
+ if (!json_copy)
+ return pam_log_oom(handle);
+
+ generic_field = strjoin("systemd-user-record-", username);
+ if (!generic_field)
+ return pam_log_oom(handle);
+
+ r = pam_set_data(handle, generic_field, json_copy, pam_cleanup_free);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM user record data '%s': %s",
+ homed_field, pam_strerror(handle, r));
+ return r;
+ }
+
+ TAKE_PTR(json_copy);
+ }
+
+ r = json_parse(json, JSON_PARSE_SENSITIVE, &v, NULL, NULL);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to parse JSON user record: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ ur = user_record_new();
+ if (!ur)
+ return pam_log_oom(handle);
+
+ r = user_record_load(ur, v, USER_RECORD_LOAD_REFUSE_SECRET);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to load user record: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ /* Safety check if cached record actually matches what we are looking for */
+ if (!streq_ptr(username, ur->user_name)) {
+ pam_syslog(handle, LOG_ERR, "Acquired user record does not match user name.");
+ return PAM_SERVICE_ERR;
+ }
+
+ if (ret_record)
+ *ret_record = TAKE_PTR(ur);
+
+ return PAM_SUCCESS;
+
+user_unknown:
+ /* Cache this, so that we don't check again */
+ r = pam_set_data(handle, homed_field, POINTER_MAX, NULL);
+ if (r != PAM_SUCCESS)
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM user record data '%s' to invalid, ignoring: %s",
+ homed_field, pam_strerror(handle, r));
+
+ return PAM_USER_UNKNOWN;
+}
+
+static int release_user_record(pam_handle_t *handle, const char *username) {
+ _cleanup_free_ char *homed_field = NULL, *generic_field = NULL;
+ int r, k;
+
+ assert(handle);
+ assert(username);
+
+ homed_field = strjoin("systemd-home-user-record-", username);
+ if (!homed_field)
+ return pam_log_oom(handle);
+
+ r = pam_set_data(handle, homed_field, NULL, NULL);
+ if (r != PAM_SUCCESS)
+ pam_syslog(handle, LOG_ERR, "Failed to release PAM user record data '%s': %s", homed_field, pam_strerror(handle, r));
+
+ generic_field = strjoin("systemd-user-record-", username);
+ if (!generic_field)
+ return pam_log_oom(handle);
+
+ k = pam_set_data(handle, generic_field, NULL, NULL);
+ if (k != PAM_SUCCESS)
+ pam_syslog(handle, LOG_ERR, "Failed to release PAM user record data '%s': %s", generic_field, pam_strerror(handle, k));
+
+ return IN_SET(r, PAM_SUCCESS, PAM_NO_MODULE_DATA) ? k : r;
+}
+
+static void cleanup_home_fd(pam_handle_t *handle, void *data, int error_status) {
+ safe_close(PTR_TO_FD(data));
+}
+
+static int handle_generic_user_record_error(
+ pam_handle_t *handle,
+ const char *user_name,
+ UserRecord *secret,
+ int ret,
+ const sd_bus_error *error) {
+
+ assert(user_name);
+ assert(secret);
+ assert(error);
+
+ int r;
+
+ /* Logs about all errors, except for PAM_CONV_ERR, i.e. when requesting more info failed. */
+
+ if (sd_bus_error_has_name(error, BUS_ERROR_HOME_ABSENT)) {
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Home of user %s is currently absent, please plug in the necessary storage device or backing file system.", user_name);
+ pam_syslog(handle, LOG_ERR, "Failed to acquire home for user %s: %s", user_name, bus_error_message(error, ret));
+ return PAM_PERM_DENIED;
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_AUTHENTICATION_LIMIT_HIT)) {
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Too frequent unsuccessful login attempts for user %s, try again later.", user_name);
+ pam_syslog(handle, LOG_ERR, "Failed to acquire home for user %s: %s", user_name, bus_error_message(error, ret));
+ return PAM_MAXTRIES;
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_BAD_PASSWORD)) {
+ _cleanup_(erase_and_freep) char *newp = NULL;
+
+ /* This didn't work? Ask for an (additional?) password */
+
+ if (strv_isempty(secret->password))
+ r = pam_prompt(handle, PAM_PROMPT_ECHO_OFF, &newp, "Password: ");
+ else {
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Password incorrect or not sufficient for authentication of user %s.", user_name);
+ r = pam_prompt(handle, PAM_PROMPT_ECHO_OFF, &newp, "Sorry, try again: ");
+ }
+ if (r != PAM_SUCCESS)
+ return PAM_CONV_ERR; /* no logging here */
+
+ if (isempty(newp)) {
+ pam_syslog(handle, LOG_DEBUG, "Password request aborted.");
+ return PAM_AUTHTOK_ERR;
+ }
+
+ r = user_record_set_password(secret, STRV_MAKE(newp), true);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store password: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN)) {
+ _cleanup_(erase_and_freep) char *newp = NULL;
+
+ if (strv_isempty(secret->password)) {
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Security token of user %s not inserted.", user_name);
+ r = pam_prompt(handle, PAM_PROMPT_ECHO_OFF, &newp, "Try again with password: ");
+ } else {
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Password incorrect or not sufficient, and configured security token of user %s not inserted.", user_name);
+ r = pam_prompt(handle, PAM_PROMPT_ECHO_OFF, &newp, "Try again with password: ");
+ }
+ if (r != PAM_SUCCESS)
+ return PAM_CONV_ERR; /* no logging here */
+
+ if (isempty(newp)) {
+ pam_syslog(handle, LOG_DEBUG, "Password request aborted.");
+ return PAM_AUTHTOK_ERR;
+ }
+
+ r = user_record_set_password(secret, STRV_MAKE(newp), true);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store password: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_PIN_NEEDED)) {
+ _cleanup_(erase_and_freep) char *newp = NULL;
+
+ r = pam_prompt(handle, PAM_PROMPT_ECHO_OFF, &newp, "Security token PIN: ");
+ if (r != PAM_SUCCESS)
+ return PAM_CONV_ERR; /* no logging here */
+
+ if (isempty(newp)) {
+ pam_syslog(handle, LOG_DEBUG, "PIN request aborted.");
+ return PAM_AUTHTOK_ERR;
+ }
+
+ r = user_record_set_token_pin(secret, STRV_MAKE(newp), false);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store PIN: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED)) {
+
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Please authenticate physically on security token of user %s.", user_name);
+
+ r = user_record_set_pkcs11_protected_authentication_path_permitted(secret, true);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PKCS#11 protected authentication path permitted flag: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED)) {
+
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Please verify presence on security token of user %s.", user_name);
+
+ r = user_record_set_fido2_user_presence_permitted(secret, true);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to set FIDO2 user presence permitted flag: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_PIN_LOCKED)) {
+
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Security token PIN is locked, please unlock it first. (Hint: Removal and re-insertion might suffice.)");
+ return PAM_SERVICE_ERR;
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_BAD_PIN)) {
+ _cleanup_(erase_and_freep) char *newp = NULL;
+
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Security token PIN incorrect for user %s.", user_name);
+ r = pam_prompt(handle, PAM_PROMPT_ECHO_OFF, &newp, "Sorry, retry security token PIN: ");
+ if (r != PAM_SUCCESS)
+ return PAM_CONV_ERR; /* no logging here */
+
+ if (isempty(newp)) {
+ pam_syslog(handle, LOG_DEBUG, "PIN request aborted.");
+ return PAM_AUTHTOK_ERR;
+ }
+
+ r = user_record_set_token_pin(secret, STRV_MAKE(newp), false);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store PIN: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT)) {
+ _cleanup_(erase_and_freep) char *newp = NULL;
+
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Security token PIN of user %s incorrect (only a few tries left!)", user_name);
+ r = pam_prompt(handle, PAM_PROMPT_ECHO_OFF, &newp, "Sorry, retry security token PIN: ");
+ if (r != PAM_SUCCESS)
+ return PAM_CONV_ERR; /* no logging here */
+
+ if (isempty(newp)) {
+ pam_syslog(handle, LOG_DEBUG, "PIN request aborted.");
+ return PAM_AUTHTOK_ERR;
+ }
+
+ r = user_record_set_token_pin(secret, STRV_MAKE(newp), false);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store PIN: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ } else if (sd_bus_error_has_name(error, BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT)) {
+ _cleanup_(erase_and_freep) char *newp = NULL;
+
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Security token PIN of user %s incorrect (only one try left!)", user_name);
+ r = pam_prompt(handle, PAM_PROMPT_ECHO_OFF, &newp, "Sorry, retry security token PIN: ");
+ if (r != PAM_SUCCESS)
+ return PAM_CONV_ERR; /* no logging here */
+
+ if (isempty(newp)) {
+ pam_syslog(handle, LOG_DEBUG, "PIN request aborted.");
+ return PAM_AUTHTOK_ERR;
+ }
+
+ r = user_record_set_token_pin(secret, STRV_MAKE(newp), false);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store PIN: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ } else {
+ pam_syslog(handle, LOG_ERR, "Failed to acquire home for user %s: %s", user_name, bus_error_message(error, ret));
+ return PAM_SERVICE_ERR;
+ }
+
+ return PAM_SUCCESS;
+}
+
+static int acquire_home(
+ pam_handle_t *handle,
+ bool please_authenticate,
+ bool please_suspend,
+ bool debug) {
+
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL, *secret = NULL;
+ bool do_auth = please_authenticate, home_not_active = false, home_locked = false;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_close_ int acquired_fd = -1;
+ _cleanup_free_ char *fd_field = NULL;
+ const void *home_fd_ptr = NULL;
+ const char *username = NULL;
+ unsigned n_attempts = 0;
+ int r;
+
+ assert(handle);
+
+ /* This acquires a reference to a home directory in one of two ways: if please_authenticate is true,
+ * then we'll call AcquireHome() after asking the user for a password. Otherwise it tries to call
+ * RefHome() and if that fails queries the user for a password and uses AcquireHome().
+ *
+ * The idea is that the PAM authentication hook sets please_authenticate and thus always
+ * authenticates, while the other PAM hooks unset it so that they can a ref of their own without
+ * authentication if possible, but with authentication if necessary. */
+
+ r = pam_get_user(handle, &username, NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to get user name: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ if (isempty(username)) {
+ pam_syslog(handle, LOG_ERR, "User name not set.");
+ return PAM_SERVICE_ERR;
+ }
+
+ /* If we already have acquired the fd, let's shortcut this */
+ fd_field = strjoin("systemd-home-fd-", username);
+ if (!fd_field)
+ return pam_log_oom(handle);
+
+ r = pam_get_data(handle, fd_field, &home_fd_ptr);
+ if (!IN_SET(r, PAM_SUCCESS, PAM_NO_MODULE_DATA)) {
+ pam_syslog(handle, LOG_ERR, "Failed to retrieve PAM home reference fd: %s", pam_strerror(handle, r));
+ return r;
+ }
+ if (r == PAM_SUCCESS && PTR_TO_FD(home_fd_ptr) >= 0)
+ return PAM_SUCCESS;
+
+ r = pam_acquire_bus_connection(handle, &bus);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = acquire_user_record(handle, username, &ur);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ /* Implement our own retry loop here instead of relying on the PAM client's one. That's because it
+ * might happen that the record we stored on the host does not match the encryption password of
+ * the LUKS image in case the image was used in a different system where the password was
+ * changed. In that case it will happen that the LUKS password and the host password are
+ * different, and we handle that by collecting and passing multiple passwords in that case. Hence we
+ * treat bad passwords as a request to collect one more password and pass the new all all previously
+ * used passwords again. */
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ if (do_auth && !secret) {
+ const char *cached_password = NULL;
+
+ secret = user_record_new();
+ if (!secret)
+ return pam_log_oom(handle);
+
+ /* If there's already a cached password, use it. But if not let's authenticate
+ * without anything, maybe some other authentication mechanism systemd-homed
+ * implements (such as PKCS#11) allows us to authenticate without anything else. */
+ r = pam_get_item(handle, PAM_AUTHTOK, (const void**) &cached_password);
+ if (!IN_SET(r, PAM_BAD_ITEM, PAM_SUCCESS)) {
+ pam_syslog(handle, LOG_ERR, "Failed to get cached password: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ if (!isempty(cached_password)) {
+ r = user_record_set_password(secret, STRV_MAKE(cached_password), true);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store password: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+ }
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_home_mgr, do_auth ? "AcquireHome" : "RefHome");
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_message_append(m, "s", ur->user_name);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ if (do_auth) {
+ r = bus_message_append_secret(m, secret);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+ }
+
+ r = sd_bus_message_append(m, "b", please_suspend);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, &reply);
+ if (r < 0) {
+
+ if (sd_bus_error_has_name(&error, BUS_ERROR_HOME_NOT_ACTIVE))
+ /* Only on RefHome(): We can't access the home directory currently, unless
+ * it's unlocked with a password. Hence, let's try this again, this time with
+ * authentication. */
+ home_not_active = true;
+ else if (sd_bus_error_has_name(&error, BUS_ERROR_HOME_LOCKED))
+ home_locked = true; /* Similar */
+ else {
+ r = handle_generic_user_record_error(handle, ur->user_name, secret, r, &error);
+ if (r == PAM_CONV_ERR) {
+ /* Password/PIN prompts will fail in certain environments, for example when
+ * we are called from OpenSSH's account or session hooks, or in systemd's
+ * per-service PAM logic. In that case, print a friendly message and accept
+ * failure. */
+
+ if (home_not_active)
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Home of user %s is currently not active, please log in locally first.", ur->user_name);
+ if (home_locked)
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Home of user %s is currently locked, please unlock locally first.", ur->user_name);
+
+ pam_syslog(handle, please_authenticate ? LOG_ERR : LOG_DEBUG, "Failed to prompt for password/prompt.");
+
+ return home_not_active || home_locked ? PAM_PERM_DENIED : PAM_CONV_ERR;
+ }
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+
+ } else {
+ int fd;
+
+ r = sd_bus_message_read(reply, "h", &fd);
+ if (r < 0)
+ return pam_bus_log_parse_error(handle, r);
+
+ acquired_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (acquired_fd < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to duplicate acquired fd: %s", bus_error_message(&error, r));
+ return PAM_SERVICE_ERR;
+ }
+
+ break;
+ }
+
+ if (++n_attempts >= 5) {
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Too many unsuccessful login attempts for user %s, refusing.", ur->user_name);
+ pam_syslog(handle, LOG_ERR, "Failed to acquire home for user %s: %s", ur->user_name, bus_error_message(&error, r));
+ return PAM_MAXTRIES;
+ }
+
+ /* Try again, this time with authentication if we didn't do that before. */
+ do_auth = true;
+ }
+
+ /* Later PAM modules may need the auth token, but only during pam_authenticate. */
+ if (please_authenticate && !strv_isempty(secret->password)) {
+ r = pam_set_item(handle, PAM_AUTHTOK, *secret->password);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM auth token: %s", pam_strerror(handle, r));
+ return r;
+ }
+ }
+
+ r = pam_set_data(handle, fd_field, FD_TO_PTR(acquired_fd), cleanup_home_fd);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM bus data: %s", pam_strerror(handle, r));
+ return r;
+ }
+ TAKE_FD(acquired_fd);
+
+ if (do_auth) {
+ /* We likely just activated the home directory, let's flush out the user record, since a
+ * newer embedded user record might have been acquired from the activation. */
+
+ r = release_user_record(handle, ur->user_name);
+ if (!IN_SET(r, PAM_SUCCESS, PAM_NO_MODULE_DATA))
+ return r;
+ }
+
+ pam_syslog(handle, LOG_NOTICE, "Home for user %s successfully acquired.", ur->user_name);
+
+ return PAM_SUCCESS;
+}
+
+static int release_home_fd(pam_handle_t *handle, const char *username) {
+ _cleanup_free_ char *fd_field = NULL;
+ const void *home_fd_ptr = NULL;
+ int r;
+
+ assert(handle);
+ assert(username);
+
+ fd_field = strjoin("systemd-home-fd-", username);
+ if (!fd_field)
+ return pam_log_oom(handle);
+
+ r = pam_get_data(handle, fd_field, &home_fd_ptr);
+ if (r == PAM_NO_MODULE_DATA || (r == PAM_SUCCESS && PTR_TO_FD(home_fd_ptr) < 0))
+ return PAM_NO_MODULE_DATA;
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to retrieve PAM home reference fd: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ r = pam_set_data(handle, fd_field, NULL, NULL);
+ if (r != PAM_SUCCESS)
+ pam_syslog(handle, LOG_ERR, "Failed to release PAM home reference fd: %s", pam_strerror(handle, r));
+
+ return r;
+}
+
+_public_ PAM_EXTERN int pam_sm_authenticate(
+ pam_handle_t *handle,
+ int flags,
+ int argc, const char **argv) {
+
+ bool debug = false, suspend_please = false;
+
+ if (parse_env(handle, &suspend_please) < 0)
+ return PAM_AUTH_ERR;
+
+ if (parse_argv(handle,
+ argc, argv,
+ &suspend_please,
+ &debug) < 0)
+ return PAM_AUTH_ERR;
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "pam-systemd-homed authenticating");
+
+ return acquire_home(handle, /* please_authenticate= */ true, suspend_please, debug);
+}
+
+_public_ PAM_EXTERN int pam_sm_setcred(pam_handle_t *pamh, int flags, int argc, const char **argv) {
+ return PAM_SUCCESS;
+}
+
+_public_ PAM_EXTERN int pam_sm_open_session(
+ pam_handle_t *handle,
+ int flags,
+ int argc, const char **argv) {
+
+ bool debug = false, suspend_please = false;
+ int r;
+
+ if (parse_env(handle, &suspend_please) < 0)
+ return PAM_SESSION_ERR;
+
+ if (parse_argv(handle,
+ argc, argv,
+ &suspend_please,
+ &debug) < 0)
+ return PAM_SESSION_ERR;
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "pam-systemd-homed session start");
+
+ r = acquire_home(handle, /* please_authenticate = */ false, suspend_please, debug);
+ if (r == PAM_USER_UNKNOWN) /* Not managed by us? Don't complain. */
+ return PAM_SUCCESS;
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = pam_putenv(handle, "SYSTEMD_HOME=1");
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM environment variable $SYSTEMD_HOME: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ r = pam_putenv(handle, suspend_please ? "SYSTEMD_HOME_SUSPEND=1" : "SYSTEMD_HOME_SUSPEND=0");
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM environment variable $SYSTEMD_HOME_SUSPEND: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ /* Let's release the D-Bus connection, after all the session might live quite a long time, and we are
+ * not going to process the bus connection in that time, so let's better close before the daemon
+ * kicks us off because we are not processing anything. */
+ (void) pam_release_bus_connection(handle);
+ return PAM_SUCCESS;
+}
+
+_public_ PAM_EXTERN int pam_sm_close_session(
+ pam_handle_t *handle,
+ int flags,
+ int argc, const char **argv) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ const char *username = NULL;
+ bool debug = false;
+ int r;
+
+ if (parse_argv(handle,
+ argc, argv,
+ NULL,
+ &debug) < 0)
+ return PAM_SESSION_ERR;
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "pam-systemd-homed session end");
+
+ r = pam_get_user(handle, &username, NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to get user name: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ if (isempty(username)) {
+ pam_syslog(handle, LOG_ERR, "User name not set.");
+ return PAM_SERVICE_ERR;
+ }
+
+ /* Let's explicitly drop the reference to the homed session, so that the subsequent ReleaseHome()
+ * call will be able to do its thing. */
+ r = release_home_fd(handle, username);
+ if (r == PAM_NO_MODULE_DATA) /* Nothing to do, we never acquired an fd */
+ return PAM_SUCCESS;
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = pam_acquire_bus_connection(handle, &bus);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = bus_message_new_method_call(bus, &m, bus_home_mgr, "ReleaseHome");
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_message_append(m, "s", username);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_HOME_BUSY))
+ pam_syslog(handle, LOG_NOTICE, "Not deactivating home directory of %s, as it is still used.", username);
+ else {
+ pam_syslog(handle, LOG_ERR, "Failed to release user home: %s", bus_error_message(&error, r));
+ return PAM_SESSION_ERR;
+ }
+ }
+
+ return PAM_SUCCESS;
+}
+
+_public_ PAM_EXTERN int pam_sm_acct_mgmt(
+ pam_handle_t *handle,
+ int flags,
+ int argc,
+ const char **argv) {
+
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ bool debug = false, please_suspend = false;
+ usec_t t;
+ int r;
+
+ if (parse_env(handle, &please_suspend) < 0)
+ return PAM_AUTH_ERR;
+
+ if (parse_argv(handle,
+ argc, argv,
+ &please_suspend,
+ &debug) < 0)
+ return PAM_AUTH_ERR;
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "pam-systemd-homed account management");
+
+ r = acquire_home(handle, /* please_authenticate = */ false, please_suspend, debug);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = acquire_user_record(handle, NULL, &ur);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = user_record_test_blocked(ur);
+ switch (r) {
+
+ case -ESTALE:
+ pam_syslog(handle, LOG_WARNING, "User record for '%s' is newer than current system time, assuming incorrect system clock, allowing access.", ur->user_name);
+ break;
+
+ case -ENOLCK:
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "User record is blocked, prohibiting access.");
+ return PAM_ACCT_EXPIRED;
+
+ case -EL2HLT:
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "User record is not valid yet, prohibiting access.");
+ return PAM_ACCT_EXPIRED;
+
+ case -EL3HLT:
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "User record is not valid anymore, prohibiting access.");
+ return PAM_ACCT_EXPIRED;
+
+ default:
+ if (r < 0) {
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "User record not valid, prohibiting access.");
+ return PAM_ACCT_EXPIRED;
+ }
+
+ break;
+ }
+
+ t = user_record_ratelimit_next_try(ur);
+ if (t != USEC_INFINITY) {
+ usec_t n = now(CLOCK_REALTIME);
+
+ if (t > n) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Too many logins, try again in %s.",
+ format_timespan(buf, sizeof(buf), t - n, USEC_PER_SEC));
+
+ return PAM_MAXTRIES;
+ }
+ }
+
+ r = user_record_test_password_change_required(ur);
+ switch (r) {
+
+ case -EKEYREVOKED:
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Password change required.");
+ return PAM_NEW_AUTHTOK_REQD;
+
+ case -EOWNERDEAD:
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Password expired, change requird.");
+ return PAM_NEW_AUTHTOK_REQD;
+
+ case -EKEYREJECTED:
+ /* Strictly speaking this is only about password expiration, and we might want to allow
+ * authentication via PKCS#11 or so, but let's ignore this fine distinction for now. */
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Password is expired, but can't change, refusing login.");
+ return PAM_AUTHTOK_EXPIRED;
+
+ case -EKEYEXPIRED:
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "Password will expire soon, please change.");
+ break;
+
+ case -ESTALE:
+ /* If the system clock is wrong, let's log but continue */
+ pam_syslog(handle, LOG_WARNING, "Couldn't check if password change is required, last change is in the future, system clock likely wrong.");
+ break;
+
+ case -EROFS:
+ /* All good, just means the password if we wanted to change we couldn't, but we don't need to */
+ break;
+
+ default:
+ if (r < 0) {
+ (void) pam_prompt(handle, PAM_ERROR_MSG, NULL, "User record not valid, prohibiting access.");
+ return PAM_AUTHTOK_EXPIRED;
+ }
+
+ break;
+ }
+
+ return PAM_SUCCESS;
+}
+
+_public_ PAM_EXTERN int pam_sm_chauthtok(
+ pam_handle_t *handle,
+ int flags,
+ int argc,
+ const char **argv) {
+
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL, *old_secret = NULL, *new_secret = NULL;
+ const char *old_password = NULL, *new_password = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ unsigned n_attempts = 0;
+ bool debug = false;
+ int r;
+
+ if (parse_argv(handle,
+ argc, argv,
+ NULL,
+ &debug) < 0)
+ return PAM_AUTH_ERR;
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "pam-systemd-homed account management");
+
+ r = pam_acquire_bus_connection(handle, &bus);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = acquire_user_record(handle, NULL, &ur);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ /* Start with cached credentials */
+ r = pam_get_item(handle, PAM_OLDAUTHTOK, (const void**) &old_password);
+ if (!IN_SET(r, PAM_BAD_ITEM, PAM_SUCCESS)) {
+ pam_syslog(handle, LOG_ERR, "Failed to get old password: %s", pam_strerror(handle, r));
+ return r;
+ }
+ r = pam_get_item(handle, PAM_AUTHTOK, (const void**) &new_password);
+ if (!IN_SET(r, PAM_BAD_ITEM, PAM_SUCCESS)) {
+ pam_syslog(handle, LOG_ERR, "Failed to get cached password: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ if (isempty(new_password)) {
+ /* No, it's not cached, then let's ask for the password and its verification, and cache
+ * it. */
+
+ r = pam_get_authtok_noverify(handle, &new_password, "New password: ");
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to get new password: %s", pam_strerror(handle, r));
+ return r;
+ }
+ if (isempty(new_password)) {
+ pam_syslog(handle, LOG_DEBUG, "Password request aborted.");
+ return PAM_AUTHTOK_ERR;
+ }
+
+ r = pam_get_authtok_verify(handle, &new_password, "new password: "); /* Lower case, since PAM prefixes 'Repeat' */
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to get password again: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ // FIXME: pam_pwquality will ask for the password a third time. It really shouldn't do
+ // that, and instead assume the password was already verified once when it is found to be
+ // cached already. needs to be fixed in pam_pwquality
+ }
+
+ /* Now everything is cached and checked, let's exit from the preliminary check */
+ if (FLAGS_SET(flags, PAM_PRELIM_CHECK))
+ return PAM_SUCCESS;
+
+ old_secret = user_record_new();
+ if (!old_secret)
+ return pam_log_oom(handle);
+
+ if (!isempty(old_password)) {
+ r = user_record_set_password(old_secret, STRV_MAKE(old_password), true);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store old password: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+ }
+
+ new_secret = user_record_new();
+ if (!new_secret)
+ return pam_log_oom(handle);
+
+ r = user_record_set_password(new_secret, STRV_MAKE(new_password), true);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to store new password: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_home_mgr, "ChangePasswordHome");
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_message_append(m, "s", ur->user_name);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = bus_message_append_secret(m, new_secret);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = bus_message_append_secret(m, old_secret);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ r = handle_generic_user_record_error(handle, ur->user_name, old_secret, r, &error);
+ if (r == PAM_CONV_ERR) {
+ pam_syslog(handle, LOG_ERR, "Failed to prompt for password/prompt.");
+ return PAM_CONV_ERR;
+ }
+ if (r != PAM_SUCCESS)
+ return r;
+ } else {
+ pam_syslog(handle, LOG_NOTICE, "Successfully changed password for user %s.", ur->user_name);
+ return PAM_SUCCESS;
+ }
+
+ if (++n_attempts >= 5)
+ break;
+
+ /* Try again */
+ };
+
+ pam_syslog(handle, LOG_NOTICE, "Failed to change password for user %s: %m", ur->user_name);
+ return PAM_MAXTRIES;
+}
diff --git a/src/home/pam_systemd_home.sym b/src/home/pam_systemd_home.sym
new file mode 100644
index 0000000..293c06f
--- /dev/null
+++ b/src/home/pam_systemd_home.sym
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+{
+global:
+ pam_sm_authenticate;
+ pam_sm_setcred;
+ pam_sm_open_session;
+ pam_sm_close_session;
+ pam_sm_acct_mgmt;
+ pam_sm_chauthtok;
+local: *;
+};
diff --git a/src/home/test-modhex.c b/src/home/test-modhex.c
new file mode 100644
index 0000000..1bd9061
--- /dev/null
+++ b/src/home/test-modhex.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "modhex.h"
+#include "alloc-util.h"
+#include "string-util.h"
+
+static void test_normalize_recovery_key(const char *t, const char *expected) {
+ _cleanup_free_ char *z = NULL;
+ int r;
+
+ assert(t);
+
+ r = normalize_recovery_key(t, &z);
+ assert_se(expected ?
+ (r >= 0 && streq(z, expected)) :
+ (r == -EINVAL && z == NULL));
+}
+
+int main(int argc, char *arv[]) {
+
+ test_normalize_recovery_key("iefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj",
+ "iefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj");
+
+ test_normalize_recovery_key("iefgcelhbiduvkjvcjvuncnkvlfchdidjhtuhhdeurkllkegilkjgbrthjkbgktj",
+ "iefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj");
+
+ test_normalize_recovery_key("IEFGCELH-BIDUVKJV-CJVUNCNK-VLFCHDID-JHTUHHDE-URKLLKEG-ILKJGBRT-HJKBGKTJ",
+ "iefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj");
+
+ test_normalize_recovery_key("IEFGCELHBIDUVKJVCJVUNCNKVLFCHDIDJHTUHHDEURKLLKEGILKJGBRTHJKBGKTJ",
+ "iefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj");
+
+ test_normalize_recovery_key("Iefgcelh-Biduvkjv-Cjvuncnk-Vlfchdid-Jhtuhhde-Urkllkeg-Ilkjgbrt-Hjkbgktj",
+ "iefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj");
+
+ test_normalize_recovery_key("Iefgcelhbiduvkjvcjvuncnkvlfchdidjhtuhhdeurkllkegilkjgbrthjkbgktj",
+ "iefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj");
+
+ test_normalize_recovery_key("iefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgkt", NULL);
+ test_normalize_recovery_key("iefgcelhbiduvkjvcjvuncnkvlfchdidjhtuhhdeurkllkegilkjgbrthjkbgkt", NULL);
+ test_normalize_recovery_key("IEFGCELHBIDUVKJVCJVUNCNKVLFCHDIDJHTUHHDEURKLLKEGILKJGBRTHJKBGKT", NULL);
+
+ test_normalize_recovery_key("xefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj", NULL);
+ test_normalize_recovery_key("Xefgcelh-biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj", NULL);
+ test_normalize_recovery_key("iefgcelh+biduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj", NULL);
+ test_normalize_recovery_key("iefgcelhebiduvkjv-cjvuncnk-vlfchdid-jhtuhhde-urkllkeg-ilkjgbrt-hjkbgktj", NULL);
+
+ test_normalize_recovery_key("", NULL);
+
+ return 0;
+}
diff --git a/src/home/user-record-pwquality.c b/src/home/user-record-pwquality.c
new file mode 100644
index 0000000..23c3357
--- /dev/null
+++ b/src/home/user-record-pwquality.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-common-errors.h"
+#include "errno-util.h"
+#include "home-util.h"
+#include "libcrypt-util.h"
+#include "pwquality-util.h"
+#include "strv.h"
+#include "user-record-pwquality.h"
+#include "user-record-util.h"
+
+#if HAVE_PWQUALITY
+
+int user_record_quality_check_password(
+ UserRecord *hr,
+ UserRecord *secret,
+ sd_bus_error *error) {
+
+ _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL;
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN], **pp;
+ void *auxerror;
+ int r;
+
+ assert(hr);
+ assert(secret);
+
+ r = pwq_allocate_context(&pwq);
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to allocate libpwquality context: %m");
+
+ /* This is a bit more complex than one might think at first. pwquality_check() would like to know the
+ * old password to make security checks. We support arbitrary numbers of passwords however, hence we
+ * call the function once for each combination of old and new password. */
+
+ /* Iterate through all new passwords */
+ STRV_FOREACH(pp, secret->password) {
+ bool called = false;
+ char **old;
+
+ r = test_password_many(hr->hashed_password, *pp);
+ if (r < 0)
+ return r;
+ if (r == 0) /* This is an old password as it isn't listed in the hashedPassword field, skip it */
+ continue;
+
+ /* Check this password against all old passwords */
+ STRV_FOREACH(old, secret->password) {
+
+ if (streq(*pp, *old))
+ continue;
+
+ r = test_password_many(hr->hashed_password, *old);
+ if (r < 0)
+ return r;
+ if (r > 0) /* This is a new password, not suitable as old password */
+ continue;
+
+ r = sym_pwquality_check(pwq, *pp, *old, hr->user_name, &auxerror);
+ if (r < 0)
+ return sd_bus_error_setf(error, BUS_ERROR_LOW_PASSWORD_QUALITY, "Password too weak: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, auxerror));
+
+ called = true;
+ }
+
+ if (called)
+ continue;
+
+ /* If there are no old passwords, let's call pwquality_check() without any. */
+ r = sym_pwquality_check(pwq, *pp, NULL, hr->user_name, &auxerror);
+ if (r < 0)
+ return sd_bus_error_setf(error, BUS_ERROR_LOW_PASSWORD_QUALITY, "Password too weak: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, auxerror));
+ }
+
+ return 1;
+}
+
+#else
+
+int user_record_quality_check_password(
+ UserRecord *hr,
+ UserRecord *secret,
+ sd_bus_error *error) {
+
+ return 0;
+}
+
+#endif
diff --git a/src/home/user-record-pwquality.h b/src/home/user-record-pwquality.h
new file mode 100644
index 0000000..b3b2690
--- /dev/null
+++ b/src/home/user-record-pwquality.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "user-record.h"
+
+int user_record_quality_check_password(UserRecord *hr, UserRecord *secret, sd_bus_error *error);
diff --git a/src/home/user-record-sign.c b/src/home/user-record-sign.c
new file mode 100644
index 0000000..8cd3a46
--- /dev/null
+++ b/src/home/user-record-sign.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <openssl/pem.h>
+
+#include "fd-util.h"
+#include "user-record-sign.h"
+#include "fileio.h"
+
+static int user_record_signable_json(UserRecord *ur, char **ret) {
+ _cleanup_(user_record_unrefp) UserRecord *reduced = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *j = NULL;
+ int r;
+
+ assert(ur);
+ assert(ret);
+
+ r = user_record_clone(ur, USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_PRIVILEGED|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_STRIP_SECRET|USER_RECORD_STRIP_BINDING|USER_RECORD_STRIP_STATUS|USER_RECORD_STRIP_SIGNATURE, &reduced);
+ if (r < 0)
+ return r;
+
+ j = json_variant_ref(reduced->json);
+
+ r = json_variant_normalize(&j);
+ if (r < 0)
+ return r;
+
+ return json_variant_format(j, 0, ret);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_MD_CTX*, EVP_MD_CTX_free);
+
+int user_record_sign(UserRecord *ur, EVP_PKEY *private_key, UserRecord **ret) {
+ _cleanup_(json_variant_unrefp) JsonVariant *encoded = NULL, *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *signed_ur = NULL;
+ _cleanup_(EVP_MD_CTX_freep) EVP_MD_CTX *md_ctx = NULL;
+ _cleanup_free_ char *text = NULL, *key = NULL;
+ size_t signature_size = 0, key_size = 0;
+ _cleanup_free_ void *signature = NULL;
+ _cleanup_fclose_ FILE *mf = NULL;
+ int r;
+
+ assert(ur);
+ assert(private_key);
+ assert(ret);
+
+ r = user_record_signable_json(ur, &text);
+ if (r < 0)
+ return r;
+
+ md_ctx = EVP_MD_CTX_new();
+ if (!md_ctx)
+ return -ENOMEM;
+
+ if (EVP_DigestSignInit(md_ctx, NULL, NULL, NULL, private_key) <= 0)
+ return -EIO;
+
+ /* Request signature size */
+ if (EVP_DigestSign(md_ctx, NULL, &signature_size, (uint8_t*) text, strlen(text)) <= 0)
+ return -EIO;
+
+ signature = malloc(signature_size);
+ if (!signature)
+ return -ENOMEM;
+
+ if (EVP_DigestSign(md_ctx, signature, &signature_size, (uint8_t*) text, strlen(text)) <= 0)
+ return -EIO;
+
+ mf = open_memstream_unlocked(&key, &key_size);
+ if (!mf)
+ return -ENOMEM;
+
+ if (PEM_write_PUBKEY(mf, private_key) <= 0)
+ return -EIO;
+
+ r = fflush_and_check(mf);
+ if (r < 0)
+ return r;
+
+ r = json_build(&encoded, JSON_BUILD_ARRAY(
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("data", JSON_BUILD_BASE64(signature, signature_size)),
+ JSON_BUILD_PAIR("key", JSON_BUILD_STRING(key)))));
+ if (r < 0)
+ return r;
+
+ v = json_variant_ref(ur->json);
+
+ r = json_variant_set_field(&v, "signature", encoded);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING)
+ json_variant_dump(v, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR_AUTO, NULL, NULL);
+
+ signed_ur = user_record_new();
+ if (!signed_ur)
+ return log_oom();
+
+ r = user_record_load(signed_ur, v, USER_RECORD_LOAD_FULL);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(signed_ur);
+ return 0;
+}
+
+int user_record_verify(UserRecord *ur, EVP_PKEY *public_key) {
+ _cleanup_free_ char *text = NULL;
+ unsigned n_good = 0, n_bad = 0;
+ JsonVariant *array, *e;
+ int r;
+
+ assert(ur);
+ assert(public_key);
+
+ array = json_variant_by_key(ur->json, "signature");
+ if (!array)
+ return USER_RECORD_UNSIGNED;
+
+ if (!json_variant_is_array(array))
+ return -EINVAL;
+
+ if (json_variant_elements(array) == 0)
+ return USER_RECORD_UNSIGNED;
+
+ r = user_record_signable_json(ur, &text);
+ if (r < 0)
+ return r;
+
+ JSON_VARIANT_ARRAY_FOREACH(e, array) {
+ _cleanup_(EVP_MD_CTX_freep) EVP_MD_CTX *md_ctx = NULL;
+ _cleanup_free_ void *signature = NULL;
+ size_t signature_size = 0;
+ JsonVariant *data;
+
+ if (!json_variant_is_object(e))
+ return -EINVAL;
+
+ data = json_variant_by_key(e, "data");
+ if (!data)
+ return -EINVAL;
+
+ r = json_variant_unbase64(data, &signature, &signature_size);
+ if (r < 0)
+ return r;
+
+ md_ctx = EVP_MD_CTX_new();
+ if (!md_ctx)
+ return -ENOMEM;
+
+ if (EVP_DigestVerifyInit(md_ctx, NULL, NULL, NULL, public_key) <= 0)
+ return -EIO;
+
+ if (EVP_DigestVerify(md_ctx, signature, signature_size, (uint8_t*) text, strlen(text)) <= 0) {
+ n_bad ++;
+ continue;
+ }
+
+ n_good ++;
+ }
+
+ return n_good > 0 ? (n_bad == 0 ? USER_RECORD_SIGNED_EXCLUSIVE : USER_RECORD_SIGNED) :
+ (n_bad == 0 ? USER_RECORD_UNSIGNED : USER_RECORD_FOREIGN);
+}
+
+int user_record_has_signature(UserRecord *ur) {
+ JsonVariant *array;
+
+ array = json_variant_by_key(ur->json, "signature");
+ if (!array)
+ return false;
+
+ if (!json_variant_is_array(array))
+ return -EINVAL;
+
+ return json_variant_elements(array) > 0;
+}
diff --git a/src/home/user-record-sign.h b/src/home/user-record-sign.h
new file mode 100644
index 0000000..87c6813
--- /dev/null
+++ b/src/home/user-record-sign.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <openssl/evp.h>
+
+#include "user-record.h"
+
+int user_record_sign(UserRecord *ur, EVP_PKEY *private_key, UserRecord **ret);
+
+enum {
+ USER_RECORD_UNSIGNED, /* user record has no signature */
+ USER_RECORD_SIGNED_EXCLUSIVE, /* user record has only a signature by our own key */
+ USER_RECORD_SIGNED, /* user record is signed by us, but by others too */
+ USER_RECORD_FOREIGN, /* user record is not signed by us, but by others */
+};
+
+int user_record_verify(UserRecord *ur, EVP_PKEY *public_key);
+
+int user_record_has_signature(UserRecord *ur);
diff --git a/src/home/user-record-util.c b/src/home/user-record-util.c
new file mode 100644
index 0000000..6bcbb56
--- /dev/null
+++ b/src/home/user-record-util.c
@@ -0,0 +1,1366 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/xattr.h>
+
+#include "errno-util.h"
+#include "home-util.h"
+#include "id128-util.h"
+#include "libcrypt-util.h"
+#include "memory-util.h"
+#include "modhex.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "user-record-util.h"
+#include "user-util.h"
+
+int user_record_synthesize(
+ UserRecord *h,
+ const char *user_name,
+ const char *realm,
+ const char *image_path,
+ UserStorage storage,
+ uid_t uid,
+ gid_t gid) {
+
+ _cleanup_free_ char *hd = NULL, *un = NULL, *ip = NULL, *rr = NULL, *user_name_and_realm = NULL;
+ char smid[SD_ID128_STRING_MAX];
+ sd_id128_t mid;
+ int r;
+
+ assert(h);
+ assert(user_name);
+ assert(image_path);
+ assert(IN_SET(storage, USER_LUKS, USER_SUBVOLUME, USER_FSCRYPT, USER_DIRECTORY));
+ assert(uid_is_valid(uid));
+ assert(gid_is_valid(gid));
+
+ /* Fill in a home record from just a username and an image path. */
+
+ if (h->json)
+ return -EBUSY;
+
+ if (!suitable_user_name(user_name))
+ return -EINVAL;
+
+ if (realm) {
+ r = suitable_realm(realm);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ }
+
+ if (!suitable_image_path(image_path))
+ return -EINVAL;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return r;
+
+ un = strdup(user_name);
+ if (!un)
+ return -ENOMEM;
+
+ if (realm) {
+ rr = strdup(realm);
+ if (!rr)
+ return -ENOMEM;
+
+ user_name_and_realm = strjoin(user_name, "@", realm);
+ if (!user_name_and_realm)
+ return -ENOMEM;
+ }
+
+ ip = strdup(image_path);
+ if (!ip)
+ return -ENOMEM;
+
+ hd = path_join("/home/", user_name);
+ if (!hd)
+ return -ENOMEM;
+
+ r = json_build(&h->json,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(user_name)),
+ JSON_BUILD_PAIR_CONDITION(!!rr, "realm", JSON_BUILD_STRING(realm)),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("regular")),
+ JSON_BUILD_PAIR("binding", JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR(sd_id128_to_string(mid, smid), JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("imagePath", JSON_BUILD_STRING(image_path)),
+ JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING(hd)),
+ JSON_BUILD_PAIR("storage", JSON_BUILD_STRING(user_storage_to_string(storage))),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(uid)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(gid))))))));
+ if (r < 0)
+ return r;
+
+ free_and_replace(h->user_name, un);
+ free_and_replace(h->realm, rr);
+ free_and_replace(h->user_name_and_realm_auto, user_name_and_realm);
+ free_and_replace(h->image_path, ip);
+ free_and_replace(h->home_directory, hd);
+ h->storage = storage;
+ h->uid = uid;
+
+ h->mask = USER_RECORD_REGULAR|USER_RECORD_BINDING;
+ return 0;
+}
+
+int group_record_synthesize(GroupRecord *g, UserRecord *h) {
+ _cleanup_free_ char *un = NULL, *rr = NULL, *group_name_and_realm = NULL, *description = NULL;
+ char smid[SD_ID128_STRING_MAX];
+ sd_id128_t mid;
+ int r;
+
+ assert(g);
+ assert(h);
+
+ if (g->json)
+ return -EBUSY;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return r;
+
+ un = strdup(h->user_name);
+ if (!un)
+ return -ENOMEM;
+
+ if (h->realm) {
+ rr = strdup(h->realm);
+ if (!rr)
+ return -ENOMEM;
+
+ group_name_and_realm = strjoin(un, "@", rr);
+ if (!group_name_and_realm)
+ return -ENOMEM;
+ }
+
+ description = strjoin("Primary Group of User ", un);
+ if (!description)
+ return -ENOMEM;
+
+ r = json_build(&g->json,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(un)),
+ JSON_BUILD_PAIR_CONDITION(!!rr, "realm", JSON_BUILD_STRING(rr)),
+ JSON_BUILD_PAIR("description", JSON_BUILD_STRING(description)),
+ JSON_BUILD_PAIR("binding", JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR(sd_id128_to_string(mid, smid), JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(user_record_gid(h))))))),
+ JSON_BUILD_PAIR_CONDITION(h->disposition >= 0, "disposition", JSON_BUILD_STRING(user_disposition_to_string(user_record_disposition(h)))),
+ JSON_BUILD_PAIR("status", JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR(sd_id128_to_string(mid, smid), JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.Home"))))))));
+ if (r < 0)
+ return r;
+
+ free_and_replace(g->group_name, un);
+ free_and_replace(g->realm, rr);
+ free_and_replace(g->group_name_and_realm_auto, group_name_and_realm);
+ g->gid = user_record_gid(h);
+ g->disposition = h->disposition;
+
+ g->mask = USER_RECORD_REGULAR|USER_RECORD_BINDING;
+ return 0;
+}
+
+int user_record_reconcile(
+ UserRecord *host,
+ UserRecord *embedded,
+ UserReconcileMode mode,
+ UserRecord **ret) {
+
+ int r, result;
+
+ /* Reconciles the identity record stored on the host with the one embedded in a $HOME
+ * directory. Returns the following error codes:
+ *
+ * -EINVAL: one of the records not valid
+ * -REMCHG: identity records are not about the same user
+ * -ESTALE: embedded identity record is equally new or newer than supplied record
+ *
+ * Return the new record to use, which is either the embedded record updated with the host
+ * binding or the host record. In both cases the secret data is stripped. */
+
+ assert(host);
+ assert(embedded);
+
+ /* Make sure both records are initialized */
+ if (!host->json || !embedded->json)
+ return -EINVAL;
+
+ /* Ensure these records actually contain user data */
+ if (!(embedded->mask & host->mask & USER_RECORD_REGULAR))
+ return -EINVAL;
+
+ /* Make sure the user name and realm matches */
+ if (!user_record_compatible(host, embedded))
+ return -EREMCHG;
+
+ /* Embedded identities may not contain secrets or binding info*/
+ if ((embedded->mask & (USER_RECORD_SECRET|USER_RECORD_BINDING)) != 0)
+ return -EINVAL;
+
+ /* The embedded record checked out, let's now figure out which of the two identities we'll consider
+ * in effect from now on. We do this by checking the last change timestamp, and in doubt always let
+ * the embedded data win. */
+ if (host->last_change_usec != UINT64_MAX &&
+ (embedded->last_change_usec == UINT64_MAX || host->last_change_usec > embedded->last_change_usec))
+
+ /* The host version is definitely newer, either because it has a version at all and the
+ * embedded version doesn't or because it is numerically newer. */
+ result = USER_RECONCILE_HOST_WON;
+
+ else if (host->last_change_usec == embedded->last_change_usec) {
+
+ /* The nominal version number of the host and the embedded identity is the same. If so, let's
+ * verify that, and tell the caller if we are ignoring embedded data. */
+
+ r = user_record_masked_equal(host, embedded, USER_RECORD_REGULAR|USER_RECORD_PRIVILEGED|USER_RECORD_PER_MACHINE);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (mode == USER_RECONCILE_REQUIRE_NEWER)
+ return -ESTALE;
+
+ result = USER_RECONCILE_IDENTICAL;
+ } else
+ result = USER_RECONCILE_HOST_WON;
+ } else {
+ _cleanup_(json_variant_unrefp) JsonVariant *extended = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *merged = NULL;
+ JsonVariant *e;
+
+ /* The embedded version is newer */
+
+ if (mode == USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL)
+ return -ESTALE;
+
+ /* Copy in the binding data */
+ extended = json_variant_ref(embedded->json);
+
+ e = json_variant_by_key(host->json, "binding");
+ if (e) {
+ r = json_variant_set_field(&extended, "binding", e);
+ if (r < 0)
+ return r;
+ }
+
+ merged = user_record_new();
+ if (!merged)
+ return -ENOMEM;
+
+ r = user_record_load(merged, extended, USER_RECORD_LOAD_MASK_SECRET);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(merged);
+ return USER_RECONCILE_EMBEDDED_WON; /* update */
+ }
+
+ /* Strip out secrets */
+ r = user_record_clone(host, USER_RECORD_LOAD_MASK_SECRET, ret);
+ if (r < 0)
+ return r;
+
+ return result;
+}
+
+int user_record_add_binding(
+ UserRecord *h,
+ UserStorage storage,
+ const char *image_path,
+ sd_id128_t partition_uuid,
+ sd_id128_t luks_uuid,
+ sd_id128_t fs_uuid,
+ const char *luks_cipher,
+ const char *luks_cipher_mode,
+ uint64_t luks_volume_key_size,
+ const char *file_system_type,
+ const char *home_directory,
+ uid_t uid,
+ gid_t gid) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *new_binding_entry = NULL, *binding = NULL;
+ char smid[SD_ID128_STRING_MAX], partition_uuids[ID128_UUID_STRING_MAX], luks_uuids[ID128_UUID_STRING_MAX], fs_uuids[ID128_UUID_STRING_MAX];
+ _cleanup_free_ char *ip = NULL, *hd = NULL, *ip_auto = NULL, *lc = NULL, *lcm = NULL, *fst = NULL;
+ sd_id128_t mid;
+ int r;
+
+ assert(h);
+
+ if (!h->json)
+ return -EUNATCH;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return r;
+ sd_id128_to_string(mid, smid);
+
+ if (image_path) {
+ ip = strdup(image_path);
+ if (!ip)
+ return -ENOMEM;
+ } else if (!h->image_path && storage >= 0) {
+ r = user_record_build_image_path(storage, user_record_user_name_and_realm(h), &ip_auto);
+ if (r < 0)
+ return r;
+ }
+
+ if (home_directory) {
+ hd = strdup(home_directory);
+ if (!hd)
+ return -ENOMEM;
+ }
+
+ if (file_system_type) {
+ fst = strdup(file_system_type);
+ if (!fst)
+ return -ENOMEM;
+ }
+
+ if (luks_cipher) {
+ lc = strdup(luks_cipher);
+ if (!lc)
+ return -ENOMEM;
+ }
+
+ if (luks_cipher_mode) {
+ lcm = strdup(luks_cipher_mode);
+ if (!lcm)
+ return -ENOMEM;
+ }
+
+ r = json_build(&new_binding_entry,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR_CONDITION(!!image_path, "imagePath", JSON_BUILD_STRING(image_path)),
+ JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(partition_uuid), "partitionUuid", JSON_BUILD_STRING(id128_to_uuid_string(partition_uuid, partition_uuids))),
+ JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(luks_uuid), "luksUuid", JSON_BUILD_STRING(id128_to_uuid_string(luks_uuid, luks_uuids))),
+ JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(fs_uuid), "fileSystemUuid", JSON_BUILD_STRING(id128_to_uuid_string(fs_uuid, fs_uuids))),
+ JSON_BUILD_PAIR_CONDITION(!!luks_cipher, "luksCipher", JSON_BUILD_STRING(luks_cipher)),
+ JSON_BUILD_PAIR_CONDITION(!!luks_cipher_mode, "luksCipherMode", JSON_BUILD_STRING(luks_cipher_mode)),
+ JSON_BUILD_PAIR_CONDITION(luks_volume_key_size != UINT64_MAX, "luksVolumeKeySize", JSON_BUILD_UNSIGNED(luks_volume_key_size)),
+ JSON_BUILD_PAIR_CONDITION(!!file_system_type, "fileSystemType", JSON_BUILD_STRING(file_system_type)),
+ JSON_BUILD_PAIR_CONDITION(!!home_directory, "homeDirectory", JSON_BUILD_STRING(home_directory)),
+ JSON_BUILD_PAIR_CONDITION(uid_is_valid(uid), "uid", JSON_BUILD_UNSIGNED(uid)),
+ JSON_BUILD_PAIR_CONDITION(gid_is_valid(gid), "gid", JSON_BUILD_UNSIGNED(gid)),
+ JSON_BUILD_PAIR_CONDITION(storage >= 0, "storage", JSON_BUILD_STRING(user_storage_to_string(storage)))));
+ if (r < 0)
+ return r;
+
+ binding = json_variant_ref(json_variant_by_key(h->json, "binding"));
+ if (binding) {
+ _cleanup_(json_variant_unrefp) JsonVariant *be = NULL;
+
+ /* Merge the new entry with an old one, if that exists */
+ be = json_variant_ref(json_variant_by_key(binding, smid));
+ if (be) {
+ r = json_variant_merge(&be, new_binding_entry);
+ if (r < 0)
+ return r;
+
+ json_variant_unref(new_binding_entry);
+ new_binding_entry = TAKE_PTR(be);
+ }
+ }
+
+ r = json_variant_set_field(&binding, smid, new_binding_entry);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&h->json, "binding", binding);
+ if (r < 0)
+ return r;
+
+ if (storage >= 0)
+ h->storage = storage;
+
+ if (ip)
+ free_and_replace(h->image_path, ip);
+ if (ip_auto)
+ free_and_replace(h->image_path_auto, ip_auto);
+
+ if (!sd_id128_is_null(partition_uuid))
+ h->partition_uuid = partition_uuid;
+
+ if (!sd_id128_is_null(luks_uuid))
+ h->luks_uuid = luks_uuid;
+
+ if (!sd_id128_is_null(fs_uuid))
+ h->file_system_uuid = fs_uuid;
+
+ if (lc)
+ free_and_replace(h->luks_cipher, lc);
+ if (lcm)
+ free_and_replace(h->luks_cipher_mode, lcm);
+ if (luks_volume_key_size != UINT64_MAX)
+ h->luks_volume_key_size = luks_volume_key_size;
+
+ if (fst)
+ free_and_replace(h->file_system_type, fst);
+ if (hd)
+ free_and_replace(h->home_directory, hd);
+
+ if (uid_is_valid(uid))
+ h->uid = uid;
+ if (gid_is_valid(gid))
+ h->gid = gid;
+
+ h->mask |= USER_RECORD_BINDING;
+ return 1;
+}
+
+int user_record_test_home_directory(UserRecord *h) {
+ const char *hd;
+ int r;
+
+ assert(h);
+
+ /* Returns one of USER_TEST_ABSENT, USER_TEST_MOUNTED, USER_TEST_EXISTS on success */
+
+ hd = user_record_home_directory(h);
+ if (!hd)
+ return -ENXIO;
+
+ r = is_dir(hd, false);
+ if (r == -ENOENT)
+ return USER_TEST_ABSENT;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOTDIR;
+
+ r = path_is_mount_point(hd, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return USER_TEST_MOUNTED;
+
+ /* If the image path and the home directory are identical, then it's OK if the directory is
+ * populated. */
+ if (IN_SET(user_record_storage(h), USER_CLASSIC, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT)) {
+ const char *ip;
+
+ ip = user_record_image_path(h);
+ if (ip && path_equal(ip, hd))
+ return USER_TEST_EXISTS;
+ }
+
+ /* Otherwise it's not OK */
+ r = dir_is_empty(hd);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+
+ return USER_TEST_EXISTS;
+}
+
+int user_record_test_home_directory_and_warn(UserRecord *h) {
+ int r;
+
+ assert(h);
+
+ r = user_record_test_home_directory(h);
+ if (r == -ENXIO)
+ return log_error_errno(r, "User record lacks home directory, refusing.");
+ if (r == -ENOTDIR)
+ return log_error_errno(r, "Home directory %s is not a directory, refusing.", user_record_home_directory(h));
+ if (r == -EBUSY)
+ return log_error_errno(r, "Home directory %s exists, is not mounted but populated, refusing.", user_record_home_directory(h));
+ if (r < 0)
+ return log_error_errno(r, "Failed to test whether the home directory %s exists: %m", user_record_home_directory(h));
+
+ return r;
+}
+
+int user_record_test_image_path(UserRecord *h) {
+ const char *ip;
+ struct stat st;
+
+ assert(h);
+
+ if (user_record_storage(h) == USER_CIFS)
+ return USER_TEST_UNDEFINED;
+
+ ip = user_record_image_path(h);
+ if (!ip)
+ return -ENXIO;
+
+ if (stat(ip, &st) < 0) {
+ if (errno == ENOENT)
+ return USER_TEST_ABSENT;
+
+ return -errno;
+ }
+
+ switch (user_record_storage(h)) {
+
+ case USER_LUKS:
+ if (S_ISREG(st.st_mode)) {
+ ssize_t n;
+ char x[2];
+
+ n = getxattr(ip, "user.home-dirty", x, sizeof(x));
+ if (n < 0) {
+ if (errno != ENODATA)
+ log_debug_errno(errno, "Unable to read dirty xattr off image file, ignoring: %m");
+
+ } else if (n == 1 && x[0] == '1')
+ return USER_TEST_DIRTY;
+
+ return USER_TEST_EXISTS;
+ }
+
+ if (S_ISBLK(st.st_mode)) {
+ /* For block devices we can't really be sure if the device referenced actually is the
+ * fs we look for or some other file system (think: what does /dev/sdb1 refer
+ * to?). Hence, let's return USER_TEST_MAYBE as an ambiguous return value for these
+ * case, except if the device path used is one of the paths that is based on a
+ * filesystem or partition UUID or label, because in those cases we can be sure we
+ * are referring to the right device. */
+
+ if (PATH_STARTSWITH_SET(ip,
+ "/dev/disk/by-uuid/",
+ "/dev/disk/by-partuuid/",
+ "/dev/disk/by-partlabel/",
+ "/dev/disk/by-label/"))
+ return USER_TEST_EXISTS;
+
+ return USER_TEST_MAYBE;
+ }
+
+ return -EBADFD;
+
+ case USER_CLASSIC:
+ case USER_DIRECTORY:
+ case USER_SUBVOLUME:
+ case USER_FSCRYPT:
+ if (S_ISDIR(st.st_mode))
+ return USER_TEST_EXISTS;
+
+ return -ENOTDIR;
+
+ default:
+ assert_not_reached("Unexpected record type");
+ }
+}
+
+int user_record_test_image_path_and_warn(UserRecord *h) {
+ int r;
+
+ assert(h);
+
+ r = user_record_test_image_path(h);
+ if (r == -ENXIO)
+ return log_error_errno(r, "User record lacks image path, refusing.");
+ if (r == -EBADFD)
+ return log_error_errno(r, "Image path %s is not a regular file or block device, refusing.", user_record_image_path(h));
+ if (r == -ENOTDIR)
+ return log_error_errno(r, "Image path %s is not a directory, refusing.", user_record_image_path(h));
+ if (r < 0)
+ return log_error_errno(r, "Failed to test whether image path %s exists: %m", user_record_image_path(h));
+
+ return r;
+}
+
+int user_record_test_password(UserRecord *h, UserRecord *secret) {
+ char **i;
+ int r;
+
+ assert(h);
+
+ /* Checks whether any of the specified passwords matches any of the hashed passwords of the entry */
+
+ if (strv_isempty(h->hashed_password))
+ return -ENXIO;
+
+ STRV_FOREACH(i, secret->password) {
+ r = test_password_many(h->hashed_password, *i);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+ }
+
+ return -ENOKEY;
+}
+
+int user_record_test_recovery_key(UserRecord *h, UserRecord *secret) {
+ char **i;
+ int r;
+
+ assert(h);
+
+ /* Checks whether any of the specified passwords matches any of the hashed recovery keys of the entry */
+
+ if (h->n_recovery_key == 0)
+ return -ENXIO;
+
+ STRV_FOREACH(i, secret->password) {
+ for (size_t j = 0; j < h->n_recovery_key; j++) {
+ _cleanup_(erase_and_freep) char *mangled = NULL;
+ const char *p;
+
+ if (streq(h->recovery_key[j].type, "modhex64")) {
+ /* If this key is for a modhex64 recovery key, then try to normalize the
+ * passphrase to make things more robust: that way the password becomes case
+ * insensitive and the dashes become optional. */
+
+ r = normalize_recovery_key(*i, &mangled);
+ if (r == -EINVAL) /* Not a valid modhex64 passphrase, don't bother */
+ continue;
+ if (r < 0)
+ return r;
+
+ p = mangled;
+ } else
+ p = *i; /* Unknown recovery key types process as is */
+
+ r = test_password_one(h->recovery_key[j].hashed_password, p);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+ }
+ }
+
+ return -ENOKEY;
+}
+
+int user_record_set_disk_size(UserRecord *h, uint64_t disk_size) {
+ _cleanup_(json_variant_unrefp) JsonVariant *new_per_machine = NULL, *midv = NULL, *midav = NULL, *ne = NULL;
+ _cleanup_free_ JsonVariant **array = NULL;
+ char smid[SD_ID128_STRING_MAX];
+ size_t idx = SIZE_MAX, n;
+ JsonVariant *per_machine;
+ sd_id128_t mid;
+ int r;
+
+ assert(h);
+
+ if (!h->json)
+ return -EUNATCH;
+
+ if (disk_size < USER_DISK_SIZE_MIN || disk_size > USER_DISK_SIZE_MAX)
+ return -ERANGE;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return r;
+
+ sd_id128_to_string(mid, smid);
+
+ r = json_variant_new_string(&midv, smid);
+ if (r < 0)
+ return r;
+
+ r = json_variant_new_array(&midav, (JsonVariant*[]) { midv }, 1);
+ if (r < 0)
+ return r;
+
+ per_machine = json_variant_by_key(h->json, "perMachine");
+ if (per_machine) {
+ size_t i;
+
+ if (!json_variant_is_array(per_machine))
+ return -EINVAL;
+
+ n = json_variant_elements(per_machine);
+
+ array = new(JsonVariant*, n + 1);
+ if (!array)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *m;
+
+ array[i] = json_variant_by_index(per_machine, i);
+
+ if (!json_variant_is_object(array[i]))
+ return -EINVAL;
+
+ m = json_variant_by_key(array[i], "matchMachineId");
+ if (!m) {
+ /* No machineId field? Let's ignore this, but invalidate what we found so far */
+ idx = SIZE_MAX;
+ continue;
+ }
+
+ if (json_variant_equal(m, midv) ||
+ json_variant_equal(m, midav)) {
+ /* Matches exactly what we are looking for. Let's use this */
+ idx = i;
+ continue;
+ }
+
+ r = per_machine_id_match(m, JSON_PERMISSIVE);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ /* Also matches what we are looking for, but with a broader match. In this
+ * case let's ignore this entry, and add a new specific one to the end. */
+ idx = SIZE_MAX;
+ }
+
+ if (idx == SIZE_MAX)
+ idx = n++; /* Nothing suitable found, place new entry at end */
+ else
+ ne = json_variant_ref(array[idx]);
+
+ } else {
+ array = new(JsonVariant*, 1);
+ if (!array)
+ return -ENOMEM;
+
+ idx = 0;
+ n = 1;
+ }
+
+ if (!ne) {
+ r = json_variant_set_field(&ne, "matchMachineId", midav);
+ if (r < 0)
+ return r;
+ }
+
+ r = json_variant_set_field_unsigned(&ne, "diskSize", disk_size);
+ if (r < 0)
+ return r;
+
+ assert(idx < n);
+ array[idx] = ne;
+
+ r = json_variant_new_array(&new_per_machine, array, n);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&h->json, "perMachine", new_per_machine);
+ if (r < 0)
+ return r;
+
+ h->disk_size = disk_size;
+ h->mask |= USER_RECORD_PER_MACHINE;
+ return 0;
+}
+
+int user_record_update_last_changed(UserRecord *h, bool with_password) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ usec_t n;
+ int r;
+
+ assert(h);
+
+ if (!h->json)
+ return -EUNATCH;
+
+ n = now(CLOCK_REALTIME);
+
+ /* refuse downgrading */
+ if (h->last_change_usec != UINT64_MAX && h->last_change_usec >= n)
+ return -ECHRNG;
+ if (h->last_password_change_usec != UINT64_MAX && h->last_password_change_usec >= n)
+ return -ECHRNG;
+
+ v = json_variant_ref(h->json);
+
+ r = json_variant_set_field_unsigned(&v, "lastChangeUSec", n);
+ if (r < 0)
+ return r;
+
+ if (with_password) {
+ r = json_variant_set_field_unsigned(&v, "lastPasswordChangeUSec", n);
+ if (r < 0)
+ return r;
+
+ h->last_password_change_usec = n;
+ }
+
+ h->last_change_usec = n;
+
+ json_variant_unref(h->json);
+ h->json = TAKE_PTR(v);
+
+ h->mask |= USER_RECORD_REGULAR;
+ return 0;
+}
+
+int user_record_make_hashed_password(UserRecord *h, char **secret, bool extend) {
+ _cleanup_(json_variant_unrefp) JsonVariant *priv = NULL;
+ _cleanup_strv_free_ char **np = NULL;
+ char **i;
+ int r;
+
+ assert(h);
+ assert(secret);
+
+ /* Initializes the hashed password list from the specified plaintext passwords */
+
+ if (extend) {
+ np = strv_copy(h->hashed_password);
+ if (!np)
+ return -ENOMEM;
+
+ strv_uniq(np);
+ }
+
+ STRV_FOREACH(i, secret) {
+ _cleanup_(erase_and_freep) char *hashed = NULL;
+
+ r = hash_password(*i, &hashed);
+ if (r < 0)
+ return r;
+
+ r = strv_consume(&np, TAKE_PTR(hashed));
+ if (r < 0)
+ return r;
+ }
+
+ priv = json_variant_ref(json_variant_by_key(h->json, "privileged"));
+
+ if (strv_isempty(np))
+ r = json_variant_filter(&priv, STRV_MAKE("hashedPassword"));
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *new_array = NULL;
+
+ r = json_variant_new_array_strv(&new_array, np);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&priv, "hashedPassword", new_array);
+ if (r < 0)
+ return r;
+ }
+
+ r = json_variant_set_field(&h->json, "privileged", priv);
+ if (r < 0)
+ return r;
+
+ strv_free_and_replace(h->hashed_password, np);
+
+ SET_FLAG(h->mask, USER_RECORD_PRIVILEGED, !json_variant_is_blank_object(priv));
+ return 0;
+}
+
+int user_record_set_hashed_password(UserRecord *h, char **hashed_password) {
+ _cleanup_(json_variant_unrefp) JsonVariant *priv = NULL;
+ _cleanup_strv_free_ char **copy = NULL;
+ int r;
+
+ assert(h);
+
+ priv = json_variant_ref(json_variant_by_key(h->json, "privileged"));
+
+ if (strv_isempty(hashed_password))
+ r = json_variant_filter(&priv, STRV_MAKE("hashedPassword"));
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *array = NULL;
+
+ copy = strv_copy(hashed_password);
+ if (!copy)
+ return -ENOMEM;
+
+ strv_uniq(copy);
+
+ r = json_variant_new_array_strv(&array, copy);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&priv, "hashedPassword", array);
+ }
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&h->json, "privileged", priv);
+ if (r < 0)
+ return r;
+
+ strv_free_and_replace(h->hashed_password, copy);
+
+ SET_FLAG(h->mask, USER_RECORD_PRIVILEGED, !json_variant_is_blank_object(priv));
+ return 0;
+}
+
+int user_record_set_password(UserRecord *h, char **password, bool prepend) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ _cleanup_(strv_free_erasep) char **e = NULL;
+ int r;
+
+ assert(h);
+
+ if (prepend) {
+ e = strv_copy(password);
+ if (!e)
+ return -ENOMEM;
+
+ r = strv_extend_strv(&e, h->password, true);
+ if (r < 0)
+ return r;
+
+ strv_uniq(e);
+
+ if (strv_equal(h->password, e))
+ return 0;
+
+ } else {
+ if (strv_equal(h->password, password))
+ return 0;
+
+ e = strv_copy(password);
+ if (!e)
+ return -ENOMEM;
+
+ strv_uniq(e);
+ }
+
+ w = json_variant_ref(json_variant_by_key(h->json, "secret"));
+
+ if (strv_isempty(e))
+ r = json_variant_filter(&w, STRV_MAKE("password"));
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *l = NULL;
+
+ r = json_variant_new_array_strv(&l, e);
+ if (r < 0)
+ return r;
+
+ json_variant_sensitive(l);
+
+ r = json_variant_set_field(&w, "password", l);
+ }
+ if (r < 0)
+ return r;
+
+ json_variant_sensitive(w);
+
+ r = json_variant_set_field(&h->json, "secret", w);
+ if (r < 0)
+ return r;
+
+ strv_free_and_replace(h->password, e);
+
+ SET_FLAG(h->mask, USER_RECORD_SECRET, !json_variant_is_blank_object(w));
+ return 0;
+}
+
+int user_record_set_token_pin(UserRecord *h, char **pin, bool prepend) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ _cleanup_(strv_free_erasep) char **e = NULL;
+ int r;
+
+ assert(h);
+
+ if (prepend) {
+ e = strv_copy(pin);
+ if (!e)
+ return -ENOMEM;
+
+ r = strv_extend_strv(&e, h->token_pin, true);
+ if (r < 0)
+ return r;
+
+ strv_uniq(e);
+
+ if (strv_equal(h->token_pin, e))
+ return 0;
+
+ } else {
+ if (strv_equal(h->token_pin, pin))
+ return 0;
+
+ e = strv_copy(pin);
+ if (!e)
+ return -ENOMEM;
+
+ strv_uniq(e);
+ }
+
+ w = json_variant_ref(json_variant_by_key(h->json, "secret"));
+
+ if (strv_isempty(e))
+ r = json_variant_filter(&w, STRV_MAKE("tokenPin"));
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *l = NULL;
+
+ r = json_variant_new_array_strv(&l, e);
+ if (r < 0)
+ return r;
+
+ json_variant_sensitive(l);
+
+ r = json_variant_set_field(&w, "tokenPin", l);
+ }
+ if (r < 0)
+ return r;
+
+ json_variant_sensitive(w);
+
+ r = json_variant_set_field(&h->json, "secret", w);
+ if (r < 0)
+ return r;
+
+ strv_free_and_replace(h->token_pin, e);
+
+ SET_FLAG(h->mask, USER_RECORD_SECRET, !json_variant_is_blank_object(w));
+ return 0;
+}
+
+int user_record_set_pkcs11_protected_authentication_path_permitted(UserRecord *h, int b) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ int r;
+
+ assert(h);
+
+ w = json_variant_ref(json_variant_by_key(h->json, "secret"));
+
+ if (b < 0)
+ r = json_variant_filter(&w, STRV_MAKE("pkcs11ProtectedAuthenticationPathPermitted"));
+ else
+ r = json_variant_set_field_boolean(&w, "pkcs11ProtectedAuthenticationPathPermitted", b);
+ if (r < 0)
+ return r;
+
+ if (json_variant_is_blank_object(w))
+ r = json_variant_filter(&h->json, STRV_MAKE("secret"));
+ else {
+ json_variant_sensitive(w);
+
+ r = json_variant_set_field(&h->json, "secret", w);
+ }
+ if (r < 0)
+ return r;
+
+ h->pkcs11_protected_authentication_path_permitted = b;
+
+ SET_FLAG(h->mask, USER_RECORD_SECRET, !json_variant_is_blank_object(w));
+ return 0;
+}
+
+int user_record_set_fido2_user_presence_permitted(UserRecord *h, int b) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ int r;
+
+ assert(h);
+
+ w = json_variant_ref(json_variant_by_key(h->json, "secret"));
+
+ if (b < 0)
+ r = json_variant_filter(&w, STRV_MAKE("fido2UserPresencePermitted"));
+ else
+ r = json_variant_set_field_boolean(&w, "fido2UserPresencePermitted", b);
+ if (r < 0)
+ return r;
+
+ if (json_variant_is_blank_object(w))
+ r = json_variant_filter(&h->json, STRV_MAKE("secret"));
+ else
+ r = json_variant_set_field(&h->json, "secret", w);
+ if (r < 0)
+ return r;
+
+ h->fido2_user_presence_permitted = b;
+
+ SET_FLAG(h->mask, USER_RECORD_SECRET, !json_variant_is_blank_object(w));
+ return 0;
+}
+
+static bool per_machine_entry_empty(JsonVariant *v) {
+ const char *k;
+ _unused_ JsonVariant *e;
+
+ JSON_VARIANT_OBJECT_FOREACH(k, e, v)
+ if (!STR_IN_SET(k, "matchMachineId", "matchHostname"))
+ return false;
+
+ return true;
+}
+
+int user_record_set_password_change_now(UserRecord *h, int b) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ JsonVariant *per_machine;
+ int r;
+
+ assert(h);
+
+ w = json_variant_ref(h->json);
+
+ if (b < 0)
+ r = json_variant_filter(&w, STRV_MAKE("passwordChangeNow"));
+ else
+ r = json_variant_set_field_boolean(&w, "passwordChangeNow", b);
+ if (r < 0)
+ return r;
+
+ /* Also drop the field from all perMachine entries */
+ per_machine = json_variant_by_key(w, "perMachine");
+ if (per_machine) {
+ _cleanup_(json_variant_unrefp) JsonVariant *array = NULL;
+ JsonVariant *e;
+
+ JSON_VARIANT_ARRAY_FOREACH(e, per_machine) {
+ _cleanup_(json_variant_unrefp) JsonVariant *z = NULL;
+
+ if (!json_variant_is_object(e))
+ return -EINVAL;
+
+ z = json_variant_ref(e);
+
+ r = json_variant_filter(&z, STRV_MAKE("passwordChangeNow"));
+ if (r < 0)
+ return r;
+
+ if (per_machine_entry_empty(z))
+ continue;
+
+ r = json_variant_append_array(&array, z);
+ if (r < 0)
+ return r;
+ }
+
+ if (json_variant_is_blank_array(array))
+ r = json_variant_filter(&w, STRV_MAKE("perMachine"));
+ else
+ r = json_variant_set_field(&w, "perMachine", array);
+ if (r < 0)
+ return r;
+
+ SET_FLAG(h->mask, USER_RECORD_PER_MACHINE, !json_variant_is_blank_array(array));
+ }
+
+ json_variant_unref(h->json);
+ h->json = TAKE_PTR(w);
+
+ h->password_change_now = b;
+
+ return 0;
+}
+
+int user_record_merge_secret(UserRecord *h, UserRecord *secret) {
+ int r;
+
+ assert(h);
+
+ /* Merges the secrets from 'secret' into 'h'. */
+
+ r = user_record_set_password(h, secret->password, true);
+ if (r < 0)
+ return r;
+
+ r = user_record_set_token_pin(h, secret->token_pin, true);
+ if (r < 0)
+ return r;
+
+ if (secret->pkcs11_protected_authentication_path_permitted >= 0) {
+ r = user_record_set_pkcs11_protected_authentication_path_permitted(
+ h,
+ secret->pkcs11_protected_authentication_path_permitted);
+ if (r < 0)
+ return r;
+ }
+
+ if (secret->fido2_user_presence_permitted >= 0) {
+ r = user_record_set_fido2_user_presence_permitted(
+ h,
+ secret->fido2_user_presence_permitted);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int user_record_good_authentication(UserRecord *h) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL, *z = NULL;
+ char buf[SD_ID128_STRING_MAX];
+ uint64_t counter, usec;
+ sd_id128_t mid;
+ int r;
+
+ assert(h);
+
+ switch (h->good_authentication_counter) {
+ case UINT64_MAX:
+ counter = 1;
+ break;
+ case UINT64_MAX-1:
+ counter = h->good_authentication_counter; /* saturate */
+ break;
+ default:
+ counter = h->good_authentication_counter + 1;
+ break;
+ }
+
+ usec = now(CLOCK_REALTIME);
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return r;
+
+ v = json_variant_ref(h->json);
+ w = json_variant_ref(json_variant_by_key(v, "status"));
+ z = json_variant_ref(json_variant_by_key(w, sd_id128_to_string(mid, buf)));
+
+ r = json_variant_set_field_unsigned(&z, "goodAuthenticationCounter", counter);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field_unsigned(&z, "lastGoodAuthenticationUSec", usec);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&w, buf, z);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&v, "status", w);
+ if (r < 0)
+ return r;
+
+ json_variant_unref(h->json);
+ h->json = TAKE_PTR(v);
+
+ h->good_authentication_counter = counter;
+ h->last_good_authentication_usec = usec;
+
+ h->mask |= USER_RECORD_STATUS;
+ return 0;
+}
+
+int user_record_bad_authentication(UserRecord *h) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL, *z = NULL;
+ char buf[SD_ID128_STRING_MAX];
+ uint64_t counter, usec;
+ sd_id128_t mid;
+ int r;
+
+ assert(h);
+
+ switch (h->bad_authentication_counter) {
+ case UINT64_MAX:
+ counter = 1;
+ break;
+ case UINT64_MAX-1:
+ counter = h->bad_authentication_counter; /* saturate */
+ break;
+ default:
+ counter = h->bad_authentication_counter + 1;
+ break;
+ }
+
+ usec = now(CLOCK_REALTIME);
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return r;
+
+ v = json_variant_ref(h->json);
+ w = json_variant_ref(json_variant_by_key(v, "status"));
+ z = json_variant_ref(json_variant_by_key(w, sd_id128_to_string(mid, buf)));
+
+ r = json_variant_set_field_unsigned(&z, "badAuthenticationCounter", counter);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field_unsigned(&z, "lastBadAuthenticationUSec", usec);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&w, buf, z);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&v, "status", w);
+ if (r < 0)
+ return r;
+
+ json_variant_unref(h->json);
+ h->json = TAKE_PTR(v);
+
+ h->bad_authentication_counter = counter;
+ h->last_bad_authentication_usec = usec;
+
+ h->mask |= USER_RECORD_STATUS;
+ return 0;
+}
+
+int user_record_ratelimit(UserRecord *h) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL, *z = NULL;
+ usec_t usec, new_ratelimit_begin_usec, new_ratelimit_count;
+ char buf[SD_ID128_STRING_MAX];
+ sd_id128_t mid;
+ int r;
+
+ assert(h);
+
+ usec = now(CLOCK_REALTIME);
+
+ if (h->ratelimit_begin_usec != UINT64_MAX && h->ratelimit_begin_usec > usec) {
+ /* Hmm, start-time is after the current time? If so, the RTC most likely doesn't work. */
+ new_ratelimit_begin_usec = usec;
+ new_ratelimit_count = 1;
+ log_debug("Rate limit timestamp is in the future, assuming incorrect system clock, resetting limit.");
+ } else if (h->ratelimit_begin_usec == UINT64_MAX ||
+ usec_add(h->ratelimit_begin_usec, user_record_ratelimit_interval_usec(h)) <= usec) {
+ /* Fresh start */
+ new_ratelimit_begin_usec = usec;
+ new_ratelimit_count = 1;
+ } else if (h->ratelimit_count < user_record_ratelimit_burst(h)) {
+ /* Count up */
+ new_ratelimit_begin_usec = h->ratelimit_begin_usec;
+ new_ratelimit_count = h->ratelimit_count + 1;
+ } else
+ /* Limit hit */
+ return 0;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return r;
+
+ v = json_variant_ref(h->json);
+ w = json_variant_ref(json_variant_by_key(v, "status"));
+ z = json_variant_ref(json_variant_by_key(w, sd_id128_to_string(mid, buf)));
+
+ r = json_variant_set_field_unsigned(&z, "rateLimitBeginUSec", new_ratelimit_begin_usec);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field_unsigned(&z, "rateLimitCount", new_ratelimit_count);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&w, buf, z);
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&v, "status", w);
+ if (r < 0)
+ return r;
+
+ json_variant_unref(h->json);
+ h->json = TAKE_PTR(v);
+
+ h->ratelimit_begin_usec = new_ratelimit_begin_usec;
+ h->ratelimit_count = new_ratelimit_count;
+
+ h->mask |= USER_RECORD_STATUS;
+ return 1;
+}
+
+int user_record_is_supported(UserRecord *hr, sd_bus_error *error) {
+ assert(hr);
+
+ if (hr->disposition >= 0 && hr->disposition != USER_REGULAR)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot manage anything but regular users.");
+
+ if (hr->storage >= 0 && !IN_SET(hr->storage, USER_LUKS, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT, USER_CIFS))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User record has storage type this service cannot manage.");
+
+ if (gid_is_valid(hr->gid) && hr->uid != (uid_t) hr->gid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User record has to have matching UID/GID fields.");
+
+ if (hr->service && !streq(hr->service, "io.systemd.Home"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Not accepted with service not matching io.systemd.Home.");
+
+ return 0;
+}
diff --git a/src/home/user-record-util.h b/src/home/user-record-util.h
new file mode 100644
index 0000000..302e7a5
--- /dev/null
+++ b/src/home/user-record-util.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "user-record.h"
+#include "group-record.h"
+
+int user_record_synthesize(UserRecord *h, const char *user_name, const char *realm, const char *image_path, UserStorage storage, uid_t uid, gid_t gid);
+int group_record_synthesize(GroupRecord *g, UserRecord *u);
+
+typedef enum UserReconcileMode {
+ USER_RECONCILE_ANY,
+ USER_RECONCILE_REQUIRE_NEWER, /* host version must be newer than embedded version */
+ USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL, /* similar, but may also be equal */
+ _USER_RECONCILE_MODE_MAX,
+ _USER_RECONCILE_MODE_INVALID = -1,
+} UserReconcileMode;
+
+enum { /* return values */
+ USER_RECONCILE_HOST_WON,
+ USER_RECONCILE_EMBEDDED_WON,
+ USER_RECONCILE_IDENTICAL,
+};
+
+int user_record_reconcile(UserRecord *host, UserRecord *embedded, UserReconcileMode mode, UserRecord **ret);
+int user_record_add_binding(UserRecord *h, UserStorage storage, const char *image_path, sd_id128_t partition_uuid, sd_id128_t luks_uuid, sd_id128_t fs_uuid, const char *luks_cipher, const char *luks_cipher_mode, uint64_t luks_volume_key_size, const char *file_system_type, const char *home_directory, uid_t uid, gid_t gid);
+
+/* Results of the two test functions below. */
+enum {
+ USER_TEST_UNDEFINED, /* Returned by user_record_test_image_path() if the storage type knows no image paths */
+ USER_TEST_ABSENT,
+ USER_TEST_EXISTS,
+ USER_TEST_DIRTY, /* Only applies to user_record_test_image_path(), when the image exists but is marked dirty */
+ USER_TEST_MOUNTED, /* Only applies to user_record_test_home_directory(), when the home directory exists. */
+ USER_TEST_MAYBE, /* Only applies to LUKS devices: block device exists, but we don't know if it's the right one */
+};
+
+int user_record_test_home_directory(UserRecord *h);
+int user_record_test_home_directory_and_warn(UserRecord *h);
+int user_record_test_image_path(UserRecord *h);
+int user_record_test_image_path_and_warn(UserRecord *h);
+
+int user_record_test_password(UserRecord *h, UserRecord *secret);
+int user_record_test_recovery_key(UserRecord *h, UserRecord *secret);
+
+int user_record_update_last_changed(UserRecord *h, bool with_password);
+int user_record_set_disk_size(UserRecord *h, uint64_t disk_size);
+int user_record_set_password(UserRecord *h, char **password, bool prepend);
+int user_record_make_hashed_password(UserRecord *h, char **password, bool extend);
+int user_record_set_hashed_password(UserRecord *h, char **hashed_password);
+int user_record_set_token_pin(UserRecord *h, char **pin, bool prepend);
+int user_record_set_pkcs11_protected_authentication_path_permitted(UserRecord *h, int b);
+int user_record_set_fido2_user_presence_permitted(UserRecord *h, int b);
+int user_record_set_password_change_now(UserRecord *h, int b);
+int user_record_merge_secret(UserRecord *h, UserRecord *secret);
+int user_record_good_authentication(UserRecord *h);
+int user_record_bad_authentication(UserRecord *h);
+int user_record_ratelimit(UserRecord *h);
+
+int user_record_is_supported(UserRecord *hr, sd_bus_error *error);
diff --git a/src/hostname/hostnamectl.c b/src/hostname/hostnamectl.c
new file mode 100644
index 0000000..0d39e91
--- /dev/null
+++ b/src/hostname/hostnamectl.c
@@ -0,0 +1,451 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sd-bus.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "bus-error.h"
+#include "bus-map-properties.h"
+#include "hostname-util.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "spawn-polkit-agent.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static bool arg_ask_password = true;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static char *arg_host = NULL;
+static bool arg_transient = false;
+static bool arg_pretty = false;
+static bool arg_static = false;
+
+typedef struct StatusInfo {
+ const char *hostname;
+ const char *static_hostname;
+ const char *pretty_hostname;
+ const char *icon_name;
+ const char *chassis;
+ const char *deployment;
+ const char *location;
+ const char *kernel_name;
+ const char *kernel_release;
+ const char *os_pretty_name;
+ const char *os_cpe_name;
+ const char *virtualization;
+ const char *architecture;
+ const char *home_url;
+} StatusInfo;
+
+static void print_status_info(StatusInfo *i) {
+ sd_id128_t mid = {}, bid = {};
+ int r;
+
+ assert(i);
+
+ printf(" Static hostname: %s\n", strna(i->static_hostname));
+
+ if (!isempty(i->pretty_hostname) &&
+ !streq_ptr(i->pretty_hostname, i->static_hostname))
+ printf(" Pretty hostname: %s\n", i->pretty_hostname);
+
+ if (!isempty(i->hostname) &&
+ !streq_ptr(i->hostname, i->static_hostname))
+ printf("Transient hostname: %s\n", i->hostname);
+
+ if (!isempty(i->icon_name))
+ printf(" Icon name: %s\n",
+ strna(i->icon_name));
+
+ if (!isempty(i->chassis))
+ printf(" Chassis: %s\n",
+ strna(i->chassis));
+
+ if (!isempty(i->deployment))
+ printf(" Deployment: %s\n", i->deployment);
+
+ if (!isempty(i->location))
+ printf(" Location: %s\n", i->location);
+
+ r = sd_id128_get_machine(&mid);
+ if (r >= 0)
+ printf(" Machine ID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(mid));
+
+ r = sd_id128_get_boot(&bid);
+ if (r >= 0)
+ printf(" Boot ID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(bid));
+
+ if (!isempty(i->virtualization))
+ printf(" Virtualization: %s\n", i->virtualization);
+
+ if (!isempty(i->os_pretty_name)) {
+ _cleanup_free_ char *formatted = NULL;
+ const char *t = i->os_pretty_name;
+
+ if (i->home_url) {
+ if (terminal_urlify(i->home_url, i->os_pretty_name, &formatted) >= 0)
+ t = formatted;
+ }
+
+ printf(" Operating System: %s\n", t);
+ }
+
+ if (!isempty(i->os_cpe_name))
+ printf(" CPE OS Name: %s\n", i->os_cpe_name);
+
+ if (!isempty(i->kernel_name) && !isempty(i->kernel_release))
+ printf(" Kernel: %s %s\n", i->kernel_name, i->kernel_release);
+
+ if (!isempty(i->architecture))
+ printf(" Architecture: %s\n", i->architecture);
+
+}
+
+static int show_one_name(sd_bus *bus, const char* attr) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *s;
+ int r;
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ attr,
+ &error, &reply, "s");
+ if (r < 0)
+ return log_error_errno(r, "Could not get property: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ printf("%s\n", s);
+
+ return 0;
+}
+
+static int show_all_names(sd_bus *bus, sd_bus_error *error) {
+ StatusInfo info = {};
+
+ static const struct bus_properties_map hostname_map[] = {
+ { "Hostname", "s", NULL, offsetof(StatusInfo, hostname) },
+ { "StaticHostname", "s", NULL, offsetof(StatusInfo, static_hostname) },
+ { "PrettyHostname", "s", NULL, offsetof(StatusInfo, pretty_hostname) },
+ { "IconName", "s", NULL, offsetof(StatusInfo, icon_name) },
+ { "Chassis", "s", NULL, offsetof(StatusInfo, chassis) },
+ { "Deployment", "s", NULL, offsetof(StatusInfo, deployment) },
+ { "Location", "s", NULL, offsetof(StatusInfo, location) },
+ { "KernelName", "s", NULL, offsetof(StatusInfo, kernel_name) },
+ { "KernelRelease", "s", NULL, offsetof(StatusInfo, kernel_release) },
+ { "OperatingSystemPrettyName", "s", NULL, offsetof(StatusInfo, os_pretty_name) },
+ { "OperatingSystemCPEName", "s", NULL, offsetof(StatusInfo, os_cpe_name) },
+ { "HomeURL", "s", NULL, offsetof(StatusInfo, home_url) },
+ {}
+ };
+
+ static const struct bus_properties_map manager_map[] = {
+ { "Virtualization", "s", NULL, offsetof(StatusInfo, virtualization) },
+ { "Architecture", "s", NULL, offsetof(StatusInfo, architecture) },
+ {}
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *host_message = NULL, *manager_message = NULL;
+ int r;
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ hostname_map,
+ 0,
+ error,
+ &host_message,
+ &info);
+ if (r < 0)
+ return r;
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ manager_map,
+ 0,
+ error,
+ &manager_message,
+ &info);
+
+ print_status_info(&info);
+
+ return r;
+}
+
+static int show_status(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ if (arg_pretty || arg_static || arg_transient) {
+ const char *attr;
+
+ if (!!arg_static + !!arg_pretty + !!arg_transient > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot query more than one name type at a time");
+
+ attr = arg_pretty ? "PrettyHostname" :
+ arg_static ? "StaticHostname" : "Hostname";
+
+ return show_one_name(bus, attr);
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = show_all_names(bus, &error);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query system properties: %s", bus_error_message(&error, r));
+
+ return 0;
+ }
+}
+
+static int set_simple_string(sd_bus *bus, const char *method, const char *value) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r = 0;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ method,
+ &error, NULL,
+ "sb", value, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Could not set property: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_hostname(int argc, char **argv, void *userdata) {
+ _cleanup_free_ char *h = NULL;
+ const char *hostname = argv[1];
+ sd_bus *bus = userdata;
+ int r;
+
+ if (!arg_pretty && !arg_static && !arg_transient)
+ arg_pretty = arg_static = arg_transient = true;
+
+ if (arg_pretty) {
+ const char *p;
+
+ /* If the passed hostname is already valid, then assume the user doesn't know anything about pretty
+ * hostnames, so let's unset the pretty hostname, and just set the passed hostname as static/dynamic
+ * hostname. */
+ if (arg_static && hostname_is_valid(hostname, true))
+ p = ""; /* No pretty hostname (as it is redundant), just a static one */
+ else
+ p = hostname; /* Use the passed name as pretty hostname */
+
+ r = set_simple_string(bus, "SetPrettyHostname", p);
+ if (r < 0)
+ return r;
+
+ /* Now that we set the pretty hostname, let's clean up the parameter and use that as static
+ * hostname. If the hostname was already valid as static hostname, this will only chop off the trailing
+ * dot if there is one. If it was not valid, then it will be made fully valid by truncating, dropping
+ * multiple dots, and dropping weird chars. Note that we clean the name up only if we also are
+ * supposed to set the pretty name. If the pretty name is not being set we assume the user knows what
+ * he does and pass the name as-is. */
+ h = strdup(hostname);
+ if (!h)
+ return log_oom();
+
+ hostname = hostname_cleanup(h); /* Use the cleaned up name as static hostname */
+ }
+
+ if (arg_static) {
+ r = set_simple_string(bus, "SetStaticHostname", hostname);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_transient) {
+ r = set_simple_string(bus, "SetHostname", hostname);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int set_icon_name(int argc, char **argv, void *userdata) {
+ return set_simple_string(userdata, "SetIconName", argv[1]);
+}
+
+static int set_chassis(int argc, char **argv, void *userdata) {
+ return set_simple_string(userdata, "SetChassis", argv[1]);
+}
+
+static int set_deployment(int argc, char **argv, void *userdata) {
+ return set_simple_string(userdata, "SetDeployment", argv[1]);
+}
+
+static int set_location(int argc, char **argv, void *userdata) {
+ return set_simple_string(userdata, "SetLocation", argv[1]);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("hostnamectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sQuery or change system hostname.%s\n"
+ "\nCommands:\n"
+ " status Show current hostname settings\n"
+ " set-hostname NAME Set system hostname\n"
+ " set-icon-name NAME Set icon name for host\n"
+ " set-chassis NAME Set chassis type for host\n"
+ " set-deployment NAME Set deployment environment for host\n"
+ " set-location NAME Set location for host\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-ask-password Do not prompt for password\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --transient Only set transient hostname\n"
+ " --static Only set static hostname\n"
+ " --pretty Only set pretty hostname\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_ASK_PASSWORD,
+ ARG_TRANSIENT,
+ ARG_STATIC,
+ ARG_PRETTY
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "transient", no_argument, NULL, ARG_TRANSIENT },
+ { "static", no_argument, NULL, ARG_STATIC },
+ { "pretty", no_argument, NULL, ARG_PRETTY },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_TRANSIENT:
+ arg_transient = true;
+ break;
+
+ case ARG_PRETTY:
+ arg_pretty = true;
+ break;
+
+ case ARG_STATIC:
+ arg_static = true;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int hostnamectl_main(sd_bus *bus, int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "status", VERB_ANY, 1, VERB_DEFAULT, show_status },
+ { "set-hostname", 2, 2, 0, set_hostname },
+ { "set-icon-name", 2, 2, 0, set_icon_name },
+ { "set-chassis", 2, 2, 0, set_chassis },
+ { "set-deployment", 2, 2, 0, set_deployment },
+ { "set-location", 2, 2, 0, set_location },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help }, /* Not documented, but supported since it is created. */
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ return hostnamectl_main(bus, argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/hostname/hostnamed.c b/src/hostname/hostnamed.c
new file mode 100644
index 0000000..a1794bd
--- /dev/null
+++ b/src/hostname/hostnamed.c
@@ -0,0 +1,1011 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/utsname.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-log-control-api.h"
+#include "bus-polkit.h"
+#include "def.h"
+#include "env-file-label.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "main-func.h"
+#include "missing_capability.h"
+#include "nscd-flush.h"
+#include "nulstr-util.h"
+#include "os-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "service-util.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+#define VALID_DEPLOYMENT_CHARS (DIGITS LETTERS "-.:")
+
+enum {
+ /* Read from /etc/hostname */
+ PROP_STATIC_HOSTNAME,
+
+ /* Read from /etc/machine-info */
+ PROP_PRETTY_HOSTNAME,
+ PROP_ICON_NAME,
+ PROP_CHASSIS,
+ PROP_DEPLOYMENT,
+ PROP_LOCATION,
+
+ /* Read from /etc/os-release (or /usr/lib/os-release) */
+ PROP_OS_PRETTY_NAME,
+ PROP_OS_CPE_NAME,
+ PROP_OS_HOME_URL,
+ _PROP_MAX,
+ _PROP_INVALID = -1,
+};
+
+typedef struct Context {
+ char *data[_PROP_MAX];
+
+ struct stat etc_hostname_stat;
+ struct stat etc_os_release_stat;
+ struct stat etc_machine_info_stat;
+
+ Hashmap *polkit_registry;
+} Context;
+
+static void context_reset(Context *c, uint64_t mask) {
+ int p;
+
+ assert(c);
+
+ for (p = 0; p < _PROP_MAX; p++) {
+ if (!FLAGS_SET(mask, UINT64_C(1) << p))
+ continue;
+
+ c->data[p] = mfree(c->data[p]);
+ }
+}
+
+static void context_destroy(Context *c) {
+ assert(c);
+
+ context_reset(c, UINT64_MAX);
+ bus_verify_polkit_async_registry_free(c->polkit_registry);
+}
+
+static void context_read_etc_hostname(Context *c) {
+ struct stat current_stat = {};
+ int r;
+
+ assert(c);
+
+ if (stat("/etc/hostname", &current_stat) >= 0 &&
+ stat_inode_unmodified(&c->etc_hostname_stat, &current_stat))
+ return;
+
+ context_reset(c, UINT64_C(1) << PROP_STATIC_HOSTNAME);
+
+ r = read_etc_hostname(NULL, &c->data[PROP_STATIC_HOSTNAME]);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /etc/hostname, ignoring: %m");
+
+ c->etc_hostname_stat = current_stat;
+}
+
+static void context_read_machine_info(Context *c) {
+ struct stat current_stat = {};
+ int r;
+
+ assert(c);
+
+ if (stat("/etc/machine-info", &current_stat) >= 0 &&
+ stat_inode_unmodified(&c->etc_machine_info_stat, &current_stat))
+ return;
+
+ context_reset(c,
+ (UINT64_C(1) << PROP_PRETTY_HOSTNAME) |
+ (UINT64_C(1) << PROP_ICON_NAME) |
+ (UINT64_C(1) << PROP_CHASSIS) |
+ (UINT64_C(1) << PROP_DEPLOYMENT) |
+ (UINT64_C(1) << PROP_LOCATION));
+
+ r = parse_env_file(NULL, "/etc/machine-info",
+ "PRETTY_HOSTNAME", &c->data[PROP_PRETTY_HOSTNAME],
+ "ICON_NAME", &c->data[PROP_ICON_NAME],
+ "CHASSIS", &c->data[PROP_CHASSIS],
+ "DEPLOYMENT", &c->data[PROP_DEPLOYMENT],
+ "LOCATION", &c->data[PROP_LOCATION]);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /etc/machine-info, ignoring: %m");
+
+ c->etc_machine_info_stat = current_stat;
+}
+
+static void context_read_os_release(Context *c) {
+ struct stat current_stat = {};
+ int r;
+
+ assert(c);
+
+ if ((stat("/etc/os-release", &current_stat) >= 0 ||
+ stat("/usr/lib/os-release", &current_stat) >= 0) &&
+ stat_inode_unmodified(&c->etc_os_release_stat, &current_stat))
+ return;
+
+ context_reset(c,
+ (UINT64_C(1) << PROP_OS_PRETTY_NAME) |
+ (UINT64_C(1) << PROP_OS_CPE_NAME) |
+ (UINT64_C(1) << PROP_OS_HOME_URL));
+
+ r = parse_os_release(NULL,
+ "PRETTY_NAME", &c->data[PROP_OS_PRETTY_NAME],
+ "CPE_NAME", &c->data[PROP_OS_CPE_NAME],
+ "HOME_URL", &c->data[PROP_OS_HOME_URL],
+ NULL);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read os-release file, ignoring: %m");
+
+ c->etc_os_release_stat = current_stat;
+}
+
+static bool valid_chassis(const char *chassis) {
+ assert(chassis);
+
+ return nulstr_contains(
+ "vm\0"
+ "container\0"
+ "desktop\0"
+ "laptop\0"
+ "convertible\0"
+ "server\0"
+ "tablet\0"
+ "handset\0"
+ "watch\0"
+ "embedded\0",
+ chassis);
+}
+
+static bool valid_deployment(const char *deployment) {
+ assert(deployment);
+
+ return in_charset(deployment, VALID_DEPLOYMENT_CHARS);
+}
+
+static const char* fallback_chassis(void) {
+ char *type;
+ unsigned t;
+ int v, r;
+
+ v = detect_virtualization();
+ if (v < 0)
+ log_debug_errno(v, "Failed to detect virtualization, ignoring: %m");
+ else if (VIRTUALIZATION_IS_VM(v))
+ return "vm";
+ else if (VIRTUALIZATION_IS_CONTAINER(v))
+ return "container";
+
+ r = read_one_line_file("/sys/class/dmi/id/chassis_type", &type);
+ if (r < 0) {
+ log_debug_errno(v, "Failed to read DMI chassis type, ignoring: %m");
+ goto try_acpi;
+ }
+
+ r = safe_atou(type, &t);
+ free(type);
+ if (r < 0) {
+ log_debug_errno(v, "Failed to parse DMI chassis type, ignoring: %m");
+ goto try_acpi;
+ }
+
+ /* We only list the really obvious cases here. The DMI data is unreliable enough, so let's not do any
+ additional guesswork on top of that.
+
+ See the SMBIOS Specification 3.0 section 7.4.1 for details about the values listed here:
+
+ https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.0.0.pdf
+ */
+
+ switch (t) {
+
+ case 0x3: /* Desktop */
+ case 0x4: /* Low Profile Desktop */
+ case 0x6: /* Mini Tower */
+ case 0x7: /* Tower */
+ case 0xD: /* All in one (i.e. PC built into monitor) */
+ return "desktop";
+
+ case 0x8: /* Portable */
+ case 0x9: /* Laptop */
+ case 0xA: /* Notebook */
+ case 0xE: /* Sub Notebook */
+ return "laptop";
+
+ case 0xB: /* Hand Held */
+ return "handset";
+
+ case 0x11: /* Main Server Chassis */
+ case 0x1C: /* Blade */
+ case 0x1D: /* Blade Enclosure */
+ return "server";
+
+ case 0x1E: /* Tablet */
+ return "tablet";
+
+ case 0x1F: /* Convertible */
+ case 0x20: /* Detachable */
+ return "convertible";
+
+ default:
+ log_debug("Unhandled DMI chassis type 0x%02x, ignoring.", t);
+ }
+
+try_acpi:
+ r = read_one_line_file("/sys/firmware/acpi/pm_profile", &type);
+ if (r < 0) {
+ log_debug_errno(v, "Failed read ACPI PM profile, ignoring: %m");
+ return NULL;
+ }
+
+ r = safe_atou(type, &t);
+ free(type);
+ if (r < 0) {
+ log_debug_errno(v, "Failed parse ACPI PM profile, ignoring: %m");
+ return NULL;
+ }
+
+ /* We only list the really obvious cases here as the ACPI data is not really super reliable.
+ *
+ * See the ACPI 5.0 Spec Section 5.2.9.1 for details:
+ *
+ * http://www.acpi.info/DOWNLOADS/ACPIspec50.pdf
+ */
+
+ switch(t) {
+
+ case 1: /* Desktop */
+ case 3: /* Workstation */
+ case 6: /* Appliance PC */
+ return "desktop";
+
+ case 2: /* Mobile */
+ return "laptop";
+
+ case 4: /* Enterprise Server */
+ case 5: /* SOHO Server */
+ case 7: /* Performance Server */
+ return "server";
+
+ case 8: /* Tablet */
+ return "tablet";
+
+ default:
+ log_debug("Unhandled ACPI PM profile 0x%02x, ignoring.", t);
+ }
+
+ return NULL;
+}
+
+static char* context_fallback_icon_name(Context *c) {
+ const char *chassis;
+
+ assert(c);
+
+ if (!isempty(c->data[PROP_CHASSIS]))
+ return strjoin("computer-", c->data[PROP_CHASSIS]);
+
+ chassis = fallback_chassis();
+ if (chassis)
+ return strjoin("computer-", chassis);
+
+ return strdup("computer");
+}
+
+static bool hostname_is_useful(const char *hn) {
+ return !isempty(hn) && !is_localhost(hn);
+}
+
+static int context_update_kernel_hostname(
+ Context *c,
+ const char *transient_hn) {
+
+ const char *static_hn, *hn;
+ struct utsname u;
+
+ assert(c);
+
+ if (!transient_hn) {
+ /* If no transient hostname is passed in, then let's check what is currently set. */
+ assert_se(uname(&u) >= 0);
+ transient_hn =
+ isempty(u.nodename) || streq(u.nodename, "(none)") ? NULL : u.nodename;
+ }
+
+ static_hn = c->data[PROP_STATIC_HOSTNAME];
+
+ /* /etc/hostname with something other than "localhost"
+ * has the highest preference ... */
+ if (hostname_is_useful(static_hn))
+ hn = static_hn;
+
+ /* ... the transient hostname, (ie: DHCP) comes next ... */
+ else if (!isempty(transient_hn))
+ hn = transient_hn;
+
+ /* ... fallback to static "localhost.*" ignored above ... */
+ else if (!isempty(static_hn))
+ hn = static_hn;
+
+ /* ... and the ultimate fallback */
+ else
+ hn = FALLBACK_HOSTNAME;
+
+ if (sethostname_idempotent(hn) < 0)
+ return -errno;
+
+ (void) nscd_flush_cache(STRV_MAKE("hosts"));
+
+ return 0;
+}
+
+static int context_write_data_static_hostname(Context *c) {
+ assert(c);
+
+ if (isempty(c->data[PROP_STATIC_HOSTNAME])) {
+
+ if (unlink("/etc/hostname") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ return 0;
+ }
+ return write_string_file_atomic_label("/etc/hostname", c->data[PROP_STATIC_HOSTNAME]);
+}
+
+static int context_write_data_machine_info(Context *c) {
+
+ static const char * const name[_PROP_MAX] = {
+ [PROP_PRETTY_HOSTNAME] = "PRETTY_HOSTNAME",
+ [PROP_ICON_NAME] = "ICON_NAME",
+ [PROP_CHASSIS] = "CHASSIS",
+ [PROP_DEPLOYMENT] = "DEPLOYMENT",
+ [PROP_LOCATION] = "LOCATION",
+ };
+
+ _cleanup_strv_free_ char **l = NULL;
+ int r, p;
+
+ assert(c);
+
+ r = load_env_file(NULL, "/etc/machine-info", &l);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ for (p = PROP_PRETTY_HOSTNAME; p <= PROP_LOCATION; p++) {
+ _cleanup_free_ char *t = NULL;
+ char **u;
+
+ assert(name[p]);
+
+ if (isempty(c->data[p])) {
+ strv_env_unset(l, name[p]);
+ continue;
+ }
+
+ t = strjoin(name[p], "=", c->data[p]);
+ if (!t)
+ return -ENOMEM;
+
+ u = strv_env_set(l, t);
+ if (!u)
+ return -ENOMEM;
+
+ strv_free_and_replace(l, u);
+ }
+
+ if (strv_isempty(l)) {
+ if (unlink("/etc/machine-info") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ return 0;
+ }
+
+ return write_env_file_label("/etc/machine-info", l);
+}
+
+static int property_get_hostname(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *current = NULL;
+ int r;
+
+ r = gethostname_strict(&current);
+ if (r == -ENXIO)
+ return sd_bus_message_append(reply, "s", FALLBACK_HOSTNAME);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "s", current);
+}
+
+static int property_get_static_hostname(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ assert(c);
+
+ context_read_etc_hostname(c);
+
+ return sd_bus_message_append(reply, "s", c->data[PROP_STATIC_HOSTNAME]);
+}
+
+static int property_get_machine_info_field(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ sd_bus_slot *slot;
+ Context *c;
+
+ /* Acquire the context object without this property's userdata offset added. Explanation: we want
+ * access to two pointers here: a) the main context object we cache all properties in, and b) the
+ * pointer to the property field inside the context object that we are supposed to update and
+ * use. The latter (b) we get in the 'userdata' function parameter, and sd-bus calculates that for us
+ * from the 'userdata' pointer we supplied when the vtable was registered, with the offset we
+ * specified in the vtable added on top. To get the former (a) we need the 'userdata' pointer from
+ * the vtable registration directly, without the offset added. Hence we ask sd-bus what the slot
+ * object is (which encapsulates the vtable registration), and then query the 'userdata' field
+ * directly off it. */
+ assert_se(slot = sd_bus_get_current_slot(bus));
+ assert_se(c = sd_bus_slot_get_userdata(slot));
+
+ context_read_machine_info(c);
+
+ return sd_bus_message_append(reply, "s", *(char**) userdata);
+}
+
+static int property_get_os_release_field(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ sd_bus_slot *slot;
+ Context *c;
+
+ /* As above, acquire the current context without this property's userdata offset added. */
+ assert_se(slot = sd_bus_get_current_slot(bus));
+ assert_se(c = sd_bus_slot_get_userdata(slot));
+
+ context_read_os_release(c);
+
+ return sd_bus_message_append(reply, "s", *(char**) userdata);
+}
+
+static int property_get_icon_name(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *n = NULL;
+ Context *c = userdata;
+ const char *name;
+
+ context_read_machine_info(c);
+
+ if (isempty(c->data[PROP_ICON_NAME]))
+ name = n = context_fallback_icon_name(c);
+ else
+ name = c->data[PROP_ICON_NAME];
+
+ if (!name)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "s", name);
+}
+
+static int property_get_chassis(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ const char *name;
+
+ context_read_machine_info(c);
+
+ if (isempty(c->data[PROP_CHASSIS]))
+ name = fallback_chassis();
+ else
+ name = c->data[PROP_CHASSIS];
+
+ return sd_bus_message_append(reply, "s", name);
+}
+
+static int property_get_uname_field(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ struct utsname u;
+
+ assert_se(uname(&u) >= 0);
+
+ return sd_bus_message_append(reply, "s", (char*) &u + PTR_TO_SIZE(userdata));
+}
+
+static int method_set_hostname(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ const char *name;
+ int interactive, r;
+ struct utsname u;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "sb", &name, &interactive);
+ if (r < 0)
+ return r;
+
+ context_read_etc_hostname(c);
+
+ if (isempty(name))
+ name = c->data[PROP_STATIC_HOSTNAME];
+
+ if (isempty(name))
+ name = FALLBACK_HOSTNAME;
+
+ if (!hostname_is_valid(name, false))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid hostname '%s'", name);
+
+ assert_se(uname(&u) >= 0);
+ if (streq_ptr(name, u.nodename))
+ return sd_bus_reply_method_return(m, NULL);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.hostname1.set-hostname",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = context_update_kernel_hostname(c, name);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set hostname: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set hostname: %m");
+ }
+
+ log_info("Changed hostname to '%s'", name);
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), "/org/freedesktop/hostname1", "org.freedesktop.hostname1", "Hostname", NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_static_hostname(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ const char *name;
+ int interactive;
+ int r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "sb", &name, &interactive);
+ if (r < 0)
+ return r;
+
+ name = empty_to_null(name);
+
+ context_read_etc_hostname(c);
+
+ if (streq_ptr(name, c->data[PROP_STATIC_HOSTNAME]))
+ return sd_bus_reply_method_return(m, NULL);
+
+ if (!isempty(name) && !hostname_is_valid(name, false))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid static hostname '%s'", name);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.hostname1.set-static-hostname",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = free_and_strdup(&c->data[PROP_STATIC_HOSTNAME], name);
+ if (r < 0)
+ return r;
+
+ r = context_update_kernel_hostname(c, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set hostname: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set hostname: %m");
+ }
+
+ r = context_write_data_static_hostname(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write static hostname: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set static hostname: %m");
+ }
+
+ log_info("Changed static hostname to '%s'", strna(c->data[PROP_STATIC_HOSTNAME]));
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), "/org/freedesktop/hostname1", "org.freedesktop.hostname1", "StaticHostname", NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int set_machine_info(Context *c, sd_bus_message *m, int prop, sd_bus_message_handler_t cb, sd_bus_error *error) {
+ int interactive;
+ const char *name;
+ int r;
+
+ assert(c);
+ assert(m);
+
+ r = sd_bus_message_read(m, "sb", &name, &interactive);
+ if (r < 0)
+ return r;
+
+ name = empty_to_null(name);
+
+ context_read_machine_info(c);
+
+ if (streq_ptr(name, c->data[prop]))
+ return sd_bus_reply_method_return(m, NULL);
+
+ if (!isempty(name)) {
+ /* The icon name might ultimately be used as file
+ * name, so better be safe than sorry */
+
+ if (prop == PROP_ICON_NAME && !filename_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid icon name '%s'", name);
+ if (prop == PROP_PRETTY_HOSTNAME && string_has_cc(name, NULL))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid pretty hostname '%s'", name);
+ if (prop == PROP_CHASSIS && !valid_chassis(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid chassis '%s'", name);
+ if (prop == PROP_DEPLOYMENT && !valid_deployment(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid deployment '%s'", name);
+ if (prop == PROP_LOCATION && string_has_cc(name, NULL))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid location '%s'", name);
+ }
+
+ /* Since the pretty hostname should always be changed at the
+ * same time as the static one, use the same policy action for
+ * both... */
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ prop == PROP_PRETTY_HOSTNAME ? "org.freedesktop.hostname1.set-static-hostname" : "org.freedesktop.hostname1.set-machine-info",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = free_and_strdup(&c->data[prop], name);
+ if (r < 0)
+ return r;
+
+ r = context_write_data_machine_info(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write machine info: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to write machine info: %m");
+ }
+
+ log_info("Changed %s to '%s'",
+ prop == PROP_PRETTY_HOSTNAME ? "pretty hostname" :
+ prop == PROP_DEPLOYMENT ? "deployment" :
+ prop == PROP_LOCATION ? "location" :
+ prop == PROP_CHASSIS ? "chassis" : "icon name", strna(c->data[prop]));
+
+ (void) sd_bus_emit_properties_changed(
+ sd_bus_message_get_bus(m),
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ prop == PROP_PRETTY_HOSTNAME ? "PrettyHostname" :
+ prop == PROP_DEPLOYMENT ? "Deployment" :
+ prop == PROP_LOCATION ? "Location" :
+ prop == PROP_CHASSIS ? "Chassis" : "IconName" , NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_pretty_hostname(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_PRETTY_HOSTNAME, method_set_pretty_hostname, error);
+}
+
+static int method_set_icon_name(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_ICON_NAME, method_set_icon_name, error);
+}
+
+static int method_set_chassis(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_CHASSIS, method_set_chassis, error);
+}
+
+static int method_set_deployment(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_DEPLOYMENT, method_set_deployment, error);
+}
+
+static int method_set_location(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return set_machine_info(userdata, m, PROP_LOCATION, method_set_location, error);
+}
+
+static int method_get_product_uuid(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Context *c = userdata;
+ bool has_uuid = false;
+ int interactive, r;
+ sd_id128_t uuid;
+
+ assert(m);
+ assert(c);
+
+ r = id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, &uuid);
+ if (r == -ENOENT)
+ r = id128_read("/sys/firmware/devicetree/base/vm,uuid", ID128_UUID, &uuid);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to read product UUID, ignoring: %m");
+ else if (sd_id128_is_null(uuid) || sd_id128_is_allf(uuid))
+ log_debug("DMI product UUID " SD_ID128_FORMAT_STR " is all 0x00 or all 0xFF, ignoring.", SD_ID128_FORMAT_VAL(uuid));
+ else
+ has_uuid = true;
+
+ if (!has_uuid)
+ return sd_bus_error_set(error, BUS_ERROR_NO_PRODUCT_UUID,
+ "Failed to read product UUID from firmware.");
+
+ r = sd_bus_message_read(m, "b", &interactive);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.hostname1.get-product-uuid",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &uuid, sizeof(uuid));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static const sd_bus_vtable hostname_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Hostname", "s", property_get_hostname, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("StaticHostname", "s", property_get_static_hostname, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("PrettyHostname", "s", property_get_machine_info_field, offsetof(Context, data) + sizeof(char*) * PROP_PRETTY_HOSTNAME, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IconName", "s", property_get_icon_name, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Chassis", "s", property_get_chassis, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Deployment", "s", property_get_machine_info_field, offsetof(Context, data) + sizeof(char*) * PROP_DEPLOYMENT, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Location", "s", property_get_machine_info_field, offsetof(Context, data) + sizeof(char*) * PROP_LOCATION, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("KernelName", "s", property_get_uname_field, offsetof(struct utsname, sysname), SD_BUS_VTABLE_ABSOLUTE_OFFSET|SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KernelRelease", "s", property_get_uname_field, offsetof(struct utsname, release), SD_BUS_VTABLE_ABSOLUTE_OFFSET|SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KernelVersion", "s", property_get_uname_field, offsetof(struct utsname, version), SD_BUS_VTABLE_ABSOLUTE_OFFSET|SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OperatingSystemPrettyName", "s", property_get_os_release_field, offsetof(Context, data) + sizeof(char*) * PROP_OS_PRETTY_NAME, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("OperatingSystemCPEName", "s", property_get_os_release_field, offsetof(Context, data) + sizeof(char*) * PROP_OS_CPE_NAME, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HomeURL", "s", property_get_os_release_field, offsetof(Context, data) + sizeof(char*) * PROP_OS_HOME_URL, SD_BUS_VTABLE_PROPERTY_CONST),
+
+ SD_BUS_METHOD_WITH_NAMES("SetHostname",
+ "sb",
+ SD_BUS_PARAM(hostname)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_hostname,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetStaticHostname",
+ "sb",
+ SD_BUS_PARAM(hostname)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_static_hostname,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetPrettyHostname",
+ "sb",
+ SD_BUS_PARAM(hostname)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_pretty_hostname,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetIconName",
+ "sb",
+ SD_BUS_PARAM(icon)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_icon_name,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetChassis",
+ "sb",
+ SD_BUS_PARAM(chassis)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_chassis,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetDeployment",
+ "sb",
+ SD_BUS_PARAM(deployment)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_deployment,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetLocation",
+ "sb",
+ SD_BUS_PARAM(location)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_location,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetProductUUID",
+ "b",
+ SD_BUS_PARAM(interactive),
+ "ay",
+ SD_BUS_PARAM(uuid),
+ method_get_product_uuid,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END,
+};
+
+static const BusObjectImplementation manager_object = {
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ .vtables = BUS_VTABLES(hostname_vtable),
+};
+
+static int connect_bus(Context *c, sd_event *event, sd_bus **ret) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(c);
+ assert(event);
+ assert(ret);
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get system bus connection: %m");
+
+ r = bus_add_implementation(bus, &manager_object, c);
+ if (r < 0)
+ return r;
+
+ r = bus_log_control_api_register(bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(bus, NULL, "org.freedesktop.hostname1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ *ret = TAKE_PTR(bus);
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_destroy) Context context = {};
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_setup_service();
+
+ r = service_parse_argv("systemd-hostnamed.service",
+ "Manage the system hostname and related metadata.",
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object),
+ argc, argv);
+ if (r <= 0)
+ return r;
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ r = sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGINT handler: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGTERM handler: %m");
+
+ r = connect_bus(&context, event, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_event_loop_with_idle(event, bus, "org.freedesktop.hostname1", DEFAULT_EXIT_USEC, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/hostname/meson.build b/src/hostname/meson.build
new file mode 100644
index 0000000..718a7bd
--- /dev/null
+++ b/src/hostname/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+if conf.get('ENABLE_HOSTNAMED') == 1
+ install_data('org.freedesktop.hostname1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.hostname1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.hostname1.policy',
+ install_dir : polkitpolicydir)
+endif
diff --git a/src/hostname/org.freedesktop.hostname1.conf b/src/hostname/org.freedesktop.hostname1.conf
new file mode 100644
index 0000000..2f34102
--- /dev/null
+++ b/src/hostname/org.freedesktop.hostname1.conf
@@ -0,0 +1,29 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.hostname1"/>
+ <allow send_destination="org.freedesktop.hostname1"/>
+ <allow receive_sender="org.freedesktop.hostname1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.hostname1"/>
+ <allow receive_sender="org.freedesktop.hostname1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/hostname/org.freedesktop.hostname1.policy b/src/hostname/org.freedesktop.hostname1.policy
new file mode 100644
index 0000000..7d28c39
--- /dev/null
+++ b/src/hostname/org.freedesktop.hostname1.policy
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.hostname1.set-hostname">
+ <description gettext-domain="systemd">Set hostname</description>
+ <message gettext-domain="systemd">Authentication is required to set the local hostname.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.hostname1.set-static-hostname">
+ <description gettext-domain="systemd">Set static hostname</description>
+ <message gettext-domain="systemd">Authentication is required to set the statically configured local hostname, as well as the pretty hostname.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.hostname1.set-hostname org.freedesktop.hostname1.set-machine-info</annotate>
+ </action>
+
+ <action id="org.freedesktop.hostname1.set-machine-info">
+ <description gettext-domain="systemd">Set machine information</description>
+ <message gettext-domain="systemd">Authentication is required to set local machine information.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.hostname1.get-product-uuid">
+ <description gettext-domain="systemd">Get product UUID</description>
+ <message gettext-domain="systemd">Authentication is required to get product UUID.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/hostname/org.freedesktop.hostname1.service b/src/hostname/org.freedesktop.hostname1.service
new file mode 100644
index 0000000..1d6b9c8
--- /dev/null
+++ b/src/hostname/org.freedesktop.hostname1.service
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.hostname1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.hostname1.service
diff --git a/src/hwdb/hwdb.c b/src/hwdb/hwdb.c
new file mode 100644
index 0000000..1246d68
--- /dev/null
+++ b/src/hwdb/hwdb.c
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "hwdb-util.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "selinux-util.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static const char *arg_hwdb_bin_dir = NULL;
+static const char *arg_root = NULL;
+static bool arg_strict = false;
+
+static int verb_query(int argc, char *argv[], void *userdata) {
+ return hwdb_query(argv[1]);
+}
+
+static int verb_update(int argc, char *argv[], void *userdata) {
+ return hwdb_update(arg_root, arg_hwdb_bin_dir, arg_strict, false);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-hwdb", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sUpdate or query the hardware database.%s\n"
+ "\nCommands:\n"
+ " update Update the hwdb database\n"
+ " query MODALIAS Query database and print result\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -s --strict When updating, return non-zero exit value on any parsing error\n"
+ " --usr Generate in " UDEVLIBEXECDIR " instead of /etc/udev\n"
+ " -r --root=PATH Alternative root path in the filesystem\n\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_USR,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "usr", no_argument, NULL, ARG_USR },
+ { "strict", no_argument, NULL, 's' },
+ { "root", required_argument, NULL, 'r' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ust:r:h", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_USR:
+ arg_hwdb_bin_dir = UDEVLIBEXECDIR;
+ break;
+
+ case 's':
+ arg_strict = true;
+ break;
+
+ case 'r':
+ arg_root = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+static int hwdb_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "update", 1, 1, 0, verb_update },
+ { "query", 2, 2, 0, verb_query },
+ {},
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ return hwdb_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/id128/id128.c b/src/id128/id128.c
new file mode 100644
index 0000000..086f398
--- /dev/null
+++ b/src/id128/id128.c
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "gpt.h"
+#include "id128-print.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "strv.h"
+#include "format-table.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static Id128PrettyPrintMode arg_mode = ID128_PRINT_ID128;
+static sd_id128_t arg_app = {};
+
+static int verb_new(int argc, char **argv, void *userdata) {
+ return id128_print_new(arg_mode);
+}
+
+static int verb_machine_id(int argc, char **argv, void *userdata) {
+ sd_id128_t id;
+ int r;
+
+ if (sd_id128_is_null(arg_app))
+ r = sd_id128_get_machine(&id);
+ else
+ r = sd_id128_get_machine_app_specific(arg_app, &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get %smachine-ID: %m",
+ sd_id128_is_null(arg_app) ? "" : "app-specific ");
+
+ return id128_pretty_print(id, arg_mode);
+}
+
+static int verb_boot_id(int argc, char **argv, void *userdata) {
+ sd_id128_t id;
+ int r;
+
+ if (sd_id128_is_null(arg_app))
+ r = sd_id128_get_boot(&id);
+ else
+ r = sd_id128_get_boot_app_specific(arg_app, &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get %sboot-ID: %m",
+ sd_id128_is_null(arg_app) ? "" : "app-specific ");
+
+ return id128_pretty_print(id, arg_mode);
+}
+
+static int verb_invocation_id(int argc, char **argv, void *userdata) {
+ sd_id128_t id;
+ int r;
+
+ if (!sd_id128_is_null(arg_app))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Verb \"invocation-id\" cannot be combined with --app-specific=.");
+
+ r = sd_id128_get_invocation(&id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get invocation-ID: %m");
+
+ return id128_pretty_print(id, arg_mode);
+}
+
+static int show_one(Table **table, const char *name, sd_id128_t uuid, bool first) {
+ int r;
+
+ if (arg_mode == ID128_PRINT_PRETTY) {
+ _cleanup_free_ char *id = NULL;
+
+ id = strreplace(name, "-", "_");
+ if (!id)
+ return log_oom();
+
+ ascii_strupper(id);
+
+ r = id128_pretty_print_sample(id, uuid);
+ if (r < 0)
+ return r;
+ if (!first)
+ puts("");
+ return 0;
+
+ } else {
+ if (!*table) {
+ *table = table_new("name", "id");
+ if (!*table)
+ return log_oom();
+ table_set_width(*table, 0);
+ }
+
+ return table_add_many(*table,
+ TABLE_STRING, name,
+ arg_mode == ID128_PRINT_ID128 ? TABLE_ID128 : TABLE_UUID,
+ uuid);
+ }
+}
+
+static int verb_show(int argc, char **argv, void *userdata) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ char **p;
+ int r;
+
+ argv = strv_skip(argv, 1);
+ if (strv_isempty(argv))
+ for (const GptPartitionType *e = gpt_partition_type_table; e->name; e++) {
+ r = show_one(&table, e->name, e->uuid, e == gpt_partition_type_table);
+ if (r < 0)
+ return r;
+ }
+ else
+ STRV_FOREACH(p, argv) {
+ sd_id128_t uuid;
+ bool have_uuid;
+ const char *id;
+
+ /* Check if the argument is an actual UUID first */
+ have_uuid = sd_id128_from_string(*p, &uuid) >= 0;
+
+ if (have_uuid)
+ id = gpt_partition_type_uuid_to_string(uuid) ?: "XYZ";
+ else {
+ r = gpt_partition_type_uuid_from_string(*p, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Unknown identifier \"%s\".", *p);
+
+ id = *p;
+ }
+
+ r = show_one(&table, id, uuid, p == argv);
+ if (r < 0)
+ return r;
+ }
+
+ if (table) {
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-id128", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND\n\n"
+ "%sGenerate and print 128bit identifiers.%s\n"
+ "\nCommands:\n"
+ " new Generate a new ID\n"
+ " machine-id Print the ID of current machine\n"
+ " boot-id Print the ID of current boot\n"
+ " invocation-id Print the ID of current invocation\n"
+ " show [NAME] Print one or more well-known IDs\n"
+ " help Show this help\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " -p --pretty Generate samples of program code\n"
+ " -a --app-specific=ID Generate app-specific IDs\n"
+ " -u --uuid Output in UUID format\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "pretty", no_argument, NULL, 'p' },
+ { "app-specific", required_argument, NULL, 'a' },
+ { "uuid", no_argument, NULL, 'u' },
+ {},
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hpa:u", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'p':
+ arg_mode = ID128_PRINT_PRETTY;
+ break;
+
+ case 'a':
+ r = sd_id128_from_string(optarg, &arg_app);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse \"%s\" as application-ID: %m", optarg);
+ break;
+
+ case 'u':
+ arg_mode = ID128_PRINT_UUID;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int id128_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "new", VERB_ANY, 1, 0, verb_new },
+ { "machine-id", VERB_ANY, 1, 0, verb_machine_id },
+ { "boot-id", VERB_ANY, 1, 0, verb_boot_id },
+ { "invocation-id", VERB_ANY, 1, 0, verb_invocation_id },
+ { "show", VERB_ANY, VERB_ANY, 0, verb_show },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return id128_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/curl-util.c b/src/import/curl-util.c
new file mode 100644
index 0000000..5e09043
--- /dev/null
+++ b/src/import/curl-util.c
@@ -0,0 +1,380 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+
+#include "alloc-util.h"
+#include "build.h"
+#include "curl-util.h"
+#include "fd-util.h"
+#include "locale-util.h"
+#include "string-util.h"
+
+static void curl_glue_check_finished(CurlGlue *g) {
+ CURLMsg *msg;
+ int k = 0;
+
+ assert(g);
+
+ msg = curl_multi_info_read(g->curl, &k);
+ if (!msg)
+ return;
+
+ if (msg->msg != CURLMSG_DONE)
+ return;
+
+ if (g->on_finished)
+ g->on_finished(g, msg->easy_handle, msg->data.result);
+}
+
+static int curl_glue_on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ CurlGlue *g = userdata;
+ int action, k = 0;
+
+ assert(s);
+ assert(g);
+
+ if (FLAGS_SET(revents, EPOLLIN | EPOLLOUT))
+ action = CURL_POLL_INOUT;
+ else if (revents & EPOLLIN)
+ action = CURL_POLL_IN;
+ else if (revents & EPOLLOUT)
+ action = CURL_POLL_OUT;
+ else
+ action = 0;
+
+ if (curl_multi_socket_action(g->curl, fd, action, &k) != CURLM_OK)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to propagate IO event.");
+
+ curl_glue_check_finished(g);
+ return 0;
+}
+
+static int curl_glue_socket_callback(CURL *curl, curl_socket_t s, int action, void *userdata, void *socketp) {
+ sd_event_source *io = socketp;
+ CurlGlue *g = userdata;
+ uint32_t events = 0;
+ int r;
+
+ assert(curl);
+ assert(g);
+
+ if (action == CURL_POLL_REMOVE) {
+ if (io) {
+ sd_event_source_disable_unref(io);
+
+ hashmap_remove(g->ios, FD_TO_PTR(s));
+ }
+
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&g->ios, &trivial_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return -1;
+ }
+
+ if (action == CURL_POLL_IN)
+ events = EPOLLIN;
+ else if (action == CURL_POLL_OUT)
+ events = EPOLLOUT;
+ else if (action == CURL_POLL_INOUT)
+ events = EPOLLIN|EPOLLOUT;
+
+ if (io) {
+ if (sd_event_source_set_io_events(io, events) < 0)
+ return -1;
+
+ if (sd_event_source_set_enabled(io, SD_EVENT_ON) < 0)
+ return -1;
+ } else {
+ if (sd_event_add_io(g->event, &io, s, events, curl_glue_on_io, g) < 0)
+ return -1;
+
+ if (curl_multi_assign(g->curl, s, io) != CURLM_OK)
+ return -1;
+
+ (void) sd_event_source_set_description(io, "curl-io");
+
+ r = hashmap_put(g->ios, FD_TO_PTR(s), io);
+ if (r < 0) {
+ log_oom();
+ sd_event_source_unref(io);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int curl_glue_on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
+ CurlGlue *g = userdata;
+ int k = 0;
+
+ assert(s);
+ assert(g);
+
+ if (curl_multi_socket_action(g->curl, CURL_SOCKET_TIMEOUT, 0, &k) != CURLM_OK)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to propagate timeout.");
+
+ curl_glue_check_finished(g);
+ return 0;
+}
+
+static int curl_glue_timer_callback(CURLM *curl, long timeout_ms, void *userdata) {
+ CurlGlue *g = userdata;
+ usec_t usec;
+
+ assert(curl);
+ assert(g);
+
+ if (timeout_ms < 0) {
+ if (g->timer) {
+ if (sd_event_source_set_enabled(g->timer, SD_EVENT_OFF) < 0)
+ return -1;
+ }
+
+ return 0;
+ }
+
+ usec = (usec_t) timeout_ms * USEC_PER_MSEC + USEC_PER_MSEC - 1;
+
+ if (g->timer) {
+ if (sd_event_source_set_time_relative(g->timer, usec) < 0)
+ return -1;
+
+ if (sd_event_source_set_enabled(g->timer, SD_EVENT_ONESHOT) < 0)
+ return -1;
+ } else {
+ if (sd_event_add_time_relative(g->event, &g->timer, clock_boottime_or_monotonic(), usec, 0, curl_glue_on_timer, g) < 0)
+ return -1;
+
+ (void) sd_event_source_set_description(g->timer, "curl-timer");
+ }
+
+ return 0;
+}
+
+CurlGlue *curl_glue_unref(CurlGlue *g) {
+ sd_event_source *io;
+
+ if (!g)
+ return NULL;
+
+ if (g->curl)
+ curl_multi_cleanup(g->curl);
+
+ while ((io = hashmap_steal_first(g->ios)))
+ sd_event_source_unref(io);
+
+ hashmap_free(g->ios);
+
+ sd_event_source_unref(g->timer);
+ sd_event_unref(g->event);
+ return mfree(g);
+}
+
+int curl_glue_new(CurlGlue **glue, sd_event *event) {
+ _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL;
+ _cleanup_(curl_multi_cleanupp) CURLM *c = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ int r;
+
+ if (event)
+ e = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e);
+ if (r < 0)
+ return r;
+ }
+
+ c = curl_multi_init();
+ if (!c)
+ return -ENOMEM;
+
+ g = new(CurlGlue, 1);
+ if (!g)
+ return -ENOMEM;
+
+ *g = (CurlGlue) {
+ .event = TAKE_PTR(e),
+ .curl = TAKE_PTR(c),
+ };
+
+ if (curl_multi_setopt(g->curl, CURLMOPT_SOCKETDATA, g) != CURLM_OK)
+ return -EINVAL;
+
+ if (curl_multi_setopt(g->curl, CURLMOPT_SOCKETFUNCTION, curl_glue_socket_callback) != CURLM_OK)
+ return -EINVAL;
+
+ if (curl_multi_setopt(g->curl, CURLMOPT_TIMERDATA, g) != CURLM_OK)
+ return -EINVAL;
+
+ if (curl_multi_setopt(g->curl, CURLMOPT_TIMERFUNCTION, curl_glue_timer_callback) != CURLM_OK)
+ return -EINVAL;
+
+ *glue = TAKE_PTR(g);
+
+ return 0;
+}
+
+int curl_glue_make(CURL **ret, const char *url, void *userdata) {
+ _cleanup_(curl_easy_cleanupp) CURL *c = NULL;
+ const char *useragent;
+
+ assert(ret);
+ assert(url);
+
+ c = curl_easy_init();
+ if (!c)
+ return -ENOMEM;
+
+ /* curl_easy_setopt(c, CURLOPT_VERBOSE, 1L); */
+
+ if (curl_easy_setopt(c, CURLOPT_URL, url) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(c, CURLOPT_PRIVATE, userdata) != CURLE_OK)
+ return -EIO;
+
+ useragent = strjoina(program_invocation_short_name, "/" GIT_VERSION);
+ if (curl_easy_setopt(c, CURLOPT_USERAGENT, useragent) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(c, CURLOPT_FOLLOWLOCATION, 1L) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(c, CURLOPT_NOSIGNAL, 1L) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(c, CURLOPT_LOW_SPEED_TIME, 60L) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(c, CURLOPT_LOW_SPEED_LIMIT, 30L) != CURLE_OK)
+ return -EIO;
+
+ *ret = TAKE_PTR(c);
+ return 0;
+}
+
+int curl_glue_add(CurlGlue *g, CURL *c) {
+ assert(g);
+ assert(c);
+
+ if (curl_multi_add_handle(g->curl, c) != CURLM_OK)
+ return -EIO;
+
+ return 0;
+}
+
+void curl_glue_remove_and_free(CurlGlue *g, CURL *c) {
+ assert(g);
+
+ if (!c)
+ return;
+
+ if (g->curl)
+ curl_multi_remove_handle(g->curl, c);
+
+ curl_easy_cleanup(c);
+}
+
+struct curl_slist *curl_slist_new(const char *first, ...) {
+ struct curl_slist *l;
+ va_list ap;
+
+ if (!first)
+ return NULL;
+
+ l = curl_slist_append(NULL, first);
+ if (!l)
+ return NULL;
+
+ va_start(ap, first);
+
+ for (;;) {
+ struct curl_slist *n;
+ const char *i;
+
+ i = va_arg(ap, const char*);
+ if (!i)
+ break;
+
+ n = curl_slist_append(l, i);
+ if (!n) {
+ va_end(ap);
+ curl_slist_free_all(l);
+ return NULL;
+ }
+
+ l = n;
+ }
+
+ va_end(ap);
+ return l;
+}
+
+int curl_header_strdup(const void *contents, size_t sz, const char *field, char **value) {
+ const char *p;
+ char *s;
+
+ p = memory_startswith_no_case(contents, sz, field);
+ if (!p)
+ return 0;
+
+ sz -= p - (const char*) contents;
+
+ if (memchr(p, 0, sz))
+ return 0;
+
+ /* Skip over preceding whitespace */
+ while (sz > 0 && strchr(WHITESPACE, p[0])) {
+ p++;
+ sz--;
+ }
+
+ /* Truncate trailing whitespace */
+ while (sz > 0 && strchr(WHITESPACE, p[sz-1]))
+ sz--;
+
+ s = strndup(p, sz);
+ if (!s)
+ return -ENOMEM;
+
+ *value = s;
+ return 1;
+}
+
+int curl_parse_http_time(const char *t, usec_t *ret) {
+ _cleanup_(freelocalep) locale_t loc = (locale_t) 0;
+ const char *e;
+ struct tm tm;
+ time_t v;
+
+ assert(t);
+ assert(ret);
+
+ loc = newlocale(LC_TIME_MASK, "C", (locale_t) 0);
+ if (loc == (locale_t) 0)
+ return -errno;
+
+ /* RFC822 */
+ e = strptime_l(t, "%a, %d %b %Y %H:%M:%S %Z", &tm, loc);
+ if (!e || *e != 0)
+ /* RFC 850 */
+ e = strptime_l(t, "%A, %d-%b-%y %H:%M:%S %Z", &tm, loc);
+ if (!e || *e != 0)
+ /* ANSI C */
+ e = strptime_l(t, "%a %b %d %H:%M:%S %Y", &tm, loc);
+ if (!e || *e != 0)
+ return -EINVAL;
+
+ v = timegm(&tm);
+ if (v == (time_t) -1)
+ return -EINVAL;
+
+ *ret = (usec_t) v * USEC_PER_SEC;
+ return 0;
+}
diff --git a/src/import/curl-util.h b/src/import/curl-util.h
new file mode 100644
index 0000000..4ab52d7
--- /dev/null
+++ b/src/import/curl-util.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <curl/curl.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "time-util.h"
+
+typedef struct CurlGlue CurlGlue;
+
+struct CurlGlue {
+ sd_event *event;
+ CURLM *curl;
+ sd_event_source *timer;
+ Hashmap *ios;
+
+ void (*on_finished)(CurlGlue *g, CURL *curl, CURLcode code);
+ void *userdata;
+};
+
+int curl_glue_new(CurlGlue **glue, sd_event *event);
+CurlGlue* curl_glue_unref(CurlGlue *glue);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CurlGlue*, curl_glue_unref);
+
+int curl_glue_make(CURL **ret, const char *url, void *userdata);
+int curl_glue_add(CurlGlue *g, CURL *c);
+void curl_glue_remove_and_free(CurlGlue *g, CURL *c);
+
+struct curl_slist *curl_slist_new(const char *first, ...) _sentinel_;
+int curl_header_strdup(const void *contents, size_t sz, const char *field, char **value);
+int curl_parse_http_time(const char *t, usec_t *ret);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CURL*, curl_easy_cleanup);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CURLM*, curl_multi_cleanup);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct curl_slist*, curl_slist_free_all);
diff --git a/src/import/export-raw.c b/src/import/export-raw.c
new file mode 100644
index 0000000..3e0348f
--- /dev/null
+++ b/src/import/export-raw.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/sendfile.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "copy.h"
+#include "export-raw.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "import-common.h"
+#include "missing_fcntl.h"
+#include "ratelimit.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define COPY_BUFFER_SIZE (16*1024)
+
+struct RawExport {
+ sd_event *event;
+
+ RawExportFinished on_finished;
+ void *userdata;
+
+ char *path;
+
+ int input_fd;
+ int output_fd;
+
+ ImportCompress compress;
+
+ sd_event_source *output_event_source;
+
+ void *buffer;
+ size_t buffer_size;
+ size_t buffer_allocated;
+
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ unsigned last_percent;
+ RateLimit progress_ratelimit;
+
+ struct stat st;
+
+ bool eof;
+ bool tried_reflink;
+ bool tried_sendfile;
+};
+
+RawExport *raw_export_unref(RawExport *e) {
+ if (!e)
+ return NULL;
+
+ sd_event_source_unref(e->output_event_source);
+
+ import_compress_free(&e->compress);
+
+ sd_event_unref(e->event);
+
+ safe_close(e->input_fd);
+
+ free(e->buffer);
+ free(e->path);
+ return mfree(e);
+}
+
+int raw_export_new(
+ RawExport **ret,
+ sd_event *event,
+ RawExportFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(raw_export_unrefp) RawExport *e = NULL;
+ int r;
+
+ assert(ret);
+
+ e = new(RawExport, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (RawExport) {
+ .output_fd = -1,
+ .input_fd = -1,
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .last_percent = (unsigned) -1,
+ .progress_ratelimit = { 100 * USEC_PER_MSEC, 1 },
+ };
+
+ if (event)
+ e->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e->event);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(e);
+
+ return 0;
+}
+
+static void raw_export_report_progress(RawExport *e) {
+ unsigned percent;
+ assert(e);
+
+ if (e->written_uncompressed >= (uint64_t) e->st.st_size)
+ percent = 100;
+ else
+ percent = (unsigned) ((e->written_uncompressed * UINT64_C(100)) / (uint64_t) e->st.st_size);
+
+ if (percent == e->last_percent)
+ return;
+
+ if (!ratelimit_below(&e->progress_ratelimit))
+ return;
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_info("Exported %u%%.", percent);
+
+ e->last_percent = percent;
+}
+
+static int raw_export_process(RawExport *e) {
+ ssize_t l;
+ int r;
+
+ assert(e);
+
+ if (!e->tried_reflink && e->compress.type == IMPORT_COMPRESS_UNCOMPRESSED) {
+
+ /* If we shall take an uncompressed snapshot we can
+ * reflink source to destination directly. Let's see
+ * if this works. */
+
+ r = btrfs_reflink(e->input_fd, e->output_fd);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ }
+
+ e->tried_reflink = true;
+ }
+
+ if (!e->tried_sendfile && e->compress.type == IMPORT_COMPRESS_UNCOMPRESSED) {
+
+ l = sendfile(e->output_fd, e->input_fd, NULL, COPY_BUFFER_SIZE);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ e->tried_sendfile = true;
+ } else if (l == 0) {
+ r = 0;
+ goto finish;
+ } else {
+ e->written_uncompressed += l;
+ e->written_compressed += l;
+
+ raw_export_report_progress(e);
+
+ return 0;
+ }
+ }
+
+ while (e->buffer_size <= 0) {
+ uint8_t input[COPY_BUFFER_SIZE];
+
+ if (e->eof) {
+ r = 0;
+ goto finish;
+ }
+
+ l = read(e->input_fd, input, sizeof(input));
+ if (l < 0) {
+ r = log_error_errno(errno, "Failed to read raw file: %m");
+ goto finish;
+ }
+
+ if (l == 0) {
+ e->eof = true;
+ r = import_compress_finish(&e->compress, &e->buffer, &e->buffer_size, &e->buffer_allocated);
+ } else {
+ e->written_uncompressed += l;
+ r = import_compress(&e->compress, input, l, &e->buffer, &e->buffer_size, &e->buffer_allocated);
+ }
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to encode: %m");
+ goto finish;
+ }
+ }
+
+ l = write(e->output_fd, e->buffer, e->buffer_size);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ r = log_error_errno(errno, "Failed to write output file: %m");
+ goto finish;
+ }
+
+ assert((size_t) l <= e->buffer_size);
+ memmove(e->buffer, (uint8_t*) e->buffer + l, e->buffer_size - l);
+ e->buffer_size -= l;
+ e->written_compressed += l;
+
+ raw_export_report_progress(e);
+
+ return 0;
+
+finish:
+ if (r >= 0) {
+ (void) copy_times(e->input_fd, e->output_fd, COPY_CRTIME);
+ (void) copy_xattr(e->input_fd, e->output_fd);
+ }
+
+ if (e->on_finished)
+ e->on_finished(e, r, e->userdata);
+ else
+ sd_event_exit(e->event, r);
+
+ return 0;
+}
+
+static int raw_export_on_output(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ RawExport *i = userdata;
+
+ return raw_export_process(i);
+}
+
+static int raw_export_on_defer(sd_event_source *s, void *userdata) {
+ RawExport *i = userdata;
+
+ return raw_export_process(i);
+}
+
+static int reflink_snapshot(int fd, const char *path) {
+ int new_fd, r;
+
+ new_fd = open_parent(path, O_TMPFILE|O_CLOEXEC|O_RDWR, 0600);
+ if (new_fd < 0) {
+ _cleanup_free_ char *t = NULL;
+
+ r = tempfn_random(path, NULL, &t);
+ if (r < 0)
+ return r;
+
+ new_fd = open(t, O_CLOEXEC|O_CREAT|O_NOCTTY|O_RDWR, 0600);
+ if (new_fd < 0)
+ return -errno;
+
+ (void) unlink(t);
+ }
+
+ r = btrfs_reflink(fd, new_fd);
+ if (r < 0) {
+ safe_close(new_fd);
+ return r;
+ }
+
+ return new_fd;
+}
+
+int raw_export_start(RawExport *e, const char *path, int fd, ImportCompressType compress) {
+ _cleanup_close_ int sfd = -1, tfd = -1;
+ int r;
+
+ assert(e);
+ assert(path);
+ assert(fd >= 0);
+ assert(compress < _IMPORT_COMPRESS_TYPE_MAX);
+ assert(compress != IMPORT_COMPRESS_UNKNOWN);
+
+ if (e->output_fd >= 0)
+ return -EBUSY;
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&e->path, path);
+ if (r < 0)
+ return r;
+
+ sfd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (sfd < 0)
+ return -errno;
+
+ if (fstat(sfd, &e->st) < 0)
+ return -errno;
+ r = stat_verify_regular(&e->st);
+ if (r < 0)
+ return r;
+
+ /* Try to take a reflink snapshot of the file, if we can t make the export atomic */
+ tfd = reflink_snapshot(sfd, path);
+ if (tfd >= 0)
+ e->input_fd = TAKE_FD(tfd);
+ else
+ e->input_fd = TAKE_FD(sfd);
+
+ r = import_compress_init(&e->compress, compress);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(e->event, &e->output_event_source, fd, EPOLLOUT, raw_export_on_output, e);
+ if (r == -EPERM) {
+ r = sd_event_add_defer(e->event, &e->output_event_source, raw_export_on_defer, e);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(e->output_event_source, SD_EVENT_ON);
+ }
+ if (r < 0)
+ return r;
+
+ e->output_fd = fd;
+ return r;
+}
diff --git a/src/import/export-raw.h b/src/import/export-raw.h
new file mode 100644
index 0000000..27009e4
--- /dev/null
+++ b/src/import/export-raw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-compress.h"
+#include "macro.h"
+
+typedef struct RawExport RawExport;
+
+typedef void (*RawExportFinished)(RawExport *export, int error, void *userdata);
+
+int raw_export_new(RawExport **export, sd_event *event, RawExportFinished on_finished, void *userdata);
+RawExport* raw_export_unref(RawExport *export);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(RawExport*, raw_export_unref);
+
+int raw_export_start(RawExport *export, const char *path, int fd, ImportCompressType compress);
diff --git a/src/import/export-tar.c b/src/import/export-tar.c
new file mode 100644
index 0000000..b8b650f
--- /dev/null
+++ b/src/import/export-tar.c
@@ -0,0 +1,330 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "export-tar.h"
+#include "fd-util.h"
+#include "import-common.h"
+#include "process-util.h"
+#include "ratelimit.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define COPY_BUFFER_SIZE (16*1024)
+
+struct TarExport {
+ sd_event *event;
+
+ TarExportFinished on_finished;
+ void *userdata;
+
+ char *path;
+ char *temp_path;
+
+ int output_fd;
+ int tar_fd;
+
+ ImportCompress compress;
+
+ sd_event_source *output_event_source;
+
+ void *buffer;
+ size_t buffer_size;
+ size_t buffer_allocated;
+
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ pid_t tar_pid;
+
+ struct stat st;
+ uint64_t quota_referenced;
+
+ unsigned last_percent;
+ RateLimit progress_ratelimit;
+
+ bool eof;
+ bool tried_splice;
+};
+
+TarExport *tar_export_unref(TarExport *e) {
+ if (!e)
+ return NULL;
+
+ sd_event_source_unref(e->output_event_source);
+
+ if (e->tar_pid > 1) {
+ (void) kill_and_sigcont(e->tar_pid, SIGKILL);
+ (void) wait_for_terminate(e->tar_pid, NULL);
+ }
+
+ if (e->temp_path) {
+ (void) btrfs_subvol_remove(e->temp_path, BTRFS_REMOVE_QUOTA);
+ free(e->temp_path);
+ }
+
+ import_compress_free(&e->compress);
+
+ sd_event_unref(e->event);
+
+ safe_close(e->tar_fd);
+
+ free(e->buffer);
+ free(e->path);
+ return mfree(e);
+}
+
+int tar_export_new(
+ TarExport **ret,
+ sd_event *event,
+ TarExportFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(tar_export_unrefp) TarExport *e = NULL;
+ int r;
+
+ assert(ret);
+
+ e = new(TarExport, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (TarExport) {
+ .output_fd = -1,
+ .tar_fd = -1,
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .quota_referenced = (uint64_t) -1,
+ .last_percent = (unsigned) -1,
+ .progress_ratelimit = { 100 * USEC_PER_MSEC, 1 },
+ };
+
+ if (event)
+ e->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e->event);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(e);
+
+ return 0;
+}
+
+static void tar_export_report_progress(TarExport *e) {
+ unsigned percent;
+ assert(e);
+
+ /* Do we have any quota info? If not, we don't know anything about the progress */
+ if (e->quota_referenced == (uint64_t) -1)
+ return;
+
+ if (e->written_uncompressed >= e->quota_referenced)
+ percent = 100;
+ else
+ percent = (unsigned) ((e->written_uncompressed * UINT64_C(100)) / e->quota_referenced);
+
+ if (percent == e->last_percent)
+ return;
+
+ if (!ratelimit_below(&e->progress_ratelimit))
+ return;
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_info("Exported %u%%.", percent);
+
+ e->last_percent = percent;
+}
+
+static int tar_export_finish(TarExport *e) {
+ int r;
+
+ assert(e);
+ assert(e->tar_fd >= 0);
+
+ if (e->tar_pid > 0) {
+ r = wait_for_terminate_and_check("tar", e->tar_pid, WAIT_LOG);
+ e->tar_pid = 0;
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+ }
+
+ e->tar_fd = safe_close(e->tar_fd);
+
+ return 0;
+}
+
+static int tar_export_process(TarExport *e) {
+ ssize_t l;
+ int r;
+
+ assert(e);
+
+ if (!e->tried_splice && e->compress.type == IMPORT_COMPRESS_UNCOMPRESSED) {
+
+ l = splice(e->tar_fd, NULL, e->output_fd, NULL, COPY_BUFFER_SIZE, 0);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ e->tried_splice = true;
+ } else if (l == 0) {
+ r = tar_export_finish(e);
+ goto finish;
+ } else {
+ e->written_uncompressed += l;
+ e->written_compressed += l;
+
+ tar_export_report_progress(e);
+
+ return 0;
+ }
+ }
+
+ while (e->buffer_size <= 0) {
+ uint8_t input[COPY_BUFFER_SIZE];
+
+ if (e->eof) {
+ r = tar_export_finish(e);
+ goto finish;
+ }
+
+ l = read(e->tar_fd, input, sizeof(input));
+ if (l < 0) {
+ r = log_error_errno(errno, "Failed to read tar file: %m");
+ goto finish;
+ }
+
+ if (l == 0) {
+ e->eof = true;
+ r = import_compress_finish(&e->compress, &e->buffer, &e->buffer_size, &e->buffer_allocated);
+ } else {
+ e->written_uncompressed += l;
+ r = import_compress(&e->compress, input, l, &e->buffer, &e->buffer_size, &e->buffer_allocated);
+ }
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to encode: %m");
+ goto finish;
+ }
+ }
+
+ l = write(e->output_fd, e->buffer, e->buffer_size);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ r = log_error_errno(errno, "Failed to write output file: %m");
+ goto finish;
+ }
+
+ assert((size_t) l <= e->buffer_size);
+ memmove(e->buffer, (uint8_t*) e->buffer + l, e->buffer_size - l);
+ e->buffer_size -= l;
+ e->written_compressed += l;
+
+ tar_export_report_progress(e);
+
+ return 0;
+
+finish:
+ if (e->on_finished)
+ e->on_finished(e, r, e->userdata);
+ else
+ sd_event_exit(e->event, r);
+
+ return 0;
+}
+
+static int tar_export_on_output(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ TarExport *i = userdata;
+
+ return tar_export_process(i);
+}
+
+static int tar_export_on_defer(sd_event_source *s, void *userdata) {
+ TarExport *i = userdata;
+
+ return tar_export_process(i);
+}
+
+int tar_export_start(TarExport *e, const char *path, int fd, ImportCompressType compress) {
+ _cleanup_close_ int sfd = -1;
+ int r;
+
+ assert(e);
+ assert(path);
+ assert(fd >= 0);
+ assert(compress < _IMPORT_COMPRESS_TYPE_MAX);
+ assert(compress != IMPORT_COMPRESS_UNKNOWN);
+
+ if (e->output_fd >= 0)
+ return -EBUSY;
+
+ sfd = open(path, O_DIRECTORY|O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (sfd < 0)
+ return -errno;
+
+ if (fstat(sfd, &e->st) < 0)
+ return -errno;
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&e->path, path);
+ if (r < 0)
+ return r;
+
+ e->quota_referenced = (uint64_t) -1;
+
+ if (e->st.st_ino == 256) { /* might be a btrfs subvolume? */
+ BtrfsQuotaInfo q;
+
+ r = btrfs_subvol_get_subtree_quota_fd(sfd, 0, &q);
+ if (r >= 0)
+ e->quota_referenced = q.referenced;
+
+ e->temp_path = mfree(e->temp_path);
+
+ r = tempfn_random(path, NULL, &e->temp_path);
+ if (r < 0)
+ return r;
+
+ /* Let's try to make a snapshot, if we can, so that the export is atomic */
+ r = btrfs_subvol_snapshot_fd(sfd, e->temp_path, BTRFS_SNAPSHOT_READ_ONLY|BTRFS_SNAPSHOT_RECURSIVE);
+ if (r < 0) {
+ log_debug_errno(r, "Couldn't create snapshot %s of %s, not exporting atomically: %m", e->temp_path, path);
+ e->temp_path = mfree(e->temp_path);
+ }
+ }
+
+ r = import_compress_init(&e->compress, compress);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(e->event, &e->output_event_source, fd, EPOLLOUT, tar_export_on_output, e);
+ if (r == -EPERM) {
+ r = sd_event_add_defer(e->event, &e->output_event_source, tar_export_on_defer, e);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(e->output_event_source, SD_EVENT_ON);
+ }
+ if (r < 0)
+ return r;
+
+ e->tar_fd = import_fork_tar_c(e->temp_path ?: e->path, &e->tar_pid);
+ if (e->tar_fd < 0) {
+ e->output_event_source = sd_event_source_unref(e->output_event_source);
+ return e->tar_fd;
+ }
+
+ e->output_fd = fd;
+ return r;
+}
diff --git a/src/import/export-tar.h b/src/import/export-tar.h
new file mode 100644
index 0000000..3b55d12
--- /dev/null
+++ b/src/import/export-tar.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-compress.h"
+#include "macro.h"
+
+typedef struct TarExport TarExport;
+
+typedef void (*TarExportFinished)(TarExport *export, int error, void *userdata);
+
+int tar_export_new(TarExport **export, sd_event *event, TarExportFinished on_finished, void *userdata);
+TarExport* tar_export_unref(TarExport *export);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(TarExport*, tar_export_unref);
+
+int tar_export_start(TarExport *export, const char *path, int fd, ImportCompressType compress);
diff --git a/src/import/export.c b/src/import/export.c
new file mode 100644
index 0000000..83990df
--- /dev/null
+++ b/src/import/export.c
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <locale.h>
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "export-raw.h"
+#include "export-tar.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-util.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "verbs.h"
+
+static ImportCompressType arg_compress = IMPORT_COMPRESS_UNKNOWN;
+
+static void determine_compression_from_filename(const char *p) {
+
+ if (arg_compress != IMPORT_COMPRESS_UNKNOWN)
+ return;
+
+ if (!p) {
+ arg_compress = IMPORT_COMPRESS_UNCOMPRESSED;
+ return;
+ }
+
+ if (endswith(p, ".xz"))
+ arg_compress = IMPORT_COMPRESS_XZ;
+ else if (endswith(p, ".gz"))
+ arg_compress = IMPORT_COMPRESS_GZIP;
+ else if (endswith(p, ".bz2"))
+ arg_compress = IMPORT_COMPRESS_BZIP2;
+ else
+ arg_compress = IMPORT_COMPRESS_UNCOMPRESSED;
+}
+
+static int interrupt_signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ log_notice("Transfer aborted.");
+ sd_event_exit(sd_event_source_get_event(s), EINTR);
+ return 0;
+}
+
+static void on_tar_finished(TarExport *export, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(export);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int export_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(tar_export_unrefp) TarExport *export = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(image_unrefp) Image *image = NULL;
+ const char *path = NULL, *local = NULL;
+ _cleanup_close_ int open_fd = -1;
+ int r, fd;
+
+ if (machine_name_is_valid(argv[1])) {
+ r = image_find(IMAGE_MACHINE, argv[1], &image);
+ if (r == -ENOENT)
+ return log_error_errno(r, "Machine image %s not found.", argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look for machine %s: %m", argv[1]);
+
+ local = image->path;
+ } else
+ local = argv[1];
+
+ if (argc >= 3)
+ path = argv[2];
+ path = empty_or_dash_to_null(path);
+
+ determine_compression_from_filename(path);
+
+ if (path) {
+ open_fd = open(path, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_NOCTTY, 0666);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open tar image for export: %m");
+
+ fd = open_fd;
+
+ log_info("Exporting '%s', saving to '%s' with compression '%s'.", local, path, import_compress_type_to_string(arg_compress));
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDOUT_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Exporting '%s', saving to '%s' with compression '%s'.", local, strna(pretty), import_compress_type_to_string(arg_compress));
+ }
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = tar_export_new(&export, event, on_tar_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate exporter: %m");
+
+ r = tar_export_start(export, local, fd, arg_compress);
+ if (r < 0)
+ return log_error_errno(r, "Failed to export image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static void on_raw_finished(RawExport *export, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(export);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int export_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(raw_export_unrefp) RawExport *export = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(image_unrefp) Image *image = NULL;
+ const char *path = NULL, *local = NULL;
+ _cleanup_close_ int open_fd = -1;
+ int r, fd;
+
+ if (machine_name_is_valid(argv[1])) {
+ r = image_find(IMAGE_MACHINE, argv[1], &image);
+ if (r == -ENOENT)
+ return log_error_errno(r, "Machine image %s not found.", argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look for machine %s: %m", argv[1]);
+
+ local = image->path;
+ } else
+ local = argv[1];
+
+ if (argc >= 3)
+ path = argv[2];
+ path = empty_or_dash_to_null(path);
+
+ determine_compression_from_filename(path);
+
+ if (path) {
+ open_fd = open(path, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_NOCTTY, 0666);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open raw image for export: %m");
+
+ fd = open_fd;
+
+ log_info("Exporting '%s', saving to '%s' with compression '%s'.", local, path, import_compress_type_to_string(arg_compress));
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDOUT_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Exporting '%s', saving to '%s' with compression '%s'.", local, strna(pretty), import_compress_type_to_string(arg_compress));
+ }
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = raw_export_new(&export, event, on_raw_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate exporter: %m");
+
+ r = raw_export_start(export, local, fd, arg_compress);
+ if (r < 0)
+ return log_error_errno(r, "Failed to export image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Export container or virtual machine images.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --format=FORMAT Select format\n\n"
+ "Commands:\n"
+ " tar NAME [FILE] Export a TAR image\n"
+ " raw NAME [FILE] Export a RAW image\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FORMAT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "format", required_argument, NULL, ARG_FORMAT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_FORMAT:
+ if (streq(optarg, "uncompressed"))
+ arg_compress = IMPORT_COMPRESS_UNCOMPRESSED;
+ else if (streq(optarg, "xz"))
+ arg_compress = IMPORT_COMPRESS_XZ;
+ else if (streq(optarg, "gzip"))
+ arg_compress = IMPORT_COMPRESS_GZIP;
+ else if (streq(optarg, "bzip2"))
+ arg_compress = IMPORT_COMPRESS_BZIP2;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown format: %s", optarg);
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int export_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "tar", 2, 3, 0, export_tar },
+ { "raw", 2, 3, 0, export_raw },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ (void) ignore_signals(SIGPIPE, -1);
+
+ return export_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/import-common.c b/src/import/import-common.c
new file mode 100644
index 0000000..2502705
--- /dev/null
+++ b/src/import/import-common.c
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sched.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "capability-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "import-common.h"
+#include "os-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+int import_make_read_only_fd(int fd) {
+ int r;
+
+ assert(fd >= 0);
+
+ /* First, let's make this a read-only subvolume if it refers
+ * to a subvolume */
+ r = btrfs_subvol_set_read_only_fd(fd, true);
+ if (IN_SET(r, -ENOTTY, -ENOTDIR, -EINVAL)) {
+ struct stat st;
+
+ /* This doesn't refer to a subvolume, or the file
+ * system isn't even btrfs. In that, case fall back to
+ * chmod()ing */
+
+ r = fstat(fd, &st);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to stat temporary image: %m");
+
+ /* Drop "w" flag */
+ if (fchmod(fd, st.st_mode & 07555) < 0)
+ return log_error_errno(errno, "Failed to chmod() final image: %m");
+
+ return 0;
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to make subvolume read-only: %m");
+
+ return 0;
+}
+
+int import_make_read_only(const char *path) {
+ _cleanup_close_ int fd = 1;
+
+ fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+
+ return import_make_read_only_fd(fd);
+}
+
+int import_fork_tar_x(const char *path, pid_t *ret) {
+ _cleanup_close_pair_ int pipefd[2] = { -1, -1 };
+ bool use_selinux;
+ pid_t pid;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ if (pipe2(pipefd, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create pipe for tar: %m");
+
+ use_selinux = mac_selinux_use();
+
+ r = safe_fork("(tar)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *cmdline[] = {
+ "tar",
+ "--ignore-zeros",
+ "--numeric-owner",
+ "-C", path,
+ "-px",
+ "--xattrs",
+ "--xattrs-include=*",
+ use_selinux ? "--selinux" : "--no-selinux",
+ NULL
+ };
+
+ uint64_t retain =
+ (1ULL << CAP_CHOWN) |
+ (1ULL << CAP_FOWNER) |
+ (1ULL << CAP_FSETID) |
+ (1ULL << CAP_MKNOD) |
+ (1ULL << CAP_SETFCAP) |
+ (1ULL << CAP_DAC_OVERRIDE);
+
+ /* Child */
+
+ pipefd[1] = safe_close(pipefd[1]);
+
+ r = rearrange_stdio(pipefd[0], -1, STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rearrange stdin/stdout: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (unshare(CLONE_NEWNET) < 0)
+ log_error_errno(errno, "Failed to lock tar into network namespace, ignoring: %m");
+
+ r = capability_bounding_set_drop(retain, true);
+ if (r < 0)
+ log_error_errno(r, "Failed to drop capabilities, ignoring: %m");
+
+ /* Try "gtar" before "tar". We only test things upstream with GNU tar. Some distros appear to
+ * install a different implementation as "tar" (in particular some that do not support the
+ * same command line switches), but then provide "gtar" as alias for the real thing, hence
+ * let's prefer that. (Yes, it's a bad idea they do that, given they don't provide equivalent
+ * command line support, but we are not here to argue, let's just expose the same
+ * behaviour/implementation everywhere.) */
+ execvp("gtar", (char* const*) cmdline);
+ execvp("tar", (char* const*) cmdline);
+
+ log_error_errno(errno, "Failed to execute tar: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ *ret = pid;
+
+ return TAKE_FD(pipefd[1]);
+}
+
+int import_fork_tar_c(const char *path, pid_t *ret) {
+ _cleanup_close_pair_ int pipefd[2] = { -1, -1 };
+ bool use_selinux;
+ pid_t pid;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ if (pipe2(pipefd, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create pipe for tar: %m");
+
+ use_selinux = mac_selinux_use();
+
+ r = safe_fork("(tar)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *cmdline[] = {
+ "tar",
+ "-C", path,
+ "-c",
+ "--xattrs",
+ "--xattrs-include=*",
+ use_selinux ? "--selinux" : "--no-selinux",
+ ".",
+ NULL
+ };
+
+ uint64_t retain = (1ULL << CAP_DAC_OVERRIDE);
+
+ /* Child */
+
+ pipefd[0] = safe_close(pipefd[0]);
+
+ r = rearrange_stdio(-1, pipefd[1], STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rearrange stdin/stdout: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (unshare(CLONE_NEWNET) < 0)
+ log_error_errno(errno, "Failed to lock tar into network namespace, ignoring: %m");
+
+ r = capability_bounding_set_drop(retain, true);
+ if (r < 0)
+ log_error_errno(r, "Failed to drop capabilities, ignoring: %m");
+
+ execvp("gtar", (char* const*) cmdline);
+ execvp("tar", (char* const*) cmdline);
+
+ log_error_errno(errno, "Failed to execute tar: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ *ret = pid;
+
+ return TAKE_FD(pipefd[0]);
+}
+
+int import_mangle_os_tree(const char *path) {
+ _cleanup_closedir_ DIR *d = NULL, *cd = NULL;
+ _cleanup_free_ char *child = NULL, *t = NULL;
+ const char *joined;
+ struct dirent *de;
+ int r;
+
+ assert(path);
+
+ /* Some tarballs contain a single top-level directory that contains the actual OS directory tree. Try to
+ * recognize this, and move the tree one level up. */
+
+ r = path_is_os_tree(path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether '%s' is an OS tree: %m", path);
+ if (r > 0) {
+ log_debug("Directory tree '%s' is a valid OS tree.", path);
+ return 0;
+ }
+
+ log_debug("Directory tree '%s' is not recognizable as OS tree, checking whether to rearrange it.", path);
+
+ d = opendir(path);
+ if (!d)
+ return log_error_errno(r, "Failed to open directory '%s': %m", path);
+
+ errno = 0;
+ de = readdir_no_dot(d);
+ if (!de) {
+ if (errno != 0)
+ return log_error_errno(errno, "Failed to iterate through directory '%s': %m", path);
+
+ log_debug("Directory '%s' is empty, leaving it as it is.", path);
+ return 0;
+ }
+
+ child = strdup(de->d_name);
+ if (!child)
+ return log_oom();
+
+ errno = 0;
+ de = readdir_no_dot(d);
+ if (de) {
+ if (errno != 0)
+ return log_error_errno(errno, "Failed to iterate through directory '%s': %m", path);
+
+ log_debug("Directory '%s' does not look like a directory tree, and has multiple children, leaving as it is.", path);
+ return 0;
+ }
+
+ joined = prefix_roota(path, child);
+ r = path_is_os_tree(joined);
+ if (r == -ENOTDIR) {
+ log_debug("Directory '%s' does not look like a directory tree, and contains a single regular file only, leaving as it is.", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether '%s' is an OS tree: %m", joined);
+ if (r == 0) {
+ log_debug("Neither '%s' nor '%s' is a valid OS tree, leaving them as they are.", path, joined);
+ return 0;
+ }
+
+ /* Nice, we have checked now:
+ *
+ * 1. The top-level directory does not qualify as OS tree
+ * 1. The top-level directory only contains one item
+ * 2. That item is a directory
+ * 3. And that directory qualifies as OS tree
+ *
+ * Let's now rearrange things, moving everything in the inner directory one level up */
+
+ cd = xopendirat(dirfd(d), child, O_NOFOLLOW);
+ if (!cd)
+ return log_error_errno(errno, "Can't open directory '%s': %m", joined);
+
+ log_info("Rearranging '%s', moving OS tree one directory up.", joined);
+
+ /* Let's rename the child to an unguessable name so that we can be sure all files contained in it can be
+ * safely moved up and won't collide with the name. */
+ r = tempfn_random(child, NULL, &t);
+ if (r < 0)
+ return log_oom();
+ r = rename_noreplace(dirfd(d), child, dirfd(d), t);
+ if (r < 0)
+ return log_error_errno(r, "Unable to rename '%s' to '%s/%s': %m", joined, path, t);
+
+ FOREACH_DIRENT_ALL(de, cd, return log_error_errno(errno, "Failed to iterate through directory '%s': %m", joined)) {
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ r = rename_noreplace(dirfd(cd), de->d_name, dirfd(d), de->d_name);
+ if (r < 0)
+ return log_error_errno(r, "Unable to move '%s/%s/%s' to '%s/%s': %m", path, t, de->d_name, path, de->d_name);
+ }
+
+ if (unlinkat(dirfd(d), t, AT_REMOVEDIR) < 0)
+ return log_error_errno(errno, "Failed to remove temporary directory '%s/%s': %m", path, t);
+
+ log_info("Successfully rearranged OS tree.");
+
+ return 0;
+}
diff --git a/src/import/import-common.h b/src/import/import-common.h
new file mode 100644
index 0000000..b27a980
--- /dev/null
+++ b/src/import/import-common.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int import_make_read_only_fd(int fd);
+int import_make_read_only(const char *path);
+
+int import_fork_tar_c(const char *path, pid_t *ret);
+int import_fork_tar_x(const char *path, pid_t *ret);
+
+int import_mangle_os_tree(const char *path);
diff --git a/src/import/import-compress.c b/src/import/import-compress.c
new file mode 100644
index 0000000..b89ffb1
--- /dev/null
+++ b/src/import/import-compress.c
@@ -0,0 +1,466 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "import-compress.h"
+#include "string-table.h"
+#include "util.h"
+
+void import_compress_free(ImportCompress *c) {
+ assert(c);
+
+ if (c->type == IMPORT_COMPRESS_XZ)
+ lzma_end(&c->xz);
+ else if (c->type == IMPORT_COMPRESS_GZIP) {
+ if (c->encoding)
+ deflateEnd(&c->gzip);
+ else
+ inflateEnd(&c->gzip);
+#if HAVE_BZIP2
+ } else if (c->type == IMPORT_COMPRESS_BZIP2) {
+ if (c->encoding)
+ BZ2_bzCompressEnd(&c->bzip2);
+ else
+ BZ2_bzDecompressEnd(&c->bzip2);
+#endif
+ }
+
+ c->type = IMPORT_COMPRESS_UNKNOWN;
+}
+
+int import_uncompress_detect(ImportCompress *c, const void *data, size_t size) {
+ static const uint8_t xz_signature[] = {
+ 0xfd, '7', 'z', 'X', 'Z', 0x00
+ };
+ static const uint8_t gzip_signature[] = {
+ 0x1f, 0x8b
+ };
+ static const uint8_t bzip2_signature[] = {
+ 'B', 'Z', 'h'
+ };
+
+ int r;
+
+ assert(c);
+
+ if (c->type != IMPORT_COMPRESS_UNKNOWN)
+ return 1;
+
+ if (size < MAX3(sizeof(xz_signature),
+ sizeof(gzip_signature),
+ sizeof(bzip2_signature)))
+ return 0;
+
+ assert(data);
+
+ if (memcmp(data, xz_signature, sizeof(xz_signature)) == 0) {
+ lzma_ret xzr;
+
+ xzr = lzma_stream_decoder(&c->xz, UINT64_MAX, LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED);
+ if (xzr != LZMA_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_XZ;
+
+ } else if (memcmp(data, gzip_signature, sizeof(gzip_signature)) == 0) {
+ r = inflateInit2(&c->gzip, 15+16);
+ if (r != Z_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_GZIP;
+
+#if HAVE_BZIP2
+ } else if (memcmp(data, bzip2_signature, sizeof(bzip2_signature)) == 0) {
+ r = BZ2_bzDecompressInit(&c->bzip2, 0, 0);
+ if (r != BZ_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_BZIP2;
+#endif
+ } else
+ c->type = IMPORT_COMPRESS_UNCOMPRESSED;
+
+ c->encoding = false;
+
+ return 1;
+}
+
+int import_uncompress(ImportCompress *c, const void *data, size_t size, ImportCompressCallback callback, void *userdata) {
+ int r;
+
+ assert(c);
+ assert(callback);
+
+ r = import_uncompress_detect(c, data, size);
+ if (r <= 0)
+ return r;
+
+ if (c->encoding)
+ return -EINVAL;
+
+ if (size <= 0)
+ return 1;
+
+ assert(data);
+
+ switch (c->type) {
+
+ case IMPORT_COMPRESS_UNCOMPRESSED:
+ r = callback(data, size, userdata);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMPORT_COMPRESS_XZ:
+ c->xz.next_in = data;
+ c->xz.avail_in = size;
+
+ while (c->xz.avail_in > 0) {
+ uint8_t buffer[16 * 1024];
+ lzma_ret lzr;
+
+ c->xz.next_out = buffer;
+ c->xz.avail_out = sizeof(buffer);
+
+ lzr = lzma_code(&c->xz, LZMA_RUN);
+ if (!IN_SET(lzr, LZMA_OK, LZMA_STREAM_END))
+ return -EIO;
+
+ r = callback(buffer, sizeof(buffer) - c->xz.avail_out, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+ case IMPORT_COMPRESS_GZIP:
+ c->gzip.next_in = (void*) data;
+ c->gzip.avail_in = size;
+
+ while (c->gzip.avail_in > 0) {
+ uint8_t buffer[16 * 1024];
+
+ c->gzip.next_out = buffer;
+ c->gzip.avail_out = sizeof(buffer);
+
+ r = inflate(&c->gzip, Z_NO_FLUSH);
+ if (!IN_SET(r, Z_OK, Z_STREAM_END))
+ return -EIO;
+
+ r = callback(buffer, sizeof(buffer) - c->gzip.avail_out, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+#if HAVE_BZIP2
+ case IMPORT_COMPRESS_BZIP2:
+ c->bzip2.next_in = (void*) data;
+ c->bzip2.avail_in = size;
+
+ while (c->bzip2.avail_in > 0) {
+ uint8_t buffer[16 * 1024];
+
+ c->bzip2.next_out = (char*) buffer;
+ c->bzip2.avail_out = sizeof(buffer);
+
+ r = BZ2_bzDecompress(&c->bzip2);
+ if (!IN_SET(r, BZ_OK, BZ_STREAM_END))
+ return -EIO;
+
+ r = callback(buffer, sizeof(buffer) - c->bzip2.avail_out, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+#endif
+
+ default:
+ assert_not_reached("Unknown compression");
+ }
+
+ return 1;
+}
+
+int import_compress_init(ImportCompress *c, ImportCompressType t) {
+ int r;
+
+ assert(c);
+
+ switch (t) {
+
+ case IMPORT_COMPRESS_XZ: {
+ lzma_ret xzr;
+
+ xzr = lzma_easy_encoder(&c->xz, LZMA_PRESET_DEFAULT, LZMA_CHECK_CRC64);
+ if (xzr != LZMA_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_XZ;
+ break;
+ }
+
+ case IMPORT_COMPRESS_GZIP:
+ r = deflateInit2(&c->gzip, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY);
+ if (r != Z_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_GZIP;
+ break;
+
+#if HAVE_BZIP2
+ case IMPORT_COMPRESS_BZIP2:
+ r = BZ2_bzCompressInit(&c->bzip2, 9, 0, 0);
+ if (r != BZ_OK)
+ return -EIO;
+
+ c->type = IMPORT_COMPRESS_BZIP2;
+ break;
+#endif
+
+ case IMPORT_COMPRESS_UNCOMPRESSED:
+ c->type = IMPORT_COMPRESS_UNCOMPRESSED;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ c->encoding = true;
+ return 0;
+}
+
+static int enlarge_buffer(void **buffer, size_t *buffer_size, size_t *buffer_allocated) {
+ size_t l;
+ void *p;
+
+ if (*buffer_allocated > *buffer_size)
+ return 0;
+
+ l = MAX(16*1024U, (*buffer_size * 2));
+ p = realloc(*buffer, l);
+ if (!p)
+ return -ENOMEM;
+
+ *buffer = p;
+ *buffer_allocated = l;
+
+ return 1;
+}
+
+int import_compress(ImportCompress *c, const void *data, size_t size, void **buffer, size_t *buffer_size, size_t *buffer_allocated) {
+ int r;
+
+ assert(c);
+ assert(buffer);
+ assert(buffer_size);
+ assert(buffer_allocated);
+
+ if (!c->encoding)
+ return -EINVAL;
+
+ if (size <= 0)
+ return 0;
+
+ assert(data);
+
+ *buffer_size = 0;
+
+ switch (c->type) {
+
+ case IMPORT_COMPRESS_XZ:
+
+ c->xz.next_in = data;
+ c->xz.avail_in = size;
+
+ while (c->xz.avail_in > 0) {
+ lzma_ret lzr;
+
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->xz.next_out = (uint8_t*) *buffer + *buffer_size;
+ c->xz.avail_out = *buffer_allocated - *buffer_size;
+
+ lzr = lzma_code(&c->xz, LZMA_RUN);
+ if (lzr != LZMA_OK)
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->xz.avail_out;
+ }
+
+ break;
+
+ case IMPORT_COMPRESS_GZIP:
+
+ c->gzip.next_in = (void*) data;
+ c->gzip.avail_in = size;
+
+ while (c->gzip.avail_in > 0) {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->gzip.next_out = (uint8_t*) *buffer + *buffer_size;
+ c->gzip.avail_out = *buffer_allocated - *buffer_size;
+
+ r = deflate(&c->gzip, Z_NO_FLUSH);
+ if (r != Z_OK)
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->gzip.avail_out;
+ }
+
+ break;
+
+#if HAVE_BZIP2
+ case IMPORT_COMPRESS_BZIP2:
+
+ c->bzip2.next_in = (void*) data;
+ c->bzip2.avail_in = size;
+
+ while (c->bzip2.avail_in > 0) {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->bzip2.next_out = (void*) ((uint8_t*) *buffer + *buffer_size);
+ c->bzip2.avail_out = *buffer_allocated - *buffer_size;
+
+ r = BZ2_bzCompress(&c->bzip2, BZ_RUN);
+ if (r != BZ_RUN_OK)
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->bzip2.avail_out;
+ }
+
+ break;
+#endif
+
+ case IMPORT_COMPRESS_UNCOMPRESSED:
+
+ if (*buffer_allocated < size) {
+ void *p;
+
+ p = realloc(*buffer, size);
+ if (!p)
+ return -ENOMEM;
+
+ *buffer = p;
+ *buffer_allocated = size;
+ }
+
+ memcpy(*buffer, data, size);
+ *buffer_size = size;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int import_compress_finish(ImportCompress *c, void **buffer, size_t *buffer_size, size_t *buffer_allocated) {
+ int r;
+
+ assert(c);
+ assert(buffer);
+ assert(buffer_size);
+ assert(buffer_allocated);
+
+ if (!c->encoding)
+ return -EINVAL;
+
+ *buffer_size = 0;
+
+ switch (c->type) {
+
+ case IMPORT_COMPRESS_XZ: {
+ lzma_ret lzr;
+
+ c->xz.avail_in = 0;
+
+ do {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->xz.next_out = (uint8_t*) *buffer + *buffer_size;
+ c->xz.avail_out = *buffer_allocated - *buffer_size;
+
+ lzr = lzma_code(&c->xz, LZMA_FINISH);
+ if (!IN_SET(lzr, LZMA_OK, LZMA_STREAM_END))
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->xz.avail_out;
+ } while (lzr != LZMA_STREAM_END);
+
+ break;
+ }
+
+ case IMPORT_COMPRESS_GZIP:
+ c->gzip.avail_in = 0;
+
+ do {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->gzip.next_out = (uint8_t*) *buffer + *buffer_size;
+ c->gzip.avail_out = *buffer_allocated - *buffer_size;
+
+ r = deflate(&c->gzip, Z_FINISH);
+ if (!IN_SET(r, Z_OK, Z_STREAM_END))
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->gzip.avail_out;
+ } while (r != Z_STREAM_END);
+
+ break;
+
+#if HAVE_BZIP2
+ case IMPORT_COMPRESS_BZIP2:
+ c->bzip2.avail_in = 0;
+
+ do {
+ r = enlarge_buffer(buffer, buffer_size, buffer_allocated);
+ if (r < 0)
+ return r;
+
+ c->bzip2.next_out = (void*) ((uint8_t*) *buffer + *buffer_size);
+ c->bzip2.avail_out = *buffer_allocated - *buffer_size;
+
+ r = BZ2_bzCompress(&c->bzip2, BZ_FINISH);
+ if (!IN_SET(r, BZ_FINISH_OK, BZ_STREAM_END))
+ return -EIO;
+
+ *buffer_size += (*buffer_allocated - *buffer_size) - c->bzip2.avail_out;
+ } while (r != BZ_STREAM_END);
+
+ break;
+#endif
+
+ case IMPORT_COMPRESS_UNCOMPRESSED:
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static const char* const import_compress_type_table[_IMPORT_COMPRESS_TYPE_MAX] = {
+ [IMPORT_COMPRESS_UNKNOWN] = "unknown",
+ [IMPORT_COMPRESS_UNCOMPRESSED] = "uncompressed",
+ [IMPORT_COMPRESS_XZ] = "xz",
+ [IMPORT_COMPRESS_GZIP] = "gzip",
+#if HAVE_BZIP2
+ [IMPORT_COMPRESS_BZIP2] = "bzip2",
+#endif
+};
+
+DEFINE_STRING_TABLE_LOOKUP(import_compress_type, ImportCompressType);
diff --git a/src/import/import-compress.h b/src/import/import-compress.h
new file mode 100644
index 0000000..e40f4db
--- /dev/null
+++ b/src/import/import-compress.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_BZIP2
+#include <bzlib.h>
+#endif
+#include <lzma.h>
+#include <sys/types.h>
+#include <zlib.h>
+
+#include "macro.h"
+
+typedef enum ImportCompressType {
+ IMPORT_COMPRESS_UNKNOWN,
+ IMPORT_COMPRESS_UNCOMPRESSED,
+ IMPORT_COMPRESS_XZ,
+ IMPORT_COMPRESS_GZIP,
+ IMPORT_COMPRESS_BZIP2,
+ _IMPORT_COMPRESS_TYPE_MAX,
+ _IMPORT_COMPRESS_TYPE_INVALID = -1,
+} ImportCompressType;
+
+typedef struct ImportCompress {
+ ImportCompressType type;
+ bool encoding;
+ union {
+ lzma_stream xz;
+ z_stream gzip;
+#if HAVE_BZIP2
+ bz_stream bzip2;
+#endif
+ };
+} ImportCompress;
+
+typedef int (*ImportCompressCallback)(const void *data, size_t size, void *userdata);
+
+void import_compress_free(ImportCompress *c);
+
+int import_uncompress_detect(ImportCompress *c, const void *data, size_t size);
+int import_uncompress(ImportCompress *c, const void *data, size_t size, ImportCompressCallback callback, void *userdata);
+
+int import_compress_init(ImportCompress *c, ImportCompressType t);
+int import_compress(ImportCompress *c, const void *data, size_t size, void **buffer, size_t *buffer_size, size_t *buffer_allocated);
+int import_compress_finish(ImportCompress *c, void **buffer, size_t *buffer_size, size_t *buffer_allocated);
+
+const char* import_compress_type_to_string(ImportCompressType t) _const_;
+ImportCompressType import_compress_type_from_string(const char *s) _pure_;
diff --git a/src/import/import-fs.c b/src/import/import-fs.c
new file mode 100644
index 0000000..3b43ea1
--- /dev/null
+++ b/src/import/import-fs.c
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <locale.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-util.h"
+#include "machine-image.h"
+#include "mkdir.h"
+#include "ratelimit.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "verbs.h"
+
+static bool arg_force = false;
+static bool arg_read_only = false;
+static const char *arg_image_root = "/var/lib/machines";
+
+typedef struct ProgressInfo {
+ RateLimit limit;
+ char *path;
+ uint64_t size;
+ bool started;
+ bool logged_incomplete;
+} ProgressInfo;
+
+static volatile sig_atomic_t cancelled = false;
+
+static void sigterm_sigint(int sig) {
+ cancelled = true;
+}
+
+static void progress_info_free(ProgressInfo *p) {
+ free(p->path);
+}
+
+static void progress_show(ProgressInfo *p) {
+ assert(p);
+
+ /* Show progress only every now and then. */
+ if (!ratelimit_below(&p->limit))
+ return;
+
+ /* Suppress the first message, start with the second one */
+ if (!p->started) {
+ p->started = true;
+ return;
+ }
+
+ /* Mention the list is incomplete before showing first output. */
+ if (!p->logged_incomplete) {
+ log_notice("(Note, file list shown below is incomplete, and is intended as sporadic progress report only.)");
+ p->logged_incomplete = true;
+ }
+
+ if (p->size == 0)
+ log_info("Copying tree, currently at '%s'...", p->path);
+ else {
+ char buffer[FORMAT_BYTES_MAX];
+
+ log_info("Copying tree, currently at '%s' (@%s)...", p->path, format_bytes(buffer, sizeof(buffer), p->size));
+ }
+}
+
+static int progress_path(const char *path, const struct stat *st, void *userdata) {
+ ProgressInfo *p = userdata;
+ int r;
+
+ assert(p);
+
+ if (cancelled)
+ return -EOWNERDEAD;
+
+ r = free_and_strdup(&p->path, path);
+ if (r < 0)
+ return r;
+
+ p->size = 0;
+
+ progress_show(p);
+ return 0;
+}
+
+static int progress_bytes(uint64_t nbytes, void *userdata) {
+ ProgressInfo *p = userdata;
+
+ assert(p);
+ assert(p->size != UINT64_MAX);
+
+ if (cancelled)
+ return -EOWNERDEAD;
+
+ p->size += nbytes;
+
+ progress_show(p);
+ return 0;
+}
+
+static int import_fs(int argc, char *argv[], void *userdata) {
+ _cleanup_(rm_rf_subvolume_and_freep) char *temp_path = NULL;
+ _cleanup_(progress_info_free) ProgressInfo progress = {};
+ const char *path = NULL, *local = NULL, *final_path;
+ _cleanup_close_ int open_fd = -1;
+ struct sigaction old_sigint_sa, old_sigterm_sa;
+ static const struct sigaction sa = {
+ .sa_handler = sigterm_sigint,
+ .sa_flags = SA_RESTART,
+ };
+ int r, fd;
+
+ if (argc >= 2)
+ path = argv[1];
+ path = empty_or_dash_to_null(path);
+
+ if (argc >= 3)
+ local = argv[2];
+ else if (path)
+ local = basename(path);
+ local = empty_or_dash_to_null(local);
+
+ if (local) {
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local image name '%s' is not valid.",
+ local);
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Image '%s' already exists.",
+ local);
+ }
+ }
+ } else
+ local = "imported";
+
+ if (path) {
+ open_fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open directory to import: %m");
+
+ fd = open_fd;
+
+ log_info("Importing '%s', saving as '%s'.", path, local);
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDIN_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Importing '%s', saving as '%s'.", strempty(pretty), local);
+ }
+
+ final_path = prefix_roota(arg_image_root, local);
+
+ r = tempfn_random(final_path, NULL, &temp_path);
+ if (r < 0)
+ return log_oom();
+
+ (void) mkdir_parents_label(temp_path, 0700);
+
+ progress.limit = (RateLimit) { 200*USEC_PER_MSEC, 1 };
+
+ /* Hook into SIGINT/SIGTERM, so that we can cancel things then */
+ assert_se(sigaction(SIGINT, &sa, &old_sigint_sa) >= 0);
+ assert_se(sigaction(SIGTERM, &sa, &old_sigterm_sa) >= 0);
+
+ r = btrfs_subvol_snapshot_fd_full(
+ fd,
+ temp_path,
+ BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_RECURSIVE|BTRFS_SNAPSHOT_FALLBACK_DIRECTORY|BTRFS_SNAPSHOT_QUOTA,
+ progress_path,
+ progress_bytes,
+ &progress);
+ if (r == -EOWNERDEAD) { /* SIGINT + SIGTERM cause this, see signal handler above */
+ log_error("Copy cancelled.");
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to copy directory: %m");
+ goto finish;
+ }
+
+ r = import_mangle_os_tree(temp_path);
+ if (r < 0)
+ goto finish;
+
+ (void) import_assign_pool_quota_and_warn(temp_path);
+
+ if (arg_read_only) {
+ r = import_make_read_only(temp_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to make directory read-only: %m");
+ goto finish;
+ }
+ }
+
+ if (arg_force)
+ (void) rm_rf(final_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = rename_noreplace(AT_FDCWD, temp_path, AT_FDCWD, final_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to move image into place: %m");
+ goto finish;
+ }
+
+ temp_path = mfree(temp_path);
+
+ log_info("Exiting.");
+
+finish:
+ /* Put old signal handlers into place */
+ assert_se(sigaction(SIGINT, &old_sigint_sa, NULL) >= 0);
+ assert_se(sigaction(SIGTERM, &old_sigterm_sa, NULL) >= 0);
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Import container images from a file system.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --force Force creation of image\n"
+ " --image-root=PATH Image root directory\n"
+ " --read-only Create a read-only image\n\n"
+ "Commands:\n"
+ " run DIRECTORY [NAME] Import a directory\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FORCE,
+ ARG_IMAGE_ROOT,
+ ARG_READ_ONLY,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "image-root", required_argument, NULL, ARG_IMAGE_ROOT },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_IMAGE_ROOT:
+ arg_image_root = optarg;
+ break;
+
+ case ARG_READ_ONLY:
+ arg_read_only = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int import_fs_main(int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "run", 2, 3, 0, import_fs },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finish;
+
+ r = import_fs_main(argc, argv);
+
+finish:
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/import/import-pubring.gpg b/src/import/import-pubring.gpg
new file mode 100644
index 0000000..be27776
--- /dev/null
+++ b/src/import/import-pubring.gpg
Binary files differ
diff --git a/src/import/import-raw.c b/src/import/import-raw.c
new file mode 100644
index 0000000..9f5c13b
--- /dev/null
+++ b/src/import/import-raw.c
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/fs.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "copy.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-compress.h"
+#include "import-raw.h"
+#include "io-util.h"
+#include "machine-pool.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "qcow2-util.h"
+#include "ratelimit.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+struct RawImport {
+ sd_event *event;
+
+ char *image_root;
+
+ RawImportFinished on_finished;
+ void *userdata;
+
+ char *local;
+ bool force_local;
+ bool read_only;
+
+ char *temp_path;
+ char *final_path;
+
+ int input_fd;
+ int output_fd;
+
+ ImportCompress compress;
+
+ sd_event_source *input_event_source;
+
+ uint8_t buffer[16*1024];
+ size_t buffer_size;
+
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ struct stat st;
+
+ unsigned last_percent;
+ RateLimit progress_ratelimit;
+};
+
+RawImport* raw_import_unref(RawImport *i) {
+ if (!i)
+ return NULL;
+
+ sd_event_unref(i->event);
+
+ if (i->temp_path) {
+ (void) unlink(i->temp_path);
+ free(i->temp_path);
+ }
+
+ import_compress_free(&i->compress);
+
+ sd_event_source_unref(i->input_event_source);
+
+ safe_close(i->output_fd);
+
+ free(i->final_path);
+ free(i->image_root);
+ free(i->local);
+ return mfree(i);
+}
+
+int raw_import_new(
+ RawImport **ret,
+ sd_event *event,
+ const char *image_root,
+ RawImportFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(raw_import_unrefp) RawImport *i = NULL;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ assert(ret);
+
+ root = strdup(image_root ?: "/var/lib/machines");
+ if (!root)
+ return -ENOMEM;
+
+ i = new(RawImport, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (RawImport) {
+ .input_fd = -1,
+ .output_fd = -1,
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .last_percent = (unsigned) -1,
+ .image_root = TAKE_PTR(root),
+ .progress_ratelimit = { 100 * USEC_PER_MSEC, 1 },
+ };
+
+ if (event)
+ i->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&i->event);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static void raw_import_report_progress(RawImport *i) {
+ unsigned percent;
+ assert(i);
+
+ /* We have no size information, unless the source is a regular file */
+ if (!S_ISREG(i->st.st_mode))
+ return;
+
+ if (i->written_compressed >= (uint64_t) i->st.st_size)
+ percent = 100;
+ else
+ percent = (unsigned) ((i->written_compressed * UINT64_C(100)) / (uint64_t) i->st.st_size);
+
+ if (percent == i->last_percent)
+ return;
+
+ if (!ratelimit_below(&i->progress_ratelimit))
+ return;
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_info("Imported %u%%.", percent);
+
+ i->last_percent = percent;
+}
+
+static int raw_import_maybe_convert_qcow2(RawImport *i) {
+ _cleanup_close_ int converted_fd = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(i);
+
+ r = qcow2_detect(i->output_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect whether this is a QCOW2 image: %m");
+ if (r == 0)
+ return 0;
+
+ /* This is a QCOW2 image, let's convert it */
+ r = tempfn_random(i->final_path, NULL, &t);
+ if (r < 0)
+ return log_oom();
+
+ converted_fd = open(t, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (converted_fd < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", t);
+
+ (void) import_set_nocow_and_log(converted_fd, t);
+
+ log_info("Unpacking QCOW2 file.");
+
+ r = qcow2_convert(i->output_fd, converted_fd);
+ if (r < 0) {
+ (void) unlink(t);
+ return log_error_errno(r, "Failed to convert qcow2 image: %m");
+ }
+
+ (void) unlink(i->temp_path);
+ free_and_replace(i->temp_path, t);
+ CLOSE_AND_REPLACE(i->output_fd, converted_fd);
+
+ return 1;
+}
+
+static int raw_import_finish(RawImport *i) {
+ int r;
+
+ assert(i);
+ assert(i->output_fd >= 0);
+ assert(i->temp_path);
+ assert(i->final_path);
+
+ /* In case this was a sparse file, make sure the file system is right */
+ if (i->written_uncompressed > 0) {
+ if (ftruncate(i->output_fd, i->written_uncompressed) < 0)
+ return log_error_errno(errno, "Failed to truncate file: %m");
+ }
+
+ r = raw_import_maybe_convert_qcow2(i);
+ if (r < 0)
+ return r;
+
+ if (S_ISREG(i->st.st_mode)) {
+ (void) copy_times(i->input_fd, i->output_fd, COPY_CRTIME);
+ (void) copy_xattr(i->input_fd, i->output_fd);
+ }
+
+ if (i->read_only) {
+ r = import_make_read_only_fd(i->output_fd);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->force_local)
+ (void) rm_rf(i->final_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = rename_noreplace(AT_FDCWD, i->temp_path, AT_FDCWD, i->final_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move image into place: %m");
+
+ i->temp_path = mfree(i->temp_path);
+
+ return 0;
+}
+
+static int raw_import_open_disk(RawImport *i) {
+ int r;
+
+ assert(i);
+
+ assert(!i->final_path);
+ assert(!i->temp_path);
+ assert(i->output_fd < 0);
+
+ i->final_path = strjoin(i->image_root, "/", i->local, ".raw");
+ if (!i->final_path)
+ return log_oom();
+
+ r = tempfn_random(i->final_path, NULL, &i->temp_path);
+ if (r < 0)
+ return log_oom();
+
+ (void) mkdir_parents_label(i->temp_path, 0700);
+
+ i->output_fd = open(i->temp_path, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (i->output_fd < 0)
+ return log_error_errno(errno, "Failed to open destination %s: %m", i->temp_path);
+
+ (void) import_set_nocow_and_log(i->output_fd, i->temp_path);
+ return 0;
+}
+
+static int raw_import_try_reflink(RawImport *i) {
+ off_t p;
+ int r;
+
+ assert(i);
+ assert(i->input_fd >= 0);
+ assert(i->output_fd >= 0);
+
+ if (i->compress.type != IMPORT_COMPRESS_UNCOMPRESSED)
+ return 0;
+
+ if (!S_ISREG(i->st.st_mode))
+ return 0;
+
+ p = lseek(i->input_fd, 0, SEEK_CUR);
+ if (p == (off_t) -1)
+ return log_error_errno(errno, "Failed to read file offset of input file: %m");
+
+ /* Let's only try a btrfs reflink, if we are reading from the beginning of the file */
+ if ((uint64_t) p != (uint64_t) i->buffer_size)
+ return 0;
+
+ r = btrfs_reflink(i->input_fd, i->output_fd);
+ if (r >= 0)
+ return 1;
+
+ return 0;
+}
+
+static int raw_import_write(const void *p, size_t sz, void *userdata) {
+ RawImport *i = userdata;
+ ssize_t n;
+
+ n = sparse_write(i->output_fd, p, sz, 64);
+ if (n < 0)
+ return (int) n;
+ if ((size_t) n < sz)
+ return -EIO;
+
+ i->written_uncompressed += sz;
+
+ return 0;
+}
+
+static int raw_import_process(RawImport *i) {
+ ssize_t l;
+ int r;
+
+ assert(i);
+ assert(i->buffer_size < sizeof(i->buffer));
+
+ l = read(i->input_fd, i->buffer + i->buffer_size, sizeof(i->buffer) - i->buffer_size);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ r = log_error_errno(errno, "Failed to read input file: %m");
+ goto finish;
+ }
+ if (l == 0) {
+ if (i->compress.type == IMPORT_COMPRESS_UNKNOWN) {
+ log_error("Premature end of file.");
+ r = -EIO;
+ goto finish;
+ }
+
+ r = raw_import_finish(i);
+ goto finish;
+ }
+
+ i->buffer_size += l;
+
+ if (i->compress.type == IMPORT_COMPRESS_UNKNOWN) {
+ r = import_uncompress_detect(&i->compress, i->buffer, i->buffer_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to detect file compression: %m");
+ goto finish;
+ }
+ if (r == 0) /* Need more data */
+ return 0;
+
+ r = raw_import_open_disk(i);
+ if (r < 0)
+ goto finish;
+
+ r = raw_import_try_reflink(i);
+ if (r < 0)
+ goto finish;
+ if (r > 0) {
+ r = raw_import_finish(i);
+ goto finish;
+ }
+ }
+
+ r = import_uncompress(&i->compress, i->buffer, i->buffer_size, raw_import_write, i);
+ if (r < 0) {
+ log_error_errno(r, "Failed to decode and write: %m");
+ goto finish;
+ }
+
+ i->written_compressed += i->buffer_size;
+ i->buffer_size = 0;
+
+ raw_import_report_progress(i);
+
+ return 0;
+
+finish:
+ if (i->on_finished)
+ i->on_finished(i, r, i->userdata);
+ else
+ sd_event_exit(i->event, r);
+
+ return 0;
+}
+
+static int raw_import_on_input(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ RawImport *i = userdata;
+
+ return raw_import_process(i);
+}
+
+static int raw_import_on_defer(sd_event_source *s, void *userdata) {
+ RawImport *i = userdata;
+
+ return raw_import_process(i);
+}
+
+int raw_import_start(RawImport *i, int fd, const char *local, bool force_local, bool read_only) {
+ int r;
+
+ assert(i);
+ assert(fd >= 0);
+ assert(local);
+
+ if (!machine_name_is_valid(local))
+ return -EINVAL;
+
+ if (i->input_fd >= 0)
+ return -EBUSY;
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&i->local, local);
+ if (r < 0)
+ return r;
+ i->force_local = force_local;
+ i->read_only = read_only;
+
+ if (fstat(fd, &i->st) < 0)
+ return -errno;
+
+ r = sd_event_add_io(i->event, &i->input_event_source, fd, EPOLLIN, raw_import_on_input, i);
+ if (r == -EPERM) {
+ /* This fd does not support epoll, for example because it is a regular file. Busy read in that case */
+ r = sd_event_add_defer(i->event, &i->input_event_source, raw_import_on_defer, i);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(i->input_event_source, SD_EVENT_ON);
+ }
+ if (r < 0)
+ return r;
+
+ i->input_fd = fd;
+ return r;
+}
diff --git a/src/import/import-raw.h b/src/import/import-raw.h
new file mode 100644
index 0000000..4612a9f
--- /dev/null
+++ b/src/import/import-raw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-util.h"
+#include "macro.h"
+
+typedef struct RawImport RawImport;
+
+typedef void (*RawImportFinished)(RawImport *import, int error, void *userdata);
+
+int raw_import_new(RawImport **import, sd_event *event, const char *image_root, RawImportFinished on_finished, void *userdata);
+RawImport* raw_import_unref(RawImport *import);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(RawImport*, raw_import_unref);
+
+int raw_import_start(RawImport *i, int fd, const char *local, bool force_local, bool read_only);
diff --git a/src/import/import-tar.c b/src/import/import-tar.c
new file mode 100644
index 0000000..9f68d45
--- /dev/null
+++ b/src/import/import-tar.c
@@ -0,0 +1,365 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/fs.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "copy.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-compress.h"
+#include "import-tar.h"
+#include "io-util.h"
+#include "machine-pool.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "qcow2-util.h"
+#include "ratelimit.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+struct TarImport {
+ sd_event *event;
+
+ char *image_root;
+
+ TarImportFinished on_finished;
+ void *userdata;
+
+ char *local;
+ bool force_local;
+ bool read_only;
+
+ char *temp_path;
+ char *final_path;
+
+ int input_fd;
+ int tar_fd;
+
+ ImportCompress compress;
+
+ sd_event_source *input_event_source;
+
+ uint8_t buffer[16*1024];
+ size_t buffer_size;
+
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ struct stat st;
+
+ pid_t tar_pid;
+
+ unsigned last_percent;
+ RateLimit progress_ratelimit;
+};
+
+TarImport* tar_import_unref(TarImport *i) {
+ if (!i)
+ return NULL;
+
+ sd_event_source_unref(i->input_event_source);
+
+ if (i->tar_pid > 1) {
+ (void) kill_and_sigcont(i->tar_pid, SIGKILL);
+ (void) wait_for_terminate(i->tar_pid, NULL);
+ }
+
+ if (i->temp_path) {
+ (void) rm_rf(i->temp_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ free(i->temp_path);
+ }
+
+ import_compress_free(&i->compress);
+
+ sd_event_unref(i->event);
+
+ safe_close(i->tar_fd);
+
+ free(i->final_path);
+ free(i->image_root);
+ free(i->local);
+ return mfree(i);
+}
+
+int tar_import_new(
+ TarImport **ret,
+ sd_event *event,
+ const char *image_root,
+ TarImportFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(tar_import_unrefp) TarImport *i = NULL;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ assert(ret);
+
+ root = strdup(image_root ?: "/var/lib/machines");
+ if (!root)
+ return -ENOMEM;
+
+ i = new(TarImport, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (TarImport) {
+ .input_fd = -1,
+ .tar_fd = -1,
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .last_percent = (unsigned) -1,
+ .image_root = TAKE_PTR(root),
+ .progress_ratelimit = { 100 * USEC_PER_MSEC, 1 },
+ };
+
+ if (event)
+ i->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&i->event);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static void tar_import_report_progress(TarImport *i) {
+ unsigned percent;
+ assert(i);
+
+ /* We have no size information, unless the source is a regular file */
+ if (!S_ISREG(i->st.st_mode))
+ return;
+
+ if (i->written_compressed >= (uint64_t) i->st.st_size)
+ percent = 100;
+ else
+ percent = (unsigned) ((i->written_compressed * UINT64_C(100)) / (uint64_t) i->st.st_size);
+
+ if (percent == i->last_percent)
+ return;
+
+ if (!ratelimit_below(&i->progress_ratelimit))
+ return;
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_info("Imported %u%%.", percent);
+
+ i->last_percent = percent;
+}
+
+static int tar_import_finish(TarImport *i) {
+ int r;
+
+ assert(i);
+ assert(i->tar_fd >= 0);
+ assert(i->temp_path);
+ assert(i->final_path);
+
+ i->tar_fd = safe_close(i->tar_fd);
+
+ if (i->tar_pid > 0) {
+ r = wait_for_terminate_and_check("tar", i->tar_pid, WAIT_LOG);
+ i->tar_pid = 0;
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+ }
+
+ r = import_mangle_os_tree(i->temp_path);
+ if (r < 0)
+ return r;
+
+ if (i->read_only) {
+ r = import_make_read_only(i->temp_path);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->force_local)
+ (void) rm_rf(i->final_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = rename_noreplace(AT_FDCWD, i->temp_path, AT_FDCWD, i->final_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move image into place: %m");
+
+ i->temp_path = mfree(i->temp_path);
+
+ return 0;
+}
+
+static int tar_import_fork_tar(TarImport *i) {
+ int r;
+
+ assert(i);
+
+ assert(!i->final_path);
+ assert(!i->temp_path);
+ assert(i->tar_fd < 0);
+
+ i->final_path = path_join(i->image_root, i->local);
+ if (!i->final_path)
+ return log_oom();
+
+ r = tempfn_random(i->final_path, NULL, &i->temp_path);
+ if (r < 0)
+ return log_oom();
+
+ (void) mkdir_parents_label(i->temp_path, 0700);
+
+ r = btrfs_subvol_make_fallback(i->temp_path, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create directory/subvolume %s: %m", i->temp_path);
+ if (r > 0) /* actually btrfs subvol */
+ (void) import_assign_pool_quota_and_warn(i->temp_path);
+
+ i->tar_fd = import_fork_tar_x(i->temp_path, &i->tar_pid);
+ if (i->tar_fd < 0)
+ return i->tar_fd;
+
+ return 0;
+}
+
+static int tar_import_write(const void *p, size_t sz, void *userdata) {
+ TarImport *i = userdata;
+ int r;
+
+ r = loop_write(i->tar_fd, p, sz, false);
+ if (r < 0)
+ return r;
+
+ i->written_uncompressed += sz;
+
+ return 0;
+}
+
+static int tar_import_process(TarImport *i) {
+ ssize_t l;
+ int r;
+
+ assert(i);
+ assert(i->buffer_size < sizeof(i->buffer));
+
+ l = read(i->input_fd, i->buffer + i->buffer_size, sizeof(i->buffer) - i->buffer_size);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ r = log_error_errno(errno, "Failed to read input file: %m");
+ goto finish;
+ }
+ if (l == 0) {
+ if (i->compress.type == IMPORT_COMPRESS_UNKNOWN) {
+ log_error("Premature end of file.");
+ r = -EIO;
+ goto finish;
+ }
+
+ r = tar_import_finish(i);
+ goto finish;
+ }
+
+ i->buffer_size += l;
+
+ if (i->compress.type == IMPORT_COMPRESS_UNKNOWN) {
+ r = import_uncompress_detect(&i->compress, i->buffer, i->buffer_size);
+ if (r < 0) {
+ log_error_errno(r, "Failed to detect file compression: %m");
+ goto finish;
+ }
+ if (r == 0) /* Need more data */
+ return 0;
+
+ r = tar_import_fork_tar(i);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = import_uncompress(&i->compress, i->buffer, i->buffer_size, tar_import_write, i);
+ if (r < 0) {
+ log_error_errno(r, "Failed to decode and write: %m");
+ goto finish;
+ }
+
+ i->written_compressed += i->buffer_size;
+ i->buffer_size = 0;
+
+ tar_import_report_progress(i);
+
+ return 0;
+
+finish:
+ if (i->on_finished)
+ i->on_finished(i, r, i->userdata);
+ else
+ sd_event_exit(i->event, r);
+
+ return 0;
+}
+
+static int tar_import_on_input(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ TarImport *i = userdata;
+
+ return tar_import_process(i);
+}
+
+static int tar_import_on_defer(sd_event_source *s, void *userdata) {
+ TarImport *i = userdata;
+
+ return tar_import_process(i);
+}
+
+int tar_import_start(TarImport *i, int fd, const char *local, bool force_local, bool read_only) {
+ int r;
+
+ assert(i);
+ assert(fd >= 0);
+ assert(local);
+
+ if (!machine_name_is_valid(local))
+ return -EINVAL;
+
+ if (i->input_fd >= 0)
+ return -EBUSY;
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&i->local, local);
+ if (r < 0)
+ return r;
+ i->force_local = force_local;
+ i->read_only = read_only;
+
+ if (fstat(fd, &i->st) < 0)
+ return -errno;
+
+ r = sd_event_add_io(i->event, &i->input_event_source, fd, EPOLLIN, tar_import_on_input, i);
+ if (r == -EPERM) {
+ /* This fd does not support epoll, for example because it is a regular file. Busy read in that case */
+ r = sd_event_add_defer(i->event, &i->input_event_source, tar_import_on_defer, i);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(i->input_event_source, SD_EVENT_ON);
+ }
+ if (r < 0)
+ return r;
+
+ i->input_fd = fd;
+ return r;
+}
diff --git a/src/import/import-tar.h b/src/import/import-tar.h
new file mode 100644
index 0000000..afbe98a
--- /dev/null
+++ b/src/import/import-tar.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-util.h"
+#include "macro.h"
+
+typedef struct TarImport TarImport;
+
+typedef void (*TarImportFinished)(TarImport *import, int error, void *userdata);
+
+int tar_import_new(TarImport **import, sd_event *event, const char *image_root, TarImportFinished on_finished, void *userdata);
+TarImport* tar_import_unref(TarImport *import);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(TarImport*, tar_import_unref);
+
+int tar_import_start(TarImport *import, int fd, const char *local, bool force_local, bool read_only);
diff --git a/src/import/import.c b/src/import/import.c
new file mode 100644
index 0000000..eade0f0
--- /dev/null
+++ b/src/import/import.c
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <locale.h>
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-raw.h"
+#include "import-tar.h"
+#include "import-util.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "verbs.h"
+
+static bool arg_force = false;
+static bool arg_read_only = false;
+static const char *arg_image_root = "/var/lib/machines";
+
+static int interrupt_signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ log_notice("Transfer aborted.");
+ sd_event_exit(sd_event_source_get_event(s), EINTR);
+ return 0;
+}
+
+static void on_tar_finished(TarImport *import, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(import);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int import_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(tar_import_unrefp) TarImport *import = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *path = NULL, *local = NULL;
+ _cleanup_free_ char *ll = NULL;
+ _cleanup_close_ int open_fd = -1;
+ int r, fd;
+
+ if (argc >= 2)
+ path = argv[1];
+ path = empty_or_dash_to_null(path);
+
+ if (argc >= 3)
+ local = argv[2];
+ else if (path)
+ local = basename(path);
+ local = empty_or_dash_to_null(local);
+
+ if (local) {
+ r = tar_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local image name '%s' is not valid.",
+ local);
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Image '%s' already exists.",
+ local);
+ }
+ }
+ } else
+ local = "imported";
+
+ if (path) {
+ open_fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open tar image to import: %m");
+
+ fd = open_fd;
+
+ log_info("Importing '%s', saving as '%s'.", path, local);
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDIN_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Importing '%s', saving as '%s'.", strna(pretty), local);
+ }
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = tar_import_new(&import, event, arg_image_root, on_tar_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate importer: %m");
+
+ r = tar_import_start(import, fd, local, arg_force, arg_read_only);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static void on_raw_finished(RawImport *import, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(import);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int import_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(raw_import_unrefp) RawImport *import = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *path = NULL, *local = NULL;
+ _cleanup_free_ char *ll = NULL;
+ _cleanup_close_ int open_fd = -1;
+ int r, fd;
+
+ if (argc >= 2)
+ path = argv[1];
+ path = empty_or_dash_to_null(path);
+
+ if (argc >= 3)
+ local = argv[2];
+ else if (path)
+ local = basename(path);
+ local = empty_or_dash_to_null(local);
+
+ if (local) {
+ r = raw_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local image name '%s' is not valid.",
+ local);
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Image '%s' already exists.",
+ local);
+ }
+ }
+ } else
+ local = "imported";
+
+ if (path) {
+ open_fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (open_fd < 0)
+ return log_error_errno(errno, "Failed to open raw image to import: %m");
+
+ fd = open_fd;
+
+ log_info("Importing '%s', saving as '%s'.", path, local);
+ } else {
+ _cleanup_free_ char *pretty = NULL;
+
+ fd = STDIN_FILENO;
+
+ (void) fd_get_path(fd, &pretty);
+ log_info("Importing '%s', saving as '%s'.", strempty(pretty), local);
+ }
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = raw_import_new(&import, event, arg_image_root, on_raw_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate importer: %m");
+
+ r = raw_import_start(import, fd, local, arg_force, arg_read_only);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Import container or virtual machine images.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --force Force creation of image\n"
+ " --image-root=PATH Image root directory\n"
+ " --read-only Create a read-only image\n\n"
+ "Commands:\n"
+ " tar FILE [NAME] Import a TAR image\n"
+ " raw FILE [NAME] Import a RAW image\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FORCE,
+ ARG_IMAGE_ROOT,
+ ARG_READ_ONLY,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "image-root", required_argument, NULL, ARG_IMAGE_ROOT },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_IMAGE_ROOT:
+ arg_image_root = optarg;
+ break;
+
+ case ARG_READ_ONLY:
+ arg_read_only = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int import_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "tar", 2, 3, 0, import_tar },
+ { "raw", 2, 3, 0, import_raw },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return 0;
+
+ (void) ignore_signals(SIGPIPE, -1);
+
+ return import_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/importd.c b/src/import/importd.c
new file mode 100644
index 0000000..63f80e0
--- /dev/null
+++ b/src/import/importd.c
@@ -0,0 +1,1397 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-log-control-api.h"
+#include "bus-polkit.h"
+#include "def.h"
+#include "fd-util.h"
+#include "float.h"
+#include "hostname-util.h"
+#include "import-util.h"
+#include "machine-pool.h"
+#include "main-func.h"
+#include "missing_capability.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "service-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "web-util.h"
+
+typedef struct Transfer Transfer;
+typedef struct Manager Manager;
+
+typedef enum TransferType {
+ TRANSFER_IMPORT_TAR,
+ TRANSFER_IMPORT_RAW,
+ TRANSFER_IMPORT_FS,
+ TRANSFER_EXPORT_TAR,
+ TRANSFER_EXPORT_RAW,
+ TRANSFER_PULL_TAR,
+ TRANSFER_PULL_RAW,
+ _TRANSFER_TYPE_MAX,
+ _TRANSFER_TYPE_INVALID = -1,
+} TransferType;
+
+struct Transfer {
+ Manager *manager;
+
+ uint32_t id;
+ char *object_path;
+
+ TransferType type;
+ ImportVerify verify;
+
+ char *remote;
+ char *local;
+ bool force_local;
+ bool read_only;
+
+ char *format;
+
+ pid_t pid;
+
+ int log_fd;
+
+ char log_message[LINE_MAX];
+ size_t log_message_size;
+
+ sd_event_source *pid_event_source;
+ sd_event_source *log_event_source;
+
+ unsigned n_canceled;
+ unsigned progress_percent;
+
+ int stdin_fd;
+ int stdout_fd;
+};
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ uint32_t current_transfer_id;
+ Hashmap *transfers;
+
+ Hashmap *polkit_registry;
+
+ int notify_fd;
+
+ sd_event_source *notify_event_source;
+};
+
+#define TRANSFERS_MAX 64
+
+static const char* const transfer_type_table[_TRANSFER_TYPE_MAX] = {
+ [TRANSFER_IMPORT_TAR] = "import-tar",
+ [TRANSFER_IMPORT_RAW] = "import-raw",
+ [TRANSFER_IMPORT_FS] = "import-fs",
+ [TRANSFER_EXPORT_TAR] = "export-tar",
+ [TRANSFER_EXPORT_RAW] = "export-raw",
+ [TRANSFER_PULL_TAR] = "pull-tar",
+ [TRANSFER_PULL_RAW] = "pull-raw",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(transfer_type, TransferType);
+
+static Transfer *transfer_unref(Transfer *t) {
+ if (!t)
+ return NULL;
+
+ if (t->manager)
+ hashmap_remove(t->manager->transfers, UINT32_TO_PTR(t->id));
+
+ sd_event_source_unref(t->pid_event_source);
+ sd_event_source_unref(t->log_event_source);
+
+ free(t->remote);
+ free(t->local);
+ free(t->format);
+ free(t->object_path);
+
+ if (t->pid > 0) {
+ (void) kill_and_sigcont(t->pid, SIGKILL);
+ (void) wait_for_terminate(t->pid, NULL);
+ }
+
+ safe_close(t->log_fd);
+ safe_close(t->stdin_fd);
+ safe_close(t->stdout_fd);
+
+ return mfree(t);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Transfer*, transfer_unref);
+
+static int transfer_new(Manager *m, Transfer **ret) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ uint32_t id;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ if (hashmap_size(m->transfers) >= TRANSFERS_MAX)
+ return -E2BIG;
+
+ r = hashmap_ensure_allocated(&m->transfers, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ t = new(Transfer, 1);
+ if (!t)
+ return -ENOMEM;
+
+ *t = (Transfer) {
+ .type = _TRANSFER_TYPE_INVALID,
+ .log_fd = -1,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .verify = _IMPORT_VERIFY_INVALID,
+ .progress_percent= (unsigned) -1,
+ };
+
+ id = m->current_transfer_id + 1;
+
+ if (asprintf(&t->object_path, "/org/freedesktop/import1/transfer/_%" PRIu32, id) < 0)
+ return -ENOMEM;
+
+ r = hashmap_put(m->transfers, UINT32_TO_PTR(id), t);
+ if (r < 0)
+ return r;
+
+ m->current_transfer_id = id;
+
+ t->manager = m;
+ t->id = id;
+
+ *ret = TAKE_PTR(t);
+
+ return 0;
+}
+
+static double transfer_percent_as_double(Transfer *t) {
+ assert(t);
+
+ if (t->progress_percent == (unsigned) -1)
+ return -DBL_MAX;
+
+ return (double) t->progress_percent / 100.0;
+}
+
+static void transfer_send_log_line(Transfer *t, const char *line) {
+ int r, priority = LOG_INFO;
+
+ assert(t);
+ assert(line);
+
+ syslog_parse_priority(&line, &priority, true);
+
+ log_full(priority, "(transfer%" PRIu32 ") %s", t->id, line);
+
+ r = sd_bus_emit_signal(
+ t->manager->bus,
+ t->object_path,
+ "org.freedesktop.import1.Transfer",
+ "LogMessage",
+ "us",
+ priority,
+ line);
+ if (r < 0)
+ log_warning_errno(r, "Cannot emit log message signal, ignoring: %m");
+ }
+
+static void transfer_send_logs(Transfer *t, bool flush) {
+ assert(t);
+
+ /* Try to send out all log messages, if we can. But if we
+ * can't we remove the messages from the buffer, but don't
+ * fail */
+
+ while (t->log_message_size > 0) {
+ _cleanup_free_ char *n = NULL;
+ char *e;
+
+ if (t->log_message_size >= sizeof(t->log_message))
+ e = t->log_message + sizeof(t->log_message);
+ else {
+ char *a, *b;
+
+ a = memchr(t->log_message, 0, t->log_message_size);
+ b = memchr(t->log_message, '\n', t->log_message_size);
+
+ if (a && b)
+ e = a < b ? a : b;
+ else if (a)
+ e = a;
+ else
+ e = b;
+ }
+
+ if (!e) {
+ if (!flush)
+ return;
+
+ e = t->log_message + t->log_message_size;
+ }
+
+ n = strndup(t->log_message, e - t->log_message);
+
+ /* Skip over NUL and newlines */
+ while (e < t->log_message + t->log_message_size && IN_SET(*e, 0, '\n'))
+ e++;
+
+ memmove(t->log_message, e, t->log_message + sizeof(t->log_message) - e);
+ t->log_message_size -= e - t->log_message;
+
+ if (!n) {
+ log_oom();
+ continue;
+ }
+
+ if (isempty(n))
+ continue;
+
+ transfer_send_log_line(t, n);
+ }
+}
+
+static int transfer_finalize(Transfer *t, bool success) {
+ int r;
+
+ assert(t);
+
+ transfer_send_logs(t, true);
+
+ r = sd_bus_emit_signal(
+ t->manager->bus,
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "TransferRemoved",
+ "uos",
+ t->id,
+ t->object_path,
+ success ? "done" :
+ t->n_canceled > 0 ? "canceled" : "failed");
+
+ if (r < 0)
+ log_error_errno(r, "Cannot emit message: %m");
+
+ transfer_unref(t);
+ return 0;
+}
+
+static int transfer_cancel(Transfer *t) {
+ int r;
+
+ assert(t);
+
+ r = kill_and_sigcont(t->pid, t->n_canceled < 3 ? SIGTERM : SIGKILL);
+ if (r < 0)
+ return r;
+
+ t->n_canceled++;
+ return 0;
+}
+
+static int transfer_on_pid(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ Transfer *t = userdata;
+ bool success = false;
+
+ assert(s);
+ assert(t);
+
+ if (si->si_code == CLD_EXITED) {
+ if (si->si_status != 0)
+ log_error("Transfer process failed with exit code %i.", si->si_status);
+ else {
+ log_debug("Transfer process succeeded.");
+ success = true;
+ }
+
+ } else if (IN_SET(si->si_code, CLD_KILLED, CLD_DUMPED))
+ log_error("Transfer process terminated by signal %s.", signal_to_string(si->si_status));
+ else
+ log_error("Transfer process failed due to unknown reason.");
+
+ t->pid = 0;
+
+ return transfer_finalize(t, success);
+}
+
+static int transfer_on_log(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Transfer *t = userdata;
+ ssize_t l;
+
+ assert(s);
+ assert(t);
+
+ l = read(fd, t->log_message + t->log_message_size, sizeof(t->log_message) - t->log_message_size);
+ if (l < 0)
+ log_error_errno(errno, "Failed to read log message: %m");
+ if (l <= 0) {
+ /* EOF/read error. We just close the pipe here, and
+ * close the watch, waiting for the SIGCHLD to arrive,
+ * before we do anything else. */
+ t->log_event_source = sd_event_source_unref(t->log_event_source);
+ return 0;
+ }
+
+ t->log_message_size += l;
+
+ transfer_send_logs(t, false);
+
+ return 0;
+}
+
+static int transfer_start(Transfer *t) {
+ _cleanup_close_pair_ int pipefd[2] = { -1, -1 };
+ int r;
+
+ assert(t);
+ assert(t->pid <= 0);
+
+ if (pipe2(pipefd, O_CLOEXEC) < 0)
+ return -errno;
+
+ r = safe_fork("(sd-transfer)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &t->pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *cmd[] = {
+ NULL, /* systemd-import, systemd-import-fs, systemd-export or systemd-pull */
+ NULL, /* tar, raw */
+ NULL, /* --verify= */
+ NULL, /* verify argument */
+ NULL, /* maybe --force */
+ NULL, /* maybe --read-only */
+ NULL, /* if so: the actual URL */
+ NULL, /* maybe --format= */
+ NULL, /* if so: the actual format */
+ NULL, /* remote */
+ NULL, /* local */
+ NULL
+ };
+ unsigned k = 0;
+
+ /* Child */
+
+ pipefd[0] = safe_close(pipefd[0]);
+
+ r = rearrange_stdio(t->stdin_fd,
+ t->stdout_fd < 0 ? pipefd[1] : t->stdout_fd,
+ pipefd[1]);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set stdin/stdout/stderr: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (setenv("SYSTEMD_LOG_TARGET", "console-prefixed", 1) < 0 ||
+ setenv("NOTIFY_SOCKET", "/run/systemd/import/notify", 1) < 0) {
+ log_error_errno(errno, "setenv() failed: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ switch (t->type) {
+
+ case TRANSFER_IMPORT_TAR:
+ case TRANSFER_IMPORT_RAW:
+ cmd[k++] = SYSTEMD_IMPORT_PATH;
+ break;
+
+ case TRANSFER_IMPORT_FS:
+ cmd[k++] = SYSTEMD_IMPORT_FS_PATH;
+ break;
+
+ case TRANSFER_EXPORT_TAR:
+ case TRANSFER_EXPORT_RAW:
+ cmd[k++] = SYSTEMD_EXPORT_PATH;
+ break;
+
+ case TRANSFER_PULL_TAR:
+ case TRANSFER_PULL_RAW:
+ cmd[k++] = SYSTEMD_PULL_PATH;
+ break;
+
+ default:
+ assert_not_reached("Unexpected transfer type");
+ }
+
+ switch (t->type) {
+
+ case TRANSFER_IMPORT_TAR:
+ case TRANSFER_EXPORT_TAR:
+ case TRANSFER_PULL_TAR:
+ cmd[k++] = "tar";
+ break;
+
+ case TRANSFER_IMPORT_RAW:
+ case TRANSFER_EXPORT_RAW:
+ case TRANSFER_PULL_RAW:
+ cmd[k++] = "raw";
+ break;
+
+ case TRANSFER_IMPORT_FS:
+ cmd[k++] = "run";
+ break;
+
+ default:
+ break;
+ }
+
+ if (t->verify != _IMPORT_VERIFY_INVALID) {
+ cmd[k++] = "--verify";
+ cmd[k++] = import_verify_to_string(t->verify);
+ }
+
+ if (t->force_local)
+ cmd[k++] = "--force";
+ if (t->read_only)
+ cmd[k++] = "--read-only";
+
+ if (t->format) {
+ cmd[k++] = "--format";
+ cmd[k++] = t->format;
+ }
+
+ if (!IN_SET(t->type, TRANSFER_EXPORT_TAR, TRANSFER_EXPORT_RAW)) {
+ if (t->remote)
+ cmd[k++] = t->remote;
+ else
+ cmd[k++] = "-";
+ }
+
+ if (t->local)
+ cmd[k++] = t->local;
+ cmd[k] = NULL;
+
+ execv(cmd[0], (char * const *) cmd);
+ log_error_errno(errno, "Failed to execute %s tool: %m", cmd[0]);
+ _exit(EXIT_FAILURE);
+ }
+
+ pipefd[1] = safe_close(pipefd[1]);
+ t->log_fd = TAKE_FD(pipefd[0]);
+
+ t->stdin_fd = safe_close(t->stdin_fd);
+
+ r = sd_event_add_child(t->manager->event, &t->pid_event_source,
+ t->pid, WEXITED, transfer_on_pid, t);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(t->manager->event, &t->log_event_source,
+ t->log_fd, EPOLLIN, transfer_on_log, t);
+ if (r < 0)
+ return r;
+
+ /* Make sure always process logging before SIGCHLD */
+ r = sd_event_source_set_priority(t->log_event_source, SD_EVENT_PRIORITY_NORMAL -5);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_emit_signal(
+ t->manager->bus,
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ "TransferNew",
+ "uo",
+ t->id,
+ t->object_path);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static Manager *manager_unref(Manager *m) {
+ Transfer *t;
+
+ if (!m)
+ return NULL;
+
+ sd_event_source_unref(m->notify_event_source);
+ safe_close(m->notify_fd);
+
+ while ((t = hashmap_first(m->transfers)))
+ transfer_unref(t);
+
+ hashmap_free(m->transfers);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ m->bus = sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ return mfree(m);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
+
+static int manager_on_notify(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+
+ char buf[NOTIFY_BUFFER_MAX+1];
+ struct iovec iovec = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)-1,
+ };
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)) control;
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct ucred *ucred;
+ Manager *m = userdata;
+ char *p, *e;
+ Transfer *t;
+ ssize_t n;
+ int r;
+
+ n = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (IN_SET(n, -EAGAIN, -EINTR))
+ return 0;
+ if (n < 0)
+ return (int) n;
+
+ cmsg_close_all(&msghdr);
+
+ if (msghdr.msg_flags & MSG_TRUNC) {
+ log_warning("Got overly long notification datagram, ignoring.");
+ return 0;
+ }
+
+ ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
+ if (!ucred || ucred->pid <= 0) {
+ log_warning("Got notification datagram lacking credential information, ignoring.");
+ return 0;
+ }
+
+ HASHMAP_FOREACH(t, m->transfers)
+ if (ucred->pid == t->pid)
+ break;
+
+ if (!t) {
+ log_warning("Got notification datagram from unexpected peer, ignoring.");
+ return 0;
+ }
+
+ buf[n] = 0;
+
+ p = startswith(buf, "X_IMPORT_PROGRESS=");
+ if (!p) {
+ p = strstr(buf, "\nX_IMPORT_PROGRESS=");
+ if (!p)
+ return 0;
+
+ p += 19;
+ }
+
+ e = strchrnul(p, '\n');
+ *e = 0;
+
+ r = parse_percent(p);
+ if (r < 0) {
+ log_warning("Got invalid percent value, ignoring.");
+ return 0;
+ }
+
+ t->progress_percent = (unsigned) r;
+
+ log_debug("Got percentage from client: %u%%", t->progress_percent);
+ return 0;
+}
+
+static int manager_new(Manager **ret) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/import/notify",
+ };
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ sd_event_set_watchdog(m->event, true);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return r;
+
+ m->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (m->notify_fd < 0)
+ return -errno;
+
+ (void) mkdir_parents_label(sa.un.sun_path, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ if (bind(m->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
+ return -errno;
+
+ r = setsockopt_int(m->notify_fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(m->event, &m->notify_event_source,
+ m->notify_fd, EPOLLIN, manager_on_notify, m);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+static Transfer *manager_find(Manager *m, TransferType type, const char *remote) {
+ Transfer *t;
+
+ assert(m);
+ assert(type >= 0);
+ assert(type < _TRANSFER_TYPE_MAX);
+
+ HASHMAP_FOREACH(t, m->transfers)
+ if (t->type == type && streq_ptr(t->remote, remote))
+ return t;
+
+ return NULL;
+}
+
+static int method_import_tar_or_raw(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ int fd, force, read_only, r;
+ const char *local, *object;
+ Manager *m = userdata;
+ TransferType type;
+ struct stat st;
+ uint32_t id;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.import",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "hsbb", &fd, &local, &force, &read_only);
+ if (r < 0)
+ return r;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISREG(st.st_mode) && !S_ISFIFO(st.st_mode))
+ return -EINVAL;
+
+ if (!machine_name_is_valid(local))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Local name %s is invalid", local);
+
+ r = setup_machine_directory(error);
+ if (r < 0)
+ return r;
+
+ type = streq_ptr(sd_bus_message_get_member(msg), "ImportTar") ?
+ TRANSFER_IMPORT_TAR : TRANSFER_IMPORT_RAW;
+
+ r = transfer_new(m, &t);
+ if (r < 0)
+ return r;
+
+ t->type = type;
+ t->force_local = force;
+ t->read_only = read_only;
+
+ t->local = strdup(local);
+ if (!t->local)
+ return -ENOMEM;
+
+ t->stdin_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (t->stdin_fd < 0)
+ return -errno;
+
+ r = transfer_start(t);
+ if (r < 0)
+ return r;
+
+ object = t->object_path;
+ id = t->id;
+ t = NULL;
+
+ return sd_bus_reply_method_return(msg, "uo", id, object);
+}
+
+static int method_import_fs(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ int fd, force, read_only, r;
+ const char *local, *object;
+ Manager *m = userdata;
+ uint32_t id;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.import",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "hsbb", &fd, &local, &force, &read_only);
+ if (r < 0)
+ return r;
+
+ r = fd_verify_directory(fd);
+ if (r < 0)
+ return r;
+
+ if (!machine_name_is_valid(local))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Local name %s is invalid", local);
+
+ r = setup_machine_directory(error);
+ if (r < 0)
+ return r;
+
+ r = transfer_new(m, &t);
+ if (r < 0)
+ return r;
+
+ t->type = TRANSFER_IMPORT_FS;
+ t->force_local = force;
+ t->read_only = read_only;
+
+ t->local = strdup(local);
+ if (!t->local)
+ return -ENOMEM;
+
+ t->stdin_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (t->stdin_fd < 0)
+ return -errno;
+
+ r = transfer_start(t);
+ if (r < 0)
+ return r;
+
+ object = t->object_path;
+ id = t->id;
+ t = NULL;
+
+ return sd_bus_reply_method_return(msg, "uo", id, object);
+}
+
+static int method_export_tar_or_raw(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ int fd, r;
+ const char *local, *object, *format;
+ Manager *m = userdata;
+ TransferType type;
+ struct stat st;
+ uint32_t id;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.export",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "shs", &local, &fd, &format);
+ if (r < 0)
+ return r;
+
+ if (!machine_name_is_valid(local))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Local name %s is invalid", local);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISREG(st.st_mode) && !S_ISFIFO(st.st_mode))
+ return -EINVAL;
+
+ type = streq_ptr(sd_bus_message_get_member(msg), "ExportTar") ?
+ TRANSFER_EXPORT_TAR : TRANSFER_EXPORT_RAW;
+
+ r = transfer_new(m, &t);
+ if (r < 0)
+ return r;
+
+ t->type = type;
+
+ if (!isempty(format)) {
+ t->format = strdup(format);
+ if (!t->format)
+ return -ENOMEM;
+ }
+
+ t->local = strdup(local);
+ if (!t->local)
+ return -ENOMEM;
+
+ t->stdout_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (t->stdout_fd < 0)
+ return -errno;
+
+ r = transfer_start(t);
+ if (r < 0)
+ return r;
+
+ object = t->object_path;
+ id = t->id;
+ t = NULL;
+
+ return sd_bus_reply_method_return(msg, "uo", id, object);
+}
+
+static int method_pull_tar_or_raw(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(transfer_unrefp) Transfer *t = NULL;
+ const char *remote, *local, *verify, *object;
+ Manager *m = userdata;
+ ImportVerify v;
+ TransferType type;
+ int force, r;
+ uint32_t id;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.pull",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "sssb", &remote, &local, &verify, &force);
+ if (r < 0)
+ return r;
+
+ if (!http_url_is_valid(remote))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "URL %s is invalid", remote);
+
+ if (isempty(local))
+ local = NULL;
+ else if (!machine_name_is_valid(local))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Local name %s is invalid", local);
+
+ if (isempty(verify))
+ v = IMPORT_VERIFY_SIGNATURE;
+ else
+ v = import_verify_from_string(verify);
+ if (v < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Unknown verification mode %s", verify);
+
+ r = setup_machine_directory(error);
+ if (r < 0)
+ return r;
+
+ type = streq_ptr(sd_bus_message_get_member(msg), "PullTar") ?
+ TRANSFER_PULL_TAR : TRANSFER_PULL_RAW;
+
+ if (manager_find(m, type, remote))
+ return sd_bus_error_setf(error, BUS_ERROR_TRANSFER_IN_PROGRESS,
+ "Transfer for %s already in progress.", remote);
+
+ r = transfer_new(m, &t);
+ if (r < 0)
+ return r;
+
+ t->type = type;
+ t->verify = v;
+ t->force_local = force;
+
+ t->remote = strdup(remote);
+ if (!t->remote)
+ return -ENOMEM;
+
+ if (local) {
+ t->local = strdup(local);
+ if (!t->local)
+ return -ENOMEM;
+ }
+
+ r = transfer_start(t);
+ if (r < 0)
+ return r;
+
+ object = t->object_path;
+ id = t->id;
+ t = NULL;
+
+ return sd_bus_reply_method_return(msg, "uo", id, object);
+}
+
+static int method_list_transfers(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Transfer *t;
+ int r;
+
+ assert(msg);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(msg, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(usssdo)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(t, m->transfers) {
+
+ r = sd_bus_message_append(
+ reply,
+ "(usssdo)",
+ t->id,
+ transfer_type_to_string(t->type),
+ t->remote,
+ t->local,
+ transfer_percent_as_double(t),
+ t->object_path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_cancel(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ Transfer *t = userdata;
+ int r;
+
+ assert(msg);
+ assert(t);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.pull",
+ NULL,
+ false,
+ UID_INVALID,
+ &t->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = transfer_cancel(t);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(msg, NULL);
+}
+
+static int method_cancel_transfer(sd_bus_message *msg, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Transfer *t;
+ uint32_t id;
+ int r;
+
+ assert(msg);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ msg,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.import1.pull",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(msg, "u", &id);
+ if (r < 0)
+ return r;
+ if (id <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid transfer id");
+
+ t = hashmap_get(m->transfers, UINT32_TO_PTR(id));
+ if (!t)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_TRANSFER, "No transfer by id %" PRIu32, id);
+
+ r = transfer_cancel(t);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(msg, NULL);
+}
+
+static int property_get_progress(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Transfer *t = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(t);
+
+ return sd_bus_message_append(reply, "d", transfer_percent_as_double(t));
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, transfer_type, TransferType);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_verify, import_verify, ImportVerify);
+
+static int transfer_object_find(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ void *userdata,
+ void **found,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ Transfer *t;
+ const char *p;
+ uint32_t id;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ p = startswith(path, "/org/freedesktop/import1/transfer/_");
+ if (!p)
+ return 0;
+
+ r = safe_atou32(p, &id);
+ if (r < 0 || id == 0)
+ return 0;
+
+ t = hashmap_get(m->transfers, UINT32_TO_PTR(id));
+ if (!t)
+ return 0;
+
+ *found = t;
+ return 1;
+}
+
+static int transfer_node_enumerator(
+ sd_bus *bus,
+ const char *path,
+ void *userdata,
+ char ***nodes,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ Transfer *t;
+ unsigned k = 0;
+
+ l = new0(char*, hashmap_size(m->transfers) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(t, m->transfers) {
+
+ l[k] = strdup(t->object_path);
+ if (!l[k])
+ return -ENOMEM;
+
+ k++;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+static const sd_bus_vtable transfer_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Id", "u", NULL, offsetof(Transfer, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Local", "s", NULL, offsetof(Transfer, local), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Remote", "s", NULL, offsetof(Transfer, remote), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Transfer, type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Verify", "s", property_get_verify, offsetof(Transfer, verify), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0),
+
+ SD_BUS_METHOD("Cancel", NULL, NULL, method_cancel, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL_WITH_NAMES("LogMessage",
+ "us",
+ SD_BUS_PARAM(priority)
+ SD_BUS_PARAM(line),
+ 0),
+
+ SD_BUS_VTABLE_END,
+};
+
+static const BusObjectImplementation transfer_object = {
+ "/org/freedesktop/import1/transfer",
+ "org.freedesktop.import1.Transfer",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({transfer_vtable, transfer_object_find}),
+ .node_enumerator = transfer_node_enumerator,
+};
+
+static const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_METHOD_WITH_NAMES("ImportTar",
+ "hsbb",
+ SD_BUS_PARAM(fd)
+ SD_BUS_PARAM(local_name)
+ SD_BUS_PARAM(force)
+ SD_BUS_PARAM(read_only),
+ "uo",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path),
+ method_import_tar_or_raw,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ImportRaw",
+ "hsbb",
+ SD_BUS_PARAM(fd)
+ SD_BUS_PARAM(local_name)
+ SD_BUS_PARAM(force)
+ SD_BUS_PARAM(read_only),
+ "uo",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path),
+ method_import_tar_or_raw,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ImportFileSystem",
+ "hsbb",
+ SD_BUS_PARAM(fd)
+ SD_BUS_PARAM(local_name)
+ SD_BUS_PARAM(force)
+ SD_BUS_PARAM(read_only),
+ "uo",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path),
+ method_import_fs,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ExportTar",
+ "shs",
+ SD_BUS_PARAM(local_name)
+ SD_BUS_PARAM(fd)
+ SD_BUS_PARAM(format),
+ "uo",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path),
+ method_export_tar_or_raw,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ExportRaw",
+ "shs",
+ SD_BUS_PARAM(local_name)
+ SD_BUS_PARAM(fd)
+ SD_BUS_PARAM(format),
+ "uo",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path),
+ method_export_tar_or_raw,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("PullTar",
+ "sssb",
+ SD_BUS_PARAM(url)
+ SD_BUS_PARAM(local_name)
+ SD_BUS_PARAM(verify_mode)
+ SD_BUS_PARAM(force),
+ "uo",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path),
+ method_pull_tar_or_raw,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("PullRaw",
+ "sssb",
+ SD_BUS_PARAM(url)
+ SD_BUS_PARAM(local_name)
+ SD_BUS_PARAM(verify_mode)
+ SD_BUS_PARAM(force),
+ "uo",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path),
+ method_pull_tar_or_raw,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListTransfers",
+ NULL,,
+ "a(usssdo)",
+ SD_BUS_PARAM(transfers),
+ method_list_transfers,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CancelTransfer",
+ "u",
+ SD_BUS_PARAM(transfer_id),
+ NULL,,
+ method_cancel_transfer,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL_WITH_NAMES("TransferNew",
+ "uo",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("TransferRemoved",
+ "uos",
+ SD_BUS_PARAM(transfer_id)
+ SD_BUS_PARAM(transfer_path)
+ SD_BUS_PARAM(result),
+ 0),
+
+ SD_BUS_VTABLE_END,
+};
+
+static const BusObjectImplementation manager_object = {
+ "/org/freedesktop/import1",
+ "org.freedesktop.import1.Manager",
+ .vtables = BUS_VTABLES(manager_vtable),
+ .children = BUS_IMPLEMENTATIONS(&transfer_object),
+};
+
+static int manager_add_bus_objects(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = bus_add_implementation(m->bus, &manager_object, m);
+ if (r < 0)
+ return r;
+
+ r = bus_log_control_api_register(m->bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.import1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
+
+static bool manager_check_idle(void *userdata) {
+ Manager *m = userdata;
+
+ return hashmap_isempty(m->transfers);
+}
+
+static int manager_run(Manager *m) {
+ assert(m);
+
+ return bus_event_loop_with_idle(
+ m->event,
+ m->bus,
+ "org.freedesktop.import1",
+ DEFAULT_EXIT_USEC,
+ manager_check_idle,
+ m);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ log_setup_service();
+
+ r = service_parse_argv("systemd-importd.service",
+ "VM and container image import and export service.",
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object),
+ argc, argv);
+ if (r <= 0)
+ return r;
+
+ umask(0022);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager object: %m");
+
+ r = manager_add_bus_objects(m);
+ if (r < 0)
+ return r;
+
+ r = manager_run(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/meson.build b/src/import/meson.build
new file mode 100644
index 0000000..2207b86
--- /dev/null
+++ b/src/import/meson.build
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_importd_sources = files('''
+ importd.c
+'''.split())
+
+systemd_pull_sources = files('''
+ pull.c
+ pull-raw.c
+ pull-raw.h
+ pull-tar.c
+ pull-tar.h
+ pull-job.c
+ pull-job.h
+ pull-common.c
+ pull-common.h
+ import-common.c
+ import-common.h
+ import-compress.c
+ import-compress.h
+ curl-util.c
+ curl-util.h
+ qcow2-util.c
+ qcow2-util.h
+'''.split())
+
+systemd_import_sources = files('''
+ import.c
+ import-raw.c
+ import-raw.h
+ import-tar.c
+ import-tar.h
+ import-common.c
+ import-common.h
+ import-compress.c
+ import-compress.h
+ qcow2-util.c
+ qcow2-util.h
+'''.split())
+
+systemd_import_fs_sources = files('''
+ import-fs.c
+ import-common.c
+ import-common.h
+'''.split())
+
+systemd_export_sources = files('''
+ export.c
+ export-tar.c
+ export-tar.h
+ export-raw.c
+ export-raw.h
+ import-common.c
+ import-common.h
+ import-compress.c
+ import-compress.h
+'''.split())
+
+if conf.get('ENABLE_IMPORTD') == 1
+ install_data('org.freedesktop.import1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.import1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.import1.policy',
+ install_dir : polkitpolicydir)
+
+ install_data('import-pubring.gpg',
+ install_dir : rootlibexecdir)
+ # TODO: shouldn't this be in pkgdatadir?
+endif
+
+tests += [
+ [['src/import/test-qcow2.c',
+ 'src/import/qcow2-util.c',
+ 'src/import/qcow2-util.h'],
+ [libshared],
+ [libz],
+ 'HAVE_ZLIB', 'manual'],
+]
diff --git a/src/import/org.freedesktop.import1.conf b/src/import/org.freedesktop.import1.conf
new file mode 100644
index 0000000..4838e79
--- /dev/null
+++ b/src/import/org.freedesktop.import1.conf
@@ -0,0 +1,84 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.import1"/>
+ <allow send_destination="org.freedesktop.import1"/>
+ <allow receive_sender="org.freedesktop.import1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.import1"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ListTransfers"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="CancelTransfer"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ImportTar"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ImportRaw"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ImportFileSystem"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ExportTar"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="ExportRaw"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="PullTar"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Manager"
+ send_member="PullRaw"/>
+
+ <allow send_destination="org.freedesktop.import1"
+ send_interface="org.freedesktop.import1.Transfer"
+ send_member="Cancel"/>
+
+ <allow receive_sender="org.freedesktop.import1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/import/org.freedesktop.import1.policy b/src/import/org.freedesktop.import1.policy
new file mode 100644
index 0000000..9736816
--- /dev/null
+++ b/src/import/org.freedesktop.import1.policy
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.import1.import">
+ <description gettext-domain="systemd">Import a VM or container image</description>
+ <message gettext-domain="systemd">Authentication is required to import a VM or container image</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.import1.export">
+ <description gettext-domain="systemd">Export a VM or container image</description>
+ <message gettext-domain="systemd">Authentication is required to export a VM or container image</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.import1.pull">
+ <description gettext-domain="systemd">Download a VM or container image</description>
+ <message gettext-domain="systemd">Authentication is required to download a VM or container image</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/import/org.freedesktop.import1.service b/src/import/org.freedesktop.import1.service
new file mode 100644
index 0000000..4fe921f
--- /dev/null
+++ b/src/import/org.freedesktop.import1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.import1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.import1.service
diff --git a/src/import/pull-common.c b/src/import/pull-common.c
new file mode 100644
index 0000000..33be609
--- /dev/null
+++ b/src/import/pull-common.c
@@ -0,0 +1,526 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/prctl.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "capability-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "pull-common.h"
+#include "pull-job.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#include "signal-util.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+#include "web-util.h"
+
+#define FILENAME_ESCAPE "/.#\"\'"
+#define HASH_URL_THRESHOLD_LENGTH (_POSIX_PATH_MAX - 16)
+
+int pull_find_old_etags(
+ const char *url,
+ const char *image_root,
+ int dt,
+ const char *prefix,
+ const char *suffix,
+ char ***etags) {
+
+ _cleanup_free_ char *escaped_url = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ struct dirent *de;
+ int r;
+
+ assert(url);
+ assert(etags);
+
+ if (!image_root)
+ image_root = "/var/lib/machines";
+
+ escaped_url = xescape(url, FILENAME_ESCAPE);
+ if (!escaped_url)
+ return -ENOMEM;
+
+ d = opendir(image_root);
+ if (!d) {
+ if (errno == ENOENT) {
+ *etags = NULL;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_free_ char *u = NULL;
+ const char *a, *b;
+
+ if (de->d_type != DT_UNKNOWN &&
+ de->d_type != dt)
+ continue;
+
+ if (prefix) {
+ a = startswith(de->d_name, prefix);
+ if (!a)
+ continue;
+ } else
+ a = de->d_name;
+
+ a = startswith(a, escaped_url);
+ if (!a)
+ continue;
+
+ a = startswith(a, ".");
+ if (!a)
+ continue;
+
+ if (suffix) {
+ b = endswith(de->d_name, suffix);
+ if (!b)
+ continue;
+ } else
+ b = strchr(de->d_name, 0);
+
+ if (a >= b)
+ continue;
+
+ r = cunescape_length(a, b - a, 0, &u);
+ if (r < 0)
+ return r;
+
+ if (!http_etag_is_valid(u))
+ continue;
+
+ r = strv_consume(&l, TAKE_PTR(u));
+ if (r < 0)
+ return r;
+ }
+
+ *etags = TAKE_PTR(l);
+
+ return 0;
+}
+
+int pull_make_local_copy(const char *final, const char *image_root, const char *local, bool force_local) {
+ const char *p;
+ int r;
+
+ assert(final);
+ assert(local);
+
+ if (!image_root)
+ image_root = "/var/lib/machines";
+
+ p = prefix_roota(image_root, local);
+
+ if (force_local)
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = btrfs_subvol_snapshot(final, p,
+ BTRFS_SNAPSHOT_QUOTA|
+ BTRFS_SNAPSHOT_FALLBACK_COPY|
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY|
+ BTRFS_SNAPSHOT_RECURSIVE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create local image: %m");
+
+ log_info("Created new local image '%s'.", local);
+
+ return 0;
+}
+
+static int hash_url(const char *url, char **ret) {
+ uint64_t h;
+ static const sd_id128_t k = SD_ID128_ARRAY(df,89,16,87,01,cc,42,30,98,ab,4a,19,a6,a5,63,4f);
+
+ assert(url);
+
+ h = siphash24(url, strlen(url), k.bytes);
+ if (asprintf(ret, "%"PRIx64, h) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int pull_make_path(const char *url, const char *etag, const char *image_root, const char *prefix, const char *suffix, char **ret) {
+ _cleanup_free_ char *escaped_url = NULL, *escaped_etag = NULL;
+ char *path;
+
+ assert(url);
+ assert(ret);
+
+ if (!image_root)
+ image_root = "/var/lib/machines";
+
+ escaped_url = xescape(url, FILENAME_ESCAPE);
+ if (!escaped_url)
+ return -ENOMEM;
+
+ if (etag) {
+ escaped_etag = xescape(etag, FILENAME_ESCAPE);
+ if (!escaped_etag)
+ return -ENOMEM;
+ }
+
+ path = strjoin(image_root, "/", strempty(prefix), escaped_url, escaped_etag ? "." : "",
+ strempty(escaped_etag), strempty(suffix));
+ if (!path)
+ return -ENOMEM;
+
+ /* URLs might make the path longer than the maximum allowed length for a file name.
+ * When that happens, a URL hash is used instead. Paths returned by this function
+ * can be later used with tempfn_random() which adds 16 bytes to the resulting name. */
+ if (strlen(path) >= HASH_URL_THRESHOLD_LENGTH) {
+ _cleanup_free_ char *hash = NULL;
+ int r;
+
+ free(path);
+
+ r = hash_url(url, &hash);
+ if (r < 0)
+ return r;
+
+ path = strjoin(image_root, "/", strempty(prefix), hash, escaped_etag ? "." : "",
+ strempty(escaped_etag), strempty(suffix));
+ if (!path)
+ return -ENOMEM;
+ }
+
+ *ret = path;
+ return 0;
+}
+
+int pull_make_auxiliary_job(
+ PullJob **ret,
+ const char *url,
+ int (*strip_suffixes)(const char *name, char **ret),
+ const char *suffix,
+ CurlGlue *glue,
+ PullJobFinished on_finished,
+ void *userdata) {
+
+ _cleanup_free_ char *last_component = NULL, *ll = NULL, *auxiliary_url = NULL;
+ _cleanup_(pull_job_unrefp) PullJob *job = NULL;
+ const char *q;
+ int r;
+
+ assert(ret);
+ assert(url);
+ assert(strip_suffixes);
+ assert(glue);
+
+ r = import_url_last_component(url, &last_component);
+ if (r < 0)
+ return r;
+
+ r = strip_suffixes(last_component, &ll);
+ if (r < 0)
+ return r;
+
+ q = strjoina(ll, suffix);
+
+ r = import_url_change_last_component(url, q, &auxiliary_url);
+ if (r < 0)
+ return r;
+
+ r = pull_job_new(&job, auxiliary_url, glue, userdata);
+ if (r < 0)
+ return r;
+
+ job->on_finished = on_finished;
+ job->compressed_max = job->uncompressed_max = 1ULL * 1024ULL * 1024ULL;
+
+ *ret = TAKE_PTR(job);
+
+ return 0;
+}
+
+int pull_make_verification_jobs(
+ PullJob **ret_checksum_job,
+ PullJob **ret_signature_job,
+ ImportVerify verify,
+ const char *url,
+ CurlGlue *glue,
+ PullJobFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(pull_job_unrefp) PullJob *checksum_job = NULL, *signature_job = NULL;
+ int r;
+ const char *chksums = NULL;
+
+ assert(ret_checksum_job);
+ assert(ret_signature_job);
+ assert(verify >= 0);
+ assert(verify < _IMPORT_VERIFY_MAX);
+ assert(url);
+ assert(glue);
+
+ if (verify != IMPORT_VERIFY_NO) {
+ _cleanup_free_ char *checksum_url = NULL, *fn = NULL;
+
+ /* Queue jobs for the checksum file for the image. */
+ r = import_url_last_component(url, &fn);
+ if (r < 0)
+ return r;
+
+ chksums = strjoina(fn, ".sha256");
+
+ r = import_url_change_last_component(url, chksums, &checksum_url);
+ if (r < 0)
+ return r;
+
+ r = pull_job_new(&checksum_job, checksum_url, glue, userdata);
+ if (r < 0)
+ return r;
+
+ checksum_job->on_finished = on_finished;
+ checksum_job->uncompressed_max = checksum_job->compressed_max = 1ULL * 1024ULL * 1024ULL;
+ }
+
+ if (verify == IMPORT_VERIFY_SIGNATURE) {
+ _cleanup_free_ char *signature_url = NULL;
+
+ /* Queue job for the SHA256SUMS.gpg file for the image. */
+ r = import_url_change_last_component(url, "SHA256SUMS.gpg", &signature_url);
+ if (r < 0)
+ return r;
+
+ r = pull_job_new(&signature_job, signature_url, glue, userdata);
+ if (r < 0)
+ return r;
+
+ signature_job->on_finished = on_finished;
+ signature_job->uncompressed_max = signature_job->compressed_max = 1ULL * 1024ULL * 1024ULL;
+ }
+
+ *ret_checksum_job = checksum_job;
+ *ret_signature_job = signature_job;
+
+ checksum_job = signature_job = NULL;
+
+ return 0;
+}
+
+static int verify_one(PullJob *checksum_job, PullJob *job) {
+ _cleanup_free_ char *fn = NULL;
+ const char *line, *p;
+ int r;
+
+ assert(checksum_job);
+
+ if (!job)
+ return 0;
+
+ assert(IN_SET(job->state, PULL_JOB_DONE, PULL_JOB_FAILED));
+
+ /* Don't verify the checksum if we didn't actually successfully download something new */
+ if (job->state != PULL_JOB_DONE)
+ return 0;
+ if (job->error != 0)
+ return 0;
+ if (job->etag_exists)
+ return 0;
+
+ assert(job->calc_checksum);
+ assert(job->checksum);
+
+ r = import_url_last_component(job->url, &fn);
+ if (r < 0)
+ return log_oom();
+
+ if (!filename_is_valid(fn))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Cannot verify checksum, could not determine server-side file name.");
+
+ line = strjoina(job->checksum, " *", fn, "\n");
+
+ p = memmem(checksum_job->payload,
+ checksum_job->payload_size,
+ line,
+ strlen(line));
+
+ if (!p) {
+ line = strjoina(job->checksum, " ", fn, "\n");
+
+ p = memmem(checksum_job->payload,
+ checksum_job->payload_size,
+ line,
+ strlen(line));
+ }
+
+ if (!p || (p != (char*) checksum_job->payload && p[-1] != '\n'))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "DOWNLOAD INVALID: Checksum of %s file did not checkout, file has been tampered with.", fn);
+
+ log_info("SHA256 checksum of %s is valid.", job->url);
+ return 1;
+}
+
+int pull_verify(PullJob *main_job,
+ PullJob *roothash_job,
+ PullJob *settings_job,
+ PullJob *checksum_job,
+ PullJob *signature_job) {
+
+ _cleanup_close_pair_ int gpg_pipe[2] = { -1, -1 };
+ _cleanup_close_ int sig_file = -1;
+ char sig_file_path[] = "/tmp/sigXXXXXX", gpg_home[] = "/tmp/gpghomeXXXXXX";
+ _cleanup_(sigkill_waitp) pid_t pid = 0;
+ bool gpg_home_created = false;
+ int r;
+
+ assert(main_job);
+ assert(main_job->state == PULL_JOB_DONE);
+
+ if (!checksum_job)
+ return 0;
+
+ assert(main_job->calc_checksum);
+ assert(main_job->checksum);
+
+ assert(checksum_job->state == PULL_JOB_DONE);
+
+ if (!checksum_job->payload || checksum_job->payload_size <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Checksum is empty, cannot verify.");
+
+ r = verify_one(checksum_job, main_job);
+ if (r < 0)
+ return r;
+
+ r = verify_one(checksum_job, roothash_job);
+ if (r < 0)
+ return r;
+
+ r = verify_one(checksum_job, settings_job);
+ if (r < 0)
+ return r;
+
+ if (!signature_job)
+ return 0;
+
+ if (checksum_job->style == VERIFICATION_PER_FILE)
+ signature_job = checksum_job;
+
+ assert(signature_job->state == PULL_JOB_DONE);
+
+ if (!signature_job->payload || signature_job->payload_size <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Signature is empty, cannot verify.");
+
+ r = pipe2(gpg_pipe, O_CLOEXEC);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to create pipe for gpg: %m");
+
+ sig_file = mkostemp(sig_file_path, O_RDWR);
+ if (sig_file < 0)
+ return log_error_errno(errno, "Failed to create temporary file: %m");
+
+ r = loop_write(sig_file, signature_job->payload, signature_job->payload_size, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write to temporary file: %m");
+ goto finish;
+ }
+
+ if (!mkdtemp(gpg_home)) {
+ r = log_error_errno(errno, "Failed to create temporary home for gpg: %m");
+ goto finish;
+ }
+
+ gpg_home_created = true;
+
+ r = safe_fork("(gpg)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *cmd[] = {
+ "gpg",
+ "--no-options",
+ "--no-default-keyring",
+ "--no-auto-key-locate",
+ "--no-auto-check-trustdb",
+ "--batch",
+ "--trust-model=always",
+ NULL, /* --homedir= */
+ NULL, /* --keyring= */
+ NULL, /* --verify */
+ NULL, /* signature file */
+ NULL, /* dash */
+ NULL /* trailing NULL */
+ };
+ unsigned k = ELEMENTSOF(cmd) - 6;
+
+ /* Child */
+
+ gpg_pipe[1] = safe_close(gpg_pipe[1]);
+
+ r = rearrange_stdio(gpg_pipe[0], -1, STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rearrange stdin/stdout: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ cmd[k++] = strjoina("--homedir=", gpg_home);
+
+ /* We add the user keyring only to the command line
+ * arguments, if it's around since gpg fails
+ * otherwise. */
+ if (access(USER_KEYRING_PATH, F_OK) >= 0)
+ cmd[k++] = "--keyring=" USER_KEYRING_PATH;
+ else
+ cmd[k++] = "--keyring=" VENDOR_KEYRING_PATH;
+
+ cmd[k++] = "--verify";
+ if (checksum_job->style == VERIFICATION_PER_DIRECTORY) {
+ cmd[k++] = sig_file_path;
+ cmd[k++] = "-";
+ cmd[k++] = NULL;
+ }
+
+ execvp("gpg2", (char * const *) cmd);
+ execvp("gpg", (char * const *) cmd);
+ log_error_errno(errno, "Failed to execute gpg: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ gpg_pipe[0] = safe_close(gpg_pipe[0]);
+
+ r = loop_write(gpg_pipe[1], checksum_job->payload, checksum_job->payload_size, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write to pipe: %m");
+ goto finish;
+ }
+
+ gpg_pipe[1] = safe_close(gpg_pipe[1]);
+
+ r = wait_for_terminate_and_check("gpg", pid, WAIT_LOG_ABNORMAL);
+ pid = 0;
+ if (r < 0)
+ goto finish;
+ if (r != EXIT_SUCCESS) {
+ log_error("DOWNLOAD INVALID: Signature verification failed.");
+ r = -EBADMSG;
+ } else {
+ log_info("Signature verification succeeded.");
+ r = 0;
+ }
+
+finish:
+ (void) unlink(sig_file_path);
+
+ if (gpg_home_created)
+ (void) rm_rf(gpg_home, REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ return r;
+}
diff --git a/src/import/pull-common.h b/src/import/pull-common.h
new file mode 100644
index 0000000..025bcee
--- /dev/null
+++ b/src/import/pull-common.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "import-util.h"
+#include "pull-job.h"
+
+int pull_make_local_copy(const char *final, const char *root, const char *local, bool force_local);
+
+int pull_find_old_etags(const char *url, const char *root, int dt, const char *prefix, const char *suffix, char ***etags);
+
+int pull_make_path(const char *url, const char *etag, const char *image_root, const char *prefix, const char *suffix, char **ret);
+
+int pull_make_auxiliary_job(PullJob **ret, const char *url, int (*strip_suffixes)(const char *name, char **ret), const char *suffix, CurlGlue *glue, PullJobFinished on_finished, void *userdata);
+int pull_make_verification_jobs(PullJob **ret_checksum_job, PullJob **ret_signature_job, ImportVerify verify, const char *url, CurlGlue *glue, PullJobFinished on_finished, void *userdata);
+
+int pull_verify(PullJob *main_job, PullJob *roothash_job, PullJob *settings_job, PullJob *checksum_job, PullJob *signature_job);
diff --git a/src/import/pull-job.c b/src/import/pull-job.c
new file mode 100644
index 0000000..eea0038
--- /dev/null
+++ b/src/import/pull-job.c
@@ -0,0 +1,639 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+#include "import-util.h"
+#include "io-util.h"
+#include "machine-pool.h"
+#include "parse-util.h"
+#include "pull-common.h"
+#include "pull-job.h"
+#include "string-util.h"
+#include "strv.h"
+#include "xattr-util.h"
+
+PullJob* pull_job_unref(PullJob *j) {
+ if (!j)
+ return NULL;
+
+ curl_glue_remove_and_free(j->glue, j->curl);
+ curl_slist_free_all(j->request_header);
+
+ safe_close(j->disk_fd);
+
+ import_compress_free(&j->compress);
+
+ if (j->checksum_context)
+ gcry_md_close(j->checksum_context);
+
+ free(j->url);
+ free(j->etag);
+ strv_free(j->old_etags);
+ free(j->payload);
+ free(j->checksum);
+
+ return mfree(j);
+}
+
+static void pull_job_finish(PullJob *j, int ret) {
+ assert(j);
+
+ if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED))
+ return;
+
+ if (ret == 0) {
+ j->state = PULL_JOB_DONE;
+ j->progress_percent = 100;
+ log_info("Download of %s complete.", j->url);
+ } else {
+ j->state = PULL_JOB_FAILED;
+ j->error = ret;
+ }
+
+ if (j->on_finished)
+ j->on_finished(j);
+}
+
+static int pull_job_restart(PullJob *j) {
+ int r;
+ char *chksum_url = NULL;
+
+ r = import_url_change_last_component(j->url, "SHA256SUMS", &chksum_url);
+ if (r < 0)
+ return r;
+
+ free(j->url);
+ j->url = chksum_url;
+ j->state = PULL_JOB_INIT;
+ j->payload = mfree(j->payload);
+ j->payload_size = 0;
+ j->payload_allocated = 0;
+ j->written_compressed = 0;
+ j->written_uncompressed = 0;
+
+ r = pull_job_begin(j);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result) {
+ PullJob *j = NULL;
+ CURLcode code;
+ long status;
+ int r;
+
+ if (curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&j) != CURLE_OK)
+ return;
+
+ if (!j || IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED))
+ return;
+
+ if (result != CURLE_OK) {
+ log_error("Transfer failed: %s", curl_easy_strerror(result));
+ r = -EIO;
+ goto finish;
+ }
+
+ code = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
+ if (code != CURLE_OK) {
+ log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
+ r = -EIO;
+ goto finish;
+ } else if (status == 304) {
+ log_info("Image already downloaded. Skipping download.");
+ j->etag_exists = true;
+ r = 0;
+ goto finish;
+ } else if (status >= 300) {
+ if (status == 404 && j->style == VERIFICATION_PER_FILE) {
+
+ /* retry pull job with SHA256SUMS file */
+ r = pull_job_restart(j);
+ if (r < 0)
+ goto finish;
+
+ code = curl_easy_getinfo(j->curl, CURLINFO_RESPONSE_CODE, &status);
+ if (code != CURLE_OK) {
+ log_error("Failed to retrieve response code: %s", curl_easy_strerror(code));
+ r = -EIO;
+ goto finish;
+ }
+
+ if (status == 0) {
+ j->style = VERIFICATION_PER_DIRECTORY;
+ return;
+ }
+ }
+
+ log_error("HTTP request to %s failed with code %li.", j->url, status);
+ r = -EIO;
+ goto finish;
+ } else if (status < 200) {
+ log_error("HTTP request to %s finished with unexpected code %li.", j->url, status);
+ r = -EIO;
+ goto finish;
+ }
+
+ if (j->state != PULL_JOB_RUNNING) {
+ log_error("Premature connection termination.");
+ r = -EIO;
+ goto finish;
+ }
+
+ if (j->content_length != (uint64_t) -1 &&
+ j->content_length != j->written_compressed) {
+ log_error("Download truncated.");
+ r = -EIO;
+ goto finish;
+ }
+
+ if (j->checksum_context) {
+ uint8_t *k;
+
+ k = gcry_md_read(j->checksum_context, GCRY_MD_SHA256);
+ if (!k) {
+ log_error("Failed to get checksum.");
+ r = -EIO;
+ goto finish;
+ }
+
+ j->checksum = hexmem(k, gcry_md_get_algo_dlen(GCRY_MD_SHA256));
+ if (!j->checksum) {
+ r = log_oom();
+ goto finish;
+ }
+
+ log_debug("SHA256 of %s is %s.", j->url, j->checksum);
+ }
+
+ if (j->disk_fd >= 0 && j->allow_sparse) {
+ /* Make sure the file size is right, in case the file was
+ * sparse and we just seeked for the last part */
+
+ if (ftruncate(j->disk_fd, j->written_uncompressed) < 0) {
+ r = log_error_errno(errno, "Failed to truncate file: %m");
+ goto finish;
+ }
+
+ if (j->etag)
+ (void) fsetxattr(j->disk_fd, "user.source_etag", j->etag, strlen(j->etag), 0);
+ if (j->url)
+ (void) fsetxattr(j->disk_fd, "user.source_url", j->url, strlen(j->url), 0);
+
+ if (j->mtime != 0) {
+ struct timespec ut[2];
+
+ timespec_store(&ut[0], j->mtime);
+ ut[1] = ut[0];
+ (void) futimens(j->disk_fd, ut);
+
+ (void) fd_setcrtime(j->disk_fd, j->mtime);
+ }
+ }
+
+ r = 0;
+
+finish:
+ pull_job_finish(j, r);
+}
+
+static int pull_job_write_uncompressed(const void *p, size_t sz, void *userdata) {
+ PullJob *j = userdata;
+ ssize_t n;
+
+ assert(j);
+ assert(p);
+
+ if (sz <= 0)
+ return 0;
+
+ if (j->written_uncompressed + sz < j->written_uncompressed)
+ return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW),
+ "File too large, overflow");
+
+ if (j->written_uncompressed + sz > j->uncompressed_max)
+ return log_error_errno(SYNTHETIC_ERRNO(EFBIG),
+ "File overly large, refusing");
+
+ if (j->disk_fd >= 0) {
+
+ if (j->allow_sparse)
+ n = sparse_write(j->disk_fd, p, sz, 64);
+ else {
+ n = write(j->disk_fd, p, sz);
+ if (n < 0)
+ n = -errno;
+ }
+ if (n < 0)
+ return log_error_errno((int) n, "Failed to write file: %m");
+ if ((size_t) n < sz)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write");
+ } else {
+
+ if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz))
+ return log_oom();
+
+ memcpy(j->payload + j->payload_size, p, sz);
+ j->payload_size += sz;
+ }
+
+ j->written_uncompressed += sz;
+
+ return 0;
+}
+
+static int pull_job_write_compressed(PullJob *j, void *p, size_t sz) {
+ int r;
+
+ assert(j);
+ assert(p);
+
+ if (sz <= 0)
+ return 0;
+
+ if (j->written_compressed + sz < j->written_compressed)
+ return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "File too large, overflow");
+
+ if (j->written_compressed + sz > j->compressed_max)
+ return log_error_errno(SYNTHETIC_ERRNO(EFBIG), "File overly large, refusing.");
+
+ if (j->content_length != (uint64_t) -1 &&
+ j->written_compressed + sz > j->content_length)
+ return log_error_errno(SYNTHETIC_ERRNO(EFBIG),
+ "Content length incorrect.");
+
+ if (j->checksum_context)
+ gcry_md_write(j->checksum_context, p, sz);
+
+ r = import_uncompress(&j->compress, p, sz, pull_job_write_uncompressed, j);
+ if (r < 0)
+ return r;
+
+ j->written_compressed += sz;
+
+ return 0;
+}
+
+static int pull_job_open_disk(PullJob *j) {
+ int r;
+
+ assert(j);
+
+ if (j->on_open_disk) {
+ r = j->on_open_disk(j);
+ if (r < 0)
+ return r;
+ }
+
+ if (j->disk_fd >= 0) {
+ /* Check if we can do sparse files */
+
+ if (lseek(j->disk_fd, SEEK_SET, 0) == 0)
+ j->allow_sparse = true;
+ else {
+ if (errno != ESPIPE)
+ return log_error_errno(errno, "Failed to seek on file descriptor: %m");
+
+ j->allow_sparse = false;
+ }
+ }
+
+ if (j->calc_checksum) {
+ initialize_libgcrypt(false);
+
+ if (gcry_md_open(&j->checksum_context, GCRY_MD_SHA256, 0) != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to initialize hash context.");
+ }
+
+ return 0;
+}
+
+static int pull_job_detect_compression(PullJob *j) {
+ _cleanup_free_ uint8_t *stub = NULL;
+ size_t stub_size;
+
+ int r;
+
+ assert(j);
+
+ r = import_uncompress_detect(&j->compress, j->payload, j->payload_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize compressor: %m");
+ if (r == 0)
+ return 0;
+
+ log_debug("Stream is compressed: %s", import_compress_type_to_string(j->compress.type));
+
+ r = pull_job_open_disk(j);
+ if (r < 0)
+ return r;
+
+ /* Now, take the payload we read so far, and decompress it */
+ stub = j->payload;
+ stub_size = j->payload_size;
+
+ j->payload = NULL;
+ j->payload_size = 0;
+ j->payload_allocated = 0;
+
+ j->state = PULL_JOB_RUNNING;
+
+ r = pull_job_write_compressed(j, stub, stub_size);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static size_t pull_job_write_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
+ PullJob *j = userdata;
+ size_t sz = size * nmemb;
+ int r;
+
+ assert(contents);
+ assert(j);
+
+ switch (j->state) {
+
+ case PULL_JOB_ANALYZING:
+ /* Let's first check what it actually is */
+
+ if (!GREEDY_REALLOC(j->payload, j->payload_allocated, j->payload_size + sz)) {
+ r = log_oom();
+ goto fail;
+ }
+
+ memcpy(j->payload + j->payload_size, contents, sz);
+ j->payload_size += sz;
+
+ r = pull_job_detect_compression(j);
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case PULL_JOB_RUNNING:
+
+ r = pull_job_write_compressed(j, contents, sz);
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case PULL_JOB_DONE:
+ case PULL_JOB_FAILED:
+ r = -ESTALE;
+ goto fail;
+
+ default:
+ assert_not_reached("Impossible state.");
+ }
+
+ return sz;
+
+fail:
+ pull_job_finish(j, r);
+ return 0;
+}
+
+static size_t pull_job_header_callback(void *contents, size_t size, size_t nmemb, void *userdata) {
+ PullJob *j = userdata;
+ size_t sz = size * nmemb;
+ _cleanup_free_ char *length = NULL, *last_modified = NULL;
+ char *etag;
+ int r;
+
+ assert(contents);
+ assert(j);
+
+ if (IN_SET(j->state, PULL_JOB_DONE, PULL_JOB_FAILED)) {
+ r = -ESTALE;
+ goto fail;
+ }
+
+ assert(j->state == PULL_JOB_ANALYZING);
+
+ r = curl_header_strdup(contents, sz, "ETag:", &etag);
+ if (r < 0) {
+ log_oom();
+ goto fail;
+ }
+ if (r > 0) {
+ free(j->etag);
+ j->etag = etag;
+
+ if (strv_contains(j->old_etags, j->etag)) {
+ log_info("Image already downloaded. Skipping download.");
+ j->etag_exists = true;
+ pull_job_finish(j, 0);
+ return sz;
+ }
+
+ return sz;
+ }
+
+ r = curl_header_strdup(contents, sz, "Content-Length:", &length);
+ if (r < 0) {
+ log_oom();
+ goto fail;
+ }
+ if (r > 0) {
+ (void) safe_atou64(length, &j->content_length);
+
+ if (j->content_length != (uint64_t) -1) {
+ char bytes[FORMAT_BYTES_MAX];
+
+ if (j->content_length > j->compressed_max) {
+ log_error("Content too large.");
+ r = -EFBIG;
+ goto fail;
+ }
+
+ log_info("Downloading %s for %s.", format_bytes(bytes, sizeof(bytes), j->content_length), j->url);
+ }
+
+ return sz;
+ }
+
+ r = curl_header_strdup(contents, sz, "Last-Modified:", &last_modified);
+ if (r < 0) {
+ log_oom();
+ goto fail;
+ }
+ if (r > 0) {
+ (void) curl_parse_http_time(last_modified, &j->mtime);
+ return sz;
+ }
+
+ if (j->on_header) {
+ r = j->on_header(j, contents, sz);
+ if (r < 0)
+ goto fail;
+ }
+
+ return sz;
+
+fail:
+ pull_job_finish(j, r);
+ return 0;
+}
+
+static int pull_job_progress_callback(void *userdata, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) {
+ PullJob *j = userdata;
+ unsigned percent;
+ usec_t n;
+
+ assert(j);
+
+ if (dltotal <= 0)
+ return 0;
+
+ percent = ((100 * dlnow) / dltotal);
+ n = now(CLOCK_MONOTONIC);
+
+ if (n > j->last_status_usec + USEC_PER_SEC &&
+ percent != j->progress_percent &&
+ dlnow < dltotal) {
+ char buf[FORMAT_TIMESPAN_MAX];
+
+ if (n - j->start_usec > USEC_PER_SEC && dlnow > 0) {
+ char y[FORMAT_BYTES_MAX];
+ usec_t left, done;
+
+ done = n - j->start_usec;
+ left = (usec_t) (((double) done * (double) dltotal) / dlnow) - done;
+
+ log_info("Got %u%% of %s. %s left at %s/s.",
+ percent,
+ j->url,
+ format_timespan(buf, sizeof(buf), left, USEC_PER_SEC),
+ format_bytes(y, sizeof(y), (uint64_t) ((double) dlnow / ((double) done / (double) USEC_PER_SEC))));
+ } else
+ log_info("Got %u%% of %s.", percent, j->url);
+
+ j->progress_percent = percent;
+ j->last_status_usec = n;
+
+ if (j->on_progress)
+ j->on_progress(j);
+ }
+
+ return 0;
+}
+
+int pull_job_new(PullJob **ret, const char *url, CurlGlue *glue, void *userdata) {
+ _cleanup_(pull_job_unrefp) PullJob *j = NULL;
+ _cleanup_free_ char *u = NULL;
+
+ assert(url);
+ assert(glue);
+ assert(ret);
+
+ u = strdup(url);
+ if (!u)
+ return -ENOMEM;
+
+ j = new(PullJob, 1);
+ if (!j)
+ return -ENOMEM;
+
+ *j = (PullJob) {
+ .state = PULL_JOB_INIT,
+ .disk_fd = -1,
+ .userdata = userdata,
+ .glue = glue,
+ .content_length = (uint64_t) -1,
+ .start_usec = now(CLOCK_MONOTONIC),
+ .compressed_max = 64LLU * 1024LLU * 1024LLU * 1024LLU, /* 64GB safety limit */
+ .uncompressed_max = 64LLU * 1024LLU * 1024LLU * 1024LLU, /* 64GB safety limit */
+ .style = VERIFICATION_STYLE_UNSET,
+ .url = TAKE_PTR(u),
+ };
+
+ *ret = TAKE_PTR(j);
+
+ return 0;
+}
+
+int pull_job_begin(PullJob *j) {
+ int r;
+
+ assert(j);
+
+ if (j->state != PULL_JOB_INIT)
+ return -EBUSY;
+
+ r = curl_glue_make(&j->curl, j->url, j);
+ if (r < 0)
+ return r;
+
+ if (!strv_isempty(j->old_etags)) {
+ _cleanup_free_ char *cc = NULL, *hdr = NULL;
+
+ cc = strv_join(j->old_etags, ", ");
+ if (!cc)
+ return -ENOMEM;
+
+ hdr = strjoin("If-None-Match: ", cc);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (!j->request_header) {
+ j->request_header = curl_slist_new(hdr, NULL);
+ if (!j->request_header)
+ return -ENOMEM;
+ } else {
+ struct curl_slist *l;
+
+ l = curl_slist_append(j->request_header, hdr);
+ if (!l)
+ return -ENOMEM;
+
+ j->request_header = l;
+ }
+ }
+
+ if (j->request_header) {
+ if (curl_easy_setopt(j->curl, CURLOPT_HTTPHEADER, j->request_header) != CURLE_OK)
+ return -EIO;
+ }
+
+ if (curl_easy_setopt(j->curl, CURLOPT_WRITEFUNCTION, pull_job_write_callback) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_WRITEDATA, j) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_HEADERFUNCTION, pull_job_header_callback) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_HEADERDATA, j) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_XFERINFOFUNCTION, pull_job_progress_callback) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_XFERINFODATA, j) != CURLE_OK)
+ return -EIO;
+
+ if (curl_easy_setopt(j->curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK)
+ return -EIO;
+
+ r = curl_glue_add(j->glue, j->curl);
+ if (r < 0)
+ return r;
+
+ j->state = PULL_JOB_ANALYZING;
+
+ return 0;
+}
diff --git a/src/import/pull-job.h b/src/import/pull-job.h
new file mode 100644
index 0000000..719196c
--- /dev/null
+++ b/src/import/pull-job.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <gcrypt.h>
+
+#include "curl-util.h"
+#include "import-compress.h"
+#include "macro.h"
+
+typedef struct PullJob PullJob;
+
+typedef void (*PullJobFinished)(PullJob *job);
+typedef int (*PullJobOpenDisk)(PullJob *job);
+typedef int (*PullJobHeader)(PullJob *job, const char *header, size_t sz);
+typedef void (*PullJobProgress)(PullJob *job);
+
+typedef enum PullJobState {
+ PULL_JOB_INIT,
+ PULL_JOB_ANALYZING, /* Still reading into ->payload, to figure out what we have */
+ PULL_JOB_RUNNING, /* Writing to destination */
+ PULL_JOB_DONE,
+ PULL_JOB_FAILED,
+ _PULL_JOB_STATE_MAX,
+ _PULL_JOB_STATE_INVALID = -1,
+} PullJobState;
+
+typedef enum VerificationStyle {
+ VERIFICATION_STYLE_UNSET,
+ VERIFICATION_PER_FILE, /* SuSE-style ".sha256" files with inline signature */
+ VERIFICATION_PER_DIRECTORY, /* Ubuntu-style SHA256SUM files with detach SHA256SUM.gpg signatures */
+} VerificationStyle;
+
+#define PULL_JOB_IS_COMPLETE(j) (IN_SET((j)->state, PULL_JOB_DONE, PULL_JOB_FAILED))
+
+struct PullJob {
+ PullJobState state;
+ int error;
+
+ char *url;
+
+ void *userdata;
+ PullJobFinished on_finished;
+ PullJobOpenDisk on_open_disk;
+ PullJobHeader on_header;
+ PullJobProgress on_progress;
+
+ CurlGlue *glue;
+ CURL *curl;
+ struct curl_slist *request_header;
+
+ char *etag;
+ char **old_etags;
+ bool etag_exists;
+
+ uint64_t content_length;
+ uint64_t written_compressed;
+ uint64_t written_uncompressed;
+
+ uint64_t uncompressed_max;
+ uint64_t compressed_max;
+
+ uint8_t *payload;
+ size_t payload_size;
+ size_t payload_allocated;
+
+ int disk_fd;
+
+ usec_t mtime;
+
+ ImportCompress compress;
+
+ unsigned progress_percent;
+ usec_t start_usec;
+ usec_t last_status_usec;
+
+ bool allow_sparse;
+
+ bool calc_checksum;
+ gcry_md_hd_t checksum_context;
+
+ char *checksum;
+
+ VerificationStyle style;
+};
+
+int pull_job_new(PullJob **job, const char *url, CurlGlue *glue, void *userdata);
+PullJob* pull_job_unref(PullJob *job);
+
+int pull_job_begin(PullJob *j);
+
+void pull_job_curl_on_finished(CurlGlue *g, CURL *curl, CURLcode result);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(PullJob*, pull_job_unref);
diff --git a/src/import/pull-raw.c b/src/import/pull-raw.c
new file mode 100644
index 0000000..7956ef0
--- /dev/null
+++ b/src/import/pull-raw.c
@@ -0,0 +1,741 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <curl/curl.h>
+#include <linux/fs.h>
+#include <sys/xattr.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "copy.h"
+#include "curl-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "pull-common.h"
+#include "pull-job.h"
+#include "pull-raw.h"
+#include "qcow2-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "web-util.h"
+
+typedef enum RawProgress {
+ RAW_DOWNLOADING,
+ RAW_VERIFYING,
+ RAW_UNPACKING,
+ RAW_FINALIZING,
+ RAW_COPYING,
+} RawProgress;
+
+struct RawPull {
+ sd_event *event;
+ CurlGlue *glue;
+
+ char *image_root;
+
+ PullJob *raw_job;
+ PullJob *roothash_job;
+ PullJob *settings_job;
+ PullJob *checksum_job;
+ PullJob *signature_job;
+
+ RawPullFinished on_finished;
+ void *userdata;
+
+ char *local;
+ bool force_local;
+ bool settings;
+ bool roothash;
+
+ char *final_path;
+ char *temp_path;
+
+ char *settings_path;
+ char *settings_temp_path;
+
+ char *roothash_path;
+ char *roothash_temp_path;
+
+ ImportVerify verify;
+};
+
+RawPull* raw_pull_unref(RawPull *i) {
+ if (!i)
+ return NULL;
+
+ pull_job_unref(i->raw_job);
+ pull_job_unref(i->settings_job);
+ pull_job_unref(i->roothash_job);
+ pull_job_unref(i->checksum_job);
+ pull_job_unref(i->signature_job);
+
+ curl_glue_unref(i->glue);
+ sd_event_unref(i->event);
+
+ if (i->temp_path) {
+ (void) unlink(i->temp_path);
+ free(i->temp_path);
+ }
+
+ if (i->roothash_temp_path) {
+ (void) unlink(i->roothash_temp_path);
+ free(i->roothash_temp_path);
+ }
+
+ if (i->settings_temp_path) {
+ (void) unlink(i->settings_temp_path);
+ free(i->settings_temp_path);
+ }
+
+ free(i->final_path);
+ free(i->roothash_path);
+ free(i->settings_path);
+ free(i->image_root);
+ free(i->local);
+ return mfree(i);
+}
+
+int raw_pull_new(
+ RawPull **ret,
+ sd_event *event,
+ const char *image_root,
+ RawPullFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(raw_pull_unrefp) RawPull *i = NULL;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ assert(ret);
+
+ root = strdup(image_root ?: "/var/lib/machines");
+ if (!root)
+ return -ENOMEM;
+
+ if (event)
+ e = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e);
+ if (r < 0)
+ return r;
+ }
+
+ r = curl_glue_new(&g, e);
+ if (r < 0)
+ return r;
+
+ i = new(RawPull, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (RawPull) {
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .image_root = TAKE_PTR(root),
+ .event = TAKE_PTR(e),
+ .glue = TAKE_PTR(g),
+ };
+
+ i->glue->on_finished = pull_job_curl_on_finished;
+ i->glue->userdata = i;
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static void raw_pull_report_progress(RawPull *i, RawProgress p) {
+ unsigned percent;
+
+ assert(i);
+
+ switch (p) {
+
+ case RAW_DOWNLOADING: {
+ unsigned remain = 80;
+
+ percent = 0;
+
+ if (i->settings_job) {
+ percent += i->settings_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->roothash_job) {
+ percent += i->roothash_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->checksum_job) {
+ percent += i->checksum_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->signature_job) {
+ percent += i->signature_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->raw_job)
+ percent += i->raw_job->progress_percent * remain / 100;
+ break;
+ }
+
+ case RAW_VERIFYING:
+ percent = 80;
+ break;
+
+ case RAW_UNPACKING:
+ percent = 85;
+ break;
+
+ case RAW_FINALIZING:
+ percent = 90;
+ break;
+
+ case RAW_COPYING:
+ percent = 95;
+ break;
+
+ default:
+ assert_not_reached("Unknown progress state");
+ }
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_debug("Combined progress %u%%", percent);
+}
+
+static int raw_pull_maybe_convert_qcow2(RawPull *i) {
+ _cleanup_close_ int converted_fd = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(i);
+ assert(i->raw_job);
+
+ r = qcow2_detect(i->raw_job->disk_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detect whether this is a QCOW2 image: %m");
+ if (r == 0)
+ return 0;
+
+ /* This is a QCOW2 image, let's convert it */
+ r = tempfn_random(i->final_path, NULL, &t);
+ if (r < 0)
+ return log_oom();
+
+ converted_fd = open(t, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (converted_fd < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", t);
+
+ (void) import_set_nocow_and_log(converted_fd, t);
+
+ log_info("Unpacking QCOW2 file.");
+
+ r = qcow2_convert(i->raw_job->disk_fd, converted_fd);
+ if (r < 0) {
+ (void) unlink(t);
+ return log_error_errno(r, "Failed to convert qcow2 image: %m");
+ }
+
+ (void) unlink(i->temp_path);
+ free_and_replace(i->temp_path, t);
+ CLOSE_AND_REPLACE(i->raw_job->disk_fd, converted_fd);
+
+ return 1;
+}
+
+static int raw_pull_determine_path(RawPull *i, const char *suffix, char **field) {
+ int r;
+
+ assert(i);
+ assert(field);
+
+ if (*field)
+ return 0;
+
+ assert(i->raw_job);
+
+ r = pull_make_path(i->raw_job->url, i->raw_job->etag, i->image_root, ".raw-", suffix, field);
+ if (r < 0)
+ return log_oom();
+
+ return 1;
+}
+
+static int raw_pull_copy_auxiliary_file(
+ RawPull *i,
+ const char *suffix,
+ char **path) {
+
+ const char *local;
+ int r;
+
+ assert(i);
+ assert(suffix);
+ assert(path);
+
+ r = raw_pull_determine_path(i, suffix, path);
+ if (r < 0)
+ return r;
+
+ local = strjoina(i->image_root, "/", i->local, suffix);
+
+ r = copy_file_atomic(*path, local, 0644, 0, 0, COPY_REFLINK | (i->force_local ? COPY_REPLACE : 0));
+ if (r == -EEXIST)
+ log_warning_errno(r, "File %s already exists, not replacing.", local);
+ else if (r == -ENOENT)
+ log_debug_errno(r, "Skipping creation of auxiliary file, since none was found.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to copy file %s, ignoring: %m", local);
+ else
+ log_info("Created new file %s.", local);
+
+ return 0;
+}
+
+static int raw_pull_make_local_copy(RawPull *i) {
+ _cleanup_free_ char *tp = NULL;
+ _cleanup_close_ int dfd = -1;
+ const char *p;
+ int r;
+
+ assert(i);
+ assert(i->raw_job);
+
+ if (!i->local)
+ return 0;
+
+ if (i->raw_job->etag_exists) {
+ /* We have downloaded this one previously, reopen it */
+
+ assert(i->raw_job->disk_fd < 0);
+
+ i->raw_job->disk_fd = open(i->final_path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (i->raw_job->disk_fd < 0)
+ return log_error_errno(errno, "Failed to open vendor image: %m");
+ } else {
+ /* We freshly downloaded the image, use it */
+
+ assert(i->raw_job->disk_fd >= 0);
+
+ if (lseek(i->raw_job->disk_fd, SEEK_SET, 0) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek to beginning of vendor image: %m");
+ }
+
+ p = strjoina(i->image_root, "/", i->local, ".raw");
+
+ if (i->force_local)
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+
+ r = tempfn_random(p, NULL, &tp);
+ if (r < 0)
+ return log_oom();
+
+ dfd = open(tp, O_WRONLY|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (dfd < 0)
+ return log_error_errno(errno, "Failed to create writable copy of image: %m");
+
+ /* Turn off COW writing. This should greatly improve performance on COW file systems like btrfs,
+ * since it reduces fragmentation caused by not allowing in-place writes. */
+ (void) import_set_nocow_and_log(dfd, tp);
+
+ r = copy_bytes(i->raw_job->disk_fd, dfd, (uint64_t) -1, COPY_REFLINK);
+ if (r < 0) {
+ (void) unlink(tp);
+ return log_error_errno(r, "Failed to make writable copy of image: %m");
+ }
+
+ (void) copy_times(i->raw_job->disk_fd, dfd, COPY_CRTIME);
+ (void) copy_xattr(i->raw_job->disk_fd, dfd);
+
+ dfd = safe_close(dfd);
+
+ r = rename(tp, p);
+ if (r < 0) {
+ r = log_error_errno(errno, "Failed to move writable image into place: %m");
+ (void) unlink(tp);
+ return r;
+ }
+
+ log_info("Created new local image '%s'.", i->local);
+
+ if (i->roothash) {
+ r = raw_pull_copy_auxiliary_file(i, ".roothash", &i->roothash_path);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->settings) {
+ r = raw_pull_copy_auxiliary_file(i, ".nspawn", &i->settings_path);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static bool raw_pull_is_done(RawPull *i) {
+ assert(i);
+ assert(i->raw_job);
+
+ if (!PULL_JOB_IS_COMPLETE(i->raw_job))
+ return false;
+ if (i->roothash_job && !PULL_JOB_IS_COMPLETE(i->roothash_job))
+ return false;
+ if (i->settings_job && !PULL_JOB_IS_COMPLETE(i->settings_job))
+ return false;
+ if (i->checksum_job && !PULL_JOB_IS_COMPLETE(i->checksum_job))
+ return false;
+ if (i->signature_job && !PULL_JOB_IS_COMPLETE(i->signature_job))
+ return false;
+
+ return true;
+}
+
+static int raw_pull_rename_auxiliary_file(
+ RawPull *i,
+ const char *suffix,
+ char **temp_path,
+ char **path) {
+
+ int r;
+
+ assert(i);
+ assert(temp_path);
+ assert(suffix);
+ assert(path);
+
+ /* Regenerate final name for this auxiliary file, we might know the etag of the file now, and we should
+ * incorporate it in the file name if we can */
+ *path = mfree(*path);
+ r = raw_pull_determine_path(i, suffix, path);
+ if (r < 0)
+ return r;
+
+ r = import_make_read_only(*temp_path);
+ if (r < 0)
+ return r;
+
+ r = rename_noreplace(AT_FDCWD, *temp_path, AT_FDCWD, *path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rename file %s to %s: %m", *temp_path, *path);
+
+ *temp_path = mfree(*temp_path);
+
+ return 1;
+}
+
+static void raw_pull_job_on_finished(PullJob *j) {
+ RawPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ if (j == i->roothash_job) {
+ if (j->error != 0)
+ log_info_errno(j->error, "Root hash file could not be retrieved, proceeding without.");
+ } else if (j == i->settings_job) {
+ if (j->error != 0)
+ log_info_errno(j->error, "Settings file could not be retrieved, proceeding without.");
+ } else if (j->error != 0 && j != i->signature_job) {
+ if (j == i->checksum_job)
+ log_error_errno(j->error, "Failed to retrieve SHA256 checksum, cannot verify. (Try --verify=no?)");
+ else
+ log_error_errno(j->error, "Failed to retrieve image file. (Wrong URL?)");
+
+ r = j->error;
+ goto finish;
+ }
+
+ /* This is invoked if either the download completed
+ * successfully, or the download was skipped because we
+ * already have the etag. In this case ->etag_exists is
+ * true.
+ *
+ * We only do something when we got all three files */
+
+ if (!raw_pull_is_done(i))
+ return;
+
+ if (i->signature_job && i->checksum_job->style == VERIFICATION_PER_DIRECTORY && i->signature_job->error != 0) {
+ log_error_errno(j->error, "Failed to retrieve signature file, cannot verify. (Try --verify=no?)");
+
+ r = i->signature_job->error;
+ goto finish;
+ }
+
+ if (i->roothash_job)
+ i->roothash_job->disk_fd = safe_close(i->roothash_job->disk_fd);
+ if (i->settings_job)
+ i->settings_job->disk_fd = safe_close(i->settings_job->disk_fd);
+
+ r = raw_pull_determine_path(i, ".raw", &i->final_path);
+ if (r < 0)
+ goto finish;
+
+ if (!i->raw_job->etag_exists) {
+ /* This is a new download, verify it, and move it into place */
+ assert(i->raw_job->disk_fd >= 0);
+
+ raw_pull_report_progress(i, RAW_VERIFYING);
+
+ r = pull_verify(i->raw_job, i->roothash_job, i->settings_job, i->checksum_job, i->signature_job);
+ if (r < 0)
+ goto finish;
+
+ raw_pull_report_progress(i, RAW_UNPACKING);
+
+ r = raw_pull_maybe_convert_qcow2(i);
+ if (r < 0)
+ goto finish;
+
+ raw_pull_report_progress(i, RAW_FINALIZING);
+
+ if (i->raw_job->etag) {
+ /* Only make a read-only copy if ETag header is set. */
+ r = import_make_read_only_fd(i->raw_job->disk_fd);
+ if (r < 0)
+ goto finish;
+
+ r = rename_noreplace(AT_FDCWD, i->temp_path, AT_FDCWD, i->final_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rename raw file to %s: %m", i->final_path);
+ goto finish;
+ }
+ }
+
+ i->temp_path = mfree(i->temp_path);
+
+ if (i->roothash_job &&
+ i->roothash_job->error == 0) {
+ r = raw_pull_rename_auxiliary_file(i, ".roothash", &i->roothash_temp_path, &i->roothash_path);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (i->settings_job &&
+ i->settings_job->error == 0) {
+ r = raw_pull_rename_auxiliary_file(i, ".nspawn", &i->settings_temp_path, &i->settings_path);
+ if (r < 0)
+ goto finish;
+ }
+ }
+
+ raw_pull_report_progress(i, RAW_COPYING);
+
+ r = raw_pull_make_local_copy(i);
+ if (r < 0)
+ goto finish;
+
+ r = 0;
+
+finish:
+ if (i->on_finished)
+ i->on_finished(i, r, i->userdata);
+ else
+ sd_event_exit(i->event, r);
+}
+
+static int raw_pull_job_on_open_disk_generic(
+ RawPull *i,
+ PullJob *j,
+ const char *extra,
+ char **temp_path) {
+
+ int r;
+
+ assert(i);
+ assert(j);
+ assert(extra);
+ assert(temp_path);
+
+ if (!*temp_path) {
+ r = tempfn_random_child(i->image_root, extra, temp_path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ (void) mkdir_parents_label(*temp_path, 0700);
+
+ j->disk_fd = open(*temp_path, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (j->disk_fd < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", *temp_path);
+
+ return 0;
+}
+
+static int raw_pull_job_on_open_disk_raw(PullJob *j) {
+ RawPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->raw_job == j);
+
+ r = raw_pull_job_on_open_disk_generic(i, j, "raw", &i->temp_path);
+ if (r < 0)
+ return r;
+
+ (void) import_set_nocow_and_log(j->disk_fd, i->temp_path);
+ return 0;
+}
+
+static int raw_pull_job_on_open_disk_roothash(PullJob *j) {
+ RawPull *i;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->roothash_job == j);
+
+ return raw_pull_job_on_open_disk_generic(i, j, "roothash", &i->roothash_temp_path);
+}
+
+static int raw_pull_job_on_open_disk_settings(PullJob *j) {
+ RawPull *i;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->settings_job == j);
+
+ return raw_pull_job_on_open_disk_generic(i, j, "settings", &i->settings_temp_path);
+}
+
+static void raw_pull_job_on_progress(PullJob *j) {
+ RawPull *i;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+
+ raw_pull_report_progress(i, RAW_DOWNLOADING);
+}
+
+int raw_pull_start(
+ RawPull *i,
+ const char *url,
+ const char *local,
+ bool force_local,
+ ImportVerify verify,
+ bool settings,
+ bool roothash) {
+
+ int r;
+
+ assert(i);
+ assert(verify < _IMPORT_VERIFY_MAX);
+ assert(verify >= 0);
+
+ if (!http_url_is_valid(url))
+ return -EINVAL;
+
+ if (local && !machine_name_is_valid(local))
+ return -EINVAL;
+
+ if (i->raw_job)
+ return -EBUSY;
+
+ r = free_and_strdup(&i->local, local);
+ if (r < 0)
+ return r;
+
+ i->force_local = force_local;
+ i->verify = verify;
+ i->settings = settings;
+ i->roothash = roothash;
+
+ /* Queue job for the image itself */
+ r = pull_job_new(&i->raw_job, url, i->glue, i);
+ if (r < 0)
+ return r;
+
+ i->raw_job->on_finished = raw_pull_job_on_finished;
+ i->raw_job->on_open_disk = raw_pull_job_on_open_disk_raw;
+ i->raw_job->on_progress = raw_pull_job_on_progress;
+ i->raw_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+
+ r = pull_find_old_etags(url, i->image_root, DT_REG, ".raw-", ".raw", &i->raw_job->old_etags);
+ if (r < 0)
+ return r;
+
+ if (roothash) {
+ r = pull_make_auxiliary_job(&i->roothash_job, url, raw_strip_suffixes, ".roothash", i->glue, raw_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ i->roothash_job->on_open_disk = raw_pull_job_on_open_disk_roothash;
+ i->roothash_job->on_progress = raw_pull_job_on_progress;
+ i->roothash_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+ }
+
+ if (settings) {
+ r = pull_make_auxiliary_job(&i->settings_job, url, raw_strip_suffixes, ".nspawn", i->glue, raw_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ i->settings_job->on_open_disk = raw_pull_job_on_open_disk_settings;
+ i->settings_job->on_progress = raw_pull_job_on_progress;
+ i->settings_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+ }
+
+ r = pull_make_verification_jobs(&i->checksum_job, &i->signature_job, verify, url, i->glue, raw_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ r = pull_job_begin(i->raw_job);
+ if (r < 0)
+ return r;
+
+ if (i->roothash_job) {
+ r = pull_job_begin(i->roothash_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->settings_job) {
+ r = pull_job_begin(i->settings_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->checksum_job) {
+ i->checksum_job->on_progress = raw_pull_job_on_progress;
+ i->checksum_job->style = VERIFICATION_PER_FILE;
+
+ r = pull_job_begin(i->checksum_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->signature_job) {
+ i->signature_job->on_progress = raw_pull_job_on_progress;
+
+ r = pull_job_begin(i->signature_job);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/import/pull-raw.h b/src/import/pull-raw.h
new file mode 100644
index 0000000..e1d450d
--- /dev/null
+++ b/src/import/pull-raw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-util.h"
+#include "macro.h"
+
+typedef struct RawPull RawPull;
+
+typedef void (*RawPullFinished)(RawPull *pull, int error, void *userdata);
+
+int raw_pull_new(RawPull **pull, sd_event *event, const char *image_root, RawPullFinished on_finished, void *userdata);
+RawPull* raw_pull_unref(RawPull *pull);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(RawPull*, raw_pull_unref);
+
+int raw_pull_start(RawPull *pull, const char *url, const char *local, bool force_local, ImportVerify verify, bool settings, bool roothash);
diff --git a/src/import/pull-tar.c b/src/import/pull-tar.c
new file mode 100644
index 0000000..31e9a8e
--- /dev/null
+++ b/src/import/pull-tar.c
@@ -0,0 +1,559 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <curl/curl.h>
+#include <sys/prctl.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "copy.h"
+#include "curl-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "import-common.h"
+#include "import-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "pull-common.h"
+#include "pull-job.h"
+#include "pull-tar.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "web-util.h"
+
+typedef enum TarProgress {
+ TAR_DOWNLOADING,
+ TAR_VERIFYING,
+ TAR_FINALIZING,
+ TAR_COPYING,
+} TarProgress;
+
+struct TarPull {
+ sd_event *event;
+ CurlGlue *glue;
+
+ char *image_root;
+
+ PullJob *tar_job;
+ PullJob *settings_job;
+ PullJob *checksum_job;
+ PullJob *signature_job;
+
+ TarPullFinished on_finished;
+ void *userdata;
+
+ char *local;
+ bool force_local;
+ bool settings;
+
+ pid_t tar_pid;
+
+ char *final_path;
+ char *temp_path;
+
+ char *settings_path;
+ char *settings_temp_path;
+
+ ImportVerify verify;
+};
+
+TarPull* tar_pull_unref(TarPull *i) {
+ if (!i)
+ return NULL;
+
+ if (i->tar_pid > 1) {
+ (void) kill_and_sigcont(i->tar_pid, SIGKILL);
+ (void) wait_for_terminate(i->tar_pid, NULL);
+ }
+
+ pull_job_unref(i->tar_job);
+ pull_job_unref(i->settings_job);
+ pull_job_unref(i->checksum_job);
+ pull_job_unref(i->signature_job);
+
+ curl_glue_unref(i->glue);
+ sd_event_unref(i->event);
+
+ if (i->temp_path) {
+ (void) rm_rf(i->temp_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ free(i->temp_path);
+ }
+
+ if (i->settings_temp_path) {
+ (void) unlink(i->settings_temp_path);
+ free(i->settings_temp_path);
+ }
+
+ free(i->final_path);
+ free(i->settings_path);
+ free(i->image_root);
+ free(i->local);
+
+ return mfree(i);
+}
+
+int tar_pull_new(
+ TarPull **ret,
+ sd_event *event,
+ const char *image_root,
+ TarPullFinished on_finished,
+ void *userdata) {
+
+ _cleanup_(curl_glue_unrefp) CurlGlue *g = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(tar_pull_unrefp) TarPull *i = NULL;
+ _cleanup_free_ char *root = NULL;
+ int r;
+
+ assert(ret);
+
+ root = strdup(image_root ?: "/var/lib/machines");
+ if (!root)
+ return -ENOMEM;
+
+ if (event)
+ e = sd_event_ref(event);
+ else {
+ r = sd_event_default(&e);
+ if (r < 0)
+ return r;
+ }
+
+ r = curl_glue_new(&g, e);
+ if (r < 0)
+ return r;
+
+ i = new(TarPull, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (TarPull) {
+ .on_finished = on_finished,
+ .userdata = userdata,
+ .image_root = TAKE_PTR(root),
+ .event = TAKE_PTR(e),
+ .glue = TAKE_PTR(g),
+ };
+
+ i->glue->on_finished = pull_job_curl_on_finished;
+ i->glue->userdata = i;
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static void tar_pull_report_progress(TarPull *i, TarProgress p) {
+ unsigned percent;
+
+ assert(i);
+
+ switch (p) {
+
+ case TAR_DOWNLOADING: {
+ unsigned remain = 85;
+
+ percent = 0;
+
+ if (i->settings_job) {
+ percent += i->settings_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->checksum_job) {
+ percent += i->checksum_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->signature_job) {
+ percent += i->signature_job->progress_percent * 5 / 100;
+ remain -= 5;
+ }
+
+ if (i->tar_job)
+ percent += i->tar_job->progress_percent * remain / 100;
+ break;
+ }
+
+ case TAR_VERIFYING:
+ percent = 85;
+ break;
+
+ case TAR_FINALIZING:
+ percent = 90;
+ break;
+
+ case TAR_COPYING:
+ percent = 95;
+ break;
+
+ default:
+ assert_not_reached("Unknown progress state");
+ }
+
+ sd_notifyf(false, "X_IMPORT_PROGRESS=%u", percent);
+ log_debug("Combined progress %u%%", percent);
+}
+
+static int tar_pull_determine_path(TarPull *i, const char *suffix, char **field) {
+ int r;
+
+ assert(i);
+ assert(field);
+
+ if (*field)
+ return 0;
+
+ assert(i->tar_job);
+
+ r = pull_make_path(i->tar_job->url, i->tar_job->etag, i->image_root, ".tar-", suffix, field);
+ if (r < 0)
+ return log_oom();
+
+ return 1;
+}
+
+static int tar_pull_make_local_copy(TarPull *i) {
+ int r;
+
+ assert(i);
+ assert(i->tar_job);
+
+ if (!i->local)
+ return 0;
+
+ r = pull_make_local_copy(i->final_path, i->image_root, i->local, i->force_local);
+ if (r < 0)
+ return r;
+
+ if (i->settings) {
+ const char *local_settings;
+ assert(i->settings_job);
+
+ r = tar_pull_determine_path(i, ".nspawn", &i->settings_path);
+ if (r < 0)
+ return r;
+
+ local_settings = strjoina(i->image_root, "/", i->local, ".nspawn");
+
+ r = copy_file_atomic(i->settings_path, local_settings, 0664, 0, 0, COPY_REFLINK | (i->force_local ? COPY_REPLACE : 0));
+ if (r == -EEXIST)
+ log_warning_errno(r, "Settings file %s already exists, not replacing.", local_settings);
+ else if (r == -ENOENT)
+ log_debug_errno(r, "Skipping creation of settings file, since none was found.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to copy settings files %s, ignoring: %m", local_settings);
+ else
+ log_info("Created new settings file %s.", local_settings);
+ }
+
+ return 0;
+}
+
+static bool tar_pull_is_done(TarPull *i) {
+ assert(i);
+ assert(i->tar_job);
+
+ if (!PULL_JOB_IS_COMPLETE(i->tar_job))
+ return false;
+ if (i->settings_job && !PULL_JOB_IS_COMPLETE(i->settings_job))
+ return false;
+ if (i->checksum_job && !PULL_JOB_IS_COMPLETE(i->checksum_job))
+ return false;
+ if (i->signature_job && !PULL_JOB_IS_COMPLETE(i->signature_job))
+ return false;
+
+ return true;
+}
+
+static void tar_pull_job_on_finished(PullJob *j) {
+ TarPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+
+ if (j == i->settings_job) {
+ if (j->error != 0)
+ log_info_errno(j->error, "Settings file could not be retrieved, proceeding without.");
+ } else if (j->error != 0 && j != i->signature_job) {
+ if (j == i->checksum_job)
+ log_error_errno(j->error, "Failed to retrieve SHA256 checksum, cannot verify. (Try --verify=no?)");
+ else
+ log_error_errno(j->error, "Failed to retrieve image file. (Wrong URL?)");
+
+ r = j->error;
+ goto finish;
+ }
+
+ /* This is invoked if either the download completed successfully, or the download was skipped because
+ * we already have the etag. */
+
+ if (!tar_pull_is_done(i))
+ return;
+
+ if (i->signature_job && i->checksum_job->style == VERIFICATION_PER_DIRECTORY && i->signature_job->error != 0) {
+ log_error_errno(j->error, "Failed to retrieve signature file, cannot verify. (Try --verify=no?)");
+
+ r = i->signature_job->error;
+ goto finish;
+ }
+
+ i->tar_job->disk_fd = safe_close(i->tar_job->disk_fd);
+ if (i->settings_job)
+ i->settings_job->disk_fd = safe_close(i->settings_job->disk_fd);
+
+ r = tar_pull_determine_path(i, NULL, &i->final_path);
+ if (r < 0)
+ goto finish;
+
+ if (i->tar_pid > 0) {
+ r = wait_for_terminate_and_check("tar", i->tar_pid, WAIT_LOG);
+ i->tar_pid = 0;
+ if (r < 0)
+ goto finish;
+ if (r != EXIT_SUCCESS) {
+ r = -EIO;
+ goto finish;
+ }
+ }
+
+ if (!i->tar_job->etag_exists) {
+ /* This is a new download, verify it, and move it into place */
+
+ tar_pull_report_progress(i, TAR_VERIFYING);
+
+ r = pull_verify(i->tar_job, NULL, i->settings_job, i->checksum_job, i->signature_job);
+ if (r < 0)
+ goto finish;
+
+ tar_pull_report_progress(i, TAR_FINALIZING);
+
+ r = import_mangle_os_tree(i->temp_path);
+ if (r < 0)
+ goto finish;
+
+ r = import_make_read_only(i->temp_path);
+ if (r < 0)
+ goto finish;
+
+ r = rename_noreplace(AT_FDCWD, i->temp_path, AT_FDCWD, i->final_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rename to final image name to %s: %m", i->final_path);
+ goto finish;
+ }
+
+ i->temp_path = mfree(i->temp_path);
+
+ if (i->settings_job &&
+ i->settings_job->error == 0) {
+
+ /* Also move the settings file into place, if it exists. Note that we do so only if we also
+ * moved the tar file in place, to keep things strictly in sync. */
+ assert(i->settings_temp_path);
+
+ /* Regenerate final name for this auxiliary file, we might know the etag of the file now, and
+ * we should incorporate it in the file name if we can */
+ i->settings_path = mfree(i->settings_path);
+
+ r = tar_pull_determine_path(i, ".nspawn", &i->settings_path);
+ if (r < 0)
+ goto finish;
+
+ r = import_make_read_only(i->settings_temp_path);
+ if (r < 0)
+ goto finish;
+
+ r = rename_noreplace(AT_FDCWD, i->settings_temp_path, AT_FDCWD, i->settings_path);
+ if (r < 0) {
+ log_error_errno(r, "Failed to rename settings file to %s: %m", i->settings_path);
+ goto finish;
+ }
+
+ i->settings_temp_path = mfree(i->settings_temp_path);
+ }
+ }
+
+ tar_pull_report_progress(i, TAR_COPYING);
+
+ r = tar_pull_make_local_copy(i);
+ if (r < 0)
+ goto finish;
+
+ r = 0;
+
+finish:
+ if (i->on_finished)
+ i->on_finished(i, r, i->userdata);
+ else
+ sd_event_exit(i->event, r);
+}
+
+static int tar_pull_job_on_open_disk_tar(PullJob *j) {
+ TarPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->tar_job == j);
+ assert(i->tar_pid <= 0);
+
+ if (!i->temp_path) {
+ r = tempfn_random_child(i->image_root, "tar", &i->temp_path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ mkdir_parents_label(i->temp_path, 0700);
+
+ r = btrfs_subvol_make_fallback(i->temp_path, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create directory/subvolume %s: %m", i->temp_path);
+ if (r > 0) /* actually btrfs subvol */
+ (void) import_assign_pool_quota_and_warn(i->temp_path);
+
+ j->disk_fd = import_fork_tar_x(i->temp_path, &i->tar_pid);
+ if (j->disk_fd < 0)
+ return j->disk_fd;
+
+ return 0;
+}
+
+static int tar_pull_job_on_open_disk_settings(PullJob *j) {
+ TarPull *i;
+ int r;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+ assert(i->settings_job == j);
+
+ if (!i->settings_temp_path) {
+ r = tempfn_random_child(i->image_root, "settings", &i->settings_temp_path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ mkdir_parents_label(i->settings_temp_path, 0700);
+
+ j->disk_fd = open(i->settings_temp_path, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0664);
+ if (j->disk_fd < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", i->settings_temp_path);
+
+ return 0;
+}
+
+static void tar_pull_job_on_progress(PullJob *j) {
+ TarPull *i;
+
+ assert(j);
+ assert(j->userdata);
+
+ i = j->userdata;
+
+ tar_pull_report_progress(i, TAR_DOWNLOADING);
+}
+
+int tar_pull_start(
+ TarPull *i,
+ const char *url,
+ const char *local,
+ bool force_local,
+ ImportVerify verify,
+ bool settings) {
+
+ int r;
+
+ assert(i);
+ assert(verify < _IMPORT_VERIFY_MAX);
+ assert(verify >= 0);
+
+ if (!http_url_is_valid(url))
+ return -EINVAL;
+
+ if (local && !machine_name_is_valid(local))
+ return -EINVAL;
+
+ if (i->tar_job)
+ return -EBUSY;
+
+ r = free_and_strdup(&i->local, local);
+ if (r < 0)
+ return r;
+
+ i->force_local = force_local;
+ i->verify = verify;
+ i->settings = settings;
+
+ /* Set up download job for TAR file */
+ r = pull_job_new(&i->tar_job, url, i->glue, i);
+ if (r < 0)
+ return r;
+
+ i->tar_job->on_finished = tar_pull_job_on_finished;
+ i->tar_job->on_open_disk = tar_pull_job_on_open_disk_tar;
+ i->tar_job->on_progress = tar_pull_job_on_progress;
+ i->tar_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+
+ r = pull_find_old_etags(url, i->image_root, DT_DIR, ".tar-", NULL, &i->tar_job->old_etags);
+ if (r < 0)
+ return r;
+
+ /* Set up download job for the settings file (.nspawn) */
+ if (settings) {
+ r = pull_make_auxiliary_job(&i->settings_job, url, tar_strip_suffixes, ".nspawn", i->glue, tar_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ i->settings_job->on_open_disk = tar_pull_job_on_open_disk_settings;
+ i->settings_job->on_progress = tar_pull_job_on_progress;
+ i->settings_job->calc_checksum = verify != IMPORT_VERIFY_NO;
+ }
+
+ /* Set up download of checksum/signature files */
+ r = pull_make_verification_jobs(&i->checksum_job, &i->signature_job, verify, url, i->glue, tar_pull_job_on_finished, i);
+ if (r < 0)
+ return r;
+
+ r = pull_job_begin(i->tar_job);
+ if (r < 0)
+ return r;
+
+ if (i->settings_job) {
+ r = pull_job_begin(i->settings_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->checksum_job) {
+ i->checksum_job->on_progress = tar_pull_job_on_progress;
+ i->checksum_job->style = VERIFICATION_PER_FILE;
+
+ r = pull_job_begin(i->checksum_job);
+ if (r < 0)
+ return r;
+ }
+
+ if (i->signature_job) {
+ i->signature_job->on_progress = tar_pull_job_on_progress;
+
+ r = pull_job_begin(i->signature_job);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/import/pull-tar.h b/src/import/pull-tar.h
new file mode 100644
index 0000000..78d982c
--- /dev/null
+++ b/src/import/pull-tar.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "import-util.h"
+#include "macro.h"
+
+typedef struct TarPull TarPull;
+
+typedef void (*TarPullFinished)(TarPull *pull, int error, void *userdata);
+
+int tar_pull_new(TarPull **pull, sd_event *event, const char *image_root, TarPullFinished on_finished, void *userdata);
+TarPull* tar_pull_unref(TarPull *pull);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(TarPull*, tar_pull_unref);
+
+int tar_pull_start(TarPull *pull, const char *url, const char *local, bool force_local, ImportVerify verify, bool settings);
diff --git a/src/import/pull.c b/src/import/pull.c
new file mode 100644
index 0000000..9aff377
--- /dev/null
+++ b/src/import/pull.c
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <locale.h>
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "hostname-util.h"
+#include "import-util.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "pull-raw.h"
+#include "pull-tar.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "verbs.h"
+#include "web-util.h"
+
+static bool arg_force = false;
+static const char *arg_image_root = "/var/lib/machines";
+static ImportVerify arg_verify = IMPORT_VERIFY_SIGNATURE;
+static bool arg_settings = true;
+static bool arg_roothash = true;
+
+static int interrupt_signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ log_notice("Transfer aborted.");
+ sd_event_exit(sd_event_source_get_event(s), EINTR);
+ return 0;
+}
+
+static void on_tar_finished(TarPull *pull, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(pull);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int pull_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(tar_pull_unrefp) TarPull *pull = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *url, *local;
+ _cleanup_free_ char *l = NULL, *ll = NULL;
+ int r;
+
+ url = argv[1];
+ if (!http_url_is_valid(url))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "URL '%s' is not valid.", url);
+
+ if (argc >= 3)
+ local = argv[2];
+ else {
+ r = import_url_last_component(url, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed get final component of URL: %m");
+
+ local = l;
+ }
+
+ local = empty_or_dash_to_null(local);
+
+ if (local) {
+ r = tar_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local image name '%s' is not valid.",
+ local);
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Image '%s' already exists.",
+ local);
+ }
+ }
+
+ log_info("Pulling '%s', saving as '%s'.", url, local);
+ } else
+ log_info("Pulling '%s'.", url);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = tar_pull_new(&pull, event, arg_image_root, on_tar_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate puller: %m");
+
+ r = tar_pull_start(pull, url, local, arg_force, arg_verify, arg_settings);
+ if (r < 0)
+ return log_error_errno(r, "Failed to pull image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static void on_raw_finished(RawPull *pull, int error, void *userdata) {
+ sd_event *event = userdata;
+ assert(pull);
+
+ if (error == 0)
+ log_info("Operation completed successfully.");
+
+ sd_event_exit(event, abs(error));
+}
+
+static int pull_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(raw_pull_unrefp) RawPull *pull = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *url, *local;
+ _cleanup_free_ char *l = NULL, *ll = NULL;
+ int r;
+
+ url = argv[1];
+ if (!http_url_is_valid(url))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "URL '%s' is not valid.", url);
+
+ if (argc >= 3)
+ local = argv[2];
+ else {
+ r = import_url_last_component(url, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed get final component of URL: %m");
+
+ local = l;
+ }
+
+ local = empty_or_dash_to_null(local);
+
+ if (local) {
+ r = raw_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local image name '%s' is not valid.",
+ local);
+
+ if (!arg_force) {
+ r = image_find(IMAGE_MACHINE, local, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local);
+ } else {
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Image '%s' already exists.",
+ local);
+ }
+ }
+
+ log_info("Pulling '%s', saving as '%s'.", url, local);
+ } else
+ log_info("Pulling '%s'.", url);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, interrupt_signal_handler, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, interrupt_signal_handler, NULL);
+
+ r = raw_pull_new(&pull, event, arg_image_root, on_raw_finished, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate puller: %m");
+
+ r = raw_pull_start(pull, url, local, arg_force, arg_verify, arg_settings, arg_roothash);
+ if (r < 0)
+ return log_error_errno(r, "Failed to pull image: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ log_info("Exiting.");
+ return -r;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+
+ printf("%s [OPTIONS...] {COMMAND} ...\n\n"
+ "Download container or virtual machine images.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --force Force creation of image\n"
+ " --verify=MODE Verify downloaded image, one of: 'no',\n"
+ " 'checksum', 'signature'\n"
+ " --settings=BOOL Download settings file with image\n"
+ " --roothash=BOOL Download root hash file with image\n"
+ " --image-root=PATH Image root directory\n\n"
+ "Commands:\n"
+ " tar URL [NAME] Download a TAR image\n"
+ " raw URL [NAME] Download a RAW image\n",
+ program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_FORCE,
+ ARG_IMAGE_ROOT,
+ ARG_VERIFY,
+ ARG_SETTINGS,
+ ARG_ROOTHASH,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "image-root", required_argument, NULL, ARG_IMAGE_ROOT },
+ { "verify", required_argument, NULL, ARG_VERIFY },
+ { "settings", required_argument, NULL, ARG_SETTINGS },
+ { "roothash", required_argument, NULL, ARG_ROOTHASH },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_IMAGE_ROOT:
+ arg_image_root = optarg;
+ break;
+
+ case ARG_VERIFY:
+ arg_verify = import_verify_from_string(optarg);
+ if (arg_verify < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid verification setting '%s'", optarg);
+
+ break;
+
+ case ARG_SETTINGS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --settings= parameter '%s': %m", optarg);
+
+ arg_settings = r;
+ break;
+
+ case ARG_ROOTHASH:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --roothash= parameter '%s': %m", optarg);
+
+ arg_roothash = r;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int pull_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "tar", 2, 3, 0, pull_tar },
+ { "raw", 2, 3, 0, pull_raw },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ (void) ignore_signals(SIGPIPE, -1);
+
+ return pull_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/import/qcow2-util.c b/src/import/qcow2-util.c
new file mode 100644
index 0000000..5a7232d
--- /dev/null
+++ b/src/import/qcow2-util.c
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <zlib.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "qcow2-util.h"
+#include "sparse-endian.h"
+#include "util.h"
+
+#define QCOW2_MAGIC 0x514649fb
+
+#define QCOW2_COPIED (1ULL << 63)
+#define QCOW2_COMPRESSED (1ULL << 62)
+#define QCOW2_ZERO (1ULL << 0)
+
+typedef struct _packed_ Header {
+ be32_t magic;
+ be32_t version;
+
+ be64_t backing_file_offset;
+ be32_t backing_file_size;
+
+ be32_t cluster_bits;
+ be64_t size;
+ be32_t crypt_method;
+
+ be32_t l1_size;
+ be64_t l1_table_offset;
+
+ be64_t refcount_table_offset;
+ be32_t refcount_table_clusters;
+
+ be32_t nb_snapshots;
+ be64_t snapshots_offset;
+
+ /* The remainder is only present on QCOW3 */
+ be64_t incompatible_features;
+ be64_t compatible_features;
+ be64_t autoclear_features;
+
+ be32_t refcount_order;
+ be32_t header_length;
+} Header;
+
+#define HEADER_MAGIC(header) be32toh((header)->magic)
+#define HEADER_VERSION(header) be32toh((header)->version)
+#define HEADER_CLUSTER_BITS(header) be32toh((header)->cluster_bits)
+#define HEADER_CLUSTER_SIZE(header) (1ULL << HEADER_CLUSTER_BITS(header))
+#define HEADER_L2_BITS(header) (HEADER_CLUSTER_BITS(header) - 3)
+#define HEADER_SIZE(header) be64toh((header)->size)
+#define HEADER_CRYPT_METHOD(header) be32toh((header)->crypt_method)
+#define HEADER_L1_SIZE(header) be32toh((header)->l1_size)
+#define HEADER_L2_SIZE(header) (HEADER_CLUSTER_SIZE(header)/sizeof(uint64_t))
+#define HEADER_L1_TABLE_OFFSET(header) be64toh((header)->l1_table_offset)
+
+static uint32_t HEADER_HEADER_LENGTH(const Header *h) {
+ if (HEADER_VERSION(h) < 3)
+ return offsetof(Header, incompatible_features);
+
+ return be32toh(h->header_length);
+}
+
+static int copy_cluster(
+ int sfd, uint64_t soffset,
+ int dfd, uint64_t doffset,
+ uint64_t cluster_size,
+ void *buffer) {
+
+ ssize_t l;
+ int r;
+
+ r = btrfs_clone_range(sfd, soffset, dfd, doffset, cluster_size);
+ if (r >= 0)
+ return r;
+
+ l = pread(sfd, buffer, cluster_size, soffset);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != cluster_size)
+ return -EIO;
+
+ l = pwrite(dfd, buffer, cluster_size, doffset);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != cluster_size)
+ return -EIO;
+
+ return 0;
+}
+
+static int decompress_cluster(
+ int sfd, uint64_t soffset,
+ int dfd, uint64_t doffset,
+ uint64_t compressed_size,
+ uint64_t cluster_size,
+ void *buffer1,
+ void *buffer2) {
+
+ _cleanup_free_ void *large_buffer = NULL;
+ z_stream s = {};
+ uint64_t sz;
+ ssize_t l;
+ int r;
+
+ if (compressed_size > cluster_size) {
+ /* The usual cluster buffer doesn't suffice, let's
+ * allocate a larger one, temporarily */
+
+ large_buffer = malloc(compressed_size);
+ if (!large_buffer)
+ return -ENOMEM;
+
+ buffer1 = large_buffer;
+ }
+
+ l = pread(sfd, buffer1, compressed_size, soffset);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != compressed_size)
+ return -EIO;
+
+ s.next_in = buffer1;
+ s.avail_in = compressed_size;
+ s.next_out = buffer2;
+ s.avail_out = cluster_size;
+
+ r = inflateInit2(&s, -12);
+ if (r != Z_OK)
+ return -EIO;
+
+ r = inflate(&s, Z_FINISH);
+ sz = (uint8_t*) s.next_out - (uint8_t*) buffer2;
+ inflateEnd(&s);
+ if (r != Z_STREAM_END || sz != cluster_size)
+ return -EIO;
+
+ l = pwrite(dfd, buffer2, cluster_size, doffset);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != cluster_size)
+ return -EIO;
+
+ return 0;
+}
+
+static int normalize_offset(
+ const Header *header,
+ uint64_t p,
+ uint64_t *ret,
+ bool *compressed,
+ uint64_t *compressed_size) {
+
+ uint64_t q;
+
+ q = be64toh(p);
+
+ if (q & QCOW2_COMPRESSED) {
+ uint64_t sz, csize_shift, csize_mask;
+
+ if (!compressed)
+ return -EOPNOTSUPP;
+
+ csize_shift = 64 - 2 - (HEADER_CLUSTER_BITS(header) - 8);
+ csize_mask = (1ULL << (HEADER_CLUSTER_BITS(header) - 8)) - 1;
+ sz = (((q >> csize_shift) & csize_mask) + 1) * 512 - (q & 511);
+ q &= ((1ULL << csize_shift) - 1);
+
+ if (compressed_size)
+ *compressed_size = sz;
+
+ *compressed = true;
+
+ } else {
+ if (compressed) {
+ *compressed = false;
+ *compressed_size = 0;
+ }
+
+ if (q & QCOW2_ZERO) {
+ /* We make no distinction between zero blocks and holes */
+ *ret = 0;
+ return 0;
+ }
+
+ q &= ~QCOW2_COPIED;
+ }
+
+ *ret = q;
+ return q > 0; /* returns positive if not a hole */
+}
+
+static int verify_header(const Header *header) {
+ assert(header);
+
+ if (HEADER_MAGIC(header) != QCOW2_MAGIC)
+ return -EBADMSG;
+
+ if (!IN_SET(HEADER_VERSION(header), 2, 3))
+ return -EOPNOTSUPP;
+
+ if (HEADER_CRYPT_METHOD(header) != 0)
+ return -EOPNOTSUPP;
+
+ if (HEADER_CLUSTER_BITS(header) < 9) /* 512K */
+ return -EBADMSG;
+
+ if (HEADER_CLUSTER_BITS(header) > 21) /* 2MB */
+ return -EBADMSG;
+
+ if (HEADER_SIZE(header) % HEADER_CLUSTER_SIZE(header) != 0)
+ return -EBADMSG;
+
+ if (HEADER_L1_SIZE(header) > 32*1024*1024) /* 32MB */
+ return -EBADMSG;
+
+ if (HEADER_VERSION(header) == 3) {
+
+ if (header->incompatible_features != 0)
+ return -EOPNOTSUPP;
+
+ if (HEADER_HEADER_LENGTH(header) < sizeof(Header))
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+int qcow2_convert(int qcow2_fd, int raw_fd) {
+ _cleanup_free_ void *buffer1 = NULL, *buffer2 = NULL;
+ _cleanup_free_ be64_t *l1_table = NULL, *l2_table = NULL;
+ uint64_t sz, i;
+ Header header;
+ ssize_t l;
+ int r;
+
+ l = pread(qcow2_fd, &header, sizeof(header), 0);
+ if (l < 0)
+ return -errno;
+ if (l != sizeof(header))
+ return -EIO;
+
+ r = verify_header(&header);
+ if (r < 0)
+ return r;
+
+ l1_table = new(be64_t, HEADER_L1_SIZE(&header));
+ if (!l1_table)
+ return -ENOMEM;
+
+ l2_table = malloc(HEADER_CLUSTER_SIZE(&header));
+ if (!l2_table)
+ return -ENOMEM;
+
+ buffer1 = malloc(HEADER_CLUSTER_SIZE(&header));
+ if (!buffer1)
+ return -ENOMEM;
+
+ buffer2 = malloc(HEADER_CLUSTER_SIZE(&header));
+ if (!buffer2)
+ return -ENOMEM;
+
+ /* Empty the file if it exists, we rely on zero bits */
+ if (ftruncate(raw_fd, 0) < 0)
+ return -errno;
+
+ if (ftruncate(raw_fd, HEADER_SIZE(&header)) < 0)
+ return -errno;
+
+ sz = sizeof(uint64_t) * HEADER_L1_SIZE(&header);
+ l = pread(qcow2_fd, l1_table, sz, HEADER_L1_TABLE_OFFSET(&header));
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != sz)
+ return -EIO;
+
+ for (i = 0; i < HEADER_L1_SIZE(&header); i ++) {
+ uint64_t l2_begin, j;
+
+ r = normalize_offset(&header, l1_table[i], &l2_begin, NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ l = pread(qcow2_fd, l2_table, HEADER_CLUSTER_SIZE(&header), l2_begin);
+ if (l < 0)
+ return -errno;
+ if ((uint64_t) l != HEADER_CLUSTER_SIZE(&header))
+ return -EIO;
+
+ for (j = 0; j < HEADER_L2_SIZE(&header); j++) {
+ uint64_t data_begin, p, compressed_size;
+ bool compressed;
+
+ p = ((i << HEADER_L2_BITS(&header)) + j) << HEADER_CLUSTER_BITS(&header);
+
+ r = normalize_offset(&header, l2_table[j], &data_begin, &compressed, &compressed_size);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (compressed)
+ r = decompress_cluster(
+ qcow2_fd, data_begin,
+ raw_fd, p,
+ compressed_size, HEADER_CLUSTER_SIZE(&header),
+ buffer1, buffer2);
+ else
+ r = copy_cluster(
+ qcow2_fd, data_begin,
+ raw_fd, p,
+ HEADER_CLUSTER_SIZE(&header), buffer1);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int qcow2_detect(int fd) {
+ be32_t id;
+ ssize_t l;
+
+ l = pread(fd, &id, sizeof(id), 0);
+ if (l < 0)
+ return -errno;
+ if (l != sizeof(id))
+ return -EIO;
+
+ return htobe32(QCOW2_MAGIC) == id;
+}
diff --git a/src/import/qcow2-util.h b/src/import/qcow2-util.h
new file mode 100644
index 0000000..f17c159
--- /dev/null
+++ b/src/import/qcow2-util.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int qcow2_detect(int fd);
+int qcow2_convert(int qcow2_fd, int raw_fd);
diff --git a/src/import/test-qcow2.c b/src/import/test-qcow2.c
new file mode 100644
index 0000000..77fed01
--- /dev/null
+++ b/src/import/test-qcow2.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "fd-util.h"
+#include "log.h"
+#include "qcow2-util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_close_ int sfd = -1, dfd = -1;
+ int r;
+
+ if (argc != 3) {
+ log_error("Needs two arguments.");
+ return EXIT_FAILURE;
+ }
+
+ sfd = open(argv[1], O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (sfd < 0) {
+ log_error_errno(errno, "Can't open source file: %m");
+ return EXIT_FAILURE;
+ }
+
+ dfd = open(argv[2], O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, 0666);
+ if (dfd < 0) {
+ log_error_errno(errno, "Can't open destination file: %m");
+ return EXIT_FAILURE;
+ }
+
+ r = qcow2_convert(sfd, dfd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to unpack: %m");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/initctl/initctl.c b/src/initctl/initctl.c
new file mode 100644
index 0000000..e0b7833
--- /dev/null
+++ b/src/initctl/initctl.c
@@ -0,0 +1,360 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/epoll.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "daemon-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "initreq.h"
+#include "list.h"
+#include "log.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "process-util.h"
+#include "special.h"
+
+#define SERVER_FD_MAX 16
+#define TIMEOUT_MSEC ((int) (DEFAULT_EXIT_USEC/USEC_PER_MSEC))
+
+typedef struct Fifo Fifo;
+
+typedef struct Server {
+ int epoll_fd;
+
+ LIST_HEAD(Fifo, fifos);
+ unsigned n_fifos;
+
+ sd_bus *bus;
+
+ bool quit;
+} Server;
+
+struct Fifo {
+ Server *server;
+
+ int fd;
+
+ struct init_request buffer;
+ size_t bytes_read;
+
+ LIST_FIELDS(Fifo, fifo);
+};
+
+static const char *translate_runlevel(int runlevel, bool *isolate) {
+ static const struct {
+ const int runlevel;
+ const char *special;
+ bool isolate;
+ } table[] = {
+ { '0', SPECIAL_POWEROFF_TARGET, false },
+ { '1', SPECIAL_RESCUE_TARGET, true },
+ { 's', SPECIAL_RESCUE_TARGET, true },
+ { 'S', SPECIAL_RESCUE_TARGET, true },
+ { '2', SPECIAL_MULTI_USER_TARGET, true },
+ { '3', SPECIAL_MULTI_USER_TARGET, true },
+ { '4', SPECIAL_MULTI_USER_TARGET, true },
+ { '5', SPECIAL_GRAPHICAL_TARGET, true },
+ { '6', SPECIAL_REBOOT_TARGET, false },
+ };
+
+ assert(isolate);
+
+ for (size_t i = 0; i < ELEMENTSOF(table); i++)
+ if (table[i].runlevel == runlevel) {
+ *isolate = table[i].isolate;
+ if (runlevel == '6' && kexec_loaded())
+ return SPECIAL_KEXEC_TARGET;
+ return table[i].special;
+ }
+
+ return NULL;
+}
+
+static int change_runlevel(Server *s, int runlevel) {
+ const char *target;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *mode;
+ bool isolate = false;
+ int r;
+
+ assert(s);
+
+ target = translate_runlevel(runlevel, &isolate);
+ if (!target) {
+ log_warning("Got request for unknown runlevel %c, ignoring.", runlevel);
+ return 0;
+ }
+
+ if (isolate)
+ mode = "isolate";
+ else
+ mode = "replace-irreversibly";
+
+ log_debug("Running request %s/start/%s", target, mode);
+
+ r = sd_bus_call_method(
+ s->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ &error,
+ NULL,
+ "ss", target, mode);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change runlevel: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static void request_process(Server *s, const struct init_request *req) {
+ assert(s);
+ assert(req);
+
+ if (req->magic != INIT_MAGIC) {
+ log_error("Got initctl request with invalid magic. Ignoring.");
+ return;
+ }
+
+ switch (req->cmd) {
+
+ case INIT_CMD_RUNLVL:
+ if (!isprint(req->runlevel))
+ log_error("Got invalid runlevel. Ignoring.");
+ else
+ switch (req->runlevel) {
+
+ /* we are async anyway, so just use kill for reexec/reload */
+ case 'u':
+ case 'U':
+ if (kill(1, SIGTERM) < 0)
+ log_error_errno(errno, "kill() failed: %m");
+
+ /* The bus connection will be
+ * terminated if PID 1 is reexecuted,
+ * hence let's just exit here, and
+ * rely on that we'll be restarted on
+ * the next request */
+ s->quit = true;
+ break;
+
+ case 'q':
+ case 'Q':
+ if (kill(1, SIGHUP) < 0)
+ log_error_errno(errno, "kill() failed: %m");
+ break;
+
+ default:
+ (void) change_runlevel(s, req->runlevel);
+ }
+ return;
+
+ case INIT_CMD_POWERFAIL:
+ case INIT_CMD_POWERFAILNOW:
+ case INIT_CMD_POWEROK:
+ log_warning("Received UPS/power initctl request. This is not implemented in systemd. Upgrade your UPS daemon!");
+ return;
+
+ case INIT_CMD_CHANGECONS:
+ log_warning("Received console change initctl request. This is not implemented in systemd.");
+ return;
+
+ case INIT_CMD_SETENV:
+ case INIT_CMD_UNSETENV:
+ log_warning("Received environment initctl request. This is not implemented in systemd.");
+ return;
+
+ default:
+ log_warning("Received unknown initctl request. Ignoring.");
+ return;
+ }
+}
+
+static int fifo_process(Fifo *f) {
+ ssize_t l;
+
+ assert(f);
+
+ errno = EIO;
+ l = read(f->fd,
+ ((uint8_t*) &f->buffer) + f->bytes_read,
+ sizeof(f->buffer) - f->bytes_read);
+ if (l <= 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to read from fifo: %m");
+ }
+
+ f->bytes_read += l;
+ assert(f->bytes_read <= sizeof(f->buffer));
+
+ if (f->bytes_read == sizeof(f->buffer)) {
+ request_process(f->server, &f->buffer);
+ f->bytes_read = 0;
+ }
+
+ return 0;
+}
+
+static void fifo_free(Fifo *f) {
+ assert(f);
+
+ if (f->server) {
+ assert(f->server->n_fifos > 0);
+ f->server->n_fifos--;
+ LIST_REMOVE(fifo, f->server->fifos, f);
+ }
+
+ if (f->fd >= 0) {
+ if (f->server)
+ (void) epoll_ctl(f->server->epoll_fd, EPOLL_CTL_DEL, f->fd, NULL);
+
+ safe_close(f->fd);
+ }
+
+ free(f);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(Fifo*, fifo_free);
+
+static void server_done(Server *s) {
+ assert(s);
+
+ while (s->fifos)
+ fifo_free(s->fifos);
+
+ s->epoll_fd = safe_close(s->epoll_fd);
+ s->bus = sd_bus_flush_close_unref(s->bus);
+}
+
+static int server_init(Server *s, unsigned n_sockets) {
+ int r;
+
+ /* This function will leave s partially initialized on failure. Caller needs to clean up. */
+
+ assert(s);
+ assert(n_sockets > 0);
+
+ s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (s->epoll_fd < 0)
+ return log_error_errno(errno, "Failed to create epoll object: %m");
+
+ for (unsigned i = 0; i < n_sockets; i++) {
+ _cleanup_(fifo_freep) Fifo *f = NULL;
+ int fd = SD_LISTEN_FDS_START + i;
+
+ r = sd_is_fifo(fd, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine file descriptor type: %m");
+ if (!r)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Wrong file descriptor type.");
+
+ f = new0(Fifo, 1);
+ if (!f)
+ return log_oom();
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = f,
+ };
+
+ if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
+ return log_error_errno(errno, "Failed to add fifo fd to epoll object: %m");
+
+ f->fd = fd;
+ f->server = s;
+ LIST_PREPEND(fifo, s->fifos, TAKE_PTR(f));
+ s->n_fifos++;
+ }
+
+ r = bus_connect_system_systemd(&s->bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get D-Bus connection: %m");
+
+ return 0;
+}
+
+static int process_event(Server *s, struct epoll_event *ev) {
+ int r;
+ Fifo *f;
+
+ assert(s);
+
+ if (!(ev->events & EPOLLIN))
+ return log_info_errno(SYNTHETIC_ERRNO(EIO),
+ "Got invalid event from epoll. (3)");
+
+ f = (Fifo*) ev->data.ptr;
+ r = fifo_process(f);
+ if (r < 0) {
+ log_info_errno(r, "Got error on fifo: %m");
+ fifo_free(f);
+ return r;
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(server_done) Server server = { .epoll_fd = -1 };
+ _cleanup_(notify_on_cleanup) const char *notify_stop = NULL;
+ int r, n;
+
+ if (argc > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program does not take arguments.");
+
+ log_setup_service();
+
+ umask(0022);
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return log_error_errno(errno,
+ "Failed to read listening file descriptors from environment: %m");
+
+ if (n <= 0 || n > SERVER_FD_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "No or too many file descriptors passed.");
+
+ r = server_init(&server, (unsigned) n);
+ if (r < 0)
+ return r;
+
+ notify_stop = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ while (!server.quit) {
+ struct epoll_event event;
+ int k;
+
+ k = epoll_wait(server.epoll_fd, &event, 1, TIMEOUT_MSEC);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+ return log_error_errno(errno, "epoll_wait() failed: %m");
+ }
+ if (k == 0)
+ break;
+
+ r = process_event(&server, &event);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal-remote/browse.html b/src/journal-remote/browse.html
new file mode 100644
index 0000000..9a5ae80
--- /dev/null
+++ b/src/journal-remote/browse.html
@@ -0,0 +1,547 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <title>Journal</title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+ <style type="text/css">
+ div#divlogs, div#diventry {
+ font-family: monospace;
+ font-size: 7pt;
+ background-color: #ffffff;
+ padding: 1em;
+ margin: 2em 0em;
+ border-radius: 10px 10px 10px 10px;
+ border: 1px solid threedshadow;
+ white-space: nowrap;
+ overflow-x: scroll;
+ }
+ div#diventry {
+ display: none;
+ }
+ div#divlogs {
+ display: block;
+ }
+ body {
+ background-color: #ededed;
+ color: #313739;
+ font: message-box;
+ margin: 3em;
+ }
+ td.timestamp {
+ text-align: right;
+ border-right: 1px dotted lightgrey;
+ padding-right: 5px;
+ }
+ td.process {
+ border-right: 1px dotted lightgrey;
+ padding-left: 5px;
+ padding-right: 5px;
+ }
+ td.message {
+ padding-left: 5px;
+ }
+ td.message > a:link, td.message > a:visited {
+ text-decoration: none;
+ color: #313739;
+ }
+ td.message-error {
+ padding-left: 5px;
+ color: red;
+ font-weight: bold;
+ }
+ td.message-error > a:link, td.message-error > a:visited {
+ text-decoration: none;
+ color: red;
+ }
+ td.message-highlight {
+ padding-left: 5px;
+ font-weight: bold;
+ }
+ td.message-highlight > a:link, td.message-highlight > a:visited {
+ text-decoration: none;
+ color: #313739;
+ }
+ td > a:hover, td > a:active {
+ text-decoration: underline;
+ color: #c13739;
+ }
+ table#tablelogs, table#tableentry {
+ border-collapse: collapse;
+ }
+ td.field {
+ text-align: right;
+ border-right: 1px dotted lightgrey;
+ padding-right: 5px;
+ }
+ td.data {
+ padding-left: 5px;
+ }
+ div#keynav {
+ text-align: center;
+ font-size: 7pt;
+ color: #818789;
+ padding-top: 2em;
+ }
+ span.key {
+ font-weight: bold;
+ color: #313739;
+ }
+ div#buttonnav {
+ text-align: center;
+ }
+ button {
+ font-size: 18pt;
+ font-weight: bold;
+ width: 2em;
+ height: 2em;
+ }
+ div#filternav {
+ text-align: center;
+ }
+ select {
+ width: 50em;
+ }
+ </style>
+</head>
+
+<body>
+ <!-- TODO:
+ - live display
+ - show red lines for reboots -->
+
+ <h1 id="title"></h1>
+
+ <div id="os"></div>
+ <div id="virtualization"></div>
+ <div id="cutoff"></div>
+ <div id="machine"></div>
+ <div id="usage"></div>
+ <div id="showing"></div>
+
+ <div id="filternav">
+ <select id="filter" onchange="onFilterChange(this);" onfocus="onFilterFocus(this);">
+ <option>No filter</option>
+ </select>
+ &nbsp;&nbsp;&nbsp;&nbsp;
+ <input id="boot" type="checkbox" onchange="onBootChange(this);">Only current boot</input>
+ </div>
+
+ <div id="divlogs"><table id="tablelogs"></table></div>
+ <a name="entry"></a>
+ <div id="diventry"><table id="tableentry"></table></div>
+
+ <div id="buttonnav">
+ <button id="head" onclick="entriesLoadHead();" title="First Page">&#8676;</button>
+ <button id="previous" type="button" onclick="entriesLoadPrevious();" title="Previous Page"/>&#8592;</button>
+ <button id="next" type="button" onclick="entriesLoadNext();" title="Next Page"/>&#8594;</button>
+ <button id="tail" type="button" onclick="entriesLoadTail();" title="Last Page"/>&#8677;</button>
+ &nbsp;&nbsp;&nbsp;&nbsp;
+ <button id="more" type="button" onclick="entriesMore();" title="More Entries"/>+</button>
+ <button id="less" type="button" onclick="entriesLess();" title="Fewer Entries"/>-</button>
+ </div>
+
+ <div id="keynav">
+ <span class="key">g</span>: First Page &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">&#8592;, k, BACKSPACE</span>: Previous Page &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">&#8594;, j, SPACE</span>: Next Page &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">G</span>: Last Page &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">+</span>: More entries &nbsp;&nbsp;&nbsp;&nbsp;
+ <span class="key">-</span>: Fewer entries
+ </div>
+
+ <script type="text/javascript">
+ var first_cursor = null;
+ var last_cursor = null;
+
+ function getNEntries() {
+ var n;
+ n = localStorage["n_entries"];
+ if (n == null)
+ return 50;
+ n = parseInt(n);
+ if (n < 10)
+ return 10;
+ if (n > 1000)
+ return 1000;
+ return n;
+ }
+
+ function showNEntries(n) {
+ var showing = document.getElementById("showing");
+ showing.innerHTML = "Showing <b>" + n.toString() + "</b> entries.";
+ }
+
+ function setNEntries(n) {
+ if (n < 10)
+ return 10;
+ if (n > 1000)
+ return 1000;
+ localStorage["n_entries"] = n.toString();
+ showNEntries(n);
+ }
+
+ function machineLoad() {
+ var request = new XMLHttpRequest();
+ request.open("GET", "machine");
+ request.onreadystatechange = machineOnResult;
+ request.setRequestHeader("Accept", "application/json");
+ request.send(null);
+ }
+
+ function formatBytes(u) {
+ if (u >= 1024*1024*1024*1024)
+ return (u/1024/1024/1024/1024).toFixed(1) + " TiB";
+ else if (u >= 1024*1024*1024)
+ return (u/1024/1024/1024).toFixed(1) + " GiB";
+ else if (u >= 1024*1024)
+ return (u/1024/1024).toFixed(1) + " MiB";
+ else if (u >= 1024)
+ return (u/1024).toFixed(1) + " KiB";
+ else
+ return u.toString() + " B";
+ }
+
+ function escapeHTML(s) {
+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+ }
+
+ function machineOnResult(event) {
+ if ((event.currentTarget.readyState != 4) ||
+ (event.currentTarget.status != 200 && event.currentTarget.status != 0))
+ return;
+
+ var d = JSON.parse(event.currentTarget.responseText);
+
+ var title = document.getElementById("title");
+ title.innerHTML = 'Journal of ' + escapeHTML(d.hostname);
+ document.title = 'Journal of ' + escapeHTML(d.hostname);
+
+ var machine = document.getElementById("machine");
+ machine.innerHTML = 'Machine ID is <b>' + d.machine_id + '</b>, current boot ID is <b>' + d.boot_id + '</b>.';
+
+ var cutoff = document.getElementById("cutoff");
+ var from = new Date(parseInt(d.cutoff_from_realtime) / 1000);
+ var to = new Date(parseInt(d.cutoff_to_realtime) / 1000);
+ cutoff.innerHTML = 'Journal begins at <b>' + from.toLocaleString() + '</b> and ends at <b>' + to.toLocaleString() + '</b>.';
+
+ var usage = document.getElementById("usage");
+ usage.innerHTML = 'Disk usage is <b>' + formatBytes(parseInt(d.usage)) + '</b>.';
+
+ var os = document.getElementById("os");
+ os.innerHTML = 'Operating system is <b>' + escapeHTML(d.os_pretty_name) + '</b>.';
+
+ var virtualization = document.getElementById("virtualization");
+ virtualization.innerHTML = d.virtualization == "bare" ? "Running on <b>bare metal</b>." : "Running on virtualization <b>" + escapeHTML(d.virtualization) + "</b>.";
+ }
+
+ function entriesLoad(range) {
+
+ if (range == null) {
+ if (localStorage["cursor"] != null && localStorage["cursor"] != "")
+ range = localStorage["cursor"] + ":0";
+ else
+ range = "";
+ }
+
+ var url = "entries";
+
+ if (localStorage["filter"] != "" && localStorage["filter"] != null) {
+ url += "?_SYSTEMD_UNIT=" + escape(localStorage["filter"]);
+
+ if (localStorage["boot"] == "1")
+ url += "&boot";
+ } else {
+ if (localStorage["boot"] == "1")
+ url += "?boot";
+ }
+
+ var request = new XMLHttpRequest();
+ request.open("GET", url);
+ request.onreadystatechange = entriesOnResult;
+ request.setRequestHeader("Accept", "application/json");
+ request.setRequestHeader("Range", "entries=" + range + ":" + getNEntries().toString());
+ request.send(null);
+ }
+
+ function entriesLoadNext() {
+ if (last_cursor == null)
+ entriesLoad("");
+ else
+ entriesLoad(last_cursor + ":1");
+ }
+
+ function entriesLoadPrevious() {
+ if (first_cursor == null)
+ entriesLoad("");
+ else
+ entriesLoad(first_cursor + ":-" + getNEntries().toString());
+ }
+
+ function entriesLoadHead() {
+ entriesLoad("");
+ }
+
+ function entriesLoadTail() {
+ entriesLoad(":-" + getNEntries().toString());
+ }
+
+ function entriesOnResult(event) {
+
+ if ((event.currentTarget.readyState != 4) ||
+ (event.currentTarget.status != 200 && event.currentTarget.status != 0))
+ return;
+
+ var logs = document.getElementById("tablelogs");
+
+ var lc = null;
+ var fc = null;
+
+ var i, l = event.currentTarget.responseText.split('\n');
+
+ if (l.length <= 1) {
+ logs.innerHTML = '<tbody><tr><td colspan="3"><i>No further entries...</i></td></tr></tbody>';
+ return;
+ }
+
+ var buf = '';
+
+ for (i in l) {
+ if (l[i] == '')
+ continue;
+
+ var d = JSON.parse(l[i]);
+ if (d.MESSAGE == undefined || d.__CURSOR == undefined)
+ continue;
+
+ if (fc == null)
+ fc = d.__CURSOR;
+ lc = d.__CURSOR;
+
+ var priority;
+ if (d.PRIORITY != undefined)
+ priority = parseInt(d.PRIORITY);
+ else
+ priority = 6;
+
+ var clazz;
+ if (priority <= 3)
+ clazz = "message-error";
+ else if (priority <= 5)
+ clazz = "message-highlight";
+ else
+ clazz = "message";
+
+ buf += '<tr><td class="timestamp">';
+
+ if (d.__REALTIME_TIMESTAMP != undefined) {
+ var timestamp = new Date(parseInt(d.__REALTIME_TIMESTAMP) / 1000);
+ buf += timestamp.toLocaleString();
+ }
+
+ buf += '</td><td class="process">';
+
+ if (d.SYSLOG_IDENTIFIER != undefined)
+ buf += escapeHTML(d.SYSLOG_IDENTIFIER);
+ else if (d._COMM != undefined)
+ buf += escapeHTML(d._COMM);
+
+ if (d._PID != undefined)
+ buf += "[" + escapeHTML(d._PID) + "]";
+ else if (d.SYSLOG_PID != undefined)
+ buf += "[" + escapeHTML(d.SYSLOG_PID) + "]";
+
+ buf += '</td><td class="' + clazz + '"><a href="#entry" onclick="onMessageClick(\'' + d.__CURSOR + '\');">';
+
+ if (d.MESSAGE == null)
+ buf += "[blob data]";
+ else if (d.MESSAGE instanceof Array)
+ buf += "[" + formatBytes(d.MESSAGE.length) + " blob data]";
+ else
+ buf += escapeHTML(d.MESSAGE);
+
+ buf += '</a></td></tr>';
+ }
+
+ logs.innerHTML = '<tbody>' + buf + '</tbody>';
+
+ if (fc != null) {
+ first_cursor = fc;
+ localStorage["cursor"] = fc;
+ }
+ if (lc != null)
+ last_cursor = lc;
+ }
+
+ function entriesMore() {
+ setNEntries(getNEntries() + 10);
+ entriesLoad(first_cursor);
+ }
+
+ function entriesLess() {
+ setNEntries(getNEntries() - 10);
+ entriesLoad(first_cursor);
+ }
+
+ function onResultMessageClick(event) {
+ if ((event.currentTarget.readyState != 4) ||
+ (event.currentTarget.status != 200 && event.currentTarget.status != 0))
+ return;
+
+ var d = JSON.parse(event.currentTarget.responseText);
+
+ document.getElementById("diventry").style.display = "block";
+ var entry = document.getElementById("tableentry");
+
+ var buf = "";
+ for (var key in d) {
+ var data = d[key];
+
+ if (data == null)
+ data = "[blob data]";
+ else if (data instanceof Array)
+ data = "[" + formatBytes(data.length) + " blob data]";
+ else
+ data = escapeHTML(data);
+
+ buf += '<tr><td class="field">' + key + '</td><td class="data">' + data + '</td></tr>';
+ }
+ entry.innerHTML = '<tbody>' + buf + '</tbody>';
+ }
+
+ function onMessageClick(t) {
+ var request = new XMLHttpRequest();
+ request.open("GET", "entries?discrete");
+ request.onreadystatechange = onResultMessageClick;
+ request.setRequestHeader("Accept", "application/json");
+ request.setRequestHeader("Range", "entries=" + t + ":0:1");
+ request.send(null);
+ }
+
+ function onKeyUp(event) {
+ switch (event.keyCode) {
+ case 8:
+ case 37:
+ case 75:
+ entriesLoadPrevious();
+ break;
+ case 32:
+ case 39:
+ case 74:
+ entriesLoadNext();
+ break;
+
+ case 71:
+ if (event.shiftKey)
+ entriesLoadTail();
+ else
+ entriesLoadHead();
+ break;
+ case 171:
+ entriesMore();
+ break;
+ case 173:
+ entriesLess();
+ break;
+ }
+ }
+
+ function onMouseWheel(event) {
+ if (event.detail < 0 || event.wheelDelta > 0)
+ entriesLoadPrevious();
+ else
+ entriesLoadNext();
+ }
+
+ function onResultFilterFocus(event) {
+ if ((event.currentTarget.readyState != 4) ||
+ (event.currentTarget.status != 200 && event.currentTarget.status != 0))
+ return;
+
+ var f = document.getElementById("filter");
+
+ var l = event.currentTarget.responseText.split('\n');
+ var buf = '<option>No filter</option>';
+ var j = -1;
+
+ for (i in l) {
+
+ if (l[i] == '')
+ continue;
+
+ var d = JSON.parse(l[i]);
+ if (d._SYSTEMD_UNIT == undefined)
+ continue;
+
+ buf += '<option value="' + escape(d._SYSTEMD_UNIT) + '">' + escapeHTML(d._SYSTEMD_UNIT) + '</option>';
+
+ if (d._SYSTEMD_UNIT == localStorage["filter"])
+ j = i;
+ }
+
+ if (j < 0) {
+ if (localStorage["filter"] != null && localStorage["filter"] != "") {
+ buf += '<option value="' + escape(localStorage["filter"]) + '">' + escapeHTML(localStorage["filter"]) + '</option>';
+ j = i + 1;
+ } else
+ j = 0;
+ }
+
+ f.innerHTML = buf;
+ f.selectedIndex = j;
+ }
+
+ function onFilterFocus(w) {
+ var request = new XMLHttpRequest();
+ request.open("GET", "fields/_SYSTEMD_UNIT");
+ request.onreadystatechange = onResultFilterFocus;
+ request.setRequestHeader("Accept", "application/json");
+ request.send(null);
+ }
+
+ function onFilterChange(w) {
+ if (w.selectedIndex <= 0)
+ localStorage["filter"] = "";
+ else
+ localStorage["filter"] = unescape(w.options[w.selectedIndex].value);
+
+ entriesLoadHead();
+ }
+
+ function onBootChange(w) {
+ localStorage["boot"] = w.checked ? "1" : "0";
+ entriesLoadHead();
+ }
+
+ function initFilter() {
+ var f = document.getElementById("filter");
+
+ var buf = '<option>No filter</option>';
+
+ var filter = localStorage["filter"];
+ var j;
+ if (filter != null && filter != "") {
+ buf += '<option value="' + escape(filter) + '">' + escapeHTML(filter) + '</option>';
+ j = 1;
+ } else
+ j = 0;
+
+ f.innerHTML = buf;
+ f.selectedIndex = j;
+ }
+
+ function installHandlers() {
+ document.onkeyup = onKeyUp;
+
+ var logs = document.getElementById("divlogs");
+ logs.addEventListener("mousewheel", onMouseWheel, false);
+ logs.addEventListener("DOMMouseScroll", onMouseWheel, false);
+ }
+
+ machineLoad();
+ entriesLoad(null);
+ showNEntries(getNEntries());
+ initFilter();
+ installHandlers();
+ </script>
+</body>
+</html>
diff --git a/src/journal-remote/journal-gatewayd.c b/src/journal-remote/journal-gatewayd.c
new file mode 100644
index 0000000..0723f7d
--- /dev/null
+++ b/src/journal-remote/journal-gatewayd.c
@@ -0,0 +1,1036 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <microhttpd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "main-func.h"
+#include "microhttpd-util.h"
+#include "os-util.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "sigbus.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define JOURNAL_WAIT_TIMEOUT (10*USEC_PER_SEC)
+
+static char *arg_key_pem = NULL;
+static char *arg_cert_pem = NULL;
+static char *arg_trust_pem = NULL;
+static const char *arg_directory = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_key_pem, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_cert_pem, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_trust_pem, freep);
+
+typedef struct RequestMeta {
+ sd_journal *journal;
+
+ OutputMode mode;
+
+ char *cursor;
+ int64_t n_skip;
+ uint64_t n_entries;
+ bool n_entries_set;
+
+ FILE *tmp;
+ uint64_t delta, size;
+
+ int argument_parse_error;
+
+ bool follow;
+ bool discrete;
+} RequestMeta;
+
+static const char* const mime_types[_OUTPUT_MODE_MAX] = {
+ [OUTPUT_SHORT] = "text/plain",
+ [OUTPUT_JSON] = "application/json",
+ [OUTPUT_JSON_SSE] = "text/event-stream",
+ [OUTPUT_JSON_SEQ] = "application/json-seq",
+ [OUTPUT_EXPORT] = "application/vnd.fdo.journal",
+};
+
+static RequestMeta *request_meta(void **connection_cls) {
+ RequestMeta *m;
+
+ assert(connection_cls);
+ if (*connection_cls)
+ return *connection_cls;
+
+ m = new0(RequestMeta, 1);
+ if (!m)
+ return NULL;
+
+ *connection_cls = m;
+ return m;
+}
+
+static void request_meta_free(
+ void *cls,
+ struct MHD_Connection *connection,
+ void **connection_cls,
+ enum MHD_RequestTerminationCode toe) {
+
+ RequestMeta *m = *connection_cls;
+
+ if (!m)
+ return;
+
+ sd_journal_close(m->journal);
+
+ safe_fclose(m->tmp);
+
+ free(m->cursor);
+ free(m);
+}
+
+static int open_journal(RequestMeta *m) {
+ assert(m);
+
+ if (m->journal)
+ return 0;
+
+ if (arg_directory)
+ return sd_journal_open_directory(&m->journal, arg_directory, 0);
+ else
+ return sd_journal_open(&m->journal, SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_SYSTEM);
+}
+
+static int request_meta_ensure_tmp(RequestMeta *m) {
+ assert(m);
+
+ if (m->tmp)
+ rewind(m->tmp);
+ else {
+ _cleanup_close_ int fd = -1;
+
+ fd = open_tmpfile_unlinkable("/tmp", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ m->tmp = take_fdopen(&fd, "w+");
+ if (!m->tmp)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static ssize_t request_reader_entries(
+ void *cls,
+ uint64_t pos,
+ char *buf,
+ size_t max) {
+
+ RequestMeta *m = cls;
+ int r;
+ size_t n, k;
+
+ assert(m);
+ assert(buf);
+ assert(max > 0);
+ assert(pos >= m->delta);
+
+ pos -= m->delta;
+
+ while (pos >= m->size) {
+ off_t sz;
+
+ /* End of this entry, so let's serialize the next
+ * one */
+
+ if (m->n_entries_set &&
+ m->n_entries <= 0)
+ return MHD_CONTENT_READER_END_OF_STREAM;
+
+ if (m->n_skip < 0)
+ r = sd_journal_previous_skip(m->journal, (uint64_t) -m->n_skip + 1);
+ else if (m->n_skip > 0)
+ r = sd_journal_next_skip(m->journal, (uint64_t) m->n_skip + 1);
+ else
+ r = sd_journal_next(m->journal);
+
+ if (r < 0) {
+ log_error_errno(r, "Failed to advance journal pointer: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ } else if (r == 0) {
+
+ if (m->follow) {
+ r = sd_journal_wait(m->journal, (uint64_t) JOURNAL_WAIT_TIMEOUT);
+ if (r < 0) {
+ log_error_errno(r, "Couldn't wait for journal event: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+ if (r == SD_JOURNAL_NOP)
+ break;
+
+ continue;
+ }
+
+ return MHD_CONTENT_READER_END_OF_STREAM;
+ }
+
+ if (m->discrete) {
+ assert(m->cursor);
+
+ r = sd_journal_test_cursor(m->journal, m->cursor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to test cursor: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ if (r == 0)
+ return MHD_CONTENT_READER_END_OF_STREAM;
+ }
+
+ pos -= m->size;
+ m->delta += m->size;
+
+ if (m->n_entries_set)
+ m->n_entries -= 1;
+
+ m->n_skip = 0;
+
+ r = request_meta_ensure_tmp(m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create temporary file: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ r = show_journal_entry(m->tmp, m->journal, m->mode, 0, OUTPUT_FULL_WIDTH,
+ NULL, NULL, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to serialize item: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ sz = ftello(m->tmp);
+ if (sz == (off_t) -1) {
+ log_error_errno(errno, "Failed to retrieve file position: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ m->size = (uint64_t) sz;
+ }
+
+ if (m->tmp == NULL && m->follow)
+ return 0;
+
+ if (fseeko(m->tmp, pos, SEEK_SET) < 0) {
+ log_error_errno(errno, "Failed to seek to position: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ n = m->size - pos;
+ if (n < 1)
+ return 0;
+ if (n > max)
+ n = max;
+
+ errno = 0;
+ k = fread(buf, 1, n, m->tmp);
+ if (k != n) {
+ log_error("Failed to read from file: %s", errno != 0 ? strerror_safe(errno) : "Premature EOF");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ return (ssize_t) k;
+}
+
+static int request_parse_accept(
+ RequestMeta *m,
+ struct MHD_Connection *connection) {
+
+ const char *header;
+
+ assert(m);
+ assert(connection);
+
+ header = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Accept");
+ if (!header)
+ return 0;
+
+ if (streq(header, mime_types[OUTPUT_JSON]))
+ m->mode = OUTPUT_JSON;
+ else if (streq(header, mime_types[OUTPUT_JSON_SSE]))
+ m->mode = OUTPUT_JSON_SSE;
+ else if (streq(header, mime_types[OUTPUT_JSON_SEQ]))
+ m->mode = OUTPUT_JSON_SEQ;
+ else if (streq(header, mime_types[OUTPUT_EXPORT]))
+ m->mode = OUTPUT_EXPORT;
+ else
+ m->mode = OUTPUT_SHORT;
+
+ return 0;
+}
+
+static int request_parse_range(
+ RequestMeta *m,
+ struct MHD_Connection *connection) {
+
+ const char *range, *colon, *colon2;
+ int r;
+
+ assert(m);
+ assert(connection);
+
+ range = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Range");
+ if (!range)
+ return 0;
+
+ if (!startswith(range, "entries="))
+ return 0;
+
+ range += 8;
+ range += strspn(range, WHITESPACE);
+
+ colon = strchr(range, ':');
+ if (!colon)
+ m->cursor = strdup(range);
+ else {
+ const char *p;
+
+ colon2 = strchr(colon + 1, ':');
+ if (colon2) {
+ _cleanup_free_ char *t;
+
+ t = strndup(colon + 1, colon2 - colon - 1);
+ if (!t)
+ return -ENOMEM;
+
+ r = safe_atoi64(t, &m->n_skip);
+ if (r < 0)
+ return r;
+ }
+
+ p = (colon2 ? colon2 : colon) + 1;
+ if (*p) {
+ r = safe_atou64(p, &m->n_entries);
+ if (r < 0)
+ return r;
+
+ if (m->n_entries <= 0)
+ return -EINVAL;
+
+ m->n_entries_set = true;
+ }
+
+ m->cursor = strndup(range, colon - range);
+ }
+
+ if (!m->cursor)
+ return -ENOMEM;
+
+ m->cursor[strcspn(m->cursor, WHITESPACE)] = 0;
+ if (isempty(m->cursor))
+ m->cursor = mfree(m->cursor);
+
+ return 0;
+}
+
+static mhd_result request_parse_arguments_iterator(
+ void *cls,
+ enum MHD_ValueKind kind,
+ const char *key,
+ const char *value) {
+
+ RequestMeta *m = cls;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(m);
+
+ if (isempty(key)) {
+ m->argument_parse_error = -EINVAL;
+ return MHD_NO;
+ }
+
+ if (streq(key, "follow")) {
+ if (isempty(value)) {
+ m->follow = true;
+ return MHD_YES;
+ }
+
+ r = parse_boolean(value);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+
+ m->follow = r;
+ return MHD_YES;
+ }
+
+ if (streq(key, "discrete")) {
+ if (isempty(value)) {
+ m->discrete = true;
+ return MHD_YES;
+ }
+
+ r = parse_boolean(value);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+
+ m->discrete = r;
+ return MHD_YES;
+ }
+
+ if (streq(key, "boot")) {
+ if (isempty(value))
+ r = true;
+ else {
+ r = parse_boolean(value);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+ }
+
+ if (r) {
+ char match[9 + 32 + 1] = "_BOOT_ID=";
+ sd_id128_t bid;
+
+ r = sd_id128_get_boot(&bid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get boot ID: %m");
+ return MHD_NO;
+ }
+
+ sd_id128_to_string(bid, match + 9);
+ r = sd_journal_add_match(m->journal, match, sizeof(match)-1);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+ }
+
+ return MHD_YES;
+ }
+
+ p = strjoin(key, "=", strempty(value));
+ if (!p) {
+ m->argument_parse_error = log_oom();
+ return MHD_NO;
+ }
+
+ r = sd_journal_add_match(m->journal, p, 0);
+ if (r < 0) {
+ m->argument_parse_error = r;
+ return MHD_NO;
+ }
+
+ return MHD_YES;
+}
+
+static int request_parse_arguments(
+ RequestMeta *m,
+ struct MHD_Connection *connection) {
+
+ assert(m);
+ assert(connection);
+
+ m->argument_parse_error = 0;
+ MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, request_parse_arguments_iterator, m);
+
+ return m->argument_parse_error;
+}
+
+static int request_handler_entries(
+ struct MHD_Connection *connection,
+ void *connection_cls) {
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+ RequestMeta *m = connection_cls;
+ int r;
+
+ assert(connection);
+ assert(m);
+
+ r = open_journal(m);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to open journal: %m");
+
+ if (request_parse_accept(m, connection) < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to parse Accept header.");
+
+ if (request_parse_range(m, connection) < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to parse Range header.");
+
+ if (request_parse_arguments(m, connection) < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to parse URL arguments.");
+
+ if (m->discrete) {
+ if (!m->cursor)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Discrete seeks require a cursor specification.");
+
+ m->n_entries = 1;
+ m->n_entries_set = true;
+ }
+
+ if (m->cursor)
+ r = sd_journal_seek_cursor(m->journal, m->cursor);
+ else if (m->n_skip >= 0)
+ r = sd_journal_seek_head(m->journal);
+ else if (m->n_skip < 0)
+ r = sd_journal_seek_tail(m->journal);
+ if (r < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to seek in journal.");
+
+ response = MHD_create_response_from_callback(MHD_SIZE_UNKNOWN, 4*1024, request_reader_entries, m, NULL);
+ if (!response)
+ return respond_oom(connection);
+
+ MHD_add_response_header(response, "Content-Type", mime_types[m->mode]);
+ return MHD_queue_response(connection, MHD_HTTP_OK, response);
+}
+
+static int output_field(FILE *f, OutputMode m, const char *d, size_t l) {
+ const char *eq;
+ size_t j;
+
+ eq = memchr(d, '=', l);
+ if (!eq)
+ return -EINVAL;
+
+ j = l - (eq - d + 1);
+
+ if (m == OUTPUT_JSON) {
+ fprintf(f, "{ \"%.*s\" : ", (int) (eq - d), d);
+ json_escape(f, eq+1, j, OUTPUT_FULL_WIDTH);
+ fputs(" }\n", f);
+ } else {
+ fwrite(eq+1, 1, j, f);
+ fputc('\n', f);
+ }
+
+ return 0;
+}
+
+static ssize_t request_reader_fields(
+ void *cls,
+ uint64_t pos,
+ char *buf,
+ size_t max) {
+
+ RequestMeta *m = cls;
+ int r;
+ size_t n, k;
+
+ assert(m);
+ assert(buf);
+ assert(max > 0);
+ assert(pos >= m->delta);
+
+ pos -= m->delta;
+
+ while (pos >= m->size) {
+ off_t sz;
+ const void *d;
+ size_t l;
+
+ /* End of this field, so let's serialize the next
+ * one */
+
+ r = sd_journal_enumerate_unique(m->journal, &d, &l);
+ if (r < 0) {
+ log_error_errno(r, "Failed to advance field index: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ } else if (r == 0)
+ return MHD_CONTENT_READER_END_OF_STREAM;
+
+ pos -= m->size;
+ m->delta += m->size;
+
+ r = request_meta_ensure_tmp(m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create temporary file: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ r = output_field(m->tmp, m->mode, d, l);
+ if (r < 0) {
+ log_error_errno(r, "Failed to serialize item: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ sz = ftello(m->tmp);
+ if (sz == (off_t) -1) {
+ log_error_errno(errno, "Failed to retrieve file position: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ m->size = (uint64_t) sz;
+ }
+
+ if (fseeko(m->tmp, pos, SEEK_SET) < 0) {
+ log_error_errno(errno, "Failed to seek to position: %m");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ n = m->size - pos;
+ if (n > max)
+ n = max;
+
+ errno = 0;
+ k = fread(buf, 1, n, m->tmp);
+ if (k != n) {
+ log_error("Failed to read from file: %s", errno != 0 ? strerror_safe(errno) : "Premature EOF");
+ return MHD_CONTENT_READER_END_WITH_ERROR;
+ }
+
+ return (ssize_t) k;
+}
+
+static int request_handler_fields(
+ struct MHD_Connection *connection,
+ const char *field,
+ void *connection_cls) {
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+ RequestMeta *m = connection_cls;
+ int r;
+
+ assert(connection);
+ assert(m);
+
+ r = open_journal(m);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to open journal: %m");
+
+ if (request_parse_accept(m, connection) < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to parse Accept header.");
+
+ r = sd_journal_query_unique(m->journal, field);
+ if (r < 0)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST, "Failed to query unique fields.");
+
+ response = MHD_create_response_from_callback(MHD_SIZE_UNKNOWN, 4*1024, request_reader_fields, m, NULL);
+ if (!response)
+ return respond_oom(connection);
+
+ MHD_add_response_header(response, "Content-Type", mime_types[m->mode == OUTPUT_JSON ? OUTPUT_JSON : OUTPUT_SHORT]);
+ return MHD_queue_response(connection, MHD_HTTP_OK, response);
+}
+
+static int request_handler_redirect(
+ struct MHD_Connection *connection,
+ const char *target) {
+
+ char *page;
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+
+ assert(connection);
+ assert(target);
+
+ if (asprintf(&page, "<html><body>Please continue to the <a href=\"%s\">journal browser</a>.</body></html>", target) < 0)
+ return respond_oom(connection);
+
+ response = MHD_create_response_from_buffer(strlen(page), page, MHD_RESPMEM_MUST_FREE);
+ if (!response) {
+ free(page);
+ return respond_oom(connection);
+ }
+
+ MHD_add_response_header(response, "Content-Type", "text/html");
+ MHD_add_response_header(response, "Location", target);
+ return MHD_queue_response(connection, MHD_HTTP_MOVED_PERMANENTLY, response);
+}
+
+static int request_handler_file(
+ struct MHD_Connection *connection,
+ const char *path,
+ const char *mime_type) {
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ assert(connection);
+ assert(path);
+ assert(mime_type);
+
+ fd = open(path, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return mhd_respondf(connection, errno, MHD_HTTP_NOT_FOUND, "Failed to open file %s: %m", path);
+
+ if (fstat(fd, &st) < 0)
+ return mhd_respondf(connection, errno, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to stat file: %m");
+
+ response = MHD_create_response_from_fd_at_offset64(st.st_size, fd, 0);
+ if (!response)
+ return respond_oom(connection);
+ TAKE_FD(fd);
+
+ MHD_add_response_header(response, "Content-Type", mime_type);
+ return MHD_queue_response(connection, MHD_HTTP_OK, response);
+}
+
+static int get_virtualization(char **v) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ char *b = NULL;
+ int r;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Virtualization",
+ NULL,
+ &b);
+ if (r < 0)
+ return r;
+
+ if (isempty(b)) {
+ free(b);
+ *v = NULL;
+ return 0;
+ }
+
+ *v = b;
+ return 1;
+}
+
+static int request_handler_machine(
+ struct MHD_Connection *connection,
+ void *connection_cls) {
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response = NULL;
+ RequestMeta *m = connection_cls;
+ int r;
+ _cleanup_free_ char* hostname = NULL, *os_name = NULL;
+ uint64_t cutoff_from = 0, cutoff_to = 0, usage = 0;
+ sd_id128_t mid, bid;
+ _cleanup_free_ char *v = NULL, *json = NULL;
+
+ assert(connection);
+ assert(m);
+
+ r = open_journal(m);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to open journal: %m");
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine machine ID: %m");
+
+ r = sd_id128_get_boot(&bid);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine boot ID: %m");
+
+ hostname = gethostname_malloc();
+ if (!hostname)
+ return respond_oom(connection);
+
+ r = sd_journal_get_usage(m->journal, &usage);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine disk usage: %m");
+
+ r = sd_journal_get_cutoff_realtime_usec(m->journal, &cutoff_from, &cutoff_to);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine disk usage: %m");
+
+ (void) parse_os_release(NULL, "PRETTY_NAME", &os_name, NULL);
+ (void) get_virtualization(&v);
+
+ r = asprintf(&json,
+ "{ \"machine_id\" : \"" SD_ID128_FORMAT_STR "\","
+ "\"boot_id\" : \"" SD_ID128_FORMAT_STR "\","
+ "\"hostname\" : \"%s\","
+ "\"os_pretty_name\" : \"%s\","
+ "\"virtualization\" : \"%s\","
+ "\"usage\" : \"%"PRIu64"\","
+ "\"cutoff_from_realtime\" : \"%"PRIu64"\","
+ "\"cutoff_to_realtime\" : \"%"PRIu64"\" }\n",
+ SD_ID128_FORMAT_VAL(mid),
+ SD_ID128_FORMAT_VAL(bid),
+ hostname_cleanup(hostname),
+ os_name ? os_name : "Linux",
+ v ? v : "bare",
+ usage,
+ cutoff_from,
+ cutoff_to);
+ if (r < 0)
+ return respond_oom(connection);
+
+ response = MHD_create_response_from_buffer(strlen(json), json, MHD_RESPMEM_MUST_FREE);
+ if (!response)
+ return respond_oom(connection);
+ TAKE_PTR(json);
+
+ MHD_add_response_header(response, "Content-Type", "application/json");
+ return MHD_queue_response(connection, MHD_HTTP_OK, response);
+}
+
+static mhd_result request_handler(
+ void *cls,
+ struct MHD_Connection *connection,
+ const char *url,
+ const char *method,
+ const char *version,
+ const char *upload_data,
+ size_t *upload_data_size,
+ void **connection_cls) {
+ int r, code;
+
+ assert(connection);
+ assert(connection_cls);
+ assert(url);
+ assert(method);
+
+ if (!streq(method, "GET"))
+ return mhd_respond(connection, MHD_HTTP_NOT_ACCEPTABLE, "Unsupported method.");
+
+ if (!*connection_cls) {
+ if (!request_meta(connection_cls))
+ return respond_oom(connection);
+ return MHD_YES;
+ }
+
+ if (arg_trust_pem) {
+ r = check_permissions(connection, &code, NULL);
+ if (r < 0)
+ return code;
+ }
+
+ if (streq(url, "/"))
+ return request_handler_redirect(connection, "/browse");
+
+ if (streq(url, "/entries"))
+ return request_handler_entries(connection, *connection_cls);
+
+ if (startswith(url, "/fields/"))
+ return request_handler_fields(connection, url + 8, *connection_cls);
+
+ if (streq(url, "/browse"))
+ return request_handler_file(connection, DOCUMENT_ROOT "/browse.html", "text/html");
+
+ if (streq(url, "/machine"))
+ return request_handler_machine(connection, *connection_cls);
+
+ return mhd_respond(connection, MHD_HTTP_NOT_FOUND, "Not found.");
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-journal-gatewayd.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] ...\n\n"
+ "HTTP server for journal events.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --cert=CERT.PEM Server certificate in PEM format\n"
+ " --key=KEY.PEM Server key in PEM format\n"
+ " --trust=CERT.PEM Certificate authority certificate in PEM format\n"
+ " -D --directory=PATH Serve journal files in directory\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_KEY,
+ ARG_CERT,
+ ARG_TRUST,
+ };
+
+ int r, c;
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "key", required_argument, NULL, ARG_KEY },
+ { "cert", required_argument, NULL, ARG_CERT },
+ { "trust", required_argument, NULL, ARG_TRUST },
+ { "directory", required_argument, NULL, 'D' },
+ {}
+ };
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hD:", options, NULL)) >= 0)
+
+ switch(c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_KEY:
+ if (arg_key_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Key file specified twice");
+ r = read_full_file_full(AT_FDCWD, optarg, READ_FULL_FILE_CONNECT_SOCKET, NULL, &arg_key_pem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read key file: %m");
+ assert(arg_key_pem);
+ break;
+
+ case ARG_CERT:
+ if (arg_cert_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Certificate file specified twice");
+ r = read_full_file_full(AT_FDCWD, optarg, READ_FULL_FILE_CONNECT_SOCKET, NULL, &arg_cert_pem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read certificate file: %m");
+ assert(arg_cert_pem);
+ break;
+
+ case ARG_TRUST:
+#if HAVE_GNUTLS
+ if (arg_trust_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "CA certificate file specified twice");
+ r = read_full_file_full(AT_FDCWD, optarg, READ_FULL_FILE_CONNECT_SOCKET, NULL, &arg_trust_pem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read CA certificate file: %m");
+ assert(arg_trust_pem);
+ break;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --trust is not available.");
+#endif
+ case 'D':
+ arg_directory = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program does not take arguments.");
+
+ if (!!arg_key_pem != !!arg_cert_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Certificate and key files must be specified together");
+
+ if (arg_trust_pem && !arg_key_pem)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "CA certificate can only be used with certificate file");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(MHD_stop_daemonp) struct MHD_Daemon *d = NULL;
+ struct MHD_OptionItem opts[] = {
+ { MHD_OPTION_NOTIFY_COMPLETED,
+ (intptr_t) request_meta_free, NULL },
+ { MHD_OPTION_EXTERNAL_LOGGER,
+ (intptr_t) microhttpd_logger, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ { MHD_OPTION_END, 0, NULL },
+ };
+ int opts_pos = 2;
+
+ /* We force MHD_USE_ITC here, in order to make sure
+ * libmicrohttpd doesn't use shutdown() on our listening
+ * socket, which would break socket re-activation. See
+ *
+ * https://lists.gnu.org/archive/html/libmicrohttpd/2015-09/msg00014.html
+ * https://github.com/systemd/systemd/pull/1286
+ */
+
+ int flags =
+ MHD_USE_DEBUG |
+ MHD_USE_DUAL_STACK |
+ MHD_USE_ITC |
+ MHD_USE_POLL_INTERNAL_THREAD |
+ MHD_USE_THREAD_PER_CONNECTION;
+ int r, n;
+
+ log_setup_service();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ sigbus_install();
+
+ r = setup_gnutls_logger(NULL);
+ if (r < 0)
+ return r;
+
+ n = sd_listen_fds(1);
+ if (n < 0)
+ return log_error_errno(n, "Failed to determine passed sockets: %m");
+ if (n > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Can't listen on more than one socket.");
+
+ if (n == 1)
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ { MHD_OPTION_LISTEN_SOCKET, SD_LISTEN_FDS_START };
+
+ if (arg_key_pem) {
+ assert(arg_cert_pem);
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ { MHD_OPTION_HTTPS_MEM_KEY, 0, arg_key_pem };
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ { MHD_OPTION_HTTPS_MEM_CERT, 0, arg_cert_pem };
+ flags |= MHD_USE_TLS;
+ }
+
+ if (arg_trust_pem) {
+ assert(flags & MHD_USE_TLS);
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ { MHD_OPTION_HTTPS_MEM_TRUST, 0, arg_trust_pem };
+ }
+
+ d = MHD_start_daemon(flags, 19531,
+ NULL, NULL,
+ request_handler, NULL,
+ MHD_OPTION_ARRAY, opts,
+ MHD_OPTION_END);
+ if (!d)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to start daemon!");
+
+ pause();
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal-remote/journal-remote-main.c b/src/journal-remote/journal-remote-main.c
new file mode 100644
index 0000000..d2aa181
--- /dev/null
+++ b/src/journal-remote/journal-remote-main.c
@@ -0,0 +1,1177 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "conf-parser.h"
+#include "daemon-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "journal-remote-write.h"
+#include "journal-remote.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "socket-netlink.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "strv.h"
+
+#define PRIV_KEY_FILE CERTIFICATE_ROOT "/private/journal-remote.pem"
+#define CERT_FILE CERTIFICATE_ROOT "/certs/journal-remote.pem"
+#define TRUST_FILE CERTIFICATE_ROOT "/ca/trusted.pem"
+
+static const char* arg_url = NULL;
+static const char* arg_getter = NULL;
+static const char* arg_listen_raw = NULL;
+static const char* arg_listen_http = NULL;
+static const char* arg_listen_https = NULL;
+static char** arg_files = NULL; /* Do not free this. */
+static int arg_compress = true;
+static int arg_seal = false;
+static int http_socket = -1, https_socket = -1;
+static char** arg_gnutls_log = NULL;
+
+static JournalWriteSplitMode arg_split_mode = _JOURNAL_WRITE_SPLIT_INVALID;
+static const char* arg_output = NULL;
+
+static char *arg_key = NULL;
+static char *arg_cert = NULL;
+static char *arg_trust = NULL;
+static bool arg_trust_all = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_gnutls_log, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_key, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_cert, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_trust, freep);
+
+static const char* const journal_write_split_mode_table[_JOURNAL_WRITE_SPLIT_MAX] = {
+ [JOURNAL_WRITE_SPLIT_NONE] = "none",
+ [JOURNAL_WRITE_SPLIT_HOST] = "host",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(journal_write_split_mode, JournalWriteSplitMode);
+static DEFINE_CONFIG_PARSE_ENUM(config_parse_write_split_mode,
+ journal_write_split_mode,
+ JournalWriteSplitMode,
+ "Failed to parse split mode setting");
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+static int spawn_child(const char* child, char** argv) {
+ pid_t child_pid;
+ int fd[2], r;
+
+ if (pipe(fd) < 0)
+ return log_error_errno(errno, "Failed to create pager pipe: %m");
+
+ r = safe_fork("(remote)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &child_pid);
+ if (r < 0) {
+ safe_close_pair(fd);
+ return r;
+ }
+
+ /* In the child */
+ if (r == 0) {
+ safe_close(fd[0]);
+
+ r = rearrange_stdio(STDIN_FILENO, fd[1], STDERR_FILENO);
+ if (r < 0) {
+ log_error_errno(r, "Failed to dup pipe to stdout: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ execvp(child, argv);
+ log_error_errno(errno, "Failed to exec child %s: %m", child);
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close(fd[1]);
+
+ r = fd_nonblock(fd[0], true);
+ if (r < 0)
+ log_warning_errno(errno, "Failed to set child pipe to non-blocking: %m");
+
+ return fd[0];
+}
+
+static int spawn_curl(const char* url) {
+ char **argv = STRV_MAKE("curl",
+ "-HAccept: application/vnd.fdo.journal",
+ "--silent",
+ "--show-error",
+ url);
+ int r;
+
+ r = spawn_child("curl", argv);
+ if (r < 0)
+ log_error_errno(r, "Failed to spawn curl: %m");
+ return r;
+}
+
+static int spawn_getter(const char *getter) {
+ int r;
+ _cleanup_strv_free_ char **words = NULL;
+
+ assert(getter);
+ r = strv_split_full(&words, getter, WHITESPACE, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to split getter option: %m");
+
+ r = spawn_child(words[0], words);
+ if (r < 0)
+ log_error_errno(r, "Failed to spawn getter %s: %m", getter);
+
+ return r;
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+static int null_timer_event_handler(sd_event_source *s,
+ uint64_t usec,
+ void *userdata);
+static int dispatch_http_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+
+static int request_meta(void **connection_cls, int fd, char *hostname) {
+ RemoteSource *source;
+ Writer *writer;
+ int r;
+
+ assert(connection_cls);
+ if (*connection_cls)
+ return 0;
+
+ r = journal_remote_get_writer(journal_remote_server_global, hostname, &writer);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to get writer for source %s: %m",
+ hostname);
+
+ source = source_new(fd, true, hostname, writer);
+ if (!source) {
+ writer_unref(writer);
+ return log_oom();
+ }
+
+ log_debug("Added RemoteSource as connection metadata %p", source);
+
+ *connection_cls = source;
+ return 0;
+}
+
+static void request_meta_free(void *cls,
+ struct MHD_Connection *connection,
+ void **connection_cls,
+ enum MHD_RequestTerminationCode toe) {
+ RemoteSource *s;
+
+ assert(connection_cls);
+ s = *connection_cls;
+
+ if (s) {
+ log_debug("Cleaning up connection metadata %p", s);
+ source_free(s);
+ *connection_cls = NULL;
+ }
+}
+
+static int process_http_upload(
+ struct MHD_Connection *connection,
+ const char *upload_data,
+ size_t *upload_data_size,
+ RemoteSource *source) {
+
+ bool finished = false;
+ size_t remaining;
+ int r;
+
+ assert(source);
+
+ log_trace("%s: connection %p, %zu bytes",
+ __func__, connection, *upload_data_size);
+
+ if (*upload_data_size) {
+ log_trace("Received %zu bytes", *upload_data_size);
+
+ r = journal_importer_push_data(&source->importer,
+ upload_data, *upload_data_size);
+ if (r < 0)
+ return mhd_respond_oom(connection);
+
+ *upload_data_size = 0;
+ } else
+ finished = true;
+
+ for (;;) {
+ r = process_source(source,
+ journal_remote_server_global->compress,
+ journal_remote_server_global->seal);
+ if (r == -EAGAIN)
+ break;
+ if (r < 0) {
+ if (r == -ENOBUFS)
+ log_warning_errno(r, "Entry is above the maximum of %u, aborting connection %p.",
+ DATA_SIZE_MAX, connection);
+ else if (r == -E2BIG)
+ log_warning_errno(r, "Entry with more fields than the maximum of %u, aborting connection %p.",
+ ENTRY_FIELD_COUNT_MAX, connection);
+ else
+ log_warning_errno(r, "Failed to process data, aborting connection %p: %m",
+ connection);
+ return MHD_NO;
+ }
+ }
+
+ if (!finished)
+ return MHD_YES;
+
+ /* The upload is finished */
+
+ remaining = journal_importer_bytes_remaining(&source->importer);
+ if (remaining > 0) {
+ log_warning("Premature EOF byte. %zu bytes lost.", remaining);
+ return mhd_respondf(connection,
+ 0, MHD_HTTP_EXPECTATION_FAILED,
+ "Premature EOF. %zu bytes of trailing data not processed.",
+ remaining);
+ }
+
+ return mhd_respond(connection, MHD_HTTP_ACCEPTED, "OK.");
+};
+
+static mhd_result request_handler(
+ void *cls,
+ struct MHD_Connection *connection,
+ const char *url,
+ const char *method,
+ const char *version,
+ const char *upload_data,
+ size_t *upload_data_size,
+ void **connection_cls) {
+
+ const char *header;
+ int r, code, fd;
+ _cleanup_free_ char *hostname = NULL;
+ bool chunked = false;
+
+ assert(connection);
+ assert(connection_cls);
+ assert(url);
+ assert(method);
+
+ log_trace("Handling a connection %s %s %s", method, url, version);
+
+ if (*connection_cls)
+ return process_http_upload(connection,
+ upload_data, upload_data_size,
+ *connection_cls);
+
+ if (!streq(method, "POST"))
+ return mhd_respond(connection, MHD_HTTP_NOT_ACCEPTABLE, "Unsupported method.");
+
+ if (!streq(url, "/upload"))
+ return mhd_respond(connection, MHD_HTTP_NOT_FOUND, "Not found.");
+
+ header = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Content-Type");
+ if (!header || !streq(header, "application/vnd.fdo.journal"))
+ return mhd_respond(connection, MHD_HTTP_UNSUPPORTED_MEDIA_TYPE,
+ "Content-Type: application/vnd.fdo.journal is required.");
+
+ header = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Transfer-Encoding");
+ if (header) {
+ if (!strcaseeq(header, "chunked"))
+ return mhd_respondf(connection, 0, MHD_HTTP_BAD_REQUEST,
+ "Unsupported Transfer-Encoding type: %s", header);
+
+ chunked = true;
+ }
+
+ header = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Content-Length");
+ if (header) {
+ size_t len;
+
+ if (chunked)
+ return mhd_respond(connection, MHD_HTTP_BAD_REQUEST,
+ "Content-Length must not specified when Transfer-Encoding type is 'chuncked'");
+
+ r = safe_atozu(header, &len);
+ if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_LENGTH_REQUIRED,
+ "Content-Length: %s cannot be parsed: %m", header);
+
+ if (len > ENTRY_SIZE_MAX)
+ /* When serialized, an entry of maximum size might be slightly larger,
+ * so this does not correspond exactly to the limit in journald. Oh well.
+ */
+ return mhd_respondf(connection, 0, MHD_HTTP_PAYLOAD_TOO_LARGE,
+ "Payload larger than maximum size of %u bytes", ENTRY_SIZE_MAX);
+ }
+
+ {
+ const union MHD_ConnectionInfo *ci;
+
+ ci = MHD_get_connection_info(connection,
+ MHD_CONNECTION_INFO_CONNECTION_FD);
+ if (!ci) {
+ log_error("MHD_get_connection_info failed: cannot get remote fd");
+ return mhd_respond(connection, MHD_HTTP_INTERNAL_SERVER_ERROR,
+ "Cannot check remote address.");
+ }
+
+ fd = ci->connect_fd;
+ assert(fd >= 0);
+ }
+
+ if (journal_remote_server_global->check_trust) {
+ r = check_permissions(connection, &code, &hostname);
+ if (r < 0)
+ return code;
+ } else {
+ r = getpeername_pretty(fd, false, &hostname);
+ if (r < 0)
+ return mhd_respond(connection, MHD_HTTP_INTERNAL_SERVER_ERROR,
+ "Cannot check remote hostname.");
+ }
+
+ assert(hostname);
+
+ r = request_meta(connection_cls, fd, hostname);
+ if (r == -ENOMEM)
+ return respond_oom(connection);
+ else if (r < 0)
+ return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "%m");
+
+ hostname = NULL;
+ return MHD_YES;
+}
+
+static int setup_microhttpd_server(RemoteServer *s,
+ int fd,
+ const char *key,
+ const char *cert,
+ const char *trust) {
+ struct MHD_OptionItem opts[] = {
+ { MHD_OPTION_NOTIFY_COMPLETED, (intptr_t) request_meta_free},
+ { MHD_OPTION_EXTERNAL_LOGGER, (intptr_t) microhttpd_logger},
+ { MHD_OPTION_LISTEN_SOCKET, fd},
+ { MHD_OPTION_CONNECTION_MEMORY_LIMIT, 128*1024},
+ { MHD_OPTION_END},
+ { MHD_OPTION_END},
+ { MHD_OPTION_END},
+ { MHD_OPTION_END},
+ { MHD_OPTION_END}};
+ int opts_pos = 4;
+ int flags =
+ MHD_USE_DEBUG |
+ MHD_USE_DUAL_STACK |
+ MHD_USE_EPOLL |
+ MHD_USE_ITC;
+
+ const union MHD_DaemonInfo *info;
+ int r, epoll_fd;
+ MHDDaemonWrapper *d;
+
+ assert(fd >= 0);
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make fd:%d nonblocking: %m", fd);
+
+/* MHD_OPTION_STRICT_FOR_CLIENT is introduced in microhttpd 0.9.54,
+ * and MHD_USE_PEDANTIC_CHECKS will be deprecated in future.
+ * If MHD_USE_PEDANTIC_CHECKS is '#define'd, then it is deprecated
+ * and we should use MHD_OPTION_STRICT_FOR_CLIENT. On the other hand,
+ * if MHD_USE_PEDANTIC_CHECKS is not '#define'd, then it is not
+ * deprecated yet and there exists an enum element with the same name.
+ * So we can safely use it. */
+#ifdef MHD_USE_PEDANTIC_CHECKS
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ {MHD_OPTION_STRICT_FOR_CLIENT, 1};
+#else
+ flags |= MHD_USE_PEDANTIC_CHECKS;
+#endif
+
+ if (key) {
+ assert(cert);
+
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ {MHD_OPTION_HTTPS_MEM_KEY, 0, (char*) key};
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ {MHD_OPTION_HTTPS_MEM_CERT, 0, (char*) cert};
+
+ flags |= MHD_USE_TLS;
+
+ if (trust)
+ opts[opts_pos++] = (struct MHD_OptionItem)
+ {MHD_OPTION_HTTPS_MEM_TRUST, 0, (char*) trust};
+ }
+
+ d = new(MHDDaemonWrapper, 1);
+ if (!d)
+ return log_oom();
+
+ d->fd = (uint64_t) fd;
+
+ d->daemon = MHD_start_daemon(flags, 0,
+ NULL, NULL,
+ request_handler, NULL,
+ MHD_OPTION_ARRAY, opts,
+ MHD_OPTION_END);
+ if (!d->daemon) {
+ log_error("Failed to start µhttp daemon");
+ r = -EINVAL;
+ goto error;
+ }
+
+ log_debug("Started MHD %s daemon on fd:%d (wrapper @ %p)",
+ key ? "HTTPS" : "HTTP", fd, d);
+
+ info = MHD_get_daemon_info(d->daemon, MHD_DAEMON_INFO_EPOLL_FD_LINUX_ONLY);
+ if (!info) {
+ log_error("µhttp returned NULL daemon info");
+ r = -EOPNOTSUPP;
+ goto error;
+ }
+
+ epoll_fd = info->listen_fd;
+ if (epoll_fd < 0) {
+ log_error("µhttp epoll fd is invalid");
+ r = -EUCLEAN;
+ goto error;
+ }
+
+ r = sd_event_add_io(s->events, &d->io_event,
+ epoll_fd, EPOLLIN,
+ dispatch_http_event, d);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add event callback: %m");
+ goto error;
+ }
+
+ r = sd_event_source_set_description(d->io_event, "io_event");
+ if (r < 0) {
+ log_error_errno(r, "Failed to set source name: %m");
+ goto error;
+ }
+
+ r = sd_event_add_time(s->events, &d->timer_event,
+ CLOCK_MONOTONIC, (uint64_t) -1, 0,
+ null_timer_event_handler, d);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add timer_event: %m");
+ goto error;
+ }
+
+ r = sd_event_source_set_description(d->timer_event, "timer_event");
+ if (r < 0) {
+ log_error_errno(r, "Failed to set source name: %m");
+ goto error;
+ }
+
+ r = hashmap_ensure_allocated(&s->daemons, &uint64_hash_ops);
+ if (r < 0) {
+ log_oom();
+ goto error;
+ }
+
+ r = hashmap_put(s->daemons, &d->fd, d);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add daemon to hashmap: %m");
+ goto error;
+ }
+
+ s->active++;
+ return 0;
+
+error:
+ MHD_stop_daemon(d->daemon);
+ free(d->daemon);
+ free(d);
+ return r;
+}
+
+static int setup_microhttpd_socket(RemoteServer *s,
+ const char *address,
+ const char *key,
+ const char *cert,
+ const char *trust) {
+ int fd;
+
+ fd = make_socket_fd(LOG_DEBUG, address, SOCK_STREAM, SOCK_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ return setup_microhttpd_server(s, fd, key, cert, trust);
+}
+
+static int null_timer_event_handler(sd_event_source *timer_event,
+ uint64_t usec,
+ void *userdata) {
+ return dispatch_http_event(timer_event, 0, 0, userdata);
+}
+
+static int dispatch_http_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ MHDDaemonWrapper *d = userdata;
+ int r;
+ MHD_UNSIGNED_LONG_LONG timeout = ULLONG_MAX;
+
+ assert(d);
+
+ r = MHD_run(d->daemon);
+ if (r == MHD_NO)
+ // FIXME: unregister daemon
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "MHD_run failed!");
+ if (MHD_get_timeout(d->daemon, &timeout) == MHD_NO)
+ timeout = ULLONG_MAX;
+
+ r = sd_event_source_set_time(d->timer_event, timeout);
+ if (r < 0) {
+ log_warning_errno(r, "Unable to set event loop timeout: %m, this may result in indefinite blocking!");
+ return 1;
+ }
+
+ r = sd_event_source_set_enabled(d->timer_event, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Unable to enable timer_event: %m, this may result in indefinite blocking!");
+
+ return 1; /* work to do */
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+static int setup_signals(RemoteServer *s) {
+ int r;
+
+ assert(s);
+
+ assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, -1) >= 0);
+
+ r = sd_event_add_signal(s->events, &s->sigterm_event, SIGTERM, NULL, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(s->events, &s->sigint_event, SIGINT, NULL, s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int setup_raw_socket(RemoteServer *s, const char *address) {
+ int fd;
+
+ fd = make_socket_fd(LOG_INFO, address, SOCK_STREAM, SOCK_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ return journal_remote_add_raw_socket(s, fd);
+}
+
+static int create_remoteserver(
+ RemoteServer *s,
+ const char* key,
+ const char* cert,
+ const char* trust) {
+
+ int r, n, fd;
+ char **file;
+
+ r = journal_remote_server_init(s, arg_output, arg_split_mode, arg_compress, arg_seal);
+ if (r < 0)
+ return r;
+
+ r = setup_signals(s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up signals: %m");
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
+ else
+ log_debug("Received %d descriptors", n);
+
+ if (MAX(http_socket, https_socket) >= SD_LISTEN_FDS_START + n)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADFD),
+ "Received fewer sockets than expected");
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+ if (sd_is_socket(fd, AF_UNSPEC, 0, true)) {
+ log_debug("Received a listening socket (fd:%d)", fd);
+
+ if (fd == http_socket)
+ r = setup_microhttpd_server(s, fd, NULL, NULL, NULL);
+ else if (fd == https_socket)
+ r = setup_microhttpd_server(s, fd, key, cert, trust);
+ else
+ r = journal_remote_add_raw_socket(s, fd);
+ } else if (sd_is_socket(fd, AF_UNSPEC, 0, false)) {
+ char *hostname;
+
+ r = getpeername_pretty(fd, false, &hostname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve remote name: %m");
+
+ log_debug("Received a connection socket (fd:%d) from %s", fd, hostname);
+
+ r = journal_remote_add_source(s, fd, hostname, true);
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown socket passed on fd:%d", fd);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to register socket (fd:%d): %m", fd);
+ }
+
+ if (arg_getter) {
+ log_info("Spawning getter %s...", arg_getter);
+ fd = spawn_getter(arg_getter);
+ if (fd < 0)
+ return fd;
+
+ r = journal_remote_add_source(s, fd, (char*) arg_output, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_url) {
+ const char *url, *hostname;
+
+ if (!strstr(arg_url, "/entries")) {
+ if (endswith(arg_url, "/"))
+ url = strjoina(arg_url, "entries");
+ else
+ url = strjoina(arg_url, "/entries");
+ } else
+ url = strdupa(arg_url);
+
+ log_info("Spawning curl %s...", url);
+ fd = spawn_curl(url);
+ if (fd < 0)
+ return fd;
+
+ hostname = STARTSWITH_SET(arg_url, "https://", "http://");
+ if (!hostname)
+ hostname = arg_url;
+
+ hostname = strndupa(hostname, strcspn(hostname, "/:"));
+
+ r = journal_remote_add_source(s, fd, (char *) hostname, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_listen_raw) {
+ log_debug("Listening on a socket...");
+ r = setup_raw_socket(s, arg_listen_raw);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_listen_http) {
+ r = setup_microhttpd_socket(s, arg_listen_http, NULL, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_listen_https) {
+ r = setup_microhttpd_socket(s, arg_listen_https, key, cert, trust);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(file, arg_files) {
+ const char *output_name;
+
+ if (streq(*file, "-")) {
+ log_debug("Using standard input as source.");
+
+ fd = STDIN_FILENO;
+ output_name = "stdin";
+ } else {
+ log_debug("Reading file %s...", *file);
+
+ fd = open(*file, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", *file);
+ output_name = *file;
+ }
+
+ r = journal_remote_add_source(s, fd, (char*) output_name, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->active == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Zero sources specified");
+
+ if (arg_split_mode == JOURNAL_WRITE_SPLIT_NONE) {
+ /* In this case we know what the writer will be
+ called, so we can create it and verify that we can
+ create output as expected. */
+ r = journal_remote_get_writer(s, NULL, &s->_single_writer);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int negative_fd(const char *spec) {
+ /* Return a non-positive number as its inverse, -EINVAL otherwise. */
+
+ int fd, r;
+
+ r = safe_atoi(spec, &fd);
+ if (r < 0)
+ return r;
+
+ if (fd > 0)
+ return -EINVAL;
+ else
+ return -fd;
+}
+
+static int parse_config(void) {
+ const ConfigTableItem items[] = {
+ { "Remote", "Seal", config_parse_bool, 0, &arg_seal },
+ { "Remote", "SplitMode", config_parse_write_split_mode, 0, &arg_split_mode },
+ { "Remote", "ServerKeyFile", config_parse_path, 0, &arg_key },
+ { "Remote", "ServerCertificateFile", config_parse_path, 0, &arg_cert },
+ { "Remote", "TrustedCertificateFile", config_parse_path, 0, &arg_trust },
+ {}
+ };
+
+ return config_parse_many_nulstr(
+ PKGSYSCONFDIR "/journal-remote.conf",
+ CONF_PATHS_NULSTR("systemd/journal-remote.conf.d"),
+ "Remote\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-journal-remote.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] {FILE|-}...\n\n"
+ "Write external journal events to journal file(s).\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --url=URL Read events from systemd-journal-gatewayd at URL\n"
+ " --getter=COMMAND Read events from the output of COMMAND\n"
+ " --listen-raw=ADDR Listen for connections at ADDR\n"
+ " --listen-http=ADDR Listen for HTTP connections at ADDR\n"
+ " --listen-https=ADDR Listen for HTTPS connections at ADDR\n"
+ " -o --output=FILE|DIR Write output to FILE or DIR/external-*.journal\n"
+ " --compress[=BOOL] Use compression in the output journal (default: yes)\n"
+ " --seal[=BOOL] Use event sealing (default: no)\n"
+ " --key=FILENAME SSL key in PEM format (default:\n"
+ " \"" PRIV_KEY_FILE "\")\n"
+ " --cert=FILENAME SSL certificate in PEM format (default:\n"
+ " \"" CERT_FILE "\")\n"
+ " --trust=FILENAME|all SSL CA certificate or disable checking (default:\n"
+ " \"" TRUST_FILE "\")\n"
+ " --gnutls-log=CATEGORY...\n"
+ " Specify a list of gnutls logging categories\n"
+ " --split-mode=none|host How many output files to create\n"
+ "\nNote: file descriptors from sd_listen_fds() will be consumed, too.\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_URL,
+ ARG_LISTEN_RAW,
+ ARG_LISTEN_HTTP,
+ ARG_LISTEN_HTTPS,
+ ARG_GETTER,
+ ARG_SPLIT_MODE,
+ ARG_COMPRESS,
+ ARG_SEAL,
+ ARG_KEY,
+ ARG_CERT,
+ ARG_TRUST,
+ ARG_GNUTLS_LOG,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "url", required_argument, NULL, ARG_URL },
+ { "getter", required_argument, NULL, ARG_GETTER },
+ { "listen-raw", required_argument, NULL, ARG_LISTEN_RAW },
+ { "listen-http", required_argument, NULL, ARG_LISTEN_HTTP },
+ { "listen-https", required_argument, NULL, ARG_LISTEN_HTTPS },
+ { "output", required_argument, NULL, 'o' },
+ { "split-mode", required_argument, NULL, ARG_SPLIT_MODE },
+ { "compress", optional_argument, NULL, ARG_COMPRESS },
+ { "seal", optional_argument, NULL, ARG_SEAL },
+ { "key", required_argument, NULL, ARG_KEY },
+ { "cert", required_argument, NULL, ARG_CERT },
+ { "trust", required_argument, NULL, ARG_TRUST },
+ { "gnutls-log", required_argument, NULL, ARG_GNUTLS_LOG },
+ {}
+ };
+
+ int c, r;
+ bool type_a, type_b;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "ho:", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_URL:
+ if (arg_url)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently set more than one --url");
+
+ arg_url = optarg;
+ break;
+
+ case ARG_GETTER:
+ if (arg_getter)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently use --getter more than once");
+
+ arg_getter = optarg;
+ break;
+
+ case ARG_LISTEN_RAW:
+ if (arg_listen_raw)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently use --listen-raw more than once");
+
+ arg_listen_raw = optarg;
+ break;
+
+ case ARG_LISTEN_HTTP:
+ if (arg_listen_http || http_socket >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently use --listen-http more than once");
+
+ r = negative_fd(optarg);
+ if (r >= 0)
+ http_socket = r;
+ else
+ arg_listen_http = optarg;
+ break;
+
+ case ARG_LISTEN_HTTPS:
+ if (arg_listen_https || https_socket >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot currently use --listen-https more than once");
+
+ r = negative_fd(optarg);
+ if (r >= 0)
+ https_socket = r;
+ else
+ arg_listen_https = optarg;
+
+ break;
+
+ case ARG_KEY:
+ if (arg_key)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Key file specified twice");
+
+ arg_key = strdup(optarg);
+ if (!arg_key)
+ return log_oom();
+
+ break;
+
+ case ARG_CERT:
+ if (arg_cert)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Certificate file specified twice");
+
+ arg_cert = strdup(optarg);
+ if (!arg_cert)
+ return log_oom();
+
+ break;
+
+ case ARG_TRUST:
+ if (arg_trust || arg_trust_all)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Confusing trusted CA configuration");
+
+ if (streq(optarg, "all"))
+ arg_trust_all = true;
+ else {
+#if HAVE_GNUTLS
+ arg_trust = strdup(optarg);
+ if (!arg_trust)
+ return log_oom();
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --trust is not available.");
+#endif
+ }
+
+ break;
+
+ case 'o':
+ if (arg_output)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use --output/-o more than once");
+
+ arg_output = optarg;
+ break;
+
+ case ARG_SPLIT_MODE:
+ arg_split_mode = journal_write_split_mode_from_string(optarg);
+ if (arg_split_mode == _JOURNAL_WRITE_SPLIT_INVALID)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid split mode: %s", optarg);
+ break;
+
+ case ARG_COMPRESS:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --compress= parameter.");
+
+ arg_compress = !!r;
+ } else
+ arg_compress = true;
+
+ break;
+
+ case ARG_SEAL:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --seal= parameter.");
+
+ arg_seal = !!r;
+ } else
+ arg_seal = true;
+
+ break;
+
+ case ARG_GNUTLS_LOG: {
+#if HAVE_GNUTLS
+ const char* p = optarg;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --gnutls-log= argument: %m");
+ if (r == 0)
+ break;
+
+ if (strv_push(&arg_gnutls_log, word) < 0)
+ return log_oom();
+
+ word = NULL;
+ }
+ break;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --gnutls-log is not available.");
+#endif
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option code.");
+ }
+
+ if (optind < argc)
+ arg_files = argv + optind;
+
+ type_a = arg_getter || !strv_isempty(arg_files);
+ type_b = arg_url
+ || arg_listen_raw
+ || arg_listen_http || arg_listen_https
+ || sd_listen_fds(false) > 0;
+ if (type_a && type_b)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot use file input or --getter with "
+ "--arg-listen-... or socket activation.");
+ if (type_a) {
+ if (!arg_output)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --output must be specified with file input or --getter.");
+
+ if (!IN_SET(arg_split_mode, JOURNAL_WRITE_SPLIT_NONE, _JOURNAL_WRITE_SPLIT_INVALID))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "For active sources, only --split-mode=none is allowed.");
+
+ arg_split_mode = JOURNAL_WRITE_SPLIT_NONE;
+ }
+
+ if (arg_split_mode == _JOURNAL_WRITE_SPLIT_INVALID)
+ arg_split_mode = JOURNAL_WRITE_SPLIT_HOST;
+
+ if (arg_split_mode == JOURNAL_WRITE_SPLIT_NONE && arg_output) {
+ if (is_dir(arg_output, true) > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "For SplitMode=none, output must be a file.");
+ if (!endswith(arg_output, ".journal"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "For SplitMode=none, output file name must end with .journal.");
+ }
+
+ if (arg_split_mode == JOURNAL_WRITE_SPLIT_HOST
+ && arg_output && is_dir(arg_output, true) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "For SplitMode=host, output must be a directory.");
+
+ log_debug("Full config: SplitMode=%s Key=%s Cert=%s Trust=%s",
+ journal_write_split_mode_to_string(arg_split_mode),
+ strna(arg_key),
+ strna(arg_cert),
+ strna(arg_trust));
+
+ return 1 /* work to do */;
+}
+
+static int load_certificates(char **key, char **cert, char **trust) {
+ int r;
+
+ r = read_full_file_full(AT_FDCWD, arg_key ?: PRIV_KEY_FILE, READ_FULL_FILE_CONNECT_SOCKET, NULL, key, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read key from file '%s': %m",
+ arg_key ?: PRIV_KEY_FILE);
+
+ r = read_full_file_full(AT_FDCWD, arg_cert ?: CERT_FILE, READ_FULL_FILE_CONNECT_SOCKET, NULL, cert, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read certificate from file '%s': %m",
+ arg_cert ?: CERT_FILE);
+
+ if (arg_trust_all)
+ log_info("Certificate checking disabled.");
+ else {
+ r = read_full_file_full(AT_FDCWD, arg_trust ?: TRUST_FILE, READ_FULL_FILE_CONNECT_SOCKET, NULL, trust, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read CA certificate file '%s': %m",
+ arg_trust ?: TRUST_FILE);
+ }
+
+ if ((arg_listen_raw || arg_listen_http) && *trust)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --trust makes all non-HTTPS connections untrusted.");
+
+ return 0;
+}
+
+static int run(int argc, char **argv) {
+ _cleanup_(journal_remote_server_destroy) RemoteServer s = {};
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ _cleanup_free_ char *key = NULL, *cert = NULL, *trust = NULL;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment_cli();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ r = parse_config();
+ if (r < 0)
+ return r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_listen_http || arg_listen_https) {
+ r = setup_gnutls_logger(arg_gnutls_log);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_listen_https || https_socket >= 0) {
+ r = load_certificates(&key, &cert, &trust);
+ if (r < 0)
+ return r;
+
+ s.check_trust = !arg_trust_all;
+ }
+
+ r = create_remoteserver(&s, key, cert, trust);
+ if (r < 0)
+ return r;
+
+ r = sd_event_set_watchdog(s.events, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable watchdog: %m");
+
+ log_debug("Watchdog is %sd.", enable_disable(r > 0));
+
+ log_debug("%s running as pid "PID_FMT,
+ program_invocation_short_name, getpid_cached());
+
+ notify_message = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ while (s.active) {
+ r = sd_event_get_state(s.events);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ break;
+
+ r = sd_event_run(s.events, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+ }
+
+ notify_message = NULL;
+ (void) sd_notifyf(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down after writing %" PRIu64 " entries...", s.event_count);
+
+ log_info("Finishing after writing %" PRIu64 " entries", s.event_count);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal-remote/journal-remote-parse.c b/src/journal-remote/journal-remote-parse.c
new file mode 100644
index 0000000..7bc349c
--- /dev/null
+++ b/src/journal-remote/journal-remote-parse.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "journal-remote-parse.h"
+#include "journald-native.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+void source_free(RemoteSource *source) {
+ if (!source)
+ return;
+
+ journal_importer_cleanup(&source->importer);
+
+ log_debug("Writer ref count %i", source->writer->n_ref);
+ writer_unref(source->writer);
+
+ sd_event_source_unref(source->event);
+ sd_event_source_unref(source->buffer_event);
+
+ free(source);
+}
+
+/**
+ * Initialize zero-filled source with given values. On success, takes
+ * ownership of fd, name, and writer, otherwise does not touch them.
+ */
+RemoteSource* source_new(int fd, bool passive_fd, char *name, Writer *writer) {
+ RemoteSource *source;
+
+ log_debug("Creating source for %sfd:%d (%s)",
+ passive_fd ? "passive " : "", fd, name);
+
+ assert(fd >= 0);
+
+ source = new0(RemoteSource, 1);
+ if (!source)
+ return NULL;
+
+ source->importer = JOURNAL_IMPORTER_MAKE(fd);
+ source->importer.passive_fd = passive_fd;
+ source->importer.name = name;
+
+ source->writer = writer;
+
+ return source;
+}
+
+int process_source(RemoteSource *source, bool compress, bool seal) {
+ int r;
+
+ assert(source);
+ assert(source->writer);
+
+ r = journal_importer_process_data(&source->importer);
+ if (r <= 0)
+ return r;
+
+ /* We have a full event */
+ log_trace("Received full event from source@%p fd:%d (%s)",
+ source, source->importer.fd, source->importer.name);
+
+ if (source->importer.iovw.count == 0) {
+ log_warning("Entry with no payload, skipping");
+ goto freeing;
+ }
+
+ assert(source->importer.iovw.iovec);
+
+ r = writer_write(source->writer,
+ &source->importer.iovw,
+ &source->importer.ts,
+ &source->importer.boot_id,
+ compress, seal);
+ if (r == -EBADMSG) {
+ log_error_errno(r, "Entry is invalid, ignoring.");
+ r = 0;
+ } else if (r < 0)
+ log_error_errno(r, "Failed to write entry of %zu bytes: %m",
+ iovw_size(&source->importer.iovw));
+ else
+ r = 1;
+
+ freeing:
+ journal_importer_drop_iovw(&source->importer);
+ return r;
+}
diff --git a/src/journal-remote/journal-remote-parse.h b/src/journal-remote/journal-remote-parse.h
new file mode 100644
index 0000000..a5b51ad
--- /dev/null
+++ b/src/journal-remote/journal-remote-parse.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "journal-importer.h"
+#include "journal-remote-write.h"
+
+typedef struct RemoteSource {
+ JournalImporter importer;
+
+ Writer *writer;
+
+ sd_event_source *event;
+ sd_event_source *buffer_event;
+} RemoteSource;
+
+RemoteSource* source_new(int fd, bool passive_fd, char *name, Writer *writer);
+void source_free(RemoteSource *source);
+int process_source(RemoteSource *source, bool compress, bool seal);
diff --git a/src/journal-remote/journal-remote-write.c b/src/journal-remote/journal-remote-write.c
new file mode 100644
index 0000000..764a3ec
--- /dev/null
+++ b/src/journal-remote/journal-remote-write.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "journal-remote.h"
+
+static int do_rotate(JournalFile **f, bool compress, bool seal) {
+ int r = journal_file_rotate(f, compress, (uint64_t) -1, seal, NULL);
+ if (r < 0) {
+ if (*f)
+ log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
+ else
+ log_error_errno(r, "Failed to create rotated journal: %m");
+ }
+
+ return r;
+}
+
+Writer* writer_new(RemoteServer *server) {
+ Writer *w;
+
+ w = new0(Writer, 1);
+ if (!w)
+ return NULL;
+
+ memset(&w->metrics, 0xFF, sizeof(w->metrics));
+
+ w->mmap = mmap_cache_new();
+ if (!w->mmap)
+ return mfree(w);
+
+ w->n_ref = 1;
+ w->server = server;
+
+ return w;
+}
+
+static Writer* writer_free(Writer *w) {
+ if (!w)
+ return NULL;
+
+ if (w->journal) {
+ log_debug("Closing journal file %s.", w->journal->path);
+ journal_file_close(w->journal);
+ }
+
+ if (w->server && w->hashmap_key)
+ hashmap_remove(w->server->writers, w->hashmap_key);
+
+ free(w->hashmap_key);
+
+ if (w->mmap)
+ mmap_cache_unref(w->mmap);
+
+ return mfree(w);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Writer, writer, writer_free);
+
+int writer_write(Writer *w,
+ struct iovec_wrapper *iovw,
+ dual_timestamp *ts,
+ sd_id128_t *boot_id,
+ bool compress,
+ bool seal) {
+ int r;
+
+ assert(w);
+ assert(iovw);
+ assert(iovw->count > 0);
+
+ if (journal_file_rotate_suggested(w->journal, 0)) {
+ log_info("%s: Journal header limits reached or header out-of-date, rotating",
+ w->journal->path);
+ r = do_rotate(&w->journal, compress, seal);
+ if (r < 0)
+ return r;
+ }
+
+ r = journal_file_append_entry(w->journal, ts, boot_id,
+ iovw->iovec, iovw->count,
+ &w->seqnum, NULL, NULL);
+ if (r >= 0) {
+ if (w->server)
+ w->server->event_count += 1;
+ return 0;
+ } else if (r == -EBADMSG)
+ return r;
+
+ log_debug_errno(r, "%s: Write failed, rotating: %m", w->journal->path);
+ r = do_rotate(&w->journal, compress, seal);
+ if (r < 0)
+ return r;
+ else
+ log_debug("%s: Successfully rotated journal", w->journal->path);
+
+ log_debug("Retrying write.");
+ r = journal_file_append_entry(w->journal, ts, boot_id,
+ iovw->iovec, iovw->count,
+ &w->seqnum, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ if (w->server)
+ w->server->event_count += 1;
+ return 0;
+}
diff --git a/src/journal-remote/journal-remote-write.h b/src/journal-remote/journal-remote-write.h
new file mode 100644
index 0000000..46b5521
--- /dev/null
+++ b/src/journal-remote/journal-remote-write.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journal-file.h"
+#include "journal-importer.h"
+
+typedef struct RemoteServer RemoteServer;
+
+typedef struct Writer {
+ JournalFile *journal;
+ JournalMetrics metrics;
+
+ MMapCache *mmap;
+ RemoteServer *server;
+ char *hashmap_key;
+
+ uint64_t seqnum;
+
+ unsigned n_ref;
+} Writer;
+
+Writer* writer_new(RemoteServer* server);
+Writer* writer_ref(Writer *w);
+Writer* writer_unref(Writer *w);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Writer*, writer_unref);
+
+int writer_write(Writer *s,
+ struct iovec_wrapper *iovw,
+ dual_timestamp *ts,
+ sd_id128_t *boot_id,
+ bool compress,
+ bool seal);
+
+typedef enum JournalWriteSplitMode {
+ JOURNAL_WRITE_SPLIT_NONE,
+ JOURNAL_WRITE_SPLIT_HOST,
+ _JOURNAL_WRITE_SPLIT_MAX,
+ _JOURNAL_WRITE_SPLIT_INVALID = -1
+} JournalWriteSplitMode;
diff --git a/src/journal-remote/journal-remote.c b/src/journal-remote/journal-remote.c
new file mode 100644
index 0000000..0cee844
--- /dev/null
+++ b/src/journal-remote/journal-remote.c
@@ -0,0 +1,536 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <stdint.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "def.h"
+#include "errno-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "journal-file.h"
+#include "journal-remote-write.h"
+#include "journal-remote.h"
+#include "journald-native.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+#define REMOTE_JOURNAL_PATH "/var/log/journal/remote"
+
+#define filename_escape(s) xescape((s), "/ ")
+
+static int open_output(RemoteServer *s, Writer *w, const char* host) {
+ _cleanup_free_ char *_filename = NULL;
+ const char *filename;
+ int r;
+
+ switch (s->split_mode) {
+ case JOURNAL_WRITE_SPLIT_NONE:
+ filename = s->output;
+ break;
+
+ case JOURNAL_WRITE_SPLIT_HOST: {
+ _cleanup_free_ char *name;
+
+ assert(host);
+
+ name = filename_escape(host);
+ if (!name)
+ return log_oom();
+
+ r = asprintf(&_filename, "%s/remote-%s.journal", s->output, name);
+ if (r < 0)
+ return log_oom();
+
+ filename = _filename;
+ break;
+ }
+
+ default:
+ assert_not_reached("what?");
+ }
+
+ r = journal_file_open_reliably(filename,
+ O_RDWR|O_CREAT, 0640,
+ s->compress, (uint64_t) -1, s->seal,
+ &w->metrics,
+ w->mmap, NULL,
+ NULL, &w->journal);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open output journal %s: %m", filename);
+
+ log_debug("Opened output file %s", w->journal->path);
+ return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+static int init_writer_hashmap(RemoteServer *s) {
+ static const struct hash_ops* const hash_ops[] = {
+ [JOURNAL_WRITE_SPLIT_NONE] = NULL,
+ [JOURNAL_WRITE_SPLIT_HOST] = &string_hash_ops,
+ };
+
+ assert(s);
+ assert(s->split_mode >= 0 && s->split_mode < (int) ELEMENTSOF(hash_ops));
+
+ s->writers = hashmap_new(hash_ops[s->split_mode]);
+ if (!s->writers)
+ return log_oom();
+
+ return 0;
+}
+
+int journal_remote_get_writer(RemoteServer *s, const char *host, Writer **writer) {
+ _cleanup_(writer_unrefp) Writer *w = NULL;
+ const void *key;
+ int r;
+
+ switch(s->split_mode) {
+ case JOURNAL_WRITE_SPLIT_NONE:
+ key = "one and only";
+ break;
+
+ case JOURNAL_WRITE_SPLIT_HOST:
+ assert(host);
+ key = host;
+ break;
+
+ default:
+ assert_not_reached("what split mode?");
+ }
+
+ w = hashmap_get(s->writers, key);
+ if (w)
+ writer_ref(w);
+ else {
+ w = writer_new(s);
+ if (!w)
+ return log_oom();
+
+ if (s->split_mode == JOURNAL_WRITE_SPLIT_HOST) {
+ w->hashmap_key = strdup(key);
+ if (!w->hashmap_key)
+ return log_oom();
+ }
+
+ r = open_output(s, w, host);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(s->writers, w->hashmap_key ?: key, w);
+ if (r < 0)
+ return r;
+ }
+
+ *writer = TAKE_PTR(w);
+
+ return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+/* This should go away as soon as µhttpd allows state to be passed around. */
+RemoteServer *journal_remote_server_global;
+
+static int dispatch_raw_source_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+static int dispatch_raw_source_until_block(sd_event_source *event,
+ void *userdata);
+static int dispatch_blocking_source_event(sd_event_source *event,
+ void *userdata);
+static int dispatch_raw_connection_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+
+static int get_source_for_fd(RemoteServer *s,
+ int fd, char *name, RemoteSource **source) {
+ Writer *writer;
+ int r;
+
+ /* This takes ownership of name, but only on success. */
+
+ assert(fd >= 0);
+ assert(source);
+
+ if (!GREEDY_REALLOC0(s->sources, s->sources_size, fd + 1))
+ return log_oom();
+
+ r = journal_remote_get_writer(s, name, &writer);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to get writer for source %s: %m",
+ name);
+
+ if (!s->sources[fd]) {
+ s->sources[fd] = source_new(fd, false, name, writer);
+ if (!s->sources[fd]) {
+ writer_unref(writer);
+ return log_oom();
+ }
+
+ s->active++;
+ }
+
+ *source = s->sources[fd];
+ return 0;
+}
+
+static int remove_source(RemoteServer *s, int fd) {
+ RemoteSource *source;
+
+ assert(s);
+ assert(fd >= 0 && fd < (ssize_t) s->sources_size);
+
+ source = s->sources[fd];
+ if (source) {
+ /* this closes fd too */
+ source_free(source);
+ s->sources[fd] = NULL;
+ s->active--;
+ }
+
+ return 0;
+}
+
+int journal_remote_add_source(RemoteServer *s, int fd, char* name, bool own_name) {
+ RemoteSource *source = NULL;
+ int r;
+
+ /* This takes ownership of name, even on failure, if own_name is true. */
+
+ assert(s);
+ assert(fd >= 0);
+ assert(name);
+
+ if (!own_name) {
+ name = strdup(name);
+ if (!name)
+ return log_oom();
+ }
+
+ r = get_source_for_fd(s, fd, name, &source);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create source for fd:%d (%s): %m",
+ fd, name);
+ free(name);
+ return r;
+ }
+
+ r = sd_event_add_io(s->events, &source->event,
+ fd, EPOLLIN|EPOLLRDHUP|EPOLLPRI,
+ dispatch_raw_source_event, source);
+ if (r == 0) {
+ /* Add additional source for buffer processing. It will be
+ * enabled later. */
+ r = sd_event_add_defer(s->events, &source->buffer_event,
+ dispatch_raw_source_until_block, source);
+ if (r == 0)
+ sd_event_source_set_enabled(source->buffer_event, SD_EVENT_OFF);
+ } else if (r == -EPERM) {
+ log_debug("Falling back to sd_event_add_defer for fd:%d (%s)", fd, name);
+ r = sd_event_add_defer(s->events, &source->event,
+ dispatch_blocking_source_event, source);
+ if (r == 0)
+ sd_event_source_set_enabled(source->event, SD_EVENT_ON);
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to register event source for fd:%d: %m",
+ fd);
+ goto error;
+ }
+
+ r = sd_event_source_set_description(source->event, name);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set source name for fd:%d: %m", fd);
+ goto error;
+ }
+
+ return 1; /* work to do */
+
+ error:
+ remove_source(s, fd);
+ return r;
+}
+
+int journal_remote_add_raw_socket(RemoteServer *s, int fd) {
+ int r;
+ _cleanup_close_ int fd_ = fd;
+ char name[STRLEN("raw-socket-") + DECIMAL_STR_MAX(int) + 1];
+
+ assert(fd >= 0);
+
+ r = sd_event_add_io(s->events, &s->listen_event,
+ fd, EPOLLIN,
+ dispatch_raw_connection_event, s);
+ if (r < 0)
+ return r;
+
+ xsprintf(name, "raw-socket-%d", fd);
+
+ r = sd_event_source_set_description(s->listen_event, name);
+ if (r < 0)
+ return r;
+
+ fd_ = -1;
+ s->active++;
+ return 0;
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+int journal_remote_server_init(
+ RemoteServer *s,
+ const char *output,
+ JournalWriteSplitMode split_mode,
+ bool compress,
+ bool seal) {
+
+ int r;
+
+ assert(s);
+
+ assert(journal_remote_server_global == NULL);
+ journal_remote_server_global = s;
+
+ s->split_mode = split_mode;
+ s->compress = compress;
+ s->seal = seal;
+
+ if (output)
+ s->output = output;
+ else if (split_mode == JOURNAL_WRITE_SPLIT_NONE)
+ s->output = REMOTE_JOURNAL_PATH "/remote.journal";
+ else if (split_mode == JOURNAL_WRITE_SPLIT_HOST)
+ s->output = REMOTE_JOURNAL_PATH;
+ else
+ assert_not_reached("bad split mode");
+
+ r = sd_event_default(&s->events);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = init_writer_hashmap(s);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+#if HAVE_MICROHTTPD
+static void MHDDaemonWrapper_free(MHDDaemonWrapper *d) {
+ MHD_stop_daemon(d->daemon);
+ sd_event_source_unref(d->io_event);
+ sd_event_source_unref(d->timer_event);
+ free(d);
+}
+#endif
+
+void journal_remote_server_destroy(RemoteServer *s) {
+ size_t i;
+
+#if HAVE_MICROHTTPD
+ hashmap_free_with_destructor(s->daemons, MHDDaemonWrapper_free);
+#endif
+
+ assert(s->sources_size == 0 || s->sources);
+ for (i = 0; i < s->sources_size; i++)
+ remove_source(s, i);
+ free(s->sources);
+
+ writer_unref(s->_single_writer);
+ hashmap_free(s->writers);
+
+ sd_event_source_unref(s->sigterm_event);
+ sd_event_source_unref(s->sigint_event);
+ sd_event_source_unref(s->listen_event);
+ sd_event_unref(s->events);
+
+ if (s == journal_remote_server_global)
+ journal_remote_server_global = NULL;
+
+ /* fds that we're listening on remain open... */
+}
+
+/**********************************************************************
+ **********************************************************************
+ **********************************************************************/
+
+int journal_remote_handle_raw_source(
+ sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ RemoteServer *s) {
+
+ RemoteSource *source;
+ int r;
+
+ /* Returns 1 if there might be more data pending,
+ * 0 if data is currently exhausted, negative on error.
+ */
+
+ assert(fd >= 0 && fd < (ssize_t) s->sources_size);
+ source = s->sources[fd];
+ assert(source->importer.fd == fd);
+
+ r = process_source(source, s->compress, s->seal);
+ if (journal_importer_eof(&source->importer)) {
+ size_t remaining;
+
+ log_debug("EOF reached with source %s (fd=%d)",
+ source->importer.name, source->importer.fd);
+
+ remaining = journal_importer_bytes_remaining(&source->importer);
+ if (remaining > 0)
+ log_notice("Premature EOF. %zu bytes lost.", remaining);
+ remove_source(s, source->importer.fd);
+ log_debug("%zu active sources remaining", s->active);
+ return 0;
+ } else if (r == -E2BIG) {
+ log_notice("Entry with too many fields, skipped");
+ return 1;
+ } else if (r == -ENOBUFS) {
+ log_notice("Entry too big, skipped");
+ return 1;
+ } else if (r == -EAGAIN) {
+ return 0;
+ } else if (r < 0) {
+ log_debug_errno(r, "Closing connection: %m");
+ remove_source(s, fd);
+ return 0;
+ } else
+ return 1;
+}
+
+static int dispatch_raw_source_until_block(sd_event_source *event,
+ void *userdata) {
+ RemoteSource *source = userdata;
+ int r;
+
+ /* Make sure event stays around even if source is destroyed */
+ sd_event_source_ref(event);
+
+ r = journal_remote_handle_raw_source(event, source->importer.fd, EPOLLIN, journal_remote_server_global);
+ if (r != 1)
+ /* No more data for now */
+ sd_event_source_set_enabled(event, SD_EVENT_OFF);
+
+ sd_event_source_unref(event);
+
+ return r;
+}
+
+static int dispatch_raw_source_event(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ RemoteSource *source = userdata;
+ int r;
+
+ assert(source->event);
+ assert(source->buffer_event);
+
+ r = journal_remote_handle_raw_source(event, fd, EPOLLIN, journal_remote_server_global);
+ if (r == 1)
+ /* Might have more data. We need to rerun the handler
+ * until we are sure the buffer is exhausted. */
+ sd_event_source_set_enabled(source->buffer_event, SD_EVENT_ON);
+
+ return r;
+}
+
+static int dispatch_blocking_source_event(sd_event_source *event,
+ void *userdata) {
+ RemoteSource *source = userdata;
+
+ return journal_remote_handle_raw_source(event, source->importer.fd, EPOLLIN, journal_remote_server_global);
+}
+
+static int accept_connection(
+ const char* type,
+ int fd,
+ SocketAddress *addr,
+ char **hostname) {
+
+ _cleanup_close_ int fd2 = -1;
+ int r;
+
+ log_debug("Accepting new %s connection on fd:%d", type, fd);
+ fd2 = accept4(fd, &addr->sockaddr.sa, &addr->size, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (fd2 < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return -EAGAIN;
+
+ return log_error_errno(errno, "accept() on fd:%d failed: %m", fd);
+ }
+
+ switch(socket_address_family(addr)) {
+ case AF_INET:
+ case AF_INET6: {
+ _cleanup_free_ char *a = NULL;
+ char *b;
+
+ r = socket_address_print(addr, &a);
+ if (r < 0)
+ return log_error_errno(r, "socket_address_print(): %m");
+
+ r = socknameinfo_pretty(&addr->sockaddr, addr->size, &b);
+ if (r < 0)
+ return log_error_errno(r, "Resolving hostname failed: %m");
+
+ log_debug("Accepted %s %s connection from %s",
+ type,
+ socket_address_family(addr) == AF_INET ? "IP" : "IPv6",
+ a);
+
+ *hostname = b;
+ return TAKE_FD(fd2);
+ }
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Rejected %s connection with unsupported family %d",
+ type, socket_address_family(addr));
+ }
+}
+
+static int dispatch_raw_connection_event(
+ sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+
+ RemoteServer *s = userdata;
+ int fd2;
+ SocketAddress addr = {
+ .size = sizeof(union sockaddr_union),
+ .type = SOCK_STREAM,
+ };
+ char *hostname = NULL;
+
+ fd2 = accept_connection("raw", fd, &addr, &hostname);
+ if (fd2 == -EAGAIN)
+ return 0;
+ if (fd2 < 0)
+ return fd2;
+
+ return journal_remote_add_source(s, fd2, hostname, true);
+}
diff --git a/src/journal-remote/journal-remote.conf.in b/src/journal-remote/journal-remote.conf.in
new file mode 100644
index 0000000..edc3aba
--- /dev/null
+++ b/src/journal-remote/journal-remote.conf.in
@@ -0,0 +1,19 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See journal-remote.conf(5) for details
+
+[Remote]
+# Seal=false
+# SplitMode=host
+# ServerKeyFile=@CERTIFICATEROOT@/private/journal-remote.pem
+# ServerCertificateFile=@CERTIFICATEROOT@/certs/journal-remote.pem
+# TrustedCertificateFile=@CERTIFICATEROOT@/ca/trusted.pem
diff --git a/src/journal-remote/journal-remote.h b/src/journal-remote/journal-remote.h
new file mode 100644
index 0000000..247ffa9
--- /dev/null
+++ b/src/journal-remote/journal-remote.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "journal-remote-parse.h"
+#include "journal-remote-write.h"
+
+#if HAVE_MICROHTTPD
+#include "microhttpd-util.h"
+
+typedef struct MHDDaemonWrapper MHDDaemonWrapper;
+
+struct MHDDaemonWrapper {
+ uint64_t fd;
+ struct MHD_Daemon *daemon;
+
+ sd_event_source *io_event;
+ sd_event_source *timer_event;
+};
+#endif
+
+struct RemoteServer {
+ RemoteSource **sources;
+ size_t sources_size;
+ size_t active;
+
+ sd_event *events;
+ sd_event_source *sigterm_event, *sigint_event, *listen_event;
+
+ Hashmap *writers;
+ Writer *_single_writer;
+ uint64_t event_count;
+
+#if HAVE_MICROHTTPD
+ Hashmap *daemons;
+#endif
+ const char *output; /* either the output file or directory */
+
+ JournalWriteSplitMode split_mode;
+ bool compress;
+ bool seal;
+ bool check_trust;
+};
+extern RemoteServer *journal_remote_server_global;
+
+int journal_remote_server_init(
+ RemoteServer *s,
+ const char *output,
+ JournalWriteSplitMode split_mode,
+ bool compress,
+ bool seal);
+
+int journal_remote_get_writer(RemoteServer *s, const char *host, Writer **writer);
+
+int journal_remote_add_source(RemoteServer *s, int fd, char* name, bool own_name);
+int journal_remote_add_raw_socket(RemoteServer *s, int fd);
+int journal_remote_handle_raw_source(
+ sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ RemoteServer *s);
+
+void journal_remote_server_destroy(RemoteServer *s);
diff --git a/src/journal-remote/journal-upload-journal.c b/src/journal-remote/journal-upload-journal.c
new file mode 100644
index 0000000..3296c22
--- /dev/null
+++ b/src/journal-remote/journal-upload-journal.c
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <curl/curl.h>
+#include <stdbool.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "journal-upload.h"
+#include "log.h"
+#include "string-util.h"
+#include "utf8.h"
+#include "util.h"
+
+/**
+ * Write up to size bytes to buf. Return negative on error, and number of
+ * bytes written otherwise. The last case is a kind of an error too.
+ */
+static ssize_t write_entry(char *buf, size_t size, Uploader *u) {
+ int r;
+ size_t pos = 0;
+
+ assert(size <= SSIZE_MAX);
+
+ for (;;) {
+
+ switch(u->entry_state) {
+ case ENTRY_CURSOR: {
+ u->current_cursor = mfree(u->current_cursor);
+
+ r = sd_journal_get_cursor(u->journal, &u->current_cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ r = snprintf(buf + pos, size - pos,
+ "__CURSOR=%s\n", u->current_cursor);
+ assert(r >= 0);
+ if ((size_t) r > size - pos)
+ /* not enough space */
+ return pos;
+
+ u->entry_state++;
+
+ if (pos + r == size) {
+ /* exactly one character short, but we don't need it */
+ buf[size - 1] = '\n';
+ return size;
+ }
+
+ pos += r;
+ }
+ _fallthrough_;
+ case ENTRY_REALTIME: {
+ usec_t realtime;
+
+ r = sd_journal_get_realtime_usec(u->journal, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = snprintf(buf + pos, size - pos,
+ "__REALTIME_TIMESTAMP="USEC_FMT"\n", realtime);
+ assert(r >= 0);
+ if ((size_t) r > size - pos)
+ /* not enough space */
+ return pos;
+
+ u->entry_state++;
+
+ if (r + pos == size) {
+ /* exactly one character short, but we don't need it */
+ buf[size - 1] = '\n';
+ return size;
+ }
+
+ pos += r;
+ }
+ _fallthrough_;
+ case ENTRY_MONOTONIC: {
+ usec_t monotonic;
+ sd_id128_t boot_id;
+
+ r = sd_journal_get_monotonic_usec(u->journal, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = snprintf(buf + pos, size - pos,
+ "__MONOTONIC_TIMESTAMP="USEC_FMT"\n", monotonic);
+ assert(r >= 0);
+ if ((size_t) r > size - pos)
+ /* not enough space */
+ return pos;
+
+ u->entry_state++;
+
+ if (r + pos == size) {
+ /* exactly one character short, but we don't need it */
+ buf[size - 1] = '\n';
+ return size;
+ }
+
+ pos += r;
+ }
+ _fallthrough_;
+ case ENTRY_BOOT_ID: {
+ sd_id128_t boot_id;
+ char sid[SD_ID128_STRING_MAX];
+
+ r = sd_journal_get_monotonic_usec(u->journal, NULL, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = snprintf(buf + pos, size - pos,
+ "_BOOT_ID=%s\n", sd_id128_to_string(boot_id, sid));
+ assert(r >= 0);
+ if ((size_t) r > size - pos)
+ /* not enough space */
+ return pos;
+
+ u->entry_state++;
+
+ if (r + pos == size) {
+ /* exactly one character short, but we don't need it */
+ buf[size - 1] = '\n';
+ return size;
+ }
+
+ pos += r;
+ }
+ _fallthrough_;
+ case ENTRY_NEW_FIELD: {
+ u->field_pos = 0;
+
+ r = sd_journal_enumerate_data(u->journal,
+ &u->field_data,
+ &u->field_length);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move to next field in entry: %m");
+ else if (r == 0) {
+ u->entry_state = ENTRY_OUTRO;
+ continue;
+ }
+
+ /* We already printed the boot id from the data in
+ * the header, hence let's suppress it here */
+ if (memory_startswith(u->field_data, u->field_length, "_BOOT_ID="))
+ continue;
+
+ if (!utf8_is_printable_newline(u->field_data, u->field_length, false)) {
+ u->entry_state = ENTRY_BINARY_FIELD_START;
+ continue;
+ }
+
+ u->entry_state++;
+ }
+ _fallthrough_;
+ case ENTRY_TEXT_FIELD:
+ case ENTRY_BINARY_FIELD: {
+ bool done;
+ size_t tocopy;
+
+ done = size - pos > u->field_length - u->field_pos;
+ if (done)
+ tocopy = u->field_length - u->field_pos;
+ else
+ tocopy = size - pos;
+
+ memcpy(buf + pos,
+ (char*) u->field_data + u->field_pos,
+ tocopy);
+
+ if (done) {
+ buf[pos + tocopy] = '\n';
+ pos += tocopy + 1;
+ u->entry_state = ENTRY_NEW_FIELD;
+ continue;
+ } else {
+ u->field_pos += tocopy;
+ return size;
+ }
+ }
+
+ case ENTRY_BINARY_FIELD_START: {
+ const char *c;
+ size_t len;
+
+ c = memchr(u->field_data, '=', u->field_length);
+ if (!c || c == u->field_data)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid field.");
+
+ len = c - (const char*)u->field_data;
+
+ /* need space for label + '\n' */
+ if (size - pos < len + 1)
+ return pos;
+
+ memcpy(buf + pos, u->field_data, len);
+ buf[pos + len] = '\n';
+ pos += len + 1;
+
+ u->field_pos = len + 1;
+ u->entry_state++;
+ }
+ _fallthrough_;
+ case ENTRY_BINARY_FIELD_SIZE: {
+ uint64_t le64;
+
+ /* need space for uint64_t */
+ if (size - pos < 8)
+ return pos;
+
+ le64 = htole64(u->field_length - u->field_pos);
+ memcpy(buf + pos, &le64, 8);
+ pos += 8;
+
+ u->entry_state++;
+ continue;
+ }
+
+ case ENTRY_OUTRO:
+ /* need space for '\n' */
+ if (size - pos < 1)
+ return pos;
+
+ buf[pos++] = '\n';
+ u->entry_state++;
+ u->entries_sent++;
+
+ return pos;
+
+ default:
+ assert_not_reached("WTF?");
+ }
+ }
+ assert_not_reached("WTF?");
+}
+
+static void check_update_watchdog(Uploader *u) {
+ usec_t after;
+ usec_t elapsed_time;
+
+ if (u->watchdog_usec <= 0)
+ return;
+
+ after = now(CLOCK_MONOTONIC);
+ elapsed_time = usec_sub_unsigned(after, u->watchdog_timestamp);
+ if (elapsed_time > u->watchdog_usec / 2) {
+ log_debug("Update watchdog timer");
+ sd_notify(false, "WATCHDOG=1");
+ u->watchdog_timestamp = after;
+ }
+}
+
+static size_t journal_input_callback(void *buf, size_t size, size_t nmemb, void *userp) {
+ Uploader *u = userp;
+ int r;
+ sd_journal *j;
+ size_t filled = 0;
+ ssize_t w;
+
+ assert(u);
+ assert(nmemb <= SSIZE_MAX / size);
+
+ check_update_watchdog(u);
+
+ j = u->journal;
+
+ while (j && filled < size * nmemb) {
+ if (u->entry_state == ENTRY_DONE) {
+ r = sd_journal_next(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to move to next entry in journal: %m");
+ return CURL_READFUNC_ABORT;
+ } else if (r == 0) {
+ if (u->input_event)
+ log_debug("No more entries, waiting for journal.");
+ else {
+ log_info("No more entries, closing journal.");
+ close_journal_input(u);
+ }
+
+ u->uploading = false;
+
+ break;
+ }
+
+ u->entry_state = ENTRY_CURSOR;
+ }
+
+ w = write_entry((char*)buf + filled, size * nmemb - filled, u);
+ if (w < 0)
+ return CURL_READFUNC_ABORT;
+ filled += w;
+
+ if (filled == 0) {
+ log_error("Buffer space is too small to write entry.");
+ return CURL_READFUNC_ABORT;
+ } else if (u->entry_state != ENTRY_DONE)
+ /* This means that all available space was used up */
+ break;
+
+ log_debug("Entry %zu (%s) has been uploaded.",
+ u->entries_sent, u->current_cursor);
+ }
+
+ return filled;
+}
+
+void close_journal_input(Uploader *u) {
+ assert(u);
+
+ if (u->journal) {
+ log_debug("Closing journal input.");
+
+ sd_journal_close(u->journal);
+ u->journal = NULL;
+ }
+ u->timeout = 0;
+}
+
+static int process_journal_input(Uploader *u, int skip) {
+ int r;
+
+ if (u->uploading)
+ return 0;
+
+ r = sd_journal_next_skip(u->journal, skip);
+ if (r < 0)
+ return log_error_errno(r, "Failed to skip to next entry: %m");
+ else if (r < skip)
+ return 0;
+
+ /* have data */
+ u->entry_state = ENTRY_CURSOR;
+ return start_upload(u, journal_input_callback, u);
+}
+
+int check_journal_input(Uploader *u) {
+ if (u->input_event) {
+ int r;
+
+ r = sd_journal_process(u->journal);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process journal: %m");
+ close_journal_input(u);
+ return r;
+ }
+
+ if (r == SD_JOURNAL_NOP)
+ return 0;
+ }
+
+ return process_journal_input(u, 1);
+}
+
+static int dispatch_journal_input(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userp) {
+ Uploader *u = userp;
+
+ assert(u);
+
+ if (u->uploading)
+ return 0;
+
+ log_debug("Detected journal input, checking for new data.");
+ return check_journal_input(u);
+}
+
+int open_journal_for_upload(Uploader *u,
+ sd_journal *j,
+ const char *cursor,
+ bool after_cursor,
+ bool follow) {
+ int fd, r, events;
+
+ u->journal = j;
+
+ sd_journal_set_data_threshold(j, 0);
+
+ if (follow) {
+ fd = sd_journal_get_fd(j);
+ if (fd < 0)
+ return log_error_errno(fd, "sd_journal_get_fd failed: %m");
+
+ events = sd_journal_get_events(j);
+
+ r = sd_journal_reliable_fd(j);
+ assert(r >= 0);
+ if (r > 0)
+ u->timeout = -1;
+ else
+ u->timeout = JOURNAL_UPLOAD_POLL_TIMEOUT;
+
+ r = sd_event_add_io(u->events, &u->input_event,
+ fd, events, dispatch_journal_input, u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register input event: %m");
+
+ log_debug("Listening for journal events on fd:%d, timeout %d",
+ fd, u->timeout == (uint64_t) -1 ? -1 : (int) u->timeout);
+ } else
+ log_debug("Not listening for journal events.");
+
+ if (cursor) {
+ r = sd_journal_seek_cursor(j, cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to seek to cursor %s: %m",
+ cursor);
+ }
+
+ return process_journal_input(u, !!after_cursor);
+}
diff --git a/src/journal-remote/journal-upload.c b/src/journal-remote/journal-upload.c
new file mode 100644
index 0000000..bf362d0
--- /dev/null
+++ b/src/journal-remote/journal-upload.c
@@ -0,0 +1,904 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <curl/curl.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "build.h"
+#include "conf-parser.h"
+#include "daemon-util.h"
+#include "def.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "glob-util.h"
+#include "journal-upload.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define PRIV_KEY_FILE CERTIFICATE_ROOT "/private/journal-upload.pem"
+#define CERT_FILE CERTIFICATE_ROOT "/certs/journal-upload.pem"
+#define TRUST_FILE CERTIFICATE_ROOT "/ca/trusted.pem"
+#define DEFAULT_PORT 19532
+
+static const char* arg_url = NULL;
+static const char *arg_key = NULL;
+static const char *arg_cert = NULL;
+static const char *arg_trust = NULL;
+static const char *arg_directory = NULL;
+static char **arg_file = NULL;
+static const char *arg_cursor = NULL;
+static bool arg_after_cursor = false;
+static int arg_journal_type = 0;
+static const char *arg_machine = NULL;
+static bool arg_merge = false;
+static int arg_follow = -1;
+static const char *arg_save_state = NULL;
+
+static void close_fd_input(Uploader *u);
+
+#define SERVER_ANSWER_KEEP 2048
+
+#define STATE_FILE "/var/lib/systemd/journal-upload/state"
+
+#define easy_setopt(curl, opt, value, level, cmd) \
+ do { \
+ code = curl_easy_setopt(curl, opt, value); \
+ if (code) { \
+ log_full(level, \
+ "curl_easy_setopt " #opt " failed: %s", \
+ curl_easy_strerror(code)); \
+ cmd; \
+ } \
+ } while (0)
+
+static size_t output_callback(char *buf,
+ size_t size,
+ size_t nmemb,
+ void *userp) {
+ Uploader *u = userp;
+
+ assert(u);
+
+ log_debug("The server answers (%zu bytes): %.*s",
+ size*nmemb, (int)(size*nmemb), buf);
+
+ if (nmemb && !u->answer) {
+ u->answer = strndup(buf, size*nmemb);
+ if (!u->answer)
+ log_warning("Failed to store server answer (%zu bytes): out of memory", size*nmemb);
+ }
+
+ return size * nmemb;
+}
+
+static int check_cursor_updating(Uploader *u) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (!u->state_file)
+ return 0;
+
+ r = mkdir_parents(u->state_file, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Cannot create parent directory of state file %s: %m",
+ u->state_file);
+
+ r = fopen_temporary(u->state_file, &f, &temp_path);
+ if (r < 0)
+ return log_error_errno(r, "Cannot save state to %s: %m",
+ u->state_file);
+ (void) unlink(temp_path);
+
+ return 0;
+}
+
+static int update_cursor_state(Uploader *u) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (!u->state_file || !u->last_cursor)
+ return 0;
+
+ r = fopen_temporary(u->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "LAST_CURSOR=%s\n",
+ u->last_cursor);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, u->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ (void) unlink(u->state_file);
+
+ return log_error_errno(r, "Failed to save state %s: %m", u->state_file);
+}
+
+static int load_cursor_state(Uploader *u) {
+ int r;
+
+ if (!u->state_file)
+ return 0;
+
+ r = parse_env_file(NULL, u->state_file, "LAST_CURSOR", &u->last_cursor);
+ if (r == -ENOENT)
+ log_debug("State file %s is not present.", u->state_file);
+ else if (r < 0)
+ return log_error_errno(r, "Failed to read state file %s: %m",
+ u->state_file);
+ else
+ log_debug("Last cursor was %s", u->last_cursor);
+
+ return 0;
+}
+
+int start_upload(Uploader *u,
+ size_t (*input_callback)(void *ptr,
+ size_t size,
+ size_t nmemb,
+ void *userdata),
+ void *data) {
+ CURLcode code;
+
+ assert(u);
+ assert(input_callback);
+
+ if (!u->header) {
+ struct curl_slist *h;
+
+ h = curl_slist_append(NULL, "Content-Type: application/vnd.fdo.journal");
+ if (!h)
+ return log_oom();
+
+ h = curl_slist_append(h, "Transfer-Encoding: chunked");
+ if (!h) {
+ curl_slist_free_all(h);
+ return log_oom();
+ }
+
+ h = curl_slist_append(h, "Accept: text/plain");
+ if (!h) {
+ curl_slist_free_all(h);
+ return log_oom();
+ }
+
+ u->header = h;
+ }
+
+ if (!u->easy) {
+ CURL *curl;
+
+ curl = curl_easy_init();
+ if (!curl)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSR),
+ "Call to curl_easy_init failed.");
+
+ /* tell it to POST to the URL */
+ easy_setopt(curl, CURLOPT_POST, 1L,
+ LOG_ERR, return -EXFULL);
+
+ easy_setopt(curl, CURLOPT_ERRORBUFFER, u->error,
+ LOG_ERR, return -EXFULL);
+
+ /* set where to write to */
+ easy_setopt(curl, CURLOPT_WRITEFUNCTION, output_callback,
+ LOG_ERR, return -EXFULL);
+
+ easy_setopt(curl, CURLOPT_WRITEDATA, data,
+ LOG_ERR, return -EXFULL);
+
+ /* set where to read from */
+ easy_setopt(curl, CURLOPT_READFUNCTION, input_callback,
+ LOG_ERR, return -EXFULL);
+
+ easy_setopt(curl, CURLOPT_READDATA, data,
+ LOG_ERR, return -EXFULL);
+
+ /* use our special own mime type and chunked transfer */
+ easy_setopt(curl, CURLOPT_HTTPHEADER, u->header,
+ LOG_ERR, return -EXFULL);
+
+ if (DEBUG_LOGGING)
+ /* enable verbose for easier tracing */
+ easy_setopt(curl, CURLOPT_VERBOSE, 1L, LOG_WARNING, );
+
+ easy_setopt(curl, CURLOPT_USERAGENT,
+ "systemd-journal-upload " GIT_VERSION,
+ LOG_WARNING, );
+
+ if (!streq_ptr(arg_key, "-") && (arg_key || startswith(u->url, "https://"))) {
+ easy_setopt(curl, CURLOPT_SSLKEY, arg_key ?: PRIV_KEY_FILE,
+ LOG_ERR, return -EXFULL);
+ easy_setopt(curl, CURLOPT_SSLCERT, arg_cert ?: CERT_FILE,
+ LOG_ERR, return -EXFULL);
+ }
+
+ if (STRPTR_IN_SET(arg_trust, "-", "all"))
+ easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0,
+ LOG_ERR, return -EUCLEAN);
+ else if (arg_trust || startswith(u->url, "https://"))
+ easy_setopt(curl, CURLOPT_CAINFO, arg_trust ?: TRUST_FILE,
+ LOG_ERR, return -EXFULL);
+
+ if (arg_key || arg_trust)
+ easy_setopt(curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1,
+ LOG_WARNING, );
+
+ u->easy = curl;
+ } else {
+ /* truncate the potential old error message */
+ u->error[0] = '\0';
+
+ free(u->answer);
+ u->answer = 0;
+ }
+
+ /* upload to this place */
+ code = curl_easy_setopt(u->easy, CURLOPT_URL, u->url);
+ if (code)
+ return log_error_errno(SYNTHETIC_ERRNO(EXFULL),
+ "curl_easy_setopt CURLOPT_URL failed: %s",
+ curl_easy_strerror(code));
+
+ u->uploading = true;
+
+ return 0;
+}
+
+static size_t fd_input_callback(void *buf, size_t size, size_t nmemb, void *userp) {
+ Uploader *u = userp;
+ ssize_t n;
+
+ assert(u);
+ assert(nmemb < SSIZE_MAX / size);
+
+ if (u->input < 0)
+ return 0;
+
+ assert(!size_multiply_overflow(size, nmemb));
+
+ n = read(u->input, buf, size * nmemb);
+ log_debug("%s: allowed %zu, read %zd", __func__, size*nmemb, n);
+ if (n > 0)
+ return n;
+
+ u->uploading = false;
+ if (n < 0) {
+ log_error_errno(errno, "Aborting transfer after read error on input: %m.");
+ return CURL_READFUNC_ABORT;
+ }
+
+ log_debug("Reached EOF");
+ close_fd_input(u);
+ return 0;
+}
+
+static void close_fd_input(Uploader *u) {
+ assert(u);
+
+ u->input = safe_close(u->input);
+ u->timeout = 0;
+}
+
+static int dispatch_fd_input(sd_event_source *event,
+ int fd,
+ uint32_t revents,
+ void *userp) {
+ Uploader *u = userp;
+
+ assert(u);
+ assert(fd >= 0);
+
+ if (revents & EPOLLHUP) {
+ log_debug("Received HUP");
+ close_fd_input(u);
+ return 0;
+ }
+
+ if (!(revents & EPOLLIN)) {
+ log_warning("Unexpected poll event %"PRIu32".", revents);
+ return -EINVAL;
+ }
+
+ if (u->uploading) {
+ log_warning("dispatch_fd_input called when uploading, ignoring.");
+ return 0;
+ }
+
+ return start_upload(u, fd_input_callback, u);
+}
+
+static int open_file_for_upload(Uploader *u, const char *filename) {
+ int fd, r = 0;
+
+ if (streq(filename, "-"))
+ fd = STDIN_FILENO;
+ else {
+ fd = open(filename, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", filename);
+ }
+
+ u->input = fd;
+
+ if (arg_follow) {
+ r = sd_event_add_io(u->events, &u->input_event,
+ fd, EPOLLIN, dispatch_fd_input, u);
+ if (r < 0) {
+ if (r != -EPERM || arg_follow > 0)
+ return log_error_errno(r, "Failed to register input event: %m");
+
+ /* Normal files should just be consumed without polling. */
+ r = start_upload(u, fd_input_callback, u);
+ }
+ }
+
+ return r;
+}
+
+static int dispatch_sigterm(sd_event_source *event,
+ const struct signalfd_siginfo *si,
+ void *userdata) {
+ Uploader *u = userdata;
+
+ assert(u);
+
+ log_received_signal(LOG_INFO, si);
+
+ close_fd_input(u);
+ close_journal_input(u);
+
+ sd_event_exit(u->events, 0);
+ return 0;
+}
+
+static int setup_signals(Uploader *u) {
+ int r;
+
+ assert(u);
+
+ assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, -1) >= 0);
+
+ r = sd_event_add_signal(u->events, &u->sigterm_event, SIGTERM, dispatch_sigterm, u);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(u->events, &u->sigint_event, SIGINT, dispatch_sigterm, u);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int setup_uploader(Uploader *u, const char *url, const char *state_file) {
+ int r;
+ const char *host, *proto = "";
+
+ assert(u);
+ assert(url);
+
+ *u = (Uploader) {
+ .input = -1
+ };
+
+ host = STARTSWITH_SET(url, "http://", "https://");
+ if (!host) {
+ host = url;
+ proto = "https://";
+ }
+
+ if (strchr(host, ':'))
+ u->url = strjoin(proto, url, "/upload");
+ else {
+ char *t;
+ size_t x;
+
+ t = strdupa(url);
+ x = strlen(t);
+ while (x > 0 && t[x - 1] == '/')
+ t[x - 1] = '\0';
+
+ u->url = strjoin(proto, t, ":" STRINGIFY(DEFAULT_PORT), "/upload");
+ }
+ if (!u->url)
+ return log_oom();
+
+ u->state_file = state_file;
+
+ r = sd_event_default(&u->events);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_default failed: %m");
+
+ r = setup_signals(u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up signals: %m");
+
+ (void) sd_watchdog_enabled(false, &u->watchdog_usec);
+
+ return load_cursor_state(u);
+}
+
+static void destroy_uploader(Uploader *u) {
+ assert(u);
+
+ curl_easy_cleanup(u->easy);
+ curl_slist_free_all(u->header);
+ free(u->answer);
+
+ free(u->last_cursor);
+ free(u->current_cursor);
+
+ free(u->url);
+
+ u->input_event = sd_event_source_unref(u->input_event);
+
+ close_fd_input(u);
+ close_journal_input(u);
+
+ sd_event_source_unref(u->sigterm_event);
+ sd_event_source_unref(u->sigint_event);
+ sd_event_unref(u->events);
+}
+
+static int perform_upload(Uploader *u) {
+ CURLcode code;
+ long status;
+
+ assert(u);
+
+ u->watchdog_timestamp = now(CLOCK_MONOTONIC);
+ code = curl_easy_perform(u->easy);
+ if (code) {
+ if (u->error[0])
+ log_error("Upload to %s failed: %.*s",
+ u->url, (int) sizeof(u->error), u->error);
+ else
+ log_error("Upload to %s failed: %s",
+ u->url, curl_easy_strerror(code));
+ return -EIO;
+ }
+
+ code = curl_easy_getinfo(u->easy, CURLINFO_RESPONSE_CODE, &status);
+ if (code)
+ return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Failed to retrieve response code: %s",
+ curl_easy_strerror(code));
+
+ if (status >= 300)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Upload to %s failed with code %ld: %s",
+ u->url, status, strna(u->answer));
+ else if (status < 200)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Upload to %s finished with unexpected code %ld: %s",
+ u->url, status, strna(u->answer));
+ else
+ log_debug("Upload finished successfully with code %ld: %s",
+ status, strna(u->answer));
+
+ free_and_replace(u->last_cursor, u->current_cursor);
+
+ return update_cursor_state(u);
+}
+
+static int config_parse_path_or_ignore(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *n = NULL;
+ bool fatal = ltype;
+ char **s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue))
+ goto finalize;
+
+ n = strdup(rvalue);
+ if (!n)
+ return log_oom();
+
+ if (streq(n, "-"))
+ goto finalize;
+
+ r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE | (fatal ? PATH_CHECK_FATAL : 0), unit, filename, line, lvalue);
+ if (r < 0)
+ return fatal ? -ENOEXEC : 0;
+
+finalize:
+ return free_and_replace(*s, n);
+}
+
+static int parse_config(void) {
+ const ConfigTableItem items[] = {
+ { "Upload", "URL", config_parse_string, 0, &arg_url },
+ { "Upload", "ServerKeyFile", config_parse_path_or_ignore, 0, &arg_key },
+ { "Upload", "ServerCertificateFile", config_parse_path_or_ignore, 0, &arg_cert },
+ { "Upload", "TrustedCertificateFile", config_parse_path_or_ignore, 0, &arg_trust },
+ {}
+ };
+
+ return config_parse_many_nulstr(
+ PKGSYSCONFDIR "/journal-upload.conf",
+ CONF_PATHS_NULSTR("systemd/journal-upload.conf.d"),
+ "Upload\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-journal-upload.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s -u URL {FILE|-}...\n\n"
+ "Upload journal events to a remote server.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -u --url=URL Upload to this address (default port "
+ STRINGIFY(DEFAULT_PORT) ")\n"
+ " --key=FILENAME Specify key in PEM format (default:\n"
+ " \"" PRIV_KEY_FILE "\")\n"
+ " --cert=FILENAME Specify certificate in PEM format (default:\n"
+ " \"" CERT_FILE "\")\n"
+ " --trust=FILENAME|all Specify CA certificate or disable checking (default:\n"
+ " \"" TRUST_FILE "\")\n"
+ " --system Use the system journal\n"
+ " --user Use the user journal for the current user\n"
+ " -m --merge Use all available journals\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -D --directory=PATH Use journal files from directory\n"
+ " --file=PATH Use this journal file\n"
+ " --cursor=CURSOR Start at the specified cursor\n"
+ " --after-cursor=CURSOR Start after the specified cursor\n"
+ " --follow[=BOOL] Do [not] wait for input\n"
+ " --save-state[=FILE] Save uploaded cursors (default \n"
+ " " STATE_FILE ")\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_KEY,
+ ARG_CERT,
+ ARG_TRUST,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_FILE,
+ ARG_CURSOR,
+ ARG_AFTER_CURSOR,
+ ARG_FOLLOW,
+ ARG_SAVE_STATE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "url", required_argument, NULL, 'u' },
+ { "key", required_argument, NULL, ARG_KEY },
+ { "cert", required_argument, NULL, ARG_CERT },
+ { "trust", required_argument, NULL, ARG_TRUST },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "merge", no_argument, NULL, 'm' },
+ { "machine", required_argument, NULL, 'M' },
+ { "directory", required_argument, NULL, 'D' },
+ { "file", required_argument, NULL, ARG_FILE },
+ { "cursor", required_argument, NULL, ARG_CURSOR },
+ { "after-cursor", required_argument, NULL, ARG_AFTER_CURSOR },
+ { "follow", optional_argument, NULL, ARG_FOLLOW },
+ { "save-state", optional_argument, NULL, ARG_SAVE_STATE },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ opterr = 0;
+
+ while ((c = getopt_long(argc, argv, "hu:mM:D:", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'u':
+ if (arg_url)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --url");
+
+ arg_url = optarg;
+ break;
+
+ case ARG_KEY:
+ if (arg_key)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --key");
+
+ arg_key = optarg;
+ break;
+
+ case ARG_CERT:
+ if (arg_cert)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --cert");
+
+ arg_cert = optarg;
+ break;
+
+ case ARG_TRUST:
+ if (arg_trust)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --trust");
+
+ arg_trust = optarg;
+ break;
+
+ case ARG_SYSTEM:
+ arg_journal_type |= SD_JOURNAL_SYSTEM;
+ break;
+
+ case ARG_USER:
+ arg_journal_type |= SD_JOURNAL_CURRENT_USER;
+ break;
+
+ case 'm':
+ arg_merge = true;
+ break;
+
+ case 'M':
+ if (arg_machine)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --machine/-M");
+
+ arg_machine = optarg;
+ break;
+
+ case 'D':
+ if (arg_directory)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --directory/-D");
+
+ arg_directory = optarg;
+ break;
+
+ case ARG_FILE:
+ r = glob_extend(&arg_file, optarg, GLOB_NOCHECK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add paths: %m");
+ break;
+
+ case ARG_CURSOR:
+ if (arg_cursor)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --cursor/--after-cursor");
+
+ arg_cursor = optarg;
+ break;
+
+ case ARG_AFTER_CURSOR:
+ if (arg_cursor)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "cannot use more than one --cursor/--after-cursor");
+
+ arg_cursor = optarg;
+ arg_after_cursor = true;
+ break;
+
+ case ARG_FOLLOW:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --follow= parameter.");
+
+ arg_follow = !!r;
+ } else
+ arg_follow = true;
+
+ break;
+
+ case ARG_SAVE_STATE:
+ arg_save_state = optarg ?: STATE_FILE;
+ break;
+
+ case '?':
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown option %s.",
+ argv[optind - 1]);
+
+ case ':':
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing argument to %s.",
+ argv[optind - 1]);
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ if (!arg_url)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Required --url/-u option missing.");
+
+ if (!!arg_key != !!arg_cert)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Options --key and --cert must be used together.");
+
+ if (optind < argc && (arg_directory || arg_file || arg_machine || arg_journal_type))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Input arguments make no sense with journal input.");
+
+ return 1;
+}
+
+static int open_journal(sd_journal **j) {
+ int r;
+
+ if (arg_directory)
+ r = sd_journal_open_directory(j, arg_directory, arg_journal_type);
+ else if (arg_file)
+ r = sd_journal_open_files(j, (const char**) arg_file, 0);
+ else if (arg_machine) {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ /* FIXME: replace with D-Bus call OpenMachineRootDirectory() so that things also work with raw disk images */
+ r = sd_journal_open_container(j, arg_machine, 0);
+#pragma GCC diagnostic pop
+ } else
+ r = sd_journal_open(j, !arg_merge*SD_JOURNAL_LOCAL_ONLY + arg_journal_type);
+ if (r < 0)
+ log_error_errno(r, "Failed to open %s: %m",
+ arg_directory ? arg_directory : arg_file ? "files" : "journal");
+ return r;
+}
+
+static int run(int argc, char **argv) {
+ _cleanup_(destroy_uploader) Uploader u = {};
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ bool use_journal;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment_cli();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ r = parse_config();
+ if (r < 0)
+ return r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ sigbus_install();
+
+ r = setup_uploader(&u, arg_url, arg_save_state);
+ if (r < 0)
+ return r;
+
+ sd_event_set_watchdog(u.events, true);
+
+ r = check_cursor_updating(&u);
+ if (r < 0)
+ return r;
+
+ log_debug("%s running as pid "PID_FMT,
+ program_invocation_short_name, getpid_cached());
+
+ use_journal = optind >= argc;
+ if (use_journal) {
+ sd_journal *j;
+ r = open_journal(&j);
+ if (r < 0)
+ return r;
+ r = open_journal_for_upload(&u, j,
+ arg_cursor ?: u.last_cursor,
+ arg_cursor ? arg_after_cursor : true,
+ !!arg_follow);
+ if (r < 0)
+ return r;
+ }
+
+ notify_message = notify_start("READY=1\n"
+ "STATUS=Processing input...",
+ NOTIFY_STOPPING);
+
+ for (;;) {
+ r = sd_event_get_state(u.events);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ return 0;
+
+ if (use_journal) {
+ if (!u.journal)
+ return 0;
+
+ r = check_journal_input(&u);
+ } else if (u.input < 0 && !use_journal) {
+ if (optind >= argc)
+ return 0;
+
+ log_debug("Using %s as input.", argv[optind]);
+ r = open_file_for_upload(&u, argv[optind++]);
+ }
+ if (r < 0)
+ return r;
+
+ if (u.uploading) {
+ r = perform_upload(&u);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_run(u.events, u.timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+ }
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal-remote/journal-upload.conf.in b/src/journal-remote/journal-upload.conf.in
new file mode 100644
index 0000000..5f59a6f
--- /dev/null
+++ b/src/journal-remote/journal-upload.conf.in
@@ -0,0 +1,18 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See journal-upload.conf(5) for details
+
+[Upload]
+# URL=
+# ServerKeyFile=@CERTIFICATEROOT@/private/journal-upload.pem
+# ServerCertificateFile=@CERTIFICATEROOT@/certs/journal-upload.pem
+# TrustedCertificateFile=@CERTIFICATEROOT@/ca/trusted.pem
diff --git a/src/journal-remote/journal-upload.h b/src/journal-remote/journal-upload.h
new file mode 100644
index 0000000..9ff5a7b
--- /dev/null
+++ b/src/journal-remote/journal-upload.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <inttypes.h>
+
+#include "sd-event.h"
+#include "sd-journal.h"
+
+#include "time-util.h"
+
+typedef enum {
+ ENTRY_CURSOR = 0, /* Nothing actually written yet. */
+ ENTRY_REALTIME,
+ ENTRY_MONOTONIC,
+ ENTRY_BOOT_ID,
+ ENTRY_NEW_FIELD, /* In between fields. */
+ ENTRY_TEXT_FIELD, /* In the middle of a text field. */
+ ENTRY_BINARY_FIELD_START, /* Writing the name of a binary field. */
+ ENTRY_BINARY_FIELD_SIZE, /* Writing the size of a binary field. */
+ ENTRY_BINARY_FIELD, /* In the middle of a binary field. */
+ ENTRY_OUTRO, /* Writing '\n' */
+ ENTRY_DONE, /* Need to move to a new field. */
+} entry_state;
+
+typedef struct Uploader {
+ sd_event *events;
+ sd_event_source *sigint_event, *sigterm_event;
+
+ char *url;
+ CURL *easy;
+ bool uploading;
+ char error[CURL_ERROR_SIZE];
+ struct curl_slist *header;
+ char *answer;
+
+ sd_event_source *input_event;
+ uint64_t timeout;
+
+ /* fd stuff */
+ int input;
+
+ /* journal stuff */
+ sd_journal* journal;
+
+ entry_state entry_state;
+ const void *field_data;
+ size_t field_pos, field_length;
+
+ /* general metrics */
+ const char *state_file;
+
+ size_t entries_sent;
+ char *last_cursor, *current_cursor;
+ usec_t watchdog_timestamp;
+ usec_t watchdog_usec;
+} Uploader;
+
+#define JOURNAL_UPLOAD_POLL_TIMEOUT (10 * USEC_PER_SEC)
+
+int start_upload(Uploader *u,
+ size_t (*input_callback)(void *ptr,
+ size_t size,
+ size_t nmemb,
+ void *userdata),
+ void *data);
+
+int open_journal_for_upload(Uploader *u,
+ sd_journal *j,
+ const char *cursor,
+ bool after_cursor,
+ bool follow);
+void close_journal_input(Uploader *u);
+int check_journal_input(Uploader *u);
diff --git a/src/journal-remote/log-generator.py b/src/journal-remote/log-generator.py
new file mode 100755
index 0000000..e1725b1
--- /dev/null
+++ b/src/journal-remote/log-generator.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+import sys
+import argparse
+
+PARSER = argparse.ArgumentParser()
+PARSER.add_argument('n', type=int)
+PARSER.add_argument('--dots', action='store_true')
+PARSER.add_argument('-m', '--message-size', type=int, default=200)
+PARSER.add_argument('-d', '--data-size', type=int, default=4000)
+PARSER.add_argument('--data-type', choices={'random', 'simple'})
+OPTIONS = PARSER.parse_args()
+
+template = """\
+__CURSOR=s=6863c726210b4560b7048889d8ada5c5;i=3e931;b=f446871715504074bf7049ef0718fa93;m={m:x};t=4fd05c
+__REALTIME_TIMESTAMP={realtime_ts}
+__MONOTONIC_TIMESTAMP={monotonic_ts}
+_BOOT_ID=f446871715504074bf7049ef0718fa93
+_TRANSPORT=syslog
+PRIORITY={priority}
+SYSLOG_FACILITY={facility}
+SYSLOG_IDENTIFIER=/USR/SBIN/CRON
+MESSAGE={message}
+_UID=0
+_GID=0
+_MACHINE_ID=69121ca41d12c1b69a7960174c27b618
+_HOSTNAME=hostname
+SYSLOG_PID=25721
+_PID=25721
+_SOURCE_REALTIME_TIMESTAMP={source_realtime_ts}
+DATA={data}
+"""
+
+m = 0x198603b12d7
+realtime_ts = 1404101101501873
+monotonic_ts = 1753961140951
+source_realtime_ts = 1404101101483516
+priority = 3
+facility = 6
+
+src = open('/dev/urandom', 'rb')
+
+bytes = 0
+counter = 0
+
+for i in range(OPTIONS.n):
+ message = src.read(OPTIONS.message_size)
+ message = repr(message)[2:-1]
+
+ if OPTIONS.data_type == 'random':
+ data = repr(src.read(OPTIONS.data_size))
+ else:
+ # keep the pattern non-repeating so we get a different blob every time
+ data = '{:0{}}'.format(counter, OPTIONS.data_size)
+ counter += 1
+
+ entry = template.format(m=m,
+ realtime_ts=realtime_ts,
+ monotonic_ts=monotonic_ts,
+ source_realtime_ts=source_realtime_ts,
+ priority=priority,
+ facility=facility,
+ message=message,
+ data=data)
+ m += 1
+ realtime_ts += 1
+ monotonic_ts += 1
+ source_realtime_ts += 1
+
+ bytes += len(entry)
+
+ print(entry)
+
+ if OPTIONS.dots:
+ print('.', file=sys.stderr, end='', flush=True)
+
+if OPTIONS.dots:
+ print(file=sys.stderr)
+print('Wrote {} bytes'.format(bytes), file=sys.stderr)
diff --git a/src/journal-remote/meson.build b/src/journal-remote/meson.build
new file mode 100644
index 0000000..4572f4b
--- /dev/null
+++ b/src/journal-remote/meson.build
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_journal_upload_sources = files('''
+ journal-upload.h
+ journal-upload.c
+ journal-upload-journal.c
+'''.split())
+
+libsystemd_journal_remote_sources = files('''
+ journal-remote-parse.h
+ journal-remote-parse.c
+ journal-remote-write.h
+ journal-remote-write.c
+ journal-remote.h
+ journal-remote.c
+'''.split())
+
+if conf.get('HAVE_MICROHTTPD') == 1
+ libsystemd_journal_remote_sources += files('''
+ microhttpd-util.h
+ microhttpd-util.c
+'''.split())
+endif
+
+libsystemd_journal_remote = static_library(
+ 'systemd-journal-remote',
+ libsystemd_journal_remote_sources,
+ include_directories : includes,
+ dependencies : [threads,
+ libmicrohttpd,
+ libgnutls,
+ libxz,
+ liblz4],
+ install : false)
+
+systemd_journal_remote_sources = files('''
+ journal-remote-main.c
+'''.split())
+
+systemd_journal_gatewayd_sources = files('''
+ journal-gatewayd.c
+ microhttpd-util.h
+ microhttpd-util.c
+'''.split())
+
+if conf.get('ENABLE_REMOTE') ==1 and conf.get('HAVE_LIBCURL') == 1
+ journal_upload_conf = configure_file(
+ input : 'journal-upload.conf.in',
+ output : 'journal-upload.conf',
+ configuration : substs)
+ if install_sysconfdir
+ install_data(journal_upload_conf,
+ install_dir : pkgsysconfdir)
+ endif
+endif
+
+if conf.get('ENABLE_REMOTE') == 1 and conf.get('HAVE_MICROHTTPD') == 1
+ journal_remote_conf = configure_file(
+ input : 'journal-remote.conf.in',
+ output : 'journal-remote.conf',
+ configuration : substs)
+ if install_sysconfdir
+ install_data(journal_remote_conf,
+ install_dir : pkgsysconfdir)
+ endif
+
+ install_data('browse.html',
+ install_dir : join_paths(pkgdatadir, 'gatewayd'))
+
+ if get_option('create-log-dirs')
+ meson.add_install_script('sh', '-c',
+ mkdir_p.format('/var/log/journal/remote'))
+ meson.add_install_script('sh', '-c',
+ '''chown 0:0 $DESTDIR/var/log/journal/remote &&
+ chmod 755 $DESTDIR/var/log/journal/remote || :''')
+ endif
+endif
diff --git a/src/journal-remote/microhttpd-util.c b/src/journal-remote/microhttpd-util.c
new file mode 100644
index 0000000..d3fb0b8
--- /dev/null
+++ b/src/journal-remote/microhttpd-util.c
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+#include <stdio.h>
+
+#if HAVE_GNUTLS
+#include <gnutls/gnutls.h>
+#include <gnutls/x509.h>
+#endif
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+#include "microhttpd-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+void microhttpd_logger(void *arg, const char *fmt, va_list ap) {
+ char *f;
+
+ f = strjoina("microhttpd: ", fmt);
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_internalv(LOG_INFO, 0, NULL, 0, NULL, f, ap);
+ REENABLE_WARNING;
+}
+
+static int mhd_respond_internal(struct MHD_Connection *connection,
+ enum MHD_RequestTerminationCode code,
+ const char *buffer,
+ size_t size,
+ enum MHD_ResponseMemoryMode mode) {
+ assert(connection);
+
+ _cleanup_(MHD_destroy_responsep) struct MHD_Response *response
+ = MHD_create_response_from_buffer(size, (char*) buffer, mode);
+ if (!response)
+ return MHD_NO;
+
+ log_debug("Queueing response %u: %s", code, buffer);
+ MHD_add_response_header(response, "Content-Type", "text/plain");
+ return MHD_queue_response(connection, code, response);
+}
+
+int mhd_respond(struct MHD_Connection *connection,
+ enum MHD_RequestTerminationCode code,
+ const char *message) {
+
+ const char *fmt;
+
+ fmt = strjoina(message, "\n");
+
+ return mhd_respond_internal(connection, code,
+ fmt, strlen(message) + 1,
+ MHD_RESPMEM_PERSISTENT);
+}
+
+int mhd_respond_oom(struct MHD_Connection *connection) {
+ return mhd_respond(connection, MHD_HTTP_SERVICE_UNAVAILABLE, "Out of memory.");
+}
+
+int mhd_respondf(struct MHD_Connection *connection,
+ int error,
+ enum MHD_RequestTerminationCode code,
+ const char *format, ...) {
+
+ const char *fmt;
+ char *m;
+ int r;
+ va_list ap;
+
+ assert(connection);
+ assert(format);
+
+ if (error < 0)
+ error = -error;
+ errno = -error;
+ fmt = strjoina(format, "\n");
+ va_start(ap, format);
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ r = vasprintf(&m, fmt, ap);
+ REENABLE_WARNING;
+ va_end(ap);
+
+ if (r < 0)
+ return respond_oom(connection);
+
+ return mhd_respond_internal(connection, code, m, r, MHD_RESPMEM_MUST_FREE);
+}
+
+#if HAVE_GNUTLS
+
+static struct {
+ const char *const names[4];
+ int level;
+ bool enabled;
+} gnutls_log_map[] = {
+ { {"0"}, LOG_DEBUG },
+ { {"1", "audit"}, LOG_WARNING, true}, /* gnutls session audit */
+ { {"2", "assert"}, LOG_DEBUG }, /* gnutls assert log */
+ { {"3", "hsk", "ext"}, LOG_DEBUG }, /* gnutls handshake log */
+ { {"4", "rec"}, LOG_DEBUG }, /* gnutls record log */
+ { {"5", "dtls"}, LOG_DEBUG }, /* gnutls DTLS log */
+ { {"6", "buf"}, LOG_DEBUG },
+ { {"7", "write", "read"}, LOG_DEBUG },
+ { {"8"}, LOG_DEBUG },
+ { {"9", "enc", "int"}, LOG_DEBUG },
+};
+
+static void log_func_gnutls(int level, const char *message) {
+ assert_se(message);
+
+ if (0 <= level && level < (int) ELEMENTSOF(gnutls_log_map)) {
+ if (gnutls_log_map[level].enabled)
+ log_internal(gnutls_log_map[level].level, 0, NULL, 0, NULL, "gnutls %d/%s: %s", level, gnutls_log_map[level].names[1], message);
+ } else {
+ log_debug("Received GNUTLS message with unknown level %d.", level);
+ log_internal(LOG_DEBUG, 0, NULL, 0, NULL, "gnutls: %s", message);
+ }
+}
+
+static void log_reset_gnutls_level(void) {
+ int i;
+
+ for (i = ELEMENTSOF(gnutls_log_map) - 1; i >= 0; i--)
+ if (gnutls_log_map[i].enabled) {
+ log_debug("Setting gnutls log level to %d", i);
+ gnutls_global_set_log_level(i);
+ break;
+ }
+}
+
+static int log_enable_gnutls_category(const char *cat) {
+ unsigned i;
+
+ if (streq(cat, "all")) {
+ for (i = 0; i < ELEMENTSOF(gnutls_log_map); i++)
+ gnutls_log_map[i].enabled = true;
+ log_reset_gnutls_level();
+ return 0;
+ } else
+ for (i = 0; i < ELEMENTSOF(gnutls_log_map); i++)
+ if (strv_contains((char**)gnutls_log_map[i].names, cat)) {
+ gnutls_log_map[i].enabled = true;
+ log_reset_gnutls_level();
+ return 0;
+ }
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No such log category: %s", cat);
+}
+
+int setup_gnutls_logger(char **categories) {
+ char **cat;
+ int r;
+
+ gnutls_global_set_log_function(log_func_gnutls);
+
+ if (categories) {
+ STRV_FOREACH(cat, categories) {
+ r = log_enable_gnutls_category(*cat);
+ if (r < 0)
+ return r;
+ }
+ } else
+ log_reset_gnutls_level();
+
+ return 0;
+}
+
+static int verify_cert_authorized(gnutls_session_t session) {
+ unsigned status;
+ gnutls_certificate_type_t type;
+ gnutls_datum_t out;
+ int r;
+
+ r = gnutls_certificate_verify_peers2(session, &status);
+ if (r < 0)
+ return log_error_errno(r, "gnutls_certificate_verify_peers2 failed: %m");
+
+ type = gnutls_certificate_type_get(session);
+ r = gnutls_certificate_verification_status_print(status, type, &out, 0);
+ if (r < 0)
+ return log_error_errno(r, "gnutls_certificate_verification_status_print failed: %m");
+
+ log_debug("Certificate status: %s", out.data);
+ gnutls_free(out.data);
+
+ return status == 0 ? 0 : -EPERM;
+}
+
+static int get_client_cert(gnutls_session_t session, gnutls_x509_crt_t *client_cert) {
+ const gnutls_datum_t *pcert;
+ unsigned listsize;
+ gnutls_x509_crt_t cert;
+ int r;
+
+ assert(session);
+ assert(client_cert);
+
+ pcert = gnutls_certificate_get_peers(session, &listsize);
+ if (!pcert || !listsize)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to retrieve certificate chain");
+
+ r = gnutls_x509_crt_init(&cert);
+ if (r < 0) {
+ log_error("Failed to initialize client certificate");
+ return r;
+ }
+
+ /* Note that by passing values between 0 and listsize here, you
+ can get access to the CA's certs */
+ r = gnutls_x509_crt_import(cert, &pcert[0], GNUTLS_X509_FMT_DER);
+ if (r < 0) {
+ log_error("Failed to import client certificate");
+ gnutls_x509_crt_deinit(cert);
+ return r;
+ }
+
+ *client_cert = cert;
+ return 0;
+}
+
+static int get_auth_dn(gnutls_x509_crt_t client_cert, char **buf) {
+ size_t len = 0;
+ int r;
+
+ assert(buf);
+ assert(*buf == NULL);
+
+ r = gnutls_x509_crt_get_dn(client_cert, NULL, &len);
+ if (r != GNUTLS_E_SHORT_MEMORY_BUFFER) {
+ log_error("gnutls_x509_crt_get_dn failed");
+ return r;
+ }
+
+ *buf = malloc(len);
+ if (!*buf)
+ return log_oom();
+
+ gnutls_x509_crt_get_dn(client_cert, *buf, &len);
+ return 0;
+}
+
+static void gnutls_x509_crt_deinitp(gnutls_x509_crt_t *p) {
+ gnutls_x509_crt_deinit(*p);
+}
+
+int check_permissions(struct MHD_Connection *connection, int *code, char **hostname) {
+ const union MHD_ConnectionInfo *ci;
+ gnutls_session_t session;
+ _cleanup_(gnutls_x509_crt_deinitp) gnutls_x509_crt_t client_cert = NULL;
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ assert(connection);
+ assert(code);
+
+ *code = 0;
+
+ ci = MHD_get_connection_info(connection,
+ MHD_CONNECTION_INFO_GNUTLS_SESSION);
+ if (!ci) {
+ log_error("MHD_get_connection_info failed: session is unencrypted");
+ *code = mhd_respond(connection, MHD_HTTP_FORBIDDEN,
+ "Encrypted connection is required");
+ return -EPERM;
+ }
+ session = ci->tls_session;
+ assert(session);
+
+ r = get_client_cert(session, &client_cert);
+ if (r < 0) {
+ *code = mhd_respond(connection, MHD_HTTP_UNAUTHORIZED,
+ "Authorization through certificate is required");
+ return -EPERM;
+ }
+
+ r = get_auth_dn(client_cert, &buf);
+ if (r < 0) {
+ *code = mhd_respond(connection, MHD_HTTP_UNAUTHORIZED,
+ "Failed to determine distinguished name from certificate");
+ return -EPERM;
+ }
+
+ log_debug("Connection from %s", buf);
+
+ if (hostname)
+ *hostname = TAKE_PTR(buf);
+
+ r = verify_cert_authorized(session);
+ if (r < 0) {
+ log_warning("Client is not authorized");
+ *code = mhd_respond(connection, MHD_HTTP_UNAUTHORIZED,
+ "Client certificate not signed by recognized authority");
+ }
+ return r;
+}
+
+#else
+int check_permissions(struct MHD_Connection *connection, int *code, char **hostname) {
+ return -EPERM;
+}
+
+int setup_gnutls_logger(char **categories) {
+ if (categories)
+ log_notice("Ignoring specified gnutls logging categories — gnutls not available.");
+ return 0;
+}
+#endif
diff --git a/src/journal-remote/microhttpd-util.h b/src/journal-remote/microhttpd-util.h
new file mode 100644
index 0000000..7f90a09
--- /dev/null
+++ b/src/journal-remote/microhttpd-util.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <microhttpd.h>
+#include <stdarg.h>
+
+#include "macro.h"
+
+/* Those defines are added when options are renamed. If the old names
+ * are not '#define'd, then they are not deprecated yet and there are
+ * enum elements with the same name. Hence let's check for the *old* name,
+ * and define the new name by the value of the old name. */
+
+/* Renamed in µhttpd 0.9.51 */
+#ifndef MHD_USE_PIPE_FOR_SHUTDOWN
+# define MHD_USE_ITC MHD_USE_PIPE_FOR_SHUTDOWN
+#endif
+
+/* Renamed in µhttpd 0.9.52 */
+#ifndef MHD_USE_EPOLL_LINUX_ONLY
+# define MHD_USE_EPOLL MHD_USE_EPOLL_LINUX_ONLY
+#endif
+
+/* Renamed in µhttpd 0.9.52 */
+#ifndef MHD_USE_SSL
+# define MHD_USE_TLS MHD_USE_SSL
+#endif
+
+/* Renamed in µhttpd 0.9.53 */
+#ifndef MHD_USE_POLL_INTERNALLY
+# define MHD_USE_POLL_INTERNAL_THREAD MHD_USE_POLL_INTERNALLY
+#endif
+
+/* Both the old and new names are defines, check for the new one. */
+
+/* Compatibility with libmicrohttpd < 0.9.38 */
+#ifndef MHD_HTTP_NOT_ACCEPTABLE
+# define MHD_HTTP_NOT_ACCEPTABLE MHD_HTTP_METHOD_NOT_ACCEPTABLE
+#endif
+
+/* Renamed in µhttpd 0.9.53 */
+#ifndef MHD_HTTP_PAYLOAD_TOO_LARGE
+# define MHD_HTTP_PAYLOAD_TOO_LARGE MHD_HTTP_REQUEST_ENTITY_TOO_LARGE
+#endif
+
+#if MHD_VERSION < 0x00094203
+# define MHD_create_response_from_fd_at_offset64 MHD_create_response_from_fd_at_offset
+#endif
+
+#if MHD_VERSION >= 0x00097002
+# define mhd_result enum MHD_Result
+#else
+# define mhd_result int
+#endif
+
+void microhttpd_logger(void *arg, const char *fmt, va_list ap) _printf_(2, 0);
+
+/* respond_oom() must be usable with return, hence this form. */
+#define respond_oom(connection) log_oom(), mhd_respond_oom(connection)
+
+int mhd_respondf(struct MHD_Connection *connection,
+ int error,
+ unsigned code,
+ const char *format, ...) _printf_(4,5);
+
+int mhd_respond(struct MHD_Connection *connection,
+ unsigned code,
+ const char *message);
+
+int mhd_respond_oom(struct MHD_Connection *connection);
+
+int check_permissions(struct MHD_Connection *connection, int *code, char **hostname);
+
+/* Set gnutls internal logging function to a callback which uses our
+ * own logging framework.
+ *
+ * gnutls categories are additionally filtered by our internal log
+ * level, so it should be set fairly high to capture all potentially
+ * interesting events without overwhelming detail.
+ */
+int setup_gnutls_logger(char **categories);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct MHD_Daemon*, MHD_stop_daemon);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct MHD_Response*, MHD_destroy_response);
diff --git a/src/journal/audit-type.c b/src/journal/audit-type.c
new file mode 100644
index 0000000..122cdf5
--- /dev/null
+++ b/src/journal/audit-type.c
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "audit-type.h"
+#include "missing_audit.h"
+
+#include "audit_type-to-name.h"
diff --git a/src/journal/audit-type.h b/src/journal/audit-type.h
new file mode 100644
index 0000000..f2c4898
--- /dev/null
+++ b/src/journal/audit-type.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+
+const char *audit_type_to_string(int type);
+int audit_type_from_string(const char *s);
+
+/* This is inspired by DNS TYPEnnn formatting */
+#define audit_type_name_alloca(type) \
+ ({ \
+ const char *_s_; \
+ _s_ = audit_type_to_string(type); \
+ if (!_s_) { \
+ _s_ = newa(char, STRLEN("AUDIT") + DECIMAL_STR_MAX(int)); \
+ sprintf((char*) _s_, "AUDIT%04i", type); \
+ } \
+ _s_; \
+ })
diff --git a/src/journal/audit_type-to-name.awk b/src/journal/audit_type-to-name.awk
new file mode 100644
index 0000000..44fc702
--- /dev/null
+++ b/src/journal/audit_type-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "const char *audit_type_to_string(int type) {\n\tswitch(type) {"
+}
+{
+ printf " case AUDIT_%s: return \"%s\";\n", $1, $1
+}
+END{
+ print " default: return NULL;\n\t}\n}\n"
+}
diff --git a/src/journal/cat.c b/src/journal/cat.c
new file mode 100644
index 0000000..bccf615
--- /dev/null
+++ b/src/journal/cat.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static const char *arg_identifier = NULL;
+static int arg_priority = LOG_INFO;
+static int arg_stderr_priority = -1;
+static bool arg_level_prefix = true;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-cat", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n"
+ "\n%sExecute process with stdout/stderr connected to the journal.%s\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -t --identifier=STRING Set syslog identifier\n"
+ " -p --priority=PRIORITY Set priority value (0..7)\n"
+ " --stderr-priority=PRIORITY Set priority value (0..7) used for stderr\n"
+ " --level-prefix=BOOL Control whether level prefix shall be parsed\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_STDERR_PRIORITY,
+ ARG_LEVEL_PREFIX
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "identifier", required_argument, NULL, 't' },
+ { "priority", required_argument, NULL, 'p' },
+ { "stderr-priority", required_argument, NULL, ARG_STDERR_PRIORITY },
+ { "level-prefix", required_argument, NULL, ARG_LEVEL_PREFIX },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+ht:p:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case ARG_VERSION:
+ return version();
+
+ case 't':
+ if (isempty(optarg))
+ arg_identifier = NULL;
+ else
+ arg_identifier = optarg;
+ break;
+
+ case 'p':
+ arg_priority = log_level_from_string(optarg);
+ if (arg_priority < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse priority value.");
+ break;
+
+ case ARG_STDERR_PRIORITY:
+ arg_stderr_priority = log_level_from_string(optarg);
+ if (arg_stderr_priority < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse stderr priority value.");
+ break;
+
+ case ARG_LEVEL_PREFIX: {
+ int k;
+
+ k = parse_boolean(optarg);
+ if (k < 0)
+ return log_error_errno(k, "Failed to parse level prefix value.");
+
+ arg_level_prefix = k;
+ break;
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_close_ int outfd = -1, errfd = -1, saved_stderr = -1;
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ outfd = sd_journal_stream_fd(arg_identifier, arg_priority, arg_level_prefix);
+ if (outfd < 0)
+ return log_error_errno(outfd, "Failed to create stream fd: %m");
+
+ if (arg_stderr_priority >= 0 && arg_stderr_priority != arg_priority) {
+ errfd = sd_journal_stream_fd(arg_identifier, arg_stderr_priority, arg_level_prefix);
+ if (errfd < 0)
+ return log_error_errno(errfd, "Failed to create stream fd: %m");
+ }
+
+ saved_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3);
+
+ r = rearrange_stdio(STDIN_FILENO, outfd, errfd < 0 ? outfd : errfd); /* Invalidates fd on success + error! */
+ TAKE_FD(outfd);
+ TAKE_FD(errfd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rearrange stdout/stderr: %m");
+
+ if (argc <= optind)
+ (void) execl("/bin/cat", "/bin/cat", NULL);
+ else
+ (void) execvp(argv[optind], argv + optind);
+ r = -errno;
+
+ /* Let's try to restore a working stderr, so we can print the error message */
+ if (saved_stderr >= 0)
+ (void) dup3(saved_stderr, STDERR_FILENO, 0);
+
+ return log_error_errno(r, "Failed to execute process: %m");
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/journal/catalog.c b/src/journal/catalog.c
new file mode 100644
index 0000000..0f6ad8a
--- /dev/null
+++ b/src/journal/catalog.c
@@ -0,0 +1,742 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "conf-files.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "log.h"
+#include "memory-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "siphash24.h"
+#include "sort-util.h"
+#include "sparse-endian.h"
+#include "strbuf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+const char * const catalog_file_dirs[] = {
+ "/usr/local/lib/systemd/catalog/",
+ "/usr/lib/systemd/catalog/",
+ NULL
+};
+
+#define CATALOG_SIGNATURE { 'R', 'H', 'H', 'H', 'K', 'S', 'L', 'P' }
+
+typedef struct CatalogHeader {
+ uint8_t signature[8]; /* "RHHHKSLP" */
+ le32_t compatible_flags;
+ le32_t incompatible_flags;
+ le64_t header_size;
+ le64_t n_items;
+ le64_t catalog_item_size;
+} CatalogHeader;
+
+typedef struct CatalogItem {
+ sd_id128_t id;
+ char language[32]; /* One byte is used for termination, so the maximum allowed
+ * length of the string is actually 31 bytes. */
+ le64_t offset;
+} CatalogItem;
+
+static void catalog_hash_func(const CatalogItem *i, struct siphash *state) {
+ siphash24_compress(&i->id, sizeof(i->id), state);
+ siphash24_compress_string(i->language, state);
+}
+
+static int catalog_compare_func(const CatalogItem *a, const CatalogItem *b) {
+ unsigned k;
+ int r;
+
+ for (k = 0; k < ELEMENTSOF(b->id.bytes); k++) {
+ r = CMP(a->id.bytes[k], b->id.bytes[k]);
+ if (r != 0)
+ return r;
+ }
+
+ return strcmp(a->language, b->language);
+}
+
+DEFINE_HASH_OPS(catalog_hash_ops, CatalogItem, catalog_hash_func, catalog_compare_func);
+
+static bool next_header(const char **s) {
+ const char *e;
+
+ e = strchr(*s, '\n');
+
+ /* Unexpected end */
+ if (!e)
+ return false;
+
+ /* End of headers */
+ if (e == *s)
+ return false;
+
+ *s = e + 1;
+ return true;
+}
+
+static const char *skip_header(const char *s) {
+ while (next_header(&s))
+ ;
+ return s;
+}
+
+static char *combine_entries(const char *one, const char *two) {
+ const char *b1, *b2;
+ size_t l1, l2, n;
+ char *dest, *p;
+
+ /* Find split point of headers to body */
+ b1 = skip_header(one);
+ b2 = skip_header(two);
+
+ l1 = strlen(one);
+ l2 = strlen(two);
+ dest = new(char, l1 + l2 + 1);
+ if (!dest) {
+ log_oom();
+ return NULL;
+ }
+
+ p = dest;
+
+ /* Headers from @one */
+ n = b1 - one;
+ p = mempcpy(p, one, n);
+
+ /* Headers from @two, these will only be found if not present above */
+ n = b2 - two;
+ p = mempcpy(p, two, n);
+
+ /* Body from @one */
+ n = l1 - (b1 - one);
+ if (n > 0) {
+ memcpy(p, b1, n);
+ p += n;
+
+ /* Body from @two */
+ } else {
+ n = l2 - (b2 - two);
+ memcpy(p, b2, n);
+ p += n;
+ }
+
+ assert(p - dest <= (ptrdiff_t)(l1 + l2));
+ p[0] = '\0';
+ return dest;
+}
+
+static int finish_item(
+ OrderedHashmap *h,
+ sd_id128_t id,
+ const char *language,
+ char *payload, size_t payload_size) {
+
+ _cleanup_free_ CatalogItem *i = NULL;
+ _cleanup_free_ char *prev = NULL, *combined = NULL;
+
+ assert(h);
+ assert(payload);
+ assert(payload_size > 0);
+
+ i = new0(CatalogItem, 1);
+ if (!i)
+ return log_oom();
+
+ i->id = id;
+ if (language) {
+ assert(strlen(language) > 1 && strlen(language) < 32);
+ strcpy(i->language, language);
+ }
+
+ prev = ordered_hashmap_get(h, i);
+ if (prev) {
+ /* Already have such an item, combine them */
+ combined = combine_entries(payload, prev);
+ if (!combined)
+ return log_oom();
+
+ if (ordered_hashmap_update(h, i, combined) < 0)
+ return log_oom();
+ combined = NULL;
+ } else {
+ /* A new item */
+ combined = memdup(payload, payload_size + 1);
+ if (!combined)
+ return log_oom();
+
+ if (ordered_hashmap_put(h, i, combined) < 0)
+ return log_oom();
+ i = NULL;
+ combined = NULL;
+ }
+
+ return 0;
+}
+
+int catalog_file_lang(const char* filename, char **lang) {
+ char *beg, *end, *_lang;
+
+ end = endswith(filename, ".catalog");
+ if (!end)
+ return 0;
+
+ beg = end - 1;
+ while (beg > filename && !IN_SET(*beg, '.', '/') && end - beg < 32)
+ beg--;
+
+ if (*beg != '.' || end <= beg + 1)
+ return 0;
+
+ _lang = strndup(beg + 1, end - beg - 1);
+ if (!_lang)
+ return -ENOMEM;
+
+ *lang = _lang;
+ return 1;
+}
+
+static int catalog_entry_lang(
+ const char* filename,
+ unsigned line,
+ const char* t,
+ const char* deflang,
+ char **ret) {
+
+ size_t c;
+ char *z;
+
+ c = strlen(t);
+ if (c < 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Language too short.", filename, line);
+ if (c > 31)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] language too long.", filename, line);
+
+ if (deflang) {
+ if (streq(t, deflang)) {
+ log_warning("[%s:%u] language specified unnecessarily", filename, line);
+ return 0;
+ }
+
+ log_warning("[%s:%u] language differs from default for file", filename, line);
+ }
+
+ z = strdup(t);
+ if (!z)
+ return -ENOMEM;
+
+ *ret = z;
+ return 0;
+}
+
+int catalog_import_file(OrderedHashmap *h, const char *path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *payload = NULL;
+ size_t payload_size = 0, payload_allocated = 0;
+ unsigned n = 0;
+ sd_id128_t id;
+ _cleanup_free_ char *deflang = NULL, *lang = NULL;
+ bool got_id = false, empty_line = true;
+ int r;
+
+ assert(h);
+ assert(path);
+
+ f = fopen(path, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open file %s: %m", path);
+
+ r = catalog_file_lang(path, &deflang);
+ if (r < 0)
+ log_error_errno(r, "Failed to determine language for file %s: %m", path);
+ if (r == 1)
+ log_debug("File %s has language %s.", path, deflang);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t line_len;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read file %s: %m", path);
+ if (r == 0)
+ break;
+
+ n++;
+
+ if (isempty(line)) {
+ empty_line = true;
+ continue;
+ }
+
+ if (strchr(COMMENTS, line[0]))
+ continue;
+
+ if (empty_line &&
+ strlen(line) >= 2+1+32 &&
+ line[0] == '-' &&
+ line[1] == '-' &&
+ line[2] == ' ' &&
+ IN_SET(line[2+1+32], ' ', '\0')) {
+
+ bool with_language;
+ sd_id128_t jd;
+
+ /* New entry */
+
+ with_language = line[2+1+32] != '\0';
+ line[2+1+32] = '\0';
+
+ if (sd_id128_from_string(line + 2 + 1, &jd) >= 0) {
+
+ if (got_id) {
+ if (payload_size == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] No payload text.",
+ path,
+ n);
+
+ r = finish_item(h, id, lang ?: deflang, payload, payload_size);
+ if (r < 0)
+ return r;
+
+ lang = mfree(lang);
+ payload_size = 0;
+ }
+
+ if (with_language) {
+ char *t;
+
+ t = strstrip(line + 2 + 1 + 32 + 1);
+ r = catalog_entry_lang(path, n, t, deflang, &lang);
+ if (r < 0)
+ return r;
+ }
+
+ got_id = true;
+ empty_line = false;
+ id = jd;
+
+ continue;
+ }
+ }
+
+ /* Payload */
+ if (!got_id)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Got payload before ID.",
+ path, n);
+
+ line_len = strlen(line);
+ if (!GREEDY_REALLOC(payload, payload_allocated,
+ payload_size + (empty_line ? 1 : 0) + line_len + 1 + 1))
+ return log_oom();
+
+ if (empty_line)
+ payload[payload_size++] = '\n';
+ memcpy(payload + payload_size, line, line_len);
+ payload_size += line_len;
+ payload[payload_size++] = '\n';
+ payload[payload_size] = '\0';
+
+ empty_line = false;
+ }
+
+ if (got_id) {
+ if (payload_size == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] No payload text.",
+ path, n);
+
+ r = finish_item(h, id, lang ?: deflang, payload, payload_size);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int64_t write_catalog(
+ const char *database,
+ struct strbuf *sb,
+ CatalogItem *items,
+ size_t n) {
+
+ _cleanup_fclose_ FILE *w = NULL;
+ _cleanup_free_ char *p = NULL;
+ CatalogHeader header;
+ size_t k;
+ int r;
+
+ r = mkdir_parents(database, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create parent directories of %s: %m", database);
+
+ r = fopen_temporary(database, &w, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open database for writing: %s: %m",
+ database);
+
+ header = (CatalogHeader) {
+ .signature = CATALOG_SIGNATURE,
+ .header_size = htole64(ALIGN_TO(sizeof(CatalogHeader), 8)),
+ .catalog_item_size = htole64(sizeof(CatalogItem)),
+ .n_items = htole64(n),
+ };
+
+ r = -EIO;
+
+ k = fwrite(&header, 1, sizeof(header), w);
+ if (k != sizeof(header)) {
+ log_error("%s: failed to write header.", p);
+ goto error;
+ }
+
+ k = fwrite(items, 1, n * sizeof(CatalogItem), w);
+ if (k != n * sizeof(CatalogItem)) {
+ log_error("%s: failed to write database.", p);
+ goto error;
+ }
+
+ k = fwrite(sb->buf, 1, sb->len, w);
+ if (k != sb->len) {
+ log_error("%s: failed to write strings.", p);
+ goto error;
+ }
+
+ r = fflush_and_check(w);
+ if (r < 0) {
+ log_error_errno(r, "%s: failed to write database: %m", p);
+ goto error;
+ }
+
+ (void) fchmod(fileno(w), 0644);
+
+ if (rename(p, database) < 0) {
+ r = log_error_errno(errno, "rename (%s -> %s) failed: %m", p, database);
+ goto error;
+ }
+
+ return ftello(w);
+
+error:
+ (void) unlink(p);
+ return r;
+}
+
+int catalog_update(const char* database, const char* root, const char* const* dirs) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ _cleanup_(strbuf_cleanupp) struct strbuf *sb = NULL;
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ _cleanup_free_ CatalogItem *items = NULL;
+ ssize_t offset;
+ char *payload;
+ CatalogItem *i;
+ unsigned n;
+ int r;
+ int64_t sz;
+
+ h = ordered_hashmap_new(&catalog_hash_ops);
+ sb = strbuf_new();
+ if (!h || !sb)
+ return log_oom();
+
+ r = conf_files_list_strv(&files, ".catalog", root, 0, dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get catalog files: %m");
+
+ STRV_FOREACH(f, files) {
+ log_debug("Reading file '%s'", *f);
+ r = catalog_import_file(h, *f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import file '%s': %m", *f);
+ }
+
+ if (ordered_hashmap_size(h) <= 0) {
+ log_info("No items in catalog.");
+ return 0;
+ } else
+ log_debug("Found %u items in catalog.", ordered_hashmap_size(h));
+
+ items = new(CatalogItem, ordered_hashmap_size(h));
+ if (!items)
+ return log_oom();
+
+ n = 0;
+ ORDERED_HASHMAP_FOREACH_KEY(payload, i, h) {
+ log_debug("Found " SD_ID128_FORMAT_STR ", language %s",
+ SD_ID128_FORMAT_VAL(i->id),
+ isempty(i->language) ? "C" : i->language);
+
+ offset = strbuf_add_string(sb, payload, strlen(payload));
+ if (offset < 0)
+ return log_oom();
+
+ i->offset = htole64((uint64_t) offset);
+ items[n++] = *i;
+ }
+
+ assert(n == ordered_hashmap_size(h));
+ typesafe_qsort(items, n, catalog_compare_func);
+
+ strbuf_complete(sb);
+
+ sz = write_catalog(database, sb, items, n);
+ if (sz < 0)
+ return log_error_errno(sz, "Failed to write %s: %m", database);
+
+ log_debug("%s: wrote %u items, with %zu bytes of strings, %"PRIi64" total size.",
+ database, n, sb->len, sz);
+ return 0;
+}
+
+static int open_mmap(const char *database, int *_fd, struct stat *_st, void **_p) {
+ _cleanup_close_ int fd = -1;
+ const CatalogHeader *h;
+ struct stat st;
+ void *p;
+
+ assert(_fd);
+ assert(_st);
+ assert(_p);
+
+ fd = open(database, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (st.st_size < (off_t) sizeof(CatalogHeader))
+ return -EINVAL;
+
+ p = mmap(NULL, PAGE_ALIGN(st.st_size), PROT_READ, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED)
+ return -errno;
+
+ h = p;
+ if (memcmp(h->signature, (const uint8_t[]) CATALOG_SIGNATURE, sizeof(h->signature)) != 0 ||
+ le64toh(h->header_size) < sizeof(CatalogHeader) ||
+ le64toh(h->catalog_item_size) < sizeof(CatalogItem) ||
+ h->incompatible_flags != 0 ||
+ le64toh(h->n_items) <= 0 ||
+ st.st_size < (off_t) (le64toh(h->header_size) + le64toh(h->catalog_item_size) * le64toh(h->n_items))) {
+ munmap(p, st.st_size);
+ return -EBADMSG;
+ }
+
+ *_fd = TAKE_FD(fd);
+ *_st = st;
+ *_p = p;
+
+ return 0;
+}
+
+static const char *find_id(void *p, sd_id128_t id) {
+ CatalogItem *f = NULL, key = { .id = id };
+ const CatalogHeader *h = p;
+ const char *loc;
+
+ loc = setlocale(LC_MESSAGES, NULL);
+ if (!isempty(loc) && !STR_IN_SET(loc, "C", "POSIX")) {
+ size_t len;
+
+ len = strcspn(loc, ".@");
+ if (len > sizeof(key.language) - 1)
+ log_debug("LC_MESSAGES value too long, ignoring: \"%.*s\"", (int) len, loc);
+ else {
+ strncpy(key.language, loc, len);
+ key.language[len] = '\0';
+
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ if (!f) {
+ char *e;
+
+ e = strchr(key.language, '_');
+ if (e) {
+ *e = 0;
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ }
+ }
+ }
+ }
+
+ if (!f) {
+ zero(key.language);
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ }
+
+ if (!f)
+ return NULL;
+
+ return (const char*) p +
+ le64toh(h->header_size) +
+ le64toh(h->n_items) * le64toh(h->catalog_item_size) +
+ le64toh(f->offset);
+}
+
+int catalog_get(const char* database, sd_id128_t id, char **_text) {
+ _cleanup_close_ int fd = -1;
+ void *p = NULL;
+ struct stat st = {};
+ char *text = NULL;
+ int r;
+ const char *s;
+
+ assert(_text);
+
+ r = open_mmap(database, &fd, &st, &p);
+ if (r < 0)
+ return r;
+
+ s = find_id(p, id);
+ if (!s) {
+ r = -ENOENT;
+ goto finish;
+ }
+
+ text = strdup(s);
+ if (!text) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ *_text = text;
+ r = 0;
+
+finish:
+ if (p)
+ munmap(p, st.st_size);
+
+ return r;
+}
+
+static char *find_header(const char *s, const char *header) {
+
+ for (;;) {
+ const char *v;
+
+ v = startswith(s, header);
+ if (v) {
+ v += strspn(v, WHITESPACE);
+ return strndup(v, strcspn(v, NEWLINE));
+ }
+
+ if (!next_header(&s))
+ return NULL;
+ }
+}
+
+static void dump_catalog_entry(FILE *f, sd_id128_t id, const char *s, bool oneline) {
+ if (oneline) {
+ _cleanup_free_ char *subject = NULL, *defined_by = NULL;
+
+ subject = find_header(s, "Subject:");
+ defined_by = find_header(s, "Defined-By:");
+
+ fprintf(f, SD_ID128_FORMAT_STR " %s: %s\n",
+ SD_ID128_FORMAT_VAL(id),
+ strna(defined_by), strna(subject));
+ } else
+ fprintf(f, "-- " SD_ID128_FORMAT_STR "\n%s\n",
+ SD_ID128_FORMAT_VAL(id), s);
+}
+
+int catalog_list(FILE *f, const char *database, bool oneline) {
+ _cleanup_close_ int fd = -1;
+ void *p = NULL;
+ struct stat st;
+ const CatalogHeader *h;
+ const CatalogItem *items;
+ int r;
+ unsigned n;
+ sd_id128_t last_id;
+ bool last_id_set = false;
+
+ r = open_mmap(database, &fd, &st, &p);
+ if (r < 0)
+ return r;
+
+ h = p;
+ items = (const CatalogItem*) ((const uint8_t*) p + le64toh(h->header_size));
+
+ for (n = 0; n < le64toh(h->n_items); n++) {
+ const char *s;
+
+ if (last_id_set && sd_id128_equal(last_id, items[n].id))
+ continue;
+
+ assert_se(s = find_id(p, items[n].id));
+
+ dump_catalog_entry(f, items[n].id, s, oneline);
+
+ last_id_set = true;
+ last_id = items[n].id;
+ }
+
+ munmap(p, st.st_size);
+
+ return 0;
+}
+
+int catalog_list_items(FILE *f, const char *database, bool oneline, char **items) {
+ char **item;
+ int r = 0;
+
+ STRV_FOREACH(item, items) {
+ sd_id128_t id;
+ int k;
+ _cleanup_free_ char *msg = NULL;
+
+ k = sd_id128_from_string(*item, &id);
+ if (k < 0) {
+ log_error_errno(k, "Failed to parse id128 '%s': %m", *item);
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ k = catalog_get(database, id, &msg);
+ if (k < 0) {
+ log_full_errno(k == -ENOENT ? LOG_NOTICE : LOG_ERR, k,
+ "Failed to retrieve catalog entry for '%s': %m", *item);
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ dump_catalog_entry(f, id, msg, oneline);
+ }
+
+ return r;
+}
diff --git a/src/journal/catalog.h b/src/journal/catalog.h
new file mode 100644
index 0000000..df27869
--- /dev/null
+++ b/src/journal/catalog.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "strbuf.h"
+
+int catalog_import_file(OrderedHashmap *h, const char *path);
+int catalog_update(const char* database, const char* root, const char* const* dirs);
+int catalog_get(const char* database, sd_id128_t id, char **data);
+int catalog_list(FILE *f, const char* database, bool oneline);
+int catalog_list_items(FILE *f, const char* database, bool oneline, char **items);
+int catalog_file_lang(const char *filename, char **lang);
+extern const char * const catalog_file_dirs[];
+extern const struct hash_ops catalog_hash_ops;
diff --git a/src/journal/compress.c b/src/journal/compress.c
new file mode 100644
index 0000000..aaf186b
--- /dev/null
+++ b/src/journal/compress.c
@@ -0,0 +1,1061 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#if HAVE_XZ
+#include <lzma.h>
+#endif
+
+#if HAVE_LZ4
+#include <lz4.h>
+#include <lz4frame.h>
+#endif
+
+#if HAVE_ZSTD
+#include <zstd.h>
+#include <zstd_errors.h>
+#endif
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "journal-def.h"
+#include "macro.h"
+#include "sparse-endian.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "unaligned.h"
+#include "util.h"
+
+#if HAVE_LZ4
+DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_compressionContext_t, LZ4F_freeCompressionContext);
+DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_decompressionContext_t, LZ4F_freeDecompressionContext);
+#endif
+
+#if HAVE_ZSTD
+DEFINE_TRIVIAL_CLEANUP_FUNC(ZSTD_CCtx *, ZSTD_freeCCtx);
+DEFINE_TRIVIAL_CLEANUP_FUNC(ZSTD_DCtx *, ZSTD_freeDCtx);
+
+static int zstd_ret_to_errno(size_t ret) {
+ switch (ZSTD_getErrorCode(ret)) {
+ case ZSTD_error_dstSize_tooSmall:
+ return -ENOBUFS;
+ case ZSTD_error_memory_allocation:
+ return -ENOMEM;
+ default:
+ return -EBADMSG;
+ }
+}
+#endif
+
+#define ALIGN_8(l) ALIGN_TO(l, sizeof(size_t))
+
+static const char* const object_compressed_table[_OBJECT_COMPRESSED_MAX] = {
+ [OBJECT_COMPRESSED_XZ] = "XZ",
+ [OBJECT_COMPRESSED_LZ4] = "LZ4",
+ [OBJECT_COMPRESSED_ZSTD] = "ZSTD",
+ /* If we add too many more entries here, it's going to grow quite large (and be mostly sparse), since
+ * the array key is actually a bitmask, not a plain enum */
+};
+
+DEFINE_STRING_TABLE_LOOKUP(object_compressed, int);
+
+int compress_blob_xz(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+#if HAVE_XZ
+ static const lzma_options_lzma opt = {
+ 1u << 20u, NULL, 0, LZMA_LC_DEFAULT, LZMA_LP_DEFAULT,
+ LZMA_PB_DEFAULT, LZMA_MODE_FAST, 128, LZMA_MF_HC3, 4
+ };
+ static const lzma_filter filters[] = {
+ { LZMA_FILTER_LZMA2, (lzma_options_lzma*) &opt },
+ { LZMA_VLI_UNKNOWN, NULL }
+ };
+ lzma_ret ret;
+ size_t out_pos = 0;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size > 0);
+ assert(dst_size);
+
+ /* Returns < 0 if we couldn't compress the data or the
+ * compressed result is longer than the original */
+
+ if (src_size < 80)
+ return -ENOBUFS;
+
+ ret = lzma_stream_buffer_encode((lzma_filter*) filters, LZMA_CHECK_NONE, NULL,
+ src, src_size, dst, &out_pos, dst_alloc_size);
+ if (ret != LZMA_OK)
+ return -ENOBUFS;
+
+ *dst_size = out_pos;
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int compress_blob_lz4(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+#if HAVE_LZ4
+ int r;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size > 0);
+ assert(dst_size);
+
+ /* Returns < 0 if we couldn't compress the data or the
+ * compressed result is longer than the original */
+
+ if (src_size < 9)
+ return -ENOBUFS;
+
+ r = LZ4_compress_default(src, (char*)dst + 8, src_size, (int) dst_alloc_size - 8);
+ if (r <= 0)
+ return -ENOBUFS;
+
+ unaligned_write_le64(dst, src_size);
+ *dst_size = r + 8;
+
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int compress_blob_zstd(
+ const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+#if HAVE_ZSTD
+ size_t k;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size > 0);
+ assert(dst_size);
+
+ k = ZSTD_compress(dst, dst_alloc_size, src, src_size, 0);
+ if (ZSTD_isError(k))
+ return zstd_ret_to_errno(k);
+
+ *dst_size = k;
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_blob_xz(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
+
+#if HAVE_XZ
+ _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
+ lzma_ret ret;
+ size_t space;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size);
+ assert(dst_size);
+ assert(*dst_alloc_size == 0 || *dst);
+
+ ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
+ if (ret != LZMA_OK)
+ return -ENOMEM;
+
+ space = MIN(src_size * 2, dst_max ?: (size_t) -1);
+ if (!greedy_realloc(dst, dst_alloc_size, space, 1))
+ return -ENOMEM;
+
+ s.next_in = src;
+ s.avail_in = src_size;
+
+ s.next_out = *dst;
+ s.avail_out = space;
+
+ for (;;) {
+ size_t used;
+
+ ret = lzma_code(&s, LZMA_FINISH);
+
+ if (ret == LZMA_STREAM_END)
+ break;
+ else if (ret != LZMA_OK)
+ return -ENOMEM;
+
+ if (dst_max > 0 && (space - s.avail_out) >= dst_max)
+ break;
+ else if (dst_max > 0 && space == dst_max)
+ return -ENOBUFS;
+
+ used = space - s.avail_out;
+ space = MIN(2 * space, dst_max ?: (size_t) -1);
+ if (!greedy_realloc(dst, dst_alloc_size, space, 1))
+ return -ENOMEM;
+
+ s.avail_out = space - used;
+ s.next_out = *(uint8_t**)dst + used;
+ }
+
+ *dst_size = space - s.avail_out;
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_blob_lz4(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
+
+#if HAVE_LZ4
+ char* out;
+ int r, size; /* LZ4 uses int for size */
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size);
+ assert(dst_size);
+ assert(*dst_alloc_size == 0 || *dst);
+
+ if (src_size <= 8)
+ return -EBADMSG;
+
+ size = unaligned_read_le64(src);
+ if (size < 0 || (unsigned) size != unaligned_read_le64(src))
+ return -EFBIG;
+ if ((size_t) size > *dst_alloc_size) {
+ out = realloc(*dst, size);
+ if (!out)
+ return -ENOMEM;
+ *dst = out;
+ *dst_alloc_size = size;
+ } else
+ out = *dst;
+
+ r = LZ4_decompress_safe((char*)src + 8, out, src_size - 8, size);
+ if (r < 0 || r != size)
+ return -EBADMSG;
+
+ *dst_size = size;
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_blob_zstd(
+ const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t *dst_size, size_t dst_max) {
+
+#if HAVE_ZSTD
+ uint64_t size;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(dst);
+ assert(dst_alloc_size);
+ assert(dst_size);
+ assert(*dst_alloc_size == 0 || *dst);
+
+ size = ZSTD_getFrameContentSize(src, src_size);
+ if (IN_SET(size, ZSTD_CONTENTSIZE_ERROR, ZSTD_CONTENTSIZE_UNKNOWN))
+ return -EBADMSG;
+
+ if (dst_max > 0 && size > dst_max)
+ size = dst_max;
+ if (size > SIZE_MAX)
+ return -E2BIG;
+
+ if (!(greedy_realloc(dst, dst_alloc_size, MAX(ZSTD_DStreamOutSize(), size), 1)))
+ return -ENOMEM;
+
+ _cleanup_(ZSTD_freeDCtxp) ZSTD_DCtx *dctx = ZSTD_createDCtx();
+ if (!dctx)
+ return -ENOMEM;
+
+ ZSTD_inBuffer input = {
+ .src = src,
+ .size = src_size,
+ };
+ ZSTD_outBuffer output = {
+ .dst = *dst,
+ .size = *dst_alloc_size,
+ };
+
+ size_t k = ZSTD_decompressStream(dctx, &output, &input);
+ if (ZSTD_isError(k)) {
+ log_debug("ZSTD decoder failed: %s", ZSTD_getErrorName(k));
+ return zstd_ret_to_errno(k);
+ }
+ assert(output.pos >= size);
+
+ *dst_size = size;
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_blob(
+ int compression,
+ const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
+
+ if (compression == OBJECT_COMPRESSED_XZ)
+ return decompress_blob_xz(
+ src, src_size,
+ dst, dst_alloc_size, dst_size, dst_max);
+ else if (compression == OBJECT_COMPRESSED_LZ4)
+ return decompress_blob_lz4(
+ src, src_size,
+ dst, dst_alloc_size, dst_size, dst_max);
+ else if (compression == OBJECT_COMPRESSED_ZSTD)
+ return decompress_blob_zstd(
+ src, src_size,
+ dst, dst_alloc_size, dst_size, dst_max);
+ else
+ return -EPROTONOSUPPORT;
+}
+
+int decompress_startswith_xz(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra) {
+
+#if HAVE_XZ
+ _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
+ lzma_ret ret;
+
+ /* Checks whether the decompressed blob starts with the
+ * mentioned prefix. The byte extra needs to follow the
+ * prefix */
+
+ assert(src);
+ assert(src_size > 0);
+ assert(buffer);
+ assert(buffer_size);
+ assert(prefix);
+ assert(*buffer_size == 0 || *buffer);
+
+ ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
+ if (ret != LZMA_OK)
+ return -EBADMSG;
+
+ if (!(greedy_realloc(buffer, buffer_size, ALIGN_8(prefix_len + 1), 1)))
+ return -ENOMEM;
+
+ s.next_in = src;
+ s.avail_in = src_size;
+
+ s.next_out = *buffer;
+ s.avail_out = *buffer_size;
+
+ for (;;) {
+ ret = lzma_code(&s, LZMA_FINISH);
+
+ if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END))
+ return -EBADMSG;
+
+ if (*buffer_size - s.avail_out >= prefix_len + 1)
+ return memcmp(*buffer, prefix, prefix_len) == 0 &&
+ ((const uint8_t*) *buffer)[prefix_len] == extra;
+
+ if (ret == LZMA_STREAM_END)
+ return 0;
+
+ s.avail_out += *buffer_size;
+
+ if (!(greedy_realloc(buffer, buffer_size, *buffer_size * 2, 1)))
+ return -ENOMEM;
+
+ s.next_out = *(uint8_t**)buffer + *buffer_size - s.avail_out;
+ }
+
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_startswith_lz4(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra) {
+#if HAVE_LZ4
+ /* Checks whether the decompressed blob starts with the
+ * mentioned prefix. The byte extra needs to follow the
+ * prefix */
+
+ int r;
+
+ assert(src);
+ assert(src_size > 0);
+ assert(buffer);
+ assert(buffer_size);
+ assert(prefix);
+ assert(*buffer_size == 0 || *buffer);
+
+ if (src_size <= 8)
+ return -EBADMSG;
+
+ if (!(greedy_realloc(buffer, buffer_size, ALIGN_8(prefix_len + 1), 1)))
+ return -ENOMEM;
+
+ r = LZ4_decompress_safe_partial((char*)src + 8, *buffer, src_size - 8,
+ prefix_len + 1, *buffer_size);
+ /* One lz4 < 1.8.3, we might get "failure" (r < 0), or "success" where
+ * just a part of the buffer is decompressed. But if we get a smaller
+ * amount of bytes than requested, we don't know whether there isn't enough
+ * data to fill the requested size or whether we just got a partial answer.
+ */
+ if (r < 0 || (size_t) r < prefix_len + 1) {
+ size_t size;
+
+ if (LZ4_versionNumber() >= 10803)
+ /* We trust that the newer lz4 decompresses the number of bytes we
+ * requested if available in the compressed string. */
+ return 0;
+
+ if (r > 0)
+ /* Compare what we have first, in case of mismatch we can
+ * shortcut the full comparison. */
+ if (memcmp(*buffer, prefix, r) != 0)
+ return 0;
+
+ /* Before version 1.8.3, lz4 always tries to decode full a "sequence",
+ * so in pathological cases might need to decompress the full field. */
+ r = decompress_blob_lz4(src, src_size, buffer, buffer_size, &size, 0);
+ if (r < 0)
+ return r;
+
+ if (size < prefix_len + 1)
+ return 0;
+ }
+
+ return memcmp(*buffer, prefix, prefix_len) == 0 &&
+ ((const uint8_t*) *buffer)[prefix_len] == extra;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_startswith_zstd(
+ const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra) {
+#if HAVE_ZSTD
+ assert(src);
+ assert(src_size > 0);
+ assert(buffer);
+ assert(buffer_size);
+ assert(prefix);
+ assert(*buffer_size == 0 || *buffer);
+
+ uint64_t size = ZSTD_getFrameContentSize(src, src_size);
+ if (IN_SET(size, ZSTD_CONTENTSIZE_ERROR, ZSTD_CONTENTSIZE_UNKNOWN))
+ return -EBADMSG;
+
+ if (size < prefix_len + 1)
+ return 0; /* Decompressed text too short to match the prefix and extra */
+
+ _cleanup_(ZSTD_freeDCtxp) ZSTD_DCtx *dctx = ZSTD_createDCtx();
+ if (!dctx)
+ return -ENOMEM;
+
+ if (!(greedy_realloc(buffer, buffer_size, MAX(ZSTD_DStreamOutSize(), prefix_len + 1), 1)))
+ return -ENOMEM;
+
+ ZSTD_inBuffer input = {
+ .src = src,
+ .size = src_size,
+ };
+ ZSTD_outBuffer output = {
+ .dst = *buffer,
+ .size = *buffer_size,
+ };
+ size_t k;
+
+ k = ZSTD_decompressStream(dctx, &output, &input);
+ if (ZSTD_isError(k)) {
+ log_debug("ZSTD decoder failed: %s", ZSTD_getErrorName(k));
+ return zstd_ret_to_errno(k);
+ }
+ assert(output.pos >= prefix_len + 1);
+
+ return memcmp(*buffer, prefix, prefix_len) == 0 &&
+ ((const uint8_t*) *buffer)[prefix_len] == extra;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_startswith(
+ int compression,
+ const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra) {
+
+ if (compression == OBJECT_COMPRESSED_XZ)
+ return decompress_startswith_xz(
+ src, src_size,
+ buffer, buffer_size,
+ prefix, prefix_len,
+ extra);
+
+ else if (compression == OBJECT_COMPRESSED_LZ4)
+ return decompress_startswith_lz4(
+ src, src_size,
+ buffer, buffer_size,
+ prefix, prefix_len,
+ extra);
+ else if (compression == OBJECT_COMPRESSED_ZSTD)
+ return decompress_startswith_zstd(
+ src, src_size,
+ buffer, buffer_size,
+ prefix, prefix_len,
+ extra);
+ else
+ return -EBADMSG;
+}
+
+int compress_stream_xz(int fdf, int fdt, uint64_t max_bytes) {
+#if HAVE_XZ
+ _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
+ lzma_ret ret;
+ uint8_t buf[BUFSIZ], out[BUFSIZ];
+ lzma_action action = LZMA_RUN;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ ret = lzma_easy_encoder(&s, LZMA_PRESET_DEFAULT, LZMA_CHECK_CRC64);
+ if (ret != LZMA_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to initialize XZ encoder: code %u",
+ ret);
+
+ for (;;) {
+ if (s.avail_in == 0 && action == LZMA_RUN) {
+ size_t m = sizeof(buf);
+ ssize_t n;
+
+ if (max_bytes != (uint64_t) -1 && (uint64_t) m > max_bytes)
+ m = (size_t) max_bytes;
+
+ n = read(fdf, buf, m);
+ if (n < 0)
+ return -errno;
+ if (n == 0)
+ action = LZMA_FINISH;
+ else {
+ s.next_in = buf;
+ s.avail_in = n;
+
+ if (max_bytes != (uint64_t) -1) {
+ assert(max_bytes >= (uint64_t) n);
+ max_bytes -= n;
+ }
+ }
+ }
+
+ if (s.avail_out == 0) {
+ s.next_out = out;
+ s.avail_out = sizeof(out);
+ }
+
+ ret = lzma_code(&s, action);
+ if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Compression failed: code %u",
+ ret);
+
+ if (s.avail_out == 0 || ret == LZMA_STREAM_END) {
+ ssize_t n, k;
+
+ n = sizeof(out) - s.avail_out;
+
+ k = loop_write(fdt, out, n, false);
+ if (k < 0)
+ return k;
+
+ if (ret == LZMA_STREAM_END) {
+ log_debug("XZ compression finished (%"PRIu64" -> %"PRIu64" bytes, %.1f%%)",
+ s.total_in, s.total_out,
+ (double) s.total_out / s.total_in * 100);
+
+ return 0;
+ }
+ }
+ }
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+#define LZ4_BUFSIZE (512*1024u)
+
+int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) {
+
+#if HAVE_LZ4
+ LZ4F_errorCode_t c;
+ _cleanup_(LZ4F_freeCompressionContextp) LZ4F_compressionContext_t ctx = NULL;
+ _cleanup_free_ char *buf = NULL;
+ char *src = NULL;
+ size_t size, n, total_in = 0, total_out, offset = 0, frame_size;
+ struct stat st;
+ int r;
+ static const LZ4F_compressOptions_t options = {
+ .stableSrc = 1,
+ };
+ static const LZ4F_preferences_t preferences = {
+ .frameInfo.blockSizeID = 5,
+ };
+
+ c = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
+ if (LZ4F_isError(c))
+ return -ENOMEM;
+
+ if (fstat(fdf, &st) < 0)
+ return log_debug_errno(errno, "fstat() failed: %m");
+
+ frame_size = LZ4F_compressBound(LZ4_BUFSIZE, &preferences);
+ size = frame_size + 64*1024; /* add some space for header and trailer */
+ buf = malloc(size);
+ if (!buf)
+ return -ENOMEM;
+
+ n = offset = total_out = LZ4F_compressBegin(ctx, buf, size, &preferences);
+ if (LZ4F_isError(n))
+ return -EINVAL;
+
+ src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fdf, 0);
+ if (src == MAP_FAILED)
+ return -errno;
+
+ log_debug("Buffer size is %zu bytes, header size %zu bytes.", size, n);
+
+ while (total_in < (size_t) st.st_size) {
+ ssize_t k;
+
+ k = MIN(LZ4_BUFSIZE, st.st_size - total_in);
+ n = LZ4F_compressUpdate(ctx, buf + offset, size - offset,
+ src + total_in, k, &options);
+ if (LZ4F_isError(n)) {
+ r = -ENOTRECOVERABLE;
+ goto cleanup;
+ }
+
+ total_in += k;
+ offset += n;
+ total_out += n;
+
+ if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes)
+ return log_debug_errno(SYNTHETIC_ERRNO(EFBIG),
+ "Compressed stream longer than %" PRIu64 " bytes",
+ max_bytes);
+
+ if (size - offset < frame_size + 4) {
+ k = loop_write(fdt, buf, offset, false);
+ if (k < 0) {
+ r = k;
+ goto cleanup;
+ }
+ offset = 0;
+ }
+ }
+
+ n = LZ4F_compressEnd(ctx, buf + offset, size - offset, &options);
+ if (LZ4F_isError(n)) {
+ r = -ENOTRECOVERABLE;
+ goto cleanup;
+ }
+
+ offset += n;
+ total_out += n;
+ r = loop_write(fdt, buf, offset, false);
+ if (r < 0)
+ goto cleanup;
+
+ log_debug("LZ4 compression finished (%zu -> %zu bytes, %.1f%%)",
+ total_in, total_out,
+ (double) total_out / total_in * 100);
+ cleanup:
+ munmap(src, st.st_size);
+ return r;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_stream_xz(int fdf, int fdt, uint64_t max_bytes) {
+
+#if HAVE_XZ
+ _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
+ lzma_ret ret;
+
+ uint8_t buf[BUFSIZ], out[BUFSIZ];
+ lzma_action action = LZMA_RUN;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
+ if (ret != LZMA_OK)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEM),
+ "Failed to initialize XZ decoder: code %u",
+ ret);
+
+ for (;;) {
+ if (s.avail_in == 0 && action == LZMA_RUN) {
+ ssize_t n;
+
+ n = read(fdf, buf, sizeof(buf));
+ if (n < 0)
+ return -errno;
+ if (n == 0)
+ action = LZMA_FINISH;
+ else {
+ s.next_in = buf;
+ s.avail_in = n;
+ }
+ }
+
+ if (s.avail_out == 0) {
+ s.next_out = out;
+ s.avail_out = sizeof(out);
+ }
+
+ ret = lzma_code(&s, action);
+ if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Decompression failed: code %u",
+ ret);
+
+ if (s.avail_out == 0 || ret == LZMA_STREAM_END) {
+ ssize_t n, k;
+
+ n = sizeof(out) - s.avail_out;
+
+ if (max_bytes != (uint64_t) -1) {
+ if (max_bytes < (uint64_t) n)
+ return -EFBIG;
+
+ max_bytes -= n;
+ }
+
+ k = loop_write(fdt, out, n, false);
+ if (k < 0)
+ return k;
+
+ if (ret == LZMA_STREAM_END) {
+ log_debug("XZ decompression finished (%"PRIu64" -> %"PRIu64" bytes, %.1f%%)",
+ s.total_in, s.total_out,
+ (double) s.total_out / s.total_in * 100);
+
+ return 0;
+ }
+ }
+ }
+#else
+ return log_debug_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT),
+ "Cannot decompress file. Compiled without XZ support.");
+#endif
+}
+
+int decompress_stream_lz4(int in, int out, uint64_t max_bytes) {
+#if HAVE_LZ4
+ size_t c;
+ _cleanup_(LZ4F_freeDecompressionContextp) LZ4F_decompressionContext_t ctx = NULL;
+ _cleanup_free_ char *buf = NULL;
+ char *src;
+ struct stat st;
+ int r = 0;
+ size_t total_in = 0, total_out = 0;
+
+ c = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION);
+ if (LZ4F_isError(c))
+ return -ENOMEM;
+
+ if (fstat(in, &st) < 0)
+ return log_debug_errno(errno, "fstat() failed: %m");
+
+ buf = malloc(LZ4_BUFSIZE);
+ if (!buf)
+ return -ENOMEM;
+
+ src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, in, 0);
+ if (src == MAP_FAILED)
+ return -errno;
+
+ while (total_in < (size_t) st.st_size) {
+ size_t produced = LZ4_BUFSIZE;
+ size_t used = st.st_size - total_in;
+
+ c = LZ4F_decompress(ctx, buf, &produced, src + total_in, &used, NULL);
+ if (LZ4F_isError(c)) {
+ r = -EBADMSG;
+ goto cleanup;
+ }
+
+ total_in += used;
+ total_out += produced;
+
+ if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes) {
+ log_debug("Decompressed stream longer than %"PRIu64" bytes", max_bytes);
+ r = -EFBIG;
+ goto cleanup;
+ }
+
+ r = loop_write(out, buf, produced, false);
+ if (r < 0)
+ goto cleanup;
+ }
+
+ log_debug("LZ4 decompression finished (%zu -> %zu bytes, %.1f%%)",
+ total_in, total_out,
+ total_in > 0 ? (double) total_out / total_in * 100 : 0.0);
+ cleanup:
+ munmap(src, st.st_size);
+ return r;
+#else
+ return log_debug_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT),
+ "Cannot decompress file. Compiled without LZ4 support.");
+#endif
+}
+
+int compress_stream_zstd(int fdf, int fdt, uint64_t max_bytes) {
+#if HAVE_ZSTD
+ _cleanup_(ZSTD_freeCCtxp) ZSTD_CCtx *cctx = NULL;
+ _cleanup_free_ void *in_buff = NULL, *out_buff = NULL;
+ size_t in_allocsize, out_allocsize;
+ size_t z;
+ uint64_t left = max_bytes, in_bytes = 0;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ /* Create the context and buffers */
+ in_allocsize = ZSTD_CStreamInSize();
+ out_allocsize = ZSTD_CStreamOutSize();
+ in_buff = malloc(in_allocsize);
+ out_buff = malloc(out_allocsize);
+ cctx = ZSTD_createCCtx();
+ if (!cctx || !out_buff || !in_buff)
+ return -ENOMEM;
+
+ z = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
+ if (ZSTD_isError(z))
+ log_debug("Failed to enable ZSTD checksum, ignoring: %s", ZSTD_getErrorName(z));
+
+ /* This loop read from the input file, compresses that entire chunk,
+ * and writes all output produced to the output file.
+ */
+ for (;;) {
+ bool is_last_chunk;
+ ZSTD_inBuffer input = {
+ .src = in_buff,
+ .size = 0,
+ .pos = 0
+ };
+ ssize_t red;
+
+ red = loop_read(fdf, in_buff, in_allocsize, true);
+ if (red < 0)
+ return red;
+ is_last_chunk = red == 0;
+
+ in_bytes += (size_t) red;
+ input.size = (size_t) red;
+
+ for (bool finished = false; !finished;) {
+ ZSTD_outBuffer output = {
+ .dst = out_buff,
+ .size = out_allocsize,
+ .pos = 0
+ };
+ size_t remaining;
+ ssize_t wrote;
+
+ /* Compress into the output buffer and write all of the
+ * output to the file so we can reuse the buffer next
+ * iteration.
+ */
+ remaining = ZSTD_compressStream2(
+ cctx, &output, &input,
+ is_last_chunk ? ZSTD_e_end : ZSTD_e_continue);
+
+ if (ZSTD_isError(remaining)) {
+ log_debug("ZSTD encoder failed: %s", ZSTD_getErrorName(remaining));
+ return zstd_ret_to_errno(remaining);
+ }
+
+ if (left < output.pos)
+ return -EFBIG;
+
+ wrote = loop_write(fdt, output.dst, output.pos, 1);
+ if (wrote < 0)
+ return wrote;
+
+ left -= output.pos;
+
+ /* If we're on the last chunk we're finished when zstd
+ * returns 0, which means its consumed all the input AND
+ * finished the frame. Otherwise, we're finished when
+ * we've consumed all the input.
+ */
+ finished = is_last_chunk ? (remaining == 0) : (input.pos == input.size);
+ }
+
+ /* zstd only returns 0 when the input is completely consumed */
+ assert(input.pos == input.size);
+ if (is_last_chunk)
+ break;
+ }
+
+ if (in_bytes > 0)
+ log_debug("ZSTD compression finished (%" PRIu64 " -> %" PRIu64 " bytes, %.1f%%)",
+ in_bytes, max_bytes - left, (double) (max_bytes - left) / in_bytes * 100);
+ else
+ log_debug("ZSTD compression finished (%" PRIu64 " -> %" PRIu64 " bytes)",
+ in_bytes, max_bytes - left);
+
+ return 0;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+int decompress_stream_zstd(int fdf, int fdt, uint64_t max_bytes) {
+#if HAVE_ZSTD
+ _cleanup_(ZSTD_freeDCtxp) ZSTD_DCtx *dctx = NULL;
+ _cleanup_free_ void *in_buff = NULL, *out_buff = NULL;
+ size_t in_allocsize, out_allocsize;
+ size_t last_result = 0;
+ uint64_t left = max_bytes, in_bytes = 0;
+
+ assert(fdf >= 0);
+ assert(fdt >= 0);
+
+ /* Create the context and buffers */
+ in_allocsize = ZSTD_DStreamInSize();
+ out_allocsize = ZSTD_DStreamOutSize();
+ in_buff = malloc(in_allocsize);
+ out_buff = malloc(out_allocsize);
+ dctx = ZSTD_createDCtx();
+ if (!dctx || !out_buff || !in_buff)
+ return -ENOMEM;
+
+ /* This loop assumes that the input file is one or more concatenated
+ * zstd streams. This example won't work if there is trailing non-zstd
+ * data at the end, but streaming decompression in general handles this
+ * case. ZSTD_decompressStream() returns 0 exactly when the frame is
+ * completed, and doesn't consume input after the frame.
+ */
+ for (;;) {
+ bool has_error = false;
+ ZSTD_inBuffer input = {
+ .src = in_buff,
+ .size = 0,
+ .pos = 0
+ };
+ ssize_t red;
+
+ red = loop_read(fdf, in_buff, in_allocsize, true);
+ if (red < 0)
+ return red;
+ if (red == 0)
+ break;
+
+ in_bytes += (size_t) red;
+ input.size = (size_t) red;
+ input.pos = 0;
+
+ /* Given a valid frame, zstd won't consume the last byte of the
+ * frame until it has flushed all of the decompressed data of
+ * the frame. So input.pos < input.size means frame is not done
+ * or there is still output available.
+ */
+ while (input.pos < input.size) {
+ ZSTD_outBuffer output = {
+ .dst = out_buff,
+ .size = out_allocsize,
+ .pos = 0
+ };
+ ssize_t wrote;
+ /* The return code is zero if the frame is complete, but
+ * there may be multiple frames concatenated together.
+ * Zstd will automatically reset the context when a
+ * frame is complete. Still, calling ZSTD_DCtx_reset()
+ * can be useful to reset the context to a clean state,
+ * for instance if the last decompression call returned
+ * an error.
+ */
+ last_result = ZSTD_decompressStream(dctx, &output, &input);
+ if (ZSTD_isError(last_result)) {
+ has_error = true;
+ break;
+ }
+
+ if (left < output.pos)
+ return -EFBIG;
+
+ wrote = loop_write(fdt, output.dst, output.pos, 1);
+ if (wrote < 0)
+ return wrote;
+
+ left -= output.pos;
+ }
+ if (has_error)
+ break;
+ }
+
+ if (in_bytes == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "ZSTD decoder failed: no data read");
+
+ if (last_result != 0) {
+ /* The last return value from ZSTD_decompressStream did not end
+ * on a frame, but we reached the end of the file! We assume
+ * this is an error, and the input was truncated.
+ */
+ log_debug("ZSTD decoder failed: %s", ZSTD_getErrorName(last_result));
+ return zstd_ret_to_errno(last_result);
+ }
+
+ log_debug(
+ "ZSTD decompression finished (%" PRIu64 " -> %" PRIu64 " bytes, %.1f%%)",
+ in_bytes,
+ max_bytes - left,
+ (double) (max_bytes - left) / in_bytes * 100);
+ return 0;
+#else
+ return log_debug_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT),
+ "Cannot decompress file. Compiled without ZSTD support.");
+#endif
+}
+
+int decompress_stream(const char *filename, int fdf, int fdt, uint64_t max_bytes) {
+
+ if (endswith(filename, ".lz4"))
+ return decompress_stream_lz4(fdf, fdt, max_bytes);
+ else if (endswith(filename, ".xz"))
+ return decompress_stream_xz(fdf, fdt, max_bytes);
+ else if (endswith(filename, ".zst"))
+ return decompress_stream_zstd(fdf, fdt, max_bytes);
+ else
+ return -EPROTONOSUPPORT;
+}
diff --git a/src/journal/compress.h b/src/journal/compress.h
new file mode 100644
index 0000000..db7f399
--- /dev/null
+++ b/src/journal/compress.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <unistd.h>
+
+#include "journal-def.h"
+
+const char* object_compressed_to_string(int compression);
+int object_compressed_from_string(const char *compression);
+
+int compress_blob_xz(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size);
+int compress_blob_lz4(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size);
+int compress_blob_zstd(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size);
+
+static inline int compress_blob(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size) {
+ int r;
+#if HAVE_ZSTD
+ r = compress_blob_zstd(src, src_size, dst, dst_alloc_size, dst_size);
+ if (r == 0)
+ return OBJECT_COMPRESSED_ZSTD;
+#elif HAVE_LZ4
+ r = compress_blob_lz4(src, src_size, dst, dst_alloc_size, dst_size);
+ if (r == 0)
+ return OBJECT_COMPRESSED_LZ4;
+#elif HAVE_XZ
+ r = compress_blob_xz(src, src_size, dst, dst_alloc_size, dst_size);
+ if (r == 0)
+ return OBJECT_COMPRESSED_XZ;
+#else
+ r = -EOPNOTSUPP;
+#endif
+ return r;
+}
+
+int decompress_blob_xz(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+int decompress_blob_lz4(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+int decompress_blob_zstd(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+int decompress_blob(int compression,
+ const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+
+int decompress_startswith_xz(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+int decompress_startswith_lz4(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+int decompress_startswith_zstd(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+int decompress_startswith(int compression,
+ const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+
+int compress_stream_xz(int fdf, int fdt, uint64_t max_bytes);
+int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes);
+int compress_stream_zstd(int fdf, int fdt, uint64_t max_bytes);
+
+int decompress_stream_xz(int fdf, int fdt, uint64_t max_size);
+int decompress_stream_lz4(int fdf, int fdt, uint64_t max_size);
+int decompress_stream_zstd(int fdf, int fdt, uint64_t max_size);
+
+#if HAVE_ZSTD
+# define compress_stream compress_stream_zstd
+# define COMPRESSED_EXT ".zst"
+#elif HAVE_LZ4
+# define compress_stream compress_stream_lz4
+# define COMPRESSED_EXT ".lz4"
+#elif HAVE_XZ
+# define compress_stream compress_stream_xz
+# define COMPRESSED_EXT ".xz"
+#else
+static inline int compress_stream(int fdf, int fdt, uint64_t max_size) {
+ return -EOPNOTSUPP;
+}
+# define COMPRESSED_EXT ""
+#endif
+
+int decompress_stream(const char *filename, int fdf, int fdt, uint64_t max_bytes);
diff --git a/src/journal/fsprg.c b/src/journal/fsprg.c
new file mode 100644
index 0000000..7ea7249
--- /dev/null
+++ b/src/journal/fsprg.c
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * fsprg v0.1 - (seekable) forward-secure pseudorandom generator
+ * Copyright © 2012 B. Poettering
+ * Contact: fsprg@point-at-infinity.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+/*
+ * See "Practical Secure Logging: Seekable Sequential Key Generators"
+ * by G. A. Marson, B. Poettering for details:
+ *
+ * http://eprint.iacr.org/2013/397
+ */
+
+#include <string.h>
+
+#include "fsprg.h"
+#include "gcrypt-util.h"
+#include "memory-util.h"
+
+#define ISVALID_SECPAR(secpar) (((secpar) % 16 == 0) && ((secpar) >= 16) && ((secpar) <= 16384))
+#define VALIDATE_SECPAR(secpar) assert(ISVALID_SECPAR(secpar));
+
+#define RND_HASH GCRY_MD_SHA256
+#define RND_GEN_P 0x01
+#define RND_GEN_Q 0x02
+#define RND_GEN_X 0x03
+
+#pragma GCC diagnostic ignored "-Wpointer-arith"
+/* TODO: remove void* arithmetic and this work-around */
+
+/******************************************************************************/
+
+static void mpi_export(void *buf, size_t buflen, const gcry_mpi_t x) {
+ unsigned len;
+ size_t nwritten;
+
+ assert(gcry_mpi_cmp_ui(x, 0) >= 0);
+ len = (gcry_mpi_get_nbits(x) + 7) / 8;
+ assert(len <= buflen);
+ memzero(buf, buflen);
+ gcry_mpi_print(GCRYMPI_FMT_USG, buf + (buflen - len), len, &nwritten, x);
+ assert(nwritten == len);
+}
+
+static gcry_mpi_t mpi_import(const void *buf, size_t buflen) {
+ gcry_mpi_t h;
+ _unused_ unsigned len;
+
+ assert_se(gcry_mpi_scan(&h, GCRYMPI_FMT_USG, buf, buflen, NULL) == 0);
+ len = (gcry_mpi_get_nbits(h) + 7) / 8;
+ assert(len <= buflen);
+ assert(gcry_mpi_cmp_ui(h, 0) >= 0);
+
+ return h;
+}
+
+static void uint64_export(void *buf, size_t buflen, uint64_t x) {
+ assert(buflen == 8);
+ ((uint8_t*) buf)[0] = (x >> 56) & 0xff;
+ ((uint8_t*) buf)[1] = (x >> 48) & 0xff;
+ ((uint8_t*) buf)[2] = (x >> 40) & 0xff;
+ ((uint8_t*) buf)[3] = (x >> 32) & 0xff;
+ ((uint8_t*) buf)[4] = (x >> 24) & 0xff;
+ ((uint8_t*) buf)[5] = (x >> 16) & 0xff;
+ ((uint8_t*) buf)[6] = (x >> 8) & 0xff;
+ ((uint8_t*) buf)[7] = (x >> 0) & 0xff;
+}
+
+_pure_ static uint64_t uint64_import(const void *buf, size_t buflen) {
+ assert(buflen == 8);
+ return
+ (uint64_t)(((uint8_t*) buf)[0]) << 56 |
+ (uint64_t)(((uint8_t*) buf)[1]) << 48 |
+ (uint64_t)(((uint8_t*) buf)[2]) << 40 |
+ (uint64_t)(((uint8_t*) buf)[3]) << 32 |
+ (uint64_t)(((uint8_t*) buf)[4]) << 24 |
+ (uint64_t)(((uint8_t*) buf)[5]) << 16 |
+ (uint64_t)(((uint8_t*) buf)[6]) << 8 |
+ (uint64_t)(((uint8_t*) buf)[7]) << 0;
+}
+
+/* deterministically generate from seed/idx a string of buflen pseudorandom bytes */
+static void det_randomize(void *buf, size_t buflen, const void *seed, size_t seedlen, uint32_t idx) {
+ gcry_md_hd_t hd, hd2;
+ size_t olen, cpylen;
+ uint32_t ctr;
+
+ olen = gcry_md_get_algo_dlen(RND_HASH);
+ gcry_md_open(&hd, RND_HASH, 0);
+ gcry_md_write(hd, seed, seedlen);
+ gcry_md_putc(hd, (idx >> 24) & 0xff);
+ gcry_md_putc(hd, (idx >> 16) & 0xff);
+ gcry_md_putc(hd, (idx >> 8) & 0xff);
+ gcry_md_putc(hd, (idx >> 0) & 0xff);
+
+ for (ctr = 0; buflen; ctr++) {
+ gcry_md_copy(&hd2, hd);
+ gcry_md_putc(hd2, (ctr >> 24) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 16) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 8) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 0) & 0xff);
+ gcry_md_final(hd2);
+ cpylen = (buflen < olen) ? buflen : olen;
+ memcpy(buf, gcry_md_read(hd2, RND_HASH), cpylen);
+ gcry_md_close(hd2);
+ buf += cpylen;
+ buflen -= cpylen;
+ }
+ gcry_md_close(hd);
+}
+
+/* deterministically generate from seed/idx a prime of length `bits' that is 3 (mod 4) */
+static gcry_mpi_t genprime3mod4(int bits, const void *seed, size_t seedlen, uint32_t idx) {
+ size_t buflen = bits / 8;
+ uint8_t buf[buflen];
+ gcry_mpi_t p;
+
+ assert(bits % 8 == 0);
+ assert(buflen > 0);
+
+ det_randomize(buf, buflen, seed, seedlen, idx);
+ buf[0] |= 0xc0; /* set upper two bits, so that n=pq has maximum size */
+ buf[buflen - 1] |= 0x03; /* set lower two bits, to have result 3 (mod 4) */
+
+ p = mpi_import(buf, buflen);
+ while (gcry_prime_check(p, 0))
+ gcry_mpi_add_ui(p, p, 4);
+
+ return p;
+}
+
+/* deterministically generate from seed/idx a quadratic residue (mod n) */
+static gcry_mpi_t gensquare(const gcry_mpi_t n, const void *seed, size_t seedlen, uint32_t idx, unsigned secpar) {
+ size_t buflen = secpar / 8;
+ uint8_t buf[buflen];
+ gcry_mpi_t x;
+
+ det_randomize(buf, buflen, seed, seedlen, idx);
+ buf[0] &= 0x7f; /* clear upper bit, so that we have x < n */
+ x = mpi_import(buf, buflen);
+ assert(gcry_mpi_cmp(x, n) < 0);
+ gcry_mpi_mulm(x, x, x, n);
+ return x;
+}
+
+/* compute 2^m (mod phi(p)), for a prime p */
+static gcry_mpi_t twopowmodphi(uint64_t m, const gcry_mpi_t p) {
+ gcry_mpi_t phi, r;
+ int n;
+
+ phi = gcry_mpi_new(0);
+ gcry_mpi_sub_ui(phi, p, 1);
+
+ /* count number of used bits in m */
+ for (n = 0; (1ULL << n) <= m; n++)
+ ;
+
+ r = gcry_mpi_new(0);
+ gcry_mpi_set_ui(r, 1);
+ while (n) { /* square and multiply algorithm for fast exponentiation */
+ n--;
+ gcry_mpi_mulm(r, r, r, phi);
+ if (m & ((uint64_t)1 << n)) {
+ gcry_mpi_add(r, r, r);
+ if (gcry_mpi_cmp(r, phi) >= 0)
+ gcry_mpi_sub(r, r, phi);
+ }
+ }
+
+ gcry_mpi_release(phi);
+ return r;
+}
+
+/* Decompose $x \in Z_n$ into $(xp,xq) \in Z_p \times Z_q$ using Chinese Remainder Theorem */
+static void CRT_decompose(gcry_mpi_t *xp, gcry_mpi_t *xq, const gcry_mpi_t x, const gcry_mpi_t p, const gcry_mpi_t q) {
+ *xp = gcry_mpi_new(0);
+ *xq = gcry_mpi_new(0);
+ gcry_mpi_mod(*xp, x, p);
+ gcry_mpi_mod(*xq, x, q);
+}
+
+/* Compose $(xp,xq) \in Z_p \times Z_q$ into $x \in Z_n$ using Chinese Remainder Theorem */
+static void CRT_compose(gcry_mpi_t *x, const gcry_mpi_t xp, const gcry_mpi_t xq, const gcry_mpi_t p, const gcry_mpi_t q) {
+ gcry_mpi_t a, u;
+
+ a = gcry_mpi_new(0);
+ u = gcry_mpi_new(0);
+ *x = gcry_mpi_new(0);
+ gcry_mpi_subm(a, xq, xp, q);
+ gcry_mpi_invm(u, p, q);
+ gcry_mpi_mulm(a, a, u, q); /* a = (xq - xp) / p (mod q) */
+ gcry_mpi_mul(*x, p, a);
+ gcry_mpi_add(*x, *x, xp); /* x = p * ((xq - xp) / p mod q) + xp */
+ gcry_mpi_release(a);
+ gcry_mpi_release(u);
+}
+
+/******************************************************************************/
+
+size_t FSPRG_mskinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + 2 * (_secpar / 2) / 8; /* to store header,p,q */
+}
+
+size_t FSPRG_mpkinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + _secpar / 8; /* to store header,n */
+}
+
+size_t FSPRG_stateinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + 2 * _secpar / 8 + 8; /* to store header,n,x,epoch */
+}
+
+static void store_secpar(void *buf, uint16_t secpar) {
+ secpar = secpar / 16 - 1;
+ ((uint8_t*) buf)[0] = (secpar >> 8) & 0xff;
+ ((uint8_t*) buf)[1] = (secpar >> 0) & 0xff;
+}
+
+static uint16_t read_secpar(const void *buf) {
+ uint16_t secpar;
+ secpar =
+ (uint16_t)(((uint8_t*) buf)[0]) << 8 |
+ (uint16_t)(((uint8_t*) buf)[1]) << 0;
+ return 16 * (secpar + 1);
+}
+
+void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigned _secpar) {
+ uint8_t iseed[FSPRG_RECOMMENDED_SEEDLEN];
+ gcry_mpi_t n, p, q;
+ uint16_t secpar;
+
+ VALIDATE_SECPAR(_secpar);
+ secpar = _secpar;
+
+ initialize_libgcrypt(false);
+
+ if (!seed) {
+ gcry_randomize(iseed, FSPRG_RECOMMENDED_SEEDLEN, GCRY_STRONG_RANDOM);
+ seed = iseed;
+ seedlen = FSPRG_RECOMMENDED_SEEDLEN;
+ }
+
+ p = genprime3mod4(secpar / 2, seed, seedlen, RND_GEN_P);
+ q = genprime3mod4(secpar / 2, seed, seedlen, RND_GEN_Q);
+
+ if (msk) {
+ store_secpar(msk + 0, secpar);
+ mpi_export(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8, p);
+ mpi_export(msk + 2 + 1 * (secpar / 2) / 8, (secpar / 2) / 8, q);
+ }
+
+ if (mpk) {
+ n = gcry_mpi_new(0);
+ gcry_mpi_mul(n, p, q);
+ assert(gcry_mpi_get_nbits(n) == secpar);
+
+ store_secpar(mpk + 0, secpar);
+ mpi_export(mpk + 2, secpar / 8, n);
+
+ gcry_mpi_release(n);
+ }
+
+ gcry_mpi_release(p);
+ gcry_mpi_release(q);
+}
+
+void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seedlen) {
+ gcry_mpi_t n, x;
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(mpk + 0);
+ n = mpi_import(mpk + 2, secpar / 8);
+ x = gensquare(n, seed, seedlen, RND_GEN_X, secpar);
+
+ memcpy(state, mpk, 2 + secpar / 8);
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, x);
+ memzero(state + 2 + 2 * secpar / 8, 8);
+
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+}
+
+void FSPRG_Evolve(void *state) {
+ gcry_mpi_t n, x;
+ uint16_t secpar;
+ uint64_t epoch;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(state + 0);
+ n = mpi_import(state + 2 + 0 * secpar / 8, secpar / 8);
+ x = mpi_import(state + 2 + 1 * secpar / 8, secpar / 8);
+ epoch = uint64_import(state + 2 + 2 * secpar / 8, 8);
+
+ gcry_mpi_mulm(x, x, x, n);
+ epoch++;
+
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, x);
+ uint64_export(state + 2 + 2 * secpar / 8, 8, epoch);
+
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+}
+
+uint64_t FSPRG_GetEpoch(const void *state) {
+ uint16_t secpar;
+ secpar = read_secpar(state + 0);
+ return uint64_import(state + 2 + 2 * secpar / 8, 8);
+}
+
+void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, size_t seedlen) {
+ gcry_mpi_t p, q, n, x, xp, xq, kp, kq, xm;
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(msk + 0);
+ p = mpi_import(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8);
+ q = mpi_import(msk + 2 + 1 * (secpar / 2) / 8, (secpar / 2) / 8);
+
+ n = gcry_mpi_new(0);
+ gcry_mpi_mul(n, p, q);
+
+ x = gensquare(n, seed, seedlen, RND_GEN_X, secpar);
+ CRT_decompose(&xp, &xq, x, p, q); /* split (mod n) into (mod p) and (mod q) using CRT */
+
+ kp = twopowmodphi(epoch, p); /* compute 2^epoch (mod phi(p)) */
+ kq = twopowmodphi(epoch, q); /* compute 2^epoch (mod phi(q)) */
+
+ gcry_mpi_powm(xp, xp, kp, p); /* compute x^(2^epoch) (mod p) */
+ gcry_mpi_powm(xq, xq, kq, q); /* compute x^(2^epoch) (mod q) */
+
+ CRT_compose(&xm, xp, xq, p, q); /* combine (mod p) and (mod q) to (mod n) using CRT */
+
+ store_secpar(state + 0, secpar);
+ mpi_export(state + 2 + 0 * secpar / 8, secpar / 8, n);
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, xm);
+ uint64_export(state + 2 + 2 * secpar / 8, 8, epoch);
+
+ gcry_mpi_release(p);
+ gcry_mpi_release(q);
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+ gcry_mpi_release(xp);
+ gcry_mpi_release(xq);
+ gcry_mpi_release(kp);
+ gcry_mpi_release(kq);
+ gcry_mpi_release(xm);
+}
+
+void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx) {
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(state + 0);
+ det_randomize(key, keylen, state + 2, 2 * secpar / 8 + 8, idx);
+}
diff --git a/src/journal/fsprg.h b/src/journal/fsprg.h
new file mode 100644
index 0000000..dfe2d79
--- /dev/null
+++ b/src/journal/fsprg.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/*
+ * fsprg v0.1 - (seekable) forward-secure pseudorandom generator
+ * Copyright © 2012 B. Poettering
+ * Contact: fsprg@point-at-infinity.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "macro.h"
+#include "util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FSPRG_RECOMMENDED_SECPAR 1536
+#define FSPRG_RECOMMENDED_SEEDLEN (96/8)
+
+size_t FSPRG_mskinbytes(unsigned secpar) _const_;
+size_t FSPRG_mpkinbytes(unsigned secpar) _const_;
+size_t FSPRG_stateinbytes(unsigned secpar) _const_;
+
+/* Setup msk and mpk. Providing seed != NULL makes this algorithm deterministic. */
+void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigned secpar);
+
+/* Initialize state deterministically in dependence on seed. */
+/* Note: in case one wants to run only one GenState0 per GenMK it is safe to use
+ the same seed for both GenMK and GenState0.
+*/
+void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seedlen);
+
+void FSPRG_Evolve(void *state);
+
+uint64_t FSPRG_GetEpoch(const void *state) _pure_;
+
+/* Seek to any arbitrary state (by providing msk together with seed from GenState0). */
+void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, size_t seedlen);
+
+void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/journal/generate-audit_type-list.sh b/src/journal/generate-audit_type-list.sh
new file mode 100755
index 0000000..912d0c9
--- /dev/null
+++ b/src/journal/generate-audit_type-list.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+set -eu
+
+cpp="$1"
+shift
+
+includes=""
+for i in "$@"; do
+ includes="$includes -include $i"
+done
+
+$cpp -dM $includes - </dev/null | \
+ grep -vE 'AUDIT_.*(FIRST|LAST)_' | \
+ sed -r -n 's/^#define\s+AUDIT_(\w+)\s+([0-9]{4})\s*$$/\1\t\2/p' | \
+ sort -k2
diff --git a/src/journal/journal-authenticate.c b/src/journal/journal-authenticate.c
new file mode 100644
index 0000000..a5ff987
--- /dev/null
+++ b/src/journal/journal-authenticate.c
@@ -0,0 +1,536 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "fd-util.h"
+#include "fsprg.h"
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "memory-util.h"
+#include "time-util.h"
+
+static uint64_t journal_file_tag_seqnum(JournalFile *f) {
+ uint64_t r;
+
+ assert(f);
+
+ r = le64toh(f->header->n_tags) + 1;
+ f->header->n_tags = htole64(r);
+
+ return r;
+}
+
+int journal_file_append_tag(JournalFile *f) {
+ Object *o;
+ uint64_t p;
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ if (!f->hmac_running)
+ return 0;
+
+ assert(f->hmac);
+
+ r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
+ if (r < 0)
+ return r;
+
+ o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
+ o->tag.epoch = htole64(FSPRG_GetEpoch(f->fsprg_state));
+
+ log_debug("Writing tag %"PRIu64" for epoch %"PRIu64"",
+ le64toh(o->tag.seqnum),
+ FSPRG_GetEpoch(f->fsprg_state));
+
+ /* Add the tag object itself, so that we can protect its
+ * header. This will exclude the actual hash value in it */
+ r = journal_file_hmac_put_object(f, OBJECT_TAG, o, p);
+ if (r < 0)
+ return r;
+
+ /* Get the HMAC tag and store it in the object */
+ memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
+ f->hmac_running = false;
+
+ return 0;
+}
+
+int journal_file_hmac_start(JournalFile *f) {
+ uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ if (f->hmac_running)
+ return 0;
+
+ /* Prepare HMAC for next cycle */
+ gcry_md_reset(f->hmac);
+ FSPRG_GetKey(f->fsprg_state, key, sizeof(key), 0);
+ gcry_md_setkey(f->hmac, key, sizeof(key));
+
+ f->hmac_running = true;
+
+ return 0;
+}
+
+static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
+ uint64_t t;
+
+ assert(f);
+ assert(epoch);
+ assert(f->seal);
+
+ if (f->fss_start_usec == 0 ||
+ f->fss_interval_usec == 0)
+ return -EOPNOTSUPP;
+
+ if (realtime < f->fss_start_usec)
+ return -ESTALE;
+
+ t = realtime - f->fss_start_usec;
+ t = t / f->fss_interval_usec;
+
+ *epoch = t;
+ return 0;
+}
+
+static int journal_file_fsprg_need_evolve(JournalFile *f, uint64_t realtime) {
+ uint64_t goal, epoch;
+ int r;
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = journal_file_get_epoch(f, realtime, &goal);
+ if (r < 0)
+ return r;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (epoch > goal)
+ return -ESTALE;
+
+ return epoch != goal;
+}
+
+int journal_file_fsprg_evolve(JournalFile *f, uint64_t realtime) {
+ uint64_t goal, epoch;
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = journal_file_get_epoch(f, realtime, &goal);
+ if (r < 0)
+ return r;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (epoch < goal)
+ log_debug("Evolving FSPRG key from epoch %"PRIu64" to %"PRIu64".", epoch, goal);
+
+ for (;;) {
+ if (epoch > goal)
+ return -ESTALE;
+ if (epoch == goal)
+ return 0;
+
+ FSPRG_Evolve(f->fsprg_state);
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ }
+}
+
+int journal_file_fsprg_seek(JournalFile *f, uint64_t goal) {
+ void *msk;
+ uint64_t epoch;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ assert(f->fsprg_seed);
+
+ if (f->fsprg_state) {
+ /* Cheaper... */
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (goal == epoch)
+ return 0;
+
+ if (goal == epoch+1) {
+ FSPRG_Evolve(f->fsprg_state);
+ return 0;
+ }
+ } else {
+ f->fsprg_state_size = FSPRG_stateinbytes(FSPRG_RECOMMENDED_SECPAR);
+ f->fsprg_state = malloc(f->fsprg_state_size);
+
+ if (!f->fsprg_state)
+ return -ENOMEM;
+ }
+
+ log_debug("Seeking FSPRG key to %"PRIu64".", goal);
+
+ msk = alloca(FSPRG_mskinbytes(FSPRG_RECOMMENDED_SECPAR));
+ FSPRG_GenMK(msk, NULL, f->fsprg_seed, f->fsprg_seed_size, FSPRG_RECOMMENDED_SECPAR);
+ FSPRG_Seek(f->fsprg_state, goal, msk, f->fsprg_seed, f->fsprg_seed_size);
+ return 0;
+}
+
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ if (realtime <= 0)
+ realtime = now(CLOCK_REALTIME);
+
+ r = journal_file_fsprg_need_evolve(f, realtime);
+ if (r <= 0)
+ return 0;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_fsprg_evolve(f, realtime);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uint64_t p) {
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ return r;
+
+ if (!o) {
+ r = journal_file_move_to_object(f, type, p, &o);
+ if (r < 0)
+ return r;
+ } else {
+ if (type > OBJECT_UNUSED && o->object.type != type)
+ return -EBADMSG;
+ }
+
+ gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA:
+ /* All but hash and payload are mutable */
+ gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
+ gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
+ break;
+
+ case OBJECT_FIELD:
+ /* Same here */
+ gcry_md_write(f->hmac, &o->field.hash, sizeof(o->field.hash));
+ gcry_md_write(f->hmac, o->field.payload, le64toh(o->object.size) - offsetof(FieldObject, payload));
+ break;
+
+ case OBJECT_ENTRY:
+ /* All */
+ gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
+ break;
+
+ case OBJECT_FIELD_HASH_TABLE:
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_ENTRY_ARRAY:
+ /* Nothing: everything is mutable */
+ break;
+
+ case OBJECT_TAG:
+ /* All but the tag itself */
+ gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
+ gcry_md_write(f->hmac, &o->tag.epoch, sizeof(o->tag.epoch));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int journal_file_hmac_put_header(JournalFile *f) {
+ int r;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ return r;
+
+ /* All but state+reserved, boot_id, arena_size,
+ * tail_object_offset, n_objects, n_entries,
+ * tail_entry_seqnum, head_entry_seqnum, entry_array_offset,
+ * head_entry_realtime, tail_entry_realtime,
+ * tail_entry_monotonic, n_data, n_fields, n_tags,
+ * n_entry_arrays. */
+
+ gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
+ gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
+ gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
+ gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
+
+ return 0;
+}
+
+int journal_file_fss_load(JournalFile *f) {
+ int r, fd = -1;
+ char *p = NULL;
+ struct stat st;
+ FSSHeader *m = NULL;
+ sd_id128_t machine;
+
+ assert(f);
+
+ if (!f->seal)
+ return 0;
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss",
+ SD_ID128_FORMAT_VAL(machine)) < 0)
+ return -ENOMEM;
+
+ fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ log_error_errno(errno, "Failed to open %s: %m", p);
+
+ r = -errno;
+ goto finish;
+ }
+
+ if (fstat(fd, &st) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (st.st_size < (off_t) sizeof(FSSHeader)) {
+ r = -ENODATA;
+ goto finish;
+ }
+
+ m = mmap(NULL, PAGE_ALIGN(sizeof(FSSHeader)), PROT_READ, MAP_SHARED, fd, 0);
+ if (m == MAP_FAILED) {
+ m = NULL;
+ r = -errno;
+ goto finish;
+ }
+
+ if (memcmp(m->signature, FSS_HEADER_SIGNATURE, 8) != 0) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ if (m->incompatible_flags != 0) {
+ r = -EPROTONOSUPPORT;
+ goto finish;
+ }
+
+ if (le64toh(m->header_size) < sizeof(FSSHeader)) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ if (le64toh(m->fsprg_state_size) != FSPRG_stateinbytes(le16toh(m->fsprg_secpar))) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ f->fss_file_size = le64toh(m->header_size) + le64toh(m->fsprg_state_size);
+ if ((uint64_t) st.st_size < f->fss_file_size) {
+ r = -ENODATA;
+ goto finish;
+ }
+
+ if (!sd_id128_equal(machine, m->machine_id)) {
+ r = -EHOSTDOWN;
+ goto finish;
+ }
+
+ if (le64toh(m->start_usec) <= 0 ||
+ le64toh(m->interval_usec) <= 0) {
+ r = -EBADMSG;
+ goto finish;
+ }
+
+ f->fss_file = mmap(NULL, PAGE_ALIGN(f->fss_file_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (f->fss_file == MAP_FAILED) {
+ f->fss_file = NULL;
+ r = -errno;
+ goto finish;
+ }
+
+ f->fss_start_usec = le64toh(f->fss_file->start_usec);
+ f->fss_interval_usec = le64toh(f->fss_file->interval_usec);
+
+ f->fsprg_state = (uint8_t*) f->fss_file + le64toh(f->fss_file->header_size);
+ f->fsprg_state_size = le64toh(f->fss_file->fsprg_state_size);
+
+ r = 0;
+
+finish:
+ if (m)
+ munmap(m, PAGE_ALIGN(sizeof(FSSHeader)));
+
+ safe_close(fd);
+ free(p);
+
+ return r;
+}
+
+int journal_file_hmac_setup(JournalFile *f) {
+ gcry_error_t e;
+
+ if (!f->seal)
+ return 0;
+
+ initialize_libgcrypt(true);
+
+ e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
+ if (e != 0)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int journal_file_append_first_tag(JournalFile *f) {
+ int r;
+ uint64_t p;
+
+ if (!f->seal)
+ return 0;
+
+ log_debug("Calculating first tag...");
+
+ r = journal_file_hmac_put_header(f);
+ if (r < 0)
+ return r;
+
+ p = le64toh(f->header->field_hash_table_offset);
+ if (p < offsetof(Object, hash_table.items))
+ return -EINVAL;
+ p -= offsetof(Object, hash_table.items);
+
+ r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, NULL, p);
+ if (r < 0)
+ return r;
+
+ p = le64toh(f->header->data_hash_table_offset);
+ if (p < offsetof(Object, hash_table.items))
+ return -EINVAL;
+ p -= offsetof(Object, hash_table.items);
+
+ r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, NULL, p);
+ if (r < 0)
+ return r;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int journal_file_parse_verification_key(JournalFile *f, const char *key) {
+ uint8_t *seed;
+ size_t seed_size, c;
+ const char *k;
+ int r;
+ unsigned long long start, interval;
+
+ seed_size = FSPRG_RECOMMENDED_SEEDLEN;
+ seed = malloc(seed_size);
+ if (!seed)
+ return -ENOMEM;
+
+ k = key;
+ for (c = 0; c < seed_size; c++) {
+ int x, y;
+
+ while (*k == '-')
+ k++;
+
+ x = unhexchar(*k);
+ if (x < 0) {
+ free(seed);
+ return -EINVAL;
+ }
+ k++;
+ y = unhexchar(*k);
+ if (y < 0) {
+ free(seed);
+ return -EINVAL;
+ }
+ k++;
+
+ seed[c] = (uint8_t) (x * 16 + y);
+ }
+
+ if (*k != '/') {
+ free(seed);
+ return -EINVAL;
+ }
+ k++;
+
+ r = sscanf(k, "%llx-%llx", &start, &interval);
+ if (r != 2) {
+ free(seed);
+ return -EINVAL;
+ }
+
+ f->fsprg_seed = seed;
+ f->fsprg_seed_size = seed_size;
+
+ f->fss_start_usec = start * interval;
+ f->fss_interval_usec = interval;
+
+ return 0;
+}
+
+bool journal_file_next_evolve_usec(JournalFile *f, usec_t *u) {
+ uint64_t epoch;
+
+ assert(f);
+ assert(u);
+
+ if (!f->seal)
+ return false;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+
+ *u = (usec_t) (f->fss_start_usec + f->fss_interval_usec * epoch + f->fss_interval_usec);
+
+ return true;
+}
diff --git a/src/journal/journal-authenticate.h b/src/journal/journal-authenticate.h
new file mode 100644
index 0000000..e895722
--- /dev/null
+++ b/src/journal/journal-authenticate.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "journal-file.h"
+
+int journal_file_append_tag(JournalFile *f);
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
+int journal_file_append_first_tag(JournalFile *f);
+
+int journal_file_hmac_setup(JournalFile *f);
+int journal_file_hmac_start(JournalFile *f);
+int journal_file_hmac_put_header(JournalFile *f);
+int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uint64_t p);
+
+int journal_file_fss_load(JournalFile *f);
+int journal_file_parse_verification_key(JournalFile *f, const char *key);
+
+int journal_file_fsprg_evolve(JournalFile *f, uint64_t realtime);
+int journal_file_fsprg_seek(JournalFile *f, uint64_t epoch);
+
+bool journal_file_next_evolve_usec(JournalFile *f, usec_t *u);
diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h
new file mode 100644
index 0000000..bd924bd
--- /dev/null
+++ b/src/journal/journal-def.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-id128.h"
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+/*
+ * If you change this file you probably should also change its documentation:
+ *
+ * https://systemd.io/JOURNAL_FILE_FORMAT
+ */
+
+typedef struct Header Header;
+
+typedef struct ObjectHeader ObjectHeader;
+typedef union Object Object;
+
+typedef struct DataObject DataObject;
+typedef struct FieldObject FieldObject;
+typedef struct EntryObject EntryObject;
+typedef struct HashTableObject HashTableObject;
+typedef struct EntryArrayObject EntryArrayObject;
+typedef struct TagObject TagObject;
+
+typedef struct EntryItem EntryItem;
+typedef struct HashItem HashItem;
+
+typedef struct FSSHeader FSSHeader;
+
+/* Object types */
+typedef enum ObjectType {
+ OBJECT_UNUSED, /* also serves as "any type" or "additional context" */
+ OBJECT_DATA,
+ OBJECT_FIELD,
+ OBJECT_ENTRY,
+ OBJECT_DATA_HASH_TABLE,
+ OBJECT_FIELD_HASH_TABLE,
+ OBJECT_ENTRY_ARRAY,
+ OBJECT_TAG,
+ _OBJECT_TYPE_MAX
+} ObjectType;
+
+/* Object flags */
+enum {
+ OBJECT_COMPRESSED_XZ = 1 << 0,
+ OBJECT_COMPRESSED_LZ4 = 1 << 1,
+ OBJECT_COMPRESSED_ZSTD = 1 << 2,
+ OBJECT_COMPRESSION_MASK = (OBJECT_COMPRESSED_XZ | OBJECT_COMPRESSED_LZ4 | OBJECT_COMPRESSED_ZSTD),
+ _OBJECT_COMPRESSED_MAX = OBJECT_COMPRESSION_MASK,
+};
+
+struct ObjectHeader {
+ uint8_t type;
+ uint8_t flags;
+ uint8_t reserved[6];
+ le64_t size;
+ uint8_t payload[];
+} _packed_;
+
+#define DataObject__contents { \
+ ObjectHeader object; \
+ le64_t hash; \
+ le64_t next_hash_offset; \
+ le64_t next_field_offset; \
+ le64_t entry_offset; /* the first array entry we store inline */ \
+ le64_t entry_array_offset; \
+ le64_t n_entries; \
+ uint8_t payload[]; \
+ }
+
+struct DataObject DataObject__contents;
+struct DataObject__packed DataObject__contents _packed_;
+assert_cc(sizeof(struct DataObject) == sizeof(struct DataObject__packed));
+
+#define FieldObject__contents { \
+ ObjectHeader object; \
+ le64_t hash; \
+ le64_t next_hash_offset; \
+ le64_t head_data_offset; \
+ uint8_t payload[]; \
+}
+
+struct FieldObject FieldObject__contents;
+struct FieldObject__packed FieldObject__contents _packed_;
+assert_cc(sizeof(struct FieldObject) == sizeof(struct FieldObject__packed));
+
+struct EntryItem {
+ le64_t object_offset;
+ le64_t hash;
+} _packed_;
+
+#define EntryObject__contents { \
+ ObjectHeader object; \
+ le64_t seqnum; \
+ le64_t realtime; \
+ le64_t monotonic; \
+ sd_id128_t boot_id; \
+ le64_t xor_hash; \
+ EntryItem items[]; \
+ }
+
+struct EntryObject EntryObject__contents;
+struct EntryObject__packed EntryObject__contents _packed_;
+assert_cc(sizeof(struct EntryObject) == sizeof(struct EntryObject__packed));
+
+struct HashItem {
+ le64_t head_hash_offset;
+ le64_t tail_hash_offset;
+} _packed_;
+
+struct HashTableObject {
+ ObjectHeader object;
+ HashItem items[];
+} _packed_;
+
+struct EntryArrayObject {
+ ObjectHeader object;
+ le64_t next_entry_array_offset;
+ le64_t items[];
+} _packed_;
+
+#define TAG_LENGTH (256/8)
+
+struct TagObject {
+ ObjectHeader object;
+ le64_t seqnum;
+ le64_t epoch;
+ uint8_t tag[TAG_LENGTH]; /* SHA-256 HMAC */
+} _packed_;
+
+union Object {
+ ObjectHeader object;
+ DataObject data;
+ FieldObject field;
+ EntryObject entry;
+ HashTableObject hash_table;
+ EntryArrayObject entry_array;
+ TagObject tag;
+};
+
+enum {
+ STATE_OFFLINE = 0,
+ STATE_ONLINE = 1,
+ STATE_ARCHIVED = 2,
+ _STATE_MAX
+};
+
+/* Header flags */
+enum {
+ HEADER_INCOMPATIBLE_COMPRESSED_XZ = 1 << 0,
+ HEADER_INCOMPATIBLE_COMPRESSED_LZ4 = 1 << 1,
+ HEADER_INCOMPATIBLE_KEYED_HASH = 1 << 2,
+ HEADER_INCOMPATIBLE_COMPRESSED_ZSTD = 1 << 3,
+};
+
+#define HEADER_INCOMPATIBLE_ANY \
+ (HEADER_INCOMPATIBLE_COMPRESSED_XZ | \
+ HEADER_INCOMPATIBLE_COMPRESSED_LZ4 | \
+ HEADER_INCOMPATIBLE_KEYED_HASH | \
+ HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+
+#if HAVE_XZ && HAVE_LZ4 && HAVE_ZSTD
+# define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_ANY
+#elif HAVE_XZ && HAVE_LZ4
+# define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_KEYED_HASH)
+#elif HAVE_XZ && HAVE_ZSTD
+# define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_ZSTD|HEADER_INCOMPATIBLE_KEYED_HASH)
+#elif HAVE_LZ4 && HAVE_ZSTD
+# define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_COMPRESSED_ZSTD|HEADER_INCOMPATIBLE_KEYED_HASH)
+#elif HAVE_XZ
+# define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_KEYED_HASH)
+#elif HAVE_LZ4
+# define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_KEYED_HASH)
+#elif HAVE_ZSTD
+# define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_ZSTD|HEADER_INCOMPATIBLE_KEYED_HASH)
+#else
+# define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_KEYED_HASH
+#endif
+
+enum {
+ HEADER_COMPATIBLE_SEALED = 1 << 0,
+};
+
+#define HEADER_COMPATIBLE_ANY HEADER_COMPATIBLE_SEALED
+#if HAVE_GCRYPT
+# define HEADER_COMPATIBLE_SUPPORTED HEADER_COMPATIBLE_SEALED
+#else
+# define HEADER_COMPATIBLE_SUPPORTED 0
+#endif
+
+#define HEADER_SIGNATURE \
+ ((const char[]) { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' })
+
+#define struct_Header__contents { \
+ uint8_t signature[8]; /* "LPKSHHRH" */ \
+ le32_t compatible_flags; \
+ le32_t incompatible_flags; \
+ uint8_t state; \
+ uint8_t reserved[7]; \
+ sd_id128_t file_id; \
+ sd_id128_t machine_id; \
+ sd_id128_t boot_id; /* last writer */ \
+ sd_id128_t seqnum_id; \
+ le64_t header_size; \
+ le64_t arena_size; \
+ le64_t data_hash_table_offset; \
+ le64_t data_hash_table_size; \
+ le64_t field_hash_table_offset; \
+ le64_t field_hash_table_size; \
+ le64_t tail_object_offset; \
+ le64_t n_objects; \
+ le64_t n_entries; \
+ le64_t tail_entry_seqnum; \
+ le64_t head_entry_seqnum; \
+ le64_t entry_array_offset; \
+ le64_t head_entry_realtime; \
+ le64_t tail_entry_realtime; \
+ le64_t tail_entry_monotonic; \
+ /* Added in 187 */ \
+ le64_t n_data; \
+ le64_t n_fields; \
+ /* Added in 189 */ \
+ le64_t n_tags; \
+ le64_t n_entry_arrays; \
+ /* Added in 246 */ \
+ le64_t data_hash_chain_depth; \
+ le64_t field_hash_chain_depth; \
+ }
+
+struct Header struct_Header__contents;
+struct Header__packed struct_Header__contents _packed_;
+assert_cc(sizeof(struct Header) == sizeof(struct Header__packed));
+assert_cc(sizeof(struct Header) == 256);
+
+#define FSS_HEADER_SIGNATURE \
+ ((const char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' })
+
+struct FSSHeader {
+ uint8_t signature[8]; /* "KSHHRHLP" */
+ le32_t compatible_flags;
+ le32_t incompatible_flags;
+ sd_id128_t machine_id;
+ sd_id128_t boot_id; /* last writer */
+ le64_t header_size;
+ le64_t start_usec;
+ le64_t interval_usec;
+ le16_t fsprg_secpar;
+ le16_t reserved[3];
+ le64_t fsprg_state_size;
+} _packed_;
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
new file mode 100644
index 0000000..15336be
--- /dev/null
+++ b/src/journal/journal-file.c
@@ -0,0 +1,4162 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/statvfs.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "compress.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "lookup3.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "set.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "xattr-util.h"
+
+#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
+#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
+
+#define DEFAULT_COMPRESS_THRESHOLD (512ULL)
+#define MIN_COMPRESS_THRESHOLD (8ULL)
+
+/* This is the minimum journal file size */
+#define JOURNAL_FILE_SIZE_MIN (512 * 1024ULL) /* 512 KiB */
+
+/* These are the lower and upper bounds if we deduce the max_use value
+ * from the file system size */
+#define MAX_USE_LOWER (1 * 1024 * 1024ULL) /* 1 MiB */
+#define MAX_USE_UPPER (4 * 1024 * 1024 * 1024ULL) /* 4 GiB */
+
+/* Those are the lower and upper bounds for the minimal use limit,
+ * i.e. how much we'll use even if keep_free suggests otherwise. */
+#define MIN_USE_LOW (1 * 1024 * 1024ULL) /* 1 MiB */
+#define MIN_USE_HIGH (16 * 1024 * 1024ULL) /* 16 MiB */
+
+/* This is the upper bound if we deduce max_size from max_use */
+#define MAX_SIZE_UPPER (128 * 1024 * 1024ULL) /* 128 MiB */
+
+/* This is the upper bound if we deduce the keep_free value from the
+ * file system size */
+#define KEEP_FREE_UPPER (4 * 1024 * 1024 * 1024ULL) /* 4 GiB */
+
+/* This is the keep_free value when we can't determine the system
+ * size */
+#define DEFAULT_KEEP_FREE (1024 * 1024ULL) /* 1 MB */
+
+/* This is the default maximum number of journal files to keep around. */
+#define DEFAULT_N_MAX_FILES 100
+
+/* n_data was the first entry we added after the initial file format design */
+#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
+
+/* How many entries to keep in the entry array chain cache at max */
+#define CHAIN_CACHE_MAX 20
+
+/* How much to increase the journal file size at once each time we allocate something new. */
+#define FILE_SIZE_INCREASE (8 * 1024 * 1024ULL) /* 8MB */
+
+/* Reread fstat() of the file for detecting deletions at least this often */
+#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
+
+/* The mmap context to use for the header we pick as one above the last defined typed */
+#define CONTEXT_HEADER _OBJECT_TYPE_MAX
+
+/* Longest hash chain to rotate after */
+#define HASH_CHAIN_DEPTH_MAX 100
+
+#ifdef __clang__
+# pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+#endif
+
+/* This may be called from a separate thread to prevent blocking the caller for the duration of fsync().
+ * As a result we use atomic operations on f->offline_state for inter-thread communications with
+ * journal_file_set_offline() and journal_file_set_online(). */
+static void journal_file_set_offline_internal(JournalFile *f) {
+ assert(f);
+ assert(f->fd >= 0);
+ assert(f->header);
+
+ for (;;) {
+ switch (f->offline_state) {
+ case OFFLINE_CANCEL:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_DONE))
+ continue;
+ return;
+
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING))
+ continue;
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING))
+ continue;
+ break;
+
+ case OFFLINE_SYNCING:
+ (void) fsync(f->fd);
+
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING))
+ continue;
+
+ f->header->state = f->archive ? STATE_ARCHIVED : STATE_OFFLINE;
+ (void) fsync(f->fd);
+ break;
+
+ case OFFLINE_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE))
+ continue;
+ _fallthrough_;
+ case OFFLINE_DONE:
+ return;
+
+ case OFFLINE_JOINED:
+ log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()");
+ return;
+ }
+ }
+}
+
+static void * journal_file_set_offline_thread(void *arg) {
+ JournalFile *f = arg;
+
+ (void) pthread_setname_np(pthread_self(), "journal-offline");
+
+ journal_file_set_offline_internal(f);
+
+ return NULL;
+}
+
+static int journal_file_set_offline_thread_join(JournalFile *f) {
+ int r;
+
+ assert(f);
+
+ if (f->offline_state == OFFLINE_JOINED)
+ return 0;
+
+ r = pthread_join(f->offline_thread, NULL);
+ if (r)
+ return -r;
+
+ f->offline_state = OFFLINE_JOINED;
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ return -EIO;
+
+ return 0;
+}
+
+/* Trigger a restart if the offline thread is mid-flight in a restartable state. */
+static bool journal_file_set_offline_try_restart(JournalFile *f) {
+ for (;;) {
+ switch (f->offline_state) {
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ return true;
+
+ case OFFLINE_CANCEL:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING))
+ continue;
+ return true;
+
+ case OFFLINE_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING))
+ continue;
+ return true;
+
+ case OFFLINE_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING))
+ continue;
+ return true;
+
+ default:
+ return false;
+ }
+ }
+}
+
+/* Sets a journal offline.
+ *
+ * If wait is false then an offline is dispatched in a separate thread for a
+ * subsequent journal_file_set_offline() or journal_file_set_online() of the
+ * same journal to synchronize with.
+ *
+ * If wait is true, then either an existing offline thread will be restarted
+ * and joined, or if none exists the offline is simply performed in this
+ * context without involving another thread.
+ */
+int journal_file_set_offline(JournalFile *f, bool wait) {
+ bool restarted;
+ int r;
+
+ assert(f);
+
+ if (!f->writable)
+ return -EPERM;
+
+ if (f->fd < 0 || !f->header)
+ return -EINVAL;
+
+ /* An offlining journal is implicitly online and may modify f->header->state,
+ * we must also join any potentially lingering offline thread when not online. */
+ if (!journal_file_is_offlining(f) && f->header->state != STATE_ONLINE)
+ return journal_file_set_offline_thread_join(f);
+
+ /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */
+ restarted = journal_file_set_offline_try_restart(f);
+ if ((restarted && wait) || !restarted) {
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+ }
+
+ if (restarted)
+ return 0;
+
+ /* Initiate a new offline. */
+ f->offline_state = OFFLINE_SYNCING;
+
+ if (wait) /* Without using a thread if waiting. */
+ journal_file_set_offline_internal(f);
+ else {
+ sigset_t ss, saved_ss;
+ int k;
+
+ assert_se(sigfillset(&ss) >= 0);
+ /* Don't block SIGBUS since the offlining thread accesses a memory mapped file.
+ * Asynchronous SIGBUS signals can safely be handled by either thread. */
+ assert_se(sigdelset(&ss, SIGBUS) >= 0);
+
+ r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
+ if (r > 0)
+ return -r;
+
+ r = pthread_create(&f->offline_thread, NULL, journal_file_set_offline_thread, f);
+
+ k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
+ if (r > 0) {
+ f->offline_state = OFFLINE_JOINED;
+ return -r;
+ }
+ if (k > 0)
+ return -k;
+ }
+
+ return 0;
+}
+
+static int journal_file_set_online(JournalFile *f) {
+ bool wait = true;
+
+ assert(f);
+
+ if (!f->writable)
+ return -EPERM;
+
+ if (f->fd < 0 || !f->header)
+ return -EINVAL;
+
+ while (wait) {
+ switch (f->offline_state) {
+ case OFFLINE_JOINED:
+ /* No offline thread, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled syncing prior to offlining, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled restart from syncing, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled restart from offlining, must wait for offlining to complete however. */
+ _fallthrough_;
+ default: {
+ int r;
+
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+
+ wait = false;
+ break;
+ }
+ }
+ }
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ return -EIO;
+
+ switch (f->header->state) {
+ case STATE_ONLINE:
+ return 0;
+
+ case STATE_OFFLINE:
+ f->header->state = STATE_ONLINE;
+ (void) fsync(f->fd);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+bool journal_file_is_offlining(JournalFile *f) {
+ assert(f);
+
+ __sync_synchronize();
+
+ if (IN_SET(f->offline_state, OFFLINE_DONE, OFFLINE_JOINED))
+ return false;
+
+ return true;
+}
+
+JournalFile* journal_file_close(JournalFile *f) {
+ if (!f)
+ return NULL;
+
+#if HAVE_GCRYPT
+ /* Write the final tag */
+ if (f->seal && f->writable) {
+ int r;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ log_error_errno(r, "Failed to append tag when closing journal: %m");
+ }
+#endif
+
+ if (f->post_change_timer) {
+ if (sd_event_source_get_enabled(f->post_change_timer, NULL) > 0)
+ journal_file_post_change(f);
+
+ sd_event_source_disable_unref(f->post_change_timer);
+ }
+
+ journal_file_set_offline(f, true);
+
+ if (f->mmap && f->cache_fd)
+ mmap_cache_free_fd(f->mmap, f->cache_fd);
+
+ if (f->fd >= 0 && f->defrag_on_close) {
+
+ /* Be friendly to btrfs: turn COW back on again now,
+ * and defragment the file. We won't write to the file
+ * ever again, hence remove all fragmentation, and
+ * reenable all the good bits COW usually provides
+ * (such as data checksumming). */
+
+ (void) chattr_fd(f->fd, 0, FS_NOCOW_FL, NULL);
+ (void) btrfs_defrag_fd(f->fd);
+ }
+
+ if (f->close_fd)
+ safe_close(f->fd);
+ free(f->path);
+
+ mmap_cache_unref(f->mmap);
+
+ ordered_hashmap_free_free(f->chain_cache);
+
+#if HAVE_COMPRESSION
+ free(f->compress_buffer);
+#endif
+
+#if HAVE_GCRYPT
+ if (f->fss_file)
+ munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
+ else
+ free(f->fsprg_state);
+
+ free(f->fsprg_seed);
+
+ if (f->hmac)
+ gcry_md_close(f->hmac);
+#endif
+
+ return mfree(f);
+}
+
+static int journal_file_init_header(JournalFile *f, JournalFile *template) {
+ Header h = {};
+ ssize_t k;
+ int r;
+
+ assert(f);
+
+ memcpy(h.signature, HEADER_SIGNATURE, 8);
+ h.header_size = htole64(ALIGN64(sizeof(h)));
+
+ h.incompatible_flags |= htole32(
+ f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
+ f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+ f->compress_zstd * HEADER_INCOMPATIBLE_COMPRESSED_ZSTD |
+ f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH);
+
+ h.compatible_flags = htole32(
+ f->seal * HEADER_COMPATIBLE_SEALED);
+
+ r = sd_id128_randomize(&h.file_id);
+ if (r < 0)
+ return r;
+
+ if (template) {
+ h.seqnum_id = template->header->seqnum_id;
+ h.tail_entry_seqnum = template->header->tail_entry_seqnum;
+ } else
+ h.seqnum_id = h.file_id;
+
+ k = pwrite(f->fd, &h, sizeof(h), 0);
+ if (k < 0)
+ return -errno;
+
+ if (k != sizeof(h))
+ return -EIO;
+
+ return 0;
+}
+
+static int journal_file_refresh_header(JournalFile *f) {
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ r = sd_id128_get_machine(&f->header->machine_id);
+ if (IN_SET(r, -ENOENT, -ENOMEDIUM))
+ /* We don't have a machine-id, let's continue without */
+ zero(f->header->machine_id);
+ else if (r < 0)
+ return r;
+
+ r = sd_id128_get_boot(&f->header->boot_id);
+ if (r < 0)
+ return r;
+
+ r = journal_file_set_online(f);
+
+ /* Sync the online state to disk */
+ (void) fsync(f->fd);
+
+ /* We likely just created a new file, also sync the directory this file is located in. */
+ (void) fsync_directory_of_file(f->fd);
+
+ return r;
+}
+
+static bool warn_wrong_flags(const JournalFile *f, bool compatible) {
+ const uint32_t any = compatible ? HEADER_COMPATIBLE_ANY : HEADER_INCOMPATIBLE_ANY,
+ supported = compatible ? HEADER_COMPATIBLE_SUPPORTED : HEADER_INCOMPATIBLE_SUPPORTED;
+ const char *type = compatible ? "compatible" : "incompatible";
+ uint32_t flags;
+
+ flags = le32toh(compatible ? f->header->compatible_flags : f->header->incompatible_flags);
+
+ if (flags & ~supported) {
+ if (flags & ~any)
+ log_debug("Journal file %s has unknown %s flags 0x%"PRIx32,
+ f->path, type, flags & ~any);
+ flags = (flags & any) & ~supported;
+ if (flags) {
+ const char* strv[5];
+ unsigned n = 0;
+ _cleanup_free_ char *t = NULL;
+
+ if (compatible) {
+ if (flags & HEADER_COMPATIBLE_SEALED)
+ strv[n++] = "sealed";
+ } else {
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+ strv[n++] = "xz-compressed";
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+ strv[n++] = "lz4-compressed";
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+ strv[n++] = "zstd-compressed";
+ if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+ strv[n++] = "keyed-hash";
+ }
+ strv[n] = NULL;
+ assert(n < ELEMENTSOF(strv));
+
+ t = strv_join((char**) strv, ", ");
+ log_debug("Journal file %s uses %s %s %s disabled at compilation time.",
+ f->path, type, n > 1 ? "flags" : "flag", strnull(t));
+ }
+ return true;
+ }
+
+ return false;
+}
+
+static int journal_file_verify_header(JournalFile *f) {
+ uint64_t arena_size, header_size;
+
+ assert(f);
+ assert(f->header);
+
+ if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
+ return -EBADMSG;
+
+ /* In both read and write mode we refuse to open files with incompatible
+ * flags we don't know. */
+ if (warn_wrong_flags(f, false))
+ return -EPROTONOSUPPORT;
+
+ /* When open for writing we refuse to open files with compatible flags, too. */
+ if (f->writable && warn_wrong_flags(f, true))
+ return -EPROTONOSUPPORT;
+
+ if (f->header->state >= _STATE_MAX)
+ return -EBADMSG;
+
+ header_size = le64toh(READ_NOW(f->header->header_size));
+
+ /* The first addition was n_data, so check that we are at least this large */
+ if (header_size < HEADER_SIZE_MIN)
+ return -EBADMSG;
+
+ if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ return -EBADMSG;
+
+ arena_size = le64toh(READ_NOW(f->header->arena_size));
+
+ if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size)
+ return -ENODATA;
+
+ if (le64toh(f->header->tail_object_offset) > header_size + arena_size)
+ return -ENODATA;
+
+ if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
+ !VALID64(le64toh(f->header->field_hash_table_offset)) ||
+ !VALID64(le64toh(f->header->tail_object_offset)) ||
+ !VALID64(le64toh(f->header->entry_array_offset)))
+ return -ENODATA;
+
+ if (f->writable) {
+ sd_id128_t machine_id;
+ uint8_t state;
+ int r;
+
+ r = sd_id128_get_machine(&machine_id);
+ if (r < 0)
+ return r;
+
+ if (!sd_id128_equal(machine_id, f->header->machine_id))
+ return -EHOSTDOWN;
+
+ state = f->header->state;
+
+ if (state == STATE_ARCHIVED)
+ return -ESHUTDOWN; /* Already archived */
+ else if (state == STATE_ONLINE)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+ "Journal file %s is already online. Assuming unclean closing.",
+ f->path);
+ else if (state != STATE_OFFLINE)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+ "Journal file %s has unknown state %i.",
+ f->path, state);
+
+ if (f->header->field_hash_table_size == 0 || f->header->data_hash_table_size == 0)
+ return -EBADMSG;
+
+ /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't
+ * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks
+ * bisection. */
+ if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME))
+ return log_debug_errno(SYNTHETIC_ERRNO(ETXTBSY),
+ "Journal file %s is from the future, refusing to append new data to it that'd be older.",
+ f->path);
+ }
+
+ f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
+ f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
+ f->compress_zstd = JOURNAL_HEADER_COMPRESSED_ZSTD(f->header);
+
+ f->seal = JOURNAL_HEADER_SEALED(f->header);
+
+ f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+
+ return 0;
+}
+
+int journal_file_fstat(JournalFile *f) {
+ int r;
+
+ assert(f);
+ assert(f->fd >= 0);
+
+ if (fstat(f->fd, &f->last_stat) < 0)
+ return -errno;
+
+ f->last_stat_usec = now(CLOCK_MONOTONIC);
+
+ /* Refuse dealing with files that aren't regular */
+ r = stat_verify_regular(&f->last_stat);
+ if (r < 0)
+ return r;
+
+ /* Refuse appending to files that are already deleted */
+ if (f->last_stat.st_nlink <= 0)
+ return -EIDRM;
+
+ return 0;
+}
+
+static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
+ uint64_t old_size, new_size, old_header_size, old_arena_size;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We assume that this file is not sparse, and we know that for sure, since we always call
+ * posix_fallocate() ourselves */
+
+ if (size > PAGE_ALIGN_DOWN(UINT64_MAX) - offset)
+ return -EINVAL;
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ return -EIO;
+
+ old_header_size = le64toh(READ_NOW(f->header->header_size));
+ old_arena_size = le64toh(READ_NOW(f->header->arena_size));
+ if (old_arena_size > PAGE_ALIGN_DOWN(UINT64_MAX) - old_header_size)
+ return -EBADMSG;
+
+ old_size = old_header_size + old_arena_size;
+
+ new_size = MAX(PAGE_ALIGN(offset + size), old_header_size);
+
+ if (new_size <= old_size) {
+
+ /* We already pre-allocated enough space, but before
+ * we write to it, let's check with fstat() if the
+ * file got deleted, in order make sure we don't throw
+ * away the data immediately. Don't check fstat() for
+ * all writes though, but only once ever 10s. */
+
+ if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
+ return 0;
+
+ return journal_file_fstat(f);
+ }
+
+ /* Allocate more space. */
+
+ if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
+ return -E2BIG;
+
+ if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
+ struct statvfs svfs;
+
+ if (fstatvfs(f->fd, &svfs) >= 0) {
+ uint64_t available;
+
+ available = LESS_BY((uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize, f->metrics.keep_free);
+
+ if (new_size - old_size > available)
+ return -E2BIG;
+ }
+ }
+
+ /* Increase by larger blocks at once */
+ new_size = DIV_ROUND_UP(new_size, FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
+ if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
+ new_size = f->metrics.max_size;
+
+ /* Note that the glibc fallocate() fallback is very
+ inefficient, hence we try to minimize the allocation area
+ as we can. */
+ r = posix_fallocate(f->fd, old_size, new_size - old_size);
+ if (r != 0)
+ return -r;
+
+ f->header->arena_size = htole64(new_size - old_header_size);
+
+ return journal_file_fstat(f);
+}
+
+static unsigned type_to_context(ObjectType type) {
+ /* One context for each type, plus one catch-all for the rest */
+ assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS);
+ assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);
+ return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
+}
+
+static int journal_file_move_to(
+ JournalFile *f,
+ ObjectType type,
+ bool keep_always,
+ uint64_t offset,
+ uint64_t size,
+ void **ret,
+ size_t *ret_size) {
+
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ if (size <= 0)
+ return -EINVAL;
+
+ if (size > UINT64_MAX - offset)
+ return -EBADMSG;
+
+ /* Avoid SIGBUS on invalid accesses */
+ if (offset + size > (uint64_t) f->last_stat.st_size) {
+ /* Hmm, out of range? Let's refresh the fstat() data
+ * first, before we trust that check. */
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ return r;
+
+ if (offset + size > (uint64_t) f->last_stat.st_size)
+ return -EADDRNOTAVAIL;
+ }
+
+ return mmap_cache_get(f->mmap, f->cache_fd, f->prot, type_to_context(type), keep_always, offset, size, &f->last_stat, ret, ret_size);
+}
+
+static uint64_t minimum_header_size(Object *o) {
+
+ static const uint64_t table[] = {
+ [OBJECT_DATA] = sizeof(DataObject),
+ [OBJECT_FIELD] = sizeof(FieldObject),
+ [OBJECT_ENTRY] = sizeof(EntryObject),
+ [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
+ [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
+ [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
+ [OBJECT_TAG] = sizeof(TagObject),
+ };
+
+ if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
+ return sizeof(ObjectHeader);
+
+ return table[o->object.type];
+}
+
+/* Lightweight object checks. We want this to be fast, so that we won't
+ * slowdown every journal_file_move_to_object() call too much. */
+static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o) {
+ assert(f);
+ assert(o);
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA:
+ if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad n_entries: %" PRIu64 ": %" PRIu64,
+ le64toh(o->data.n_entries),
+ offset);
+
+ if (le64toh(o->object.size) <= offsetof(DataObject, payload))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
+ offsetof(DataObject, payload),
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID64(le64toh(o->data.next_hash_offset)) ||
+ !VALID64(le64toh(o->data.next_field_offset)) ||
+ !VALID64(le64toh(o->data.entry_offset)) ||
+ !VALID64(le64toh(o->data.entry_array_offset)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid offset, next_hash_offset=" OFSfmt ", next_field_offset=" OFSfmt ", entry_offset=" OFSfmt ", entry_array_offset=" OFSfmt ": %" PRIu64,
+ le64toh(o->data.next_hash_offset),
+ le64toh(o->data.next_field_offset),
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ offset);
+
+ break;
+
+ case OBJECT_FIELD:
+ if (le64toh(o->object.size) <= offsetof(FieldObject, payload))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,
+ offsetof(FieldObject, payload),
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID64(le64toh(o->field.next_hash_offset)) ||
+ !VALID64(le64toh(o->field.head_data_offset)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid offset, next_hash_offset=" OFSfmt ", head_data_offset=" OFSfmt ": %" PRIu64,
+ le64toh(o->field.next_hash_offset),
+ le64toh(o->field.head_data_offset),
+ offset);
+ break;
+
+ case OBJECT_ENTRY: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(EntryObject, items) ||
+ (sz - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
+ offsetof(EntryObject, items),
+ sz,
+ offset);
+
+ if ((sz - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid number items in entry: %" PRIu64 ": %" PRIu64,
+ (sz - offsetof(EntryObject, items)) / sizeof(EntryItem),
+ offset);
+
+ if (le64toh(o->entry.seqnum) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry seqnum: %" PRIx64 ": %" PRIu64,
+ le64toh(o->entry.seqnum),
+ offset);
+
+ if (!VALID_REALTIME(le64toh(o->entry.realtime)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry realtime timestamp: %" PRIu64 ": %" PRIu64,
+ le64toh(o->entry.realtime),
+ offset);
+
+ if (!VALID_MONOTONIC(le64toh(o->entry.monotonic)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry monotonic timestamp: %" PRIu64 ": %" PRIu64,
+ le64toh(o->entry.monotonic),
+ offset);
+
+ break;
+ }
+
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_FIELD_HASH_TABLE: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(HashTableObject, items) ||
+ (sz - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
+ (sz - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid %s hash table size: %" PRIu64 ": %" PRIu64,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ sz,
+ offset);
+
+ break;
+ }
+
+ case OBJECT_ENTRY_ARRAY: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(EntryArrayObject, items) ||
+ (sz - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
+ (sz - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object entry array size: %" PRIu64 ": %" PRIu64,
+ sz,
+ offset);
+
+ if (!VALID64(le64toh(o->entry_array.next_entry_array_offset)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object entry array next_entry_array_offset: " OFSfmt ": %" PRIu64,
+ le64toh(o->entry_array.next_entry_array_offset),
+ offset);
+
+ break;
+ }
+
+ case OBJECT_TAG:
+ if (le64toh(o->object.size) != sizeof(TagObject))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object tag size: %" PRIu64 ": %" PRIu64,
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID_EPOCH(le64toh(o->tag.epoch)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object tag epoch: %" PRIu64 ": %" PRIu64,
+ le64toh(o->tag.epoch), offset);
+
+ break;
+ }
+
+ return 0;
+}
+
+int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret) {
+ int r;
+ void *t;
+ size_t tsize;
+ Object *o;
+ uint64_t s;
+
+ assert(f);
+ assert(ret);
+
+ /* Objects may only be located at multiple of 64 bit */
+ if (!VALID64(offset))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object at non-64bit boundary: %" PRIu64,
+ offset);
+
+ /* Object may not be located in the file header */
+ if (offset < le64toh(f->header->header_size))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object located in file header: %" PRIu64,
+ offset);
+
+ r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t, &tsize);
+ if (r < 0)
+ return r;
+
+ o = (Object*) t;
+ s = le64toh(READ_NOW(o->object.size));
+
+ if (s == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to uninitialized object: %" PRIu64,
+ offset);
+ if (s < sizeof(ObjectHeader))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to overly short object: %" PRIu64,
+ offset);
+
+ if (o->object.type <= OBJECT_UNUSED)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object with invalid type: %" PRIu64,
+ offset);
+
+ if (s < minimum_header_size(o))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to truncated object: %" PRIu64,
+ offset);
+
+ if (type > OBJECT_UNUSED && o->object.type != type)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object of unexpected type: %" PRIu64,
+ offset);
+
+ if (s > tsize) {
+ r = journal_file_move_to(f, type, false, offset, s, &t, NULL);
+ if (r < 0)
+ return r;
+
+ o = (Object*) t;
+ }
+
+ r = journal_file_check_object(f, offset, o);
+ if (r < 0)
+ return r;
+
+ *ret = o;
+ return 0;
+}
+
+static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
+ uint64_t r;
+
+ assert(f);
+ assert(f->header);
+
+ r = le64toh(f->header->tail_entry_seqnum) + 1;
+
+ if (seqnum) {
+ /* If an external seqnum counter was passed, we update
+ * both the local and the external one, and set it to
+ * the maximum of both */
+
+ if (*seqnum + 1 > r)
+ r = *seqnum + 1;
+
+ *seqnum = r;
+ }
+
+ f->header->tail_entry_seqnum = htole64(r);
+
+ if (f->header->head_entry_seqnum == 0)
+ f->header->head_entry_seqnum = htole64(r);
+
+ return r;
+}
+
+int journal_file_append_object(
+ JournalFile *f,
+ ObjectType type,
+ uint64_t size,
+ Object **ret,
+ uint64_t *ret_offset) {
+
+ int r;
+ uint64_t p;
+ Object *tail, *o;
+ void *t;
+
+ assert(f);
+ assert(f->header);
+ assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
+ assert(size >= sizeof(ObjectHeader));
+
+ r = journal_file_set_online(f);
+ if (r < 0)
+ return r;
+
+ p = le64toh(f->header->tail_object_offset);
+ if (p == 0)
+ p = le64toh(f->header->header_size);
+ else {
+ uint64_t sz;
+
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
+ if (r < 0)
+ return r;
+
+ sz = le64toh(READ_NOW(tail->object.size));
+ if (sz > UINT64_MAX - sizeof(uint64_t) + 1)
+ return -EBADMSG;
+
+ sz = ALIGN64(sz);
+ if (p > UINT64_MAX - sz)
+ return -EBADMSG;
+
+ p += sz;
+ }
+
+ r = journal_file_allocate(f, p, size);
+ if (r < 0)
+ return r;
+
+ r = journal_file_move_to(f, type, false, p, size, &t, NULL);
+ if (r < 0)
+ return r;
+
+ o = (Object*) t;
+ o->object = (ObjectHeader) {
+ .type = type,
+ .size = htole64(size),
+ };
+
+ f->header->tail_object_offset = htole64(p);
+ f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 0;
+}
+
+static int journal_file_setup_data_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We estimate that we need 1 hash table entry per 768 bytes
+ of journal file and we want to make sure we never get
+ beyond 75% fill level. Calculate the hash table size for
+ the maximum file size based on these metrics. */
+
+ s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
+ if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
+ s = DEFAULT_DATA_HASH_TABLE_SIZE;
+
+ log_debug("Reserving %"PRIu64" entries in data hash table.", s / sizeof(HashItem));
+
+ r = journal_file_append_object(f,
+ OBJECT_DATA_HASH_TABLE,
+ offsetof(Object, hash_table.items) + s,
+ &o, &p);
+ if (r < 0)
+ return r;
+
+ memzero(o->hash_table.items, s);
+
+ f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
+ f->header->data_hash_table_size = htole64(s);
+
+ return 0;
+}
+
+static int journal_file_setup_field_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We use a fixed size hash table for the fields as this
+ * number should grow very slowly only */
+
+ s = DEFAULT_FIELD_HASH_TABLE_SIZE;
+ log_debug("Reserving %"PRIu64" entries in field hash table.", s / sizeof(HashItem));
+
+ r = journal_file_append_object(f,
+ OBJECT_FIELD_HASH_TABLE,
+ offsetof(Object, hash_table.items) + s,
+ &o, &p);
+ if (r < 0)
+ return r;
+
+ memzero(o->hash_table.items, s);
+
+ f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
+ f->header->field_hash_table_size = htole64(s);
+
+ return 0;
+}
+
+int journal_file_map_data_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ void *t;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ if (f->data_hash_table)
+ return 0;
+
+ p = le64toh(f->header->data_hash_table_offset);
+ s = le64toh(f->header->data_hash_table_size);
+
+ r = journal_file_move_to(f,
+ OBJECT_DATA_HASH_TABLE,
+ true,
+ p, s,
+ &t, NULL);
+ if (r < 0)
+ return r;
+
+ f->data_hash_table = t;
+ return 0;
+}
+
+int journal_file_map_field_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ void *t;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ if (f->field_hash_table)
+ return 0;
+
+ p = le64toh(f->header->field_hash_table_offset);
+ s = le64toh(f->header->field_hash_table_size);
+
+ r = journal_file_move_to(f,
+ OBJECT_FIELD_HASH_TABLE,
+ true,
+ p, s,
+ &t, NULL);
+ if (r < 0)
+ return r;
+
+ f->field_hash_table = t;
+ return 0;
+}
+
+static int journal_file_link_field(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ uint64_t hash) {
+
+ uint64_t p, h, m;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(f->field_hash_table);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_FIELD)
+ return -EINVAL;
+
+ m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ /* This might alter the window we are looking at */
+ o->field.next_hash_offset = o->field.head_data_offset = 0;
+
+ h = hash % m;
+ p = le64toh(f->field_hash_table[h].tail_hash_offset);
+ if (p == 0)
+ f->field_hash_table[h].head_hash_offset = htole64(offset);
+ else {
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+ if (r < 0)
+ return r;
+
+ o->field.next_hash_offset = htole64(offset);
+ }
+
+ f->field_hash_table[h].tail_hash_offset = htole64(offset);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
+
+ return 0;
+}
+
+static int journal_file_link_data(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ uint64_t hash) {
+
+ uint64_t p, h, m;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(f->data_hash_table);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_DATA)
+ return -EINVAL;
+
+ m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ /* This might alter the window we are looking at */
+ o->data.next_hash_offset = o->data.next_field_offset = 0;
+ o->data.entry_offset = o->data.entry_array_offset = 0;
+ o->data.n_entries = 0;
+
+ h = hash % m;
+ p = le64toh(f->data_hash_table[h].tail_hash_offset);
+ if (p == 0)
+ /* Only entry in the hash table is easy */
+ f->data_hash_table[h].head_hash_offset = htole64(offset);
+ else {
+ /* Move back to the previous data object, to patch in
+ * pointer */
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ o->data.next_hash_offset = htole64(offset);
+ }
+
+ f->data_hash_table[h].tail_hash_offset = htole64(offset);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
+
+ return 0;
+}
+
+static int next_hash_offset(
+ JournalFile *f,
+ uint64_t *p,
+ le64_t *next_hash_offset,
+ uint64_t *depth,
+ le64_t *header_max_depth) {
+
+ uint64_t nextp;
+
+ nextp = le64toh(READ_NOW(*next_hash_offset));
+ if (nextp > 0) {
+ if (nextp <= *p) /* Refuse going in loops */
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Detected hash item loop in %s, refusing.", f->path);
+
+ (*depth)++;
+
+ /* If the depth of this hash chain is larger than all others we have seen so far, record it */
+ if (header_max_depth && f->writable)
+ *header_max_depth = htole64(MAX(*depth, le64toh(*header_max_depth)));
+ }
+
+ *p = nextp;
+ return 0;
+}
+
+int journal_file_find_field_object_with_hash(
+ JournalFile *f,
+ const void *field, uint64_t size, uint64_t hash,
+ Object **ret, uint64_t *ret_offset) {
+
+ uint64_t p, osize, h, m, depth = 0;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(field && size > 0);
+
+ /* If the field hash table is empty, we can't find anything */
+ if (le64toh(f->header->field_hash_table_size) <= 0)
+ return 0;
+
+ /* Map the field hash table, if it isn't mapped yet. */
+ r = journal_file_map_field_hash_table(f);
+ if (r < 0)
+ return r;
+
+ osize = offsetof(Object, field.payload) + size;
+
+ m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ h = hash % m;
+ p = le64toh(f->field_hash_table[h].head_hash_offset);
+ while (p > 0) {
+ Object *o;
+
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->field.hash) == hash &&
+ le64toh(o->object.size) == osize &&
+ memcmp(o->field.payload, field, size) == 0) {
+
+ if (ret)
+ *ret = o;
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 1;
+ }
+
+ r = next_hash_offset(
+ f,
+ &p,
+ &o->field.next_hash_offset,
+ &depth,
+ JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) ? &f->header->field_hash_chain_depth : NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+uint64_t journal_file_hash_data(
+ JournalFile *f,
+ const void *data,
+ size_t sz) {
+
+ assert(f);
+ assert(data || sz == 0);
+
+ /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash
+ * function use siphash. Old journal files use the Jenkins hash. */
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ return siphash24(data, sz, f->header->file_id.bytes);
+
+ return jenkins_hash64(data, sz);
+}
+
+int journal_file_find_field_object(
+ JournalFile *f,
+ const void *field, uint64_t size,
+ Object **ret, uint64_t *ret_offset) {
+
+ assert(f);
+ assert(field && size > 0);
+
+ return journal_file_find_field_object_with_hash(
+ f,
+ field, size,
+ journal_file_hash_data(f, field, size),
+ ret, ret_offset);
+}
+
+int journal_file_find_data_object_with_hash(
+ JournalFile *f,
+ const void *data, uint64_t size, uint64_t hash,
+ Object **ret, uint64_t *ret_offset) {
+
+ uint64_t p, osize, h, m, depth = 0;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(data || size == 0);
+
+ /* If there's no data hash table, then there's no entry. */
+ if (le64toh(f->header->data_hash_table_size) <= 0)
+ return 0;
+
+ /* Map the data hash table, if it isn't mapped yet. */
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return r;
+
+ osize = offsetof(Object, data.payload) + size;
+
+ m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ h = hash % m;
+ p = le64toh(f->data_hash_table[h].head_hash_offset);
+
+ while (p > 0) {
+ Object *o;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->data.hash) != hash)
+ goto next;
+
+ if (o->object.flags & OBJECT_COMPRESSION_MASK) {
+#if HAVE_COMPRESSION
+ uint64_t l;
+ size_t rsize = 0;
+
+ l = le64toh(READ_NOW(o->object.size));
+ if (l <= offsetof(Object, data.payload))
+ return -EBADMSG;
+
+ l -= offsetof(Object, data.payload);
+
+ r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
+ o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
+ if (r < 0)
+ return r;
+
+ if (rsize == size &&
+ memcmp(f->compress_buffer, data, size) == 0) {
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 1;
+ }
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else if (le64toh(o->object.size) == osize &&
+ memcmp(o->data.payload, data, size) == 0) {
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 1;
+ }
+
+ next:
+ r = next_hash_offset(
+ f,
+ &p,
+ &o->data.next_hash_offset,
+ &depth,
+ JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) ? &f->header->data_hash_chain_depth : NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int journal_file_find_data_object(
+ JournalFile *f,
+ const void *data, uint64_t size,
+ Object **ret, uint64_t *ret_offset) {
+
+ assert(f);
+ assert(data || size == 0);
+
+ return journal_file_find_data_object_with_hash(
+ f,
+ data, size,
+ journal_file_hash_data(f, data, size),
+ ret, ret_offset);
+}
+
+bool journal_field_valid(const char *p, size_t l, bool allow_protected) {
+ const char *a;
+
+ /* We kinda enforce POSIX syntax recommendations for
+ environment variables here, but make a couple of additional
+ requirements.
+
+ http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
+
+ if (l == (size_t) -1)
+ l = strlen(p);
+
+ /* No empty field names */
+ if (l <= 0)
+ return false;
+
+ /* Don't allow names longer than 64 chars */
+ if (l > 64)
+ return false;
+
+ /* Variables starting with an underscore are protected */
+ if (!allow_protected && p[0] == '_')
+ return false;
+
+ /* Don't allow digits as first character */
+ if (p[0] >= '0' && p[0] <= '9')
+ return false;
+
+ /* Only allow A-Z0-9 and '_' */
+ for (a = p; a < p + l; a++)
+ if ((*a < 'A' || *a > 'Z') &&
+ (*a < '0' || *a > '9') &&
+ *a != '_')
+ return false;
+
+ return true;
+}
+
+static int journal_file_append_field(
+ JournalFile *f,
+ const void *field, uint64_t size,
+ Object **ret, uint64_t *ret_offset) {
+
+ uint64_t hash, p;
+ uint64_t osize;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(field && size > 0);
+
+ if (!journal_field_valid(field, size, true))
+ return -EBADMSG;
+
+ hash = journal_file_hash_data(f, field, size);
+
+ r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
+ if (r < 0)
+ return r;
+ else if (r > 0) {
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 0;
+ }
+
+ osize = offsetof(Object, field.payload) + size;
+ r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
+ if (r < 0)
+ return r;
+
+ o->field.hash = htole64(hash);
+ memcpy(o->field.payload, field, size);
+
+ r = journal_file_link_field(f, o, p, hash);
+ if (r < 0)
+ return r;
+
+ /* The linking might have altered the window, so let's
+ * refresh our pointer */
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+ if (r < 0)
+ return r;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
+ if (r < 0)
+ return r;
+#endif
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 0;
+}
+
+static int journal_file_append_data(
+ JournalFile *f,
+ const void *data, uint64_t size,
+ Object **ret, uint64_t *ret_offset) {
+
+ uint64_t hash, p;
+ uint64_t osize;
+ Object *o;
+ int r, compression = 0;
+ const void *eq;
+
+ assert(f);
+ assert(data || size == 0);
+
+ hash = journal_file_hash_data(f, data, size);
+
+ r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 0;
+ }
+
+ osize = offsetof(Object, data.payload) + size;
+ r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
+ if (r < 0)
+ return r;
+
+ o->data.hash = htole64(hash);
+
+#if HAVE_COMPRESSION
+ if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) {
+ size_t rsize = 0;
+
+ compression = compress_blob(data, size, o->data.payload, size - 1, &rsize);
+
+ if (compression >= 0) {
+ o->object.size = htole64(offsetof(Object, data.payload) + rsize);
+ o->object.flags |= compression;
+
+ log_debug("Compressed data object %"PRIu64" -> %zu using %s",
+ size, rsize, object_compressed_to_string(compression));
+ } else
+ /* Compression didn't work, we don't really care why, let's continue without compression */
+ compression = 0;
+ }
+#endif
+
+ if (compression == 0)
+ memcpy_safe(o->data.payload, data, size);
+
+ r = journal_file_link_data(f, o, p, hash);
+ if (r < 0)
+ return r;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
+ if (r < 0)
+ return r;
+#endif
+
+ /* The linking might have altered the window, so let's
+ * refresh our pointer */
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (!data)
+ eq = NULL;
+ else
+ eq = memchr(data, '=', size);
+ if (eq && eq > data) {
+ Object *fo = NULL;
+ uint64_t fp;
+
+ /* Create field object ... */
+ r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
+ if (r < 0)
+ return r;
+
+ /* ... and link it in. */
+ o->data.next_field_offset = fo->field.head_data_offset;
+ fo->field.head_data_offset = le64toh(p);
+ }
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 0;
+}
+
+uint64_t journal_file_entry_n_items(Object *o) {
+ uint64_t sz;
+ assert(o);
+
+ if (o->object.type != OBJECT_ENTRY)
+ return 0;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, entry.items))
+ return 0;
+
+ return (sz - offsetof(Object, entry.items)) / sizeof(EntryItem);
+}
+
+uint64_t journal_file_entry_array_n_items(Object *o) {
+ uint64_t sz;
+
+ assert(o);
+
+ if (o->object.type != OBJECT_ENTRY_ARRAY)
+ return 0;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, entry_array.items))
+ return 0;
+
+ return (sz - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
+}
+
+uint64_t journal_file_hash_table_n_items(Object *o) {
+ uint64_t sz;
+
+ assert(o);
+
+ if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE))
+ return 0;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, hash_table.items))
+ return 0;
+
+ return (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem);
+}
+
+static int link_entry_into_array(JournalFile *f,
+ le64_t *first,
+ le64_t *idx,
+ uint64_t p) {
+ int r;
+ uint64_t n = 0, ap = 0, q, i, a, hidx;
+ Object *o;
+
+ assert(f);
+ assert(f->header);
+ assert(first);
+ assert(idx);
+ assert(p > 0);
+
+ a = le64toh(*first);
+ i = hidx = le64toh(READ_NOW(*idx));
+ while (a > 0) {
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ n = journal_file_entry_array_n_items(o);
+ if (i < n) {
+ o->entry_array.items[i] = htole64(p);
+ *idx = htole64(hidx + 1);
+ return 0;
+ }
+
+ i -= n;
+ ap = a;
+ a = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ if (hidx > n)
+ n = (hidx+1) * 2;
+ else
+ n = n * 2;
+
+ if (n < 4)
+ n = 4;
+
+ r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
+ offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
+ &o, &q);
+ if (r < 0)
+ return r;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
+ if (r < 0)
+ return r;
+#endif
+
+ o->entry_array.items[i] = htole64(p);
+
+ if (ap == 0)
+ *first = htole64(q);
+ else {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
+ if (r < 0)
+ return r;
+
+ o->entry_array.next_entry_array_offset = htole64(q);
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
+
+ *idx = htole64(hidx + 1);
+
+ return 0;
+}
+
+static int link_entry_into_array_plus_one(JournalFile *f,
+ le64_t *extra,
+ le64_t *first,
+ le64_t *idx,
+ uint64_t p) {
+
+ uint64_t hidx;
+ int r;
+
+ assert(f);
+ assert(extra);
+ assert(first);
+ assert(idx);
+ assert(p > 0);
+
+ hidx = le64toh(READ_NOW(*idx));
+ if (hidx == UINT64_MAX)
+ return -EBADMSG;
+ if (hidx == 0)
+ *extra = htole64(p);
+ else {
+ le64_t i;
+
+ i = htole64(hidx - 1);
+ r = link_entry_into_array(f, first, &i, p);
+ if (r < 0)
+ return r;
+ }
+
+ *idx = htole64(hidx + 1);
+ return 0;
+}
+
+static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
+ uint64_t p;
+ int r;
+
+ assert(f);
+ assert(o);
+ assert(offset > 0);
+
+ p = le64toh(o->entry.items[i].object_offset);
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ return link_entry_into_array_plus_one(f,
+ &o->data.entry_offset,
+ &o->data.entry_array_offset,
+ &o->data.n_entries,
+ offset);
+}
+
+static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
+ uint64_t n, i;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_ENTRY)
+ return -EINVAL;
+
+ __sync_synchronize();
+
+ /* Link up the entry itself */
+ r = link_entry_into_array(f,
+ &f->header->entry_array_offset,
+ &f->header->n_entries,
+ offset);
+ if (r < 0)
+ return r;
+
+ /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
+
+ if (f->header->head_entry_realtime == 0)
+ f->header->head_entry_realtime = o->entry.realtime;
+
+ f->header->tail_entry_realtime = o->entry.realtime;
+ f->header->tail_entry_monotonic = o->entry.monotonic;
+
+ /* Link up the items */
+ n = journal_file_entry_n_items(o);
+ for (i = 0; i < n; i++) {
+ r = journal_file_link_entry_item(f, o, offset, i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int journal_file_append_entry_internal(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ uint64_t xor_hash,
+ const EntryItem items[], unsigned n_items,
+ uint64_t *seqnum,
+ Object **ret, uint64_t *ret_offset) {
+ uint64_t np;
+ uint64_t osize;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(items || n_items == 0);
+ assert(ts);
+
+ osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
+
+ r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
+ if (r < 0)
+ return r;
+
+ o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
+ memcpy_safe(o->entry.items, items, n_items * sizeof(EntryItem));
+ o->entry.realtime = htole64(ts->realtime);
+ o->entry.monotonic = htole64(ts->monotonic);
+ o->entry.xor_hash = htole64(xor_hash);
+ if (boot_id)
+ f->header->boot_id = *boot_id;
+ o->entry.boot_id = f->header->boot_id;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
+ if (r < 0)
+ return r;
+#endif
+
+ r = journal_file_link_entry(f, o, np);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = np;
+
+ return 0;
+}
+
+void journal_file_post_change(JournalFile *f) {
+ assert(f);
+
+ if (f->fd < 0)
+ return;
+
+ /* inotify() does not receive IN_MODIFY events from file
+ * accesses done via mmap(). After each access we hence
+ * trigger IN_MODIFY by truncating the journal file to its
+ * current size which triggers IN_MODIFY. */
+
+ __sync_synchronize();
+
+ if (ftruncate(f->fd, f->last_stat.st_size) < 0)
+ log_debug_errno(errno, "Failed to truncate file to its own size: %m");
+}
+
+static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userdata) {
+ assert(userdata);
+
+ journal_file_post_change(userdata);
+
+ return 1;
+}
+
+static void schedule_post_change(JournalFile *f) {
+ int r;
+
+ assert(f);
+ assert(f->post_change_timer);
+
+ r = sd_event_source_get_enabled(f->post_change_timer, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get ftruncate timer state: %m");
+ goto fail;
+ }
+ if (r > 0)
+ return;
+
+ r = sd_event_source_set_time_relative(f->post_change_timer, f->post_change_timer_period);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to enable scheduled ftruncate: %m");
+ goto fail;
+ }
+
+ return;
+
+fail:
+ /* On failure, let's simply post the change immediately. */
+ journal_file_post_change(f);
+}
+
+/* Enable coalesced change posting in a timer on the provided sd_event instance */
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timer = NULL;
+ int r;
+
+ assert(f);
+ assert_return(!f->post_change_timer, -EINVAL);
+ assert(e);
+ assert(t);
+
+ r = sd_event_add_time(e, &timer, CLOCK_MONOTONIC, 0, 0, post_change_thunk, f);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(timer, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ f->post_change_timer = TAKE_PTR(timer);
+ f->post_change_timer_period = t;
+
+ return r;
+}
+
+static int entry_item_cmp(const EntryItem *a, const EntryItem *b) {
+ return CMP(le64toh(a->object_offset), le64toh(b->object_offset));
+}
+
+int journal_file_append_entry(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ const struct iovec iovec[], unsigned n_iovec,
+ uint64_t *seqnum,
+ Object **ret, uint64_t *ret_offset) {
+
+ unsigned i;
+ EntryItem *items;
+ int r;
+ uint64_t xor_hash = 0;
+ struct dual_timestamp _ts;
+
+ assert(f);
+ assert(f->header);
+ assert(iovec || n_iovec == 0);
+
+ if (ts) {
+ if (!VALID_REALTIME(ts->realtime))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid realtime timestamp %" PRIu64 ", refusing entry.",
+ ts->realtime);
+ if (!VALID_MONOTONIC(ts->monotonic))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid monotomic timestamp %" PRIu64 ", refusing entry.",
+ ts->monotonic);
+ } else {
+ dual_timestamp_get(&_ts);
+ ts = &_ts;
+ }
+
+#if HAVE_GCRYPT
+ r = journal_file_maybe_append_tag(f, ts->realtime);
+ if (r < 0)
+ return r;
+#endif
+
+ /* alloca() can't take 0, hence let's allocate at least one */
+ items = newa(EntryItem, MAX(1u, n_iovec));
+
+ for (i = 0; i < n_iovec; i++) {
+ uint64_t p;
+ Object *o;
+
+ r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
+ if (r < 0)
+ return r;
+
+ /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+ * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+ * specific record, and give records with otherwise identical position (i.e. match in seqno,
+ * timestamp, …) a stable ordering. But for that we can't have it that the hash of the
+ * objects in each file is different since they are keyed. Hence let's calculate the Jenkins
+ * hash here for that. This also has the benefit that cursors for old and new journal files
+ * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+ * files things are easier, we can just take the value from the stored record directly. */
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+ else
+ xor_hash ^= le64toh(o->data.hash);
+
+ items[i].object_offset = htole64(p);
+ items[i].hash = o->data.hash;
+ }
+
+ /* Order by the position on disk, in order to improve seek
+ * times for rotating media. */
+ typesafe_qsort(items, n_iovec, entry_item_cmp);
+
+ r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, ret_offset);
+
+ /* If the memory mapping triggered a SIGBUS then we return an
+ * IO error and ignore the error code passed down to us, since
+ * it is very likely just an effect of a nullified replacement
+ * mapping page */
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ r = -EIO;
+
+ if (f->post_change_timer)
+ schedule_post_change(f);
+ else
+ journal_file_post_change(f);
+
+ return r;
+}
+
+typedef struct ChainCacheItem {
+ uint64_t first; /* the array at the beginning of the chain */
+ uint64_t array; /* the cached array */
+ uint64_t begin; /* the first item in the cached array */
+ uint64_t total; /* the total number of items in all arrays before this one in the chain */
+ uint64_t last_index; /* the last index we looked at, to optimize locality when bisecting */
+} ChainCacheItem;
+
+static void chain_cache_put(
+ OrderedHashmap *h,
+ ChainCacheItem *ci,
+ uint64_t first,
+ uint64_t array,
+ uint64_t begin,
+ uint64_t total,
+ uint64_t last_index) {
+
+ if (!ci) {
+ /* If the chain item to cache for this chain is the
+ * first one it's not worth caching anything */
+ if (array == first)
+ return;
+
+ if (ordered_hashmap_size(h) >= CHAIN_CACHE_MAX) {
+ ci = ordered_hashmap_steal_first(h);
+ assert(ci);
+ } else {
+ ci = new(ChainCacheItem, 1);
+ if (!ci)
+ return;
+ }
+
+ ci->first = first;
+
+ if (ordered_hashmap_put(h, &ci->first, ci) < 0) {
+ free(ci);
+ return;
+ }
+ } else
+ assert(ci->first == first);
+
+ ci->array = array;
+ ci->begin = begin;
+ ci->total = total;
+ ci->last_index = last_index;
+}
+
+static int generic_array_get(
+ JournalFile *f,
+ uint64_t first,
+ uint64_t i,
+ Object **ret, uint64_t *ret_offset) {
+
+ Object *o;
+ uint64_t p = 0, a, t = 0;
+ int r;
+ ChainCacheItem *ci;
+
+ assert(f);
+
+ a = first;
+
+ /* Try the chain cache first */
+ ci = ordered_hashmap_get(f->chain_cache, &first);
+ if (ci && i > ci->total) {
+ a = ci->array;
+ i -= ci->total;
+ t = ci->total;
+ }
+
+ while (a > 0) {
+ uint64_t k;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ k = journal_file_entry_array_n_items(o);
+ if (i < k) {
+ p = le64toh(o->entry_array.items[i]);
+ goto found;
+ }
+
+ i -= k;
+ t += k;
+ a = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ return 0;
+
+found:
+ /* Let's cache this item for the next invocation */
+ chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 1;
+}
+
+static int generic_array_get_plus_one(
+ JournalFile *f,
+ uint64_t extra,
+ uint64_t first,
+ uint64_t i,
+ Object **ret, uint64_t *ret_offset) {
+
+ Object *o;
+
+ assert(f);
+
+ if (i == 0) {
+ int r;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = extra;
+
+ return 1;
+ }
+
+ return generic_array_get(f, first, i-1, ret, ret_offset);
+}
+
+enum {
+ TEST_FOUND,
+ TEST_LEFT,
+ TEST_RIGHT
+};
+
+static int generic_array_bisect(
+ JournalFile *f,
+ uint64_t first,
+ uint64_t n,
+ uint64_t needle,
+ int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+ direction_t direction,
+ Object **ret,
+ uint64_t *ret_offset,
+ uint64_t *ret_idx) {
+
+ uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
+ bool subtract_one = false;
+ Object *o, *array = NULL;
+ int r;
+ ChainCacheItem *ci;
+
+ assert(f);
+ assert(test_object);
+
+ /* Start with the first array in the chain */
+ a = first;
+
+ ci = ordered_hashmap_get(f->chain_cache, &first);
+ if (ci && n > ci->total && ci->begin != 0) {
+ /* Ah, we have iterated this bisection array chain
+ * previously! Let's see if we can skip ahead in the
+ * chain, as far as the last time. But we can't jump
+ * backwards in the chain, so let's check that
+ * first. */
+
+ r = test_object(f, ci->begin, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_LEFT) {
+ /* OK, what we are looking for is right of the
+ * begin of this EntryArray, so let's jump
+ * straight to previously cached array in the
+ * chain */
+
+ a = ci->array;
+ n -= ci->total;
+ t = ci->total;
+ last_index = ci->last_index;
+ }
+ }
+
+ while (a > 0) {
+ uint64_t left, right, k, lp;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
+ if (r < 0)
+ return r;
+
+ k = journal_file_entry_array_n_items(array);
+ right = MIN(k, n);
+ if (right <= 0)
+ return 0;
+
+ i = right - 1;
+ lp = p = le64toh(array->entry_array.items[i]);
+ if (p <= 0)
+ r = -EBADMSG;
+ else
+ r = test_object(f, p, needle);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short. (1)");
+ n = i;
+ continue;
+ }
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT) {
+ left = 0;
+ right -= 1;
+
+ if (last_index != (uint64_t) -1) {
+ assert(last_index <= right);
+
+ /* If we cached the last index we
+ * looked at, let's try to not to jump
+ * too wildly around and see if we can
+ * limit the range to look at early to
+ * the immediate neighbors of the last
+ * index we looked at. */
+
+ if (last_index > 0) {
+ uint64_t x = last_index - 1;
+
+ p = le64toh(array->entry_array.items[x]);
+ if (p <= 0)
+ return -EBADMSG;
+
+ r = test_object(f, p, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT)
+ right = x;
+ else
+ left = x + 1;
+ }
+
+ if (last_index < right) {
+ uint64_t y = last_index + 1;
+
+ p = le64toh(array->entry_array.items[y]);
+ if (p <= 0)
+ return -EBADMSG;
+
+ r = test_object(f, p, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT)
+ right = y;
+ else
+ left = y + 1;
+ }
+ }
+
+ for (;;) {
+ if (left == right) {
+ if (direction == DIRECTION_UP)
+ subtract_one = true;
+
+ i = left;
+ goto found;
+ }
+
+ assert(left < right);
+ i = (left + right) / 2;
+
+ p = le64toh(array->entry_array.items[i]);
+ if (p <= 0)
+ r = -EBADMSG;
+ else
+ r = test_object(f, p, needle);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short. (2)");
+ right = n = i;
+ continue;
+ }
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT)
+ right = i;
+ else
+ left = i + 1;
+ }
+ }
+
+ if (k >= n) {
+ if (direction == DIRECTION_UP) {
+ i = n;
+ subtract_one = true;
+ goto found;
+ }
+
+ return 0;
+ }
+
+ last_p = lp;
+
+ n -= k;
+ t += k;
+ last_index = (uint64_t) -1;
+ a = le64toh(array->entry_array.next_entry_array_offset);
+ }
+
+ return 0;
+
+found:
+ if (subtract_one && t == 0 && i == 0)
+ return 0;
+
+ /* Let's cache this item for the next invocation */
+ chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
+
+ if (subtract_one && i == 0)
+ p = last_p;
+ else if (subtract_one)
+ p = le64toh(array->entry_array.items[i-1]);
+ else
+ p = le64toh(array->entry_array.items[i]);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ if (ret_idx)
+ *ret_idx = t + i + (subtract_one ? -1 : 0);
+
+ return 1;
+}
+
+static int generic_array_bisect_plus_one(
+ JournalFile *f,
+ uint64_t extra,
+ uint64_t first,
+ uint64_t n,
+ uint64_t needle,
+ int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+ direction_t direction,
+ Object **ret,
+ uint64_t *ret_offset,
+ uint64_t *ret_idx) {
+
+ int r;
+ bool step_back = false;
+ Object *o;
+
+ assert(f);
+ assert(test_object);
+
+ if (n <= 0)
+ return 0;
+
+ /* This bisects the array in object 'first', but first checks
+ * an extra */
+ r = test_object(f, extra, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ /* if we are looking with DIRECTION_UP then we need to first
+ see if in the actual array there is a matching entry, and
+ return the last one of that. But if there isn't any we need
+ to return this one. Hence remember this, and return it
+ below. */
+ if (r == TEST_LEFT)
+ step_back = direction == DIRECTION_UP;
+
+ if (r == TEST_RIGHT) {
+ if (direction == DIRECTION_DOWN)
+ goto found;
+ else
+ return 0;
+ }
+
+ r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, ret_offset, ret_idx);
+
+ if (r == 0 && step_back)
+ goto found;
+
+ if (r > 0 && ret_idx)
+ (*ret_idx)++;
+
+ return r;
+
+found:
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = extra;
+
+ if (ret_idx)
+ *ret_idx = 0;
+
+ return 1;
+}
+
+_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
+ assert(f);
+ assert(p > 0);
+
+ if (p == needle)
+ return TEST_FOUND;
+ else if (p < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
+ uint64_t sq;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ sq = le64toh(READ_NOW(o->entry.seqnum));
+ if (sq == needle)
+ return TEST_FOUND;
+ else if (sq < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+int journal_file_move_to_entry_by_seqnum(
+ JournalFile *f,
+ uint64_t seqnum,
+ direction_t direction,
+ Object **ret,
+ uint64_t *ret_offset) {
+ assert(f);
+ assert(f->header);
+
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret, ret_offset, NULL);
+}
+
+static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
+ Object *o;
+ uint64_t rt;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ rt = le64toh(READ_NOW(o->entry.realtime));
+ if (rt == needle)
+ return TEST_FOUND;
+ else if (rt < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+int journal_file_move_to_entry_by_realtime(
+ JournalFile *f,
+ uint64_t realtime,
+ direction_t direction,
+ Object **ret,
+ uint64_t *ret_offset) {
+ assert(f);
+ assert(f->header);
+
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, ret_offset, NULL);
+}
+
+static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
+ Object *o;
+ uint64_t m;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ m = le64toh(READ_NOW(o->entry.monotonic));
+ if (m == needle)
+ return TEST_FOUND;
+ else if (m < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+static int find_data_object_by_boot_id(
+ JournalFile *f,
+ sd_id128_t boot_id,
+ Object **o,
+ uint64_t *b) {
+
+ char t[STRLEN("_BOOT_ID=") + 32 + 1] = "_BOOT_ID=";
+
+ sd_id128_to_string(boot_id, t + 9);
+ return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
+}
+
+int journal_file_move_to_entry_by_monotonic(
+ JournalFile *f,
+ sd_id128_t boot_id,
+ uint64_t monotonic,
+ direction_t direction,
+ Object **ret,
+ uint64_t *ret_offset) {
+
+ Object *o;
+ int r;
+
+ assert(f);
+
+ r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOENT;
+
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries),
+ monotonic,
+ test_object_monotonic,
+ direction,
+ ret, ret_offset, NULL);
+}
+
+void journal_file_reset_location(JournalFile *f) {
+ f->location_type = LOCATION_HEAD;
+ f->current_offset = 0;
+ f->current_seqnum = 0;
+ f->current_realtime = 0;
+ f->current_monotonic = 0;
+ zero(f->current_boot_id);
+ f->current_xor_hash = 0;
+}
+
+void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset) {
+ f->location_type = LOCATION_SEEK;
+ f->current_offset = offset;
+ f->current_seqnum = le64toh(o->entry.seqnum);
+ f->current_realtime = le64toh(o->entry.realtime);
+ f->current_monotonic = le64toh(o->entry.monotonic);
+ f->current_boot_id = o->entry.boot_id;
+ f->current_xor_hash = le64toh(o->entry.xor_hash);
+}
+
+int journal_file_compare_locations(JournalFile *af, JournalFile *bf) {
+ int r;
+
+ assert(af);
+ assert(af->header);
+ assert(bf);
+ assert(bf->header);
+ assert(af->location_type == LOCATION_SEEK);
+ assert(bf->location_type == LOCATION_SEEK);
+
+ /* If contents, timestamps and seqnum match, these entries are
+ * identical*/
+ if (sd_id128_equal(af->current_boot_id, bf->current_boot_id) &&
+ af->current_monotonic == bf->current_monotonic &&
+ af->current_realtime == bf->current_realtime &&
+ af->current_xor_hash == bf->current_xor_hash &&
+ sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id) &&
+ af->current_seqnum == bf->current_seqnum)
+ return 0;
+
+ if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
+
+ /* If this is from the same seqnum source, compare
+ * seqnums */
+ r = CMP(af->current_seqnum, bf->current_seqnum);
+ if (r != 0)
+ return r;
+
+ /* Wow! This is weird, different data but the same
+ * seqnums? Something is borked, but let's make the
+ * best of it and compare by time. */
+ }
+
+ if (sd_id128_equal(af->current_boot_id, bf->current_boot_id)) {
+
+ /* If the boot id matches, compare monotonic time */
+ r = CMP(af->current_monotonic, bf->current_monotonic);
+ if (r != 0)
+ return r;
+ }
+
+ /* Otherwise, compare UTC time */
+ r = CMP(af->current_realtime, bf->current_realtime);
+ if (r != 0)
+ return r;
+
+ /* Finally, compare by contents */
+ return CMP(af->current_xor_hash, bf->current_xor_hash);
+}
+
+static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) {
+
+ /* Increase or decrease the specified index, in the right direction. */
+
+ if (direction == DIRECTION_DOWN) {
+ if (*i >= n - 1)
+ return 0;
+
+ (*i) ++;
+ } else {
+ if (*i <= 0)
+ return 0;
+
+ (*i) --;
+ }
+
+ return 1;
+}
+
+static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) {
+
+ /* Consider it an error if any of the two offsets is uninitialized */
+ if (old_offset == 0 || new_offset == 0)
+ return false;
+
+ /* If we go down, the new offset must be larger than the old one. */
+ return direction == DIRECTION_DOWN ?
+ new_offset > old_offset :
+ new_offset < old_offset;
+}
+
+int journal_file_next_entry(
+ JournalFile *f,
+ uint64_t p,
+ direction_t direction,
+ Object **ret, uint64_t *ret_offset) {
+
+ uint64_t i, n, ofs;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ n = le64toh(READ_NOW(f->header->n_entries));
+ if (n <= 0)
+ return 0;
+
+ if (p == 0)
+ i = direction == DIRECTION_DOWN ? 0 : n - 1;
+ else {
+ r = generic_array_bisect(f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ p,
+ test_object_offset,
+ DIRECTION_DOWN,
+ NULL, NULL,
+ &i);
+ if (r <= 0)
+ return r;
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ /* And jump to it */
+ for (;;) {
+ r = generic_array_get(f,
+ le64toh(f->header->entry_array_offset),
+ i,
+ ret, &ofs);
+ if (r > 0)
+ break;
+ if (r != -EBADMSG)
+ return r;
+
+ /* OK, so this entry is borked. Most likely some entry didn't get synced to disk properly, let's see if
+ * the next one might work for us instead. */
+ log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i);
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ /* Ensure our array is properly ordered. */
+ if (p > 0 && !check_properly_ordered(ofs, p, direction))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s: entry array not properly ordered at entry %" PRIu64,
+ f->path, i);
+
+ if (ret_offset)
+ *ret_offset = ofs;
+
+ return 1;
+}
+
+int journal_file_next_entry_for_data(
+ JournalFile *f,
+ Object *o, uint64_t p,
+ uint64_t data_offset,
+ direction_t direction,
+ Object **ret, uint64_t *ret_offset) {
+
+ uint64_t i, n, ofs;
+ Object *d;
+ int r;
+
+ assert(f);
+ assert(p > 0 || !o);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ n = le64toh(READ_NOW(d->data.n_entries));
+ if (n <= 0)
+ return n;
+
+ if (!o)
+ i = direction == DIRECTION_DOWN ? 0 : n - 1;
+ else {
+ if (o->object.type != OBJECT_ENTRY)
+ return -EINVAL;
+
+ r = generic_array_bisect_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ p,
+ test_object_offset,
+ DIRECTION_DOWN,
+ NULL, NULL,
+ &i);
+
+ if (r <= 0)
+ return r;
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ for (;;) {
+ r = generic_array_get_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ i,
+ ret, &ofs);
+ if (r > 0)
+ break;
+ if (r != -EBADMSG)
+ return r;
+
+ log_debug_errno(r, "Data entry item %" PRIu64 " is bad, skipping over it.", i);
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ /* Ensure our array is properly ordered. */
+ if (p > 0 && check_properly_ordered(ofs, p, direction))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s data entry array not properly ordered at entry %" PRIu64,
+ f->path, i);
+
+ if (ret_offset)
+ *ret_offset = ofs;
+
+ return 1;
+}
+
+int journal_file_move_to_entry_by_offset_for_data(
+ JournalFile *f,
+ uint64_t data_offset,
+ uint64_t p,
+ direction_t direction,
+ Object **ret, uint64_t *ret_offset) {
+
+ int r;
+ Object *d;
+
+ assert(f);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ p,
+ test_object_offset,
+ direction,
+ ret, ret_offset, NULL);
+}
+
+int journal_file_move_to_entry_by_monotonic_for_data(
+ JournalFile *f,
+ uint64_t data_offset,
+ sd_id128_t boot_id,
+ uint64_t monotonic,
+ direction_t direction,
+ Object **ret, uint64_t *ret_offset) {
+
+ Object *o, *d;
+ int r;
+ uint64_t b, z;
+
+ assert(f);
+
+ /* First, seek by time */
+ r = find_data_object_by_boot_id(f, boot_id, &o, &b);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOENT;
+
+ r = generic_array_bisect_plus_one(f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries),
+ monotonic,
+ test_object_monotonic,
+ direction,
+ NULL, &z, NULL);
+ if (r <= 0)
+ return r;
+
+ /* And now, continue seeking until we find an entry that
+ * exists in both bisection arrays */
+
+ for (;;) {
+ Object *qo;
+ uint64_t p, q;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ r = generic_array_bisect_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ z,
+ test_object_offset,
+ direction,
+ NULL, &p, NULL);
+ if (r <= 0)
+ return r;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
+ if (r < 0)
+ return r;
+
+ r = generic_array_bisect_plus_one(f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries),
+ p,
+ test_object_offset,
+ direction,
+ &qo, &q, NULL);
+
+ if (r <= 0)
+ return r;
+
+ if (p == q) {
+ if (ret)
+ *ret = qo;
+ if (ret_offset)
+ *ret_offset = q;
+
+ return 1;
+ }
+
+ z = q;
+ }
+}
+
+int journal_file_move_to_entry_by_seqnum_for_data(
+ JournalFile *f,
+ uint64_t data_offset,
+ uint64_t seqnum,
+ direction_t direction,
+ Object **ret, uint64_t *ret_offset) {
+
+ Object *d;
+ int r;
+
+ assert(f);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret, ret_offset, NULL);
+}
+
+int journal_file_move_to_entry_by_realtime_for_data(
+ JournalFile *f,
+ uint64_t data_offset,
+ uint64_t realtime,
+ direction_t direction,
+ Object **ret, uint64_t *ret_offset) {
+
+ Object *d;
+ int r;
+
+ assert(f);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
+ if (r < 0)
+ return r;
+
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, ret_offset, NULL);
+}
+
+void journal_file_dump(JournalFile *f) {
+ Object *o;
+ int r;
+ uint64_t p;
+
+ assert(f);
+ assert(f->header);
+
+ journal_file_print_header(f);
+
+ p = le64toh(READ_NOW(f->header->header_size));
+ while (p != 0) {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0)
+ goto fail;
+
+ switch (o->object.type) {
+
+ case OBJECT_UNUSED:
+ printf("Type: OBJECT_UNUSED\n");
+ break;
+
+ case OBJECT_DATA:
+ printf("Type: OBJECT_DATA\n");
+ break;
+
+ case OBJECT_FIELD:
+ printf("Type: OBJECT_FIELD\n");
+ break;
+
+ case OBJECT_ENTRY:
+ printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
+ le64toh(o->entry.seqnum),
+ le64toh(o->entry.monotonic),
+ le64toh(o->entry.realtime));
+ break;
+
+ case OBJECT_FIELD_HASH_TABLE:
+ printf("Type: OBJECT_FIELD_HASH_TABLE\n");
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ printf("Type: OBJECT_DATA_HASH_TABLE\n");
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ printf("Type: OBJECT_ENTRY_ARRAY\n");
+ break;
+
+ case OBJECT_TAG:
+ printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
+ le64toh(o->tag.seqnum),
+ le64toh(o->tag.epoch));
+ break;
+
+ default:
+ printf("Type: unknown (%i)\n", o->object.type);
+ break;
+ }
+
+ if (o->object.flags & OBJECT_COMPRESSION_MASK)
+ printf("Flags: %s\n",
+ object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
+
+ if (p == le64toh(f->header->tail_object_offset))
+ p = 0;
+ else
+ p += ALIGN64(le64toh(o->object.size));
+ }
+
+ return;
+fail:
+ log_error("File corrupt");
+}
+
+static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
+ const char *x;
+
+ x = format_timestamp(buf, l, t);
+ if (x)
+ return x;
+ return " --- ";
+}
+
+void journal_file_print_header(JournalFile *f) {
+ char a[SD_ID128_STRING_MAX], b[SD_ID128_STRING_MAX], c[SD_ID128_STRING_MAX], d[SD_ID128_STRING_MAX];
+ char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
+ struct stat st;
+ char bytes[FORMAT_BYTES_MAX];
+
+ assert(f);
+ assert(f->header);
+
+ printf("File path: %s\n"
+ "File ID: %s\n"
+ "Machine ID: %s\n"
+ "Boot ID: %s\n"
+ "Sequential number ID: %s\n"
+ "State: %s\n"
+ "Compatible flags:%s%s\n"
+ "Incompatible flags:%s%s%s%s%s\n"
+ "Header size: %"PRIu64"\n"
+ "Arena size: %"PRIu64"\n"
+ "Data hash table size: %"PRIu64"\n"
+ "Field hash table size: %"PRIu64"\n"
+ "Rotate suggested: %s\n"
+ "Head sequential number: %"PRIu64" (%"PRIx64")\n"
+ "Tail sequential number: %"PRIu64" (%"PRIx64")\n"
+ "Head realtime timestamp: %s (%"PRIx64")\n"
+ "Tail realtime timestamp: %s (%"PRIx64")\n"
+ "Tail monotonic timestamp: %s (%"PRIx64")\n"
+ "Objects: %"PRIu64"\n"
+ "Entry objects: %"PRIu64"\n",
+ f->path,
+ sd_id128_to_string(f->header->file_id, a),
+ sd_id128_to_string(f->header->machine_id, b),
+ sd_id128_to_string(f->header->boot_id, c),
+ sd_id128_to_string(f->header->seqnum_id, d),
+ f->header->state == STATE_OFFLINE ? "OFFLINE" :
+ f->header->state == STATE_ONLINE ? "ONLINE" :
+ f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
+ JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
+ (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
+ JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
+ JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+ JOURNAL_HEADER_COMPRESSED_ZSTD(f->header) ? " COMPRESSED-ZSTD" : "",
+ JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
+ (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
+ le64toh(f->header->header_size),
+ le64toh(f->header->arena_size),
+ le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+ le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
+ yes_no(journal_file_rotate_suggested(f, 0)),
+ le64toh(f->header->head_entry_seqnum), le64toh(f->header->head_entry_seqnum),
+ le64toh(f->header->tail_entry_seqnum), le64toh(f->header->tail_entry_seqnum),
+ format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)), le64toh(f->header->head_entry_realtime),
+ format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)), le64toh(f->header->tail_entry_realtime),
+ format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), le64toh(f->header->tail_entry_monotonic),
+ le64toh(f->header->n_objects),
+ le64toh(f->header->n_entries));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ printf("Data objects: %"PRIu64"\n"
+ "Data hash table fill: %.1f%%\n",
+ le64toh(f->header->n_data),
+ 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ printf("Field objects: %"PRIu64"\n"
+ "Field hash table fill: %.1f%%\n",
+ le64toh(f->header->n_fields),
+ 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
+ printf("Tag objects: %"PRIu64"\n",
+ le64toh(f->header->n_tags));
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ printf("Entry array objects: %"PRIu64"\n",
+ le64toh(f->header->n_entry_arrays));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth))
+ printf("Deepest field hash chain: %" PRIu64"\n",
+ f->header->field_hash_chain_depth);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth))
+ printf("Deepest data hash chain: %" PRIu64"\n",
+ f->header->data_hash_chain_depth);
+
+ if (fstat(f->fd, &st) >= 0)
+ printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (uint64_t) st.st_blocks * 512ULL));
+}
+
+static int journal_file_warn_btrfs(JournalFile *f) {
+ unsigned attrs;
+ int r;
+
+ assert(f);
+
+ /* Before we write anything, check if the COW logic is turned
+ * off on btrfs. Given our write pattern that is quite
+ * unfriendly to COW file systems this should greatly improve
+ * performance on COW file systems, such as btrfs, at the
+ * expense of data integrity features (which shouldn't be too
+ * bad, given that we do our own checksumming). */
+
+ r = btrfs_is_filesystem(f->fd);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to determine if journal is on btrfs: %m");
+ if (!r)
+ return 0;
+
+ r = read_attr_fd(f->fd, &attrs);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read file attributes: %m");
+
+ if (attrs & FS_NOCOW_FL) {
+ log_debug("Detected btrfs file system with copy-on-write disabled, all is good.");
+ return 0;
+ }
+
+ log_notice("Creating journal file %s on a btrfs file system, and copy-on-write is enabled. "
+ "This is likely to slow down journal access substantially, please consider turning "
+ "off the copy-on-write file attribute on the journal directory, using chattr +C.", f->path);
+
+ return 1;
+}
+
+int journal_file_open(
+ int fd,
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ Set *deferred_closes,
+ JournalFile *template,
+ JournalFile **ret) {
+
+ bool newly_created = false;
+ JournalFile *f;
+ void *h;
+ int r;
+
+ assert(ret);
+ assert(fd >= 0 || fname);
+
+ if (!IN_SET((flags & O_ACCMODE), O_RDONLY, O_RDWR))
+ return -EINVAL;
+
+ if (fname && (flags & O_CREAT) && !endswith(fname, ".journal"))
+ return -EINVAL;
+
+ f = new(JournalFile, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (JournalFile) {
+ .fd = fd,
+ .mode = mode,
+
+ .flags = flags,
+ .prot = prot_from_flags(flags),
+ .writable = (flags & O_ACCMODE) != O_RDONLY,
+
+#if HAVE_ZSTD
+ .compress_zstd = compress,
+#elif HAVE_LZ4
+ .compress_lz4 = compress,
+#elif HAVE_XZ
+ .compress_xz = compress,
+#endif
+ .compress_threshold_bytes = compress_threshold_bytes == (uint64_t) -1 ?
+ DEFAULT_COMPRESS_THRESHOLD :
+ MAX(MIN_COMPRESS_THRESHOLD, compress_threshold_bytes),
+#if HAVE_GCRYPT
+ .seal = seal,
+#endif
+ };
+
+ /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if
+ * people really want that */
+ r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+ if (r < 0) {
+ if (r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");
+ f->keyed_hash = true;
+ } else
+ f->keyed_hash = r;
+
+ if (DEBUG_LOGGING) {
+ static int last_seal = -1, last_compress = -1, last_keyed_hash = -1;
+ static uint64_t last_bytes = UINT64_MAX;
+ char bytes[FORMAT_BYTES_MAX];
+
+ if (last_seal != f->seal ||
+ last_keyed_hash != f->keyed_hash ||
+ last_compress != JOURNAL_FILE_COMPRESS(f) ||
+ last_bytes != f->compress_threshold_bytes) {
+
+ log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+ yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)),
+ format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes));
+ last_seal = f->seal;
+ last_keyed_hash = f->keyed_hash;
+ last_compress = JOURNAL_FILE_COMPRESS(f);
+ last_bytes = f->compress_threshold_bytes;
+ }
+ }
+
+ if (mmap_cache)
+ f->mmap = mmap_cache_ref(mmap_cache);
+ else {
+ f->mmap = mmap_cache_new();
+ if (!f->mmap) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (fname) {
+ f->path = strdup(fname);
+ if (!f->path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ } else {
+ assert(fd >= 0);
+
+ /* If we don't know the path, fill in something explanatory and vaguely useful */
+ if (asprintf(&f->path, "/proc/self/%i", fd) < 0) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ f->chain_cache = ordered_hashmap_new(&uint64_hash_ops);
+ if (!f->chain_cache) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (f->fd < 0) {
+ /* We pass O_NONBLOCK here, so that in case somebody pointed us to some character device node or FIFO
+ * or so, we likely fail quickly than block for long. For regular files O_NONBLOCK has no effect, hence
+ * it doesn't hurt in that case. */
+
+ f->fd = open(f->path, f->flags|O_CLOEXEC|O_NONBLOCK, f->mode);
+ if (f->fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ /* fds we opened here by us should also be closed by us. */
+ f->close_fd = true;
+
+ r = fd_nonblock(f->fd, false);
+ if (r < 0)
+ goto fail;
+ }
+
+ f->cache_fd = mmap_cache_add_fd(f->mmap, f->fd);
+ if (!f->cache_fd) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ goto fail;
+
+ if (f->last_stat.st_size == 0 && f->writable) {
+
+ (void) journal_file_warn_btrfs(f);
+
+ /* Let's attach the creation time to the journal file, so that the vacuuming code knows the age of this
+ * file even if the file might end up corrupted one day... Ideally we'd just use the creation time many
+ * file systems maintain for each file, but the API to query this is very new, hence let's emulate this
+ * via extended attributes. If extended attributes are not supported we'll just skip this, and rely
+ * solely on mtime/atime/ctime of the file. */
+ (void) fd_setcrtime(f->fd, 0);
+
+#if HAVE_GCRYPT
+ /* Try to load the FSPRG state, and if we can't, then
+ * just don't do sealing */
+ if (f->seal) {
+ r = journal_file_fss_load(f);
+ if (r < 0)
+ f->seal = false;
+ }
+#endif
+
+ r = journal_file_init_header(f, template);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ goto fail;
+
+ newly_created = true;
+ }
+
+ if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
+ r = -ENODATA;
+ goto fail;
+ }
+
+ r = mmap_cache_get(f->mmap, f->cache_fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h, NULL);
+ if (r == -EINVAL) {
+ /* Some file systems (jffs2 or p9fs) don't support mmap() properly (or only read-only
+ * mmap()), and return EINVAL in that case. Let's propagate that as a more recognizable error
+ * code. */
+ r = -EAFNOSUPPORT;
+ goto fail;
+ }
+ if (r < 0)
+ goto fail;
+
+ f->header = h;
+
+ if (!newly_created) {
+ set_clear_with_destructor(deferred_closes, journal_file_close);
+
+ r = journal_file_verify_header(f);
+ if (r < 0)
+ goto fail;
+ }
+
+#if HAVE_GCRYPT
+ if (!newly_created && f->writable) {
+ r = journal_file_fss_load(f);
+ if (r < 0)
+ goto fail;
+ }
+#endif
+
+ if (f->writable) {
+ if (metrics) {
+ journal_default_metrics(metrics, f->fd);
+ f->metrics = *metrics;
+ } else if (template)
+ f->metrics = template->metrics;
+
+ r = journal_file_refresh_header(f);
+ if (r < 0)
+ goto fail;
+ }
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_setup(f);
+ if (r < 0)
+ goto fail;
+#endif
+
+ if (newly_created) {
+ r = journal_file_setup_field_hash_table(f);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_setup_data_hash_table(f);
+ if (r < 0)
+ goto fail;
+
+#if HAVE_GCRYPT
+ r = journal_file_append_first_tag(f);
+ if (r < 0)
+ goto fail;
+#endif
+ }
+
+ if (mmap_cache_got_sigbus(f->mmap, f->cache_fd)) {
+ r = -EIO;
+ goto fail;
+ }
+
+ if (template && template->post_change_timer) {
+ r = journal_file_enable_post_change_timer(
+ f,
+ sd_event_source_get_event(template->post_change_timer),
+ template->post_change_timer_period);
+
+ if (r < 0)
+ goto fail;
+ }
+
+ /* The file is opened now successfully, thus we take possession of any passed in fd. */
+ f->close_fd = true;
+
+ *ret = f;
+ return 0;
+
+fail:
+ if (f->cache_fd && mmap_cache_got_sigbus(f->mmap, f->cache_fd))
+ r = -EIO;
+
+ (void) journal_file_close(f);
+
+ return r;
+}
+
+int journal_file_archive(JournalFile *f) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(f);
+
+ if (!f->writable)
+ return -EINVAL;
+
+ /* Is this a journal file that was passed to us as fd? If so, we synthesized a path name for it, and we refuse
+ * rotation, since we don't know the actual path, and couldn't rename the file hence. */
+ if (path_startswith(f->path, "/proc/self/fd"))
+ return -EINVAL;
+
+ if (!endswith(f->path, ".journal"))
+ return -EINVAL;
+
+ if (asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
+ (int) strlen(f->path) - 8, f->path,
+ SD_ID128_FORMAT_VAL(f->header->seqnum_id),
+ le64toh(f->header->head_entry_seqnum),
+ le64toh(f->header->head_entry_realtime)) < 0)
+ return -ENOMEM;
+
+ /* Try to rename the file to the archived version. If the file already was deleted, we'll get ENOENT, let's
+ * ignore that case. */
+ if (rename(f->path, p) < 0 && errno != ENOENT)
+ return -errno;
+
+ /* Sync the rename to disk */
+ (void) fsync_directory_of_file(f->fd);
+
+ /* Set as archive so offlining commits w/state=STATE_ARCHIVED. Previously we would set old_file->header->state
+ * to STATE_ARCHIVED directly here, but journal_file_set_offline() short-circuits when state != STATE_ONLINE,
+ * which would result in the rotated journal never getting fsync() called before closing. Now we simply queue
+ * the archive state by setting an archive bit, leaving the state as STATE_ONLINE so proper offlining
+ * occurs. */
+ f->archive = true;
+
+ /* Currently, btrfs is not very good with out write patterns and fragments heavily. Let's defrag our journal
+ * files when we archive them */
+ f->defrag_on_close = true;
+
+ return 0;
+}
+
+JournalFile* journal_initiate_close(
+ JournalFile *f,
+ Set *deferred_closes) {
+
+ int r;
+
+ assert(f);
+
+ if (deferred_closes) {
+
+ r = set_put(deferred_closes, f);
+ if (r < 0)
+ log_debug_errno(r, "Failed to add file to deferred close set, closing immediately.");
+ else {
+ (void) journal_file_set_offline(f, false);
+ return NULL;
+ }
+ }
+
+ return journal_file_close(f);
+}
+
+int journal_file_rotate(
+ JournalFile **f,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ Set *deferred_closes) {
+
+ JournalFile *new_file = NULL;
+ int r;
+
+ assert(f);
+ assert(*f);
+
+ r = journal_file_archive(*f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_open(
+ -1,
+ (*f)->path,
+ (*f)->flags,
+ (*f)->mode,
+ compress,
+ compress_threshold_bytes,
+ seal,
+ NULL, /* metrics */
+ (*f)->mmap,
+ deferred_closes,
+ *f, /* template */
+ &new_file);
+
+ journal_initiate_close(*f, deferred_closes);
+ *f = new_file;
+
+ return r;
+}
+
+int journal_file_dispose(int dir_fd, const char *fname) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+
+ assert(fname);
+
+ /* Renames a journal file to *.journal~, i.e. to mark it as corruped or otherwise uncleanly shutdown. Note that
+ * this is done without looking into the file or changing any of its contents. The idea is that this is called
+ * whenever something is suspicious and we want to move the file away and make clear that it is not accessed
+ * for writing anymore. */
+
+ if (!endswith(fname, ".journal"))
+ return -EINVAL;
+
+ if (asprintf(&p, "%.*s@%016" PRIx64 "-%016" PRIx64 ".journal~",
+ (int) strlen(fname) - 8, fname,
+ now(CLOCK_REALTIME),
+ random_u64()) < 0)
+ return -ENOMEM;
+
+ if (renameat(dir_fd, fname, dir_fd, p) < 0)
+ return -errno;
+
+ /* btrfs doesn't cope well with our write pattern and fragments heavily. Let's defrag all files we rotate */
+ fd = openat(dir_fd, p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ log_debug_errno(errno, "Failed to open file for defragmentation/FS_NOCOW_FL, ignoring: %m");
+ else {
+ (void) chattr_fd(fd, 0, FS_NOCOW_FL, NULL);
+ (void) btrfs_defrag_fd(fd);
+ }
+
+ return 0;
+}
+
+int journal_file_open_reliably(
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ Set *deferred_closes,
+ JournalFile *template,
+ JournalFile **ret) {
+
+ int r;
+
+ r = journal_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics, mmap_cache,
+ deferred_closes, template, ret);
+ if (!IN_SET(r,
+ -EBADMSG, /* Corrupted */
+ -ENODATA, /* Truncated */
+ -EHOSTDOWN, /* Other machine */
+ -EPROTONOSUPPORT, /* Incompatible feature */
+ -EBUSY, /* Unclean shutdown */
+ -ESHUTDOWN, /* Already archived */
+ -EIO, /* IO error, including SIGBUS on mmap */
+ -EIDRM, /* File has been deleted */
+ -ETXTBSY)) /* File is from the future */
+ return r;
+
+ if ((flags & O_ACCMODE) == O_RDONLY)
+ return r;
+
+ if (!(flags & O_CREAT))
+ return r;
+
+ if (!endswith(fname, ".journal"))
+ return r;
+
+ /* The file is corrupted. Rotate it away and try it again (but only once) */
+ log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
+
+ r = journal_file_dispose(AT_FDCWD, fname);
+ if (r < 0)
+ return r;
+
+ return journal_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics, mmap_cache,
+ deferred_closes, template, ret);
+}
+
+int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p) {
+ uint64_t i, n;
+ uint64_t q, xor_hash = 0;
+ int r;
+ EntryItem *items;
+ dual_timestamp ts;
+ const sd_id128_t *boot_id;
+
+ assert(from);
+ assert(to);
+ assert(o);
+ assert(p);
+
+ if (!to->writable)
+ return -EPERM;
+
+ ts.monotonic = le64toh(o->entry.monotonic);
+ ts.realtime = le64toh(o->entry.realtime);
+ boot_id = &o->entry.boot_id;
+
+ n = journal_file_entry_n_items(o);
+ /* alloca() can't take 0, hence let's allocate at least one */
+ items = newa(EntryItem, MAX(1u, n));
+
+ for (i = 0; i < n; i++) {
+ uint64_t l, h;
+ le64_t le_hash;
+ size_t t;
+ void *data;
+ Object *u;
+
+ q = le64toh(o->entry.items[i].object_offset);
+ le_hash = o->entry.items[i].hash;
+
+ r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
+ if (r < 0)
+ return r;
+
+ if (le_hash != o->data.hash)
+ return -EBADMSG;
+
+ l = le64toh(READ_NOW(o->object.size));
+ if (l < offsetof(Object, data.payload))
+ return -EBADMSG;
+
+ l -= offsetof(Object, data.payload);
+ t = (size_t) l;
+
+ /* We hit the limit on 32bit machines */
+ if ((uint64_t) t != l)
+ return -E2BIG;
+
+ if (o->object.flags & OBJECT_COMPRESSION_MASK) {
+#if HAVE_COMPRESSION
+ size_t rsize = 0;
+
+ r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
+ o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
+ if (r < 0)
+ return r;
+
+ data = from->compress_buffer;
+ l = rsize;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else
+ data = o->data.payload;
+
+ r = journal_file_append_data(to, data, l, &u, &h);
+ if (r < 0)
+ return r;
+
+ if (JOURNAL_HEADER_KEYED_HASH(to->header))
+ xor_hash ^= jenkins_hash64(data, l);
+ else
+ xor_hash ^= le64toh(u->data.hash);
+
+ items[i].object_offset = htole64(h);
+ items[i].hash = u->data.hash;
+
+ r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+ }
+
+ r = journal_file_append_entry_internal(to, &ts, boot_id, xor_hash, items, n,
+ NULL, NULL, NULL);
+
+ if (mmap_cache_got_sigbus(to->mmap, to->cache_fd))
+ return -EIO;
+
+ return r;
+}
+
+void journal_reset_metrics(JournalMetrics *m) {
+ assert(m);
+
+ /* Set everything to "pick automatic values". */
+
+ *m = (JournalMetrics) {
+ .min_use = (uint64_t) -1,
+ .max_use = (uint64_t) -1,
+ .min_size = (uint64_t) -1,
+ .max_size = (uint64_t) -1,
+ .keep_free = (uint64_t) -1,
+ .n_max_files = (uint64_t) -1,
+ };
+}
+
+void journal_default_metrics(JournalMetrics *m, int fd) {
+ char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX], e[FORMAT_BYTES_MAX];
+ struct statvfs ss;
+ uint64_t fs_size = 0;
+
+ assert(m);
+ assert(fd >= 0);
+
+ if (fstatvfs(fd, &ss) >= 0)
+ fs_size = ss.f_frsize * ss.f_blocks;
+ else
+ log_debug_errno(errno, "Failed to determine disk size: %m");
+
+ if (m->max_use == (uint64_t) -1) {
+
+ if (fs_size > 0)
+ m->max_use = CLAMP(PAGE_ALIGN(fs_size / 10), /* 10% of file system size */
+ MAX_USE_LOWER, MAX_USE_UPPER);
+ else
+ m->max_use = MAX_USE_LOWER;
+ } else {
+ m->max_use = PAGE_ALIGN(m->max_use);
+
+ if (m->max_use != 0 && m->max_use < JOURNAL_FILE_SIZE_MIN*2)
+ m->max_use = JOURNAL_FILE_SIZE_MIN*2;
+ }
+
+ if (m->min_use == (uint64_t) -1) {
+ if (fs_size > 0)
+ m->min_use = CLAMP(PAGE_ALIGN(fs_size / 50), /* 2% of file system size */
+ MIN_USE_LOW, MIN_USE_HIGH);
+ else
+ m->min_use = MIN_USE_LOW;
+ }
+
+ if (m->min_use > m->max_use)
+ m->min_use = m->max_use;
+
+ if (m->max_size == (uint64_t) -1)
+ m->max_size = MIN(PAGE_ALIGN(m->max_use / 8), /* 8 chunks */
+ MAX_SIZE_UPPER);
+ else
+ m->max_size = PAGE_ALIGN(m->max_size);
+
+ if (m->max_size != 0) {
+ if (m->max_size < JOURNAL_FILE_SIZE_MIN)
+ m->max_size = JOURNAL_FILE_SIZE_MIN;
+
+ if (m->max_use != 0 && m->max_size*2 > m->max_use)
+ m->max_use = m->max_size*2;
+ }
+
+ if (m->min_size == (uint64_t) -1)
+ m->min_size = JOURNAL_FILE_SIZE_MIN;
+ else
+ m->min_size = CLAMP(PAGE_ALIGN(m->min_size),
+ JOURNAL_FILE_SIZE_MIN,
+ m->max_size ?: UINT64_MAX);
+
+ if (m->keep_free == (uint64_t) -1) {
+ if (fs_size > 0)
+ m->keep_free = MIN(PAGE_ALIGN(fs_size / 20), /* 5% of file system size */
+ KEEP_FREE_UPPER);
+ else
+ m->keep_free = DEFAULT_KEEP_FREE;
+ }
+
+ if (m->n_max_files == (uint64_t) -1)
+ m->n_max_files = DEFAULT_N_MAX_FILES;
+
+ log_debug("Fixed min_use=%s max_use=%s max_size=%s min_size=%s keep_free=%s n_max_files=%" PRIu64,
+ format_bytes(a, sizeof(a), m->min_use),
+ format_bytes(b, sizeof(b), m->max_use),
+ format_bytes(c, sizeof(c), m->max_size),
+ format_bytes(d, sizeof(d), m->min_size),
+ format_bytes(e, sizeof(e), m->keep_free),
+ m->n_max_files);
+}
+
+int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
+ assert(f);
+ assert(f->header);
+ assert(from || to);
+
+ if (from) {
+ if (f->header->head_entry_realtime == 0)
+ return -ENOENT;
+
+ *from = le64toh(f->header->head_entry_realtime);
+ }
+
+ if (to) {
+ if (f->header->tail_entry_realtime == 0)
+ return -ENOENT;
+
+ *to = le64toh(f->header->tail_entry_realtime);
+ }
+
+ return 1;
+}
+
+int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
+ Object *o;
+ uint64_t p;
+ int r;
+
+ assert(f);
+ assert(from || to);
+
+ r = find_data_object_by_boot_id(f, boot_id, &o, &p);
+ if (r <= 0)
+ return r;
+
+ if (le64toh(o->data.n_entries) <= 0)
+ return 0;
+
+ if (from) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
+ if (r < 0)
+ return r;
+
+ *from = le64toh(o->entry.monotonic);
+ }
+
+ if (to) {
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ r = generic_array_get_plus_one(f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries)-1,
+ &o, NULL);
+ if (r <= 0)
+ return r;
+
+ *to = le64toh(o->entry.monotonic);
+ }
+
+ return 1;
+}
+
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
+ assert(f);
+ assert(f->header);
+
+ /* If we gained new header fields we gained new features,
+ * hence suggest a rotation */
+ if (le64toh(f->header->header_size) < sizeof(Header)) {
+ log_debug("%s uses an outdated header, suggesting rotation.", f->path);
+ return true;
+ }
+
+ /* Let's check if the hash tables grew over a certain fill level (75%, borrowing this value from
+ * Java's hash table implementation), and if so suggest a rotation. To calculate the fill level we
+ * need the n_data field, which only exists in newer versions. */
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+ log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
+ f->path,
+ 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
+ le64toh(f->header->n_data),
+ le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+ (unsigned long long) f->last_stat.st_size,
+ f->last_stat.st_size / le64toh(f->header->n_data));
+ return true;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+ log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
+ f->path,
+ 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
+ le64toh(f->header->n_fields),
+ le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
+ return true;
+ }
+
+ /* If there are too many hash collisions somebody is most likely playing games with us. Hence, if our
+ * longest chain is longer than some threshold, let's suggest rotation. */
+ if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) &&
+ le64toh(f->header->data_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+ log_debug("Data hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.",
+ f->path, le64toh(f->header->data_hash_chain_depth));
+ return true;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) &&
+ le64toh(f->header->field_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+ log_debug("Field hash table of %s has deepest hash chain of length at %" PRIu64 ", suggesting rotation.",
+ f->path, le64toh(f->header->field_hash_chain_depth));
+ return true;
+ }
+
+ /* Are the data objects properly indexed by field objects? */
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ le64toh(f->header->n_data) > 0 &&
+ le64toh(f->header->n_fields) == 0)
+ return true;
+
+ if (max_file_usec > 0) {
+ usec_t t, h;
+
+ h = le64toh(f->header->head_entry_realtime);
+ t = now(CLOCK_REALTIME);
+
+ if (h > 0 && t > h + max_file_usec)
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
new file mode 100644
index 0000000..c48d95f
--- /dev/null
+++ b/src/journal/journal-file.h
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/uio.h>
+
+#if HAVE_GCRYPT
+# include <gcrypt.h>
+#endif
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "journal-def.h"
+#include "mmap-cache.h"
+#include "sparse-endian.h"
+#include "time-util.h"
+
+typedef struct JournalMetrics {
+ /* For all these: -1 means "pick automatically", and 0 means "no limit enforced" */
+ uint64_t max_size; /* how large journal files grow at max */
+ uint64_t min_size; /* how large journal files grow at least */
+ uint64_t max_use; /* how much disk space to use in total at max, keep_free permitting */
+ uint64_t min_use; /* how much disk space to use in total at least, even if keep_free says not to */
+ uint64_t keep_free; /* how much to keep free on disk */
+ uint64_t n_max_files; /* how many files to keep around at max */
+} JournalMetrics;
+
+typedef enum direction {
+ DIRECTION_UP,
+ DIRECTION_DOWN
+} direction_t;
+
+typedef enum LocationType {
+ /* The first and last entries, resp. */
+ LOCATION_HEAD,
+ LOCATION_TAIL,
+
+ /* We already read the entry we currently point to, and the
+ * next one to read should probably not be this one again. */
+ LOCATION_DISCRETE,
+
+ /* We should seek to the precise location specified, and
+ * return it, as we haven't read it yet. */
+ LOCATION_SEEK
+} LocationType;
+
+typedef enum OfflineState {
+ OFFLINE_JOINED,
+ OFFLINE_SYNCING,
+ OFFLINE_OFFLINING,
+ OFFLINE_CANCEL,
+ OFFLINE_AGAIN_FROM_SYNCING,
+ OFFLINE_AGAIN_FROM_OFFLINING,
+ OFFLINE_DONE
+} OfflineState;
+
+typedef struct JournalFile {
+ int fd;
+ MMapFileDescriptor *cache_fd;
+
+ mode_t mode;
+
+ int flags;
+ int prot;
+ bool writable:1;
+ bool compress_xz:1;
+ bool compress_lz4:1;
+ bool compress_zstd:1;
+ bool seal:1;
+ bool defrag_on_close:1;
+ bool close_fd:1;
+ bool archive:1;
+ bool keyed_hash:1;
+
+ direction_t last_direction;
+ LocationType location_type;
+ uint64_t last_n_entries;
+
+ char *path;
+ struct stat last_stat;
+ usec_t last_stat_usec;
+
+ Header *header;
+ HashItem *data_hash_table;
+ HashItem *field_hash_table;
+
+ uint64_t current_offset;
+ uint64_t current_seqnum;
+ uint64_t current_realtime;
+ uint64_t current_monotonic;
+ sd_id128_t current_boot_id;
+ uint64_t current_xor_hash;
+
+ JournalMetrics metrics;
+ MMapCache *mmap;
+
+ sd_event_source *post_change_timer;
+ usec_t post_change_timer_period;
+
+ OrderedHashmap *chain_cache;
+
+ pthread_t offline_thread;
+ volatile OfflineState offline_state;
+
+ unsigned last_seen_generation;
+
+ uint64_t compress_threshold_bytes;
+#if HAVE_COMPRESSION
+ void *compress_buffer;
+ size_t compress_buffer_size;
+#endif
+
+#if HAVE_GCRYPT
+ gcry_md_hd_t hmac;
+ bool hmac_running;
+
+ FSSHeader *fss_file;
+ size_t fss_file_size;
+
+ uint64_t fss_start_usec;
+ uint64_t fss_interval_usec;
+
+ void *fsprg_state;
+ size_t fsprg_state_size;
+
+ void *fsprg_seed;
+ size_t fsprg_seed_size;
+#endif
+} JournalFile;
+
+int journal_file_open(
+ int fd,
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ Set *deferred_closes,
+ JournalFile *template,
+ JournalFile **ret);
+
+int journal_file_set_offline(JournalFile *f, bool wait);
+bool journal_file_is_offlining(JournalFile *f);
+JournalFile* journal_file_close(JournalFile *j);
+int journal_file_fstat(JournalFile *f);
+DEFINE_TRIVIAL_CLEANUP_FUNC(JournalFile*, journal_file_close);
+
+int journal_file_open_reliably(
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ uint64_t compress_threshold_bytes,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ Set *deferred_closes,
+ JournalFile *template,
+ JournalFile **ret);
+
+#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
+#define VALID64(x) (((x) & 7ULL) == 0ULL)
+
+/* Use six characters to cover the offsets common in smallish journal
+ * files without adding too many zeros. */
+#define OFSfmt "%06"PRIx64
+
+static inline bool VALID_REALTIME(uint64_t u) {
+ /* This considers timestamps until the year 3112 valid. That should be plenty room... */
+ return u > 0 && u < (1ULL << 55);
+}
+
+static inline bool VALID_MONOTONIC(uint64_t u) {
+ /* This considers timestamps until 1142 years of runtime valid. */
+ return u < (1ULL << 55);
+}
+
+static inline bool VALID_EPOCH(uint64_t u) {
+ /* This allows changing the key for 1142 years, every usec. */
+ return u < (1ULL << 55);
+}
+
+#define JOURNAL_HEADER_CONTAINS(h, field) \
+ (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
+
+#define JOURNAL_HEADER_SEALED(h) \
+ FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_SEALED)
+
+#define JOURNAL_HEADER_COMPRESSED_XZ(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+
+#define JOURNAL_HEADER_COMPRESSED_LZ4(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+
+#define JOURNAL_HEADER_COMPRESSED_ZSTD(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+
+#define JOURNAL_HEADER_KEYED_HASH(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH)
+
+int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret);
+
+uint64_t journal_file_entry_n_items(Object *o) _pure_;
+uint64_t journal_file_entry_array_n_items(Object *o) _pure_;
+uint64_t journal_file_hash_table_n_items(Object *o) _pure_;
+
+int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset);
+int journal_file_append_entry(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ const struct iovec iovec[], unsigned n_iovec,
+ uint64_t *seqno,
+ Object **ret,
+ uint64_t *offset);
+
+int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset);
+int journal_file_find_data_object_with_hash(JournalFile *f, const void *data, uint64_t size, uint64_t hash, Object **ret, uint64_t *offset);
+
+int journal_file_find_field_object(JournalFile *f, const void *field, uint64_t size, Object **ret, uint64_t *offset);
+int journal_file_find_field_object_with_hash(JournalFile *f, const void *field, uint64_t size, uint64_t hash, Object **ret, uint64_t *offset);
+
+void journal_file_reset_location(JournalFile *f);
+void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset);
+int journal_file_compare_locations(JournalFile *af, JournalFile *bf);
+int journal_file_next_entry(JournalFile *f, uint64_t p, direction_t direction, Object **ret, uint64_t *offset);
+
+int journal_file_next_entry_for_data(JournalFile *f, Object *o, uint64_t p, uint64_t data_offset, direction_t direction, Object **ret, uint64_t *offset);
+
+int journal_file_move_to_entry_by_seqnum(JournalFile *f, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_realtime(JournalFile *f, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *offset);
+
+int journal_file_move_to_entry_by_offset_for_data(JournalFile *f, uint64_t data_offset, uint64_t p, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, uint64_t data_offset, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, uint64_t data_offset, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset);
+int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, uint64_t data_offset, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *offset);
+
+int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p);
+
+void journal_file_dump(JournalFile *f);
+void journal_file_print_header(JournalFile *f);
+
+int journal_file_archive(JournalFile *f);
+JournalFile* journal_initiate_close(JournalFile *f, Set *deferred_closes);
+int journal_file_rotate(JournalFile **f, bool compress, uint64_t compress_threshold_bytes, bool seal, Set *deferred_closes);
+
+int journal_file_dispose(int dir_fd, const char *fname);
+
+void journal_file_post_change(JournalFile *f);
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t);
+
+void journal_reset_metrics(JournalMetrics *m);
+void journal_default_metrics(JournalMetrics *m, int fd);
+
+int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to);
+int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec_t *from, usec_t *to);
+
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec);
+
+int journal_file_map_data_hash_table(JournalFile *f);
+int journal_file_map_field_hash_table(JournalFile *f);
+
+static inline bool JOURNAL_FILE_COMPRESS(JournalFile *f) {
+ assert(f);
+ return f->compress_xz || f->compress_lz4 || f->compress_zstd;
+}
+
+uint64_t journal_file_hash_data(JournalFile *f, const void *data, size_t sz);
+
+bool journal_field_valid(const char *p, size_t l, bool allow_protected);
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
new file mode 100644
index 0000000..c2d29aa
--- /dev/null
+++ b/src/journal/journal-internal.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+#include "sd-journal.h"
+
+#include "hashmap.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "list.h"
+#include "set.h"
+
+typedef struct Match Match;
+typedef struct Location Location;
+typedef struct Directory Directory;
+
+typedef enum MatchType {
+ MATCH_DISCRETE,
+ MATCH_OR_TERM,
+ MATCH_AND_TERM
+} MatchType;
+
+struct Match {
+ MatchType type;
+ Match *parent;
+ LIST_FIELDS(Match, matches);
+
+ /* For concrete matches */
+ char *data;
+ size_t size;
+ uint64_t hash; /* old-style jenkins hash. New-style siphash is different per file, hence won't be cached here */
+
+ /* For terms */
+ LIST_HEAD(Match, matches);
+};
+
+struct Location {
+ LocationType type;
+
+ bool seqnum_set:1;
+ bool realtime_set:1;
+ bool monotonic_set:1;
+ bool xor_hash_set:1;
+
+ uint64_t seqnum;
+ sd_id128_t seqnum_id;
+
+ uint64_t realtime;
+
+ uint64_t monotonic;
+ sd_id128_t boot_id;
+
+ uint64_t xor_hash;
+};
+
+struct Directory {
+ char *path;
+ int wd;
+ bool is_root;
+ unsigned last_seen_generation;
+};
+
+struct sd_journal {
+ int toplevel_fd;
+
+ char *path;
+ char *prefix;
+ char *namespace;
+
+ OrderedHashmap *files;
+ IteratedCache *files_cache;
+ MMapCache *mmap;
+
+ Location current_location;
+
+ JournalFile *current_file;
+ uint64_t current_field;
+
+ Match *level0, *level1, *level2;
+
+ pid_t original_pid;
+
+ int inotify_fd;
+ unsigned current_invalidate_counter, last_invalidate_counter;
+ usec_t last_process_usec;
+ unsigned generation;
+
+ /* Iterating through unique fields and their data values */
+ char *unique_field;
+ JournalFile *unique_file;
+ uint64_t unique_offset;
+
+ /* Iterating through known fields */
+ JournalFile *fields_file;
+ uint64_t fields_offset;
+ uint64_t fields_hash_table_index;
+ char *fields_buffer;
+ size_t fields_buffer_allocated;
+
+ int flags;
+
+ bool on_network:1;
+ bool no_new_files:1;
+ bool no_inotify:1;
+ bool unique_file_lost:1; /* File we were iterating over got
+ removed, and there were no more
+ files, so sd_j_enumerate_unique
+ will return a value equal to 0. */
+ bool fields_file_lost:1;
+ bool has_runtime_files:1;
+ bool has_persistent_files:1;
+
+ size_t data_threshold;
+
+ Hashmap *directories_by_path;
+ Hashmap *directories_by_wd;
+
+ Hashmap *errors;
+};
+
+char *journal_make_match_string(sd_journal *j);
+void journal_print_header(sd_journal *j);
+
+#define JOURNAL_FOREACH_DATA_RETVAL(j, data, l, retval) \
+ for (sd_journal_restart_data(j); ((retval) = sd_journal_enumerate_data((j), &(data), &(l))) > 0; )
+
+/* All errors that we might encounter while extracting a field that are not real errors,
+ * but only mean that the field is too large or we don't support the compression. */
+static inline bool JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(int r) {
+ return IN_SET(abs(r),
+ ENOBUFS, /* Field or decompressed field too large */
+ E2BIG, /* Field too large for pointer width */
+ EPROTONOSUPPORT); /* Unsupported compression */
+}
diff --git a/src/journal/journal-send.c b/src/journal/journal-send.c
new file mode 100644
index 0000000..fd3fd7e
--- /dev/null
+++ b/src/journal/journal-send.c
@@ -0,0 +1,569 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <printf.h>
+#include <stddef.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#define SD_JOURNAL_SUPPRESS_LOCATION
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "fileio.h"
+#include "memfd-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+#define ALLOCA_CODE_FUNC(f, func) \
+ do { \
+ size_t _fl; \
+ const char *_func = (func); \
+ char **_f = &(f); \
+ _fl = strlen(_func) + 1; \
+ *_f = newa(char, _fl + 10); \
+ memcpy(*_f, "CODE_FUNC=", 10); \
+ memcpy(*_f + 10, _func, _fl); \
+ } while (false)
+
+/* We open a single fd, and we'll share it with the current process,
+ * all its threads, and all its subprocesses. This means we need to
+ * initialize it atomically, and need to operate on it atomically
+ * never assuming we are the only user */
+
+static int journal_fd(void) {
+ int fd;
+ static int fd_plus_one = 0;
+
+retry:
+ if (fd_plus_one > 0)
+ return fd_plus_one - 1;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ if (!__sync_bool_compare_and_swap(&fd_plus_one, 0, fd+1)) {
+ safe_close(fd);
+ goto retry;
+ }
+
+ return fd;
+}
+
+_public_ int sd_journal_print(int priority, const char *format, ...) {
+ int r;
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_journal_printv(priority, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_journal_printv(int priority, const char *format, va_list ap) {
+ char p[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int) + 1];
+ char sbuf[LINE_MAX + 8] = "MESSAGE=";
+ struct iovec iov[2];
+ int len;
+ va_list aq;
+ char *buffer = sbuf;
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+ assert_return(format, -EINVAL);
+
+ xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK);
+
+ va_copy(aq, ap);
+ len = vsnprintf(buffer + 8, LINE_MAX, format, aq);
+ va_end(aq);
+
+ if (len >= (int)LONG_LINE_MAX - 8)
+ return -ENOBUFS;
+
+ /* Allocate large buffer to accommodate big message */
+ if (len >= LINE_MAX) {
+ buffer = alloca(len + 9);
+ memcpy(buffer, "MESSAGE=", 8);
+ assert_se(vsnprintf(buffer + 8, len + 1, format, ap) == len);
+ }
+
+ /* Strip trailing whitespace, keep prefix whitespace. */
+ (void) strstrip(buffer);
+
+ /* Suppress empty lines */
+ if (isempty(buffer + 8))
+ return 0;
+
+ iov[0] = IOVEC_MAKE_STRING(buffer);
+ iov[1] = IOVEC_MAKE_STRING(p);
+
+ return sd_journal_sendv(iov, 2);
+}
+
+_printf_(1, 0) static int fill_iovec_sprintf(const char *format, va_list ap, int extra, struct iovec **_iov) {
+ PROTECT_ERRNO;
+ int r, n = 0, i = 0, j;
+ struct iovec *iov = NULL;
+
+ assert(_iov);
+
+ if (extra > 0) {
+ n = MAX(extra * 2, extra + 4);
+ iov = malloc0(n * sizeof(struct iovec));
+ if (!iov) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ i = extra;
+ }
+
+ while (format) {
+ struct iovec *c;
+ char *buffer;
+ va_list aq;
+
+ if (i >= n) {
+ n = MAX(i*2, 4);
+ c = reallocarray(iov, n, sizeof(struct iovec));
+ if (!c) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ iov = c;
+ }
+
+ va_copy(aq, ap);
+ if (vasprintf(&buffer, format, aq) < 0) {
+ va_end(aq);
+ r = -ENOMEM;
+ goto fail;
+ }
+ va_end(aq);
+
+ VA_FORMAT_ADVANCE(format, ap);
+
+ (void) strstrip(buffer); /* strip trailing whitespace, keep prefixing whitespace */
+
+ iov[i++] = IOVEC_MAKE_STRING(buffer);
+
+ format = va_arg(ap, char *);
+ }
+
+ *_iov = iov;
+
+ return i;
+
+fail:
+ for (j = 0; j < i; j++)
+ free(iov[j].iov_base);
+
+ free(iov);
+
+ return r;
+}
+
+_public_ int sd_journal_send(const char *format, ...) {
+ int r, i, j;
+ va_list ap;
+ struct iovec *iov = NULL;
+
+ va_start(ap, format);
+ i = fill_iovec_sprintf(format, ap, 0, &iov);
+ va_end(ap);
+
+ if (_unlikely_(i < 0)) {
+ r = i;
+ goto finish;
+ }
+
+ r = sd_journal_sendv(iov, i);
+
+finish:
+ for (j = 0; j < i; j++)
+ free(iov[j].iov_base);
+
+ free(iov);
+
+ return r;
+}
+
+_public_ int sd_journal_sendv(const struct iovec *iov, int n) {
+ PROTECT_ERRNO;
+ int fd, r;
+ _cleanup_close_ int buffer_fd = -1;
+ struct iovec *w;
+ uint64_t *l;
+ int i, j = 0;
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/socket",
+ };
+ struct msghdr mh = {
+ .msg_name = (struct sockaddr*) &sa.sa,
+ .msg_namelen = SOCKADDR_UN_LEN(sa.un),
+ };
+ ssize_t k;
+ bool have_syslog_identifier = false;
+ bool seal = true;
+
+ assert_return(iov, -EINVAL);
+ assert_return(n > 0, -EINVAL);
+
+ w = newa(struct iovec, n * 5 + 3);
+ l = newa(uint64_t, n);
+
+ for (i = 0; i < n; i++) {
+ char *c, *nl;
+
+ if (_unlikely_(!iov[i].iov_base || iov[i].iov_len <= 1))
+ return -EINVAL;
+
+ c = memchr(iov[i].iov_base, '=', iov[i].iov_len);
+ if (_unlikely_(!c || c == iov[i].iov_base))
+ return -EINVAL;
+
+ have_syslog_identifier = have_syslog_identifier ||
+ (c == (char *) iov[i].iov_base + 17 &&
+ startswith(iov[i].iov_base, "SYSLOG_IDENTIFIER"));
+
+ nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len);
+ if (nl) {
+ if (_unlikely_(nl < c))
+ return -EINVAL;
+
+ /* Already includes a newline? Bummer, then
+ * let's write the variable name, then a
+ * newline, then the size (64bit LE), followed
+ * by the data and a final newline */
+
+ w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base);
+ w[j++] = IOVEC_MAKE_STRING("\n");
+
+ l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
+ w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t));
+
+ w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
+ } else
+ /* Nothing special? Then just add the line and
+ * append a newline */
+ w[j++] = iov[i];
+
+ w[j++] = IOVEC_MAKE_STRING("\n");
+ }
+
+ if (!have_syslog_identifier &&
+ string_is_safe(program_invocation_short_name)) {
+
+ /* Implicitly add program_invocation_short_name, if it
+ * is not set explicitly. We only do this for
+ * program_invocation_short_name, and nothing else
+ * since everything else is much nicer to retrieve
+ * from the outside. */
+
+ w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=");
+ w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ w[j++] = IOVEC_MAKE_STRING("\n");
+ }
+
+ fd = journal_fd();
+ if (_unlikely_(fd < 0))
+ return fd;
+
+ mh.msg_iov = w;
+ mh.msg_iovlen = j;
+
+ k = sendmsg(fd, &mh, MSG_NOSIGNAL);
+ if (k >= 0)
+ return 0;
+
+ /* Fail silently if the journal is not available */
+ if (errno == ENOENT)
+ return 0;
+
+ if (!IN_SET(errno, EMSGSIZE, ENOBUFS))
+ return -errno;
+
+ /* Message doesn't fit... Let's dump the data in a memfd or
+ * temporary file and just pass a file descriptor of it to the
+ * other side.
+ *
+ * For the temporary files we use /dev/shm instead of /tmp
+ * here, since we want this to be a tmpfs, and one that is
+ * available from early boot on and where unprivileged users
+ * can create files. */
+ buffer_fd = memfd_new(NULL);
+ if (buffer_fd < 0) {
+ if (buffer_fd == -ENOSYS) {
+ buffer_fd = open_tmpfile_unlinkable("/dev/shm", O_RDWR | O_CLOEXEC);
+ if (buffer_fd < 0)
+ return buffer_fd;
+
+ seal = false;
+ } else
+ return buffer_fd;
+ }
+
+ n = writev(buffer_fd, w, j);
+ if (n < 0)
+ return -errno;
+
+ if (seal) {
+ r = memfd_set_sealed(buffer_fd);
+ if (r < 0)
+ return r;
+ }
+
+ r = send_one_fd_sa(fd, buffer_fd, mh.msg_name, mh.msg_namelen, 0);
+ if (r == -ENOENT)
+ /* Fail silently if the journal is not available */
+ return 0;
+ return r;
+}
+
+static int fill_iovec_perror_and_send(const char *message, int skip, struct iovec iov[]) {
+ PROTECT_ERRNO;
+ size_t n, k;
+
+ k = isempty(message) ? 0 : strlen(message) + 2;
+ n = 8 + k + 256 + 1;
+
+ for (;;) {
+ char buffer[n];
+ char* j;
+
+ errno = 0;
+ j = strerror_r(_saved_errno_, buffer + 8 + k, n - 8 - k);
+ if (errno == 0) {
+ char error[STRLEN("ERRNO=") + DECIMAL_STR_MAX(int) + 1];
+
+ if (j != buffer + 8 + k)
+ memmove(buffer + 8 + k, j, strlen(j)+1);
+
+ memcpy(buffer, "MESSAGE=", 8);
+
+ if (k > 0) {
+ memcpy(buffer + 8, message, k - 2);
+ memcpy(buffer + 8 + k - 2, ": ", 2);
+ }
+
+ xsprintf(error, "ERRNO=%i", _saved_errno_);
+
+ assert_cc(3 == LOG_ERR);
+ iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3");
+ iov[skip+1] = IOVEC_MAKE_STRING(buffer);
+ iov[skip+2] = IOVEC_MAKE_STRING(error);
+
+ return sd_journal_sendv(iov, skip + 3);
+ }
+
+ if (errno != ERANGE)
+ return -errno;
+
+ n *= 2;
+ }
+}
+
+_public_ int sd_journal_perror(const char *message) {
+ struct iovec iovec[3];
+
+ return fill_iovec_perror_and_send(message, 0, iovec);
+}
+
+_public_ int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix) {
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/stdout",
+ };
+ _cleanup_close_ int fd = -1;
+ char *header;
+ size_t l;
+ int r;
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return -errno;
+
+ if (shutdown(fd, SHUT_RD) < 0)
+ return -errno;
+
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ identifier = strempty(identifier);
+
+ l = strlen(identifier);
+ header = newa(char, l + 1 + 1 + 2 + 2 + 2 + 2 + 2);
+
+ memcpy(header, identifier, l);
+ header[l++] = '\n';
+ header[l++] = '\n'; /* unit id */
+ header[l++] = '0' + priority;
+ header[l++] = '\n';
+ header[l++] = '0' + !!level_prefix;
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+
+ r = loop_write(fd, header, l, false);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(fd);
+}
+
+_public_ int sd_journal_print_with_location(int priority, const char *file, const char *line, const char *func, const char *format, ...) {
+ int r;
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_journal_printv_with_location(priority, file, line, func, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_journal_printv_with_location(int priority, const char *file, const char *line, const char *func, const char *format, va_list ap) {
+ char p[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int) + 1];
+ char sbuf[LINE_MAX + 8] = "MESSAGE=";
+ struct iovec iov[5];
+ char *f;
+ int len;
+ char *buffer = sbuf;
+ va_list aq;
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+ assert_return(format, -EINVAL);
+
+ xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK);
+
+ va_copy(aq, ap);
+ len = vsnprintf(buffer + 8, LINE_MAX, format, aq);
+ va_end(aq);
+
+ if (len >= (int)LONG_LINE_MAX - 8)
+ return -ENOBUFS;
+
+ /* Allocate large buffer to accommodate big message */
+ if (len >= LINE_MAX) {
+ buffer = alloca(len + 9);
+ memcpy(buffer, "MESSAGE=", 8);
+ assert_se(vsnprintf(buffer + 8, len + 1, format, ap) == len);
+ }
+
+ /* Strip trailing whitespace, keep prefixing whitespace */
+ (void) strstrip(buffer);
+
+ /* Suppress empty lines */
+ if (isempty(buffer + 8))
+ return 0;
+
+ /* func is initialized from __func__ which is not a macro, but
+ * a static const char[], hence cannot easily be prefixed with
+ * CODE_FUNC=, hence let's do it manually here. */
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(buffer);
+ iov[1] = IOVEC_MAKE_STRING(p);
+ iov[2] = IOVEC_MAKE_STRING(file);
+ iov[3] = IOVEC_MAKE_STRING(line);
+ iov[4] = IOVEC_MAKE_STRING(f);
+
+ return sd_journal_sendv(iov, ELEMENTSOF(iov));
+}
+
+_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
+ _cleanup_free_ struct iovec *iov = NULL;
+ int r, i, j;
+ va_list ap;
+ char *f;
+
+ va_start(ap, format);
+ i = fill_iovec_sprintf(format, ap, 3, &iov);
+ va_end(ap);
+
+ if (_unlikely_(i < 0)) {
+ r = i;
+ goto finish;
+ }
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(file);
+ iov[1] = IOVEC_MAKE_STRING(line);
+ iov[2] = IOVEC_MAKE_STRING(f);
+
+ r = sd_journal_sendv(iov, i);
+
+finish:
+ for (j = 3; j < i; j++)
+ free(iov[j].iov_base);
+
+ return r;
+}
+
+_public_ int sd_journal_sendv_with_location(
+ const char *file, const char *line,
+ const char *func,
+ const struct iovec *iov, int n) {
+
+ struct iovec *niov;
+ char *f;
+
+ assert_return(iov, -EINVAL);
+ assert_return(n > 0, -EINVAL);
+
+ niov = newa(struct iovec, n + 3);
+ memcpy(niov, iov, sizeof(struct iovec) * n);
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ niov[n++] = IOVEC_MAKE_STRING(file);
+ niov[n++] = IOVEC_MAKE_STRING(line);
+ niov[n++] = IOVEC_MAKE_STRING(f);
+
+ return sd_journal_sendv(niov, n);
+}
+
+_public_ int sd_journal_perror_with_location(
+ const char *file, const char *line,
+ const char *func,
+ const char *message) {
+
+ struct iovec iov[6];
+ char *f;
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(file);
+ iov[1] = IOVEC_MAKE_STRING(line);
+ iov[2] = IOVEC_MAKE_STRING(f);
+
+ return fill_iovec_perror_and_send(message, 3, iov);
+}
diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c
new file mode 100644
index 0000000..c173664
--- /dev/null
+++ b/src/journal/journal-vacuum.c
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-vacuum.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "xattr-util.h"
+
+struct vacuum_info {
+ uint64_t usage;
+ char *filename;
+
+ uint64_t realtime;
+
+ sd_id128_t seqnum_id;
+ uint64_t seqnum;
+ bool have_seqnum;
+};
+
+static int vacuum_compare(const struct vacuum_info *a, const struct vacuum_info *b) {
+ int r;
+
+ if (a->have_seqnum && b->have_seqnum &&
+ sd_id128_equal(a->seqnum_id, b->seqnum_id))
+ return CMP(a->seqnum, b->seqnum);
+
+ r = CMP(a->realtime, b->realtime);
+ if (r != 0)
+ return r;
+
+ if (a->have_seqnum && b->have_seqnum)
+ return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
+
+ return strcmp(a->filename, b->filename);
+}
+
+static void patch_realtime(
+ int fd,
+ const char *fn,
+ const struct stat *st,
+ unsigned long long *realtime) {
+
+ usec_t x, crtime = 0;
+
+ /* The timestamp was determined by the file name, but let's
+ * see if the file might actually be older than the file name
+ * suggested... */
+
+ assert(fd >= 0);
+ assert(fn);
+ assert(st);
+ assert(realtime);
+
+ x = timespec_load(&st->st_ctim);
+ if (x > 0 && x != USEC_INFINITY && x < *realtime)
+ *realtime = x;
+
+ x = timespec_load(&st->st_atim);
+ if (x > 0 && x != USEC_INFINITY && x < *realtime)
+ *realtime = x;
+
+ x = timespec_load(&st->st_mtim);
+ if (x > 0 && x != USEC_INFINITY && x < *realtime)
+ *realtime = x;
+
+ /* Let's read the original creation time, if possible. Ideally
+ * we'd just query the creation time the FS might provide, but
+ * unfortunately there's currently no sane API to query
+ * it. Hence let's implement this manually... */
+
+ if (fd_getcrtime_at(fd, fn, &crtime, 0) >= 0) {
+ if (crtime < *realtime)
+ *realtime = crtime;
+ }
+}
+
+static int journal_file_empty(int dir_fd, const char *name) {
+ _cleanup_close_ int fd;
+ struct stat st;
+ le64_t n_entries;
+ ssize_t n;
+
+ fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK|O_NOATIME);
+ if (fd < 0) {
+ /* Maybe failed due to O_NOATIME and lack of privileges? */
+ fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* If an offline file doesn't even have a header we consider it empty */
+ if (st.st_size < (off_t) sizeof(Header))
+ return 1;
+
+ /* If the number of entries is empty, we consider it empty, too */
+ n = pread(fd, &n_entries, sizeof(n_entries), offsetof(Header, n_entries));
+ if (n < 0)
+ return -errno;
+ if (n != sizeof(n_entries))
+ return -EIO;
+
+ return le64toh(n_entries) <= 0;
+}
+
+int journal_directory_vacuum(
+ const char *directory,
+ uint64_t max_use,
+ uint64_t n_max_files,
+ usec_t max_retention_usec,
+ usec_t *oldest_usec,
+ bool verbose) {
+
+ uint64_t sum = 0, freed = 0, n_active_files = 0;
+ size_t n_list = 0, n_allocated = 0, i;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct vacuum_info *list = NULL;
+ usec_t retention_limit = 0;
+ char sbytes[FORMAT_BYTES_MAX];
+ struct dirent *de;
+ int r;
+
+ assert(directory);
+
+ if (max_use <= 0 && max_retention_usec <= 0 && n_max_files <= 0)
+ return 0;
+
+ if (max_retention_usec > 0)
+ retention_limit = usec_sub_unsigned(now(CLOCK_REALTIME), max_retention_usec);
+
+ d = opendir(directory);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(de, d, r = -errno; goto finish) {
+
+ unsigned long long seqnum = 0, realtime;
+ _cleanup_free_ char *p = NULL;
+ sd_id128_t seqnum_id;
+ bool have_seqnum;
+ uint64_t size;
+ struct stat st;
+ size_t q;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ log_debug_errno(errno, "Failed to stat file %s while vacuuming, ignoring: %m", de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ q = strlen(de->d_name);
+
+ if (endswith(de->d_name, ".journal")) {
+
+ /* Vacuum archived files. Active files are
+ * left around */
+
+ if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8) {
+ n_active_files++;
+ continue;
+ }
+
+ if (de->d_name[q-8-16-1] != '-' ||
+ de->d_name[q-8-16-1-16-1] != '-' ||
+ de->d_name[q-8-16-1-16-1-32-1] != '@') {
+ n_active_files++;
+ continue;
+ }
+
+ p = strdup(de->d_name);
+ if (!p) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ de->d_name[q-8-16-1-16-1] = 0;
+ if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
+ n_active_files++;
+ continue;
+ }
+
+ if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
+ n_active_files++;
+ continue;
+ }
+
+ have_seqnum = true;
+
+ } else if (endswith(de->d_name, ".journal~")) {
+ unsigned long long tmp;
+
+ /* Vacuum corrupted files */
+
+ if (q < 1 + 16 + 1 + 16 + 8 + 1) {
+ n_active_files++;
+ continue;
+ }
+
+ if (de->d_name[q-1-8-16-1] != '-' ||
+ de->d_name[q-1-8-16-1-16-1] != '@') {
+ n_active_files++;
+ continue;
+ }
+
+ p = strdup(de->d_name);
+ if (!p) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
+ n_active_files++;
+ continue;
+ }
+
+ have_seqnum = false;
+ } else {
+ /* We do not vacuum unknown files! */
+ log_debug("Not vacuuming unknown file %s.", de->d_name);
+ continue;
+ }
+
+ size = 512UL * (uint64_t) st.st_blocks;
+
+ r = journal_file_empty(dirfd(d), p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed check if %s is empty, ignoring: %m", p);
+ continue;
+ }
+ if (r > 0) {
+ /* Always vacuum empty non-online files. */
+
+ r = unlinkat_deallocate(dirfd(d), p, 0);
+ if (r >= 0) {
+
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Deleted empty archived journal %s/%s (%s).", directory, p, format_bytes(sbytes, sizeof(sbytes), size));
+
+ freed += size;
+ } else if (r != -ENOENT)
+ log_warning_errno(r, "Failed to delete empty archived journal %s/%s: %m", directory, p);
+
+ continue;
+ }
+
+ patch_realtime(dirfd(d), p, &st, &realtime);
+
+ if (!GREEDY_REALLOC(list, n_allocated, n_list + 1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ list[n_list++] = (struct vacuum_info) {
+ .filename = TAKE_PTR(p),
+ .usage = size,
+ .seqnum = seqnum,
+ .realtime = realtime,
+ .seqnum_id = seqnum_id,
+ .have_seqnum = have_seqnum,
+ };
+
+ sum += size;
+ }
+
+ typesafe_qsort(list, n_list, vacuum_compare);
+
+ for (i = 0; i < n_list; i++) {
+ uint64_t left;
+
+ left = n_active_files + n_list - i;
+
+ if ((max_retention_usec <= 0 || list[i].realtime >= retention_limit) &&
+ (max_use <= 0 || sum <= max_use) &&
+ (n_max_files <= 0 || left <= n_max_files))
+ break;
+
+ r = unlinkat_deallocate(dirfd(d), list[i].filename, 0);
+ if (r >= 0) {
+ log_full(verbose ? LOG_INFO : LOG_DEBUG, "Deleted archived journal %s/%s (%s).", directory, list[i].filename, format_bytes(sbytes, sizeof(sbytes), list[i].usage));
+ freed += list[i].usage;
+
+ if (list[i].usage < sum)
+ sum -= list[i].usage;
+ else
+ sum = 0;
+
+ } else if (r != -ENOENT)
+ log_warning_errno(r, "Failed to delete archived journal %s/%s: %m", directory, list[i].filename);
+ }
+
+ if (oldest_usec && i < n_list && (*oldest_usec == 0 || list[i].realtime < *oldest_usec))
+ *oldest_usec = list[i].realtime;
+
+ r = 0;
+
+finish:
+ for (i = 0; i < n_list; i++)
+ free(list[i].filename);
+ free(list);
+
+ log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals from %s.", format_bytes(sbytes, sizeof(sbytes), freed), directory);
+
+ return r;
+}
diff --git a/src/journal/journal-vacuum.h b/src/journal/journal-vacuum.h
new file mode 100644
index 0000000..d87c847
--- /dev/null
+++ b/src/journal/journal-vacuum.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "time-util.h"
+
+int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t n_max_files, usec_t max_retention_usec, usec_t *oldest_usec, bool verbose);
diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
new file mode 100644
index 0000000..6ea2f4c
--- /dev/null
+++ b/src/journal/journal-verify.c
@@ -0,0 +1,1327 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-verify.h"
+#include "lookup3.h"
+#include "macro.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void draw_progress(uint64_t p, usec_t *last_usec) {
+ unsigned n, i, j, k;
+ usec_t z, x;
+
+ if (!on_tty())
+ return;
+
+ z = now(CLOCK_MONOTONIC);
+ x = *last_usec;
+
+ if (x != 0 && x + 40 * USEC_PER_MSEC > z)
+ return;
+
+ *last_usec = z;
+
+ n = (3 * columns()) / 4;
+ j = (n * (unsigned) p) / 65535ULL;
+ k = n - j;
+
+ fputs("\r", stdout);
+ if (colors_enabled())
+ fputs("\x1B[?25l" ANSI_HIGHLIGHT_GREEN, stdout);
+
+ for (i = 0; i < j; i++)
+ fputs("\xe2\x96\x88", stdout);
+
+ fputs(ansi_normal(), stdout);
+
+ for (i = 0; i < k; i++)
+ fputs("\xe2\x96\x91", stdout);
+
+ printf(" %3"PRIu64"%%", 100U * p / 65535U);
+
+ fputs("\r", stdout);
+ if (colors_enabled())
+ fputs("\x1B[?25h", stdout);
+
+ fflush(stdout);
+}
+
+static uint64_t scale_progress(uint64_t scale, uint64_t p, uint64_t m) {
+ /* Calculates scale * p / m, but handles m == 0 safely, and saturates.
+ * Currently all callers use m >= 1, but we keep the check to be defensive.
+ */
+
+ if (p >= m || m == 0) // lgtm[cpp/constant-comparison]
+ return scale;
+
+ return scale * p / m;
+}
+
+static void flush_progress(void) {
+ unsigned n, i;
+
+ if (!on_tty())
+ return;
+
+ n = (3 * columns()) / 4;
+
+ putchar('\r');
+
+ for (i = 0; i < n + 5; i++)
+ putchar(' ');
+
+ putchar('\r');
+ fflush(stdout);
+}
+
+#define debug(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_debug(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define warning(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_warning(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define error(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_error(OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define error_errno(_offset, error, _fmt, ...) do { \
+ flush_progress(); \
+ log_error_errno(error, OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \
+ } while (0)
+
+static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o) {
+ uint64_t i;
+
+ assert(f);
+ assert(offset);
+ assert(o);
+
+ /* This does various superficial tests about the length an
+ * possible field values. It does not follow any references to
+ * other objects. */
+
+ if ((o->object.flags & OBJECT_COMPRESSED_XZ) &&
+ o->object.type != OBJECT_DATA) {
+ error(offset, "Found compressed object that isn't of type DATA, which is not allowed.");
+ return -EBADMSG;
+ }
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA: {
+ uint64_t h1, h2;
+ int compression, r;
+
+ if (le64toh(o->data.entry_offset) == 0)
+ warning(offset, "Unused data (entry_offset==0)");
+
+ if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0)) {
+ error(offset, "Bad n_entries: %"PRIu64, le64toh(o->data.n_entries));
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0) {
+ error(offset, "Bad object size (<= %zu): %"PRIu64,
+ offsetof(DataObject, payload),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ h1 = le64toh(o->data.hash);
+
+ compression = o->object.flags & OBJECT_COMPRESSION_MASK;
+ if (compression) {
+ _cleanup_free_ void *b = NULL;
+ size_t alloc = 0, b_size;
+
+ r = decompress_blob(compression,
+ o->data.payload,
+ le64toh(o->object.size) - offsetof(Object, data.payload),
+ &b, &alloc, &b_size, 0);
+ if (r < 0) {
+ error_errno(offset, r, "%s decompression failed: %m",
+ object_compressed_to_string(compression));
+ return r;
+ }
+
+ h2 = journal_file_hash_data(f, b, b_size);
+ } else
+ h2 = journal_file_hash_data(f, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+
+ if (h1 != h2) {
+ error(offset, "Invalid hash (%08"PRIx64" vs. %08"PRIx64, h1, h2);
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->data.next_hash_offset)) ||
+ !VALID64(le64toh(o->data.next_field_offset)) ||
+ !VALID64(le64toh(o->data.entry_offset)) ||
+ !VALID64(le64toh(o->data.entry_array_offset))) {
+ error(offset, "Invalid offset (next_hash_offset="OFSfmt", next_field_offset="OFSfmt", entry_offset="OFSfmt", entry_array_offset="OFSfmt,
+ le64toh(o->data.next_hash_offset),
+ le64toh(o->data.next_field_offset),
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset));
+ return -EBADMSG;
+ }
+
+ break;
+ }
+
+ case OBJECT_FIELD:
+ if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0) {
+ error(offset,
+ "Bad field size (<= %zu): %"PRIu64,
+ offsetof(FieldObject, payload),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->field.next_hash_offset)) ||
+ !VALID64(le64toh(o->field.head_data_offset))) {
+ error(offset,
+ "Invalid offset (next_hash_offset="OFSfmt", head_data_offset="OFSfmt,
+ le64toh(o->field.next_hash_offset),
+ le64toh(o->field.head_data_offset));
+ return -EBADMSG;
+ }
+ break;
+
+ case OBJECT_ENTRY:
+ if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0) {
+ error(offset,
+ "Bad entry size (<= %zu): %"PRIu64,
+ offsetof(EntryObject, items),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0) {
+ error(offset,
+ "Invalid number items in entry: %"PRIu64,
+ (le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem));
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->entry.seqnum) <= 0) {
+ error(offset,
+ "Invalid entry seqnum: %"PRIx64,
+ le64toh(o->entry.seqnum));
+ return -EBADMSG;
+ }
+
+ if (!VALID_REALTIME(le64toh(o->entry.realtime))) {
+ error(offset,
+ "Invalid entry realtime timestamp: %"PRIu64,
+ le64toh(o->entry.realtime));
+ return -EBADMSG;
+ }
+
+ if (!VALID_MONOTONIC(le64toh(o->entry.monotonic))) {
+ error(offset,
+ "Invalid entry monotonic timestamp: %"PRIu64,
+ le64toh(o->entry.monotonic));
+ return -EBADMSG;
+ }
+
+ for (i = 0; i < journal_file_entry_n_items(o); i++) {
+ if (le64toh(o->entry.items[i].object_offset) == 0 ||
+ !VALID64(le64toh(o->entry.items[i].object_offset))) {
+ error(offset,
+ "Invalid entry item (%"PRIu64"/%"PRIu64" offset: "OFSfmt,
+ i, journal_file_entry_n_items(o),
+ le64toh(o->entry.items[i].object_offset));
+ return -EBADMSG;
+ }
+ }
+
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_FIELD_HASH_TABLE:
+ if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
+ (le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0) {
+ error(offset,
+ "Invalid %s hash table size: %"PRIu64,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ for (i = 0; i < journal_file_hash_table_n_items(o); i++) {
+ if (o->hash_table.items[i].head_hash_offset != 0 &&
+ !VALID64(le64toh(o->hash_table.items[i].head_hash_offset))) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64") head_hash_offset: "OFSfmt,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].head_hash_offset));
+ return -EBADMSG;
+ }
+ if (o->hash_table.items[i].tail_hash_offset != 0 &&
+ !VALID64(le64toh(o->hash_table.items[i].tail_hash_offset))) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64") tail_hash_offset: "OFSfmt,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].tail_hash_offset));
+ return -EBADMSG;
+ }
+
+ if ((o->hash_table.items[i].head_hash_offset != 0) !=
+ (o->hash_table.items[i].tail_hash_offset != 0)) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64"): head_hash_offset="OFSfmt" tail_hash_offset="OFSfmt,
+ o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].head_hash_offset),
+ le64toh(o->hash_table.items[i].tail_hash_offset));
+ return -EBADMSG;
+ }
+ }
+
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
+ (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0) {
+ error(offset,
+ "Invalid object entry array size: %"PRIu64,
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->entry_array.next_entry_array_offset))) {
+ error(offset,
+ "Invalid object entry array next_entry_array_offset: "OFSfmt,
+ le64toh(o->entry_array.next_entry_array_offset));
+ return -EBADMSG;
+ }
+
+ for (i = 0; i < journal_file_entry_array_n_items(o); i++)
+ if (le64toh(o->entry_array.items[i]) != 0 &&
+ !VALID64(le64toh(o->entry_array.items[i]))) {
+ error(offset,
+ "Invalid object entry array item (%"PRIu64"/%"PRIu64"): "OFSfmt,
+ i, journal_file_entry_array_n_items(o),
+ le64toh(o->entry_array.items[i]));
+ return -EBADMSG;
+ }
+
+ break;
+
+ case OBJECT_TAG:
+ if (le64toh(o->object.size) != sizeof(TagObject)) {
+ error(offset,
+ "Invalid object tag size: %"PRIu64,
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if (!VALID_EPOCH(le64toh(o->tag.epoch))) {
+ error(offset,
+ "Invalid object tag epoch: %"PRIu64,
+ le64toh(o->tag.epoch));
+ return -EBADMSG;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+static int write_uint64(int fd, uint64_t p) {
+ ssize_t k;
+
+ k = write(fd, &p, sizeof(p));
+ if (k < 0)
+ return -errno;
+ if (k != sizeof(p))
+ return -EIO;
+
+ return 0;
+}
+
+static int contains_uint64(MMapCache *m, MMapFileDescriptor *f, uint64_t n, uint64_t p) {
+ uint64_t a, b;
+ int r;
+
+ assert(m);
+ assert(f);
+
+ /* Bisection ... */
+
+ a = 0; b = n;
+ while (a < b) {
+ uint64_t c, *z;
+
+ c = (a + b) / 2;
+
+ r = mmap_cache_get(m, f, PROT_READ|PROT_WRITE, 0, false, c * sizeof(uint64_t), sizeof(uint64_t), NULL, (void **) &z, NULL);
+ if (r < 0)
+ return r;
+
+ if (*z == p)
+ return 1;
+
+ if (a + 1 >= b)
+ return 0;
+
+ if (p < *z)
+ b = c;
+ else
+ a = c;
+ }
+
+ return 0;
+}
+
+static int entry_points_to_data(
+ JournalFile *f,
+ MMapFileDescriptor *cache_entry_fd,
+ uint64_t n_entries,
+ uint64_t entry_p,
+ uint64_t data_p) {
+
+ int r;
+ uint64_t i, n, a;
+ Object *o;
+ bool found = false;
+
+ assert(f);
+ assert(cache_entry_fd);
+
+ if (!contains_uint64(f->mmap, cache_entry_fd, n_entries, entry_p)) {
+ error(data_p, "Data object references invalid entry at "OFSfmt, entry_p);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, entry_p, &o);
+ if (r < 0)
+ return r;
+
+ n = journal_file_entry_n_items(o);
+ for (i = 0; i < n; i++)
+ if (le64toh(o->entry.items[i].object_offset) == data_p) {
+ found = true;
+ break;
+ }
+
+ if (!found) {
+ error(entry_p, "Data object at "OFSfmt" not referenced by linked entry", data_p);
+ return -EBADMSG;
+ }
+
+ /* Check if this entry is also in main entry array. Since the
+ * main entry array has already been verified we can rely on
+ * its consistency. */
+
+ i = 0;
+ n = le64toh(f->header->n_entries);
+ a = le64toh(f->header->entry_array_offset);
+
+ while (i < n) {
+ uint64_t m, u;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ m = journal_file_entry_array_n_items(o);
+ u = MIN(n - i, m);
+
+ if (entry_p <= le64toh(o->entry_array.items[u-1])) {
+ uint64_t x, y, z;
+
+ x = 0;
+ y = u;
+
+ while (x < y) {
+ z = (x + y) / 2;
+
+ if (le64toh(o->entry_array.items[z]) == entry_p)
+ return 0;
+
+ if (x + 1 >= y)
+ break;
+
+ if (entry_p < le64toh(o->entry_array.items[z]))
+ y = z;
+ else
+ x = z;
+ }
+
+ error(entry_p, "Entry object doesn't exist in main entry array");
+ return -EBADMSG;
+ }
+
+ i += u;
+ a = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ return 0;
+}
+
+static int verify_data(
+ JournalFile *f,
+ Object *o, uint64_t p,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays) {
+
+ uint64_t i, n, a, last, q;
+ int r;
+
+ assert(f);
+ assert(o);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+
+ n = le64toh(o->data.n_entries);
+ a = le64toh(o->data.entry_array_offset);
+
+ /* Entry array means at least two objects */
+ if (a && n < 2) {
+ error(p, "Entry array present (entry_array_offset="OFSfmt", but n_entries=%"PRIu64")", a, n);
+ return -EBADMSG;
+ }
+
+ if (n == 0)
+ return 0;
+
+ /* We already checked that earlier */
+ assert(o->data.entry_offset);
+
+ last = q = le64toh(o->data.entry_offset);
+ r = entry_points_to_data(f, cache_entry_fd, n_entries, q, p);
+ if (r < 0)
+ return r;
+
+ i = 1;
+ while (i < n) {
+ uint64_t next, m, j;
+
+ if (a == 0) {
+ error(p, "Array chain too short");
+ return -EBADMSG;
+ }
+
+ if (!contains_uint64(f->mmap, cache_entry_array_fd, n_entry_arrays, a)) {
+ error(p, "Invalid array offset "OFSfmt, a);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->entry_array.next_entry_array_offset);
+ if (next != 0 && next <= a) {
+ error(p, "Array chain has cycle (jumps back from "OFSfmt" to "OFSfmt")", a, next);
+ return -EBADMSG;
+ }
+
+ m = journal_file_entry_array_n_items(o);
+ for (j = 0; i < n && j < m; i++, j++) {
+
+ q = le64toh(o->entry_array.items[j]);
+ if (q <= last) {
+ error(p, "Data object's entry array not sorted");
+ return -EBADMSG;
+ }
+ last = q;
+
+ r = entry_points_to_data(f, cache_entry_fd, n_entries, q, p);
+ if (r < 0)
+ return r;
+
+ /* Pointer might have moved, reposition */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+ }
+
+ a = next;
+ }
+
+ return 0;
+}
+
+static int verify_hash_table(
+ JournalFile *f,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays,
+ usec_t *last_usec,
+ bool show_progress) {
+
+ uint64_t i, n;
+ int r;
+
+ assert(f);
+ assert(cache_data_fd);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+ assert(last_usec);
+
+ n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ if (n <= 0)
+ return 0;
+
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map data hash table: %m");
+
+ for (i = 0; i < n; i++) {
+ uint64_t last = 0, p;
+
+ if (show_progress)
+ draw_progress(0xC000 + scale_progress(0x3FFF, i, n), last_usec);
+
+ p = le64toh(f->data_hash_table[i].head_hash_offset);
+ while (p != 0) {
+ Object *o;
+ uint64_t next;
+
+ if (!contains_uint64(f->mmap, cache_data_fd, n_data, p)) {
+ error(p, "Invalid data object at hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->data.next_hash_offset);
+ if (next != 0 && next <= p) {
+ error(p, "Hash chain has a cycle in hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->data.hash) % n != i) {
+ error(p, "Hash value mismatch in hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = verify_data(f, o, p, cache_entry_fd, n_entries, cache_entry_array_fd, n_entry_arrays);
+ if (r < 0)
+ return r;
+
+ last = p;
+ p = next;
+ }
+
+ if (last != le64toh(f->data_hash_table[i].tail_hash_offset)) {
+ error(p, "Tail hash pointer mismatch in hash table");
+ return -EBADMSG;
+ }
+ }
+
+ return 0;
+}
+
+static int data_object_in_hash_table(JournalFile *f, uint64_t hash, uint64_t p) {
+ uint64_t n, h, q;
+ int r;
+ assert(f);
+
+ n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ if (n <= 0)
+ return 0;
+
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map data hash table: %m");
+
+ h = hash % n;
+
+ q = le64toh(f->data_hash_table[h].head_hash_offset);
+ while (q != 0) {
+ Object *o;
+
+ if (p == q)
+ return 1;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, q, &o);
+ if (r < 0)
+ return r;
+
+ q = le64toh(o->data.next_hash_offset);
+ }
+
+ return 0;
+}
+
+static int verify_entry(
+ JournalFile *f,
+ Object *o, uint64_t p,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data) {
+
+ uint64_t i, n;
+ int r;
+
+ assert(f);
+ assert(o);
+ assert(cache_data_fd);
+
+ n = journal_file_entry_n_items(o);
+ for (i = 0; i < n; i++) {
+ uint64_t q, h;
+ Object *u;
+
+ q = le64toh(o->entry.items[i].object_offset);
+ h = le64toh(o->entry.items[i].hash);
+
+ if (!contains_uint64(f->mmap, cache_data_fd, n_data, q)) {
+ error(p, "Invalid data object of entry");
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, q, &u);
+ if (r < 0)
+ return r;
+
+ if (le64toh(u->data.hash) != h) {
+ error(p, "Hash mismatch for data object of entry");
+ return -EBADMSG;
+ }
+
+ r = data_object_in_hash_table(f, h, q);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ error(p, "Data object missing from hash table");
+ return -EBADMSG;
+ }
+ }
+
+ return 0;
+}
+
+static int verify_entry_array(
+ JournalFile *f,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays,
+ usec_t *last_usec,
+ bool show_progress) {
+
+ uint64_t i = 0, a, n, last = 0;
+ int r;
+
+ assert(f);
+ assert(cache_data_fd);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+ assert(last_usec);
+
+ n = le64toh(f->header->n_entries);
+ a = le64toh(f->header->entry_array_offset);
+ while (i < n) {
+ uint64_t next, m, j;
+ Object *o;
+
+ if (show_progress)
+ draw_progress(0x8000 + scale_progress(0x3FFF, i, n), last_usec);
+
+ if (a == 0) {
+ error(a, "Array chain too short at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ if (!contains_uint64(f->mmap, cache_entry_array_fd, n_entry_arrays, a)) {
+ error(a, "Invalid array %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->entry_array.next_entry_array_offset);
+ if (next != 0 && next <= a) {
+ error(a, "Array chain has cycle at %"PRIu64" of %"PRIu64" (jumps back from to "OFSfmt")", i, n, next);
+ return -EBADMSG;
+ }
+
+ m = journal_file_entry_array_n_items(o);
+ for (j = 0; i < n && j < m; i++, j++) {
+ uint64_t p;
+
+ p = le64toh(o->entry_array.items[j]);
+ if (p <= last) {
+ error(a, "Entry array not sorted at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+ last = p;
+
+ if (!contains_uint64(f->mmap, cache_entry_fd, n_entries, p)) {
+ error(a, "Invalid array entry at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ r = verify_entry(f, o, p, cache_data_fd, n_data);
+ if (r < 0)
+ return r;
+
+ /* Pointer might have moved, reposition */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+ }
+
+ a = next;
+ }
+
+ return 0;
+}
+
+int journal_file_verify(
+ JournalFile *f,
+ const char *key,
+ usec_t *first_contained, usec_t *last_validated, usec_t *last_contained,
+ bool show_progress) {
+ int r;
+ Object *o;
+ uint64_t p = 0, last_epoch = 0, last_tag_realtime = 0, last_sealed_realtime = 0;
+
+ uint64_t entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
+ sd_id128_t entry_boot_id;
+ bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
+ uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0, n_tags = 0;
+ usec_t last_usec = 0;
+ int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
+ MMapFileDescriptor *cache_data_fd = NULL, *cache_entry_fd = NULL, *cache_entry_array_fd = NULL;
+ unsigned i;
+ bool found_last = false;
+ const char *tmp_dir = NULL;
+
+#if HAVE_GCRYPT
+ uint64_t last_tag = 0;
+#endif
+ assert(f);
+
+ if (key) {
+#if HAVE_GCRYPT
+ r = journal_file_parse_verification_key(f, key);
+ if (r < 0) {
+ log_error("Failed to parse seed.");
+ return r;
+ }
+#else
+ return -EOPNOTSUPP;
+#endif
+ } else if (f->seal)
+ return -ENOKEY;
+
+ r = var_tmp_dir(&tmp_dir);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine temporary directory: %m");
+ goto fail;
+ }
+
+ data_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (data_fd < 0) {
+ r = log_error_errno(data_fd, "Failed to create data file: %m");
+ goto fail;
+ }
+
+ entry_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (entry_fd < 0) {
+ r = log_error_errno(entry_fd, "Failed to create entry file: %m");
+ goto fail;
+ }
+
+ entry_array_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (entry_array_fd < 0) {
+ r = log_error_errno(entry_array_fd,
+ "Failed to create entry array file: %m");
+ goto fail;
+ }
+
+ cache_data_fd = mmap_cache_add_fd(f->mmap, data_fd);
+ if (!cache_data_fd) {
+ r = log_oom();
+ goto fail;
+ }
+
+ cache_entry_fd = mmap_cache_add_fd(f->mmap, entry_fd);
+ if (!cache_entry_fd) {
+ r = log_oom();
+ goto fail;
+ }
+
+ cache_entry_array_fd = mmap_cache_add_fd(f->mmap, entry_array_fd);
+ if (!cache_entry_array_fd) {
+ r = log_oom();
+ goto fail;
+ }
+
+ if (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SUPPORTED) {
+ log_error("Cannot verify file with unknown extensions.");
+ r = -EOPNOTSUPP;
+ goto fail;
+ }
+
+ for (i = 0; i < sizeof(f->header->reserved); i++)
+ if (f->header->reserved[i] != 0) {
+ error(offsetof(Header, reserved[i]), "Reserved field is non-zero");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ /* First iteration: we go through all objects, verify the
+ * superficial structure, headers, hashes. */
+
+ p = le64toh(f->header->header_size);
+ for (;;) {
+ /* Early exit if there are no objects in the file, at all */
+ if (le64toh(f->header->tail_object_offset) == 0)
+ break;
+
+ if (show_progress)
+ draw_progress(scale_progress(0x7FFF, p, le64toh(f->header->tail_object_offset)), &last_usec);
+
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0) {
+ error(p, "Invalid object");
+ goto fail;
+ }
+
+ if (p > le64toh(f->header->tail_object_offset)) {
+ error(offsetof(Header, tail_object_offset), "Invalid tail object pointer");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ n_objects++;
+
+ r = journal_file_object_verify(f, p, o);
+ if (r < 0) {
+ error_errno(p, r, "Invalid object contents: %m");
+ goto fail;
+ }
+
+ if (!!(o->object.flags & OBJECT_COMPRESSED_XZ) +
+ !!(o->object.flags & OBJECT_COMPRESSED_LZ4) +
+ !!(o->object.flags & OBJECT_COMPRESSED_ZSTD) > 1) {
+ error(p, "Object has multiple compression flags set");
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_XZ) && !JOURNAL_HEADER_COMPRESSED_XZ(f->header)) {
+ error(p, "XZ compressed object in file without XZ compression");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_LZ4) && !JOURNAL_HEADER_COMPRESSED_LZ4(f->header)) {
+ error(p, "LZ4 compressed object in file without LZ4 compression");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_ZSTD) && !JOURNAL_HEADER_COMPRESSED_ZSTD(f->header)) {
+ error(p, "ZSTD compressed object in file without ZSTD compression");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA:
+ r = write_uint64(data_fd, p);
+ if (r < 0)
+ goto fail;
+
+ n_data++;
+ break;
+
+ case OBJECT_FIELD:
+ n_fields++;
+ break;
+
+ case OBJECT_ENTRY:
+ if (JOURNAL_HEADER_SEALED(f->header) && n_tags <= 0) {
+ error(p, "First entry before first tag");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ r = write_uint64(entry_fd, p);
+ if (r < 0)
+ goto fail;
+
+ if (le64toh(o->entry.realtime) < last_tag_realtime) {
+ error(p, "Older entry after newer tag");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (!entry_seqnum_set &&
+ le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
+ error(p, "Head entry sequence number incorrect");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_seqnum_set &&
+ entry_seqnum >= le64toh(o->entry.seqnum)) {
+ error(p, "Entry sequence number out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_seqnum = le64toh(o->entry.seqnum);
+ entry_seqnum_set = true;
+
+ if (entry_monotonic_set &&
+ sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
+ entry_monotonic > le64toh(o->entry.monotonic)) {
+ error(p, "Entry timestamp out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_monotonic = le64toh(o->entry.monotonic);
+ entry_boot_id = o->entry.boot_id;
+ entry_monotonic_set = true;
+
+ if (!entry_realtime_set &&
+ le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
+ error(p, "Head entry realtime timestamp incorrect");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_realtime = le64toh(o->entry.realtime);
+ entry_realtime_set = true;
+
+ n_entries++;
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ if (n_data_hash_tables > 1) {
+ error(p, "More than one data hash table");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+ le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+ error(p, "header fields for data hash table invalid");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ n_data_hash_tables++;
+ break;
+
+ case OBJECT_FIELD_HASH_TABLE:
+ if (n_field_hash_tables > 1) {
+ error(p, "More than one field hash table");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+ le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+ error(p, "Header fields for field hash table invalid");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ n_field_hash_tables++;
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ r = write_uint64(entry_array_fd, p);
+ if (r < 0)
+ goto fail;
+
+ if (p == le64toh(f->header->entry_array_offset)) {
+ if (found_main_entry_array) {
+ error(p, "More than one main entry array");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ found_main_entry_array = true;
+ }
+
+ n_entry_arrays++;
+ break;
+
+ case OBJECT_TAG:
+ if (!JOURNAL_HEADER_SEALED(f->header)) {
+ error(p, "Tag object in file without sealing");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(o->tag.seqnum) != n_tags + 1) {
+ error(p, "Tag sequence number out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(o->tag.epoch) < last_epoch) {
+ error(p, "Epoch sequence out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+#if HAVE_GCRYPT
+ if (f->seal) {
+ uint64_t q, rt;
+
+ debug(p, "Checking tag %"PRIu64"...", le64toh(o->tag.seqnum));
+
+ rt = f->fss_start_usec + le64toh(o->tag.epoch) * f->fss_interval_usec;
+ if (entry_realtime_set && entry_realtime >= rt + f->fss_interval_usec) {
+ error(p, "tag/entry realtime timestamp out of synchronization");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ /* OK, now we know the epoch. So let's now set
+ * it, and calculate the HMAC for everything
+ * since the last tag. */
+ r = journal_file_fsprg_seek(f, le64toh(o->tag.epoch));
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ goto fail;
+
+ if (last_tag == 0) {
+ r = journal_file_hmac_put_header(f);
+ if (r < 0)
+ goto fail;
+
+ q = le64toh(f->header->header_size);
+ } else
+ q = last_tag;
+
+ while (q <= p) {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, q, &o);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_hmac_put_object(f, OBJECT_UNUSED, o, q);
+ if (r < 0)
+ goto fail;
+
+ q = q + ALIGN64(le64toh(o->object.size));
+ }
+
+ /* Position might have changed, let's reposition things */
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0)
+ goto fail;
+
+ if (memcmp(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH) != 0) {
+ error(p, "Tag failed verification");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ f->hmac_running = false;
+ last_tag_realtime = rt;
+ last_sealed_realtime = entry_realtime;
+ }
+
+ last_tag = p + ALIGN64(le64toh(o->object.size));
+#endif
+
+ last_epoch = le64toh(o->tag.epoch);
+
+ n_tags++;
+ break;
+
+ default:
+ n_weird++;
+ }
+
+ if (p == le64toh(f->header->tail_object_offset)) {
+ found_last = true;
+ break;
+ }
+
+ p = p + ALIGN64(le64toh(o->object.size));
+ };
+
+ if (!found_last && le64toh(f->header->tail_object_offset) != 0) {
+ error(le64toh(f->header->tail_object_offset), "Tail object pointer dead");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (n_objects != le64toh(f->header->n_objects)) {
+ error(offsetof(Header, n_objects), "Object number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (n_entries != le64toh(f->header->n_entries)) {
+ error(offsetof(Header, n_entries), "Entry number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ n_data != le64toh(f->header->n_data)) {
+ error(offsetof(Header, n_data), "Data number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ n_fields != le64toh(f->header->n_fields)) {
+ error(offsetof(Header, n_fields), "Field number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
+ n_tags != le64toh(f->header->n_tags)) {
+ error(offsetof(Header, n_tags), "Tag number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays) &&
+ n_entry_arrays != le64toh(f->header->n_entry_arrays)) {
+ error(offsetof(Header, n_entry_arrays), "Entry array number mismatch");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (!found_main_entry_array && le64toh(f->header->entry_array_offset) != 0) {
+ error(0, "Missing entry array");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_seqnum_set &&
+ entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
+ error(offsetof(Header, tail_entry_seqnum), "Invalid tail seqnum");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_monotonic_set &&
+ (sd_id128_equal(entry_boot_id, f->header->boot_id) &&
+ entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
+ error(0, "Invalid tail monotonic timestamp");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
+ error(0, "Invalid tail realtime timestamp");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ /* Second iteration: we follow all objects referenced from the
+ * two entry points: the object hash table and the entry
+ * array. We also check that everything referenced (directly
+ * or indirectly) in the data hash table also exists in the
+ * entry array, and vice versa. Note that we do not care for
+ * unreferenced objects. We only care that everything that is
+ * referenced is consistent. */
+
+ r = verify_entry_array(f,
+ cache_data_fd, n_data,
+ cache_entry_fd, n_entries,
+ cache_entry_array_fd, n_entry_arrays,
+ &last_usec,
+ show_progress);
+ if (r < 0)
+ goto fail;
+
+ r = verify_hash_table(f,
+ cache_data_fd, n_data,
+ cache_entry_fd, n_entries,
+ cache_entry_array_fd, n_entry_arrays,
+ &last_usec,
+ show_progress);
+ if (r < 0)
+ goto fail;
+
+ if (show_progress)
+ flush_progress();
+
+ mmap_cache_free_fd(f->mmap, cache_data_fd);
+ mmap_cache_free_fd(f->mmap, cache_entry_fd);
+ mmap_cache_free_fd(f->mmap, cache_entry_array_fd);
+
+ safe_close(data_fd);
+ safe_close(entry_fd);
+ safe_close(entry_array_fd);
+
+ if (first_contained)
+ *first_contained = le64toh(f->header->head_entry_realtime);
+ if (last_validated)
+ *last_validated = last_sealed_realtime;
+ if (last_contained)
+ *last_contained = le64toh(f->header->tail_entry_realtime);
+
+ return 0;
+
+fail:
+ if (show_progress)
+ flush_progress();
+
+ log_error("File corruption detected at %s:"OFSfmt" (of %llu bytes, %"PRIu64"%%).",
+ f->path,
+ p,
+ (unsigned long long) f->last_stat.st_size,
+ 100 * p / f->last_stat.st_size);
+
+ if (data_fd >= 0)
+ safe_close(data_fd);
+
+ if (entry_fd >= 0)
+ safe_close(entry_fd);
+
+ if (entry_array_fd >= 0)
+ safe_close(entry_array_fd);
+
+ if (cache_data_fd)
+ mmap_cache_free_fd(f->mmap, cache_data_fd);
+
+ if (cache_entry_fd)
+ mmap_cache_free_fd(f->mmap, cache_entry_fd);
+
+ if (cache_entry_array_fd)
+ mmap_cache_free_fd(f->mmap, cache_entry_array_fd);
+
+ return r;
+}
diff --git a/src/journal/journal-verify.h b/src/journal/journal-verify.h
new file mode 100644
index 0000000..5790330
--- /dev/null
+++ b/src/journal/journal-verify.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journal-file.h"
+
+int journal_file_verify(JournalFile *f, const char *key, usec_t *first_contained, usec_t *last_validated, usec_t *last_contained, bool show_progress);
diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c
new file mode 100644
index 0000000..bcf2e01
--- /dev/null
+++ b/src/journal/journalctl.c
@@ -0,0 +1,2830 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <getopt.h>
+#include <linux/fs.h>
+#include <poll.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#if HAVE_PCRE2
+# define PCRE2_CODE_UNIT_WIDTH 8
+# include <pcre2.h>
+#endif
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-journal.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "catalog.h"
+#include "chattr-util.h"
+#include "def.h"
+#include "device-private.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "fsprg.h"
+#include "glob-util.h"
+#include "hostname-util.h"
+#include "id128-print.h"
+#include "io-util.h"
+#include "journal-def.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "journal-vacuum.h"
+#include "journal-verify.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "memory-util.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "nulstr-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pcre2-dlopen.h"
+#include "pretty-print.h"
+#include "qrcode-util.h"
+#include "random-util.h"
+#include "rlimit-util.h"
+#include "set.h"
+#include "sigbus.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "varlink.h"
+
+#define DEFAULT_FSS_INTERVAL_USEC (15*USEC_PER_MINUTE)
+#define PROCESS_INOTIFY_INTERVAL 1024 /* Every 1,024 messages processed */
+
+enum {
+ /* Special values for arg_lines */
+ ARG_LINES_DEFAULT = -2,
+ ARG_LINES_ALL = -1,
+};
+
+static OutputMode arg_output = OUTPUT_SHORT;
+static bool arg_utc = false;
+static bool arg_follow = false;
+static bool arg_full = true;
+static bool arg_all = false;
+static PagerFlags arg_pager_flags = 0;
+static int arg_lines = ARG_LINES_DEFAULT;
+static bool arg_no_tail = false;
+static bool arg_quiet = false;
+static bool arg_merge = false;
+static bool arg_boot = false;
+static sd_id128_t arg_boot_id = {};
+static int arg_boot_offset = 0;
+static bool arg_dmesg = false;
+static bool arg_no_hostname = false;
+static const char *arg_cursor = NULL;
+static const char *arg_cursor_file = NULL;
+static const char *arg_after_cursor = NULL;
+static bool arg_show_cursor = false;
+static const char *arg_directory = NULL;
+static char **arg_file = NULL;
+static bool arg_file_stdin = false;
+static int arg_priorities = 0xFF;
+static Set *arg_facilities = NULL;
+static char *arg_verify_key = NULL;
+#if HAVE_GCRYPT
+static usec_t arg_interval = DEFAULT_FSS_INTERVAL_USEC;
+static bool arg_force = false;
+#endif
+static usec_t arg_since, arg_until;
+static bool arg_since_set = false, arg_until_set = false;
+static char **arg_syslog_identifier = NULL;
+static char **arg_system_units = NULL;
+static char **arg_user_units = NULL;
+static const char *arg_field = NULL;
+static bool arg_catalog = false;
+static bool arg_reverse = false;
+static int arg_journal_type = 0;
+static int arg_namespace_flags = 0;
+static char *arg_root = NULL;
+static char *arg_image = NULL;
+static const char *arg_machine = NULL;
+static const char *arg_namespace = NULL;
+static uint64_t arg_vacuum_size = 0;
+static uint64_t arg_vacuum_n_files = 0;
+static usec_t arg_vacuum_time = 0;
+static char **arg_output_fields = NULL;
+#if HAVE_PCRE2
+static const char *arg_pattern = NULL;
+static pcre2_code *arg_compiled_pattern = NULL;
+static int arg_case_sensitive = -1; /* -1 means be smart */
+#endif
+
+static enum {
+ ACTION_SHOW,
+ ACTION_NEW_ID128,
+ ACTION_PRINT_HEADER,
+ ACTION_SETUP_KEYS,
+ ACTION_VERIFY,
+ ACTION_DISK_USAGE,
+ ACTION_LIST_CATALOG,
+ ACTION_DUMP_CATALOG,
+ ACTION_UPDATE_CATALOG,
+ ACTION_LIST_BOOTS,
+ ACTION_FLUSH,
+ ACTION_RELINQUISH_VAR,
+ ACTION_SYNC,
+ ACTION_ROTATE,
+ ACTION_VACUUM,
+ ACTION_ROTATE_AND_VACUUM,
+ ACTION_LIST_FIELDS,
+ ACTION_LIST_FIELD_NAMES,
+} arg_action = ACTION_SHOW;
+
+typedef struct BootId {
+ sd_id128_t id;
+ uint64_t first;
+ uint64_t last;
+ LIST_FIELDS(struct BootId, boot_list);
+} BootId;
+
+#if HAVE_PCRE2
+DEFINE_TRIVIAL_CLEANUP_FUNC(pcre2_match_data*, sym_pcre2_match_data_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(pcre2_code*, sym_pcre2_code_free);
+
+static int pattern_compile(const char *pattern, unsigned flags, pcre2_code **out) {
+ int errorcode, r;
+ PCRE2_SIZE erroroffset;
+ pcre2_code *p;
+
+ p = sym_pcre2_compile((PCRE2_SPTR8) pattern,
+ PCRE2_ZERO_TERMINATED, flags, &errorcode, &erroroffset, NULL);
+ if (!p) {
+ unsigned char buf[LINE_MAX];
+
+ r = sym_pcre2_get_error_message(errorcode, buf, sizeof buf);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Bad pattern \"%s\": %s", pattern,
+ r < 0 ? "unknown error" : (char *)buf);
+ }
+
+ *out = p;
+ return 0;
+}
+#endif
+
+static int add_matches_for_device(sd_journal *j, const char *devpath) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ sd_device *d = NULL;
+ struct stat st;
+ int r;
+
+ assert(j);
+ assert(devpath);
+
+ if (!path_startswith(devpath, "/dev/"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Devpath does not start with /dev/");
+
+ if (stat(devpath, &st) < 0)
+ return log_error_errno(errno, "Couldn't stat file: %m");
+
+ r = device_new_from_stat_rdev(&device, &st);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from devnum %u:%u: %m", major(st.st_rdev), minor(st.st_rdev));
+
+ for (d = device; d; ) {
+ _cleanup_free_ char *match = NULL;
+ const char *subsys, *sysname, *devnode;
+ sd_device *parent;
+
+ r = sd_device_get_subsystem(d, &subsys);
+ if (r < 0)
+ goto get_parent;
+
+ r = sd_device_get_sysname(d, &sysname);
+ if (r < 0)
+ goto get_parent;
+
+ match = strjoin("_KERNEL_DEVICE=+", subsys, ":", sysname);
+ if (!match)
+ return log_oom();
+
+ r = sd_journal_add_match(j, match, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ if (sd_device_get_devname(d, &devnode) >= 0) {
+ _cleanup_free_ char *match1 = NULL;
+
+ r = stat(devnode, &st);
+ if (r < 0)
+ return log_error_errno(r, "Failed to stat() device node \"%s\": %m", devnode);
+
+ r = asprintf(&match1, "_KERNEL_DEVICE=%c%u:%u", S_ISBLK(st.st_mode) ? 'b' : 'c', major(st.st_rdev), minor(st.st_rdev));
+ if (r < 0)
+ return log_oom();
+
+ r = sd_journal_add_match(j, match1, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+ }
+
+get_parent:
+ if (sd_device_get_parent(d, &parent) < 0)
+ break;
+
+ d = parent;
+ }
+
+ r = add_match_this_boot(j, arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match for the current boot: %m");
+
+ return 0;
+}
+
+static char *format_timestamp_maybe_utc(char *buf, size_t l, usec_t t) {
+
+ if (arg_utc)
+ return format_timestamp_style(buf, l, t, TIMESTAMP_UTC);
+
+ return format_timestamp(buf, l, t);
+}
+
+static int parse_boot_descriptor(const char *x, sd_id128_t *boot_id, int *offset) {
+ sd_id128_t id = SD_ID128_NULL;
+ int off = 0, r;
+
+ if (streq(x, "all")) {
+ *boot_id = SD_ID128_NULL;
+ *offset = 0;
+ return 0;
+ } else if (strlen(x) >= 32) {
+ char *t;
+
+ t = strndupa(x, 32);
+ r = sd_id128_from_string(t, &id);
+ if (r >= 0)
+ x += 32;
+
+ if (!IN_SET(*x, 0, '-', '+'))
+ return -EINVAL;
+
+ if (*x != 0) {
+ r = safe_atoi(x, &off);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ r = safe_atoi(x, &off);
+ if (r < 0)
+ return r;
+ }
+
+ if (boot_id)
+ *boot_id = id;
+
+ if (offset)
+ *offset = off;
+
+ return 1;
+}
+
+static int help_facilities(void) {
+ if (!arg_quiet)
+ puts("Available facilities:");
+
+ for (int i = 0; i < LOG_NFACILITIES; i++) {
+ _cleanup_free_ char *t = NULL;
+
+ if (log_facility_unshifted_to_string_alloc(i, &t))
+ return log_oom();
+ puts(t);
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("journalctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] [MATCHES...]\n\n"
+ "%5$sQuery the journal.%6$s\n\n"
+ "%3$sOptions:%4$s\n"
+ " --system Show the system journal\n"
+ " --user Show the user journal for the current user\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -S --since=DATE Show entries not older than the specified date\n"
+ " -U --until=DATE Show entries not newer than the specified date\n"
+ " -c --cursor=CURSOR Show entries starting at the specified cursor\n"
+ " --after-cursor=CURSOR Show entries after the specified cursor\n"
+ " --show-cursor Print the cursor after all the entries\n"
+ " --cursor-file=FILE Show entries after cursor in FILE and update FILE\n"
+ " -b --boot[=ID] Show current boot or the specified boot\n"
+ " --list-boots Show terse information about recorded boots\n"
+ " -k --dmesg Show kernel message log from the current boot\n"
+ " -u --unit=UNIT Show logs from the specified unit\n"
+ " --user-unit=UNIT Show logs from the specified user unit\n"
+ " -t --identifier=STRING Show entries with the specified syslog identifier\n"
+ " -p --priority=RANGE Show entries with the specified priority\n"
+ " --facility=FACILITY... Show entries with the specified facilities\n"
+ " -g --grep=PATTERN Show entries with MESSAGE matching PATTERN\n"
+ " --case-sensitive[=BOOL] Force case sensitive or insensitive matching\n"
+ " -e --pager-end Immediately jump to the end in the pager\n"
+ " -f --follow Follow the journal\n"
+ " -n --lines[=INTEGER] Number of journal entries to show\n"
+ " --no-tail Show all lines, even in follow mode\n"
+ " -r --reverse Show the newest entries first\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-iso-precise, short-full,\n"
+ " short-monotonic, short-unix, verbose, export,\n"
+ " json, json-pretty, json-sse, json-seq, cat,\n"
+ " with-unit)\n"
+ " --output-fields=LIST Select fields to print in verbose/export/json modes\n"
+ " --utc Express time in Coordinated Universal Time (UTC)\n"
+ " -x --catalog Add message explanations where available\n"
+ " --no-full Ellipsize fields\n"
+ " -a --all Show all fields, including long and unprintable\n"
+ " -q --quiet Do not show info messages and privilege warning\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-hostname Suppress output of hostname field\n"
+ " -m --merge Show entries from all available journals\n"
+ " -D --directory=PATH Show journal files from directory\n"
+ " --file=PATH Show journal file\n"
+ " --root=ROOT Operate on files below a root directory\n"
+ " --image=IMAGE Operate on files in filesystem image\n"
+ " --namespace=NAMESPACE Show journal data from specified namespace\n"
+ " --interval=TIME Time interval for changing the FSS sealing key\n"
+ " --verify-key=KEY Specify FSS verification key\n"
+ " --force Override of the FSS key pair with --setup-keys\n"
+ "\n%3$sCommands:%4$s\n"
+ " -h --help Show this help text\n"
+ " --version Show package version\n"
+ " -N --fields List all field names currently used\n"
+ " -F --field=FIELD List all values that a specified field takes\n"
+ " --disk-usage Show total disk usage of all journal files\n"
+ " --vacuum-size=BYTES Reduce disk usage below specified size\n"
+ " --vacuum-files=INT Leave only the specified number of journal files\n"
+ " --vacuum-time=TIME Remove journal files older than specified time\n"
+ " --verify Verify journal file consistency\n"
+ " --sync Synchronize unwritten journal messages to disk\n"
+ " --relinquish-var Stop logging to disk, log to temporary file system\n"
+ " --smart-relinquish-var Similar, but NOP if log directory is on root mount\n"
+ " --flush Flush all journal data from /run into /var\n"
+ " --rotate Request immediate rotation of the journal files\n"
+ " --header Show journal header information\n"
+ " --list-catalog Show all message IDs in the catalog\n"
+ " --dump-catalog Show entries in the message catalog\n"
+ " --update-catalog Update the message catalog database\n"
+ " --setup-keys Generate a new FSS key pair\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ , ansi_underline(), ansi_normal()
+ , ansi_highlight(), ansi_normal()
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_FULL,
+ ARG_NO_TAIL,
+ ARG_NEW_ID128,
+ ARG_THIS_BOOT,
+ ARG_LIST_BOOTS,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_ROOT,
+ ARG_IMAGE,
+ ARG_HEADER,
+ ARG_FACILITY,
+ ARG_SETUP_KEYS,
+ ARG_FILE,
+ ARG_INTERVAL,
+ ARG_VERIFY,
+ ARG_VERIFY_KEY,
+ ARG_DISK_USAGE,
+ ARG_AFTER_CURSOR,
+ ARG_CURSOR_FILE,
+ ARG_SHOW_CURSOR,
+ ARG_USER_UNIT,
+ ARG_LIST_CATALOG,
+ ARG_DUMP_CATALOG,
+ ARG_UPDATE_CATALOG,
+ ARG_FORCE,
+ ARG_CASE_SENSITIVE,
+ ARG_UTC,
+ ARG_SYNC,
+ ARG_FLUSH,
+ ARG_RELINQUISH_VAR,
+ ARG_SMART_RELINQUISH_VAR,
+ ARG_ROTATE,
+ ARG_VACUUM_SIZE,
+ ARG_VACUUM_FILES,
+ ARG_VACUUM_TIME,
+ ARG_NO_HOSTNAME,
+ ARG_OUTPUT_FIELDS,
+ ARG_NAMESPACE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version" , no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "pager-end", no_argument, NULL, 'e' },
+ { "follow", no_argument, NULL, 'f' },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "output", required_argument, NULL, 'o' },
+ { "all", no_argument, NULL, 'a' },
+ { "full", no_argument, NULL, 'l' },
+ { "no-full", no_argument, NULL, ARG_NO_FULL },
+ { "lines", optional_argument, NULL, 'n' },
+ { "no-tail", no_argument, NULL, ARG_NO_TAIL },
+ { "new-id128", no_argument, NULL, ARG_NEW_ID128 }, /* deprecated */
+ { "quiet", no_argument, NULL, 'q' },
+ { "merge", no_argument, NULL, 'm' },
+ { "this-boot", no_argument, NULL, ARG_THIS_BOOT }, /* deprecated */
+ { "boot", optional_argument, NULL, 'b' },
+ { "list-boots", no_argument, NULL, ARG_LIST_BOOTS },
+ { "dmesg", no_argument, NULL, 'k' },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "user", no_argument, NULL, ARG_USER },
+ { "directory", required_argument, NULL, 'D' },
+ { "file", required_argument, NULL, ARG_FILE },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "image", required_argument, NULL, ARG_IMAGE },
+ { "header", no_argument, NULL, ARG_HEADER },
+ { "identifier", required_argument, NULL, 't' },
+ { "priority", required_argument, NULL, 'p' },
+ { "facility", required_argument, NULL, ARG_FACILITY },
+ { "grep", required_argument, NULL, 'g' },
+ { "case-sensitive", optional_argument, NULL, ARG_CASE_SENSITIVE },
+ { "setup-keys", no_argument, NULL, ARG_SETUP_KEYS },
+ { "interval", required_argument, NULL, ARG_INTERVAL },
+ { "verify", no_argument, NULL, ARG_VERIFY },
+ { "verify-key", required_argument, NULL, ARG_VERIFY_KEY },
+ { "disk-usage", no_argument, NULL, ARG_DISK_USAGE },
+ { "cursor", required_argument, NULL, 'c' },
+ { "cursor-file", required_argument, NULL, ARG_CURSOR_FILE },
+ { "after-cursor", required_argument, NULL, ARG_AFTER_CURSOR },
+ { "show-cursor", no_argument, NULL, ARG_SHOW_CURSOR },
+ { "since", required_argument, NULL, 'S' },
+ { "until", required_argument, NULL, 'U' },
+ { "unit", required_argument, NULL, 'u' },
+ { "user-unit", required_argument, NULL, ARG_USER_UNIT },
+ { "field", required_argument, NULL, 'F' },
+ { "fields", no_argument, NULL, 'N' },
+ { "catalog", no_argument, NULL, 'x' },
+ { "list-catalog", no_argument, NULL, ARG_LIST_CATALOG },
+ { "dump-catalog", no_argument, NULL, ARG_DUMP_CATALOG },
+ { "update-catalog", no_argument, NULL, ARG_UPDATE_CATALOG },
+ { "reverse", no_argument, NULL, 'r' },
+ { "machine", required_argument, NULL, 'M' },
+ { "utc", no_argument, NULL, ARG_UTC },
+ { "flush", no_argument, NULL, ARG_FLUSH },
+ { "relinquish-var", no_argument, NULL, ARG_RELINQUISH_VAR },
+ { "smart-relinquish-var", no_argument, NULL, ARG_SMART_RELINQUISH_VAR },
+ { "sync", no_argument, NULL, ARG_SYNC },
+ { "rotate", no_argument, NULL, ARG_ROTATE },
+ { "vacuum-size", required_argument, NULL, ARG_VACUUM_SIZE },
+ { "vacuum-files", required_argument, NULL, ARG_VACUUM_FILES },
+ { "vacuum-time", required_argument, NULL, ARG_VACUUM_TIME },
+ { "no-hostname", no_argument, NULL, ARG_NO_HOSTNAME },
+ { "output-fields", required_argument, NULL, ARG_OUTPUT_FIELDS },
+ { "namespace", required_argument, NULL, ARG_NAMESPACE },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hefo:aln::qmb::kD:p:g:c:S:U:t:u:NF:xrM:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case 'e':
+ arg_pager_flags |= PAGER_JUMP_TO_END;
+
+ if (arg_lines == ARG_LINES_DEFAULT)
+ arg_lines = 1000;
+
+ break;
+
+ case 'f':
+ arg_follow = true;
+ break;
+
+ case 'o':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX);
+ return 0;
+ }
+
+ arg_output = output_mode_from_string(optarg);
+ if (arg_output < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown output format '%s'.", optarg);
+
+ if (IN_SET(arg_output, OUTPUT_EXPORT, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ, OUTPUT_CAT))
+ arg_quiet = true;
+
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case ARG_NO_FULL:
+ arg_full = false;
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case 'n':
+ if (optarg) {
+ if (streq(optarg, "all"))
+ arg_lines = ARG_LINES_ALL;
+ else {
+ r = safe_atoi(optarg, &arg_lines);
+ if (r < 0 || arg_lines < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse lines '%s'", optarg);
+ }
+ } else {
+ arg_lines = 10;
+
+ /* Hmm, no argument? Maybe the next
+ * word on the command line is
+ * supposed to be the argument? Let's
+ * see if there is one, and is
+ * parsable. */
+ if (optind < argc) {
+ int n;
+ if (streq(argv[optind], "all")) {
+ arg_lines = ARG_LINES_ALL;
+ optind++;
+ } else if (safe_atoi(argv[optind], &n) >= 0 && n >= 0) {
+ arg_lines = n;
+ optind++;
+ }
+ }
+ }
+
+ break;
+
+ case ARG_NO_TAIL:
+ arg_no_tail = true;
+ break;
+
+ case ARG_NEW_ID128:
+ arg_action = ACTION_NEW_ID128;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'm':
+ arg_merge = true;
+ break;
+
+ case ARG_THIS_BOOT:
+ arg_boot = true;
+ arg_boot_id = SD_ID128_NULL;
+ arg_boot_offset = 0;
+ break;
+
+ case 'b':
+ arg_boot = true;
+ arg_boot_id = SD_ID128_NULL;
+ arg_boot_offset = 0;
+
+ if (optarg) {
+ r = parse_boot_descriptor(optarg, &arg_boot_id, &arg_boot_offset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse boot descriptor '%s'", optarg);
+
+ arg_boot = r;
+
+ /* Hmm, no argument? Maybe the next
+ * word on the command line is
+ * supposed to be the argument? Let's
+ * see if there is one and is parsable
+ * as a boot descriptor... */
+ } else if (optind < argc) {
+ r = parse_boot_descriptor(argv[optind], &arg_boot_id, &arg_boot_offset);
+ if (r >= 0) {
+ arg_boot = r;
+ optind++;
+ }
+ }
+ break;
+
+ case ARG_LIST_BOOTS:
+ arg_action = ACTION_LIST_BOOTS;
+ break;
+
+ case 'k':
+ arg_boot = arg_dmesg = true;
+ break;
+
+ case ARG_SYSTEM:
+ arg_journal_type |= SD_JOURNAL_SYSTEM;
+ break;
+
+ case ARG_USER:
+ arg_journal_type |= SD_JOURNAL_CURRENT_USER;
+ break;
+
+ case 'M':
+ arg_machine = optarg;
+ break;
+
+ case ARG_NAMESPACE:
+ if (streq(optarg, "*")) {
+ arg_namespace_flags = SD_JOURNAL_ALL_NAMESPACES;
+ arg_namespace = NULL;
+ } else if (startswith(optarg, "+")) {
+ arg_namespace_flags = SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE;
+ arg_namespace = optarg + 1;
+ } else if (isempty(optarg)) {
+ arg_namespace_flags = 0;
+ arg_namespace = NULL;
+ } else {
+ arg_namespace_flags = 0;
+ arg_namespace = optarg;
+ }
+
+ break;
+
+ case 'D':
+ arg_directory = optarg;
+ break;
+
+ case ARG_FILE:
+ if (streq(optarg, "-"))
+ /* An undocumented feature: we can read journal files from STDIN. We don't document
+ * this though, since after all we only support this for mmap-able, seekable files, and
+ * not for example pipes which are probably the primary usecase for reading things from
+ * STDIN. To avoid confusion we hence don't document this feature. */
+ arg_file_stdin = true;
+ else {
+ r = glob_extend(&arg_file, optarg, GLOB_NOCHECK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add paths: %m");
+ }
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, /* suppress_root= */ true, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_IMAGE:
+ r = parse_path_argument_and_warn(optarg, /* suppress_root= */ false, &arg_image);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'c':
+ arg_cursor = optarg;
+ break;
+
+ case ARG_CURSOR_FILE:
+ arg_cursor_file = optarg;
+ break;
+
+ case ARG_AFTER_CURSOR:
+ arg_after_cursor = optarg;
+ break;
+
+ case ARG_SHOW_CURSOR:
+ arg_show_cursor = true;
+ break;
+
+ case ARG_HEADER:
+ arg_action = ACTION_PRINT_HEADER;
+ break;
+
+ case ARG_VERIFY:
+ arg_action = ACTION_VERIFY;
+ break;
+
+ case ARG_DISK_USAGE:
+ arg_action = ACTION_DISK_USAGE;
+ break;
+
+ case ARG_VACUUM_SIZE:
+ r = parse_size(optarg, 1024, &arg_vacuum_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse vacuum size: %s", optarg);
+
+ arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM;
+ break;
+
+ case ARG_VACUUM_FILES:
+ r = safe_atou64(optarg, &arg_vacuum_n_files);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse vacuum files: %s", optarg);
+
+ arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM;
+ break;
+
+ case ARG_VACUUM_TIME:
+ r = parse_sec(optarg, &arg_vacuum_time);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse vacuum time: %s", optarg);
+
+ arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM;
+ break;
+
+#if HAVE_GCRYPT
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_SETUP_KEYS:
+ arg_action = ACTION_SETUP_KEYS;
+ break;
+
+ case ARG_VERIFY_KEY:
+ r = free_and_strdup(&arg_verify_key, optarg);
+ if (r < 0)
+ return r;
+ /* Use memset not explicit_bzero() or similar so this doesn't look confusing
+ * in ps or htop output. */
+ memset(optarg, 'x', strlen(optarg));
+
+ arg_action = ACTION_VERIFY;
+ arg_merge = false;
+ break;
+
+ case ARG_INTERVAL:
+ r = parse_sec(optarg, &arg_interval);
+ if (r < 0 || arg_interval <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse sealing key change interval: %s", optarg);
+ break;
+#else
+ case ARG_SETUP_KEYS:
+ case ARG_VERIFY_KEY:
+ case ARG_INTERVAL:
+ case ARG_FORCE:
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Compiled without forward-secure sealing support.");
+#endif
+
+ case 'p': {
+ const char *dots;
+
+ dots = strstr(optarg, "..");
+ if (dots) {
+ _cleanup_free_ char *a = NULL;
+ int from, to, i;
+
+ /* a range */
+ a = strndup(optarg, dots - optarg);
+ if (!a)
+ return log_oom();
+
+ from = log_level_from_string(a);
+ to = log_level_from_string(dots + 2);
+
+ if (from < 0 || to < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse log level range %s", optarg);
+
+ arg_priorities = 0;
+
+ if (from < to) {
+ for (i = from; i <= to; i++)
+ arg_priorities |= 1 << i;
+ } else {
+ for (i = to; i <= from; i++)
+ arg_priorities |= 1 << i;
+ }
+
+ } else {
+ int p, i;
+
+ p = log_level_from_string(optarg);
+ if (p < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown log level %s", optarg);
+
+ arg_priorities = 0;
+
+ for (i = 0; i <= p; i++)
+ arg_priorities |= 1 << i;
+ }
+
+ break;
+ }
+
+ case ARG_FACILITY: {
+ const char *p;
+
+ for (p = optarg;;) {
+ _cleanup_free_ char *fac = NULL;
+ int num;
+
+ r = extract_first_word(&p, &fac, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse facilities: %s", optarg);
+ if (r == 0)
+ break;
+
+ if (streq(fac, "help")) {
+ help_facilities();
+ return 0;
+ }
+
+ num = log_facility_unshifted_from_string(fac);
+ if (num < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Bad --facility= argument \"%s\".", fac);
+
+ if (set_ensure_put(&arg_facilities, NULL, INT_TO_PTR(num)) < 0)
+ return log_oom();
+ }
+
+ break;
+ }
+
+#if HAVE_PCRE2
+ case 'g':
+ arg_pattern = optarg;
+ break;
+
+ case ARG_CASE_SENSITIVE:
+ if (optarg) {
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Bad --case-sensitive= argument \"%s\": %m", optarg);
+ arg_case_sensitive = r;
+ } else
+ arg_case_sensitive = true;
+
+ break;
+#else
+ case 'g':
+ case ARG_CASE_SENSITIVE:
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Compiled without pattern matching support");
+#endif
+
+ case 'S':
+ r = parse_timestamp(optarg, &arg_since);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse timestamp: %s", optarg);
+ arg_since_set = true;
+ break;
+
+ case 'U':
+ r = parse_timestamp(optarg, &arg_until);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse timestamp: %s", optarg);
+ arg_until_set = true;
+ break;
+
+ case 't':
+ r = strv_extend(&arg_syslog_identifier, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case 'u':
+ r = strv_extend(&arg_system_units, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case ARG_USER_UNIT:
+ r = strv_extend(&arg_user_units, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case 'F':
+ arg_action = ACTION_LIST_FIELDS;
+ arg_field = optarg;
+ break;
+
+ case 'N':
+ arg_action = ACTION_LIST_FIELD_NAMES;
+ break;
+
+ case ARG_NO_HOSTNAME:
+ arg_no_hostname = true;
+ break;
+
+ case 'x':
+ arg_catalog = true;
+ break;
+
+ case ARG_LIST_CATALOG:
+ arg_action = ACTION_LIST_CATALOG;
+ break;
+
+ case ARG_DUMP_CATALOG:
+ arg_action = ACTION_DUMP_CATALOG;
+ break;
+
+ case ARG_UPDATE_CATALOG:
+ arg_action = ACTION_UPDATE_CATALOG;
+ break;
+
+ case 'r':
+ arg_reverse = true;
+ break;
+
+ case ARG_UTC:
+ arg_utc = true;
+ break;
+
+ case ARG_FLUSH:
+ arg_action = ACTION_FLUSH;
+ break;
+
+ case ARG_SMART_RELINQUISH_VAR: {
+ int root_mnt_id, log_mnt_id;
+
+ /* Try to be smart about relinquishing access to /var/log/journal/ during shutdown:
+ * if it's on the same mount as the root file system there's no point in
+ * relinquishing access and we can leave journald write to it until the very last
+ * moment. */
+
+ r = path_get_mnt_id("/", &root_mnt_id);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get root mount ID, ignoring: %m");
+ else {
+ r = path_get_mnt_id("/var/log/journal/", &log_mnt_id);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get journal directory mount ID, ignoring: %m");
+ else if (root_mnt_id == log_mnt_id) {
+ log_debug("/var/log/journal/ is on root file system, not relinquishing access to /var.");
+ return 0;
+ } else
+ log_debug("/var/log/journal/ is not on the root file system, relinquishing access to it.");
+ }
+
+ _fallthrough_;
+ }
+
+ case ARG_RELINQUISH_VAR:
+ arg_action = ACTION_RELINQUISH_VAR;
+ break;
+
+ case ARG_ROTATE:
+ arg_action = arg_action == ACTION_VACUUM ? ACTION_ROTATE_AND_VACUUM : ACTION_ROTATE;
+ break;
+
+ case ARG_SYNC:
+ arg_action = ACTION_SYNC;
+ break;
+
+ case ARG_OUTPUT_FIELDS: {
+ _cleanup_strv_free_ char **v = NULL;
+
+ v = strv_split(optarg, ",");
+ if (!v)
+ return log_oom();
+
+ if (!arg_output_fields)
+ arg_output_fields = TAKE_PTR(v);
+ else {
+ r = strv_extend_strv(&arg_output_fields, v, true);
+ if (r < 0)
+ return log_oom();
+ }
+ break;
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_follow && !arg_no_tail && !arg_since && arg_lines == ARG_LINES_DEFAULT)
+ arg_lines = 10;
+
+ if (!!arg_directory + !!arg_file + !!arg_machine + !!arg_root + !!arg_image > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Please specify at most one of -D/--directory=, --file=, -M/--machine=, --root=, --image=.");
+
+ if (arg_since_set && arg_until_set && arg_since > arg_until)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--since= must be before --until=.");
+
+ if (!!arg_cursor + !!arg_after_cursor + !!arg_since_set > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Please specify only one of --since=, --cursor=, and --after-cursor.");
+
+ if (arg_follow && arg_reverse)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Please specify either --reverse= or --follow=, not both.");
+
+ if (!IN_SET(arg_action, ACTION_SHOW, ACTION_DUMP_CATALOG, ACTION_LIST_CATALOG) && optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Extraneous arguments starting with '%s'",
+ argv[optind]);
+
+ if ((arg_boot || arg_action == ACTION_LIST_BOOTS) && arg_merge)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Using --boot or --list-boots with --merge is not supported.");
+
+ if (!strv_isempty(arg_system_units) && arg_journal_type == SD_JOURNAL_CURRENT_USER) {
+ /* Specifying --user and --unit= at the same time makes no sense (as the former excludes the user
+ * journal, but the latter excludes the system journal, thus resulting in empty output). Let's be nice
+ * to users, and automatically turn --unit= into --user-unit= if combined with --user. */
+ r = strv_extend_strv(&arg_user_units, arg_system_units, true);
+ if (r < 0)
+ return r;
+
+ arg_system_units = strv_free(arg_system_units);
+ }
+
+#if HAVE_PCRE2
+ if (arg_pattern) {
+ unsigned flags;
+
+ r = dlopen_pcre2();
+ if (r < 0)
+ return r;
+
+ if (arg_case_sensitive >= 0)
+ flags = !arg_case_sensitive * PCRE2_CASELESS;
+ else {
+ _cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
+ bool has_case;
+ _cleanup_(sym_pcre2_code_freep) pcre2_code *cs = NULL;
+
+ md = sym_pcre2_match_data_create(1, NULL);
+ if (!md)
+ return log_oom();
+
+ r = pattern_compile("[[:upper:]]", 0, &cs);
+ if (r < 0)
+ return r;
+
+ r = sym_pcre2_match(cs, (PCRE2_SPTR8) arg_pattern, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL);
+ has_case = r >= 0;
+
+ flags = !has_case * PCRE2_CASELESS;
+ }
+
+ log_debug("Doing case %s matching based on %s",
+ flags & PCRE2_CASELESS ? "insensitive" : "sensitive",
+ arg_case_sensitive >= 0 ? "request" : "pattern casing");
+
+ r = pattern_compile(arg_pattern, flags, &arg_compiled_pattern);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ return 1;
+}
+
+static int add_matches(sd_journal *j, char **args) {
+ char **i;
+ bool have_term = false;
+
+ assert(j);
+
+ STRV_FOREACH(i, args) {
+ int r;
+
+ if (streq(*i, "+")) {
+ if (!have_term)
+ break;
+ r = sd_journal_add_disjunction(j);
+ have_term = false;
+
+ } else if (path_is_absolute(*i)) {
+ _cleanup_free_ char *p = NULL, *t = NULL, *t2 = NULL, *interpreter = NULL;
+ struct stat st;
+
+ r = chase_symlinks(*i, NULL, CHASE_TRAIL_SLASH, &p, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't canonicalize path: %m");
+
+ if (lstat(p, &st) < 0)
+ return log_error_errno(errno, "Couldn't stat file: %m");
+
+ if (S_ISREG(st.st_mode) && (0111 & st.st_mode)) {
+ if (executable_is_script(p, &interpreter) > 0) {
+ _cleanup_free_ char *comm;
+
+ comm = strndup(basename(p), 15);
+ if (!comm)
+ return log_oom();
+
+ t = strjoin("_COMM=", comm);
+ if (!t)
+ return log_oom();
+
+ /* Append _EXE only if the interpreter is not a link.
+ Otherwise, it might be outdated often. */
+ if (lstat(interpreter, &st) == 0 && !S_ISLNK(st.st_mode)) {
+ t2 = strjoin("_EXE=", interpreter);
+ if (!t2)
+ return log_oom();
+ }
+ } else {
+ t = strjoin("_EXE=", p);
+ if (!t)
+ return log_oom();
+ }
+
+ r = sd_journal_add_match(j, t, 0);
+
+ if (r >=0 && t2)
+ r = sd_journal_add_match(j, t2, 0);
+
+ } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
+ r = add_matches_for_device(j, p);
+ if (r < 0)
+ return r;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "File is neither a device node, nor regular file, nor executable: %s",
+ *i);
+
+ have_term = true;
+ } else {
+ r = sd_journal_add_match(j, *i, 0);
+ have_term = true;
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match '%s': %m", *i);
+ }
+
+ if (!strv_isempty(args) && !have_term)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "\"+\" can only be used between terms");
+
+ return 0;
+}
+
+static void boot_id_free_all(BootId *l) {
+
+ while (l) {
+ BootId *i = l;
+ LIST_REMOVE(boot_list, l, i);
+ free(i);
+ }
+}
+
+static int discover_next_boot(sd_journal *j,
+ sd_id128_t previous_boot_id,
+ bool advance_older,
+ BootId **ret) {
+
+ _cleanup_free_ BootId *next_boot = NULL;
+ char match[9+32+1] = "_BOOT_ID=";
+ sd_id128_t boot_id;
+ int r;
+
+ assert(j);
+ assert(ret);
+
+ /* We expect the journal to be on the last position of a boot
+ * (in relation to the direction we are going), so that the next
+ * invocation of sd_journal_next/previous will be from a different
+ * boot. We then collect any information we desire and then jump
+ * to the last location of the new boot by using a _BOOT_ID match
+ * coming from the other journal direction. */
+
+ /* Make sure we aren't restricted by any _BOOT_ID matches, so that
+ * we can actually advance to a *different* boot. */
+ sd_journal_flush_matches(j);
+
+ do {
+ if (advance_older)
+ r = sd_journal_previous(j);
+ else
+ r = sd_journal_next(j);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ return 0; /* End of journal, yay. */
+
+ r = sd_journal_get_monotonic_usec(j, NULL, &boot_id);
+ if (r < 0)
+ return r;
+
+ /* We iterate through this in a loop, until the boot ID differs from the previous one. Note that
+ * normally, this will only require a single iteration, as we seeked to the last entry of the previous
+ * boot entry already. However, it might happen that the per-journal-field entry arrays are less
+ * complete than the main entry array, and hence might reference an entry that's not actually the last
+ * one of the boot ID as last one. Let's hence use the per-field array is initial seek position to
+ * speed things up, but let's not trust that it is complete, and hence, manually advance as
+ * necessary. */
+
+ } while (sd_id128_equal(boot_id, previous_boot_id));
+
+ next_boot = new0(BootId, 1);
+ if (!next_boot)
+ return -ENOMEM;
+
+ next_boot->id = boot_id;
+
+ r = sd_journal_get_realtime_usec(j, &next_boot->first);
+ if (r < 0)
+ return r;
+
+ /* Now seek to the last occurrence of this boot ID. */
+ sd_id128_to_string(next_boot->id, match + 9);
+ r = sd_journal_add_match(j, match, sizeof(match) - 1);
+ if (r < 0)
+ return r;
+
+ if (advance_older)
+ r = sd_journal_seek_head(j);
+ else
+ r = sd_journal_seek_tail(j);
+ if (r < 0)
+ return r;
+
+ if (advance_older)
+ r = sd_journal_next(j);
+ else
+ r = sd_journal_previous(j);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
+ "Whoopsie! We found a boot ID but can't read its last entry."); /* This shouldn't happen. We just came from this very boot ID. */
+
+ r = sd_journal_get_realtime_usec(j, &next_boot->last);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(next_boot);
+
+ return 0;
+}
+
+static int get_boots(
+ sd_journal *j,
+ BootId **boots,
+ sd_id128_t *boot_id,
+ int offset) {
+
+ bool skip_once;
+ int r, count = 0;
+ BootId *head = NULL, *tail = NULL, *id;
+ const bool advance_older = boot_id && offset <= 0;
+ sd_id128_t previous_boot_id;
+
+ assert(j);
+
+ /* Adjust for the asymmetry that offset 0 is
+ * the last (and current) boot, while 1 is considered the
+ * (chronological) first boot in the journal. */
+ skip_once = boot_id && sd_id128_is_null(*boot_id) && offset <= 0;
+
+ /* Advance to the earliest/latest occurrence of our reference
+ * boot ID (taking our lookup direction into account), so that
+ * discover_next_boot() can do its job.
+ * If no reference is given, the journal head/tail will do,
+ * they're "virtual" boots after all. */
+ if (boot_id && !sd_id128_is_null(*boot_id)) {
+ char match[9+32+1] = "_BOOT_ID=";
+
+ sd_journal_flush_matches(j);
+
+ sd_id128_to_string(*boot_id, match + 9);
+ r = sd_journal_add_match(j, match, sizeof(match) - 1);
+ if (r < 0)
+ return r;
+
+ if (advance_older)
+ r = sd_journal_seek_head(j); /* seek to oldest */
+ else
+ r = sd_journal_seek_tail(j); /* seek to newest */
+ if (r < 0)
+ return r;
+
+ if (advance_older)
+ r = sd_journal_next(j); /* read the oldest entry */
+ else
+ r = sd_journal_previous(j); /* read the most recently added entry */
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ goto finish;
+ else if (offset == 0) {
+ count = 1;
+ goto finish;
+ }
+
+ /* At this point the read pointer is positioned at the oldest/newest occurrence of the reference boot
+ * ID. After flushing the matches, one more invocation of _previous()/_next() will hence place us at
+ * the following entry, which must then have an older/newer boot ID */
+ } else {
+
+ if (advance_older)
+ r = sd_journal_seek_tail(j); /* seek to newest */
+ else
+ r = sd_journal_seek_head(j); /* seek to oldest */
+ if (r < 0)
+ return r;
+
+ /* No sd_journal_next()/_previous() here.
+ *
+ * At this point the read pointer is positioned after the newest/before the oldest entry in the whole
+ * journal. The next invocation of _previous()/_next() will hence position us at the newest/oldest
+ * entry we have. */
+ }
+
+ previous_boot_id = SD_ID128_NULL;
+ for (;;) {
+ _cleanup_free_ BootId *current = NULL;
+
+ r = discover_next_boot(j, previous_boot_id, advance_older, &current);
+ if (r < 0) {
+ boot_id_free_all(head);
+ return r;
+ }
+
+ if (!current)
+ break;
+
+ previous_boot_id = current->id;
+
+ if (boot_id) {
+ if (!skip_once)
+ offset += advance_older ? 1 : -1;
+ skip_once = false;
+
+ if (offset == 0) {
+ count = 1;
+ *boot_id = current->id;
+ break;
+ }
+ } else {
+ LIST_FOREACH(boot_list, id, head) {
+ if (sd_id128_equal(id->id, current->id)) {
+ /* boot id already stored, something wrong with the journal files */
+ /* exiting as otherwise this problem would cause forever loop */
+ goto finish;
+ }
+ }
+ LIST_INSERT_AFTER(boot_list, head, tail, current);
+ tail = TAKE_PTR(current);
+ count++;
+ }
+ }
+
+finish:
+ if (boots)
+ *boots = head;
+
+ sd_journal_flush_matches(j);
+
+ return count;
+}
+
+static int list_boots(sd_journal *j) {
+ int w, i, count;
+ BootId *id, *all_ids;
+
+ assert(j);
+
+ count = get_boots(j, &all_ids, NULL, 0);
+ if (count < 0)
+ return log_error_errno(count, "Failed to determine boots: %m");
+ if (count == 0)
+ return count;
+
+ (void) pager_open(arg_pager_flags);
+
+ /* numbers are one less, but we need an extra char for the sign */
+ w = DECIMAL_STR_WIDTH(count - 1) + 1;
+
+ i = 0;
+ LIST_FOREACH(boot_list, id, all_ids) {
+ char a[FORMAT_TIMESTAMP_MAX], b[FORMAT_TIMESTAMP_MAX];
+
+ printf("% *i " SD_ID128_FORMAT_STR " %s—%s\n",
+ w, i - count + 1,
+ SD_ID128_FORMAT_VAL(id->id),
+ format_timestamp_maybe_utc(a, sizeof(a), id->first),
+ format_timestamp_maybe_utc(b, sizeof(b), id->last));
+ i++;
+ }
+
+ boot_id_free_all(all_ids);
+
+ return 0;
+}
+
+static int add_boot(sd_journal *j) {
+ char match[9+32+1] = "_BOOT_ID=";
+ sd_id128_t boot_id;
+ int r;
+
+ assert(j);
+
+ if (!arg_boot)
+ return 0;
+
+ /* Take a shortcut and use the current boot_id, which we can do very quickly.
+ * We can do this only when we logs are coming from the current machine,
+ * so take the slow path if log location is specified. */
+ if (arg_boot_offset == 0 && sd_id128_is_null(arg_boot_id) &&
+ !arg_directory && !arg_file && !arg_root)
+ return add_match_this_boot(j, arg_machine);
+
+ boot_id = arg_boot_id;
+ r = get_boots(j, NULL, &boot_id, arg_boot_offset);
+ assert(r <= 1);
+ if (r <= 0) {
+ const char *reason = (r == 0) ? "No such boot ID in journal" : strerror_safe(r);
+
+ if (sd_id128_is_null(arg_boot_id))
+ log_error("Data from the specified boot (%+i) is not available: %s",
+ arg_boot_offset, reason);
+ else
+ log_error("Data from the specified boot ("SD_ID128_FORMAT_STR") is not available: %s",
+ SD_ID128_FORMAT_VAL(arg_boot_id), reason);
+
+ return r == 0 ? -ENODATA : r;
+ }
+
+ sd_id128_to_string(boot_id, match + 9);
+
+ r = sd_journal_add_match(j, match, sizeof(match) - 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+static int add_dmesg(sd_journal *j) {
+ int r;
+ assert(j);
+
+ if (!arg_dmesg)
+ return 0;
+
+ r = sd_journal_add_match(j, "_TRANSPORT=kernel",
+ STRLEN("_TRANSPORT=kernel"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+static int get_possible_units(
+ sd_journal *j,
+ const char *fields,
+ char **patterns,
+ Set **units) {
+
+ _cleanup_set_free_free_ Set *found;
+ const char *field;
+ int r;
+
+ found = set_new(&string_hash_ops);
+ if (!found)
+ return -ENOMEM;
+
+ NULSTR_FOREACH(field, fields) {
+ const void *data;
+ size_t size;
+
+ r = sd_journal_query_unique(j, field);
+ if (r < 0)
+ return r;
+
+ SD_JOURNAL_FOREACH_UNIQUE(j, data, size) {
+ char **pattern, *eq;
+ size_t prefix;
+ _cleanup_free_ char *u = NULL;
+
+ eq = memchr(data, '=', size);
+ if (eq)
+ prefix = eq - (char*) data + 1;
+ else
+ prefix = 0;
+
+ u = strndup((char*) data + prefix, size - prefix);
+ if (!u)
+ return -ENOMEM;
+
+ STRV_FOREACH(pattern, patterns)
+ if (fnmatch(*pattern, u, FNM_NOESCAPE) == 0) {
+ log_debug("Matched %s with pattern %s=%s", u, field, *pattern);
+
+ r = set_consume(found, u);
+ u = NULL;
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ break;
+ }
+ }
+ }
+
+ *units = TAKE_PTR(found);
+
+ return 0;
+}
+
+/* This list is supposed to return the superset of unit names
+ * possibly matched by rules added with add_matches_for_unit... */
+#define SYSTEM_UNITS \
+ "_SYSTEMD_UNIT\0" \
+ "COREDUMP_UNIT\0" \
+ "UNIT\0" \
+ "OBJECT_SYSTEMD_UNIT\0" \
+ "_SYSTEMD_SLICE\0"
+
+/* ... and add_matches_for_user_unit */
+#define USER_UNITS \
+ "_SYSTEMD_USER_UNIT\0" \
+ "USER_UNIT\0" \
+ "COREDUMP_USER_UNIT\0" \
+ "OBJECT_SYSTEMD_USER_UNIT\0" \
+ "_SYSTEMD_USER_SLICE\0"
+
+static int add_units(sd_journal *j) {
+ _cleanup_strv_free_ char **patterns = NULL;
+ int r, count = 0;
+ char **i;
+
+ assert(j);
+
+ STRV_FOREACH(i, arg_system_units) {
+ _cleanup_free_ char *u = NULL;
+
+ r = unit_name_mangle(*i, UNIT_NAME_MANGLE_GLOB | (arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN), &u);
+ if (r < 0)
+ return r;
+
+ if (string_is_glob(u)) {
+ r = strv_push(&patterns, u);
+ if (r < 0)
+ return r;
+ u = NULL;
+ } else {
+ r = add_matches_for_unit(j, u);
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ count++;
+ }
+ }
+
+ if (!strv_isempty(patterns)) {
+ _cleanup_set_free_free_ Set *units = NULL;
+ char *u;
+
+ r = get_possible_units(j, SYSTEM_UNITS, patterns, &units);
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(u, units) {
+ r = add_matches_for_unit(j, u);
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ count++;
+ }
+ }
+
+ patterns = strv_free(patterns);
+
+ STRV_FOREACH(i, arg_user_units) {
+ _cleanup_free_ char *u = NULL;
+
+ r = unit_name_mangle(*i, UNIT_NAME_MANGLE_GLOB | (arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN), &u);
+ if (r < 0)
+ return r;
+
+ if (string_is_glob(u)) {
+ r = strv_push(&patterns, u);
+ if (r < 0)
+ return r;
+ u = NULL;
+ } else {
+ r = add_matches_for_user_unit(j, u, getuid());
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ count++;
+ }
+ }
+
+ if (!strv_isempty(patterns)) {
+ _cleanup_set_free_free_ Set *units = NULL;
+ char *u;
+
+ r = get_possible_units(j, USER_UNITS, patterns, &units);
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(u, units) {
+ r = add_matches_for_user_unit(j, u, getuid());
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ count++;
+ }
+ }
+
+ /* Complain if the user request matches but nothing whatsoever was
+ * found, since otherwise everything would be matched. */
+ if (!(strv_isempty(arg_system_units) && strv_isempty(arg_user_units)) && count == 0)
+ return -ENODATA;
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int add_priorities(sd_journal *j) {
+ char match[] = "PRIORITY=0";
+ int i, r;
+ assert(j);
+
+ if (arg_priorities == 0xFF)
+ return 0;
+
+ for (i = LOG_EMERG; i <= LOG_DEBUG; i++)
+ if (arg_priorities & (1 << i)) {
+ match[sizeof(match)-2] = '0' + i;
+
+ r = sd_journal_add_match(j, match, strlen(match));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+ }
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+static int add_facilities(sd_journal *j) {
+ void *p;
+ int r;
+
+ SET_FOREACH(p, arg_facilities) {
+ char match[STRLEN("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)];
+
+ xsprintf(match, "SYSLOG_FACILITY=%d", PTR_TO_INT(p));
+
+ r = sd_journal_add_match(j, match, strlen(match));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+ }
+
+ return 0;
+}
+
+static int add_syslog_identifier(sd_journal *j) {
+ int r;
+ char **i;
+
+ assert(j);
+
+ STRV_FOREACH(i, arg_syslog_identifier) {
+ _cleanup_free_ char *u = NULL;
+
+ u = strjoin("SYSLOG_IDENTIFIER=", *i);
+ if (!u)
+ return -ENOMEM;
+ r = sd_journal_add_match(j, u, 0);
+ if (r < 0)
+ return r;
+ r = sd_journal_add_disjunction(j);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+#if HAVE_GCRYPT
+static int format_journal_url(
+ const void *seed,
+ size_t seed_size,
+ uint64_t start,
+ uint64_t interval,
+ const char *hn,
+ sd_id128_t machine,
+ bool full,
+ char **ret_url) {
+ _cleanup_free_ char *url = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t url_size = 0;
+ int r;
+
+ assert(seed);
+ assert(seed_size > 0);
+
+ f = open_memstream_unlocked(&url, &url_size);
+ if (!f)
+ return -ENOMEM;
+
+ if (full)
+ fputs("fss://", f);
+
+ for (size_t i = 0; i < seed_size; i++) {
+ if (i > 0 && i % 3 == 0)
+ fputc('-', f);
+ fprintf(f, "%02x", ((uint8_t*) seed)[i]);
+ }
+
+ fprintf(f, "/%"PRIx64"-%"PRIx64, start, interval);
+
+ if (full) {
+ fprintf(f, "?machine=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(machine));
+ if (hn)
+ fprintf(f, ";hostname=%s", hn);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ f = safe_fclose(f);
+ *ret_url = TAKE_PTR(url);
+ return 0;
+}
+#endif
+
+static int setup_keys(void) {
+#if HAVE_GCRYPT
+ size_t mpk_size, seed_size, state_size;
+ _cleanup_(unlink_and_freep) char *k = NULL;
+ _cleanup_free_ char *p = NULL;
+ uint8_t *mpk, *seed, *state;
+ _cleanup_close_ int fd = -1;
+ sd_id128_t machine, boot;
+ struct stat st;
+ uint64_t n;
+ int r;
+
+ r = stat("/var/log/journal", &st);
+ if (r < 0 && !IN_SET(errno, ENOENT, ENOTDIR))
+ return log_error_errno(errno, "stat(\"%s\") failed: %m", "/var/log/journal");
+
+ if (r < 0 || !S_ISDIR(st.st_mode)) {
+ log_error("%s is not a directory, must be using persistent logging for FSS.",
+ "/var/log/journal");
+ return r < 0 ? -errno : -ENOTDIR;
+ }
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine ID: %m");
+
+ r = sd_id128_get_boot(&boot);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot ID: %m");
+
+ if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss",
+ SD_ID128_FORMAT_VAL(machine)) < 0)
+ return log_oom();
+
+ if (arg_force) {
+ r = unlink(p);
+ if (r < 0 && errno != ENOENT)
+ return log_error_errno(errno, "unlink(\"%s\") failed: %m", p);
+ } else if (access(p, F_OK) >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Sealing key file %s exists already. Use --force to recreate.", p);
+
+ if (asprintf(&k, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss.tmp.XXXXXX",
+ SD_ID128_FORMAT_VAL(machine)) < 0)
+ return log_oom();
+
+ mpk_size = FSPRG_mskinbytes(FSPRG_RECOMMENDED_SECPAR);
+ mpk = alloca(mpk_size);
+
+ seed_size = FSPRG_RECOMMENDED_SEEDLEN;
+ seed = alloca(seed_size);
+
+ state_size = FSPRG_stateinbytes(FSPRG_RECOMMENDED_SECPAR);
+ state = alloca(state_size);
+
+ log_info("Generating seed...");
+ r = genuine_random_bytes(seed, seed_size, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire random seed: %m");
+
+ log_info("Generating key pair...");
+ FSPRG_GenMK(NULL, mpk, seed, seed_size, FSPRG_RECOMMENDED_SECPAR);
+
+ log_info("Generating sealing key...");
+ FSPRG_GenState0(state, mpk, seed, seed_size);
+
+ assert(arg_interval > 0);
+
+ n = now(CLOCK_REALTIME);
+ n /= arg_interval;
+
+ safe_close(fd);
+ fd = mkostemp_safe(k);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open %s: %m", k);
+
+ /* Enable secure remove, exclusion from dump, synchronous writing and in-place updating */
+ static const unsigned chattr_flags[] = {
+ FS_SECRM_FL,
+ FS_NODUMP_FL,
+ FS_SYNC_FL,
+ FS_NOCOW_FL,
+ };
+ for (size_t j = 0; j < ELEMENTSOF(chattr_flags); j++) {
+ r = chattr_fd(fd, chattr_flags[j], chattr_flags[j], NULL);
+ if (r < 0)
+ log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set file attribute 0x%x: %m", chattr_flags[j]);
+ }
+
+ struct FSSHeader h = {
+ .signature = { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' },
+ .machine_id = machine,
+ .boot_id = boot,
+ .header_size = htole64(sizeof(h)),
+ .start_usec = htole64(n * arg_interval),
+ .interval_usec = htole64(arg_interval),
+ .fsprg_secpar = htole16(FSPRG_RECOMMENDED_SECPAR),
+ .fsprg_state_size = htole64(state_size),
+ };
+
+ r = loop_write(fd, &h, sizeof(h), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write header: %m");
+
+ r = loop_write(fd, state, state_size, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write state: %m");
+
+ if (rename(k, p) < 0)
+ return log_error_errno(errno, "Failed to link file: %m");
+
+ k = mfree(k);
+
+ _cleanup_free_ char *hn = NULL, *key = NULL;
+
+ r = format_journal_url(seed, seed_size, n, arg_interval, hn, machine, false, &key);
+ if (r < 0)
+ return r;
+
+ if (on_tty()) {
+ hn = gethostname_malloc();
+ if (hn)
+ hostname_cleanup(hn);
+
+ char tsb[FORMAT_TIMESPAN_MAX];
+ fprintf(stderr,
+ "\nNew keys have been generated for host %s%s" SD_ID128_FORMAT_STR ".\n"
+ "\n"
+ "The %ssecret sealing key%s has been written to the following local file.\n"
+ "This key file is automatically updated when the sealing key is advanced.\n"
+ "It should not be used on multiple hosts.\n"
+ "\n"
+ "\t%s\n"
+ "\n"
+ "The sealing key is automatically changed every %s.\n"
+ "\n"
+ "Please write down the following %ssecret verification key%s. It should be stored\n"
+ "in a safe location and should not be saved locally on disk.\n"
+ "\n\t%s",
+ strempty(hn), hn ? "/" : "",
+ SD_ID128_FORMAT_VAL(machine),
+ ansi_highlight(), ansi_normal(),
+ p,
+ format_timespan(tsb, sizeof(tsb), arg_interval, 0),
+ ansi_highlight(), ansi_normal(),
+ ansi_highlight_red());
+ fflush(stderr);
+ }
+
+ puts(key);
+
+ if (on_tty()) {
+ fprintf(stderr, "%s", ansi_normal());
+#if HAVE_QRENCODE
+ _cleanup_free_ char *url = NULL;
+ r = format_journal_url(seed, seed_size, n, arg_interval, hn, machine, true, &url);
+ if (r < 0)
+ return r;
+
+ (void) print_qrcode(stderr,
+ "To transfer the verification key to your phone scan the QR code below",
+ url);
+#endif
+ }
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Forward-secure sealing not available.");
+#endif
+}
+
+static int verify(sd_journal *j) {
+ int r = 0;
+ JournalFile *f;
+
+ assert(j);
+
+ log_show_color(true);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ int k;
+ usec_t first = 0, validated = 0, last = 0;
+
+#if HAVE_GCRYPT
+ if (!arg_verify_key && JOURNAL_HEADER_SEALED(f->header))
+ log_notice("Journal file %s has sealing enabled but verification key has not been passed using --verify-key=.", f->path);
+#endif
+
+ k = journal_file_verify(f, arg_verify_key, &first, &validated, &last, true);
+ if (k == -EINVAL)
+ /* If the key was invalid give up right-away. */
+ return k;
+ else if (k < 0)
+ r = log_warning_errno(k, "FAIL: %s (%m)", f->path);
+ else {
+ char a[FORMAT_TIMESTAMP_MAX], b[FORMAT_TIMESTAMP_MAX], c[FORMAT_TIMESPAN_MAX];
+ log_info("PASS: %s", f->path);
+
+ if (arg_verify_key && JOURNAL_HEADER_SEALED(f->header)) {
+ if (validated > 0) {
+ log_info("=> Validated from %s to %s, final %s entries not sealed.",
+ format_timestamp_maybe_utc(a, sizeof(a), first),
+ format_timestamp_maybe_utc(b, sizeof(b), validated),
+ format_timespan(c, sizeof(c), last > validated ? last - validated : 0, 0));
+ } else if (last > 0)
+ log_info("=> No sealing yet, %s of entries not sealed.",
+ format_timespan(c, sizeof(c), last - first, 0));
+ else
+ log_info("=> No sealing yet, no entries in file.");
+ }
+ }
+ }
+
+ return r;
+}
+
+static int simple_varlink_call(const char *option, const char *method) {
+ _cleanup_(varlink_flush_close_unrefp) Varlink *link = NULL;
+ const char *error, *fn;
+ int r;
+
+ if (arg_machine)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "%s is not supported in conjunction with --machine=.", option);
+
+ fn = arg_namespace ?
+ strjoina("/run/systemd/journal.", arg_namespace, "/io.systemd.journal") :
+ "/run/systemd/journal/io.systemd.journal";
+
+ r = varlink_connect_address(&link, fn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to %s: %m", fn);
+
+ (void) varlink_set_description(link, "journal");
+ (void) varlink_set_relative_timeout(link, USEC_INFINITY);
+
+ r = varlink_call(link, method, NULL, NULL, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to execute varlink call: %m");
+ if (error)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOANO),
+ "Failed to execute varlink call: %s", error);
+
+ return 0;
+}
+
+static int flush_to_var(void) {
+ return simple_varlink_call("--flush", "io.systemd.Journal.FlushToVar");
+}
+
+static int relinquish_var(void) {
+ return simple_varlink_call("--relinquish-var/--smart-relinquish-var", "io.systemd.Journal.RelinquishVar");
+}
+
+static int rotate(void) {
+ return simple_varlink_call("--rotate", "io.systemd.Journal.Rotate");
+}
+
+static int sync_journal(void) {
+ return simple_varlink_call("--sync", "io.systemd.Journal.Synchronize");
+}
+
+static int wait_for_change(sd_journal *j, int poll_fd) {
+ struct pollfd pollfds[] = {
+ { .fd = poll_fd, .events = POLLIN },
+ { .fd = STDOUT_FILENO },
+ };
+
+ struct timespec ts;
+ usec_t timeout;
+ int r;
+
+ assert(j);
+ assert(poll_fd >= 0);
+
+ /* Much like sd_journal_wait() but also keeps an eye on STDOUT, and exits as soon as we see a POLLHUP on that,
+ * i.e. when it is closed. */
+
+ r = sd_journal_get_timeout(j, &timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine journal waiting time: %m");
+
+ if (ppoll(pollfds, ELEMENTSOF(pollfds),
+ timeout == USEC_INFINITY ? NULL : timespec_store(&ts, timeout), NULL) < 0) {
+ if (errno == EINTR)
+ return 0;
+
+ return log_error_errno(errno, "Couldn't wait for journal event: %m");
+ }
+
+ if (pollfds[1].revents & (POLLHUP|POLLERR|POLLNVAL)) /* STDOUT has been closed? */
+ return log_debug_errno(SYNTHETIC_ERRNO(ECANCELED),
+ "Standard output has been closed.");
+
+ if (pollfds[0].revents & POLLNVAL)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADF), "Change fd closed?");
+
+ r = sd_journal_process(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to process journal events: %m");
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(umount_and_rmdir_and_freep) char *unlink_dir = NULL;
+ bool previous_boot_id_valid = false, first_line = true, ellipsized = false, need_seek = false;
+ bool use_cursor = false, after_cursor = false;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ sd_id128_t previous_boot_id;
+ int n_shown = 0, r, poll_fd = -1;
+
+ setlocale(LC_ALL, "");
+ log_setup_cli();
+
+ /* Increase max number of open files if we can, we might needs this when browsing journal files, which might be
+ * split up into many files. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finish;
+
+ if (arg_image) {
+ assert(!arg_root);
+
+ r = mount_image_privately_interactively(
+ arg_image,
+ DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_RELAX_VAR_CHECK|
+ (arg_action == ACTION_UPDATE_CATALOG ? DISSECT_IMAGE_FSCK : DISSECT_IMAGE_READ_ONLY),
+ &unlink_dir,
+ &loop_device,
+ &decrypted_image);
+ if (r < 0)
+ return r;
+
+ arg_root = strdup(unlink_dir);
+ if (!arg_root)
+ return log_oom();
+ }
+
+ signal(SIGWINCH, columns_lines_cache_reset);
+ sigbus_install();
+
+ switch (arg_action) {
+
+ case ACTION_NEW_ID128:
+ r = id128_print_new(ID128_PRINT_PRETTY);
+ goto finish;
+
+ case ACTION_SETUP_KEYS:
+ r = setup_keys();
+ goto finish;
+
+ case ACTION_LIST_CATALOG:
+ case ACTION_DUMP_CATALOG:
+ case ACTION_UPDATE_CATALOG: {
+ _cleanup_free_ char *database;
+
+ database = path_join(arg_root, CATALOG_DATABASE);
+ if (!database) {
+ r = log_oom();
+ goto finish;
+ }
+
+ if (arg_action == ACTION_UPDATE_CATALOG) {
+ r = catalog_update(database, arg_root, catalog_file_dirs);
+ if (r < 0)
+ log_error_errno(r, "Failed to list catalog: %m");
+ } else {
+ bool oneline = arg_action == ACTION_LIST_CATALOG;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (optind < argc)
+ r = catalog_list_items(stdout, database, oneline, argv + optind);
+ else
+ r = catalog_list(stdout, database, oneline);
+ if (r < 0)
+ log_error_errno(r, "Failed to list catalog: %m");
+ }
+
+ goto finish;
+ }
+
+ case ACTION_FLUSH:
+ r = flush_to_var();
+ goto finish;
+
+ case ACTION_RELINQUISH_VAR:
+ r = relinquish_var();
+ goto finish;
+
+ case ACTION_SYNC:
+ r = sync_journal();
+ goto finish;
+
+ case ACTION_ROTATE:
+ r = rotate();
+ goto finish;
+
+ case ACTION_SHOW:
+ case ACTION_PRINT_HEADER:
+ case ACTION_VERIFY:
+ case ACTION_DISK_USAGE:
+ case ACTION_LIST_BOOTS:
+ case ACTION_VACUUM:
+ case ACTION_ROTATE_AND_VACUUM:
+ case ACTION_LIST_FIELDS:
+ case ACTION_LIST_FIELD_NAMES:
+ /* These ones require access to the journal files, continue below. */
+ break;
+
+ default:
+ assert_not_reached("Unknown action");
+ }
+
+ if (arg_directory)
+ r = sd_journal_open_directory(&j, arg_directory, arg_journal_type);
+ else if (arg_root)
+ r = sd_journal_open_directory(&j, arg_root, arg_journal_type | SD_JOURNAL_OS_ROOT);
+ else if (arg_file_stdin)
+ r = sd_journal_open_files_fd(&j, (int[]) { STDIN_FILENO }, 1, 0);
+ else if (arg_file)
+ r = sd_journal_open_files(&j, (const char**) arg_file, 0);
+ else if (arg_machine) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int fd;
+
+ if (geteuid() != 0) {
+ /* The file descriptor returned by OpenMachineRootDirectory() will be owned by users/groups of
+ * the container, thus we need root privileges to override them. */
+ r = log_error_errno(SYNTHETIC_ERRNO(EPERM), "Using the --machine= switch requires root privileges.");
+ goto finish;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open system bus: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "OpenMachineRootDirectory",
+ &error,
+ &reply,
+ "s", arg_machine);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open root directory: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ r = sd_bus_message_read(reply, "h", &fd);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ goto finish;
+ }
+
+ fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (fd < 0) {
+ r = log_error_errno(errno, "Failed to duplicate file descriptor: %m");
+ goto finish;
+ }
+
+ r = sd_journal_open_directory_fd(&j, fd, SD_JOURNAL_OS_ROOT);
+ if (r < 0)
+ safe_close(fd);
+ } else
+ r = sd_journal_open_namespace(
+ &j,
+ arg_namespace,
+ (arg_merge ? 0 : SD_JOURNAL_LOCAL_ONLY) |
+ arg_namespace_flags | arg_journal_type);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open %s: %m", arg_directory ?: arg_file ? "files" : "journal");
+ goto finish;
+ }
+
+ r = journal_access_check_and_warn(j, arg_quiet,
+ !(arg_journal_type == SD_JOURNAL_CURRENT_USER || arg_user_units));
+ if (r < 0)
+ goto finish;
+
+ switch (arg_action) {
+
+ case ACTION_NEW_ID128:
+ case ACTION_SETUP_KEYS:
+ case ACTION_LIST_CATALOG:
+ case ACTION_DUMP_CATALOG:
+ case ACTION_UPDATE_CATALOG:
+ case ACTION_FLUSH:
+ case ACTION_SYNC:
+ case ACTION_ROTATE:
+ assert_not_reached("Unexpected action.");
+
+ case ACTION_PRINT_HEADER:
+ journal_print_header(j);
+ r = 0;
+ goto finish;
+
+ case ACTION_VERIFY:
+ r = verify(j);
+ goto finish;
+
+ case ACTION_DISK_USAGE: {
+ uint64_t bytes = 0;
+ char sbytes[FORMAT_BYTES_MAX];
+
+ r = sd_journal_get_usage(j, &bytes);
+ if (r < 0)
+ goto finish;
+
+ printf("Archived and active journals take up %s in the file system.\n",
+ format_bytes(sbytes, sizeof(sbytes), bytes));
+ goto finish;
+ }
+
+ case ACTION_LIST_BOOTS:
+ r = list_boots(j);
+ goto finish;
+
+ case ACTION_ROTATE_AND_VACUUM:
+
+ r = rotate();
+ if (r < 0)
+ goto finish;
+
+ _fallthrough_;
+
+ case ACTION_VACUUM: {
+ Directory *d;
+
+ HASHMAP_FOREACH(d, j->directories_by_path) {
+ int q;
+
+ q = journal_directory_vacuum(d->path, arg_vacuum_size, arg_vacuum_n_files, arg_vacuum_time, NULL, !arg_quiet);
+ if (q < 0)
+ r = log_error_errno(q, "Failed to vacuum %s: %m", d->path);
+ }
+
+ goto finish;
+ }
+
+ case ACTION_LIST_FIELD_NAMES: {
+ const char *field;
+
+ SD_JOURNAL_FOREACH_FIELD(j, field) {
+ printf("%s\n", field);
+ n_shown++;
+ }
+
+ r = 0;
+ goto finish;
+ }
+
+ case ACTION_SHOW:
+ case ACTION_LIST_FIELDS:
+ break;
+
+ default:
+ assert_not_reached("Unknown action");
+ }
+
+ if (arg_boot_offset != 0 &&
+ sd_journal_has_runtime_files(j) > 0 &&
+ sd_journal_has_persistent_files(j) == 0) {
+ log_info("Specifying boot ID or boot offset has no effect, no persistent journal was found.");
+ r = 0;
+ goto finish;
+ }
+ /* add_boot() must be called first!
+ * It may need to seek the journal to find parent boot IDs. */
+ r = add_boot(j);
+ if (r < 0)
+ goto finish;
+
+ r = add_dmesg(j);
+ if (r < 0)
+ goto finish;
+
+ r = add_units(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add filter for units: %m");
+ goto finish;
+ }
+
+ r = add_syslog_identifier(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add filter for syslog identifiers: %m");
+ goto finish;
+ }
+
+ r = add_priorities(j);
+ if (r < 0)
+ goto finish;
+
+ r = add_facilities(j);
+ if (r < 0)
+ goto finish;
+
+ r = add_matches(j, argv + optind);
+ if (r < 0)
+ goto finish;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *filter;
+
+ filter = journal_make_match_string(j);
+ if (!filter)
+ return log_oom();
+
+ log_debug("Journal filter: %s", filter);
+ }
+
+ if (arg_action == ACTION_LIST_FIELDS) {
+ const void *data;
+ size_t size;
+
+ assert(arg_field);
+
+ r = sd_journal_set_data_threshold(j, 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to unset data size threshold: %m");
+ goto finish;
+ }
+
+ r = sd_journal_query_unique(j, arg_field);
+ if (r < 0) {
+ log_error_errno(r, "Failed to query unique data objects: %m");
+ goto finish;
+ }
+
+ SD_JOURNAL_FOREACH_UNIQUE(j, data, size) {
+ const void *eq;
+
+ if (arg_lines >= 0 && n_shown >= arg_lines)
+ break;
+
+ eq = memchr(data, '=', size);
+ if (eq)
+ printf("%.*s\n", (int) (size - ((const uint8_t*) eq - (const uint8_t*) data + 1)), (const char*) eq + 1);
+ else
+ printf("%.*s\n", (int) size, (const char*) data);
+
+ n_shown++;
+ }
+
+ r = 0;
+ goto finish;
+ }
+
+ /* Opening the fd now means the first sd_journal_wait() will actually wait */
+ if (arg_follow) {
+ poll_fd = sd_journal_get_fd(j);
+ if (poll_fd == -EMFILE) {
+ log_warning_errno(poll_fd, "Insufficient watch descriptors available. Reverting to -n.");
+ arg_follow = false;
+ } else if (poll_fd == -EMEDIUMTYPE) {
+ log_error_errno(poll_fd, "The --follow switch is not supported in conjunction with reading from STDIN.");
+ goto finish;
+ } else if (poll_fd < 0) {
+ log_error_errno(poll_fd, "Failed to get journal fd: %m");
+ goto finish;
+ }
+ }
+
+ if (arg_cursor || arg_after_cursor || arg_cursor_file) {
+ _cleanup_free_ char *cursor_from_file = NULL;
+ const char *cursor = arg_cursor ?: arg_after_cursor;
+
+ if (arg_cursor_file) {
+ r = read_one_line_file(arg_cursor_file, &cursor_from_file);
+ if (r < 0 && r != -ENOENT) {
+ log_error_errno(r, "Failed to read cursor file %s: %m", arg_cursor_file);
+ goto finish;
+ }
+
+ if (r > 0) {
+ cursor = cursor_from_file;
+ after_cursor = true;
+ }
+ } else
+ after_cursor = arg_after_cursor;
+
+ if (cursor) {
+ r = sd_journal_seek_cursor(j, cursor);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to cursor: %m");
+ goto finish;
+ }
+ use_cursor = true;
+ }
+ }
+
+ if (use_cursor) {
+ if (!arg_reverse)
+ r = sd_journal_next_skip(j, 1 + after_cursor);
+ else
+ r = sd_journal_previous_skip(j, 1 + after_cursor);
+
+ if (after_cursor && r < 2) {
+ /* We couldn't find the next entry after the cursor. */
+ if (arg_follow)
+ need_seek = true;
+ else
+ arg_lines = 0;
+ }
+
+ } else if (arg_since_set && !arg_reverse) {
+ r = sd_journal_seek_realtime_usec(j, arg_since);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to date: %m");
+ goto finish;
+ }
+ r = sd_journal_next(j);
+
+ } else if (arg_until_set && arg_reverse) {
+ r = sd_journal_seek_realtime_usec(j, arg_until);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to date: %m");
+ goto finish;
+ }
+ r = sd_journal_previous(j);
+
+ } else if (arg_reverse) {
+ r = sd_journal_seek_tail(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to tail: %m");
+ goto finish;
+ }
+
+ r = sd_journal_previous(j);
+
+ } else if (arg_lines >= 0) {
+ r = sd_journal_seek_tail(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to tail: %m");
+ goto finish;
+ }
+
+ r = sd_journal_previous_skip(j, arg_lines);
+
+ } else {
+ r = sd_journal_seek_head(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to seek to head: %m");
+ goto finish;
+ }
+
+ r = sd_journal_next(j);
+ }
+
+ if (r < 0) {
+ log_error_errno(r, "Failed to iterate through journal: %m");
+ goto finish;
+ }
+ if (r == 0)
+ need_seek = true;
+
+ if (!arg_follow)
+ (void) pager_open(arg_pager_flags);
+
+ if (!arg_quiet && (arg_lines != 0 || arg_follow)) {
+ usec_t start, end;
+ char start_buf[FORMAT_TIMESTAMP_MAX], end_buf[FORMAT_TIMESTAMP_MAX];
+
+ r = sd_journal_get_cutoff_realtime_usec(j, &start, &end);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get cutoff: %m");
+ goto finish;
+ }
+
+ if (r > 0) {
+ if (arg_follow)
+ printf("-- Journal begins at %s. --\n",
+ format_timestamp_maybe_utc(start_buf, sizeof(start_buf), start));
+ else
+ printf("-- Journal begins at %s, ends at %s. --\n",
+ format_timestamp_maybe_utc(start_buf, sizeof(start_buf), start),
+ format_timestamp_maybe_utc(end_buf, sizeof(end_buf), end));
+ }
+ }
+
+ for (;;) {
+ while (arg_lines < 0 || n_shown < arg_lines || (arg_follow && !first_line)) {
+ int flags;
+ size_t highlight[2] = {};
+
+ if (need_seek) {
+ if (!arg_reverse)
+ r = sd_journal_next(j);
+ else
+ r = sd_journal_previous(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to iterate through journal: %m");
+ goto finish;
+ }
+ if (r == 0)
+ break;
+ }
+
+ if (arg_until_set && !arg_reverse) {
+ usec_t usec;
+
+ r = sd_journal_get_realtime_usec(j, &usec);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine timestamp: %m");
+ goto finish;
+ }
+ if (usec > arg_until)
+ break;
+ }
+
+ if (arg_since_set && arg_reverse) {
+ usec_t usec;
+
+ r = sd_journal_get_realtime_usec(j, &usec);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine timestamp: %m");
+ goto finish;
+ }
+ if (usec < arg_since)
+ break;
+ }
+
+ if (!arg_merge && !arg_quiet) {
+ sd_id128_t boot_id;
+
+ r = sd_journal_get_monotonic_usec(j, NULL, &boot_id);
+ if (r >= 0) {
+ if (previous_boot_id_valid &&
+ !sd_id128_equal(boot_id, previous_boot_id))
+ printf("%s-- Boot "SD_ID128_FORMAT_STR" --%s\n",
+ ansi_highlight(), SD_ID128_FORMAT_VAL(boot_id), ansi_normal());
+
+ previous_boot_id = boot_id;
+ previous_boot_id_valid = true;
+ }
+ }
+
+#if HAVE_PCRE2
+ if (arg_compiled_pattern) {
+ _cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
+ const void *message;
+ size_t len;
+ PCRE2_SIZE *ovec;
+
+ md = sym_pcre2_match_data_create(1, NULL);
+ if (!md)
+ return log_oom();
+
+ r = sd_journal_get_data(j, "MESSAGE", &message, &len);
+ if (r < 0) {
+ if (r == -ENOENT) {
+ need_seek = true;
+ continue;
+ }
+
+ log_error_errno(r, "Failed to get MESSAGE field: %m");
+ goto finish;
+ }
+
+ assert_se(message = startswith(message, "MESSAGE="));
+
+ r = sym_pcre2_match(arg_compiled_pattern,
+ message,
+ len - strlen("MESSAGE="),
+ 0, /* start at offset 0 in the subject */
+ 0, /* default options */
+ md,
+ NULL);
+ if (r == PCRE2_ERROR_NOMATCH) {
+ need_seek = true;
+ continue;
+ }
+ if (r < 0) {
+ unsigned char buf[LINE_MAX];
+ int r2;
+
+ r2 = sym_pcre2_get_error_message(r, buf, sizeof buf);
+ log_error("Pattern matching failed: %s",
+ r2 < 0 ? "unknown error" : (char*) buf);
+ r = -EINVAL;
+ goto finish;
+ }
+
+ ovec = sym_pcre2_get_ovector_pointer(md);
+ highlight[0] = ovec[0];
+ highlight[1] = ovec[1];
+ }
+#endif
+
+ flags =
+ arg_all * OUTPUT_SHOW_ALL |
+ arg_full * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR |
+ arg_catalog * OUTPUT_CATALOG |
+ arg_utc * OUTPUT_UTC |
+ arg_no_hostname * OUTPUT_NO_HOSTNAME;
+
+ r = show_journal_entry(stdout, j, arg_output, 0, flags,
+ arg_output_fields, highlight, &ellipsized);
+ need_seek = true;
+ if (r == -EADDRNOTAVAIL)
+ break;
+ else if (r < 0)
+ goto finish;
+
+ n_shown++;
+
+ /* If journalctl take a long time to process messages, and during that time journal file
+ * rotation occurs, a journalctl client will keep those rotated files open until it calls
+ * sd_journal_process(), which typically happens as a result of calling sd_journal_wait() below
+ * in the "following" case. By periodically calling sd_journal_process() during the processing
+ * loop we shrink the window of time a client instance has open file descriptors for rotated
+ * (deleted) journal files. */
+ if ((n_shown % PROCESS_INOTIFY_INTERVAL) == 0) {
+ r = sd_journal_process(j);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process inotify events: %m");
+ goto finish;
+ }
+ }
+ }
+
+ if (!arg_follow) {
+ if (n_shown == 0 && !arg_quiet)
+ printf("-- No entries --\n");
+ break;
+ }
+
+ fflush(stdout);
+
+ r = wait_for_change(j, poll_fd);
+ if (r < 0)
+ goto finish;
+
+ first_line = false;
+ }
+
+ if (arg_show_cursor || arg_cursor_file) {
+ _cleanup_free_ char *cursor = NULL;
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0 && r != -EADDRNOTAVAIL)
+ log_error_errno(r, "Failed to get cursor: %m");
+ else if (r >= 0) {
+ if (arg_show_cursor)
+ printf("-- cursor: %s\n", cursor);
+
+ if (arg_cursor_file) {
+ r = write_string_file(arg_cursor_file, cursor,
+ WRITE_STRING_FILE_CREATE |
+ WRITE_STRING_FILE_ATOMIC);
+ if (r < 0)
+ log_error_errno(r,
+ "Failed to write new cursor to %s: %m",
+ arg_cursor_file);
+ }
+ }
+ }
+
+finish:
+ pager_close();
+
+ strv_free(arg_file);
+
+ set_free(arg_facilities);
+ strv_free(arg_syslog_identifier);
+ strv_free(arg_system_units);
+ strv_free(arg_user_units);
+ strv_free(arg_output_fields);
+
+ free(arg_root);
+ free(arg_verify_key);
+
+#if HAVE_PCRE2
+ if (arg_compiled_pattern) {
+ sym_pcre2_code_free(arg_compiled_pattern);
+
+ /* --grep was used, no error was thrown, but the pattern didn't
+ * match anything. Let's mimic grep's behavior here and return
+ * a non-zero exit code, so journalctl --grep can be used
+ * in scripts and such */
+ if (r == 0 && n_shown == 0)
+ r = -ENOENT;
+ }
+#endif
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/journal/journald-audit.c b/src/journal/journald-audit.c
new file mode 100644
index 0000000..744f750
--- /dev/null
+++ b/src/journal/journald-audit.c
@@ -0,0 +1,555 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "audit-type.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "journald-audit.h"
+#include "missing_audit.h"
+#include "string-util.h"
+
+typedef struct MapField {
+ const char *audit_field;
+ const char *journal_field;
+ int (*map)(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov);
+} MapField;
+
+static int map_simple_field(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov) {
+ _cleanup_free_ char *c = NULL;
+ size_t l = 0, allocated = 0;
+ const char *e;
+
+ assert(field);
+ assert(p);
+ assert(iov);
+ assert(n_iov);
+
+ l = strlen(field);
+ allocated = l + 1;
+ c = malloc(allocated);
+ if (!c)
+ return -ENOMEM;
+
+ memcpy(c, field, l);
+ for (e = *p; !IN_SET(*e, 0, ' '); e++) {
+ if (!GREEDY_REALLOC(c, allocated, l+2))
+ return -ENOMEM;
+
+ c[l++] = *e;
+ }
+
+ c[l] = 0;
+
+ if (!GREEDY_REALLOC(*iov, *n_iov_allocated, *n_iov + 1))
+ return -ENOMEM;
+
+ (*iov)[(*n_iov)++] = IOVEC_MAKE(c, l);
+
+ *p = e;
+ c = NULL;
+
+ return 1;
+}
+
+static int map_string_field_internal(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov, bool filter_printable) {
+ _cleanup_free_ char *c = NULL;
+ const char *s, *e;
+ size_t l;
+
+ assert(field);
+ assert(p);
+ assert(iov);
+ assert(n_iov);
+
+ /* The kernel formats string fields in one of two formats. */
+
+ if (**p == '"') {
+ /* Normal quoted syntax */
+ s = *p + 1;
+ e = strchr(s, '"');
+ if (!e)
+ return 0;
+
+ l = strlen(field) + (e - s);
+ c = malloc(l+1);
+ if (!c)
+ return -ENOMEM;
+
+ *((char*) mempcpy(stpcpy(c, field), s, e - s)) = 0;
+
+ e += 1;
+
+ } else if (unhexchar(**p) >= 0) {
+ /* Hexadecimal escaping */
+ size_t allocated = 0;
+
+ l = strlen(field);
+ allocated = l + 2;
+ c = malloc(allocated);
+ if (!c)
+ return -ENOMEM;
+
+ memcpy(c, field, l);
+ for (e = *p; !IN_SET(*e, 0, ' '); e += 2) {
+ int a, b;
+ uint8_t x;
+
+ a = unhexchar(e[0]);
+ if (a < 0)
+ return 0;
+
+ b = unhexchar(e[1]);
+ if (b < 0)
+ return 0;
+
+ x = ((uint8_t) a << 4 | (uint8_t) b);
+
+ if (filter_printable && x < (uint8_t) ' ')
+ x = (uint8_t) ' ';
+
+ if (!GREEDY_REALLOC(c, allocated, l+2))
+ return -ENOMEM;
+
+ c[l++] = (char) x;
+ }
+
+ c[l] = 0;
+ } else
+ return 0;
+
+ if (!GREEDY_REALLOC(*iov, *n_iov_allocated, *n_iov + 1))
+ return -ENOMEM;
+
+ (*iov)[(*n_iov)++] = IOVEC_MAKE(c, l);
+
+ *p = e;
+ c = NULL;
+
+ return 1;
+}
+
+static int map_string_field(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov) {
+ return map_string_field_internal(field, p, iov, n_iov_allocated, n_iov, false);
+}
+
+static int map_string_field_printable(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov) {
+ return map_string_field_internal(field, p, iov, n_iov_allocated, n_iov, true);
+}
+
+static int map_generic_field(const char *prefix, const char **p, struct iovec **iov, size_t *n_iov_allocated, size_t *n_iov) {
+ const char *e, *f;
+ char *c, *t;
+ int r;
+
+ /* Implements fallback mappings for all fields we don't know */
+
+ for (e = *p; e < *p + 16; e++) {
+
+ if (IN_SET(*e, 0, ' '))
+ return 0;
+
+ if (*e == '=')
+ break;
+
+ if (!((*e >= 'a' && *e <= 'z') ||
+ (*e >= 'A' && *e <= 'Z') ||
+ (*e >= '0' && *e <= '9') ||
+ IN_SET(*e, '_', '-')))
+ return 0;
+ }
+
+ if (e <= *p || e >= *p + 16)
+ return 0;
+
+ c = newa(char, strlen(prefix) + (e - *p) + 2);
+
+ t = stpcpy(c, prefix);
+ for (f = *p; f < e; f++) {
+ char x;
+
+ if (*f >= 'a' && *f <= 'z')
+ x = (*f - 'a') + 'A'; /* uppercase */
+ else if (*f == '-')
+ x = '_'; /* dashes → underscores */
+ else
+ x = *f;
+
+ *(t++) = x;
+ }
+ strcpy(t, "=");
+
+ e++;
+
+ r = map_simple_field(c, &e, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return r;
+
+ *p = e;
+ return r;
+}
+
+/* Kernel fields are those occurring in the audit string before
+ * msg='. All of these fields are trusted, hence carry the "_" prefix.
+ * We try to translate the fields we know into our native names. The
+ * other's are generically mapped to _AUDIT_FIELD_XYZ= */
+static const MapField map_fields_kernel[] = {
+
+ /* First, we map certain well-known audit fields into native
+ * well-known fields */
+ { "pid=", "_PID=", map_simple_field },
+ { "ppid=", "_PPID=", map_simple_field },
+ { "uid=", "_UID=", map_simple_field },
+ { "euid=", "_EUID=", map_simple_field },
+ { "fsuid=", "_FSUID=", map_simple_field },
+ { "gid=", "_GID=", map_simple_field },
+ { "egid=", "_EGID=", map_simple_field },
+ { "fsgid=", "_FSGID=", map_simple_field },
+ { "tty=", "_TTY=", map_simple_field },
+ { "ses=", "_AUDIT_SESSION=", map_simple_field },
+ { "auid=", "_AUDIT_LOGINUID=", map_simple_field },
+ { "subj=", "_SELINUX_CONTEXT=", map_simple_field },
+ { "comm=", "_COMM=", map_string_field },
+ { "exe=", "_EXE=", map_string_field },
+ { "proctitle=", "_CMDLINE=", map_string_field_printable },
+
+ /* Some fields don't map to native well-known fields. However,
+ * we know that they are string fields, hence let's undo
+ * string field escaping for them, though we stick to the
+ * generic field names. */
+ { "path=", "_AUDIT_FIELD_PATH=", map_string_field },
+ { "dev=", "_AUDIT_FIELD_DEV=", map_string_field },
+ { "name=", "_AUDIT_FIELD_NAME=", map_string_field },
+ {}
+};
+
+/* Userspace fields are those occurring in the audit string after
+ * msg='. All of these fields are untrusted, hence carry no "_"
+ * prefix. We map the fields we don't know to AUDIT_FIELD_XYZ= */
+static const MapField map_fields_userspace[] = {
+ { "cwd=", "AUDIT_FIELD_CWD=", map_string_field },
+ { "cmd=", "AUDIT_FIELD_CMD=", map_string_field },
+ { "acct=", "AUDIT_FIELD_ACCT=", map_string_field },
+ { "exe=", "AUDIT_FIELD_EXE=", map_string_field },
+ { "comm=", "AUDIT_FIELD_COMM=", map_string_field },
+ {}
+};
+
+static int map_all_fields(
+ const char *p,
+ const MapField map_fields[],
+ const char *prefix,
+ bool handle_msg,
+ struct iovec **iov,
+ size_t *n_iov_allocated,
+ size_t *n_iov) {
+
+ int r;
+
+ assert(p);
+ assert(iov);
+ assert(n_iov_allocated);
+ assert(n_iov);
+
+ for (;;) {
+ bool mapped = false;
+ const MapField *m;
+ const char *v;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0)
+ return 0;
+
+ if (handle_msg) {
+ v = startswith(p, "msg='");
+ if (v) {
+ _cleanup_free_ char *c = NULL;
+ const char *e;
+
+ /* Userspace message. It's enclosed in
+ simple quotation marks, is not
+ escaped, but the last field in the
+ line, hence let's remove the
+ quotation mark, and apply the
+ userspace mapping instead of the
+ kernel mapping. */
+
+ e = endswith(v, "'");
+ if (!e)
+ return 0; /* don't continue splitting up if the final quotation mark is missing */
+
+ c = strndup(v, e - v);
+ if (!c)
+ return -ENOMEM;
+
+ return map_all_fields(c, map_fields_userspace, "AUDIT_FIELD_", false, iov, n_iov_allocated, n_iov);
+ }
+ }
+
+ /* Try to map the kernel fields to our own names */
+ for (m = map_fields; m->audit_field; m++) {
+ v = startswith(p, m->audit_field);
+ if (!v)
+ continue;
+
+ r = m->map(m->journal_field, &v, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse audit array: %m");
+
+ if (r > 0) {
+ mapped = true;
+ p = v;
+ break;
+ }
+ }
+
+ if (!mapped) {
+ r = map_generic_field(prefix, &p, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse audit array: %m");
+
+ if (r == 0)
+ /* Couldn't process as generic field, let's just skip over it */
+ p += strcspn(p, WHITESPACE);
+ }
+ }
+}
+
+void process_audit_string(Server *s, int type, const char *data, size_t size) {
+ size_t n_iov_allocated = 0, n_iov = 0, z;
+ _cleanup_free_ struct iovec *iov = NULL;
+ uint64_t seconds, msec, id;
+ const char *p, *type_name;
+ char id_field[sizeof("_AUDIT_ID=") + DECIMAL_STR_MAX(uint64_t)],
+ type_field[sizeof("_AUDIT_TYPE=") + DECIMAL_STR_MAX(int)],
+ source_time_field[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
+ char *m, *type_field_name;
+ int k;
+
+ assert(s);
+
+ if (size <= 0)
+ return;
+
+ if (!data)
+ return;
+
+ /* Note that the input buffer is NUL terminated, but let's
+ * check whether there is a spurious NUL byte */
+ if (memchr(data, 0, size))
+ return;
+
+ p = startswith(data, "audit");
+ if (!p)
+ return;
+
+ k = 0;
+ if (sscanf(p, "(%" PRIu64 ".%" PRIu64 ":%" PRIu64 "):%n",
+ &seconds,
+ &msec,
+ &id,
+ &k) != 3 || k == 0)
+ return;
+
+ p += k;
+ p += strspn(p, WHITESPACE);
+
+ if (isempty(p))
+ return;
+
+ n_iov_allocated = N_IOVEC_META_FIELDS + 8;
+ iov = new(struct iovec, n_iov_allocated);
+ if (!iov) {
+ log_oom();
+ return;
+ }
+
+ iov[n_iov++] = IOVEC_MAKE_STRING("_TRANSPORT=audit");
+
+ sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64,
+ (usec_t) seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC);
+ iov[n_iov++] = IOVEC_MAKE_STRING(source_time_field);
+
+ sprintf(type_field, "_AUDIT_TYPE=%i", type);
+ iov[n_iov++] = IOVEC_MAKE_STRING(type_field);
+
+ sprintf(id_field, "_AUDIT_ID=%" PRIu64, id);
+ iov[n_iov++] = IOVEC_MAKE_STRING(id_field);
+
+ assert_cc(4 == LOG_FAC(LOG_AUTH));
+ iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=4");
+ iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=audit");
+
+ type_name = audit_type_name_alloca(type);
+
+ type_field_name = strjoina("_AUDIT_TYPE_NAME=", type_name);
+ iov[n_iov++] = IOVEC_MAKE_STRING(type_field_name);
+
+ m = strjoina("MESSAGE=", type_name, " ", p);
+ iov[n_iov++] = IOVEC_MAKE_STRING(m);
+
+ z = n_iov;
+
+ map_all_fields(p, map_fields_kernel, "_AUDIT_FIELD_", true, &iov, &n_iov_allocated, &n_iov);
+
+ if (!GREEDY_REALLOC(iov, n_iov_allocated, n_iov + N_IOVEC_META_FIELDS)) {
+ log_oom();
+ goto finish;
+ }
+
+ server_dispatch_message(s, iov, n_iov, n_iov_allocated, NULL, NULL, LOG_NOTICE, 0);
+
+finish:
+ /* free() all entries that map_all_fields() added. All others
+ * are allocated on the stack or are constant. */
+
+ for (; z < n_iov; z++)
+ free(iov[z].iov_base);
+}
+
+void server_process_audit_message(
+ Server *s,
+ const void *buffer,
+ size_t buffer_size,
+ const struct ucred *ucred,
+ const union sockaddr_union *sa,
+ socklen_t salen) {
+
+ const struct nlmsghdr *nl = buffer;
+
+ assert(s);
+
+ if (buffer_size < ALIGN(sizeof(struct nlmsghdr)))
+ return;
+
+ assert(buffer);
+
+ /* Filter out fake data */
+ if (!sa ||
+ salen != sizeof(struct sockaddr_nl) ||
+ sa->nl.nl_family != AF_NETLINK ||
+ sa->nl.nl_pid != 0) {
+ log_debug("Audit netlink message from invalid sender.");
+ return;
+ }
+
+ if (!ucred || ucred->pid != 0) {
+ log_debug("Audit netlink message with invalid credentials.");
+ return;
+ }
+
+ if (!NLMSG_OK(nl, buffer_size)) {
+ log_error("Audit netlink message truncated.");
+ return;
+ }
+
+ /* Ignore special Netlink messages */
+ if (IN_SET(nl->nlmsg_type, NLMSG_NOOP, NLMSG_ERROR))
+ return;
+
+ /* Except AUDIT_USER, all messages below AUDIT_FIRST_USER_MSG are control messages, let's ignore those */
+ if (nl->nlmsg_type < AUDIT_FIRST_USER_MSG && nl->nlmsg_type != AUDIT_USER)
+ return;
+
+ process_audit_string(s, nl->nlmsg_type, NLMSG_DATA(nl), nl->nlmsg_len - ALIGN(sizeof(struct nlmsghdr)));
+}
+
+static int enable_audit(int fd, bool b) {
+ struct {
+ union {
+ struct nlmsghdr header;
+ uint8_t header_space[NLMSG_HDRLEN];
+ };
+ struct audit_status body;
+ } _packed_ request = {
+ .header.nlmsg_len = NLMSG_LENGTH(sizeof(struct audit_status)),
+ .header.nlmsg_type = AUDIT_SET,
+ .header.nlmsg_flags = NLM_F_REQUEST,
+ .header.nlmsg_seq = 1,
+ .header.nlmsg_pid = 0,
+ .body.mask = AUDIT_STATUS_ENABLED,
+ .body.enabled = b,
+ };
+ union sockaddr_union sa = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_pid = 0,
+ };
+ struct iovec iovec = {
+ .iov_base = &request,
+ .iov_len = NLMSG_LENGTH(sizeof(struct audit_status)),
+ };
+ struct msghdr mh = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa.nl),
+ };
+
+ ssize_t n;
+
+ n = sendmsg(fd, &mh, MSG_NOSIGNAL);
+ if (n < 0)
+ return -errno;
+ if (n != NLMSG_LENGTH(sizeof(struct audit_status)))
+ return -EIO;
+
+ /* We don't wait for the result here, we can't do anything
+ * about it anyway */
+
+ return 0;
+}
+
+int server_open_audit(Server *s) {
+ int r;
+
+ if (s->audit_fd < 0) {
+ static const union sockaddr_union sa = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_pid = 0,
+ .nl.nl_groups = AUDIT_NLGRP_READLOG,
+ };
+
+ s->audit_fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT);
+ if (s->audit_fd < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(errno))
+ log_debug("Audit not supported in the kernel.");
+ else
+ log_warning_errno(errno, "Failed to create audit socket, ignoring: %m");
+
+ return 0;
+ }
+
+ if (bind(s->audit_fd, &sa.sa, sizeof(sa.nl)) < 0) {
+ log_warning_errno(errno,
+ "Failed to join audit multicast group. "
+ "The kernel is probably too old or multicast reading is not supported. "
+ "Ignoring: %m");
+ s->audit_fd = safe_close(s->audit_fd);
+ return 0;
+ }
+ } else
+ (void) fd_nonblock(s->audit_fd, true);
+
+ r = setsockopt_int(s->audit_fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set SO_PASSCRED on audit socket: %m");
+
+ r = sd_event_add_io(s->event, &s->audit_event_source, s->audit_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add audit fd to event loop: %m");
+
+ if (s->set_audit >= 0) {
+ /* We are listening now, try to enable audit if configured so */
+ r = enable_audit(s->audit_fd, s->set_audit);
+ if (r < 0)
+ log_warning_errno(r, "Failed to issue audit enable call: %m");
+ else if (s->set_audit > 0)
+ log_debug("Auditing in kernel turned on.");
+ else
+ log_debug("Auditing in kernel turned off.");
+ }
+
+ return 0;
+}
diff --git a/src/journal/journald-audit.h b/src/journal/journald-audit.h
new file mode 100644
index 0000000..79f3da9
--- /dev/null
+++ b/src/journal/journald-audit.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journald-server.h"
+#include "socket-util.h"
+
+void server_process_audit_message(Server *s, const void *buffer, size_t buffer_size, const struct ucred *ucred, const union sockaddr_union *sa, socklen_t salen);
+
+void process_audit_string(Server *s, int type, const char *data, size_t size);
+
+int server_open_audit(Server *s);
diff --git a/src/journal/journald-console.c b/src/journal/journald-console.c
new file mode 100644
index 0000000..2035e2d
--- /dev/null
+++ b/src/journal/journald-console.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <time.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "journald-console.h"
+#include "journald-server.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "terminal-util.h"
+
+static bool prefix_timestamp(void) {
+
+ static int cached_printk_time = -1;
+
+ if (_unlikely_(cached_printk_time < 0)) {
+ _cleanup_free_ char *p = NULL;
+
+ cached_printk_time =
+ read_one_line_file("/sys/module/printk/parameters/time", &p) >= 0
+ && parse_boolean(p) > 0;
+ }
+
+ return cached_printk_time;
+}
+
+void server_forward_console(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ struct iovec iovec[5];
+ struct timespec ts;
+ char tbuf[STRLEN("[] ") + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1];
+ char header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t)];
+ _cleanup_free_ char *ident_buf = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *tty;
+ int n = 0;
+
+ assert(s);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_console)
+ return;
+
+ /* First: timestamp */
+ if (prefix_timestamp()) {
+ assert_se(clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
+ xsprintf(tbuf, "[%5"PRI_TIME".%06"PRI_NSEC"] ",
+ ts.tv_sec,
+ (nsec_t)ts.tv_nsec / 1000);
+
+ iovec[n++] = IOVEC_MAKE_STRING(tbuf);
+ }
+
+ /* Second: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ (void) get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
+ } else if (identifier) {
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
+ }
+
+ /* Fourth: message */
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ tty = s->tty_path ?: "/dev/console";
+
+ /* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
+ * good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+ * but minimizes the time window the kernel might end up killing journald due to SAK). It also makes things
+ * easier for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
+ fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to open %s for logging: %m", tty);
+ return;
+ }
+
+ if (writev(fd, iovec, n) < 0)
+ log_debug_errno(errno, "Failed to write to %s for logging: %m", tty);
+}
diff --git a/src/journal/journald-console.h b/src/journal/journald-console.h
new file mode 100644
index 0000000..0a26f9c
--- /dev/null
+++ b/src/journal/journald-console.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journald-server.h"
+
+void server_forward_console(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred);
diff --git a/src/journal/journald-context.c b/src/journal/journald-context.c
new file mode 100644
index 0000000..8736495
--- /dev/null
+++ b/src/journal/journald-context.c
@@ -0,0 +1,792 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "cgroup-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "journal-util.h"
+#include "journald-context.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "unaligned.h"
+#include "user-util.h"
+
+/* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc
+ * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we
+ * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time the
+ * log entry was originally created. We hence just increase the "window of inaccuracy" a bit.
+ *
+ * The cache is indexed by the PID. Entries may be "pinned" in the cache, in which case the entries are not removed
+ * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s
+ * are never used (a sad attempt to deal with the UNIX weakness of PIDs reuse), cache entries older than 1s are
+ * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not
+ * flushed out). Data newer than 1s is used immediately without refresh.
+ *
+ * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry
+ * as long as their socket is connected. Note that cache entries are shared between different transports. That means a
+ * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols.
+ *
+ * Caching metadata like this has two major benefits:
+ *
+ * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood.
+ *
+ * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache
+ * entry there's a good chance we can properly map a substantial set of datagram log messages to their originating
+ * service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a
+ * stream connection. This should improve cases where a service process logs immediately before exiting and we
+ * previously had trouble associating the log message with the service.
+ *
+ * NB: With and without the metadata cache: the implicitly added entry metadata in the journal (with the exception of
+ * UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slightly older
+ * and sometimes slightly newer than what was current at the log event).
+ */
+
+/* We refresh every 1s */
+#define REFRESH_USEC (1*USEC_PER_SEC)
+
+/* Data older than 5s we flush out */
+#define MAX_USEC (5*USEC_PER_SEC)
+
+/* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in
+ * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream
+ * clients itself is limited.) */
+#define CACHE_MAX_FALLBACK 128U
+#define CACHE_MAX_MAX (16*1024U)
+#define CACHE_MAX_MIN 64U
+
+static size_t cache_max(void) {
+ static size_t cached = -1;
+
+ if (cached == (size_t) -1) {
+ uint64_t mem_total;
+ int r;
+
+ r = procfs_memory_get(&mem_total, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m");
+ cached = CACHE_MAX_FALLBACK;
+ } else
+ /* Cache entries are usually a few kB, but the process cmdline is controlled by the
+ * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to
+ * 1/8th of memory may be used by the cache.
+ *
+ * In the common case, this formula gives 64 cache entries for each GB of RAM.
+ */
+ cached = CLAMP(mem_total / 8 / sc_arg_max(), CACHE_MAX_MIN, CACHE_MAX_MAX);
+ }
+
+ return cached;
+}
+
+static int client_context_compare(const void *a, const void *b) {
+ const ClientContext *x = a, *y = b;
+ int r;
+
+ r = CMP(x->timestamp, y->timestamp);
+ if (r != 0)
+ return r;
+
+ return CMP(x->pid, y->pid);
+}
+
+static int client_context_new(Server *s, pid_t pid, ClientContext **ret) {
+ ClientContext *c;
+ int r;
+
+ assert(s);
+ assert(pid_is_valid(pid));
+ assert(ret);
+
+ r = hashmap_ensure_allocated(&s->client_contexts, NULL);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare);
+ if (r < 0)
+ return r;
+
+ c = new(ClientContext, 1);
+ if (!c)
+ return -ENOMEM;
+
+ *c = (ClientContext) {
+ .pid = pid,
+ .uid = UID_INVALID,
+ .gid = GID_INVALID,
+ .auditid = AUDIT_SESSION_INVALID,
+ .loginuid = UID_INVALID,
+ .owner_uid = UID_INVALID,
+ .lru_index = PRIOQ_IDX_NULL,
+ .timestamp = USEC_INFINITY,
+ .extra_fields_mtime = NSEC_INFINITY,
+ .log_level_max = -1,
+ .log_ratelimit_interval = s->ratelimit_interval,
+ .log_ratelimit_burst = s->ratelimit_burst,
+ };
+
+ r = hashmap_put(s->client_contexts, PID_TO_PTR(pid), c);
+ if (r < 0) {
+ free(c);
+ return r;
+ }
+
+ *ret = c;
+ return 0;
+}
+
+static void client_context_reset(Server *s, ClientContext *c) {
+ assert(s);
+ assert(c);
+
+ c->timestamp = USEC_INFINITY;
+
+ c->uid = UID_INVALID;
+ c->gid = GID_INVALID;
+
+ c->comm = mfree(c->comm);
+ c->exe = mfree(c->exe);
+ c->cmdline = mfree(c->cmdline);
+ c->capeff = mfree(c->capeff);
+
+ c->auditid = AUDIT_SESSION_INVALID;
+ c->loginuid = UID_INVALID;
+
+ c->cgroup = mfree(c->cgroup);
+ c->session = mfree(c->session);
+ c->owner_uid = UID_INVALID;
+ c->unit = mfree(c->unit);
+ c->user_unit = mfree(c->user_unit);
+ c->slice = mfree(c->slice);
+ c->user_slice = mfree(c->user_slice);
+
+ c->invocation_id = SD_ID128_NULL;
+
+ c->label = mfree(c->label);
+ c->label_size = 0;
+
+ c->extra_fields_iovec = mfree(c->extra_fields_iovec);
+ c->extra_fields_n_iovec = 0;
+ c->extra_fields_data = mfree(c->extra_fields_data);
+ c->extra_fields_mtime = NSEC_INFINITY;
+
+ c->log_level_max = -1;
+
+ c->log_ratelimit_interval = s->ratelimit_interval;
+ c->log_ratelimit_burst = s->ratelimit_burst;
+}
+
+static ClientContext* client_context_free(Server *s, ClientContext *c) {
+ assert(s);
+
+ if (!c)
+ return NULL;
+
+ assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c);
+
+ if (c->in_lru)
+ assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
+
+ client_context_reset(s, c);
+
+ return mfree(c);
+}
+
+static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) {
+ assert(c);
+ assert(pid_is_valid(c->pid));
+
+ /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */
+ if (ucred && uid_is_valid(ucred->uid))
+ c->uid = ucred->uid;
+ else
+ (void) get_process_uid(c->pid, &c->uid);
+
+ if (ucred && gid_is_valid(ucred->gid))
+ c->gid = ucred->gid;
+ else
+ (void) get_process_gid(c->pid, &c->gid);
+}
+
+static void client_context_read_basic(ClientContext *c) {
+ char *t;
+
+ assert(c);
+ assert(pid_is_valid(c->pid));
+
+ if (get_process_comm(c->pid, &t) >= 0)
+ free_and_replace(c->comm, t);
+
+ if (get_process_exe(c->pid, &t) >= 0)
+ free_and_replace(c->exe, t);
+
+ if (get_process_cmdline(c->pid, SIZE_MAX, 0, &t) >= 0)
+ free_and_replace(c->cmdline, t);
+
+ if (get_process_capeff(c->pid, &t) >= 0)
+ free_and_replace(c->capeff, t);
+}
+
+static int client_context_read_label(
+ ClientContext *c,
+ const char *label, size_t label_size) {
+
+ assert(c);
+ assert(pid_is_valid(c->pid));
+ assert(label_size == 0 || label);
+
+ if (label_size > 0) {
+ char *l;
+
+ /* If we got an SELinux label passed in it counts. */
+
+ l = newdup_suffix0(char, label, label_size);
+ if (!l)
+ return -ENOMEM;
+
+ free_and_replace(c->label, l);
+ c->label_size = label_size;
+ }
+#if HAVE_SELINUX
+ else {
+ char *con;
+
+ /* If we got no SELinux label passed in, let's try to acquire one */
+
+ if (getpidcon(c->pid, &con) >= 0) {
+ free_and_replace(c->label, con);
+ c->label_size = strlen(c->label);
+ }
+ }
+#endif
+
+ return 0;
+}
+
+static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(c);
+
+ /* Try to acquire the current cgroup path */
+ r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t);
+ if (r < 0 || empty_or_root(t)) {
+ /* We use the unit ID passed in as fallback if we have nothing cached yet and cg_pid_get_path_shifted()
+ * failed or process is running in a root cgroup. Zombie processes are automatically migrated to root cgroup
+ * on cgroup v1 and we want to be able to map log messages from them too. */
+ if (unit_id && !c->unit) {
+ c->unit = strdup(unit_id);
+ if (c->unit)
+ return 0;
+ }
+
+ return r;
+ }
+
+ /* Let's shortcut this if the cgroup path didn't change */
+ if (streq_ptr(c->cgroup, t))
+ return 0;
+
+ free_and_replace(c->cgroup, t);
+
+ (void) cg_path_get_session(c->cgroup, &t);
+ free_and_replace(c->session, t);
+
+ if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0)
+ c->owner_uid = UID_INVALID;
+
+ (void) cg_path_get_unit(c->cgroup, &t);
+ free_and_replace(c->unit, t);
+
+ (void) cg_path_get_user_unit(c->cgroup, &t);
+ free_and_replace(c->user_unit, t);
+
+ (void) cg_path_get_slice(c->cgroup, &t);
+ free_and_replace(c->slice, t);
+
+ (void) cg_path_get_user_slice(c->cgroup, &t);
+ free_and_replace(c->user_slice, t);
+
+ return 0;
+}
+
+static int client_context_read_invocation_id(
+ Server *s,
+ ClientContext *c) {
+
+ _cleanup_free_ char *p = NULL, *value = NULL;
+ int r;
+
+ assert(s);
+ assert(c);
+
+ /* Read the invocation ID of a unit off a unit.
+ * PID 1 stores it in a per-unit symlink in /run/systemd/units/
+ * User managers store it in a per-unit symlink under /run/user/<uid>/systemd/units/ */
+
+ if (!c->unit)
+ return 0;
+
+ if (c->user_unit) {
+ r = asprintf(&p, "/run/user/" UID_FMT "/systemd/units/invocation:%s", c->owner_uid, c->user_unit);
+ if (r < 0)
+ return r;
+ } else {
+ p = strjoin("/run/systemd/units/invocation:", c->unit);
+ if (!p)
+ return -ENOMEM;
+ }
+
+ r = readlink_malloc(p, &value);
+ if (r < 0)
+ return r;
+
+ return sd_id128_from_string(value, &c->invocation_id);
+}
+
+static int client_context_read_log_level_max(
+ Server *s,
+ ClientContext *c) {
+
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r, ll;
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-level-max:", c->unit);
+ r = readlink_malloc(p, &value);
+ if (r < 0)
+ return r;
+
+ ll = log_level_from_string(value);
+ if (ll < 0)
+ return -EINVAL;
+
+ c->log_level_max = ll;
+ return 0;
+}
+
+static int client_context_read_extra_fields(
+ Server *s,
+ ClientContext *c) {
+
+ size_t size = 0, n_iovec = 0, n_allocated = 0, left;
+ _cleanup_free_ struct iovec *iovec = NULL;
+ _cleanup_free_ void *data = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ struct stat st;
+ const char *p;
+ uint8_t *q;
+ int r;
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-extra-fields:", c->unit);
+
+ if (c->extra_fields_mtime != NSEC_INFINITY) {
+ if (stat(p, &st) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime)
+ return 0;
+ }
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new
+ * one, that matches the stuff we are reading */
+ return -errno;
+
+ r = read_full_stream(f, (char**) &data, &size);
+ if (r < 0)
+ return r;
+
+ q = data, left = size;
+ while (left > 0) {
+ uint8_t *field, *eq;
+ uint64_t v, n;
+
+ if (left < sizeof(uint64_t))
+ return -EBADMSG;
+
+ v = unaligned_read_le64(q);
+ if (v < 2)
+ return -EBADMSG;
+
+ n = sizeof(uint64_t) + v;
+ if (left < n)
+ return -EBADMSG;
+
+ field = q + sizeof(uint64_t);
+
+ eq = memchr(field, '=', v);
+ if (!eq)
+ return -EBADMSG;
+
+ if (!journal_field_valid((const char *) field, eq - field, false))
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(iovec, n_allocated, n_iovec+1))
+ return -ENOMEM;
+
+ iovec[n_iovec++] = IOVEC_MAKE(field, v);
+
+ left -= n, q += n;
+ }
+
+ free(c->extra_fields_iovec);
+ free(c->extra_fields_data);
+
+ c->extra_fields_iovec = TAKE_PTR(iovec);
+ c->extra_fields_n_iovec = n_iovec;
+ c->extra_fields_data = TAKE_PTR(data);
+ c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim);
+
+ return 0;
+}
+
+static int client_context_read_log_ratelimit_interval(ClientContext *c) {
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r;
+
+ assert(c);
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-interval:", c->unit);
+ r = readlink_malloc(p, &value);
+ if (r < 0)
+ return r;
+
+ return safe_atou64(value, &c->log_ratelimit_interval);
+}
+
+static int client_context_read_log_ratelimit_burst(ClientContext *c) {
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r;
+
+ assert(c);
+
+ if (!c->unit)
+ return 0;
+
+ p = strjoina("/run/systemd/units/log-rate-limit-burst:", c->unit);
+ r = readlink_malloc(p, &value);
+ if (r < 0)
+ return r;
+
+ return safe_atou(value, &c->log_ratelimit_burst);
+}
+
+static void client_context_really_refresh(
+ Server *s,
+ ClientContext *c,
+ const struct ucred *ucred,
+ const char *label, size_t label_size,
+ const char *unit_id,
+ usec_t timestamp) {
+
+ assert(s);
+ assert(c);
+ assert(pid_is_valid(c->pid));
+
+ if (timestamp == USEC_INFINITY)
+ timestamp = now(CLOCK_MONOTONIC);
+
+ client_context_read_uid_gid(c, ucred);
+ client_context_read_basic(c);
+ (void) client_context_read_label(c, label, label_size);
+
+ (void) audit_session_from_pid(c->pid, &c->auditid);
+ (void) audit_loginuid_from_pid(c->pid, &c->loginuid);
+
+ (void) client_context_read_cgroup(s, c, unit_id);
+ (void) client_context_read_invocation_id(s, c);
+ (void) client_context_read_log_level_max(s, c);
+ (void) client_context_read_extra_fields(s, c);
+ (void) client_context_read_log_ratelimit_interval(c);
+ (void) client_context_read_log_ratelimit_burst(c);
+
+ c->timestamp = timestamp;
+
+ if (c->in_lru) {
+ assert(c->n_ref == 0);
+ assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0);
+ }
+}
+
+void client_context_maybe_refresh(
+ Server *s,
+ ClientContext *c,
+ const struct ucred *ucred,
+ const char *label, size_t label_size,
+ const char *unit_id,
+ usec_t timestamp) {
+
+ assert(s);
+ assert(c);
+
+ if (timestamp == USEC_INFINITY)
+ timestamp = now(CLOCK_MONOTONIC);
+
+ /* No cached data so far? Let's fill it up */
+ if (c->timestamp == USEC_INFINITY)
+ goto refresh;
+
+ /* If the data isn't pinned and if the cashed data is older than the upper limit, we flush it out
+ * entirely. This follows the logic that as long as an entry is pinned the PID reuse is unlikely. */
+ if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) {
+ client_context_reset(s, c);
+ goto refresh;
+ }
+
+ /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */
+ if (c->timestamp + REFRESH_USEC < timestamp)
+ goto refresh;
+
+ /* If the data passed along doesn't match the cached data we also do a refresh */
+ if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid)
+ goto refresh;
+
+ if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid)
+ goto refresh;
+
+ if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0))
+ goto refresh;
+
+ return;
+
+refresh:
+ client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp);
+}
+
+static void client_context_try_shrink_to(Server *s, size_t limit) {
+ ClientContext *c;
+ usec_t t;
+
+ assert(s);
+
+ /* Flush any cache entries for PIDs that have already moved on. Don't do this
+ * too often, since it's a slow process. */
+ t = now(CLOCK_MONOTONIC);
+ if (s->last_cache_pid_flush + MAX_USEC < t) {
+ unsigned n = prioq_size(s->client_contexts_lru), idx = 0;
+
+ /* We do a number of iterations based on the initial size of the prioq. When we remove an
+ * item, a new item is moved into its places, and items to the right might be reshuffled.
+ */
+ for (unsigned i = 0; i < n; i++) {
+ c = prioq_peek_by_index(s->client_contexts_lru, idx);
+
+ assert(c->n_ref == 0);
+
+ if (!pid_is_unwaited(c->pid))
+ client_context_free(s, c);
+ else
+ idx ++;
+ }
+
+ s->last_cache_pid_flush = t;
+ }
+
+ /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
+ * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
+ * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */
+
+ while (hashmap_size(s->client_contexts) > limit) {
+ c = prioq_pop(s->client_contexts_lru);
+ if (!c)
+ break; /* All remaining entries are pinned, give up */
+
+ assert(c->in_lru);
+ assert(c->n_ref == 0);
+
+ c->in_lru = false;
+
+ client_context_free(s, c);
+ }
+}
+
+void client_context_flush_all(Server *s) {
+ assert(s);
+
+ /* Flush out all remaining entries. This assumes all references are already dropped. */
+
+ s->my_context = client_context_release(s, s->my_context);
+ s->pid1_context = client_context_release(s, s->pid1_context);
+
+ client_context_try_shrink_to(s, 0);
+
+ assert(prioq_size(s->client_contexts_lru) == 0);
+ assert(hashmap_size(s->client_contexts) == 0);
+
+ s->client_contexts_lru = prioq_free(s->client_contexts_lru);
+ s->client_contexts = hashmap_free(s->client_contexts);
+}
+
+static int client_context_get_internal(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ bool add_ref,
+ ClientContext **ret) {
+
+ ClientContext *c;
+ int r;
+
+ assert(s);
+ assert(ret);
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ c = hashmap_get(s->client_contexts, PID_TO_PTR(pid));
+ if (c) {
+
+ if (add_ref) {
+ if (c->in_lru) {
+ /* The entry wasn't pinned so far, let's remove it from the LRU list then */
+ assert(c->n_ref == 0);
+ assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0);
+ c->in_lru = false;
+ }
+
+ c->n_ref++;
+ }
+
+ client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);
+
+ *ret = c;
+ return 0;
+ }
+
+ client_context_try_shrink_to(s, cache_max()-1);
+
+ r = client_context_new(s, pid, &c);
+ if (r < 0)
+ return r;
+
+ if (add_ref)
+ c->n_ref++;
+ else {
+ r = prioq_put(s->client_contexts_lru, c, &c->lru_index);
+ if (r < 0) {
+ client_context_free(s, c);
+ return r;
+ }
+
+ c->in_lru = true;
+ }
+
+ client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY);
+
+ *ret = c;
+ return 0;
+}
+
+int client_context_get(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ ClientContext **ret) {
+
+ return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret);
+}
+
+int client_context_acquire(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ ClientContext **ret) {
+
+ return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret);
+};
+
+ClientContext *client_context_release(Server *s, ClientContext *c) {
+ assert(s);
+
+ if (!c)
+ return NULL;
+
+ assert(c->n_ref > 0);
+ assert(!c->in_lru);
+
+ c->n_ref--;
+ if (c->n_ref > 0)
+ return NULL;
+
+ /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it
+ * right-away */
+
+ if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0)
+ client_context_free(s, c);
+ else
+ c->in_lru = true;
+
+ return NULL;
+}
+
+void client_context_acquire_default(Server *s) {
+ int r;
+
+ assert(s);
+
+ /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to
+ * generate driver messages. */
+
+ if (!s->my_context) {
+ struct ucred ucred = {
+ .pid = getpid_cached(),
+ .uid = getuid(),
+ .gid = getgid(),
+ };
+
+ r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to acquire our own context, ignoring: %m");
+ }
+
+ if (!s->namespace && !s->pid1_context) {
+ /* Acquire PID1's context, but only if we are in non-namespaced mode, since PID 1 is only
+ * going to log to the non-namespaced journal instance. */
+
+ r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to acquire PID1's context, ignoring: %m");
+
+ }
+}
diff --git a/src/journal/journald-context.h b/src/journal/journald-context.h
new file mode 100644
index 0000000..9bf74b2
--- /dev/null
+++ b/src/journal/journald-context.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "time-util.h"
+
+typedef struct ClientContext ClientContext;
+
+#include "journald-server.h"
+
+struct ClientContext {
+ unsigned n_ref;
+ unsigned lru_index;
+ usec_t timestamp;
+ bool in_lru;
+
+ pid_t pid;
+ uid_t uid;
+ gid_t gid;
+
+ char *comm;
+ char *exe;
+ char *cmdline;
+ char *capeff;
+
+ uint32_t auditid;
+ uid_t loginuid;
+
+ char *cgroup;
+ char *session;
+ uid_t owner_uid;
+
+ char *unit;
+ char *user_unit;
+
+ char *slice;
+ char *user_slice;
+
+ sd_id128_t invocation_id;
+
+ char *label;
+ size_t label_size;
+
+ int log_level_max;
+
+ struct iovec *extra_fields_iovec;
+ size_t extra_fields_n_iovec;
+ void *extra_fields_data;
+ nsec_t extra_fields_mtime;
+
+ usec_t log_ratelimit_interval;
+ unsigned log_ratelimit_burst;
+};
+
+int client_context_get(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ ClientContext **ret);
+
+int client_context_acquire(
+ Server *s,
+ pid_t pid,
+ const struct ucred *ucred,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ ClientContext **ret);
+
+ClientContext* client_context_release(Server *s, ClientContext *c);
+
+void client_context_maybe_refresh(
+ Server *s,
+ ClientContext *c,
+ const struct ucred *ucred,
+ const char *label, size_t label_size,
+ const char *unit_id,
+ usec_t tstamp);
+
+void client_context_acquire_default(Server *s);
+void client_context_flush_all(Server *s);
+
+static inline size_t client_context_extra_fields_n_iovec(const ClientContext *c) {
+ return c ? c->extra_fields_n_iovec : 0;
+}
+
+static inline bool client_context_test_priority(const ClientContext *c, int priority) {
+ if (!c)
+ return true;
+
+ if (c->log_level_max < 0)
+ return true;
+
+ return LOG_PRI(priority) <= c->log_level_max;
+}
diff --git a/src/journal/journald-gperf.gperf b/src/journal/journald-gperf.gperf
new file mode 100644
index 0000000..c70ac9a
--- /dev/null
+++ b/src/journal/journald-gperf.gperf
@@ -0,0 +1,52 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include <sys/socket.h>
+#include "conf-parser.h"
+#include "journald-server.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name journald_gperf_hash
+%define lookup-function-name journald_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Journal.Storage, config_parse_storage, 0, offsetof(Server, storage)
+Journal.Compress, config_parse_compress, 0, offsetof(Server, compress)
+Journal.Seal, config_parse_bool, 0, offsetof(Server, seal)
+Journal.ReadKMsg, config_parse_bool, 0, offsetof(Server, read_kmsg)
+Journal.Audit, config_parse_tristate, 0, offsetof(Server, set_audit)
+Journal.SyncIntervalSec, config_parse_sec, 0, offsetof(Server, sync_interval_usec)
+# The following is a legacy name for compatibility
+Journal.RateLimitInterval, config_parse_sec, 0, offsetof(Server, ratelimit_interval)
+Journal.RateLimitIntervalSec,config_parse_sec, 0, offsetof(Server, ratelimit_interval)
+Journal.RateLimitBurst, config_parse_unsigned, 0, offsetof(Server, ratelimit_burst)
+Journal.SystemMaxUse, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_use)
+Journal.SystemMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_size)
+Journal.SystemKeepFree, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.keep_free)
+Journal.SystemMaxFiles, config_parse_uint64, 0, offsetof(Server, system_storage.metrics.n_max_files)
+Journal.RuntimeMaxUse, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_use)
+Journal.RuntimeMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_size)
+Journal.RuntimeKeepFree, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.keep_free)
+Journal.RuntimeMaxFiles, config_parse_uint64, 0, offsetof(Server, runtime_storage.metrics.n_max_files)
+Journal.MaxRetentionSec, config_parse_sec, 0, offsetof(Server, max_retention_usec)
+Journal.MaxFileSec, config_parse_sec, 0, offsetof(Server, max_file_usec)
+Journal.ForwardToSyslog, config_parse_bool, 0, offsetof(Server, forward_to_syslog)
+Journal.ForwardToKMsg, config_parse_bool, 0, offsetof(Server, forward_to_kmsg)
+Journal.ForwardToConsole, config_parse_bool, 0, offsetof(Server, forward_to_console)
+Journal.ForwardToWall, config_parse_bool, 0, offsetof(Server, forward_to_wall)
+Journal.TTYPath, config_parse_path, 0, offsetof(Server, tty_path)
+Journal.MaxLevelStore, config_parse_log_level, 0, offsetof(Server, max_level_store)
+Journal.MaxLevelSyslog, config_parse_log_level, 0, offsetof(Server, max_level_syslog)
+Journal.MaxLevelKMsg, config_parse_log_level, 0, offsetof(Server, max_level_kmsg)
+Journal.MaxLevelConsole, config_parse_log_level, 0, offsetof(Server, max_level_console)
+Journal.MaxLevelWall, config_parse_log_level, 0, offsetof(Server, max_level_wall)
+Journal.SplitMode, config_parse_split_mode, 0, offsetof(Server, split_mode)
+Journal.LineMax, config_parse_line_max, 0, offsetof(Server, line_max)
diff --git a/src/journal/journald-kmsg.c b/src/journal/journald-kmsg.c
new file mode 100644
index 0000000..e7255b0
--- /dev/null
+++ b/src/journal/journald-kmsg.c
@@ -0,0 +1,454 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "journald-kmsg.h"
+#include "journald-server.h"
+#include "journald-syslog.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+void server_forward_kmsg(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ _cleanup_free_ char *ident_buf = NULL;
+ struct iovec iovec[5];
+ char header_priority[DECIMAL_STR_MAX(priority) + 3],
+ header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1];
+ int n = 0;
+
+ assert(s);
+ assert(priority >= 0);
+ assert(priority <= 999);
+ assert(message);
+
+ if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
+ return;
+
+ if (_unlikely_(s->dev_kmsg_fd < 0))
+ return;
+
+ /* Never allow messages with kernel facility to be written to
+ * kmsg, regardless where the data comes from. */
+ priority = syslog_fixup_facility(priority);
+
+ /* First: priority field */
+ xsprintf(header_priority, "<%i>", priority);
+ iovec[n++] = IOVEC_MAKE_STRING(header_priority);
+
+ /* Second: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ (void) get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
+ } else if (identifier) {
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
+ }
+
+ /* Fourth: message */
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+ iovec[n++] = IOVEC_MAKE_STRING("\n");
+
+ if (writev(s->dev_kmsg_fd, iovec, n) < 0)
+ log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
+}
+
+static bool is_us(const char *identifier, const char *pid) {
+ pid_t pid_num;
+
+ if (!identifier || !pid)
+ return false;
+
+ if (parse_pid(pid, &pid_num) < 0)
+ return false;
+
+ return pid_num == getpid_cached() &&
+ streq(identifier, program_invocation_short_name);
+}
+
+void dev_kmsg_record(Server *s, char *p, size_t l) {
+
+ _cleanup_free_ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL, *identifier = NULL, *pid = NULL;
+ struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
+ char *kernel_device = NULL;
+ unsigned long long usec;
+ size_t n = 0, z = 0, j;
+ int priority, r;
+ char *e, *f, *k;
+ uint64_t serial;
+ size_t pl;
+
+ assert(s);
+ assert(p);
+
+ if (l <= 0)
+ return;
+
+ e = memchr(p, ',', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ r = safe_atoi(p, &priority);
+ if (r < 0 || priority < 0 || priority > 999)
+ return;
+
+ if (s->forward_to_kmsg && LOG_FAC(priority) != LOG_KERN)
+ return;
+
+ l -= (e - p) + 1;
+ p = e + 1;
+ e = memchr(p, ',', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ r = safe_atou64(p, &serial);
+ if (r < 0)
+ return;
+
+ if (s->kernel_seqnum) {
+ /* We already read this one? */
+ if (serial < *s->kernel_seqnum)
+ return;
+
+ /* Did we lose any? */
+ if (serial > *s->kernel_seqnum)
+ server_driver_message(s, 0,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_MISSED_STR,
+ LOG_MESSAGE("Missed %"PRIu64" kernel messages",
+ serial - *s->kernel_seqnum),
+ NULL);
+
+ /* Make sure we never read this one again. Note that
+ * we always store the next message serial we expect
+ * here, simply because this makes handling the first
+ * message with serial 0 easy. */
+ *s->kernel_seqnum = serial + 1;
+ }
+
+ l -= (e - p) + 1;
+ p = e + 1;
+ f = memchr(p, ';', l);
+ if (!f)
+ return;
+ /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
+ e = memchr(p, ',', l);
+ if (!e || f < e)
+ e = f;
+ *e = 0;
+
+ r = safe_atollu(p, &usec);
+ if (r < 0)
+ return;
+
+ l -= (f - p) + 1;
+ p = f + 1;
+ e = memchr(p, '\n', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ pl = e - p;
+ l -= (e - p) + 1;
+ k = e + 1;
+
+ for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
+ char *m;
+ /* Metadata fields attached */
+
+ if (*k != ' ')
+ break;
+
+ k++, l--;
+
+ e = memchr(k, '\n', l);
+ if (!e)
+ goto finish;
+
+ *e = 0;
+
+ if (cunescape_length_with_prefix(k, e - k, "_KERNEL_", UNESCAPE_RELAX, &m) < 0)
+ break;
+
+ if (startswith(m, "_KERNEL_DEVICE="))
+ kernel_device = m + 15;
+
+ iovec[n++] = IOVEC_MAKE_STRING(m);
+ z++;
+
+ l -= (e - k) + 1;
+ k = e + 1;
+ }
+
+ if (kernel_device) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+
+ if (sd_device_new_from_device_id(&d, kernel_device) >= 0) {
+ const char *g;
+ char *b;
+
+ if (sd_device_get_devname(d, &g) >= 0) {
+ b = strjoin("_UDEV_DEVNODE=", g);
+ if (b) {
+ iovec[n++] = IOVEC_MAKE_STRING(b);
+ z++;
+ }
+ }
+
+ if (sd_device_get_sysname(d, &g) >= 0) {
+ b = strjoin("_UDEV_SYSNAME=", g);
+ if (b) {
+ iovec[n++] = IOVEC_MAKE_STRING(b);
+ z++;
+ }
+ }
+
+ j = 0;
+ FOREACH_DEVICE_DEVLINK(d, g) {
+
+ if (j >= N_IOVEC_UDEV_FIELDS)
+ break;
+
+ b = strjoin("_UDEV_DEVLINK=", g);
+ if (b) {
+ iovec[n++] = IOVEC_MAKE_STRING(b);
+ z++;
+ }
+
+ j++;
+ }
+ }
+ }
+
+ if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
+ iovec[n++] = IOVEC_MAKE_STRING(source_time);
+
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel");
+
+ if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
+
+ if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
+
+ if (LOG_FAC(priority) == LOG_KERN)
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel");
+ else {
+ pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
+
+ /* Avoid any messages we generated ourselves via
+ * log_info() and friends. */
+ if (is_us(identifier, pid))
+ goto finish;
+
+ if (identifier) {
+ syslog_identifier = strjoin("SYSLOG_IDENTIFIER=", identifier);
+ if (syslog_identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
+ }
+
+ if (pid) {
+ syslog_pid = strjoin("SYSLOG_PID=", pid);
+ if (syslog_pid)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
+ }
+ }
+
+ if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+
+ server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority, 0);
+
+finish:
+ for (j = 0; j < z; j++)
+ free(iovec[j].iov_base);
+}
+
+static int server_read_dev_kmsg(Server *s) {
+ char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
+ ssize_t l;
+
+ assert(s);
+ assert(s->dev_kmsg_fd >= 0);
+
+ l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
+ if (l == 0)
+ return 0;
+ if (l < 0) {
+ /* Old kernels who don't allow reading from /dev/kmsg
+ * return EINVAL when we try. So handle this cleanly,
+ * but don' try to ever read from it again. */
+ if (errno == EINVAL) {
+ s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source);
+ return 0;
+ }
+
+ if (IN_SET(errno, EAGAIN, EINTR, EPIPE))
+ return 0;
+
+ return log_error_errno(errno, "Failed to read from /dev/kmsg: %m");
+ }
+
+ dev_kmsg_record(s, buffer, l);
+ return 1;
+}
+
+int server_flush_dev_kmsg(Server *s) {
+ int r;
+
+ assert(s);
+
+ if (s->dev_kmsg_fd < 0)
+ return 0;
+
+ if (!s->dev_kmsg_readable)
+ return 0;
+
+ log_debug("Flushing /dev/kmsg...");
+
+ for (;;) {
+ r = server_read_dev_kmsg(s);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int dispatch_dev_kmsg(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+
+ assert(es);
+ assert(fd == s->dev_kmsg_fd);
+ assert(s);
+
+ if (revents & EPOLLERR)
+ log_warning("/dev/kmsg buffer overrun, some messages lost.");
+
+ if (!(revents & EPOLLIN))
+ log_error("Got invalid event from epoll for /dev/kmsg: %"PRIx32, revents);
+
+ return server_read_dev_kmsg(s);
+}
+
+int server_open_dev_kmsg(Server *s) {
+ mode_t mode;
+ int r;
+
+ assert(s);
+
+ if (s->read_kmsg)
+ mode = O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
+ else
+ mode = O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
+
+ s->dev_kmsg_fd = open("/dev/kmsg", mode);
+ if (s->dev_kmsg_fd < 0) {
+ log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
+ "Failed to open /dev/kmsg, ignoring: %m");
+ return 0;
+ }
+
+ if (!s->read_kmsg)
+ return 0;
+
+ r = sd_event_add_io(s->event, &s->dev_kmsg_event_source, s->dev_kmsg_fd, EPOLLIN, dispatch_dev_kmsg, s);
+ if (r < 0) {
+
+ /* This will fail with EPERM on older kernels where
+ * /dev/kmsg is not readable. */
+ if (r == -EPERM) {
+ r = 0;
+ goto fail;
+ }
+
+ log_error_errno(r, "Failed to add /dev/kmsg fd to event loop: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_priority(s->dev_kmsg_event_source, SD_EVENT_PRIORITY_IMPORTANT+10);
+ if (r < 0) {
+ log_error_errno(r, "Failed to adjust priority of kmsg event source: %m");
+ goto fail;
+ }
+
+ s->dev_kmsg_readable = true;
+
+ return 0;
+
+fail:
+ s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source);
+ s->dev_kmsg_fd = safe_close(s->dev_kmsg_fd);
+
+ return r;
+}
+
+int server_open_kernel_seqnum(Server *s) {
+ _cleanup_close_ int fd = -1;
+ const char *fn;
+ uint64_t *p;
+ int r;
+
+ assert(s);
+
+ /* We store the seqnum we last read in an mmapped file. That way we can just use it like a variable,
+ * but it is persistent and automatically flushed at reboot. */
+
+ if (!s->read_kmsg)
+ return 0;
+
+ fn = strjoina(s->runtime_directory, "/kernel-seqnum");
+ fd = open(fn, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
+ if (fd < 0) {
+ log_error_errno(errno, "Failed to open %s, ignoring: %m", fn);
+ return 0;
+ }
+
+ r = posix_fallocate(fd, 0, sizeof(uint64_t));
+ if (r != 0) {
+ log_error_errno(r, "Failed to allocate sequential number file, ignoring: %m");
+ return 0;
+ }
+
+ p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED) {
+ log_error_errno(errno, "Failed to map sequential number file, ignoring: %m");
+ return 0;
+ }
+
+ s->kernel_seqnum = p;
+
+ return 0;
+}
diff --git a/src/journal/journald-kmsg.h b/src/journal/journald-kmsg.h
new file mode 100644
index 0000000..bd288c5
--- /dev/null
+++ b/src/journal/journald-kmsg.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journald-server.h"
+
+int server_open_dev_kmsg(Server *s);
+int server_flush_dev_kmsg(Server *s);
+
+void server_forward_kmsg(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred);
+
+int server_open_kernel_seqnum(Server *s);
+
+void dev_kmsg_record(Server *s, char *p, size_t l);
diff --git a/src/journal/journald-native.c b/src/journal/journald-native.c
new file mode 100644
index 0000000..1c5849e
--- /dev/null
+++ b/src/journal/journald-native.c
@@ -0,0 +1,505 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "journal-importer.h"
+#include "journal-util.h"
+#include "journald-console.h"
+#include "journald-kmsg.h"
+#include "journald-native.h"
+#include "journald-server.h"
+#include "journald-syslog.h"
+#include "journald-wall.h"
+#include "memfd-util.h"
+#include "memory-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unaligned.h"
+
+static bool allow_object_pid(const struct ucred *ucred) {
+ return ucred && ucred->uid == 0;
+}
+
+static void server_process_entry_meta(
+ const char *p, size_t l,
+ const struct ucred *ucred,
+ int *priority,
+ char **identifier,
+ char **message,
+ pid_t *object_pid) {
+
+ /* We need to determine the priority of this entry for the rate limiting logic */
+
+ if (l == 10 &&
+ startswith(p, "PRIORITY=") &&
+ p[9] >= '0' && p[9] <= '9')
+ *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
+
+ else if (l == 17 &&
+ startswith(p, "SYSLOG_FACILITY=") &&
+ p[16] >= '0' && p[16] <= '9')
+ *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
+
+ else if (l == 18 &&
+ startswith(p, "SYSLOG_FACILITY=") &&
+ p[16] >= '0' && p[16] <= '9' &&
+ p[17] >= '0' && p[17] <= '9')
+ *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
+
+ else if (l >= 19 &&
+ startswith(p, "SYSLOG_IDENTIFIER=")) {
+ char *t;
+
+ t = memdup_suffix0(p + 18, l - 18);
+ if (t) {
+ free(*identifier);
+ *identifier = t;
+ }
+
+ } else if (l >= 8 &&
+ startswith(p, "MESSAGE=")) {
+ char *t;
+
+ t = memdup_suffix0(p + 8, l - 8);
+ if (t) {
+ free(*message);
+ *message = t;
+ }
+
+ } else if (l > STRLEN("OBJECT_PID=") &&
+ l < STRLEN("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
+ startswith(p, "OBJECT_PID=") &&
+ allow_object_pid(ucred)) {
+ char buf[DECIMAL_STR_MAX(pid_t)];
+ memcpy(buf, p + STRLEN("OBJECT_PID="),
+ l - STRLEN("OBJECT_PID="));
+ buf[l-STRLEN("OBJECT_PID=")] = '\0';
+
+ (void) parse_pid(buf, object_pid);
+ }
+}
+
+static int server_process_entry(
+ Server *s,
+ const void *buffer, size_t *remaining,
+ ClientContext *context,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len) {
+
+ /* Process a single entry from a native message. Returns 0 if nothing special happened and the message
+ * processing should continue, and a negative or positive value otherwise.
+ *
+ * Note that *remaining is altered on both success and failure. */
+
+ size_t n = 0, j, tn = (size_t) -1, m = 0, entry_size = 0;
+ char *identifier = NULL, *message = NULL;
+ struct iovec *iovec = NULL;
+ int priority = LOG_INFO;
+ pid_t object_pid = 0;
+ const char *p;
+ int r = 1;
+
+ p = buffer;
+
+ while (*remaining > 0) {
+ const char *e, *q;
+
+ e = memchr(p, '\n', *remaining);
+
+ if (!e) {
+ /* Trailing noise, let's ignore it, and flush what we collected */
+ log_debug("Received message with trailing noise, ignoring.");
+ break; /* finish processing of the message */
+ }
+
+ if (e == p) {
+ /* Entry separator */
+ *remaining -= 1;
+ break;
+ }
+
+ if (IN_SET(*p, '.', '#')) {
+ /* Ignore control commands for now, and comments too. */
+ *remaining -= (e - p) + 1;
+ p = e + 1;
+ continue;
+ }
+
+ /* A property follows */
+ if (n > ENTRY_FIELD_COUNT_MAX) {
+ log_debug("Received an entry that has more than " STRINGIFY(ENTRY_FIELD_COUNT_MAX) " fields, ignoring entry.");
+ goto finish;
+ }
+
+ /* n existing properties, 1 new, +1 for _TRANSPORT */
+ if (!GREEDY_REALLOC(iovec, m,
+ n + 2 +
+ N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS +
+ client_context_extra_fields_n_iovec(context))) {
+ r = log_oom();
+ goto finish;
+ }
+
+ q = memchr(p, '=', e - p);
+ if (q) {
+ if (journal_field_valid(p, q - p, false)) {
+ size_t l;
+
+ l = e - p;
+ if (l > DATA_SIZE_MAX) {
+ log_debug("Received text block of %zu bytes is too large, ignoring entry.", l);
+ goto finish;
+ }
+
+ if (entry_size + l + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+ log_debug("Entry is too big (%zu bytes after processing %zu entries), ignoring entry.",
+ entry_size + l, n + 1);
+ goto finish;
+ }
+
+ /* If the field name starts with an underscore, skip the variable, since that indicates
+ * a trusted field */
+ iovec[n++] = IOVEC_MAKE((char*) p, l);
+ entry_size += l;
+
+ server_process_entry_meta(p, l, ucred,
+ &priority,
+ &identifier,
+ &message,
+ &object_pid);
+ }
+
+ *remaining -= (e - p) + 1;
+ p = e + 1;
+ continue;
+ } else {
+ uint64_t l, total;
+ char *k;
+
+ if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
+ log_debug("Failed to parse message, ignoring.");
+ break;
+ }
+
+ l = unaligned_read_le64(e + 1);
+ if (l > DATA_SIZE_MAX) {
+ log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring entry.", l);
+ goto finish;
+ }
+
+ total = (e - p) + 1 + l;
+ if (entry_size + total + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+ log_debug("Entry is too big (%"PRIu64"bytes after processing %zu fields), ignoring.",
+ entry_size + total, n + 1);
+ goto finish;
+ }
+
+ if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
+ e[1+sizeof(uint64_t)+l] != '\n') {
+ log_debug("Failed to parse message, ignoring.");
+ break;
+ }
+
+ k = malloc(total);
+ if (!k) {
+ log_oom();
+ break;
+ }
+
+ memcpy(k, p, e - p);
+ k[e - p] = '=';
+ memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
+
+ if (journal_field_valid(p, e - p, false)) {
+ iovec[n] = IOVEC_MAKE(k, (e - p) + 1 + l);
+ entry_size += iovec[n].iov_len;
+ n++;
+
+ server_process_entry_meta(k, (e - p) + 1 + l, ucred,
+ &priority,
+ &identifier,
+ &message,
+ &object_pid);
+ } else
+ free(k);
+
+ *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
+ p = e + 1 + sizeof(uint64_t) + l + 1;
+ }
+ }
+
+ if (n <= 0)
+ goto finish;
+
+ tn = n++;
+ iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
+ entry_size += STRLEN("_TRANSPORT=journal");
+
+ if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+ log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size);
+ goto finish;
+ }
+
+ r = 0; /* Success, we read the message. */
+
+ if (!client_context_test_priority(context, priority))
+ goto finish;
+
+ if (message) {
+ if (s->forward_to_syslog)
+ server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv);
+
+ if (s->forward_to_kmsg)
+ server_forward_kmsg(s, priority, identifier, message, ucred);
+
+ if (s->forward_to_console)
+ server_forward_console(s, priority, identifier, message, ucred);
+
+ if (s->forward_to_wall)
+ server_forward_wall(s, priority, identifier, message, ucred);
+ }
+
+ server_dispatch_message(s, iovec, n, m, context, tv, priority, object_pid);
+
+finish:
+ for (j = 0; j < n; j++) {
+ if (j == tn)
+ continue;
+
+ if (iovec[j].iov_base < buffer ||
+ (const char*) iovec[j].iov_base >= p + *remaining)
+ free(iovec[j].iov_base);
+ }
+
+ free(iovec);
+ free(identifier);
+ free(message);
+
+ return r;
+}
+
+void server_process_native_message(
+ Server *s,
+ const char *buffer, size_t buffer_size,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len) {
+
+ size_t remaining = buffer_size;
+ ClientContext *context = NULL;
+ int r;
+
+ assert(s);
+ assert(buffer || buffer_size == 0);
+
+ if (ucred && pid_is_valid(ucred->pid)) {
+ r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid);
+ }
+
+ do {
+ r = server_process_entry(s,
+ (const uint8_t*) buffer + (buffer_size - remaining), &remaining,
+ context, ucred, tv, label, label_len);
+ } while (r == 0);
+}
+
+void server_process_native_file(
+ Server *s,
+ int fd,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len) {
+
+ struct stat st;
+ bool sealed;
+ int r;
+
+ /* Data is in the passed fd, probably it didn't fit in a datagram. */
+
+ assert(s);
+ assert(fd >= 0);
+
+ /* If it's a memfd, check if it is sealed. If so, we can just
+ * mmap it and use it, and do not need to copy the data out. */
+ sealed = memfd_get_sealed(fd) > 0;
+
+ if (!sealed && (!ucred || ucred->uid != 0)) {
+ _cleanup_free_ char *k = NULL;
+ const char *e;
+
+ /* If this is not a sealed memfd, and the peer is unknown or
+ * unprivileged, then verify the path. */
+
+ r = fd_get_path(fd, &k);
+ if (r < 0) {
+ log_error_errno(r, "readlink(/proc/self/fd/%i) failed: %m", fd);
+ return;
+ }
+
+ e = PATH_STARTSWITH_SET(k, "/dev/shm/", "/tmp/", "/var/tmp/");
+ if (!e) {
+ log_error("Received file outside of allowed directories. Refusing.");
+ return;
+ }
+
+ if (!filename_is_valid(e)) {
+ log_error("Received file in subdirectory of allowed directories. Refusing.");
+ return;
+ }
+ }
+
+ if (fstat(fd, &st) < 0) {
+ log_error_errno(errno, "Failed to stat passed file, ignoring: %m");
+ return;
+ }
+
+ if (!S_ISREG(st.st_mode)) {
+ log_error("File passed is not regular. Ignoring.");
+ return;
+ }
+
+ if (st.st_size <= 0)
+ return;
+
+ /* When !sealed, set a lower memory limit. We have to read the file,
+ * effectively doubling memory use. */
+ if (st.st_size > ENTRY_SIZE_MAX / (sealed ? 1 : 2)) {
+ log_error("File passed too large (%"PRIu64" bytes). Ignoring.", (uint64_t) st.st_size);
+ return;
+ }
+
+ if (sealed) {
+ void *p;
+ size_t ps;
+
+ /* The file is sealed, we can just map it and use it. */
+
+ ps = PAGE_ALIGN(st.st_size);
+ p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (p == MAP_FAILED) {
+ log_error_errno(errno, "Failed to map memfd, ignoring: %m");
+ return;
+ }
+
+ server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
+ assert_se(munmap(p, ps) >= 0);
+ } else {
+ _cleanup_free_ void *p = NULL;
+ struct statvfs vfs;
+ ssize_t n;
+
+ if (fstatvfs(fd, &vfs) < 0) {
+ log_error_errno(errno, "Failed to stat file system of passed file, not processing it: %m");
+ return;
+ }
+
+ /* Refuse operating on file systems that have
+ * mandatory locking enabled, see:
+ *
+ * https://github.com/systemd/systemd/issues/1822
+ */
+ if (vfs.f_flag & ST_MANDLOCK) {
+ log_error("Received file descriptor from file system with mandatory locking enabled, not processing it.");
+ return;
+ }
+
+ /* Make the fd non-blocking. On regular files this has
+ * the effect of bypassing mandatory locking. Of
+ * course, this should normally not be necessary given
+ * the check above, but let's better be safe than
+ * sorry, after all NFS is pretty confusing regarding
+ * file system flags, and we better don't trust it,
+ * and so is SMB. */
+ r = fd_nonblock(fd, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to make fd non-blocking, not processing it: %m");
+ return;
+ }
+
+ /* The file is not sealed, we can't map the file here, since
+ * clients might then truncate it and trigger a SIGBUS for
+ * us. So let's stupidly read it. */
+
+ p = malloc(st.st_size);
+ if (!p) {
+ log_oom();
+ return;
+ }
+
+ n = pread(fd, p, st.st_size, 0);
+ if (n < 0)
+ log_error_errno(errno, "Failed to read file, ignoring: %m");
+ else if (n > 0)
+ server_process_native_message(s, p, n, ucred, tv, label, label_len);
+ }
+}
+
+int server_open_native_socket(Server *s, const char *native_socket) {
+ int r;
+
+ assert(s);
+ assert(native_socket);
+
+ if (s->native_fd < 0) {
+ union sockaddr_union sa;
+ size_t sa_len;
+
+ r = sockaddr_un_set_path(&sa.un, native_socket);
+ if (r < 0)
+ return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", native_socket);
+ sa_len = r;
+
+ s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->native_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(s->native_fd, &sa.sa, sa_len);
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+ } else
+ (void) fd_nonblock(s->native_fd, true);
+
+ r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+ if (mac_selinux_use()) {
+ r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSSEC, true);
+ if (r < 0)
+ log_warning_errno(r, "SO_PASSSEC failed: %m");
+ }
+
+ r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_TIMESTAMP failed: %m");
+
+ r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add native server fd to event loop: %m");
+
+ r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust native event source priority: %m");
+
+ return 0;
+}
diff --git a/src/journal/journald-native.h b/src/journal/journald-native.h
new file mode 100644
index 0000000..7bbaaed
--- /dev/null
+++ b/src/journal/journald-native.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journald-server.h"
+
+void server_process_native_message(
+ Server *s,
+ const char *buffer,
+ size_t buffer_size,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label,
+ size_t label_len);
+
+void server_process_native_file(
+ Server *s,
+ int fd,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label,
+ size_t label_len);
+
+int server_open_native_socket(Server *s, const char *native_socket);
diff --git a/src/journal/journald-rate-limit.c b/src/journal/journald-rate-limit.c
new file mode 100644
index 0000000..f464b6e
--- /dev/null
+++ b/src/journal/journald-rate-limit.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "journald-rate-limit.h"
+#include "list.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+#define POOLS_MAX 5
+#define BUCKETS_MAX 127
+#define GROUPS_MAX 2047
+
+static const int priority_map[] = {
+ [LOG_EMERG] = 0,
+ [LOG_ALERT] = 0,
+ [LOG_CRIT] = 0,
+ [LOG_ERR] = 1,
+ [LOG_WARNING] = 2,
+ [LOG_NOTICE] = 3,
+ [LOG_INFO] = 3,
+ [LOG_DEBUG] = 4
+};
+
+typedef struct JournalRateLimitPool JournalRateLimitPool;
+typedef struct JournalRateLimitGroup JournalRateLimitGroup;
+
+struct JournalRateLimitPool {
+ usec_t begin;
+ unsigned num;
+ unsigned suppressed;
+};
+
+struct JournalRateLimitGroup {
+ JournalRateLimit *parent;
+
+ char *id;
+
+ /* Interval is stored to keep track of when the group expires */
+ usec_t interval;
+
+ JournalRateLimitPool pools[POOLS_MAX];
+ uint64_t hash;
+
+ LIST_FIELDS(JournalRateLimitGroup, bucket);
+ LIST_FIELDS(JournalRateLimitGroup, lru);
+};
+
+struct JournalRateLimit {
+
+ JournalRateLimitGroup* buckets[BUCKETS_MAX];
+ JournalRateLimitGroup *lru, *lru_tail;
+
+ unsigned n_groups;
+
+ uint8_t hash_key[16];
+};
+
+JournalRateLimit *journal_ratelimit_new(void) {
+ JournalRateLimit *r;
+
+ r = new0(JournalRateLimit, 1);
+ if (!r)
+ return NULL;
+
+ random_bytes(r->hash_key, sizeof(r->hash_key));
+
+ return r;
+}
+
+static void journal_ratelimit_group_free(JournalRateLimitGroup *g) {
+ assert(g);
+
+ if (g->parent) {
+ assert(g->parent->n_groups > 0);
+
+ if (g->parent->lru_tail == g)
+ g->parent->lru_tail = g->lru_prev;
+
+ LIST_REMOVE(lru, g->parent->lru, g);
+ LIST_REMOVE(bucket, g->parent->buckets[g->hash % BUCKETS_MAX], g);
+
+ g->parent->n_groups--;
+ }
+
+ free(g->id);
+ free(g);
+}
+
+void journal_ratelimit_free(JournalRateLimit *r) {
+ assert(r);
+
+ while (r->lru)
+ journal_ratelimit_group_free(r->lru);
+
+ free(r);
+}
+
+static bool journal_ratelimit_group_expired(JournalRateLimitGroup *g, usec_t ts) {
+ unsigned i;
+
+ assert(g);
+
+ for (i = 0; i < POOLS_MAX; i++)
+ if (g->pools[i].begin + g->interval >= ts)
+ return false;
+
+ return true;
+}
+
+static void journal_ratelimit_vacuum(JournalRateLimit *r, usec_t ts) {
+ assert(r);
+
+ /* Makes room for at least one new item, but drop all
+ * expored items too. */
+
+ while (r->n_groups >= GROUPS_MAX ||
+ (r->lru_tail && journal_ratelimit_group_expired(r->lru_tail, ts)))
+ journal_ratelimit_group_free(r->lru_tail);
+}
+
+static JournalRateLimitGroup* journal_ratelimit_group_new(JournalRateLimit *r, const char *id, usec_t interval, usec_t ts) {
+ JournalRateLimitGroup *g;
+
+ assert(r);
+ assert(id);
+
+ g = new0(JournalRateLimitGroup, 1);
+ if (!g)
+ return NULL;
+
+ g->id = strdup(id);
+ if (!g->id)
+ goto fail;
+
+ g->hash = siphash24_string(g->id, r->hash_key);
+
+ g->interval = interval;
+
+ journal_ratelimit_vacuum(r, ts);
+
+ LIST_PREPEND(bucket, r->buckets[g->hash % BUCKETS_MAX], g);
+ LIST_PREPEND(lru, r->lru, g);
+ if (!g->lru_next)
+ r->lru_tail = g;
+ r->n_groups++;
+
+ g->parent = r;
+ return g;
+
+fail:
+ journal_ratelimit_group_free(g);
+ return NULL;
+}
+
+static unsigned burst_modulate(unsigned burst, uint64_t available) {
+ unsigned k;
+
+ /* Modulates the burst rate a bit with the amount of available
+ * disk space */
+
+ k = u64log2(available);
+
+ /* 1MB */
+ if (k <= 20)
+ return burst;
+
+ burst = (burst * (k-16)) / 4;
+
+ /*
+ * Example:
+ *
+ * <= 1MB = rate * 1
+ * 16MB = rate * 2
+ * 256MB = rate * 3
+ * 4GB = rate * 4
+ * 64GB = rate * 5
+ * 1TB = rate * 6
+ */
+
+ return burst;
+}
+
+int journal_ratelimit_test(JournalRateLimit *r, const char *id, usec_t rl_interval, unsigned rl_burst, int priority, uint64_t available) {
+ uint64_t h;
+ JournalRateLimitGroup *g;
+ JournalRateLimitPool *p;
+ unsigned burst;
+ usec_t ts;
+
+ assert(id);
+
+ /* Returns:
+ *
+ * 0 → the log message shall be suppressed,
+ * 1 + n → the log message shall be permitted, and n messages were dropped from the peer before
+ * < 0 → error
+ */
+
+ if (!r)
+ return 1;
+
+ ts = now(CLOCK_MONOTONIC);
+
+ h = siphash24_string(id, r->hash_key);
+ g = r->buckets[h % BUCKETS_MAX];
+
+ LIST_FOREACH(bucket, g, g)
+ if (streq(g->id, id))
+ break;
+
+ if (!g) {
+ g = journal_ratelimit_group_new(r, id, rl_interval, ts);
+ if (!g)
+ return -ENOMEM;
+ } else
+ g->interval = rl_interval;
+
+ if (rl_interval == 0 || rl_burst == 0)
+ return 1;
+
+ burst = burst_modulate(rl_burst, available);
+
+ p = &g->pools[priority_map[priority]];
+
+ if (p->begin <= 0) {
+ p->suppressed = 0;
+ p->num = 1;
+ p->begin = ts;
+ return 1;
+ }
+
+ if (p->begin + rl_interval < ts) {
+ unsigned s;
+
+ s = p->suppressed;
+ p->suppressed = 0;
+ p->num = 1;
+ p->begin = ts;
+
+ return 1 + s;
+ }
+
+ if (p->num < burst) {
+ p->num++;
+ return 1;
+ }
+
+ p->suppressed++;
+ return 0;
+}
diff --git a/src/journal/journald-rate-limit.h b/src/journal/journald-rate-limit.h
new file mode 100644
index 0000000..8def60f
--- /dev/null
+++ b/src/journal/journald-rate-limit.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "time-util.h"
+
+typedef struct JournalRateLimit JournalRateLimit;
+
+JournalRateLimit *journal_ratelimit_new(void);
+void journal_ratelimit_free(JournalRateLimit *r);
+int journal_ratelimit_test(JournalRateLimit *r, const char *id, usec_t rl_interval, unsigned rl_burst, int priority, uint64_t available);
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
new file mode 100644
index 0000000..10ebc3e
--- /dev/null
+++ b/src/journal/journald-server.c
@@ -0,0 +1,2619 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/signalfd.h>
+#include <sys/statvfs.h>
+#include <linux/sockios.h>
+
+#include "sd-daemon.h"
+#include "sd-journal.h"
+#include "sd-messages.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "cgroup-util.h"
+#include "conf-parser.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "journal-authenticate.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "journal-vacuum.h"
+#include "journald-audit.h"
+#include "journald-context.h"
+#include "journald-kmsg.h"
+#include "journald-native.h"
+#include "journald-rate-limit.h"
+#include "journald-server.h"
+#include "journald-stream.h"
+#include "journald-syslog.h"
+#include "log.h"
+#include "missing_audit.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "user-record.h"
+#include "user-util.h"
+
+#define USER_JOURNALS_MAX 1024
+
+#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
+#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
+#define DEFAULT_RATE_LIMIT_BURST 10000
+#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
+
+#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
+
+#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
+
+/* The period to insert between posting changes for coalescing */
+#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
+
+/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
+ * for a bit of additional metadata. */
+#define DEFAULT_LINE_MAX (48*1024)
+
+#define DEFERRED_CLOSES_MAX (4096)
+
+#define IDLE_TIMEOUT_USEC (30*USEC_PER_SEC)
+
+static int determine_path_usage(
+ Server *s,
+ const char *path,
+ uint64_t *ret_used,
+ uint64_t *ret_free) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ struct statvfs ss;
+
+ assert(s);
+ assert(path);
+ assert(ret_used);
+ assert(ret_free);
+
+ d = opendir(path);
+ if (!d)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
+ errno, "Failed to open %s: %m", path);
+
+ if (fstatvfs(dirfd(d), &ss) < 0)
+ return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
+
+ *ret_free = ss.f_bsize * ss.f_bavail;
+ *ret_used = 0;
+ FOREACH_DIRENT_ALL(de, d, break) {
+ struct stat st;
+
+ if (!endswith(de->d_name, ".journal") &&
+ !endswith(de->d_name, ".journal~"))
+ continue;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ *ret_used += (uint64_t) st.st_blocks * 512UL;
+ }
+
+ return 0;
+}
+
+static void cache_space_invalidate(JournalStorageSpace *space) {
+ zero(*space);
+}
+
+static int cache_space_refresh(Server *s, JournalStorage *storage) {
+ JournalStorageSpace *space;
+ JournalMetrics *metrics;
+ uint64_t vfs_used, vfs_avail, avail;
+ usec_t ts;
+ int r;
+
+ assert(s);
+
+ metrics = &storage->metrics;
+ space = &storage->space;
+
+ ts = now(CLOCK_MONOTONIC);
+
+ if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
+ return 0;
+
+ r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
+ if (r < 0)
+ return r;
+
+ space->vfs_used = vfs_used;
+ space->vfs_available = vfs_avail;
+
+ avail = LESS_BY(vfs_avail, metrics->keep_free);
+
+ space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
+ space->available = LESS_BY(space->limit, vfs_used);
+ space->timestamp = ts;
+ return 1;
+}
+
+static void patch_min_use(JournalStorage *storage) {
+ assert(storage);
+
+ /* Let's bump the min_use limit to the current usage on disk. We do
+ * this when starting up and first opening the journal files. This way
+ * sudden spikes in disk usage will not cause journald to vacuum files
+ * without bounds. Note that this means that only a restart of journald
+ * will make it reset this value. */
+
+ storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
+}
+
+static JournalStorage* server_current_storage(Server *s) {
+ assert(s);
+
+ return s->system_journal ? &s->system_storage : &s->runtime_storage;
+}
+
+static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
+ JournalStorage *js;
+ int r;
+
+ assert(s);
+
+ js = server_current_storage(s);
+
+ r = cache_space_refresh(s, js);
+ if (r >= 0) {
+ if (available)
+ *available = js->space.available;
+ if (limit)
+ *limit = js->space.limit;
+ }
+ return r;
+}
+
+void server_space_usage_message(Server *s, JournalStorage *storage) {
+ char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
+ fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
+ JournalMetrics *metrics;
+
+ assert(s);
+
+ if (!storage)
+ storage = server_current_storage(s);
+
+ if (cache_space_refresh(s, storage) < 0)
+ return;
+
+ metrics = &storage->metrics;
+ format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
+ format_bytes(fb2, sizeof(fb2), metrics->max_use);
+ format_bytes(fb3, sizeof(fb3), metrics->keep_free);
+ format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
+ format_bytes(fb5, sizeof(fb5), storage->space.limit);
+ format_bytes(fb6, sizeof(fb6), storage->space.available);
+
+ server_driver_message(s, 0,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
+ LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
+ storage->name, storage->path, fb1, fb5, fb6),
+ "JOURNAL_NAME=%s", storage->name,
+ "JOURNAL_PATH=%s", storage->path,
+ "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
+ "CURRENT_USE_PRETTY=%s", fb1,
+ "MAX_USE=%"PRIu64, metrics->max_use,
+ "MAX_USE_PRETTY=%s", fb2,
+ "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
+ "DISK_KEEP_FREE_PRETTY=%s", fb3,
+ "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
+ "DISK_AVAILABLE_PRETTY=%s", fb4,
+ "LIMIT=%"PRIu64, storage->space.limit,
+ "LIMIT_PRETTY=%s", fb5,
+ "AVAILABLE=%"PRIu64, storage->space.available,
+ "AVAILABLE_PRETTY=%s", fb6,
+ NULL);
+}
+
+static bool uid_for_system_journal(uid_t uid) {
+
+ /* Returns true if the specified UID shall get its data stored in the system journal*/
+
+ return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
+}
+
+static void server_add_acls(JournalFile *f, uid_t uid) {
+ assert(f);
+
+#if HAVE_ACL
+ int r;
+
+ if (uid_for_system_journal(uid))
+ return;
+
+ r = fd_add_uid_acl_permission(f->fd, uid, ACL_READ);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
+#endif
+}
+
+static int open_journal(
+ Server *s,
+ bool reliably,
+ const char *fname,
+ int flags,
+ bool seal,
+ JournalMetrics *metrics,
+ JournalFile **ret) {
+
+ _cleanup_(journal_file_closep) JournalFile *f = NULL;
+ int r;
+
+ assert(s);
+ assert(fname);
+ assert(ret);
+
+ if (reliably)
+ r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
+ seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
+ else
+ r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
+ metrics, s->mmap, s->deferred_closes, NULL, &f);
+
+ if (r < 0)
+ return r;
+
+ r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(f);
+ return r;
+}
+
+static bool flushed_flag_is_set(Server *s) {
+ const char *fn;
+
+ assert(s);
+
+ /* We don't support the "flushing" concept for namespace instances, we assume them to always have
+ * access to /var */
+ if (s->namespace)
+ return true;
+
+ fn = strjoina(s->runtime_directory, "/flushed");
+ return access(fn, F_OK) >= 0;
+}
+
+static int system_journal_open(Server *s, bool flush_requested, bool relinquish_requested) {
+ const char *fn;
+ int r = 0;
+
+ if (!s->system_journal &&
+ IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
+ (flush_requested || flushed_flag_is_set(s)) &&
+ !relinquish_requested) {
+
+ /* If in auto mode: first try to create the machine path, but not the prefix.
+ *
+ * If in persistent mode: create /var/log/journal and the machine path */
+
+ if (s->storage == STORAGE_PERSISTENT)
+ (void) mkdir_parents(s->system_storage.path, 0755);
+
+ (void) mkdir(s->system_storage.path, 0755);
+
+ fn = strjoina(s->system_storage.path, "/system.journal");
+ r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
+ if (r >= 0) {
+ server_add_acls(s->system_journal, 0);
+ (void) cache_space_refresh(s, &s->system_storage);
+ patch_min_use(&s->system_storage);
+ } else {
+ if (!IN_SET(r, -ENOENT, -EROFS))
+ log_warning_errno(r, "Failed to open system journal: %m");
+
+ r = 0;
+ }
+
+ /* If the runtime journal is open, and we're post-flush, we're recovering from a failed
+ * system journal rotate (ENOSPC) for which the runtime journal was reopened.
+ *
+ * Perform an implicit flush to var, leaving the runtime journal closed, now that the system
+ * journal is back.
+ */
+ if (!flush_requested)
+ (void) server_flush_to_var(s, true);
+ }
+
+ if (!s->runtime_journal &&
+ (s->storage != STORAGE_NONE)) {
+
+ fn = strjoina(s->runtime_storage.path, "/system.journal");
+
+ if (s->system_journal && !relinquish_requested) {
+
+ /* Try to open the runtime journal, but only
+ * if it already exists, so that we can flush
+ * it into the system journal */
+
+ r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_warning_errno(r, "Failed to open runtime journal: %m");
+
+ r = 0;
+ }
+
+ } else {
+
+ /* OK, we really need the runtime journal, so create it if necessary. */
+
+ (void) mkdir_parents(s->runtime_storage.path, 0755);
+ (void) mkdir(s->runtime_storage.path, 0750);
+
+ r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open runtime journal: %m");
+ }
+
+ if (s->runtime_journal) {
+ server_add_acls(s->runtime_journal, 0);
+ (void) cache_space_refresh(s, &s->runtime_storage);
+ patch_min_use(&s->runtime_storage);
+ }
+ }
+
+ return r;
+}
+
+static JournalFile* find_journal(Server *s, uid_t uid) {
+ _cleanup_free_ char *p = NULL;
+ JournalFile *f;
+ int r;
+
+ assert(s);
+
+ /* A rotate that fails to create the new journal (ENOSPC) leaves the rotated journal as NULL. Unless
+ * we revisit opening, even after space is made available we'll continue to return NULL indefinitely.
+ *
+ * system_journal_open() is a noop if the journals are already open, so we can just call it here to
+ * recover from failed rotates (or anything else that's left the journals as NULL).
+ *
+ * Fixes https://github.com/systemd/systemd/issues/3968 */
+ (void) system_journal_open(s, false, false);
+
+ /* We split up user logs only on /var, not on /run. If the runtime file is open, we write to it
+ * exclusively, in order to guarantee proper order as soon as we flush /run to /var and close the
+ * runtime file. */
+
+ if (s->runtime_journal)
+ return s->runtime_journal;
+
+ if (uid_for_system_journal(uid))
+ return s->system_journal;
+
+ f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
+ if (f)
+ return f;
+
+ if (asprintf(&p, "%s/user-" UID_FMT ".journal", s->system_storage.path, uid) < 0) {
+ log_oom();
+ return s->system_journal;
+ }
+
+ /* Too many open? Then let's close one (or more) */
+ while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
+ assert_se(f = ordered_hashmap_steal_first(s->user_journals));
+ (void) journal_file_close(f);
+ }
+
+ r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
+ if (r < 0)
+ return s->system_journal;
+
+ r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
+ if (r < 0) {
+ (void) journal_file_close(f);
+ return s->system_journal;
+ }
+
+ server_add_acls(f, uid);
+ return f;
+}
+
+static int do_rotate(
+ Server *s,
+ JournalFile **f,
+ const char* name,
+ bool seal,
+ uint32_t uid) {
+
+ int r;
+ assert(s);
+
+ if (!*f)
+ return -EINVAL;
+
+ r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
+ if (r < 0) {
+ if (*f)
+ return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
+ else
+ return log_error_errno(r, "Failed to create new %s journal: %m", name);
+ }
+
+ server_add_acls(*f, uid);
+ return r;
+}
+
+static void server_process_deferred_closes(Server *s) {
+ JournalFile *f;
+
+ /* Perform any deferred closes which aren't still offlining. */
+ SET_FOREACH(f, s->deferred_closes) {
+ if (journal_file_is_offlining(f))
+ continue;
+
+ (void) set_remove(s->deferred_closes, f);
+ (void) journal_file_close(f);
+ }
+}
+
+static void server_vacuum_deferred_closes(Server *s) {
+ assert(s);
+
+ /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
+ if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX)
+ return;
+
+ /* Let's first remove all journal files that might already have completed closing */
+ server_process_deferred_closes(s);
+
+ /* And now, let's close some more until we reach the limit again. */
+ while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {
+ JournalFile *f;
+
+ assert_se(f = set_steal_first(s->deferred_closes));
+ journal_file_close(f);
+ }
+}
+
+static int vacuum_offline_user_journals(Server *s) {
+ _cleanup_closedir_ DIR *d = NULL;
+ int r;
+
+ assert(s);
+
+ d = opendir(s->system_storage.path);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", s->system_storage.path);
+ }
+
+ for (;;) {
+ _cleanup_free_ char *u = NULL, *full = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *a, *b;
+ struct dirent *de;
+ JournalFile *f;
+ uid_t uid;
+
+ errno = 0;
+ de = readdir_no_dot(d);
+ if (!de) {
+ if (errno != 0)
+ log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", s->system_storage.path);
+
+ break;
+ }
+
+ a = startswith(de->d_name, "user-");
+ if (!a)
+ continue;
+ b = endswith(de->d_name, ".journal");
+ if (!b)
+ continue;
+
+ u = strndup(a, b-a);
+ if (!u)
+ return log_oom();
+
+ r = parse_uid(u, &uid);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name);
+ continue;
+ }
+
+ /* Already rotated in the above loop? i.e. is it an open user journal? */
+ if (ordered_hashmap_contains(s->user_journals, UID_TO_PTR(uid)))
+ continue;
+
+ full = path_join(s->system_storage.path, de->d_name);
+ if (!full)
+ return log_oom();
+
+ fd = openat(dirfd(d), de->d_name, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0) {
+ log_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to open journal file '%s' for rotation: %m", full);
+ continue;
+ }
+
+ /* Make some room in the set of deferred close()s */
+ server_vacuum_deferred_closes(s);
+
+ /* Open the file briefly, so that we can archive it */
+ r = journal_file_open(fd,
+ full,
+ O_RDWR,
+ 0640,
+ s->compress.enabled,
+ s->compress.threshold_bytes,
+ s->seal,
+ &s->system_storage.metrics,
+ s->mmap,
+ s->deferred_closes,
+ NULL,
+ &f);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full);
+
+ r = journal_file_dispose(dirfd(d), de->d_name);
+ if (r < 0)
+ log_warning_errno(r, "Failed to move %s out of the way, ignoring: %m", full);
+ else
+ log_debug("Successfully moved %s out of the way.", full);
+
+ continue;
+ }
+
+ TAKE_FD(fd); /* Donated to journal_file_open() */
+
+ r = journal_file_archive(f);
+ if (r < 0)
+ log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);
+
+ f = journal_initiate_close(f, s->deferred_closes);
+ }
+
+ return 0;
+}
+
+void server_rotate(Server *s) {
+ JournalFile *f;
+ void *k;
+ int r;
+
+ log_debug("Rotating...");
+
+ /* First, rotate the system journal (either in its runtime flavour or in its runtime flavour) */
+ (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
+ (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
+
+ /* Then, rotate all user journals we have open (keeping them open) */
+ ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals) {
+ r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
+ if (r >= 0)
+ ordered_hashmap_replace(s->user_journals, k, f);
+ else if (!f)
+ /* Old file has been closed and deallocated */
+ ordered_hashmap_remove(s->user_journals, k);
+ }
+
+ /* Finally, also rotate all user journals we currently do not have open. (But do so only if we
+ * actually have access to /var, i.e. are not in the log-to-runtime-journal mode). */
+ if (!s->runtime_journal)
+ (void) vacuum_offline_user_journals(s);
+
+ server_process_deferred_closes(s);
+}
+
+void server_sync(Server *s) {
+ JournalFile *f;
+ int r;
+
+ if (s->system_journal) {
+ r = journal_file_set_offline(s->system_journal, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
+ }
+
+ ORDERED_HASHMAP_FOREACH(f, s->user_journals) {
+ r = journal_file_set_offline(f, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
+ }
+
+ if (s->sync_event_source) {
+ r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ log_error_errno(r, "Failed to disable sync timer source: %m");
+ }
+
+ s->sync_scheduled = false;
+}
+
+static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
+
+ int r;
+
+ assert(s);
+ assert(storage);
+
+ (void) cache_space_refresh(s, storage);
+
+ if (verbose)
+ server_space_usage_message(s, storage);
+
+ r = journal_directory_vacuum(storage->path, storage->space.limit,
+ storage->metrics.n_max_files, s->max_retention_usec,
+ &s->oldest_file_usec, verbose);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
+
+ cache_space_invalidate(&storage->space);
+}
+
+int server_vacuum(Server *s, bool verbose) {
+ assert(s);
+
+ log_debug("Vacuuming...");
+
+ s->oldest_file_usec = 0;
+
+ if (s->system_journal)
+ do_vacuum(s, &s->system_storage, verbose);
+ if (s->runtime_journal)
+ do_vacuum(s, &s->runtime_storage, verbose);
+
+ return 0;
+}
+
+static void server_cache_machine_id(Server *s) {
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return;
+
+ sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
+}
+
+static void server_cache_boot_id(Server *s) {
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return;
+
+ sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
+}
+
+static void server_cache_hostname(Server *s) {
+ _cleanup_free_ char *t = NULL;
+ char *x;
+
+ assert(s);
+
+ t = gethostname_malloc();
+ if (!t)
+ return;
+
+ x = strjoin("_HOSTNAME=", t);
+ if (!x)
+ return;
+
+ free_and_replace(s->hostname_field, x);
+}
+
+static bool shall_try_append_again(JournalFile *f, int r) {
+ switch(r) {
+
+ case -E2BIG: /* Hit configured limit */
+ case -EFBIG: /* Hit fs limit */
+ case -EDQUOT: /* Quota limit hit */
+ case -ENOSPC: /* Disk full */
+ log_debug("%s: Allocation limit reached, rotating.", f->path);
+ return true;
+
+ case -EIO: /* I/O error of some kind (mmap) */
+ log_warning("%s: IO error, rotating.", f->path);
+ return true;
+
+ case -EHOSTDOWN: /* Other machine */
+ log_info("%s: Journal file from other machine, rotating.", f->path);
+ return true;
+
+ case -EBUSY: /* Unclean shutdown */
+ log_info("%s: Unclean shutdown, rotating.", f->path);
+ return true;
+
+ case -EPROTONOSUPPORT: /* Unsupported feature */
+ log_info("%s: Unsupported feature, rotating.", f->path);
+ return true;
+
+ case -EBADMSG: /* Corrupted */
+ case -ENODATA: /* Truncated */
+ case -ESHUTDOWN: /* Already archived */
+ log_warning("%s: Journal file corrupted, rotating.", f->path);
+ return true;
+
+ case -EIDRM: /* Journal file has been deleted */
+ log_warning("%s: Journal file has been deleted, rotating.", f->path);
+ return true;
+
+ case -ETXTBSY: /* Journal file is from the future */
+ log_warning("%s: Journal file is from the future, rotating.", f->path);
+ return true;
+
+ case -EAFNOSUPPORT:
+ log_warning("%s: underlying file system does not support memory mapping or another required file system feature.", f->path);
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
+ bool vacuumed = false, rotate = false;
+ struct dual_timestamp ts;
+ JournalFile *f;
+ int r;
+
+ assert(s);
+ assert(iovec);
+ assert(n > 0);
+
+ /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
+ * the source time, and not even the time the event was originally seen, but instead simply the time we started
+ * processing it, as we want strictly linear ordering in what we write out.) */
+ assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
+ assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
+
+ if (ts.realtime < s->last_realtime_clock) {
+ /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
+ * regular operation. However, when it does happen, then we should make sure that we start fresh files
+ * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
+ * bisection works correctly. */
+
+ log_debug("Time jumped backwards, rotating.");
+ rotate = true;
+ } else {
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+
+ if (journal_file_rotate_suggested(f, s->max_file_usec)) {
+ log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
+ rotate = true;
+ }
+ }
+
+ if (rotate) {
+ server_rotate(s);
+ server_vacuum(s, false);
+ vacuumed = true;
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+ }
+
+ s->last_realtime_clock = ts.realtime;
+
+ r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ if (r >= 0) {
+ server_schedule_sync(s, priority);
+ return;
+ }
+
+ if (vacuumed || !shall_try_append_again(f, r)) {
+ log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
+ return;
+ }
+
+ server_rotate(s);
+ server_vacuum(s, false);
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+
+ log_debug("Retrying write.");
+ r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
+ else
+ server_schedule_sync(s, priority);
+}
+
+#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
+ if (isset(value)) { \
+ char *k; \
+ k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
+ sprintf(k, field "=" format, value); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
+ }
+
+#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
+ if (!isempty(value)) { \
+ char *k; \
+ k = strjoina(field "=", value); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
+ }
+
+#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
+ if (!sd_id128_is_null(value)) { \
+ char *k; \
+ k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
+ sd_id128_to_string(value, stpcpy(k, field "=")); \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
+ }
+
+#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
+ if (value_size > 0) { \
+ char *k; \
+ k = newa(char, STRLEN(field "=") + value_size + 1); \
+ *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
+ iovec[n++] = IOVEC_MAKE_STRING(k); \
+ } \
+
+static void dispatch_message_real(
+ Server *s,
+ struct iovec *iovec, size_t n, size_t m,
+ const ClientContext *c,
+ const struct timeval *tv,
+ int priority,
+ pid_t object_pid) {
+
+ char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
+ _cleanup_free_ char *cmdline1 = NULL, *cmdline2 = NULL;
+ uid_t journal_uid;
+ ClientContext *o;
+
+ assert(s);
+ assert(iovec);
+ assert(n > 0);
+ assert(n +
+ N_IOVEC_META_FIELDS +
+ (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
+ client_context_extra_fields_n_iovec(c) <= m);
+
+ if (c) {
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM"); /* At most TASK_COMM_LENGTH (16 bytes) */
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE"); /* A path, so at most PATH_MAX (4096 bytes) */
+
+ if (c->cmdline)
+ /* At most _SC_ARG_MAX (2MB usually), which is too much to put on stack.
+ * Let's use a heap allocation for this one. */
+ cmdline1 = set_iovec_string_field(iovec, &n, "_CMDLINE=", c->cmdline);
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE"); /* Read from /proc/.../status */
+ IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP"); /* A path */
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT"); /* Unit names are bounded by UNIT_NAME_MAX */
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
+ IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
+
+ IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
+
+ if (c->extra_fields_n_iovec > 0) {
+ memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
+ n += c->extra_fields_n_iovec;
+ }
+ }
+
+ assert(n <= m);
+
+ if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
+
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
+
+ /* See above for size limits, only ->cmdline may be large, so use a heap allocation for it. */
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
+ if (o->cmdline)
+ cmdline2 = set_iovec_string_field(iovec, &n, "OBJECT_CMDLINE=", o->cmdline);
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
+ IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
+
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
+ IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
+ IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
+
+ IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
+ }
+
+ assert(n <= m);
+
+ if (tv) {
+ sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
+ iovec[n++] = IOVEC_MAKE_STRING(source_time);
+ }
+
+ /* Note that strictly speaking storing the boot id here is
+ * redundant since the entry includes this in-line
+ * anyway. However, we need this indexed, too. */
+ if (!isempty(s->boot_id_field))
+ iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
+
+ if (!isempty(s->machine_id_field))
+ iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
+
+ if (!isempty(s->hostname_field))
+ iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
+
+ if (!isempty(s->namespace_field))
+ iovec[n++] = IOVEC_MAKE_STRING(s->namespace_field);
+
+ assert(n <= m);
+
+ if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
+ /* Split up strictly by (non-root) UID */
+ journal_uid = c->uid;
+ else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
+ /* Split up by login UIDs. We do this only if the
+ * realuid is not root, in order not to accidentally
+ * leak privileged information to the user that is
+ * logged by a privileged process that is part of an
+ * unprivileged session. */
+ journal_uid = c->owner_uid;
+ else
+ journal_uid = 0;
+
+ write_to_journal(s, journal_uid, iovec, n, priority);
+}
+
+void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
+
+ struct iovec *iovec;
+ size_t n = 0, k, m;
+ va_list ap;
+ int r;
+
+ assert(s);
+ assert(format);
+
+ m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
+ iovec = newa(struct iovec, m);
+
+ assert_cc(3 == LOG_FAC(LOG_DAEMON));
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
+ iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
+
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
+ assert_cc(6 == LOG_INFO);
+ iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
+
+ if (message_id)
+ iovec[n++] = IOVEC_MAKE_STRING(message_id);
+ k = n;
+
+ va_start(ap, format);
+ r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
+ /* Error handling below */
+ va_end(ap);
+
+ if (r >= 0)
+ dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
+
+ while (k < n)
+ free(iovec[k++].iov_base);
+
+ if (r < 0) {
+ /* We failed to format the message. Emit a warning instead. */
+ char buf[LINE_MAX];
+
+ xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror_safe(r));
+
+ n = 3;
+ iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
+ iovec[n++] = IOVEC_MAKE_STRING(buf);
+ dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
+ }
+}
+
+void server_dispatch_message(
+ Server *s,
+ struct iovec *iovec, size_t n, size_t m,
+ ClientContext *c,
+ const struct timeval *tv,
+ int priority,
+ pid_t object_pid) {
+
+ uint64_t available = 0;
+ int rl;
+
+ assert(s);
+ assert(iovec || n == 0);
+
+ if (n == 0)
+ return;
+
+ if (LOG_PRI(priority) > s->max_level_store)
+ return;
+
+ /* Stop early in case the information will not be stored
+ * in a journal. */
+ if (s->storage == STORAGE_NONE)
+ return;
+
+ if (c && c->unit) {
+ (void) determine_space(s, &available, NULL);
+
+ rl = journal_ratelimit_test(s->ratelimit, c->unit, c->log_ratelimit_interval, c->log_ratelimit_burst, priority & LOG_PRIMASK, available);
+ if (rl == 0)
+ return;
+
+ /* Write a suppression message if we suppressed something */
+ if (rl > 1)
+ server_driver_message(s, c->pid,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
+ LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
+ "N_DROPPED=%i", rl - 1,
+ NULL);
+ }
+
+ dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
+}
+
+int server_flush_to_var(Server *s, bool require_flag_file) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ sd_journal *j = NULL;
+ const char *fn;
+ unsigned n = 0;
+ usec_t start;
+ int r, k;
+
+ assert(s);
+
+ if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
+ return 0;
+
+ if (s->namespace) /* Flushing concept does not exist for namespace instances */
+ return 0;
+
+ if (!s->runtime_journal) /* Nothing to flush? */
+ return 0;
+
+ if (require_flag_file && !flushed_flag_is_set(s))
+ return 0;
+
+ (void) system_journal_open(s, true, false);
+
+ if (!s->system_journal)
+ return 0;
+
+ log_debug("Flushing to %s...", s->system_storage.path);
+
+ start = now(CLOCK_MONOTONIC);
+
+ r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read runtime journal: %m");
+
+ sd_journal_set_data_threshold(j, 0);
+
+ SD_JOURNAL_FOREACH(j) {
+ Object *o = NULL;
+ JournalFile *f;
+
+ f = j->current_file;
+ assert(f && f->current_offset > 0);
+
+ n++;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0) {
+ log_error_errno(r, "Can't read entry: %m");
+ goto finish;
+ }
+
+ r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
+ if (r >= 0)
+ continue;
+
+ if (!shall_try_append_again(s->system_journal, r)) {
+ log_error_errno(r, "Can't write entry: %m");
+ goto finish;
+ }
+
+ server_rotate(s);
+ server_vacuum(s, false);
+
+ if (!s->system_journal) {
+ log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
+ r = -EIO;
+ goto finish;
+ }
+
+ log_debug("Retrying write.");
+ r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
+ if (r < 0) {
+ log_error_errno(r, "Can't write entry: %m");
+ goto finish;
+ }
+ }
+
+ r = 0;
+
+finish:
+ if (s->system_journal)
+ journal_file_post_change(s->system_journal);
+
+ s->runtime_journal = journal_file_close(s->runtime_journal);
+
+ if (r >= 0)
+ (void) rm_rf(s->runtime_storage.path, REMOVE_ROOT);
+
+ sd_journal_close(j);
+
+ server_driver_message(s, 0, NULL,
+ LOG_MESSAGE("Time spent on flushing to %s is %s for %u entries.",
+ s->system_storage.path,
+ format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
+ n),
+ NULL);
+
+ fn = strjoina(s->runtime_directory, "/flushed");
+ k = touch(fn);
+ if (k < 0)
+ log_warning_errno(k, "Failed to touch %s, ignoring: %m", fn);
+
+ server_refresh_idle_timer(s);
+ return r;
+}
+
+static int server_relinquish_var(Server *s) {
+ const char *fn;
+ assert(s);
+
+ if (s->storage == STORAGE_NONE)
+ return 0;
+
+ if (s->namespace) /* Concept does not exist for namespaced instances */
+ return -EOPNOTSUPP;
+
+ if (s->runtime_journal && !s->system_journal)
+ return 0;
+
+ log_debug("Relinquishing %s...", s->system_storage.path);
+
+ (void) system_journal_open(s, false, true);
+
+ s->system_journal = journal_file_close(s->system_journal);
+ ordered_hashmap_clear_with_destructor(s->user_journals, journal_file_close);
+ set_clear_with_destructor(s->deferred_closes, journal_file_close);
+
+ fn = strjoina(s->runtime_directory, "/flushed");
+ if (unlink(fn) < 0 && errno != ENOENT)
+ log_warning_errno(errno, "Failed to unlink %s, ignoring: %m", fn);
+
+ server_refresh_idle_timer(s);
+ return 0;
+}
+
+int server_process_datagram(
+ sd_event_source *es,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+
+ Server *s = userdata;
+ struct ucred *ucred = NULL;
+ struct timeval *tv = NULL;
+ struct cmsghdr *cmsg;
+ char *label = NULL;
+ size_t label_len = 0, m;
+ struct iovec iovec;
+ ssize_t n;
+ int *fds = NULL, v = 0;
+ size_t n_fds = 0;
+
+ /* We use NAME_MAX space for the SELinux label here. The kernel currently enforces no limit, but
+ * according to suggestions from the SELinux people this will change and it will probably be
+ * identical to NAME_MAX. For now we use that, but this should be updated one day when the final
+ * limit is known. */
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(struct timeval)) +
+ CMSG_SPACE(sizeof(int)) + /* fd */
+ CMSG_SPACE(NAME_MAX) /* selinux label */) control;
+
+ union sockaddr_union sa = {};
+
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_name = &sa,
+ .msg_namelen = sizeof(sa),
+ };
+
+ assert(s);
+ assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
+
+ if (revents != EPOLLIN)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Got invalid event from epoll for datagram fd: %" PRIx32,
+ revents);
+
+ /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
+ * it.) */
+ (void) ioctl(fd, SIOCINQ, &v);
+
+ /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
+ m = PAGE_ALIGN(MAX3((size_t) v + 1,
+ (size_t) LINE_MAX,
+ ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
+
+ if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
+ return log_oom();
+
+ iovec = IOVEC_MAKE(s->buffer, s->buffer_size - 1); /* Leave room for trailing NUL we add later */
+
+ n = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (IN_SET(n, -EINTR, -EAGAIN))
+ return 0;
+ if (n == -EXFULL) {
+ log_warning("Got message with truncated control data (too many fds sent?), ignoring.");
+ return 0;
+ }
+ if (n < 0)
+ return log_error_errno(n, "recvmsg() failed: %m");
+
+ CMSG_FOREACH(cmsg, &msghdr)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
+ assert(!ucred);
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_SECURITY) {
+ assert(!label);
+ label = (char*) CMSG_DATA(cmsg);
+ label_len = cmsg->cmsg_len - CMSG_LEN(0);
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMP &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval))) {
+ assert(!tv);
+ tv = (struct timeval*) CMSG_DATA(cmsg);
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ assert(!fds);
+ fds = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+ }
+
+ /* And a trailing NUL, just in case */
+ s->buffer[n] = 0;
+
+ if (fd == s->syslog_fd) {
+ if (n > 0 && n_fds == 0)
+ server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
+ else if (n_fds > 0)
+ log_warning("Got file descriptors via syslog socket. Ignoring.");
+
+ } else if (fd == s->native_fd) {
+ if (n > 0 && n_fds == 0)
+ server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
+ else if (n == 0 && n_fds == 1)
+ server_process_native_file(s, fds[0], ucred, tv, label, label_len);
+ else if (n_fds > 0)
+ log_warning("Got too many file descriptors via native socket. Ignoring.");
+
+ } else {
+ assert(fd == s->audit_fd);
+
+ if (n > 0 && n_fds == 0)
+ server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
+ else if (n_fds > 0)
+ log_warning("Got file descriptors via audit socket. Ignoring.");
+ }
+
+ close_many(fds, n_fds);
+
+ server_refresh_idle_timer(s);
+ return 0;
+}
+
+static void server_full_flush(Server *s) {
+ assert(s);
+
+ (void) server_flush_to_var(s, false);
+ server_sync(s);
+ server_vacuum(s, false);
+
+ server_space_usage_message(s, NULL);
+
+ server_refresh_idle_timer(s);
+}
+
+static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ if (s->namespace) {
+ log_error("Received SIGUSR1 signal from PID " PID_FMT ", but flushing runtime journals not supported for namespaced instances.", si->ssi_pid);
+ return 0;
+ }
+
+ log_info("Received SIGUSR1 signal from PID " PID_FMT ", as request to flush runtime journal.", si->ssi_pid);
+ server_full_flush(s);
+
+ return 0;
+}
+
+static void server_full_rotate(Server *s) {
+ const char *fn;
+ int r;
+
+ assert(s);
+
+ server_rotate(s);
+ server_vacuum(s, true);
+
+ if (s->system_journal)
+ patch_min_use(&s->system_storage);
+ if (s->runtime_journal)
+ patch_min_use(&s->runtime_storage);
+
+ /* Let clients know when the most recent rotation happened. */
+ fn = strjoina(s->runtime_directory, "/rotated");
+ r = write_timestamp_file_atomic(fn, now(CLOCK_MONOTONIC));
+ if (r < 0)
+ log_warning_errno(r, "Failed to write %s, ignoring: %m", fn);
+}
+
+static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ log_info("Received SIGUSR2 signal from PID " PID_FMT ", as request to rotate journal.", si->ssi_pid);
+ server_full_rotate(s);
+
+ return 0;
+}
+
+static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ log_received_signal(LOG_INFO, si);
+
+ sd_event_exit(s->event, 0);
+ return 0;
+}
+
+static void server_full_sync(Server *s) {
+ const char *fn;
+ int r;
+
+ assert(s);
+
+ server_sync(s);
+
+ /* Let clients know when the most recent sync happened. */
+ fn = strjoina(s->runtime_directory, "/synced");
+ r = write_timestamp_file_atomic(fn, now(CLOCK_MONOTONIC));
+ if (r < 0)
+ log_warning_errno(r, "Failed to write %s, ignoring: %m", fn);
+
+ return;
+}
+
+static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ log_debug("Received SIGRTMIN1 signal from PID " PID_FMT ", as request to sync.", si->ssi_pid );
+ server_full_sync(s);
+
+ return 0;
+}
+
+static int setup_signals(Server *s) {
+ int r;
+
+ assert(s);
+
+ assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
+
+ r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
+ if (r < 0)
+ return r;
+
+ /* Let's process SIGTERM late, so that we flush all queued messages to disk before we exit */
+ r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
+ if (r < 0)
+ return r;
+
+ /* When journald is invoked on the terminal (when debugging), it's useful if C-c is handled
+ * equivalent to SIGTERM. */
+ r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
+ if (r < 0)
+ return r;
+
+ /* SIGRTMIN+1 causes an immediate sync. We process this very late, so that everything else queued at
+ * this point is really written to disk. Clients can watch /run/systemd/journal/synced with inotify
+ * until its mtime changes to see when a sync happened. */
+ r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ Server *s = data;
+ int r;
+
+ assert(s);
+
+ if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_syslog = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_kmsg = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_console = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
+
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_wall = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
+ else
+ s->max_level_console = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
+ else
+ s->max_level_store = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
+ else
+ s->max_level_syslog = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
+ else
+ s->max_level_kmsg = r;
+
+ } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
+ else
+ s->max_level_wall = r;
+
+ } else if (startswith(key, "systemd.journald"))
+ log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
+
+ /* do not warn about state here, since probably systemd already did */
+ return 0;
+}
+
+static int server_parse_config_file(Server *s) {
+ int r;
+
+ assert(s);
+
+ if (s->namespace) {
+ const char *namespaced;
+
+ /* If we are running in namespace mode, load the namespace specific configuration file, and nothing else */
+ namespaced = strjoina(PKGSYSCONFDIR "/journald@", s->namespace, ".conf");
+
+ r = config_parse(NULL,
+ namespaced, NULL,
+ "Journal\0",
+ config_item_perf_lookup, journald_gperf_lookup,
+ CONFIG_PARSE_WARN, s,
+ NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+ }
+
+ return config_parse_many_nulstr(
+ PKGSYSCONFDIR "/journald.conf",
+ CONF_PATHS_NULSTR("systemd/journald.conf.d"),
+ "Journal\0",
+ config_item_perf_lookup, journald_gperf_lookup,
+ CONFIG_PARSE_WARN, s, NULL);
+}
+
+static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ server_sync(s);
+ return 0;
+}
+
+int server_schedule_sync(Server *s, int priority) {
+ int r;
+
+ assert(s);
+
+ if (priority <= LOG_CRIT) {
+ /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
+ server_sync(s);
+ return 0;
+ }
+
+ if (s->sync_scheduled)
+ return 0;
+
+ if (s->sync_interval_usec > 0) {
+
+ if (!s->sync_event_source) {
+ r = sd_event_add_time_relative(
+ s->event,
+ &s->sync_event_source,
+ CLOCK_MONOTONIC,
+ s->sync_interval_usec, 0,
+ server_dispatch_sync, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
+ } else {
+ r = sd_event_source_set_time_relative(s->sync_event_source, s->sync_interval_usec);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
+ }
+ if (r < 0)
+ return r;
+
+ s->sync_scheduled = true;
+ }
+
+ return 0;
+}
+
+static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ server_cache_hostname(s);
+ return 0;
+}
+
+static int server_open_hostname(Server *s) {
+ int r;
+
+ assert(s);
+
+ s->hostname_fd = open("/proc/sys/kernel/hostname",
+ O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (s->hostname_fd < 0)
+ return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
+
+ r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
+ if (r < 0) {
+ /* kernels prior to 3.2 don't support polling this file. Ignore
+ * the failure. */
+ if (r == -EPERM) {
+ log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
+ s->hostname_fd = safe_close(s->hostname_fd);
+ return 0;
+ }
+
+ return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
+ }
+
+ r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust priority of hostname event source: %m");
+
+ return 0;
+}
+
+static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+ assert(s->notify_event_source == es);
+ assert(s->notify_fd == fd);
+
+ /* The $NOTIFY_SOCKET is writable again, now send exactly one
+ * message on it. Either it's the watchdog event, the initial
+ * READY=1 event or an stdout stream event. If there's nothing
+ * to write anymore, turn our event source off. The next time
+ * there's something to send it will be turned on again. */
+
+ if (!s->sent_notify_ready) {
+ static const char p[] =
+ "READY=1\n"
+ "STATUS=Processing requests...";
+ ssize_t l;
+
+ l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
+ }
+
+ s->sent_notify_ready = true;
+ log_debug("Sent READY=1 notification.");
+
+ } else if (s->send_watchdog) {
+
+ static const char p[] =
+ "WATCHDOG=1";
+
+ ssize_t l;
+
+ l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
+ }
+
+ s->send_watchdog = false;
+ log_debug("Sent WATCHDOG=1 notification.");
+
+ } else if (s->stdout_streams_notify_queue)
+ /* Dispatch one stream notification event */
+ stdout_stream_send_notify(s->stdout_streams_notify_queue);
+
+ /* Leave us enabled if there's still more to do. */
+ if (s->send_watchdog || s->stdout_streams_notify_queue)
+ return 0;
+
+ /* There was nothing to do anymore, let's turn ourselves off. */
+ r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to turn off notify event source: %m");
+
+ return 0;
+}
+
+static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ s->send_watchdog = true;
+
+ r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to turn on notify event source: %m");
+
+ r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to restart watchdog event source: %m");
+
+ r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable watchdog event source: %m");
+
+ return 0;
+}
+
+static int server_connect_notify(Server *s) {
+ union sockaddr_union sa;
+ socklen_t sa_len;
+ const char *e;
+ int r;
+
+ assert(s);
+ assert(s->notify_fd < 0);
+ assert(!s->notify_event_source);
+
+ /*
+ * So here's the problem: we'd like to send notification messages to PID 1, but we cannot do that via
+ * sd_notify(), since that's synchronous, and we might end up blocking on it. Specifically: given
+ * that PID 1 might block on dbus-daemon during IPC, and dbus-daemon is logging to us, and might
+ * hence block on us, we might end up in a deadlock if we block on sending PID 1 notification
+ * messages — by generating a full blocking circle. To avoid this, let's create a non-blocking
+ * socket, and connect it to the notification socket, and then wait for POLLOUT before we send
+ * anything. This should efficiently avoid any deadlocks, as we'll never block on PID 1, hence PID 1
+ * can safely block on dbus-daemon which can safely block on us again.
+ *
+ * Don't think that this issue is real? It is, see: https://github.com/systemd/systemd/issues/1505
+ */
+
+ e = getenv("NOTIFY_SOCKET");
+ if (!e)
+ return 0;
+
+ r = sockaddr_un_set_path(&sa.un, e);
+ if (r < 0)
+ return log_error_errno(r, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
+ sa_len = r;
+
+ s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->notify_fd < 0)
+ return log_error_errno(errno, "Failed to create notify socket: %m");
+
+ (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
+
+ r = connect(s->notify_fd, &sa.sa, sa_len);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to connect to notify socket: %m");
+
+ r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch notification socket: %m");
+
+ if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
+ s->send_watchdog = true;
+
+ r = sd_event_add_time_relative(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add watchdog time event: %m");
+ }
+
+ /* This should fire pretty soon, which we'll use to send the READY=1 event. */
+
+ return 0;
+}
+
+static int synchronize_second_half(sd_event_source *event_source, void *userdata) {
+ Varlink *link = userdata;
+ Server *s;
+ int r;
+
+ assert(link);
+ assert_se(s = varlink_get_userdata(link));
+
+ /* This is the "second half" of the Synchronize() varlink method. This function is called as deferred
+ * event source at a low priority to ensure the synchronization completes after all queued log
+ * messages are processed. */
+ server_full_sync(s);
+
+ /* Let's get rid of the event source now, by marking it as non-floating again. It then has no ref
+ * anymore and is immediately destroyed after we return from this function, i.e. from this event
+ * source handler at the end. */
+ r = sd_event_source_set_floating(event_source, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mark event source as non-floating: %m");
+
+ return varlink_reply(link, NULL);
+}
+
+static void synchronize_destroy(void *userdata) {
+ varlink_unref(userdata);
+}
+
+static int vl_method_synchronize(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *event_source = NULL;
+ Server *s = userdata;
+ int r;
+
+ assert(link);
+ assert(s);
+
+ if (json_variant_elements(parameters) > 0)
+ return varlink_error_invalid_parameter(link, parameters);
+
+ log_info("Received client request to rotate journal.");
+
+ /* We don't do the main work now, but instead enqueue a deferred event loop job which will do
+ * it. That job is scheduled at low priority, so that we return from this method call only after all
+ * queued but not processed log messages are written to disk, so that this method call returning can
+ * be used as nice synchronization point. */
+ r = sd_event_add_defer(s->event, &event_source, synchronize_second_half, link);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate defer event source: %m");
+
+ r = sd_event_source_set_destroy_callback(event_source, synchronize_destroy);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set event source destroy callback: %m");
+
+ varlink_ref(link); /* The varlink object is now left to the destroy callback to unref */
+
+ r = sd_event_source_set_priority(event_source, SD_EVENT_PRIORITY_NORMAL+15);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set defer event source priority: %m");
+
+ /* Give up ownership of this event source. It will now be destroyed along with event loop itself,
+ * unless it destroys itself earlier. */
+ r = sd_event_source_set_floating(event_source, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mark event source as floating: %m");
+
+ (void) sd_event_source_set_description(event_source, "deferred-sync");
+
+ return 0;
+}
+
+static int vl_method_rotate(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+ Server *s = userdata;
+
+ assert(link);
+ assert(s);
+
+ if (json_variant_elements(parameters) > 0)
+ return varlink_error_invalid_parameter(link, parameters);
+
+ log_info("Received client request to rotate journal.");
+ server_full_rotate(s);
+
+ return varlink_reply(link, NULL);
+}
+
+static int vl_method_flush_to_var(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+ Server *s = userdata;
+
+ assert(link);
+ assert(s);
+
+ if (json_variant_elements(parameters) > 0)
+ return varlink_error_invalid_parameter(link, parameters);
+ if (s->namespace)
+ return varlink_error(link, "io.systemd.Journal.NotSupportedByNamespaces", NULL);
+
+ log_info("Received client request to flush runtime journal.");
+ server_full_flush(s);
+
+ return varlink_reply(link, NULL);
+}
+
+static int vl_method_relinquish_var(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+ Server *s = userdata;
+
+ assert(link);
+ assert(s);
+
+ if (json_variant_elements(parameters) > 0)
+ return varlink_error_invalid_parameter(link, parameters);
+ if (s->namespace)
+ return varlink_error(link, "io.systemd.Journal.NotSupportedByNamespaces", NULL);
+
+ log_info("Received client request to relinquish %s access.", s->system_storage.path);
+ server_relinquish_var(s);
+
+ return varlink_reply(link, NULL);
+}
+
+static int vl_connect(VarlinkServer *server, Varlink *link, void *userdata) {
+ Server *s = userdata;
+
+ assert(server);
+ assert(link);
+ assert(s);
+
+ (void) server_start_or_stop_idle_timer(s); /* maybe we are no longer idle */
+
+ return 0;
+}
+
+static void vl_disconnect(VarlinkServer *server, Varlink *link, void *userdata) {
+ Server *s = userdata;
+
+ assert(server);
+ assert(link);
+ assert(s);
+
+ (void) server_start_or_stop_idle_timer(s); /* maybe we are idle now */
+}
+
+static int server_open_varlink(Server *s, const char *socket, int fd) {
+ int r;
+
+ assert(s);
+
+ r = varlink_server_new(&s->varlink_server, VARLINK_SERVER_ROOT_ONLY);
+ if (r < 0)
+ return r;
+
+ varlink_server_set_userdata(s->varlink_server, s);
+
+ r = varlink_server_bind_method_many(
+ s->varlink_server,
+ "io.systemd.Journal.Synchronize", vl_method_synchronize,
+ "io.systemd.Journal.Rotate", vl_method_rotate,
+ "io.systemd.Journal.FlushToVar", vl_method_flush_to_var,
+ "io.systemd.Journal.RelinquishVar", vl_method_relinquish_var);
+ if (r < 0)
+ return r;
+
+ r = varlink_server_bind_connect(s->varlink_server, vl_connect);
+ if (r < 0)
+ return r;
+
+ r = varlink_server_bind_disconnect(s->varlink_server, vl_disconnect);
+ if (r < 0)
+ return r;
+
+ if (fd < 0)
+ r = varlink_server_listen_address(s->varlink_server, socket, 0600);
+ else
+ r = varlink_server_listen_fd(s->varlink_server, fd);
+ if (r < 0)
+ return r;
+
+ r = varlink_server_attach_event(s->varlink_server, s->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool server_is_idle(Server *s) {
+ assert(s);
+
+ /* The server for the main namespace is never idle */
+ if (!s->namespace)
+ return false;
+
+ /* If a retention maximum is set larger than the idle time we need to be running to enforce it, hence
+ * turn off the idle logic. */
+ if (s->max_retention_usec > IDLE_TIMEOUT_USEC)
+ return false;
+
+ /* We aren't idle if we have a varlink client */
+ if (varlink_server_current_connections(s->varlink_server) > 0)
+ return false;
+
+ /* If we have stdout streams we aren't idle */
+ if (s->n_stdout_streams > 0)
+ return false;
+
+ return true;
+}
+
+static int server_idle_handler(sd_event_source *source, uint64_t usec, void *userdata) {
+ Server *s = userdata;
+
+ assert(source);
+ assert(s);
+
+ log_debug("Server is idle, exiting.");
+ sd_event_exit(s->event, 0);
+ return 0;
+}
+
+int server_start_or_stop_idle_timer(Server *s) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
+ int r;
+
+ assert(s);
+
+ if (!server_is_idle(s)) {
+ s->idle_event_source = sd_event_source_disable_unref(s->idle_event_source);
+ return 0;
+ }
+
+ if (s->idle_event_source)
+ return 1;
+
+ r = sd_event_add_time_relative(s->event, &source, CLOCK_MONOTONIC, IDLE_TIMEOUT_USEC, 0, server_idle_handler, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate idle timer: %m");
+
+ r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set idle timer priority: %m");
+
+ (void) sd_event_source_set_description(source, "idle-timer");
+
+ s->idle_event_source = TAKE_PTR(source);
+ return 1;
+}
+
+int server_refresh_idle_timer(Server *s) {
+ int r;
+
+ assert(s);
+
+ if (!s->idle_event_source)
+ return 0;
+
+ r = sd_event_source_set_time_relative(s->idle_event_source, IDLE_TIMEOUT_USEC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to refresh idle timer: %m");
+
+ return 1;
+}
+
+static int set_namespace(Server *s, const char *namespace) {
+ assert(s);
+
+ if (!namespace)
+ return 0;
+
+ if (!log_namespace_name_valid(namespace))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified namespace name not valid, refusing: %s", namespace);
+
+ s->namespace = strdup(namespace);
+ if (!s->namespace)
+ return log_oom();
+
+ s->namespace_field = strjoin("_NAMESPACE=", namespace);
+ if (!s->namespace_field)
+ return log_oom();
+
+ return 1;
+}
+
+int server_init(Server *s, const char *namespace) {
+ const char *native_socket, *syslog_socket, *stdout_socket, *varlink_socket, *e;
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ int n, r, fd, varlink_fd = -1;
+ bool no_sockets;
+
+ assert(s);
+
+ *s = (Server) {
+ .syslog_fd = -1,
+ .native_fd = -1,
+ .stdout_fd = -1,
+ .dev_kmsg_fd = -1,
+ .audit_fd = -1,
+ .hostname_fd = -1,
+ .notify_fd = -1,
+
+ .compress.enabled = true,
+ .compress.threshold_bytes = (uint64_t) -1,
+ .seal = true,
+
+ .set_audit = true,
+
+ .watchdog_usec = USEC_INFINITY,
+
+ .sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC,
+ .sync_scheduled = false,
+
+ .ratelimit_interval = DEFAULT_RATE_LIMIT_INTERVAL,
+ .ratelimit_burst = DEFAULT_RATE_LIMIT_BURST,
+
+ .forward_to_wall = true,
+
+ .max_file_usec = DEFAULT_MAX_FILE_USEC,
+
+ .max_level_store = LOG_DEBUG,
+ .max_level_syslog = LOG_DEBUG,
+ .max_level_kmsg = LOG_NOTICE,
+ .max_level_console = LOG_INFO,
+ .max_level_wall = LOG_EMERG,
+
+ .line_max = DEFAULT_LINE_MAX,
+
+ .runtime_storage.name = "Runtime Journal",
+ .system_storage.name = "System Journal",
+ };
+
+ r = set_namespace(s, namespace);
+ if (r < 0)
+ return r;
+
+ /* By default, only read from /dev/kmsg if are the main namespace */
+ s->read_kmsg = !s->namespace;
+ s->storage = s->namespace ? STORAGE_PERSISTENT : STORAGE_AUTO;
+
+ journal_reset_metrics(&s->system_storage.metrics);
+ journal_reset_metrics(&s->runtime_storage.metrics);
+
+ server_parse_config_file(s);
+
+ if (!s->namespace) {
+ /* Parse kernel command line, but only if we are not a namespace instance */
+ r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+ }
+
+ if (!!s->ratelimit_interval != !!s->ratelimit_burst) { /* One set to 0 and the other not? */
+ log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
+ s->ratelimit_interval, s->ratelimit_burst);
+ s->ratelimit_interval = s->ratelimit_burst = 0;
+ }
+
+ e = getenv("RUNTIME_DIRECTORY");
+ if (e)
+ s->runtime_directory = strdup(e);
+ else if (s->namespace)
+ s->runtime_directory = strjoin("/run/systemd/journal.", s->namespace);
+ else
+ s->runtime_directory = strdup("/run/systemd/journal");
+ if (!s->runtime_directory)
+ return log_oom();
+
+ (void) mkdir_p(s->runtime_directory, 0755);
+
+ s->user_journals = ordered_hashmap_new(NULL);
+ if (!s->user_journals)
+ return log_oom();
+
+ s->mmap = mmap_cache_new();
+ if (!s->mmap)
+ return log_oom();
+
+ s->deferred_closes = set_new(NULL);
+ if (!s->deferred_closes)
+ return log_oom();
+
+ r = sd_event_default(&s->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create event loop: %m");
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
+
+ native_socket = strjoina(s->runtime_directory, "/socket");
+ stdout_socket = strjoina(s->runtime_directory, "/stdout");
+ syslog_socket = strjoina(s->runtime_directory, "/dev-log");
+ varlink_socket = strjoina(s->runtime_directory, "/io.systemd.journal");
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+
+ if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, native_socket, 0) > 0) {
+
+ if (s->native_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many native sockets passed.");
+
+ s->native_fd = fd;
+
+ } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, stdout_socket, 0) > 0) {
+
+ if (s->stdout_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many stdout sockets passed.");
+
+ s->stdout_fd = fd;
+
+ } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, syslog_socket, 0) > 0) {
+
+ if (s->syslog_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many /dev/log sockets passed.");
+
+ s->syslog_fd = fd;
+
+ } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, varlink_socket, 0) > 0) {
+
+ if (varlink_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many varlink sockets passed.");
+
+ varlink_fd = fd;
+ } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
+
+ if (s->audit_fd >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many audit sockets passed.");
+
+ s->audit_fd = fd;
+
+ } else {
+
+ if (!fds) {
+ fds = fdset_new();
+ if (!fds)
+ return log_oom();
+ }
+
+ r = fdset_put(fds, fd);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ /* Try to restore streams, but don't bother if this fails */
+ (void) server_restore_streams(s, fds);
+
+ if (fdset_size(fds) > 0) {
+ log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
+ fds = fdset_free(fds);
+ }
+
+ no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0 && varlink_fd < 0;
+
+ /* always open stdout, syslog, native, and kmsg sockets */
+
+ /* systemd-journald.socket: /run/systemd/journal/stdout */
+ r = server_open_stdout_socket(s, stdout_socket);
+ if (r < 0)
+ return r;
+
+ /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
+ r = server_open_syslog_socket(s, syslog_socket);
+ if (r < 0)
+ return r;
+
+ /* systemd-journald.socket: /run/systemd/journal/socket */
+ r = server_open_native_socket(s, native_socket);
+ if (r < 0)
+ return r;
+
+ /* /dev/kmsg */
+ r = server_open_dev_kmsg(s);
+ if (r < 0)
+ return r;
+
+ /* Unless we got *some* sockets and not audit, open audit socket */
+ if (s->audit_fd >= 0 || no_sockets) {
+ r = server_open_audit(s);
+ if (r < 0)
+ return r;
+ }
+
+ r = server_open_varlink(s, varlink_socket, varlink_fd);
+ if (r < 0)
+ return r;
+
+ r = server_open_kernel_seqnum(s);
+ if (r < 0)
+ return r;
+
+ r = server_open_hostname(s);
+ if (r < 0)
+ return r;
+
+ r = setup_signals(s);
+ if (r < 0)
+ return r;
+
+ s->ratelimit = journal_ratelimit_new();
+ if (!s->ratelimit)
+ return log_oom();
+
+ r = cg_get_root_path(&s->cgroup_root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire cgroup root path: %m");
+
+ server_cache_hostname(s);
+ server_cache_boot_id(s);
+ server_cache_machine_id(s);
+
+ if (s->namespace)
+ s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), ".", s->namespace);
+ else
+ s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
+ if (!s->runtime_storage.path)
+ return log_oom();
+
+ e = getenv("LOGS_DIRECTORY");
+ if (e)
+ s->system_storage.path = strdup(e);
+ else if (s->namespace)
+ s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), ".", s->namespace);
+ else
+ s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
+ if (!s->system_storage.path)
+ return log_oom();
+
+ (void) server_connect_notify(s);
+
+ (void) client_context_acquire_default(s);
+
+ r = system_journal_open(s, false, false);
+ if (r < 0)
+ return r;
+
+ server_start_or_stop_idle_timer(s);
+ return 0;
+}
+
+void server_maybe_append_tags(Server *s) {
+#if HAVE_GCRYPT
+ JournalFile *f;
+ usec_t n;
+
+ n = now(CLOCK_REALTIME);
+
+ if (s->system_journal)
+ journal_file_maybe_append_tag(s->system_journal, n);
+
+ ORDERED_HASHMAP_FOREACH(f, s->user_journals)
+ journal_file_maybe_append_tag(f, n);
+#endif
+}
+
+void server_done(Server *s) {
+ assert(s);
+
+ free(s->namespace);
+ free(s->namespace_field);
+
+ set_free_with_destructor(s->deferred_closes, journal_file_close);
+
+ while (s->stdout_streams)
+ stdout_stream_free(s->stdout_streams);
+
+ client_context_flush_all(s);
+
+ (void) journal_file_close(s->system_journal);
+ (void) journal_file_close(s->runtime_journal);
+
+ ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
+
+ varlink_server_unref(s->varlink_server);
+
+ sd_event_source_unref(s->syslog_event_source);
+ sd_event_source_unref(s->native_event_source);
+ sd_event_source_unref(s->stdout_event_source);
+ sd_event_source_unref(s->dev_kmsg_event_source);
+ sd_event_source_unref(s->audit_event_source);
+ sd_event_source_unref(s->sync_event_source);
+ sd_event_source_unref(s->sigusr1_event_source);
+ sd_event_source_unref(s->sigusr2_event_source);
+ sd_event_source_unref(s->sigterm_event_source);
+ sd_event_source_unref(s->sigint_event_source);
+ sd_event_source_unref(s->sigrtmin1_event_source);
+ sd_event_source_unref(s->hostname_event_source);
+ sd_event_source_unref(s->notify_event_source);
+ sd_event_source_unref(s->watchdog_event_source);
+ sd_event_source_unref(s->idle_event_source);
+ sd_event_unref(s->event);
+
+ safe_close(s->syslog_fd);
+ safe_close(s->native_fd);
+ safe_close(s->stdout_fd);
+ safe_close(s->dev_kmsg_fd);
+ safe_close(s->audit_fd);
+ safe_close(s->hostname_fd);
+ safe_close(s->notify_fd);
+
+ if (s->ratelimit)
+ journal_ratelimit_free(s->ratelimit);
+
+ if (s->kernel_seqnum)
+ munmap(s->kernel_seqnum, sizeof(uint64_t));
+
+ free(s->buffer);
+ free(s->tty_path);
+ free(s->cgroup_root);
+ free(s->hostname_field);
+ free(s->runtime_storage.path);
+ free(s->system_storage.path);
+ free(s->runtime_directory);
+
+ mmap_cache_unref(s->mmap);
+}
+
+static const char* const storage_table[_STORAGE_MAX] = {
+ [STORAGE_AUTO] = "auto",
+ [STORAGE_VOLATILE] = "volatile",
+ [STORAGE_PERSISTENT] = "persistent",
+ [STORAGE_NONE] = "none"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
+
+static const char* const split_mode_table[_SPLIT_MAX] = {
+ [SPLIT_LOGIN] = "login",
+ [SPLIT_UID] = "uid",
+ [SPLIT_NONE] = "none",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
+
+int config_parse_line_max(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ size_t *sz = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue))
+ /* Empty assignment means default */
+ *sz = DEFAULT_LINE_MAX;
+ else {
+ uint64_t v;
+
+ r = parse_size(rvalue, 1024, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (v < 79) {
+ /* Why specify 79 here as minimum line length? Simply, because the most common traditional
+ * terminal size is 80ch, and it might make sense to break one character before the natural
+ * line break would occur on that. */
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
+ *sz = 79;
+ } else if (v > (uint64_t) (SSIZE_MAX-1)) {
+ /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
+ * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
+ * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
+ * fail much earlier anyway. */
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
+ *sz = SSIZE_MAX-1;
+ } else
+ *sz = (size_t) v;
+ }
+
+ return 0;
+}
+
+int config_parse_compress(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ JournalCompressOptions* compress = data;
+ int r;
+
+ if (isempty(rvalue)) {
+ compress->enabled = true;
+ compress->threshold_bytes = (uint64_t) -1;
+ } else if (streq(rvalue, "1")) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Compress= ambiguously specified as 1, enabling compression with default threshold");
+ compress->enabled = true;
+ } else if (streq(rvalue, "0")) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Compress= ambiguously specified as 0, disabling compression");
+ compress->enabled = false;
+ } else {
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ r = parse_size(rvalue, 1024, &compress->threshold_bytes);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse Compress= value, ignoring: %s", rvalue);
+ else
+ compress->enabled = true;
+ } else
+ compress->enabled = r;
+ }
+
+ return 0;
+}
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
new file mode 100644
index 0000000..5fb145e
--- /dev/null
+++ b/src/journal/journald-server.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+typedef struct Server Server;
+
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "journal-file.h"
+#include "journald-context.h"
+#include "journald-rate-limit.h"
+#include "journald-stream.h"
+#include "list.h"
+#include "prioq.h"
+#include "time-util.h"
+#include "varlink.h"
+
+typedef enum Storage {
+ STORAGE_AUTO,
+ STORAGE_VOLATILE,
+ STORAGE_PERSISTENT,
+ STORAGE_NONE,
+ _STORAGE_MAX,
+ _STORAGE_INVALID = -1
+} Storage;
+
+typedef enum SplitMode {
+ SPLIT_UID,
+ SPLIT_LOGIN, /* deprecated */
+ SPLIT_NONE,
+ _SPLIT_MAX,
+ _SPLIT_INVALID = -1
+} SplitMode;
+
+typedef struct JournalCompressOptions {
+ bool enabled;
+ uint64_t threshold_bytes;
+} JournalCompressOptions;
+
+typedef struct JournalStorageSpace {
+ usec_t timestamp;
+
+ uint64_t available;
+ uint64_t limit;
+
+ uint64_t vfs_used; /* space used by journal files */
+ uint64_t vfs_available;
+} JournalStorageSpace;
+
+typedef struct JournalStorage {
+ const char *name;
+ char *path;
+
+ JournalMetrics metrics;
+ JournalStorageSpace space;
+} JournalStorage;
+
+struct Server {
+ char *namespace;
+
+ int syslog_fd;
+ int native_fd;
+ int stdout_fd;
+ int dev_kmsg_fd;
+ int audit_fd;
+ int hostname_fd;
+ int notify_fd;
+
+ sd_event *event;
+
+ sd_event_source *syslog_event_source;
+ sd_event_source *native_event_source;
+ sd_event_source *stdout_event_source;
+ sd_event_source *dev_kmsg_event_source;
+ sd_event_source *audit_event_source;
+ sd_event_source *sync_event_source;
+ sd_event_source *sigusr1_event_source;
+ sd_event_source *sigusr2_event_source;
+ sd_event_source *sigterm_event_source;
+ sd_event_source *sigint_event_source;
+ sd_event_source *sigrtmin1_event_source;
+ sd_event_source *hostname_event_source;
+ sd_event_source *notify_event_source;
+ sd_event_source *watchdog_event_source;
+ sd_event_source *idle_event_source;
+
+ JournalFile *runtime_journal;
+ JournalFile *system_journal;
+ OrderedHashmap *user_journals;
+
+ uint64_t seqnum;
+
+ char *buffer;
+ size_t buffer_size;
+
+ JournalRateLimit *ratelimit;
+ usec_t sync_interval_usec;
+ usec_t ratelimit_interval;
+ unsigned ratelimit_burst;
+
+ JournalStorage runtime_storage;
+ JournalStorage system_storage;
+
+ JournalCompressOptions compress;
+ bool seal;
+ bool read_kmsg;
+ int set_audit;
+
+ bool forward_to_kmsg;
+ bool forward_to_syslog;
+ bool forward_to_console;
+ bool forward_to_wall;
+
+ unsigned n_forward_syslog_missed;
+ usec_t last_warn_forward_syslog_missed;
+
+ usec_t max_retention_usec;
+ usec_t max_file_usec;
+ usec_t oldest_file_usec;
+
+ LIST_HEAD(StdoutStream, stdout_streams);
+ LIST_HEAD(StdoutStream, stdout_streams_notify_queue);
+ unsigned n_stdout_streams;
+
+ char *tty_path;
+
+ int max_level_store;
+ int max_level_syslog;
+ int max_level_kmsg;
+ int max_level_console;
+ int max_level_wall;
+
+ Storage storage;
+ SplitMode split_mode;
+
+ MMapCache *mmap;
+
+ Set *deferred_closes;
+
+ uint64_t *kernel_seqnum;
+ bool dev_kmsg_readable:1;
+
+ bool send_watchdog:1;
+ bool sent_notify_ready:1;
+ bool sync_scheduled:1;
+
+ char machine_id_field[sizeof("_MACHINE_ID=") + 32];
+ char boot_id_field[sizeof("_BOOT_ID=") + 32];
+ char *hostname_field;
+ char *namespace_field;
+ char *runtime_directory;
+
+ /* Cached cgroup root, so that we don't have to query that all the time */
+ char *cgroup_root;
+
+ usec_t watchdog_usec;
+
+ usec_t last_realtime_clock;
+
+ size_t line_max;
+
+ /* Caching of client metadata */
+ Hashmap *client_contexts;
+ Prioq *client_contexts_lru;
+
+ usec_t last_cache_pid_flush;
+
+ ClientContext *my_context; /* the context of journald itself */
+ ClientContext *pid1_context; /* the context of PID 1 */
+
+ VarlinkServer *varlink_server;
+};
+
+#define SERVER_MACHINE_ID(s) ((s)->machine_id_field + STRLEN("_MACHINE_ID="))
+
+/* Extra fields for any log messages */
+#define N_IOVEC_META_FIELDS 23
+
+/* Extra fields for log messages that contain OBJECT_PID= (i.e. log about another process) */
+#define N_IOVEC_OBJECT_FIELDS 18
+
+/* Maximum number of fields we'll add in for driver (i.e. internal) messages */
+#define N_IOVEC_PAYLOAD_FIELDS 16
+
+/* kmsg: Maximum number of extra fields we'll import from the kernel's /dev/kmsg */
+#define N_IOVEC_KERNEL_FIELDS 64
+
+/* kmsg: Maximum number of extra fields we'll import from udev's devices */
+#define N_IOVEC_UDEV_FIELDS 32
+
+void server_dispatch_message(Server *s, struct iovec *iovec, size_t n, size_t m, ClientContext *c, const struct timeval *tv, int priority, pid_t object_pid);
+void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) _sentinel_ _printf_(4,0);
+
+/* gperf lookup function */
+const struct ConfigPerfItem* journald_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_storage);
+CONFIG_PARSER_PROTOTYPE(config_parse_line_max);
+CONFIG_PARSER_PROTOTYPE(config_parse_compress);
+
+const char *storage_to_string(Storage s) _const_;
+Storage storage_from_string(const char *s) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_split_mode);
+
+const char *split_mode_to_string(SplitMode s) _const_;
+SplitMode split_mode_from_string(const char *s) _pure_;
+
+int server_init(Server *s, const char *namespace);
+void server_done(Server *s);
+void server_sync(Server *s);
+int server_vacuum(Server *s, bool verbose);
+void server_rotate(Server *s);
+int server_schedule_sync(Server *s, int priority);
+int server_flush_to_var(Server *s, bool require_flag_file);
+void server_maybe_append_tags(Server *s);
+int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata);
+void server_space_usage_message(Server *s, JournalStorage *storage);
+
+int server_start_or_stop_idle_timer(Server *s);
+int server_refresh_idle_timer(Server *s);
diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c
new file mode 100644
index 0000000..3241ef2
--- /dev/null
+++ b/src/journal/journald-stream.c
@@ -0,0 +1,963 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+#include <unistd.h>
+
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "errno-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "journald-console.h"
+#include "journald-context.h"
+#include "journald-kmsg.h"
+#include "journald-server.h"
+#include "journald-stream.h"
+#include "journald-syslog.h"
+#include "journald-wall.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+
+#define STDOUT_STREAMS_MAX 4096
+
+typedef enum StdoutStreamState {
+ STDOUT_STREAM_IDENTIFIER,
+ STDOUT_STREAM_UNIT_ID,
+ STDOUT_STREAM_PRIORITY,
+ STDOUT_STREAM_LEVEL_PREFIX,
+ STDOUT_STREAM_FORWARD_TO_SYSLOG,
+ STDOUT_STREAM_FORWARD_TO_KMSG,
+ STDOUT_STREAM_FORWARD_TO_CONSOLE,
+ STDOUT_STREAM_RUNNING
+} StdoutStreamState;
+
+/* The different types of log record terminators: a real \n was read, a NUL character was read, the maximum line length
+ * was reached, or the end of the stream was reached */
+
+typedef enum LineBreak {
+ LINE_BREAK_NEWLINE,
+ LINE_BREAK_NUL,
+ LINE_BREAK_LINE_MAX,
+ LINE_BREAK_EOF,
+ LINE_BREAK_PID_CHANGE,
+ _LINE_BREAK_MAX,
+ _LINE_BREAK_INVALID = -1,
+} LineBreak;
+
+struct StdoutStream {
+ Server *server;
+ StdoutStreamState state;
+
+ int fd;
+
+ struct ucred ucred;
+ char *label;
+ char *identifier;
+ char *unit_id;
+ int priority;
+ bool level_prefix:1;
+ bool forward_to_syslog:1;
+ bool forward_to_kmsg:1;
+ bool forward_to_console:1;
+
+ bool fdstore:1;
+ bool in_notify_queue:1;
+
+ char *buffer;
+ size_t length;
+ size_t allocated;
+
+ sd_event_source *event_source;
+
+ char *state_file;
+
+ ClientContext *context;
+
+ LIST_FIELDS(StdoutStream, stdout_stream);
+ LIST_FIELDS(StdoutStream, stdout_stream_notify_queue);
+
+ char id_field[STRLEN("_STREAM_ID=") + SD_ID128_STRING_MAX];
+};
+
+void stdout_stream_free(StdoutStream *s) {
+ if (!s)
+ return;
+
+ if (s->server) {
+
+ if (s->context)
+ client_context_release(s->server, s->context);
+
+ assert(s->server->n_stdout_streams > 0);
+ s->server->n_stdout_streams--;
+ LIST_REMOVE(stdout_stream, s->server->stdout_streams, s);
+
+ if (s->in_notify_queue)
+ LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+
+ (void) server_start_or_stop_idle_timer(s->server); /* Maybe we are idle now? */
+ }
+
+ if (s->event_source) {
+ sd_event_source_set_enabled(s->event_source, SD_EVENT_OFF);
+ s->event_source = sd_event_source_unref(s->event_source);
+ }
+
+ safe_close(s->fd);
+ free(s->label);
+ free(s->identifier);
+ free(s->unit_id);
+ free(s->state_file);
+ free(s->buffer);
+
+ free(s);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(StdoutStream*, stdout_stream_free);
+
+void stdout_stream_destroy(StdoutStream *s) {
+ if (!s)
+ return;
+
+ if (s->state_file)
+ (void) unlink(s->state_file);
+
+ stdout_stream_free(s);
+}
+
+static int stdout_stream_save(StdoutStream *s) {
+ _cleanup_(unlink_and_freep) char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(s);
+
+ if (s->state != STDOUT_STREAM_RUNNING)
+ return 0;
+
+ if (!s->state_file) {
+ struct stat st;
+
+ r = fstat(s->fd, &st);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to stat connected stream: %m");
+
+ /* We use device and inode numbers as identifier for the stream */
+ r = asprintf(&s->state_file, "%s/streams/%lu:%lu", s->server->runtime_directory, (unsigned long) st.st_dev, (unsigned long) st.st_ino);
+ if (r < 0)
+ return log_oom();
+ }
+
+ (void) mkdir_parents(s->state_file, 0755);
+
+ r = fopen_temporary(s->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f,
+ "# This is private data. Do not parse\n"
+ "PRIORITY=%i\n"
+ "LEVEL_PREFIX=%i\n"
+ "FORWARD_TO_SYSLOG=%i\n"
+ "FORWARD_TO_KMSG=%i\n"
+ "FORWARD_TO_CONSOLE=%i\n"
+ "STREAM_ID=%s\n",
+ s->priority,
+ s->level_prefix,
+ s->forward_to_syslog,
+ s->forward_to_kmsg,
+ s->forward_to_console,
+ s->id_field + STRLEN("_STREAM_ID="));
+
+ if (!isempty(s->identifier)) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->identifier);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "IDENTIFIER=%s\n", escaped);
+ }
+
+ if (!isempty(s->unit_id)) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->unit_id);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "UNIT=%s\n", escaped);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, s->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ temp_path = mfree(temp_path);
+
+ if (!s->fdstore && !s->in_notify_queue) {
+ LIST_PREPEND(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+ s->in_notify_queue = true;
+
+ if (s->server->notify_event_source) {
+ r = sd_event_source_set_enabled(s->server->notify_event_source, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable notify event source: %m");
+ }
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(s->state_file);
+ return log_error_errno(r, "Failed to save stream data %s: %m", s->state_file);
+}
+
+static int stdout_stream_log(
+ StdoutStream *s,
+ const char *p,
+ LineBreak line_break) {
+
+ struct iovec *iovec;
+ int priority;
+ char syslog_priority[] = "PRIORITY=\0";
+ char syslog_facility[STRLEN("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int) + 1];
+ _cleanup_free_ char *message = NULL, *syslog_identifier = NULL;
+ size_t n = 0, m;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ assert(line_break >= 0);
+ assert(line_break < _LINE_BREAK_MAX);
+
+ if (s->context)
+ (void) client_context_maybe_refresh(s->server, s->context, NULL, NULL, 0, NULL, USEC_INFINITY);
+ else if (pid_is_valid(s->ucred.pid)) {
+ r = client_context_acquire(s->server, s->ucred.pid, &s->ucred, s->label, strlen_ptr(s->label), s->unit_id, &s->context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to acquire client context, ignoring: %m");
+ }
+
+ priority = s->priority;
+
+ if (s->level_prefix)
+ syslog_parse_priority(&p, &priority, false);
+
+ if (!client_context_test_priority(s->context, priority))
+ return 0;
+
+ if (isempty(p))
+ return 0;
+
+ if (s->forward_to_syslog || s->server->forward_to_syslog)
+ server_forward_syslog(s->server, syslog_fixup_facility(priority), s->identifier, p, &s->ucred, NULL);
+
+ if (s->forward_to_kmsg || s->server->forward_to_kmsg)
+ server_forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
+
+ if (s->forward_to_console || s->server->forward_to_console)
+ server_forward_console(s->server, priority, s->identifier, p, &s->ucred);
+
+ if (s->server->forward_to_wall)
+ server_forward_wall(s->server, priority, s->identifier, p, &s->ucred);
+
+ m = N_IOVEC_META_FIELDS + 7 + client_context_extra_fields_n_iovec(s->context);
+ iovec = newa(struct iovec, m);
+
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=stdout");
+ iovec[n++] = IOVEC_MAKE_STRING(s->id_field);
+
+ syslog_priority[STRLEN("PRIORITY=")] = '0' + LOG_PRI(priority);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
+
+ if (priority & LOG_FACMASK) {
+ xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
+ }
+
+ if (s->identifier) {
+ syslog_identifier = strjoin("SYSLOG_IDENTIFIER=", s->identifier);
+ if (syslog_identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
+ }
+
+ static const char * const line_break_field_table[_LINE_BREAK_MAX] = {
+ [LINE_BREAK_NEWLINE] = NULL, /* Do not add field if traditional newline */
+ [LINE_BREAK_NUL] = "_LINE_BREAK=nul",
+ [LINE_BREAK_LINE_MAX] = "_LINE_BREAK=line-max",
+ [LINE_BREAK_EOF] = "_LINE_BREAK=eof",
+ [LINE_BREAK_PID_CHANGE] = "_LINE_BREAK=pid-change",
+ };
+
+ const char *c = line_break_field_table[line_break];
+
+ /* If this log message was generated due to an uncommon line break then mention this in the log
+ * entry */
+ if (c)
+ iovec[n++] = IOVEC_MAKE_STRING(c);
+
+ message = strjoin("MESSAGE=", p);
+ if (message)
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+
+ server_dispatch_message(s->server, iovec, n, m, s->context, NULL, priority, 0);
+ return 0;
+}
+
+static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) {
+ char *orig;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ orig = p;
+ p = strstrip(p);
+
+ /* line breaks by NUL, line max length or EOF are not permissible during the negotiation part of the protocol */
+ if (line_break != LINE_BREAK_NEWLINE && s->state != STDOUT_STREAM_RUNNING)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Control protocol line not properly terminated.");
+
+ switch (s->state) {
+
+ case STDOUT_STREAM_IDENTIFIER:
+ if (!isempty(p)) {
+ s->identifier = strdup(p);
+ if (!s->identifier)
+ return log_oom();
+ }
+
+ s->state = STDOUT_STREAM_UNIT_ID;
+ return 0;
+
+ case STDOUT_STREAM_UNIT_ID:
+ if (s->ucred.uid == 0 &&
+ unit_name_is_valid(p, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+
+ s->unit_id = strdup(p);
+ if (!s->unit_id)
+ return log_oom();
+ }
+
+ s->state = STDOUT_STREAM_PRIORITY;
+ return 0;
+
+ case STDOUT_STREAM_PRIORITY:
+ r = safe_atoi(p, &s->priority);
+ if (r < 0 || s->priority < 0 || s->priority > 999) {
+ log_warning("Failed to parse log priority line.");
+ return -EINVAL;
+ }
+
+ s->state = STDOUT_STREAM_LEVEL_PREFIX;
+ return 0;
+
+ case STDOUT_STREAM_LEVEL_PREFIX:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse level prefix line.");
+ return -EINVAL;
+ }
+
+ s->level_prefix = r;
+ s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_SYSLOG:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse forward to syslog line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_syslog = r;
+ s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_KMSG:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse copy to kmsg line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_kmsg = r;
+ s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_CONSOLE:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse copy to console line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_console = r;
+ s->state = STDOUT_STREAM_RUNNING;
+
+ /* Try to save the stream, so that journald can be restarted and we can recover */
+ (void) stdout_stream_save(s);
+ return 0;
+
+ case STDOUT_STREAM_RUNNING:
+ return stdout_stream_log(s, orig, line_break);
+ }
+
+ assert_not_reached("Unknown stream state");
+}
+
+static int stdout_stream_found(
+ StdoutStream *s,
+ char *p,
+ size_t l,
+ LineBreak line_break) {
+
+ char saved;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ /* Let's NUL terminate the specified buffer for this call, and revert back afterwards */
+ saved = p[l];
+ p[l] = 0;
+ r = stdout_stream_line(s, p, line_break);
+ p[l] = saved;
+
+ return r;
+}
+
+static int stdout_stream_scan(
+ StdoutStream *s,
+ char *p,
+ size_t remaining,
+ LineBreak force_flush,
+ size_t *ret_consumed) {
+
+ size_t consumed = 0;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ for (;;) {
+ LineBreak line_break;
+ size_t skip, found;
+ char *end1, *end2;
+
+ end1 = memchr(p, '\n', remaining);
+ end2 = memchr(p, 0, end1 ? (size_t) (end1 - p) : remaining);
+
+ if (end2) {
+ /* We found a NUL terminator */
+ found = end2 - p;
+ skip = found + 1;
+ line_break = LINE_BREAK_NUL;
+ } else if (end1) {
+ /* We found a \n terminator */
+ found = end1 - p;
+ skip = found + 1;
+ line_break = LINE_BREAK_NEWLINE;
+ } else if (remaining >= s->server->line_max) {
+ /* Force a line break after the maximum line length */
+ found = skip = s->server->line_max;
+ line_break = LINE_BREAK_LINE_MAX;
+ } else
+ break;
+
+ r = stdout_stream_found(s, p, found, line_break);
+ if (r < 0)
+ return r;
+
+ p += skip;
+ consumed += skip;
+ remaining -= skip;
+ }
+
+ if (force_flush >= 0 && remaining > 0) {
+ r = stdout_stream_found(s, p, remaining, force_flush);
+ if (r < 0)
+ return r;
+
+ consumed += remaining;
+ }
+
+ if (ret_consumed)
+ *ret_consumed = consumed;
+
+ return 0;
+}
+
+static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ StdoutStream *s = userdata;
+ size_t limit, consumed;
+ struct ucred *ucred;
+ struct iovec iovec;
+ ssize_t l;
+ char *p;
+ int r;
+
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ assert(s);
+
+ if ((revents|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
+ log_error("Got invalid event from epoll for stdout stream: %"PRIx32, revents);
+ goto terminate;
+ }
+
+ /* If the buffer is almost full, add room for another 1K */
+ if (s->length + 512 >= s->allocated) {
+ if (!GREEDY_REALLOC(s->buffer, s->allocated, s->length + 1 + 1024)) {
+ log_oom();
+ goto terminate;
+ }
+ }
+
+ /* Try to make use of the allocated buffer in full, but never read more than the configured line size. Also,
+ * always leave room for a terminating NUL we might need to add. */
+ limit = MIN(s->allocated - 1, s->server->line_max);
+ assert(s->length <= limit);
+ iovec = IOVEC_MAKE(s->buffer + s->length, limit - s->length);
+
+ l = recvmsg(s->fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (l < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ return 0;
+
+ log_warning_errno(errno, "Failed to read from stream: %m");
+ goto terminate;
+ }
+ cmsg_close_all(&msghdr);
+
+ if (l == 0) {
+ (void) stdout_stream_scan(s, s->buffer, s->length, /* force_flush = */ LINE_BREAK_EOF, NULL);
+ goto terminate;
+ }
+
+ /* Invalidate the context if the PID of the sender changed. This happens when a forked process
+ * inherits stdout/stderr from a parent. In this case getpeercred() returns the ucred of the parent,
+ * which can be invalid if the parent has exited in the meantime. */
+ ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
+ if (ucred && ucred->pid != s->ucred.pid) {
+ /* Force out any previously half-written lines from a different process, before we switch to
+ * the new ucred structure for everything we just added */
+ r = stdout_stream_scan(s, s->buffer, s->length, /* force_flush = */ LINE_BREAK_PID_CHANGE, NULL);
+ if (r < 0)
+ goto terminate;
+
+ s->context = client_context_release(s->server, s->context);
+
+ p = s->buffer + s->length;
+ } else {
+ p = s->buffer;
+ l += s->length;
+ }
+
+ /* Always copy in the new credentials */
+ if (ucred)
+ s->ucred = *ucred;
+
+ r = stdout_stream_scan(s, p, l, _LINE_BREAK_INVALID, &consumed);
+ if (r < 0)
+ goto terminate;
+
+ /* Move what wasn't consumed to the front of the buffer */
+ assert(consumed <= (size_t) l);
+ s->length = l - consumed;
+ memmove(s->buffer, p + consumed, s->length);
+
+ return 1;
+
+terminate:
+ stdout_stream_destroy(s);
+ return 0;
+}
+
+int stdout_stream_install(Server *s, int fd, StdoutStream **ret) {
+ _cleanup_(stdout_stream_freep) StdoutStream *stream = NULL;
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate stream ID: %m");
+
+ stream = new(StdoutStream, 1);
+ if (!stream)
+ return log_oom();
+
+ *stream = (StdoutStream) {
+ .fd = -1,
+ .priority = LOG_INFO,
+ };
+
+ xsprintf(stream->id_field, "_STREAM_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id));
+
+ r = getpeercred(fd, &stream->ucred);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine peer credentials: %m");
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+ if (mac_selinux_use()) {
+ r = getpeersec(fd, &stream->label);
+ if (r < 0 && r != -EOPNOTSUPP)
+ (void) log_warning_errno(r, "Failed to determine peer security context: %m");
+ }
+
+ (void) shutdown(fd, SHUT_WR);
+
+ r = sd_event_add_io(s->event, &stream->event_source, fd, EPOLLIN, stdout_stream_process, stream);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add stream to event loop: %m");
+
+ r = sd_event_source_set_priority(stream->event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust stdout event source priority: %m");
+
+ stream->fd = fd;
+
+ stream->server = s;
+ LIST_PREPEND(stdout_stream, s->stdout_streams, stream);
+ s->n_stdout_streams++;
+
+ (void) server_start_or_stop_idle_timer(s); /* Maybe no longer idle? */
+
+ if (ret)
+ *ret = stream;
+
+ TAKE_PTR(stream);
+ return 0;
+}
+
+static int stdout_stream_new(sd_event_source *es, int listen_fd, uint32_t revents, void *userdata) {
+ _cleanup_close_ int fd = -1;
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ if (revents != EPOLLIN)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Got invalid event from epoll for stdout server fd: %" PRIx32,
+ revents);
+
+ fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (fd < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return 0;
+
+ return log_error_errno(errno, "Failed to accept stdout connection: %m");
+ }
+
+ if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
+ struct ucred u;
+
+ r = getpeercred(fd, &u);
+
+ /* By closing fd here we make sure that the client won't wait too long for journald to
+ * gather all the data it adds to the error message to find out that the connection has
+ * just been refused.
+ */
+ fd = safe_close(fd);
+
+ server_driver_message(s, r < 0 ? 0 : u.pid, NULL, LOG_MESSAGE("Too many stdout streams, refusing connection."), NULL);
+ return 0;
+ }
+
+ r = stdout_stream_install(s, fd, NULL);
+ if (r < 0)
+ return r;
+
+ TAKE_FD(fd);
+ return 0;
+}
+
+static int stdout_stream_load(StdoutStream *stream, const char *fname) {
+ _cleanup_free_ char
+ *priority = NULL,
+ *level_prefix = NULL,
+ *forward_to_syslog = NULL,
+ *forward_to_kmsg = NULL,
+ *forward_to_console = NULL,
+ *stream_id = NULL;
+ int r;
+
+ assert(stream);
+ assert(fname);
+
+ if (!stream->state_file) {
+ stream->state_file = path_join(stream->server->runtime_directory, "streams", fname);
+ if (!stream->state_file)
+ return log_oom();
+ }
+
+ r = parse_env_file(NULL, stream->state_file,
+ "PRIORITY", &priority,
+ "LEVEL_PREFIX", &level_prefix,
+ "FORWARD_TO_SYSLOG", &forward_to_syslog,
+ "FORWARD_TO_KMSG", &forward_to_kmsg,
+ "FORWARD_TO_CONSOLE", &forward_to_console,
+ "IDENTIFIER", &stream->identifier,
+ "UNIT", &stream->unit_id,
+ "STREAM_ID", &stream_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read: %s", stream->state_file);
+
+ if (priority) {
+ int p;
+
+ p = log_level_from_string(priority);
+ if (p >= 0)
+ stream->priority = p;
+ }
+
+ if (level_prefix) {
+ r = parse_boolean(level_prefix);
+ if (r >= 0)
+ stream->level_prefix = r;
+ }
+
+ if (forward_to_syslog) {
+ r = parse_boolean(forward_to_syslog);
+ if (r >= 0)
+ stream->forward_to_syslog = r;
+ }
+
+ if (forward_to_kmsg) {
+ r = parse_boolean(forward_to_kmsg);
+ if (r >= 0)
+ stream->forward_to_kmsg = r;
+ }
+
+ if (forward_to_console) {
+ r = parse_boolean(forward_to_console);
+ if (r >= 0)
+ stream->forward_to_console = r;
+ }
+
+ if (stream_id) {
+ sd_id128_t id;
+
+ r = sd_id128_from_string(stream_id, &id);
+ if (r >= 0)
+ xsprintf(stream->id_field, "_STREAM_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id));
+ }
+
+ return 0;
+}
+
+static int stdout_stream_restore(Server *s, const char *fname, int fd) {
+ StdoutStream *stream;
+ int r;
+
+ assert(s);
+ assert(fname);
+ assert(fd >= 0);
+
+ if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
+ log_warning("Too many stdout streams, refusing restoring of stream.");
+ return -ENOBUFS;
+ }
+
+ r = stdout_stream_install(s, fd, &stream);
+ if (r < 0)
+ return r;
+
+ stream->state = STDOUT_STREAM_RUNNING;
+ stream->fdstore = true;
+
+ /* Ignore all parsing errors */
+ (void) stdout_stream_load(stream, fname);
+
+ return 0;
+}
+
+int server_restore_streams(Server *s, FDSet *fds) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ const char *path;
+ int r;
+
+ path = strjoina(s->runtime_directory, "/streams");
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to enumerate %s: %m", path);
+ }
+
+ FOREACH_DIRENT(de, d, goto fail) {
+ unsigned long st_dev, st_ino;
+ bool found = false;
+ int fd;
+
+ if (sscanf(de->d_name, "%lu:%lu", &st_dev, &st_ino) != 2)
+ continue;
+
+ FDSET_FOREACH(fd, fds) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", de->d_name);
+
+ if (S_ISSOCK(st.st_mode) && st.st_dev == st_dev && st.st_ino == st_ino) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* No file descriptor? Then let's delete the state file */
+ log_debug("Cannot restore stream file %s", de->d_name);
+ if (unlinkat(dirfd(d), de->d_name, 0) < 0)
+ log_warning_errno(errno, "Failed to remove %s/%s: %m", path, de->d_name);
+ continue;
+ }
+
+ fdset_remove(fds, fd);
+
+ r = stdout_stream_restore(s, de->d_name, fd);
+ if (r < 0)
+ safe_close(fd);
+ }
+
+ return 0;
+
+fail:
+ return log_error_errno(errno, "Failed to read streams directory: %m");
+}
+
+int server_open_stdout_socket(Server *s, const char *stdout_socket) {
+ int r;
+
+ assert(s);
+ assert(stdout_socket);
+
+ if (s->stdout_fd < 0) {
+ union sockaddr_union sa;
+ socklen_t sa_len;
+
+ r = sockaddr_un_set_path(&sa.un, stdout_socket);
+ if (r < 0)
+ return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", stdout_socket);
+ sa_len = r;
+
+ s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->stdout_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(s->stdout_fd, &sa.sa, sa_len);
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+
+ if (listen(s->stdout_fd, SOMAXCONN) < 0)
+ return log_error_errno(errno, "listen(%s) failed: %m", sa.un.sun_path);
+ } else
+ (void) fd_nonblock(s->stdout_fd, true);
+
+ r = sd_event_add_io(s->event, &s->stdout_event_source, s->stdout_fd, EPOLLIN, stdout_stream_new, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add stdout server fd to event source: %m");
+
+ r = sd_event_source_set_priority(s->stdout_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust priority of stdout server event source: %m");
+
+ return 0;
+}
+
+void stdout_stream_send_notify(StdoutStream *s) {
+ struct iovec iovec = {
+ .iov_base = (char*) "FDSTORE=1",
+ .iov_len = STRLEN("FDSTORE=1"),
+ };
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+ ssize_t l;
+
+ assert(s);
+ assert(!s->fdstore);
+ assert(s->in_notify_queue);
+ assert(s->server);
+ assert(s->server->notify_fd >= 0);
+
+ /* Store the connection fd in PID 1, so that we get it passed
+ * in again on next start */
+
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int));
+ msghdr.msg_control = alloca0(msghdr.msg_controllen);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+
+ memcpy(CMSG_DATA(cmsg), &s->fd, sizeof(int));
+
+ l = sendmsg(s->server->notify_fd, &msghdr, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return;
+
+ log_error_errno(errno, "Failed to send stream file descriptor to service manager: %m");
+ } else {
+ log_debug("Successfully sent stream file descriptor to service manager.");
+ s->fdstore = 1;
+ }
+
+ LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+ s->in_notify_queue = false;
+
+}
diff --git a/src/journal/journald-stream.h b/src/journal/journald-stream.h
new file mode 100644
index 0000000..0a033b4
--- /dev/null
+++ b/src/journal/journald-stream.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct StdoutStream StdoutStream;
+
+#include "fdset.h"
+#include "journald-server.h"
+
+int server_open_stdout_socket(Server *s, const char *stdout_socket);
+int server_restore_streams(Server *s, FDSet *fds);
+
+void stdout_stream_free(StdoutStream *s);
+int stdout_stream_install(Server *s, int fd, StdoutStream **ret);
+void stdout_stream_destroy(StdoutStream *s);
+void stdout_stream_send_notify(StdoutStream *s);
diff --git a/src/journal/journald-syslog.c b/src/journal/journald-syslog.c
new file mode 100644
index 0000000..925bd50
--- /dev/null
+++ b/src/journal/journald-syslog.c
@@ -0,0 +1,527 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "journald-console.h"
+#include "journald-kmsg.h"
+#include "journald-server.h"
+#include "journald-syslog.h"
+#include "journald-wall.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+
+/* Warn once every 30s if we missed syslog message */
+#define WARN_FORWARD_SYSLOG_MISSED_USEC (30 * USEC_PER_SEC)
+
+static void forward_syslog_iovec(
+ Server *s,
+ const struct iovec *iovec,
+ unsigned n_iovec,
+ const struct ucred *ucred,
+ const struct timeval *tv) {
+
+ union sockaddr_union sa;
+
+ struct msghdr msghdr = {
+ .msg_iov = (struct iovec *) iovec,
+ .msg_iovlen = n_iovec,
+ };
+ struct cmsghdr *cmsg;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ const char *j;
+ int r;
+
+ assert(s);
+ assert(iovec);
+ assert(n_iovec > 0);
+
+ j = strjoina(s->runtime_directory, "/syslog");
+ r = sockaddr_un_set_path(&sa.un, j);
+ if (r < 0) {
+ log_debug_errno(r, "Forwarding socket path %s too long for AF_UNIX, not forwarding: %m", j);
+ return;
+ }
+
+ msghdr.msg_name = &sa.sa;
+ msghdr.msg_namelen = r;
+
+ if (ucred) {
+ zero(control);
+ msghdr.msg_control = &control;
+ msghdr.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+ memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
+ msghdr.msg_controllen = cmsg->cmsg_len;
+ }
+
+ /* Forward the syslog message we received via /dev/log to /run/systemd/syslog. Unfortunately we
+ * currently can't set the SO_TIMESTAMP auxiliary data, and hence we don't. */
+
+ if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
+ return;
+
+ /* The socket is full? I guess the syslog implementation is
+ * too slow, and we shouldn't wait for that... */
+ if (errno == EAGAIN) {
+ s->n_forward_syslog_missed++;
+ return;
+ }
+
+ if (ucred && IN_SET(errno, ESRCH, EPERM)) {
+ struct ucred u;
+
+ /* Hmm, presumably the sender process vanished
+ * by now, or we don't have CAP_SYS_AMDIN, so
+ * let's fix it as good as we can, and retry */
+
+ u = *ucred;
+ u.pid = getpid_cached();
+ memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
+
+ if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
+ return;
+
+ if (errno == EAGAIN) {
+ s->n_forward_syslog_missed++;
+ return;
+ }
+ }
+
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to forward syslog message: %m");
+}
+
+static void forward_syslog_raw(Server *s, int priority, const char *buffer, size_t buffer_len, const struct ucred *ucred, const struct timeval *tv) {
+ struct iovec iovec;
+
+ assert(s);
+ assert(buffer);
+
+ if (LOG_PRI(priority) > s->max_level_syslog)
+ return;
+
+ iovec = IOVEC_MAKE((char *) buffer, buffer_len);
+ forward_syslog_iovec(s, &iovec, 1, ucred, tv);
+}
+
+void server_forward_syslog(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred, const struct timeval *tv) {
+ struct iovec iovec[5];
+ char header_priority[DECIMAL_STR_MAX(priority) + 3], header_time[64],
+ header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1];
+ int n = 0;
+ time_t t;
+ struct tm tm;
+ _cleanup_free_ char *ident_buf = NULL;
+
+ assert(s);
+ assert(priority >= 0);
+ assert(priority <= 999);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_syslog)
+ return;
+
+ /* First: priority field */
+ xsprintf(header_priority, "<%i>", priority);
+ iovec[n++] = IOVEC_MAKE_STRING(header_priority);
+
+ /* Second: timestamp */
+ t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
+ if (!localtime_r(&t, &tm))
+ return;
+ if (strftime(header_time, sizeof(header_time), "%h %e %T ", &tm) <= 0)
+ return;
+ iovec[n++] = IOVEC_MAKE_STRING(header_time);
+
+ /* Third: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ (void) get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+
+ iovec[n++] = IOVEC_MAKE_STRING(header_pid);
+ } else if (identifier) {
+ iovec[n++] = IOVEC_MAKE_STRING(identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(": ");
+ }
+
+ /* Fourth: message */
+ iovec[n++] = IOVEC_MAKE_STRING(message);
+
+ forward_syslog_iovec(s, iovec, n, ucred, tv);
+}
+
+int syslog_fixup_facility(int priority) {
+
+ if ((priority & LOG_FACMASK) == 0)
+ return (priority & LOG_PRIMASK) | LOG_USER;
+
+ return priority;
+}
+
+size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid) {
+ const char *p;
+ char *t;
+ size_t l, e;
+
+ assert(buf);
+ assert(identifier);
+ assert(pid);
+
+ p = *buf;
+
+ p += strspn(p, WHITESPACE);
+ l = strcspn(p, WHITESPACE);
+
+ if (l <= 0 ||
+ p[l-1] != ':')
+ return 0;
+
+ e = l;
+ l--;
+
+ if (l > 0 && p[l-1] == ']') {
+ size_t k = l-1;
+
+ for (;;) {
+
+ if (p[k] == '[') {
+ t = strndup(p+k+1, l-k-2);
+ if (t)
+ *pid = t;
+
+ l = k;
+ break;
+ }
+
+ if (k == 0)
+ break;
+
+ k--;
+ }
+ }
+
+ t = strndup(p, l);
+ if (t)
+ *identifier = t;
+
+ /* Single space is used as separator */
+ if (p[e] != '\0' && strchr(WHITESPACE, p[e]))
+ e++;
+
+ l = (p - *buf) + e;
+ *buf = p + e;
+ return l;
+}
+
+static int syslog_skip_timestamp(const char **buf) {
+ enum {
+ LETTER,
+ SPACE,
+ NUMBER,
+ SPACE_OR_NUMBER,
+ COLON
+ } sequence[] = {
+ LETTER, LETTER, LETTER,
+ SPACE,
+ SPACE_OR_NUMBER, NUMBER,
+ SPACE,
+ SPACE_OR_NUMBER, NUMBER,
+ COLON,
+ SPACE_OR_NUMBER, NUMBER,
+ COLON,
+ SPACE_OR_NUMBER, NUMBER,
+ SPACE
+ };
+
+ const char *p, *t;
+ unsigned i;
+
+ assert(buf);
+ assert(*buf);
+
+ for (i = 0, p = *buf; i < ELEMENTSOF(sequence); i++, p++) {
+ if (!*p)
+ return 0;
+
+ switch (sequence[i]) {
+
+ case SPACE:
+ if (*p != ' ')
+ return 0;
+ break;
+
+ case SPACE_OR_NUMBER:
+ if (*p == ' ')
+ break;
+
+ _fallthrough_;
+ case NUMBER:
+ if (*p < '0' || *p > '9')
+ return 0;
+
+ break;
+
+ case LETTER:
+ if (!(*p >= 'A' && *p <= 'Z') &&
+ !(*p >= 'a' && *p <= 'z'))
+ return 0;
+
+ break;
+
+ case COLON:
+ if (*p != ':')
+ return 0;
+ break;
+
+ }
+ }
+
+ t = *buf;
+ *buf = p;
+ return p - t;
+}
+
+void server_process_syslog_message(
+ Server *s,
+ const char *buf,
+ size_t raw_len,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label,
+ size_t label_len) {
+
+ char *t, syslog_priority[sizeof("PRIORITY=") + DECIMAL_STR_MAX(int)],
+ syslog_facility[sizeof("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)];
+ const char *msg, *syslog_ts, *a;
+ _cleanup_free_ char *identifier = NULL, *pid = NULL,
+ *dummy = NULL, *msg_msg = NULL, *msg_raw = NULL;
+ int priority = LOG_USER | LOG_INFO, r;
+ ClientContext *context = NULL;
+ struct iovec *iovec;
+ size_t n = 0, m, i, leading_ws, syslog_ts_len;
+ bool store_raw;
+
+ assert(s);
+ assert(buf);
+ /* The message cannot be empty. */
+ assert(raw_len > 0);
+ /* The buffer NUL-terminated and can be used a string. raw_len is the length
+ * without the terminating NUL byte, the buffer is actually one bigger. */
+ assert(buf[raw_len] == '\0');
+
+ if (ucred && pid_is_valid(ucred->pid)) {
+ r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
+ if (r < 0)
+ log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid);
+ }
+
+ /* We are creating a copy of the message because we want to forward the original message
+ verbatim to the legacy syslog implementation */
+ for (i = raw_len; i > 0; i--)
+ if (!strchr(WHITESPACE, buf[i-1]))
+ break;
+
+ leading_ws = strspn(buf, WHITESPACE);
+
+ if (i == 0)
+ /* The message contains only whitespaces */
+ msg = buf + raw_len;
+ else if (i == raw_len)
+ /* Nice! No need to strip anything on the end, let's optimize this a bit */
+ msg = buf + leading_ws;
+ else {
+ msg = dummy = new(char, i - leading_ws + 1);
+ if (!dummy) {
+ log_oom();
+ return;
+ }
+
+ memcpy(dummy, buf + leading_ws, i - leading_ws);
+ dummy[i - leading_ws] = 0;
+ }
+
+ /* We will add the SYSLOG_RAW= field when we stripped anything
+ * _or_ if the input message contained NUL bytes. */
+ store_raw = msg != buf || strlen(msg) != raw_len;
+
+ syslog_parse_priority(&msg, &priority, true);
+
+ if (!client_context_test_priority(context, priority))
+ return;
+
+ syslog_ts = msg;
+ syslog_ts_len = syslog_skip_timestamp(&msg);
+ if (syslog_ts_len == 0)
+ /* We failed to parse the full timestamp, store the raw message too */
+ store_raw = true;
+
+ syslog_parse_identifier(&msg, &identifier, &pid);
+
+ if (s->forward_to_syslog)
+ forward_syslog_raw(s, priority, buf, raw_len, ucred, tv);
+
+ if (s->forward_to_kmsg)
+ server_forward_kmsg(s, priority, identifier, msg, ucred);
+
+ if (s->forward_to_console)
+ server_forward_console(s, priority, identifier, msg, ucred);
+
+ if (s->forward_to_wall)
+ server_forward_wall(s, priority, identifier, msg, ucred);
+
+ m = N_IOVEC_META_FIELDS + 8 + client_context_extra_fields_n_iovec(context);
+ iovec = newa(struct iovec, m);
+
+ iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog");
+
+ xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK);
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
+
+ if (priority & LOG_FACMASK) {
+ xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
+ iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
+ }
+
+ if (identifier) {
+ a = strjoina("SYSLOG_IDENTIFIER=", identifier);
+ iovec[n++] = IOVEC_MAKE_STRING(a);
+ }
+
+ if (pid) {
+ a = strjoina("SYSLOG_PID=", pid);
+ iovec[n++] = IOVEC_MAKE_STRING(a);
+ }
+
+ if (syslog_ts_len > 0) {
+ const size_t hlen = STRLEN("SYSLOG_TIMESTAMP=");
+
+ t = newa(char, hlen + syslog_ts_len);
+ memcpy(t, "SYSLOG_TIMESTAMP=", hlen);
+ memcpy(t + hlen, syslog_ts, syslog_ts_len);
+
+ iovec[n++] = IOVEC_MAKE(t, hlen + syslog_ts_len);
+ }
+
+ msg_msg = strjoin("MESSAGE=", msg);
+ if (!msg_msg) {
+ log_oom();
+ return;
+ }
+ iovec[n++] = IOVEC_MAKE_STRING(msg_msg);
+
+ if (store_raw) {
+ const size_t hlen = STRLEN("SYSLOG_RAW=");
+
+ msg_raw = new(char, hlen + raw_len);
+ if (!msg_raw) {
+ log_oom();
+ return;
+ }
+
+ memcpy(msg_raw, "SYSLOG_RAW=", hlen);
+ memcpy(msg_raw + hlen, buf, raw_len);
+
+ iovec[n++] = IOVEC_MAKE(msg_raw, hlen + raw_len);
+ }
+
+ server_dispatch_message(s, iovec, n, m, context, tv, priority, 0);
+}
+
+int server_open_syslog_socket(Server *s, const char *syslog_socket) {
+ int r;
+
+ assert(s);
+ assert(syslog_socket);
+
+ if (s->syslog_fd < 0) {
+ union sockaddr_union sa;
+ socklen_t sa_len;
+
+ r = sockaddr_un_set_path(&sa.un, syslog_socket);
+ if (r < 0)
+ return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", syslog_socket);
+ sa_len = r;
+
+ s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->syslog_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(s->syslog_fd, &sa.sa, sa_len);
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+ } else
+ (void) fd_nonblock(s->syslog_fd, true);
+
+ r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+ if (mac_selinux_use()) {
+ r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, true);
+ if (r < 0)
+ log_warning_errno(r, "SO_PASSSEC failed: %m");
+ }
+
+ r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_TIMESTAMP failed: %m");
+
+ r = sd_event_add_io(s->event, &s->syslog_event_source, s->syslog_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add syslog server fd to event loop: %m");
+
+ r = sd_event_source_set_priority(s->syslog_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust syslog event source priority: %m");
+
+ return 0;
+}
+
+void server_maybe_warn_forward_syslog_missed(Server *s) {
+ usec_t n;
+
+ assert(s);
+
+ if (s->n_forward_syslog_missed <= 0)
+ return;
+
+ n = now(CLOCK_MONOTONIC);
+ if (s->last_warn_forward_syslog_missed + WARN_FORWARD_SYSLOG_MISSED_USEC > n)
+ return;
+
+ server_driver_message(s, 0,
+ "MESSAGE_ID=" SD_MESSAGE_FORWARD_SYSLOG_MISSED_STR,
+ LOG_MESSAGE("Forwarding to syslog missed %u messages.",
+ s->n_forward_syslog_missed),
+ NULL);
+
+ s->n_forward_syslog_missed = 0;
+ s->last_warn_forward_syslog_missed = n;
+}
diff --git a/src/journal/journald-syslog.h b/src/journal/journald-syslog.h
new file mode 100644
index 0000000..3bc3ffd
--- /dev/null
+++ b/src/journal/journald-syslog.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journald-server.h"
+
+int syslog_fixup_facility(int priority) _const_;
+
+size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid);
+
+void server_forward_syslog(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred, const struct timeval *tv);
+
+void server_process_syslog_message(Server *s, const char *buf, size_t buf_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len);
+int server_open_syslog_socket(Server *s, const char *syslog_socket);
+
+void server_maybe_warn_forward_syslog_missed(Server *s);
diff --git a/src/journal/journald-wall.c b/src/journal/journald-wall.c
new file mode 100644
index 0000000..21ec5a7
--- /dev/null
+++ b/src/journal/journald-wall.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "format-util.h"
+#include "journald-server.h"
+#include "journald-wall.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "utmp-wtmp.h"
+
+void server_forward_wall(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ _cleanup_free_ char *ident_buf = NULL, *l_buf = NULL;
+ const char *l;
+ int r;
+
+ assert(s);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_wall)
+ return;
+
+ if (ucred) {
+ if (!identifier) {
+ (void) get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ if (asprintf(&l_buf, "%s["PID_FMT"]: %s", strempty(identifier), ucred->pid, message) < 0) {
+ log_oom();
+ return;
+ }
+
+ l = l_buf;
+
+ } else if (identifier) {
+
+ l = l_buf = strjoin(identifier, ": ", message);
+ if (!l_buf) {
+ log_oom();
+ return;
+ }
+ } else
+ l = message;
+
+ r = utmp_wall(l, "systemd-journald", NULL, NULL, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send wall message: %m");
+}
diff --git a/src/journal/journald-wall.h b/src/journal/journald-wall.h
new file mode 100644
index 0000000..3f98c35
--- /dev/null
+++ b/src/journal/journald-wall.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/socket.h>
+
+#include "journald-server.h"
+
+void server_forward_wall(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred);
diff --git a/src/journal/journald.c b/src/journal/journald.c
new file mode 100644
index 0000000..cfbaf36
--- /dev/null
+++ b/src/journal/journald.c
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-messages.h"
+
+#include "format-util.h"
+#include "journal-authenticate.h"
+#include "journald-kmsg.h"
+#include "journald-server.h"
+#include "journald-syslog.h"
+#include "process-util.h"
+#include "sigbus.h"
+
+int main(int argc, char *argv[]) {
+ const char *namespace;
+ LogTarget log_target;
+ Server server;
+ int r;
+
+ if (argc > 2) {
+ log_error("This program takes one or no arguments.");
+ return EXIT_FAILURE;
+ }
+
+ namespace = argc > 1 ? empty_to_null(argv[1]) : NULL;
+
+ /* So here's the deal: journald can't be considered as regular daemon when it comes to
+ * logging hence LOG_TARGET_AUTO won't do the right thing for it. Hence explicitly log to
+ * the console if we're started from a console or to kmsg otherwise. */
+ log_target = isatty(STDERR_FILENO) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_KMSG;
+
+ log_set_prohibit_ipc(true); /* better safe than sorry */
+ log_set_target(log_target);
+ log_set_facility(LOG_SYSLOG);
+ log_parse_environment();
+ log_open();
+
+ umask(0022);
+
+ sigbus_install();
+
+ r = server_init(&server, namespace);
+ if (r < 0)
+ goto finish;
+
+ server_vacuum(&server, false);
+ server_flush_to_var(&server, true);
+ server_flush_dev_kmsg(&server);
+
+ if (server.namespace)
+ log_debug("systemd-journald running as PID "PID_FMT" for namespace '%s'.", getpid_cached(), server.namespace);
+ else
+ log_debug("systemd-journald running as PID "PID_FMT" for the system.", getpid_cached());
+
+ server_driver_message(&server, 0,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_START_STR,
+ LOG_MESSAGE("Journal started"),
+ NULL);
+
+ /* Make sure to send the usage message *after* flushing the
+ * journal so entries from the runtime journals are ordered
+ * before this message. See #4190 for some details. */
+ server_space_usage_message(&server, NULL);
+
+ for (;;) {
+ usec_t t = USEC_INFINITY, n;
+
+ r = sd_event_get_state(server.event);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get event loop state: %m");
+ goto finish;
+ }
+ if (r == SD_EVENT_FINISHED)
+ break;
+
+ n = now(CLOCK_REALTIME);
+
+ if (server.max_retention_usec > 0 && server.oldest_file_usec > 0) {
+
+ /* The retention time is reached, so let's vacuum! */
+ if (server.oldest_file_usec + server.max_retention_usec < n) {
+ log_info("Retention time reached.");
+ server_rotate(&server);
+ server_vacuum(&server, false);
+ continue;
+ }
+
+ /* Calculate when to rotate the next time */
+ t = server.oldest_file_usec + server.max_retention_usec - n;
+ }
+
+#if HAVE_GCRYPT
+ if (server.system_journal) {
+ usec_t u;
+
+ if (journal_file_next_evolve_usec(server.system_journal, &u)) {
+ if (n >= u)
+ t = 0;
+ else
+ t = MIN(t, u - n);
+ }
+ }
+#endif
+
+ r = sd_event_run(server.event, t);
+ if (r < 0) {
+ log_error_errno(r, "Failed to run event loop: %m");
+ goto finish;
+ }
+
+ server_maybe_append_tags(&server);
+ server_maybe_warn_forward_syslog_missed(&server);
+ }
+
+ if (server.namespace)
+ log_debug("systemd-journald stopped as PID "PID_FMT" for namespace '%s'.", getpid_cached(), server.namespace);
+ else
+ log_debug("systemd-journald stopped as PID "PID_FMT" for the system.", getpid_cached());
+
+ server_driver_message(&server, 0,
+ "MESSAGE_ID=" SD_MESSAGE_JOURNAL_STOP_STR,
+ LOG_MESSAGE("Journal stopped"),
+ NULL);
+
+finish:
+ server_done(&server);
+
+ return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/journal/journald.conf b/src/journal/journald.conf
new file mode 100644
index 0000000..2e1aacd
--- /dev/null
+++ b/src/journal/journald.conf
@@ -0,0 +1,44 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See journald.conf(5) for details.
+
+[Journal]
+#Storage=auto
+#Compress=yes
+#Seal=yes
+#SplitMode=uid
+#SyncIntervalSec=5m
+#RateLimitIntervalSec=30s
+#RateLimitBurst=10000
+#SystemMaxUse=
+#SystemKeepFree=
+#SystemMaxFileSize=
+#SystemMaxFiles=100
+#RuntimeMaxUse=
+#RuntimeKeepFree=
+#RuntimeMaxFileSize=
+#RuntimeMaxFiles=100
+#MaxRetentionSec=
+#MaxFileSec=1month
+#ForwardToSyslog=no
+#ForwardToKMsg=no
+#ForwardToConsole=no
+#ForwardToWall=yes
+#TTYPath=/dev/console
+#MaxLevelStore=debug
+#MaxLevelSyslog=debug
+#MaxLevelKMsg=notice
+#MaxLevelConsole=info
+#MaxLevelWall=emerg
+#LineMax=48K
+#ReadKMsg=yes
+#Audit=yes
diff --git a/src/journal/lookup3.c b/src/journal/lookup3.c
new file mode 100644
index 0000000..39967f2
--- /dev/null
+++ b/src/journal/lookup3.c
@@ -0,0 +1,1006 @@
+/* SPDX-License-Identifier: LicenseRef-lookup3-public-domain */
+/* Slightly modified by Lennart Poettering, to avoid name clashes, and
+ * unexport a few functions. */
+
+#include "lookup3.h"
+
+/*
+-------------------------------------------------------------------------------
+lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+
+These are functions for producing 32-bit hashes for hash table lookup.
+hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+are externally useful functions. Routines to test the hash are included
+if SELF_TEST is defined. You can use this free for any purpose. It's in
+the public domain. It has no warranty.
+
+You probably want to use hashlittle(). hashlittle() and hashbig()
+hash byte arrays. hashlittle() is faster than hashbig() on
+little-endian machines. Intel and AMD are little-endian machines.
+On second thought, you probably want hashlittle2(), which is identical to
+hashlittle() except it returns two 32-bit hashes for the price of one.
+You could implement hashbig2() if you wanted but I haven't bothered here.
+
+If you want to find a hash of, say, exactly 7 integers, do
+ a = i1; b = i2; c = i3;
+ mix(a,b,c);
+ a += i4; b += i5; c += i6;
+ mix(a,b,c);
+ a += i7;
+ final(a,b,c);
+then use c as the hash value. If you have a variable length array of
+4-byte integers to hash, use hashword(). If you have a byte array (like
+a character string), use hashlittle(). If you have several byte arrays, or
+a mix of things, see the comments above hashlittle().
+
+Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
+then mix those integers. This is fast (you can do a lot more thorough
+mixing with 12*3 instructions on 3 integers than you can with 3 instructions
+on 1 byte), but shoehorning those bytes into integers efficiently is messy.
+-------------------------------------------------------------------------------
+*/
+/* #define SELF_TEST 1 */
+
+#include <stdint.h> /* defines uint32_t etc */
+#include <stdio.h> /* defines printf for tests */
+#include <sys/param.h> /* attempt to define endianness */
+#include <time.h> /* defines time_t for timings in the test */
+#ifdef linux
+# include <endian.h> /* attempt to define endianness */
+#endif
+
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+
+/*
+ * My best guess at if you are big-endian or little-endian. This may
+ * need adjustment.
+ */
+#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
+ __BYTE_ORDER == __LITTLE_ENDIAN) || \
+ (defined(i386) || defined(__i386__) || defined(__i486__) || \
+ defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
+# define HASH_LITTLE_ENDIAN 1
+# define HASH_BIG_ENDIAN 0
+#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
+ __BYTE_ORDER == __BIG_ENDIAN) || \
+ (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
+# define HASH_LITTLE_ENDIAN 0
+# define HASH_BIG_ENDIAN 1
+#else
+# define HASH_LITTLE_ENDIAN 0
+# define HASH_BIG_ENDIAN 0
+#endif
+
+#define hashsize(n) ((uint32_t)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+/*
+-------------------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+
+This is reversible, so any information in (a,b,c) before mix() is
+still in (a,b,c) after mix().
+
+If four pairs of (a,b,c) inputs are run through mix(), or through
+mix() in reverse, there are at least 32 bits of the output that
+are sometimes the same for one pair and different for another pair.
+This was tested for:
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
+satisfy this are
+ 4 6 8 16 19 4
+ 9 15 3 18 27 15
+ 14 9 3 7 17 3
+Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
+for "differ" defined as + with a one-bit base and a two-bit delta. I
+used http://burtleburtle.net/bob/hash/avalanche.html to choose
+the operations, constants, and arrangements of the variables.
+
+This does not achieve avalanche. There are input bits of (a,b,c)
+that fail to affect some output bits of (a,b,c), especially of a. The
+most thoroughly mixed value is c, but it doesn't really even achieve
+avalanche in c.
+
+This allows some parallelism. Read-after-writes are good at doubling
+the number of bits affected, so the goal of mixing pulls in the opposite
+direction as the goal of parallelism. I did what I could. Rotates
+seem to cost as much as shifts on every machine I could lay my hands
+on, and rotates are much kinder to the top and bottom bits, so I used
+rotates.
+-------------------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+}
+
+/*
+-------------------------------------------------------------------------------
+final -- final mixing of 3 32-bit values (a,b,c) into c
+
+Pairs of (a,b,c) values differing in only a few bits will usually
+produce values of c that look totally different. This was tested for
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+These constants passed:
+ 14 11 25 16 4 14 24
+ 12 14 25 16 4 14 24
+and these came close:
+ 4 8 15 26 3 22 24
+ 10 8 15 26 3 22 24
+ 11 8 15 26 3 22 24
+-------------------------------------------------------------------------------
+*/
+#define final(a,b,c) \
+{ \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+}
+
+/*
+--------------------------------------------------------------------
+ This works on all machines. To be useful, it requires
+ -- that the key be an array of uint32_t's, and
+ -- that the length be the number of uint32_t's in the key
+
+ The function hashword() is identical to hashlittle() on little-endian
+ machines, and identical to hashbig() on big-endian machines,
+ except that the length has to be measured in uint32_ts rather than in
+ bytes. hashlittle() is more complicated than hashword() only because
+ hashlittle() has to dance around fitting the key bytes into registers.
+--------------------------------------------------------------------
+*/
+uint32_t jenkins_hashword(
+const uint32_t *k, /* the key, an array of uint32_t values */
+size_t length, /* the length of the key, in uint32_ts */
+uint32_t initval) /* the previous hash, or an arbitrary value */
+{
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch(length) /* all the case statements fall through */
+ {
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+/*
+--------------------------------------------------------------------
+hashword2() -- same as hashword(), but take two seeds and return two
+32-bit values. pc and pb must both be nonnull, and *pc and *pb must
+both be initialized with seeds. If you pass in (*pb)==0, the output
+(*pc) will be the same as the return value from hashword().
+--------------------------------------------------------------------
+*/
+void jenkins_hashword2 (
+const uint32_t *k, /* the key, an array of uint32_t values */
+size_t length, /* the length of the key, in uint32_ts */
+uint32_t *pc, /* IN: seed OUT: primary hash value */
+uint32_t *pb) /* IN: more seed OUT: secondary hash value */
+{
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
+ c += *pb;
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch(length) /* all the case statements fall through */
+ {
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ *pc=c; *pb=b;
+}
+
+/*
+-------------------------------------------------------------------------------
+hashlittle() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ length : the length of the key, counting by bytes
+ initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Two keys differing by one or two bits will have
+totally different hash values.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
+
+By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable. Do NOT use for cryptographic purposes.
+-------------------------------------------------------------------------------
+*/
+
+uint32_t jenkins_hashlittle( const void *key, size_t length, uint32_t initval)
+{
+ uint32_t a,b,c; /* internal state */
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ u.ptr = key;
+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ * then masks off the part it's not allowed to read. Because the
+ * string is aligned, the masked-off tail is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+#if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER && !HAS_FEATURE_MEMORY_SANITIZER
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff; break;
+ case 2 : a+=k[0]&0xffff; break;
+ case 1 : a+=k[0]&0xff; break;
+ case 0 : return c; /* zero length strings require no mixing */
+ }
+
+#else /* make valgrind happy */
+ {
+ const uint8_t *k8 = (const uint8_t *) k;
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
+ case 1 : a+=k8[0]; break;
+ case 0 : return c;
+ }
+ }
+
+#endif /* !valgrind */
+
+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
+ const uint8_t *k8;
+
+ /*--------------- all but last block: aligned reads and different mixing */
+ while (length > 12)
+ {
+ a += k[0] + (((uint32_t)k[1])<<16);
+ b += k[2] + (((uint32_t)k[3])<<16);
+ c += k[4] + (((uint32_t)k[5])<<16);
+ mix(a,b,c);
+ length -= 12;
+ k += 6;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=k[4];
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=k[2];
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=k[0];
+ break;
+ case 1 : a+=k8[0];
+ break;
+ case 0 : return c; /* zero length requires no mixing */
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ a += ((uint32_t)k[1])<<8;
+ a += ((uint32_t)k[2])<<16;
+ a += ((uint32_t)k[3])<<24;
+ b += k[4];
+ b += ((uint32_t)k[5])<<8;
+ b += ((uint32_t)k[6])<<16;
+ b += ((uint32_t)k[7])<<24;
+ c += k[8];
+ c += ((uint32_t)k[9])<<8;
+ c += ((uint32_t)k[10])<<16;
+ c += ((uint32_t)k[11])<<24;
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=((uint32_t)k[11])<<24;
+ case 11: c+=((uint32_t)k[10])<<16;
+ case 10: c+=((uint32_t)k[9])<<8;
+ case 9 : c+=k[8];
+ case 8 : b+=((uint32_t)k[7])<<24;
+ case 7 : b+=((uint32_t)k[6])<<16;
+ case 6 : b+=((uint32_t)k[5])<<8;
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3])<<24;
+ case 3 : a+=((uint32_t)k[2])<<16;
+ case 2 : a+=((uint32_t)k[1])<<8;
+ case 1 : a+=k[0];
+ break;
+ case 0 : return c;
+ }
+ }
+
+ final(a,b,c);
+ return c;
+}
+
+/*
+ * hashlittle2: return 2 32-bit hash values
+ *
+ * This is identical to hashlittle(), except it returns two 32-bit hash
+ * values instead of just one. This is good enough for hash table
+ * lookup with 2^^64 buckets, or if you want a second hash if you're not
+ * happy with the first, or if you want a probably-unique 64-bit ID for
+ * the key. *pc is better mixed than *pb, so use *pc first. If you want
+ * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
+ */
+void jenkins_hashlittle2(
+ const void *key, /* the key to hash */
+ size_t length, /* length of the key */
+ uint32_t *pc, /* IN: primary initval, OUT: primary hash */
+ uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
+{
+ uint32_t a,b,c; /* internal state */
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
+ c += *pb;
+
+ u.ptr = key;
+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ * then masks off the part it's not allowed to read. Because the
+ * string is aligned, the masked-off tail is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+#if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER && !HAS_FEATURE_MEMORY_SANITIZER
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff; break;
+ case 2 : a+=k[0]&0xffff; break;
+ case 1 : a+=k[0]&0xff; break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+
+#else /* make valgrind happy */
+
+ {
+ const uint8_t *k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
+ case 1 : a+=k8[0]; break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+ }
+
+#endif /* !valgrind */
+
+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
+ const uint8_t *k8;
+
+ /*--------------- all but last block: aligned reads and different mixing */
+ while (length > 12)
+ {
+ a += k[0] + (((uint32_t)k[1])<<16);
+ b += k[2] + (((uint32_t)k[3])<<16);
+ c += k[4] + (((uint32_t)k[5])<<16);
+ mix(a,b,c);
+ length -= 12;
+ k += 6;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=k[4];
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=k[2];
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=k[0];
+ break;
+ case 1 : a+=k8[0];
+ break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ a += ((uint32_t)k[1])<<8;
+ a += ((uint32_t)k[2])<<16;
+ a += ((uint32_t)k[3])<<24;
+ b += k[4];
+ b += ((uint32_t)k[5])<<8;
+ b += ((uint32_t)k[6])<<16;
+ b += ((uint32_t)k[7])<<24;
+ c += k[8];
+ c += ((uint32_t)k[9])<<8;
+ c += ((uint32_t)k[10])<<16;
+ c += ((uint32_t)k[11])<<24;
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=((uint32_t)k[11])<<24;
+ case 11: c+=((uint32_t)k[10])<<16;
+ case 10: c+=((uint32_t)k[9])<<8;
+ case 9 : c+=k[8];
+ case 8 : b+=((uint32_t)k[7])<<24;
+ case 7 : b+=((uint32_t)k[6])<<16;
+ case 6 : b+=((uint32_t)k[5])<<8;
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3])<<24;
+ case 3 : a+=((uint32_t)k[2])<<16;
+ case 2 : a+=((uint32_t)k[1])<<8;
+ case 1 : a+=k[0];
+ break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+ }
+
+ final(a,b,c);
+ *pc=c; *pb=b;
+}
+
+/*
+ * hashbig():
+ * This is the same as hashword() on big-endian machines. It is different
+ * from hashlittle() on all machines. hashbig() takes advantage of
+ * big-endian byte ordering.
+ */
+uint32_t jenkins_hashbig( const void *key, size_t length, uint32_t initval)
+{
+ uint32_t a,b,c;
+ union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ u.ptr = key;
+ if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]<<8" actually reads beyond the end of the string, but
+ * then shifts out the part it's not allowed to read. Because the
+ * string is aligned, the illegal read is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+#if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER && !HAS_FEATURE_MEMORY_SANITIZER
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff00; break;
+ case 2 : a+=k[0]&0xffff0000; break;
+ case 1 : a+=k[0]&0xff000000; break;
+ case 0 : return c; /* zero length strings require no mixing */
+ }
+
+#else /* make valgrind happy */
+
+ {
+ const uint8_t *k8 = (const uint8_t *)k;
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
+ case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
+ case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
+ case 1 : a+=((uint32_t)k8[0])<<24; break;
+ case 0 : return c;
+ }
+ }
+
+#endif /* !VALGRIND */
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += ((uint32_t)k[0])<<24;
+ a += ((uint32_t)k[1])<<16;
+ a += ((uint32_t)k[2])<<8;
+ a += ((uint32_t)k[3]);
+ b += ((uint32_t)k[4])<<24;
+ b += ((uint32_t)k[5])<<16;
+ b += ((uint32_t)k[6])<<8;
+ b += ((uint32_t)k[7]);
+ c += ((uint32_t)k[8])<<24;
+ c += ((uint32_t)k[9])<<16;
+ c += ((uint32_t)k[10])<<8;
+ c += ((uint32_t)k[11]);
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=k[11];
+ case 11: c+=((uint32_t)k[10])<<8;
+ case 10: c+=((uint32_t)k[9])<<16;
+ case 9 : c+=((uint32_t)k[8])<<24;
+ case 8 : b+=k[7];
+ case 7 : b+=((uint32_t)k[6])<<8;
+ case 6 : b+=((uint32_t)k[5])<<16;
+ case 5 : b+=((uint32_t)k[4])<<24;
+ case 4 : a+=k[3];
+ case 3 : a+=((uint32_t)k[2])<<8;
+ case 2 : a+=((uint32_t)k[1])<<16;
+ case 1 : a+=((uint32_t)k[0])<<24;
+ break;
+ case 0 : return c;
+ }
+ }
+
+ final(a,b,c);
+ return c;
+}
+
+#ifdef SELF_TEST
+
+/* used for timings */
+void driver1()
+{
+ uint8_t buf[256];
+ uint32_t i;
+ uint32_t h=0;
+ time_t a,z;
+
+ time(&a);
+ for (i=0; i<256; ++i) buf[i] = 'x';
+ for (i=0; i<1; ++i)
+ {
+ h = hashlittle(&buf[0],1,h);
+ }
+ time(&z);
+ if (z-a > 0) printf("time %d %.8x\n", z-a, h);
+}
+
+/* check that every input bit changes every output bit half the time */
+#define HASHSTATE 1
+#define HASHLEN 1
+#define MAXPAIR 60
+#define MAXLEN 70
+void driver2()
+{
+ uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
+ uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
+ uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
+ uint32_t x[HASHSTATE],y[HASHSTATE];
+ uint32_t hlen;
+
+ printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
+ for (hlen=0; hlen < MAXLEN; ++hlen)
+ {
+ z=0;
+ for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
+ {
+ for (j=0; j<8; ++j) /*------------------------ for each input bit, */
+ {
+ for (m=1; m<8; ++m) /*------------- for several possible initvals, */
+ {
+ for (l=0; l<HASHSTATE; ++l)
+ e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
+
+ /*---- check that every output bit is affected by that input bit */
+ for (k=0; k<MAXPAIR; k+=2)
+ {
+ uint32_t finished=1;
+ /* keys have one bit different */
+ for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
+ /* have a and b be two keys differing in only one bit */
+ a[i] ^= (k<<j);
+ a[i] ^= (k>>(8-j));
+ c[0] = hashlittle(a, hlen, m);
+ b[i] ^= ((k+1)<<j);
+ b[i] ^= ((k+1)>>(8-j));
+ d[0] = hashlittle(b, hlen, m);
+ /* check every bit is 1, 0, set, and not set at least once */
+ for (l=0; l<HASHSTATE; ++l)
+ {
+ e[l] &= (c[l]^d[l]);
+ f[l] &= ~(c[l]^d[l]);
+ g[l] &= c[l];
+ h[l] &= ~c[l];
+ x[l] &= d[l];
+ y[l] &= ~d[l];
+ if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
+ }
+ if (finished) break;
+ }
+ if (k>z) z=k;
+ if (k==MAXPAIR)
+ {
+ printf("Some bit didn't change: ");
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
+ e[0],f[0],g[0],h[0],x[0],y[0]);
+ printf("i %d j %d m %d len %d\n", i, j, m, hlen);
+ }
+ if (z==MAXPAIR) goto done;
+ }
+ }
+ }
+ done:
+ if (z < MAXPAIR)
+ {
+ printf("Mix success %2d bytes %2d initvals ",i,m);
+ printf("required %d trials\n", z/2);
+ }
+ }
+ printf("\n");
+}
+
+/* Check for reading beyond the end of the buffer and alignment problems */
+void driver3()
+{
+ uint8_t buf[MAXLEN+20], *b;
+ uint32_t len;
+ uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
+ uint32_t h;
+ uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
+ uint32_t i;
+ uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
+ uint32_t j;
+ uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
+ uint32_t ref,x,y;
+ uint8_t *p;
+
+ printf("Endianness. These lines should all be the same (for values filled in):\n");
+ printf("%.8x %.8x %.8x\n",
+ hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
+ hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
+ hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
+ p = q;
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qq[1];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qqq[2];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qqqq[3];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ printf("\n");
+
+ /* check that hashlittle2 and hashlittle produce the same results */
+ i=47; j=0;
+ hashlittle2(q, sizeof(q), &i, &j);
+ if (hashlittle(q, sizeof(q), 47) != i)
+ printf("hashlittle2 and hashlittle mismatch\n");
+
+ /* check that hashword2 and hashword produce the same results */
+ len = 0xdeadbeef;
+ i=47, j=0;
+ hashword2(&len, 1, &i, &j);
+ if (hashword(&len, 1, 47) != i)
+ printf("hashword2 and hashword mismatch %x %x\n",
+ i, hashword(&len, 1, 47));
+
+ /* check hashlittle doesn't read before or after the ends of the string */
+ for (h=0, b=buf+1; h<8; ++h, ++b)
+ {
+ for (i=0; i<MAXLEN; ++i)
+ {
+ len = i;
+ for (j=0; j<i; ++j) *(b+j)=0;
+
+ /* these should all be equal */
+ ref = hashlittle(b, len, (uint32_t)1);
+ *(b+i)=(uint8_t)~0;
+ *(b-1)=(uint8_t)~0;
+ x = hashlittle(b, len, (uint32_t)1);
+ y = hashlittle(b, len, (uint32_t)1);
+ if ((ref != x) || (ref != y))
+ {
+ printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
+ h, i);
+ }
+ }
+ }
+}
+
+/* check for problems with nulls */
+ void driver4()
+{
+ uint8_t buf[1];
+ uint32_t h,i,state[HASHSTATE];
+
+ buf[0] = ~0;
+ for (i=0; i<HASHSTATE; ++i) state[i] = 1;
+ printf("These should all be different\n");
+ for (i=0, h=0; i<8; ++i)
+ {
+ h = hashlittle(buf, 0, h);
+ printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
+ }
+}
+
+void driver5()
+{
+ uint32_t b,c;
+ b=0, c=0, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
+ b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
+ b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
+ b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
+ b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
+ b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
+ c = hashlittle("Four score and seven years ago", 30, 0);
+ printf("hash is %.8lx\n", c); /* 17770551 */
+ c = hashlittle("Four score and seven years ago", 30, 1);
+ printf("hash is %.8lx\n", c); /* cd628161 */
+}
+
+int main()
+{
+ driver1(); /* test that the key is hashed: used for timings */
+ driver2(); /* test that whole key is hashed thoroughly */
+ driver3(); /* test that nothing but the key is hashed */
+ driver4(); /* test hashing multiple buffers (all buffers are null) */
+ driver5(); /* test the hash against known vectors */
+ return 1;
+}
+
+#endif /* SELF_TEST */
diff --git a/src/journal/lookup3.h b/src/journal/lookup3.h
new file mode 100644
index 0000000..04e493e
--- /dev/null
+++ b/src/journal/lookup3.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LicenseRef-lookup3-public-domain */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+uint32_t jenkins_hashword(const uint32_t *k, size_t length, uint32_t initval) _pure_;
+void jenkins_hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb);
+
+uint32_t jenkins_hashlittle(const void *key, size_t length, uint32_t initval) _pure_;
+void jenkins_hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb);
+
+uint32_t jenkins_hashbig(const void *key, size_t length, uint32_t initval) _pure_;
+
+static inline uint64_t jenkins_hash64(const void *data, size_t length) {
+ uint32_t a = 0, b = 0;
+
+ jenkins_hashlittle2(data, length, &a, &b);
+
+ return ((uint64_t) a << 32ULL) | (uint64_t) b;
+}
diff --git a/src/journal/meson.build b/src/journal/meson.build
new file mode 100644
index 0000000..7aea28d
--- /dev/null
+++ b/src/journal/meson.build
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+journal_client_sources = files('''
+ audit-type.c
+ audit-type.h
+ catalog.c
+ catalog.h
+ compress.c
+ compress.h
+ journal-def.h
+ journal-file.c
+ journal-file.h
+ journal-send.c
+ journal-vacuum.c
+ journal-vacuum.h
+ journal-verify.c
+ journal-verify.h
+ lookup3.c
+ lookup3.h
+ mmap-cache.c
+ mmap-cache.h
+ sd-journal.c
+'''.split())
+
+if conf.get('HAVE_GCRYPT') == 1
+ journal_client_sources += files('''
+ journal-authenticate.c
+ journal-authenticate.h
+ fsprg.c
+ fsprg.h
+ '''.split())
+endif
+
+############################################################
+
+audit_type_includes = [config_h,
+ missing_audit_h,
+ 'linux/audit.h']
+if conf.get('HAVE_AUDIT') == 1
+ audit_type_includes += 'libaudit.h'
+endif
+
+generate_audit_type_list = find_program('generate-audit_type-list.sh')
+audit_type_list_txt = custom_target(
+ 'audit_type-list.txt',
+ output : 'audit_type-list.txt',
+ command : [generate_audit_type_list, cpp] + audit_type_includes,
+ capture : true)
+
+audit_type_to_name = custom_target(
+ 'audit_type-to-name.h',
+ input : ['audit_type-to-name.awk', audit_type_list_txt],
+ output : 'audit_type-to-name.h',
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+journal_client_sources += [audit_type_to_name]
+
+libjournal_client = static_library(
+ 'journal-client',
+ journal_client_sources,
+ include_directories : includes,
+ c_args : ['-fvisibility=default'])
+
+############################################################
+
+libjournal_core_sources = files('''
+ journald-audit.c
+ journald-audit.h
+ journald-console.c
+ journald-console.h
+ journald-context.c
+ journald-context.h
+ journald-kmsg.c
+ journald-kmsg.h
+ journald-native.c
+ journald-native.h
+ journald-rate-limit.c
+ journald-rate-limit.h
+ journald-server.c
+ journald-server.h
+ journald-stream.c
+ journald-stream.h
+ journald-syslog.c
+ journald-syslog.h
+ journald-wall.c
+ journald-wall.h
+ journal-internal.h
+'''.split())
+
+systemd_journald_sources = files('''
+ journald.c
+ journald-server.h
+'''.split())
+
+journald_gperf_c = custom_target(
+ 'journald-gperf.c',
+ input : 'journald-gperf.gperf',
+ output : 'journald-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+systemd_cat_sources = files('cat.c')
+
+journalctl_sources = files('''
+ journalctl.c
+ pcre2-dlopen.c
+ pcre2-dlopen.h
+'''.split())
+
+if install_sysconfdir
+ install_data('journald.conf',
+ install_dir : pkgsysconfdir)
+endif
+
+if get_option('create-log-dirs')
+ meson.add_install_script(
+ 'sh', '-c',
+ mkdir_p.format('/var/log/journal'))
+ meson.add_install_script(
+ 'sh', '-c',
+ '''chown 0:0 $DESTDIR/var/log/journal &&
+ chmod 755 $DESTDIR/var/log/journal || :''')
+ if get_option('adm-group')
+ meson.add_install_script(
+ 'sh', '-c',
+ 'setfacl -nm g:adm:rx,d:g:adm:rx $DESTDIR/var/log/journal || :')
+ endif
+ if get_option('wheel-group')
+ meson.add_install_script(
+ 'sh', '-c',
+ 'setfacl -nm g:wheel:rx,d:g:wheel:rx $DESTDIR/var/log/journal || :')
+ endif
+endif
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c
new file mode 100644
index 0000000..9882016
--- /dev/null
+++ b/src/journal/mmap-cache.c
@@ -0,0 +1,669 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "mmap-cache.h"
+#include "sigbus.h"
+
+typedef struct Window Window;
+typedef struct Context Context;
+
+struct Window {
+ MMapCache *cache;
+
+ bool invalidated:1;
+ bool keep_always:1;
+ bool in_unused:1;
+
+ int prot;
+ void *ptr;
+ uint64_t offset;
+ size_t size;
+
+ MMapFileDescriptor *fd;
+
+ LIST_FIELDS(Window, by_fd);
+ LIST_FIELDS(Window, unused);
+
+ LIST_HEAD(Context, contexts);
+};
+
+struct Context {
+ MMapCache *cache;
+ unsigned id;
+ Window *window;
+
+ LIST_FIELDS(Context, by_window);
+};
+
+struct MMapFileDescriptor {
+ MMapCache *cache;
+ int fd;
+ bool sigbus;
+ LIST_HEAD(Window, windows);
+};
+
+struct MMapCache {
+ unsigned n_ref;
+ unsigned n_windows;
+
+ unsigned n_hit, n_missed;
+
+ Hashmap *fds;
+ Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
+
+ LIST_HEAD(Window, unused);
+ Window *last_unused;
+};
+
+#define WINDOWS_MIN 64
+
+#if ENABLE_DEBUG_MMAP_CACHE
+/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
+# define WINDOW_SIZE (page_size())
+#else
+# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
+#endif
+
+MMapCache* mmap_cache_new(void) {
+ MMapCache *m;
+
+ m = new0(MMapCache, 1);
+ if (!m)
+ return NULL;
+
+ m->n_ref = 1;
+ return m;
+}
+
+static void window_unlink(Window *w) {
+ Context *c;
+
+ assert(w);
+
+ if (w->ptr)
+ munmap(w->ptr, w->size);
+
+ if (w->fd)
+ LIST_REMOVE(by_fd, w->fd->windows, w);
+
+ if (w->in_unused) {
+ if (w->cache->last_unused == w)
+ w->cache->last_unused = w->unused_prev;
+
+ LIST_REMOVE(unused, w->cache->unused, w);
+ }
+
+ LIST_FOREACH(by_window, c, w->contexts) {
+ assert(c->window == w);
+ c->window = NULL;
+ }
+}
+
+static void window_invalidate(Window *w) {
+ assert(w);
+
+ if (w->invalidated)
+ return;
+
+ /* Replace the window with anonymous pages. This is useful
+ * when we hit a SIGBUS and want to make sure the file cannot
+ * trigger any further SIGBUS, possibly overrunning the sigbus
+ * queue. */
+
+ assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
+ w->invalidated = true;
+}
+
+static void window_free(Window *w) {
+ assert(w);
+
+ window_unlink(w);
+ w->cache->n_windows--;
+ free(w);
+}
+
+_pure_ static bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
+ assert(w);
+ assert(size > 0);
+
+ return
+ prot == w->prot &&
+ offset >= w->offset &&
+ offset + size <= w->offset + w->size;
+}
+
+_pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
+ assert(w);
+ assert(f);
+
+ return
+ w->fd &&
+ f->fd == w->fd->fd &&
+ window_matches(w, prot, offset, size);
+}
+
+static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
+ Window *w;
+
+ assert(m);
+ assert(f);
+
+ if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
+
+ /* Allocate a new window */
+ w = new(Window, 1);
+ if (!w)
+ return NULL;
+ m->n_windows++;
+ } else {
+
+ /* Reuse an existing one */
+ w = m->last_unused;
+ window_unlink(w);
+ }
+
+ *w = (Window) {
+ .cache = m,
+ .fd = f,
+ .prot = prot,
+ .keep_always = keep_always,
+ .offset = offset,
+ .size = size,
+ .ptr = ptr,
+ };
+
+ LIST_PREPEND(by_fd, f->windows, w);
+
+ return w;
+}
+
+static void context_detach_window(Context *c) {
+ Window *w;
+
+ assert(c);
+
+ if (!c->window)
+ return;
+
+ w = TAKE_PTR(c->window);
+ LIST_REMOVE(by_window, w->contexts, c);
+
+ if (!w->contexts && !w->keep_always) {
+ /* Not used anymore? */
+#if ENABLE_DEBUG_MMAP_CACHE
+ /* Unmap unused windows immediately to expose use-after-unmap
+ * by SIGSEGV. */
+ window_free(w);
+#else
+ LIST_PREPEND(unused, c->cache->unused, w);
+ if (!c->cache->last_unused)
+ c->cache->last_unused = w;
+
+ w->in_unused = true;
+#endif
+ }
+}
+
+static void context_attach_window(Context *c, Window *w) {
+ assert(c);
+ assert(w);
+
+ if (c->window == w)
+ return;
+
+ context_detach_window(c);
+
+ if (w->in_unused) {
+ /* Used again? */
+ LIST_REMOVE(unused, c->cache->unused, w);
+ if (c->cache->last_unused == w)
+ c->cache->last_unused = w->unused_prev;
+
+ w->in_unused = false;
+ }
+
+ c->window = w;
+ LIST_PREPEND(by_window, w->contexts, c);
+}
+
+static Context *context_add(MMapCache *m, unsigned id) {
+ Context *c;
+
+ assert(m);
+
+ c = m->contexts[id];
+ if (c)
+ return c;
+
+ c = new0(Context, 1);
+ if (!c)
+ return NULL;
+
+ c->cache = m;
+ c->id = id;
+
+ assert(!m->contexts[id]);
+ m->contexts[id] = c;
+
+ return c;
+}
+
+static void context_free(Context *c) {
+ assert(c);
+
+ context_detach_window(c);
+
+ if (c->cache) {
+ assert(c->cache->contexts[c->id] == c);
+ c->cache->contexts[c->id] = NULL;
+ }
+
+ free(c);
+}
+
+static MMapCache *mmap_cache_free(MMapCache *m) {
+ int i;
+
+ assert(m);
+
+ for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
+ if (m->contexts[i])
+ context_free(m->contexts[i]);
+
+ hashmap_free(m->fds);
+
+ while (m->unused)
+ window_free(m->unused);
+
+ return mfree(m);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
+
+static int make_room(MMapCache *m) {
+ assert(m);
+
+ if (!m->last_unused)
+ return 0;
+
+ window_free(m->last_unused);
+ return 1;
+}
+
+static int try_context(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ void **ret,
+ size_t *ret_size) {
+
+ Context *c;
+
+ assert(m);
+ assert(m->n_ref > 0);
+ assert(f);
+ assert(size > 0);
+ assert(ret);
+
+ c = m->contexts[context];
+ if (!c)
+ return 0;
+
+ assert(c->id == context);
+
+ if (!c->window)
+ return 0;
+
+ if (!window_matches_fd(c->window, f, prot, offset, size)) {
+
+ /* Drop the reference to the window, since it's unnecessary now */
+ context_detach_window(c);
+ return 0;
+ }
+
+ if (c->window->fd->sigbus)
+ return -EIO;
+
+ c->window->keep_always = c->window->keep_always || keep_always;
+
+ *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
+ if (ret_size)
+ *ret_size = c->window->size - (offset - c->window->offset);
+
+ return 1;
+}
+
+static int find_mmap(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ void **ret,
+ size_t *ret_size) {
+
+ Window *w;
+ Context *c;
+
+ assert(m);
+ assert(m->n_ref > 0);
+ assert(f);
+ assert(size > 0);
+
+ if (f->sigbus)
+ return -EIO;
+
+ LIST_FOREACH(by_fd, w, f->windows)
+ if (window_matches(w, prot, offset, size))
+ break;
+
+ if (!w)
+ return 0;
+
+ c = context_add(m, context);
+ if (!c)
+ return -ENOMEM;
+
+ context_attach_window(c, w);
+ w->keep_always = w->keep_always || keep_always;
+
+ *ret = (uint8_t*) w->ptr + (offset - w->offset);
+ if (ret_size)
+ *ret_size = w->size - (offset - w->offset);
+
+ return 1;
+}
+
+static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
+ void *ptr;
+
+ assert(m);
+ assert(f);
+ assert(res);
+
+ for (;;) {
+ int r;
+
+ ptr = mmap(addr, size, prot, flags, f->fd, offset);
+ if (ptr != MAP_FAILED)
+ break;
+ if (errno != ENOMEM)
+ return negative_errno();
+
+ r = make_room(m);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOMEM;
+ }
+
+ *res = ptr;
+ return 0;
+}
+
+static int add_mmap(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ void **ret,
+ size_t *ret_size) {
+
+ uint64_t woffset, wsize;
+ Context *c;
+ Window *w;
+ void *d;
+ int r;
+
+ assert(m);
+ assert(m->n_ref > 0);
+ assert(f);
+ assert(size > 0);
+ assert(ret);
+
+ woffset = offset & ~((uint64_t) page_size() - 1ULL);
+ wsize = size + (offset - woffset);
+ wsize = PAGE_ALIGN(wsize);
+
+ if (wsize < WINDOW_SIZE) {
+ uint64_t delta;
+
+ delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
+
+ if (delta > offset)
+ woffset = 0;
+ else
+ woffset -= delta;
+
+ wsize = WINDOW_SIZE;
+ }
+
+ if (st) {
+ /* Memory maps that are larger then the files
+ underneath have undefined behavior. Hence, clamp
+ things to the file size if we know it */
+
+ if (woffset >= (uint64_t) st->st_size)
+ return -EADDRNOTAVAIL;
+
+ if (woffset + wsize > (uint64_t) st->st_size)
+ wsize = PAGE_ALIGN(st->st_size - woffset);
+ }
+
+ r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
+ if (r < 0)
+ return r;
+
+ c = context_add(m, context);
+ if (!c)
+ goto outofmem;
+
+ w = window_add(m, f, prot, keep_always, woffset, wsize, d);
+ if (!w)
+ goto outofmem;
+
+ context_attach_window(c, w);
+
+ *ret = (uint8_t*) w->ptr + (offset - w->offset);
+ if (ret_size)
+ *ret_size = w->size - (offset - w->offset);
+
+ return 1;
+
+outofmem:
+ (void) munmap(d, wsize);
+ return -ENOMEM;
+}
+
+int mmap_cache_get(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ void **ret,
+ size_t *ret_size) {
+
+ int r;
+
+ assert(m);
+ assert(m->n_ref > 0);
+ assert(f);
+ assert(size > 0);
+ assert(ret);
+ assert(context < MMAP_CACHE_MAX_CONTEXTS);
+
+ /* Check whether the current context is the right one already */
+ r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
+ if (r != 0) {
+ m->n_hit++;
+ return r;
+ }
+
+ /* Search for a matching mmap */
+ r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
+ if (r != 0) {
+ m->n_hit++;
+ return r;
+ }
+
+ m->n_missed++;
+
+ /* Create a new mmap */
+ return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
+}
+
+unsigned mmap_cache_get_hit(MMapCache *m) {
+ assert(m);
+
+ return m->n_hit;
+}
+
+unsigned mmap_cache_get_missed(MMapCache *m) {
+ assert(m);
+
+ return m->n_missed;
+}
+
+static void mmap_cache_process_sigbus(MMapCache *m) {
+ bool found = false;
+ MMapFileDescriptor *f;
+ int r;
+
+ assert(m);
+
+ /* Iterate through all triggered pages and mark their files as
+ * invalidated */
+ for (;;) {
+ bool ours;
+ void *addr;
+
+ r = sigbus_pop(&addr);
+ if (_likely_(r == 0))
+ break;
+ if (r < 0) {
+ log_error_errno(r, "SIGBUS handling failed: %m");
+ abort();
+ }
+
+ ours = false;
+ HASHMAP_FOREACH(f, m->fds) {
+ Window *w;
+
+ LIST_FOREACH(by_fd, w, f->windows) {
+ if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
+ (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
+ found = ours = f->sigbus = true;
+ break;
+ }
+ }
+
+ if (ours)
+ break;
+ }
+
+ /* Didn't find a matching window, give up */
+ if (!ours) {
+ log_error("Unknown SIGBUS page, aborting.");
+ abort();
+ }
+ }
+
+ /* The list of triggered pages is now empty. Now, let's remap
+ * all windows of the triggered file to anonymous maps, so
+ * that no page of the file in question is triggered again, so
+ * that we can be sure not to hit the queue size limit. */
+ if (_likely_(!found))
+ return;
+
+ HASHMAP_FOREACH(f, m->fds) {
+ Window *w;
+
+ if (!f->sigbus)
+ continue;
+
+ LIST_FOREACH(by_fd, w, f->windows)
+ window_invalidate(w);
+ }
+}
+
+bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
+ assert(m);
+ assert(f);
+
+ mmap_cache_process_sigbus(m);
+
+ return f->sigbus;
+}
+
+MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
+ MMapFileDescriptor *f;
+ int r;
+
+ assert(m);
+ assert(fd >= 0);
+
+ f = hashmap_get(m->fds, FD_TO_PTR(fd));
+ if (f)
+ return f;
+
+ r = hashmap_ensure_allocated(&m->fds, NULL);
+ if (r < 0)
+ return NULL;
+
+ f = new0(MMapFileDescriptor, 1);
+ if (!f)
+ return NULL;
+
+ f->cache = m;
+ f->fd = fd;
+
+ r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
+ if (r < 0)
+ return mfree(f);
+
+ return f;
+}
+
+void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
+ assert(m);
+ assert(f);
+
+ /* Make sure that any queued SIGBUS are first dispatched, so
+ * that we don't end up with a SIGBUS entry we cannot relate
+ * to any existing memory map */
+
+ mmap_cache_process_sigbus(m);
+
+ while (f->windows)
+ window_free(f->windows);
+
+ if (f->cache)
+ assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
+
+ free(f);
+}
diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h
new file mode 100644
index 0000000..28d5ab1
--- /dev/null
+++ b/src/journal/mmap-cache.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/stat.h>
+
+/* One context per object type, plus one of the header, plus one "additional" one */
+#define MMAP_CACHE_MAX_CONTEXTS 9
+
+typedef struct MMapCache MMapCache;
+typedef struct MMapFileDescriptor MMapFileDescriptor;
+
+MMapCache* mmap_cache_new(void);
+MMapCache* mmap_cache_ref(MMapCache *m);
+MMapCache* mmap_cache_unref(MMapCache *m);
+
+int mmap_cache_get(
+ MMapCache *m,
+ MMapFileDescriptor *f,
+ int prot,
+ unsigned context,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ void **ret,
+ size_t *ret_size);
+MMapFileDescriptor * mmap_cache_add_fd(MMapCache *m, int fd);
+void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f);
+
+unsigned mmap_cache_get_hit(MMapCache *m);
+unsigned mmap_cache_get_missed(MMapCache *m);
+
+bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f);
diff --git a/src/journal/pcre2-dlopen.c b/src/journal/pcre2-dlopen.c
new file mode 100644
index 0000000..fbe81f9
--- /dev/null
+++ b/src/journal/pcre2-dlopen.c
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "dlfcn-util.h"
+#include "pcre2-dlopen.h"
+
+#if HAVE_PCRE2
+static void *pcre2_dl = NULL;
+
+pcre2_match_data* (*sym_pcre2_match_data_create)(uint32_t, pcre2_general_context *);
+void (*sym_pcre2_match_data_free)(pcre2_match_data *);
+void (*sym_pcre2_code_free)(pcre2_code *);
+pcre2_code* (*sym_pcre2_compile)(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, pcre2_compile_context *);
+int (*sym_pcre2_get_error_message)(int, PCRE2_UCHAR *, PCRE2_SIZE);
+int (*sym_pcre2_match)(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *);
+PCRE2_SIZE* (*sym_pcre2_get_ovector_pointer)(pcre2_match_data *);
+
+int dlopen_pcre2(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (pcre2_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libpcre2-8.so.0", RTLD_LAZY);
+ if (!dl)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "PCRE2 support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_ERR,
+ &sym_pcre2_match_data_create, "pcre2_match_data_create_8",
+ &sym_pcre2_match_data_free, "pcre2_match_data_free_8",
+ &sym_pcre2_code_free, "pcre2_code_free_8",
+ &sym_pcre2_compile, "pcre2_compile_8",
+ &sym_pcre2_get_error_message, "pcre2_get_error_message_8",
+ &sym_pcre2_match, "pcre2_match_8",
+ &sym_pcre2_get_ovector_pointer, "pcre2_get_ovector_pointer_8",
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* Note that we never release the reference here, because there's no real reason to, after all this
+ * was traditionally a regular shared library dependency which lives forever too. */
+ pcre2_dl = TAKE_PTR(dl);
+
+ return 1;
+}
+
+#else
+
+int dlopen_pcre2(void) {
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "PCRE2 support is not compiled in.");
+}
+#endif
diff --git a/src/journal/pcre2-dlopen.h b/src/journal/pcre2-dlopen.h
new file mode 100644
index 0000000..1306334
--- /dev/null
+++ b/src/journal/pcre2-dlopen.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_PCRE2
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
+extern pcre2_match_data* (*sym_pcre2_match_data_create)(uint32_t, pcre2_general_context *);
+extern void (*sym_pcre2_match_data_free)(pcre2_match_data *);
+extern void (*sym_pcre2_code_free)(pcre2_code *);
+extern pcre2_code* (*sym_pcre2_compile)(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, pcre2_compile_context *);
+extern int (*sym_pcre2_get_error_message)(int, PCRE2_UCHAR *, PCRE2_SIZE);
+extern int (*sym_pcre2_match)(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *);
+extern PCRE2_SIZE* (*sym_pcre2_get_ovector_pointer)(pcre2_match_data *);
+#endif
+
+int dlopen_pcre2(void);
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
new file mode 100644
index 0000000..346970d
--- /dev/null
+++ b/src/journal/sd-journal.c
@@ -0,0 +1,3274 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/magic.h>
+#include <poll.h>
+#include <stddef.h>
+#include <sys/inotify.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "compress.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "list.h"
+#include "lookup3.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "replace-var.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+
+#define JOURNAL_FILES_MAX 7168
+
+#define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
+
+/* The maximum size of variable values we'll expand in catalog entries. We bind this to PATH_MAX for now, as
+ * we want to be able to show all officially valid paths at least */
+#define REPLACE_VAR_MAX PATH_MAX
+
+#define DEFAULT_DATA_THRESHOLD (64*1024)
+
+static void remove_file_real(sd_journal *j, JournalFile *f);
+
+static bool journal_pid_changed(sd_journal *j) {
+ assert(j);
+
+ /* We don't support people creating a journal object and
+ * keeping it around over a fork(). Let's complain. */
+
+ return j->original_pid != getpid_cached();
+}
+
+static int journal_put_error(sd_journal *j, int r, const char *path) {
+ char *copy;
+ int k;
+
+ /* Memorize an error we encountered, and store which
+ * file/directory it was generated from. Note that we store
+ * only *one* path per error code, as the error code is the
+ * key into the hashmap, and the path is the value. This means
+ * we keep track only of all error kinds, but not of all error
+ * locations. This has the benefit that the hashmap cannot
+ * grow beyond bounds.
+ *
+ * We return an error here only if we didn't manage to
+ * memorize the real error. */
+
+ if (r >= 0)
+ return r;
+
+ k = hashmap_ensure_allocated(&j->errors, NULL);
+ if (k < 0)
+ return k;
+
+ if (path) {
+ copy = strdup(path);
+ if (!copy)
+ return -ENOMEM;
+ } else
+ copy = NULL;
+
+ k = hashmap_put(j->errors, INT_TO_PTR(r), copy);
+ if (k < 0) {
+ free(copy);
+
+ if (k == -EEXIST)
+ return 0;
+
+ return k;
+ }
+
+ return 0;
+}
+
+static void detach_location(sd_journal *j) {
+ JournalFile *f;
+
+ assert(j);
+
+ j->current_file = NULL;
+ j->current_field = 0;
+
+ ORDERED_HASHMAP_FOREACH(f, j->files)
+ journal_file_reset_location(f);
+}
+
+static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
+ assert(l);
+ assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
+ assert(f);
+
+ *l = (Location) {
+ .type = type,
+ .seqnum = le64toh(o->entry.seqnum),
+ .seqnum_id = f->header->seqnum_id,
+ .realtime = le64toh(o->entry.realtime),
+ .monotonic = le64toh(o->entry.monotonic),
+ .boot_id = o->entry.boot_id,
+ .xor_hash = le64toh(o->entry.xor_hash),
+ .seqnum_set = true,
+ .realtime_set = true,
+ .monotonic_set = true,
+ .xor_hash_set = true,
+ };
+}
+
+static void set_location(sd_journal *j, JournalFile *f, Object *o) {
+ assert(j);
+ assert(f);
+ assert(o);
+
+ init_location(&j->current_location, LOCATION_DISCRETE, f, o);
+
+ j->current_file = f;
+ j->current_field = 0;
+
+ /* Let f know its candidate entry was picked. */
+ assert(f->location_type == LOCATION_SEEK);
+ f->location_type = LOCATION_DISCRETE;
+}
+
+static int match_is_valid(const void *data, size_t size) {
+ const char *b, *p;
+
+ assert(data);
+
+ if (size < 2)
+ return false;
+
+ if (((char*) data)[0] == '_' && ((char*) data)[1] == '_')
+ return false;
+
+ b = data;
+ for (p = b; p < b + size; p++) {
+
+ if (*p == '=')
+ return p > b;
+
+ if (*p == '_')
+ continue;
+
+ if (*p >= 'A' && *p <= 'Z')
+ continue;
+
+ if (*p >= '0' && *p <= '9')
+ continue;
+
+ return false;
+ }
+
+ return false;
+}
+
+static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
+ const uint8_t *a = _a, *b = _b;
+ size_t j;
+
+ for (j = 0; j < s && j < t; j++) {
+
+ if (a[j] != b[j])
+ return false;
+
+ if (a[j] == '=')
+ return true;
+ }
+
+ assert_not_reached("\"=\" not found");
+}
+
+static Match *match_new(Match *p, MatchType t) {
+ Match *m;
+
+ m = new(Match, 1);
+ if (!m)
+ return NULL;
+
+ *m = (Match) {
+ .type = t,
+ .parent = p,
+ };
+
+ if (p)
+ LIST_PREPEND(matches, p->matches, m);
+
+ return m;
+}
+
+static void match_free(Match *m) {
+ assert(m);
+
+ while (m->matches)
+ match_free(m->matches);
+
+ if (m->parent)
+ LIST_REMOVE(matches, m->parent->matches, m);
+
+ free(m->data);
+ free(m);
+}
+
+static void match_free_if_empty(Match *m) {
+ if (!m || m->matches)
+ return;
+
+ match_free(m);
+}
+
+_public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
+ Match *l3, *l4, *add_here = NULL, *m;
+ uint64_t hash;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(data, -EINVAL);
+
+ if (size == 0)
+ size = strlen(data);
+
+ assert_return(match_is_valid(data, size), -EINVAL);
+
+ /* level 0: AND term
+ * level 1: OR terms
+ * level 2: AND terms
+ * level 3: OR terms
+ * level 4: concrete matches */
+
+ if (!j->level0) {
+ j->level0 = match_new(NULL, MATCH_AND_TERM);
+ if (!j->level0)
+ return -ENOMEM;
+ }
+
+ if (!j->level1) {
+ j->level1 = match_new(j->level0, MATCH_OR_TERM);
+ if (!j->level1)
+ return -ENOMEM;
+ }
+
+ if (!j->level2) {
+ j->level2 = match_new(j->level1, MATCH_AND_TERM);
+ if (!j->level2)
+ return -ENOMEM;
+ }
+
+ assert(j->level0->type == MATCH_AND_TERM);
+ assert(j->level1->type == MATCH_OR_TERM);
+ assert(j->level2->type == MATCH_AND_TERM);
+
+ /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
+ * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
+ hash = jenkins_hash64(data, size);
+
+ LIST_FOREACH(matches, l3, j->level2->matches) {
+ assert(l3->type == MATCH_OR_TERM);
+
+ LIST_FOREACH(matches, l4, l3->matches) {
+ assert(l4->type == MATCH_DISCRETE);
+
+ /* Exactly the same match already? Then ignore
+ * this addition */
+ if (l4->hash == hash &&
+ l4->size == size &&
+ memcmp(l4->data, data, size) == 0)
+ return 0;
+
+ /* Same field? Then let's add this to this OR term */
+ if (same_field(data, size, l4->data, l4->size)) {
+ add_here = l3;
+ break;
+ }
+ }
+
+ if (add_here)
+ break;
+ }
+
+ if (!add_here) {
+ add_here = match_new(j->level2, MATCH_OR_TERM);
+ if (!add_here)
+ goto fail;
+ }
+
+ m = match_new(add_here, MATCH_DISCRETE);
+ if (!m)
+ goto fail;
+
+ m->hash = hash;
+ m->size = size;
+ m->data = memdup(data, size);
+ if (!m->data)
+ goto fail;
+
+ detach_location(j);
+
+ return 0;
+
+fail:
+ match_free_if_empty(add_here);
+ match_free_if_empty(j->level2);
+ match_free_if_empty(j->level1);
+ match_free_if_empty(j->level0);
+
+ return -ENOMEM;
+}
+
+_public_ int sd_journal_add_conjunction(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (!j->level0)
+ return 0;
+
+ if (!j->level1)
+ return 0;
+
+ if (!j->level1->matches)
+ return 0;
+
+ j->level1 = NULL;
+ j->level2 = NULL;
+
+ return 0;
+}
+
+_public_ int sd_journal_add_disjunction(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (!j->level0)
+ return 0;
+
+ if (!j->level1)
+ return 0;
+
+ if (!j->level2)
+ return 0;
+
+ if (!j->level2->matches)
+ return 0;
+
+ j->level2 = NULL;
+ return 0;
+}
+
+static char *match_make_string(Match *m) {
+ char *p = NULL, *r;
+ Match *i;
+ bool enclose = false;
+
+ if (!m)
+ return strdup("none");
+
+ if (m->type == MATCH_DISCRETE)
+ return cescape_length(m->data, m->size);
+
+ LIST_FOREACH(matches, i, m->matches) {
+ char *t, *k;
+
+ t = match_make_string(i);
+ if (!t)
+ return mfree(p);
+
+ if (p) {
+ k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t);
+ free(p);
+ free(t);
+
+ if (!k)
+ return NULL;
+
+ p = k;
+
+ enclose = true;
+ } else
+ p = t;
+ }
+
+ if (enclose) {
+ r = strjoin("(", p, ")");
+ free(p);
+ return r;
+ }
+
+ return p;
+}
+
+char *journal_make_match_string(sd_journal *j) {
+ assert(j);
+
+ return match_make_string(j->level0);
+}
+
+_public_ void sd_journal_flush_matches(sd_journal *j) {
+ if (!j)
+ return;
+
+ if (j->level0)
+ match_free(j->level0);
+
+ j->level0 = j->level1 = j->level2 = NULL;
+
+ detach_location(j);
+}
+
+_pure_ static int compare_with_location(const JournalFile *f, const Location *l, const JournalFile *current_file) {
+ int r;
+
+ assert(f);
+ assert(l);
+ assert(f->location_type == LOCATION_SEEK);
+ assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));
+
+ if (l->monotonic_set &&
+ sd_id128_equal(f->current_boot_id, l->boot_id) &&
+ l->realtime_set &&
+ f->current_realtime == l->realtime &&
+ l->xor_hash_set &&
+ f->current_xor_hash == l->xor_hash &&
+ l->seqnum_set &&
+ sd_id128_equal(f->header->seqnum_id, l->seqnum_id) &&
+ f->current_seqnum == l->seqnum &&
+ f != current_file)
+ return 0;
+
+ if (l->seqnum_set &&
+ sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
+
+ r = CMP(f->current_seqnum, l->seqnum);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->monotonic_set &&
+ sd_id128_equal(f->current_boot_id, l->boot_id)) {
+
+ r = CMP(f->current_monotonic, l->monotonic);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->realtime_set) {
+
+ r = CMP(f->current_realtime, l->realtime);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->xor_hash_set) {
+
+ r = CMP(f->current_xor_hash, l->xor_hash);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int next_for_match(
+ sd_journal *j,
+ Match *m,
+ JournalFile *f,
+ uint64_t after_offset,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+ uint64_t np = 0;
+ Object *n;
+
+ assert(j);
+ assert(m);
+ assert(f);
+
+ if (m->type == MATCH_DISCRETE) {
+ uint64_t dp, hash;
+
+ /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
+ * we can use what we pre-calculated. */
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ hash = journal_file_hash_data(f, m->data, m->size);
+ else
+ hash = m->hash;
+
+ r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
+ if (r <= 0)
+ return r;
+
+ return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
+
+ } else if (m->type == MATCH_OR_TERM) {
+ Match *i;
+
+ /* Find the earliest match beyond after_offset */
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
+ if (r < 0)
+ return r;
+ else if (r > 0) {
+ if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
+ np = cp;
+ }
+ }
+
+ if (np == 0)
+ return 0;
+
+ } else if (m->type == MATCH_AND_TERM) {
+ Match *i, *last_moved;
+
+ /* Always jump to the next matching entry and repeat
+ * this until we find an offset that matches for all
+ * matches. */
+
+ if (!m->matches)
+ return 0;
+
+ r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
+ if (r <= 0)
+ return r;
+
+ assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
+ last_moved = m->matches;
+
+ LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
+ uint64_t cp;
+
+ r = next_for_match(j, i, f, np, direction, NULL, &cp);
+ if (r <= 0)
+ return r;
+
+ assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
+ if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
+ np = cp;
+ last_moved = i;
+ }
+ }
+ }
+
+ assert(np > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = n;
+ if (offset)
+ *offset = np;
+
+ return 1;
+}
+
+static int find_location_for_match(
+ sd_journal *j,
+ Match *m,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+
+ assert(j);
+ assert(m);
+ assert(f);
+
+ if (m->type == MATCH_DISCRETE) {
+ uint64_t dp, hash;
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ hash = journal_file_hash_data(f, m->data, m->size);
+ else
+ hash = m->hash;
+
+ r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
+ if (r <= 0)
+ return r;
+
+ /* FIXME: missing: find by monotonic */
+
+ if (j->current_location.type == LOCATION_HEAD)
+ return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
+ if (j->current_location.type == LOCATION_TAIL)
+ return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
+ if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
+ return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
+ if (j->current_location.monotonic_set) {
+ r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
+ if (r != -ENOENT)
+ return r;
+ }
+ if (j->current_location.realtime_set)
+ return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
+
+ return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
+
+ } else if (m->type == MATCH_OR_TERM) {
+ uint64_t np = 0;
+ Object *n;
+ Match *i;
+
+ /* Find the earliest match */
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = find_location_for_match(j, i, f, direction, NULL, &cp);
+ if (r < 0)
+ return r;
+ else if (r > 0) {
+ if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
+ np = cp;
+ }
+ }
+
+ if (np == 0)
+ return 0;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = n;
+ if (offset)
+ *offset = np;
+
+ return 1;
+
+ } else {
+ Match *i;
+ uint64_t np = 0;
+
+ assert(m->type == MATCH_AND_TERM);
+
+ /* First jump to the last match, and then find the
+ * next one where all matches match */
+
+ if (!m->matches)
+ return 0;
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = find_location_for_match(j, i, f, direction, NULL, &cp);
+ if (r <= 0)
+ return r;
+
+ if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
+ np = cp;
+ }
+
+ return next_for_match(j, m, f, np, direction, ret, offset);
+ }
+}
+
+static int find_location_with_matches(
+ sd_journal *j,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+
+ assert(j);
+ assert(f);
+ assert(ret);
+ assert(offset);
+
+ if (!j->level0) {
+ /* No matches is simple */
+
+ if (j->current_location.type == LOCATION_HEAD)
+ return journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset);
+ if (j->current_location.type == LOCATION_TAIL)
+ return journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset);
+ if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
+ return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
+ if (j->current_location.monotonic_set) {
+ r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
+ if (r != -ENOENT)
+ return r;
+ }
+ if (j->current_location.realtime_set)
+ return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
+
+ return journal_file_next_entry(f, 0, direction, ret, offset);
+ } else
+ return find_location_for_match(j, j->level0, f, direction, ret, offset);
+}
+
+static int next_with_matches(
+ sd_journal *j,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ assert(j);
+ assert(f);
+ assert(ret);
+ assert(offset);
+
+ /* No matches is easy. We simple advance the file
+ * pointer by one. */
+ if (!j->level0)
+ return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
+
+ /* If we have a match then we look for the next matching entry
+ * with an offset at least one step larger */
+ return next_for_match(j, j->level0, f,
+ direction == DIRECTION_DOWN ? f->current_offset + 1
+ : f->current_offset - 1,
+ direction, ret, offset);
+}
+
+static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
+ Object *c;
+ uint64_t cp, n_entries;
+ int r;
+
+ assert(j);
+ assert(f);
+
+ n_entries = le64toh(f->header->n_entries);
+
+ /* If we hit EOF before, we don't need to look into this file again
+ * unless direction changed or new entries appeared. */
+ if (f->last_direction == direction && f->location_type == LOCATION_TAIL &&
+ n_entries == f->last_n_entries)
+ return 0;
+
+ f->last_n_entries = n_entries;
+
+ if (f->last_direction == direction && f->current_offset > 0) {
+ /* LOCATION_SEEK here means we did the work in a previous
+ * iteration and the current location already points to a
+ * candidate entry. */
+ if (f->location_type != LOCATION_SEEK) {
+ r = next_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+ } else {
+ f->last_direction = direction;
+
+ r = find_location_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+
+ /* OK, we found the spot, now let's advance until an entry
+ * that is actually different from what we were previously
+ * looking at. This is necessary to handle entries which exist
+ * in two (or more) journal files, and which shall all be
+ * suppressed but one. */
+
+ for (;;) {
+ bool found;
+
+ if (j->current_location.type == LOCATION_DISCRETE) {
+ int k;
+
+ k = compare_with_location(f, &j->current_location, j->current_file);
+
+ found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
+ } else
+ found = true;
+
+ if (found)
+ return 1;
+
+ r = next_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+}
+
+static int real_journal_next(sd_journal *j, direction_t direction) {
+ JournalFile *new_file = NULL;
+ unsigned i, n_files;
+ const void **files;
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n_files; i++) {
+ JournalFile *f = (JournalFile *)files[i];
+ bool found;
+
+ r = next_beyond_location(j, f, direction);
+ if (r < 0) {
+ log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
+ remove_file_real(j, f);
+ continue;
+ } else if (r == 0) {
+ f->location_type = LOCATION_TAIL;
+ continue;
+ }
+
+ if (!new_file)
+ found = true;
+ else {
+ int k;
+
+ k = journal_file_compare_locations(f, new_file);
+
+ found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
+ }
+
+ if (found)
+ new_file = f;
+ }
+
+ if (!new_file)
+ return 0;
+
+ r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ set_location(j, new_file, o);
+
+ return 1;
+}
+
+_public_ int sd_journal_next(sd_journal *j) {
+ return real_journal_next(j, DIRECTION_DOWN);
+}
+
+_public_ int sd_journal_previous(sd_journal *j) {
+ return real_journal_next(j, DIRECTION_UP);
+}
+
+static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
+ int c = 0, r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(skip <= INT_MAX, -ERANGE);
+
+ if (skip == 0) {
+ /* If this is not a discrete skip, then at least
+ * resolve the current location */
+ if (j->current_location.type != LOCATION_DISCRETE) {
+ r = real_journal_next(j, direction);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+ }
+
+ do {
+ r = real_journal_next(j, direction);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ return c;
+
+ skip--;
+ c++;
+ } while (skip > 0);
+
+ return c;
+}
+
+_public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
+ return real_journal_next_skip(j, DIRECTION_DOWN, skip);
+}
+
+_public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
+ return real_journal_next_skip(j, DIRECTION_UP, skip);
+}
+
+_public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
+ Object *o;
+ int r;
+ char bid[SD_ID128_STRING_MAX], sid[SD_ID128_STRING_MAX];
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(cursor, -EINVAL);
+
+ if (!j->current_file || j->current_file->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ sd_id128_to_string(j->current_file->header->seqnum_id, sid);
+ sd_id128_to_string(o->entry.boot_id, bid);
+
+ if (asprintf(cursor,
+ "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
+ sid, le64toh(o->entry.seqnum),
+ bid, le64toh(o->entry.monotonic),
+ le64toh(o->entry.realtime),
+ le64toh(o->entry.xor_hash)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
+ unsigned long long seqnum, monotonic, realtime, xor_hash;
+ bool seqnum_id_set = false,
+ seqnum_set = false,
+ boot_id_set = false,
+ monotonic_set = false,
+ realtime_set = false,
+ xor_hash_set = false;
+ sd_id128_t seqnum_id, boot_id;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(!isempty(cursor), -EINVAL);
+
+ for (const char *p = cursor;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (word[0] == '\0' || word[1] != '=')
+ return -EINVAL;
+
+ switch (word[0]) {
+ case 's':
+ seqnum_id_set = true;
+ r = sd_id128_from_string(word + 2, &seqnum_id);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'i':
+ seqnum_set = true;
+ if (sscanf(word + 2, "%llx", &seqnum) != 1)
+ return -EINVAL;
+ break;
+
+ case 'b':
+ boot_id_set = true;
+ r = sd_id128_from_string(word + 2, &boot_id);
+ break;
+
+ case 'm':
+ monotonic_set = true;
+ if (sscanf(word + 2, "%llx", &monotonic) != 1)
+ return -EINVAL;
+ break;
+
+ case 't':
+ realtime_set = true;
+ if (sscanf(word + 2, "%llx", &realtime) != 1)
+ return -EINVAL;
+ break;
+
+ case 'x':
+ xor_hash_set = true;
+ if (sscanf(word + 2, "%llx", &xor_hash) != 1)
+ return -EINVAL;
+ break;
+ }
+ }
+
+ if ((!seqnum_set || !seqnum_id_set) &&
+ (!monotonic_set || !boot_id_set) &&
+ !realtime_set)
+ return -EINVAL;
+
+ detach_location(j);
+ j->current_location = (Location) {
+ .type = LOCATION_SEEK,
+ };
+
+ if (realtime_set) {
+ j->current_location.realtime = (uint64_t) realtime;
+ j->current_location.realtime_set = true;
+ }
+
+ if (seqnum_set && seqnum_id_set) {
+ j->current_location.seqnum = (uint64_t) seqnum;
+ j->current_location.seqnum_id = seqnum_id;
+ j->current_location.seqnum_set = true;
+ }
+
+ if (monotonic_set && boot_id_set) {
+ j->current_location.monotonic = (uint64_t) monotonic;
+ j->current_location.boot_id = boot_id;
+ j->current_location.monotonic_set = true;
+ }
+
+ if (xor_hash_set) {
+ j->current_location.xor_hash = (uint64_t) xor_hash;
+ j->current_location.xor_hash_set = true;
+ }
+
+ return 0;
+}
+
+_public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
+ int r;
+ Object *o;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(!isempty(cursor), -EINVAL);
+
+ if (!j->current_file || j->current_file->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *item = NULL;
+ unsigned long long ll;
+ sd_id128_t id;
+ int k = 0;
+
+ r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+
+ if (strlen(item) < 2 || item[1] != '=')
+ return -EINVAL;
+
+ switch (item[0]) {
+
+ case 's':
+ k = sd_id128_from_string(item+2, &id);
+ if (k < 0)
+ return k;
+ if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
+ return 0;
+ break;
+
+ case 'i':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.seqnum))
+ return 0;
+ break;
+
+ case 'b':
+ k = sd_id128_from_string(item+2, &id);
+ if (k < 0)
+ return k;
+ if (!sd_id128_equal(id, o->entry.boot_id))
+ return 0;
+ break;
+
+ case 'm':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.monotonic))
+ return 0;
+ break;
+
+ case 't':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.realtime))
+ return 0;
+ break;
+
+ case 'x':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.xor_hash))
+ return 0;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+_public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ detach_location(j);
+
+ j->current_location = (Location) {
+ .type = LOCATION_SEEK,
+ .boot_id = boot_id,
+ .monotonic = usec,
+ .monotonic_set = true,
+ };
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ detach_location(j);
+
+ j->current_location = (Location) {
+ .type = LOCATION_SEEK,
+ .realtime = usec,
+ .realtime_set = true,
+ };
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_head(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ detach_location(j);
+
+ j->current_location = (Location) {
+ .type = LOCATION_HEAD,
+ };
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_tail(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ detach_location(j);
+
+ j->current_location = (Location) {
+ .type = LOCATION_TAIL,
+ };
+
+ return 0;
+}
+
+static void check_network(sd_journal *j, int fd) {
+ assert(j);
+
+ if (j->on_network)
+ return;
+
+ j->on_network = fd_is_network_fs(fd);
+}
+
+static bool file_has_type_prefix(const char *prefix, const char *filename) {
+ const char *full, *tilded, *atted;
+
+ full = strjoina(prefix, ".journal");
+ tilded = strjoina(full, "~");
+ atted = strjoina(prefix, "@");
+
+ return STR_IN_SET(filename, full, tilded) ||
+ startswith(filename, atted);
+}
+
+static bool file_type_wanted(int flags, const char *filename) {
+ assert(filename);
+
+ if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
+ return false;
+
+ /* no flags set → every type is OK */
+ if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
+ return true;
+
+ if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
+ return true;
+
+ if (flags & SD_JOURNAL_CURRENT_USER) {
+ char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
+
+ xsprintf(prefix, "user-"UID_FMT, getuid());
+
+ if (file_has_type_prefix(prefix, filename))
+ return true;
+ }
+
+ return false;
+}
+
+static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
+ assert(j);
+ assert(path);
+ assert(prefix);
+
+ if (j->toplevel_fd >= 0)
+ return false;
+
+ return path_startswith(path, prefix);
+}
+
+static void track_file_disposition(sd_journal *j, JournalFile *f) {
+ assert(j);
+ assert(f);
+
+ if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
+ j->has_runtime_files = true;
+ else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
+ j->has_persistent_files = true;
+}
+
+static const char *skip_slash(const char *p) {
+
+ if (!p)
+ return NULL;
+
+ while (*p == '/')
+ p++;
+
+ return p;
+}
+
+static int add_any_file(
+ sd_journal *j,
+ int fd,
+ const char *path) {
+
+ bool close_fd = false;
+ JournalFile *f;
+ struct stat st;
+ int r, k;
+
+ assert(j);
+ assert(fd >= 0 || path);
+
+ if (fd < 0) {
+ if (j->toplevel_fd >= 0)
+ /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
+ * openat() ignores the first argument. */
+
+ fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ else
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
+ goto finish;
+ }
+
+ close_fd = true;
+
+ r = fd_nonblock(fd, false);
+ if (r < 0) {
+ r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
+ goto finish;
+ }
+ }
+
+ if (fstat(fd, &st) < 0) {
+ r = log_debug_errno(errno, "Failed to fstat file '%s': %m", path);
+ goto finish;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0) {
+ log_debug_errno(r, "Refusing to open '%s', as it is not a regular file.", path);
+ goto finish;
+ }
+
+ f = ordered_hashmap_get(j->files, path);
+ if (f) {
+ if (f->last_stat.st_dev == st.st_dev &&
+ f->last_stat.st_ino == st.st_ino) {
+
+ /* We already track this file, under the same path and with the same device/inode numbers, it's
+ * hence really the same. Mark this file as seen in this generation. This is used to GC old
+ * files in process_q_overflow() to detect journal files that are still there and discern them
+ * from those which are gone. */
+
+ f->last_seen_generation = j->generation;
+ r = 0;
+ goto finish;
+ }
+
+ /* So we tracked a file under this name, but it has a different inode/device. In that case, it got
+ * replaced (probably due to rotation?), let's drop it hence from our list. */
+ remove_file_real(j, f);
+ f = NULL;
+ }
+
+ if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
+ log_debug("Too many open journal files, not adding %s.", path);
+ r = -ETOOMANYREFS;
+ goto finish;
+ }
+
+ r = journal_file_open(fd, path, O_RDONLY, 0, false, 0, false, NULL, j->mmap, NULL, NULL, &f);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open journal file %s: %m", path);
+ goto finish;
+ }
+
+ /* journal_file_dump(f); */
+
+ r = ordered_hashmap_put(j->files, f->path, f);
+ if (r < 0) {
+ f->close_fd = false; /* make sure journal_file_close() doesn't close the caller's fd (or our own). We'll let the caller do that, or ourselves */
+ (void) journal_file_close(f);
+ goto finish;
+ }
+
+ close_fd = false; /* the fd is now owned by the JournalFile object */
+
+ f->last_seen_generation = j->generation;
+
+ track_file_disposition(j, f);
+ check_network(j, f->fd);
+
+ j->current_invalidate_counter++;
+
+ log_debug("File %s added.", f->path);
+
+ r = 0;
+
+finish:
+ if (close_fd)
+ safe_close(fd);
+
+ if (r < 0) {
+ k = journal_put_error(j, r, path);
+ if (k < 0)
+ return k;
+ }
+
+ return r;
+}
+
+static int add_file_by_name(
+ sd_journal *j,
+ const char *prefix,
+ const char *filename) {
+
+ const char *path;
+
+ assert(j);
+ assert(prefix);
+ assert(filename);
+
+ if (j->no_new_files)
+ return 0;
+
+ if (!file_type_wanted(j->flags, filename))
+ return 0;
+
+ path = prefix_roota(prefix, filename);
+ return add_any_file(j, -1, path);
+}
+
+static void remove_file_by_name(
+ sd_journal *j,
+ const char *prefix,
+ const char *filename) {
+
+ const char *path;
+ JournalFile *f;
+
+ assert(j);
+ assert(prefix);
+ assert(filename);
+
+ path = prefix_roota(prefix, filename);
+ f = ordered_hashmap_get(j->files, path);
+ if (!f)
+ return;
+
+ remove_file_real(j, f);
+}
+
+static void remove_file_real(sd_journal *j, JournalFile *f) {
+ assert(j);
+ assert(f);
+
+ (void) ordered_hashmap_remove(j->files, f->path);
+
+ log_debug("File %s removed.", f->path);
+
+ if (j->current_file == f) {
+ j->current_file = NULL;
+ j->current_field = 0;
+ }
+
+ if (j->unique_file == f) {
+ /* Jump to the next unique_file or NULL if that one was last */
+ j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
+ j->unique_offset = 0;
+ if (!j->unique_file)
+ j->unique_file_lost = true;
+ }
+
+ if (j->fields_file == f) {
+ j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
+ j->fields_offset = 0;
+ if (!j->fields_file)
+ j->fields_file_lost = true;
+ }
+
+ (void) journal_file_close(f);
+
+ j->current_invalidate_counter++;
+}
+
+static int dirname_is_machine_id(const char *fn) {
+ sd_id128_t id, machine;
+ const char *e;
+ int r;
+
+ /* Returns true if the specified directory name matches the local machine ID */
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return r;
+
+ e = strchr(fn, '.');
+ if (e) {
+ const char *k;
+
+ /* Looks like it has a namespace suffix. Verify that. */
+ if (!log_namespace_name_valid(e + 1))
+ return false;
+
+ k = strndupa(fn, e - fn);
+ r = sd_id128_from_string(k, &id);
+ } else
+ r = sd_id128_from_string(fn, &id);
+ if (r < 0)
+ return r;
+
+ return sd_id128_equal(id, machine);
+}
+
+static int dirname_has_namespace(const char *fn, const char *namespace) {
+ const char *e;
+
+ /* Returns true if the specified directory name matches the specified namespace */
+
+ e = strchr(fn, '.');
+ if (e) {
+ const char *k;
+
+ if (!namespace)
+ return false;
+
+ if (!streq(e + 1, namespace))
+ return false;
+
+ k = strndupa(fn, e - fn);
+ return id128_is_valid(k);
+ }
+
+ if (namespace)
+ return false;
+
+ return id128_is_valid(fn);
+}
+
+static bool dirent_is_journal_file(const struct dirent *de) {
+ assert(de);
+
+ /* Returns true if the specified directory entry looks like a journal file we might be interested in */
+
+ if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ return endswith(de->d_name, ".journal") ||
+ endswith(de->d_name, ".journal~");
+}
+
+static bool dirent_is_journal_subdir(const struct dirent *de) {
+ const char *e, *n;
+ assert(de);
+
+ /* returns true if the specified directory entry looks like a directory that might contain journal
+ * files we might be interested in, i.e. is either a 128bit ID or a 128bit ID suffixed by a
+ * namespace. */
+
+ if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ e = strchr(de->d_name, '.');
+ if (!e)
+ return id128_is_valid(de->d_name); /* No namespace */
+
+ n = strndupa(de->d_name, e - de->d_name);
+ if (!id128_is_valid(n))
+ return false;
+
+ return log_namespace_name_valid(e + 1);
+}
+
+static int directory_open(sd_journal *j, const char *path, DIR **ret) {
+ DIR *d;
+
+ assert(j);
+ assert(path);
+ assert(ret);
+
+ if (j->toplevel_fd < 0)
+ d = opendir(path);
+ else
+ /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
+ * relative, by dropping the initial slash */
+ d = xopendirat(j->toplevel_fd, skip_slash(path), 0);
+ if (!d)
+ return -errno;
+
+ *ret = d;
+ return 0;
+}
+
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
+
+static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
+ struct dirent *de;
+
+ assert(j);
+ assert(m);
+ assert(d);
+
+ FOREACH_DIRENT_ALL(de, d, goto fail) {
+
+ if (dirent_is_journal_file(de))
+ (void) add_file_by_name(j, m->path, de->d_name);
+
+ if (m->is_root && dirent_is_journal_subdir(de))
+ (void) add_directory(j, m->path, de->d_name);
+ }
+
+ return;
+
+fail:
+ log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
+}
+
+static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
+ int r;
+
+ assert(j);
+ assert(m);
+ assert(fd >= 0);
+
+ /* Watch this directory if that's enabled and if it not being watched yet. */
+
+ if (m->wd > 0) /* Already have a watch? */
+ return;
+ if (j->inotify_fd < 0) /* Not watching at all? */
+ return;
+
+ m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
+ if (m->wd < 0) {
+ log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
+ return;
+ }
+
+ r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
+ if (r == -EEXIST)
+ log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
+ (void) inotify_rm_watch(j->inotify_fd, m->wd);
+ m->wd = -1;
+ }
+}
+
+static int add_directory(
+ sd_journal *j,
+ const char *prefix,
+ const char *dirname) {
+
+ _cleanup_free_ char *path = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ Directory *m;
+ int r, k;
+
+ assert(j);
+ assert(prefix);
+
+ /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
+ * and reenumerates directory contents */
+
+ path = path_join(prefix, dirname);
+ if (!path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ log_debug("Considering directory '%s'.", path);
+
+ /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
+ if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
+ !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
+ return 0;
+
+ if (dirname &&
+ (!(FLAGS_SET(j->flags, SD_JOURNAL_ALL_NAMESPACES) ||
+ dirname_has_namespace(dirname, j->namespace) > 0 ||
+ (FLAGS_SET(j->flags, SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE) && dirname_has_namespace(dirname, NULL) > 0))))
+ return 0;
+
+ r = directory_open(j, path, &d);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open directory '%s': %m", path);
+ goto fail;
+ }
+
+ m = hashmap_get(j->directories_by_path, path);
+ if (!m) {
+ m = new(Directory, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ *m = (Directory) {
+ .is_root = false,
+ .path = path,
+ };
+
+ if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ path = NULL; /* avoid freeing in cleanup */
+ j->current_invalidate_counter++;
+
+ log_debug("Directory %s added.", m->path);
+
+ } else if (m->is_root)
+ return 0; /* Don't 'downgrade' from root directory */
+
+ m->last_seen_generation = j->generation;
+
+ directory_watch(j, m, dirfd(d),
+ IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
+ IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
+ IN_ONLYDIR);
+
+ if (!j->no_new_files)
+ directory_enumerate(j, m, d);
+
+ check_network(j, dirfd(d));
+
+ return 0;
+
+fail:
+ k = journal_put_error(j, r, path ?: prefix);
+ if (k < 0)
+ return k;
+
+ return r;
+}
+
+static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ Directory *m;
+ int r, k;
+
+ assert(j);
+
+ /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
+ * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
+ * populate the set, as well as to update it later. */
+
+ if (p) {
+ /* If there's a path specified, use it. */
+
+ log_debug("Considering root directory '%s'.", p);
+
+ if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
+ !path_has_prefix(j, p, "/run"))
+ return -EINVAL;
+
+ if (j->prefix)
+ p = strjoina(j->prefix, p);
+
+ r = directory_open(j, p, &d);
+ if (r == -ENOENT && missing_ok)
+ return 0;
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open root directory %s: %m", p);
+ goto fail;
+ }
+ } else {
+ _cleanup_close_ int dfd = -1;
+
+ /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
+ * opendir() will take possession of the fd, and close it, which we don't want. */
+
+ p = "."; /* store this as "." in the directories hashmap */
+
+ dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
+ if (dfd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ d = take_fdopendir(&dfd);
+ if (!d) {
+ r = -errno;
+ goto fail;
+ }
+
+ rewinddir(d);
+ }
+
+ m = hashmap_get(j->directories_by_path, p);
+ if (!m) {
+ m = new0(Directory, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->is_root = true;
+
+ m->path = strdup(p);
+ if (!m->path) {
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
+ free(m->path);
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ j->current_invalidate_counter++;
+
+ log_debug("Root directory %s added.", m->path);
+
+ } else if (!m->is_root)
+ return 0;
+
+ directory_watch(j, m, dirfd(d),
+ IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
+ IN_ONLYDIR);
+
+ if (!j->no_new_files)
+ directory_enumerate(j, m, d);
+
+ check_network(j, dirfd(d));
+
+ return 0;
+
+fail:
+ k = journal_put_error(j, r, p);
+ if (k < 0)
+ return k;
+
+ return r;
+}
+
+static void remove_directory(sd_journal *j, Directory *d) {
+ assert(j);
+
+ if (d->wd > 0) {
+ hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
+
+ if (j->inotify_fd >= 0)
+ (void) inotify_rm_watch(j->inotify_fd, d->wd);
+ }
+
+ hashmap_remove(j->directories_by_path, d->path);
+
+ if (d->is_root)
+ log_debug("Root directory %s removed.", d->path);
+ else
+ log_debug("Directory %s removed.", d->path);
+
+ free(d->path);
+ free(d);
+}
+
+static int add_search_paths(sd_journal *j) {
+
+ static const char search_paths[] =
+ "/run/log/journal\0"
+ "/var/log/journal\0";
+ const char *p;
+
+ assert(j);
+
+ /* We ignore most errors here, since the idea is to only open
+ * what's actually accessible, and ignore the rest. */
+
+ NULSTR_FOREACH(p, search_paths)
+ (void) add_root_directory(j, p, true);
+
+ if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
+ (void) add_root_directory(j, "/var/log/journal/remote", true);
+
+ return 0;
+}
+
+static int add_current_paths(sd_journal *j) {
+ JournalFile *f;
+
+ assert(j);
+ assert(j->no_new_files);
+
+ /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
+ * treat them as fatal. */
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ _cleanup_free_ char *dir;
+ int r;
+
+ dir = dirname_malloc(f->path);
+ if (!dir)
+ return -ENOMEM;
+
+ r = add_directory(j, dir, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int allocate_inotify(sd_journal *j) {
+ assert(j);
+
+ if (j->inotify_fd < 0) {
+ j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (j->inotify_fd < 0)
+ return -errno;
+ }
+
+ return hashmap_ensure_allocated(&j->directories_by_wd, NULL);
+}
+
+static sd_journal *journal_new(int flags, const char *path, const char *namespace) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+
+ j = new0(sd_journal, 1);
+ if (!j)
+ return NULL;
+
+ j->original_pid = getpid_cached();
+ j->toplevel_fd = -1;
+ j->inotify_fd = -1;
+ j->flags = flags;
+ j->data_threshold = DEFAULT_DATA_THRESHOLD;
+
+ if (path) {
+ char *t;
+
+ t = strdup(path);
+ if (!t)
+ return NULL;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ j->prefix = t;
+ else
+ j->path = t;
+ }
+
+ if (namespace) {
+ j->namespace = strdup(namespace);
+ if (!j->namespace)
+ return NULL;
+ }
+
+ j->files = ordered_hashmap_new(&path_hash_ops);
+ if (!j->files)
+ return NULL;
+
+ j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
+ j->directories_by_path = hashmap_new(&path_hash_ops);
+ j->mmap = mmap_cache_new();
+ if (!j->files_cache || !j->directories_by_path || !j->mmap)
+ return NULL;
+
+ return TAKE_PTR(j);
+}
+
+#define OPEN_ALLOWED_FLAGS \
+ (SD_JOURNAL_LOCAL_ONLY | \
+ SD_JOURNAL_RUNTIME_ONLY | \
+ SD_JOURNAL_SYSTEM | \
+ SD_JOURNAL_CURRENT_USER | \
+ SD_JOURNAL_ALL_NAMESPACES | \
+ SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE)
+
+_public_ int sd_journal_open_namespace(sd_journal **ret, const char *namespace, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ j = journal_new(flags, NULL, namespace);
+ if (!j)
+ return -ENOMEM;
+
+ r = add_search_paths(j);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+_public_ int sd_journal_open(sd_journal **ret, int flags) {
+ return sd_journal_open_namespace(ret, NULL, flags);
+}
+
+#define OPEN_CONTAINER_ALLOWED_FLAGS \
+ (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
+
+_public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
+ _cleanup_free_ char *root = NULL, *class = NULL;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ char *p;
+ int r;
+
+ /* This is deprecated, people should use machined's OpenMachineRootDirectory() call instead in
+ * combination with sd_journal_open_directory_fd(). */
+
+ assert_return(machine, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
+ assert_return(machine_name_is_valid(machine), -EINVAL);
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p,
+ "ROOT", &root,
+ "CLASS", &class);
+ if (r == -ENOENT)
+ return -EHOSTDOWN;
+ if (r < 0)
+ return r;
+ if (!root)
+ return -ENODATA;
+
+ if (!streq_ptr(class, "container"))
+ return -EIO;
+
+ j = journal_new(flags, root, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ r = add_search_paths(j);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+#define OPEN_DIRECTORY_ALLOWED_FLAGS \
+ (SD_JOURNAL_OS_ROOT | \
+ SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
+
+_public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(path, -EINVAL);
+ assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ j = journal_new(flags, path, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ r = add_search_paths(j);
+ else
+ r = add_root_directory(j, path, false);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+_public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ const char **path;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(flags == 0, -EINVAL);
+
+ j = journal_new(flags, NULL, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ STRV_FOREACH(path, paths) {
+ r = add_any_file(j, -1, *path);
+ if (r < 0)
+ return r;
+ }
+
+ j->no_new_files = true;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+#define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
+ (SD_JOURNAL_OS_ROOT | \
+ SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
+
+_public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ struct stat st;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+ assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -EBADFD;
+
+ j = journal_new(flags, NULL, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ j->toplevel_fd = fd;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ r = add_search_paths(j);
+ else
+ r = add_root_directory(j, NULL, false);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+_public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
+ JournalFile *f;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ unsigned i;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(n_fds > 0, -EBADF);
+ assert_return(flags == 0, -EINVAL);
+
+ j = journal_new(flags, NULL, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ for (i = 0; i < n_fds; i++) {
+ struct stat st;
+
+ if (fds[i] < 0) {
+ r = -EBADF;
+ goto fail;
+ }
+
+ if (fstat(fds[i], &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ goto fail;
+
+ r = add_any_file(j, fds[i], NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ j->no_new_files = true;
+ j->no_inotify = true;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+
+fail:
+ /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
+ * remain open */
+ ORDERED_HASHMAP_FOREACH(f, j->files)
+ f->close_fd = false;
+
+ return r;
+}
+
+_public_ void sd_journal_close(sd_journal *j) {
+ Directory *d;
+
+ if (!j)
+ return;
+
+ sd_journal_flush_matches(j);
+
+ ordered_hashmap_free_with_destructor(j->files, journal_file_close);
+ iterated_cache_free(j->files_cache);
+
+ while ((d = hashmap_first(j->directories_by_path)))
+ remove_directory(j, d);
+
+ while ((d = hashmap_first(j->directories_by_wd)))
+ remove_directory(j, d);
+
+ hashmap_free(j->directories_by_path);
+ hashmap_free(j->directories_by_wd);
+
+ safe_close(j->inotify_fd);
+
+ if (j->mmap) {
+ log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
+ mmap_cache_unref(j->mmap);
+ }
+
+ hashmap_free_free(j->errors);
+
+ free(j->path);
+ free(j->prefix);
+ free(j->namespace);
+ free(j->unique_field);
+ free(j->fields_buffer);
+ free(j);
+}
+
+_public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
+ Object *o;
+ JournalFile *f;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ *ret = le64toh(o->entry.realtime);
+ return 0;
+}
+
+_public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
+ Object *o;
+ JournalFile *f;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ if (ret_boot_id)
+ *ret_boot_id = o->entry.boot_id;
+ else {
+ sd_id128_t id;
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ if (!sd_id128_equal(id, o->entry.boot_id))
+ return -ESTALE;
+ }
+
+ if (ret)
+ *ret = le64toh(o->entry.monotonic);
+
+ return 0;
+}
+
+static bool field_is_valid(const char *field) {
+ const char *p;
+
+ assert(field);
+
+ if (isempty(field))
+ return false;
+
+ if (startswith(field, "__"))
+ return false;
+
+ for (p = field; *p; p++) {
+
+ if (*p == '_')
+ continue;
+
+ if (*p >= 'A' && *p <= 'Z')
+ continue;
+
+ if (*p >= '0' && *p <= '9')
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+_public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
+ JournalFile *f;
+ uint64_t i, n;
+ size_t field_length;
+ int r;
+ Object *o;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(field, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(size, -EINVAL);
+ assert_return(field_is_valid(field), -EINVAL);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ field_length = strlen(field);
+
+ n = journal_file_entry_n_items(o);
+ for (i = 0; i < n; i++) {
+ uint64_t p, l;
+ le64_t le_hash;
+ size_t t;
+ int compression;
+
+ p = le64toh(o->entry.items[i].object_offset);
+ le_hash = o->entry.items[i].hash;
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le_hash != o->data.hash)
+ return -EBADMSG;
+
+ l = le64toh(o->object.size) - offsetof(Object, data.payload);
+
+ compression = o->object.flags & OBJECT_COMPRESSION_MASK;
+ if (compression) {
+#if HAVE_COMPRESSION
+ r = decompress_startswith(compression,
+ o->data.payload, l,
+ &f->compress_buffer, &f->compress_buffer_size,
+ field, field_length, '=');
+ if (r < 0)
+ log_debug_errno(r, "Cannot decompress %s object of length %"PRIu64" at offset "OFSfmt": %m",
+ object_compressed_to_string(compression), l, p);
+ else if (r > 0) {
+
+ size_t rsize;
+
+ r = decompress_blob(compression,
+ o->data.payload, l,
+ &f->compress_buffer, &f->compress_buffer_size, &rsize,
+ j->data_threshold);
+ if (r < 0)
+ return r;
+
+ *data = f->compress_buffer;
+ *size = (size_t) rsize;
+
+ return 0;
+ }
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else if (l >= field_length+1 &&
+ memcmp(o->data.payload, field, field_length) == 0 &&
+ o->data.payload[field_length] == '=') {
+
+ t = (size_t) l;
+
+ if ((uint64_t) t != l)
+ return -E2BIG;
+
+ *data = o->data.payload;
+ *size = t;
+
+ return 0;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+ }
+
+ return -ENOENT;
+}
+
+static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
+ size_t t;
+ uint64_t l;
+ int compression;
+
+ l = le64toh(READ_NOW(o->object.size));
+ if (l < offsetof(Object, data.payload))
+ return -EBADMSG;
+ l -= offsetof(Object, data.payload);
+ t = (size_t) l;
+
+ /* We can't read objects larger than 4G on a 32bit machine */
+ if ((uint64_t) t != l)
+ return -E2BIG;
+
+ compression = o->object.flags & OBJECT_COMPRESSION_MASK;
+ if (compression) {
+#if HAVE_COMPRESSION
+ size_t rsize;
+ int r;
+
+ r = decompress_blob(compression,
+ o->data.payload, l, &f->compress_buffer,
+ &f->compress_buffer_size, &rsize, j->data_threshold);
+ if (r < 0)
+ return r;
+
+ *data = f->compress_buffer;
+ *size = (size_t) rsize;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else {
+ *data = o->data.payload;
+ *size = t;
+ }
+
+ return 0;
+}
+
+_public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
+ JournalFile *f;
+ uint64_t p, n;
+ le64_t le_hash;
+ int r;
+ Object *o;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(data, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ n = journal_file_entry_n_items(o);
+ if (j->current_field >= n)
+ return 0;
+
+ p = le64toh(o->entry.items[j->current_field].object_offset);
+ le_hash = o->entry.items[j->current_field].hash;
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le_hash != o->data.hash)
+ return -EBADMSG;
+
+ r = return_data(j, f, o, data, size);
+ if (r < 0)
+ return r;
+
+ j->current_field++;
+
+ return 1;
+}
+
+_public_ int sd_journal_enumerate_available_data(sd_journal *j, const void **data, size_t *size) {
+ for (;;) {
+ int r;
+
+ r = sd_journal_enumerate_data(j, data, size);
+ if (r >= 0)
+ return r;
+ if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r))
+ return r;
+ j->current_field++; /* Try with the next field */
+ }
+}
+
+_public_ void sd_journal_restart_data(sd_journal *j) {
+ if (!j)
+ return;
+
+ j->current_field = 0;
+}
+
+static int reiterate_all_paths(sd_journal *j) {
+ assert(j);
+
+ if (j->no_new_files)
+ return add_current_paths(j);
+
+ if (j->flags & SD_JOURNAL_OS_ROOT)
+ return add_search_paths(j);
+
+ if (j->toplevel_fd >= 0)
+ return add_root_directory(j, NULL, false);
+
+ if (j->path)
+ return add_root_directory(j, j->path, true);
+
+ return add_search_paths(j);
+}
+
+_public_ int sd_journal_get_fd(sd_journal *j) {
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (j->no_inotify)
+ return -EMEDIUMTYPE;
+
+ if (j->inotify_fd >= 0)
+ return j->inotify_fd;
+
+ r = allocate_inotify(j);
+ if (r < 0)
+ return r;
+
+ log_debug("Reiterating files to get inotify watches established.");
+
+ /* Iterate through all dirs again, to add them to the inotify */
+ r = reiterate_all_paths(j);
+ if (r < 0)
+ return r;
+
+ return j->inotify_fd;
+}
+
+_public_ int sd_journal_get_events(sd_journal *j) {
+ int fd;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ fd = sd_journal_get_fd(j);
+ if (fd < 0)
+ return fd;
+
+ return POLLIN;
+}
+
+_public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
+ int fd;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(timeout_usec, -EINVAL);
+
+ fd = sd_journal_get_fd(j);
+ if (fd < 0)
+ return fd;
+
+ if (!j->on_network) {
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+ }
+
+ /* If we are on the network we need to regularly check for
+ * changes manually */
+
+ *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
+ return 1;
+}
+
+static void process_q_overflow(sd_journal *j) {
+ JournalFile *f;
+ Directory *m;
+
+ assert(j);
+
+ /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
+ * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
+ * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
+ * are subject for unloading. */
+
+ log_debug("Inotify queue overrun, reiterating everything.");
+
+ j->generation++;
+ (void) reiterate_all_paths(j);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+
+ if (f->last_seen_generation == j->generation)
+ continue;
+
+ log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
+ remove_file_real(j, f);
+ }
+
+ HASHMAP_FOREACH(m, j->directories_by_path) {
+
+ if (m->last_seen_generation == j->generation)
+ continue;
+
+ if (m->is_root) /* Never GC root directories */
+ continue;
+
+ log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
+ remove_directory(j, m);
+ }
+
+ log_debug("Reiteration complete.");
+}
+
+static void process_inotify_event(sd_journal *j, const struct inotify_event *e) {
+ Directory *d;
+
+ assert(j);
+ assert(e);
+
+ if (e->mask & IN_Q_OVERFLOW) {
+ process_q_overflow(j);
+ return;
+ }
+
+ /* Is this a subdirectory we watch? */
+ d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
+ if (d) {
+ if (!(e->mask & IN_ISDIR) && e->len > 0 &&
+ (endswith(e->name, ".journal") ||
+ endswith(e->name, ".journal~"))) {
+
+ /* Event for a journal file */
+
+ if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
+ (void) add_file_by_name(j, d->path, e->name);
+ else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
+ remove_file_by_name(j, d->path, e->name);
+
+ } else if (!d->is_root && e->len == 0) {
+
+ /* Event for a subdirectory */
+
+ if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
+ remove_directory(j, d);
+
+ } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
+
+ /* Event for root directory */
+
+ if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
+ (void) add_directory(j, d->path, e->name);
+ }
+
+ return;
+ }
+
+ if (e->mask & IN_IGNORED)
+ return;
+
+ log_debug("Unexpected inotify event.");
+}
+
+static int determine_change(sd_journal *j) {
+ bool b;
+
+ assert(j);
+
+ b = j->current_invalidate_counter != j->last_invalidate_counter;
+ j->last_invalidate_counter = j->current_invalidate_counter;
+
+ return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
+}
+
+_public_ int sd_journal_process(sd_journal *j) {
+ bool got_something = false;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
+ return 0;
+
+ j->last_process_usec = now(CLOCK_MONOTONIC);
+ j->last_invalidate_counter = j->current_invalidate_counter;
+
+ for (;;) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+
+ l = read(j->inotify_fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return got_something ? determine_change(j) : SD_JOURNAL_NOP;
+
+ return -errno;
+ }
+
+ got_something = true;
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l)
+ process_inotify_event(j, e);
+ }
+}
+
+_public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
+ int r;
+ uint64_t t;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ if (j->inotify_fd < 0) {
+ JournalFile *f;
+
+ /* This is the first invocation, hence create the
+ * inotify watch */
+ r = sd_journal_get_fd(j);
+ if (r < 0)
+ return r;
+
+ /* Server might have done some vacuuming while we weren't watching.
+ Get rid of the deleted files now so they don't stay around indefinitely. */
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ r = journal_file_fstat(f);
+ if (r == -EIDRM)
+ remove_file_real(j, f);
+ else if (r < 0) {
+ log_debug_errno(r,"Failed to fstat() journal file '%s' : %m", f->path);
+ continue;
+ }
+ }
+
+ /* The journal might have changed since the context
+ * object was created and we weren't watching before,
+ * hence don't wait for anything, and return
+ * immediately. */
+ return determine_change(j);
+ }
+
+ r = sd_journal_get_timeout(j, &t);
+ if (r < 0)
+ return r;
+
+ if (t != (uint64_t) -1) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ t = t > n ? t - n : 0;
+
+ if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
+ timeout_usec = t;
+ }
+
+ do {
+ r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
+ } while (r == -EINTR);
+
+ if (r < 0)
+ return r;
+
+ return sd_journal_process(j);
+}
+
+_public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
+ JournalFile *f;
+ bool first = true;
+ uint64_t fmin = 0, tmax = 0;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(from || to, -EINVAL);
+ assert_return(from != to, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ usec_t fr, t;
+
+ r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (first) {
+ fmin = fr;
+ tmax = t;
+ first = false;
+ } else {
+ fmin = MIN(fr, fmin);
+ tmax = MAX(t, tmax);
+ }
+ }
+
+ if (from)
+ *from = fmin;
+ if (to)
+ *to = tmax;
+
+ return first ? 0 : 1;
+}
+
+_public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
+ JournalFile *f;
+ bool found = false;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(from || to, -EINVAL);
+ assert_return(from != to, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ usec_t fr, t;
+
+ r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (found) {
+ if (from)
+ *from = MIN(fr, *from);
+ if (to)
+ *to = MAX(t, *to);
+ } else {
+ if (from)
+ *from = fr;
+ if (to)
+ *to = t;
+ found = true;
+ }
+ }
+
+ return found;
+}
+
+void journal_print_header(sd_journal *j) {
+ JournalFile *f;
+ bool newline = false;
+
+ assert(j);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ if (newline)
+ putchar('\n');
+ else
+ newline = true;
+
+ journal_file_print_header(f);
+ }
+}
+
+_public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
+ JournalFile *f;
+ uint64_t sum = 0;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(bytes, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ struct stat st;
+
+ if (fstat(f->fd, &st) < 0)
+ return -errno;
+
+ sum += (uint64_t) st.st_blocks * 512ULL;
+ }
+
+ *bytes = sum;
+ return 0;
+}
+
+_public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
+ char *f;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(!isempty(field), -EINVAL);
+ assert_return(field_is_valid(field), -EINVAL);
+
+ f = strdup(field);
+ if (!f)
+ return -ENOMEM;
+
+ free(j->unique_field);
+ j->unique_field = f;
+ j->unique_file = NULL;
+ j->unique_offset = 0;
+ j->unique_file_lost = false;
+
+ return 0;
+}
+
+_public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
+ size_t k;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(data, -EINVAL);
+ assert_return(l, -EINVAL);
+ assert_return(j->unique_field, -EINVAL);
+
+ k = strlen(j->unique_field);
+
+ if (!j->unique_file) {
+ if (j->unique_file_lost)
+ return 0;
+
+ j->unique_file = ordered_hashmap_first(j->files);
+ if (!j->unique_file)
+ return 0;
+
+ j->unique_offset = 0;
+ }
+
+ for (;;) {
+ JournalFile *of;
+ Object *o;
+ const void *odata;
+ size_t ol;
+ bool found;
+ int r;
+
+ /* Proceed to next data object in the field's linked list */
+ if (j->unique_offset == 0) {
+ r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
+ if (r < 0)
+ return r;
+
+ j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
+ } else {
+ r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
+ if (r < 0)
+ return r;
+
+ j->unique_offset = le64toh(o->data.next_field_offset);
+ }
+
+ /* We reached the end of the list? Then start again, with the next file */
+ if (j->unique_offset == 0) {
+ j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
+ if (!j->unique_file)
+ return 0;
+
+ continue;
+ }
+
+ /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
+ * instead, so that we can look at this data object at the same
+ * time as one on another file */
+ r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
+ if (r < 0)
+ return r;
+
+ /* Let's do the type check by hand, since we used 0 context above. */
+ if (o->object.type != OBJECT_DATA)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object has type %d, expected %d",
+ j->unique_file->path,
+ j->unique_offset,
+ o->object.type, OBJECT_DATA);
+
+ r = return_data(j, j->unique_file, o, &odata, &ol);
+ if (r < 0)
+ return r;
+
+ /* Check if we have at least the field name and "=". */
+ if (ol <= k)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
+ j->unique_file->path,
+ j->unique_offset, ol, k + 1);
+
+ if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=')
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object does not start with \"%s=\"",
+ j->unique_file->path,
+ j->unique_offset,
+ j->unique_field);
+
+ /* OK, now let's see if we already returned this data
+ * object by checking if it exists in the earlier
+ * traversed files. */
+ found = false;
+ ORDERED_HASHMAP_FOREACH(of, j->files) {
+ if (of == j->unique_file)
+ break;
+
+ /* Skip this file it didn't have any fields indexed */
+ if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
+ continue;
+
+ r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ continue;
+
+ r = return_data(j, j->unique_file, o, data, l);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+}
+
+_public_ int sd_journal_enumerate_available_unique(sd_journal *j, const void **data, size_t *size) {
+ for (;;) {
+ int r;
+
+ r = sd_journal_enumerate_unique(j, data, size);
+ if (r >= 0)
+ return r;
+ if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r))
+ return r;
+ /* Try with the next field. sd_journal_enumerate_unique() modifies state, so on the next try
+ * we will access the next field. */
+ }
+}
+
+_public_ void sd_journal_restart_unique(sd_journal *j) {
+ if (!j)
+ return;
+
+ j->unique_file = NULL;
+ j->unique_offset = 0;
+ j->unique_file_lost = false;
+}
+
+_public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(field, -EINVAL);
+
+ if (!j->fields_file) {
+ if (j->fields_file_lost)
+ return 0;
+
+ j->fields_file = ordered_hashmap_first(j->files);
+ if (!j->fields_file)
+ return 0;
+
+ j->fields_hash_table_index = 0;
+ j->fields_offset = 0;
+ }
+
+ for (;;) {
+ JournalFile *f, *of;
+ uint64_t m;
+ Object *o;
+ size_t sz;
+ bool found;
+
+ f = j->fields_file;
+
+ if (j->fields_offset == 0) {
+ bool eof = false;
+
+ /* We are not yet positioned at any field. Let's pick the first one */
+ r = journal_file_map_field_hash_table(f);
+ if (r < 0)
+ return r;
+
+ m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ for (;;) {
+ if (j->fields_hash_table_index >= m) {
+ /* Reached the end of the hash table, go to the next file. */
+ eof = true;
+ break;
+ }
+
+ j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
+
+ if (j->fields_offset != 0)
+ break;
+
+ /* Empty hash table bucket, go to next one */
+ j->fields_hash_table_index++;
+ }
+
+ if (eof) {
+ /* Proceed with next file */
+ j->fields_file = ordered_hashmap_next(j->files, f->path);
+ if (!j->fields_file) {
+ *field = NULL;
+ return 0;
+ }
+
+ j->fields_offset = 0;
+ j->fields_hash_table_index = 0;
+ continue;
+ }
+
+ } else {
+ /* We are already positioned at a field. If so, let's figure out the next field from it */
+
+ r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
+ if (r < 0)
+ return r;
+
+ j->fields_offset = le64toh(o->field.next_hash_offset);
+ if (j->fields_offset == 0) {
+ /* Reached the end of the hash table chain */
+ j->fields_hash_table_index++;
+ continue;
+ }
+ }
+
+ /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
+ if (r < 0)
+ return r;
+
+ /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
+ if (o->object.type != OBJECT_FIELD)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object has type %i, expected %i",
+ f->path, j->fields_offset,
+ o->object.type, OBJECT_FIELD);
+
+ sz = le64toh(o->object.size) - offsetof(Object, field.payload);
+
+ /* Let's see if we already returned this field name before. */
+ found = false;
+ ORDERED_HASHMAP_FOREACH(of, j->files) {
+ if (of == f)
+ break;
+
+ /* Skip this file it didn't have any fields indexed */
+ if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
+ continue;
+
+ r = journal_file_find_field_object_with_hash(of, o->field.payload, sz, le64toh(o->field.hash), NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ continue;
+
+ /* Check if this is really a valid string containing no NUL byte */
+ if (memchr(o->field.payload, 0, sz))
+ return -EBADMSG;
+
+ if (sz > j->data_threshold)
+ sz = j->data_threshold;
+
+ if (!GREEDY_REALLOC(j->fields_buffer, j->fields_buffer_allocated, sz + 1))
+ return -ENOMEM;
+
+ memcpy(j->fields_buffer, o->field.payload, sz);
+ j->fields_buffer[sz] = 0;
+
+ if (!field_is_valid(j->fields_buffer))
+ return -EBADMSG;
+
+ *field = j->fields_buffer;
+ return 1;
+ }
+}
+
+_public_ void sd_journal_restart_fields(sd_journal *j) {
+ if (!j)
+ return;
+
+ j->fields_file = NULL;
+ j->fields_hash_table_index = 0;
+ j->fields_offset = 0;
+ j->fields_file_lost = false;
+}
+
+_public_ int sd_journal_reliable_fd(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ return !j->on_network;
+}
+
+static char *lookup_field(const char *field, void *userdata) {
+ sd_journal *j = userdata;
+ const void *data;
+ size_t size, d;
+ int r;
+
+ assert(field);
+ assert(j);
+
+ r = sd_journal_get_data(j, field, &data, &size);
+ if (r < 0 ||
+ size > REPLACE_VAR_MAX)
+ return strdup(field);
+
+ d = strlen(field) + 1;
+
+ return strndup((const char*) data + d, size - d);
+}
+
+_public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
+ const void *data;
+ size_t size;
+ sd_id128_t id;
+ _cleanup_free_ char *text = NULL, *cid = NULL;
+ char *t;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
+ if (r < 0)
+ return r;
+
+ cid = strndup((const char*) data + 11, size - 11);
+ if (!cid)
+ return -ENOMEM;
+
+ r = sd_id128_from_string(cid, &id);
+ if (r < 0)
+ return r;
+
+ r = catalog_get(CATALOG_DATABASE, id, &text);
+ if (r < 0)
+ return r;
+
+ t = replace_var(text, lookup_field, j);
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+_public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
+ assert_return(ret, -EINVAL);
+
+ return catalog_get(CATALOG_DATABASE, id, ret);
+}
+
+_public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+
+ j->data_threshold = sz;
+ return 0;
+}
+
+_public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_pid_changed(j), -ECHILD);
+ assert_return(sz, -EINVAL);
+
+ *sz = j->data_threshold;
+ return 0;
+}
+
+_public_ int sd_journal_has_runtime_files(sd_journal *j) {
+ assert_return(j, -EINVAL);
+
+ return j->has_runtime_files;
+}
+
+_public_ int sd_journal_has_persistent_files(sd_journal *j) {
+ assert_return(j, -EINVAL);
+
+ return j->has_persistent_files;
+}
diff --git a/src/journal/test-audit-type.c b/src/journal/test-audit-type.c
new file mode 100644
index 0000000..5adbf0d
--- /dev/null
+++ b/src/journal/test-audit-type.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <linux/audit.h>
+
+#include "audit-type.h"
+
+static void print_audit_label(int i) {
+ const char *name;
+
+ name = audit_type_name_alloca(i);
+ /* This is a separate function only because of alloca */
+ printf("%i → %s → %s\n", i, audit_type_to_string(i), name);
+}
+
+static void test_audit_type(void) {
+ int i;
+
+ for (i = 0; i <= AUDIT_KERNEL; i++)
+ print_audit_label(i);
+}
+
+int main(int argc, char **argv) {
+ test_audit_type();
+ return 0;
+}
diff --git a/src/journal/test-catalog.c b/src/journal/test-catalog.c
new file mode 100644
index 0000000..982fec0
--- /dev/null
+++ b/src/journal/test-catalog.c
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static char** catalog_dirs = NULL;
+static const char *no_catalog_dirs[] = {
+ "/bin/hopefully/with/no/catalog",
+ NULL
+};
+
+static OrderedHashmap* test_import(const char* contents, ssize_t size, int code) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-catalog.XXXXXX";
+ _cleanup_close_ int fd;
+ OrderedHashmap *h;
+
+ if (size < 0)
+ size = strlen(contents);
+
+ assert_se(h = ordered_hashmap_new(&catalog_hash_ops));
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, contents, size) == size);
+
+ assert_se(catalog_import_file(h, name) == code);
+
+ return h;
+}
+
+static void test_catalog_import_invalid(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+
+ h = test_import("xxx", -1, -EINVAL);
+ assert_se(ordered_hashmap_isempty(h));
+}
+
+static void test_catalog_import_badid(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededede\n" \
+"Subject: message\n" \
+"\n" \
+"payload\n";
+ h = test_import(input, -1, -EINVAL);
+}
+
+static void test_catalog_import_one(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ char *payload;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"\n" \
+"payload\n";
+ const char *expect =
+"Subject: message\n" \
+"\n" \
+"payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(ordered_hashmap_size(h) == 1);
+
+ ORDERED_HASHMAP_FOREACH(payload, h) {
+ printf("expect: %s\n", expect);
+ printf("actual: %s\n", payload);
+ assert_se(streq(expect, payload));
+ }
+}
+
+static void test_catalog_import_merge(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ char *payload;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n" \
+"\n" \
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"\n" \
+"override payload\n";
+
+ const char *combined =
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"override payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(ordered_hashmap_size(h) == 1);
+
+ ORDERED_HASHMAP_FOREACH(payload, h)
+ assert_se(streq(combined, payload));
+}
+
+static void test_catalog_import_merge_no_body(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ char *payload;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n" \
+"\n" \
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"\n";
+
+ const char *combined =
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(ordered_hashmap_size(h) == 1);
+
+ ORDERED_HASHMAP_FOREACH(payload, h)
+ assert_se(streq(combined, payload));
+}
+
+static void test_catalog_update(const char *database) {
+ int r;
+
+ /* Test what happens if there are no files. */
+ r = catalog_update(database, NULL, NULL);
+ assert_se(r == 0);
+
+ /* Test what happens if there are no files in the directory. */
+ r = catalog_update(database, NULL, no_catalog_dirs);
+ assert_se(r == 0);
+
+ /* Make sure that we at least have some files loaded or the
+ * catalog_list below will fail. */
+ r = catalog_update(database, NULL, (const char * const *) catalog_dirs);
+ assert_se(r == 0);
+}
+
+static void test_catalog_file_lang(void) {
+ _cleanup_free_ char *lang = NULL, *lang2 = NULL, *lang3 = NULL, *lang4 = NULL;
+
+ assert_se(catalog_file_lang("systemd.de_DE.catalog", &lang) == 1);
+ assert_se(streq(lang, "de_DE"));
+
+ assert_se(catalog_file_lang("systemd..catalog", &lang2) == 0);
+ assert_se(lang2 == NULL);
+
+ assert_se(catalog_file_lang("systemd.fr.catalog", &lang2) == 1);
+ assert_se(streq(lang2, "fr"));
+
+ assert_se(catalog_file_lang("systemd.fr.catalog.gz", &lang3) == 0);
+ assert_se(lang3 == NULL);
+
+ assert_se(catalog_file_lang("systemd.01234567890123456789012345678901.catalog", &lang3) == 0);
+ assert_se(lang3 == NULL);
+
+ assert_se(catalog_file_lang("systemd.0123456789012345678901234567890.catalog", &lang3) == 1);
+ assert_se(streq(lang3, "0123456789012345678901234567890"));
+
+ assert_se(catalog_file_lang("/x/y/systemd.catalog", &lang4) == 0);
+ assert_se(lang4 == NULL);
+
+ assert_se(catalog_file_lang("/x/y/systemd.ru_RU.catalog", &lang4) == 1);
+ assert_se(streq(lang4, "ru_RU"));
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(unlink_tempfilep) char database[] = "/tmp/test-catalog.XXXXXX";
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ setlocale(LC_ALL, "de_DE.UTF-8");
+
+ test_setup_logging(LOG_DEBUG);
+
+ /* If test-catalog is located at the build directory, then use catalogs in that.
+ * If it is not, e.g. installed by systemd-tests package, then use installed catalogs. */
+ catalog_dirs = STRV_MAKE(get_catalog_dir());
+
+ assert_se(access(catalog_dirs[0], F_OK) >= 0);
+ log_notice("Using catalog directory '%s'", catalog_dirs[0]);
+
+ test_catalog_file_lang();
+
+ test_catalog_import_invalid();
+ test_catalog_import_badid();
+ test_catalog_import_one();
+ test_catalog_import_merge();
+ test_catalog_import_merge_no_body();
+
+ assert_se(mkostemp_safe(database) >= 0);
+
+ test_catalog_update(database);
+
+ r = catalog_list(stdout, database, true);
+ assert_se(r >= 0);
+
+ r = catalog_list(stdout, database, false);
+ assert_se(r >= 0);
+
+ assert_se(catalog_get(database, SD_MESSAGE_COREDUMP, &text) >= 0);
+ printf(">>>%s<<<\n", text);
+
+ return 0;
+}
diff --git a/src/journal/test-compress-benchmark.c b/src/journal/test-compress-benchmark.c
new file mode 100644
index 0000000..0019760
--- /dev/null
+++ b/src/journal/test-compress-benchmark.c
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "env-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+typedef int (compress_t)(const void *src, uint64_t src_size, void *dst,
+ size_t dst_alloc_size, size_t *dst_size);
+typedef int (decompress_t)(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max);
+
+#if HAVE_COMPRESSION
+
+static usec_t arg_duration;
+static size_t arg_start;
+
+#define MAX_SIZE (1024*1024LU)
+#define PRIME 1048571 /* A prime close enough to one megabyte that mod 4 == 3 */
+
+static size_t _permute(size_t x) {
+ size_t residue;
+
+ if (x >= PRIME)
+ return x;
+
+ residue = x*x % PRIME;
+ if (x <= PRIME / 2)
+ return residue;
+ else
+ return PRIME - residue;
+}
+
+static size_t permute(size_t x) {
+ return _permute((_permute(x) + arg_start) % MAX_SIZE ^ 0xFF345);
+}
+
+static char* make_buf(size_t count, const char *type) {
+ char *buf;
+ size_t i;
+
+ buf = malloc(count);
+ assert_se(buf);
+
+ if (streq(type, "zeros"))
+ memzero(buf, count);
+ else if (streq(type, "simple"))
+ for (i = 0; i < count; i++)
+ buf[i] = 'a' + i % ('z' - 'a' + 1);
+ else if (streq(type, "random")) {
+ size_t step = count / 10;
+
+ random_bytes(buf, step);
+ memzero(buf + 1*step, step);
+ random_bytes(buf + 2*step, step);
+ memzero(buf + 3*step, step);
+ random_bytes(buf + 4*step, step);
+ memzero(buf + 5*step, step);
+ random_bytes(buf + 6*step, step);
+ memzero(buf + 7*step, step);
+ random_bytes(buf + 8*step, step);
+ memzero(buf + 9*step, step);
+ } else
+ assert_not_reached("here");
+
+ return buf;
+}
+
+static void test_compress_decompress(const char* label, const char* type,
+ compress_t compress, decompress_t decompress) {
+ usec_t n, n2 = 0;
+ float dt;
+
+ _cleanup_free_ char *text, *buf;
+ _cleanup_free_ void *buf2 = NULL;
+ size_t buf2_allocated = 0;
+ size_t skipped = 0, compressed = 0, total = 0;
+
+ text = make_buf(MAX_SIZE, type);
+ buf = calloc(MAX_SIZE + 1, 1);
+ assert_se(text && buf);
+
+ n = now(CLOCK_MONOTONIC);
+
+ for (size_t i = 0; i <= MAX_SIZE; i++) {
+ size_t j = 0, k = 0, size;
+ int r;
+
+ size = permute(i);
+ if (size == 0)
+ continue;
+
+ log_debug("%s %zu %zu", type, i, size);
+
+ memzero(buf, MIN(size + 1000, MAX_SIZE));
+
+ r = compress(text, size, buf, size, &j);
+ /* assume compression must be successful except for small or random inputs */
+ assert_se(r == 0 || (size < 2048 && r == -ENOBUFS) || streq(type, "random"));
+
+ /* check for overwrites */
+ assert_se(buf[size] == 0);
+ if (r != 0) {
+ skipped += size;
+ continue;
+ }
+
+ assert_se(j > 0);
+ if (j >= size)
+ log_error("%s \"compressed\" %zu -> %zu", label, size, j);
+
+ r = decompress(buf, j, &buf2, &buf2_allocated, &k, 0);
+ assert_se(r == 0);
+ assert_se(buf2_allocated >= k);
+ assert_se(k == size);
+
+ assert_se(memcmp(text, buf2, size) == 0);
+
+ total += size;
+ compressed += j;
+
+ n2 = now(CLOCK_MONOTONIC);
+ if (n2 - n > arg_duration)
+ break;
+ }
+
+ dt = (n2-n) / 1e6;
+
+ log_info("%s/%s: compressed & decompressed %zu bytes in %.2fs (%.2fMiB/s), "
+ "mean compression %.2f%%, skipped %zu bytes",
+ label, type, total, dt,
+ total / 1024. / 1024 / dt,
+ 100 - compressed * 100. / total,
+ skipped);
+}
+#endif
+
+int main(int argc, char *argv[]) {
+#if HAVE_COMPRESSION
+ test_setup_logging(LOG_INFO);
+
+ if (argc >= 2) {
+ unsigned x;
+
+ assert_se(safe_atou(argv[1], &x) >= 0);
+ arg_duration = x * USEC_PER_SEC;
+ } else
+ arg_duration = slow_tests_enabled() ?
+ 2 * USEC_PER_SEC : USEC_PER_SEC / 50;
+
+ if (argc == 3)
+ (void) safe_atozu(argv[2], &arg_start);
+ else
+ arg_start = getpid_cached();
+
+ const char *i;
+ NULSTR_FOREACH(i, "zeros\0simple\0random\0") {
+#if HAVE_XZ
+ test_compress_decompress("XZ", i, compress_blob_xz, decompress_blob_xz);
+#endif
+#if HAVE_LZ4
+ test_compress_decompress("LZ4", i, compress_blob_lz4, decompress_blob_lz4);
+#endif
+#if HAVE_ZSTD
+ test_compress_decompress("ZSTD", i, compress_blob_zstd, decompress_blob_zstd);
+#endif
+ }
+ return 0;
+#else
+ return log_tests_skipped("No compression feature is enabled");
+#endif
+}
diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c
new file mode 100644
index 0000000..ccd4605
--- /dev/null
+++ b/src/journal/test-compress.c
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+
+#if HAVE_LZ4
+#include <lz4.h>
+#endif
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+#if HAVE_XZ
+# define XZ_OK 0
+#else
+# define XZ_OK -EPROTONOSUPPORT
+#endif
+
+#if HAVE_LZ4
+# define LZ4_OK 0
+#else
+# define LZ4_OK -EPROTONOSUPPORT
+#endif
+
+#define HUGE_SIZE (4096*1024)
+
+typedef int (compress_blob_t)(const void *src, uint64_t src_size,
+ void *dst, size_t dst_alloc_size, size_t *dst_size);
+typedef int (decompress_blob_t)(const void *src, uint64_t src_size,
+ void **dst, size_t *dst_alloc_size,
+ size_t* dst_size, size_t dst_max);
+typedef int (decompress_sw_t)(const void *src, uint64_t src_size,
+ void **buffer, size_t *buffer_size,
+ const void *prefix, size_t prefix_len,
+ uint8_t extra);
+
+typedef int (compress_stream_t)(int fdf, int fdt, uint64_t max_bytes);
+typedef int (decompress_stream_t)(int fdf, int fdt, uint64_t max_size);
+
+#if HAVE_COMPRESSION
+_unused_ static void test_compress_decompress(const char *compression,
+ compress_blob_t compress,
+ decompress_blob_t decompress,
+ const char *data,
+ size_t data_len,
+ bool may_fail) {
+ char compressed[512];
+ size_t csize, usize = 0;
+ _cleanup_free_ char *decompressed = NULL;
+ int r;
+
+ log_info("/* testing %s %s blob compression/decompression */",
+ compression, data);
+
+ r = compress(data, data_len, compressed, sizeof(compressed), &csize);
+ if (r == -ENOBUFS) {
+ log_info_errno(r, "compression failed: %m");
+ assert_se(may_fail);
+ } else {
+ assert_se(r == 0);
+ r = decompress(compressed, csize,
+ (void **) &decompressed, &usize, &csize, 0);
+ assert_se(r == 0);
+ assert_se(decompressed);
+ assert_se(memcmp(decompressed, data, data_len) == 0);
+ }
+
+ r = decompress("garbage", 7,
+ (void **) &decompressed, &usize, &csize, 0);
+ assert_se(r < 0);
+
+ /* make sure to have the minimal lz4 compressed size */
+ r = decompress("00000000\1g", 9,
+ (void **) &decompressed, &usize, &csize, 0);
+ assert_se(r < 0);
+
+ r = decompress("\100000000g", 9,
+ (void **) &decompressed, &usize, &csize, 0);
+ assert_se(r < 0);
+
+ memzero(decompressed, usize);
+}
+
+_unused_ static void test_decompress_startswith(const char *compression,
+ compress_blob_t compress,
+ decompress_sw_t decompress_sw,
+ const char *data,
+ size_t data_len,
+ bool may_fail) {
+
+ char *compressed;
+ _cleanup_free_ char *compressed1 = NULL, *compressed2 = NULL, *decompressed = NULL;
+ size_t csize, usize = 0, len;
+ int r;
+
+ log_info("/* testing decompress_startswith with %s on %.20s text */",
+ compression, data);
+
+#define BUFSIZE_1 512
+#define BUFSIZE_2 20000
+
+ compressed = compressed1 = malloc(BUFSIZE_1);
+ assert_se(compressed1);
+ r = compress(data, data_len, compressed, BUFSIZE_1, &csize);
+ if (r == -ENOBUFS) {
+ log_info_errno(r, "compression failed: %m");
+ assert_se(may_fail);
+
+ compressed = compressed2 = malloc(BUFSIZE_2);
+ assert_se(compressed2);
+ r = compress(data, data_len, compressed, BUFSIZE_2, &csize);
+ assert(r == 0);
+ }
+ assert_se(r == 0);
+
+ len = strlen(data);
+
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, '\0');
+ assert_se(r > 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, 'w');
+ assert_se(r == 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, "barbarbar", 9, ' ');
+ assert_se(r == 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len - 1, data[len-1]);
+ assert_se(r > 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len - 1, 'w');
+ assert_se(r == 0);
+ r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, '\0');
+ assert_se(r > 0);
+}
+
+_unused_ static void test_decompress_startswith_short(const char *compression,
+ compress_blob_t compress,
+ decompress_sw_t decompress_sw) {
+
+#define TEXT "HUGE=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+ char buf[1024];
+ size_t i, csize;
+ int r;
+
+ log_info("/* %s with %s */", __func__, compression);
+
+ r = compress(TEXT, sizeof TEXT, buf, sizeof buf, &csize);
+ assert_se(r == 0);
+
+ for (i = 1; i < strlen(TEXT); i++) {
+ size_t alloc_size = i;
+ _cleanup_free_ void *buf2 = NULL;
+
+ assert_se(buf2 = malloc(i));
+
+ assert_se(decompress_sw(buf, csize, &buf2, &alloc_size, TEXT, i, TEXT[i]) == 1);
+ assert_se(decompress_sw(buf, csize, &buf2, &alloc_size, TEXT, i, 'y') == 0);
+ }
+}
+
+_unused_ static void test_compress_stream(const char *compression,
+ const char *cat,
+ compress_stream_t compress,
+ decompress_stream_t decompress,
+ const char *srcfile) {
+
+ _cleanup_close_ int src = -1, dst = -1, dst2 = -1;
+ _cleanup_(unlink_tempfilep) char
+ pattern[] = "/tmp/systemd-test.compressed.XXXXXX",
+ pattern2[] = "/tmp/systemd-test.compressed.XXXXXX";
+ int r;
+ _cleanup_free_ char *cmd = NULL, *cmd2 = NULL;
+ struct stat st = {};
+
+ r = find_executable(cat, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Skipping %s, could not find %s binary: %m", __func__, cat);
+ return;
+ }
+
+ log_debug("/* testing %s compression */", compression);
+
+ log_debug("/* create source from %s */", srcfile);
+
+ assert_se((src = open(srcfile, O_RDONLY|O_CLOEXEC)) >= 0);
+
+ log_debug("/* test compression */");
+
+ assert_se((dst = mkostemp_safe(pattern)) >= 0);
+
+ assert_se(compress(src, dst, -1) == 0);
+
+ if (cat) {
+ assert_se(asprintf(&cmd, "%s %s | diff %s -", cat, pattern, srcfile) > 0);
+ assert_se(system(cmd) == 0);
+ }
+
+ log_debug("/* test decompression */");
+
+ assert_se((dst2 = mkostemp_safe(pattern2)) >= 0);
+
+ assert_se(stat(srcfile, &st) == 0);
+
+ assert_se(lseek(dst, 0, SEEK_SET) == 0);
+ r = decompress(dst, dst2, st.st_size);
+ assert_se(r == 0);
+
+ assert_se(asprintf(&cmd2, "diff %s %s", srcfile, pattern2) > 0);
+ assert_se(system(cmd2) == 0);
+
+ log_debug("/* test faulty decompression */");
+
+ assert_se(lseek(dst, 1, SEEK_SET) == 1);
+ r = decompress(dst, dst2, st.st_size);
+ assert_se(IN_SET(r, 0, -EBADMSG));
+
+ assert_se(lseek(dst, 0, SEEK_SET) == 0);
+ assert_se(lseek(dst2, 0, SEEK_SET) == 0);
+ r = decompress(dst, dst2, st.st_size - 1);
+ assert_se(r == -EFBIG);
+}
+#endif
+
+#if HAVE_LZ4
+static void test_lz4_decompress_partial(void) {
+ char buf[20000], buf2[100];
+ size_t buf_size = sizeof(buf), compressed;
+ int r;
+ _cleanup_free_ char *huge = NULL;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(huge = malloc(HUGE_SIZE));
+ memcpy(huge, "HUGE=", STRLEN("HUGE="));
+ memset(&huge[STRLEN("HUGE=")], 'x', HUGE_SIZE - STRLEN("HUGE=") - 1);
+ huge[HUGE_SIZE - 1] = '\0';
+
+ r = LZ4_compress_default(huge, buf, HUGE_SIZE, buf_size);
+ assert_se(r >= 0);
+ compressed = r;
+ log_info("Compressed %i → %zu", HUGE_SIZE, compressed);
+
+ r = LZ4_decompress_safe(buf, huge, r, HUGE_SIZE);
+ assert_se(r >= 0);
+ log_info("Decompressed → %i", r);
+
+ r = LZ4_decompress_safe_partial(buf, huge,
+ compressed,
+ 12, HUGE_SIZE);
+ assert_se(r >= 0);
+ log_info("Decompressed partial %i/%i → %i", 12, HUGE_SIZE, r);
+
+ for (size_t size = 1; size < sizeof(buf2); size++) {
+ /* This failed in older lz4s but works in newer ones. */
+ r = LZ4_decompress_safe_partial(buf, buf2, compressed, size, size);
+ log_info("Decompressed partial %zu/%zu → %i (%s)", size, size, r,
+ r < 0 ? "bad" : "good");
+ if (r >= 0 && LZ4_versionNumber() >= 10803)
+ /* lz4 <= 1.8.2 should fail that test, let's only check for newer ones */
+ assert_se(memcmp(buf2, huge, r) == 0);
+ }
+}
+#endif
+
+int main(int argc, char *argv[]) {
+#if HAVE_COMPRESSION
+ _unused_ const char text[] =
+ "text\0foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF"
+ "foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF";
+
+ /* The file to test compression on can be specified as the first argument */
+ const char *srcfile = argc > 1 ? argv[1] : argv[0];
+
+ char data[512] = "random\0";
+
+ _cleanup_free_ char *huge = NULL;
+
+ assert_se(huge = malloc(HUGE_SIZE));
+ memcpy(huge, "HUGE=", STRLEN("HUGE="));
+ memset(&huge[STRLEN("HUGE=")], 'x', HUGE_SIZE - STRLEN("HUGE=") - 1);
+ huge[HUGE_SIZE - 1] = '\0';
+
+ test_setup_logging(LOG_DEBUG);
+
+ random_bytes(data + 7, sizeof(data) - 7);
+
+#if HAVE_XZ
+ test_compress_decompress("XZ", compress_blob_xz, decompress_blob_xz,
+ text, sizeof(text), false);
+ test_compress_decompress("XZ", compress_blob_xz, decompress_blob_xz,
+ data, sizeof(data), true);
+
+ test_decompress_startswith("XZ",
+ compress_blob_xz, decompress_startswith_xz,
+ text, sizeof(text), false);
+ test_decompress_startswith("XZ",
+ compress_blob_xz, decompress_startswith_xz,
+ data, sizeof(data), true);
+ test_decompress_startswith("XZ",
+ compress_blob_xz, decompress_startswith_xz,
+ huge, HUGE_SIZE, true);
+
+ test_compress_stream("XZ", "xzcat",
+ compress_stream_xz, decompress_stream_xz, srcfile);
+
+ test_decompress_startswith_short("XZ", compress_blob_xz, decompress_startswith_xz);
+
+#else
+ log_info("/* XZ test skipped */");
+#endif
+
+#if HAVE_LZ4
+ test_compress_decompress("LZ4", compress_blob_lz4, decompress_blob_lz4,
+ text, sizeof(text), false);
+ test_compress_decompress("LZ4", compress_blob_lz4, decompress_blob_lz4,
+ data, sizeof(data), true);
+
+ test_decompress_startswith("LZ4",
+ compress_blob_lz4, decompress_startswith_lz4,
+ text, sizeof(text), false);
+ test_decompress_startswith("LZ4",
+ compress_blob_lz4, decompress_startswith_lz4,
+ data, sizeof(data), true);
+ test_decompress_startswith("LZ4",
+ compress_blob_lz4, decompress_startswith_lz4,
+ huge, HUGE_SIZE, true);
+
+ test_compress_stream("LZ4", "lz4cat",
+ compress_stream_lz4, decompress_stream_lz4, srcfile);
+
+ test_lz4_decompress_partial();
+
+ test_decompress_startswith_short("LZ4", compress_blob_lz4, decompress_startswith_lz4);
+
+#else
+ log_info("/* LZ4 test skipped */");
+#endif
+
+#if HAVE_ZSTD
+ test_compress_decompress("ZSTD", compress_blob_zstd, decompress_blob_zstd,
+ text, sizeof(text), false);
+ test_compress_decompress("ZSTD", compress_blob_zstd, decompress_blob_zstd,
+ data, sizeof(data), true);
+
+ test_decompress_startswith("ZSTD",
+ compress_blob_zstd, decompress_startswith_zstd,
+ text, sizeof(text), false);
+ test_decompress_startswith("ZSTD",
+ compress_blob_zstd, decompress_startswith_zstd,
+ data, sizeof(data), true);
+ test_decompress_startswith("ZSTD",
+ compress_blob_zstd, decompress_startswith_zstd,
+ huge, HUGE_SIZE, true);
+
+ test_compress_stream("ZSTD", "zstdcat",
+ compress_stream_zstd, decompress_stream_zstd, srcfile);
+
+ test_decompress_startswith_short("ZSTD", compress_blob_zstd, decompress_startswith_zstd);
+#else
+ log_info("/* ZSTD test skipped */");
+#endif
+
+ return 0;
+#else
+ log_info("/* XZ, LZ4 and ZSTD tests skipped */");
+ return EXIT_TEST_SKIP;
+#endif
+}
diff --git a/src/journal/test-journal-config.c b/src/journal/test-journal-config.c
new file mode 100644
index 0000000..4f29e1b
--- /dev/null
+++ b/src/journal/test-journal-config.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+
+#include "journald-server.h"
+
+#define _COMPRESS_PARSE_CHECK(str, enab, thresh, varname) \
+ do { \
+ JournalCompressOptions varname = {true, 111}; \
+ config_parse_compress("", "", 0, "", 0, "", 0, str, \
+ &varname, NULL); \
+ assert_se((enab) == varname.enabled); \
+ if (varname.enabled) \
+ assert_se((thresh) == varname.threshold_bytes); \
+ } while (0)
+
+#define COMPRESS_PARSE_CHECK(str, enabled, threshold) \
+ _COMPRESS_PARSE_CHECK(str, enabled, threshold, conf##__COUNTER__)
+
+static void test_config_compress(void) {
+ COMPRESS_PARSE_CHECK("yes", true, 111);
+ COMPRESS_PARSE_CHECK("no", false, 111);
+ COMPRESS_PARSE_CHECK("y", true, 111);
+ COMPRESS_PARSE_CHECK("n", false, 111);
+ COMPRESS_PARSE_CHECK("true", true, 111);
+ COMPRESS_PARSE_CHECK("false", false, 111);
+ COMPRESS_PARSE_CHECK("t", true, 111);
+ COMPRESS_PARSE_CHECK("f", false, 111);
+ COMPRESS_PARSE_CHECK("on", true, 111);
+ COMPRESS_PARSE_CHECK("off", false, 111);
+
+ /* Weird size/bool overlapping case. We preserve backward compatibility instead of assuming these are byte
+ * counts. */
+ COMPRESS_PARSE_CHECK("1", true, 111);
+ COMPRESS_PARSE_CHECK("0", false, 111);
+
+ /* IEC sizing */
+ COMPRESS_PARSE_CHECK("1B", true, 1);
+ COMPRESS_PARSE_CHECK("1K", true, 1024);
+ COMPRESS_PARSE_CHECK("1M", true, 1024 * 1024);
+ COMPRESS_PARSE_CHECK("1G", true, 1024 * 1024 * 1024);
+
+ /* Invalid Case */
+ COMPRESS_PARSE_CHECK("-1", true, 111);
+ COMPRESS_PARSE_CHECK("blah blah", true, 111);
+ COMPRESS_PARSE_CHECK("", true, (uint64_t)-1);
+}
+
+int main(int argc, char *argv[]) {
+ test_config_compress();
+
+ return 0;
+}
diff --git a/src/journal/test-journal-enum.c b/src/journal/test-journal-enum.c
new file mode 100644
index 0000000..03fe8e2
--- /dev/null
+++ b/src/journal/test-journal-enum.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "sd-journal.h"
+
+#include "journal-internal.h"
+#include "log.h"
+#include "macro.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ unsigned n = 0;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY) >= 0);
+
+ assert_se(sd_journal_add_match(j, "_TRANSPORT=syslog", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "_UID=0", 0) >= 0);
+
+ SD_JOURNAL_FOREACH_BACKWARDS(j) {
+ const void *d;
+ size_t l;
+
+ assert_se(sd_journal_get_data(j, "MESSAGE", &d, &l) >= 0);
+
+ printf("%.*s\n", (int) l, (char*) d);
+
+ n++;
+ if (n >= 10)
+ break;
+ }
+
+ return 0;
+}
diff --git a/src/journal/test-journal-flush.c b/src/journal/test-journal-flush.c
new file mode 100644
index 0000000..dad277d
--- /dev/null
+++ b/src/journal/test-journal-flush.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ char *fn = NULL;
+ char dn[] = "/var/tmp/test-journal-flush.XXXXXX";
+ JournalFile *new_journal = NULL;
+ sd_journal *j = NULL;
+ unsigned n = 0;
+ int r;
+
+ assert_se(mkdtemp(dn));
+ (void) chattr_path(dn, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ fn = path_join(dn, "test.journal");
+
+ r = journal_file_open(-1, fn, O_CREAT|O_RDWR, 0644, false, 0, false, NULL, NULL, NULL, NULL, &new_journal);
+ assert_se(r >= 0);
+
+ r = sd_journal_open(&j, 0);
+ assert_se(r >= 0);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ SD_JOURNAL_FOREACH(j) {
+ Object *o;
+ JournalFile *f;
+
+ f = j->current_file;
+ assert_se(f && f->current_offset > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ log_error_errno(r, "journal_file_move_to_object failed: %m");
+ assert_se(r >= 0);
+
+ r = journal_file_copy_entry(f, new_journal, o, f->current_offset);
+ if (r < 0)
+ log_error_errno(r, "journal_file_copy_entry failed: %m");
+ assert_se(r >= 0);
+
+ if (++n >= 10000)
+ break;
+ }
+
+ sd_journal_close(j);
+
+ (void) journal_file_close(new_journal);
+
+ unlink(fn);
+ assert_se(rmdir(dn) == 0);
+
+ return 0;
+}
diff --git a/src/journal/test-journal-init.c b/src/journal/test-journal-init.c
new file mode 100644
index 0000000..80aff75
--- /dev/null
+++ b/src/journal/test-journal-init.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-journal.h"
+
+#include "chattr-util.h"
+#include "log.h"
+#include "parse-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ sd_journal *j;
+ int r, i, I = 100;
+ char t[] = "/var/tmp/journal-stream-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc >= 2) {
+ r = safe_atoi(argv[1], &I);
+ if (r < 0)
+ log_info("Could not parse loop count argument. Using default.");
+ }
+
+ log_info("Running %d loops", I);
+
+ assert_se(mkdtemp(t));
+ (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ for (i = 0; i < I; i++) {
+ r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
+ assert_se(r == 0);
+
+ sd_journal_close(j);
+
+ r = sd_journal_open_directory(&j, t, 0);
+ assert_se(r == 0);
+
+ sd_journal_close(j);
+
+ j = NULL;
+ r = sd_journal_open_directory(&j, t, SD_JOURNAL_LOCAL_ONLY);
+ assert_se(r == -EINVAL);
+ assert_se(j == NULL);
+ }
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ return 0;
+}
diff --git a/src/journal/test-journal-interleaving.c b/src/journal/test-journal-interleaving.c
new file mode 100644
index 0000000..8c78c3b
--- /dev/null
+++ b/src/journal/test-journal-interleaving.c
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-vacuum.h"
+#include "log.h"
+#include "parse-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+#include "util.h"
+
+/* This program tests skipping around in a multi-file journal. */
+
+static bool arg_keep = false;
+
+_noreturn_ static void log_assert_errno(const char *text, int error, const char *file, int line, const char *func) {
+ log_internal(LOG_CRIT, error, file, line, func,
+ "'%s' failed at %s:%u (%s): %m", text, file, line, func);
+ abort();
+}
+
+#define assert_ret(expr) \
+ do { \
+ int _r_ = (expr); \
+ if (_unlikely_(_r_ < 0)) \
+ log_assert_errno(#expr, -_r_, PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__); \
+ } while (false)
+
+static JournalFile *test_open(const char *name) {
+ JournalFile *f;
+ assert_ret(journal_file_open(-1, name, O_RDWR|O_CREAT, 0644, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &f));
+ return f;
+}
+
+static void test_close(JournalFile *f) {
+ (void) journal_file_close (f);
+}
+
+static void append_number(JournalFile *f, int n, uint64_t *seqnum) {
+ char *p;
+ dual_timestamp ts;
+ static dual_timestamp previous_ts = {};
+ struct iovec iovec[1];
+
+ dual_timestamp_get(&ts);
+
+ if (ts.monotonic <= previous_ts.monotonic)
+ ts.monotonic = previous_ts.monotonic + 1;
+
+ if (ts.realtime <= previous_ts.realtime)
+ ts.realtime = previous_ts.realtime + 1;
+
+ previous_ts = ts;
+
+ assert_se(asprintf(&p, "NUMBER=%d", n) >= 0);
+ iovec[0] = IOVEC_MAKE_STRING(p);
+ assert_ret(journal_file_append_entry(f, &ts, NULL, iovec, 1, seqnum, NULL, NULL));
+ free(p);
+}
+
+static void test_check_number (sd_journal *j, int n) {
+ const void *d;
+ _cleanup_free_ char *k;
+ size_t l;
+ int x;
+
+ assert_ret(sd_journal_get_data(j, "NUMBER", &d, &l));
+ assert_se(k = strndup(d, l));
+ printf("%s\n", k);
+
+ assert_se(safe_atoi(k + 7, &x) >= 0);
+ assert_se(n == x);
+}
+
+static void test_check_numbers_down (sd_journal *j, int count) {
+ int i;
+
+ for (i = 1; i <= count; i++) {
+ int r;
+ test_check_number(j, i);
+ assert_ret(r = sd_journal_next(j));
+ if (i == count)
+ assert_se(r == 0);
+ else
+ assert_se(r == 1);
+ }
+
+}
+
+static void test_check_numbers_up (sd_journal *j, int count) {
+ for (int i = count; i >= 1; i--) {
+ int r;
+ test_check_number(j, i);
+ assert_ret(r = sd_journal_previous(j));
+ if (i == 1)
+ assert_se(r == 0);
+ else
+ assert_se(r == 1);
+ }
+
+}
+
+static void setup_sequential(void) {
+ JournalFile *one, *two;
+ one = test_open("one.journal");
+ two = test_open("two.journal");
+ append_number(one, 1, NULL);
+ append_number(one, 2, NULL);
+ append_number(two, 3, NULL);
+ append_number(two, 4, NULL);
+ test_close(one);
+ test_close(two);
+}
+
+static void setup_interleaved(void) {
+ JournalFile *one, *two;
+ one = test_open("one.journal");
+ two = test_open("two.journal");
+ append_number(one, 1, NULL);
+ append_number(two, 2, NULL);
+ append_number(one, 3, NULL);
+ append_number(two, 4, NULL);
+ test_close(one);
+ test_close(two);
+}
+
+static void mkdtemp_chdir_chattr(char *path) {
+ assert_se(mkdtemp(path));
+ assert_se(chdir(path) >= 0);
+
+ /* Speed up things a bit on btrfs, ensuring that CoW is turned off for all files created in our
+ * directory during the test run */
+ (void) chattr_path(path, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+}
+
+static void test_skip(void (*setup)(void)) {
+ char t[] = "/var/tmp/journal-skip-XXXXXX";
+ sd_journal *j;
+ int r;
+
+ mkdtemp_chdir_chattr(t);
+
+ setup();
+
+ /* Seek to head, iterate down.
+ */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_ret(sd_journal_next(j));
+ test_check_numbers_down(j, 4);
+ sd_journal_close(j);
+
+ /* Seek to tail, iterate up.
+ */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_ret(sd_journal_previous(j));
+ test_check_numbers_up(j, 4);
+ sd_journal_close(j);
+
+ /* Seek to tail, skip to head, iterate down.
+ */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_ret(r = sd_journal_previous_skip(j, 4));
+ assert_se(r == 4);
+ test_check_numbers_down(j, 4);
+ sd_journal_close(j);
+
+ /* Seek to head, skip to tail, iterate up.
+ */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_ret(r = sd_journal_next_skip(j, 4));
+ assert_se(r == 4);
+ test_check_numbers_up(j, 4);
+ sd_journal_close(j);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ puts("------------------------------------------------------------");
+}
+
+static void test_sequence_numbers(void) {
+
+ char t[] = "/var/tmp/journal-seq-XXXXXX";
+ JournalFile *one, *two;
+ uint64_t seqnum = 0;
+ sd_id128_t seqnum_id;
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, 0644,
+ true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &one) == 0);
+
+ append_number(one, 1, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 1);
+ append_number(one, 2, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 2);
+
+ assert_se(one->header->state == STATE_ONLINE);
+ assert_se(!sd_id128_equal(one->header->file_id, one->header->machine_id));
+ assert_se(!sd_id128_equal(one->header->file_id, one->header->boot_id));
+ assert_se(sd_id128_equal(one->header->file_id, one->header->seqnum_id));
+
+ memcpy(&seqnum_id, &one->header->seqnum_id, sizeof(sd_id128_t));
+
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, 0644,
+ true, (uint64_t) -1, false, NULL, NULL, NULL, one, &two) == 0);
+
+ assert_se(two->header->state == STATE_ONLINE);
+ assert_se(!sd_id128_equal(two->header->file_id, one->header->file_id));
+ assert_se(sd_id128_equal(one->header->machine_id, one->header->machine_id));
+ assert_se(sd_id128_equal(one->header->boot_id, one->header->boot_id));
+ assert_se(sd_id128_equal(one->header->seqnum_id, one->header->seqnum_id));
+
+ append_number(two, 3, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 3);
+ append_number(two, 4, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 4);
+
+ test_close(two);
+
+ append_number(one, 5, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 5);
+
+ append_number(one, 6, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 6);
+
+ test_close(one);
+
+ /* restart server */
+ seqnum = 0;
+
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR, 0,
+ true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &two) == 0);
+
+ assert_se(sd_id128_equal(two->header->seqnum_id, seqnum_id));
+
+ append_number(two, 7, &seqnum);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 5);
+
+ /* So..., here we have the same seqnum in two files with the
+ * same seqnum_id. */
+
+ test_close(two);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ /* journal_file_open requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ arg_keep = argc > 1;
+
+ test_skip(setup_sequential);
+ test_skip(setup_interleaved);
+
+ test_sequence_numbers();
+
+ return 0;
+}
diff --git a/src/journal/test-journal-match.c b/src/journal/test-journal-match.c
new file mode 100644
index 0000000..ded6756
--- /dev/null
+++ b/src/journal/test-journal-match.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "journal-internal.h"
+#include "log.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ _cleanup_free_ char *t;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_journal_open(&j, 0) >= 0);
+
+ assert_se(sd_journal_add_match(j, "foobar", 0) < 0);
+ assert_se(sd_journal_add_match(j, "foobar=waldo", 0) < 0);
+ assert_se(sd_journal_add_match(j, "", 0) < 0);
+ assert_se(sd_journal_add_match(j, "=", 0) < 0);
+ assert_se(sd_journal_add_match(j, "=xxxxx", 0) < 0);
+ assert_se(sd_journal_add_match(j, (uint8_t[4]){'A', '=', '\1', '\2'}, 4) >= 0);
+ assert_se(sd_journal_add_match(j, (uint8_t[5]){'B', '=', 'C', '\0', 'D'}, 5) >= 0);
+ assert_se(sd_journal_add_match(j, "HALLO=WALDO", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=mmmm", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=xxxxx", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "HALLO=", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=xxxxx", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=yyyyy", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "PIFF=paff", 0) >= 0);
+
+ assert_se(sd_journal_add_disjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "ONE=one", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "ONE=two", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "TWO=two", 0) >= 0);
+
+ assert_se(sd_journal_add_conjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "L4_1=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_1=ok", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_2=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_2=ok", 0) >= 0);
+
+ assert_se(sd_journal_add_disjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "L3=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L3=ok", 0) >= 0);
+
+ assert_se(t = journal_make_match_string(j));
+
+ printf("resulting match expression is: %s\n", t);
+
+ assert_se(streq(t, "(((L3=ok OR L3=yes) OR ((L4_2=ok OR L4_2=yes) AND (L4_1=ok OR L4_1=yes))) AND ((TWO=two AND (ONE=two OR ONE=one)) OR (PIFF=paff AND (QUUX=yyyyy OR QUUX=xxxxx OR QUUX=mmmm) AND (HALLO= OR HALLO=WALDO) AND B=C\\000D AND A=\\001\\002)))"));
+
+ return 0;
+}
diff --git a/src/journal/test-journal-send.c b/src/journal/test-journal-send.c
new file mode 100644
index 0000000..75bd8e7
--- /dev/null
+++ b/src/journal/test-journal-send.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+#include "fileio.h"
+#include "macro.h"
+#include "memory-util.h"
+
+static void test_journal_print(void) {
+ assert_se(sd_journal_print(LOG_INFO, "XXX") == 0);
+ assert_se(sd_journal_print(LOG_INFO, "%s", "YYY") == 0);
+ assert_se(sd_journal_print(LOG_INFO, "X%4094sY", "ZZZ") == 0);
+ assert_se(sd_journal_print(LOG_INFO, "X%*sY", LONG_LINE_MAX - 8 - 3, "ZZZ") == 0);
+ assert_se(sd_journal_print(LOG_INFO, "X%*sY", LONG_LINE_MAX - 8 - 2, "ZZZ") == -ENOBUFS);
+}
+
+static void test_journal_send(void) {
+ _cleanup_free_ char *huge = NULL;
+
+#define HUGE_SIZE (4096*1024)
+ assert_se(huge = malloc(HUGE_SIZE));
+
+ /* utf-8 and non-utf-8, message-less and message-ful iovecs */
+ struct iovec graph1[] = {
+ {(char*) "GRAPH=graph", STRLEN("GRAPH=graph")}
+ };
+ struct iovec graph2[] = {
+ {(char*) "GRAPH=graph\n", STRLEN("GRAPH=graph\n")}
+ };
+ struct iovec message1[] = {
+ {(char*) "MESSAGE=graph", STRLEN("MESSAGE=graph")}
+ };
+ struct iovec message2[] = {
+ {(char*) "MESSAGE=graph\n", STRLEN("MESSAGE=graph\n")}
+ };
+
+ assert_se(sd_journal_print(LOG_INFO, "piepapo") == 0);
+
+ assert_se(sd_journal_send("MESSAGE=foobar",
+ "VALUE=%i", 7,
+ NULL) == 0);
+
+ errno = ENOENT;
+ assert_se(sd_journal_perror("Foobar") == 0);
+
+ assert_se(sd_journal_perror("") == 0);
+
+ memcpy(huge, "HUGE=", STRLEN("HUGE="));
+ memset(&huge[STRLEN("HUGE=")], 'x', HUGE_SIZE - STRLEN("HUGE=") - 1);
+ huge[HUGE_SIZE - 1] = '\0';
+
+ assert_se(sd_journal_send("MESSAGE=Huge field attached",
+ huge,
+ NULL) == 0);
+
+ assert_se(sd_journal_send("MESSAGE=uiui",
+ "VALUE=A",
+ "VALUE=B",
+ "VALUE=C",
+ "SINGLETON=1",
+ "OTHERVALUE=X",
+ "OTHERVALUE=Y",
+ "WITH_BINARY=this is a binary value \a",
+ NULL) == 0);
+
+ syslog(LOG_NOTICE, "Hello World!");
+
+ assert_se(sd_journal_print(LOG_NOTICE, "Hello World") == 0);
+
+ assert_se(sd_journal_send("MESSAGE=Hello World!",
+ "MESSAGE_ID=52fb62f99e2c49d89cfbf9d6de5e3555",
+ "PRIORITY=5",
+ "HOME=%s", getenv("HOME"),
+ "TERM=%s", getenv("TERM"),
+ "PAGE_SIZE=%li", sysconf(_SC_PAGESIZE),
+ "N_CPUS=%li", sysconf(_SC_NPROCESSORS_ONLN),
+ NULL) == 0);
+
+ assert_se(sd_journal_sendv(graph1, 1) == 0);
+ assert_se(sd_journal_sendv(graph2, 1) == 0);
+ assert_se(sd_journal_sendv(message1, 1) == 0);
+ assert_se(sd_journal_sendv(message2, 1) == 0);
+
+ /* test without location fields */
+#undef sd_journal_sendv
+ assert_se(sd_journal_sendv(graph1, 1) == 0);
+ assert_se(sd_journal_sendv(graph2, 1) == 0);
+ assert_se(sd_journal_sendv(message1, 1) == 0);
+ assert_se(sd_journal_sendv(message2, 1) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_journal_print();
+ test_journal_send();
+
+ /* Sleep a bit to make it easy for journald to collect metadata. */
+ sleep(1);
+
+ return 0;
+}
diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c
new file mode 100644
index 0000000..a121859
--- /dev/null
+++ b/src/journal/test-journal-stream.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+#include "util.h"
+
+#define N_ENTRIES 200
+
+static void verify_contents(sd_journal *j, unsigned skip) {
+ unsigned i;
+
+ assert_se(j);
+
+ i = 0;
+ SD_JOURNAL_FOREACH(j) {
+ const void *d;
+ char *k, *c;
+ size_t l;
+ unsigned u = 0;
+
+ assert_se(sd_journal_get_cursor(j, &k) >= 0);
+ printf("cursor: %s\n", k);
+ free(k);
+
+ assert_se(sd_journal_get_data(j, "MAGIC", &d, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) d);
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &d, &l) >= 0);
+ assert_se(k = strndup(d, l));
+ printf("\t%s\n", k);
+
+ if (skip > 0) {
+ assert_se(safe_atou(k + 7, &u) >= 0);
+ assert_se(i == u);
+ i += skip;
+ }
+
+ free(k);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ free(c);
+ }
+
+ if (skip > 0)
+ assert_se(i == N_ENTRIES);
+}
+
+static void run_test(void) {
+ JournalFile *one, *two, *three;
+ char t[] = "/var/tmp/journal-stream-XXXXXX";
+ unsigned i;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ char *z;
+ const void *data;
+ size_t l;
+ dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL;
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+ (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ assert_se(journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &one) == 0);
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &two) == 0);
+ assert_se(journal_file_open(-1, "three.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &three) == 0);
+
+ for (i = 0; i < N_ENTRIES; i++) {
+ char *p, *q;
+ dual_timestamp ts;
+ struct iovec iovec[2];
+
+ dual_timestamp_get(&ts);
+
+ if (ts.monotonic <= previous_ts.monotonic)
+ ts.monotonic = previous_ts.monotonic + 1;
+
+ if (ts.realtime <= previous_ts.realtime)
+ ts.realtime = previous_ts.realtime + 1;
+
+ previous_ts = ts;
+
+ assert_se(asprintf(&p, "NUMBER=%u", i) >= 0);
+ iovec[0] = IOVEC_MAKE(p, strlen(p));
+
+ assert_se(asprintf(&q, "MAGIC=%s", i % 5 == 0 ? "quux" : "waldo") >= 0);
+
+ iovec[1] = IOVEC_MAKE(q, strlen(q));
+
+ if (i % 10 == 0)
+ assert_se(journal_file_append_entry(three, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ else {
+ if (i % 3 == 0)
+ assert_se(journal_file_append_entry(two, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+
+ assert_se(journal_file_append_entry(one, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ }
+
+ free(p);
+ free(q);
+ }
+
+ (void) journal_file_close(one);
+ (void) journal_file_close(two);
+ (void) journal_file_close(three);
+
+ assert_se(sd_journal_open_directory(&j, t, 0) >= 0);
+
+ assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0);
+ SD_JOURNAL_FOREACH_BACKWARDS(j) {
+ _cleanup_free_ char *c;
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) data);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ }
+
+ SD_JOURNAL_FOREACH(j) {
+ _cleanup_free_ char *c;
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) data);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ }
+
+ sd_journal_flush_matches(j);
+
+ verify_contents(j, 1);
+
+ printf("NEXT TEST\n");
+ assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0);
+
+ assert_se(z = journal_make_match_string(j));
+ printf("resulting match expression is: %s\n", z);
+ free(z);
+
+ verify_contents(j, 5);
+
+ printf("NEXT TEST\n");
+ sd_journal_flush_matches(j);
+ assert_se(sd_journal_add_match(j, "MAGIC=waldo", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=10", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=11", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=12", 0) >= 0);
+
+ assert_se(z = journal_make_match_string(j));
+ printf("resulting match expression is: %s\n", z);
+ free(z);
+
+ verify_contents(j, 0);
+
+ assert_se(sd_journal_query_unique(j, "NUMBER") >= 0);
+ SD_JOURNAL_FOREACH_UNIQUE(j, data, l)
+ printf("%.*s\n", (int) l, (const char*) data);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+
+ /* journal_file_open requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ test_setup_logging(LOG_DEBUG);
+
+ /* Run this test twice. Once with old hashing and once with new hashing */
+ assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "1", 1) >= 0);
+ run_test();
+
+ assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "0", 1) >= 0);
+ run_test();
+
+ return 0;
+}
diff --git a/src/journal/test-journal-syslog.c b/src/journal/test-journal-syslog.c
new file mode 100644
index 0000000..33f4129
--- /dev/null
+++ b/src/journal/test-journal-syslog.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "journald-syslog.h"
+#include "macro.h"
+#include "string-util.h"
+#include "syslog-util.h"
+
+static void test_syslog_parse_identifier(const char *str,
+ const char *ident, const char *pid, const char *rest, int ret) {
+ const char *buf = str;
+ _cleanup_free_ char *ident2 = NULL, *pid2 = NULL;
+ int ret2;
+
+ ret2 = syslog_parse_identifier(&buf, &ident2, &pid2);
+
+ assert_se(ret == ret2);
+ assert_se(ident == ident2 || streq_ptr(ident, ident2));
+ assert_se(pid == pid2 || streq_ptr(pid, pid2));
+ assert_se(streq(buf, rest));
+}
+
+static void test_syslog_parse_priority(const char *str, int priority, int ret) {
+ const char *buf = str;
+ int priority2 = 0, ret2;
+
+ ret2 = syslog_parse_priority(&buf, &priority2, false);
+
+ assert_se(ret == ret2);
+ if (ret2 == 1)
+ assert_se(priority == priority2);
+}
+
+int main(void) {
+ test_syslog_parse_identifier("pidu[111]: xxx", "pidu", "111", "xxx", 11);
+ test_syslog_parse_identifier("pidu: xxx", "pidu", NULL, "xxx", 6);
+ test_syslog_parse_identifier("pidu: xxx", "pidu", NULL, " xxx", 6);
+ test_syslog_parse_identifier("pidu xxx", NULL, NULL, "pidu xxx", 0);
+ test_syslog_parse_identifier(" pidu xxx", NULL, NULL, " pidu xxx", 0);
+ test_syslog_parse_identifier("", NULL, NULL, "", 0);
+ test_syslog_parse_identifier(" ", NULL, NULL, " ", 0);
+ test_syslog_parse_identifier(":", "", NULL, "", 1);
+ test_syslog_parse_identifier(": ", "", NULL, " ", 2);
+ test_syslog_parse_identifier(" :", "", NULL, "", 2);
+ test_syslog_parse_identifier(" pidu:", "pidu", NULL, "", 8);
+ test_syslog_parse_identifier("pidu:", "pidu", NULL, "", 5);
+ test_syslog_parse_identifier("pidu: ", "pidu", NULL, "", 6);
+ test_syslog_parse_identifier("pidu : ", NULL, NULL, "pidu : ", 0);
+
+ test_syslog_parse_priority("<>", 0, 0);
+ test_syslog_parse_priority("<>aaa", 0, 0);
+ test_syslog_parse_priority("<aaaa>", 0, 0);
+ test_syslog_parse_priority("<aaaa>aaa", 0, 0);
+ test_syslog_parse_priority(" <aaaa>", 0, 0);
+ test_syslog_parse_priority(" <aaaa>aaa", 0, 0);
+ /* TODO: add test cases of valid priorities */
+
+ return 0;
+}
diff --git a/src/journal/test-journal-verify.c b/src/journal/test-journal-verify.c
new file mode 100644
index 0000000..d208e46
--- /dev/null
+++ b/src/journal/test-journal-verify.c
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "chattr-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-verify.h"
+#include "log.h"
+#include "rm-rf.h"
+#include "terminal-util.h"
+#include "tests.h"
+#include "util.h"
+
+#define N_ENTRIES 6000
+#define RANDOM_RANGE 77
+
+static void bit_toggle(const char *fn, uint64_t p) {
+ uint8_t b;
+ ssize_t r;
+ int fd;
+
+ fd = open(fn, O_RDWR|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ r = pread(fd, &b, 1, p/8);
+ assert_se(r == 1);
+
+ b ^= 1 << (p % 8);
+
+ r = pwrite(fd, &b, 1, p/8);
+ assert_se(r == 1);
+
+ safe_close(fd);
+}
+
+static int raw_verify(const char *fn, const char *verification_key) {
+ JournalFile *f;
+ int r;
+
+ r = journal_file_open(-1, fn, O_RDONLY, 0666, true, (uint64_t) -1, !!verification_key, NULL, NULL, NULL, NULL, &f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_verify(f, verification_key, NULL, NULL, NULL, false);
+ (void) journal_file_close(f);
+
+ return r;
+}
+
+int main(int argc, char *argv[]) {
+ char t[] = "/var/tmp/journal-XXXXXX";
+ unsigned n;
+ JournalFile *f;
+ const char *verification_key = argv[1];
+ usec_t from = 0, to = 0, total = 0;
+ char a[FORMAT_TIMESTAMP_MAX];
+ char b[FORMAT_TIMESTAMP_MAX];
+ char c[FORMAT_TIMESPAN_MAX];
+ struct stat st;
+ uint64_t p;
+
+ /* journal_file_open requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+ (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ log_info("Generating...");
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, !!verification_key, NULL, NULL, NULL, NULL, &f) == 0);
+
+ for (n = 0; n < N_ENTRIES; n++) {
+ struct iovec iovec;
+ struct dual_timestamp ts;
+ char *test;
+
+ dual_timestamp_get(&ts);
+
+ assert_se(asprintf(&test, "RANDOM=%lu", random() % RANDOM_RANGE));
+
+ iovec = IOVEC_MAKE_STRING(test);
+
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+
+ free(test);
+ }
+
+ (void) journal_file_close(f);
+
+ log_info("Verifying...");
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDONLY, 0666, true, (uint64_t) -1, !!verification_key, NULL, NULL, NULL, NULL, &f) == 0);
+ /* journal_file_print_header(f); */
+ journal_file_dump(f);
+
+ assert_se(journal_file_verify(f, verification_key, &from, &to, &total, true) >= 0);
+
+ if (verification_key && JOURNAL_HEADER_SEALED(f->header))
+ log_info("=> Validated from %s to %s, %s missing",
+ format_timestamp(a, sizeof(a), from),
+ format_timestamp(b, sizeof(b), to),
+ format_timespan(c, sizeof(c), total > to ? total - to : 0, 0));
+
+ (void) journal_file_close(f);
+
+ if (verification_key) {
+ log_info("Toggling bits...");
+
+ assert_se(stat("test.journal", &st) >= 0);
+
+ for (p = 38448*8+0; p < ((uint64_t) st.st_size * 8); p ++) {
+ bit_toggle("test.journal", p);
+
+ log_info("[ %"PRIu64"+%"PRIu64"]", p / 8, p % 8);
+
+ if (raw_verify("test.journal", verification_key) >= 0)
+ log_notice(ANSI_HIGHLIGHT_RED ">>>> %"PRIu64" (bit %"PRIu64") can be toggled without detection." ANSI_NORMAL, p / 8, p % 8);
+
+ bit_toggle("test.journal", p);
+ }
+ }
+
+ log_info("Exiting...");
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ return 0;
+}
diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c
new file mode 100644
index 0000000..f8f08b5
--- /dev/null
+++ b/src/journal/test-journal.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "chattr-util.h"
+#include "io-util.h"
+#include "journal-authenticate.h"
+#include "journal-file.h"
+#include "journal-vacuum.h"
+#include "log.h"
+#include "rm-rf.h"
+#include "tests.h"
+
+static bool arg_keep = false;
+
+static void mkdtemp_chdir_chattr(char *path) {
+ assert_se(mkdtemp(path));
+ assert_se(chdir(path) >= 0);
+
+ /* Speed up things a bit on btrfs, ensuring that CoW is turned off for all files created in our
+ * directory during the test run */
+ (void) chattr_path(path, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+}
+
+static void test_non_empty(void) {
+ dual_timestamp ts;
+ JournalFile *f;
+ struct iovec iovec;
+ static const char test[] = "TEST1=1", test2[] = "TEST2=2";
+ Object *o;
+ uint64_t p;
+ sd_id128_t fake_boot_id;
+ char t[] = "/var/tmp/journal-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, true, NULL, NULL, NULL, NULL, &f) == 0);
+
+ assert_se(dual_timestamp_get(&ts));
+ assert_se(sd_id128_randomize(&fake_boot_id) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test2);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test);
+ assert_se(journal_file_append_entry(f, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL) == 0);
+
+#if HAVE_GCRYPT
+ journal_file_append_tag(f);
+#endif
+ journal_file_dump(f);
+
+ assert_se(journal_file_next_entry(f, 0, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+ assert_se(sd_id128_equal(o->entry.boot_id, fake_boot_id));
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 0);
+
+ assert_se(journal_file_next_entry(f, 0, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_find_data_object(f, test, strlen(test), NULL, &p) == 1);
+ assert_se(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_UP, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+
+ assert_se(journal_file_find_data_object(f, test2, strlen(test2), NULL, &p) == 1);
+ assert_se(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_UP, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_find_data_object(f, "quux", 4, NULL, &p) == 0);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 1, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 3, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 2, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 10, DIRECTION_DOWN, &o, NULL) == 0);
+
+ journal_file_rotate(&f, true, (uint64_t) -1, true, NULL);
+ journal_file_rotate(&f, true, (uint64_t) -1, true, NULL);
+
+ (void) journal_file_close(f);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ puts("------------------------------------------------------------");
+}
+
+static void test_empty(void) {
+ JournalFile *f1, *f2, *f3, *f4;
+ char t[] = "/var/tmp/journal-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, false, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &f1) == 0);
+
+ assert_se(journal_file_open(-1, "test-compress.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, false, NULL, NULL, NULL, NULL, &f2) == 0);
+
+ assert_se(journal_file_open(-1, "test-seal.journal", O_RDWR|O_CREAT, 0666, false, (uint64_t) -1, true, NULL, NULL, NULL, NULL, &f3) == 0);
+
+ assert_se(journal_file_open(-1, "test-seal-compress.journal", O_RDWR|O_CREAT, 0666, true, (uint64_t) -1, true, NULL, NULL, NULL, NULL, &f4) == 0);
+
+ journal_file_print_header(f1);
+ puts("");
+ journal_file_print_header(f2);
+ puts("");
+ journal_file_print_header(f3);
+ puts("");
+ journal_file_print_header(f4);
+ puts("");
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ (void) journal_file_close(f1);
+ (void) journal_file_close(f2);
+ (void) journal_file_close(f3);
+ (void) journal_file_close(f4);
+}
+
+#if HAVE_COMPRESSION
+static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) {
+ dual_timestamp ts;
+ JournalFile *f;
+ struct iovec iovec;
+ Object *o;
+ uint64_t p;
+ char t[] = "/var/tmp/journal-XXXXXX";
+ char data[2048] = {0};
+ bool is_compressed;
+ int r;
+
+ assert_se(data_size <= sizeof(data));
+
+ test_setup_logging(LOG_DEBUG);
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0666, true, compress_threshold, true, NULL, NULL, NULL, NULL, &f) == 0);
+
+ dual_timestamp_get(&ts);
+
+ iovec = IOVEC_MAKE(data, data_size);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+
+#if HAVE_GCRYPT
+ journal_file_append_tag(f);
+#endif
+ journal_file_dump(f);
+
+ /* We have to partially reimplement some of the dump logic, because the normal next_entry does the
+ * decompression for us. */
+ p = le64toh(f->header->header_size);
+ for (;;) {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ assert_se(r == 0);
+ if (o->object.type == OBJECT_DATA)
+ break;
+
+ assert_se(p < le64toh(f->header->tail_object_offset));
+ p = p + ALIGN64(le64toh(o->object.size));
+ }
+
+ is_compressed = (o->object.flags & OBJECT_COMPRESSION_MASK) != 0;
+
+ (void) journal_file_close(f);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ puts("------------------------------------------------------------");
+
+ return is_compressed;
+}
+
+static void test_min_compress_size(void) {
+ /* Note that XZ will actually fail to compress anything under 80 bytes, so you have to choose the limits
+ * carefully */
+
+ /* DEFAULT_MIN_COMPRESS_SIZE is 512 */
+ assert_se(!check_compressed((uint64_t) -1, 255));
+ assert_se(check_compressed((uint64_t) -1, 513));
+
+ /* compress everything */
+ assert_se(check_compressed(0, 96));
+ assert_se(check_compressed(8, 96));
+
+ /* Ensure we don't try to compress less than 8 bytes */
+ assert_se(!check_compressed(0, 7));
+
+ /* check boundary conditions */
+ assert_se(check_compressed(256, 256));
+ assert_se(!check_compressed(256, 255));
+}
+#endif
+
+int main(int argc, char *argv[]) {
+ arg_keep = argc > 1;
+
+ test_setup_logging(LOG_INFO);
+
+ /* journal_file_open requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ test_non_empty();
+ test_empty();
+#if HAVE_COMPRESSION
+ test_min_compress_size();
+#endif
+
+ return 0;
+}
diff --git a/src/journal/test-mmap-cache.c b/src/journal/test-mmap-cache.c
new file mode 100644
index 0000000..d1d2876
--- /dev/null
+++ b/src/journal/test-mmap-cache.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "macro.h"
+#include "mmap-cache.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ MMapFileDescriptor *fx;
+ int x, y, z, r;
+ char px[] = "/tmp/testmmapXXXXXXX", py[] = "/tmp/testmmapYXXXXXX", pz[] = "/tmp/testmmapZXXXXXX";
+ MMapCache *m;
+ void *p, *q;
+
+ assert_se(m = mmap_cache_new());
+
+ x = mkostemp_safe(px);
+ assert_se(x >= 0);
+ unlink(px);
+
+ assert_se(fx = mmap_cache_add_fd(m, x));
+
+ y = mkostemp_safe(py);
+ assert_se(y >= 0);
+ unlink(py);
+
+ z = mkostemp_safe(pz);
+ assert_se(z >= 0);
+ unlink(pz);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 0, false, 1, 2, NULL, &p, NULL);
+ assert_se(r >= 0);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 0, false, 2, 2, NULL, &q, NULL);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 1 == (uint8_t*) q);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 1, false, 3, 2, NULL, &q, NULL);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 2 == (uint8_t*) q);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 0, false, 16ULL*1024ULL*1024ULL, 2, NULL, &p, NULL);
+ assert_se(r >= 0);
+
+ r = mmap_cache_get(m, fx, PROT_READ, 1, false, 16ULL*1024ULL*1024ULL+1, 2, NULL, &q, NULL);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 1 == (uint8_t*) q);
+
+ mmap_cache_free_fd(m, fx);
+ mmap_cache_unref(m);
+
+ safe_close(x);
+ safe_close(y);
+ safe_close(z);
+
+ return 0;
+}
diff --git a/src/kernel-install/00-entry-directory.install b/src/kernel-install/00-entry-directory.install
new file mode 100644
index 0000000..21c09fa
--- /dev/null
+++ b/src/kernel-install/00-entry-directory.install
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*-
+# ex: ts=8 sw=4 sts=4 et filetype=sh
+
+COMMAND="$1"
+KERNEL_VERSION="$2"
+ENTRY_DIR_ABS="$3"
+KERNEL_IMAGE="$4"
+INITRD_OPTIONS_START="5"
+
+if ! [[ $KERNEL_INSTALL_MACHINE_ID ]]; then
+ exit 0
+fi
+
+if [[ $COMMAND != add ]]; then
+ exit 0
+fi
+
+# If the boot dir exists (e.g. $ESP/<machine-id>),
+# create the entry directory ($ESP/<machine-id>/<kernel-version>).
+# This is the only function of this plugin.
+MACHINE_ID_DIR="${ENTRY_DIR_ABS%/*}"
+if ! [ -d "$MACHINE_ID_DIR" ]; then
+ exit 0
+fi
+
+if [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ]; then
+ echo "+mkdir -v -p $ENTRY_DIR_ABS"
+ exec mkdir -v -p "$ENTRY_DIR_ABS"
+else
+ exec mkdir -p "$ENTRY_DIR_ABS"
+fi
diff --git a/src/kernel-install/50-depmod.install b/src/kernel-install/50-depmod.install
new file mode 100644
index 0000000..3850eac
--- /dev/null
+++ b/src/kernel-install/50-depmod.install
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*-
+# ex: ts=8 sw=4 sts=4 et filetype=sh
+
+COMMAND="$1"
+KERNEL_VERSION="$2"
+ENTRY_DIR_ABS="$3"
+KERNEL_IMAGE="$4"
+INITRD_OPTIONS_START="5"
+
+[[ $KERNEL_VERSION ]] || exit 1
+
+case "$COMMAND" in
+ add)
+ [[ -d "/lib/modules/${KERNEL_VERSION}/kernel" ]] || exit 0
+ [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && \
+ echo "Running depmod -a ${KERNEL_VERSION}"
+ exec depmod -a "${KERNEL_VERSION}"
+ ;;
+ remove)
+ [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && \
+ echo "Removing /lib/modules/${KERNEL_VERSION}/modules.dep and associated files"
+ exec rm -f /lib/modules/"${KERNEL_VERSION}"/modules.{alias{,.bin},builtin.bin,dep{,.bin},devname,softdep,symbols{,.bin}}
+ ;;
+ *)
+ exit 0
+esac
diff --git a/src/kernel-install/90-loaderentry.install b/src/kernel-install/90-loaderentry.install
new file mode 100644
index 0000000..d096745
--- /dev/null
+++ b/src/kernel-install/90-loaderentry.install
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*-
+# ex: ts=8 sw=4 sts=4 et filetype=sh
+
+COMMAND="$1"
+KERNEL_VERSION="$2"
+ENTRY_DIR_ABS="$3"
+KERNEL_IMAGE="$4"
+INITRD_OPTIONS_START="5"
+
+if ! [[ $KERNEL_INSTALL_MACHINE_ID ]]; then
+ exit 0
+fi
+
+if ! [[ -d "$ENTRY_DIR_ABS" ]]; then
+ exit 0
+fi
+
+MACHINE_ID=$KERNEL_INSTALL_MACHINE_ID
+
+BOOT_ROOT=${ENTRY_DIR_ABS%/$MACHINE_ID/$KERNEL_VERSION}
+BOOT_MNT=$(stat -c %m $BOOT_ROOT)
+ENTRY_DIR=${ENTRY_DIR_ABS#$BOOT_MNT}
+
+if [[ $COMMAND == remove ]]; then
+ rm -f "$BOOT_ROOT/loader/entries/$MACHINE_ID-$KERNEL_VERSION.conf"
+ rm -f "$BOOT_ROOT/loader/entries/$MACHINE_ID-$KERNEL_VERSION+"*".conf"
+ exit 0
+fi
+
+if ! [[ $COMMAND == add ]]; then
+ exit 1
+fi
+
+if ! [[ $KERNEL_IMAGE ]]; then
+ exit 1
+fi
+
+if [[ -f /etc/os-release ]]; then
+ . /etc/os-release
+elif [[ -f /usr/lib/os-release ]]; then
+ . /usr/lib/os-release
+fi
+
+if ! [[ $PRETTY_NAME ]]; then
+ PRETTY_NAME="Linux $KERNEL_VERSION"
+fi
+
+if [[ -f /etc/kernel/cmdline ]]; then
+ read -r -d '' -a BOOT_OPTIONS < /etc/kernel/cmdline
+elif [[ -f /usr/lib/kernel/cmdline ]]; then
+ read -r -d '' -a BOOT_OPTIONS < /usr/lib/kernel/cmdline
+else
+ declare -a BOOT_OPTIONS
+
+ read -r -d '' -a line < /proc/cmdline
+ for i in "${line[@]}"; do
+ [[ "${i#initrd=*}" != "$i" ]] && continue
+ [[ "${i#BOOT_IMAGE=*}" != "$i" ]] && continue
+ BOOT_OPTIONS+=("$i")
+ done
+fi
+
+if [[ -f /etc/kernel/tries ]]; then
+ read -r TRIES </etc/kernel/tries
+ if ! [[ "$TRIES" =~ ^[0-9]+$ ]] ; then
+ echo "/etc/kernel/tries does not contain an integer." >&2
+ exit 1
+ fi
+ LOADER_ENTRY="$BOOT_ROOT/loader/entries/$MACHINE_ID-$KERNEL_VERSION+$TRIES.conf"
+else
+ LOADER_ENTRY="$BOOT_ROOT/loader/entries/$MACHINE_ID-$KERNEL_VERSION.conf"
+fi
+
+cp "$KERNEL_IMAGE" "$ENTRY_DIR_ABS/linux" &&
+ chown root:root "$ENTRY_DIR_ABS/linux" &&
+ chmod 0644 "$ENTRY_DIR_ABS/linux" || {
+ echo "Could not copy '$KERNEL_IMAGE to '$ENTRY_DIR_ABS/linux'." >&2
+ exit 1
+}
+
+INITRD_OPTIONS=( "${@:${INITRD_OPTIONS_START}}" )
+
+for initrd in "${INITRD_OPTIONS[@]}"; do
+ if [[ -f "${initrd}" ]]; then
+ initrd_basename="$(basename ${initrd})"
+ [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && \
+ echo "Installing $ENTRY_DIR_ABS/${initrd_basename}"
+ cp "${initrd}" "$ENTRY_DIR_ABS/${initrd_basename}" &&
+ chown root:root "$ENTRY_DIR_ABS/${initrd_basename}" &&
+ chmod 0644 "$ENTRY_DIR_ABS/${initrd_basename}" || {
+ echo "Could not copy '${initrd}' to '$ENTRY_DIR_ABS/${initrd_basename}'." >&2
+ exit 1
+ }
+ fi
+done
+
+# If no initrd option is supplied, fall back to "initrd" which is
+# the name used by dracut when generating it in its kernel-install hook
+[[ ${#INITRD_OPTIONS[@]} == 0 ]] && INITRD_OPTIONS=( initrd )
+
+mkdir -p "${LOADER_ENTRY%/*}" || {
+ echo "Could not create loader entry directory '${LOADER_ENTRY%/*}'." >&2
+ exit 1
+}
+
+[ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && \
+ echo "Creating $LOADER_ENTRY"
+{
+ echo "title $PRETTY_NAME"
+ echo "version $KERNEL_VERSION"
+ echo "machine-id $MACHINE_ID"
+ echo "options ${BOOT_OPTIONS[*]}"
+ echo "linux $ENTRY_DIR/linux"
+ for initrd in "${INITRD_OPTIONS[@]}"; do
+ [[ -f $ENTRY_DIR_ABS/$(basename ${initrd}) ]] && \
+ echo "initrd $ENTRY_DIR/$(basename ${initrd})"
+ done
+ :
+} > "$LOADER_ENTRY" || {
+ echo "Could not create loader entry '$LOADER_ENTRY'." >&2
+ exit 1
+}
+exit 0
diff --git a/src/kernel-install/kernel-install b/src/kernel-install/kernel-install
new file mode 100755
index 0000000..e7457e9
--- /dev/null
+++ b/src/kernel-install/kernel-install
@@ -0,0 +1,181 @@
+#!/usr/bin/env bash
+# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*-
+# ex: ts=8 sw=4 sts=4 et filetype=sh
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# systemd is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with systemd; If not, see <http://www.gnu.org/licenses/>.
+
+SKIP_REMAINING=77
+
+usage()
+{
+ echo "Usage:"
+ echo " $0 [OPTIONS...] add KERNEL-VERSION KERNEL-IMAGE [INITRD-FILE ...]"
+ echo " $0 [OPTIONS...] remove KERNEL-VERSION"
+ echo "Options:"
+ echo " -h,--help Print this help"
+ echo " -v,--verbose Increase verbosity"
+}
+
+dropindirs_sort()
+{
+ local suffix=$1; shift
+ local -a files
+ local f d i
+
+ readarray -t files <<<"$(
+ for d in "$@"; do
+ for i in "$d/"*"$suffix"; do
+ if [[ -e "$i" ]]; then
+ echo "${i##*/}"
+ fi
+ done
+ done | sort -Vu
+ )"
+
+ for f in "${files[@]}"; do
+ for d in "$@"; do
+ if [[ -e "$d/$f" ]]; then
+ echo "$d/$f"
+ continue 2
+ fi
+ done
+ done
+}
+
+export LC_COLLATE=C
+
+for i in "$@"; do
+ if [ "$i" == "--help" -o "$i" == "-h" ]; then
+ usage
+ exit 0
+ fi
+done
+
+KERNEL_INSTALL_VERBOSE=0
+if [ "$1" == "--verbose" -o "$1" == "-v" ]; then
+ shift
+ KERNEL_INSTALL_VERBOSE=1
+fi
+export KERNEL_INSTALL_VERBOSE
+
+if [[ "${0##*/}" == 'installkernel' ]]; then
+ COMMAND='add'
+ # make install doesn't pass any parameter wrt initrd handling
+ INITRD_OPTIONS=()
+else
+ COMMAND="$1"
+ shift
+ INITRD_OPTIONS=( "${@:3}" )
+fi
+
+KERNEL_VERSION="$1"
+KERNEL_IMAGE="$2"
+
+# Reuse directory created without a machine ID present if it exists.
+if [[ -d /efi/Default ]] || [[ -d /boot/Default ]] || [[ -d /boot/efi/Default ]]; then
+ MACHINE_ID="Default"
+elif [[ -f /etc/machine-id ]]; then
+ read MACHINE_ID < /etc/machine-id
+else
+ MACHINE_ID="Default"
+fi
+
+if [[ ! $COMMAND ]] || [[ ! $KERNEL_VERSION ]]; then
+ echo "Not enough arguments" >&2
+ exit 1
+fi
+
+if [[ -d /efi/loader/entries ]] || [[ -d /efi/$MACHINE_ID ]]; then
+ ENTRY_DIR_ABS="/efi/$MACHINE_ID/$KERNEL_VERSION"
+elif [[ -d /boot/loader/entries ]] || [[ -d /boot/$MACHINE_ID ]]; then
+ ENTRY_DIR_ABS="/boot/$MACHINE_ID/$KERNEL_VERSION"
+elif [[ -d /boot/efi/loader/entries ]] || [[ -d /boot/efi/$MACHINE_ID ]]; then
+ ENTRY_DIR_ABS="/boot/efi/$MACHINE_ID/$KERNEL_VERSION"
+elif mountpoint -q /efi; then
+ ENTRY_DIR_ABS="/efi/$MACHINE_ID/$KERNEL_VERSION"
+elif mountpoint -q /boot/efi; then
+ ENTRY_DIR_ABS="/boot/efi/$MACHINE_ID/$KERNEL_VERSION"
+else
+ ENTRY_DIR_ABS="/boot/$MACHINE_ID/$KERNEL_VERSION"
+fi
+
+export KERNEL_INSTALL_MACHINE_ID=$MACHINE_ID
+
+ret=0
+
+readarray -t PLUGINS <<<"$(
+ dropindirs_sort ".install" \
+ "/etc/kernel/install.d" \
+ "/usr/lib/kernel/install.d"
+)"
+
+case $COMMAND in
+ add)
+ if [[ ! "$KERNEL_IMAGE" ]]; then
+ echo "Command 'add' requires an argument" >&2
+ exit 1
+ fi
+
+ if [[ ! -f "$KERNEL_IMAGE" ]]; then
+ echo "Kernel image argument ${KERNEL_IMAGE} not a file" >&2
+ exit 1
+ fi
+
+ for f in "${PLUGINS[@]}"; do
+ if [[ -x $f ]]; then
+ [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && \
+ echo "+$f add $KERNEL_VERSION $ENTRY_DIR_ABS $KERNEL_IMAGE ${INITRD_OPTIONS[@]}"
+ "$f" add "$KERNEL_VERSION" "$ENTRY_DIR_ABS" "$KERNEL_IMAGE" "${INITRD_OPTIONS[@]}"
+ x=$?
+ if [[ $x == $SKIP_REMAINING ]]; then
+ ret=0
+ break
+ fi
+ ((ret+=$x))
+ fi
+ done
+ ;;
+
+ remove)
+ for f in "${PLUGINS[@]}"; do
+ if [[ -x $f ]]; then
+ [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && \
+ echo "+$f remove $KERNEL_VERSION $ENTRY_DIR_ABS"
+ "$f" remove "$KERNEL_VERSION" "$ENTRY_DIR_ABS"
+ x=$?
+ if [[ $x == $SKIP_REMAINING ]]; then
+ ret=0
+ break
+ fi
+ ((ret+=$x))
+ fi
+ done
+
+ [ "$KERNEL_INSTALL_VERBOSE" -gt 0 ] && \
+ echo "Removing $ENTRY_DIR_ABS"
+
+ rm -rf "$ENTRY_DIR_ABS"
+ ((ret+=$?))
+ ;;
+
+ *)
+ echo "Unknown command '$COMMAND'" >&2
+ exit 1
+ ;;
+esac
+
+exit $ret
diff --git a/src/kernel-install/meson.build b/src/kernel-install/meson.build
new file mode 100644
index 0000000..4117188
--- /dev/null
+++ b/src/kernel-install/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+want_kernel_install = get_option('kernel-install')
+
+if want_kernel_install
+ install_data('kernel-install',
+ install_mode : 'rwxr-xr-x',
+ install_dir : bindir)
+
+ install_data('00-entry-directory.install',
+ '50-depmod.install',
+ '90-loaderentry.install',
+ install_mode : 'rwxr-xr-x',
+ install_dir : kernelinstalldir)
+
+ if install_sysconfdir
+ meson.add_install_script('sh', '-c',
+ mkdir_p.format(join_paths(sysconfdir, 'kernel/install.d')))
+ endif
+
+endif
diff --git a/src/libsystemd-network/arp-util.c b/src/libsystemd-network/arp-util.c
new file mode 100644
index 0000000..327fb2f
--- /dev/null
+++ b/src/libsystemd-network/arp-util.c
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <arpa/inet.h>
+#include <linux/filter.h>
+#include <netinet/if_ether.h>
+
+#include "arp-util.h"
+#include "fd-util.h"
+#include "unaligned.h"
+#include "util.h"
+
+int arp_network_bind_raw_socket(int ifindex, be32_t address, const struct ether_addr *eth_mac) {
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_W + BPF_LEN, 0), /* A <- packet length */
+ BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, sizeof(struct ether_arp), 1, 0), /* packet >= arp packet ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hrd)), /* A <- header */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPHRD_ETHER, 1, 0), /* header == ethernet ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pro)), /* A <- protocol */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 1, 0), /* protocol == IP ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hln)), /* A <- hardware address length */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct ether_addr), 1, 0), /* length == sizeof(ether_addr)? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pln)), /* A <- protocol address length */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct in_addr), 1, 0), /* length == sizeof(in_addr) ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_op)), /* A <- operation */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REQUEST, 2, 0), /* protocol == request ? */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REPLY, 1, 0), /* protocol == reply ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ /* Sender Hardware Address must be different from our own */
+ BPF_STMT(BPF_LD + BPF_IMM, unaligned_read_be32(&eth_mac->ether_addr_octet[0])),/* A <- 4 bytes of client's MAC */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_sha)), /* A <- 4 bytes of SHA */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 6), /* A == 0 ? */
+ BPF_STMT(BPF_LD + BPF_IMM, unaligned_read_be16(&eth_mac->ether_addr_octet[4])),/* A <- remainder of client's MAC */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, arp_sha) + 4), /* A <- remainder of SHA */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ /* Sender Protocol Address or Target Protocol Address must be equal to the one we care about */
+ BPF_STMT(BPF_LD + BPF_IMM, htobe32(address)), /* A <- clients IP */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_spa)), /* A <- SPA */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
+ BPF_STMT(BPF_LD + BPF_IMM, htobe32(address)), /* A <- clients IP */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_tpa)), /* A <- TPA */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ };
+ struct sock_fprog fprog = {
+ .len = ELEMENTSOF(filter),
+ .filter = (struct sock_filter*) filter
+ };
+ union sockaddr_union link = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_protocol = htobe16(ETH_P_ARP),
+ .ll.sll_ifindex = ifindex,
+ .ll.sll_halen = ETH_ALEN,
+ .ll.sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(ifindex > 0);
+
+ s = socket(AF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
+ if (r < 0)
+ return -errno;
+
+ r = bind(s, &link.sa, sizeof(link.ll));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+static int arp_send_packet(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha,
+ bool announce) {
+ union sockaddr_union link = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_protocol = htobe16(ETH_P_ARP),
+ .ll.sll_ifindex = ifindex,
+ .ll.sll_halen = ETH_ALEN,
+ .ll.sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ };
+ struct ether_arp arp = {
+ .ea_hdr.ar_hrd = htobe16(ARPHRD_ETHER), /* HTYPE */
+ .ea_hdr.ar_pro = htobe16(ETHERTYPE_IP), /* PTYPE */
+ .ea_hdr.ar_hln = ETH_ALEN, /* HLEN */
+ .ea_hdr.ar_pln = sizeof(be32_t), /* PLEN */
+ .ea_hdr.ar_op = htobe16(ARPOP_REQUEST), /* REQUEST */
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(pa != 0);
+ assert(ha);
+
+ memcpy(&arp.arp_sha, ha, ETH_ALEN);
+ memcpy(&arp.arp_tpa, &pa, sizeof(pa));
+
+ if (announce)
+ memcpy(&arp.arp_spa, &pa, sizeof(pa));
+
+ r = sendto(fd, &arp, sizeof(struct ether_arp), 0, &link.sa, sizeof(link.ll));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int arp_send_probe(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha) {
+ return arp_send_packet(fd, ifindex, pa, ha, false);
+}
+
+int arp_send_announcement(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha) {
+ return arp_send_packet(fd, ifindex, pa, ha, true);
+}
diff --git a/src/libsystemd-network/arp-util.h b/src/libsystemd-network/arp-util.h
new file mode 100644
index 0000000..2dac8cf
--- /dev/null
+++ b/src/libsystemd-network/arp-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+
+#include "socket-util.h"
+#include "sparse-endian.h"
+
+int arp_network_bind_raw_socket(int index, be32_t address, const struct ether_addr *eth_mac);
+
+int arp_send_probe(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha);
+int arp_send_announcement(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha);
diff --git a/src/libsystemd-network/dhcp-client-internal.h b/src/libsystemd-network/dhcp-client-internal.h
new file mode 100644
index 0000000..a6f3752
--- /dev/null
+++ b/src/libsystemd-network/dhcp-client-internal.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+extern const struct hash_ops dhcp_option_hash_ops;
diff --git a/src/libsystemd-network/dhcp-identifier.c b/src/libsystemd-network/dhcp-identifier.c
new file mode 100644
index 0000000..ea9c77a
--- /dev/null
+++ b/src/libsystemd-network/dhcp-identifier.c
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/if_infiniband.h>
+#include <net/if_arp.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "dhcp-identifier.h"
+#include "dhcp6-protocol.h"
+#include "network-internal.h"
+#include "siphash24.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "udev-util.h"
+#include "virt.h"
+
+#define HASH_KEY SD_ID128_MAKE(80,11,8c,c2,fe,4a,03,ee,3e,d6,0c,6f,36,39,14,09)
+#define APPLICATION_ID SD_ID128_MAKE(a5,0a,d1,12,bf,60,45,77,a2,fb,74,1a,b1,95,5b,03)
+#define USEC_2000 ((usec_t) 946684800000000) /* 2000-01-01 00:00:00 UTC */
+
+int dhcp_validate_duid_len(uint16_t duid_type, size_t duid_len, bool strict) {
+ struct duid d;
+
+ assert_cc(sizeof(d.raw) >= MAX_DUID_LEN);
+ if (duid_len > MAX_DUID_LEN)
+ return -EINVAL;
+
+ if (!strict)
+ /* Strict validation is not requested. We only ensure that the
+ * DUID is not too long. */
+ return 0;
+
+ switch (duid_type) {
+ case DUID_TYPE_LLT:
+ if (duid_len <= sizeof(d.llt))
+ return -EINVAL;
+ break;
+ case DUID_TYPE_EN:
+ if (duid_len != sizeof(d.en))
+ return -EINVAL;
+ break;
+ case DUID_TYPE_LL:
+ if (duid_len <= sizeof(d.ll))
+ return -EINVAL;
+ break;
+ case DUID_TYPE_UUID:
+ if (duid_len != sizeof(d.uuid))
+ return -EINVAL;
+ break;
+ default:
+ /* accept unknown type in order to be forward compatible */
+ break;
+ }
+ return 0;
+}
+
+int dhcp_identifier_set_duid_llt(struct duid *duid, usec_t t, const uint8_t *addr, size_t addr_len, uint16_t arp_type, size_t *len) {
+ uint16_t time_from_2000y;
+
+ assert(duid);
+ assert(len);
+ assert(addr);
+
+ if (arp_type == ARPHRD_ETHER)
+ assert_return(addr_len == ETH_ALEN, -EINVAL);
+ else if (arp_type == ARPHRD_INFINIBAND)
+ assert_return(addr_len == INFINIBAND_ALEN, -EINVAL);
+ else
+ return -EINVAL;
+
+ if (t < USEC_2000)
+ time_from_2000y = 0;
+ else
+ time_from_2000y = (uint16_t) (((t - USEC_2000) / USEC_PER_SEC) & 0xffffffff);
+
+ unaligned_write_be16(&duid->type, DUID_TYPE_LLT);
+ unaligned_write_be16(&duid->llt.htype, arp_type);
+ unaligned_write_be32(&duid->llt.time, time_from_2000y);
+ memcpy(duid->llt.haddr, addr, addr_len);
+
+ *len = sizeof(duid->type) + sizeof(duid->llt.htype) + sizeof(duid->llt.time) + addr_len;
+
+ return 0;
+}
+
+int dhcp_identifier_set_duid_ll(struct duid *duid, const uint8_t *addr, size_t addr_len, uint16_t arp_type, size_t *len) {
+ assert(duid);
+ assert(len);
+ assert(addr);
+
+ if (arp_type == ARPHRD_ETHER)
+ assert_return(addr_len == ETH_ALEN, -EINVAL);
+ else if (arp_type == ARPHRD_INFINIBAND)
+ assert_return(addr_len == INFINIBAND_ALEN, -EINVAL);
+ else
+ return -EINVAL;
+
+ unaligned_write_be16(&duid->type, DUID_TYPE_LL);
+ unaligned_write_be16(&duid->ll.htype, arp_type);
+ memcpy(duid->ll.haddr, addr, addr_len);
+
+ *len = sizeof(duid->type) + sizeof(duid->ll.htype) + addr_len;
+
+ return 0;
+}
+
+int dhcp_identifier_set_duid_en(struct duid *duid, size_t *len) {
+ sd_id128_t machine_id;
+ uint64_t hash;
+ int r;
+
+ assert(duid);
+ assert(len);
+
+ r = sd_id128_get_machine(&machine_id);
+ if (r < 0) {
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ machine_id = SD_ID128_MAKE(01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f, 10);
+#else
+ return r;
+#endif
+ }
+
+ unaligned_write_be16(&duid->type, DUID_TYPE_EN);
+ unaligned_write_be32(&duid->en.pen, SYSTEMD_PEN);
+
+ *len = sizeof(duid->type) + sizeof(duid->en);
+
+ /* a bit of snake-oil perhaps, but no need to expose the machine-id
+ * directly; duid->en.id might not be aligned, so we need to copy */
+ hash = htole64(siphash24(&machine_id, sizeof(machine_id), HASH_KEY.bytes));
+ memcpy(duid->en.id, &hash, sizeof(duid->en.id));
+
+ return 0;
+}
+
+int dhcp_identifier_set_duid_uuid(struct duid *duid, size_t *len) {
+ sd_id128_t machine_id;
+ int r;
+
+ assert(duid);
+ assert(len);
+
+ r = sd_id128_get_machine_app_specific(APPLICATION_ID, &machine_id);
+ if (r < 0)
+ return r;
+
+ unaligned_write_be16(&duid->type, DUID_TYPE_UUID);
+ memcpy(&duid->raw.data, &machine_id, sizeof(machine_id));
+
+ *len = sizeof(duid->type) + sizeof(machine_id);
+
+ return 0;
+}
+
+int dhcp_identifier_set_iaid(
+ int ifindex,
+ const uint8_t *mac,
+ size_t mac_len,
+ bool legacy_unstable_byteorder,
+ void *_id) {
+ /* name is a pointer to memory in the sd_device struct, so must
+ * have the same scope */
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *name = NULL;
+ uint64_t id;
+ uint32_t id32;
+
+ if (detect_container() <= 0) {
+ /* not in a container, udev will be around */
+ char ifindex_str[1 + DECIMAL_STR_MAX(int)];
+ int r;
+
+ xsprintf(ifindex_str, "n%d", ifindex);
+ if (sd_device_new_from_device_id(&device, ifindex_str) >= 0) {
+ r = sd_device_get_is_initialized(device);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ /* not yet ready */
+ return -EBUSY;
+
+ r = device_is_renaming(device);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ /* device is under renaming */
+ return -EBUSY;
+
+ name = net_get_name_persistent(device);
+ }
+ }
+
+ if (name)
+ id = siphash24(name, strlen(name), HASH_KEY.bytes);
+ else
+ /* fall back to MAC address if no predictable name available */
+ id = siphash24(mac, mac_len, HASH_KEY.bytes);
+
+ id32 = (id & 0xffffffff) ^ (id >> 32);
+
+ if (legacy_unstable_byteorder)
+ /* for historical reasons (a bug), the bits were swapped and thus
+ * the result was endianness dependent. Preserve that behavior. */
+ id32 = __bswap_32(id32);
+ else
+ /* the fixed behavior returns a stable byte order. Since LE is expected
+ * to be more common, swap the bytes on LE to give the same as legacy
+ * behavior. */
+ id32 = be32toh(id32);
+
+ unaligned_write_ne32(_id, id32);
+ return 0;
+}
diff --git a/src/libsystemd-network/dhcp-identifier.h b/src/libsystemd-network/dhcp-identifier.h
new file mode 100644
index 0000000..e9f2ea7
--- /dev/null
+++ b/src/libsystemd-network/dhcp-identifier.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-id128.h"
+
+#include "macro.h"
+#include "sparse-endian.h"
+#include "time-util.h"
+#include "unaligned.h"
+
+#define SYSTEMD_PEN 43793
+
+typedef enum DUIDType {
+ DUID_TYPE_LLT = 1,
+ DUID_TYPE_EN = 2,
+ DUID_TYPE_LL = 3,
+ DUID_TYPE_UUID = 4,
+ _DUID_TYPE_MAX,
+ _DUID_TYPE_INVALID = -1,
+} DUIDType;
+
+/* RFC 3315 section 9.1:
+ * A DUID can be no more than 128 octets long (not including the type code).
+ */
+#define MAX_DUID_LEN 128
+
+/* https://tools.ietf.org/html/rfc3315#section-9.1 */
+struct duid {
+ be16_t type;
+ union {
+ struct {
+ /* DUID_TYPE_LLT */
+ be16_t htype;
+ be32_t time;
+ uint8_t haddr[0];
+ } _packed_ llt;
+ struct {
+ /* DUID_TYPE_EN */
+ be32_t pen;
+ uint8_t id[8];
+ } _packed_ en;
+ struct {
+ /* DUID_TYPE_LL */
+ be16_t htype;
+ uint8_t haddr[0];
+ } _packed_ ll;
+ struct {
+ /* DUID_TYPE_UUID */
+ sd_id128_t uuid;
+ } _packed_ uuid;
+ struct {
+ uint8_t data[MAX_DUID_LEN];
+ } _packed_ raw;
+ };
+} _packed_;
+
+int dhcp_validate_duid_len(uint16_t duid_type, size_t duid_len, bool strict);
+int dhcp_identifier_set_duid_llt(struct duid *duid, usec_t t, const uint8_t *addr, size_t addr_len, uint16_t arp_type, size_t *len);
+int dhcp_identifier_set_duid_ll(struct duid *duid, const uint8_t *addr, size_t addr_len, uint16_t arp_type, size_t *len);
+int dhcp_identifier_set_duid_en(struct duid *duid, size_t *len);
+int dhcp_identifier_set_duid_uuid(struct duid *duid, size_t *len);
+int dhcp_identifier_set_iaid(int ifindex, const uint8_t *mac, size_t mac_len, bool legacy_unstable_byteorder, void *_id);
diff --git a/src/libsystemd-network/dhcp-internal.h b/src/libsystemd-network/dhcp-internal.h
new file mode 100644
index 0000000..40e6b1f
--- /dev/null
+++ b/src/libsystemd-network/dhcp-internal.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <stdint.h>
+
+#include "sd-dhcp-client.h"
+
+#include "dhcp-protocol.h"
+#include "socket-util.h"
+
+typedef struct sd_dhcp_option {
+ unsigned n_ref;
+
+ uint8_t option;
+ void *data;
+ size_t length;
+} sd_dhcp_option;
+
+typedef struct DHCPServerData {
+ struct in_addr *addr;
+ size_t size;
+} DHCPServerData;
+
+extern const struct hash_ops dhcp_option_hash_ops;
+
+int dhcp_network_bind_raw_socket(int ifindex, union sockaddr_union *link, uint32_t xid,
+ const uint8_t *mac_addr, size_t mac_addr_len,
+ const uint8_t *bcast_addr, size_t bcast_addr_len,
+ uint16_t arp_type, uint16_t port);
+int dhcp_network_bind_udp_socket(int ifindex, be32_t address, uint16_t port, int ip_service_type);
+int dhcp_network_send_raw_socket(int s, const union sockaddr_union *link,
+ const void *packet, size_t len);
+int dhcp_network_send_udp_socket(int s, be32_t address, uint16_t port,
+ const void *packet, size_t len);
+
+int dhcp_option_append(DHCPMessage *message, size_t size, size_t *offset, uint8_t overload,
+ uint8_t code, size_t optlen, const void *optval);
+
+typedef int (*dhcp_option_callback_t)(uint8_t code, uint8_t len,
+ const void *option, void *userdata);
+
+int dhcp_option_parse(DHCPMessage *message, size_t len, dhcp_option_callback_t cb, void *userdata, char **error_message);
+
+int dhcp_message_init(DHCPMessage *message, uint8_t op, uint32_t xid,
+ uint8_t type, uint16_t arp_type, size_t optlen,
+ size_t *optoffset);
+
+uint16_t dhcp_packet_checksum(uint8_t *buf, size_t len);
+
+void dhcp_packet_append_ip_headers(DHCPPacket *packet, be32_t source_addr,
+ uint16_t source, be32_t destination_addr,
+ uint16_t destination, uint16_t len, int ip_service_type);
+
+int dhcp_packet_verify_headers(DHCPPacket *packet, size_t len, bool checksum, uint16_t port);
+
+/* If we are invoking callbacks of a dhcp-client, ensure unreffing the
+ * client from the callback doesn't destroy the object we are working
+ * on */
+#define DHCP_CLIENT_DONT_DESTROY(client) \
+ _cleanup_(sd_dhcp_client_unrefp) _unused_ sd_dhcp_client *_dont_destroy_##client = sd_dhcp_client_ref(client)
+
+#define log_dhcp_client_errno(client, error, fmt, ...) log_internal(LOG_DEBUG, error, PROJECT_FILE, __LINE__, __func__, "DHCP CLIENT (0x%x): " fmt, client->xid, ##__VA_ARGS__)
+#define log_dhcp_client(client, fmt, ...) log_dhcp_client_errno(client, 0, fmt, ##__VA_ARGS__)
diff --git a/src/libsystemd-network/dhcp-lease-internal.h b/src/libsystemd-network/dhcp-lease-internal.h
new file mode 100644
index 0000000..49392d1
--- /dev/null
+++ b/src/libsystemd-network/dhcp-lease-internal.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include "sd-dhcp-client.h"
+
+#include "dhcp-internal.h"
+#include "dhcp-protocol.h"
+#include "list.h"
+#include "util.h"
+
+struct sd_dhcp_route {
+ struct in_addr dst_addr;
+ struct in_addr gw_addr;
+ unsigned char dst_prefixlen;
+
+ uint8_t option;
+};
+
+struct sd_dhcp_raw_option {
+ LIST_FIELDS(struct sd_dhcp_raw_option, options);
+
+ uint8_t tag;
+ uint8_t length;
+ void *data;
+};
+
+struct sd_dhcp_lease {
+ unsigned n_ref;
+
+ /* each 0 if unset */
+ uint32_t t1;
+ uint32_t t2;
+ uint32_t lifetime;
+
+ /* each 0 if unset */
+ be32_t address;
+ be32_t server_address;
+ be32_t next_server;
+
+ bool have_subnet_mask;
+ be32_t subnet_mask;
+
+ bool have_broadcast;
+ be32_t broadcast;
+
+ struct in_addr *router;
+ size_t router_size;
+
+ DHCPServerData servers[_SD_DHCP_LEASE_SERVER_TYPE_MAX];
+
+ struct sd_dhcp_route *static_route;
+ size_t static_route_size, static_route_allocated;
+
+ uint16_t mtu; /* 0 if unset */
+
+ char *domainname;
+ char **search_domains;
+ char *hostname;
+ char *root_path;
+
+ void *client_id;
+ size_t client_id_len;
+
+ void *vendor_specific;
+ size_t vendor_specific_len;
+
+ char *timezone;
+
+ LIST_HEAD(struct sd_dhcp_raw_option, private_options);
+};
+
+int dhcp_lease_new(sd_dhcp_lease **ret);
+
+int dhcp_lease_parse_options(uint8_t code, uint8_t len, const void *option, void *userdata);
+int dhcp_lease_parse_search_domains(const uint8_t *option, size_t len, char ***domains);
+int dhcp_lease_insert_private_option(sd_dhcp_lease *lease, uint8_t tag, const void *data, uint8_t len);
+
+int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease);
+
+int dhcp_lease_set_client_id(sd_dhcp_lease *lease, const void *client_id, size_t client_id_len);
diff --git a/src/libsystemd-network/dhcp-network.c b/src/libsystemd-network/dhcp-network.c
new file mode 100644
index 0000000..656482b
--- /dev/null
+++ b/src/libsystemd-network/dhcp-network.c
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/filter.h>
+#include <linux/if_infiniband.h>
+#include <linux/if_packet.h>
+
+#include "dhcp-internal.h"
+#include "fd-util.h"
+#include "socket-util.h"
+#include "unaligned.h"
+
+static int _bind_raw_socket(int ifindex, union sockaddr_union *link,
+ uint32_t xid,
+ const uint8_t *bcast_addr,
+ size_t bcast_addr_len,
+ const struct ether_addr *eth_mac,
+ uint16_t arp_type, uint8_t dhcp_hlen,
+ uint16_t port) {
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_W + BPF_LEN, 0), /* A <- packet length */
+ BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, sizeof(DHCPPacket), 1, 0), /* packet >= DHCPPacket ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, ip.protocol)), /* A <- IP protocol */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 1, 0), /* IP protocol == UDP ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, ip.frag_off)), /* A <- Flags */
+ BPF_STMT(BPF_ALU + BPF_AND + BPF_K, 0x20), /* A <- A & 0x20 (More Fragments bit) */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(DHCPPacket, ip.frag_off)), /* A <- Flags + Fragment offset */
+ BPF_STMT(BPF_ALU + BPF_AND + BPF_K, 0x1fff), /* A <- A & 0x1fff (Fragment offset) */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(DHCPPacket, udp.dest)), /* A <- UDP destination port */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, port, 1, 0), /* UDP destination port == DHCP client port ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, dhcp.op)), /* A <- DHCP op */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, BOOTREPLY, 1, 0), /* op == BOOTREPLY ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, dhcp.htype)), /* A <- DHCP header type */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, arp_type, 1, 0), /* header type == arp_type ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(DHCPPacket, dhcp.xid)), /* A <- client identifier */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, xid, 1, 0), /* client identifier == xid ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(DHCPPacket, dhcp.hlen)), /* A <- MAC address length */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, dhcp_hlen, 1, 0), /* address length == dhcp_hlen ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+
+ /* We only support MAC address length to be either 0 or 6 (ETH_ALEN). Optionally
+ * compare chaddr for ETH_ALEN bytes. */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETH_ALEN, 0, 12), /* A (the MAC address length) == ETH_ALEN ? */
+ BPF_STMT(BPF_LD + BPF_IMM, unaligned_read_be32(&eth_mac->ether_addr_octet[0])), /* A <- 4 bytes of client's MAC */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(DHCPPacket, dhcp.chaddr)), /* A <- 4 bytes of MAC from dhcp.chaddr */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_LD + BPF_IMM, unaligned_read_be16(&eth_mac->ether_addr_octet[4])), /* A <- remainder of client's MAC */
+ BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(DHCPPacket, dhcp.chaddr) + 4), /* A <- remainder of MAC from dhcp.chaddr */
+ BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 1, 0), /* A == 0 ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(DHCPPacket, dhcp.magic)), /* A <- DHCP magic cookie */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DHCP_MAGIC_COOKIE, 1, 0), /* cookie == DHCP magic cookie ? */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
+ BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
+ };
+ struct sock_fprog fprog = {
+ .len = ELEMENTSOF(filter),
+ .filter = filter
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(ifindex > 0);
+ assert(link);
+
+ s = socket(AF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt_int(s, SOL_PACKET, PACKET_AUXDATA, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
+ if (r < 0)
+ return -errno;
+
+ link->ll = (struct sockaddr_ll) {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htobe16(ETH_P_IP),
+ .sll_ifindex = ifindex,
+ .sll_hatype = htobe16(arp_type),
+ .sll_halen = bcast_addr_len,
+ };
+ memcpy(link->ll.sll_addr, bcast_addr, bcast_addr_len); /* We may overflow link->ll. link->ll_buffer ensures we have enough space. */
+
+ r = bind(s, &link->sa, SOCKADDR_LL_LEN(link->ll));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+int dhcp_network_bind_raw_socket(int ifindex, union sockaddr_union *link, uint32_t xid,
+ const uint8_t *mac_addr, size_t mac_addr_len,
+ const uint8_t *bcast_addr, size_t bcast_addr_len,
+ uint16_t arp_type, uint16_t port) {
+ static const uint8_t eth_bcast[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+ /* Default broadcast address for IPoIB */
+ static const uint8_t ib_bcast[] = {
+ 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff
+ };
+ struct ether_addr eth_mac = { { 0, 0, 0, 0, 0, 0 } };
+ const uint8_t *default_bcast_addr;
+ size_t expected_bcast_addr_len;
+ uint8_t dhcp_hlen = 0;
+
+ if (arp_type == ARPHRD_ETHER) {
+ assert_return(mac_addr_len == ETH_ALEN, -EINVAL);
+ memcpy(&eth_mac, mac_addr, ETH_ALEN);
+ dhcp_hlen = ETH_ALEN;
+
+ default_bcast_addr = eth_bcast;
+ expected_bcast_addr_len = ETH_ALEN;
+ } else if (arp_type == ARPHRD_INFINIBAND) {
+ default_bcast_addr = ib_bcast;
+ expected_bcast_addr_len = INFINIBAND_ALEN;
+ } else
+ return -EINVAL;
+
+ if (bcast_addr && bcast_addr_len > 0)
+ assert_return(bcast_addr_len == expected_bcast_addr_len, -EINVAL);
+ else {
+ bcast_addr = default_bcast_addr;
+ bcast_addr_len = expected_bcast_addr_len;
+ }
+
+ return _bind_raw_socket(ifindex, link, xid, bcast_addr, bcast_addr_len,
+ &eth_mac, arp_type, dhcp_hlen, port);
+}
+
+int dhcp_network_bind_udp_socket(int ifindex, be32_t address, uint16_t port, int ip_service_type) {
+ union sockaddr_union src = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(port),
+ .in.sin_addr.s_addr = address,
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ s = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return -errno;
+
+ if (ip_service_type >= 0)
+ r = setsockopt_int(s, IPPROTO_IP, IP_TOS, ip_service_type);
+ else
+ r = setsockopt_int(s, IPPROTO_IP, IP_TOS, IPTOS_CLASS_CS6);
+
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ if (ifindex > 0) {
+ r = socket_bind_to_ifindex(s, ifindex);
+ if (r < 0)
+ return r;
+ }
+
+ if (address == INADDR_ANY) {
+ r = setsockopt_int(s, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_SOCKET, SO_BROADCAST, true);
+ if (r < 0)
+ return r;
+
+ } else {
+ r = setsockopt_int(s, IPPROTO_IP, IP_FREEBIND, true);
+ if (r < 0)
+ return r;
+ }
+
+ r = bind(s, &src.sa, sizeof(src.in));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+int dhcp_network_send_raw_socket(int s, const union sockaddr_union *link,
+ const void *packet, size_t len) {
+ int r;
+
+ assert(link);
+ assert(packet);
+ assert(len);
+
+ r = sendto(s, packet, len, 0, &link->sa, SOCKADDR_LL_LEN(link->ll));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int dhcp_network_send_udp_socket(int s, be32_t address, uint16_t port,
+ const void *packet, size_t len) {
+ union sockaddr_union dest = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(port),
+ .in.sin_addr.s_addr = address,
+ };
+ int r;
+
+ assert(s >= 0);
+ assert(packet);
+ assert(len);
+
+ r = sendto(s, packet, len, 0, &dest.sa, sizeof(dest.in));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/libsystemd-network/dhcp-option.c b/src/libsystemd-network/dhcp-option.c
new file mode 100644
index 0000000..7e3fe43
--- /dev/null
+++ b/src/libsystemd-network/dhcp-option.c
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "dhcp-internal.h"
+#include "dhcp-server-internal.h"
+#include "memory-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+static int option_append(uint8_t options[], size_t size, size_t *offset,
+ uint8_t code, size_t optlen, const void *optval) {
+ assert(options);
+ assert(offset);
+
+ if (code != SD_DHCP_OPTION_END)
+ /* always make sure there is space for an END option */
+ size--;
+
+ switch (code) {
+
+ case SD_DHCP_OPTION_PAD:
+ case SD_DHCP_OPTION_END:
+ if (*offset + 1 > size)
+ return -ENOBUFS;
+
+ options[*offset] = code;
+ *offset += 1;
+ break;
+
+ case SD_DHCP_OPTION_USER_CLASS: {
+ size_t total = 0;
+ char **s;
+
+ if (strv_isempty((char **) optval))
+ return -EINVAL;
+
+ STRV_FOREACH(s, (char **) optval) {
+ size_t len = strlen(*s);
+
+ if (len > 255 || len == 0)
+ return -EINVAL;
+
+ total += 1 + len;
+ }
+
+ if (*offset + 2 + total > size)
+ return -ENOBUFS;
+
+ options[*offset] = code;
+ options[*offset + 1] = total;
+ *offset += 2;
+
+ STRV_FOREACH(s, (char **) optval) {
+ size_t len = strlen(*s);
+
+ options[*offset] = len;
+ memcpy(&options[*offset + 1], *s, len);
+ *offset += 1 + len;
+ }
+
+ break;
+ }
+ case SD_DHCP_OPTION_SIP_SERVER:
+ if (*offset + 3 + optlen > size)
+ return -ENOBUFS;
+
+ options[*offset] = code;
+ options[*offset + 1] = optlen + 1;
+ options[*offset + 2] = 1;
+
+ memcpy_safe(&options[*offset + 3], optval, optlen);
+ *offset += 3 + optlen;
+
+ break;
+ case SD_DHCP_OPTION_VENDOR_SPECIFIC: {
+ OrderedHashmap *s = (OrderedHashmap *) optval;
+ struct sd_dhcp_option *p;
+ size_t l = 0;
+
+ ORDERED_HASHMAP_FOREACH(p, s)
+ l += p->length + 2;
+
+ if (*offset + l + 2 > size)
+ return -ENOBUFS;
+
+ options[*offset] = code;
+ options[*offset + 1] = l;
+
+ *offset += 2;
+
+ ORDERED_HASHMAP_FOREACH(p, s) {
+ options[*offset] = p->option;
+ options[*offset + 1] = p->length;
+ memcpy(&options[*offset + 2], p->data, p->length);
+ *offset += 2 + p->length;
+ }
+
+ break;
+ }
+ default:
+ if (*offset + 2 + optlen > size)
+ return -ENOBUFS;
+
+ options[*offset] = code;
+ options[*offset + 1] = optlen;
+
+ memcpy_safe(&options[*offset + 2], optval, optlen);
+ *offset += 2 + optlen;
+
+ break;
+ }
+
+ return 0;
+}
+
+int dhcp_option_append(DHCPMessage *message, size_t size, size_t *offset,
+ uint8_t overload,
+ uint8_t code, size_t optlen, const void *optval) {
+ const bool use_file = overload & DHCP_OVERLOAD_FILE;
+ const bool use_sname = overload & DHCP_OVERLOAD_SNAME;
+ int r;
+
+ assert(message);
+ assert(offset);
+
+ /* If *offset is in range [0, size), we are writing to ->options,
+ * if *offset is in range [size, size + sizeof(message->file)) and use_file, we are writing to ->file,
+ * if *offset is in range [size + use_file*sizeof(message->file), size + use_file*sizeof(message->file) + sizeof(message->sname))
+ * and use_sname, we are writing to ->sname.
+ */
+
+ if (*offset < size) {
+ /* still space in the options array */
+ r = option_append(message->options, size, offset, code, optlen, optval);
+ if (r >= 0)
+ return 0;
+ else if (r == -ENOBUFS && (use_file || use_sname)) {
+ /* did not fit, but we have more buffers to try
+ close the options array and move the offset to its end */
+ r = option_append(message->options, size, offset, SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *offset = size;
+ } else
+ return r;
+ }
+
+ if (use_file) {
+ size_t file_offset = *offset - size;
+
+ if (file_offset < sizeof(message->file)) {
+ /* still space in the 'file' array */
+ r = option_append(message->file, sizeof(message->file), &file_offset, code, optlen, optval);
+ if (r >= 0) {
+ *offset = size + file_offset;
+ return 0;
+ } else if (r == -ENOBUFS && use_sname) {
+ /* did not fit, but we have more buffers to try
+ close the file array and move the offset to its end */
+ r = option_append(message->options, size, offset, SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *offset = size + sizeof(message->file);
+ } else
+ return r;
+ }
+ }
+
+ if (use_sname) {
+ size_t sname_offset = *offset - size - use_file*sizeof(message->file);
+
+ if (sname_offset < sizeof(message->sname)) {
+ /* still space in the 'sname' array */
+ r = option_append(message->sname, sizeof(message->sname), &sname_offset, code, optlen, optval);
+ if (r >= 0) {
+ *offset = size + use_file*sizeof(message->file) + sname_offset;
+ return 0;
+ } else
+ /* no space, or other error, give up */
+ return r;
+ }
+ }
+
+ return -ENOBUFS;
+}
+
+static int parse_options(const uint8_t options[], size_t buflen, uint8_t *overload,
+ uint8_t *message_type, char **error_message, dhcp_option_callback_t cb,
+ void *userdata) {
+ uint8_t code, len;
+ const uint8_t *option;
+ size_t offset = 0;
+
+ while (offset < buflen) {
+ code = options[offset ++];
+
+ switch (code) {
+ case SD_DHCP_OPTION_PAD:
+ continue;
+
+ case SD_DHCP_OPTION_END:
+ return 0;
+ }
+
+ if (buflen < offset + 1)
+ return -ENOBUFS;
+
+ len = options[offset ++];
+
+ if (buflen < offset + len)
+ return -EINVAL;
+
+ option = &options[offset];
+
+ switch (code) {
+ case SD_DHCP_OPTION_MESSAGE_TYPE:
+ if (len != 1)
+ return -EINVAL;
+
+ if (message_type)
+ *message_type = *option;
+
+ break;
+
+ case SD_DHCP_OPTION_ERROR_MESSAGE:
+ if (len == 0)
+ return -EINVAL;
+
+ if (error_message) {
+ _cleanup_free_ char *string = NULL;
+
+ /* Accept a trailing NUL byte */
+ if (memchr(option, 0, len - 1))
+ return -EINVAL;
+
+ string = memdup_suffix0((const char *) option, len);
+ if (!string)
+ return -ENOMEM;
+
+ if (!ascii_is_valid(string))
+ return -EINVAL;
+
+ free_and_replace(*error_message, string);
+ }
+
+ break;
+ case SD_DHCP_OPTION_OVERLOAD:
+ if (len != 1)
+ return -EINVAL;
+
+ if (overload)
+ *overload = *option;
+
+ break;
+
+ default:
+ if (cb)
+ cb(code, len, option, userdata);
+
+ break;
+ }
+
+ offset += len;
+ }
+
+ if (offset < buflen)
+ return -EINVAL;
+
+ return 0;
+}
+
+int dhcp_option_parse(DHCPMessage *message, size_t len, dhcp_option_callback_t cb, void *userdata, char **_error_message) {
+ _cleanup_free_ char *error_message = NULL;
+ uint8_t overload = 0;
+ uint8_t message_type = 0;
+ int r;
+
+ if (!message)
+ return -EINVAL;
+
+ if (len < sizeof(DHCPMessage))
+ return -EINVAL;
+
+ len -= sizeof(DHCPMessage);
+
+ r = parse_options(message->options, len, &overload, &message_type, &error_message, cb, userdata);
+ if (r < 0)
+ return r;
+
+ if (overload & DHCP_OVERLOAD_FILE) {
+ r = parse_options(message->file, sizeof(message->file), NULL, &message_type, &error_message, cb, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (overload & DHCP_OVERLOAD_SNAME) {
+ r = parse_options(message->sname, sizeof(message->sname), NULL, &message_type, &error_message, cb, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ if (message_type == 0)
+ return -ENOMSG;
+
+ if (_error_message && IN_SET(message_type, DHCP_NAK, DHCP_DECLINE))
+ *_error_message = TAKE_PTR(error_message);
+
+ return message_type;
+}
+
+static sd_dhcp_option* dhcp_option_free(sd_dhcp_option *i) {
+ if (!i)
+ return NULL;
+
+ free(i->data);
+ return mfree(i);
+}
+
+int sd_dhcp_option_new(uint8_t option, const void *data, size_t length, sd_dhcp_option **ret) {
+ assert_return(ret, -EINVAL);
+ assert_return(length == 0 || data, -EINVAL);
+
+ _cleanup_free_ void *q = memdup(data, length);
+ if (!q)
+ return -ENOMEM;
+
+ sd_dhcp_option *p = new(sd_dhcp_option, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (sd_dhcp_option) {
+ .n_ref = 1,
+ .option = option,
+ .length = length,
+ .data = TAKE_PTR(q),
+ };
+
+ *ret = TAKE_PTR(p);
+ return 0;
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp_option, sd_dhcp_option, dhcp_option_free);
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+ dhcp_option_hash_ops,
+ void,
+ trivial_hash_func,
+ trivial_compare_func,
+ sd_dhcp_option,
+ sd_dhcp_option_unref);
diff --git a/src/libsystemd-network/dhcp-packet.c b/src/libsystemd-network/dhcp-packet.c
new file mode 100644
index 0000000..cace916
--- /dev/null
+++ b/src/libsystemd-network/dhcp-packet.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+#include <string.h>
+
+#include "dhcp-internal.h"
+#include "dhcp-protocol.h"
+
+#define DHCP_CLIENT_MIN_OPTIONS_SIZE 312
+
+int dhcp_message_init(DHCPMessage *message, uint8_t op, uint32_t xid,
+ uint8_t type, uint16_t arp_type, size_t optlen,
+ size_t *optoffset) {
+ size_t offset = 0;
+ int r;
+
+ assert(IN_SET(op, BOOTREQUEST, BOOTREPLY));
+ assert(IN_SET(arp_type, ARPHRD_ETHER, ARPHRD_INFINIBAND));
+
+ message->op = op;
+ message->htype = arp_type;
+ message->hlen = (arp_type == ARPHRD_ETHER) ? ETHER_ADDR_LEN : 0;
+ message->xid = htobe32(xid);
+ message->magic = htobe32(DHCP_MAGIC_COOKIE);
+
+ r = dhcp_option_append(message, optlen, &offset, 0,
+ SD_DHCP_OPTION_MESSAGE_TYPE, 1, &type);
+ if (r < 0)
+ return r;
+
+ *optoffset = offset;
+
+ return 0;
+}
+
+uint16_t dhcp_packet_checksum(uint8_t *buf, size_t len) {
+ uint64_t *buf_64 = (uint64_t*)buf;
+ uint64_t *end_64 = buf_64 + (len / sizeof(uint64_t));
+ uint64_t sum = 0;
+
+ /* See RFC1071 */
+
+ while (buf_64 < end_64) {
+ sum += *buf_64;
+ if (sum < *buf_64)
+ /* wrap around in one's complement */
+ sum++;
+
+ buf_64++;
+ }
+
+ if (len % sizeof(uint64_t)) {
+ /* If the buffer is not aligned to 64-bit, we need
+ to zero-pad the last few bytes and add them in */
+ uint64_t buf_tail = 0;
+
+ memcpy(&buf_tail, buf_64, len % sizeof(uint64_t));
+
+ sum += buf_tail;
+ if (sum < buf_tail)
+ /* wrap around */
+ sum++;
+ }
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return ~sum;
+}
+
+void dhcp_packet_append_ip_headers(DHCPPacket *packet, be32_t source_addr,
+ uint16_t source_port, be32_t destination_addr,
+ uint16_t destination_port, uint16_t len, int ip_service_type) {
+ packet->ip.version = IPVERSION;
+ packet->ip.ihl = DHCP_IP_SIZE / 4;
+ packet->ip.tot_len = htobe16(len);
+
+ if (ip_service_type >= 0)
+ packet->ip.tos = ip_service_type;
+ else
+ packet->ip.tos = IPTOS_CLASS_CS6;
+
+ packet->ip.protocol = IPPROTO_UDP;
+ packet->ip.saddr = source_addr;
+ packet->ip.daddr = destination_addr;
+
+ packet->udp.source = htobe16(source_port);
+ packet->udp.dest = htobe16(destination_port);
+
+ packet->udp.len = htobe16(len - DHCP_IP_SIZE);
+
+ packet->ip.check = packet->udp.len;
+ packet->udp.check = dhcp_packet_checksum((uint8_t*)&packet->ip.ttl, len - 8);
+
+ packet->ip.ttl = IPDEFTTL;
+ packet->ip.check = 0;
+ packet->ip.check = dhcp_packet_checksum((uint8_t*)&packet->ip, DHCP_IP_SIZE);
+}
+
+int dhcp_packet_verify_headers(DHCPPacket *packet, size_t len, bool checksum, uint16_t port) {
+ size_t hdrlen;
+
+ assert(packet);
+
+ /* IP */
+
+ if (packet->ip.version != IPVERSION)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: not IPv4");
+
+ if (packet->ip.ihl < 5)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: IPv4 IHL (%u words) invalid",
+ packet->ip.ihl);
+
+ hdrlen = packet->ip.ihl * 4;
+ if (hdrlen < 20)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: IPv4 IHL (%zu bytes) "
+ "smaller than minimum (20 bytes)",
+ hdrlen);
+
+ if (len < hdrlen)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: packet (%zu bytes) "
+ "smaller than expected (%zu) by IP header",
+ len, hdrlen);
+
+ /* UDP */
+
+ if (packet->ip.protocol != IPPROTO_UDP)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: not UDP");
+
+ if (len < hdrlen + be16toh(packet->udp.len))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: packet (%zu bytes) "
+ "smaller than expected (%zu) by UDP header",
+ len, hdrlen + be16toh(packet->udp.len));
+
+ if (be16toh(packet->udp.dest) != port)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: to port %u, which "
+ "is not the DHCP client port (%u)",
+ be16toh(packet->udp.dest), port);
+
+ /* checksums - computing these is relatively expensive, so only do it
+ if all the other checks have passed
+ */
+
+ if (dhcp_packet_checksum((uint8_t*)&packet->ip, hdrlen))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: invalid IP checksum");
+
+ if (checksum && packet->udp.check) {
+ packet->ip.check = packet->udp.len;
+ packet->ip.ttl = 0;
+
+ if (dhcp_packet_checksum((uint8_t*)&packet->ip.ttl,
+ be16toh(packet->udp.len) + 12))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "ignoring packet: invalid UDP checksum");
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd-network/dhcp-protocol.h b/src/libsystemd-network/dhcp-protocol.h
new file mode 100644
index 0000000..11f4201
--- /dev/null
+++ b/src/libsystemd-network/dhcp-protocol.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <stdint.h>
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+struct DHCPMessage {
+ uint8_t op;
+ uint8_t htype;
+ uint8_t hlen;
+ uint8_t hops;
+ be32_t xid;
+ be16_t secs;
+ be16_t flags;
+ be32_t ciaddr;
+ be32_t yiaddr;
+ be32_t siaddr;
+ be32_t giaddr;
+ uint8_t chaddr[16];
+ uint8_t sname[64];
+ uint8_t file[128];
+ be32_t magic;
+ uint8_t options[0];
+} _packed_;
+
+typedef struct DHCPMessage DHCPMessage;
+
+struct DHCPPacket {
+ struct iphdr ip;
+ struct udphdr udp;
+ DHCPMessage dhcp;
+} _packed_;
+
+typedef struct DHCPPacket DHCPPacket;
+
+#define DHCP_IP_SIZE (int32_t)(sizeof(struct iphdr))
+#define DHCP_IP_UDP_SIZE (int32_t)(sizeof(struct udphdr) + DHCP_IP_SIZE)
+#define DHCP_MESSAGE_SIZE (int32_t)(sizeof(DHCPMessage))
+#define DHCP_DEFAULT_MIN_SIZE 576 /* the minimum internet hosts must be able to receive */
+#define DHCP_MIN_OPTIONS_SIZE (DHCP_DEFAULT_MIN_SIZE - DHCP_IP_UDP_SIZE - DHCP_MESSAGE_SIZE)
+#define DHCP_MAGIC_COOKIE (uint32_t)(0x63825363)
+
+enum {
+ DHCP_PORT_SERVER = 67,
+ DHCP_PORT_CLIENT = 68,
+};
+
+enum DHCPState {
+ DHCP_STATE_INIT = 0,
+ DHCP_STATE_SELECTING = 1,
+ DHCP_STATE_INIT_REBOOT = 2,
+ DHCP_STATE_REBOOTING = 3,
+ DHCP_STATE_REQUESTING = 4,
+ DHCP_STATE_BOUND = 5,
+ DHCP_STATE_RENEWING = 6,
+ DHCP_STATE_REBINDING = 7,
+ DHCP_STATE_STOPPED = 8,
+};
+
+typedef enum DHCPState DHCPState;
+
+enum {
+ BOOTREQUEST = 1,
+ BOOTREPLY = 2,
+};
+
+enum {
+ DHCP_DISCOVER = 1,
+ DHCP_OFFER = 2,
+ DHCP_REQUEST = 3,
+ DHCP_DECLINE = 4,
+ DHCP_ACK = 5,
+ DHCP_NAK = 6,
+ DHCP_RELEASE = 7,
+ DHCP_INFORM = 8,
+ DHCP_FORCERENEW = 9,
+};
+
+enum {
+ DHCP_OVERLOAD_FILE = 1,
+ DHCP_OVERLOAD_SNAME = 2,
+};
+
+#define DHCP_MAX_FQDN_LENGTH 255
+
+enum {
+ DHCP_FQDN_FLAG_S = (1 << 0),
+ DHCP_FQDN_FLAG_O = (1 << 1),
+ DHCP_FQDN_FLAG_E = (1 << 2),
+ DHCP_FQDN_FLAG_N = (1 << 3),
+};
diff --git a/src/libsystemd-network/dhcp-server-internal.h b/src/libsystemd-network/dhcp-server-internal.h
new file mode 100644
index 0000000..b57737e
--- /dev/null
+++ b/src/libsystemd-network/dhcp-server-internal.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include "sd-dhcp-server.h"
+#include "sd-event.h"
+
+#include "dhcp-internal.h"
+#include "hashmap.h"
+#include "log.h"
+#include "time-util.h"
+
+typedef enum DHCPRawOption {
+ DHCP_RAW_OPTION_DATA_UINT8,
+ DHCP_RAW_OPTION_DATA_UINT16,
+ DHCP_RAW_OPTION_DATA_UINT32,
+ DHCP_RAW_OPTION_DATA_STRING,
+ DHCP_RAW_OPTION_DATA_IPV4ADDRESS,
+ DHCP_RAW_OPTION_DATA_IPV6ADDRESS,
+ _DHCP_RAW_OPTION_DATA_MAX,
+ _DHCP_RAW_OPTION_DATA_INVALID,
+} DHCPRawOption;
+
+typedef struct DHCPClientId {
+ size_t length;
+ void *data;
+} DHCPClientId;
+
+typedef struct DHCPLease {
+ DHCPClientId client_id;
+
+ be32_t address;
+ be32_t gateway;
+ uint8_t chaddr[16];
+ usec_t expiration;
+} DHCPLease;
+
+struct sd_dhcp_server {
+ unsigned n_ref;
+
+ sd_event *event;
+ int event_priority;
+ sd_event_source *receive_message;
+ int fd;
+ int fd_raw;
+
+ int ifindex;
+ be32_t address;
+ be32_t netmask;
+ be32_t subnet;
+ uint32_t pool_offset;
+ uint32_t pool_size;
+
+ char *timezone;
+
+ DHCPServerData servers[_SD_DHCP_LEASE_SERVER_TYPE_MAX];
+
+ OrderedHashmap *extra_options;
+ OrderedHashmap *vendor_options;
+
+ bool emit_router;
+
+ Hashmap *leases_by_client_id;
+ DHCPLease **bound_leases;
+ DHCPLease invalid_lease;
+
+ uint32_t max_lease_time, default_lease_time;
+
+ sd_dhcp_server_callback_t callback;
+ void *callback_userdata;
+};
+
+typedef struct DHCPRequest {
+ /* received message */
+ DHCPMessage *message;
+
+ /* options */
+ DHCPClientId client_id;
+ size_t max_optlen;
+ be32_t server_id;
+ be32_t requested_ip;
+ uint32_t lifetime;
+} DHCPRequest;
+
+#define log_dhcp_server(client, fmt, ...) log_internal(LOG_DEBUG, 0, PROJECT_FILE, __LINE__, __func__, "DHCP SERVER: " fmt, ##__VA_ARGS__)
+#define log_dhcp_server_errno(client, error, fmt, ...) log_internal(LOG_DEBUG, error, PROJECT_FILE, __LINE__, __func__, "DHCP SERVER: " fmt, ##__VA_ARGS__)
+
+int dhcp_server_handle_message(sd_dhcp_server *server, DHCPMessage *message,
+ size_t length);
+int dhcp_server_send_packet(sd_dhcp_server *server,
+ DHCPRequest *req, DHCPPacket *packet,
+ int type, size_t optoffset);
+
+void client_id_hash_func(const DHCPClientId *p, struct siphash *state);
+int client_id_compare_func(const DHCPClientId *a, const DHCPClientId *b);
diff --git a/src/libsystemd-network/dhcp6-internal.h b/src/libsystemd-network/dhcp6-internal.h
new file mode 100644
index 0000000..9a32b00
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-internal.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+#include <netinet/in.h>
+
+#include "sd-event.h"
+
+#include "list.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "sparse-endian.h"
+
+typedef struct sd_dhcp6_option {
+ unsigned n_ref;
+
+ uint32_t enterprise_identifier;
+ uint16_t option;
+ void *data;
+ size_t length;
+} sd_dhcp6_option;
+
+extern const struct hash_ops dhcp6_option_hash_ops;
+
+/* Common option header */
+typedef struct DHCP6Option {
+ be16_t code;
+ be16_t len;
+ uint8_t data[];
+} _packed_ DHCP6Option;
+
+/* Address option */
+struct iaaddr {
+ struct in6_addr address;
+ be32_t lifetime_preferred;
+ be32_t lifetime_valid;
+} _packed_;
+
+/* Prefix Delegation Prefix option */
+struct iapdprefix {
+ be32_t lifetime_preferred;
+ be32_t lifetime_valid;
+ uint8_t prefixlen;
+ struct in6_addr address;
+} _packed_;
+
+typedef struct DHCP6Address DHCP6Address;
+
+struct DHCP6Address {
+ LIST_FIELDS(DHCP6Address, addresses);
+
+ union {
+ struct iaaddr iaaddr;
+ struct iapdprefix iapdprefix;
+ };
+};
+
+/* Non-temporary Address option */
+struct ia_na {
+ be32_t id;
+ be32_t lifetime_t1;
+ be32_t lifetime_t2;
+} _packed_;
+
+/* Prefix Delegation option */
+struct ia_pd {
+ be32_t id;
+ be32_t lifetime_t1;
+ be32_t lifetime_t2;
+} _packed_;
+
+/* Temporary Address option */
+struct ia_ta {
+ be32_t id;
+} _packed_;
+
+struct DHCP6IA {
+ uint16_t type;
+ union {
+ struct ia_na ia_na;
+ struct ia_pd ia_pd;
+ struct ia_ta ia_ta;
+ };
+
+ LIST_HEAD(DHCP6Address, addresses);
+};
+
+typedef struct DHCP6IA DHCP6IA;
+
+#define log_dhcp6_client_errno(p, error, fmt, ...) log_internal(LOG_DEBUG, error, PROJECT_FILE, __LINE__, __func__, "DHCPv6 CLIENT: " fmt, ##__VA_ARGS__)
+#define log_dhcp6_client(p, fmt, ...) log_dhcp6_client_errno(p, 0, fmt, ##__VA_ARGS__)
+
+int dhcp6_option_append(uint8_t **buf, size_t *buflen, uint16_t code,
+ size_t optlen, const void *optval);
+int dhcp6_option_append_ia(uint8_t **buf, size_t *buflen, const DHCP6IA *ia);
+int dhcp6_option_append_pd(uint8_t *buf, size_t len, const DHCP6IA *pd, DHCP6Address *hint_pd_prefix);
+int dhcp6_option_append_fqdn(uint8_t **buf, size_t *buflen, const char *fqdn);
+int dhcp6_option_append_user_class(uint8_t **buf, size_t *buflen, char * const *user_class);
+int dhcp6_option_append_vendor_class(uint8_t **buf, size_t *buflen, char * const *user_class);
+int dhcp6_option_append_vendor_option(uint8_t **buf, size_t *buflen, OrderedHashmap *vendor_options);
+int dhcp6_option_parse(uint8_t **buf, size_t *buflen, uint16_t *optcode,
+ size_t *optlen, uint8_t **optvalue);
+int dhcp6_option_parse_status(DHCP6Option *option, size_t len);
+int dhcp6_option_parse_ia(DHCP6Option *iaoption, DHCP6IA *ia, uint16_t *ret_status_code);
+int dhcp6_option_parse_ip6addrs(uint8_t *optval, uint16_t optlen,
+ struct in6_addr **addrs, size_t count,
+ size_t *allocated);
+int dhcp6_option_parse_domainname_list(const uint8_t *optval, uint16_t optlen,
+ char ***str_arr);
+int dhcp6_option_parse_domainname(const uint8_t *optval, uint16_t optlen, char **str);
+
+int dhcp6_network_bind_udp_socket(int ifindex, struct in6_addr *address);
+int dhcp6_network_send_udp_socket(int s, struct in6_addr *address,
+ const void *packet, size_t len);
+
+const char *dhcp6_message_type_to_string(int s) _const_;
+int dhcp6_message_type_from_string(const char *s) _pure_;
+const char *dhcp6_message_status_to_string(int s) _const_;
+int dhcp6_message_status_from_string(const char *s) _pure_;
diff --git a/src/libsystemd-network/dhcp6-lease-internal.h b/src/libsystemd-network/dhcp6-lease-internal.h
new file mode 100644
index 0000000..e9e2362
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-lease-internal.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <stdint.h>
+
+#include "sd-dhcp6-lease.h"
+
+#include "dhcp6-internal.h"
+
+struct sd_dhcp6_lease {
+ unsigned n_ref;
+
+ uint8_t *serverid;
+ size_t serverid_len;
+ uint8_t preference;
+ bool rapid_commit;
+
+ DHCP6IA ia;
+ DHCP6IA pd;
+
+ DHCP6Address *addr_iter;
+ DHCP6Address *prefix_iter;
+
+ struct in6_addr *dns;
+ size_t dns_count;
+ size_t dns_allocated;
+ char **domains;
+ size_t domains_count;
+ struct in6_addr *ntp;
+ size_t ntp_count;
+ size_t ntp_allocated;
+ char **ntp_fqdn;
+ size_t ntp_fqdn_count;
+ char *fqdn;
+};
+
+int dhcp6_lease_ia_rebind_expire(const DHCP6IA *ia, uint32_t *expire);
+DHCP6IA *dhcp6_lease_free_ia(DHCP6IA *ia);
+
+int dhcp6_lease_set_serverid(sd_dhcp6_lease *lease, const uint8_t *id,
+ size_t len);
+int dhcp6_lease_get_serverid(sd_dhcp6_lease *lease, uint8_t **id, size_t *len);
+int dhcp6_lease_set_preference(sd_dhcp6_lease *lease, uint8_t preference);
+int dhcp6_lease_get_preference(sd_dhcp6_lease *lease, uint8_t *preference);
+int dhcp6_lease_set_rapid_commit(sd_dhcp6_lease *lease);
+int dhcp6_lease_get_rapid_commit(sd_dhcp6_lease *lease, bool *rapid_commit);
+
+int dhcp6_lease_get_iaid(sd_dhcp6_lease *lease, be32_t *iaid);
+int dhcp6_lease_get_pd_iaid(sd_dhcp6_lease *lease, be32_t *iaid);
+
+int dhcp6_lease_set_dns(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen);
+int dhcp6_lease_set_domains(sd_dhcp6_lease *lease, uint8_t *optval,
+ size_t optlen);
+int dhcp6_lease_set_ntp(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen);
+int dhcp6_lease_set_sntp(sd_dhcp6_lease *lease, uint8_t *optval,
+ size_t optlen) ;
+int dhcp6_lease_set_fqdn(sd_dhcp6_lease *lease, const uint8_t *optval, size_t optlen);
+
+int dhcp6_lease_new(sd_dhcp6_lease **ret);
diff --git a/src/libsystemd-network/dhcp6-network.c b/src/libsystemd-network/dhcp6-network.c
new file mode 100644
index 0000000..4f7bd53
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-network.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/if_packet.h>
+
+#include "dhcp6-internal.h"
+#include "dhcp6-protocol.h"
+#include "fd-util.h"
+#include "socket-util.h"
+
+int dhcp6_network_bind_udp_socket(int ifindex, struct in6_addr *local_address) {
+ union sockaddr_union src = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(DHCP6_PORT_CLIENT),
+ .in6.sin6_scope_id = ifindex,
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(ifindex > 0);
+ assert(local_address);
+
+ src.in6.sin6_addr = *local_address;
+
+ s = socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_UDP);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_V6ONLY, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, false);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ r = bind(s, &src.sa, sizeof(src.in6));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(s);
+}
+
+int dhcp6_network_send_udp_socket(int s, struct in6_addr *server_address,
+ const void *packet, size_t len) {
+ union sockaddr_union dest = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(DHCP6_PORT_SERVER),
+ };
+ int r;
+
+ assert(server_address);
+
+ memcpy(&dest.in6.sin6_addr, server_address, sizeof(dest.in6.sin6_addr));
+
+ r = sendto(s, packet, len, 0, &dest.sa, sizeof(dest.in6));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/libsystemd-network/dhcp6-option.c b/src/libsystemd-network/dhcp6-option.c
new file mode 100644
index 0000000..9f47c1b
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-option.c
@@ -0,0 +1,799 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <netinet/in.h>
+
+#include "sd-dhcp6-client.h"
+
+#include "alloc-util.h"
+#include "dhcp-identifier.h"
+#include "dhcp6-internal.h"
+#include "dhcp6-lease-internal.h"
+#include "dhcp6-protocol.h"
+#include "dns-domain.h"
+#include "memory-util.h"
+#include "sparse-endian.h"
+#include "strv.h"
+#include "unaligned.h"
+
+typedef struct DHCP6StatusOption {
+ struct DHCP6Option option;
+ be16_t status;
+ char msg[];
+} _packed_ DHCP6StatusOption;
+
+typedef struct DHCP6AddressOption {
+ struct DHCP6Option option;
+ struct iaaddr iaaddr;
+ uint8_t options[];
+} _packed_ DHCP6AddressOption;
+
+typedef struct DHCP6PDPrefixOption {
+ struct DHCP6Option option;
+ struct iapdprefix iapdprefix;
+ uint8_t options[];
+} _packed_ DHCP6PDPrefixOption;
+
+#define DHCP6_OPTION_IA_NA_LEN (sizeof(struct ia_na))
+#define DHCP6_OPTION_IA_PD_LEN (sizeof(struct ia_pd))
+#define DHCP6_OPTION_IA_TA_LEN (sizeof(struct ia_ta))
+
+static int option_append_hdr(uint8_t **buf, size_t *buflen, uint16_t optcode,
+ size_t optlen) {
+ DHCP6Option *option = (DHCP6Option*) *buf;
+
+ assert_return(buf, -EINVAL);
+ assert_return(*buf, -EINVAL);
+ assert_return(buflen, -EINVAL);
+
+ if (optlen > 0xffff || *buflen < optlen + offsetof(DHCP6Option, data))
+ return -ENOBUFS;
+
+ option->code = htobe16(optcode);
+ option->len = htobe16(optlen);
+
+ *buf += offsetof(DHCP6Option, data);
+ *buflen -= offsetof(DHCP6Option, data);
+
+ return 0;
+}
+
+int dhcp6_option_append(uint8_t **buf, size_t *buflen, uint16_t code,
+ size_t optlen, const void *optval) {
+ int r;
+
+ assert_return(optval || optlen == 0, -EINVAL);
+
+ r = option_append_hdr(buf, buflen, code, optlen);
+ if (r < 0)
+ return r;
+
+ memcpy_safe(*buf, optval, optlen);
+
+ *buf += optlen;
+ *buflen -= optlen;
+
+ return 0;
+}
+
+int dhcp6_option_append_vendor_option(uint8_t **buf, size_t *buflen, OrderedHashmap *vendor_options) {
+ sd_dhcp6_option *options;
+ int r;
+
+ assert(buf);
+ assert(*buf);
+ assert(buflen);
+ assert(vendor_options);
+
+ ORDERED_HASHMAP_FOREACH(options, vendor_options) {
+ _cleanup_free_ uint8_t *p = NULL;
+ size_t total;
+
+ total = 4 + 2 + 2 + options->length;
+
+ p = malloc(total);
+ if (!p)
+ return -ENOMEM;
+
+ unaligned_write_be32(p, options->enterprise_identifier);
+ unaligned_write_be16(p + 4, options->option);
+ unaligned_write_be16(p + 6, options->length);
+ memcpy(p + 8, options->data, options->length);
+
+ r = dhcp6_option_append(buf, buflen, SD_DHCP6_OPTION_VENDOR_OPTS, total, p);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dhcp6_option_append_ia(uint8_t **buf, size_t *buflen, const DHCP6IA *ia) {
+ uint16_t len;
+ uint8_t *ia_hdr;
+ size_t iaid_offset, ia_buflen, ia_addrlen = 0;
+ DHCP6Address *addr;
+ int r;
+
+ assert_return(buf, -EINVAL);
+ assert_return(*buf, -EINVAL);
+ assert_return(buflen, -EINVAL);
+ assert_return(ia, -EINVAL);
+
+ switch (ia->type) {
+ case SD_DHCP6_OPTION_IA_NA:
+ len = DHCP6_OPTION_IA_NA_LEN;
+ iaid_offset = offsetof(DHCP6IA, ia_na);
+ break;
+
+ case SD_DHCP6_OPTION_IA_TA:
+ len = DHCP6_OPTION_IA_TA_LEN;
+ iaid_offset = offsetof(DHCP6IA, ia_ta);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (*buflen < offsetof(DHCP6Option, data) + len)
+ return -ENOBUFS;
+
+ ia_hdr = *buf;
+ ia_buflen = *buflen;
+
+ *buf += offsetof(DHCP6Option, data);
+ *buflen -= offsetof(DHCP6Option, data);
+
+ memcpy(*buf, (char*) ia + iaid_offset, len);
+
+ *buf += len;
+ *buflen -= len;
+
+ LIST_FOREACH(addresses, addr, ia->addresses) {
+ r = option_append_hdr(buf, buflen, SD_DHCP6_OPTION_IAADDR,
+ sizeof(addr->iaaddr));
+ if (r < 0)
+ return r;
+
+ memcpy(*buf, &addr->iaaddr, sizeof(addr->iaaddr));
+
+ *buf += sizeof(addr->iaaddr);
+ *buflen -= sizeof(addr->iaaddr);
+
+ ia_addrlen += offsetof(DHCP6Option, data) + sizeof(addr->iaaddr);
+ }
+
+ r = option_append_hdr(&ia_hdr, &ia_buflen, ia->type, len + ia_addrlen);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int dhcp6_option_append_fqdn(uint8_t **buf, size_t *buflen, const char *fqdn) {
+ uint8_t buffer[1 + DNS_WIRE_FORMAT_HOSTNAME_MAX];
+ int r;
+
+ assert_return(buf && *buf && buflen && fqdn, -EINVAL);
+
+ buffer[0] = DHCP6_FQDN_FLAG_S; /* Request server to perform AAAA RR DNS updates */
+
+ /* Store domain name after flags field */
+ r = dns_name_to_wire_format(fqdn, buffer + 1, sizeof(buffer) - 1, false);
+ if (r <= 0)
+ return r;
+
+ /*
+ * According to RFC 4704, chapter 4.2 only add terminating zero-length
+ * label in case a FQDN is provided. Since dns_name_to_wire_format
+ * always adds terminating zero-length label remove if only a hostname
+ * is provided.
+ */
+ if (dns_name_is_single_label(fqdn))
+ r--;
+
+ r = dhcp6_option_append(buf, buflen, SD_DHCP6_OPTION_FQDN, 1 + r, buffer);
+
+ return r;
+}
+
+int dhcp6_option_append_user_class(uint8_t **buf, size_t *buflen, char * const *user_class) {
+ _cleanup_free_ uint8_t *p = NULL;
+ size_t total = 0, offset = 0;
+ char * const *s;
+
+ assert(buf);
+ assert(*buf);
+ assert(buflen);
+ assert(!strv_isempty(user_class));
+
+ STRV_FOREACH(s, user_class) {
+ size_t len = strlen(*s);
+ uint8_t *q;
+
+ if (len > 0xffff || len == 0)
+ return -EINVAL;
+ q = realloc(p, total + len + 2);
+ if (!q)
+ return -ENOMEM;
+
+ p = q;
+
+ unaligned_write_be16(&p[offset], len);
+ memcpy(&p[offset + 2], *s, len);
+
+ offset += 2 + len;
+ total += 2 + len;
+ }
+
+ return dhcp6_option_append(buf, buflen, SD_DHCP6_OPTION_USER_CLASS, total, p);
+}
+
+int dhcp6_option_append_vendor_class(uint8_t **buf, size_t *buflen, char * const *vendor_class) {
+ _cleanup_free_ uint8_t *p = NULL;
+ uint32_t enterprise_identifier;
+ size_t total, offset;
+ char * const *s;
+
+ assert(buf);
+ assert(*buf);
+ assert(buflen);
+ assert(!strv_isempty(vendor_class));
+
+ enterprise_identifier = htobe32(SYSTEMD_PEN);
+
+ p = memdup(&enterprise_identifier, sizeof(enterprise_identifier));
+ if (!p)
+ return -ENOMEM;
+
+ total = sizeof(enterprise_identifier);
+ offset = total;
+
+ STRV_FOREACH(s, vendor_class) {
+ size_t len = strlen(*s);
+ uint8_t *q;
+
+ if (len > UINT16_MAX || len == 0)
+ return -EINVAL;
+
+ q = realloc(p, total + len + 2);
+ if (!q)
+ return -ENOMEM;
+
+ p = q;
+
+ unaligned_write_be16(&p[offset], len);
+ memcpy(&p[offset + 2], *s, len);
+
+ offset += 2 + len;
+ total += 2 + len;
+ }
+
+ return dhcp6_option_append(buf, buflen, SD_DHCP6_OPTION_VENDOR_CLASS, total, p);
+}
+
+int dhcp6_option_append_pd(uint8_t *buf, size_t len, const DHCP6IA *pd, DHCP6Address *hint_pd_prefix) {
+ DHCP6Option *option = (DHCP6Option *)buf;
+ size_t i = sizeof(*option) + sizeof(pd->ia_pd);
+ DHCP6PDPrefixOption *prefix_opt;
+ DHCP6Address *prefix;
+
+ assert_return(buf, -EINVAL);
+ assert_return(pd, -EINVAL);
+ assert_return(pd->type == SD_DHCP6_OPTION_IA_PD, -EINVAL);
+
+ if (len < i)
+ return -ENOBUFS;
+
+ option->code = htobe16(SD_DHCP6_OPTION_IA_PD);
+
+ memcpy(&option->data, &pd->ia_pd, sizeof(pd->ia_pd));
+ LIST_FOREACH(addresses, prefix, pd->addresses) {
+ if (len < i + sizeof(*prefix_opt))
+ return -ENOBUFS;
+
+ prefix_opt = (DHCP6PDPrefixOption *)&buf[i];
+ prefix_opt->option.code = htobe16(SD_DHCP6_OPTION_IA_PD_PREFIX);
+ prefix_opt->option.len = htobe16(sizeof(prefix_opt->iapdprefix));
+
+ memcpy(&prefix_opt->iapdprefix, &prefix->iapdprefix, sizeof(struct iapdprefix));
+ i += sizeof(*prefix_opt);
+ }
+
+ if (hint_pd_prefix && hint_pd_prefix->iapdprefix.prefixlen > 0) {
+ if (len < i + sizeof(*prefix_opt))
+ return -ENOBUFS;
+
+ prefix_opt = (DHCP6PDPrefixOption *)&buf[i];
+ prefix_opt->option.code = htobe16(SD_DHCP6_OPTION_IA_PD_PREFIX);
+ prefix_opt->option.len = htobe16(sizeof(prefix_opt->iapdprefix));
+
+ memcpy(&prefix_opt->iapdprefix, &hint_pd_prefix->iapdprefix, sizeof(struct iapdprefix));
+ i += sizeof(*prefix_opt);
+ }
+
+ option->len = htobe16(i - sizeof(*option));
+
+ return i;
+}
+
+static int option_parse_hdr(uint8_t **buf, size_t *buflen, uint16_t *optcode, size_t *optlen) {
+ DHCP6Option *option = (DHCP6Option*) *buf;
+ uint16_t len;
+
+ assert_return(buf, -EINVAL);
+ assert_return(optcode, -EINVAL);
+ assert_return(optlen, -EINVAL);
+
+ if (*buflen < offsetof(DHCP6Option, data))
+ return -ENOMSG;
+
+ len = be16toh(option->len);
+
+ if (len > *buflen)
+ return -ENOMSG;
+
+ *optcode = be16toh(option->code);
+ *optlen = len;
+
+ *buf += 4;
+ *buflen -= 4;
+
+ return 0;
+}
+
+int dhcp6_option_parse(uint8_t **buf, size_t *buflen, uint16_t *optcode,
+ size_t *optlen, uint8_t **optvalue) {
+ int r;
+
+ assert_return(buf && buflen && optcode && optlen && optvalue, -EINVAL);
+
+ r = option_parse_hdr(buf, buflen, optcode, optlen);
+ if (r < 0)
+ return r;
+
+ if (*optlen > *buflen)
+ return -ENOBUFS;
+
+ *optvalue = *buf;
+ *buflen -= *optlen;
+ *buf += *optlen;
+
+ return 0;
+}
+
+int dhcp6_option_parse_status(DHCP6Option *option, size_t len) {
+ DHCP6StatusOption *statusopt = (DHCP6StatusOption *)option;
+
+ if (len < sizeof(DHCP6StatusOption) ||
+ be16toh(option->len) + offsetof(DHCP6Option, data) < sizeof(DHCP6StatusOption))
+ return -ENOBUFS;
+
+ return be16toh(statusopt->status);
+}
+
+static int dhcp6_option_parse_address(DHCP6Option *option, DHCP6IA *ia,
+ uint32_t *lifetime_valid) {
+ DHCP6AddressOption *addr_option = (DHCP6AddressOption *)option;
+ DHCP6Address *addr;
+ uint32_t lt_valid, lt_pref;
+ int r;
+
+ if (be16toh(option->len) + offsetof(DHCP6Option, data) < sizeof(*addr_option))
+ return -ENOBUFS;
+
+ lt_valid = be32toh(addr_option->iaaddr.lifetime_valid);
+ lt_pref = be32toh(addr_option->iaaddr.lifetime_preferred);
+
+ if (lt_valid == 0 || lt_pref > lt_valid) {
+ log_dhcp6_client(client, "Valid lifetime of an IA address is zero or preferred lifetime %d > valid lifetime %d",
+ lt_pref, lt_valid);
+
+ return 0;
+ }
+
+ if (be16toh(option->len) + offsetof(DHCP6Option, data) > sizeof(*addr_option)) {
+ r = dhcp6_option_parse_status((DHCP6Option *)addr_option->options, be16toh(option->len) + offsetof(DHCP6Option, data) - sizeof(*addr_option));
+ if (r != 0)
+ return r < 0 ? r: 0;
+ }
+
+ addr = new0(DHCP6Address, 1);
+ if (!addr)
+ return -ENOMEM;
+
+ LIST_INIT(addresses, addr);
+ memcpy(&addr->iaaddr, option->data, sizeof(addr->iaaddr));
+
+ LIST_PREPEND(addresses, ia->addresses, addr);
+
+ *lifetime_valid = be32toh(addr->iaaddr.lifetime_valid);
+
+ return 0;
+}
+
+static int dhcp6_option_parse_pdprefix(DHCP6Option *option, DHCP6IA *ia,
+ uint32_t *lifetime_valid) {
+ DHCP6PDPrefixOption *pdprefix_option = (DHCP6PDPrefixOption *)option;
+ DHCP6Address *prefix;
+ uint32_t lt_valid, lt_pref;
+ int r;
+
+ if (be16toh(option->len) + offsetof(DHCP6Option, data) < sizeof(*pdprefix_option))
+ return -ENOBUFS;
+
+ lt_valid = be32toh(pdprefix_option->iapdprefix.lifetime_valid);
+ lt_pref = be32toh(pdprefix_option->iapdprefix.lifetime_preferred);
+
+ if (lt_valid == 0 || lt_pref > lt_valid) {
+ log_dhcp6_client(client, "Valid lifetieme of a PD prefix is zero or preferred lifetime %d > valid lifetime %d",
+ lt_pref, lt_valid);
+
+ return 0;
+ }
+
+ if (be16toh(option->len) + offsetof(DHCP6Option, data) > sizeof(*pdprefix_option)) {
+ r = dhcp6_option_parse_status((DHCP6Option *)pdprefix_option->options, be16toh(option->len) + offsetof(DHCP6Option, data) - sizeof(*pdprefix_option));
+ if (r != 0)
+ return r < 0 ? r: 0;
+ }
+
+ prefix = new0(DHCP6Address, 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ LIST_INIT(addresses, prefix);
+ memcpy(&prefix->iapdprefix, option->data, sizeof(prefix->iapdprefix));
+
+ LIST_PREPEND(addresses, ia->addresses, prefix);
+
+ *lifetime_valid = be32toh(prefix->iapdprefix.lifetime_valid);
+
+ return 0;
+}
+
+int dhcp6_option_parse_ia(DHCP6Option *iaoption, DHCP6IA *ia, uint16_t *ret_status_code) {
+ uint32_t lt_t1, lt_t2, lt_valid = 0, lt_min = UINT32_MAX;
+ uint16_t iatype, optlen;
+ size_t iaaddr_offset;
+ int r = 0, status;
+ size_t i, len;
+ uint16_t opt;
+
+ assert_return(ia, -EINVAL);
+ assert_return(!ia->addresses, -EINVAL);
+
+ iatype = be16toh(iaoption->code);
+ len = be16toh(iaoption->len);
+
+ switch (iatype) {
+ case SD_DHCP6_OPTION_IA_NA:
+
+ if (len < DHCP6_OPTION_IA_NA_LEN)
+ return -ENOBUFS;
+
+ iaaddr_offset = DHCP6_OPTION_IA_NA_LEN;
+ memcpy(&ia->ia_na, iaoption->data, sizeof(ia->ia_na));
+
+ lt_t1 = be32toh(ia->ia_na.lifetime_t1);
+ lt_t2 = be32toh(ia->ia_na.lifetime_t2);
+
+ if (lt_t1 && lt_t2 && lt_t1 > lt_t2) {
+ log_dhcp6_client(client, "IA NA T1 %ds > T2 %ds",
+ lt_t1, lt_t2);
+ return -EINVAL;
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_PD:
+
+ if (len < sizeof(ia->ia_pd))
+ return -ENOBUFS;
+
+ iaaddr_offset = sizeof(ia->ia_pd);
+ memcpy(&ia->ia_pd, iaoption->data, sizeof(ia->ia_pd));
+
+ lt_t1 = be32toh(ia->ia_pd.lifetime_t1);
+ lt_t2 = be32toh(ia->ia_pd.lifetime_t2);
+
+ if (lt_t1 && lt_t2 && lt_t1 > lt_t2) {
+ log_dhcp6_client(client, "IA PD T1 %ds > T2 %ds",
+ lt_t1, lt_t2);
+ return -EINVAL;
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_TA:
+ if (len < DHCP6_OPTION_IA_TA_LEN)
+ return -ENOBUFS;
+
+ iaaddr_offset = DHCP6_OPTION_IA_TA_LEN;
+ memcpy(&ia->ia_ta.id, iaoption->data, sizeof(ia->ia_ta));
+
+ break;
+
+ default:
+ return -ENOMSG;
+ }
+
+ ia->type = iatype;
+ i = iaaddr_offset;
+
+ while (i < len) {
+ DHCP6Option *option = (DHCP6Option *)&iaoption->data[i];
+
+ if (len < i + sizeof(*option) || len < i + sizeof(*option) + be16toh(option->len))
+ return -ENOBUFS;
+
+ opt = be16toh(option->code);
+ optlen = be16toh(option->len);
+
+ switch (opt) {
+ case SD_DHCP6_OPTION_IAADDR:
+
+ if (!IN_SET(ia->type, SD_DHCP6_OPTION_IA_NA, SD_DHCP6_OPTION_IA_TA)) {
+ log_dhcp6_client(client, "IA Address option not in IA NA or TA option");
+ return -EINVAL;
+ }
+
+ r = dhcp6_option_parse_address(option, ia, &lt_valid);
+ if (r < 0)
+ return r;
+
+ if (lt_valid < lt_min)
+ lt_min = lt_valid;
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_PD_PREFIX:
+
+ if (!IN_SET(ia->type, SD_DHCP6_OPTION_IA_PD)) {
+ log_dhcp6_client(client, "IA PD Prefix option not in IA PD option");
+ return -EINVAL;
+ }
+
+ r = dhcp6_option_parse_pdprefix(option, ia, &lt_valid);
+ if (r < 0)
+ return r;
+
+ if (lt_valid < lt_min)
+ lt_min = lt_valid;
+
+ break;
+
+ case SD_DHCP6_OPTION_STATUS_CODE:
+
+ status = dhcp6_option_parse_status(option, optlen + offsetof(DHCP6Option, data));
+ if (status < 0)
+ return status;
+
+ if (status > 0) {
+ if (ret_status_code)
+ *ret_status_code = status;
+
+ log_dhcp6_client(client, "IA status %s",
+ dhcp6_message_status_to_string(status));
+
+ return 0;
+ }
+
+ break;
+
+ default:
+ log_dhcp6_client(client, "Unknown IA option %d", opt);
+ break;
+ }
+
+ i += sizeof(*option) + optlen;
+ }
+
+ switch(iatype) {
+ case SD_DHCP6_OPTION_IA_NA:
+ if (!ia->ia_na.lifetime_t1 && !ia->ia_na.lifetime_t2) {
+ lt_t1 = lt_min / 2;
+ lt_t2 = lt_min / 10 * 8;
+ ia->ia_na.lifetime_t1 = htobe32(lt_t1);
+ ia->ia_na.lifetime_t2 = htobe32(lt_t2);
+
+ log_dhcp6_client(client, "Computed IA NA T1 %ds and T2 %ds as both were zero",
+ lt_t1, lt_t2);
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_PD:
+ if (!ia->ia_pd.lifetime_t1 && !ia->ia_pd.lifetime_t2) {
+ lt_t1 = lt_min / 2;
+ lt_t2 = lt_min / 10 * 8;
+ ia->ia_pd.lifetime_t1 = htobe32(lt_t1);
+ ia->ia_pd.lifetime_t2 = htobe32(lt_t2);
+
+ log_dhcp6_client(client, "Computed IA PD T1 %ds and T2 %ds as both were zero",
+ lt_t1, lt_t2);
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ if (ret_status_code)
+ *ret_status_code = 0;
+
+ return 1;
+}
+
+int dhcp6_option_parse_ip6addrs(uint8_t *optval, uint16_t optlen,
+ struct in6_addr **addrs, size_t count,
+ size_t *allocated) {
+
+ if (optlen == 0 || optlen % sizeof(struct in6_addr) != 0)
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(*addrs, *allocated,
+ count * sizeof(struct in6_addr) + optlen))
+ return -ENOMEM;
+
+ memcpy(*addrs + count, optval, optlen);
+
+ count += optlen / sizeof(struct in6_addr);
+
+ return count;
+}
+
+static int parse_domain(const uint8_t **data, uint16_t *len, char **out_domain) {
+ _cleanup_free_ char *ret = NULL;
+ size_t n = 0, allocated = 0;
+ const uint8_t *optval = *data;
+ uint16_t optlen = *len;
+ bool first = true;
+ int r;
+
+ if (optlen <= 1)
+ return -ENODATA;
+
+ for (;;) {
+ const char *label;
+ uint8_t c;
+
+ if (optlen == 0)
+ break;
+
+ c = *optval;
+ optval++;
+ optlen--;
+
+ if (c == 0)
+ /* End label */
+ break;
+ if (c > 63)
+ return -EBADMSG;
+ if (c > optlen)
+ return -EMSGSIZE;
+
+ /* Literal label */
+ label = (const char *)optval;
+ optval += c;
+ optlen -= c;
+
+ if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ if (first)
+ first = false;
+ else
+ ret[n++] = '.';
+
+ r = dns_label_escape(label, c, ret + n, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ if (n) {
+ if (!GREEDY_REALLOC(ret, allocated, n + 1))
+ return -ENOMEM;
+ ret[n] = 0;
+ }
+
+ *out_domain = TAKE_PTR(ret);
+ *data = optval;
+ *len = optlen;
+
+ return n;
+}
+
+int dhcp6_option_parse_domainname(const uint8_t *optval, uint16_t optlen, char **str) {
+ _cleanup_free_ char *domain = NULL;
+ int r;
+
+ r = parse_domain(&optval, &optlen, &domain);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENODATA;
+ if (optlen != 0)
+ return -EINVAL;
+
+ *str = TAKE_PTR(domain);
+ return 0;
+}
+
+int dhcp6_option_parse_domainname_list(const uint8_t *optval, uint16_t optlen, char ***str_arr) {
+ size_t idx = 0;
+ _cleanup_strv_free_ char **names = NULL;
+ int r;
+
+ if (optlen <= 1)
+ return -ENODATA;
+ if (optval[optlen - 1] != '\0')
+ return -EINVAL;
+
+ while (optlen > 0) {
+ _cleanup_free_ char *ret = NULL;
+
+ r = parse_domain(&optval, &optlen, &ret);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = strv_extend(&names, ret);
+ if (r < 0)
+ return r;
+
+ idx++;
+ }
+
+ *str_arr = TAKE_PTR(names);
+
+ return idx;
+}
+
+static sd_dhcp6_option* dhcp6_option_free(sd_dhcp6_option *i) {
+ if (!i)
+ return NULL;
+
+ free(i->data);
+ return mfree(i);
+}
+
+int sd_dhcp6_option_new(uint16_t option, const void *data, size_t length, uint32_t enterprise_identifier, sd_dhcp6_option **ret) {
+ assert_return(ret, -EINVAL);
+ assert_return(length == 0 || data, -EINVAL);
+
+ _cleanup_free_ void *q = memdup(data, length);
+ if (!q)
+ return -ENOMEM;
+
+ sd_dhcp6_option *p = new(sd_dhcp6_option, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (sd_dhcp6_option) {
+ .n_ref = 1,
+ .option = option,
+ .enterprise_identifier = enterprise_identifier,
+ .length = length,
+ .data = TAKE_PTR(q),
+ };
+
+ *ret = p;
+ return 0;
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp6_option, sd_dhcp6_option, dhcp6_option_free);
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+ dhcp6_option_hash_ops,
+ void,
+ trivial_hash_func,
+ trivial_compare_func,
+ sd_dhcp6_option,
+ sd_dhcp6_option_unref);
diff --git a/src/libsystemd-network/dhcp6-protocol.h b/src/libsystemd-network/dhcp6-protocol.h
new file mode 100644
index 0000000..c700363
--- /dev/null
+++ b/src/libsystemd-network/dhcp6-protocol.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+struct DHCP6Message {
+ union {
+ struct {
+ uint8_t type;
+ uint8_t _pad[3];
+ } _packed_;
+ be32_t transaction_id;
+ };
+ uint8_t options[];
+} _packed_;
+
+typedef struct DHCP6Message DHCP6Message;
+
+#define DHCP6_MIN_OPTIONS_SIZE \
+ 1280 - sizeof(struct ip6_hdr) - sizeof(struct udphdr)
+
+#define IN6ADDR_ALL_DHCP6_RELAY_AGENTS_AND_SERVERS_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02 } } }
+
+enum {
+ DHCP6_PORT_SERVER = 547,
+ DHCP6_PORT_CLIENT = 546,
+};
+
+#define DHCP6_INF_TIMEOUT 1 * USEC_PER_SEC
+#define DHCP6_INF_MAX_RT 120 * USEC_PER_SEC
+#define DHCP6_SOL_MAX_DELAY 1 * USEC_PER_SEC
+#define DHCP6_SOL_TIMEOUT 1 * USEC_PER_SEC
+#define DHCP6_SOL_MAX_RT 120 * USEC_PER_SEC
+#define DHCP6_REQ_TIMEOUT 1 * USEC_PER_SEC
+#define DHCP6_REQ_MAX_RT 120 * USEC_PER_SEC
+#define DHCP6_REQ_MAX_RC 10
+#define DHCP6_REN_TIMEOUT 10 * USEC_PER_SEC
+#define DHCP6_REN_MAX_RT 600 * USEC_PER_SEC
+#define DHCP6_REB_TIMEOUT 10 * USEC_PER_SEC
+#define DHCP6_REB_MAX_RT 600 * USEC_PER_SEC
+
+enum DHCP6State {
+ DHCP6_STATE_STOPPED = 0,
+ DHCP6_STATE_INFORMATION_REQUEST = 1,
+ DHCP6_STATE_SOLICITATION = 2,
+ DHCP6_STATE_REQUEST = 3,
+ DHCP6_STATE_BOUND = 4,
+ DHCP6_STATE_RENEW = 5,
+ DHCP6_STATE_REBIND = 6,
+};
+
+enum {
+ DHCP6_SOLICIT = 1,
+ DHCP6_ADVERTISE = 2,
+ DHCP6_REQUEST = 3,
+ DHCP6_CONFIRM = 4,
+ DHCP6_RENEW = 5,
+ DHCP6_REBIND = 6,
+ DHCP6_REPLY = 7,
+ DHCP6_RELEASE = 8,
+ DHCP6_DECLINE = 9,
+ DHCP6_RECONFIGURE = 10,
+ DHCP6_INFORMATION_REQUEST = 11,
+ DHCP6_RELAY_FORW = 12,
+ DHCP6_RELAY_REPL = 13,
+ _DHCP6_MESSAGE_MAX = 14,
+};
+
+enum {
+ DHCP6_NTP_SUBOPTION_SRV_ADDR = 1,
+ DHCP6_NTP_SUBOPTION_MC_ADDR = 2,
+ DHCP6_NTP_SUBOPTION_SRV_FQDN = 3,
+};
+
+/*
+ * RFC 8415, RFC 5007 and RFC 7653 status codes:
+ * https://www.iana.org/assignments/dhcpv6-parameters/dhcpv6-parameters.xhtml#dhcpv6-parameters-5
+ */
+enum {
+ DHCP6_STATUS_SUCCESS = 0,
+ DHCP6_STATUS_UNSPEC_FAIL = 1,
+ DHCP6_STATUS_NO_ADDRS_AVAIL = 2,
+ DHCP6_STATUS_NO_BINDING = 3,
+ DHCP6_STATUS_NOT_ON_LINK = 4,
+ DHCP6_STATUS_USE_MULTICAST = 5,
+ DHCP6_STATUS_NO_PREFIX_AVAIL = 6,
+ DHCP6_STATUS_UNKNOWN_QUERY_TYPE = 7,
+ DHCP6_STATUS_MALFORMED_QUERY = 8,
+ DHCP6_STATUS_NOT_CONFIGURED = 9,
+ DHCP6_STATUS_NOT_ALLOWED = 10,
+ DHCP6_STATUS_QUERY_TERMINATED = 11,
+ DHCP6_STATUS_DATA_MISSING = 12,
+ DHCP6_STATUS_CATCHUP_COMPLETE = 13,
+ DHCP6_STATUS_NOT_SUPPORTED = 14,
+ DHCP6_STATUS_TLS_CONNECTION_REFUSED = 15,
+ DHCP6_STATUS_ADDRESS_IN_USE = 16,
+ DHCP6_STATUS_CONFIGURATION_CONFLICT = 17,
+ DHCP6_STATUS_MISSING_BINDING_INFORMATION = 18,
+ DHCP6_STATUS_OUTDATED_BINDING_INFORMATION = 19,
+ DHCP6_STATUS_SERVER_SHUTTING_DOWN = 20,
+ DHCP6_STATUS_DNS_UPDATE_NOT_SUPPORTED = 21,
+ DHCP6_STATUS_EXCESSIVE_TIME_SKEW = 22,
+ _DHCP6_STATUS_MAX = 23,
+};
+
+enum {
+ DHCP6_FQDN_FLAG_S = (1 << 0),
+ DHCP6_FQDN_FLAG_O = (1 << 1),
+ DHCP6_FQDN_FLAG_N = (1 << 2),
+};
diff --git a/src/libsystemd-network/icmp6-util.c b/src/libsystemd-network/icmp6-util.c
new file mode 100644
index 0000000..4af0125
--- /dev/null
+++ b/src/libsystemd-network/icmp6-util.c
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <netinet/icmp6.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+
+#include "fd-util.h"
+#include "icmp6-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "socket-util.h"
+
+#define IN6ADDR_ALL_ROUTERS_MULTICAST_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } } }
+
+#define IN6ADDR_ALL_NODES_MULTICAST_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } } }
+
+static int icmp6_bind_router_message(const struct icmp6_filter *filter,
+ const struct ipv6_mreq *mreq) {
+ int ifindex = mreq->ipv6mr_interface;
+ _cleanup_close_ int s = -1;
+ int r;
+
+ s = socket(AF_INET6, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_ICMPV6);
+ if (s < 0)
+ return -errno;
+
+ r = setsockopt(s, IPPROTO_ICMPV6, ICMP6_FILTER, filter, sizeof(*filter));
+ if (r < 0)
+ return -errno;
+
+ r = setsockopt(s, IPPROTO_IPV6, IPV6_ADD_MEMBERSHIP, mreq, sizeof(*mreq));
+ if (r < 0)
+ return -errno;
+
+ /* RFC 3315, section 6.7, bullet point 2 may indicate that an
+ IPV6_PKTINFO socket option also applies for ICMPv6 multicast.
+ Empirical experiments indicates otherwise and therefore an
+ IPV6_MULTICAST_IF socket option is used here instead */
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_IF, ifindex);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, false);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 255);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 255);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_IPV6, IPV6_RECVHOPLIMIT, true);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(s, SOL_SOCKET, SO_TIMESTAMP, true);
+ if (r < 0)
+ return r;
+
+ r = socket_bind_to_ifindex(s, ifindex);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(s);
+}
+
+int icmp6_bind_router_solicitation(int ifindex) {
+ struct icmp6_filter filter = {};
+ struct ipv6_mreq mreq = {
+ .ipv6mr_multiaddr = IN6ADDR_ALL_NODES_MULTICAST_INIT,
+ .ipv6mr_interface = ifindex,
+ };
+
+ ICMP6_FILTER_SETBLOCKALL(&filter);
+ ICMP6_FILTER_SETPASS(ND_ROUTER_ADVERT, &filter);
+
+ return icmp6_bind_router_message(&filter, &mreq);
+}
+
+int icmp6_bind_router_advertisement(int ifindex) {
+ struct icmp6_filter filter = {};
+ struct ipv6_mreq mreq = {
+ .ipv6mr_multiaddr = IN6ADDR_ALL_ROUTERS_MULTICAST_INIT,
+ .ipv6mr_interface = ifindex,
+ };
+
+ ICMP6_FILTER_SETBLOCKALL(&filter);
+ ICMP6_FILTER_SETPASS(ND_ROUTER_SOLICIT, &filter);
+
+ return icmp6_bind_router_message(&filter, &mreq);
+}
+
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr) {
+ struct sockaddr_in6 dst = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ALL_ROUTERS_MULTICAST_INIT,
+ };
+ struct {
+ struct nd_router_solicit rs;
+ struct nd_opt_hdr rs_opt;
+ struct ether_addr rs_opt_mac;
+ } _packed_ rs = {
+ .rs.nd_rs_type = ND_ROUTER_SOLICIT,
+ .rs_opt.nd_opt_type = ND_OPT_SOURCE_LINKADDR,
+ .rs_opt.nd_opt_len = 1,
+ };
+ struct iovec iov = {
+ .iov_base = &rs,
+ .iov_len = sizeof(rs),
+ };
+ struct msghdr msg = {
+ .msg_name = &dst,
+ .msg_namelen = sizeof(dst),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ int r;
+
+ assert(s >= 0);
+ assert(ether_addr);
+
+ rs.rs_opt_mac = *ether_addr;
+
+ r = sendmsg(s, &msg, 0);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int icmp6_receive(int fd, void *buffer, size_t size, struct in6_addr *dst,
+ triple_timestamp *timestamp) {
+
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int)) + /* ttl */
+ CMSG_SPACE(sizeof(struct timeval))) control;
+ struct iovec iov = {};
+ union sockaddr_union sa = {};
+ struct msghdr msg = {
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t len;
+
+ iov = IOVEC_MAKE(buffer, size);
+
+ len = recvmsg_safe(fd, &msg, MSG_DONTWAIT);
+ if (len < 0)
+ return (int) len;
+
+ if ((size_t) len != size)
+ return -EINVAL;
+
+ if (msg.msg_namelen == sizeof(struct sockaddr_in6) &&
+ sa.in6.sin6_family == AF_INET6) {
+
+ *dst = sa.in6.sin6_addr;
+ if (in_addr_is_link_local(AF_INET6, (union in_addr_union*) dst) <= 0)
+ return -EADDRNOTAVAIL;
+
+ } else if (msg.msg_namelen > 0)
+ return -EPFNOSUPPORT;
+
+ /* namelen == 0 only happens when running the test-suite over a socketpair */
+
+ assert(!(msg.msg_flags & MSG_CTRUNC));
+ assert(!(msg.msg_flags & MSG_TRUNC));
+
+ CMSG_FOREACH(cmsg, &msg) {
+ if (cmsg->cmsg_level == SOL_IPV6 &&
+ cmsg->cmsg_type == IPV6_HOPLIMIT &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
+ int hops = *(int*) CMSG_DATA(cmsg);
+
+ if (hops != 255)
+ return -EMULTIHOP;
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMP &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
+ triple_timestamp_from_realtime(timestamp, timeval_load((struct timeval*) CMSG_DATA(cmsg)));
+ }
+
+ if (!triple_timestamp_is_set(timestamp))
+ triple_timestamp_get(timestamp);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/icmp6-util.h b/src/libsystemd-network/icmp6-util.h
new file mode 100644
index 0000000..50d21b5
--- /dev/null
+++ b/src/libsystemd-network/icmp6-util.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+
+#include "time-util.h"
+
+#define IN6ADDR_ALL_ROUTERS_MULTICAST_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } } }
+
+#define IN6ADDR_ALL_NODES_MULTICAST_INIT \
+ { { { 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } } }
+
+int icmp6_bind_router_solicitation(int ifindex);
+int icmp6_bind_router_advertisement(int ifindex);
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr);
+int icmp6_receive(int fd, void *buffer, size_t size, struct in6_addr *dst,
+ triple_timestamp *timestamp);
diff --git a/src/libsystemd-network/lldp-internal.h b/src/libsystemd-network/lldp-internal.h
new file mode 100644
index 0000000..f23695f
--- /dev/null
+++ b/src/libsystemd-network/lldp-internal.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+#include "sd-lldp.h"
+
+#include "hashmap.h"
+#include "log.h"
+#include "prioq.h"
+
+struct sd_lldp {
+ unsigned n_ref;
+
+ int ifindex;
+ int fd;
+
+ sd_event *event;
+ int64_t event_priority;
+ sd_event_source *io_event_source;
+ sd_event_source *timer_event_source;
+
+ Prioq *neighbor_by_expiry;
+ Hashmap *neighbor_by_id;
+
+ uint64_t neighbors_max;
+
+ sd_lldp_callback_t callback;
+ void *userdata;
+
+ uint16_t capability_mask;
+
+ struct ether_addr filter_address;
+};
+
+#define log_lldp_errno(error, fmt, ...) log_internal(LOG_DEBUG, error, PROJECT_FILE, __LINE__, __func__, "LLDP: " fmt, ##__VA_ARGS__)
+#define log_lldp(fmt, ...) log_lldp_errno(0, fmt, ##__VA_ARGS__)
+
+const char* lldp_event_to_string(sd_lldp_event e) _const_;
+sd_lldp_event lldp_event_from_string(const char *s) _pure_;
diff --git a/src/libsystemd-network/lldp-neighbor.c b/src/libsystemd-network/lldp-neighbor.c
new file mode 100644
index 0000000..546ae1c
--- /dev/null
+++ b/src/libsystemd-network/lldp-neighbor.c
@@ -0,0 +1,792 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "ether-addr-util.h"
+#include "hexdecoct.h"
+#include "in-addr-util.h"
+#include "lldp-internal.h"
+#include "lldp-neighbor.h"
+#include "memory-util.h"
+#include "missing_network.h"
+#include "unaligned.h"
+
+static void lldp_neighbor_id_hash_func(const LLDPNeighborID *id, struct siphash *state) {
+ siphash24_compress(id->chassis_id, id->chassis_id_size, state);
+ siphash24_compress(&id->chassis_id_size, sizeof(id->chassis_id_size), state);
+ siphash24_compress(id->port_id, id->port_id_size, state);
+ siphash24_compress(&id->port_id_size, sizeof(id->port_id_size), state);
+}
+
+int lldp_neighbor_id_compare_func(const LLDPNeighborID *x, const LLDPNeighborID *y) {
+ return memcmp_nn(x->chassis_id, x->chassis_id_size, y->chassis_id, y->chassis_id_size)
+ ?: memcmp_nn(x->port_id, x->port_id_size, y->port_id, y->port_id_size);
+}
+
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(lldp_neighbor_hash_ops, LLDPNeighborID, lldp_neighbor_id_hash_func, lldp_neighbor_id_compare_func,
+ sd_lldp_neighbor, lldp_neighbor_unlink);
+
+int lldp_neighbor_prioq_compare_func(const void *a, const void *b) {
+ const sd_lldp_neighbor *x = a, *y = b;
+
+ return CMP(x->until, y->until);
+}
+
+_public_ sd_lldp_neighbor *sd_lldp_neighbor_ref(sd_lldp_neighbor *n) {
+ if (!n)
+ return NULL;
+
+ assert(n->n_ref > 0 || n->lldp);
+ n->n_ref++;
+
+ return n;
+}
+
+static void lldp_neighbor_free(sd_lldp_neighbor *n) {
+ assert(n);
+
+ free(n->id.port_id);
+ free(n->id.chassis_id);
+ free(n->port_description);
+ free(n->system_name);
+ free(n->system_description);
+ free(n->mud_url);
+ free(n->chassis_id_as_string);
+ free(n->port_id_as_string);
+ free(n);
+}
+
+_public_ sd_lldp_neighbor *sd_lldp_neighbor_unref(sd_lldp_neighbor *n) {
+
+ /* Drops one reference from the neighbor. Note that the object is not freed unless it is already unlinked from
+ * the sd_lldp object. */
+
+ if (!n)
+ return NULL;
+
+ assert(n->n_ref > 0);
+ n->n_ref--;
+
+ if (n->n_ref <= 0 && !n->lldp)
+ lldp_neighbor_free(n);
+
+ return NULL;
+}
+
+sd_lldp_neighbor *lldp_neighbor_unlink(sd_lldp_neighbor *n) {
+
+ /* Removes the neighbor object from the LLDP object, and frees it if it also has no other reference. */
+
+ if (!n)
+ return NULL;
+
+ if (!n->lldp)
+ return NULL;
+
+ /* Only remove the neighbor object from the hash table if it's in there, don't complain if it isn't. This is
+ * because we are used as destructor call for hashmap_clear() and thus sometimes are called to de-register
+ * ourselves from the hashtable and sometimes are called after we already are de-registered. */
+
+ (void) hashmap_remove_value(n->lldp->neighbor_by_id, &n->id, n);
+
+ assert_se(prioq_remove(n->lldp->neighbor_by_expiry, n, &n->prioq_idx) >= 0);
+
+ n->lldp = NULL;
+
+ if (n->n_ref <= 0)
+ lldp_neighbor_free(n);
+
+ return NULL;
+}
+
+sd_lldp_neighbor *lldp_neighbor_new(size_t raw_size) {
+ sd_lldp_neighbor *n;
+
+ n = malloc0(ALIGN(sizeof(sd_lldp_neighbor)) + raw_size);
+ if (!n)
+ return NULL;
+
+ n->raw_size = raw_size;
+ n->n_ref = 1;
+
+ return n;
+}
+
+static int parse_string(char **s, const void *q, size_t n) {
+ const char *p = q;
+ char *k;
+
+ assert(s);
+ assert(p || n == 0);
+
+ if (*s) {
+ log_lldp("Found duplicate string, ignoring field.");
+ return 0;
+ }
+
+ /* Strip trailing NULs, just to be nice */
+ while (n > 0 && p[n-1] == 0)
+ n--;
+
+ if (n <= 0) /* Ignore empty strings */
+ return 0;
+
+ /* Look for inner NULs */
+ if (memchr(p, 0, n)) {
+ log_lldp("Found inner NUL in string, ignoring field.");
+ return 0;
+ }
+
+ /* Let's escape weird chars, for security reasons */
+ k = cescape_length(p, n);
+ if (!k)
+ return -ENOMEM;
+
+ free(*s);
+ *s = k;
+
+ return 1;
+}
+
+int lldp_neighbor_parse(sd_lldp_neighbor *n) {
+ struct ether_header h;
+ const uint8_t *p;
+ size_t left;
+ int r;
+
+ assert(n);
+
+ if (n->raw_size < sizeof(struct ether_header)) {
+ log_lldp("Received truncated packet, ignoring.");
+ return -EBADMSG;
+ }
+
+ memcpy(&h, LLDP_NEIGHBOR_RAW(n), sizeof(h));
+
+ if (h.ether_type != htobe16(ETHERTYPE_LLDP)) {
+ log_lldp("Received packet with wrong type, ignoring.");
+ return -EBADMSG;
+ }
+
+ if (h.ether_dhost[0] != 0x01 ||
+ h.ether_dhost[1] != 0x80 ||
+ h.ether_dhost[2] != 0xc2 ||
+ h.ether_dhost[3] != 0x00 ||
+ h.ether_dhost[4] != 0x00 ||
+ !IN_SET(h.ether_dhost[5], 0x00, 0x03, 0x0e)) {
+ log_lldp("Received packet with wrong destination address, ignoring.");
+ return -EBADMSG;
+ }
+
+ memcpy(&n->source_address, h.ether_shost, sizeof(struct ether_addr));
+ memcpy(&n->destination_address, h.ether_dhost, sizeof(struct ether_addr));
+
+ p = (const uint8_t*) LLDP_NEIGHBOR_RAW(n) + sizeof(struct ether_header);
+ left = n->raw_size - sizeof(struct ether_header);
+
+ for (;;) {
+ uint8_t type;
+ uint16_t length;
+
+ if (left < 2) {
+ log_lldp("TLV lacks header, ignoring.");
+ return -EBADMSG;
+ }
+
+ type = p[0] >> 1;
+ length = p[1] + (((uint16_t) (p[0] & 1)) << 8);
+ p += 2, left -= 2;
+
+ if (left < length) {
+ log_lldp("TLV truncated, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ switch (type) {
+
+ case SD_LLDP_TYPE_END:
+ if (length != 0) {
+ log_lldp("End marker TLV not zero-sized, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ /* Note that after processing the SD_LLDP_TYPE_END left could still be > 0
+ * as the message may contain padding (see IEEE 802.1AB-2016, sec. 8.5.12) */
+
+ goto end_marker;
+
+ case SD_LLDP_TYPE_CHASSIS_ID:
+ if (length < 2 || length > 256) { /* includes the chassis subtype, hence one extra byte */
+ log_lldp("Chassis ID field size out of range, ignoring datagram.");
+ return -EBADMSG;
+ }
+ if (n->id.chassis_id) {
+ log_lldp("Duplicate chassis ID field, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ n->id.chassis_id = memdup(p, length);
+ if (!n->id.chassis_id)
+ return -ENOMEM;
+
+ n->id.chassis_id_size = length;
+ break;
+
+ case SD_LLDP_TYPE_PORT_ID:
+ if (length < 2 || length > 256) { /* includes the port subtype, hence one extra byte */
+ log_lldp("Port ID field size out of range, ignoring datagram.");
+ return -EBADMSG;
+ }
+ if (n->id.port_id) {
+ log_lldp("Duplicate port ID field, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ n->id.port_id = memdup(p, length);
+ if (!n->id.port_id)
+ return -ENOMEM;
+
+ n->id.port_id_size = length;
+ break;
+
+ case SD_LLDP_TYPE_TTL:
+ if (length != 2) {
+ log_lldp("TTL field has wrong size, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ if (n->has_ttl) {
+ log_lldp("Duplicate TTL field, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ n->ttl = unaligned_read_be16(p);
+ n->has_ttl = true;
+ break;
+
+ case SD_LLDP_TYPE_PORT_DESCRIPTION:
+ r = parse_string(&n->port_description, p, length);
+ if (r < 0)
+ return r;
+ break;
+
+ case SD_LLDP_TYPE_SYSTEM_NAME:
+ r = parse_string(&n->system_name, p, length);
+ if (r < 0)
+ return r;
+ break;
+
+ case SD_LLDP_TYPE_SYSTEM_DESCRIPTION:
+ r = parse_string(&n->system_description, p, length);
+ if (r < 0)
+ return r;
+ break;
+
+ case SD_LLDP_TYPE_SYSTEM_CAPABILITIES:
+ if (length != 4)
+ log_lldp("System capabilities field has wrong size, ignoring.");
+ else {
+ n->system_capabilities = unaligned_read_be16(p);
+ n->enabled_capabilities = unaligned_read_be16(p + 2);
+ n->has_capabilities = true;
+ }
+
+ break;
+
+ case SD_LLDP_TYPE_PRIVATE: {
+ if (length < 4)
+ log_lldp("Found private TLV that is too short, ignoring.");
+ else {
+ /* RFC 8520: MUD URL */
+ if (memcmp(p, SD_LLDP_OUI_MUD, sizeof(SD_LLDP_OUI_MUD)) == 0 &&
+ p[sizeof(SD_LLDP_OUI_MUD)] == SD_LLDP_OUI_SUBTYPE_MUD_USAGE_DESCRIPTION) {
+ r = parse_string(&n->mud_url, p + sizeof(SD_LLDP_OUI_MUD) + 1,
+ length - 1 - sizeof(SD_LLDP_OUI_MUD));
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ break;
+ }
+
+ p += length, left -= length;
+ }
+
+end_marker:
+ if (!n->id.chassis_id || !n->id.port_id || !n->has_ttl) {
+ log_lldp("One or more mandatory TLV missing in datagram. Ignoring.");
+ return -EBADMSG;
+
+ }
+
+ n->rindex = sizeof(struct ether_header);
+
+ return 0;
+}
+
+void lldp_neighbor_start_ttl(sd_lldp_neighbor *n) {
+ assert(n);
+
+ if (n->ttl > 0) {
+ usec_t base;
+
+ /* Use the packet's timestamp if there is one known */
+ base = triple_timestamp_by_clock(&n->timestamp, clock_boottime_or_monotonic());
+ if (base <= 0 || base == USEC_INFINITY)
+ base = now(clock_boottime_or_monotonic()); /* Otherwise, take the current time */
+
+ n->until = usec_add(base, n->ttl * USEC_PER_SEC);
+ } else
+ n->until = 0;
+
+ if (n->lldp)
+ prioq_reshuffle(n->lldp->neighbor_by_expiry, n, &n->prioq_idx);
+}
+
+bool lldp_neighbor_equal(const sd_lldp_neighbor *a, const sd_lldp_neighbor *b) {
+ if (a == b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ if (a->raw_size != b->raw_size)
+ return false;
+
+ return memcmp(LLDP_NEIGHBOR_RAW(a), LLDP_NEIGHBOR_RAW(b), a->raw_size) == 0;
+}
+
+_public_ int sd_lldp_neighbor_get_source_address(sd_lldp_neighbor *n, struct ether_addr* address) {
+ assert_return(n, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ *address = n->source_address;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_destination_address(sd_lldp_neighbor *n, struct ether_addr* address) {
+ assert_return(n, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ *address = n->destination_address;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_raw(sd_lldp_neighbor *n, const void **ret, size_t *size) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ *ret = LLDP_NEIGHBOR_RAW(n);
+ *size = n->raw_size;
+
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_chassis_id(sd_lldp_neighbor *n, uint8_t *type, const void **ret, size_t *size) {
+ assert_return(n, -EINVAL);
+ assert_return(type, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ assert(n->id.chassis_id_size > 0);
+
+ *type = *(uint8_t*) n->id.chassis_id;
+ *ret = (uint8_t*) n->id.chassis_id + 1;
+ *size = n->id.chassis_id_size - 1;
+
+ return 0;
+}
+
+static int format_mac_address(const void *data, size_t sz, char **ret) {
+ struct ether_addr a;
+ char *k;
+
+ assert(data || sz <= 0);
+
+ if (sz != 7)
+ return 0;
+
+ memcpy(&a, (uint8_t*) data + 1, sizeof(a));
+
+ k = new(char, ETHER_ADDR_TO_STRING_MAX);
+ if (!k)
+ return -ENOMEM;
+
+ *ret = ether_addr_to_string(&a, k);
+ return 1;
+}
+
+static int format_network_address(const void *data, size_t sz, char **ret) {
+ union in_addr_union a;
+ int family, r;
+
+ if (sz == 6 && ((uint8_t*) data)[1] == 1) {
+ memcpy(&a.in, (uint8_t*) data + 2, sizeof(a.in));
+ family = AF_INET;
+ } else if (sz == 18 && ((uint8_t*) data)[1] == 2) {
+ memcpy(&a.in6, (uint8_t*) data + 2, sizeof(a.in6));
+ family = AF_INET6;
+ } else
+ return 0;
+
+ r = in_addr_to_string(family, &a, ret);
+ if (r < 0)
+ return r;
+ return 1;
+}
+
+_public_ int sd_lldp_neighbor_get_chassis_id_as_string(sd_lldp_neighbor *n, const char **ret) {
+ char *k;
+ int r;
+
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (n->chassis_id_as_string) {
+ *ret = n->chassis_id_as_string;
+ return 0;
+ }
+
+ assert(n->id.chassis_id_size > 0);
+
+ switch (*(uint8_t*) n->id.chassis_id) {
+
+ case SD_LLDP_CHASSIS_SUBTYPE_CHASSIS_COMPONENT:
+ case SD_LLDP_CHASSIS_SUBTYPE_INTERFACE_ALIAS:
+ case SD_LLDP_CHASSIS_SUBTYPE_PORT_COMPONENT:
+ case SD_LLDP_CHASSIS_SUBTYPE_INTERFACE_NAME:
+ case SD_LLDP_CHASSIS_SUBTYPE_LOCALLY_ASSIGNED:
+ k = cescape_length((char*) n->id.chassis_id + 1, n->id.chassis_id_size - 1);
+ if (!k)
+ return -ENOMEM;
+
+ goto done;
+
+ case SD_LLDP_CHASSIS_SUBTYPE_MAC_ADDRESS:
+ r = format_mac_address(n->id.chassis_id, n->id.chassis_id_size, &k);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto done;
+
+ break;
+
+ case SD_LLDP_CHASSIS_SUBTYPE_NETWORK_ADDRESS:
+ r = format_network_address(n->id.chassis_id, n->id.chassis_id_size, &k);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto done;
+
+ break;
+ }
+
+ /* Generic fallback */
+ k = hexmem(n->id.chassis_id, n->id.chassis_id_size);
+ if (!k)
+ return -ENOMEM;
+
+done:
+ *ret = n->chassis_id_as_string = k;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_port_id(sd_lldp_neighbor *n, uint8_t *type, const void **ret, size_t *size) {
+ assert_return(n, -EINVAL);
+ assert_return(type, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ assert(n->id.port_id_size > 0);
+
+ *type = *(uint8_t*) n->id.port_id;
+ *ret = (uint8_t*) n->id.port_id + 1;
+ *size = n->id.port_id_size - 1;
+
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_port_id_as_string(sd_lldp_neighbor *n, const char **ret) {
+ char *k;
+ int r;
+
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (n->port_id_as_string) {
+ *ret = n->port_id_as_string;
+ return 0;
+ }
+
+ assert(n->id.port_id_size > 0);
+
+ switch (*(uint8_t*) n->id.port_id) {
+
+ case SD_LLDP_PORT_SUBTYPE_INTERFACE_ALIAS:
+ case SD_LLDP_PORT_SUBTYPE_PORT_COMPONENT:
+ case SD_LLDP_PORT_SUBTYPE_INTERFACE_NAME:
+ case SD_LLDP_PORT_SUBTYPE_LOCALLY_ASSIGNED:
+ k = cescape_length((char*) n->id.port_id + 1, n->id.port_id_size - 1);
+ if (!k)
+ return -ENOMEM;
+
+ goto done;
+
+ case SD_LLDP_PORT_SUBTYPE_MAC_ADDRESS:
+ r = format_mac_address(n->id.port_id, n->id.port_id_size, &k);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto done;
+
+ break;
+
+ case SD_LLDP_PORT_SUBTYPE_NETWORK_ADDRESS:
+ r = format_network_address(n->id.port_id, n->id.port_id_size, &k);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto done;
+
+ break;
+ }
+
+ /* Generic fallback */
+ k = hexmem(n->id.port_id, n->id.port_id_size);
+ if (!k)
+ return -ENOMEM;
+
+done:
+ *ret = n->port_id_as_string = k;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_ttl(sd_lldp_neighbor *n, uint16_t *ret_sec) {
+ assert_return(n, -EINVAL);
+ assert_return(ret_sec, -EINVAL);
+
+ *ret_sec = n->ttl;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_system_name(sd_lldp_neighbor *n, const char **ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->system_name)
+ return -ENODATA;
+
+ *ret = n->system_name;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_system_description(sd_lldp_neighbor *n, const char **ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->system_description)
+ return -ENODATA;
+
+ *ret = n->system_description;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_port_description(sd_lldp_neighbor *n, const char **ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->port_description)
+ return -ENODATA;
+
+ *ret = n->port_description;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_mud_url(sd_lldp_neighbor *n, const char **ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->mud_url)
+ return -ENODATA;
+
+ *ret = n->mud_url;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_system_capabilities(sd_lldp_neighbor *n, uint16_t *ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->has_capabilities)
+ return -ENODATA;
+
+ *ret = n->system_capabilities;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_enabled_capabilities(sd_lldp_neighbor *n, uint16_t *ret) {
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!n->has_capabilities)
+ return -ENODATA;
+
+ *ret = n->enabled_capabilities;
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_from_raw(sd_lldp_neighbor **ret, const void *raw, size_t raw_size) {
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(raw || raw_size <= 0, -EINVAL);
+
+ n = lldp_neighbor_new(raw_size);
+ if (!n)
+ return -ENOMEM;
+
+ memcpy(LLDP_NEIGHBOR_RAW(n), raw, raw_size);
+ r = lldp_neighbor_parse(n);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(n);
+
+ return r;
+}
+
+_public_ int sd_lldp_neighbor_tlv_rewind(sd_lldp_neighbor *n) {
+ assert_return(n, -EINVAL);
+
+ assert(n->raw_size >= sizeof(struct ether_header));
+ n->rindex = sizeof(struct ether_header);
+
+ return n->rindex < n->raw_size;
+}
+
+_public_ int sd_lldp_neighbor_tlv_next(sd_lldp_neighbor *n) {
+ size_t length;
+
+ assert_return(n, -EINVAL);
+
+ if (n->rindex == n->raw_size) /* EOF */
+ return -ESPIPE;
+
+ if (n->rindex + 2 > n->raw_size) /* Truncated message */
+ return -EBADMSG;
+
+ length = LLDP_NEIGHBOR_TLV_LENGTH(n);
+ if (n->rindex + 2 + length > n->raw_size)
+ return -EBADMSG;
+
+ n->rindex += 2 + length;
+ return n->rindex < n->raw_size;
+}
+
+_public_ int sd_lldp_neighbor_tlv_get_type(sd_lldp_neighbor *n, uint8_t *type) {
+ assert_return(n, -EINVAL);
+ assert_return(type, -EINVAL);
+
+ if (n->rindex == n->raw_size) /* EOF */
+ return -ESPIPE;
+
+ if (n->rindex + 2 > n->raw_size)
+ return -EBADMSG;
+
+ *type = LLDP_NEIGHBOR_TLV_TYPE(n);
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_tlv_is_type(sd_lldp_neighbor *n, uint8_t type) {
+ uint8_t k;
+ int r;
+
+ assert_return(n, -EINVAL);
+
+ r = sd_lldp_neighbor_tlv_get_type(n, &k);
+ if (r < 0)
+ return r;
+
+ return type == k;
+}
+
+_public_ int sd_lldp_neighbor_tlv_get_oui(sd_lldp_neighbor *n, uint8_t oui[_SD_ARRAY_STATIC 3], uint8_t *subtype) {
+ const uint8_t *d;
+ size_t length;
+ int r;
+
+ assert_return(n, -EINVAL);
+ assert_return(oui, -EINVAL);
+ assert_return(subtype, -EINVAL);
+
+ r = sd_lldp_neighbor_tlv_is_type(n, SD_LLDP_TYPE_PRIVATE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENXIO;
+
+ length = LLDP_NEIGHBOR_TLV_LENGTH(n);
+ if (length < 4)
+ return -EBADMSG;
+
+ if (n->rindex + 2 + length > n->raw_size)
+ return -EBADMSG;
+
+ d = LLDP_NEIGHBOR_TLV_DATA(n);
+ memcpy(oui, d, 3);
+ *subtype = d[3];
+
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_tlv_is_oui(sd_lldp_neighbor *n, const uint8_t oui[_SD_ARRAY_STATIC 3], uint8_t subtype) {
+ uint8_t k[3], st;
+ int r;
+
+ r = sd_lldp_neighbor_tlv_get_oui(n, k, &st);
+ if (r == -ENXIO)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return memcmp(k, oui, 3) == 0 && st == subtype;
+}
+
+_public_ int sd_lldp_neighbor_tlv_get_raw(sd_lldp_neighbor *n, const void **ret, size_t *size) {
+ size_t length;
+
+ assert_return(n, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ /* Note that this returns the full TLV, including the TLV header */
+
+ if (n->rindex + 2 > n->raw_size)
+ return -EBADMSG;
+
+ length = LLDP_NEIGHBOR_TLV_LENGTH(n);
+ if (n->rindex + 2 + length > n->raw_size)
+ return -EBADMSG;
+
+ *ret = (uint8_t*) LLDP_NEIGHBOR_RAW(n) + n->rindex;
+ *size = length + 2;
+
+ return 0;
+}
+
+_public_ int sd_lldp_neighbor_get_timestamp(sd_lldp_neighbor *n, clockid_t clock, uint64_t *ret) {
+ assert_return(n, -EINVAL);
+ assert_return(TRIPLE_TIMESTAMP_HAS_CLOCK(clock), -EOPNOTSUPP);
+ assert_return(clock_supported(clock), -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+
+ if (!triple_timestamp_is_set(&n->timestamp))
+ return -ENODATA;
+
+ *ret = triple_timestamp_by_clock(&n->timestamp, clock);
+ return 0;
+}
diff --git a/src/libsystemd-network/lldp-neighbor.h b/src/libsystemd-network/lldp-neighbor.h
new file mode 100644
index 0000000..a5718c8
--- /dev/null
+++ b/src/libsystemd-network/lldp-neighbor.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-lldp.h"
+
+#include "hash-funcs.h"
+#include "lldp-internal.h"
+#include "time-util.h"
+
+typedef struct LLDPNeighborID {
+ /* The spec calls this an "MSAP identifier" */
+ void *chassis_id;
+ size_t chassis_id_size;
+
+ void *port_id;
+ size_t port_id_size;
+} LLDPNeighborID;
+
+struct sd_lldp_neighbor {
+ /* Neighbor objects stay around as long as they are linked into an "sd_lldp" object or n_ref > 0. */
+ sd_lldp *lldp;
+ unsigned n_ref;
+
+ triple_timestamp timestamp;
+
+ usec_t until;
+ unsigned prioq_idx;
+
+ struct ether_addr source_address;
+ struct ether_addr destination_address;
+
+ LLDPNeighborID id;
+
+ /* The raw packet size. The data is appended to the object, accessible via LLDP_NEIGHBOR_RAW() */
+ size_t raw_size;
+
+ /* The current read index for the iterative TLV interface */
+ size_t rindex;
+
+ /* And a couple of fields parsed out. */
+ bool has_ttl:1;
+ bool has_capabilities:1;
+ bool has_port_vlan_id:1;
+
+ uint16_t ttl;
+
+ uint16_t system_capabilities;
+ uint16_t enabled_capabilities;
+
+ char *port_description;
+ char *system_name;
+ char *system_description;
+ char *mud_url;
+
+ uint16_t port_vlan_id;
+
+ char *chassis_id_as_string;
+ char *port_id_as_string;
+};
+
+static inline void *LLDP_NEIGHBOR_RAW(const sd_lldp_neighbor *n) {
+ return (uint8_t*) n + ALIGN(sizeof(sd_lldp_neighbor));
+}
+
+static inline uint8_t LLDP_NEIGHBOR_TLV_TYPE(const sd_lldp_neighbor *n) {
+ return ((uint8_t*) LLDP_NEIGHBOR_RAW(n))[n->rindex] >> 1;
+}
+
+static inline size_t LLDP_NEIGHBOR_TLV_LENGTH(const sd_lldp_neighbor *n) {
+ uint8_t *p;
+
+ p = (uint8_t*) LLDP_NEIGHBOR_RAW(n) + n->rindex;
+ return p[1] + (((size_t) (p[0] & 1)) << 8);
+}
+
+static inline void* LLDP_NEIGHBOR_TLV_DATA(const sd_lldp_neighbor *n) {
+ return ((uint8_t*) LLDP_NEIGHBOR_RAW(n)) + n->rindex + 2;
+}
+
+extern const struct hash_ops lldp_neighbor_hash_ops;
+int lldp_neighbor_id_compare_func(const LLDPNeighborID *x, const LLDPNeighborID *y);
+int lldp_neighbor_prioq_compare_func(const void *a, const void *b);
+
+sd_lldp_neighbor *lldp_neighbor_unlink(sd_lldp_neighbor *n);
+sd_lldp_neighbor *lldp_neighbor_new(size_t raw_size);
+int lldp_neighbor_parse(sd_lldp_neighbor *n);
+void lldp_neighbor_start_ttl(sd_lldp_neighbor *n);
+bool lldp_neighbor_equal(const sd_lldp_neighbor *a, const sd_lldp_neighbor *b);
diff --git a/src/libsystemd-network/lldp-network.c b/src/libsystemd-network/lldp-network.c
new file mode 100644
index 0000000..9616cb6
--- /dev/null
+++ b/src/libsystemd-network/lldp-network.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/filter.h>
+#include <netinet/if_ether.h>
+
+#include "fd-util.h"
+#include "lldp-network.h"
+#include "missing_network.h"
+#include "socket-util.h"
+
+int lldp_network_bind_raw_socket(int ifindex) {
+
+ static const struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ethhdr, h_dest)), /* A <- 4 bytes of destination MAC */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0180c200, 1, 0), /* A != 01:80:c2:00 */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* drop packet */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ethhdr, h_dest) + 4), /* A <- remaining 2 bytes of destination MAC */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0000, 3, 0), /* A != 00:00 */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0003, 2, 0), /* A != 00:03 */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x000e, 1, 0), /* A != 00:0e */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* drop packet */
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ethhdr, h_proto)), /* A <- protocol */
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_LLDP, 1, 0), /* A != ETHERTYPE_LLDP */
+ BPF_STMT(BPF_RET + BPF_K, 0), /* drop packet */
+ BPF_STMT(BPF_RET + BPF_K, (uint32_t) -1), /* accept packet */
+ };
+
+ static const struct sock_fprog fprog = {
+ .len = ELEMENTSOF(filter),
+ .filter = (struct sock_filter*) filter,
+ };
+
+ struct packet_mreq mreq = {
+ .mr_ifindex = ifindex,
+ .mr_type = PACKET_MR_MULTICAST,
+ .mr_alen = ETH_ALEN,
+ .mr_address = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x00 }
+ };
+
+ union sockaddr_union saddrll = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_ifindex = ifindex,
+ };
+
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(ifindex > 0);
+
+ fd = socket(AF_PACKET, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK,
+ htobe16(ETHERTYPE_LLDP));
+ if (fd < 0)
+ return -errno;
+
+ r = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
+ if (r < 0)
+ return -errno;
+
+ r = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+ if (r < 0)
+ return -errno;
+
+ mreq.mr_address[ETH_ALEN - 1] = 0x03;
+ r = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+ if (r < 0)
+ return -errno;
+
+ mreq.mr_address[ETH_ALEN - 1] = 0x0E;
+ r = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+ if (r < 0)
+ return -errno;
+
+ r = bind(fd, &saddrll.sa, sizeof(saddrll.ll));
+ if (r < 0)
+ return -errno;
+
+ return TAKE_FD(fd);
+}
diff --git a/src/libsystemd-network/lldp-network.h b/src/libsystemd-network/lldp-network.h
new file mode 100644
index 0000000..bc69b32
--- /dev/null
+++ b/src/libsystemd-network/lldp-network.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+int lldp_network_bind_raw_socket(int ifindex);
diff --git a/src/libsystemd-network/meson.build b/src/libsystemd-network/meson.build
new file mode 100644
index 0000000..604cfd9
--- /dev/null
+++ b/src/libsystemd-network/meson.build
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+sources = files('''
+ sd-dhcp-client.c
+ sd-dhcp-server.c
+ dhcp-client-internal.h
+ dhcp-network.c
+ dhcp-option.c
+ dhcp-packet.c
+ dhcp-internal.h
+ dhcp-server-internal.h
+ dhcp-protocol.h
+ dhcp-lease-internal.h
+ sd-dhcp-lease.c
+ sd-ipv4ll.c
+ sd-ipv4acd.c
+ arp-util.h
+ arp-util.c
+ network-internal.c
+ sd-ndisc.c
+ ndisc-internal.h
+ ndisc-router.h
+ ndisc-router.c
+ sd-radv.c
+ radv-internal.h
+ icmp6-util.h
+ icmp6-util.c
+ sd-dhcp6-client.c
+ dhcp6-internal.h
+ dhcp6-protocol.h
+ dhcp6-network.c
+ dhcp6-option.c
+ dhcp6-lease-internal.h
+ sd-dhcp6-lease.c
+ dhcp-identifier.h
+ dhcp-identifier.c
+ lldp-internal.h
+ lldp-network.h
+ lldp-network.c
+ lldp-neighbor.h
+ lldp-neighbor.c
+ sd-lldp.c
+'''.split())
+
+network_internal_h = files('network-internal.h')
+
+libsystemd_network = static_library(
+ 'systemd-network',
+ sources,
+ network_internal_h,
+ include_directories : includes)
diff --git a/src/libsystemd-network/ndisc-internal.h b/src/libsystemd-network/ndisc-internal.h
new file mode 100644
index 0000000..65f9371
--- /dev/null
+++ b/src/libsystemd-network/ndisc-internal.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include "log.h"
+#include "time-util.h"
+
+#include "sd-ndisc.h"
+
+#define NDISC_ROUTER_SOLICITATION_INTERVAL (4U * USEC_PER_SEC)
+#define NDISC_MAX_ROUTER_SOLICITATION_INTERVAL (3600U * USEC_PER_SEC)
+#define NDISC_MAX_ROUTER_SOLICITATIONS 3U
+
+struct sd_ndisc {
+ unsigned n_ref;
+
+ int ifindex;
+ int fd;
+
+ sd_event *event;
+ int event_priority;
+
+ struct ether_addr mac_addr;
+ uint8_t hop_limit;
+ uint32_t mtu;
+
+ sd_event_source *recv_event_source;
+ sd_event_source *timeout_event_source;
+ sd_event_source *timeout_no_ra;
+
+ usec_t retransmit_time;
+
+ sd_ndisc_callback_t callback;
+ void *userdata;
+};
+
+#define log_ndisc_errno(error, fmt, ...) log_internal(LOG_DEBUG, error, PROJECT_FILE, __LINE__, __func__, "NDISC: " fmt, ##__VA_ARGS__)
+#define log_ndisc(fmt, ...) log_ndisc_errno(0, fmt, ##__VA_ARGS__)
+
+const char* ndisc_event_to_string(sd_ndisc_event e) _const_;
+sd_ndisc_event ndisc_event_from_string(const char *s) _pure_;
diff --git a/src/libsystemd-network/ndisc-router.c b/src/libsystemd-network/ndisc-router.c
new file mode 100644
index 0000000..3cb71db
--- /dev/null
+++ b/src/libsystemd-network/ndisc-router.c
@@ -0,0 +1,750 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+
+#include "sd-ndisc.h"
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "hostname-util.h"
+#include "memory-util.h"
+#include "missing_network.h"
+#include "ndisc-internal.h"
+#include "ndisc-router.h"
+#include "strv.h"
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_ndisc_router, sd_ndisc_router, mfree);
+
+sd_ndisc_router *ndisc_router_new(size_t raw_size) {
+ sd_ndisc_router *rt;
+
+ rt = malloc0(ALIGN(sizeof(sd_ndisc_router)) + raw_size);
+ if (!rt)
+ return NULL;
+
+ rt->raw_size = raw_size;
+ rt->n_ref = 1;
+
+ return rt;
+}
+
+_public_ int sd_ndisc_router_from_raw(sd_ndisc_router **ret, const void *raw, size_t raw_size) {
+ _cleanup_(sd_ndisc_router_unrefp) sd_ndisc_router *rt = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(raw || raw_size <= 0, -EINVAL);
+
+ rt = ndisc_router_new(raw_size);
+ if (!rt)
+ return -ENOMEM;
+
+ memcpy(NDISC_ROUTER_RAW(rt), raw, raw_size);
+ r = ndisc_router_parse(rt);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(rt);
+
+ return r;
+}
+
+_public_ int sd_ndisc_router_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret_addr, -EINVAL);
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&rt->address))
+ return -ENODATA;
+
+ *ret_addr = rt->address;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_timestamp(sd_ndisc_router *rt, clockid_t clock, uint64_t *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(TRIPLE_TIMESTAMP_HAS_CLOCK(clock), -EOPNOTSUPP);
+ assert_return(clock_supported(clock), -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+
+ if (!triple_timestamp_is_set(&rt->timestamp))
+ return -ENODATA;
+
+ *ret = triple_timestamp_by_clock(&rt->timestamp, clock);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_raw(sd_ndisc_router *rt, const void **ret, size_t *size) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ *ret = NDISC_ROUTER_RAW(rt);
+ *size = rt->raw_size;
+
+ return 0;
+}
+
+int ndisc_router_parse(sd_ndisc_router *rt) {
+ struct nd_router_advert *a;
+ const uint8_t *p;
+ bool has_mtu = false, has_flag_extension = false;
+ size_t left;
+
+ assert(rt);
+
+ if (rt->raw_size < sizeof(struct nd_router_advert)) {
+ log_ndisc("Too small to be a router advertisement, ignoring.");
+ return -EBADMSG;
+ }
+
+ /* Router advertisement packets are neatly aligned to 64bit boundaries, hence we can access them directly */
+ a = NDISC_ROUTER_RAW(rt);
+
+ if (a->nd_ra_type != ND_ROUTER_ADVERT) {
+ log_ndisc("Received ND packet that is not a router advertisement, ignoring.");
+ return -EBADMSG;
+ }
+
+ if (a->nd_ra_code != 0) {
+ log_ndisc("Received ND packet with wrong RA code, ignoring.");
+ return -EBADMSG;
+ }
+
+ rt->hop_limit = a->nd_ra_curhoplimit;
+ rt->flags = a->nd_ra_flags_reserved; /* the first 8bit */
+ rt->lifetime = be16toh(a->nd_ra_router_lifetime);
+
+ rt->preference = (rt->flags >> 3) & 3;
+ if (!IN_SET(rt->preference, SD_NDISC_PREFERENCE_LOW, SD_NDISC_PREFERENCE_HIGH))
+ rt->preference = SD_NDISC_PREFERENCE_MEDIUM;
+
+ p = (const uint8_t*) NDISC_ROUTER_RAW(rt) + sizeof(struct nd_router_advert);
+ left = rt->raw_size - sizeof(struct nd_router_advert);
+
+ for (;;) {
+ uint8_t type;
+ size_t length;
+
+ if (left == 0)
+ break;
+
+ if (left < 2) {
+ log_ndisc("Option lacks header, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ type = p[0];
+ length = p[1] * 8;
+
+ if (length == 0) {
+ log_ndisc("Zero-length option, ignoring datagram.");
+ return -EBADMSG;
+ }
+ if (left < length) {
+ log_ndisc("Option truncated, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ switch (type) {
+
+ case SD_NDISC_OPTION_PREFIX_INFORMATION:
+
+ if (length != 4*8) {
+ log_ndisc("Prefix option of invalid size, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ if (p[2] > 128) {
+ log_ndisc("Bad prefix length, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ break;
+
+ case SD_NDISC_OPTION_MTU: {
+ uint32_t m;
+
+ if (has_mtu) {
+ log_ndisc("MTU option specified twice, ignoring.");
+ break;
+ }
+
+ if (length != 8) {
+ log_ndisc("MTU option of invalid size, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ m = be32toh(*(uint32_t*) (p + 4));
+ if (m >= IPV6_MIN_MTU) /* ignore invalidly small MTUs */
+ rt->mtu = m;
+
+ has_mtu = true;
+ break;
+ }
+
+ case SD_NDISC_OPTION_ROUTE_INFORMATION:
+ if (length < 1*8 || length > 3*8) {
+ log_ndisc("Route information option of invalid size, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ if (p[2] > 128) {
+ log_ndisc("Bad route prefix length, ignoring datagram.");
+ return -EBADMSG;
+ }
+
+ break;
+
+ case SD_NDISC_OPTION_RDNSS:
+ if (length < 3*8 || (length % (2*8)) != 1*8) {
+ log_ndisc("RDNSS option has invalid size.");
+ return -EBADMSG;
+ }
+
+ break;
+
+ case SD_NDISC_OPTION_FLAGS_EXTENSION:
+
+ if (has_flag_extension) {
+ log_ndisc("Flags extension option specified twice, ignoring.");
+ break;
+ }
+
+ if (length < 1*8) {
+ log_ndisc("Flags extension option has invalid size.");
+ return -EBADMSG;
+ }
+
+ /* Add in the additional flags bits */
+ rt->flags |=
+ ((uint64_t) p[2] << 8) |
+ ((uint64_t) p[3] << 16) |
+ ((uint64_t) p[4] << 24) |
+ ((uint64_t) p[5] << 32) |
+ ((uint64_t) p[6] << 40) |
+ ((uint64_t) p[7] << 48);
+
+ has_flag_extension = true;
+ break;
+
+ case SD_NDISC_OPTION_DNSSL:
+ if (length < 2*8) {
+ log_ndisc("DNSSL option has invalid size.");
+ return -EBADMSG;
+ }
+
+ break;
+ }
+
+ p += length, left -= length;
+ }
+
+ rt->rindex = sizeof(struct nd_router_advert);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_hop_limit(sd_ndisc_router *rt, uint8_t *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ *ret = rt->hop_limit;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_flags(sd_ndisc_router *rt, uint64_t *ret_flags) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret_flags, -EINVAL);
+
+ *ret_flags = rt->flags;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_lifetime(sd_ndisc_router *rt, uint16_t *ret_lifetime) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret_lifetime, -EINVAL);
+
+ *ret_lifetime = rt->lifetime;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_preference(sd_ndisc_router *rt, unsigned *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ *ret = rt->preference;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_get_mtu(sd_ndisc_router *rt, uint32_t *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (rt->mtu <= 0)
+ return -ENODATA;
+
+ *ret = rt->mtu;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_option_rewind(sd_ndisc_router *rt) {
+ assert_return(rt, -EINVAL);
+
+ assert(rt->raw_size >= sizeof(struct nd_router_advert));
+ rt->rindex = sizeof(struct nd_router_advert);
+
+ return rt->rindex < rt->raw_size;
+}
+
+_public_ int sd_ndisc_router_option_next(sd_ndisc_router *rt) {
+ size_t length;
+
+ assert_return(rt, -EINVAL);
+
+ if (rt->rindex == rt->raw_size) /* EOF */
+ return -ESPIPE;
+
+ if (rt->rindex + 2 > rt->raw_size) /* Truncated message */
+ return -EBADMSG;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (rt->rindex + length > rt->raw_size)
+ return -EBADMSG;
+
+ rt->rindex += length;
+ return rt->rindex < rt->raw_size;
+}
+
+_public_ int sd_ndisc_router_option_get_type(sd_ndisc_router *rt, uint8_t *ret) {
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (rt->rindex == rt->raw_size) /* EOF */
+ return -ESPIPE;
+
+ if (rt->rindex + 2 > rt->raw_size) /* Truncated message */
+ return -EBADMSG;
+
+ *ret = NDISC_ROUTER_OPTION_TYPE(rt);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_option_is_type(sd_ndisc_router *rt, uint8_t type) {
+ uint8_t k;
+ int r;
+
+ assert_return(rt, -EINVAL);
+
+ r = sd_ndisc_router_option_get_type(rt, &k);
+ if (r < 0)
+ return r;
+
+ return type == k;
+}
+
+_public_ int sd_ndisc_router_option_get_raw(sd_ndisc_router *rt, const void **ret, size_t *size) {
+ size_t length;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ /* Note that this returns the full option, including the option header */
+
+ if (rt->rindex + 2 > rt->raw_size)
+ return -EBADMSG;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (rt->rindex + length > rt->raw_size)
+ return -EBADMSG;
+
+ *ret = (uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex;
+ *size = length;
+
+ return 0;
+}
+
+static int get_prefix_info(sd_ndisc_router *rt, struct nd_opt_prefix_info **ret) {
+ struct nd_opt_prefix_info *ri;
+ size_t length;
+ int r;
+
+ assert(rt);
+ assert(ret);
+
+ r = sd_ndisc_router_option_is_type(rt, SD_NDISC_OPTION_PREFIX_INFORMATION);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (length != sizeof(struct nd_opt_prefix_info))
+ return -EBADMSG;
+
+ ri = (struct nd_opt_prefix_info*) ((uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex);
+ if (ri->nd_opt_pi_prefix_len > 128)
+ return -EBADMSG;
+
+ *ret = ri;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_valid_lifetime(sd_ndisc_router *rt, uint32_t *ret) {
+ struct nd_opt_prefix_info *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_prefix_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = be32toh(ri->nd_opt_pi_valid_time);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_preferred_lifetime(sd_ndisc_router *rt, uint32_t *ret) {
+ struct nd_opt_prefix_info *pi;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_prefix_info(rt, &pi);
+ if (r < 0)
+ return r;
+
+ *ret = be32toh(pi->nd_opt_pi_preferred_time);
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_flags(sd_ndisc_router *rt, uint8_t *ret) {
+ struct nd_opt_prefix_info *pi;
+ uint8_t flags;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_prefix_info(rt, &pi);
+ if (r < 0)
+ return r;
+
+ flags = pi->nd_opt_pi_flags_reserved;
+
+ if ((flags & ND_OPT_PI_FLAG_AUTO) && (pi->nd_opt_pi_prefix_len != 64)) {
+ log_ndisc("Invalid prefix length, ignoring prefix for stateless autoconfiguration.");
+ flags &= ~ND_OPT_PI_FLAG_AUTO;
+ }
+
+ *ret = flags;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr) {
+ struct nd_opt_prefix_info *pi;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret_addr, -EINVAL);
+
+ r = get_prefix_info(rt, &pi);
+ if (r < 0)
+ return r;
+
+ *ret_addr = pi->nd_opt_pi_prefix;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_prefix_get_prefixlen(sd_ndisc_router *rt, unsigned *ret) {
+ struct nd_opt_prefix_info *pi;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_prefix_info(rt, &pi);
+ if (r < 0)
+ return r;
+
+ if (pi->nd_opt_pi_prefix_len > 128)
+ return -EBADMSG;
+
+ *ret = pi->nd_opt_pi_prefix_len;
+ return 0;
+}
+
+static int get_route_info(sd_ndisc_router *rt, uint8_t **ret) {
+ uint8_t *ri;
+ size_t length;
+ int r;
+
+ assert(rt);
+ assert(ret);
+
+ r = sd_ndisc_router_option_is_type(rt, SD_NDISC_OPTION_ROUTE_INFORMATION);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (length < 1*8 || length > 3*8)
+ return -EBADMSG;
+
+ ri = (uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex;
+
+ if (ri[2] > 128)
+ return -EBADMSG;
+
+ *ret = ri;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_route_get_lifetime(sd_ndisc_router *rt, uint32_t *ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_route_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = be32toh(*(uint32_t*) (ri + 4));
+ return 0;
+}
+
+_public_ int sd_ndisc_router_route_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret_addr, -EINVAL);
+
+ r = get_route_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ zero(*ret_addr);
+ memcpy(ret_addr, ri + 8, NDISC_ROUTER_OPTION_LENGTH(rt) - 8);
+
+ return 0;
+}
+
+_public_ int sd_ndisc_router_route_get_prefixlen(sd_ndisc_router *rt, unsigned *ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_route_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = ri[2];
+ return 0;
+}
+
+_public_ int sd_ndisc_router_route_get_preference(sd_ndisc_router *rt, unsigned *ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_route_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = (ri[3] >> 3) & 3;
+ if (!IN_SET(*ret, SD_NDISC_PREFERENCE_LOW, SD_NDISC_PREFERENCE_HIGH))
+ *ret = SD_NDISC_PREFERENCE_MEDIUM;
+
+ return 0;
+}
+
+static int get_rdnss_info(sd_ndisc_router *rt, uint8_t **ret) {
+ size_t length;
+ int r;
+
+ assert(rt);
+ assert(ret);
+
+ r = sd_ndisc_router_option_is_type(rt, SD_NDISC_OPTION_RDNSS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (length < 3*8 || (length % (2*8)) != 1*8)
+ return -EBADMSG;
+
+ *ret = (uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_rdnss_get_addresses(sd_ndisc_router *rt, const struct in6_addr **ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_rdnss_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = (const struct in6_addr*) (ri + 8);
+ return (NDISC_ROUTER_OPTION_LENGTH(rt) - 8) / 16;
+}
+
+_public_ int sd_ndisc_router_rdnss_get_lifetime(sd_ndisc_router *rt, uint32_t *ret) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_rdnss_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret = be32toh(*(uint32_t*) (ri + 4));
+ return 0;
+}
+
+static int get_dnssl_info(sd_ndisc_router *rt, uint8_t **ret) {
+ size_t length;
+ int r;
+
+ assert(rt);
+ assert(ret);
+
+ r = sd_ndisc_router_option_is_type(rt, SD_NDISC_OPTION_DNSSL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+
+ length = NDISC_ROUTER_OPTION_LENGTH(rt);
+ if (length < 2*8)
+ return -EBADMSG;
+
+ *ret = (uint8_t*) NDISC_ROUTER_RAW(rt) + rt->rindex;
+ return 0;
+}
+
+_public_ int sd_ndisc_router_dnssl_get_domains(sd_ndisc_router *rt, char ***ret) {
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ char *e = NULL;
+ size_t allocated = 0, n = 0, left;
+ uint8_t *ri, *p;
+ bool first = true;
+ int r;
+ unsigned k = 0;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = get_dnssl_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ p = ri + 8;
+ left = NDISC_ROUTER_OPTION_LENGTH(rt) - 8;
+
+ for (;;) {
+ if (left == 0) {
+
+ if (n > 0) /* Not properly NUL terminated */
+ return -EBADMSG;
+
+ break;
+ }
+
+ if (*p == 0) {
+ /* Found NUL termination */
+
+ if (n > 0) {
+ _cleanup_free_ char *normalized = NULL;
+
+ e[n] = 0;
+ r = dns_name_normalize(e, 0, &normalized);
+ if (r < 0)
+ return r;
+
+ /* Ignore the root domain name or "localhost" and friends */
+ if (!is_localhost(normalized) &&
+ !dns_name_is_root(normalized)) {
+
+ if (strv_push(&l, normalized) < 0)
+ return -ENOMEM;
+
+ normalized = NULL;
+ k++;
+ }
+ }
+
+ n = 0;
+ first = true;
+ p++, left--;
+ continue;
+ }
+
+ /* Check for compression (which is not allowed) */
+ if (*p > 63)
+ return -EBADMSG;
+
+ if (1U + *p + 1U > left)
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(e, allocated, n + !first + DNS_LABEL_ESCAPED_MAX + 1U))
+ return -ENOMEM;
+
+ if (first)
+ first = false;
+ else
+ e[n++] = '.';
+
+ r = dns_label_escape((char*) p+1, *p, e + n, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ n += r;
+
+ left -= 1 + *p;
+ p += 1 + *p;
+ }
+
+ if (strv_isempty(l)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ *ret = TAKE_PTR(l);
+
+ return k;
+}
+
+_public_ int sd_ndisc_router_dnssl_get_lifetime(sd_ndisc_router *rt, uint32_t *ret_sec) {
+ uint8_t *ri;
+ int r;
+
+ assert_return(rt, -EINVAL);
+ assert_return(ret_sec, -EINVAL);
+
+ r = get_dnssl_info(rt, &ri);
+ if (r < 0)
+ return r;
+
+ *ret_sec = be32toh(*(uint32_t*) (ri + 4));
+ return 0;
+}
diff --git a/src/libsystemd-network/ndisc-router.h b/src/libsystemd-network/ndisc-router.h
new file mode 100644
index 0000000..cb3a564
--- /dev/null
+++ b/src/libsystemd-network/ndisc-router.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include "sd-ndisc.h"
+
+#include "time-util.h"
+
+struct sd_ndisc_router {
+ unsigned n_ref;
+
+ triple_timestamp timestamp;
+ struct in6_addr address;
+
+ /* The raw packet size. The data is appended to the object, accessible via NDIS_ROUTER_RAW() */
+ size_t raw_size;
+
+ /* The current read index for the iterative option interface */
+ size_t rindex;
+
+ uint64_t flags;
+ unsigned preference;
+ uint16_t lifetime;
+
+ uint8_t hop_limit;
+ uint32_t mtu;
+};
+
+static inline void* NDISC_ROUTER_RAW(const sd_ndisc_router *rt) {
+ return (uint8_t*) rt + ALIGN(sizeof(sd_ndisc_router));
+}
+
+static inline void *NDISC_ROUTER_OPTION_DATA(const sd_ndisc_router *rt) {
+ return ((uint8_t*) NDISC_ROUTER_RAW(rt)) + rt->rindex;
+}
+
+static inline uint8_t NDISC_ROUTER_OPTION_TYPE(const sd_ndisc_router *rt) {
+ return ((uint8_t*) NDISC_ROUTER_OPTION_DATA(rt))[0];
+}
+static inline size_t NDISC_ROUTER_OPTION_LENGTH(const sd_ndisc_router *rt) {
+ return ((uint8_t*) NDISC_ROUTER_OPTION_DATA(rt))[1] * 8;
+}
+
+sd_ndisc_router *ndisc_router_new(size_t raw_size);
+int ndisc_router_parse(sd_ndisc_router *rt);
diff --git a/src/libsystemd-network/network-internal.c b/src/libsystemd-network/network-internal.c
new file mode 100644
index 0000000..e4a0741
--- /dev/null
+++ b/src/libsystemd-network/network-internal.c
@@ -0,0 +1,886 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <netinet/ether.h>
+
+#include "sd-id128.h"
+#include "sd-ndisc.h"
+
+#include "alloc-util.h"
+#include "arphrd-list.h"
+#include "condition.h"
+#include "conf-parser.h"
+#include "device-util.h"
+#include "dhcp-lease-internal.h"
+#include "env-util.h"
+#include "ether-addr-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "network-internal.h"
+#include "parse-util.h"
+#include "siphash24.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "util.h"
+
+const char *net_get_name_persistent(sd_device *device) {
+ const char *name, *field;
+
+ assert(device);
+
+ /* fetch some persistent data unique (on this machine) to this device */
+ FOREACH_STRING(field, "ID_NET_NAME_ONBOARD", "ID_NET_NAME_SLOT", "ID_NET_NAME_PATH", "ID_NET_NAME_MAC")
+ if (sd_device_get_property_value(device, field, &name) >= 0)
+ return name;
+
+ return NULL;
+}
+
+#define HASH_KEY SD_ID128_MAKE(d3,1e,48,fa,90,fe,4b,4c,9d,af,d5,d7,a1,b1,2e,8a)
+
+int net_get_unique_predictable_data(sd_device *device, bool use_sysname, uint64_t *result) {
+ size_t l, sz = 0;
+ const char *name;
+ int r;
+ uint8_t *v;
+
+ assert(device);
+
+ /* net_get_name_persistent() will return one of the device names based on stable information about
+ * the device. If this is not available, we fall back to using the actual device name. */
+ name = net_get_name_persistent(device);
+ if (!name && use_sysname)
+ (void) sd_device_get_sysname(device, &name);
+ if (!name)
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(ENODATA),
+ "No stable identifying information found");
+
+ log_device_debug(device, "Using \"%s\" as stable identifying information", name);
+ l = strlen(name);
+ sz = sizeof(sd_id128_t) + l;
+ v = newa(uint8_t, sz);
+
+ /* Fetch some persistent data unique to this machine */
+ r = sd_id128_get_machine((sd_id128_t*) v);
+ if (r < 0)
+ return r;
+ memcpy(v + sizeof(sd_id128_t), name, l);
+
+ /* Let's hash the machine ID plus the device name. We use
+ * a fixed, but originally randomly created hash key here. */
+ *result = htole64(siphash24(v, sz, HASH_KEY.bytes));
+ return 0;
+}
+
+static bool net_condition_test_strv(char * const *patterns, const char *string) {
+ char * const *p;
+ bool match = false, has_positive_rule = false;
+
+ if (strv_isempty(patterns))
+ return true;
+
+ STRV_FOREACH(p, patterns) {
+ const char *q = *p;
+ bool invert;
+
+ invert = *q == '!';
+ q += invert;
+
+ if (!invert)
+ has_positive_rule = true;
+
+ if (string && fnmatch(q, string, 0) == 0) {
+ if (invert)
+ return false;
+ else
+ match = true;
+ }
+ }
+
+ return has_positive_rule ? match : true;
+}
+
+static bool net_condition_test_ifname(char * const *patterns, const char *ifname, char * const *alternative_names) {
+ if (net_condition_test_strv(patterns, ifname))
+ return true;
+
+ char * const *p;
+ STRV_FOREACH(p, alternative_names)
+ if (net_condition_test_strv(patterns, *p))
+ return true;
+
+ return false;
+}
+
+static int net_condition_test_property(char * const *match_property, sd_device *device) {
+ char * const *p;
+
+ if (strv_isempty(match_property))
+ return true;
+
+ STRV_FOREACH(p, match_property) {
+ _cleanup_free_ char *key = NULL;
+ const char *val, *dev_val;
+ bool invert, v;
+
+ invert = **p == '!';
+
+ val = strchr(*p + invert, '=');
+ if (!val)
+ return -EINVAL;
+
+ key = strndup(*p + invert, val - *p - invert);
+ if (!key)
+ return -ENOMEM;
+
+ val++;
+
+ v = device &&
+ sd_device_get_property_value(device, key, &dev_val) >= 0 &&
+ fnmatch(val, dev_val, 0) == 0;
+
+ if (invert ? v : !v)
+ return false;
+ }
+
+ return true;
+}
+
+static const char *const wifi_iftype_table[NL80211_IFTYPE_MAX+1] = {
+ [NL80211_IFTYPE_ADHOC] = "ad-hoc",
+ [NL80211_IFTYPE_STATION] = "station",
+ [NL80211_IFTYPE_AP] = "ap",
+ [NL80211_IFTYPE_AP_VLAN] = "ap-vlan",
+ [NL80211_IFTYPE_WDS] = "wds",
+ [NL80211_IFTYPE_MONITOR] = "monitor",
+ [NL80211_IFTYPE_MESH_POINT] = "mesh-point",
+ [NL80211_IFTYPE_P2P_CLIENT] = "p2p-client",
+ [NL80211_IFTYPE_P2P_GO] = "p2p-go",
+ [NL80211_IFTYPE_P2P_DEVICE] = "p2p-device",
+ [NL80211_IFTYPE_OCB] = "ocb",
+ [NL80211_IFTYPE_NAN] = "nan",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(wifi_iftype, enum nl80211_iftype);
+
+char *link_get_type_string(unsigned short iftype, sd_device *device) {
+ const char *t, *devtype;
+ char *p;
+
+ if (device &&
+ sd_device_get_devtype(device, &devtype) >= 0 &&
+ !isempty(devtype))
+ return strdup(devtype);
+
+ t = arphrd_to_name(iftype);
+ if (!t)
+ return NULL;
+
+ p = strdup(t);
+ if (!p)
+ return NULL;
+
+ ascii_strlower(p);
+ return p;
+}
+
+bool net_match_config(Set *match_mac,
+ Set *match_permanent_mac,
+ char * const *match_paths,
+ char * const *match_drivers,
+ char * const *match_iftypes,
+ char * const *match_names,
+ char * const *match_property,
+ char * const *match_wifi_iftype,
+ char * const *match_ssid,
+ Set *match_bssid,
+ sd_device *device,
+ const struct ether_addr *dev_mac,
+ const struct ether_addr *dev_permanent_mac,
+ const char *dev_driver,
+ unsigned short dev_iftype,
+ const char *dev_name,
+ char * const *alternative_names,
+ enum nl80211_iftype dev_wifi_iftype,
+ const char *dev_ssid,
+ const struct ether_addr *dev_bssid) {
+
+ _cleanup_free_ char *dev_iftype_str;
+ const char *dev_path = NULL;
+
+ dev_iftype_str = link_get_type_string(dev_iftype, device);
+
+ if (device) {
+ const char *mac_str;
+
+ (void) sd_device_get_property_value(device, "ID_PATH", &dev_path);
+ if (!dev_driver)
+ (void) sd_device_get_property_value(device, "ID_NET_DRIVER", &dev_driver);
+ if (!dev_name)
+ (void) sd_device_get_sysname(device, &dev_name);
+ if (!dev_mac &&
+ sd_device_get_sysattr_value(device, "address", &mac_str) >= 0)
+ dev_mac = ether_aton(mac_str);
+ }
+
+ if (match_mac && (!dev_mac || !set_contains(match_mac, dev_mac)))
+ return false;
+
+ if (match_permanent_mac &&
+ (!dev_permanent_mac ||
+ ether_addr_is_null(dev_permanent_mac) ||
+ !set_contains(match_permanent_mac, dev_permanent_mac)))
+ return false;
+
+ if (!net_condition_test_strv(match_paths, dev_path))
+ return false;
+
+ if (!net_condition_test_strv(match_drivers, dev_driver))
+ return false;
+
+ if (!net_condition_test_strv(match_iftypes, dev_iftype_str))
+ return false;
+
+ if (!net_condition_test_ifname(match_names, dev_name, alternative_names))
+ return false;
+
+ if (!net_condition_test_property(match_property, device))
+ return false;
+
+ if (!net_condition_test_strv(match_wifi_iftype, wifi_iftype_to_string(dev_wifi_iftype)))
+ return false;
+
+ if (!net_condition_test_strv(match_ssid, dev_ssid))
+ return false;
+
+ if (match_bssid && (!dev_bssid || !set_contains(match_bssid, dev_bssid)))
+ return false;
+
+ return true;
+}
+
+int config_parse_net_condition(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ConditionType cond = ltype;
+ Condition **list = data, *c;
+ bool negate;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *list = condition_free_list_type(*list, cond);
+ return 0;
+ }
+
+ negate = rvalue[0] == '!';
+ if (negate)
+ rvalue++;
+
+ c = condition_new(cond, rvalue, false, negate);
+ if (!c)
+ return log_oom();
+
+ /* Drop previous assignment. */
+ *list = condition_free_list_type(*list, cond);
+
+ LIST_PREPEND(conditions, *list, c);
+ return 0;
+}
+
+int config_parse_match_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const char *p = rvalue;
+ char ***sv = data;
+ bool invert;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *sv = strv_free(*sv);
+ return 0;
+ }
+
+ invert = *p == '!';
+ p += invert;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (invert) {
+ k = strjoin("!", word);
+ if (!k)
+ return log_oom();
+ } else
+ k = TAKE_PTR(word);
+
+ r = strv_consume(sv, TAKE_PTR(k));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_match_ifnames(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const char *p = rvalue;
+ char ***sv = data;
+ bool invert;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ invert = *p == '!';
+ p += invert;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Failed to parse interface name list: %s", rvalue);
+ return 0;
+ }
+
+ if (!ifname_valid_full(word, ltype)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Interface name is not valid or too long, ignoring assignment: %s", word);
+ continue;
+ }
+
+ if (invert) {
+ k = strjoin("!", word);
+ if (!k)
+ return log_oom();
+ } else
+ k = TAKE_PTR(word);
+
+ r = strv_consume(sv, TAKE_PTR(k));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_match_property(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ const char *p = rvalue;
+ char ***sv = data;
+ bool invert;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ invert = *p == '!';
+ p += invert;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (!env_assignment_is_valid(word)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0,
+ "Invalid property or value, ignoring assignment: %s", word);
+ continue;
+ }
+
+ if (invert) {
+ k = strjoin("!", word);
+ if (!k)
+ return log_oom();
+ } else
+ k = TAKE_PTR(word);
+
+ r = strv_consume(sv, TAKE_PTR(k));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_ifalias(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+ _cleanup_free_ char *n = NULL;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ n = strdup(rvalue);
+ if (!n)
+ return log_oom();
+
+ if (!ascii_is_valid(n) || strlen(n) >= IFALIASZ) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Interface alias is not ASCII clean or is too long, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (isempty(n))
+ *s = mfree(*s);
+ else
+ free_and_replace(*s, n);
+
+ return 0;
+}
+
+int config_parse_hwaddr(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ struct ether_addr *n = NULL;
+ struct ether_addr **hwaddr = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ n = new0(struct ether_addr, 1);
+ if (!n)
+ return log_oom();
+
+ r = ether_addr_from_string(rvalue, n);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Not a valid MAC address, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ free_and_replace(*hwaddr, n);
+
+ return 0;
+}
+
+int config_parse_hwaddrs(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_set_free_free_ Set *s = NULL;
+ const char *p = rvalue;
+ Set **hwaddrs = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ *hwaddrs = set_free_free(*hwaddrs);
+ return 0;
+ }
+
+ s = set_new(&ether_addr_hash_ops);
+ if (!s)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ _cleanup_free_ struct ether_addr *n = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = new(struct ether_addr, 1);
+ if (!n)
+ return log_oom();
+
+ r = ether_addr_from_string(word, n);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Not a valid MAC address, ignoring: %s", word);
+ continue;
+ }
+
+ r = set_put(s, n);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ n = NULL; /* avoid cleanup */
+ }
+
+ r = set_ensure_allocated(hwaddrs, &ether_addr_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = set_move(*hwaddrs, s);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_bridge_port_priority(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t i;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou16(rvalue, &i);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse bridge port priority, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (i > LINK_BRIDGE_PORT_PRIORITY_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Bridge port priority is larger than maximum %u, ignoring: %s", LINK_BRIDGE_PORT_PRIORITY_MAX, rvalue);
+ return 0;
+ }
+
+ *((uint16_t *)data) = i;
+
+ return 0;
+}
+
+size_t serialize_in_addrs(FILE *f,
+ const struct in_addr *addresses,
+ size_t size,
+ bool *with_leading_space,
+ bool (*predicate)(const struct in_addr *addr)) {
+ assert(f);
+ assert(addresses);
+
+ size_t count = 0;
+ bool _space = false;
+ if (!with_leading_space)
+ with_leading_space = &_space;
+
+ for (size_t i = 0; i < size; i++) {
+ char sbuf[INET_ADDRSTRLEN];
+
+ if (predicate && !predicate(&addresses[i]))
+ continue;
+
+ if (*with_leading_space)
+ fputc(' ', f);
+ fputs(inet_ntop(AF_INET, &addresses[i], sbuf, sizeof(sbuf)), f);
+ count++;
+ *with_leading_space = true;
+ }
+
+ return count;
+}
+
+int deserialize_in_addrs(struct in_addr **ret, const char *string) {
+ _cleanup_free_ struct in_addr *addresses = NULL;
+ int size = 0;
+
+ assert(ret);
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ struct in_addr *new_addresses;
+ int r;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ new_addresses = reallocarray(addresses, size + 1, sizeof(struct in_addr));
+ if (!new_addresses)
+ return -ENOMEM;
+ else
+ addresses = new_addresses;
+
+ r = inet_pton(AF_INET, word, &(addresses[size]));
+ if (r <= 0)
+ continue;
+
+ size++;
+ }
+
+ *ret = size > 0 ? TAKE_PTR(addresses) : NULL;
+
+ return size;
+}
+
+void serialize_in6_addrs(FILE *f, const struct in6_addr *addresses, size_t size, bool *with_leading_space) {
+ assert(f);
+ assert(addresses);
+ assert(size);
+
+ bool _space = false;
+ if (!with_leading_space)
+ with_leading_space = &_space;
+
+ for (size_t i = 0; i < size; i++) {
+ char buffer[INET6_ADDRSTRLEN];
+
+ if (*with_leading_space)
+ fputc(' ', f);
+ fputs(inet_ntop(AF_INET6, addresses+i, buffer, sizeof(buffer)), f);
+ *with_leading_space = true;
+ }
+}
+
+int deserialize_in6_addrs(struct in6_addr **ret, const char *string) {
+ _cleanup_free_ struct in6_addr *addresses = NULL;
+ int size = 0;
+
+ assert(ret);
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ struct in6_addr *new_addresses;
+ int r;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ new_addresses = reallocarray(addresses, size + 1, sizeof(struct in6_addr));
+ if (!new_addresses)
+ return -ENOMEM;
+ else
+ addresses = new_addresses;
+
+ r = inet_pton(AF_INET6, word, &(addresses[size]));
+ if (r <= 0)
+ continue;
+
+ size++;
+ }
+
+ *ret = TAKE_PTR(addresses);
+
+ return size;
+}
+
+void serialize_dhcp_routes(FILE *f, const char *key, sd_dhcp_route **routes, size_t size) {
+ assert(f);
+ assert(key);
+ assert(routes);
+ assert(size);
+
+ fprintf(f, "%s=", key);
+
+ for (size_t i = 0; i < size; i++) {
+ char sbuf[INET_ADDRSTRLEN];
+ struct in_addr dest, gw;
+ uint8_t length;
+
+ assert_se(sd_dhcp_route_get_destination(routes[i], &dest) >= 0);
+ assert_se(sd_dhcp_route_get_gateway(routes[i], &gw) >= 0);
+ assert_se(sd_dhcp_route_get_destination_prefix_length(routes[i], &length) >= 0);
+
+ fprintf(f, "%s/%" PRIu8, inet_ntop(AF_INET, &dest, sbuf, sizeof sbuf), length);
+ fprintf(f, ",%s%s", inet_ntop(AF_INET, &gw, sbuf, sizeof sbuf), i < size - 1 ? " ": "");
+ }
+
+ fputs("\n", f);
+}
+
+int deserialize_dhcp_routes(struct sd_dhcp_route **ret, size_t *ret_size, size_t *ret_allocated, const char *string) {
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ size_t size = 0, allocated = 0;
+
+ assert(ret);
+ assert(ret_size);
+ assert(ret_allocated);
+ assert(string);
+
+ /* WORD FORMAT: dst_ip/dst_prefixlen,gw_ip */
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ char *tok, *tok_end;
+ unsigned n;
+ int r;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(routes, allocated, size + 1))
+ return -ENOMEM;
+
+ tok = word;
+
+ /* get the subnet */
+ tok_end = strchr(tok, '/');
+ if (!tok_end)
+ continue;
+ *tok_end = '\0';
+
+ r = inet_aton(tok, &routes[size].dst_addr);
+ if (r == 0)
+ continue;
+
+ tok = tok_end + 1;
+
+ /* get the prefixlen */
+ tok_end = strchr(tok, ',');
+ if (!tok_end)
+ continue;
+
+ *tok_end = '\0';
+
+ r = safe_atou(tok, &n);
+ if (r < 0 || n > 32)
+ continue;
+
+ routes[size].dst_prefixlen = (uint8_t) n;
+ tok = tok_end + 1;
+
+ /* get the gateway */
+ r = inet_aton(tok, &routes[size].gw_addr);
+ if (r == 0)
+ continue;
+
+ size++;
+ }
+
+ *ret_size = size;
+ *ret_allocated = allocated;
+ *ret = TAKE_PTR(routes);
+
+ return 0;
+}
+
+int serialize_dhcp_option(FILE *f, const char *key, const void *data, size_t size) {
+ _cleanup_free_ char *hex_buf = NULL;
+
+ assert(f);
+ assert(key);
+ assert(data);
+
+ hex_buf = hexmem(data, size);
+ if (!hex_buf)
+ return -ENOMEM;
+
+ fprintf(f, "%s=%s\n", key, hex_buf);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/network-internal.h b/src/libsystemd-network/network-internal.h
new file mode 100644
index 0000000..5dae5ab
--- /dev/null
+++ b/src/libsystemd-network/network-internal.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/nl80211.h>
+#include <stdbool.h>
+
+#include "sd-device.h"
+#include "sd-dhcp-lease.h"
+
+#include "conf-parser.h"
+#include "set.h"
+#include "strv.h"
+
+#define LINK_BRIDGE_PORT_PRIORITY_INVALID 128
+#define LINK_BRIDGE_PORT_PRIORITY_MAX 63
+
+char *link_get_type_string(unsigned short iftype, sd_device *device);
+bool net_match_config(Set *match_mac,
+ Set *match_permanent_mac,
+ char * const *match_paths,
+ char * const *match_drivers,
+ char * const *match_iftypes,
+ char * const *match_names,
+ char * const *match_property,
+ char * const *match_wifi_iftype,
+ char * const *match_ssid,
+ Set *match_bssid,
+ sd_device *device,
+ const struct ether_addr *dev_mac,
+ const struct ether_addr *dev_permanent_mac,
+ const char *dev_driver,
+ unsigned short dev_iftype,
+ const char *dev_name,
+ char * const *alternative_names,
+ enum nl80211_iftype dev_wifi_iftype,
+ const char *dev_ssid,
+ const struct ether_addr *dev_bssid);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_net_condition);
+CONFIG_PARSER_PROTOTYPE(config_parse_hwaddr);
+CONFIG_PARSER_PROTOTYPE(config_parse_hwaddrs);
+CONFIG_PARSER_PROTOTYPE(config_parse_match_strv);
+CONFIG_PARSER_PROTOTYPE(config_parse_match_ifnames);
+CONFIG_PARSER_PROTOTYPE(config_parse_match_property);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifalias);
+CONFIG_PARSER_PROTOTYPE(config_parse_bridge_port_priority);
+
+int net_get_unique_predictable_data(sd_device *device, bool use_sysname, uint64_t *result);
+const char *net_get_name_persistent(sd_device *device);
+
+size_t serialize_in_addrs(FILE *f,
+ const struct in_addr *addresses,
+ size_t size,
+ bool *with_leading_space,
+ bool (*predicate)(const struct in_addr *addr));
+int deserialize_in_addrs(struct in_addr **addresses, const char *string);
+void serialize_in6_addrs(FILE *f, const struct in6_addr *addresses,
+ size_t size,
+ bool *with_leading_space);
+int deserialize_in6_addrs(struct in6_addr **addresses, const char *string);
+
+/* don't include "dhcp-lease-internal.h" as it causes conflicts between netinet/ip.h and linux/ip.h */
+struct sd_dhcp_route;
+struct sd_dhcp_lease;
+
+void serialize_dhcp_routes(FILE *f, const char *key, sd_dhcp_route **routes, size_t size);
+int deserialize_dhcp_routes(struct sd_dhcp_route **ret, size_t *ret_size, size_t *ret_allocated, const char *string);
+
+/* It is not necessary to add deserialize_dhcp_option(). Use unhexmem() instead. */
+int serialize_dhcp_option(FILE *f, const char *key, const void *data, size_t size);
+
+int dhcp_lease_save(sd_dhcp_lease *lease, const char *lease_file);
+int dhcp_lease_load(sd_dhcp_lease **ret, const char *lease_file);
diff --git a/src/libsystemd-network/radv-internal.h b/src/libsystemd-network/radv-internal.h
new file mode 100644
index 0000000..3dbeffe
--- /dev/null
+++ b/src/libsystemd-network/radv-internal.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include "sd-radv.h"
+
+#include "log.h"
+#include "list.h"
+#include "sparse-endian.h"
+
+assert_cc(SD_RADV_DEFAULT_MIN_TIMEOUT_USEC <= SD_RADV_DEFAULT_MAX_TIMEOUT_USEC);
+
+#define SD_RADV_MAX_INITIAL_RTR_ADVERT_INTERVAL_USEC (16*USEC_PER_SEC)
+#define SD_RADV_MAX_INITIAL_RTR_ADVERTISEMENTS 3
+#define SD_RADV_MAX_FINAL_RTR_ADVERTISEMENTS 3
+#define SD_RADV_MIN_DELAY_BETWEEN_RAS 3
+#define SD_RADV_MAX_RA_DELAY_TIME_USEC (500*USEC_PER_MSEC)
+
+#define SD_RADV_OPT_ROUTE_INFORMATION 24
+#define SD_RADV_OPT_RDNSS 25
+#define SD_RADV_OPT_DNSSL 31
+
+enum RAdvState {
+ SD_RADV_STATE_IDLE = 0,
+ SD_RADV_STATE_ADVERTISING = 1,
+};
+typedef enum RAdvState RAdvState;
+
+struct sd_radv_opt_dns {
+ uint8_t type;
+ uint8_t length;
+ uint16_t reserved;
+ be32_t lifetime;
+} _packed_;
+
+struct sd_radv {
+ unsigned n_ref;
+ RAdvState state;
+
+ int ifindex;
+
+ sd_event *event;
+ int event_priority;
+
+ struct ether_addr mac_addr;
+ uint8_t hop_limit;
+ uint8_t flags;
+ uint32_t mtu;
+ uint16_t lifetime;
+
+ int fd;
+ unsigned ra_sent;
+ sd_event_source *recv_event_source;
+ sd_event_source *timeout_event_source;
+
+ unsigned n_prefixes;
+ LIST_HEAD(sd_radv_prefix, prefixes);
+
+ unsigned n_route_prefixes;
+ LIST_HEAD(sd_radv_route_prefix, route_prefixes);
+
+ size_t n_rdnss;
+ struct sd_radv_opt_dns *rdnss;
+ struct sd_radv_opt_dns *dnssl;
+};
+
+#define radv_prefix_opt__contents { \
+ uint8_t type; \
+ uint8_t length; \
+ uint8_t prefixlen; \
+ uint8_t flags; \
+ be32_t valid_lifetime; \
+ be32_t preferred_lifetime; \
+ uint32_t reserved; \
+ struct in6_addr in6_addr; \
+}
+
+struct radv_prefix_opt radv_prefix_opt__contents;
+
+/* We need the opt substructure to be packed, because we use it in send(). But
+ * if we use _packed_, this means that the structure cannot be used directly in
+ * normal code in general, because the fields might not be properly aligned.
+ * But in this particular case, the structure is defined in a way that gives
+ * proper alignment, even without the explicit _packed_ attribute. To appease
+ * the compiler we use the "unpacked" structure, but we also verify that
+ * structure contains no holes, so offsets are the same when _packed_ is used.
+ */
+struct radv_prefix_opt__packed radv_prefix_opt__contents _packed_;
+assert_cc(sizeof(struct radv_prefix_opt) == sizeof(struct radv_prefix_opt__packed));
+
+struct sd_radv_prefix {
+ unsigned n_ref;
+
+ struct radv_prefix_opt opt;
+
+ LIST_FIELDS(struct sd_radv_prefix, prefix);
+
+ usec_t valid_until;
+ usec_t preferred_until;
+};
+
+#define radv_route_prefix_opt__contents { \
+ uint8_t type; \
+ uint8_t length; \
+ uint8_t prefixlen; \
+ uint8_t flags_reserved; \
+ be32_t lifetime; \
+ struct in6_addr in6_addr; \
+}
+
+struct radv_route_prefix_opt radv_route_prefix_opt__contents;
+
+struct radv_route_prefix_opt__packed radv_route_prefix_opt__contents _packed_;
+assert_cc(sizeof(struct radv_route_prefix_opt) == sizeof(struct radv_route_prefix_opt__packed));
+
+struct sd_radv_route_prefix {
+ unsigned n_ref;
+
+ struct radv_route_prefix_opt opt;
+
+ LIST_FIELDS(struct sd_radv_route_prefix, prefix);
+};
+
+#define log_radv_full(level, error, fmt, ...) log_internal(level, error, PROJECT_FILE, __LINE__, __func__, "RADV: " fmt, ##__VA_ARGS__)
+#define log_radv_errno(error, fmt, ...) log_radv_full(LOG_DEBUG, error, fmt, ##__VA_ARGS__)
+#define log_radv(fmt, ...) log_radv_errno(0, fmt, ##__VA_ARGS__)
diff --git a/src/libsystemd-network/sd-dhcp-client.c b/src/libsystemd-network/sd-dhcp-client.c
new file mode 100644
index 0000000..d472fcd
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp-client.c
@@ -0,0 +1,2267 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/if_infiniband.h>
+
+#include "sd-dhcp-client.h"
+
+#include "alloc-util.h"
+#include "dhcp-identifier.h"
+#include "dhcp-internal.h"
+#include "dhcp-lease-internal.h"
+#include "dhcp-protocol.h"
+#include "dns-domain.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "memory-util.h"
+#include "random-util.h"
+#include "set.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "web-util.h"
+
+#define MAX_CLIENT_ID_LEN (sizeof(uint32_t) + MAX_DUID_LEN) /* Arbitrary limit */
+#define MAX_MAC_ADDR_LEN CONST_MAX(INFINIBAND_ALEN, ETH_ALEN)
+
+#define RESTART_AFTER_NAK_MIN_USEC (1 * USEC_PER_SEC)
+#define RESTART_AFTER_NAK_MAX_USEC (30 * USEC_PER_MINUTE)
+
+typedef struct sd_dhcp_client_id {
+ uint8_t type;
+ union {
+ struct {
+ /* 0: Generic (non-LL) (RFC 2132) */
+ uint8_t data[MAX_CLIENT_ID_LEN];
+ } _packed_ gen;
+ struct {
+ /* 1: Ethernet Link-Layer (RFC 2132) */
+ uint8_t haddr[ETH_ALEN];
+ } _packed_ eth;
+ struct {
+ /* 2 - 254: ARP/Link-Layer (RFC 2132) */
+ uint8_t haddr[0];
+ } _packed_ ll;
+ struct {
+ /* 255: Node-specific (RFC 4361) */
+ be32_t iaid;
+ struct duid duid;
+ } _packed_ ns;
+ struct {
+ uint8_t data[MAX_CLIENT_ID_LEN];
+ } _packed_ raw;
+ };
+} _packed_ sd_dhcp_client_id;
+
+struct sd_dhcp_client {
+ unsigned n_ref;
+
+ DHCPState state;
+ sd_event *event;
+ int event_priority;
+ sd_event_source *timeout_resend;
+ int ifindex;
+ int fd;
+ uint16_t port;
+ union sockaddr_union link;
+ sd_event_source *receive_message;
+ bool request_broadcast;
+ Set *req_opts;
+ bool anonymize;
+ be32_t last_addr;
+ uint8_t mac_addr[MAX_MAC_ADDR_LEN];
+ size_t mac_addr_len;
+ uint8_t bcast_addr[MAX_MAC_ADDR_LEN];
+ size_t bcast_addr_len;
+ uint16_t arp_type;
+ sd_dhcp_client_id client_id;
+ size_t client_id_len;
+ char *hostname;
+ char *vendor_class_identifier;
+ char *mudurl;
+ char **user_class;
+ uint32_t mtu;
+ uint32_t fallback_lease_lifetime;
+ uint32_t xid;
+ usec_t start_time;
+ uint64_t attempt;
+ uint64_t max_attempts;
+ OrderedHashmap *extra_options;
+ OrderedHashmap *vendor_options;
+ usec_t request_sent;
+ sd_event_source *timeout_t1;
+ sd_event_source *timeout_t2;
+ sd_event_source *timeout_expire;
+ sd_dhcp_client_callback_t callback;
+ void *userdata;
+ sd_dhcp_lease *lease;
+ usec_t start_delay;
+ int ip_service_type;
+};
+
+static const uint8_t default_req_opts[] = {
+ SD_DHCP_OPTION_SUBNET_MASK,
+ SD_DHCP_OPTION_ROUTER,
+ SD_DHCP_OPTION_HOST_NAME,
+ SD_DHCP_OPTION_DOMAIN_NAME,
+ SD_DHCP_OPTION_DOMAIN_NAME_SERVER,
+};
+
+/* RFC7844 section 3:
+ MAY contain the Parameter Request List option.
+ RFC7844 section 3.6:
+ The client intending to protect its privacy SHOULD only request a
+ minimal number of options in the PRL and SHOULD also randomly shuffle
+ the ordering of option codes in the PRL. If this random ordering
+ cannot be implemented, the client MAY order the option codes in the
+ PRL by option code number (lowest to highest).
+*/
+/* NOTE: using PRL options that Windows 10 RFC7844 implementation uses */
+static const uint8_t default_req_opts_anonymize[] = {
+ SD_DHCP_OPTION_SUBNET_MASK, /* 1 */
+ SD_DHCP_OPTION_ROUTER, /* 3 */
+ SD_DHCP_OPTION_DOMAIN_NAME_SERVER, /* 6 */
+ SD_DHCP_OPTION_DOMAIN_NAME, /* 15 */
+ SD_DHCP_OPTION_ROUTER_DISCOVER, /* 31 */
+ SD_DHCP_OPTION_STATIC_ROUTE, /* 33 */
+ SD_DHCP_OPTION_VENDOR_SPECIFIC, /* 43 */
+ SD_DHCP_OPTION_NETBIOS_NAMESERVER, /* 44 */
+ SD_DHCP_OPTION_NETBIOS_NODETYPE, /* 46 */
+ SD_DHCP_OPTION_NETBIOS_SCOPE, /* 47 */
+ SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE, /* 121 */
+ SD_DHCP_OPTION_PRIVATE_CLASSLESS_STATIC_ROUTE, /* 249 */
+ SD_DHCP_OPTION_PRIVATE_PROXY_AUTODISCOVERY, /* 252 */
+};
+
+static int client_receive_message_raw(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+static int client_receive_message_udp(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata);
+static void client_stop(sd_dhcp_client *client, int error);
+
+int sd_dhcp_client_id_to_string(const void *data, size_t len, char **ret) {
+ const sd_dhcp_client_id *client_id = data;
+ _cleanup_free_ char *t = NULL;
+ int r = 0;
+
+ assert_return(data, -EINVAL);
+ assert_return(len >= 1, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ len -= 1;
+ if (len > MAX_CLIENT_ID_LEN)
+ return -EINVAL;
+
+ switch (client_id->type) {
+ case 0:
+ if (utf8_is_printable((char *) client_id->gen.data, len))
+ r = asprintf(&t, "%.*s", (int) len, client_id->gen.data);
+ else
+ r = asprintf(&t, "DATA");
+ break;
+ case 1:
+ if (len != sizeof_field(sd_dhcp_client_id, eth))
+ return -EINVAL;
+
+ r = asprintf(&t, "%x:%x:%x:%x:%x:%x",
+ client_id->eth.haddr[0],
+ client_id->eth.haddr[1],
+ client_id->eth.haddr[2],
+ client_id->eth.haddr[3],
+ client_id->eth.haddr[4],
+ client_id->eth.haddr[5]);
+ break;
+ case 2 ... 254:
+ r = asprintf(&t, "ARP/LL");
+ break;
+ case 255:
+ if (len < 6)
+ return -EINVAL;
+
+ uint32_t iaid = be32toh(client_id->ns.iaid);
+ uint16_t duid_type = be16toh(client_id->ns.duid.type);
+ if (dhcp_validate_duid_len(duid_type, len - 6, true) < 0)
+ return -EINVAL;
+
+ r = asprintf(&t, "IAID:0x%x/DUID", iaid);
+ break;
+ }
+
+ if (r < 0)
+ return -ENOMEM;
+ *ret = TAKE_PTR(t);
+ return 0;
+}
+
+int sd_dhcp_client_set_callback(
+ sd_dhcp_client *client,
+ sd_dhcp_client_callback_t cb,
+ void *userdata) {
+
+ assert_return(client, -EINVAL);
+
+ client->callback = cb;
+ client->userdata = userdata;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_request_broadcast(sd_dhcp_client *client, int broadcast) {
+ assert_return(client, -EINVAL);
+
+ client->request_broadcast = !!broadcast;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_request_option(sd_dhcp_client *client, uint8_t option) {
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY);
+
+ switch(option) {
+
+ case SD_DHCP_OPTION_PAD:
+ case SD_DHCP_OPTION_OVERLOAD:
+ case SD_DHCP_OPTION_MESSAGE_TYPE:
+ case SD_DHCP_OPTION_PARAMETER_REQUEST_LIST:
+ case SD_DHCP_OPTION_END:
+ return -EINVAL;
+
+ default:
+ break;
+ }
+
+ return set_ensure_put(&client->req_opts, NULL, UINT8_TO_PTR(option));
+}
+
+int sd_dhcp_client_set_request_address(
+ sd_dhcp_client *client,
+ const struct in_addr *last_addr) {
+
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY);
+
+ if (last_addr)
+ client->last_addr = last_addr->s_addr;
+ else
+ client->last_addr = INADDR_ANY;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_ifindex(sd_dhcp_client *client, int ifindex) {
+
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY);
+ assert_return(ifindex > 0, -EINVAL);
+
+ client->ifindex = ifindex;
+ return 0;
+}
+
+int sd_dhcp_client_set_mac(
+ sd_dhcp_client *client,
+ const uint8_t *addr,
+ const uint8_t *bcast_addr,
+ size_t addr_len,
+ uint16_t arp_type) {
+
+ DHCP_CLIENT_DONT_DESTROY(client);
+ bool need_restart = false;
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(addr_len > 0 && addr_len <= MAX_MAC_ADDR_LEN, -EINVAL);
+ assert_return(arp_type > 0, -EINVAL);
+
+ if (arp_type == ARPHRD_ETHER)
+ assert_return(addr_len == ETH_ALEN, -EINVAL);
+ else if (arp_type == ARPHRD_INFINIBAND)
+ assert_return(addr_len == INFINIBAND_ALEN, -EINVAL);
+ else
+ return -EINVAL;
+
+ if (client->mac_addr_len == addr_len &&
+ memcmp(&client->mac_addr, addr, addr_len) == 0 &&
+ (client->bcast_addr_len > 0) == !!bcast_addr &&
+ (!bcast_addr || memcmp(&client->bcast_addr, bcast_addr, addr_len) == 0))
+ return 0;
+
+ if (!IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED)) {
+ log_dhcp_client(client, "Changing MAC address on running DHCP client, restarting");
+ need_restart = true;
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+ }
+
+ memcpy(&client->mac_addr, addr, addr_len);
+ client->mac_addr_len = addr_len;
+ client->arp_type = arp_type;
+ client->bcast_addr_len = 0;
+
+ if (bcast_addr) {
+ memcpy(&client->bcast_addr, bcast_addr, addr_len);
+ client->bcast_addr_len = addr_len;
+ }
+
+ if (need_restart && client->state != DHCP_STATE_STOPPED) {
+ r = sd_dhcp_client_start(client);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to restart DHCPv4 client: %m");
+ }
+
+ return 0;
+}
+
+int sd_dhcp_client_get_client_id(
+ sd_dhcp_client *client,
+ uint8_t *type,
+ const uint8_t **data,
+ size_t *data_len) {
+
+ assert_return(client, -EINVAL);
+ assert_return(type, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(data_len, -EINVAL);
+
+ *type = 0;
+ *data = NULL;
+ *data_len = 0;
+ if (client->client_id_len) {
+ *type = client->client_id.type;
+ *data = client->client_id.raw.data;
+ *data_len = client->client_id_len - sizeof(client->client_id.type);
+ }
+
+ return 0;
+}
+
+int sd_dhcp_client_set_client_id(
+ sd_dhcp_client *client,
+ uint8_t type,
+ const uint8_t *data,
+ size_t data_len) {
+
+ DHCP_CLIENT_DONT_DESTROY(client);
+ bool need_restart = false;
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(data_len > 0 && data_len <= MAX_CLIENT_ID_LEN, -EINVAL);
+
+ if (client->client_id_len == data_len + sizeof(client->client_id.type) &&
+ client->client_id.type == type &&
+ memcmp(&client->client_id.raw.data, data, data_len) == 0)
+ return 0;
+
+ /* For hardware types, log debug message about unexpected data length.
+ *
+ * Note that infiniband's INFINIBAND_ALEN is 20 bytes long, but only
+ * the last 8 bytes of the address are stable and suitable to put into
+ * the client-id. The caller is advised to account for that. */
+ if ((type == ARPHRD_ETHER && data_len != ETH_ALEN) ||
+ (type == ARPHRD_INFINIBAND && data_len != 8))
+ log_dhcp_client(client, "Changing client ID to hardware type %u with "
+ "unexpected address length %zu",
+ type, data_len);
+
+ if (!IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED)) {
+ log_dhcp_client(client, "Changing client ID on running DHCP "
+ "client, restarting");
+ need_restart = true;
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+ }
+
+ client->client_id.type = type;
+ memcpy(&client->client_id.raw.data, data, data_len);
+ client->client_id_len = data_len + sizeof (client->client_id.type);
+
+ if (need_restart && client->state != DHCP_STATE_STOPPED) {
+ r = sd_dhcp_client_start(client);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to restart DHCPv4 client: %m");
+ }
+
+ return 0;
+}
+
+/**
+ * Sets IAID and DUID. If duid is non-null, the DUID is set to duid_type + duid
+ * without further modification. Otherwise, if duid_type is supported, DUID
+ * is set based on that type. Otherwise, an error is returned.
+ */
+static int dhcp_client_set_iaid_duid_internal(
+ sd_dhcp_client *client,
+ bool iaid_append,
+ bool iaid_set,
+ uint32_t iaid,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len,
+ usec_t llt_time) {
+
+ DHCP_CLIENT_DONT_DESTROY(client);
+ int r;
+ size_t len;
+
+ assert_return(client, -EINVAL);
+ assert_return(duid_len == 0 || duid, -EINVAL);
+
+ if (duid) {
+ r = dhcp_validate_duid_len(duid_type, duid_len, true);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to validate length of DUID: %m");
+ }
+
+ zero(client->client_id);
+ client->client_id.type = 255;
+
+ if (iaid_append) {
+ if (iaid_set)
+ client->client_id.ns.iaid = htobe32(iaid);
+ else {
+ r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr,
+ client->mac_addr_len,
+ true,
+ &client->client_id.ns.iaid);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to set IAID: %m");
+ }
+ }
+
+ if (duid) {
+ client->client_id.ns.duid.type = htobe16(duid_type);
+ memcpy(&client->client_id.ns.duid.raw.data, duid, duid_len);
+ len = sizeof(client->client_id.ns.duid.type) + duid_len;
+ } else
+ switch (duid_type) {
+ case DUID_TYPE_LLT:
+ if (client->mac_addr_len == 0)
+ return log_dhcp_client_errno(client, SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to set DUID-LLT, MAC address is not set.");
+
+ r = dhcp_identifier_set_duid_llt(&client->client_id.ns.duid, llt_time, client->mac_addr, client->mac_addr_len, client->arp_type, &len);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to set DUID-LLT: %m");
+ break;
+ case DUID_TYPE_EN:
+ r = dhcp_identifier_set_duid_en(&client->client_id.ns.duid, &len);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to set DUID-EN: %m");
+ break;
+ case DUID_TYPE_LL:
+ if (client->mac_addr_len == 0)
+ return log_dhcp_client_errno(client, SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to set DUID-LL, MAC address is not set.");
+
+ r = dhcp_identifier_set_duid_ll(&client->client_id.ns.duid, client->mac_addr, client->mac_addr_len, client->arp_type, &len);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to set DUID-LL: %m");
+ break;
+ case DUID_TYPE_UUID:
+ r = dhcp_identifier_set_duid_uuid(&client->client_id.ns.duid, &len);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to set DUID-UUID: %m");
+ break;
+ default:
+ return log_dhcp_client_errno(client, SYNTHETIC_ERRNO(EINVAL), "Invalid DUID type");
+ }
+
+ client->client_id_len = sizeof(client->client_id.type) + len +
+ (iaid_append ? sizeof(client->client_id.ns.iaid) : 0);
+
+ if (!IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED)) {
+ log_dhcp_client(client, "Configured %sDUID, restarting.", iaid_append ? "IAID+" : "");
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+ r = sd_dhcp_client_start(client);
+ if (r < 0)
+ return log_dhcp_client_errno(client, r, "Failed to restart DHCPv4 client: %m");
+ }
+
+ return 0;
+}
+
+int sd_dhcp_client_set_iaid_duid(
+ sd_dhcp_client *client,
+ bool iaid_set,
+ uint32_t iaid,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len) {
+ return dhcp_client_set_iaid_duid_internal(client, true, iaid_set, iaid, duid_type, duid, duid_len, 0);
+}
+
+int sd_dhcp_client_set_iaid_duid_llt(
+ sd_dhcp_client *client,
+ bool iaid_set,
+ uint32_t iaid,
+ usec_t llt_time) {
+ return dhcp_client_set_iaid_duid_internal(client, true, iaid_set, iaid, DUID_TYPE_LLT, NULL, 0, llt_time);
+}
+
+int sd_dhcp_client_set_duid(
+ sd_dhcp_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len) {
+ return dhcp_client_set_iaid_duid_internal(client, false, false, 0, duid_type, duid, duid_len, 0);
+}
+
+int sd_dhcp_client_set_duid_llt(
+ sd_dhcp_client *client,
+ usec_t llt_time) {
+ return dhcp_client_set_iaid_duid_internal(client, false, false, 0, DUID_TYPE_LLT, NULL, 0, llt_time);
+}
+
+int sd_dhcp_client_set_hostname(
+ sd_dhcp_client *client,
+ const char *hostname) {
+
+ assert_return(client, -EINVAL);
+
+ /* Make sure hostnames qualify as DNS and as Linux hostnames */
+ if (hostname &&
+ !(hostname_is_valid(hostname, false) && dns_name_is_valid(hostname) > 0))
+ return -EINVAL;
+
+ return free_and_strdup(&client->hostname, hostname);
+}
+
+int sd_dhcp_client_set_vendor_class_identifier(
+ sd_dhcp_client *client,
+ const char *vci) {
+
+ assert_return(client, -EINVAL);
+
+ return free_and_strdup(&client->vendor_class_identifier, vci);
+}
+
+int sd_dhcp_client_set_mud_url(
+ sd_dhcp_client *client,
+ const char *mudurl) {
+
+ assert_return(client, -EINVAL);
+ assert_return(mudurl, -EINVAL);
+ assert_return(strlen(mudurl) <= 255, -EINVAL);
+ assert_return(http_url_is_valid(mudurl), -EINVAL);
+
+ return free_and_strdup(&client->mudurl, mudurl);
+}
+
+int sd_dhcp_client_set_user_class(
+ sd_dhcp_client *client,
+ char * const *user_class) {
+
+ char * const *p;
+ char **s = NULL;
+
+ assert_return(client, -EINVAL);
+ assert_return(!strv_isempty(user_class), -EINVAL);
+
+ STRV_FOREACH(p, user_class) {
+ size_t n = strlen(*p);
+
+ if (n > 255 || n == 0)
+ return -EINVAL;
+ }
+
+ s = strv_copy(user_class);
+ if (!s)
+ return -ENOMEM;
+
+ return strv_free_and_replace(client->user_class, s);
+}
+
+int sd_dhcp_client_set_client_port(
+ sd_dhcp_client *client,
+ uint16_t port) {
+
+ assert_return(client, -EINVAL);
+
+ client->port = port;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_mtu(sd_dhcp_client *client, uint32_t mtu) {
+ assert_return(client, -EINVAL);
+ assert_return(mtu >= DHCP_DEFAULT_MIN_SIZE, -ERANGE);
+
+ client->mtu = mtu;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_max_attempts(sd_dhcp_client *client, uint64_t max_attempts) {
+ assert_return(client, -EINVAL);
+
+ client->max_attempts = max_attempts;
+
+ return 0;
+}
+
+int sd_dhcp_client_add_option(sd_dhcp_client *client, sd_dhcp_option *v) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(v, -EINVAL);
+
+ r = ordered_hashmap_ensure_allocated(&client->extra_options, &dhcp_option_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(client->extra_options, UINT_TO_PTR(v->option), v);
+ if (r < 0)
+ return r;
+
+ sd_dhcp_option_ref(v);
+ return 0;
+}
+
+int sd_dhcp_client_add_vendor_option(sd_dhcp_client *client, sd_dhcp_option *v) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(v, -EINVAL);
+
+ r = ordered_hashmap_ensure_allocated(&client->vendor_options, &dhcp_option_hash_ops);
+ if (r < 0)
+ return -ENOMEM;
+
+ r = ordered_hashmap_put(client->vendor_options, v, v);
+ if (r < 0)
+ return r;
+
+ sd_dhcp_option_ref(v);
+
+ return 1;
+}
+
+int sd_dhcp_client_get_lease(sd_dhcp_client *client, sd_dhcp_lease **ret) {
+ assert_return(client, -EINVAL);
+
+ if (!IN_SET(client->state, DHCP_STATE_SELECTING, DHCP_STATE_BOUND, DHCP_STATE_RENEWING, DHCP_STATE_REBINDING))
+ return -EADDRNOTAVAIL;
+
+ if (ret)
+ *ret = client->lease;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_service_type(sd_dhcp_client *client, int type) {
+ assert_return(client, -EINVAL);
+
+ client->ip_service_type = type;
+
+ return 0;
+}
+
+int sd_dhcp_client_set_fallback_lease_lifetime(sd_dhcp_client *client, uint32_t fallback_lease_lifetime) {
+ assert_return(client, -EINVAL);
+ assert_return(fallback_lease_lifetime > 0, -EINVAL);
+
+ client->fallback_lease_lifetime = fallback_lease_lifetime;
+
+ return 0;
+}
+
+static int client_notify(sd_dhcp_client *client, int event) {
+ assert(client);
+
+ if (client->callback)
+ return client->callback(client, event, client->userdata);
+
+ return 0;
+}
+
+static int client_initialize(sd_dhcp_client *client) {
+ assert_return(client, -EINVAL);
+
+ client->receive_message = sd_event_source_unref(client->receive_message);
+
+ client->fd = safe_close(client->fd);
+
+ (void) event_source_disable(client->timeout_resend);
+ (void) event_source_disable(client->timeout_t1);
+ (void) event_source_disable(client->timeout_t2);
+ (void) event_source_disable(client->timeout_expire);
+
+ client->attempt = 0;
+
+ client->state = DHCP_STATE_INIT;
+ client->xid = 0;
+
+ client->lease = sd_dhcp_lease_unref(client->lease);
+
+ return 0;
+}
+
+static void client_stop(sd_dhcp_client *client, int error) {
+ assert(client);
+
+ if (error < 0)
+ log_dhcp_client_errno(client, error, "STOPPED: %m");
+ else if (error == SD_DHCP_CLIENT_EVENT_STOP)
+ log_dhcp_client(client, "STOPPED");
+ else
+ log_dhcp_client(client, "STOPPED: Unknown event");
+
+ client_notify(client, error);
+
+ client_initialize(client);
+}
+
+static int cmp_uint8(const uint8_t *a, const uint8_t *b) {
+ return CMP(*a, *b);
+}
+
+static int client_message_init(
+ sd_dhcp_client *client,
+ DHCPPacket **ret,
+ uint8_t type,
+ size_t *_optlen,
+ size_t *_optoffset) {
+
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t optlen, optoffset, size;
+ be16_t max_size;
+ usec_t time_now;
+ uint16_t secs;
+ int r;
+
+ assert(client);
+ assert(client->start_time);
+ assert(ret);
+ assert(_optlen);
+ assert(_optoffset);
+ assert(IN_SET(type, DHCP_DISCOVER, DHCP_REQUEST, DHCP_RELEASE, DHCP_DECLINE));
+
+ optlen = DHCP_MIN_OPTIONS_SIZE;
+ size = sizeof(DHCPPacket) + optlen;
+
+ packet = malloc0(size);
+ if (!packet)
+ return -ENOMEM;
+
+ r = dhcp_message_init(&packet->dhcp, BOOTREQUEST, client->xid, type,
+ client->arp_type, optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ /* Although 'secs' field is a SHOULD in RFC 2131, certain DHCP servers
+ refuse to issue an DHCP lease if 'secs' is set to zero */
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+ assert(time_now >= client->start_time);
+
+ /* seconds between sending first and last DISCOVER
+ * must always be strictly positive to deal with broken servers */
+ secs = ((time_now - client->start_time) / USEC_PER_SEC) ? : 1;
+ packet->dhcp.secs = htobe16(secs);
+
+ /* RFC2132 section 4.1
+ A client that cannot receive unicast IP datagrams until its protocol
+ software has been configured with an IP address SHOULD set the
+ BROADCAST bit in the 'flags' field to 1 in any DHCPDISCOVER or
+ DHCPREQUEST messages that client sends. The BROADCAST bit will
+ provide a hint to the DHCP server and BOOTP relay agent to broadcast
+ any messages to the client on the client's subnet.
+
+ Note: some interfaces needs this to be enabled, but some networks
+ needs this to be disabled as broadcasts are filteretd, so this
+ needs to be configurable */
+ if (client->request_broadcast || client->arp_type != ARPHRD_ETHER)
+ packet->dhcp.flags = htobe16(0x8000);
+
+ /* RFC2132 section 4.1.1:
+ The client MUST include its hardware address in the ’chaddr’ field, if
+ necessary for delivery of DHCP reply messages. Non-Ethernet
+ interfaces will leave 'chaddr' empty and use the client identifier
+ instead (eg, RFC 4390 section 2.1).
+ */
+ if (client->arp_type == ARPHRD_ETHER)
+ memcpy(&packet->dhcp.chaddr, &client->mac_addr, ETH_ALEN);
+
+ /* If no client identifier exists, construct an RFC 4361-compliant one */
+ if (client->client_id_len == 0) {
+ size_t duid_len;
+
+ client->client_id.type = 255;
+
+ r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr, client->mac_addr_len,
+ true, &client->client_id.ns.iaid);
+ if (r < 0)
+ return r;
+
+ r = dhcp_identifier_set_duid_en(&client->client_id.ns.duid, &duid_len);
+ if (r < 0)
+ return r;
+
+ client->client_id_len = sizeof(client->client_id.type) + sizeof(client->client_id.ns.iaid) + duid_len;
+ }
+
+ /* Some DHCP servers will refuse to issue an DHCP lease if the Client
+ Identifier option is not set */
+ if (client->client_id_len) {
+ r = dhcp_option_append(&packet->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_CLIENT_IDENTIFIER,
+ client->client_id_len,
+ &client->client_id);
+ if (r < 0)
+ return r;
+ }
+
+ /* RFC2131 section 3.5:
+ in its initial DHCPDISCOVER or DHCPREQUEST message, a
+ client may provide the server with a list of specific
+ parameters the client is interested in. If the client
+ includes a list of parameters in a DHCPDISCOVER message,
+ it MUST include that list in any subsequent DHCPREQUEST
+ messages.
+ */
+
+ /* RFC7844 section 3:
+ MAY contain the Parameter Request List option. */
+ /* NOTE: in case that there would be an option to do not send
+ * any PRL at all, the size should be checked before sending */
+ if (!set_isempty(client->req_opts) && type != DHCP_RELEASE) {
+ _cleanup_free_ uint8_t *opts = NULL;
+ size_t n_opts, i = 0;
+ void *val;
+
+ n_opts = set_size(client->req_opts);
+ opts = new(uint8_t, n_opts);
+ if (!opts)
+ return -ENOMEM;
+
+ SET_FOREACH(val, client->req_opts)
+ opts[i++] = PTR_TO_UINT8(val);
+ assert(i == n_opts);
+
+ /* For anonymizing the request, let's sort the options. */
+ typesafe_qsort(opts, n_opts, cmp_uint8);
+
+ r = dhcp_option_append(&packet->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_PARAMETER_REQUEST_LIST,
+ n_opts, opts);
+ if (r < 0)
+ return r;
+ }
+
+ /* RFC2131 section 3.5:
+ The client SHOULD include the ’maximum DHCP message size’ option to
+ let the server know how large the server may make its DHCP messages.
+
+ Note (from ConnMan): Some DHCP servers will send bigger DHCP packets
+ than the defined default size unless the Maximum Message Size option
+ is explicitly set
+
+ RFC3442 "Requirements to Avoid Sizing Constraints":
+ Because a full routing table can be quite large, the standard 576
+ octet maximum size for a DHCP message may be too short to contain
+ some legitimate Classless Static Route options. Because of this,
+ clients implementing the Classless Static Route option SHOULD send a
+ Maximum DHCP Message Size [4] option if the DHCP client's TCP/IP
+ stack is capable of receiving larger IP datagrams. In this case, the
+ client SHOULD set the value of this option to at least the MTU of the
+ interface that the client is configuring. The client MAY set the
+ value of this option higher, up to the size of the largest UDP packet
+ it is prepared to accept. (Note that the value specified in the
+ Maximum DHCP Message Size option is the total maximum packet size,
+ including IP and UDP headers.)
+ */
+ /* RFC7844 section 3:
+ SHOULD NOT contain any other option. */
+ if (!client->anonymize && type != DHCP_RELEASE) {
+ max_size = htobe16(size);
+ r = dhcp_option_append(&packet->dhcp, client->mtu, &optoffset, 0,
+ SD_DHCP_OPTION_MAXIMUM_MESSAGE_SIZE,
+ 2, &max_size);
+ if (r < 0)
+ return r;
+ }
+
+ *_optlen = optlen;
+ *_optoffset = optoffset;
+ *ret = TAKE_PTR(packet);
+
+ return 0;
+}
+
+static int client_append_fqdn_option(
+ DHCPMessage *message,
+ size_t optlen,
+ size_t *optoffset,
+ const char *fqdn) {
+
+ uint8_t buffer[3 + DHCP_MAX_FQDN_LENGTH];
+ int r;
+
+ buffer[0] = DHCP_FQDN_FLAG_S | /* Request server to perform A RR DNS updates */
+ DHCP_FQDN_FLAG_E; /* Canonical wire format */
+ buffer[1] = 0; /* RCODE1 (deprecated) */
+ buffer[2] = 0; /* RCODE2 (deprecated) */
+
+ r = dns_name_to_wire_format(fqdn, buffer + 3, sizeof(buffer) - 3, false);
+ if (r > 0)
+ r = dhcp_option_append(message, optlen, optoffset, 0,
+ SD_DHCP_OPTION_FQDN, 3 + r, buffer);
+
+ return r;
+}
+
+static int dhcp_client_send_raw(
+ sd_dhcp_client *client,
+ DHCPPacket *packet,
+ size_t len) {
+
+ dhcp_packet_append_ip_headers(packet, INADDR_ANY, client->port,
+ INADDR_BROADCAST, DHCP_PORT_SERVER, len, client->ip_service_type);
+
+ return dhcp_network_send_raw_socket(client->fd, &client->link,
+ packet, len);
+}
+
+static int client_append_common_discover_request_options(sd_dhcp_client *client, DHCPPacket *packet, size_t *optoffset, size_t optlen) {
+ sd_dhcp_option *j;
+ int r;
+
+ assert(client);
+
+ if (client->hostname) {
+ /* According to RFC 4702 "clients that send the Client FQDN option in
+ their messages MUST NOT also send the Host Name option". Just send
+ one of the two depending on the hostname type.
+ */
+ if (dns_name_is_single_label(client->hostname)) {
+ /* it is unclear from RFC 2131 if client should send hostname in
+ DHCPDISCOVER but dhclient does and so we do as well
+ */
+ r = dhcp_option_append(&packet->dhcp, optlen, optoffset, 0,
+ SD_DHCP_OPTION_HOST_NAME,
+ strlen(client->hostname), client->hostname);
+ } else
+ r = client_append_fqdn_option(&packet->dhcp, optlen, optoffset,
+ client->hostname);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->vendor_class_identifier) {
+ r = dhcp_option_append(&packet->dhcp, optlen, optoffset, 0,
+ SD_DHCP_OPTION_VENDOR_CLASS_IDENTIFIER,
+ strlen(client->vendor_class_identifier),
+ client->vendor_class_identifier);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->mudurl) {
+ r = dhcp_option_append(&packet->dhcp, optlen, optoffset, 0,
+ SD_DHCP_OPTION_MUD_URL,
+ strlen(client->mudurl),
+ client->mudurl);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->user_class) {
+ r = dhcp_option_append(&packet->dhcp, optlen, optoffset, 0,
+ SD_DHCP_OPTION_USER_CLASS,
+ strv_length(client->user_class),
+ client->user_class);
+ if (r < 0)
+ return r;
+ }
+
+ ORDERED_HASHMAP_FOREACH(j, client->extra_options) {
+ r = dhcp_option_append(&packet->dhcp, optlen, optoffset, 0,
+ j->option, j->length, j->data);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ordered_hashmap_isempty(client->vendor_options)) {
+ r = dhcp_option_append(
+ &packet->dhcp, optlen, optoffset, 0,
+ SD_DHCP_OPTION_VENDOR_SPECIFIC,
+ ordered_hashmap_size(client->vendor_options), client->vendor_options);
+ if (r < 0)
+ return r;
+ }
+
+
+ return 0;
+}
+
+static int client_send_discover(sd_dhcp_client *client) {
+ _cleanup_free_ DHCPPacket *discover = NULL;
+ size_t optoffset, optlen;
+ int r;
+
+ assert(client);
+ assert(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_SELECTING));
+
+ r = client_message_init(client, &discover, DHCP_DISCOVER,
+ &optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ /* the client may suggest values for the network address
+ and lease time in the DHCPDISCOVER message. The client may include
+ the ’requested IP address’ option to suggest that a particular IP
+ address be assigned, and may include the ’IP address lease time’
+ option to suggest the lease time it would like.
+ */
+ /* RFC7844 section 3:
+ SHOULD NOT contain any other option. */
+ if (!client->anonymize && client->last_addr != INADDR_ANY) {
+ r = dhcp_option_append(&discover->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_REQUESTED_IP_ADDRESS,
+ 4, &client->last_addr);
+ if (r < 0)
+ return r;
+ }
+
+ r = client_append_common_discover_request_options(client, discover, &optoffset, optlen);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&discover->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ /* We currently ignore:
+ The client SHOULD wait a random time between one and ten seconds to
+ desynchronize the use of DHCP at startup.
+ */
+ r = dhcp_client_send_raw(client, discover, sizeof(DHCPPacket) + optoffset);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "DISCOVER");
+
+ return 0;
+}
+
+static int client_send_request(sd_dhcp_client *client) {
+ _cleanup_free_ DHCPPacket *request = NULL;
+ size_t optoffset, optlen;
+ int r;
+
+ assert(client);
+
+ r = client_message_init(client, &request, DHCP_REQUEST, &optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ switch (client->state) {
+ /* See RFC2131 section 4.3.2 (note that there is a typo in the RFC,
+ SELECTING should be REQUESTING)
+ */
+
+ case DHCP_STATE_REQUESTING:
+ /* Client inserts the address of the selected server in ’server
+ identifier’, ’ciaddr’ MUST be zero, ’requested IP address’ MUST be
+ filled in with the yiaddr value from the chosen DHCPOFFER.
+ */
+
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_SERVER_IDENTIFIER,
+ 4, &client->lease->server_address);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_REQUESTED_IP_ADDRESS,
+ 4, &client->lease->address);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case DHCP_STATE_INIT_REBOOT:
+ /* ’server identifier’ MUST NOT be filled in, ’requested IP address’
+ option MUST be filled in with client’s notion of its previously
+ assigned address. ’ciaddr’ MUST be zero.
+ */
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_REQUESTED_IP_ADDRESS,
+ 4, &client->last_addr);
+ if (r < 0)
+ return r;
+ break;
+
+ case DHCP_STATE_RENEWING:
+ /* ’server identifier’ MUST NOT be filled in, ’requested IP address’
+ option MUST NOT be filled in, ’ciaddr’ MUST be filled in with
+ client’s IP address.
+ */
+
+ case DHCP_STATE_REBINDING:
+ /* ’server identifier’ MUST NOT be filled in, ’requested IP address’
+ option MUST NOT be filled in, ’ciaddr’ MUST be filled in with
+ client’s IP address.
+
+ This message MUST be broadcast to the 0xffffffff IP broadcast address.
+ */
+ request->dhcp.ciaddr = client->lease->address;
+
+ break;
+
+ case DHCP_STATE_INIT:
+ case DHCP_STATE_SELECTING:
+ case DHCP_STATE_REBOOTING:
+ case DHCP_STATE_BOUND:
+ case DHCP_STATE_STOPPED:
+ return -EINVAL;
+ }
+
+ r = client_append_common_discover_request_options(client, request, &optoffset, optlen);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&request->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ if (client->state == DHCP_STATE_RENEWING)
+ r = dhcp_network_send_udp_socket(client->fd,
+ client->lease->server_address,
+ DHCP_PORT_SERVER,
+ &request->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ else
+ r = dhcp_client_send_raw(client, request, sizeof(DHCPPacket) + optoffset);
+ if (r < 0)
+ return r;
+
+ switch (client->state) {
+
+ case DHCP_STATE_REQUESTING:
+ log_dhcp_client(client, "REQUEST (requesting)");
+ break;
+
+ case DHCP_STATE_INIT_REBOOT:
+ log_dhcp_client(client, "REQUEST (init-reboot)");
+ break;
+
+ case DHCP_STATE_RENEWING:
+ log_dhcp_client(client, "REQUEST (renewing)");
+ break;
+
+ case DHCP_STATE_REBINDING:
+ log_dhcp_client(client, "REQUEST (rebinding)");
+ break;
+
+ default:
+ log_dhcp_client(client, "REQUEST (invalid)");
+ break;
+ }
+
+ return 0;
+}
+
+static int client_start(sd_dhcp_client *client);
+
+static int client_timeout_resend(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ sd_dhcp_client *client = userdata;
+ DHCP_CLIENT_DONT_DESTROY(client);
+ usec_t next_timeout = 0;
+ uint64_t time_now;
+ uint32_t time_left;
+ int r;
+
+ assert(s);
+ assert(client);
+ assert(client->event);
+
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ goto error;
+
+ switch (client->state) {
+
+ case DHCP_STATE_RENEWING:
+
+ time_left = (client->lease->t2 - client->lease->t1) / 2;
+ if (time_left < 60)
+ time_left = 60;
+
+ next_timeout = time_now + time_left * USEC_PER_SEC;
+
+ break;
+
+ case DHCP_STATE_REBINDING:
+
+ time_left = (client->lease->lifetime - client->lease->t2) / 2;
+ if (time_left < 60)
+ time_left = 60;
+
+ next_timeout = time_now + time_left * USEC_PER_SEC;
+ break;
+
+ case DHCP_STATE_REBOOTING:
+ /* start over as we did not receive a timely ack or nak */
+ r = client_initialize(client);
+ if (r < 0)
+ goto error;
+
+ r = client_start(client);
+ if (r < 0)
+ goto error;
+ else {
+ log_dhcp_client(client, "REBOOTED");
+ return 0;
+ }
+
+ case DHCP_STATE_INIT:
+ case DHCP_STATE_INIT_REBOOT:
+ case DHCP_STATE_SELECTING:
+ case DHCP_STATE_REQUESTING:
+ case DHCP_STATE_BOUND:
+
+ if (client->attempt < client->max_attempts)
+ client->attempt++;
+ else
+ goto error;
+
+ next_timeout = time_now + ((UINT64_C(1) << MIN(client->attempt, (uint64_t) 6)) - 1) * USEC_PER_SEC;
+
+ break;
+
+ case DHCP_STATE_STOPPED:
+ r = -EINVAL;
+ goto error;
+ }
+
+ next_timeout += (random_u32() & 0x1fffff);
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ next_timeout, 10 * USEC_PER_MSEC,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp4-resend-timer", true);
+ if (r < 0)
+ goto error;
+
+ switch (client->state) {
+ case DHCP_STATE_INIT:
+ r = client_send_discover(client);
+ if (r >= 0) {
+ client->state = DHCP_STATE_SELECTING;
+ client->attempt = 0;
+ } else if (client->attempt >= client->max_attempts)
+ goto error;
+
+ break;
+
+ case DHCP_STATE_SELECTING:
+ r = client_send_discover(client);
+ if (r < 0 && client->attempt >= client->max_attempts)
+ goto error;
+
+ break;
+
+ case DHCP_STATE_INIT_REBOOT:
+ case DHCP_STATE_REQUESTING:
+ case DHCP_STATE_RENEWING:
+ case DHCP_STATE_REBINDING:
+ r = client_send_request(client);
+ if (r < 0 && client->attempt >= client->max_attempts)
+ goto error;
+
+ if (client->state == DHCP_STATE_INIT_REBOOT)
+ client->state = DHCP_STATE_REBOOTING;
+
+ client->request_sent = time_now;
+
+ break;
+
+ case DHCP_STATE_REBOOTING:
+ case DHCP_STATE_BOUND:
+
+ break;
+
+ case DHCP_STATE_STOPPED:
+ r = -EINVAL;
+ goto error;
+ }
+
+ return 0;
+
+error:
+ client_stop(client, r);
+
+ /* Errors were dealt with when stopping the client, don't spill
+ errors into the event loop handler */
+ return 0;
+}
+
+static int client_initialize_io_events(
+ sd_dhcp_client *client,
+ sd_event_io_handler_t io_callback) {
+
+ int r;
+
+ assert(client);
+ assert(client->event);
+
+ r = sd_event_add_io(client->event, &client->receive_message,
+ client->fd, EPOLLIN, io_callback,
+ client);
+ if (r < 0)
+ goto error;
+
+ r = sd_event_source_set_priority(client->receive_message,
+ client->event_priority);
+ if (r < 0)
+ goto error;
+
+ r = sd_event_source_set_description(client->receive_message, "dhcp4-receive-message");
+ if (r < 0)
+ goto error;
+
+error:
+ if (r < 0)
+ client_stop(client, r);
+
+ return 0;
+}
+
+static int client_initialize_time_events(sd_dhcp_client *client) {
+ uint64_t usec = 0;
+ int r;
+
+ assert(client);
+ assert(client->event);
+
+ if (client->start_delay > 0) {
+ assert_se(sd_event_now(client->event, clock_boottime_or_monotonic(), &usec) >= 0);
+ usec += client->start_delay;
+ }
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ usec, 0,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp4-resend-timer", true);
+ if (r < 0)
+ client_stop(client, r);
+
+ return 0;
+
+}
+
+static int client_initialize_events(sd_dhcp_client *client, sd_event_io_handler_t io_callback) {
+ client_initialize_io_events(client, io_callback);
+ client_initialize_time_events(client);
+
+ return 0;
+}
+
+static int client_start_delayed(sd_dhcp_client *client) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->event, -EINVAL);
+ assert_return(client->ifindex > 0, -EINVAL);
+ assert_return(client->fd < 0, -EBUSY);
+ assert_return(client->xid == 0, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_INIT_REBOOT), -EBUSY);
+
+ client->xid = random_u32();
+
+ r = dhcp_network_bind_raw_socket(client->ifindex, &client->link, client->xid,
+ client->mac_addr, client->mac_addr_len,
+ client->bcast_addr, client->bcast_addr_len,
+ client->arp_type, client->port);
+ if (r < 0) {
+ client_stop(client, r);
+ return r;
+ }
+ client->fd = r;
+
+ if (IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_INIT_REBOOT))
+ client->start_time = now(clock_boottime_or_monotonic());
+
+ return client_initialize_events(client, client_receive_message_raw);
+}
+
+static int client_start(sd_dhcp_client *client) {
+ client->start_delay = 0;
+ return client_start_delayed(client);
+}
+
+static int client_timeout_expire(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp_client *client = userdata;
+ DHCP_CLIENT_DONT_DESTROY(client);
+
+ log_dhcp_client(client, "EXPIRED");
+
+ client_notify(client, SD_DHCP_CLIENT_EVENT_EXPIRED);
+
+ /* lease was lost, start over if not freed or stopped in callback */
+ if (client->state != DHCP_STATE_STOPPED) {
+ client_initialize(client);
+ client_start(client);
+ }
+
+ return 0;
+}
+
+static int client_timeout_t2(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp_client *client = userdata;
+ DHCP_CLIENT_DONT_DESTROY(client);
+ int r;
+
+ assert(client);
+
+ client->receive_message = sd_event_source_unref(client->receive_message);
+ client->fd = safe_close(client->fd);
+
+ client->state = DHCP_STATE_REBINDING;
+ client->attempt = 0;
+
+ r = dhcp_network_bind_raw_socket(client->ifindex, &client->link, client->xid,
+ client->mac_addr, client->mac_addr_len,
+ client->bcast_addr, client->bcast_addr_len,
+ client->arp_type, client->port);
+ if (r < 0) {
+ client_stop(client, r);
+ return 0;
+ }
+ client->fd = r;
+
+ return client_initialize_events(client, client_receive_message_raw);
+}
+
+static int client_timeout_t1(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp_client *client = userdata;
+ DHCP_CLIENT_DONT_DESTROY(client);
+
+ if (client->lease)
+ client->state = DHCP_STATE_RENEWING;
+ else if (client->state != DHCP_STATE_INIT)
+ client->state = DHCP_STATE_INIT_REBOOT;
+ client->attempt = 0;
+
+ return client_initialize_time_events(client);
+}
+
+static int client_handle_offer(sd_dhcp_client *client, DHCPMessage *offer, size_t len) {
+ _cleanup_(sd_dhcp_lease_unrefp) sd_dhcp_lease *lease = NULL;
+ int r;
+
+ r = dhcp_lease_new(&lease);
+ if (r < 0)
+ return r;
+
+ if (client->client_id_len) {
+ r = dhcp_lease_set_client_id(lease,
+ (uint8_t *) &client->client_id,
+ client->client_id_len);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_option_parse(offer, len, dhcp_lease_parse_options, lease, NULL);
+ if (r != DHCP_OFFER) {
+ log_dhcp_client(client, "received message was not an OFFER, ignoring");
+ return -ENOMSG;
+ }
+
+ lease->next_server = offer->siaddr;
+ lease->address = offer->yiaddr;
+
+ if (lease->lifetime == 0 && client->fallback_lease_lifetime > 0)
+ lease->lifetime = client->fallback_lease_lifetime;
+
+ if (lease->address == 0 ||
+ lease->server_address == 0 ||
+ lease->lifetime == 0) {
+ log_dhcp_client(client, "received lease lacks address, server address or lease lifetime, ignoring");
+ return -ENOMSG;
+ }
+
+ if (!lease->have_subnet_mask) {
+ r = dhcp_lease_set_default_subnet_mask(lease);
+ if (r < 0) {
+ log_dhcp_client(client,
+ "received lease lacks subnet mask, "
+ "and a fallback one cannot be generated, ignoring");
+ return -ENOMSG;
+ }
+ }
+
+ sd_dhcp_lease_unref(client->lease);
+ client->lease = TAKE_PTR(lease);
+
+ if (client_notify(client, SD_DHCP_CLIENT_EVENT_SELECTING) < 0)
+ return -ENOMSG;
+
+ log_dhcp_client(client, "OFFER");
+
+ return 0;
+}
+
+static int client_handle_forcerenew(sd_dhcp_client *client, DHCPMessage *force, size_t len) {
+ int r;
+
+ r = dhcp_option_parse(force, len, NULL, NULL, NULL);
+ if (r != DHCP_FORCERENEW)
+ return -ENOMSG;
+
+ log_dhcp_client(client, "FORCERENEW");
+
+ return 0;
+}
+
+static bool lease_equal(const sd_dhcp_lease *a, const sd_dhcp_lease *b) {
+ if (a->address != b->address)
+ return false;
+
+ if (a->subnet_mask != b->subnet_mask)
+ return false;
+
+ if (a->router_size != b->router_size)
+ return false;
+
+ for (size_t i = 0; i < a->router_size; i++)
+ if (a->router[i].s_addr != b->router[i].s_addr)
+ return false;
+
+ return true;
+}
+
+static int client_handle_ack(sd_dhcp_client *client, DHCPMessage *ack, size_t len) {
+ _cleanup_(sd_dhcp_lease_unrefp) sd_dhcp_lease *lease = NULL;
+ _cleanup_free_ char *error_message = NULL;
+ int r;
+
+ r = dhcp_lease_new(&lease);
+ if (r < 0)
+ return r;
+
+ if (client->client_id_len) {
+ r = dhcp_lease_set_client_id(lease,
+ (uint8_t *) &client->client_id,
+ client->client_id_len);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_option_parse(ack, len, dhcp_lease_parse_options, lease, &error_message);
+ if (r == DHCP_NAK) {
+ log_dhcp_client(client, "NAK: %s", strna(error_message));
+ return -EADDRNOTAVAIL;
+ }
+
+ if (r != DHCP_ACK) {
+ log_dhcp_client(client, "received message was not an ACK, ignoring");
+ return -ENOMSG;
+ }
+
+ lease->next_server = ack->siaddr;
+
+ lease->address = ack->yiaddr;
+
+ if (lease->address == INADDR_ANY ||
+ lease->server_address == INADDR_ANY ||
+ lease->lifetime == 0) {
+ log_dhcp_client(client, "received lease lacks address, server "
+ "address or lease lifetime, ignoring");
+ return -ENOMSG;
+ }
+
+ if (lease->subnet_mask == INADDR_ANY) {
+ r = dhcp_lease_set_default_subnet_mask(lease);
+ if (r < 0) {
+ log_dhcp_client(client,
+ "received lease lacks subnet mask, "
+ "and a fallback one cannot be generated, ignoring");
+ return -ENOMSG;
+ }
+ }
+
+ r = SD_DHCP_CLIENT_EVENT_IP_ACQUIRE;
+ if (client->lease) {
+ if (lease_equal(client->lease, lease))
+ r = SD_DHCP_CLIENT_EVENT_RENEW;
+ else
+ r = SD_DHCP_CLIENT_EVENT_IP_CHANGE;
+
+ client->lease = sd_dhcp_lease_unref(client->lease);
+ }
+
+ client->lease = TAKE_PTR(lease);
+
+ log_dhcp_client(client, "ACK");
+
+ return r;
+}
+
+static uint64_t client_compute_timeout(sd_dhcp_client *client, uint32_t lifetime, double factor) {
+ assert(client);
+ assert(client->request_sent);
+ assert(lifetime > 0);
+
+ if (lifetime > 3)
+ lifetime -= 3;
+ else
+ lifetime = 0;
+
+ return client->request_sent + (lifetime * USEC_PER_SEC * factor) +
+ + (random_u32() & 0x1fffff);
+}
+
+static int client_set_lease_timeouts(sd_dhcp_client *client) {
+ usec_t time_now;
+ uint64_t lifetime_timeout;
+ uint64_t t2_timeout;
+ uint64_t t1_timeout;
+ char time_string[FORMAT_TIMESPAN_MAX];
+ int r;
+
+ assert(client);
+ assert(client->event);
+ assert(client->lease);
+ assert(client->lease->lifetime);
+
+ /* don't set timers for infinite leases */
+ if (client->lease->lifetime == 0xffffffff) {
+ (void) event_source_disable(client->timeout_t1);
+ (void) event_source_disable(client->timeout_t2);
+ (void) event_source_disable(client->timeout_expire);
+
+ return 0;
+ }
+
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+ assert(client->request_sent <= time_now);
+
+ /* convert the various timeouts from relative (secs) to absolute (usecs) */
+ lifetime_timeout = client_compute_timeout(client, client->lease->lifetime, 1);
+ if (client->lease->t1 > 0 && client->lease->t2 > 0) {
+ /* both T1 and T2 are given */
+ if (client->lease->t1 < client->lease->t2 &&
+ client->lease->t2 < client->lease->lifetime) {
+ /* they are both valid */
+ t2_timeout = client_compute_timeout(client, client->lease->t2, 1);
+ t1_timeout = client_compute_timeout(client, client->lease->t1, 1);
+ } else {
+ /* discard both */
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 7.0 / 8.0);
+ client->lease->t2 = (client->lease->lifetime * 7) / 8;
+ t1_timeout = client_compute_timeout(client, client->lease->lifetime, 0.5);
+ client->lease->t1 = client->lease->lifetime / 2;
+ }
+ } else if (client->lease->t2 > 0 && client->lease->t2 < client->lease->lifetime) {
+ /* only T2 is given, and it is valid */
+ t2_timeout = client_compute_timeout(client, client->lease->t2, 1);
+ t1_timeout = client_compute_timeout(client, client->lease->lifetime, 0.5);
+ client->lease->t1 = client->lease->lifetime / 2;
+ if (t2_timeout <= t1_timeout) {
+ /* the computed T1 would be invalid, so discard T2 */
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 7.0 / 8.0);
+ client->lease->t2 = (client->lease->lifetime * 7) / 8;
+ }
+ } else if (client->lease->t1 > 0 && client->lease->t1 < client->lease->lifetime) {
+ /* only T1 is given, and it is valid */
+ t1_timeout = client_compute_timeout(client, client->lease->t1, 1);
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 7.0 / 8.0);
+ client->lease->t2 = (client->lease->lifetime * 7) / 8;
+ if (t2_timeout <= t1_timeout) {
+ /* the computed T2 would be invalid, so discard T1 */
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 0.5);
+ client->lease->t2 = client->lease->lifetime / 2;
+ }
+ } else {
+ /* fall back to the default timeouts */
+ t1_timeout = client_compute_timeout(client, client->lease->lifetime, 0.5);
+ client->lease->t1 = client->lease->lifetime / 2;
+ t2_timeout = client_compute_timeout(client, client->lease->lifetime, 7.0 / 8.0);
+ client->lease->t2 = (client->lease->lifetime * 7) / 8;
+ }
+
+ /* arm lifetime timeout */
+ r = event_reset_time(client->event, &client->timeout_expire,
+ clock_boottime_or_monotonic(),
+ lifetime_timeout, 10 * USEC_PER_MSEC,
+ client_timeout_expire, client,
+ client->event_priority, "dhcp4-lifetime", true);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "lease expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, lifetime_timeout - time_now, USEC_PER_SEC));
+
+ /* don't arm earlier timeouts if this has already expired */
+ if (lifetime_timeout <= time_now)
+ return 0;
+
+ /* arm T2 timeout */
+ r = event_reset_time(client->event, &client->timeout_t2,
+ clock_boottime_or_monotonic(),
+ t2_timeout, 10 * USEC_PER_MSEC,
+ client_timeout_t2, client,
+ client->event_priority, "dhcp4-t2-timeout", true);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "T2 expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, t2_timeout - time_now, USEC_PER_SEC));
+
+ /* don't arm earlier timeout if this has already expired */
+ if (t2_timeout <= time_now)
+ return 0;
+
+ /* arm T1 timeout */
+ r = event_reset_time(client->event, &client->timeout_t1,
+ clock_boottime_or_monotonic(),
+ t1_timeout, 10 * USEC_PER_MSEC,
+ client_timeout_t1, client,
+ client->event_priority, "dhcp4-t1-timer", true);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "T1 expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, t1_timeout - time_now, USEC_PER_SEC));
+
+ return 0;
+}
+
+static int client_handle_message(sd_dhcp_client *client, DHCPMessage *message, int len) {
+ DHCP_CLIENT_DONT_DESTROY(client);
+ char time_string[FORMAT_TIMESPAN_MAX];
+ int r = 0, notify_event = 0;
+
+ assert(client);
+ assert(client->event);
+ assert(message);
+
+ switch (client->state) {
+ case DHCP_STATE_SELECTING:
+
+ r = client_handle_offer(client, message, len);
+ if (r >= 0) {
+
+ client->state = DHCP_STATE_REQUESTING;
+ client->attempt = 0;
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ 0, 0,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp4-resend-timer", true);
+ if (r < 0)
+ goto error;
+ } else if (r == -ENOMSG)
+ /* invalid message, let's ignore it */
+ return 0;
+
+ break;
+
+ case DHCP_STATE_REBOOTING:
+ case DHCP_STATE_REQUESTING:
+ case DHCP_STATE_RENEWING:
+ case DHCP_STATE_REBINDING:
+
+ r = client_handle_ack(client, message, len);
+ if (r >= 0) {
+ client->start_delay = 0;
+ (void) event_source_disable(client->timeout_resend);
+ client->receive_message =
+ sd_event_source_unref(client->receive_message);
+ client->fd = safe_close(client->fd);
+
+ if (IN_SET(client->state, DHCP_STATE_REQUESTING,
+ DHCP_STATE_REBOOTING))
+ notify_event = SD_DHCP_CLIENT_EVENT_IP_ACQUIRE;
+ else if (r != SD_DHCP_CLIENT_EVENT_IP_ACQUIRE)
+ notify_event = r;
+
+ client->state = DHCP_STATE_BOUND;
+ client->attempt = 0;
+
+ client->last_addr = client->lease->address;
+
+ r = client_set_lease_timeouts(client);
+ if (r < 0) {
+ log_dhcp_client(client, "could not set lease timeouts");
+ goto error;
+ }
+
+ r = dhcp_network_bind_udp_socket(client->ifindex, client->lease->address, client->port, client->ip_service_type);
+ if (r < 0) {
+ log_dhcp_client(client, "could not bind UDP socket");
+ goto error;
+ }
+
+ client->fd = r;
+
+ client_initialize_io_events(client, client_receive_message_udp);
+
+ if (notify_event) {
+ client_notify(client, notify_event);
+ if (client->state == DHCP_STATE_STOPPED)
+ return 0;
+ }
+
+ } else if (r == -EADDRNOTAVAIL) {
+ /* got a NAK, let's restart the client */
+ client_notify(client, SD_DHCP_CLIENT_EVENT_EXPIRED);
+
+ r = client_initialize(client);
+ if (r < 0)
+ goto error;
+
+ r = client_start_delayed(client);
+ if (r < 0)
+ goto error;
+
+ log_dhcp_client(client, "REBOOT in %s", format_timespan(time_string, FORMAT_TIMESPAN_MAX,
+ client->start_delay, USEC_PER_SEC));
+
+ client->start_delay = CLAMP(client->start_delay * 2,
+ RESTART_AFTER_NAK_MIN_USEC, RESTART_AFTER_NAK_MAX_USEC);
+
+ return 0;
+ } else if (r == -ENOMSG)
+ /* invalid message, let's ignore it */
+ return 0;
+
+ break;
+
+ case DHCP_STATE_BOUND:
+ r = client_handle_forcerenew(client, message, len);
+ if (r >= 0) {
+ r = client_timeout_t1(NULL, 0, client);
+ if (r < 0)
+ goto error;
+ } else if (r == -ENOMSG)
+ /* invalid message, let's ignore it */
+ return 0;
+
+ break;
+
+ case DHCP_STATE_INIT:
+ case DHCP_STATE_INIT_REBOOT:
+
+ break;
+
+ case DHCP_STATE_STOPPED:
+ r = -EINVAL;
+ goto error;
+ }
+
+error:
+ if (r < 0)
+ client_stop(client, r);
+
+ return r;
+}
+
+static int client_receive_message_udp(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+
+ sd_dhcp_client *client = userdata;
+ _cleanup_free_ DHCPMessage *message = NULL;
+ const uint8_t *expected_chaddr = NULL;
+ uint8_t expected_hlen = 0;
+ ssize_t len, buflen;
+
+ assert(s);
+ assert(client);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen == -ENETDOWN)
+ /* the link is down. Don't return an error or the I/O event
+ source will be disconnected and we won't be able to receive
+ packets again when the link comes back. */
+ return 0;
+ if (buflen < 0)
+ return buflen;
+
+ message = malloc0(buflen);
+ if (!message)
+ return -ENOMEM;
+
+ len = recv(fd, message, buflen, 0);
+ if (len < 0) {
+ /* see comment above for why we shouldn't error out on ENETDOWN. */
+ if (IN_SET(errno, EAGAIN, EINTR, ENETDOWN))
+ return 0;
+
+ return log_dhcp_client_errno(client, errno,
+ "Could not receive message from UDP socket: %m");
+ }
+ if ((size_t) len < sizeof(DHCPMessage)) {
+ log_dhcp_client(client, "Too small to be a DHCP message: ignoring");
+ return 0;
+ }
+
+ if (be32toh(message->magic) != DHCP_MAGIC_COOKIE) {
+ log_dhcp_client(client, "Not a DHCP message: ignoring");
+ return 0;
+ }
+
+ if (message->op != BOOTREPLY) {
+ log_dhcp_client(client, "Not a BOOTREPLY message: ignoring");
+ return 0;
+ }
+
+ if (message->htype != client->arp_type) {
+ log_dhcp_client(client, "Packet type does not match client type");
+ return 0;
+ }
+
+ if (client->arp_type == ARPHRD_ETHER) {
+ expected_hlen = ETH_ALEN;
+ expected_chaddr = &client->mac_addr[0];
+ }
+
+ if (message->hlen != expected_hlen) {
+ log_dhcp_client(client, "Unexpected packet hlen %d", message->hlen);
+ return 0;
+ }
+
+ if (expected_hlen > 0 && memcmp(&message->chaddr[0], expected_chaddr, expected_hlen)) {
+ log_dhcp_client(client, "Received chaddr does not match expected: ignoring");
+ return 0;
+ }
+
+ if (client->state != DHCP_STATE_BOUND &&
+ be32toh(message->xid) != client->xid) {
+ /* in BOUND state, we may receive FORCERENEW with xid set by server,
+ so ignore the xid in this case */
+ log_dhcp_client(client, "Received xid (%u) does not match expected (%u): ignoring",
+ be32toh(message->xid), client->xid);
+ return 0;
+ }
+
+ return client_handle_message(client, message, len);
+}
+
+static int client_receive_message_raw(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+
+ sd_dhcp_client *client = userdata;
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct tpacket_auxdata))) control;
+ struct iovec iov = {};
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ bool checksum = true;
+ ssize_t buflen, len;
+ int r;
+
+ assert(s);
+ assert(client);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen == -ENETDOWN)
+ return 0;
+ if (buflen < 0)
+ return buflen;
+
+ packet = malloc0(buflen);
+ if (!packet)
+ return -ENOMEM;
+
+ iov = IOVEC_MAKE(packet, buflen);
+
+ len = recvmsg_safe(fd, &msg, 0);
+ if (IN_SET(len, -EAGAIN, -EINTR, -ENETDOWN))
+ return 0;
+ if (len < 0)
+ return log_dhcp_client_errno(client, len,
+ "Could not receive message from raw socket: %m");
+
+ if ((size_t) len < sizeof(DHCPPacket))
+ return 0;
+
+ cmsg = cmsg_find(&msg, SOL_PACKET, PACKET_AUXDATA, CMSG_LEN(sizeof(struct tpacket_auxdata)));
+ if (cmsg) {
+ struct tpacket_auxdata *aux = (struct tpacket_auxdata*) CMSG_DATA(cmsg);
+ checksum = !(aux->tp_status & TP_STATUS_CSUMNOTREADY);
+ }
+
+ r = dhcp_packet_verify_headers(packet, len, checksum, client->port);
+ if (r < 0)
+ return 0;
+
+ len -= DHCP_IP_UDP_SIZE;
+
+ return client_handle_message(client, &packet->dhcp, len);
+}
+
+int sd_dhcp_client_send_renew(sd_dhcp_client *client) {
+ assert_return(client, -EINVAL);
+ assert_return(client->fd >= 0, -EINVAL);
+
+ if (!client->lease)
+ return 0;
+
+ client->start_delay = 0;
+ client->attempt = 1;
+ client->state = DHCP_STATE_RENEWING;
+
+ return client_initialize_time_events(client);
+}
+
+int sd_dhcp_client_start(sd_dhcp_client *client) {
+ int r;
+
+ assert_return(client, -EINVAL);
+
+ r = client_initialize(client);
+ if (r < 0)
+ return r;
+
+ /* RFC7844 section 3.3:
+ SHOULD perform a complete four-way handshake, starting with a
+ DHCPDISCOVER, to obtain a new address lease. If the client can
+ ascertain that this is exactly the same network to which it was
+ previously connected, and if the link-layer address did not change,
+ the client MAY issue a DHCPREQUEST to try to reclaim the current
+ address. */
+ if (client->last_addr && !client->anonymize)
+ client->state = DHCP_STATE_INIT_REBOOT;
+
+ r = client_start(client);
+ if (r >= 0)
+ log_dhcp_client(client, "STARTED on ifindex %i", client->ifindex);
+
+ return r;
+}
+
+int sd_dhcp_client_send_release(sd_dhcp_client *client) {
+ assert_return(client, -EINVAL);
+ assert_return(client->state != DHCP_STATE_STOPPED, -ESTALE);
+ assert_return(client->lease, -EUNATCH);
+
+ _cleanup_free_ DHCPPacket *release = NULL;
+ size_t optoffset, optlen;
+ int r;
+
+ r = client_message_init(client, &release, DHCP_RELEASE, &optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ /* Fill up release IP and MAC */
+ release->dhcp.ciaddr = client->lease->address;
+ memcpy(&release->dhcp.chaddr, &client->mac_addr, client->mac_addr_len);
+
+ r = dhcp_option_append(&release->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = dhcp_network_send_udp_socket(client->fd,
+ client->lease->server_address,
+ DHCP_PORT_SERVER,
+ &release->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "RELEASE");
+
+ return 0;
+}
+
+int sd_dhcp_client_send_decline(sd_dhcp_client *client) {
+ assert_return(client, -EINVAL);
+ assert_return(client->state != DHCP_STATE_STOPPED, -ESTALE);
+ assert_return(client->lease, -EUNATCH);
+
+ _cleanup_free_ DHCPPacket *release = NULL;
+ size_t optoffset, optlen;
+ int r;
+
+ r = client_message_init(client, &release, DHCP_DECLINE, &optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ release->dhcp.ciaddr = client->lease->address;
+ memcpy(&release->dhcp.chaddr, &client->mac_addr, client->mac_addr_len);
+
+ r = dhcp_option_append(&release->dhcp, optlen, &optoffset, 0,
+ SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = dhcp_network_send_udp_socket(client->fd,
+ client->lease->server_address,
+ DHCP_PORT_SERVER,
+ &release->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ if (r < 0)
+ return r;
+
+ log_dhcp_client(client, "DECLINE");
+
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+
+ if (client->state != DHCP_STATE_STOPPED) {
+ r = sd_dhcp_client_start(client);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int sd_dhcp_client_stop(sd_dhcp_client *client) {
+ if (!client)
+ return 0;
+
+ DHCP_CLIENT_DONT_DESTROY(client);
+
+ client_stop(client, SD_DHCP_CLIENT_EVENT_STOP);
+ client->state = DHCP_STATE_STOPPED;
+
+ return 0;
+}
+
+int sd_dhcp_client_attach_event(sd_dhcp_client *client, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(!client->event, -EBUSY);
+
+ if (event)
+ client->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&client->event);
+ if (r < 0)
+ return 0;
+ }
+
+ client->event_priority = priority;
+
+ return 0;
+}
+
+int sd_dhcp_client_detach_event(sd_dhcp_client *client) {
+ assert_return(client, -EINVAL);
+
+ client->event = sd_event_unref(client->event);
+
+ return 0;
+}
+
+sd_event *sd_dhcp_client_get_event(sd_dhcp_client *client) {
+ assert_return(client, NULL);
+
+ return client->event;
+}
+
+static sd_dhcp_client *dhcp_client_free(sd_dhcp_client *client) {
+ if (!client)
+ return NULL;
+
+ log_dhcp_client(client, "FREE");
+
+ client->timeout_resend = sd_event_source_unref(client->timeout_resend);
+ client->timeout_t1 = sd_event_source_unref(client->timeout_t1);
+ client->timeout_t2 = sd_event_source_unref(client->timeout_t2);
+ client->timeout_expire = sd_event_source_unref(client->timeout_expire);
+
+ client_initialize(client);
+
+ sd_dhcp_client_detach_event(client);
+
+ sd_dhcp_lease_unref(client->lease);
+
+ set_free(client->req_opts);
+ free(client->hostname);
+ free(client->vendor_class_identifier);
+ free(client->mudurl);
+ client->user_class = strv_free(client->user_class);
+ ordered_hashmap_free(client->extra_options);
+ ordered_hashmap_free(client->vendor_options);
+ return mfree(client);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp_client, sd_dhcp_client, dhcp_client_free);
+
+int sd_dhcp_client_new(sd_dhcp_client **ret, int anonymize) {
+ const uint8_t *opts;
+ size_t n_opts;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ _cleanup_(sd_dhcp_client_unrefp) sd_dhcp_client *client = new(sd_dhcp_client, 1);
+ if (!client)
+ return -ENOMEM;
+
+ *client = (sd_dhcp_client) {
+ .n_ref = 1,
+ .state = DHCP_STATE_INIT,
+ .ifindex = -1,
+ .fd = -1,
+ .mtu = DHCP_DEFAULT_MIN_SIZE,
+ .port = DHCP_PORT_CLIENT,
+ .anonymize = !!anonymize,
+ .max_attempts = (uint64_t) -1,
+ .ip_service_type = -1,
+ };
+ /* NOTE: this could be moved to a function. */
+ if (anonymize) {
+ n_opts = ELEMENTSOF(default_req_opts_anonymize);
+ opts = default_req_opts_anonymize;
+ } else {
+ n_opts = ELEMENTSOF(default_req_opts);
+ opts = default_req_opts;
+ }
+
+ for (size_t i = 0; i < n_opts; i++) {
+ r = sd_dhcp_client_set_request_option(client, opts[i]);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(client);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-dhcp-lease.c b/src/libsystemd-network/sd-dhcp-lease.c
new file mode 100644
index 0000000..8a138ff
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp-lease.c
@@ -0,0 +1,1399 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-dhcp-lease.h"
+
+#include "alloc-util.h"
+#include "dhcp-lease-internal.h"
+#include "dhcp-protocol.h"
+#include "dns-domain.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "network-internal.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "unaligned.h"
+
+int sd_dhcp_lease_get_address(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->address == 0)
+ return -ENODATA;
+
+ addr->s_addr = lease->address;
+ return 0;
+}
+
+int sd_dhcp_lease_get_broadcast(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (!lease->have_broadcast)
+ return -ENODATA;
+
+ addr->s_addr = lease->broadcast;
+ return 0;
+}
+
+int sd_dhcp_lease_get_lifetime(sd_dhcp_lease *lease, uint32_t *lifetime) {
+ assert_return(lease, -EINVAL);
+ assert_return(lifetime, -EINVAL);
+
+ if (lease->lifetime <= 0)
+ return -ENODATA;
+
+ *lifetime = lease->lifetime;
+ return 0;
+}
+
+int sd_dhcp_lease_get_t1(sd_dhcp_lease *lease, uint32_t *t1) {
+ assert_return(lease, -EINVAL);
+ assert_return(t1, -EINVAL);
+
+ if (lease->t1 <= 0)
+ return -ENODATA;
+
+ *t1 = lease->t1;
+ return 0;
+}
+
+int sd_dhcp_lease_get_t2(sd_dhcp_lease *lease, uint32_t *t2) {
+ assert_return(lease, -EINVAL);
+ assert_return(t2, -EINVAL);
+
+ if (lease->t2 <= 0)
+ return -ENODATA;
+
+ *t2 = lease->t2;
+ return 0;
+}
+
+int sd_dhcp_lease_get_mtu(sd_dhcp_lease *lease, uint16_t *mtu) {
+ assert_return(lease, -EINVAL);
+ assert_return(mtu, -EINVAL);
+
+ if (lease->mtu <= 0)
+ return -ENODATA;
+
+ *mtu = lease->mtu;
+ return 0;
+}
+
+int sd_dhcp_lease_get_servers(
+ sd_dhcp_lease *lease,
+ sd_dhcp_lease_server_type what,
+ const struct in_addr **addr) {
+
+ assert_return(lease, -EINVAL);
+ assert_return(what >= 0, -EINVAL);
+ assert_return(what < _SD_DHCP_LEASE_SERVER_TYPE_MAX, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->servers[what].size <= 0)
+ return -ENODATA;
+
+ *addr = lease->servers[what].addr;
+ return (int) lease->servers[what].size;
+}
+
+int sd_dhcp_lease_get_dns(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ return sd_dhcp_lease_get_servers(lease, SD_DHCP_LEASE_DNS, addr);
+}
+int sd_dhcp_lease_get_ntp(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ return sd_dhcp_lease_get_servers(lease, SD_DHCP_LEASE_NTP, addr);
+}
+int sd_dhcp_lease_get_sip(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ return sd_dhcp_lease_get_servers(lease, SD_DHCP_LEASE_SIP, addr);
+}
+int sd_dhcp_lease_get_pop3(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ return sd_dhcp_lease_get_servers(lease, SD_DHCP_LEASE_POP3, addr);
+}
+int sd_dhcp_lease_get_smtp(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ return sd_dhcp_lease_get_servers(lease, SD_DHCP_LEASE_SMTP, addr);
+}
+int sd_dhcp_lease_get_lpr(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ return sd_dhcp_lease_get_servers(lease, SD_DHCP_LEASE_LPR, addr);
+}
+
+int sd_dhcp_lease_get_domainname(sd_dhcp_lease *lease, const char **domainname) {
+ assert_return(lease, -EINVAL);
+ assert_return(domainname, -EINVAL);
+
+ if (!lease->domainname)
+ return -ENODATA;
+
+ *domainname = lease->domainname;
+ return 0;
+}
+
+int sd_dhcp_lease_get_hostname(sd_dhcp_lease *lease, const char **hostname) {
+ assert_return(lease, -EINVAL);
+ assert_return(hostname, -EINVAL);
+
+ if (!lease->hostname)
+ return -ENODATA;
+
+ *hostname = lease->hostname;
+ return 0;
+}
+
+int sd_dhcp_lease_get_root_path(sd_dhcp_lease *lease, const char **root_path) {
+ assert_return(lease, -EINVAL);
+ assert_return(root_path, -EINVAL);
+
+ if (!lease->root_path)
+ return -ENODATA;
+
+ *root_path = lease->root_path;
+ return 0;
+}
+
+int sd_dhcp_lease_get_router(sd_dhcp_lease *lease, const struct in_addr **addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->router_size <= 0)
+ return -ENODATA;
+
+ *addr = lease->router;
+ return (int) lease->router_size;
+}
+
+int sd_dhcp_lease_get_netmask(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (!lease->have_subnet_mask)
+ return -ENODATA;
+
+ addr->s_addr = lease->subnet_mask;
+ return 0;
+}
+
+int sd_dhcp_lease_get_server_identifier(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->server_address == 0)
+ return -ENODATA;
+
+ addr->s_addr = lease->server_address;
+ return 0;
+}
+
+int sd_dhcp_lease_get_next_server(sd_dhcp_lease *lease, struct in_addr *addr) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+
+ if (lease->next_server == 0)
+ return -ENODATA;
+
+ addr->s_addr = lease->next_server;
+ return 0;
+}
+
+/*
+ * The returned routes array must be freed by the caller.
+ * Route objects have the same lifetime of the lease and must not be freed.
+ */
+int sd_dhcp_lease_get_routes(sd_dhcp_lease *lease, sd_dhcp_route ***routes) {
+ sd_dhcp_route **ret;
+ unsigned i;
+
+ assert_return(lease, -EINVAL);
+ assert_return(routes, -EINVAL);
+
+ if (lease->static_route_size <= 0)
+ return -ENODATA;
+
+ ret = new(sd_dhcp_route *, lease->static_route_size);
+ if (!ret)
+ return -ENOMEM;
+
+ for (i = 0; i < lease->static_route_size; i++)
+ ret[i] = &lease->static_route[i];
+
+ *routes = ret;
+ return (int) lease->static_route_size;
+}
+
+int sd_dhcp_lease_get_search_domains(sd_dhcp_lease *lease, char ***domains) {
+ size_t r;
+
+ assert_return(lease, -EINVAL);
+ assert_return(domains, -EINVAL);
+
+ r = strv_length(lease->search_domains);
+ if (r > 0) {
+ *domains = lease->search_domains;
+ return (int) r;
+ }
+
+ return -ENODATA;
+}
+
+int sd_dhcp_lease_get_vendor_specific(sd_dhcp_lease *lease, const void **data, size_t *data_len) {
+ assert_return(lease, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(data_len, -EINVAL);
+
+ if (lease->vendor_specific_len <= 0)
+ return -ENODATA;
+
+ *data = lease->vendor_specific;
+ *data_len = lease->vendor_specific_len;
+ return 0;
+}
+
+static sd_dhcp_lease *dhcp_lease_free(sd_dhcp_lease *lease) {
+ assert(lease);
+
+ while (lease->private_options) {
+ struct sd_dhcp_raw_option *option = lease->private_options;
+
+ LIST_REMOVE(options, lease->private_options, option);
+
+ free(option->data);
+ free(option);
+ }
+
+ free(lease->root_path);
+ free(lease->router);
+ free(lease->timezone);
+ free(lease->hostname);
+ free(lease->domainname);
+
+ for (sd_dhcp_lease_server_type i = 0; i < _SD_DHCP_LEASE_SERVER_TYPE_MAX; i++)
+ free(lease->servers[i].addr);
+
+ free(lease->static_route);
+ free(lease->client_id);
+ free(lease->vendor_specific);
+ strv_free(lease->search_domains);
+ return mfree(lease);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp_lease, sd_dhcp_lease, dhcp_lease_free);
+
+static int lease_parse_u32(const uint8_t *option, size_t len, uint32_t *ret, uint32_t min) {
+ assert(option);
+ assert(ret);
+
+ if (len != 4)
+ return -EINVAL;
+
+ *ret = unaligned_read_be32((be32_t*) option);
+ if (*ret < min)
+ *ret = min;
+
+ return 0;
+}
+
+static int lease_parse_u16(const uint8_t *option, size_t len, uint16_t *ret, uint16_t min) {
+ assert(option);
+ assert(ret);
+
+ if (len != 2)
+ return -EINVAL;
+
+ *ret = unaligned_read_be16((be16_t*) option);
+ if (*ret < min)
+ *ret = min;
+
+ return 0;
+}
+
+static int lease_parse_be32(const uint8_t *option, size_t len, be32_t *ret) {
+ assert(option);
+ assert(ret);
+
+ if (len != 4)
+ return -EINVAL;
+
+ memcpy(ret, option, 4);
+ return 0;
+}
+
+static int lease_parse_string(const uint8_t *option, size_t len, char **ret) {
+ assert(option);
+ assert(ret);
+
+ if (len <= 0)
+ *ret = mfree(*ret);
+ else {
+ char *string;
+
+ /*
+ * One trailing NUL byte is OK, we don't mind. See:
+ * https://github.com/systemd/systemd/issues/1337
+ */
+ if (memchr(option, 0, len - 1))
+ return -EINVAL;
+
+ string = memdup_suffix0((const char *) option, len);
+ if (!string)
+ return -ENOMEM;
+
+ free_and_replace(*ret, string);
+ }
+
+ return 0;
+}
+
+static int lease_parse_domain(const uint8_t *option, size_t len, char **ret) {
+ _cleanup_free_ char *name = NULL, *normalized = NULL;
+ int r;
+
+ assert(option);
+ assert(ret);
+
+ r = lease_parse_string(option, len, &name);
+ if (r < 0)
+ return r;
+ if (!name) {
+ *ret = mfree(*ret);
+ return 0;
+ }
+
+ r = dns_name_normalize(name, 0, &normalized);
+ if (r < 0)
+ return r;
+
+ if (is_localhost(normalized))
+ return -EINVAL;
+
+ if (dns_name_is_root(normalized))
+ return -EINVAL;
+
+ free_and_replace(*ret, normalized);
+
+ return 0;
+}
+
+static int lease_parse_in_addrs(const uint8_t *option, size_t len, struct in_addr **ret, size_t *n_ret) {
+ assert(option || len == 0);
+ assert(ret);
+ assert(n_ret);
+
+ if (len <= 0) {
+ *ret = mfree(*ret);
+ *n_ret = 0;
+ } else {
+ size_t n_addresses;
+ struct in_addr *addresses;
+
+ if (len % 4 != 0)
+ return -EINVAL;
+
+ n_addresses = len / 4;
+
+ addresses = newdup(struct in_addr, option, n_addresses);
+ if (!addresses)
+ return -ENOMEM;
+
+ free(*ret);
+ *ret = addresses;
+ *n_ret = n_addresses;
+ }
+
+ return 0;
+}
+
+static int lease_parse_sip_server(const uint8_t *option, size_t len, struct in_addr **ret, size_t *n_ret) {
+ assert(option || len == 0);
+ assert(ret);
+ assert(n_ret);
+
+ if (len <= 0)
+ return -EINVAL;
+
+ /* The SIP record is like the other, regular server records, but prefixed with a single "encoding"
+ * byte that is either 0 or 1. We only support it to be 1 for now. Let's drop it and parse it like
+ * the other fields */
+
+ if (option[0] != 1) { /* We only support IP address encoding for now */
+ *ret = mfree(*ret);
+ *n_ret = 0;
+ return 0;
+ }
+
+ return lease_parse_in_addrs(option + 1, len - 1, ret, n_ret);
+}
+
+static int lease_parse_routes(
+ const uint8_t *option, size_t len,
+ struct sd_dhcp_route **routes, size_t *routes_size, size_t *routes_allocated) {
+
+ struct in_addr addr;
+
+ assert(option || len <= 0);
+ assert(routes);
+ assert(routes_size);
+ assert(routes_allocated);
+
+ if (len <= 0)
+ return 0;
+
+ if (len % 8 != 0)
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(*routes, *routes_allocated, *routes_size + (len / 8)))
+ return -ENOMEM;
+
+ while (len >= 8) {
+ struct sd_dhcp_route *route = *routes + *routes_size;
+ int r;
+
+ route->option = SD_DHCP_OPTION_STATIC_ROUTE;
+ r = in4_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
+ if (r < 0) {
+ log_debug("Failed to determine destination prefix length from class based IP, ignoring");
+ continue;
+ }
+
+ assert_se(lease_parse_be32(option, 4, &addr.s_addr) >= 0);
+ route->dst_addr = inet_makeaddr(inet_netof(addr), 0);
+ option += 4;
+
+ assert_se(lease_parse_be32(option, 4, &route->gw_addr.s_addr) >= 0);
+ option += 4;
+
+ len -= 8;
+ (*routes_size)++;
+ }
+
+ return 0;
+}
+
+/* parses RFC3442 Classless Static Route Option */
+static int lease_parse_classless_routes(
+ const uint8_t *option, size_t len,
+ struct sd_dhcp_route **routes, size_t *routes_size, size_t *routes_allocated) {
+
+ assert(option || len <= 0);
+ assert(routes);
+ assert(routes_size);
+ assert(routes_allocated);
+
+ if (len <= 0)
+ return 0;
+
+ /* option format: (subnet-mask-width significant-subnet-octets gateway-ip)* */
+
+ while (len > 0) {
+ uint8_t dst_octets;
+ struct sd_dhcp_route *route;
+
+ if (!GREEDY_REALLOC(*routes, *routes_allocated, *routes_size + 1))
+ return -ENOMEM;
+
+ route = *routes + *routes_size;
+ route->option = SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE;
+
+ dst_octets = (*option == 0 ? 0 : ((*option - 1) / 8) + 1);
+ route->dst_prefixlen = *option;
+ option++;
+ len--;
+
+ /* can't have more than 4 octets in IPv4 */
+ if (dst_octets > 4 || len < dst_octets)
+ return -EINVAL;
+
+ route->dst_addr.s_addr = 0;
+ memcpy(&route->dst_addr.s_addr, option, dst_octets);
+ option += dst_octets;
+ len -= dst_octets;
+
+ if (len < 4)
+ return -EINVAL;
+
+ assert_se(lease_parse_be32(option, 4, &route->gw_addr.s_addr) >= 0);
+ option += 4;
+ len -= 4;
+
+ (*routes_size)++;
+ }
+
+ return 0;
+}
+
+int dhcp_lease_parse_options(uint8_t code, uint8_t len, const void *option, void *userdata) {
+ sd_dhcp_lease *lease = userdata;
+ int r;
+
+ assert(lease);
+
+ switch(code) {
+
+ case SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME:
+ r = lease_parse_u32(option, len, &lease->lifetime, 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse lease time, ignoring: %m");
+
+ break;
+
+ case SD_DHCP_OPTION_SERVER_IDENTIFIER:
+ r = lease_parse_be32(option, len, &lease->server_address);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse server identifier, ignoring: %m");
+
+ break;
+
+ case SD_DHCP_OPTION_SUBNET_MASK:
+ r = lease_parse_be32(option, len, &lease->subnet_mask);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse subnet mask, ignoring: %m");
+ else
+ lease->have_subnet_mask = true;
+ break;
+
+ case SD_DHCP_OPTION_BROADCAST:
+ r = lease_parse_be32(option, len, &lease->broadcast);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse broadcast address, ignoring: %m");
+ else
+ lease->have_broadcast = true;
+ break;
+
+ case SD_DHCP_OPTION_ROUTER:
+ r = lease_parse_in_addrs(option, len, &lease->router, &lease->router_size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse router addresses, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_DOMAIN_NAME_SERVER:
+ r = lease_parse_in_addrs(option, len, &lease->servers[SD_DHCP_LEASE_DNS].addr, &lease->servers[SD_DHCP_LEASE_DNS].size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse DNS server, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_NTP_SERVER:
+ r = lease_parse_in_addrs(option, len, &lease->servers[SD_DHCP_LEASE_NTP].addr, &lease->servers[SD_DHCP_LEASE_NTP].size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse NTP server, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_SIP_SERVER:
+ r = lease_parse_sip_server(option, len, &lease->servers[SD_DHCP_LEASE_SIP].addr, &lease->servers[SD_DHCP_LEASE_SIP].size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse SIP server, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_POP3_SERVER:
+ r = lease_parse_in_addrs(option, len, &lease->servers[SD_DHCP_LEASE_POP3].addr, &lease->servers[SD_DHCP_LEASE_POP3].size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse POP3 server, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_SMTP_SERVER:
+ r = lease_parse_in_addrs(option, len, &lease->servers[SD_DHCP_LEASE_SMTP].addr, &lease->servers[SD_DHCP_LEASE_SMTP].size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse SMTP server, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_LPR_SERVER:
+ r = lease_parse_in_addrs(option, len, &lease->servers[SD_DHCP_LEASE_LPR].addr, &lease->servers[SD_DHCP_LEASE_LPR].size);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse LPR server, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_STATIC_ROUTE:
+ r = lease_parse_routes(option, len, &lease->static_route, &lease->static_route_size, &lease->static_route_allocated);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse static routes, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_INTERFACE_MTU:
+ r = lease_parse_u16(option, len, &lease->mtu, 68);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse MTU, ignoring: %m");
+ if (lease->mtu < DHCP_DEFAULT_MIN_SIZE) {
+ log_debug("MTU value of %" PRIu16 " too small. Using default MTU value of %d instead.", lease->mtu, DHCP_DEFAULT_MIN_SIZE);
+ lease->mtu = DHCP_DEFAULT_MIN_SIZE;
+ }
+
+ break;
+
+ case SD_DHCP_OPTION_DOMAIN_NAME:
+ r = lease_parse_domain(option, len, &lease->domainname);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse domain name, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case SD_DHCP_OPTION_DOMAIN_SEARCH_LIST:
+ r = dhcp_lease_parse_search_domains(option, len, &lease->search_domains);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse Domain Search List, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_HOST_NAME:
+ r = lease_parse_domain(option, len, &lease->hostname);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse hostname, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case SD_DHCP_OPTION_ROOT_PATH:
+ r = lease_parse_string(option, len, &lease->root_path);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse root path, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_RENEWAL_T1_TIME:
+ r = lease_parse_u32(option, len, &lease->t1, 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse T1 time, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_REBINDING_T2_TIME:
+ r = lease_parse_u32(option, len, &lease->t2, 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse T2 time, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE:
+ r = lease_parse_classless_routes(
+ option, len,
+ &lease->static_route,
+ &lease->static_route_size,
+ &lease->static_route_allocated);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse classless routes, ignoring: %m");
+ break;
+
+ case SD_DHCP_OPTION_NEW_TZDB_TIMEZONE: {
+ _cleanup_free_ char *tz = NULL;
+
+ r = lease_parse_string(option, len, &tz);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse timezone option, ignoring: %m");
+ return 0;
+ }
+
+ if (!timezone_is_valid(tz, LOG_DEBUG)) {
+ log_debug_errno(r, "Timezone is not valid, ignoring: %m");
+ return 0;
+ }
+
+ free_and_replace(lease->timezone, tz);
+
+ break;
+ }
+
+ case SD_DHCP_OPTION_VENDOR_SPECIFIC:
+
+ if (len <= 0)
+ lease->vendor_specific = mfree(lease->vendor_specific);
+ else {
+ void *p;
+
+ p = memdup(option, len);
+ if (!p)
+ return -ENOMEM;
+
+ free(lease->vendor_specific);
+ lease->vendor_specific = p;
+ }
+
+ lease->vendor_specific_len = len;
+ break;
+
+ case SD_DHCP_OPTION_PRIVATE_BASE ... SD_DHCP_OPTION_PRIVATE_LAST:
+ r = dhcp_lease_insert_private_option(lease, code, option, len);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ log_debug("Ignoring option DHCP option %"PRIu8" while parsing.", code);
+ break;
+ }
+
+ return 0;
+}
+
+/* Parses compressed domain names. */
+int dhcp_lease_parse_search_domains(const uint8_t *option, size_t len, char ***domains) {
+ _cleanup_strv_free_ char **names = NULL;
+ size_t pos = 0, cnt = 0;
+ int r;
+
+ assert(domains);
+ assert_return(option && len > 0, -ENODATA);
+
+ while (pos < len) {
+ _cleanup_free_ char *name = NULL;
+ size_t n = 0, allocated = 0;
+ size_t jump_barrier = pos, next_chunk = 0;
+ bool first = true;
+
+ for (;;) {
+ uint8_t c;
+ c = option[pos++];
+
+ if (c == 0) {
+ /* End of name */
+ break;
+ } else if (c <= 63) {
+ const char *label;
+
+ /* Literal label */
+ label = (const char*) (option + pos);
+ pos += c;
+ if (pos >= len)
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(name, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ if (first)
+ first = false;
+ else
+ name[n++] = '.';
+
+ r = dns_label_escape(label, c, name + n, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ n += r;
+ } else if (FLAGS_SET(c, 0xc0)) {
+ /* Pointer */
+
+ uint8_t d;
+ uint16_t ptr;
+
+ if (pos >= len)
+ return -EBADMSG;
+
+ d = option[pos++];
+ ptr = (uint16_t) (c & ~0xc0) << 8 | (uint16_t) d;
+
+ /* Jumps are limited to a "prior occurrence" (RFC-1035 4.1.4) */
+ if (ptr >= jump_barrier)
+ return -EBADMSG;
+ jump_barrier = ptr;
+
+ /* Save current location so we don't end up re-parsing what's parsed so far. */
+ if (next_chunk == 0)
+ next_chunk = pos;
+
+ pos = ptr;
+ } else
+ return -EBADMSG;
+ }
+
+ if (!GREEDY_REALLOC(name, allocated, n + 1))
+ return -ENOMEM;
+ name[n] = 0;
+
+ r = strv_extend(&names, name);
+ if (r < 0)
+ return r;
+
+ cnt++;
+
+ if (next_chunk != 0)
+ pos = next_chunk;
+ }
+
+ *domains = TAKE_PTR(names);
+
+ return cnt;
+}
+
+int dhcp_lease_insert_private_option(sd_dhcp_lease *lease, uint8_t tag, const void *data, uint8_t len) {
+ struct sd_dhcp_raw_option *cur, *option;
+
+ assert(lease);
+
+ LIST_FOREACH(options, cur, lease->private_options) {
+ if (tag < cur->tag)
+ break;
+ if (tag == cur->tag) {
+ log_debug("Ignoring duplicate option, tagged %i.", tag);
+ return 0;
+ }
+ }
+
+ option = new(struct sd_dhcp_raw_option, 1);
+ if (!option)
+ return -ENOMEM;
+
+ option->tag = tag;
+ option->length = len;
+ option->data = memdup(data, len);
+ if (!option->data) {
+ free(option);
+ return -ENOMEM;
+ }
+
+ LIST_INSERT_BEFORE(options, lease->private_options, cur, option);
+ return 0;
+}
+
+int dhcp_lease_new(sd_dhcp_lease **ret) {
+ sd_dhcp_lease *lease;
+
+ lease = new0(sd_dhcp_lease, 1);
+ if (!lease)
+ return -ENOMEM;
+
+ lease->n_ref = 1;
+
+ *ret = lease;
+ return 0;
+}
+
+int dhcp_lease_save(sd_dhcp_lease *lease, const char *lease_file) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ struct sd_dhcp_raw_option *option;
+ struct in_addr address;
+ const struct in_addr *addresses;
+ const void *client_id, *data;
+ size_t client_id_len, data_len;
+ char sbuf[INET_ADDRSTRLEN];
+ const char *string;
+ uint16_t mtu;
+ _cleanup_free_ sd_dhcp_route **routes = NULL;
+ char **search_domains = NULL;
+ uint32_t t1, t2, lifetime;
+ int r;
+
+ assert(lease);
+ assert(lease_file);
+
+ r = fopen_temporary(lease_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n");
+
+ r = sd_dhcp_lease_get_address(lease, &address);
+ if (r >= 0)
+ fprintf(f, "ADDRESS=%s\n", inet_ntop(AF_INET, &address, sbuf, sizeof(sbuf)));
+
+ r = sd_dhcp_lease_get_netmask(lease, &address);
+ if (r >= 0)
+ fprintf(f, "NETMASK=%s\n", inet_ntop(AF_INET, &address, sbuf, sizeof(sbuf)));
+
+ r = sd_dhcp_lease_get_router(lease, &addresses);
+ if (r > 0) {
+ fputs("ROUTER=", f);
+ serialize_in_addrs(f, addresses, r, false, NULL);
+ fputc('\n', f);
+ }
+
+ r = sd_dhcp_lease_get_server_identifier(lease, &address);
+ if (r >= 0)
+ fprintf(f, "SERVER_ADDRESS=%s\n", inet_ntop(AF_INET, &address, sbuf, sizeof(sbuf)));
+
+ r = sd_dhcp_lease_get_next_server(lease, &address);
+ if (r >= 0)
+ fprintf(f, "NEXT_SERVER=%s\n", inet_ntop(AF_INET, &address, sbuf, sizeof(sbuf)));
+
+ r = sd_dhcp_lease_get_broadcast(lease, &address);
+ if (r >= 0)
+ fprintf(f, "BROADCAST=%s\n", inet_ntop(AF_INET, &address, sbuf, sizeof(sbuf)));
+
+ r = sd_dhcp_lease_get_mtu(lease, &mtu);
+ if (r >= 0)
+ fprintf(f, "MTU=%" PRIu16 "\n", mtu);
+
+ r = sd_dhcp_lease_get_t1(lease, &t1);
+ if (r >= 0)
+ fprintf(f, "T1=%" PRIu32 "\n", t1);
+
+ r = sd_dhcp_lease_get_t2(lease, &t2);
+ if (r >= 0)
+ fprintf(f, "T2=%" PRIu32 "\n", t2);
+
+ r = sd_dhcp_lease_get_lifetime(lease, &lifetime);
+ if (r >= 0)
+ fprintf(f, "LIFETIME=%" PRIu32 "\n", lifetime);
+
+ r = sd_dhcp_lease_get_dns(lease, &addresses);
+ if (r > 0) {
+ fputs("DNS=", f);
+ serialize_in_addrs(f, addresses, r, false, NULL);
+ fputc('\n', f);
+ }
+
+ r = sd_dhcp_lease_get_ntp(lease, &addresses);
+ if (r > 0) {
+ fputs("NTP=", f);
+ serialize_in_addrs(f, addresses, r, false, NULL);
+ fputc('\n', f);
+ }
+
+ r = sd_dhcp_lease_get_sip(lease, &addresses);
+ if (r > 0) {
+ fputs("SIP=", f);
+ serialize_in_addrs(f, addresses, r, false, NULL);
+ fputc('\n', f);
+ }
+
+ r = sd_dhcp_lease_get_domainname(lease, &string);
+ if (r >= 0)
+ fprintf(f, "DOMAINNAME=%s\n", string);
+
+ r = sd_dhcp_lease_get_search_domains(lease, &search_domains);
+ if (r > 0) {
+ fputs("DOMAIN_SEARCH_LIST=", f);
+ fputstrv(f, search_domains, NULL, NULL);
+ fputc('\n', f);
+ }
+
+ r = sd_dhcp_lease_get_hostname(lease, &string);
+ if (r >= 0)
+ fprintf(f, "HOSTNAME=%s\n", string);
+
+ r = sd_dhcp_lease_get_root_path(lease, &string);
+ if (r >= 0)
+ fprintf(f, "ROOT_PATH=%s\n", string);
+
+ r = sd_dhcp_lease_get_routes(lease, &routes);
+ if (r > 0)
+ serialize_dhcp_routes(f, "ROUTES", routes, r);
+
+ r = sd_dhcp_lease_get_timezone(lease, &string);
+ if (r >= 0)
+ fprintf(f, "TIMEZONE=%s\n", string);
+
+ r = sd_dhcp_lease_get_client_id(lease, &client_id, &client_id_len);
+ if (r >= 0) {
+ _cleanup_free_ char *client_id_hex = NULL;
+
+ client_id_hex = hexmem(client_id, client_id_len);
+ if (!client_id_hex) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ fprintf(f, "CLIENTID=%s\n", client_id_hex);
+ }
+
+ r = sd_dhcp_lease_get_vendor_specific(lease, &data, &data_len);
+ if (r >= 0) {
+ _cleanup_free_ char *option_hex = NULL;
+
+ option_hex = hexmem(data, data_len);
+ if (!option_hex) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ fprintf(f, "VENDOR_SPECIFIC=%s\n", option_hex);
+ }
+
+ LIST_FOREACH(options, option, lease->private_options) {
+ char key[STRLEN("OPTION_000")+1];
+
+ xsprintf(key, "OPTION_%" PRIu8, option->tag);
+ r = serialize_dhcp_option(f, key, option->data, option->length);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, lease_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save lease data %s: %m", lease_file);
+}
+
+int dhcp_lease_load(sd_dhcp_lease **ret, const char *lease_file) {
+
+ _cleanup_(sd_dhcp_lease_unrefp) sd_dhcp_lease *lease = NULL;
+ _cleanup_free_ char
+ *address = NULL,
+ *router = NULL,
+ *netmask = NULL,
+ *server_address = NULL,
+ *next_server = NULL,
+ *broadcast = NULL,
+ *dns = NULL,
+ *ntp = NULL,
+ *sip = NULL,
+ *pop3 = NULL,
+ *smtp = NULL,
+ *lpr = NULL,
+ *mtu = NULL,
+ *routes = NULL,
+ *domains = NULL,
+ *client_id_hex = NULL,
+ *vendor_specific_hex = NULL,
+ *lifetime = NULL,
+ *t1 = NULL,
+ *t2 = NULL,
+ *options[SD_DHCP_OPTION_PRIVATE_LAST - SD_DHCP_OPTION_PRIVATE_BASE + 1] = {};
+
+ int r, i;
+
+ assert(lease_file);
+ assert(ret);
+
+ r = dhcp_lease_new(&lease);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, lease_file,
+ "ADDRESS", &address,
+ "ROUTER", &router,
+ "NETMASK", &netmask,
+ "SERVER_ADDRESS", &server_address,
+ "NEXT_SERVER", &next_server,
+ "BROADCAST", &broadcast,
+ "DNS", &dns,
+ "NTP", &ntp,
+ "SIP", &sip,
+ "POP3", &pop3,
+ "SMTP", &smtp,
+ "LPR", &lpr,
+ "MTU", &mtu,
+ "DOMAINNAME", &lease->domainname,
+ "HOSTNAME", &lease->hostname,
+ "DOMAIN_SEARCH_LIST", &domains,
+ "ROOT_PATH", &lease->root_path,
+ "ROUTES", &routes,
+ "CLIENTID", &client_id_hex,
+ "TIMEZONE", &lease->timezone,
+ "VENDOR_SPECIFIC", &vendor_specific_hex,
+ "LIFETIME", &lifetime,
+ "T1", &t1,
+ "T2", &t2,
+ "OPTION_224", &options[0],
+ "OPTION_225", &options[1],
+ "OPTION_226", &options[2],
+ "OPTION_227", &options[3],
+ "OPTION_228", &options[4],
+ "OPTION_229", &options[5],
+ "OPTION_230", &options[6],
+ "OPTION_231", &options[7],
+ "OPTION_232", &options[8],
+ "OPTION_233", &options[9],
+ "OPTION_234", &options[10],
+ "OPTION_235", &options[11],
+ "OPTION_236", &options[12],
+ "OPTION_237", &options[13],
+ "OPTION_238", &options[14],
+ "OPTION_239", &options[15],
+ "OPTION_240", &options[16],
+ "OPTION_241", &options[17],
+ "OPTION_242", &options[18],
+ "OPTION_243", &options[19],
+ "OPTION_244", &options[20],
+ "OPTION_245", &options[21],
+ "OPTION_246", &options[22],
+ "OPTION_247", &options[23],
+ "OPTION_248", &options[24],
+ "OPTION_249", &options[25],
+ "OPTION_250", &options[26],
+ "OPTION_251", &options[27],
+ "OPTION_252", &options[28],
+ "OPTION_253", &options[29],
+ "OPTION_254", &options[30]);
+ if (r < 0)
+ return r;
+
+ if (address) {
+ r = inet_pton(AF_INET, address, &lease->address);
+ if (r <= 0)
+ log_debug("Failed to parse address %s, ignoring.", address);
+ }
+
+ if (router) {
+ r = deserialize_in_addrs(&lease->router, router);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize router addresses %s, ignoring: %m", router);
+ else
+ lease->router_size = r;
+ }
+
+ if (netmask) {
+ r = inet_pton(AF_INET, netmask, &lease->subnet_mask);
+ if (r <= 0)
+ log_debug("Failed to parse netmask %s, ignoring.", netmask);
+ else
+ lease->have_subnet_mask = true;
+ }
+
+ if (server_address) {
+ r = inet_pton(AF_INET, server_address, &lease->server_address);
+ if (r <= 0)
+ log_debug("Failed to parse server address %s, ignoring.", server_address);
+ }
+
+ if (next_server) {
+ r = inet_pton(AF_INET, next_server, &lease->next_server);
+ if (r <= 0)
+ log_debug("Failed to parse next server %s, ignoring.", next_server);
+ }
+
+ if (broadcast) {
+ r = inet_pton(AF_INET, broadcast, &lease->broadcast);
+ if (r <= 0)
+ log_debug("Failed to parse broadcast address %s, ignoring.", broadcast);
+ else
+ lease->have_broadcast = true;
+ }
+
+ if (dns) {
+ r = deserialize_in_addrs(&lease->servers[SD_DHCP_LEASE_DNS].addr, dns);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize DNS servers %s, ignoring: %m", dns);
+ else
+ lease->servers[SD_DHCP_LEASE_DNS].size = r;
+ }
+
+ if (ntp) {
+ r = deserialize_in_addrs(&lease->servers[SD_DHCP_LEASE_NTP].addr, ntp);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize NTP servers %s, ignoring: %m", ntp);
+ else
+ lease->servers[SD_DHCP_LEASE_NTP].size = r;
+ }
+
+ if (sip) {
+ r = deserialize_in_addrs(&lease->servers[SD_DHCP_LEASE_SIP].addr, sip);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize SIP servers %s, ignoring: %m", sip);
+ else
+ lease->servers[SD_DHCP_LEASE_SIP].size = r;
+ }
+
+ if (pop3) {
+ r = deserialize_in_addrs(&lease->servers[SD_DHCP_LEASE_POP3].addr, pop3);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize POP3 server %s, ignoring: %m", pop3);
+ else
+ lease->servers[SD_DHCP_LEASE_POP3].size = r;
+ }
+
+ if (smtp) {
+ r = deserialize_in_addrs(&lease->servers[SD_DHCP_LEASE_SMTP].addr, smtp);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize SMTP server %s, ignoring: %m", smtp);
+ else
+ lease->servers[SD_DHCP_LEASE_SMTP].size = r;
+ }
+
+ if (lpr) {
+ r = deserialize_in_addrs(&lease->servers[SD_DHCP_LEASE_LPR].addr, lpr);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deserialize LPR server %s, ignoring: %m", lpr);
+ else
+ lease->servers[SD_DHCP_LEASE_LPR].size = r;
+ }
+
+ if (mtu) {
+ r = safe_atou16(mtu, &lease->mtu);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse MTU %s, ignoring: %m", mtu);
+ }
+
+ if (domains) {
+ _cleanup_strv_free_ char **a = NULL;
+ a = strv_split(domains, " ");
+ if (!a)
+ return -ENOMEM;
+
+ if (!strv_isempty(a))
+ lease->search_domains = TAKE_PTR(a);
+ }
+
+ if (routes) {
+ r = deserialize_dhcp_routes(
+ &lease->static_route,
+ &lease->static_route_size,
+ &lease->static_route_allocated,
+ routes);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse DHCP routes %s, ignoring: %m", routes);
+ }
+
+ if (lifetime) {
+ r = safe_atou32(lifetime, &lease->lifetime);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse lifetime %s, ignoring: %m", lifetime);
+ }
+
+ if (t1) {
+ r = safe_atou32(t1, &lease->t1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse T1 %s, ignoring: %m", t1);
+ }
+
+ if (t2) {
+ r = safe_atou32(t2, &lease->t2);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse T2 %s, ignoring: %m", t2);
+ }
+
+ if (client_id_hex) {
+ r = unhexmem(client_id_hex, (size_t) -1, &lease->client_id, &lease->client_id_len);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse client ID %s, ignoring: %m", client_id_hex);
+ }
+
+ if (vendor_specific_hex) {
+ r = unhexmem(vendor_specific_hex, (size_t) -1, &lease->vendor_specific, &lease->vendor_specific_len);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse vendor specific data %s, ignoring: %m", vendor_specific_hex);
+ }
+
+ for (i = 0; i <= SD_DHCP_OPTION_PRIVATE_LAST - SD_DHCP_OPTION_PRIVATE_BASE; i++) {
+ _cleanup_free_ void *data = NULL;
+ size_t len;
+
+ if (!options[i])
+ continue;
+
+ r = unhexmem(options[i], (size_t) -1, &data, &len);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse private DHCP option %s, ignoring: %m", options[i]);
+ continue;
+ }
+
+ r = dhcp_lease_insert_private_option(lease, SD_DHCP_OPTION_PRIVATE_BASE + i, data, len);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(lease);
+
+ return 0;
+}
+
+int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease) {
+ struct in_addr address, mask;
+ int r;
+
+ assert(lease);
+
+ if (lease->address == 0)
+ return -ENODATA;
+
+ address.s_addr = lease->address;
+
+ /* fall back to the default subnet masks based on address class */
+ r = in4_addr_default_subnet_mask(&address, &mask);
+ if (r < 0)
+ return r;
+
+ lease->subnet_mask = mask.s_addr;
+ lease->have_subnet_mask = true;
+
+ return 0;
+}
+
+int sd_dhcp_lease_get_client_id(sd_dhcp_lease *lease, const void **client_id, size_t *client_id_len) {
+ assert_return(lease, -EINVAL);
+ assert_return(client_id, -EINVAL);
+ assert_return(client_id_len, -EINVAL);
+
+ if (!lease->client_id)
+ return -ENODATA;
+
+ *client_id = lease->client_id;
+ *client_id_len = lease->client_id_len;
+
+ return 0;
+}
+
+int dhcp_lease_set_client_id(sd_dhcp_lease *lease, const void *client_id, size_t client_id_len) {
+ assert_return(lease, -EINVAL);
+ assert_return(client_id || client_id_len <= 0, -EINVAL);
+
+ if (client_id_len <= 0)
+ lease->client_id = mfree(lease->client_id);
+ else {
+ void *p;
+
+ p = memdup(client_id, client_id_len);
+ if (!p)
+ return -ENOMEM;
+
+ free(lease->client_id);
+ lease->client_id = p;
+ lease->client_id_len = client_id_len;
+ }
+
+ return 0;
+}
+
+int sd_dhcp_lease_get_timezone(sd_dhcp_lease *lease, const char **tz) {
+ assert_return(lease, -EINVAL);
+ assert_return(tz, -EINVAL);
+
+ if (!lease->timezone)
+ return -ENODATA;
+
+ *tz = lease->timezone;
+ return 0;
+}
+
+int sd_dhcp_route_get_destination(sd_dhcp_route *route, struct in_addr *destination) {
+ assert_return(route, -EINVAL);
+ assert_return(destination, -EINVAL);
+
+ *destination = route->dst_addr;
+ return 0;
+}
+
+int sd_dhcp_route_get_destination_prefix_length(sd_dhcp_route *route, uint8_t *length) {
+ assert_return(route, -EINVAL);
+ assert_return(length, -EINVAL);
+
+ *length = route->dst_prefixlen;
+ return 0;
+}
+
+int sd_dhcp_route_get_gateway(sd_dhcp_route *route, struct in_addr *gateway) {
+ assert_return(route, -EINVAL);
+ assert_return(gateway, -EINVAL);
+
+ *gateway = route->gw_addr;
+ return 0;
+}
+
+int sd_dhcp_route_get_option(sd_dhcp_route *route) {
+ assert_return(route, -EINVAL);
+
+ return route->option;
+}
diff --git a/src/libsystemd-network/sd-dhcp-server.c b/src/libsystemd-network/sd-dhcp-server.c
new file mode 100644
index 0000000..dfced72
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp-server.c
@@ -0,0 +1,1222 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <net/if_arp.h>
+#include <sys/ioctl.h>
+
+#include "sd-dhcp-server.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "dhcp-internal.h"
+#include "dhcp-server-internal.h"
+#include "fd-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "unaligned.h"
+
+#define DHCP_DEFAULT_LEASE_TIME_USEC USEC_PER_HOUR
+#define DHCP_MAX_LEASE_TIME_USEC (USEC_PER_HOUR*12)
+
+static DHCPLease *dhcp_lease_free(DHCPLease *lease) {
+ if (!lease)
+ return NULL;
+
+ free(lease->client_id.data);
+ return mfree(lease);
+}
+
+/* configures the server's address and subnet, and optionally the pool's size and offset into the subnet
+ * the whole pool must fit into the subnet, and may not contain the first (any) nor last (broadcast) address
+ * moreover, the server's own address may be in the pool, and is in that case reserved in order not to
+ * accidentally hand it out */
+int sd_dhcp_server_configure_pool(
+ sd_dhcp_server *server,
+ const struct in_addr *address,
+ unsigned char prefixlen,
+ uint32_t offset,
+ uint32_t size) {
+
+ struct in_addr netmask_addr;
+ be32_t netmask;
+ uint32_t server_off, broadcast_off, size_max;
+
+ assert_return(server, -EINVAL);
+ assert_return(address, -EINVAL);
+ assert_return(address->s_addr != INADDR_ANY, -EINVAL);
+ assert_return(prefixlen <= 32, -ERANGE);
+
+ assert_se(in4_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
+ netmask = netmask_addr.s_addr;
+
+ server_off = be32toh(address->s_addr & ~netmask);
+ broadcast_off = be32toh(~netmask);
+
+ /* the server address cannot be the subnet address */
+ assert_return(server_off != 0, -ERANGE);
+
+ /* nor the broadcast address */
+ assert_return(server_off != broadcast_off, -ERANGE);
+
+ /* 0 offset means we should set a default, we skip the first (subnet) address
+ and take the next one */
+ if (offset == 0)
+ offset = 1;
+
+ size_max = (broadcast_off + 1) /* the number of addresses in the subnet */
+ - offset /* exclude the addresses before the offset */
+ - 1; /* exclude the last (broadcast) address */
+
+ /* The pool must contain at least one address */
+ assert_return(size_max >= 1, -ERANGE);
+
+ if (size != 0)
+ assert_return(size <= size_max, -ERANGE);
+ else
+ size = size_max;
+
+ if (server->address != address->s_addr || server->netmask != netmask || server->pool_size != size || server->pool_offset != offset) {
+
+ free(server->bound_leases);
+ server->bound_leases = new0(DHCPLease*, size);
+ if (!server->bound_leases)
+ return -ENOMEM;
+
+ server->pool_offset = offset;
+ server->pool_size = size;
+
+ server->address = address->s_addr;
+ server->netmask = netmask;
+ server->subnet = address->s_addr & netmask;
+
+ if (server_off >= offset && server_off - offset < size)
+ server->bound_leases[server_off - offset] = &server->invalid_lease;
+
+ /* Drop any leases associated with the old address range */
+ hashmap_clear(server->leases_by_client_id);
+
+ if (server->callback)
+ server->callback(server, SD_DHCP_SERVER_EVENT_LEASE_CHANGED, server->callback_userdata);
+ }
+
+ return 0;
+}
+
+int sd_dhcp_server_is_running(sd_dhcp_server *server) {
+ assert_return(server, false);
+
+ return !!server->receive_message;
+}
+
+void client_id_hash_func(const DHCPClientId *id, struct siphash *state) {
+ assert(id);
+ assert(id->length);
+ assert(id->data);
+
+ siphash24_compress(&id->length, sizeof(id->length), state);
+ siphash24_compress(id->data, id->length, state);
+}
+
+int client_id_compare_func(const DHCPClientId *a, const DHCPClientId *b) {
+ int r;
+
+ assert(!a->length || a->data);
+ assert(!b->length || b->data);
+
+ r = CMP(a->length, b->length);
+ if (r != 0)
+ return r;
+
+ return memcmp(a->data, b->data, a->length);
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(dhcp_lease_hash_ops, DHCPClientId, client_id_hash_func, client_id_compare_func,
+ DHCPLease, dhcp_lease_free);
+
+static sd_dhcp_server *dhcp_server_free(sd_dhcp_server *server) {
+ assert(server);
+
+ log_dhcp_server(server, "UNREF");
+
+ sd_dhcp_server_stop(server);
+
+ sd_event_unref(server->event);
+
+ free(server->timezone);
+
+ for (sd_dhcp_lease_server_type i = 0; i < _SD_DHCP_LEASE_SERVER_TYPE_MAX; i++)
+ free(server->servers[i].addr);
+
+ hashmap_free(server->leases_by_client_id);
+
+ ordered_hashmap_free(server->extra_options);
+ ordered_hashmap_free(server->vendor_options);
+
+ free(server->bound_leases);
+ return mfree(server);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp_server, sd_dhcp_server, dhcp_server_free);
+
+int sd_dhcp_server_new(sd_dhcp_server **ret, int ifindex) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+
+ assert_return(ret, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+
+ server = new0(sd_dhcp_server, 1);
+ if (!server)
+ return -ENOMEM;
+
+ server->n_ref = 1;
+ server->fd_raw = -1;
+ server->fd = -1;
+ server->address = htobe32(INADDR_ANY);
+ server->netmask = htobe32(INADDR_ANY);
+ server->ifindex = ifindex;
+
+ server->leases_by_client_id = hashmap_new(&dhcp_lease_hash_ops);
+ if (!server->leases_by_client_id)
+ return -ENOMEM;
+
+ server->default_lease_time = DIV_ROUND_UP(DHCP_DEFAULT_LEASE_TIME_USEC, USEC_PER_SEC);
+ server->max_lease_time = DIV_ROUND_UP(DHCP_MAX_LEASE_TIME_USEC, USEC_PER_SEC);
+
+ *ret = TAKE_PTR(server);
+
+ return 0;
+}
+
+int sd_dhcp_server_attach_event(sd_dhcp_server *server, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(!server->event, -EBUSY);
+
+ if (event)
+ server->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&server->event);
+ if (r < 0)
+ return r;
+ }
+
+ server->event_priority = priority;
+
+ return 0;
+}
+
+int sd_dhcp_server_detach_event(sd_dhcp_server *server) {
+ assert_return(server, -EINVAL);
+
+ server->event = sd_event_unref(server->event);
+
+ return 0;
+}
+
+sd_event *sd_dhcp_server_get_event(sd_dhcp_server *server) {
+ assert_return(server, NULL);
+
+ return server->event;
+}
+
+int sd_dhcp_server_stop(sd_dhcp_server *server) {
+ if (!server)
+ return 0;
+
+ server->receive_message =
+ sd_event_source_unref(server->receive_message);
+
+ server->fd_raw = safe_close(server->fd_raw);
+ server->fd = safe_close(server->fd);
+
+ log_dhcp_server(server, "STOPPED");
+
+ return 0;
+}
+
+static int dhcp_server_send_unicast_raw(sd_dhcp_server *server,
+ DHCPPacket *packet, size_t len) {
+ union sockaddr_union link = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_protocol = htobe16(ETH_P_IP),
+ .ll.sll_ifindex = server->ifindex,
+ .ll.sll_halen = ETH_ALEN,
+ };
+
+ assert(server);
+ assert(server->ifindex > 0);
+ assert(server->address);
+ assert(packet);
+ assert(len > sizeof(DHCPPacket));
+
+ memcpy(&link.ll.sll_addr, &packet->dhcp.chaddr, ETH_ALEN);
+
+ dhcp_packet_append_ip_headers(packet, server->address, DHCP_PORT_SERVER,
+ packet->dhcp.yiaddr,
+ DHCP_PORT_CLIENT, len, -1);
+
+ return dhcp_network_send_raw_socket(server->fd_raw, &link, packet, len);
+}
+
+static int dhcp_server_send_udp(sd_dhcp_server *server, be32_t destination,
+ uint16_t destination_port,
+ DHCPMessage *message, size_t len) {
+ union sockaddr_union dest = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(destination_port),
+ .in.sin_addr.s_addr = destination,
+ };
+ struct iovec iov = {
+ .iov_base = message,
+ .iov_len = len,
+ };
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct in_pktinfo))) control = {};
+ struct msghdr msg = {
+ .msg_name = &dest,
+ .msg_namelen = sizeof(dest.in),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ struct in_pktinfo *pktinfo;
+
+ assert(server);
+ assert(server->fd >= 0);
+ assert(message);
+ assert(len > sizeof(DHCPMessage));
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ assert(cmsg);
+
+ cmsg->cmsg_level = IPPROTO_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+
+ /* we attach source interface and address info to the message
+ rather than binding the socket. This will be mostly useful
+ when we gain support for arbitrary number of server addresses
+ */
+ pktinfo = (struct in_pktinfo*) CMSG_DATA(cmsg);
+ assert(pktinfo);
+
+ pktinfo->ipi_ifindex = server->ifindex;
+ pktinfo->ipi_spec_dst.s_addr = server->address;
+
+ if (sendmsg(server->fd, &msg, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static bool requested_broadcast(DHCPRequest *req) {
+ assert(req);
+
+ return req->message->flags & htobe16(0x8000);
+}
+
+int dhcp_server_send_packet(sd_dhcp_server *server,
+ DHCPRequest *req, DHCPPacket *packet,
+ int type, size_t optoffset) {
+ be32_t destination = INADDR_ANY;
+ uint16_t destination_port = DHCP_PORT_CLIENT;
+ int r;
+
+ assert(server);
+ assert(req);
+ assert(req->max_optlen);
+ assert(optoffset <= req->max_optlen);
+ assert(packet);
+
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &optoffset, 0,
+ SD_DHCP_OPTION_SERVER_IDENTIFIER,
+ 4, &server->address);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &optoffset, 0,
+ SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ /* RFC 2131 Section 4.1
+
+ If the ’giaddr’ field in a DHCP message from a client is non-zero,
+ the server sends any return messages to the ’DHCP server’ port on the
+ BOOTP relay agent whose address appears in ’giaddr’. If the ’giaddr’
+ field is zero and the ’ciaddr’ field is nonzero, then the server
+ unicasts DHCPOFFER and DHCPACK messages to the address in ’ciaddr’.
+ If ’giaddr’ is zero and ’ciaddr’ is zero, and the broadcast bit is
+ set, then the server broadcasts DHCPOFFER and DHCPACK messages to
+ 0xffffffff. If the broadcast bit is not set and ’giaddr’ is zero and
+ ’ciaddr’ is zero, then the server unicasts DHCPOFFER and DHCPACK
+ messages to the client’s hardware address and ’yiaddr’ address. In
+ all cases, when ’giaddr’ is zero, the server broadcasts any DHCPNAK
+ messages to 0xffffffff.
+
+ Section 4.3.2
+
+ If ’giaddr’ is set in the DHCPREQUEST message, the client is on a
+ different subnet. The server MUST set the broadcast bit in the
+ DHCPNAK, so that the relay agent will broadcast the DHCPNAK to the
+ client, because the client may not have a correct network address
+ or subnet mask, and the client may not be answering ARP requests.
+ */
+ if (req->message->giaddr) {
+ destination = req->message->giaddr;
+ destination_port = DHCP_PORT_SERVER;
+ if (type == DHCP_NAK)
+ packet->dhcp.flags = htobe16(0x8000);
+ } else if (req->message->ciaddr && type != DHCP_NAK)
+ destination = req->message->ciaddr;
+
+ if (destination != INADDR_ANY)
+ return dhcp_server_send_udp(server, destination,
+ destination_port, &packet->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ else if (requested_broadcast(req) || type == DHCP_NAK)
+ return dhcp_server_send_udp(server, INADDR_BROADCAST,
+ destination_port, &packet->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ else
+ /* we cannot send UDP packet to specific MAC address when the
+ address is not yet configured, so must fall back to raw
+ packets */
+ return dhcp_server_send_unicast_raw(server, packet,
+ sizeof(DHCPPacket) + optoffset);
+}
+
+static int server_message_init(sd_dhcp_server *server, DHCPPacket **ret,
+ uint8_t type, size_t *_optoffset,
+ DHCPRequest *req) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t optoffset = 0;
+ int r;
+
+ assert(server);
+ assert(ret);
+ assert(_optoffset);
+ assert(IN_SET(type, DHCP_OFFER, DHCP_ACK, DHCP_NAK));
+
+ packet = malloc0(sizeof(DHCPPacket) + req->max_optlen);
+ if (!packet)
+ return -ENOMEM;
+
+ r = dhcp_message_init(&packet->dhcp, BOOTREPLY,
+ be32toh(req->message->xid), type, ARPHRD_ETHER,
+ req->max_optlen, &optoffset);
+ if (r < 0)
+ return r;
+
+ packet->dhcp.flags = req->message->flags;
+ packet->dhcp.giaddr = req->message->giaddr;
+ memcpy(&packet->dhcp.chaddr, &req->message->chaddr, ETH_ALEN);
+
+ *_optoffset = optoffset;
+ *ret = TAKE_PTR(packet);
+
+ return 0;
+}
+
+static int server_send_offer(sd_dhcp_server *server, DHCPRequest *req,
+ be32_t address) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t offset;
+ be32_t lease_time;
+ int r;
+
+ r = server_message_init(server, &packet, DHCP_OFFER, &offset, req);
+ if (r < 0)
+ return r;
+
+ packet->dhcp.yiaddr = address;
+
+ lease_time = htobe32(req->lifetime);
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME, 4,
+ &lease_time);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_SUBNET_MASK, 4, &server->netmask);
+ if (r < 0)
+ return r;
+
+ if (server->emit_router) {
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_ROUTER, 4, &server->address);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_server_send_packet(server, req, packet, DHCP_OFFER, offset);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int server_send_ack(
+ sd_dhcp_server *server,
+ DHCPRequest *req,
+ be32_t address) {
+
+ static const uint8_t option_map[_SD_DHCP_LEASE_SERVER_TYPE_MAX] = {
+ [SD_DHCP_LEASE_DNS] = SD_DHCP_OPTION_DOMAIN_NAME_SERVER,
+ [SD_DHCP_LEASE_NTP] = SD_DHCP_OPTION_NTP_SERVER,
+ [SD_DHCP_LEASE_SIP] = SD_DHCP_OPTION_SIP_SERVER,
+ [SD_DHCP_LEASE_POP3] = SD_DHCP_OPTION_POP3_SERVER,
+ [SD_DHCP_LEASE_SMTP] = SD_DHCP_OPTION_SMTP_SERVER,
+ [SD_DHCP_LEASE_LPR] = SD_DHCP_OPTION_LPR_SERVER,
+ };
+
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ be32_t lease_time;
+ sd_dhcp_option *j;
+ size_t offset;
+ int r;
+
+ r = server_message_init(server, &packet, DHCP_ACK, &offset, req);
+ if (r < 0)
+ return r;
+
+ packet->dhcp.yiaddr = address;
+
+ lease_time = htobe32(req->lifetime);
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME, 4,
+ &lease_time);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_SUBNET_MASK, 4, &server->netmask);
+ if (r < 0)
+ return r;
+
+ if (server->emit_router) {
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_ROUTER, 4, &server->address);
+ if (r < 0)
+ return r;
+ }
+
+ for (sd_dhcp_lease_server_type k = 0; k < _SD_DHCP_LEASE_SERVER_TYPE_MAX; k++) {
+
+ if (server->servers[k].size <= 0)
+ continue;
+
+ r = dhcp_option_append(
+ &packet->dhcp, req->max_optlen, &offset, 0,
+ option_map[k],
+ sizeof(struct in_addr) * server->servers[k].size, server->servers[k].addr);
+ if (r < 0)
+ return r;
+ }
+
+
+ if (server->timezone) {
+ r = dhcp_option_append(
+ &packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_NEW_TZDB_TIMEZONE,
+ strlen(server->timezone), server->timezone);
+ if (r < 0)
+ return r;
+ }
+
+ ORDERED_HASHMAP_FOREACH(j, server->extra_options) {
+ r = dhcp_option_append(&packet->dhcp, req->max_optlen, &offset, 0,
+ j->option, j->length, j->data);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ordered_hashmap_isempty(server->vendor_options)) {
+ r = dhcp_option_append(
+ &packet->dhcp, req->max_optlen, &offset, 0,
+ SD_DHCP_OPTION_VENDOR_SPECIFIC,
+ ordered_hashmap_size(server->vendor_options), server->vendor_options);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp_server_send_packet(server, req, packet, DHCP_ACK, offset);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int server_send_nak(sd_dhcp_server *server, DHCPRequest *req) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t offset;
+ int r;
+
+ r = server_message_init(server, &packet, DHCP_NAK, &offset, req);
+ if (r < 0)
+ return r;
+
+ return dhcp_server_send_packet(server, req, packet, DHCP_NAK, offset);
+}
+
+static int server_send_forcerenew(sd_dhcp_server *server, be32_t address,
+ be32_t gateway, const uint8_t chaddr[]) {
+ _cleanup_free_ DHCPPacket *packet = NULL;
+ size_t optoffset = 0;
+ int r;
+
+ assert(server);
+ assert(address != INADDR_ANY);
+ assert(chaddr);
+
+ packet = malloc0(sizeof(DHCPPacket) + DHCP_MIN_OPTIONS_SIZE);
+ if (!packet)
+ return -ENOMEM;
+
+ r = dhcp_message_init(&packet->dhcp, BOOTREPLY, 0,
+ DHCP_FORCERENEW, ARPHRD_ETHER,
+ DHCP_MIN_OPTIONS_SIZE, &optoffset);
+ if (r < 0)
+ return r;
+
+ r = dhcp_option_append(&packet->dhcp, DHCP_MIN_OPTIONS_SIZE,
+ &optoffset, 0, SD_DHCP_OPTION_END, 0, NULL);
+ if (r < 0)
+ return r;
+
+ memcpy(&packet->dhcp.chaddr, chaddr, ETH_ALEN);
+
+ r = dhcp_server_send_udp(server, address, DHCP_PORT_CLIENT,
+ &packet->dhcp,
+ sizeof(DHCPMessage) + optoffset);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int parse_request(uint8_t code, uint8_t len, const void *option, void *userdata) {
+ DHCPRequest *req = userdata;
+
+ assert(req);
+
+ switch(code) {
+ case SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME:
+ if (len == 4)
+ req->lifetime = unaligned_read_be32(option);
+
+ break;
+ case SD_DHCP_OPTION_REQUESTED_IP_ADDRESS:
+ if (len == 4)
+ memcpy(&req->requested_ip, option, sizeof(be32_t));
+
+ break;
+ case SD_DHCP_OPTION_SERVER_IDENTIFIER:
+ if (len == 4)
+ memcpy(&req->server_id, option, sizeof(be32_t));
+
+ break;
+ case SD_DHCP_OPTION_CLIENT_IDENTIFIER:
+ if (len >= 2) {
+ uint8_t *data;
+
+ data = memdup(option, len);
+ if (!data)
+ return -ENOMEM;
+
+ free(req->client_id.data);
+ req->client_id.data = data;
+ req->client_id.length = len;
+ }
+
+ break;
+ case SD_DHCP_OPTION_MAXIMUM_MESSAGE_SIZE:
+
+ if (len == 2 && unaligned_read_be16(option) >= sizeof(DHCPPacket))
+ req->max_optlen = unaligned_read_be16(option) - sizeof(DHCPPacket);
+
+ break;
+ }
+
+ return 0;
+}
+
+static void dhcp_request_free(DHCPRequest *req) {
+ if (!req)
+ return;
+
+ free(req->client_id.data);
+ free(req);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DHCPRequest*, dhcp_request_free);
+
+static int ensure_sane_request(sd_dhcp_server *server, DHCPRequest *req, DHCPMessage *message) {
+ assert(req);
+ assert(message);
+
+ req->message = message;
+
+ /* set client id based on MAC address if client did not send an explicit
+ one */
+ if (!req->client_id.data) {
+ void *data;
+
+ data = malloc0(ETH_ALEN + 1);
+ if (!data)
+ return -ENOMEM;
+
+ ((uint8_t*) data)[0] = 0x01;
+ memcpy((uint8_t*) data + 1, &message->chaddr, ETH_ALEN);
+
+ req->client_id.length = ETH_ALEN + 1;
+ req->client_id.data = data;
+ }
+
+ if (req->max_optlen < DHCP_MIN_OPTIONS_SIZE)
+ req->max_optlen = DHCP_MIN_OPTIONS_SIZE;
+
+ if (req->lifetime <= 0)
+ req->lifetime = MAX(1ULL, server->default_lease_time);
+
+ if (server->max_lease_time > 0 && req->lifetime > server->max_lease_time)
+ req->lifetime = server->max_lease_time;
+
+ return 0;
+}
+
+static int get_pool_offset(sd_dhcp_server *server, be32_t requested_ip) {
+ assert(server);
+
+ if (!server->pool_size)
+ return -EINVAL;
+
+ if (be32toh(requested_ip) < (be32toh(server->subnet) | server->pool_offset) ||
+ be32toh(requested_ip) >= (be32toh(server->subnet) | (server->pool_offset + server->pool_size)))
+ return -ERANGE;
+
+ return be32toh(requested_ip & ~server->netmask) - server->pool_offset;
+}
+
+#define HASH_KEY SD_ID128_MAKE(0d,1d,fe,bd,f1,24,bd,b3,47,f1,dd,6e,73,21,93,30)
+
+int dhcp_server_handle_message(sd_dhcp_server *server, DHCPMessage *message,
+ size_t length) {
+ _cleanup_(dhcp_request_freep) DHCPRequest *req = NULL;
+ _cleanup_free_ char *error_message = NULL;
+ DHCPLease *existing_lease;
+ int type, r;
+
+ assert(server);
+ assert(message);
+
+ if (message->op != BOOTREQUEST ||
+ message->htype != ARPHRD_ETHER ||
+ message->hlen != ETHER_ADDR_LEN)
+ return 0;
+
+ req = new0(DHCPRequest, 1);
+ if (!req)
+ return -ENOMEM;
+
+ type = dhcp_option_parse(message, length, parse_request, req, &error_message);
+ if (type < 0)
+ return 0;
+
+ r = ensure_sane_request(server, req, message);
+ if (r < 0)
+ /* this only fails on critical errors */
+ return r;
+
+ existing_lease = hashmap_get(server->leases_by_client_id,
+ &req->client_id);
+
+ switch(type) {
+
+ case DHCP_DISCOVER: {
+ be32_t address = INADDR_ANY;
+ unsigned i;
+
+ log_dhcp_server(server, "DISCOVER (0x%x)",
+ be32toh(req->message->xid));
+
+ if (!server->pool_size)
+ /* no pool allocated */
+ return 0;
+
+ /* for now pick a random free address from the pool */
+ if (existing_lease)
+ address = existing_lease->address;
+ else {
+ struct siphash state;
+ uint64_t hash;
+ uint32_t next_offer;
+
+ /* even with no persistence of leases, we try to offer the same client
+ the same IP address. we do this by using the hash of the client id
+ as the offset into the pool of leases when finding the next free one */
+
+ siphash24_init(&state, HASH_KEY.bytes);
+ client_id_hash_func(&req->client_id, &state);
+ hash = htole64(siphash24_finalize(&state));
+ next_offer = hash % server->pool_size;
+
+ for (i = 0; i < server->pool_size; i++) {
+ if (!server->bound_leases[next_offer]) {
+ address = server->subnet | htobe32(server->pool_offset + next_offer);
+ break;
+ }
+
+ next_offer = (next_offer + 1) % server->pool_size;
+ }
+ }
+
+ if (address == INADDR_ANY)
+ /* no free addresses left */
+ return 0;
+
+ r = server_send_offer(server, req, address);
+ if (r < 0)
+ /* this only fails on critical errors */
+ return log_dhcp_server_errno(server, r, "Could not send offer: %m");
+
+ log_dhcp_server(server, "OFFER (0x%x)", be32toh(req->message->xid));
+ return DHCP_OFFER;
+ }
+ case DHCP_DECLINE:
+ log_dhcp_server(server, "DECLINE (0x%x): %s", be32toh(req->message->xid), strna(error_message));
+
+ /* TODO: make sure we don't offer this address again */
+
+ return 1;
+
+ case DHCP_REQUEST: {
+ be32_t address;
+ bool init_reboot = false;
+ int pool_offset;
+
+ /* see RFC 2131, section 4.3.2 */
+
+ if (req->server_id) {
+ log_dhcp_server(server, "REQUEST (selecting) (0x%x)",
+ be32toh(req->message->xid));
+
+ /* SELECTING */
+ if (req->server_id != server->address)
+ /* client did not pick us */
+ return 0;
+
+ if (req->message->ciaddr)
+ /* this MUST be zero */
+ return 0;
+
+ if (!req->requested_ip)
+ /* this must be filled in with the yiaddr
+ from the chosen OFFER */
+ return 0;
+
+ address = req->requested_ip;
+ } else if (req->requested_ip) {
+ log_dhcp_server(server, "REQUEST (init-reboot) (0x%x)",
+ be32toh(req->message->xid));
+
+ /* INIT-REBOOT */
+ if (req->message->ciaddr)
+ /* this MUST be zero */
+ return 0;
+
+ /* TODO: check more carefully if IP is correct */
+ address = req->requested_ip;
+ init_reboot = true;
+ } else {
+ log_dhcp_server(server, "REQUEST (rebinding/renewing) (0x%x)",
+ be32toh(req->message->xid));
+
+ /* REBINDING / RENEWING */
+ if (!req->message->ciaddr)
+ /* this MUST be filled in with clients IP address */
+ return 0;
+
+ address = req->message->ciaddr;
+ }
+
+ pool_offset = get_pool_offset(server, address);
+
+ /* verify that the requested address is from the pool, and either
+ owned by the current client or free */
+ if (pool_offset >= 0 &&
+ server->bound_leases[pool_offset] == existing_lease) {
+ DHCPLease *lease;
+ usec_t time_now = 0;
+
+ if (!existing_lease) {
+ lease = new0(DHCPLease, 1);
+ if (!lease)
+ return -ENOMEM;
+ lease->address = address;
+ lease->client_id.data = memdup(req->client_id.data,
+ req->client_id.length);
+ if (!lease->client_id.data) {
+ free(lease);
+ return -ENOMEM;
+ }
+ lease->client_id.length = req->client_id.length;
+ memcpy(&lease->chaddr, &req->message->chaddr,
+ ETH_ALEN);
+ lease->gateway = req->message->giaddr;
+ } else
+ lease = existing_lease;
+
+ r = sd_event_now(server->event,
+ clock_boottime_or_monotonic(),
+ &time_now);
+ if (r < 0) {
+ if (!existing_lease)
+ dhcp_lease_free(lease);
+ return r;
+ }
+
+ lease->expiration = req->lifetime * USEC_PER_SEC + time_now;
+
+ r = server_send_ack(server, req, address);
+ if (r < 0) {
+ /* this only fails on critical errors */
+ log_dhcp_server_errno(server, r, "Could not send ack: %m");
+
+ if (!existing_lease)
+ dhcp_lease_free(lease);
+
+ return r;
+ } else {
+ log_dhcp_server(server, "ACK (0x%x)",
+ be32toh(req->message->xid));
+
+ server->bound_leases[pool_offset] = lease;
+ hashmap_put(server->leases_by_client_id,
+ &lease->client_id, lease);
+
+ if (server->callback)
+ server->callback(server, SD_DHCP_SERVER_EVENT_LEASE_CHANGED, server->callback_userdata);
+
+ return DHCP_ACK;
+ }
+
+ } else if (init_reboot) {
+ r = server_send_nak(server, req);
+ if (r < 0)
+ /* this only fails on critical errors */
+ return log_dhcp_server_errno(server, r, "Could not send nak: %m");
+
+ log_dhcp_server(server, "NAK (0x%x)", be32toh(req->message->xid));
+ return DHCP_NAK;
+ }
+
+ break;
+ }
+
+ case DHCP_RELEASE: {
+ int pool_offset;
+
+ log_dhcp_server(server, "RELEASE (0x%x)",
+ be32toh(req->message->xid));
+
+ if (!existing_lease)
+ return 0;
+
+ if (existing_lease->address != req->message->ciaddr)
+ return 0;
+
+ pool_offset = get_pool_offset(server, req->message->ciaddr);
+ if (pool_offset < 0)
+ return 0;
+
+ if (server->bound_leases[pool_offset] == existing_lease) {
+ server->bound_leases[pool_offset] = NULL;
+ hashmap_remove(server->leases_by_client_id, existing_lease);
+ dhcp_lease_free(existing_lease);
+
+ if (server->callback)
+ server->callback(server, SD_DHCP_SERVER_EVENT_LEASE_CHANGED, server->callback_userdata);
+ }
+
+ return 0;
+ }}
+
+ return 0;
+}
+
+static int server_receive_message(sd_event_source *s, int fd,
+ uint32_t revents, void *userdata) {
+ _cleanup_free_ DHCPMessage *message = NULL;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct in_pktinfo))) control;
+ sd_dhcp_server *server = userdata;
+ struct iovec iov = {};
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t buflen, len;
+ int r;
+
+ assert(server);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return buflen;
+
+ message = malloc(buflen);
+ if (!message)
+ return -ENOMEM;
+
+ iov = IOVEC_MAKE(message, buflen);
+
+ len = recvmsg_safe(fd, &msg, 0);
+ if (IN_SET(len, -EAGAIN, -EINTR))
+ return 0;
+ if (len < 0)
+ return len;
+ if ((size_t) len < sizeof(DHCPMessage))
+ return 0;
+
+ CMSG_FOREACH(cmsg, &msg) {
+ if (cmsg->cmsg_level == IPPROTO_IP &&
+ cmsg->cmsg_type == IP_PKTINFO &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct in_pktinfo))) {
+ struct in_pktinfo *info = (struct in_pktinfo*)CMSG_DATA(cmsg);
+
+ /* TODO figure out if this can be done as a filter on
+ * the socket, like for IPv6 */
+ if (server->ifindex != info->ipi_ifindex)
+ return 0;
+
+ break;
+ }
+ }
+
+ r = dhcp_server_handle_message(server, message, (size_t) len);
+ if (r < 0)
+ log_dhcp_server_errno(server, r, "Couldn't process incoming message: %m");
+
+ return 0;
+}
+
+int sd_dhcp_server_start(sd_dhcp_server *server) {
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(server->event, -EINVAL);
+ assert_return(!server->receive_message, -EBUSY);
+ assert_return(server->fd_raw < 0, -EBUSY);
+ assert_return(server->fd < 0, -EBUSY);
+ assert_return(server->address != htobe32(INADDR_ANY), -EUNATCH);
+
+ r = socket(AF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (r < 0) {
+ r = -errno;
+ sd_dhcp_server_stop(server);
+ return r;
+ }
+ server->fd_raw = r;
+
+ r = dhcp_network_bind_udp_socket(server->ifindex, INADDR_ANY, DHCP_PORT_SERVER, -1);
+ if (r < 0) {
+ sd_dhcp_server_stop(server);
+ return r;
+ }
+ server->fd = r;
+
+ r = sd_event_add_io(server->event, &server->receive_message,
+ server->fd, EPOLLIN,
+ server_receive_message, server);
+ if (r < 0) {
+ sd_dhcp_server_stop(server);
+ return r;
+ }
+
+ r = sd_event_source_set_priority(server->receive_message,
+ server->event_priority);
+ if (r < 0) {
+ sd_dhcp_server_stop(server);
+ return r;
+ }
+
+ log_dhcp_server(server, "STARTED");
+
+ return 0;
+}
+
+int sd_dhcp_server_forcerenew(sd_dhcp_server *server) {
+ unsigned i;
+ int r = 0;
+
+ assert_return(server, -EINVAL);
+ assert(server->bound_leases);
+
+ for (i = 0; i < server->pool_size; i++) {
+ DHCPLease *lease = server->bound_leases[i];
+
+ if (!lease || lease == &server->invalid_lease)
+ continue;
+
+ r = server_send_forcerenew(server, lease->address,
+ lease->gateway,
+ lease->chaddr);
+ if (r < 0)
+ return r;
+
+ log_dhcp_server(server, "FORCERENEW");
+ }
+
+ return r;
+}
+
+int sd_dhcp_server_set_timezone(sd_dhcp_server *server, const char *tz) {
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(timezone_is_valid(tz, LOG_DEBUG), -EINVAL);
+
+ if (streq_ptr(tz, server->timezone))
+ return 0;
+
+ r = free_and_strdup(&server->timezone, tz);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int sd_dhcp_server_set_max_lease_time(sd_dhcp_server *server, uint32_t t) {
+ assert_return(server, -EINVAL);
+
+ if (t == server->max_lease_time)
+ return 0;
+
+ server->max_lease_time = t;
+ return 1;
+}
+
+int sd_dhcp_server_set_default_lease_time(sd_dhcp_server *server, uint32_t t) {
+ assert_return(server, -EINVAL);
+
+ if (t == server->default_lease_time)
+ return 0;
+
+ server->default_lease_time = t;
+ return 1;
+}
+
+int sd_dhcp_server_set_servers(
+ sd_dhcp_server *server,
+ sd_dhcp_lease_server_type what,
+ const struct in_addr addresses[],
+ size_t n_addresses) {
+
+ struct in_addr *c = NULL;
+
+ assert_return(server, -EINVAL);
+ assert_return(addresses || n_addresses == 0, -EINVAL);
+ assert_return(what >= 0, -EINVAL);
+ assert_return(what < _SD_DHCP_LEASE_SERVER_TYPE_MAX, -EINVAL);
+
+ if (server->servers[what].size == n_addresses &&
+ memcmp(server->servers[what].addr, addresses, sizeof(struct in_addr) * n_addresses) == 0)
+ return 0;
+
+ if (n_addresses > 0) {
+ c = newdup(struct in_addr, addresses, n_addresses);
+ if (!c)
+ return -ENOMEM;
+ }
+
+ free(server->servers[what].addr);
+ server->servers[what].addr = c;
+ server->servers[what].size = n_addresses;
+ return 1;
+}
+
+int sd_dhcp_server_set_dns(sd_dhcp_server *server, const struct in_addr dns[], size_t n) {
+ return sd_dhcp_server_set_servers(server, SD_DHCP_LEASE_DNS, dns, n);
+}
+int sd_dhcp_server_set_ntp(sd_dhcp_server *server, const struct in_addr ntp[], size_t n) {
+ return sd_dhcp_server_set_servers(server, SD_DHCP_LEASE_NTP, ntp, n);
+}
+int sd_dhcp_server_set_sip(sd_dhcp_server *server, const struct in_addr sip[], size_t n) {
+ return sd_dhcp_server_set_servers(server, SD_DHCP_LEASE_SIP, sip, n);
+}
+int sd_dhcp_server_set_pop3(sd_dhcp_server *server, const struct in_addr pop3[], size_t n) {
+ return sd_dhcp_server_set_servers(server, SD_DHCP_LEASE_POP3, pop3, n);
+}
+int sd_dhcp_server_set_smtp(sd_dhcp_server *server, const struct in_addr smtp[], size_t n) {
+ return sd_dhcp_server_set_servers(server, SD_DHCP_LEASE_SMTP, smtp, n);
+}
+int sd_dhcp_server_set_lpr(sd_dhcp_server *server, const struct in_addr lpr[], size_t n) {
+ return sd_dhcp_server_set_servers(server, SD_DHCP_LEASE_LPR, lpr, n);
+}
+
+int sd_dhcp_server_set_emit_router(sd_dhcp_server *server, int enabled) {
+ assert_return(server, -EINVAL);
+
+ if (enabled == server->emit_router)
+ return 0;
+
+ server->emit_router = enabled;
+
+ return 1;
+}
+
+int sd_dhcp_server_add_option(sd_dhcp_server *server, sd_dhcp_option *v) {
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(v, -EINVAL);
+
+ r = ordered_hashmap_ensure_allocated(&server->extra_options, &dhcp_option_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(server->extra_options, UINT_TO_PTR(v->option), v);
+ if (r < 0)
+ return r;
+
+ sd_dhcp_option_ref(v);
+ return 0;
+}
+
+int sd_dhcp_server_add_vendor_option(sd_dhcp_server *server, sd_dhcp_option *v) {
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(v, -EINVAL);
+
+ r = ordered_hashmap_ensure_allocated(&server->vendor_options, &dhcp_option_hash_ops);
+ if (r < 0)
+ return -ENOMEM;
+
+ r = ordered_hashmap_put(server->vendor_options, v, v);
+ if (r < 0)
+ return r;
+
+ sd_dhcp_option_ref(v);
+
+ return 1;
+}
+
+int sd_dhcp_server_set_callback(sd_dhcp_server *server, sd_dhcp_server_callback_t cb, void *userdata) {
+ assert_return(server, -EINVAL);
+
+ server->callback = cb;
+ server->callback_userdata = userdata;
+
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-dhcp6-client.c b/src/libsystemd-network/sd-dhcp6-client.c
new file mode 100644
index 0000000..d502051
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp6-client.c
@@ -0,0 +1,1848 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <linux/if_arp.h>
+#include <linux/if_infiniband.h>
+
+#include "sd-dhcp6-client.h"
+
+#include "alloc-util.h"
+#include "dhcp-identifier.h"
+#include "dhcp6-internal.h"
+#include "dhcp6-lease-internal.h"
+#include "dhcp6-protocol.h"
+#include "dns-domain.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "network-internal.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "util.h"
+#include "web-util.h"
+
+#define MAX_MAC_ADDR_LEN INFINIBAND_ALEN
+
+#define IRT_DEFAULT (1 * USEC_PER_DAY)
+#define IRT_MINIMUM (600 * USEC_PER_SEC)
+
+/* what to request from the server, addresses (IA_NA) and/or prefixes (IA_PD) */
+enum {
+ DHCP6_REQUEST_IA_NA = 1,
+ DHCP6_REQUEST_IA_TA = 2, /* currently not used */
+ DHCP6_REQUEST_IA_PD = 4,
+};
+
+struct sd_dhcp6_client {
+ unsigned n_ref;
+
+ enum DHCP6State state;
+ sd_event *event;
+ int event_priority;
+ int ifindex;
+ DHCP6Address hint_pd_prefix;
+ struct in6_addr local_address;
+ uint8_t mac_addr[MAX_MAC_ADDR_LEN];
+ size_t mac_addr_len;
+ uint16_t arp_type;
+ DHCP6IA ia_na;
+ DHCP6IA ia_pd;
+ sd_event_source *timeout_t1;
+ sd_event_source *timeout_t2;
+ unsigned request;
+ be32_t transaction_id;
+ usec_t transaction_start;
+ struct sd_dhcp6_lease *lease;
+ int fd;
+ bool information_request;
+ bool iaid_set;
+ be16_t *req_opts;
+ size_t req_opts_allocated;
+ size_t req_opts_len;
+ char *fqdn;
+ char *mudurl;
+ char **user_class;
+ char **vendor_class;
+ sd_event_source *receive_message;
+ usec_t retransmit_time;
+ uint8_t retransmit_count;
+ sd_event_source *timeout_resend;
+ sd_event_source *timeout_resend_expire;
+ sd_dhcp6_client_callback_t callback;
+ void *userdata;
+ struct duid duid;
+ size_t duid_len;
+ usec_t information_request_time_usec;
+ usec_t information_refresh_time_usec;
+ OrderedHashmap *extra_options;
+ OrderedHashmap *vendor_options;
+};
+
+static const uint16_t default_req_opts[] = {
+ SD_DHCP6_OPTION_DNS_SERVERS,
+ SD_DHCP6_OPTION_DOMAIN_LIST,
+ SD_DHCP6_OPTION_NTP_SERVER,
+ SD_DHCP6_OPTION_SNTP_SERVERS,
+};
+
+const char * dhcp6_message_type_table[_DHCP6_MESSAGE_MAX] = {
+ [DHCP6_SOLICIT] = "SOLICIT",
+ [DHCP6_ADVERTISE] = "ADVERTISE",
+ [DHCP6_REQUEST] = "REQUEST",
+ [DHCP6_CONFIRM] = "CONFIRM",
+ [DHCP6_RENEW] = "RENEW",
+ [DHCP6_REBIND] = "REBIND",
+ [DHCP6_REPLY] = "REPLY",
+ [DHCP6_RELEASE] = "RELEASE",
+ [DHCP6_DECLINE] = "DECLINE",
+ [DHCP6_RECONFIGURE] = "RECONFIGURE",
+ [DHCP6_INFORMATION_REQUEST] = "INFORMATION-REQUEST",
+ [DHCP6_RELAY_FORW] = "RELAY-FORW",
+ [DHCP6_RELAY_REPL] = "RELAY-REPL",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(dhcp6_message_type, int);
+
+const char * dhcp6_message_status_table[_DHCP6_STATUS_MAX] = {
+ [DHCP6_STATUS_SUCCESS] = "Success",
+ [DHCP6_STATUS_UNSPEC_FAIL] = "Unspecified failure",
+ [DHCP6_STATUS_NO_ADDRS_AVAIL] = "No addresses available",
+ [DHCP6_STATUS_NO_BINDING] = "Binding unavailable",
+ [DHCP6_STATUS_NOT_ON_LINK] = "Not on link",
+ [DHCP6_STATUS_USE_MULTICAST] = "Use multicast",
+ [DHCP6_STATUS_NO_PREFIX_AVAIL] = "No prefix available",
+ [DHCP6_STATUS_UNKNOWN_QUERY_TYPE] = "Unknown query type",
+ [DHCP6_STATUS_MALFORMED_QUERY] = "Malformed query",
+ [DHCP6_STATUS_NOT_CONFIGURED] = "Not configured",
+ [DHCP6_STATUS_NOT_ALLOWED] = "Not allowed",
+ [DHCP6_STATUS_QUERY_TERMINATED] = "Query terminated",
+ [DHCP6_STATUS_DATA_MISSING] = "Data missing",
+ [DHCP6_STATUS_CATCHUP_COMPLETE] = "Catch up complete",
+ [DHCP6_STATUS_NOT_SUPPORTED] = "Not supported",
+ [DHCP6_STATUS_TLS_CONNECTION_REFUSED] = "TLS connection refused",
+ [DHCP6_STATUS_ADDRESS_IN_USE] = "Address in use",
+ [DHCP6_STATUS_CONFIGURATION_CONFLICT] = "Configuration conflict",
+ [DHCP6_STATUS_MISSING_BINDING_INFORMATION] = "Missing binding information",
+ [DHCP6_STATUS_OUTDATED_BINDING_INFORMATION] = "Outdated binding information",
+ [DHCP6_STATUS_SERVER_SHUTTING_DOWN] = "Server shutting down",
+ [DHCP6_STATUS_DNS_UPDATE_NOT_SUPPORTED] = "DNS update not supported",
+ [DHCP6_STATUS_EXCESSIVE_TIME_SKEW] = "Excessive time skew",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(dhcp6_message_status, int);
+
+#define DHCP6_CLIENT_DONT_DESTROY(client) \
+ _cleanup_(sd_dhcp6_client_unrefp) _unused_ sd_dhcp6_client *_dont_destroy_##client = sd_dhcp6_client_ref(client)
+
+static int client_start(sd_dhcp6_client *client, enum DHCP6State state);
+
+int sd_dhcp6_client_set_callback(
+ sd_dhcp6_client *client,
+ sd_dhcp6_client_callback_t cb,
+ void *userdata) {
+
+ assert_return(client, -EINVAL);
+
+ client->callback = cb;
+ client->userdata = userdata;
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_ifindex(sd_dhcp6_client *client, int ifindex) {
+
+ assert_return(client, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->ifindex = ifindex;
+ return 0;
+}
+
+int sd_dhcp6_client_set_local_address(
+ sd_dhcp6_client *client,
+ const struct in6_addr *local_address) {
+
+ assert_return(client, -EINVAL);
+ assert_return(local_address, -EINVAL);
+ assert_return(in_addr_is_link_local(AF_INET6, (const union in_addr_union *) local_address) > 0, -EINVAL);
+
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->local_address = *local_address;
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_mac(
+ sd_dhcp6_client *client,
+ const uint8_t *addr, size_t addr_len,
+ uint16_t arp_type) {
+
+ assert_return(client, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(addr_len <= MAX_MAC_ADDR_LEN, -EINVAL);
+
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ if (arp_type == ARPHRD_ETHER)
+ assert_return(addr_len == ETH_ALEN, -EINVAL);
+ else if (arp_type == ARPHRD_INFINIBAND)
+ assert_return(addr_len == INFINIBAND_ALEN, -EINVAL);
+ else {
+ client->arp_type = ARPHRD_NONE;
+ client->mac_addr_len = 0;
+ return 0;
+ }
+
+ if (client->mac_addr_len == addr_len &&
+ memcmp(&client->mac_addr, addr, addr_len) == 0)
+ return 0;
+
+ memcpy(&client->mac_addr, addr, addr_len);
+ client->mac_addr_len = addr_len;
+ client->arp_type = arp_type;
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_prefix_delegation_hint(
+ sd_dhcp6_client *client,
+ uint8_t prefixlen,
+ const struct in6_addr *pd_address) {
+
+ assert_return(client, -EINVAL);
+ assert_return(pd_address, -EINVAL);
+
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->hint_pd_prefix.iapdprefix.address = *pd_address;
+ client->hint_pd_prefix.iapdprefix.prefixlen = prefixlen;
+
+ return 0;
+}
+
+int sd_dhcp6_client_add_vendor_option(sd_dhcp6_client *client, sd_dhcp6_option *v) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(v, -EINVAL);
+
+ r = ordered_hashmap_ensure_allocated(&client->vendor_options, &dhcp6_option_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(client->vendor_options, v, v);
+ if (r < 0)
+ return r;
+
+ sd_dhcp6_option_ref(v);
+
+ return 1;
+}
+
+static int client_ensure_duid(sd_dhcp6_client *client) {
+ if (client->duid_len != 0)
+ return 0;
+
+ return dhcp_identifier_set_duid_en(&client->duid, &client->duid_len);
+}
+
+/**
+ * Sets DUID. If duid is non-null, the DUID is set to duid_type + duid
+ * without further modification. Otherwise, if duid_type is supported, DUID
+ * is set based on that type. Otherwise, an error is returned.
+ */
+static int dhcp6_client_set_duid_internal(
+ sd_dhcp6_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len,
+ usec_t llt_time) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(duid_len == 0 || duid != NULL, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ if (duid) {
+ r = dhcp_validate_duid_len(duid_type, duid_len, true);
+ if (r < 0) {
+ r = dhcp_validate_duid_len(duid_type, duid_len, false);
+ if (r < 0)
+ return log_dhcp6_client_errno(client, r, "Failed to validate length of DUID: %m");
+
+ log_dhcp6_client(client, "Using DUID of type %u of incorrect length, proceeding.", duid_type);
+ }
+
+ client->duid.type = htobe16(duid_type);
+ memcpy(&client->duid.raw.data, duid, duid_len);
+ client->duid_len = sizeof(client->duid.type) + duid_len;
+ } else
+ switch (duid_type) {
+ case DUID_TYPE_LLT:
+ if (client->mac_addr_len == 0)
+ return log_dhcp6_client_errno(client, SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to set DUID-LLT, MAC address is not set.");
+
+ r = dhcp_identifier_set_duid_llt(&client->duid, llt_time, client->mac_addr, client->mac_addr_len, client->arp_type, &client->duid_len);
+ if (r < 0)
+ return log_dhcp6_client_errno(client, r, "Failed to set DUID-LLT: %m");
+ break;
+ case DUID_TYPE_EN:
+ r = dhcp_identifier_set_duid_en(&client->duid, &client->duid_len);
+ if (r < 0)
+ return log_dhcp6_client_errno(client, r, "Failed to set DUID-EN: %m");
+ break;
+ case DUID_TYPE_LL:
+ if (client->mac_addr_len == 0)
+ return log_dhcp6_client_errno(client, SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to set DUID-LL, MAC address is not set.");
+
+ r = dhcp_identifier_set_duid_ll(&client->duid, client->mac_addr, client->mac_addr_len, client->arp_type, &client->duid_len);
+ if (r < 0)
+ return log_dhcp6_client_errno(client, r, "Failed to set DUID-LL: %m");
+ break;
+ case DUID_TYPE_UUID:
+ r = dhcp_identifier_set_duid_uuid(&client->duid, &client->duid_len);
+ if (r < 0)
+ return log_dhcp6_client_errno(client, r, "Failed to set DUID-UUID: %m");
+ break;
+ default:
+ return log_dhcp6_client_errno(client, SYNTHETIC_ERRNO(EINVAL), "Invalid DUID type");
+ }
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_duid(
+ sd_dhcp6_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len) {
+ return dhcp6_client_set_duid_internal(client, duid_type, duid, duid_len, 0);
+}
+
+int sd_dhcp6_client_set_duid_llt(
+ sd_dhcp6_client *client,
+ usec_t llt_time) {
+ return dhcp6_client_set_duid_internal(client, DUID_TYPE_LLT, NULL, 0, llt_time);
+}
+
+static const char* const dhcp6_duid_type_table[_DUID_TYPE_MAX] = {
+ [DUID_TYPE_LLT] = "DUID-LLT",
+ [DUID_TYPE_EN] = "DUID-EN/Vendor",
+ [DUID_TYPE_LL] = "DUID-LL",
+ [DUID_TYPE_UUID] = "UUID",
+};
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(dhcp6_duid_type, DUIDType);
+
+int sd_dhcp6_client_duid_as_string(
+ sd_dhcp6_client *client,
+ char **duid) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *t = NULL;
+ const char *v;
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->duid_len > 0, -ENODATA);
+
+ v = dhcp6_duid_type_to_string(be16toh(client->duid.type));
+ if (v) {
+ s = strdup(v);
+ if (!s)
+ return -ENOMEM;
+ } else {
+ r = asprintf(&s, "%0x", client->duid.type);
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ t = hexmem(&client->duid.raw.data, client->duid_len);
+ if (!t)
+ return -ENOMEM;
+
+ p = strjoin(s, ":", t);
+ if (!p)
+ return -ENOMEM;
+
+ *duid = TAKE_PTR(p);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_iaid(sd_dhcp6_client *client, uint32_t iaid) {
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->ia_na.ia_na.id = htobe32(iaid);
+ client->ia_pd.ia_pd.id = htobe32(iaid);
+ client->iaid_set = true;
+
+ return 0;
+}
+
+int sd_dhcp6_client_get_iaid(sd_dhcp6_client *client, uint32_t *iaid) {
+ assert_return(client, -EINVAL);
+ assert_return(iaid, -EINVAL);
+
+ if (!client->iaid_set)
+ return -ENODATA;
+
+ *iaid = be32toh(client->ia_na.ia_na.id);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_fqdn(
+ sd_dhcp6_client *client,
+ const char *fqdn) {
+
+ assert_return(client, -EINVAL);
+
+ /* Make sure FQDN qualifies as DNS and as Linux hostname */
+ if (fqdn &&
+ !(hostname_is_valid(fqdn, false) && dns_name_is_valid(fqdn) > 0))
+ return -EINVAL;
+
+ return free_and_strdup(&client->fqdn, fqdn);
+}
+
+int sd_dhcp6_client_set_information_request(sd_dhcp6_client *client, int enabled) {
+ assert_return(client, -EINVAL);
+ assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY);
+
+ client->information_request = enabled;
+
+ return 0;
+}
+
+int sd_dhcp6_client_get_information_request(sd_dhcp6_client *client, int *enabled) {
+ assert_return(client, -EINVAL);
+ assert_return(enabled, -EINVAL);
+
+ *enabled = client->information_request;
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_request_option(sd_dhcp6_client *client, uint16_t option) {
+ size_t t;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->state == DHCP6_STATE_STOPPED, -EBUSY);
+
+ if (option <= 0 || option >= UINT8_MAX)
+ return -EINVAL;
+
+ for (t = 0; t < client->req_opts_len; t++)
+ if (client->req_opts[t] == htobe16(option))
+ return -EEXIST;
+
+ if (!GREEDY_REALLOC(client->req_opts, client->req_opts_allocated,
+ client->req_opts_len + 1))
+ return -ENOMEM;
+
+ client->req_opts[client->req_opts_len++] = htobe16(option);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_request_mud_url(sd_dhcp6_client *client, const char *mudurl) {
+
+ assert_return(client, -EINVAL);
+ assert_return(client->state == DHCP6_STATE_STOPPED, -EBUSY);
+ assert_return(mudurl, -EINVAL);
+ assert_return(strlen(mudurl) <= UINT8_MAX, -EINVAL);
+ assert_return(http_url_is_valid(mudurl), -EINVAL);
+
+ return free_and_strdup(&client->mudurl, mudurl);
+}
+
+int sd_dhcp6_client_set_request_user_class(sd_dhcp6_client *client, char * const *user_class) {
+ char * const *p;
+ char **s;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->state == DHCP6_STATE_STOPPED, -EBUSY);
+ assert_return(!strv_isempty(user_class), -EINVAL);
+
+ STRV_FOREACH(p, user_class) {
+ size_t len = strlen(*p);
+
+ if (len > UINT16_MAX || len == 0)
+ return -EINVAL;
+ }
+
+ s = strv_copy(user_class);
+ if (!s)
+ return -ENOMEM;
+
+ return strv_free_and_replace(client->user_class, s);
+}
+
+int sd_dhcp6_client_set_request_vendor_class(sd_dhcp6_client *client, char * const *vendor_class) {
+ char * const *p;
+ char **s;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->state == DHCP6_STATE_STOPPED, -EBUSY);
+ assert_return(!strv_isempty(vendor_class), -EINVAL);
+
+ STRV_FOREACH(p, vendor_class) {
+ size_t len = strlen(*p);
+
+ if (len > UINT16_MAX || len == 0)
+ return -EINVAL;
+ }
+
+ s = strv_copy(vendor_class);
+ if (!s)
+ return -ENOMEM;
+
+ return strv_free_and_replace(client->vendor_class, s);
+}
+
+int sd_dhcp6_client_get_prefix_delegation(sd_dhcp6_client *client, int *delegation) {
+ assert_return(client, -EINVAL);
+ assert_return(delegation, -EINVAL);
+
+ *delegation = FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_prefix_delegation(sd_dhcp6_client *client, int delegation) {
+ assert_return(client, -EINVAL);
+
+ SET_FLAG(client->request, DHCP6_REQUEST_IA_PD, delegation);
+
+ return 0;
+}
+
+int sd_dhcp6_client_get_address_request(sd_dhcp6_client *client, int *request) {
+ assert_return(client, -EINVAL);
+ assert_return(request, -EINVAL);
+
+ *request = FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_address_request(sd_dhcp6_client *client, int request) {
+ assert_return(client, -EINVAL);
+
+ SET_FLAG(client->request, DHCP6_REQUEST_IA_NA, request);
+
+ return 0;
+}
+
+int sd_dhcp6_client_set_transaction_id(sd_dhcp6_client *client, uint32_t transaction_id) {
+ assert_return(client, -EINVAL);
+
+ client->transaction_id = transaction_id;
+
+ return 0;
+}
+
+int sd_dhcp6_client_get_lease(sd_dhcp6_client *client, sd_dhcp6_lease **ret) {
+ assert_return(client, -EINVAL);
+
+ if (!client->lease)
+ return -ENOMSG;
+
+ if (ret)
+ *ret = client->lease;
+
+ return 0;
+}
+
+int sd_dhcp6_client_add_option(sd_dhcp6_client *client, sd_dhcp6_option *v) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(v, -EINVAL);
+
+ r = ordered_hashmap_ensure_allocated(&client->extra_options, &dhcp6_option_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(client->extra_options, UINT_TO_PTR(v->option), v);
+ if (r < 0)
+ return r;
+
+ sd_dhcp6_option_ref(v);
+ return 0;
+}
+
+static void client_notify(sd_dhcp6_client *client, int event) {
+ assert(client);
+
+ if (client->callback)
+ client->callback(client, event, client->userdata);
+}
+
+static int client_reset(sd_dhcp6_client *client) {
+ assert(client);
+
+ client->lease = sd_dhcp6_lease_unref(client->lease);
+
+ client->receive_message =
+ sd_event_source_unref(client->receive_message);
+
+ client->transaction_id = 0;
+ client->transaction_start = 0;
+
+ client->retransmit_time = 0;
+ client->retransmit_count = 0;
+
+ (void) event_source_disable(client->timeout_resend);
+ (void) event_source_disable(client->timeout_resend_expire);
+ (void) event_source_disable(client->timeout_t1);
+ (void) event_source_disable(client->timeout_t2);
+
+ client->state = DHCP6_STATE_STOPPED;
+
+ return 0;
+}
+
+static void client_stop(sd_dhcp6_client *client, int error) {
+ DHCP6_CLIENT_DONT_DESTROY(client);
+
+ assert(client);
+
+ client_notify(client, error);
+
+ client_reset(client);
+}
+
+static int client_send_message(sd_dhcp6_client *client, usec_t time_now) {
+ _cleanup_free_ DHCP6Message *message = NULL;
+ struct in6_addr all_servers =
+ IN6ADDR_ALL_DHCP6_RELAY_AGENTS_AND_SERVERS_INIT;
+ struct sd_dhcp6_option *j;
+ size_t len, optlen = 512;
+ uint8_t *opt;
+ int r;
+ usec_t elapsed_usec;
+ be16_t elapsed_time;
+
+ assert(client);
+
+ len = sizeof(DHCP6Message) + optlen;
+
+ message = malloc0(len);
+ if (!message)
+ return -ENOMEM;
+
+ opt = (uint8_t *)(message + 1);
+
+ message->transaction_id = client->transaction_id;
+
+ switch(client->state) {
+ case DHCP6_STATE_INFORMATION_REQUEST:
+ message->type = DHCP6_INFORMATION_REQUEST;
+
+ if (client->mudurl) {
+ r = dhcp6_option_append(&opt, &optlen,
+ SD_DHCP6_OPTION_MUD_URL, strlen(client->mudurl),
+ client->mudurl);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+ case DHCP6_STATE_SOLICITATION:
+ message->type = DHCP6_SOLICIT;
+
+ r = dhcp6_option_append(&opt, &optlen,
+ SD_DHCP6_OPTION_RAPID_COMMIT, 0, NULL);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA)) {
+ r = dhcp6_option_append_ia(&opt, &optlen,
+ &client->ia_na);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->fqdn) {
+ r = dhcp6_option_append_fqdn(&opt, &optlen, client->fqdn);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->mudurl) {
+ r = dhcp6_option_append(&opt, &optlen,
+ SD_DHCP6_OPTION_MUD_URL, strlen(client->mudurl),
+ client->mudurl);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->user_class) {
+ r = dhcp6_option_append_user_class(&opt, &optlen, client->user_class);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->vendor_class) {
+ r = dhcp6_option_append_vendor_class(&opt, &optlen, client->vendor_class);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ordered_hashmap_isempty(client->vendor_options)) {
+ r = dhcp6_option_append_vendor_option(&opt, &optlen,
+ client->vendor_options);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD)) {
+ r = dhcp6_option_append_pd(opt, optlen, &client->ia_pd, &client->hint_pd_prefix);
+ if (r < 0)
+ return r;
+
+ opt += r;
+ optlen -= r;
+ }
+
+ break;
+
+ case DHCP6_STATE_REQUEST:
+ case DHCP6_STATE_RENEW:
+
+ if (client->state == DHCP6_STATE_REQUEST)
+ message->type = DHCP6_REQUEST;
+ else
+ message->type = DHCP6_RENEW;
+
+ r = dhcp6_option_append(&opt, &optlen, SD_DHCP6_OPTION_SERVERID,
+ client->lease->serverid_len,
+ client->lease->serverid);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA) && client->lease->ia.addresses) {
+ r = dhcp6_option_append_ia(&opt, &optlen,
+ &client->lease->ia);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->fqdn) {
+ r = dhcp6_option_append_fqdn(&opt, &optlen, client->fqdn);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->mudurl) {
+ r = dhcp6_option_append(&opt, &optlen,
+ SD_DHCP6_OPTION_MUD_URL, strlen(client->mudurl),
+ client->mudurl);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->user_class) {
+ r = dhcp6_option_append_user_class(&opt, &optlen, client->user_class);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->vendor_class) {
+ r = dhcp6_option_append_vendor_class(&opt, &optlen, client->vendor_class);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ordered_hashmap_isempty(client->vendor_options)) {
+ r = dhcp6_option_append_vendor_option(&opt, &optlen, client->vendor_options);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD) && client->lease->pd.addresses) {
+ r = dhcp6_option_append_pd(opt, optlen, &client->lease->pd, NULL);
+ if (r < 0)
+ return r;
+
+ opt += r;
+ optlen -= r;
+ }
+
+ break;
+
+ case DHCP6_STATE_REBIND:
+ message->type = DHCP6_REBIND;
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA)) {
+ r = dhcp6_option_append_ia(&opt, &optlen, &client->lease->ia);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->fqdn) {
+ r = dhcp6_option_append_fqdn(&opt, &optlen, client->fqdn);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->mudurl) {
+ r = dhcp6_option_append(&opt, &optlen,
+ SD_DHCP6_OPTION_MUD_URL, strlen(client->mudurl),
+ client->mudurl);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->user_class) {
+ r = dhcp6_option_append_user_class(&opt, &optlen, client->user_class);
+ if (r < 0)
+ return r;
+ }
+
+ if (client->vendor_class) {
+ r = dhcp6_option_append_vendor_class(&opt, &optlen, client->vendor_class);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ordered_hashmap_isempty(client->vendor_options)) {
+ r = dhcp6_option_append_vendor_option(&opt, &optlen, client->vendor_options);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD)) {
+ r = dhcp6_option_append_pd(opt, optlen, &client->lease->pd, NULL);
+ if (r < 0)
+ return r;
+
+ opt += r;
+ optlen -= r;
+ }
+
+ break;
+
+ case DHCP6_STATE_STOPPED:
+ case DHCP6_STATE_BOUND:
+ return -EINVAL;
+ }
+
+ r = dhcp6_option_append(&opt, &optlen, SD_DHCP6_OPTION_ORO,
+ client->req_opts_len * sizeof(be16_t),
+ client->req_opts);
+ if (r < 0)
+ return r;
+
+ assert(client->duid_len);
+ r = dhcp6_option_append(&opt, &optlen, SD_DHCP6_OPTION_CLIENTID,
+ client->duid_len, &client->duid);
+ if (r < 0)
+ return r;
+
+ elapsed_usec = time_now - client->transaction_start;
+ if (elapsed_usec < 0xffff * USEC_PER_MSEC * 10)
+ elapsed_time = htobe16(elapsed_usec / USEC_PER_MSEC / 10);
+ else
+ elapsed_time = 0xffff;
+
+ r = dhcp6_option_append(&opt, &optlen, SD_DHCP6_OPTION_ELAPSED_TIME,
+ sizeof(elapsed_time), &elapsed_time);
+ if (r < 0)
+ return r;
+
+ ORDERED_HASHMAP_FOREACH(j, client->extra_options) {
+ r = dhcp6_option_append(&opt, &optlen, j->option, j->length, j->data);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp6_network_send_udp_socket(client->fd, &all_servers, message,
+ len - optlen);
+ if (r < 0)
+ return r;
+
+ log_dhcp6_client(client, "Sent %s",
+ dhcp6_message_type_to_string(message->type));
+
+ return 0;
+}
+
+static int client_timeout_t2(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp6_client *client = userdata;
+
+ assert(s);
+ assert(client);
+ assert(client->lease);
+
+ (void) event_source_disable(client->timeout_t2);
+
+ log_dhcp6_client(client, "Timeout T2");
+
+ client_start(client, DHCP6_STATE_REBIND);
+
+ return 0;
+}
+
+static int client_timeout_t1(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp6_client *client = userdata;
+
+ assert(s);
+ assert(client);
+ assert(client->lease);
+
+ (void) event_source_disable(client->timeout_t1);
+
+ log_dhcp6_client(client, "Timeout T1");
+
+ client_start(client, DHCP6_STATE_RENEW);
+
+ return 0;
+}
+
+static int client_timeout_resend_expire(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_dhcp6_client *client = userdata;
+ DHCP6_CLIENT_DONT_DESTROY(client);
+ enum DHCP6State state;
+
+ assert(s);
+ assert(client);
+ assert(client->event);
+
+ state = client->state;
+
+ client_stop(client, SD_DHCP6_CLIENT_EVENT_RESEND_EXPIRE);
+
+ /* RFC 3315, section 18.1.4., says that "...the client may choose to
+ use a Solicit message to locate a new DHCP server..." */
+ if (state == DHCP6_STATE_REBIND)
+ client_start(client, DHCP6_STATE_SOLICITATION);
+
+ return 0;
+}
+
+static usec_t client_timeout_compute_random(usec_t val) {
+ return val - (random_u32() % USEC_PER_SEC) * val / 10 / USEC_PER_SEC;
+}
+
+static int client_timeout_resend(sd_event_source *s, uint64_t usec, void *userdata) {
+ int r = 0;
+ sd_dhcp6_client *client = userdata;
+ usec_t time_now, init_retransmit_time = 0, max_retransmit_time = 0;
+ usec_t max_retransmit_duration = 0;
+ uint8_t max_retransmit_count = 0;
+ char time_string[FORMAT_TIMESPAN_MAX];
+
+ assert(s);
+ assert(client);
+ assert(client->event);
+
+ (void) event_source_disable(client->timeout_resend);
+
+ switch (client->state) {
+ case DHCP6_STATE_INFORMATION_REQUEST:
+ init_retransmit_time = DHCP6_INF_TIMEOUT;
+ max_retransmit_time = DHCP6_INF_MAX_RT;
+
+ break;
+
+ case DHCP6_STATE_SOLICITATION:
+
+ if (client->retransmit_count && client->lease) {
+ client_start(client, DHCP6_STATE_REQUEST);
+ return 0;
+ }
+
+ init_retransmit_time = DHCP6_SOL_TIMEOUT;
+ max_retransmit_time = DHCP6_SOL_MAX_RT;
+
+ break;
+
+ case DHCP6_STATE_REQUEST:
+ init_retransmit_time = DHCP6_REQ_TIMEOUT;
+ max_retransmit_time = DHCP6_REQ_MAX_RT;
+ max_retransmit_count = DHCP6_REQ_MAX_RC;
+
+ break;
+
+ case DHCP6_STATE_RENEW:
+ init_retransmit_time = DHCP6_REN_TIMEOUT;
+ max_retransmit_time = DHCP6_REN_MAX_RT;
+
+ /* RFC 3315, section 18.1.3. says max retransmit duration will
+ be the remaining time until T2. Instead of setting MRD,
+ wait for T2 to trigger with the same end result */
+
+ break;
+
+ case DHCP6_STATE_REBIND:
+ init_retransmit_time = DHCP6_REB_TIMEOUT;
+ max_retransmit_time = DHCP6_REB_MAX_RT;
+
+ if (event_source_is_enabled(client->timeout_resend_expire) <= 0) {
+ uint32_t expire = 0;
+
+ r = dhcp6_lease_ia_rebind_expire(&client->lease->ia, &expire);
+ if (r < 0) {
+ client_stop(client, r);
+ return 0;
+ }
+ max_retransmit_duration = expire * USEC_PER_SEC;
+ }
+
+ break;
+
+ case DHCP6_STATE_STOPPED:
+ case DHCP6_STATE_BOUND:
+ return 0;
+ }
+
+ if (max_retransmit_count > 0 &&
+ client->retransmit_count >= max_retransmit_count) {
+ client_stop(client, SD_DHCP6_CLIENT_EVENT_RETRANS_MAX);
+ return 0;
+ }
+
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ goto error;
+
+ r = client_send_message(client, time_now);
+ if (r >= 0)
+ client->retransmit_count++;
+
+ if (client->retransmit_time == 0) {
+ client->retransmit_time =
+ client_timeout_compute_random(init_retransmit_time);
+
+ if (client->state == DHCP6_STATE_SOLICITATION)
+ client->retransmit_time += init_retransmit_time / 10;
+
+ } else {
+ if (max_retransmit_time > 0 &&
+ client->retransmit_time > max_retransmit_time / 2)
+ client->retransmit_time = client_timeout_compute_random(max_retransmit_time);
+ else
+ client->retransmit_time += client_timeout_compute_random(client->retransmit_time);
+ }
+
+ log_dhcp6_client(client, "Next retransmission in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, client->retransmit_time, USEC_PER_SEC));
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ time_now + client->retransmit_time, 10 * USEC_PER_MSEC,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp6-resend-timer", true);
+ if (r < 0)
+ goto error;
+
+ if (max_retransmit_duration > 0 && event_source_is_enabled(client->timeout_resend_expire) <= 0) {
+
+ log_dhcp6_client(client, "Max retransmission duration %"PRIu64" secs",
+ max_retransmit_duration / USEC_PER_SEC);
+
+ r = event_reset_time(client->event, &client->timeout_resend_expire,
+ clock_boottime_or_monotonic(),
+ time_now + max_retransmit_duration, USEC_PER_SEC,
+ client_timeout_resend_expire, client,
+ client->event_priority, "dhcp6-resend-expire-timer", true);
+ if (r < 0)
+ goto error;
+ }
+
+error:
+ if (r < 0)
+ client_stop(client, r);
+
+ return 0;
+}
+
+static int client_ensure_iaid(sd_dhcp6_client *client) {
+ int r;
+ uint32_t iaid;
+
+ assert(client);
+
+ if (client->iaid_set)
+ return 0;
+
+ r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr, client->mac_addr_len, true, &iaid);
+ if (r < 0)
+ return r;
+
+ client->ia_na.ia_na.id = iaid;
+ client->ia_pd.ia_pd.id = iaid;
+ client->iaid_set = true;
+
+ return 0;
+}
+
+static int client_parse_message(
+ sd_dhcp6_client *client,
+ DHCP6Message *message,
+ size_t len,
+ sd_dhcp6_lease *lease) {
+
+ uint16_t ia_na_status = 0, ia_pd_status = 0;
+ uint32_t lt_t1 = ~0, lt_t2 = ~0;
+ usec_t irt = IRT_DEFAULT;
+ bool clientid = false;
+ size_t pos = 0;
+ int r;
+
+ assert(client);
+ assert(message);
+ assert(len >= sizeof(DHCP6Message));
+ assert(lease);
+
+ len -= sizeof(DHCP6Message);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *) &message->options[pos];
+ uint16_t optcode, optlen;
+ be32_t iaid_lease;
+ int status;
+ uint8_t *optval;
+
+ if (len < pos + offsetof(DHCP6Option, data))
+ return -ENOBUFS;
+
+ optcode = be16toh(option->code);
+ optlen = be16toh(option->len);
+ optval = option->data;
+
+ if (len < pos + offsetof(DHCP6Option, data) + optlen)
+ return -ENOBUFS;
+
+ switch (optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ if (clientid) {
+ log_dhcp6_client(client, "%s contains multiple clientids",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ if (optlen != client->duid_len ||
+ memcmp(&client->duid, optval, optlen) != 0) {
+ log_dhcp6_client(client, "%s DUID does not match",
+ dhcp6_message_type_to_string(message->type));
+
+ return -EINVAL;
+ }
+ clientid = true;
+
+ break;
+
+ case SD_DHCP6_OPTION_SERVERID:
+ r = dhcp6_lease_get_serverid(lease, NULL, NULL);
+ if (r >= 0) {
+ log_dhcp6_client(client, "%s contains multiple serverids",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ r = dhcp6_lease_set_serverid(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_PREFERENCE:
+ if (optlen != 1)
+ return -EINVAL;
+
+ r = dhcp6_lease_set_preference(lease, optval[0]);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_STATUS_CODE:
+ status = dhcp6_option_parse_status(option, optlen + sizeof(DHCP6Option));
+ if (status < 0)
+ return status;
+
+ if (status > 0) {
+ log_dhcp6_client(client, "%s Status %s",
+ dhcp6_message_type_to_string(message->type),
+ dhcp6_message_status_to_string(status));
+
+ return -EINVAL;
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ if (client->state == DHCP6_STATE_INFORMATION_REQUEST) {
+ log_dhcp6_client(client, "Information request ignoring IA NA option");
+
+ break;
+ }
+
+ r = dhcp6_option_parse_ia(option, &lease->ia, &ia_na_status);
+ if (r < 0 && r != -ENOMSG)
+ return r;
+
+ if (ia_na_status == DHCP6_STATUS_NO_ADDRS_AVAIL) {
+ pos += offsetof(DHCP6Option, data) + optlen;
+ continue;
+ }
+
+ r = dhcp6_lease_get_iaid(lease, &iaid_lease);
+ if (r < 0)
+ return r;
+
+ if (client->ia_na.ia_na.id != iaid_lease) {
+ log_dhcp6_client(client, "%s has wrong IAID for IA NA",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ if (lease->ia.addresses) {
+ lt_t1 = MIN(lt_t1, be32toh(lease->ia.ia_na.lifetime_t1));
+ lt_t2 = MIN(lt_t2, be32toh(lease->ia.ia_na.lifetime_t1));
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_PD:
+ if (client->state == DHCP6_STATE_INFORMATION_REQUEST) {
+ log_dhcp6_client(client, "Information request ignoring IA PD option");
+
+ break;
+ }
+
+ r = dhcp6_option_parse_ia(option, &lease->pd, &ia_pd_status);
+ if (r < 0 && r != -ENOMSG)
+ return r;
+
+ if (ia_pd_status == DHCP6_STATUS_NO_PREFIX_AVAIL) {
+ pos += offsetof(DHCP6Option, data) + optlen;
+ continue;
+ }
+
+ r = dhcp6_lease_get_pd_iaid(lease, &iaid_lease);
+ if (r < 0)
+ return r;
+
+ if (client->ia_pd.ia_pd.id != iaid_lease) {
+ log_dhcp6_client(client, "%s has wrong IAID for IA PD",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ if (lease->pd.addresses) {
+ lt_t1 = MIN(lt_t1, be32toh(lease->pd.ia_pd.lifetime_t1));
+ lt_t2 = MIN(lt_t2, be32toh(lease->pd.ia_pd.lifetime_t2));
+ }
+
+ break;
+
+ case SD_DHCP6_OPTION_RAPID_COMMIT:
+ r = dhcp6_lease_set_rapid_commit(lease);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_DNS_SERVERS:
+ r = dhcp6_lease_set_dns(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_DOMAIN_LIST:
+ r = dhcp6_lease_set_domains(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_NTP_SERVER:
+ r = dhcp6_lease_set_ntp(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_SNTP_SERVERS:
+ r = dhcp6_lease_set_sntp(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_FQDN:
+ r = dhcp6_lease_set_fqdn(lease, optval, optlen);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_DHCP6_OPTION_INFORMATION_REFRESH_TIME:
+ if (optlen != 4)
+ return -EINVAL;
+
+ irt = unaligned_read_be32((be32_t *) optval) * USEC_PER_SEC;
+ break;
+ }
+
+ pos += offsetof(DHCP6Option, data) + optlen;
+ }
+
+ if (ia_na_status > 0 && ia_pd_status > 0) {
+ log_dhcp6_client(client, "No IA_PD prefix or IA_NA address received. Ignoring.");
+ return -EINVAL;
+ }
+
+ if (!clientid) {
+ log_dhcp6_client(client, "%s has incomplete options",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ if (client->state != DHCP6_STATE_INFORMATION_REQUEST) {
+ r = dhcp6_lease_get_serverid(lease, NULL, NULL);
+ if (r < 0) {
+ log_dhcp6_client(client, "%s has no server id",
+ dhcp6_message_type_to_string(message->type));
+ return -EINVAL;
+ }
+
+ } else {
+ if (lease->ia.addresses) {
+ lease->ia.ia_na.lifetime_t1 = htobe32(lt_t1);
+ lease->ia.ia_na.lifetime_t2 = htobe32(lt_t2);
+ }
+
+ if (lease->pd.addresses) {
+ lease->pd.ia_pd.lifetime_t1 = htobe32(lt_t1);
+ lease->pd.ia_pd.lifetime_t2 = htobe32(lt_t2);
+ }
+ }
+
+ client->information_refresh_time_usec = MAX(irt, IRT_MINIMUM);
+
+ return 0;
+}
+
+static int client_receive_reply(sd_dhcp6_client *client, DHCP6Message *reply, size_t len) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ bool rapid_commit;
+ int r;
+
+ assert(client);
+ assert(reply);
+
+ if (reply->type != DHCP6_REPLY)
+ return 0;
+
+ r = dhcp6_lease_new(&lease);
+ if (r < 0)
+ return -ENOMEM;
+
+ r = client_parse_message(client, reply, len, lease);
+ if (r < 0)
+ return r;
+
+ if (client->state == DHCP6_STATE_SOLICITATION) {
+ r = dhcp6_lease_get_rapid_commit(lease, &rapid_commit);
+ if (r < 0)
+ return r;
+
+ if (!rapid_commit)
+ return 0;
+ }
+
+ sd_dhcp6_lease_unref(client->lease);
+ client->lease = TAKE_PTR(lease);
+
+ return DHCP6_STATE_BOUND;
+}
+
+static int client_receive_advertise(sd_dhcp6_client *client, DHCP6Message *advertise, size_t len) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ uint8_t pref_advertise = 0, pref_lease = 0;
+ int r;
+
+ if (advertise->type != DHCP6_ADVERTISE)
+ return 0;
+
+ r = dhcp6_lease_new(&lease);
+ if (r < 0)
+ return r;
+
+ r = client_parse_message(client, advertise, len, lease);
+ if (r < 0)
+ return r;
+
+ r = dhcp6_lease_get_preference(lease, &pref_advertise);
+ if (r < 0)
+ return r;
+
+ r = dhcp6_lease_get_preference(client->lease, &pref_lease);
+
+ if (r < 0 || pref_advertise > pref_lease) {
+ sd_dhcp6_lease_unref(client->lease);
+ client->lease = TAKE_PTR(lease);
+ r = 0;
+ }
+
+ if (pref_advertise == 255 || client->retransmit_count > 1)
+ r = DHCP6_STATE_REQUEST;
+
+ return r;
+}
+
+static int client_receive_message(
+ sd_event_source *s,
+ int fd, uint32_t
+ revents,
+ void *userdata) {
+
+ sd_dhcp6_client *client = userdata;
+ DHCP6_CLIENT_DONT_DESTROY(client);
+ _cleanup_free_ DHCP6Message *message = NULL;
+ ssize_t buflen, len;
+ int r = 0;
+
+ assert(s);
+ assert(client);
+ assert(client->event);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen == -ENETDOWN)
+ /* the link is down. Don't return an error or the I/O event
+ source will be disconnected and we won't be able to receive
+ packets again when the link comes back. */
+ return 0;
+ if (buflen < 0)
+ return buflen;
+
+ message = malloc(buflen);
+ if (!message)
+ return -ENOMEM;
+
+ len = recv(fd, message, buflen, 0);
+ if (len < 0) {
+ /* see comment above for why we shouldn't error out on ENETDOWN. */
+ if (IN_SET(errno, EAGAIN, EINTR, ENETDOWN))
+ return 0;
+
+ return log_dhcp6_client_errno(client, errno, "Could not receive message from UDP socket: %m");
+
+ }
+ if ((size_t) len < sizeof(DHCP6Message)) {
+ log_dhcp6_client(client, "Too small to be DHCP6 message: ignoring");
+ return 0;
+ }
+
+ switch(message->type) {
+ case DHCP6_SOLICIT:
+ case DHCP6_REQUEST:
+ case DHCP6_CONFIRM:
+ case DHCP6_RENEW:
+ case DHCP6_REBIND:
+ case DHCP6_RELEASE:
+ case DHCP6_DECLINE:
+ case DHCP6_INFORMATION_REQUEST:
+ case DHCP6_RELAY_FORW:
+ case DHCP6_RELAY_REPL:
+ return 0;
+
+ case DHCP6_ADVERTISE:
+ case DHCP6_REPLY:
+ case DHCP6_RECONFIGURE:
+ break;
+
+ default:
+ log_dhcp6_client(client, "Unknown message type %d", message->type);
+ return 0;
+ }
+
+ if (client->transaction_id != (message->transaction_id &
+ htobe32(0x00ffffff)))
+ return 0;
+
+ switch (client->state) {
+ case DHCP6_STATE_INFORMATION_REQUEST:
+ r = client_receive_reply(client, message, len);
+ if (r < 0)
+ return 0;
+
+ client_notify(client, SD_DHCP6_CLIENT_EVENT_INFORMATION_REQUEST);
+
+ client_start(client, DHCP6_STATE_STOPPED);
+
+ break;
+
+ case DHCP6_STATE_SOLICITATION:
+ r = client_receive_advertise(client, message, len);
+
+ if (r == DHCP6_STATE_REQUEST) {
+ client_start(client, r);
+
+ break;
+ }
+
+ _fallthrough_; /* for Solicitation Rapid Commit option check */
+ case DHCP6_STATE_REQUEST:
+ case DHCP6_STATE_RENEW:
+ case DHCP6_STATE_REBIND:
+
+ r = client_receive_reply(client, message, len);
+ if (r < 0)
+ return 0;
+
+ if (r == DHCP6_STATE_BOUND) {
+
+ r = client_start(client, DHCP6_STATE_BOUND);
+ if (r < 0) {
+ client_stop(client, r);
+ return 0;
+ }
+
+ client_notify(client, SD_DHCP6_CLIENT_EVENT_IP_ACQUIRE);
+ }
+
+ break;
+
+ case DHCP6_STATE_BOUND:
+
+ break;
+
+ case DHCP6_STATE_STOPPED:
+ return 0;
+ }
+
+ log_dhcp6_client(client, "Recv %s",
+ dhcp6_message_type_to_string(message->type));
+
+ return 0;
+}
+
+static int client_get_lifetime(sd_dhcp6_client *client, uint32_t *lifetime_t1,
+ uint32_t *lifetime_t2) {
+ assert_return(client, -EINVAL);
+ assert_return(client->lease, -EINVAL);
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_NA) && client->lease->ia.addresses) {
+ *lifetime_t1 = be32toh(client->lease->ia.ia_na.lifetime_t1);
+ *lifetime_t2 = be32toh(client->lease->ia.ia_na.lifetime_t2);
+
+ return 0;
+ }
+
+ if (FLAGS_SET(client->request, DHCP6_REQUEST_IA_PD) && client->lease->pd.addresses) {
+ *lifetime_t1 = be32toh(client->lease->pd.ia_pd.lifetime_t1);
+ *lifetime_t2 = be32toh(client->lease->pd.ia_pd.lifetime_t2);
+
+ return 0;
+ }
+
+ return -ENOMSG;
+}
+
+static int client_start(sd_dhcp6_client *client, enum DHCP6State state) {
+ int r;
+ usec_t timeout, time_now;
+ char time_string[FORMAT_TIMESPAN_MAX];
+ uint32_t lifetime_t1, lifetime_t2;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->event, -EINVAL);
+ assert_return(client->ifindex > 0, -EINVAL);
+ assert_return(client->state != state, -EINVAL);
+
+ (void) event_source_disable(client->timeout_resend_expire);
+ (void) event_source_disable(client->timeout_resend);
+ client->retransmit_time = 0;
+ client->retransmit_count = 0;
+
+ r = sd_event_now(client->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+
+ if (!client->receive_message) {
+ r = sd_event_add_io(client->event, &client->receive_message,
+ client->fd, EPOLLIN, client_receive_message,
+ client);
+ if (r < 0)
+ goto error;
+
+ r = sd_event_source_set_priority(client->receive_message,
+ client->event_priority);
+ if (r < 0)
+ goto error;
+
+ r = sd_event_source_set_description(client->receive_message,
+ "dhcp6-receive-message");
+ if (r < 0)
+ goto error;
+ }
+
+ switch (state) {
+ case DHCP6_STATE_STOPPED:
+ if (client->state == DHCP6_STATE_INFORMATION_REQUEST) {
+ client->state = DHCP6_STATE_STOPPED;
+
+ return 0;
+ }
+
+ _fallthrough_;
+ case DHCP6_STATE_SOLICITATION:
+ client->state = DHCP6_STATE_SOLICITATION;
+
+ break;
+
+ case DHCP6_STATE_INFORMATION_REQUEST:
+ case DHCP6_STATE_REQUEST:
+ case DHCP6_STATE_RENEW:
+ case DHCP6_STATE_REBIND:
+
+ client->state = state;
+
+ break;
+
+ case DHCP6_STATE_BOUND:
+
+ r = client_get_lifetime(client, &lifetime_t1, &lifetime_t2);
+ if (r < 0)
+ goto error;
+
+ if (lifetime_t1 == 0xffffffff || lifetime_t2 == 0xffffffff) {
+ log_dhcp6_client(client, "Infinite T1 0x%08x or T2 0x%08x",
+ lifetime_t1, lifetime_t2);
+
+ return 0;
+ }
+
+ timeout = client_timeout_compute_random(lifetime_t1 * USEC_PER_SEC);
+
+ log_dhcp6_client(client, "T1 expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, timeout, USEC_PER_SEC));
+
+ r = event_reset_time(client->event, &client->timeout_t1,
+ clock_boottime_or_monotonic(),
+ time_now + timeout, 10 * USEC_PER_SEC,
+ client_timeout_t1, client,
+ client->event_priority, "dhcp6-t1-timeout", true);
+ if (r < 0)
+ goto error;
+
+ timeout = client_timeout_compute_random(lifetime_t2 * USEC_PER_SEC);
+
+ log_dhcp6_client(client, "T2 expires in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, timeout, USEC_PER_SEC));
+
+ r = event_reset_time(client->event, &client->timeout_t2,
+ clock_boottime_or_monotonic(),
+ time_now + timeout, 10 * USEC_PER_SEC,
+ client_timeout_t2, client,
+ client->event_priority, "dhcp6-t2-timeout", true);
+ if (r < 0)
+ goto error;
+
+ client->state = state;
+
+ return 0;
+ }
+
+ client->transaction_id = random_u32() & htobe32(0x00ffffff);
+ client->transaction_start = time_now;
+
+ r = event_reset_time(client->event, &client->timeout_resend,
+ clock_boottime_or_monotonic(),
+ 0, 0,
+ client_timeout_resend, client,
+ client->event_priority, "dhcp6-resend-timeout", true);
+ if (r < 0)
+ goto error;
+
+ return 0;
+
+ error:
+ client_reset(client);
+ return r;
+}
+
+int sd_dhcp6_client_stop(sd_dhcp6_client *client) {
+ if (!client)
+ return 0;
+
+ client_stop(client, SD_DHCP6_CLIENT_EVENT_STOP);
+
+ client->fd = safe_close(client->fd);
+
+ return 0;
+}
+
+int sd_dhcp6_client_is_running(sd_dhcp6_client *client) {
+ assert_return(client, -EINVAL);
+
+ return client->state != DHCP6_STATE_STOPPED;
+}
+
+int sd_dhcp6_client_start(sd_dhcp6_client *client) {
+ enum DHCP6State state = DHCP6_STATE_SOLICITATION;
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(client->event, -EINVAL);
+ assert_return(client->ifindex > 0, -EINVAL);
+ assert_return(in_addr_is_link_local(AF_INET6, (const union in_addr_union *) &client->local_address) > 0, -EINVAL);
+
+ if (!IN_SET(client->state, DHCP6_STATE_STOPPED))
+ return -EBUSY;
+
+ if (!client->information_request && !client->request)
+ return -EINVAL;
+
+ r = client_reset(client);
+ if (r < 0)
+ return r;
+
+ r = client_ensure_iaid(client);
+ if (r < 0)
+ return r;
+
+ r = client_ensure_duid(client);
+ if (r < 0)
+ return r;
+
+ if (client->fd < 0) {
+ r = dhcp6_network_bind_udp_socket(client->ifindex, &client->local_address);
+ if (r < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ (void) in_addr_to_string(AF_INET6, (const union in_addr_union*) &client->local_address, &p);
+ return log_dhcp6_client_errno(client, r,
+ "Failed to bind to UDP socket at address %s: %m", strna(p));
+ }
+
+ client->fd = r;
+ }
+
+ if (client->information_request) {
+ usec_t t = now(CLOCK_MONOTONIC);
+
+ if (t < usec_add(client->information_request_time_usec, client->information_refresh_time_usec))
+ return 0;
+
+ client->information_request_time_usec = t;
+ state = DHCP6_STATE_INFORMATION_REQUEST;
+ }
+
+ log_dhcp6_client(client, "Started in %s mode",
+ client->information_request ? "Information request" : "Managed");
+
+ return client_start(client, state);
+}
+
+int sd_dhcp6_client_attach_event(sd_dhcp6_client *client, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(client, -EINVAL);
+ assert_return(!client->event, -EBUSY);
+
+ if (event)
+ client->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&client->event);
+ if (r < 0)
+ return 0;
+ }
+
+ client->event_priority = priority;
+
+ return 0;
+}
+
+int sd_dhcp6_client_detach_event(sd_dhcp6_client *client) {
+ assert_return(client, -EINVAL);
+
+ client->event = sd_event_unref(client->event);
+
+ return 0;
+}
+
+sd_event *sd_dhcp6_client_get_event(sd_dhcp6_client *client) {
+ assert_return(client, NULL);
+
+ return client->event;
+}
+
+static sd_dhcp6_client *dhcp6_client_free(sd_dhcp6_client *client) {
+ assert(client);
+
+ client->timeout_resend = sd_event_source_unref(client->timeout_resend);
+ client->timeout_resend_expire = sd_event_source_unref(client->timeout_resend_expire);
+ client->timeout_t1 = sd_event_source_unref(client->timeout_t1);
+ client->timeout_t2 = sd_event_source_unref(client->timeout_t2);
+
+ client_reset(client);
+
+ client->fd = safe_close(client->fd);
+
+ sd_dhcp6_client_detach_event(client);
+
+ free(client->req_opts);
+ free(client->fqdn);
+ free(client->mudurl);
+
+ ordered_hashmap_free(client->extra_options);
+ strv_free(client->user_class);
+ strv_free(client->vendor_class);
+
+ return mfree(client);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp6_client, sd_dhcp6_client, dhcp6_client_free);
+
+int sd_dhcp6_client_new(sd_dhcp6_client **ret) {
+ _cleanup_(sd_dhcp6_client_unrefp) sd_dhcp6_client *client = NULL;
+ _cleanup_free_ be16_t *req_opts = NULL;
+ size_t t;
+
+ assert_return(ret, -EINVAL);
+
+ req_opts = new(be16_t, ELEMENTSOF(default_req_opts));
+ if (!req_opts)
+ return -ENOMEM;
+
+ for (t = 0; t < ELEMENTSOF(default_req_opts); t++)
+ req_opts[t] = htobe16(default_req_opts[t]);
+
+ client = new(sd_dhcp6_client, 1);
+ if (!client)
+ return -ENOMEM;
+
+ *client = (sd_dhcp6_client) {
+ .n_ref = 1,
+ .ia_na.type = SD_DHCP6_OPTION_IA_NA,
+ .ia_pd.type = SD_DHCP6_OPTION_IA_PD,
+ .ifindex = -1,
+ .request = DHCP6_REQUEST_IA_NA,
+ .fd = -1,
+ .req_opts_len = ELEMENTSOF(default_req_opts),
+ .hint_pd_prefix.iapdprefix.lifetime_preferred = (be32_t) -1,
+ .hint_pd_prefix.iapdprefix.lifetime_valid = (be32_t) -1,
+ .req_opts = TAKE_PTR(req_opts),
+ };
+
+ *ret = TAKE_PTR(client);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-dhcp6-lease.c b/src/libsystemd-network/sd-dhcp6-lease.c
new file mode 100644
index 0000000..d6f0708
--- /dev/null
+++ b/src/libsystemd-network/sd-dhcp6-lease.c
@@ -0,0 +1,433 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "dhcp6-lease-internal.h"
+#include "dhcp6-protocol.h"
+#include "strv.h"
+#include "util.h"
+
+int dhcp6_lease_ia_rebind_expire(const DHCP6IA *ia, uint32_t *expire) {
+ DHCP6Address *addr;
+ uint32_t valid = 0, t;
+
+ assert_return(ia, -EINVAL);
+ assert_return(expire, -EINVAL);
+
+ LIST_FOREACH(addresses, addr, ia->addresses) {
+ t = be32toh(addr->iaaddr.lifetime_valid);
+ if (valid < t)
+ valid = t;
+ }
+
+ t = be32toh(ia->ia_na.lifetime_t2);
+ if (t > valid)
+ return -EINVAL;
+
+ *expire = valid - t;
+
+ return 0;
+}
+
+DHCP6IA *dhcp6_lease_free_ia(DHCP6IA *ia) {
+ DHCP6Address *address;
+
+ if (!ia)
+ return NULL;
+
+ while (ia->addresses) {
+ address = ia->addresses;
+
+ LIST_REMOVE(addresses, ia->addresses, address);
+
+ free(address);
+ }
+
+ return NULL;
+}
+
+int dhcp6_lease_set_serverid(sd_dhcp6_lease *lease, const uint8_t *id,
+ size_t len) {
+ uint8_t *serverid;
+
+ assert_return(lease, -EINVAL);
+ assert_return(id, -EINVAL);
+
+ serverid = memdup(id, len);
+ if (!serverid)
+ return -ENOMEM;
+
+ free_and_replace(lease->serverid, serverid);
+ lease->serverid_len = len;
+
+ return 0;
+}
+
+int dhcp6_lease_get_serverid(sd_dhcp6_lease *lease, uint8_t **id, size_t *len) {
+ assert_return(lease, -EINVAL);
+
+ if (!lease->serverid)
+ return -ENOMSG;
+
+ if (id)
+ *id = lease->serverid;
+ if (len)
+ *len = lease->serverid_len;
+
+ return 0;
+}
+
+int dhcp6_lease_set_preference(sd_dhcp6_lease *lease, uint8_t preference) {
+ assert_return(lease, -EINVAL);
+
+ lease->preference = preference;
+
+ return 0;
+}
+
+int dhcp6_lease_get_preference(sd_dhcp6_lease *lease, uint8_t *preference) {
+ assert_return(preference, -EINVAL);
+
+ if (!lease)
+ return -EINVAL;
+
+ *preference = lease->preference;
+
+ return 0;
+}
+
+int dhcp6_lease_set_rapid_commit(sd_dhcp6_lease *lease) {
+ assert_return(lease, -EINVAL);
+
+ lease->rapid_commit = true;
+
+ return 0;
+}
+
+int dhcp6_lease_get_rapid_commit(sd_dhcp6_lease *lease, bool *rapid_commit) {
+ assert_return(lease, -EINVAL);
+ assert_return(rapid_commit, -EINVAL);
+
+ *rapid_commit = lease->rapid_commit;
+
+ return 0;
+}
+
+int dhcp6_lease_get_iaid(sd_dhcp6_lease *lease, be32_t *iaid) {
+ assert_return(lease, -EINVAL);
+ assert_return(iaid, -EINVAL);
+
+ *iaid = lease->ia.ia_na.id;
+
+ return 0;
+}
+
+int dhcp6_lease_get_pd_iaid(sd_dhcp6_lease *lease, be32_t *iaid) {
+ assert_return(lease, -EINVAL);
+ assert_return(iaid, -EINVAL);
+
+ *iaid = lease->pd.ia_pd.id;
+
+ return 0;
+}
+
+int sd_dhcp6_lease_get_address(sd_dhcp6_lease *lease, struct in6_addr *addr,
+ uint32_t *lifetime_preferred,
+ uint32_t *lifetime_valid) {
+ assert_return(lease, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(lifetime_preferred, -EINVAL);
+ assert_return(lifetime_valid, -EINVAL);
+
+ if (!lease->addr_iter)
+ return -ENOMSG;
+
+ memcpy(addr, &lease->addr_iter->iaaddr.address,
+ sizeof(struct in6_addr));
+ *lifetime_preferred =
+ be32toh(lease->addr_iter->iaaddr.lifetime_preferred);
+ *lifetime_valid = be32toh(lease->addr_iter->iaaddr.lifetime_valid);
+
+ lease->addr_iter = lease->addr_iter->addresses_next;
+
+ return 0;
+}
+
+void sd_dhcp6_lease_reset_address_iter(sd_dhcp6_lease *lease) {
+ if (lease)
+ lease->addr_iter = lease->ia.addresses;
+}
+
+int sd_dhcp6_lease_get_pd(sd_dhcp6_lease *lease, struct in6_addr *prefix,
+ uint8_t *prefix_len,
+ uint32_t *lifetime_preferred,
+ uint32_t *lifetime_valid) {
+ assert_return(lease, -EINVAL);
+ assert_return(prefix, -EINVAL);
+ assert_return(prefix_len, -EINVAL);
+ assert_return(lifetime_preferred, -EINVAL);
+ assert_return(lifetime_valid, -EINVAL);
+
+ if (!lease->prefix_iter)
+ return -ENOMSG;
+
+ memcpy(prefix, &lease->prefix_iter->iapdprefix.address,
+ sizeof(struct in6_addr));
+ *prefix_len = lease->prefix_iter->iapdprefix.prefixlen;
+ *lifetime_preferred =
+ be32toh(lease->prefix_iter->iapdprefix.lifetime_preferred);
+ *lifetime_valid =
+ be32toh(lease->prefix_iter->iapdprefix.lifetime_valid);
+
+ lease->prefix_iter = lease->prefix_iter->addresses_next;
+
+ return 0;
+}
+
+void sd_dhcp6_lease_reset_pd_prefix_iter(sd_dhcp6_lease *lease) {
+ if (lease)
+ lease->prefix_iter = lease->pd.addresses;
+}
+
+int dhcp6_lease_set_dns(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen) {
+ int r;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ if (!optlen)
+ return 0;
+
+ r = dhcp6_option_parse_ip6addrs(optval, optlen, &lease->dns,
+ lease->dns_count,
+ &lease->dns_allocated);
+ if (r < 0)
+ return log_dhcp6_client_errno(client, r, "Invalid DNS server option: %m");
+
+ lease->dns_count = r;
+
+ return 0;
+}
+
+int sd_dhcp6_lease_get_dns(sd_dhcp6_lease *lease, const struct in6_addr **addrs) {
+ assert_return(lease, -EINVAL);
+ assert_return(addrs, -EINVAL);
+
+ if (lease->dns_count) {
+ *addrs = lease->dns;
+ return lease->dns_count;
+ }
+
+ return -ENOENT;
+}
+
+int dhcp6_lease_set_domains(sd_dhcp6_lease *lease, uint8_t *optval,
+ size_t optlen) {
+ int r;
+ char **domains;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ if (!optlen)
+ return 0;
+
+ r = dhcp6_option_parse_domainname_list(optval, optlen, &domains);
+ if (r < 0)
+ return 0;
+
+ strv_free_and_replace(lease->domains, domains);
+ lease->domains_count = r;
+
+ return r;
+}
+
+int sd_dhcp6_lease_get_domains(sd_dhcp6_lease *lease, char ***domains) {
+ assert_return(lease, -EINVAL);
+ assert_return(domains, -EINVAL);
+
+ if (lease->domains_count) {
+ *domains = lease->domains;
+ return lease->domains_count;
+ }
+
+ return -ENOENT;
+}
+
+int dhcp6_lease_set_ntp(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen) {
+ int r;
+ uint16_t subopt;
+ size_t sublen;
+ uint8_t *subval;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ lease->ntp = mfree(lease->ntp);
+ lease->ntp_count = 0;
+ lease->ntp_allocated = 0;
+
+ while ((r = dhcp6_option_parse(&optval, &optlen, &subopt, &sublen,
+ &subval)) >= 0) {
+ int s;
+ char **servers;
+
+ switch(subopt) {
+ case DHCP6_NTP_SUBOPTION_SRV_ADDR:
+ case DHCP6_NTP_SUBOPTION_MC_ADDR:
+ if (sublen != 16)
+ return 0;
+
+ s = dhcp6_option_parse_ip6addrs(subval, sublen,
+ &lease->ntp,
+ lease->ntp_count,
+ &lease->ntp_allocated);
+ if (s < 0)
+ return s;
+
+ lease->ntp_count = s;
+
+ break;
+
+ case DHCP6_NTP_SUBOPTION_SRV_FQDN:
+ r = dhcp6_option_parse_domainname_list(subval, sublen,
+ &servers);
+ if (r < 0)
+ return 0;
+
+ strv_free_and_replace(lease->ntp_fqdn, servers);
+ lease->ntp_fqdn_count = r;
+
+ break;
+ }
+ }
+
+ if (r != -ENOMSG)
+ return r;
+
+ return 0;
+}
+
+int dhcp6_lease_set_sntp(sd_dhcp6_lease *lease, uint8_t *optval, size_t optlen) {
+ int r;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ if (!optlen)
+ return 0;
+
+ if (lease->ntp || lease->ntp_fqdn) {
+ log_dhcp6_client(client, "NTP information already provided");
+
+ return 0;
+ }
+
+ log_dhcp6_client(client, "Using deprecated SNTP information");
+
+ r = dhcp6_option_parse_ip6addrs(optval, optlen, &lease->ntp,
+ lease->ntp_count,
+ &lease->ntp_allocated);
+ if (r < 0)
+ return log_dhcp6_client_errno(client, r, "Invalid SNTP server option: %m");
+
+ lease->ntp_count = r;
+
+ return 0;
+}
+
+int sd_dhcp6_lease_get_ntp_addrs(sd_dhcp6_lease *lease,
+ const struct in6_addr **addrs) {
+ assert_return(lease, -EINVAL);
+ assert_return(addrs, -EINVAL);
+
+ if (lease->ntp_count) {
+ *addrs = lease->ntp;
+ return lease->ntp_count;
+ }
+
+ return -ENOENT;
+}
+
+int sd_dhcp6_lease_get_ntp_fqdn(sd_dhcp6_lease *lease, char ***ntp_fqdn) {
+ assert_return(lease, -EINVAL);
+ assert_return(ntp_fqdn, -EINVAL);
+
+ if (lease->ntp_fqdn_count) {
+ *ntp_fqdn = lease->ntp_fqdn;
+ return lease->ntp_fqdn_count;
+ }
+
+ return -ENOENT;
+}
+
+int dhcp6_lease_set_fqdn(sd_dhcp6_lease *lease, const uint8_t *optval,
+ size_t optlen) {
+ int r;
+ char *fqdn;
+
+ assert_return(lease, -EINVAL);
+ assert_return(optval, -EINVAL);
+
+ if (optlen < 2)
+ return -ENODATA;
+
+ /* Ignore the flags field, it doesn't carry any useful
+ information for clients. */
+ r = dhcp6_option_parse_domainname(optval + 1, optlen - 1, &fqdn);
+ if (r < 0)
+ return r;
+
+ return free_and_replace(lease->fqdn, fqdn);
+}
+
+int sd_dhcp6_lease_get_fqdn(sd_dhcp6_lease *lease, const char **fqdn) {
+ assert_return(lease, -EINVAL);
+ assert_return(fqdn, -EINVAL);
+
+ if (lease->fqdn) {
+ *fqdn = lease->fqdn;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static sd_dhcp6_lease *dhcp6_lease_free(sd_dhcp6_lease *lease) {
+ assert(lease);
+
+ free(lease->serverid);
+ dhcp6_lease_free_ia(&lease->ia);
+ dhcp6_lease_free_ia(&lease->pd);
+
+ free(lease->dns);
+ free(lease->fqdn);
+
+ lease->domains = strv_free(lease->domains);
+
+ free(lease->ntp);
+
+ lease->ntp_fqdn = strv_free(lease->ntp_fqdn);
+ return mfree(lease);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_dhcp6_lease, sd_dhcp6_lease, dhcp6_lease_free);
+
+int dhcp6_lease_new(sd_dhcp6_lease **ret) {
+ sd_dhcp6_lease *lease;
+
+ lease = new0(sd_dhcp6_lease, 1);
+ if (!lease)
+ return -ENOMEM;
+
+ lease->n_ref = 1;
+
+ LIST_HEAD_INIT(lease->ia.addresses);
+
+ *ret = lease;
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-ipv4acd.c b/src/libsystemd-network/sd-ipv4acd.c
new file mode 100644
index 0000000..2e1e46c
--- /dev/null
+++ b/src/libsystemd-network/sd-ipv4acd.c
@@ -0,0 +1,501 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <netinet/if_ether.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-ipv4acd.h"
+
+#include "alloc-util.h"
+#include "arp-util.h"
+#include "ether-addr-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "in-addr-util.h"
+#include "list.h"
+#include "random-util.h"
+#include "siphash24.h"
+#include "string-util.h"
+#include "time-util.h"
+
+/* Constants from the RFC */
+#define PROBE_WAIT_USEC (1U * USEC_PER_SEC)
+#define PROBE_NUM 3U
+#define PROBE_MIN_USEC (1U * USEC_PER_SEC)
+#define PROBE_MAX_USEC (2U * USEC_PER_SEC)
+#define ANNOUNCE_WAIT_USEC (2U * USEC_PER_SEC)
+#define ANNOUNCE_NUM 2U
+#define ANNOUNCE_INTERVAL_USEC (2U * USEC_PER_SEC)
+#define MAX_CONFLICTS 10U
+#define RATE_LIMIT_INTERVAL_USEC (60U * USEC_PER_SEC)
+#define DEFEND_INTERVAL_USEC (10U * USEC_PER_SEC)
+
+typedef enum IPv4ACDState {
+ IPV4ACD_STATE_INIT,
+ IPV4ACD_STATE_STARTED,
+ IPV4ACD_STATE_WAITING_PROBE,
+ IPV4ACD_STATE_PROBING,
+ IPV4ACD_STATE_WAITING_ANNOUNCE,
+ IPV4ACD_STATE_ANNOUNCING,
+ IPV4ACD_STATE_RUNNING,
+ _IPV4ACD_STATE_MAX,
+ _IPV4ACD_STATE_INVALID = -1
+} IPv4ACDState;
+
+struct sd_ipv4acd {
+ unsigned n_ref;
+
+ IPv4ACDState state;
+ int ifindex;
+ int fd;
+
+ unsigned n_iteration;
+ unsigned n_conflict;
+
+ sd_event_source *receive_message_event_source;
+ sd_event_source *timer_event_source;
+
+ usec_t defend_window;
+ be32_t address;
+
+ /* External */
+ struct ether_addr mac_addr;
+
+ sd_event *event;
+ int event_priority;
+ sd_ipv4acd_callback_t callback;
+ void* userdata;
+};
+
+#define log_ipv4acd_errno(acd, error, fmt, ...) log_internal(LOG_DEBUG, error, PROJECT_FILE, __LINE__, __func__, "IPV4ACD: " fmt, ##__VA_ARGS__)
+#define log_ipv4acd(acd, fmt, ...) log_ipv4acd_errno(acd, 0, fmt, ##__VA_ARGS__)
+
+static void ipv4acd_set_state(sd_ipv4acd *acd, IPv4ACDState st, bool reset_counter) {
+ assert(acd);
+ assert(st < _IPV4ACD_STATE_MAX);
+
+ if (st == acd->state && !reset_counter)
+ acd->n_iteration++;
+ else {
+ acd->state = st;
+ acd->n_iteration = 0;
+ }
+}
+
+static void ipv4acd_reset(sd_ipv4acd *acd) {
+ assert(acd);
+
+ (void) event_source_disable(acd->timer_event_source);
+ acd->receive_message_event_source = sd_event_source_unref(acd->receive_message_event_source);
+
+ acd->fd = safe_close(acd->fd);
+
+ ipv4acd_set_state(acd, IPV4ACD_STATE_INIT, true);
+}
+
+static sd_ipv4acd *ipv4acd_free(sd_ipv4acd *acd) {
+ assert(acd);
+
+ acd->timer_event_source = sd_event_source_unref(acd->timer_event_source);
+
+ ipv4acd_reset(acd);
+ sd_ipv4acd_detach_event(acd);
+
+ return mfree(acd);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_ipv4acd, sd_ipv4acd, ipv4acd_free);
+
+int sd_ipv4acd_new(sd_ipv4acd **ret) {
+ _cleanup_(sd_ipv4acd_unrefp) sd_ipv4acd *acd = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ acd = new(sd_ipv4acd, 1);
+ if (!acd)
+ return -ENOMEM;
+
+ *acd = (sd_ipv4acd) {
+ .n_ref = 1,
+ .state = IPV4ACD_STATE_INIT,
+ .ifindex = -1,
+ .fd = -1,
+ };
+
+ *ret = TAKE_PTR(acd);
+
+ return 0;
+}
+
+static void ipv4acd_client_notify(sd_ipv4acd *acd, int event) {
+ assert(acd);
+
+ if (!acd->callback)
+ return;
+
+ acd->callback(acd, event, acd->userdata);
+}
+
+int sd_ipv4acd_stop(sd_ipv4acd *acd) {
+ IPv4ACDState old_state;
+
+ if (!acd)
+ return 0;
+
+ old_state = acd->state;
+
+ ipv4acd_reset(acd);
+
+ if (old_state == IPV4ACD_STATE_INIT)
+ return 0;
+
+ log_ipv4acd(acd, "STOPPED");
+
+ ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_STOP);
+
+ return 0;
+}
+
+static int ipv4acd_on_timeout(sd_event_source *s, uint64_t usec, void *userdata);
+
+static int ipv4acd_set_next_wakeup(sd_ipv4acd *acd, usec_t usec, usec_t random_usec) {
+ usec_t next_timeout, time_now;
+
+ assert(acd);
+
+ next_timeout = usec;
+
+ if (random_usec > 0)
+ next_timeout += (usec_t) random_u64() % random_usec;
+
+ assert_se(sd_event_now(acd->event, clock_boottime_or_monotonic(), &time_now) >= 0);
+
+ return event_reset_time(acd->event, &acd->timer_event_source,
+ clock_boottime_or_monotonic(),
+ time_now + next_timeout, 0,
+ ipv4acd_on_timeout, acd,
+ acd->event_priority, "ipv4acd-timer", true);
+}
+
+static bool ipv4acd_arp_conflict(sd_ipv4acd *acd, struct ether_arp *arp) {
+ assert(acd);
+ assert(arp);
+
+ /* see the BPF */
+ if (memcmp(arp->arp_spa, &acd->address, sizeof(acd->address)) == 0)
+ return true;
+
+ /* the TPA matched instead of the SPA, this is not a conflict */
+ return false;
+}
+
+static int ipv4acd_on_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_ipv4acd *acd = userdata;
+ int r = 0;
+
+ assert(acd);
+
+ switch (acd->state) {
+
+ case IPV4ACD_STATE_STARTED:
+ ipv4acd_set_state(acd, IPV4ACD_STATE_WAITING_PROBE, true);
+
+ if (acd->n_conflict >= MAX_CONFLICTS) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ log_ipv4acd(acd, "Max conflicts reached, delaying by %s", format_timespan(ts, sizeof(ts), RATE_LIMIT_INTERVAL_USEC, 0));
+
+ r = ipv4acd_set_next_wakeup(acd, RATE_LIMIT_INTERVAL_USEC, PROBE_WAIT_USEC);
+ if (r < 0)
+ goto fail;
+ } else {
+ r = ipv4acd_set_next_wakeup(acd, 0, PROBE_WAIT_USEC);
+ if (r < 0)
+ goto fail;
+ }
+
+ break;
+
+ case IPV4ACD_STATE_WAITING_PROBE:
+ case IPV4ACD_STATE_PROBING:
+ /* Send a probe */
+ r = arp_send_probe(acd->fd, acd->ifindex, acd->address, &acd->mac_addr);
+ if (r < 0) {
+ log_ipv4acd_errno(acd, r, "Failed to send ARP probe: %m");
+ goto fail;
+ } else {
+ _cleanup_free_ char *address = NULL;
+ union in_addr_union addr = { .in.s_addr = acd->address };
+
+ (void) in_addr_to_string(AF_INET, &addr, &address);
+ log_ipv4acd(acd, "Probing %s", strna(address));
+ }
+
+ if (acd->n_iteration < PROBE_NUM - 2) {
+ ipv4acd_set_state(acd, IPV4ACD_STATE_PROBING, false);
+
+ r = ipv4acd_set_next_wakeup(acd, PROBE_MIN_USEC, (PROBE_MAX_USEC-PROBE_MIN_USEC));
+ if (r < 0)
+ goto fail;
+ } else {
+ ipv4acd_set_state(acd, IPV4ACD_STATE_WAITING_ANNOUNCE, true);
+
+ r = ipv4acd_set_next_wakeup(acd, ANNOUNCE_WAIT_USEC, 0);
+ if (r < 0)
+ goto fail;
+ }
+
+ break;
+
+ case IPV4ACD_STATE_ANNOUNCING:
+ if (acd->n_iteration >= ANNOUNCE_NUM - 1) {
+ ipv4acd_set_state(acd, IPV4ACD_STATE_RUNNING, false);
+ break;
+ }
+
+ _fallthrough_;
+ case IPV4ACD_STATE_WAITING_ANNOUNCE:
+ /* Send announcement packet */
+ r = arp_send_announcement(acd->fd, acd->ifindex, acd->address, &acd->mac_addr);
+ if (r < 0) {
+ log_ipv4acd_errno(acd, r, "Failed to send ARP announcement: %m");
+ goto fail;
+ } else
+ log_ipv4acd(acd, "ANNOUNCE");
+
+ ipv4acd_set_state(acd, IPV4ACD_STATE_ANNOUNCING, false);
+
+ r = ipv4acd_set_next_wakeup(acd, ANNOUNCE_INTERVAL_USEC, 0);
+ if (r < 0)
+ goto fail;
+
+ if (acd->n_iteration == 0) {
+ acd->n_conflict = 0;
+ ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_BIND);
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Invalid state.");
+ }
+
+ return 0;
+
+fail:
+ sd_ipv4acd_stop(acd);
+ return 0;
+}
+
+static void ipv4acd_on_conflict(sd_ipv4acd *acd) {
+ _cleanup_free_ char *address = NULL;
+ union in_addr_union addr = { .in.s_addr = acd->address };
+
+ assert(acd);
+
+ acd->n_conflict++;
+
+ (void) in_addr_to_string(AF_INET, &addr, &address);
+ log_ipv4acd(acd, "Conflict on %s (%u)", strna(address), acd->n_conflict);
+
+ ipv4acd_reset(acd);
+ ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_CONFLICT);
+}
+
+static int ipv4acd_on_packet(
+ sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+
+ sd_ipv4acd *acd = userdata;
+ struct ether_arp packet;
+ ssize_t n;
+ int r;
+
+ assert(s);
+ assert(acd);
+ assert(fd >= 0);
+
+ n = recv(fd, &packet, sizeof(struct ether_arp), 0);
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ log_ipv4acd_errno(acd, errno, "Failed to read ARP packet: %m");
+ goto fail;
+ }
+ if ((size_t) n != sizeof(struct ether_arp)) {
+ log_ipv4acd(acd, "Ignoring too short ARP packet.");
+ return 0;
+ }
+
+ switch (acd->state) {
+
+ case IPV4ACD_STATE_ANNOUNCING:
+ case IPV4ACD_STATE_RUNNING:
+
+ if (ipv4acd_arp_conflict(acd, &packet)) {
+ usec_t ts;
+
+ assert_se(sd_event_now(acd->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ /* Defend address */
+ if (ts > acd->defend_window) {
+ acd->defend_window = ts + DEFEND_INTERVAL_USEC;
+ r = arp_send_announcement(acd->fd, acd->ifindex, acd->address, &acd->mac_addr);
+ if (r < 0) {
+ log_ipv4acd_errno(acd, r, "Failed to send ARP announcement: %m");
+ goto fail;
+ } else
+ log_ipv4acd(acd, "DEFEND");
+
+ } else
+ ipv4acd_on_conflict(acd);
+ }
+ break;
+
+ case IPV4ACD_STATE_WAITING_PROBE:
+ case IPV4ACD_STATE_PROBING:
+ case IPV4ACD_STATE_WAITING_ANNOUNCE:
+ /* BPF ensures this packet indicates a conflict */
+ ipv4acd_on_conflict(acd);
+ break;
+
+ default:
+ assert_not_reached("Invalid state.");
+ }
+
+ return 0;
+
+fail:
+ sd_ipv4acd_stop(acd);
+ return 0;
+}
+
+int sd_ipv4acd_set_ifindex(sd_ipv4acd *acd, int ifindex) {
+ assert_return(acd, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY);
+
+ acd->ifindex = ifindex;
+
+ return 0;
+}
+
+int sd_ipv4acd_set_mac(sd_ipv4acd *acd, const struct ether_addr *addr) {
+ assert_return(acd, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY);
+
+ acd->mac_addr = *addr;
+
+ return 0;
+}
+
+int sd_ipv4acd_detach_event(sd_ipv4acd *acd) {
+ assert_return(acd, -EINVAL);
+
+ acd->event = sd_event_unref(acd->event);
+
+ return 0;
+}
+
+int sd_ipv4acd_attach_event(sd_ipv4acd *acd, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(acd, -EINVAL);
+ assert_return(!acd->event, -EBUSY);
+
+ if (event)
+ acd->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&acd->event);
+ if (r < 0)
+ return r;
+ }
+
+ acd->event_priority = priority;
+
+ return 0;
+}
+
+int sd_ipv4acd_set_callback(sd_ipv4acd *acd, sd_ipv4acd_callback_t cb, void *userdata) {
+ assert_return(acd, -EINVAL);
+
+ acd->callback = cb;
+ acd->userdata = userdata;
+
+ return 0;
+}
+
+int sd_ipv4acd_set_address(sd_ipv4acd *acd, const struct in_addr *address) {
+ assert_return(acd, -EINVAL);
+ assert_return(address, -EINVAL);
+ assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY);
+
+ acd->address = address->s_addr;
+
+ return 0;
+}
+
+int sd_ipv4acd_get_address(sd_ipv4acd *acd, struct in_addr *address) {
+ assert_return(acd, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ address->s_addr = acd->address;
+
+ return 0;
+}
+
+int sd_ipv4acd_is_running(sd_ipv4acd *acd) {
+ assert_return(acd, false);
+
+ return acd->state != IPV4ACD_STATE_INIT;
+}
+
+int sd_ipv4acd_start(sd_ipv4acd *acd, bool reset_conflicts) {
+ int r;
+
+ assert_return(acd, -EINVAL);
+ assert_return(acd->event, -EINVAL);
+ assert_return(acd->ifindex > 0, -EINVAL);
+ assert_return(acd->address != 0, -EINVAL);
+ assert_return(!ether_addr_is_null(&acd->mac_addr), -EINVAL);
+ assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY);
+
+ r = arp_network_bind_raw_socket(acd->ifindex, acd->address, &acd->mac_addr);
+ if (r < 0)
+ return r;
+
+ CLOSE_AND_REPLACE(acd->fd, r);
+ acd->defend_window = 0;
+
+ if (reset_conflicts)
+ acd->n_conflict = 0;
+
+ r = sd_event_add_io(acd->event, &acd->receive_message_event_source, acd->fd, EPOLLIN, ipv4acd_on_packet, acd);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(acd->receive_message_event_source, acd->event_priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(acd->receive_message_event_source, "ipv4acd-receive-message");
+
+ r = ipv4acd_set_next_wakeup(acd, 0, 0);
+ if (r < 0)
+ goto fail;
+
+ ipv4acd_set_state(acd, IPV4ACD_STATE_STARTED, true);
+ return 0;
+
+fail:
+ ipv4acd_reset(acd);
+ return r;
+}
diff --git a/src/libsystemd-network/sd-ipv4ll.c b/src/libsystemd-network/sd-ipv4ll.c
new file mode 100644
index 0000000..09f2bda
--- /dev/null
+++ b/src/libsystemd-network/sd-ipv4ll.c
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-id128.h"
+#include "sd-ipv4acd.h"
+#include "sd-ipv4ll.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "in-addr-util.h"
+#include "list.h"
+#include "random-util.h"
+#include "siphash24.h"
+#include "sparse-endian.h"
+#include "string-util.h"
+#include "util.h"
+
+#define IPV4LL_NETWORK UINT32_C(0xA9FE0000)
+#define IPV4LL_NETMASK UINT32_C(0xFFFF0000)
+
+#define IPV4LL_DONT_DESTROY(ll) \
+ _cleanup_(sd_ipv4ll_unrefp) _unused_ sd_ipv4ll *_dont_destroy_##ll = sd_ipv4ll_ref(ll)
+
+struct sd_ipv4ll {
+ unsigned n_ref;
+
+ sd_ipv4acd *acd;
+
+ be32_t address; /* the address pushed to ACD */
+ struct ether_addr mac;
+
+ struct {
+ le64_t value;
+ le64_t generation;
+ } seed;
+ bool seed_set;
+
+ /* External */
+ be32_t claimed_address;
+
+ sd_ipv4ll_callback_t callback;
+ void* userdata;
+};
+
+#define log_ipv4ll_errno(ll, error, fmt, ...) log_internal(LOG_DEBUG, error, PROJECT_FILE, __LINE__, __func__, "IPV4LL: " fmt, ##__VA_ARGS__)
+#define log_ipv4ll(ll, fmt, ...) log_ipv4ll_errno(ll, 0, fmt, ##__VA_ARGS__)
+
+static void ipv4ll_on_acd(sd_ipv4acd *ll, int event, void *userdata);
+
+static sd_ipv4ll *ipv4ll_free(sd_ipv4ll *ll) {
+ assert(ll);
+
+ sd_ipv4acd_unref(ll->acd);
+ return mfree(ll);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_ipv4ll, sd_ipv4ll, ipv4ll_free);
+
+int sd_ipv4ll_new(sd_ipv4ll **ret) {
+ _cleanup_(sd_ipv4ll_unrefp) sd_ipv4ll *ll = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ ll = new0(sd_ipv4ll, 1);
+ if (!ll)
+ return -ENOMEM;
+
+ ll->n_ref = 1;
+
+ r = sd_ipv4acd_new(&ll->acd);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_callback(ll->acd, ipv4ll_on_acd, ll);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(ll);
+
+ return 0;
+}
+
+int sd_ipv4ll_stop(sd_ipv4ll *ll) {
+ if (!ll)
+ return 0;
+
+ return sd_ipv4acd_stop(ll->acd);
+}
+
+int sd_ipv4ll_set_ifindex(sd_ipv4ll *ll, int ifindex) {
+ assert_return(ll, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY);
+
+ return sd_ipv4acd_set_ifindex(ll->acd, ifindex);
+}
+
+int sd_ipv4ll_set_mac(sd_ipv4ll *ll, const struct ether_addr *addr) {
+ int r;
+
+ assert_return(ll, -EINVAL);
+ assert_return(addr, -EINVAL);
+ assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY);
+
+ r = sd_ipv4acd_set_mac(ll->acd, addr);
+ if (r < 0)
+ return r;
+
+ ll->mac = *addr;
+ return 0;
+}
+
+int sd_ipv4ll_detach_event(sd_ipv4ll *ll) {
+ assert_return(ll, -EINVAL);
+
+ return sd_ipv4acd_detach_event(ll->acd);
+}
+
+int sd_ipv4ll_attach_event(sd_ipv4ll *ll, sd_event *event, int64_t priority) {
+ assert_return(ll, -EINVAL);
+
+ return sd_ipv4acd_attach_event(ll->acd, event, priority);
+}
+
+int sd_ipv4ll_set_callback(sd_ipv4ll *ll, sd_ipv4ll_callback_t cb, void *userdata) {
+ assert_return(ll, -EINVAL);
+
+ ll->callback = cb;
+ ll->userdata = userdata;
+
+ return 0;
+}
+
+int sd_ipv4ll_get_address(sd_ipv4ll *ll, struct in_addr *address) {
+ assert_return(ll, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ if (ll->claimed_address == 0)
+ return -ENOENT;
+
+ address->s_addr = ll->claimed_address;
+
+ return 0;
+}
+
+int sd_ipv4ll_set_address_seed(sd_ipv4ll *ll, uint64_t seed) {
+ assert_return(ll, -EINVAL);
+ assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY);
+
+ ll->seed.value = htole64(seed);
+ ll->seed_set = true;
+
+ return 0;
+}
+
+int sd_ipv4ll_is_running(sd_ipv4ll *ll) {
+ assert_return(ll, false);
+
+ return sd_ipv4acd_is_running(ll->acd);
+}
+
+static bool ipv4ll_address_is_valid(const struct in_addr *address) {
+ assert(address);
+
+ if (!in_addr_is_link_local(AF_INET, (const union in_addr_union *) address))
+ return false;
+
+ return !IN_SET(be32toh(address->s_addr) & 0x0000FF00U, 0x0000U, 0xFF00U);
+}
+
+int sd_ipv4ll_set_address(sd_ipv4ll *ll, const struct in_addr *address) {
+ int r;
+
+ assert_return(ll, -EINVAL);
+ assert_return(address, -EINVAL);
+ assert_return(ipv4ll_address_is_valid(address), -EINVAL);
+
+ r = sd_ipv4acd_set_address(ll->acd, address);
+ if (r < 0)
+ return r;
+
+ ll->address = address->s_addr;
+
+ return 0;
+}
+
+#define PICK_HASH_KEY SD_ID128_MAKE(15,ac,82,a6,d6,3f,49,78,98,77,5d,0c,69,02,94,0b)
+
+static int ipv4ll_pick_address(sd_ipv4ll *ll) {
+ _cleanup_free_ char *address = NULL;
+ be32_t addr;
+
+ assert(ll);
+
+ do {
+ uint64_t h;
+
+ h = siphash24(&ll->seed, sizeof(ll->seed), PICK_HASH_KEY.bytes);
+
+ /* Increase the generation counter by one */
+ ll->seed.generation = htole64(le64toh(ll->seed.generation) + 1);
+
+ addr = htobe32((h & UINT32_C(0x0000FFFF)) | IPV4LL_NETWORK);
+ } while (addr == ll->address ||
+ IN_SET(be32toh(addr) & 0x0000FF00U, 0x0000U, 0xFF00U));
+
+ (void) in_addr_to_string(AF_INET, &(union in_addr_union) { .in.s_addr = addr }, &address);
+ log_ipv4ll(ll, "Picked new IP address %s.", strna(address));
+
+ return sd_ipv4ll_set_address(ll, &(struct in_addr) { addr });
+}
+
+#define MAC_HASH_KEY SD_ID128_MAKE(df,04,22,98,3f,ad,14,52,f9,87,2e,d1,9c,70,e2,f2)
+
+static int ipv4ll_start_internal(sd_ipv4ll *ll, bool reset_generation) {
+ int r;
+ bool picked_address = false;
+
+ assert_return(ll, -EINVAL);
+ assert_return(!ether_addr_is_null(&ll->mac), -EINVAL);
+
+ /* If no random seed is set, generate some from the MAC address */
+ if (!ll->seed_set)
+ ll->seed.value = htole64(siphash24(ll->mac.ether_addr_octet, ETH_ALEN, MAC_HASH_KEY.bytes));
+
+ if (reset_generation)
+ ll->seed.generation = 0;
+
+ if (ll->address == 0) {
+ r = ipv4ll_pick_address(ll);
+ if (r < 0)
+ return r;
+
+ picked_address = true;
+ }
+
+ r = sd_ipv4acd_start(ll->acd, reset_generation);
+ if (r < 0) {
+
+ /* We couldn't start? If so, let's forget the picked address again, the user might make a change and
+ * retry, and we want the new data to take effect when picking an address. */
+ if (picked_address)
+ ll->address = 0;
+
+ return r;
+ }
+
+ return 1;
+}
+
+int sd_ipv4ll_start(sd_ipv4ll *ll) {
+ assert_return(ll, -EINVAL);
+
+ if (sd_ipv4ll_is_running(ll))
+ return 0;
+
+ return ipv4ll_start_internal(ll, true);
+}
+
+int sd_ipv4ll_restart(sd_ipv4ll *ll) {
+ ll->address = 0;
+
+ return ipv4ll_start_internal(ll, false);
+}
+
+static void ipv4ll_client_notify(sd_ipv4ll *ll, int event) {
+ assert(ll);
+
+ if (ll->callback)
+ ll->callback(ll, event, ll->userdata);
+}
+
+void ipv4ll_on_acd(sd_ipv4acd *acd, int event, void *userdata) {
+ sd_ipv4ll *ll = userdata;
+ IPV4LL_DONT_DESTROY(ll);
+ int r;
+
+ assert(acd);
+ assert(ll);
+
+ switch (event) {
+
+ case SD_IPV4ACD_EVENT_STOP:
+ ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_STOP);
+ ll->claimed_address = 0;
+ break;
+
+ case SD_IPV4ACD_EVENT_BIND:
+ ll->claimed_address = ll->address;
+ ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_BIND);
+ break;
+
+ case SD_IPV4ACD_EVENT_CONFLICT:
+ /* if an address was already bound we must call up to the
+ user to handle this, otherwise we just try again */
+ if (ll->claimed_address != 0) {
+ ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_CONFLICT);
+
+ ll->claimed_address = 0;
+ } else {
+ r = sd_ipv4ll_restart(ll);
+ if (r < 0)
+ goto error;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Invalid IPv4ACD event.");
+ }
+
+ return;
+
+error:
+ ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_STOP);
+}
diff --git a/src/libsystemd-network/sd-lldp.c b/src/libsystemd-network/sd-lldp.c
new file mode 100644
index 0000000..8b66652
--- /dev/null
+++ b/src/libsystemd-network/sd-lldp.c
@@ -0,0 +1,498 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+
+#include "sd-lldp.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "lldp-internal.h"
+#include "lldp-neighbor.h"
+#include "lldp-network.h"
+#include "memory-util.h"
+#include "socket-util.h"
+#include "sort-util.h"
+#include "string-table.h"
+
+#define LLDP_DEFAULT_NEIGHBORS_MAX 128U
+
+static const char * const lldp_event_table[_SD_LLDP_EVENT_MAX] = {
+ [SD_LLDP_EVENT_ADDED] = "added",
+ [SD_LLDP_EVENT_REMOVED] = "removed",
+ [SD_LLDP_EVENT_UPDATED] = "updated",
+ [SD_LLDP_EVENT_REFRESHED] = "refreshed",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(lldp_event, sd_lldp_event);
+
+static void lldp_flush_neighbors(sd_lldp *lldp) {
+ assert(lldp);
+
+ hashmap_clear(lldp->neighbor_by_id);
+}
+
+static void lldp_callback(sd_lldp *lldp, sd_lldp_event event, sd_lldp_neighbor *n) {
+ assert(lldp);
+ assert(event >= 0 && event < _SD_LLDP_EVENT_MAX);
+
+ if (!lldp->callback) {
+ log_lldp("Received '%s' event.", lldp_event_to_string(event));
+ return;
+ }
+
+ log_lldp("Invoking callback for '%s' event.", lldp_event_to_string(event));
+ lldp->callback(lldp, event, n, lldp->userdata);
+}
+
+static int lldp_make_space(sd_lldp *lldp, size_t extra) {
+ usec_t t = USEC_INFINITY;
+ bool changed = false;
+
+ assert(lldp);
+
+ /* Remove all entries that are past their TTL, and more until at least the specified number of extra entries
+ * are free. */
+
+ for (;;) {
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+
+ n = prioq_peek(lldp->neighbor_by_expiry);
+ if (!n)
+ break;
+
+ sd_lldp_neighbor_ref(n);
+
+ if (hashmap_size(lldp->neighbor_by_id) > LESS_BY(lldp->neighbors_max, extra))
+ goto remove_one;
+
+ if (t == USEC_INFINITY)
+ t = now(clock_boottime_or_monotonic());
+
+ if (n->until > t)
+ break;
+
+ remove_one:
+ lldp_neighbor_unlink(n);
+ lldp_callback(lldp, SD_LLDP_EVENT_REMOVED, n);
+ changed = true;
+ }
+
+ return changed;
+}
+
+static bool lldp_keep_neighbor(sd_lldp *lldp, sd_lldp_neighbor *n) {
+ assert(lldp);
+ assert(n);
+
+ /* Don't keep data with a zero TTL */
+ if (n->ttl <= 0)
+ return false;
+
+ /* Filter out data from the filter address */
+ if (!ether_addr_is_null(&lldp->filter_address) &&
+ ether_addr_equal(&lldp->filter_address, &n->source_address))
+ return false;
+
+ /* Only add if the neighbor has a capability we are interested in. Note that we also store all neighbors with
+ * no caps field set. */
+ if (n->has_capabilities &&
+ (n->enabled_capabilities & lldp->capability_mask) == 0)
+ return false;
+
+ /* Keep everything else */
+ return true;
+}
+
+static int lldp_start_timer(sd_lldp *lldp, sd_lldp_neighbor *neighbor);
+
+static int lldp_add_neighbor(sd_lldp *lldp, sd_lldp_neighbor *n) {
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *old = NULL;
+ bool keep;
+ int r;
+
+ assert(lldp);
+ assert(n);
+ assert(!n->lldp);
+
+ keep = lldp_keep_neighbor(lldp, n);
+
+ /* First retrieve the old entry for this MSAP */
+ old = hashmap_get(lldp->neighbor_by_id, &n->id);
+ if (old) {
+ sd_lldp_neighbor_ref(old);
+
+ if (!keep) {
+ lldp_neighbor_unlink(old);
+ lldp_callback(lldp, SD_LLDP_EVENT_REMOVED, old);
+ return 0;
+ }
+
+ if (lldp_neighbor_equal(n, old)) {
+ /* Is this equal, then restart the TTL counter, but don't do anything else. */
+ old->timestamp = n->timestamp;
+ lldp_start_timer(lldp, old);
+ lldp_callback(lldp, SD_LLDP_EVENT_REFRESHED, old);
+ return 0;
+ }
+
+ /* Data changed, remove the old entry, and add a new one */
+ lldp_neighbor_unlink(old);
+
+ } else if (!keep)
+ return 0;
+
+ /* Then, make room for at least one new neighbor */
+ lldp_make_space(lldp, 1);
+
+ r = hashmap_put(lldp->neighbor_by_id, &n->id, n);
+ if (r < 0)
+ goto finish;
+
+ r = prioq_put(lldp->neighbor_by_expiry, n, &n->prioq_idx);
+ if (r < 0) {
+ assert_se(hashmap_remove(lldp->neighbor_by_id, &n->id) == n);
+ goto finish;
+ }
+
+ n->lldp = lldp;
+
+ lldp_start_timer(lldp, n);
+ lldp_callback(lldp, old ? SD_LLDP_EVENT_UPDATED : SD_LLDP_EVENT_ADDED, n);
+
+ return 1;
+
+finish:
+ if (old)
+ lldp_callback(lldp, SD_LLDP_EVENT_REMOVED, old);
+
+ return r;
+}
+
+static int lldp_handle_datagram(sd_lldp *lldp, sd_lldp_neighbor *n) {
+ int r;
+
+ assert(lldp);
+ assert(n);
+
+ r = lldp_neighbor_parse(n);
+ if (r == -EBADMSG) /* Ignore bad messages */
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = lldp_add_neighbor(lldp, n);
+ if (r < 0) {
+ log_lldp_errno(r, "Failed to add datagram. Ignoring.");
+ return 0;
+ }
+
+ log_lldp("Successfully processed LLDP datagram.");
+ return 0;
+}
+
+static int lldp_receive_datagram(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+ ssize_t space, length;
+ sd_lldp *lldp = userdata;
+ struct timespec ts;
+
+ assert(fd >= 0);
+ assert(lldp);
+
+ space = next_datagram_size_fd(fd);
+ if (space < 0)
+ return log_lldp_errno(space, "Failed to determine datagram size to read: %m");
+
+ n = lldp_neighbor_new(space);
+ if (!n)
+ return -ENOMEM;
+
+ length = recv(fd, LLDP_NEIGHBOR_RAW(n), n->raw_size, MSG_DONTWAIT);
+ if (length < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_lldp_errno(errno, "Failed to read LLDP datagram: %m");
+ }
+
+ if ((size_t) length != n->raw_size) {
+ log_lldp("Packet size mismatch.");
+ return -EINVAL;
+ }
+
+ /* Try to get the timestamp of this packet if it is known */
+ if (ioctl(fd, SIOCGSTAMPNS, &ts) >= 0)
+ triple_timestamp_from_realtime(&n->timestamp, timespec_load(&ts));
+ else
+ triple_timestamp_get(&n->timestamp);
+
+ return lldp_handle_datagram(lldp, n);
+}
+
+static void lldp_reset(sd_lldp *lldp) {
+ assert(lldp);
+
+ (void) event_source_disable(lldp->timer_event_source);
+ lldp->io_event_source = sd_event_source_unref(lldp->io_event_source);
+ lldp->fd = safe_close(lldp->fd);
+}
+
+_public_ int sd_lldp_start(sd_lldp *lldp) {
+ int r;
+
+ assert_return(lldp, -EINVAL);
+ assert_return(lldp->event, -EINVAL);
+ assert_return(lldp->ifindex > 0, -EINVAL);
+
+ if (lldp->fd >= 0)
+ return 0;
+
+ assert(!lldp->io_event_source);
+
+ lldp->fd = lldp_network_bind_raw_socket(lldp->ifindex);
+ if (lldp->fd < 0)
+ return lldp->fd;
+
+ r = sd_event_add_io(lldp->event, &lldp->io_event_source, lldp->fd, EPOLLIN, lldp_receive_datagram, lldp);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(lldp->io_event_source, lldp->event_priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(lldp->io_event_source, "lldp-io");
+
+ log_lldp("Started LLDP client");
+ return 1;
+
+fail:
+ lldp_reset(lldp);
+ return r;
+}
+
+_public_ int sd_lldp_stop(sd_lldp *lldp) {
+ if (!lldp)
+ return 0;
+
+ if (lldp->fd < 0)
+ return 0;
+
+ log_lldp("Stopping LLDP client");
+
+ lldp_reset(lldp);
+ lldp_flush_neighbors(lldp);
+
+ return 1;
+}
+
+_public_ int sd_lldp_attach_event(sd_lldp *lldp, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(lldp, -EINVAL);
+ assert_return(lldp->fd < 0, -EBUSY);
+ assert_return(!lldp->event, -EBUSY);
+
+ if (event)
+ lldp->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&lldp->event);
+ if (r < 0)
+ return r;
+ }
+
+ lldp->event_priority = priority;
+
+ return 0;
+}
+
+_public_ int sd_lldp_detach_event(sd_lldp *lldp) {
+
+ assert_return(lldp, -EINVAL);
+ assert_return(lldp->fd < 0, -EBUSY);
+
+ lldp->event = sd_event_unref(lldp->event);
+ return 0;
+}
+
+_public_ sd_event* sd_lldp_get_event(sd_lldp *lldp) {
+ assert_return(lldp, NULL);
+
+ return lldp->event;
+}
+
+_public_ int sd_lldp_set_callback(sd_lldp *lldp, sd_lldp_callback_t cb, void *userdata) {
+ assert_return(lldp, -EINVAL);
+
+ lldp->callback = cb;
+ lldp->userdata = userdata;
+
+ return 0;
+}
+
+_public_ int sd_lldp_set_ifindex(sd_lldp *lldp, int ifindex) {
+ assert_return(lldp, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(lldp->fd < 0, -EBUSY);
+
+ lldp->ifindex = ifindex;
+ return 0;
+}
+
+static sd_lldp* lldp_free(sd_lldp *lldp) {
+ assert(lldp);
+
+ lldp->timer_event_source = sd_event_source_unref(lldp->timer_event_source);
+
+ lldp_reset(lldp);
+ sd_lldp_detach_event(lldp);
+ lldp_flush_neighbors(lldp);
+
+ hashmap_free(lldp->neighbor_by_id);
+ prioq_free(lldp->neighbor_by_expiry);
+ return mfree(lldp);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_lldp, sd_lldp, lldp_free);
+
+_public_ int sd_lldp_new(sd_lldp **ret) {
+ _cleanup_(sd_lldp_unrefp) sd_lldp *lldp = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ lldp = new(sd_lldp, 1);
+ if (!lldp)
+ return -ENOMEM;
+
+ *lldp = (sd_lldp) {
+ .n_ref = 1,
+ .fd = -1,
+ .neighbors_max = LLDP_DEFAULT_NEIGHBORS_MAX,
+ .capability_mask = (uint16_t) -1,
+ };
+
+ lldp->neighbor_by_id = hashmap_new(&lldp_neighbor_hash_ops);
+ if (!lldp->neighbor_by_id)
+ return -ENOMEM;
+
+ r = prioq_ensure_allocated(&lldp->neighbor_by_expiry, lldp_neighbor_prioq_compare_func);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(lldp);
+
+ return 0;
+}
+
+static int neighbor_compare_func(sd_lldp_neighbor * const *a, sd_lldp_neighbor * const *b) {
+ return lldp_neighbor_id_compare_func(&(*a)->id, &(*b)->id);
+}
+
+static int on_timer_event(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_lldp *lldp = userdata;
+ int r;
+
+ r = lldp_make_space(lldp, 0);
+ if (r < 0)
+ return log_lldp_errno(r, "Failed to make space: %m");
+
+ r = lldp_start_timer(lldp, NULL);
+ if (r < 0)
+ return log_lldp_errno(r, "Failed to restart timer: %m");
+
+ return 0;
+}
+
+static int lldp_start_timer(sd_lldp *lldp, sd_lldp_neighbor *neighbor) {
+ sd_lldp_neighbor *n;
+
+ assert(lldp);
+
+ if (neighbor)
+ lldp_neighbor_start_ttl(neighbor);
+
+ n = prioq_peek(lldp->neighbor_by_expiry);
+ if (!n)
+ return event_source_disable(lldp->timer_event_source);
+
+ if (!lldp->event)
+ return 0;
+
+ return event_reset_time(lldp->event, &lldp->timer_event_source,
+ clock_boottime_or_monotonic(),
+ n->until, 0,
+ on_timer_event, lldp,
+ lldp->event_priority, "lldp-timer", true);
+}
+
+_public_ int sd_lldp_get_neighbors(sd_lldp *lldp, sd_lldp_neighbor ***ret) {
+ sd_lldp_neighbor **l = NULL, *n;
+ int k = 0, r;
+
+ assert_return(lldp, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (hashmap_isempty(lldp->neighbor_by_id)) { /* Special shortcut */
+ *ret = NULL;
+ return 0;
+ }
+
+ l = new0(sd_lldp_neighbor*, hashmap_size(lldp->neighbor_by_id));
+ if (!l)
+ return -ENOMEM;
+
+ r = lldp_start_timer(lldp, NULL);
+ if (r < 0) {
+ free(l);
+ return r;
+ }
+
+ HASHMAP_FOREACH(n, lldp->neighbor_by_id)
+ l[k++] = sd_lldp_neighbor_ref(n);
+
+ assert((size_t) k == hashmap_size(lldp->neighbor_by_id));
+
+ /* Return things in a stable order */
+ typesafe_qsort(l, k, neighbor_compare_func);
+ *ret = l;
+
+ return k;
+}
+
+_public_ int sd_lldp_set_neighbors_max(sd_lldp *lldp, uint64_t m) {
+ assert_return(lldp, -EINVAL);
+ assert_return(m > 0, -EINVAL);
+
+ lldp->neighbors_max = m;
+ lldp_make_space(lldp, 0);
+
+ return 0;
+}
+
+_public_ int sd_lldp_match_capabilities(sd_lldp *lldp, uint16_t mask) {
+ assert_return(lldp, -EINVAL);
+ assert_return(mask != 0, -EINVAL);
+
+ lldp->capability_mask = mask;
+
+ return 0;
+}
+
+_public_ int sd_lldp_set_filter_address(sd_lldp *lldp, const struct ether_addr *addr) {
+ assert_return(lldp, -EINVAL);
+
+ /* In order to deal nicely with bridges that send back our own packets, allow one address to be filtered, so
+ * that our own can be filtered out here. */
+
+ if (addr)
+ lldp->filter_address = *addr;
+ else
+ zero(lldp->filter_address);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/sd-ndisc.c b/src/libsystemd-network/sd-ndisc.c
new file mode 100644
index 0000000..db7ada6
--- /dev/null
+++ b/src/libsystemd-network/sd-ndisc.c
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <netinet/in.h>
+
+#include "sd-ndisc.h"
+
+#include "alloc-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "icmp6-util.h"
+#include "in-addr-util.h"
+#include "memory-util.h"
+#include "ndisc-internal.h"
+#include "ndisc-router.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+
+#define NDISC_TIMEOUT_NO_RA_USEC (NDISC_ROUTER_SOLICITATION_INTERVAL * NDISC_MAX_ROUTER_SOLICITATIONS)
+
+static const char * const ndisc_event_table[_SD_NDISC_EVENT_MAX] = {
+ [SD_NDISC_EVENT_TIMEOUT] = "timeout",
+ [SD_NDISC_EVENT_ROUTER] = "router",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ndisc_event, sd_ndisc_event);
+
+static void ndisc_callback(sd_ndisc *ndisc, sd_ndisc_event event, sd_ndisc_router *rt) {
+ assert(ndisc);
+ assert(event >= 0 && event < _SD_NDISC_EVENT_MAX);
+
+ if (!ndisc->callback) {
+ log_ndisc("Received '%s' event.", ndisc_event_to_string(event));
+ return;
+ }
+
+ log_ndisc("Invoking callback for '%s' event.", ndisc_event_to_string(event));
+ ndisc->callback(ndisc, event, rt, ndisc->userdata);
+}
+
+_public_ int sd_ndisc_set_callback(
+ sd_ndisc *nd,
+ sd_ndisc_callback_t callback,
+ void *userdata) {
+
+ assert_return(nd, -EINVAL);
+
+ nd->callback = callback;
+ nd->userdata = userdata;
+
+ return 0;
+}
+
+_public_ int sd_ndisc_set_ifindex(sd_ndisc *nd, int ifindex) {
+ assert_return(nd, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(nd->fd < 0, -EBUSY);
+
+ nd->ifindex = ifindex;
+ return 0;
+}
+
+_public_ int sd_ndisc_set_mac(sd_ndisc *nd, const struct ether_addr *mac_addr) {
+ assert_return(nd, -EINVAL);
+
+ if (mac_addr)
+ nd->mac_addr = *mac_addr;
+ else
+ zero(nd->mac_addr);
+
+ return 0;
+}
+
+_public_ int sd_ndisc_attach_event(sd_ndisc *nd, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(nd, -EINVAL);
+ assert_return(nd->fd < 0, -EBUSY);
+ assert_return(!nd->event, -EBUSY);
+
+ if (event)
+ nd->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&nd->event);
+ if (r < 0)
+ return 0;
+ }
+
+ nd->event_priority = priority;
+
+ return 0;
+}
+
+_public_ int sd_ndisc_detach_event(sd_ndisc *nd) {
+
+ assert_return(nd, -EINVAL);
+ assert_return(nd->fd < 0, -EBUSY);
+
+ nd->event = sd_event_unref(nd->event);
+ return 0;
+}
+
+_public_ sd_event *sd_ndisc_get_event(sd_ndisc *nd) {
+ assert_return(nd, NULL);
+
+ return nd->event;
+}
+
+static void ndisc_reset(sd_ndisc *nd) {
+ assert(nd);
+
+ (void) event_source_disable(nd->timeout_event_source);
+ (void) event_source_disable(nd->timeout_no_ra);
+ nd->retransmit_time = 0;
+ nd->recv_event_source = sd_event_source_unref(nd->recv_event_source);
+ nd->fd = safe_close(nd->fd);
+}
+
+static sd_ndisc *ndisc_free(sd_ndisc *nd) {
+ assert(nd);
+
+ nd->timeout_event_source = sd_event_source_unref(nd->timeout_event_source);
+ nd->timeout_no_ra = sd_event_source_unref(nd->timeout_no_ra);
+
+ ndisc_reset(nd);
+ sd_ndisc_detach_event(nd);
+ return mfree(nd);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_ndisc, sd_ndisc, ndisc_free);
+
+_public_ int sd_ndisc_new(sd_ndisc **ret) {
+ _cleanup_(sd_ndisc_unrefp) sd_ndisc *nd = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ nd = new(sd_ndisc, 1);
+ if (!nd)
+ return -ENOMEM;
+
+ *nd = (sd_ndisc) {
+ .n_ref = 1,
+ .fd = -1,
+ };
+
+ *ret = TAKE_PTR(nd);
+
+ return 0;
+}
+
+_public_ int sd_ndisc_get_mtu(sd_ndisc *nd, uint32_t *mtu) {
+ assert_return(nd, -EINVAL);
+ assert_return(mtu, -EINVAL);
+
+ if (nd->mtu == 0)
+ return -ENODATA;
+
+ *mtu = nd->mtu;
+ return 0;
+}
+
+_public_ int sd_ndisc_get_hop_limit(sd_ndisc *nd, uint8_t *ret) {
+ assert_return(nd, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (nd->hop_limit == 0)
+ return -ENODATA;
+
+ *ret = nd->hop_limit;
+ return 0;
+}
+
+static int ndisc_handle_datagram(sd_ndisc *nd, sd_ndisc_router *rt) {
+ int r;
+
+ assert(nd);
+ assert(rt);
+
+ r = ndisc_router_parse(rt);
+ if (r == -EBADMSG) /* Bad packet */
+ return 0;
+ if (r < 0)
+ return 0;
+
+ /* Update global variables we keep */
+ if (rt->mtu > 0)
+ nd->mtu = rt->mtu;
+ if (rt->hop_limit > 0)
+ nd->hop_limit = rt->hop_limit;
+
+ log_ndisc("Received Router Advertisement: flags %s preference %s lifetime %" PRIu16 " sec",
+ rt->flags & ND_RA_FLAG_MANAGED ? "MANAGED" : rt->flags & ND_RA_FLAG_OTHER ? "OTHER" : "none",
+ rt->preference == SD_NDISC_PREFERENCE_HIGH ? "high" : rt->preference == SD_NDISC_PREFERENCE_LOW ? "low" : "medium",
+ rt->lifetime);
+
+ ndisc_callback(nd, SD_NDISC_EVENT_ROUTER, rt);
+ return 0;
+}
+
+static int ndisc_recv(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_ndisc_router_unrefp) sd_ndisc_router *rt = NULL;
+ sd_ndisc *nd = userdata;
+ ssize_t buflen;
+ int r;
+ _cleanup_free_ char *addr = NULL;
+
+ assert(s);
+ assert(nd);
+ assert(nd->event);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return log_ndisc_errno(buflen, "Failed to determine datagram size to read: %m");
+
+ rt = ndisc_router_new(buflen);
+ if (!rt)
+ return -ENOMEM;
+
+ r = icmp6_receive(fd, NDISC_ROUTER_RAW(rt), rt->raw_size, &rt->address,
+ &rt->timestamp);
+ if (r < 0) {
+ switch (r) {
+ case -EADDRNOTAVAIL:
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union*) &rt->address, &addr);
+ log_ndisc("Received RA from non-link-local address %s. Ignoring", addr);
+ break;
+
+ case -EMULTIHOP:
+ log_ndisc("Received RA with invalid hop limit. Ignoring.");
+ break;
+
+ case -EPFNOSUPPORT:
+ log_ndisc("Received invalid source address from ICMPv6 socket. Ignoring.");
+ break;
+
+ case -EAGAIN: /* ignore spurious wakeups */
+ break;
+
+ default:
+ log_ndisc_errno(r, "Unexpected error while reading from ICMPv6, ignoring: %m");
+ break;
+ }
+
+ return 0;
+ }
+
+ (void) event_source_disable(nd->timeout_event_source);
+
+ return ndisc_handle_datagram(nd, rt);
+}
+
+static usec_t ndisc_timeout_compute_random(usec_t val) {
+ /* compute a time that is random within ±10% of the given value */
+ return val - val / 10 +
+ (random_u64() % (2 * USEC_PER_SEC)) * val / 10 / USEC_PER_SEC;
+}
+
+static int ndisc_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ char time_string[FORMAT_TIMESPAN_MAX];
+ sd_ndisc *nd = userdata;
+ usec_t time_now;
+ int r;
+
+ assert(s);
+ assert(nd);
+ assert(nd->event);
+
+ assert_se(sd_event_now(nd->event, clock_boottime_or_monotonic(), &time_now) >= 0);
+
+ if (!nd->retransmit_time)
+ nd->retransmit_time = ndisc_timeout_compute_random(NDISC_ROUTER_SOLICITATION_INTERVAL);
+ else {
+ if (nd->retransmit_time > NDISC_MAX_ROUTER_SOLICITATION_INTERVAL / 2)
+ nd->retransmit_time = ndisc_timeout_compute_random(NDISC_MAX_ROUTER_SOLICITATION_INTERVAL);
+ else
+ nd->retransmit_time += ndisc_timeout_compute_random(nd->retransmit_time);
+ }
+
+ r = event_reset_time(nd->event, &nd->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ time_now + nd->retransmit_time, 10 * USEC_PER_MSEC,
+ ndisc_timeout, nd,
+ nd->event_priority, "ndisc-timeout-no-ra", true);
+ if (r < 0)
+ goto fail;
+
+ r = icmp6_send_router_solicitation(nd->fd, &nd->mac_addr);
+ if (r < 0) {
+ log_ndisc_errno(r, "Error sending Router Solicitation: %m");
+ goto fail;
+ }
+
+ log_ndisc("Sent Router Solicitation, next solicitation in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX,
+ nd->retransmit_time, USEC_PER_SEC));
+
+ return 0;
+
+fail:
+ (void) sd_ndisc_stop(nd);
+ return 0;
+}
+
+static int ndisc_timeout_no_ra(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_ndisc *nd = userdata;
+
+ assert(s);
+ assert(nd);
+
+ log_ndisc("No RA received before link confirmation timeout");
+
+ (void) event_source_disable(nd->timeout_no_ra);
+ ndisc_callback(nd, SD_NDISC_EVENT_TIMEOUT, NULL);
+
+ return 0;
+}
+
+_public_ int sd_ndisc_stop(sd_ndisc *nd) {
+ if (!nd)
+ return 0;
+
+ if (nd->fd < 0)
+ return 0;
+
+ log_ndisc("Stopping IPv6 Router Solicitation client");
+
+ ndisc_reset(nd);
+ return 1;
+}
+
+_public_ int sd_ndisc_start(sd_ndisc *nd) {
+ int r;
+ usec_t time_now;
+
+ assert_return(nd, -EINVAL);
+ assert_return(nd->event, -EINVAL);
+ assert_return(nd->ifindex > 0, -EINVAL);
+
+ if (nd->fd >= 0)
+ return 0;
+
+ assert(!nd->recv_event_source);
+
+ r = sd_event_now(nd->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ goto fail;
+
+ nd->fd = icmp6_bind_router_solicitation(nd->ifindex);
+ if (nd->fd < 0)
+ return nd->fd;
+
+ r = sd_event_add_io(nd->event, &nd->recv_event_source, nd->fd, EPOLLIN, ndisc_recv, nd);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(nd->recv_event_source, nd->event_priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(nd->recv_event_source, "ndisc-receive-message");
+
+ r = event_reset_time(nd->event, &nd->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ time_now + USEC_PER_SEC / 2, 1 * USEC_PER_SEC, /* See RFC 8415 sec. 18.2.1 */
+ ndisc_timeout, nd,
+ nd->event_priority, "ndisc-timeout", true);
+ if (r < 0)
+ goto fail;
+
+ r = event_reset_time(nd->event, &nd->timeout_no_ra,
+ clock_boottime_or_monotonic(),
+ time_now + NDISC_TIMEOUT_NO_RA_USEC, 10 * USEC_PER_MSEC,
+ ndisc_timeout_no_ra, nd,
+ nd->event_priority, "ndisc-timeout-no-ra", true);
+ if (r < 0)
+ goto fail;
+
+ log_ndisc("Started IPv6 Router Solicitation client");
+ return 1;
+
+fail:
+ ndisc_reset(nd);
+ return r;
+}
diff --git a/src/libsystemd-network/sd-radv.c b/src/libsystemd-network/sd-radv.c
new file mode 100644
index 0000000..8beb845
--- /dev/null
+++ b/src/libsystemd-network/sd-radv.c
@@ -0,0 +1,953 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "sd-radv.h"
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "ether-addr-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "icmp6-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "radv-internal.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+_public_ int sd_radv_new(sd_radv **ret) {
+ _cleanup_(sd_radv_unrefp) sd_radv *ra = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ ra = new(sd_radv, 1);
+ if (!ra)
+ return -ENOMEM;
+
+ *ra = (sd_radv) {
+ .n_ref = 1,
+ .fd = -1,
+ };
+
+ *ret = TAKE_PTR(ra);
+
+ return 0;
+}
+
+_public_ int sd_radv_attach_event(sd_radv *ra, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(ra, -EINVAL);
+ assert_return(!ra->event, -EBUSY);
+
+ if (event)
+ ra->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&ra->event);
+ if (r < 0)
+ return 0;
+ }
+
+ ra->event_priority = priority;
+
+ return 0;
+}
+
+_public_ int sd_radv_detach_event(sd_radv *ra) {
+
+ assert_return(ra, -EINVAL);
+
+ ra->event = sd_event_unref(ra->event);
+ return 0;
+}
+
+_public_ sd_event *sd_radv_get_event(sd_radv *ra) {
+ assert_return(ra, NULL);
+
+ return ra->event;
+}
+
+_public_ int sd_radv_is_running(sd_radv *ra) {
+ assert_return(ra, false);
+
+ return ra->state != SD_RADV_STATE_IDLE;
+}
+
+static void radv_reset(sd_radv *ra) {
+ assert(ra);
+
+ (void) event_source_disable(ra->timeout_event_source);
+
+ ra->recv_event_source =
+ sd_event_source_unref(ra->recv_event_source);
+
+ ra->ra_sent = 0;
+}
+
+static sd_radv *radv_free(sd_radv *ra) {
+ if (!ra)
+ return NULL;
+
+ while (ra->prefixes) {
+ sd_radv_prefix *p = ra->prefixes;
+
+ LIST_REMOVE(prefix, ra->prefixes, p);
+ sd_radv_prefix_unref(p);
+ }
+
+ while (ra->route_prefixes) {
+ sd_radv_route_prefix *p = ra->route_prefixes;
+
+ LIST_REMOVE(prefix, ra->route_prefixes, p);
+ sd_radv_route_prefix_unref(p);
+ }
+
+ free(ra->rdnss);
+ free(ra->dnssl);
+
+ ra->timeout_event_source = sd_event_source_unref(ra->timeout_event_source);
+
+ radv_reset(ra);
+
+ sd_radv_detach_event(ra);
+
+ ra->fd = safe_close(ra->fd);
+
+ return mfree(ra);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_radv, sd_radv, radv_free);
+
+static int radv_send(sd_radv *ra, const struct in6_addr *dst, uint32_t router_lifetime) {
+ sd_radv_route_prefix *rt;
+ sd_radv_prefix *p;
+ struct sockaddr_in6 dst_addr = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ALL_NODES_MULTICAST_INIT,
+ };
+ struct nd_router_advert adv = {};
+ struct {
+ struct nd_opt_hdr opthdr;
+ struct ether_addr slladdr;
+ } _packed_ opt_mac = {
+ .opthdr = {
+ .nd_opt_type = ND_OPT_SOURCE_LINKADDR,
+ .nd_opt_len = (sizeof(struct nd_opt_hdr) +
+ sizeof(struct ether_addr) - 1) /8 + 1,
+ },
+ };
+ struct nd_opt_mtu opt_mtu = {
+ .nd_opt_mtu_type = ND_OPT_MTU,
+ .nd_opt_mtu_len = 1,
+ };
+ /* Reserve iov space for RA header, linkaddr, MTU, N prefixes, N routes, RDNSS
+ and DNSSL */
+ struct iovec iov[5 + ra->n_prefixes + ra->n_route_prefixes];
+ struct msghdr msg = {
+ .msg_name = &dst_addr,
+ .msg_namelen = sizeof(dst_addr),
+ .msg_iov = iov,
+ };
+ usec_t time_now;
+ int r;
+
+ assert(ra);
+
+ r = sd_event_now(ra->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+
+ if (dst && !IN6_IS_ADDR_UNSPECIFIED(dst))
+ dst_addr.sin6_addr = *dst;
+
+ adv.nd_ra_type = ND_ROUTER_ADVERT;
+ adv.nd_ra_curhoplimit = ra->hop_limit;
+ adv.nd_ra_flags_reserved = ra->flags;
+ adv.nd_ra_router_lifetime = htobe16(router_lifetime);
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&adv, sizeof(adv));
+
+ /* MAC address is optional, either because the link does not use L2
+ addresses or load sharing is desired. See RFC 4861, Section 4.2 */
+ if (!ether_addr_is_null(&ra->mac_addr)) {
+ opt_mac.slladdr = ra->mac_addr;
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&opt_mac, sizeof(opt_mac));
+ }
+
+ if (ra->mtu) {
+ opt_mtu.nd_opt_mtu_mtu = htobe32(ra->mtu);
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&opt_mtu, sizeof(opt_mtu));
+ }
+
+ LIST_FOREACH(prefix, p, ra->prefixes) {
+ if (p->valid_until) {
+
+ if (time_now > p->valid_until)
+ p->opt.valid_lifetime = 0;
+ else
+ p->opt.valid_lifetime = htobe32((p->valid_until - time_now) / USEC_PER_SEC);
+
+ if (time_now > p->preferred_until)
+ p->opt.preferred_lifetime = 0;
+ else
+ p->opt.preferred_lifetime = htobe32((p->preferred_until - time_now) / USEC_PER_SEC);
+ }
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&p->opt, sizeof(p->opt));
+ }
+
+ LIST_FOREACH(prefix, rt, ra->route_prefixes)
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(&rt->opt, sizeof(rt->opt));
+
+ if (ra->rdnss)
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(ra->rdnss, ra->rdnss->length * 8);
+
+ if (ra->dnssl)
+ iov[msg.msg_iovlen++] = IOVEC_MAKE(ra->dnssl, ra->dnssl->length * 8);
+
+ if (sendmsg(ra->fd, &msg, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int radv_recv(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_radv *ra = userdata;
+ _cleanup_free_ char *addr = NULL;
+ struct in6_addr src;
+ triple_timestamp timestamp;
+ int r;
+ ssize_t buflen;
+ _cleanup_free_ char *buf = NULL;
+
+ assert(s);
+ assert(ra);
+ assert(ra->event);
+
+ buflen = next_datagram_size_fd(fd);
+ if (buflen < 0)
+ return (int) buflen;
+
+ buf = new0(char, buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = icmp6_receive(fd, buf, buflen, &src, &timestamp);
+ if (r < 0) {
+ switch (r) {
+ case -EADDRNOTAVAIL:
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union*) &src, &addr);
+ log_radv("Received RS from non-link-local address %s. Ignoring", addr);
+ break;
+
+ case -EMULTIHOP:
+ log_radv("Received RS with invalid hop limit. Ignoring.");
+ break;
+
+ case -EPFNOSUPPORT:
+ log_radv("Received invalid source address from ICMPv6 socket. Ignoring.");
+ break;
+
+ case -EAGAIN: /* ignore spurious wakeups */
+ break;
+
+ default:
+ log_radv_errno(r, "Unexpected error receiving from ICMPv6 socket: %m");
+ break;
+ }
+
+ return 0;
+ }
+
+ if ((size_t) buflen < sizeof(struct nd_router_solicit)) {
+ log_radv("Too short packet received");
+ return 0;
+ }
+
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union*) &src, &addr);
+
+ r = radv_send(ra, &src, ra->lifetime);
+ if (r < 0)
+ log_radv_errno(r, "Unable to send solicited Router Advertisement to %s: %m", strnull(addr));
+ else
+ log_radv("Sent solicited Router Advertisement to %s", strnull(addr));
+
+ return 0;
+}
+
+static usec_t radv_compute_timeout(usec_t min, usec_t max) {
+ assert_return(min <= max, SD_RADV_DEFAULT_MIN_TIMEOUT_USEC);
+
+ /* RFC 4861: min must be no less than 3s, max must be no less than 4s */
+ min = MAX(min, 3*USEC_PER_SEC);
+ max = MAX(max, 4*USEC_PER_SEC);
+
+ return min + (random_u32() % (max - min));
+}
+
+static int radv_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ int r;
+ sd_radv *ra = userdata;
+ usec_t min_timeout = SD_RADV_DEFAULT_MIN_TIMEOUT_USEC;
+ usec_t max_timeout = SD_RADV_DEFAULT_MAX_TIMEOUT_USEC;
+ usec_t time_now, timeout;
+ char time_string[FORMAT_TIMESPAN_MAX];
+
+ assert(s);
+ assert(ra);
+ assert(ra->event);
+
+ r = sd_event_now(ra->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ goto fail;
+
+ r = radv_send(ra, NULL, ra->lifetime);
+ if (r < 0)
+ log_radv_errno(r, "Unable to send Router Advertisement: %m");
+
+ /* RFC 4861, Section 6.2.4, sending initial Router Advertisements */
+ if (ra->ra_sent < SD_RADV_MAX_INITIAL_RTR_ADVERTISEMENTS) {
+ max_timeout = SD_RADV_MAX_INITIAL_RTR_ADVERT_INTERVAL_USEC;
+ min_timeout = SD_RADV_MAX_INITIAL_RTR_ADVERT_INTERVAL_USEC / 3;
+ }
+
+ /* RFC 4861, Section 6.2.1, lifetime must be at least MaxRtrAdvInterval,
+ so lower the interval here */
+ if (ra->lifetime > 0 && (ra->lifetime * USEC_PER_SEC) < max_timeout) {
+ max_timeout = ra->lifetime * USEC_PER_SEC;
+ min_timeout = max_timeout / 3;
+ }
+
+ timeout = radv_compute_timeout(min_timeout, max_timeout);
+
+ log_radv("Next Router Advertisement in %s",
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX,
+ timeout, USEC_PER_SEC));
+
+ r = event_reset_time(ra->event, &ra->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ time_now + timeout, MSEC_PER_SEC,
+ radv_timeout, ra,
+ ra->event_priority, "radv-timeout", true);
+ if (r < 0)
+ goto fail;
+
+ ra->ra_sent++;
+
+ return 0;
+
+fail:
+ sd_radv_stop(ra);
+
+ return 0;
+}
+
+_public_ int sd_radv_stop(sd_radv *ra) {
+ int r;
+
+ if (!ra)
+ return 0;
+
+ if (ra->state == SD_RADV_STATE_IDLE)
+ return 0;
+
+ log_radv("Stopping IPv6 Router Advertisement daemon");
+
+ /* RFC 4861, Section 6.2.5, send at least one Router Advertisement
+ with zero lifetime */
+ r = radv_send(ra, NULL, 0);
+ if (r < 0)
+ log_radv_errno(r, "Unable to send last Router Advertisement with router lifetime set to zero: %m");
+
+ radv_reset(ra);
+ ra->fd = safe_close(ra->fd);
+ ra->state = SD_RADV_STATE_IDLE;
+
+ return 0;
+}
+
+_public_ int sd_radv_start(sd_radv *ra) {
+ int r;
+
+ assert_return(ra, -EINVAL);
+ assert_return(ra->event, -EINVAL);
+ assert_return(ra->ifindex > 0, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return 0;
+
+ r = event_reset_time(ra->event, &ra->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ 0, 0,
+ radv_timeout, ra,
+ ra->event_priority, "radv-timeout", true);
+ if (r < 0)
+ goto fail;
+
+ r = icmp6_bind_router_advertisement(ra->ifindex);
+ if (r < 0)
+ goto fail;
+
+ ra->fd = r;
+
+ r = sd_event_add_io(ra->event, &ra->recv_event_source, ra->fd, EPOLLIN, radv_recv, ra);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(ra->recv_event_source, ra->event_priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(ra->recv_event_source, "radv-receive-message");
+
+ ra->state = SD_RADV_STATE_ADVERTISING;
+
+ log_radv("Started IPv6 Router Advertisement daemon");
+
+ return 0;
+
+ fail:
+ radv_reset(ra);
+
+ return r;
+}
+
+_public_ int sd_radv_set_ifindex(sd_radv *ra, int ifindex) {
+ assert_return(ra, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ ra->ifindex = ifindex;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_mac(sd_radv *ra, const struct ether_addr *mac_addr) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ if (mac_addr)
+ ra->mac_addr = *mac_addr;
+ else
+ zero(ra->mac_addr);
+
+ return 0;
+}
+
+_public_ int sd_radv_set_mtu(sd_radv *ra, uint32_t mtu) {
+ assert_return(ra, -EINVAL);
+ assert_return(mtu >= 1280, -EINVAL);
+
+ ra->mtu = mtu;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_hop_limit(sd_radv *ra, uint8_t hop_limit) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ ra->hop_limit = hop_limit;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_router_lifetime(sd_radv *ra, uint16_t router_lifetime) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ /* RFC 4191, Section 2.2, "...If the Router Lifetime is zero, the preference value MUST be set
+ * to (00) by the sender..." */
+ if (router_lifetime == 0 &&
+ (ra->flags & (0x3 << 3)) != (SD_NDISC_PREFERENCE_MEDIUM << 3))
+ return -ETIME;
+
+ ra->lifetime = router_lifetime;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_managed_information(sd_radv *ra, int managed) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ SET_FLAG(ra->flags, ND_RA_FLAG_MANAGED, managed);
+
+ return 0;
+}
+
+_public_ int sd_radv_set_other_information(sd_radv *ra, int other) {
+ assert_return(ra, -EINVAL);
+
+ if (ra->state != SD_RADV_STATE_IDLE)
+ return -EBUSY;
+
+ SET_FLAG(ra->flags, ND_RA_FLAG_OTHER, other);
+
+ return 0;
+}
+
+_public_ int sd_radv_set_preference(sd_radv *ra, unsigned preference) {
+ assert_return(ra, -EINVAL);
+ assert_return(IN_SET(preference,
+ SD_NDISC_PREFERENCE_LOW,
+ SD_NDISC_PREFERENCE_MEDIUM,
+ SD_NDISC_PREFERENCE_HIGH), -EINVAL);
+
+ /* RFC 4191, Section 2.2, "...If the Router Lifetime is zero, the preference value MUST be set
+ * to (00) by the sender..." */
+ if (ra->lifetime == 0 && preference != SD_NDISC_PREFERENCE_MEDIUM)
+ return -EINVAL;
+
+ ra->flags = (ra->flags & ~(0x3 << 3)) | (preference << 3);
+
+ return 0;
+}
+
+_public_ int sd_radv_add_prefix(sd_radv *ra, sd_radv_prefix *p, int dynamic) {
+ sd_radv_prefix *cur;
+ int r;
+ _cleanup_free_ char *addr_p = NULL;
+ char time_string_preferred[FORMAT_TIMESPAN_MAX];
+ char time_string_valid[FORMAT_TIMESPAN_MAX];
+ usec_t time_now, valid, preferred, valid_until, preferred_until;
+
+ assert_return(ra, -EINVAL);
+
+ if (!p)
+ return -EINVAL;
+
+ /* Refuse prefixes that don't have a prefix set */
+ if (IN6_IS_ADDR_UNSPECIFIED(&p->opt.in6_addr))
+ return -ENOEXEC;
+
+ LIST_FOREACH(prefix, cur, ra->prefixes) {
+
+ r = in_addr_prefix_intersect(AF_INET6,
+ (union in_addr_union*) &cur->opt.in6_addr,
+ cur->opt.prefixlen,
+ (union in_addr_union*) &p->opt.in6_addr,
+ p->opt.prefixlen);
+ if (r > 0) {
+ _cleanup_free_ char *addr_cur = NULL;
+
+ (void) in_addr_to_string(AF_INET6,
+ (union in_addr_union*) &p->opt.in6_addr,
+ &addr_p);
+
+ if (dynamic && cur->opt.prefixlen == p->opt.prefixlen)
+ goto update;
+
+ (void) in_addr_to_string(AF_INET6,
+ (union in_addr_union*) &cur->opt.in6_addr,
+ &addr_cur);
+ log_radv("IPv6 prefix %s/%u already configured, ignoring %s/%u",
+ addr_cur, cur->opt.prefixlen,
+ addr_p, p->opt.prefixlen);
+
+ return -EEXIST;
+ }
+ }
+
+ p = sd_radv_prefix_ref(p);
+
+ LIST_APPEND(prefix, ra->prefixes, p);
+
+ ra->n_prefixes++;
+
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union*) &p->opt.in6_addr, &addr_p);
+
+ if (!dynamic) {
+ log_radv("Added prefix %s/%d", addr_p, p->opt.prefixlen);
+ return 0;
+ }
+
+ cur = p;
+
+ /* If RAs have already been sent, send an RA immediately to announce the newly-added prefix */
+ if (ra->ra_sent > 0) {
+ r = radv_send(ra, NULL, ra->lifetime);
+ if (r < 0)
+ log_radv_errno(r, "Unable to send Router Advertisement for added prefix: %m");
+ else
+ log_radv("Sent Router Advertisement for added prefix");
+ }
+
+ update:
+ r = sd_event_now(ra->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+
+ valid = be32toh(p->opt.valid_lifetime) * USEC_PER_SEC;
+ valid_until = usec_add(valid, time_now);
+ if (valid_until == USEC_INFINITY)
+ return -EOVERFLOW;
+
+ preferred = be32toh(p->opt.preferred_lifetime) * USEC_PER_SEC;
+ preferred_until = usec_add(preferred, time_now);
+ if (preferred_until == USEC_INFINITY)
+ return -EOVERFLOW;
+
+ cur->valid_until = valid_until;
+ cur->preferred_until = preferred_until;
+
+ log_radv("Updated prefix %s/%u preferred %s valid %s",
+ addr_p, p->opt.prefixlen,
+ format_timespan(time_string_preferred, FORMAT_TIMESPAN_MAX,
+ preferred, USEC_PER_SEC),
+ format_timespan(time_string_valid, FORMAT_TIMESPAN_MAX,
+ valid, USEC_PER_SEC));
+
+ return 0;
+}
+
+_public_ sd_radv_prefix *sd_radv_remove_prefix(sd_radv *ra,
+ const struct in6_addr *prefix,
+ unsigned char prefixlen) {
+ sd_radv_prefix *cur, *next;
+
+ assert_return(ra, NULL);
+ assert_return(prefix, NULL);
+
+ LIST_FOREACH_SAFE(prefix, cur, next, ra->prefixes) {
+ if (prefixlen != cur->opt.prefixlen)
+ continue;
+
+ if (!in_addr_equal(AF_INET6,
+ (union in_addr_union *)prefix,
+ (union in_addr_union *)&cur->opt.in6_addr))
+ continue;
+
+ LIST_REMOVE(prefix, ra->prefixes, cur);
+ ra->n_prefixes--;
+ sd_radv_prefix_unref(cur);
+
+ break;
+ }
+
+ return cur;
+}
+
+_public_ int sd_radv_add_route_prefix(sd_radv *ra, sd_radv_route_prefix *p, int dynamic) {
+ char time_string_valid[FORMAT_TIMESPAN_MAX];
+ usec_t time_now, valid, valid_until;
+ _cleanup_free_ char *pretty = NULL;
+ sd_radv_route_prefix *cur;
+ int r;
+
+ assert_return(ra, -EINVAL);
+
+ if (!p)
+ return -EINVAL;
+
+ (void) in_addr_to_string(AF_INET6,
+ (union in_addr_union*) &p->opt.in6_addr,
+ &pretty);
+
+ LIST_FOREACH(prefix, cur, ra->route_prefixes) {
+ _cleanup_free_ char *addr = NULL;
+
+ r = in_addr_prefix_intersect(AF_INET6,
+ (union in_addr_union*) &cur->opt.in6_addr,
+ cur->opt.prefixlen,
+ (union in_addr_union*) &p->opt.in6_addr,
+ p->opt.prefixlen);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (dynamic && cur->opt.prefixlen == p->opt.prefixlen)
+ goto update;
+
+ (void) in_addr_to_string(AF_INET6,
+ (union in_addr_union*) &cur->opt.in6_addr,
+ &addr);
+ log_radv("IPv6 route prefix %s/%u already configured, ignoring %s/%u",
+ strempty(addr), cur->opt.prefixlen,
+ strempty(pretty), p->opt.prefixlen);
+
+ return -EEXIST;
+ }
+
+ p = sd_radv_route_prefix_ref(p);
+
+ LIST_APPEND(prefix, ra->route_prefixes, p);
+ ra->n_route_prefixes++;
+
+ if (!dynamic) {
+ log_radv("Added prefix %s/%u", strempty(pretty), p->opt.prefixlen);
+ return 0;
+ }
+
+ /* If RAs have already been sent, send an RA immediately to announce the newly-added route prefix */
+ if (ra->ra_sent > 0) {
+ r = radv_send(ra, NULL, ra->lifetime);
+ if (r < 0)
+ log_radv_errno(r, "Unable to send Router Advertisement for added route prefix: %m");
+ else
+ log_radv("Sent Router Advertisement for added route prefix");
+ }
+
+ update:
+ r = sd_event_now(ra->event, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return r;
+
+ valid = be32toh(p->opt.lifetime) * USEC_PER_SEC;
+ valid_until = usec_add(valid, time_now);
+ if (valid_until == USEC_INFINITY)
+ return -EOVERFLOW;
+
+ log_radv("Updated route prefix %s/%u valid %s",
+ strempty(pretty), p->opt.prefixlen,
+ format_timespan(time_string_valid, FORMAT_TIMESPAN_MAX, valid, USEC_PER_SEC));
+
+ return 0;
+}
+
+_public_ int sd_radv_set_rdnss(sd_radv *ra, uint32_t lifetime,
+ const struct in6_addr *dns, size_t n_dns) {
+ _cleanup_free_ struct sd_radv_opt_dns *opt_rdnss = NULL;
+ size_t len;
+
+ assert_return(ra, -EINVAL);
+ assert_return(n_dns < 128, -EINVAL);
+
+ if (!dns || n_dns == 0) {
+ ra->rdnss = mfree(ra->rdnss);
+ ra->n_rdnss = 0;
+
+ return 0;
+ }
+
+ len = sizeof(struct sd_radv_opt_dns) + sizeof(struct in6_addr) * n_dns;
+
+ opt_rdnss = malloc0(len);
+ if (!opt_rdnss)
+ return -ENOMEM;
+
+ opt_rdnss->type = SD_RADV_OPT_RDNSS;
+ opt_rdnss->length = len / 8;
+ opt_rdnss->lifetime = htobe32(lifetime);
+
+ memcpy(opt_rdnss + 1, dns, n_dns * sizeof(struct in6_addr));
+
+ free_and_replace(ra->rdnss, opt_rdnss);
+
+ ra->n_rdnss = n_dns;
+
+ return 0;
+}
+
+_public_ int sd_radv_set_dnssl(sd_radv *ra, uint32_t lifetime,
+ char **search_list) {
+ _cleanup_free_ struct sd_radv_opt_dns *opt_dnssl = NULL;
+ size_t len = 0;
+ char **s;
+ uint8_t *p;
+
+ assert_return(ra, -EINVAL);
+
+ if (strv_isempty(search_list)) {
+ ra->dnssl = mfree(ra->dnssl);
+ return 0;
+ }
+
+ STRV_FOREACH(s, search_list)
+ len += strlen(*s) + 2;
+
+ len = (sizeof(struct sd_radv_opt_dns) + len + 7) & ~0x7;
+
+ opt_dnssl = malloc0(len);
+ if (!opt_dnssl)
+ return -ENOMEM;
+
+ opt_dnssl->type = SD_RADV_OPT_DNSSL;
+ opt_dnssl->length = len / 8;
+ opt_dnssl->lifetime = htobe32(lifetime);
+
+ p = (uint8_t *)(opt_dnssl + 1);
+ len -= sizeof(struct sd_radv_opt_dns);
+
+ STRV_FOREACH(s, search_list) {
+ int r;
+
+ r = dns_name_to_wire_format(*s, p, len, false);
+ if (r < 0)
+ return r;
+
+ if (len < (size_t)r)
+ return -ENOBUFS;
+
+ p += r;
+ len -= r;
+ }
+
+ free_and_replace(ra->dnssl, opt_dnssl);
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_new(sd_radv_prefix **ret) {
+ sd_radv_prefix *p;
+
+ assert_return(ret, -EINVAL);
+
+ p = new(sd_radv_prefix, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (sd_radv_prefix) {
+ .n_ref = 1,
+
+ .opt.type = ND_OPT_PREFIX_INFORMATION,
+ .opt.length = (sizeof(p->opt) - 1)/8 + 1,
+ .opt.prefixlen = 64,
+
+ /* RFC 4861, Section 6.2.1 */
+ .opt.flags = ND_OPT_PI_FLAG_ONLINK|ND_OPT_PI_FLAG_AUTO,
+
+ .opt.preferred_lifetime = htobe32(604800),
+ .opt.valid_lifetime = htobe32(2592000),
+ };
+
+ *ret = p;
+ return 0;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_radv_prefix, sd_radv_prefix, mfree);
+
+_public_ int sd_radv_prefix_set_prefix(sd_radv_prefix *p, const struct in6_addr *in6_addr,
+ unsigned char prefixlen) {
+ assert_return(p, -EINVAL);
+ assert_return(in6_addr, -EINVAL);
+
+ if (prefixlen < 3 || prefixlen > 128)
+ return -EINVAL;
+
+ if (prefixlen > 64)
+ /* unusual but allowed, log it */
+ log_radv("Unusual prefix length %d greater than 64", prefixlen);
+
+ p->opt.in6_addr = *in6_addr;
+ p->opt.prefixlen = prefixlen;
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_get_prefix(sd_radv_prefix *p, struct in6_addr *ret_in6_addr,
+ unsigned char *ret_prefixlen) {
+ assert_return(p, -EINVAL);
+ assert_return(ret_in6_addr, -EINVAL);
+ assert_return(ret_prefixlen, -EINVAL);
+
+ *ret_in6_addr = p->opt.in6_addr;
+ *ret_prefixlen = p->opt.prefixlen;
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_set_onlink(sd_radv_prefix *p, int onlink) {
+ assert_return(p, -EINVAL);
+
+ SET_FLAG(p->opt.flags, ND_OPT_PI_FLAG_ONLINK, onlink);
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_set_address_autoconfiguration(sd_radv_prefix *p,
+ int address_autoconfiguration) {
+ assert_return(p, -EINVAL);
+
+ SET_FLAG(p->opt.flags, ND_OPT_PI_FLAG_AUTO, address_autoconfiguration);
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_set_valid_lifetime(sd_radv_prefix *p,
+ uint32_t valid_lifetime) {
+ assert_return(p, -EINVAL);
+
+ p->opt.valid_lifetime = htobe32(valid_lifetime);
+
+ return 0;
+}
+
+_public_ int sd_radv_prefix_set_preferred_lifetime(sd_radv_prefix *p,
+ uint32_t preferred_lifetime) {
+ assert_return(p, -EINVAL);
+
+ p->opt.preferred_lifetime = htobe32(preferred_lifetime);
+
+ return 0;
+}
+
+_public_ int sd_radv_route_prefix_new(sd_radv_route_prefix **ret) {
+ sd_radv_route_prefix *p;
+
+ assert_return(ret, -EINVAL);
+
+ p = new(sd_radv_route_prefix, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (sd_radv_route_prefix) {
+ .n_ref = 1,
+
+ .opt.type = SD_RADV_OPT_ROUTE_INFORMATION,
+ .opt.length = DIV_ROUND_UP(sizeof(p->opt), 8),
+ .opt.prefixlen = 64,
+
+ .opt.lifetime = htobe32(604800),
+ };
+
+ *ret = p;
+ return 0;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_radv_route_prefix, sd_radv_route_prefix, mfree);
+
+_public_ int sd_radv_prefix_set_route_prefix(sd_radv_route_prefix *p, const struct in6_addr *in6_addr,
+ unsigned char prefixlen) {
+ assert_return(p, -EINVAL);
+ assert_return(in6_addr, -EINVAL);
+
+ if (prefixlen > 128)
+ return -EINVAL;
+
+ if (prefixlen > 64)
+ /* unusual but allowed, log it */
+ log_radv("Unusual prefix length %u greater than 64", prefixlen);
+
+ p->opt.in6_addr = *in6_addr;
+ p->opt.prefixlen = prefixlen;
+
+ return 0;
+}
+
+_public_ int sd_radv_route_prefix_set_lifetime(sd_radv_route_prefix *p, uint32_t valid_lifetime) {
+ assert_return(p, -EINVAL);
+
+ p->opt.lifetime = htobe32(valid_lifetime);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-acd.c b/src/libsystemd-network/test-acd.c
new file mode 100644
index 0000000..f4aab85
--- /dev/null
+++ b/src/libsystemd-network/test-acd.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/veth.h>
+#include <net/if.h>
+
+#include "sd-event.h"
+#include "sd-ipv4acd.h"
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void acd_handler(sd_ipv4acd *acd, int event, void *userdata) {
+ assert_se(acd);
+
+ switch (event) {
+ case SD_IPV4ACD_EVENT_BIND:
+ log_info("bound");
+ break;
+ case SD_IPV4ACD_EVENT_CONFLICT:
+ log_info("conflict");
+ break;
+ case SD_IPV4ACD_EVENT_STOP:
+ log_error("the client was stopped");
+ break;
+ default:
+ assert_not_reached("invalid ACD event");
+ }
+}
+
+static int client_run(int ifindex, const struct in_addr *pa, const struct ether_addr *ha, sd_event *e) {
+ sd_ipv4acd *acd;
+
+ assert_se(sd_ipv4acd_new(&acd) >= 0);
+ assert_se(sd_ipv4acd_attach_event(acd, e, 0) >= 0);
+
+ assert_se(sd_ipv4acd_set_ifindex(acd, ifindex) >= 0);
+ assert_se(sd_ipv4acd_set_mac(acd, ha) >= 0);
+ assert_se(sd_ipv4acd_set_address(acd, pa) >= 0);
+ assert_se(sd_ipv4acd_set_callback(acd, acd_handler, NULL) >= 0);
+
+ log_info("starting IPv4ACD client");
+
+ assert_se(sd_ipv4acd_start(acd, true) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ assert_se(!sd_ipv4acd_unref(acd));
+
+ return EXIT_SUCCESS;
+}
+
+static int test_acd(const char *ifname, const char *address) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ union in_addr_union pa;
+ struct ether_addr ha;
+ int ifindex;
+
+ assert_se(in_addr_from_string(AF_INET, address, &pa) >= 0);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(sd_netlink_attach_event(rtnl, e, 0) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, 0) >= 0);
+ assert_se(sd_netlink_message_append_string(m, IFLA_IFNAME, ifname) >= 0);
+ assert_se(sd_netlink_call(rtnl, m, 0, &reply) >= 0);
+
+ assert_se(sd_rtnl_message_link_get_ifindex(reply, &ifindex) >= 0);
+ assert_se(sd_netlink_message_read_ether_addr(reply, IFLA_ADDRESS, &ha) >= 0);
+
+ client_run(ifindex, &pa.in, &ha, e);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc == 3)
+ return test_acd(argv[1], argv[2]);
+ else {
+ log_error("This program takes two arguments.\n"
+ "\t %s <ifname> <IPv4 address>", program_invocation_short_name);
+ return EXIT_FAILURE;
+ }
+}
diff --git a/src/libsystemd-network/test-dhcp-client.c b/src/libsystemd-network/test-dhcp-client.c
new file mode 100644
index 0000000..d39d1f5
--- /dev/null
+++ b/src/libsystemd-network/test-dhcp-client.c
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-dhcp-client.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "dhcp-identifier.h"
+#include "dhcp-internal.h"
+#include "dhcp-protocol.h"
+#include "fd-util.h"
+#include "random-util.h"
+#include "tests.h"
+#include "util.h"
+
+static uint8_t mac_addr[] = {'A', 'B', 'C', '1', '2', '3'};
+static uint8_t bcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+
+typedef int (*test_callback_recv_t)(size_t size, DHCPMessage *dhcp);
+
+static bool verbose = true;
+static int test_fd[2];
+static test_callback_recv_t callback_recv;
+static be32_t xid;
+static sd_event_source *test_hangcheck;
+
+static int test_dhcp_hangcheck(sd_event_source *s, uint64_t usec, void *userdata) {
+ assert_not_reached("Test case should have completed in 2 seconds");
+
+ return 0;
+}
+
+static void test_request_basic(sd_event *e) {
+ int r;
+
+ sd_dhcp_client *client;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ /* Initialize client without Anonymize settings. */
+ r = sd_dhcp_client_new(&client, false);
+
+ assert_se(r >= 0);
+ assert_se(client);
+
+ r = sd_dhcp_client_attach_event(client, e, 0);
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_client_set_request_option(NULL, 0) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_address(NULL, NULL) == -EINVAL);
+ assert_se(sd_dhcp_client_set_ifindex(NULL, 0) == -EINVAL);
+
+ assert_se(sd_dhcp_client_set_ifindex(client, 15) == 0);
+ assert_se(sd_dhcp_client_set_ifindex(client, -42) == -EINVAL);
+ assert_se(sd_dhcp_client_set_ifindex(client, -1) == -EINVAL);
+ assert_se(sd_dhcp_client_set_ifindex(client, 0) == -EINVAL);
+ assert_se(sd_dhcp_client_set_ifindex(client, 1) == 0);
+
+ assert_se(sd_dhcp_client_set_hostname(client, "host") == 1);
+ assert_se(sd_dhcp_client_set_hostname(client, "host.domain") == 1);
+ assert_se(sd_dhcp_client_set_hostname(client, NULL) == 1);
+ assert_se(sd_dhcp_client_set_hostname(client, "~host") == -EINVAL);
+ assert_se(sd_dhcp_client_set_hostname(client, "~host.domain") == -EINVAL);
+
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_SUBNET_MASK) == 0);
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_ROUTER) == 0);
+ /* This PRL option is not set when using Anonymize, but in this test
+ * Anonymize settings are not being used. */
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_HOST_NAME) == 0);
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_DOMAIN_NAME) == 0);
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_DOMAIN_NAME_SERVER) == 0);
+
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_PAD) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_END) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_MESSAGE_TYPE) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_OVERLOAD) == -EINVAL);
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_PARAMETER_REQUEST_LIST) == -EINVAL);
+
+ /* RFC7844: option 33 (SD_DHCP_OPTION_STATIC_ROUTE) is set in the
+ * default PRL when using Anonymize, so it is changed to other option
+ * that is not set by default, to check that it was set successfully.
+ * Options not set by default (using or not anonymize) are option 17
+ * (SD_DHCP_OPTION_ROOT_PATH) and 42 (SD_DHCP_OPTION_NTP_SERVER) */
+ assert_se(sd_dhcp_client_set_request_option(client, 17) == 1);
+ assert_se(sd_dhcp_client_set_request_option(client, 17) == 0);
+ assert_se(sd_dhcp_client_set_request_option(client, 42) == 1);
+ assert_se(sd_dhcp_client_set_request_option(client, 17) == 0);
+
+ sd_dhcp_client_unref(client);
+}
+
+static void test_request_anonymize(sd_event *e) {
+ int r;
+
+ sd_dhcp_client *client;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ /* Initialize client with Anonymize settings. */
+ r = sd_dhcp_client_new(&client, true);
+
+ assert_se(r >= 0);
+ assert_se(client);
+
+ r = sd_dhcp_client_attach_event(client, e, 0);
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_NETBIOS_NAMESERVER) == 0);
+ /* This PRL option is not set when using Anonymize */
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_HOST_NAME) == 1);
+ assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_PARAMETER_REQUEST_LIST) == -EINVAL);
+
+ /* RFC7844: option 101 (SD_DHCP_OPTION_NEW_TZDB_TIMEZONE) is not set in the
+ * default PRL when using Anonymize, */
+ assert_se(sd_dhcp_client_set_request_option(client, 101) == 1);
+ assert_se(sd_dhcp_client_set_request_option(client, 101) == 0);
+
+ sd_dhcp_client_unref(client);
+}
+
+static void test_checksum(void) {
+ uint8_t buf[20] = {
+ 0x45, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00,
+ 0x40, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff
+ };
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(dhcp_packet_checksum((uint8_t*)&buf, 20) == be16toh(0x78ae));
+}
+
+static void test_dhcp_identifier_set_iaid(void) {
+ uint32_t iaid_legacy;
+ be32_t iaid;
+ int ifindex;
+
+ for (;;) {
+ char ifname[IFNAMSIZ];
+
+ /* try to find an ifindex which does not exist. I causes dhcp_identifier_set_iaid()
+ * to hash the MAC address. */
+ pseudo_random_bytes(&ifindex, sizeof(ifindex));
+ if (ifindex > 0 && !if_indextoname(ifindex, ifname))
+ break;
+ }
+
+ assert_se(dhcp_identifier_set_iaid(ifindex, mac_addr, sizeof(mac_addr), true, &iaid_legacy) >= 0);
+ assert_se(dhcp_identifier_set_iaid(ifindex, mac_addr, sizeof(mac_addr), false, &iaid) >= 0);
+
+ /* we expect, that the MAC address was hashed. The legacy value is in native
+ * endianness. */
+ assert_se(iaid_legacy == 0x8dde4ba8u);
+ assert_se(iaid == htole32(0x8dde4ba8u));
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ assert_se(iaid == iaid_legacy);
+#else
+ assert_se(iaid == __bswap_32(iaid_legacy));
+#endif
+}
+
+static int check_options(uint8_t code, uint8_t len, const void *option, void *userdata) {
+ switch(code) {
+ case SD_DHCP_OPTION_CLIENT_IDENTIFIER:
+ {
+ uint32_t iaid;
+ struct duid duid;
+ size_t duid_len;
+
+ assert_se(dhcp_identifier_set_duid_en(&duid, &duid_len) >= 0);
+ assert_se(dhcp_identifier_set_iaid(42, mac_addr, ETH_ALEN, true, &iaid) >= 0);
+
+ assert_se(len == sizeof(uint8_t) + sizeof(uint32_t) + duid_len);
+ assert_se(len == 19);
+ assert_se(((uint8_t*) option)[0] == 0xff);
+
+ assert_se(memcmp((uint8_t*) option + 1, &iaid, sizeof(iaid)) == 0);
+ assert_se(memcmp((uint8_t*) option + 5, &duid, duid_len) == 0);
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int dhcp_network_send_raw_socket(int s, const union sockaddr_union *link, const void *packet, size_t len) {
+ size_t size;
+ _cleanup_free_ DHCPPacket *discover;
+ uint16_t ip_check, udp_check;
+
+ assert_se(s >= 0);
+ assert_se(packet);
+
+ size = sizeof(DHCPPacket);
+ assert_se(len > size);
+
+ discover = memdup(packet, len);
+
+ assert_se(discover->ip.ttl == IPDEFTTL);
+ assert_se(discover->ip.protocol == IPPROTO_UDP);
+ assert_se(discover->ip.saddr == INADDR_ANY);
+ assert_se(discover->ip.daddr == INADDR_BROADCAST);
+ assert_se(discover->udp.source == be16toh(DHCP_PORT_CLIENT));
+ assert_se(discover->udp.dest == be16toh(DHCP_PORT_SERVER));
+
+ ip_check = discover->ip.check;
+
+ discover->ip.ttl = 0;
+ discover->ip.check = discover->udp.len;
+
+ udp_check = ~dhcp_packet_checksum((uint8_t*)&discover->ip.ttl, len - 8);
+ assert_se(udp_check == 0xffff);
+
+ discover->ip.ttl = IPDEFTTL;
+ discover->ip.check = ip_check;
+
+ ip_check = ~dhcp_packet_checksum((uint8_t*)&discover->ip, sizeof(discover->ip));
+ assert_se(ip_check == 0xffff);
+
+ assert_se(discover->dhcp.xid);
+ assert_se(memcmp(discover->dhcp.chaddr, &mac_addr, ETH_ALEN) == 0);
+
+ size = len - sizeof(struct iphdr) - sizeof(struct udphdr);
+
+ assert_se(callback_recv);
+ callback_recv(size, &discover->dhcp);
+
+ return 575;
+}
+
+int dhcp_network_bind_raw_socket(
+ int ifindex,
+ union sockaddr_union *link,
+ uint32_t id,
+ const uint8_t *addr, size_t addr_len,
+ const uint8_t *bcaddr, size_t bcaddr_len,
+ uint16_t arp_type, uint16_t port) {
+
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+int dhcp_network_bind_udp_socket(int ifindex, be32_t address, uint16_t port, int ip_service_type) {
+ int fd;
+
+ fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int dhcp_network_send_udp_socket(int s, be32_t address, uint16_t port, const void *packet, size_t len) {
+ return 0;
+}
+
+static int test_discover_message_verify(size_t size, struct DHCPMessage *dhcp) {
+ int res;
+
+ res = dhcp_option_parse(dhcp, size, check_options, NULL, NULL);
+ assert_se(res == DHCP_DISCOVER);
+
+ if (verbose)
+ printf(" recv DHCP Discover 0x%08x\n", be32toh(dhcp->xid));
+
+ return 0;
+}
+
+static void test_discover_message(sd_event *e) {
+ sd_dhcp_client *client;
+ int res, r;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ r = sd_dhcp_client_new(&client, false);
+ assert_se(r >= 0);
+ assert_se(client);
+
+ r = sd_dhcp_client_attach_event(client, e, 0);
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_client_set_ifindex(client, 42) >= 0);
+ assert_se(sd_dhcp_client_set_mac(client, mac_addr, bcast_addr, ETH_ALEN, ARPHRD_ETHER) >= 0);
+
+ assert_se(sd_dhcp_client_set_request_option(client, 248) >= 0);
+
+ callback_recv = test_discover_message_verify;
+
+ res = sd_dhcp_client_start(client);
+
+ assert_se(IN_SET(res, 0, -EINPROGRESS));
+
+ sd_event_run(e, (uint64_t) -1);
+
+ sd_dhcp_client_stop(client);
+ sd_dhcp_client_unref(client);
+
+ test_fd[1] = safe_close(test_fd[1]);
+
+ callback_recv = NULL;
+}
+
+static uint8_t test_addr_acq_offer[] = {
+ 0x45, 0x10, 0x01, 0x48, 0x00, 0x00, 0x00, 0x00,
+ 0x80, 0x11, 0xb3, 0x84, 0xc0, 0xa8, 0x02, 0x01,
+ 0xc0, 0xa8, 0x02, 0xbf, 0x00, 0x43, 0x00, 0x44,
+ 0x01, 0x34, 0x00, 0x00, 0x02, 0x01, 0x06, 0x00,
+ 0x6f, 0x95, 0x2f, 0x30, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc0, 0xa8, 0x02, 0xbf,
+ 0xc0, 0xa8, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x63, 0x82, 0x53, 0x63, 0x35, 0x01, 0x02, 0x36,
+ 0x04, 0xc0, 0xa8, 0x02, 0x01, 0x33, 0x04, 0x00,
+ 0x00, 0x02, 0x58, 0x01, 0x04, 0xff, 0xff, 0xff,
+ 0x00, 0x2a, 0x04, 0xc0, 0xa8, 0x02, 0x01, 0x0f,
+ 0x09, 0x6c, 0x61, 0x62, 0x2e, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x03, 0x04, 0xc0, 0xa8, 0x02, 0x01,
+ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static uint8_t test_addr_acq_ack[] = {
+ 0x45, 0x10, 0x01, 0x48, 0x00, 0x00, 0x00, 0x00,
+ 0x80, 0x11, 0xb3, 0x84, 0xc0, 0xa8, 0x02, 0x01,
+ 0xc0, 0xa8, 0x02, 0xbf, 0x00, 0x43, 0x00, 0x44,
+ 0x01, 0x34, 0x00, 0x00, 0x02, 0x01, 0x06, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc0, 0xa8, 0x02, 0xbf,
+ 0xc0, 0xa8, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x63, 0x82, 0x53, 0x63, 0x35, 0x01, 0x05, 0x36,
+ 0x04, 0xc0, 0xa8, 0x02, 0x01, 0x33, 0x04, 0x00,
+ 0x00, 0x02, 0x58, 0x01, 0x04, 0xff, 0xff, 0xff,
+ 0x00, 0x2a, 0x04, 0xc0, 0xa8, 0x02, 0x01, 0x0f,
+ 0x09, 0x6c, 0x61, 0x62, 0x2e, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x03, 0x04, 0xc0, 0xa8, 0x02, 0x01,
+ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static int test_addr_acq_acquired(sd_dhcp_client *client, int event,
+ void *userdata) {
+ sd_event *e = userdata;
+ sd_dhcp_lease *lease;
+ struct in_addr addr;
+ const struct in_addr *addrs;
+
+ assert_se(client);
+ assert_se(IN_SET(event, SD_DHCP_CLIENT_EVENT_IP_ACQUIRE, SD_DHCP_CLIENT_EVENT_SELECTING));
+
+ assert_se(sd_dhcp_client_get_lease(client, &lease) >= 0);
+ assert_se(lease);
+
+ assert_se(sd_dhcp_lease_get_address(lease, &addr) >= 0);
+ assert_se(memcmp(&addr.s_addr, &test_addr_acq_ack[44],
+ sizeof(addr.s_addr)) == 0);
+
+ assert_se(sd_dhcp_lease_get_netmask(lease, &addr) >= 0);
+ assert_se(memcmp(&addr.s_addr, &test_addr_acq_ack[285],
+ sizeof(addr.s_addr)) == 0);
+
+ assert_se(sd_dhcp_lease_get_router(lease, &addrs) == 1);
+ assert_se(memcmp(&addrs[0].s_addr, &test_addr_acq_ack[308],
+ sizeof(addrs[0].s_addr)) == 0);
+
+ if (verbose)
+ printf(" DHCP address acquired\n");
+
+ sd_event_exit(e, 0);
+
+ return 0;
+}
+
+static int test_addr_acq_recv_request(size_t size, DHCPMessage *request) {
+ uint16_t udp_check = 0;
+ uint8_t *msg_bytes = (uint8_t *)request;
+ int res;
+
+ res = dhcp_option_parse(request, size, check_options, NULL, NULL);
+ assert_se(res == DHCP_REQUEST);
+ assert_se(xid == request->xid);
+
+ assert_se(msg_bytes[size - 1] == SD_DHCP_OPTION_END);
+
+ if (verbose)
+ printf(" recv DHCP Request 0x%08x\n", be32toh(xid));
+
+ memcpy(&test_addr_acq_ack[26], &udp_check, sizeof(udp_check));
+ memcpy(&test_addr_acq_ack[32], &xid, sizeof(xid));
+ memcpy(&test_addr_acq_ack[56], &mac_addr, ETHER_ADDR_LEN);
+
+ callback_recv = NULL;
+
+ res = write(test_fd[1], test_addr_acq_ack,
+ sizeof(test_addr_acq_ack));
+ assert_se(res == sizeof(test_addr_acq_ack));
+
+ if (verbose)
+ printf(" send DHCP Ack\n");
+
+ return 0;
+};
+
+static int test_addr_acq_recv_discover(size_t size, DHCPMessage *discover) {
+ uint16_t udp_check = 0;
+ uint8_t *msg_bytes = (uint8_t *)discover;
+ int res;
+
+ res = dhcp_option_parse(discover, size, check_options, NULL, NULL);
+ assert_se(res == DHCP_DISCOVER);
+
+ assert_se(msg_bytes[size - 1] == SD_DHCP_OPTION_END);
+
+ xid = discover->xid;
+
+ if (verbose)
+ printf(" recv DHCP Discover 0x%08x\n", be32toh(xid));
+
+ memcpy(&test_addr_acq_offer[26], &udp_check, sizeof(udp_check));
+ memcpy(&test_addr_acq_offer[32], &xid, sizeof(xid));
+ memcpy(&test_addr_acq_offer[56], &mac_addr, ETHER_ADDR_LEN);
+
+ callback_recv = test_addr_acq_recv_request;
+
+ res = write(test_fd[1], test_addr_acq_offer,
+ sizeof(test_addr_acq_offer));
+ assert_se(res == sizeof(test_addr_acq_offer));
+
+ if (verbose)
+ printf(" sent DHCP Offer\n");
+
+ return 0;
+}
+
+static void test_addr_acq(sd_event *e) {
+ sd_dhcp_client *client;
+ int res, r;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ r = sd_dhcp_client_new(&client, false);
+ assert_se(r >= 0);
+ assert_se(client);
+
+ r = sd_dhcp_client_attach_event(client, e, 0);
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_client_set_ifindex(client, 42) >= 0);
+ assert_se(sd_dhcp_client_set_mac(client, mac_addr, bcast_addr, ETH_ALEN, ARPHRD_ETHER) >= 0);
+
+ assert_se(sd_dhcp_client_set_callback(client, test_addr_acq_acquired, e) >= 0);
+
+ callback_recv = test_addr_acq_recv_discover;
+
+ assert_se(sd_event_add_time_relative(
+ e, &test_hangcheck,
+ clock_boottime_or_monotonic(),
+ 2 * USEC_PER_SEC, 0,
+ test_dhcp_hangcheck, NULL) >= 0);
+
+ res = sd_dhcp_client_start(client);
+ assert_se(IN_SET(res, 0, -EINPROGRESS));
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ test_hangcheck = sd_event_source_unref(test_hangcheck);
+
+ assert_se(sd_dhcp_client_set_callback(client, NULL, NULL) >= 0);
+ assert_se(sd_dhcp_client_stop(client) >= 0);
+ sd_dhcp_client_unref(client);
+
+ test_fd[1] = safe_close(test_fd[1]);
+
+ callback_recv = NULL;
+ xid = 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ test_request_basic(e);
+ test_request_anonymize(e);
+ test_checksum();
+ test_dhcp_identifier_set_iaid();
+
+ test_discover_message(e);
+ test_addr_acq(e);
+
+#if VALGRIND
+ /* Make sure the async_close thread has finished.
+ * valgrind would report some of the phread_* structures
+ * as not cleaned up properly. */
+ sleep(1);
+#endif
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-dhcp-option.c b/src/libsystemd-network/test-dhcp-option.c
new file mode 100644
index 0000000..1eebe3a
--- /dev/null
+++ b/src/libsystemd-network/test-dhcp-option.c
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <net/if_arp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "dhcp-internal.h"
+#include "dhcp-protocol.h"
+#include "macro.h"
+#include "memory-util.h"
+
+struct option_desc {
+ uint8_t sname[64];
+ int snamelen;
+ uint8_t file[128];
+ int filelen;
+ uint8_t options[128];
+ int len;
+ bool success;
+ int filepos;
+ int snamepos;
+ int pos;
+};
+
+static bool verbose = false;
+
+static struct option_desc option_tests[] = {
+ { {}, 0, {}, 0, { 42, 5, 65, 66, 67, 68, 69 }, 7, false, },
+ { {}, 0, {}, 0, { 42, 5, 65, 66, 67, 68, 69, 0, 0,
+ SD_DHCP_OPTION_MESSAGE_TYPE, 1, DHCP_ACK }, 12, true, },
+ { {}, 0, {}, 0, { 8, 255, 70, 71, 72 }, 5, false, },
+ { {}, 0, {}, 0, { 0x35, 0x01, 0x05, 0x36, 0x04, 0x01, 0x00, 0xa8,
+ 0xc0, 0x33, 0x04, 0x00, 0x01, 0x51, 0x80, 0x01,
+ 0x04, 0xff, 0xff, 0xff, 0x00, 0x03, 0x04, 0xc0,
+ 0xa8, 0x00, 0x01, 0x06, 0x04, 0xc0, 0xa8, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, },
+ 40, true, },
+ { {}, 0, {}, 0, { SD_DHCP_OPTION_MESSAGE_TYPE, 1, DHCP_OFFER,
+ 42, 3, 0, 0, 0 }, 8, true, },
+ { {}, 0, {}, 0, { 42, 2, 1, 2, 44 }, 5, false, },
+
+ { {}, 0,
+ { 222, 3, 1, 2, 3, SD_DHCP_OPTION_MESSAGE_TYPE, 1, DHCP_NAK }, 8,
+ { SD_DHCP_OPTION_OVERLOAD, 1, DHCP_OVERLOAD_FILE }, 3, true, },
+
+ { { 1, 4, 1, 2, 3, 4, SD_DHCP_OPTION_MESSAGE_TYPE, 1, DHCP_ACK }, 9,
+ { 222, 3, 1, 2, 3 }, 5,
+ { SD_DHCP_OPTION_OVERLOAD, 1,
+ DHCP_OVERLOAD_FILE|DHCP_OVERLOAD_SNAME }, 3, true, },
+};
+
+static const char *dhcp_type(int type) {
+ switch(type) {
+ case DHCP_DISCOVER:
+ return "DHCPDISCOVER";
+ case DHCP_OFFER:
+ return "DHCPOFFER";
+ case DHCP_REQUEST:
+ return "DHCPREQUEST";
+ case DHCP_DECLINE:
+ return "DHCPDECLINE";
+ case DHCP_ACK:
+ return "DHCPACK";
+ case DHCP_NAK:
+ return "DHCPNAK";
+ case DHCP_RELEASE:
+ return "DHCPRELEASE";
+ default:
+ return "unknown";
+ }
+}
+
+static void test_invalid_buffer_length(void) {
+ DHCPMessage message;
+
+ assert_se(dhcp_option_parse(&message, 0, NULL, NULL, NULL) == -EINVAL);
+ assert_se(dhcp_option_parse(&message, sizeof(DHCPMessage) - 1, NULL, NULL, NULL) == -EINVAL);
+}
+
+static void test_message_init(void) {
+ _cleanup_free_ DHCPMessage *message = NULL;
+ size_t optlen = 4, optoffset;
+ size_t len = sizeof(DHCPMessage) + optlen;
+ uint8_t *magic;
+
+ message = malloc0(len);
+
+ assert_se(dhcp_message_init(message, BOOTREQUEST, 0x12345678,
+ DHCP_DISCOVER, ARPHRD_ETHER, optlen, &optoffset) >= 0);
+
+ assert_se(message->xid == htobe32(0x12345678));
+ assert_se(message->op == BOOTREQUEST);
+
+ magic = (uint8_t*)&message->magic;
+
+ assert_se(magic[0] == 99);
+ assert_se(magic[1] == 130);
+ assert_se(magic[2] == 83);
+ assert_se(magic[3] == 99);
+
+ assert_se(dhcp_option_parse(message, len, NULL, NULL, NULL) >= 0);
+}
+
+static DHCPMessage *create_message(uint8_t *options, uint16_t optlen,
+ uint8_t *file, uint8_t filelen,
+ uint8_t *sname, uint8_t snamelen) {
+ DHCPMessage *message;
+ size_t len = sizeof(DHCPMessage) + optlen;
+
+ message = malloc0(len);
+ assert_se(message);
+
+ memcpy_safe(&message->options, options, optlen);
+ memcpy_safe(&message->file, file, filelen);
+ memcpy_safe(&message->sname, sname, snamelen);
+
+ return message;
+}
+
+static void test_ignore_opts(uint8_t *descoption, int *descpos, int *desclen) {
+ assert(*descpos >= 0);
+
+ while (*descpos < *desclen) {
+ switch(descoption[*descpos]) {
+ case SD_DHCP_OPTION_PAD:
+ *descpos += 1;
+ break;
+
+ case SD_DHCP_OPTION_MESSAGE_TYPE:
+ case SD_DHCP_OPTION_OVERLOAD:
+ *descpos += 3;
+ break;
+
+ default:
+ return;
+ }
+ }
+}
+
+static int test_options_cb(uint8_t code, uint8_t len, const void *option, void *userdata) {
+ struct option_desc *desc = userdata;
+ uint8_t *descoption = NULL;
+ int *desclen = NULL, *descpos = NULL;
+ uint8_t optcode = 0;
+ uint8_t optlen = 0;
+ uint8_t i;
+
+ assert_se((!desc && !code && !len) || desc);
+
+ if (!desc)
+ return -EINVAL;
+
+ assert_se(code != SD_DHCP_OPTION_PAD);
+ assert_se(code != SD_DHCP_OPTION_END);
+ assert_se(code != SD_DHCP_OPTION_MESSAGE_TYPE);
+ assert_se(code != SD_DHCP_OPTION_OVERLOAD);
+
+ while (desc->pos >= 0 || desc->filepos >= 0 || desc->snamepos >= 0) {
+
+ if (desc->pos >= 0) {
+ descoption = &desc->options[0];
+ desclen = &desc->len;
+ descpos = &desc->pos;
+ } else if (desc->filepos >= 0) {
+ descoption = &desc->file[0];
+ desclen = &desc->filelen;
+ descpos = &desc->filepos;
+ } else if (desc->snamepos >= 0) {
+ descoption = &desc->sname[0];
+ desclen = &desc->snamelen;
+ descpos = &desc->snamepos;
+ }
+
+ assert_se(descoption && desclen && descpos);
+
+ if (*desclen)
+ test_ignore_opts(descoption, descpos, desclen);
+
+ if (*descpos < *desclen)
+ break;
+
+ if (*descpos == *desclen)
+ *descpos = -1;
+ }
+
+ assert_se(descpos);
+ assert_se(*descpos != -1);
+
+ optcode = descoption[*descpos];
+ optlen = descoption[*descpos + 1];
+
+ if (verbose)
+ printf("DHCP code %2d(%2d) len %2d(%2d) ", code, optcode,
+ len, optlen);
+
+ assert_se(code == optcode);
+ assert_se(len == optlen);
+
+ for (i = 0; i < len; i++) {
+
+ if (verbose)
+ printf("0x%02x(0x%02x) ", ((uint8_t*) option)[i],
+ descoption[*descpos + 2 + i]);
+
+ assert_se(((uint8_t*) option)[i] == descoption[*descpos + 2 + i]);
+ }
+
+ if (verbose)
+ printf("\n");
+
+ *descpos += optlen + 2;
+
+ test_ignore_opts(descoption, descpos, desclen);
+
+ if (desc->pos != -1 && desc->pos == desc->len)
+ desc->pos = -1;
+
+ if (desc->filepos != -1 && desc->filepos == desc->filelen)
+ desc->filepos = -1;
+
+ if (desc->snamepos != -1 && desc->snamepos == desc->snamelen)
+ desc->snamepos = -1;
+
+ return 0;
+}
+
+static void test_options(struct option_desc *desc) {
+ uint8_t *options = NULL;
+ uint8_t *file = NULL;
+ uint8_t *sname = NULL;
+ int optlen = 0;
+ int filelen = 0;
+ int snamelen = 0;
+ int buflen = 0;
+ _cleanup_free_ DHCPMessage *message = NULL;
+ int res;
+
+ if (desc) {
+ file = &desc->file[0];
+ filelen = desc->filelen;
+ if (!filelen)
+ desc->filepos = -1;
+
+ sname = &desc->sname[0];
+ snamelen = desc->snamelen;
+ if (!snamelen)
+ desc->snamepos = -1;
+
+ options = &desc->options[0];
+ optlen = desc->len;
+ desc->pos = 0;
+ }
+ message = create_message(options, optlen, file, filelen,
+ sname, snamelen);
+
+ buflen = sizeof(DHCPMessage) + optlen;
+
+ if (!desc) {
+ assert_se((res = dhcp_option_parse(message, buflen, test_options_cb, NULL, NULL)) == -ENOMSG);
+ } else if (desc->success) {
+ assert_se((res = dhcp_option_parse(message, buflen, test_options_cb, desc, NULL)) >= 0);
+ assert_se(desc->pos == -1 && desc->filepos == -1 && desc->snamepos == -1);
+ } else
+ assert_se((res = dhcp_option_parse(message, buflen, test_options_cb, desc, NULL)) < 0);
+
+ if (verbose)
+ printf("DHCP type %s\n", dhcp_type(res));
+}
+
+static uint8_t options[64] = {
+ 'A', 'B', 'C', 'D',
+ 160, 2, 0x11, 0x12,
+ 0,
+ 31, 8, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+ 0,
+ 55, 3, 0x51, 0x52, 0x53,
+ 17, 7, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 255
+};
+
+static void test_option_set(void) {
+ _cleanup_free_ DHCPMessage *result = NULL;
+ size_t offset = 0, len, pos;
+ unsigned i;
+
+ result = malloc0(sizeof(DHCPMessage) + 11);
+ assert_se(result);
+
+ result->options[0] = 'A';
+ result->options[1] = 'B';
+ result->options[2] = 'C';
+ result->options[3] = 'D';
+
+ assert_se(dhcp_option_append(result, 0, &offset, 0, SD_DHCP_OPTION_PAD,
+ 0, NULL) == -ENOBUFS);
+ assert_se(offset == 0);
+
+ offset = 4;
+ assert_se(dhcp_option_append(result, 5, &offset, 0, SD_DHCP_OPTION_PAD,
+ 0, NULL) == -ENOBUFS);
+ assert_se(offset == 4);
+ assert_se(dhcp_option_append(result, 6, &offset, 0, SD_DHCP_OPTION_PAD,
+ 0, NULL) >= 0);
+ assert_se(offset == 5);
+
+ offset = pos = 4;
+ len = 11;
+ while (pos < len && options[pos] != SD_DHCP_OPTION_END) {
+ assert_se(dhcp_option_append(result, len, &offset, DHCP_OVERLOAD_SNAME,
+ options[pos],
+ options[pos + 1],
+ &options[pos + 2]) >= 0);
+
+ if (options[pos] == SD_DHCP_OPTION_PAD)
+ pos++;
+ else
+ pos += 2 + options[pos + 1];
+
+ if (pos < len)
+ assert_se(offset == pos);
+ }
+
+ for (i = 0; i < 9; i++) {
+ if (verbose)
+ printf("%2u: 0x%02x(0x%02x) (options)\n", i, result->options[i],
+ options[i]);
+ assert_se(result->options[i] == options[i]);
+ }
+
+ if (verbose)
+ printf("%2d: 0x%02x(0x%02x) (options)\n", 9, result->options[9],
+ SD_DHCP_OPTION_END);
+
+ assert_se(result->options[9] == SD_DHCP_OPTION_END);
+
+ if (verbose)
+ printf("%2d: 0x%02x(0x%02x) (options)\n", 10, result->options[10],
+ SD_DHCP_OPTION_PAD);
+
+ assert_se(result->options[10] == SD_DHCP_OPTION_PAD);
+
+ for (i = 0; i < pos - 8; i++) {
+ if (verbose)
+ printf("%2u: 0x%02x(0x%02x) (sname)\n", i, result->sname[i],
+ options[i + 9]);
+ assert_se(result->sname[i] == options[i + 9]);
+ }
+
+ if (verbose)
+ printf ("\n");
+}
+
+int main(int argc, char *argv[]) {
+ unsigned i;
+
+ test_invalid_buffer_length();
+ test_message_init();
+
+ test_options(NULL);
+
+ for (i = 0; i < ELEMENTSOF(option_tests); i++)
+ test_options(&option_tests[i]);
+
+ test_option_set();
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-dhcp-server.c b/src/libsystemd-network/test-dhcp-server.c
new file mode 100644
index 0000000..e91b440
--- /dev/null
+++ b/src/libsystemd-network/test-dhcp-server.c
@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+***/
+
+#include <errno.h>
+#include <net/if_arp.h>
+
+#include "sd-dhcp-server.h"
+#include "sd-event.h"
+
+#include "dhcp-server-internal.h"
+#include "tests.h"
+
+static void test_pool(struct in_addr *address, unsigned size, int ret) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+
+ assert_se(sd_dhcp_server_new(&server, 1) >= 0);
+
+ assert_se(sd_dhcp_server_configure_pool(server, address, 8, 0, size) == ret);
+}
+
+static int test_basic(sd_event *event) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+ struct in_addr address_lo = {
+ .s_addr = htobe32(INADDR_LOOPBACK),
+ };
+ struct in_addr address_any = {
+ .s_addr = htobe32(INADDR_ANY),
+ };
+ int r;
+
+ /* attach to loopback interface */
+ assert_se(sd_dhcp_server_new(&server, 1) >= 0);
+ assert_se(server);
+
+ assert_se(sd_dhcp_server_attach_event(server, event, 0) >= 0);
+ assert_se(sd_dhcp_server_attach_event(server, event, 0) == -EBUSY);
+ assert_se(sd_dhcp_server_get_event(server) == event);
+ assert_se(sd_dhcp_server_detach_event(server) >= 0);
+ assert_se(!sd_dhcp_server_get_event(server));
+ assert_se(sd_dhcp_server_attach_event(server, NULL, 0) >= 0);
+ assert_se(sd_dhcp_server_attach_event(server, NULL, 0) == -EBUSY);
+
+ assert_se(sd_dhcp_server_ref(server) == server);
+ assert_se(!sd_dhcp_server_unref(server));
+
+ assert_se(sd_dhcp_server_start(server) == -EUNATCH);
+
+ assert_se(sd_dhcp_server_configure_pool(server, &address_any, 28, 0, 0) == -EINVAL);
+ assert_se(sd_dhcp_server_configure_pool(server, &address_lo, 38, 0, 0) == -ERANGE);
+ assert_se(sd_dhcp_server_configure_pool(server, &address_lo, 8, 0, 0) >= 0);
+ assert_se(sd_dhcp_server_configure_pool(server, &address_lo, 8, 0, 0) >= 0);
+
+ test_pool(&address_any, 1, -EINVAL);
+ test_pool(&address_lo, 1, 0);
+
+ r = sd_dhcp_server_start(server);
+ if (r == -EPERM)
+ return log_info_errno(r, "sd_dhcp_server_start failed: %m");
+ assert_se(r >= 0);
+
+ assert_se(sd_dhcp_server_start(server) == -EBUSY);
+ assert_se(sd_dhcp_server_stop(server) >= 0);
+ assert_se(sd_dhcp_server_stop(server) >= 0);
+ assert_se(sd_dhcp_server_start(server) >= 0);
+
+ return 0;
+}
+
+static void test_message_handler(void) {
+ _cleanup_(sd_dhcp_server_unrefp) sd_dhcp_server *server = NULL;
+ struct {
+ DHCPMessage message;
+ struct {
+ uint8_t code;
+ uint8_t length;
+ uint8_t type;
+ } _packed_ option_type;
+ struct {
+ uint8_t code;
+ uint8_t length;
+ be32_t address;
+ } _packed_ option_requested_ip;
+ struct {
+ uint8_t code;
+ uint8_t length;
+ be32_t address;
+ } _packed_ option_server_id;
+ struct {
+ uint8_t code;
+ uint8_t length;
+ uint8_t id[7];
+ } _packed_ option_client_id;
+ uint8_t end;
+ } _packed_ test = {
+ .message.op = BOOTREQUEST,
+ .message.htype = ARPHRD_ETHER,
+ .message.hlen = ETHER_ADDR_LEN,
+ .message.xid = htobe32(0x12345678),
+ .message.chaddr = { 'A', 'B', 'C', 'D', 'E', 'F' },
+ .option_type.code = SD_DHCP_OPTION_MESSAGE_TYPE,
+ .option_type.length = 1,
+ .option_type.type = DHCP_DISCOVER,
+ .end = SD_DHCP_OPTION_END,
+ };
+ struct in_addr address_lo = {
+ .s_addr = htobe32(INADDR_LOOPBACK),
+ };
+
+ assert_se(sd_dhcp_server_new(&server, 1) >= 0);
+ assert_se(sd_dhcp_server_configure_pool(server, &address_lo, 8, 0, 0) >= 0);
+ assert_se(sd_dhcp_server_attach_event(server, NULL, 0) >= 0);
+ assert_se(sd_dhcp_server_start(server) >= 0);
+
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.end = 0;
+ /* TODO, shouldn't this fail? */
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+ test.end = SD_DHCP_OPTION_END;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.option_type.code = 0;
+ test.option_type.length = 0;
+ test.option_type.type = 0;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.option_type.code = SD_DHCP_OPTION_MESSAGE_TYPE;
+ test.option_type.length = 1;
+ test.option_type.type = DHCP_DISCOVER;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.message.op = 0;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.message.op = BOOTREQUEST;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.message.htype = 0;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.message.htype = ARPHRD_ETHER;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.message.hlen = 0;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.message.hlen = ETHER_ADDR_LEN;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_OFFER);
+
+ test.option_type.type = DHCP_REQUEST;
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.option_requested_ip.code = SD_DHCP_OPTION_REQUESTED_IP_ADDRESS;
+ test.option_requested_ip.length = 4;
+ test.option_requested_ip.address = htobe32(0x12345678);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_NAK);
+ test.option_server_id.code = SD_DHCP_OPTION_SERVER_IDENTIFIER;
+ test.option_server_id.length = 4;
+ test.option_server_id.address = htobe32(INADDR_LOOPBACK);
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 3);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_ACK);
+
+ test.option_server_id.address = htobe32(0x12345678);
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 3);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.option_server_id.address = htobe32(INADDR_LOOPBACK);
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 4);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 3);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_ACK);
+
+ test.option_client_id.code = SD_DHCP_OPTION_CLIENT_IDENTIFIER;
+ test.option_client_id.length = 7;
+ test.option_client_id.id[0] = 0x01;
+ test.option_client_id.id[1] = 'A';
+ test.option_client_id.id[2] = 'B';
+ test.option_client_id.id[3] = 'C';
+ test.option_client_id.id[4] = 'D';
+ test.option_client_id.id[5] = 'E';
+ test.option_client_id.id[6] = 'F';
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == DHCP_ACK);
+
+ test.option_requested_ip.address = htobe32(INADDR_LOOPBACK + 30);
+ assert_se(dhcp_server_handle_message(server, (DHCPMessage*)&test, sizeof(test)) == 0);
+}
+
+static uint64_t client_id_hash_helper(DHCPClientId *id, uint8_t key[HASH_KEY_SIZE]) {
+ struct siphash state;
+
+ siphash24_init(&state, key);
+ client_id_hash_func(id, &state);
+
+ return htole64(siphash24_finalize(&state));
+}
+
+static void test_client_id_hash(void) {
+ DHCPClientId a = {
+ .length = 4,
+ }, b = {
+ .length = 4,
+ };
+ uint8_t hash_key[HASH_KEY_SIZE] = {
+ '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+ };
+
+ a.data = (uint8_t*)strdup("abcd");
+ b.data = (uint8_t*)strdup("abcd");
+
+ assert_se(client_id_compare_func(&a, &b) == 0);
+ assert_se(client_id_hash_helper(&a, hash_key) == client_id_hash_helper(&b, hash_key));
+ a.length = 3;
+ assert_se(client_id_compare_func(&a, &b) != 0);
+ a.length = 4;
+ assert_se(client_id_compare_func(&a, &b) == 0);
+ assert_se(client_id_hash_helper(&a, hash_key) == client_id_hash_helper(&b, hash_key));
+
+ b.length = 3;
+ assert_se(client_id_compare_func(&a, &b) != 0);
+ b.length = 4;
+ assert_se(client_id_compare_func(&a, &b) == 0);
+ assert_se(client_id_hash_helper(&a, hash_key) == client_id_hash_helper(&b, hash_key));
+
+ free(b.data);
+ b.data = (uint8_t*)strdup("abce");
+ assert_se(client_id_compare_func(&a, &b) != 0);
+
+ free(a.data);
+ free(b.data);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ r = test_basic(e);
+ if (r != 0)
+ return log_tests_skipped("cannot start dhcp server");
+
+ test_message_handler();
+ test_client_id_hash();
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-dhcp6-client.c b/src/libsystemd-network/test-dhcp6-client.c
new file mode 100644
index 0000000..e6a43dc
--- /dev/null
+++ b/src/libsystemd-network/test-dhcp6-client.c
@@ -0,0 +1,1004 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-dhcp6-client.h"
+#include "sd-event.h"
+
+#include "dhcp6-internal.h"
+#include "dhcp6-lease-internal.h"
+#include "dhcp6-protocol.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "time-util.h"
+#include "virt.h"
+
+static struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}
+};
+
+static sd_event_source *hangcheck;
+static int test_dhcp_fd[2];
+static int test_ifindex = 42;
+static int test_client_message_num;
+static be32_t test_iaid = 0;
+static uint8_t test_duid[14] = { };
+
+static int test_client_basic(sd_event *e) {
+ sd_dhcp6_client *client;
+ int v;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(sd_dhcp6_client_new(&client) >= 0);
+ assert_se(client);
+
+ assert_se(sd_dhcp6_client_attach_event(client, e, 0) >= 0);
+
+ assert_se(sd_dhcp6_client_set_ifindex(client, 15) == 0);
+ assert_se(sd_dhcp6_client_set_ifindex(client, -42) == -EINVAL);
+ assert_se(sd_dhcp6_client_set_ifindex(client, -1) == -EINVAL);
+ assert_se(sd_dhcp6_client_set_ifindex(client, 42) >= 0);
+
+ assert_se(sd_dhcp6_client_set_mac(client, (const uint8_t *) &mac_addr,
+ sizeof (mac_addr),
+ ARPHRD_ETHER) >= 0);
+
+ assert_se(sd_dhcp6_client_set_fqdn(client, "host") == 1);
+ assert_se(sd_dhcp6_client_set_fqdn(client, "host.domain") == 1);
+ assert_se(sd_dhcp6_client_set_fqdn(client, NULL) == 1);
+ assert_se(sd_dhcp6_client_set_fqdn(client, "~host") == -EINVAL);
+ assert_se(sd_dhcp6_client_set_fqdn(client, "~host.domain") == -EINVAL);
+
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_CLIENTID) == 0);
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_DNS_SERVERS) == -EEXIST);
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_NTP_SERVER) == -EEXIST);
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_SNTP_SERVERS) == -EEXIST);
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_DOMAIN_LIST) == -EEXIST);
+ assert_se(sd_dhcp6_client_set_request_option(client, 10) == 0);
+
+ assert_se(sd_dhcp6_client_set_information_request(client, 1) >= 0);
+ v = 0;
+ assert_se(sd_dhcp6_client_get_information_request(client, &v) >= 0);
+ assert_se(v);
+ assert_se(sd_dhcp6_client_set_information_request(client, 0) >= 0);
+ v = 42;
+ assert_se(sd_dhcp6_client_get_information_request(client, &v) >= 0);
+ assert_se(v == 0);
+
+ v = 0;
+ assert_se(sd_dhcp6_client_get_address_request(client, &v) >= 0);
+ assert_se(v);
+ v = 0;
+ assert_se(sd_dhcp6_client_set_address_request(client, 1) >= 0);
+ assert_se(sd_dhcp6_client_get_address_request(client, &v) >= 0);
+ assert_se(v);
+ v = 42;
+ assert_se(sd_dhcp6_client_set_address_request(client, 1) >= 0);
+ assert_se(sd_dhcp6_client_get_address_request(client, &v) >= 0);
+ assert_se(v);
+
+ assert_se(sd_dhcp6_client_set_address_request(client, 1) >= 0);
+ assert_se(sd_dhcp6_client_set_prefix_delegation(client, 1) >= 0);
+ v = 0;
+ assert_se(sd_dhcp6_client_get_address_request(client, &v) >= 0);
+ assert_se(v);
+ v = 0;
+ assert_se(sd_dhcp6_client_get_prefix_delegation(client, &v) >= 0);
+ assert_se(v);
+
+ assert_se(sd_dhcp6_client_set_callback(client, NULL, NULL) >= 0);
+
+ assert_se(sd_dhcp6_client_detach_event(client) >= 0);
+ assert_se(!sd_dhcp6_client_unref(client));
+
+ return 0;
+}
+
+static int test_parse_domain(sd_event *e) {
+ uint8_t *data;
+ char *domain;
+ char **list;
+ int r;
+
+ log_debug("/* %s */", __func__);
+
+ data = (uint8_t []) { 7, 'e', 'x', 'a', 'm', 'p', 'l', 'e', 3, 'c', 'o', 'm', 0 };
+ r = dhcp6_option_parse_domainname(data, 13, &domain);
+ assert_se(r == 0);
+ assert_se(domain);
+ assert_se(streq(domain, "example.com"));
+ free(domain);
+
+ data = (uint8_t []) { 4, 't', 'e', 's', 't' };
+ r = dhcp6_option_parse_domainname(data, 5, &domain);
+ assert_se(r == 0);
+ assert_se(domain);
+ assert_se(streq(domain, "test"));
+ free(domain);
+
+ data = (uint8_t []) { 0 };
+ r = dhcp6_option_parse_domainname(data, 1, &domain);
+ assert_se(r < 0);
+
+ data = (uint8_t []) { 7, 'e', 'x', 'a', 'm', 'p', 'l', 'e', 3, 'c', 'o', 'm', 0,
+ 6, 'f', 'o', 'o', 'b', 'a', 'r', 0 };
+ r = dhcp6_option_parse_domainname_list(data, 21, &list);
+ assert_se(r == 2);
+ assert_se(list);
+ assert_se(streq(list[0], "example.com"));
+ assert_se(streq(list[1], "foobar"));
+ strv_free(list);
+
+ data = (uint8_t []) { 1, 'a', 0, 20, 'b', 'c' };
+ r = dhcp6_option_parse_domainname_list(data, 6, &list);
+ assert_se(r < 0);
+
+ data = (uint8_t []) { 0 , 0 };
+ r = dhcp6_option_parse_domainname_list(data, 2, &list);
+ assert_se(r < 0);
+
+ return 0;
+}
+
+static int test_option(sd_event *e) {
+ uint8_t packet[] = {
+ 'F', 'O', 'O',
+ 0x00, SD_DHCP6_OPTION_ORO, 0x00, 0x07,
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 0x00, SD_DHCP6_OPTION_VENDOR_CLASS, 0x00, 0x09,
+ '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ 'B', 'A', 'R',
+ };
+ uint8_t result[] = {
+ 'F', 'O', 'O',
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 'B', 'A', 'R',
+ };
+ uint16_t optcode;
+ size_t optlen;
+ uint8_t *optval, *buf, *out;
+ size_t zero = 0, pos = 3;
+ size_t buflen = sizeof(packet), outlen = sizeof(result);
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(buflen == outlen);
+
+ assert_se(dhcp6_option_parse(&buf, &zero, &optcode, &optlen,
+ &optval) == -ENOMSG);
+
+ buflen -= 3;
+ buf = &packet[3];
+ outlen -= 3;
+ out = &result[3];
+
+ assert_se(dhcp6_option_parse(&buf, &buflen, &optcode, &optlen,
+ &optval) >= 0);
+ pos += 4 + optlen;
+ assert_se(buf == &packet[pos]);
+ assert_se(optcode == SD_DHCP6_OPTION_ORO);
+ assert_se(optlen == 7);
+ assert_se(buflen + pos == sizeof(packet));
+
+ assert_se(dhcp6_option_append(&out, &outlen, optcode, optlen,
+ optval) >= 0);
+ assert_se(out == &result[pos]);
+ assert_se(*out == 0x00);
+
+ assert_se(dhcp6_option_parse(&buf, &buflen, &optcode, &optlen,
+ &optval) >= 0);
+ pos += 4 + optlen;
+ assert_se(buf == &packet[pos]);
+ assert_se(optcode == SD_DHCP6_OPTION_VENDOR_CLASS);
+ assert_se(optlen == 9);
+ assert_se(buflen + pos == sizeof(packet));
+
+ assert_se(dhcp6_option_append(&out, &outlen, optcode, optlen,
+ optval) >= 0);
+ assert_se(out == &result[pos]);
+ assert_se(*out == 'B');
+
+ assert_se(memcmp(packet, result, sizeof(packet)) == 0);
+
+ return 0;
+}
+
+static int test_option_status(sd_event *e) {
+ uint8_t option1[] = {
+ /* IA NA */
+ 0x00, 0x03, 0x00, 0x12, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x01,
+ };
+ static const uint8_t option2[] = {
+ /* IA NA */
+ 0x00, 0x03, 0x00, 0x2e, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* IA Addr */
+ 0x00, 0x05, 0x00, 0x1e,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+ 0x01, 0x02, 0x03, 0x04, 0x0a, 0x0b, 0x0c, 0x0d,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x01,
+ };
+ static const uint8_t option3[] = {
+ /* IA NA */
+ 0x00, 0x03, 0x00, 0x34, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* IA Addr */
+ 0x00, 0x05, 0x00, 0x24,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+ 0x01, 0x02, 0x03, 0x04, 0x0a, 0x0b, 0x0c, 0x0d,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x08, 0x00, 0x00, 'f', 'o',
+ 'o', 'b', 'a', 'r',
+ };
+ static const uint8_t option4[] = {
+ /* IA PD */
+ 0x00, 0x19, 0x00, 0x2f, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* IA PD Prefix */
+ 0x00, 0x1a, 0x00, 0x1f,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x80, 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe,
+ 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x00,
+ };
+ static const uint8_t option5[] = {
+ /* IA PD */
+ 0x00, 0x19, 0x00, 0x52, 0x1a, 0x1d, 0x1a, 0x1d,
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02,
+ /* IA PD Prefix #1 */
+ 0x00, 0x1a, 0x00, 0x1f,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x80, 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe,
+ 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00,
+ /* status option */
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x00,
+ /* IA PD Prefix #2 */
+ 0x00, 0x1a, 0x00, 0x1f,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x80, 0x20, 0x01, 0x0d, 0xb8, 0xc0, 0x0l, 0xd0,
+ 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00,
+ 0x00, 0x0d, 0x00, 0x02, 0x00, 0x00,
+ };
+ DHCP6Option *option;
+ DHCP6IA ia, pd;
+ int r = 0;
+
+ log_debug("/* %s */", __func__);
+
+ zero(ia);
+ option = (DHCP6Option *)option1;
+ assert_se(sizeof(option1) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &ia, NULL);
+ assert_se(r == 0);
+ assert_se(ia.addresses == NULL);
+
+ option->len = htobe16(17);
+ r = dhcp6_option_parse_ia(option, &ia, NULL);
+ assert_se(r == -ENOBUFS);
+ assert_se(ia.addresses == NULL);
+
+ option->len = htobe16(sizeof(DHCP6Option));
+ r = dhcp6_option_parse_ia(option, &ia, NULL);
+ assert_se(r == -ENOBUFS);
+ assert_se(ia.addresses == NULL);
+
+ zero(ia);
+ option = (DHCP6Option *)option2;
+ assert_se(sizeof(option2) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &ia, NULL);
+ assert_se(r >= 0);
+ assert_se(ia.addresses == NULL);
+
+ zero(ia);
+ option = (DHCP6Option *)option3;
+ assert_se(sizeof(option3) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &ia, NULL);
+ assert_se(r >= 0);
+ assert_se(ia.addresses != NULL);
+ dhcp6_lease_free_ia(&ia);
+
+ zero(pd);
+ option = (DHCP6Option *)option4;
+ assert_se(sizeof(option4) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &pd, NULL);
+ assert_se(r >= 0);
+ assert_se(pd.addresses != NULL);
+ assert_se(memcmp(&pd.ia_pd.id, &option4[4], 4) == 0);
+ assert_se(memcmp(&pd.ia_pd.lifetime_t1, &option4[8], 4) == 0);
+ assert_se(memcmp(&pd.ia_pd.lifetime_t2, &option4[12], 4) == 0);
+ dhcp6_lease_free_ia(&pd);
+
+ zero(pd);
+ option = (DHCP6Option *)option5;
+ assert_se(sizeof(option5) == sizeof(DHCP6Option) + be16toh(option->len));
+
+ r = dhcp6_option_parse_ia(option, &pd, NULL);
+ assert_se(r >= 0);
+ assert_se(pd.addresses != NULL);
+ dhcp6_lease_free_ia(&pd);
+
+ return 0;
+}
+
+static uint8_t msg_advertise[198] = {
+ 0x02, 0x0f, 0xb4, 0xe5, 0x00, 0x01, 0x00, 0x0e,
+ 0x00, 0x01, 0x00, 0x01, 0x1a, 0x6b, 0xf3, 0x30,
+ 0x3c, 0x97, 0x0e, 0xcf, 0xa3, 0x7d, 0x00, 0x03,
+ 0x00, 0x5e, 0x0e, 0xcf, 0xa3, 0x7d, 0x00, 0x00,
+ 0x00, 0x50, 0x00, 0x00, 0x00, 0x78, 0x00, 0x05,
+ 0x00, 0x18, 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad,
+ 0xbe, 0xef, 0x78, 0xee, 0x1c, 0xf3, 0x09, 0x3c,
+ 0x55, 0xad, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00,
+ 0x00, 0xb4, 0x00, 0x0d, 0x00, 0x32, 0x00, 0x00,
+ 0x41, 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x28,
+ 0x65, 0x73, 0x29, 0x20, 0x72, 0x65, 0x6e, 0x65,
+ 0x77, 0x65, 0x64, 0x2e, 0x20, 0x47, 0x72, 0x65,
+ 0x65, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x66,
+ 0x72, 0x6f, 0x6d, 0x20, 0x70, 0x6c, 0x61, 0x6e,
+ 0x65, 0x74, 0x20, 0x45, 0x61, 0x72, 0x74, 0x68,
+ 0x00, 0x17, 0x00, 0x10, 0x20, 0x01, 0x0d, 0xb8,
+ 0xde, 0xad, 0xbe, 0xef, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x0b,
+ 0x03, 0x6c, 0x61, 0x62, 0x05, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x00, 0x00, 0x1f, 0x00, 0x10, 0x20,
+ 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x02, 0x00, 0x0e, 0x00, 0x01, 0x00, 0x01, 0x19,
+ 0x40, 0x5c, 0x53, 0x78, 0x2b, 0xcb, 0xb3, 0x6d,
+ 0x53, 0x00, 0x07, 0x00, 0x01, 0x00
+};
+
+static uint8_t msg_reply[191] = {
+ 0x07, 0xf7, 0x4e, 0x57, 0x00, 0x02, 0x00, 0x0e,
+ 0x00, 0x01, 0x00, 0x01, 0x19, 0x40, 0x5c, 0x53,
+ 0x78, 0x2b, 0xcb, 0xb3, 0x6d, 0x53, 0x00, 0x01,
+ 0x00, 0x0e, 0x00, 0x01, 0x00, 0x01, 0x1a, 0x6b,
+ 0xf3, 0x30, 0x3c, 0x97, 0x0e, 0xcf, 0xa3, 0x7d,
+ 0x00, 0x03, 0x00, 0x4a, 0x0e, 0xcf, 0xa3, 0x7d,
+ 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x78,
+ 0x00, 0x05, 0x00, 0x18, 0x20, 0x01, 0x0d, 0xb8,
+ 0xde, 0xad, 0xbe, 0xef, 0x78, 0xee, 0x1c, 0xf3,
+ 0x09, 0x3c, 0x55, 0xad, 0x00, 0x00, 0x00, 0x96,
+ 0x00, 0x00, 0x00, 0xb4, 0x00, 0x0d, 0x00, 0x1e,
+ 0x00, 0x00, 0x41, 0x6c, 0x6c, 0x20, 0x61, 0x64,
+ 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, 0x73, 0x20,
+ 0x77, 0x65, 0x72, 0x65, 0x20, 0x61, 0x73, 0x73,
+ 0x69, 0x67, 0x6e, 0x65, 0x64, 0x2e, 0x00, 0x17,
+ 0x00, 0x10, 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad,
+ 0xbe, 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x18, 0x00, 0x0b, 0x03, 0x6c,
+ 0x61, 0x62, 0x05, 0x69, 0x6e, 0x74, 0x72, 0x61,
+ 0x00, 0x00, 0x1f, 0x00, 0x10, 0x20, 0x01, 0x0d,
+ 0xb8, 0xde, 0xad, 0xbe, 0xef, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x27, 0x00,
+ 0x0e, 0x01, 0x06, 0x63, 0x6c, 0x69, 0x65, 0x6e,
+ 0x74, 0x05, 0x69, 0x6e, 0x74, 0x72, 0x61
+};
+
+static uint8_t fqdn_wire[16] = {
+ 0x04, 'h', 'o', 's', 't', 0x03, 'l', 'a', 'b',
+ 0x05, 'i', 'n', 't', 'r', 'a', 0x00
+};
+
+static int test_advertise_option(sd_event *e) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ DHCP6Message *advertise = (DHCP6Message *)msg_advertise;
+ size_t len = sizeof(msg_advertise) - sizeof(DHCP6Message), pos = 0;
+ uint32_t lt_pref, lt_valid;
+ bool opt_clientid = false;
+ const struct in6_addr *addrs;
+ uint8_t preference = 255;
+ struct in6_addr addr;
+ char **domains;
+ uint8_t *opt;
+ int r;
+ be32_t val;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(len >= sizeof(DHCP6Message));
+
+ assert_se(dhcp6_lease_new(&lease) >= 0);
+
+ assert_se(advertise->type == DHCP6_ADVERTISE);
+ assert_se((be32toh(advertise->transaction_id) & 0x00ffffff) ==
+ 0x0fb4e5);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *)&advertise->options[pos];
+ const uint16_t optcode = be16toh(option->code);
+ const uint16_t optlen = be16toh(option->len);
+ uint8_t *optval = option->data;
+
+ switch(optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ assert_se(optlen == 14);
+
+ opt_clientid = true;
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ assert_se(optlen == 94);
+ assert_se(!memcmp(optval, &msg_advertise[26], optlen));
+
+ val = htobe32(0x0ecfa37d);
+ assert_se(!memcmp(optval, &val, sizeof(val)));
+
+ val = htobe32(80);
+ assert_se(!memcmp(optval + 4, &val, sizeof(val)));
+
+ val = htobe32(120);
+ assert_se(!memcmp(optval + 8, &val, sizeof(val)));
+
+ assert_se(dhcp6_option_parse_ia(option, &lease->ia, NULL) >= 0);
+
+ break;
+
+ case SD_DHCP6_OPTION_SERVERID:
+ assert_se(optlen == 14);
+ assert_se(!memcmp(optval, &msg_advertise[179], optlen));
+
+ assert_se(dhcp6_lease_set_serverid(lease, optval,
+ optlen) >= 0);
+ break;
+
+ case SD_DHCP6_OPTION_PREFERENCE:
+ assert_se(optlen == 1);
+ assert_se(!*optval);
+
+ assert_se(dhcp6_lease_set_preference(lease,
+ *optval) >= 0);
+ break;
+
+ case SD_DHCP6_OPTION_ELAPSED_TIME:
+ assert_se(optlen == 2);
+
+ break;
+
+ case SD_DHCP6_OPTION_DNS_SERVERS:
+ assert_se(optlen == 16);
+ assert_se(dhcp6_lease_set_dns(lease, optval,
+ optlen) >= 0);
+ break;
+
+ case SD_DHCP6_OPTION_DOMAIN_LIST:
+ assert_se(optlen == 11);
+ assert_se(dhcp6_lease_set_domains(lease, optval,
+ optlen) >= 0);
+ break;
+
+ case SD_DHCP6_OPTION_SNTP_SERVERS:
+ assert_se(optlen == 16);
+ assert_se(dhcp6_lease_set_sntp(lease, optval,
+ optlen) >= 0);
+ break;
+
+ default:
+ break;
+ }
+
+ pos += sizeof(*option) + optlen;
+ }
+
+ assert_se(pos == len);
+ assert_se(opt_clientid);
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) >= 0);
+ assert_se(!memcmp(&addr, &msg_advertise[42], sizeof(addr)));
+ assert_se(lt_pref == 150);
+ assert_se(lt_valid == 180);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) >= 0);
+ assert_se(!memcmp(&addr, &msg_advertise[42], sizeof(addr)));
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+ sd_dhcp6_lease_reset_address_iter(lease);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) >= 0);
+ assert_se(!memcmp(&addr, &msg_advertise[42], sizeof(addr)));
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+
+ assert_se(dhcp6_lease_get_serverid(lease, &opt, &len) >= 0);
+ assert_se(len == 14);
+ assert_se(!memcmp(opt, &msg_advertise[179], len));
+
+ assert_se(dhcp6_lease_get_preference(lease, &preference) >= 0);
+ assert_se(preference == 0);
+
+ r = sd_dhcp6_lease_get_dns(lease, &addrs);
+ assert_se(r == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[124], r * 16));
+
+ r = sd_dhcp6_lease_get_domains(lease, &domains);
+ assert_se(r == 1);
+ assert_se(!strcmp("lab.intra", domains[0]));
+ assert_se(domains[1] == NULL);
+
+ r = sd_dhcp6_lease_get_ntp_addrs(lease, &addrs);
+ assert_se(r == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[159], r * 16));
+
+ return 0;
+}
+
+static int test_hangcheck(sd_event_source *s, uint64_t usec, void *userdata) {
+ assert_not_reached("Test case should have completed in 2 seconds");
+
+ return 0;
+}
+
+static void test_client_solicit_cb(sd_dhcp6_client *client, int event,
+ void *userdata) {
+ sd_event *e = userdata;
+ sd_dhcp6_lease *lease;
+ const struct in6_addr *addrs;
+ char **domains;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(e);
+ assert_se(event == SD_DHCP6_CLIENT_EVENT_IP_ACQUIRE);
+
+ assert_se(sd_dhcp6_client_get_lease(client, &lease) >= 0);
+
+ assert_se(sd_dhcp6_lease_get_domains(lease, &domains) == 1);
+ assert_se(!strcmp("lab.intra", domains[0]));
+ assert_se(domains[1] == NULL);
+
+ assert_se(sd_dhcp6_lease_get_dns(lease, &addrs) == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[124], 16));
+
+ assert_se(sd_dhcp6_lease_get_ntp_addrs(lease, &addrs) == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[159], 16));
+
+ assert_se(sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_DNS_SERVERS) == -EBUSY);
+
+ sd_event_exit(e, 0);
+}
+
+static int test_client_send_reply(DHCP6Message *request) {
+ DHCP6Message reply;
+
+ reply.transaction_id = request->transaction_id;
+ reply.type = DHCP6_REPLY;
+
+ memcpy(msg_reply, &reply.transaction_id, 4);
+
+ memcpy(&msg_reply[26], test_duid, sizeof(test_duid));
+
+ memcpy(&msg_reply[44], &test_iaid, sizeof(test_iaid));
+
+ assert_se(write(test_dhcp_fd[1], msg_reply, sizeof(msg_reply))
+ == sizeof(msg_reply));
+
+ return 0;
+}
+
+static int test_client_verify_request(DHCP6Message *request, size_t len) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ bool found_clientid = false, found_iana = false, found_serverid = false,
+ found_elapsed_time = false, found_fqdn = false;
+ uint32_t lt_pref, lt_valid;
+ struct in6_addr addr;
+ size_t pos = 0;
+ be32_t val;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(request->type == DHCP6_REQUEST);
+ assert_se(dhcp6_lease_new(&lease) >= 0);
+
+ len -= sizeof(DHCP6Message);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *)&request->options[pos];
+ uint16_t optcode = be16toh(option->code);
+ uint16_t optlen = be16toh(option->len);
+ uint8_t *optval = option->data;
+
+ switch(optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ assert_se(!found_clientid);
+ found_clientid = true;
+
+ assert_se(!memcmp(optval, &test_duid,
+ sizeof(test_duid)));
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ assert_se(!found_iana);
+ found_iana = true;
+
+ assert_se(optlen == 40);
+ assert_se(!memcmp(optval, &test_iaid, sizeof(test_iaid)));
+
+ val = htobe32(80);
+ assert_se(!memcmp(optval + 4, &val, sizeof(val)));
+
+ val = htobe32(120);
+ assert_se(!memcmp(optval + 8, &val, sizeof(val)));
+
+ assert_se(dhcp6_option_parse_ia(option, &lease->ia, NULL) >= 0);
+
+ break;
+
+ case SD_DHCP6_OPTION_SERVERID:
+ assert_se(!found_serverid);
+ found_serverid = true;
+
+ assert_se(optlen == 14);
+ assert_se(!memcmp(&msg_advertise[179], optval, optlen));
+
+ break;
+
+ case SD_DHCP6_OPTION_ELAPSED_TIME:
+ assert_se(!found_elapsed_time);
+ found_elapsed_time = true;
+
+ assert_se(optlen == 2);
+
+ break;
+ case SD_DHCP6_OPTION_FQDN:
+ assert_se(!found_fqdn);
+ found_fqdn = true;
+
+ assert_se(optlen == 17);
+
+ assert_se(optval[0] == 0x01);
+ assert_se(!memcmp(optval + 1, fqdn_wire, sizeof(fqdn_wire)));
+ break;
+ }
+
+ pos += sizeof(*option) + optlen;
+ }
+
+ assert_se(found_clientid && found_iana && found_serverid &&
+ found_elapsed_time);
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) >= 0);
+ assert_se(!memcmp(&addr, &msg_advertise[42], sizeof(addr)));
+ assert_se(lt_pref == 150);
+ assert_se(lt_valid == 180);
+
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+
+ return 0;
+}
+
+static int test_client_send_advertise(DHCP6Message *solicit) {
+ DHCP6Message advertise;
+
+ advertise.transaction_id = solicit->transaction_id;
+ advertise.type = DHCP6_ADVERTISE;
+
+ memcpy(msg_advertise, &advertise.transaction_id, 4);
+
+ memcpy(&msg_advertise[8], test_duid, sizeof(test_duid));
+
+ memcpy(&msg_advertise[26], &test_iaid, sizeof(test_iaid));
+
+ assert_se(write(test_dhcp_fd[1], msg_advertise, sizeof(msg_advertise))
+ == sizeof(msg_advertise));
+
+ return 0;
+}
+
+static int test_client_verify_solicit(DHCP6Message *solicit, size_t len) {
+ bool found_clientid = false, found_iana = false,
+ found_elapsed_time = false, found_fqdn = false;
+ size_t pos = 0;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(solicit->type == DHCP6_SOLICIT);
+
+ len -= sizeof(DHCP6Message);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *)&solicit->options[pos];
+ uint16_t optcode = be16toh(option->code);
+ uint16_t optlen = be16toh(option->len);
+ uint8_t *optval = option->data;
+
+ switch(optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ assert_se(!found_clientid);
+ found_clientid = true;
+
+ assert_se(optlen == sizeof(test_duid));
+ memcpy(&test_duid, optval, sizeof(test_duid));
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ assert_se(!found_iana);
+ found_iana = true;
+
+ assert_se(optlen == 12);
+
+ memcpy(&test_iaid, optval, sizeof(test_iaid));
+
+ break;
+
+ case SD_DHCP6_OPTION_ELAPSED_TIME:
+ assert_se(!found_elapsed_time);
+ found_elapsed_time = true;
+
+ assert_se(optlen == 2);
+
+ break;
+
+ case SD_DHCP6_OPTION_FQDN:
+ assert_se(!found_fqdn);
+ found_fqdn = true;
+
+ assert_se(optlen == 17);
+
+ assert_se(optval[0] == 0x01);
+ assert_se(!memcmp(optval + 1, fqdn_wire, sizeof(fqdn_wire)));
+
+ break;
+ }
+
+ pos += sizeof(*option) + optlen;
+ }
+
+ assert_se(pos == len);
+ assert_se(found_clientid && found_iana && found_elapsed_time);
+
+ return 0;
+}
+
+static void test_client_information_cb(sd_dhcp6_client *client, int event,
+ void *userdata) {
+ sd_event *e = userdata;
+ sd_dhcp6_lease *lease;
+ const struct in6_addr *addrs;
+ struct in6_addr address = { { { 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01 } } };
+ char **domains;
+ const char *fqdn;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(e);
+ assert_se(event == SD_DHCP6_CLIENT_EVENT_INFORMATION_REQUEST);
+
+ assert_se(sd_dhcp6_client_get_lease(client, &lease) >= 0);
+
+ assert_se(sd_dhcp6_lease_get_domains(lease, &domains) == 1);
+ assert_se(!strcmp("lab.intra", domains[0]));
+ assert_se(domains[1] == NULL);
+
+ assert_se(sd_dhcp6_lease_get_fqdn(lease, &fqdn) >= 0);
+ assert_se(streq(fqdn, "client.intra"));
+
+ assert_se(sd_dhcp6_lease_get_dns(lease, &addrs) == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[124], 16));
+
+ assert_se(sd_dhcp6_lease_get_ntp_addrs(lease, &addrs) == 1);
+ assert_se(!memcmp(addrs, &msg_advertise[159], 16));
+
+ assert_se(sd_dhcp6_client_set_information_request(client, false) == -EBUSY);
+ assert_se(sd_dhcp6_client_set_callback(client, NULL, e) >= 0);
+ assert_se(sd_dhcp6_client_stop(client) >= 0);
+ assert_se(sd_dhcp6_client_set_information_request(client, false) >= 0);
+
+ assert_se(sd_dhcp6_client_set_callback(client,
+ test_client_solicit_cb, e) >= 0);
+
+ assert_se(sd_dhcp6_client_set_local_address(client, &address) >= 0);
+
+ assert_se(sd_dhcp6_client_start(client) >= 0);
+
+}
+
+static int test_client_verify_information_request(DHCP6Message *information_request,
+ size_t len) {
+
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL;
+ size_t pos = 0;
+ bool found_clientid = false, found_elapsed_time = false;
+ struct in6_addr addr;
+ uint32_t lt_pref, lt_valid;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(information_request->type == DHCP6_INFORMATION_REQUEST);
+ assert_se(dhcp6_lease_new(&lease) >= 0);
+
+ len -= sizeof(DHCP6Message);
+
+ while (pos < len) {
+ DHCP6Option *option = (DHCP6Option *)&information_request->options[pos];
+ uint16_t optcode = be16toh(option->code);
+ uint16_t optlen = be16toh(option->len);
+ uint8_t *optval = option->data;
+
+ switch(optcode) {
+ case SD_DHCP6_OPTION_CLIENTID:
+ assert_se(!found_clientid);
+ found_clientid = true;
+
+ assert_se(optlen == sizeof(test_duid));
+ memcpy(&test_duid, optval, sizeof(test_duid));
+
+ break;
+
+ case SD_DHCP6_OPTION_IA_NA:
+ assert_not_reached("IA TA option must not be present");
+
+ break;
+
+ case SD_DHCP6_OPTION_SERVERID:
+ assert_not_reached("Server ID option must not be present");
+
+ break;
+
+ case SD_DHCP6_OPTION_ELAPSED_TIME:
+ assert_se(!found_elapsed_time);
+ found_elapsed_time = true;
+
+ assert_se(optlen == 2);
+
+ break;
+ }
+
+ pos += sizeof(*option) + optlen;
+ }
+
+ assert_se(pos == len);
+ assert_se(found_clientid && found_elapsed_time);
+
+ sd_dhcp6_lease_reset_address_iter(lease);
+
+ assert_se(sd_dhcp6_lease_get_address(lease, &addr, &lt_pref,
+ &lt_valid) == -ENOMSG);
+
+ return 0;
+}
+
+int dhcp6_network_send_udp_socket(int s, struct in6_addr *server_address,
+ const void *packet, size_t len) {
+ struct in6_addr mcast =
+ IN6ADDR_ALL_DHCP6_RELAY_AGENTS_AND_SERVERS_INIT;
+ DHCP6Message *message;
+
+ assert_se(s == test_dhcp_fd[0]);
+ assert_se(server_address);
+ assert_se(packet);
+ assert_se(len > sizeof(DHCP6Message) + 4);
+ assert_se(IN6_ARE_ADDR_EQUAL(server_address, &mcast));
+
+ message = (DHCP6Message *)packet;
+
+ assert_se(message->transaction_id & 0x00ffffff);
+
+ if (test_client_message_num == 0) {
+ test_client_verify_information_request(message, len);
+ test_client_send_reply(message);
+ test_client_message_num++;
+ } else if (test_client_message_num == 1) {
+ test_client_verify_solicit(message, len);
+ test_client_send_advertise(message);
+ test_client_message_num++;
+ } else if (test_client_message_num == 2) {
+ test_client_verify_request(message, len);
+ test_client_send_reply(message);
+ test_client_message_num++;
+ }
+
+ return len;
+}
+
+int dhcp6_network_bind_udp_socket(int ifindex, struct in6_addr *local_address) {
+ assert_se(ifindex == test_ifindex);
+
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_dhcp_fd) < 0)
+ return -errno;
+
+ return test_dhcp_fd[0];
+}
+
+static int test_client_solicit(sd_event *e) {
+ sd_dhcp6_client *client;
+ struct in6_addr address = { { { 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01 } } };
+ int val;
+
+ log_debug("/* %s */", __func__);
+
+ assert_se(sd_dhcp6_client_new(&client) >= 0);
+ assert_se(client);
+
+ assert_se(sd_dhcp6_client_attach_event(client, e, 0) >= 0);
+
+ assert_se(sd_dhcp6_client_set_ifindex(client, test_ifindex) == 0);
+ assert_se(sd_dhcp6_client_set_mac(client, (const uint8_t *) &mac_addr,
+ sizeof (mac_addr),
+ ARPHRD_ETHER) >= 0);
+ assert_se(sd_dhcp6_client_set_fqdn(client, "host.lab.intra") == 1);
+
+ assert_se(sd_dhcp6_client_get_information_request(client, &val) >= 0);
+ assert_se(val == 0);
+ assert_se(sd_dhcp6_client_set_information_request(client, 42) >= 0);
+ assert_se(sd_dhcp6_client_get_information_request(client, &val) >= 0);
+ assert_se(val);
+
+ assert_se(sd_dhcp6_client_set_callback(client,
+ test_client_information_cb, e) >= 0);
+
+ assert_se(sd_event_add_time_relative(e, &hangcheck, clock_boottime_or_monotonic(),
+ 2 * USEC_PER_SEC, 0,
+ test_hangcheck, NULL) >= 0);
+
+ assert_se(sd_dhcp6_client_set_local_address(client, &address) >= 0);
+
+ assert_se(sd_dhcp6_client_start(client) >= 0);
+
+ sd_event_loop(e);
+
+ hangcheck = sd_event_source_unref(hangcheck);
+
+ assert_se(!sd_dhcp6_client_unref(client));
+
+ test_dhcp_fd[1] = safe_close(test_dhcp_fd[1]);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e;
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_client_basic(e);
+ test_option(e);
+ test_option_status(e);
+ test_advertise_option(e);
+ test_client_solicit(e);
+ test_parse_domain(e);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-ipv4ll-manual.c b/src/libsystemd-network/test-ipv4ll-manual.c
new file mode 100644
index 0000000..a253acb
--- /dev/null
+++ b/src/libsystemd-network/test-ipv4ll-manual.c
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/veth.h>
+
+#include "sd-event.h"
+#include "sd-ipv4ll.h"
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void ll_handler(sd_ipv4ll *ll, int event, void *userdata) {
+ _cleanup_free_ char *address = NULL;
+ struct in_addr addr = {};
+
+ assert_se(ll);
+
+ if (sd_ipv4ll_get_address(ll, &addr) >= 0)
+ assert_se(in_addr_to_string(AF_INET, (const union in_addr_union*) &addr, &address) >= 0);
+
+ switch (event) {
+ case SD_IPV4LL_EVENT_BIND:
+ log_info("bound %s", strna(address));
+ break;
+ case SD_IPV4LL_EVENT_CONFLICT:
+ log_info("conflict on %s", strna(address));
+ break;
+ case SD_IPV4LL_EVENT_STOP:
+ log_error("the client was stopped with address %s", strna(address));
+ break;
+ default:
+ assert_not_reached("invalid LL event");
+ }
+}
+
+static int client_run(int ifindex, const char *seed_str, const struct ether_addr *ha, sd_event *e) {
+ sd_ipv4ll *ll;
+
+ assert_se(sd_ipv4ll_new(&ll) >= 0);
+ assert_se(sd_ipv4ll_attach_event(ll, e, 0) >= 0);
+
+ assert_se(sd_ipv4ll_set_ifindex(ll, ifindex) >= 0);
+ assert_se(sd_ipv4ll_set_mac(ll, ha) >= 0);
+ assert_se(sd_ipv4ll_set_callback(ll, ll_handler, NULL) >= 0);
+
+ if (seed_str) {
+ unsigned seed;
+
+ assert_se(safe_atou(seed_str, &seed) >= 0);
+
+ assert_se(sd_ipv4ll_set_address_seed(ll, seed) >= 0);
+ }
+
+ log_info("starting IPv4LL client");
+
+ assert_se(sd_ipv4ll_start(ll) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ assert_se(!sd_ipv4ll_unref(ll));
+
+ return EXIT_SUCCESS;
+}
+
+static int test_ll(const char *ifname, const char *seed) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ struct ether_addr ha;
+ int ifindex;
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(sd_netlink_attach_event(rtnl, e, 0) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, 0) >= 0);
+ assert_se(sd_netlink_message_append_string(m, IFLA_IFNAME, ifname) >= 0);
+ assert_se(sd_netlink_call(rtnl, m, 0, &reply) >= 0);
+
+ assert_se(sd_rtnl_message_link_get_ifindex(reply, &ifindex) >= 0);
+ assert_se(sd_netlink_message_read_ether_addr(reply, IFLA_ADDRESS, &ha) >= 0);
+
+ client_run(ifindex, seed, &ha, e);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc == 2)
+ return test_ll(argv[1], NULL);
+ else if (argc == 3)
+ return test_ll(argv[1], argv[2]);
+ else {
+ log_error("This program takes one or two arguments.\n"
+ "\t %s <ifname> [<seed>]", program_invocation_short_name);
+ return EXIT_FAILURE;
+ }
+}
diff --git a/src/libsystemd-network/test-ipv4ll.c b/src/libsystemd-network/test-ipv4ll.c
new file mode 100644
index 0000000..b213f41
--- /dev/null
+++ b/src/libsystemd-network/test-ipv4ll.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+***/
+
+#include <errno.h>
+#include <netinet/if_ether.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-ipv4ll.h"
+
+#include "arp-util.h"
+#include "fd-util.h"
+#include "socket-util.h"
+#include "tests.h"
+#include "util.h"
+
+static bool verbose = false;
+static bool extended = false;
+static int test_fd[2];
+
+static int basic_request_handler_bind = 0;
+static int basic_request_handler_stop = 0;
+static void* basic_request_handler_userdata = (void*) 0xCABCAB;
+
+static void basic_request_handler(sd_ipv4ll *ll, int event, void *userdata) {
+ assert_se(userdata == basic_request_handler_userdata);
+
+ switch(event) {
+ case SD_IPV4LL_EVENT_STOP:
+ basic_request_handler_stop = 1;
+ break;
+ case SD_IPV4LL_EVENT_BIND:
+ basic_request_handler_bind = 1;
+ break;
+ default:
+ assert_se(0);
+ break;
+ }
+}
+
+static int arp_network_send_raw_socket(int fd, int ifindex,
+ const struct ether_arp *arp) {
+ assert_se(arp);
+ assert_se(ifindex > 0);
+ assert_se(fd >= 0);
+
+ if (send(fd, arp, sizeof(struct ether_arp), 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int arp_send_probe(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha) {
+ struct ether_arp ea = {};
+
+ assert_se(fd >= 0);
+ assert_se(ifindex > 0);
+ assert_se(pa != 0);
+ assert_se(ha);
+
+ return arp_network_send_raw_socket(fd, ifindex, &ea);
+}
+
+int arp_send_announcement(int fd, int ifindex,
+ be32_t pa, const struct ether_addr *ha) {
+ struct ether_arp ea = {};
+
+ assert_se(fd >= 0);
+ assert_se(ifindex > 0);
+ assert_se(pa != 0);
+ assert_se(ha);
+
+ return arp_network_send_raw_socket(fd, ifindex, &ea);
+}
+
+int arp_network_bind_raw_socket(int ifindex, be32_t address, const struct ether_addr *eth_mac) {
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+static void test_public_api_setters(sd_event *e) {
+ struct in_addr address = {};
+ uint64_t seed = 0;
+ sd_ipv4ll *ll;
+ struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}};
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(sd_ipv4ll_new(&ll) == 0);
+ assert_se(ll);
+
+ assert_se(sd_ipv4ll_attach_event(NULL, NULL, 0) == -EINVAL);
+ assert_se(sd_ipv4ll_attach_event(ll, e, 0) == 0);
+ assert_se(sd_ipv4ll_attach_event(ll, e, 0) == -EBUSY);
+
+ assert_se(sd_ipv4ll_set_callback(NULL, NULL, NULL) == -EINVAL);
+ assert_se(sd_ipv4ll_set_callback(ll, NULL, NULL) == 0);
+
+ assert_se(sd_ipv4ll_set_address(ll, &address) == -EINVAL);
+ address.s_addr |= htobe32(169U << 24 | 254U << 16);
+ assert_se(sd_ipv4ll_set_address(ll, &address) == -EINVAL);
+ address.s_addr |= htobe32(0x00FF);
+ assert_se(sd_ipv4ll_set_address(ll, &address) == -EINVAL);
+ address.s_addr |= htobe32(0xF000);
+ assert_se(sd_ipv4ll_set_address(ll, &address) == 0);
+ address.s_addr |= htobe32(0x0F00);
+ assert_se(sd_ipv4ll_set_address(ll, &address) == -EINVAL);
+
+ assert_se(sd_ipv4ll_set_address_seed(NULL, seed) == -EINVAL);
+ assert_se(sd_ipv4ll_set_address_seed(ll, seed) == 0);
+
+ assert_se(sd_ipv4ll_set_mac(NULL, NULL) == -EINVAL);
+ assert_se(sd_ipv4ll_set_mac(ll, NULL) == -EINVAL);
+ assert_se(sd_ipv4ll_set_mac(ll, &mac_addr) == 0);
+
+ assert_se(sd_ipv4ll_set_ifindex(NULL, -1) == -EINVAL);
+ assert_se(sd_ipv4ll_set_ifindex(ll, -1) == -EINVAL);
+ assert_se(sd_ipv4ll_set_ifindex(ll, -99) == -EINVAL);
+ assert_se(sd_ipv4ll_set_ifindex(ll, 1) == 0);
+ assert_se(sd_ipv4ll_set_ifindex(ll, 99) == 0);
+
+ assert_se(sd_ipv4ll_ref(ll) == ll);
+ assert_se(sd_ipv4ll_unref(ll) == NULL);
+
+ /* Cleanup */
+ assert_se(sd_ipv4ll_unref(ll) == NULL);
+}
+
+static void test_basic_request(sd_event *e) {
+
+ sd_ipv4ll *ll;
+ struct ether_arp arp;
+ struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}};
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(sd_ipv4ll_new(&ll) == 0);
+ assert_se(sd_ipv4ll_start(ll) == -EINVAL);
+
+ assert_se(sd_ipv4ll_attach_event(ll, e, 0) == 0);
+ assert_se(sd_ipv4ll_start(ll) == -EINVAL);
+
+ assert_se(sd_ipv4ll_set_mac(ll, &mac_addr) == 0);
+ assert_se(sd_ipv4ll_start(ll) == -EINVAL);
+
+ assert_se(sd_ipv4ll_set_callback(ll, basic_request_handler,
+ basic_request_handler_userdata) == 0);
+ assert_se(sd_ipv4ll_start(ll) == -EINVAL);
+
+ assert_se(sd_ipv4ll_set_ifindex(ll, 1) == 0);
+ assert_se(sd_ipv4ll_start(ll) == 1);
+
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(sd_ipv4ll_start(ll) == 0);
+
+ assert_se(sd_ipv4ll_is_running(ll));
+
+ /* PROBE */
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(recv(test_fd[1], &arp, sizeof(struct ether_arp), 0) == sizeof(struct ether_arp));
+
+ if (extended) {
+ /* PROBE */
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(recv(test_fd[1], &arp, sizeof(struct ether_arp), 0) == sizeof(struct ether_arp));
+
+ /* PROBE */
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(recv(test_fd[1], &arp, sizeof(struct ether_arp), 0) == sizeof(struct ether_arp));
+
+ sd_event_run(e, (uint64_t) -1);
+ assert_se(basic_request_handler_bind == 1);
+ }
+
+ sd_ipv4ll_stop(ll);
+ assert_se(basic_request_handler_stop == 1);
+
+ /* Cleanup */
+ assert_se(sd_ipv4ll_unref(ll) == NULL);
+ safe_close(test_fd[1]);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ test_public_api_setters(e);
+ test_basic_request(e);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-lldp.c b/src/libsystemd-network/test-lldp.c
new file mode 100644
index 0000000..c52d422
--- /dev/null
+++ b/src/libsystemd-network/test-lldp.c
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <net/ethernet.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+#include "sd-lldp.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "lldp-network.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tests.h"
+
+#define TEST_LLDP_PORT "em1"
+#define TEST_LLDP_TYPE_SYSTEM_NAME "systemd-lldp"
+#define TEST_LLDP_TYPE_SYSTEM_DESC "systemd-lldp-desc"
+
+static int test_fd[2] = { -1, -1 };
+static int lldp_handler_calls;
+
+int lldp_network_bind_raw_socket(int ifindex) {
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+static void lldp_handler(sd_lldp *lldp, sd_lldp_event event, sd_lldp_neighbor *n, void *userdata) {
+ lldp_handler_calls++;
+}
+
+static int start_lldp(sd_lldp **lldp, sd_event *e, sd_lldp_callback_t cb, void *cb_data) {
+ int r;
+
+ r = sd_lldp_new(lldp);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_ifindex(*lldp, 42);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_callback(*lldp, cb, cb_data);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_attach_event(*lldp, e, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_start(*lldp);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int stop_lldp(sd_lldp *lldp) {
+ int r;
+
+ r = sd_lldp_stop(lldp);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_detach_event(lldp);
+ if (r < 0)
+ return r;
+
+ sd_lldp_unref(lldp);
+ safe_close(test_fd[1]);
+
+ return 0;
+}
+
+static void test_receive_basic_packet(sd_event *e) {
+
+ static const uint8_t frame[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x07, 0x04, 0x00, 0x01, 0x02, /* Chassis: MAC, 00:01:02:03:04:05 */
+ 0x03, 0x04, 0x05,
+ 0x04, 0x04, 0x05, 0x31, 0x2f, 0x33, /* Port: interface name, "1/3" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ /* LLDP optional TLVs */
+ 0x08, 0x04, 0x50, 0x6f, 0x72, 0x74, /* Port Description: "Port" */
+ 0x0a, 0x03, 0x53, 0x59, 0x53, /* System Name: "SYS" */
+ 0x0c, 0x04, 0x66, 0x6f, 0x6f, 0x00, /* System Description: "foo" (NULL-terminated) */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+
+ sd_lldp *lldp;
+ sd_lldp_neighbor **neighbors;
+ uint8_t type;
+ const void *data;
+ uint16_t ttl;
+ size_t length;
+ const char *str;
+
+ lldp_handler_calls = 0;
+ assert_se(start_lldp(&lldp, e, lldp_handler, NULL) == 0);
+
+ assert_se(write(test_fd[1], frame, sizeof(frame)) == sizeof(frame));
+ sd_event_run(e, 0);
+ assert_se(lldp_handler_calls == 1);
+ assert_se(sd_lldp_get_neighbors(lldp, &neighbors) == 1);
+
+ assert_se(sd_lldp_neighbor_get_chassis_id(neighbors[0], &type, &data, &length) == 0);
+ assert_se(type == SD_LLDP_CHASSIS_SUBTYPE_MAC_ADDRESS);
+ assert_se(length == ETH_ALEN);
+ assert_se(!memcmp(data, "\x00\x01\x02\x03\x04\x05", ETH_ALEN));
+
+ assert_se(sd_lldp_neighbor_get_port_id(neighbors[0], &type, &data, &length) == 0);
+ assert_se(type == SD_LLDP_PORT_SUBTYPE_INTERFACE_NAME);
+ assert_se(length == 3);
+ assert_se(!memcmp(data, "1/3", 3));
+
+ assert_se(sd_lldp_neighbor_get_port_description(neighbors[0], &str) == 0);
+ assert_se(streq(str, "Port"));
+
+ assert_se(sd_lldp_neighbor_get_system_name(neighbors[0], &str) == 0);
+ assert_se(streq(str, "SYS"));
+
+ assert_se(sd_lldp_neighbor_get_system_description(neighbors[0], &str) == 0);
+ assert_se(streq(str, "foo"));
+
+ assert_se(sd_lldp_neighbor_get_ttl(neighbors[0], &ttl) == 0);
+ assert_se(ttl == 120);
+
+ sd_lldp_neighbor_unref(neighbors[0]);
+ free(neighbors);
+
+ assert_se(stop_lldp(lldp) == 0);
+}
+
+static void test_receive_incomplete_packet(sd_event *e) {
+ sd_lldp *lldp;
+ sd_lldp_neighbor **neighbors;
+ uint8_t frame[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x07, 0x04, 0x00, 0x01, 0x02, /* Chassis: MAC, 00:01:02:03:04:05 */
+ 0x03, 0x04, 0x05,
+ 0x04, 0x04, 0x05, 0x31, 0x2f, 0x33, /* Port: interface name, "1/3" */
+ /* Missing TTL */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+
+ lldp_handler_calls = 0;
+ assert_se(start_lldp(&lldp, e, lldp_handler, NULL) == 0);
+
+ assert_se(write(test_fd[1], frame, sizeof(frame)) == sizeof(frame));
+ sd_event_run(e, 0);
+ assert_se(lldp_handler_calls == 0);
+ assert_se(sd_lldp_get_neighbors(lldp, &neighbors) == 0);
+
+ assert_se(stop_lldp(lldp) == 0);
+}
+
+static void test_receive_oui_packet(sd_event *e) {
+ sd_lldp *lldp;
+ sd_lldp_neighbor **neighbors;
+ uint8_t frame[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x07, 0x04, 0x00, 0x01, 0x02, /* Chassis: MAC, 00:01:02:03:04:05 */
+ 0x03, 0x04, 0x05,
+ 0x04, 0x04, 0x05, 0x31, 0x2f, 0x33, /* Port TLV: interface name, "1/3" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ /* LLDP optional TLVs */
+ 0xfe, 0x06, 0x00, 0x80, 0xc2, 0x01, /* Port VLAN ID: 0x1234 */
+ 0x12, 0x34,
+ 0xfe, 0x07, 0x00, 0x80, 0xc2, 0x02, /* Port and protocol: flag 1, PPVID 0x7788 */
+ 0x01, 0x77, 0x88,
+ 0xfe, 0x0d, 0x00, 0x80, 0xc2, 0x03, /* VLAN Name: ID 0x1234, name "Vlan51" */
+ 0x12, 0x34, 0x06, 0x56, 0x6c, 0x61,
+ 0x6e, 0x35, 0x31,
+ 0xfe, 0x06, 0x00, 0x80, 0xc2, 0x06, /* Management VID: 0x0102 */
+ 0x01, 0x02,
+ 0xfe, 0x09, 0x00, 0x80, 0xc2, 0x07, /* Link aggregation: status 1, ID 0x00140012 */
+ 0x01, 0x00, 0x14, 0x00, 0x12,
+ 0xfe, 0x07, 0x00, 0x12, 0x0f, 0x02, /* 802.3 Power via MDI: PSE, MDI enabled */
+ 0x07, 0x01, 0x00,
+ 0x00, 0x00 /* End of LLDPDU */
+ };
+
+ lldp_handler_calls = 0;
+ assert_se(start_lldp(&lldp, e, lldp_handler, NULL) == 0);
+
+ assert_se(write(test_fd[1], frame, sizeof(frame)) == sizeof(frame));
+ sd_event_run(e, 0);
+ assert_se(lldp_handler_calls == 1);
+ assert_se(sd_lldp_get_neighbors(lldp, &neighbors) == 1);
+
+ assert_se(sd_lldp_neighbor_tlv_rewind(neighbors[0]) >= 0);
+ assert_se(sd_lldp_neighbor_tlv_is_type(neighbors[0], SD_LLDP_TYPE_CHASSIS_ID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_type(neighbors[0], SD_LLDP_TYPE_PORT_ID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_type(neighbors[0], SD_LLDP_TYPE_TTL) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_PORT_VLAN_ID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_PORT_PROTOCOL_VLAN_ID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_VLAN_NAME) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_MANAGEMENT_VID) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_1, SD_LLDP_OUI_802_1_SUBTYPE_LINK_AGGREGATION) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_oui(neighbors[0], SD_LLDP_OUI_802_3, SD_LLDP_OUI_802_3_SUBTYPE_POWER_VIA_MDI) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) > 0);
+ assert_se(sd_lldp_neighbor_tlv_is_type(neighbors[0], SD_LLDP_TYPE_END) > 0);
+ assert_se(sd_lldp_neighbor_tlv_next(neighbors[0]) == 0);
+
+ sd_lldp_neighbor_unref(neighbors[0]);
+ free(neighbors);
+
+ assert_se(stop_lldp(lldp) == 0);
+}
+
+static void test_multiple_neighbors_sorted(sd_event *e) {
+
+ static const uint8_t frame1[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x04, 0x01, '1', '/', '2', /* Chassis component: "1/2" */
+ 0x04, 0x04, 0x02, '2', '/', '3', /* Port component: "2/3" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame2[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x04, 0x01, '2', '/', '1', /* Chassis component: "2/1" */
+ 0x04, 0x04, 0x02, '1', '/', '3', /* Port component: "1/3" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame3[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x05, 0x01, '2', '/', '1', '0', /* Chassis component: "2/10" */
+ 0x04, 0x04, 0x02, '1', '/', '0', /* Port component: "1/0" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame4[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x05, 0x01, '2', '/', '1', '9', /* Chassis component: "2/19" */
+ 0x04, 0x04, 0x02, '1', '/', '0', /* Port component: "1/0" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame5[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x04, 0x01, '1', '/', '2', /* Chassis component: "1/2" */
+ 0x04, 0x05, 0x02, '2', '/', '1', '0', /* Port component: "2/10" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const uint8_t frame6[] = {
+ /* Ethernet header */
+ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03, /* Destination MAC */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, /* Source MAC */
+ 0x88, 0xcc, /* Ethertype */
+ /* LLDP mandatory TLVs */
+ 0x02, 0x04, 0x01, '1', '/', '2', /* Chassis component: "1/2" */
+ 0x04, 0x05, 0x02, '2', '/', '3', '9', /* Port component: "2/10" */
+ 0x06, 0x02, 0x00, 0x78, /* TTL: 120 seconds */
+ 0x00, 0x00 /* End Of LLDPDU */
+ };
+ static const char* expected[] = {
+ /* ordered pairs of Chassis+Port */
+ "1/2", "2/10",
+ "1/2", "2/3",
+ "1/2", "2/39",
+ "2/1", "1/3",
+ "2/10", "1/0",
+ "2/19", "1/0",
+ };
+
+ sd_lldp *lldp;
+ sd_lldp_neighbor **neighbors;
+ int i;
+ uint8_t type;
+ const void *data;
+ size_t length, expected_length;
+ uint16_t ttl;
+
+ lldp_handler_calls = 0;
+ assert_se(start_lldp(&lldp, e, lldp_handler, NULL) == 0);
+
+ assert_se(write(test_fd[1], frame1, sizeof(frame1)) == sizeof(frame1));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame2, sizeof(frame2)) == sizeof(frame2));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame3, sizeof(frame3)) == sizeof(frame3));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame4, sizeof(frame4)) == sizeof(frame4));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame5, sizeof(frame5)) == sizeof(frame5));
+ sd_event_run(e, 0);
+ assert_se(write(test_fd[1], frame6, sizeof(frame6)) == sizeof(frame6));
+ sd_event_run(e, 0);
+ assert_se(lldp_handler_calls == 6);
+
+ assert_se(sd_lldp_get_neighbors(lldp, &neighbors) == 6);
+
+ for (i = 0; i < 6; i++) {
+ assert_se(sd_lldp_neighbor_get_chassis_id(neighbors[i], &type, &data, &length) == 0);
+ assert_se(type == SD_LLDP_CHASSIS_SUBTYPE_CHASSIS_COMPONENT);
+ expected_length = strlen(expected[2 * i]);
+ assert_se(length == expected_length);
+ assert_se(memcmp(data, expected[2 * i], expected_length) == 0);
+
+ assert_se(sd_lldp_neighbor_get_port_id(neighbors[i], &type, &data, &length) == 0);
+ assert_se(type == SD_LLDP_PORT_SUBTYPE_PORT_COMPONENT);
+ expected_length = strlen(expected[2 * i + 1]);
+ assert_se(length == expected_length);
+ assert_se(memcmp(data, expected[2 * i + 1], expected_length) == 0);
+
+ assert_se(sd_lldp_neighbor_get_ttl(neighbors[i], &ttl) == 0);
+ assert_se(ttl == 120);
+ }
+
+ for (i = 0; i < 6; i++)
+ sd_lldp_neighbor_unref(neighbors[i]);
+ free(neighbors);
+
+ assert_se(stop_lldp(lldp) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+
+ test_setup_logging(LOG_DEBUG);
+
+ /* LLDP reception tests */
+ assert_se(sd_event_new(&e) == 0);
+ test_receive_basic_packet(e);
+ test_receive_incomplete_packet(e);
+ test_receive_oui_packet(e);
+ test_multiple_neighbors_sorted(e);
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-ndisc-ra.c b/src/libsystemd-network/test-ndisc-ra.c
new file mode 100644
index 0000000..927e21b
--- /dev/null
+++ b/src/libsystemd-network/test-ndisc-ra.c
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#include "sd-radv.h"
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "icmp6-util.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static struct ether_addr mac_addr = {
+ .ether_addr_octet = { 0x78, 0x2b, 0xcb, 0xb3, 0x6d, 0x53 }
+};
+
+static uint8_t advertisement[] = {
+ /* ICMPv6 Router Advertisement, no checksum */
+ 0x86, 0x00, 0x00, 0x00, 0x40, 0xc0, 0x00, 0xb4,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* Source Link Layer Address Option */
+ 0x01, 0x01, 0x78, 0x2b, 0xcb, 0xb3, 0x6d, 0x53,
+ /* Prefix Information Option */
+ 0x03, 0x04, 0x40, 0xc0, 0x00, 0x00, 0x01, 0xf4,
+ 0x00, 0x00, 0x01, 0xb8, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* Prefix Information Option */
+ 0x03, 0x04, 0x40, 0xc0, 0x00, 0x27, 0x8d, 0x00,
+ 0x00, 0x09, 0x3a, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* Prefix Information Option */
+ 0x03, 0x04, 0x30, 0xc0, 0x00, 0x27, 0x8d, 0x00,
+ 0x00, 0x09, 0x3a, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x01, 0x0d, 0xb8, 0xc0, 0x01, 0x0d, 0xad,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* Recursive DNS Server Option */
+ 0x19, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+ 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ /* DNS Search List Option */
+ 0x1f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+ 0x03, 0x6c, 0x61, 0x62, 0x05, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static sd_event_source *test_hangcheck;
+static bool test_stopped;
+static int test_fd[2];
+static sd_event_source *recv_router_advertisement;
+static struct {
+ struct in6_addr address;
+ unsigned char prefixlen;
+ uint32_t valid;
+ uint32_t preferred;
+ bool successful;
+} prefix[] = {
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 64,
+ 500, 440, true },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 64,
+ /* indicate default valid and preferred lifetimes for the test code */
+ 0, 0, true },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 58,
+ 0, 0,
+ /* indicate that this prefix already exists */
+ false },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 120,
+ 0, 0,
+ /* indicate that this prefix already exists */
+ false },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0x0b, 0x16, 0xd0, 0x0d,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 12,
+ 0, 0,
+ /* indicate that this prefix already exists */
+ false },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0xc0, 0x01, 0x0d, 0xad,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 48,
+ 0, 0, true },
+ { { { { 0x20, 0x01, 0x0d, 0xb8, 0xc0, 0x01, 0x0d, 0xad,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } }, 60,
+ 0, 0,
+ /* indicate that this prefix already exists */
+ false },
+};
+
+static const struct in6_addr test_rdnss = { { { 0x20, 0x01, 0x0d, 0xb8,
+ 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01 } } };
+static const char *test_dnssl[] = { "lab.intra",
+ NULL };
+
+static int test_rs_hangcheck(sd_event_source *s, uint64_t usec,
+ void *userdata) {
+ assert_se(false);
+
+ return 0;
+}
+
+static void test_radv_prefix(void) {
+ sd_radv_prefix *p;
+
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(sd_radv_prefix_new(&p) >= 0);
+
+ assert_se(sd_radv_prefix_set_onlink(NULL, true) < 0);
+ assert_se(sd_radv_prefix_set_onlink(p, true) >= 0);
+ assert_se(sd_radv_prefix_set_onlink(p, false) >= 0);
+
+ assert_se(sd_radv_prefix_set_address_autoconfiguration(NULL, true) < 0);
+ assert_se(sd_radv_prefix_set_address_autoconfiguration(p, true) >= 0);
+ assert_se(sd_radv_prefix_set_address_autoconfiguration(p, false) >= 0);
+
+ assert_se(sd_radv_prefix_set_valid_lifetime(NULL, true) < 0);
+ assert_se(sd_radv_prefix_set_valid_lifetime(p, ~0) >= 0);
+ assert_se(sd_radv_prefix_set_valid_lifetime(p, 42) >= 0);
+ assert_se(sd_radv_prefix_set_valid_lifetime(p, 0) >= 0);
+
+ assert_se(sd_radv_prefix_set_preferred_lifetime(NULL, true) < 0);
+ assert_se(sd_radv_prefix_set_preferred_lifetime(p, ~0) >= 0);
+ assert_se(sd_radv_prefix_set_preferred_lifetime(p, 42) >= 0);
+ assert_se(sd_radv_prefix_set_preferred_lifetime(p, 0) >= 0);
+
+ assert_se(sd_radv_prefix_set_prefix(NULL, NULL, 0) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, NULL, 0) < 0);
+
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 64) >= 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 0) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 1) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 2) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 3) >= 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 125) >= 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 128) >= 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 129) < 0);
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[0].address, 255) < 0);
+
+ p = sd_radv_prefix_unref(p);
+ assert_se(!p);
+}
+
+static void test_radv(void) {
+ sd_radv *ra;
+
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(sd_radv_new(&ra) >= 0);
+ assert_se(ra);
+
+ assert_se(sd_radv_set_ifindex(NULL, 0) < 0);
+ assert_se(sd_radv_set_ifindex(ra, 0) < 0);
+ assert_se(sd_radv_set_ifindex(ra, -1) < 0);
+ assert_se(sd_radv_set_ifindex(ra, -2) < 0);
+ assert_se(sd_radv_set_ifindex(ra, 42) >= 0);
+
+ assert_se(sd_radv_set_mac(NULL, NULL) < 0);
+ assert_se(sd_radv_set_mac(ra, NULL) >= 0);
+ assert_se(sd_radv_set_mac(ra, &mac_addr) >= 0);
+
+ assert_se(sd_radv_set_mtu(NULL, 0) < 0);
+ assert_se(sd_radv_set_mtu(ra, 0) < 0);
+ assert_se(sd_radv_set_mtu(ra, 1279) < 0);
+ assert_se(sd_radv_set_mtu(ra, 1280) >= 0);
+ assert_se(sd_radv_set_mtu(ra, ~0) >= 0);
+
+ assert_se(sd_radv_set_hop_limit(NULL, 0) < 0);
+ assert_se(sd_radv_set_hop_limit(ra, 0) >= 0);
+ assert_se(sd_radv_set_hop_limit(ra, ~0) >= 0);
+
+ assert_se(sd_radv_set_router_lifetime(NULL, 0) < 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 0) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, ~0) >= 0);
+
+ assert_se(sd_radv_set_preference(NULL, 0) < 0);
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_LOW) >= 0);
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_MEDIUM) >= 0);
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_HIGH) >= 0);
+ assert_se(sd_radv_set_preference(ra, ~0) < 0);
+
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_HIGH) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 42000) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 0) < 0);
+ assert_se(sd_radv_set_preference(ra, SD_NDISC_PREFERENCE_MEDIUM) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 0) >= 0);
+
+ assert_se(sd_radv_set_managed_information(NULL, true) < 0);
+ assert_se(sd_radv_set_managed_information(ra, true) >= 0);
+ assert_se(sd_radv_set_managed_information(ra, false) >= 0);
+
+ assert_se(sd_radv_set_other_information(NULL, true) < 0);
+ assert_se(sd_radv_set_other_information(ra, true) >= 0);
+ assert_se(sd_radv_set_other_information(ra, false) >= 0);
+
+ assert_se(sd_radv_set_rdnss(NULL, 0, NULL, 0) < 0);
+ assert_se(sd_radv_set_rdnss(ra, 0, NULL, 0) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 0, NULL, 128) < 0);
+ assert_se(sd_radv_set_rdnss(ra, 600, &test_rdnss, 0) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 600, &test_rdnss, 1) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 0, &test_rdnss, 1) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 0, NULL, 0) >= 0);
+
+ assert_se(sd_radv_set_dnssl(ra, 0, NULL) >= 0);
+ assert_se(sd_radv_set_dnssl(ra, 600, NULL) >= 0);
+ assert_se(sd_radv_set_dnssl(ra, 0, (char **)test_dnssl) >= 0);
+ assert_se(sd_radv_set_dnssl(ra, 600, (char **)test_dnssl) >= 0);
+
+ ra = sd_radv_unref(ra);
+ assert_se(!ra);
+}
+
+int icmp6_bind_router_solicitation(int ifindex) {
+ return -ENOSYS;
+}
+
+int icmp6_bind_router_advertisement(int ifindex) {
+ assert_se(ifindex == 42);
+
+ return test_fd[1];
+}
+
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr) {
+
+ return 0;
+}
+
+int icmp6_receive(int fd, void *iov_base, size_t iov_len,
+ struct in6_addr *dst, triple_timestamp *timestamp) {
+ assert_se(read (fd, iov_base, iov_len) == (ssize_t)iov_len);
+
+ if (timestamp)
+ triple_timestamp_get(timestamp);
+
+ return 0;
+}
+
+static int radv_recv(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_radv *ra = userdata;
+ unsigned char buf[168];
+ size_t i;
+
+ assert_se(read(test_fd[0], &buf, sizeof(buf)) == sizeof(buf));
+
+ /* router lifetime must be zero when test is stopped */
+ if (test_stopped) {
+ advertisement[6] = 0x00;
+ advertisement[7] = 0x00;
+ }
+
+ printf ("Received Router Advertisement with lifetime %u\n",
+ (advertisement[6] << 8) + advertisement[7]);
+
+ /* test only up to buf size, rest is not yet implemented */
+ for (i = 0; i < sizeof(buf); i++) {
+ if (!(i % 8))
+ printf("%3zd: ", i);
+
+ printf("0x%02x", buf[i]);
+
+ assert_se(buf[i] == advertisement[i]);
+
+ if ((i + 1) % 8)
+ printf(", ");
+ else
+ printf("\n");
+ }
+
+ if (test_stopped) {
+ sd_event *e;
+
+ e = sd_radv_get_event(ra);
+ sd_event_exit(e, 0);
+
+ return 0;
+ }
+
+ assert_se(sd_radv_stop(ra) >= 0);
+ test_stopped = true;
+
+ return 0;
+}
+
+static void test_ra(void) {
+ sd_event *e;
+ sd_radv *ra;
+ unsigned i;
+
+ printf("* %s\n", __FUNCTION__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) >= 0);
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_radv_new(&ra) >= 0);
+ assert_se(ra);
+
+ assert_se(sd_radv_attach_event(ra, e, 0) >= 0);
+
+ assert_se(sd_radv_set_ifindex(ra, 42) >= 0);
+ assert_se(sd_radv_set_mac(ra, &mac_addr) >= 0);
+ assert_se(sd_radv_set_router_lifetime(ra, 180) >= 0);
+ assert_se(sd_radv_set_hop_limit(ra, 64) >= 0);
+ assert_se(sd_radv_set_managed_information(ra, true) >= 0);
+ assert_se(sd_radv_set_other_information(ra, true) >= 0);
+ assert_se(sd_radv_set_rdnss(ra, 60, &test_rdnss, 1) >= 0);
+ assert_se(sd_radv_set_dnssl(ra, 60, (char **)test_dnssl) >= 0);
+
+ for (i = 0; i < ELEMENTSOF(prefix); i++) {
+ sd_radv_prefix *p;
+
+ printf("Test prefix %u\n", i);
+ assert_se(sd_radv_prefix_new(&p) >= 0);
+
+ assert_se(sd_radv_prefix_set_prefix(p, &prefix[i].address,
+ prefix[i].prefixlen) >= 0);
+ if (prefix[i].valid)
+ assert_se(sd_radv_prefix_set_valid_lifetime(p, prefix[i].valid) >= 0);
+ if (prefix[i].preferred)
+ assert_se(sd_radv_prefix_set_preferred_lifetime(p, prefix[i].preferred) >= 0);
+
+ assert_se((sd_radv_add_prefix(ra, p, false) >= 0) == prefix[i].successful);
+ assert_se(sd_radv_add_prefix(ra, p, false) < 0);
+
+ p = sd_radv_prefix_unref(p);
+ assert_se(!p);
+ }
+
+ assert_se(sd_event_add_io(e, &recv_router_advertisement, test_fd[0],
+ EPOLLIN, radv_recv, ra) >= 0);
+
+ assert_se(sd_event_add_time_relative(
+ e, &test_hangcheck, clock_boottime_or_monotonic(),
+ 2 *USEC_PER_SEC, 0,
+ test_rs_hangcheck, NULL) >= 0);
+
+ assert_se(sd_radv_start(ra) >= 0);
+
+ sd_event_loop(e);
+
+ test_hangcheck = sd_event_source_unref(test_hangcheck);
+
+ ra = sd_radv_unref(ra);
+ assert_se(!ra);
+
+ close(test_fd[0]);
+
+ sd_event_unref(e);
+}
+
+int main(int argc, char *argv[]) {
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_radv_prefix();
+ test_radv();
+ test_ra();
+
+ printf("* done\n");
+ return 0;
+}
diff --git a/src/libsystemd-network/test-ndisc-rs.c b/src/libsystemd-network/test-ndisc-rs.c
new file mode 100644
index 0000000..1b2bba8
--- /dev/null
+++ b/src/libsystemd-network/test-ndisc-rs.c
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#include "sd-ndisc.h"
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "icmp6-util.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "ndisc-internal.h"
+#include "tests.h"
+
+static struct ether_addr mac_addr = {
+ .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}
+};
+
+static bool verbose = false;
+static sd_event_source *test_hangcheck;
+static int test_fd[2];
+static sd_ndisc *test_timeout_nd;
+
+typedef int (*send_ra_t)(uint8_t flags);
+static send_ra_t send_ra_function;
+
+static void router_dump(sd_ndisc_router *rt) {
+ struct in6_addr addr;
+ char buf[FORMAT_TIMESTAMP_MAX];
+ uint8_t hop_limit;
+ uint64_t t, flags;
+ uint32_t mtu;
+ uint16_t lifetime;
+ unsigned preference;
+ int r;
+
+ assert_se(rt);
+
+ log_info("--");
+ assert_se(sd_ndisc_router_get_address(rt, &addr) == -ENODATA);
+
+ assert_se(sd_ndisc_router_get_timestamp(rt, CLOCK_REALTIME, &t) >= 0);
+ log_info("Timestamp: %s", format_timestamp(buf, sizeof(buf), t));
+
+ assert_se(sd_ndisc_router_get_timestamp(rt, CLOCK_MONOTONIC, &t) >= 0);
+ log_info("Monotonic: %" PRIu64, t);
+
+ if (sd_ndisc_router_get_hop_limit(rt, &hop_limit) < 0)
+ log_info("No hop limit set");
+ else
+ log_info("Hop limit: %u", hop_limit);
+
+ assert_se(sd_ndisc_router_get_flags(rt, &flags) >= 0);
+ log_info("Flags: <%s|%s>",
+ flags & ND_RA_FLAG_OTHER ? "OTHER" : "",
+ flags & ND_RA_FLAG_MANAGED ? "MANAGED" : "");
+
+ assert_se(sd_ndisc_router_get_preference(rt, &preference) >= 0);
+ log_info("Preference: %s",
+ preference == SD_NDISC_PREFERENCE_LOW ? "low" :
+ preference == SD_NDISC_PREFERENCE_HIGH ? "high" : "medium");
+
+ assert_se(sd_ndisc_router_get_lifetime(rt, &lifetime) >= 0);
+ log_info("Lifetime: %" PRIu16, lifetime);
+
+ if (sd_ndisc_router_get_mtu(rt, &mtu) < 0)
+ log_info("No MTU set");
+ else
+ log_info("MTU: %" PRIu32, mtu);
+
+ r = sd_ndisc_router_option_rewind(rt);
+ for (;;) {
+ uint8_t type;
+
+ assert_se(r >= 0);
+
+ if (r == 0)
+ break;
+
+ assert_se(sd_ndisc_router_option_get_type(rt, &type) >= 0);
+
+ log_info(">> Option %u", type);
+
+ switch (type) {
+
+ case SD_NDISC_OPTION_SOURCE_LL_ADDRESS:
+ case SD_NDISC_OPTION_TARGET_LL_ADDRESS: {
+ _cleanup_free_ char *c = NULL;
+ const void *p;
+ size_t n;
+
+ assert_se(sd_ndisc_router_option_get_raw(rt, &p, &n) >= 0);
+ assert_se(n > 2);
+ assert_se(c = hexmem((uint8_t*) p + 2, n - 2));
+
+ log_info("Address: %s", c);
+ break;
+ }
+
+ case SD_NDISC_OPTION_PREFIX_INFORMATION: {
+ uint32_t lifetime_valid, lifetime_preferred;
+ unsigned prefix_len;
+ uint8_t pfl;
+ struct in6_addr a;
+ char buff[INET6_ADDRSTRLEN];
+
+ assert_se(sd_ndisc_router_prefix_get_valid_lifetime(rt, &lifetime_valid) >= 0);
+ log_info("Valid Lifetime: %" PRIu32, lifetime_valid);
+
+ assert_se(sd_ndisc_router_prefix_get_preferred_lifetime(rt, &lifetime_preferred) >= 0);
+ log_info("Preferred Lifetime: %" PRIu32, lifetime_preferred);
+
+ assert_se(sd_ndisc_router_prefix_get_flags(rt, &pfl) >= 0);
+ log_info("Flags: <%s|%s>",
+ pfl & ND_OPT_PI_FLAG_ONLINK ? "ONLINK" : "",
+ pfl & ND_OPT_PI_FLAG_AUTO ? "AUTO" : "");
+
+ assert_se(sd_ndisc_router_prefix_get_prefixlen(rt, &prefix_len) >= 0);
+ log_info("Prefix Length: %u", prefix_len);
+
+ assert_se(sd_ndisc_router_prefix_get_address(rt, &a) >= 0);
+ log_info("Prefix: %s", inet_ntop(AF_INET6, &a, buff, sizeof(buff)));
+
+ break;
+ }
+
+ case SD_NDISC_OPTION_RDNSS: {
+ const struct in6_addr *a;
+ uint32_t lt;
+ int n, i;
+
+ n = sd_ndisc_router_rdnss_get_addresses(rt, &a);
+ assert_se(n > 0);
+
+ for (i = 0; i < n; i++) {
+ char buff[INET6_ADDRSTRLEN];
+ log_info("DNS: %s", inet_ntop(AF_INET6, a + i, buff, sizeof(buff)));
+ }
+
+ assert_se(sd_ndisc_router_rdnss_get_lifetime(rt, &lt) >= 0);
+ log_info("Lifetime: %" PRIu32, lt);
+ break;
+ }
+
+ case SD_NDISC_OPTION_DNSSL: {
+ _cleanup_strv_free_ char **l = NULL;
+ uint32_t lt;
+ int n, i;
+
+ n = sd_ndisc_router_dnssl_get_domains(rt, &l);
+ assert_se(n > 0);
+
+ for (i = 0; i < n; i++)
+ log_info("Domain: %s", l[i]);
+
+ assert_se(sd_ndisc_router_dnssl_get_lifetime(rt, &lt) >= 0);
+ log_info("Lifetime: %" PRIu32, lt);
+ break;
+ }}
+
+ r = sd_ndisc_router_option_next(rt);
+ }
+}
+
+static int test_rs_hangcheck(sd_event_source *s, uint64_t usec,
+ void *userdata) {
+ assert_se(false);
+
+ return 0;
+}
+
+int icmp6_bind_router_solicitation(int ifindex) {
+ assert_se(ifindex == 42);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, test_fd) < 0)
+ return -errno;
+
+ return test_fd[0];
+}
+
+int icmp6_bind_router_advertisement(int ifindex) {
+ return -ENOSYS;
+}
+
+int icmp6_receive(int fd, void *iov_base, size_t iov_len,
+ struct in6_addr *dst, triple_timestamp *timestamp) {
+ assert_se(read (fd, iov_base, iov_len) == (ssize_t)iov_len);
+
+ if (timestamp)
+ triple_timestamp_get(timestamp);
+
+ return 0;
+}
+
+static int send_ra(uint8_t flags) {
+ uint8_t advertisement[] = {
+ 0x86, 0x00, 0xde, 0x83, 0x40, 0xc0, 0x00, 0xb4,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x03, 0x04, 0x40, 0xc0, 0x00, 0x00, 0x01, 0xf4,
+ 0x00, 0x00, 0x01, 0xb8, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x19, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+ 0x20, 0x01, 0x0d, 0xb8, 0xde, 0xad, 0xbe, 0xef,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x1f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c,
+ 0x03, 0x6c, 0x61, 0x62, 0x05, 0x69, 0x6e, 0x74,
+ 0x72, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x78, 0x2b, 0xcb, 0xb3, 0x6d, 0x53,
+ };
+
+ advertisement[5] = flags;
+
+ assert_se(write(test_fd[1], advertisement, sizeof(advertisement)) ==
+ sizeof(advertisement));
+
+ if (verbose)
+ printf(" sent RA with flag 0x%02x\n", flags);
+
+ return 0;
+}
+
+int icmp6_send_router_solicitation(int s, const struct ether_addr *ether_addr) {
+ if (!send_ra_function)
+ return 0;
+
+ return send_ra_function(0);
+}
+
+static void test_callback(sd_ndisc *nd, sd_ndisc_event event, sd_ndisc_router *rt, void *userdata) {
+ sd_event *e = userdata;
+ static unsigned idx = 0;
+ uint64_t flags_array[] = {
+ 0,
+ 0,
+ 0,
+ ND_RA_FLAG_OTHER,
+ ND_RA_FLAG_MANAGED
+ };
+ uint64_t flags;
+ uint32_t mtu;
+
+ assert_se(nd);
+
+ if (event != SD_NDISC_EVENT_ROUTER)
+ return;
+
+ router_dump(rt);
+
+ assert_se(sd_ndisc_router_get_flags(rt, &flags) >= 0);
+ assert_se(flags == flags_array[idx]);
+ idx++;
+
+ if (verbose)
+ printf(" got event 0x%02" PRIx64 "\n", flags);
+
+ if (idx < ELEMENTSOF(flags_array)) {
+ send_ra(flags_array[idx]);
+ return;
+ }
+
+ assert_se(sd_ndisc_get_mtu(nd, &mtu) == -ENODATA);
+
+ sd_event_exit(e, 0);
+}
+
+static void test_rs(void) {
+ sd_event *e;
+ sd_ndisc *nd;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ send_ra_function = send_ra;
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_ndisc_new(&nd) >= 0);
+ assert_se(nd);
+
+ assert_se(sd_ndisc_attach_event(nd, e, 0) >= 0);
+
+ assert_se(sd_ndisc_set_ifindex(nd, 42) >= 0);
+ assert_se(sd_ndisc_set_mac(nd, &mac_addr) >= 0);
+ assert_se(sd_ndisc_set_callback(nd, test_callback, e) >= 0);
+
+ assert_se(sd_event_add_time_relative(
+ e, &test_hangcheck, clock_boottime_or_monotonic(),
+ 30 * USEC_PER_SEC, 0,
+ test_rs_hangcheck, NULL) >= 0);
+
+ assert_se(sd_ndisc_stop(nd) >= 0);
+ assert_se(sd_ndisc_start(nd) >= 0);
+ assert_se(sd_ndisc_start(nd) >= 0);
+ assert_se(sd_ndisc_stop(nd) >= 0);
+
+ assert_se(sd_ndisc_start(nd) >= 0);
+
+ sd_event_loop(e);
+
+ test_hangcheck = sd_event_source_unref(test_hangcheck);
+
+ nd = sd_ndisc_unref(nd);
+ assert_se(!nd);
+
+ close(test_fd[1]);
+
+ sd_event_unref(e);
+}
+
+static int test_timeout_value(uint8_t flags) {
+ static int count = 0;
+ static usec_t last = 0;
+ sd_ndisc *nd = test_timeout_nd;
+ usec_t min, max;
+ char time_string_min[FORMAT_TIMESPAN_MAX];
+ char time_string_nd[FORMAT_TIMESPAN_MAX];
+ char time_string_max[FORMAT_TIMESPAN_MAX];
+
+ assert_se(nd);
+ assert_se(nd->event);
+
+ if (++count >= 20)
+ sd_event_exit(nd->event, 0);
+
+ if (last == 0) {
+ /* initial RT = IRT + RAND*IRT */
+ min = NDISC_ROUTER_SOLICITATION_INTERVAL -
+ NDISC_ROUTER_SOLICITATION_INTERVAL / 10;
+ max = NDISC_ROUTER_SOLICITATION_INTERVAL +
+ NDISC_ROUTER_SOLICITATION_INTERVAL / 10;
+ } else {
+ /* next RT = 2*RTprev + RAND*RTprev */
+ min = 2 * last - last / 10;
+ max = 2 * last + last / 10;
+ }
+
+ /* final RT > MRT */
+ if (last * 2 > NDISC_MAX_ROUTER_SOLICITATION_INTERVAL) {
+ min = NDISC_MAX_ROUTER_SOLICITATION_INTERVAL -
+ NDISC_MAX_ROUTER_SOLICITATION_INTERVAL / 10;
+ max = NDISC_MAX_ROUTER_SOLICITATION_INTERVAL +
+ NDISC_MAX_ROUTER_SOLICITATION_INTERVAL / 10;
+ }
+
+ format_timespan(time_string_min, FORMAT_TIMESPAN_MAX,
+ min, USEC_PER_MSEC);
+ format_timespan(time_string_nd, FORMAT_TIMESPAN_MAX,
+ nd->retransmit_time, USEC_PER_MSEC);
+ format_timespan(time_string_max, FORMAT_TIMESPAN_MAX,
+ max, USEC_PER_MSEC);
+
+ log_info("backoff timeout interval %2d %s%s <= %s <= %s",
+ count,
+ (last * 2 > NDISC_MAX_ROUTER_SOLICITATION_INTERVAL)? "(max) ": "",
+ time_string_min, time_string_nd, time_string_max);
+
+ assert_se(min <= nd->retransmit_time);
+ assert_se(max >= nd->retransmit_time);
+
+ last = nd->retransmit_time;
+
+ assert_se(sd_event_source_set_time(nd->timeout_event_source, 0) >= 0);
+
+ return 0;
+}
+
+static void test_timeout(void) {
+ sd_event *e;
+ sd_ndisc *nd;
+
+ if (verbose)
+ printf("* %s\n", __FUNCTION__);
+
+ send_ra_function = test_timeout_value;
+
+ assert_se(sd_event_new(&e) >= 0);
+
+ assert_se(sd_ndisc_new(&nd) >= 0);
+ assert_se(nd);
+
+ test_timeout_nd = nd;
+
+ assert_se(sd_ndisc_attach_event(nd, e, 0) >= 0);
+
+ assert_se(sd_ndisc_set_ifindex(nd, 42) >= 0);
+ assert_se(sd_ndisc_set_mac(nd, &mac_addr) >= 0);
+
+ assert_se(sd_event_add_time_relative(
+ e, &test_hangcheck, clock_boottime_or_monotonic(),
+ 30 * USEC_PER_SEC, 0,
+ test_rs_hangcheck, NULL) >= 0);
+
+ assert_se(sd_ndisc_start(nd) >= 0);
+
+ sd_event_loop(e);
+
+ test_hangcheck = sd_event_source_unref(test_hangcheck);
+
+ nd = sd_ndisc_unref(nd);
+
+ sd_event_unref(e);
+}
+
+int main(int argc, char *argv[]) {
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_rs();
+ test_timeout();
+
+ return 0;
+}
diff --git a/src/libsystemd-network/test-sd-dhcp-lease.c b/src/libsystemd-network/test-sd-dhcp-lease.c
new file mode 100644
index 0000000..9f13226
--- /dev/null
+++ b/src/libsystemd-network/test-sd-dhcp-lease.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "dhcp-lease-internal.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+
+/* According to RFC1035 section 4.1.4, a domain name in a message can be either:
+ * - a sequence of labels ending in a zero octet
+ * - a pointer
+ * - a sequence of labels ending with a pointer
+ */
+static void test_dhcp_lease_parse_search_domains_basic(void) {
+ int r;
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x03, 'B', 'A', 'R', 0x00,
+ 0x04, 'A', 'B', 'C', 'D', 0x03, 'E', 'F', 'G', 0x00,
+ };
+
+ r = dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf), &domains);
+ assert_se(r == 2);
+ assert_se(streq(domains[0], "FOO.BAR"));
+ assert_se(streq(domains[1], "ABCD.EFG"));
+}
+
+static void test_dhcp_lease_parse_search_domains_ptr(void) {
+ int r;
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x00, 0xC0, 0x00,
+ };
+
+ r = dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf), &domains);
+ assert_se(r == 2);
+ assert_se(streq(domains[0], "FOO"));
+ assert_se(streq(domains[1], "FOO"));
+}
+
+static void test_dhcp_lease_parse_search_domains_labels_and_ptr(void) {
+ int r;
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x03, 'B', 'A', 'R', 0x00,
+ 0x03, 'A', 'B', 'C', 0xC0, 0x04,
+ };
+
+ r = dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf), &domains);
+ assert_se(r == 2);
+ assert_se(streq(domains[0], "FOO.BAR"));
+ assert_se(streq(domains[1], "ABC.BAR"));
+}
+
+/* Tests for exceptions. */
+
+static void test_dhcp_lease_parse_search_domains_no_data(void) {
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[3] = {0, 0, 0};
+
+ assert_se(dhcp_lease_parse_search_domains(NULL, 0, &domains) == -ENODATA);
+ assert_se(dhcp_lease_parse_search_domains(optionbuf, 0, &domains) == -ENODATA);
+}
+
+static void test_dhcp_lease_parse_search_domains_loops(void) {
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x00, 0x03, 'B', 'A', 'R', 0xC0, 0x06,
+ };
+
+ assert_se(dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf), &domains) == -EBADMSG);
+}
+
+static void test_dhcp_lease_parse_search_domains_wrong_len(void) {
+ _cleanup_strv_free_ char **domains = NULL;
+ static const uint8_t optionbuf[] = {
+ 0x03, 'F', 'O', 'O', 0x03, 'B', 'A', 'R', 0x00,
+ 0x04, 'A', 'B', 'C', 'D', 0x03, 'E', 'F', 'G', 0x00,
+ };
+
+ assert_se(dhcp_lease_parse_search_domains(optionbuf, sizeof(optionbuf) - 5, &domains) == -EBADMSG);
+}
+
+int main(int argc, char *argv[]) {
+ test_dhcp_lease_parse_search_domains_basic();
+ test_dhcp_lease_parse_search_domains_ptr();
+ test_dhcp_lease_parse_search_domains_labels_and_ptr();
+ test_dhcp_lease_parse_search_domains_no_data();
+ test_dhcp_lease_parse_search_domains_loops();
+ test_dhcp_lease_parse_search_domains_wrong_len();
+ return 0;
+}
diff --git a/src/libsystemd/disable-mempool.c b/src/libsystemd/disable-mempool.c
new file mode 100644
index 0000000..1baf91f
--- /dev/null
+++ b/src/libsystemd/disable-mempool.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "mempool.h"
+
+const bool mempool_use_allowed = false;
diff --git a/src/libsystemd/libsystemd.pc.in b/src/libsystemd/libsystemd.pc.in
new file mode 100644
index 0000000..74f33b7
--- /dev/null
+++ b/src/libsystemd/libsystemd.pc.in
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@rootlibdir@
+includedir=@includedir@
+
+Name: systemd
+Description: systemd Library
+URL: @PROJECT_URL@
+Version: @PROJECT_VERSION@
+Libs: -L${libdir} -lsystemd
+Cflags: -I${includedir}
diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym
new file mode 100644
index 0000000..f83b364
--- /dev/null
+++ b/src/libsystemd/libsystemd.sym
@@ -0,0 +1,738 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+LIBSYSTEMD_209 {
+global:
+ /* sd-journal */
+ sd_journal_print;
+ sd_journal_printv;
+ sd_journal_send;
+ sd_journal_sendv;
+ sd_journal_stream_fd;
+ sd_journal_open;
+ sd_journal_close;
+ sd_journal_previous;
+ sd_journal_next;
+ sd_journal_previous_skip;
+ sd_journal_next_skip;
+ sd_journal_get_realtime_usec;
+ sd_journal_get_monotonic_usec;
+ sd_journal_get_data;
+ sd_journal_enumerate_data;
+ sd_journal_restart_data;
+ sd_journal_add_match;
+ sd_journal_flush_matches;
+ sd_journal_seek_head;
+ sd_journal_seek_tail;
+ sd_journal_seek_monotonic_usec;
+ sd_journal_seek_realtime_usec;
+ sd_journal_seek_cursor;
+ sd_journal_get_cursor;
+ sd_journal_get_fd;
+ sd_journal_process;
+ sd_journal_print_with_location;
+ sd_journal_printv_with_location;
+ sd_journal_send_with_location;
+ sd_journal_sendv_with_location;
+ sd_journal_get_cutoff_realtime_usec;
+ sd_journal_get_cutoff_monotonic_usec;
+ sd_journal_wait;
+ sd_journal_open_directory;
+ sd_journal_add_disjunction;
+ sd_journal_perror;
+ sd_journal_perror_with_location;
+ sd_journal_get_usage;
+ sd_journal_test_cursor;
+ sd_journal_query_unique;
+ sd_journal_enumerate_unique;
+ sd_journal_restart_unique;
+ sd_journal_get_catalog;
+ sd_journal_get_catalog_for_message_id;
+ sd_journal_set_data_threshold;
+ sd_journal_get_data_threshold;
+ sd_journal_reliable_fd;
+ sd_journal_get_events;
+ sd_journal_get_timeout;
+ sd_journal_add_conjunction;
+ sd_journal_open_files;
+ sd_journal_open_container;
+
+ /* sd-daemon */
+ sd_booted;
+ sd_is_fifo;
+ sd_is_mq;
+ sd_is_socket;
+ sd_is_socket_inet;
+ sd_is_socket_unix;
+ sd_is_special;
+ sd_listen_fds;
+ sd_notify;
+ sd_notifyf;
+ sd_watchdog_enabled;
+
+ /* sd-id128 */
+ sd_id128_to_string;
+ sd_id128_from_string;
+ sd_id128_randomize;
+ sd_id128_get_machine;
+ sd_id128_get_boot;
+
+ /* sd-login */
+ sd_get_seats;
+ sd_get_sessions;
+ sd_get_uids;
+ sd_login_monitor_flush;
+ sd_login_monitor_get_fd;
+ sd_login_monitor_new;
+ sd_login_monitor_unref;
+ sd_pid_get_owner_uid;
+ sd_pid_get_session;
+ sd_seat_can_multi_session;
+ sd_seat_get_active;
+ sd_seat_get_sessions;
+ sd_session_get_seat;
+ sd_session_get_uid;
+ sd_session_is_active;
+ sd_uid_get_seats;
+ sd_uid_get_sessions;
+ sd_uid_get_state;
+ sd_uid_is_on_seat;
+ sd_pid_get_unit;
+ sd_session_get_service;
+ sd_session_get_type;
+ sd_session_get_class;
+ sd_session_get_display;
+ sd_session_get_state;
+ sd_seat_can_tty;
+ sd_seat_can_graphical;
+ sd_session_get_tty;
+ sd_login_monitor_get_events;
+ sd_login_monitor_get_timeout;
+ sd_pid_get_user_unit;
+ sd_pid_get_machine_name;
+ sd_get_machine_names;
+ sd_pid_get_slice;
+ sd_session_get_vt;
+ sd_session_is_remote;
+ sd_session_get_remote_user;
+ sd_session_get_remote_host;
+local:
+ *;
+};
+
+LIBSYSTEMD_211 {
+global:
+ sd_machine_get_class;
+ sd_peer_get_session;
+ sd_peer_get_owner_uid;
+ sd_peer_get_unit;
+ sd_peer_get_user_unit;
+ sd_peer_get_machine_name;
+ sd_peer_get_slice;
+} LIBSYSTEMD_209;
+
+LIBSYSTEMD_213 {
+global:
+ sd_uid_get_display;
+} LIBSYSTEMD_211;
+
+LIBSYSTEMD_214 {
+global:
+ sd_pid_notify;
+ sd_pid_notifyf;
+} LIBSYSTEMD_213;
+
+LIBSYSTEMD_216 {
+global:
+ sd_machine_get_ifindices;
+} LIBSYSTEMD_214;
+
+LIBSYSTEMD_217 {
+global:
+ sd_session_get_desktop;
+} LIBSYSTEMD_216;
+
+LIBSYSTEMD_219 {
+global:
+ sd_pid_notify_with_fds;
+} LIBSYSTEMD_217;
+
+LIBSYSTEMD_220 {
+global:
+ sd_pid_get_user_slice;
+ sd_peer_get_user_slice;
+} LIBSYSTEMD_219;
+
+LIBSYSTEMD_221 {
+global:
+ /* sd-bus */
+ sd_bus_default;
+ sd_bus_default_user;
+ sd_bus_default_system;
+ sd_bus_open;
+ sd_bus_open_user;
+ sd_bus_open_system;
+ sd_bus_open_system_remote;
+ sd_bus_open_system_machine;
+ sd_bus_new;
+ sd_bus_set_address;
+ sd_bus_set_fd;
+ sd_bus_set_exec;
+ sd_bus_get_address;
+ sd_bus_set_bus_client;
+ sd_bus_is_bus_client;
+ sd_bus_set_server;
+ sd_bus_is_server;
+ sd_bus_set_anonymous;
+ sd_bus_is_anonymous;
+ sd_bus_set_trusted;
+ sd_bus_is_trusted;
+ sd_bus_set_monitor;
+ sd_bus_is_monitor;
+ sd_bus_set_description;
+ sd_bus_get_description;
+ sd_bus_negotiate_creds;
+ sd_bus_negotiate_timestamp;
+ sd_bus_negotiate_fds;
+ sd_bus_can_send;
+ sd_bus_get_creds_mask;
+ sd_bus_set_allow_interactive_authorization;
+ sd_bus_get_allow_interactive_authorization;
+ sd_bus_start;
+ sd_bus_close;
+ sd_bus_try_close;
+ sd_bus_ref;
+ sd_bus_unref;
+ sd_bus_is_open;
+ sd_bus_get_bus_id;
+ sd_bus_get_scope;
+ sd_bus_get_tid;
+ sd_bus_get_owner_creds;
+ sd_bus_send;
+ sd_bus_send_to;
+ sd_bus_call;
+ sd_bus_call_async;
+ sd_bus_get_fd;
+ sd_bus_get_events;
+ sd_bus_get_timeout;
+ sd_bus_process;
+ sd_bus_process_priority;
+ sd_bus_wait;
+ sd_bus_flush;
+ sd_bus_get_current_slot;
+ sd_bus_get_current_message;
+ sd_bus_get_current_handler;
+ sd_bus_get_current_userdata;
+ sd_bus_attach_event;
+ sd_bus_detach_event;
+ sd_bus_get_event;
+ sd_bus_add_filter;
+ sd_bus_add_match;
+ sd_bus_add_object;
+ sd_bus_add_fallback;
+ sd_bus_add_object_vtable;
+ sd_bus_add_fallback_vtable;
+ sd_bus_add_node_enumerator;
+ sd_bus_add_object_manager;
+ sd_bus_slot_ref;
+ sd_bus_slot_unref;
+ sd_bus_slot_get_bus;
+ sd_bus_slot_get_userdata;
+ sd_bus_slot_set_userdata;
+ sd_bus_slot_get_description;
+ sd_bus_slot_set_description;
+ sd_bus_slot_get_current_message;
+ sd_bus_slot_get_current_handler;
+ sd_bus_slot_get_current_userdata;
+ sd_bus_message_new_signal;
+ sd_bus_message_new_method_call;
+ sd_bus_message_new_method_return;
+ sd_bus_message_new_method_error;
+ sd_bus_message_new_method_errorf;
+ sd_bus_message_new_method_errno;
+ sd_bus_message_new_method_errnof;
+ sd_bus_message_ref;
+ sd_bus_message_unref;
+ sd_bus_message_get_type;
+ sd_bus_message_get_cookie;
+ sd_bus_message_get_reply_cookie;
+ sd_bus_message_get_priority;
+ sd_bus_message_get_expect_reply;
+ sd_bus_message_get_auto_start;
+ sd_bus_message_get_allow_interactive_authorization;
+ sd_bus_message_get_signature;
+ sd_bus_message_get_path;
+ sd_bus_message_get_interface;
+ sd_bus_message_get_member;
+ sd_bus_message_get_destination;
+ sd_bus_message_get_sender;
+ sd_bus_message_get_error;
+ sd_bus_message_get_errno;
+ sd_bus_message_get_monotonic_usec;
+ sd_bus_message_get_realtime_usec;
+ sd_bus_message_get_seqnum;
+ sd_bus_message_get_bus;
+ sd_bus_message_get_creds;
+ sd_bus_message_is_signal;
+ sd_bus_message_is_method_call;
+ sd_bus_message_is_method_error;
+ sd_bus_message_is_empty;
+ sd_bus_message_has_signature;
+ sd_bus_message_set_expect_reply;
+ sd_bus_message_set_auto_start;
+ sd_bus_message_set_allow_interactive_authorization;
+ sd_bus_message_set_destination;
+ sd_bus_message_set_priority;
+ sd_bus_message_append;
+ sd_bus_message_append_basic;
+ sd_bus_message_append_array;
+ sd_bus_message_append_array_space;
+ sd_bus_message_append_array_iovec;
+ sd_bus_message_append_array_memfd;
+ sd_bus_message_append_string_space;
+ sd_bus_message_append_string_iovec;
+ sd_bus_message_append_string_memfd;
+ sd_bus_message_append_strv;
+ sd_bus_message_open_container;
+ sd_bus_message_close_container;
+ sd_bus_message_copy;
+ sd_bus_message_read;
+ sd_bus_message_read_basic;
+ sd_bus_message_read_array;
+ sd_bus_message_read_strv;
+ sd_bus_message_skip;
+ sd_bus_message_enter_container;
+ sd_bus_message_exit_container;
+ sd_bus_message_peek_type;
+ sd_bus_message_verify_type;
+ sd_bus_message_at_end;
+ sd_bus_message_rewind;
+ sd_bus_get_unique_name;
+ sd_bus_request_name;
+ sd_bus_release_name;
+ sd_bus_list_names;
+ sd_bus_get_name_creds;
+ sd_bus_get_name_machine_id;
+ sd_bus_call_method;
+ sd_bus_call_method_async;
+ sd_bus_get_property;
+ sd_bus_get_property_trivial;
+ sd_bus_get_property_string;
+ sd_bus_get_property_strv;
+ sd_bus_set_property;
+ sd_bus_reply_method_return;
+ sd_bus_reply_method_error;
+ sd_bus_reply_method_errorf;
+ sd_bus_reply_method_errno;
+ sd_bus_reply_method_errnof;
+ sd_bus_emit_signal;
+ sd_bus_emit_properties_changed_strv;
+ sd_bus_emit_properties_changed;
+ sd_bus_emit_interfaces_added_strv;
+ sd_bus_emit_interfaces_added;
+ sd_bus_emit_interfaces_removed_strv;
+ sd_bus_emit_interfaces_removed;
+ sd_bus_query_sender_creds;
+ sd_bus_query_sender_privilege;
+ sd_bus_creds_new_from_pid;
+ sd_bus_creds_ref;
+ sd_bus_creds_unref;
+ sd_bus_creds_get_mask;
+ sd_bus_creds_get_augmented_mask;
+ sd_bus_creds_get_pid;
+ sd_bus_creds_get_ppid;
+ sd_bus_creds_get_tid;
+ sd_bus_creds_get_uid;
+ sd_bus_creds_get_euid;
+ sd_bus_creds_get_suid;
+ sd_bus_creds_get_fsuid;
+ sd_bus_creds_get_gid;
+ sd_bus_creds_get_egid;
+ sd_bus_creds_get_sgid;
+ sd_bus_creds_get_fsgid;
+ sd_bus_creds_get_supplementary_gids;
+ sd_bus_creds_get_comm;
+ sd_bus_creds_get_tid_comm;
+ sd_bus_creds_get_exe;
+ sd_bus_creds_get_cmdline;
+ sd_bus_creds_get_cgroup;
+ sd_bus_creds_get_unit;
+ sd_bus_creds_get_slice;
+ sd_bus_creds_get_user_unit;
+ sd_bus_creds_get_user_slice;
+ sd_bus_creds_get_session;
+ sd_bus_creds_get_owner_uid;
+ sd_bus_creds_has_effective_cap;
+ sd_bus_creds_has_permitted_cap;
+ sd_bus_creds_has_inheritable_cap;
+ sd_bus_creds_has_bounding_cap;
+ sd_bus_creds_get_selinux_context;
+ sd_bus_creds_get_audit_session_id;
+ sd_bus_creds_get_audit_login_uid;
+ sd_bus_creds_get_tty;
+ sd_bus_creds_get_unique_name;
+ sd_bus_creds_get_well_known_names;
+ sd_bus_creds_get_description;
+ sd_bus_error_free;
+ sd_bus_error_set;
+ sd_bus_error_setf;
+ sd_bus_error_set_const;
+ sd_bus_error_set_errno;
+ sd_bus_error_set_errnof;
+ sd_bus_error_set_errnofv;
+ sd_bus_error_get_errno;
+ sd_bus_error_copy;
+ sd_bus_error_is_set;
+ sd_bus_error_has_name;
+ sd_bus_error_add_map;
+ sd_bus_path_encode;
+ sd_bus_path_decode;
+ sd_bus_track_new;
+ sd_bus_track_ref;
+ sd_bus_track_unref;
+ sd_bus_track_get_bus;
+ sd_bus_track_get_userdata;
+ sd_bus_track_set_userdata;
+ sd_bus_track_add_sender;
+ sd_bus_track_remove_sender;
+ sd_bus_track_add_name;
+ sd_bus_track_remove_name;
+ sd_bus_track_count;
+ sd_bus_track_contains;
+ sd_bus_track_first;
+ sd_bus_track_next;
+
+ /* sd-event */
+ sd_event_default;
+ sd_event_new;
+ sd_event_ref;
+ sd_event_unref;
+ sd_event_add_io;
+ sd_event_add_time;
+ sd_event_add_signal;
+ sd_event_add_child;
+ sd_event_add_defer;
+ sd_event_add_post;
+ sd_event_add_exit;
+ sd_event_prepare;
+ sd_event_wait;
+ sd_event_dispatch;
+ sd_event_run;
+ sd_event_loop;
+ sd_event_exit;
+ sd_event_now;
+ sd_event_get_fd;
+ sd_event_get_state;
+ sd_event_get_tid;
+ sd_event_get_exit_code;
+ sd_event_set_watchdog;
+ sd_event_get_watchdog;
+ sd_event_source_ref;
+ sd_event_source_unref;
+ sd_event_source_get_event;
+ sd_event_source_get_userdata;
+ sd_event_source_set_userdata;
+ sd_event_source_set_description;
+ sd_event_source_get_description;
+ sd_event_source_set_prepare;
+ sd_event_source_get_pending;
+ sd_event_source_get_priority;
+ sd_event_source_set_priority;
+ sd_event_source_get_enabled;
+ sd_event_source_set_enabled;
+ sd_event_source_get_io_fd;
+ sd_event_source_set_io_fd;
+ sd_event_source_get_io_events;
+ sd_event_source_set_io_events;
+ sd_event_source_get_io_revents;
+ sd_event_source_get_time;
+ sd_event_source_set_time;
+ sd_event_source_set_time_accuracy;
+ sd_event_source_get_time_accuracy;
+ sd_event_source_get_time_clock;
+ sd_event_source_get_signal;
+ sd_event_source_get_child_pid;
+} LIBSYSTEMD_220;
+
+LIBSYSTEMD_222 {
+global:
+ /* sd-bus */
+ sd_bus_emit_object_added;
+ sd_bus_emit_object_removed;
+ sd_bus_flush_close_unref;
+} LIBSYSTEMD_221;
+
+LIBSYSTEMD_226 {
+global:
+ sd_pid_get_cgroup;
+ sd_peer_get_cgroup;
+} LIBSYSTEMD_222;
+
+LIBSYSTEMD_227 {
+global:
+ sd_bus_default_flush_close;
+ sd_bus_path_decode_many;
+ sd_bus_path_encode_many;
+ sd_listen_fds_with_names;
+} LIBSYSTEMD_226;
+
+LIBSYSTEMD_229 {
+global:
+ sd_journal_has_runtime_files;
+ sd_journal_has_persistent_files;
+ sd_journal_enumerate_fields;
+ sd_journal_restart_fields;
+} LIBSYSTEMD_227;
+
+LIBSYSTEMD_230 {
+global:
+ sd_journal_open_directory_fd;
+ sd_journal_open_files_fd;
+} LIBSYSTEMD_229;
+
+LIBSYSTEMD_231 {
+global:
+ sd_event_get_iteration;
+} LIBSYSTEMD_230;
+
+LIBSYSTEMD_232 {
+global:
+ sd_bus_track_set_recursive;
+ sd_bus_track_get_recursive;
+ sd_bus_track_count_name;
+ sd_bus_track_count_sender;
+ sd_bus_set_exit_on_disconnect;
+ sd_bus_get_exit_on_disconnect;
+ sd_id128_get_invocation;
+} LIBSYSTEMD_231;
+
+LIBSYSTEMD_233 {
+global:
+ sd_id128_get_machine_app_specific;
+ sd_is_socket_sockaddr;
+} LIBSYSTEMD_232;
+
+LIBSYSTEMD_234 {
+global:
+ sd_bus_message_appendv;
+} LIBSYSTEMD_233;
+
+LIBSYSTEMD_236 {
+global:
+ sd_bus_message_new;
+ sd_bus_message_seal;
+} LIBSYSTEMD_234;
+
+LIBSYSTEMD_237 {
+global:
+ sd_bus_set_watch_bind;
+ sd_bus_get_watch_bind;
+ sd_bus_request_name_async;
+ sd_bus_release_name_async;
+ sd_bus_add_match_async;
+ sd_bus_match_signal;
+ sd_bus_match_signal_async;
+ sd_bus_is_ready;
+ sd_bus_set_connected_signal;
+ sd_bus_get_connected_signal;
+ sd_bus_set_sender;
+ sd_bus_get_sender;
+ sd_bus_message_set_sender;
+ sd_event_source_get_io_fd_own;
+ sd_event_source_set_io_fd_own;
+} LIBSYSTEMD_236;
+
+LIBSYSTEMD_238 {
+global:
+ sd_bus_get_n_queued_read;
+ sd_bus_get_n_queued_write;
+} LIBSYSTEMD_237;
+
+LIBSYSTEMD_239 {
+global:
+ sd_bus_open_with_description;
+ sd_bus_open_user_with_description;
+ sd_bus_open_system_with_description;
+ sd_bus_slot_get_floating;
+ sd_bus_slot_set_floating;
+ sd_bus_slot_get_destroy_callback;
+ sd_bus_slot_set_destroy_callback;
+ sd_bus_track_get_destroy_callback;
+ sd_bus_track_set_destroy_callback;
+ sd_event_add_inotify;
+ sd_event_source_get_inotify_mask;
+ sd_event_source_set_destroy_callback;
+ sd_event_source_get_destroy_callback;
+} LIBSYSTEMD_238;
+
+LIBSYSTEMD_240 {
+global:
+ sd_bus_message_readv;
+ sd_bus_set_method_call_timeout;
+ sd_bus_get_method_call_timeout;
+
+ sd_bus_error_move;
+
+ sd_bus_set_close_on_exit;
+ sd_bus_get_close_on_exit;
+
+ sd_device_ref;
+ sd_device_unref;
+
+ sd_device_new_from_syspath;
+ sd_device_new_from_devnum;
+ sd_device_new_from_subsystem_sysname;
+ sd_device_new_from_device_id;
+
+ sd_device_get_parent;
+ sd_device_get_parent_with_subsystem_devtype;
+
+ sd_device_get_syspath;
+ sd_device_get_subsystem;
+ sd_device_get_devtype;
+ sd_device_get_devnum;
+ sd_device_get_ifindex;
+ sd_device_get_driver;
+ sd_device_get_devpath;
+ sd_device_get_devname;
+ sd_device_get_sysname;
+ sd_device_get_sysnum;
+
+ sd_device_get_is_initialized;
+ sd_device_get_usec_since_initialized;
+
+ sd_device_get_tag_first;
+ sd_device_get_tag_next;
+ sd_device_get_devlink_first;
+ sd_device_get_devlink_next;
+ sd_device_get_property_first;
+ sd_device_get_property_next;
+ sd_device_get_sysattr_first;
+ sd_device_get_sysattr_next;
+
+ sd_device_has_tag;
+ sd_device_get_property_value;
+ sd_device_get_sysattr_value;
+
+ sd_device_set_sysattr_value;
+
+ sd_device_enumerator_new;
+ sd_device_enumerator_ref;
+ sd_device_enumerator_unref;
+
+ sd_device_enumerator_get_device_first;
+ sd_device_enumerator_get_device_next;
+ sd_device_enumerator_get_subsystem_first;
+ sd_device_enumerator_get_subsystem_next;
+
+ sd_device_enumerator_add_match_subsystem;
+ sd_device_enumerator_add_match_sysattr;
+ sd_device_enumerator_add_match_property;
+ sd_device_enumerator_add_match_sysname;
+ sd_device_enumerator_add_match_tag;
+ sd_device_enumerator_add_match_parent;
+ sd_device_enumerator_allow_uninitialized;
+
+ sd_hwdb_ref;
+ sd_hwdb_unref;
+
+ sd_hwdb_new;
+
+ sd_hwdb_get;
+
+ sd_hwdb_seek;
+ sd_hwdb_enumerate;
+
+ sd_id128_get_boot_app_specific;
+
+ sd_device_monitor_new;
+ sd_device_monitor_ref;
+ sd_device_monitor_unref;
+
+ sd_device_monitor_set_receive_buffer_size;
+ sd_device_monitor_attach_event;
+ sd_device_monitor_detach_event;
+ sd_device_monitor_get_event;
+ sd_device_monitor_get_event_source;
+ sd_device_monitor_start;
+ sd_device_monitor_stop;
+
+ sd_device_monitor_filter_add_match_subsystem_devtype;
+ sd_device_monitor_filter_add_match_tag;
+ sd_device_monitor_filter_update;
+ sd_device_monitor_filter_remove;
+
+ sd_event_source_get_floating;
+ sd_event_source_set_floating;
+} LIBSYSTEMD_239;
+
+LIBSYSTEMD_241 {
+global:
+ sd_bus_close_unref;
+} LIBSYSTEMD_240;
+
+LIBSYSTEMD_243 {
+global:
+ sd_bus_object_vtable_format;
+ sd_event_source_disable_unref;
+} LIBSYSTEMD_241;
+
+LIBSYSTEMD_245 {
+global:
+ sd_bus_enqueue_for_read;
+ sd_bus_message_dump;
+ sd_bus_message_sensitive;
+ sd_event_add_child_pidfd;
+ sd_event_source_get_child_pidfd;
+ sd_event_source_get_child_pidfd_own;
+ sd_event_source_set_child_pidfd_own;
+ sd_event_source_get_child_process_own;
+ sd_event_source_set_child_process_own;
+ sd_event_source_send_child_signal;
+ sd_journal_open_namespace;
+} LIBSYSTEMD_243;
+
+LIBSYSTEMD_246 {
+global:
+ sd_bus_interface_name_is_valid;
+ sd_bus_service_name_is_valid;
+ sd_bus_member_name_is_valid;
+ sd_bus_object_path_is_valid;
+
+ sd_bus_call_methodv;
+ sd_bus_call_method_asyncv;
+ sd_bus_emit_signalv;
+ sd_bus_reply_method_errnofv;
+ sd_bus_reply_method_errorfv;
+ sd_bus_reply_method_returnv;
+ sd_bus_set_propertyv;
+
+ sd_path_lookup;
+ sd_path_lookup_strv;
+
+ sd_notify_barrier;
+
+ sd_journal_enumerate_available_data;
+ sd_journal_enumerate_available_unique;
+} LIBSYSTEMD_245;
+
+LIBSYSTEMD_247 {
+global:
+ sd_event_add_time_relative;
+ sd_event_source_set_time_relative;
+ sd_event_source_get_exit_on_failure;
+ sd_event_source_set_exit_on_failure;
+
+ sd_bus_error_has_names_sentinel;
+
+ sd_device_get_current_tag_first;
+ sd_device_get_current_tag_next;
+ sd_device_has_current_tag;
+ sd_device_set_sysattr_valuef;
+} LIBSYSTEMD_246;
diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build
new file mode 100644
index 0000000..50716f7
--- /dev/null
+++ b/src/libsystemd/meson.build
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+id128_sources = files('''
+ sd-id128/id128-util.c
+ sd-id128/id128-util.h
+ sd-id128/sd-id128.c
+'''.split())
+
+sd_daemon_sources = files('sd-daemon/sd-daemon.c')
+
+sd_event_sources = files('''
+ sd-event/event-source.h
+ sd-event/event-util.c
+ sd-event/event-util.h
+ sd-event/sd-event.c
+'''.split())
+
+sd_login_sources = files('sd-login/sd-login.c')
+
+libsystemd_sources = files('''
+ sd-bus/bus-common-errors.c
+ sd-bus/bus-common-errors.h
+ sd-bus/bus-container.c
+ sd-bus/bus-container.h
+ sd-bus/bus-control.c
+ sd-bus/bus-control.h
+ sd-bus/bus-convenience.c
+ sd-bus/bus-creds.c
+ sd-bus/bus-creds.h
+ sd-bus/bus-dump.c
+ sd-bus/bus-dump.h
+ sd-bus/bus-error.c
+ sd-bus/bus-error.h
+ sd-bus/bus-gvariant.c
+ sd-bus/bus-gvariant.h
+ sd-bus/bus-internal.c
+ sd-bus/bus-internal.h
+ sd-bus/bus-introspect.c
+ sd-bus/bus-introspect.h
+ sd-bus/bus-kernel.c
+ sd-bus/bus-kernel.h
+ sd-bus/bus-match.c
+ sd-bus/bus-match.h
+ sd-bus/bus-message.c
+ sd-bus/bus-message.h
+ sd-bus/bus-objects.c
+ sd-bus/bus-objects.h
+ sd-bus/bus-protocol.h
+ sd-bus/bus-signature.c
+ sd-bus/bus-signature.h
+ sd-bus/bus-slot.c
+ sd-bus/bus-slot.h
+ sd-bus/bus-socket.c
+ sd-bus/bus-socket.h
+ sd-bus/bus-track.c
+ sd-bus/bus-track.h
+ sd-bus/bus-type.c
+ sd-bus/bus-type.h
+ sd-bus/sd-bus.c
+ sd-device/device-enumerator-private.h
+ sd-device/device-enumerator.c
+ sd-device/device-internal.h
+ sd-device/device-monitor-private.h
+ sd-device/device-monitor.c
+ sd-device/device-private.c
+ sd-device/device-private.h
+ sd-device/device-util.h
+ sd-device/sd-device.c
+ sd-hwdb/hwdb-internal.h
+ sd-hwdb/hwdb-util.c
+ sd-hwdb/hwdb-util.h
+ sd-hwdb/sd-hwdb.c
+ sd-netlink/generic-netlink.c
+ sd-netlink/generic-netlink.h
+ sd-netlink/netlink-internal.h
+ sd-netlink/netlink-message.c
+ sd-netlink/netlink-slot.c
+ sd-netlink/netlink-slot.h
+ sd-netlink/netlink-socket.c
+ sd-netlink/netlink-types.c
+ sd-netlink/netlink-types.h
+ sd-netlink/netlink-util.c
+ sd-netlink/netlink-util.h
+ sd-netlink/rtnl-message.c
+ sd-netlink/sd-netlink.c
+ sd-network/network-util.c
+ sd-network/network-util.h
+ sd-network/sd-network.c
+ sd-path/sd-path.c
+ sd-resolve/resolve-private.h
+ sd-resolve/sd-resolve.c
+ sd-utf8/sd-utf8.c
+'''.split()) + id128_sources + sd_daemon_sources + sd_event_sources + sd_login_sources
+
+disable_mempool_c = files('disable-mempool.c')
+
+libsystemd_c_args = ['-fvisibility=default']
+
+libsystemd_static = static_library(
+ 'systemd_static',
+ libsystemd_sources,
+ install : false,
+ include_directories : includes,
+ link_with : libbasic,
+ dependencies : [threads,
+ librt],
+ c_args : libsystemd_c_args)
+
+libsystemd_sym = 'src/libsystemd/libsystemd.sym'
+
+configure_file(
+ input : 'libsystemd.pc.in',
+ output : 'libsystemd.pc',
+ configuration : substs,
+ install_dir : pkgconfiglibdir == 'no' ? '' : pkgconfiglibdir)
diff --git a/src/libsystemd/sd-bus/GVARIANT-SERIALIZATION b/src/libsystemd/sd-bus/GVARIANT-SERIALIZATION
new file mode 100644
index 0000000..973a063
--- /dev/null
+++ b/src/libsystemd/sd-bus/GVARIANT-SERIALIZATION
@@ -0,0 +1,105 @@
+How we use GVariant for serializing D-Bus messages
+--------------------------------------------------
+
+We stay close to the original dbus1 framing as possible, but make
+certain changes to adapt for GVariant. dbus1 has the following
+framing:
+
+ 1. A fixed header of "yyyyuu"
+ 2. Additional header fields of "a(yv)"
+ 3. Padding with NUL bytes to pad up to next 8byte boundary
+ 4. The body
+
+Note that the body is not padded at the end, the complete message
+hence might have a non-aligned size. Reading multiple messages at once
+will hence result in possibly unaligned messages in memory.
+
+The header consists of the following:
+
+ y Endianness, 'l' or 'B'
+ y Message Type
+ y Flags
+ y Protocol version, '1'
+ u Length of the body, i.e. the length of part 4 above
+ u 32bit Serial number
+
+ = 12 bytes
+
+This header is then followed by the fields array, whose first value is
+a 32bit array size.
+
+When using GVariant we keep the basic structure in place, only
+slightly alter the header, and define protocol version '2'. The new
+header:
+
+ y Endianness, 'l' or 'B'
+ y Message Type
+ y Flags
+ y Protocol version, '2'
+ u Reserved, must be 0
+ t 64bit Cookie
+
+ = 16 bytes
+
+This is then followed by the GVariant fields array ("a{tv}"), and
+finally the actual body as variant (v). Putting this altogether a
+packet on dbus2 hence qualifies as a fully compliant GVariant
+structure of (yyyyuta{tv}v).
+
+For details on gvariant, see:
+
+https://people.gnome.org/~desrt/gvariant-serialisation.pdf
+
+Regarding the framing of dbus2, also see:
+
+https://wiki.gnome.org/Projects/GLib/GDBus/Version2
+
+The first four bytes of the header are defined the same way for dbus1
+and dbus2. The first bytes contain the endianness field and the
+protocol version, so that the remainder of the message can be safely
+made sense of just by looking at the first 32bit.
+
+Note that the length of the body is no longer included in the header
+on dbus2! In fact, the message size must be known in advance, from the
+underlying transport in order to parse dbus2 messages, while it is
+directly included in dbus1 message headers. This change of semantics
+is an effect of GVariant's basic design.
+
+The serial number has been renamed cookie and has been extended from
+32bit to 64bit. It is recommended to avoid the higher 32bit of the
+cookie field though, to simplify compatibility with dbus1 peers. Note
+that not only the cookie/serial field in the fixed header, but also
+the reply_cookie/reply_serial additional header field has been
+increased from 32bit to 64bit, too!
+
+The header field identifiers have been extended from 8bit to
+64bit. This has been done to simplify things, and has no effect
+on the serialization size, as due to alignment for each 8bit
+header field identifier 56 bits of padding had to be added.
+
+Note that the header size changed, due to these changes. However,
+consider that on dbus1 the beginning of the fields array contains the
+32bit array size (since that is how arrays are encoded on dbus1),
+thus, if one considers that size part of the header, instead of the
+array, the size of the header on dbus1 and dbus2 stays identical, at
+16 bytes.
+
+ 0 4 8 12 16
+ Common: | E | T | F | V | ...
+
+ dbus1: | (as above) | Body Length | Serial | Fields Length | Fields array ...
+
+ gvariant: | (as above) | Reserved | Cookie | Fields array ...
+
+And that's already it.
+
+Note: To simplify parsing, valid dbus2 messages must include the entire
+ fixed header and additional header fields in a single non-memfd
+ message part. Also, the signature string of the body variant all the
+ way to the end of the message must be in a single non-memfd part
+ too. The parts for this extended header and footer can be the same
+ one, and can also continue any amount of additional body bytes.
+
+Note: The GVariant "MAYBE" type is not supported, so that messages can
+ be fully converted forth and back between dbus1 and gvariant
+ representations.
diff --git a/src/libsystemd/sd-bus/bus-common-errors.c b/src/libsystemd/sd-bus/bus-common-errors.c
new file mode 100644
index 0000000..ef1fa37
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-common-errors.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-bus.h"
+
+#include "bus-common-errors.h"
+#include "bus-error.h"
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = {
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_UNIT, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_PID, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_LOAD_FAILED, EIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_UNIT_SETTING, ENOEXEC),
+ SD_BUS_ERROR_MAP(BUS_ERROR_JOB_FAILED, EREMOTEIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_JOB, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_SUBSCRIBED, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ALREADY_SUBSCRIBED, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ONLY_BY_DEPENDENCY, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_JOBS_CONFLICTING, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_MASKED, ERFKILL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_GENERATED, EADDRNOTAVAIL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_LINKED, ELOOP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_JOB_TYPE_NOT_APPLICABLE, EBADR),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_ISOLATION, EPERM),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SHUTTING_DOWN, ECANCELED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SCOPE_NOT_RUNNING, EHOSTDOWN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DYNAMIC_USER, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_REFERENCED, EUNATCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DISK_FULL, ENOSPC),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_MACHINE, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_IMAGE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_MACHINE_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_MACHINE_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRIVATE_NETWORKING, ENOSYS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_USER_MAPPING, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_GROUP_MAPPING, ENXIO),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_PORTABLE_IMAGE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE, EMEDIUMTYPE),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SESSION, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SESSION_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_USER, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_USER_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SEAT, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SESSION_NOT_ON_SEAT, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_IN_CONTROL, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DEVICE_IS_TAKEN, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DEVICE_NOT_TAKEN, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_OPERATION_IN_PROGRESS, EINPROGRESS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SESSION_BUSY, EBUSY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_YOUR_DEVICE, EPERM),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_NTP_SUPPORT, EOPNOTSUPP),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_PROCESS, ESRCH),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_NAME_SERVERS, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_INVALID_REPLY, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_RR, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_CNAME_LOOP, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ABORTED, ECANCELED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SERVICE, EUNATCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNSSEC_FAILED, EHOSTUNREACH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_TRUST_ANCHOR, EHOSTUNREACH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_RR_TYPE_UNSUPPORTED, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_LINK, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_LINK_BUSY, EBUSY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NETWORK_DOWN, ENETDOWN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DNSSD_SERVICE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNSSD_SERVICE_EXISTS, EEXIST),
+
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "FORMERR", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "SERVFAIL", EHOSTDOWN),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NXDOMAIN", ENXIO),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NOTIMP", ENOSYS),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "REFUSED", EACCES),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "YXDOMAIN", EEXIST),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "YRRSET", EEXIST),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NXRRSET", ENOENT),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NOTAUTH", EACCES),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "NOTZONE", EREMOTE),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADVERS", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADKEY", EKEYREJECTED),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADTIME", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADMODE", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADNAME", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADALG", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADTRUNC", EBADMSG),
+ SD_BUS_ERROR_MAP(_BUS_ERROR_DNS "BADCOOKIE", EBADR),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_TRANSFER, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSFER_IN_PROGRESS, EBUSY),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRODUCT_UUID, EOPNOTSUPP),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_SPEED_METER_INACTIVE, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNMANAGED_INTERFACE, EOPNOTSUPP),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_HOME, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UID_IN_USE, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_USER_NAME_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ALREADY_ACTIVE, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ALREADY_FIXATED, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_UNFIXATED, EADDRNOTAVAIL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_NOT_ACTIVE, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ABSENT, EREMOTE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_BUSY, EBUSY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PASSWORD, ENOKEY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_LOW_PASSWORD_QUALITY, EUCLEAN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN, EBADSLT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PIN_NEEDED, ENOANO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED, ERFKILL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED, EMEDIUMTYPE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_ACTION_TIMEOUT, ENOSTR),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PIN_LOCKED, EOWNERDEAD),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN, ENOLCK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT, ETOOMANYREFS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT, EUCLEAN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_SIGNATURE, EKEYREJECTED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_MISMATCH, EUCLEAN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_DOWNGRADE, ESTALE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_SIGNED, EROFS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_HOME_SIZE, ERANGE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRIVATE_KEY, ENOPKG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_LOCKED, ENOEXEC),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_NOT_LOCKED, ENOEXEC),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOO_MANY_OPERATIONS, ENOBUFS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_AUTHENTICATION_LIMIT_HIT, ETOOMANYREFS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_CANT_AUTHENTICATE, EKEYREVOKED),
+
+ SD_BUS_ERROR_MAP_END
+};
diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h
new file mode 100644
index 0000000..7e5be17
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-common-errors.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-error.h"
+
+#define BUS_ERROR_NO_SUCH_UNIT "org.freedesktop.systemd1.NoSuchUnit"
+#define BUS_ERROR_NO_UNIT_FOR_PID "org.freedesktop.systemd1.NoUnitForPID"
+#define BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID "org.freedesktop.systemd1.NoUnitForInvocationID"
+#define BUS_ERROR_UNIT_EXISTS "org.freedesktop.systemd1.UnitExists"
+#define BUS_ERROR_LOAD_FAILED "org.freedesktop.systemd1.LoadFailed"
+#define BUS_ERROR_BAD_UNIT_SETTING "org.freedesktop.systemd1.BadUnitSetting"
+#define BUS_ERROR_JOB_FAILED "org.freedesktop.systemd1.JobFailed"
+#define BUS_ERROR_NO_SUCH_JOB "org.freedesktop.systemd1.NoSuchJob"
+#define BUS_ERROR_NOT_SUBSCRIBED "org.freedesktop.systemd1.NotSubscribed"
+#define BUS_ERROR_ALREADY_SUBSCRIBED "org.freedesktop.systemd1.AlreadySubscribed"
+#define BUS_ERROR_ONLY_BY_DEPENDENCY "org.freedesktop.systemd1.OnlyByDependency"
+#define BUS_ERROR_TRANSACTION_JOBS_CONFLICTING "org.freedesktop.systemd1.TransactionJobsConflicting"
+#define BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC "org.freedesktop.systemd1.TransactionOrderIsCyclic"
+#define BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE "org.freedesktop.systemd1.TransactionIsDestructive"
+#define BUS_ERROR_UNIT_MASKED "org.freedesktop.systemd1.UnitMasked"
+#define BUS_ERROR_UNIT_GENERATED "org.freedesktop.systemd1.UnitGenerated"
+#define BUS_ERROR_UNIT_LINKED "org.freedesktop.systemd1.UnitLinked"
+#define BUS_ERROR_JOB_TYPE_NOT_APPLICABLE "org.freedesktop.systemd1.JobTypeNotApplicable"
+#define BUS_ERROR_NO_ISOLATION "org.freedesktop.systemd1.NoIsolation"
+#define BUS_ERROR_SHUTTING_DOWN "org.freedesktop.systemd1.ShuttingDown"
+#define BUS_ERROR_SCOPE_NOT_RUNNING "org.freedesktop.systemd1.ScopeNotRunning"
+#define BUS_ERROR_NO_SUCH_DYNAMIC_USER "org.freedesktop.systemd1.NoSuchDynamicUser"
+#define BUS_ERROR_NOT_REFERENCED "org.freedesktop.systemd1.NotReferenced"
+#define BUS_ERROR_DISK_FULL "org.freedesktop.systemd1.DiskFull"
+#define BUS_ERROR_NOTHING_TO_CLEAN "org.freedesktop.systemd1.NothingToClean"
+#define BUS_ERROR_UNIT_BUSY "org.freedesktop.systemd1.UnitBusy"
+#define BUS_ERROR_UNIT_INACTIVE "org.freedesktop.systemd1.UnitInactive"
+
+#define BUS_ERROR_NO_SUCH_MACHINE "org.freedesktop.machine1.NoSuchMachine"
+#define BUS_ERROR_NO_SUCH_IMAGE "org.freedesktop.machine1.NoSuchImage"
+#define BUS_ERROR_NO_MACHINE_FOR_PID "org.freedesktop.machine1.NoMachineForPID"
+#define BUS_ERROR_MACHINE_EXISTS "org.freedesktop.machine1.MachineExists"
+#define BUS_ERROR_NO_PRIVATE_NETWORKING "org.freedesktop.machine1.NoPrivateNetworking"
+#define BUS_ERROR_NO_SUCH_USER_MAPPING "org.freedesktop.machine1.NoSuchUserMapping"
+#define BUS_ERROR_NO_SUCH_GROUP_MAPPING "org.freedesktop.machine1.NoSuchGroupMapping"
+
+#define BUS_ERROR_NO_SUCH_PORTABLE_IMAGE "org.freedesktop.portable1.NoSuchImage"
+#define BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE "org.freedesktop.portable1.BadImageType"
+
+#define BUS_ERROR_NO_SUCH_SESSION "org.freedesktop.login1.NoSuchSession"
+#define BUS_ERROR_NO_SESSION_FOR_PID "org.freedesktop.login1.NoSessionForPID"
+#define BUS_ERROR_NO_SUCH_USER "org.freedesktop.login1.NoSuchUser"
+#define BUS_ERROR_NO_USER_FOR_PID "org.freedesktop.login1.NoUserForPID"
+#define BUS_ERROR_NO_SUCH_SEAT "org.freedesktop.login1.NoSuchSeat"
+#define BUS_ERROR_SESSION_NOT_ON_SEAT "org.freedesktop.login1.SessionNotOnSeat"
+#define BUS_ERROR_NOT_IN_CONTROL "org.freedesktop.login1.NotInControl"
+#define BUS_ERROR_DEVICE_IS_TAKEN "org.freedesktop.login1.DeviceIsTaken"
+#define BUS_ERROR_DEVICE_NOT_TAKEN "org.freedesktop.login1.DeviceNotTaken"
+#define BUS_ERROR_OPERATION_IN_PROGRESS "org.freedesktop.login1.OperationInProgress"
+#define BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED "org.freedesktop.login1.SleepVerbNotSupported"
+#define BUS_ERROR_SESSION_BUSY "org.freedesktop.login1.SessionBusy"
+#define BUS_ERROR_NOT_YOUR_DEVICE "org.freedesktop.login1.NotYourDevice"
+
+#define BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED "org.freedesktop.timedate1.AutomaticTimeSyncEnabled"
+#define BUS_ERROR_NO_NTP_SUPPORT "org.freedesktop.timedate1.NoNTPSupport"
+
+#define BUS_ERROR_NO_SUCH_PROCESS "org.freedesktop.systemd1.NoSuchProcess"
+
+#define BUS_ERROR_NO_NAME_SERVERS "org.freedesktop.resolve1.NoNameServers"
+#define BUS_ERROR_INVALID_REPLY "org.freedesktop.resolve1.InvalidReply"
+#define BUS_ERROR_NO_SUCH_RR "org.freedesktop.resolve1.NoSuchRR"
+#define BUS_ERROR_CNAME_LOOP "org.freedesktop.resolve1.CNameLoop"
+#define BUS_ERROR_ABORTED "org.freedesktop.resolve1.Aborted"
+#define BUS_ERROR_NO_SUCH_SERVICE "org.freedesktop.resolve1.NoSuchService"
+#define BUS_ERROR_DNSSEC_FAILED "org.freedesktop.resolve1.DnssecFailed"
+#define BUS_ERROR_NO_TRUST_ANCHOR "org.freedesktop.resolve1.NoTrustAnchor"
+#define BUS_ERROR_RR_TYPE_UNSUPPORTED "org.freedesktop.resolve1.ResourceRecordTypeUnsupported"
+#define BUS_ERROR_NO_SUCH_LINK "org.freedesktop.resolve1.NoSuchLink"
+#define BUS_ERROR_LINK_BUSY "org.freedesktop.resolve1.LinkBusy"
+#define BUS_ERROR_NETWORK_DOWN "org.freedesktop.resolve1.NetworkDown"
+#define BUS_ERROR_NO_SUCH_DNSSD_SERVICE "org.freedesktop.resolve1.NoSuchDnssdService"
+#define BUS_ERROR_DNSSD_SERVICE_EXISTS "org.freedesktop.resolve1.DnssdServiceExists"
+#define _BUS_ERROR_DNS "org.freedesktop.resolve1.DnsError."
+
+#define BUS_ERROR_NO_SUCH_TRANSFER "org.freedesktop.import1.NoSuchTransfer"
+#define BUS_ERROR_TRANSFER_IN_PROGRESS "org.freedesktop.import1.TransferInProgress"
+
+#define BUS_ERROR_NO_PRODUCT_UUID "org.freedesktop.hostname1.NoProductUUID"
+
+#define BUS_ERROR_SPEED_METER_INACTIVE "org.freedesktop.network1.SpeedMeterInactive"
+#define BUS_ERROR_UNMANAGED_INTERFACE "org.freedesktop.network1.UnmanagedInterface"
+
+#define BUS_ERROR_NO_SUCH_HOME "org.freedesktop.home1.NoSuchHome"
+#define BUS_ERROR_UID_IN_USE "org.freedesktop.home1.UIDInUse"
+#define BUS_ERROR_USER_NAME_EXISTS "org.freedesktop.home1.UserNameExists"
+#define BUS_ERROR_HOME_EXISTS "org.freedesktop.home1.HomeExists"
+#define BUS_ERROR_HOME_ALREADY_ACTIVE "org.freedesktop.home1.HomeAlreadyActive"
+#define BUS_ERROR_HOME_ALREADY_FIXATED "org.freedesktop.home1.HomeAlreadyFixated"
+#define BUS_ERROR_HOME_UNFIXATED "org.freedesktop.home1.HomeUnfixated"
+#define BUS_ERROR_HOME_NOT_ACTIVE "org.freedesktop.home1.HomeNotActive"
+#define BUS_ERROR_HOME_ABSENT "org.freedesktop.home1.HomeAbsent"
+#define BUS_ERROR_HOME_BUSY "org.freedesktop.home1.HomeBusy"
+#define BUS_ERROR_BAD_PASSWORD "org.freedesktop.home1.BadPassword"
+#define BUS_ERROR_BAD_RECOVERY_KEY "org.freedesktop.home1.BadRecoveryKey"
+#define BUS_ERROR_LOW_PASSWORD_QUALITY "org.freedesktop.home1.LowPasswordQuality"
+#define BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN "org.freedesktop.home1.BadPasswordAndNoToken"
+#define BUS_ERROR_TOKEN_PIN_NEEDED "org.freedesktop.home1.TokenPinNeeded"
+#define BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED "org.freedesktop.home1.TokenProtectedAuthenticationPathNeeded"
+#define BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED "org.freedesktop.home1.TokenUserPresenceNeeded"
+#define BUS_ERROR_TOKEN_ACTION_TIMEOUT "org.freedesktop.home1.TokenActionTimeout"
+#define BUS_ERROR_TOKEN_PIN_LOCKED "org.freedesktop.home1.TokenPinLocked"
+#define BUS_ERROR_TOKEN_BAD_PIN "org.freedesktop.home1.BadPin"
+#define BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT "org.freedesktop.home1.BadPinFewTriesLeft"
+#define BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT "org.freedesktop.home1.BadPinOneTryLeft"
+#define BUS_ERROR_BAD_SIGNATURE "org.freedesktop.home1.BadSignature"
+#define BUS_ERROR_HOME_RECORD_MISMATCH "org.freedesktop.home1.RecordMismatch"
+#define BUS_ERROR_HOME_RECORD_DOWNGRADE "org.freedesktop.home1.RecordDowngrade"
+#define BUS_ERROR_HOME_RECORD_SIGNED "org.freedesktop.home1.RecordSigned"
+#define BUS_ERROR_BAD_HOME_SIZE "org.freedesktop.home1.BadHomeSize"
+#define BUS_ERROR_NO_PRIVATE_KEY "org.freedesktop.home1.NoPrivateKey"
+#define BUS_ERROR_HOME_LOCKED "org.freedesktop.home1.HomeLocked"
+#define BUS_ERROR_HOME_NOT_LOCKED "org.freedesktop.home1.HomeNotLocked"
+#define BUS_ERROR_NO_DISK_SPACE "org.freedesktop.home1.NoDiskSpace"
+#define BUS_ERROR_TOO_MANY_OPERATIONS "org.freedesktop.home1.TooManyOperations"
+#define BUS_ERROR_AUTHENTICATION_LIMIT_HIT "org.freedesktop.home1.AuthenticationLimitHit"
+#define BUS_ERROR_HOME_CANT_AUTHENTICATE "org.freedesktop.home1.HomeCantAuthenticate"
+
+BUS_ERROR_MAP_ELF_USE(bus_common_errors);
diff --git a/src/libsystemd/sd-bus/bus-container.c b/src/libsystemd/sd-bus/bus-container.c
new file mode 100644
index 0000000..b11ebb3
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-container.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "bus-container.h"
+#include "bus-internal.h"
+#include "bus-socket.h"
+#include "fd-util.h"
+#include "namespace-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "util.h"
+
+int bus_container_connect_socket(sd_bus *b) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
+ int r, error_buf = 0;
+ pid_t child;
+ ssize_t n;
+
+ assert(b);
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->nspid > 0 || b->machine);
+
+ if (b->nspid <= 0) {
+ log_debug("sd-bus: connecting bus%s%s to machine %s...",
+ b->description ? " " : "", strempty(b->description), b->machine);
+
+ r = container_get_leader(b->machine, &b->nspid);
+ if (r < 0)
+ return r;
+ } else
+ log_debug("sd-bus: connecting bus%s%s to namespace of PID "PID_FMT"...",
+ b->description ? " " : "", strempty(b->description), b->nspid);
+
+ r = namespace_open(b->nspid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+ if (r < 0)
+ return r;
+
+ b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (b->input_fd < 0)
+ return -errno;
+
+ b->input_fd = fd_move_above_stdio(b->input_fd);
+
+ b->output_fd = b->input_fd;
+
+ bus_socket_setup(b);
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-buscntrns)", "(sd-buscntr)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ pair[0] = safe_close(pair[0]);
+
+ r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
+ if (r < 0) {
+ /* Try to send error up */
+ error_buf = errno;
+ (void) write(pair[1], &error_buf, sizeof(error_buf));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-buscntrns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+
+ n = read(pair[0], &error_buf, sizeof(error_buf));
+ if (n < 0)
+ return -errno;
+
+ if (n > 0) {
+ if (n != sizeof(error_buf))
+ return -EIO;
+
+ if (error_buf < 0)
+ return -EIO;
+
+ if (error_buf == EINPROGRESS)
+ return 1;
+
+ if (error_buf > 0)
+ return -error_buf;
+ }
+
+ return bus_socket_start_auth(b);
+}
diff --git a/src/libsystemd/sd-bus/bus-container.h b/src/libsystemd/sd-bus/bus-container.h
new file mode 100644
index 0000000..cb503a5
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-container.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+int bus_container_connect_socket(sd_bus *b);
diff --git a/src/libsystemd/sd-bus/bus-control.c b/src/libsystemd/sd-bus/bus-control.c
new file mode 100644
index 0000000..3ee22c9
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-control.c
@@ -0,0 +1,942 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <errno.h>
+#include <stddef.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-control.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+_public_ int sd_bus_get_unique_name(sd_bus *bus, const char **unique) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(unique, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ *unique = bus->unique_name;
+ return 0;
+}
+
+static int validate_request_name_parameters(
+ sd_bus *bus,
+ const char *name,
+ uint64_t flags,
+ uint32_t *ret_param) {
+
+ uint32_t param = 0;
+
+ assert(bus);
+ assert(name);
+ assert(ret_param);
+
+ assert_return(!(flags & ~(SD_BUS_NAME_ALLOW_REPLACEMENT|SD_BUS_NAME_REPLACE_EXISTING|SD_BUS_NAME_QUEUE)), -EINVAL);
+ assert_return(service_name_is_valid(name), -EINVAL);
+ assert_return(name[0] != ':', -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Don't allow requesting the special driver and local names */
+ if (STR_IN_SET(name, "org.freedesktop.DBus", "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (flags & SD_BUS_NAME_ALLOW_REPLACEMENT)
+ param |= BUS_NAME_ALLOW_REPLACEMENT;
+ if (flags & SD_BUS_NAME_REPLACE_EXISTING)
+ param |= BUS_NAME_REPLACE_EXISTING;
+ if (!(flags & SD_BUS_NAME_QUEUE))
+ param |= BUS_NAME_DO_NOT_QUEUE;
+
+ *ret_param = param;
+
+ return 0;
+}
+
+_public_ int sd_bus_request_name(
+ sd_bus *bus,
+ const char *name,
+ uint64_t flags) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ uint32_t ret, param = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = validate_request_name_parameters(bus, name, flags, &param);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RequestName",
+ NULL,
+ &reply,
+ "su",
+ name,
+ param);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_ALREADY_OWNER:
+ return -EALREADY;
+
+ case BUS_NAME_EXISTS:
+ return -EEXIST;
+
+ case BUS_NAME_IN_QUEUE:
+ return 0;
+
+ case BUS_NAME_PRIMARY_OWNER:
+ return 1;
+ }
+
+ return -EIO;
+}
+
+static int default_request_name_handler(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ uint32_t ret;
+ int r;
+
+ assert(m);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to request name, failing connection: %s",
+ sd_bus_message_get_error(m)->message);
+
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ r = sd_bus_message_read(m, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_ALREADY_OWNER:
+ log_debug("Already owner of requested service name, ignoring.");
+ return 1;
+
+ case BUS_NAME_IN_QUEUE:
+ log_debug("In queue for requested service name.");
+ return 1;
+
+ case BUS_NAME_PRIMARY_OWNER:
+ log_debug("Successfully acquired requested service name.");
+ return 1;
+
+ case BUS_NAME_EXISTS:
+ log_debug("Requested service name already owned, failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ log_debug("Unexpected response from RequestName(), failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+}
+
+_public_ int sd_bus_request_name_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *name,
+ uint64_t flags,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ uint32_t param = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = validate_request_name_parameters(bus, name, flags, &param);
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_method_async(
+ bus,
+ ret_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RequestName",
+ callback ?: default_request_name_handler,
+ userdata,
+ "su",
+ name,
+ param);
+}
+
+static int validate_release_name_parameters(
+ sd_bus *bus,
+ const char *name) {
+
+ assert(bus);
+ assert(name);
+
+ assert_return(service_name_is_valid(name), -EINVAL);
+ assert_return(name[0] != ':', -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Don't allow releasing the special driver and local names */
+ if (STR_IN_SET(name, "org.freedesktop.DBus", "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ return 0;
+}
+
+_public_ int sd_bus_release_name(
+ sd_bus *bus,
+ const char *name) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ uint32_t ret;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = validate_release_name_parameters(bus, name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ReleaseName",
+ NULL,
+ &reply,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_NON_EXISTENT:
+ return -ESRCH;
+
+ case BUS_NAME_NOT_OWNER:
+ return -EADDRINUSE;
+
+ case BUS_NAME_RELEASED:
+ return 0;
+ }
+
+ return -EIO;
+}
+
+static int default_release_name_handler(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ uint32_t ret;
+ int r;
+
+ assert(m);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to release name, failing connection: %s",
+ sd_bus_message_get_error(m)->message);
+
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ r = sd_bus_message_read(m, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_NON_EXISTENT:
+ log_debug("Name asked to release is not taken currently, ignoring.");
+ return 1;
+
+ case BUS_NAME_NOT_OWNER:
+ log_debug("Name asked to release is owned by somebody else, ignoring.");
+ return 1;
+
+ case BUS_NAME_RELEASED:
+ log_debug("Name successfully released.");
+ return 1;
+ }
+
+ log_debug("Unexpected response from ReleaseName(), failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+}
+
+_public_ int sd_bus_release_name_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *name,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = validate_release_name_parameters(bus, name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_method_async(
+ bus,
+ ret_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ReleaseName",
+ callback ?: default_release_name_handler,
+ userdata,
+ "s",
+ name);
+}
+
+_public_ int sd_bus_list_names(sd_bus *bus, char ***acquired, char ***activatable) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **x = NULL, **y = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(acquired || activatable, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (acquired) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ListNames",
+ NULL,
+ &reply,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(reply, &x);
+ if (r < 0)
+ return r;
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (activatable) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ListActivatableNames",
+ NULL,
+ &reply,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(reply, &y);
+ if (r < 0)
+ return r;
+
+ *activatable = TAKE_PTR(y);
+ }
+
+ if (acquired)
+ *acquired = TAKE_PTR(x);
+
+ return 0;
+}
+
+_public_ int sd_bus_get_name_creds(
+ sd_bus *bus,
+ const char *name,
+ uint64_t mask,
+ sd_bus_creds **creds) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply_unique = NULL, *reply = NULL;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL;
+ const char *unique;
+ pid_t pid = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return((mask & ~SD_BUS_CREDS_AUGMENT) <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(mask == 0 || creds, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Turn off augmenting if this isn't a local connection. If the connection is not local, then /proc is not
+ * going to match. */
+ if (!bus->is_local)
+ mask &= ~SD_BUS_CREDS_AUGMENT;
+
+ if (streq(name, "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (streq(name, "org.freedesktop.DBus"))
+ return sd_bus_get_owner_creds(bus, mask, creds);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* If the name is unique anyway, we can use it directly */
+ unique = name[0] == ':' ? name : NULL;
+
+ /* Only query the owner if the caller wants to know it and the name is not unique anyway, or if the caller just
+ * wants to check whether a name exists */
+ if ((FLAGS_SET(mask, SD_BUS_CREDS_UNIQUE_NAME) && !unique) || mask == 0) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetNameOwner",
+ NULL,
+ &reply_unique,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply_unique, "s", &unique);
+ if (r < 0)
+ return r;
+ }
+
+ if (mask != 0) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool need_pid, need_uid, need_selinux, need_separate_calls;
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ if ((mask & SD_BUS_CREDS_UNIQUE_NAME) && unique) {
+ c->unique_name = strdup(unique);
+ if (!c->unique_name)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_UNIQUE_NAME;
+ }
+
+ need_pid = (mask & SD_BUS_CREDS_PID) ||
+ ((mask & SD_BUS_CREDS_AUGMENT) &&
+ (mask & (SD_BUS_CREDS_UID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID|
+ SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID|
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS|
+ SD_BUS_CREDS_COMM|SD_BUS_CREDS_EXE|SD_BUS_CREDS_CMDLINE|
+ SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID|
+ SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS|
+ SD_BUS_CREDS_SELINUX_CONTEXT|
+ SD_BUS_CREDS_AUDIT_SESSION_ID|SD_BUS_CREDS_AUDIT_LOGIN_UID)));
+ need_uid = mask & SD_BUS_CREDS_EUID;
+ need_selinux = mask & SD_BUS_CREDS_SELINUX_CONTEXT;
+
+ if (need_pid + need_uid + need_selinux > 1) {
+
+ /* If we need more than one of the credentials, then use GetConnectionCredentials() */
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionCredentials",
+ &error,
+ &reply,
+ "s",
+ unique ?: name);
+
+ if (r < 0) {
+
+ if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD))
+ return r;
+
+ /* If we got an unknown method error, fall back to the individual calls... */
+ need_separate_calls = true;
+ sd_bus_error_free(&error);
+
+ } else {
+ need_separate_calls = false;
+
+ r = sd_bus_message_enter_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *m;
+
+ r = sd_bus_message_enter_container(reply, 'e', "sv");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(reply, "s", &m);
+ if (r < 0)
+ return r;
+
+ if (need_uid && streq(m, "UnixUserID")) {
+ uint32_t u;
+
+ r = sd_bus_message_read(reply, "v", "u", &u);
+ if (r < 0)
+ return r;
+
+ c->euid = u;
+ c->mask |= SD_BUS_CREDS_EUID;
+
+ } else if (need_pid && streq(m, "ProcessID")) {
+ uint32_t p;
+
+ r = sd_bus_message_read(reply, "v", "u", &p);
+ if (r < 0)
+ return r;
+
+ pid = p;
+ if (mask & SD_BUS_CREDS_PID) {
+ c->pid = p;
+ c->mask |= SD_BUS_CREDS_PID;
+ }
+
+ } else if (need_selinux && streq(m, "LinuxSecurityLabel")) {
+ const void *p = NULL;
+ size_t sz = 0;
+
+ r = sd_bus_message_enter_container(reply, 'v', "ay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(reply, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ free(c->label);
+ c->label = strndup(p, sz);
+ if (!c->label)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(reply, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+
+ if (need_pid && pid == 0)
+ return -EPROTO;
+ }
+
+ } else /* When we only need a single field, then let's use separate calls */
+ need_separate_calls = true;
+
+ if (need_separate_calls) {
+ if (need_pid) {
+ uint32_t u;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionUnixProcessID",
+ NULL,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &u);
+ if (r < 0)
+ return r;
+
+ pid = u;
+ if (mask & SD_BUS_CREDS_PID) {
+ c->pid = u;
+ c->mask |= SD_BUS_CREDS_PID;
+ }
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (need_uid) {
+ uint32_t u;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionUnixUser",
+ NULL,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &u);
+ if (r < 0)
+ return r;
+
+ c->euid = u;
+ c->mask |= SD_BUS_CREDS_EUID;
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (need_selinux) {
+ const void *p = NULL;
+ size_t sz = 0;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionSELinuxSecurityContext",
+ &error,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0) {
+ if (!sd_bus_error_has_name(&error, "org.freedesktop.DBus.Error.SELinuxSecurityContextUnknown"))
+ return r;
+
+ /* no data is fine */
+ } else {
+ r = sd_bus_message_read_array(reply, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ c->label = memdup_suffix0(p, sz);
+ if (!c->label)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+ }
+ }
+
+ r = bus_creds_add_more(c, mask, pid, 0);
+ if (r < 0 && r != -ESRCH) /* Return the error, but ignore ESRCH which just means the process is already gone */
+ return r;
+ }
+
+ if (creds)
+ *creds = TAKE_PTR(c);
+
+ return 0;
+}
+
+_public_ int sd_bus_get_owner_creds(sd_bus *bus, uint64_t mask, sd_bus_creds **ret) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL;
+ bool do_label, do_groups;
+ pid_t pid = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return((mask & ~SD_BUS_CREDS_AUGMENT) <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!bus->is_local)
+ mask &= ~SD_BUS_CREDS_AUGMENT;
+
+ do_label = bus->label && (mask & SD_BUS_CREDS_SELINUX_CONTEXT);
+ do_groups = bus->n_groups != (size_t) -1 && (mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS);
+
+ /* Avoid allocating anything if we have no chance of returning useful data */
+ if (!bus->ucred_valid && !do_label && !do_groups)
+ return -ENODATA;
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ if (bus->ucred_valid) {
+ if (pid_is_valid(bus->ucred.pid)) {
+ pid = c->pid = bus->ucred.pid;
+ c->mask |= SD_BUS_CREDS_PID & mask;
+ }
+
+ if (uid_is_valid(bus->ucred.uid)) {
+ c->euid = bus->ucred.uid;
+ c->mask |= SD_BUS_CREDS_EUID & mask;
+ }
+
+ if (gid_is_valid(bus->ucred.gid)) {
+ c->egid = bus->ucred.gid;
+ c->mask |= SD_BUS_CREDS_EGID & mask;
+ }
+ }
+
+ if (do_label) {
+ c->label = strdup(bus->label);
+ if (!c->label)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (do_groups) {
+ c->supplementary_gids = newdup(gid_t, bus->groups, bus->n_groups);
+ if (!c->supplementary_gids)
+ return -ENOMEM;
+
+ c->n_supplementary_gids = bus->n_groups;
+
+ c->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ }
+
+ r = bus_creds_add_more(c, mask, pid, 0);
+ if (r < 0 && r != -ESRCH) /* If the process vanished, then don't complain, just return what we got */
+ return r;
+
+ *ret = TAKE_PTR(c);
+
+ return 0;
+}
+
+#define append_eavesdrop(bus, m) \
+ ((bus)->is_monitor \
+ ? (isempty(m) ? "eavesdrop='true'" : strjoina((m), ",eavesdrop='true'")) \
+ : (m))
+
+int bus_add_match_internal(
+ sd_bus *bus,
+ const char *match,
+ uint64_t *ret_counter) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *e;
+ int r;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "AddMatch",
+ NULL,
+ &reply,
+ "s",
+ e);
+ if (r < 0)
+ return r;
+
+ /* If the caller asked for it, return the read counter of the reply */
+ if (ret_counter)
+ *ret_counter = reply->read_counter;
+
+ return r;
+}
+
+int bus_add_match_internal_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ const char *e;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ return sd_bus_call_method_async(
+ bus,
+ ret_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "AddMatch",
+ callback,
+ userdata,
+ "s",
+ e);
+}
+
+int bus_remove_match_internal(
+ sd_bus *bus,
+ const char *match) {
+
+ const char *e;
+
+ assert(bus);
+ assert(match);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ /* Fire and forget */
+
+ return sd_bus_call_method_async(
+ bus,
+ NULL,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RemoveMatch",
+ NULL,
+ NULL,
+ "s",
+ e);
+}
+
+_public_ int sd_bus_get_name_machine_id(sd_bus *bus, const char *name, sd_id128_t *machine) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ const char *mid;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(machine, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (streq_ptr(name, bus->unique_name))
+ return sd_id128_get_machine(machine);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ name,
+ "/",
+ "org.freedesktop.DBus.Peer",
+ "GetMachineId");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_set_auto_start(m, false);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(bus, m, 0, NULL, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "s", &mid);
+ if (r < 0)
+ return r;
+
+ return sd_id128_from_string(mid, machine);
+}
diff --git a/src/libsystemd/sd-bus/bus-control.h b/src/libsystemd/sd-bus/bus-control.h
new file mode 100644
index 0000000..8182b9c
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-control.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+int bus_add_match_internal(sd_bus *bus, const char *match, uint64_t *ret_counter);
+int bus_add_match_internal_async(sd_bus *bus, sd_bus_slot **ret, const char *match, sd_bus_message_handler_t callback, void *userdata);
+
+int bus_remove_match_internal(sd_bus *bus, const char *match);
diff --git a/src/libsystemd/sd-bus/bus-convenience.c b/src/libsystemd/sd-bus/bus-convenience.c
new file mode 100644
index 0000000..0314642
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-convenience.c
@@ -0,0 +1,788 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+#include <sys/types.h>
+
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "string-util.h"
+
+_public_ int sd_bus_emit_signalv(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *member,
+ const char *types, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = sd_bus_message_new_signal(bus, &m, path, interface, member);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ r = sd_bus_message_appendv(m, types, ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_emit_signal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *member,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_emit_signalv(bus, path, interface, member, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_call_method_asyncv(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ const char *types, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, interface, member);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ r = sd_bus_message_appendv(m, types, ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_call_async(bus, slot, m, callback, userdata, 0);
+}
+
+_public_ int sd_bus_call_method_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_call_method_asyncv(bus, slot, destination, path, interface, member, callback, userdata, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_call_methodv(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *types, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, interface, member);
+ if (r < 0)
+ goto fail;
+
+ if (!isempty(types)) {
+ r = sd_bus_message_appendv(m, types, ap);
+ if (r < 0)
+ goto fail;
+ }
+
+ return sd_bus_call(bus, m, 0, error, reply);
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_call_method(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_call_methodv(bus, destination, path, interface, member, error, reply, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_reply_method_returnv(
+ sd_bus_message *call,
+ const char *types, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ r = sd_bus_message_new_method_return(call, &m);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ r = sd_bus_message_appendv(m, types, ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(call->bus, m, NULL);
+}
+
+_public_ int sd_bus_reply_method_return(
+ sd_bus_message *call,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_reply_method_returnv(call, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_reply_method_error(
+ sd_bus_message *call,
+ const sd_bus_error *e) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(sd_bus_error_is_set(e), -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ r = sd_bus_message_new_method_error(call, &m, e);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(call->bus, m, NULL);
+}
+
+_public_ int sd_bus_reply_method_errorfv(
+ sd_bus_message *call,
+ const char *name,
+ const char *format,
+ va_list ap) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ bus_error_setfv(&error, name, format, ap);
+
+ return sd_bus_reply_method_error(call, &error);
+}
+
+_public_ int sd_bus_reply_method_errorf(
+ sd_bus_message *call,
+ const char *name,
+ const char *format,
+ ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = sd_bus_reply_method_errorfv(call, name, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_reply_method_errno(
+ sd_bus_message *call,
+ int error,
+ const sd_bus_error *p) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ if (sd_bus_error_is_set(p))
+ return sd_bus_reply_method_error(call, p);
+
+ sd_bus_error_set_errno(&berror, error);
+
+ return sd_bus_reply_method_error(call, &berror);
+}
+
+_public_ int sd_bus_reply_method_errnofv(
+ sd_bus_message *call,
+ int error,
+ const char *format,
+ va_list ap) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ sd_bus_error_set_errnofv(&berror, error, format, ap);
+
+ return sd_bus_reply_method_error(call, &berror);
+}
+
+_public_ int sd_bus_reply_method_errnof(
+ sd_bus_message *call,
+ int error,
+ const char *format,
+ ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = sd_bus_reply_method_errnofv(call, error, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_get_property(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *type) {
+
+ sd_bus_message *rep = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(reply, -EINVAL, error);
+ bus_assert_return(signature_is_single(type, false), -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path,
+ "org.freedesktop.DBus.Properties", "Get",
+ error, &rep,
+ "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(rep, 'v', type);
+ if (r < 0) {
+ sd_bus_message_unref(rep);
+ goto fail;
+ }
+
+ *reply = rep;
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_trivial(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char type, void *ptr) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(bus_type_is_trivial(type), -EINVAL, error);
+ bus_assert_return(ptr, -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', CHAR_TO_STR(type));
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_basic(reply, type, ptr);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_string(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char **ret) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *s;
+ char *n;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(ret, -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', "s");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_basic(reply, 's', &s);
+ if (r < 0)
+ goto fail;
+
+ n = strdup(s);
+ if (!n) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ *ret = n;
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_strv(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char ***ret) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(ret, -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', NULL);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_strv(reply, ret);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_set_propertyv(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ const char *type, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(signature_is_single(type, false), -EINVAL, error);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, "org.freedesktop.DBus.Properties", "Set");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_append(m, "ss", strempty(interface), member);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_open_container(m, 'v', type);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_appendv(m, type, ap);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ goto fail;
+
+ return sd_bus_call(bus, m, 0, error, NULL);
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_set_property(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ const char *type, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, type);
+ r = sd_bus_set_propertyv(bus, destination, path, interface, member, error, type, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_query_sender_creds(sd_bus_message *call, uint64_t mask, sd_bus_creds **ret) {
+ sd_bus_creds *c;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ c = sd_bus_message_get_creds(call);
+
+ /* All data we need? */
+ if (c && (mask & ~c->mask) == 0) {
+ *ret = sd_bus_creds_ref(c);
+ return 0;
+ }
+
+ /* No data passed? Or not enough data passed to retrieve the missing bits? */
+ if (!c || !(c->mask & SD_BUS_CREDS_PID)) {
+ /* We couldn't read anything from the call, let's try
+ * to get it from the sender or peer. */
+
+ if (call->sender)
+ /* There's a sender, but the creds are missing. */
+ return sd_bus_get_name_creds(call->bus, call->sender, mask, ret);
+ else
+ /* There's no sender. For direct connections
+ * the credentials of the AF_UNIX peer matter,
+ * which may be queried via sd_bus_get_owner_creds(). */
+ return sd_bus_get_owner_creds(call->bus, mask, ret);
+ }
+
+ r = bus_creds_extend_by_pid(c, mask, ret);
+ if (r == -ESRCH) {
+ /* Process doesn't exist anymore? propagate the few things we have */
+ *ret = sd_bus_creds_ref(c);
+ return 0;
+ }
+
+ return r;
+}
+
+_public_ int sd_bus_query_sender_privilege(sd_bus_message *call, int capability) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t our_uid;
+ bool know_caps = false;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_pid_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (capability >= 0) {
+
+ r = sd_bus_query_sender_creds(call, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS, &creds);
+ if (r < 0)
+ return r;
+
+ /* We cannot use augmented caps for authorization,
+ * since then data is acquired raceful from
+ * /proc. This can never actually happen, but let's
+ * better be safe than sorry, and do an extra check
+ * here. */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EFFECTIVE_CAPS) == 0, -EPERM);
+
+ r = sd_bus_creds_has_effective_cap(creds, capability);
+ if (r > 0)
+ return 1;
+ if (r == 0)
+ know_caps = true;
+ } else {
+ r = sd_bus_query_sender_creds(call, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+ }
+
+ /* Now, check the UID, but only if the capability check wasn't
+ * sufficient */
+ our_uid = getuid();
+ if (our_uid != 0 || !know_caps || capability < 0) {
+ uid_t sender_uid;
+
+ /* We cannot use augmented uid/euid for authorization,
+ * since then data is acquired raceful from
+ * /proc. This can never actually happen, but let's
+ * better be safe than sorry, and do an extra check
+ * here. */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID)) == 0, -EPERM);
+
+ /* Try to use the EUID, if we have it. */
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0)
+ r = sd_bus_creds_get_uid(creds, &sender_uid);
+
+ if (r >= 0) {
+ /* Sender has same UID as us, then let's grant access */
+ if (sender_uid == our_uid)
+ return 1;
+
+ /* Sender is root, we are not root. */
+ if (our_uid != 0 && sender_uid == 0)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#define make_expression(sender, path, interface, member) \
+ strjoina( \
+ "type='signal'", \
+ sender ? ",sender='" : "", \
+ sender ?: "", \
+ sender ? "'" : "", \
+ path ? ",path='" : "", \
+ path ?: "", \
+ path ? "'" : "", \
+ interface ? ",interface='" : "", \
+ interface ?: "", \
+ interface ? "'" : "", \
+ member ? ",member='" : "", \
+ member ?: "", \
+ member ? "'" : "" \
+ )
+
+_public_ int sd_bus_match_signal(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const char *sender,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ const char *expression;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+ assert_return(!path || object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(!member || member_name_is_valid(member), -EINVAL);
+
+ expression = make_expression(sender, path, interface, member);
+
+ return sd_bus_add_match(bus, ret, expression, callback, userdata);
+}
+
+_public_ int sd_bus_match_signal_async(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const char *sender,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ const char *expression;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+ assert_return(!path || object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(!member || member_name_is_valid(member), -EINVAL);
+
+ expression = make_expression(sender, path, interface, member);
+
+ return sd_bus_add_match_async(bus, ret, expression, callback, install_callback, userdata);
+}
diff --git a/src/libsystemd/sd-bus/bus-creds.c b/src/libsystemd/sd-bus/bus-creds.c
new file mode 100644
index 0000000..3896d94
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-creds.c
@@ -0,0 +1,1340 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/capability.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-creds.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hexdecoct.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "util.h"
+
+enum {
+ CAP_OFFSET_INHERITABLE = 0,
+ CAP_OFFSET_PERMITTED = 1,
+ CAP_OFFSET_EFFECTIVE = 2,
+ CAP_OFFSET_BOUNDING = 3
+};
+
+void bus_creds_done(sd_bus_creds *c) {
+ assert(c);
+
+ /* For internal bus cred structures that are allocated by
+ * something else */
+
+ free(c->session);
+ free(c->unit);
+ free(c->user_unit);
+ free(c->slice);
+ free(c->user_slice);
+ free(c->unescaped_description);
+ free(c->supplementary_gids);
+ free(c->tty);
+
+ free(c->well_known_names); /* note that this is an strv, but
+ * we only free the array, not the
+ * strings the array points to. The
+ * full strv we only free if
+ * c->allocated is set, see
+ * below. */
+
+ strv_free(c->cmdline_array);
+}
+
+_public_ sd_bus_creds *sd_bus_creds_ref(sd_bus_creds *c) {
+
+ if (!c)
+ return NULL;
+
+ if (c->allocated) {
+ assert(c->n_ref > 0);
+ c->n_ref++;
+ } else {
+ sd_bus_message *m;
+
+ /* If this is an embedded creds structure, then
+ * forward ref counting to the message */
+ m = container_of(c, sd_bus_message, creds);
+ sd_bus_message_ref(m);
+ }
+
+ return c;
+}
+
+_public_ sd_bus_creds *sd_bus_creds_unref(sd_bus_creds *c) {
+
+ if (!c)
+ return NULL;
+
+ if (c->allocated) {
+ assert(c->n_ref > 0);
+ c->n_ref--;
+
+ if (c->n_ref == 0) {
+ free(c->comm);
+ free(c->tid_comm);
+ free(c->exe);
+ free(c->cmdline);
+ free(c->cgroup);
+ free(c->capability);
+ free(c->label);
+ free(c->unique_name);
+ free(c->cgroup_root);
+ free(c->description);
+
+ c->supplementary_gids = mfree(c->supplementary_gids);
+
+ c->well_known_names = strv_free(c->well_known_names);
+
+ bus_creds_done(c);
+
+ free(c);
+ }
+ } else {
+ sd_bus_message *m;
+
+ m = container_of(c, sd_bus_message, creds);
+ sd_bus_message_unref(m);
+ }
+
+ return NULL;
+}
+
+_public_ uint64_t sd_bus_creds_get_mask(const sd_bus_creds *c) {
+ assert_return(c, 0);
+
+ return c->mask;
+}
+
+_public_ uint64_t sd_bus_creds_get_augmented_mask(const sd_bus_creds *c) {
+ assert_return(c, 0);
+
+ return c->augmented;
+}
+
+sd_bus_creds* bus_creds_new(void) {
+ sd_bus_creds *c;
+
+ c = new0(sd_bus_creds, 1);
+ if (!c)
+ return NULL;
+
+ c->allocated = true;
+ c->n_ref = 1;
+ return c;
+}
+
+_public_ int sd_bus_creds_new_from_pid(sd_bus_creds **ret, pid_t pid, uint64_t mask) {
+ sd_bus_creds *c;
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(mask <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+
+ if (pid == 0)
+ pid = getpid_cached();
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ r = bus_creds_add_more(c, mask | SD_BUS_CREDS_AUGMENT, pid, 0);
+ if (r < 0) {
+ sd_bus_creds_unref(c);
+ return r;
+ }
+
+ /* Check if the process existed at all, in case we haven't
+ * figured that out already */
+ if (!pid_is_alive(pid)) {
+ sd_bus_creds_unref(c);
+ return -ESRCH;
+ }
+
+ *ret = c;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_uid(sd_bus_creds *c, uid_t *uid) {
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UID))
+ return -ENODATA;
+
+ *uid = c->uid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_euid(sd_bus_creds *c, uid_t *euid) {
+ assert_return(c, -EINVAL);
+ assert_return(euid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EUID))
+ return -ENODATA;
+
+ *euid = c->euid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_suid(sd_bus_creds *c, uid_t *suid) {
+ assert_return(c, -EINVAL);
+ assert_return(suid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SUID))
+ return -ENODATA;
+
+ *suid = c->suid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_fsuid(sd_bus_creds *c, uid_t *fsuid) {
+ assert_return(c, -EINVAL);
+ assert_return(fsuid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_FSUID))
+ return -ENODATA;
+
+ *fsuid = c->fsuid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_gid(sd_bus_creds *c, gid_t *gid) {
+ assert_return(c, -EINVAL);
+ assert_return(gid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_GID))
+ return -ENODATA;
+
+ *gid = c->gid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_egid(sd_bus_creds *c, gid_t *egid) {
+ assert_return(c, -EINVAL);
+ assert_return(egid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EGID))
+ return -ENODATA;
+
+ *egid = c->egid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_sgid(sd_bus_creds *c, gid_t *sgid) {
+ assert_return(c, -EINVAL);
+ assert_return(sgid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SGID))
+ return -ENODATA;
+
+ *sgid = c->sgid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_fsgid(sd_bus_creds *c, gid_t *fsgid) {
+ assert_return(c, -EINVAL);
+ assert_return(fsgid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_FSGID))
+ return -ENODATA;
+
+ *fsgid = c->fsgid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_supplementary_gids(sd_bus_creds *c, const gid_t **gids) {
+ assert_return(c, -EINVAL);
+ assert_return(gids, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS))
+ return -ENODATA;
+
+ *gids = c->supplementary_gids;
+ return (int) c->n_supplementary_gids;
+}
+
+_public_ int sd_bus_creds_get_pid(sd_bus_creds *c, pid_t *pid) {
+ assert_return(c, -EINVAL);
+ assert_return(pid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PID))
+ return -ENODATA;
+
+ assert(c->pid > 0);
+ *pid = c->pid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_ppid(sd_bus_creds *c, pid_t *ppid) {
+ assert_return(c, -EINVAL);
+ assert_return(ppid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PPID))
+ return -ENODATA;
+
+ /* PID 1 has no parent process. Let's distinguish the case of
+ * not knowing and not having a parent process by the returned
+ * error code. */
+ if (c->ppid == 0)
+ return -ENXIO;
+
+ *ppid = c->ppid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tid(sd_bus_creds *c, pid_t *tid) {
+ assert_return(c, -EINVAL);
+ assert_return(tid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TID))
+ return -ENODATA;
+
+ assert(c->tid > 0);
+ *tid = c->tid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_selinux_context(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SELINUX_CONTEXT))
+ return -ENODATA;
+
+ assert(c->label);
+ *ret = c->label;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_comm(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_COMM))
+ return -ENODATA;
+
+ assert(c->comm);
+ *ret = c->comm;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tid_comm(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TID_COMM))
+ return -ENODATA;
+
+ assert(c->tid_comm);
+ *ret = c->tid_comm;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_exe(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EXE))
+ return -ENODATA;
+
+ if (!c->exe)
+ return -ENXIO;
+
+ *ret = c->exe;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_cgroup(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_CGROUP))
+ return -ENODATA;
+
+ assert(c->cgroup);
+ *ret = c->cgroup;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_unit(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UNIT))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->unit) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_unit(shifted, (char**) &c->unit);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->unit;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_user_unit(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_USER_UNIT))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->user_unit) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_user_unit(shifted, (char**) &c->user_unit);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->user_unit;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_slice(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SLICE))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->slice) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_slice(shifted, (char**) &c->slice);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->slice;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_user_slice(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_USER_SLICE))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->user_slice) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_user_slice(shifted, (char**) &c->user_slice);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->user_slice;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_session(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SESSION))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->session) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_session(shifted, (char**) &c->session);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->session;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_owner_uid(sd_bus_creds *c, uid_t *uid) {
+ const char *shifted;
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_OWNER_UID))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_owner_uid(shifted, uid);
+}
+
+_public_ int sd_bus_creds_get_cmdline(sd_bus_creds *c, char ***cmdline) {
+ assert_return(c, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_CMDLINE))
+ return -ENODATA;
+
+ if (!c->cmdline)
+ return -ENXIO;
+
+ if (!c->cmdline_array) {
+ c->cmdline_array = strv_parse_nulstr(c->cmdline, c->cmdline_size);
+ if (!c->cmdline_array)
+ return -ENOMEM;
+ }
+
+ *cmdline = c->cmdline_array;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_audit_session_id(sd_bus_creds *c, uint32_t *sessionid) {
+ assert_return(c, -EINVAL);
+ assert_return(sessionid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_AUDIT_SESSION_ID))
+ return -ENODATA;
+
+ if (!audit_session_is_valid(c->audit_session_id))
+ return -ENXIO;
+
+ *sessionid = c->audit_session_id;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_audit_login_uid(sd_bus_creds *c, uid_t *uid) {
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_AUDIT_LOGIN_UID))
+ return -ENODATA;
+
+ if (!uid_is_valid(c->audit_login_uid))
+ return -ENXIO;
+
+ *uid = c->audit_login_uid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tty(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TTY))
+ return -ENODATA;
+
+ if (!c->tty)
+ return -ENXIO;
+
+ *ret = c->tty;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_unique_name(sd_bus_creds *c, const char **unique_name) {
+ assert_return(c, -EINVAL);
+ assert_return(unique_name, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UNIQUE_NAME))
+ return -ENODATA;
+
+ *unique_name = c->unique_name;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_well_known_names(sd_bus_creds *c, char ***well_known_names) {
+ assert_return(c, -EINVAL);
+ assert_return(well_known_names, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_WELL_KNOWN_NAMES))
+ return -ENODATA;
+
+ /* As a special hack we return the bus driver as well-known
+ * names list when this is requested. */
+ if (c->well_known_names_driver) {
+ static const char* const wkn[] = {
+ "org.freedesktop.DBus",
+ NULL
+ };
+
+ *well_known_names = (char**) wkn;
+ return 0;
+ }
+
+ if (c->well_known_names_local) {
+ static const char* const wkn[] = {
+ "org.freedesktop.DBus.Local",
+ NULL
+ };
+
+ *well_known_names = (char**) wkn;
+ return 0;
+ }
+
+ *well_known_names = c->well_known_names;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_description(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_DESCRIPTION))
+ return -ENODATA;
+
+ assert(c->description);
+
+ if (!c->unescaped_description) {
+ c->unescaped_description = bus_label_unescape(c->description);
+ if (!c->unescaped_description)
+ return -ENOMEM;
+ }
+
+ *ret = c->unescaped_description;
+ return 0;
+}
+
+static int has_cap(sd_bus_creds *c, size_t offset, int capability) {
+ size_t sz;
+
+ assert(c);
+ assert(capability >= 0);
+ assert(c->capability);
+
+ unsigned lc = cap_last_cap();
+
+ if ((unsigned) capability > lc)
+ return 0;
+
+ /* If the last cap is 63, then there are 64 caps defined, and we need 2 entries á 32bit hence. *
+ * If the last cap is 64, then there are 65 caps defined, and we need 3 entries á 32bit hence. */
+ sz = DIV_ROUND_UP(lc+1, 32LU);
+
+ return !!(c->capability[offset * sz + CAP_TO_INDEX((uint32_t) capability)] & CAP_TO_MASK_CORRECTED((uint32_t) capability));
+}
+
+_public_ int sd_bus_creds_has_effective_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EFFECTIVE_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_EFFECTIVE, capability);
+}
+
+_public_ int sd_bus_creds_has_permitted_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PERMITTED_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_PERMITTED, capability);
+}
+
+_public_ int sd_bus_creds_has_inheritable_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_INHERITABLE_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_INHERITABLE, capability);
+}
+
+_public_ int sd_bus_creds_has_bounding_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_BOUNDING_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_BOUNDING, capability);
+}
+
+static int parse_caps(sd_bus_creds *c, unsigned offset, const char *p) {
+ size_t sz, max;
+ unsigned i, j;
+
+ assert(c);
+ assert(p);
+
+ max = DIV_ROUND_UP(cap_last_cap()+1, 32U);
+ p += strspn(p, WHITESPACE);
+
+ sz = strlen(p);
+ if (sz % 8 != 0)
+ return -EINVAL;
+
+ sz /= 8;
+ if (sz > max)
+ return -EINVAL;
+
+ if (!c->capability) {
+ c->capability = new0(uint32_t, max * 4);
+ if (!c->capability)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < sz; i ++) {
+ uint32_t v = 0;
+
+ for (j = 0; j < 8; ++j) {
+ int t;
+
+ t = unhexchar(*p++);
+ if (t < 0)
+ return -EINVAL;
+
+ v = (v << 4) | t;
+ }
+
+ c->capability[offset * max + (sz - i - 1)] = v;
+ }
+
+ return 0;
+}
+
+int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) {
+ uint64_t missing;
+ int r;
+
+ assert(c);
+ assert(c->allocated);
+
+ if (!(mask & SD_BUS_CREDS_AUGMENT))
+ return 0;
+
+ /* Try to retrieve PID from creds if it wasn't passed to us */
+ if (pid > 0) {
+ c->pid = pid;
+ c->mask |= SD_BUS_CREDS_PID;
+ } else if (c->mask & SD_BUS_CREDS_PID)
+ pid = c->pid;
+ else
+ /* Without pid we cannot do much... */
+ return 0;
+
+ /* Try to retrieve TID from creds if it wasn't passed to us */
+ if (tid <= 0 && (c->mask & SD_BUS_CREDS_TID))
+ tid = c->tid;
+
+ /* Calculate what we shall and can add */
+ missing = mask & ~(c->mask|SD_BUS_CREDS_PID|SD_BUS_CREDS_TID|SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_DESCRIPTION|SD_BUS_CREDS_AUGMENT);
+ if (missing == 0)
+ return 0;
+
+ if (tid > 0) {
+ c->tid = tid;
+ c->mask |= SD_BUS_CREDS_TID;
+ }
+
+ if (missing & (SD_BUS_CREDS_PPID |
+ SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_SUID | SD_BUS_CREDS_FSUID |
+ SD_BUS_CREDS_GID | SD_BUS_CREDS_EGID | SD_BUS_CREDS_SGID | SD_BUS_CREDS_FSGID |
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS |
+ SD_BUS_CREDS_EFFECTIVE_CAPS | SD_BUS_CREDS_INHERITABLE_CAPS |
+ SD_BUS_CREDS_PERMITTED_CAPS | SD_BUS_CREDS_BOUNDING_CAPS)) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+
+ p = procfs_file_alloca(pid, "status");
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return -ESRCH;
+ else if (!ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+ } else {
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (missing & SD_BUS_CREDS_PPID) {
+ p = startswith(line, "PPid:");
+ if (p) {
+ p += strspn(p, WHITESPACE);
+
+ /* Explicitly check for PPID 0 (which is the case for PID 1) */
+ if (!streq(p, "0")) {
+ r = parse_pid(p, &c->ppid);
+ if (r < 0)
+ return r;
+
+ } else
+ c->ppid = 0;
+
+ c->mask |= SD_BUS_CREDS_PPID;
+ continue;
+ }
+ }
+
+ if (missing & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID)) {
+ p = startswith(line, "Uid:");
+ if (p) {
+ unsigned long uid, euid, suid, fsuid;
+
+ p += strspn(p, WHITESPACE);
+ if (sscanf(p, "%lu %lu %lu %lu", &uid, &euid, &suid, &fsuid) != 4)
+ return -EIO;
+
+ if (missing & SD_BUS_CREDS_UID)
+ c->uid = (uid_t) uid;
+ if (missing & SD_BUS_CREDS_EUID)
+ c->euid = (uid_t) euid;
+ if (missing & SD_BUS_CREDS_SUID)
+ c->suid = (uid_t) suid;
+ if (missing & SD_BUS_CREDS_FSUID)
+ c->fsuid = (uid_t) fsuid;
+
+ c->mask |= missing & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID);
+ continue;
+ }
+ }
+
+ if (missing & (SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID)) {
+ p = startswith(line, "Gid:");
+ if (p) {
+ unsigned long gid, egid, sgid, fsgid;
+
+ p += strspn(p, WHITESPACE);
+ if (sscanf(p, "%lu %lu %lu %lu", &gid, &egid, &sgid, &fsgid) != 4)
+ return -EIO;
+
+ if (missing & SD_BUS_CREDS_GID)
+ c->gid = (gid_t) gid;
+ if (missing & SD_BUS_CREDS_EGID)
+ c->egid = (gid_t) egid;
+ if (missing & SD_BUS_CREDS_SGID)
+ c->sgid = (gid_t) sgid;
+ if (missing & SD_BUS_CREDS_FSGID)
+ c->fsgid = (gid_t) fsgid;
+
+ c->mask |= missing & (SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID);
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ p = startswith(line, "Groups:");
+ if (p) {
+ size_t allocated = 0;
+
+ for (;;) {
+ unsigned long g;
+ int n = 0;
+
+ p += strspn(p, WHITESPACE);
+ if (*p == 0)
+ break;
+
+ if (sscanf(p, "%lu%n", &g, &n) != 1)
+ return -EIO;
+
+ if (!GREEDY_REALLOC(c->supplementary_gids, allocated, c->n_supplementary_gids+1))
+ return -ENOMEM;
+
+ c->supplementary_gids[c->n_supplementary_gids++] = (gid_t) g;
+ p += n;
+ }
+
+ c->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_EFFECTIVE_CAPS) {
+ p = startswith(line, "CapEff:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_EFFECTIVE, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_EFFECTIVE_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_PERMITTED_CAPS) {
+ p = startswith(line, "CapPrm:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_PERMITTED, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_PERMITTED_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_INHERITABLE_CAPS) {
+ p = startswith(line, "CapInh:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_INHERITABLE, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_INHERITABLE_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_BOUNDING_CAPS) {
+ p = startswith(line, "CapBnd:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_BOUNDING, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_BOUNDING_CAPS;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_SELINUX_CONTEXT) {
+ const char *p;
+
+ p = procfs_file_alloca(pid, "attr/current");
+ r = read_one_line_file(p, &c->label);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOENT, -EINVAL, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (missing & SD_BUS_CREDS_COMM) {
+ r = get_process_comm(pid, &c->comm);
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_COMM;
+ }
+
+ if (missing & SD_BUS_CREDS_EXE) {
+ r = get_process_exe(pid, &c->exe);
+ if (r == -ESRCH) {
+ /* Unfortunately we cannot really distinguish
+ * the case here where the process does not
+ * exist, and /proc/$PID/exe being unreadable
+ * because $PID is a kernel thread. Hence,
+ * assume it is a kernel thread, and rely on
+ * that this case is caught with a later
+ * call. */
+ c->exe = NULL;
+ c->mask |= SD_BUS_CREDS_EXE;
+ } else if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_EXE;
+ }
+
+ if (missing & SD_BUS_CREDS_CMDLINE) {
+ const char *p;
+
+ p = procfs_file_alloca(pid, "cmdline");
+ r = read_full_file(p, &c->cmdline, &c->cmdline_size);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ } else {
+ if (c->cmdline_size == 0)
+ c->cmdline = mfree(c->cmdline);
+
+ c->mask |= SD_BUS_CREDS_CMDLINE;
+ }
+ }
+
+ if (tid > 0 && (missing & SD_BUS_CREDS_TID_COMM)) {
+ _cleanup_free_ char *p = NULL;
+
+ if (asprintf(&p, "/proc/"PID_FMT"/task/"PID_FMT"/comm", pid, tid) < 0)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &c->tid_comm);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_TID_COMM;
+ }
+
+ if (missing & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID)) {
+
+ if (!c->cgroup) {
+ r = cg_pid_get_path(NULL, pid, &c->cgroup);
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ }
+ }
+
+ if (!c->cgroup_root) {
+ r = cg_get_root_path(&c->cgroup_root);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->cgroup)
+ c->mask |= missing & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID);
+ }
+
+ if (missing & SD_BUS_CREDS_AUDIT_SESSION_ID) {
+ r = audit_session_from_pid(pid, &c->audit_session_id);
+ if (r == -ENODATA) {
+ /* ENODATA means: no audit session id assigned */
+ c->audit_session_id = AUDIT_SESSION_INVALID;
+ c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EOPNOTSUPP, -ENOENT, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ }
+
+ if (missing & SD_BUS_CREDS_AUDIT_LOGIN_UID) {
+ r = audit_loginuid_from_pid(pid, &c->audit_login_uid);
+ if (r == -ENODATA) {
+ /* ENODATA means: no audit login uid assigned */
+ c->audit_login_uid = UID_INVALID;
+ c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EOPNOTSUPP, -ENOENT, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ }
+
+ if (missing & SD_BUS_CREDS_TTY) {
+ r = get_ctty(pid, NULL, &c->tty);
+ if (r == -ENXIO) {
+ /* ENXIO means: process has no controlling TTY */
+ c->tty = NULL;
+ c->mask |= SD_BUS_CREDS_TTY;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EPERM, -EACCES, -ENOENT))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_TTY;
+ }
+
+ /* In case only the exe path was to be read we cannot
+ * distinguish the case where the exe path was unreadable
+ * because the process was a kernel thread, or when the
+ * process didn't exist at all. Hence, let's do a final check,
+ * to be sure. */
+ if (!pid_is_alive(pid))
+ return -ESRCH;
+
+ if (tid > 0 && tid != pid && !pid_is_unwaited(tid))
+ return -ESRCH;
+
+ c->augmented = missing & c->mask;
+
+ return 0;
+}
+
+int bus_creds_extend_by_pid(sd_bus_creds *c, uint64_t mask, sd_bus_creds **ret) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *n = NULL;
+ int r;
+
+ assert(c);
+ assert(ret);
+
+ if ((mask & ~c->mask) == 0 || (!(mask & SD_BUS_CREDS_AUGMENT))) {
+ /* There's already all data we need, or augmentation
+ * wasn't turned on. */
+
+ *ret = sd_bus_creds_ref(c);
+ return 0;
+ }
+
+ n = bus_creds_new();
+ if (!n)
+ return -ENOMEM;
+
+ /* Copy the original data over */
+
+ if (c->mask & mask & SD_BUS_CREDS_PID) {
+ n->pid = c->pid;
+ n->mask |= SD_BUS_CREDS_PID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TID) {
+ n->tid = c->tid;
+ n->mask |= SD_BUS_CREDS_TID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_PPID) {
+ n->ppid = c->ppid;
+ n->mask |= SD_BUS_CREDS_PPID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_UID) {
+ n->uid = c->uid;
+ n->mask |= SD_BUS_CREDS_UID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EUID) {
+ n->euid = c->euid;
+ n->mask |= SD_BUS_CREDS_EUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SUID) {
+ n->suid = c->suid;
+ n->mask |= SD_BUS_CREDS_SUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_FSUID) {
+ n->fsuid = c->fsuid;
+ n->mask |= SD_BUS_CREDS_FSUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_GID) {
+ n->gid = c->gid;
+ n->mask |= SD_BUS_CREDS_GID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EGID) {
+ n->egid = c->egid;
+ n->mask |= SD_BUS_CREDS_EGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SGID) {
+ n->sgid = c->sgid;
+ n->mask |= SD_BUS_CREDS_SGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_FSGID) {
+ n->fsgid = c->fsgid;
+ n->mask |= SD_BUS_CREDS_FSGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ if (c->supplementary_gids) {
+ n->supplementary_gids = newdup(gid_t, c->supplementary_gids, c->n_supplementary_gids);
+ if (!n->supplementary_gids)
+ return -ENOMEM;
+ n->n_supplementary_gids = c->n_supplementary_gids;
+ } else {
+ n->supplementary_gids = NULL;
+ n->n_supplementary_gids = 0;
+ }
+
+ n->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_COMM) {
+ assert(c->comm);
+
+ n->comm = strdup(c->comm);
+ if (!n->comm)
+ return -ENOMEM;
+
+ n->mask |= SD_BUS_CREDS_COMM;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TID_COMM) {
+ assert(c->tid_comm);
+
+ n->tid_comm = strdup(c->tid_comm);
+ if (!n->tid_comm)
+ return -ENOMEM;
+
+ n->mask |= SD_BUS_CREDS_TID_COMM;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EXE) {
+ if (c->exe) {
+ n->exe = strdup(c->exe);
+ if (!n->exe)
+ return -ENOMEM;
+ } else
+ n->exe = NULL;
+
+ n->mask |= SD_BUS_CREDS_EXE;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_CMDLINE) {
+ if (c->cmdline) {
+ n->cmdline = memdup(c->cmdline, c->cmdline_size);
+ if (!n->cmdline)
+ return -ENOMEM;
+
+ n->cmdline_size = c->cmdline_size;
+ } else {
+ n->cmdline = NULL;
+ n->cmdline_size = 0;
+ }
+
+ n->mask |= SD_BUS_CREDS_CMDLINE;
+ }
+
+ if (c->mask & mask & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_OWNER_UID)) {
+ assert(c->cgroup);
+
+ n->cgroup = strdup(c->cgroup);
+ if (!n->cgroup)
+ return -ENOMEM;
+
+ n->cgroup_root = strdup(c->cgroup_root);
+ if (!n->cgroup_root)
+ return -ENOMEM;
+
+ n->mask |= mask & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_OWNER_UID);
+ }
+
+ if (c->mask & mask & (SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS)) {
+ assert(c->capability);
+
+ n->capability = memdup(c->capability, DIV_ROUND_UP(cap_last_cap()+1, 32U) * 4 * 4);
+ if (!n->capability)
+ return -ENOMEM;
+
+ n->mask |= c->mask & mask & (SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS);
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SELINUX_CONTEXT) {
+ assert(c->label);
+
+ n->label = strdup(c->label);
+ if (!n->label)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_AUDIT_SESSION_ID) {
+ n->audit_session_id = c->audit_session_id;
+ n->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ }
+ if (c->mask & mask & SD_BUS_CREDS_AUDIT_LOGIN_UID) {
+ n->audit_login_uid = c->audit_login_uid;
+ n->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TTY) {
+ if (c->tty) {
+ n->tty = strdup(c->tty);
+ if (!n->tty)
+ return -ENOMEM;
+ } else
+ n->tty = NULL;
+ n->mask |= SD_BUS_CREDS_TTY;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_UNIQUE_NAME) {
+ assert(c->unique_name);
+
+ n->unique_name = strdup(c->unique_name);
+ if (!n->unique_name)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_UNIQUE_NAME;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_WELL_KNOWN_NAMES) {
+ if (strv_isempty(c->well_known_names))
+ n->well_known_names = NULL;
+ else {
+ n->well_known_names = strv_copy(c->well_known_names);
+ if (!n->well_known_names)
+ return -ENOMEM;
+ }
+ n->well_known_names_driver = c->well_known_names_driver;
+ n->well_known_names_local = c->well_known_names_local;
+ n->mask |= SD_BUS_CREDS_WELL_KNOWN_NAMES;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_DESCRIPTION) {
+ assert(c->description);
+ n->description = strdup(c->description);
+ if (!n->description)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_DESCRIPTION;
+ }
+
+ n->augmented = c->augmented & n->mask;
+
+ /* Get more data */
+
+ r = bus_creds_add_more(n, mask, 0, 0);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(n);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/bus-creds.h b/src/libsystemd/sd-bus/bus-creds.h
new file mode 100644
index 0000000..7806d9e
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-creds.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+struct sd_bus_creds {
+ bool allocated;
+ unsigned n_ref;
+
+ uint64_t mask;
+ uint64_t augmented;
+
+ uid_t uid;
+ uid_t euid;
+ uid_t suid;
+ uid_t fsuid;
+ gid_t gid;
+ gid_t egid;
+ gid_t sgid;
+ gid_t fsgid;
+
+ gid_t *supplementary_gids;
+ unsigned n_supplementary_gids;
+
+ pid_t ppid;
+ pid_t pid;
+ pid_t tid;
+
+ char *comm;
+ char *tid_comm;
+ char *exe;
+
+ char *cmdline;
+ size_t cmdline_size;
+ char **cmdline_array;
+
+ char *cgroup;
+ char *session;
+ char *unit;
+ char *user_unit;
+ char *slice;
+ char *user_slice;
+
+ char *tty;
+
+ uint32_t *capability;
+
+ uint32_t audit_session_id;
+ uid_t audit_login_uid;
+
+ char *label;
+
+ char *unique_name;
+
+ char **well_known_names;
+ bool well_known_names_driver:1;
+ bool well_known_names_local:1;
+
+ char *cgroup_root;
+
+ char *description, *unescaped_description;
+};
+
+sd_bus_creds* bus_creds_new(void);
+
+void bus_creds_done(sd_bus_creds *c);
+
+int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid);
+
+int bus_creds_extend_by_pid(sd_bus_creds *c, uint64_t mask, sd_bus_creds **ret);
diff --git a/src/libsystemd/sd-bus/bus-dump.c b/src/libsystemd/sd-bus/bus-dump.c
new file mode 100644
index 0000000..3ff87be
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-dump.c
@@ -0,0 +1,592 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/time.h>
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-type.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static char *indent(unsigned level, uint64_t flags) {
+ char *p;
+ unsigned n, i = 0;
+
+ n = 0;
+
+ if (flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY && level > 0)
+ level -= 1;
+
+ if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER)
+ n += 2;
+
+ p = new(char, n + level*8 + 1);
+ if (!p)
+ return NULL;
+
+ if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER) {
+ p[i++] = ' ';
+ p[i++] = ' ';
+ }
+
+ memset(p + i, ' ', level*8);
+ p[i + level*8] = 0;
+
+ return p;
+}
+
+_public_ int sd_bus_message_dump(sd_bus_message *m, FILE *f, uint64_t flags) {
+ unsigned level = 1;
+ int r;
+
+ assert(m);
+
+ if (!f)
+ f = stdout;
+
+ if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER) {
+ fprintf(f,
+ "%s%s%s Type=%s%s%s Endian=%c Flags=%u Version=%u",
+ m->header->type == SD_BUS_MESSAGE_METHOD_ERROR ? ansi_highlight_red() :
+ m->header->type == SD_BUS_MESSAGE_METHOD_RETURN ? ansi_highlight_green() :
+ m->header->type != SD_BUS_MESSAGE_SIGNAL ? ansi_highlight() : "",
+ special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET),
+ ansi_normal(),
+
+ ansi_highlight(),
+ bus_message_type_to_string(m->header->type) ?: "(unknown)",
+ ansi_normal(),
+
+ m->header->endian,
+ m->header->flags,
+ m->header->version);
+
+ /* Display synthetic message serial number in a more readable
+ * format than (uint32_t) -1 */
+ if (BUS_MESSAGE_COOKIE(m) == 0xFFFFFFFFULL)
+ fprintf(f, " Cookie=-1");
+ else
+ fprintf(f, " Cookie=%" PRIu64, BUS_MESSAGE_COOKIE(m));
+
+ if (m->reply_cookie != 0)
+ fprintf(f, " ReplyCookie=%" PRIu64, m->reply_cookie);
+
+ fputs("\n", f);
+
+ if (m->sender)
+ fprintf(f, " Sender=%s%s%s", ansi_highlight(), m->sender, ansi_normal());
+ if (m->destination)
+ fprintf(f, " Destination=%s%s%s", ansi_highlight(), m->destination, ansi_normal());
+ if (m->path)
+ fprintf(f, " Path=%s%s%s", ansi_highlight(), m->path, ansi_normal());
+ if (m->interface)
+ fprintf(f, " Interface=%s%s%s", ansi_highlight(), m->interface, ansi_normal());
+ if (m->member)
+ fprintf(f, " Member=%s%s%s", ansi_highlight(), m->member, ansi_normal());
+
+ if (m->sender || m->destination || m->path || m->interface || m->member)
+ fputs("\n", f);
+
+ if (sd_bus_error_is_set(&m->error))
+ fprintf(f,
+ " ErrorName=%s%s%s"
+ " ErrorMessage=%s\"%s\"%s\n",
+ ansi_highlight_red(), strna(m->error.name), ansi_normal(),
+ ansi_highlight_red(), strna(m->error.message), ansi_normal());
+
+ if (m->monotonic != 0)
+ fprintf(f, " Monotonic="USEC_FMT, m->monotonic);
+ if (m->realtime != 0)
+ fprintf(f, " Realtime="USEC_FMT, m->realtime);
+ if (m->seqnum != 0)
+ fprintf(f, " SequenceNumber=%"PRIu64, m->seqnum);
+
+ if (m->monotonic != 0 || m->realtime != 0 || m->seqnum != 0)
+ fputs("\n", f);
+
+ bus_creds_dump(&m->creds, f, true);
+ }
+
+ r = sd_bus_message_rewind(m, !(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY));
+ if (r < 0)
+ return log_error_errno(r, "Failed to rewind: %m");
+
+ if (!(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY)) {
+ _cleanup_free_ char *prefix = NULL;
+
+ prefix = indent(0, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%sMESSAGE \"%s\" {\n", prefix, strempty(m->root_container.signature));
+ }
+
+ for (;;) {
+ _cleanup_free_ char *prefix = NULL;
+ const char *contents = NULL;
+ char type;
+ union {
+ uint8_t u8;
+ uint16_t u16;
+ int16_t s16;
+ uint32_t u32;
+ int32_t s32;
+ uint64_t u64;
+ int64_t s64;
+ double d64;
+ const char *string;
+ int i;
+ } basic;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return log_error_errno(r, "Failed to peek type: %m");
+
+ if (r == 0) {
+ if (level <= 1)
+ break;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to exit container: %m");
+
+ level--;
+
+ prefix = indent(level, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%s};\n", prefix);
+ continue;
+ }
+
+ prefix = indent(level, flags);
+ if (!prefix)
+ return log_oom();
+
+ if (bus_type_is_container(type) > 0) {
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enter container: %m");
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ fprintf(f, "%sARRAY \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ fprintf(f, "%sVARIANT \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ fprintf(f, "%sSTRUCT \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ fprintf(f, "%sDICT_ENTRY \"%s\" {\n", prefix, contents);
+
+ level++;
+
+ continue;
+ }
+
+ r = sd_bus_message_read_basic(m, type, &basic);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get basic: %m");
+
+ assert(r > 0);
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE:
+ fprintf(f, "%sBYTE %s%u%s;\n", prefix, ansi_highlight(), basic.u8, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ fprintf(f, "%sBOOLEAN %s%s%s;\n", prefix, ansi_highlight(), true_false(basic.i), ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ fprintf(f, "%sINT16 %s%i%s;\n", prefix, ansi_highlight(), basic.s16, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT16:
+ fprintf(f, "%sUINT16 %s%u%s;\n", prefix, ansi_highlight(), basic.u16, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ fprintf(f, "%sINT32 %s%i%s;\n", prefix, ansi_highlight(), basic.s32, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT32:
+ fprintf(f, "%sUINT32 %s%u%s;\n", prefix, ansi_highlight(), basic.u32, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ fprintf(f, "%sINT64 %s%"PRIi64"%s;\n", prefix, ansi_highlight(), basic.s64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT64:
+ fprintf(f, "%sUINT64 %s%"PRIu64"%s;\n", prefix, ansi_highlight(), basic.u64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_DOUBLE:
+ fprintf(f, "%sDOUBLE %s%g%s;\n", prefix, ansi_highlight(), basic.d64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_STRING:
+ fprintf(f, "%sSTRING \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_OBJECT_PATH:
+ fprintf(f, "%sOBJECT_PATH \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_SIGNATURE:
+ fprintf(f, "%sSIGNATURE \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD:
+ fprintf(f, "%sUNIX_FD %s%i%s;\n", prefix, ansi_highlight(), basic.i, ansi_normal());
+ break;
+
+ default:
+ assert_not_reached("Unknown basic type.");
+ }
+ }
+
+ if (!(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY)) {
+ _cleanup_free_ char *prefix = NULL;
+
+ prefix = indent(0, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%s};\n\n", prefix);
+ }
+
+ return 0;
+}
+
+static void dump_capabilities(
+ sd_bus_creds *c,
+ FILE *f,
+ const char *name,
+ bool terse,
+ int (*has)(sd_bus_creds *c, int capability)) {
+
+ unsigned long i, last_cap;
+ unsigned n = 0;
+ int r;
+
+ assert(c);
+ assert(f);
+ assert(name);
+ assert(has);
+
+ i = 0;
+ r = has(c, i);
+ if (r < 0)
+ return;
+
+ fprintf(f, "%s%s=%s", terse ? " " : "", name, terse ? "" : ansi_highlight());
+ last_cap = cap_last_cap();
+
+ for (;;) {
+ if (r > 0) {
+
+ if (n > 0)
+ fputc(' ', f);
+ if (n % 4 == 3)
+ fprintf(f, terse ? "\n " : "\n ");
+
+ fprintf(f, "%s", strna(capability_to_name(i)));
+ n++;
+ }
+
+ i++;
+
+ if (i > last_cap)
+ break;
+
+ r = has(c, i);
+ }
+
+ fputs("\n", f);
+
+ if (!terse)
+ fputs(ansi_normal(), f);
+}
+
+int bus_creds_dump(sd_bus_creds *c, FILE *f, bool terse) {
+ uid_t owner, audit_loginuid;
+ uint32_t audit_sessionid;
+ char **cmdline = NULL, **well_known = NULL;
+ const char *prefix, *color, *suffix, *s;
+ int r, q, v, w, z;
+
+ assert(c);
+
+ if (!f)
+ f = stdout;
+
+ if (terse) {
+ prefix = " ";
+ suffix = "";
+ color = "";
+ } else {
+ const char *off;
+
+ prefix = "";
+ color = ansi_highlight();
+
+ off = ansi_normal();
+ suffix = strjoina(off, "\n");
+ }
+
+ if (c->mask & SD_BUS_CREDS_PID)
+ fprintf(f, "%sPID=%s"PID_FMT"%s", prefix, color, c->pid, suffix);
+ if (c->mask & SD_BUS_CREDS_TID)
+ fprintf(f, "%sTID=%s"PID_FMT"%s", prefix, color, c->tid, suffix);
+ if (c->mask & SD_BUS_CREDS_PPID) {
+ if (c->ppid == 0)
+ fprintf(f, "%sPPID=%sn/a%s", prefix, color, suffix);
+ else
+ fprintf(f, "%sPPID=%s"PID_FMT"%s", prefix, color, c->ppid, suffix);
+ }
+ if (c->mask & SD_BUS_CREDS_TTY)
+ fprintf(f, "%sTTY=%s%s%s", prefix, color, strna(c->tty), suffix);
+
+ if (terse && ((c->mask & (SD_BUS_CREDS_PID|SD_BUS_CREDS_TID|SD_BUS_CREDS_PPID|SD_BUS_CREDS_TTY))))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_UID)
+ fprintf(f, "%sUID=%s"UID_FMT"%s", prefix, color, c->uid, suffix);
+ if (c->mask & SD_BUS_CREDS_EUID)
+ fprintf(f, "%sEUID=%s"UID_FMT"%s", prefix, color, c->euid, suffix);
+ if (c->mask & SD_BUS_CREDS_SUID)
+ fprintf(f, "%sSUID=%s"UID_FMT"%s", prefix, color, c->suid, suffix);
+ if (c->mask & SD_BUS_CREDS_FSUID)
+ fprintf(f, "%sFSUID=%s"UID_FMT"%s", prefix, color, c->fsuid, suffix);
+ r = sd_bus_creds_get_owner_uid(c, &owner);
+ if (r >= 0)
+ fprintf(f, "%sOwnerUID=%s"UID_FMT"%s", prefix, color, owner, suffix);
+ if (c->mask & SD_BUS_CREDS_GID)
+ fprintf(f, "%sGID=%s"GID_FMT"%s", prefix, color, c->gid, suffix);
+ if (c->mask & SD_BUS_CREDS_EGID)
+ fprintf(f, "%sEGID=%s"GID_FMT"%s", prefix, color, c->egid, suffix);
+ if (c->mask & SD_BUS_CREDS_SGID)
+ fprintf(f, "%sSGID=%s"GID_FMT"%s", prefix, color, c->sgid, suffix);
+ if (c->mask & SD_BUS_CREDS_FSGID)
+ fprintf(f, "%sFSGID=%s"GID_FMT"%s", prefix, color, c->fsgid, suffix);
+
+ if (c->mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ unsigned i;
+
+ fprintf(f, "%sSupplementaryGIDs=%s", prefix, color);
+ for (i = 0; i < c->n_supplementary_gids; i++)
+ fprintf(f, "%s" GID_FMT, i > 0 ? " " : "", c->supplementary_gids[i]);
+ fprintf(f, "%s", suffix);
+ }
+
+ if (terse && ((c->mask & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID|
+ SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID|
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS)) || r >= 0))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_COMM)
+ fprintf(f, "%sComm=%s%s%s", prefix, color, c->comm, suffix);
+ if (c->mask & SD_BUS_CREDS_TID_COMM)
+ fprintf(f, "%sTIDComm=%s%s%s", prefix, color, c->tid_comm, suffix);
+ if (c->mask & SD_BUS_CREDS_EXE)
+ fprintf(f, "%sExe=%s%s%s", prefix, color, strna(c->exe), suffix);
+
+ if (terse && (c->mask & (SD_BUS_CREDS_EXE|SD_BUS_CREDS_COMM|SD_BUS_CREDS_TID_COMM)))
+ fputs("\n", f);
+
+ r = sd_bus_creds_get_cmdline(c, &cmdline);
+ if (r >= 0) {
+ char **i;
+
+ fprintf(f, "%sCommandLine=%s", prefix, color);
+ STRV_FOREACH(i, cmdline) {
+ if (i != cmdline)
+ fputc(' ', f);
+
+ fputs(*i, f);
+ }
+
+ fprintf(f, "%s", suffix);
+ } else if (r != -ENODATA)
+ fprintf(f, "%sCommandLine=%sn/a%s", prefix, color, suffix);
+
+ if (c->mask & SD_BUS_CREDS_SELINUX_CONTEXT)
+ fprintf(f, "%sLabel=%s%s%s", prefix, color, c->label, suffix);
+ if (c->mask & SD_BUS_CREDS_DESCRIPTION)
+ fprintf(f, "%sDescription=%s%s%s", prefix, color, c->description, suffix);
+
+ if (terse && (c->mask & (SD_BUS_CREDS_SELINUX_CONTEXT|SD_BUS_CREDS_DESCRIPTION)))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_CGROUP)
+ fprintf(f, "%sCGroup=%s%s%s", prefix, color, c->cgroup, suffix);
+ s = NULL;
+ r = sd_bus_creds_get_unit(c, &s);
+ if (r != -ENODATA)
+ fprintf(f, "%sUnit=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ v = sd_bus_creds_get_slice(c, &s);
+ if (v != -ENODATA)
+ fprintf(f, "%sSlice=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ q = sd_bus_creds_get_user_unit(c, &s);
+ if (q != -ENODATA)
+ fprintf(f, "%sUserUnit=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ w = sd_bus_creds_get_user_slice(c, &s);
+ if (w != -ENODATA)
+ fprintf(f, "%sUserSlice=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ z = sd_bus_creds_get_session(c, &s);
+ if (z != -ENODATA)
+ fprintf(f, "%sSession=%s%s%s", prefix, color, strna(s), suffix);
+
+ if (terse && ((c->mask & SD_BUS_CREDS_CGROUP) || r != -ENODATA || q != -ENODATA || v != -ENODATA || w != -ENODATA || z != -ENODATA))
+ fputs("\n", f);
+
+ r = sd_bus_creds_get_audit_login_uid(c, &audit_loginuid);
+ if (r >= 0)
+ fprintf(f, "%sAuditLoginUID=%s"UID_FMT"%s", prefix, color, audit_loginuid, suffix);
+ else if (r != -ENODATA)
+ fprintf(f, "%sAuditLoginUID=%sn/a%s", prefix, color, suffix);
+ q = sd_bus_creds_get_audit_session_id(c, &audit_sessionid);
+ if (q >= 0)
+ fprintf(f, "%sAuditSessionID=%s%"PRIu32"%s", prefix, color, audit_sessionid, suffix);
+ else if (q != -ENODATA)
+ fprintf(f, "%sAuditSessionID=%sn/a%s", prefix, color, suffix);
+
+ if (terse && (r != -ENODATA || q != -ENODATA))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_UNIQUE_NAME)
+ fprintf(f, "%sUniqueName=%s%s%s", prefix, color, c->unique_name, suffix);
+
+ if (sd_bus_creds_get_well_known_names(c, &well_known) >= 0) {
+ char **i;
+
+ fprintf(f, "%sWellKnownNames=%s", prefix, color);
+ STRV_FOREACH(i, well_known) {
+ if (i != well_known)
+ fputc(' ', f);
+
+ fputs(*i, f);
+ }
+
+ fprintf(f, "%s", suffix);
+ }
+
+ if (terse && (c->mask & SD_BUS_CREDS_UNIQUE_NAME || well_known))
+ fputc('\n', f);
+
+ dump_capabilities(c, f, "EffectiveCapabilities", terse, sd_bus_creds_has_effective_cap);
+ dump_capabilities(c, f, "PermittedCapabilities", terse, sd_bus_creds_has_permitted_cap);
+ dump_capabilities(c, f, "InheritableCapabilities", terse, sd_bus_creds_has_inheritable_cap);
+ dump_capabilities(c, f, "BoundingCapabilities", terse, sd_bus_creds_has_bounding_cap);
+
+ return 0;
+}
+
+/*
+ * For details about the file format, see:
+ *
+ * http://wiki.wireshark.org/Development/LibpcapFileFormat
+ */
+
+typedef struct _packed_ pcap_hdr_s {
+ uint32_t magic_number; /* magic number */
+ uint16_t version_major; /* major version number */
+ uint16_t version_minor; /* minor version number */
+ int32_t thiszone; /* GMT to local correction */
+ uint32_t sigfigs; /* accuracy of timestamps */
+ uint32_t snaplen; /* max length of captured packets, in octets */
+ uint32_t network; /* data link type */
+} pcap_hdr_t ;
+
+typedef struct _packed_ pcaprec_hdr_s {
+ uint32_t ts_sec; /* timestamp seconds */
+ uint32_t ts_usec; /* timestamp microseconds */
+ uint32_t incl_len; /* number of octets of packet saved in file */
+ uint32_t orig_len; /* actual length of packet */
+} pcaprec_hdr_t;
+
+int bus_pcap_header(size_t snaplen, FILE *f) {
+
+ pcap_hdr_t hdr = {
+ .magic_number = 0xa1b2c3d4U,
+ .version_major = 2,
+ .version_minor = 4,
+ .thiszone = 0, /* UTC */
+ .sigfigs = 0,
+ .network = 231, /* D-Bus */
+ };
+
+ if (!f)
+ f = stdout;
+
+ assert(snaplen > 0);
+ assert((size_t) (uint32_t) snaplen == snaplen);
+
+ hdr.snaplen = (uint32_t) snaplen;
+
+ fwrite(&hdr, 1, sizeof(hdr), f);
+
+ return fflush_and_check(f);
+}
+
+int bus_message_pcap_frame(sd_bus_message *m, size_t snaplen, FILE *f) {
+ struct bus_body_part *part;
+ pcaprec_hdr_t hdr = {};
+ struct timeval tv;
+ unsigned i;
+ size_t w;
+
+ if (!f)
+ f = stdout;
+
+ assert(m);
+ assert(snaplen > 0);
+ assert((size_t) (uint32_t) snaplen == snaplen);
+
+ if (m->realtime != 0)
+ timeval_store(&tv, m->realtime);
+ else
+ assert_se(gettimeofday(&tv, NULL) >= 0);
+
+ hdr.ts_sec = tv.tv_sec;
+ hdr.ts_usec = tv.tv_usec;
+ hdr.orig_len = BUS_MESSAGE_SIZE(m);
+ hdr.incl_len = MIN(hdr.orig_len, snaplen);
+
+ /* write the pcap header */
+ fwrite(&hdr, 1, sizeof(hdr), f);
+
+ /* write the dbus header */
+ w = MIN(BUS_MESSAGE_BODY_BEGIN(m), snaplen);
+ fwrite(m->header, 1, w, f);
+ snaplen -= w;
+
+ /* write the dbus body */
+ MESSAGE_FOREACH_PART(part, i, m) {
+ if (snaplen <= 0)
+ break;
+
+ w = MIN(part->size, snaplen);
+ fwrite(part->data, 1, w, f);
+ snaplen -= w;
+ }
+
+ return fflush_and_check(f);
+}
diff --git a/src/libsystemd/sd-bus/bus-dump.h b/src/libsystemd/sd-bus/bus-dump.h
new file mode 100644
index 0000000..aeb4616
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-dump.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+int bus_creds_dump(sd_bus_creds *c, FILE *f, bool terse);
+
+int bus_pcap_header(size_t snaplen, FILE *f);
+int bus_message_pcap_frame(sd_bus_message *m, size_t snaplen, FILE *f);
diff --git a/src/libsystemd/sd-bus/bus-error.c b/src/libsystemd/sd-bus/bus-error.c
new file mode 100644
index 0000000..8da2024
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-error.c
@@ -0,0 +1,615 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "errno-list.h"
+#include "errno-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_standard_errors[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.Failed", EACCES),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NoMemory", ENOMEM),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.ServiceUnknown", EHOSTUNREACH),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NameHasNoOwner", ENXIO),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NoReply", ETIMEDOUT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.IOError", EIO),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.BadAddress", EADDRNOTAVAIL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NotSupported", EOPNOTSUPP),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.LimitsExceeded", ENOBUFS),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.AccessDenied", EACCES),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.AuthFailed", EACCES),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InteractiveAuthorizationRequired", EACCES),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NoServer", EHOSTDOWN),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.Timeout", ETIMEDOUT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.NoNetwork", ENONET),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.AddressInUse", EADDRINUSE),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.Disconnected", ECONNRESET),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InvalidArgs", EINVAL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.FileNotFound", ENOENT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.FileExists", EEXIST),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnknownMethod", EBADR),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnknownObject", EBADR),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnknownInterface", EBADR),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnknownProperty", EBADR),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.PropertyReadOnly", EROFS),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.UnixProcessIdUnknown", ESRCH),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InvalidSignature", EINVAL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InconsistentMessage", EBADMSG),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.TimedOut", ETIMEDOUT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.MatchRuleInvalid", EINVAL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.InvalidFileContent", EINVAL),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.MatchRuleNotFound", ENOENT),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.SELinuxSecurityContextUnknown", ESRCH),
+ SD_BUS_ERROR_MAP("org.freedesktop.DBus.Error.ObjectPathInUse", EBUSY),
+ SD_BUS_ERROR_MAP_END
+};
+
+/* GCC maps this magically to the beginning and end of the BUS_ERROR_MAP section */
+extern const sd_bus_error_map __start_SYSTEMD_BUS_ERROR_MAP[];
+extern const sd_bus_error_map __stop_SYSTEMD_BUS_ERROR_MAP[];
+
+/* Additional maps registered with sd_bus_error_add_map() are in this
+ * NULL terminated array */
+static const sd_bus_error_map **additional_error_maps = NULL;
+
+static int bus_error_name_to_errno(const char *name) {
+ const sd_bus_error_map **map, *m;
+ const char *p;
+ int r;
+
+ if (!name)
+ return EINVAL;
+
+ p = startswith(name, "System.Error.");
+ if (p) {
+ r = errno_from_name(p);
+ if (r < 0)
+ return EIO;
+
+ return r;
+ }
+
+ if (additional_error_maps)
+ for (map = additional_error_maps; *map; map++)
+ for (m = *map;; m++) {
+ /* For additional error maps the end marker is actually the end marker */
+ if (m->code == BUS_ERROR_MAP_END_MARKER)
+ break;
+
+ if (streq(m->name, name))
+ return m->code;
+ }
+
+ m = ALIGN_TO_PTR(__start_SYSTEMD_BUS_ERROR_MAP, sizeof(void*));
+ while (m < __stop_SYSTEMD_BUS_ERROR_MAP) {
+ /* For magic ELF error maps, the end marker might
+ * appear in the middle of things, since multiple maps
+ * might appear in the same section. Hence, let's skip
+ * over it, but realign the pointer to the next 8 byte
+ * boundary, which is the selected alignment for the
+ * arrays. */
+ if (m->code == BUS_ERROR_MAP_END_MARKER) {
+ m = ALIGN_TO_PTR(m + 1, sizeof(void*));
+ continue;
+ }
+
+ if (streq(m->name, name))
+ return m->code;
+
+ m++;
+ }
+
+ return EIO;
+}
+
+static sd_bus_error errno_to_bus_error_const(int error) {
+
+ if (error < 0)
+ error = -error;
+
+ switch (error) {
+
+ case ENOMEM:
+ return BUS_ERROR_OOM;
+
+ case EPERM:
+ case EACCES:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_ACCESS_DENIED, "Access denied");
+
+ case EINVAL:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INVALID_ARGS, "Invalid argument");
+
+ case ESRCH:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, "No such process");
+
+ case ENOENT:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_NOT_FOUND, "File not found");
+
+ case EEXIST:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_EXISTS, "File exists");
+
+ case ETIMEDOUT:
+ case ETIME:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_TIMEOUT, "Timed out");
+
+ case EIO:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_IO_ERROR, "Input/output error");
+
+ case ENETRESET:
+ case ECONNABORTED:
+ case ECONNRESET:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_DISCONNECTED, "Disconnected");
+
+ case EOPNOTSUPP:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NOT_SUPPORTED, "Not supported");
+
+ case EADDRNOTAVAIL:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_BAD_ADDRESS, "Address not available");
+
+ case ENOBUFS:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_LIMITS_EXCEEDED, "Limits exceeded");
+
+ case EADDRINUSE:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_ADDRESS_IN_USE, "Address in use");
+
+ case EBADMSG:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Inconsistent message");
+ }
+
+ return SD_BUS_ERROR_NULL;
+}
+
+static int errno_to_bus_error_name_new(int error, char **ret) {
+ const char *name;
+ char *n;
+
+ if (error < 0)
+ error = -error;
+
+ name = errno_to_name(error);
+ if (!name)
+ return 0;
+
+ n = strjoin("System.Error.", name);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 1;
+}
+
+bool bus_error_is_dirty(sd_bus_error *e) {
+ if (!e)
+ return false;
+
+ return e->name || e->message || e->_need_free != 0;
+}
+
+_public_ void sd_bus_error_free(sd_bus_error *e) {
+ if (!e)
+ return;
+
+ if (e->_need_free > 0) {
+ free((void*) e->name);
+ free((void*) e->message);
+ }
+
+ *e = SD_BUS_ERROR_NULL;
+}
+
+_public_ int sd_bus_error_set(sd_bus_error *e, const char *name, const char *message) {
+
+ if (!name)
+ return 0;
+ if (!e)
+ goto finish;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ e->name = strdup(name);
+ if (!e->name) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ if (message)
+ e->message = strdup(message);
+
+ e->_need_free = 1;
+
+finish:
+ return -bus_error_name_to_errno(name);
+}
+
+int bus_error_setfv(sd_bus_error *e, const char *name, const char *format, va_list ap) {
+
+ if (!name)
+ return 0;
+
+ if (e) {
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ e->name = strdup(name);
+ if (!e->name) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ /* If we hit OOM on formatting the pretty message, we ignore
+ * this, since we at least managed to write the error name */
+ if (format)
+ (void) vasprintf((char**) &e->message, format, ap);
+
+ e->_need_free = 1;
+ }
+
+ return -bus_error_name_to_errno(name);
+}
+
+_public_ int sd_bus_error_setf(sd_bus_error *e, const char *name, const char *format, ...) {
+
+ if (format) {
+ int r;
+ va_list ap;
+
+ va_start(ap, format);
+ r = bus_error_setfv(e, name, format, ap);
+ va_end(ap);
+
+ return r;
+ }
+
+ return sd_bus_error_set(e, name, NULL);
+}
+
+_public_ int sd_bus_error_copy(sd_bus_error *dest, const sd_bus_error *e) {
+
+ if (!sd_bus_error_is_set(e))
+ return 0;
+ if (!dest)
+ goto finish;
+
+ assert_return(!bus_error_is_dirty(dest), -EINVAL);
+
+ /*
+ * _need_free < 0 indicates that the error is temporarily const, needs deep copying
+ * _need_free == 0 indicates that the error is perpetually const, needs no deep copying
+ * _need_free > 0 indicates that the error is fully dynamic, needs deep copying
+ */
+
+ if (e->_need_free == 0)
+ *dest = *e;
+ else {
+ dest->name = strdup(e->name);
+ if (!dest->name) {
+ *dest = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ if (e->message)
+ dest->message = strdup(e->message);
+
+ dest->_need_free = 1;
+ }
+
+finish:
+ return -bus_error_name_to_errno(e->name);
+}
+
+_public_ int sd_bus_error_move(sd_bus_error *dest, sd_bus_error *e) {
+ int r;
+
+ if (!sd_bus_error_is_set(e)) {
+
+ if (dest)
+ *dest = SD_BUS_ERROR_NULL;
+
+ return 0;
+ }
+
+ r = -bus_error_name_to_errno(e->name);
+
+ if (dest) {
+ *dest = *e;
+ *e = SD_BUS_ERROR_NULL;
+ } else
+ sd_bus_error_free(e);
+
+ return r;
+}
+
+_public_ int sd_bus_error_set_const(sd_bus_error *e, const char *name, const char *message) {
+ if (!name)
+ return 0;
+ if (!e)
+ goto finish;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ *e = SD_BUS_ERROR_MAKE_CONST(name, message);
+
+finish:
+ return -bus_error_name_to_errno(name);
+}
+
+_public_ int sd_bus_error_is_set(const sd_bus_error *e) {
+ if (!e)
+ return 0;
+
+ return !!e->name;
+}
+
+_public_ int sd_bus_error_has_name(const sd_bus_error *e, const char *name) {
+ if (!e)
+ return 0;
+
+ return streq_ptr(e->name, name);
+}
+
+_public_ int sd_bus_error_has_names_sentinel(const sd_bus_error *e, ...) {
+ if (!e || !e->name)
+ return 0;
+
+ va_list ap;
+ const char *p;
+
+ va_start(ap, e);
+ while ((p = va_arg(ap, const char *)))
+ if (streq(p, e->name))
+ break;
+ va_end(ap);
+ return !!p;
+}
+
+_public_ int sd_bus_error_get_errno(const sd_bus_error* e) {
+ if (!e || !e->name)
+ return 0;
+
+ return bus_error_name_to_errno(e->name);
+}
+
+static void bus_error_strerror(sd_bus_error *e, int error) {
+ size_t k = 64;
+ char *m;
+
+ assert(e);
+
+ for (;;) {
+ char *x;
+
+ m = new(char, k);
+ if (!m)
+ return;
+
+ errno = 0;
+ x = strerror_r(error, m, k);
+ if (errno == ERANGE || strlen(x) >= k - 1) {
+ free(m);
+ k *= 2;
+ continue;
+ }
+
+ if (errno) {
+ free(m);
+ return;
+ }
+
+ if (x == m) {
+ if (e->_need_free > 0) {
+ /* Error is already dynamic, let's just update the message */
+ free((char*) e->message);
+ e->message = x;
+
+ } else {
+ char *t;
+ /* Error was const so far, let's make it dynamic, if we can */
+
+ t = strdup(e->name);
+ if (!t) {
+ free(m);
+ return;
+ }
+
+ e->_need_free = 1;
+ e->name = t;
+ e->message = x;
+ }
+ } else {
+ free(m);
+
+ if (e->_need_free > 0) {
+ char *t;
+
+ /* Error is dynamic, let's hence make the message also dynamic */
+ t = strdup(x);
+ if (!t)
+ return;
+
+ free((char*) e->message);
+ e->message = t;
+ } else {
+ /* Error is const, hence we can just override */
+ e->message = x;
+ }
+ }
+
+ return;
+ }
+}
+
+_public_ int sd_bus_error_set_errno(sd_bus_error *e, int error) {
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return -error;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ /* First, try a const translation */
+ *e = errno_to_bus_error_const(error);
+
+ if (!sd_bus_error_is_set(e)) {
+ int k;
+
+ /* If that didn't work, try a dynamic one. */
+
+ k = errno_to_bus_error_name_new(error, (char**) &e->name);
+ if (k > 0)
+ e->_need_free = 1;
+ else if (k < 0) {
+ *e = BUS_ERROR_OOM;
+ return -error;
+ } else
+ *e = BUS_ERROR_FAILED;
+ }
+
+ /* Now, fill in the message from strerror() if we can */
+ bus_error_strerror(e, error);
+ return -error;
+}
+
+_public_ int sd_bus_error_set_errnofv(sd_bus_error *e, int error, const char *format, va_list ap) {
+ PROTECT_ERRNO;
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return 0;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ /* First, try a const translation */
+ *e = errno_to_bus_error_const(error);
+
+ if (!sd_bus_error_is_set(e)) {
+ int k;
+
+ /* If that didn't work, try a dynamic one */
+
+ k = errno_to_bus_error_name_new(error, (char**) &e->name);
+ if (k > 0)
+ e->_need_free = 1;
+ else if (k < 0) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ } else
+ *e = BUS_ERROR_FAILED;
+ }
+
+ if (format) {
+ _cleanup_free_ char *m = NULL;
+
+ /* Then, let's try to fill in the supplied message */
+
+ errno = error; /* Make sure that %m resolves to the specified error */
+ if (vasprintf(&m, format, ap) < 0)
+ goto fail;
+
+ if (e->_need_free <= 0) {
+ char *t;
+
+ t = strdup(e->name);
+ if (!t)
+ goto fail;
+
+ e->_need_free = 1;
+ e->name = t;
+ }
+
+ e->message = TAKE_PTR(m);
+ return -error;
+ }
+
+fail:
+ /* If that didn't work, use strerror() for the message */
+ bus_error_strerror(e, error);
+ return -error;
+}
+
+_public_ int sd_bus_error_set_errnof(sd_bus_error *e, int error, const char *format, ...) {
+ int r;
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return 0;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_bus_error_set_errnofv(e, error, format, ap);
+ va_end(ap);
+
+ return r;
+ }
+
+ return sd_bus_error_set_errno(e, error);
+}
+
+const char *bus_error_message(const sd_bus_error *e, int error) {
+
+ if (e) {
+ /* Sometimes, the D-Bus server is a little bit too verbose with
+ * its error messages, so let's override them here */
+ if (sd_bus_error_has_name(e, SD_BUS_ERROR_ACCESS_DENIED))
+ return "Access denied";
+
+ if (e->message)
+ return e->message;
+ }
+
+ return strerror_safe(error);
+}
+
+static bool map_ok(const sd_bus_error_map *map) {
+ for (; map->code != BUS_ERROR_MAP_END_MARKER; map++)
+ if (!map->name || map->code <=0)
+ return false;
+ return true;
+}
+
+_public_ int sd_bus_error_add_map(const sd_bus_error_map *map) {
+ const sd_bus_error_map **maps = NULL;
+ unsigned n = 0;
+
+ assert_return(map, -EINVAL);
+ assert_return(map_ok(map), -EINVAL);
+
+ if (additional_error_maps)
+ for (; additional_error_maps[n] != NULL; n++)
+ if (additional_error_maps[n] == map)
+ return 0;
+
+ maps = reallocarray(additional_error_maps, n + 2, sizeof(struct sd_bus_error_map*));
+ if (!maps)
+ return -ENOMEM;
+
+ maps[n] = map;
+ maps[n+1] = NULL;
+
+ additional_error_maps = maps;
+ return 1;
+}
diff --git a/src/libsystemd/sd-bus/bus-error.h b/src/libsystemd/sd-bus/bus-error.h
new file mode 100644
index 0000000..557284f
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-error.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+#include "macro.h"
+
+bool bus_error_is_dirty(sd_bus_error *e);
+
+const char *bus_error_message(const sd_bus_error *e, int error);
+
+int bus_error_setfv(sd_bus_error *e, const char *name, const char *format, va_list ap) _printf_(3,0);
+int bus_error_set_errnofv(sd_bus_error *e, int error, const char *format, va_list ap) _printf_(3,0);
+
+#define BUS_ERROR_OOM SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_MEMORY, "Out of memory")
+#define BUS_ERROR_FAILED SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FAILED, "Operation failed")
+
+/*
+ * There are two ways to register error maps with the error translation
+ * logic: by using BUS_ERROR_MAP_ELF_REGISTER, which however only
+ * works when linked into the same ELF module, or via
+ * sd_bus_error_add_map() which is the official, external API, that
+ * works from any module.
+ *
+ * Note that BUS_ERROR_MAP_ELF_REGISTER has to be used as decorator in
+ * the bus error table, and BUS_ERROR_MAP_ELF_USE has to be used at
+ * least once per compilation unit (i.e. per library), to ensure that
+ * the error map is really added to the final binary.
+ */
+
+#define BUS_ERROR_MAP_ELF_REGISTER \
+ _section_("SYSTEMD_BUS_ERROR_MAP") \
+ _used_ \
+ _alignptr_ \
+ _variable_no_sanitize_address_
+
+#define BUS_ERROR_MAP_ELF_USE(errors) \
+ extern const sd_bus_error_map errors[]; \
+ _used_ \
+ static const sd_bus_error_map * const CONCATENATE(errors ## _copy_, __COUNTER__) = errors;
+
+/* We use something exotic as end marker, to ensure people build the
+ * maps using the macsd-ros. */
+#define BUS_ERROR_MAP_END_MARKER -'x'
+
+BUS_ERROR_MAP_ELF_USE(bus_standard_errors);
diff --git a/src/libsystemd/sd-bus/bus-gvariant.c b/src/libsystemd/sd-bus/bus-gvariant.c
new file mode 100644
index 0000000..850540a
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-gvariant.c
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <string.h>
+
+#include "sd-bus.h"
+
+#include "bus-gvariant.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+
+int bus_gvariant_get_size(const char *signature) {
+ const char *p;
+ int sum = 0, r;
+
+ /* For fixed size structs. Fails for variable size structs. */
+
+ p = signature;
+ while (*p != 0) {
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ r = bus_gvariant_get_alignment(t);
+ if (r < 0)
+ return r;
+
+ sum = ALIGN_TO(sum, r);
+ }
+
+ switch (*p) {
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_BYTE:
+ sum += 1;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ sum += 2;
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ sum += 4;
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ sum += 8;
+ break;
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ if (n == 2) {
+ /* unary type () has fixed size of 1 */
+ r = 1;
+ } else {
+ char t[n-1];
+
+ memcpy(t, p + 1, n - 2);
+ t[n - 2] = 0;
+
+ r = bus_gvariant_get_size(t);
+ if (r < 0)
+ return r;
+ }
+
+ sum += r;
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_VARIANT:
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown signature type");
+ }
+
+ p += n;
+ }
+
+ r = bus_gvariant_get_alignment(signature);
+ if (r < 0)
+ return r;
+
+ return ALIGN_TO(sum, r);
+}
+
+int bus_gvariant_get_alignment(const char *signature) {
+ size_t alignment = 1;
+ const char *p;
+ int r;
+
+ p = signature;
+ while (*p != 0 && alignment < 8) {
+ size_t n;
+ int a;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+
+ switch (*p) {
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ a = 1;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ a = 2;
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ a = 4;
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_VARIANT:
+ a = 8;
+ break;
+
+ case SD_BUS_TYPE_ARRAY: {
+ char t[n];
+
+ memcpy(t, p + 1, n - 1);
+ t[n - 1] = 0;
+
+ a = bus_gvariant_get_alignment(t);
+ break;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ char t[n-1];
+
+ memcpy(t, p + 1, n - 2);
+ t[n - 2] = 0;
+
+ a = bus_gvariant_get_alignment(t);
+ break;
+ }
+
+ default:
+ assert_not_reached("Unknown signature type");
+ }
+
+ if (a < 0)
+ return a;
+
+ assert(a > 0 && a <= 8);
+ if ((size_t) a > alignment)
+ alignment = (size_t) a;
+
+ p += n;
+ }
+
+ return alignment;
+}
+
+int bus_gvariant_is_fixed_size(const char *signature) {
+ const char *p;
+ int r;
+
+ assert(signature);
+
+ p = signature;
+ while (*p != 0) {
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+
+ switch (*p) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_VARIANT:
+ return 0;
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ break;
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ char t[n-1];
+
+ memcpy(t, p + 1, n - 2);
+ t[n - 2] = 0;
+
+ r = bus_gvariant_is_fixed_size(t);
+ if (r <= 0)
+ return r;
+ break;
+ }
+
+ default:
+ assert_not_reached("Unknown signature type");
+ }
+
+ p += n;
+ }
+
+ return true;
+}
+
+size_t bus_gvariant_determine_word_size(size_t sz, size_t extra) {
+ if (sz + extra <= 0xFF)
+ return 1;
+ else if (sz + extra*2 <= 0xFFFF)
+ return 2;
+ else if (sz + extra*4 <= 0xFFFFFFFF)
+ return 4;
+ else
+ return 8;
+}
+
+size_t bus_gvariant_read_word_le(void *p, size_t sz) {
+ union {
+ uint16_t u16;
+ uint32_t u32;
+ uint64_t u64;
+ } x;
+
+ assert(p);
+
+ if (sz == 1)
+ return *(uint8_t*) p;
+
+ memcpy(&x, p, sz);
+
+ if (sz == 2)
+ return le16toh(x.u16);
+ else if (sz == 4)
+ return le32toh(x.u32);
+ else if (sz == 8)
+ return le64toh(x.u64);
+
+ assert_not_reached("unknown word width");
+}
+
+void bus_gvariant_write_word_le(void *p, size_t sz, size_t value) {
+ union {
+ uint16_t u16;
+ uint32_t u32;
+ uint64_t u64;
+ } x;
+
+ assert(p);
+ assert(sz == 8 || (value < (1ULL << (sz*8))));
+
+ if (sz == 1) {
+ *(uint8_t*) p = value;
+ return;
+ } else if (sz == 2)
+ x.u16 = htole16((uint16_t) value);
+ else if (sz == 4)
+ x.u32 = htole32((uint32_t) value);
+ else if (sz == 8)
+ x.u64 = htole64((uint64_t) value);
+ else
+ assert_not_reached("unknown word width");
+
+ memcpy(p, &x, sz);
+}
diff --git a/src/libsystemd/sd-bus/bus-gvariant.h b/src/libsystemd/sd-bus/bus-gvariant.h
new file mode 100644
index 0000000..213fa97
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-gvariant.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+int bus_gvariant_get_size(const char *signature) _pure_;
+int bus_gvariant_get_alignment(const char *signature) _pure_;
+int bus_gvariant_is_fixed_size(const char *signature) _pure_;
+
+size_t bus_gvariant_determine_word_size(size_t sz, size_t extra);
+void bus_gvariant_write_word_le(void *p, size_t sz, size_t value);
+size_t bus_gvariant_read_word_le(void *p, size_t sz);
diff --git a/src/libsystemd/sd-bus/bus-internal.c b/src/libsystemd/sd-bus/bus-internal.c
new file mode 100644
index 0000000..3f03ad7
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-internal.c
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "hexdecoct.h"
+#include "string-util.h"
+
+bool object_path_is_valid(const char *p) {
+ const char *q;
+ bool slash;
+
+ if (!p)
+ return false;
+
+ if (p[0] != '/')
+ return false;
+
+ if (p[1] == 0)
+ return true;
+
+ for (slash = true, q = p+1; *q; q++)
+ if (*q == '/') {
+ if (slash)
+ return false;
+
+ slash = true;
+ } else {
+ bool good;
+
+ good =
+ (*q >= 'a' && *q <= 'z') ||
+ (*q >= 'A' && *q <= 'Z') ||
+ (*q >= '0' && *q <= '9') ||
+ *q == '_';
+
+ if (!good)
+ return false;
+
+ slash = false;
+ }
+
+ if (slash)
+ return false;
+
+ return (q - p) <= BUS_PATH_SIZE_MAX;
+}
+
+char* object_path_startswith(const char *a, const char *b) {
+ const char *p;
+
+ if (!object_path_is_valid(a) ||
+ !object_path_is_valid(b))
+ return NULL;
+
+ if (streq(b, "/"))
+ return (char*) a + 1;
+
+ p = startswith(a, b);
+ if (!p)
+ return NULL;
+
+ if (*p == 0)
+ return (char*) p;
+
+ if (*p == '/')
+ return (char*) p + 1;
+
+ return NULL;
+}
+
+bool interface_name_is_valid(const char *p) {
+ const char *q;
+ bool dot, found_dot = false;
+
+ if (isempty(p))
+ return false;
+
+ for (dot = true, q = p; *q; q++)
+ if (*q == '.') {
+ if (dot)
+ return false;
+
+ found_dot = dot = true;
+ } else {
+ bool good;
+
+ good =
+ (*q >= 'a' && *q <= 'z') ||
+ (*q >= 'A' && *q <= 'Z') ||
+ (!dot && *q >= '0' && *q <= '9') ||
+ *q == '_';
+
+ if (!good)
+ return false;
+
+ dot = false;
+ }
+
+ if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH)
+ return false;
+
+ if (dot)
+ return false;
+
+ if (!found_dot)
+ return false;
+
+ return true;
+}
+
+bool service_name_is_valid(const char *p) {
+ const char *q;
+ bool dot, found_dot = false, unique;
+
+ if (isempty(p))
+ return false;
+
+ unique = p[0] == ':';
+
+ for (dot = true, q = unique ? p+1 : p; *q; q++)
+ if (*q == '.') {
+ if (dot)
+ return false;
+
+ found_dot = dot = true;
+ } else {
+ bool good;
+
+ good =
+ (*q >= 'a' && *q <= 'z') ||
+ (*q >= 'A' && *q <= 'Z') ||
+ ((!dot || unique) && *q >= '0' && *q <= '9') ||
+ IN_SET(*q, '_', '-');
+
+ if (!good)
+ return false;
+
+ dot = false;
+ }
+
+ if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH)
+ return false;
+
+ if (dot)
+ return false;
+
+ if (!found_dot)
+ return false;
+
+ return true;
+}
+
+bool member_name_is_valid(const char *p) {
+ const char *q;
+
+ if (isempty(p))
+ return false;
+
+ for (q = p; *q; q++) {
+ bool good;
+
+ good =
+ (*q >= 'a' && *q <= 'z') ||
+ (*q >= 'A' && *q <= 'Z') ||
+ (*q >= '0' && *q <= '9') ||
+ *q == '_';
+
+ if (!good)
+ return false;
+ }
+
+ if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH)
+ return false;
+
+ return true;
+}
+
+/*
+ * Complex pattern match
+ * This checks whether @a is a 'complex-prefix' of @b, or @b is a
+ * 'complex-prefix' of @a, based on strings that consist of labels with @c as
+ * separator. This function returns true if:
+ * - both strings are equal
+ * - either is a prefix of the other and ends with @c
+ * The second rule makes sure that either string needs to be fully included in
+ * the other, and the string which is considered the prefix needs to end with a
+ * separator.
+ */
+static bool complex_pattern_check(char c, const char *a, const char *b) {
+ bool separator = false;
+
+ if (!a && !b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ for (;;) {
+ if (*a != *b)
+ return (separator && (*a == 0 || *b == 0));
+
+ if (*a == 0)
+ return true;
+
+ separator = *a == c;
+
+ a++, b++;
+ }
+}
+
+bool namespace_complex_pattern(const char *pattern, const char *value) {
+ return complex_pattern_check('.', pattern, value);
+}
+
+bool path_complex_pattern(const char *pattern, const char *value) {
+ return complex_pattern_check('/', pattern, value);
+}
+
+/*
+ * Simple pattern match
+ * This checks whether @a is a 'simple-prefix' of @b, based on strings that
+ * consist of labels with @c as separator. This function returns true, if:
+ * - if @a and @b are equal
+ * - if @a is a prefix of @b, and the first following character in @b (or the
+ * last character in @a) is @c
+ * The second rule basically makes sure that if @a is a prefix of @b, then @b
+ * must follow with a new label separated by @c. It cannot extend the label.
+ */
+static bool simple_pattern_check(char c, const char *a, const char *b) {
+ bool separator = false;
+
+ if (!a && !b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ for (;;) {
+ if (*a != *b)
+ return *a == 0 && (*b == c || separator);
+
+ if (*a == 0)
+ return true;
+
+ separator = *a == c;
+
+ a++, b++;
+ }
+}
+
+bool namespace_simple_pattern(const char *pattern, const char *value) {
+ return simple_pattern_check('.', pattern, value);
+}
+
+bool path_simple_pattern(const char *pattern, const char *value) {
+ return simple_pattern_check('/', pattern, value);
+}
+
+int bus_message_type_from_string(const char *s, uint8_t *u) {
+ if (streq(s, "signal"))
+ *u = SD_BUS_MESSAGE_SIGNAL;
+ else if (streq(s, "method_call"))
+ *u = SD_BUS_MESSAGE_METHOD_CALL;
+ else if (streq(s, "error"))
+ *u = SD_BUS_MESSAGE_METHOD_ERROR;
+ else if (streq(s, "method_return"))
+ *u = SD_BUS_MESSAGE_METHOD_RETURN;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+const char *bus_message_type_to_string(uint8_t u) {
+ if (u == SD_BUS_MESSAGE_SIGNAL)
+ return "signal";
+ else if (u == SD_BUS_MESSAGE_METHOD_CALL)
+ return "method_call";
+ else if (u == SD_BUS_MESSAGE_METHOD_ERROR)
+ return "error";
+ else if (u == SD_BUS_MESSAGE_METHOD_RETURN)
+ return "method_return";
+ else
+ return NULL;
+}
+
+char *bus_address_escape(const char *v) {
+ const char *a;
+ char *r, *b;
+
+ r = new(char, strlen(v)*3+1);
+ if (!r)
+ return NULL;
+
+ for (a = v, b = r; *a; a++) {
+
+ if ((*a >= '0' && *a <= '9') ||
+ (*a >= 'a' && *a <= 'z') ||
+ (*a >= 'A' && *a <= 'Z') ||
+ strchr("_-/.", *a))
+ *(b++) = *a;
+ else {
+ *(b++) = '%';
+ *(b++) = hexchar(*a >> 4);
+ *(b++) = hexchar(*a & 0xF);
+ }
+ }
+
+ *b = 0;
+ return r;
+}
+
+int bus_maybe_reply_error(sd_bus_message *m, int r, sd_bus_error *error) {
+ assert(m);
+
+ if (sd_bus_error_is_set(error) || r < 0) {
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL)
+ sd_bus_reply_method_errno(m, r, error);
+ } else
+ return r;
+
+ log_debug("Failed to process message type=%s sender=%s destination=%s path=%s interface=%s member=%s cookie=%" PRIu64 " reply_cookie=%" PRIu64 " signature=%s error-name=%s error-message=%s: %s",
+ bus_message_type_to_string(m->header->type),
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_destination(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)),
+ BUS_MESSAGE_COOKIE(m),
+ m->reply_cookie,
+ strna(m->root_container.signature),
+ strna(m->error.name),
+ strna(m->error.message),
+ bus_error_message(error, r));
+
+ return 1;
+}
diff --git a/src/libsystemd/sd-bus/bus-internal.h b/src/libsystemd/sd-bus/bus-internal.h
new file mode 100644
index 0000000..233a228
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-internal.h
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <pthread.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-kernel.h"
+#include "bus-match.h"
+#include "def.h"
+#include "hashmap.h"
+#include "list.h"
+#include "prioq.h"
+#include "socket-util.h"
+#include "time-util.h"
+
+/* Note that we use the new /run prefix here (instead of /var/run) since we require them to be aliases and
+ * that way we become independent of /var being mounted */
+#define DEFAULT_SYSTEM_BUS_ADDRESS "unix:path=/run/dbus/system_bus_socket"
+#define DEFAULT_USER_BUS_ADDRESS_FMT "unix:path=%s/bus"
+
+struct reply_callback {
+ sd_bus_message_handler_t callback;
+ usec_t timeout_usec; /* this is a relative timeout until we reach the BUS_HELLO state, and an absolute one right after */
+ uint64_t cookie;
+ unsigned prioq_idx;
+};
+
+struct filter_callback {
+ sd_bus_message_handler_t callback;
+
+ unsigned last_iteration;
+
+ LIST_FIELDS(struct filter_callback, callbacks);
+};
+
+struct match_callback {
+ sd_bus_message_handler_t callback;
+ sd_bus_message_handler_t install_callback;
+
+ sd_bus_slot *install_slot; /* The AddMatch() call */
+
+ unsigned last_iteration;
+
+ /* Don't dispatch this slot with messages that arrived in any iteration before or at the this
+ * one. We use this to ensure that matches don't apply "retroactively" and thus can confuse the
+ * caller: matches will only match incoming messages from the moment on the match was installed. */
+ uint64_t after;
+
+ char *match_string;
+
+ struct bus_match_node *match_node;
+};
+
+struct node {
+ char *path;
+ struct node *parent;
+ LIST_HEAD(struct node, child);
+ LIST_FIELDS(struct node, siblings);
+
+ LIST_HEAD(struct node_callback, callbacks);
+ LIST_HEAD(struct node_vtable, vtables);
+ LIST_HEAD(struct node_enumerator, enumerators);
+ LIST_HEAD(struct node_object_manager, object_managers);
+};
+
+struct node_callback {
+ struct node *node;
+
+ bool is_fallback:1;
+ unsigned last_iteration;
+
+ sd_bus_message_handler_t callback;
+
+ LIST_FIELDS(struct node_callback, callbacks);
+};
+
+struct node_enumerator {
+ struct node *node;
+
+ sd_bus_node_enumerator_t callback;
+
+ unsigned last_iteration;
+
+ LIST_FIELDS(struct node_enumerator, enumerators);
+};
+
+struct node_object_manager {
+ struct node *node;
+
+ LIST_FIELDS(struct node_object_manager, object_managers);
+};
+
+struct node_vtable {
+ struct node *node;
+
+ bool is_fallback:1;
+ unsigned last_iteration;
+
+ char *interface;
+ const sd_bus_vtable *vtable;
+ sd_bus_object_find_t find;
+
+ LIST_FIELDS(struct node_vtable, vtables);
+};
+
+struct vtable_member {
+ const char *path;
+ const char *interface;
+ const char *member;
+ struct node_vtable *parent;
+ unsigned last_iteration;
+ const sd_bus_vtable *vtable;
+};
+
+typedef enum BusSlotType {
+ BUS_REPLY_CALLBACK,
+ BUS_FILTER_CALLBACK,
+ BUS_MATCH_CALLBACK,
+ BUS_NODE_CALLBACK,
+ BUS_NODE_ENUMERATOR,
+ BUS_NODE_VTABLE,
+ BUS_NODE_OBJECT_MANAGER,
+ _BUS_SLOT_INVALID = -1,
+} BusSlotType;
+
+struct sd_bus_slot {
+ unsigned n_ref;
+ BusSlotType type:5;
+
+ /* Slots can be "floating" or not. If they are not floating (the usual case) then they reference the bus object
+ * they are associated with. This means the bus object stays allocated at least as long as there is a slot
+ * around associated with it. If it is floating, then the slot's lifecycle is bound to the lifecycle of the
+ * bus: it will be disconnected from the bus when the bus is destroyed, and it keeping the slot reffed hence
+ * won't mean the bus stays reffed too. Internally this means the reference direction is reversed: floating
+ * slots objects are referenced by the bus object, and not vice versa. */
+ bool floating:1;
+
+ bool match_added:1;
+
+ sd_bus *bus;
+ void *userdata;
+ sd_bus_destroy_t destroy_callback;
+
+ char *description;
+
+ LIST_FIELDS(sd_bus_slot, slots);
+
+ union {
+ struct reply_callback reply_callback;
+ struct filter_callback filter_callback;
+ struct match_callback match_callback;
+ struct node_callback node_callback;
+ struct node_enumerator node_enumerator;
+ struct node_object_manager node_object_manager;
+ struct node_vtable node_vtable;
+ };
+};
+
+enum bus_state {
+ BUS_UNSET,
+ BUS_WATCH_BIND, /* waiting for the socket to appear via inotify */
+ BUS_OPENING, /* the kernel's connect() is still not ready */
+ BUS_AUTHENTICATING, /* we are currently in the "SASL" authorization phase of dbus */
+ BUS_HELLO, /* we are waiting for the Hello() response */
+ BUS_RUNNING,
+ BUS_CLOSING,
+ BUS_CLOSED,
+ _BUS_STATE_MAX,
+};
+
+static inline bool BUS_IS_OPEN(enum bus_state state) {
+ return state > BUS_UNSET && state < BUS_CLOSING;
+}
+
+enum bus_auth {
+ _BUS_AUTH_INVALID,
+ BUS_AUTH_EXTERNAL,
+ BUS_AUTH_ANONYMOUS
+};
+
+struct sd_bus {
+ unsigned n_ref;
+
+ enum bus_state state;
+ int input_fd, output_fd;
+ int inotify_fd;
+ int message_version;
+ int message_endian;
+
+ bool can_fds:1;
+ bool bus_client:1;
+ bool ucred_valid:1;
+ bool is_server:1;
+ bool anonymous_auth:1;
+ bool prefer_readv:1;
+ bool prefer_writev:1;
+ bool match_callbacks_modified:1;
+ bool filter_callbacks_modified:1;
+ bool nodes_modified:1;
+ bool trusted:1;
+ bool manual_peer_interface:1;
+ bool is_system:1;
+ bool is_user:1;
+ bool allow_interactive_authorization:1;
+ bool exit_on_disconnect:1;
+ bool exited:1;
+ bool exit_triggered:1;
+ bool is_local:1;
+ bool watch_bind:1;
+ bool is_monitor:1;
+ bool accept_fd:1;
+ bool attach_timestamp:1;
+ bool connected_signal:1;
+ bool close_on_exit:1;
+
+ signed int use_memfd:2;
+
+ void *rbuffer;
+ size_t rbuffer_size;
+
+ sd_bus_message **rqueue;
+ size_t rqueue_size;
+ size_t rqueue_allocated;
+
+ sd_bus_message **wqueue;
+ size_t wqueue_size;
+ size_t windex;
+ size_t wqueue_allocated;
+
+ uint64_t cookie;
+ uint64_t read_counter; /* A counter for each incoming msg */
+
+ char *unique_name;
+ uint64_t unique_id;
+
+ struct bus_match_node match_callbacks;
+ Prioq *reply_callbacks_prioq;
+ OrderedHashmap *reply_callbacks;
+ LIST_HEAD(struct filter_callback, filter_callbacks);
+
+ Hashmap *nodes;
+ Hashmap *vtable_methods;
+ Hashmap *vtable_properties;
+
+ union sockaddr_union sockaddr;
+ socklen_t sockaddr_size;
+
+ pid_t nspid;
+ char *machine;
+
+ sd_id128_t server_id;
+
+ char *address;
+ unsigned address_index;
+
+ int last_connect_error;
+
+ enum bus_auth auth;
+ unsigned auth_index;
+ struct iovec auth_iovec[3];
+ size_t auth_rbegin;
+ char *auth_buffer;
+ usec_t auth_timeout;
+
+ struct ucred ucred;
+ char *label;
+ gid_t *groups;
+ size_t n_groups;
+
+ uint64_t creds_mask;
+
+ int *fds;
+ size_t n_fds;
+
+ char *exec_path;
+ char **exec_argv;
+
+ /* We do locking around the memfd cache, since we want to
+ * allow people to process a sd_bus_message in a different
+ * thread then it was generated on and free it there. Since
+ * adding something to the memfd cache might happen when a
+ * message is released, we hence need to protect this bit with
+ * a mutex. */
+ pthread_mutex_t memfd_cache_mutex;
+ struct memfd_cache memfd_cache[MEMFD_CACHE_MAX];
+ unsigned n_memfd_cache;
+
+ pid_t original_pid;
+ pid_t busexec_pid;
+
+ unsigned iteration_counter;
+
+ sd_event_source *input_io_event_source;
+ sd_event_source *output_io_event_source;
+ sd_event_source *time_event_source;
+ sd_event_source *quit_event_source;
+ sd_event_source *inotify_event_source;
+ sd_event *event;
+ int event_priority;
+
+ pid_t tid;
+
+ sd_bus_message *current_message;
+ sd_bus_slot *current_slot;
+ sd_bus_message_handler_t current_handler;
+ void *current_userdata;
+
+ sd_bus **default_bus_ptr;
+
+ char *description;
+ char *patch_sender;
+
+ sd_bus_track *track_queue;
+
+ LIST_HEAD(sd_bus_slot, slots);
+ LIST_HEAD(sd_bus_track, tracks);
+
+ int *inotify_watches;
+ size_t n_inotify_watches;
+
+ /* zero means use value specified by $SYSTEMD_BUS_TIMEOUT= environment variable or built-in default */
+ usec_t method_call_timeout;
+};
+
+/* For method calls we timeout at 25s, like in the D-Bus reference implementation */
+#define BUS_DEFAULT_TIMEOUT ((usec_t) (25 * USEC_PER_SEC))
+
+/* For the authentication phase we grant 90s, to provide extra room during boot, when RNGs and such are not filled up
+ * with enough entropy yet and might delay the boot */
+#define BUS_AUTH_TIMEOUT ((usec_t) DEFAULT_TIMEOUT_USEC)
+
+#define BUS_WQUEUE_MAX (384*1024)
+#define BUS_RQUEUE_MAX (384*1024)
+
+#define BUS_MESSAGE_SIZE_MAX (128*1024*1024)
+#define BUS_AUTH_SIZE_MAX (64*1024)
+/* Note that the D-Bus specification states that bus paths shall have no size limit. We enforce here one
+ * anyway, since truly unbounded strings are a security problem. The limit we pick is relatively large however,
+ * to not clash unnecessarily with real-life applications. */
+#define BUS_PATH_SIZE_MAX (64*1024)
+
+#define BUS_CONTAINER_DEPTH 128
+
+/* Defined by the specification as maximum size of an array in bytes */
+#define BUS_ARRAY_MAX_SIZE 67108864
+
+#define BUS_FDS_MAX 1024
+
+#define BUS_EXEC_ARGV_MAX 256
+
+bool interface_name_is_valid(const char *p) _pure_;
+bool service_name_is_valid(const char *p) _pure_;
+bool member_name_is_valid(const char *p) _pure_;
+bool object_path_is_valid(const char *p) _pure_;
+
+char *object_path_startswith(const char *a, const char *b) _pure_;
+
+bool namespace_complex_pattern(const char *pattern, const char *value) _pure_;
+bool path_complex_pattern(const char *pattern, const char *value) _pure_;
+
+bool namespace_simple_pattern(const char *pattern, const char *value) _pure_;
+bool path_simple_pattern(const char *pattern, const char *value) _pure_;
+
+int bus_message_type_from_string(const char *s, uint8_t *u) _pure_;
+const char *bus_message_type_to_string(uint8_t u) _pure_;
+
+#define error_name_is_valid interface_name_is_valid
+
+sd_bus *bus_resolve(sd_bus *bus);
+
+int bus_ensure_running(sd_bus *bus);
+int bus_start_running(sd_bus *bus);
+int bus_next_address(sd_bus *bus);
+
+int bus_seal_synthetic_message(sd_bus *b, sd_bus_message *m);
+
+int bus_rqueue_make_room(sd_bus *bus);
+
+bool bus_pid_changed(sd_bus *bus);
+
+char *bus_address_escape(const char *v);
+
+int bus_attach_io_events(sd_bus *b);
+int bus_attach_inotify_event(sd_bus *b);
+
+void bus_close_inotify_fd(sd_bus *b);
+void bus_close_io_fds(sd_bus *b);
+
+#define OBJECT_PATH_FOREACH_PREFIX(prefix, path) \
+ for (char *_slash = ({ strcpy((prefix), (path)); streq((prefix), "/") ? NULL : strrchr((prefix), '/'); }) ; \
+ _slash && ((_slash[(_slash) == (prefix)] = 0), true); \
+ _slash = streq((prefix), "/") ? NULL : strrchr((prefix), '/'))
+
+/* If we are invoking callbacks of a bus object, ensure unreffing the
+ * bus from the callback doesn't destroy the object we are working on */
+#define BUS_DONT_DESTROY(bus) \
+ _cleanup_(sd_bus_unrefp) _unused_ sd_bus *_dont_destroy_##bus = sd_bus_ref(bus)
+
+int bus_set_address_system(sd_bus *bus);
+int bus_set_address_user(sd_bus *bus);
+int bus_set_address_system_remote(sd_bus *b, const char *host);
+int bus_set_address_system_machine(sd_bus *b, const char *machine);
+
+int bus_maybe_reply_error(sd_bus_message *m, int r, sd_bus_error *error);
+
+#define bus_assert_return(expr, r, error) \
+ do { \
+ if (!assert_log(expr, #expr)) \
+ return sd_bus_error_set_errno(error, r); \
+ } while (false)
+
+void bus_enter_closing(sd_bus *bus);
+
+void bus_set_state(sd_bus *bus, enum bus_state state);
diff --git a/src/libsystemd/sd-bus/bus-introspect.c b/src/libsystemd/sd-bus/bus-introspect.c
new file mode 100644
index 0000000..866fca7
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-introspect.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-internal.h"
+#include "bus-introspect.h"
+#include "bus-objects.h"
+#include "bus-protocol.h"
+#include "bus-signature.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "memory-util.h"
+#include "string-util.h"
+
+#define BUS_INTROSPECT_DOCTYPE \
+ "<!DOCTYPE node PUBLIC \"-//freedesktop//DTD D-BUS Object Introspection 1.0//EN\"\n" \
+ "\"http://www.freedesktop.org/standards/dbus/1.0/introspect.dtd\">\n"
+
+#define BUS_INTROSPECT_INTERFACE_PEER \
+ " <interface name=\"org.freedesktop.DBus.Peer\">\n" \
+ " <method name=\"Ping\"/>\n" \
+ " <method name=\"GetMachineId\">\n" \
+ " <arg type=\"s\" name=\"machine_uuid\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_INTROSPECTABLE \
+ " <interface name=\"org.freedesktop.DBus.Introspectable\">\n" \
+ " <method name=\"Introspect\">\n" \
+ " <arg name=\"data\" type=\"s\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_PROPERTIES \
+ " <interface name=\"org.freedesktop.DBus.Properties\">\n" \
+ " <method name=\"Get\">\n" \
+ " <arg name=\"interface\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"property\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"value\" direction=\"out\" type=\"v\"/>\n" \
+ " </method>\n" \
+ " <method name=\"GetAll\">\n" \
+ " <arg name=\"interface\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"properties\" direction=\"out\" type=\"a{sv}\"/>\n" \
+ " </method>\n" \
+ " <method name=\"Set\">\n" \
+ " <arg name=\"interface\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"property\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"value\" direction=\"in\" type=\"v\"/>\n" \
+ " </method>\n" \
+ " <signal name=\"PropertiesChanged\">\n" \
+ " <arg type=\"s\" name=\"interface\"/>\n" \
+ " <arg type=\"a{sv}\" name=\"changed_properties\"/>\n" \
+ " <arg type=\"as\" name=\"invalidated_properties\"/>\n" \
+ " </signal>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_OBJECT_MANAGER \
+ " <interface name=\"org.freedesktop.DBus.ObjectManager\">\n" \
+ " <method name=\"GetManagedObjects\">\n" \
+ " <arg type=\"a{oa{sa{sv}}}\" name=\"object_paths_interfaces_and_properties\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " <signal name=\"InterfacesAdded\">\n" \
+ " <arg type=\"o\" name=\"object_path\"/>\n" \
+ " <arg type=\"a{sa{sv}}\" name=\"interfaces_and_properties\"/>\n" \
+ " </signal>\n" \
+ " <signal name=\"InterfacesRemoved\">\n" \
+ " <arg type=\"o\" name=\"object_path\"/>\n" \
+ " <arg type=\"as\" name=\"interfaces\"/>\n" \
+ " </signal>\n" \
+ " </interface>\n"
+
+int introspect_begin(struct introspect *i, bool trusted) {
+ assert(i);
+
+ *i = (struct introspect) {
+ .trusted = trusted,
+ };
+
+ i->f = open_memstream_unlocked(&i->introspection, &i->size);
+ if (!i->f)
+ return -ENOMEM;
+
+ fputs(BUS_INTROSPECT_DOCTYPE
+ "<node>\n", i->f);
+
+ return 0;
+}
+
+int introspect_write_default_interfaces(struct introspect *i, bool object_manager) {
+ assert(i);
+
+ fputs(BUS_INTROSPECT_INTERFACE_PEER
+ BUS_INTROSPECT_INTERFACE_INTROSPECTABLE
+ BUS_INTROSPECT_INTERFACE_PROPERTIES, i->f);
+
+ if (object_manager)
+ fputs(BUS_INTROSPECT_INTERFACE_OBJECT_MANAGER, i->f);
+
+ return 0;
+}
+
+static int set_interface_name(struct introspect *intro, const char *interface_name) {
+ if (streq_ptr(intro->interface_name, interface_name))
+ return 0;
+
+ if (intro->interface_name)
+ fputs(" </interface>\n", intro->f);
+
+ if (interface_name)
+ fprintf(intro->f, " <interface name=\"%s\">\n", interface_name);
+
+ return free_and_strdup(&intro->interface_name, interface_name);
+}
+
+int introspect_write_child_nodes(struct introspect *i, Set *s, const char *prefix) {
+ char *node;
+
+ assert(i);
+ assert(prefix);
+
+ assert_se(set_interface_name(i, NULL) >= 0);
+
+ while ((node = set_steal_first(s))) {
+ const char *e;
+
+ e = object_path_startswith(node, prefix);
+ if (e && e[0])
+ fprintf(i->f, " <node name=\"%s\"/>\n", e);
+
+ free(node);
+ }
+
+ return 0;
+}
+
+static void introspect_write_flags(struct introspect *i, int type, uint64_t flags) {
+ if (flags & SD_BUS_VTABLE_DEPRECATED)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Deprecated\" value=\"true\"/>\n", i->f);
+
+ if (type == _SD_BUS_VTABLE_METHOD && (flags & SD_BUS_VTABLE_METHOD_NO_REPLY))
+ fputs(" <annotation name=\"org.freedesktop.DBus.Method.NoReply\" value=\"true\"/>\n", i->f);
+
+ if (IN_SET(type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY)) {
+ if (flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)
+ fputs(" <annotation name=\"org.freedesktop.systemd1.Explicit\" value=\"true\"/>\n", i->f);
+
+ if (flags & SD_BUS_VTABLE_PROPERTY_CONST)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"const\"/>\n", i->f);
+ else if (flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"invalidates\"/>\n", i->f);
+ else if (!(flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE))
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"false\"/>\n", i->f);
+ }
+
+ if (!i->trusted &&
+ IN_SET(type, _SD_BUS_VTABLE_METHOD, _SD_BUS_VTABLE_WRITABLE_PROPERTY) &&
+ !(flags & SD_BUS_VTABLE_UNPRIVILEGED))
+ fputs(" <annotation name=\"org.freedesktop.systemd1.Privileged\" value=\"true\"/>\n", i->f);
+}
+
+/* Note that "names" is both an input and an output parameter. It initially points to the first argument name in a
+ NULL-separated list of strings, and is then advanced with each argument, and the resulting pointer is returned. */
+static int introspect_write_arguments(struct introspect *i, const char *signature, const char **names, const char *direction) {
+ int r;
+
+ for (;;) {
+ size_t l;
+
+ if (!*signature)
+ return 0;
+
+ r = signature_element_length(signature, &l);
+ if (r < 0)
+ return r;
+
+ fprintf(i->f, " <arg type=\"%.*s\"", (int) l, signature);
+
+ if (**names != '\0') {
+ fprintf(i->f, " name=\"%s\"", *names);
+ *names += strlen(*names) + 1;
+ }
+
+ if (direction)
+ fprintf(i->f, " direction=\"%s\"/>\n", direction);
+ else
+ fputs("/>\n", i->f);
+
+ signature += l;
+ }
+}
+
+int introspect_write_interface(
+ struct introspect *i,
+ const char *interface_name,
+ const sd_bus_vtable *v) {
+
+ const sd_bus_vtable *vtable = v;
+ const char *names = "";
+ int r;
+
+ assert(i);
+ assert(interface_name);
+ assert(v);
+
+ r = set_interface_name(i, interface_name);
+ if (r < 0)
+ return r;
+
+ for (; v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(vtable, v)) {
+
+ /* Ignore methods, signals and properties that are
+ * marked "hidden", but do show the interface
+ * itself */
+
+ if (v->type != _SD_BUS_VTABLE_START && (v->flags & SD_BUS_VTABLE_HIDDEN))
+ continue;
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_START:
+ if (v->flags & SD_BUS_VTABLE_DEPRECATED)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Deprecated\" value=\"true\"/>\n", i->f);
+ break;
+
+ case _SD_BUS_VTABLE_METHOD:
+ fprintf(i->f, " <method name=\"%s\">\n", v->x.method.member);
+ if (bus_vtable_has_names(vtable))
+ names = strempty(v->x.method.names);
+ introspect_write_arguments(i, strempty(v->x.method.signature), &names, "in");
+ introspect_write_arguments(i, strempty(v->x.method.result), &names, "out");
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </method>\n", i->f);
+ break;
+
+ case _SD_BUS_VTABLE_PROPERTY:
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY:
+ fprintf(i->f, " <property name=\"%s\" type=\"%s\" access=\"%s\">\n",
+ v->x.property.member,
+ v->x.property.signature,
+ v->type == _SD_BUS_VTABLE_WRITABLE_PROPERTY ? "readwrite" : "read");
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </property>\n", i->f);
+ break;
+
+ case _SD_BUS_VTABLE_SIGNAL:
+ fprintf(i->f, " <signal name=\"%s\">\n", v->x.signal.member);
+ if (bus_vtable_has_names(vtable))
+ names = strempty(v->x.signal.names);
+ introspect_write_arguments(i, strempty(v->x.signal.signature), &names, NULL);
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </signal>\n", i->f);
+ break;
+ }
+
+ }
+
+ return 0;
+}
+
+int introspect_finish(struct introspect *i, char **ret) {
+ int r;
+
+ assert(i);
+
+ assert_se(set_interface_name(i, NULL) >= 0);
+
+ fputs("</node>\n", i->f);
+
+ r = fflush_and_check(i->f);
+ if (r < 0)
+ return r;
+
+ i->f = safe_fclose(i->f);
+ *ret = TAKE_PTR(i->introspection);
+
+ return 0;
+}
+
+void introspect_free(struct introspect *i) {
+ assert(i);
+
+ /* Normally introspect_finish() does all the work, this is just a backup for error paths */
+
+ safe_fclose(i->f);
+ free(i->interface_name);
+ free(i->introspection);
+}
diff --git a/src/libsystemd/sd-bus/bus-introspect.h b/src/libsystemd/sd-bus/bus-introspect.h
new file mode 100644
index 0000000..34f32a4
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-introspect.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+#include "set.h"
+
+struct introspect {
+ FILE *f;
+ char *interface_name;
+ char *introspection;
+ size_t size;
+ bool trusted;
+};
+
+int introspect_begin(struct introspect *i, bool trusted);
+int introspect_write_default_interfaces(struct introspect *i, bool object_manager);
+int introspect_write_child_nodes(struct introspect *i, Set *s, const char *prefix);
+int introspect_write_interface(
+ struct introspect *i,
+ const char *interface_name,
+ const sd_bus_vtable *v);
+int introspect_finish(struct introspect *i, char **ret);
+void introspect_free(struct introspect *i);
diff --git a/src/libsystemd/sd-bus/bus-kernel.c b/src/libsystemd/sd-bus/bus-kernel.c
new file mode 100644
index 0000000..6bba446
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-kernel.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <fcntl.h>
+#include <malloc.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "capability-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "memfd-util.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "memory-util.h"
+
+void close_and_munmap(int fd, void *address, size_t size) {
+ if (size > 0)
+ assert_se(munmap(address, PAGE_ALIGN(size)) >= 0);
+
+ safe_close(fd);
+}
+
+void bus_flush_memfd(sd_bus *b) {
+ unsigned i;
+
+ assert(b);
+
+ for (i = 0; i < b->n_memfd_cache; i++)
+ close_and_munmap(b->memfd_cache[i].fd, b->memfd_cache[i].address, b->memfd_cache[i].mapped);
+}
diff --git a/src/libsystemd/sd-bus/bus-kernel.h b/src/libsystemd/sd-bus/bus-kernel.h
new file mode 100644
index 0000000..be8e0ce
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-kernel.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#define MEMFD_CACHE_MAX 32
+
+/* When we cache a memfd block for reuse, we will truncate blocks
+ * longer than this in order not to keep too much data around. */
+#define MEMFD_CACHE_ITEM_SIZE_MAX (128*1024)
+
+/* This determines at which minimum size we prefer sending memfds over
+ * sending vectors */
+#define MEMFD_MIN_SIZE (512*1024)
+
+struct memfd_cache {
+ int fd;
+ void *address;
+ size_t mapped;
+ size_t allocated;
+};
+
+void close_and_munmap(int fd, void *address, size_t size);
+void bus_flush_memfd(sd_bus *bus);
diff --git a/src/libsystemd/sd-bus/bus-match.c b/src/libsystemd/sd-bus/bus-match.c
new file mode 100644
index 0000000..d7da4bf
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-match.c
@@ -0,0 +1,1100 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-match.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+/* Example:
+ *
+ * A: type=signal,sender=foo,interface=bar
+ * B: type=signal,sender=quux,interface=fips
+ * C: type=signal,sender=quux,interface=waldo
+ * D: type=signal,member=test
+ * E: sender=miau
+ * F: type=signal
+ * G: type=signal
+ *
+ * results in this tree:
+ *
+ * BUS_MATCH_ROOT
+ * + BUS_MATCH_MESSAGE_TYPE
+ * | ` BUS_MATCH_VALUE: value == signal
+ * | + DBUS_MATCH_SENDER
+ * | | + BUS_MATCH_VALUE: value == foo
+ * | | | ` DBUS_MATCH_INTERFACE
+ * | | | ` BUS_MATCH_VALUE: value == bar
+ * | | | ` BUS_MATCH_LEAF: A
+ * | | ` BUS_MATCH_VALUE: value == quux
+ * | | ` DBUS_MATCH_INTERFACE
+ * | | | BUS_MATCH_VALUE: value == fips
+ * | | | ` BUS_MATCH_LEAF: B
+ * | | ` BUS_MATCH_VALUE: value == waldo
+ * | | ` BUS_MATCH_LEAF: C
+ * | + DBUS_MATCH_MEMBER
+ * | | ` BUS_MATCH_VALUE: value == test
+ * | | ` BUS_MATCH_LEAF: D
+ * | + BUS_MATCH_LEAF: F
+ * | ` BUS_MATCH_LEAF: G
+ * ` BUS_MATCH_SENDER
+ * ` BUS_MATCH_VALUE: value == miau
+ * ` BUS_MATCH_LEAF: E
+ */
+
+static bool BUS_MATCH_IS_COMPARE(enum bus_match_node_type t) {
+ return t >= BUS_MATCH_SENDER && t <= BUS_MATCH_ARG_HAS_LAST;
+}
+
+static bool BUS_MATCH_CAN_HASH(enum bus_match_node_type t) {
+ return (t >= BUS_MATCH_MESSAGE_TYPE && t <= BUS_MATCH_PATH) ||
+ (t >= BUS_MATCH_ARG && t <= BUS_MATCH_ARG_LAST) ||
+ (t >= BUS_MATCH_ARG_HAS && t <= BUS_MATCH_ARG_HAS_LAST);
+}
+
+static void bus_match_node_free(struct bus_match_node *node) {
+ assert(node);
+ assert(node->parent);
+ assert(!node->child);
+ assert(node->type != BUS_MATCH_ROOT);
+ assert(node->type < _BUS_MATCH_NODE_TYPE_MAX);
+
+ if (node->parent->child) {
+ /* We are apparently linked into the parent's child
+ * list. Let's remove us from there. */
+ if (node->prev) {
+ assert(node->prev->next == node);
+ node->prev->next = node->next;
+ } else {
+ assert(node->parent->child == node);
+ node->parent->child = node->next;
+ }
+
+ if (node->next)
+ node->next->prev = node->prev;
+ }
+
+ if (node->type == BUS_MATCH_VALUE) {
+ /* We might be in the parent's hash table, so clean
+ * this up */
+
+ if (node->parent->type == BUS_MATCH_MESSAGE_TYPE)
+ hashmap_remove(node->parent->compare.children, UINT_TO_PTR(node->value.u8));
+ else if (BUS_MATCH_CAN_HASH(node->parent->type) && node->value.str)
+ hashmap_remove(node->parent->compare.children, node->value.str);
+
+ free(node->value.str);
+ }
+
+ if (BUS_MATCH_IS_COMPARE(node->type)) {
+ assert(hashmap_isempty(node->compare.children));
+ hashmap_free(node->compare.children);
+ }
+
+ free(node);
+}
+
+static bool bus_match_node_maybe_free(struct bus_match_node *node) {
+ assert(node);
+
+ if (node->type == BUS_MATCH_ROOT)
+ return false;
+
+ if (node->child)
+ return false;
+
+ if (BUS_MATCH_IS_COMPARE(node->type) && !hashmap_isempty(node->compare.children))
+ return true;
+
+ bus_match_node_free(node);
+ return true;
+}
+
+static bool value_node_test(
+ struct bus_match_node *node,
+ enum bus_match_node_type parent_type,
+ uint8_t value_u8,
+ const char *value_str,
+ char **value_strv,
+ sd_bus_message *m) {
+
+ assert(node);
+ assert(node->type == BUS_MATCH_VALUE);
+
+ /* Tests parameters against this value node, doing prefix
+ * magic and stuff. */
+
+ switch (parent_type) {
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return node->value.u8 == value_u8;
+
+ case BUS_MATCH_SENDER:
+ if (streq_ptr(node->value.str, value_str))
+ return true;
+
+ if (m->creds.mask & SD_BUS_CREDS_WELL_KNOWN_NAMES) {
+ char **i;
+
+ /* on kdbus we have the well known names list
+ * in the credentials, let's make use of that
+ * for an accurate match */
+
+ STRV_FOREACH(i, m->creds.well_known_names)
+ if (streq_ptr(node->value.str, *i))
+ return true;
+
+ } else {
+
+ /* If we don't have kdbus, we don't know the
+ * well-known names of the senders. In that,
+ * let's just hope that dbus-daemon doesn't
+ * send us stuff we didn't want. */
+
+ if (node->value.str[0] != ':' && value_str && value_str[0] == ':')
+ return true;
+ }
+
+ return false;
+
+ case BUS_MATCH_DESTINATION:
+ case BUS_MATCH_INTERFACE:
+ case BUS_MATCH_MEMBER:
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+
+ if (value_str)
+ return streq_ptr(node->value.str, value_str);
+
+ return false;
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST: {
+ char **i;
+
+ STRV_FOREACH(i, value_strv)
+ if (streq_ptr(node->value.str, *i))
+ return true;
+
+ return false;
+ }
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ if (value_str)
+ return namespace_simple_pattern(node->value.str, value_str);
+
+ return false;
+
+ case BUS_MATCH_PATH_NAMESPACE:
+ return path_simple_pattern(node->value.str, value_str);
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ if (value_str)
+ return path_complex_pattern(node->value.str, value_str);
+
+ return false;
+
+ default:
+ assert_not_reached("Invalid node type");
+ }
+}
+
+static bool value_node_same(
+ struct bus_match_node *node,
+ enum bus_match_node_type parent_type,
+ uint8_t value_u8,
+ const char *value_str) {
+
+ /* Tests parameters against this value node, not doing prefix
+ * magic and stuff, i.e. this one actually compares the match
+ * itself. */
+
+ assert(node);
+ assert(node->type == BUS_MATCH_VALUE);
+
+ switch (parent_type) {
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return node->value.u8 == value_u8;
+
+ case BUS_MATCH_SENDER:
+ case BUS_MATCH_DESTINATION:
+ case BUS_MATCH_INTERFACE:
+ case BUS_MATCH_MEMBER:
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ case BUS_MATCH_PATH_NAMESPACE:
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ return streq(node->value.str, value_str);
+
+ default:
+ assert_not_reached("Invalid node type");
+ }
+}
+
+int bus_match_run(
+ sd_bus *bus,
+ struct bus_match_node *node,
+ sd_bus_message *m) {
+
+ _cleanup_strv_free_ char **test_strv = NULL;
+ const char *test_str = NULL;
+ uint8_t test_u8 = 0;
+ int r;
+
+ assert(m);
+
+ if (!node)
+ return 0;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+
+ /* Not these special semantics: when traversing the tree we
+ * usually let bus_match_run() when called for a node
+ * recursively invoke bus_match_run(). There's are two
+ * exceptions here though, which are BUS_NODE_ROOT (which
+ * cannot have a sibling), and BUS_NODE_VALUE (whose siblings
+ * are invoked anyway by its parent. */
+
+ switch (node->type) {
+
+ case BUS_MATCH_ROOT:
+
+ /* Run all children. Since we cannot have any siblings
+ * we won't call any. The children of the root node
+ * are compares or leaves, they will automatically
+ * call their siblings. */
+ return bus_match_run(bus, node->child, m);
+
+ case BUS_MATCH_VALUE:
+
+ /* Run all children. We don't execute any siblings, we
+ * assume our caller does that. The children of value
+ * nodes are compares or leaves, they will
+ * automatically call their siblings */
+
+ assert(node->child);
+ return bus_match_run(bus, node->child, m);
+
+ case BUS_MATCH_LEAF:
+
+ if (bus) {
+ /* Don't run this match as long as the AddMatch() call is not complete yet.
+ *
+ * Don't run this match unless the 'after' counter has been reached.
+ *
+ * Don't run this match more than once per iteration */
+
+ if (node->leaf.callback->install_slot ||
+ m->read_counter <= node->leaf.callback->after ||
+ node->leaf.callback->last_iteration == bus->iteration_counter)
+ return bus_match_run(bus, node->next, m);
+
+ node->leaf.callback->last_iteration = bus->iteration_counter;
+ }
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ /* Run the callback. And then invoke siblings. */
+ if (node->leaf.callback->callback) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ sd_bus_slot *slot;
+
+ slot = container_of(node->leaf.callback, sd_bus_slot, match_callback);
+ if (bus) {
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = node->leaf.callback->callback;
+ bus->current_userdata = slot->userdata;
+ }
+ r = node->leaf.callback->callback(m, slot->userdata, &error_buffer);
+ if (bus) {
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+ }
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+ }
+
+ return bus_match_run(bus, node->next, m);
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ test_u8 = m->header->type;
+ break;
+
+ case BUS_MATCH_SENDER:
+ test_str = m->sender;
+ /* FIXME: resolve test_str from a well-known to a unique name first */
+ break;
+
+ case BUS_MATCH_DESTINATION:
+ test_str = m->destination;
+ break;
+
+ case BUS_MATCH_INTERFACE:
+ test_str = m->interface;
+ break;
+
+ case BUS_MATCH_MEMBER:
+ test_str = m->member;
+ break;
+
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_PATH_NAMESPACE:
+ test_str = m->path;
+ break;
+
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG_PATH, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG_NAMESPACE, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ (void) bus_message_get_arg_strv(m, node->type - BUS_MATCH_ARG_HAS, &test_strv);
+ break;
+
+ default:
+ assert_not_reached("Unknown match type.");
+ }
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+ struct bus_match_node *found;
+
+ /* Lookup via hash table, nice! So let's jump directly. */
+
+ if (test_str)
+ found = hashmap_get(node->compare.children, test_str);
+ else if (test_strv) {
+ char **i;
+
+ STRV_FOREACH(i, test_strv) {
+ found = hashmap_get(node->compare.children, *i);
+ if (found) {
+ r = bus_match_run(bus, found, m);
+ if (r != 0)
+ return r;
+ }
+ }
+
+ found = NULL;
+ } else if (node->type == BUS_MATCH_MESSAGE_TYPE)
+ found = hashmap_get(node->compare.children, UINT_TO_PTR(test_u8));
+ else
+ found = NULL;
+
+ if (found) {
+ r = bus_match_run(bus, found, m);
+ if (r != 0)
+ return r;
+ }
+ } else {
+ struct bus_match_node *c;
+
+ /* No hash table, so let's iterate manually... */
+
+ for (c = node->child; c; c = c->next) {
+ if (!value_node_test(c, node->type, test_u8, test_str, test_strv, m))
+ continue;
+
+ r = bus_match_run(bus, c, m);
+ if (r != 0)
+ return r;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+ }
+ }
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+
+ /* And now, let's invoke our siblings */
+ return bus_match_run(bus, node->next, m);
+}
+
+static int bus_match_add_compare_value(
+ struct bus_match_node *where,
+ enum bus_match_node_type t,
+ uint8_t value_u8,
+ const char *value_str,
+ struct bus_match_node **ret) {
+
+ struct bus_match_node *c = NULL, *n = NULL;
+ int r;
+
+ assert(where);
+ assert(IN_SET(where->type, BUS_MATCH_ROOT, BUS_MATCH_VALUE));
+ assert(BUS_MATCH_IS_COMPARE(t));
+ assert(ret);
+
+ for (c = where->child; c && c->type != t; c = c->next)
+ ;
+
+ if (c) {
+ /* Comparison node already exists? Then let's see if
+ * the value node exists too. */
+
+ if (t == BUS_MATCH_MESSAGE_TYPE)
+ n = hashmap_get(c->compare.children, UINT_TO_PTR(value_u8));
+ else if (BUS_MATCH_CAN_HASH(t))
+ n = hashmap_get(c->compare.children, value_str);
+ else {
+ for (n = c->child; n && !value_node_same(n, t, value_u8, value_str); n = n->next)
+ ;
+ }
+
+ if (n) {
+ *ret = n;
+ return 0;
+ }
+ } else {
+ /* Comparison node, doesn't exist yet? Then let's
+ * create it. */
+
+ c = new0(struct bus_match_node, 1);
+ if (!c) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ c->type = t;
+ c->parent = where;
+ c->next = where->child;
+ if (c->next)
+ c->next->prev = c;
+ where->child = c;
+
+ if (t == BUS_MATCH_MESSAGE_TYPE) {
+ c->compare.children = hashmap_new(NULL);
+ if (!c->compare.children) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ } else if (BUS_MATCH_CAN_HASH(t)) {
+ c->compare.children = hashmap_new(&string_hash_ops);
+ if (!c->compare.children) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+ }
+
+ n = new0(struct bus_match_node, 1);
+ if (!n) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n->type = BUS_MATCH_VALUE;
+ n->value.u8 = value_u8;
+ if (value_str) {
+ n->value.str = strdup(value_str);
+ if (!n->value.str) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ n->parent = c;
+ if (c->compare.children) {
+
+ if (t == BUS_MATCH_MESSAGE_TYPE)
+ r = hashmap_put(c->compare.children, UINT_TO_PTR(value_u8), n);
+ else
+ r = hashmap_put(c->compare.children, n->value.str, n);
+
+ if (r < 0)
+ goto fail;
+ } else {
+ n->next = c->child;
+ if (n->next)
+ n->next->prev = n;
+ c->child = n;
+ }
+
+ *ret = n;
+ return 1;
+
+fail:
+ if (c)
+ bus_match_node_maybe_free(c);
+
+ if (n) {
+ free(n->value.str);
+ free(n);
+ }
+
+ return r;
+}
+
+static int bus_match_add_leaf(
+ struct bus_match_node *where,
+ struct match_callback *callback) {
+
+ struct bus_match_node *n;
+
+ assert(where);
+ assert(IN_SET(where->type, BUS_MATCH_ROOT, BUS_MATCH_VALUE));
+ assert(callback);
+
+ n = new0(struct bus_match_node, 1);
+ if (!n)
+ return -ENOMEM;
+
+ n->type = BUS_MATCH_LEAF;
+ n->parent = where;
+ n->next = where->child;
+ if (n->next)
+ n->next->prev = n;
+
+ n->leaf.callback = callback;
+ callback->match_node = n;
+
+ where->child = n;
+
+ return 1;
+}
+
+enum bus_match_node_type bus_match_node_type_from_string(const char *k, size_t n) {
+ assert(k);
+
+ if (n == 4 && startswith(k, "type"))
+ return BUS_MATCH_MESSAGE_TYPE;
+ if (n == 6 && startswith(k, "sender"))
+ return BUS_MATCH_SENDER;
+ if (n == 11 && startswith(k, "destination"))
+ return BUS_MATCH_DESTINATION;
+ if (n == 9 && startswith(k, "interface"))
+ return BUS_MATCH_INTERFACE;
+ if (n == 6 && startswith(k, "member"))
+ return BUS_MATCH_MEMBER;
+ if (n == 4 && startswith(k, "path"))
+ return BUS_MATCH_PATH;
+ if (n == 14 && startswith(k, "path_namespace"))
+ return BUS_MATCH_PATH_NAMESPACE;
+
+ if (n == 4 && startswith(k, "arg")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG + j;
+ }
+
+ if (n == 5 && startswith(k, "arg")) {
+ int a, b;
+ enum bus_match_node_type t;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG + a * 10 + b;
+ if (t > BUS_MATCH_ARG_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 8 && startswith(k, "arg") && startswith(k + 4, "path")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_PATH + j;
+ }
+
+ if (n == 9 && startswith(k, "arg") && startswith(k + 5, "path")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_PATH + a * 10 + b;
+ if (t > BUS_MATCH_ARG_PATH_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 13 && startswith(k, "arg") && startswith(k + 4, "namespace")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_NAMESPACE + j;
+ }
+
+ if (n == 14 && startswith(k, "arg") && startswith(k + 5, "namespace")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_NAMESPACE + a * 10 + b;
+ if (t > BUS_MATCH_ARG_NAMESPACE_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 7 && startswith(k, "arg") && startswith(k + 4, "has")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_HAS + j;
+ }
+
+ if (n == 8 && startswith(k, "arg") && startswith(k + 5, "has")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_HAS + a * 10 + b;
+ if (t > BUS_MATCH_ARG_HAS_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ return -EINVAL;
+}
+
+static int match_component_compare(const struct bus_match_component *a, const struct bus_match_component *b) {
+ return CMP(a->type, b->type);
+}
+
+void bus_match_parse_free(struct bus_match_component *components, unsigned n_components) {
+ unsigned i;
+
+ for (i = 0; i < n_components; i++)
+ free(components[i].value_str);
+
+ free(components);
+}
+
+int bus_match_parse(
+ const char *match,
+ struct bus_match_component **_components,
+ unsigned *_n_components) {
+
+ const char *p = match;
+ struct bus_match_component *components = NULL;
+ size_t components_allocated = 0;
+ unsigned n_components = 0, i;
+ _cleanup_free_ char *value = NULL;
+ int r;
+
+ assert(match);
+ assert(_components);
+ assert(_n_components);
+
+ while (*p != 0) {
+ const char *eq, *q;
+ enum bus_match_node_type t;
+ unsigned j = 0;
+ size_t value_allocated = 0;
+ bool escaped = false, quoted;
+ uint8_t u;
+
+ /* Avahi's match rules appear to include whitespace, skip over it */
+ p += strspn(p, " ");
+
+ eq = strchr(p, '=');
+ if (!eq)
+ return -EINVAL;
+
+ t = bus_match_node_type_from_string(p, eq - p);
+ if (t < 0)
+ return -EINVAL;
+
+ quoted = eq[1] == '\'';
+
+ for (q = eq + 1 + quoted;; q++) {
+
+ if (*q == 0) {
+
+ if (quoted) {
+ r = -EINVAL;
+ goto fail;
+ } else {
+ if (value)
+ value[j] = 0;
+ break;
+ }
+ }
+
+ if (!escaped) {
+ if (*q == '\\') {
+ escaped = true;
+ continue;
+ }
+
+ if (quoted) {
+ if (*q == '\'') {
+ if (value)
+ value[j] = 0;
+ break;
+ }
+ } else {
+ if (*q == ',') {
+ if (value)
+ value[j] = 0;
+
+ break;
+ }
+ }
+ }
+
+ if (!GREEDY_REALLOC(value, value_allocated, j + 2)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ value[j++] = *q;
+ escaped = false;
+ }
+
+ if (!value) {
+ value = strdup("");
+ if (!value) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (t == BUS_MATCH_MESSAGE_TYPE) {
+ r = bus_message_type_from_string(value, &u);
+ if (r < 0)
+ goto fail;
+
+ value = mfree(value);
+ } else
+ u = 0;
+
+ if (!GREEDY_REALLOC(components, components_allocated, n_components + 1)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ components[n_components].type = t;
+ components[n_components].value_str = TAKE_PTR(value);
+ components[n_components].value_u8 = u;
+ n_components++;
+
+ if (q[quoted] == 0)
+ break;
+
+ if (q[quoted] != ',') {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ p = q + 1 + quoted;
+ }
+
+ /* Order the whole thing, so that we always generate the same tree */
+ typesafe_qsort(components, n_components, match_component_compare);
+
+ /* Check for duplicates */
+ for (i = 0; i+1 < n_components; i++)
+ if (components[i].type == components[i+1].type) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ *_components = components;
+ *_n_components = n_components;
+
+ return 0;
+
+fail:
+ bus_match_parse_free(components, n_components);
+ return r;
+}
+
+char *bus_match_to_string(struct bus_match_component *components, unsigned n_components) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char *buffer = NULL;
+ size_t size = 0;
+ unsigned i;
+ int r;
+
+ if (n_components <= 0)
+ return strdup("");
+
+ assert(components);
+
+ f = open_memstream_unlocked(&buffer, &size);
+ if (!f)
+ return NULL;
+
+ for (i = 0; i < n_components; i++) {
+ char buf[32];
+
+ if (i != 0)
+ fputc(',', f);
+
+ fputs(bus_match_node_type_to_string(components[i].type, buf, sizeof(buf)), f);
+ fputc('=', f);
+ fputc('\'', f);
+
+ if (components[i].type == BUS_MATCH_MESSAGE_TYPE)
+ fputs(bus_message_type_to_string(components[i].value_u8), f);
+ else
+ fputs(components[i].value_str, f);
+
+ fputc('\'', f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return NULL;
+
+ return buffer;
+}
+
+int bus_match_add(
+ struct bus_match_node *root,
+ struct bus_match_component *components,
+ unsigned n_components,
+ struct match_callback *callback) {
+
+ unsigned i;
+ struct bus_match_node *n;
+ int r;
+
+ assert(root);
+ assert(callback);
+
+ n = root;
+ for (i = 0; i < n_components; i++) {
+ r = bus_match_add_compare_value(
+ n, components[i].type,
+ components[i].value_u8, components[i].value_str, &n);
+ if (r < 0)
+ return r;
+ }
+
+ return bus_match_add_leaf(n, callback);
+}
+
+int bus_match_remove(
+ struct bus_match_node *root,
+ struct match_callback *callback) {
+
+ struct bus_match_node *node, *pp;
+
+ assert(root);
+ assert(callback);
+
+ node = callback->match_node;
+ if (!node)
+ return 0;
+
+ assert(node->type == BUS_MATCH_LEAF);
+
+ callback->match_node = NULL;
+
+ /* Free the leaf */
+ pp = node->parent;
+ bus_match_node_free(node);
+
+ /* Prune the tree above */
+ while (pp) {
+ node = pp;
+ pp = node->parent;
+
+ if (!bus_match_node_maybe_free(node))
+ break;
+ }
+
+ return 1;
+}
+
+void bus_match_free(struct bus_match_node *node) {
+ struct bus_match_node *c;
+
+ if (!node)
+ return;
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+
+ HASHMAP_FOREACH(c, node->compare.children)
+ bus_match_free(c);
+
+ assert(hashmap_isempty(node->compare.children));
+ }
+
+ while ((c = node->child))
+ bus_match_free(c);
+
+ if (node->type != BUS_MATCH_ROOT)
+ bus_match_node_free(node);
+}
+
+const char* bus_match_node_type_to_string(enum bus_match_node_type t, char buf[], size_t l) {
+ switch (t) {
+
+ case BUS_MATCH_ROOT:
+ return "root";
+
+ case BUS_MATCH_VALUE:
+ return "value";
+
+ case BUS_MATCH_LEAF:
+ return "leaf";
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return "type";
+
+ case BUS_MATCH_SENDER:
+ return "sender";
+
+ case BUS_MATCH_DESTINATION:
+ return "destination";
+
+ case BUS_MATCH_INTERFACE:
+ return "interface";
+
+ case BUS_MATCH_MEMBER:
+ return "member";
+
+ case BUS_MATCH_PATH:
+ return "path";
+
+ case BUS_MATCH_PATH_NAMESPACE:
+ return "path_namespace";
+
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ snprintf(buf, l, "arg%i", t - BUS_MATCH_ARG);
+ return buf;
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ snprintf(buf, l, "arg%ipath", t - BUS_MATCH_ARG_PATH);
+ return buf;
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ snprintf(buf, l, "arg%inamespace", t - BUS_MATCH_ARG_NAMESPACE);
+ return buf;
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ snprintf(buf, l, "arg%ihas", t - BUS_MATCH_ARG_HAS);
+ return buf;
+
+ default:
+ return NULL;
+ }
+}
+
+void bus_match_dump(struct bus_match_node *node, unsigned level) {
+ struct bus_match_node *c;
+ _cleanup_free_ char *pfx = NULL;
+ char buf[32];
+
+ if (!node)
+ return;
+
+ pfx = strrep(" ", level);
+ printf("%s[%s]", strempty(pfx), bus_match_node_type_to_string(node->type, buf, sizeof(buf)));
+
+ if (node->type == BUS_MATCH_VALUE) {
+ if (node->parent->type == BUS_MATCH_MESSAGE_TYPE)
+ printf(" <%u>\n", node->value.u8);
+ else
+ printf(" <%s>\n", node->value.str);
+ } else if (node->type == BUS_MATCH_ROOT)
+ puts(" root");
+ else if (node->type == BUS_MATCH_LEAF)
+ printf(" %p/%p\n", node->leaf.callback->callback, container_of(node->leaf.callback, sd_bus_slot, match_callback)->userdata);
+ else
+ putchar('\n');
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+
+ HASHMAP_FOREACH(c, node->compare.children)
+ bus_match_dump(c, level + 1);
+ }
+
+ for (c = node->child; c; c = c->next)
+ bus_match_dump(c, level + 1);
+}
+
+enum bus_match_scope bus_match_get_scope(const struct bus_match_component *components, unsigned n_components) {
+ bool found_driver = false;
+ unsigned i;
+
+ if (n_components <= 0)
+ return BUS_MATCH_GENERIC;
+
+ assert(components);
+
+ /* Checks whether the specified match can only match the
+ * pseudo-service for local messages, which we detect by
+ * sender, interface or path. If a match is not restricted to
+ * local messages, then we check if it only matches on the
+ * driver. */
+
+ for (i = 0; i < n_components; i++) {
+ const struct bus_match_component *c = components + i;
+
+ if (c->type == BUS_MATCH_SENDER) {
+ if (streq_ptr(c->value_str, "org.freedesktop.DBus.Local"))
+ return BUS_MATCH_LOCAL;
+
+ if (streq_ptr(c->value_str, "org.freedesktop.DBus"))
+ found_driver = true;
+ }
+
+ if (c->type == BUS_MATCH_INTERFACE && streq_ptr(c->value_str, "org.freedesktop.DBus.Local"))
+ return BUS_MATCH_LOCAL;
+
+ if (c->type == BUS_MATCH_PATH && streq_ptr(c->value_str, "/org/freedesktop/DBus/Local"))
+ return BUS_MATCH_LOCAL;
+ }
+
+ return found_driver ? BUS_MATCH_DRIVER : BUS_MATCH_GENERIC;
+
+}
diff --git a/src/libsystemd/sd-bus/bus-match.h b/src/libsystemd/sd-bus/bus-match.h
new file mode 100644
index 0000000..e44e406
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-match.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+
+enum bus_match_node_type {
+ BUS_MATCH_ROOT,
+ BUS_MATCH_VALUE,
+ BUS_MATCH_LEAF,
+
+ /* The following are all different kinds of compare nodes */
+ BUS_MATCH_SENDER,
+ BUS_MATCH_MESSAGE_TYPE,
+ BUS_MATCH_DESTINATION,
+ BUS_MATCH_INTERFACE,
+ BUS_MATCH_MEMBER,
+ BUS_MATCH_PATH,
+ BUS_MATCH_PATH_NAMESPACE,
+ BUS_MATCH_ARG,
+ BUS_MATCH_ARG_LAST = BUS_MATCH_ARG + 63,
+ BUS_MATCH_ARG_PATH,
+ BUS_MATCH_ARG_PATH_LAST = BUS_MATCH_ARG_PATH + 63,
+ BUS_MATCH_ARG_NAMESPACE,
+ BUS_MATCH_ARG_NAMESPACE_LAST = BUS_MATCH_ARG_NAMESPACE + 63,
+ BUS_MATCH_ARG_HAS,
+ BUS_MATCH_ARG_HAS_LAST = BUS_MATCH_ARG_HAS + 63,
+ _BUS_MATCH_NODE_TYPE_MAX,
+ _BUS_MATCH_NODE_TYPE_INVALID = -1
+};
+
+struct bus_match_node {
+ enum bus_match_node_type type;
+ struct bus_match_node *parent, *next, *prev, *child;
+
+ union {
+ struct {
+ char *str;
+ uint8_t u8;
+ } value;
+ struct {
+ struct match_callback *callback;
+ } leaf;
+ struct {
+ /* If this is set, then the child is NULL */
+ Hashmap *children;
+ } compare;
+ };
+};
+
+struct bus_match_component {
+ enum bus_match_node_type type;
+ uint8_t value_u8;
+ char *value_str;
+};
+
+enum bus_match_scope {
+ BUS_MATCH_GENERIC,
+ BUS_MATCH_LOCAL,
+ BUS_MATCH_DRIVER,
+};
+
+int bus_match_run(sd_bus *bus, struct bus_match_node *root, sd_bus_message *m);
+
+int bus_match_add(struct bus_match_node *root, struct bus_match_component *components, unsigned n_components, struct match_callback *callback);
+int bus_match_remove(struct bus_match_node *root, struct match_callback *callback);
+
+void bus_match_free(struct bus_match_node *node);
+
+void bus_match_dump(struct bus_match_node *node, unsigned level);
+
+const char* bus_match_node_type_to_string(enum bus_match_node_type t, char buf[], size_t l);
+enum bus_match_node_type bus_match_node_type_from_string(const char *k, size_t n);
+
+int bus_match_parse(const char *match, struct bus_match_component **_components, unsigned *_n_components);
+void bus_match_parse_free(struct bus_match_component *components, unsigned n_components);
+char *bus_match_to_string(struct bus_match_component *components, unsigned n_components);
+
+enum bus_match_scope bus_match_get_scope(const struct bus_match_component *components, unsigned n_components);
diff --git a/src/libsystemd/sd-bus/bus-message.c b/src/libsystemd/sd-bus/bus-message.c
new file mode 100644
index 0000000..86ff5bd
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-message.c
@@ -0,0 +1,5973 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-gvariant.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "memfd-util.h"
+#include "memory-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+
+static int message_append_basic(sd_bus_message *m, char type, const void *p, const void **stored);
+
+static void *adjust_pointer(const void *p, void *old_base, size_t sz, void *new_base) {
+
+ if (!p)
+ return NULL;
+
+ if (old_base == new_base)
+ return (void*) p;
+
+ if ((uint8_t*) p < (uint8_t*) old_base)
+ return (void*) p;
+
+ if ((uint8_t*) p >= (uint8_t*) old_base + sz)
+ return (void*) p;
+
+ return (uint8_t*) new_base + ((uint8_t*) p - (uint8_t*) old_base);
+}
+
+static void message_free_part(sd_bus_message *m, struct bus_body_part *part) {
+ assert(m);
+ assert(part);
+
+ if (part->memfd >= 0) {
+ /* erase if requested, but only if the memfd is not sealed yet, i.e. is writable */
+ if (m->sensitive && !m->sealed)
+ explicit_bzero_safe(part->data, part->size);
+
+ close_and_munmap(part->memfd, part->mmap_begin, part->mapped);
+ } else if (part->munmap_this)
+ /* We don't erase sensitive data here, since the data is memory mapped from someone else, and
+ * we just don't know if it's OK to write to it */
+ munmap(part->mmap_begin, part->mapped);
+ else {
+ /* Erase this if that is requested. Since this is regular memory we know we can write it. */
+ if (m->sensitive)
+ explicit_bzero_safe(part->data, part->size);
+
+ if (part->free_this)
+ free(part->data);
+ }
+
+ if (part != &m->body)
+ free(part);
+}
+
+static void message_reset_parts(sd_bus_message *m) {
+ struct bus_body_part *part;
+
+ assert(m);
+
+ part = &m->body;
+ while (m->n_body_parts > 0) {
+ struct bus_body_part *next = part->next;
+ message_free_part(m, part);
+ part = next;
+ m->n_body_parts--;
+ }
+
+ m->body_end = NULL;
+
+ m->cached_rindex_part = NULL;
+ m->cached_rindex_part_begin = 0;
+}
+
+static struct bus_container *message_get_last_container(sd_bus_message *m) {
+ assert(m);
+
+ if (m->n_containers == 0)
+ return &m->root_container;
+
+ assert(m->containers);
+ return m->containers + m->n_containers - 1;
+}
+
+static void message_free_last_container(sd_bus_message *m) {
+ struct bus_container *c;
+
+ c = message_get_last_container(m);
+
+ free(c->signature);
+ free(c->peeked_signature);
+ free(c->offsets);
+
+ /* Move to previous container, but not if we are on root container */
+ if (m->n_containers > 0)
+ m->n_containers--;
+}
+
+static void message_reset_containers(sd_bus_message *m) {
+ assert(m);
+
+ while (m->n_containers > 0)
+ message_free_last_container(m);
+
+ m->containers = mfree(m->containers);
+ m->containers_allocated = 0;
+ m->root_container.index = 0;
+}
+
+static sd_bus_message* message_free(sd_bus_message *m) {
+ assert(m);
+
+ message_reset_parts(m);
+
+ if (m->free_header)
+ free(m->header);
+
+ /* Note that we don't unref m->bus here. That's already done by sd_bus_message_unref() as each user
+ * reference to the bus message also is considered a reference to the bus connection itself. */
+
+ if (m->free_fds) {
+ close_many(m->fds, m->n_fds);
+ free(m->fds);
+ }
+
+ if (m->iovec != m->iovec_fixed)
+ free(m->iovec);
+
+ message_reset_containers(m);
+ assert(m->n_containers == 0);
+ message_free_last_container(m);
+
+ bus_creds_done(&m->creds);
+ return mfree(m);
+}
+
+static void *message_extend_fields(sd_bus_message *m, size_t align, size_t sz, bool add_offset) {
+ void *op, *np;
+ size_t old_size, new_size, start;
+
+ assert(m);
+
+ if (m->poisoned)
+ return NULL;
+
+ old_size = sizeof(struct bus_header) + m->fields_size;
+ start = ALIGN_TO(old_size, align);
+ new_size = start + sz;
+
+ if (new_size < start ||
+ new_size > (size_t) ((uint32_t) -1))
+ goto poison;
+
+ if (old_size == new_size)
+ return (uint8_t*) m->header + old_size;
+
+ if (m->free_header) {
+ np = realloc(m->header, ALIGN8(new_size));
+ if (!np)
+ goto poison;
+ } else {
+ /* Initially, the header is allocated as part of
+ * the sd_bus_message itself, let's replace it by
+ * dynamic data */
+
+ np = malloc(ALIGN8(new_size));
+ if (!np)
+ goto poison;
+
+ memcpy(np, m->header, sizeof(struct bus_header));
+ }
+
+ /* Zero out padding */
+ if (start > old_size)
+ memzero((uint8_t*) np + old_size, start - old_size);
+
+ op = m->header;
+ m->header = np;
+ m->fields_size = new_size - sizeof(struct bus_header);
+
+ /* Adjust quick access pointers */
+ m->path = adjust_pointer(m->path, op, old_size, m->header);
+ m->interface = adjust_pointer(m->interface, op, old_size, m->header);
+ m->member = adjust_pointer(m->member, op, old_size, m->header);
+ m->destination = adjust_pointer(m->destination, op, old_size, m->header);
+ m->sender = adjust_pointer(m->sender, op, old_size, m->header);
+ m->error.name = adjust_pointer(m->error.name, op, old_size, m->header);
+
+ m->free_header = true;
+
+ if (add_offset) {
+ if (m->n_header_offsets >= ELEMENTSOF(m->header_offsets))
+ goto poison;
+
+ m->header_offsets[m->n_header_offsets++] = new_size - sizeof(struct bus_header);
+ }
+
+ return (uint8_t*) np + start;
+
+poison:
+ m->poisoned = true;
+ return NULL;
+}
+
+static int message_append_field_string(
+ sd_bus_message *m,
+ uint64_t h,
+ char type,
+ const char *s,
+ const char **ret) {
+
+ size_t l;
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus1 only allows 8bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ /* dbus1 doesn't allow strings over 32bit, let's enforce this
+ * globally, to not risk convertability */
+ l = strlen(s);
+ if (l > UINT32_MAX)
+ return -EINVAL;
+
+ /* Signature "(yv)" where the variant contains "s" */
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ /* (field id 64bit, ((string + NUL) + NUL + signature string 's') */
+ p = message_extend_fields(m, 8, 8 + l + 1 + 1 + 1, true);
+ if (!p)
+ return -ENOMEM;
+
+ *((uint64_t*) p) = h;
+ memcpy(p+8, s, l);
+ p[8+l] = 0;
+ p[8+l+1] = 0;
+ p[8+l+2] = type;
+
+ if (ret)
+ *ret = (char*) p + 8;
+
+ } else {
+ /* (field id byte + (signature length + signature 's' + NUL) + (string length + string + NUL)) */
+ p = message_extend_fields(m, 8, 4 + 4 + l + 1, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = type;
+ p[3] = 0;
+
+ ((uint32_t*) p)[1] = l;
+ memcpy(p + 8, s, l + 1);
+
+ if (ret)
+ *ret = (char*) p + 8;
+ }
+
+ return 0;
+}
+
+static int message_append_field_signature(
+ sd_bus_message *m,
+ uint64_t h,
+ const char *s,
+ const char **ret) {
+
+ size_t l;
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus1 only allows 8bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ /* dbus1 doesn't allow signatures over 8bit, let's enforce
+ * this globally, to not risk convertability */
+ l = strlen(s);
+ if (l > SD_BUS_MAXIMUM_SIGNATURE_LENGTH)
+ return -EINVAL;
+
+ /* Signature "(yv)" where the variant contains "g" */
+
+ if (BUS_MESSAGE_IS_GVARIANT(m))
+ /* For gvariant the serialization is the same as for normal strings */
+ return message_append_field_string(m, h, 'g', s, ret);
+ else {
+ /* (field id byte + (signature length + signature 'g' + NUL) + (string length + string + NUL)) */
+ p = message_extend_fields(m, 8, 4 + 1 + l + 1, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = SD_BUS_TYPE_SIGNATURE;
+ p[3] = 0;
+ p[4] = l;
+ memcpy(p + 5, s, l + 1);
+
+ if (ret)
+ *ret = (const char*) p + 5;
+ }
+
+ return 0;
+}
+
+static int message_append_field_uint32(sd_bus_message *m, uint64_t h, uint32_t x) {
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus1 only allows 8bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* (field id 64bit + ((value + NUL + signature string 'u') */
+
+ p = message_extend_fields(m, 8, 8 + 4 + 1 + 1, true);
+ if (!p)
+ return -ENOMEM;
+
+ *((uint64_t*) p) = h;
+ *((uint32_t*) (p + 8)) = x;
+ p[12] = 0;
+ p[13] = 'u';
+ } else {
+ /* (field id byte + (signature length + signature 'u' + NUL) + value) */
+ p = message_extend_fields(m, 8, 4 + 4, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = 'u';
+ p[3] = 0;
+
+ ((uint32_t*) p)[1] = x;
+ }
+
+ return 0;
+}
+
+static int message_append_field_uint64(sd_bus_message *m, uint64_t h, uint64_t x) {
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus1 only allows 8bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* (field id 64bit + ((value + NUL + signature string 't') */
+
+ p = message_extend_fields(m, 8, 8 + 8 + 1 + 1, true);
+ if (!p)
+ return -ENOMEM;
+
+ *((uint64_t*) p) = h;
+ *((uint64_t*) (p + 8)) = x;
+ p[16] = 0;
+ p[17] = 't';
+ } else {
+ /* (field id byte + (signature length + signature 't' + NUL) + 4 byte padding + value) */
+ p = message_extend_fields(m, 8, 4 + 4 + 8, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = 't';
+ p[3] = 0;
+ p[4] = 0;
+ p[5] = 0;
+ p[6] = 0;
+ p[7] = 0;
+
+ ((uint64_t*) p)[1] = x;
+ }
+
+ return 0;
+}
+
+static int message_append_reply_cookie(sd_bus_message *m, uint64_t cookie) {
+ assert(m);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m))
+ return message_append_field_uint64(m, BUS_MESSAGE_HEADER_REPLY_SERIAL, cookie);
+ else {
+ /* 64bit cookies are not supported on dbus1 */
+ if (cookie > 0xffffffffUL)
+ return -EOPNOTSUPP;
+
+ return message_append_field_uint32(m, BUS_MESSAGE_HEADER_REPLY_SERIAL, (uint32_t) cookie);
+ }
+}
+
+int bus_message_from_header(
+ sd_bus *bus,
+ void *header,
+ size_t header_accessible,
+ void *footer,
+ size_t footer_accessible,
+ size_t message_size,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ size_t extra,
+ sd_bus_message **ret) {
+
+ _cleanup_free_ sd_bus_message *m = NULL;
+ struct bus_header *h;
+ size_t a, label_sz;
+
+ assert(bus);
+ assert(header || header_accessible <= 0);
+ assert(footer || footer_accessible <= 0);
+ assert(fds || n_fds <= 0);
+ assert(ret);
+
+ if (header_accessible < sizeof(struct bus_header))
+ return -EBADMSG;
+
+ if (header_accessible > message_size)
+ return -EBADMSG;
+ if (footer_accessible > message_size)
+ return -EBADMSG;
+
+ h = header;
+ if (!IN_SET(h->version, 1, 2))
+ return -EBADMSG;
+
+ if (h->type == _SD_BUS_MESSAGE_TYPE_INVALID)
+ return -EBADMSG;
+
+ if (!IN_SET(h->endian, BUS_LITTLE_ENDIAN, BUS_BIG_ENDIAN))
+ return -EBADMSG;
+
+ /* Note that we are happy with unknown flags in the flags header! */
+
+ a = ALIGN(sizeof(sd_bus_message)) + ALIGN(extra);
+
+ if (label) {
+ label_sz = strlen(label);
+ a += label_sz + 1;
+ }
+
+ m = malloc0(a);
+ if (!m)
+ return -ENOMEM;
+
+ m->sealed = true;
+ m->header = header;
+ m->header_accessible = header_accessible;
+ m->footer = footer;
+ m->footer_accessible = footer_accessible;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ size_t ws;
+
+ if (h->dbus2.cookie == 0)
+ return -EBADMSG;
+
+ /* dbus2 derives the sizes from the message size and
+ the offset table at the end, since it is formatted as
+ gvariant "yyyyuta{tv}v". Since the message itself is a
+ structure with precisely to variable sized entries,
+ there's only one offset in the table, which marks the
+ end of the fields array. */
+
+ ws = bus_gvariant_determine_word_size(message_size, 0);
+ if (footer_accessible < ws)
+ return -EBADMSG;
+
+ m->fields_size = bus_gvariant_read_word_le((uint8_t*) footer + footer_accessible - ws, ws);
+ if (ALIGN8(m->fields_size) > message_size - ws)
+ return -EBADMSG;
+ if (m->fields_size < sizeof(struct bus_header))
+ return -EBADMSG;
+
+ m->fields_size -= sizeof(struct bus_header);
+ m->body_size = message_size - (sizeof(struct bus_header) + ALIGN8(m->fields_size));
+ } else {
+ if (h->dbus1.serial == 0)
+ return -EBADMSG;
+
+ /* dbus1 has the sizes in the header */
+ m->fields_size = BUS_MESSAGE_BSWAP32(m, h->dbus1.fields_size);
+ m->body_size = BUS_MESSAGE_BSWAP32(m, h->dbus1.body_size);
+
+ if (sizeof(struct bus_header) + ALIGN8(m->fields_size) + m->body_size != message_size)
+ return -EBADMSG;
+ }
+
+ m->fds = fds;
+ m->n_fds = n_fds;
+
+ if (label) {
+ m->creds.label = (char*) m + ALIGN(sizeof(sd_bus_message)) + ALIGN(extra);
+ memcpy(m->creds.label, label, label_sz + 1);
+
+ m->creds.mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ m->n_ref = 1;
+ m->bus = sd_bus_ref(bus);
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int bus_message_from_malloc(
+ sd_bus *bus,
+ void *buffer,
+ size_t length,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ sd_bus_message **ret) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ size_t sz;
+ int r;
+
+ r = bus_message_from_header(
+ bus,
+ buffer, length, /* in this case the initial bytes and the final bytes are the same */
+ buffer, length,
+ length,
+ fds, n_fds,
+ label,
+ 0, &m);
+ if (r < 0)
+ return r;
+
+ sz = length - sizeof(struct bus_header) - ALIGN8(m->fields_size);
+ if (sz > 0) {
+ m->n_body_parts = 1;
+ m->body.data = (uint8_t*) buffer + sizeof(struct bus_header) + ALIGN8(m->fields_size);
+ m->body.size = sz;
+ m->body.sealed = true;
+ m->body.memfd = -1;
+ }
+
+ m->n_iovec = 1;
+ m->iovec = m->iovec_fixed;
+ m->iovec[0] = IOVEC_MAKE(buffer, length);
+
+ r = bus_message_parse_fields(m);
+ if (r < 0)
+ return r;
+
+ /* We take possession of the memory and fds now */
+ m->free_header = true;
+ m->free_fds = true;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+_public_ int sd_bus_message_new(
+ sd_bus *bus,
+ sd_bus_message **m,
+ uint8_t type) {
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(m, -EINVAL);
+ /* Creation of messages with _SD_BUS_MESSAGE_TYPE_INVALID is allowed. */
+ assert_return(type < _SD_BUS_MESSAGE_TYPE_MAX, -EINVAL);
+
+ sd_bus_message *t = malloc0(ALIGN(sizeof(sd_bus_message)) + sizeof(struct bus_header));
+ if (!t)
+ return -ENOMEM;
+
+ t->n_ref = 1;
+ t->bus = sd_bus_ref(bus);
+ t->header = (struct bus_header*) ((uint8_t*) t + ALIGN(sizeof(struct sd_bus_message)));
+ t->header->endian = BUS_NATIVE_ENDIAN;
+ t->header->type = type;
+ t->header->version = bus->message_version;
+ t->allow_fds = bus->can_fds || !IN_SET(bus->state, BUS_HELLO, BUS_RUNNING);
+ t->root_container.need_offsets = BUS_MESSAGE_IS_GVARIANT(t);
+
+ if (bus->allow_interactive_authorization)
+ t->header->flags |= BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION;
+
+ *m = t;
+ return 0;
+}
+
+_public_ int sd_bus_message_new_signal(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const char *path,
+ const char *interface,
+ const char *member) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(member_name_is_valid(member), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_SIGNAL);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_PATH, SD_BUS_TYPE_OBJECT_PATH, path, &t->path);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_INTERFACE, SD_BUS_TYPE_STRING, interface, &t->interface);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_MEMBER, SD_BUS_TYPE_STRING, member, &t->member);
+ if (r < 0)
+ return r;
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_method_call(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(!destination || service_name_is_valid(destination), -EINVAL);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(member_name_is_valid(member), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_CALL);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_PATH, SD_BUS_TYPE_OBJECT_PATH, path, &t->path);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_MEMBER, SD_BUS_TYPE_STRING, member, &t->member);
+ if (r < 0)
+ return r;
+
+ if (interface) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_INTERFACE, SD_BUS_TYPE_STRING, interface, &t->interface);
+ if (r < 0)
+ return r;
+ }
+
+ if (destination) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+static int message_new_reply(
+ sd_bus_message *call,
+ uint8_t type,
+ sd_bus_message **m) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ uint64_t cookie;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(m, -EINVAL);
+
+ cookie = BUS_MESSAGE_COOKIE(call);
+ if (cookie == 0)
+ return -EOPNOTSUPP;
+
+ r = sd_bus_message_new(call->bus, &t, type);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+ t->reply_cookie = cookie;
+ r = message_append_reply_cookie(t, t->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if (call->sender) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, call->sender, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ t->dont_send = !!(call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED);
+ t->enforced_reply_signature = call->enforced_reply_signature;
+
+ /* let's copy the sensitive flag over. Let's do that as a safety precaution to keep a transaction
+ * wholly sensitive if already the incoming message was sensitive. This is particularly useful when a
+ * vtable record sets the SD_BUS_VTABLE_SENSITIVE flag on a method call, since this means it applies
+ * to both the message call and the reply. */
+ t->sensitive = call->sensitive;
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_method_return(
+ sd_bus_message *call,
+ sd_bus_message **m) {
+
+ return message_new_reply(call, SD_BUS_MESSAGE_METHOD_RETURN, m);
+}
+
+_public_ int sd_bus_message_new_method_error(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ const sd_bus_error *e) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(sd_bus_error_is_set(e), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = message_new_reply(call, SD_BUS_MESSAGE_METHOD_ERROR, &t);
+ if (r < 0)
+ return r;
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, e->name, &t->error.name);
+ if (r < 0)
+ return r;
+
+ if (e->message) {
+ r = message_append_basic(t, SD_BUS_TYPE_STRING, e->message, (const void**) &t->error.message);
+ if (r < 0)
+ return r;
+ }
+
+ t->error._need_free = -1;
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_method_errorf(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ const char *name,
+ const char *format,
+ ...) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ va_list ap;
+
+ assert_return(name, -EINVAL);
+ assert_return(m, -EINVAL);
+
+ va_start(ap, format);
+ bus_error_setfv(&error, name, format, ap);
+ va_end(ap);
+
+ return sd_bus_message_new_method_error(call, m, &error);
+}
+
+_public_ int sd_bus_message_new_method_errno(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ int error,
+ const sd_bus_error *p) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+
+ if (sd_bus_error_is_set(p))
+ return sd_bus_message_new_method_error(call, m, p);
+
+ sd_bus_error_set_errno(&berror, error);
+
+ return sd_bus_message_new_method_error(call, m, &berror);
+}
+
+_public_ int sd_bus_message_new_method_errnof(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ int error,
+ const char *format,
+ ...) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+ va_list ap;
+
+ va_start(ap, format);
+ sd_bus_error_set_errnofv(&berror, error, format, ap);
+ va_end(ap);
+
+ return sd_bus_message_new_method_error(call, m, &berror);
+}
+
+void bus_message_set_sender_local(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ m->sender = m->creds.unique_name = (char*) "org.freedesktop.DBus.Local";
+ m->creds.well_known_names_local = true;
+ m->creds.mask |= (SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES) & bus->creds_mask;
+}
+
+void bus_message_set_sender_driver(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ m->sender = m->creds.unique_name = (char*) "org.freedesktop.DBus";
+ m->creds.well_known_names_driver = true;
+ m->creds.mask |= (SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES) & bus->creds_mask;
+}
+
+int bus_message_new_synthetic_error(
+ sd_bus *bus,
+ uint64_t cookie,
+ const sd_bus_error *e,
+ sd_bus_message **m) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert(bus);
+ assert(sd_bus_error_is_set(e));
+ assert(m);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_ERROR);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+ t->reply_cookie = cookie;
+
+ r = message_append_reply_cookie(t, t->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if (bus && bus->unique_name) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, bus->unique_name, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, e->name, &t->error.name);
+ if (r < 0)
+ return r;
+
+ if (e->message) {
+ r = message_append_basic(t, SD_BUS_TYPE_STRING, e->message, (const void**) &t->error.message);
+ if (r < 0)
+ return r;
+ }
+
+ t->error._need_free = -1;
+
+ bus_message_set_sender_driver(bus, t);
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ sd_bus_message* sd_bus_message_ref(sd_bus_message *m) {
+ if (!m)
+ return NULL;
+
+ /* We are fine if this message so far was either explicitly reffed or not reffed but queued into at
+ * least one bus connection object. */
+ assert(m->n_ref > 0 || m->n_queued > 0);
+
+ m->n_ref++;
+
+ /* Each user reference to a bus message shall also be considered a ref on the bus */
+ sd_bus_ref(m->bus);
+ return m;
+}
+
+_public_ sd_bus_message* sd_bus_message_unref(sd_bus_message *m) {
+ if (!m)
+ return NULL;
+
+ assert(m->n_ref > 0);
+
+ sd_bus_unref(m->bus); /* Each regular ref is also a ref on the bus connection. Let's hence drop it
+ * here. Note we have to do this before decrementing our own n_ref here, since
+ * otherwise, if this message is currently queued sd_bus_unref() might call
+ * bus_message_unref_queued() for this which might then destroy the message
+ * while we are still processing it. */
+ m->n_ref--;
+
+ if (m->n_ref > 0 || m->n_queued > 0)
+ return NULL;
+
+ /* Unset the bus field if neither the user has a reference nor this message is queued. We are careful
+ * to reset the field only after the last reference to the bus is dropped, after all we might keep
+ * multiple references to the bus, once for each reference kept on ourselves. */
+ m->bus = NULL;
+
+ return message_free(m);
+}
+
+sd_bus_message* bus_message_ref_queued(sd_bus_message *m, sd_bus *bus) {
+ if (!m)
+ return NULL;
+
+ /* If this is a different bus than the message is associated with, then implicitly turn this into a
+ * regular reference. This means that you can create a memory leak by enqueuing a message generated
+ * on one bus onto another at the same time as enqueueing a message from the second one on the first,
+ * as we'll not detect the cyclic references there. */
+ if (bus != m->bus)
+ return sd_bus_message_ref(m);
+
+ assert(m->n_ref > 0 || m->n_queued > 0);
+ m->n_queued++;
+
+ return m;
+}
+
+sd_bus_message* bus_message_unref_queued(sd_bus_message *m, sd_bus *bus) {
+ if (!m)
+ return NULL;
+
+ if (bus != m->bus)
+ return sd_bus_message_unref(m);
+
+ assert(m->n_queued > 0);
+ m->n_queued--;
+
+ if (m->n_ref > 0 || m->n_queued > 0)
+ return NULL;
+
+ m->bus = NULL;
+
+ return message_free(m);
+}
+
+_public_ int sd_bus_message_get_type(sd_bus_message *m, uint8_t *type) {
+ assert_return(m, -EINVAL);
+ assert_return(type, -EINVAL);
+
+ *type = m->header->type;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_cookie(sd_bus_message *m, uint64_t *cookie) {
+ uint64_t c;
+
+ assert_return(m, -EINVAL);
+ assert_return(cookie, -EINVAL);
+
+ c = BUS_MESSAGE_COOKIE(m);
+ if (c == 0)
+ return -ENODATA;
+
+ *cookie = BUS_MESSAGE_COOKIE(m);
+ return 0;
+}
+
+_public_ int sd_bus_message_get_reply_cookie(sd_bus_message *m, uint64_t *cookie) {
+ assert_return(m, -EINVAL);
+ assert_return(cookie, -EINVAL);
+
+ if (m->reply_cookie == 0)
+ return -ENODATA;
+
+ *cookie = m->reply_cookie;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_expect_reply(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->header->type == SD_BUS_MESSAGE_METHOD_CALL &&
+ !(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED);
+}
+
+_public_ int sd_bus_message_get_auto_start(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return !(m->header->flags & BUS_MESSAGE_NO_AUTO_START);
+}
+
+_public_ int sd_bus_message_get_allow_interactive_authorization(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->header->type == SD_BUS_MESSAGE_METHOD_CALL &&
+ (m->header->flags & BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION);
+}
+
+_public_ const char *sd_bus_message_get_path(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->path;
+}
+
+_public_ const char *sd_bus_message_get_interface(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->interface;
+}
+
+_public_ const char *sd_bus_message_get_member(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->member;
+}
+
+_public_ const char *sd_bus_message_get_destination(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->destination;
+}
+
+_public_ const char *sd_bus_message_get_sender(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->sender;
+}
+
+_public_ const sd_bus_error *sd_bus_message_get_error(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ if (!sd_bus_error_is_set(&m->error))
+ return NULL;
+
+ return &m->error;
+}
+
+_public_ int sd_bus_message_get_monotonic_usec(sd_bus_message *m, uint64_t *usec) {
+ assert_return(m, -EINVAL);
+ assert_return(usec, -EINVAL);
+
+ if (m->monotonic <= 0)
+ return -ENODATA;
+
+ *usec = m->monotonic;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_realtime_usec(sd_bus_message *m, uint64_t *usec) {
+ assert_return(m, -EINVAL);
+ assert_return(usec, -EINVAL);
+
+ if (m->realtime <= 0)
+ return -ENODATA;
+
+ *usec = m->realtime;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_seqnum(sd_bus_message *m, uint64_t *seqnum) {
+ assert_return(m, -EINVAL);
+ assert_return(seqnum, -EINVAL);
+
+ if (m->seqnum <= 0)
+ return -ENODATA;
+
+ *seqnum = m->seqnum;
+ return 0;
+}
+
+_public_ sd_bus_creds *sd_bus_message_get_creds(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ if (m->creds.mask == 0)
+ return NULL;
+
+ return &m->creds;
+}
+
+_public_ int sd_bus_message_is_signal(
+ sd_bus_message *m,
+ const char *interface,
+ const char *member) {
+
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_SIGNAL)
+ return 0;
+
+ if (interface && !streq_ptr(m->interface, interface))
+ return 0;
+
+ if (member && !streq_ptr(m->member, member))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_is_method_call(
+ sd_bus_message *m,
+ const char *interface,
+ const char *member) {
+
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (interface && !streq_ptr(m->interface, interface))
+ return 0;
+
+ if (member && !streq_ptr(m->member, member))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_is_method_error(sd_bus_message *m, const char *name) {
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_ERROR)
+ return 0;
+
+ if (name && !streq_ptr(m->error.name, name))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_set_expect_reply(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_NO_REPLY_EXPECTED, !b);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_auto_start(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_NO_AUTO_START, !b);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_allow_interactive_authorization(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION, b);
+
+ return 0;
+}
+
+struct bus_body_part *message_append_part(sd_bus_message *m) {
+ struct bus_body_part *part;
+
+ assert(m);
+
+ if (m->poisoned)
+ return NULL;
+
+ if (m->n_body_parts <= 0) {
+ part = &m->body;
+ zero(*part);
+ } else {
+ assert(m->body_end);
+
+ part = new0(struct bus_body_part, 1);
+ if (!part) {
+ m->poisoned = true;
+ return NULL;
+ }
+
+ m->body_end->next = part;
+ }
+
+ part->memfd = -1;
+ m->body_end = part;
+ m->n_body_parts++;
+
+ return part;
+}
+
+static void part_zero(struct bus_body_part *part, size_t sz) {
+ assert(part);
+ assert(sz > 0);
+ assert(sz < 8);
+
+ /* All other fields can be left in their defaults */
+ assert(!part->data);
+ assert(part->memfd < 0);
+
+ part->size = sz;
+ part->is_zero = true;
+ part->sealed = true;
+}
+
+static int part_make_space(
+ struct sd_bus_message *m,
+ struct bus_body_part *part,
+ size_t sz,
+ void **q) {
+
+ void *n;
+
+ assert(m);
+ assert(part);
+ assert(!part->sealed);
+
+ if (m->poisoned)
+ return -ENOMEM;
+
+ if (part->allocated == 0 || sz > part->allocated) {
+ size_t new_allocated;
+
+ new_allocated = sz > 0 ? 2 * sz : 64;
+ n = realloc(part->data, new_allocated);
+ if (!n) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ part->data = n;
+ part->allocated = new_allocated;
+ part->free_this = true;
+ }
+
+ if (q)
+ *q = part->data ? (uint8_t*) part->data + part->size : NULL;
+
+ part->size = sz;
+ return 0;
+}
+
+static int message_add_offset(sd_bus_message *m, size_t offset) {
+ struct bus_container *c;
+
+ assert(m);
+ assert(BUS_MESSAGE_IS_GVARIANT(m));
+
+ /* Add offset to current container, unless this is the first
+ * item in it, which will have the 0 offset, which we can
+ * ignore. */
+ c = message_get_last_container(m);
+
+ if (!c->need_offsets)
+ return 0;
+
+ if (!GREEDY_REALLOC(c->offsets, c->offsets_allocated, c->n_offsets + 1))
+ return -ENOMEM;
+
+ c->offsets[c->n_offsets++] = offset;
+ return 0;
+}
+
+static void message_extend_containers(sd_bus_message *m, size_t expand) {
+ assert(m);
+
+ if (expand <= 0)
+ return;
+
+ if (m->n_containers <= 0)
+ return;
+
+ /* Update counters */
+ for (struct bus_container *c = m->containers; c < m->containers + m->n_containers; c++)
+ if (c->array_size)
+ *c->array_size += expand;
+}
+
+static void *message_extend_body(
+ sd_bus_message *m,
+ size_t align,
+ size_t sz,
+ bool add_offset,
+ bool force_inline) {
+
+ size_t start_body, end_body, padding, added;
+ void *p;
+ int r;
+
+ assert(m);
+ assert(align > 0);
+ assert(!m->sealed);
+
+ if (m->poisoned)
+ return NULL;
+
+ start_body = ALIGN_TO((size_t) m->body_size, align);
+ end_body = start_body + sz;
+
+ padding = start_body - m->body_size;
+ added = padding + sz;
+
+ /* Check for 32bit overflows */
+ if (end_body > (size_t) ((uint32_t) -1) ||
+ end_body < start_body) {
+ m->poisoned = true;
+ return NULL;
+ }
+
+ if (added > 0) {
+ struct bus_body_part *part = NULL;
+ bool add_new_part;
+
+ add_new_part =
+ m->n_body_parts <= 0 ||
+ m->body_end->sealed ||
+ (padding != ALIGN_TO(m->body_end->size, align) - m->body_end->size) ||
+ (force_inline && m->body_end->size > MEMFD_MIN_SIZE);
+ /* If this must be an inlined extension, let's create a new part if
+ * the previous part is large enough to be inlined. */
+
+ if (add_new_part) {
+ if (padding > 0) {
+ part = message_append_part(m);
+ if (!part)
+ return NULL;
+
+ part_zero(part, padding);
+ }
+
+ part = message_append_part(m);
+ if (!part)
+ return NULL;
+
+ r = part_make_space(m, part, sz, &p);
+ if (r < 0)
+ return NULL;
+ } else {
+ void *op;
+ size_t os, start_part, end_part;
+
+ part = m->body_end;
+ op = part->data;
+ os = part->size;
+
+ start_part = ALIGN_TO(part->size, align);
+ end_part = start_part + sz;
+
+ r = part_make_space(m, part, end_part, &p);
+ if (r < 0)
+ return NULL;
+
+ if (padding > 0) {
+ memzero(p, padding);
+ p = (uint8_t*) p + padding;
+ }
+
+ /* Readjust pointers */
+ if (m->n_containers > 0)
+ for (struct bus_container *c = m->containers; c < m->containers + m->n_containers; c++)
+ c->array_size = adjust_pointer(c->array_size, op, os, part->data);
+
+ m->error.message = (const char*) adjust_pointer(m->error.message, op, os, part->data);
+ }
+ } else
+ /* Return something that is not NULL and is aligned */
+ p = (uint8_t*) align;
+
+ m->body_size = end_body;
+ message_extend_containers(m, added);
+
+ if (add_offset) {
+ r = message_add_offset(m, end_body);
+ if (r < 0) {
+ m->poisoned = true;
+ return NULL;
+ }
+ }
+
+ return p;
+}
+
+static int message_push_fd(sd_bus_message *m, int fd) {
+ int *f, copy;
+
+ assert(m);
+
+ if (fd < 0)
+ return -EINVAL;
+
+ if (!m->allow_fds)
+ return -EOPNOTSUPP;
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ f = reallocarray(m->fds, sizeof(int), m->n_fds + 1);
+ if (!f) {
+ m->poisoned = true;
+ safe_close(copy);
+ return -ENOMEM;
+ }
+
+ m->fds = f;
+ m->fds[m->n_fds] = copy;
+ m->free_fds = true;
+
+ return copy;
+}
+
+int message_append_basic(sd_bus_message *m, char type, const void *p, const void **stored) {
+ _cleanup_close_ int fd = -1;
+ struct bus_container *c;
+ ssize_t align, sz;
+ void *a;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_basic(type), -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != type)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(type), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ uint8_t u8;
+ uint32_t u32;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_STRING:
+ p = strempty(p);
+
+ _fallthrough_;
+ case SD_BUS_TYPE_OBJECT_PATH:
+ if (!p)
+ return -EINVAL;
+
+ align = 1;
+ sz = strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+
+ u8 = p && *(int*) p;
+ p = &u8;
+
+ align = sz = 1;
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD:
+
+ if (!p)
+ return -EINVAL;
+
+ fd = message_push_fd(m, *(int*) p);
+ if (fd < 0)
+ return fd;
+
+ u32 = m->n_fds;
+ p = &u32;
+
+ align = sz = 4;
+ break;
+
+ default:
+ align = bus_gvariant_get_alignment(CHAR_TO_STR(type));
+ sz = bus_gvariant_get_size(CHAR_TO_STR(type));
+ break;
+ }
+
+ assert(align > 0);
+ assert(sz > 0);
+
+ a = message_extend_body(m, align, sz, true, false);
+ if (!a)
+ return -ENOMEM;
+
+ memcpy(a, p, sz);
+
+ if (stored)
+ *stored = (const uint8_t*) a;
+
+ } else {
+ uint32_t u32;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING:
+ /* To make things easy we'll serialize a NULL string
+ * into the empty string */
+ p = strempty(p);
+
+ _fallthrough_;
+ case SD_BUS_TYPE_OBJECT_PATH:
+
+ if (!p)
+ return -EINVAL;
+
+ align = 4;
+ sz = 4 + strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_SIGNATURE:
+
+ p = strempty(p);
+
+ align = 1;
+ sz = 1 + strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+
+ u32 = p && *(int*) p;
+ p = &u32;
+
+ align = sz = 4;
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD:
+
+ if (!p)
+ return -EINVAL;
+
+ fd = message_push_fd(m, *(int*) p);
+ if (fd < 0)
+ return fd;
+
+ u32 = m->n_fds;
+ p = &u32;
+
+ align = sz = 4;
+ break;
+
+ default:
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+ break;
+ }
+
+ assert(align > 0);
+ assert(sz > 0);
+
+ a = message_extend_body(m, align, sz, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH)) {
+ *(uint32_t*) a = sz - 5;
+ memcpy((uint8_t*) a + 4, p, sz - 4);
+
+ if (stored)
+ *stored = (const uint8_t*) a + 4;
+
+ } else if (type == SD_BUS_TYPE_SIGNATURE) {
+ *(uint8_t*) a = sz - 2;
+ memcpy((uint8_t*) a + 1, p, sz - 1);
+
+ if (stored)
+ *stored = (const uint8_t*) a + 1;
+ } else {
+ memcpy(a, p, sz);
+
+ if (stored)
+ *stored = a;
+ }
+ }
+
+ if (type == SD_BUS_TYPE_UNIX_FD)
+ m->n_fds++;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ fd = -1;
+ return 0;
+}
+
+_public_ int sd_bus_message_append_basic(sd_bus_message *m, char type, const void *p) {
+ return message_append_basic(m, type, p, NULL);
+}
+
+_public_ int sd_bus_message_append_string_space(
+ sd_bus_message *m,
+ size_t size,
+ char **s) {
+
+ struct bus_container *c;
+ void *a;
+
+ assert_return(m, -EINVAL);
+ assert_return(s, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRING)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRING), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ a = message_extend_body(m, 1, size + 1, true, false);
+ if (!a)
+ return -ENOMEM;
+
+ *s = a;
+ } else {
+ a = message_extend_body(m, 4, 4 + size + 1, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint32_t*) a = size;
+ *s = (char*) a + 4;
+ }
+
+ (*s)[size] = 0;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_string_iovec(
+ sd_bus_message *m,
+ const struct iovec *iov,
+ unsigned n /* should be size_t, but is API now… 😞 */) {
+
+ size_t size;
+ unsigned i;
+ char *p;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(iov || n == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ size = IOVEC_TOTAL_SIZE(iov, n);
+
+ r = sd_bus_message_append_string_space(m, size, &p);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i++) {
+
+ if (iov[i].iov_base)
+ memcpy(p, iov[i].iov_base, iov[i].iov_len);
+ else
+ memset(p, ' ', iov[i].iov_len);
+
+ p += iov[i].iov_len;
+ }
+
+ return 0;
+}
+
+static int bus_message_open_array(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ uint32_t **array_size,
+ size_t *begin,
+ bool *need_offsets) {
+
+ unsigned nindex;
+ int alignment, r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(array_size);
+ assert(begin);
+ assert(need_offsets);
+
+ if (!signature_is_single(contents, true))
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+
+ /* Verify the existing signature */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!startswith(c->signature + c->index + 1, contents))
+ return -ENXIO;
+
+ nindex = c->index + 1 + strlen(contents);
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ /* Extend the existing signature */
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_ARRAY), contents, NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ nindex = e - c->signature;
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ alignment = bus_gvariant_get_alignment(contents);
+ if (alignment < 0)
+ return alignment;
+
+ /* Add alignment padding and add to offset list */
+ if (!message_extend_body(m, alignment, 0, false, false))
+ return -ENOMEM;
+
+ r = bus_gvariant_is_fixed_size(contents);
+ if (r < 0)
+ return r;
+
+ *begin = m->body_size;
+ *need_offsets = r == 0;
+ } else {
+ void *a, *op;
+ size_t os;
+ struct bus_body_part *o;
+
+ alignment = bus_type_get_alignment(contents[0]);
+ if (alignment < 0)
+ return alignment;
+
+ a = message_extend_body(m, 4, 4, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ o = m->body_end;
+ op = m->body_end->data;
+ os = m->body_end->size;
+
+ /* Add alignment between size and first element */
+ if (!message_extend_body(m, alignment, 0, false, false))
+ return -ENOMEM;
+
+ /* location of array size might have changed so let's readjust a */
+ if (o == m->body_end)
+ a = adjust_pointer(a, op, os, m->body_end->data);
+
+ *(uint32_t*) a = 0;
+ *array_size = a;
+ }
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index = nindex;
+
+ return 0;
+}
+
+static int bus_message_open_variant(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents) {
+
+ assert(m);
+ assert(c);
+ assert(contents);
+
+ if (!signature_is_single(contents, false))
+ return -EINVAL;
+
+ if (*contents == SD_BUS_TYPE_DICT_ENTRY_BEGIN)
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+
+ if (c->signature[c->index] != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_VARIANT), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* Variants are always aligned to 8 */
+
+ if (!message_extend_body(m, 8, 0, false, false))
+ return -ENOMEM;
+
+ } else {
+ size_t l;
+ void *a;
+
+ l = strlen(contents);
+ a = message_extend_body(m, 1, 1 + l + 1, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint8_t*) a = l;
+ memcpy((uint8_t*) a + 1, contents, l + 1);
+ }
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+static int bus_message_open_struct(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *begin,
+ bool *need_offsets) {
+
+ size_t nindex;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(begin);
+ assert(need_offsets);
+
+ if (!signature_is_valid(contents, false))
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+ size_t l;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRUCT_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_STRUCT_END)
+ return -ENXIO;
+
+ nindex = c->index + 1 + l + 1;
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRUCT_BEGIN), contents, CHAR_TO_STR(SD_BUS_TYPE_STRUCT_END), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ nindex = e - c->signature;
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ int alignment;
+
+ alignment = bus_gvariant_get_alignment(contents);
+ if (alignment < 0)
+ return alignment;
+
+ if (!message_extend_body(m, alignment, 0, false, false))
+ return -ENOMEM;
+
+ r = bus_gvariant_is_fixed_size(contents);
+ if (r < 0)
+ return r;
+
+ *begin = m->body_size;
+ *need_offsets = r == 0;
+ } else {
+ /* Align contents to 8 byte boundary */
+ if (!message_extend_body(m, 8, 0, false, false))
+ return -ENOMEM;
+ }
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index = nindex;
+
+ return 0;
+}
+
+static int bus_message_open_dict_entry(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *begin,
+ bool *need_offsets) {
+
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(begin);
+ assert(need_offsets);
+
+ if (!signature_is_pair(contents))
+ return -EINVAL;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (c->signature && c->signature[c->index]) {
+ size_t l;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_DICT_ENTRY_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_DICT_ENTRY_END)
+ return -ENXIO;
+ } else
+ return -ENXIO;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ int alignment;
+
+ alignment = bus_gvariant_get_alignment(contents);
+ if (alignment < 0)
+ return alignment;
+
+ if (!message_extend_body(m, alignment, 0, false, false))
+ return -ENOMEM;
+
+ r = bus_gvariant_is_fixed_size(contents);
+ if (r < 0)
+ return r;
+
+ *begin = m->body_size;
+ *need_offsets = r == 0;
+ } else {
+ /* Align contents to 8 byte boundary */
+ if (!message_extend_body(m, 8, 0, false, false))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_open_container(
+ sd_bus_message *m,
+ char type,
+ const char *contents) {
+
+ struct bus_container *c;
+ uint32_t *array_size = NULL;
+ _cleanup_free_ char *signature = NULL;
+ size_t before, begin = 0;
+ bool need_offsets = false;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(contents, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ /* Make sure we have space for one more container */
+ if (!GREEDY_REALLOC(m->containers, m->containers_allocated, m->n_containers + 1)) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ c = message_get_last_container(m);
+
+ signature = strdup(contents);
+ if (!signature) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ /* Save old index in the parent container, in case we have to
+ * abort this container */
+ c->saved_index = c->index;
+ before = m->body_size;
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ r = bus_message_open_array(m, c, contents, &array_size, &begin, &need_offsets);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ r = bus_message_open_variant(m, c, contents);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ r = bus_message_open_struct(m, c, contents, &begin, &need_offsets);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ r = bus_message_open_dict_entry(m, c, contents, &begin, &need_offsets);
+ else
+ r = -EINVAL;
+ if (r < 0)
+ return r;
+
+ /* OK, let's fill it in */
+ m->containers[m->n_containers++] = (struct bus_container) {
+ .enclosing = type,
+ .signature = TAKE_PTR(signature),
+ .array_size = array_size,
+ .before = before,
+ .begin = begin,
+ .need_offsets = need_offsets,
+ };
+
+ return 0;
+}
+
+static int bus_message_close_array(sd_bus_message *m, struct bus_container *c) {
+
+ assert(m);
+ assert(c);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ return 0;
+
+ if (c->need_offsets) {
+ size_t payload, sz, i;
+ uint8_t *a;
+
+ /* Variable-width arrays */
+
+ payload = c->n_offsets > 0 ? c->offsets[c->n_offsets-1] - c->begin : 0;
+ sz = bus_gvariant_determine_word_size(payload, c->n_offsets);
+
+ a = message_extend_body(m, 1, sz * c->n_offsets, true, false);
+ if (!a)
+ return -ENOMEM;
+
+ for (i = 0; i < c->n_offsets; i++)
+ bus_gvariant_write_word_le(a + sz*i, sz, c->offsets[i] - c->begin);
+ } else {
+ void *a;
+
+ /* Fixed-width or empty arrays */
+
+ a = message_extend_body(m, 1, 0, true, false); /* let's add offset to parent */
+ if (!a)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int bus_message_close_variant(sd_bus_message *m, struct bus_container *c) {
+ uint8_t *a;
+ size_t l;
+
+ assert(m);
+ assert(c);
+ assert(c->signature);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ return 0;
+
+ l = strlen(c->signature);
+
+ a = message_extend_body(m, 1, 1 + l, true, false);
+ if (!a)
+ return -ENOMEM;
+
+ a[0] = 0;
+ memcpy(a+1, c->signature, l);
+
+ return 0;
+}
+
+static int bus_message_close_struct(sd_bus_message *m, struct bus_container *c, bool add_offset) {
+ bool fixed_size = true;
+ size_t n_variable = 0;
+ unsigned i = 0;
+ const char *p;
+ uint8_t *a;
+ int r;
+
+ assert(m);
+ assert(c);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ return 0;
+
+ p = strempty(c->signature);
+ while (*p != 0) {
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ r = bus_gvariant_is_fixed_size(t);
+ if (r < 0)
+ return r;
+ }
+
+ assert(!c->need_offsets || i <= c->n_offsets);
+
+ /* We need to add an offset for each item that has a
+ * variable size and that is not the last one in the
+ * list */
+ if (r == 0)
+ fixed_size = false;
+ if (r == 0 && p[n] != 0)
+ n_variable++;
+
+ i++;
+ p += n;
+ }
+
+ assert(!c->need_offsets || i == c->n_offsets);
+ assert(c->need_offsets || n_variable == 0);
+
+ if (isempty(c->signature)) {
+ /* The unary type is encoded as fixed 1 byte padding */
+ a = message_extend_body(m, 1, 1, add_offset, false);
+ if (!a)
+ return -ENOMEM;
+
+ *a = 0;
+ } else if (n_variable <= 0) {
+ int alignment = 1;
+
+ /* Structures with fixed-size members only have to be
+ * fixed-size themselves. But gvariant requires all fixed-size
+ * elements to be sized a multiple of their alignment. Hence,
+ * we must *always* add final padding after the last member so
+ * the overall size of the structure is properly aligned. */
+ if (fixed_size)
+ alignment = bus_gvariant_get_alignment(strempty(c->signature));
+
+ assert(alignment > 0);
+
+ a = message_extend_body(m, alignment, 0, add_offset, false);
+ if (!a)
+ return -ENOMEM;
+ } else {
+ size_t sz;
+ unsigned j;
+
+ assert(c->offsets[c->n_offsets-1] == m->body_size);
+
+ sz = bus_gvariant_determine_word_size(m->body_size - c->begin, n_variable);
+
+ a = message_extend_body(m, 1, sz * n_variable, add_offset, false);
+ if (!a)
+ return -ENOMEM;
+
+ p = strempty(c->signature);
+ for (i = 0, j = 0; i < c->n_offsets; i++) {
+ unsigned k;
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ p += n;
+
+ r = bus_gvariant_is_fixed_size(t);
+ if (r < 0)
+ return r;
+ if (r > 0 || p[0] == 0)
+ continue;
+ }
+
+ k = n_variable - 1 - j;
+
+ bus_gvariant_write_word_le(a + k * sz, sz, c->offsets[i] - c->begin);
+
+ j++;
+ }
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_close_container(sd_bus_message *m) {
+ struct bus_container *c;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ if (c->signature && c->signature[c->index] != 0)
+ return -EINVAL;
+
+ m->n_containers--;
+
+ if (c->enclosing == SD_BUS_TYPE_ARRAY)
+ r = bus_message_close_array(m, c);
+ else if (c->enclosing == SD_BUS_TYPE_VARIANT)
+ r = bus_message_close_variant(m, c);
+ else if (IN_SET(c->enclosing, SD_BUS_TYPE_STRUCT, SD_BUS_TYPE_DICT_ENTRY))
+ r = bus_message_close_struct(m, c, true);
+ else
+ assert_not_reached("Unknown container type");
+
+ free(c->signature);
+ free(c->offsets);
+
+ return r;
+}
+
+typedef struct {
+ const char *types;
+ unsigned n_struct;
+ unsigned n_array;
+} TypeStack;
+
+static int type_stack_push(TypeStack *stack, unsigned max, unsigned *i, const char *types, unsigned n_struct, unsigned n_array) {
+ assert(stack);
+ assert(max > 0);
+
+ if (*i >= max)
+ return -EINVAL;
+
+ stack[*i].types = types;
+ stack[*i].n_struct = n_struct;
+ stack[*i].n_array = n_array;
+ (*i)++;
+
+ return 0;
+}
+
+static int type_stack_pop(TypeStack *stack, unsigned max, unsigned *i, const char **types, unsigned *n_struct, unsigned *n_array) {
+ assert(stack);
+ assert(max > 0);
+ assert(types);
+ assert(n_struct);
+ assert(n_array);
+
+ if (*i <= 0)
+ return 0;
+
+ (*i)--;
+ *types = stack[*i].types;
+ *n_struct = stack[*i].n_struct;
+ *n_array = stack[*i].n_array;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_appendv(
+ sd_bus_message *m,
+ const char *types,
+ va_list ap) {
+
+ unsigned n_array, n_struct;
+ TypeStack stack[BUS_CONTAINER_DEPTH];
+ unsigned stack_ptr = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(types, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ n_array = (unsigned) -1;
+ n_struct = strlen(types);
+
+ for (;;) {
+ const char *t;
+
+ if (n_array == 0 || (n_array == (unsigned) -1 && n_struct == 0)) {
+ r = type_stack_pop(stack, ELEMENTSOF(stack), &stack_ptr, &types, &n_struct, &n_array);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ t = types;
+ if (n_array != (unsigned) -1)
+ n_array--;
+ else {
+ types++;
+ n_struct--;
+ }
+
+ switch (*t) {
+
+ case SD_BUS_TYPE_BYTE: {
+ uint8_t x;
+
+ x = (uint8_t) va_arg(ap, int);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD: {
+ uint32_t x;
+
+ /* We assume a boolean is the same as int32_t */
+ assert_cc(sizeof(int32_t) == sizeof(int));
+
+ x = va_arg(ap, uint32_t);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16: {
+ uint16_t x;
+
+ x = (uint16_t) va_arg(ap, int);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t x;
+
+ x = va_arg(ap, uint64_t);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double x;
+
+ x = va_arg(ap, double);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ const char *x;
+
+ x = va_arg(ap, const char*);
+ r = sd_bus_message_append_basic(m, *t, x);
+ break;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(t + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k + 1];
+ memcpy(s, t + 1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r < 0)
+ return r;
+ }
+
+ if (n_array == (unsigned) -1) {
+ types += k;
+ n_struct -= k;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k;
+ n_array = va_arg(ap, unsigned);
+
+ break;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *s;
+
+ s = va_arg(ap, const char*);
+ if (!s)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_VARIANT, s);
+ if (r < 0)
+ return r;
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = s;
+ n_struct = strlen(s);
+ n_array = (unsigned) -1;
+
+ break;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(t, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k - 1];
+
+ memcpy(s, t + 1, k - 2);
+ s[k - 2] = 0;
+
+ r = sd_bus_message_open_container(m, *t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r < 0)
+ return r;
+ }
+
+ if (n_array == (unsigned) -1) {
+ types += k - 1;
+ n_struct -= k - 1;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k - 2;
+ n_array = (unsigned) -1;
+
+ break;
+ }
+
+ default:
+ r = -EINVAL;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_message_append(sd_bus_message *m, const char *types, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_message_appendv(m, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_message_append_array_space(
+ sd_bus_message *m,
+ char type,
+ size_t size,
+ void **ptr) {
+
+ ssize_t align, sz;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type) && type != SD_BUS_TYPE_BOOLEAN, -EINVAL);
+ assert_return(ptr || size == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ /* alignment and size of the trivial types (except bool) is
+ * identical for gvariant and dbus1 marshalling */
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+
+ assert_se(align > 0);
+ assert_se(sz > 0);
+
+ if (size % sz != 0)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r < 0)
+ return r;
+
+ a = message_extend_body(m, align, size, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ *ptr = a;
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array(
+ sd_bus_message *m,
+ char type,
+ const void *ptr,
+ size_t size) {
+ int r;
+ void *p;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(ptr || size == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = sd_bus_message_append_array_space(m, type, size, &p);
+ if (r < 0)
+ return r;
+
+ memcpy_safe(p, ptr, size);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array_iovec(
+ sd_bus_message *m,
+ char type,
+ const struct iovec *iov,
+ unsigned n /* should be size_t, but is API now… 😞 */) {
+
+ size_t size;
+ unsigned i;
+ void *p;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(iov || n == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ size = IOVEC_TOTAL_SIZE(iov, n);
+
+ r = sd_bus_message_append_array_space(m, type, size, &p);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i++) {
+
+ if (iov[i].iov_base)
+ memcpy(p, iov[i].iov_base, iov[i].iov_len);
+ else
+ memzero(p, iov[i].iov_len);
+
+ p = (uint8_t*) p + iov[i].iov_len;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array_memfd(
+ sd_bus_message *m,
+ char type,
+ int memfd,
+ uint64_t offset,
+ uint64_t size) {
+
+ _cleanup_close_ int copy_fd = -1;
+ struct bus_body_part *part;
+ ssize_t align, sz;
+ uint64_t real_size;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(memfd >= 0, -EBADF);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(size > 0, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = memfd_set_sealed(memfd);
+ if (r < 0)
+ return r;
+
+ copy_fd = fcntl(memfd, F_DUPFD_CLOEXEC, 3);
+ if (copy_fd < 0)
+ return copy_fd;
+
+ r = memfd_get_size(memfd, &real_size);
+ if (r < 0)
+ return r;
+
+ if (offset == 0 && size == (uint64_t) -1)
+ size = real_size;
+ else if (offset + size > real_size)
+ return -EMSGSIZE;
+
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+
+ assert_se(align > 0);
+ assert_se(sz > 0);
+
+ if (offset % align != 0)
+ return -EINVAL;
+
+ if (size % sz != 0)
+ return -EINVAL;
+
+ if (size > (uint64_t) (uint32_t) -1)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r < 0)
+ return r;
+
+ a = message_extend_body(m, align, 0, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ part = message_append_part(m);
+ if (!part)
+ return -ENOMEM;
+
+ part->memfd = copy_fd;
+ part->memfd_offset = offset;
+ part->sealed = true;
+ part->size = size;
+ copy_fd = -1;
+
+ m->body_size += size;
+ message_extend_containers(m, size);
+
+ return sd_bus_message_close_container(m);
+}
+
+_public_ int sd_bus_message_append_string_memfd(
+ sd_bus_message *m,
+ int memfd,
+ uint64_t offset,
+ uint64_t size) {
+
+ _cleanup_close_ int copy_fd = -1;
+ struct bus_body_part *part;
+ struct bus_container *c;
+ uint64_t real_size;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(memfd >= 0, -EBADF);
+ assert_return(size > 0, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = memfd_set_sealed(memfd);
+ if (r < 0)
+ return r;
+
+ copy_fd = fcntl(memfd, FD_CLOEXEC, 3);
+ if (copy_fd < 0)
+ return copy_fd;
+
+ r = memfd_get_size(memfd, &real_size);
+ if (r < 0)
+ return r;
+
+ if (offset == 0 && size == (uint64_t) -1)
+ size = real_size;
+ else if (offset + size > real_size)
+ return -EMSGSIZE;
+
+ /* We require this to be NUL terminated */
+ if (size == 0)
+ return -EINVAL;
+
+ if (size > (uint64_t) (uint32_t) -1)
+ return -EINVAL;
+
+ c = message_get_last_container(m);
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRING)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRING), NULL);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m)) {
+ a = message_extend_body(m, 4, 4, false, false);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint32_t*) a = size - 1;
+ }
+
+ part = message_append_part(m);
+ if (!part)
+ return -ENOMEM;
+
+ part->memfd = copy_fd;
+ part->memfd_offset = offset;
+ part->sealed = true;
+ part->size = size;
+ copy_fd = -1;
+
+ m->body_size += size;
+ message_extend_containers(m, size);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ r = message_add_offset(m, m->body_size);
+ if (r < 0) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_strv(sd_bus_message *m, char **l) {
+ char **i;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, l) {
+ r = sd_bus_message_append_basic(m, 's', *i);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(m);
+}
+
+static int bus_message_close_header(sd_bus_message *m) {
+
+ assert(m);
+
+ /* The actual user data is finished now, we just complete the
+ variant and struct now (at least on gvariant). Remember
+ this position, so that during parsing we know where to
+ put the outer container end. */
+ m->user_body_size = m->body_size;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ const char *signature;
+ size_t sz, l;
+ void *d;
+
+ /* Add offset table to end of fields array */
+ if (m->n_header_offsets >= 1) {
+ uint8_t *a;
+ unsigned i;
+
+ assert(m->fields_size == m->header_offsets[m->n_header_offsets-1]);
+
+ sz = bus_gvariant_determine_word_size(m->fields_size, m->n_header_offsets);
+ a = message_extend_fields(m, 1, sz * m->n_header_offsets, false);
+ if (!a)
+ return -ENOMEM;
+
+ for (i = 0; i < m->n_header_offsets; i++)
+ bus_gvariant_write_word_le(a + sz*i, sz, m->header_offsets[i]);
+ }
+
+ /* Add gvariant NUL byte plus signature to the end of
+ * the body, followed by the final offset pointing to
+ * the end of the fields array */
+
+ signature = strempty(m->root_container.signature);
+ l = strlen(signature);
+
+ sz = bus_gvariant_determine_word_size(sizeof(struct bus_header) + ALIGN8(m->fields_size) + m->body_size + 1 + l + 2, 1);
+ d = message_extend_body(m, 1, 1 + l + 2 + sz, false, true);
+ if (!d)
+ return -ENOMEM;
+
+ *(uint8_t*) d = 0;
+ *((uint8_t*) d + 1) = SD_BUS_TYPE_STRUCT_BEGIN;
+ memcpy((uint8_t*) d + 2, signature, l);
+ *((uint8_t*) d + 1 + l + 1) = SD_BUS_TYPE_STRUCT_END;
+
+ bus_gvariant_write_word_le((uint8_t*) d + 1 + l + 2, sz, sizeof(struct bus_header) + m->fields_size);
+
+ m->footer = d;
+ m->footer_accessible = 1 + l + 2 + sz;
+ } else {
+ m->header->dbus1.fields_size = m->fields_size;
+ m->header->dbus1.body_size = m->body_size;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_seal(sd_bus_message *m, uint64_t cookie, uint64_t timeout_usec) {
+ struct bus_body_part *part;
+ size_t a;
+ unsigned i;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (m->sealed)
+ return -EPERM;
+
+ if (m->n_containers > 0)
+ return -EBADMSG;
+
+ if (m->poisoned)
+ return -ESTALE;
+
+ if (cookie > 0xffffffffULL &&
+ !BUS_MESSAGE_IS_GVARIANT(m))
+ return -EOPNOTSUPP;
+
+ /* In vtables the return signature of method calls is listed,
+ * let's check if they match if this is a response */
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_RETURN &&
+ m->enforced_reply_signature &&
+ !streq(strempty(m->root_container.signature), m->enforced_reply_signature))
+ return -ENOMSG;
+
+ /* If gvariant marshalling is used we need to close the body structure */
+ r = bus_message_close_struct(m, &m->root_container, false);
+ if (r < 0)
+ return r;
+
+ /* If there's a non-trivial signature set, then add it in
+ * here, but only on dbus1 */
+ if (!isempty(m->root_container.signature) && !BUS_MESSAGE_IS_GVARIANT(m)) {
+ r = message_append_field_signature(m, BUS_MESSAGE_HEADER_SIGNATURE, m->root_container.signature, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ if (m->n_fds > 0) {
+ r = message_append_field_uint32(m, BUS_MESSAGE_HEADER_UNIX_FDS, m->n_fds);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_message_close_header(m);
+ if (r < 0)
+ return r;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m))
+ m->header->dbus2.cookie = cookie;
+ else
+ m->header->dbus1.serial = (uint32_t) cookie;
+
+ m->timeout = m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED ? 0 : timeout_usec;
+
+ /* Add padding at the end of the fields part, since we know
+ * the body needs to start at an 8 byte alignment. We made
+ * sure we allocated enough space for this, so all we need to
+ * do here is to zero it out. */
+ a = ALIGN8(m->fields_size) - m->fields_size;
+ if (a > 0)
+ memzero((uint8_t*) BUS_MESSAGE_FIELDS(m) + m->fields_size, a);
+
+ /* If this is something we can send as memfd, then let's seal
+ the memfd now. Note that we can send memfds as payload only
+ for directed messages, and not for broadcasts. */
+ if (m->destination && m->bus->use_memfd) {
+ MESSAGE_FOREACH_PART(part, i, m)
+ if (part->memfd >= 0 &&
+ !part->sealed &&
+ (part->size > MEMFD_MIN_SIZE || m->bus->use_memfd < 0) &&
+ part != m->body_end) { /* The last part may never be sent as memfd */
+ uint64_t sz;
+
+ /* Try to seal it if that makes
+ * sense. First, unmap our own map to
+ * make sure we don't keep it busy. */
+ bus_body_part_unmap(part);
+
+ /* Then, sync up real memfd size */
+ sz = part->size;
+ r = memfd_set_size(part->memfd, sz);
+ if (r < 0)
+ return r;
+
+ /* Finally, try to seal */
+ if (memfd_set_sealed(part->memfd) >= 0)
+ part->sealed = true;
+ }
+ }
+
+ m->root_container.end = m->user_body_size;
+ m->root_container.index = 0;
+ m->root_container.offset_index = 0;
+ m->root_container.item_size = m->root_container.n_offsets > 0 ? m->root_container.offsets[0] : 0;
+
+ m->sealed = true;
+
+ return 0;
+}
+
+int bus_body_part_map(struct bus_body_part *part) {
+ void *p;
+ size_t psz, shift;
+
+ assert_se(part);
+
+ if (part->data)
+ return 0;
+
+ if (part->size <= 0)
+ return 0;
+
+ /* For smaller zero parts (as used for padding) we don't need to map anything... */
+ if (part->memfd < 0 && part->is_zero && part->size < 8) {
+ static const uint8_t zeroes[7] = { };
+ part->data = (void*) zeroes;
+ return 0;
+ }
+
+ shift = PAGE_OFFSET(part->memfd_offset);
+ psz = PAGE_ALIGN(part->size + shift);
+
+ if (part->memfd >= 0)
+ p = mmap(NULL, psz, PROT_READ, MAP_PRIVATE, part->memfd, part->memfd_offset - shift);
+ else if (part->is_zero)
+ p = mmap(NULL, psz, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ else
+ return -EINVAL;
+
+ if (p == MAP_FAILED)
+ return -errno;
+
+ part->mapped = psz;
+ part->mmap_begin = p;
+ part->data = (uint8_t*) p + shift;
+ part->munmap_this = true;
+
+ return 0;
+}
+
+void bus_body_part_unmap(struct bus_body_part *part) {
+
+ assert_se(part);
+
+ if (part->memfd < 0)
+ return;
+
+ if (!part->mmap_begin)
+ return;
+
+ if (!part->munmap_this)
+ return;
+
+ assert_se(munmap(part->mmap_begin, part->mapped) == 0);
+
+ part->mmap_begin = NULL;
+ part->data = NULL;
+ part->mapped = 0;
+ part->munmap_this = false;
+
+ return;
+}
+
+static int buffer_peek(const void *p, uint32_t sz, size_t *rindex, size_t align, size_t nbytes, void **r) {
+ size_t k, start, end;
+
+ assert(rindex);
+ assert(align > 0);
+
+ start = ALIGN_TO((size_t) *rindex, align);
+ end = start + nbytes;
+
+ if (end > sz)
+ return -EBADMSG;
+
+ /* Verify that padding is 0 */
+ for (k = *rindex; k < start; k++)
+ if (((const uint8_t*) p)[k] != 0)
+ return -EBADMSG;
+
+ if (r)
+ *r = (uint8_t*) p + start;
+
+ *rindex = end;
+
+ return 1;
+}
+
+static bool message_end_of_signature(sd_bus_message *m) {
+ struct bus_container *c;
+
+ assert(m);
+
+ c = message_get_last_container(m);
+ return !c->signature || c->signature[c->index] == 0;
+}
+
+static bool message_end_of_array(sd_bus_message *m, size_t index) {
+ struct bus_container *c;
+
+ assert(m);
+
+ c = message_get_last_container(m);
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return false;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m))
+ return index >= c->end;
+ else {
+ assert(c->array_size);
+ return index >= c->begin + BUS_MESSAGE_BSWAP32(m, *c->array_size);
+ }
+}
+
+_public_ int sd_bus_message_at_end(sd_bus_message *m, int complete) {
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (complete && m->n_containers > 0)
+ return false;
+
+ if (message_end_of_signature(m))
+ return true;
+
+ if (message_end_of_array(m, m->rindex))
+ return true;
+
+ return false;
+}
+
+static struct bus_body_part* find_part(sd_bus_message *m, size_t index, size_t sz, void **p) {
+ struct bus_body_part *part;
+ size_t begin;
+ int r;
+
+ assert(m);
+
+ if (m->cached_rindex_part && index >= m->cached_rindex_part_begin) {
+ part = m->cached_rindex_part;
+ begin = m->cached_rindex_part_begin;
+ } else {
+ part = &m->body;
+ begin = 0;
+ }
+
+ while (part) {
+ if (index < begin)
+ return NULL;
+
+ if (index + sz <= begin + part->size) {
+
+ r = bus_body_part_map(part);
+ if (r < 0)
+ return NULL;
+
+ if (p)
+ *p = part->data ? (uint8_t*) part->data + index - begin
+ : NULL; /* Avoid dereferencing a NULL pointer. */
+
+ m->cached_rindex_part = part;
+ m->cached_rindex_part_begin = begin;
+
+ return part;
+ }
+
+ begin += part->size;
+ part = part->next;
+ }
+
+ return NULL;
+}
+
+static int container_next_item(sd_bus_message *m, struct bus_container *c, size_t *rindex) {
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(rindex);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ return 0;
+
+ if (c->enclosing == SD_BUS_TYPE_ARRAY) {
+ int sz;
+
+ sz = bus_gvariant_get_size(c->signature);
+ if (sz == 0)
+ return -EBADMSG;
+ if (sz < 0) {
+ int alignment;
+
+ if (c->offset_index+1 >= c->n_offsets)
+ goto end;
+
+ /* Variable-size array */
+
+ alignment = bus_gvariant_get_alignment(c->signature);
+ assert(alignment > 0);
+
+ *rindex = ALIGN_TO(c->offsets[c->offset_index], alignment);
+ assert(c->offsets[c->offset_index+1] >= *rindex);
+ c->item_size = c->offsets[c->offset_index+1] - *rindex;
+ } else {
+
+ if (c->offset_index+1 >= (c->end-c->begin)/sz)
+ goto end;
+
+ /* Fixed-size array */
+ *rindex = c->begin + (c->offset_index+1) * sz;
+ c->item_size = sz;
+ }
+
+ c->offset_index++;
+
+ } else if (IN_SET(c->enclosing, 0, SD_BUS_TYPE_STRUCT, SD_BUS_TYPE_DICT_ENTRY)) {
+
+ int alignment;
+ size_t n, j;
+
+ if (c->offset_index+1 >= c->n_offsets)
+ goto end;
+
+ r = signature_element_length(c->signature + c->index, &n);
+ if (r < 0)
+ return r;
+
+ r = signature_element_length(c->signature + c->index + n, &j);
+ if (r < 0)
+ return r;
+ else {
+ char t[j+1];
+ memcpy(t, c->signature + c->index + n, j);
+ t[j] = 0;
+
+ alignment = bus_gvariant_get_alignment(t);
+ }
+
+ assert(alignment > 0);
+
+ *rindex = ALIGN_TO(c->offsets[c->offset_index], alignment);
+ assert(c->offsets[c->offset_index+1] >= *rindex);
+ c->item_size = c->offsets[c->offset_index+1] - *rindex;
+
+ c->offset_index++;
+
+ } else if (c->enclosing == SD_BUS_TYPE_VARIANT)
+ goto end;
+ else
+ assert_not_reached("Unknown container type");
+
+ return 0;
+
+end:
+ /* Reached the end */
+ *rindex = c->end;
+ c->item_size = 0;
+ return 0;
+}
+
+static int message_peek_body(
+ sd_bus_message *m,
+ size_t *rindex,
+ size_t align,
+ size_t nbytes,
+ void **ret) {
+
+ size_t k, start, end, padding;
+ struct bus_body_part *part;
+ uint8_t *q;
+
+ assert(m);
+ assert(rindex);
+ assert(align > 0);
+
+ start = ALIGN_TO((size_t) *rindex, align);
+ padding = start - *rindex;
+ end = start + nbytes;
+
+ if (end > m->user_body_size)
+ return -EBADMSG;
+
+ part = find_part(m, *rindex, padding, (void**) &q);
+ if (!part)
+ return -EBADMSG;
+
+ if (q) {
+ /* Verify padding */
+ for (k = 0; k < padding; k++)
+ if (q[k] != 0)
+ return -EBADMSG;
+ }
+
+ part = find_part(m, start, nbytes, (void**) &q);
+ if (!part || (nbytes > 0 && !q))
+ return -EBADMSG;
+
+ *rindex = end;
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static bool validate_nul(const char *s, size_t l) {
+
+ /* Check for NUL chars in the string */
+ if (memchr(s, 0, l))
+ return false;
+
+ /* Check for NUL termination */
+ if (s[l] != 0)
+ return false;
+
+ return true;
+}
+
+static bool validate_string(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ /* Check if valid UTF8 */
+ if (!utf8_is_valid(s))
+ return false;
+
+ return true;
+}
+
+static bool validate_signature(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ /* Check if valid signature */
+ if (!signature_is_valid(s, true))
+ return false;
+
+ return true;
+}
+
+static bool validate_object_path(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ if (!object_path_is_valid(s))
+ return false;
+
+ return true;
+}
+
+_public_ int sd_bus_message_read_basic(sd_bus_message *m, char type, void *p) {
+ struct bus_container *c;
+ size_t rindex;
+ void *q;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(bus_type_is_basic(type), -EINVAL);
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+ if (c->signature[c->index] != type)
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE)) {
+ bool ok;
+
+ /* D-Bus spec: The marshalling formats for the string-like types all end
+ * with a single zero (NUL) byte, but that byte is not considered to be part
+ * of the text. */
+ if (c->item_size == 0)
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, c->item_size, &q);
+ if (r < 0)
+ return r;
+
+ if (type == SD_BUS_TYPE_STRING)
+ ok = validate_string(q, c->item_size-1);
+ else if (type == SD_BUS_TYPE_OBJECT_PATH)
+ ok = validate_object_path(q, c->item_size-1);
+ else
+ ok = validate_signature(q, c->item_size-1);
+
+ if (!ok)
+ return -EBADMSG;
+
+ if (p)
+ *(const char**) p = q;
+ } else {
+ int sz, align;
+
+ sz = bus_gvariant_get_size(CHAR_TO_STR(type));
+ assert(sz > 0);
+ if ((size_t) sz != c->item_size)
+ return -EBADMSG;
+
+ align = bus_gvariant_get_alignment(CHAR_TO_STR(type));
+ assert(align > 0);
+
+ r = message_peek_body(m, &rindex, align, c->item_size, &q);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE:
+ if (p)
+ *(uint8_t*) p = *(uint8_t*) q;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ if (p)
+ *(int*) p = !!*(uint8_t*) q;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ if (p)
+ *(uint16_t*) p = BUS_MESSAGE_BSWAP16(m, *(uint16_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ if (p)
+ *(uint32_t*) p = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ if (p)
+ *(uint64_t*) p = BUS_MESSAGE_BSWAP64(m, *(uint64_t*) q);
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD: {
+ uint32_t j;
+
+ j = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ if (j >= m->n_fds)
+ return -EBADMSG;
+
+ if (p)
+ *(int*) p = m->fds[j];
+
+ break;
+ }
+
+ default:
+ assert_not_reached("unexpected type");
+ }
+ }
+
+ r = container_next_item(m, c, &rindex);
+ if (r < 0)
+ return r;
+ } else {
+
+ if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH)) {
+ uint32_t l;
+ bool ok;
+
+ r = message_peek_body(m, &rindex, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ l = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ if (l == UINT32_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (type == SD_BUS_TYPE_OBJECT_PATH)
+ ok = validate_object_path(q, l);
+ else
+ ok = validate_string(q, l);
+ if (!ok)
+ return -EBADMSG;
+
+ if (p)
+ *(const char**) p = q;
+
+ } else if (type == SD_BUS_TYPE_SIGNATURE) {
+ uint8_t l;
+
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (p)
+ *(const char**) p = q;
+
+ } else {
+ ssize_t sz, align;
+
+ align = bus_type_get_alignment(type);
+ assert(align > 0);
+
+ sz = bus_type_get_size(type);
+ assert(sz > 0);
+
+ r = message_peek_body(m, &rindex, align, sz, &q);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE:
+ if (p)
+ *(uint8_t*) p = *(uint8_t*) q;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ if (p)
+ *(int*) p = !!*(uint32_t*) q;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ if (p)
+ *(uint16_t*) p = BUS_MESSAGE_BSWAP16(m, *(uint16_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ if (p)
+ *(uint32_t*) p = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ if (p)
+ *(uint64_t*) p = BUS_MESSAGE_BSWAP64(m, *(uint64_t*) q);
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD: {
+ uint32_t j;
+
+ j = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ if (j >= m->n_fds)
+ return -EBADMSG;
+
+ if (p)
+ *(int*) p = m->fds[j];
+ break;
+ }
+
+ default:
+ assert_not_reached("Unknown basic type...");
+ }
+ }
+ }
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 1;
+}
+
+static int bus_message_enter_array(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ uint32_t **array_size,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ size_t rindex;
+ void *q;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(array_size);
+ assert(item_size);
+ assert(offsets);
+ assert(n_offsets);
+
+ if (!signature_is_single(contents, true))
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ if (c->signature[c->index] != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!startswith(c->signature + c->index + 1, contents))
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* dbus1 */
+ int alignment;
+
+ r = message_peek_body(m, &rindex, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ if (BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q) > BUS_ARRAY_MAX_SIZE)
+ return -EBADMSG;
+
+ alignment = bus_type_get_alignment(contents[0]);
+ if (alignment < 0)
+ return alignment;
+
+ r = message_peek_body(m, &rindex, alignment, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *array_size = (uint32_t*) q;
+
+ } else if (c->item_size <= 0) {
+
+ /* gvariant: empty array */
+ *item_size = 0;
+ *offsets = NULL;
+ *n_offsets = 0;
+
+ } else if (bus_gvariant_is_fixed_size(contents)) {
+
+ /* gvariant: fixed length array */
+ *item_size = bus_gvariant_get_size(contents);
+ *offsets = NULL;
+ *n_offsets = 0;
+
+ } else {
+ size_t where, previous = 0, framing, sz;
+ int alignment;
+ unsigned i;
+
+ /* gvariant: variable length array */
+ sz = bus_gvariant_determine_word_size(c->item_size, 0);
+
+ where = rindex + c->item_size - sz;
+ r = message_peek_body(m, &where, 1, sz, &q);
+ if (r < 0)
+ return r;
+
+ framing = bus_gvariant_read_word_le(q, sz);
+ if (framing > c->item_size - sz)
+ return -EBADMSG;
+ if ((c->item_size - framing) % sz != 0)
+ return -EBADMSG;
+
+ *n_offsets = (c->item_size - framing) / sz;
+
+ where = rindex + framing;
+ r = message_peek_body(m, &where, 1, *n_offsets * sz, &q);
+ if (r < 0)
+ return r;
+
+ *offsets = new(size_t, *n_offsets);
+ if (!*offsets)
+ return -ENOMEM;
+
+ alignment = bus_gvariant_get_alignment(c->signature);
+ assert(alignment > 0);
+
+ for (i = 0; i < *n_offsets; i++) {
+ size_t x, start;
+
+ start = ALIGN_TO(previous, alignment);
+
+ x = bus_gvariant_read_word_le((uint8_t*) q + i * sz, sz);
+ if (x > c->item_size - sz)
+ return -EBADMSG;
+ if (x < start)
+ return -EBADMSG;
+
+ (*offsets)[i] = rindex + x;
+ previous = x;
+ }
+
+ *item_size = (*offsets)[0] - rindex;
+ }
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + strlen(contents);
+
+ return 1;
+}
+
+static int bus_message_enter_variant(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *item_size) {
+
+ size_t rindex;
+ uint8_t l;
+ void *q;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(item_size);
+
+ if (!signature_is_single(contents, false))
+ return -EINVAL;
+
+ if (*contents == SD_BUS_TYPE_DICT_ENTRY_BEGIN)
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ if (c->signature[c->index] != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ size_t k, where;
+
+ k = strlen(contents);
+ if (1+k > c->item_size)
+ return -EBADMSG;
+
+ where = rindex + c->item_size - (1+k);
+ r = message_peek_body(m, &where, 1, 1+k, &q);
+ if (r < 0)
+ return r;
+
+ if (*(char*) q != 0)
+ return -EBADMSG;
+
+ if (memcmp((uint8_t*) q+1, contents, k))
+ return -ENXIO;
+
+ *item_size = c->item_size - (1+k);
+
+ } else {
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (!streq(q, contents))
+ return -ENXIO;
+ }
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 1;
+}
+
+static int build_struct_offsets(
+ sd_bus_message *m,
+ const char *signature,
+ size_t size,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ unsigned n_variable = 0, n_total = 0, v;
+ size_t previous, where;
+ const char *p;
+ size_t sz;
+ void *q;
+ int r;
+
+ assert(m);
+ assert(item_size);
+ assert(offsets);
+ assert(n_offsets);
+
+ if (isempty(signature)) {
+ /* Unary type is encoded as *fixed* 1 byte padding */
+ r = message_peek_body(m, &m->rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ if (*(uint8_t *) q != 0)
+ return -EBADMSG;
+
+ *item_size = 0;
+ *offsets = NULL;
+ *n_offsets = 0;
+ return 0;
+ }
+
+ sz = bus_gvariant_determine_word_size(size, 0);
+ if (sz <= 0)
+ return -EBADMSG;
+
+ /* First, loop over signature and count variable elements and
+ * elements in general. We use this to know how large the
+ * offset array is at the end of the structure. Note that
+ * GVariant only stores offsets for all variable size elements
+ * that are not the last item. */
+
+ p = signature;
+ while (*p != 0) {
+ size_t n;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ r = bus_gvariant_is_fixed_size(t);
+ }
+
+ if (r < 0)
+ return r;
+ if (r == 0 && p[n] != 0) /* except the last item */
+ n_variable++;
+ n_total++;
+
+ p += n;
+ }
+
+ if (size < n_variable * sz)
+ return -EBADMSG;
+
+ where = m->rindex + size - (n_variable * sz);
+ r = message_peek_body(m, &where, 1, n_variable * sz, &q);
+ if (r < 0)
+ return r;
+
+ v = n_variable;
+
+ *offsets = new(size_t, n_total);
+ if (!*offsets)
+ return -ENOMEM;
+
+ *n_offsets = 0;
+
+ /* Second, loop again and build an offset table */
+ p = signature;
+ previous = m->rindex;
+ while (*p != 0) {
+ size_t n, offset;
+ int k;
+
+ r = signature_element_length(p, &n);
+ if (r < 0)
+ return r;
+ else {
+ char t[n+1];
+
+ memcpy(t, p, n);
+ t[n] = 0;
+
+ size_t align = bus_gvariant_get_alignment(t);
+ assert(align > 0);
+
+ /* The possible start of this member after including alignment */
+ size_t start = ALIGN_TO(previous, align);
+
+ k = bus_gvariant_get_size(t);
+ if (k < 0) {
+ size_t x;
+
+ /* Variable size */
+ if (v > 0) {
+ v--;
+
+ x = bus_gvariant_read_word_le((uint8_t*) q + v*sz, sz);
+ if (x >= size)
+ return -EBADMSG;
+ } else
+ /* The last item's end is determined
+ * from the start of the offset array */
+ x = size - (n_variable * sz);
+
+ offset = m->rindex + x;
+ if (offset < start)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "For type %s with alignment %zu, message specifies offset %zu which is smaller than previous end %zu + alignment = %zu",
+ t, align,
+ offset,
+ previous,
+ start);
+ } else
+ /* Fixed size */
+ offset = start + k;
+ }
+
+ previous = (*offsets)[(*n_offsets)++] = offset;
+ p += n;
+ }
+
+ assert(v == 0);
+ assert(*n_offsets == n_total);
+
+ *item_size = (*offsets)[0] - m->rindex;
+ return 0;
+}
+
+static int enter_struct_or_dict_entry(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(item_size);
+ assert(offsets);
+ assert(n_offsets);
+
+ if (!BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ /* dbus1 */
+ r = message_peek_body(m, &m->rindex, 8, 0, NULL);
+ if (r < 0)
+ return r;
+
+ } else
+ /* gvariant with contents */
+ return build_struct_offsets(m, contents, c->item_size, item_size, offsets, n_offsets);
+
+ return 0;
+}
+
+static int bus_message_enter_struct(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ size_t l;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(item_size);
+ assert(offsets);
+ assert(n_offsets);
+
+ if (!signature_is_valid(contents, false))
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRUCT_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_STRUCT_END)
+ return -ENXIO;
+
+ r = enter_struct_or_dict_entry(m, c, contents, item_size, offsets, n_offsets);
+ if (r < 0)
+ return r;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + l + 1;
+
+ return 1;
+}
+
+static int bus_message_enter_dict_entry(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *item_size,
+ size_t **offsets,
+ size_t *n_offsets) {
+
+ size_t l;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+
+ if (!signature_is_pair(contents))
+ return -EINVAL;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return 0;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_DICT_ENTRY_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_DICT_ENTRY_END)
+ return -ENXIO;
+
+ r = enter_struct_or_dict_entry(m, c, contents, item_size, offsets, n_offsets);
+ if (r < 0)
+ return r;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + l + 1;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_enter_container(sd_bus_message *m,
+ char type,
+ const char *contents) {
+ struct bus_container *c;
+ uint32_t *array_size = NULL;
+ _cleanup_free_ char *signature = NULL;
+ size_t before, end;
+ _cleanup_free_ size_t *offsets = NULL;
+ size_t n_offsets = 0, item_size = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(type != 0 || !contents, -EINVAL);
+
+ if (type == 0 || !contents) {
+ const char *cc;
+ char tt;
+
+ /* Allow entering into anonymous containers */
+ r = sd_bus_message_peek_type(m, &tt, &cc);
+ if (r < 0)
+ return r;
+
+ if (type != 0 && type != tt)
+ return -ENXIO;
+
+ if (contents && !streq(contents, cc))
+ return -ENXIO;
+
+ type = tt;
+ contents = cc;
+ }
+
+ /*
+ * We enforce a global limit on container depth, that is much
+ * higher than the 32 structs and 32 arrays the specification
+ * mandates. This is simpler to implement for us, and we need
+ * this only to ensure our container array doesn't grow
+ * without bounds. We are happy to return any data from a
+ * message as long as the data itself is valid, even if the
+ * overall message might be not.
+ *
+ * Note that the message signature is validated when
+ * parsing the headers, and that validation does check the
+ * 32/32 limit.
+ *
+ * Note that the specification defines no limits on the depth
+ * of stacked variants, but we do.
+ */
+ if (m->n_containers >= BUS_CONTAINER_DEPTH)
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(m->containers, m->containers_allocated, m->n_containers + 1))
+ return -ENOMEM;
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+
+ signature = strdup(contents);
+ if (!signature)
+ return -ENOMEM;
+
+ c->saved_index = c->index;
+ before = m->rindex;
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ r = bus_message_enter_array(m, c, contents, &array_size, &item_size, &offsets, &n_offsets);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ r = bus_message_enter_variant(m, c, contents, &item_size);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ r = bus_message_enter_struct(m, c, contents, &item_size, &offsets, &n_offsets);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ r = bus_message_enter_dict_entry(m, c, contents, &item_size, &offsets, &n_offsets);
+ else
+ r = -EINVAL;
+ if (r <= 0)
+ return r;
+
+ /* OK, let's fill it in */
+ if (BUS_MESSAGE_IS_GVARIANT(m) &&
+ type == SD_BUS_TYPE_STRUCT &&
+ isempty(signature))
+ end = m->rindex + 0;
+ else
+ end = m->rindex + c->item_size;
+
+ m->containers[m->n_containers++] = (struct bus_container) {
+ .enclosing = type,
+ .signature = TAKE_PTR(signature),
+
+ .before = before,
+ .begin = m->rindex,
+ /* Unary type has fixed size of 1, but virtual size of 0 */
+ .end = end,
+ .array_size = array_size,
+ .item_size = item_size,
+ .offsets = TAKE_PTR(offsets),
+ .n_offsets = n_offsets,
+ };
+
+ return 1;
+}
+
+_public_ int sd_bus_message_exit_container(sd_bus_message *m) {
+ struct bus_container *c;
+ unsigned saved;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -ENXIO);
+
+ c = message_get_last_container(m);
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY) {
+ if (c->signature && c->signature[c->index] != 0)
+ return -EBUSY;
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ if (m->rindex < c->end)
+ return -EBUSY;
+
+ } else if (c->enclosing == SD_BUS_TYPE_ARRAY) {
+ uint32_t l;
+
+ l = BUS_MESSAGE_BSWAP32(m, *c->array_size);
+ if (c->begin + l != m->rindex)
+ return -EBUSY;
+ }
+
+ message_free_last_container(m);
+
+ c = message_get_last_container(m);
+ saved = c->index;
+ c->index = c->saved_index;
+ r = container_next_item(m, c, &m->rindex);
+ c->index = saved;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static void message_quit_container(sd_bus_message *m) {
+ struct bus_container *c;
+
+ assert(m);
+ assert(m->sealed);
+ assert(m->n_containers > 0);
+
+ /* Undo seeks */
+ c = message_get_last_container(m);
+ assert(m->rindex >= c->before);
+ m->rindex = c->before;
+
+ /* Free container */
+ message_free_last_container(m);
+
+ /* Correct index of new top-level container */
+ c = message_get_last_container(m);
+ c->index = c->saved_index;
+}
+
+_public_ int sd_bus_message_peek_type(sd_bus_message *m, char *type, const char **contents) {
+ struct bus_container *c;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (message_end_of_signature(m))
+ goto eof;
+
+ if (message_end_of_array(m, m->rindex))
+ goto eof;
+
+ c = message_get_last_container(m);
+
+ if (bus_type_is_basic(c->signature[c->index])) {
+ if (contents)
+ *contents = NULL;
+ if (type)
+ *type = c->signature[c->index];
+ return 1;
+ }
+
+ if (c->signature[c->index] == SD_BUS_TYPE_ARRAY) {
+
+ if (contents) {
+ size_t l;
+
+ r = signature_element_length(c->signature+c->index+1, &l);
+ if (r < 0)
+ return r;
+
+ /* signature_element_length does verification internally */
+
+ /* The array element must not be empty */
+ assert(l >= 1);
+ if (free_and_strndup(&c->peeked_signature,
+ c->signature + c->index + 1, l) < 0)
+ return -ENOMEM;
+
+ *contents = c->peeked_signature;
+ }
+
+ if (type)
+ *type = SD_BUS_TYPE_ARRAY;
+
+ return 1;
+ }
+
+ if (IN_SET(c->signature[c->index], SD_BUS_TYPE_STRUCT_BEGIN, SD_BUS_TYPE_DICT_ENTRY_BEGIN)) {
+
+ if (contents) {
+ size_t l;
+
+ r = signature_element_length(c->signature+c->index, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 3);
+ if (free_and_strndup(&c->peeked_signature,
+ c->signature + c->index + 1, l - 2) < 0)
+ return -ENOMEM;
+
+ *contents = c->peeked_signature;
+ }
+
+ if (type)
+ *type = c->signature[c->index] == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY;
+
+ return 1;
+ }
+
+ if (c->signature[c->index] == SD_BUS_TYPE_VARIANT) {
+ if (contents) {
+ void *q;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ size_t k;
+
+ if (c->item_size < 2)
+ return -EBADMSG;
+
+ /* Look for the NUL delimiter that
+ separates the payload from the
+ signature. Since the body might be
+ in a different part that then the
+ signature we map byte by byte. */
+
+ for (k = 2; k <= c->item_size; k++) {
+ size_t where;
+
+ where = m->rindex + c->item_size - k;
+ r = message_peek_body(m, &where, 1, k, &q);
+ if (r < 0)
+ return r;
+
+ if (*(char*) q == 0)
+ break;
+ }
+
+ if (k > c->item_size)
+ return -EBADMSG;
+
+ if (free_and_strndup(&c->peeked_signature,
+ (char*) q + 1, k - 1) < 0)
+ return -ENOMEM;
+
+ if (!signature_is_valid(c->peeked_signature, true))
+ return -EBADMSG;
+
+ *contents = c->peeked_signature;
+ } else {
+ size_t rindex, l;
+
+ rindex = m->rindex;
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ *contents = q;
+ }
+ }
+
+ if (type)
+ *type = SD_BUS_TYPE_VARIANT;
+
+ return 1;
+ }
+
+ return -EINVAL;
+
+eof:
+ if (type)
+ *type = 0;
+ if (contents)
+ *contents = NULL;
+ return 0;
+}
+
+_public_ int sd_bus_message_rewind(sd_bus_message *m, int complete) {
+ struct bus_container *c;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (complete) {
+ message_reset_containers(m);
+ m->rindex = 0;
+
+ c = message_get_last_container(m);
+ } else {
+ c = message_get_last_container(m);
+
+ c->index = 0;
+ m->rindex = c->begin;
+ }
+
+ c->offset_index = 0;
+ c->item_size = (c->n_offsets > 0 ? c->offsets[0] : c->end) - c->begin;
+
+ return !isempty(c->signature);
+}
+
+_public_ int sd_bus_message_readv(
+ sd_bus_message *m,
+ const char *types,
+ va_list ap) {
+
+ unsigned n_array, n_struct;
+ TypeStack stack[BUS_CONTAINER_DEPTH];
+ unsigned stack_ptr = 0;
+ unsigned n_loop = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(types, -EINVAL);
+
+ if (isempty(types))
+ return 0;
+
+ /* Ideally, we'd just call ourselves recursively on every
+ * complex type. However, the state of a va_list that is
+ * passed to a function is undefined after that function
+ * returns. This means we need to decode the va_list linearly
+ * in a single stackframe. We hence implement our own
+ * home-grown stack in an array. */
+
+ n_array = (unsigned) -1; /* length of current array entries */
+ n_struct = strlen(types); /* length of current struct contents signature */
+
+ for (;;) {
+ const char *t;
+
+ n_loop++;
+
+ if (n_array == 0 || (n_array == (unsigned) -1 && n_struct == 0)) {
+ r = type_stack_pop(stack, ELEMENTSOF(stack), &stack_ptr, &types, &n_struct, &n_array);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ t = types;
+ if (n_array != (unsigned) -1)
+ n_array--;
+ else {
+ types++;
+ n_struct--;
+ }
+
+ switch (*t) {
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_UNIX_FD: {
+ void *p;
+
+ p = va_arg(ap, void*);
+ r = sd_bus_message_read_basic(m, *t, p);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+
+ break;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(t + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k + 1];
+ memcpy(s, t + 1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+ }
+
+ if (n_array == (unsigned) -1) {
+ types += k;
+ n_struct -= k;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k;
+ n_array = va_arg(ap, unsigned);
+
+ break;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *s;
+
+ s = va_arg(ap, const char *);
+ if (!s)
+ return -EINVAL;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = s;
+ n_struct = strlen(s);
+ n_array = (unsigned) -1;
+
+ break;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(t, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k - 1];
+ memcpy(s, t + 1, k - 2);
+ s[k - 2] = 0;
+
+ r = sd_bus_message_enter_container(m, *t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+ return -ENXIO;
+ }
+ }
+
+ if (n_array == (unsigned) -1) {
+ types += k - 1;
+ n_struct -= k - 1;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k - 2;
+ n_array = (unsigned) -1;
+
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_message_read(sd_bus_message *m, const char *types, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_message_readv(m, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_message_skip(sd_bus_message *m, const char *types) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ /* If types is NULL, read exactly one element */
+ if (!types) {
+ struct bus_container *c;
+ size_t l;
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+
+ r = signature_element_length(c->signature + c->index, &l);
+ if (r < 0)
+ return r;
+
+ types = strndupa(c->signature + c->index, l);
+ }
+
+ switch (*types) {
+
+ case 0: /* Nothing to drop */
+ return 0;
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_UNIX_FD:
+
+ r = sd_bus_message_read_basic(m, *types, NULL);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, types + 1);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(types + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k+1];
+ memcpy(s, types+1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r <= 0)
+ return r;
+
+ for (;;) {
+ r = sd_bus_message_skip(m, s);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_skip(m, types + 1 + k);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *contents;
+ char x;
+
+ r = sd_bus_message_peek_type(m, &x, &contents);
+ if (r <= 0)
+ return r;
+
+ if (x != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ assert(r != 0);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_skip(m, types + 1);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(types, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k-1];
+ memcpy(s, types+1, k-2);
+ s[k-2] = 0;
+
+ r = sd_bus_message_enter_container(m, *types == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_skip(m, types + k);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+_public_ int sd_bus_message_read_array(
+ sd_bus_message *m,
+ char type,
+ const void **ptr,
+ size_t *size) {
+
+ struct bus_container *c;
+ void *p;
+ size_t sz;
+ ssize_t align;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(ptr, -EINVAL);
+ assert_return(size, -EINVAL);
+ assert_return(!BUS_MESSAGE_NEED_BSWAP(m), -EOPNOTSUPP);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ptr = NULL;
+ *size = 0;
+ return 0;
+ }
+
+ c = message_get_last_container(m);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ align = bus_gvariant_get_alignment(CHAR_TO_STR(type));
+ if (align < 0)
+ return align;
+
+ sz = c->end - c->begin;
+ } else {
+ align = bus_type_get_alignment(type);
+ if (align < 0)
+ return align;
+
+ sz = BUS_MESSAGE_BSWAP32(m, *c->array_size);
+ }
+
+ if (sz == 0)
+ /* Zero length array, let's return some aligned
+ * pointer that is not NULL */
+ p = (uint8_t*) align;
+ else {
+ r = message_peek_body(m, &m->rindex, align, sz, &p);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ goto fail;
+
+ *ptr = (const void*) p;
+ *size = sz;
+
+ return 1;
+
+fail:
+ message_quit_container(m);
+ return r;
+}
+
+static int message_peek_fields(
+ sd_bus_message *m,
+ size_t *rindex,
+ size_t align,
+ size_t nbytes,
+ void **ret) {
+
+ assert(m);
+ assert(rindex);
+ assert(align > 0);
+
+ return buffer_peek(BUS_MESSAGE_FIELDS(m), m->fields_size, rindex, align, nbytes, ret);
+}
+
+static int message_peek_field_uint32(
+ sd_bus_message *m,
+ size_t *ri,
+ size_t item_size,
+ uint32_t *ret) {
+
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m) && item_size != 4)
+ return -EBADMSG;
+
+ /* identical for gvariant and dbus1 */
+
+ r = message_peek_fields(m, ri, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+
+ return 0;
+}
+
+static int message_peek_field_uint64(
+ sd_bus_message *m,
+ size_t *ri,
+ size_t item_size,
+ uint64_t *ret) {
+
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m) && item_size != 8)
+ return -EBADMSG;
+
+ /* identical for gvariant and dbus1 */
+
+ r = message_peek_fields(m, ri, 8, 8, &q);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = BUS_MESSAGE_BSWAP64(m, *(uint64_t*) q);
+
+ return 0;
+}
+
+static int message_peek_field_string(
+ sd_bus_message *m,
+ bool (*validate)(const char *p),
+ size_t *ri,
+ size_t item_size,
+ const char **ret) {
+
+ uint32_t l;
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ if (item_size <= 0)
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, item_size, &q);
+ if (r < 0)
+ return r;
+
+ l = item_size - 1;
+ } else {
+ r = message_peek_field_uint32(m, ri, 4, &l);
+ if (r < 0)
+ return r;
+
+ if (l == UINT32_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, l+1, &q);
+ if (r < 0)
+ return r;
+ }
+
+ if (validate) {
+ if (!validate_nul(q, l))
+ return -EBADMSG;
+
+ if (!validate(q))
+ return -EBADMSG;
+ } else {
+ if (!validate_string(q, l))
+ return -EBADMSG;
+ }
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static int message_peek_field_signature(
+ sd_bus_message *m,
+ size_t *ri,
+ size_t item_size,
+ const char **ret) {
+
+ size_t l;
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+
+ if (item_size <= 0)
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, item_size, &q);
+ if (r < 0)
+ return r;
+
+ l = item_size - 1;
+ } else {
+ r = message_peek_fields(m, ri, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, l+1, &q);
+ if (r < 0)
+ return r;
+ }
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static int message_skip_fields(
+ sd_bus_message *m,
+ size_t *ri,
+ uint32_t array_size,
+ const char **signature) {
+
+ size_t original_index;
+ int r;
+
+ assert(m);
+ assert(ri);
+ assert(signature);
+ assert(!BUS_MESSAGE_IS_GVARIANT(m));
+
+ original_index = *ri;
+
+ for (;;) {
+ char t;
+ size_t l;
+
+ if (array_size != (uint32_t) -1 &&
+ array_size <= *ri - original_index)
+ return 0;
+
+ t = **signature;
+ if (!t)
+ return 0;
+
+ if (t == SD_BUS_TYPE_STRING) {
+
+ r = message_peek_field_string(m, NULL, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_OBJECT_PATH) {
+
+ r = message_peek_field_string(m, object_path_is_valid, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_SIGNATURE) {
+
+ r = message_peek_field_signature(m, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (bus_type_is_basic(t)) {
+ ssize_t align, k;
+
+ align = bus_type_get_alignment(t);
+ k = bus_type_get_size(t);
+ assert(align > 0 && k > 0);
+
+ r = message_peek_fields(m, ri, align, k, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_ARRAY) {
+
+ r = signature_element_length(*signature + 1, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 1);
+ {
+ char sig[l + 1], *s = sig;
+ uint32_t nas;
+ int alignment;
+
+ strncpy(sig, *signature + 1, l);
+ sig[l] = '\0';
+
+ alignment = bus_type_get_alignment(sig[0]);
+ if (alignment < 0)
+ return alignment;
+
+ r = message_peek_field_uint32(m, ri, 0, &nas);
+ if (r < 0)
+ return r;
+ if (nas > BUS_ARRAY_MAX_SIZE)
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, alignment, 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = message_skip_fields(m, ri, nas, (const char**) &s);
+ if (r < 0)
+ return r;
+ }
+
+ (*signature) += 1 + l;
+
+ } else if (t == SD_BUS_TYPE_VARIANT) {
+ const char *s;
+
+ r = message_peek_field_signature(m, ri, 0, &s);
+ if (r < 0)
+ return r;
+
+ r = message_skip_fields(m, ri, (uint32_t) -1, (const char**) &s);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (IN_SET(t, SD_BUS_TYPE_STRUCT, SD_BUS_TYPE_DICT_ENTRY)) {
+
+ r = signature_element_length(*signature, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 2);
+ {
+ char sig[l + 1], *s = sig;
+ strncpy(sig, *signature + 1, l);
+ sig[l] = '\0';
+
+ r = message_skip_fields(m, ri, (uint32_t) -1, (const char**) &s);
+ if (r < 0)
+ return r;
+ }
+
+ *signature += l;
+ } else
+ return -EBADMSG;
+ }
+}
+
+int bus_message_parse_fields(sd_bus_message *m) {
+ size_t ri;
+ int r;
+ uint32_t unix_fds = 0;
+ bool unix_fds_set = false;
+ void *offsets = NULL;
+ unsigned n_offsets = 0;
+ size_t sz = 0;
+ unsigned i = 0;
+
+ assert(m);
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ char *p;
+
+ /* Read the signature from the end of the body variant first */
+ sz = bus_gvariant_determine_word_size(BUS_MESSAGE_SIZE(m), 0);
+ if (m->footer_accessible < 1 + sz)
+ return -EBADMSG;
+
+ p = (char*) m->footer + m->footer_accessible - (1 + sz);
+ for (;;) {
+ if (p < (char*) m->footer)
+ return -EBADMSG;
+
+ if (*p == 0) {
+ _cleanup_free_ char *k = NULL;
+ size_t l;
+
+ /* We found the beginning of the signature
+ * string, yay! We require the body to be a
+ * structure, so verify it and then strip the
+ * opening/closing brackets. */
+
+ l = (char*) m->footer + m->footer_accessible - p - (1 + sz);
+ if (l < 2 ||
+ p[1] != SD_BUS_TYPE_STRUCT_BEGIN ||
+ p[1 + l - 1] != SD_BUS_TYPE_STRUCT_END)
+ return -EBADMSG;
+
+ k = memdup_suffix0(p + 1 + 1, l - 2);
+ if (!k)
+ return -ENOMEM;
+
+ if (!signature_is_valid(k, true))
+ return -EBADMSG;
+
+ free_and_replace(m->root_container.signature, k);
+ break;
+ }
+
+ p--;
+ }
+
+ /* Calculate the actual user body size, by removing
+ * the trailing variant signature and struct offset
+ * table */
+ m->user_body_size = m->body_size - ((char*) m->footer + m->footer_accessible - p);
+
+ /* Pull out the offset table for the fields array, if any */
+ if (m->fields_size > 0) {
+ sz = bus_gvariant_determine_word_size(m->fields_size, 0);
+ if (sz > 0) {
+ size_t framing;
+ void *q;
+
+ if (m->fields_size < sz)
+ return -EBADMSG;
+
+ ri = m->fields_size - sz;
+ r = message_peek_fields(m, &ri, 1, sz, &q);
+ if (r < 0)
+ return r;
+
+ framing = bus_gvariant_read_word_le(q, sz);
+ if (framing >= m->fields_size - sz)
+ return -EBADMSG;
+ if ((m->fields_size - framing) % sz != 0)
+ return -EBADMSG;
+
+ ri = framing;
+ r = message_peek_fields(m, &ri, 1, m->fields_size - framing, &offsets);
+ if (r < 0)
+ return r;
+
+ n_offsets = (m->fields_size - framing) / sz;
+ }
+ }
+ } else
+ m->user_body_size = m->body_size;
+
+ ri = 0;
+ while (ri < m->fields_size) {
+ _cleanup_free_ char *sig = NULL;
+ const char *signature;
+ uint64_t field_type;
+ size_t item_size = (size_t) -1;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ uint64_t *u64;
+
+ if (i >= n_offsets)
+ break;
+
+ if (i == 0)
+ ri = 0;
+ else
+ ri = ALIGN_TO(bus_gvariant_read_word_le((uint8_t*) offsets + (i-1)*sz, sz), 8);
+
+ r = message_peek_fields(m, &ri, 8, 8, (void**) &u64);
+ if (r < 0)
+ return r;
+
+ field_type = BUS_MESSAGE_BSWAP64(m, *u64);
+ } else {
+ uint8_t *u8;
+
+ r = message_peek_fields(m, &ri, 8, 1, (void**) &u8);
+ if (r < 0)
+ return r;
+
+ field_type = *u8;
+ }
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ size_t where, end;
+ char *b;
+ void *q;
+
+ end = bus_gvariant_read_word_le((uint8_t*) offsets + i*sz, sz);
+
+ if (end < ri)
+ return -EBADMSG;
+
+ where = ri = ALIGN_TO(ri, 8);
+ item_size = end - ri;
+ r = message_peek_fields(m, &where, 1, item_size, &q);
+ if (r < 0)
+ return r;
+
+ b = memrchr(q, 0, item_size);
+ if (!b)
+ return -EBADMSG;
+
+ sig = memdup_suffix0(b+1, item_size - (b+1-(char*) q));
+ if (!sig)
+ return -ENOMEM;
+
+ signature = sig;
+ item_size = b - (char*) q;
+ } else {
+ r = message_peek_field_signature(m, &ri, 0, &signature);
+ if (r < 0)
+ return r;
+ }
+
+ switch (field_type) {
+
+ case _BUS_MESSAGE_HEADER_INVALID:
+ return -EBADMSG;
+
+ case BUS_MESSAGE_HEADER_PATH:
+
+ if (m->path)
+ return -EBADMSG;
+
+ if (!streq(signature, "o"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, object_path_is_valid, &ri, item_size, &m->path);
+ break;
+
+ case BUS_MESSAGE_HEADER_INTERFACE:
+
+ if (m->interface)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, interface_name_is_valid, &ri, item_size, &m->interface);
+ break;
+
+ case BUS_MESSAGE_HEADER_MEMBER:
+
+ if (m->member)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, member_name_is_valid, &ri, item_size, &m->member);
+ break;
+
+ case BUS_MESSAGE_HEADER_ERROR_NAME:
+
+ if (m->error.name)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, error_name_is_valid, &ri, item_size, &m->error.name);
+ if (r >= 0)
+ m->error._need_free = -1;
+
+ break;
+
+ case BUS_MESSAGE_HEADER_DESTINATION:
+
+ if (m->destination)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, service_name_is_valid, &ri, item_size, &m->destination);
+ break;
+
+ case BUS_MESSAGE_HEADER_SENDER:
+
+ if (m->sender)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, service_name_is_valid, &ri, item_size, &m->sender);
+
+ if (r >= 0 && m->sender[0] == ':' && m->bus->bus_client) {
+ m->creds.unique_name = (char*) m->sender;
+ m->creds.mask |= SD_BUS_CREDS_UNIQUE_NAME & m->bus->creds_mask;
+ }
+
+ break;
+
+ case BUS_MESSAGE_HEADER_SIGNATURE: {
+ const char *s;
+ char *c;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) /* only applies to dbus1 */
+ return -EBADMSG;
+
+ if (m->root_container.signature)
+ return -EBADMSG;
+
+ if (!streq(signature, "g"))
+ return -EBADMSG;
+
+ r = message_peek_field_signature(m, &ri, item_size, &s);
+ if (r < 0)
+ return r;
+
+ c = strdup(s);
+ if (!c)
+ return -ENOMEM;
+
+ free_and_replace(m->root_container.signature, c);
+ break;
+ }
+
+ case BUS_MESSAGE_HEADER_REPLY_SERIAL:
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ /* 64bit on dbus2 */
+
+ if (!streq(signature, "t"))
+ return -EBADMSG;
+
+ r = message_peek_field_uint64(m, &ri, item_size, &m->reply_cookie);
+ if (r < 0)
+ return r;
+ } else {
+ /* 32bit on dbus1 */
+ uint32_t serial;
+
+ if (!streq(signature, "u"))
+ return -EBADMSG;
+
+ r = message_peek_field_uint32(m, &ri, item_size, &serial);
+ if (r < 0)
+ return r;
+
+ m->reply_cookie = serial;
+ }
+
+ if (m->reply_cookie == 0)
+ return -EBADMSG;
+
+ break;
+
+ case BUS_MESSAGE_HEADER_UNIX_FDS:
+ if (unix_fds_set)
+ return -EBADMSG;
+
+ if (!streq(signature, "u"))
+ return -EBADMSG;
+
+ r = message_peek_field_uint32(m, &ri, item_size, &unix_fds);
+ if (r < 0)
+ return -EBADMSG;
+
+ unix_fds_set = true;
+ break;
+
+ default:
+ if (!BUS_MESSAGE_IS_GVARIANT(m))
+ r = message_skip_fields(m, &ri, (uint32_t) -1, (const char **) &signature);
+ }
+
+ if (r < 0)
+ return r;
+
+ i++;
+ }
+
+ if (m->n_fds != unix_fds)
+ return -EBADMSG;
+
+ switch (m->header->type) {
+
+ case SD_BUS_MESSAGE_SIGNAL:
+ if (!m->path || !m->interface || !m->member)
+ return -EBADMSG;
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_CALL:
+
+ if (!m->path || !m->member)
+ return -EBADMSG;
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_RETURN:
+
+ if (m->reply_cookie == 0)
+ return -EBADMSG;
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_ERROR:
+
+ if (m->reply_cookie == 0 || !m->error.name)
+ return -EBADMSG;
+ break;
+ }
+
+ /* Refuse non-local messages that claim they are local */
+ if (streq_ptr(m->path, "/org/freedesktop/DBus/Local"))
+ return -EBADMSG;
+ if (streq_ptr(m->interface, "org.freedesktop.DBus.Local"))
+ return -EBADMSG;
+ if (streq_ptr(m->sender, "org.freedesktop.DBus.Local"))
+ return -EBADMSG;
+
+ m->root_container.end = m->user_body_size;
+
+ if (BUS_MESSAGE_IS_GVARIANT(m)) {
+ r = build_struct_offsets(
+ m,
+ m->root_container.signature,
+ m->user_body_size,
+ &m->root_container.item_size,
+ &m->root_container.offsets,
+ &m->root_container.n_offsets);
+ if (r == -EINVAL)
+ return -EBADMSG;
+ if (r < 0)
+ return r;
+ }
+
+ /* Try to read the error message, but if we can't it's a non-issue */
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_ERROR)
+ (void) sd_bus_message_read(m, "s", &m->error.message);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_destination(sd_bus_message *m, const char *destination) {
+ assert_return(m, -EINVAL);
+ assert_return(destination, -EINVAL);
+ assert_return(service_name_is_valid(destination), -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->destination, -EEXIST);
+
+ return message_append_field_string(m, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &m->destination);
+}
+
+_public_ int sd_bus_message_set_sender(sd_bus_message *m, const char *sender) {
+ assert_return(m, -EINVAL);
+ assert_return(sender, -EINVAL);
+ assert_return(service_name_is_valid(sender), -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->sender, -EEXIST);
+
+ return message_append_field_string(m, BUS_MESSAGE_HEADER_SENDER, SD_BUS_TYPE_STRING, sender, &m->sender);
+}
+
+int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz) {
+ size_t total;
+ void *p, *e;
+ size_t i;
+ struct bus_body_part *part;
+
+ assert(m);
+ assert(buffer);
+ assert(sz);
+
+ total = BUS_MESSAGE_SIZE(m);
+
+ p = malloc(total);
+ if (!p)
+ return -ENOMEM;
+
+ e = mempcpy(p, m->header, BUS_MESSAGE_BODY_BEGIN(m));
+ MESSAGE_FOREACH_PART(part, i, m)
+ e = mempcpy(e, part->data, part->size);
+
+ assert(total == (size_t) ((uint8_t*) e - (uint8_t*) p));
+
+ *buffer = p;
+ *sz = total;
+
+ return 0;
+}
+
+int bus_message_read_strv_extend(sd_bus_message *m, char ***l) {
+ const char *s;
+ int r;
+
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', "s");
+ if (r <= 0)
+ return r;
+
+ while ((r = sd_bus_message_read_basic(m, 's', &s)) > 0) {
+ r = strv_extend(l, s);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_read_strv(sd_bus_message *m, char ***l) {
+ _cleanup_strv_free_ char **strv = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(l, -EINVAL);
+
+ r = bus_message_read_strv_extend(m, &strv);
+ if (r <= 0)
+ return r;
+
+ *l = TAKE_PTR(strv);
+ return 1;
+}
+
+static int bus_message_get_arg_skip(
+ sd_bus_message *m,
+ unsigned i,
+ char *_type,
+ const char **_contents) {
+
+ unsigned j;
+ int r;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ for (j = 0;; j++) {
+ const char *contents;
+ char type;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENXIO;
+
+ /* Don't match against arguments after the first one we don't understand */
+ if (!IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE) &&
+ !(type == SD_BUS_TYPE_ARRAY && STR_IN_SET(contents, "s", "o", "g")))
+ return -ENXIO;
+
+ if (j >= i) {
+ if (_contents)
+ *_contents = contents;
+ if (_type)
+ *_type = type;
+ return 0;
+ }
+
+ r = sd_bus_message_skip(m, NULL);
+ if (r < 0)
+ return r;
+ }
+
+}
+
+int bus_message_get_arg(sd_bus_message *m, unsigned i, const char **str) {
+ char type;
+ int r;
+
+ assert(m);
+ assert(str);
+
+ r = bus_message_get_arg_skip(m, i, &type, NULL);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE))
+ return -ENXIO;
+
+ return sd_bus_message_read_basic(m, type, str);
+}
+
+int bus_message_get_arg_strv(sd_bus_message *m, unsigned i, char ***strv) {
+ const char *contents;
+ char type;
+ int r;
+
+ assert(m);
+ assert(strv);
+
+ r = bus_message_get_arg_skip(m, i, &type, &contents);
+ if (r < 0)
+ return r;
+
+ if (type != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+ if (!STR_IN_SET(contents, "s", "o", "g"))
+ return -ENXIO;
+
+ return sd_bus_message_read_strv(m, strv);
+}
+
+_public_ int sd_bus_message_get_errno(sd_bus_message *m) {
+ assert_return(m, EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_ERROR)
+ return 0;
+
+ return sd_bus_error_get_errno(&m->error);
+}
+
+_public_ const char* sd_bus_message_get_signature(sd_bus_message *m, int complete) {
+ struct bus_container *c;
+
+ assert_return(m, NULL);
+
+ c = complete ? &m->root_container : message_get_last_container(m);
+ return strempty(c->signature);
+}
+
+_public_ int sd_bus_message_is_empty(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return isempty(m->root_container.signature);
+}
+
+_public_ int sd_bus_message_has_signature(sd_bus_message *m, const char *signature) {
+ assert_return(m, -EINVAL);
+
+ return streq(strempty(m->root_container.signature), strempty(signature));
+}
+
+_public_ int sd_bus_message_copy(sd_bus_message *m, sd_bus_message *source, int all) {
+ bool done_something = false;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(source, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(source->sealed, -EPERM);
+
+ do {
+ const char *contents;
+ char type;
+ union {
+ uint8_t u8;
+ uint16_t u16;
+ int16_t s16;
+ uint32_t u32;
+ int32_t s32;
+ uint64_t u64;
+ int64_t s64;
+ double d64;
+ const char *string;
+ int i;
+ } basic;
+
+ r = sd_bus_message_peek_type(source, &type, &contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ done_something = true;
+
+ if (bus_type_is_container(type) > 0) {
+
+ r = sd_bus_message_enter_container(source, type, contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, type, contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_copy(m, source, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(source);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ r = sd_bus_message_read_basic(source, type, &basic);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+
+ if (IN_SET(type, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE, SD_BUS_TYPE_STRING))
+ r = sd_bus_message_append_basic(m, type, basic.string);
+ else
+ r = sd_bus_message_append_basic(m, type, &basic);
+
+ if (r < 0)
+ return r;
+
+ } while (all);
+
+ return done_something;
+}
+
+_public_ int sd_bus_message_verify_type(sd_bus_message *m, char type, const char *contents) {
+ const char *c;
+ char t;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(!type || bus_type_is_valid(type), -EINVAL);
+ assert_return(!contents || signature_is_valid(contents, true), -EINVAL);
+ assert_return(type || contents, -EINVAL);
+ assert_return(!contents || !type || bus_type_is_container(type), -EINVAL);
+
+ r = sd_bus_message_peek_type(m, &t, &c);
+ if (r <= 0)
+ return r;
+
+ if (type != 0 && type != t)
+ return 0;
+
+ if (contents && !streq_ptr(contents, c))
+ return 0;
+
+ return 1;
+}
+
+_public_ sd_bus *sd_bus_message_get_bus(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->bus;
+}
+
+int bus_message_remarshal(sd_bus *bus, sd_bus_message **m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *n = NULL;
+ usec_t timeout;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(*m);
+
+ switch ((*m)->header->type) {
+
+ case SD_BUS_MESSAGE_SIGNAL:
+ r = sd_bus_message_new_signal(bus, &n, (*m)->path, (*m)->interface, (*m)->member);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_CALL:
+ r = sd_bus_message_new_method_call(bus, &n, (*m)->destination, (*m)->path, (*m)->interface, (*m)->member);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_RETURN:
+ case SD_BUS_MESSAGE_METHOD_ERROR:
+
+ r = sd_bus_message_new(bus, &n, (*m)->header->type);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(n);
+
+ n->reply_cookie = (*m)->reply_cookie;
+
+ r = message_append_reply_cookie(n, n->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if ((*m)->header->type == SD_BUS_MESSAGE_METHOD_ERROR && (*m)->error.name) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, (*m)->error.name, &n->error.message);
+ if (r < 0)
+ return r;
+
+ n->error._need_free = -1;
+ }
+
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if ((*m)->destination && !n->destination) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, (*m)->destination, &n->destination);
+ if (r < 0)
+ return r;
+ }
+
+ if ((*m)->sender && !n->sender) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_SENDER, SD_BUS_TYPE_STRING, (*m)->sender, &n->sender);
+ if (r < 0)
+ return r;
+ }
+
+ n->header->flags |= (*m)->header->flags & (BUS_MESSAGE_NO_REPLY_EXPECTED|BUS_MESSAGE_NO_AUTO_START);
+
+ r = sd_bus_message_copy(n, *m, true);
+ if (r < 0)
+ return r;
+
+ timeout = (*m)->timeout;
+ if (timeout == 0 && !((*m)->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)) {
+ r = sd_bus_get_method_call_timeout(bus, &timeout);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_seal(n, BUS_MESSAGE_COOKIE(*m), timeout);
+ if (r < 0)
+ return r;
+
+ sd_bus_message_unref(*m);
+ *m = TAKE_PTR(n);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_get_priority(sd_bus_message *m, int64_t *priority) {
+ static bool warned = false;
+
+ assert_return(m, -EINVAL);
+ assert_return(priority, -EINVAL);
+
+ if (!warned) {
+ log_debug("sd_bus_message_get_priority() is deprecated and always returns 0.");
+ warned = true;
+ }
+
+ *priority = 0;
+ return 0;
+}
+
+_public_ int sd_bus_message_set_priority(sd_bus_message *m, int64_t priority) {
+ static bool warned = false;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ if (!warned) {
+ log_debug("sd_bus_message_set_priority() is deprecated and does nothing.");
+ warned = true;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_sensitive(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ m->sensitive = true;
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/bus-message.h b/src/libsystemd/sd-bus/bus-message.h
new file mode 100644
index 0000000..3561737
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-message.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <byteswap.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+
+#include "sd-bus.h"
+
+#include "bus-creds.h"
+#include "bus-protocol.h"
+#include "macro.h"
+#include "time-util.h"
+
+struct bus_container {
+ char enclosing;
+ bool need_offsets:1;
+
+ /* Indexes into the signature string */
+ unsigned index, saved_index;
+ char *signature;
+
+ size_t before, begin, end;
+
+ /* dbus1: pointer to the array size value, if this is a value */
+ uint32_t *array_size;
+
+ /* gvariant: list of offsets to end of children if this is struct/dict entry/array */
+ size_t *offsets, n_offsets, offsets_allocated, offset_index;
+ size_t item_size;
+
+ char *peeked_signature;
+};
+
+struct bus_body_part {
+ struct bus_body_part *next;
+ void *data;
+ void *mmap_begin;
+ size_t size;
+ size_t mapped;
+ size_t allocated;
+ uint64_t memfd_offset;
+ int memfd;
+ bool free_this:1;
+ bool munmap_this:1;
+ bool sealed:1;
+ bool is_zero:1;
+};
+
+struct sd_bus_message {
+ /* Caveat: a message can be referenced in two different ways: the main (user-facing) way will also
+ * pin the bus connection object the message is associated with. The secondary way ("queued") is used
+ * when a message is in the read or write queues of the bus connection object, which will not pin the
+ * bus connection object. This is necessary so that we don't have to have a pair of cyclic references
+ * between a message that is queued and its connection: as soon as a message is only referenced by
+ * the connection (by means of being queued) and the connection itself has no other references it
+ * will be freed. */
+
+ unsigned n_ref; /* Counter of references that pin the connection */
+ unsigned n_queued; /* Counter of references that do not pin the connection */
+
+ sd_bus *bus;
+
+ uint64_t reply_cookie;
+
+ const char *path;
+ const char *interface;
+ const char *member;
+ const char *destination;
+ const char *sender;
+
+ sd_bus_error error;
+
+ sd_bus_creds creds;
+
+ usec_t monotonic;
+ usec_t realtime;
+ uint64_t seqnum;
+ uint64_t verify_destination_id;
+
+ bool sealed:1;
+ bool dont_send:1;
+ bool allow_fds:1;
+ bool free_header:1;
+ bool free_fds:1;
+ bool poisoned:1;
+ bool sensitive:1;
+
+ /* The first and last bytes of the message */
+ struct bus_header *header;
+ void *footer;
+
+ /* How many bytes are accessible in the above pointers */
+ size_t header_accessible;
+ size_t footer_accessible;
+
+ size_t fields_size;
+ size_t body_size;
+ size_t user_body_size;
+
+ struct bus_body_part body;
+ struct bus_body_part *body_end;
+ unsigned n_body_parts;
+
+ size_t rindex;
+ struct bus_body_part *cached_rindex_part;
+ size_t cached_rindex_part_begin;
+
+ uint32_t n_fds;
+ int *fds;
+
+ struct bus_container root_container, *containers;
+ size_t n_containers;
+ size_t containers_allocated;
+
+ struct iovec *iovec;
+ struct iovec iovec_fixed[2];
+ unsigned n_iovec;
+
+ char *peeked_signature;
+
+ /* If set replies to this message must carry the signature
+ * specified here to successfully seal. This is initialized
+ * from the vtable data */
+ const char *enforced_reply_signature;
+
+ usec_t timeout;
+
+ size_t header_offsets[_BUS_MESSAGE_HEADER_MAX];
+ unsigned n_header_offsets;
+
+ uint64_t read_counter;
+};
+
+static inline bool BUS_MESSAGE_NEED_BSWAP(sd_bus_message *m) {
+ return m->header->endian != BUS_NATIVE_ENDIAN;
+}
+
+static inline uint16_t BUS_MESSAGE_BSWAP16(sd_bus_message *m, uint16_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_16(u) : u;
+}
+
+static inline uint32_t BUS_MESSAGE_BSWAP32(sd_bus_message *m, uint32_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_32(u) : u;
+}
+
+static inline uint64_t BUS_MESSAGE_BSWAP64(sd_bus_message *m, uint64_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_64(u) : u;
+}
+
+static inline uint64_t BUS_MESSAGE_COOKIE(sd_bus_message *m) {
+ if (m->header->version == 2)
+ return BUS_MESSAGE_BSWAP64(m, m->header->dbus2.cookie);
+
+ return BUS_MESSAGE_BSWAP32(m, m->header->dbus1.serial);
+}
+
+static inline size_t BUS_MESSAGE_SIZE(sd_bus_message *m) {
+ return
+ sizeof(struct bus_header) +
+ ALIGN8(m->fields_size) +
+ m->body_size;
+}
+
+static inline size_t BUS_MESSAGE_BODY_BEGIN(sd_bus_message *m) {
+ return
+ sizeof(struct bus_header) +
+ ALIGN8(m->fields_size);
+}
+
+static inline void* BUS_MESSAGE_FIELDS(sd_bus_message *m) {
+ return (uint8_t*) m->header + sizeof(struct bus_header);
+}
+
+static inline bool BUS_MESSAGE_IS_GVARIANT(sd_bus_message *m) {
+ return m->header->version == 2;
+}
+
+int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz);
+int bus_message_read_strv_extend(sd_bus_message *m, char ***l);
+
+int bus_message_from_header(
+ sd_bus *bus,
+ void *header,
+ size_t header_accessible,
+ void *footer,
+ size_t footer_accessible,
+ size_t message_size,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ size_t extra,
+ sd_bus_message **ret);
+
+int bus_message_from_malloc(
+ sd_bus *bus,
+ void *buffer,
+ size_t length,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ sd_bus_message **ret);
+
+int bus_message_get_arg(sd_bus_message *m, unsigned i, const char **str);
+int bus_message_get_arg_strv(sd_bus_message *m, unsigned i, char ***strv);
+
+int bus_message_parse_fields(sd_bus_message *m);
+
+struct bus_body_part *message_append_part(sd_bus_message *m);
+
+#define MESSAGE_FOREACH_PART(part, i, m) \
+ for ((i) = 0, (part) = &(m)->body; (i) < (m)->n_body_parts; (i)++, (part) = (part)->next)
+
+int bus_body_part_map(struct bus_body_part *part);
+void bus_body_part_unmap(struct bus_body_part *part);
+
+int bus_message_to_errno(sd_bus_message *m);
+
+int bus_message_new_synthetic_error(sd_bus *bus, uint64_t serial, const sd_bus_error *e, sd_bus_message **m);
+
+int bus_message_remarshal(sd_bus *bus, sd_bus_message **m);
+
+void bus_message_set_sender_driver(sd_bus *bus, sd_bus_message *m);
+void bus_message_set_sender_local(sd_bus *bus, sd_bus_message *m);
+
+sd_bus_message* bus_message_ref_queued(sd_bus_message *m, sd_bus *bus);
+sd_bus_message* bus_message_unref_queued(sd_bus_message *m, sd_bus *bus);
diff --git a/src/libsystemd/sd-bus/bus-objects.c b/src/libsystemd/sd-bus/bus-objects.c
new file mode 100644
index 0000000..275c431
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-objects.c
@@ -0,0 +1,3031 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-introspect.h"
+#include "bus-message.h"
+#include "bus-objects.h"
+#include "bus-signature.h"
+#include "bus-slot.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "missing_capability.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+
+static int node_vtable_get_userdata(
+ sd_bus *bus,
+ const char *path,
+ struct node_vtable *c,
+ void **userdata,
+ sd_bus_error *error) {
+
+ sd_bus_slot *s;
+ void *u, *found_u = NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(c);
+
+ s = container_of(c, sd_bus_slot, node_vtable);
+ u = s->userdata;
+ if (c->find) {
+ bus->current_slot = sd_bus_slot_ref(s);
+ bus->current_userdata = u;
+ r = c->find(bus, path, c->interface, u, &found_u, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(s);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ if (r == 0)
+ return r;
+ } else
+ found_u = u;
+
+ if (userdata)
+ *userdata = found_u;
+
+ return 1;
+}
+
+static void *vtable_method_convert_userdata(const sd_bus_vtable *p, void *u) {
+ assert(p);
+
+ if (!u || FLAGS_SET(p->flags, SD_BUS_VTABLE_ABSOLUTE_OFFSET))
+ return SIZE_TO_PTR(p->x.method.offset); /* don't add offset on NULL, to make ubsan happy */
+
+ return (uint8_t*) u + p->x.method.offset;
+}
+
+static void *vtable_property_convert_userdata(const sd_bus_vtable *p, void *u) {
+ assert(p);
+
+ if (!u || FLAGS_SET(p->flags, SD_BUS_VTABLE_ABSOLUTE_OFFSET))
+ return SIZE_TO_PTR(p->x.property.offset); /* as above */
+
+ return (uint8_t*) u + p->x.property.offset;
+}
+
+static int vtable_property_get_userdata(
+ sd_bus *bus,
+ const char *path,
+ struct vtable_member *p,
+ void **userdata,
+ sd_bus_error *error) {
+
+ void *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(p);
+ assert(userdata);
+
+ r = node_vtable_get_userdata(bus, path, p->parent, &u, error);
+ if (r <= 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ *userdata = vtable_property_convert_userdata(p->vtable, u);
+ return 1;
+}
+
+static int add_enumerated_to_set(
+ sd_bus *bus,
+ const char *prefix,
+ struct node_enumerator *first,
+ Set *s,
+ sd_bus_error *error) {
+
+ struct node_enumerator *c;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(s);
+
+ LIST_FOREACH(enumerators, c, first) {
+ char **children = NULL, **k;
+ sd_bus_slot *slot;
+
+ if (bus->nodes_modified)
+ return 0;
+
+ slot = container_of(c, sd_bus_slot, node_enumerator);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = slot->userdata;
+ r = c->callback(bus, prefix, slot->userdata, &children, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+
+ STRV_FOREACH(k, children) {
+ if (r < 0) {
+ free(*k);
+ continue;
+ }
+
+ if (!object_path_is_valid(*k)) {
+ free(*k);
+ r = -EINVAL;
+ continue;
+ }
+
+ if (!object_path_startswith(*k, prefix)) {
+ free(*k);
+ continue;
+ }
+
+ r = set_consume(s, *k);
+ if (r == -EEXIST)
+ r = 0;
+ }
+
+ free(children);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+enum {
+ /* if set, add_subtree() works recursively */
+ CHILDREN_RECURSIVE = 1 << 0,
+ /* if set, add_subtree() scans object-manager hierarchies recursively */
+ CHILDREN_SUBHIERARCHIES = 1 << 1,
+};
+
+static int add_subtree_to_set(
+ sd_bus *bus,
+ const char *prefix,
+ struct node *n,
+ unsigned flags,
+ Set *s,
+ sd_bus_error *error) {
+
+ struct node *i;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(n);
+ assert(s);
+
+ r = add_enumerated_to_set(bus, prefix, n->enumerators, s, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ LIST_FOREACH(siblings, i, n->child) {
+ char *t;
+
+ if (!object_path_startswith(i->path, prefix))
+ continue;
+
+ t = strdup(i->path);
+ if (!t)
+ return -ENOMEM;
+
+ r = set_consume(s, t);
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ if ((flags & CHILDREN_RECURSIVE) &&
+ ((flags & CHILDREN_SUBHIERARCHIES) || !i->object_managers)) {
+ r = add_subtree_to_set(bus, prefix, i, flags, s, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int get_child_nodes(
+ sd_bus *bus,
+ const char *prefix,
+ struct node *n,
+ unsigned flags,
+ Set **_s,
+ sd_bus_error *error) {
+
+ Set *s = NULL;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(n);
+ assert(_s);
+
+ s = set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = add_subtree_to_set(bus, prefix, n, flags, s, error);
+ if (r < 0) {
+ set_free_free(s);
+ return r;
+ }
+
+ *_s = s;
+ return 0;
+}
+
+static int node_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node_callback *first,
+ bool require_fallback,
+ bool *found_object) {
+
+ struct node_callback *c;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(found_object);
+
+ LIST_FOREACH(callbacks, c, first) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ sd_bus_slot *slot;
+
+ if (bus->nodes_modified)
+ return 0;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ *found_object = true;
+
+ if (c->last_iteration == bus->iteration_counter)
+ continue;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ slot = container_of(c, sd_bus_slot, node_callback);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+#define CAPABILITY_SHIFT(x) (((x) >> __builtin_ctzll(_SD_BUS_VTABLE_CAPABILITY_MASK)) & 0xFFFF)
+
+static int check_access(sd_bus *bus, sd_bus_message *m, struct vtable_member *c, sd_bus_error *error) {
+ uint64_t cap;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+
+ /* If the entire bus is trusted let's grant access */
+ if (bus->trusted)
+ return 0;
+
+ /* If the member is marked UNPRIVILEGED let's grant access */
+ if (c->vtable->flags & SD_BUS_VTABLE_UNPRIVILEGED)
+ return 0;
+
+ /* Check have the caller has the requested capability
+ * set. Note that the flags value contains the capability
+ * number plus one, which we need to subtract here. We do this
+ * so that we have 0 as special value for "default
+ * capability". */
+ cap = CAPABILITY_SHIFT(c->vtable->flags);
+ if (cap == 0)
+ cap = CAPABILITY_SHIFT(c->parent->vtable[0].flags);
+ if (cap == 0)
+ cap = CAP_SYS_ADMIN;
+ else
+ cap--;
+
+ r = sd_bus_query_sender_privilege(m, cap);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Access to %s.%s() not permitted.", c->interface, c->member);
+}
+
+static int method_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct vtable_member *c,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *signature;
+ void *u;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+ assert(found_object);
+
+ if (require_fallback && !c->parent->is_fallback)
+ return 0;
+
+ if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) {
+ r = sd_bus_message_sensitive(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = check_access(bus, m, c, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ r = node_vtable_get_userdata(bus, m->path, c->parent, &u, &error);
+ if (r <= 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ u = vtable_method_convert_userdata(c->vtable, u);
+
+ *found_object = true;
+
+ if (c->last_iteration == bus->iteration_counter)
+ return 0;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ signature = sd_bus_message_get_signature(m, true);
+ if (!signature)
+ return -EINVAL;
+
+ if (!streq(strempty(c->vtable->x.method.signature), signature))
+ return sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid arguments '%s' to call %s.%s(), expecting '%s'.",
+ signature, c->interface, c->member, strempty(c->vtable->x.method.signature));
+
+ /* Keep track what the signature of the reply to this message
+ * should be, so that this can be enforced when sealing the
+ * reply. */
+ m->enforced_reply_signature = strempty(c->vtable->x.method.result);
+
+ if (c->vtable->x.method.handler) {
+ sd_bus_slot *slot;
+
+ slot = container_of(c->parent, sd_bus_slot, node_vtable);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->vtable->x.method.handler;
+ bus->current_userdata = u;
+ r = c->vtable->x.method.handler(m, u, &error);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ return bus_maybe_reply_error(m, r, &error);
+ }
+
+ /* If the method callback is NULL, make this a successful NOP */
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int invoke_property_get(
+ sd_bus *bus,
+ sd_bus_slot *slot,
+ const sd_bus_vtable *v,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const void *p;
+ int r;
+
+ assert(bus);
+ assert(slot);
+ assert(v);
+ assert(path);
+ assert(interface);
+ assert(property);
+ assert(reply);
+
+ if (v->x.property.get) {
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = userdata;
+ r = v->x.property.get(bus, path, interface, property, reply, userdata, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ return r;
+ }
+
+ /* Automatic handling if no callback is defined. */
+
+ if (streq(v->x.property.signature, "as"))
+ return sd_bus_message_append_strv(reply, *(char***) userdata);
+
+ assert(signature_is_single(v->x.property.signature, false));
+ assert(bus_type_is_basic(v->x.property.signature[0]));
+
+ switch (v->x.property.signature[0]) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_SIGNATURE:
+ p = strempty(*(char**) userdata);
+ break;
+
+ case SD_BUS_TYPE_OBJECT_PATH:
+ p = *(char**) userdata;
+ assert(p);
+ break;
+
+ default:
+ p = userdata;
+ break;
+ }
+
+ return sd_bus_message_append_basic(reply, v->x.property.signature[0], p);
+}
+
+static int invoke_property_set(
+ sd_bus *bus,
+ sd_bus_slot *slot,
+ const sd_bus_vtable *v,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(bus);
+ assert(slot);
+ assert(v);
+ assert(path);
+ assert(interface);
+ assert(property);
+ assert(value);
+
+ if (v->x.property.set) {
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = userdata;
+ r = v->x.property.set(bus, path, interface, property, value, userdata, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ return r;
+ }
+
+ /* Automatic handling if no callback is defined. */
+
+ assert(signature_is_single(v->x.property.signature, false));
+ assert(bus_type_is_basic(v->x.property.signature[0]));
+
+ switch (v->x.property.signature[0]) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ const char *p;
+ char *n;
+
+ r = sd_bus_message_read_basic(value, v->x.property.signature[0], &p);
+ if (r < 0)
+ return r;
+
+ n = strdup(p);
+ if (!n)
+ return -ENOMEM;
+
+ free(*(char**) userdata);
+ *(char**) userdata = n;
+
+ break;
+ }
+
+ default:
+ r = sd_bus_message_read_basic(value, v->x.property.signature[0], userdata);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ return 1;
+}
+
+static int property_get_set_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct vtable_member *c,
+ bool require_fallback,
+ bool is_get,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_bus_slot *slot;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+ assert(found_object);
+
+ if (require_fallback && !c->parent->is_fallback)
+ return 0;
+
+ if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) {
+ r = sd_bus_message_sensitive(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = vtable_property_get_userdata(bus, m->path, c, &u, &error);
+ if (r <= 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ slot = container_of(c->parent, sd_bus_slot, node_vtable);
+
+ *found_object = true;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) {
+ r = sd_bus_message_sensitive(reply);
+ if (r < 0)
+ return r;
+ }
+
+ if (is_get) {
+ /* Note that we do not protect against reexecution
+ * here (using the last_iteration check, see below),
+ * should the node tree have changed and we got called
+ * again. We assume that property Get() calls are
+ * ultimately without side-effects or if they aren't
+ * then at least idempotent. */
+
+ r = sd_bus_message_open_container(reply, 'v', c->vtable->x.property.signature);
+ if (r < 0)
+ return r;
+
+ /* Note that we do not do an access check here. Read
+ * access to properties is always unrestricted, since
+ * PropertiesChanged signals broadcast contents
+ * anyway. */
+
+ r = invoke_property_get(bus, slot, c->vtable, m->path, c->interface, c->member, reply, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ } else {
+ const char *signature = NULL;
+ char type = 0;
+
+ if (c->vtable->type != _SD_BUS_VTABLE_WRITABLE_PROPERTY)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_PROPERTY_READ_ONLY, "Property '%s' is not writable.", c->member);
+
+ /* Avoid that we call the set routine more than once
+ * if the processing of this message got restarted
+ * because the node tree changed. */
+ if (c->last_iteration == bus->iteration_counter)
+ return 0;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_peek_type(m, &type, &signature);
+ if (r < 0)
+ return r;
+
+ if (type != 'v')
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_SIGNATURE,
+ "Incorrect signature when setting property '%s', expected 'v', got '%c'.",
+ c->member, type);
+ if (!streq(strempty(signature), strempty(c->vtable->x.property.signature)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS,
+ "Incorrect parameters for property '%s', expected '%s', got '%s'.",
+ c->member, strempty(c->vtable->x.property.signature), strempty(signature));
+
+ r = sd_bus_message_enter_container(m, 'v', c->vtable->x.property.signature);
+ if (r < 0)
+ return r;
+
+ r = check_access(bus, m, c, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ r = invoke_property_set(bus, slot, c->vtable, m->path, c->interface, c->member, m, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int vtable_append_one_property(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ struct node_vtable *c,
+ const sd_bus_vtable *v,
+ void *userdata,
+ sd_bus_error *error) {
+
+ sd_bus_slot *slot;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(c);
+ assert(v);
+
+ if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) {
+ r = sd_bus_message_sensitive(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_open_container(reply, 'e', "sv");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", v->x.property.member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'v', v->x.property.signature);
+ if (r < 0)
+ return r;
+
+ slot = container_of(c, sd_bus_slot, node_vtable);
+
+ r = invoke_property_get(bus, slot, v, path, c->interface, v->x.property.member, reply, vtable_property_convert_userdata(v, userdata), error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int vtable_append_all_properties(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ struct node_vtable *c,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const sd_bus_vtable *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(c);
+
+ if (c->vtable[0].flags & SD_BUS_VTABLE_HIDDEN)
+ return 1;
+
+ v = c->vtable;
+ for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ /* Let's not include properties marked as "explicit" in any message that contains a generic
+ * dump of properties, but only in those generated as a response to an explicit request. */
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)
+ continue;
+
+ /* Let's not include properties marked only for invalidation on change (i.e. in contrast to
+ * those whose new values are included in PropertiesChanges message) in any signals. This is
+ * useful to ensure they aren't included in InterfacesAdded messages. */
+ if (reply->header->type != SD_BUS_MESSAGE_METHOD_RETURN &&
+ FLAGS_SET(v->flags, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION))
+ continue;
+
+ r = vtable_append_one_property(bus, reply, path, c, v, userdata, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 1;
+}
+
+static int property_get_all_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node_vtable *first,
+ bool require_fallback,
+ const char *iface,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ struct node_vtable *c;
+ bool found_interface;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(found_object);
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ found_interface = !iface || STR_IN_SET(iface,
+ "org.freedesktop.DBus.Properties",
+ "org.freedesktop.DBus.Peer",
+ "org.freedesktop.DBus.Introspectable");
+
+ LIST_FOREACH(vtables, c, first) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, m->path, c, &u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ *found_object = true;
+
+ if (iface && !streq(c->interface, iface))
+ continue;
+ found_interface = true;
+
+ r = vtable_append_all_properties(bus, reply, m->path, c, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (!*found_object)
+ return 0;
+
+ if (!found_interface) {
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_INTERFACE,
+ "Unknown interface '%s'.", iface);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int bus_node_exists(
+ sd_bus *bus,
+ struct node *n,
+ const char *path,
+ bool require_fallback) {
+
+ struct node_vtable *c;
+ struct node_callback *k;
+ int r;
+
+ assert(bus);
+ assert(n);
+ assert(path);
+
+ /* Tests if there's anything attached directly to this node
+ * for the specified path */
+
+ if (!require_fallback && (n->enumerators || n->object_managers))
+ return true;
+
+ LIST_FOREACH(callbacks, k, n->callbacks) {
+ if (require_fallback && !k->is_fallback)
+ continue;
+
+ return 1;
+ }
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, NULL, &error);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+int introspect_path(
+ sd_bus *bus,
+ const char *path,
+ struct node *n,
+ bool require_fallback,
+ bool ignore_nodes_modified,
+ bool *found_object,
+ char **ret,
+ sd_bus_error *error) {
+
+ _cleanup_set_free_free_ Set *s = NULL;
+ _cleanup_(introspect_free) struct introspect intro = {};
+ struct node_vtable *c;
+ bool empty;
+ int r;
+
+ if (!n) {
+ n = hashmap_get(bus->nodes, path);
+ if (!n)
+ return -ENOENT;
+ }
+
+ r = get_child_nodes(bus, path, n, 0, &s, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified && !ignore_nodes_modified)
+ return 0;
+
+ r = introspect_begin(&intro, bus->trusted);
+ if (r < 0)
+ return r;
+
+ r = introspect_write_default_interfaces(&intro, !require_fallback && n->object_managers);
+ if (r < 0)
+ return r;
+
+ empty = set_isempty(s);
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, NULL, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified && !ignore_nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ empty = false;
+
+ if (c->vtable[0].flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ r = introspect_write_interface(&intro, c->interface, c->vtable);
+ if (r < 0)
+ return r;
+ }
+
+ if (empty) {
+ /* Nothing?, let's see if we exist at all, and if not
+ * refuse to do anything */
+ r = bus_node_exists(bus, n, path, require_fallback);
+ if (r <= 0)
+ return r;
+ if (bus->nodes_modified && !ignore_nodes_modified)
+ return 0;
+ }
+
+ if (found_object)
+ *found_object = true;
+
+ r = introspect_write_child_nodes(&intro, s, path);
+ if (r < 0)
+ return r;
+
+ r = introspect_finish(&intro, ret);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int process_introspect(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node *n,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_free_ char *s = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(n);
+ assert(found_object);
+
+ r = introspect_path(bus, m->path, n, require_fallback, false, found_object, &s, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (r == 0)
+ /* nodes_modified == true */
+ return 0;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int object_manager_serialize_path(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *prefix,
+ const char *path,
+ bool require_fallback,
+ sd_bus_error *error) {
+
+ const char *previous_interface = NULL;
+ bool found_something = false;
+ struct node_vtable *i;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(prefix);
+ assert(path);
+ assert(error);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, i, n->vtables) {
+ void *u;
+
+ if (require_fallback && !i->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, i, &u, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!found_something) {
+
+ /* Open the object part */
+
+ r = sd_bus_message_open_container(reply, 'e', "oa{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "o", path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Peer", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Introspectable", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Properties", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.ObjectManager", 0);
+ if (r < 0)
+ return r;
+
+ found_something = true;
+ }
+
+ if (!streq_ptr(previous_interface, i->interface)) {
+
+ /* Maybe close the previous interface part */
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ /* Open the new interface part */
+
+ r = sd_bus_message_open_container(reply, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", i->interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+ }
+
+ r = vtable_append_all_properties(bus, reply, path, i, u, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ previous_interface = i->interface;
+ }
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ if (found_something) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static int object_manager_serialize_path_and_fallbacks(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(error);
+
+ /* First, add all vtables registered for this path */
+ r = object_manager_serialize_path(bus, reply, path, path, false, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ /* Second, add fallback vtables registered for any of the prefixes */
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_manager_serialize_path(bus, reply, prefix, path, true, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+static int process_get_managed_objects(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node *n,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_set_free_free_ Set *s = NULL;
+ char *path;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(n);
+ assert(found_object);
+
+ /* Spec says, GetManagedObjects() is only implemented on the root of a
+ * sub-tree. Therefore, we require a registered object-manager on
+ * exactly the queried path, otherwise, we refuse to respond. */
+
+ if (require_fallback || !n->object_managers)
+ return 0;
+
+ r = get_child_nodes(bus, m->path, n, CHILDREN_RECURSIVE, &s, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{oa{sa{sv}}}");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(path, s) {
+ r = object_manager_serialize_path_and_fallbacks(bus, reply, path, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int object_find_and_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *p,
+ bool require_fallback,
+ bool *found_object) {
+
+ struct node *n;
+ struct vtable_member vtable_key, *v;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(p);
+ assert(found_object);
+
+ n = hashmap_get(bus->nodes, p);
+ if (!n)
+ return 0;
+
+ /* First, try object callbacks */
+ r = node_callbacks_run(bus, m, n->callbacks, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ if (!m->interface || !m->member)
+ return 0;
+
+ /* Then, look for a known method */
+ vtable_key.path = (char*) p;
+ vtable_key.interface = m->interface;
+ vtable_key.member = m->member;
+
+ v = hashmap_get(bus->vtable_methods, &vtable_key);
+ if (v) {
+ r = method_callbacks_run(bus, m, v, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ /* Then, look for a known property */
+ if (streq(m->interface, "org.freedesktop.DBus.Properties")) {
+ bool get = false;
+
+ get = streq(m->member, "Get");
+
+ if (get || streq(m->member, "Set")) {
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ vtable_key.path = (char*) p;
+
+ r = sd_bus_message_read(m, "ss", &vtable_key.interface, &vtable_key.member);
+ if (r < 0)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected interface and member parameters");
+
+ v = hashmap_get(bus->vtable_properties, &vtable_key);
+ if (v) {
+ r = property_get_set_callbacks_run(bus, m, v, require_fallback, get, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } else if (streq(m->member, "GetAll")) {
+ const char *iface;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "s", &iface);
+ if (r < 0)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected interface parameter");
+
+ if (iface[0] == 0)
+ iface = NULL;
+
+ r = property_get_all_callbacks_run(bus, m, n->vtables, require_fallback, iface, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Introspectable", "Introspect")) {
+
+ if (!isempty(sd_bus_message_get_signature(m, true)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected no parameters");
+
+ r = process_introspect(bus, m, n, require_fallback, found_object);
+ if (r != 0)
+ return r;
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.ObjectManager", "GetManagedObjects")) {
+
+ if (!isempty(sd_bus_message_get_signature(m, true)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected no parameters");
+
+ r = process_get_managed_objects(bus, m, n, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ if (bus->nodes_modified)
+ return 0;
+
+ if (!*found_object) {
+ r = bus_node_exists(bus, n, m->path, require_fallback);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, NULL);
+ if (bus->nodes_modified)
+ return 0;
+ if (r > 0)
+ *found_object = true;
+ }
+
+ return 0;
+}
+
+int bus_process_object(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_free_ char *prefix = NULL;
+ int r;
+ size_t pl;
+ bool found_object = false;
+
+ assert(bus);
+ assert(m);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (hashmap_isempty(bus->nodes))
+ return 0;
+
+ /* Never respond to broadcast messages */
+ if (bus->bus_client && !m->destination)
+ return 0;
+
+ assert(m->path);
+ assert(m->member);
+
+ pl = strlen(m->path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ do {
+ bus->nodes_modified = false;
+
+ r = object_find_and_run(bus, m, m->path, false, &found_object);
+ if (r != 0)
+ return r;
+
+ /* Look for fallback prefixes */
+ OBJECT_PATH_FOREACH_PREFIX(prefix, m->path) {
+
+ if (bus->nodes_modified)
+ break;
+
+ r = object_find_and_run(bus, m, prefix, true, &found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } while (bus->nodes_modified);
+
+ if (!found_object)
+ return 0;
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Properties", "Get") ||
+ sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Properties", "Set")) {
+ const char *interface = NULL, *property = NULL;
+
+ (void) sd_bus_message_rewind(m, true);
+ (void) sd_bus_message_read_basic(m, 's', &interface);
+ (void) sd_bus_message_read_basic(m, 's', &property);
+
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_PROPERTY,
+ "Unknown interface %s or property %s.", strnull(interface), strnull(property));
+ } else
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_METHOD,
+ "Unknown method %s or interface %s.", m->member, m->interface);
+
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static struct node *bus_node_allocate(sd_bus *bus, const char *path) {
+ struct node *n, *parent;
+ const char *e;
+ _cleanup_free_ char *s = NULL;
+ char *p;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(path[0] == '/');
+
+ n = hashmap_get(bus->nodes, path);
+ if (n)
+ return n;
+
+ r = hashmap_ensure_allocated(&bus->nodes, &string_hash_ops);
+ if (r < 0)
+ return NULL;
+
+ s = strdup(path);
+ if (!s)
+ return NULL;
+
+ if (streq(path, "/"))
+ parent = NULL;
+ else {
+ e = strrchr(path, '/');
+ assert(e);
+
+ p = strndupa(path, MAX(1, e - path));
+
+ parent = bus_node_allocate(bus, p);
+ if (!parent)
+ return NULL;
+ }
+
+ n = new0(struct node, 1);
+ if (!n)
+ return NULL;
+
+ n->parent = parent;
+ n->path = TAKE_PTR(s);
+
+ r = hashmap_put(bus->nodes, n->path, n);
+ if (r < 0) {
+ free(n->path);
+ return mfree(n);
+ }
+
+ if (parent)
+ LIST_PREPEND(siblings, parent->child, n);
+
+ return n;
+}
+
+void bus_node_gc(sd_bus *b, struct node *n) {
+ assert(b);
+
+ if (!n)
+ return;
+
+ if (n->child ||
+ n->callbacks ||
+ n->vtables ||
+ n->enumerators ||
+ n->object_managers)
+ return;
+
+ assert_se(hashmap_remove(b->nodes, n->path) == n);
+
+ if (n->parent)
+ LIST_REMOVE(siblings, n->parent->child, n);
+
+ free(n->path);
+ bus_node_gc(b, n->parent);
+ free(n);
+}
+
+static int bus_find_parent_object_manager(sd_bus *bus, struct node **out, const char *path) {
+ struct node *n;
+
+ assert(bus);
+ assert(path);
+
+ n = hashmap_get(bus->nodes, path);
+ if (!n) {
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ n = hashmap_get(bus->nodes, prefix);
+ if (n)
+ break;
+ }
+ }
+
+ while (n && !n->object_managers)
+ n = n->parent;
+
+ if (out)
+ *out = n;
+ return !!n;
+}
+
+static int bus_add_object(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ bool fallback,
+ const char *path,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_CALLBACK, sizeof(struct node_callback), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_callback.callback = callback;
+ s->node_callback.is_fallback = fallback;
+
+ s->node_callback.node = n;
+ LIST_PREPEND(callbacks, n->callbacks, &s->node_callback);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+_public_ int sd_bus_add_object(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_object(bus, slot, false, path, callback, userdata);
+}
+
+_public_ int sd_bus_add_fallback(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *prefix,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_object(bus, slot, true, prefix, callback, userdata);
+}
+
+static void vtable_member_hash_func(const struct vtable_member *m, struct siphash *state) {
+ assert(m);
+
+ string_hash_func(m->path, state);
+ string_hash_func(m->interface, state);
+ string_hash_func(m->member, state);
+}
+
+static int vtable_member_compare_func(const struct vtable_member *x, const struct vtable_member *y) {
+ int r;
+
+ assert(x);
+ assert(y);
+
+ r = strcmp(x->path, y->path);
+ if (r != 0)
+ return r;
+
+ r = strcmp(x->interface, y->interface);
+ if (r != 0)
+ return r;
+
+ return strcmp(x->member, y->member);
+}
+
+DEFINE_PRIVATE_HASH_OPS(vtable_member_hash_ops, struct vtable_member, vtable_member_hash_func, vtable_member_compare_func);
+
+typedef enum {
+ NAMES_FIRST_PART = 1 << 0, /* first part of argument name list (input names). It is reset by names_are_valid() */
+ NAMES_PRESENT = 1 << 1, /* at least one argument name is present, so the names will checked.
+ This flag is set and used internally by names_are_valid(), but needs to be stored across calls for 2-parts list */
+ NAMES_SINGLE_PART = 1 << 2, /* argument name list consisting of a single part */
+} names_flags;
+
+static bool names_are_valid(const char *signature, const char **names, names_flags *flags) {
+ int r;
+
+ if ((*flags & NAMES_FIRST_PART || *flags & NAMES_SINGLE_PART) && **names != '\0')
+ *flags |= NAMES_PRESENT;
+
+ for (;*flags & NAMES_PRESENT;) {
+ size_t l;
+
+ if (!*signature)
+ break;
+
+ r = signature_element_length(signature, &l);
+ if (r < 0)
+ return false;
+
+ if (**names != '\0') {
+ if (!member_name_is_valid(*names))
+ return false;
+ *names += strlen(*names) + 1;
+ } else if (*flags & NAMES_PRESENT)
+ return false;
+
+ signature += l;
+ }
+ /* let's check if there are more argument names specified than the signature allows */
+ if (*flags & NAMES_PRESENT && **names != '\0' && !(*flags & NAMES_FIRST_PART))
+ return false;
+ *flags &= ~NAMES_FIRST_PART;
+ return true;
+}
+
+/* the current version of this struct is defined in sd-bus-vtable.h, but we need to list here the historical versions
+ to make sure the calling code is compatible with one of these */
+struct sd_bus_vtable_221 {
+ uint8_t type:8;
+ uint64_t flags:56;
+ union {
+ struct {
+ size_t element_size;
+ } start;
+ struct {
+ const char *member;
+ const char *signature;
+ const char *result;
+ sd_bus_message_handler_t handler;
+ size_t offset;
+ } method;
+ struct {
+ const char *member;
+ const char *signature;
+ } signal;
+ struct {
+ const char *member;
+ const char *signature;
+ sd_bus_property_get_t get;
+ sd_bus_property_set_t set;
+ size_t offset;
+ } property;
+ } x;
+};
+/* Structure size up to v241 */
+#define VTABLE_ELEMENT_SIZE_221 sizeof(struct sd_bus_vtable_221)
+
+/* Size of the structure when "features" field was added. If the structure definition is augmented, a copy of
+ * the structure definition will need to be made (similarly to the sd_bus_vtable_221 above), and this
+ * definition updated to refer to it. */
+#define VTABLE_ELEMENT_SIZE_242 sizeof(struct sd_bus_vtable)
+
+static int vtable_features(const sd_bus_vtable *vtable) {
+ if (vtable[0].x.start.element_size < VTABLE_ELEMENT_SIZE_242 ||
+ !vtable[0].x.start.vtable_format_reference)
+ return 0;
+ return vtable[0].x.start.features;
+}
+
+bool bus_vtable_has_names(const sd_bus_vtable *vtable) {
+ return vtable_features(vtable) & _SD_BUS_VTABLE_PARAM_NAMES;
+}
+
+const sd_bus_vtable* bus_vtable_next(const sd_bus_vtable *vtable, const sd_bus_vtable *v) {
+ return (const sd_bus_vtable*) ((char*) v + vtable[0].x.start.element_size);
+}
+
+static int add_object_vtable_internal(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ bool fallback,
+ sd_bus_object_find_t find,
+ void *userdata) {
+
+ sd_bus_slot *s = NULL;
+ struct node_vtable *i, *existing = NULL;
+ const sd_bus_vtable *v;
+ struct node *n;
+ int r;
+ const char *names = "";
+ names_flags nf;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(vtable, -EINVAL);
+ assert_return(vtable[0].type == _SD_BUS_VTABLE_START, -EINVAL);
+ assert_return(vtable[0].x.start.element_size == VTABLE_ELEMENT_SIZE_221 ||
+ vtable[0].x.start.element_size >= VTABLE_ELEMENT_SIZE_242,
+ -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(!streq(interface, "org.freedesktop.DBus.Properties") &&
+ !streq(interface, "org.freedesktop.DBus.Introspectable") &&
+ !streq(interface, "org.freedesktop.DBus.Peer") &&
+ !streq(interface, "org.freedesktop.DBus.ObjectManager"), -EINVAL);
+
+ r = hashmap_ensure_allocated(&bus->vtable_methods, &vtable_member_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&bus->vtable_properties, &vtable_member_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ LIST_FOREACH(vtables, i, n->vtables) {
+ if (i->is_fallback != fallback) {
+ r = -EPROTOTYPE;
+ goto fail;
+ }
+
+ if (streq(i->interface, interface)) {
+
+ if (i->vtable == vtable) {
+ r = -EEXIST;
+ goto fail;
+ }
+
+ existing = i;
+ }
+ }
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_VTABLE, sizeof(struct node_vtable), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_vtable.is_fallback = fallback;
+ s->node_vtable.vtable = vtable;
+ s->node_vtable.find = find;
+
+ s->node_vtable.interface = strdup(interface);
+ if (!s->node_vtable.interface) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ v = s->node_vtable.vtable;
+ for (v = bus_vtable_next(vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(vtable, v)) {
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_METHOD: {
+ struct vtable_member *m;
+ nf = NAMES_FIRST_PART;
+
+ if (bus_vtable_has_names(vtable))
+ names = strempty(v->x.method.names);
+
+ if (!member_name_is_valid(v->x.method.member) ||
+ !signature_is_valid(strempty(v->x.method.signature), false) ||
+ !signature_is_valid(strempty(v->x.method.result), false) ||
+ !names_are_valid(strempty(v->x.method.signature), &names, &nf) ||
+ !names_are_valid(strempty(v->x.method.result), &names, &nf) ||
+ !(v->x.method.handler || (isempty(v->x.method.signature) && isempty(v->x.method.result))) ||
+ v->flags & (SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ m = new0(struct vtable_member, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->parent = &s->node_vtable;
+ m->path = n->path;
+ m->interface = s->node_vtable.interface;
+ m->member = v->x.method.member;
+ m->vtable = v;
+
+ r = hashmap_put(bus->vtable_methods, m, m);
+ if (r < 0) {
+ free(m);
+ goto fail;
+ }
+
+ break;
+ }
+
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY:
+
+ if (!(v->x.property.set || bus_type_is_basic(v->x.property.signature[0]))) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_CONST) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ _fallthrough_;
+ case _SD_BUS_VTABLE_PROPERTY: {
+ struct vtable_member *m;
+
+ if (!member_name_is_valid(v->x.property.member) ||
+ !signature_is_single(v->x.property.signature, false) ||
+ !(v->x.property.get || bus_type_is_basic(v->x.property.signature[0]) || streq(v->x.property.signature, "as")) ||
+ (v->flags & SD_BUS_VTABLE_METHOD_NO_REPLY) ||
+ (!!(v->flags & SD_BUS_VTABLE_PROPERTY_CONST) + !!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) + !!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) > 1 ||
+ ((v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) && (v->flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)) ||
+ (v->flags & SD_BUS_VTABLE_UNPRIVILEGED && v->type == _SD_BUS_VTABLE_PROPERTY)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ m = new0(struct vtable_member, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->parent = &s->node_vtable;
+ m->path = n->path;
+ m->interface = s->node_vtable.interface;
+ m->member = v->x.property.member;
+ m->vtable = v;
+
+ r = hashmap_put(bus->vtable_properties, m, m);
+ if (r < 0) {
+ free(m);
+ goto fail;
+ }
+
+ break;
+ }
+
+ case _SD_BUS_VTABLE_SIGNAL:
+ nf = NAMES_SINGLE_PART;
+
+ if (bus_vtable_has_names(vtable))
+ names = strempty(v->x.signal.names);
+
+ if (!member_name_is_valid(v->x.signal.member) ||
+ !signature_is_valid(strempty(v->x.signal.signature), false) ||
+ !names_are_valid(strempty(v->x.signal.signature), &names, &nf) ||
+ v->flags & SD_BUS_VTABLE_UNPRIVILEGED) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ break;
+
+ default:
+ r = -EINVAL;
+ goto fail;
+ }
+ }
+
+ s->node_vtable.node = n;
+ LIST_INSERT_AFTER(vtables, n->vtables, existing, &s->node_vtable);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+/* This symbol exists solely to tell the linker that the "new" vtable format is used. */
+_public_ const unsigned sd_bus_object_vtable_format = 242;
+
+_public_ int sd_bus_add_object_vtable(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ void *userdata) {
+
+ return add_object_vtable_internal(bus, slot, path, interface, vtable, false, NULL, userdata);
+}
+
+_public_ int sd_bus_add_fallback_vtable(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *prefix,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ sd_bus_object_find_t find,
+ void *userdata) {
+
+ return add_object_vtable_internal(bus, slot, prefix, interface, vtable, true, find, userdata);
+}
+
+_public_ int sd_bus_add_node_enumerator(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ sd_bus_node_enumerator_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_ENUMERATOR, sizeof(struct node_enumerator), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_enumerator.callback = callback;
+
+ s->node_enumerator.node = n;
+ LIST_PREPEND(enumerators, n->enumerators, &s->node_enumerator);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+static int emit_properties_changed_on_interface(
+ sd_bus *bus,
+ const char *prefix,
+ const char *path,
+ const char *interface,
+ bool require_fallback,
+ bool *found_interface,
+ char **names) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ bool has_invalidating = false, has_changing = false;
+ struct vtable_member key = {};
+ struct node_vtable *c;
+ struct node *n;
+ char **property;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(path);
+ assert(interface);
+ assert(found_interface);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ r = sd_bus_message_new_signal(bus, &m, path, "org.freedesktop.DBus.Properties", "PropertiesChanged");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "s", interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ key.path = prefix;
+ key.interface = interface;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ *found_interface = true;
+
+ if (names) {
+ /* If the caller specified a list of
+ * properties we include exactly those in the
+ * PropertiesChanged message */
+
+ STRV_FOREACH(property, names) {
+ struct vtable_member *v;
+
+ assert_return(member_name_is_valid(*property), -EINVAL);
+
+ key.member = *property;
+ v = hashmap_get(bus->vtable_properties, &key);
+ if (!v)
+ return -ENOENT;
+
+ /* If there are two vtables for the same
+ * interface, let's handle this property when
+ * we come to that vtable. */
+ if (c != v->parent)
+ continue;
+
+ assert_return(v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE ||
+ v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION, -EDOM);
+
+ assert_return(!(v->vtable->flags & SD_BUS_VTABLE_HIDDEN), -EDOM);
+
+ if (v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) {
+ has_invalidating = true;
+ continue;
+ }
+
+ has_changing = true;
+
+ r = vtable_append_one_property(bus, m, m->path, c, v->vtable, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ } else {
+ const sd_bus_vtable *v;
+
+ /* If the caller specified no properties list
+ * we include all properties that are marked
+ * as changing in the message. */
+
+ v = c->vtable;
+ for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) {
+ has_invalidating = true;
+ continue;
+ }
+
+ if (!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE))
+ continue;
+
+ has_changing = true;
+
+ r = vtable_append_one_property(bus, m, m->path, c, v, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ }
+ }
+
+ if (!has_invalidating && !has_changing)
+ return 0;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ if (has_invalidating) {
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (names) {
+ STRV_FOREACH(property, names) {
+ struct vtable_member *v;
+
+ key.member = *property;
+ assert_se(v = hashmap_get(bus->vtable_properties, &key));
+ assert(c == v->parent);
+
+ if (!(v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION))
+ continue;
+
+ r = sd_bus_message_append(m, "s", *property);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ const sd_bus_vtable *v;
+
+ v = c->vtable;
+ for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ if (!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION))
+ continue;
+
+ r = sd_bus_message_append(m, "s", v->x.property.member);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_bus_emit_properties_changed_strv(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ char **names) {
+
+ _cleanup_free_ char *prefix = NULL;
+ bool found_interface = false;
+ size_t pl;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* A non-NULL but empty names list means nothing needs to be
+ generated. A NULL list OTOH indicates that all properties
+ that are set to EMITS_CHANGE or EMITS_INVALIDATION shall be
+ included in the PropertiesChanged message. */
+ if (names && names[0] == NULL)
+ return 0;
+
+ BUS_DONT_DESTROY(bus);
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ do {
+ bus->nodes_modified = false;
+
+ r = emit_properties_changed_on_interface(bus, path, path, interface, false, &found_interface, names);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ continue;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = emit_properties_changed_on_interface(bus, prefix, path, interface, true, &found_interface, names);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ break;
+ }
+
+ } while (bus->nodes_modified);
+
+ return found_interface ? 0 : -ENOENT;
+}
+
+_public_ int sd_bus_emit_properties_changed(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *name, ...) {
+
+ char **names;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!name)
+ return 0;
+
+ names = strv_from_stdarg_alloca(name);
+
+ return sd_bus_emit_properties_changed_strv(bus, path, interface, names);
+}
+
+static int object_added_append_all_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ Set *s,
+ const char *prefix,
+ const char *path,
+ bool require_fallback) {
+
+ const char *previous_interface = NULL;
+ struct node_vtable *c;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(s);
+ assert(prefix);
+ assert(path);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u = NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!streq_ptr(c->interface, previous_interface)) {
+ /* If a child-node already handled this interface, we
+ * skip it on any of its parents. The child vtables
+ * always fully override any conflicting vtables of
+ * any parent node. */
+ if (set_get(s, c->interface))
+ continue;
+
+ r = set_put(s, c->interface);
+ if (r < 0)
+ return r;
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_open_container(m, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", c->interface);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ previous_interface = c->interface;
+ }
+
+ r = vtable_append_all_properties(bus, m, path, c, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int object_added_append_all(sd_bus *bus, sd_bus_message *m, const char *path) {
+ _cleanup_set_free_ Set *s = NULL;
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+
+ /*
+ * This appends all interfaces registered on path @path. We first add
+ * the builtin interfaces, which are always available and handled by
+ * sd-bus. Then, we add all interfaces registered on the exact node,
+ * followed by all fallback interfaces registered on any parent prefix.
+ *
+ * If an interface is registered multiple times on the same node with
+ * different vtables, we merge all the properties across all vtables.
+ * However, if a child node has the same interface registered as one of
+ * its parent nodes has as fallback, we make the child overwrite the
+ * parent instead of extending it. Therefore, we keep a "Set" of all
+ * handled interfaces during parent traversal, so we skip interfaces on
+ * a parent that were overwritten by a child.
+ */
+
+ s = set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Peer", 0);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Introspectable", 0);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Properties", 0);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.ObjectManager", 0);
+ if (r < 0)
+ return r;
+
+ r = object_added_append_all_prefix(bus, m, s, path, path, false);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_added_append_all_prefix(bus, m, s, prefix, path, true);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_emit_object_added(sd_bus *bus, const char *path) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ /*
+ * This emits an InterfacesAdded signal on the given path, by iterating
+ * all registered vtables and fallback vtables on the path. All
+ * properties are queried and included in the signal.
+ * This call is equivalent to sd_bus_emit_interfaces_added() with an
+ * explicit list of registered interfaces. However, unlike
+ * interfaces_added(), this call can figure out the list of supported
+ * interfaces itself. Furthermore, it properly adds the builtin
+ * org.freedesktop.DBus.* interfaces.
+ */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = bus_find_parent_object_manager(bus, &object_manager, path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = object_added_append_all(bus, m, path);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int object_removed_append_all_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ Set *s,
+ const char *prefix,
+ const char *path,
+ bool require_fallback) {
+
+ const char *previous_interface = NULL;
+ struct node_vtable *c;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(s);
+ assert(prefix);
+ assert(path);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u = NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+ if (streq_ptr(c->interface, previous_interface))
+ continue;
+
+ /* If a child-node already handled this interface, we
+ * skip it on any of its parents. The child vtables
+ * always fully override any conflicting vtables of
+ * any parent node. */
+ if (set_get(s, c->interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ r = set_put(s, c->interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "s", c->interface);
+ if (r < 0)
+ return r;
+
+ previous_interface = c->interface;
+ }
+
+ return 0;
+}
+
+static int object_removed_append_all(sd_bus *bus, sd_bus_message *m, const char *path) {
+ _cleanup_set_free_ Set *s = NULL;
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+
+ /* see sd_bus_emit_object_added() for details */
+
+ s = set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Peer");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Introspectable");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Properties");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.ObjectManager");
+ if (r < 0)
+ return r;
+
+ r = object_removed_append_all_prefix(bus, m, s, path, path, false);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_removed_append_all_prefix(bus, m, s, prefix, path, true);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_emit_object_removed(sd_bus *bus, const char *path) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ /*
+ * This is like sd_bus_emit_object_added(), but emits an
+ * InterfacesRemoved signal on the given path. This only includes any
+ * registered interfaces but skips the properties. Note that this will
+ * call into the find() callbacks of any registered vtable. Therefore,
+ * you must call this function before destroying/unlinking your object.
+ * Otherwise, the list of interfaces will be incomplete. However, note
+ * that this will *NOT* call into any property callback. Therefore, the
+ * object might be in an "destructed" state, as long as we can find it.
+ */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = bus_find_parent_object_manager(bus, &object_manager, path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ r = object_removed_append_all(bus, m, path);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int interfaces_added_append_one_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *prefix,
+ const char *path,
+ const char *interface,
+ bool require_fallback) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool found_interface = false;
+ struct node_vtable *c;
+ struct node *n;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(prefix);
+ assert(path);
+ assert(interface);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!found_interface) {
+ r = sd_bus_message_append_basic(m, 's', interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ found_interface = true;
+ }
+
+ r = vtable_append_all_properties(bus, m, path, c, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (found_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ return found_interface;
+}
+
+static int interfaces_added_append_one(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *path,
+ const char *interface) {
+
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+ assert(interface);
+
+ r = interfaces_added_append_one_prefix(bus, m, path, path, interface, false);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = interfaces_added_append_one_prefix(bus, m, prefix, path, interface, true);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+_public_ int sd_bus_emit_interfaces_added_strv(sd_bus *bus, const char *path, char **interfaces) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ char **i;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (strv_isempty(interfaces))
+ return 0;
+
+ r = bus_find_parent_object_manager(bus, &object_manager, path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, interfaces) {
+ assert_return(interface_name_is_valid(*i), -EINVAL);
+
+ r = sd_bus_message_open_container(m, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+
+ r = interfaces_added_append_one(bus, m, path, *i);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ break;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_emit_interfaces_added(sd_bus *bus, const char *path, const char *interface, ...) {
+ char **interfaces;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ interfaces = strv_from_stdarg_alloca(interface);
+
+ return sd_bus_emit_interfaces_added_strv(bus, path, interfaces);
+}
+
+_public_ int sd_bus_emit_interfaces_removed_strv(sd_bus *bus, const char *path, char **interfaces) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (strv_isempty(interfaces))
+ return 0;
+
+ r = bus_find_parent_object_manager(bus, &object_manager, path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(m, interfaces);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_emit_interfaces_removed(sd_bus *bus, const char *path, const char *interface, ...) {
+ char **interfaces;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ interfaces = strv_from_stdarg_alloca(interface);
+
+ return sd_bus_emit_interfaces_removed_strv(bus, path, interfaces);
+}
+
+_public_ int sd_bus_add_object_manager(sd_bus *bus, sd_bus_slot **slot, const char *path) {
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_OBJECT_MANAGER, sizeof(struct node_object_manager), NULL);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_object_manager.node = n;
+ LIST_PREPEND(object_managers, n->object_managers, &s->node_object_manager);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
diff --git a/src/libsystemd/sd-bus/bus-objects.h b/src/libsystemd/sd-bus/bus-objects.h
new file mode 100644
index 0000000..20fccfa
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-objects.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-internal.h"
+#include "bus-introspect.h"
+
+const sd_bus_vtable* bus_vtable_next(const sd_bus_vtable *vtable, const sd_bus_vtable *v);
+bool bus_vtable_has_names(const sd_bus_vtable *vtable);
+int bus_process_object(sd_bus *bus, sd_bus_message *m);
+void bus_node_gc(sd_bus *b, struct node *n);
+
+int introspect_path(
+ sd_bus *bus,
+ const char *path,
+ struct node *n,
+ bool require_fallback,
+ bool ignore_nodes_modified,
+ bool *found_object,
+ char **ret,
+ sd_bus_error *error);
diff --git a/src/libsystemd/sd-bus/bus-protocol.h b/src/libsystemd/sd-bus/bus-protocol.h
new file mode 100644
index 0000000..c41ab58
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-protocol.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <endian.h>
+
+#include "macro.h"
+
+/* Packet header */
+
+struct _packed_ bus_header {
+ /* The first four fields are identical for dbus1, and dbus2 */
+ uint8_t endian;
+ uint8_t type;
+ uint8_t flags;
+ uint8_t version;
+
+ union _packed_ {
+ /* dbus1: Used for SOCK_STREAM connections */
+ struct _packed_ {
+ uint32_t body_size;
+
+ /* Note that what the bus spec calls "serial" we'll call
+ "cookie" instead, because we don't want to imply that the
+ cookie was in any way monotonically increasing. */
+ uint32_t serial;
+ uint32_t fields_size;
+ } dbus1;
+
+ /* dbus2: Used for kdbus connections */
+ struct _packed_ {
+ uint32_t _reserved;
+ uint64_t cookie;
+ } dbus2;
+
+ /* Note that both header versions have the same size! */
+ };
+};
+
+/* Endianness */
+
+enum {
+ _BUS_INVALID_ENDIAN = 0,
+ BUS_LITTLE_ENDIAN = 'l',
+ BUS_BIG_ENDIAN = 'B',
+#if __BYTE_ORDER == __BIG_ENDIAN
+ BUS_NATIVE_ENDIAN = BUS_BIG_ENDIAN,
+ BUS_REVERSE_ENDIAN = BUS_LITTLE_ENDIAN
+#else
+ BUS_NATIVE_ENDIAN = BUS_LITTLE_ENDIAN,
+ BUS_REVERSE_ENDIAN = BUS_BIG_ENDIAN
+#endif
+};
+
+/* Flags */
+
+enum {
+ BUS_MESSAGE_NO_REPLY_EXPECTED = 1 << 0,
+ BUS_MESSAGE_NO_AUTO_START = 1 << 1,
+ BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION = 1 << 2,
+};
+
+/* Header fields */
+
+enum {
+ _BUS_MESSAGE_HEADER_INVALID = 0,
+ BUS_MESSAGE_HEADER_PATH,
+ BUS_MESSAGE_HEADER_INTERFACE,
+ BUS_MESSAGE_HEADER_MEMBER,
+ BUS_MESSAGE_HEADER_ERROR_NAME,
+ BUS_MESSAGE_HEADER_REPLY_SERIAL,
+ BUS_MESSAGE_HEADER_DESTINATION,
+ BUS_MESSAGE_HEADER_SENDER,
+ BUS_MESSAGE_HEADER_SIGNATURE,
+ BUS_MESSAGE_HEADER_UNIX_FDS,
+ _BUS_MESSAGE_HEADER_MAX
+};
+
+/* RequestName parameters */
+
+enum {
+ BUS_NAME_ALLOW_REPLACEMENT = 1 << 0,
+ BUS_NAME_REPLACE_EXISTING = 1 << 1,
+ BUS_NAME_DO_NOT_QUEUE = 1 << 2,
+};
+
+/* RequestName returns */
+enum {
+ BUS_NAME_PRIMARY_OWNER = 1,
+ BUS_NAME_IN_QUEUE = 2,
+ BUS_NAME_EXISTS = 3,
+ BUS_NAME_ALREADY_OWNER = 4
+};
+
+/* ReleaseName returns */
+enum {
+ BUS_NAME_RELEASED = 1,
+ BUS_NAME_NON_EXISTENT = 2,
+ BUS_NAME_NOT_OWNER = 3,
+};
+
+/* StartServiceByName returns */
+enum {
+ BUS_START_REPLY_SUCCESS = 1,
+ BUS_START_REPLY_ALREADY_RUNNING = 2,
+};
diff --git a/src/libsystemd/sd-bus/bus-signature.c b/src/libsystemd/sd-bus/bus-signature.c
new file mode 100644
index 0000000..bd0842f
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-signature.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <util.h>
+
+#include "sd-bus.h"
+
+#include "bus-signature.h"
+#include "bus-type.h"
+
+static int signature_element_length_internal(
+ const char *s,
+ bool allow_dict_entry,
+ unsigned array_depth,
+ unsigned struct_depth,
+ size_t *l) {
+
+ int r;
+
+ if (!s)
+ return -EINVAL;
+
+ assert(l);
+
+ if (bus_type_is_basic(*s) || *s == SD_BUS_TYPE_VARIANT) {
+ *l = 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_ARRAY) {
+ size_t t;
+
+ if (array_depth >= 32)
+ return -EINVAL;
+
+ r = signature_element_length_internal(s + 1, true, array_depth+1, struct_depth, &t);
+ if (r < 0)
+ return r;
+
+ *l = t + 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_STRUCT_BEGIN) {
+ const char *p = s + 1;
+
+ if (struct_depth >= 32)
+ return -EINVAL;
+
+ while (*p != SD_BUS_TYPE_STRUCT_END) {
+ size_t t;
+
+ r = signature_element_length_internal(p, false, array_depth, struct_depth+1, &t);
+ if (r < 0)
+ return r;
+
+ p += t;
+ }
+
+ if (p - s < 2)
+ /* D-Bus spec: Empty structures are not allowed; there
+ * must be at least one type code between the parentheses.
+ */
+ return -EINVAL;
+
+ *l = p - s + 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_DICT_ENTRY_BEGIN && allow_dict_entry) {
+ const char *p = s + 1;
+ unsigned n = 0;
+
+ if (struct_depth >= 32)
+ return -EINVAL;
+
+ while (*p != SD_BUS_TYPE_DICT_ENTRY_END) {
+ size_t t;
+
+ if (n == 0 && !bus_type_is_basic(*p))
+ return -EINVAL;
+
+ r = signature_element_length_internal(p, false, array_depth, struct_depth+1, &t);
+ if (r < 0)
+ return r;
+
+ p += t;
+ n++;
+ }
+
+ if (n != 2)
+ return -EINVAL;
+
+ *l = p - s + 1;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int signature_element_length(const char *s, size_t *l) {
+ return signature_element_length_internal(s, true, 0, 0, l);
+}
+
+bool signature_is_single(const char *s, bool allow_dict_entry) {
+ int r;
+ size_t t;
+
+ if (!s)
+ return false;
+
+ r = signature_element_length_internal(s, allow_dict_entry, 0, 0, &t);
+ if (r < 0)
+ return false;
+
+ return s[t] == 0;
+}
+
+bool signature_is_pair(const char *s) {
+
+ if (!s)
+ return false;
+
+ if (!bus_type_is_basic(*s))
+ return false;
+
+ return signature_is_single(s + 1, false);
+}
+
+bool signature_is_valid(const char *s, bool allow_dict_entry) {
+ const char *p;
+ int r;
+
+ if (!s)
+ return false;
+
+ p = s;
+ while (*p) {
+ size_t t;
+
+ r = signature_element_length_internal(p, allow_dict_entry, 0, 0, &t);
+ if (r < 0)
+ return false;
+
+ p += t;
+ }
+
+ return p - s <= SD_BUS_MAXIMUM_SIGNATURE_LENGTH;
+}
diff --git a/src/libsystemd/sd-bus/bus-signature.h b/src/libsystemd/sd-bus/bus-signature.h
new file mode 100644
index 0000000..314fcc2
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-signature.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+bool signature_is_single(const char *s, bool allow_dict_entry);
+bool signature_is_pair(const char *s);
+bool signature_is_valid(const char *s, bool allow_dict_entry);
+
+int signature_element_length(const char *s, size_t *l);
diff --git a/src/libsystemd/sd-bus/bus-slot.c b/src/libsystemd/sd-bus/bus-slot.c
new file mode 100644
index 0000000..a0009a7
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-slot.c
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-control.h"
+#include "bus-objects.h"
+#include "bus-slot.h"
+#include "string-util.h"
+
+sd_bus_slot *bus_slot_allocate(
+ sd_bus *bus,
+ bool floating,
+ BusSlotType type,
+ size_t extra,
+ void *userdata) {
+
+ sd_bus_slot *slot;
+
+ assert(bus);
+
+ slot = malloc0(offsetof(sd_bus_slot, reply_callback) + extra);
+ if (!slot)
+ return NULL;
+
+ slot->n_ref = 1;
+ slot->type = type;
+ slot->bus = bus;
+ slot->floating = floating;
+ slot->userdata = userdata;
+
+ if (!floating)
+ sd_bus_ref(bus);
+
+ LIST_PREPEND(slots, bus->slots, slot);
+
+ return slot;
+}
+
+void bus_slot_disconnect(sd_bus_slot *slot, bool unref) {
+ sd_bus *bus;
+
+ assert(slot);
+
+ if (!slot->bus)
+ return;
+
+ switch (slot->type) {
+
+ case BUS_REPLY_CALLBACK:
+
+ if (slot->reply_callback.cookie != 0)
+ ordered_hashmap_remove(slot->bus->reply_callbacks, &slot->reply_callback.cookie);
+
+ if (slot->reply_callback.timeout_usec != 0)
+ prioq_remove(slot->bus->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+
+ break;
+
+ case BUS_FILTER_CALLBACK:
+ slot->bus->filter_callbacks_modified = true;
+ LIST_REMOVE(callbacks, slot->bus->filter_callbacks, &slot->filter_callback);
+ break;
+
+ case BUS_MATCH_CALLBACK:
+
+ if (slot->match_added)
+ (void) bus_remove_match_internal(slot->bus, slot->match_callback.match_string);
+
+ if (slot->match_callback.install_slot) {
+ bus_slot_disconnect(slot->match_callback.install_slot, true);
+ slot->match_callback.install_slot = sd_bus_slot_unref(slot->match_callback.install_slot);
+ }
+
+ slot->bus->match_callbacks_modified = true;
+ bus_match_remove(&slot->bus->match_callbacks, &slot->match_callback);
+
+ slot->match_callback.match_string = mfree(slot->match_callback.match_string);
+
+ break;
+
+ case BUS_NODE_CALLBACK:
+
+ if (slot->node_callback.node) {
+ LIST_REMOVE(callbacks, slot->node_callback.node->callbacks, &slot->node_callback);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_callback.node);
+ }
+
+ break;
+
+ case BUS_NODE_ENUMERATOR:
+
+ if (slot->node_enumerator.node) {
+ LIST_REMOVE(enumerators, slot->node_enumerator.node->enumerators, &slot->node_enumerator);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_enumerator.node);
+ }
+
+ break;
+
+ case BUS_NODE_OBJECT_MANAGER:
+
+ if (slot->node_object_manager.node) {
+ LIST_REMOVE(object_managers, slot->node_object_manager.node->object_managers, &slot->node_object_manager);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_object_manager.node);
+ }
+
+ break;
+
+ case BUS_NODE_VTABLE:
+
+ if (slot->node_vtable.node && slot->node_vtable.interface && slot->node_vtable.vtable) {
+ const sd_bus_vtable *v;
+
+ for (v = slot->node_vtable.vtable; v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(slot->node_vtable.vtable, v)) {
+ struct vtable_member *x = NULL;
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_METHOD: {
+ struct vtable_member key;
+
+ key.path = slot->node_vtable.node->path;
+ key.interface = slot->node_vtable.interface;
+ key.member = v->x.method.member;
+
+ x = hashmap_remove(slot->bus->vtable_methods, &key);
+ break;
+ }
+
+ case _SD_BUS_VTABLE_PROPERTY:
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY: {
+ struct vtable_member key;
+
+ key.path = slot->node_vtable.node->path;
+ key.interface = slot->node_vtable.interface;
+ key.member = v->x.method.member;
+
+ x = hashmap_remove(slot->bus->vtable_properties, &key);
+ break;
+ }}
+
+ free(x);
+ }
+ }
+
+ slot->node_vtable.interface = mfree(slot->node_vtable.interface);
+
+ if (slot->node_vtable.node) {
+ LIST_REMOVE(vtables, slot->node_vtable.node->vtables, &slot->node_vtable);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_vtable.node);
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Wut? Unknown slot type?");
+ }
+
+ bus = slot->bus;
+
+ slot->type = _BUS_SLOT_INVALID;
+ slot->bus = NULL;
+ LIST_REMOVE(slots, bus->slots, slot);
+
+ if (!slot->floating)
+ sd_bus_unref(bus);
+ else if (unref)
+ sd_bus_slot_unref(slot);
+}
+
+static sd_bus_slot* bus_slot_free(sd_bus_slot *slot) {
+ assert(slot);
+
+ bus_slot_disconnect(slot, false);
+
+ if (slot->destroy_callback)
+ slot->destroy_callback(slot->userdata);
+
+ free(slot->description);
+ return mfree(slot);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_slot, sd_bus_slot, bus_slot_free);
+
+_public_ sd_bus* sd_bus_slot_get_bus(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->bus;
+}
+
+_public_ void *sd_bus_slot_get_userdata(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->userdata;
+}
+
+_public_ void *sd_bus_slot_set_userdata(sd_bus_slot *slot, void *userdata) {
+ void *ret;
+
+ assert_return(slot, NULL);
+
+ ret = slot->userdata;
+ slot->userdata = userdata;
+
+ return ret;
+}
+
+_public_ int sd_bus_slot_set_destroy_callback(sd_bus_slot *slot, sd_bus_destroy_t callback) {
+ assert_return(slot, -EINVAL);
+
+ slot->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_bus_slot_get_destroy_callback(sd_bus_slot *slot, sd_bus_destroy_t *callback) {
+ assert_return(slot, -EINVAL);
+
+ if (callback)
+ *callback = slot->destroy_callback;
+
+ return !!slot->destroy_callback;
+}
+
+_public_ sd_bus_message *sd_bus_slot_get_current_message(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_message;
+}
+
+_public_ sd_bus_message_handler_t sd_bus_slot_get_current_handler(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_handler;
+}
+
+_public_ void* sd_bus_slot_get_current_userdata(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_userdata;
+}
+
+_public_ int sd_bus_slot_get_floating(sd_bus_slot *slot) {
+ assert_return(slot, -EINVAL);
+
+ return slot->floating;
+}
+
+_public_ int sd_bus_slot_set_floating(sd_bus_slot *slot, int b) {
+ assert_return(slot, -EINVAL);
+
+ if (slot->floating == !!b)
+ return 0;
+
+ if (!slot->bus) /* already disconnected slots can't be reconnected */
+ return -ESTALE;
+
+ slot->floating = b;
+
+ /* When a slot is "floating" then the bus references the slot. Otherwise the slot references the bus. Hence,
+ * when we move from one to the other, let's increase one reference and decrease the other. */
+
+ if (b) {
+ sd_bus_slot_ref(slot);
+ sd_bus_unref(slot->bus);
+ } else {
+ sd_bus_ref(slot->bus);
+ sd_bus_slot_unref(slot);
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_slot_set_description(sd_bus_slot *slot, const char *description) {
+ assert_return(slot, -EINVAL);
+
+ return free_and_strdup(&slot->description, description);
+}
+
+_public_ int sd_bus_slot_get_description(sd_bus_slot *slot, const char **description) {
+ assert_return(slot, -EINVAL);
+ assert_return(description, -EINVAL);
+
+ if (slot->description)
+ *description = slot->description;
+ else if (slot->type == BUS_MATCH_CALLBACK)
+ *description = slot->match_callback.match_string;
+ else
+ return -ENXIO;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/bus-slot.h b/src/libsystemd/sd-bus/bus-slot.h
new file mode 100644
index 0000000..8116195
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-slot.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+
+sd_bus_slot *bus_slot_allocate(sd_bus *bus, bool floating, BusSlotType type, size_t extra, void *userdata);
+
+void bus_slot_disconnect(sd_bus_slot *slot, bool unref);
diff --git a/src/libsystemd/sd-bus/bus-socket.c b/src/libsystemd/sd-bus/bus-socket.c
new file mode 100644
index 0000000..4881fd0
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-socket.c
@@ -0,0 +1,1348 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-socket.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "user-util.h"
+#include "utf8.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static void iovec_advance(struct iovec iov[], unsigned *idx, size_t size) {
+
+ while (size > 0) {
+ struct iovec *i = iov + *idx;
+
+ if (i->iov_len > size) {
+ i->iov_base = (uint8_t*) i->iov_base + size;
+ i->iov_len -= size;
+ return;
+ }
+
+ size -= i->iov_len;
+
+ *i = IOVEC_MAKE(NULL, 0);
+
+ (*idx)++;
+ }
+}
+
+static int append_iovec(sd_bus_message *m, const void *p, size_t sz) {
+ assert(m);
+ assert(p);
+ assert(sz > 0);
+
+ m->iovec[m->n_iovec++] = IOVEC_MAKE((void*) p, sz);
+
+ return 0;
+}
+
+static int bus_message_setup_iovec(sd_bus_message *m) {
+ struct bus_body_part *part;
+ unsigned n, i;
+ int r;
+
+ assert(m);
+ assert(m->sealed);
+
+ if (m->n_iovec > 0)
+ return 0;
+
+ assert(!m->iovec);
+
+ n = 1 + m->n_body_parts;
+ if (n < ELEMENTSOF(m->iovec_fixed))
+ m->iovec = m->iovec_fixed;
+ else {
+ m->iovec = new(struct iovec, n);
+ if (!m->iovec) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ r = append_iovec(m, m->header, BUS_MESSAGE_BODY_BEGIN(m));
+ if (r < 0)
+ goto fail;
+
+ MESSAGE_FOREACH_PART(part, i, m) {
+ r = bus_body_part_map(part);
+ if (r < 0)
+ goto fail;
+
+ r = append_iovec(m, part->data, part->size);
+ if (r < 0)
+ goto fail;
+ }
+
+ assert(n == m->n_iovec);
+
+ return 0;
+
+fail:
+ m->poisoned = true;
+ return r;
+}
+
+bool bus_socket_auth_needs_write(sd_bus *b) {
+
+ unsigned i;
+
+ if (b->auth_index >= ELEMENTSOF(b->auth_iovec))
+ return false;
+
+ for (i = b->auth_index; i < ELEMENTSOF(b->auth_iovec); i++) {
+ struct iovec *j = b->auth_iovec + i;
+
+ if (j->iov_len > 0)
+ return true;
+ }
+
+ return false;
+}
+
+static int bus_socket_write_auth(sd_bus *b) {
+ ssize_t k;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ if (!bus_socket_auth_needs_write(b))
+ return 0;
+
+ if (b->prefer_writev)
+ k = writev(b->output_fd, b->auth_iovec + b->auth_index, ELEMENTSOF(b->auth_iovec) - b->auth_index);
+ else {
+ struct msghdr mh = {
+ .msg_iov = b->auth_iovec + b->auth_index,
+ .msg_iovlen = ELEMENTSOF(b->auth_iovec) - b->auth_index,
+ };
+
+ k = sendmsg(b->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0 && errno == ENOTSOCK) {
+ b->prefer_writev = true;
+ k = writev(b->output_fd, b->auth_iovec + b->auth_index, ELEMENTSOF(b->auth_iovec) - b->auth_index);
+ }
+ }
+
+ if (k < 0)
+ return errno == EAGAIN ? 0 : -errno;
+
+ iovec_advance(b->auth_iovec, &b->auth_index, (size_t) k);
+ return 1;
+}
+
+static int bus_socket_auth_verify_client(sd_bus *b) {
+ char *d, *e, *f, *start;
+ sd_id128_t peer;
+ int r;
+
+ assert(b);
+
+ /*
+ * We expect three response lines:
+ * "DATA\r\n"
+ * "OK <server-id>\r\n"
+ * "AGREE_UNIX_FD\r\n" (optional)
+ */
+
+ d = memmem_safe(b->rbuffer, b->rbuffer_size, "\r\n", 2);
+ if (!d)
+ return 0;
+
+ e = memmem(d + 2, b->rbuffer_size - (d - (char*) b->rbuffer) - 2, "\r\n", 2);
+ if (!e)
+ return 0;
+
+ if (b->accept_fd) {
+ f = memmem(e + 2, b->rbuffer_size - (e - (char*) b->rbuffer) - 2, "\r\n", 2);
+ if (!f)
+ return 0;
+
+ start = f + 2;
+ } else {
+ f = NULL;
+ start = e + 2;
+ }
+
+ /* Nice! We got all the lines we need. First check the DATA line. */
+
+ if (d - (char*) b->rbuffer == 4) {
+ if (memcmp(b->rbuffer, "DATA", 4))
+ return -EPERM;
+ } else if (d - (char*) b->rbuffer == 3 + 32) {
+ /*
+ * Old versions of the server-side implementation of `sd-bus` replied with "OK <id>" to
+ * "AUTH" requests from a client, even if the "AUTH" line did not contain inlined
+ * arguments. Therefore, we also accept "OK <id>" here, even though it is technically the
+ * wrong reply. We ignore the "<id>" parameter, though, since it has no real value.
+ */
+ if (memcmp(b->rbuffer, "OK ", 3))
+ return -EPERM;
+ } else
+ return -EPERM;
+
+ /* Now check the OK line. */
+
+ if (e - d != 2 + 3 + 32)
+ return -EPERM;
+
+ if (memcmp(d + 2, "OK ", 3))
+ return -EPERM;
+
+ b->auth = b->anonymous_auth ? BUS_AUTH_ANONYMOUS : BUS_AUTH_EXTERNAL;
+
+ for (unsigned i = 0; i < 32; i += 2) {
+ int x, y;
+
+ x = unhexchar(d[2 + 3 + i]);
+ y = unhexchar(d[2 + 3 + i + 1]);
+
+ if (x < 0 || y < 0)
+ return -EINVAL;
+
+ peer.bytes[i/2] = ((uint8_t) x << 4 | (uint8_t) y);
+ }
+
+ if (!sd_id128_is_null(b->server_id) &&
+ !sd_id128_equal(b->server_id, peer))
+ return -EPERM;
+
+ b->server_id = peer;
+
+ /* And possibly check the third line, too */
+
+ if (f)
+ b->can_fds =
+ (f - e == STRLEN("\r\nAGREE_UNIX_FD")) &&
+ memcmp(e + 2, "AGREE_UNIX_FD",
+ STRLEN("AGREE_UNIX_FD")) == 0;
+
+ b->rbuffer_size -= (start - (char*) b->rbuffer);
+ memmove(b->rbuffer, start, b->rbuffer_size);
+
+ r = bus_start_running(b);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static bool line_equals(const char *s, size_t m, const char *line) {
+ size_t l;
+
+ l = strlen(line);
+ if (l != m)
+ return false;
+
+ return memcmp(s, line, l) == 0;
+}
+
+static bool line_begins(const char *s, size_t m, const char *word) {
+ const char *p;
+
+ p = memory_startswith(s, m, word);
+ return p && (p == (s + m) || *p == ' ');
+}
+
+static int verify_anonymous_token(sd_bus *b, const char *p, size_t l) {
+ _cleanup_free_ char *token = NULL;
+ size_t len;
+ int r;
+
+ if (!b->anonymous_auth)
+ return 0;
+
+ if (l <= 0)
+ return 1;
+
+ assert(p[0] == ' ');
+ p++; l--;
+
+ if (l % 2 != 0)
+ return 0;
+
+ r = unhexmem(p, l, (void **) &token, &len);
+ if (r < 0)
+ return 0;
+
+ if (memchr(token, 0, len))
+ return 0;
+
+ return !!utf8_is_valid(token);
+}
+
+static int verify_external_token(sd_bus *b, const char *p, size_t l) {
+ _cleanup_free_ char *token = NULL;
+ size_t len;
+ uid_t u;
+ int r;
+
+ /* We don't do any real authentication here. Instead, we if
+ * the owner of this bus wanted authentication he should have
+ * checked SO_PEERCRED before even creating the bus object. */
+
+ if (!b->anonymous_auth && !b->ucred_valid)
+ return 0;
+
+ if (l <= 0)
+ return 1;
+
+ assert(p[0] == ' ');
+ p++; l--;
+
+ if (l % 2 != 0)
+ return 0;
+
+ r = unhexmem(p, l, (void**) &token, &len);
+ if (r < 0)
+ return 0;
+
+ if (memchr(token, 0, len))
+ return 0;
+
+ r = parse_uid(token, &u);
+ if (r < 0)
+ return 0;
+
+ /* We ignore the passed value if anonymous authentication is
+ * on anyway. */
+ if (!b->anonymous_auth && u != b->ucred.uid)
+ return 0;
+
+ return 1;
+}
+
+static int bus_socket_auth_write(sd_bus *b, const char *t) {
+ char *p;
+ size_t l;
+
+ assert(b);
+ assert(t);
+
+ /* We only make use of the first iovec */
+ assert(IN_SET(b->auth_index, 0, 1));
+
+ l = strlen(t);
+ p = malloc(b->auth_iovec[0].iov_len + l);
+ if (!p)
+ return -ENOMEM;
+
+ memcpy_safe(p, b->auth_iovec[0].iov_base, b->auth_iovec[0].iov_len);
+ memcpy(p + b->auth_iovec[0].iov_len, t, l);
+
+ b->auth_iovec[0].iov_base = p;
+ b->auth_iovec[0].iov_len += l;
+
+ free(b->auth_buffer);
+ b->auth_buffer = p;
+ b->auth_index = 0;
+ return 0;
+}
+
+static int bus_socket_auth_write_ok(sd_bus *b) {
+ char t[3 + 32 + 2 + 1];
+
+ assert(b);
+
+ xsprintf(t, "OK " SD_ID128_FORMAT_STR "\r\n", SD_ID128_FORMAT_VAL(b->server_id));
+
+ return bus_socket_auth_write(b, t);
+}
+
+static int bus_socket_auth_verify_server(sd_bus *b) {
+ char *e;
+ const char *line;
+ size_t l;
+ bool processed = false;
+ int r;
+
+ assert(b);
+
+ if (b->rbuffer_size < 1)
+ return 0;
+
+ /* First char must be a NUL byte */
+ if (*(char*) b->rbuffer != 0)
+ return -EIO;
+
+ if (b->rbuffer_size < 3)
+ return 0;
+
+ /* Begin with the first line */
+ if (b->auth_rbegin <= 0)
+ b->auth_rbegin = 1;
+
+ for (;;) {
+ /* Check if line is complete */
+ line = (char*) b->rbuffer + b->auth_rbegin;
+ e = memmem(line, b->rbuffer_size - b->auth_rbegin, "\r\n", 2);
+ if (!e)
+ return processed;
+
+ l = e - line;
+
+ if (line_begins(line, l, "AUTH ANONYMOUS")) {
+
+ r = verify_anonymous_token(b,
+ line + strlen("AUTH ANONYMOUS"),
+ l - strlen("AUTH ANONYMOUS"));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ else {
+ b->auth = BUS_AUTH_ANONYMOUS;
+ if (l <= strlen("AUTH ANONYMOUS"))
+ r = bus_socket_auth_write(b, "DATA\r\n");
+ else
+ r = bus_socket_auth_write_ok(b);
+ }
+
+ } else if (line_begins(line, l, "AUTH EXTERNAL")) {
+
+ r = verify_external_token(b,
+ line + strlen("AUTH EXTERNAL"),
+ l - strlen("AUTH EXTERNAL"));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ else {
+ b->auth = BUS_AUTH_EXTERNAL;
+ if (l <= strlen("AUTH EXTERNAL"))
+ r = bus_socket_auth_write(b, "DATA\r\n");
+ else
+ r = bus_socket_auth_write_ok(b);
+ }
+
+ } else if (line_begins(line, l, "AUTH"))
+ r = bus_socket_auth_write(b, "REJECTED EXTERNAL ANONYMOUS\r\n");
+ else if (line_equals(line, l, "CANCEL") ||
+ line_begins(line, l, "ERROR")) {
+
+ b->auth = _BUS_AUTH_INVALID;
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+
+ } else if (line_equals(line, l, "BEGIN")) {
+
+ if (b->auth == _BUS_AUTH_INVALID)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ /* We can't leave from the auth phase
+ * before we haven't written
+ * everything queued, so let's check
+ * that */
+
+ if (bus_socket_auth_needs_write(b))
+ return 1;
+
+ b->rbuffer_size -= (e + 2 - (char*) b->rbuffer);
+ memmove(b->rbuffer, e + 2, b->rbuffer_size);
+ return bus_start_running(b);
+ }
+
+ } else if (line_begins(line, l, "DATA")) {
+
+ if (b->auth == _BUS_AUTH_INVALID)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ if (b->auth == BUS_AUTH_ANONYMOUS)
+ r = verify_anonymous_token(b, line + 4, l - 4);
+ else
+ r = verify_external_token(b, line + 4, l - 4);
+
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ b->auth = _BUS_AUTH_INVALID;
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ } else
+ r = bus_socket_auth_write_ok(b);
+ }
+ } else if (line_equals(line, l, "NEGOTIATE_UNIX_FD")) {
+ if (b->auth == _BUS_AUTH_INVALID || !b->accept_fd)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ b->can_fds = true;
+ r = bus_socket_auth_write(b, "AGREE_UNIX_FD\r\n");
+ }
+ } else
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+
+ if (r < 0)
+ return r;
+
+ b->auth_rbegin = e + 2 - (char*) b->rbuffer;
+
+ processed = true;
+ }
+}
+
+static int bus_socket_auth_verify(sd_bus *b) {
+ assert(b);
+
+ if (b->is_server)
+ return bus_socket_auth_verify_server(b);
+ else
+ return bus_socket_auth_verify_client(b);
+}
+
+static int bus_socket_read_auth(sd_bus *b) {
+ struct msghdr mh;
+ struct iovec iov = {};
+ size_t n;
+ ssize_t k;
+ int r;
+ void *p;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int) * BUS_FDS_MAX)) control;
+ bool handle_cmsg = false;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ r = bus_socket_auth_verify(b);
+ if (r != 0)
+ return r;
+
+ n = MAX(256u, b->rbuffer_size * 2);
+
+ if (n > BUS_AUTH_SIZE_MAX)
+ n = BUS_AUTH_SIZE_MAX;
+
+ if (b->rbuffer_size >= n)
+ return -ENOBUFS;
+
+ p = realloc(b->rbuffer, n);
+ if (!p)
+ return -ENOMEM;
+
+ b->rbuffer = p;
+
+ iov = IOVEC_MAKE((uint8_t *)b->rbuffer + b->rbuffer_size, n - b->rbuffer_size);
+
+ if (b->prefer_readv) {
+ k = readv(b->input_fd, &iov, 1);
+ if (k < 0)
+ k = -errno;
+ } else {
+ mh = (struct msghdr) {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ k = recvmsg_safe(b->input_fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (k == -ENOTSOCK) {
+ b->prefer_readv = true;
+ k = readv(b->input_fd, &iov, 1);
+ if (k < 0)
+ k = -errno;
+ } else
+ handle_cmsg = true;
+ }
+ if (k == -EAGAIN)
+ return 0;
+ if (k < 0)
+ return (int) k;
+ if (k == 0) {
+ if (handle_cmsg)
+ cmsg_close_all(&mh); /* paranoia, we shouldn't have gotten any fds on EOF */
+ return -ECONNRESET;
+ }
+
+ b->rbuffer_size += k;
+
+ if (handle_cmsg) {
+ struct cmsghdr *cmsg;
+
+ CMSG_FOREACH(cmsg, &mh)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ int j;
+
+ /* Whut? We received fds during the auth
+ * protocol? Somebody is playing games with
+ * us. Close them all, and fail */
+ j = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+ close_many((int*) CMSG_DATA(cmsg), j);
+ return -EIO;
+ } else
+ log_debug("Got unexpected auxiliary data with level=%d and type=%d",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+
+ r = bus_socket_auth_verify(b);
+ if (r != 0)
+ return r;
+
+ return 1;
+}
+
+void bus_socket_setup(sd_bus *b) {
+ assert(b);
+
+ /* Increase the buffers to 8 MB */
+ (void) fd_inc_rcvbuf(b->input_fd, SNDBUF_SIZE);
+ (void) fd_inc_sndbuf(b->output_fd, SNDBUF_SIZE);
+
+ b->message_version = 1;
+ b->message_endian = 0;
+}
+
+static void bus_get_peercred(sd_bus *b) {
+ int r;
+
+ assert(b);
+ assert(!b->ucred_valid);
+ assert(!b->label);
+ assert(b->n_groups == (size_t) -1);
+
+ /* Get the peer for socketpair() sockets */
+ b->ucred_valid = getpeercred(b->input_fd, &b->ucred) >= 0;
+
+ /* Get the SELinux context of the peer */
+ r = getpeersec(b->input_fd, &b->label);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT))
+ log_debug_errno(r, "Failed to determine peer security context: %m");
+
+ /* Get the list of auxiliary groups of the peer */
+ r = getpeergroups(b->input_fd, &b->groups);
+ if (r >= 0)
+ b->n_groups = (size_t) r;
+ else if (!IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT))
+ log_debug_errno(r, "Failed to determine peer's group list: %m");
+}
+
+static int bus_socket_start_auth_client(sd_bus *b) {
+ static const char sasl_auth_anonymous[] = {
+ /*
+ * We use an arbitrary trace-string for the ANONYMOUS authentication. It can be used by the
+ * message broker to aid debugging of clients. We fully anonymize the connection and use a
+ * static default.
+ */
+ "\0AUTH ANONYMOUS\r\n"
+ /* HEX a n o n y m o u s */
+ "DATA 616e6f6e796d6f7573\r\n"
+ };
+ static const char sasl_auth_external[] = {
+ "\0AUTH EXTERNAL\r\n"
+ "DATA\r\n"
+ };
+ static const char sasl_negotiate_unix_fd[] = {
+ "NEGOTIATE_UNIX_FD\r\n"
+ };
+ static const char sasl_begin[] = {
+ "BEGIN\r\n"
+ };
+ size_t i = 0;
+
+ assert(b);
+
+ if (b->anonymous_auth)
+ b->auth_iovec[i++] = IOVEC_MAKE((char*) sasl_auth_anonymous, sizeof(sasl_auth_anonymous) - 1);
+ else
+ b->auth_iovec[i++] = IOVEC_MAKE((char*) sasl_auth_external, sizeof(sasl_auth_external) - 1);
+
+ if (b->accept_fd)
+ b->auth_iovec[i++] = IOVEC_MAKE_STRING(sasl_negotiate_unix_fd);
+
+ b->auth_iovec[i++] = IOVEC_MAKE_STRING(sasl_begin);
+
+ return bus_socket_write_auth(b);
+}
+
+int bus_socket_start_auth(sd_bus *b) {
+ assert(b);
+
+ bus_get_peercred(b);
+
+ bus_set_state(b, BUS_AUTHENTICATING);
+ b->auth_timeout = now(CLOCK_MONOTONIC) + BUS_AUTH_TIMEOUT;
+
+ if (sd_is_socket(b->input_fd, AF_UNIX, 0, 0) <= 0)
+ b->accept_fd = false;
+
+ if (b->output_fd != b->input_fd)
+ if (sd_is_socket(b->output_fd, AF_UNIX, 0, 0) <= 0)
+ b->accept_fd = false;
+
+ if (b->is_server)
+ return bus_socket_read_auth(b);
+ else
+ return bus_socket_start_auth_client(b);
+}
+
+static int bus_socket_inotify_setup(sd_bus *b) {
+ _cleanup_free_ int *new_watches = NULL;
+ _cleanup_free_ char *absolute = NULL;
+ size_t n_allocated = 0, n = 0, done = 0, i;
+ unsigned max_follow = 32;
+ const char *p;
+ int wd, r;
+
+ assert(b);
+ assert(b->watch_bind);
+ assert(b->sockaddr.sa.sa_family == AF_UNIX);
+ assert(b->sockaddr.un.sun_path[0] != 0);
+
+ /* Sets up an inotify fd in case watch_bind is enabled: wait until the configured AF_UNIX file system socket
+ * appears before connecting to it. The implemented is pretty simplistic: we just subscribe to relevant changes
+ * to all prefix components of the path, and every time we get an event for that we try to reconnect again,
+ * without actually caring what precisely the event we got told us. If we still can't connect we re-subscribe
+ * to all relevant changes of anything in the path, so that our watches include any possibly newly created path
+ * components. */
+
+ if (b->inotify_fd < 0) {
+ b->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (b->inotify_fd < 0)
+ return -errno;
+
+ b->inotify_fd = fd_move_above_stdio(b->inotify_fd);
+ }
+
+ /* Make sure the path is NUL terminated */
+ p = strndupa(b->sockaddr.un.sun_path, sizeof(b->sockaddr.un.sun_path));
+
+ /* Make sure the path is absolute */
+ r = path_make_absolute_cwd(p, &absolute);
+ if (r < 0)
+ goto fail;
+
+ /* Watch all parent directories, and don't mind any prefix that doesn't exist yet. For the innermost directory
+ * that exists we want to know when files are created or moved into it. For all parents of it we just care if
+ * they are removed or renamed. */
+
+ if (!GREEDY_REALLOC(new_watches, n_allocated, n + 1)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* Start with the top-level directory, which is a bit simpler than the rest, since it can't be a symlink, and
+ * always exists */
+ wd = inotify_add_watch(b->inotify_fd, "/", IN_CREATE|IN_MOVED_TO);
+ if (wd < 0) {
+ r = log_debug_errno(errno, "Failed to add inotify watch on /: %m");
+ goto fail;
+ } else
+ new_watches[n++] = wd;
+
+ for (;;) {
+ _cleanup_free_ char *component = NULL, *prefix = NULL, *destination = NULL;
+ size_t n_slashes, n_component;
+ char *c = NULL;
+
+ n_slashes = strspn(absolute + done, "/");
+ n_component = n_slashes + strcspn(absolute + done + n_slashes, "/");
+
+ if (n_component == 0) /* The end */
+ break;
+
+ component = strndup(absolute + done, n_component);
+ if (!component) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* A trailing slash? That's a directory, and not a socket then */
+ if (path_equal(component, "/")) {
+ r = -EISDIR;
+ goto fail;
+ }
+
+ /* A single dot? Let's eat this up */
+ if (path_equal(component, "/.")) {
+ done += n_component;
+ continue;
+ }
+
+ prefix = strndup(absolute, done + n_component);
+ if (!prefix) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (!GREEDY_REALLOC(new_watches, n_allocated, n + 1)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ wd = inotify_add_watch(b->inotify_fd, prefix, IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CREATE|IN_MOVED_TO|IN_DONT_FOLLOW);
+ log_debug("Added inotify watch for %s on bus %s: %i", prefix, strna(b->description), wd);
+
+ if (wd < 0) {
+ if (IN_SET(errno, ENOENT, ELOOP))
+ break; /* This component doesn't exist yet, or the path contains a cyclic symlink right now */
+
+ r = log_debug_errno(errno, "Failed to add inotify watch on %s: %m", empty_to_root(prefix));
+ goto fail;
+ } else
+ new_watches[n++] = wd;
+
+ /* Check if this is possibly a symlink. If so, let's follow it and watch it too. */
+ r = readlink_malloc(prefix, &destination);
+ if (r == -EINVAL) { /* not a symlink */
+ done += n_component;
+ continue;
+ }
+ if (r < 0)
+ goto fail;
+
+ if (isempty(destination)) { /* Empty symlink target? Yuck! */
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (max_follow <= 0) { /* Let's make sure we don't follow symlinks forever */
+ r = -ELOOP;
+ goto fail;
+ }
+
+ if (path_is_absolute(destination)) {
+ /* For absolute symlinks we build the new path and start anew */
+ c = strjoin(destination, absolute + done + n_component);
+ done = 0;
+ } else {
+ _cleanup_free_ char *t = NULL;
+
+ /* For relative symlinks we replace the last component, and try again */
+ t = strndup(absolute, done);
+ if (!t)
+ return -ENOMEM;
+
+ c = strjoin(t, "/", destination, absolute + done + n_component);
+ }
+ if (!c) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ free(absolute);
+ absolute = c;
+
+ max_follow--;
+ }
+
+ /* And now, let's remove all watches from the previous iteration we don't need anymore */
+ for (i = 0; i < b->n_inotify_watches; i++) {
+ bool found = false;
+ size_t j;
+
+ for (j = 0; j < n; j++)
+ if (new_watches[j] == b->inotify_watches[i]) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ continue;
+
+ (void) inotify_rm_watch(b->inotify_fd, b->inotify_watches[i]);
+ }
+
+ free_and_replace(b->inotify_watches, new_watches);
+ b->n_inotify_watches = n;
+
+ return 0;
+
+fail:
+ bus_close_inotify_fd(b);
+ return r;
+}
+
+int bus_socket_connect(sd_bus *b) {
+ bool inotify_done = false;
+ int r;
+
+ assert(b);
+
+ for (;;) {
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->sockaddr.sa.sa_family != AF_UNSPEC);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *pretty = NULL;
+ (void) sockaddr_pretty(&b->sockaddr.sa, b->sockaddr_size, false, true, &pretty);
+ log_debug("sd-bus: starting bus%s%s by connecting to %s...",
+ b->description ? " " : "", strempty(b->description), strnull(pretty));
+ }
+
+ b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (b->input_fd < 0)
+ return -errno;
+
+ b->input_fd = fd_move_above_stdio(b->input_fd);
+
+ b->output_fd = b->input_fd;
+ bus_socket_setup(b);
+
+ if (connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size) < 0) {
+ if (errno == EINPROGRESS) {
+
+ /* If we have any inotify watches open, close them now, we don't need them anymore, as
+ * we have successfully initiated a connection */
+ bus_close_inotify_fd(b);
+
+ /* Note that very likely we are already in BUS_OPENING state here, as we enter it when
+ * we start parsing the address string. The only reason we set the state explicitly
+ * here, is to undo BUS_WATCH_BIND, in case we did the inotify magic. */
+ bus_set_state(b, BUS_OPENING);
+ return 1;
+ }
+
+ if (IN_SET(errno, ENOENT, ECONNREFUSED) && /* ENOENT → unix socket doesn't exist at all; ECONNREFUSED → unix socket stale */
+ b->watch_bind &&
+ b->sockaddr.sa.sa_family == AF_UNIX &&
+ b->sockaddr.un.sun_path[0] != 0) {
+
+ /* This connection attempt failed, let's release the socket for now, and start with a
+ * fresh one when reconnecting. */
+ bus_close_io_fds(b);
+
+ if (inotify_done) {
+ /* inotify set up already, don't do it again, just return now, and remember
+ * that we are waiting for inotify events now. */
+ bus_set_state(b, BUS_WATCH_BIND);
+ return 1;
+ }
+
+ /* This is a file system socket, and the inotify logic is enabled. Let's create the necessary inotify fd. */
+ r = bus_socket_inotify_setup(b);
+ if (r < 0)
+ return r;
+
+ /* Let's now try to connect a second time, because in theory there's otherwise a race
+ * here: the socket might have been created in the time between our first connect() and
+ * the time we set up the inotify logic. But let's remember that we set up inotify now,
+ * so that we don't do the connect() more than twice. */
+ inotify_done = true;
+
+ } else
+ return -errno;
+ } else
+ break;
+ }
+
+ /* Yay, established, we don't need no inotify anymore! */
+ bus_close_inotify_fd(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_exec(sd_bus *b) {
+ int s[2], r;
+
+ assert(b);
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->exec_path);
+ assert(b->busexec_pid == 0);
+
+ log_debug("sd-bus: starting bus%s%s with %s...",
+ b->description ? " " : "", strempty(b->description), b->exec_path);
+
+ r = socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, s);
+ if (r < 0)
+ return -errno;
+
+ r = safe_fork_full("(sd-busexec)", s+1, 1, FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, &b->busexec_pid);
+ if (r < 0) {
+ safe_close_pair(s);
+ return r;
+ }
+ if (r == 0) {
+ /* Child */
+
+ if (rearrange_stdio(s[1], s[1], STDERR_FILENO) < 0)
+ _exit(EXIT_FAILURE);
+
+ (void) rlimit_nofile_safe();
+
+ if (b->exec_argv)
+ execvp(b->exec_path, b->exec_argv);
+ else {
+ const char *argv[] = { b->exec_path, NULL };
+ execvp(b->exec_path, (char**) argv);
+ }
+
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close(s[1]);
+ b->output_fd = b->input_fd = fd_move_above_stdio(s[0]);
+
+ bus_socket_setup(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_take_fd(sd_bus *b) {
+ assert(b);
+
+ bus_socket_setup(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx) {
+ struct iovec *iov;
+ ssize_t k;
+ size_t n;
+ unsigned j;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(idx);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ if (*idx >= BUS_MESSAGE_SIZE(m))
+ return 0;
+
+ r = bus_message_setup_iovec(m);
+ if (r < 0)
+ return r;
+
+ n = m->n_iovec * sizeof(struct iovec);
+ iov = newa(struct iovec, n);
+ memcpy_safe(iov, m->iovec, n);
+
+ j = 0;
+ iovec_advance(iov, &j, *idx);
+
+ if (bus->prefer_writev)
+ k = writev(bus->output_fd, iov, m->n_iovec);
+ else {
+ struct msghdr mh = {
+ .msg_iov = iov,
+ .msg_iovlen = m->n_iovec,
+ };
+
+ if (m->n_fds > 0 && *idx == 0) {
+ struct cmsghdr *control;
+
+ mh.msg_controllen = CMSG_SPACE(sizeof(int) * m->n_fds);
+ mh.msg_control = alloca0(mh.msg_controllen);
+ control = CMSG_FIRSTHDR(&mh);
+ control->cmsg_len = CMSG_LEN(sizeof(int) * m->n_fds);
+ control->cmsg_level = SOL_SOCKET;
+ control->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(control), m->fds, sizeof(int) * m->n_fds);
+ }
+
+ k = sendmsg(bus->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0 && errno == ENOTSOCK) {
+ bus->prefer_writev = true;
+ k = writev(bus->output_fd, iov, m->n_iovec);
+ }
+ }
+
+ if (k < 0)
+ return errno == EAGAIN ? 0 : -errno;
+
+ *idx += (size_t) k;
+ return 1;
+}
+
+static int bus_socket_read_message_need(sd_bus *bus, size_t *need) {
+ uint32_t a, b;
+ uint8_t e;
+ uint64_t sum;
+
+ assert(bus);
+ assert(need);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ if (bus->rbuffer_size < sizeof(struct bus_header)) {
+ *need = sizeof(struct bus_header) + 8;
+
+ /* Minimum message size:
+ *
+ * Header +
+ *
+ * Method Call: +2 string headers
+ * Signal: +3 string headers
+ * Method Error: +1 string headers
+ * +1 uint32 headers
+ * Method Reply: +1 uint32 headers
+ *
+ * A string header is at least 9 bytes
+ * A uint32 header is at least 8 bytes
+ *
+ * Hence the minimum message size of a valid message
+ * is header + 8 bytes */
+
+ return 0;
+ }
+
+ a = ((const uint32_t*) bus->rbuffer)[1];
+ b = ((const uint32_t*) bus->rbuffer)[3];
+
+ e = ((const uint8_t*) bus->rbuffer)[0];
+ if (e == BUS_LITTLE_ENDIAN) {
+ a = le32toh(a);
+ b = le32toh(b);
+ } else if (e == BUS_BIG_ENDIAN) {
+ a = be32toh(a);
+ b = be32toh(b);
+ } else
+ return -EBADMSG;
+
+ sum = (uint64_t) sizeof(struct bus_header) + (uint64_t) ALIGN_TO(b, 8) + (uint64_t) a;
+ if (sum >= BUS_MESSAGE_SIZE_MAX)
+ return -ENOBUFS;
+
+ *need = (size_t) sum;
+ return 0;
+}
+
+static int bus_socket_make_message(sd_bus *bus, size_t size) {
+ sd_bus_message *t = NULL;
+ void *b;
+ int r;
+
+ assert(bus);
+ assert(bus->rbuffer_size >= size);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = bus_rqueue_make_room(bus);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size > size) {
+ b = memdup((const uint8_t*) bus->rbuffer + size,
+ bus->rbuffer_size - size);
+ if (!b)
+ return -ENOMEM;
+ } else
+ b = NULL;
+
+ r = bus_message_from_malloc(bus,
+ bus->rbuffer, size,
+ bus->fds, bus->n_fds,
+ NULL,
+ &t);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Received invalid message from connection %s, dropping.", strna(bus->description));
+ free(bus->rbuffer); /* We want to drop current rbuffer and proceed with whatever remains in b */
+ } else if (r < 0) {
+ free(b);
+ return r;
+ }
+
+ /* rbuffer ownership was either transferred to t, or we got EBADMSG and dropped it. */
+ bus->rbuffer = b;
+ bus->rbuffer_size -= size;
+
+ bus->fds = NULL;
+ bus->n_fds = 0;
+
+ if (t) {
+ t->read_counter = ++bus->read_counter;
+ bus->rqueue[bus->rqueue_size++] = bus_message_ref_queued(t, bus);
+ sd_bus_message_unref(t);
+ }
+
+ return 1;
+}
+
+int bus_socket_read_message(sd_bus *bus) {
+ struct msghdr mh;
+ struct iovec iov = {};
+ ssize_t k;
+ size_t need;
+ int r;
+ void *b;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int) * BUS_FDS_MAX)) control;
+ bool handle_cmsg = false;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = bus_socket_read_message_need(bus, &need);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size >= need)
+ return bus_socket_make_message(bus, need);
+
+ b = realloc(bus->rbuffer, need);
+ if (!b)
+ return -ENOMEM;
+
+ bus->rbuffer = b;
+
+ iov = IOVEC_MAKE((uint8_t *)bus->rbuffer + bus->rbuffer_size, need - bus->rbuffer_size);
+
+ if (bus->prefer_readv) {
+ k = readv(bus->input_fd, &iov, 1);
+ if (k < 0)
+ k = -errno;
+ } else {
+ mh = (struct msghdr) {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ k = recvmsg_safe(bus->input_fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (k == -ENOTSOCK) {
+ bus->prefer_readv = true;
+ k = readv(bus->input_fd, &iov, 1);
+ if (k < 0)
+ k = -errno;
+ } else
+ handle_cmsg = true;
+ }
+ if (k == -EAGAIN)
+ return 0;
+ if (k < 0)
+ return (int) k;
+ if (k == 0) {
+ if (handle_cmsg)
+ cmsg_close_all(&mh); /* On EOF we shouldn't have gotten an fd, but let's make sure */
+ return -ECONNRESET;
+ }
+
+ bus->rbuffer_size += k;
+
+ if (handle_cmsg) {
+ struct cmsghdr *cmsg;
+
+ CMSG_FOREACH(cmsg, &mh)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ int n, *f, i;
+
+ n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ if (!bus->can_fds) {
+ /* Whut? We received fds but this
+ * isn't actually enabled? Close them,
+ * and fail */
+
+ close_many((int*) CMSG_DATA(cmsg), n);
+ return -EIO;
+ }
+
+ f = reallocarray(bus->fds, bus->n_fds + n, sizeof(int));
+ if (!f) {
+ close_many((int*) CMSG_DATA(cmsg), n);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < n; i++)
+ f[bus->n_fds++] = fd_move_above_stdio(((int*) CMSG_DATA(cmsg))[i]);
+ bus->fds = f;
+ } else
+ log_debug("Got unexpected auxiliary data with level=%d and type=%d",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+
+ r = bus_socket_read_message_need(bus, &need);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size >= need)
+ return bus_socket_make_message(bus, need);
+
+ return 1;
+}
+
+int bus_socket_process_opening(sd_bus *b) {
+ int error = 0, events, r;
+ socklen_t slen = sizeof(error);
+
+ assert(b->state == BUS_OPENING);
+
+ events = fd_wait_for_event(b->output_fd, POLLOUT, 0);
+ if (events < 0)
+ return events;
+ if (!(events & (POLLOUT|POLLERR|POLLHUP)))
+ return 0;
+
+ r = getsockopt(b->output_fd, SOL_SOCKET, SO_ERROR, &error, &slen);
+ if (r < 0)
+ b->last_connect_error = errno;
+ else if (error != 0)
+ b->last_connect_error = error;
+ else if (events & (POLLERR|POLLHUP))
+ b->last_connect_error = ECONNREFUSED;
+ else
+ return bus_socket_start_auth(b);
+
+ return bus_next_address(b);
+}
+
+int bus_socket_process_authenticating(sd_bus *b) {
+ int r;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ if (now(CLOCK_MONOTONIC) >= b->auth_timeout)
+ return -ETIMEDOUT;
+
+ r = bus_socket_write_auth(b);
+ if (r != 0)
+ return r;
+
+ return bus_socket_read_auth(b);
+}
+
+int bus_socket_process_watch_bind(sd_bus *b) {
+ int r, q;
+
+ assert(b);
+ assert(b->state == BUS_WATCH_BIND);
+ assert(b->inotify_fd >= 0);
+
+ r = flush_fd(b->inotify_fd);
+ if (r <= 0)
+ return r;
+
+ log_debug("Got inotify event on bus %s.", strna(b->description));
+
+ /* We flushed events out of the inotify fd. In that case, maybe the socket is valid now? Let's try to connect
+ * to it again */
+
+ r = bus_socket_connect(b);
+ if (r < 0)
+ return r;
+
+ q = bus_attach_io_events(b);
+ if (q < 0)
+ return q;
+
+ q = bus_attach_inotify_event(b);
+ if (q < 0)
+ return q;
+
+ return r;
+}
diff --git a/src/libsystemd/sd-bus/bus-socket.h b/src/libsystemd/sd-bus/bus-socket.h
new file mode 100644
index 0000000..52bc404
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-socket.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+void bus_socket_setup(sd_bus *b);
+
+int bus_socket_connect(sd_bus *b);
+int bus_socket_exec(sd_bus *b);
+int bus_socket_take_fd(sd_bus *b);
+int bus_socket_start_auth(sd_bus *b);
+
+int bus_socket_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx);
+int bus_socket_read_message(sd_bus *bus);
+
+int bus_socket_process_opening(sd_bus *b);
+int bus_socket_process_authenticating(sd_bus *b);
+int bus_socket_process_watch_bind(sd_bus *b);
+
+bool bus_socket_auth_needs_write(sd_bus *b);
diff --git a/src/libsystemd/sd-bus/bus-track.c b/src/libsystemd/sd-bus/bus-track.c
new file mode 100644
index 0000000..5f8716e
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-track.c
@@ -0,0 +1,493 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-track.h"
+#include "bus-util.h"
+#include "string-util.h"
+
+struct track_item {
+ unsigned n_ref;
+ char *name;
+ sd_bus_slot *slot;
+};
+
+struct sd_bus_track {
+ unsigned n_ref;
+ unsigned n_adding; /* are we in the process of adding a new name? */
+ sd_bus *bus;
+ sd_bus_track_handler_t handler;
+ void *userdata;
+ Hashmap *names;
+ LIST_FIELDS(sd_bus_track, queue);
+ Iterator iterator;
+ bool in_list:1; /* In bus->tracks? */
+ bool in_queue:1; /* In bus->track_queue? */
+ bool modified:1;
+ bool recursive:1;
+ sd_bus_destroy_t destroy_callback;
+
+ LIST_FIELDS(sd_bus_track, tracks);
+};
+
+#define MATCH_FOR_NAME(name) \
+ strjoina("type='signal'," \
+ "sender='org.freedesktop.DBus'," \
+ "path='/org/freedesktop/DBus'," \
+ "interface='org.freedesktop.DBus'," \
+ "member='NameOwnerChanged'," \
+ "arg0='", name, "'")
+
+static struct track_item* track_item_free(struct track_item *i) {
+
+ if (!i)
+ return NULL;
+
+ sd_bus_slot_unref(i->slot);
+ free(i->name);
+ return mfree(i);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct track_item*, track_item_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(track_item_hash_ops, char, string_hash_func, string_compare_func,
+ struct track_item, track_item_free);
+
+static void bus_track_add_to_queue(sd_bus_track *track) {
+ assert(track);
+
+ /* Adds the bus track object to the queue of objects we should dispatch next, subject to a number of
+ * conditions. */
+
+ /* Already in the queue? */
+ if (track->in_queue)
+ return;
+
+ /* if we are currently in the process of adding a new name, then let's not enqueue this just yet, let's wait
+ * until the addition is complete. */
+ if (track->n_adding > 0)
+ return;
+
+ /* still referenced? */
+ if (hashmap_size(track->names) > 0)
+ return;
+
+ /* Nothing to call? */
+ if (!track->handler)
+ return;
+
+ /* Already closed? */
+ if (!track->in_list)
+ return;
+
+ LIST_PREPEND(queue, track->bus->track_queue, track);
+ track->in_queue = true;
+}
+
+static void bus_track_remove_from_queue(sd_bus_track *track) {
+ assert(track);
+
+ if (!track->in_queue)
+ return;
+
+ LIST_REMOVE(queue, track->bus->track_queue, track);
+ track->in_queue = false;
+}
+
+static int bus_track_remove_name_fully(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert(track);
+ assert(name);
+
+ i = hashmap_remove(track->names, name);
+ if (!i)
+ return 0;
+
+ track_item_free(i);
+
+ bus_track_add_to_queue(track);
+
+ track->modified = true;
+ return 1;
+}
+
+_public_ int sd_bus_track_new(
+ sd_bus *bus,
+ sd_bus_track **track,
+ sd_bus_track_handler_t handler,
+ void *userdata) {
+
+ sd_bus_track *t;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(track, -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ t = new0(sd_bus_track, 1);
+ if (!t)
+ return -ENOMEM;
+
+ t->n_ref = 1;
+ t->handler = handler;
+ t->userdata = userdata;
+ t->bus = sd_bus_ref(bus);
+
+ LIST_PREPEND(tracks, bus->tracks, t);
+ t->in_list = true;
+
+ bus_track_add_to_queue(t);
+
+ *track = t;
+ return 0;
+}
+
+static sd_bus_track *track_free(sd_bus_track *track) {
+ assert(track);
+
+ if (track->in_list)
+ LIST_REMOVE(tracks, track->bus->tracks, track);
+
+ bus_track_remove_from_queue(track);
+ track->names = hashmap_free(track->names);
+ track->bus = sd_bus_unref(track->bus);
+
+ if (track->destroy_callback)
+ track->destroy_callback(track->userdata);
+
+ return mfree(track);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_track, sd_bus_track, track_free);
+
+static int on_name_owner_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ sd_bus_track *track = userdata;
+ const char *name, *old, *new;
+ int r;
+
+ assert(message);
+ assert(track);
+
+ r = sd_bus_message_read(message, "sss", &name, &old, &new);
+ if (r < 0)
+ return 0;
+
+ bus_track_remove_name_fully(track, name);
+ return 0;
+}
+
+_public_ int sd_bus_track_add_name(sd_bus_track *track, const char *name) {
+ _cleanup_(track_item_freep) struct track_item *n = NULL;
+ struct track_item *i;
+ const char *match;
+ int r;
+
+ assert_return(track, -EINVAL);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ i = hashmap_get(track->names, name);
+ if (i) {
+ if (track->recursive) {
+ unsigned k = track->n_ref + 1;
+
+ if (k < track->n_ref) /* Check for overflow */
+ return -EOVERFLOW;
+
+ track->n_ref = k;
+ }
+
+ bus_track_remove_from_queue(track);
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&track->names, &track_item_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = new0(struct track_item, 1);
+ if (!n)
+ return -ENOMEM;
+ n->name = strdup(name);
+ if (!n->name)
+ return -ENOMEM;
+
+ /* First, subscribe to this name */
+ match = MATCH_FOR_NAME(name);
+
+ bus_track_remove_from_queue(track); /* don't dispatch this while we work in it */
+
+ r = sd_bus_add_match_async(track->bus, &n->slot, match, on_name_owner_changed, NULL, track);
+ if (r < 0) {
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ r = hashmap_put(track->names, n->name, n);
+ if (r < 0) {
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ /* Second, check if it is currently existing, or maybe doesn't, or maybe disappeared already. */
+ track->n_adding++; /* again, make sure this isn't dispatch while we are working in it */
+ r = sd_bus_get_name_creds(track->bus, name, 0, NULL);
+ track->n_adding--;
+ if (r < 0) {
+ hashmap_remove(track->names, name);
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ n->n_ref = 1;
+ n = NULL;
+
+ bus_track_remove_from_queue(track);
+ track->modified = true;
+
+ return 1;
+}
+
+_public_ int sd_bus_track_remove_name(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert_return(name, -EINVAL);
+
+ if (!track) /* Treat a NULL track object as an empty track object */
+ return 0;
+
+ if (!track->recursive)
+ return bus_track_remove_name_fully(track, name);
+
+ i = hashmap_get(track->names, name);
+ if (!i)
+ return -EUNATCH;
+ if (i->n_ref <= 0)
+ return -EUNATCH;
+
+ i->n_ref--;
+
+ if (i->n_ref <= 0)
+ return bus_track_remove_name_fully(track, name);
+
+ return 1;
+}
+
+_public_ unsigned sd_bus_track_count(sd_bus_track *track) {
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ /* This signature really should have returned an int, so that we can propagate errors. But well, ... Also, note
+ * that this returns the number of names being watched, and multiple references to the same name are not
+ * counted. */
+
+ return hashmap_size(track->names);
+}
+
+_public_ const char* sd_bus_track_contains(sd_bus_track *track, const char *name) {
+ assert_return(name, NULL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return NULL;
+
+ return hashmap_get(track->names, (void*) name) ? name : NULL;
+}
+
+_public_ const char* sd_bus_track_first(sd_bus_track *track) {
+ const char *n = NULL;
+
+ if (!track)
+ return NULL;
+
+ track->modified = false;
+ track->iterator = ITERATOR_FIRST;
+
+ (void) hashmap_iterate(track->names, &track->iterator, NULL, (const void**) &n);
+ return n;
+}
+
+_public_ const char* sd_bus_track_next(sd_bus_track *track) {
+ const char *n = NULL;
+
+ if (!track)
+ return NULL;
+
+ if (track->modified)
+ return NULL;
+
+ (void) hashmap_iterate(track->names, &track->iterator, NULL, (const void**) &n);
+ return n;
+}
+
+_public_ int sd_bus_track_add_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(track, -EINVAL);
+ assert_return(m, -EINVAL);
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_add_name(track, sender);
+}
+
+_public_ int sd_bus_track_remove_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(m, -EINVAL);
+
+ if (!track) /* Treat a NULL track object as an empty track object */
+ return 0;
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_remove_name(track, sender);
+}
+
+_public_ sd_bus* sd_bus_track_get_bus(sd_bus_track *track) {
+ assert_return(track, NULL);
+
+ return track->bus;
+}
+
+void bus_track_dispatch(sd_bus_track *track) {
+ int r;
+
+ assert(track);
+ assert(track->handler);
+
+ bus_track_remove_from_queue(track);
+
+ sd_bus_track_ref(track);
+
+ r = track->handler(track, track->userdata);
+ if (r < 0)
+ log_debug_errno(r, "Failed to process track handler: %m");
+ else if (r == 0)
+ bus_track_add_to_queue(track);
+
+ sd_bus_track_unref(track);
+}
+
+void bus_track_close(sd_bus_track *track) {
+ assert(track);
+
+ /* Called whenever our bus connected is closed. If so, and our track object is non-empty, dispatch it
+ * immediately, as we are closing now, but first flush out all names. */
+
+ if (!track->in_list)
+ return; /* We already closed this one, don't close it again. */
+
+ /* Remember that this one is closed now */
+ LIST_REMOVE(tracks, track->bus->tracks, track);
+ track->in_list = false;
+
+ /* If there's no name in this one anyway, we don't have to dispatch */
+ if (hashmap_isempty(track->names))
+ return;
+
+ /* Let's flush out all names */
+ hashmap_clear(track->names);
+
+ /* Invoke handler */
+ if (track->handler)
+ bus_track_dispatch(track);
+}
+
+_public_ void *sd_bus_track_get_userdata(sd_bus_track *track) {
+ assert_return(track, NULL);
+
+ return track->userdata;
+}
+
+_public_ void *sd_bus_track_set_userdata(sd_bus_track *track, void *userdata) {
+ void *ret;
+
+ assert_return(track, NULL);
+
+ ret = track->userdata;
+ track->userdata = userdata;
+
+ return ret;
+}
+
+_public_ int sd_bus_track_set_destroy_callback(sd_bus_track *track, sd_bus_destroy_t callback) {
+ assert_return(track, -EINVAL);
+
+ track->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_bus_track_get_destroy_callback(sd_bus_track *track, sd_bus_destroy_t *ret) {
+ assert_return(track, -EINVAL);
+
+ if (ret)
+ *ret = track->destroy_callback;
+
+ return !!track->destroy_callback;
+}
+
+_public_ int sd_bus_track_set_recursive(sd_bus_track *track, int b) {
+ assert_return(track, -EINVAL);
+
+ if (track->recursive == !!b)
+ return 0;
+
+ if (!hashmap_isempty(track->names))
+ return -EBUSY;
+
+ track->recursive = b;
+ return 0;
+}
+
+_public_ int sd_bus_track_get_recursive(sd_bus_track *track) {
+ assert_return(track, -EINVAL);
+
+ return track->recursive;
+}
+
+_public_ int sd_bus_track_count_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(m, -EINVAL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_count_name(track, sender);
+}
+
+_public_ int sd_bus_track_count_name(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ i = hashmap_get(track->names, name);
+ if (!i)
+ return 0;
+
+ return i->n_ref;
+}
diff --git a/src/libsystemd/sd-bus/bus-track.h b/src/libsystemd/sd-bus/bus-track.h
new file mode 100644
index 0000000..8dae1f3
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-track.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+void bus_track_dispatch(sd_bus_track *track);
+void bus_track_close(sd_bus_track *track);
diff --git a/src/libsystemd/sd-bus/bus-type.c b/src/libsystemd/sd-bus/bus-type.c
new file mode 100644
index 0000000..6a0f53d
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-type.c
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "bus-type.h"
+
+bool bus_type_is_valid(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE,
+ SD_BUS_TYPE_STRING,
+ SD_BUS_TYPE_OBJECT_PATH,
+ SD_BUS_TYPE_SIGNATURE,
+ SD_BUS_TYPE_ARRAY,
+ SD_BUS_TYPE_VARIANT,
+ SD_BUS_TYPE_STRUCT,
+ SD_BUS_TYPE_DICT_ENTRY,
+ SD_BUS_TYPE_UNIX_FD
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_basic(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE,
+ SD_BUS_TYPE_STRING,
+ SD_BUS_TYPE_OBJECT_PATH,
+ SD_BUS_TYPE_SIGNATURE,
+ SD_BUS_TYPE_UNIX_FD
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_trivial(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_container(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_ARRAY,
+ SD_BUS_TYPE_VARIANT,
+ SD_BUS_TYPE_STRUCT,
+ SD_BUS_TYPE_DICT_ENTRY
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+int bus_type_get_alignment(char c) {
+
+ switch (c) {
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_VARIANT:
+ return 1;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ return 2;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_UNIX_FD:
+ return 4;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRUCT:
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN:
+ return 8;
+ }
+
+ return -EINVAL;
+}
+
+int bus_type_get_size(char c) {
+
+ switch (c) {
+ case SD_BUS_TYPE_BYTE:
+ return 1;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ return 2;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ return 4;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ return 8;
+ }
+
+ return -EINVAL;
+}
+
+_public_ int sd_bus_interface_name_is_valid(const char *p) {
+ assert_return(p, -EINVAL);
+
+ return interface_name_is_valid(p);
+}
+
+_public_ int sd_bus_service_name_is_valid(const char *p) {
+ assert_return(p, -EINVAL);
+
+ return service_name_is_valid(p);
+}
+
+_public_ int sd_bus_member_name_is_valid(const char *p) {
+ assert_return(p, -EINVAL);
+
+ return member_name_is_valid(p);
+}
+
+_public_ int sd_bus_object_path_is_valid(const char *p) {
+ assert_return(p, -EINVAL);
+
+ return object_path_is_valid(p);
+}
diff --git a/src/libsystemd/sd-bus/bus-type.h b/src/libsystemd/sd-bus/bus-type.h
new file mode 100644
index 0000000..490108a
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-type.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+bool bus_type_is_valid(char c) _const_;
+bool bus_type_is_basic(char c) _const_;
+/* "trivial" is systemd's term for what the D-Bus Specification calls
+ * a "fixed type": that is, a basic type of fixed length */
+bool bus_type_is_trivial(char c) _const_;
+bool bus_type_is_container(char c) _const_;
+
+int bus_type_get_alignment(char c) _const_;
+int bus_type_get_size(char c) _const_;
diff --git a/src/libsystemd/sd-bus/sd-bus.c b/src/libsystemd/sd-bus/sd-bus.c
new file mode 100644
index 0000000..b8d4dc8
--- /dev/null
+++ b/src/libsystemd/sd-bus/sd-bus.c
@@ -0,0 +1,4252 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <netdb.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-container.h"
+#include "bus-control.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-objects.h"
+#include "bus-protocol.h"
+#include "bus-slot.h"
+#include "bus-socket.h"
+#include "bus-track.h"
+#include "bus-type.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+#define log_debug_bus_message(m) \
+ do { \
+ sd_bus_message *_mm = (m); \
+ log_debug("Got message type=%s sender=%s destination=%s path=%s interface=%s member=%s cookie=%" PRIu64 " reply_cookie=%" PRIu64 " signature=%s error-name=%s error-message=%s", \
+ bus_message_type_to_string(_mm->header->type), \
+ strna(sd_bus_message_get_sender(_mm)), \
+ strna(sd_bus_message_get_destination(_mm)), \
+ strna(sd_bus_message_get_path(_mm)), \
+ strna(sd_bus_message_get_interface(_mm)), \
+ strna(sd_bus_message_get_member(_mm)), \
+ BUS_MESSAGE_COOKIE(_mm), \
+ _mm->reply_cookie, \
+ strna(_mm->root_container.signature), \
+ strna(_mm->error.name), \
+ strna(_mm->error.message)); \
+ } while (false)
+
+static int bus_poll(sd_bus *bus, bool need_more, uint64_t timeout_usec);
+static void bus_detach_io_events(sd_bus *b);
+static void bus_detach_inotify_event(sd_bus *b);
+
+static thread_local sd_bus *default_system_bus = NULL;
+static thread_local sd_bus *default_user_bus = NULL;
+static thread_local sd_bus *default_starter_bus = NULL;
+
+static sd_bus **bus_choose_default(int (**bus_open)(sd_bus **)) {
+ const char *e;
+
+ /* Let's try our best to reuse another cached connection. If
+ * the starter bus type is set, connect via our normal
+ * connection logic, ignoring $DBUS_STARTER_ADDRESS, so that
+ * we can share the connection with the user/system default
+ * bus. */
+
+ e = secure_getenv("DBUS_STARTER_BUS_TYPE");
+ if (e) {
+ if (streq(e, "system")) {
+ if (bus_open)
+ *bus_open = sd_bus_open_system;
+ return &default_system_bus;
+ } else if (STR_IN_SET(e, "user", "session")) {
+ if (bus_open)
+ *bus_open = sd_bus_open_user;
+ return &default_user_bus;
+ }
+ }
+
+ /* No type is specified, so we have not other option than to
+ * use the starter address if it is set. */
+ e = secure_getenv("DBUS_STARTER_ADDRESS");
+ if (e) {
+ if (bus_open)
+ *bus_open = sd_bus_open;
+ return &default_starter_bus;
+ }
+
+ /* Finally, if nothing is set use the cached connection for
+ * the right scope */
+
+ if (cg_pid_get_owner_uid(0, NULL) >= 0) {
+ if (bus_open)
+ *bus_open = sd_bus_open_user;
+ return &default_user_bus;
+ } else {
+ if (bus_open)
+ *bus_open = sd_bus_open_system;
+ return &default_system_bus;
+ }
+}
+
+sd_bus *bus_resolve(sd_bus *bus) {
+ switch ((uintptr_t) bus) {
+ case (uintptr_t) SD_BUS_DEFAULT:
+ return *(bus_choose_default(NULL));
+ case (uintptr_t) SD_BUS_DEFAULT_USER:
+ return default_user_bus;
+ case (uintptr_t) SD_BUS_DEFAULT_SYSTEM:
+ return default_system_bus;
+ default:
+ return bus;
+ }
+}
+
+void bus_close_io_fds(sd_bus *b) {
+ assert(b);
+
+ bus_detach_io_events(b);
+
+ if (b->input_fd != b->output_fd)
+ safe_close(b->output_fd);
+ b->output_fd = b->input_fd = safe_close(b->input_fd);
+}
+
+void bus_close_inotify_fd(sd_bus *b) {
+ assert(b);
+
+ bus_detach_inotify_event(b);
+
+ b->inotify_fd = safe_close(b->inotify_fd);
+ b->inotify_watches = mfree(b->inotify_watches);
+ b->n_inotify_watches = 0;
+}
+
+static void bus_reset_queues(sd_bus *b) {
+ assert(b);
+
+ while (b->rqueue_size > 0)
+ bus_message_unref_queued(b->rqueue[--b->rqueue_size], b);
+
+ b->rqueue = mfree(b->rqueue);
+ b->rqueue_allocated = 0;
+
+ while (b->wqueue_size > 0)
+ bus_message_unref_queued(b->wqueue[--b->wqueue_size], b);
+
+ b->wqueue = mfree(b->wqueue);
+ b->wqueue_allocated = 0;
+}
+
+static sd_bus* bus_free(sd_bus *b) {
+ sd_bus_slot *s;
+
+ assert(b);
+ assert(!b->track_queue);
+ assert(!b->tracks);
+
+ b->state = BUS_CLOSED;
+
+ sd_bus_detach_event(b);
+
+ while ((s = b->slots)) {
+ /* At this point only floating slots can still be
+ * around, because the non-floating ones keep a
+ * reference to the bus, and we thus couldn't be
+ * destructing right now... We forcibly disconnect the
+ * slots here, so that they still can be referenced by
+ * apps, but are dead. */
+
+ assert(s->floating);
+ bus_slot_disconnect(s, true);
+ }
+
+ if (b->default_bus_ptr)
+ *b->default_bus_ptr = NULL;
+
+ bus_close_io_fds(b);
+ bus_close_inotify_fd(b);
+
+ free(b->label);
+ free(b->groups);
+ free(b->rbuffer);
+ free(b->unique_name);
+ free(b->auth_buffer);
+ free(b->address);
+ free(b->machine);
+ free(b->description);
+ free(b->patch_sender);
+
+ free(b->exec_path);
+ strv_free(b->exec_argv);
+
+ close_many(b->fds, b->n_fds);
+ free(b->fds);
+
+ bus_reset_queues(b);
+
+ ordered_hashmap_free_free(b->reply_callbacks);
+ prioq_free(b->reply_callbacks_prioq);
+
+ assert(b->match_callbacks.type == BUS_MATCH_ROOT);
+ bus_match_free(&b->match_callbacks);
+
+ hashmap_free_free(b->vtable_methods);
+ hashmap_free_free(b->vtable_properties);
+
+ assert(hashmap_isempty(b->nodes));
+ hashmap_free(b->nodes);
+
+ bus_flush_memfd(b);
+
+ assert_se(pthread_mutex_destroy(&b->memfd_cache_mutex) == 0);
+
+ return mfree(b);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(sd_bus*, bus_free);
+
+_public_ int sd_bus_new(sd_bus **ret) {
+ _cleanup_free_ sd_bus *b = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ b = new(sd_bus, 1);
+ if (!b)
+ return -ENOMEM;
+
+ *b = (sd_bus) {
+ .n_ref = 1,
+ .input_fd = -1,
+ .output_fd = -1,
+ .inotify_fd = -1,
+ .message_version = 1,
+ .creds_mask = SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_UNIQUE_NAME,
+ .accept_fd = true,
+ .original_pid = getpid_cached(),
+ .n_groups = (size_t) -1,
+ .close_on_exit = true,
+ };
+
+ /* We guarantee that wqueue always has space for at least one entry */
+ if (!GREEDY_REALLOC(b->wqueue, b->wqueue_allocated, 1))
+ return -ENOMEM;
+
+ assert_se(pthread_mutex_init(&b->memfd_cache_mutex, NULL) == 0);
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_set_address(sd_bus *bus, const char *address) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(address, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return free_and_strdup(&bus->address, address);
+}
+
+_public_ int sd_bus_set_fd(sd_bus *bus, int input_fd, int output_fd) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(input_fd >= 0, -EBADF);
+ assert_return(output_fd >= 0, -EBADF);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->input_fd = input_fd;
+ bus->output_fd = output_fd;
+ return 0;
+}
+
+_public_ int sd_bus_set_exec(sd_bus *bus, const char *path, char *const *argv) {
+ _cleanup_strv_free_ char **a = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(path, -EINVAL);
+ assert_return(!strv_isempty(argv), -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ a = strv_copy(argv);
+ if (!a)
+ return -ENOMEM;
+
+ r = free_and_strdup(&bus->exec_path, path);
+ if (r < 0)
+ return r;
+
+ return strv_free_and_replace(bus->exec_argv, a);
+}
+
+_public_ int sd_bus_set_bus_client(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus->patch_sender, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->bus_client = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_monitor(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->is_monitor = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_fds(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->accept_fd = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_timestamp(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!IN_SET(bus->state, BUS_CLOSING, BUS_CLOSED), -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ /* This is not actually supported by any of our transports these days, but we do honour it for synthetic
+ * replies, and maybe one day classic D-Bus learns this too */
+ bus->attach_timestamp = !!b;
+
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_creds(sd_bus *bus, int b, uint64_t mask) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(mask <= _SD_BUS_CREDS_ALL, -EINVAL);
+ assert_return(!IN_SET(bus->state, BUS_CLOSING, BUS_CLOSED), -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ SET_FLAG(bus->creds_mask, mask, b);
+
+ /* The well knowns we need unconditionally, so that matches can work */
+ bus->creds_mask |= SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_UNIQUE_NAME;
+
+ return 0;
+}
+
+_public_ int sd_bus_set_server(sd_bus *bus, int b, sd_id128_t server_id) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(b || sd_id128_equal(server_id, SD_ID128_NULL), -EINVAL);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->is_server = !!b;
+ bus->server_id = server_id;
+ return 0;
+}
+
+_public_ int sd_bus_set_anonymous(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->anonymous_auth = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_trusted(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->trusted = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_description(sd_bus *bus, const char *description) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return free_and_strdup(&bus->description, description);
+}
+
+_public_ int sd_bus_set_allow_interactive_authorization(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->allow_interactive_authorization = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_allow_interactive_authorization(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->allow_interactive_authorization;
+}
+
+_public_ int sd_bus_set_watch_bind(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->watch_bind = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_watch_bind(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->watch_bind;
+}
+
+_public_ int sd_bus_set_connected_signal(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus->connected_signal = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_connected_signal(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->connected_signal;
+}
+
+static int synthesize_connected_signal(sd_bus *bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ /* If enabled, synthesizes a local "Connected" signal mirroring the local "Disconnected" signal. This is called
+ * whenever we fully established a connection, i.e. after the authorization phase, and after receiving the
+ * Hello() reply. Or in other words, whenever we enter BUS_RUNNING state.
+ *
+ * This is useful so that clients can start doing stuff whenever the connection is fully established in a way
+ * that works independently from whether we connected to a full bus or just a direct connection. */
+
+ if (!bus->connected_signal)
+ return 0;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/DBus/Local",
+ "org.freedesktop.DBus.Local",
+ "Connected");
+ if (r < 0)
+ return r;
+
+ bus_message_set_sender_local(bus, m);
+ m->read_counter = ++bus->read_counter;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ r = bus_rqueue_make_room(bus);
+ if (r < 0)
+ return r;
+
+ /* Insert at the very front */
+ memmove(bus->rqueue + 1, bus->rqueue, sizeof(sd_bus_message*) * bus->rqueue_size);
+ bus->rqueue[0] = bus_message_ref_queued(m, bus);
+ bus->rqueue_size++;
+
+ return 0;
+}
+
+void bus_set_state(sd_bus *bus, enum bus_state state) {
+ static const char * const table[_BUS_STATE_MAX] = {
+ [BUS_UNSET] = "UNSET",
+ [BUS_WATCH_BIND] = "WATCH_BIND",
+ [BUS_OPENING] = "OPENING",
+ [BUS_AUTHENTICATING] = "AUTHENTICATING",
+ [BUS_HELLO] = "HELLO",
+ [BUS_RUNNING] = "RUNNING",
+ [BUS_CLOSING] = "CLOSING",
+ [BUS_CLOSED] = "CLOSED",
+ };
+
+ assert(bus);
+ assert(state < _BUS_STATE_MAX);
+
+ if (state == bus->state)
+ return;
+
+ log_debug("Bus %s: changing state %s → %s", strna(bus->description), table[bus->state], table[state]);
+ bus->state = state;
+}
+
+static int hello_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ const char *s;
+ sd_bus *bus;
+ int r;
+
+ assert(reply);
+ bus = reply->bus;
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_HELLO, BUS_CLOSING));
+
+ r = sd_bus_message_get_errno(reply);
+ if (r > 0) {
+ r = -r;
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "s", &s);
+ if (r < 0)
+ goto fail;
+
+ if (!service_name_is_valid(s) || s[0] != ':') {
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ r = free_and_strdup(&bus->unique_name, s);
+ if (r < 0)
+ goto fail;
+
+ if (bus->state == BUS_HELLO) {
+ bus_set_state(bus, BUS_RUNNING);
+
+ r = synthesize_connected_signal(bus);
+ if (r < 0)
+ goto fail;
+ }
+
+ return 1;
+
+fail:
+ /* When Hello() failed, let's propagate this in two ways: first we return the error immediately here,
+ * which is the propagated up towards the event loop. Let's also invalidate the connection, so that
+ * if the user then calls back into us again we won't wait any longer. */
+
+ bus_set_state(bus, BUS_CLOSING);
+ return r;
+}
+
+static int bus_send_hello(sd_bus *bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return 0;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "Hello");
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_async(bus, NULL, m, hello_callback, NULL, 0);
+}
+
+int bus_start_running(sd_bus *bus) {
+ struct reply_callback *c;
+ usec_t n;
+ int r;
+
+ assert(bus);
+ assert(bus->state < BUS_HELLO);
+
+ /* We start all method call timeouts when we enter BUS_HELLO or BUS_RUNNING mode. At this point let's convert
+ * all relative to absolute timestamps. Note that we do not reshuffle the reply callback priority queue since
+ * adding a fixed value to all entries should not alter the internal order. */
+
+ n = now(CLOCK_MONOTONIC);
+ ORDERED_HASHMAP_FOREACH(c, bus->reply_callbacks) {
+ if (c->timeout_usec == 0)
+ continue;
+
+ c->timeout_usec = usec_add(n, c->timeout_usec);
+ }
+
+ if (bus->bus_client) {
+ bus_set_state(bus, BUS_HELLO);
+ return 1;
+ }
+
+ bus_set_state(bus, BUS_RUNNING);
+
+ r = synthesize_connected_signal(bus);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int parse_address_key(const char **p, const char *key, char **value) {
+ size_t l, n = 0, allocated = 0;
+ _cleanup_free_ char *r = NULL;
+ const char *a;
+
+ assert(p);
+ assert(*p);
+ assert(value);
+
+ if (key) {
+ l = strlen(key);
+ if (strncmp(*p, key, l) != 0)
+ return 0;
+
+ if ((*p)[l] != '=')
+ return 0;
+
+ if (*value)
+ return -EINVAL;
+
+ a = *p + l + 1;
+ } else
+ a = *p;
+
+ while (!IN_SET(*a, ';', ',', 0)) {
+ char c;
+
+ if (*a == '%') {
+ int x, y;
+
+ x = unhexchar(a[1]);
+ if (x < 0)
+ return x;
+
+ y = unhexchar(a[2]);
+ if (y < 0)
+ return y;
+
+ c = (char) ((x << 4) | y);
+ a += 3;
+ } else {
+ c = *a;
+ a++;
+ }
+
+ if (!GREEDY_REALLOC(r, allocated, n + 2))
+ return -ENOMEM;
+
+ r[n++] = c;
+ }
+
+ if (!r) {
+ r = strdup("");
+ if (!r)
+ return -ENOMEM;
+ } else
+ r[n] = 0;
+
+ if (*a == ',')
+ a++;
+
+ *p = a;
+
+ free_and_replace(*value, r);
+
+ return 1;
+}
+
+static void skip_address_key(const char **p) {
+ assert(p);
+ assert(*p);
+
+ *p += strcspn(*p, ",");
+
+ if (**p == ',')
+ (*p)++;
+}
+
+static int parse_unix_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *path = NULL, *abstract = NULL;
+ size_t l;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "path", &path);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "abstract", &abstract);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!path && !abstract)
+ return -EINVAL;
+
+ if (path && abstract)
+ return -EINVAL;
+
+ if (path) {
+ l = strlen(path);
+ if (l >= sizeof(b->sockaddr.un.sun_path)) /* We insist on NUL termination */
+ return -E2BIG;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ };
+
+ memcpy(b->sockaddr.un.sun_path, path, l);
+ b->sockaddr_size = offsetof(struct sockaddr_un, sun_path) + l + 1;
+
+ } else {
+ assert(abstract);
+
+ l = strlen(abstract);
+ if (l >= sizeof(b->sockaddr.un.sun_path) - 1) /* We insist on NUL termination */
+ return -E2BIG;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ };
+
+ memcpy(b->sockaddr.un.sun_path+1, abstract, l);
+ b->sockaddr_size = offsetof(struct sockaddr_un, sun_path) + 1 + l;
+ }
+
+ b->is_local = true;
+
+ return 0;
+}
+
+static int parse_tcp_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *host = NULL, *port = NULL, *family = NULL;
+ int r;
+ struct addrinfo *result, hints = {
+ .ai_socktype = SOCK_STREAM,
+ };
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "host", &host);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "port", &port);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "family", &family);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!host || !port)
+ return -EINVAL;
+
+ if (family) {
+ if (streq(family, "ipv4"))
+ hints.ai_family = AF_INET;
+ else if (streq(family, "ipv6"))
+ hints.ai_family = AF_INET6;
+ else
+ return -EINVAL;
+ }
+
+ r = getaddrinfo(host, port, &hints, &result);
+ if (r == EAI_SYSTEM)
+ return -errno;
+ else if (r != 0)
+ return -EADDRNOTAVAIL;
+
+ memcpy(&b->sockaddr, result->ai_addr, result->ai_addrlen);
+ b->sockaddr_size = result->ai_addrlen;
+
+ freeaddrinfo(result);
+
+ b->is_local = false;
+
+ return 0;
+}
+
+static int parse_exec_address(sd_bus *b, const char **p, char **guid) {
+ char *path = NULL;
+ unsigned n_argv = 0, j;
+ char **argv = NULL;
+ size_t allocated = 0;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ goto fail;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "path", &path);
+ if (r < 0)
+ goto fail;
+ else if (r > 0)
+ continue;
+
+ if (startswith(*p, "argv")) {
+ unsigned ul;
+
+ errno = 0;
+ ul = strtoul(*p + 4, (char**) p, 10);
+ if (errno > 0 || **p != '=' || ul > 256) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ (*p)++;
+
+ if (ul >= n_argv) {
+ if (!GREEDY_REALLOC0(argv, allocated, ul + 2)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n_argv = ul + 1;
+ }
+
+ r = parse_address_key(p, NULL, argv + ul);
+ if (r < 0)
+ goto fail;
+
+ continue;
+ }
+
+ skip_address_key(p);
+ }
+
+ if (!path) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ /* Make sure there are no holes in the array, with the
+ * exception of argv[0] */
+ for (j = 1; j < n_argv; j++)
+ if (!argv[j]) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (argv && argv[0] == NULL) {
+ argv[0] = strdup(path);
+ if (!argv[0]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ b->exec_path = path;
+ b->exec_argv = argv;
+
+ b->is_local = false;
+
+ return 0;
+
+fail:
+ for (j = 0; j < n_argv; j++)
+ free(argv[j]);
+
+ free(argv);
+ free(path);
+ return r;
+}
+
+static int parse_container_unix_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *machine = NULL, *pid = NULL;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "machine", &machine);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "pid", &pid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!machine == !pid)
+ return -EINVAL;
+
+ if (machine) {
+ if (!streq(machine, ".host") && !machine_name_is_valid(machine))
+ return -EINVAL;
+
+ free_and_replace(b->machine, machine);
+ } else
+ b->machine = mfree(b->machine);
+
+ if (pid) {
+ r = parse_pid(pid, &b->nspid);
+ if (r < 0)
+ return r;
+ } else
+ b->nspid = 0;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ /* Note that we use the old /var/run prefix here, to increase compatibility with really old containers */
+ .sun_path = "/var/run/dbus/system_bus_socket",
+ };
+ b->sockaddr_size = SOCKADDR_UN_LEN(b->sockaddr.un);
+ b->is_local = false;
+
+ return 0;
+}
+
+static void bus_reset_parsed_address(sd_bus *b) {
+ assert(b);
+
+ zero(b->sockaddr);
+ b->sockaddr_size = 0;
+ b->exec_argv = strv_free(b->exec_argv);
+ b->exec_path = mfree(b->exec_path);
+ b->server_id = SD_ID128_NULL;
+ b->machine = mfree(b->machine);
+ b->nspid = 0;
+}
+
+static int bus_parse_next_address(sd_bus *b) {
+ _cleanup_free_ char *guid = NULL;
+ const char *a;
+ int r;
+
+ assert(b);
+
+ if (!b->address)
+ return 0;
+ if (b->address[b->address_index] == 0)
+ return 0;
+
+ bus_reset_parsed_address(b);
+
+ a = b->address + b->address_index;
+
+ while (*a != 0) {
+
+ if (*a == ';') {
+ a++;
+ continue;
+ }
+
+ if (startswith(a, "unix:")) {
+ a += 5;
+
+ r = parse_unix_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+ break;
+
+ } else if (startswith(a, "tcp:")) {
+
+ a += 4;
+ r = parse_tcp_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+
+ } else if (startswith(a, "unixexec:")) {
+
+ a += 9;
+ r = parse_exec_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+
+ } else if (startswith(a, "x-machine-unix:")) {
+
+ a += 15;
+ r = parse_container_unix_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ a = strchr(a, ';');
+ if (!a)
+ return 0;
+ }
+
+ if (guid) {
+ r = sd_id128_from_string(guid, &b->server_id);
+ if (r < 0)
+ return r;
+ }
+
+ b->address_index = a - b->address;
+ return 1;
+}
+
+static void bus_kill_exec(sd_bus *bus) {
+ if (pid_is_valid(bus->busexec_pid) > 0) {
+ sigterm_wait(bus->busexec_pid);
+ bus->busexec_pid = 0;
+ }
+}
+
+static int bus_start_address(sd_bus *b) {
+ int r;
+
+ assert(b);
+
+ for (;;) {
+ bus_close_io_fds(b);
+ bus_close_inotify_fd(b);
+
+ bus_kill_exec(b);
+
+ /* If you provide multiple different bus-addresses, we
+ * try all of them in order and use the first one that
+ * succeeds. */
+
+ if (b->exec_path)
+ r = bus_socket_exec(b);
+ else if ((b->nspid > 0 || b->machine) && b->sockaddr.sa.sa_family != AF_UNSPEC)
+ r = bus_container_connect_socket(b);
+ else if (b->sockaddr.sa.sa_family != AF_UNSPEC)
+ r = bus_socket_connect(b);
+ else
+ goto next;
+
+ if (r >= 0) {
+ int q;
+
+ q = bus_attach_io_events(b);
+ if (q < 0)
+ return q;
+
+ q = bus_attach_inotify_event(b);
+ if (q < 0)
+ return q;
+
+ return r;
+ }
+
+ b->last_connect_error = -r;
+
+ next:
+ r = bus_parse_next_address(b);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return b->last_connect_error > 0 ? -b->last_connect_error : -ECONNREFUSED;
+ }
+}
+
+int bus_next_address(sd_bus *b) {
+ assert(b);
+
+ bus_reset_parsed_address(b);
+ return bus_start_address(b);
+}
+
+static int bus_start_fd(sd_bus *b) {
+ struct stat st;
+ int r;
+
+ assert(b);
+ assert(b->input_fd >= 0);
+ assert(b->output_fd >= 0);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *pi = NULL, *po = NULL;
+ (void) fd_get_path(b->input_fd, &pi);
+ (void) fd_get_path(b->output_fd, &po);
+ log_debug("sd-bus: starting bus%s%s on fds %d/%d (%s, %s)...",
+ b->description ? " " : "", strempty(b->description),
+ b->input_fd, b->output_fd,
+ pi ?: "???", po ?: "???");
+ }
+
+ r = fd_nonblock(b->input_fd, true);
+ if (r < 0)
+ return r;
+
+ r = fd_cloexec(b->input_fd, true);
+ if (r < 0)
+ return r;
+
+ if (b->input_fd != b->output_fd) {
+ r = fd_nonblock(b->output_fd, true);
+ if (r < 0)
+ return r;
+
+ r = fd_cloexec(b->output_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (fstat(b->input_fd, &st) < 0)
+ return -errno;
+
+ return bus_socket_take_fd(b);
+}
+
+_public_ int sd_bus_start(sd_bus *bus) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ bus_set_state(bus, BUS_OPENING);
+
+ if (bus->is_server && bus->bus_client)
+ return -EINVAL;
+
+ if (bus->input_fd >= 0)
+ r = bus_start_fd(bus);
+ else if (bus->address || bus->sockaddr.sa.sa_family != AF_UNSPEC || bus->exec_path || bus->machine)
+ r = bus_start_address(bus);
+ else
+ return -EINVAL;
+
+ if (r < 0) {
+ sd_bus_close(bus);
+ return r;
+ }
+
+ return bus_send_hello(bus);
+}
+
+_public_ int sd_bus_open_with_description(sd_bus **ret, const char *description) {
+ const char *e;
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ /* Let's connect to the starter bus if it is set, and
+ * otherwise to the bus that is appropriate for the scope
+ * we are running in */
+
+ e = secure_getenv("DBUS_STARTER_BUS_TYPE");
+ if (e) {
+ if (streq(e, "system"))
+ return sd_bus_open_system_with_description(ret, description);
+ else if (STR_IN_SET(e, "session", "user"))
+ return sd_bus_open_user_with_description(ret, description);
+ }
+
+ e = secure_getenv("DBUS_STARTER_ADDRESS");
+ if (!e) {
+ if (cg_pid_get_owner_uid(0, NULL) >= 0)
+ return sd_bus_open_user_with_description(ret, description);
+ else
+ return sd_bus_open_system_with_description(ret, description);
+ }
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_address(b, e);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+
+ /* We don't know whether the bus is trusted or not, so better
+ * be safe, and authenticate everything */
+ b->trusted = false;
+ b->is_local = false;
+ b->creds_mask |= SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_EFFECTIVE_CAPS;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open(sd_bus **ret) {
+ return sd_bus_open_with_description(ret, NULL);
+}
+
+int bus_set_address_system(sd_bus *b) {
+ const char *e;
+ int r;
+
+ assert(b);
+
+ e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS");
+
+ r = sd_bus_set_address(b, e ?: DEFAULT_SYSTEM_BUS_ADDRESS);
+ if (r >= 0)
+ b->is_system = true;
+ return r;
+}
+
+_public_ int sd_bus_open_system_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(b, description);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_set_address_system(b);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+
+ /* Let's do per-method access control on the system bus. We
+ * need the caller's UID and capability set for that. */
+ b->trusted = false;
+ b->creds_mask |= SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_EFFECTIVE_CAPS;
+ b->is_local = true;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open_system(sd_bus **ret) {
+ return sd_bus_open_system_with_description(ret, NULL);
+}
+
+int bus_set_address_user(sd_bus *b) {
+ const char *a;
+ _cleanup_free_ char *_a = NULL;
+ int r;
+
+ assert(b);
+
+ a = secure_getenv("DBUS_SESSION_BUS_ADDRESS");
+ if (!a) {
+ const char *e;
+ _cleanup_free_ char *ee = NULL;
+
+ e = secure_getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+ "sd-bus: $XDG_RUNTIME_DIR not set, cannot connect to user bus.");
+
+ ee = bus_address_escape(e);
+ if (!ee)
+ return -ENOMEM;
+
+ if (asprintf(&_a, DEFAULT_USER_BUS_ADDRESS_FMT, ee) < 0)
+ return -ENOMEM;
+ a = _a;
+ }
+
+ r = sd_bus_set_address(b, a);
+ if (r >= 0)
+ b->is_user = true;
+ return r;
+}
+
+_public_ int sd_bus_open_user_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(b, description);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_set_address_user(b);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+
+ /* We don't do any per-method access control on the user bus. */
+ b->trusted = true;
+ b->is_local = true;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open_user(sd_bus **ret) {
+ return sd_bus_open_user_with_description(ret, NULL);
+}
+
+int bus_set_address_system_remote(sd_bus *b, const char *host) {
+ _cleanup_free_ char *e = NULL;
+ char *m = NULL, *c = NULL, *a, *rbracket = NULL, *p = NULL;
+
+ assert(b);
+ assert(host);
+
+ /* Skip ":"s in ipv6 addresses */
+ if (*host == '[') {
+ char *t;
+
+ rbracket = strchr(host, ']');
+ if (!rbracket)
+ return -EINVAL;
+ t = strndupa(host + 1, rbracket - host - 1);
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ } else if ((a = strchr(host, '@'))) {
+ if (*(a + 1) == '[') {
+ _cleanup_free_ char *t = NULL;
+
+ rbracket = strchr(a + 1, ']');
+ if (!rbracket)
+ return -EINVAL;
+ t = new0(char, strlen(host));
+ if (!t)
+ return -ENOMEM;
+ strncat(t, host, a - host + 1);
+ strncat(t, a + 2, rbracket - a - 2);
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ } else if (*(a + 1) == '\0' || strchr(a + 1, '@'))
+ return -EINVAL;
+ }
+
+ /* Let's see if a port was given */
+ m = strchr(rbracket ? rbracket + 1 : host, ':');
+ if (m) {
+ char *t;
+ bool got_forward_slash = false;
+
+ p = m + 1;
+
+ t = strchr(p, '/');
+ if (t) {
+ p = strndupa(p, t - p);
+ got_forward_slash = true;
+ }
+
+ if (!in_charset(p, "0123456789") || *p == '\0') {
+ if (!machine_name_is_valid(p) || got_forward_slash)
+ return -EINVAL;
+
+ m = TAKE_PTR(p);
+ goto interpret_port_as_machine_old_syntax;
+ }
+ }
+
+ /* Let's see if a machine was given */
+ m = strchr(rbracket ? rbracket + 1 : host, '/');
+ if (m) {
+ m++;
+interpret_port_as_machine_old_syntax:
+ /* Let's make sure this is not a port of some kind,
+ * and is a valid machine name. */
+ if (!in_charset(m, "0123456789") && machine_name_is_valid(m))
+ c = strjoina(",argv", p ? "7" : "5", "=--machine=", m);
+ }
+
+ if (!e) {
+ char *t;
+
+ t = strndupa(host, strcspn(host, ":/"));
+
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ }
+
+ a = strjoin("unixexec:path=ssh,argv1=-xT", p ? ",argv2=-p,argv3=" : "", strempty(p),
+ ",argv", p ? "4" : "2", "=--,argv", p ? "5" : "3", "=", e,
+ ",argv", p ? "6" : "4", "=systemd-stdio-bridge", c);
+ if (!a)
+ return -ENOMEM;
+
+ return free_and_replace(b->address, a);
+}
+
+_public_ int sd_bus_open_system_remote(sd_bus **ret, const char *host) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(host, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = bus_set_address_system_remote(b, host);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->trusted = false;
+ b->is_system = true;
+ b->is_local = false;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+int bus_set_address_system_machine(sd_bus *b, const char *machine) {
+ _cleanup_free_ char *e = NULL;
+ char *a;
+
+ assert(b);
+ assert(machine);
+
+ e = bus_address_escape(machine);
+ if (!e)
+ return -ENOMEM;
+
+ a = strjoin("x-machine-unix:machine=", e);
+ if (!a)
+ return -ENOMEM;
+
+ return free_and_replace(b->address, a);
+}
+
+_public_ int sd_bus_open_system_machine(sd_bus **ret, const char *machine) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(machine, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(streq(machine, ".host") || machine_name_is_valid(machine), -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = bus_set_address_system_machine(b, machine);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->trusted = false;
+ b->is_system = true;
+ b->is_local = false;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ void sd_bus_close(sd_bus *bus) {
+ if (!bus)
+ return;
+ if (bus->state == BUS_CLOSED)
+ return;
+ if (bus_pid_changed(bus))
+ return;
+
+ /* Don't leave ssh hanging around */
+ bus_kill_exec(bus);
+
+ bus_set_state(bus, BUS_CLOSED);
+
+ sd_bus_detach_event(bus);
+
+ /* Drop all queued messages so that they drop references to
+ * the bus object and the bus may be freed */
+ bus_reset_queues(bus);
+
+ bus_close_io_fds(bus);
+ bus_close_inotify_fd(bus);
+}
+
+_public_ sd_bus *sd_bus_close_unref(sd_bus *bus) {
+ if (!bus)
+ return NULL;
+
+ sd_bus_close(bus);
+
+ return sd_bus_unref(bus);
+}
+
+_public_ sd_bus* sd_bus_flush_close_unref(sd_bus *bus) {
+ if (!bus)
+ return NULL;
+
+ /* Have to do this before flush() to prevent hang */
+ bus_kill_exec(bus);
+ sd_bus_flush(bus);
+
+ return sd_bus_close_unref(bus);
+}
+
+void bus_enter_closing(sd_bus *bus) {
+ assert(bus);
+
+ if (!IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING, BUS_HELLO, BUS_RUNNING))
+ return;
+
+ bus_set_state(bus, BUS_CLOSING);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus, sd_bus, bus_free);
+
+_public_ int sd_bus_is_open(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return BUS_IS_OPEN(bus->state);
+}
+
+_public_ int sd_bus_is_ready(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->state == BUS_RUNNING;
+}
+
+_public_ int sd_bus_can_send(sd_bus *bus, char type) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (type == SD_BUS_TYPE_UNIX_FD) {
+ if (!bus->accept_fd)
+ return 0;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ return bus->can_fds;
+ }
+
+ return bus_type_is_valid(type);
+}
+
+_public_ int sd_bus_get_bus_id(sd_bus *bus, sd_id128_t *id) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(id, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ *id = bus->server_id;
+ return 0;
+}
+
+#define COOKIE_CYCLED (UINT32_C(1) << 31)
+
+static uint64_t cookie_inc(uint64_t cookie) {
+
+ /* Stay within the 32bit range, since classic D-Bus can't deal with more */
+ if (cookie >= UINT32_MAX)
+ return COOKIE_CYCLED; /* Don't go back to zero, but use the highest bit for checking
+ * whether we are looping. */
+
+ return cookie + 1;
+}
+
+static int next_cookie(sd_bus *b) {
+ uint64_t new_cookie;
+
+ assert(b);
+
+ new_cookie = cookie_inc(b->cookie);
+
+ /* Small optimization: don't bother with checking for cookie reuse until we overran cookiespace at
+ * least once, but then do it thorougly. */
+ if (FLAGS_SET(new_cookie, COOKIE_CYCLED)) {
+ uint32_t i;
+
+ /* Check if the cookie is currently in use. If so, pick the next one */
+ for (i = 0; i < COOKIE_CYCLED; i++) {
+ if (!ordered_hashmap_contains(b->reply_callbacks, &new_cookie))
+ goto good;
+
+ new_cookie = cookie_inc(new_cookie);
+ }
+
+ /* Can't fulfill request */
+ return -EBUSY;
+ }
+
+good:
+ b->cookie = new_cookie;
+ return 0;
+}
+
+static int bus_seal_message(sd_bus *b, sd_bus_message *m, usec_t timeout) {
+ int r;
+
+ assert(b);
+ assert(m);
+
+ if (m->sealed) {
+ /* If we copy the same message to multiple
+ * destinations, avoid using the same cookie
+ * numbers. */
+ b->cookie = MAX(b->cookie, BUS_MESSAGE_COOKIE(m));
+ return 0;
+ }
+
+ if (timeout == 0) {
+ r = sd_bus_get_method_call_timeout(b, &timeout);
+ if (r < 0)
+ return r;
+ }
+
+ if (!m->sender && b->patch_sender) {
+ r = sd_bus_message_set_sender(m, b->patch_sender);
+ if (r < 0)
+ return r;
+ }
+
+ r = next_cookie(b);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_seal(m, b->cookie, timeout);
+}
+
+static int bus_remarshal_message(sd_bus *b, sd_bus_message **m) {
+ bool remarshal = false;
+
+ assert(b);
+
+ /* wrong packet version */
+ if (b->message_version != 0 && b->message_version != (*m)->header->version)
+ remarshal = true;
+
+ /* wrong packet endianness */
+ if (b->message_endian != 0 && b->message_endian != (*m)->header->endian)
+ remarshal = true;
+
+ return remarshal ? bus_message_remarshal(b, m) : 0;
+}
+
+int bus_seal_synthetic_message(sd_bus *b, sd_bus_message *m) {
+ assert(b);
+ assert(m);
+
+ /* Fake some timestamps, if they were requested, and not
+ * already initialized */
+ if (b->attach_timestamp) {
+ if (m->realtime <= 0)
+ m->realtime = now(CLOCK_REALTIME);
+
+ if (m->monotonic <= 0)
+ m->monotonic = now(CLOCK_MONOTONIC);
+ }
+
+ /* The bus specification says the serial number cannot be 0,
+ * hence let's fill something in for synthetic messages. Since
+ * synthetic messages might have a fake sender and we don't
+ * want to interfere with the real sender's serial numbers we
+ * pick a fixed, artificial one. We use (uint32_t) -1 rather
+ * than (uint64_t) -1 since dbus1 only had 32bit identifiers,
+ * even though kdbus can do 64bit. */
+ return sd_bus_message_seal(m, 0xFFFFFFFFULL, 0);
+}
+
+static int bus_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx) {
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ r = bus_socket_write_message(bus, m, idx);
+ if (r <= 0)
+ return r;
+
+ if (*idx >= BUS_MESSAGE_SIZE(m))
+ log_debug("Sent message type=%s sender=%s destination=%s path=%s interface=%s member=%s cookie=%" PRIu64 " reply_cookie=%" PRIu64 " signature=%s error-name=%s error-message=%s",
+ bus_message_type_to_string(m->header->type),
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_destination(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)),
+ BUS_MESSAGE_COOKIE(m),
+ m->reply_cookie,
+ strna(m->root_container.signature),
+ strna(m->error.name),
+ strna(m->error.message));
+
+ return r;
+}
+
+static int dispatch_wqueue(sd_bus *bus) {
+ int r, ret = 0;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ while (bus->wqueue_size > 0) {
+
+ r = bus_write_message(bus, bus->wqueue[0], &bus->windex);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ /* Didn't do anything this time */
+ return ret;
+ else if (bus->windex >= BUS_MESSAGE_SIZE(bus->wqueue[0])) {
+ /* Fully written. Let's drop the entry from
+ * the queue.
+ *
+ * This isn't particularly optimized, but
+ * well, this is supposed to be our worst-case
+ * buffer only, and the socket buffer is
+ * supposed to be our primary buffer, and if
+ * it got full, then all bets are off
+ * anyway. */
+
+ bus->wqueue_size--;
+ bus_message_unref_queued(bus->wqueue[0], bus);
+ memmove(bus->wqueue, bus->wqueue + 1, sizeof(sd_bus_message*) * bus->wqueue_size);
+ bus->windex = 0;
+
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int bus_read_message(sd_bus *bus) {
+ assert(bus);
+
+ return bus_socket_read_message(bus);
+}
+
+int bus_rqueue_make_room(sd_bus *bus) {
+ assert(bus);
+
+ if (bus->rqueue_size >= BUS_RQUEUE_MAX)
+ return -ENOBUFS;
+
+ if (!GREEDY_REALLOC(bus->rqueue, bus->rqueue_allocated, bus->rqueue_size + 1))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void rqueue_drop_one(sd_bus *bus, size_t i) {
+ assert(bus);
+ assert(i < bus->rqueue_size);
+
+ bus_message_unref_queued(bus->rqueue[i], bus);
+ memmove(bus->rqueue + i, bus->rqueue + i + 1, sizeof(sd_bus_message*) * (bus->rqueue_size - i - 1));
+ bus->rqueue_size--;
+}
+
+static int dispatch_rqueue(sd_bus *bus, sd_bus_message **m) {
+ int r, ret = 0;
+
+ assert(bus);
+ assert(m);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ for (;;) {
+ if (bus->rqueue_size > 0) {
+ /* Dispatch a queued message */
+ *m = sd_bus_message_ref(bus->rqueue[0]);
+ rqueue_drop_one(bus, 0);
+ return 1;
+ }
+
+ /* Try to read a new message */
+ r = bus_read_message(bus);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *m = NULL;
+ return ret;
+ }
+
+ ret = 1;
+ }
+}
+
+_public_ int sd_bus_send(sd_bus *bus, sd_bus_message *_m, uint64_t *cookie) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (bus)
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ else
+ assert_return(bus = m->bus, -ENOTCONN);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (m->n_fds > 0) {
+ r = sd_bus_can_send(bus, SD_BUS_TYPE_UNIX_FD);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EOPNOTSUPP;
+ }
+
+ /* If the cookie number isn't kept, then we know that no reply
+ * is expected */
+ if (!cookie && !m->sealed)
+ m->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = bus_seal_message(bus, m, 0);
+ if (r < 0)
+ return r;
+
+ /* Remarshall if we have to. This will possibly unref the
+ * message and place a replacement in m */
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ return r;
+
+ /* If this is a reply and no reply was requested, then let's
+ * suppress this, if we can */
+ if (m->dont_send)
+ goto finish;
+
+ if (IN_SET(bus->state, BUS_RUNNING, BUS_HELLO) && bus->wqueue_size <= 0) {
+ size_t idx = 0;
+
+ r = bus_write_message(bus, m, &idx);
+ if (r < 0) {
+ if (ERRNO_IS_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ return -ECONNRESET;
+ }
+
+ return r;
+ }
+
+ if (idx < BUS_MESSAGE_SIZE(m)) {
+ /* Wasn't fully written. So let's remember how
+ * much was written. Note that the first entry
+ * of the wqueue array is always allocated so
+ * that we always can remember how much was
+ * written. */
+ bus->wqueue[0] = bus_message_ref_queued(m, bus);
+ bus->wqueue_size = 1;
+ bus->windex = idx;
+ }
+
+ } else {
+ /* Just append it to the queue. */
+
+ if (bus->wqueue_size >= BUS_WQUEUE_MAX)
+ return -ENOBUFS;
+
+ if (!GREEDY_REALLOC(bus->wqueue, bus->wqueue_allocated, bus->wqueue_size + 1))
+ return -ENOMEM;
+
+ bus->wqueue[bus->wqueue_size++] = bus_message_ref_queued(m, bus);
+ }
+
+finish:
+ if (cookie)
+ *cookie = BUS_MESSAGE_COOKIE(m);
+
+ return 1;
+}
+
+_public_ int sd_bus_send_to(sd_bus *bus, sd_bus_message *m, const char *destination, uint64_t *cookie) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (bus)
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ else
+ assert_return(bus = m->bus, -ENOTCONN);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!streq_ptr(m->destination, destination)) {
+
+ if (!destination)
+ return -EEXIST;
+
+ r = sd_bus_message_set_destination(m, destination);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(bus, m, cookie);
+}
+
+static usec_t calc_elapse(sd_bus *bus, uint64_t usec) {
+ assert(bus);
+
+ if (usec == (uint64_t) -1)
+ return 0;
+
+ /* We start all timeouts the instant we enter BUS_HELLO/BUS_RUNNING state, so that the don't run in parallel
+ * with any connection setup states. Hence, if a method callback is started earlier than that we just store the
+ * relative timestamp, and afterwards the absolute one. */
+
+ if (IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING))
+ return usec;
+ else
+ return now(CLOCK_MONOTONIC) + usec;
+}
+
+static int timeout_compare(const void *a, const void *b) {
+ const struct reply_callback *x = a, *y = b;
+
+ if (x->timeout_usec != 0 && y->timeout_usec == 0)
+ return -1;
+
+ if (x->timeout_usec == 0 && y->timeout_usec != 0)
+ return 1;
+
+ return CMP(x->timeout_usec, y->timeout_usec);
+}
+
+_public_ int sd_bus_call_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ sd_bus_message *_m,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ uint64_t usec) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *s = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(!m->sealed || (!!callback == !(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)), -EINVAL);
+
+ if (bus)
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ else
+ assert_return(bus = m->bus, -ENOTCONN);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* If no callback is specified and there's no interest in a slot, then there's no reason to ask for a reply */
+ if (!callback && !slot && !m->sealed)
+ m->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = ordered_hashmap_ensure_allocated(&bus->reply_callbacks, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&bus->reply_callbacks_prioq, timeout_compare);
+ if (r < 0)
+ return r;
+
+ r = bus_seal_message(bus, m, usec);
+ if (r < 0)
+ return r;
+
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ return r;
+
+ if (slot || callback) {
+ s = bus_slot_allocate(bus, !slot, BUS_REPLY_CALLBACK, sizeof(struct reply_callback), userdata);
+ if (!s)
+ return -ENOMEM;
+
+ s->reply_callback.callback = callback;
+
+ s->reply_callback.cookie = BUS_MESSAGE_COOKIE(m);
+ r = ordered_hashmap_put(bus->reply_callbacks, &s->reply_callback.cookie, &s->reply_callback);
+ if (r < 0) {
+ s->reply_callback.cookie = 0;
+ return r;
+ }
+
+ s->reply_callback.timeout_usec = calc_elapse(bus, m->timeout);
+ if (s->reply_callback.timeout_usec != 0) {
+ r = prioq_put(bus->reply_callbacks_prioq, &s->reply_callback, &s->reply_callback.prioq_idx);
+ if (r < 0) {
+ s->reply_callback.timeout_usec = 0;
+ return r;
+ }
+ }
+ }
+
+ r = sd_bus_send(bus, m, s ? &s->reply_callback.cookie : NULL);
+ if (r < 0)
+ return r;
+
+ if (slot)
+ *slot = s;
+ s = NULL;
+
+ return r;
+}
+
+int bus_ensure_running(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (bus->state == BUS_RUNNING)
+ return 1;
+
+ for (;;) {
+ if (IN_SET(bus->state, BUS_UNSET, BUS_CLOSED, BUS_CLOSING))
+ return -ENOTCONN;
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0)
+ return r;
+ if (bus->state == BUS_RUNNING)
+ return 1;
+ if (r > 0)
+ continue;
+
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+_public_ int sd_bus_call(
+ sd_bus *bus,
+ sd_bus_message *_m,
+ uint64_t usec,
+ sd_bus_error *error,
+ sd_bus_message **reply) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ usec_t timeout;
+ uint64_t cookie;
+ size_t i;
+ int r;
+
+ bus_assert_return(m, -EINVAL, error);
+ bus_assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL, error);
+ bus_assert_return(!(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED), -EINVAL, error);
+ bus_assert_return(!bus_error_is_dirty(error), -EINVAL, error);
+
+ if (bus)
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ else
+ assert_return(bus = m->bus, -ENOTCONN);
+ bus_assert_return(!bus_pid_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ goto fail;
+
+ i = bus->rqueue_size;
+
+ r = bus_seal_message(bus, m, usec);
+ if (r < 0)
+ goto fail;
+
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_send(bus, m, &cookie);
+ if (r < 0)
+ goto fail;
+
+ timeout = calc_elapse(bus, m->timeout);
+
+ for (;;) {
+ usec_t left;
+
+ while (i < bus->rqueue_size) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *incoming = NULL;
+
+ incoming = sd_bus_message_ref(bus->rqueue[i]);
+
+ if (incoming->reply_cookie == cookie) {
+ /* Found a match! */
+
+ rqueue_drop_one(bus, i);
+ log_debug_bus_message(incoming);
+
+ if (incoming->header->type == SD_BUS_MESSAGE_METHOD_RETURN) {
+
+ if (incoming->n_fds <= 0 || bus->accept_fd) {
+ if (reply)
+ *reply = TAKE_PTR(incoming);
+
+ return 1;
+ }
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Reply message contained file descriptors which I couldn't accept. Sorry.");
+
+ } else if (incoming->header->type == SD_BUS_MESSAGE_METHOD_ERROR)
+ return sd_bus_error_copy(error, &incoming->error);
+ else {
+ r = -EIO;
+ goto fail;
+ }
+
+ } else if (BUS_MESSAGE_COOKIE(incoming) == cookie &&
+ bus->unique_name &&
+ incoming->sender &&
+ streq(bus->unique_name, incoming->sender)) {
+
+ rqueue_drop_one(bus, i);
+
+ /* Our own message? Somebody is trying to send its own client a message,
+ * let's not dead-lock, let's fail immediately. */
+
+ r = -ELOOP;
+ goto fail;
+ }
+
+ /* Try to read more, right-away */
+ i++;
+ }
+
+ r = bus_read_message(bus);
+ if (r < 0) {
+ if (ERRNO_IS_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ r = -ECONNRESET;
+ }
+
+ goto fail;
+ }
+ if (r > 0)
+ continue;
+
+ if (timeout > 0) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= timeout) {
+ r = -ETIMEDOUT;
+ goto fail;
+ }
+
+ left = timeout - n;
+ } else
+ left = (uint64_t) -1;
+
+ r = bus_poll(bus, true, left);
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto fail;
+ }
+
+ r = dispatch_wqueue(bus);
+ if (r < 0) {
+ if (ERRNO_IS_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ r = -ECONNRESET;
+ }
+
+ goto fail;
+ }
+ }
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_fd(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->input_fd == bus->output_fd, -EPERM);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSED)
+ return -ENOTCONN;
+
+ if (bus->inotify_fd >= 0)
+ return bus->inotify_fd;
+
+ if (bus->input_fd >= 0)
+ return bus->input_fd;
+
+ return -ENOTCONN;
+}
+
+_public_ int sd_bus_get_events(sd_bus *bus) {
+ int flags = 0;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ switch (bus->state) {
+
+ case BUS_UNSET:
+ case BUS_CLOSED:
+ return -ENOTCONN;
+
+ case BUS_WATCH_BIND:
+ flags |= POLLIN;
+ break;
+
+ case BUS_OPENING:
+ flags |= POLLOUT;
+ break;
+
+ case BUS_AUTHENTICATING:
+ if (bus_socket_auth_needs_write(bus))
+ flags |= POLLOUT;
+
+ flags |= POLLIN;
+ break;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ if (bus->rqueue_size <= 0)
+ flags |= POLLIN;
+ if (bus->wqueue_size > 0)
+ flags |= POLLOUT;
+ break;
+
+ case BUS_CLOSING:
+ break;
+
+ default:
+ assert_not_reached("Unknown state");
+ }
+
+ return flags;
+}
+
+_public_ int sd_bus_get_timeout(sd_bus *bus, uint64_t *timeout_usec) {
+ struct reply_callback *c;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(timeout_usec, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state) && bus->state != BUS_CLOSING)
+ return -ENOTCONN;
+
+ if (bus->track_queue) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ switch (bus->state) {
+
+ case BUS_AUTHENTICATING:
+ *timeout_usec = bus->auth_timeout;
+ return 1;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ if (bus->rqueue_size > 0) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ c = prioq_peek(bus->reply_callbacks_prioq);
+ if (!c) {
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+ }
+
+ if (c->timeout_usec == 0) {
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+ }
+
+ *timeout_usec = c->timeout_usec;
+ return 1;
+
+ case BUS_CLOSING:
+ *timeout_usec = 0;
+ return 1;
+
+ case BUS_WATCH_BIND:
+ case BUS_OPENING:
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+
+ default:
+ assert_not_reached("Unknown or unexpected stat");
+ }
+}
+
+static int process_timeout(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* m = NULL;
+ struct reply_callback *c;
+ sd_bus_slot *slot;
+ bool is_hello;
+ usec_t n;
+ int r;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ c = prioq_peek(bus->reply_callbacks_prioq);
+ if (!c)
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+ if (c->timeout_usec > n)
+ return 0;
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ c->cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_REPLY, "Method call timed out"),
+ &m);
+ if (r < 0)
+ return r;
+
+ m->read_counter = ++bus->read_counter;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ assert_se(prioq_pop(bus->reply_callbacks_prioq) == c);
+ c->timeout_usec = 0;
+
+ ordered_hashmap_remove(bus->reply_callbacks, &c->cookie);
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ bus->iteration_counter++;
+
+ is_hello = bus->state == BUS_HELLO && c->callback == hello_callback;
+
+ bus->current_message = m;
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+ bus->current_message = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ /* When this is the hello message and it timed out, then make sure to propagate the error up, don't just log
+ * and ignore the callback handler's return value. */
+ if (is_hello)
+ return r;
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_hello(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ if (bus->state != BUS_HELLO)
+ return 0;
+
+ /* Let's make sure the first message on the bus is the HELLO
+ * reply. But note that we don't actually parse the message
+ * here (we leave that to the usual handling), we just verify
+ * we don't let any earlier msg through. */
+
+ if (!IN_SET(m->header->type, SD_BUS_MESSAGE_METHOD_RETURN, SD_BUS_MESSAGE_METHOD_ERROR))
+ return -EIO;
+
+ if (m->reply_cookie != 1)
+ return -EIO;
+
+ return 0;
+}
+
+static int process_reply(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *synthetic_reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ struct reply_callback *c;
+ sd_bus_slot *slot;
+ bool is_hello;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ if (!IN_SET(m->header->type, SD_BUS_MESSAGE_METHOD_RETURN, SD_BUS_MESSAGE_METHOD_ERROR))
+ return 0;
+
+ if (m->destination && bus->unique_name && !streq_ptr(m->destination, bus->unique_name))
+ return 0;
+
+ c = ordered_hashmap_remove(bus->reply_callbacks, &m->reply_cookie);
+ if (!c)
+ return 0;
+
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ if (m->n_fds > 0 && !bus->accept_fd) {
+
+ /* If the reply contained a file descriptor which we
+ * didn't want we pass an error instead. */
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ m->reply_cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Reply message contained file descriptor"),
+ &synthetic_reply);
+ if (r < 0)
+ return r;
+
+ /* Copy over original timestamp */
+ synthetic_reply->realtime = m->realtime;
+ synthetic_reply->monotonic = m->monotonic;
+ synthetic_reply->seqnum = m->seqnum;
+ synthetic_reply->read_counter = m->read_counter;
+
+ r = bus_seal_synthetic_message(bus, synthetic_reply);
+ if (r < 0)
+ return r;
+
+ m = synthetic_reply;
+ } else {
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->timeout_usec != 0) {
+ prioq_remove(bus->reply_callbacks_prioq, c, &c->prioq_idx);
+ c->timeout_usec = 0;
+ }
+
+ is_hello = bus->state == BUS_HELLO && c->callback == hello_callback;
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ /* When this is the hello message and it failed, then make sure to propagate the error up, don't just log and
+ * ignore the callback handler's return value. */
+ if (is_hello)
+ return r;
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_filter(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ struct filter_callback *l;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ do {
+ bus->filter_callbacks_modified = false;
+
+ LIST_FOREACH(callbacks, l, bus->filter_callbacks) {
+ sd_bus_slot *slot;
+
+ if (bus->filter_callbacks_modified)
+ break;
+
+ /* Don't run this more than once per iteration */
+ if (l->last_iteration == bus->iteration_counter)
+ continue;
+
+ l->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ slot = container_of(l, sd_bus_slot, filter_callback);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = l->callback;
+ bus->current_userdata = slot->userdata;
+ r = l->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+
+ }
+
+ } while (bus->filter_callbacks_modified);
+
+ return 0;
+}
+
+static int process_match(sd_bus *bus, sd_bus_message *m) {
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ do {
+ bus->match_callbacks_modified = false;
+
+ r = bus_match_run(bus, &bus->match_callbacks, m);
+ if (r != 0)
+ return r;
+
+ } while (bus->match_callbacks_modified);
+
+ return 0;
+}
+
+static int process_builtin(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (bus->manual_peer_interface)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (!streq_ptr(m->interface, "org.freedesktop.DBus.Peer"))
+ return 0;
+
+ if (m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 1;
+
+ if (streq_ptr(m->member, "Ping"))
+ r = sd_bus_message_new_method_return(m, &reply);
+ else if (streq_ptr(m->member, "GetMachineId")) {
+ sd_id128_t id;
+ char sid[SD_ID128_STRING_MAX];
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", sd_id128_to_string(id, sid));
+ } else {
+ r = sd_bus_message_new_method_errorf(
+ m, &reply,
+ SD_BUS_ERROR_UNKNOWN_METHOD,
+ "Unknown method '%s' on interface '%s'.", m->member, m->interface);
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int process_fd_check(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ /* If we got a message with a file descriptor which we didn't
+ * want to accept, then let's drop it. How can this even
+ * happen? For example, when the kernel queues a message into
+ * an activatable names's queue which allows fds, and then is
+ * delivered to us later even though we ourselves did not
+ * negotiate it. */
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (m->n_fds <= 0)
+ return 0;
+
+ if (bus->accept_fd)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 1; /* just eat it up */
+
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Message contains file descriptors, which I cannot accept. Sorry.");
+}
+
+static int process_message(sd_bus *bus, sd_bus_message *m) {
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ bus->current_message = m;
+ bus->iteration_counter++;
+
+ log_debug_bus_message(m);
+
+ r = process_hello(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_reply(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_fd_check(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_filter(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_match(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_builtin(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = bus_process_object(bus, m);
+
+finish:
+ bus->current_message = NULL;
+ return r;
+}
+
+static int dispatch_track(sd_bus *bus) {
+ assert(bus);
+
+ if (!bus->track_queue)
+ return 0;
+
+ bus_track_dispatch(bus->track_queue);
+ return 1;
+}
+
+static int process_running(sd_bus *bus, sd_bus_message **ret) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = process_timeout(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_wqueue(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_track(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_rqueue(bus, &m);
+ if (r < 0)
+ return r;
+ if (!m)
+ goto null_message;
+
+ r = process_message(bus, m);
+ if (r != 0)
+ goto null_message;
+
+ if (ret) {
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return 1;
+ }
+
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL) {
+
+ log_debug("Unprocessed message call sender=%s object=%s interface=%s member=%s",
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)));
+
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_OBJECT,
+ "Unknown object '%s'.", m->path);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+
+null_message:
+ if (r >= 0 && ret)
+ *ret = NULL;
+
+ return r;
+}
+
+static int bus_exit_now(sd_bus *bus) {
+ assert(bus);
+
+ /* Exit due to close, if this is requested. If this is bus object is attached to an event source, invokes
+ * sd_event_exit(), otherwise invokes libc exit(). */
+
+ if (bus->exited) /* did we already exit? */
+ return 0;
+ if (!bus->exit_triggered) /* was the exit condition triggered? */
+ return 0;
+ if (!bus->exit_on_disconnect) /* Shall we actually exit on disconnection? */
+ return 0;
+
+ bus->exited = true; /* never exit more than once */
+
+ log_debug("Bus connection disconnected, exiting.");
+
+ if (bus->event)
+ return sd_event_exit(bus->event, EXIT_FAILURE);
+ else
+ exit(EXIT_FAILURE);
+
+ assert_not_reached("exit() didn't exit?");
+}
+
+static int process_closing_reply_callback(sd_bus *bus, struct reply_callback *c) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus_slot *slot;
+ int r;
+
+ assert(bus);
+ assert(c);
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ c->cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_REPLY, "Connection terminated"),
+ &m);
+ if (r < 0)
+ return r;
+
+ m->read_counter = ++bus->read_counter;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ if (c->timeout_usec != 0) {
+ prioq_remove(bus->reply_callbacks_prioq, c, &c->prioq_idx);
+ c->timeout_usec = 0;
+ }
+
+ ordered_hashmap_remove(bus->reply_callbacks, &c->cookie);
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ bus->iteration_counter++;
+
+ bus->current_message = m;
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+ bus->current_message = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_closing(sd_bus *bus, sd_bus_message **ret) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct reply_callback *c;
+ int r;
+
+ assert(bus);
+ assert(bus->state == BUS_CLOSING);
+
+ /* First, fail all outstanding method calls */
+ c = ordered_hashmap_first(bus->reply_callbacks);
+ if (c)
+ return process_closing_reply_callback(bus, c);
+
+ /* Then, fake-drop all remaining bus tracking references */
+ if (bus->tracks) {
+ bus_track_close(bus->tracks);
+ return 1;
+ }
+
+ /* Then, synthesize a Disconnected message */
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/DBus/Local",
+ "org.freedesktop.DBus.Local",
+ "Disconnected");
+ if (r < 0)
+ return r;
+
+ bus_message_set_sender_local(bus, m);
+ m->read_counter = ++bus->read_counter;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ sd_bus_close(bus);
+
+ bus->current_message = m;
+ bus->iteration_counter++;
+
+ r = process_filter(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_match(bus, m);
+ if (r != 0)
+ goto finish;
+
+ /* Nothing else to do, exit now, if the condition holds */
+ bus->exit_triggered = true;
+ (void) bus_exit_now(bus);
+
+ if (ret)
+ *ret = TAKE_PTR(m);
+
+ r = 1;
+
+finish:
+ bus->current_message = NULL;
+
+ return r;
+}
+
+static int bus_process_internal(sd_bus *bus, sd_bus_message **ret) {
+ int r;
+
+ /* Returns 0 when we didn't do anything. This should cause the
+ * caller to invoke sd_bus_wait() before returning the next
+ * time. Returns > 0 when we did something, which possibly
+ * means *ret is filled in with an unprocessed message. */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ /* We don't allow recursively invoking sd_bus_process(). */
+ assert_return(!bus->current_message, -EBUSY);
+ assert(!bus->current_slot); /* This should be NULL whenever bus->current_message is */
+
+ BUS_DONT_DESTROY(bus);
+
+ switch (bus->state) {
+
+ case BUS_UNSET:
+ return -ENOTCONN;
+
+ case BUS_CLOSED:
+ return -ECONNRESET;
+
+ case BUS_WATCH_BIND:
+ r = bus_socket_process_watch_bind(bus);
+ break;
+
+ case BUS_OPENING:
+ r = bus_socket_process_opening(bus);
+ break;
+
+ case BUS_AUTHENTICATING:
+ r = bus_socket_process_authenticating(bus);
+ break;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ r = process_running(bus, ret);
+ if (r >= 0)
+ return r;
+
+ /* This branch initializes *ret, hence we don't use the generic error checking below */
+ break;
+
+ case BUS_CLOSING:
+ return process_closing(bus, ret);
+
+ default:
+ assert_not_reached("Unknown state");
+ }
+
+ if (ERRNO_IS_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ r = 1;
+ } else if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = NULL;
+
+ return r;
+}
+
+_public_ int sd_bus_process(sd_bus *bus, sd_bus_message **ret) {
+ return bus_process_internal(bus, ret);
+}
+
+_public_ int sd_bus_process_priority(sd_bus *bus, int64_t priority, sd_bus_message **ret) {
+ return bus_process_internal(bus, ret);
+}
+
+static int bus_poll(sd_bus *bus, bool need_more, uint64_t timeout_usec) {
+ struct pollfd p[2] = {};
+ int r, n;
+ struct timespec ts;
+ usec_t m = USEC_INFINITY;
+
+ assert(bus);
+
+ if (bus->state == BUS_CLOSING)
+ return 1;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (bus->state == BUS_WATCH_BIND) {
+ assert(bus->inotify_fd >= 0);
+
+ p[0].events = POLLIN;
+ p[0].fd = bus->inotify_fd;
+ n = 1;
+ } else {
+ int e;
+
+ e = sd_bus_get_events(bus);
+ if (e < 0)
+ return e;
+
+ if (need_more)
+ /* The caller really needs some more data, he doesn't
+ * care about what's already read, or any timeouts
+ * except its own. */
+ e |= POLLIN;
+ else {
+ usec_t until;
+ /* The caller wants to process if there's something to
+ * process, but doesn't care otherwise */
+
+ r = sd_bus_get_timeout(bus, &until);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ m = usec_sub_unsigned(until, now(CLOCK_MONOTONIC));
+ }
+
+ p[0].fd = bus->input_fd;
+ if (bus->output_fd == bus->input_fd) {
+ p[0].events = e;
+ n = 1;
+ } else {
+ p[0].events = e & POLLIN;
+ p[1].fd = bus->output_fd;
+ p[1].events = e & POLLOUT;
+ n = 2;
+ }
+ }
+
+ if (timeout_usec != (uint64_t) -1 && (m == USEC_INFINITY || timeout_usec < m))
+ m = timeout_usec;
+
+ r = ppoll(p, n, m == USEC_INFINITY ? NULL : timespec_store(&ts, m), NULL);
+ if (r < 0)
+ return -errno;
+ if (r == 0)
+ return 0;
+
+ if (p[0].revents & POLLNVAL)
+ return -EBADF;
+ if (n >= 2 && (p[1].revents & POLLNVAL))
+ return -EBADF;
+
+ return 1;
+}
+
+_public_ int sd_bus_wait(sd_bus *bus, uint64_t timeout_usec) {
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSING)
+ return 0;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (bus->rqueue_size > 0)
+ return 0;
+
+ return bus_poll(bus, false, timeout_usec);
+}
+
+_public_ int sd_bus_flush(sd_bus *bus) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSING)
+ return 0;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* We never were connected? Don't hang in inotify for good, as there's no timeout set for it */
+ if (bus->state == BUS_WATCH_BIND)
+ return -EUNATCH;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ if (bus->wqueue_size <= 0)
+ return 0;
+
+ for (;;) {
+ r = dispatch_wqueue(bus);
+ if (r < 0) {
+ if (ERRNO_IS_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ return -ECONNRESET;
+ }
+
+ return r;
+ }
+
+ if (bus->wqueue_size <= 0)
+ return 0;
+
+ r = bus_poll(bus, false, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+_public_ int sd_bus_add_filter(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ s = bus_slot_allocate(bus, !slot, BUS_FILTER_CALLBACK, sizeof(struct filter_callback), userdata);
+ if (!s)
+ return -ENOMEM;
+
+ s->filter_callback.callback = callback;
+
+ bus->filter_callbacks_modified = true;
+ LIST_PREPEND(callbacks, bus->filter_callbacks, &s->filter_callback);
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+}
+
+static int add_match_callback(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ sd_bus_slot *match_slot = userdata;
+ bool failed = false;
+ int r;
+
+ assert(m);
+ assert(match_slot);
+
+ sd_bus_slot_ref(match_slot);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to add match %s, failing connection: %s",
+ match_slot->match_callback.match_string,
+ sd_bus_message_get_error(m)->message);
+
+ failed = true;
+ } else
+ log_debug("Match %s successfully installed.", match_slot->match_callback.match_string);
+
+ if (match_slot->match_callback.install_callback) {
+ sd_bus *bus;
+
+ bus = sd_bus_message_get_bus(m);
+
+ /* This function has been called as slot handler, and we want to call another slot handler. Let's
+ * update the slot callback metadata temporarily with our own data, and then revert back to the old
+ * values. */
+
+ assert(bus->current_slot == match_slot->match_callback.install_slot);
+ assert(bus->current_handler == add_match_callback);
+ assert(bus->current_userdata == userdata);
+
+ bus->current_slot = match_slot;
+ bus->current_handler = match_slot->match_callback.install_callback;
+ bus->current_userdata = match_slot->userdata;
+
+ r = match_slot->match_callback.install_callback(m, match_slot->userdata, ret_error);
+
+ bus->current_slot = match_slot->match_callback.install_slot;
+ bus->current_handler = add_match_callback;
+ bus->current_userdata = userdata;
+ } else {
+ if (failed) /* Generic failure handling: destroy the connection */
+ bus_enter_closing(sd_bus_message_get_bus(m));
+
+ r = 1;
+ }
+
+ /* We don't need the install method reply slot anymore, let's free it */
+ match_slot->match_callback.install_slot = sd_bus_slot_unref(match_slot->match_callback.install_slot);
+
+ if (failed && match_slot->floating)
+ bus_slot_disconnect(match_slot, true);
+
+ sd_bus_slot_unref(match_slot);
+
+ return r;
+}
+
+static int bus_add_match_full(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ bool asynchronous,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ struct bus_match_component *components = NULL;
+ unsigned n_components = 0;
+ sd_bus_slot *s = NULL;
+ int r = 0;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(match, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ r = bus_match_parse(match, &components, &n_components);
+ if (r < 0)
+ goto finish;
+
+ s = bus_slot_allocate(bus, !slot, BUS_MATCH_CALLBACK, sizeof(struct match_callback), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ s->match_callback.callback = callback;
+ s->match_callback.install_callback = install_callback;
+
+ if (bus->bus_client) {
+ enum bus_match_scope scope;
+
+ scope = bus_match_get_scope(components, n_components);
+
+ /* Do not install server-side matches for matches against the local service, interface or bus path. */
+ if (scope != BUS_MATCH_LOCAL) {
+
+ /* We store the original match string, so that we can use it to remove the match again. */
+
+ s->match_callback.match_string = strdup(match);
+ if (!s->match_callback.match_string) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (asynchronous) {
+ r = bus_add_match_internal_async(bus,
+ &s->match_callback.install_slot,
+ s->match_callback.match_string,
+ add_match_callback,
+ s);
+
+ if (r < 0)
+ return r;
+
+ /* Make the slot of the match call floating now. We need the reference, but we don't
+ * want that this match pins the bus object, hence we first create it non-floating, but
+ * then make it floating. */
+ r = sd_bus_slot_set_floating(s->match_callback.install_slot, true);
+ } else
+ r = bus_add_match_internal(bus, s->match_callback.match_string, &s->match_callback.after);
+ if (r < 0)
+ goto finish;
+
+ s->match_added = true;
+ }
+ }
+
+ bus->match_callbacks_modified = true;
+ r = bus_match_add(&bus->match_callbacks, components, n_components, &s->match_callback);
+ if (r < 0)
+ goto finish;
+
+ if (slot)
+ *slot = s;
+ s = NULL;
+
+finish:
+ bus_match_parse_free(components, n_components);
+ sd_bus_slot_unref(s);
+
+ return r;
+}
+
+_public_ int sd_bus_add_match(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_match_full(bus, slot, false, match, callback, NULL, userdata);
+}
+
+_public_ int sd_bus_add_match_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ return bus_add_match_full(bus, slot, true, match, callback, install_callback, userdata);
+}
+
+bool bus_pid_changed(sd_bus *bus) {
+ assert(bus);
+
+ /* We don't support people creating a bus connection and
+ * keeping it around over a fork(). Let's complain. */
+
+ return bus->original_pid != getpid_cached();
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ /* Note that this is called both on input_fd, output_fd as well as inotify_fd events */
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Processing of bus failed, closing down: %m");
+ bus_enter_closing(bus);
+ }
+
+ return 1;
+}
+
+static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Processing of bus failed, closing down: %m");
+ bus_enter_closing(bus);
+ }
+
+ return 1;
+}
+
+static int prepare_callback(sd_event_source *s, void *userdata) {
+ sd_bus *bus = userdata;
+ int r, e;
+ usec_t until;
+
+ assert(s);
+ assert(bus);
+
+ e = sd_bus_get_events(bus);
+ if (e < 0) {
+ r = e;
+ goto fail;
+ }
+
+ if (bus->output_fd != bus->input_fd) {
+
+ r = sd_event_source_set_io_events(bus->input_io_event_source, e & POLLIN);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_io_events(bus->output_io_event_source, e & POLLOUT);
+ } else
+ r = sd_event_source_set_io_events(bus->input_io_event_source, e);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_get_timeout(bus, &until);
+ if (r < 0)
+ goto fail;
+ if (r > 0) {
+ int j;
+
+ j = sd_event_source_set_time(bus->time_event_source, until);
+ if (j < 0) {
+ r = j;
+ goto fail;
+ }
+ }
+
+ r = sd_event_source_set_enabled(bus->time_event_source, r > 0);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ log_debug_errno(r, "Preparing of bus events failed, closing down: %m");
+ bus_enter_closing(bus);
+
+ return 1;
+}
+
+static int quit_callback(sd_event_source *event, void *userdata) {
+ sd_bus *bus = userdata;
+
+ assert(event);
+
+ if (bus->close_on_exit) {
+ sd_bus_flush(bus);
+ sd_bus_close(bus);
+ }
+
+ return 1;
+}
+
+int bus_attach_io_events(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (bus->input_fd < 0)
+ return 0;
+
+ if (!bus->event)
+ return 0;
+
+ if (!bus->input_io_event_source) {
+ r = sd_event_add_io(bus->event, &bus->input_io_event_source, bus->input_fd, 0, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_prepare(bus->input_io_event_source, prepare_callback);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->input_io_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->input_io_event_source, "bus-input");
+ } else
+ r = sd_event_source_set_io_fd(bus->input_io_event_source, bus->input_fd);
+
+ if (r < 0)
+ return r;
+
+ if (bus->output_fd != bus->input_fd) {
+ assert(bus->output_fd >= 0);
+
+ if (!bus->output_io_event_source) {
+ r = sd_event_add_io(bus->event, &bus->output_io_event_source, bus->output_fd, 0, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->output_io_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->input_io_event_source, "bus-output");
+ } else
+ r = sd_event_source_set_io_fd(bus->output_io_event_source, bus->output_fd);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void bus_detach_io_events(sd_bus *bus) {
+ assert(bus);
+
+ if (bus->input_io_event_source) {
+ sd_event_source_set_enabled(bus->input_io_event_source, SD_EVENT_OFF);
+ bus->input_io_event_source = sd_event_source_unref(bus->input_io_event_source);
+ }
+
+ if (bus->output_io_event_source) {
+ sd_event_source_set_enabled(bus->output_io_event_source, SD_EVENT_OFF);
+ bus->output_io_event_source = sd_event_source_unref(bus->output_io_event_source);
+ }
+}
+
+int bus_attach_inotify_event(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (bus->inotify_fd < 0)
+ return 0;
+
+ if (!bus->event)
+ return 0;
+
+ if (!bus->inotify_event_source) {
+ r = sd_event_add_io(bus->event, &bus->inotify_event_source, bus->inotify_fd, EPOLLIN, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->inotify_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->inotify_event_source, "bus-inotify");
+ } else
+ r = sd_event_source_set_io_fd(bus->inotify_event_source, bus->inotify_fd);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void bus_detach_inotify_event(sd_bus *bus) {
+ assert(bus);
+
+ if (bus->inotify_event_source) {
+ sd_event_source_set_enabled(bus->inotify_event_source, SD_EVENT_OFF);
+ bus->inotify_event_source = sd_event_source_unref(bus->inotify_event_source);
+ }
+}
+
+_public_ int sd_bus_attach_event(sd_bus *bus, sd_event *event, int priority) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus->event, -EBUSY);
+
+ assert(!bus->input_io_event_source);
+ assert(!bus->output_io_event_source);
+ assert(!bus->time_event_source);
+
+ if (event)
+ bus->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&bus->event);
+ if (r < 0)
+ return r;
+ }
+
+ bus->event_priority = priority;
+
+ r = sd_event_add_time(bus->event, &bus->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(bus->time_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(bus->time_event_source, "bus-time");
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_add_exit(bus->event, &bus->quit_event_source, quit_callback, bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(bus->quit_event_source, "bus-exit");
+ if (r < 0)
+ goto fail;
+
+ r = bus_attach_io_events(bus);
+ if (r < 0)
+ goto fail;
+
+ r = bus_attach_inotify_event(bus);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_bus_detach_event(bus);
+ return r;
+}
+
+_public_ int sd_bus_detach_event(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ if (!bus->event)
+ return 0;
+
+ bus_detach_io_events(bus);
+ bus_detach_inotify_event(bus);
+
+ if (bus->time_event_source) {
+ sd_event_source_set_enabled(bus->time_event_source, SD_EVENT_OFF);
+ bus->time_event_source = sd_event_source_unref(bus->time_event_source);
+ }
+
+ if (bus->quit_event_source) {
+ sd_event_source_set_enabled(bus->quit_event_source, SD_EVENT_OFF);
+ bus->quit_event_source = sd_event_source_unref(bus->quit_event_source);
+ }
+
+ bus->event = sd_event_unref(bus->event);
+ return 1;
+}
+
+_public_ sd_event* sd_bus_get_event(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->event;
+}
+
+_public_ sd_bus_message* sd_bus_get_current_message(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->current_message;
+}
+
+_public_ sd_bus_slot* sd_bus_get_current_slot(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->current_slot;
+}
+
+_public_ sd_bus_message_handler_t sd_bus_get_current_handler(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->current_handler;
+}
+
+_public_ void* sd_bus_get_current_userdata(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->current_userdata;
+}
+
+static int bus_default(int (*bus_open)(sd_bus **), sd_bus **default_bus, sd_bus **ret) {
+ sd_bus *b = NULL;
+ int r;
+
+ assert(bus_open);
+ assert(default_bus);
+
+ if (!ret)
+ return !!*default_bus;
+
+ if (*default_bus) {
+ *ret = sd_bus_ref(*default_bus);
+ return 0;
+ }
+
+ r = bus_open(&b);
+ if (r < 0)
+ return r;
+
+ b->default_bus_ptr = default_bus;
+ b->tid = gettid();
+ *default_bus = b;
+
+ *ret = b;
+ return 1;
+}
+
+_public_ int sd_bus_default_system(sd_bus **ret) {
+ return bus_default(sd_bus_open_system, &default_system_bus, ret);
+}
+
+_public_ int sd_bus_default_user(sd_bus **ret) {
+ return bus_default(sd_bus_open_user, &default_user_bus, ret);
+}
+
+_public_ int sd_bus_default(sd_bus **ret) {
+ int (*bus_open)(sd_bus **) = NULL;
+ sd_bus **busp;
+
+ busp = bus_choose_default(&bus_open);
+ return bus_default(bus_open, busp, ret);
+}
+
+_public_ int sd_bus_get_tid(sd_bus *b, pid_t *tid) {
+ assert_return(b, -EINVAL);
+ assert_return(tid, -EINVAL);
+ assert_return(!bus_pid_changed(b), -ECHILD);
+
+ if (b->tid != 0) {
+ *tid = b->tid;
+ return 0;
+ }
+
+ if (b->event)
+ return sd_event_get_tid(b->event, tid);
+
+ return -ENXIO;
+}
+
+_public_ int sd_bus_path_encode(const char *prefix, const char *external_id, char **ret_path) {
+ _cleanup_free_ char *e = NULL;
+ char *ret;
+
+ assert_return(object_path_is_valid(prefix), -EINVAL);
+ assert_return(external_id, -EINVAL);
+ assert_return(ret_path, -EINVAL);
+
+ e = bus_label_escape(external_id);
+ if (!e)
+ return -ENOMEM;
+
+ ret = path_join(prefix, e);
+ if (!ret)
+ return -ENOMEM;
+
+ *ret_path = ret;
+ return 0;
+}
+
+_public_ int sd_bus_path_decode(const char *path, const char *prefix, char **external_id) {
+ const char *e;
+ char *ret;
+
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(object_path_is_valid(prefix), -EINVAL);
+ assert_return(external_id, -EINVAL);
+
+ e = object_path_startswith(path, prefix);
+ if (!e) {
+ *external_id = NULL;
+ return 0;
+ }
+
+ ret = bus_label_unescape(e);
+ if (!ret)
+ return -ENOMEM;
+
+ *external_id = ret;
+ return 1;
+}
+
+_public_ int sd_bus_path_encode_many(char **out, const char *path_template, ...) {
+ _cleanup_strv_free_ char **labels = NULL;
+ char *path, *path_pos, **label_pos;
+ const char *sep, *template_pos;
+ size_t path_length;
+ va_list list;
+ int r;
+
+ assert_return(out, -EINVAL);
+ assert_return(path_template, -EINVAL);
+
+ path_length = strlen(path_template);
+
+ va_start(list, path_template);
+ for (sep = strchr(path_template, '%'); sep; sep = strchr(sep + 1, '%')) {
+ const char *arg;
+ char *label;
+
+ arg = va_arg(list, const char *);
+ if (!arg) {
+ va_end(list);
+ return -EINVAL;
+ }
+
+ label = bus_label_escape(arg);
+ if (!label) {
+ va_end(list);
+ return -ENOMEM;
+ }
+
+ r = strv_consume(&labels, label);
+ if (r < 0) {
+ va_end(list);
+ return r;
+ }
+
+ /* add label length, but account for the format character */
+ path_length += strlen(label) - 1;
+ }
+ va_end(list);
+
+ path = malloc(path_length + 1);
+ if (!path)
+ return -ENOMEM;
+
+ path_pos = path;
+ label_pos = labels;
+
+ for (template_pos = path_template; *template_pos; ) {
+ sep = strchrnul(template_pos, '%');
+ path_pos = mempcpy(path_pos, template_pos, sep - template_pos);
+ if (!*sep)
+ break;
+
+ path_pos = stpcpy(path_pos, *label_pos++);
+ template_pos = sep + 1;
+ }
+
+ *path_pos = 0;
+ *out = path;
+ return 0;
+}
+
+_public_ int sd_bus_path_decode_many(const char *path, const char *path_template, ...) {
+ _cleanup_strv_free_ char **labels = NULL;
+ const char *template_pos, *path_pos;
+ char **label_pos;
+ va_list list;
+ int r;
+
+ /*
+ * This decodes an object-path based on a template argument. The
+ * template consists of a verbatim path, optionally including special
+ * directives:
+ *
+ * - Each occurrence of '%' in the template matches an arbitrary
+ * substring of a label in the given path. At most one such
+ * directive is allowed per label. For each such directive, the
+ * caller must provide an output parameter (char **) via va_arg. If
+ * NULL is passed, the given label is verified, but not returned.
+ * For each matched label, the *decoded* label is stored in the
+ * passed output argument, and the caller is responsible to free
+ * it. Note that the output arguments are only modified if the
+ * actually path matched the template. Otherwise, they're left
+ * untouched.
+ *
+ * This function returns <0 on error, 0 if the path does not match the
+ * template, 1 if it matched.
+ */
+
+ assert_return(path, -EINVAL);
+ assert_return(path_template, -EINVAL);
+
+ path_pos = path;
+
+ for (template_pos = path_template; *template_pos; ) {
+ const char *sep;
+ size_t length;
+ char *label;
+
+ /* verify everything until the next '%' matches verbatim */
+ sep = strchrnul(template_pos, '%');
+ length = sep - template_pos;
+ if (strncmp(path_pos, template_pos, length))
+ return 0;
+
+ path_pos += length;
+ template_pos += length;
+
+ if (!*template_pos)
+ break;
+
+ /* We found the next '%' character. Everything up until here
+ * matched. We now skip ahead to the end of this label and make
+ * sure it matches the tail of the label in the path. Then we
+ * decode the string in-between and save it for later use. */
+
+ ++template_pos; /* skip over '%' */
+
+ sep = strchrnul(template_pos, '/');
+ length = sep - template_pos; /* length of suffix to match verbatim */
+
+ /* verify the suffixes match */
+ sep = strchrnul(path_pos, '/');
+ if (sep - path_pos < (ssize_t)length ||
+ strncmp(sep - length, template_pos, length))
+ return 0;
+
+ template_pos += length; /* skip over matched label */
+ length = sep - path_pos - length; /* length of sub-label to decode */
+
+ /* store unescaped label for later use */
+ label = bus_label_unescape_n(path_pos, length);
+ if (!label)
+ return -ENOMEM;
+
+ r = strv_consume(&labels, label);
+ if (r < 0)
+ return r;
+
+ path_pos = sep; /* skip decoded label and suffix */
+ }
+
+ /* end of template must match end of path */
+ if (*path_pos)
+ return 0;
+
+ /* copy the labels over to the caller */
+ va_start(list, path_template);
+ for (label_pos = labels; label_pos && *label_pos; ++label_pos) {
+ char **arg;
+
+ arg = va_arg(list, char **);
+ if (arg)
+ *arg = *label_pos;
+ else
+ free(*label_pos);
+ }
+ va_end(list);
+
+ labels = mfree(labels);
+ return 1;
+}
+
+_public_ int sd_bus_try_close(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return -EOPNOTSUPP;
+}
+
+_public_ int sd_bus_get_description(sd_bus *bus, const char **description) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(description, -EINVAL);
+ assert_return(bus->description, -ENXIO);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->description)
+ *description = bus->description;
+ else if (bus->is_system)
+ *description = "system";
+ else if (bus->is_user)
+ *description = "user";
+ else
+ *description = NULL;
+
+ return 0;
+}
+
+_public_ int sd_bus_get_scope(sd_bus *bus, const char **scope) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(scope, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->is_user) {
+ *scope = "user";
+ return 0;
+ }
+
+ if (bus->is_system) {
+ *scope = "system";
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+_public_ int sd_bus_get_address(sd_bus *bus, const char **address) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(address, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (bus->address) {
+ *address = bus->address;
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+_public_ int sd_bus_get_creds_mask(sd_bus *bus, uint64_t *mask) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(mask, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ *mask = bus->creds_mask;
+ return 0;
+}
+
+_public_ int sd_bus_is_bus_client(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->bus_client;
+}
+
+_public_ int sd_bus_is_server(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->is_server;
+}
+
+_public_ int sd_bus_is_anonymous(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->anonymous_auth;
+}
+
+_public_ int sd_bus_is_trusted(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->trusted;
+}
+
+_public_ int sd_bus_is_monitor(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ return bus->is_monitor;
+}
+
+static void flush_close(sd_bus *bus) {
+ if (!bus)
+ return;
+
+ /* Flushes and closes the specified bus. We take a ref before,
+ * to ensure the flushing does not cause the bus to be
+ * unreferenced. */
+
+ sd_bus_flush_close_unref(sd_bus_ref(bus));
+}
+
+_public_ void sd_bus_default_flush_close(void) {
+ flush_close(default_starter_bus);
+ flush_close(default_user_bus);
+ flush_close(default_system_bus);
+}
+
+_public_ int sd_bus_set_exit_on_disconnect(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ /* Turns on exit-on-disconnect, and triggers it immediately if the bus connection was already
+ * disconnected. Note that this is triggered exclusively on disconnections triggered by the server side, never
+ * from the client side. */
+ bus->exit_on_disconnect = b;
+
+ /* If the exit condition was triggered already, exit immediately. */
+ return bus_exit_now(bus);
+}
+
+_public_ int sd_bus_get_exit_on_disconnect(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ return bus->exit_on_disconnect;
+}
+
+_public_ int sd_bus_set_sender(sd_bus *bus, const char *sender) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus->bus_client, -EPERM);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+
+ return free_and_strdup(&bus->patch_sender, sender);
+}
+
+_public_ int sd_bus_get_sender(sd_bus *bus, const char **ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(ret, -EINVAL);
+
+ if (!bus->patch_sender)
+ return -ENODATA;
+
+ *ret = bus->patch_sender;
+ return 0;
+}
+
+_public_ int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ *ret = bus->rqueue_size;
+ return 0;
+}
+
+_public_ int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ *ret = bus->wqueue_size;
+ return 0;
+}
+
+_public_ int sd_bus_set_method_call_timeout(sd_bus *bus, uint64_t usec) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ bus->method_call_timeout = usec;
+ return 0;
+}
+
+_public_ int sd_bus_get_method_call_timeout(sd_bus *bus, uint64_t *ret) {
+ const char *e;
+ usec_t usec;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(ret, -EINVAL);
+
+ if (bus->method_call_timeout != 0) {
+ *ret = bus->method_call_timeout;
+ return 0;
+ }
+
+ e = secure_getenv("SYSTEMD_BUS_TIMEOUT");
+ if (e && parse_sec(e, &usec) >= 0 && usec != 0) {
+ /* Save the parsed value to avoid multiple parsing. To change the timeout value,
+ * use sd_bus_set_method_call_timeout() instead of setenv(). */
+ *ret = bus->method_call_timeout = usec;
+ return 0;
+ }
+
+ *ret = bus->method_call_timeout = BUS_DEFAULT_TIMEOUT;
+ return 0;
+}
+
+_public_ int sd_bus_set_close_on_exit(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ bus->close_on_exit = b;
+ return 0;
+}
+
+_public_ int sd_bus_get_close_on_exit(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ return bus->close_on_exit;
+}
+
+_public_ int sd_bus_enqueue_for_read(sd_bus *bus, sd_bus_message *m) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EINVAL);
+ assert_return(!bus_pid_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* Re-enqueue a message for reading. This is primarily useful for PolicyKit-style authentication,
+ * where we accept a message, then determine we need to interactively authenticate the user, and then
+ * we want to process the message again. */
+
+ r = bus_rqueue_make_room(bus);
+ if (r < 0)
+ return r;
+
+ bus->rqueue[bus->rqueue_size++] = bus_message_ref_queued(m, bus);
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-address.c b/src/libsystemd/sd-bus/test-bus-address.c
new file mode 100644
index 0000000..b92558f
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-address.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "log.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_one_address(sd_bus *b,
+ const char *host,
+ int result, const char *expected) {
+ int r;
+
+ r = bus_set_address_system_remote(b, host);
+ log_info("\"%s\" → %d, \"%s\"", host, r, strna(r >= 0 ? b->address : NULL));
+ if (result < 0 || expected) {
+ assert(r == result);
+ if (r >= 0)
+ assert_se(streq(b->address, expected));
+ }
+}
+
+static void test_bus_set_address_system_remote(char **args) {
+ _cleanup_(sd_bus_unrefp) sd_bus *b = NULL;
+
+ assert_se(sd_bus_new(&b) >= 0);
+ if (!strv_isempty(args)) {
+ char **a;
+ STRV_FOREACH(a, args)
+ test_one_address(b, *a, 0, NULL);
+ return;
+ };
+
+ test_one_address(b, "host",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=host,argv4=systemd-stdio-bridge");
+ test_one_address(b, "host:123",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=-p,argv3=123,argv4=--,argv5=host,argv6=systemd-stdio-bridge");
+ test_one_address(b, "host:123:123",
+ -EINVAL, NULL);
+ test_one_address(b, "host:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@host",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=user%40host,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@host@host",
+ -EINVAL, NULL);
+ test_one_address(b, "[::1]",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=%3a%3a1,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=user%40%3a%3a1,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]:99",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=-p,argv3=99,argv4=--,argv5=user%40%3a%3a1,argv6=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@[::1:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@",
+ -EINVAL, NULL);
+ test_one_address(b, "user@@",
+ -EINVAL, NULL);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_bus_set_address_system_remote(argv + 1);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-benchmark.c b/src/libsystemd/sd-bus/test-bus-benchmark.c
new file mode 100644
index 0000000..8c67117
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-benchmark.c
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "bus-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "missing_resource.h"
+#include "time-util.h"
+#include "util.h"
+
+#define MAX_SIZE (2*1024*1024)
+
+static usec_t arg_loop_usec = 100 * USEC_PER_MSEC;
+
+typedef enum Type {
+ TYPE_LEGACY,
+ TYPE_DIRECT,
+} Type;
+
+static void server(sd_bus *b, size_t *result) {
+ int r;
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_process(b, &m);
+ assert_se(r >= 0);
+
+ if (r == 0)
+ assert_se(sd_bus_wait(b, USEC_INFINITY) >= 0);
+ if (!m)
+ continue;
+
+ if (sd_bus_message_is_method_call(m, "benchmark.server", "Ping"))
+ assert_se(sd_bus_reply_method_return(m, NULL) >= 0);
+ else if (sd_bus_message_is_method_call(m, "benchmark.server", "Work")) {
+ const void *p;
+ size_t sz;
+
+ /* Make sure the mmap is mapped */
+ assert_se(sd_bus_message_read_array(m, 'y', &p, &sz) > 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+ } else if (sd_bus_message_is_method_call(m, "benchmark.server", "Exit")) {
+ uint64_t res;
+ assert_se(sd_bus_message_read(m, "t", &res) > 0);
+
+ *result = res;
+ return;
+
+ } else if (!sd_bus_message_is_signal(m, NULL, NULL))
+ assert_not_reached("Unknown method");
+ }
+}
+
+static void transaction(sd_bus *b, size_t sz, const char *server_name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ uint8_t *p;
+
+ assert_se(sd_bus_message_new_method_call(b, &m, server_name, "/", "benchmark.server", "Work") >= 0);
+ assert_se(sd_bus_message_append_array_space(m, 'y', sz, (void**) &p) >= 0);
+
+ memset(p, 0x80, sz);
+
+ assert_se(sd_bus_call(b, m, 0, NULL, &reply) >= 0);
+}
+
+static void client_bisect(const char *address, const char *server_name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *x = NULL;
+ size_t lsize, rsize, csize;
+ sd_bus *b;
+ int r;
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(b, server_name, "/", "benchmark.server", "Ping", NULL, NULL, NULL);
+ assert_se(r >= 0);
+
+ lsize = 1;
+ rsize = MAX_SIZE;
+
+ printf("SIZE\tCOPY\tMEMFD\n");
+
+ for (;;) {
+ usec_t t;
+ unsigned n_copying, n_memfd;
+
+ csize = (lsize + rsize) / 2;
+
+ if (csize <= lsize)
+ break;
+
+ if (csize <= 0)
+ break;
+
+ printf("%zu\t", csize);
+
+ b->use_memfd = 0;
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_copying = 0;; n_copying++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+ printf("%u\t", (unsigned) ((n_copying * USEC_PER_SEC) / arg_loop_usec));
+
+ b->use_memfd = -1;
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_memfd = 0;; n_memfd++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+ printf("%u\n", (unsigned) ((n_memfd * USEC_PER_SEC) / arg_loop_usec));
+
+ if (n_copying == n_memfd)
+ break;
+
+ if (n_copying > n_memfd)
+ lsize = csize;
+ else
+ rsize = csize;
+ }
+
+ b->use_memfd = 1;
+ assert_se(sd_bus_message_new_method_call(b, &x, server_name, "/", "benchmark.server", "Exit") >= 0);
+ assert_se(sd_bus_message_append(x, "t", csize) >= 0);
+ assert_se(sd_bus_send(b, x, NULL) >= 0);
+
+ sd_bus_unref(b);
+}
+
+static void client_chart(Type type, const char *address, const char *server_name, int fd) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *x = NULL;
+ size_t csize;
+ sd_bus *b;
+ int r;
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ if (type == TYPE_DIRECT) {
+ r = sd_bus_set_fd(b, fd, fd);
+ assert_se(r >= 0);
+ } else {
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_bus_client(b, true);
+ assert_se(r >= 0);
+ }
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(b, server_name, "/", "benchmark.server", "Ping", NULL, NULL, NULL);
+ assert_se(r >= 0);
+
+ switch (type) {
+ case TYPE_LEGACY:
+ printf("SIZE\tLEGACY\n");
+ break;
+ case TYPE_DIRECT:
+ printf("SIZE\tDIRECT\n");
+ break;
+ }
+
+ for (csize = 1; csize <= MAX_SIZE; csize *= 2) {
+ usec_t t;
+ unsigned n_memfd;
+
+ printf("%zu\t", csize);
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_memfd = 0;; n_memfd++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+
+ printf("%u\n", (unsigned) ((n_memfd * USEC_PER_SEC) / arg_loop_usec));
+ }
+
+ b->use_memfd = 1;
+ assert_se(sd_bus_message_new_method_call(b, &x, server_name, "/", "benchmark.server", "Exit") >= 0);
+ assert_se(sd_bus_message_append(x, "t", csize) >= 0);
+ assert_se(sd_bus_send(b, x, NULL) >= 0);
+
+ sd_bus_unref(b);
+}
+
+int main(int argc, char *argv[]) {
+ enum {
+ MODE_BISECT,
+ MODE_CHART,
+ } mode = MODE_BISECT;
+ Type type = TYPE_LEGACY;
+ int i, pair[2] = { -1, -1 };
+ _cleanup_free_ char *address = NULL, *server_name = NULL;
+ _cleanup_close_ int bus_ref = -1;
+ const char *unique;
+ cpu_set_t cpuset;
+ size_t result;
+ sd_bus *b;
+ pid_t pid;
+ int r;
+
+ for (i = 1; i < argc; i++) {
+ if (streq(argv[i], "chart")) {
+ mode = MODE_CHART;
+ continue;
+ } else if (streq(argv[i], "legacy")) {
+ type = TYPE_LEGACY;
+ continue;
+ } else if (streq(argv[i], "direct")) {
+ type = TYPE_DIRECT;
+ continue;
+ }
+
+ assert_se(parse_sec(argv[i], &arg_loop_usec) >= 0);
+ }
+
+ assert_se(arg_loop_usec > 0);
+
+ if (type == TYPE_LEGACY) {
+ const char *e;
+
+ e = secure_getenv("DBUS_SESSION_BUS_ADDRESS");
+ assert_se(e);
+
+ address = strdup(e);
+ assert_se(address);
+ }
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ if (type == TYPE_DIRECT) {
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0);
+
+ r = sd_bus_set_fd(b, pair[0], pair[0]);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_server(b, true, SD_ID128_NULL);
+ assert_se(r >= 0);
+ } else {
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_bus_client(b, true);
+ assert_se(r >= 0);
+ }
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ if (type != TYPE_DIRECT) {
+ r = sd_bus_get_unique_name(b, &unique);
+ assert_se(r >= 0);
+
+ server_name = strdup(unique);
+ assert_se(server_name);
+ }
+
+ sync();
+ setpriority(PRIO_PROCESS, 0, -19);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+ safe_close(bus_ref);
+ sd_bus_unref(b);
+
+ switch (mode) {
+ case MODE_BISECT:
+ client_bisect(address, server_name);
+ break;
+
+ case MODE_CHART:
+ client_chart(type, address, server_name, pair[1]);
+ break;
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+ server(b, &result);
+
+ if (mode == MODE_BISECT)
+ printf("Copying/memfd are equally fast at %zu bytes\n", result);
+
+ assert_se(waitpid(pid, NULL, 0) == pid);
+
+ safe_close(pair[1]);
+ sd_bus_unref(b);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-chat.c b/src/libsystemd/sd-bus/test-bus-chat.c
new file mode 100644
index 0000000..c3c31c6
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-chat.c
@@ -0,0 +1,547 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-internal.h"
+#include "bus-match.h"
+#include "bus-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "tests.h"
+#include "util.h"
+
+static int match_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Match triggered! interface=%s member=%s", strna(sd_bus_message_get_interface(m)), strna(sd_bus_message_get_member(m)));
+ return 0;
+}
+
+static int object_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ int r;
+
+ if (sd_bus_message_is_method_error(m, NULL))
+ return 0;
+
+ if (sd_bus_message_is_method_call(m, "org.object.test", "Foobar")) {
+ log_info("Invoked Foobar() on %s", sd_bus_message_get_path(m));
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int server_init(sd_bus **_bus) {
+ sd_bus *bus = NULL;
+ sd_id128_t id;
+ int r;
+ const char *unique, *desc;
+
+ assert_se(_bus);
+
+ r = sd_bus_open_user_with_description(&bus, "my bus!");
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to user bus: %m");
+ goto fail;
+ }
+
+ r = sd_bus_get_bus_id(bus, &id);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get server ID: %m");
+ goto fail;
+ }
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get unique name: %m");
+ goto fail;
+ }
+
+ assert_se(sd_bus_get_description(bus, &desc) >= 0);
+ assert_se(streq(desc, "my bus!"));
+
+ log_info("Peer ID is " SD_ID128_FORMAT_STR ".", SD_ID128_FORMAT_VAL(id));
+ log_info("Unique ID: %s", unique);
+ log_info("Can send file handles: %i", sd_bus_can_send(bus, 'h'));
+
+ r = sd_bus_request_name(bus, "org.freedesktop.systemd.test", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to acquire name: %m");
+ goto fail;
+ }
+
+ r = sd_bus_add_fallback(bus, NULL, "/foo/bar", object_callback, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add object: %m");
+ goto fail;
+ }
+
+ r = sd_bus_match_signal(bus, NULL, NULL, NULL, "foo.bar", "Notify", match_callback, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to request match: %m");
+ goto fail;
+ }
+
+ r = sd_bus_add_match(bus, NULL, "type='signal',interface='org.freedesktop.DBus',member='NameOwnerChanged'", match_callback, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add match: %m");
+ goto fail;
+ }
+
+ bus_match_dump(&bus->match_callbacks, 0);
+
+ *_bus = bus;
+ return 0;
+
+fail:
+ sd_bus_unref(bus);
+ return r;
+}
+
+static int server(sd_bus *bus) {
+ int r;
+ bool client1_gone = false, client2_gone = false;
+
+ while (!client1_gone || !client2_gone) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ pid_t pid = 0;
+ const char *label = NULL;
+
+ r = sd_bus_process(bus, &m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto fail;
+ }
+
+ if (r == 0) {
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto fail;
+ }
+
+ continue;
+ }
+
+ if (!m)
+ continue;
+
+ sd_bus_creds_get_pid(sd_bus_message_get_creds(m), &pid);
+ sd_bus_creds_get_selinux_context(sd_bus_message_get_creds(m), &label);
+ log_info("Got message! member=%s pid="PID_FMT" label=%s",
+ strna(sd_bus_message_get_member(m)),
+ pid,
+ strna(label));
+ /* sd_bus_message_dump(m); */
+ /* sd_bus_message_rewind(m, true); */
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "LowerCase")) {
+ const char *hello;
+ _cleanup_free_ char *lowercase = NULL;
+
+ r = sd_bus_message_read(m, "s", &hello);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get parameter: %m");
+ goto fail;
+ }
+
+ lowercase = strdup(hello);
+ if (!lowercase) {
+ r = log_oom();
+ goto fail;
+ }
+
+ ascii_strlower(lowercase);
+
+ r = sd_bus_reply_method_return(m, "s", lowercase);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "ExitClient1")) {
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+
+ client1_gone = true;
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "ExitClient2")) {
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+
+ client2_gone = true;
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Slow")) {
+
+ sleep(1);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "FileDescriptor")) {
+ int fd;
+ static const char x = 'X';
+
+ r = sd_bus_message_read(m, "h", &fd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get parameter: %m");
+ goto fail;
+ }
+
+ log_info("Received fd=%d", fd);
+
+ if (write(fd, &x, 1) < 0) {
+ log_error_errno(errno, "Failed to write to fd: %m");
+ safe_close(fd);
+ goto fail;
+ }
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+
+ } else if (sd_bus_message_is_method_call(m, NULL, NULL)) {
+
+ r = sd_bus_reply_method_error(
+ m,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNKNOWN_METHOD, "Unknown method."));
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+ }
+ }
+
+ r = 0;
+
+fail:
+ if (bus) {
+ sd_bus_flush(bus);
+ sd_bus_unref(bus);
+ }
+
+ return r;
+}
+
+static void* client1(void *p) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *hello;
+ int r;
+ _cleanup_close_pair_ int pp[2] = { -1, -1 };
+ char x;
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to user bus: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "LowerCase",
+ &error,
+ &reply,
+ "s",
+ "HELLO");
+ if (r < 0) {
+ log_error_errno(r, "Failed to issue method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_message_read(reply, "s", &hello);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get string: %m");
+ goto finish;
+ }
+
+ assert_se(streq(hello, "hello"));
+
+ if (pipe2(pp, O_CLOEXEC|O_NONBLOCK) < 0) {
+ r = log_error_errno(errno, "Failed to allocate pipe: %m");
+ goto finish;
+ }
+
+ log_info("Sending fd=%d", pp[1]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "FileDescriptor",
+ &error,
+ NULL,
+ "h",
+ pp[1]);
+ if (r < 0) {
+ log_error_errno(r, "Failed to issue method call: %m");
+ goto finish;
+ }
+
+ errno = 0;
+ if (read(pp[0], &x, 1) <= 0) {
+ log_error("Failed to read from pipe: %s", errno != 0 ? strerror_safe(errno) : "early read");
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ if (bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *q = NULL;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &q,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "ExitClient1");
+ if (r < 0)
+ log_error_errno(r, "Failed to allocate method call: %m");
+ else
+ sd_bus_send(bus, q, NULL);
+
+ }
+
+ return INT_TO_PTR(r);
+}
+
+static int quit_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ bool *x = userdata;
+
+ log_error_errno(sd_bus_message_get_errno(m), "Quit callback: %m");
+
+ *x = 1;
+ return 1;
+}
+
+static void* client2(void *p) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool quit = false;
+ const char *mid;
+ int r;
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to user bus: %m");
+ goto finish;
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/foo/bar/waldo/piep",
+ "org.object.test",
+ "Foobar");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0) {
+ log_error("Failed to issue method call: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/foobar",
+ "foo.bar",
+ "Notify");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate signal: %m");
+ goto finish;
+ }
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0) {
+ log_error("Failed to issue signal: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.DBus.Peer",
+ "GetMachineId");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0) {
+ log_error("Failed to issue method call: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ r = sd_bus_message_read(reply, "s", &mid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse machine ID: %m");
+ goto finish;
+ }
+
+ log_info("Machine ID is %s.", mid);
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Slow");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call(bus, m, 200 * USEC_PER_MSEC, &error, &reply);
+ if (r < 0)
+ log_info("Failed to issue method call: %s", bus_error_message(&error, r));
+ else
+ log_info("Slow call succeed.");
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Slow");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call_async(bus, NULL, m, quit_callback, &quit, 200 * USEC_PER_MSEC);
+ if (r < 0) {
+ log_info("Failed to issue method call: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ while (!quit) {
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto finish;
+ }
+ if (r == 0) {
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ if (bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *q = NULL;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &q,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "ExitClient2");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ (void) sd_bus_send(bus, q, NULL);
+ }
+
+ return INT_TO_PTR(r);
+}
+
+int main(int argc, char *argv[]) {
+ pthread_t c1, c2;
+ sd_bus *bus;
+ void *p;
+ int q, r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = server_init(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ log_info("Initialized...");
+
+ r = pthread_create(&c1, NULL, client1, bus);
+ if (r != 0)
+ return EXIT_FAILURE;
+
+ r = pthread_create(&c2, NULL, client2, bus);
+ if (r != 0)
+ return EXIT_FAILURE;
+
+ r = server(bus);
+
+ q = pthread_join(c1, &p);
+ if (q != 0)
+ return EXIT_FAILURE;
+ if (PTR_TO_INT(p) < 0)
+ return EXIT_FAILURE;
+
+ q = pthread_join(c2, &p);
+ if (q != 0)
+ return EXIT_FAILURE;
+ if (PTR_TO_INT(p) < 0)
+ return EXIT_FAILURE;
+
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-cleanup.c b/src/libsystemd/sd-bus/test-bus-cleanup.c
new file mode 100644
index 0000000..ece0a12
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-cleanup.c
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "tests.h"
+
+static bool use_system_bus = false;
+
+static void test_bus_new(void) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+
+ assert_se(sd_bus_new(&bus) == 0);
+ assert_se(bus->n_ref == 1);
+}
+
+static int test_bus_open(void) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_open_user(&bus);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT, -ENOMEDIUM)) {
+ r = sd_bus_open_system(&bus);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT))
+ return r;
+ use_system_bus = true;
+ }
+
+ assert_se(r >= 0);
+ assert_se(bus->n_ref >= 1); /* we send a hello message when opening, so the count is above 1 */
+
+ return 0;
+}
+
+static void test_bus_new_method_call(void) {
+ sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ assert_se(use_system_bus ? sd_bus_open_system(&bus) >= 0 : sd_bus_open_user(&bus) >= 0);
+
+ assert_se(sd_bus_message_new_method_call(bus, &m, "a.service.name", "/an/object/path", "an.interface.name", "AMethodName") >= 0);
+
+ assert_se(m->n_ref == 1); /* We hold the only reference to the message */
+ assert_se(bus->n_ref >= 2);
+ sd_bus_flush_close_unref(bus);
+ assert_se(m->n_ref == 1);
+}
+
+static void test_bus_new_signal(void) {
+ sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ assert_se(use_system_bus ? sd_bus_open_system(&bus) >= 0 : sd_bus_open_user(&bus) >= 0);
+
+ assert_se(sd_bus_message_new_signal(bus, &m, "/an/object/path", "an.interface.name", "Name") >= 0);
+
+ assert_se(m->n_ref == 1); /* We hold the only reference to the message */
+ assert_se(bus->n_ref >= 2);
+ sd_bus_flush_close_unref(bus);
+ assert_se(m->n_ref == 1);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_INFO);
+
+ test_bus_new();
+
+ if (test_bus_open() < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ test_bus_new_method_call();
+ test_bus_new_signal();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c
new file mode 100644
index 0000000..6551e69
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-creds.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "bus-dump.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (cg_unified() == -ENOMEDIUM)
+ return log_tests_skipped("/sys/fs/cgroup/ not available");
+
+ r = sd_bus_creds_new_from_pid(&creds, 0, _SD_BUS_CREDS_ALL);
+ log_full_errno(r < 0 ? LOG_ERR : LOG_DEBUG, r, "sd_bus_creds_new_from_pid: %m");
+ assert_se(r >= 0);
+
+ bus_creds_dump(creds, NULL, true);
+
+ creds = sd_bus_creds_unref(creds);
+
+ r = sd_bus_creds_new_from_pid(&creds, 1, _SD_BUS_CREDS_ALL);
+ if (r != -EACCES) {
+ assert_se(r >= 0);
+ putchar('\n');
+ bus_creds_dump(creds, NULL, true);
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-error.c b/src/libsystemd/sd-bus/test-bus-error.c
new file mode 100644
index 0000000..2e3803c
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-error.c
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "errno-list.h"
+#include "errno-util.h"
+
+static void test_error(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL, second = SD_BUS_ERROR_NULL;
+ const sd_bus_error const_error = SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_EXISTS, "const error");
+ const sd_bus_error temporarily_const_error = {
+ .name = SD_BUS_ERROR_ACCESS_DENIED,
+ .message = "oh! no",
+ ._need_free = -1
+ };
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set(&error, SD_BUS_ERROR_NOT_SUPPORTED, "xxx") == -EOPNOTSUPP);
+ assert_se(streq(error.name, SD_BUS_ERROR_NOT_SUPPORTED));
+ assert_se(streq(error.message, "xxx"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_NOT_SUPPORTED));
+ assert_se(sd_bus_error_has_names_sentinel(&error, SD_BUS_ERROR_NOT_SUPPORTED, NULL));
+ assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_NOT_SUPPORTED));
+ assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_NOT_SUPPORTED, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND, SD_BUS_ERROR_NOT_SUPPORTED, NULL));
+ assert_se(!sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_get_errno(&error) == EOPNOTSUPP);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ /* Check with no error */
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_setf(&error, NULL, "yyy %i", -1) == 0);
+ assert_se(error.name == NULL);
+ assert_se(error.message == NULL);
+ assert_se(!sd_bus_error_has_name(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(!sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_get_errno(&error) == 0);
+ assert_se(!sd_bus_error_is_set(&error));
+
+ assert_se(sd_bus_error_setf(&error, SD_BUS_ERROR_FILE_NOT_FOUND, "yyy %i", -1) == -ENOENT);
+ assert_se(streq(error.name, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(streq(error.message, "yyy -1"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_get_errno(&error) == ENOENT);
+ assert_se(sd_bus_error_is_set(&error));
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(second._need_free == 0);
+ assert_se(error._need_free > 0);
+ assert_se(sd_bus_error_copy(&second, &error) == -ENOENT);
+ assert_se(second._need_free > 0);
+ assert_se(streq(error.name, second.name));
+ assert_se(streq(error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == ENOENT);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_has_names(&second, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_is_set(&second));
+
+ sd_bus_error_free(&error);
+ sd_bus_error_free(&second);
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(const_error._need_free == 0);
+ assert_se(sd_bus_error_copy(&second, &const_error) == -EEXIST);
+ assert_se(second._need_free == 0);
+ assert_se(streq(const_error.name, second.name));
+ assert_se(streq(const_error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == EEXIST);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_FILE_EXISTS));
+ assert_se(sd_bus_error_is_set(&second));
+ sd_bus_error_free(&second);
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(temporarily_const_error._need_free < 0);
+ assert_se(sd_bus_error_copy(&second, &temporarily_const_error) == -EACCES);
+ assert_se(second._need_free > 0);
+ assert_se(streq(temporarily_const_error.name, second.name));
+ assert_se(streq(temporarily_const_error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == EACCES);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_ACCESS_DENIED));
+ assert_se(sd_bus_error_is_set(&second));
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_const(&error, "System.Error.EUCLEAN", "Hallo") == -EUCLEAN);
+ assert_se(streq(error.name, "System.Error.EUCLEAN"));
+ assert_se(streq(error.message, "Hallo"));
+ assert_se(sd_bus_error_has_name(&error, "System.Error.EUCLEAN"));
+ assert_se(sd_bus_error_get_errno(&error) == EUCLEAN);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errno(&error, EBUSY) == -EBUSY);
+ assert_se(streq(error.name, "System.Error.EBUSY"));
+ assert_se(streq(error.message, strerror_safe(EBUSY)));
+ assert_se(sd_bus_error_has_name(&error, "System.Error.EBUSY"));
+ assert_se(sd_bus_error_get_errno(&error) == EBUSY);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errnof(&error, EIO, "Waldi %c", 'X') == -EIO);
+ assert_se(streq(error.name, SD_BUS_ERROR_IO_ERROR));
+ assert_se(streq(error.message, "Waldi X"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_IO_ERROR));
+ assert_se(sd_bus_error_get_errno(&error) == EIO);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ /* Check with no error */
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errnof(&error, 0, "Waldi %c", 'X') == 0);
+ assert_se(error.name == NULL);
+ assert_se(error.message == NULL);
+ assert_se(!sd_bus_error_has_name(&error, SD_BUS_ERROR_IO_ERROR));
+ assert_se(sd_bus_error_get_errno(&error) == 0);
+ assert_se(!sd_bus_error_is_set(&error));
+}
+
+extern const sd_bus_error_map __start_SYSTEMD_BUS_ERROR_MAP[];
+extern const sd_bus_error_map __stop_SYSTEMD_BUS_ERROR_MAP[];
+
+static void dump_mapping_table(void) {
+ const sd_bus_error_map *m;
+
+ printf("----- errno mappings ------\n");
+ m = ALIGN_TO_PTR(__start_SYSTEMD_BUS_ERROR_MAP, sizeof(void*));
+ while (m < __stop_SYSTEMD_BUS_ERROR_MAP) {
+
+ if (m->code == BUS_ERROR_MAP_END_MARKER) {
+ m = ALIGN_TO_PTR(m + 1, sizeof(void*));
+ continue;
+ }
+
+ printf("%s -> %i/%s\n", strna(m->name), m->code, strna(errno_to_name(m->code)));
+ m++;
+ }
+ printf("---------------------------\n");
+}
+
+static void test_errno_mapping_standard(void) {
+ assert_se(sd_bus_error_set(NULL, "System.Error.EUCLEAN", NULL) == -EUCLEAN);
+ assert_se(sd_bus_error_set(NULL, "System.Error.EBUSY", NULL) == -EBUSY);
+ assert_se(sd_bus_error_set(NULL, "System.Error.EINVAL", NULL) == -EINVAL);
+ assert_se(sd_bus_error_set(NULL, "System.Error.WHATSIT", NULL) == -EIO);
+}
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map test_errors[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error", 5),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-2", 52),
+ SD_BUS_ERROR_MAP_END
+};
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map test_errors2[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-3", 33),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-4", 44),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-33", 333),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors3[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-88", 888),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-99", 999),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors4[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-77", 777),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-78", 778),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors_bad1[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-1", 0),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors_bad2[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-1", -1),
+ SD_BUS_ERROR_MAP_END
+};
+
+static void test_errno_mapping_custom(void) {
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error", NULL) == -5);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-2", NULL) == -52);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-x", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-33", NULL) == -333);
+
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-88", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-99", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-77", NULL) == -EIO);
+
+ assert_se(sd_bus_error_add_map(test_errors3) > 0);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-88", NULL) == -888);
+ assert_se(sd_bus_error_add_map(test_errors4) > 0);
+ assert_se(sd_bus_error_add_map(test_errors4) == 0);
+ assert_se(sd_bus_error_add_map(test_errors3) == 0);
+
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-99", NULL) == -999);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-77", NULL) == -777);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-78", NULL) == -778);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-2", NULL) == -52);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-y", NULL) == -EIO);
+
+ assert_se(sd_bus_error_set(NULL, BUS_ERROR_NO_SUCH_UNIT, NULL) == -ENOENT);
+
+ assert_se(sd_bus_error_add_map(test_errors_bad1) == -EINVAL);
+ assert_se(sd_bus_error_add_map(test_errors_bad2) == -EINVAL);
+}
+
+static void test_sd_bus_error_set_errnof(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *str = NULL;
+
+ assert_se(sd_bus_error_set_errnof(NULL, 0, NULL) == 0);
+ assert_se(sd_bus_error_set_errnof(NULL, ENOANO, NULL) == -ENOANO);
+
+ assert_se(sd_bus_error_set_errnof(&error, 0, NULL) == 0);
+ assert_se(!bus_error_is_dirty(&error));
+
+ assert_se(sd_bus_error_set_errnof(&error, EACCES, NULL) == -EACCES);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_ACCESS_DENIED));
+ errno = EACCES;
+ assert_se(asprintf(&str, "%m") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 0);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(&error, ENOANO, NULL) == -ENOANO);
+ assert_se(sd_bus_error_has_name(&error, "System.Error.ENOANO"));
+ errno = ENOANO;
+ assert_se(asprintf(&str, "%m") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(&error, 100000, NULL) == -100000);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FAILED));
+ errno = 100000;
+ assert_se(asprintf(&str, "%m") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(NULL, 0, "hoge %s: %m", "foo") == 0);
+ assert_se(sd_bus_error_set_errnof(NULL, ENOANO, "hoge %s: %m", "foo") == -ENOANO);
+
+ assert_se(sd_bus_error_set_errnof(&error, 0, "hoge %s: %m", "foo") == 0);
+ assert_se(!bus_error_is_dirty(&error));
+
+ assert_se(sd_bus_error_set_errnof(&error, EACCES, "hoge %s: %m", "foo") == -EACCES);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_ACCESS_DENIED));
+ errno = EACCES;
+ assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(&error, ENOANO, "hoge %s: %m", "foo") == -ENOANO);
+ assert_se(sd_bus_error_has_name(&error, "System.Error.ENOANO"));
+ errno = ENOANO;
+ assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(&error, 100000, "hoge %s: %m", "foo") == -100000);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FAILED));
+ errno = 100000;
+ assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+}
+
+int main(int argc, char *argv[]) {
+ dump_mapping_table();
+
+ test_error();
+ test_errno_mapping_standard();
+ test_errno_mapping_custom();
+ test_sd_bus_error_set_errnof();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-gvariant.c b/src/libsystemd/sd-bus/test-bus-gvariant.c
new file mode 100644
index 0000000..b0033f1
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-gvariant.c
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_GLIB
+#include <glib.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-gvariant.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "macro.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_bus_gvariant_is_fixed_size(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(bus_gvariant_is_fixed_size("") > 0);
+ assert_se(bus_gvariant_is_fixed_size("()") == -EINVAL);
+ assert_se(bus_gvariant_is_fixed_size("y") > 0);
+ assert_se(bus_gvariant_is_fixed_size("u") > 0);
+ assert_se(bus_gvariant_is_fixed_size("b") > 0);
+ assert_se(bus_gvariant_is_fixed_size("n") > 0);
+ assert_se(bus_gvariant_is_fixed_size("q") > 0);
+ assert_se(bus_gvariant_is_fixed_size("i") > 0);
+ assert_se(bus_gvariant_is_fixed_size("t") > 0);
+ assert_se(bus_gvariant_is_fixed_size("d") > 0);
+ assert_se(bus_gvariant_is_fixed_size("s") == 0);
+ assert_se(bus_gvariant_is_fixed_size("o") == 0);
+ assert_se(bus_gvariant_is_fixed_size("g") == 0);
+ assert_se(bus_gvariant_is_fixed_size("h") > 0);
+ assert_se(bus_gvariant_is_fixed_size("ay") == 0);
+ assert_se(bus_gvariant_is_fixed_size("v") == 0);
+ assert_se(bus_gvariant_is_fixed_size("(u)") > 0);
+ assert_se(bus_gvariant_is_fixed_size("(uuuuy)") > 0);
+ assert_se(bus_gvariant_is_fixed_size("(uusuuy)") == 0);
+ assert_se(bus_gvariant_is_fixed_size("a{ss}") == 0);
+ assert_se(bus_gvariant_is_fixed_size("((u)yyy(b(iiii)))") > 0);
+ assert_se(bus_gvariant_is_fixed_size("((u)yyy(b(iiivi)))") == 0);
+}
+
+static void test_bus_gvariant_get_size(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(bus_gvariant_get_size("") == 0);
+ assert_se(bus_gvariant_get_size("()") == -EINVAL);
+ assert_se(bus_gvariant_get_size("y") == 1);
+ assert_se(bus_gvariant_get_size("u") == 4);
+ assert_se(bus_gvariant_get_size("b") == 1);
+ assert_se(bus_gvariant_get_size("n") == 2);
+ assert_se(bus_gvariant_get_size("q") == 2);
+ assert_se(bus_gvariant_get_size("i") == 4);
+ assert_se(bus_gvariant_get_size("t") == 8);
+ assert_se(bus_gvariant_get_size("d") == 8);
+ assert_se(bus_gvariant_get_size("s") < 0);
+ assert_se(bus_gvariant_get_size("o") < 0);
+ assert_se(bus_gvariant_get_size("g") < 0);
+ assert_se(bus_gvariant_get_size("h") == 4);
+ assert_se(bus_gvariant_get_size("ay") < 0);
+ assert_se(bus_gvariant_get_size("v") < 0);
+ assert_se(bus_gvariant_get_size("(u)") == 4);
+ assert_se(bus_gvariant_get_size("(uuuuy)") == 20);
+ assert_se(bus_gvariant_get_size("(uusuuy)") < 0);
+ assert_se(bus_gvariant_get_size("a{ss}") < 0);
+ assert_se(bus_gvariant_get_size("((u)yyy(b(iiii)))") == 28);
+ assert_se(bus_gvariant_get_size("((u)yyy(b(iiivi)))") < 0);
+ assert_se(bus_gvariant_get_size("((b)(t))") == 16);
+ assert_se(bus_gvariant_get_size("((b)(b)(t))") == 16);
+ assert_se(bus_gvariant_get_size("(bt)") == 16);
+ assert_se(bus_gvariant_get_size("((t)(b))") == 16);
+ assert_se(bus_gvariant_get_size("(tb)") == 16);
+ assert_se(bus_gvariant_get_size("((b)(b))") == 2);
+ assert_se(bus_gvariant_get_size("((t)(t))") == 16);
+}
+
+static void test_bus_gvariant_get_alignment(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(bus_gvariant_get_alignment("") == 1);
+ assert_se(bus_gvariant_get_alignment("()") == -EINVAL);
+ assert_se(bus_gvariant_get_alignment("y") == 1);
+ assert_se(bus_gvariant_get_alignment("b") == 1);
+ assert_se(bus_gvariant_get_alignment("u") == 4);
+ assert_se(bus_gvariant_get_alignment("s") == 1);
+ assert_se(bus_gvariant_get_alignment("o") == 1);
+ assert_se(bus_gvariant_get_alignment("g") == 1);
+ assert_se(bus_gvariant_get_alignment("v") == 8);
+ assert_se(bus_gvariant_get_alignment("h") == 4);
+ assert_se(bus_gvariant_get_alignment("i") == 4);
+ assert_se(bus_gvariant_get_alignment("t") == 8);
+ assert_se(bus_gvariant_get_alignment("x") == 8);
+ assert_se(bus_gvariant_get_alignment("q") == 2);
+ assert_se(bus_gvariant_get_alignment("n") == 2);
+ assert_se(bus_gvariant_get_alignment("d") == 8);
+ assert_se(bus_gvariant_get_alignment("ay") == 1);
+ assert_se(bus_gvariant_get_alignment("as") == 1);
+ assert_se(bus_gvariant_get_alignment("au") == 4);
+ assert_se(bus_gvariant_get_alignment("an") == 2);
+ assert_se(bus_gvariant_get_alignment("ans") == 2);
+ assert_se(bus_gvariant_get_alignment("ant") == 8);
+ assert_se(bus_gvariant_get_alignment("(ss)") == 1);
+ assert_se(bus_gvariant_get_alignment("(ssu)") == 4);
+ assert_se(bus_gvariant_get_alignment("a(ssu)") == 4);
+ assert_se(bus_gvariant_get_alignment("(u)") == 4);
+ assert_se(bus_gvariant_get_alignment("(uuuuy)") == 4);
+ assert_se(bus_gvariant_get_alignment("(uusuuy)") == 4);
+ assert_se(bus_gvariant_get_alignment("a{ss}") == 1);
+ assert_se(bus_gvariant_get_alignment("((u)yyy(b(iiii)))") == 4);
+ assert_se(bus_gvariant_get_alignment("((u)yyy(b(iiivi)))") == 8);
+ assert_se(bus_gvariant_get_alignment("((b)(t))") == 8);
+ assert_se(bus_gvariant_get_alignment("((b)(b)(t))") == 8);
+ assert_se(bus_gvariant_get_alignment("(bt)") == 8);
+ assert_se(bus_gvariant_get_alignment("((t)(b))") == 8);
+ assert_se(bus_gvariant_get_alignment("(tb)") == 8);
+ assert_se(bus_gvariant_get_alignment("((b)(b))") == 1);
+ assert_se(bus_gvariant_get_alignment("((t)(t))") == 8);
+}
+
+static int test_marshal(void) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *n = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ void *blob = NULL;
+ size_t sz;
+ int r;
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0)
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped_errno(r, "Failed to connect to bus");
+
+ bus->message_version = 2; /* dirty hack to enable gvariant */
+
+ r = sd_bus_message_new_method_call(bus, &m, "a.service.name",
+ "/an/object/path/which/is/really/really/long/so/that/we/hit/the/eight/bit/boundary/by/quite/some/margin/to/test/this/stuff/that/it/really/works",
+ "an.interface.name", "AMethodName");
+ assert_se(r >= 0);
+
+ assert_cc(sizeof(struct bus_header) == 16);
+
+ assert_se(sd_bus_message_append(m,
+ "a(usv)", 3,
+ 4711, "first-string-parameter", "(st)", "X", (uint64_t) 1111,
+ 4712, "second-string-parameter", "(a(si))", 2, "Y", 5, "Z", 6,
+ 4713, "third-string-parameter", "(uu)", 1, 2) >= 0);
+
+ assert_se(sd_bus_message_seal(m, 4711, 0) >= 0);
+
+#if HAVE_GLIB
+ {
+ GVariant *v;
+ char *t;
+
+#if !defined(GLIB_VERSION_2_36)
+ g_type_init();
+#endif
+
+ v = g_variant_new_from_data(G_VARIANT_TYPE("(yyyyuta{tv})"), m->header, sizeof(struct bus_header) + m->fields_size, false, NULL, NULL);
+ assert_se(g_variant_is_normal_form(v));
+ t = g_variant_print(v, TRUE);
+ printf("%s\n", t);
+ g_free(t);
+ g_variant_unref(v);
+
+ v = g_variant_new_from_data(G_VARIANT_TYPE("(a(usv))"), m->body.data, m->user_body_size, false, NULL, NULL);
+ assert_se(g_variant_is_normal_form(v));
+ t = g_variant_print(v, TRUE);
+ printf("%s\n", t);
+ g_free(t);
+ g_variant_unref(v);
+ }
+#endif
+
+ assert_se(sd_bus_message_dump(m, NULL, SD_BUS_MESSAGE_DUMP_WITH_HEADER) >= 0);
+
+ assert_se(bus_message_get_blob(m, &blob, &sz) >= 0);
+
+#if HAVE_GLIB
+ {
+ GVariant *v;
+ char *t;
+
+ v = g_variant_new_from_data(G_VARIANT_TYPE("(yyyyuta{tv}v)"), blob, sz, false, NULL, NULL);
+ assert_se(g_variant_is_normal_form(v));
+ t = g_variant_print(v, TRUE);
+ printf("%s\n", t);
+ g_free(t);
+ g_variant_unref(v);
+ }
+#endif
+
+ assert_se(bus_message_from_malloc(bus, blob, sz, NULL, 0, NULL, &n) >= 0);
+ blob = NULL;
+
+ assert_se(sd_bus_message_dump(n, NULL, SD_BUS_MESSAGE_DUMP_WITH_HEADER) >= 0);
+
+ m = sd_bus_message_unref(m);
+
+ assert_se(sd_bus_message_new_method_call(bus, &m, "a.x", "/a/x", "a.x", "Ax") >= 0);
+
+ assert_se(sd_bus_message_append(m, "as", 0) >= 0);
+
+ assert_se(sd_bus_message_seal(m, 4712, 0) >= 0);
+ assert_se(sd_bus_message_dump(m, NULL, SD_BUS_MESSAGE_DUMP_WITH_HEADER) >= 0);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_bus_gvariant_is_fixed_size();
+ test_bus_gvariant_get_size();
+ test_bus_gvariant_get_alignment();
+
+ return test_marshal();
+}
diff --git a/src/libsystemd/sd-bus/test-bus-introspect.c b/src/libsystemd/sd-bus/test-bus-introspect.c
new file mode 100644
index 0000000..fb247c4
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-introspect.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-introspect.h"
+#include "log.h"
+#include "tests.h"
+
+#include "test-vtable-data.h"
+
+static void test_manual_introspection(const sd_bus_vtable vtable[]) {
+ struct introspect intro = {};
+ _cleanup_free_ char *s = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(introspect_begin(&intro, false) >= 0);
+
+ assert_se(introspect_write_interface(&intro, "org.foo", vtable) >= 0);
+ /* write again to check if output looks OK for a different interface */
+ assert_se(introspect_write_interface(&intro, "org.foo.bar", vtable) >= 0);
+ assert_se(introspect_finish(&intro, &s) == 0);
+
+ fputs(s, stdout);
+ fputs("\n", stdout);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_manual_introspection(test_vtable_1);
+ test_manual_introspection(test_vtable_2);
+ test_manual_introspection(test_vtable_deprecated);
+ test_manual_introspection((const sd_bus_vtable *) vtable_format_221);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-marshal.c b/src/libsystemd/sd-bus/test-bus-marshal.c
new file mode 100644
index 0000000..9feeaf4
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-marshal.c
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <math.h>
+#include <stdlib.h>
+
+#if HAVE_GLIB
+#include <gio/gio.h>
+#endif
+
+#if HAVE_DBUS
+#include <dbus/dbus.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_bus_path_encode_unique(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL;
+
+ assert_se(bus_path_encode_unique(NULL, "/foo/bar", "some.sender", "a.suffix", &a) >= 0 && streq_ptr(a, "/foo/bar/some_2esender/a_2esuffix"));
+ assert_se(bus_path_decode_unique(a, "/foo/bar", &b, &c) > 0 && streq_ptr(b, "some.sender") && streq_ptr(c, "a.suffix"));
+ assert_se(bus_path_decode_unique(a, "/bar/foo", &d, &d) == 0 && !d);
+ assert_se(bus_path_decode_unique("/foo/bar/onlyOneSuffix", "/foo/bar", &d, &d) == 0 && !d);
+ assert_se(bus_path_decode_unique("/foo/bar/_/_", "/foo/bar", &d, &e) > 0 && streq_ptr(d, "") && streq_ptr(e, ""));
+}
+
+static void test_bus_path_encode(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL;
+
+ assert_se(sd_bus_path_encode("/foo/bar", "waldo", &a) >= 0 && streq(a, "/foo/bar/waldo"));
+ assert_se(sd_bus_path_decode(a, "/waldo", &b) == 0 && b == NULL);
+ assert_se(sd_bus_path_decode(a, "/foo/bar", &b) > 0 && streq(b, "waldo"));
+
+ assert_se(sd_bus_path_encode("xxxx", "waldo", &c) < 0);
+ assert_se(sd_bus_path_encode("/foo/", "waldo", &c) < 0);
+
+ assert_se(sd_bus_path_encode("/foo/bar", "", &c) >= 0 && streq(c, "/foo/bar/_"));
+ assert_se(sd_bus_path_decode(c, "/foo/bar", &d) > 0 && streq(d, ""));
+
+ assert_se(sd_bus_path_encode("/foo/bar", "foo.bar", &e) >= 0 && streq(e, "/foo/bar/foo_2ebar"));
+ assert_se(sd_bus_path_decode(e, "/foo/bar", &f) > 0 && streq(f, "foo.bar"));
+}
+
+static void test_bus_path_encode_many(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL;
+
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/prefix/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/prefix/bar", "/prefix/%bar", NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/prefix/%/suffix", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/prefix/foobar/suffix", "/prefix/%/suffix", &a) == 1 && streq_ptr(a, "foobar"));
+ assert_se(sd_bus_path_decode_many("/prefix/one_foo_two/mid/three_bar_four/suffix", "/prefix/one_%_two/mid/three_%_four/suffix", &b, &c) == 1 && streq_ptr(b, "foo") && streq_ptr(c, "bar"));
+ assert_se(sd_bus_path_decode_many("/prefix/one_foo_two/mid/three_bar_four/suffix", "/prefix/one_%_two/mid/three_%_four/suffix", NULL, &d) == 1 && streq_ptr(d, "bar"));
+
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/foo/bar/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/bar%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/bar", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%bar", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/bar/suffix") == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%%/suffix", NULL, NULL) == 0); /* multiple '%' are treated verbatim */
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/suffi", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/suffix", &e) == 1 && streq_ptr(e, "bar"));
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/%", NULL, NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%/%", NULL, NULL, NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "%/%/%", NULL, NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%", NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%/", NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "%", NULL) == 0);
+
+ assert_se(sd_bus_path_encode_many(&f, "/prefix/one_%_two/mid/three_%_four/suffix", "foo", "bar") >= 0 && streq_ptr(f, "/prefix/one_foo_two/mid/three_bar_four/suffix"));
+}
+
+static void test_bus_label_escape_one(const char *a, const char *b) {
+ _cleanup_free_ char *t = NULL, *x = NULL, *y = NULL;
+
+ assert_se(t = bus_label_escape(a));
+ assert_se(streq(t, b));
+
+ assert_se(x = bus_label_unescape(t));
+ assert_se(streq(a, x));
+
+ assert_se(y = bus_label_unescape(b));
+ assert_se(streq(a, y));
+}
+
+static void test_bus_label_escape(void) {
+ test_bus_label_escape_one("foo123bar", "foo123bar");
+ test_bus_label_escape_one("foo.bar", "foo_2ebar");
+ test_bus_label_escape_one("foo_2ebar", "foo_5f2ebar");
+ test_bus_label_escape_one("", "_");
+ test_bus_label_escape_one("_", "_5f");
+ test_bus_label_escape_one("1", "_31");
+ test_bus_label_escape_one(":1", "_3a1");
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *copy = NULL;
+ int r, boolean;
+ const char *x, *x2, *y, *z, *a, *b, *c, *d, *a_signature;
+ uint8_t u, v;
+ void *buffer = NULL;
+ size_t sz;
+ _cleanup_free_ char *h = NULL;
+ const int32_t integer_array[] = { -1, -2, 0, 1, 2 }, *return_array;
+ char *s;
+ _cleanup_free_ char *first = NULL, *second = NULL, *third = NULL;
+ _cleanup_fclose_ FILE *ms = NULL;
+ size_t first_size = 0, second_size = 0, third_size = 0;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ double dbl;
+ uint64_t u64;
+
+ test_setup_logging(LOG_INFO);
+
+ r = sd_bus_default_user(&bus);
+ if (r < 0)
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ r = sd_bus_message_new_method_call(bus, &m, "foobar.waldo", "/", "foobar.waldo", "Piep");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "s", "a string");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "s", NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "asg", 2, "string #1", "string #2", "sba(tt)ss");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "sass", "foobar", 5, "foo", "bar", "waldo", "piep", "pap", "after");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "a{yv}", 2, 3, "s", "foo", 5, "s", "waldo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "y(ty)y(yt)y", 8, 777ULL, 7, 9, 77, 7777ULL, 10);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "()");
+ assert_se(r == -EINVAL);
+
+ r = sd_bus_message_append(m, "ba(ss)", 255, 3, "aaa", "1", "bbb", "2", "ccc", "3");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_basic(m, 's', "foobar");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_basic(m, 's', "waldo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_close_container(m);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_string_space(m, 5, &s);
+ assert_se(r >= 0);
+ strcpy(s, "hallo");
+
+ r = sd_bus_message_append_array(m, 'i', integer_array, sizeof(integer_array));
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_array(m, 'u', NULL, 0);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "a(stdo)", 1, "foo", 815ULL, 47.0, "/");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_seal(m, 4711, 0);
+ assert_se(r >= 0);
+
+ sd_bus_message_dump(m, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ ms = open_memstream_unlocked(&first, &first_size);
+ sd_bus_message_dump(m, ms, 0);
+ fflush(ms);
+ assert_se(!ferror(ms));
+
+ r = bus_message_get_blob(m, &buffer, &sz);
+ assert_se(r >= 0);
+
+ h = cescape_length(buffer, sz);
+ assert_se(h);
+ log_info("message size = %zu, contents =\n%s", sz, h);
+
+#if HAVE_GLIB
+ /* Work-around for asan bug. See c8d980a3e962aba2ea3a4cedf75fa94890a6d746. */
+#if !HAS_FEATURE_ADDRESS_SANITIZER
+ {
+ GDBusMessage *g;
+ char *p;
+
+#if !defined(GLIB_VERSION_2_36)
+ g_type_init();
+#endif
+
+ g = g_dbus_message_new_from_blob(buffer, sz, 0, NULL);
+ p = g_dbus_message_print(g, 0);
+ log_info("%s", p);
+ g_free(p);
+ g_object_unref(g);
+ }
+#endif
+#endif
+
+#if HAVE_DBUS
+ {
+ DBusMessage *w;
+ DBusError error;
+
+ dbus_error_init(&error);
+
+ w = dbus_message_demarshal(buffer, sz, &error);
+ if (!w)
+ log_error("%s", error.message);
+ else
+ dbus_message_unref(w);
+
+ dbus_error_free(&error);
+ }
+#endif
+
+ m = sd_bus_message_unref(m);
+
+ r = bus_message_from_malloc(bus, buffer, sz, NULL, 0, NULL, &m);
+ assert_se(r >= 0);
+
+ sd_bus_message_dump(m, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ fclose(ms);
+ ms = open_memstream_unlocked(&second, &second_size);
+ sd_bus_message_dump(m, ms, 0);
+ fflush(ms);
+ assert_se(!ferror(ms));
+ assert_se(first_size == second_size);
+ assert_se(memcmp(first, second, first_size) == 0);
+
+ assert_se(sd_bus_message_rewind(m, true) >= 0);
+
+ r = sd_bus_message_read(m, "ssasg", &x, &x2, 2, &y, &z, &a_signature);
+ assert_se(r > 0);
+ assert_se(streq(x, "a string"));
+ assert_se(streq(x2, ""));
+ assert_se(streq(y, "string #1"));
+ assert_se(streq(z, "string #2"));
+ assert_se(streq(a_signature, "sba(tt)ss"));
+
+ r = sd_bus_message_read(m, "sass", &x, 5, &y, &z, &a, &b, &c, &d);
+ assert_se(r > 0);
+ assert_se(streq(x, "foobar"));
+ assert_se(streq(y, "foo"));
+ assert_se(streq(z, "bar"));
+ assert_se(streq(a, "waldo"));
+ assert_se(streq(b, "piep"));
+ assert_se(streq(c, "pap"));
+ assert_se(streq(d, "after"));
+
+ r = sd_bus_message_read(m, "a{yv}", 2, &u, "s", &x, &v, "s", &y);
+ assert_se(r > 0);
+ assert_se(u == 3);
+ assert_se(streq(x, "foo"));
+ assert_se(v == 5);
+ assert_se(streq(y, "waldo"));
+
+ r = sd_bus_message_read(m, "y(ty)", &v, &u64, &u);
+ assert_se(r > 0);
+ assert_se(v == 8);
+ assert_se(u64 == 777);
+ assert_se(u == 7);
+
+ r = sd_bus_message_read(m, "y(yt)", &v, &u, &u64);
+ assert_se(r > 0);
+ assert_se(v == 9);
+ assert_se(u == 77);
+ assert_se(u64 == 7777);
+
+ r = sd_bus_message_read(m, "y", &v);
+ assert_se(r > 0);
+ assert_se(v == 10);
+
+ r = sd_bus_message_read(m, "()");
+ assert_se(r < 0);
+
+ r = sd_bus_message_read(m, "ba(ss)", &boolean, 3, &x, &y, &a, &b, &c, &d);
+ assert_se(r > 0);
+ assert_se(boolean);
+ assert_se(streq(x, "aaa"));
+ assert_se(streq(y, "1"));
+ assert_se(streq(a, "bbb"));
+ assert_se(streq(b, "2"));
+ assert_se(streq(c, "ccc"));
+ assert_se(streq(d, "3"));
+
+ assert_se(sd_bus_message_verify_type(m, 'a', "s") > 0);
+
+ r = sd_bus_message_read(m, "as", 2, &x, &y);
+ assert_se(r > 0);
+ assert_se(streq(x, "foobar"));
+ assert_se(streq(y, "waldo"));
+
+ r = sd_bus_message_read_basic(m, 's', &s);
+ assert_se(r > 0);
+ assert_se(streq(s, "hallo"));
+
+ r = sd_bus_message_read_array(m, 'i', (const void**) &return_array, &sz);
+ assert_se(r > 0);
+ assert_se(sz == sizeof(integer_array));
+ assert_se(memcmp(integer_array, return_array, sz) == 0);
+
+ r = sd_bus_message_read_array(m, 'u', (const void**) &return_array, &sz);
+ assert_se(r > 0);
+ assert_se(sz == 0);
+
+ r = sd_bus_message_read(m, "a(stdo)", 1, &x, &u64, &dbl, &y);
+ assert_se(r > 0);
+ assert_se(streq(x, "foo"));
+ assert_se(u64 == 815ULL);
+ assert_se(fabs(dbl - 47.0) < 0.1);
+ assert_se(streq(y, "/"));
+
+ r = sd_bus_message_peek_type(m, NULL, NULL);
+ assert_se(r == 0);
+
+ r = sd_bus_message_new_method_call(bus, &copy, "foobar.waldo", "/", "foobar.waldo", "Piep");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_copy(copy, m, true);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_seal(copy, 4712, 0);
+ assert_se(r >= 0);
+
+ fclose(ms);
+ ms = open_memstream_unlocked(&third, &third_size);
+ sd_bus_message_dump(copy, ms, 0);
+ fflush(ms);
+ assert_se(!ferror(ms));
+
+ printf("<%.*s>\n", (int) first_size, first);
+ printf("<%.*s>\n", (int) third_size, third);
+
+ assert_se(first_size == third_size);
+ assert_se(memcmp(first, third, third_size) == 0);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 's', NULL) > 0);
+
+ r = sd_bus_message_skip(m, "ssasg");
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_verify_type(m, 's', NULL) > 0);
+
+ r = sd_bus_message_skip(m, "sass");
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 'a', "{yv}") > 0);
+
+ r = sd_bus_message_skip(m, "a{yv}y(ty)y(yt)y");
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 'b', NULL) > 0);
+
+ r = sd_bus_message_read(m, "b", &boolean);
+ assert_se(r > 0);
+ assert_se(boolean);
+
+ r = sd_bus_message_enter_container(m, 0, NULL);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &x, &y);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &a, &b);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &c, &d);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &x, &y);
+ assert_se(r == 0);
+
+ r = sd_bus_message_exit_container(m);
+ assert_se(r >= 0);
+
+ assert_se(streq(x, "aaa"));
+ assert_se(streq(y, "1"));
+ assert_se(streq(a, "bbb"));
+ assert_se(streq(b, "2"));
+ assert_se(streq(c, "ccc"));
+ assert_se(streq(d, "3"));
+
+ test_bus_label_escape();
+ test_bus_path_encode();
+ test_bus_path_encode_unique();
+ test_bus_path_encode_many();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-match.c b/src/libsystemd/sd-bus/test-bus-match.c
new file mode 100644
index 0000000..aa6ddd3
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-match.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-match.h"
+#include "bus-message.h"
+#include "bus-slot.h"
+#include "bus-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "tests.h"
+
+static bool mask[32];
+
+static int filter(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Ran %u", PTR_TO_UINT(userdata));
+ assert_se(PTR_TO_UINT(userdata) < ELEMENTSOF(mask));
+ mask[PTR_TO_UINT(userdata)] = true;
+ return 0;
+}
+
+static bool mask_contains(unsigned a[], unsigned n) {
+ unsigned i, j;
+
+ for (i = 0; i < ELEMENTSOF(mask); i++) {
+ bool found = false;
+
+ for (j = 0; j < n; j++)
+ if (a[j] == i) {
+ found = true;
+ break;
+ }
+
+ if (found != mask[i])
+ return false;
+ }
+
+ return true;
+}
+
+static int match_add(sd_bus_slot *slots, struct bus_match_node *root, const char *match, int value) {
+ struct bus_match_component *components = NULL;
+ unsigned n_components = 0;
+ sd_bus_slot *s;
+ int r;
+
+ s = slots + value;
+ zero(*s);
+
+ r = bus_match_parse(match, &components, &n_components);
+ if (r < 0)
+ return r;
+
+ s->userdata = INT_TO_PTR(value);
+ s->match_callback.callback = filter;
+
+ r = bus_match_add(root, components, n_components, &s->match_callback);
+ bus_match_parse_free(components, n_components);
+
+ return r;
+}
+
+static void test_match_scope(const char *match, enum bus_match_scope scope) {
+ struct bus_match_component *components = NULL;
+ unsigned n_components = 0;
+
+ assert_se(bus_match_parse(match, &components, &n_components) >= 0);
+ assert_se(bus_match_get_scope(components, n_components) == scope);
+ bus_match_parse_free(components, n_components);
+}
+
+int main(int argc, char *argv[]) {
+ struct bus_match_node root = {
+ .type = BUS_MATCH_ROOT,
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ enum bus_match_node_type i;
+ sd_bus_slot slots[19];
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0)
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ assert_se(match_add(slots, &root, "arg2='wal\\'do',sender='foo',type='signal',interface='bar.x',", 1) >= 0);
+ assert_se(match_add(slots, &root, "arg2='wal\\'do2',sender='foo',type='signal',interface='bar.x',", 2) >= 0);
+ assert_se(match_add(slots, &root, "arg3='test',sender='foo',type='signal',interface='bar.x',", 3) >= 0);
+ assert_se(match_add(slots, &root, "arg3='test',sender='foo',type='method_call',interface='bar.x',", 4) >= 0);
+ assert_se(match_add(slots, &root, "", 5) >= 0);
+ assert_se(match_add(slots, &root, "interface='quux.x'", 6) >= 0);
+ assert_se(match_add(slots, &root, "interface='bar.x'", 7) >= 0);
+ assert_se(match_add(slots, &root, "member='waldo',path='/foo/bar'", 8) >= 0);
+ assert_se(match_add(slots, &root, "path='/foo/bar'", 9) >= 0);
+ assert_se(match_add(slots, &root, "path_namespace='/foo'", 10) >= 0);
+ assert_se(match_add(slots, &root, "path_namespace='/foo/quux'", 11) >= 0);
+ assert_se(match_add(slots, &root, "arg1='two'", 12) >= 0);
+ assert_se(match_add(slots, &root, "member='waldo',arg2path='/prefix/'", 13) >= 0);
+ assert_se(match_add(slots, &root, "member=waldo,path='/foo/bar',arg3namespace='prefix'", 14) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='pi'", 15) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='pa'", 16) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='po'", 17) >= 0);
+ assert_se(match_add(slots, &root, "arg4='pi'", 18) >= 0);
+
+ bus_match_dump(&root, 0);
+
+ assert_se(sd_bus_message_new_signal(bus, &m, "/foo/bar", "bar.x", "waldo") >= 0);
+ assert_se(sd_bus_message_append(m, "ssssas", "one", "two", "/prefix/three", "prefix.four", 3, "pi", "pa", "po") >= 0);
+ assert_se(sd_bus_message_seal(m, 1, 0) >= 0);
+
+ zero(mask);
+ assert_se(bus_match_run(NULL, &root, m) == 0);
+ assert_se(mask_contains((unsigned[]) { 9, 8, 7, 5, 10, 12, 13, 14, 15, 16, 17 }, 11));
+
+ assert_se(bus_match_remove(&root, &slots[8].match_callback) >= 0);
+ assert_se(bus_match_remove(&root, &slots[13].match_callback) >= 0);
+
+ bus_match_dump(&root, 0);
+
+ zero(mask);
+ assert_se(bus_match_run(NULL, &root, m) == 0);
+ assert_se(mask_contains((unsigned[]) { 9, 5, 10, 12, 14, 7, 15, 16, 17 }, 9));
+
+ for (i = 0; i < _BUS_MATCH_NODE_TYPE_MAX; i++) {
+ char buf[32];
+ const char *x;
+
+ assert_se(x = bus_match_node_type_to_string(i, buf, sizeof(buf)));
+
+ if (i >= BUS_MATCH_MESSAGE_TYPE)
+ assert_se(bus_match_node_type_from_string(x, strlen(x)) == i);
+ }
+
+ bus_match_free(&root);
+
+ test_match_scope("interface='foobar'", BUS_MATCH_GENERIC);
+ test_match_scope("", BUS_MATCH_GENERIC);
+ test_match_scope("interface='org.freedesktop.DBus.Local'", BUS_MATCH_LOCAL);
+ test_match_scope("sender='org.freedesktop.DBus.Local'", BUS_MATCH_LOCAL);
+ test_match_scope("member='gurke',path='/org/freedesktop/DBus/Local'", BUS_MATCH_LOCAL);
+ test_match_scope("arg2='piep',sender='org.freedesktop.DBus',member='waldo'", BUS_MATCH_DRIVER);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-objects.c b/src/libsystemd/sd-bus/test-bus-objects.c
new file mode 100644
index 0000000..9116211
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-objects.c
@@ -0,0 +1,536 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "log.h"
+#include "macro.h"
+#include "strv.h"
+#include "util.h"
+
+struct context {
+ int fds[2];
+ bool quit;
+ char *something;
+ char *automatic_string_property;
+ uint32_t automatic_integer_property;
+};
+
+static int something_handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ const char *s;
+ char *n = NULL;
+ int r;
+
+ r = sd_bus_message_read(m, "s", &s);
+ assert_se(r > 0);
+
+ n = strjoin("<<<", s, ">>>");
+ assert_se(n);
+
+ free(c->something);
+ c->something = n;
+
+ log_info("AlterSomething() called, got %s, returning %s", s, n);
+
+ /* This should fail, since the return type doesn't match */
+ assert_se(sd_bus_reply_method_return(m, "u", 4711) == -ENOMSG);
+
+ r = sd_bus_reply_method_return(m, "s", n);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int exit_handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ int r;
+
+ c->quit = true;
+
+ log_info("Exit called");
+
+ r = sd_bus_reply_method_return(m, "");
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int get_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ int r;
+
+ log_info("property get for %s called, returning \"%s\".", property, c->something);
+
+ r = sd_bus_message_append(reply, "s", c->something);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int set_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ const char *s;
+ char *n;
+ int r;
+
+ log_info("property set for %s called", property);
+
+ r = sd_bus_message_read(value, "s", &s);
+ assert_se(r >= 0);
+
+ n = strdup(s);
+ assert_se(n);
+
+ free(c->something);
+ c->something = n;
+
+ return 1;
+}
+
+static int value_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *s = NULL;
+ const char *x;
+ int r;
+
+ assert_se(asprintf(&s, "object %p, path %s", userdata, path) >= 0);
+ r = sd_bus_message_append(reply, "s", s);
+ assert_se(r >= 0);
+
+ assert_se(x = startswith(path, "/value/"));
+
+ assert_se(PTR_TO_UINT(userdata) == 30);
+
+ return 1;
+}
+
+static int notify_test(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), m->path, "org.freedesktop.systemd.ValueTest", "Value", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int notify_test2(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_properties_changed_strv(sd_bus_message_get_bus(m), m->path, "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_interfaces_added(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_interfaces_added(sd_bus_message_get_bus(m), "/value/a/x", "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_interfaces_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_interfaces_removed(sd_bus_message_get_bus(m), "/value/a/x", "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_object_added(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_object_added(sd_bus_message_get_bus(m), "/value/a/x") >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_object_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_object_removed(sd_bus_message_get_bus(m), "/value/a/x") >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static const sd_bus_vtable vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("AlterSomething", "s", "s", something_handler, 0),
+ SD_BUS_METHOD("Exit", "", "", exit_handler, 0),
+ SD_BUS_WRITABLE_PROPERTY("Something", "s", get_handler, set_handler, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticStringProperty", "s", NULL, NULL, offsetof(struct context, automatic_string_property), 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticIntegerProperty", "u", NULL, NULL, offsetof(struct context, automatic_integer_property), 0),
+ SD_BUS_METHOD("NoOperation", NULL, NULL, NULL, 0),
+ SD_BUS_METHOD("EmitInterfacesAdded", NULL, NULL, emit_interfaces_added, 0),
+ SD_BUS_METHOD("EmitInterfacesRemoved", NULL, NULL, emit_interfaces_removed, 0),
+ SD_BUS_METHOD("EmitObjectAdded", NULL, NULL, emit_object_added, 0),
+ SD_BUS_METHOD("EmitObjectRemoved", NULL, NULL, emit_object_removed, 0),
+ SD_BUS_VTABLE_END
+};
+
+static const sd_bus_vtable vtable2[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("NotifyTest", "", "", notify_test, 0),
+ SD_BUS_METHOD("NotifyTest2", "", "", notify_test2, 0),
+ SD_BUS_PROPERTY("Value", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Value2", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Value3", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Value4", "s", value_handler, 10, 0),
+ SD_BUS_PROPERTY("AnExplicitProperty", "s", NULL, offsetof(struct context, something), SD_BUS_VTABLE_PROPERTY_EXPLICIT|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+static int enumerator_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+
+ if (object_path_startswith("/value", path))
+ assert_se(*nodes = strv_new("/value/a", "/value/b", "/value/c"));
+
+ return 1;
+}
+
+static int enumerator2_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+
+ if (object_path_startswith("/value/a", path))
+ assert_se(*nodes = strv_new("/value/a/x", "/value/a/y", "/value/a/z"));
+
+ return 1;
+}
+
+static void *server(void *p) {
+ struct context *c = p;
+ sd_bus *bus = NULL;
+ sd_id128_t id;
+ int r;
+
+ c->quit = false;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[0], c->fds[0]) >= 0);
+ assert_se(sd_bus_set_server(bus, 1, id) >= 0);
+
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.test", vtable, c) >= 0);
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.test2", vtable, c) >= 0);
+ assert_se(sd_bus_add_fallback_vtable(bus, NULL, "/value", "org.freedesktop.systemd.ValueTest", vtable2, NULL, UINT_TO_PTR(20)) >= 0);
+ assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value", enumerator_callback, NULL) >= 0);
+ assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value/a", enumerator2_callback, NULL) >= 0);
+ assert_se(sd_bus_add_object_manager(bus, NULL, "/value") >= 0);
+ assert_se(sd_bus_add_object_manager(bus, NULL, "/value/a") >= 0);
+
+ assert_se(sd_bus_start(bus) >= 0);
+
+ log_error("Entering event loop on server");
+
+ while (!c->quit) {
+ log_error("Loop!");
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto fail;
+ }
+
+ if (r == 0) {
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto fail;
+ }
+
+ continue;
+ }
+ }
+
+ r = 0;
+
+fail:
+ if (bus) {
+ sd_bus_flush(bus);
+ sd_bus_unref(bus);
+ }
+
+ return INT_TO_PTR(r);
+}
+
+static int client(struct context *c) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *s;
+ int r;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[1], c->fds[1]) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "NoOperation", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AlterSomething", &error, &reply, "s", "hallo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "<<<hallo>>>"));
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Doesntexist", &error, &reply, "");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD));
+
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AlterSomething", &error, &reply, "as", 1, "hallo");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS));
+
+ sd_bus_error_free(&error);
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "<<<hallo>>>"));
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, "s", "test");
+ assert_se(r >= 0);
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "test"));
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AutomaticIntegerProperty", &error, "u", 815);
+ assert_se(r >= 0);
+
+ assert_se(c->automatic_integer_property == 815);
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AutomaticStringProperty", &error, "s", "Du Dödel, Du!");
+ assert_se(r >= 0);
+
+ assert_se(streq(c->automatic_string_property, "Du Dödel, Du!"));
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/value/xuzz", "org.freedesktop.systemd.ValueTest", "Value", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ log_info("read %s", s);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", "");
+ assert_se(r >= 0);
+
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", "org.freedesktop.systemd.ValueTest2");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_INTERFACE));
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.ObjectManager", "GetManagedObjects", &error, &reply, "");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD));
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value", "org.freedesktop.DBus.ObjectManager", "GetManagedObjects", &error, &reply, "");
+ assert_se(r >= 0);
+
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.systemd.ValueTest", "NotifyTest", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.Properties", "PropertiesChanged"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.systemd.ValueTest", "NotifyTest2", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.Properties", "PropertiesChanged"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitInterfacesAdded", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitInterfacesRemoved", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectAdded", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectRemoved", &error, NULL, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ sd_bus_message_unref(reply);
+ reply = NULL;
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Exit", &error, NULL, "");
+ assert_se(r >= 0);
+
+ sd_bus_flush(bus);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ struct context c = {};
+ pthread_t s;
+ void *p;
+ int r, q;
+
+ c.automatic_integer_property = 4711;
+ assert_se(c.automatic_string_property = strdup("dudeldu"));
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds) >= 0);
+
+ r = pthread_create(&s, NULL, server, &c);
+ if (r != 0)
+ return -r;
+
+ r = client(&c);
+
+ q = pthread_join(s, &p);
+ if (q != 0)
+ return -q;
+
+ if (r < 0)
+ return r;
+
+ if (PTR_TO_INT(p) < 0)
+ return PTR_TO_INT(p);
+
+ free(c.something);
+ free(c.automatic_string_property);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c b/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c
new file mode 100644
index 0000000..7c2fa72
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include "sd-bus.h"
+
+#include "main-func.h"
+#include "tests.h"
+
+static int test_ref_unref(void) {
+ sd_bus_message *m = NULL;
+ sd_bus *bus = NULL;
+ int r;
+
+ /* This test will result in a memory leak in <= v240, but not on v241. Hence to be really useful it
+ * should be run through a leak tracker such as valgrind. */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ /* Create a message and enqueue it (this shouldn't send it though as the connection setup is not complete yet) */
+ assert_se(sd_bus_message_new_method_call(bus, &m, "foo.bar", "/foo", "quux.quux", "waldo") >= 0);
+ assert_se(sd_bus_send(bus, m, NULL) >= 0);
+
+ /* Let's now unref the message first and the bus second. */
+ m = sd_bus_message_unref(m);
+ bus = sd_bus_unref(bus);
+
+ /* We should have a memory leak now on <= v240. Let's do this again, but destroy in the opposite
+ * order. On v240 that too should be a leak. */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ assert_se(sd_bus_message_new_method_call(bus, &m, "foo.bar", "/foo", "quux.quux", "waldo") >= 0);
+ assert_se(sd_bus_send(bus, m, NULL) >= 0);
+
+ /* Let's now unref things in the opposite order */
+ bus = sd_bus_unref(bus);
+ m = sd_bus_message_unref(m);
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = test_ref_unref();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/libsystemd/sd-bus/test-bus-server.c b/src/libsystemd/sd-bus/test-bus-server.c
new file mode 100644
index 0000000..5cb100b
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-server.c
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+
+struct context {
+ int fds[2];
+
+ bool client_negotiate_unix_fds;
+ bool server_negotiate_unix_fds;
+
+ bool client_anonymous_auth;
+ bool server_anonymous_auth;
+};
+
+static void *server(void *p) {
+ struct context *c = p;
+ sd_bus *bus = NULL;
+ sd_id128_t id;
+ bool quit = false;
+ int r;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[0], c->fds[0]) >= 0);
+ assert_se(sd_bus_set_server(bus, 1, id) >= 0);
+ assert_se(sd_bus_set_anonymous(bus, c->server_anonymous_auth) >= 0);
+ assert_se(sd_bus_negotiate_fds(bus, c->server_negotiate_unix_fds) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ while (!quit) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+
+ r = sd_bus_process(bus, &m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto fail;
+ }
+
+ if (r == 0) {
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto fail;
+ }
+
+ continue;
+ }
+
+ if (!m)
+ continue;
+
+ log_info("Got message! member=%s", strna(sd_bus_message_get_member(m)));
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Exit")) {
+
+ assert_se((sd_bus_can_send(bus, 'h') >= 1) ==
+ (c->server_negotiate_unix_fds && c->client_negotiate_unix_fds));
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate return: %m");
+ goto fail;
+ }
+
+ quit = true;
+
+ } else if (sd_bus_message_is_method_call(m, NULL, NULL)) {
+ r = sd_bus_message_new_method_error(
+ m,
+ &reply,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNKNOWN_METHOD, "Unknown method."));
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate return: %m");
+ goto fail;
+ }
+ }
+
+ if (reply) {
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to send reply: %m");
+ goto fail;
+ }
+ }
+ }
+
+ r = 0;
+
+fail:
+ if (bus) {
+ sd_bus_flush(bus);
+ sd_bus_unref(bus);
+ }
+
+ return INT_TO_PTR(r);
+}
+
+static int client(struct context *c) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[1], c->fds[1]) >= 0);
+ assert_se(sd_bus_negotiate_fds(bus, c->client_negotiate_unix_fds) >= 0);
+ assert_se(sd_bus_set_anonymous(bus, c->client_anonymous_auth) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Exit");
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate method call: %m");
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int test_one(bool client_negotiate_unix_fds, bool server_negotiate_unix_fds,
+ bool client_anonymous_auth, bool server_anonymous_auth) {
+
+ struct context c;
+ pthread_t s;
+ void *p;
+ int r, q;
+
+ zero(c);
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds) >= 0);
+
+ c.client_negotiate_unix_fds = client_negotiate_unix_fds;
+ c.server_negotiate_unix_fds = server_negotiate_unix_fds;
+ c.client_anonymous_auth = client_anonymous_auth;
+ c.server_anonymous_auth = server_anonymous_auth;
+
+ r = pthread_create(&s, NULL, server, &c);
+ if (r != 0)
+ return -r;
+
+ r = client(&c);
+
+ q = pthread_join(s, &p);
+ if (q != 0)
+ return -q;
+
+ if (r < 0)
+ return r;
+
+ if (PTR_TO_INT(p) < 0)
+ return PTR_TO_INT(p);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ r = test_one(true, true, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(true, false, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(false, true, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(false, false, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, true, true);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, false, true);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, true, false);
+ assert_se(r == -EPERM);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-signature.c b/src/libsystemd/sd-bus/test-bus-signature.c
new file mode 100644
index 0000000..ea7efe6
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-signature.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-internal.h"
+#include "bus-signature.h"
+#include "log.h"
+#include "string-util.h"
+
+int main(int argc, char *argv[]) {
+ char prefix[256];
+ int r;
+
+ assert_se(signature_is_single("y", false));
+ assert_se(signature_is_single("u", false));
+ assert_se(signature_is_single("v", false));
+ assert_se(signature_is_single("as", false));
+ assert_se(signature_is_single("(ss)", false));
+ assert_se(!signature_is_single("()", false));
+ assert_se(!signature_is_single("(()()()()())", false));
+ assert_se(!signature_is_single("(((())))", false));
+ assert_se(signature_is_single("((((s))))", false));
+ assert_se(signature_is_single("{ss}", true));
+ assert_se(signature_is_single("a{ss}", false));
+ assert_se(!signature_is_single("uu", false));
+ assert_se(!signature_is_single("", false));
+ assert_se(!signature_is_single("(", false));
+ assert_se(!signature_is_single(")", false));
+ assert_se(!signature_is_single("())", false));
+ assert_se(!signature_is_single("((())", false));
+ assert_se(!signature_is_single("{)", false));
+ assert_se(!signature_is_single("{}", true));
+ assert_se(!signature_is_single("{sss}", true));
+ assert_se(!signature_is_single("{s}", true));
+ assert_se(!signature_is_single("{ss}", false));
+ assert_se(!signature_is_single("{ass}", true));
+ assert_se(!signature_is_single("a}", true));
+
+ assert_se(signature_is_pair("yy"));
+ assert_se(signature_is_pair("ss"));
+ assert_se(signature_is_pair("sas"));
+ assert_se(signature_is_pair("sv"));
+ assert_se(signature_is_pair("sa(vs)"));
+ assert_se(!signature_is_pair(""));
+ assert_se(!signature_is_pair("va"));
+ assert_se(!signature_is_pair("sss"));
+ assert_se(!signature_is_pair("{s}ss"));
+
+ assert_se(signature_is_valid("ssa{ss}sssub", true));
+ assert_se(signature_is_valid("ssa{ss}sssub", false));
+ assert_se(signature_is_valid("{ss}", true));
+ assert_se(!signature_is_valid("{ss}", false));
+ assert_se(signature_is_valid("", true));
+ assert_se(signature_is_valid("", false));
+
+ assert_se(signature_is_valid("sssusa(uuubbba(uu)uuuu)a{u(uuuvas)}", false));
+
+ assert_se(!signature_is_valid("a", false));
+ assert_se(signature_is_valid("as", false));
+ assert_se(signature_is_valid("aas", false));
+ assert_se(signature_is_valid("aaas", false));
+ assert_se(signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaad", false));
+ assert_se(signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaas", false));
+ assert_se(!signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaau", false));
+
+ assert_se(signature_is_valid("((((((((((((((((((((((((((((((((s))))))))))))))))))))))))))))))))", false));
+ assert_se(!signature_is_valid("((((((((((((((((((((((((((((((((()))))))))))))))))))))))))))))))))", false));
+
+ assert_se(namespace_complex_pattern("", ""));
+ assert_se(namespace_complex_pattern("foobar", "foobar"));
+ assert_se(namespace_complex_pattern("foobar.waldo", "foobar.waldo"));
+ assert_se(namespace_complex_pattern("foobar.", "foobar.waldo"));
+ assert_se(namespace_complex_pattern("foobar.waldo", "foobar."));
+ assert_se(!namespace_complex_pattern("foobar.waldo", "foobar"));
+ assert_se(!namespace_complex_pattern("foobar", "foobar.waldo"));
+ assert_se(!namespace_complex_pattern("", "foo"));
+ assert_se(!namespace_complex_pattern("foo", ""));
+ assert_se(!namespace_complex_pattern("foo.", ""));
+
+ assert_se(path_complex_pattern("", ""));
+ assert_se(!path_complex_pattern("", "/"));
+ assert_se(!path_complex_pattern("/", ""));
+ assert_se(path_complex_pattern("/", "/"));
+ assert_se(path_complex_pattern("/foobar/", "/"));
+ assert_se(!path_complex_pattern("/foobar/", "/foobar"));
+ assert_se(path_complex_pattern("/foobar", "/foobar"));
+ assert_se(!path_complex_pattern("/foobar", "/foobar/"));
+ assert_se(!path_complex_pattern("/foobar", "/foobar/waldo"));
+ assert_se(path_complex_pattern("/foobar/", "/foobar/waldo"));
+ assert_se(path_complex_pattern("/foobar/waldo", "/foobar/"));
+
+ assert_se(path_simple_pattern("/foo/", "/foo/bar/waldo"));
+
+ assert_se(namespace_simple_pattern("", ""));
+ assert_se(namespace_simple_pattern("", ".foobar"));
+ assert_se(namespace_simple_pattern("foobar", "foobar"));
+ assert_se(namespace_simple_pattern("foobar.waldo", "foobar.waldo"));
+ assert_se(namespace_simple_pattern("foobar", "foobar.waldo"));
+ assert_se(!namespace_simple_pattern("foobar.waldo", "foobar"));
+ assert_se(!namespace_simple_pattern("", "foo"));
+ assert_se(!namespace_simple_pattern("foo", ""));
+ assert_se(namespace_simple_pattern("foo.", "foo.bar.waldo"));
+
+ assert_se(streq(object_path_startswith("/foo/bar", "/foo"), "bar"));
+ assert_se(streq(object_path_startswith("/foo", "/foo"), ""));
+ assert_se(streq(object_path_startswith("/foo", "/"), "foo"));
+ assert_se(streq(object_path_startswith("/", "/"), ""));
+ assert_se(!object_path_startswith("/foo", "/bar"));
+ assert_se(!object_path_startswith("/", "/bar"));
+ assert_se(!object_path_startswith("/foo", ""));
+
+ assert_se(object_path_is_valid("/foo/bar"));
+ assert_se(object_path_is_valid("/foo"));
+ assert_se(object_path_is_valid("/"));
+ assert_se(object_path_is_valid("/foo5"));
+ assert_se(object_path_is_valid("/foo_5"));
+ assert_se(!object_path_is_valid(""));
+ assert_se(!object_path_is_valid("/foo/"));
+ assert_se(!object_path_is_valid("//"));
+ assert_se(!object_path_is_valid("//foo"));
+ assert_se(!object_path_is_valid("/foo//bar"));
+ assert_se(!object_path_is_valid("/foo/aaaäöä"));
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/") {
+ log_info("<%s>", prefix);
+ assert_not_reached("???");
+ }
+
+ r = 0;
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/xxx") {
+ log_info("<%s>", prefix);
+ assert_se(streq(prefix, "/"));
+ assert_se(r == 0);
+ r++;
+ }
+ assert_se(r == 1);
+
+ r = 0;
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/xxx/yyy/zzz") {
+ log_info("<%s>", prefix);
+ assert_se(r != 0 || streq(prefix, "/xxx/yyy"));
+ assert_se(r != 1 || streq(prefix, "/xxx"));
+ assert_se(r != 2 || streq(prefix, "/"));
+ r++;
+ }
+ assert_se(r == 3);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-track.c b/src/libsystemd/sd-bus/test-bus-track.c
new file mode 100644
index 0000000..64aa88b
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-track.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/socket.h>
+
+#include "sd-bus.h"
+
+#include "macro.h"
+#include "tests.h"
+
+static bool track_cb_called_x = false;
+static bool track_cb_called_y = false;
+
+static int track_cb_x(sd_bus_track *t, void *userdata) {
+
+ log_error("TRACK CB X");
+
+ assert_se(!track_cb_called_x);
+ track_cb_called_x = true;
+
+ /* This means b's name disappeared. Let's now disconnect, to make sure the track handling on disconnect works
+ * as it should. */
+
+ assert_se(shutdown(sd_bus_get_fd(sd_bus_track_get_bus(t)), SHUT_RDWR) >= 0);
+ return 1;
+}
+
+static int track_cb_y(sd_bus_track *t, void *userdata) {
+ int r;
+
+ log_error("TRACK CB Y");
+
+ assert_se(!track_cb_called_y);
+ track_cb_called_y = true;
+
+ /* We got disconnected, let's close everything */
+
+ r = sd_event_exit(sd_bus_get_event(sd_bus_track_get_bus(t)), EXIT_SUCCESS);
+ assert_se(r >= 0);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_track_unrefp) sd_bus_track *x = NULL, *y = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *a = NULL, *b = NULL;
+ bool use_system_bus = false;
+ const char *unique;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = sd_event_default(&event);
+ assert_se(r >= 0);
+
+ r = sd_bus_open_user(&a);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT, -ENOMEDIUM)) {
+ r = sd_bus_open_system(&a);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT))
+ return log_tests_skipped("Failed to connect to bus");
+ use_system_bus = true;
+ }
+ assert_se(r >= 0);
+
+ r = sd_bus_attach_event(a, event, SD_EVENT_PRIORITY_NORMAL);
+ assert_se(r >= 0);
+
+ if (use_system_bus)
+ r = sd_bus_open_system(&b);
+ else
+ r = sd_bus_open_user(&b);
+ assert_se(r >= 0);
+
+ r = sd_bus_attach_event(b, event, SD_EVENT_PRIORITY_NORMAL);
+ assert_se(r >= 0);
+
+ /* Watch b's name from a */
+ r = sd_bus_track_new(a, &x, track_cb_x, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_get_unique_name(b, &unique);
+ assert_se(r >= 0);
+
+ r = sd_bus_track_add_name(x, unique);
+ assert_se(r >= 0);
+
+ /* Watch's a's own name from a */
+ r = sd_bus_track_new(a, &y, track_cb_y, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_get_unique_name(a, &unique);
+ assert_se(r >= 0);
+
+ r = sd_bus_track_add_name(y, unique);
+ assert_se(r >= 0);
+
+ /* Now make b's name disappear */
+ sd_bus_close(b);
+
+ r = sd_event_loop(event);
+ assert_se(r >= 0);
+
+ assert_se(track_cb_called_x);
+ assert_se(track_cb_called_y);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-vtable-cc.cc b/src/libsystemd/sd-bus/test-bus-vtable-cc.cc
new file mode 120000
index 0000000..abee398
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-vtable-cc.cc
@@ -0,0 +1 @@
+test-bus-vtable.c \ No newline at end of file
diff --git a/src/libsystemd/sd-bus/test-bus-vtable.c b/src/libsystemd/sd-bus/test-bus-vtable.c
new file mode 100644
index 0000000..fe12238
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-vtable.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* We use system assert.h here, because we don't want to keep macro.h and log.h C++ compatible */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "sd-bus-vtable.h"
+
+#ifndef __cplusplus
+# include "bus-objects.h"
+#endif
+
+#include "test-vtable-data.h"
+
+#define DEFAULT_BUS_PATH "unix:path=/run/dbus/system_bus_socket"
+
+static struct context c = {};
+static int happy_finder_object = 0;
+
+static int happy_finder(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ assert(userdata);
+ assert(userdata == &c);
+
+#ifndef __cplusplus
+ log_info("%s called", __func__);
+#endif
+
+ happy_finder_object++;
+ *found = &happy_finder_object;
+ return 1; /* found */
+}
+
+static void test_vtable(void) {
+ sd_bus *bus = NULL;
+ int r;
+
+ assert(sd_bus_new(&bus) >= 0);
+
+ assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable", test_vtable_2, &c) >= 0);
+ assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable2", test_vtable_2, &c) >= 0);
+ /* the cast on the line below is needed to test with the old version of the table */
+ assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable221",
+ (const sd_bus_vtable *)vtable_format_221, &c) >= 0);
+
+ assert(sd_bus_add_fallback_vtable(bus, NULL, "/fallback", "org.freedesktop.systemd.testVtable2", test_vtable_2, happy_finder, &c) >= 0);
+
+ assert(sd_bus_set_address(bus, DEFAULT_BUS_PATH) >= 0);
+ r = sd_bus_start(bus);
+ assert(r == 0 || /* success */
+ r == -ENOENT /* dbus is inactive */ );
+
+#ifndef __cplusplus
+ _cleanup_free_ char *s, *s2;
+
+ assert_se(introspect_path(bus, "/foo", NULL, false, true, NULL, &s, NULL) == 1);
+ fputs(s, stdout);
+
+ assert_se(introspect_path(bus, "/fallback", NULL, false, true, NULL, &s2, NULL) == 1);
+ fputs(s2, stdout);
+
+ assert_se(happy_finder_object == 1);
+#endif
+
+ sd_bus_unref(bus);
+}
+
+int main(int argc, char **argv) {
+ test_vtable();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-watch-bind.c b/src/libsystemd/sd-bus/test-bus-watch-bind.c
new file mode 100644
index 0000000..fdc8772
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-watch-bind.c
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "tests.h"
+
+static int method_foobar(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Got Foobar() call.");
+
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0) >= 0);
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_exit(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Got Exit() call");
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 1) >= 0);
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static const sd_bus_vtable vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("Foobar", NULL, NULL, method_foobar, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Exit", NULL, NULL, method_exit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END,
+};
+
+static void* thread_server(void *p) {
+ _cleanup_free_ char *suffixed = NULL, *suffixed2 = NULL, *d = NULL;
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union u;
+ const char *path = p;
+ int r;
+
+ log_debug("Initializing server");
+
+ /* Let's play some games, by slowly creating the socket directory, and renaming it in the middle */
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ assert_se(mkdir_parents(path, 0755) >= 0);
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ d = dirname_malloc(path);
+ assert_se(d);
+ assert_se(asprintf(&suffixed, "%s.%" PRIx64, d, random_u64()) >= 0);
+ assert_se(rename(d, suffixed) >= 0);
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ assert_se(asprintf(&suffixed2, "%s.%" PRIx64, d, random_u64()) >= 0);
+ assert_se(symlink(suffixed2, d) >= 0);
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ assert_se(symlink(basename(suffixed), suffixed2) >= 0);
+ (void) usleep(100 * USEC_PER_MSEC);
+
+ socklen_t sa_len;
+ r = sockaddr_un_set_path(&u.un, path);
+ assert_se(r >= 0);
+ sa_len = r;
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ assert_se(fd >= 0);
+
+ assert_se(bind(fd, &u.sa, sa_len) >= 0);
+ usleep(100 * USEC_PER_MSEC);
+
+ assert_se(listen(fd, SOMAXCONN) >= 0);
+ usleep(100 * USEC_PER_MSEC);
+
+ assert_se(touch(path) >= 0);
+ usleep(100 * USEC_PER_MSEC);
+
+ log_debug("Initialized server");
+
+ for (;;) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ sd_id128_t id;
+ int bus_fd, code;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_event_new(&event) >= 0);
+
+ bus_fd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ assert_se(bus_fd >= 0);
+
+ log_debug("Accepted server connection");
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "server") >= 0);
+ assert_se(sd_bus_set_fd(bus, bus_fd, bus_fd) >= 0);
+ assert_se(sd_bus_set_server(bus, true, id) >= 0);
+ /* assert_se(sd_bus_set_anonymous(bus, true) >= 0); */
+
+ assert_se(sd_bus_attach_event(bus, event, 0) >= 0);
+
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "foo.TestInterface", vtable, NULL) >= 0);
+
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_event_loop(event) >= 0);
+
+ assert_se(sd_event_get_exit_code(event, &code) >= 0);
+
+ if (code > 0)
+ break;
+ }
+
+ log_debug("Server done");
+
+ return NULL;
+}
+
+static void* thread_client1(void *p) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *path = p, *t;
+ int r;
+
+ log_debug("Initializing client1");
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "client1") >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Foobar", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ log_debug("Client1 done");
+
+ return NULL;
+}
+
+static int client2_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ assert_se(sd_bus_message_is_method_error(m, NULL) == 0);
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0) >= 0);
+ return 0;
+}
+
+static void* thread_client2(void *p) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *path = p, *t;
+
+ log_debug("Initializing client2");
+
+ assert_se(sd_event_new(&event) >= 0);
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "client2") >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_attach_event(bus, event, 0) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_bus_call_method_async(bus, NULL, "foo.bar", "/foo", "foo.TestInterface", "Foobar", client2_callback, NULL, NULL) >= 0);
+
+ assert_se(sd_event_loop(event) >= 0);
+
+ log_debug("Client2 done");
+
+ return NULL;
+}
+
+static void request_exit(const char *path) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *t;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_set_description(bus, "request-exit") >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Exit", NULL, NULL, NULL) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *d = NULL;
+ pthread_t server, client1, client2;
+ char *path;
+
+ test_setup_logging(LOG_DEBUG);
+
+ /* We use /dev/shm here rather than /tmp, since some weird distros might set up /tmp as some weird fs that
+ * doesn't support inotify properly. */
+ assert_se(mkdtemp_malloc("/dev/shm/systemd-watch-bind-XXXXXX", &d) >= 0);
+
+ path = strjoina(d, "/this/is/a/socket");
+
+ assert_se(pthread_create(&server, NULL, thread_server, path) == 0);
+ assert_se(pthread_create(&client1, NULL, thread_client1, path) == 0);
+ assert_se(pthread_create(&client2, NULL, thread_client2, path) == 0);
+
+ assert_se(pthread_join(client1, NULL) == 0);
+ assert_se(pthread_join(client2, NULL) == 0);
+
+ request_exit(path);
+
+ assert_se(pthread_join(server, NULL) == 0);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-vtable-data.h b/src/libsystemd/sd-bus/test-vtable-data.h
new file mode 100644
index 0000000..7269a49
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-vtable-data.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* This is meant to be included in other files, hence no headers */
+
+struct context {
+ bool quit;
+ char *something;
+ char *automatic_string_property;
+ uint32_t automatic_integer_property;
+};
+
+static int handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int value_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int get_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int set_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static const sd_bus_vtable test_vtable_1[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("Hello", "ssas", "a(uu)", NULL, 0),
+ SD_BUS_METHOD("DeprecatedHello", "", "", NULL, SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_METHOD("DeprecatedHelloNoReply", "", "", NULL, SD_BUS_VTABLE_DEPRECATED|SD_BUS_VTABLE_METHOD_NO_REPLY),
+ SD_BUS_SIGNAL("Wowza", "sss", 0),
+ SD_BUS_SIGNAL("DeprecatedWowza", "ut", SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_WRITABLE_PROPERTY("AProperty", "s", get_handler, set_handler, 0, 0),
+ SD_BUS_PROPERTY("AReadOnlyDeprecatedProperty", "(ut)", get_handler, 0, SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_PROPERTY("ChangingProperty", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Invalidating", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Constant", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EXPLICIT),
+ SD_BUS_VTABLE_END
+};
+
+static const sd_bus_vtable test_vtable_2[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("AlterSomething", "s", "s", handler, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Exit", "", "", handler, 0),
+ SD_BUS_METHOD_WITH_OFFSET("AlterSomething2", "s", "s", handler, 200, 0),
+ SD_BUS_METHOD_WITH_OFFSET("Exit2", "", "", handler, 200, 0),
+ SD_BUS_METHOD_WITH_NAMES_OFFSET("AlterSomething3", "so", SD_BUS_PARAM(string) SD_BUS_PARAM(path),
+ "s", SD_BUS_PARAM(returnstring), handler, 200, 0),
+ SD_BUS_METHOD_WITH_NAMES("Exit3", "bx", SD_BUS_PARAM(with_confirmation) SD_BUS_PARAM(after_msec),
+ "bb", SD_BUS_PARAM(accepted) SD_BUS_PARAM(scheduled), handler, 0),
+ SD_BUS_PROPERTY("Value", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Value2", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Value3", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Value4", "s", value_handler, 10, 0),
+ SD_BUS_PROPERTY("AnExplicitProperty", "s", NULL, offsetof(struct context, something),
+ SD_BUS_VTABLE_PROPERTY_EXPLICIT|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_WRITABLE_PROPERTY("Something", "s", get_handler, set_handler, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticStringProperty", "s", NULL, NULL,
+ offsetof(struct context, automatic_string_property), 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticIntegerProperty", "u", NULL, NULL,
+ offsetof(struct context, automatic_integer_property), 0),
+ SD_BUS_METHOD("NoOperation", NULL, NULL, NULL, 0),
+ SD_BUS_SIGNAL("DummySignal", "b", 0),
+ SD_BUS_SIGNAL("DummySignal2", "so", 0),
+ SD_BUS_SIGNAL_WITH_NAMES("DummySignal3", "so", SD_BUS_PARAM(string) SD_BUS_PARAM(path), 0),
+ SD_BUS_VTABLE_END
+};
+
+static const sd_bus_vtable test_vtable_deprecated[] = {
+ SD_BUS_VTABLE_START(SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_VTABLE_END
+};
+
+struct sd_bus_vtable_221 {
+ uint8_t type:8;
+ uint64_t flags:56;
+ union {
+ struct {
+ size_t element_size;
+ } start;
+ struct {
+ const char *member;
+ const char *signature;
+ const char *result;
+ sd_bus_message_handler_t handler;
+ size_t offset;
+ } method;
+ struct {
+ const char *member;
+ const char *signature;
+ } signal;
+ struct {
+ const char *member;
+ const char *signature;
+ sd_bus_property_get_t get;
+ sd_bus_property_set_t set;
+ size_t offset;
+ } property;
+ } x;
+};
+
+static const struct sd_bus_vtable_221 vtable_format_221[] = {
+ {
+ .type = _SD_BUS_VTABLE_START,
+ .flags = 0,
+ .x = {
+ .start = {
+ .element_size = sizeof(struct sd_bus_vtable_221)
+ },
+ },
+ },
+ {
+ .type = _SD_BUS_VTABLE_METHOD,
+ .flags = 0,
+ .x = {
+ .method = {
+ .member = "Exit",
+ .signature = "",
+ .result = "",
+ .handler = handler,
+ .offset = 0,
+ },
+ },
+ },
+ {
+ .type = _SD_BUS_VTABLE_END,
+ .flags = 0,
+ .x = { { 0 } },
+ }
+};
diff --git a/src/libsystemd/sd-daemon/sd-daemon.c b/src/libsystemd/sd-daemon/sd-daemon.c
new file mode 100644
index 0000000..6f0b975
--- /dev/null
+++ b/src/libsystemd/sd-daemon/sd-daemon.c
@@ -0,0 +1,679 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <limits.h>
+#include <mqueue.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static void unsetenv_all(bool unset_environment) {
+ if (!unset_environment)
+ return;
+
+ assert_se(unsetenv("LISTEN_PID") == 0);
+ assert_se(unsetenv("LISTEN_FDS") == 0);
+ assert_se(unsetenv("LISTEN_FDNAMES") == 0);
+}
+
+_public_ int sd_listen_fds(int unset_environment) {
+ const char *e;
+ int n, r, fd;
+ pid_t pid;
+
+ e = getenv("LISTEN_PID");
+ if (!e) {
+ r = 0;
+ goto finish;
+ }
+
+ r = parse_pid(e, &pid);
+ if (r < 0)
+ goto finish;
+
+ /* Is this for us? */
+ if (getpid_cached() != pid) {
+ r = 0;
+ goto finish;
+ }
+
+ e = getenv("LISTEN_FDS");
+ if (!e) {
+ r = 0;
+ goto finish;
+ }
+
+ r = safe_atoi(e, &n);
+ if (r < 0)
+ goto finish;
+
+ assert_cc(SD_LISTEN_FDS_START < INT_MAX);
+ if (n <= 0 || n > INT_MAX - SD_LISTEN_FDS_START) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) {
+ r = fd_cloexec(fd, true);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = n;
+
+finish:
+ unsetenv_all(unset_environment);
+ return r;
+}
+
+_public_ int sd_listen_fds_with_names(int unset_environment, char ***names) {
+ _cleanup_strv_free_ char **l = NULL;
+ bool have_names;
+ int n_names = 0, n_fds;
+ const char *e;
+ int r;
+
+ if (!names)
+ return sd_listen_fds(unset_environment);
+
+ e = getenv("LISTEN_FDNAMES");
+ if (e) {
+ n_names = strv_split_full(&l, e, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (n_names < 0) {
+ unsetenv_all(unset_environment);
+ return n_names;
+ }
+
+ have_names = true;
+ } else
+ have_names = false;
+
+ n_fds = sd_listen_fds(unset_environment);
+ if (n_fds <= 0)
+ return n_fds;
+
+ if (have_names) {
+ if (n_names != n_fds)
+ return -EINVAL;
+ } else {
+ r = strv_extend_n(&l, "unknown", n_fds);
+ if (r < 0)
+ return r;
+ }
+
+ *names = TAKE_PTR(l);
+
+ return n_fds;
+}
+
+_public_ int sd_is_fifo(int fd, const char *path) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISFIFO(st_fd.st_mode))
+ return 0;
+
+ if (path) {
+ struct stat st_path;
+
+ if (stat(path, &st_path) < 0) {
+
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ return 0;
+
+ return -errno;
+ }
+
+ return
+ st_path.st_dev == st_fd.st_dev &&
+ st_path.st_ino == st_fd.st_ino;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_special(int fd, const char *path) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISREG(st_fd.st_mode) && !S_ISCHR(st_fd.st_mode))
+ return 0;
+
+ if (path) {
+ struct stat st_path;
+
+ if (stat(path, &st_path) < 0) {
+
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ return 0;
+
+ return -errno;
+ }
+
+ if (S_ISREG(st_fd.st_mode) && S_ISREG(st_path.st_mode))
+ return
+ st_path.st_dev == st_fd.st_dev &&
+ st_path.st_ino == st_fd.st_ino;
+ else if (S_ISCHR(st_fd.st_mode) && S_ISCHR(st_path.st_mode))
+ return st_path.st_rdev == st_fd.st_rdev;
+ else
+ return 0;
+ }
+
+ return 1;
+}
+
+static int sd_is_socket_internal(int fd, int type, int listening) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(type >= 0, -EINVAL);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISSOCK(st_fd.st_mode))
+ return 0;
+
+ if (type != 0) {
+ int other_type = 0;
+ socklen_t l = sizeof(other_type);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &other_type, &l) < 0)
+ return -errno;
+
+ if (l != sizeof(other_type))
+ return -EINVAL;
+
+ if (other_type != type)
+ return 0;
+ }
+
+ if (listening >= 0) {
+ int accepting = 0;
+ socklen_t l = sizeof(accepting);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &accepting, &l) < 0)
+ return -errno;
+
+ if (l != sizeof(accepting))
+ return -EINVAL;
+
+ if (!accepting != !listening)
+ return 0;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket(int fd, int family, int type, int listening) {
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(family >= 0, -EINVAL);
+
+ r = sd_is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (family > 0) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ return sockaddr.sa.sa_family == family;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket_inet(int fd, int family, int type, int listening, uint16_t port) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(IN_SET(family, 0, AF_INET, AF_INET6), -EINVAL);
+
+ r = sd_is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (!IN_SET(sockaddr.sa.sa_family, AF_INET, AF_INET6))
+ return 0;
+
+ if (family != 0)
+ if (sockaddr.sa.sa_family != family)
+ return 0;
+
+ if (port > 0) {
+ unsigned sa_port;
+
+ r = sockaddr_port(&sockaddr.sa, &sa_port);
+ if (r < 0)
+ return r;
+
+ return port == sa_port;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket_sockaddr(int fd, int type, const struct sockaddr* addr, unsigned addr_len, int listening) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(addr, -EINVAL);
+ assert_return(addr_len >= sizeof(sa_family_t), -ENOBUFS);
+ assert_return(IN_SET(addr->sa_family, AF_INET, AF_INET6), -EPFNOSUPPORT);
+
+ r = sd_is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (sockaddr.sa.sa_family != addr->sa_family)
+ return 0;
+
+ if (sockaddr.sa.sa_family == AF_INET) {
+ const struct sockaddr_in *in = (const struct sockaddr_in *) addr;
+
+ if (l < sizeof(struct sockaddr_in) || addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ if (in->sin_port != 0 &&
+ sockaddr.in.sin_port != in->sin_port)
+ return false;
+
+ return sockaddr.in.sin_addr.s_addr == in->sin_addr.s_addr;
+
+ } else {
+ const struct sockaddr_in6 *in = (const struct sockaddr_in6 *) addr;
+
+ if (l < sizeof(struct sockaddr_in6) || addr_len < sizeof(struct sockaddr_in6))
+ return -EINVAL;
+
+ if (in->sin6_port != 0 &&
+ sockaddr.in6.sin6_port != in->sin6_port)
+ return false;
+
+ if (in->sin6_flowinfo != 0 &&
+ sockaddr.in6.sin6_flowinfo != in->sin6_flowinfo)
+ return false;
+
+ if (in->sin6_scope_id != 0 &&
+ sockaddr.in6.sin6_scope_id != in->sin6_scope_id)
+ return false;
+
+ return memcmp(sockaddr.in6.sin6_addr.s6_addr, in->sin6_addr.s6_addr,
+ sizeof(in->sin6_addr.s6_addr)) == 0;
+ }
+}
+
+_public_ int sd_is_socket_unix(int fd, int type, int listening, const char *path, size_t length) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+
+ r = sd_is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (sockaddr.sa.sa_family != AF_UNIX)
+ return 0;
+
+ if (path) {
+ if (length == 0)
+ length = strlen(path);
+
+ if (length == 0)
+ /* Unnamed socket */
+ return l == offsetof(struct sockaddr_un, sun_path);
+
+ if (path[0])
+ /* Normal path socket */
+ return
+ (l >= offsetof(struct sockaddr_un, sun_path) + length + 1) &&
+ memcmp(path, sockaddr.un.sun_path, length+1) == 0;
+ else
+ /* Abstract namespace socket */
+ return
+ (l == offsetof(struct sockaddr_un, sun_path) + length) &&
+ memcmp(path, sockaddr.un.sun_path, length) == 0;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_mq(int fd, const char *path) {
+ struct mq_attr attr;
+
+ /* Check that the fd is valid */
+ assert_return(fcntl(fd, F_GETFD) >= 0, -errno);
+
+ if (mq_getattr(fd, &attr) < 0) {
+ if (errno == EBADF)
+ /* A non-mq fd (or an invalid one, but we ruled that out above) */
+ return 0;
+ return -errno;
+ }
+
+ if (path) {
+ char fpath[PATH_MAX];
+ struct stat a, b;
+
+ assert_return(path_is_absolute(path), -EINVAL);
+
+ if (fstat(fd, &a) < 0)
+ return -errno;
+
+ strncpy(stpcpy(fpath, "/dev/mqueue"), path, sizeof(fpath) - 12);
+ fpath[sizeof(fpath)-1] = 0;
+
+ if (stat(fpath, &b) < 0)
+ return -errno;
+
+ if (a.st_dev != b.st_dev ||
+ a.st_ino != b.st_ino)
+ return 0;
+ }
+
+ return 1;
+}
+
+_public_ int sd_pid_notify_with_fds(
+ pid_t pid,
+ int unset_environment,
+ const char *state,
+ const int *fds,
+ unsigned n_fds) {
+
+ union sockaddr_union sockaddr;
+ struct iovec iovec;
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_name = &sockaddr,
+ };
+ _cleanup_close_ int fd = -1;
+ struct cmsghdr *cmsg = NULL;
+ const char *e;
+ bool send_ucred;
+ int r;
+
+ if (!state) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (n_fds > 0 && !fds) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ e = getenv("NOTIFY_SOCKET");
+ if (!e)
+ return 0;
+
+ r = sockaddr_un_set_path(&sockaddr.un, e);
+ if (r < 0)
+ goto finish;
+ msghdr.msg_namelen = r;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ iovec = IOVEC_MAKE_STRING(state);
+
+ send_ucred =
+ (pid != 0 && pid != getpid_cached()) ||
+ getuid() != geteuid() ||
+ getgid() != getegid();
+
+ if (n_fds > 0 || send_ucred) {
+ /* CMSG_SPACE(0) may return value different than zero, which results in miscalculated controllen. */
+ msghdr.msg_controllen =
+ (n_fds > 0 ? CMSG_SPACE(sizeof(int) * n_fds) : 0) +
+ (send_ucred ? CMSG_SPACE(sizeof(struct ucred)) : 0);
+
+ msghdr.msg_control = alloca0(msghdr.msg_controllen);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ if (n_fds > 0) {
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int) * n_fds);
+
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * n_fds);
+
+ if (send_ucred)
+ assert_se(cmsg = CMSG_NXTHDR(&msghdr, cmsg));
+ }
+
+ if (send_ucred) {
+ struct ucred *ucred;
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ ucred->pid = pid != 0 ? pid : getpid_cached();
+ ucred->uid = getuid();
+ ucred->gid = getgid();
+ }
+ }
+
+ /* First try with fake ucred data, as requested */
+ if (sendmsg(fd, &msghdr, MSG_NOSIGNAL) >= 0) {
+ r = 1;
+ goto finish;
+ }
+
+ /* If that failed, try with our own ucred instead */
+ if (send_ucred) {
+ msghdr.msg_controllen -= CMSG_SPACE(sizeof(struct ucred));
+ if (msghdr.msg_controllen == 0)
+ msghdr.msg_control = NULL;
+
+ if (sendmsg(fd, &msghdr, MSG_NOSIGNAL) >= 0) {
+ r = 1;
+ goto finish;
+ }
+ }
+
+ r = -errno;
+
+finish:
+ if (unset_environment)
+ assert_se(unsetenv("NOTIFY_SOCKET") == 0);
+
+ return r;
+}
+
+_public_ int sd_notify_barrier(int unset_environment, uint64_t timeout) {
+ _cleanup_close_pair_ int pipe_fd[2] = { -1, -1 };
+ int r;
+
+ if (pipe2(pipe_fd, O_CLOEXEC) < 0)
+ return -errno;
+
+ r = sd_pid_notify_with_fds(0, unset_environment, "BARRIER=1", &pipe_fd[1], 1);
+ if (r <= 0)
+ return r;
+
+ pipe_fd[1] = safe_close(pipe_fd[1]);
+
+ r = fd_wait_for_event(pipe_fd[0], 0 /* POLLHUP is implicit */, timeout);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+
+ return 1;
+}
+
+_public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) {
+ return sd_pid_notify_with_fds(pid, unset_environment, state, NULL, 0);
+}
+
+_public_ int sd_notify(int unset_environment, const char *state) {
+ return sd_pid_notify_with_fds(0, unset_environment, state, NULL, 0);
+}
+
+_public_ int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0 || !p)
+ return -ENOMEM;
+ }
+
+ return sd_pid_notify(pid, unset_environment, p);
+}
+
+_public_ int sd_notifyf(int unset_environment, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0 || !p)
+ return -ENOMEM;
+ }
+
+ return sd_pid_notify(0, unset_environment, p);
+}
+
+_public_ int sd_booted(void) {
+ /* We test whether the runtime unit file directory has been
+ * created. This takes place in mount-setup.c, so is
+ * guaranteed to happen very early during boot. */
+
+ if (laccess("/run/systemd/system/", F_OK) >= 0)
+ return true;
+
+ if (errno == ENOENT)
+ return false;
+
+ return -errno;
+}
+
+_public_ int sd_watchdog_enabled(int unset_environment, uint64_t *usec) {
+ const char *s, *p = ""; /* p is set to dummy value to do unsetting */
+ uint64_t u;
+ int r = 0;
+
+ s = getenv("WATCHDOG_USEC");
+ if (!s)
+ goto finish;
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ goto finish;
+ if (u <= 0 || u >= USEC_INFINITY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ p = getenv("WATCHDOG_PID");
+ if (p) {
+ pid_t pid;
+
+ r = parse_pid(p, &pid);
+ if (r < 0)
+ goto finish;
+
+ /* Is this for us? */
+ if (getpid_cached() != pid) {
+ r = 0;
+ goto finish;
+ }
+ }
+
+ if (usec)
+ *usec = u;
+
+ r = 1;
+
+finish:
+ if (unset_environment && s)
+ assert_se(unsetenv("WATCHDOG_USEC") == 0);
+ if (unset_environment && p)
+ assert_se(unsetenv("WATCHDOG_PID") == 0);
+
+ return r;
+}
diff --git a/src/libsystemd/sd-device/device-enumerator-private.h b/src/libsystemd/sd-device/device-enumerator-private.h
new file mode 100644
index 0000000..9c6437d
--- /dev/null
+++ b/src/libsystemd/sd-device/device-enumerator-private.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+int device_enumerator_scan_devices(sd_device_enumerator *enumeartor);
+int device_enumerator_scan_subsystems(sd_device_enumerator *enumeartor);
+int device_enumerator_add_device(sd_device_enumerator *enumerator, sd_device *device);
+int device_enumerator_add_match_is_initialized(sd_device_enumerator *enumerator);
+int device_enumerator_add_match_parent_incremental(sd_device_enumerator *enumerator, sd_device *parent);
+sd_device *device_enumerator_get_first(sd_device_enumerator *enumerator);
+sd_device *device_enumerator_get_next(sd_device_enumerator *enumerator);
+sd_device **device_enumerator_get_devices(sd_device_enumerator *enumerator, size_t *ret_n_devices);
+
+#define FOREACH_DEVICE_AND_SUBSYSTEM(enumerator, device) \
+ for (device = device_enumerator_get_first(enumerator); \
+ device; \
+ device = device_enumerator_get_next(enumerator))
diff --git a/src/libsystemd/sd-device/device-enumerator.c b/src/libsystemd/sd-device/device-enumerator.c
new file mode 100644
index 0000000..3641348
--- /dev/null
+++ b/src/libsystemd/sd-device/device-enumerator.c
@@ -0,0 +1,963 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "set.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+#define DEVICE_ENUMERATE_MAX_DEPTH 256
+
+typedef enum DeviceEnumerationType {
+ DEVICE_ENUMERATION_TYPE_DEVICES,
+ DEVICE_ENUMERATION_TYPE_SUBSYSTEMS,
+ _DEVICE_ENUMERATION_TYPE_MAX,
+ _DEVICE_ENUMERATION_TYPE_INVALID = -1,
+} DeviceEnumerationType;
+
+struct sd_device_enumerator {
+ unsigned n_ref;
+
+ DeviceEnumerationType type;
+ sd_device **devices;
+ size_t n_devices, n_allocated, current_device_index;
+ bool scan_uptodate;
+
+ Set *match_subsystem;
+ Set *nomatch_subsystem;
+ Hashmap *match_sysattr;
+ Hashmap *nomatch_sysattr;
+ Hashmap *match_property;
+ Set *match_sysname;
+ Set *match_tag;
+ Set *match_parent;
+ bool match_allow_uninitialized;
+};
+
+_public_ int sd_device_enumerator_new(sd_device_enumerator **ret) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
+
+ assert(ret);
+
+ enumerator = new(sd_device_enumerator, 1);
+ if (!enumerator)
+ return -ENOMEM;
+
+ *enumerator = (sd_device_enumerator) {
+ .n_ref = 1,
+ .type = _DEVICE_ENUMERATION_TYPE_INVALID,
+ };
+
+ *ret = TAKE_PTR(enumerator);
+
+ return 0;
+}
+
+static sd_device_enumerator *device_enumerator_free(sd_device_enumerator *enumerator) {
+ size_t i;
+
+ assert(enumerator);
+
+ for (i = 0; i < enumerator->n_devices; i++)
+ sd_device_unref(enumerator->devices[i]);
+
+ free(enumerator->devices);
+ set_free(enumerator->match_subsystem);
+ set_free(enumerator->nomatch_subsystem);
+ hashmap_free(enumerator->match_sysattr);
+ hashmap_free(enumerator->nomatch_sysattr);
+ hashmap_free(enumerator->match_property);
+ set_free(enumerator->match_sysname);
+ set_free(enumerator->match_tag);
+ set_free(enumerator->match_parent);
+
+ return mfree(enumerator);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_enumerator, sd_device_enumerator, device_enumerator_free);
+
+_public_ int sd_device_enumerator_add_match_subsystem(sd_device_enumerator *enumerator, const char *subsystem, int match) {
+ Set **set;
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ if (match)
+ set = &enumerator->match_subsystem;
+ else
+ set = &enumerator->nomatch_subsystem;
+
+ r = set_put_strdup(set, subsystem);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_sysattr(sd_device_enumerator *enumerator, const char *sysattr, const char *value, int match) {
+ Hashmap **hashmap;
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ if (match)
+ hashmap = &enumerator->match_sysattr;
+ else
+ hashmap = &enumerator->nomatch_sysattr;
+
+ r = hashmap_put_strdup_full(hashmap, &trivial_hash_ops_free_free, sysattr, value);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_property(sd_device_enumerator *enumerator, const char *property, const char *value) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(property, -EINVAL);
+
+ r = hashmap_put_strdup_full(&enumerator->match_property, &trivial_hash_ops_free_free, property, value);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_sysname(sd_device_enumerator *enumerator, const char *sysname) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(sysname, -EINVAL);
+
+ r = set_put_strdup(&enumerator->match_sysname, sysname);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_tag(sd_device_enumerator *enumerator, const char *tag) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ r = set_put_strdup(&enumerator->match_tag, tag);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+int device_enumerator_add_match_parent_incremental(sd_device_enumerator *enumerator, sd_device *parent) {
+ const char *path;
+ int r;
+
+ assert(enumerator);
+ assert(parent);
+
+ r = sd_device_get_syspath(parent, &path);
+ if (r < 0)
+ return r;
+
+ r = set_put_strdup(&enumerator->match_parent, path);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_parent(sd_device_enumerator *enumerator, sd_device *parent) {
+ assert_return(enumerator, -EINVAL);
+ assert_return(parent, -EINVAL);
+
+ set_clear(enumerator->match_parent);
+
+ return device_enumerator_add_match_parent_incremental(enumerator, parent);
+}
+
+_public_ int sd_device_enumerator_allow_uninitialized(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, -EINVAL);
+
+ enumerator->match_allow_uninitialized = true;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+int device_enumerator_add_match_is_initialized(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, -EINVAL);
+
+ enumerator->match_allow_uninitialized = false;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+static int device_compare(sd_device * const *_a, sd_device * const *_b) {
+ sd_device *a = *(sd_device **)_a, *b = *(sd_device **)_b;
+ const char *devpath_a, *devpath_b, *sound_a;
+ bool delay_a, delay_b;
+ int r;
+
+ assert_se(sd_device_get_devpath(a, &devpath_a) >= 0);
+ assert_se(sd_device_get_devpath(b, &devpath_b) >= 0);
+
+ sound_a = strstr(devpath_a, "/sound/card");
+ if (sound_a) {
+ /* For sound cards the control device must be enumerated last to
+ * make sure it's the final device node that gets ACLs applied.
+ * Applications rely on this fact and use ACL changes on the
+ * control node as an indicator that the ACL change of the
+ * entire sound card completed. The kernel makes this guarantee
+ * when creating those devices, and hence we should too when
+ * enumerating them. */
+ sound_a += STRLEN("/sound/card");
+ sound_a = strchr(sound_a, '/');
+
+ if (sound_a) {
+ unsigned prefix_len;
+
+ prefix_len = sound_a - devpath_a;
+
+ if (strncmp(devpath_a, devpath_b, prefix_len) == 0) {
+ const char *sound_b;
+
+ sound_b = devpath_b + prefix_len;
+
+ if (startswith(sound_a, "/controlC") &&
+ !startswith(sound_b, "/contolC"))
+ return 1;
+
+ if (!startswith(sound_a, "/controlC") &&
+ startswith(sound_b, "/controlC"))
+ return -1;
+ }
+ }
+ }
+
+ /* md and dm devices are enumerated after all other devices */
+ delay_a = strstr(devpath_a, "/block/md") || strstr(devpath_a, "/block/dm-");
+ delay_b = strstr(devpath_b, "/block/md") || strstr(devpath_b, "/block/dm-");
+ r = CMP(delay_a, delay_b);
+ if (r != 0)
+ return r;
+
+ return strcmp(devpath_a, devpath_b);
+}
+
+int device_enumerator_add_device(sd_device_enumerator *enumerator, sd_device *device) {
+ assert_return(enumerator, -EINVAL);
+ assert_return(device, -EINVAL);
+
+ if (!GREEDY_REALLOC(enumerator->devices, enumerator->n_allocated, enumerator->n_devices + 1))
+ return -ENOMEM;
+
+ enumerator->devices[enumerator->n_devices++] = sd_device_ref(device);
+
+ return 0;
+}
+
+static bool match_sysattr_value(sd_device *device, const char *sysattr, const char *match_value) {
+ const char *value;
+ int r;
+
+ assert(device);
+ assert(sysattr);
+
+ r = sd_device_get_sysattr_value(device, sysattr, &value);
+ if (r < 0)
+ return false;
+
+ if (!match_value)
+ return true;
+
+ if (fnmatch(match_value, value, 0) == 0)
+ return true;
+
+ return false;
+}
+
+static bool match_sysattr(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *sysattr;
+ const char *value;
+
+ assert(enumerator);
+ assert(device);
+
+ HASHMAP_FOREACH_KEY(value, sysattr, enumerator->nomatch_sysattr)
+ if (match_sysattr_value(device, sysattr, value))
+ return false;
+
+ HASHMAP_FOREACH_KEY(value, sysattr, enumerator->match_sysattr)
+ if (!match_sysattr_value(device, sysattr, value))
+ return false;
+
+ return true;
+}
+
+static bool match_property(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *property;
+ const char *value;
+
+ assert(enumerator);
+ assert(device);
+
+ if (hashmap_isempty(enumerator->match_property))
+ return true;
+
+ HASHMAP_FOREACH_KEY(value, property, enumerator->match_property) {
+ const char *property_dev, *value_dev;
+
+ FOREACH_DEVICE_PROPERTY(device, property_dev, value_dev) {
+ if (fnmatch(property, property_dev, 0) != 0)
+ continue;
+
+ if (!value && !value_dev)
+ return true;
+
+ if (!value || !value_dev)
+ continue;
+
+ if (fnmatch(value, value_dev, 0) == 0)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool match_tag(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *tag;
+
+ assert(enumerator);
+ assert(device);
+
+ SET_FOREACH(tag, enumerator->match_tag)
+ if (!sd_device_has_tag(device, tag))
+ return false;
+
+ return true;
+}
+
+static bool match_parent(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *syspath_parent, *syspath;
+
+ assert(enumerator);
+ assert(device);
+
+ if (set_isempty(enumerator->match_parent))
+ return true;
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+
+ SET_FOREACH(syspath_parent, enumerator->match_parent)
+ if (path_startswith(syspath, syspath_parent))
+ return true;
+
+ return false;
+}
+
+static bool match_sysname(sd_device_enumerator *enumerator, const char *sysname) {
+ const char *sysname_match;
+
+ assert(enumerator);
+ assert(sysname);
+
+ if (set_isempty(enumerator->match_sysname))
+ return true;
+
+ SET_FOREACH(sysname_match, enumerator->match_sysname)
+ if (fnmatch(sysname_match, sysname, 0) == 0)
+ return true;
+
+ return false;
+}
+
+static int enumerator_scan_dir_and_add_devices(sd_device_enumerator *enumerator, const char *basedir, const char *subdir1, const char *subdir2) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ struct dirent *dent;
+ int r = 0;
+
+ assert(enumerator);
+ assert(basedir);
+
+ path = strjoina("/sys/", basedir, "/");
+
+ if (subdir1)
+ path = strjoina(path, subdir1, "/");
+
+ if (subdir2)
+ path = strjoina(path, subdir2, "/");
+
+ dir = opendir(path);
+ if (!dir)
+ /* this is necessarily racey, so ignore missing directories */
+ return (errno == ENOENT && (subdir1 || subdir2)) ? 0 : -errno;
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ char syspath[strlen(path) + 1 + strlen(dent->d_name) + 1];
+ int initialized, k;
+
+ if (dent->d_name[0] == '.')
+ continue;
+
+ if (!match_sysname(enumerator, dent->d_name))
+ continue;
+
+ (void) sprintf(syspath, "%s%s", path, dent->d_name);
+
+ k = sd_device_new_from_syspath(&device, syspath);
+ if (k < 0) {
+ if (k != -ENODEV)
+ /* this is necessarily racey, so ignore missing devices */
+ r = k;
+
+ continue;
+ }
+
+ initialized = sd_device_get_is_initialized(device);
+ if (initialized < 0) {
+ if (initialized != -ENOENT)
+ /* this is necessarily racey, so ignore missing devices */
+ r = initialized;
+
+ continue;
+ }
+
+ /*
+ * All devices with a device node or network interfaces
+ * possibly need udev to adjust the device node permission
+ * or context, or rename the interface before it can be
+ * reliably used from other processes.
+ *
+ * For now, we can only check these types of devices, we
+ * might not store a database, and have no way to find out
+ * for all other types of devices.
+ */
+ if (!enumerator->match_allow_uninitialized &&
+ !initialized &&
+ (sd_device_get_devnum(device, NULL) >= 0 ||
+ sd_device_get_ifindex(device, NULL) >= 0))
+ continue;
+
+ if (!match_parent(enumerator, device))
+ continue;
+
+ if (!match_tag(enumerator, device))
+ continue;
+
+ if (!match_property(enumerator, device))
+ continue;
+
+ if (!match_sysattr(enumerator, device))
+ continue;
+
+ k = device_enumerator_add_device(enumerator, device);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool match_subsystem(sd_device_enumerator *enumerator, const char *subsystem) {
+ const char *subsystem_match;
+
+ assert(enumerator);
+
+ if (!subsystem)
+ return false;
+
+ SET_FOREACH(subsystem_match, enumerator->nomatch_subsystem)
+ if (fnmatch(subsystem_match, subsystem, 0) == 0)
+ return false;
+
+ if (set_isempty(enumerator->match_subsystem))
+ return true;
+
+ SET_FOREACH(subsystem_match, enumerator->match_subsystem)
+ if (fnmatch(subsystem_match, subsystem, 0) == 0)
+ return true;
+
+ return false;
+}
+
+static int enumerator_scan_dir(sd_device_enumerator *enumerator, const char *basedir, const char *subdir, const char *subsystem) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ struct dirent *dent;
+ int r = 0;
+
+ path = strjoina("/sys/", basedir);
+
+ dir = opendir(path);
+ if (!dir)
+ return -errno;
+
+ log_debug("sd-device-enumerator: Scanning %s", path);
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ int k;
+
+ if (dent->d_name[0] == '.')
+ continue;
+
+ if (!match_subsystem(enumerator, subsystem ? : dent->d_name))
+ continue;
+
+ k = enumerator_scan_dir_and_add_devices(enumerator, basedir, dent->d_name, subdir);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_tag(sd_device_enumerator *enumerator, const char *tag) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ struct dirent *dent;
+ int r = 0;
+
+ assert(enumerator);
+ assert(tag);
+
+ path = strjoina("/run/udev/tags/", tag);
+
+ dir = opendir(path);
+ if (!dir) {
+ if (errno != ENOENT)
+ return log_debug_errno(errno, "sd-device-enumerator: Failed to open tags directory %s: %m", path);
+ return 0;
+ }
+
+ /* TODO: filter away subsystems? */
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *subsystem, *sysname;
+ int k;
+
+ if (dent->d_name[0] == '.')
+ continue;
+
+ k = sd_device_new_from_device_id(&device, dent->d_name);
+ if (k < 0) {
+ if (k != -ENODEV)
+ /* this is necessarily racy, so ignore missing devices */
+ r = k;
+
+ continue;
+ }
+
+ k = sd_device_get_subsystem(device, &subsystem);
+ if (k < 0) {
+ if (k != -ENOENT)
+ /* this is necessarily racy, so ignore missing devices */
+ r = k;
+ continue;
+ }
+
+ if (!match_subsystem(enumerator, subsystem))
+ continue;
+
+ k = sd_device_get_sysname(device, &sysname);
+ if (k < 0) {
+ r = k;
+ continue;
+ }
+
+ if (!match_sysname(enumerator, sysname))
+ continue;
+
+ if (!match_parent(enumerator, device))
+ continue;
+
+ if (!match_property(enumerator, device))
+ continue;
+
+ if (!match_sysattr(enumerator, device))
+ continue;
+
+ k = device_enumerator_add_device(enumerator, device);
+ if (k < 0) {
+ r = k;
+ continue;
+ }
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_tags(sd_device_enumerator *enumerator) {
+ const char *tag;
+ int r = 0;
+
+ assert(enumerator);
+
+ SET_FOREACH(tag, enumerator->match_tag) {
+ int k;
+
+ k = enumerator_scan_devices_tag(enumerator, tag);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int parent_add_child(sd_device_enumerator *enumerator, const char *path) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *subsystem, *sysname;
+ int r;
+
+ r = sd_device_new_from_syspath(&device, path);
+ if (r == -ENODEV)
+ /* this is necessarily racy, so ignore missing devices */
+ return 0;
+ else if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (!match_subsystem(enumerator, subsystem))
+ return 0;
+
+ r = sd_device_get_sysname(device, &sysname);
+ if (r < 0)
+ return r;
+
+ if (!match_sysname(enumerator, sysname))
+ return 0;
+
+ if (!match_property(enumerator, device))
+ return 0;
+
+ if (!match_sysattr(enumerator, device))
+ return 0;
+
+ r = device_enumerator_add_device(enumerator, device);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int parent_crawl_children(sd_device_enumerator *enumerator, const char *path, unsigned maxdepth) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ int r = 0;
+
+ dir = opendir(path);
+ if (!dir)
+ return log_debug_errno(errno, "sd-device-enumerator: Failed to open parent directory %s: %m", path);
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ _cleanup_free_ char *child = NULL;
+ int k;
+
+ if (dent->d_name[0] == '.')
+ continue;
+
+ if (dent->d_type != DT_DIR)
+ continue;
+
+ child = path_join(path, dent->d_name);
+ if (!child)
+ return -ENOMEM;
+
+ k = parent_add_child(enumerator, child);
+ if (k < 0)
+ r = k;
+
+ if (maxdepth > 0)
+ parent_crawl_children(enumerator, child, maxdepth - 1);
+ else
+ log_debug("sd-device-enumerator: Max depth reached, %s: ignoring devices", child);
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_children(sd_device_enumerator *enumerator) {
+ const char *path;
+ int r = 0, k;
+
+ SET_FOREACH(path, enumerator->match_parent) {
+ k = parent_add_child(enumerator, path);
+ if (k < 0)
+ r = k;
+
+ k = parent_crawl_children(enumerator, path, DEVICE_ENUMERATE_MAX_DEPTH);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_all(sd_device_enumerator *enumerator) {
+ int r = 0;
+
+ log_debug("sd-device-enumerator: Scan all dirs");
+
+ if (access("/sys/subsystem", F_OK) >= 0) {
+ /* we have /subsystem/, forget all the old stuff */
+ r = enumerator_scan_dir(enumerator, "subsystem", "devices", NULL);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-enumerator: Failed to scan /sys/subsystem: %m");
+ } else {
+ int k;
+
+ k = enumerator_scan_dir(enumerator, "bus", "devices", NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan /sys/bus: %m");
+
+ k = enumerator_scan_dir(enumerator, "class", NULL, NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan /sys/class: %m");
+ }
+
+ return r;
+}
+
+static void device_enumerator_dedup_devices(sd_device_enumerator *enumerator) {
+ sd_device **a, **b, **end;
+
+ assert(enumerator);
+
+ if (enumerator->n_devices <= 1)
+ return;
+
+ a = enumerator->devices + 1;
+ b = enumerator->devices;
+ end = enumerator->devices + enumerator->n_devices;
+
+ for (; a < end; a++) {
+ const char *devpath_a, *devpath_b;
+
+ assert_se(sd_device_get_devpath(*a, &devpath_a) >= 0);
+ assert_se(sd_device_get_devpath(*b, &devpath_b) >= 0);
+
+ if (path_equal(devpath_a, devpath_b))
+ sd_device_unref(*a);
+ else
+ *(++b) = *a;
+ }
+
+ enumerator->n_devices = b - enumerator->devices + 1;
+}
+
+int device_enumerator_scan_devices(sd_device_enumerator *enumerator) {
+ int r = 0, k;
+ size_t i;
+
+ assert(enumerator);
+
+ if (enumerator->scan_uptodate &&
+ enumerator->type == DEVICE_ENUMERATION_TYPE_DEVICES)
+ return 0;
+
+ for (i = 0; i < enumerator->n_devices; i++)
+ sd_device_unref(enumerator->devices[i]);
+
+ enumerator->n_devices = 0;
+
+ if (!set_isempty(enumerator->match_tag)) {
+ k = enumerator_scan_devices_tags(enumerator);
+ if (k < 0)
+ r = k;
+ } else if (enumerator->match_parent) {
+ k = enumerator_scan_devices_children(enumerator);
+ if (k < 0)
+ r = k;
+ } else {
+ k = enumerator_scan_devices_all(enumerator);
+ if (k < 0)
+ r = k;
+ }
+
+ typesafe_qsort(enumerator->devices, enumerator->n_devices, device_compare);
+ device_enumerator_dedup_devices(enumerator);
+
+ enumerator->scan_uptodate = true;
+ enumerator->type = DEVICE_ENUMERATION_TYPE_DEVICES;
+
+ return r;
+}
+
+_public_ sd_device *sd_device_enumerator_get_device_first(sd_device_enumerator *enumerator) {
+ int r;
+
+ assert_return(enumerator, NULL);
+
+ r = device_enumerator_scan_devices(enumerator);
+ if (r < 0)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+_public_ sd_device *sd_device_enumerator_get_device_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ enumerator->type != DEVICE_ENUMERATION_TYPE_DEVICES ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+int device_enumerator_scan_subsystems(sd_device_enumerator *enumerator) {
+ const char *subsysdir;
+ int r = 0, k;
+ size_t i;
+
+ assert(enumerator);
+
+ if (enumerator->scan_uptodate &&
+ enumerator->type == DEVICE_ENUMERATION_TYPE_SUBSYSTEMS)
+ return 0;
+
+ for (i = 0; i < enumerator->n_devices; i++)
+ sd_device_unref(enumerator->devices[i]);
+
+ enumerator->n_devices = 0;
+
+ /* modules */
+ if (match_subsystem(enumerator, "module")) {
+ k = enumerator_scan_dir_and_add_devices(enumerator, "module", NULL, NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan modules: %m");
+ }
+
+ if (access("/sys/subsystem", F_OK) >= 0)
+ subsysdir = "subsystem";
+ else
+ subsysdir = "bus";
+
+ /* subsystems (only buses support coldplug) */
+ if (match_subsystem(enumerator, "subsystem")) {
+ k = enumerator_scan_dir_and_add_devices(enumerator, subsysdir, NULL, NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan subsystems: %m");
+ }
+
+ /* subsystem drivers */
+ if (match_subsystem(enumerator, "drivers")) {
+ k = enumerator_scan_dir(enumerator, subsysdir, "drivers", "drivers");
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan drivers: %m");
+ }
+
+ typesafe_qsort(enumerator->devices, enumerator->n_devices, device_compare);
+ device_enumerator_dedup_devices(enumerator);
+
+ enumerator->scan_uptodate = true;
+ enumerator->type = DEVICE_ENUMERATION_TYPE_SUBSYSTEMS;
+
+ return r;
+}
+
+_public_ sd_device *sd_device_enumerator_get_subsystem_first(sd_device_enumerator *enumerator) {
+ int r;
+
+ assert_return(enumerator, NULL);
+
+ r = device_enumerator_scan_subsystems(enumerator);
+ if (r < 0)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+_public_ sd_device *sd_device_enumerator_get_subsystem_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ enumerator->type != DEVICE_ENUMERATION_TYPE_SUBSYSTEMS ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+sd_device *device_enumerator_get_first(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+sd_device *device_enumerator_get_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+sd_device **device_enumerator_get_devices(sd_device_enumerator *enumerator, size_t *ret_n_devices) {
+ assert(enumerator);
+ assert(ret_n_devices);
+
+ if (!enumerator->scan_uptodate)
+ return NULL;
+
+ *ret_n_devices = enumerator->n_devices;
+ return enumerator->devices;
+}
diff --git a/src/libsystemd/sd-device/device-internal.h b/src/libsystemd/sd-device/device-internal.h
new file mode 100644
index 0000000..3321c8e
--- /dev/null
+++ b/src/libsystemd/sd-device/device-internal.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+#include "device-private.h"
+#include "hashmap.h"
+#include "set.h"
+#include "time-util.h"
+
+#define LATEST_UDEV_DATABASE_VERSION 1
+
+struct sd_device {
+ unsigned n_ref;
+
+ /* The database version indicates the supported features by the udev database.
+ * This is saved and parsed in V field.
+ *
+ * 0: None of the following features are supported (systemd version <= 246).
+ * 1: The current tags (Q) and the database version (V) features are implemented (>= 247).
+ */
+ unsigned database_version;
+
+ int watch_handle;
+
+ sd_device *parent;
+
+ OrderedHashmap *properties;
+ Iterator properties_iterator;
+ uint64_t properties_generation; /* changes whenever the properties are changed */
+ uint64_t properties_iterator_generation; /* generation when iteration was started */
+
+ /* the subset of the properties that should be written to the db */
+ OrderedHashmap *properties_db;
+
+ Hashmap *sysattr_values; /* cached sysattr values */
+
+ Set *sysattrs; /* names of sysattrs */
+ Iterator sysattrs_iterator;
+
+ Set *all_tags, *current_tags;
+ Iterator all_tags_iterator, current_tags_iterator;
+ uint64_t all_tags_iterator_generation, current_tags_iterator_generation; /* generation when iteration was started */
+ uint64_t tags_generation; /* changes whenever the tags are changed */
+
+ Set *devlinks;
+ Iterator devlinks_iterator;
+ uint64_t devlinks_generation; /* changes whenever the devlinks are changed */
+ uint64_t devlinks_iterator_generation; /* generation when iteration was started */
+ int devlink_priority;
+
+ int ifindex;
+ char *devtype;
+ char *devname;
+ dev_t devnum;
+
+ char **properties_strv; /* the properties hashmap as a strv */
+ uint8_t *properties_nulstr; /* the same as a nulstr */
+ size_t properties_nulstr_len;
+
+ char *syspath;
+ const char *devpath;
+ const char *sysnum;
+ char *sysname;
+
+ char *subsystem;
+ char *driver_subsystem; /* only set for the 'drivers' subsystem */
+ char *driver;
+
+ char *id_filename;
+
+ uint64_t usec_initialized;
+
+ mode_t devmode;
+ uid_t devuid;
+ gid_t devgid;
+
+ /* only set when device is passed through netlink */
+ DeviceAction action;
+ uint64_t seqnum;
+
+ bool parent_set:1; /* no need to try to reload parent */
+ bool sysattrs_read:1; /* don't try to re-read sysattrs once read */
+ bool property_tags_outdated:1; /* need to update TAGS= or CURRENT_TAGS= property */
+ bool property_devlinks_outdated:1; /* need to update DEVLINKS= property */
+ bool properties_buf_outdated:1; /* need to reread hashmap */
+ bool sysname_set:1; /* don't reread sysname */
+ bool subsystem_set:1; /* don't reread subsystem */
+ bool driver_subsystem_set:1; /* don't reread subsystem */
+ bool driver_set:1; /* don't reread driver */
+ bool uevent_loaded:1; /* don't reread uevent */
+ bool db_loaded; /* don't reread db */
+
+ bool is_initialized:1;
+ bool sealed:1; /* don't read more information from uevent/db */
+ bool db_persist:1; /* don't clean up the db when switching from initrd to real root */
+};
+
+int device_new_aux(sd_device **ret);
+int device_add_property_aux(sd_device *device, const char *key, const char *value, bool db);
+static inline int device_add_property_internal(sd_device *device, const char *key, const char *value) {
+ return device_add_property_aux(device, key, value, false);
+}
+int device_read_uevent_file(sd_device *device);
+
+int device_set_syspath(sd_device *device, const char *_syspath, bool verify);
+int device_set_ifindex(sd_device *device, const char *ifindex);
+int device_set_devmode(sd_device *device, const char *devmode);
+int device_set_devname(sd_device *device, const char *devname);
+int device_set_devtype(sd_device *device, const char *devtype);
+int device_set_devnum(sd_device *device, const char *major, const char *minor);
+int device_set_subsystem(sd_device *device, const char *_subsystem);
+int device_set_driver(sd_device *device, const char *_driver);
+int device_set_usec_initialized(sd_device *device, usec_t when);
diff --git a/src/libsystemd/sd-device/device-monitor-private.h b/src/libsystemd/sd-device/device-monitor-private.h
new file mode 100644
index 0000000..2ca3a31
--- /dev/null
+++ b/src/libsystemd/sd-device/device-monitor-private.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+typedef enum MonitorNetlinkGroup {
+ MONITOR_GROUP_NONE,
+ MONITOR_GROUP_KERNEL,
+ MONITOR_GROUP_UDEV,
+ _MONITOR_NETLINK_GROUP_MAX,
+ _MONITOR_NETLINK_GROUP_INVALID = -1,
+} MonitorNetlinkGroup;
+
+int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd);
+int device_monitor_disconnect(sd_device_monitor *m);
+int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender);
+int device_monitor_enable_receiving(sd_device_monitor *m);
+int device_monitor_get_fd(sd_device_monitor *m);
+int device_monitor_send_device(sd_device_monitor *m, sd_device_monitor *destination, sd_device *device);
+int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret);
diff --git a/src/libsystemd/sd-device/device-monitor.c b/src/libsystemd/sd-device/device-monitor.c
new file mode 100644
index 0000000..fd59007
--- /dev/null
+++ b/src/libsystemd/sd-device/device-monitor.c
@@ -0,0 +1,772 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <linux/netlink.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "MurmurHash2.h"
+#include "alloc-util.h"
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "missing_socket.h"
+#include "mountpoint-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+struct sd_device_monitor {
+ unsigned n_ref;
+
+ int sock;
+ union sockaddr_union snl;
+ union sockaddr_union snl_trusted_sender;
+ bool bound;
+
+ Hashmap *subsystem_filter;
+ Set *tag_filter;
+ bool filter_uptodate;
+
+ sd_event *event;
+ sd_event_source *event_source;
+ sd_device_monitor_handler_t callback;
+ void *userdata;
+};
+
+#define UDEV_MONITOR_MAGIC 0xfeedcafe
+
+typedef struct monitor_netlink_header {
+ /* "libudev" prefix to distinguish libudev and kernel messages */
+ char prefix[8];
+ /* Magic to protect against daemon <-> Library message format mismatch
+ * Used in the kernel from socket filter rules; needs to be stored in network order */
+ unsigned magic;
+ /* Total length of header structure known to the sender */
+ unsigned header_size;
+ /* Properties string buffer */
+ unsigned properties_off;
+ unsigned properties_len;
+ /* Hashes of primary device properties strings, to let libudev subscribers
+ * use in-kernel socket filters; values need to be stored in network order */
+ unsigned filter_subsystem_hash;
+ unsigned filter_devtype_hash;
+ unsigned filter_tag_bloom_hi;
+ unsigned filter_tag_bloom_lo;
+} monitor_netlink_header;
+
+static int monitor_set_nl_address(sd_device_monitor *m) {
+ union sockaddr_union snl;
+ socklen_t addrlen;
+
+ assert(m);
+
+ /* Get the address the kernel has assigned us.
+ * It is usually, but not necessarily the pid. */
+ addrlen = sizeof(struct sockaddr_nl);
+ if (getsockname(m->sock, &snl.sa, &addrlen) < 0)
+ return -errno;
+
+ m->snl.nl.nl_pid = snl.nl.nl_pid;
+ return 0;
+}
+
+int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender) {
+ assert_return(m, -EINVAL);
+ assert_return(sender, -EINVAL);
+
+ m->snl_trusted_sender.nl.nl_pid = sender->snl.nl.nl_pid;
+ return 0;
+}
+
+_public_ int sd_device_monitor_set_receive_buffer_size(sd_device_monitor *m, size_t size) {
+ assert_return(m, -EINVAL);
+
+ return fd_set_rcvbuf(m->sock, size, false);
+}
+
+int device_monitor_disconnect(sd_device_monitor *m) {
+ assert(m);
+
+ m->sock = safe_close(m->sock);
+ return 0;
+}
+
+int device_monitor_get_fd(sd_device_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ return m->sock;
+}
+
+int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
+ _cleanup_close_ int sock = -1;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(group >= 0 && group < _MONITOR_NETLINK_GROUP_MAX, -EINVAL);
+
+ if (group == MONITOR_GROUP_UDEV &&
+ access("/run/udev/control", F_OK) < 0 &&
+ dev_is_devtmpfs() <= 0) {
+
+ /*
+ * We do not support subscribing to uevents if no instance of
+ * udev is running. Uevents would otherwise broadcast the
+ * processing data of the host into containers, which is not
+ * desired.
+ *
+ * Containers will currently not get any udev uevents, until
+ * a supporting infrastructure is available.
+ *
+ * We do not set a netlink multicast group here, so the socket
+ * will not receive any messages.
+ */
+
+ log_debug("sd-device-monitor: The udev service seems not to be active, disabling the monitor");
+ group = MONITOR_GROUP_NONE;
+ }
+
+ if (fd < 0) {
+ sock = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_KOBJECT_UEVENT);
+ if (sock < 0)
+ return log_debug_errno(errno, "sd-device-monitor: Failed to create socket: %m");
+ }
+
+ m = new(sd_device_monitor, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (sd_device_monitor) {
+ .n_ref = 1,
+ .sock = fd >= 0 ? fd : TAKE_FD(sock),
+ .bound = fd >= 0,
+ .snl.nl.nl_family = AF_NETLINK,
+ .snl.nl.nl_groups = group,
+ };
+
+ if (fd >= 0) {
+ r = monitor_set_nl_address(m);
+ if (r < 0) {
+ log_debug_errno(r, "sd-device-monitor: Failed to set netlink address: %m");
+ goto fail;
+ }
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_close_ int netns = -1;
+
+ /* So here's the thing: only AF_NETLINK sockets from the main network namespace will get
+ * hardware events. Let's check if ours is from there, and if not generate a debug message,
+ * since we cannot possibly work correctly otherwise. This is just a safety check to make
+ * things easier to debug. */
+
+ netns = ioctl(m->sock, SIOCGSKNS);
+ if (netns < 0)
+ log_debug_errno(errno, "sd-device-monitor: Unable to get network namespace of udev netlink socket, unable to determine if we are in host netns: %m");
+ else {
+ struct stat a, b;
+
+ if (fstat(netns, &a) < 0) {
+ r = log_debug_errno(errno, "sd-device-monitor: Failed to stat netns of udev netlink socket: %m");
+ goto fail;
+ }
+
+ if (stat("/proc/1/ns/net", &b) < 0) {
+ if (ERRNO_IS_PRIVILEGE(errno))
+ /* If we can't access PID1's netns info due to permissions, it's fine, this is a
+ * safety check only after all. */
+ log_debug_errno(errno, "sd-device-monitor: No permission to stat PID1's netns, unable to determine if we are in host netns: %m");
+ else
+ log_debug_errno(errno, "sd-device-monitor: Failed to stat PID1's netns: %m");
+
+ } else if (a.st_dev != b.st_dev || a.st_ino != b.st_ino)
+ log_debug("sd-device-monitor: Netlink socket we listen on is not from host netns, we won't see device events.");
+ }
+ }
+
+ *ret = TAKE_PTR(m);
+ return 0;
+
+fail:
+ /* Let's unset the socket fd in the monitor object before we destroy it so that the fd passed in is
+ * not closed on failure. */
+ if (fd >= 0)
+ m->sock = -1;
+
+ return r;
+}
+
+_public_ int sd_device_monitor_new(sd_device_monitor **ret) {
+ return device_monitor_new_full(ret, MONITOR_GROUP_UDEV, -1);
+}
+
+_public_ int sd_device_monitor_stop(sd_device_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ m->event_source = sd_event_source_unref(m->event_source);
+ (void) device_monitor_disconnect(m);
+
+ return 0;
+}
+
+static int device_monitor_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ sd_device_monitor *m = userdata;
+
+ assert(m);
+
+ if (device_monitor_receive_device(m, &device) <= 0)
+ return 0;
+
+ if (m->callback)
+ return m->callback(m, device, m->userdata);
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_start(sd_device_monitor *m, sd_device_monitor_handler_t callback, void *userdata) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (!m->event) {
+ r = sd_device_monitor_attach_event(m, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = device_monitor_enable_receiving(m);
+ if (r < 0)
+ return r;
+
+ m->callback = callback;
+ m->userdata = userdata;
+
+ r = sd_event_add_io(m->event, &m->event_source, m->sock, EPOLLIN, device_monitor_event_handler, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->event_source, "sd-device-monitor");
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_detach_event(sd_device_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ (void) sd_device_monitor_stop(m);
+ m->event = sd_event_unref(m->event);
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_attach_event(sd_device_monitor *m, sd_event *event) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->event, -EBUSY);
+
+ if (event)
+ m->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+_public_ sd_event *sd_device_monitor_get_event(sd_device_monitor *m) {
+ assert_return(m, NULL);
+
+ return m->event;
+}
+
+_public_ sd_event_source *sd_device_monitor_get_event_source(sd_device_monitor *m) {
+ assert_return(m, NULL);
+
+ return m->event_source;
+}
+
+int device_monitor_enable_receiving(sd_device_monitor *m) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = sd_device_monitor_filter_update(m);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to update filter: %m");
+
+ if (!m->bound) {
+ /* enable receiving of sender credentials */
+ r = setsockopt_int(m->sock, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to set socket option SO_PASSCRED: %m");
+
+ if (bind(m->sock, &m->snl.sa, sizeof(struct sockaddr_nl)) < 0)
+ return log_debug_errno(errno, "sd-device-monitor: Failed to bind monitoring socket: %m");
+
+ m->bound = true;
+
+ r = monitor_set_nl_address(m);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to set address: %m");
+ }
+
+ return 0;
+}
+
+static sd_device_monitor *device_monitor_free(sd_device_monitor *m) {
+ assert(m);
+
+ (void) sd_device_monitor_detach_event(m);
+
+ hashmap_free_free_free(m->subsystem_filter);
+ set_free_free(m->tag_filter);
+
+ return mfree(m);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_monitor, sd_device_monitor, device_monitor_free);
+
+static int passes_filter(sd_device_monitor *m, sd_device *device) {
+ const char *tag, *subsystem, *devtype, *s, *d = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(device, -EINVAL);
+
+ if (hashmap_isempty(m->subsystem_filter))
+ goto tag;
+
+ r = sd_device_get_subsystem(device, &s);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devtype(device, &d);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ HASHMAP_FOREACH_KEY(devtype, subsystem, m->subsystem_filter) {
+ if (!streq(s, subsystem))
+ continue;
+
+ if (!devtype)
+ goto tag;
+
+ if (!d)
+ continue;
+
+ if (streq(d, devtype))
+ goto tag;
+ }
+
+ return 0;
+
+tag:
+ if (set_isempty(m->tag_filter))
+ return 1;
+
+ SET_FOREACH(tag, m->tag_filter)
+ if (sd_device_has_tag(device, tag) > 0)
+ return 1;
+
+ return 0;
+}
+
+int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ union {
+ monitor_netlink_header nlh;
+ char raw[8192];
+ } buf;
+ struct iovec iov = {
+ .iov_base = &buf,
+ .iov_len = sizeof(buf)
+ };
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ union sockaddr_union snl;
+ struct msghdr smsg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_name = &snl,
+ .msg_namelen = sizeof(snl),
+ };
+ struct cmsghdr *cmsg;
+ struct ucred *cred;
+ ssize_t buflen, bufpos;
+ bool is_initialized = false;
+ int r;
+
+ assert(ret);
+
+ buflen = recvmsg(m->sock, &smsg, 0);
+ if (buflen < 0) {
+ if (errno != EINTR)
+ log_debug_errno(errno, "sd-device-monitor: Failed to receive message: %m");
+ return -errno;
+ }
+
+ if (buflen < 32 || (smsg.msg_flags & MSG_TRUNC))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "sd-device-monitor: Invalid message length.");
+
+ if (snl.nl.nl_groups == MONITOR_GROUP_NONE) {
+ /* unicast message, check if we trust the sender */
+ if (m->snl_trusted_sender.nl.nl_pid == 0 ||
+ snl.nl.nl_pid != m->snl_trusted_sender.nl.nl_pid)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Unicast netlink message ignored.");
+
+ } else if (snl.nl.nl_groups == MONITOR_GROUP_KERNEL) {
+ if (snl.nl.nl_pid > 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Multicast kernel netlink message from PID %"PRIu32" ignored.", snl.nl.nl_pid);
+ }
+
+ cmsg = CMSG_FIRSTHDR(&smsg);
+ if (!cmsg || cmsg->cmsg_type != SCM_CREDENTIALS)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: No sender credentials received, message ignored.");
+
+ cred = (struct ucred*) CMSG_DATA(cmsg);
+ if (cred->uid != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Sender uid="UID_FMT", message ignored.", cred->uid);
+
+ if (streq(buf.raw, "libudev")) {
+ /* udev message needs proper version magic */
+ if (buf.nlh.magic != htobe32(UDEV_MONITOR_MAGIC))
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Invalid message signature (%x != %x)",
+ buf.nlh.magic, htobe32(UDEV_MONITOR_MAGIC));
+
+ if (buf.nlh.properties_off+32 > (size_t) buflen)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Invalid message length (%u > %zd)",
+ buf.nlh.properties_off+32, buflen);
+
+ bufpos = buf.nlh.properties_off;
+
+ /* devices received from udev are always initialized */
+ is_initialized = true;
+
+ } else {
+ /* kernel message with header */
+ bufpos = strlen(buf.raw) + 1;
+ if ((size_t) bufpos < sizeof("a@/d") || bufpos >= buflen)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Invalid message length");
+
+ /* check message header */
+ if (!strstr(buf.raw, "@/"))
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "sd-device-monitor: Invalid message header");
+ }
+
+ r = device_new_from_nulstr(&device, (uint8_t*) &buf.raw[bufpos], buflen - bufpos);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device-monitor: Failed to create device from received message: %m");
+
+ if (is_initialized)
+ device_set_is_initialized(device);
+
+ /* Skip device, if it does not pass the current filter */
+ r = passes_filter(m, device);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device-monitor: Failed to check received device passing filter: %m");
+ if (r == 0)
+ log_device_debug(device, "sd-device-monitor: Received device does not pass filter, ignoring");
+ else
+ *ret = TAKE_PTR(device);
+
+ return r;
+}
+
+static uint32_t string_hash32(const char *str) {
+ return MurmurHash2(str, strlen(str), 0);
+}
+
+/* Get a bunch of bit numbers out of the hash, and set the bits in our bit field */
+static uint64_t string_bloom64(const char *str) {
+ uint64_t bits = 0;
+ uint32_t hash = string_hash32(str);
+
+ bits |= 1LLU << (hash & 63);
+ bits |= 1LLU << ((hash >> 6) & 63);
+ bits |= 1LLU << ((hash >> 12) & 63);
+ bits |= 1LLU << ((hash >> 18) & 63);
+ return bits;
+}
+
+int device_monitor_send_device(
+ sd_device_monitor *m,
+ sd_device_monitor *destination,
+ sd_device *device) {
+
+ monitor_netlink_header nlh = {
+ .prefix = "libudev",
+ .magic = htobe32(UDEV_MONITOR_MAGIC),
+ .header_size = sizeof nlh,
+ };
+ struct iovec iov[2] = {
+ { .iov_base = &nlh, .iov_len = sizeof nlh },
+ };
+ struct msghdr smsg = {
+ .msg_iov = iov,
+ .msg_iovlen = 2,
+ };
+ /* default destination for sending */
+ union sockaddr_union default_destination = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_groups = MONITOR_GROUP_UDEV,
+ };
+ uint64_t tag_bloom_bits;
+ const char *buf, *val;
+ ssize_t count;
+ size_t blen;
+ int r;
+
+ assert(m);
+ assert(device);
+
+ r = device_get_properties_nulstr(device, (const uint8_t **) &buf, &blen);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device-monitor: Failed to get device properties: %m");
+ if (blen < 32)
+ log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL),
+ "sd-device-monitor: Length of device property nulstr is too small to contain valid device information");
+
+ /* fill in versioned header */
+ r = sd_device_get_subsystem(device, &val);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device-monitor: Failed to get device subsystem: %m");
+ nlh.filter_subsystem_hash = htobe32(string_hash32(val));
+
+ if (sd_device_get_devtype(device, &val) >= 0)
+ nlh.filter_devtype_hash = htobe32(string_hash32(val));
+
+ /* add tag bloom filter */
+ tag_bloom_bits = 0;
+ FOREACH_DEVICE_TAG(device, val)
+ tag_bloom_bits |= string_bloom64(val);
+
+ if (tag_bloom_bits > 0) {
+ nlh.filter_tag_bloom_hi = htobe32(tag_bloom_bits >> 32);
+ nlh.filter_tag_bloom_lo = htobe32(tag_bloom_bits & 0xffffffff);
+ }
+
+ /* add properties list */
+ nlh.properties_off = iov[0].iov_len;
+ nlh.properties_len = blen;
+ iov[1] = IOVEC_MAKE((char*) buf, blen);
+
+ /*
+ * Use custom address for target, or the default one.
+ *
+ * If we send to a multicast group, we will get
+ * ECONNREFUSED, which is expected.
+ */
+ smsg.msg_name = destination ? &destination->snl : &default_destination;
+ smsg.msg_namelen = sizeof(struct sockaddr_nl);
+ count = sendmsg(m->sock, &smsg, 0);
+ if (count < 0) {
+ if (!destination && errno == ECONNREFUSED) {
+ log_device_debug(device, "sd-device-monitor: Passed to netlink monitor");
+ return 0;
+ } else
+ return log_device_debug_errno(device, errno, "sd-device-monitor: Failed to send device to netlink monitor: %m");
+ }
+
+ log_device_debug(device, "sd-device-monitor: Passed %zi byte to netlink monitor", count);
+ return count;
+}
+
+static void bpf_stmt(struct sock_filter *ins, unsigned *i,
+ unsigned short code, unsigned data) {
+ ins[(*i)++] = (struct sock_filter) {
+ .code = code,
+ .k = data,
+ };
+}
+
+static void bpf_jmp(struct sock_filter *ins, unsigned *i,
+ unsigned short code, unsigned data,
+ unsigned short jt, unsigned short jf) {
+ ins[(*i)++] = (struct sock_filter) {
+ .code = code,
+ .jt = jt,
+ .jf = jf,
+ .k = data,
+ };
+}
+
+_public_ int sd_device_monitor_filter_update(sd_device_monitor *m) {
+ struct sock_filter ins[512] = {};
+ struct sock_fprog filter;
+ const char *subsystem, *devtype, *tag;
+ unsigned i = 0;
+
+ assert_return(m, -EINVAL);
+
+ if (m->filter_uptodate)
+ return 0;
+
+ if (hashmap_isempty(m->subsystem_filter) &&
+ set_isempty(m->tag_filter)) {
+ m->filter_uptodate = true;
+ return 0;
+ }
+
+ /* load magic in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, magic));
+ /* jump if magic matches */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, UDEV_MONITOR_MAGIC, 1, 0);
+ /* wrong magic, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ if (!set_isempty(m->tag_filter)) {
+ int tag_matches = set_size(m->tag_filter);
+
+ /* add all tags matches */
+ SET_FOREACH(tag, m->tag_filter) {
+ uint64_t tag_bloom_bits = string_bloom64(tag);
+ uint32_t tag_bloom_hi = tag_bloom_bits >> 32;
+ uint32_t tag_bloom_lo = tag_bloom_bits & 0xffffffff;
+
+ /* load device bloom bits in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_hi));
+ /* clear bits (tag bits & bloom bits) */
+ bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_hi);
+ /* jump to next tag if it does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_hi, 0, 3);
+
+ /* load device bloom bits in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_lo));
+ /* clear bits (tag bits & bloom bits) */
+ bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_lo);
+ /* jump behind end of tag match block if tag matches */
+ tag_matches--;
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_lo, 1 + (tag_matches * 6), 0);
+ }
+
+ /* nothing matched, drop packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0);
+ }
+
+ /* add all subsystem matches */
+ if (!hashmap_isempty(m->subsystem_filter)) {
+ HASHMAP_FOREACH_KEY(devtype, subsystem, m->subsystem_filter) {
+ uint32_t hash = string_hash32(subsystem);
+
+ /* load device subsystem value in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_subsystem_hash));
+ if (!devtype) {
+ /* jump if subsystem does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1);
+ } else {
+ /* jump if subsystem does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 3);
+ /* load device devtype value in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_devtype_hash));
+ /* jump if value does not match */
+ hash = string_hash32(devtype);
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1);
+ }
+
+ /* matched, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ if (i+1 >= ELEMENTSOF(ins))
+ return -E2BIG;
+ }
+
+ /* nothing matched, drop packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0);
+ }
+
+ /* matched, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ /* install filter */
+ filter = (struct sock_fprog) {
+ .len = i,
+ .filter = ins,
+ };
+ if (setsockopt(m->sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)) < 0)
+ return -errno;
+
+ m->filter_uptodate = true;
+ return 0;
+}
+
+_public_ int sd_device_monitor_filter_add_match_subsystem_devtype(sd_device_monitor *m, const char *subsystem, const char *devtype) {
+ _cleanup_free_ char *s = NULL, *d = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ s = strdup(subsystem);
+ if (!s)
+ return -ENOMEM;
+
+ if (devtype) {
+ d = strdup(devtype);
+ if (!d)
+ return -ENOMEM;
+ }
+
+ r = hashmap_ensure_allocated(&m->subsystem_filter, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->subsystem_filter, s, d);
+ if (r < 0)
+ return r;
+
+ s = d = NULL;
+ m->filter_uptodate = false;
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_filter_add_match_tag(sd_device_monitor *m, const char *tag) {
+ assert_return(m, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ int r = set_put_strdup(&m->tag_filter, tag);
+ if (r > 0)
+ m->filter_uptodate = false;
+ return r;
+}
+
+_public_ int sd_device_monitor_filter_remove(sd_device_monitor *m) {
+ static const struct sock_fprog filter = { 0, NULL };
+
+ assert_return(m, -EINVAL);
+
+ m->subsystem_filter = hashmap_free_free_free(m->subsystem_filter);
+ m->tag_filter = set_free_free(m->tag_filter);
+
+ if (setsockopt(m->sock, SOL_SOCKET, SO_DETACH_FILTER, &filter, sizeof(filter)) < 0)
+ return -errno;
+
+ m->filter_uptodate = true;
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/device-private.c b/src/libsystemd/sd-device/device-private.c
new file mode 100644
index 0000000..9070dfb
--- /dev/null
+++ b/src/libsystemd/sd-device/device-private.c
@@ -0,0 +1,1017 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <net/if.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-internal.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+int device_add_property(sd_device *device, const char *key, const char *value) {
+ int r;
+
+ assert(device);
+ assert(key);
+
+ r = device_add_property_aux(device, key, value, false);
+ if (r < 0)
+ return r;
+
+ if (key[0] != '.') {
+ r = device_add_property_aux(device, key, value, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+void device_set_devlink_priority(sd_device *device, int priority) {
+ assert(device);
+
+ device->devlink_priority = priority;
+}
+
+void device_set_is_initialized(sd_device *device) {
+ assert(device);
+
+ device->is_initialized = true;
+}
+
+int device_ensure_usec_initialized(sd_device *device, sd_device *device_old) {
+ usec_t when;
+
+ assert(device);
+
+ if (device_old && device_old->usec_initialized > 0)
+ when = device_old->usec_initialized;
+ else
+ when = now(CLOCK_MONOTONIC);
+
+ return device_set_usec_initialized(device, when);
+}
+
+uint64_t device_get_properties_generation(sd_device *device) {
+ assert(device);
+
+ return device->properties_generation;
+}
+
+uint64_t device_get_tags_generation(sd_device *device) {
+ assert(device);
+
+ return device->tags_generation;
+}
+
+uint64_t device_get_devlinks_generation(sd_device *device) {
+ assert(device);
+
+ return device->devlinks_generation;
+}
+
+int device_get_devnode_mode(sd_device *device, mode_t *mode) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devmode == (mode_t) -1)
+ return -ENOENT;
+
+ if (mode)
+ *mode = device->devmode;
+
+ return 0;
+}
+
+int device_get_devnode_uid(sd_device *device, uid_t *uid) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devuid == (uid_t) -1)
+ return -ENOENT;
+
+ if (uid)
+ *uid = device->devuid;
+
+ return 0;
+}
+
+static int device_set_devuid(sd_device *device, const char *uid) {
+ unsigned u;
+ int r;
+
+ assert(device);
+ assert(uid);
+
+ r = safe_atou(uid, &u);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "DEVUID", uid);
+ if (r < 0)
+ return r;
+
+ device->devuid = u;
+
+ return 0;
+}
+
+int device_get_devnode_gid(sd_device *device, gid_t *gid) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devgid == (gid_t) -1)
+ return -ENOENT;
+
+ if (gid)
+ *gid = device->devgid;
+
+ return 0;
+}
+
+static int device_set_devgid(sd_device *device, const char *gid) {
+ unsigned g;
+ int r;
+
+ assert(device);
+ assert(gid);
+
+ r = safe_atou(gid, &g);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "DEVGID", gid);
+ if (r < 0)
+ return r;
+
+ device->devgid = g;
+
+ return 0;
+}
+
+int device_get_action(sd_device *device, DeviceAction *action) {
+ assert(device);
+
+ if (device->action < 0)
+ return -ENOENT;
+
+ if (action)
+ *action = device->action;
+
+ return 0;
+}
+
+static int device_set_action(sd_device *device, const char *action) {
+ DeviceAction a;
+ int r;
+
+ assert(device);
+ assert(action);
+
+ a = device_action_from_string(action);
+ if (a < 0)
+ return -EINVAL;
+
+ r = device_add_property_internal(device, "ACTION", action);
+ if (r < 0)
+ return r;
+
+ device->action = a;
+
+ return 0;
+}
+
+int device_get_seqnum(sd_device *device, uint64_t *seqnum) {
+ assert(device);
+
+ if (device->seqnum == 0)
+ return -ENOENT;
+
+ if (seqnum)
+ *seqnum = device->seqnum;
+
+ return 0;
+}
+
+static int device_set_seqnum(sd_device *device, const char *str) {
+ uint64_t seqnum;
+ int r;
+
+ assert(device);
+ assert(str);
+
+ r = safe_atou64(str, &seqnum);
+ if (r < 0)
+ return r;
+ if (seqnum == 0)
+ return -EINVAL;
+
+ r = device_add_property_internal(device, "SEQNUM", str);
+ if (r < 0)
+ return r;
+
+ device->seqnum = seqnum;
+
+ return 0;
+}
+
+static int device_amend(sd_device *device, const char *key, const char *value) {
+ int r;
+
+ assert(device);
+ assert(key);
+ assert(value);
+
+ if (streq(key, "DEVPATH")) {
+ char *path;
+
+ path = strjoina("/sys", value);
+
+ /* the caller must verify or trust this data (e.g., if it comes from the kernel) */
+ r = device_set_syspath(device, path, false);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set syspath to '%s': %m", path);
+ } else if (streq(key, "SUBSYSTEM")) {
+ r = device_set_subsystem(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set subsystem to '%s': %m", value);
+ } else if (streq(key, "DEVTYPE")) {
+ r = device_set_devtype(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devtype to '%s': %m", value);
+ } else if (streq(key, "DEVNAME")) {
+ r = device_set_devname(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devname to '%s': %m", value);
+ } else if (streq(key, "USEC_INITIALIZED")) {
+ usec_t t;
+
+ r = safe_atou64(value, &t);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to parse timestamp '%s': %m", value);
+
+ r = device_set_usec_initialized(device, t);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set usec-initialized to '%s': %m", value);
+ } else if (streq(key, "DRIVER")) {
+ r = device_set_driver(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set driver to '%s': %m", value);
+ } else if (streq(key, "IFINDEX")) {
+ r = device_set_ifindex(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set ifindex to '%s': %m", value);
+ } else if (streq(key, "DEVMODE")) {
+ r = device_set_devmode(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devmode to '%s': %m", value);
+ } else if (streq(key, "DEVUID")) {
+ r = device_set_devuid(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devuid to '%s': %m", value);
+ } else if (streq(key, "DEVGID")) {
+ r = device_set_devgid(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devgid to '%s': %m", value);
+ } else if (streq(key, "ACTION")) {
+ r = device_set_action(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set action to '%s': %m", value);
+ } else if (streq(key, "SEQNUM")) {
+ r = device_set_seqnum(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set SEQNUM to '%s': %m", value);
+ } else if (streq(key, "DEVLINKS")) {
+ for (const char *p = value;;) {
+ _cleanup_free_ char *word = NULL;
+
+ /* udev rules may set escaped strings, and sd-device does not modify the input
+ * strings. So, it is also necessary to keep the strings received through
+ * sd-device-monitor. */
+ r = extract_first_word(&p, &word, NULL, EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = device_add_devlink(device, word);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add devlink '%s': %m", word);
+ }
+ } else if (STR_IN_SET(key, "TAGS", "CURRENT_TAGS")) {
+ for (const char *p = value;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = device_add_tag(device, word, streq(key, "CURRENT_TAGS"));
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add tag '%s': %m", word);
+ }
+ } else {
+ r = device_add_property_internal(device, key, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add property '%s=%s': %m", key, value);
+ }
+
+ return 0;
+}
+
+static int device_append(sd_device *device, char *key, const char **_major, const char **_minor) {
+ const char *major = NULL, *minor = NULL;
+ char *value;
+ int r;
+
+ assert(device);
+ assert(key);
+ assert(_major);
+ assert(_minor);
+
+ value = strchr(key, '=');
+ if (!value)
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Not a key-value pair: '%s'", key);
+
+ *value = '\0';
+
+ value++;
+
+ if (streq(key, "MAJOR"))
+ major = value;
+ else if (streq(key, "MINOR"))
+ minor = value;
+ else {
+ r = device_amend(device, key, value);
+ if (r < 0)
+ return r;
+ }
+
+ if (major != 0)
+ *_major = major;
+
+ if (minor != 0)
+ *_minor = minor;
+
+ return 0;
+}
+
+void device_seal(sd_device *device) {
+ assert(device);
+
+ device->sealed = true;
+}
+
+static int device_verify(sd_device *device) {
+ assert(device);
+
+ if (!device->devpath || !device->subsystem || device->action < 0 || device->seqnum == 0)
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Device created from strv or nulstr lacks devpath, subsystem, action or seqnum.");
+
+ device->sealed = true;
+
+ return 0;
+}
+
+int device_new_from_strv(sd_device **ret, char **strv) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ char **key;
+ const char *major = NULL, *minor = NULL;
+ int r;
+
+ assert(ret);
+ assert(strv);
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(key, strv) {
+ r = device_append(device, *key, &major, &minor);
+ if (r < 0)
+ return r;
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devnum %s:%s: %m", major, minor);
+ }
+
+ r = device_verify(device);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+
+ return 0;
+}
+
+int device_new_from_nulstr(sd_device **ret, uint8_t *nulstr, size_t len) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *major = NULL, *minor = NULL;
+ unsigned i = 0;
+ int r;
+
+ assert(ret);
+ assert(nulstr);
+ assert(len);
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ while (i < len) {
+ char *key;
+ const char *end;
+
+ key = (char*)&nulstr[i];
+ end = memchr(key, '\0', len - i);
+ if (!end)
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Failed to parse nulstr");
+
+ i += end - key + 1;
+
+ /* netlink messages for some devices contain an unwanted newline at the end of value.
+ * Let's drop the newline and remaining characters after the newline. */
+ truncate_nl(key);
+
+ r = device_append(device, key, &major, &minor);
+ if (r < 0)
+ return r;
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devnum %s:%s: %m", major, minor);
+ }
+
+ r = device_verify(device);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+
+ return 0;
+}
+
+static int device_update_properties_bufs(sd_device *device) {
+ const char *val, *prop;
+ _cleanup_free_ char **buf_strv = NULL;
+ _cleanup_free_ uint8_t *buf_nulstr = NULL;
+ size_t allocated_nulstr = 0;
+ size_t nulstr_len = 0, num = 0, i = 0;
+
+ assert(device);
+
+ if (!device->properties_buf_outdated)
+ return 0;
+
+ FOREACH_DEVICE_PROPERTY(device, prop, val) {
+ size_t len = 0;
+
+ len = strlen(prop) + 1 + strlen(val);
+
+ buf_nulstr = GREEDY_REALLOC0(buf_nulstr, allocated_nulstr, nulstr_len + len + 2);
+ if (!buf_nulstr)
+ return -ENOMEM;
+
+ strscpyl((char *)buf_nulstr + nulstr_len, len + 1, prop, "=", val, NULL);
+ nulstr_len += len + 1;
+ ++num;
+ }
+
+ /* build buf_strv from buf_nulstr */
+ buf_strv = new0(char *, num + 1);
+ if (!buf_strv)
+ return -ENOMEM;
+
+ NULSTR_FOREACH(val, (char*) buf_nulstr) {
+ buf_strv[i] = (char *) val;
+ assert(i < num);
+ i++;
+ }
+
+ free_and_replace(device->properties_nulstr, buf_nulstr);
+ device->properties_nulstr_len = nulstr_len;
+ free_and_replace(device->properties_strv, buf_strv);
+
+ device->properties_buf_outdated = false;
+
+ return 0;
+}
+
+int device_get_properties_nulstr(sd_device *device, const uint8_t **nulstr, size_t *len) {
+ int r;
+
+ assert(device);
+ assert(nulstr);
+ assert(len);
+
+ r = device_update_properties_bufs(device);
+ if (r < 0)
+ return r;
+
+ *nulstr = device->properties_nulstr;
+ *len = device->properties_nulstr_len;
+
+ return 0;
+}
+
+int device_get_properties_strv(sd_device *device, char ***strv) {
+ int r;
+
+ assert(device);
+ assert(strv);
+
+ r = device_update_properties_bufs(device);
+ if (r < 0)
+ return r;
+
+ *strv = device->properties_strv;
+
+ return 0;
+}
+
+int device_get_devlink_priority(sd_device *device, int *priority) {
+ int r;
+
+ assert(device);
+ assert(priority);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ *priority = device->devlink_priority;
+
+ return 0;
+}
+
+int device_get_watch_handle(sd_device *device, int *handle) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->watch_handle < 0)
+ return -ENOENT;
+
+ if (handle)
+ *handle = device->watch_handle;
+
+ return 0;
+}
+
+void device_set_watch_handle(sd_device *device, int handle) {
+ assert(device);
+
+ device->watch_handle = handle;
+}
+
+int device_rename(sd_device *device, const char *name) {
+ _cleanup_free_ char *dirname = NULL;
+ const char *new_syspath, *interface;
+ int r;
+
+ assert(device);
+ assert(name);
+
+ dirname = dirname_malloc(device->syspath);
+ if (!dirname)
+ return -ENOMEM;
+
+ new_syspath = prefix_roota(dirname, name);
+
+ /* the user must trust that the new name is correct */
+ r = device_set_syspath(device, new_syspath, false);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_property_value(device, "INTERFACE", &interface);
+ if (r >= 0) {
+ /* like DEVPATH_OLD, INTERFACE_OLD is not saved to the db, but only stays around for the current event */
+ r = device_add_property_internal(device, "INTERFACE_OLD", interface);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "INTERFACE", name);
+ if (r < 0)
+ return r;
+ } else if (r != -ENOENT)
+ return r;
+
+ return 0;
+}
+
+int device_shallow_clone(sd_device *old_device, sd_device **new_device) {
+ _cleanup_(sd_device_unrefp) sd_device *ret = NULL;
+ int r;
+
+ assert(old_device);
+ assert(new_device);
+
+ r = device_new_aux(&ret);
+ if (r < 0)
+ return r;
+
+ r = device_set_syspath(ret, old_device->syspath, false);
+ if (r < 0)
+ return r;
+
+ r = device_set_subsystem(ret, old_device->subsystem);
+ if (r < 0)
+ return r;
+
+ ret->devnum = old_device->devnum;
+
+ *new_device = TAKE_PTR(ret);
+
+ return 0;
+}
+
+int device_clone_with_db(sd_device *old_device, sd_device **new_device) {
+ _cleanup_(sd_device_unrefp) sd_device *ret = NULL;
+ int r;
+
+ assert(old_device);
+ assert(new_device);
+
+ r = device_shallow_clone(old_device, &ret);
+ if (r < 0)
+ return r;
+
+ r = device_read_db(ret);
+ if (r < 0)
+ return r;
+
+ ret->sealed = true;
+
+ *new_device = TAKE_PTR(ret);
+
+ return 0;
+}
+
+int device_new_from_synthetic_event(sd_device **new_device, const char *syspath, const char *action) {
+ _cleanup_(sd_device_unrefp) sd_device *ret = NULL;
+ int r;
+
+ assert(new_device);
+ assert(syspath);
+ assert(action);
+
+ r = sd_device_new_from_syspath(&ret, syspath);
+ if (r < 0)
+ return r;
+
+ r = device_read_uevent_file(ret);
+ if (r < 0)
+ return r;
+
+ r = device_set_action(ret, action);
+ if (r < 0)
+ return r;
+
+ *new_device = TAKE_PTR(ret);
+
+ return 0;
+}
+
+int device_new_from_stat_rdev(sd_device **ret, const struct stat *st) {
+ char type;
+
+ assert(ret);
+ assert(st);
+
+ if (S_ISBLK(st->st_mode))
+ type = 'b';
+ else if (S_ISCHR(st->st_mode))
+ type = 'c';
+ else
+ return -ENOTTY;
+
+ return sd_device_new_from_devnum(ret, type, st->st_rdev);
+}
+
+int device_copy_properties(sd_device *device_dst, sd_device *device_src) {
+ const char *property, *value;
+ int r;
+
+ assert(device_dst);
+ assert(device_src);
+
+ r = device_properties_prepare(device_src);
+ if (r < 0)
+ return r;
+
+ ORDERED_HASHMAP_FOREACH_KEY(value, property, device_src->properties_db) {
+ r = device_add_property_aux(device_dst, property, value, true);
+ if (r < 0)
+ return r;
+ }
+
+ ORDERED_HASHMAP_FOREACH_KEY(value, property, device_src->properties) {
+ r = device_add_property_aux(device_dst, property, value, false);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+void device_cleanup_tags(sd_device *device) {
+ assert(device);
+
+ device->all_tags = set_free_free(device->all_tags);
+ device->current_tags = set_free_free(device->current_tags);
+ device->property_tags_outdated = true;
+ device->tags_generation++;
+}
+
+void device_cleanup_devlinks(sd_device *device) {
+ assert(device);
+
+ set_free_free(device->devlinks);
+ device->devlinks = NULL;
+ device->property_devlinks_outdated = true;
+ device->devlinks_generation++;
+}
+
+void device_remove_tag(sd_device *device, const char *tag) {
+ assert(device);
+ assert(tag);
+
+ free(set_remove(device->current_tags, tag));
+ device->property_tags_outdated = true;
+ device->tags_generation++;
+}
+
+static int device_tag(sd_device *device, const char *tag, bool add) {
+ const char *id;
+ char *path;
+ int r;
+
+ assert(device);
+ assert(tag);
+
+ r = device_get_id_filename(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/tags/", tag, "/", id);
+
+ if (add) {
+ r = touch_file(path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, 0444);
+ if (r < 0)
+ return r;
+ } else {
+ r = unlink(path);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int device_tag_index(sd_device *device, sd_device *device_old, bool add) {
+ const char *tag;
+ int r = 0, k;
+
+ if (add && device_old) {
+ /* delete possible left-over tags */
+ FOREACH_DEVICE_TAG(device_old, tag) {
+ if (!sd_device_has_tag(device, tag)) {
+ k = device_tag(device_old, tag, false);
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+ }
+ }
+
+ FOREACH_DEVICE_TAG(device, tag) {
+ k = device_tag(device, tag, add);
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool device_has_info(sd_device *device) {
+ assert(device);
+
+ if (!set_isempty(device->devlinks))
+ return true;
+
+ if (device->devlink_priority != 0)
+ return true;
+
+ if (!ordered_hashmap_isempty(device->properties_db))
+ return true;
+
+ if (!set_isempty(device->all_tags))
+ return true;
+
+ if (!set_isempty(device->current_tags))
+ return true;
+
+ if (device->watch_handle >= 0)
+ return true;
+
+ return false;
+}
+
+void device_set_db_persist(sd_device *device) {
+ assert(device);
+
+ device->db_persist = true;
+}
+
+int device_update_db(sd_device *device) {
+ const char *id;
+ char *path;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *path_tmp = NULL;
+ bool has_info;
+ int r;
+
+ assert(device);
+
+ has_info = device_has_info(device);
+
+ r = device_get_id_filename(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ /* do not store anything for otherwise empty devices */
+ if (!has_info && major(device->devnum) == 0 && device->ifindex == 0) {
+ r = unlink(path);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+ }
+
+ /* write a database file */
+ r = mkdir_parents(path, 0755);
+ if (r < 0)
+ return r;
+
+ r = fopen_temporary(path, &f, &path_tmp);
+ if (r < 0)
+ return r;
+
+ /*
+ * set 'sticky' bit to indicate that we should not clean the
+ * database when we transition from initramfs to the real root
+ */
+ if (device->db_persist) {
+ r = fchmod(fileno(f), 01644);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+ } else {
+ r = fchmod(fileno(f), 0644);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+ }
+
+ if (has_info) {
+ const char *property, *value, *tag;
+
+ if (major(device->devnum) > 0) {
+ const char *devlink;
+
+ FOREACH_DEVICE_DEVLINK(device, devlink)
+ fprintf(f, "S:%s\n", devlink + STRLEN("/dev/"));
+
+ if (device->devlink_priority != 0)
+ fprintf(f, "L:%i\n", device->devlink_priority);
+
+ if (device->watch_handle >= 0)
+ fprintf(f, "W:%i\n", device->watch_handle);
+ }
+
+ if (device->usec_initialized > 0)
+ fprintf(f, "I:"USEC_FMT"\n", device->usec_initialized);
+
+ ORDERED_HASHMAP_FOREACH_KEY(value, property, device->properties_db)
+ fprintf(f, "E:%s=%s\n", property, value);
+
+ FOREACH_DEVICE_TAG(device, tag)
+ fprintf(f, "G:%s\n", tag); /* Any tag */
+
+ SET_FOREACH(tag, device->current_tags)
+ fprintf(f, "Q:%s\n", tag); /* Current tag */
+
+ /* Always write the latest database version here, instead of the value stored in
+ * device->database_version, as which may be 0. */
+ fputs("V:" STRINGIFY(LATEST_UDEV_DATABASE_VERSION) "\n", f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ r = rename(path_tmp, path);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ log_device_debug(device, "sd-device: Created %s file '%s' for '%s'", has_info ? "db" : "empty",
+ path, device->devpath);
+
+ return 0;
+
+fail:
+ (void) unlink(path);
+ (void) unlink(path_tmp);
+
+ return log_device_debug_errno(device, r, "sd-device: Failed to create %s file '%s' for '%s'", has_info ? "db" : "empty", path, device->devpath);
+}
+
+int device_delete_db(sd_device *device) {
+ const char *id;
+ char *path;
+ int r;
+
+ assert(device);
+
+ r = device_get_id_filename(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ r = unlink(path);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+}
+
+static const char* const device_action_table[_DEVICE_ACTION_MAX] = {
+ [DEVICE_ACTION_ADD] = "add",
+ [DEVICE_ACTION_REMOVE] = "remove",
+ [DEVICE_ACTION_CHANGE] = "change",
+ [DEVICE_ACTION_MOVE] = "move",
+ [DEVICE_ACTION_ONLINE] = "online",
+ [DEVICE_ACTION_OFFLINE] = "offline",
+ [DEVICE_ACTION_BIND] = "bind",
+ [DEVICE_ACTION_UNBIND] = "unbind",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(device_action, DeviceAction);
+
+void dump_device_action_table(void) {
+ DUMP_STRING_TABLE(device_action, DeviceAction, _DEVICE_ACTION_MAX);
+}
diff --git a/src/libsystemd/sd-device/device-private.h b/src/libsystemd/sd-device/device-private.h
new file mode 100644
index 0000000..db81934
--- /dev/null
+++ b/src/libsystemd/sd-device/device-private.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "macro.h"
+
+typedef enum DeviceAction {
+ DEVICE_ACTION_ADD,
+ DEVICE_ACTION_REMOVE,
+ DEVICE_ACTION_CHANGE,
+ DEVICE_ACTION_MOVE,
+ DEVICE_ACTION_ONLINE,
+ DEVICE_ACTION_OFFLINE,
+ DEVICE_ACTION_BIND,
+ DEVICE_ACTION_UNBIND,
+ _DEVICE_ACTION_MAX,
+ _DEVICE_ACTION_INVALID = -1,
+} DeviceAction;
+
+int device_new_from_nulstr(sd_device **ret, uint8_t *nulstr, size_t len);
+int device_new_from_strv(sd_device **ret, char **strv);
+int device_new_from_stat_rdev(sd_device **ret, const struct stat *st);
+
+int device_get_id_filename(sd_device *device, const char **ret);
+
+int device_get_devlink_priority(sd_device *device, int *priority);
+int device_get_watch_handle(sd_device *device, int *handle);
+int device_get_devnode_mode(sd_device *device, mode_t *mode);
+int device_get_devnode_uid(sd_device *device, uid_t *uid);
+int device_get_devnode_gid(sd_device *device, gid_t *gid);
+int device_get_action(sd_device *device, DeviceAction *action);
+int device_get_seqnum(sd_device *device, uint64_t *seqnum);
+
+void device_seal(sd_device *device);
+void device_set_is_initialized(sd_device *device);
+void device_set_watch_handle(sd_device *device, int fd);
+void device_set_db_persist(sd_device *device);
+void device_set_devlink_priority(sd_device *device, int priority);
+int device_ensure_usec_initialized(sd_device *device, sd_device *device_old);
+int device_add_devlink(sd_device *device, const char *devlink);
+int device_add_property(sd_device *device, const char *property, const char *value);
+int device_add_tag(sd_device *device, const char *tag, bool both);
+void device_remove_tag(sd_device *device, const char *tag);
+void device_cleanup_tags(sd_device *device);
+void device_cleanup_devlinks(sd_device *device);
+
+uint64_t device_get_properties_generation(sd_device *device);
+uint64_t device_get_tags_generation(sd_device *device);
+uint64_t device_get_devlinks_generation(sd_device *device);
+
+int device_properties_prepare(sd_device *device);
+int device_get_properties_nulstr(sd_device *device, const uint8_t **nulstr, size_t *len);
+int device_get_properties_strv(sd_device *device, char ***strv);
+
+int device_rename(sd_device *device, const char *name);
+int device_shallow_clone(sd_device *old_device, sd_device **new_device);
+int device_clone_with_db(sd_device *old_device, sd_device **new_device);
+int device_copy_properties(sd_device *device_dst, sd_device *device_src);
+int device_new_from_synthetic_event(sd_device **new_device, const char *syspath, const char *action);
+
+int device_tag_index(sd_device *dev, sd_device *dev_old, bool add);
+int device_update_db(sd_device *device);
+int device_delete_db(sd_device *device);
+int device_read_db_internal_filename(sd_device *device, const char *filename); /* For fuzzer */
+int device_read_db_internal(sd_device *device, bool force);
+static inline int device_read_db(sd_device *device) {
+ return device_read_db_internal(device, false);
+}
+
+DeviceAction device_action_from_string(const char *s) _pure_;
+const char *device_action_to_string(DeviceAction a) _const_;
+void dump_device_action_table(void);
diff --git a/src/libsystemd/sd-device/device-util.h b/src/libsystemd/sd-device/device-util.h
new file mode 100644
index 0000000..1226209
--- /dev/null
+++ b/src/libsystemd/sd-device/device-util.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#define FOREACH_DEVICE_PROPERTY(device, key, value) \
+ for (key = sd_device_get_property_first(device, &(value)); \
+ key; \
+ key = sd_device_get_property_next(device, &(value)))
+
+#define FOREACH_DEVICE_TAG(device, tag) \
+ for (tag = sd_device_get_tag_first(device); \
+ tag; \
+ tag = sd_device_get_tag_next(device))
+
+#define FOREACH_DEVICE_CURRENT_TAG(device, tag) \
+ for (tag = sd_device_get_current_tag_first(device); \
+ tag; \
+ tag = sd_device_get_current_tag_next(device))
+
+#define FOREACH_DEVICE_SYSATTR(device, attr) \
+ for (attr = sd_device_get_sysattr_first(device); \
+ attr; \
+ attr = sd_device_get_sysattr_next(device))
+
+#define FOREACH_DEVICE_DEVLINK(device, devlink) \
+ for (devlink = sd_device_get_devlink_first(device); \
+ devlink; \
+ devlink = sd_device_get_devlink_next(device))
+
+#define FOREACH_DEVICE(enumerator, device) \
+ for (device = sd_device_enumerator_get_device_first(enumerator); \
+ device; \
+ device = sd_device_enumerator_get_device_next(enumerator))
+
+#define FOREACH_SUBSYSTEM(enumerator, device) \
+ for (device = sd_device_enumerator_get_subsystem_first(enumerator); \
+ device; \
+ device = sd_device_enumerator_get_subsystem_next(enumerator))
+
+#define log_device_full_errno(device, level, error, ...) \
+ ({ \
+ const char *_sysname = NULL; \
+ sd_device *_d = (device); \
+ int _level = (level), _error = (error); \
+ \
+ if (_d && _unlikely_(log_get_max_level() >= LOG_PRI(_level))) \
+ (void) sd_device_get_sysname(_d, &_sysname); \
+ log_object_internal(_level, _error, PROJECT_FILE, __LINE__, __func__, \
+ _sysname ? "DEVICE=" : NULL, _sysname, \
+ NULL, NULL, __VA_ARGS__); \
+ })
+
+#define log_device_full(device, level, ...) (void) log_device_full_errno(device, level, 0, __VA_ARGS__)
+
+#define log_device_debug(device, ...) log_device_full_errno(device, LOG_DEBUG, 0, __VA_ARGS__)
+#define log_device_info(device, ...) log_device_full(device, LOG_INFO, __VA_ARGS__)
+#define log_device_notice(device, ...) log_device_full(device, LOG_NOTICE, __VA_ARGS__)
+#define log_device_warning(device, ...) log_device_full(device, LOG_WARNING, __VA_ARGS__)
+#define log_device_error(device, ...) log_device_full(device, LOG_ERR, __VA_ARGS__)
+
+#define log_device_debug_errno(device, error, ...) log_device_full_errno(device, LOG_DEBUG, error, __VA_ARGS__)
+#define log_device_info_errno(device, error, ...) log_device_full_errno(device, LOG_INFO, error, __VA_ARGS__)
+#define log_device_notice_errno(device, error, ...) log_device_full_errno(device, LOG_NOTICE, error, __VA_ARGS__)
+#define log_device_warning_errno(device, error, ...) log_device_full_errno(device, LOG_WARNING, error, __VA_ARGS__)
+#define log_device_error_errno(device, error, ...) log_device_full_errno(device, LOG_ERR, error, __VA_ARGS__)
diff --git a/src/libsystemd/sd-device/sd-device.c b/src/libsystemd/sd-device/sd-device.c
new file mode 100644
index 0000000..d1aa328
--- /dev/null
+++ b/src/libsystemd/sd-device/sd-device.c
@@ -0,0 +1,1996 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-internal.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "util.h"
+
+int device_new_aux(sd_device **ret) {
+ sd_device *device;
+
+ assert(ret);
+
+ device = new(sd_device, 1);
+ if (!device)
+ return -ENOMEM;
+
+ *device = (sd_device) {
+ .n_ref = 1,
+ .watch_handle = -1,
+ .devmode = (mode_t) -1,
+ .devuid = (uid_t) -1,
+ .devgid = (gid_t) -1,
+ .action = _DEVICE_ACTION_INVALID,
+ };
+
+ *ret = device;
+ return 0;
+}
+
+static sd_device *device_free(sd_device *device) {
+ assert(device);
+
+ sd_device_unref(device->parent);
+ free(device->syspath);
+ free(device->sysname);
+ free(device->devtype);
+ free(device->devname);
+ free(device->subsystem);
+ free(device->driver_subsystem);
+ free(device->driver);
+ free(device->id_filename);
+ free(device->properties_strv);
+ free(device->properties_nulstr);
+
+ ordered_hashmap_free_free_free(device->properties);
+ ordered_hashmap_free_free_free(device->properties_db);
+ hashmap_free_free_free(device->sysattr_values);
+ set_free(device->sysattrs);
+ set_free(device->all_tags);
+ set_free(device->current_tags);
+ set_free(device->devlinks);
+
+ return mfree(device);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device, sd_device, device_free);
+
+int device_add_property_aux(sd_device *device, const char *_key, const char *_value, bool db) {
+ OrderedHashmap **properties;
+
+ assert(device);
+ assert(_key);
+
+ if (db)
+ properties = &device->properties_db;
+ else
+ properties = &device->properties;
+
+ if (_value) {
+ _cleanup_free_ char *key = NULL, *value = NULL, *old_key = NULL, *old_value = NULL;
+ int r;
+
+ r = ordered_hashmap_ensure_allocated(properties, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ key = strdup(_key);
+ if (!key)
+ return -ENOMEM;
+
+ value = strdup(_value);
+ if (!value)
+ return -ENOMEM;
+
+ old_value = ordered_hashmap_get2(*properties, key, (void**) &old_key);
+
+ r = ordered_hashmap_replace(*properties, key, value);
+ if (r < 0)
+ return r;
+
+ key = NULL;
+ value = NULL;
+ } else {
+ _cleanup_free_ char *key = NULL;
+ _cleanup_free_ char *value = NULL;
+
+ value = ordered_hashmap_remove2(*properties, _key, (void**) &key);
+ }
+
+ if (!db) {
+ device->properties_generation++;
+ device->properties_buf_outdated = true;
+ }
+
+ return 0;
+}
+
+int device_set_syspath(sd_device *device, const char *_syspath, bool verify) {
+ _cleanup_free_ char *syspath = NULL;
+ const char *devpath;
+ int r;
+
+ assert(device);
+ assert(_syspath);
+
+ /* must be a subdirectory of /sys */
+ if (!path_startswith(_syspath, "/sys/"))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Syspath '%s' is not a subdirectory of /sys",
+ _syspath);
+
+ if (verify) {
+ r = chase_symlinks(_syspath, NULL, 0, &syspath, NULL);
+ if (r == -ENOENT)
+ return -ENODEV; /* the device does not exist (any more?) */
+ if (r < 0)
+ return log_debug_errno(r, "sd-device: Failed to get target of '%s': %m", _syspath);
+
+ if (!path_startswith(syspath, "/sys")) {
+ _cleanup_free_ char *real_sys = NULL, *new_syspath = NULL;
+ char *p;
+
+ /* /sys is a symlink to somewhere sysfs is mounted on? In that case, we convert the path to real sysfs to "/sys". */
+ r = chase_symlinks("/sys", NULL, 0, &real_sys, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device: Failed to chase symlink /sys: %m");
+
+ p = path_startswith(syspath, real_sys);
+ if (!p)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV),
+ "sd-device: Canonicalized path '%s' does not starts with sysfs mount point '%s'",
+ syspath, real_sys);
+
+ new_syspath = path_join("/sys", p);
+ if (!new_syspath)
+ return -ENOMEM;
+
+ free_and_replace(syspath, new_syspath);
+ path_simplify(syspath, false);
+ }
+
+ if (path_startswith(syspath, "/sys/devices/")) {
+ char *path;
+
+ /* all 'devices' require an 'uevent' file */
+ path = strjoina(syspath, "/uevent");
+ r = access(path, F_OK);
+ if (r < 0) {
+ if (errno == ENOENT)
+ /* this is not a valid device */
+ return -ENODEV;
+
+ return log_debug_errno(errno, "sd-device: %s does not have an uevent file: %m", syspath);
+ }
+ } else {
+ /* everything else just needs to be a directory */
+ if (!is_dir(syspath, false))
+ return -ENODEV;
+ }
+ } else {
+ syspath = strdup(_syspath);
+ if (!syspath)
+ return -ENOMEM;
+ }
+
+ devpath = syspath + STRLEN("/sys");
+
+ if (devpath[0] == '\0')
+ /* '/sys' alone is not a valid device path */
+ return -ENODEV;
+
+ r = device_add_property_internal(device, "DEVPATH", devpath);
+ if (r < 0)
+ return r;
+
+ free_and_replace(device->syspath, syspath);
+ device->devpath = devpath;
+ return 0;
+}
+
+_public_ int sd_device_new_from_syspath(sd_device **ret, const char *syspath) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(syspath, -EINVAL);
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ r = device_set_syspath(device, syspath, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+ return 0;
+}
+
+_public_ int sd_device_new_from_devnum(sd_device **ret, char type, dev_t devnum) {
+ char *syspath;
+ char id[DECIMAL_STR_MAX(unsigned) * 2 + 1];
+
+ assert_return(ret, -EINVAL);
+ assert_return(IN_SET(type, 'b', 'c'), -EINVAL);
+
+ /* use /sys/dev/{block,char}/<maj>:<min> link */
+ xsprintf(id, "%u:%u", major(devnum), minor(devnum));
+
+ syspath = strjoina("/sys/dev/", (type == 'b' ? "block" : "char"), "/", id);
+
+ return sd_device_new_from_syspath(ret, syspath);
+}
+
+_public_ int sd_device_new_from_subsystem_sysname(sd_device **ret, const char *subsystem, const char *sysname) {
+ char *name, *syspath;
+ size_t len = 0;
+
+ assert_return(ret, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+ assert_return(sysname, -EINVAL);
+
+ if (streq(subsystem, "subsystem")) {
+ syspath = strjoina("/sys/subsystem/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/bus/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/class/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+ } else if (streq(subsystem, "module")) {
+ syspath = strjoina("/sys/module/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+ } else if (streq(subsystem, "drivers")) {
+ char subsys[PATH_MAX];
+ char *driver;
+
+ strscpy(subsys, sizeof(subsys), sysname);
+ driver = strchr(subsys, ':');
+ if (driver) {
+ driver[0] = '\0';
+ driver++;
+
+ syspath = strjoina("/sys/subsystem/", subsys, "/drivers/", driver);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/bus/", subsys, "/drivers/", driver);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+ }
+ }
+
+ /* translate sysname back to sysfs filename */
+ name = strdupa(sysname);
+ while (name[len] != '\0') {
+ if (name[len] == '/')
+ name[len] = '!';
+
+ len++;
+ }
+
+ syspath = strjoina("/sys/subsystem/", subsystem, "/devices/", name);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/bus/", subsystem, "/devices/", name);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/class/", subsystem, "/", name);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ syspath = strjoina("/sys/firmware/", subsystem, "/", sysname);
+ if (access(syspath, F_OK) >= 0)
+ return sd_device_new_from_syspath(ret, syspath);
+
+ return -ENODEV;
+}
+
+int device_set_devtype(sd_device *device, const char *devtype) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(device);
+ assert(devtype);
+
+ t = strdup(devtype);
+ if (!t)
+ return -ENOMEM;
+
+ r = device_add_property_internal(device, "DEVTYPE", t);
+ if (r < 0)
+ return r;
+
+ return free_and_replace(device->devtype, t);
+}
+
+int device_set_ifindex(sd_device *device, const char *name) {
+ int r, ifindex;
+
+ assert(device);
+ assert(name);
+
+ ifindex = parse_ifindex(name);
+ if (ifindex < 0)
+ return ifindex;
+
+ r = device_add_property_internal(device, "IFINDEX", name);
+ if (r < 0)
+ return r;
+
+ device->ifindex = ifindex;
+
+ return 0;
+}
+
+int device_set_devname(sd_device *device, const char *devname) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(device);
+ assert(devname);
+
+ if (devname[0] != '/')
+ t = strjoin("/dev/", devname);
+ else
+ t = strdup(devname);
+ if (!t)
+ return -ENOMEM;
+
+ r = device_add_property_internal(device, "DEVNAME", t);
+ if (r < 0)
+ return r;
+
+ return free_and_replace(device->devname, t);
+}
+
+int device_set_devmode(sd_device *device, const char *_devmode) {
+ unsigned devmode;
+ int r;
+
+ assert(device);
+ assert(_devmode);
+
+ r = safe_atou(_devmode, &devmode);
+ if (r < 0)
+ return r;
+
+ if (devmode > 07777)
+ return -EINVAL;
+
+ r = device_add_property_internal(device, "DEVMODE", _devmode);
+ if (r < 0)
+ return r;
+
+ device->devmode = devmode;
+
+ return 0;
+}
+
+int device_set_devnum(sd_device *device, const char *major, const char *minor) {
+ unsigned maj = 0, min = 0;
+ int r;
+
+ assert(device);
+ assert(major);
+
+ r = safe_atou(major, &maj);
+ if (r < 0)
+ return r;
+ if (!maj)
+ return 0;
+
+ if (minor) {
+ r = safe_atou(minor, &min);
+ if (r < 0)
+ return r;
+ }
+
+ r = device_add_property_internal(device, "MAJOR", major);
+ if (r < 0)
+ return r;
+
+ if (minor) {
+ r = device_add_property_internal(device, "MINOR", minor);
+ if (r < 0)
+ return r;
+ }
+
+ device->devnum = makedev(maj, min);
+
+ return 0;
+}
+
+static int handle_uevent_line(sd_device *device, const char *key, const char *value, const char **major, const char **minor) {
+ int r;
+
+ assert(device);
+ assert(key);
+ assert(value);
+ assert(major);
+ assert(minor);
+
+ if (streq(key, "DEVTYPE")) {
+ r = device_set_devtype(device, value);
+ if (r < 0)
+ return r;
+ } else if (streq(key, "IFINDEX")) {
+ r = device_set_ifindex(device, value);
+ if (r < 0)
+ return r;
+ } else if (streq(key, "DEVNAME")) {
+ r = device_set_devname(device, value);
+ if (r < 0)
+ return r;
+ } else if (streq(key, "DEVMODE")) {
+ r = device_set_devmode(device, value);
+ if (r < 0)
+ return r;
+ } else if (streq(key, "MAJOR"))
+ *major = value;
+ else if (streq(key, "MINOR"))
+ *minor = value;
+ else {
+ r = device_add_property_internal(device, key, value);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int device_read_uevent_file(sd_device *device) {
+ _cleanup_free_ char *uevent = NULL;
+ const char *syspath, *key = NULL, *value = NULL, *major = NULL, *minor = NULL;
+ char *path;
+ size_t uevent_len;
+ unsigned i;
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ INVALID_LINE,
+ } state = PRE_KEY;
+
+ assert(device);
+
+ if (device->uevent_loaded || device->sealed)
+ return 0;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strjoina(syspath, "/uevent");
+
+ r = read_full_file(path, &uevent, &uevent_len);
+ if (r == -EACCES) {
+ /* empty uevent files may be write-only */
+ device->uevent_loaded = true;
+ return 0;
+ }
+ if (r == -ENOENT)
+ /* some devices may not have uevent files, see set_syspath() */
+ return 0;
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to read uevent file '%s': %m", path);
+
+ device->uevent_loaded = true;
+
+ for (i = 0; i < uevent_len; i++)
+ switch (state) {
+ case PRE_KEY:
+ if (!strchr(NEWLINE, uevent[i])) {
+ key = &uevent[i];
+
+ state = KEY;
+ }
+
+ break;
+ case KEY:
+ if (uevent[i] == '=') {
+ uevent[i] = '\0';
+
+ state = PRE_VALUE;
+ } else if (strchr(NEWLINE, uevent[i])) {
+ uevent[i] = '\0';
+ log_device_debug(device, "sd-device: Invalid uevent line '%s', ignoring", key);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ case PRE_VALUE:
+ value = &uevent[i];
+ state = VALUE;
+
+ _fallthrough_; /* to handle empty property */
+ case VALUE:
+ if (strchr(NEWLINE, uevent[i])) {
+ uevent[i] = '\0';
+
+ r = handle_uevent_line(device, key, value, &major, &minor);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to handle uevent entry '%s=%s', ignoring: %m", key, value);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ default:
+ assert_not_reached("Invalid state when parsing uevent file");
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to set 'MAJOR=%s' or 'MINOR=%s' from '%s', ignoring: %m", major, minor, path);
+ }
+
+ return 0;
+}
+
+_public_ int sd_device_get_ifindex(sd_device *device, int *ifindex) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (device->ifindex <= 0)
+ return -ENOENT;
+
+ if (ifindex)
+ *ifindex = device->ifindex;
+
+ return 0;
+}
+
+_public_ int sd_device_new_from_device_id(sd_device **ret, const char *id) {
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(id, -EINVAL);
+
+ switch (id[0]) {
+ case 'b':
+ case 'c': {
+ dev_t devt;
+
+ if (isempty(id))
+ return -EINVAL;
+
+ r = parse_dev(id + 1, &devt);
+ if (r < 0)
+ return r;
+
+ return sd_device_new_from_devnum(ret, id[0], devt);
+ }
+
+ case 'n': {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _cleanup_close_ int sk = -1;
+ struct ifreq ifr = {};
+ int ifindex;
+
+ r = ifr.ifr_ifindex = parse_ifindex(&id[1]);
+ if (r < 0)
+ return r;
+
+ sk = socket_ioctl_fd();
+ if (sk < 0)
+ return sk;
+
+ r = ioctl(sk, SIOCGIFNAME, &ifr);
+ if (r < 0)
+ return -errno;
+
+ r = sd_device_new_from_subsystem_sysname(&device, "net", ifr.ifr_name);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0)
+ return r;
+
+ /* this is racey, so we might end up with the wrong device */
+ if (ifr.ifr_ifindex != ifindex)
+ return -ENODEV;
+
+ *ret = TAKE_PTR(device);
+ return 0;
+ }
+
+ case '+': {
+ char subsys[PATH_MAX];
+ char *sysname;
+
+ (void) strscpy(subsys, sizeof(subsys), id + 1);
+ sysname = strchr(subsys, ':');
+ if (!sysname)
+ return -EINVAL;
+
+ sysname[0] = '\0';
+ sysname++;
+
+ return sd_device_new_from_subsystem_sysname(ret, subsys, sysname);
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+_public_ int sd_device_get_syspath(sd_device *device, const char **ret) {
+ assert_return(device, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ assert(path_startswith(device->syspath, "/sys/"));
+
+ *ret = device->syspath;
+
+ return 0;
+}
+
+static int device_new_from_child(sd_device **ret, sd_device *child) {
+ _cleanup_free_ char *path = NULL;
+ const char *subdir, *syspath;
+ int r;
+
+ assert(ret);
+ assert(child);
+
+ r = sd_device_get_syspath(child, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strdup(syspath);
+ if (!path)
+ return -ENOMEM;
+ subdir = path + STRLEN("/sys");
+
+ for (;;) {
+ char *pos;
+
+ pos = strrchr(subdir, '/');
+ if (!pos || pos < subdir + 2)
+ break;
+
+ *pos = '\0';
+
+ r = sd_device_new_from_syspath(ret, path);
+ if (r < 0)
+ continue;
+
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+_public_ int sd_device_get_parent(sd_device *child, sd_device **ret) {
+
+ assert_return(ret, -EINVAL);
+ assert_return(child, -EINVAL);
+
+ if (!child->parent_set) {
+ child->parent_set = true;
+
+ (void) device_new_from_child(&child->parent, child);
+ }
+
+ if (!child->parent)
+ return -ENOENT;
+
+ *ret = child->parent;
+ return 0;
+}
+
+int device_set_subsystem(sd_device *device, const char *_subsystem) {
+ _cleanup_free_ char *subsystem = NULL;
+ int r;
+
+ assert(device);
+ assert(_subsystem);
+
+ subsystem = strdup(_subsystem);
+ if (!subsystem)
+ return -ENOMEM;
+
+ r = device_add_property_internal(device, "SUBSYSTEM", subsystem);
+ if (r < 0)
+ return r;
+
+ device->subsystem_set = true;
+ return free_and_replace(device->subsystem, subsystem);
+}
+
+static int device_set_drivers_subsystem(sd_device *device, const char *_subsystem) {
+ _cleanup_free_ char *subsystem = NULL;
+ int r;
+
+ assert(device);
+ assert(_subsystem);
+ assert(*_subsystem);
+
+ subsystem = strdup(_subsystem);
+ if (!subsystem)
+ return -ENOMEM;
+
+ r = device_set_subsystem(device, "drivers");
+ if (r < 0)
+ return r;
+
+ return free_and_replace(device->driver_subsystem, subsystem);
+}
+
+_public_ int sd_device_get_subsystem(sd_device *device, const char **ret) {
+ const char *syspath, *drivers = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(device, -EINVAL);
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ if (!device->subsystem_set) {
+ _cleanup_free_ char *subsystem = NULL;
+ char *path;
+
+ /* read 'subsystem' link */
+ path = strjoina(syspath, "/subsystem");
+ r = readlink_value(path, &subsystem);
+ if (r >= 0)
+ r = device_set_subsystem(device, subsystem);
+ /* use implicit names */
+ else if (path_startswith(device->devpath, "/module/"))
+ r = device_set_subsystem(device, "module");
+ else if (!(drivers = strstr(syspath, "/drivers/")) &&
+ PATH_STARTSWITH_SET(device->devpath, "/subsystem/",
+ "/class/",
+ "/bus/"))
+ r = device_set_subsystem(device, "subsystem");
+ if (r < 0 && r != -ENOENT)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set subsystem for %s: %m", device->devpath);
+
+ device->subsystem_set = true;
+ } else if (!device->driver_subsystem_set)
+ drivers = strstr(syspath, "/drivers/");
+
+ if (!device->driver_subsystem_set) {
+ if (drivers) {
+ _cleanup_free_ char *subpath = NULL;
+
+ subpath = strndup(syspath, drivers - syspath);
+ if (!subpath)
+ r = -ENOMEM;
+ else {
+ const char *subsys;
+
+ subsys = strrchr(subpath, '/');
+ if (!subsys)
+ r = -EINVAL;
+ else
+ r = device_set_drivers_subsystem(device, subsys + 1);
+ }
+ if (r < 0 && r != -ENOENT)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set subsystem for driver %s: %m", device->devpath);
+ }
+
+ device->driver_subsystem_set = true;
+ }
+
+ if (!device->subsystem)
+ return -ENOENT;
+
+ *ret = device->subsystem;
+ return 0;
+}
+
+_public_ int sd_device_get_devtype(sd_device *device, const char **devtype) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (!device->devtype)
+ return -ENOENT;
+
+ if (devtype)
+ *devtype = device->devtype;
+
+ return !!device->devtype;
+}
+
+_public_ int sd_device_get_parent_with_subsystem_devtype(sd_device *child, const char *subsystem, const char *devtype, sd_device **ret) {
+ sd_device *parent = NULL;
+ int r;
+
+ assert_return(child, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ r = sd_device_get_parent(child, &parent);
+ while (r >= 0) {
+ const char *parent_subsystem = NULL;
+ const char *parent_devtype = NULL;
+
+ (void) sd_device_get_subsystem(parent, &parent_subsystem);
+ if (streq_ptr(parent_subsystem, subsystem)) {
+ if (!devtype)
+ break;
+
+ (void) sd_device_get_devtype(parent, &parent_devtype);
+ if (streq_ptr(parent_devtype, devtype))
+ break;
+ }
+ r = sd_device_get_parent(parent, &parent);
+ }
+
+ if (r < 0)
+ return r;
+
+ *ret = parent;
+ return 0;
+}
+
+_public_ int sd_device_get_devnum(sd_device *device, dev_t *devnum) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (major(device->devnum) <= 0)
+ return -ENOENT;
+
+ if (devnum)
+ *devnum = device->devnum;
+
+ return 0;
+}
+
+int device_set_driver(sd_device *device, const char *_driver) {
+ _cleanup_free_ char *driver = NULL;
+ int r;
+
+ assert(device);
+ assert(_driver);
+
+ driver = strdup(_driver);
+ if (!driver)
+ return -ENOMEM;
+
+ r = device_add_property_internal(device, "DRIVER", driver);
+ if (r < 0)
+ return r;
+
+ device->driver_set = true;
+ return free_and_replace(device->driver, driver);
+}
+
+_public_ int sd_device_get_driver(sd_device *device, const char **ret) {
+ assert_return(device, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!device->driver_set) {
+ _cleanup_free_ char *driver = NULL;
+ const char *syspath;
+ char *path;
+ int r;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strjoina(syspath, "/driver");
+ r = readlink_value(path, &driver);
+ if (r >= 0) {
+ r = device_set_driver(device, driver);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set driver for %s: %m", device->devpath);
+ } else if (r == -ENOENT)
+ device->driver_set = true;
+ else
+ return log_device_debug_errno(device, r, "sd-device: Failed to set driver for %s: %m", device->devpath);
+ }
+
+ if (!device->driver)
+ return -ENOENT;
+
+ *ret = device->driver;
+ return 0;
+}
+
+_public_ int sd_device_get_devpath(sd_device *device, const char **devpath) {
+ assert_return(device, -EINVAL);
+ assert_return(devpath, -EINVAL);
+
+ assert(device->devpath);
+ assert(device->devpath[0] == '/');
+
+ *devpath = device->devpath;
+ return 0;
+}
+
+_public_ int sd_device_get_devname(sd_device *device, const char **devname) {
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(devname, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (!device->devname)
+ return -ENOENT;
+
+ assert(path_startswith(device->devname, "/dev/"));
+
+ *devname = device->devname;
+ return 0;
+}
+
+static int device_set_sysname(sd_device *device) {
+ _cleanup_free_ char *sysname = NULL;
+ const char *sysnum = NULL;
+ const char *pos;
+ size_t len = 0;
+
+ if (!device->devpath)
+ return -EINVAL;
+
+ pos = strrchr(device->devpath, '/');
+ if (!pos)
+ return -EINVAL;
+ pos++;
+
+ /* devpath is not a root directory */
+ if (*pos == '\0' || pos <= device->devpath)
+ return -EINVAL;
+
+ sysname = strdup(pos);
+ if (!sysname)
+ return -ENOMEM;
+
+ /* some devices have '!' in their name, change that to '/' */
+ while (sysname[len] != '\0') {
+ if (sysname[len] == '!')
+ sysname[len] = '/';
+
+ len++;
+ }
+
+ /* trailing number */
+ while (len > 0 && isdigit(sysname[--len]))
+ sysnum = &sysname[len];
+
+ if (len == 0)
+ sysnum = NULL;
+
+ device->sysname_set = true;
+ device->sysnum = sysnum;
+ return free_and_replace(device->sysname, sysname);
+}
+
+_public_ int sd_device_get_sysname(sd_device *device, const char **ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!device->sysname_set) {
+ r = device_set_sysname(device);
+ if (r < 0)
+ return r;
+ }
+
+ assert_return(device->sysname, -ENOENT);
+
+ *ret = device->sysname;
+ return 0;
+}
+
+_public_ int sd_device_get_sysnum(sd_device *device, const char **ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!device->sysname_set) {
+ r = device_set_sysname(device);
+ if (r < 0)
+ return r;
+ }
+
+ if (!device->sysnum)
+ return -ENOENT;
+
+ *ret = device->sysnum;
+ return 0;
+}
+
+static bool is_valid_tag(const char *tag) {
+ assert(tag);
+
+ return !strchr(tag, ':') && !strchr(tag, ' ');
+}
+
+int device_add_tag(sd_device *device, const char *tag, bool both) {
+ int r, added;
+
+ assert(device);
+ assert(tag);
+
+ if (!is_valid_tag(tag))
+ return -EINVAL;
+
+ /* Definitely add to the "all" list of tags (i.e. the sticky list) */
+ added = set_put_strdup(&device->all_tags, tag);
+ if (added < 0)
+ return added;
+
+ /* And optionally, also add it to the current list of tags */
+ if (both) {
+ r = set_put_strdup(&device->current_tags, tag);
+ if (r < 0) {
+ if (added > 0)
+ (void) set_remove(device->all_tags, tag);
+
+ return r;
+ }
+ }
+
+ device->tags_generation++;
+ device->property_tags_outdated = true;
+
+ return 0;
+}
+
+int device_add_devlink(sd_device *device, const char *devlink) {
+ int r;
+
+ assert(device);
+ assert(devlink);
+
+ r = set_put_strdup(&device->devlinks, devlink);
+ if (r < 0)
+ return r;
+
+ device->devlinks_generation++;
+ device->property_devlinks_outdated = true;
+
+ return 0;
+}
+
+static int device_add_property_internal_from_string(sd_device *device, const char *str) {
+ _cleanup_free_ char *key = NULL;
+ char *value;
+ int r;
+
+ assert(device);
+ assert(str);
+
+ key = strdup(str);
+ if (!key)
+ return -ENOMEM;
+
+ value = strchr(key, '=');
+ if (!value)
+ return -EINVAL;
+
+ *value = '\0';
+
+ if (isempty(++value))
+ value = NULL;
+
+ /* Add the property to both sd_device::properties and sd_device::properties_db,
+ * as this is called by only handle_db_line(). */
+ r = device_add_property_aux(device, key, value, false);
+ if (r < 0)
+ return r;
+
+ return device_add_property_aux(device, key, value, true);
+}
+
+int device_set_usec_initialized(sd_device *device, usec_t when) {
+ char s[DECIMAL_STR_MAX(usec_t)];
+ int r;
+
+ assert(device);
+
+ xsprintf(s, USEC_FMT, when);
+
+ r = device_add_property_internal(device, "USEC_INITIALIZED", s);
+ if (r < 0)
+ return r;
+
+ device->usec_initialized = when;
+ return 0;
+}
+
+static int handle_db_line(sd_device *device, char key, const char *value) {
+ char *path;
+ int r;
+
+ assert(device);
+ assert(value);
+
+ switch (key) {
+ case 'G': /* Any tag */
+ case 'Q': /* Current tag */
+ r = device_add_tag(device, value, key == 'Q');
+ if (r < 0)
+ return r;
+
+ break;
+ case 'S':
+ path = strjoina("/dev/", value);
+ r = device_add_devlink(device, path);
+ if (r < 0)
+ return r;
+
+ break;
+ case 'E':
+ r = device_add_property_internal_from_string(device, value);
+ if (r < 0)
+ return r;
+
+ break;
+ case 'I': {
+ usec_t t;
+
+ r = safe_atou64(value, &t);
+ if (r < 0)
+ return r;
+
+ r = device_set_usec_initialized(device, t);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+ case 'L':
+ r = safe_atoi(value, &device->devlink_priority);
+ if (r < 0)
+ return r;
+
+ break;
+ case 'W':
+ r = safe_atoi(value, &device->watch_handle);
+ if (r < 0)
+ return r;
+
+ break;
+ case 'V':
+ r = safe_atou(value, &device->database_version);
+ if (r < 0)
+ return r;
+
+ break;
+ default:
+ log_device_debug(device, "sd-device: Unknown key '%c' in device db, ignoring", key);
+ }
+
+ return 0;
+}
+
+int device_get_id_filename(sd_device *device, const char **ret) {
+ assert(device);
+ assert(ret);
+
+ if (!device->id_filename) {
+ _cleanup_free_ char *id = NULL;
+ const char *subsystem;
+ dev_t devnum;
+ int ifindex, r;
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return r;
+
+ if (sd_device_get_devnum(device, &devnum) >= 0) {
+ assert(subsystem);
+
+ /* use dev_t — b259:131072, c254:0 */
+ r = asprintf(&id, "%c%u:%u",
+ streq(subsystem, "block") ? 'b' : 'c',
+ major(devnum), minor(devnum));
+ if (r < 0)
+ return -ENOMEM;
+ } else if (sd_device_get_ifindex(device, &ifindex) >= 0) {
+ /* use netdev ifindex — n3 */
+ r = asprintf(&id, "n%u", (unsigned) ifindex);
+ if (r < 0)
+ return -ENOMEM;
+ } else {
+ /* use $subsys:$sysname — pci:0000:00:1f.2
+ * sysname() has '!' translated, get it from devpath
+ */
+ const char *sysname;
+
+ sysname = basename(device->devpath);
+ if (!sysname)
+ return -EINVAL;
+
+ if (!subsystem)
+ return -EINVAL;
+
+
+ if (streq(subsystem, "drivers"))
+ /* the 'drivers' pseudo-subsystem is special, and needs the real subsystem
+ * encoded as well */
+ id = strjoin("+drivers:", device->driver_subsystem, ":", sysname);
+ else
+ id = strjoin("+", subsystem, ":", sysname);
+ if (!id)
+ return -ENOMEM;
+ }
+
+ device->id_filename = TAKE_PTR(id);
+ }
+
+ *ret = device->id_filename;
+ return 0;
+}
+
+int device_read_db_internal_filename(sd_device *device, const char *filename) {
+ _cleanup_free_ char *db = NULL;
+ const char *value;
+ size_t db_len, i;
+ char key;
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ INVALID_LINE,
+ } state = PRE_KEY;
+
+ assert(device);
+ assert(filename);
+
+ r = read_full_file(filename, &db, &db_len);
+ if (r < 0) {
+ if (r == -ENOENT)
+ return 0;
+
+ return log_device_debug_errno(device, r, "sd-device: Failed to read db '%s': %m", filename);
+ }
+
+ /* devices with a database entry are initialized */
+ device->is_initialized = true;
+
+ device->db_loaded = true;
+
+ for (i = 0; i < db_len; i++) {
+ switch (state) {
+ case PRE_KEY:
+ if (!strchr(NEWLINE, db[i])) {
+ key = db[i];
+
+ state = KEY;
+ }
+
+ break;
+ case KEY:
+ if (db[i] != ':') {
+ log_device_debug(device, "sd-device: Invalid db entry with key '%c', ignoring", key);
+
+ state = INVALID_LINE;
+ } else {
+ db[i] = '\0';
+
+ state = PRE_VALUE;
+ }
+
+ break;
+ case PRE_VALUE:
+ value = &db[i];
+
+ state = VALUE;
+
+ break;
+ case INVALID_LINE:
+ if (strchr(NEWLINE, db[i]))
+ state = PRE_KEY;
+
+ break;
+ case VALUE:
+ if (strchr(NEWLINE, db[i])) {
+ db[i] = '\0';
+ r = handle_db_line(device, key, value);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to handle db entry '%c:%s', ignoring: %m", key, value);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ default:
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL), "sd-device: invalid db syntax.");
+ }
+ }
+
+ return 0;
+}
+
+int device_read_db_internal(sd_device *device, bool force) {
+ const char *id, *path;
+ int r;
+
+ assert(device);
+
+ if (device->db_loaded || (!force && device->sealed))
+ return 0;
+
+ r = device_get_id_filename(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ return device_read_db_internal_filename(device, path);
+}
+
+_public_ int sd_device_get_is_initialized(sd_device *device) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ return device->is_initialized;
+}
+
+_public_ int sd_device_get_usec_since_initialized(sd_device *device, uint64_t *usec) {
+ usec_t now_ts;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(usec, -EINVAL);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (!device->is_initialized)
+ return -EBUSY;
+
+ if (!device->usec_initialized)
+ return -ENODATA;
+
+ now_ts = now(clock_boottime_or_monotonic());
+
+ if (now_ts < device->usec_initialized)
+ return -EIO;
+
+ *usec = now_ts - device->usec_initialized;
+ return 0;
+}
+
+_public_ const char *sd_device_get_tag_first(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ device->all_tags_iterator_generation = device->tags_generation;
+ device->all_tags_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->all_tags, &device->all_tags_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_tag_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ if (device->all_tags_iterator_generation != device->tags_generation)
+ return NULL;
+
+ (void) set_iterate(device->all_tags, &device->all_tags_iterator, &v);
+ return v;
+}
+
+static bool device_database_supports_current_tags(sd_device *device) {
+ assert(device);
+
+ (void) device_read_db(device);
+
+ /* The current tags (saved in Q field) feature is implemented in database version 1.
+ * If the database version is 0, then the tags (NOT current tags, saved in G field) are not
+ * sticky. Thus, we can safely bypass the operations for the current tags (Q) to tags (G). */
+
+ return device->database_version >= 1;
+}
+
+_public_ const char *sd_device_get_current_tag_first(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ if (!device_database_supports_current_tags(device))
+ return sd_device_get_tag_first(device);
+
+ (void) device_read_db(device);
+
+ device->current_tags_iterator_generation = device->tags_generation;
+ device->current_tags_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->current_tags, &device->current_tags_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_current_tag_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ if (!device_database_supports_current_tags(device))
+ return sd_device_get_tag_next(device);
+
+ (void) device_read_db(device);
+
+ if (device->current_tags_iterator_generation != device->tags_generation)
+ return NULL;
+
+ (void) set_iterate(device->current_tags, &device->current_tags_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_devlink_first(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ device->devlinks_iterator_generation = device->devlinks_generation;
+ device->devlinks_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->devlinks, &device->devlinks_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_devlink_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ if (device->devlinks_iterator_generation != device->devlinks_generation)
+ return NULL;
+
+ (void) set_iterate(device->devlinks, &device->devlinks_iterator, &v);
+ return v;
+}
+
+int device_properties_prepare(sd_device *device) {
+ int r;
+
+ assert(device);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->property_devlinks_outdated) {
+ _cleanup_free_ char *devlinks = NULL;
+
+ r = set_strjoin(device->devlinks, " ", false, &devlinks);
+ if (r < 0)
+ return r;
+
+ if (!isempty(devlinks)) {
+ r = device_add_property_internal(device, "DEVLINKS", devlinks);
+ if (r < 0)
+ return r;
+ }
+
+ device->property_devlinks_outdated = false;
+ }
+
+ if (device->property_tags_outdated) {
+ _cleanup_free_ char *tags = NULL;
+
+ r = set_strjoin(device->all_tags, ":", true, &tags);
+ if (r < 0)
+ return r;
+
+ if (!isempty(tags)) {
+ r = device_add_property_internal(device, "TAGS", tags);
+ if (r < 0)
+ return r;
+ }
+
+ tags = mfree(tags);
+ r = set_strjoin(device->current_tags, ":", true, &tags);
+ if (r < 0)
+ return r;
+
+ if (!isempty(tags)) {
+ r = device_add_property_internal(device, "CURRENT_TAGS", tags);
+ if (r < 0)
+ return r;
+ }
+
+ device->property_tags_outdated = false;
+ }
+
+ return 0;
+}
+
+_public_ const char *sd_device_get_property_first(sd_device *device, const char **_value) {
+ const char *key;
+ int r;
+
+ assert_return(device, NULL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return NULL;
+
+ device->properties_iterator_generation = device->properties_generation;
+ device->properties_iterator = ITERATOR_FIRST;
+
+ (void) ordered_hashmap_iterate(device->properties, &device->properties_iterator, (void**)_value, (const void**)&key);
+ return key;
+}
+
+_public_ const char *sd_device_get_property_next(sd_device *device, const char **_value) {
+ const char *key;
+ int r;
+
+ assert_return(device, NULL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return NULL;
+
+ if (device->properties_iterator_generation != device->properties_generation)
+ return NULL;
+
+ (void) ordered_hashmap_iterate(device->properties, &device->properties_iterator, (void**)_value, (const void**)&key);
+ return key;
+}
+
+static int device_sysattrs_read_all_internal(sd_device *device, const char *subdir) {
+ _cleanup_free_ char *path_dir = NULL;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ const char *syspath;
+ int r;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ if (subdir) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(syspath, subdir, "uevent");
+ if (!p)
+ return -ENOMEM;
+
+ if (access(p, F_OK) >= 0)
+ /* this is a child device, skipping */
+ return 0;
+ if (errno != ENOENT) {
+ log_device_debug_errno(device, errno, "sd-device: Failed to stat %s, ignoring subdir: %m", p);
+ return 0;
+ }
+
+ path_dir = path_join(syspath, subdir);
+ if (!path_dir)
+ return -ENOMEM;
+ }
+
+ dir = opendir(path_dir ?: syspath);
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(dent, dir, return -errno) {
+ _cleanup_free_ char *path = NULL, *p = NULL;
+ struct stat statbuf;
+
+ if (dot_or_dot_dot(dent->d_name))
+ continue;
+
+ /* only handle symlinks, regular files, and directories */
+ if (!IN_SET(dent->d_type, DT_LNK, DT_REG, DT_DIR))
+ continue;
+
+ if (subdir) {
+ p = path_join(subdir, dent->d_name);
+ if (!p)
+ return -ENOMEM;
+ }
+
+ if (dent->d_type == DT_DIR) {
+ /* read subdirectory */
+ r = device_sysattrs_read_all_internal(device, p ?: dent->d_name);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ path = path_join(syspath, p ?: dent->d_name);
+ if (!path)
+ return -ENOMEM;
+
+ if (lstat(path, &statbuf) != 0)
+ continue;
+
+ if (!(statbuf.st_mode & S_IRUSR))
+ continue;
+
+ r = set_put_strdup(&device->sysattrs, p ?: dent->d_name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int device_sysattrs_read_all(sd_device *device) {
+ int r;
+
+ assert(device);
+
+ if (device->sysattrs_read)
+ return 0;
+
+ r = device_sysattrs_read_all_internal(device, NULL);
+ if (r < 0)
+ return r;
+
+ device->sysattrs_read = true;
+
+ return 0;
+}
+
+_public_ const char *sd_device_get_sysattr_first(sd_device *device) {
+ void *v;
+ int r;
+
+ assert_return(device, NULL);
+
+ if (!device->sysattrs_read) {
+ r = device_sysattrs_read_all(device);
+ if (r < 0) {
+ errno = -r;
+ return NULL;
+ }
+ }
+
+ device->sysattrs_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->sysattrs, &device->sysattrs_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_sysattr_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ if (!device->sysattrs_read)
+ return NULL;
+
+ (void) set_iterate(device->sysattrs, &device->sysattrs_iterator, &v);
+ return v;
+}
+
+_public_ int sd_device_has_tag(sd_device *device, const char *tag) {
+ assert_return(device, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ (void) device_read_db(device);
+
+ return set_contains(device->all_tags, tag);
+}
+
+_public_ int sd_device_has_current_tag(sd_device *device, const char *tag) {
+ assert_return(device, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ if (!device_database_supports_current_tags(device))
+ return sd_device_has_tag(device, tag);
+
+ (void) device_read_db(device);
+
+ return set_contains(device->current_tags, tag);
+}
+
+_public_ int sd_device_get_property_value(sd_device *device, const char *key, const char **_value) {
+ char *value;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(key, -EINVAL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return r;
+
+ value = ordered_hashmap_get(device->properties, key);
+ if (!value)
+ return -ENOENT;
+
+ if (_value)
+ *_value = value;
+ return 0;
+}
+
+/* replaces the value if it already exists */
+static int device_add_sysattr_value(sd_device *device, const char *_key, char *value) {
+ _cleanup_free_ char *key = NULL;
+ _cleanup_free_ char *value_old = NULL;
+ int r;
+
+ assert(device);
+ assert(_key);
+
+ r = hashmap_ensure_allocated(&device->sysattr_values, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ value_old = hashmap_remove2(device->sysattr_values, _key, (void **)&key);
+ if (!key) {
+ key = strdup(_key);
+ if (!key)
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(device->sysattr_values, key, value);
+ if (r < 0)
+ return r;
+ TAKE_PTR(key);
+
+ return 0;
+}
+
+static int device_get_sysattr_value(sd_device *device, const char *_key, const char **_value) {
+ const char *key = NULL, *value;
+
+ assert(device);
+ assert(_key);
+
+ value = hashmap_get2(device->sysattr_values, _key, (void **) &key);
+ if (!key)
+ return -ENOENT;
+
+ if (_value)
+ *_value = value;
+ return 0;
+}
+
+/* We cache all sysattr lookups. If an attribute does not exist, it is stored
+ * with a NULL value in the cache, otherwise the returned string is stored */
+_public_ int sd_device_get_sysattr_value(sd_device *device, const char *sysattr, const char **_value) {
+ _cleanup_free_ char *value = NULL;
+ const char *path, *syspath, *cached_value = NULL;
+ struct stat statbuf;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ /* look for possibly already cached result */
+ r = device_get_sysattr_value(device, sysattr, &cached_value);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return r;
+
+ if (!cached_value)
+ /* we looked up the sysattr before and it did not exist */
+ return -ENOENT;
+
+ if (_value)
+ *_value = cached_value;
+
+ return 0;
+ }
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = prefix_roota(syspath, sysattr);
+ r = lstat(path, &statbuf);
+ if (r < 0) {
+ /* remember that we could not access the sysattr */
+ r = device_add_sysattr_value(device, sysattr, NULL);
+ if (r < 0)
+ return r;
+
+ return -ENOENT;
+ } else if (S_ISLNK(statbuf.st_mode)) {
+ /* Some core links return only the last element of the target path,
+ * these are just values, the paths should not be exposed. */
+ if (STR_IN_SET(sysattr, "driver", "subsystem", "module")) {
+ r = readlink_value(path, &value);
+ if (r < 0)
+ return r;
+ } else
+ return -EINVAL;
+ } else if (S_ISDIR(statbuf.st_mode)) {
+ /* skip directories */
+ return -EINVAL;
+ } else if (!(statbuf.st_mode & S_IRUSR)) {
+ /* skip non-readable files */
+ return -EPERM;
+ } else {
+ size_t size;
+
+ /* read attribute value */
+ r = read_full_virtual_file(path, &value, &size);
+ if (r < 0)
+ return r;
+
+ /* drop trailing newlines */
+ while (size > 0 && value[--size] == '\n')
+ value[size] = '\0';
+ }
+
+ r = device_add_sysattr_value(device, sysattr, value);
+ if (r < 0)
+ return r;
+
+ *_value = TAKE_PTR(value);
+
+ return 0;
+}
+
+static void device_remove_sysattr_value(sd_device *device, const char *_key) {
+ _cleanup_free_ char *key = NULL;
+
+ assert(device);
+ assert(_key);
+
+ free(hashmap_remove2(device->sysattr_values, _key, (void **) &key));
+}
+
+/* set the attribute and save it in the cache. If a NULL value is passed the
+ * attribute is cleared from the cache */
+_public_ int sd_device_set_sysattr_value(sd_device *device, const char *sysattr, const char *_value) {
+ _cleanup_free_ char *value = NULL;
+ const char *syspath, *path;
+ size_t len;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ if (!_value) {
+ device_remove_sysattr_value(device, sysattr);
+ return 0;
+ }
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = prefix_roota(syspath, sysattr);
+
+ len = strlen(_value);
+
+ /* drop trailing newlines */
+ while (len > 0 && _value[len - 1] == '\n')
+ len --;
+
+ /* value length is limited to 4k */
+ if (len > 4096)
+ return -EINVAL;
+
+ value = strndup(_value, len);
+ if (!value)
+ return -ENOMEM;
+
+ r = write_string_file(path, value, WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_NOFOLLOW);
+ if (r < 0) {
+ if (r == -ELOOP)
+ return -EINVAL;
+ if (r == -EISDIR)
+ return r;
+
+ r = free_and_strdup(&value, "");
+ if (r < 0)
+ return r;
+
+ r = device_add_sysattr_value(device, sysattr, value);
+ if (r < 0)
+ return r;
+ TAKE_PTR(value);
+
+ return -ENXIO;
+ }
+
+ r = device_add_sysattr_value(device, sysattr, value);
+ if (r < 0)
+ return r;
+ TAKE_PTR(value);
+
+ return 0;
+}
+
+_public_ int sd_device_set_sysattr_valuef(sd_device *device, const char *sysattr, const char *format, ...) {
+ _cleanup_free_ char *value = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ if (!format) {
+ device_remove_sysattr_value(device, sysattr);
+ return 0;
+ }
+
+ va_start(ap, format);
+ r = vasprintf(&value, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return sd_device_set_sysattr_value(device, sysattr, value);
+}
diff --git a/src/libsystemd/sd-device/test-sd-device-monitor.c b/src/libsystemd/sd-device/test-sd-device-monitor.c
new file mode 100644
index 0000000..fddd1c1
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device-monitor.c
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+#include "virt.h"
+
+static int monitor_handler(sd_device_monitor *m, sd_device *d, void *userdata) {
+ const char *s, *syspath = userdata;
+
+ assert_se(sd_device_get_syspath(d, &s) >= 0);
+ assert_se(streq(s, syspath));
+
+ return sd_event_exit(sd_device_monitor_get_event(m), 100);
+}
+
+static int test_receive_device_fail(void) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *loopback = NULL;
+ const char *syspath;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ /* Try to send device with invalid action and without seqnum. */
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+ assert_se(device_add_property(loopback, "ACTION", "hoge") >= 0);
+
+ assert_se(sd_device_get_syspath(loopback, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_server), "sender") >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_client), "receiver") >= 0);
+
+ /* Do not use assert_se() here. */
+ r = device_monitor_send_device(monitor_server, monitor_client, loopback);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send loopback device: %m");
+
+ assert_se(sd_event_run(sd_device_monitor_get_event(monitor_client), 0) >= 0);
+
+ return 0;
+}
+
+static void test_send_receive_one(sd_device *device, bool subsystem_filter, bool tag_filter, bool use_bpf) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ const char *syspath, *subsystem, *tag, *devtype = NULL;
+
+ log_device_info(device, "/* %s(subsystem_filter=%s, tag_filter=%s, use_bpf=%s) */", __func__,
+ true_false(subsystem_filter), true_false(tag_filter), true_false(use_bpf));
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_server), "sender") >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_client), "receiver") >= 0);
+
+ if (subsystem_filter) {
+ assert_se(sd_device_get_subsystem(device, &subsystem) >= 0);
+ (void) sd_device_get_devtype(device, &devtype);
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, subsystem, devtype) >= 0);
+ }
+
+ if (tag_filter)
+ FOREACH_DEVICE_TAG(device, tag)
+ assert_se(sd_device_monitor_filter_add_match_tag(monitor_client, tag) >= 0);
+
+ if ((subsystem_filter || tag_filter) && use_bpf)
+ assert_se(sd_device_monitor_filter_update(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+static void test_subsystem_filter(sd_device *device) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ const char *syspath, *subsystem, *p, *s;
+ sd_device *d;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+ assert_se(sd_device_get_subsystem(device, &subsystem) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_server), "sender") >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, subsystem, NULL) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_client), "receiver") >= 0);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, subsystem, false) >= 0);
+ FOREACH_DEVICE(e, d) {
+ assert_se(sd_device_get_syspath(d, &p) >= 0);
+ assert_se(sd_device_get_subsystem(d, &s) >= 0);
+
+ log_info("Sending device subsystem:%s syspath:%s", s, p);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0);
+ }
+
+ log_info("Sending device subsystem:%s syspath:%s", subsystem, syspath);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+static void test_sd_device_monitor_filter_remove(sd_device *device) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ const char *syspath;
+
+ log_device_info(device, "/* %s */", __func__);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_server), "sender") >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+ assert_se(sd_event_source_set_description(sd_device_monitor_get_event_source(monitor_client), "receiver") >= 0);
+
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, "hoge", NULL) >= 0);
+ assert_se(sd_device_monitor_filter_update(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_run(sd_device_monitor_get_event(monitor_client), 0) >= 0);
+
+ assert_se(sd_device_monitor_filter_remove(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+static void test_device_copy_properties(sd_device *device) {
+ _cleanup_(sd_device_unrefp) sd_device *copy = NULL;
+
+ assert_se(device_shallow_clone(device, &copy) >= 0);
+ assert_se(device_copy_properties(copy, device) >= 0);
+
+ test_send_receive_one(copy, false, false, false);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_device_unrefp) sd_device *loopback = NULL, *sda = NULL;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+
+ r = test_receive_device_fail();
+ if (r < 0) {
+ assert_se(r == -EPERM && detect_container() > 0);
+ return log_tests_skipped("Running in container? Skipping remaining tests");
+ }
+
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+ assert_se(device_add_property(loopback, "ACTION", "add") >= 0);
+ assert_se(device_add_property(loopback, "SEQNUM", "10") >= 0);
+
+ test_send_receive_one(loopback, false, false, false);
+ test_send_receive_one(loopback, true, false, false);
+ test_send_receive_one(loopback, false, true, false);
+ test_send_receive_one(loopback, true, true, false);
+ test_send_receive_one(loopback, true, false, true);
+ test_send_receive_one(loopback, false, true, true);
+ test_send_receive_one(loopback, true, true, true);
+
+ test_subsystem_filter(loopback);
+ test_sd_device_monitor_filter_remove(loopback);
+ test_device_copy_properties(loopback);
+
+ r = sd_device_new_from_subsystem_sysname(&sda, "block", "sda");
+ if (r < 0) {
+ log_info_errno(r, "Failed to create sd_device for sda, skipping remaining tests: %m");
+ return 0;
+ }
+
+ assert_se(device_add_property(sda, "ACTION", "change") >= 0);
+ assert_se(device_add_property(sda, "SEQNUM", "11") >= 0);
+
+ test_send_receive_one(sda, false, false, false);
+ test_send_receive_one(sda, true, false, false);
+ test_send_receive_one(sda, false, true, false);
+ test_send_receive_one(sda, true, true, false);
+ test_send_receive_one(sda, true, false, true);
+ test_send_receive_one(sda, false, true, true);
+ test_send_receive_one(sda, true, true, true);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-sd-device-thread.c b/src/libsystemd/sd-device/test-sd-device-thread.c
new file mode 100644
index 0000000..6f30155
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device-thread.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-device.h"
+
+#include "device-util.h"
+#include "macro.h"
+
+static void* thread(void *p) {
+ sd_device **d = p;
+
+ assert_se(!(*d = sd_device_unref(*d)));
+
+ return NULL;
+}
+
+int main(int argc, char *argv[]) {
+ sd_device *loopback;
+ pthread_t t;
+ const char *key, *value;
+
+ assert_se(unsetenv("SYSTEMD_MEMPOOL") == 0);
+
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+
+ FOREACH_DEVICE_PROPERTY(loopback, key, value)
+ printf("%s=%s\n", key, value);
+
+ assert_se(pthread_create(&t, NULL, thread, &loopback) == 0);
+ assert_se(pthread_join(t, NULL) == 0);
+
+ assert_se(!loopback);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-sd-device.c b/src/libsystemd/sd-device/test-sd-device.c
new file mode 100644
index 0000000..9f48d2b
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device.c
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "device-enumerator-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "hashmap.h"
+#include "string-util.h"
+#include "tests.h"
+#include "time-util.h"
+
+static void test_sd_device_one(sd_device *d) {
+ const char *syspath, *subsystem, *val;
+ dev_t devnum;
+ usec_t usec;
+ int i, r;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+
+ r = sd_device_get_subsystem(d, &subsystem);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ r = sd_device_get_devtype(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ r = sd_device_get_devnum(d, &devnum);
+ assert_se((r >= 0 && major(devnum) > 0) || r == -ENOENT);
+
+ r = sd_device_get_ifindex(d, &i);
+ assert_se((r >= 0 && i > 0) || r == -ENOENT);
+
+ r = sd_device_get_driver(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ assert_se(sd_device_get_devpath(d, &val) >= 0);
+
+ r = sd_device_get_devname(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ assert_se(sd_device_get_sysname(d, &val) >= 0);
+
+ r = sd_device_get_sysnum(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ i = sd_device_get_is_initialized(d);
+ assert_se(i >= 0);
+ if (i > 0) {
+ r = sd_device_get_usec_since_initialized(d, &usec);
+ assert_se((r >= 0 && usec > 0) || r == -ENODATA);
+ }
+
+ r = sd_device_get_sysattr_value(d, "name_assign_type", &val);
+ assert_se(r >= 0 || IN_SET(r, -ENOENT, -EINVAL));
+
+ r = sd_device_get_property_value(d, "ID_NET_DRIVER", &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ log_info("syspath:%s subsystem:%s initialized:%s", syspath, strna(subsystem), yes_no(i));
+}
+
+static void test_sd_device_enumerator_devices(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ FOREACH_DEVICE(e, d)
+ test_sd_device_one(d);
+}
+
+static void test_sd_device_enumerator_subsystems(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ FOREACH_SUBSYSTEM(e, d)
+ test_sd_device_one(d);
+}
+
+static unsigned test_sd_device_enumerator_filter_subsystem_one(const char *subsystem, Hashmap *h) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d, *t;
+ unsigned n_new_dev = 0;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, subsystem, true) >= 0);
+
+ FOREACH_DEVICE(e, d) {
+ const char *syspath;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+ t = hashmap_remove(h, syspath);
+ assert_se(!sd_device_unref(t));
+
+ if (t)
+ log_debug("Removed subsystem:%s syspath:%s", subsystem, syspath);
+ else {
+ log_warning("New device found: subsystem:%s syspath:%s", subsystem, syspath);
+ n_new_dev++;
+ }
+ }
+
+ /* Assume no device is unplugged. */
+ assert_se(hashmap_isempty(h));
+
+ return n_new_dev;
+}
+
+static void test_sd_device_enumerator_filter_subsystem(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_(hashmap_freep) Hashmap *subsystems;
+ unsigned n_new_dev = 0;
+ sd_device *d;
+ Hashmap *h;
+ char *s;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(subsystems = hashmap_new(&string_hash_ops));
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+
+ FOREACH_DEVICE(e, d) {
+ const char *syspath, *subsystem;
+ int r;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+
+ r = sd_device_get_subsystem(d, &subsystem);
+ assert_se(r >= 0 || r == -ENOENT);
+ if (r < 0)
+ continue;
+
+ h = hashmap_get(subsystems, subsystem);
+ if (!h) {
+ char *str;
+ assert_se(str = strdup(subsystem));
+ assert_se(h = hashmap_new(&string_hash_ops));
+ assert_se(hashmap_put(subsystems, str, h) >= 0);
+ }
+
+ assert_se(hashmap_put(h, syspath, d) >= 0);
+ assert_se(sd_device_ref(d));
+
+ log_debug("Added subsystem:%s syspath:%s", subsystem, syspath);
+ }
+
+ while ((h = hashmap_steal_first_key_and_value(subsystems, (void**) &s))) {
+ n_new_dev += test_sd_device_enumerator_filter_subsystem_one(s, h);
+ hashmap_free(h);
+ free(s);
+ }
+
+ if (n_new_dev > 0)
+ log_warning("%u new device is found in re-scan", n_new_dev);
+
+ /* Assume that not so many devices are plugged. */
+ assert_se(n_new_dev <= 10);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_INFO);
+
+ test_sd_device_enumerator_devices();
+ test_sd_device_enumerator_subsystems();
+ test_sd_device_enumerator_filter_subsystem();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-udev-device-thread.c b/src/libsystemd/sd-device/test-udev-device-thread.c
new file mode 100644
index 0000000..a493152
--- /dev/null
+++ b/src/libsystemd/sd-device/test-udev-device-thread.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libudev.h"
+
+#include "macro.h"
+
+static void* thread(void *p) {
+ struct udev_device **d = p;
+
+ assert_se(!(*d = udev_device_unref(*d)));
+
+ return NULL;
+}
+
+int main(int argc, char *argv[]) {
+ struct udev_device *loopback;
+ pthread_t t;
+
+ assert_se(unsetenv("SYSTEMD_MEMPOOL") == 0);
+
+ assert_se(loopback = udev_device_new_from_syspath(NULL, "/sys/class/net/lo"));
+
+ assert_se(udev_device_get_properties_list_entry(loopback));
+
+ assert_se(pthread_create(&t, NULL, thread, &loopback) == 0);
+ assert_se(pthread_join(t, NULL) == 0);
+
+ assert_se(!loopback);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-event/event-source.h b/src/libsystemd/sd-event/event-source.h
new file mode 100644
index 0000000..62d0718
--- /dev/null
+++ b/src/libsystemd/sd-event/event-source.h
@@ -0,0 +1,213 @@
+#pragma once
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
+#include <sys/wait.h>
+
+#include "sd-event.h"
+
+#include "fs-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "prioq.h"
+
+typedef enum EventSourceType {
+ SOURCE_IO,
+ SOURCE_TIME_REALTIME,
+ SOURCE_TIME_BOOTTIME,
+ SOURCE_TIME_MONOTONIC,
+ SOURCE_TIME_REALTIME_ALARM,
+ SOURCE_TIME_BOOTTIME_ALARM,
+ SOURCE_SIGNAL,
+ SOURCE_CHILD,
+ SOURCE_DEFER,
+ SOURCE_POST,
+ SOURCE_EXIT,
+ SOURCE_WATCHDOG,
+ SOURCE_INOTIFY,
+ _SOURCE_EVENT_SOURCE_TYPE_MAX,
+ _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
+} EventSourceType;
+
+/* All objects we use in epoll events start with this value, so that
+ * we know how to dispatch it */
+typedef enum WakeupType {
+ WAKEUP_NONE,
+ WAKEUP_EVENT_SOURCE, /* either I/O or pidfd wakeup */
+ WAKEUP_CLOCK_DATA,
+ WAKEUP_SIGNAL_DATA,
+ WAKEUP_INOTIFY_DATA,
+ _WAKEUP_TYPE_MAX,
+ _WAKEUP_TYPE_INVALID = -1,
+} WakeupType;
+
+struct inode_data;
+
+struct sd_event_source {
+ WakeupType wakeup;
+
+ unsigned n_ref;
+
+ sd_event *event;
+ void *userdata;
+ sd_event_handler_t prepare;
+
+ char *description;
+
+ EventSourceType type:5;
+ signed int enabled:3;
+ bool pending:1;
+ bool dispatching:1;
+ bool floating:1;
+ bool exit_on_failure:1;
+
+ int64_t priority;
+ unsigned pending_index;
+ unsigned prepare_index;
+ uint64_t pending_iteration;
+ uint64_t prepare_iteration;
+
+ sd_event_destroy_t destroy_callback;
+
+ LIST_FIELDS(sd_event_source, sources);
+
+ union {
+ struct {
+ sd_event_io_handler_t callback;
+ int fd;
+ uint32_t events;
+ uint32_t revents;
+ bool registered:1;
+ bool owned:1;
+ } io;
+ struct {
+ sd_event_time_handler_t callback;
+ usec_t next, accuracy;
+ unsigned earliest_index;
+ unsigned latest_index;
+ } time;
+ struct {
+ sd_event_signal_handler_t callback;
+ struct signalfd_siginfo siginfo;
+ int sig;
+ } signal;
+ struct {
+ sd_event_child_handler_t callback;
+ siginfo_t siginfo;
+ pid_t pid;
+ int options;
+ int pidfd;
+ bool registered:1; /* whether the pidfd is registered in the epoll */
+ bool pidfd_owned:1; /* close pidfd when event source is freed */
+ bool process_owned:1; /* kill+reap process when event source is freed */
+ bool exited:1; /* true if process exited (i.e. if there's value in SIGKILLing it if we want to get rid of it) */
+ bool waited:1; /* true if process was waited for (i.e. if there's value in waitid(P_PID)'ing it if we want to get rid of it) */
+ } child;
+ struct {
+ sd_event_handler_t callback;
+ } defer;
+ struct {
+ sd_event_handler_t callback;
+ } post;
+ struct {
+ sd_event_handler_t callback;
+ unsigned prioq_index;
+ } exit;
+ struct {
+ sd_event_inotify_handler_t callback;
+ uint32_t mask;
+ struct inode_data *inode_data;
+ LIST_FIELDS(sd_event_source, by_inode_data);
+ } inotify;
+ };
+};
+
+struct clock_data {
+ WakeupType wakeup;
+ int fd;
+
+ /* For all clocks we maintain two priority queues each, one
+ * ordered for the earliest times the events may be
+ * dispatched, and one ordered by the latest times they must
+ * have been dispatched. The range between the top entries in
+ * the two prioqs is the time window we can freely schedule
+ * wakeups in */
+
+ Prioq *earliest;
+ Prioq *latest;
+ usec_t next;
+
+ bool needs_rearm:1;
+};
+
+struct signal_data {
+ WakeupType wakeup;
+
+ /* For each priority we maintain one signal fd, so that we
+ * only have to dequeue a single event per priority at a
+ * time. */
+
+ int fd;
+ int64_t priority;
+ sigset_t sigset;
+ sd_event_source *current;
+};
+
+/* A structure listing all event sources currently watching a specific inode */
+struct inode_data {
+ /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
+ ino_t ino;
+ dev_t dev;
+
+ /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
+ * rearrange the priority still until then, as we need the original inode to change the priority as we need to
+ * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the
+ * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
+ * the sd-event object, so that it is efficient to close everything, before entering the next event loop
+ * iteration. */
+ int fd;
+
+ /* The inotify "watch descriptor" */
+ int wd;
+
+ /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
+ * most recently been set on the watch descriptor. */
+ uint32_t combined_mask;
+
+ /* All event sources subscribed to this inode */
+ LIST_HEAD(sd_event_source, event_sources);
+
+ /* The inotify object we watch this inode with */
+ struct inotify_data *inotify_data;
+
+ /* A linked list of all inode data objects with fds to close (see above) */
+ LIST_FIELDS(struct inode_data, to_close);
+};
+
+/* A structure encapsulating an inotify fd */
+struct inotify_data {
+ WakeupType wakeup;
+
+ /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
+ * a time */
+
+ int fd;
+ int64_t priority;
+
+ Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
+ Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
+
+ /* The buffer we read inotify events into */
+ union inotify_event_buffer buffer;
+ size_t buffer_filled; /* fill level of the buffer */
+
+ /* How many event sources are currently marked pending for this inotify. We won't read new events off the
+ * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
+ * the events locally if they can't be coalesced). */
+ unsigned n_pending;
+
+ /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
+ * to make it efficient to figure out what inotify objects to process data on next. */
+ LIST_FIELDS(struct inotify_data, buffered);
+};
diff --git a/src/libsystemd/sd-event/event-util.c b/src/libsystemd/sd-event/event-util.c
new file mode 100644
index 0000000..132796f
--- /dev/null
+++ b/src/libsystemd/sd-event/event-util.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "event-source.h"
+#include "event-util.h"
+#include "log.h"
+#include "string-util.h"
+
+int event_reset_time(
+ sd_event *e,
+ sd_event_source **s,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata,
+ int64_t priority,
+ const char *description,
+ bool force_reset) {
+
+ bool created = false;
+ int enabled, r;
+ clockid_t c;
+
+ assert(e);
+ assert(s);
+
+ if (*s) {
+ if (!force_reset) {
+ r = sd_event_source_get_enabled(*s, &enabled);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to query whether event source \"%s\" is enabled or not: %m",
+ strna((*s)->description ?: description));
+
+ if (enabled != SD_EVENT_OFF)
+ return 0;
+ }
+
+ r = sd_event_source_get_time_clock(*s, &c);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to get clock id of event source \"%s\": %m", strna((*s)->description ?: description));
+
+ if (c != clock)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "sd-event: Current clock id %i of event source \"%s\" is different from specified one %i.",
+ (int)c,
+ strna((*s)->description ? : description),
+ (int)clock);
+
+ r = sd_event_source_set_time(*s, usec);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set time for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ r = sd_event_source_set_time_accuracy(*s, accuracy);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set accuracy for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ /* callback function is not updated, as we do not have sd_event_source_set_time_callback(). */
+
+ (void) sd_event_source_set_userdata(*s, userdata);
+
+ r = sd_event_source_set_enabled(*s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to enable event source \"%s\": %m", strna((*s)->description ?: description));
+ } else {
+ r = sd_event_add_time(e, s, clock, usec, accuracy, callback, userdata);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to create timer event \"%s\": %m", strna(description));
+
+ created = true;
+ }
+
+ r = sd_event_source_set_priority(*s, priority);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set priority for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ if (description) {
+ r = sd_event_source_set_description(*s, description);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set description for event source \"%s\": %m", description);
+ }
+
+ return created;
+}
+
+int event_source_disable(sd_event_source *s) {
+ if (!s)
+ return 0;
+
+ return sd_event_source_set_enabled(s, SD_EVENT_OFF);
+}
+
+int event_source_is_enabled(sd_event_source *s) {
+ if (!s)
+ return false;
+
+ return sd_event_source_get_enabled(s, NULL);
+}
diff --git a/src/libsystemd/sd-event/event-util.h b/src/libsystemd/sd-event/event-util.h
new file mode 100644
index 0000000..c8f97bc
--- /dev/null
+++ b/src/libsystemd/sd-event/event-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+int event_reset_time(sd_event *e, sd_event_source **s,
+ clockid_t clock, uint64_t usec, uint64_t accuracy,
+ sd_event_time_handler_t callback, void *userdata,
+ int64_t priority, const char *description, bool force_reset);
+int event_source_disable(sd_event_source *s);
+int event_source_is_enabled(sd_event_source *s);
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
new file mode 100644
index 0000000..789a8c7
--- /dev/null
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -0,0 +1,4010 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
+#include <sys/wait.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "event-source.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "prioq.h"
+#include "process-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "time-util.h"
+
+#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
+
+static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
+ /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
+ return s &&
+ s->type == SOURCE_CHILD &&
+ s->child.pidfd >= 0 &&
+ s->child.options == WEXITED;
+}
+
+static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
+ [SOURCE_IO] = "io",
+ [SOURCE_TIME_REALTIME] = "realtime",
+ [SOURCE_TIME_BOOTTIME] = "bootime",
+ [SOURCE_TIME_MONOTONIC] = "monotonic",
+ [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
+ [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
+ [SOURCE_SIGNAL] = "signal",
+ [SOURCE_CHILD] = "child",
+ [SOURCE_DEFER] = "defer",
+ [SOURCE_POST] = "post",
+ [SOURCE_EXIT] = "exit",
+ [SOURCE_WATCHDOG] = "watchdog",
+ [SOURCE_INOTIFY] = "inotify",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
+
+#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
+
+struct sd_event {
+ unsigned n_ref;
+
+ int epoll_fd;
+ int watchdog_fd;
+
+ Prioq *pending;
+ Prioq *prepare;
+
+ /* timerfd_create() only supports these five clocks so far. We
+ * can add support for more clocks when the kernel learns to
+ * deal with them, too. */
+ struct clock_data realtime;
+ struct clock_data boottime;
+ struct clock_data monotonic;
+ struct clock_data realtime_alarm;
+ struct clock_data boottime_alarm;
+
+ usec_t perturb;
+
+ sd_event_source **signal_sources; /* indexed by signal number */
+ Hashmap *signal_data; /* indexed by priority */
+
+ Hashmap *child_sources;
+ unsigned n_enabled_child_sources;
+
+ Set *post_sources;
+
+ Prioq *exit;
+
+ Hashmap *inotify_data; /* indexed by priority */
+
+ /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
+ LIST_HEAD(struct inode_data, inode_data_to_close);
+
+ /* A list of inotify objects that already have events buffered which aren't processed yet */
+ LIST_HEAD(struct inotify_data, inotify_data_buffered);
+
+ pid_t original_pid;
+
+ uint64_t iteration;
+ triple_timestamp timestamp;
+ int state;
+
+ bool exit_requested:1;
+ bool need_process_child:1;
+ bool watchdog:1;
+ bool profile_delays:1;
+
+ int exit_code;
+
+ pid_t tid;
+ sd_event **default_event_ptr;
+
+ usec_t watchdog_last, watchdog_period;
+
+ unsigned n_sources;
+
+ struct epoll_event *event_queue;
+ size_t event_queue_allocated;
+
+ LIST_HEAD(sd_event_source, sources);
+
+ usec_t last_run, last_log;
+ unsigned delays[sizeof(usec_t) * 8];
+};
+
+static thread_local sd_event *default_event = NULL;
+
+static void source_disconnect(sd_event_source *s);
+static void event_gc_inode_data(sd_event *e, struct inode_data *d);
+
+static sd_event *event_resolve(sd_event *e) {
+ return e == SD_EVENT_DEFAULT ? default_event : e;
+}
+
+static int pending_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+ int r;
+
+ assert(x->pending);
+ assert(y->pending);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Lower priority values first */
+ r = CMP(x->priority, y->priority);
+ if (r != 0)
+ return r;
+
+ /* Older entries first */
+ return CMP(x->pending_iteration, y->pending_iteration);
+}
+
+static int prepare_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+ int r;
+
+ assert(x->prepare);
+ assert(y->prepare);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Move most recently prepared ones last, so that we can stop
+ * preparing as soon as we hit one that has already been
+ * prepared in the current iteration */
+ r = CMP(x->prepare_iteration, y->prepare_iteration);
+ if (r != 0)
+ return r;
+
+ /* Lower priority values first */
+ return CMP(x->priority, y->priority);
+}
+
+static int earliest_time_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+
+ assert(EVENT_SOURCE_IS_TIME(x->type));
+ assert(x->type == y->type);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Move the pending ones to the end */
+ if (!x->pending && y->pending)
+ return -1;
+ if (x->pending && !y->pending)
+ return 1;
+
+ /* Order by time */
+ return CMP(x->time.next, y->time.next);
+}
+
+static usec_t time_event_source_latest(const sd_event_source *s) {
+ return usec_add(s->time.next, s->time.accuracy);
+}
+
+static int latest_time_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+
+ assert(EVENT_SOURCE_IS_TIME(x->type));
+ assert(x->type == y->type);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Move the pending ones to the end */
+ if (!x->pending && y->pending)
+ return -1;
+ if (x->pending && !y->pending)
+ return 1;
+
+ /* Order by time */
+ return CMP(time_event_source_latest(x), time_event_source_latest(y));
+}
+
+static int exit_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+
+ assert(x->type == SOURCE_EXIT);
+ assert(y->type == SOURCE_EXIT);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Lower priority values first */
+ return CMP(x->priority, y->priority);
+}
+
+static void free_clock_data(struct clock_data *d) {
+ assert(d);
+ assert(d->wakeup == WAKEUP_CLOCK_DATA);
+
+ safe_close(d->fd);
+ prioq_free(d->earliest);
+ prioq_free(d->latest);
+}
+
+static sd_event *event_free(sd_event *e) {
+ sd_event_source *s;
+
+ assert(e);
+
+ while ((s = e->sources)) {
+ assert(s->floating);
+ source_disconnect(s);
+ sd_event_source_unref(s);
+ }
+
+ assert(e->n_sources == 0);
+
+ if (e->default_event_ptr)
+ *(e->default_event_ptr) = NULL;
+
+ safe_close(e->epoll_fd);
+ safe_close(e->watchdog_fd);
+
+ free_clock_data(&e->realtime);
+ free_clock_data(&e->boottime);
+ free_clock_data(&e->monotonic);
+ free_clock_data(&e->realtime_alarm);
+ free_clock_data(&e->boottime_alarm);
+
+ prioq_free(e->pending);
+ prioq_free(e->prepare);
+ prioq_free(e->exit);
+
+ free(e->signal_sources);
+ hashmap_free(e->signal_data);
+
+ hashmap_free(e->inotify_data);
+
+ hashmap_free(e->child_sources);
+ set_free(e->post_sources);
+
+ free(e->event_queue);
+
+ return mfree(e);
+}
+
+_public_ int sd_event_new(sd_event** ret) {
+ sd_event *e;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ e = new(sd_event, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (sd_event) {
+ .n_ref = 1,
+ .epoll_fd = -1,
+ .watchdog_fd = -1,
+ .realtime.wakeup = WAKEUP_CLOCK_DATA,
+ .realtime.fd = -1,
+ .realtime.next = USEC_INFINITY,
+ .boottime.wakeup = WAKEUP_CLOCK_DATA,
+ .boottime.fd = -1,
+ .boottime.next = USEC_INFINITY,
+ .monotonic.wakeup = WAKEUP_CLOCK_DATA,
+ .monotonic.fd = -1,
+ .monotonic.next = USEC_INFINITY,
+ .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
+ .realtime_alarm.fd = -1,
+ .realtime_alarm.next = USEC_INFINITY,
+ .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
+ .boottime_alarm.fd = -1,
+ .boottime_alarm.next = USEC_INFINITY,
+ .perturb = USEC_INFINITY,
+ .original_pid = getpid_cached(),
+ };
+
+ r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
+ if (r < 0)
+ goto fail;
+
+ e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (e->epoll_fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
+
+ if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
+ log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
+ e->profile_delays = true;
+ }
+
+ *ret = e;
+ return 0;
+
+fail:
+ event_free(e);
+ return r;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
+
+_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
+ if (s)
+ (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
+ return sd_event_source_unref(s);
+}
+
+static bool event_pid_changed(sd_event *e) {
+ assert(e);
+
+ /* We don't support people creating an event loop and keeping
+ * it around over a fork(). Let's complain. */
+
+ return e->original_pid != getpid_cached();
+}
+
+static void source_io_unregister(sd_event_source *s) {
+ assert(s);
+ assert(s->type == SOURCE_IO);
+
+ if (event_pid_changed(s->event))
+ return;
+
+ if (!s->io.registered)
+ return;
+
+ if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
+ log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
+ strna(s->description), event_source_type_to_string(s->type));
+
+ s->io.registered = false;
+}
+
+static int source_io_register(
+ sd_event_source *s,
+ int enabled,
+ uint32_t events) {
+
+ assert(s);
+ assert(s->type == SOURCE_IO);
+ assert(enabled != SD_EVENT_OFF);
+
+ struct epoll_event ev = {
+ .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
+ .data.ptr = s,
+ };
+
+ if (epoll_ctl(s->event->epoll_fd,
+ s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
+ s->io.fd, &ev) < 0)
+ return -errno;
+
+ s->io.registered = true;
+
+ return 0;
+}
+
+static void source_child_pidfd_unregister(sd_event_source *s) {
+ assert(s);
+ assert(s->type == SOURCE_CHILD);
+
+ if (event_pid_changed(s->event))
+ return;
+
+ if (!s->child.registered)
+ return;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s))
+ if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
+ log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
+ strna(s->description), event_source_type_to_string(s->type));
+
+ s->child.registered = false;
+}
+
+static int source_child_pidfd_register(sd_event_source *s, int enabled) {
+ assert(s);
+ assert(s->type == SOURCE_CHILD);
+ assert(enabled != SD_EVENT_OFF);
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) {
+ struct epoll_event ev = {
+ .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
+ .data.ptr = s,
+ };
+
+ if (epoll_ctl(s->event->epoll_fd,
+ s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
+ s->child.pidfd, &ev) < 0)
+ return -errno;
+ }
+
+ s->child.registered = true;
+ return 0;
+}
+
+static clockid_t event_source_type_to_clock(EventSourceType t) {
+
+ switch (t) {
+
+ case SOURCE_TIME_REALTIME:
+ return CLOCK_REALTIME;
+
+ case SOURCE_TIME_BOOTTIME:
+ return CLOCK_BOOTTIME;
+
+ case SOURCE_TIME_MONOTONIC:
+ return CLOCK_MONOTONIC;
+
+ case SOURCE_TIME_REALTIME_ALARM:
+ return CLOCK_REALTIME_ALARM;
+
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ return CLOCK_BOOTTIME_ALARM;
+
+ default:
+ return (clockid_t) -1;
+ }
+}
+
+static EventSourceType clock_to_event_source_type(clockid_t clock) {
+
+ switch (clock) {
+
+ case CLOCK_REALTIME:
+ return SOURCE_TIME_REALTIME;
+
+ case CLOCK_BOOTTIME:
+ return SOURCE_TIME_BOOTTIME;
+
+ case CLOCK_MONOTONIC:
+ return SOURCE_TIME_MONOTONIC;
+
+ case CLOCK_REALTIME_ALARM:
+ return SOURCE_TIME_REALTIME_ALARM;
+
+ case CLOCK_BOOTTIME_ALARM:
+ return SOURCE_TIME_BOOTTIME_ALARM;
+
+ default:
+ return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
+ }
+}
+
+static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
+ assert(e);
+
+ switch (t) {
+
+ case SOURCE_TIME_REALTIME:
+ return &e->realtime;
+
+ case SOURCE_TIME_BOOTTIME:
+ return &e->boottime;
+
+ case SOURCE_TIME_MONOTONIC:
+ return &e->monotonic;
+
+ case SOURCE_TIME_REALTIME_ALARM:
+ return &e->realtime_alarm;
+
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ return &e->boottime_alarm;
+
+ default:
+ return NULL;
+ }
+}
+
+static void event_free_signal_data(sd_event *e, struct signal_data *d) {
+ assert(e);
+
+ if (!d)
+ return;
+
+ hashmap_remove(e->signal_data, &d->priority);
+ safe_close(d->fd);
+ free(d);
+}
+
+static int event_make_signal_data(
+ sd_event *e,
+ int sig,
+ struct signal_data **ret) {
+
+ struct signal_data *d;
+ bool added = false;
+ sigset_t ss_copy;
+ int64_t priority;
+ int r;
+
+ assert(e);
+
+ if (event_pid_changed(e))
+ return -ECHILD;
+
+ if (e->signal_sources && e->signal_sources[sig])
+ priority = e->signal_sources[sig]->priority;
+ else
+ priority = SD_EVENT_PRIORITY_NORMAL;
+
+ d = hashmap_get(e->signal_data, &priority);
+ if (d) {
+ if (sigismember(&d->sigset, sig) > 0) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+ } else {
+ r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = new(struct signal_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct signal_data) {
+ .wakeup = WAKEUP_SIGNAL_DATA,
+ .fd = -1,
+ .priority = priority,
+ };
+
+ r = hashmap_put(e->signal_data, &d->priority, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ added = true;
+ }
+
+ ss_copy = d->sigset;
+ assert_se(sigaddset(&ss_copy, sig) >= 0);
+
+ r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ d->sigset = ss_copy;
+
+ if (d->fd >= 0) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+
+ d->fd = fd_move_above_stdio(r);
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+
+fail:
+ if (added)
+ event_free_signal_data(e, d);
+
+ return r;
+}
+
+static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
+ assert(e);
+ assert(d);
+
+ /* Turns off the specified signal in the signal data
+ * object. If the signal mask of the object becomes empty that
+ * way removes it. */
+
+ if (sigismember(&d->sigset, sig) == 0)
+ return;
+
+ assert_se(sigdelset(&d->sigset, sig) >= 0);
+
+ if (sigisemptyset(&d->sigset)) {
+ /* If all the mask is all-zero we can get rid of the structure */
+ event_free_signal_data(e, d);
+ return;
+ }
+
+ assert(d->fd >= 0);
+
+ if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
+ log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
+}
+
+static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
+ struct signal_data *d;
+ static const int64_t zero_priority = 0;
+
+ assert(e);
+
+ /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
+ * and possibly drop the signalfd for it. */
+
+ if (sig == SIGCHLD &&
+ e->n_enabled_child_sources > 0)
+ return;
+
+ if (e->signal_sources &&
+ e->signal_sources[sig] &&
+ e->signal_sources[sig]->enabled != SD_EVENT_OFF)
+ return;
+
+ /*
+ * The specified signal might be enabled in three different queues:
+ *
+ * 1) the one that belongs to the priority passed (if it is non-NULL)
+ * 2) the one that belongs to the priority of the event source of the signal (if there is one)
+ * 3) the 0 priority (to cover the SIGCHLD case)
+ *
+ * Hence, let's remove it from all three here.
+ */
+
+ if (priority) {
+ d = hashmap_get(e->signal_data, priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+ }
+
+ if (e->signal_sources && e->signal_sources[sig]) {
+ d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+ }
+
+ d = hashmap_get(e->signal_data, &zero_priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+}
+
+static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
+ assert(s);
+
+ /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
+ * they are enabled/disabled or marked pending and such. */
+
+ if (s->pending)
+ prioq_reshuffle(s->event->pending, s, &s->pending_index);
+
+ if (s->prepare)
+ prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
+}
+
+static void event_source_time_prioq_reshuffle(sd_event_source *s) {
+ struct clock_data *d;
+
+ assert(s);
+ assert(EVENT_SOURCE_IS_TIME(s->type));
+
+ /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
+ * pending, enable state. Makes sure the two prioq's are ordered properly again. */
+ assert_se(d = event_get_clock_data(s->event, s->type));
+ prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+}
+
+static void source_disconnect(sd_event_source *s) {
+ sd_event *event;
+
+ assert(s);
+
+ if (!s->event)
+ return;
+
+ assert(s->event->n_sources > 0);
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ if (s->io.fd >= 0)
+ source_io_unregister(s);
+
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM: {
+ struct clock_data *d;
+
+ d = event_get_clock_data(s->event, s->type);
+ assert(d);
+
+ prioq_remove(d->earliest, s, &s->time.earliest_index);
+ prioq_remove(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+ break;
+ }
+
+ case SOURCE_SIGNAL:
+ if (s->signal.sig > 0) {
+
+ if (s->event->signal_sources)
+ s->event->signal_sources[s->signal.sig] = NULL;
+
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ }
+
+ break;
+
+ case SOURCE_CHILD:
+ if (s->child.pid > 0) {
+ if (s->enabled != SD_EVENT_OFF) {
+ assert(s->event->n_enabled_child_sources > 0);
+ s->event->n_enabled_child_sources--;
+ }
+
+ (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
+ }
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s))
+ source_child_pidfd_unregister(s);
+ else
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+
+ break;
+
+ case SOURCE_DEFER:
+ /* nothing */
+ break;
+
+ case SOURCE_POST:
+ set_remove(s->event->post_sources, s);
+ break;
+
+ case SOURCE_EXIT:
+ prioq_remove(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ case SOURCE_INOTIFY: {
+ struct inode_data *inode_data;
+
+ inode_data = s->inotify.inode_data;
+ if (inode_data) {
+ struct inotify_data *inotify_data;
+ assert_se(inotify_data = inode_data->inotify_data);
+
+ /* Detach this event source from the inode object */
+ LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
+ s->inotify.inode_data = NULL;
+
+ if (s->pending) {
+ assert(inotify_data->n_pending > 0);
+ inotify_data->n_pending--;
+ }
+
+ /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
+ * continued to being watched. That's because inotify doesn't really have an API for that: we
+ * can only change watch masks with access to the original inode either by fd or by path. But
+ * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
+ * continuously and keeping the mount busy which we can't really do. We could reconstruct the
+ * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
+ * there), but given the need for open_by_handle_at() which is privileged and not universally
+ * available this would be quite an incomplete solution. Hence we go the other way, leave the
+ * mask set, even if it is not minimized now, and ignore all events we aren't interested in
+ * anymore after reception. Yes, this sucks, but … Linux … */
+
+ /* Maybe release the inode data (and its inotify) */
+ event_gc_inode_data(s->event, inode_data);
+ }
+
+ break;
+ }
+
+ default:
+ assert_not_reached("Wut? I shouldn't exist.");
+ }
+
+ if (s->pending)
+ prioq_remove(s->event->pending, s, &s->pending_index);
+
+ if (s->prepare)
+ prioq_remove(s->event->prepare, s, &s->prepare_index);
+
+ event = TAKE_PTR(s->event);
+ LIST_REMOVE(sources, event->sources, s);
+ event->n_sources--;
+
+ /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
+ * pidfd associated with this event source, which we'll do only on source_free(). */
+
+ if (!s->floating)
+ sd_event_unref(event);
+}
+
+static void source_free(sd_event_source *s) {
+ assert(s);
+
+ source_disconnect(s);
+
+ if (s->type == SOURCE_IO && s->io.owned)
+ s->io.fd = safe_close(s->io.fd);
+
+ if (s->type == SOURCE_CHILD) {
+ /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
+
+ if (s->child.process_owned) {
+
+ if (!s->child.exited) {
+ bool sent = false;
+
+ if (s->child.pidfd >= 0) {
+ if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
+ if (errno == ESRCH) /* Already dead */
+ sent = true;
+ else if (!ERRNO_IS_NOT_SUPPORTED(errno))
+ log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
+ s->child.pid);
+ } else
+ sent = true;
+ }
+
+ if (!sent)
+ if (kill(s->child.pid, SIGKILL) < 0)
+ if (errno != ESRCH) /* Already dead */
+ log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
+ s->child.pid);
+ }
+
+ if (!s->child.waited) {
+ siginfo_t si = {};
+
+ /* Reap the child if we can */
+ (void) waitid(P_PID, s->child.pid, &si, WEXITED);
+ }
+ }
+
+ if (s->child.pidfd_owned)
+ s->child.pidfd = safe_close(s->child.pidfd);
+ }
+
+ if (s->destroy_callback)
+ s->destroy_callback(s->userdata);
+
+ free(s->description);
+ free(s);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
+
+static int source_set_pending(sd_event_source *s, bool b) {
+ int r;
+
+ assert(s);
+ assert(s->type != SOURCE_EXIT);
+
+ if (s->pending == b)
+ return 0;
+
+ s->pending = b;
+
+ if (b) {
+ s->pending_iteration = s->event->iteration;
+
+ r = prioq_put(s->event->pending, s, &s->pending_index);
+ if (r < 0) {
+ s->pending = false;
+ return r;
+ }
+ } else
+ assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
+
+ if (EVENT_SOURCE_IS_TIME(s->type))
+ event_source_time_prioq_reshuffle(s);
+
+ if (s->type == SOURCE_SIGNAL && !b) {
+ struct signal_data *d;
+
+ d = hashmap_get(s->event->signal_data, &s->priority);
+ if (d && d->current == s)
+ d->current = NULL;
+ }
+
+ if (s->type == SOURCE_INOTIFY) {
+
+ assert(s->inotify.inode_data);
+ assert(s->inotify.inode_data->inotify_data);
+
+ if (b)
+ s->inotify.inode_data->inotify_data->n_pending ++;
+ else {
+ assert(s->inotify.inode_data->inotify_data->n_pending > 0);
+ s->inotify.inode_data->inotify_data->n_pending --;
+ }
+ }
+
+ return 0;
+}
+
+static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
+ sd_event_source *s;
+
+ assert(e);
+
+ s = new(sd_event_source, 1);
+ if (!s)
+ return NULL;
+
+ *s = (struct sd_event_source) {
+ .n_ref = 1,
+ .event = e,
+ .floating = floating,
+ .type = type,
+ .pending_index = PRIOQ_IDX_NULL,
+ .prepare_index = PRIOQ_IDX_NULL,
+ };
+
+ if (!floating)
+ sd_event_ref(e);
+
+ LIST_PREPEND(sources, e->sources, s);
+ e->n_sources++;
+
+ return s;
+}
+
+static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_io(
+ sd_event *e,
+ sd_event_source **ret,
+ int fd,
+ uint32_t events,
+ sd_event_io_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(fd >= 0, -EBADF);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = io_exit_callback;
+
+ s = source_new(e, !ret, SOURCE_IO);
+ if (!s)
+ return -ENOMEM;
+
+ s->wakeup = WAKEUP_EVENT_SOURCE;
+ s->io.fd = fd;
+ s->io.events = events;
+ s->io.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ r = source_io_register(s, s->enabled, events);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static void initialize_perturb(sd_event *e) {
+ sd_id128_t bootid = {};
+
+ /* When we sleep for longer, we try to realign the wakeup to
+ the same time within each minute/second/250ms, so that
+ events all across the system can be coalesced into a single
+ CPU wakeup. However, let's take some system-specific
+ randomness for this value, so that in a network of systems
+ with synced clocks timer events are distributed a
+ bit. Here, we calculate a perturbation usec offset from the
+ boot ID. */
+
+ if (_likely_(e->perturb != USEC_INFINITY))
+ return;
+
+ if (sd_id128_get_boot(&bootid) >= 0)
+ e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
+}
+
+static int event_setup_timer_fd(
+ sd_event *e,
+ struct clock_data *d,
+ clockid_t clock) {
+
+ assert(e);
+ assert(d);
+
+ if (_likely_(d->fd >= 0))
+ return 0;
+
+ _cleanup_close_ int fd = -1;
+
+ fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
+ return -errno;
+
+ d->fd = TAKE_FD(fd);
+ return 0;
+}
+
+static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_time(
+ sd_event *e,
+ sd_event_source **ret,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata) {
+
+ EventSourceType type;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct clock_data *d;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(accuracy != (uint64_t) -1, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
+ return -EOPNOTSUPP;
+
+ type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
+ if (type < 0)
+ return -EOPNOTSUPP;
+
+ if (!callback)
+ callback = time_exit_callback;
+
+ d = event_get_clock_data(e, type);
+ assert(d);
+
+ r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
+ if (r < 0)
+ return r;
+
+ if (d->fd < 0) {
+ r = event_setup_timer_fd(e, d, clock);
+ if (r < 0)
+ return r;
+ }
+
+ s = source_new(e, !ret, type);
+ if (!s)
+ return -ENOMEM;
+
+ s->time.next = usec;
+ s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
+ s->time.callback = callback;
+ s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ d->needs_rearm = true;
+
+ r = prioq_put(d->earliest, s, &s->time.earliest_index);
+ if (r < 0)
+ return r;
+
+ r = prioq_put(d->latest, s, &s->time.latest_index);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_time_relative(
+ sd_event *e,
+ sd_event_source **ret,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata) {
+
+ usec_t t;
+ int r;
+
+ /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
+ * checks for overflow. */
+
+ r = sd_event_now(e, clock, &t);
+ if (r < 0)
+ return r;
+
+ if (usec >= USEC_INFINITY - t)
+ return -EOVERFLOW;
+
+ return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
+}
+
+static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_signal(
+ sd_event *e,
+ sd_event_source **ret,
+ int sig,
+ sd_event_signal_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct signal_data *d;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(SIGNAL_VALID(sig), -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = signal_exit_callback;
+
+ r = signal_is_blocked(sig);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+
+ if (!e->signal_sources) {
+ e->signal_sources = new0(sd_event_source*, _NSIG);
+ if (!e->signal_sources)
+ return -ENOMEM;
+ } else if (e->signal_sources[sig])
+ return -EBUSY;
+
+ s = source_new(e, !ret, SOURCE_SIGNAL);
+ if (!s)
+ return -ENOMEM;
+
+ s->signal.sig = sig;
+ s->signal.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ e->signal_sources[sig] = s;
+
+ r = event_make_signal_data(e, sig, &d);
+ if (r < 0)
+ return r;
+
+ /* Use the signal name as description for the event source by default */
+ (void) sd_event_source_set_description(s, signal_to_string(sig));
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+static bool shall_use_pidfd(void) {
+ /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
+ return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
+}
+
+_public_ int sd_event_add_child(
+ sd_event *e,
+ sd_event_source **ret,
+ pid_t pid,
+ int options,
+ sd_event_child_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(pid > 1, -EINVAL);
+ assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
+ assert_return(options != 0, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = child_exit_callback;
+
+ if (e->n_enabled_child_sources == 0) {
+ /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
+ * for compatibility with pre-pidfd and because we don't want the reap the child processes
+ * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
+ * take effect.
+ *
+ * (As an optimization we only do this check on the first child event source created.) */
+ r = signal_is_blocked(SIGCHLD);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+ }
+
+ r = hashmap_ensure_allocated(&e->child_sources, NULL);
+ if (r < 0)
+ return r;
+
+ if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
+ return -EBUSY;
+
+ s = source_new(e, !ret, SOURCE_CHILD);
+ if (!s)
+ return -ENOMEM;
+
+ s->wakeup = WAKEUP_EVENT_SOURCE;
+ s->child.pid = pid;
+ s->child.options = options;
+ s->child.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
+ * pin the PID, and make regular waitid() handling race-free. */
+
+ if (shall_use_pidfd()) {
+ s->child.pidfd = pidfd_open(s->child.pid, 0);
+ if (s->child.pidfd < 0) {
+ /* Propagate errors unless the syscall is not supported or blocked */
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+ } else
+ s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
+ } else
+ s->child.pidfd = -1;
+
+ r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
+ if (r < 0)
+ return r;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) {
+ /* We have a pidfd and we only want to watch for exit */
+ r = source_child_pidfd_register(s, s->enabled);
+ if (r < 0)
+ return r;
+
+ } else {
+ /* We have no pidfd or we shall wait for some other event than WEXITED */
+ r = event_make_signal_data(e, SIGCHLD, NULL);
+ if (r < 0)
+ return r;
+
+ e->need_process_child = true;
+ }
+
+ e->n_enabled_child_sources++;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_event_add_child_pidfd(
+ sd_event *e,
+ sd_event_source **ret,
+ int pidfd,
+ int options,
+ sd_event_child_handler_t callback,
+ void *userdata) {
+
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
+ assert_return(options != 0, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = child_exit_callback;
+
+ if (e->n_enabled_child_sources == 0) {
+ r = signal_is_blocked(SIGCHLD);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+ }
+
+ r = hashmap_ensure_allocated(&e->child_sources, NULL);
+ if (r < 0)
+ return r;
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
+ return -EBUSY;
+
+ s = source_new(e, !ret, SOURCE_CHILD);
+ if (!s)
+ return -ENOMEM;
+
+ s->wakeup = WAKEUP_EVENT_SOURCE;
+ s->child.pidfd = pidfd;
+ s->child.pid = pid;
+ s->child.options = options;
+ s->child.callback = callback;
+ s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
+ if (r < 0)
+ return r;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) {
+ /* We only want to watch for WEXITED */
+ r = source_child_pidfd_register(s, s->enabled);
+ if (r < 0)
+ return r;
+ } else {
+ /* We shall wait for some other event than WEXITED */
+ r = event_make_signal_data(e, SIGCHLD, NULL);
+ if (r < 0)
+ return r;
+
+ e->need_process_child = true;
+ }
+
+ e->n_enabled_child_sources++;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+ return 0;
+}
+
+static int generic_exit_callback(sd_event_source *s, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_defer(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = generic_exit_callback;
+
+ s = source_new(e, !ret, SOURCE_DEFER);
+ if (!s)
+ return -ENOMEM;
+
+ s->defer.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_post(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = generic_exit_callback;
+
+ s = source_new(e, !ret, SOURCE_POST);
+ if (!s)
+ return -ENOMEM;
+
+ s->post.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ r = set_ensure_put(&e->post_sources, NULL, s);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_exit(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
+ if (r < 0)
+ return r;
+
+ s = source_new(e, !ret, SOURCE_EXIT);
+ if (!s)
+ return -ENOMEM;
+
+ s->exit.callback = callback;
+ s->userdata = userdata;
+ s->exit.prioq_index = PRIOQ_IDX_NULL;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
+ assert(e);
+
+ if (!d)
+ return;
+
+ assert(hashmap_isempty(d->inodes));
+ assert(hashmap_isempty(d->wd));
+
+ if (d->buffer_filled > 0)
+ LIST_REMOVE(buffered, e->inotify_data_buffered, d);
+
+ hashmap_free(d->inodes);
+ hashmap_free(d->wd);
+
+ assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
+
+ if (d->fd >= 0) {
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
+ log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
+
+ safe_close(d->fd);
+ }
+ free(d);
+}
+
+static int event_make_inotify_data(
+ sd_event *e,
+ int64_t priority,
+ struct inotify_data **ret) {
+
+ _cleanup_close_ int fd = -1;
+ struct inotify_data *d;
+ int r;
+
+ assert(e);
+
+ d = hashmap_get(e->inotify_data, &priority);
+ if (d) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+
+ fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+
+ r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = new(struct inotify_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct inotify_data) {
+ .wakeup = WAKEUP_INOTIFY_DATA,
+ .fd = TAKE_FD(fd),
+ .priority = priority,
+ };
+
+ r = hashmap_put(e->inotify_data, &d->priority, d);
+ if (r < 0) {
+ d->fd = safe_close(d->fd);
+ free(d);
+ return r;
+ }
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
+ r = -errno;
+ d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
+ * remove the fd from the epoll first, which we don't want as we couldn't
+ * add it in the first place. */
+ event_free_inotify_data(e, d);
+ return r;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 1;
+}
+
+static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
+ int r;
+
+ assert(x);
+ assert(y);
+
+ r = CMP(x->dev, y->dev);
+ if (r != 0)
+ return r;
+
+ return CMP(x->ino, y->ino);
+}
+
+static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
+ assert(d);
+
+ siphash24_compress(&d->dev, sizeof(d->dev), state);
+ siphash24_compress(&d->ino, sizeof(d->ino), state);
+}
+
+DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
+
+static void event_free_inode_data(
+ sd_event *e,
+ struct inode_data *d) {
+
+ assert(e);
+
+ if (!d)
+ return;
+
+ assert(!d->event_sources);
+
+ if (d->fd >= 0) {
+ LIST_REMOVE(to_close, e->inode_data_to_close, d);
+ safe_close(d->fd);
+ }
+
+ if (d->inotify_data) {
+
+ if (d->wd >= 0) {
+ if (d->inotify_data->fd >= 0) {
+ /* So here's a problem. At the time this runs the watch descriptor might already be
+ * invalidated, because an IN_IGNORED event might be queued right the moment we enter
+ * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
+ * likely case to happen. */
+
+ if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
+ log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
+ }
+
+ assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
+ }
+
+ assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
+ }
+
+ free(d);
+}
+
+static void event_gc_inode_data(
+ sd_event *e,
+ struct inode_data *d) {
+
+ struct inotify_data *inotify_data;
+
+ assert(e);
+
+ if (!d)
+ return;
+
+ if (d->event_sources)
+ return;
+
+ inotify_data = d->inotify_data;
+ event_free_inode_data(e, d);
+
+ if (inotify_data && hashmap_isempty(inotify_data->inodes))
+ event_free_inotify_data(e, inotify_data);
+}
+
+static int event_make_inode_data(
+ sd_event *e,
+ struct inotify_data *inotify_data,
+ dev_t dev,
+ ino_t ino,
+ struct inode_data **ret) {
+
+ struct inode_data *d, key;
+ int r;
+
+ assert(e);
+ assert(inotify_data);
+
+ key = (struct inode_data) {
+ .ino = ino,
+ .dev = dev,
+ };
+
+ d = hashmap_get(inotify_data->inodes, &key);
+ if (d) {
+ if (ret)
+ *ret = d;
+
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = new(struct inode_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct inode_data) {
+ .dev = dev,
+ .ino = ino,
+ .wd = -1,
+ .fd = -1,
+ .inotify_data = inotify_data,
+ };
+
+ r = hashmap_put(inotify_data->inodes, d, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 1;
+}
+
+static uint32_t inode_data_determine_mask(struct inode_data *d) {
+ bool excl_unlink = true;
+ uint32_t combined = 0;
+ sd_event_source *s;
+
+ assert(d);
+
+ /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
+ * the IN_EXCL_UNLINK flag is ANDed instead.
+ *
+ * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
+ * because we cannot change the mask anymore after the event source was created once, since the kernel has no
+ * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
+ * events we don't care for client-side. */
+
+ LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
+
+ if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
+ excl_unlink = false;
+
+ combined |= s->inotify.mask;
+ }
+
+ return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
+}
+
+static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
+ uint32_t combined_mask;
+ int wd, r;
+
+ assert(d);
+ assert(d->fd >= 0);
+
+ combined_mask = inode_data_determine_mask(d);
+
+ if (d->wd >= 0 && combined_mask == d->combined_mask)
+ return 0;
+
+ r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
+ if (r < 0)
+ return r;
+
+ wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
+ if (wd < 0)
+ return -errno;
+
+ if (d->wd < 0) {
+ r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
+ if (r < 0) {
+ (void) inotify_rm_watch(d->inotify_data->fd, wd);
+ return r;
+ }
+
+ d->wd = wd;
+
+ } else if (d->wd != wd) {
+
+ log_debug("Weird, the watch descriptor we already knew for this inode changed?");
+ (void) inotify_rm_watch(d->fd, wd);
+ return -EINVAL;
+ }
+
+ d->combined_mask = combined_mask;
+ return 1;
+}
+
+static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_inotify(
+ sd_event *e,
+ sd_event_source **ret,
+ const char *path,
+ uint32_t mask,
+ sd_event_inotify_handler_t callback,
+ void *userdata) {
+
+ struct inotify_data *inotify_data = NULL;
+ struct inode_data *inode_data = NULL;
+ _cleanup_close_ int fd = -1;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct stat st;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(path, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = inotify_exit_callback;
+
+ /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
+ * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
+ * the user can't use them for us. */
+ if (mask & IN_MASK_ADD)
+ return -EINVAL;
+
+ fd = open(path, O_PATH|O_CLOEXEC|
+ (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
+ (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ s = source_new(e, !ret, SOURCE_INOTIFY);
+ if (!s)
+ return -ENOMEM;
+
+ s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
+ s->inotify.mask = mask;
+ s->inotify.callback = callback;
+ s->userdata = userdata;
+
+ /* Allocate an inotify object for this priority, and an inode object within it */
+ r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
+ if (r < 0)
+ return r;
+
+ r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
+ if (r < 0) {
+ event_free_inotify_data(e, inotify_data);
+ return r;
+ }
+
+ /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
+ * the event source, until then, for which we need the original inode. */
+ if (inode_data->fd < 0) {
+ inode_data->fd = TAKE_FD(fd);
+ LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
+ }
+
+ /* Link our event source to the inode data object */
+ LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
+ s->inotify.inode_data = inode_data;
+
+ /* Actually realize the watch now */
+ r = inode_data_realize_watch(e, inode_data);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s, path);
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static sd_event_source* event_source_free(sd_event_source *s) {
+ if (!s)
+ return NULL;
+
+ /* Here's a special hack: when we are called from a
+ * dispatch handler we won't free the event source
+ * immediately, but we will detach the fd from the
+ * epoll. This way it is safe for the caller to unref
+ * the event source and immediately close the fd, but
+ * we still retain a valid event source object after
+ * the callback. */
+
+ if (s->dispatching) {
+ if (s->type == SOURCE_IO)
+ source_io_unregister(s);
+
+ source_disconnect(s);
+ } else
+ source_free(s);
+
+ return NULL;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
+
+_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ return free_and_strdup(&s->description, description);
+}
+
+_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
+ assert_return(s, -EINVAL);
+ assert_return(description, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (!s->description)
+ return -ENXIO;
+
+ *description = s->description;
+ return 0;
+}
+
+_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
+ assert_return(s, NULL);
+
+ return s->event;
+}
+
+_public_ int sd_event_source_get_pending(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ return s->pending;
+}
+
+_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ return s->io.fd;
+}
+
+_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (s->io.fd == fd)
+ return 0;
+
+ if (s->enabled == SD_EVENT_OFF) {
+ s->io.fd = fd;
+ s->io.registered = false;
+ } else {
+ int saved_fd;
+
+ saved_fd = s->io.fd;
+ assert(s->io.registered);
+
+ s->io.fd = fd;
+ s->io.registered = false;
+
+ r = source_io_register(s, s->enabled, s->io.events);
+ if (r < 0) {
+ s->io.fd = saved_fd;
+ s->io.registered = true;
+ return r;
+ }
+
+ (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
+ }
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+
+ return s->io.owned;
+}
+
+_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+
+ s->io.owned = own;
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
+ assert_return(s, -EINVAL);
+ assert_return(events, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *events = s->io.events;
+ return 0;
+}
+
+_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ /* edge-triggered updates are never skipped, so we can reset edges */
+ if (s->io.events == events && !(events & EPOLLET))
+ return 0;
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ if (s->enabled != SD_EVENT_OFF) {
+ r = source_io_register(s, s->enabled, events);
+ if (r < 0)
+ return r;
+ }
+
+ s->io.events = events;
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
+ assert_return(s, -EINVAL);
+ assert_return(revents, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(s->pending, -ENODATA);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *revents = s->io.revents;
+ return 0;
+}
+
+_public_ int sd_event_source_get_signal(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_SIGNAL, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ return s->signal.sig;
+}
+
+_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *priority = s->priority;
+ return 0;
+}
+
+_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
+ bool rm_inotify = false, rm_inode = false;
+ struct inotify_data *new_inotify_data = NULL;
+ struct inode_data *new_inode_data = NULL;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (s->priority == priority)
+ return 0;
+
+ if (s->type == SOURCE_INOTIFY) {
+ struct inode_data *old_inode_data;
+
+ assert(s->inotify.inode_data);
+ old_inode_data = s->inotify.inode_data;
+
+ /* We need the original fd to change the priority. If we don't have it we can't change the priority,
+ * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
+ * events we allow priority changes only until the first following iteration. */
+ if (old_inode_data->fd < 0)
+ return -EOPNOTSUPP;
+
+ r = event_make_inotify_data(s->event, priority, &new_inotify_data);
+ if (r < 0)
+ return r;
+ rm_inotify = r > 0;
+
+ r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
+ if (r < 0)
+ goto fail;
+ rm_inode = r > 0;
+
+ if (new_inode_data->fd < 0) {
+ /* Duplicate the fd for the new inode object if we don't have any yet */
+ new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
+ if (new_inode_data->fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
+ }
+
+ /* Move the event source to the new inode data structure */
+ LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
+ LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
+ s->inotify.inode_data = new_inode_data;
+
+ /* Now create the new watch */
+ r = inode_data_realize_watch(s->event, new_inode_data);
+ if (r < 0) {
+ /* Move it back */
+ LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
+ LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
+ s->inotify.inode_data = old_inode_data;
+ goto fail;
+ }
+
+ s->priority = priority;
+
+ event_gc_inode_data(s->event, old_inode_data);
+
+ } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
+ struct signal_data *old, *d;
+
+ /* Move us from the signalfd belonging to the old
+ * priority to the signalfd of the new priority */
+
+ assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
+
+ s->priority = priority;
+
+ r = event_make_signal_data(s->event, s->signal.sig, &d);
+ if (r < 0) {
+ s->priority = old->priority;
+ return r;
+ }
+
+ event_unmask_signal_data(s->event, old, s->signal.sig);
+ } else
+ s->priority = priority;
+
+ event_source_pp_prioq_reshuffle(s);
+
+ if (s->type == SOURCE_EXIT)
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+
+ return 0;
+
+fail:
+ if (rm_inode)
+ event_free_inode_data(s->event, new_inode_data);
+
+ if (rm_inotify)
+ event_free_inotify_data(s->event, new_inotify_data);
+
+ return r;
+}
+
+_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (m)
+ *m = s->enabled;
+ return s->enabled != SD_EVENT_OFF;
+}
+
+static int event_source_disable(sd_event_source *s) {
+ int r;
+
+ assert(s);
+ assert(s->enabled != SD_EVENT_OFF);
+
+ /* Unset the pending flag when this event source is disabled */
+ if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ s->enabled = SD_EVENT_OFF;
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ source_io_unregister(s);
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ event_source_time_prioq_reshuffle(s);
+ break;
+
+ case SOURCE_SIGNAL:
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ break;
+
+ case SOURCE_CHILD:
+ assert(s->event->n_enabled_child_sources > 0);
+ s->event->n_enabled_child_sources--;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s))
+ source_child_pidfd_unregister(s);
+ else
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+ break;
+
+ case SOURCE_EXIT:
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ case SOURCE_DEFER:
+ case SOURCE_POST:
+ case SOURCE_INOTIFY:
+ break;
+
+ default:
+ assert_not_reached("Wut? I shouldn't exist.");
+ }
+
+ return 0;
+}
+
+static int event_source_enable(sd_event_source *s, int enable) {
+ int r;
+
+ assert(s);
+ assert(IN_SET(enable, SD_EVENT_ON, SD_EVENT_ONESHOT));
+ assert(s->enabled == SD_EVENT_OFF);
+
+ /* Unset the pending flag when this event source is enabled */
+ if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ switch (s->type) {
+ case SOURCE_IO:
+ r = source_io_register(s, enable, s->io.events);
+ if (r < 0)
+ return r;
+ break;
+
+ case SOURCE_SIGNAL:
+ r = event_make_signal_data(s->event, s->signal.sig, NULL);
+ if (r < 0) {
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ return r;
+ }
+
+ break;
+
+ case SOURCE_CHILD:
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) {
+ /* yes, we have pidfd */
+
+ r = source_child_pidfd_register(s, enable);
+ if (r < 0)
+ return r;
+ } else {
+ /* no pidfd, or something other to watch for than WEXITED */
+
+ r = event_make_signal_data(s->event, SIGCHLD, NULL);
+ if (r < 0) {
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+ return r;
+ }
+ }
+
+ s->event->n_enabled_child_sources++;
+
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ case SOURCE_EXIT:
+ case SOURCE_DEFER:
+ case SOURCE_POST:
+ case SOURCE_INOTIFY:
+ break;
+
+ default:
+ assert_not_reached("Wut? I shouldn't exist.");
+ }
+
+ s->enabled = enable;
+
+ /* Non-failing operations below */
+ switch (s->type) {
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ event_source_time_prioq_reshuffle(s);
+ break;
+
+ case SOURCE_EXIT:
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
+ if (s->event->state == SD_EVENT_FINISHED)
+ return m == SD_EVENT_OFF ? 0 : -ESTALE;
+
+ if (s->enabled == m) /* No change? */
+ return 0;
+
+ if (m == SD_EVENT_OFF)
+ r = event_source_disable(s);
+ else {
+ if (s->enabled != SD_EVENT_OFF) {
+ /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
+ * event source is already enabled after all. */
+ s->enabled = m;
+ return 0;
+ }
+
+ r = event_source_enable(s, m);
+ }
+ if (r < 0)
+ return r;
+
+ event_source_pp_prioq_reshuffle(s);
+ return 0;
+}
+
+_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *usec = s->time.next;
+ return 0;
+}
+
+_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ s->time.next = usec;
+
+ event_source_time_prioq_reshuffle(s);
+ return 0;
+}
+
+_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
+ usec_t t;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+
+ r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
+ if (r < 0)
+ return r;
+
+ if (usec >= USEC_INFINITY - t)
+ return -EOVERFLOW;
+
+ return sd_event_source_set_time(s, t + usec);
+}
+
+_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *usec = s->time.accuracy;
+ return 0;
+}
+
+_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(usec != (uint64_t) -1, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ if (usec == 0)
+ usec = DEFAULT_ACCURACY_USEC;
+
+ s->time.accuracy = usec;
+
+ event_source_time_prioq_reshuffle(s);
+ return 0;
+}
+
+_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
+ assert_return(s, -EINVAL);
+ assert_return(clock, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *clock = event_source_type_to_clock(s->type);
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
+ assert_return(s, -EINVAL);
+ assert_return(pid, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *pid = s->child.pid;
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (s->child.pidfd < 0)
+ return -EOPNOTSUPP;
+
+ return s->child.pidfd;
+}
+
+_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+ assert_return(SIGNAL_VALID(sig), -EINVAL);
+
+ /* If we already have seen indication the process exited refuse sending a signal early. This way we
+ * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
+ * available. */
+ if (s->child.exited)
+ return -ESRCH;
+
+ if (s->child.pidfd >= 0) {
+ siginfo_t copy;
+
+ /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
+ * structure here */
+ if (si)
+ copy = *si;
+
+ if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
+ /* Let's propagate the error only if the system call is not implemented or prohibited */
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+ } else
+ return 0;
+ }
+
+ /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
+ * this here. */
+ if (flags != 0)
+ return -EOPNOTSUPP;
+
+ if (si) {
+ /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
+ siginfo_t copy = *si;
+
+ if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
+ return -errno;
+ } else if (kill(s->child.pid, sig) < 0)
+ return -errno;
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+
+ if (s->child.pidfd < 0)
+ return -EOPNOTSUPP;
+
+ return s->child.pidfd_owned;
+}
+
+_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+
+ if (s->child.pidfd < 0)
+ return -EOPNOTSUPP;
+
+ s->child.pidfd_owned = own;
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+
+ return s->child.process_owned;
+}
+
+_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+
+ s->child.process_owned = own;
+ return 0;
+}
+
+_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
+ assert_return(s, -EINVAL);
+ assert_return(mask, -EINVAL);
+ assert_return(s->type == SOURCE_INOTIFY, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *mask = s->inotify.mask;
+ return 0;
+}
+
+_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (s->prepare == callback)
+ return 0;
+
+ if (callback && s->prepare) {
+ s->prepare = callback;
+ return 0;
+ }
+
+ r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
+ if (r < 0)
+ return r;
+
+ s->prepare = callback;
+
+ if (callback) {
+ r = prioq_put(s->event->prepare, s, &s->prepare_index);
+ if (r < 0)
+ return r;
+ } else
+ prioq_remove(s->event->prepare, s, &s->prepare_index);
+
+ return 0;
+}
+
+_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
+ assert_return(s, NULL);
+
+ return s->userdata;
+}
+
+_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
+ void *ret;
+
+ assert_return(s, NULL);
+
+ ret = s->userdata;
+ s->userdata = userdata;
+
+ return ret;
+}
+
+static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
+ usec_t c;
+ assert(e);
+ assert(a <= b);
+
+ if (a <= 0)
+ return 0;
+ if (a >= USEC_INFINITY)
+ return USEC_INFINITY;
+
+ if (b <= a + 1)
+ return a;
+
+ initialize_perturb(e);
+
+ /*
+ Find a good time to wake up again between times a and b. We
+ have two goals here:
+
+ a) We want to wake up as seldom as possible, hence prefer
+ later times over earlier times.
+
+ b) But if we have to wake up, then let's make sure to
+ dispatch as much as possible on the entire system.
+
+ We implement this by waking up everywhere at the same time
+ within any given minute if we can, synchronised via the
+ perturbation value determined from the boot ID. If we can't,
+ then we try to find the same spot in every 10s, then 1s and
+ then 250ms step. Otherwise, we pick the last possible time
+ to wake up.
+ */
+
+ c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MINUTE))
+ return b;
+
+ c -= USEC_PER_MINUTE;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC*10))
+ return b;
+
+ c -= USEC_PER_SEC*10;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC))
+ return b;
+
+ c -= USEC_PER_SEC;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MSEC*250))
+ return b;
+
+ c -= USEC_PER_MSEC*250;
+ }
+
+ if (c >= a)
+ return c;
+
+ return b;
+}
+
+static int event_arm_timer(
+ sd_event *e,
+ struct clock_data *d) {
+
+ struct itimerspec its = {};
+ sd_event_source *a, *b;
+ usec_t t;
+
+ assert(e);
+ assert(d);
+
+ if (!d->needs_rearm)
+ return 0;
+ else
+ d->needs_rearm = false;
+
+ a = prioq_peek(d->earliest);
+ if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
+
+ if (d->fd < 0)
+ return 0;
+
+ if (d->next == USEC_INFINITY)
+ return 0;
+
+ /* disarm */
+ if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
+ return -errno;
+
+ d->next = USEC_INFINITY;
+ return 0;
+ }
+
+ b = prioq_peek(d->latest);
+ assert_se(b && b->enabled != SD_EVENT_OFF);
+
+ t = sleep_between(e, a->time.next, time_event_source_latest(b));
+ if (d->next == t)
+ return 0;
+
+ assert_se(d->fd >= 0);
+
+ if (t == 0) {
+ /* We don' want to disarm here, just mean some time looooong ago. */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 1;
+ } else
+ timespec_store(&its.it_value, t);
+
+ if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
+ return -errno;
+
+ d->next = t;
+ return 0;
+}
+
+static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
+ assert(e);
+ assert(s);
+ assert(s->type == SOURCE_IO);
+
+ /* If the event source was already pending, we just OR in the
+ * new revents, otherwise we reset the value. The ORing is
+ * necessary to handle EPOLLONESHOT events properly where
+ * readability might happen independently of writability, and
+ * we need to keep track of both */
+
+ if (s->pending)
+ s->io.revents |= revents;
+ else
+ s->io.revents = revents;
+
+ return source_set_pending(s, true);
+}
+
+static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
+ uint64_t x;
+ ssize_t ss;
+
+ assert(e);
+ assert(fd >= 0);
+
+ assert_return(events == EPOLLIN, -EIO);
+
+ ss = read(fd, &x, sizeof(x));
+ if (ss < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ if (_unlikely_(ss != sizeof(x)))
+ return -EIO;
+
+ if (next)
+ *next = USEC_INFINITY;
+
+ return 0;
+}
+
+static int process_timer(
+ sd_event *e,
+ usec_t n,
+ struct clock_data *d) {
+
+ sd_event_source *s;
+ int r;
+
+ assert(e);
+ assert(d);
+
+ for (;;) {
+ s = prioq_peek(d->earliest);
+ if (!s ||
+ s->time.next > n ||
+ s->enabled == SD_EVENT_OFF ||
+ s->pending)
+ break;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+
+ event_source_time_prioq_reshuffle(s);
+ }
+
+ return 0;
+}
+
+static int process_child(sd_event *e) {
+ sd_event_source *s;
+ int r;
+
+ assert(e);
+
+ e->need_process_child = false;
+
+ /*
+ So, this is ugly. We iteratively invoke waitid() with P_PID
+ + WNOHANG for each PID we wait for, instead of using
+ P_ALL. This is because we only want to get child
+ information of very specific child processes, and not all
+ of them. We might not have processed the SIGCHLD even of a
+ previous invocation and we don't want to maintain a
+ unbounded *per-child* event queue, hence we really don't
+ want anything flushed out of the kernel's queue that we
+ don't care about. Since this is O(n) this means that if you
+ have a lot of processes you probably want to handle SIGCHLD
+ yourself.
+
+ We do not reap the children here (by using WNOWAIT), this
+ is only done after the event source is dispatched so that
+ the callback still sees the process as a zombie.
+ */
+
+ HASHMAP_FOREACH(s, e->child_sources) {
+ assert(s->type == SOURCE_CHILD);
+
+ if (s->pending)
+ continue;
+
+ if (s->enabled == SD_EVENT_OFF)
+ continue;
+
+ if (s->child.exited)
+ continue;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
+ continue;
+
+ zero(s->child.siginfo);
+ if (waitid(P_PID, s->child.pid, &s->child.siginfo,
+ WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
+ return -errno;
+
+ if (s->child.siginfo.si_pid != 0) {
+ bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
+
+ if (zombie)
+ s->child.exited = true;
+
+ if (!zombie && (s->child.options & WEXITED)) {
+ /* If the child isn't dead then let's
+ * immediately remove the state change
+ * from the queue, since there's no
+ * benefit in leaving it queued */
+
+ assert(s->child.options & (WSTOPPED|WCONTINUED));
+ (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
+ }
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
+ assert(e);
+ assert(s);
+ assert(s->type == SOURCE_CHILD);
+
+ if (s->pending)
+ return 0;
+
+ if (s->enabled == SD_EVENT_OFF)
+ return 0;
+
+ if (!EVENT_SOURCE_WATCH_PIDFD(s))
+ return 0;
+
+ zero(s->child.siginfo);
+ if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
+ return -errno;
+
+ if (s->child.siginfo.si_pid == 0)
+ return 0;
+
+ if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
+ s->child.exited = true;
+
+ return source_set_pending(s, true);
+}
+
+static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
+ bool read_one = false;
+ int r;
+
+ assert(e);
+ assert(d);
+ assert_return(events == EPOLLIN, -EIO);
+
+ /* If there's a signal queued on this priority and SIGCHLD is
+ on this priority too, then make sure to recheck the
+ children we watch. This is because we only ever dequeue
+ the first signal per priority, and if we dequeue one, and
+ SIGCHLD might be enqueued later we wouldn't know, but we
+ might have higher priority children we care about hence we
+ need to check that explicitly. */
+
+ if (sigismember(&d->sigset, SIGCHLD))
+ e->need_process_child = true;
+
+ /* If there's already an event source pending for this
+ * priority we don't read another */
+ if (d->current)
+ return 0;
+
+ for (;;) {
+ struct signalfd_siginfo si;
+ ssize_t n;
+ sd_event_source *s = NULL;
+
+ n = read(d->fd, &si, sizeof(si));
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return read_one;
+
+ return -errno;
+ }
+
+ if (_unlikely_(n != sizeof(si)))
+ return -EIO;
+
+ assert(SIGNAL_VALID(si.ssi_signo));
+
+ read_one = true;
+
+ if (e->signal_sources)
+ s = e->signal_sources[si.ssi_signo];
+ if (!s)
+ continue;
+ if (s->pending)
+ continue;
+
+ s->signal.siginfo = si;
+ d->current = s;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+}
+
+static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
+ ssize_t n;
+
+ assert(e);
+ assert(d);
+
+ assert_return(revents == EPOLLIN, -EIO);
+
+ /* If there's already an event source pending for this priority, don't read another */
+ if (d->n_pending > 0)
+ return 0;
+
+ /* Is the read buffer non-empty? If so, let's not read more */
+ if (d->buffer_filled > 0)
+ return 0;
+
+ n = read(d->fd, &d->buffer, sizeof(d->buffer));
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ assert(n > 0);
+ d->buffer_filled = (size_t) n;
+ LIST_PREPEND(buffered, e->inotify_data_buffered, d);
+
+ return 1;
+}
+
+static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
+ assert(e);
+ assert(d);
+ assert(sz <= d->buffer_filled);
+
+ if (sz == 0)
+ return;
+
+ /* Move the rest to the buffer to the front, in order to get things properly aligned again */
+ memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
+ d->buffer_filled -= sz;
+
+ if (d->buffer_filled == 0)
+ LIST_REMOVE(buffered, e->inotify_data_buffered, d);
+}
+
+static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
+ int r;
+
+ assert(e);
+ assert(d);
+
+ /* If there's already an event source pending for this priority, don't read another */
+ if (d->n_pending > 0)
+ return 0;
+
+ while (d->buffer_filled > 0) {
+ size_t sz;
+
+ /* Let's validate that the event structures are complete */
+ if (d->buffer_filled < offsetof(struct inotify_event, name))
+ return -EIO;
+
+ sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
+ if (d->buffer_filled < sz)
+ return -EIO;
+
+ if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
+ struct inode_data *inode_data;
+
+ /* The queue overran, let's pass this event to all event sources connected to this inotify
+ * object */
+
+ HASHMAP_FOREACH(inode_data, d->inodes) {
+ sd_event_source *s;
+
+ LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
+
+ if (s->enabled == SD_EVENT_OFF)
+ continue;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ }
+ }
+ } else {
+ struct inode_data *inode_data;
+ sd_event_source *s;
+
+ /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
+ * our watch descriptor table. */
+ if (d->buffer.ev.mask & IN_IGNORED) {
+
+ inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
+ if (!inode_data) {
+ event_inotify_data_drop(e, d, sz);
+ continue;
+ }
+
+ /* The watch descriptor was removed by the kernel, let's drop it here too */
+ inode_data->wd = -1;
+ } else {
+ inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
+ if (!inode_data) {
+ event_inotify_data_drop(e, d, sz);
+ continue;
+ }
+ }
+
+ /* Trigger all event sources that are interested in these events. Also trigger all event
+ * sources if IN_IGNORED or IN_UNMOUNT is set. */
+ LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
+
+ if (s->enabled == SD_EVENT_OFF)
+ continue;
+
+ if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
+ (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
+ continue;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Something pending now? If so, let's finish, otherwise let's read more. */
+ if (d->n_pending > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int process_inotify(sd_event *e) {
+ struct inotify_data *d;
+ int r, done = 0;
+
+ assert(e);
+
+ LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
+ r = event_inotify_data_process(e, d);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ done ++;
+ }
+
+ return done;
+}
+
+static int source_dispatch(sd_event_source *s) {
+ _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
+ EventSourceType saved_type;
+ int r = 0;
+
+ assert(s);
+ assert(s->pending || s->type == SOURCE_EXIT);
+
+ /* Save the event source type, here, so that we still know it after the event callback which might
+ * invalidate the event. */
+ saved_type = s->type;
+
+ /* Similar, store a reference to the event loop object, so that we can still access it after the
+ * callback might have invalidated/disconnected the event source. */
+ saved_event = sd_event_ref(s->event);
+
+ if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->type != SOURCE_POST) {
+ sd_event_source *z;
+
+ /* If we execute a non-post source, let's mark all
+ * post sources as pending */
+
+ SET_FOREACH(z, s->event->post_sources) {
+ if (z->enabled == SD_EVENT_OFF)
+ continue;
+
+ r = source_set_pending(z, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (s->enabled == SD_EVENT_ONESHOT) {
+ r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+ }
+
+ s->dispatching = true;
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ r = s->time.callback(s, s->time.next, s->userdata);
+ break;
+
+ case SOURCE_SIGNAL:
+ r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
+ break;
+
+ case SOURCE_CHILD: {
+ bool zombie;
+
+ zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
+
+ r = s->child.callback(s, &s->child.siginfo, s->userdata);
+
+ /* Now, reap the PID for good. */
+ if (zombie) {
+ (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
+ s->child.waited = true;
+ }
+
+ break;
+ }
+
+ case SOURCE_DEFER:
+ r = s->defer.callback(s, s->userdata);
+ break;
+
+ case SOURCE_POST:
+ r = s->post.callback(s, s->userdata);
+ break;
+
+ case SOURCE_EXIT:
+ r = s->exit.callback(s, s->userdata);
+ break;
+
+ case SOURCE_INOTIFY: {
+ struct sd_event *e = s->event;
+ struct inotify_data *d;
+ size_t sz;
+
+ assert(s->inotify.inode_data);
+ assert_se(d = s->inotify.inode_data->inotify_data);
+
+ assert(d->buffer_filled >= offsetof(struct inotify_event, name));
+ sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
+ assert(d->buffer_filled >= sz);
+
+ r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
+
+ /* When no event is pending anymore on this inotify object, then let's drop the event from the
+ * buffer. */
+ if (d->n_pending == 0)
+ event_inotify_data_drop(e, d, sz);
+
+ break;
+ }
+
+ case SOURCE_WATCHDOG:
+ case _SOURCE_EVENT_SOURCE_TYPE_MAX:
+ case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
+ assert_not_reached("Wut? I shouldn't exist.");
+ }
+
+ s->dispatching = false;
+
+ if (r < 0) {
+ log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
+ strna(s->description),
+ event_source_type_to_string(saved_type),
+ s->exit_on_failure ? "exiting" : "disabling");
+
+ if (s->exit_on_failure)
+ (void) sd_event_exit(saved_event, r);
+ }
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ sd_event_source_set_enabled(s, SD_EVENT_OFF);
+
+ return 1;
+}
+
+static int event_prepare(sd_event *e) {
+ int r;
+
+ assert(e);
+
+ for (;;) {
+ sd_event_source *s;
+
+ s = prioq_peek(e->prepare);
+ if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
+ break;
+
+ s->prepare_iteration = e->iteration;
+ r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
+ if (r < 0)
+ return r;
+
+ assert(s->prepare);
+
+ s->dispatching = true;
+ r = s->prepare(s, s->userdata);
+ s->dispatching = false;
+
+ if (r < 0) {
+ log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
+ strna(s->description),
+ event_source_type_to_string(s->type),
+ s->exit_on_failure ? "exiting" : "disabling");
+
+ if (s->exit_on_failure)
+ (void) sd_event_exit(e, r);
+ }
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ sd_event_source_set_enabled(s, SD_EVENT_OFF);
+ }
+
+ return 0;
+}
+
+static int dispatch_exit(sd_event *e) {
+ sd_event_source *p;
+ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
+ int r;
+
+ assert(e);
+
+ p = prioq_peek(e->exit);
+ if (!p || p->enabled == SD_EVENT_OFF) {
+ e->state = SD_EVENT_FINISHED;
+ return 0;
+ }
+
+ ref = sd_event_ref(e);
+ e->iteration++;
+ e->state = SD_EVENT_EXITING;
+ r = source_dispatch(p);
+ e->state = SD_EVENT_INITIAL;
+ return r;
+}
+
+static sd_event_source* event_next_pending(sd_event *e) {
+ sd_event_source *p;
+
+ assert(e);
+
+ p = prioq_peek(e->pending);
+ if (!p)
+ return NULL;
+
+ if (p->enabled == SD_EVENT_OFF)
+ return NULL;
+
+ return p;
+}
+
+static int arm_watchdog(sd_event *e) {
+ struct itimerspec its = {};
+ usec_t t;
+
+ assert(e);
+ assert(e->watchdog_fd >= 0);
+
+ t = sleep_between(e,
+ e->watchdog_last + (e->watchdog_period / 2),
+ e->watchdog_last + (e->watchdog_period * 3 / 4));
+
+ timespec_store(&its.it_value, t);
+
+ /* Make sure we never set the watchdog to 0, which tells the
+ * kernel to disable it. */
+ if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
+ its.it_value.tv_nsec = 1;
+
+ if (timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int process_watchdog(sd_event *e) {
+ assert(e);
+
+ if (!e->watchdog)
+ return 0;
+
+ /* Don't notify watchdog too often */
+ if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
+ return 0;
+
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = e->timestamp.monotonic;
+
+ return arm_watchdog(e);
+}
+
+static void event_close_inode_data_fds(sd_event *e) {
+ struct inode_data *d;
+
+ assert(e);
+
+ /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
+ * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
+ * adjustments to the even source, such as changing the priority (which requires us to remove and re-add a watch
+ * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
+ * compromise. */
+
+ while ((d = e->inode_data_to_close)) {
+ assert(d->fd >= 0);
+ d->fd = safe_close(d->fd);
+
+ LIST_REMOVE(to_close, e->inode_data_to_close, d);
+ }
+}
+
+_public_ int sd_event_prepare(sd_event *e) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+ /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
+ * this check here once, since gettid() is typically not cached, and thus want to minimize
+ * syscalls */
+ assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
+
+ if (e->exit_requested)
+ goto pending;
+
+ e->iteration++;
+
+ e->state = SD_EVENT_PREPARING;
+ r = event_prepare(e);
+ e->state = SD_EVENT_INITIAL;
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->realtime);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->boottime);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->monotonic);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->realtime_alarm);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->boottime_alarm);
+ if (r < 0)
+ return r;
+
+ event_close_inode_data_fds(e);
+
+ if (event_next_pending(e) || e->need_process_child)
+ goto pending;
+
+ e->state = SD_EVENT_ARMED;
+
+ return 0;
+
+pending:
+ e->state = SD_EVENT_ARMED;
+ r = sd_event_wait(e, 0);
+ if (r == 0)
+ e->state = SD_EVENT_ARMED;
+
+ return r;
+}
+
+_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
+ size_t event_queue_max;
+ int r, m, i;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
+
+ if (e->exit_requested) {
+ e->state = SD_EVENT_PENDING;
+ return 1;
+ }
+
+ event_queue_max = MAX(e->n_sources, 1u);
+ if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
+ return -ENOMEM;
+
+ /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
+ if (e->inotify_data_buffered)
+ timeout = 0;
+
+ m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
+ timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
+ if (m < 0) {
+ if (errno == EINTR) {
+ e->state = SD_EVENT_PENDING;
+ return 1;
+ }
+
+ r = -errno;
+ goto finish;
+ }
+
+ triple_timestamp_get(&e->timestamp);
+
+ for (i = 0; i < m; i++) {
+
+ if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
+ r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
+ else {
+ WakeupType *t = e->event_queue[i].data.ptr;
+
+ switch (*t) {
+
+ case WAKEUP_EVENT_SOURCE: {
+ sd_event_source *s = e->event_queue[i].data.ptr;
+
+ assert(s);
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ r = process_io(e, s, e->event_queue[i].events);
+ break;
+
+ case SOURCE_CHILD:
+ r = process_pidfd(e, s, e->event_queue[i].events);
+ break;
+
+ default:
+ assert_not_reached("Unexpected event source type");
+ }
+
+ break;
+ }
+
+ case WAKEUP_CLOCK_DATA: {
+ struct clock_data *d = e->event_queue[i].data.ptr;
+
+ assert(d);
+
+ r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
+ break;
+ }
+
+ case WAKEUP_SIGNAL_DATA:
+ r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
+ break;
+
+ case WAKEUP_INOTIFY_DATA:
+ r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
+ break;
+
+ default:
+ assert_not_reached("Invalid wake-up pointer");
+ }
+ }
+ if (r < 0)
+ goto finish;
+ }
+
+ r = process_watchdog(e);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.realtime, &e->realtime);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.boottime, &e->boottime);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
+ if (r < 0)
+ goto finish;
+
+ if (e->need_process_child) {
+ r = process_child(e);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = process_inotify(e);
+ if (r < 0)
+ goto finish;
+
+ if (event_next_pending(e)) {
+ e->state = SD_EVENT_PENDING;
+
+ return 1;
+ }
+
+ r = 0;
+
+finish:
+ e->state = SD_EVENT_INITIAL;
+
+ return r;
+}
+
+_public_ int sd_event_dispatch(sd_event *e) {
+ sd_event_source *p;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
+
+ if (e->exit_requested)
+ return dispatch_exit(e);
+
+ p = event_next_pending(e);
+ if (p) {
+ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
+
+ ref = sd_event_ref(e);
+ e->state = SD_EVENT_RUNNING;
+ r = source_dispatch(p);
+ e->state = SD_EVENT_INITIAL;
+ return r;
+ }
+
+ e->state = SD_EVENT_INITIAL;
+
+ return 1;
+}
+
+static void event_log_delays(sd_event *e) {
+ char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
+ size_t l, i;
+
+ p = b;
+ l = sizeof(b);
+ for (i = 0; i < ELEMENTSOF(e->delays); i++) {
+ l = strpcpyf(&p, l, "%u ", e->delays[i]);
+ e->delays[i] = 0;
+ }
+ log_debug("Event loop iterations: %s", b);
+}
+
+_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+ if (e->profile_delays && e->last_run) {
+ usec_t this_run;
+ unsigned l;
+
+ this_run = now(CLOCK_MONOTONIC);
+
+ l = u64log2(this_run - e->last_run);
+ assert(l < ELEMENTSOF(e->delays));
+ e->delays[l]++;
+
+ if (this_run - e->last_log >= 5*USEC_PER_SEC) {
+ event_log_delays(e);
+ e->last_log = this_run;
+ }
+ }
+
+ r = sd_event_prepare(e);
+ if (r == 0)
+ /* There was nothing? Then wait... */
+ r = sd_event_wait(e, timeout);
+
+ if (e->profile_delays)
+ e->last_run = now(CLOCK_MONOTONIC);
+
+ if (r > 0) {
+ /* There's something now, then let's dispatch it */
+ r = sd_event_dispatch(e);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ return r;
+}
+
+_public_ int sd_event_loop(sd_event *e) {
+ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+ ref = sd_event_ref(e);
+
+ while (e->state != SD_EVENT_FINISHED) {
+ r = sd_event_run(e, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+
+ return e->exit_code;
+}
+
+_public_ int sd_event_get_fd(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->epoll_fd;
+}
+
+_public_ int sd_event_get_state(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->state;
+}
+
+_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(code, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!e->exit_requested)
+ return -ENODATA;
+
+ *code = e->exit_code;
+ return 0;
+}
+
+_public_ int sd_event_exit(sd_event *e, int code) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ e->exit_requested = true;
+ e->exit_code = code;
+
+ return 0;
+}
+
+_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(usec, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
+ return -EOPNOTSUPP;
+
+ /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
+ * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
+ * the purpose of getting the time this doesn't matter. */
+ if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
+ return -EOPNOTSUPP;
+
+ if (!triple_timestamp_is_set(&e->timestamp)) {
+ /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
+ *usec = now(clock);
+ return 1;
+ }
+
+ *usec = triple_timestamp_by_clock(&e->timestamp, clock);
+ return 0;
+}
+
+_public_ int sd_event_default(sd_event **ret) {
+ sd_event *e = NULL;
+ int r;
+
+ if (!ret)
+ return !!default_event;
+
+ if (default_event) {
+ *ret = sd_event_ref(default_event);
+ return 0;
+ }
+
+ r = sd_event_new(&e);
+ if (r < 0)
+ return r;
+
+ e->default_event_ptr = &default_event;
+ e->tid = gettid();
+ default_event = e;
+
+ *ret = e;
+ return 1;
+}
+
+_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(tid, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (e->tid != 0) {
+ *tid = e->tid;
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+_public_ int sd_event_set_watchdog(sd_event *e, int b) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (e->watchdog == !!b)
+ return e->watchdog;
+
+ if (b) {
+ r = sd_watchdog_enabled(false, &e->watchdog_period);
+ if (r <= 0)
+ return r;
+
+ /* Issue first ping immediately */
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = now(CLOCK_MONOTONIC);
+
+ e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (e->watchdog_fd < 0)
+ return -errno;
+
+ r = arm_watchdog(e);
+ if (r < 0)
+ goto fail;
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ } else {
+ if (e->watchdog_fd >= 0) {
+ (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
+ e->watchdog_fd = safe_close(e->watchdog_fd);
+ }
+ }
+
+ e->watchdog = !!b;
+ return e->watchdog;
+
+fail:
+ e->watchdog_fd = safe_close(e->watchdog_fd);
+ return r;
+}
+
+_public_ int sd_event_get_watchdog(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->watchdog;
+}
+
+_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ *ret = e->iteration;
+ return 0;
+}
+
+_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
+ assert_return(s, -EINVAL);
+
+ s->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
+ assert_return(s, -EINVAL);
+
+ if (ret)
+ *ret = s->destroy_callback;
+
+ return !!s->destroy_callback;
+}
+
+_public_ int sd_event_source_get_floating(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+
+ return s->floating;
+}
+
+_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
+ assert_return(s, -EINVAL);
+
+ if (s->floating == !!b)
+ return 0;
+
+ if (!s->event) /* Already disconnected */
+ return -ESTALE;
+
+ s->floating = b;
+
+ if (b) {
+ sd_event_source_ref(s);
+ sd_event_unref(s->event);
+ } else {
+ sd_event_ref(s->event);
+ sd_event_source_unref(s);
+ }
+
+ return 1;
+}
+
+_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+
+ return s->exit_on_failure;
+}
+
+_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+
+ if (s->exit_on_failure == !!b)
+ return 0;
+
+ s->exit_on_failure = b;
+ return 1;
+}
diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c
new file mode 100644
index 0000000..1c4d0e2
--- /dev/null
+++ b/src/libsystemd/sd-event/test-event.c
@@ -0,0 +1,607 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/wait.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rm-rf.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static int prepare_handler(sd_event_source *s, void *userdata) {
+ log_info("preparing %c", PTR_TO_INT(userdata));
+ return 1;
+}
+
+static bool got_a, got_b, got_c, got_unref;
+static unsigned got_d;
+
+static int unref_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_event_source_unref(s);
+ got_unref = true;
+ return 0;
+}
+
+static int io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+
+ log_info("got IO on %c", PTR_TO_INT(userdata));
+
+ if (userdata == INT_TO_PTR('a')) {
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ assert_se(!got_a);
+ got_a = true;
+ } else if (userdata == INT_TO_PTR('b')) {
+ assert_se(!got_b);
+ got_b = true;
+ } else if (userdata == INT_TO_PTR('d')) {
+ got_d++;
+ if (got_d < 2)
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_ONESHOT) >= 0);
+ else
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ } else
+ assert_not_reached("Yuck!");
+
+ return 1;
+}
+
+static int child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
+
+ assert_se(s);
+ assert_se(si);
+
+ assert_se(si->si_uid == getuid());
+ assert_se(si->si_signo == SIGCHLD);
+ assert_se(si->si_code == CLD_EXITED);
+ assert_se(si->si_status == 78);
+
+ log_info("got child on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('f'));
+
+ assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0);
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static int signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ sd_event_source *p = NULL;
+ pid_t pid;
+ siginfo_t plain_si;
+
+ assert_se(s);
+ assert_se(si);
+
+ log_info("got signal on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('e'));
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGUSR2, -1) >= 0);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ sigset_t ss;
+
+ assert_se(sigemptyset(&ss) >= 0);
+ assert_se(sigaddset(&ss, SIGUSR2) >= 0);
+
+ zero(plain_si);
+ assert_se(sigwaitinfo(&ss, &plain_si) >= 0);
+
+ assert_se(plain_si.si_signo == SIGUSR2);
+ assert_se(plain_si.si_value.sival_int == 4711);
+
+ _exit(78);
+ }
+
+ assert_se(sd_event_add_child(sd_event_source_get_event(s), &p, pid, WEXITED, child_handler, INT_TO_PTR('f')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+ assert_se(sd_event_source_set_child_process_own(p, true) >= 0);
+
+ /* We can't use structured initialization here, since the structure contains various unions and these
+ * fields lie in overlapping (carefully aligned) unions that LLVM is allergic to allow assignments
+ * to */
+ zero(plain_si);
+ plain_si.si_signo = SIGUSR2;
+ plain_si.si_code = SI_QUEUE;
+ plain_si.si_pid = getpid();
+ plain_si.si_uid = getuid();
+ plain_si.si_value.sival_int = 4711;
+
+ assert_se(sd_event_source_send_child_signal(p, SIGUSR2, &plain_si, 0) >= 0);
+
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static int defer_handler(sd_event_source *s, void *userdata) {
+ sd_event_source *p = NULL;
+
+ assert_se(s);
+
+ log_info("got defer on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('d'));
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGUSR1, -1) >= 0);
+
+ assert_se(sd_event_add_signal(sd_event_source_get_event(s), &p, SIGUSR1, signal_handler, INT_TO_PTR('e')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+ raise(SIGUSR1);
+
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static bool do_quit;
+
+static int time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+ log_info("got timer on %c", PTR_TO_INT(userdata));
+
+ if (userdata == INT_TO_PTR('c')) {
+
+ if (do_quit) {
+ sd_event_source *p;
+
+ assert_se(sd_event_add_defer(sd_event_source_get_event(s), &p, defer_handler, INT_TO_PTR('d')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+ } else {
+ assert_se(!got_c);
+ got_c = true;
+ }
+ } else
+ assert_not_reached("Huh?");
+
+ return 2;
+}
+
+static bool got_exit = false;
+
+static int exit_handler(sd_event_source *s, void *userdata) {
+ log_info("got quit handler on %c", PTR_TO_INT(userdata));
+
+ got_exit = true;
+
+ return 3;
+}
+
+static bool got_post = false;
+
+static int post_handler(sd_event_source *s, void *userdata) {
+ log_info("got post handler");
+
+ got_post = true;
+
+ return 2;
+}
+
+static void test_basic(bool with_pidfd) {
+ sd_event *e = NULL;
+ sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL;
+ static const char ch = 'x';
+ int a[2] = { -1, -1 }, b[2] = { -1, -1}, d[2] = { -1, -1}, k[2] = { -1, -1 };
+ uint64_t event_now;
+ int64_t priority;
+
+ assert_se(setenv("SYSTEMD_PIDFD", yes_no(with_pidfd), 1) >= 0);
+
+ assert_se(pipe(a) >= 0);
+ assert_se(pipe(b) >= 0);
+ assert_se(pipe(d) >= 0);
+ assert_se(pipe(k) >= 0);
+
+ assert_se(sd_event_default(&e) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) > 0);
+
+ assert_se(sd_event_set_watchdog(e, true) >= 0);
+
+ /* Test whether we cleanly can destroy an io event source from its own handler */
+ got_unref = false;
+ assert_se(sd_event_add_io(e, &t, k[0], EPOLLIN, unref_handler, NULL) >= 0);
+ assert_se(write(k[1], &ch, 1) == 1);
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(got_unref);
+
+ got_a = false, got_b = false, got_c = false, got_d = 0;
+
+ /* Add a oneshot handler, trigger it, reenable it, and trigger
+ * it again. */
+ assert_se(sd_event_add_io(e, &w, d[0], EPOLLIN, io_handler, INT_TO_PTR('d')) >= 0);
+ assert_se(sd_event_source_set_enabled(w, SD_EVENT_ONESHOT) >= 0);
+ assert_se(write(d[1], &ch, 1) >= 0);
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(got_d == 1);
+ assert_se(write(d[1], &ch, 1) >= 0);
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(got_d == 2);
+
+ assert_se(sd_event_add_io(e, &x, a[0], EPOLLIN, io_handler, INT_TO_PTR('a')) >= 0);
+ assert_se(sd_event_add_io(e, &y, b[0], EPOLLIN, io_handler, INT_TO_PTR('b')) >= 0);
+
+ do_quit = false;
+ assert_se(sd_event_add_time(e, &z, CLOCK_MONOTONIC, 0, 0, time_handler, INT_TO_PTR('c')) >= 0);
+ assert_se(sd_event_add_exit(e, &q, exit_handler, INT_TO_PTR('g')) >= 0);
+
+ assert_se(sd_event_source_set_priority(x, 99) >= 0);
+ assert_se(sd_event_source_get_priority(x, &priority) >= 0);
+ assert_se(priority == 99);
+ assert_se(sd_event_source_set_enabled(y, SD_EVENT_ONESHOT) >= 0);
+ assert_se(sd_event_source_set_prepare(x, prepare_handler) >= 0);
+ assert_se(sd_event_source_set_priority(z, 50) >= 0);
+ assert_se(sd_event_source_set_enabled(z, SD_EVENT_ONESHOT) >= 0);
+ assert_se(sd_event_source_set_prepare(z, prepare_handler) >= 0);
+
+ /* Test for floating event sources */
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+1, -1) >= 0);
+ assert_se(sd_event_add_signal(e, NULL, SIGRTMIN+1, NULL, NULL) >= 0);
+
+ assert_se(write(a[1], &ch, 1) >= 0);
+ assert_se(write(b[1], &ch, 1) >= 0);
+
+ assert_se(!got_a && !got_b && !got_c);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+
+ assert_se(!got_a && got_b && !got_c);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+
+ assert_se(!got_a && got_b && got_c);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+
+ assert_se(got_a && got_b && got_c);
+
+ sd_event_source_unref(x);
+ sd_event_source_unref(y);
+
+ do_quit = true;
+ assert_se(sd_event_add_post(e, NULL, post_handler, NULL) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) == 0);
+ assert_se(sd_event_source_set_time(z, event_now + 200 * USEC_PER_MSEC) >= 0);
+ assert_se(sd_event_source_set_enabled(z, SD_EVENT_ONESHOT) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+ assert_se(got_post);
+ assert_se(got_exit);
+
+ sd_event_source_unref(z);
+ sd_event_source_unref(q);
+
+ sd_event_source_unref(w);
+
+ sd_event_unref(e);
+
+ safe_close_pair(a);
+ safe_close_pair(b);
+ safe_close_pair(d);
+ safe_close_pair(k);
+
+ assert_se(unsetenv("SYSTEMD_PIDFD") >= 0);
+}
+
+static void test_sd_event_now(void) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ uint64_t event_now;
+
+ assert_se(sd_event_new(&e) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME_ALARM, &event_now) > 0);
+ if (clock_boottime_supported()) {
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME_ALARM, &event_now) > 0);
+ }
+ assert_se(sd_event_now(e, -1, &event_now) == -EOPNOTSUPP);
+ assert_se(sd_event_now(e, 900 /* arbitrary big number */, &event_now) == -EOPNOTSUPP);
+
+ assert_se(sd_event_run(e, 0) == 0);
+
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME_ALARM, &event_now) == 0);
+ if (clock_boottime_supported()) {
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME_ALARM, &event_now) == 0);
+ }
+ assert_se(sd_event_now(e, -1, &event_now) == -EOPNOTSUPP);
+ assert_se(sd_event_now(e, 900 /* arbitrary big number */, &event_now) == -EOPNOTSUPP);
+}
+
+static int last_rtqueue_sigval = 0;
+static int n_rtqueue = 0;
+
+static int rtqueue_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ last_rtqueue_sigval = si->ssi_int;
+ n_rtqueue++;
+ return 0;
+}
+
+static void test_rtqueue(void) {
+ sd_event_source *u = NULL, *v = NULL, *s = NULL;
+ sd_event *e = NULL;
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+2, SIGRTMIN+3, SIGUSR2, -1) >= 0);
+ assert_se(sd_event_add_signal(e, &u, SIGRTMIN+2, rtqueue_handler, NULL) >= 0);
+ assert_se(sd_event_add_signal(e, &v, SIGRTMIN+3, rtqueue_handler, NULL) >= 0);
+ assert_se(sd_event_add_signal(e, &s, SIGUSR2, rtqueue_handler, NULL) >= 0);
+
+ assert_se(sd_event_source_set_priority(v, -10) >= 0);
+
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+2, (union sigval) { .sival_int = 1 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+3, (union sigval) { .sival_int = 2 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGUSR2, (union sigval) { .sival_int = 3 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+3, (union sigval) { .sival_int = 4 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGUSR2, (union sigval) { .sival_int = 5 }) >= 0);
+
+ assert_se(n_rtqueue == 0);
+ assert_se(last_rtqueue_sigval == 0);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 1);
+ assert_se(last_rtqueue_sigval == 2); /* first SIGRTMIN+3 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 2);
+ assert_se(last_rtqueue_sigval == 4); /* second SIGRTMIN+3 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 3);
+ assert_se(last_rtqueue_sigval == 3); /* first SIGUSR2 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 4);
+ assert_se(last_rtqueue_sigval == 1); /* SIGRTMIN+2 */
+
+ assert_se(sd_event_run(e, 0) == 0); /* the other SIGUSR2 is dropped, because the first one was still queued */
+ assert_se(n_rtqueue == 4);
+ assert_se(last_rtqueue_sigval == 1);
+
+ sd_event_source_unref(u);
+ sd_event_source_unref(v);
+ sd_event_source_unref(s);
+
+ sd_event_unref(e);
+}
+
+#define CREATE_EVENTS_MAX (70000U)
+
+struct inotify_context {
+ bool delete_self_handler_called;
+ unsigned create_called[CREATE_EVENTS_MAX];
+ unsigned create_overflow;
+ unsigned n_create_events;
+};
+
+static void maybe_exit(sd_event_source *s, struct inotify_context *c) {
+ unsigned n;
+
+ assert(s);
+ assert(c);
+
+ if (!c->delete_self_handler_called)
+ return;
+
+ for (n = 0; n < 3; n++) {
+ unsigned i;
+
+ if (c->create_overflow & (1U << n))
+ continue;
+
+ for (i = 0; i < c->n_create_events; i++)
+ if (!(c->create_called[i] & (1U << n)))
+ return;
+ }
+
+ sd_event_exit(sd_event_source_get_event(s), 0);
+}
+
+static int inotify_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
+ struct inotify_context *c = userdata;
+ const char *description;
+ unsigned bit, n;
+
+ assert_se(sd_event_source_get_description(s, &description) >= 0);
+ assert_se(safe_atou(description, &n) >= 0);
+
+ assert_se(n <= 3);
+ bit = 1U << n;
+
+ if (ev->mask & IN_Q_OVERFLOW) {
+ log_info("inotify-handler <%s>: overflow", description);
+ c->create_overflow |= bit;
+ } else if (ev->mask & IN_CREATE) {
+ unsigned i;
+
+ log_debug("inotify-handler <%s>: create on %s", description, ev->name);
+
+ if (!streq(ev->name, "sub")) {
+ assert_se(safe_atou(ev->name, &i) >= 0);
+
+ assert_se(i < c->n_create_events);
+ c->create_called[i] |= bit;
+ }
+ } else if (ev->mask & IN_DELETE) {
+ log_info("inotify-handler <%s>: delete of %s", description, ev->name);
+ assert_se(streq(ev->name, "sub"));
+ } else
+ assert_not_reached("unexpected inotify event");
+
+ maybe_exit(s, c);
+ return 1;
+}
+
+static int delete_self_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
+ struct inotify_context *c = userdata;
+
+ if (ev->mask & IN_Q_OVERFLOW) {
+ log_info("delete-self-handler: overflow");
+ c->delete_self_handler_called = true;
+ } else if (ev->mask & IN_DELETE_SELF) {
+ log_info("delete-self-handler: delete-self");
+ c->delete_self_handler_called = true;
+ } else if (ev->mask & IN_IGNORED) {
+ log_info("delete-self-handler: ignore");
+ } else
+ assert_not_reached("unexpected inotify event (delete-self)");
+
+ maybe_exit(s, c);
+ return 1;
+}
+
+static void test_inotify(unsigned n_create_events) {
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ sd_event_source *a = NULL, *b = NULL, *c = NULL, *d = NULL;
+ struct inotify_context context = {
+ .n_create_events = n_create_events,
+ };
+ sd_event *e = NULL;
+ const char *q;
+ unsigned i;
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &p) >= 0);
+
+ assert_se(sd_event_add_inotify(e, &a, p, IN_CREATE|IN_ONLYDIR, inotify_handler, &context) >= 0);
+ assert_se(sd_event_add_inotify(e, &b, p, IN_CREATE|IN_DELETE|IN_DONT_FOLLOW, inotify_handler, &context) >= 0);
+ assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_IDLE) >= 0);
+ assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_NORMAL) >= 0);
+ assert_se(sd_event_add_inotify(e, &c, p, IN_CREATE|IN_DELETE|IN_EXCL_UNLINK, inotify_handler, &context) >= 0);
+ assert_se(sd_event_source_set_priority(c, SD_EVENT_PRIORITY_IDLE) >= 0);
+
+ assert_se(sd_event_source_set_description(a, "0") >= 0);
+ assert_se(sd_event_source_set_description(b, "1") >= 0);
+ assert_se(sd_event_source_set_description(c, "2") >= 0);
+
+ q = strjoina(p, "/sub");
+ assert_se(touch(q) >= 0);
+ assert_se(sd_event_add_inotify(e, &d, q, IN_DELETE_SELF, delete_self_handler, &context) >= 0);
+
+ for (i = 0; i < n_create_events; i++) {
+ char buf[DECIMAL_STR_MAX(unsigned)+1];
+ _cleanup_free_ char *z;
+
+ xsprintf(buf, "%u", i);
+ assert_se(z = path_join(p, buf));
+
+ assert_se(touch(z) >= 0);
+ }
+
+ assert_se(unlink(q) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ sd_event_source_unref(a);
+ sd_event_source_unref(b);
+ sd_event_source_unref(c);
+ sd_event_source_unref(d);
+
+ sd_event_unref(e);
+}
+
+static int pidfd_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ assert_se(s);
+ assert_se(si);
+
+ assert_se(si->si_uid == getuid());
+ assert_se(si->si_signo == SIGCHLD);
+ assert_se(si->si_code == CLD_EXITED);
+ assert_se(si->si_status == 66);
+
+ log_info("got pidfd on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('p'));
+
+ assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0);
+ sd_event_source_unref(s);
+
+ return 0;
+}
+
+static void test_pidfd(void) {
+ sd_event_source *s = NULL, *t = NULL;
+ sd_event *e = NULL;
+ int pidfd;
+ pid_t pid, pid2;
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
+
+ pid = fork();
+ if (pid == 0)
+ /* child */
+ _exit(66);
+
+ assert_se(pid > 1);
+
+ pidfd = pidfd_open(pid, 0);
+ if (pidfd < 0) {
+ /* No pidfd_open() supported or blocked? */
+ assert_se(ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno));
+ (void) wait_for_terminate(pid, NULL);
+ return;
+ }
+
+ pid2 = fork();
+ if (pid2 == 0)
+ freeze();
+
+ assert_se(pid > 2);
+
+ assert_se(sd_event_default(&e) >= 0);
+ assert_se(sd_event_add_child_pidfd(e, &s, pidfd, WEXITED, pidfd_handler, INT_TO_PTR('p')) >= 0);
+ assert_se(sd_event_source_set_child_pidfd_own(s, true) >= 0);
+
+ /* This one should never trigger, since our second child lives forever */
+ assert_se(sd_event_add_child(e, &t, pid2, WEXITED, pidfd_handler, INT_TO_PTR('q')) >= 0);
+ assert_se(sd_event_source_set_child_process_own(t, true) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ /* Child should still be alive */
+ assert_se(kill(pid2, 0) >= 0);
+
+ t = sd_event_source_unref(t);
+
+ /* Child should now be dead, since we dropped the ref */
+ assert_se(kill(pid2, 0) < 0 && errno == ESRCH);
+
+ sd_event_unref(e);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_basic(true); /* test with pidfd */
+ test_basic(false); /* test without pidfd */
+
+ test_sd_event_now();
+ test_rtqueue();
+
+ test_inotify(100); /* should work without overflow */
+ test_inotify(33000); /* should trigger a q overflow */
+
+ test_pidfd();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-hwdb/hwdb-internal.h b/src/libsystemd/sd-hwdb/hwdb-internal.h
new file mode 100644
index 0000000..5c20688
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/hwdb-internal.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdint.h>
+
+#include "sparse-endian.h"
+
+#define HWDB_SIG { 'K', 'S', 'L', 'P', 'H', 'H', 'R', 'H' }
+
+/* on-disk trie objects */
+struct trie_header_f {
+ uint8_t signature[8];
+
+ /* version of tool which created the file */
+ le64_t tool_version;
+ le64_t file_size;
+
+ /* size of structures to allow them to grow */
+ le64_t header_size;
+ le64_t node_size;
+ le64_t child_entry_size;
+ le64_t value_entry_size;
+
+ /* offset of the root trie node */
+ le64_t nodes_root_off;
+
+ /* size of the nodes and string section */
+ le64_t nodes_len;
+ le64_t strings_len;
+} _packed_;
+
+struct trie_node_f {
+ /* prefix of lookup string, shared by all children */
+ le64_t prefix_off;
+ /* size of children entry array appended to the node */
+ uint8_t children_count;
+ uint8_t padding[7];
+ /* size of value entry array appended to the node */
+ le64_t values_count;
+} _packed_;
+
+/* array of child entries, follows directly the node record */
+struct trie_child_entry_f {
+ /* index of the child node */
+ uint8_t c;
+ uint8_t padding[7];
+ /* offset of the child node */
+ le64_t child_off;
+} _packed_;
+
+/* array of value entries, follows directly the node record/child array */
+struct trie_value_entry_f {
+ le64_t key_off;
+ le64_t value_off;
+} _packed_;
+
+/* v2 extends v1 with filename and line-number */
+struct trie_value_entry2_f {
+ le64_t key_off;
+ le64_t value_off;
+ le64_t filename_off;
+ le32_t line_number;
+ le16_t file_priority;
+ le16_t padding;
+} _packed_;
diff --git a/src/libsystemd/sd-hwdb/hwdb-util.c b/src/libsystemd/sd-hwdb/hwdb-util.c
new file mode 100644
index 0000000..4c94ba9
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/hwdb-util.c
@@ -0,0 +1,668 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hwdb-internal.h"
+#include "hwdb-util.h"
+#include "label.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "sort-util.h"
+#include "strbuf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+static const char *default_hwdb_bin_dir = "/etc/udev";
+static const char * const conf_file_dirs[] = {
+ "/etc/udev/hwdb.d",
+ UDEVLIBEXECDIR "/hwdb.d",
+ NULL
+};
+
+/*
+ * Generic udev properties, key-value database based on modalias strings.
+ * Uses a Patricia/radix trie to index all matches for efficient lookup.
+ */
+
+/* in-memory trie objects */
+struct trie {
+ struct trie_node *root;
+ struct strbuf *strings;
+
+ size_t nodes_count;
+ size_t children_count;
+ size_t values_count;
+};
+
+struct trie_node {
+ /* prefix, common part for all children of this node */
+ size_t prefix_off;
+
+ /* sorted array of pointers to children nodes */
+ struct trie_child_entry *children;
+ uint8_t children_count;
+
+ /* sorted array of key-value pairs */
+ struct trie_value_entry *values;
+ size_t values_count;
+};
+
+/* children array item with char (0-255) index */
+struct trie_child_entry {
+ uint8_t c;
+ struct trie_node *child;
+};
+
+/* value array item with key-value pairs */
+struct trie_value_entry {
+ size_t key_off;
+ size_t value_off;
+ size_t filename_off;
+ uint32_t line_number;
+ uint16_t file_priority;
+};
+
+static int trie_children_cmp(const struct trie_child_entry *a, const struct trie_child_entry *b) {
+ return CMP(a->c, b->c);
+}
+
+static int node_add_child(struct trie *trie, struct trie_node *node, struct trie_node *node_child, uint8_t c) {
+ struct trie_child_entry *child;
+
+ /* extend array, add new entry, sort for bisection */
+ child = reallocarray(node->children, node->children_count + 1, sizeof(struct trie_child_entry));
+ if (!child)
+ return -ENOMEM;
+
+ node->children = child;
+ trie->children_count++;
+ node->children[node->children_count].c = c;
+ node->children[node->children_count].child = node_child;
+ node->children_count++;
+ typesafe_qsort(node->children, node->children_count, trie_children_cmp);
+ trie->nodes_count++;
+
+ return 0;
+}
+
+static struct trie_node *node_lookup(const struct trie_node *node, uint8_t c) {
+ struct trie_child_entry *child;
+ struct trie_child_entry search;
+
+ search.c = c;
+ child = typesafe_bsearch(&search, node->children, node->children_count, trie_children_cmp);
+ if (child)
+ return child->child;
+ return NULL;
+}
+
+static void trie_node_cleanup(struct trie_node *node) {
+ if (!node)
+ return;
+
+ for (size_t i = 0; i < node->children_count; i++)
+ trie_node_cleanup(node->children[i].child);
+ free(node->children);
+ free(node->values);
+ free(node);
+}
+
+static void trie_free(struct trie *trie) {
+ if (!trie)
+ return;
+
+ trie_node_cleanup(trie->root);
+ strbuf_cleanup(trie->strings);
+ free(trie);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct trie*, trie_free);
+
+static int trie_values_cmp(const struct trie_value_entry *a, const struct trie_value_entry *b, struct trie *trie) {
+ return strcmp(trie->strings->buf + a->key_off,
+ trie->strings->buf + b->key_off);
+}
+
+static int trie_node_add_value(struct trie *trie, struct trie_node *node,
+ const char *key, const char *value,
+ const char *filename, uint16_t file_priority, uint32_t line_number, bool compat) {
+ ssize_t k, v, fn = 0;
+ struct trie_value_entry *val;
+
+ k = strbuf_add_string(trie->strings, key, strlen(key));
+ if (k < 0)
+ return k;
+ v = strbuf_add_string(trie->strings, value, strlen(value));
+ if (v < 0)
+ return v;
+
+ if (!compat) {
+ fn = strbuf_add_string(trie->strings, filename, strlen(filename));
+ if (fn < 0)
+ return fn;
+ }
+
+ if (node->values_count) {
+ struct trie_value_entry search = {
+ .key_off = k,
+ .value_off = v,
+ };
+
+ val = typesafe_bsearch_r(&search, node->values, node->values_count, trie_values_cmp, trie);
+ if (val) {
+ /* At this point we have 2 identical properties on the same match-string.
+ * Since we process files in order, we just replace the previous value. */
+ val->value_off = v;
+ val->filename_off = fn;
+ val->file_priority = file_priority;
+ val->line_number = line_number;
+ return 0;
+ }
+ }
+
+ /* extend array, add new entry, sort for bisection */
+ val = reallocarray(node->values, node->values_count + 1, sizeof(struct trie_value_entry));
+ if (!val)
+ return -ENOMEM;
+ trie->values_count++;
+ node->values = val;
+ node->values[node->values_count] = (struct trie_value_entry) {
+ .key_off = k,
+ .value_off = v,
+ .filename_off = fn,
+ .file_priority = file_priority,
+ .line_number = line_number,
+ };
+ node->values_count++;
+ typesafe_qsort_r(node->values, node->values_count, trie_values_cmp, trie);
+ return 0;
+}
+
+static int trie_insert(struct trie *trie, struct trie_node *node, const char *search,
+ const char *key, const char *value,
+ const char *filename, uint16_t file_priority, uint32_t line_number, bool compat) {
+ int r = 0;
+
+ for (size_t i = 0;; i++) {
+ size_t p;
+ uint8_t c;
+ struct trie_node *child;
+
+ for (p = 0; (c = trie->strings->buf[node->prefix_off + p]); p++) {
+ _cleanup_free_ struct trie_node *new_child = NULL;
+ _cleanup_free_ char *s = NULL;
+ ssize_t off;
+
+ if (c == search[i + p])
+ continue;
+
+ /* split node */
+ new_child = new(struct trie_node, 1);
+ if (!new_child)
+ return -ENOMEM;
+
+ /* move values from parent to child */
+ *new_child = (struct trie_node) {
+ .prefix_off = node->prefix_off + p+1,
+ .children = node->children,
+ .children_count = node->children_count,
+ .values = node->values,
+ .values_count = node->values_count,
+ };
+
+ /* update parent; use strdup() because the source gets realloc()d */
+ s = strndup(trie->strings->buf + node->prefix_off, p);
+ if (!s)
+ return -ENOMEM;
+
+ off = strbuf_add_string(trie->strings, s, p);
+ if (off < 0)
+ return off;
+
+ *node = (struct trie_node) {
+ .prefix_off = off,
+ };
+ r = node_add_child(trie, node, new_child, c);
+ if (r < 0)
+ return r;
+
+ new_child = NULL; /* avoid cleanup */
+ break;
+ }
+ i += p;
+
+ c = search[i];
+ if (c == '\0')
+ return trie_node_add_value(trie, node, key, value, filename, file_priority, line_number, compat);
+
+ child = node_lookup(node, c);
+ if (!child) {
+ _cleanup_free_ struct trie_node *new_child = NULL;
+ ssize_t off;
+
+ /* new child */
+ new_child = new(struct trie_node, 1);
+ if (!new_child)
+ return -ENOMEM;
+
+ off = strbuf_add_string(trie->strings, search + i+1, strlen(search + i+1));
+ if (off < 0)
+ return off;
+
+ *new_child = (struct trie_node) {
+ .prefix_off = off,
+ };
+
+ r = node_add_child(trie, node, new_child, c);
+ if (r < 0)
+ return r;
+
+ child = TAKE_PTR(new_child);
+ return trie_node_add_value(trie, child, key, value, filename, file_priority, line_number, compat);
+ }
+
+ node = child;
+ }
+}
+
+struct trie_f {
+ FILE *f;
+ struct trie *trie;
+ uint64_t strings_off;
+
+ uint64_t nodes_count;
+ uint64_t children_count;
+ uint64_t values_count;
+};
+
+/* calculate the storage space for the nodes, children arrays, value arrays */
+static void trie_store_nodes_size(struct trie_f *trie, struct trie_node *node, bool compat) {
+ for (uint64_t i = 0; i < node->children_count; i++)
+ trie_store_nodes_size(trie, node->children[i].child, compat);
+
+ trie->strings_off += sizeof(struct trie_node_f);
+ for (uint64_t i = 0; i < node->children_count; i++)
+ trie->strings_off += sizeof(struct trie_child_entry_f);
+ for (uint64_t i = 0; i < node->values_count; i++)
+ trie->strings_off += compat ? sizeof(struct trie_value_entry_f) : sizeof(struct trie_value_entry2_f);
+}
+
+static int64_t trie_store_nodes(struct trie_f *trie, struct trie_node *node, bool compat) {
+ struct trie_node_f n = {
+ .prefix_off = htole64(trie->strings_off + node->prefix_off),
+ .children_count = node->children_count,
+ .values_count = htole64(node->values_count),
+ };
+ _cleanup_free_ struct trie_child_entry_f *children = NULL;
+ int64_t node_off;
+
+ if (node->children_count) {
+ children = new(struct trie_child_entry_f, node->children_count);
+ if (!children)
+ return -ENOMEM;
+ }
+
+ /* post-order recursion */
+ for (uint64_t i = 0; i < node->children_count; i++) {
+ int64_t child_off;
+
+ child_off = trie_store_nodes(trie, node->children[i].child, compat);
+ if (child_off < 0)
+ return child_off;
+
+ children[i] = (struct trie_child_entry_f) {
+ .c = node->children[i].c,
+ .child_off = htole64(child_off),
+ };
+ }
+
+ /* write node */
+ node_off = ftello(trie->f);
+ fwrite(&n, sizeof(struct trie_node_f), 1, trie->f);
+ trie->nodes_count++;
+
+ /* append children array */
+ if (node->children_count) {
+ fwrite(children, sizeof(struct trie_child_entry_f), node->children_count, trie->f);
+ trie->children_count += node->children_count;
+ }
+
+ /* append values array */
+ for (uint64_t i = 0; i < node->values_count; i++) {
+ struct trie_value_entry2_f v = {
+ .key_off = htole64(trie->strings_off + node->values[i].key_off),
+ .value_off = htole64(trie->strings_off + node->values[i].value_off),
+ .filename_off = htole64(trie->strings_off + node->values[i].filename_off),
+ .line_number = htole32(node->values[i].line_number),
+ .file_priority = htole16(node->values[i].file_priority),
+ };
+
+ fwrite(&v, compat ? sizeof(struct trie_value_entry_f) : sizeof(struct trie_value_entry2_f), 1, trie->f);
+ }
+ trie->values_count += node->values_count;
+
+ return node_off;
+}
+
+static int trie_store(struct trie *trie, const char *filename, bool compat) {
+ struct trie_f t = {
+ .trie = trie,
+ };
+ _cleanup_free_ char *filename_tmp = NULL;
+ int64_t pos;
+ int64_t root_off;
+ int64_t size;
+ struct trie_header_f h = {
+ .signature = HWDB_SIG,
+ .tool_version = htole64(PROJECT_VERSION),
+ .header_size = htole64(sizeof(struct trie_header_f)),
+ .node_size = htole64(sizeof(struct trie_node_f)),
+ .child_entry_size = htole64(sizeof(struct trie_child_entry_f)),
+ .value_entry_size = htole64(compat ? sizeof(struct trie_value_entry_f) : sizeof(struct trie_value_entry2_f)),
+ };
+ int r;
+
+ /* calculate size of header, nodes, children entries, value entries */
+ t.strings_off = sizeof(struct trie_header_f);
+ trie_store_nodes_size(&t, trie->root, compat);
+
+ r = fopen_temporary(filename, &t.f, &filename_tmp);
+ if (r < 0)
+ return r;
+ fchmod(fileno(t.f), 0444);
+
+ /* write nodes */
+ if (fseeko(t.f, sizeof(struct trie_header_f), SEEK_SET) < 0)
+ goto error_fclose;
+
+ root_off = trie_store_nodes(&t, trie->root, compat);
+ h.nodes_root_off = htole64(root_off);
+ pos = ftello(t.f);
+ h.nodes_len = htole64(pos - sizeof(struct trie_header_f));
+
+ /* write string buffer */
+ fwrite(trie->strings->buf, trie->strings->len, 1, t.f);
+ h.strings_len = htole64(trie->strings->len);
+
+ /* write header */
+ size = ftello(t.f);
+ h.file_size = htole64(size);
+ if (fseeko(t.f, 0, SEEK_SET) < 0)
+ goto error_fclose;
+ fwrite(&h, sizeof(struct trie_header_f), 1, t.f);
+
+ if (ferror(t.f))
+ goto error_fclose;
+ if (fflush(t.f) < 0)
+ goto error_fclose;
+ if (fsync(fileno(t.f)) < 0)
+ goto error_fclose;
+ if (rename(filename_tmp, filename) < 0)
+ goto error_fclose;
+
+ /* write succeeded */
+ fclose(t.f);
+
+ log_debug("=== trie on-disk ===");
+ log_debug("size: %8"PRIi64" bytes", size);
+ log_debug("header: %8zu bytes", sizeof(struct trie_header_f));
+ log_debug("nodes: %8"PRIu64" bytes (%8"PRIu64")",
+ t.nodes_count * sizeof(struct trie_node_f), t.nodes_count);
+ log_debug("child pointers: %8"PRIu64" bytes (%8"PRIu64")",
+ t.children_count * sizeof(struct trie_child_entry_f), t.children_count);
+ log_debug("value pointers: %8"PRIu64" bytes (%8"PRIu64")",
+ t.values_count * (compat ? sizeof(struct trie_value_entry_f) : sizeof(struct trie_value_entry2_f)), t.values_count);
+ log_debug("string store: %8zu bytes", trie->strings->len);
+ log_debug("strings start: %8"PRIu64, t.strings_off);
+ return 0;
+
+ error_fclose:
+ r = -errno;
+ fclose(t.f);
+ (void) unlink(filename_tmp);
+ return r;
+}
+
+static int insert_data(struct trie *trie, char **match_list, char *line, const char *filename,
+ uint16_t file_priority, uint32_t line_number, bool compat) {
+ char *value, **entry;
+
+ assert(line[0] == ' ');
+
+ value = strchr(line, '=');
+ if (!value)
+ return log_syntax(NULL, LOG_WARNING, filename, line_number, SYNTHETIC_ERRNO(EINVAL),
+ "Key-value pair expected but got \"%s\", ignoring.", line);
+
+ value[0] = '\0';
+ value++;
+
+ /* Replace multiple leading spaces by a single space */
+ while (isblank(line[0]) && isblank(line[1]))
+ line++;
+
+ if (isempty(line + 1))
+ return log_syntax(NULL, LOG_WARNING, filename, line_number, SYNTHETIC_ERRNO(EINVAL),
+ "Empty key in \"%s=%s\", ignoring.",
+ line, value);
+
+ STRV_FOREACH(entry, match_list)
+ trie_insert(trie, trie->root, *entry, line, value, filename, file_priority, line_number, compat);
+
+ return 0;
+}
+
+static int import_file(struct trie *trie, const char *filename, uint16_t file_priority, bool compat) {
+ enum {
+ HW_NONE,
+ HW_MATCH,
+ HW_DATA,
+ } state = HW_NONE;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **match_list = NULL;
+ uint32_t line_number = 0;
+ int r, err;
+
+ f = fopen(filename, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t len;
+ char *pos;
+
+ r = read_line_full(f, LONG_LINE_MAX, READ_LINE_NOT_A_TTY, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ line_number ++;
+
+ /* comment line */
+ if (line[0] == '#')
+ continue;
+
+ /* strip trailing comment */
+ pos = strchr(line, '#');
+ if (pos)
+ pos[0] = '\0';
+
+ /* strip trailing whitespace */
+ len = strlen(line);
+ while (len > 0 && isspace(line[len-1]))
+ len--;
+ line[len] = '\0';
+
+ switch (state) {
+ case HW_NONE:
+ if (len == 0)
+ break;
+
+ if (line[0] == ' ') {
+ r = log_syntax(NULL, LOG_WARNING, filename, line_number, SYNTHETIC_ERRNO(EINVAL),
+ "Match expected but got indented property \"%s\", ignoring line.", line);
+ break;
+ }
+
+ /* start of record, first match */
+ state = HW_MATCH;
+
+ err = strv_extend(&match_list, line);
+ if (err < 0)
+ return err;
+
+ break;
+
+ case HW_MATCH:
+ if (len == 0) {
+ r = log_syntax(NULL, LOG_WARNING, filename, line_number, SYNTHETIC_ERRNO(EINVAL),
+ "Property expected, ignoring record with no properties.");
+ state = HW_NONE;
+ match_list = strv_free(match_list);
+ break;
+ }
+
+ if (line[0] != ' ') {
+ /* another match */
+ err = strv_extend(&match_list, line);
+ if (err < 0)
+ return err;
+
+ break;
+ }
+
+ /* first data */
+ state = HW_DATA;
+ err = insert_data(trie, match_list, line, filename, file_priority, line_number, compat);
+ if (err < 0)
+ r = err;
+ break;
+
+ case HW_DATA:
+ if (len == 0) {
+ /* end of record */
+ state = HW_NONE;
+ match_list = strv_free(match_list);
+ break;
+ }
+
+ if (line[0] != ' ') {
+ r = log_syntax(NULL, LOG_WARNING, filename, line_number, SYNTHETIC_ERRNO(EINVAL),
+ "Property or empty line expected, got \"%s\", ignoring record.", line);
+ state = HW_NONE;
+ match_list = strv_free(match_list);
+ break;
+ }
+
+ err = insert_data(trie, match_list, line, filename, file_priority, line_number, compat);
+ if (err < 0)
+ r = err;
+ break;
+ };
+ }
+
+ if (state == HW_MATCH)
+ log_syntax(NULL, LOG_WARNING, filename, line_number, 0,
+ "Property expected, ignoring record with no properties.");
+
+ return r;
+}
+
+int hwdb_update(const char *root, const char *hwdb_bin_dir, bool strict, bool compat) {
+ _cleanup_free_ char *hwdb_bin = NULL;
+ _cleanup_(trie_freep) struct trie *trie = NULL;
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ uint16_t file_priority = 1;
+ int r = 0, err;
+
+ /* The argument 'compat' controls the format version of database. If false, then hwdb.bin will be created with
+ * additional information such that priority, line number, and filename of database source. If true, then hwdb.bin
+ * will be created without the information. systemd-hwdb command should set the argument false, and 'udevadm hwdb'
+ * command should set it true. */
+
+ trie = new0(struct trie, 1);
+ if (!trie)
+ return -ENOMEM;
+
+ /* string store */
+ trie->strings = strbuf_new();
+ if (!trie->strings)
+ return -ENOMEM;
+
+ /* index */
+ trie->root = new0(struct trie_node, 1);
+ if (!trie->root)
+ return -ENOMEM;
+
+ trie->nodes_count++;
+
+ err = conf_files_list_strv(&files, ".hwdb", root, 0, conf_file_dirs);
+ if (err < 0)
+ return log_error_errno(err, "Failed to enumerate hwdb files: %m");
+
+ STRV_FOREACH(f, files) {
+ log_debug("Reading file \"%s\"", *f);
+ err = import_file(trie, *f, file_priority++, compat);
+ if (err < 0 && strict)
+ r = err;
+ }
+
+ strbuf_complete(trie->strings);
+
+ log_debug("=== trie in-memory ===");
+ log_debug("nodes: %8zu bytes (%8zu)",
+ trie->nodes_count * sizeof(struct trie_node), trie->nodes_count);
+ log_debug("children arrays: %8zu bytes (%8zu)",
+ trie->children_count * sizeof(struct trie_child_entry), trie->children_count);
+ log_debug("values arrays: %8zu bytes (%8zu)",
+ trie->values_count * sizeof(struct trie_value_entry), trie->values_count);
+ log_debug("strings: %8zu bytes",
+ trie->strings->len);
+ log_debug("strings incoming: %8zu bytes (%8zu)",
+ trie->strings->in_len, trie->strings->in_count);
+ log_debug("strings dedup'ed: %8zu bytes (%8zu)",
+ trie->strings->dedup_len, trie->strings->dedup_count);
+
+ hwdb_bin = path_join(root, hwdb_bin_dir ?: default_hwdb_bin_dir, "hwdb.bin");
+ if (!hwdb_bin)
+ return -ENOMEM;
+
+ mkdir_parents_label(hwdb_bin, 0755);
+ err = trie_store(trie, hwdb_bin, compat);
+ if (err < 0)
+ return log_error_errno(err, "Failed to write database %s: %m", hwdb_bin);
+
+ err = label_fix(hwdb_bin, 0);
+ if (err < 0)
+ return err;
+
+ return r;
+}
+
+int hwdb_query(const char *modalias) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char *key, *value;
+ int r;
+
+ assert(modalias);
+
+ r = sd_hwdb_new(&hwdb);
+ if (r < 0)
+ return r;
+
+ SD_HWDB_FOREACH_PROPERTY(hwdb, modalias, key, value)
+ printf("%s=%s\n", key, value);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-hwdb/hwdb-util.h b/src/libsystemd/sd-hwdb/hwdb-util.h
new file mode 100644
index 0000000..5afde74
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/hwdb-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-hwdb.h"
+
+bool hwdb_validate(sd_hwdb *hwdb);
+int hwdb_update(const char *root, const char *hwdb_bin_dir, bool strict, bool compat);
+int hwdb_query(const char *modalias);
diff --git a/src/libsystemd/sd-hwdb/sd-hwdb.c b/src/libsystemd/sd-hwdb/sd-hwdb.c
new file mode 100644
index 0000000..cb3c77c
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/sd-hwdb.c
@@ -0,0 +1,466 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2008 Alan Jenkins <alan.christopher.jenkins@googlemail.com>
+***/
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "hwdb-internal.h"
+#include "hwdb-util.h"
+#include "nulstr-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+struct sd_hwdb {
+ unsigned n_ref;
+
+ FILE *f;
+ struct stat st;
+ union {
+ struct trie_header_f *head;
+ const char *map;
+ };
+
+ OrderedHashmap *properties;
+ Iterator properties_iterator;
+ bool properties_modified;
+};
+
+struct linebuf {
+ char bytes[LINE_MAX];
+ size_t size;
+ size_t len;
+};
+
+static void linebuf_init(struct linebuf *buf) {
+ buf->size = 0;
+ buf->len = 0;
+}
+
+static const char *linebuf_get(struct linebuf *buf) {
+ if (buf->len + 1 >= sizeof(buf->bytes))
+ return NULL;
+ buf->bytes[buf->len] = '\0';
+ return buf->bytes;
+}
+
+static bool linebuf_add(struct linebuf *buf, const char *s, size_t len) {
+ if (buf->len + len >= sizeof(buf->bytes))
+ return false;
+ memcpy(buf->bytes + buf->len, s, len);
+ buf->len += len;
+ return true;
+}
+
+static bool linebuf_add_char(struct linebuf *buf, char c) {
+ if (buf->len + 1 >= sizeof(buf->bytes))
+ return false;
+ buf->bytes[buf->len++] = c;
+ return true;
+}
+
+static void linebuf_rem(struct linebuf *buf, size_t count) {
+ assert(buf->len >= count);
+ buf->len -= count;
+}
+
+static void linebuf_rem_char(struct linebuf *buf) {
+ linebuf_rem(buf, 1);
+}
+
+static const struct trie_child_entry_f *trie_node_child(sd_hwdb *hwdb, const struct trie_node_f *node, size_t idx) {
+ const char *base = (const char *)node;
+
+ base += le64toh(hwdb->head->node_size);
+ base += idx * le64toh(hwdb->head->child_entry_size);
+ return (const struct trie_child_entry_f *)base;
+}
+
+static const struct trie_value_entry_f *trie_node_value(sd_hwdb *hwdb, const struct trie_node_f *node, size_t idx) {
+ const char *base = (const char *)node;
+
+ base += le64toh(hwdb->head->node_size);
+ base += node->children_count * le64toh(hwdb->head->child_entry_size);
+ base += idx * le64toh(hwdb->head->value_entry_size);
+ return (const struct trie_value_entry_f *)base;
+}
+
+static const struct trie_node_f *trie_node_from_off(sd_hwdb *hwdb, le64_t off) {
+ return (const struct trie_node_f *)(hwdb->map + le64toh(off));
+}
+
+static const char *trie_string(sd_hwdb *hwdb, le64_t off) {
+ return hwdb->map + le64toh(off);
+}
+
+static int trie_children_cmp_f(const void *v1, const void *v2) {
+ const struct trie_child_entry_f *n1 = v1;
+ const struct trie_child_entry_f *n2 = v2;
+
+ return n1->c - n2->c;
+}
+
+static const struct trie_node_f *node_lookup_f(sd_hwdb *hwdb, const struct trie_node_f *node, uint8_t c) {
+ struct trie_child_entry_f *child;
+ struct trie_child_entry_f search;
+
+ search.c = c;
+ child = bsearch(&search, (const char *)node + le64toh(hwdb->head->node_size), node->children_count,
+ le64toh(hwdb->head->child_entry_size), trie_children_cmp_f);
+ if (child)
+ return trie_node_from_off(hwdb, child->child_off);
+ return NULL;
+}
+
+static int hwdb_add_property(sd_hwdb *hwdb, const struct trie_value_entry_f *entry) {
+ const char *key;
+ int r;
+
+ assert(hwdb);
+
+ key = trie_string(hwdb, entry->key_off);
+
+ /*
+ * Silently ignore all properties which do not start with a
+ * space; future extensions might use additional prefixes.
+ */
+ if (key[0] != ' ')
+ return 0;
+
+ key++;
+
+ if (le64toh(hwdb->head->value_entry_size) >= sizeof(struct trie_value_entry2_f)) {
+ const struct trie_value_entry2_f *old, *entry2;
+
+ entry2 = (const struct trie_value_entry2_f *)entry;
+ old = ordered_hashmap_get(hwdb->properties, key);
+ if (old) {
+ /* On duplicates, we order by filename priority and line-number.
+ *
+ * v2 of the format had 64 bits for the line number.
+ * v3 reuses top 32 bits of line_number to store the priority.
+ * We check the top bits — if they are zero we have v2 format.
+ * This means that v2 clients will print wrong line numbers with
+ * v3 data.
+ *
+ * For v3 data: we compare the priority (of the source file)
+ * and the line number.
+ *
+ * For v2 data: we rely on the fact that the filenames in the hwdb
+ * are added in the order of priority (higher later), because they
+ * are *processed* in the order of priority. So we compare the
+ * indices to determine which file had higher priority. Comparing
+ * the strings alphabetically would be useless, because those are
+ * full paths, and e.g. /usr/lib would sort after /etc, even
+ * though it has lower priority. This is not reliable because of
+ * suffix compression, but should work for the most common case of
+ * /usr/lib/udev/hwbd.d and /etc/udev/hwdb.d, and is better than
+ * not doing the comparison at all.
+ */
+ bool lower;
+
+ if (entry2->file_priority == 0)
+ lower = entry2->filename_off < old->filename_off ||
+ (entry2->filename_off == old->filename_off && entry2->line_number < old->line_number);
+ else
+ lower = entry2->file_priority < old->file_priority ||
+ (entry2->file_priority == old->file_priority && entry2->line_number < old->line_number);
+ if (lower)
+ return 0;
+ }
+ }
+
+ r = ordered_hashmap_ensure_allocated(&hwdb->properties, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_replace(hwdb->properties, key, (void *)entry);
+ if (r < 0)
+ return r;
+
+ hwdb->properties_modified = true;
+
+ return 0;
+}
+
+static int trie_fnmatch_f(sd_hwdb *hwdb, const struct trie_node_f *node, size_t p,
+ struct linebuf *buf, const char *search) {
+ size_t len;
+ size_t i;
+ const char *prefix;
+ int err;
+
+ prefix = trie_string(hwdb, node->prefix_off);
+ len = strlen(prefix + p);
+ linebuf_add(buf, prefix + p, len);
+
+ for (i = 0; i < node->children_count; i++) {
+ const struct trie_child_entry_f *child = trie_node_child(hwdb, node, i);
+
+ linebuf_add_char(buf, child->c);
+ err = trie_fnmatch_f(hwdb, trie_node_from_off(hwdb, child->child_off), 0, buf, search);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(buf);
+ }
+
+ if (le64toh(node->values_count) && fnmatch(linebuf_get(buf), search, 0) == 0)
+ for (i = 0; i < le64toh(node->values_count); i++) {
+ err = hwdb_add_property(hwdb, trie_node_value(hwdb, node, i));
+ if (err < 0)
+ return err;
+ }
+
+ linebuf_rem(buf, len);
+ return 0;
+}
+
+static int trie_search_f(sd_hwdb *hwdb, const char *search) {
+ struct linebuf buf;
+ const struct trie_node_f *node;
+ size_t i = 0;
+ int err;
+
+ linebuf_init(&buf);
+
+ node = trie_node_from_off(hwdb, hwdb->head->nodes_root_off);
+ while (node) {
+ const struct trie_node_f *child;
+ size_t p = 0;
+
+ if (node->prefix_off) {
+ char c;
+
+ for (; (c = trie_string(hwdb, node->prefix_off)[p]); p++) {
+ if (IN_SET(c, '*', '?', '['))
+ return trie_fnmatch_f(hwdb, node, p, &buf, search + i + p);
+ if (c != search[i + p])
+ return 0;
+ }
+ i += p;
+ }
+
+ child = node_lookup_f(hwdb, node, '*');
+ if (child) {
+ linebuf_add_char(&buf, '*');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ child = node_lookup_f(hwdb, node, '?');
+ if (child) {
+ linebuf_add_char(&buf, '?');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ child = node_lookup_f(hwdb, node, '[');
+ if (child) {
+ linebuf_add_char(&buf, '[');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ if (search[i] == '\0') {
+ size_t n;
+
+ for (n = 0; n < le64toh(node->values_count); n++) {
+ err = hwdb_add_property(hwdb, trie_node_value(hwdb, node, n));
+ if (err < 0)
+ return err;
+ }
+ return 0;
+ }
+
+ child = node_lookup_f(hwdb, node, search[i]);
+ node = child;
+ i++;
+ }
+ return 0;
+}
+
+static const char hwdb_bin_paths[] =
+ "/etc/systemd/hwdb/hwdb.bin\0"
+ "/etc/udev/hwdb.bin\0"
+ "/usr/lib/systemd/hwdb/hwdb.bin\0"
+#if HAVE_SPLIT_USR
+ "/lib/systemd/hwdb/hwdb.bin\0"
+#endif
+ UDEVLIBEXECDIR "/hwdb.bin\0";
+
+_public_ int sd_hwdb_new(sd_hwdb **ret) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char *hwdb_bin_path;
+ const char sig[] = HWDB_SIG;
+
+ assert_return(ret, -EINVAL);
+
+ hwdb = new0(sd_hwdb, 1);
+ if (!hwdb)
+ return -ENOMEM;
+
+ hwdb->n_ref = 1;
+
+ /* find hwdb.bin in hwdb_bin_paths */
+ NULSTR_FOREACH(hwdb_bin_path, hwdb_bin_paths) {
+ log_debug("Trying to open \"%s\"...", hwdb_bin_path);
+ hwdb->f = fopen(hwdb_bin_path, "re");
+ if (hwdb->f)
+ break;
+ if (errno != ENOENT)
+ return log_debug_errno(errno, "Failed to open %s: %m", hwdb_bin_path);
+ }
+
+ if (!hwdb->f)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+ "hwdb.bin does not exist, please run 'systemd-hwdb update'");
+
+ if (fstat(fileno(hwdb->f), &hwdb->st) < 0)
+ return log_debug_errno(errno, "Failed to stat %s: %m", hwdb_bin_path);
+ if (hwdb->st.st_size < (off_t) offsetof(struct trie_header_f, strings_len) + 8)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "File %s is too short: %m", hwdb_bin_path);
+
+ hwdb->map = mmap(0, hwdb->st.st_size, PROT_READ, MAP_SHARED, fileno(hwdb->f), 0);
+ if (hwdb->map == MAP_FAILED)
+ return log_debug_errno(errno, "Failed to map %s: %m", hwdb_bin_path);
+
+ if (memcmp(hwdb->map, sig, sizeof(hwdb->head->signature)) != 0 ||
+ (size_t) hwdb->st.st_size != le64toh(hwdb->head->file_size))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to recognize the format of %s",
+ hwdb_bin_path);
+
+ log_debug("=== trie on-disk ===");
+ log_debug("tool version: %"PRIu64, le64toh(hwdb->head->tool_version));
+ log_debug("file size: %8"PRIi64" bytes", hwdb->st.st_size);
+ log_debug("header size %8"PRIu64" bytes", le64toh(hwdb->head->header_size));
+ log_debug("strings %8"PRIu64" bytes", le64toh(hwdb->head->strings_len));
+ log_debug("nodes %8"PRIu64" bytes", le64toh(hwdb->head->nodes_len));
+
+ *ret = TAKE_PTR(hwdb);
+
+ return 0;
+}
+
+static sd_hwdb *hwdb_free(sd_hwdb *hwdb) {
+ assert(hwdb);
+
+ if (hwdb->map)
+ munmap((void *)hwdb->map, hwdb->st.st_size);
+ safe_fclose(hwdb->f);
+ ordered_hashmap_free(hwdb->properties);
+ return mfree(hwdb);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_hwdb, sd_hwdb, hwdb_free)
+
+bool hwdb_validate(sd_hwdb *hwdb) {
+ bool found = false;
+ const char* p;
+ struct stat st;
+
+ if (!hwdb)
+ return false;
+ if (!hwdb->f)
+ return false;
+
+ /* if hwdb.bin doesn't exist anywhere, we need to update */
+ NULSTR_FOREACH(p, hwdb_bin_paths)
+ if (stat(p, &st) >= 0) {
+ found = true;
+ break;
+ }
+ if (!found)
+ return true;
+
+ if (timespec_load(&hwdb->st.st_mtim) != timespec_load(&st.st_mtim))
+ return true;
+ return false;
+}
+
+static int properties_prepare(sd_hwdb *hwdb, const char *modalias) {
+ assert(hwdb);
+ assert(modalias);
+
+ ordered_hashmap_clear(hwdb->properties);
+ hwdb->properties_modified = true;
+
+ return trie_search_f(hwdb, modalias);
+}
+
+_public_ int sd_hwdb_get(sd_hwdb *hwdb, const char *modalias, const char *key, const char **_value) {
+ const struct trie_value_entry_f *entry;
+ int r;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(hwdb->f, -EINVAL);
+ assert_return(modalias, -EINVAL);
+ assert_return(_value, -EINVAL);
+
+ r = properties_prepare(hwdb, modalias);
+ if (r < 0)
+ return r;
+
+ entry = ordered_hashmap_get(hwdb->properties, key);
+ if (!entry)
+ return -ENOENT;
+
+ *_value = trie_string(hwdb, entry->value_off);
+
+ return 0;
+}
+
+_public_ int sd_hwdb_seek(sd_hwdb *hwdb, const char *modalias) {
+ int r;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(hwdb->f, -EINVAL);
+ assert_return(modalias, -EINVAL);
+
+ r = properties_prepare(hwdb, modalias);
+ if (r < 0)
+ return r;
+
+ hwdb->properties_modified = false;
+ hwdb->properties_iterator = ITERATOR_FIRST;
+
+ return 0;
+}
+
+_public_ int sd_hwdb_enumerate(sd_hwdb *hwdb, const char **key, const char **value) {
+ const struct trie_value_entry_f *entry;
+ const void *k;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(key, -EINVAL);
+ assert_return(value, -EINVAL);
+
+ if (hwdb->properties_modified)
+ return -EAGAIN;
+
+ if (!ordered_hashmap_iterate(hwdb->properties, &hwdb->properties_iterator, (void **)&entry, &k))
+ return 0;
+
+ *key = k;
+ *value = trie_string(hwdb, entry->value_off);
+
+ return 1;
+}
diff --git a/src/libsystemd/sd-id128/id128-util.c b/src/libsystemd/sd-id128/id128-util.c
new file mode 100644
index 0000000..a3f6da6
--- /dev/null
+++ b/src/libsystemd/sd-id128/id128-util.c
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+char *id128_to_uuid_string(sd_id128_t id, char s[static ID128_UUID_STRING_MAX]) {
+ unsigned n, k = 0;
+
+ assert(s);
+
+ /* Similar to sd_id128_to_string() but formats the result as UUID instead of plain hex chars */
+
+ for (n = 0; n < 16; n++) {
+
+ if (IN_SET(n, 4, 6, 8, 10))
+ s[k++] = '-';
+
+ s[k++] = hexchar(id.bytes[n] >> 4);
+ s[k++] = hexchar(id.bytes[n] & 0xF);
+ }
+
+ assert(k == 36);
+
+ s[k] = 0;
+
+ return s;
+}
+
+bool id128_is_valid(const char *s) {
+ size_t i, l;
+
+ assert(s);
+
+ l = strlen(s);
+ if (l == 32) {
+
+ /* Plain formatted 128bit hex string */
+
+ for (i = 0; i < l; i++) {
+ char c = s[i];
+
+ if (!(c >= '0' && c <= '9') &&
+ !(c >= 'a' && c <= 'z') &&
+ !(c >= 'A' && c <= 'Z'))
+ return false;
+ }
+
+ } else if (l == 36) {
+
+ /* Formatted UUID */
+
+ for (i = 0; i < l; i++) {
+ char c = s[i];
+
+ if (IN_SET(i, 8, 13, 18, 23)) {
+ if (c != '-')
+ return false;
+ } else {
+ if (!(c >= '0' && c <= '9') &&
+ !(c >= 'a' && c <= 'z') &&
+ !(c >= 'A' && c <= 'Z'))
+ return false;
+ }
+ }
+
+ } else
+ return false;
+
+ return true;
+}
+
+int id128_read_fd(int fd, Id128Format f, sd_id128_t *ret) {
+ char buffer[36 + 2];
+ ssize_t l;
+
+ assert(fd >= 0);
+ assert(f < _ID128_FORMAT_MAX);
+
+ /* Reads an 128bit ID from a file, which may either be in plain format (32 hex digits), or in UUID format, both
+ * optionally followed by a newline and nothing else. ID files should really be newline terminated, but if they
+ * aren't that's OK too, following the rule of "Be conservative in what you send, be liberal in what you
+ * accept". */
+
+ l = loop_read(fd, buffer, sizeof(buffer), false); /* we expect a short read of either 32/33 or 36/37 chars */
+ if (l < 0)
+ return (int) l;
+ if (l == 0) /* empty? */
+ return -ENOMEDIUM;
+
+ switch (l) {
+
+ case 13:
+ case 14:
+ /* Treat an "uninitialized" id file like an empty one */
+ return f == ID128_PLAIN_OR_UNINIT && strneq(buffer, "uninitialized\n", l) ? -ENOMEDIUM : -EINVAL;
+
+ case 33: /* plain UUID with trailing newline */
+ if (buffer[32] != '\n')
+ return -EINVAL;
+
+ _fallthrough_;
+ case 32: /* plain UUID without trailing newline */
+ if (f == ID128_UUID)
+ return -EINVAL;
+
+ buffer[32] = 0;
+ break;
+
+ case 37: /* RFC UUID with trailing newline */
+ if (buffer[36] != '\n')
+ return -EINVAL;
+
+ _fallthrough_;
+ case 36: /* RFC UUID without trailing newline */
+ if (IN_SET(f, ID128_PLAIN, ID128_PLAIN_OR_UNINIT))
+ return -EINVAL;
+
+ buffer[36] = 0;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return sd_id128_from_string(buffer, ret);
+}
+
+int id128_read(const char *p, Id128Format f, sd_id128_t *ret) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ return id128_read_fd(fd, f, ret);
+}
+
+int id128_write_fd(int fd, Id128Format f, sd_id128_t id, bool do_sync) {
+ char buffer[36 + 2];
+ size_t sz;
+ int r;
+
+ assert(fd >= 0);
+ assert(f < _ID128_FORMAT_MAX);
+
+ if (f != ID128_UUID) {
+ sd_id128_to_string(id, buffer);
+ buffer[32] = '\n';
+ sz = 33;
+ } else {
+ id128_to_uuid_string(id, buffer);
+ buffer[36] = '\n';
+ sz = 37;
+ }
+
+ r = loop_write(fd, buffer, sz, false);
+ if (r < 0)
+ return r;
+
+ if (do_sync) {
+ if (fsync(fd) < 0)
+ return -errno;
+
+ r = fsync_directory_of_file(fd);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(p, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_TRUNC, 0444);
+ if (fd < 0)
+ return -errno;
+
+ return id128_write_fd(fd, f, id, do_sync);
+}
+
+void id128_hash_func(const sd_id128_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(sd_id128_t), state);
+}
+
+int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) {
+ return memcmp(a, b, 16);
+}
+
+sd_id128_t id128_make_v4_uuid(sd_id128_t id) {
+ /* Stolen from generate_random_uuid() of drivers/char/random.c
+ * in the kernel sources */
+
+ /* Set UUID version to 4 --- truly random generation */
+ id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40;
+
+ /* Set the UUID variant to DCE */
+ id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80;
+
+ return id;
+}
+
+DEFINE_HASH_OPS(id128_hash_ops, sd_id128_t, id128_hash_func, id128_compare_func);
diff --git a/src/libsystemd/sd-id128/id128-util.h b/src/libsystemd/sd-id128/id128-util.h
new file mode 100644
index 0000000..6b09bcd
--- /dev/null
+++ b/src/libsystemd/sd-id128/id128-util.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+#include "hash-funcs.h"
+#include "macro.h"
+
+#define ID128_UUID_STRING_MAX 37
+
+char *id128_to_uuid_string(sd_id128_t id, char s[static ID128_UUID_STRING_MAX]);
+
+bool id128_is_valid(const char *s) _pure_;
+
+typedef enum Id128Format {
+ ID128_ANY,
+ ID128_PLAIN, /* formatted as 32 hex chars as-is */
+ ID128_PLAIN_OR_UNINIT, /* formatted as 32 hex chars as-is; allow special "uninitialized"
+ * value when reading from file (id128_read() and id128_read_fd()).
+ *
+ * This format should be used when reading a machine-id file. */
+ ID128_UUID, /* formatted as 36 character uuid string */
+ _ID128_FORMAT_MAX,
+} Id128Format;
+
+int id128_read_fd(int fd, Id128Format f, sd_id128_t *ret);
+int id128_read(const char *p, Id128Format f, sd_id128_t *ret);
+
+int id128_write_fd(int fd, Id128Format f, sd_id128_t id, bool do_sync);
+int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync);
+
+void id128_hash_func(const sd_id128_t *p, struct siphash *state);
+int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) _pure_;
+extern const struct hash_ops id128_hash_ops;
+
+sd_id128_t id128_make_v4_uuid(sd_id128_t id);
diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c
new file mode 100644
index 0000000..d5de935
--- /dev/null
+++ b/src/libsystemd/sd-id128/sd-id128.c
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "khash.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "random-util.h"
+#include "user-util.h"
+#include "util.h"
+
+_public_ char *sd_id128_to_string(sd_id128_t id, char s[_SD_ARRAY_STATIC SD_ID128_STRING_MAX]) {
+ unsigned n;
+
+ assert_return(s, NULL);
+
+ for (n = 0; n < 16; n++) {
+ s[n*2] = hexchar(id.bytes[n] >> 4);
+ s[n*2+1] = hexchar(id.bytes[n] & 0xF);
+ }
+
+ s[32] = 0;
+
+ return s;
+}
+
+_public_ int sd_id128_from_string(const char s[], sd_id128_t *ret) {
+ unsigned n, i;
+ sd_id128_t t;
+ bool is_guid = false;
+
+ assert_return(s, -EINVAL);
+
+ for (n = 0, i = 0; n < 16;) {
+ int a, b;
+
+ if (s[i] == '-') {
+ /* Is this a GUID? Then be nice, and skip over
+ * the dashes */
+
+ if (i == 8)
+ is_guid = true;
+ else if (IN_SET(i, 13, 18, 23)) {
+ if (!is_guid)
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ i++;
+ continue;
+ }
+
+ a = unhexchar(s[i++]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unhexchar(s[i++]);
+ if (b < 0)
+ return -EINVAL;
+
+ t.bytes[n++] = (a << 4) | b;
+ }
+
+ if (i != (is_guid ? 36 : 32))
+ return -EINVAL;
+
+ if (s[i] != 0)
+ return -EINVAL;
+
+ if (ret)
+ *ret = t;
+ return 0;
+}
+
+_public_ int sd_id128_get_machine(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_machine_id = {};
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (sd_id128_is_null(saved_machine_id)) {
+ r = id128_read("/etc/machine-id", ID128_PLAIN, &saved_machine_id);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_is_null(saved_machine_id))
+ return -ENOMEDIUM;
+ }
+
+ *ret = saved_machine_id;
+ return 0;
+}
+
+_public_ int sd_id128_get_boot(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_boot_id = {};
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (sd_id128_is_null(saved_boot_id)) {
+ r = id128_read("/proc/sys/kernel/random/boot_id", ID128_UUID, &saved_boot_id);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = saved_boot_id;
+ return 0;
+}
+
+static int get_invocation_from_keyring(sd_id128_t *ret) {
+ _cleanup_free_ char *description = NULL;
+ char *d, *p, *g, *u, *e;
+ unsigned long perms;
+ key_serial_t key;
+ size_t sz = 256;
+ uid_t uid;
+ gid_t gid;
+ int r, c;
+
+#define MAX_PERMS ((unsigned long) (KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH| \
+ KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH))
+
+ assert(ret);
+
+ key = request_key("user", "invocation_id", NULL, 0);
+ if (key == -1) {
+ /* Keyring support not available? No invocation key stored? */
+ if (IN_SET(errno, ENOSYS, ENOKEY))
+ return -ENXIO;
+
+ return -errno;
+ }
+
+ for (;;) {
+ description = new(char, sz);
+ if (!description)
+ return -ENOMEM;
+
+ c = keyctl(KEYCTL_DESCRIBE, key, (unsigned long) description, sz, 0);
+ if (c < 0)
+ return -errno;
+
+ if ((size_t) c <= sz)
+ break;
+
+ sz = c;
+ free(description);
+ }
+
+ /* The kernel returns a final NUL in the string, verify that. */
+ assert(description[c-1] == 0);
+
+ /* Chop off the final description string */
+ d = strrchr(description, ';');
+ if (!d)
+ return -EIO;
+ *d = 0;
+
+ /* Look for the permissions */
+ p = strrchr(description, ';');
+ if (!p)
+ return -EIO;
+
+ errno = 0;
+ perms = strtoul(p + 1, &e, 16);
+ if (errno > 0)
+ return -errno;
+ if (e == p + 1) /* Read at least one character */
+ return -EIO;
+ if (e != d) /* Must reached the end */
+ return -EIO;
+
+ if ((perms & ~MAX_PERMS) != 0)
+ return -EPERM;
+
+ *p = 0;
+
+ /* Look for the group ID */
+ g = strrchr(description, ';');
+ if (!g)
+ return -EIO;
+ r = parse_gid(g + 1, &gid);
+ if (r < 0)
+ return r;
+ if (gid != 0)
+ return -EPERM;
+ *g = 0;
+
+ /* Look for the user ID */
+ u = strrchr(description, ';');
+ if (!u)
+ return -EIO;
+ r = parse_uid(u + 1, &uid);
+ if (r < 0)
+ return r;
+ if (uid != 0)
+ return -EPERM;
+
+ c = keyctl(KEYCTL_READ, key, (unsigned long) ret, sizeof(sd_id128_t), 0);
+ if (c < 0)
+ return -errno;
+ if (c != sizeof(sd_id128_t))
+ return -EIO;
+
+ return 0;
+}
+
+static int get_invocation_from_environment(sd_id128_t *ret) {
+ const char *e;
+
+ assert(ret);
+
+ e = secure_getenv("INVOCATION_ID");
+ if (!e)
+ return -ENXIO;
+
+ return sd_id128_from_string(e, ret);
+}
+
+_public_ int sd_id128_get_invocation(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_invocation_id = {};
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (sd_id128_is_null(saved_invocation_id)) {
+ /* We first check the environment. The environment variable is primarily relevant for user
+ * services, and sufficiently safe as long as no privilege boundary is involved. */
+ r = get_invocation_from_environment(&saved_invocation_id);
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ /* The kernel keyring is relevant for system services (as for user services we don't store
+ * the invocation ID in the keyring, as there'd be no trust benefit in that). */
+ r = get_invocation_from_keyring(&saved_invocation_id);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = saved_invocation_id;
+ return 0;
+}
+
+_public_ int sd_id128_randomize(sd_id128_t *ret) {
+ sd_id128_t t;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ /* We allow usage if x86-64 RDRAND here. It might not be trusted enough for keeping secrets, but it should be
+ * fine for UUIDS. */
+ r = genuine_random_bytes(&t, sizeof t, RANDOM_ALLOW_RDRAND);
+ if (r < 0)
+ return r;
+
+ /* Turn this into a valid v4 UUID, to be nice. Note that we
+ * only guarantee this for newly generated UUIDs, not for
+ * pre-existing ones. */
+
+ *ret = id128_make_v4_uuid(t);
+ return 0;
+}
+
+static int get_app_specific(sd_id128_t base, sd_id128_t app_id, sd_id128_t *ret) {
+ _cleanup_(khash_unrefp) khash *h = NULL;
+ sd_id128_t result;
+ const void *p;
+ int r;
+
+ assert(ret);
+
+ r = khash_new_with_key(&h, "hmac(sha256)", &base, sizeof(base));
+ if (r < 0)
+ return r;
+
+ r = khash_put(h, &app_id, sizeof(app_id));
+ if (r < 0)
+ return r;
+
+ r = khash_digest_data(h, &p);
+ if (r < 0)
+ return r;
+
+ /* We chop off the trailing 16 bytes */
+ memcpy(&result, p, MIN(khash_get_size(h), sizeof(result)));
+
+ *ret = id128_make_v4_uuid(result);
+ return 0;
+}
+
+_public_ int sd_id128_get_machine_app_specific(sd_id128_t app_id, sd_id128_t *ret) {
+ sd_id128_t id;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ return get_app_specific(id, app_id, ret);
+}
+
+_public_ int sd_id128_get_boot_app_specific(sd_id128_t app_id, sd_id128_t *ret) {
+ sd_id128_t id;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ return get_app_specific(id, app_id, ret);
+}
diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c
new file mode 100644
index 0000000..1fc3795
--- /dev/null
+++ b/src/libsystemd/sd-login/sd-login.c
@@ -0,0 +1,1051 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "login-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+/* Error codes:
+ *
+ * invalid input parameters → -EINVAL
+ * invalid fd → -EBADF
+ * process does not exist → -ESRCH
+ * cgroup does not exist → -ENOENT
+ * machine, session does not exist → -ENXIO
+ * requested metadata on object is missing → -ENODATA
+ */
+
+_public_ int sd_pid_get_session(pid_t pid, char **session) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(session, -EINVAL);
+
+ r = cg_pid_get_session(pid, session);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_unit(pid_t pid, char **unit) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(unit, -EINVAL);
+
+ r = cg_pid_get_unit(pid, unit);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_user_unit(pid_t pid, char **unit) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(unit, -EINVAL);
+
+ r = cg_pid_get_user_unit(pid, unit);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_machine_name(pid_t pid, char **name) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(name, -EINVAL);
+
+ r = cg_pid_get_machine_name(pid, name);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_slice(pid_t pid, char **slice) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(slice, -EINVAL);
+
+ r = cg_pid_get_slice(pid, slice);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_user_slice(pid_t pid, char **slice) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(slice, -EINVAL);
+
+ r = cg_pid_get_user_slice(pid, slice);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ r = cg_pid_get_owner_uid(pid, uid);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_cgroup(pid_t pid, char **cgroup) {
+ char *c;
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(cgroup, -EINVAL);
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &c);
+ if (r < 0)
+ return r;
+
+ /* The internal APIs return the empty string for the root
+ * cgroup, let's return the "/" in the public APIs instead, as
+ * that's easier and less ambiguous for people to grok. */
+ if (isempty(c)) {
+ free(c);
+ c = strdup("/");
+ if (!c)
+ return -ENOMEM;
+
+ }
+
+ *cgroup = c;
+ return 0;
+}
+
+_public_ int sd_peer_get_session(int fd, char **session) {
+ struct ucred ucred = {};
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(session, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_session(ucred.pid, session);
+}
+
+_public_ int sd_peer_get_owner_uid(int fd, uid_t *uid) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(uid, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_owner_uid(ucred.pid, uid);
+}
+
+_public_ int sd_peer_get_unit(int fd, char **unit) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(unit, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_unit(ucred.pid, unit);
+}
+
+_public_ int sd_peer_get_user_unit(int fd, char **unit) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(unit, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_user_unit(ucred.pid, unit);
+}
+
+_public_ int sd_peer_get_machine_name(int fd, char **machine) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(machine, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_machine_name(ucred.pid, machine);
+}
+
+_public_ int sd_peer_get_slice(int fd, char **slice) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(slice, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_slice(ucred.pid, slice);
+}
+
+_public_ int sd_peer_get_user_slice(int fd, char **slice) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(slice, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_user_slice(ucred.pid, slice);
+}
+
+_public_ int sd_peer_get_cgroup(int fd, char **cgroup) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(cgroup, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return sd_pid_get_cgroup(ucred.pid, cgroup);
+}
+
+static int file_of_uid(uid_t uid, char **p) {
+
+ assert_return(uid_is_valid(uid), -EINVAL);
+ assert(p);
+
+ if (asprintf(p, "/run/systemd/users/" UID_FMT, uid) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+_public_ int sd_uid_get_state(uid_t uid, char**state) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(state, -EINVAL);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "STATE", &s);
+ if (r == -ENOENT) {
+ r = free_and_strdup(&s, "offline");
+ if (r < 0)
+ return r;
+ } else if (r < 0)
+ return r;
+ else if (isempty(s))
+ return -EIO;
+
+ *state = TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_uid_get_display(uid_t uid, char **session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(session, -EINVAL);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "DISPLAY", &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *session = TAKE_PTR(s);
+
+ return 0;
+}
+
+static int file_of_seat(const char *seat, char **_p) {
+ char *p;
+ int r;
+
+ assert(_p);
+
+ if (seat) {
+ if (!filename_is_valid(seat))
+ return -EINVAL;
+
+ p = path_join("/run/systemd/seats", seat);
+ } else {
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_session_get_seat(NULL, &buf);
+ if (r < 0)
+ return r;
+
+ p = path_join("/run/systemd/seats", buf);
+ }
+ if (!p)
+ return -ENOMEM;
+
+ *_p = TAKE_PTR(p);
+ return 0;
+}
+
+_public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) {
+ _cleanup_free_ char *filename = NULL, *content = NULL;
+ int r;
+
+ assert_return(uid_is_valid(uid), -EINVAL);
+
+ r = file_of_seat(seat, &filename);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, filename,
+ require_active ? "ACTIVE_UID" : "UIDS",
+ &content);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+ if (isempty(content))
+ return 0;
+
+ char t[DECIMAL_STR_MAX(uid_t)];
+ xsprintf(t, UID_FMT, uid);
+
+ return string_contains_word(content, NULL, t);
+}
+
+static int uid_get_array(uid_t uid, const char *variable, char ***array) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ char **a;
+ int r;
+
+ assert(variable);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, variable, &s);
+ if (r == -ENOENT || (r >= 0 && isempty(s))) {
+ if (array)
+ *array = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ a = strv_split(s, NULL);
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ if (array)
+ *array = a;
+ else
+ strv_free(a);
+
+ return r;
+}
+
+_public_ int sd_uid_get_sessions(uid_t uid, int require_active, char ***sessions) {
+ return uid_get_array(
+ uid,
+ require_active == 0 ? "ONLINE_SESSIONS" :
+ require_active > 0 ? "ACTIVE_SESSIONS" :
+ "SESSIONS",
+ sessions);
+}
+
+_public_ int sd_uid_get_seats(uid_t uid, int require_active, char ***seats) {
+ return uid_get_array(
+ uid,
+ require_active == 0 ? "ONLINE_SEATS" :
+ require_active > 0 ? "ACTIVE_SEATS" :
+ "SEATS",
+ seats);
+}
+
+static int file_of_session(const char *session, char **_p) {
+ char *p;
+ int r;
+
+ assert(_p);
+
+ if (session) {
+ if (!session_id_valid(session))
+ return -EINVAL;
+
+ p = path_join("/run/systemd/sessions", session);
+ } else {
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_pid_get_session(0, &buf);
+ if (r < 0)
+ return r;
+
+ p = path_join("/run/systemd/sessions", buf);
+ }
+
+ if (!p)
+ return -ENOMEM;
+
+ *_p = p;
+ return 0;
+}
+
+_public_ int sd_session_is_active(const char *session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "ACTIVE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_session_is_remote(const char *session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "REMOTE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_session_get_state(const char *session, char **state) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(state, -EINVAL);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "STATE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ *state = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_session_get_uid(const char *session, uid_t *uid) {
+ int r;
+ _cleanup_free_ char *p = NULL, *s = NULL;
+
+ assert_return(uid, -EINVAL);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "UID", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ return parse_uid(s, uid);
+}
+
+static int session_get_string(const char *session, const char *field, char **value) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(value, -EINVAL);
+ assert(field);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, field, &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *value = TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_session_get_seat(const char *session, char **seat) {
+ return session_get_string(session, "SEAT", seat);
+}
+
+_public_ int sd_session_get_tty(const char *session, char **tty) {
+ return session_get_string(session, "TTY", tty);
+}
+
+_public_ int sd_session_get_vt(const char *session, unsigned *vtnr) {
+ _cleanup_free_ char *vtnr_string = NULL;
+ unsigned u;
+ int r;
+
+ assert_return(vtnr, -EINVAL);
+
+ r = session_get_string(session, "VTNR", &vtnr_string);
+ if (r < 0)
+ return r;
+
+ r = safe_atou(vtnr_string, &u);
+ if (r < 0)
+ return r;
+
+ *vtnr = u;
+ return 0;
+}
+
+_public_ int sd_session_get_service(const char *session, char **service) {
+ return session_get_string(session, "SERVICE", service);
+}
+
+_public_ int sd_session_get_type(const char *session, char **type) {
+ return session_get_string(session, "TYPE", type);
+}
+
+_public_ int sd_session_get_class(const char *session, char **class) {
+ return session_get_string(session, "CLASS", class);
+}
+
+_public_ int sd_session_get_desktop(const char *session, char **desktop) {
+ _cleanup_free_ char *escaped = NULL;
+ char *t;
+ int r;
+
+ assert_return(desktop, -EINVAL);
+
+ r = session_get_string(session, "DESKTOP", &escaped);
+ if (r < 0)
+ return r;
+
+ r = cunescape(escaped, 0, &t);
+ if (r < 0)
+ return r;
+
+ *desktop = t;
+ return 0;
+}
+
+_public_ int sd_session_get_display(const char *session, char **display) {
+ return session_get_string(session, "DISPLAY", display);
+}
+
+_public_ int sd_session_get_remote_user(const char *session, char **remote_user) {
+ return session_get_string(session, "REMOTE_USER", remote_user);
+}
+
+_public_ int sd_session_get_remote_host(const char *session, char **remote_host) {
+ return session_get_string(session, "REMOTE_HOST", remote_host);
+}
+
+_public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *t = NULL;
+ int r;
+
+ assert_return(session || uid, -EINVAL);
+
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p,
+ "ACTIVE", &s,
+ "ACTIVE_UID", &t);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+
+ if (session && !s)
+ return -ENODATA;
+
+ if (uid && !t)
+ return -ENODATA;
+
+ if (uid && t) {
+ r = parse_uid(t, uid);
+ if (r < 0)
+ return r;
+ }
+
+ if (session && s)
+ *session = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_seat_get_sessions(
+ const char *seat,
+ char ***ret_sessions,
+ uid_t **ret_uids,
+ unsigned *ret_n_uids) {
+
+ _cleanup_free_ char *fname = NULL, *session_line = NULL, *uid_line = NULL;
+ _cleanup_strv_free_ char **sessions = NULL;
+ _cleanup_free_ uid_t *uids = NULL;
+ unsigned n_sessions = 0;
+ int r;
+
+ r = file_of_seat(seat, &fname);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, fname,
+ "SESSIONS", &session_line,
+ "UIDS", &uid_line);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+
+ if (session_line) {
+ sessions = strv_split(session_line, NULL);
+ if (!sessions)
+ return -ENOMEM;
+
+ n_sessions = strv_length(sessions);
+ };
+
+ if (ret_uids && uid_line) {
+ uids = new(uid_t, n_sessions);
+ if (!uids)
+ return -ENOMEM;
+
+ size_t n = 0;
+ for (const char *p = uid_line;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = parse_uid(word, &uids[n++]);
+ if (r < 0)
+ return r;
+ }
+
+ if (n != n_sessions)
+ return -EUCLEAN;
+ }
+
+ if (ret_sessions)
+ *ret_sessions = TAKE_PTR(sessions);
+ if (ret_uids)
+ *ret_uids = TAKE_PTR(uids);
+ if (ret_n_uids)
+ *ret_n_uids = n_sessions;
+
+ return n_sessions;
+}
+
+static int seat_get_can(const char *seat, const char *variable) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert(variable);
+
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p,
+ variable, &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_seat_can_multi_session(const char *seat) {
+ return true;
+}
+
+_public_ int sd_seat_can_tty(const char *seat) {
+ return seat_get_can(seat, "CAN_TTY");
+}
+
+_public_ int sd_seat_can_graphical(const char *seat) {
+ return seat_get_can(seat, "CAN_GRAPHICAL");
+}
+
+_public_ int sd_get_seats(char ***seats) {
+ int r;
+
+ r = get_files_in_directory("/run/systemd/seats/", seats);
+ if (r == -ENOENT) {
+ if (seats)
+ *seats = NULL;
+ return 0;
+ }
+ return r;
+}
+
+_public_ int sd_get_sessions(char ***sessions) {
+ int r;
+
+ r = get_files_in_directory("/run/systemd/sessions/", sessions);
+ if (r == -ENOENT) {
+ if (sessions)
+ *sessions = NULL;
+ return 0;
+ }
+ return r;
+}
+
+_public_ int sd_get_uids(uid_t **users) {
+ _cleanup_closedir_ DIR *d;
+ struct dirent *de;
+ int r = 0;
+ unsigned n = 0;
+ _cleanup_free_ uid_t *l = NULL;
+
+ d = opendir("/run/systemd/users/");
+ if (!d) {
+ if (errno == ENOENT) {
+ if (users)
+ *users = NULL;
+ return 0;
+ }
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ int k;
+ uid_t uid;
+
+ dirent_ensure_type(d, de);
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = parse_uid(de->d_name, &uid);
+ if (k < 0)
+ continue;
+
+ if (users) {
+ if ((unsigned) r >= n) {
+ uid_t *t;
+
+ n = MAX(16, 2*r);
+ t = reallocarray(l, sizeof(uid_t), n);
+ if (!t)
+ return -ENOMEM;
+
+ l = t;
+ }
+
+ assert((unsigned) r < n);
+ l[r++] = uid;
+ } else
+ r++;
+ }
+
+ if (users)
+ *users = TAKE_PTR(l);
+
+ return r;
+}
+
+_public_ int sd_get_machine_names(char ***machines) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **a, **b;
+ int r;
+
+ r = get_files_in_directory("/run/systemd/machines/", &l);
+ if (r == -ENOENT) {
+ if (machines)
+ *machines = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ if (l) {
+ r = 0;
+
+ /* Filter out the unit: symlinks */
+ for (a = b = l; *a; a++) {
+ if (startswith(*a, "unit:") || !machine_name_is_valid(*a))
+ free(*a);
+ else {
+ *b = *a;
+ b++;
+ r++;
+ }
+ }
+
+ *b = NULL;
+ }
+
+ if (machines)
+ *machines = TAKE_PTR(l);
+
+ return r;
+}
+
+_public_ int sd_machine_get_class(const char *machine, char **class) {
+ _cleanup_free_ char *c = NULL;
+ const char *p;
+ int r;
+
+ assert_return(class, -EINVAL);
+
+ if (streq(machine, ".host")) {
+ c = strdup("host");
+ if (!c)
+ return -ENOMEM;
+ } else {
+ if (!machine_name_is_valid(machine))
+ return -EINVAL;
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p, "CLASS", &c);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (!c)
+ return -EIO;
+ }
+
+ *class = TAKE_PTR(c);
+ return 0;
+}
+
+_public_ int sd_machine_get_ifindices(const char *machine, int **ret_ifindices) {
+ _cleanup_free_ char *netif_line = NULL;
+ const char *p;
+ int r;
+
+ assert_return(machine_name_is_valid(machine), -EINVAL);
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p, "NETIF", &netif_line);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (!netif_line) {
+ *ret_ifindices = NULL;
+ return 0;
+ }
+
+ _cleanup_strv_free_ char **tt = strv_split(netif_line, NULL);
+ if (!tt)
+ return -ENOMEM;
+
+ _cleanup_free_ int *ifindices = NULL;
+ if (ret_ifindices) {
+ ifindices = new(int, strv_length(tt));
+ if (!ifindices)
+ return -ENOMEM;
+ }
+
+ size_t n = 0;
+ for (size_t i = 0; tt[i]; i++) {
+ int ind;
+
+ ind = parse_ifindex(tt[i]);
+ if (ind < 0)
+ /* Return -EUCLEAN to distinguish from -EINVAL for invalid args */
+ return ind == -EINVAL ? -EUCLEAN : ind;
+
+ if (ret_ifindices)
+ ifindices[n] = ind;
+ n++;
+ }
+
+ if (ret_ifindices)
+ *ret_ifindices = TAKE_PTR(ifindices);
+
+ return n;
+}
+
+static int MONITOR_TO_FD(sd_login_monitor *m) {
+ return (int) (unsigned long) m - 1;
+}
+
+static sd_login_monitor* FD_TO_MONITOR(int fd) {
+ return (sd_login_monitor*) (unsigned long) (fd + 1);
+}
+
+_public_ int sd_login_monitor_new(const char *category, sd_login_monitor **m) {
+ _cleanup_close_ int fd = -1;
+ bool good = false;
+ int k;
+
+ assert_return(m, -EINVAL);
+
+ fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!category || streq(category, "seat")) {
+ k = inotify_add_watch(fd, "/run/systemd/seats/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "session")) {
+ k = inotify_add_watch(fd, "/run/systemd/sessions/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "uid")) {
+ k = inotify_add_watch(fd, "/run/systemd/users/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "machine")) {
+ k = inotify_add_watch(fd, "/run/systemd/machines/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!good)
+ return -EINVAL;
+
+ *m = FD_TO_MONITOR(TAKE_FD(fd));
+ return 0;
+}
+
+_public_ sd_login_monitor* sd_login_monitor_unref(sd_login_monitor *m) {
+ if (m)
+ close_nointr(MONITOR_TO_FD(m));
+
+ return NULL;
+}
+
+_public_ int sd_login_monitor_flush(sd_login_monitor *m) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = flush_fd(MONITOR_TO_FD(m));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+_public_ int sd_login_monitor_get_fd(sd_login_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ return MONITOR_TO_FD(m);
+}
+
+_public_ int sd_login_monitor_get_events(sd_login_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ /* For now we will only return POLLIN here, since we don't
+ * need anything else ever for inotify. However, let's have
+ * this API to keep our options open should we later on need
+ * it. */
+ return POLLIN;
+}
+
+_public_ int sd_login_monitor_get_timeout(sd_login_monitor *m, uint64_t *timeout_usec) {
+
+ assert_return(m, -EINVAL);
+ assert_return(timeout_usec, -EINVAL);
+
+ /* For now we will only return (uint64_t) -1, since we don't
+ * need any timeout. However, let's have this API to keep our
+ * options open should we later on need it. */
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+}
diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c
new file mode 100644
index 0000000..d850323
--- /dev/null
+++ b/src/libsystemd/sd-login/test-login.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <poll.h>
+
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "user-util.h"
+
+static char* format_uids(char **buf, uid_t* uids, int count) {
+ int pos = 0, inc;
+ size_t size = (DECIMAL_STR_MAX(uid_t) + 1) * count + 1;
+
+ assert_se(*buf = malloc(size));
+
+ for (int k = 0; k < count; k++) {
+ sprintf(*buf + pos, "%s"UID_FMT"%n", k > 0 ? " " : "", uids[k], &inc);
+ pos += inc;
+ }
+
+ assert_se(pos < (ssize_t)size);
+ (*buf)[pos] = '\0';
+
+ return *buf;
+}
+
+static const char *e(int r) {
+ return r == 0 ? "OK" : errno_to_name(r);
+}
+
+static void test_login(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_free_ char *pp = NULL, *qq = NULL,
+ *display_session = NULL, *cgroup = NULL,
+ *display = NULL, *remote_user = NULL, *remote_host = NULL,
+ *type = NULL, *class = NULL, *state = NULL, *state2 = NULL,
+ *seat = NULL, *session = NULL,
+ *unit = NULL, *user_unit = NULL, *slice = NULL;
+ int r;
+ uid_t u, u2 = UID_INVALID;
+ char *t, **seats = NULL, **sessions = NULL;
+
+ r = sd_pid_get_unit(0, &unit);
+ log_info("sd_pid_get_unit(0, …) → %s / \"%s\"", e(r), strnull(unit));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pid_get_user_unit(0, &user_unit);
+ log_info("sd_pid_get_user_unit(0, …) → %s / \"%s\"", e(r), strnull(user_unit));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pid_get_slice(0, &slice);
+ log_info("sd_pid_get_slice(0, …) → %s / \"%s\"", e(r), strnull(slice));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pid_get_owner_uid(0, &u2);
+ log_info("sd_pid_get_owner_uid(0, …) → %s / "UID_FMT, e(r), u2);
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pid_get_session(0, &session);
+ log_info("sd_pid_get_session(0, …) → %s / \"%s\"", e(r), strnull(session));
+
+ r = sd_pid_get_cgroup(0, &cgroup);
+ log_info("sd_pid_get_cgroup(0, …) → %s / \"%s\"", e(r), strnull(cgroup));
+ assert_se(IN_SET(r, 0, -ENOMEDIUM));
+
+ r = sd_uid_get_display(u2, &display_session);
+ log_info("sd_uid_get_display("UID_FMT", …) → %s / \"%s\"", u2, e(r), strnull(display_session));
+ if (u2 == UID_INVALID)
+ assert_se(r == -EINVAL);
+ else
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == 0);
+ sd_peer_get_session(pair[0], &pp);
+ sd_peer_get_session(pair[1], &qq);
+ assert_se(streq_ptr(pp, qq));
+
+ r = sd_uid_get_sessions(u2, false, &sessions);
+ assert_se(t = strv_join(sessions, " "));
+ log_info("sd_uid_get_sessions("UID_FMT", …) → %s \"%s\"", u2, e(r), t);
+ if (u2 == UID_INVALID)
+ assert_se(r == -EINVAL);
+ else {
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ }
+ sessions = strv_free(sessions);
+ free(t);
+
+ assert_se(r == sd_uid_get_sessions(u2, false, NULL));
+
+ r = sd_uid_get_seats(u2, false, &seats);
+ assert_se(t = strv_join(seats, " "));
+ log_info("sd_uid_get_seats("UID_FMT", …) → %s \"%s\"", u2, e(r), t);
+ if (u2 == UID_INVALID)
+ assert_se(r == -EINVAL);
+ else {
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(seats));
+ }
+ seats = strv_free(seats);
+ free(t);
+
+ assert_se(r == sd_uid_get_seats(u2, false, NULL));
+
+ if (session) {
+ r = sd_session_is_active(session);
+ if (r == -ENXIO)
+ log_notice("sd_session_is_active() failed with ENXIO, it seems logind is not running.");
+ else {
+ /* All those tests will fail with ENXIO, so let's skip them. */
+
+ assert_se(r >= 0);
+ log_info("sd_session_is_active(\"%s\") → %s", session, yes_no(r));
+
+ r = sd_session_is_remote(session);
+ assert_se(r >= 0);
+ log_info("sd_session_is_remote(\"%s\") → %s", session, yes_no(r));
+
+ r = sd_session_get_state(session, &state);
+ assert_se(r == 0);
+ log_info("sd_session_get_state(\"%s\") → \"%s\"", session, state);
+
+ assert_se(sd_session_get_uid(session, &u) >= 0);
+ log_info("sd_session_get_uid(\"%s\") → "UID_FMT, session, u);
+ assert_se(u == u2);
+
+ assert_se(sd_session_get_type(session, &type) >= 0);
+ log_info("sd_session_get_type(\"%s\") → \"%s\"", session, type);
+
+ assert_se(sd_session_get_class(session, &class) >= 0);
+ log_info("sd_session_get_class(\"%s\") → \"%s\"", session, class);
+
+ r = sd_session_get_display(session, &display);
+ assert_se(IN_SET(r, 0, -ENODATA));
+ log_info("sd_session_get_display(\"%s\") → \"%s\"", session, strna(display));
+
+ r = sd_session_get_remote_user(session, &remote_user);
+ assert_se(IN_SET(r, 0, -ENODATA));
+ log_info("sd_session_get_remote_user(\"%s\") → \"%s\"",
+ session, strna(remote_user));
+
+ r = sd_session_get_remote_host(session, &remote_host);
+ assert_se(IN_SET(r, 0, -ENODATA));
+ log_info("sd_session_get_remote_host(\"%s\") → \"%s\"",
+ session, strna(remote_host));
+
+ r = sd_session_get_seat(session, &seat);
+ if (r >= 0) {
+ assert_se(seat);
+
+ log_info("sd_session_get_seat(\"%s\") → \"%s\"", session, seat);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ r = sd_seat_can_multi_session(seat);
+#pragma GCC diagnostic pop
+ assert_se(r == 1);
+ log_info("sd_session_can_multi_seat(\"%s\") → %s", seat, yes_no(r));
+
+ r = sd_seat_can_tty(seat);
+ assert_se(r >= 0);
+ log_info("sd_session_can_tty(\"%s\") → %s", seat, yes_no(r));
+
+ r = sd_seat_can_graphical(seat);
+ assert_se(r >= 0);
+ log_info("sd_session_can_graphical(\"%s\") → %s", seat, yes_no(r));
+ } else {
+ log_info_errno(r, "sd_session_get_seat(\"%s\"): %m", session);
+ assert_se(r == -ENODATA);
+ }
+
+ assert_se(sd_uid_get_state(u, &state2) == 0);
+ log_info("sd_uid_get_state("UID_FMT", …) → %s", u, state2);
+ }
+ }
+
+ if (seat) {
+ _cleanup_free_ char *session2 = NULL, *buf = NULL;
+ _cleanup_free_ uid_t *uids = NULL;
+ unsigned n;
+
+ assert_se(sd_uid_is_on_seat(u, 0, seat) > 0);
+
+ r = sd_seat_get_active(seat, &session2, &u2);
+ assert_se(r == 0);
+ log_info("sd_seat_get_active(\"%s\", …) → \"%s\", "UID_FMT, seat, session2, u2);
+
+ r = sd_uid_is_on_seat(u, 1, seat);
+ assert_se(IN_SET(r, 0, 1));
+ assert_se(!!r == streq(session, session2));
+
+ r = sd_seat_get_sessions(seat, &sessions, &uids, &n);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ assert_se(t = strv_join(sessions, " "));
+ strv_free(sessions);
+ log_info("sd_seat_get_sessions(\"%s\", …) → %s, \"%s\", [%u] {%s}",
+ seat, e(r), t, n, format_uids(&buf, uids, n));
+ free(t);
+
+ assert_se(sd_seat_get_sessions(seat, NULL, NULL, NULL) == r);
+ }
+
+ r = sd_get_seats(&seats);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(seats));
+ assert_se(t = strv_join(seats, ", "));
+ strv_free(seats);
+ log_info("sd_get_seats(…) → [%i] \"%s\"", r, t);
+ t = mfree(t);
+
+ assert_se(sd_get_seats(NULL) == r);
+
+ r = sd_seat_get_active(NULL, &t, NULL);
+ assert_se(IN_SET(r, 0, -ENODATA, -ENXIO));
+ log_info("sd_seat_get_active(NULL, …) (active session on current seat) → %s / \"%s\"", e(r), strnull(t));
+ free(t);
+
+ r = sd_get_sessions(&sessions);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ assert_se(t = strv_join(sessions, ", "));
+ strv_free(sessions);
+ log_info("sd_get_sessions(…) → [%i] \"%s\"", r, t);
+ free(t);
+
+ assert_se(sd_get_sessions(NULL) == r);
+
+ {
+ _cleanup_free_ uid_t *uids = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_get_uids(&uids);
+ assert_se(r >= 0);
+ log_info("sd_get_uids(…) → [%i] {%s}", r, format_uids(&buf, uids, r));
+
+ assert_se(sd_get_uids(NULL) == r);
+ }
+
+ {
+ _cleanup_strv_free_ char **machines = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_get_machine_names(&machines);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(machines));
+ assert_se(buf = strv_join(machines, " "));
+ log_info("sd_get_machines(…) → [%i] \"%s\"", r, buf);
+
+ assert_se(sd_get_machine_names(NULL) == r);
+ }
+}
+
+static void test_monitor(void) {
+ sd_login_monitor *m = NULL;
+ int r;
+
+ assert_se(sd_login_monitor_new("session", &m) == 0);
+
+ for (unsigned n = 0; n < 5; n++) {
+ struct pollfd pollfd = {};
+ usec_t timeout, nw;
+
+ assert_se((pollfd.fd = sd_login_monitor_get_fd(m)) >= 0);
+ assert_se((pollfd.events = sd_login_monitor_get_events(m)) >= 0);
+
+ assert_se(sd_login_monitor_get_timeout(m, &timeout) >= 0);
+
+ nw = now(CLOCK_MONOTONIC);
+
+ r = poll(&pollfd, 1,
+ timeout == (uint64_t) -1 ? -1 :
+ timeout > nw ? (int) ((timeout - nw) / 1000) :
+ 0);
+
+ assert_se(r >= 0);
+
+ sd_login_monitor_flush(m);
+ printf("Wake!\n");
+ }
+
+ sd_login_monitor_unref(m);
+}
+
+int main(int argc, char* argv[]) {
+ log_parse_environment();
+ log_open();
+
+ log_info("/* Information printed is from the live system */");
+
+ test_login();
+
+ if (streq_ptr(argv[1], "-m"))
+ test_monitor();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/generic-netlink.c b/src/libsystemd/sd-netlink/generic-netlink.c
new file mode 100644
index 0000000..f295fa9
--- /dev/null
+++ b/src/libsystemd/sd-netlink/generic-netlink.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/genetlink.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "generic-netlink.h"
+#include "netlink-internal.h"
+
+typedef struct {
+ const char* name;
+ uint8_t version;
+} genl_family;
+
+static const genl_family genl_families[] = {
+ [SD_GENL_ID_CTRL] = { .name = "", .version = 1 },
+ [SD_GENL_WIREGUARD] = { .name = "wireguard", .version = 1 },
+ [SD_GENL_FOU] = { .name = "fou", .version = 1 },
+ [SD_GENL_L2TP] = { .name = "l2tp", .version = 1 },
+ [SD_GENL_MACSEC] = { .name = "macsec", .version = 1 },
+ [SD_GENL_NL80211] = { .name = "nl80211", .version = 1 },
+};
+
+int sd_genl_socket_open(sd_netlink **ret) {
+ return netlink_open_family(ret, NETLINK_GENERIC);
+}
+static int lookup_id(sd_netlink *nl, sd_genl_family family, uint16_t *id);
+
+static int genl_message_new(sd_netlink *nl, sd_genl_family family, uint16_t nlmsg_type, uint8_t cmd, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ const NLType *genl_cmd_type, *nl_type;
+ const NLTypeSystem *type_system;
+ struct genlmsghdr *genl;
+ size_t size;
+ int r;
+
+ assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL);
+
+ r = type_system_get_type(&genl_family_type_system_root, &genl_cmd_type, family);
+ if (r < 0)
+ return r;
+
+ r = message_new_empty(nl, &m);
+ if (r < 0)
+ return r;
+
+ size = NLMSG_SPACE(sizeof(struct genlmsghdr));
+ m->hdr = malloc0(size);
+ if (!m->hdr)
+ return -ENOMEM;
+
+ m->hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+
+ type_get_type_system(genl_cmd_type, &type_system);
+
+ r = type_system_get_type(type_system, &nl_type, cmd);
+ if (r < 0)
+ return r;
+
+ m->hdr->nlmsg_len = size;
+ m->hdr->nlmsg_type = nlmsg_type;
+
+ type_get_type_system(nl_type, &m->containers[0].type_system);
+ genl = NLMSG_DATA(m->hdr);
+ genl->cmd = cmd;
+ genl->version = genl_families[family].version;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int sd_genl_message_new(sd_netlink *nl, sd_genl_family family, uint8_t cmd, sd_netlink_message **ret) {
+ uint16_t id;
+ int r;
+
+ r = lookup_id(nl, family, &id);
+ if (r < 0)
+ return r;
+
+ return genl_message_new(nl, family, id, cmd, ret);
+}
+
+static int lookup_id(sd_netlink *nl, sd_genl_family family, uint16_t *id) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ uint16_t u;
+ void *v;
+ int r;
+
+ if (family == SD_GENL_ID_CTRL) {
+ *id = GENL_ID_CTRL;
+ return 0;
+ }
+
+ v = hashmap_get(nl->genl_family_to_nlmsg_type, INT_TO_PTR(family));
+ if (v) {
+ *id = PTR_TO_UINT(v);
+ return 0;
+ }
+
+ r = sd_genl_message_new(nl, SD_GENL_ID_CTRL, CTRL_CMD_GETFAMILY, &req);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(req, CTRL_ATTR_FAMILY_NAME, genl_families[family].name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(nl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_u16(reply, CTRL_ATTR_FAMILY_ID, &u);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&nl->genl_family_to_nlmsg_type, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&nl->nlmsg_type_to_genl_family, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(nl->genl_family_to_nlmsg_type, INT_TO_PTR(family), UINT_TO_PTR(u));
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(nl->nlmsg_type_to_genl_family, UINT_TO_PTR(u), INT_TO_PTR(family));
+ if (r < 0)
+ return r;
+
+ *id = u;
+ return 0;
+}
+
+int nlmsg_type_to_genl_family(const sd_netlink *nl, uint16_t type, sd_genl_family *ret) {
+ void *p;
+
+ assert_return(nl, -EINVAL);
+ assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL);
+ assert(ret);
+
+ if (type == NLMSG_ERROR)
+ *ret = SD_GENL_ERROR;
+ else if (type == NLMSG_DONE)
+ *ret = SD_GENL_DONE;
+ else if (type == GENL_ID_CTRL)
+ *ret = SD_GENL_ID_CTRL;
+ else {
+ p = hashmap_get(nl->nlmsg_type_to_genl_family, UINT_TO_PTR(type));
+ if (!p)
+ return -EOPNOTSUPP;
+
+ *ret = PTR_TO_INT(p);
+ }
+
+ return 0;
+}
+
+int sd_genl_message_get_family(const sd_netlink *nl, const sd_netlink_message *m, sd_genl_family *family) {
+ uint16_t type;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(nl, -EINVAL);
+ assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL);
+ assert_return(family, -EINVAL);
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ return nlmsg_type_to_genl_family(nl, type, family);
+}
diff --git a/src/libsystemd/sd-netlink/generic-netlink.h b/src/libsystemd/sd-netlink/generic-netlink.h
new file mode 100644
index 0000000..72001e8
--- /dev/null
+++ b/src/libsystemd/sd-netlink/generic-netlink.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+int nlmsg_type_to_genl_family(const sd_netlink *nl, uint16_t type, sd_genl_family *ret);
diff --git a/src/libsystemd/sd-netlink/netlink-internal.h b/src/libsystemd/sd-netlink/netlink-internal.h
new file mode 100644
index 0000000..1240f0d
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-internal.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/netlink.h>
+
+#include "sd-netlink.h"
+
+#include "list.h"
+#include "netlink-types.h"
+#include "prioq.h"
+#include "time-util.h"
+
+#define RTNL_DEFAULT_TIMEOUT ((usec_t) (25 * USEC_PER_SEC))
+
+#define RTNL_RQUEUE_MAX 64*1024
+
+#define RTNL_CONTAINER_DEPTH 32
+
+struct reply_callback {
+ sd_netlink_message_handler_t callback;
+ usec_t timeout;
+ uint64_t serial;
+ unsigned prioq_idx;
+};
+
+struct match_callback {
+ sd_netlink_message_handler_t callback;
+ uint16_t type;
+
+ LIST_FIELDS(struct match_callback, match_callbacks);
+};
+
+typedef enum NetlinkSlotType {
+ NETLINK_REPLY_CALLBACK,
+ NETLINK_MATCH_CALLBACK,
+ _NETLINK_SLOT_INVALID = -1,
+} NetlinkSlotType;
+
+struct sd_netlink_slot {
+ unsigned n_ref;
+ sd_netlink *netlink;
+ void *userdata;
+ sd_netlink_destroy_t destroy_callback;
+ NetlinkSlotType type:2;
+
+ bool floating:1;
+ char *description;
+
+ LIST_FIELDS(sd_netlink_slot, slots);
+
+ union {
+ struct reply_callback reply_callback;
+ struct match_callback match_callback;
+ };
+};
+
+struct sd_netlink {
+ unsigned n_ref;
+
+ int fd;
+
+ union {
+ struct sockaddr sa;
+ struct sockaddr_nl nl;
+ } sockaddr;
+
+ int protocol;
+
+ Hashmap *broadcast_group_refs;
+ bool broadcast_group_dont_leave:1; /* until we can rely on 4.2 */
+
+ sd_netlink_message **rqueue;
+ unsigned rqueue_size;
+ size_t rqueue_allocated;
+
+ sd_netlink_message **rqueue_partial;
+ unsigned rqueue_partial_size;
+ size_t rqueue_partial_allocated;
+
+ struct nlmsghdr *rbuffer;
+ size_t rbuffer_allocated;
+
+ bool processing:1;
+
+ uint32_t serial;
+
+ struct Prioq *reply_callbacks_prioq;
+ Hashmap *reply_callbacks;
+
+ LIST_HEAD(struct match_callback, match_callbacks);
+
+ LIST_HEAD(sd_netlink_slot, slots);
+
+ pid_t original_pid;
+
+ sd_event_source *io_event_source;
+ sd_event_source *time_event_source;
+ sd_event_source *exit_event_source;
+ sd_event *event;
+
+ Hashmap *genl_family_to_nlmsg_type;
+ Hashmap *nlmsg_type_to_genl_family;
+};
+
+struct netlink_attribute {
+ size_t offset; /* offset from hdr to attribute */
+ bool nested:1;
+ bool net_byteorder:1;
+};
+
+struct netlink_container {
+ const struct NLTypeSystem *type_system; /* the type system of the container */
+ size_t offset; /* offset from hdr to the start of the container */
+ struct netlink_attribute *attributes;
+ unsigned short n_attributes; /* number of attributes in container */
+};
+
+struct sd_netlink_message {
+ unsigned n_ref;
+
+ int protocol;
+
+ struct nlmsghdr *hdr;
+ struct netlink_container containers[RTNL_CONTAINER_DEPTH];
+ unsigned n_containers; /* number of containers */
+ bool sealed:1;
+ bool broadcast:1;
+
+ sd_netlink_message *next; /* next in a chain of multi-part messages */
+};
+
+int message_new(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t type);
+int message_new_empty(sd_netlink *rtnl, sd_netlink_message **ret);
+
+int netlink_open_family(sd_netlink **ret, int family);
+
+int socket_open(int family);
+int socket_bind(sd_netlink *nl);
+int socket_broadcast_group_ref(sd_netlink *nl, unsigned group);
+int socket_broadcast_group_unref(sd_netlink *nl, unsigned group);
+int socket_write_message(sd_netlink *nl, sd_netlink_message *m);
+int socket_read_message(sd_netlink *nl);
+
+int rtnl_rqueue_make_room(sd_netlink *rtnl);
+int rtnl_rqueue_partial_make_room(sd_netlink *rtnl);
+
+/* Make sure callbacks don't destroy the rtnl connection */
+#define NETLINK_DONT_DESTROY(rtnl) \
+ _cleanup_(sd_netlink_unrefp) _unused_ sd_netlink *_dont_destroy_##rtnl = sd_netlink_ref(rtnl)
diff --git a/src/libsystemd/sd-netlink/netlink-message.c b/src/libsystemd/sd-netlink/netlink-message.c
new file mode 100644
index 0000000..bd56536
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-message.c
@@ -0,0 +1,1320 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "format-util.h"
+#include "memory-util.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+#include "socket-util.h"
+#include "strv.h"
+
+#define GET_CONTAINER(m, i) ((struct rtattr*)((uint8_t*)(m)->hdr + (m)->containers[i].offset))
+
+#define RTA_TYPE(rta) ((rta)->rta_type & NLA_TYPE_MASK)
+#define RTA_FLAGS(rta) ((rta)->rta_type & ~NLA_TYPE_MASK)
+
+int message_new_empty(sd_netlink *rtnl, sd_netlink_message **ret) {
+ sd_netlink_message *m;
+
+ assert_return(ret, -EINVAL);
+
+ /* Note that 'rtnl' is currently unused, if we start using it internally
+ we must take care to avoid problems due to mutual references between
+ buses and their queued messages. See sd-bus.
+ */
+
+ m = new(sd_netlink_message, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (sd_netlink_message) {
+ .n_ref = 1,
+ .protocol = rtnl->protocol,
+ .sealed = false,
+ };
+
+ *ret = m;
+
+ return 0;
+}
+
+int message_new(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t type) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ const NLType *nl_type;
+ size_t size;
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+
+ r = type_system_root_get_type(rtnl, &nl_type, type);
+ if (r < 0)
+ return r;
+
+ if (type_get_type(nl_type) != NETLINK_TYPE_NESTED)
+ return -EINVAL;
+
+ r = message_new_empty(rtnl, &m);
+ if (r < 0)
+ return r;
+
+ size = NLMSG_SPACE(type_get_size(nl_type));
+
+ assert(size >= sizeof(struct nlmsghdr));
+ m->hdr = malloc0(size);
+ if (!m->hdr)
+ return -ENOMEM;
+
+ m->hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+
+ type_get_type_system(nl_type, &m->containers[0].type_system);
+ m->hdr->nlmsg_len = size;
+ m->hdr->nlmsg_type = type;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int sd_netlink_message_request_dump(sd_netlink_message *m, int dump) {
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+
+ assert_return(IN_SET(m->hdr->nlmsg_type,
+ RTM_GETLINK, RTM_GETLINKPROP, RTM_GETADDR, RTM_GETROUTE, RTM_GETNEIGH,
+ RTM_GETRULE, RTM_GETADDRLABEL, RTM_GETNEXTHOP), -EINVAL);
+
+ SET_FLAG(m->hdr->nlmsg_flags, NLM_F_DUMP, dump);
+
+ return 0;
+}
+
+DEFINE_TRIVIAL_REF_FUNC(sd_netlink_message, sd_netlink_message);
+
+sd_netlink_message *sd_netlink_message_unref(sd_netlink_message *m) {
+ while (m && --m->n_ref == 0) {
+ unsigned i;
+
+ free(m->hdr);
+
+ for (i = 0; i <= m->n_containers; i++)
+ free(m->containers[i].attributes);
+
+ sd_netlink_message *t = m;
+ m = m->next;
+ free(t);
+ }
+
+ return NULL;
+}
+
+int sd_netlink_message_get_type(const sd_netlink_message *m, uint16_t *type) {
+ assert_return(m, -EINVAL);
+ assert_return(type, -EINVAL);
+
+ *type = m->hdr->nlmsg_type;
+
+ return 0;
+}
+
+int sd_netlink_message_set_flags(sd_netlink_message *m, uint16_t flags) {
+ assert_return(m, -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ m->hdr->nlmsg_flags = flags;
+
+ return 0;
+}
+
+int sd_netlink_message_is_broadcast(const sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->broadcast;
+}
+
+/* If successful the updated message will be correctly aligned, if
+ unsuccessful the old message is untouched. */
+static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *data, size_t data_length) {
+ size_t message_length;
+ struct nlmsghdr *new_hdr;
+ struct rtattr *rta;
+ unsigned i;
+ int offset;
+
+ assert(m);
+ assert(m->hdr);
+ assert(!m->sealed);
+ assert(NLMSG_ALIGN(m->hdr->nlmsg_len) == m->hdr->nlmsg_len);
+ assert(!data || data_length > 0);
+
+ /* get the new message size (with padding at the end) */
+ message_length = m->hdr->nlmsg_len + RTA_SPACE(data_length);
+
+ /* buffer should be smaller than both one page or 8K to be accepted by the kernel */
+ if (message_length > MIN(page_size(), 8192UL))
+ return -ENOBUFS;
+
+ /* realloc to fit the new attribute */
+ new_hdr = realloc(m->hdr, message_length);
+ if (!new_hdr)
+ return -ENOMEM;
+ m->hdr = new_hdr;
+
+ /* get pointer to the attribute we are about to add */
+ rta = (struct rtattr *) ((uint8_t *) m->hdr + m->hdr->nlmsg_len);
+
+ rtattr_append_attribute_internal(rta, type, data, data_length);
+
+ /* if we are inside containers, extend them */
+ for (i = 0; i < m->n_containers; i++)
+ GET_CONTAINER(m, i)->rta_len += RTA_SPACE(data_length);
+
+ /* update message size */
+ offset = m->hdr->nlmsg_len;
+ m->hdr->nlmsg_len = message_length;
+
+ /* return old message size */
+ return offset;
+}
+
+static int message_attribute_has_type(sd_netlink_message *m, size_t *out_size, uint16_t attribute_type, uint16_t data_type) {
+ const NLType *type;
+ int r;
+
+ assert(m);
+
+ r = type_system_get_type(m->containers[m->n_containers].type_system, &type, attribute_type);
+ if (r < 0)
+ return r;
+
+ if (type_get_type(type) != data_type)
+ return -EINVAL;
+
+ if (out_size)
+ *out_size = type_get_size(type);
+ return 0;
+}
+
+int sd_netlink_message_append_string(sd_netlink_message *m, unsigned short type, const char *data) {
+ size_t length, size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ if (size) {
+ length = strnlen(data, size+1);
+ if (length > size)
+ return -EINVAL;
+ } else
+ length = strlen(data);
+
+ r = add_rtattr(m, type, data, length + 1);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_strv(sd_netlink_message *m, unsigned short type, char * const *data) {
+ size_t length, size;
+ char * const *p;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, data) {
+ if (size) {
+ length = strnlen(*p, size+1);
+ if (length > size)
+ return -EINVAL;
+ } else
+ length = strlen(*p);
+
+ r = add_rtattr(m, type, *p, length + 1);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_append_flag(sd_netlink_message *m, unsigned short type) {
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_FLAG);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, NULL, 0);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u8(sd_netlink_message *m, unsigned short type, uint8_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U8);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(uint8_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u16(sd_netlink_message *m, unsigned short type, uint16_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U16);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(uint16_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u32(sd_netlink_message *m, unsigned short type, uint32_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U32);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(uint32_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u64(sd_netlink_message *m, unsigned short type, uint64_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U64);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(uint64_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_s8(sd_netlink_message *m, unsigned short type, int8_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_S8);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(int8_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_s16(sd_netlink_message *m, unsigned short type, int16_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_S16);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(int16_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_s32(sd_netlink_message *m, unsigned short type, int32_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_S32);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(int32_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_s64(sd_netlink_message *m, unsigned short type, int64_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_S64);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, &data, sizeof(int64_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_data(sd_netlink_message *m, unsigned short type, const void *data, size_t len) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = add_rtattr(m, type, data, len);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int netlink_message_append_in_addr_union(sd_netlink_message *m, unsigned short type, int family, const union in_addr_union *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+ assert_return(IN_SET(family, AF_INET, AF_INET6), -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_IN_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data, FAMILY_ADDRESS_SIZE(family));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_in_addr(sd_netlink_message *m, unsigned short type, const struct in_addr *data) {
+ return netlink_message_append_in_addr_union(m, type, AF_INET, (const union in_addr_union *) data);
+}
+
+int sd_netlink_message_append_in6_addr(sd_netlink_message *m, unsigned short type, const struct in6_addr *data) {
+ return netlink_message_append_in_addr_union(m, type, AF_INET6, (const union in_addr_union *) data);
+}
+
+int netlink_message_append_sockaddr_union(sd_netlink_message *m, unsigned short type, const union sockaddr_union *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+ assert_return(IN_SET(data->sa.sa_family, AF_INET, AF_INET6), -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_SOCKADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data, data->sa.sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_sockaddr_in(sd_netlink_message *m, unsigned short type, const struct sockaddr_in *data) {
+ return netlink_message_append_sockaddr_union(m, type, (const union sockaddr_union *) data);
+}
+
+int sd_netlink_message_append_sockaddr_in6(sd_netlink_message *m, unsigned short type, const struct sockaddr_in6 *data) {
+ return netlink_message_append_sockaddr_union(m, type, (const union sockaddr_union *) data);
+}
+
+int sd_netlink_message_append_ether_addr(sd_netlink_message *m, unsigned short type, const struct ether_addr *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data, ETH_ALEN);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int netlink_message_append_hw_addr(sd_netlink_message *m, unsigned short type, const hw_addr_data *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+ assert_return(data->length > 0, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, data->addr.bytes, data->length);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_cache_info(sd_netlink_message *m, unsigned short type, const struct ifa_cacheinfo *info) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(info, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_CACHE_INFO);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, type, info, sizeof(struct ifa_cacheinfo));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_open_container(sd_netlink_message *m, unsigned short type) {
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ /* m->containers[m->n_containers + 1] is accessed both in read and write. Prevent access out of bound */
+ assert_return(m->n_containers < (RTNL_CONTAINER_DEPTH - 1), -ERANGE);
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_NESTED);
+ if (r < 0) {
+ const NLTypeSystemUnion *type_system_union;
+ int family;
+
+ r = message_attribute_has_type(m, &size, type, NETLINK_TYPE_UNION);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_get_family(m, &family);
+ if (r < 0)
+ return r;
+
+ r = type_system_get_type_system_union(m->containers[m->n_containers].type_system, &type_system_union, type);
+ if (r < 0)
+ return r;
+
+ r = type_system_union_protocol_get_type_system(type_system_union,
+ &m->containers[m->n_containers + 1].type_system,
+ family);
+ if (r < 0)
+ return r;
+ } else {
+ r = type_system_get_type_system(m->containers[m->n_containers].type_system,
+ &m->containers[m->n_containers + 1].type_system,
+ type);
+ if (r < 0)
+ return r;
+ }
+
+ r = add_rtattr(m, type | NLA_F_NESTED, NULL, size);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers++].offset = r;
+
+ return 0;
+}
+
+int sd_netlink_message_open_container_union(sd_netlink_message *m, unsigned short type, const char *key) {
+ const NLTypeSystemUnion *type_system_union;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers < (RTNL_CONTAINER_DEPTH - 1), -ERANGE);
+
+ r = type_system_get_type_system_union(m->containers[m->n_containers].type_system, &type_system_union, type);
+ if (r < 0)
+ return r;
+
+ r = type_system_union_get_type_system(type_system_union,
+ &m->containers[m->n_containers + 1].type_system,
+ key);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, type_system_union->match, key);
+ if (r < 0)
+ return r;
+
+ /* do we ever need non-null size */
+ r = add_rtattr(m, type | NLA_F_NESTED, NULL, 0);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers++].offset = r;
+
+ return 0;
+}
+
+int sd_netlink_message_close_container(sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -EINVAL);
+
+ m->containers[m->n_containers].type_system = NULL;
+ m->containers[m->n_containers].offset = 0;
+ m->n_containers--;
+
+ return 0;
+}
+
+int sd_netlink_message_open_array(sd_netlink_message *m, uint16_t type) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers < (RTNL_CONTAINER_DEPTH - 1), -ERANGE);
+
+ r = add_rtattr(m, type | NLA_F_NESTED, NULL, 0);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers].offset = r;
+ m->n_containers++;
+ m->containers[m->n_containers].type_system = m->containers[m->n_containers - 1].type_system;
+
+ return 0;
+}
+
+int sd_netlink_message_cancel_array(sd_netlink_message *m) {
+ unsigned i;
+ uint32_t rta_len;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 1, -EINVAL);
+
+ rta_len = GET_CONTAINER(m, (m->n_containers - 1))->rta_len;
+
+ for (i = 0; i < m->n_containers; i++)
+ GET_CONTAINER(m, i)->rta_len -= rta_len;
+
+ m->hdr->nlmsg_len -= rta_len;
+
+ m->n_containers--;
+ m->containers[m->n_containers].type_system = NULL;
+
+ return 0;
+}
+
+static int netlink_message_read_internal(sd_netlink_message *m, unsigned short type, void **data, bool *net_byteorder) {
+ struct netlink_attribute *attribute;
+ struct rtattr *rta;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ assert(m->n_containers < RTNL_CONTAINER_DEPTH);
+
+ if (!m->containers[m->n_containers].attributes)
+ return -ENODATA;
+
+ if (type >= m->containers[m->n_containers].n_attributes)
+ return -ENODATA;
+
+ attribute = &m->containers[m->n_containers].attributes[type];
+
+ if (attribute->offset == 0)
+ return -ENODATA;
+
+ rta = (struct rtattr*)((uint8_t *) m->hdr + attribute->offset);
+
+ *data = RTA_DATA(rta);
+
+ if (net_byteorder)
+ *net_byteorder = attribute->net_byteorder;
+
+ return RTA_PAYLOAD(rta);
+}
+
+int sd_netlink_message_read(sd_netlink_message *m, unsigned short type, size_t size, void *data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r < size)
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, size);
+
+ return r;
+}
+
+int sd_netlink_message_read_data(sd_netlink_message *m, unsigned short type, size_t *ret_size, void **ret_data) {
+ void *attr_data, *data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if (ret_data) {
+ data = memdup(attr_data, r);
+ if (!data)
+ return -ENOMEM;
+
+ *ret_data = data;
+ }
+
+ if (ret_size)
+ *ret_size = r;
+
+ return r;
+}
+
+int sd_netlink_message_read_string_strdup(sd_netlink_message *m, unsigned short type, char **data) {
+ void *attr_data;
+ char *str;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if (data) {
+ str = strndup(attr_data, r);
+ if (!str)
+ return -ENOMEM;
+
+ *data = str;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_string(sd_netlink_message *m, unsigned short type, const char **data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if (strnlen(attr_data, r) >= (size_t) r)
+ return -EIO;
+
+ if (data)
+ *data = (const char *) attr_data;
+
+ return 0;
+}
+
+int sd_netlink_message_read_u8(sd_netlink_message *m, unsigned short type, uint8_t *data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U8);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t) r < sizeof(uint8_t))
+ return -EIO;
+
+ if (data)
+ *data = *(uint8_t *) attr_data;
+
+ return 0;
+}
+
+int sd_netlink_message_read_u16(sd_netlink_message *m, unsigned short type, uint16_t *data) {
+ void *attr_data;
+ bool net_byteorder;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U16);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, &net_byteorder);
+ if (r < 0)
+ return r;
+ else if ((size_t) r < sizeof(uint16_t))
+ return -EIO;
+
+ if (data) {
+ if (net_byteorder)
+ *data = be16toh(*(uint16_t *) attr_data);
+ else
+ *data = *(uint16_t *) attr_data;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_u32(sd_netlink_message *m, unsigned short type, uint32_t *data) {
+ void *attr_data;
+ bool net_byteorder;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_U32);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, &net_byteorder);
+ if (r < 0)
+ return r;
+ else if ((size_t) r < sizeof(uint32_t))
+ return -EIO;
+
+ if (data) {
+ if (net_byteorder)
+ *data = be32toh(*(uint32_t *) attr_data);
+ else
+ *data = *(uint32_t *) attr_data;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_ether_addr(sd_netlink_message *m, unsigned short type, struct ether_addr *data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t) r < sizeof(struct ether_addr))
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, sizeof(struct ether_addr));
+
+ return 0;
+}
+
+int netlink_message_read_hw_addr(sd_netlink_message *m, unsigned short type, hw_addr_data *data) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t) r > sizeof(union hw_addr_union))
+ return -EIO;
+
+ if (data) {
+ memcpy(data->addr.bytes, attr_data, r);
+ data->length = r;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_cache_info(sd_netlink_message *m, unsigned short type, struct ifa_cacheinfo *info) {
+ int r;
+ void *attr_data;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_CACHE_INFO);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t) r < sizeof(struct ifa_cacheinfo))
+ return -EIO;
+
+ if (info)
+ memcpy(info, attr_data, sizeof(struct ifa_cacheinfo));
+
+ return 0;
+}
+
+int netlink_message_read_in_addr_union(sd_netlink_message *m, unsigned short type, int family, union in_addr_union *data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(IN_SET(family, AF_INET, AF_INET6), -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, type, NETLINK_TYPE_IN_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+ else if ((size_t) r < FAMILY_ADDRESS_SIZE(family))
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, FAMILY_ADDRESS_SIZE(family));
+
+ return 0;
+}
+
+int sd_netlink_message_read_in_addr(sd_netlink_message *m, unsigned short type, struct in_addr *data) {
+ union in_addr_union u;
+ int r;
+
+ r = netlink_message_read_in_addr_union(m, type, AF_INET, &u);
+ if (r >= 0 && data)
+ *data = u.in;
+
+ return r;
+}
+
+int sd_netlink_message_read_in6_addr(sd_netlink_message *m, unsigned short type, struct in6_addr *data) {
+ union in_addr_union u;
+ int r;
+
+ r = netlink_message_read_in_addr_union(m, type, AF_INET6, &u);
+ if (r >= 0 && data)
+ *data = u.in6;
+
+ return r;
+}
+
+int sd_netlink_message_read_strv(sd_netlink_message *m, unsigned short container_type, unsigned short type_id, char ***ret) {
+ _cleanup_strv_free_ char **s = NULL;
+ const NLTypeSystem *type_system;
+ const NLType *nl_type;
+ struct rtattr *rta;
+ void *container;
+ size_t rt_len;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->n_containers < RTNL_CONTAINER_DEPTH, -EINVAL);
+
+ r = type_system_get_type(m->containers[m->n_containers].type_system,
+ &nl_type,
+ container_type);
+ if (r < 0)
+ return r;
+
+ if (type_get_type(nl_type) != NETLINK_TYPE_NESTED)
+ return -EINVAL;
+
+ r = type_system_get_type_system(m->containers[m->n_containers].type_system,
+ &type_system,
+ container_type);
+ if (r < 0)
+ return r;
+
+ r = type_system_get_type(type_system, &nl_type, type_id);
+ if (r < 0)
+ return r;
+
+ if (type_get_type(nl_type) != NETLINK_TYPE_STRING)
+ return -EINVAL;
+
+ r = netlink_message_read_internal(m, container_type, &container, NULL);
+ if (r < 0)
+ return r;
+
+ rt_len = (size_t) r;
+ rta = container;
+
+ /* RTA_OK() macro compares with rta->rt_len, which is unsigned short, and
+ * LGTM.com analysis does not like the type difference. Hence, here we
+ * introduce an unsigned short variable as a workaround. */
+ unsigned short len = rt_len;
+ for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
+ unsigned short type;
+
+ type = RTA_TYPE(rta);
+ if (type != type_id)
+ continue;
+
+ r = strv_extend(&s, RTA_DATA(rta));
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+static int netlink_container_parse(sd_netlink_message *m,
+ struct netlink_container *container,
+ struct rtattr *rta,
+ size_t rt_len) {
+ _cleanup_free_ struct netlink_attribute *attributes = NULL;
+ size_t n_allocated = 0;
+
+ /* RTA_OK() macro compares with rta->rt_len, which is unsigned short, and
+ * LGTM.com analysis does not like the type difference. Hence, here we
+ * introduce an unsigned short variable as a workaround. */
+ unsigned short len = rt_len;
+ for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
+ unsigned short type;
+
+ type = RTA_TYPE(rta);
+
+ if (!GREEDY_REALLOC0(attributes, n_allocated, type + 1))
+ return -ENOMEM;
+
+ if (attributes[type].offset != 0)
+ log_debug("rtnl: message parse - overwriting repeated attribute");
+
+ attributes[type].offset = (uint8_t *) rta - (uint8_t *) m->hdr;
+ attributes[type].nested = RTA_FLAGS(rta) & NLA_F_NESTED;
+ attributes[type].net_byteorder = RTA_FLAGS(rta) & NLA_F_NET_BYTEORDER;
+ }
+
+ container->attributes = TAKE_PTR(attributes);
+ container->n_attributes = n_allocated;
+
+ return 0;
+}
+
+int sd_netlink_message_enter_container(sd_netlink_message *m, unsigned short type_id) {
+ const NLType *nl_type;
+ const NLTypeSystem *type_system;
+ void *container;
+ uint16_t type;
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->n_containers < (RTNL_CONTAINER_DEPTH - 1), -EINVAL);
+
+ r = type_system_get_type(m->containers[m->n_containers].type_system,
+ &nl_type,
+ type_id);
+ if (r < 0)
+ return r;
+
+ type = type_get_type(nl_type);
+
+ if (type == NETLINK_TYPE_NESTED) {
+ r = type_system_get_type_system(m->containers[m->n_containers].type_system,
+ &type_system,
+ type_id);
+ if (r < 0)
+ return r;
+ } else if (type == NETLINK_TYPE_UNION) {
+ const NLTypeSystemUnion *type_system_union;
+
+ r = type_system_get_type_system_union(m->containers[m->n_containers].type_system,
+ &type_system_union,
+ type_id);
+ if (r < 0)
+ return r;
+
+ switch (type_system_union->match_type) {
+ case NL_MATCH_SIBLING:
+ {
+ const char *key;
+
+ r = sd_netlink_message_read_string(m, type_system_union->match, &key);
+ if (r < 0)
+ return r;
+
+ r = type_system_union_get_type_system(type_system_union,
+ &type_system,
+ key);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+ case NL_MATCH_PROTOCOL:
+ {
+ int family;
+
+ r = sd_rtnl_message_get_family(m, &family);
+ if (r < 0)
+ return r;
+
+ r = type_system_union_protocol_get_type_system(type_system_union,
+ &type_system,
+ family);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+ default:
+ assert_not_reached("sd-netlink: invalid type system union type");
+ }
+ } else
+ return -EINVAL;
+
+ r = netlink_message_read_internal(m, type_id, &container, NULL);
+ if (r < 0)
+ return r;
+
+ size = (size_t) r;
+ m->n_containers++;
+
+ r = netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ container,
+ size);
+ if (r < 0) {
+ m->n_containers--;
+ return r;
+ }
+
+ m->containers[m->n_containers].type_system = type_system;
+
+ return 0;
+}
+
+int sd_netlink_message_enter_array(sd_netlink_message *m, unsigned short type_id) {
+ void *container;
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->n_containers < (RTNL_CONTAINER_DEPTH - 1), -EINVAL);
+
+ r = netlink_message_read_internal(m, type_id, &container, NULL);
+ if (r < 0)
+ return r;
+
+ size = (size_t) r;
+ m->n_containers++;
+
+ r = netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ container,
+ size);
+ if (r < 0) {
+ m->n_containers--;
+ return r;
+ }
+
+ m->containers[m->n_containers].type_system = m->containers[m->n_containers - 1].type_system;
+
+ return 0;
+}
+
+int sd_netlink_message_exit_container(sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EINVAL);
+ assert_return(m->n_containers > 0, -EINVAL);
+
+ m->containers[m->n_containers].attributes = mfree(m->containers[m->n_containers].attributes);
+ m->containers[m->n_containers].type_system = NULL;
+
+ m->n_containers--;
+
+ return 0;
+}
+
+uint32_t rtnl_message_get_serial(sd_netlink_message *m) {
+ assert(m);
+ assert(m->hdr);
+
+ return m->hdr->nlmsg_seq;
+}
+
+int sd_netlink_message_is_error(const sd_netlink_message *m) {
+ assert_return(m, 0);
+ assert_return(m->hdr, 0);
+
+ return m->hdr->nlmsg_type == NLMSG_ERROR;
+}
+
+int sd_netlink_message_get_errno(const sd_netlink_message *m) {
+ struct nlmsgerr *err;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+
+ if (!sd_netlink_message_is_error(m))
+ return 0;
+
+ err = NLMSG_DATA(m->hdr);
+
+ return err->error;
+}
+
+static int netlink_message_parse_error(sd_netlink_message *m) {
+ struct nlmsgerr *err = NLMSG_DATA(m->hdr);
+ size_t hlen = sizeof(struct nlmsgerr);
+
+ /* no TLVs, nothing to do here */
+ if (!(m->hdr->nlmsg_flags & NLM_F_ACK_TLVS))
+ return 0;
+
+ /* if NLM_F_CAPPED is set then the inner err msg was capped */
+ if (!(m->hdr->nlmsg_flags & NLM_F_CAPPED))
+ hlen += err->msg.nlmsg_len - sizeof(struct nlmsghdr);
+
+ if (m->hdr->nlmsg_len <= NLMSG_SPACE(hlen))
+ return 0;
+
+ return netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ (struct rtattr*)((uint8_t*) NLMSG_DATA(m->hdr) + hlen),
+ NLMSG_PAYLOAD(m->hdr, hlen));
+}
+
+int sd_netlink_message_rewind(sd_netlink_message *m, sd_netlink *genl) {
+ const NLType *nl_type;
+ uint16_t type;
+ size_t size;
+ unsigned i;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(genl || m->protocol != NETLINK_GENERIC, -EINVAL);
+
+ /* don't allow appending to message once parsed */
+ if (!m->sealed)
+ rtnl_message_seal(m);
+
+ for (i = 1; i <= m->n_containers; i++)
+ m->containers[i].attributes = mfree(m->containers[i].attributes);
+
+ m->n_containers = 0;
+
+ if (m->containers[0].attributes)
+ /* top-level attributes have already been parsed */
+ return 0;
+
+ assert(m->hdr);
+
+ r = type_system_root_get_type(genl, &nl_type, m->hdr->nlmsg_type);
+ if (r < 0)
+ return r;
+
+ type = type_get_type(nl_type);
+ size = type_get_size(nl_type);
+
+ if (type == NETLINK_TYPE_NESTED) {
+ const NLTypeSystem *type_system;
+
+ type_get_type_system(nl_type, &type_system);
+
+ m->containers[0].type_system = type_system;
+
+ if (sd_netlink_message_is_error(m))
+ r = netlink_message_parse_error(m);
+ else
+ r = netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ (struct rtattr*)((uint8_t*) NLMSG_DATA(m->hdr) + NLMSG_ALIGN(size)),
+ NLMSG_PAYLOAD(m->hdr, size));
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+void rtnl_message_seal(sd_netlink_message *m) {
+ assert(m);
+ assert(!m->sealed);
+
+ m->sealed = true;
+}
+
+sd_netlink_message *sd_netlink_message_next(sd_netlink_message *m) {
+ assert_return(m, NULL);
+
+ return m->next;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-slot.c b/src/libsystemd/sd-netlink/netlink-slot.c
new file mode 100644
index 0000000..fa951bf
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-slot.c
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "netlink-internal.h"
+#include "netlink-slot.h"
+#include "string-util.h"
+
+int netlink_slot_allocate(
+ sd_netlink *nl,
+ bool floating,
+ NetlinkSlotType type,
+ size_t extra,
+ void *userdata,
+ const char *description,
+ sd_netlink_slot **ret) {
+
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+
+ assert(nl);
+ assert(ret);
+
+ slot = malloc0(offsetof(sd_netlink_slot, reply_callback) + extra);
+ if (!slot)
+ return -ENOMEM;
+
+ slot->n_ref = 1;
+ slot->netlink = nl;
+ slot->userdata = userdata;
+ slot->type = type;
+ slot->floating = floating;
+
+ if (description) {
+ slot->description = strdup(description);
+ if (!slot->description)
+ return -ENOMEM;
+ }
+
+ if (!floating)
+ sd_netlink_ref(nl);
+
+ LIST_PREPEND(slots, nl->slots, slot);
+
+ *ret = TAKE_PTR(slot);
+
+ return 0;
+}
+
+void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref) {
+ sd_netlink *nl;
+
+ assert(slot);
+
+ nl = slot->netlink;
+ if (!nl)
+ return;
+
+ switch (slot->type) {
+
+ case NETLINK_REPLY_CALLBACK:
+ (void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);
+
+ if (slot->reply_callback.timeout != 0)
+ prioq_remove(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+
+ break;
+ case NETLINK_MATCH_CALLBACK:
+ LIST_REMOVE(match_callbacks, nl->match_callbacks, &slot->match_callback);
+
+ switch (slot->match_callback.type) {
+ case RTM_NEWLINK:
+ case RTM_DELLINK:
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_LINK);
+
+ break;
+ case RTM_NEWADDR:
+ case RTM_DELADDR:
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_IPV4_IFADDR);
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_IPV6_IFADDR);
+
+ break;
+ case RTM_NEWROUTE:
+ case RTM_DELROUTE:
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_IPV4_ROUTE);
+ (void) socket_broadcast_group_unref(nl, RTNLGRP_IPV6_ROUTE);
+
+ break;
+ }
+
+ break;
+ default:
+ assert_not_reached("Wut? Unknown slot type?");
+ }
+
+ slot->type = _NETLINK_SLOT_INVALID;
+ slot->netlink = NULL;
+ LIST_REMOVE(slots, nl->slots, slot);
+
+ if (!slot->floating)
+ sd_netlink_unref(nl);
+ else if (unref)
+ sd_netlink_slot_unref(slot);
+}
+
+static sd_netlink_slot* netlink_slot_free(sd_netlink_slot *slot) {
+ assert(slot);
+
+ netlink_slot_disconnect(slot, false);
+
+ if (slot->destroy_callback)
+ slot->destroy_callback(slot->userdata);
+
+ free(slot->description);
+ return mfree(slot);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_netlink_slot, sd_netlink_slot, netlink_slot_free);
+
+sd_netlink *sd_netlink_slot_get_netlink(sd_netlink_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->netlink;
+}
+
+void *sd_netlink_slot_get_userdata(sd_netlink_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->userdata;
+}
+
+void *sd_netlink_slot_set_userdata(sd_netlink_slot *slot, void *userdata) {
+ void *ret;
+
+ assert_return(slot, NULL);
+
+ ret = slot->userdata;
+ slot->userdata = userdata;
+
+ return ret;
+}
+
+int sd_netlink_slot_get_destroy_callback(const sd_netlink_slot *slot, sd_netlink_destroy_t *callback) {
+ assert_return(slot, -EINVAL);
+
+ if (callback)
+ *callback = slot->destroy_callback;
+
+ return !!slot->destroy_callback;
+}
+
+int sd_netlink_slot_set_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t callback) {
+ assert_return(slot, -EINVAL);
+
+ slot->destroy_callback = callback;
+ return 0;
+}
+
+int sd_netlink_slot_get_floating(const sd_netlink_slot *slot) {
+ assert_return(slot, -EINVAL);
+
+ return slot->floating;
+}
+
+int sd_netlink_slot_set_floating(sd_netlink_slot *slot, int b) {
+ assert_return(slot, -EINVAL);
+
+ if (slot->floating == !!b)
+ return 0;
+
+ if (!slot->netlink) /* Already disconnected */
+ return -ESTALE;
+
+ slot->floating = b;
+
+ if (b) {
+ sd_netlink_slot_ref(slot);
+ sd_netlink_unref(slot->netlink);
+ } else {
+ sd_netlink_ref(slot->netlink);
+ sd_netlink_slot_unref(slot);
+ }
+
+ return 1;
+}
+
+int sd_netlink_slot_get_description(const sd_netlink_slot *slot, const char **description) {
+ assert_return(slot, -EINVAL);
+
+ if (description)
+ *description = slot->description;
+
+ return !!slot->description;
+}
+
+int sd_netlink_slot_set_description(sd_netlink_slot *slot, const char *description) {
+ assert_return(slot, -EINVAL);
+
+ return free_and_strdup(&slot->description, description);
+}
diff --git a/src/libsystemd/sd-netlink/netlink-slot.h b/src/libsystemd/sd-netlink/netlink-slot.h
new file mode 100644
index 0000000..79de817
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-slot.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+int netlink_slot_allocate(
+ sd_netlink *nl,
+ bool floating,
+ NetlinkSlotType type,
+ size_t extra,
+ void *userdata,
+ const char *description,
+ sd_netlink_slot **ret);
+void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref);
diff --git a/src/libsystemd/sd-netlink/netlink-socket.c b/src/libsystemd/sd-netlink/netlink-socket.c
new file mode 100644
index 0000000..228e38d
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-socket.c
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+#include "socket-util.h"
+#include "util.h"
+
+int socket_open(int family) {
+ int fd;
+
+ fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family);
+ if (fd < 0)
+ return -errno;
+
+ return fd_move_above_stdio(fd);
+}
+
+static int broadcast_groups_get(sd_netlink *nl) {
+ _cleanup_free_ uint32_t *groups = NULL;
+ socklen_t len = 0, old_len;
+ unsigned i, j;
+ int r;
+
+ assert(nl);
+ assert(nl->fd >= 0);
+
+ r = getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len);
+ if (r < 0) {
+ if (errno == ENOPROTOOPT) {
+ nl->broadcast_group_dont_leave = true;
+ return 0;
+ } else
+ return -errno;
+ }
+
+ if (len == 0)
+ return 0;
+
+ groups = new0(uint32_t, len);
+ if (!groups)
+ return -ENOMEM;
+
+ old_len = len;
+
+ r = getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &len);
+ if (r < 0)
+ return -errno;
+
+ if (old_len != len)
+ return -EIO;
+
+ r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < len; i++) {
+ for (j = 0; j < sizeof(uint32_t) * 8; j++) {
+ uint32_t offset;
+ unsigned group;
+
+ offset = 1U << j;
+
+ if (!(groups[i] & offset))
+ continue;
+
+ group = i * sizeof(uint32_t) * 8 + j + 1;
+
+ r = hashmap_put(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(1));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int socket_bind(sd_netlink *nl) {
+ socklen_t addrlen;
+ int r;
+
+ r = setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, true);
+ if (r < 0)
+ return r;
+
+ addrlen = sizeof(nl->sockaddr);
+
+ r = bind(nl->fd, &nl->sockaddr.sa, addrlen);
+ /* ignore EINVAL to allow opening an already bound socket */
+ if (r < 0 && errno != EINVAL)
+ return -errno;
+
+ r = getsockname(nl->fd, &nl->sockaddr.sa, &addrlen);
+ if (r < 0)
+ return -errno;
+
+ r = broadcast_groups_get(nl);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) {
+ assert(nl);
+
+ return PTR_TO_UINT(hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group)));
+}
+
+static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) {
+ int r;
+
+ assert(nl);
+
+ r = hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int broadcast_group_join(sd_netlink *nl, unsigned group) {
+ int r;
+
+ assert(nl);
+ assert(nl->fd >= 0);
+ assert(group > 0);
+
+ r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) {
+ unsigned n_ref;
+ int r;
+
+ assert(nl);
+
+ n_ref = broadcast_group_get_ref(nl, group);
+
+ n_ref++;
+
+ r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
+ if (r < 0)
+ return r;
+
+ r = broadcast_group_set_ref(nl, group, n_ref);
+ if (r < 0)
+ return r;
+
+ if (n_ref > 1)
+ /* not yet in the group */
+ return 0;
+
+ r = broadcast_group_join(nl, group);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int broadcast_group_leave(sd_netlink *nl, unsigned group) {
+ int r;
+
+ assert(nl);
+ assert(nl->fd >= 0);
+ assert(group > 0);
+
+ if (nl->broadcast_group_dont_leave)
+ return 0;
+
+ r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &group, sizeof(group));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) {
+ unsigned n_ref;
+ int r;
+
+ assert(nl);
+
+ n_ref = broadcast_group_get_ref(nl, group);
+
+ assert(n_ref > 0);
+
+ n_ref--;
+
+ r = broadcast_group_set_ref(nl, group, n_ref);
+ if (r < 0)
+ return r;
+
+ if (n_ref > 0)
+ /* still refs left */
+ return 0;
+
+ r = broadcast_group_leave(nl, group);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/* returns the number of bytes sent, or a negative error code */
+int socket_write_message(sd_netlink *nl, sd_netlink_message *m) {
+ union {
+ struct sockaddr sa;
+ struct sockaddr_nl nl;
+ } addr = {
+ .nl.nl_family = AF_NETLINK,
+ };
+ ssize_t k;
+
+ assert(nl);
+ assert(m);
+ assert(m->hdr);
+
+ k = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len,
+ 0, &addr.sa, sizeof(addr));
+ if (k < 0)
+ return -errno;
+
+ return k;
+}
+
+static int socket_recv_message(int fd, struct iovec *iov, uint32_t *ret_mcast_group, bool peek) {
+ union sockaddr_union sender;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct nl_pktinfo))) control;
+ struct msghdr msg = {
+ .msg_iov = iov,
+ .msg_iovlen = 1,
+ .msg_name = &sender,
+ .msg_namelen = sizeof(sender),
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ ssize_t n;
+
+ assert(fd >= 0);
+ assert(iov);
+
+ n = recvmsg_safe(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0));
+ if (n == -ENOBUFS)
+ return log_debug_errno(n, "rtnl: kernel receive buffer overrun");
+ if (IN_SET(n, -EAGAIN, -EINTR))
+ return 0;
+ if (n < 0)
+ return (int) n;
+
+ if (sender.nl.nl_pid != 0) {
+ /* not from the kernel, ignore */
+ log_debug("rtnl: ignoring message from portid %"PRIu32, sender.nl.nl_pid);
+
+ if (peek) {
+ /* drop the message */
+ n = recvmsg_safe(fd, &msg, 0);
+ if (n < 0)
+ return (int) n;
+ }
+
+ return 0;
+ }
+
+ if (ret_mcast_group) {
+ struct nl_pktinfo *pi;
+
+ pi = CMSG_FIND_DATA(&msg, SOL_NETLINK, NETLINK_PKTINFO, struct nl_pktinfo);
+ if (pi)
+ *ret_mcast_group = pi->group;
+ else
+ *ret_mcast_group = 0;
+ }
+
+ return (int) n;
+}
+
+/* On success, the number of bytes received is returned and *ret points to the received message
+ * which has a valid header and the correct size.
+ * If nothing useful was received 0 is returned.
+ * On failure, a negative error code is returned.
+ */
+int socket_read_message(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *first = NULL;
+ struct iovec iov = {};
+ uint32_t group = 0;
+ bool multi_part = false, done = false;
+ struct nlmsghdr *new_msg;
+ size_t len;
+ int r;
+ unsigned i = 0;
+
+ assert(rtnl);
+ assert(rtnl->rbuffer);
+ assert(rtnl->rbuffer_allocated >= sizeof(struct nlmsghdr));
+
+ /* read nothing, just get the pending message size */
+ r = socket_recv_message(rtnl->fd, &iov, NULL, true);
+ if (r <= 0)
+ return r;
+ else
+ len = (size_t) r;
+
+ /* make room for the pending message */
+ if (!greedy_realloc((void **)&rtnl->rbuffer,
+ &rtnl->rbuffer_allocated,
+ len, sizeof(uint8_t)))
+ return -ENOMEM;
+
+ iov = IOVEC_MAKE(rtnl->rbuffer, rtnl->rbuffer_allocated);
+
+ /* read the pending message */
+ r = socket_recv_message(rtnl->fd, &iov, &group, false);
+ if (r <= 0)
+ return r;
+ else
+ len = (size_t) r;
+
+ if (len > rtnl->rbuffer_allocated)
+ /* message did not fit in read buffer */
+ return -EIO;
+
+ if (NLMSG_OK(rtnl->rbuffer, len) && rtnl->rbuffer->nlmsg_flags & NLM_F_MULTI) {
+ multi_part = true;
+
+ for (i = 0; i < rtnl->rqueue_partial_size; i++) {
+ if (rtnl_message_get_serial(rtnl->rqueue_partial[i]) ==
+ rtnl->rbuffer->nlmsg_seq) {
+ first = rtnl->rqueue_partial[i];
+ break;
+ }
+ }
+ }
+
+ for (new_msg = rtnl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ const NLType *nl_type;
+
+ if (!group && new_msg->nlmsg_pid != rtnl->sockaddr.nl.nl_pid)
+ /* not broadcast and not for us */
+ continue;
+
+ if (new_msg->nlmsg_type == NLMSG_NOOP)
+ /* silently drop noop messages */
+ continue;
+
+ if (new_msg->nlmsg_type == NLMSG_DONE) {
+ /* finished reading multi-part message */
+ done = true;
+
+ /* if first is not defined, put NLMSG_DONE into the receive queue. */
+ if (first)
+ continue;
+ }
+
+ /* check that we support this message type */
+ r = type_system_root_get_type(rtnl, &nl_type, new_msg->nlmsg_type);
+ if (r < 0) {
+ if (r == -EOPNOTSUPP)
+ log_debug("sd-netlink: ignored message with unknown type: %i",
+ new_msg->nlmsg_type);
+
+ continue;
+ }
+
+ /* check that the size matches the message type */
+ if (new_msg->nlmsg_len < NLMSG_LENGTH(type_get_size(nl_type))) {
+ log_debug("sd-netlink: message is shorter than expected, dropping");
+ continue;
+ }
+
+ r = message_new_empty(rtnl, &m);
+ if (r < 0)
+ return r;
+
+ m->broadcast = !!group;
+
+ m->hdr = memdup(new_msg, new_msg->nlmsg_len);
+ if (!m->hdr)
+ return -ENOMEM;
+
+ /* seal and parse the top-level message */
+ r = sd_netlink_message_rewind(m, rtnl);
+ if (r < 0)
+ return r;
+
+ /* push the message onto the multi-part message stack */
+ if (first)
+ m->next = first;
+ first = TAKE_PTR(m);
+ }
+
+ if (len > 0)
+ log_debug("sd-netlink: discarding %zu bytes of incoming message", len);
+
+ if (!first)
+ return 0;
+
+ if (!multi_part || done) {
+ /* we got a complete message, push it on the read queue */
+ r = rtnl_rqueue_make_room(rtnl);
+ if (r < 0)
+ return r;
+
+ rtnl->rqueue[rtnl->rqueue_size++] = TAKE_PTR(first);
+
+ if (multi_part && (i < rtnl->rqueue_partial_size)) {
+ /* remove the message form the partial read queue */
+ memmove(rtnl->rqueue_partial + i,rtnl->rqueue_partial + i + 1,
+ sizeof(sd_netlink_message*) * (rtnl->rqueue_partial_size - i - 1));
+ rtnl->rqueue_partial_size--;
+ }
+
+ return 1;
+ } else {
+ /* we only got a partial multi-part message, push it on the
+ partial read queue */
+ if (i < rtnl->rqueue_partial_size)
+ rtnl->rqueue_partial[i] = TAKE_PTR(first);
+ else {
+ r = rtnl_rqueue_partial_make_room(rtnl);
+ if (r < 0)
+ return r;
+
+ rtnl->rqueue_partial[rtnl->rqueue_partial_size++] = TAKE_PTR(first);
+ }
+
+ return 0;
+ }
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types.c b/src/libsystemd/sd-netlink/netlink-types.c
new file mode 100644
index 0000000..6fb6c14
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types.c
@@ -0,0 +1,1488 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <stdint.h>
+#include <sys/socket.h>
+#include <linux/can/vxcan.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/genetlink.h>
+#include <linux/ip.h>
+#include <linux/if.h>
+#include <linux/can/netlink.h>
+#include <linux/fib_rules.h>
+#include <linux/fou.h>
+#include <linux/if_addr.h>
+#include <linux/if_addrlabel.h>
+#include <linux/if_bridge.h>
+#include <linux/if_link.h>
+#include <linux/if_macsec.h>
+#include <linux/if_tunnel.h>
+#include <linux/l2tp.h>
+#include <linux/nexthop.h>
+#include <linux/nl80211.h>
+#include <linux/pkt_sched.h>
+#include <linux/veth.h>
+#include <linux/wireguard.h>
+
+#include "sd-netlink.h"
+
+#include "generic-netlink.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "string-table.h"
+#include "util.h"
+
+/* Maximum ARP IP target defined in kernel */
+#define BOND_MAX_ARP_TARGETS 16
+
+typedef enum {
+ BOND_ARP_TARGETS_0,
+ BOND_ARP_TARGETS_1,
+ BOND_ARP_TARGETS_2,
+ BOND_ARP_TARGETS_3,
+ BOND_ARP_TARGETS_4,
+ BOND_ARP_TARGETS_5,
+ BOND_ARP_TARGETS_6,
+ BOND_ARP_TARGETS_7,
+ BOND_ARP_TARGETS_8,
+ BOND_ARP_TARGETS_9,
+ BOND_ARP_TARGETS_10,
+ BOND_ARP_TARGETS_11,
+ BOND_ARP_TARGETS_12,
+ BOND_ARP_TARGETS_13,
+ BOND_ARP_TARGETS_14,
+ BOND_ARP_TARGETS_MAX = BOND_MAX_ARP_TARGETS,
+} BondArpTargets;
+
+struct NLType {
+ uint16_t type;
+ size_t size;
+ const NLTypeSystem *type_system;
+ const NLTypeSystemUnion *type_system_union;
+};
+
+struct NLTypeSystem {
+ uint16_t count;
+ const NLType *types;
+};
+
+static const NLTypeSystem rtnl_link_type_system;
+
+static const NLType empty_types[1] = {
+ /* fake array to avoid .types==NULL, which denotes invalid type-systems */
+};
+
+static const NLTypeSystem empty_type_system = {
+ .count = 0,
+ .types = empty_types,
+};
+
+static const NLType rtnl_link_info_data_veth_types[] = {
+ [VETH_INFO_PEER] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+};
+
+static const NLType rtnl_link_info_data_vxcan_types[] = {
+ [VXCAN_INFO_PEER] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+};
+
+static const NLType rtnl_link_info_data_ipvlan_types[] = {
+ [IFLA_IPVLAN_MODE] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPVLAN_FLAGS] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_macvlan_macaddr_types[] = {
+ [IFLA_MACVLAN_MACADDR] = { .type = NETLINK_TYPE_ETHER_ADDR },
+};
+
+static const NLTypeSystem rtnl_macvlan_macaddr_type_system = {
+ .count = ELEMENTSOF(rtnl_macvlan_macaddr_types),
+ .types = rtnl_macvlan_macaddr_types,
+};
+
+static const NLType rtnl_link_info_data_macvlan_types[] = {
+ [IFLA_MACVLAN_MODE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_MACVLAN_FLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_MACVLAN_MACADDR_MODE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_MACVLAN_MACADDR_DATA] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_macvlan_macaddr_type_system },
+};
+
+static const NLType rtnl_link_info_data_bridge_types[] = {
+ [IFLA_BR_FORWARD_DELAY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_HELLO_TIME] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_MAX_AGE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_AGEING_TIME] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_STP_STATE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_PRIORITY] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_VLAN_FILTERING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_VLAN_PROTOCOL] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_GROUP_FWD_MASK] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_ROOT_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_ROOT_PATH_COST] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_TOPOLOGY_CHANGE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_TOPOLOGY_CHANGE_DETECTED] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_HELLO_TIMER] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_TCN_TIMER] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_TOPOLOGY_CHANGE_TIMER] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_GC_TIMER] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_GROUP_ADDR] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_FDB_FLUSH] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_MCAST_ROUTER] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_MCAST_SNOOPING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_MCAST_QUERY_USE_IFADDR] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_MCAST_QUERIER] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_MCAST_HASH_ELASTICITY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_MCAST_HASH_MAX] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_MCAST_LAST_MEMBER_CNT] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BR_MCAST_STARTUP_QUERY_CNT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_MCAST_LAST_MEMBER_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_MEMBERSHIP_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_QUERIER_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_QUERY_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_QUERY_RESPONSE_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_MCAST_STARTUP_QUERY_INTVL] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BR_NF_CALL_IPTABLES] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BR_MCAST_IGMP_VERSION] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLType rtnl_link_info_data_vlan_types[] = {
+ [IFLA_VLAN_ID] = { .type = NETLINK_TYPE_U16 },
+/*
+ [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) },
+ [IFLA_VLAN_EGRESS_QOS] = { .type = NETLINK_TYPE_NESTED },
+ [IFLA_VLAN_INGRESS_QOS] = { .type = NETLINK_TYPE_NESTED },
+*/
+ [IFLA_VLAN_PROTOCOL] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_link_info_data_vxlan_types[] = {
+ [IFLA_VXLAN_ID] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_GROUP] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VXLAN_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VXLAN_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_TOS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_LEARNING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_AGEING] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_LIMIT] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_PORT_RANGE] = { .type = NETLINK_TYPE_U32},
+ [IFLA_VXLAN_PROXY] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_RSC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_L2MISS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_L3MISS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_VXLAN_GROUP6] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VXLAN_LOCAL6] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VXLAN_UDP_CSUM] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_REMCSUM_TX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_REMCSUM_RX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_GBP] = { .type = NETLINK_TYPE_FLAG },
+ [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NETLINK_TYPE_FLAG },
+ [IFLA_VXLAN_COLLECT_METADATA] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_VXLAN_LABEL] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VXLAN_GPE] = { .type = NETLINK_TYPE_FLAG },
+ [IFLA_VXLAN_TTL_INHERIT] = { .type = NETLINK_TYPE_FLAG },
+ [IFLA_VXLAN_DF] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLType rtnl_bond_arp_target_types[] = {
+ [BOND_ARP_TARGETS_0] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_1] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_2] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_3] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_4] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_5] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_6] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_7] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_8] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_9] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_10] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_11] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_12] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_13] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_14] = { .type = NETLINK_TYPE_U32 },
+ [BOND_ARP_TARGETS_MAX] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_bond_arp_type_system = {
+ .count = ELEMENTSOF(rtnl_bond_arp_target_types),
+ .types = rtnl_bond_arp_target_types,
+};
+
+static const NLType rtnl_link_info_data_bond_types[] = {
+ [IFLA_BOND_MODE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_ACTIVE_SLAVE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_MIIMON] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_UPDELAY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_DOWNDELAY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_USE_CARRIER] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_ARP_INTERVAL] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_ARP_IP_TARGET] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_bond_arp_type_system },
+ [IFLA_BOND_ARP_VALIDATE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_ARP_ALL_TARGETS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_PRIMARY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_PRIMARY_RESELECT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_FAIL_OVER_MAC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_XMIT_HASH_POLICY] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_RESEND_IGMP] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_NUM_PEER_NOTIF] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_ALL_SLAVES_ACTIVE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_MIN_LINKS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_LP_INTERVAL] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BOND_AD_LACP_RATE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_AD_SELECT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BOND_AD_INFO] = { .type = NETLINK_TYPE_NESTED },
+ [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NETLINK_TYPE_ETHER_ADDR },
+ [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLType rtnl_link_info_data_iptun_types[] = {
+ [IFLA_IPTUN_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IPTUN_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_TOS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_PMTUDISC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_FLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_PROTO] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_6RD_PREFIX] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_link_info_data_ipgre_types[] = {
+ [IFLA_GRE_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_IFLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_OFLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_IKEY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_OKEY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_GRE_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_GRE_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GRE_TOS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GRE_PMTUDISC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GRE_FLOWINFO] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_FLAGS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GRE_ENCAP_TYPE] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_ENCAP_FLAGS] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_ENCAP_SPORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_ENCAP_DPORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_link_info_data_ipvti_types[] = {
+ [IFLA_VTI_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VTI_IKEY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VTI_OKEY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VTI_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_VTI_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+};
+
+static const NLType rtnl_link_info_data_ip6tnl_types[] = {
+ [IFLA_IPTUN_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IPTUN_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_IPTUN_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_FLAGS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IPTUN_PROTO] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_IPTUN_FLOWINFO] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_link_info_data_vrf_types[] = {
+ [IFLA_VRF_TABLE] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_link_info_data_geneve_types[] = {
+ [IFLA_GENEVE_ID] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GENEVE_TTL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_TOS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_GENEVE_REMOTE] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_GENEVE_REMOTE6] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_GENEVE_UDP_CSUM] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_LABEL] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_GENEVE_TTL_INHERIT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_GENEVE_DF] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLType rtnl_link_info_data_can_types[] = {
+ [IFLA_CAN_BITTIMING] = { .size = sizeof(struct can_bittiming) },
+ [IFLA_CAN_RESTART_MS] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_CAN_CTRLMODE] = { .size = sizeof(struct can_ctrlmode) },
+ [IFLA_CAN_TERMINATION] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLType rtnl_link_info_data_macsec_types[] = {
+ [IFLA_MACSEC_SCI] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_MACSEC_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_MACSEC_ICV_LEN] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_MACSEC_CIPHER_SUITE] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_MACSEC_WINDOW] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_MACSEC_ENCODING_SA] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_MACSEC_ENCRYPT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_MACSEC_PROTECT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_MACSEC_INC_SCI] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_MACSEC_ES] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_MACSEC_SCB] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_MACSEC_VALIDATION] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLType rtnl_link_info_data_xfrm_types[] = {
+ [IFLA_XFRM_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_XFRM_IF_ID] = { .type = NETLINK_TYPE_U32 }
+};
+
+static const NLType rtnl_link_info_data_bareudp_types[] = {
+ [IFLA_BAREUDP_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BAREUDP_ETHERTYPE] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BAREUDP_SRCPORT_MIN] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BAREUDP_MULTIPROTO_MODE] = { .type = NETLINK_TYPE_FLAG },
+};
+/* these strings must match the .kind entries in the kernel */
+static const char* const nl_union_link_info_data_table[] = {
+ [NL_UNION_LINK_INFO_DATA_BOND] = "bond",
+ [NL_UNION_LINK_INFO_DATA_BRIDGE] = "bridge",
+ [NL_UNION_LINK_INFO_DATA_VLAN] = "vlan",
+ [NL_UNION_LINK_INFO_DATA_VETH] = "veth",
+ [NL_UNION_LINK_INFO_DATA_DUMMY] = "dummy",
+ [NL_UNION_LINK_INFO_DATA_MACVLAN] = "macvlan",
+ [NL_UNION_LINK_INFO_DATA_MACVTAP] = "macvtap",
+ [NL_UNION_LINK_INFO_DATA_IPVLAN] = "ipvlan",
+ [NL_UNION_LINK_INFO_DATA_IPVTAP] = "ipvtap",
+ [NL_UNION_LINK_INFO_DATA_VXLAN] = "vxlan",
+ [NL_UNION_LINK_INFO_DATA_IPIP_TUNNEL] = "ipip",
+ [NL_UNION_LINK_INFO_DATA_IPGRE_TUNNEL] = "gre",
+ [NL_UNION_LINK_INFO_DATA_ERSPAN] = "erspan",
+ [NL_UNION_LINK_INFO_DATA_IPGRETAP_TUNNEL] = "gretap",
+ [NL_UNION_LINK_INFO_DATA_IP6GRE_TUNNEL] = "ip6gre",
+ [NL_UNION_LINK_INFO_DATA_IP6GRETAP_TUNNEL] = "ip6gretap",
+ [NL_UNION_LINK_INFO_DATA_SIT_TUNNEL] = "sit",
+ [NL_UNION_LINK_INFO_DATA_VTI_TUNNEL] = "vti",
+ [NL_UNION_LINK_INFO_DATA_VTI6_TUNNEL] = "vti6",
+ [NL_UNION_LINK_INFO_DATA_IP6TNL_TUNNEL] = "ip6tnl",
+ [NL_UNION_LINK_INFO_DATA_VRF] = "vrf",
+ [NL_UNION_LINK_INFO_DATA_VCAN] = "vcan",
+ [NL_UNION_LINK_INFO_DATA_GENEVE] = "geneve",
+ [NL_UNION_LINK_INFO_DATA_VXCAN] = "vxcan",
+ [NL_UNION_LINK_INFO_DATA_WIREGUARD] = "wireguard",
+ [NL_UNION_LINK_INFO_DATA_NETDEVSIM] = "netdevsim",
+ [NL_UNION_LINK_INFO_DATA_CAN] = "can",
+ [NL_UNION_LINK_INFO_DATA_MACSEC] = "macsec",
+ [NL_UNION_LINK_INFO_DATA_NLMON] = "nlmon",
+ [NL_UNION_LINK_INFO_DATA_XFRM] = "xfrm",
+ [NL_UNION_LINK_INFO_DATA_IFB] = "ifb",
+ [NL_UNION_LINK_INFO_DATA_BAREUDP] = "bareudp",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(nl_union_link_info_data, NLUnionLinkInfoData);
+
+static const NLTypeSystem rtnl_link_info_data_type_systems[] = {
+ [NL_UNION_LINK_INFO_DATA_BOND] = { .count = ELEMENTSOF(rtnl_link_info_data_bond_types),
+ .types = rtnl_link_info_data_bond_types },
+ [NL_UNION_LINK_INFO_DATA_BRIDGE] = { .count = ELEMENTSOF(rtnl_link_info_data_bridge_types),
+ .types = rtnl_link_info_data_bridge_types },
+ [NL_UNION_LINK_INFO_DATA_VLAN] = { .count = ELEMENTSOF(rtnl_link_info_data_vlan_types),
+ .types = rtnl_link_info_data_vlan_types },
+ [NL_UNION_LINK_INFO_DATA_VETH] = { .count = ELEMENTSOF(rtnl_link_info_data_veth_types),
+ .types = rtnl_link_info_data_veth_types },
+ [NL_UNION_LINK_INFO_DATA_MACVLAN] = { .count = ELEMENTSOF(rtnl_link_info_data_macvlan_types),
+ .types = rtnl_link_info_data_macvlan_types },
+ [NL_UNION_LINK_INFO_DATA_MACVTAP] = { .count = ELEMENTSOF(rtnl_link_info_data_macvlan_types),
+ .types = rtnl_link_info_data_macvlan_types },
+ [NL_UNION_LINK_INFO_DATA_IPVLAN] = { .count = ELEMENTSOF(rtnl_link_info_data_ipvlan_types),
+ .types = rtnl_link_info_data_ipvlan_types },
+ [NL_UNION_LINK_INFO_DATA_IPVTAP] = { .count = ELEMENTSOF(rtnl_link_info_data_ipvlan_types),
+ .types = rtnl_link_info_data_ipvlan_types },
+ [NL_UNION_LINK_INFO_DATA_VXLAN] = { .count = ELEMENTSOF(rtnl_link_info_data_vxlan_types),
+ .types = rtnl_link_info_data_vxlan_types },
+ [NL_UNION_LINK_INFO_DATA_IPIP_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_iptun_types),
+ .types = rtnl_link_info_data_iptun_types },
+ [NL_UNION_LINK_INFO_DATA_IPGRE_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_ERSPAN] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_IPGRETAP_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_IP6GRE_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_IP6GRETAP_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipgre_types),
+ .types = rtnl_link_info_data_ipgre_types },
+ [NL_UNION_LINK_INFO_DATA_SIT_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_iptun_types),
+ .types = rtnl_link_info_data_iptun_types },
+ [NL_UNION_LINK_INFO_DATA_VTI_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipvti_types),
+ .types = rtnl_link_info_data_ipvti_types },
+ [NL_UNION_LINK_INFO_DATA_VTI6_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ipvti_types),
+ .types = rtnl_link_info_data_ipvti_types },
+ [NL_UNION_LINK_INFO_DATA_IP6TNL_TUNNEL] = { .count = ELEMENTSOF(rtnl_link_info_data_ip6tnl_types),
+ .types = rtnl_link_info_data_ip6tnl_types },
+ [NL_UNION_LINK_INFO_DATA_VRF] = { .count = ELEMENTSOF(rtnl_link_info_data_vrf_types),
+ .types = rtnl_link_info_data_vrf_types },
+ [NL_UNION_LINK_INFO_DATA_GENEVE] = { .count = ELEMENTSOF(rtnl_link_info_data_geneve_types),
+ .types = rtnl_link_info_data_geneve_types },
+ [NL_UNION_LINK_INFO_DATA_VXCAN] = { .count = ELEMENTSOF(rtnl_link_info_data_vxcan_types),
+ .types = rtnl_link_info_data_vxcan_types },
+ [NL_UNION_LINK_INFO_DATA_CAN] = { .count = ELEMENTSOF(rtnl_link_info_data_can_types),
+ .types = rtnl_link_info_data_can_types },
+ [NL_UNION_LINK_INFO_DATA_MACSEC] = { .count = ELEMENTSOF(rtnl_link_info_data_macsec_types),
+ .types = rtnl_link_info_data_macsec_types },
+ [NL_UNION_LINK_INFO_DATA_XFRM] = { .count = ELEMENTSOF(rtnl_link_info_data_xfrm_types),
+ .types = rtnl_link_info_data_xfrm_types },
+ [NL_UNION_LINK_INFO_DATA_BAREUDP] = { .count = ELEMENTSOF(rtnl_link_info_data_bareudp_types),
+ .types = rtnl_link_info_data_bareudp_types },
+};
+
+static const NLTypeSystemUnion rtnl_link_info_data_type_system_union = {
+ .num = _NL_UNION_LINK_INFO_DATA_MAX,
+ .lookup = nl_union_link_info_data_from_string,
+ .type_systems = rtnl_link_info_data_type_systems,
+ .match_type = NL_MATCH_SIBLING,
+ .match = IFLA_INFO_KIND,
+};
+
+static const NLType rtnl_link_info_types[] = {
+ [IFLA_INFO_KIND] = { .type = NETLINK_TYPE_STRING },
+ [IFLA_INFO_DATA] = { .type = NETLINK_TYPE_UNION, .type_system_union = &rtnl_link_info_data_type_system_union },
+/*
+ [IFLA_INFO_XSTATS],
+ [IFLA_INFO_SLAVE_KIND] = { .type = NETLINK_TYPE_STRING },
+ [IFLA_INFO_SLAVE_DATA] = { .type = NETLINK_TYPE_NESTED },
+*/
+};
+
+static const NLTypeSystem rtnl_link_info_type_system = {
+ .count = ELEMENTSOF(rtnl_link_info_types),
+ .types = rtnl_link_info_types,
+};
+
+static const struct NLType rtnl_prot_info_bridge_port_types[] = {
+ [IFLA_BRPORT_STATE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_COST] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_BRPORT_PRIORITY] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_MODE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_GUARD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_PROTECT] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_FAST_LEAVE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_LEARNING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_PROXYARP] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_LEARNING_SYNC] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_ROOT_ID] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_BRIDGE_ID] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_DESIGNATED_PORT] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_DESIGNATED_COST] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_ID] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_NO] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_TOPOLOGY_CHANGE_ACK] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_CONFIG_PENDING] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_MESSAGE_AGE_TIMER] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BRPORT_FORWARD_DELAY_TIMER] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BRPORT_HOLD_TIMER] = { .type = NETLINK_TYPE_U64 },
+ [IFLA_BRPORT_FLUSH] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_PAD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_MCAST_FLOOD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_BCAST_FLOOD] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NETLINK_TYPE_U16 },
+ [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_ISOLATED] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_BRPORT_BACKUP_PORT] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_prot_info_type_systems[] = {
+ [AF_BRIDGE] = { .count = ELEMENTSOF(rtnl_prot_info_bridge_port_types),
+ .types = rtnl_prot_info_bridge_port_types },
+};
+
+static const NLTypeSystemUnion rtnl_prot_info_type_system_union = {
+ .num = AF_MAX,
+ .type_systems = rtnl_prot_info_type_systems,
+ .match_type = NL_MATCH_PROTOCOL,
+};
+
+static const struct NLType rtnl_af_spec_inet6_types[] = {
+ [IFLA_INET6_FLAGS] = { .type = NETLINK_TYPE_U32 },
+/*
+ IFLA_INET6_CONF,
+ IFLA_INET6_STATS,
+ IFLA_INET6_MCAST,
+ IFLA_INET6_CACHEINFO,
+ IFLA_INET6_ICMP6STATS,
+*/
+ [IFLA_INET6_TOKEN] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFLA_INET6_ADDR_GEN_MODE] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLTypeSystem rtnl_af_spec_inet6_type_system = {
+ .count = ELEMENTSOF(rtnl_af_spec_inet6_types),
+ .types = rtnl_af_spec_inet6_types,
+};
+
+static const NLType rtnl_af_spec_types[] = {
+ [AF_INET6] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_af_spec_inet6_type_system },
+};
+
+static const NLTypeSystem rtnl_af_spec_type_system = {
+ .count = ELEMENTSOF(rtnl_af_spec_types),
+ .types = rtnl_af_spec_types,
+};
+
+static const NLType rtnl_prop_list_types[] = {
+ [IFLA_ALT_IFNAME] = { .type = NETLINK_TYPE_STRING, .size = ALTIFNAMSIZ - 1 },
+};
+
+static const NLTypeSystem rtnl_prop_list_type_system = {
+ .count = ELEMENTSOF(rtnl_prop_list_types),
+ .types = rtnl_prop_list_types,
+};
+
+static const NLType rtnl_vf_vlan_list_types[] = {
+ [IFLA_VF_VLAN_INFO] = { .size = sizeof(struct ifla_vf_vlan_info) },
+};
+
+static const NLTypeSystem rtnl_vf_vlan_type_system = {
+ .count = ELEMENTSOF(rtnl_vf_vlan_list_types),
+ .types = rtnl_vf_vlan_list_types,
+};
+
+static const NLType rtnl_vf_vlan_info_types[] = {
+ [IFLA_VF_MAC] = { .size = sizeof(struct ifla_vf_mac) },
+ [IFLA_VF_VLAN] = { .size = sizeof(struct ifla_vf_vlan) },
+ [IFLA_VF_VLAN_LIST] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_vf_vlan_type_system},
+ [IFLA_VF_TX_RATE] = { .size = sizeof(struct ifla_vf_tx_rate) },
+ [IFLA_VF_SPOOFCHK] = { .size = sizeof(struct ifla_vf_spoofchk) },
+ [IFLA_VF_RATE] = { .size = sizeof(struct ifla_vf_rate) },
+ [IFLA_VF_LINK_STATE] = { .size = sizeof(struct ifla_vf_link_state) },
+ [IFLA_VF_RSS_QUERY_EN] = { .size = sizeof(struct ifla_vf_rss_query_en) },
+ [IFLA_VF_TRUST] = { .size = sizeof(struct ifla_vf_trust) },
+ [IFLA_VF_IB_NODE_GUID] = { .size = sizeof(struct ifla_vf_guid) },
+ [IFLA_VF_IB_PORT_GUID] = { .size = sizeof(struct ifla_vf_guid) },
+};
+
+static const NLTypeSystem rtnl_vf_vlan_info_type_system = {
+ .count = ELEMENTSOF(rtnl_vf_vlan_info_types),
+ .types = rtnl_vf_vlan_info_types,
+};
+
+static const NLType rtnl_link_io_srv_types[] = {
+ [IFLA_VF_INFO] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_vf_vlan_info_type_system },
+};
+
+static const NLTypeSystem rtnl_io_srv_type_system = {
+ .count = ELEMENTSOF(rtnl_link_io_srv_types),
+ .types = rtnl_link_io_srv_types,
+};
+
+static const NLType rtnl_link_types[] = {
+ [IFLA_ADDRESS] = { .type = NETLINK_TYPE_ETHER_ADDR },
+ [IFLA_BROADCAST] = { .type = NETLINK_TYPE_ETHER_ADDR },
+ [IFLA_IFNAME] = { .type = NETLINK_TYPE_STRING, .size = IFNAMSIZ - 1 },
+ [IFLA_MTU] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_LINK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_QDISC] = { .type = NETLINK_TYPE_STRING },
+ [IFLA_STATS] = { .size = sizeof(struct rtnl_link_stats) },
+/*
+ [IFLA_COST],
+ [IFLA_PRIORITY],
+*/
+ [IFLA_MASTER] = { .type = NETLINK_TYPE_U32 },
+/*
+ [IFLA_WIRELESS],
+*/
+ [IFLA_PROTINFO] = { .type = NETLINK_TYPE_UNION, .type_system_union = &rtnl_prot_info_type_system_union },
+ [IFLA_TXQLEN] = { .type = NETLINK_TYPE_U32 },
+/*
+ [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
+*/
+ [IFLA_WEIGHT] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_OPERSTATE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_LINKMODE] = { .type = NETLINK_TYPE_U8 },
+ [IFLA_LINKINFO] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_info_type_system },
+ [IFLA_NET_NS_PID] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_IFALIAS] = { .type = NETLINK_TYPE_STRING, .size = IFALIASZ - 1 },
+ [IFLA_NUM_VF] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_VFINFO_LIST] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_io_srv_type_system },
+ [IFLA_STATS64] = { .size = sizeof(struct rtnl_link_stats64) },
+/*
+ [IFLA_VF_PORTS] = { .type = NETLINK_TYPE_NESTED },
+ [IFLA_PORT_SELF] = { .type = NETLINK_TYPE_NESTED },
+*/
+ [IFLA_AF_SPEC] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_af_spec_type_system },
+/*
+ [IFLA_VF_PORTS],
+ [IFLA_PORT_SELF],
+ [IFLA_AF_SPEC],
+*/
+ [IFLA_GROUP] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_NET_NS_FD] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_EXT_MASK] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_PROMISCUITY] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_NUM_TX_QUEUES] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_NUM_RX_QUEUES] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_CARRIER] = { .type = NETLINK_TYPE_U8 },
+/*
+ [IFLA_PHYS_PORT_ID] = { .type = NETLINK_TYPE_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
+*/
+ [IFLA_MIN_MTU] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_MAX_MTU] = { .type = NETLINK_TYPE_U32 },
+ [IFLA_PROP_LIST] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_prop_list_type_system },
+ [IFLA_ALT_IFNAME] = { .type = NETLINK_TYPE_STRING, .size = ALTIFNAMSIZ - 1 },
+};
+
+static const NLTypeSystem rtnl_link_type_system = {
+ .count = ELEMENTSOF(rtnl_link_types),
+ .types = rtnl_link_types,
+};
+
+/* IFA_FLAGS was defined in kernel 3.14, but we still support older
+ * kernels where IFA_MAX is lower. */
+static const NLType rtnl_address_types[] = {
+ [IFA_ADDRESS] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFA_LOCAL] = { .type = NETLINK_TYPE_IN_ADDR },
+ [IFA_LABEL] = { .type = NETLINK_TYPE_STRING, .size = IFNAMSIZ - 1 },
+ [IFA_BROADCAST] = { .type = NETLINK_TYPE_IN_ADDR }, /* 6? */
+ [IFA_CACHEINFO] = { .type = NETLINK_TYPE_CACHE_INFO, .size = sizeof(struct ifa_cacheinfo) },
+/*
+ [IFA_ANYCAST],
+ [IFA_MULTICAST],
+*/
+ [IFA_FLAGS] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_address_type_system = {
+ .count = ELEMENTSOF(rtnl_address_types),
+ .types = rtnl_address_types,
+};
+
+/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */
+
+static const NLType rtnl_route_metrics_types[] = {
+ [RTAX_MTU] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_WINDOW] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_RTT] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_RTTVAR] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_SSTHRESH] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_CWND] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_ADVMSS] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_REORDERING] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_HOPLIMIT] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_INITCWND] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_FEATURES] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_RTO_MIN] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_INITRWND] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_QUICKACK] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_CC_ALGO] = { .type = NETLINK_TYPE_U32 },
+ [RTAX_FASTOPEN_NO_COOKIE] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_route_metrics_type_system = {
+ .count = ELEMENTSOF(rtnl_route_metrics_types),
+ .types = rtnl_route_metrics_types,
+};
+
+static const NLType rtnl_route_types[] = {
+ [RTA_DST] = { .type = NETLINK_TYPE_IN_ADDR }, /* 6? */
+ [RTA_SRC] = { .type = NETLINK_TYPE_IN_ADDR }, /* 6? */
+ [RTA_IIF] = { .type = NETLINK_TYPE_U32 },
+ [RTA_OIF] = { .type = NETLINK_TYPE_U32 },
+ [RTA_GATEWAY] = { .type = NETLINK_TYPE_IN_ADDR },
+ [RTA_PRIORITY] = { .type = NETLINK_TYPE_U32 },
+ [RTA_PREFSRC] = { .type = NETLINK_TYPE_IN_ADDR }, /* 6? */
+ [RTA_METRICS] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_route_metrics_type_system},
+ [RTA_MULTIPATH] = { .size = sizeof(struct rtnexthop) },
+ [RTA_FLOW] = { .type = NETLINK_TYPE_U32 }, /* 6? */
+ [RTA_CACHEINFO] = { .size = sizeof(struct rta_cacheinfo) },
+ [RTA_TABLE] = { .type = NETLINK_TYPE_U32 },
+ [RTA_MARK] = { .type = NETLINK_TYPE_U32 },
+ [RTA_MFC_STATS] = { .type = NETLINK_TYPE_U64 },
+ [RTA_VIA] = { /* See struct rtvia */ },
+ [RTA_NEWDST] = { .type = NETLINK_TYPE_U32 },
+ [RTA_PREF] = { .type = NETLINK_TYPE_U8 },
+ [RTA_EXPIRES] = { .type = NETLINK_TYPE_U32 },
+ [RTA_ENCAP_TYPE] = { .type = NETLINK_TYPE_U16 },
+ [RTA_ENCAP] = { .type = NETLINK_TYPE_NESTED }, /* Multiple type systems i.e. LWTUNNEL_ENCAP_MPLS/LWTUNNEL_ENCAP_IP/LWTUNNEL_ENCAP_ILA etc... */
+ [RTA_UID] = { .type = NETLINK_TYPE_U32 },
+ [RTA_TTL_PROPAGATE] = { .type = NETLINK_TYPE_U8 },
+ [RTA_IP_PROTO] = { .type = NETLINK_TYPE_U8 },
+ [RTA_SPORT] = { .type = NETLINK_TYPE_U16 },
+ [RTA_DPORT] = { .type = NETLINK_TYPE_U16 },
+};
+
+static const NLTypeSystem rtnl_route_type_system = {
+ .count = ELEMENTSOF(rtnl_route_types),
+ .types = rtnl_route_types,
+};
+
+static const NLType rtnl_neigh_types[] = {
+ [NDA_DST] = { .type = NETLINK_TYPE_IN_ADDR },
+ [NDA_LLADDR] = { /* struct ether_addr, struct in_addr, or struct in6_addr */ },
+ [NDA_CACHEINFO] = { .type = NETLINK_TYPE_CACHE_INFO, .size = sizeof(struct nda_cacheinfo) },
+ [NDA_PROBES] = { .type = NETLINK_TYPE_U32 },
+ [NDA_VLAN] = { .type = NETLINK_TYPE_U16 },
+ [NDA_PORT] = { .type = NETLINK_TYPE_U16 },
+ [NDA_VNI] = { .type = NETLINK_TYPE_U32 },
+ [NDA_IFINDEX] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_neigh_type_system = {
+ .count = ELEMENTSOF(rtnl_neigh_types),
+ .types = rtnl_neigh_types,
+};
+
+static const NLType rtnl_addrlabel_types[] = {
+ [IFAL_ADDRESS] = { .type = NETLINK_TYPE_IN_ADDR, .size = sizeof(struct in6_addr) },
+ [IFAL_LABEL] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_addrlabel_type_system = {
+ .count = ELEMENTSOF(rtnl_addrlabel_types),
+ .types = rtnl_addrlabel_types,
+};
+
+static const NLType rtnl_routing_policy_rule_types[] = {
+ [FRA_DST] = { .type = NETLINK_TYPE_IN_ADDR },
+ [FRA_SRC] = { .type = NETLINK_TYPE_IN_ADDR },
+ [FRA_IIFNAME] = { .type = NETLINK_TYPE_STRING },
+ [FRA_GOTO] = { .type = NETLINK_TYPE_U32 },
+ [FRA_PRIORITY] = { .type = NETLINK_TYPE_U32 },
+ [FRA_FWMARK] = { .type = NETLINK_TYPE_U32 },
+ [FRA_FLOW] = { .type = NETLINK_TYPE_U32 },
+ [FRA_TUN_ID] = { .type = NETLINK_TYPE_U64 },
+ [FRA_SUPPRESS_IFGROUP] = { .type = NETLINK_TYPE_U32 },
+ [FRA_SUPPRESS_PREFIXLEN] = { .type = NETLINK_TYPE_U32 },
+ [FRA_TABLE] = { .type = NETLINK_TYPE_U32 },
+ [FRA_FWMASK] = { .type = NETLINK_TYPE_U32 },
+ [FRA_OIFNAME] = { .type = NETLINK_TYPE_STRING },
+ [FRA_PAD] = { .type = NETLINK_TYPE_U32 },
+ [FRA_L3MDEV] = { .type = NETLINK_TYPE_U8 },
+ [FRA_UID_RANGE] = { .size = sizeof(struct fib_rule_uid_range) },
+ [FRA_PROTOCOL] = { .type = NETLINK_TYPE_U8 },
+ [FRA_IP_PROTO] = { .type = NETLINK_TYPE_U8 },
+ [FRA_SPORT_RANGE] = { .size = sizeof(struct fib_rule_port_range) },
+ [FRA_DPORT_RANGE] = { .size = sizeof(struct fib_rule_port_range) },
+};
+
+static const NLTypeSystem rtnl_routing_policy_rule_type_system = {
+ .count = ELEMENTSOF(rtnl_routing_policy_rule_types),
+ .types = rtnl_routing_policy_rule_types,
+};
+
+static const NLType rtnl_nexthop_types[] = {
+ [NHA_ID] = { .type = NETLINK_TYPE_U32 },
+ [NHA_OIF] = { .type = NETLINK_TYPE_U32 },
+ [NHA_GATEWAY] = { .type = NETLINK_TYPE_IN_ADDR },
+};
+
+static const NLTypeSystem rtnl_nexthop_type_system = {
+ .count = ELEMENTSOF(rtnl_nexthop_types),
+ .types = rtnl_nexthop_types,
+};
+
+static const NLType rtnl_tca_option_data_cake_types[] = {
+ [TCA_CAKE_BASE_RATE64] = { .type = NETLINK_TYPE_U64 },
+ [TCA_CAKE_OVERHEAD] = { .type = NETLINK_TYPE_S32 },
+ [TCA_CAKE_MPU] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_codel_types[] = {
+ [TCA_CODEL_TARGET] = { .type = NETLINK_TYPE_U32 },
+ [TCA_CODEL_LIMIT] = { .type = NETLINK_TYPE_U32 },
+ [TCA_CODEL_INTERVAL] = { .type = NETLINK_TYPE_U32 },
+ [TCA_CODEL_ECN] = { .type = NETLINK_TYPE_U32 },
+ [TCA_CODEL_CE_THRESHOLD] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_drr_types[] = {
+ [TCA_DRR_QUANTUM] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_ets_quanta_types[] = {
+ [TCA_ETS_QUANTA_BAND] = { .type = NETLINK_TYPE_U32, },
+};
+
+static const NLTypeSystem rtnl_tca_option_data_ets_quanta_type_system = {
+ .count = ELEMENTSOF(rtnl_tca_option_data_ets_quanta_types),
+ .types = rtnl_tca_option_data_ets_quanta_types,
+};
+
+static const NLType rtnl_tca_option_data_ets_prio_types[] = {
+ [TCA_ETS_PRIOMAP_BAND] = { .type = NETLINK_TYPE_U8, },
+};
+
+static const NLTypeSystem rtnl_tca_option_data_ets_prio_type_system = {
+ .count = ELEMENTSOF(rtnl_tca_option_data_ets_prio_types),
+ .types = rtnl_tca_option_data_ets_prio_types,
+};
+
+static const NLType rtnl_tca_option_data_ets_types[] = {
+ [TCA_ETS_NBANDS] = { .type = NETLINK_TYPE_U8 },
+ [TCA_ETS_NSTRICT] = { .type = NETLINK_TYPE_U8 },
+ [TCA_ETS_QUANTA] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_tca_option_data_ets_quanta_type_system },
+ [TCA_ETS_PRIOMAP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_tca_option_data_ets_prio_type_system },
+ [TCA_ETS_QUANTA_BAND] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_fq_types[] = {
+ [TCA_FQ_PLIMIT] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_FLOW_PLIMIT] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_QUANTUM] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_INITIAL_QUANTUM] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_RATE_ENABLE] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_FLOW_MAX_RATE] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_BUCKETS_LOG] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CE_THRESHOLD] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_ORPHAN_MASK] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_fq_codel_types[] = {
+ [TCA_FQ_CODEL_TARGET] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CODEL_LIMIT] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CODEL_INTERVAL] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CODEL_ECN] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CODEL_FLOWS] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CODEL_QUANTUM] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NETLINK_TYPE_U32 },
+ [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_fq_pie_types[] = {
+ [TCA_FQ_PIE_LIMIT] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_gred_types[] = {
+ [TCA_GRED_DPS] = { .size = sizeof(struct tc_gred_sopt) },
+};
+
+static const NLType rtnl_tca_option_data_hhf_types[] = {
+ [TCA_HHF_BACKLOG_LIMIT] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_htb_types[] = {
+ [TCA_HTB_PARMS] = { .size = sizeof(struct tc_htb_opt) },
+ [TCA_HTB_INIT] = { .size = sizeof(struct tc_htb_glob) },
+ [TCA_HTB_CTAB] = { .size = TC_RTAB_SIZE },
+ [TCA_HTB_RTAB] = { .size = TC_RTAB_SIZE },
+ [TCA_HTB_RATE64] = { .type = NETLINK_TYPE_U64 },
+ [TCA_HTB_CEIL64] = { .type = NETLINK_TYPE_U64 },
+};
+
+static const NLType rtnl_tca_option_data_pie_types[] = {
+ [TCA_PIE_LIMIT] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_qfq_types[] = {
+ [TCA_QFQ_WEIGHT] = { .type = NETLINK_TYPE_U32 },
+ [TCA_QFQ_LMAX] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLType rtnl_tca_option_data_sfb_types[] = {
+ [TCA_SFB_PARMS] = { .size = sizeof(struct tc_sfb_qopt) },
+};
+
+static const NLType rtnl_tca_option_data_tbf_types[] = {
+ [TCA_TBF_PARMS] = { .size = sizeof(struct tc_tbf_qopt) },
+ [TCA_TBF_RTAB] = { .size = TC_RTAB_SIZE },
+ [TCA_TBF_PTAB] = { .size = TC_RTAB_SIZE },
+ [TCA_TBF_RATE64] = { .type = NETLINK_TYPE_U64 },
+ [TCA_TBF_PRATE64] = { .type = NETLINK_TYPE_U64 },
+ [TCA_TBF_BURST] = { .type = NETLINK_TYPE_U32 },
+ [TCA_TBF_PBURST] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const char* const nl_union_tca_option_data_table[] = {
+ [NL_UNION_TCA_OPTION_DATA_CAKE] = "cake",
+ [NL_UNION_TCA_OPTION_DATA_CODEL] = "codel",
+ [NL_UNION_TCA_OPTION_DATA_DRR] = "drr",
+ [NL_UNION_TCA_OPTION_DATA_ETS] = "ets",
+ [NL_UNION_TCA_OPTION_DATA_FQ] = "fq",
+ [NL_UNION_TCA_OPTION_DATA_FQ_CODEL] = "fq_codel",
+ [NL_UNION_TCA_OPTION_DATA_FQ_PIE] = "fq_pie",
+ [NL_UNION_TCA_OPTION_DATA_GRED] = "gred",
+ [NL_UNION_TCA_OPTION_DATA_HHF] = "hhf",
+ [NL_UNION_TCA_OPTION_DATA_HTB] = "htb",
+ [NL_UNION_TCA_OPTION_DATA_PIE] = "pie",
+ [NL_UNION_TCA_OPTION_DATA_QFQ] = "qfq",
+ [NL_UNION_TCA_OPTION_DATA_SFB] = "sfb",
+ [NL_UNION_TCA_OPTION_DATA_TBF] = "tbf",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(nl_union_tca_option_data, NLUnionTCAOptionData);
+
+static const NLTypeSystem rtnl_tca_option_data_type_systems[] = {
+ [NL_UNION_TCA_OPTION_DATA_CAKE] = { .count = ELEMENTSOF(rtnl_tca_option_data_cake_types),
+ .types = rtnl_tca_option_data_cake_types },
+ [NL_UNION_TCA_OPTION_DATA_CODEL] = { .count = ELEMENTSOF(rtnl_tca_option_data_codel_types),
+ .types = rtnl_tca_option_data_codel_types },
+ [NL_UNION_TCA_OPTION_DATA_DRR] = { .count = ELEMENTSOF(rtnl_tca_option_data_drr_types),
+ .types = rtnl_tca_option_data_drr_types },
+ [NL_UNION_TCA_OPTION_DATA_ETS] = { .count = ELEMENTSOF(rtnl_tca_option_data_ets_types),
+ .types = rtnl_tca_option_data_ets_types },
+ [NL_UNION_TCA_OPTION_DATA_FQ] = { .count = ELEMENTSOF(rtnl_tca_option_data_fq_types),
+ .types = rtnl_tca_option_data_fq_types },
+ [NL_UNION_TCA_OPTION_DATA_FQ_CODEL] = { .count = ELEMENTSOF(rtnl_tca_option_data_fq_codel_types),
+ .types = rtnl_tca_option_data_fq_codel_types },
+ [NL_UNION_TCA_OPTION_DATA_FQ_PIE] = { .count = ELEMENTSOF(rtnl_tca_option_data_fq_pie_types),
+ .types = rtnl_tca_option_data_fq_pie_types },
+ [NL_UNION_TCA_OPTION_DATA_GRED] = { .count = ELEMENTSOF(rtnl_tca_option_data_gred_types),
+ .types = rtnl_tca_option_data_gred_types },
+ [NL_UNION_TCA_OPTION_DATA_HHF] = { .count = ELEMENTSOF(rtnl_tca_option_data_hhf_types),
+ .types = rtnl_tca_option_data_hhf_types },
+ [NL_UNION_TCA_OPTION_DATA_HTB] = { .count = ELEMENTSOF(rtnl_tca_option_data_htb_types),
+ .types = rtnl_tca_option_data_htb_types },
+ [NL_UNION_TCA_OPTION_DATA_PIE] = { .count = ELEMENTSOF(rtnl_tca_option_data_pie_types),
+ .types = rtnl_tca_option_data_pie_types },
+ [NL_UNION_TCA_OPTION_DATA_QFQ] = { .count = ELEMENTSOF(rtnl_tca_option_data_qfq_types),
+ .types = rtnl_tca_option_data_qfq_types },
+ [NL_UNION_TCA_OPTION_DATA_SFB] = { .count = ELEMENTSOF(rtnl_tca_option_data_sfb_types),
+ .types = rtnl_tca_option_data_sfb_types },
+ [NL_UNION_TCA_OPTION_DATA_TBF] = { .count = ELEMENTSOF(rtnl_tca_option_data_tbf_types),
+ .types = rtnl_tca_option_data_tbf_types },
+};
+
+static const NLTypeSystemUnion rtnl_tca_option_data_type_system_union = {
+ .num = _NL_UNION_TCA_OPTION_DATA_MAX,
+ .lookup = nl_union_tca_option_data_from_string,
+ .type_systems = rtnl_tca_option_data_type_systems,
+ .match_type = NL_MATCH_SIBLING,
+ .match = TCA_KIND,
+};
+
+static const NLType rtnl_tca_types[] = {
+ [TCA_KIND] = { .type = NETLINK_TYPE_STRING },
+ [TCA_OPTIONS] = { .type = NETLINK_TYPE_UNION, .type_system_union = &rtnl_tca_option_data_type_system_union },
+ [TCA_INGRESS_BLOCK] = { .type = NETLINK_TYPE_U32 },
+ [TCA_EGRESS_BLOCK] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_tca_type_system = {
+ .count = ELEMENTSOF(rtnl_tca_types),
+ .types = rtnl_tca_types,
+};
+
+static const NLType mdb_types[] = {
+ [MDBA_SET_ENTRY] = { .size = sizeof(struct br_port_msg) },
+};
+
+static const NLTypeSystem rtnl_mdb_type_system = {
+ .count = ELEMENTSOF(mdb_types),
+ .types = mdb_types,
+};
+
+static const NLType error_types[] = {
+ [NLMSGERR_ATTR_MSG] = { .type = NETLINK_TYPE_STRING },
+ [NLMSGERR_ATTR_OFFS] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem error_type_system = {
+ .count = ELEMENTSOF(error_types),
+ .types = error_types,
+};
+
+static const NLType rtnl_types[] = {
+ [NLMSG_DONE] = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = 0 },
+ [NLMSG_ERROR] = { .type = NETLINK_TYPE_NESTED, .type_system = &error_type_system, .size = sizeof(struct nlmsgerr) },
+ [RTM_NEWLINK] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_DELLINK] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_GETLINK] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_SETLINK] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_NEWLINKPROP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_DELLINKPROP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_GETLINKPROP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_link_type_system, .size = sizeof(struct ifinfomsg) },
+ [RTM_NEWADDR] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_address_type_system, .size = sizeof(struct ifaddrmsg) },
+ [RTM_DELADDR] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_address_type_system, .size = sizeof(struct ifaddrmsg) },
+ [RTM_GETADDR] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_address_type_system, .size = sizeof(struct ifaddrmsg) },
+ [RTM_NEWROUTE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_route_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_DELROUTE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_route_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_GETROUTE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_route_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_NEWNEIGH] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_neigh_type_system, .size = sizeof(struct ndmsg) },
+ [RTM_DELNEIGH] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_neigh_type_system, .size = sizeof(struct ndmsg) },
+ [RTM_GETNEIGH] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_neigh_type_system, .size = sizeof(struct ndmsg) },
+ [RTM_NEWADDRLABEL] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_addrlabel_type_system, .size = sizeof(struct ifaddrlblmsg) },
+ [RTM_DELADDRLABEL] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_addrlabel_type_system, .size = sizeof(struct ifaddrlblmsg) },
+ [RTM_GETADDRLABEL] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_addrlabel_type_system, .size = sizeof(struct ifaddrlblmsg) },
+ [RTM_NEWRULE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_routing_policy_rule_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_DELRULE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_routing_policy_rule_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_GETRULE] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_routing_policy_rule_type_system, .size = sizeof(struct rtmsg) },
+ [RTM_NEWNEXTHOP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
+ [RTM_DELNEXTHOP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
+ [RTM_GETNEXTHOP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
+ [RTM_NEWQDISC] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_tca_type_system, .size = sizeof(struct tcmsg) },
+ [RTM_DELQDISC] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_tca_type_system, .size = sizeof(struct tcmsg) },
+ [RTM_GETQDISC] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_tca_type_system, .size = sizeof(struct tcmsg) },
+ [RTM_NEWTCLASS] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_tca_type_system, .size = sizeof(struct tcmsg) },
+ [RTM_DELTCLASS] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_tca_type_system, .size = sizeof(struct tcmsg) },
+ [RTM_GETTCLASS] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_tca_type_system, .size = sizeof(struct tcmsg) },
+ [RTM_NEWMDB] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_mdb_type_system, .size = sizeof(struct br_port_msg) },
+ [RTM_DELMDB] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_mdb_type_system, .size = sizeof(struct br_port_msg) },
+ [RTM_GETMDB] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_mdb_type_system, .size = sizeof(struct br_port_msg) },
+};
+
+const NLTypeSystem rtnl_type_system_root = {
+ .count = ELEMENTSOF(rtnl_types),
+ .types = rtnl_types,
+};
+
+static const NLType genl_wireguard_allowedip_types[] = {
+ [WGALLOWEDIP_A_FAMILY] = { .type = NETLINK_TYPE_U16 },
+ [WGALLOWEDIP_A_IPADDR] = { .type = NETLINK_TYPE_IN_ADDR },
+ [WGALLOWEDIP_A_CIDR_MASK] = { .type = NETLINK_TYPE_U8 },
+};
+
+static const NLTypeSystem genl_wireguard_allowedip_type_system = {
+ .count = ELEMENTSOF(genl_wireguard_allowedip_types),
+ .types = genl_wireguard_allowedip_types,
+};
+
+static const NLType genl_wireguard_peer_types[] = {
+ [WGPEER_A_PUBLIC_KEY] = { .size = WG_KEY_LEN },
+ [WGPEER_A_FLAGS] = { .type = NETLINK_TYPE_U32 },
+ [WGPEER_A_PRESHARED_KEY] = { .size = WG_KEY_LEN },
+ [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NETLINK_TYPE_U16 },
+ [WGPEER_A_ENDPOINT] = { .type = NETLINK_TYPE_SOCKADDR },
+ [WGPEER_A_ALLOWEDIPS] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_wireguard_allowedip_type_system },
+};
+
+static const NLTypeSystem genl_wireguard_peer_type_system = {
+ .count = ELEMENTSOF(genl_wireguard_peer_types),
+ .types = genl_wireguard_peer_types,
+};
+
+static const NLType genl_wireguard_set_device_types[] = {
+ [WGDEVICE_A_IFINDEX] = { .type = NETLINK_TYPE_U32 },
+ [WGDEVICE_A_IFNAME] = { .type = NETLINK_TYPE_STRING, .size = IFNAMSIZ-1 },
+ [WGDEVICE_A_FLAGS] = { .type = NETLINK_TYPE_U32 },
+ [WGDEVICE_A_PRIVATE_KEY] = { .size = WG_KEY_LEN },
+ [WGDEVICE_A_LISTEN_PORT] = { .type = NETLINK_TYPE_U16 },
+ [WGDEVICE_A_FWMARK] = { .type = NETLINK_TYPE_U32 },
+ [WGDEVICE_A_PEERS] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_wireguard_peer_type_system },
+};
+
+static const NLTypeSystem genl_wireguard_set_device_type_system = {
+ .count = ELEMENTSOF(genl_wireguard_set_device_types),
+ .types = genl_wireguard_set_device_types,
+};
+
+static const NLType genl_wireguard_cmds[] = {
+ [WG_CMD_SET_DEVICE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_wireguard_set_device_type_system },
+};
+
+static const NLTypeSystem genl_wireguard_type_system = {
+ .count = ELEMENTSOF(genl_wireguard_cmds),
+ .types = genl_wireguard_cmds,
+};
+
+static const NLType genl_mcast_group_types[] = {
+ [CTRL_ATTR_MCAST_GRP_NAME] = { .type = NETLINK_TYPE_STRING },
+ [CTRL_ATTR_MCAST_GRP_ID] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem genl_mcast_group_type_system = {
+ .count = ELEMENTSOF(genl_mcast_group_types),
+ .types = genl_mcast_group_types,
+};
+
+static const NLType genl_get_family_types[] = {
+ [CTRL_ATTR_FAMILY_NAME] = { .type = NETLINK_TYPE_STRING },
+ [CTRL_ATTR_FAMILY_ID] = { .type = NETLINK_TYPE_U16 },
+ [CTRL_ATTR_MCAST_GROUPS] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_mcast_group_type_system },
+};
+
+static const NLTypeSystem genl_get_family_type_system = {
+ .count = ELEMENTSOF(genl_get_family_types),
+ .types = genl_get_family_types,
+};
+
+static const NLType genl_ctrl_id_ctrl_cmds[] = {
+ [CTRL_CMD_GETFAMILY] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_get_family_type_system },
+};
+
+static const NLTypeSystem genl_ctrl_id_ctrl_type_system = {
+ .count = ELEMENTSOF(genl_ctrl_id_ctrl_cmds),
+ .types = genl_ctrl_id_ctrl_cmds,
+};
+
+static const NLType genl_fou_types[] = {
+ [FOU_ATTR_PORT] = { .type = NETLINK_TYPE_U16 },
+ [FOU_ATTR_AF] = { .type = NETLINK_TYPE_U8 },
+ [FOU_ATTR_IPPROTO] = { .type = NETLINK_TYPE_U8 },
+ [FOU_ATTR_TYPE] = { .type = NETLINK_TYPE_U8 },
+ [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NETLINK_TYPE_FLAG },
+ [FOU_ATTR_LOCAL_V4] = { .type = NETLINK_TYPE_IN_ADDR },
+ [FOU_ATTR_PEER_V4] = { .type = NETLINK_TYPE_IN_ADDR },
+ [FOU_ATTR_LOCAL_V6] = { .type = NETLINK_TYPE_IN_ADDR },
+ [FOU_ATTR_PEER_V6] = { .type = NETLINK_TYPE_IN_ADDR},
+ [FOU_ATTR_PEER_PORT] = { .type = NETLINK_TYPE_U16},
+ [FOU_ATTR_IFINDEX] = { .type = NETLINK_TYPE_U32},
+};
+
+static const NLTypeSystem genl_fou_type_system = {
+ .count = ELEMENTSOF(genl_fou_types),
+ .types = genl_fou_types,
+};
+
+static const NLType genl_fou_cmds[] = {
+ [FOU_CMD_ADD] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_fou_type_system },
+ [FOU_CMD_DEL] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_fou_type_system },
+ [FOU_CMD_GET] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_fou_type_system },
+};
+
+static const NLTypeSystem genl_fou_cmds_type_system = {
+ .count = ELEMENTSOF(genl_fou_cmds),
+ .types = genl_fou_cmds,
+};
+
+static const NLType genl_l2tp_types[] = {
+ [L2TP_ATTR_PW_TYPE] = { .type = NETLINK_TYPE_U16 },
+ [L2TP_ATTR_ENCAP_TYPE] = { .type = NETLINK_TYPE_U16 },
+ [L2TP_ATTR_OFFSET] = { .type = NETLINK_TYPE_U16 },
+ [L2TP_ATTR_DATA_SEQ] = { .type = NETLINK_TYPE_U16 },
+ [L2TP_ATTR_L2SPEC_TYPE] = { .type = NETLINK_TYPE_U8 },
+ [L2TP_ATTR_L2SPEC_LEN] = { .type = NETLINK_TYPE_U8 },
+ [L2TP_ATTR_PROTO_VERSION] = { .type = NETLINK_TYPE_U8 },
+ [L2TP_ATTR_IFNAME] = { .type = NETLINK_TYPE_STRING },
+ [L2TP_ATTR_CONN_ID] = { .type = NETLINK_TYPE_U32 },
+ [L2TP_ATTR_PEER_CONN_ID] = { .type = NETLINK_TYPE_U32 },
+ [L2TP_ATTR_SESSION_ID] = { .type = NETLINK_TYPE_U32 },
+ [L2TP_ATTR_PEER_SESSION_ID] = { .type = NETLINK_TYPE_U32 },
+ [L2TP_ATTR_UDP_CSUM] = { .type = NETLINK_TYPE_U8 },
+ [L2TP_ATTR_VLAN_ID] = { .type = NETLINK_TYPE_U16 },
+ [L2TP_ATTR_RECV_SEQ] = { .type = NETLINK_TYPE_U8 },
+ [L2TP_ATTR_SEND_SEQ] = { .type = NETLINK_TYPE_U8 },
+ [L2TP_ATTR_LNS_MODE] = { .type = NETLINK_TYPE_U8 },
+ [L2TP_ATTR_USING_IPSEC] = { .type = NETLINK_TYPE_U8 },
+ [L2TP_ATTR_FD] = { .type = NETLINK_TYPE_U32 },
+ [L2TP_ATTR_IP_SADDR] = { .type = NETLINK_TYPE_IN_ADDR },
+ [L2TP_ATTR_IP_DADDR] = { .type = NETLINK_TYPE_IN_ADDR },
+ [L2TP_ATTR_UDP_SPORT] = { .type = NETLINK_TYPE_U16 },
+ [L2TP_ATTR_UDP_DPORT] = { .type = NETLINK_TYPE_U16 },
+ [L2TP_ATTR_IP6_SADDR] = { .type = NETLINK_TYPE_IN_ADDR },
+ [L2TP_ATTR_IP6_DADDR] = { .type = NETLINK_TYPE_IN_ADDR },
+ [L2TP_ATTR_UDP_ZERO_CSUM6_TX] = { .type = NETLINK_TYPE_FLAG },
+ [L2TP_ATTR_UDP_ZERO_CSUM6_RX] = { .type = NETLINK_TYPE_FLAG },
+};
+
+static const NLTypeSystem genl_l2tp_type_system = {
+ .count = ELEMENTSOF(genl_l2tp_types),
+ .types = genl_l2tp_types,
+};
+
+static const NLType genl_l2tp[] = {
+ [L2TP_CMD_TUNNEL_CREATE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_type_system },
+ [L2TP_CMD_TUNNEL_DELETE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_type_system },
+ [L2TP_CMD_TUNNEL_MODIFY] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_type_system },
+ [L2TP_CMD_TUNNEL_GET] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_type_system },
+ [L2TP_CMD_SESSION_CREATE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_type_system },
+ [L2TP_CMD_SESSION_DELETE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_type_system },
+ [L2TP_CMD_SESSION_MODIFY] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_type_system },
+ [L2TP_CMD_SESSION_GET] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_type_system },
+};
+
+static const NLTypeSystem genl_l2tp_tunnel_session_type_system = {
+ .count = ELEMENTSOF(genl_l2tp),
+ .types = genl_l2tp,
+};
+
+static const NLType genl_rxsc_types[] = {
+ [MACSEC_RXSC_ATTR_SCI] = { .type = NETLINK_TYPE_U64 },
+};
+
+static const NLTypeSystem genl_rxsc_config_type_system = {
+ .count = ELEMENTSOF(genl_rxsc_types),
+ .types = genl_rxsc_types,
+};
+
+static const NLType genl_macsec_rxsc_types[] = {
+ [MACSEC_ATTR_IFINDEX] = { .type = NETLINK_TYPE_U32 },
+ [MACSEC_ATTR_RXSC_CONFIG] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_rxsc_config_type_system },
+};
+
+static const NLTypeSystem genl_macsec_rxsc_type_system = {
+ .count = ELEMENTSOF(genl_macsec_rxsc_types),
+ .types = genl_macsec_rxsc_types,
+};
+
+static const NLType genl_macsec_sa_config_types[] = {
+ [MACSEC_SA_ATTR_AN] = { .type = NETLINK_TYPE_U8 },
+ [MACSEC_SA_ATTR_ACTIVE] = { .type = NETLINK_TYPE_U8 },
+ [MACSEC_SA_ATTR_PN] = { .type = NETLINK_TYPE_U32 },
+ [MACSEC_SA_ATTR_KEYID] = { .size = MACSEC_KEYID_LEN },
+ [MACSEC_SA_ATTR_KEY] = { .size = MACSEC_MAX_KEY_LEN },
+};
+
+static const NLTypeSystem genl_macsec_sa_config_type_system = {
+ .count = ELEMENTSOF(genl_macsec_sa_config_types),
+ .types = genl_macsec_sa_config_types,
+};
+
+static const NLType genl_macsec_rxsa_types[] = {
+ [MACSEC_ATTR_IFINDEX] = { .type = NETLINK_TYPE_U32 },
+ [MACSEC_ATTR_SA_CONFIG] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_macsec_sa_config_type_system },
+};
+
+static const NLTypeSystem genl_macsec_rxsa_type_system = {
+ .count = ELEMENTSOF(genl_macsec_rxsa_types),
+ .types = genl_macsec_rxsa_types,
+};
+
+static const NLType genl_macsec_sa_types[] = {
+ [MACSEC_ATTR_IFINDEX] = { .type = NETLINK_TYPE_U32 },
+ [MACSEC_ATTR_RXSC_CONFIG] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_rxsc_config_type_system },
+ [MACSEC_ATTR_SA_CONFIG] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_macsec_sa_config_type_system },
+};
+
+static const NLTypeSystem genl_macsec_sa_type_system = {
+ .count = ELEMENTSOF(genl_macsec_sa_types),
+ .types = genl_macsec_sa_types,
+};
+
+static const NLType genl_macsec[] = {
+ [MACSEC_CMD_ADD_RXSC] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_macsec_rxsc_type_system },
+ [MACSEC_CMD_ADD_TXSA] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_macsec_rxsa_type_system},
+ [MACSEC_CMD_ADD_RXSA] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_macsec_sa_type_system },
+};
+
+static const NLTypeSystem genl_macsec_device_type_system = {
+ .count = ELEMENTSOF(genl_macsec),
+ .types = genl_macsec,
+};
+
+static const NLType genl_nl80211_types[] = {
+ [NL80211_ATTR_IFINDEX] = { .type = NETLINK_TYPE_U32 },
+ [NL80211_ATTR_MAC] = { .type = NETLINK_TYPE_ETHER_ADDR },
+ [NL80211_ATTR_SSID] = { .type = NETLINK_TYPE_STRING },
+ [NL80211_ATTR_IFTYPE] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem genl_nl80211_type_system = {
+ .count = ELEMENTSOF(genl_nl80211_types),
+ .types = genl_nl80211_types,
+};
+
+static const NLType genl_nl80211_cmds[] = {
+ [NL80211_CMD_GET_WIPHY] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_SET_WIPHY] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_NEW_WIPHY] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_DEL_WIPHY] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_GET_INTERFACE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_SET_INTERFACE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_NEW_INTERFACE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_DEL_INTERFACE] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_GET_STATION] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_SET_STATION] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_NEW_STATION] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+ [NL80211_CMD_DEL_STATION] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system },
+};
+
+static const NLTypeSystem genl_nl80211_cmds_type_system = {
+ .count = ELEMENTSOF(genl_nl80211_cmds),
+ .types = genl_nl80211_cmds,
+};
+
+static const NLType genl_families[] = {
+ [SD_GENL_ID_CTRL] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_ctrl_id_ctrl_type_system },
+ [SD_GENL_WIREGUARD] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_wireguard_type_system },
+ [SD_GENL_FOU] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_fou_cmds_type_system },
+ [SD_GENL_L2TP] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_l2tp_tunnel_session_type_system },
+ [SD_GENL_MACSEC] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_macsec_device_type_system },
+ [SD_GENL_NL80211] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_cmds_type_system },
+};
+
+/* Mainly used when sending message */
+const NLTypeSystem genl_family_type_system_root = {
+ .count = ELEMENTSOF(genl_families),
+ .types = genl_families,
+};
+
+static const NLType genl_types[] = {
+ [SD_GENL_ERROR] = { .type = NETLINK_TYPE_NESTED, .type_system = &error_type_system, .size = sizeof(struct nlmsgerr) },
+ [SD_GENL_DONE] = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system },
+ [SD_GENL_ID_CTRL] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_get_family_type_system, .size = sizeof(struct genlmsghdr) },
+ [SD_GENL_NL80211] = { .type = NETLINK_TYPE_NESTED, .type_system = &genl_nl80211_type_system, .size = sizeof(struct genlmsghdr) },
+};
+
+/* Mainly used when message received */
+const NLTypeSystem genl_type_system_root = {
+ .count = ELEMENTSOF(genl_types),
+ .types = genl_types,
+};
+
+uint16_t type_get_type(const NLType *type) {
+ assert(type);
+ return type->type;
+}
+
+size_t type_get_size(const NLType *type) {
+ assert(type);
+ return type->size;
+}
+
+void type_get_type_system(const NLType *nl_type, const NLTypeSystem **ret) {
+ assert(nl_type);
+ assert(ret);
+ assert(nl_type->type == NETLINK_TYPE_NESTED);
+ assert(nl_type->type_system);
+
+ *ret = nl_type->type_system;
+}
+
+void type_get_type_system_union(const NLType *nl_type, const NLTypeSystemUnion **ret) {
+ assert(nl_type);
+ assert(ret);
+ assert(nl_type->type == NETLINK_TYPE_UNION);
+ assert(nl_type->type_system_union);
+
+ *ret = nl_type->type_system_union;
+}
+
+uint16_t type_system_get_count(const NLTypeSystem *type_system) {
+ assert(type_system);
+ return type_system->count;
+}
+
+const NLTypeSystem *type_system_get_root(int protocol) {
+ switch (protocol) {
+ case NETLINK_GENERIC:
+ return &genl_type_system_root;
+ default: /* NETLINK_ROUTE: */
+ return &rtnl_type_system_root;
+ }
+}
+
+int type_system_root_get_type(sd_netlink *nl, const NLType **ret, uint16_t type) {
+ sd_genl_family family;
+ const NLType *nl_type;
+ int r;
+
+ if (!nl || nl->protocol != NETLINK_GENERIC)
+ return type_system_get_type(&rtnl_type_system_root, ret, type);
+
+ r = nlmsg_type_to_genl_family(nl, type, &family);
+ if (r < 0)
+ return r;
+
+ if (family >= genl_type_system_root.count)
+ return -EOPNOTSUPP;
+
+ nl_type = &genl_type_system_root.types[family];
+
+ if (nl_type->type == NETLINK_TYPE_UNSPEC)
+ return -EOPNOTSUPP;
+
+ *ret = nl_type;
+
+ return 0;
+}
+
+int type_system_get_type(const NLTypeSystem *type_system, const NLType **ret, uint16_t type) {
+ const NLType *nl_type;
+
+ assert(ret);
+ assert(type_system);
+ assert(type_system->types);
+
+ if (type >= type_system->count)
+ return -EOPNOTSUPP;
+
+ nl_type = &type_system->types[type];
+
+ if (nl_type->type == NETLINK_TYPE_UNSPEC)
+ return -EOPNOTSUPP;
+
+ *ret = nl_type;
+
+ return 0;
+}
+
+int type_system_get_type_system(const NLTypeSystem *type_system, const NLTypeSystem **ret, uint16_t type) {
+ const NLType *nl_type;
+ int r;
+
+ assert(ret);
+
+ r = type_system_get_type(type_system, &nl_type, type);
+ if (r < 0)
+ return r;
+
+ type_get_type_system(nl_type, ret);
+ return 0;
+}
+
+int type_system_get_type_system_union(const NLTypeSystem *type_system, const NLTypeSystemUnion **ret, uint16_t type) {
+ const NLType *nl_type;
+ int r;
+
+ assert(ret);
+
+ r = type_system_get_type(type_system, &nl_type, type);
+ if (r < 0)
+ return r;
+
+ type_get_type_system_union(nl_type, ret);
+ return 0;
+}
+
+int type_system_union_get_type_system(const NLTypeSystemUnion *type_system_union, const NLTypeSystem **ret, const char *key) {
+ int type;
+
+ assert(type_system_union);
+ assert(type_system_union->match_type == NL_MATCH_SIBLING);
+ assert(type_system_union->lookup);
+ assert(type_system_union->type_systems);
+ assert(ret);
+ assert(key);
+
+ type = type_system_union->lookup(key);
+ if (type < 0)
+ return -EOPNOTSUPP;
+
+ assert(type < type_system_union->num);
+
+ *ret = &type_system_union->type_systems[type];
+
+ return 0;
+}
+
+int type_system_union_protocol_get_type_system(const NLTypeSystemUnion *type_system_union, const NLTypeSystem **ret, uint16_t protocol) {
+ const NLTypeSystem *type_system;
+
+ assert(type_system_union);
+ assert(type_system_union->type_systems);
+ assert(type_system_union->match_type == NL_MATCH_PROTOCOL);
+ assert(ret);
+
+ if (protocol >= type_system_union->num)
+ return -EOPNOTSUPP;
+
+ type_system = &type_system_union->type_systems[protocol];
+ if (!type_system->types)
+ return -EOPNOTSUPP;
+
+ *ret = type_system;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types.h b/src/libsystemd/sd-netlink/netlink-types.h
new file mode 100644
index 0000000..b14e66f
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+enum {
+ NETLINK_TYPE_UNSPEC,
+ NETLINK_TYPE_U8, /* NLA_U8 */
+ NETLINK_TYPE_U16, /* NLA_U16 */
+ NETLINK_TYPE_U32, /* NLA_U32 */
+ NETLINK_TYPE_U64, /* NLA_U64 */
+ NETLINK_TYPE_S8, /* NLA_S8 */
+ NETLINK_TYPE_S16, /* NLA_S16 */
+ NETLINK_TYPE_S32, /* NLA_S32 */
+ NETLINK_TYPE_S64, /* NLA_S64 */
+ NETLINK_TYPE_STRING, /* NLA_STRING */
+ NETLINK_TYPE_FLAG, /* NLA_FLAG */
+ NETLINK_TYPE_IN_ADDR,
+ NETLINK_TYPE_ETHER_ADDR,
+ NETLINK_TYPE_CACHE_INFO,
+ NETLINK_TYPE_NESTED, /* NLA_NESTED */
+ NETLINK_TYPE_UNION,
+ NETLINK_TYPE_SOCKADDR,
+};
+
+typedef enum NLMatchType {
+ NL_MATCH_SIBLING,
+ NL_MATCH_PROTOCOL,
+} NLMatchType;
+
+typedef struct NLTypeSystemUnion NLTypeSystemUnion;
+typedef struct NLTypeSystem NLTypeSystem;
+typedef struct NLType NLType;
+
+struct NLTypeSystemUnion {
+ int num;
+ NLMatchType match_type;
+ uint16_t match;
+ int (*lookup)(const char *);
+ const NLTypeSystem *type_systems;
+};
+
+extern const NLTypeSystem genl_family_type_system_root;
+
+uint16_t type_get_type(const NLType *type);
+size_t type_get_size(const NLType *type);
+void type_get_type_system(const NLType *type, const NLTypeSystem **ret);
+void type_get_type_system_union(const NLType *type, const NLTypeSystemUnion **ret);
+
+const NLTypeSystem* type_system_get_root(int protocol);
+uint16_t type_system_get_count(const NLTypeSystem *type_system);
+int type_system_root_get_type(sd_netlink *nl, const NLType **ret, uint16_t type);
+int type_system_get_type(const NLTypeSystem *type_system, const NLType **ret, uint16_t type);
+int type_system_get_type_system(const NLTypeSystem *type_system, const NLTypeSystem **ret, uint16_t type);
+int type_system_get_type_system_union(const NLTypeSystem *type_system, const NLTypeSystemUnion **ret, uint16_t type);
+int type_system_union_get_type_system(const NLTypeSystemUnion *type_system_union, const NLTypeSystem **ret, const char *key);
+int type_system_union_protocol_get_type_system(const NLTypeSystemUnion *type_system_union, const NLTypeSystem **ret, uint16_t protocol);
+
+typedef enum NLUnionLinkInfoData {
+ NL_UNION_LINK_INFO_DATA_BOND,
+ NL_UNION_LINK_INFO_DATA_BRIDGE,
+ NL_UNION_LINK_INFO_DATA_VLAN,
+ NL_UNION_LINK_INFO_DATA_VETH,
+ NL_UNION_LINK_INFO_DATA_DUMMY,
+ NL_UNION_LINK_INFO_DATA_MACVLAN,
+ NL_UNION_LINK_INFO_DATA_MACVTAP,
+ NL_UNION_LINK_INFO_DATA_IPVLAN,
+ NL_UNION_LINK_INFO_DATA_IPVTAP,
+ NL_UNION_LINK_INFO_DATA_VXLAN,
+ NL_UNION_LINK_INFO_DATA_IPIP_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_IPGRE_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_ERSPAN,
+ NL_UNION_LINK_INFO_DATA_IPGRETAP_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_IP6GRE_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_IP6GRETAP_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_SIT_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_VTI_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_VTI6_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_IP6TNL_TUNNEL,
+ NL_UNION_LINK_INFO_DATA_VRF,
+ NL_UNION_LINK_INFO_DATA_VCAN,
+ NL_UNION_LINK_INFO_DATA_GENEVE,
+ NL_UNION_LINK_INFO_DATA_VXCAN,
+ NL_UNION_LINK_INFO_DATA_WIREGUARD,
+ NL_UNION_LINK_INFO_DATA_NETDEVSIM,
+ NL_UNION_LINK_INFO_DATA_CAN,
+ NL_UNION_LINK_INFO_DATA_MACSEC,
+ NL_UNION_LINK_INFO_DATA_NLMON,
+ NL_UNION_LINK_INFO_DATA_XFRM,
+ NL_UNION_LINK_INFO_DATA_IFB,
+ NL_UNION_LINK_INFO_DATA_BAREUDP,
+ _NL_UNION_LINK_INFO_DATA_MAX,
+ _NL_UNION_LINK_INFO_DATA_INVALID = -1
+} NLUnionLinkInfoData;
+
+const char *nl_union_link_info_data_to_string(NLUnionLinkInfoData p) _const_;
+NLUnionLinkInfoData nl_union_link_info_data_from_string(const char *p) _pure_;
+
+typedef enum NLUnionTCAOptionData {
+ NL_UNION_TCA_OPTION_DATA_CAKE,
+ NL_UNION_TCA_OPTION_DATA_CODEL,
+ NL_UNION_TCA_OPTION_DATA_DRR,
+ NL_UNION_TCA_OPTION_DATA_ETS,
+ NL_UNION_TCA_OPTION_DATA_FQ,
+ NL_UNION_TCA_OPTION_DATA_FQ_CODEL,
+ NL_UNION_TCA_OPTION_DATA_FQ_PIE,
+ NL_UNION_TCA_OPTION_DATA_GRED,
+ NL_UNION_TCA_OPTION_DATA_HHF,
+ NL_UNION_TCA_OPTION_DATA_HTB,
+ NL_UNION_TCA_OPTION_DATA_PIE,
+ NL_UNION_TCA_OPTION_DATA_QFQ,
+ NL_UNION_TCA_OPTION_DATA_SFB,
+ NL_UNION_TCA_OPTION_DATA_TBF,
+ _NL_UNION_TCA_OPTION_DATA_MAX,
+ _NL_UNION_TCA_OPTION_DATA_INVALID = -1,
+} NLUnionTCAOptionData;
+
+const char *nl_union_tca_option_data_to_string(NLUnionTCAOptionData p) _const_;
+NLUnionTCAOptionData nl_union_tca_option_data_from_string(const char *p) _pure_;
diff --git a/src/libsystemd/sd-netlink/netlink-util.c b/src/libsystemd/sd-netlink/netlink-util.c
new file mode 100644
index 0000000..09e2158
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-util.c
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-netlink.h"
+
+#include "format-util.h"
+#include "memory-util.h"
+#include "netlink-internal.h"
+#include "netlink-util.h"
+#include "strv.h"
+
+int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ _cleanup_strv_free_ char **alternative_names = NULL;
+ char old_name[IF_NAMESIZE + 1] = {};
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+ assert(name);
+
+ if (!ifname_valid(name))
+ return -EINVAL;
+
+ r = rtnl_get_link_alternative_names(rtnl, ifindex, &alternative_names);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get alternative names on network interface %i, ignoring: %m",
+ ifindex);
+
+ if (strv_contains(alternative_names, name)) {
+ r = rtnl_delete_link_alternative_names(rtnl, ifindex, STRV_MAKE(name));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to remove '%s' from alternative names on network interface %i: %m",
+ name, ifindex);
+
+ format_ifname(ifindex, old_name);
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_SETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(message, IFLA_IFNAME, name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ if (!isempty(old_name)) {
+ r = rtnl_set_link_alternative_names(rtnl, ifindex, STRV_MAKE(old_name));
+ if (r < 0)
+ log_debug_errno(r, "Failed to set '%s' as an alternative name on network interface %i, ignoring: %m",
+ old_name, ifindex);
+ }
+
+ return 0;
+}
+
+int rtnl_set_link_properties(sd_netlink **rtnl, int ifindex, const char *alias,
+ const struct ether_addr *mac, uint32_t mtu) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+
+ if (!alias && !mac && mtu == 0)
+ return 0;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_SETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ if (alias) {
+ r = sd_netlink_message_append_string(message, IFLA_IFALIAS, alias);
+ if (r < 0)
+ return r;
+ }
+
+ if (mac) {
+ r = sd_netlink_message_append_ether_addr(message, IFLA_ADDRESS, mac);
+ if (r < 0)
+ return r;
+ }
+
+ if (mtu != 0) {
+ r = sd_netlink_message_append_u32(message, IFLA_MTU, mtu);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int rtnl_get_link_alternative_names(sd_netlink **rtnl, int ifindex, char ***ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ _cleanup_strv_free_ char **names = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+ assert(ret);
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_strv(reply, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &names);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ *ret = TAKE_PTR(names);
+
+ return 0;
+}
+
+static int rtnl_update_link_alternative_names(sd_netlink **rtnl, uint16_t nlmsg_type, int ifindex, char * const *alternative_names) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+ assert(IN_SET(nlmsg_type, RTM_NEWLINKPROP, RTM_DELLINKPROP));
+
+ if (strv_isempty(alternative_names))
+ return 0;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, nlmsg_type, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container(message, IFLA_PROP_LIST);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_strv(message, IFLA_ALT_IFNAME, alternative_names);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int rtnl_set_link_alternative_names(sd_netlink **rtnl, int ifindex, char * const *alternative_names) {
+ return rtnl_update_link_alternative_names(rtnl, RTM_NEWLINKPROP, ifindex, alternative_names);
+}
+
+int rtnl_delete_link_alternative_names(sd_netlink **rtnl, int ifindex, char * const *alternative_names) {
+ return rtnl_update_link_alternative_names(rtnl, RTM_DELLINKPROP, ifindex, alternative_names);
+}
+
+int rtnl_set_link_alternative_names_by_ifname(sd_netlink **rtnl, const char *ifname, char * const *alternative_names) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifname);
+
+ if (strv_isempty(alternative_names))
+ return 0;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_NEWLINKPROP, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(message, IFLA_IFNAME, ifname);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container(message, IFLA_PROP_LIST);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_strv(message, IFLA_ALT_IFNAME, alternative_names);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int rtnl_resolve_link_alternative_name(sd_netlink **rtnl, const char *name) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *our_rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ int r, ret;
+
+ assert(name);
+
+ if (!rtnl)
+ rtnl = &our_rtnl;
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(message, IFLA_ALT_IFNAME, name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, &reply);
+ if (r == -EINVAL)
+ return -ENODEV; /* The device doesn't exist */
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_link_get_ifindex(reply, &ret);
+ if (r < 0)
+ return r;
+ assert(ret > 0);
+ return ret;
+}
+
+int rtnl_get_link_iftype(sd_netlink **rtnl, int ifindex, unsigned short *ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ int r;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, &reply);
+ if (r == -EINVAL)
+ return -ENODEV; /* The device does not exist */
+ if (r < 0)
+ return r;
+
+ return sd_rtnl_message_link_get_type(reply, ret);
+}
+
+int rtnl_message_new_synthetic_error(sd_netlink *rtnl, int error, uint32_t serial, sd_netlink_message **ret) {
+ struct nlmsgerr *err;
+ int r;
+
+ assert(error <= 0);
+
+ r = message_new(rtnl, ret, NLMSG_ERROR);
+ if (r < 0)
+ return r;
+
+ rtnl_message_seal(*ret);
+ (*ret)->hdr->nlmsg_seq = serial;
+
+ err = NLMSG_DATA((*ret)->hdr);
+ err->error = error;
+
+ return 0;
+}
+
+int rtnl_log_parse_error(int r) {
+ return log_error_errno(r, "Failed to parse netlink message: %m");
+}
+
+int rtnl_log_create_error(int r) {
+ return log_error_errno(r, "Failed to create netlink message: %m");
+}
+
+void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length) {
+ size_t padding_length;
+ uint8_t *padding;
+
+ assert(rta);
+ assert(!data || data_length > 0);
+
+ /* fill in the attribute */
+ rta->rta_type = type;
+ rta->rta_len = RTA_LENGTH(data_length);
+ if (data)
+ /* we don't deal with the case where the user lies about the type
+ * and gives us too little data (so don't do that)
+ */
+ padding = mempcpy(RTA_DATA(rta), data, data_length);
+
+ else
+ /* if no data was passed, make sure we still initialize the padding
+ note that we can have data_length > 0 (used by some containers) */
+ padding = RTA_DATA(rta);
+
+ /* make sure also the padding at the end of the message is initialized */
+ padding_length = (uint8_t *) rta + RTA_SPACE(data_length) - padding;
+ memzero(padding, padding_length);
+}
+
+int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length) {
+ struct rtattr *new_rta, *sub_rta;
+ size_t message_length;
+
+ assert(rta);
+ assert(!data || data_length > 0);
+
+ /* get the new message size (with padding at the end) */
+ message_length = RTA_ALIGN(rta ? (*rta)->rta_len : 0) + RTA_SPACE(data_length);
+
+ /* buffer should be smaller than both one page or 8K to be accepted by the kernel */
+ if (message_length > MIN(page_size(), 8192UL))
+ return -ENOBUFS;
+
+ /* realloc to fit the new attribute */
+ new_rta = realloc(*rta, message_length);
+ if (!new_rta)
+ return -ENOMEM;
+ *rta = new_rta;
+
+ /* get pointer to the attribute we are about to add */
+ sub_rta = (struct rtattr *) ((uint8_t *) *rta + RTA_ALIGN((*rta)->rta_len));
+
+ rtattr_append_attribute_internal(sub_rta, type, data, data_length);
+
+ /* update rta_len */
+ (*rta)->rta_len = message_length;
+
+ return 0;
+}
+
+int rtattr_read_nexthop(const struct rtnexthop *rtnh, size_t size, int family, OrderedSet **ret) {
+ _cleanup_ordered_set_free_free_ OrderedSet *set = NULL;
+ int r;
+
+ assert(rtnh);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+
+ if (size < sizeof(struct rtnexthop))
+ return -EBADMSG;
+
+ for (; size >= sizeof(struct rtnexthop); ) {
+ _cleanup_free_ MultipathRoute *m = NULL;
+
+ if (NLMSG_ALIGN(rtnh->rtnh_len) > size)
+ return -EBADMSG;
+
+ if (rtnh->rtnh_len < sizeof(struct rtnexthop))
+ return -EBADMSG;
+
+ m = new(MultipathRoute, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (MultipathRoute) {
+ .ifindex = rtnh->rtnh_ifindex,
+ .weight = rtnh->rtnh_hops == 0 ? 0 : rtnh->rtnh_hops + 1,
+ };
+
+ if (rtnh->rtnh_len > sizeof(struct rtnexthop)) {
+ size_t len = rtnh->rtnh_len - sizeof(struct rtnexthop);
+
+ for (struct rtattr *attr = RTNH_DATA(rtnh); RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+ if (attr->rta_type == RTA_GATEWAY) {
+ if (attr->rta_len != RTA_LENGTH(FAMILY_ADDRESS_SIZE(family)))
+ return -EBADMSG;
+
+ m->gateway.family = family;
+ memcpy(&m->gateway.address, RTA_DATA(attr), FAMILY_ADDRESS_SIZE(family));
+ break;
+ } else if (attr->rta_type == RTA_VIA) {
+ uint16_t gw_family;
+
+ if (family != AF_INET)
+ return -EINVAL;
+
+ if (attr->rta_len < RTA_LENGTH(sizeof(uint16_t)))
+ return -EBADMSG;
+
+ gw_family = *(uint16_t *) RTA_DATA(attr);
+
+ if (gw_family != AF_INET6)
+ return -EBADMSG;
+
+ if (attr->rta_len != RTA_LENGTH(FAMILY_ADDRESS_SIZE(gw_family) + sizeof(gw_family)))
+ return -EBADMSG;
+
+ memcpy(&m->gateway, RTA_DATA(attr), FAMILY_ADDRESS_SIZE(gw_family) + sizeof(gw_family));
+ break;
+ }
+ }
+ }
+
+ r = ordered_set_ensure_put(&set, NULL, m);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(m);
+
+ size -= NLMSG_ALIGN(rtnh->rtnh_len);
+ rtnh = RTNH_NEXT(rtnh);
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(set);
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-util.h b/src/libsystemd/sd-netlink/netlink-util.h
new file mode 100644
index 0000000..d28d07a
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-util.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/rtnetlink.h>
+
+#include "sd-netlink.h"
+
+#include "ether-addr-util.h"
+#include "in-addr-util.h"
+#include "ordered-set.h"
+#include "socket-util.h"
+#include "util.h"
+
+/* See struct rtvia in rtnetlink.h */
+typedef struct RouteVia {
+ uint16_t family;
+ union in_addr_union address;
+} _packed_ RouteVia;
+
+typedef struct MultipathRoute {
+ RouteVia gateway;
+ int ifindex;
+ uint32_t weight;
+} MultipathRoute;
+
+int rtnl_message_new_synthetic_error(sd_netlink *rtnl, int error, uint32_t serial, sd_netlink_message **ret);
+uint32_t rtnl_message_get_serial(sd_netlink_message *m);
+void rtnl_message_seal(sd_netlink_message *m);
+
+static inline bool rtnl_message_type_is_neigh(uint16_t type) {
+ return IN_SET(type, RTM_NEWNEIGH, RTM_GETNEIGH, RTM_DELNEIGH);
+}
+
+static inline bool rtnl_message_type_is_route(uint16_t type) {
+ return IN_SET(type, RTM_NEWROUTE, RTM_GETROUTE, RTM_DELROUTE);
+}
+
+static inline bool rtnl_message_type_is_nexthop(uint16_t type) {
+ return IN_SET(type, RTM_NEWNEXTHOP, RTM_GETNEXTHOP, RTM_DELNEXTHOP);
+}
+
+static inline bool rtnl_message_type_is_link(uint16_t type) {
+ return IN_SET(type,
+ RTM_NEWLINK, RTM_SETLINK, RTM_GETLINK, RTM_DELLINK,
+ RTM_NEWLINKPROP, RTM_DELLINKPROP, RTM_GETLINKPROP);
+}
+
+static inline bool rtnl_message_type_is_addr(uint16_t type) {
+ return IN_SET(type, RTM_NEWADDR, RTM_GETADDR, RTM_DELADDR);
+}
+
+static inline bool rtnl_message_type_is_addrlabel(uint16_t type) {
+ return IN_SET(type, RTM_NEWADDRLABEL, RTM_DELADDRLABEL, RTM_GETADDRLABEL);
+}
+
+static inline bool rtnl_message_type_is_routing_policy_rule(uint16_t type) {
+ return IN_SET(type, RTM_NEWRULE, RTM_DELRULE, RTM_GETRULE);
+}
+
+static inline bool rtnl_message_type_is_qdisc(uint16_t type) {
+ return IN_SET(type, RTM_NEWQDISC, RTM_DELQDISC, RTM_GETQDISC);
+}
+
+static inline bool rtnl_message_type_is_tclass(uint16_t type) {
+ return IN_SET(type, RTM_NEWTCLASS, RTM_DELTCLASS, RTM_GETTCLASS);
+}
+
+static inline bool rtnl_message_type_is_mdb(uint16_t type) {
+ return IN_SET(type, RTM_NEWMDB, RTM_DELMDB, RTM_GETMDB);
+}
+
+int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name);
+int rtnl_set_link_properties(sd_netlink **rtnl, int ifindex, const char *alias, const struct ether_addr *mac, uint32_t mtu);
+int rtnl_get_link_alternative_names(sd_netlink **rtnl, int ifindex, char ***ret);
+int rtnl_set_link_alternative_names(sd_netlink **rtnl, int ifindex, char * const *alternative_names);
+int rtnl_set_link_alternative_names_by_ifname(sd_netlink **rtnl, const char *ifname, char * const *alternative_names);
+int rtnl_delete_link_alternative_names(sd_netlink **rtnl, int ifindex, char * const *alternative_names);
+int rtnl_resolve_link_alternative_name(sd_netlink **rtnl, const char *name);
+int rtnl_get_link_iftype(sd_netlink **rtnl, int ifindex, unsigned short *ret);
+
+int rtnl_log_parse_error(int r);
+int rtnl_log_create_error(int r);
+
+#define netlink_call_async(nl, ret_slot, message, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ sd_netlink_call_async(nl, ret_slot, message, \
+ (sd_netlink_message_handler_t) _callback_, \
+ (sd_netlink_destroy_t) _destroy_, \
+ userdata, 0, __func__); \
+ })
+
+#define netlink_add_match(nl, ret_slot, match, callback, destroy_callback, userdata, description) \
+ ({ \
+ int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ sd_netlink_add_match(nl, ret_slot, match, \
+ (sd_netlink_message_handler_t) _callback_, \
+ (sd_netlink_destroy_t) _destroy_, \
+ userdata, description); \
+ })
+
+int netlink_message_append_hw_addr(sd_netlink_message *m, unsigned short type, const hw_addr_data *data);
+int netlink_message_append_in_addr_union(sd_netlink_message *m, unsigned short type, int family, const union in_addr_union *data);
+int netlink_message_append_sockaddr_union(sd_netlink_message *m, unsigned short type, const union sockaddr_union *data);
+
+int netlink_message_read_hw_addr(sd_netlink_message *m, unsigned short type, hw_addr_data *data);
+int netlink_message_read_in_addr_union(sd_netlink_message *m, unsigned short type, int family, union in_addr_union *data);
+
+void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length);
+int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length);
+
+int rtattr_read_nexthop(const struct rtnexthop *rtnh, size_t size, int family, OrderedSet **ret);
diff --git a/src/libsystemd/sd-netlink/rtnl-message.c b/src/libsystemd/sd-netlink/rtnl-message.c
new file mode 100644
index 0000000..4cabbab
--- /dev/null
+++ b/src/libsystemd/sd-netlink/rtnl-message.c
@@ -0,0 +1,1144 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/if_addrlabel.h>
+#include <linux/if_bridge.h>
+#include <linux/nexthop.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "format-util.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+#include "socket-util.h"
+#include "util.h"
+
+int sd_rtnl_message_route_set_dst_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ if ((rtm->rtm_family == AF_INET && prefixlen > 32) ||
+ (rtm->rtm_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ rtm->rtm_dst_len = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_src_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ if ((rtm->rtm_family == AF_INET && prefixlen > 32) ||
+ (rtm->rtm_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ rtm->rtm_src_len = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_scope(sd_netlink_message *m, unsigned char scope) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_scope = scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_flags(sd_netlink_message *m, unsigned flags) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_flags(const sd_netlink_message *m, unsigned *flags) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *flags = rtm->rtm_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_table(sd_netlink_message *m, unsigned char table) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_table = table;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_family(const sd_netlink_message *m, int *family) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_family(sd_netlink_message *m, int family) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_family = family;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_type(const sd_netlink_message *m, unsigned char *type) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(type, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *type = rtm->rtm_type;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_type(sd_netlink_message *m, unsigned char type) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_type = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_protocol(const sd_netlink_message *m, unsigned char *protocol) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(protocol, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *protocol = rtm->rtm_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_scope(const sd_netlink_message *m, unsigned char *scope) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(scope, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *scope = rtm->rtm_scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_tos(const sd_netlink_message *m, unsigned char *tos) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(tos, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *tos = rtm->rtm_tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_table(const sd_netlink_message *m, unsigned char *table) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(table, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *table = rtm->rtm_table;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_dst_prefixlen(const sd_netlink_message *m, unsigned char *dst_len) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(dst_len, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *dst_len = rtm->rtm_dst_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_src_prefixlen(const sd_netlink_message *m, unsigned char *src_len) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(src_len, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *src_len = rtm->rtm_src_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_route(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int rtm_family,
+ unsigned char rtm_protocol) {
+ struct rtmsg *rtm;
+ int r;
+
+ assert_return(rtnl_message_type_is_route(nlmsg_type), -EINVAL);
+ assert_return((nlmsg_type == RTM_GETROUTE && rtm_family == AF_UNSPEC) ||
+ IN_SET(rtm_family, AF_INET, AF_INET6), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWROUTE)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+
+ rtm = NLMSG_DATA((*ret)->hdr);
+
+ rtm->rtm_family = rtm_family;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_type = RTN_UNICAST;
+ rtm->rtm_table = RT_TABLE_MAIN;
+ rtm->rtm_protocol = rtm_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_nexthop(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nhmsg_type, int nh_family,
+ unsigned char nh_protocol) {
+ struct nhmsg *nhm;
+ int r;
+
+ assert_return(rtnl_message_type_is_nexthop(nhmsg_type), -EINVAL);
+ assert_return((nhmsg_type == RTM_GETNEXTHOP && nh_family == AF_UNSPEC) ||
+ IN_SET(nh_family, AF_INET, AF_INET6), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nhmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nhmsg_type == RTM_NEWNEXTHOP)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+
+ nhm = NLMSG_DATA((*ret)->hdr);
+
+ nhm->nh_family = nh_family;
+ nhm->nh_scope = RT_SCOPE_UNIVERSE;
+ nhm->nh_protocol = nh_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_nexthop_set_flags(sd_netlink_message *m, uint8_t flags) {
+ struct nhmsg *nhm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_nexthop(m->hdr->nlmsg_type), -EINVAL);
+
+ nhm = NLMSG_DATA(m->hdr);
+ nhm->nh_flags |= flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_nexthop_set_family(sd_netlink_message *m, uint8_t family) {
+ struct nhmsg *nhm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+
+ nhm = NLMSG_DATA(m->hdr);
+ nhm->nh_family = family;
+
+ return 0;
+}
+
+int sd_rtnl_message_nexthop_get_family(const sd_netlink_message *m, uint8_t *family) {
+ struct nhmsg *nhm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+
+ nhm = NLMSG_DATA(m->hdr);
+ *family = nhm->nh_family ;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_set_flags(sd_netlink_message *m, uint8_t flags) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ ndm->ndm_flags |= flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_set_state(sd_netlink_message *m, uint16_t state) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ ndm->ndm_state |= state;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_flags(const sd_netlink_message *m, uint8_t *flags) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ *flags = ndm->ndm_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_state(const sd_netlink_message *m, uint16_t *state) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ *state = ndm->ndm_state;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_family(const sd_netlink_message *m, int *family) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *family = ndm->ndm_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_ifindex(const sd_netlink_message *m, int *index) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(index, -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *index = ndm->ndm_ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_neigh(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int index, int ndm_family) {
+ struct ndmsg *ndm;
+ int r;
+
+ assert_return(rtnl_message_type_is_neigh(nlmsg_type), -EINVAL);
+ assert_return(IN_SET(ndm_family, AF_UNSPEC, AF_INET, AF_INET6, AF_BRIDGE), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWNEIGH)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+
+ ndm = NLMSG_DATA((*ret)->hdr);
+
+ ndm->ndm_family = ndm_family;
+ ndm->ndm_ifindex = index;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_flags(sd_netlink_message *m, unsigned flags, unsigned change) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(change, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_flags = flags;
+ ifi->ifi_change = change;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_type(sd_netlink_message *m, unsigned type) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_type = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_family(sd_netlink_message *m, unsigned family) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_family = family;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_link(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int index) {
+ struct ifinfomsg *ifi;
+ int r;
+
+ assert_return(rtnl_message_type_is_link(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWLINK)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+ else if (nlmsg_type == RTM_NEWLINKPROP)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL | NLM_F_APPEND;
+
+ ifi = NLMSG_DATA((*ret)->hdr);
+
+ ifi->ifi_family = AF_UNSPEC;
+ ifi->ifi_index = index;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ if ((ifa->ifa_family == AF_INET && prefixlen > 32) ||
+ (ifa->ifa_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ ifa->ifa_prefixlen = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_flags(sd_netlink_message *m, unsigned char flags) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ ifa->ifa_flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_scope(sd_netlink_message *m, unsigned char scope) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ ifa->ifa_scope = scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_family(const sd_netlink_message *m, int *family) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *family = ifa->ifa_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_prefixlen(const sd_netlink_message *m, unsigned char *prefixlen) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(prefixlen, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *prefixlen = ifa->ifa_prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_scope(const sd_netlink_message *m, unsigned char *scope) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(scope, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *scope = ifa->ifa_scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_flags(const sd_netlink_message *m, unsigned char *flags) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *flags = ifa->ifa_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_ifindex(const sd_netlink_message *m, int *ifindex) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ifindex, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *ifindex = ifa->ifa_index;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_addr(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int index,
+ int family) {
+ struct ifaddrmsg *ifa;
+ int r;
+
+ assert_return(rtnl_message_type_is_addr(nlmsg_type), -EINVAL);
+ assert_return((nlmsg_type == RTM_GETADDR && index == 0) ||
+ index > 0, -EINVAL);
+ assert_return((nlmsg_type == RTM_GETADDR && family == AF_UNSPEC) ||
+ IN_SET(family, AF_INET, AF_INET6), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_GETADDR)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_DUMP;
+
+ ifa = NLMSG_DATA((*ret)->hdr);
+
+ ifa->ifa_index = index;
+ ifa->ifa_family = family;
+ if (family == AF_INET)
+ ifa->ifa_prefixlen = 32;
+ else if (family == AF_INET6)
+ ifa->ifa_prefixlen = 128;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_addr_update(sd_netlink *rtnl, sd_netlink_message **ret,
+ int index, int family) {
+ int r;
+
+ r = sd_rtnl_message_new_addr(rtnl, ret, RTM_NEWADDR, index, family);
+ if (r < 0)
+ return r;
+
+ (*ret)->hdr->nlmsg_flags |= NLM_F_REPLACE;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_ifindex(const sd_netlink_message *m, int *ifindex) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ifindex, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *ifindex = ifi->ifi_index;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_flags(const sd_netlink_message *m, unsigned *flags) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *flags = ifi->ifi_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_type(const sd_netlink_message *m, unsigned short *type) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(type, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *type = ifi->ifi_type;
+
+ return 0;
+}
+
+int sd_rtnl_message_get_family(const sd_netlink_message *m, int *family) {
+ assert_return(m, -EINVAL);
+ assert_return(family, -EINVAL);
+
+ assert(m->hdr);
+
+ if (rtnl_message_type_is_link(m->hdr->nlmsg_type)) {
+ struct ifinfomsg *ifi;
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *family = ifi->ifi_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_route(m->hdr->nlmsg_type)) {
+ struct rtmsg *rtm;
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_neigh(m->hdr->nlmsg_type)) {
+ struct ndmsg *ndm;
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *family = ndm->ndm_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_addr(m->hdr->nlmsg_type)) {
+ struct ifaddrmsg *ifa;
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *family = ifa->ifa_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type)) {
+ struct rtmsg *rtm;
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_nexthop(m->hdr->nlmsg_type)) {
+ struct nhmsg *nhm;
+
+ nhm = NLMSG_DATA(m->hdr);
+
+ *family = nhm->nh_family;
+
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+int sd_rtnl_message_new_addrlabel(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int ifindex, int ifal_family) {
+ struct ifaddrlblmsg *addrlabel;
+ int r;
+
+ assert_return(rtnl_message_type_is_addrlabel(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWADDRLABEL)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ addrlabel = NLMSG_DATA((*ret)->hdr);
+
+ addrlabel->ifal_family = ifal_family;
+ addrlabel->ifal_index = ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_addrlabel_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct ifaddrlblmsg *addrlabel;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addrlabel(m->hdr->nlmsg_type), -EINVAL);
+
+ addrlabel = NLMSG_DATA(m->hdr);
+
+ if (prefixlen > 128)
+ return -ERANGE;
+
+ addrlabel->ifal_prefixlen = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addrlabel_get_prefixlen(const sd_netlink_message *m, unsigned char *prefixlen) {
+ struct ifaddrlblmsg *addrlabel;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addrlabel(m->hdr->nlmsg_type), -EINVAL);
+
+ addrlabel = NLMSG_DATA(m->hdr);
+
+ *prefixlen = addrlabel->ifal_prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_routing_policy_rule(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int ifal_family) {
+ struct rtmsg *rtm;
+ int r;
+
+ assert_return(rtnl_message_type_is_routing_policy_rule(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWRULE)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ rtm = NLMSG_DATA((*ret)->hdr);
+ rtm->rtm_family = ifal_family;
+ rtm->rtm_protocol = RTPROT_BOOT;
+ rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+ rtm->rtm_type = RTN_UNICAST;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_tos(sd_netlink_message *m, unsigned char tos) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_tos = tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_tos(const sd_netlink_message *m, unsigned char *tos) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *tos = routing_policy_rule->rtm_tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_table(sd_netlink_message *m, unsigned char table) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_table = table;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_table(const sd_netlink_message *m, unsigned char *table) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *table = routing_policy_rule->rtm_table;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_flags(sd_netlink_message *m, unsigned flags) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+ routing_policy_rule->rtm_flags |= flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_flags(const sd_netlink_message *m, unsigned *flags) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+ *flags = routing_policy_rule->rtm_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_rtm_type(sd_netlink_message *m, unsigned char type) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_type = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_rtm_type(const sd_netlink_message *m, unsigned char *type) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *type = routing_policy_rule->rtm_type;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_rtm_dst_prefixlen(sd_netlink_message *m, unsigned char len) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_dst_len = len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_rtm_dst_prefixlen(const sd_netlink_message *m, unsigned char *len) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *len = routing_policy_rule->rtm_dst_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_rtm_src_prefixlen(sd_netlink_message *m, unsigned char len) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ routing_policy_rule->rtm_src_len = len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(const sd_netlink_message *m, unsigned char *len) {
+ struct rtmsg *routing_policy_rule;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ routing_policy_rule = NLMSG_DATA(m->hdr);
+
+ *len = routing_policy_rule->rtm_src_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_qdisc(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int tcm_family, int tcm_ifindex) {
+ struct tcmsg *tcm;
+ int r;
+
+ assert_return(rtnl_message_type_is_qdisc(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWQDISC)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ tcm = NLMSG_DATA((*ret)->hdr);
+ tcm->tcm_family = tcm_family;
+ tcm->tcm_ifindex = tcm_ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_set_qdisc_parent(sd_netlink_message *m, uint32_t parent) {
+ struct tcmsg *tcm;
+
+ assert_return(rtnl_message_type_is_qdisc(m->hdr->nlmsg_type), -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ tcm->tcm_parent = parent;
+
+ return 0;
+}
+
+int sd_rtnl_message_set_qdisc_handle(sd_netlink_message *m, uint32_t handle) {
+ struct tcmsg *tcm;
+
+ assert_return(rtnl_message_type_is_qdisc(m->hdr->nlmsg_type), -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ tcm->tcm_handle = handle;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_tclass(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int tcm_family, int tcm_ifindex) {
+ struct tcmsg *tcm;
+ int r;
+
+ assert_return(rtnl_message_type_is_tclass(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWTCLASS)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ tcm = NLMSG_DATA((*ret)->hdr);
+ tcm->tcm_family = tcm_family;
+ tcm->tcm_ifindex = tcm_ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_set_tclass_parent(sd_netlink_message *m, uint32_t parent) {
+ struct tcmsg *tcm;
+
+ assert_return(rtnl_message_type_is_tclass(m->hdr->nlmsg_type), -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ tcm->tcm_parent = parent;
+
+ return 0;
+}
+
+int sd_rtnl_message_set_tclass_handle(sd_netlink_message *m, uint32_t handle) {
+ struct tcmsg *tcm;
+
+ assert_return(rtnl_message_type_is_tclass(m->hdr->nlmsg_type), -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ tcm->tcm_handle = handle;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_mdb(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int mdb_ifindex) {
+ struct br_port_msg *bpm;
+ int r;
+
+ assert_return(rtnl_message_type_is_mdb(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWMDB)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ bpm = NLMSG_DATA((*ret)->hdr);
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = mdb_ifindex;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c
new file mode 100644
index 0000000..7801101
--- /dev/null
+++ b/src/libsystemd/sd-netlink/sd-netlink.c
@@ -0,0 +1,921 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <poll.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "macro.h"
+#include "netlink-internal.h"
+#include "netlink-slot.h"
+#include "netlink-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static int sd_netlink_new(sd_netlink **ret) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ rtnl = new(sd_netlink, 1);
+ if (!rtnl)
+ return -ENOMEM;
+
+ *rtnl = (sd_netlink) {
+ .n_ref = 1,
+ .fd = -1,
+ .sockaddr.nl.nl_family = AF_NETLINK,
+ .original_pid = getpid_cached(),
+ .protocol = -1,
+
+ /* Change notification responses have sequence 0, so we must
+ * start our request sequence numbers at 1, or we may confuse our
+ * responses with notifications from the kernel */
+ .serial = 1,
+
+ };
+
+ /* We guarantee that the read buffer has at least space for
+ * a message header */
+ if (!greedy_realloc((void**)&rtnl->rbuffer, &rtnl->rbuffer_allocated,
+ sizeof(struct nlmsghdr), sizeof(uint8_t)))
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(rtnl);
+
+ return 0;
+}
+
+int sd_netlink_new_from_netlink(sd_netlink **ret, int fd) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ socklen_t addrlen;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_netlink_new(&rtnl);
+ if (r < 0)
+ return r;
+
+ addrlen = sizeof(rtnl->sockaddr);
+
+ r = getsockname(fd, &rtnl->sockaddr.sa, &addrlen);
+ if (r < 0)
+ return -errno;
+
+ if (rtnl->sockaddr.nl.nl_family != AF_NETLINK)
+ return -EINVAL;
+
+ rtnl->fd = fd;
+
+ *ret = TAKE_PTR(rtnl);
+
+ return 0;
+}
+
+static bool rtnl_pid_changed(const sd_netlink *rtnl) {
+ assert(rtnl);
+
+ /* We don't support people creating an rtnl connection and
+ * keeping it around over a fork(). Let's complain. */
+
+ return rtnl->original_pid != getpid_cached();
+}
+
+int sd_netlink_open_fd(sd_netlink **ret, int fd) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int r;
+ int protocol;
+ socklen_t l;
+
+ assert_return(ret, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ r = sd_netlink_new(&rtnl);
+ if (r < 0)
+ return r;
+
+ l = sizeof(protocol);
+ r = getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &l);
+ if (r < 0)
+ return r;
+
+ rtnl->fd = fd;
+ rtnl->protocol = protocol;
+
+ r = setsockopt_int(fd, SOL_NETLINK, NETLINK_EXT_ACK, 1);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: Failed to enable NETLINK_EXT_ACK option, ignoring: %m");
+
+ r = socket_bind(rtnl);
+ if (r < 0) {
+ rtnl->fd = -1; /* on failure, the caller remains owner of the fd, hence don't close it here */
+ rtnl->protocol = -1;
+ return r;
+ }
+
+ *ret = TAKE_PTR(rtnl);
+
+ return 0;
+}
+
+int netlink_open_family(sd_netlink **ret, int family) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ fd = socket_open(family);
+ if (fd < 0)
+ return fd;
+
+ r = sd_netlink_open_fd(ret, fd);
+ if (r < 0)
+ return r;
+ TAKE_FD(fd);
+
+ return 0;
+}
+
+int sd_netlink_open(sd_netlink **ret) {
+ return netlink_open_family(ret, NETLINK_ROUTE);
+}
+
+int sd_netlink_inc_rcvbuf(sd_netlink *rtnl, size_t size) {
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+
+ return fd_inc_rcvbuf(rtnl->fd, size);
+}
+
+static sd_netlink *netlink_free(sd_netlink *rtnl) {
+ sd_netlink_slot *s;
+ unsigned i;
+
+ assert(rtnl);
+
+ for (i = 0; i < rtnl->rqueue_size; i++)
+ sd_netlink_message_unref(rtnl->rqueue[i]);
+ free(rtnl->rqueue);
+
+ for (i = 0; i < rtnl->rqueue_partial_size; i++)
+ sd_netlink_message_unref(rtnl->rqueue_partial[i]);
+ free(rtnl->rqueue_partial);
+
+ free(rtnl->rbuffer);
+
+ while ((s = rtnl->slots)) {
+ assert(s->floating);
+ netlink_slot_disconnect(s, true);
+ }
+ hashmap_free(rtnl->reply_callbacks);
+ prioq_free(rtnl->reply_callbacks_prioq);
+
+ sd_event_source_unref(rtnl->io_event_source);
+ sd_event_source_unref(rtnl->time_event_source);
+ sd_event_unref(rtnl->event);
+
+ hashmap_free(rtnl->broadcast_group_refs);
+
+ hashmap_free(rtnl->genl_family_to_nlmsg_type);
+ hashmap_free(rtnl->nlmsg_type_to_genl_family);
+
+ safe_close(rtnl->fd);
+ return mfree(rtnl);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink, sd_netlink, netlink_free);
+
+static void rtnl_seal_message(sd_netlink *rtnl, sd_netlink_message *m) {
+ assert(rtnl);
+ assert(!rtnl_pid_changed(rtnl));
+ assert(m);
+ assert(m->hdr);
+
+ /* don't use seq == 0, as that is used for broadcasts, so we
+ would get confused by replies to such messages */
+ m->hdr->nlmsg_seq = rtnl->serial++ ? : rtnl->serial++;
+
+ rtnl_message_seal(m);
+
+ return;
+}
+
+int sd_netlink_send(sd_netlink *nl,
+ sd_netlink_message *message,
+ uint32_t *serial) {
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!rtnl_pid_changed(nl), -ECHILD);
+ assert_return(message, -EINVAL);
+ assert_return(!message->sealed, -EPERM);
+
+ rtnl_seal_message(nl, message);
+
+ r = socket_write_message(nl, message);
+ if (r < 0)
+ return r;
+
+ if (serial)
+ *serial = rtnl_message_get_serial(message);
+
+ return 1;
+}
+
+int rtnl_rqueue_make_room(sd_netlink *rtnl) {
+ assert(rtnl);
+
+ if (rtnl->rqueue_size >= RTNL_RQUEUE_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "rtnl: exhausted the read queue size (%d)",
+ RTNL_RQUEUE_MAX);
+
+ if (!GREEDY_REALLOC(rtnl->rqueue, rtnl->rqueue_allocated, rtnl->rqueue_size + 1))
+ return -ENOMEM;
+
+ return 0;
+}
+
+int rtnl_rqueue_partial_make_room(sd_netlink *rtnl) {
+ assert(rtnl);
+
+ if (rtnl->rqueue_partial_size >= RTNL_RQUEUE_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "rtnl: exhausted the partial read queue size (%d)",
+ RTNL_RQUEUE_MAX);
+
+ if (!GREEDY_REALLOC(rtnl->rqueue_partial, rtnl->rqueue_partial_allocated,
+ rtnl->rqueue_partial_size + 1))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int dispatch_rqueue(sd_netlink *rtnl, sd_netlink_message **message) {
+ int r;
+
+ assert(rtnl);
+ assert(message);
+
+ if (rtnl->rqueue_size <= 0) {
+ /* Try to read a new message */
+ r = socket_read_message(rtnl);
+ if (r == -ENOBUFS) { /* FIXME: ignore buffer overruns for now */
+ log_debug_errno(r, "Got ENOBUFS from netlink socket, ignoring.");
+ return 1;
+ }
+ if (r <= 0)
+ return r;
+ }
+
+ /* Dispatch a queued message */
+ *message = rtnl->rqueue[0];
+ rtnl->rqueue_size--;
+ memmove(rtnl->rqueue, rtnl->rqueue + 1, sizeof(sd_netlink_message*) * rtnl->rqueue_size);
+
+ return 1;
+}
+
+static int process_timeout(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ struct reply_callback *c;
+ sd_netlink_slot *slot;
+ usec_t n;
+ int r;
+
+ assert(rtnl);
+
+ c = prioq_peek(rtnl->reply_callbacks_prioq);
+ if (!c)
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+ if (c->timeout > n)
+ return 0;
+
+ r = rtnl_message_new_synthetic_error(rtnl, -ETIMEDOUT, c->serial, &m);
+ if (r < 0)
+ return r;
+
+ assert_se(prioq_pop(rtnl->reply_callbacks_prioq) == c);
+ c->timeout = 0;
+ hashmap_remove(rtnl->reply_callbacks, &c->serial);
+
+ slot = container_of(c, sd_netlink_slot, reply_callback);
+
+ r = c->callback(rtnl, m, slot->userdata);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: timedout callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+
+ if (slot->floating)
+ netlink_slot_disconnect(slot, true);
+
+ return 1;
+}
+
+static int process_reply(sd_netlink *rtnl, sd_netlink_message *m) {
+ struct reply_callback *c;
+ sd_netlink_slot *slot;
+ uint64_t serial;
+ uint16_t type;
+ int r;
+
+ assert(rtnl);
+ assert(m);
+
+ serial = rtnl_message_get_serial(m);
+ c = hashmap_remove(rtnl->reply_callbacks, &serial);
+ if (!c)
+ return 0;
+
+ if (c->timeout != 0) {
+ prioq_remove(rtnl->reply_callbacks_prioq, c, &c->prioq_idx);
+ c->timeout = 0;
+ }
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ if (type == NLMSG_DONE)
+ m = NULL;
+
+ slot = container_of(c, sd_netlink_slot, reply_callback);
+
+ r = c->callback(rtnl, m, slot->userdata);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: reply callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+
+ if (slot->floating)
+ netlink_slot_disconnect(slot, true);
+
+ return 1;
+}
+
+static int process_match(sd_netlink *rtnl, sd_netlink_message *m) {
+ struct match_callback *c;
+ sd_netlink_slot *slot;
+ uint16_t type;
+ int r;
+
+ assert(rtnl);
+ assert(m);
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(match_callbacks, c, rtnl->match_callbacks) {
+ if (type == c->type) {
+ slot = container_of(c, sd_netlink_slot, match_callback);
+
+ r = c->callback(rtnl, m, slot->userdata);
+ if (r != 0) {
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: match callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+
+ break;
+ }
+ }
+ }
+
+ return 1;
+}
+
+static int process_running(sd_netlink *rtnl, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(rtnl);
+
+ r = process_timeout(rtnl);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_rqueue(rtnl, &m);
+ if (r < 0)
+ return r;
+ if (!m)
+ goto null_message;
+
+ if (sd_netlink_message_is_broadcast(m)) {
+ r = process_match(rtnl, m);
+ if (r != 0)
+ goto null_message;
+ } else {
+ r = process_reply(rtnl, m);
+ if (r != 0)
+ goto null_message;
+ }
+
+ if (ret) {
+ *ret = TAKE_PTR(m);
+
+ return 1;
+ }
+
+ return 1;
+
+null_message:
+ if (r >= 0 && ret)
+ *ret = NULL;
+
+ return r;
+}
+
+int sd_netlink_process(sd_netlink *rtnl, sd_netlink_message **ret) {
+ NETLINK_DONT_DESTROY(rtnl);
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+ assert_return(!rtnl->processing, -EBUSY);
+
+ rtnl->processing = true;
+ r = process_running(rtnl, ret);
+ rtnl->processing = false;
+
+ return r;
+}
+
+static usec_t calc_elapse(uint64_t usec) {
+ if (usec == (uint64_t) -1)
+ return 0;
+
+ if (usec == 0)
+ usec = RTNL_DEFAULT_TIMEOUT;
+
+ return now(CLOCK_MONOTONIC) + usec;
+}
+
+static int rtnl_poll(sd_netlink *rtnl, bool need_more, uint64_t timeout_usec) {
+ usec_t m = USEC_INFINITY;
+ int r, e;
+
+ assert(rtnl);
+
+ e = sd_netlink_get_events(rtnl);
+ if (e < 0)
+ return e;
+
+ if (need_more)
+ /* Caller wants more data, and doesn't care about
+ * what's been read or any other timeouts. */
+ e |= POLLIN;
+ else {
+ usec_t until;
+ /* Caller wants to process if there is something to
+ * process, but doesn't care otherwise */
+
+ r = sd_netlink_get_timeout(rtnl, &until);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ usec_t nw;
+ nw = now(CLOCK_MONOTONIC);
+ m = until > nw ? until - nw : 0;
+ }
+ }
+
+ if (timeout_usec != (uint64_t) -1 && (m == USEC_INFINITY || timeout_usec < m))
+ m = timeout_usec;
+
+ r = fd_wait_for_event(rtnl->fd, e, m);
+ if (r <= 0)
+ return r;
+
+ return 1;
+}
+
+int sd_netlink_wait(sd_netlink *nl, uint64_t timeout_usec) {
+ assert_return(nl, -EINVAL);
+ assert_return(!rtnl_pid_changed(nl), -ECHILD);
+
+ if (nl->rqueue_size > 0)
+ return 0;
+
+ return rtnl_poll(nl, false, timeout_usec);
+}
+
+static int timeout_compare(const void *a, const void *b) {
+ const struct reply_callback *x = a, *y = b;
+
+ if (x->timeout != 0 && y->timeout == 0)
+ return -1;
+
+ if (x->timeout == 0 && y->timeout != 0)
+ return 1;
+
+ return CMP(x->timeout, y->timeout);
+}
+
+int sd_netlink_call_async(
+ sd_netlink *nl,
+ sd_netlink_slot **ret_slot,
+ sd_netlink_message *m,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ uint64_t usec,
+ const char *description) {
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+ uint32_t s;
+ int r, k;
+
+ assert_return(nl, -EINVAL);
+ assert_return(m, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!rtnl_pid_changed(nl), -ECHILD);
+
+ r = hashmap_ensure_allocated(&nl->reply_callbacks, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ if (usec != (uint64_t) -1) {
+ r = prioq_ensure_allocated(&nl->reply_callbacks_prioq, timeout_compare);
+ if (r < 0)
+ return r;
+ }
+
+ r = netlink_slot_allocate(nl, !ret_slot, NETLINK_REPLY_CALLBACK, sizeof(struct reply_callback), userdata, description, &slot);
+ if (r < 0)
+ return r;
+
+ slot->reply_callback.callback = callback;
+ slot->reply_callback.timeout = calc_elapse(usec);
+
+ k = sd_netlink_send(nl, m, &s);
+ if (k < 0)
+ return k;
+
+ slot->reply_callback.serial = s;
+
+ r = hashmap_put(nl->reply_callbacks, &slot->reply_callback.serial, &slot->reply_callback);
+ if (r < 0)
+ return r;
+
+ if (slot->reply_callback.timeout != 0) {
+ r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+ if (r < 0) {
+ (void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);
+ return r;
+ }
+ }
+
+ /* Set this at last. Otherwise, some failures in above call the destroy callback but some do not. */
+ slot->destroy_callback = destroy_callback;
+
+ if (ret_slot)
+ *ret_slot = slot;
+
+ TAKE_PTR(slot);
+
+ return k;
+}
+
+int sd_netlink_call(sd_netlink *rtnl,
+ sd_netlink_message *message,
+ uint64_t usec,
+ sd_netlink_message **ret) {
+ usec_t timeout;
+ uint32_t serial;
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+ assert_return(message, -EINVAL);
+
+ r = sd_netlink_send(rtnl, message, &serial);
+ if (r < 0)
+ return r;
+
+ timeout = calc_elapse(usec);
+
+ for (;;) {
+ usec_t left;
+ unsigned i;
+
+ for (i = 0; i < rtnl->rqueue_size; i++) {
+ uint32_t received_serial;
+
+ received_serial = rtnl_message_get_serial(rtnl->rqueue[i]);
+
+ if (received_serial == serial) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *incoming = NULL;
+ uint16_t type;
+
+ incoming = rtnl->rqueue[i];
+
+ /* found a match, remove from rqueue and return it */
+ memmove(rtnl->rqueue + i,rtnl->rqueue + i + 1,
+ sizeof(sd_netlink_message*) * (rtnl->rqueue_size - i - 1));
+ rtnl->rqueue_size--;
+
+ r = sd_netlink_message_get_errno(incoming);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(incoming, &type);
+ if (r < 0)
+ return r;
+
+ if (type == NLMSG_DONE) {
+ *ret = NULL;
+ return 0;
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(incoming);
+
+ return 1;
+ }
+ }
+
+ r = socket_read_message(rtnl);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ /* received message, so try to process straight away */
+ continue;
+
+ if (timeout > 0) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= timeout)
+ return -ETIMEDOUT;
+
+ left = timeout - n;
+ } else
+ left = (uint64_t) -1;
+
+ r = rtnl_poll(rtnl, true, left);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ return -ETIMEDOUT;
+ }
+}
+
+int sd_netlink_get_events(const sd_netlink *rtnl) {
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+
+ if (rtnl->rqueue_size == 0)
+ return POLLIN;
+ else
+ return 0;
+}
+
+int sd_netlink_get_timeout(const sd_netlink *rtnl, uint64_t *timeout_usec) {
+ struct reply_callback *c;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(timeout_usec, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+
+ if (rtnl->rqueue_size > 0) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ c = prioq_peek(rtnl->reply_callbacks_prioq);
+ if (!c) {
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+ }
+
+ *timeout_usec = c->timeout;
+
+ return 1;
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_netlink *rtnl = userdata;
+ int r;
+
+ assert(rtnl);
+
+ r = sd_netlink_process(rtnl, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_netlink *rtnl = userdata;
+ int r;
+
+ assert(rtnl);
+
+ r = sd_netlink_process(rtnl, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int prepare_callback(sd_event_source *s, void *userdata) {
+ sd_netlink *rtnl = userdata;
+ int r, e;
+ usec_t until;
+
+ assert(s);
+ assert(rtnl);
+
+ e = sd_netlink_get_events(rtnl);
+ if (e < 0)
+ return e;
+
+ r = sd_event_source_set_io_events(rtnl->io_event_source, e);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_get_timeout(rtnl, &until);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ int j;
+
+ j = sd_event_source_set_time(rtnl->time_event_source, until);
+ if (j < 0)
+ return j;
+ }
+
+ r = sd_event_source_set_enabled(rtnl->time_event_source, r > 0);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int sd_netlink_attach_event(sd_netlink *rtnl, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(!rtnl->event, -EBUSY);
+
+ assert(!rtnl->io_event_source);
+ assert(!rtnl->time_event_source);
+
+ if (event)
+ rtnl->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&rtnl->event);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_io(rtnl->event, &rtnl->io_event_source, rtnl->fd, 0, io_callback, rtnl);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(rtnl->io_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(rtnl->io_event_source, "rtnl-receive-message");
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_prepare(rtnl->io_event_source, prepare_callback);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_add_time(rtnl->event, &rtnl->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, rtnl);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(rtnl->time_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(rtnl->time_event_source, "rtnl-timer");
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_netlink_detach_event(rtnl);
+ return r;
+}
+
+int sd_netlink_detach_event(sd_netlink *rtnl) {
+ assert_return(rtnl, -EINVAL);
+ assert_return(rtnl->event, -ENXIO);
+
+ rtnl->io_event_source = sd_event_source_unref(rtnl->io_event_source);
+
+ rtnl->time_event_source = sd_event_source_unref(rtnl->time_event_source);
+
+ rtnl->event = sd_event_unref(rtnl->event);
+
+ return 0;
+}
+
+int sd_netlink_add_match(
+ sd_netlink *rtnl,
+ sd_netlink_slot **ret_slot,
+ uint16_t type,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ const char *description) {
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+ int r;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
+
+ r = netlink_slot_allocate(rtnl, !ret_slot, NETLINK_MATCH_CALLBACK, sizeof(struct match_callback), userdata, description, &slot);
+ if (r < 0)
+ return r;
+
+ slot->match_callback.callback = callback;
+ slot->match_callback.type = type;
+
+ switch (type) {
+ case RTM_NEWLINK:
+ case RTM_DELLINK:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_LINK);
+ if (r < 0)
+ return r;
+
+ break;
+ case RTM_NEWADDR:
+ case RTM_DELADDR:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV4_IFADDR);
+ if (r < 0)
+ return r;
+
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV6_IFADDR);
+ if (r < 0)
+ return r;
+
+ break;
+ case RTM_NEWNEIGH:
+ case RTM_DELNEIGH:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_NEIGH);
+ if (r < 0)
+ return r;
+
+ break;
+ case RTM_NEWROUTE:
+ case RTM_DELROUTE:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV4_ROUTE);
+ if (r < 0)
+ return r;
+
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV6_ROUTE);
+ if (r < 0)
+ return r;
+ break;
+ case RTM_NEWRULE:
+ case RTM_DELRULE:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV4_RULE);
+ if (r < 0)
+ return r;
+
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_IPV6_RULE);
+ if (r < 0)
+ return r;
+ break;
+ case RTM_NEWNEXTHOP:
+ case RTM_DELNEXTHOP:
+ r = socket_broadcast_group_ref(rtnl, RTNLGRP_NEXTHOP);
+ if (r < 0)
+ return r;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ LIST_PREPEND(match_callbacks, rtnl->match_callbacks, &slot->match_callback);
+
+ /* Set this at last. Otherwise, some failures in above call the destroy callback but some do not. */
+ slot->destroy_callback = destroy_callback;
+
+ if (ret_slot)
+ *ret_slot = slot;
+
+ TAKE_PTR(slot);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/test-netlink.c b/src/libsystemd/sd-netlink/test-netlink.c
new file mode 100644
index 0000000..41787c5
--- /dev/null
+++ b/src/libsystemd/sd-netlink/test-netlink.c
@@ -0,0 +1,642 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <netinet/ether.h>
+#include <linux/genetlink.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "macro.h"
+#include "netlink-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static void test_message_link_bridge(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ uint32_t cost;
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_NEWLINK, 1) >= 0);
+ assert_se(sd_rtnl_message_link_set_family(message, AF_BRIDGE) >= 0);
+ assert_se(sd_netlink_message_open_container(message, IFLA_PROTINFO) >= 0);
+ assert_se(sd_netlink_message_append_u32(message, IFLA_BRPORT_COST, 10) >= 0);
+ assert_se(sd_netlink_message_close_container(message) >= 0);
+
+ assert_se(sd_netlink_message_rewind(message, NULL) >= 0);
+
+ assert_se(sd_netlink_message_enter_container(message, IFLA_PROTINFO) >= 0);
+ assert_se(sd_netlink_message_read_u32(message, IFLA_BRPORT_COST, &cost) >= 0);
+ assert_se(cost == 10);
+ assert_se(sd_netlink_message_exit_container(message) >= 0);
+}
+
+static void test_link_configure(sd_netlink *rtnl, int ifindex) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ const char *mac = "98:fe:94:3f:c6:18", *name = "test";
+ char buffer[ETHER_ADDR_TO_STRING_MAX];
+ uint32_t mtu = 1450, mtu_out;
+ const char *name_out;
+ struct ether_addr mac_out;
+
+ /* we'd really like to test NEWLINK, but let's not mess with the running kernel */
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_message_append_string(message, IFLA_IFNAME, name) >= 0);
+ assert_se(sd_netlink_message_append_ether_addr(message, IFLA_ADDRESS, ether_aton(mac)) >= 0);
+ assert_se(sd_netlink_message_append_u32(message, IFLA_MTU, mtu) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, message, 0, NULL) == 1);
+ assert_se(sd_netlink_message_rewind(message, NULL) >= 0);
+
+ assert_se(sd_netlink_message_read_string(message, IFLA_IFNAME, &name_out) >= 0);
+ assert_se(streq(name, name_out));
+
+ assert_se(sd_netlink_message_read_ether_addr(message, IFLA_ADDRESS, &mac_out) >= 0);
+ assert_se(streq(mac, ether_addr_to_string(&mac_out, buffer)));
+
+ assert_se(sd_netlink_message_read_u32(message, IFLA_MTU, &mtu_out) >= 0);
+ assert_se(mtu == mtu_out);
+}
+
+static void test_link_get(sd_netlink *rtnl, int ifindex) {
+ sd_netlink_message *m;
+ sd_netlink_message *r;
+ uint32_t mtu = 1500;
+ const char *str_data;
+ uint8_t u8_data;
+ uint32_t u32_data;
+ struct ether_addr eth_data;
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(m);
+
+ /* u8 test cases */
+ assert_se(sd_netlink_message_append_u8(m, IFLA_CARRIER, 0) >= 0);
+ assert_se(sd_netlink_message_append_u8(m, IFLA_OPERSTATE, 0) >= 0);
+ assert_se(sd_netlink_message_append_u8(m, IFLA_LINKMODE, 0) >= 0);
+
+ /* u32 test cases */
+ assert_se(sd_netlink_message_append_u32(m, IFLA_MTU, mtu) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_GROUP, 0) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_TXQLEN, 0) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_NUM_TX_QUEUES, 0) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_NUM_RX_QUEUES, 0) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, m, -1, &r) == 1);
+
+ assert_se(sd_netlink_message_read_string(r, IFLA_IFNAME, &str_data) == 0);
+
+ assert_se(sd_netlink_message_read_u8(r, IFLA_CARRIER, &u8_data) == 0);
+ assert_se(sd_netlink_message_read_u8(r, IFLA_OPERSTATE, &u8_data) == 0);
+ assert_se(sd_netlink_message_read_u8(r, IFLA_LINKMODE, &u8_data) == 0);
+
+ assert_se(sd_netlink_message_read_u32(r, IFLA_MTU, &u32_data) == 0);
+ assert_se(sd_netlink_message_read_u32(r, IFLA_GROUP, &u32_data) == 0);
+ assert_se(sd_netlink_message_read_u32(r, IFLA_TXQLEN, &u32_data) == 0);
+ assert_se(sd_netlink_message_read_u32(r, IFLA_NUM_TX_QUEUES, &u32_data) == 0);
+ assert_se(sd_netlink_message_read_u32(r, IFLA_NUM_RX_QUEUES, &u32_data) == 0);
+
+ assert_se(sd_netlink_message_read_ether_addr(r, IFLA_ADDRESS, &eth_data) == 0);
+
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+}
+
+static void test_address_get(sd_netlink *rtnl, int ifindex) {
+ sd_netlink_message *m;
+ sd_netlink_message *r;
+ struct in_addr in_data;
+ struct ifa_cacheinfo cache;
+ const char *label;
+
+ assert_se(sd_rtnl_message_new_addr(rtnl, &m, RTM_GETADDR, ifindex, AF_INET) >= 0);
+ assert_se(m);
+
+ assert_se(sd_netlink_call(rtnl, m, -1, &r) == 1);
+
+ assert_se(sd_netlink_message_read_in_addr(r, IFA_LOCAL, &in_data) == 0);
+ assert_se(sd_netlink_message_read_in_addr(r, IFA_ADDRESS, &in_data) == 0);
+ assert_se(sd_netlink_message_read_string(r, IFA_LABEL, &label) == 0);
+ assert_se(sd_netlink_message_read_cache_info(r, IFA_CACHEINFO, &cache) == 0);
+
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+
+}
+
+static void test_route(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ struct in_addr addr, addr_data;
+ uint32_t index = 2, u32_data;
+ int r;
+
+ r = sd_rtnl_message_new_route(rtnl, &req, RTM_NEWROUTE, AF_INET, RTPROT_STATIC);
+ if (r < 0) {
+ log_error_errno(r, "Could not create RTM_NEWROUTE message: %m");
+ return;
+ }
+
+ addr.s_addr = htobe32(INADDR_LOOPBACK);
+
+ r = sd_netlink_message_append_in_addr(req, RTA_GATEWAY, &addr);
+ if (r < 0) {
+ log_error_errno(r, "Could not append RTA_GATEWAY attribute: %m");
+ return;
+ }
+
+ r = sd_netlink_message_append_u32(req, RTA_OIF, index);
+ if (r < 0) {
+ log_error_errno(r, "Could not append RTA_OIF attribute: %m");
+ return;
+ }
+
+ assert_se(sd_netlink_message_rewind(req, NULL) >= 0);
+
+ assert_se(sd_netlink_message_read_in_addr(req, RTA_GATEWAY, &addr_data) >= 0);
+ assert_se(addr_data.s_addr == addr.s_addr);
+
+ assert_se(sd_netlink_message_read_u32(req, RTA_OIF, &u32_data) >= 0);
+ assert_se(u32_data == index);
+
+ assert_se((req = sd_netlink_message_unref(req)) == NULL);
+}
+
+static void test_multiple(void) {
+ sd_netlink *rtnl1, *rtnl2;
+
+ assert_se(sd_netlink_open(&rtnl1) >= 0);
+ assert_se(sd_netlink_open(&rtnl2) >= 0);
+
+ rtnl1 = sd_netlink_unref(rtnl1);
+ rtnl2 = sd_netlink_unref(rtnl2);
+}
+
+static int link_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ char *ifname = userdata;
+ const char *data;
+
+ assert_se(rtnl);
+ assert_se(m);
+ assert_se(userdata);
+
+ log_info("%s: got link info about %s", __func__, ifname);
+ free(ifname);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &data) >= 0);
+ assert_se(streq(data, "lo"));
+
+ return 1;
+}
+
+static void test_event_loop(int ifindex) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ char *ifname;
+
+ ifname = strdup("lo2");
+ assert_se(ifname);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler, NULL, ifname, 0, NULL) >= 0);
+
+ assert_se(sd_event_default(&event) >= 0);
+
+ assert_se(sd_netlink_attach_event(rtnl, event, 0) >= 0);
+
+ assert_se(sd_event_run(event, 0) >= 0);
+
+ assert_se(sd_netlink_detach_event(rtnl) >= 0);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_async_destroy(void *userdata) {
+}
+
+static void test_async(int ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *r = NULL;
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL;
+ sd_netlink_destroy_t destroy_callback;
+ const char *description;
+ char *ifname;
+
+ ifname = strdup("lo");
+ assert_se(ifname);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+
+ assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler, test_async_destroy, ifname, 0, "hogehoge") >= 0);
+
+ assert_se(sd_netlink_slot_get_netlink(slot) == rtnl);
+ assert_se(sd_netlink_slot_get_userdata(slot) == ifname);
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 1);
+ assert_se(destroy_callback == test_async_destroy);
+ assert_se(sd_netlink_slot_get_floating(slot) == 0);
+ assert_se(sd_netlink_slot_get_description(slot, &description) == 1);
+ assert_se(streq(description, "hogehoge"));
+
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, &r) >= 0);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_slot_set(int ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *r = NULL;
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL;
+ sd_netlink_destroy_t destroy_callback;
+ const char *description;
+ char *ifname;
+
+ ifname = strdup("lo");
+ assert_se(ifname);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+
+ assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler, NULL, NULL, 0, NULL) >= 0);
+
+ assert_se(sd_netlink_slot_get_netlink(slot) == rtnl);
+ assert_se(!sd_netlink_slot_get_userdata(slot));
+ assert_se(!sd_netlink_slot_set_userdata(slot, ifname));
+ assert_se(sd_netlink_slot_get_userdata(slot) == ifname);
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, NULL) == 0);
+ assert_se(sd_netlink_slot_set_destroy_callback(slot, test_async_destroy) >= 0);
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 1);
+ assert_se(destroy_callback == test_async_destroy);
+ assert_se(sd_netlink_slot_get_floating(slot) == 0);
+ assert_se(sd_netlink_slot_set_floating(slot, 1) == 1);
+ assert_se(sd_netlink_slot_get_floating(slot) == 1);
+ assert_se(sd_netlink_slot_get_description(slot, NULL) == 0);
+ assert_se(sd_netlink_slot_set_description(slot, "hogehoge") >= 0);
+ assert_se(sd_netlink_slot_get_description(slot, &description) == 1);
+ assert_se(streq(description, "hogehoge"));
+
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, &r) >= 0);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+struct test_async_object {
+ unsigned n_ref;
+ char *ifname;
+};
+
+static struct test_async_object *test_async_object_free(struct test_async_object *t) {
+ assert(t);
+
+ free(t->ifname);
+ return mfree(t);
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(struct test_async_object, test_async_object, test_async_object_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct test_async_object *, test_async_object_unref);
+
+static int link_handler2(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ struct test_async_object *t = userdata;
+ const char *data;
+
+ assert_se(rtnl);
+ assert_se(m);
+ assert_se(userdata);
+
+ log_info("%s: got link info about %s", __func__, t->ifname);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &data) >= 0);
+ assert_se(streq(data, "lo"));
+
+ return 1;
+}
+
+static void test_async_object_destroy(void *userdata) {
+ struct test_async_object *t = userdata;
+
+ assert(userdata);
+
+ log_info("%s: n_ref=%u", __func__, t->n_ref);
+ test_async_object_unref(t);
+}
+
+static void test_async_destroy_callback(int ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *r = NULL;
+ _cleanup_(test_async_object_unrefp) struct test_async_object *t = NULL;
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL;
+ char *ifname;
+
+ assert_se(t = new(struct test_async_object, 1));
+ assert_se(ifname = strdup("lo"));
+ *t = (struct test_async_object) {
+ .n_ref = 1,
+ .ifname = ifname,
+ };
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ /* destroy callback is called after processing message */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, &r) == 1);
+ assert_se(t->n_ref == 1);
+
+ assert_se(!sd_netlink_message_unref(m));
+
+ /* destroy callback is called when asynchronous call is cancelled, that is, slot is freed. */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se(!(slot = sd_netlink_slot_unref(slot)));
+ assert_se(t->n_ref == 1);
+
+ assert_se(!sd_netlink_message_unref(m));
+
+ /* destroy callback is also called by sd_netlink_unref() */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+ assert_se(t->n_ref == 1);
+}
+
+static int pipe_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ int *counter = userdata;
+ int r;
+
+ (*counter)--;
+
+ r = sd_netlink_message_get_errno(m);
+
+ log_info_errno(r, "%d left in pipe. got reply: %m", *counter);
+
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static void test_pipe(int ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m1 = NULL, *m2 = NULL;
+ int counter = 0;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m1, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_rtnl_message_new_link(rtnl, &m2, RTM_GETLINK, ifindex) >= 0);
+
+ counter++;
+ assert_se(sd_netlink_call_async(rtnl, NULL, m1, pipe_handler, NULL, &counter, 0, NULL) >= 0);
+
+ counter++;
+ assert_se(sd_netlink_call_async(rtnl, NULL, m2, pipe_handler, NULL, &counter, 0, NULL) >= 0);
+
+ while (counter > 0) {
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, NULL) >= 0);
+ }
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_container(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ uint16_t u16_data;
+ uint32_t u32_data;
+ const char *string_data;
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0) >= 0);
+
+ assert_se(sd_netlink_message_open_container(m, IFLA_LINKINFO) >= 0);
+ assert_se(sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "vlan") >= 0);
+ assert_se(sd_netlink_message_append_u16(m, IFLA_VLAN_ID, 100) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ assert_se(sd_netlink_message_append_string(m, IFLA_INFO_KIND, "vlan") >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ assert_se(sd_netlink_message_close_container(m) == -EINVAL);
+
+ assert_se(sd_netlink_message_rewind(m, NULL) >= 0);
+
+ assert_se(sd_netlink_message_enter_container(m, IFLA_LINKINFO) >= 0);
+ assert_se(sd_netlink_message_read_string(m, IFLA_INFO_KIND, &string_data) >= 0);
+ assert_se(streq("vlan", string_data));
+
+ assert_se(sd_netlink_message_enter_container(m, IFLA_INFO_DATA) >= 0);
+ assert_se(sd_netlink_message_read_u16(m, IFLA_VLAN_ID, &u16_data) >= 0);
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_INFO_KIND, &string_data) >= 0);
+ assert_se(streq("vlan", string_data));
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+
+ assert_se(sd_netlink_message_read_u32(m, IFLA_LINKINFO, &u32_data) < 0);
+
+ assert_se(sd_netlink_message_exit_container(m) == -EINVAL);
+}
+
+static void test_match(void) {
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *s1 = NULL, *s2 = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_netlink_add_match(rtnl, &s1, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+ assert_se(sd_netlink_add_match(rtnl, &s2, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+ assert_se(sd_netlink_add_match(rtnl, NULL, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+
+ assert_se(!(s1 = sd_netlink_slot_unref(s1)));
+ assert_se(!(s2 = sd_netlink_slot_unref(s2)));
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_get_addresses(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *m;
+
+ assert_se(sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, AF_UNSPEC) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, req, 0, &reply) >= 0);
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ uint16_t type;
+ unsigned char scope, flags;
+ int family, ifindex;
+
+ assert_se(sd_netlink_message_get_type(m, &type) >= 0);
+ assert_se(type == RTM_NEWADDR);
+
+ assert_se(sd_rtnl_message_addr_get_ifindex(m, &ifindex) >= 0);
+ assert_se(sd_rtnl_message_addr_get_family(m, &family) >= 0);
+ assert_se(sd_rtnl_message_addr_get_scope(m, &scope) >= 0);
+ assert_se(sd_rtnl_message_addr_get_flags(m, &flags) >= 0);
+
+ assert_se(ifindex > 0);
+ assert_se(IN_SET(family, AF_INET, AF_INET6));
+
+ log_info("got IPv%u address on ifindex %i", family == AF_INET ? 4: 6, ifindex);
+ }
+}
+
+static void test_message(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ assert_se(rtnl_message_new_synthetic_error(rtnl, -ETIMEDOUT, 1, &m) >= 0);
+ assert_se(sd_netlink_message_get_errno(m) == -ETIMEDOUT);
+}
+
+static void test_array(void) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *genl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ assert_se(sd_genl_socket_open(&genl) >= 0);
+ assert_se(sd_genl_message_new(genl, SD_GENL_ID_CTRL, CTRL_CMD_GETFAMILY, &m) >= 0);
+
+ assert_se(sd_netlink_message_open_container(m, CTRL_ATTR_MCAST_GROUPS) >= 0);
+ for (unsigned i = 0; i < 10; i++) {
+ char name[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)];
+ uint32_t id = i + 1000;
+
+ xsprintf(name, "hoge%" PRIu32, id);
+ assert_se(sd_netlink_message_open_array(m, i + 1) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, CTRL_ATTR_MCAST_GRP_ID, id) >= 0);
+ assert_se(sd_netlink_message_append_string(m, CTRL_ATTR_MCAST_GRP_NAME, name) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ }
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+
+ rtnl_message_seal(m);
+ assert_se(sd_netlink_message_rewind(m, genl) >= 0);
+
+ assert_se(sd_netlink_message_enter_container(m, CTRL_ATTR_MCAST_GROUPS) >= 0);
+ for (unsigned i = 0; i < 10; i++) {
+ char expected[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)];
+ const char *name;
+ uint32_t id;
+
+ assert_se(sd_netlink_message_enter_array(m, i + 1) >= 0);
+ assert_se(sd_netlink_message_read_u32(m, CTRL_ATTR_MCAST_GRP_ID, &id) >= 0);
+ assert_se(sd_netlink_message_read_string(m, CTRL_ATTR_MCAST_GRP_NAME, &name) >= 0);
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+
+ assert_se(id == i + 1000);
+ xsprintf(expected, "hoge%" PRIu32, id);
+ assert_se(streq(name, expected));
+ }
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+}
+
+static void test_strv(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ _cleanup_strv_free_ char **names_in = NULL, **names_out;
+ const char *p;
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINKPROP, 1) >= 0);
+
+ for (unsigned i = 0; i < 10; i++) {
+ char name[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)];
+
+ xsprintf(name, "hoge%" PRIu32, i + 1000);
+ assert_se(strv_extend(&names_in, name) >= 0);
+ }
+
+ assert_se(sd_netlink_message_open_container(m, IFLA_PROP_LIST) >= 0);
+ assert_se(sd_netlink_message_append_strv(m, IFLA_ALT_IFNAME, names_in) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+
+ rtnl_message_seal(m);
+ assert_se(sd_netlink_message_rewind(m, NULL) >= 0);
+
+ assert_se(sd_netlink_message_read_strv(m, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &names_out) >= 0);
+ assert_se(strv_equal(names_in, names_out));
+
+ assert_se(sd_netlink_message_enter_container(m, IFLA_PROP_LIST) >= 0);
+ assert_se(sd_netlink_message_read_string(m, IFLA_ALT_IFNAME, &p) >= 0);
+ assert_se(streq(p, "hoge1009"));
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+}
+
+int main(void) {
+ sd_netlink *rtnl;
+ sd_netlink_message *m;
+ sd_netlink_message *r;
+ const char *string_data;
+ int if_loopback;
+ uint16_t type;
+
+ test_match();
+ test_multiple();
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(rtnl);
+
+ test_route(rtnl);
+ test_message(rtnl);
+ test_container(rtnl);
+ test_array();
+ test_strv(rtnl);
+
+ if_loopback = (int) if_nametoindex("lo");
+ assert_se(if_loopback > 0);
+
+ test_async(if_loopback);
+ test_slot_set(if_loopback);
+ test_async_destroy_callback(if_loopback);
+ test_pipe(if_loopback);
+ test_event_loop(if_loopback);
+ test_link_configure(rtnl, if_loopback);
+
+ test_get_addresses(rtnl);
+ test_message_link_bridge(rtnl);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, if_loopback) >= 0);
+ assert_se(m);
+
+ assert_se(sd_netlink_message_get_type(m, &type) >= 0);
+ assert_se(type == RTM_GETLINK);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &string_data) == -EPERM);
+
+ assert_se(sd_netlink_call(rtnl, m, 0, &r) == 1);
+ assert_se(sd_netlink_message_get_type(r, &type) >= 0);
+ assert_se(type == RTM_NEWLINK);
+
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+
+ assert_se(sd_netlink_call(rtnl, m, -1, &r) == -EPERM);
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+
+ test_link_get(rtnl, if_loopback);
+ test_address_get(rtnl, if_loopback);
+
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+ assert_se((r = sd_netlink_message_unref(r)) == NULL);
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-network/network-util.c b/src/libsystemd/sd-network/network-util.c
new file mode 100644
index 0000000..7753431
--- /dev/null
+++ b/src/libsystemd/sd-network/network-util.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "network-util.h"
+#include "string-table.h"
+#include "strv.h"
+
+bool network_is_online(void) {
+ _cleanup_free_ char *carrier_state = NULL, *addr_state = NULL;
+ int r;
+
+ r = sd_network_get_carrier_state(&carrier_state);
+ if (r < 0) /* if we don't know anything, we consider the system online */
+ return true;
+
+ r = sd_network_get_address_state(&addr_state);
+ if (r < 0) /* if we don't know anything, we consider the system online */
+ return true;
+
+ if (STR_IN_SET(carrier_state, "degraded-carrier", "carrier") &&
+ STR_IN_SET(addr_state, "routable", "degraded"))
+ return true;
+
+ return false;
+}
+
+static const char* const link_operstate_table[_LINK_OPERSTATE_MAX] = {
+ [LINK_OPERSTATE_MISSING] = "missing",
+ [LINK_OPERSTATE_OFF] = "off",
+ [LINK_OPERSTATE_NO_CARRIER] = "no-carrier",
+ [LINK_OPERSTATE_DORMANT] = "dormant",
+ [LINK_OPERSTATE_DEGRADED_CARRIER] = "degraded-carrier",
+ [LINK_OPERSTATE_CARRIER] = "carrier",
+ [LINK_OPERSTATE_DEGRADED] = "degraded",
+ [LINK_OPERSTATE_ENSLAVED] = "enslaved",
+ [LINK_OPERSTATE_ROUTABLE] = "routable",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_operstate, LinkOperationalState);
+
+static const char* const link_carrier_state_table[_LINK_CARRIER_STATE_MAX] = {
+ [LINK_CARRIER_STATE_OFF] = "off",
+ [LINK_CARRIER_STATE_NO_CARRIER] = "no-carrier",
+ [LINK_CARRIER_STATE_DORMANT] = "dormant",
+ [LINK_CARRIER_STATE_DEGRADED_CARRIER] = "degraded-carrier",
+ [LINK_CARRIER_STATE_CARRIER] = "carrier",
+ [LINK_CARRIER_STATE_ENSLAVED] = "enslaved",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_carrier_state, LinkCarrierState);
+
+static const char* const link_address_state_table[_LINK_ADDRESS_STATE_MAX] = {
+ [LINK_ADDRESS_STATE_OFF] = "off",
+ [LINK_ADDRESS_STATE_DEGRADED] = "degraded",
+ [LINK_ADDRESS_STATE_ROUTABLE] = "routable",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_address_state, LinkAddressState);
+
+int parse_operational_state_range(const char *str, LinkOperationalStateRange *out) {
+ LinkOperationalStateRange range = { _LINK_OPERSTATE_INVALID, _LINK_OPERSTATE_INVALID };
+ _cleanup_free_ const char *min = NULL;
+ const char *p;
+
+ assert(str);
+ assert(out);
+
+ p = strchr(str, ':');
+ if (p) {
+ min = strndup(str, p - str);
+
+ if (!isempty(p + 1)) {
+ range.max = link_operstate_from_string(p + 1);
+ if (range.max < 0)
+ return -EINVAL;
+ }
+ } else
+ min = strdup(str);
+
+ if (!min)
+ return -ENOMEM;
+
+ if (!isempty(min)) {
+ range.min = link_operstate_from_string(min);
+ if (range.min < 0)
+ return -EINVAL;
+ }
+
+ /* Fail on empty strings. */
+ if (range.min == _LINK_OPERSTATE_INVALID && range.max == _LINK_OPERSTATE_INVALID)
+ return -EINVAL;
+
+ if (range.min == _LINK_OPERSTATE_INVALID)
+ range.min = LINK_OPERSTATE_MISSING;
+ if (range.max == _LINK_OPERSTATE_INVALID)
+ range.max = LINK_OPERSTATE_ROUTABLE;
+
+ if (range.min > range.max)
+ return -EINVAL;
+
+ *out = range;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-network/network-util.h b/src/libsystemd/sd-network/network-util.h
new file mode 100644
index 0000000..8cfd894
--- /dev/null
+++ b/src/libsystemd/sd-network/network-util.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-network.h"
+
+#include "macro.h"
+
+bool network_is_online(void);
+
+typedef enum LinkOperationalState {
+ LINK_OPERSTATE_MISSING,
+ LINK_OPERSTATE_OFF,
+ LINK_OPERSTATE_NO_CARRIER,
+ LINK_OPERSTATE_DORMANT,
+ LINK_OPERSTATE_DEGRADED_CARRIER,
+ LINK_OPERSTATE_CARRIER,
+ LINK_OPERSTATE_DEGRADED,
+ LINK_OPERSTATE_ENSLAVED,
+ LINK_OPERSTATE_ROUTABLE,
+ _LINK_OPERSTATE_MAX,
+ _LINK_OPERSTATE_INVALID = -1
+} LinkOperationalState;
+
+typedef enum LinkCarrierState {
+ LINK_CARRIER_STATE_OFF = LINK_OPERSTATE_OFF,
+ LINK_CARRIER_STATE_NO_CARRIER = LINK_OPERSTATE_NO_CARRIER,
+ LINK_CARRIER_STATE_DORMANT = LINK_OPERSTATE_DORMANT,
+ LINK_CARRIER_STATE_DEGRADED_CARRIER = LINK_OPERSTATE_DEGRADED_CARRIER,
+ LINK_CARRIER_STATE_CARRIER = LINK_OPERSTATE_CARRIER,
+ LINK_CARRIER_STATE_ENSLAVED = LINK_OPERSTATE_ENSLAVED,
+ _LINK_CARRIER_STATE_MAX,
+ _LINK_CARRIER_STATE_INVALID = -1
+} LinkCarrierState;
+
+typedef enum LinkAddressState {
+ LINK_ADDRESS_STATE_OFF,
+ LINK_ADDRESS_STATE_DEGRADED,
+ LINK_ADDRESS_STATE_ROUTABLE,
+ _LINK_ADDRESS_STATE_MAX,
+ _LINK_ADDRESS_STATE_INVALID = -1
+} LinkAddressState;
+
+const char* link_operstate_to_string(LinkOperationalState s) _const_;
+LinkOperationalState link_operstate_from_string(const char *s) _pure_;
+
+const char* link_carrier_state_to_string(LinkCarrierState s) _const_;
+LinkCarrierState link_carrier_state_from_string(const char *s) _pure_;
+
+const char* link_address_state_to_string(LinkAddressState s) _const_;
+LinkAddressState link_address_state_from_string(const char *s) _pure_;
+
+typedef struct LinkOperationalStateRange {
+ LinkOperationalState min;
+ LinkOperationalState max;
+} LinkOperationalStateRange;
+
+#define LINK_OPERSTATE_RANGE_DEFAULT (LinkOperationalStateRange) { LINK_OPERSTATE_DEGRADED, \
+ LINK_OPERSTATE_ROUTABLE }
+
+int parse_operational_state_range(const char *str, LinkOperationalStateRange *out);
diff --git a/src/libsystemd/sd-network/sd-network.c b/src/libsystemd/sd-network/sd-network.c
new file mode 100644
index 0000000..b9b1099
--- /dev/null
+++ b/src/libsystemd/sd-network/sd-network.c
@@ -0,0 +1,448 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+#include <sys/inotify.h>
+
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static int network_get_string(const char *field, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = parse_env_file(NULL, "/run/systemd/netif/state", field, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_network_get_operational_state(char **state) {
+ return network_get_string("OPER_STATE", state);
+}
+
+_public_ int sd_network_get_carrier_state(char **state) {
+ return network_get_string("CARRIER_STATE", state);
+}
+
+_public_ int sd_network_get_address_state(char **state) {
+ return network_get_string("ADDRESS_STATE", state);
+}
+
+static int network_get_strv(const char *key, char ***ret) {
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = parse_env_file(NULL, "/run/systemd/netif/state", key, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ a = strv_split(s, " ");
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ *ret = TAKE_PTR(a);
+
+ return r;
+}
+
+_public_ int sd_network_get_dns(char ***ret) {
+ return network_get_strv("DNS", ret);
+}
+
+_public_ int sd_network_get_ntp(char ***ret) {
+ return network_get_strv("NTP", ret);
+}
+
+_public_ int sd_network_get_search_domains(char ***ret) {
+ return network_get_strv("DOMAINS", ret);
+}
+
+_public_ int sd_network_get_route_domains(char ***ret) {
+ return network_get_strv("ROUTE_DOMAINS", ret);
+}
+
+static int network_link_get_string(int ifindex, const char *field, char **ret) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex) + 1];
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+
+ r = parse_env_file(NULL, path, field, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+static int network_link_get_strv(int ifindex, const char *key, char ***ret) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex) + 1];
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+ r = parse_env_file(NULL, path, key, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ a = strv_split(s, " ");
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ *ret = TAKE_PTR(a);
+
+ return r;
+}
+
+_public_ int sd_network_link_get_setup_state(int ifindex, char **state) {
+ return network_link_get_string(ifindex, "ADMIN_STATE", state);
+}
+
+_public_ int sd_network_link_get_network_file(int ifindex, char **filename) {
+ return network_link_get_string(ifindex, "NETWORK_FILE", filename);
+}
+
+_public_ int sd_network_link_get_operational_state(int ifindex, char **state) {
+ return network_link_get_string(ifindex, "OPER_STATE", state);
+}
+
+_public_ int sd_network_link_get_carrier_state(int ifindex, char **state) {
+ return network_link_get_string(ifindex, "CARRIER_STATE", state);
+}
+
+_public_ int sd_network_link_get_address_state(int ifindex, char **state) {
+ return network_link_get_string(ifindex, "ADDRESS_STATE", state);
+}
+
+_public_ int sd_network_link_get_dhcp6_client_iaid_string(int ifindex, char **iaid) {
+ return network_link_get_string(ifindex, "DHCP6_CLIENT_IAID", iaid);
+}
+
+_public_ int sd_network_link_get_dhcp6_client_duid_string(int ifindex, char **duid) {
+ return network_link_get_string(ifindex, "DHCP6_CLIENT_DUID", duid);
+}
+
+_public_ int sd_network_link_get_required_for_online(int ifindex) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ r = network_link_get_string(ifindex, "REQUIRED_FOR_ONLINE", &s);
+ if (r < 0) {
+ /* Handle -ENODATA as RequiredForOnline=yes, for compatibility */
+ if (r == -ENODATA)
+ return true;
+ return r;
+ }
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_network_link_get_required_operstate_for_online(int ifindex, char **state) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(state, -EINVAL);
+
+ r = network_link_get_string(ifindex, "REQUIRED_OPER_STATE_FOR_ONLINE", &s);
+ if (r < 0) {
+ if (r != -ENODATA)
+ return r;
+
+ /* For compatibility, assuming degraded. */
+ s = strdup("degraded");
+ if (!s)
+ return -ENOMEM;
+ }
+
+ *state = TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_network_link_get_llmnr(int ifindex, char **llmnr) {
+ return network_link_get_string(ifindex, "LLMNR", llmnr);
+}
+
+_public_ int sd_network_link_get_mdns(int ifindex, char **mdns) {
+ return network_link_get_string(ifindex, "MDNS", mdns);
+}
+
+_public_ int sd_network_link_get_dns_over_tls(int ifindex, char **dns_over_tls) {
+ return network_link_get_string(ifindex, "DNS_OVER_TLS", dns_over_tls);
+}
+
+_public_ int sd_network_link_get_dnssec(int ifindex, char **dnssec) {
+ return network_link_get_string(ifindex, "DNSSEC", dnssec);
+}
+
+_public_ int sd_network_link_get_dnssec_negative_trust_anchors(int ifindex, char ***nta) {
+ return network_link_get_strv(ifindex, "DNSSEC_NTA", nta);
+}
+
+_public_ int sd_network_link_get_dns(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "DNS", ret);
+}
+
+_public_ int sd_network_link_get_ntp(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "NTP", ret);
+}
+
+_public_ int sd_network_link_get_sip(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "SIP", ret);
+}
+
+_public_ int sd_network_link_get_search_domains(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "DOMAINS", ret);
+}
+
+_public_ int sd_network_link_get_route_domains(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "ROUTE_DOMAINS", ret);
+}
+
+_public_ int sd_network_link_get_dns_default_route(int ifindex) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex) + 1];
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+
+ r = parse_env_file(NULL, path, "DNS_DEFAULT_ROUTE", &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+ return parse_boolean(s);
+}
+
+static int network_link_get_ifindexes(int ifindex, const char *key, int **ret) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex) + 1];
+ _cleanup_free_ int *ifis = NULL;
+ _cleanup_free_ char *s = NULL;
+ size_t allocated = 0, c = 0;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+ r = parse_env_file(NULL, path, key, &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ for (const char *x = s;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&x, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(ifis, allocated, c + 2))
+ return -ENOMEM;
+
+ r = ifis[c++] = parse_ifindex(word);
+ if (r < 0)
+ return r;
+ }
+
+ if (ifis)
+ ifis[c] = 0; /* Let's add a 0 ifindex to the end, to be nice */
+
+ *ret = TAKE_PTR(ifis);
+
+ return c;
+}
+
+_public_ int sd_network_link_get_carrier_bound_to(int ifindex, int **ret) {
+ return network_link_get_ifindexes(ifindex, "CARRIER_BOUND_TO", ret);
+}
+
+_public_ int sd_network_link_get_carrier_bound_by(int ifindex, int **ret) {
+ return network_link_get_ifindexes(ifindex, "CARRIER_BOUND_BY", ret);
+}
+
+static int MONITOR_TO_FD(sd_network_monitor *m) {
+ return (int) (unsigned long) m - 1;
+}
+
+static sd_network_monitor* FD_TO_MONITOR(int fd) {
+ return (sd_network_monitor*) (unsigned long) (fd + 1);
+}
+
+static int monitor_add_inotify_watch(int fd) {
+ int k;
+
+ k = inotify_add_watch(fd, "/run/systemd/netif/links/", IN_MOVED_TO|IN_DELETE);
+ if (k >= 0)
+ return 0;
+ else if (errno != ENOENT)
+ return -errno;
+
+ k = inotify_add_watch(fd, "/run/systemd/netif/", IN_CREATE|IN_ISDIR);
+ if (k >= 0)
+ return 0;
+ else if (errno != ENOENT)
+ return -errno;
+
+ k = inotify_add_watch(fd, "/run/systemd/", IN_CREATE|IN_ISDIR);
+ if (k < 0)
+ return -errno;
+
+ return 0;
+}
+
+_public_ int sd_network_monitor_new(sd_network_monitor **m, const char *category) {
+ _cleanup_close_ int fd = -1;
+ int k;
+ bool good = false;
+
+ assert_return(m, -EINVAL);
+
+ fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!category || streq(category, "links")) {
+ k = monitor_add_inotify_watch(fd);
+ if (k < 0)
+ return k;
+
+ good = true;
+ }
+
+ if (!good)
+ return -EINVAL;
+
+ *m = FD_TO_MONITOR(TAKE_FD(fd));
+ return 0;
+}
+
+_public_ sd_network_monitor* sd_network_monitor_unref(sd_network_monitor *m) {
+ if (m)
+ close_nointr(MONITOR_TO_FD(m));
+
+ return NULL;
+}
+
+_public_ int sd_network_monitor_flush(sd_network_monitor *m) {
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+ int fd, k;
+
+ assert_return(m, -EINVAL);
+
+ fd = MONITOR_TO_FD(m);
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return -errno;
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ if (e->mask & IN_ISDIR) {
+ k = monitor_add_inotify_watch(fd);
+ if (k < 0)
+ return k;
+
+ k = inotify_rm_watch(fd, e->wd);
+ if (k < 0)
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+_public_ int sd_network_monitor_get_fd(sd_network_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ return MONITOR_TO_FD(m);
+}
+
+_public_ int sd_network_monitor_get_events(sd_network_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ /* For now we will only return POLLIN here, since we don't
+ * need anything else ever for inotify. However, let's have
+ * this API to keep our options open should we later on need
+ * it. */
+ return POLLIN;
+}
+
+_public_ int sd_network_monitor_get_timeout(sd_network_monitor *m, uint64_t *timeout_usec) {
+
+ assert_return(m, -EINVAL);
+ assert_return(timeout_usec, -EINVAL);
+
+ /* For now we will only return (uint64_t) -1, since we don't
+ * need any timeout. However, let's have this API to keep our
+ * options open should we later on need it. */
+ *timeout_usec = (uint64_t) -1;
+ return 0;
+}
diff --git a/src/libsystemd/sd-path/sd-path.c b/src/libsystemd/sd-path/sd-path.c
new file mode 100644
index 0000000..61ed7cb
--- /dev/null
+++ b/src/libsystemd/sd-path/sd-path.c
@@ -0,0 +1,691 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-path.h"
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static int from_environment(const char *envname, const char *fallback, const char **ret) {
+ assert(ret);
+
+ if (envname) {
+ const char *e;
+
+ e = secure_getenv(envname);
+ if (e && path_is_absolute(e)) {
+ *ret = e;
+ return 0;
+ }
+ }
+
+ if (fallback) {
+ *ret = fallback;
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static int from_home_dir(const char *envname, const char *suffix, char **buffer, const char **ret) {
+ _cleanup_free_ char *h = NULL;
+ char *cc = NULL;
+ int r;
+
+ assert(suffix);
+ assert(buffer);
+ assert(ret);
+
+ if (envname) {
+ const char *e = NULL;
+
+ e = secure_getenv(envname);
+ if (e && path_is_absolute(e)) {
+ *ret = e;
+ return 0;
+ }
+ }
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ cc = path_join(h, suffix);
+ if (!cc)
+ return -ENOMEM;
+
+ *buffer = cc;
+ *ret = cc;
+ return 0;
+}
+
+static int from_user_dir(const char *field, char **buffer, const char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *b = NULL;
+ _cleanup_free_ const char *fn = NULL;
+ const char *c = NULL;
+ size_t n;
+ int r;
+
+ assert(field);
+ assert(buffer);
+ assert(ret);
+
+ r = from_home_dir("XDG_CONFIG_HOME", ".config", &b, &c);
+ if (r < 0)
+ return r;
+
+ fn = path_join(c, "user-dirs.dirs");
+ if (!fn)
+ return -ENOMEM;
+
+ f = fopen(fn, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ goto fallback;
+
+ return -errno;
+ }
+
+ /* This is an awful parse, but it follows closely what
+ * xdg-user-dirs does upstream */
+
+ n = strlen(field);
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l, *p, *e;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+
+ if (!strneq(l, field, n))
+ continue;
+
+ p = l + n;
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '=')
+ continue;
+ p++;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '"')
+ continue;
+ p++;
+
+ e = strrchr(p, '"');
+ if (!e)
+ continue;
+ *e = 0;
+
+ /* Three syntaxes permitted: relative to $HOME, $HOME itself, and absolute path */
+ if (startswith(p, "$HOME/")) {
+ _cleanup_free_ char *h = NULL;
+ char *cc;
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ cc = path_join(h, p+5);
+ if (!cc)
+ return -ENOMEM;
+
+ *buffer = cc;
+ *ret = cc;
+ return 0;
+ } else if (streq(p, "$HOME")) {
+
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ return 0;
+ } else if (path_is_absolute(p)) {
+ char *copy;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *buffer = copy;
+ *ret = copy;
+ return 0;
+ }
+ }
+
+fallback:
+ /* The desktop directory defaults to $HOME/Desktop, the others to $HOME */
+ if (streq(field, "XDG_DESKTOP_DIR")) {
+ _cleanup_free_ char *h = NULL;
+ char *cc;
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ cc = path_join(h, "Desktop");
+ if (!cc)
+ return -ENOMEM;
+
+ *buffer = cc;
+ *ret = cc;
+ } else {
+
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ }
+
+ return 0;
+}
+
+static int get_path(uint64_t type, char **buffer, const char **ret) {
+ int r;
+
+ assert(buffer);
+ assert(ret);
+
+ switch (type) {
+
+ case SD_PATH_TEMPORARY:
+ return tmp_dir(ret);
+
+ case SD_PATH_TEMPORARY_LARGE:
+ return var_tmp_dir(ret);
+
+ case SD_PATH_SYSTEM_BINARIES:
+ *ret = "/usr/bin";
+ return 0;
+
+ case SD_PATH_SYSTEM_INCLUDE:
+ *ret = "/usr/include";
+ return 0;
+
+ case SD_PATH_SYSTEM_LIBRARY_PRIVATE:
+ *ret = "/usr/lib";
+ return 0;
+
+ case SD_PATH_SYSTEM_LIBRARY_ARCH:
+ *ret = LIBDIR;
+ return 0;
+
+ case SD_PATH_SYSTEM_SHARED:
+ *ret = "/usr/share";
+ return 0;
+
+ case SD_PATH_SYSTEM_CONFIGURATION_FACTORY:
+ *ret = "/usr/share/factory/etc";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_FACTORY:
+ *ret = "/usr/share/factory/var";
+ return 0;
+
+ case SD_PATH_SYSTEM_CONFIGURATION:
+ *ret = "/etc";
+ return 0;
+
+ case SD_PATH_SYSTEM_RUNTIME:
+ *ret = "/run";
+ return 0;
+
+ case SD_PATH_SYSTEM_RUNTIME_LOGS:
+ *ret = "/run/log";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_PRIVATE:
+ *ret = "/var/lib";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_LOGS:
+ *ret = "/var/log";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_CACHE:
+ *ret = "/var/cache";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_SPOOL:
+ *ret = "/var/spool";
+ return 0;
+
+ case SD_PATH_USER_BINARIES:
+ return from_home_dir(NULL, ".local/bin", buffer, ret);
+
+ case SD_PATH_USER_LIBRARY_PRIVATE:
+ return from_home_dir(NULL, ".local/lib", buffer, ret);
+
+ case SD_PATH_USER_LIBRARY_ARCH:
+ return from_home_dir(NULL, ".local/lib/" LIB_ARCH_TUPLE, buffer, ret);
+
+ case SD_PATH_USER_SHARED:
+ return from_home_dir("XDG_DATA_HOME", ".local/share", buffer, ret);
+
+ case SD_PATH_USER_CONFIGURATION:
+ return from_home_dir("XDG_CONFIG_HOME", ".config", buffer, ret);
+
+ case SD_PATH_USER_RUNTIME:
+ return from_environment("XDG_RUNTIME_DIR", NULL, ret);
+
+ case SD_PATH_USER_STATE_CACHE:
+ return from_home_dir("XDG_CACHE_HOME", ".cache", buffer, ret);
+
+ case SD_PATH_USER:
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ return 0;
+
+ case SD_PATH_USER_DOCUMENTS:
+ return from_user_dir("XDG_DOCUMENTS_DIR", buffer, ret);
+
+ case SD_PATH_USER_MUSIC:
+ return from_user_dir("XDG_MUSIC_DIR", buffer, ret);
+
+ case SD_PATH_USER_PICTURES:
+ return from_user_dir("XDG_PICTURES_DIR", buffer, ret);
+
+ case SD_PATH_USER_VIDEOS:
+ return from_user_dir("XDG_VIDEOS_DIR", buffer, ret);
+
+ case SD_PATH_USER_DOWNLOAD:
+ return from_user_dir("XDG_DOWNLOAD_DIR", buffer, ret);
+
+ case SD_PATH_USER_PUBLIC:
+ return from_user_dir("XDG_PUBLICSHARE_DIR", buffer, ret);
+
+ case SD_PATH_USER_TEMPLATES:
+ return from_user_dir("XDG_TEMPLATES_DIR", buffer, ret);
+
+ case SD_PATH_USER_DESKTOP:
+ return from_user_dir("XDG_DESKTOP_DIR", buffer, ret);
+
+ case SD_PATH_SYSTEMD_UTIL:
+ *ret = ROOTPREFIX_NOSLASH "/lib/systemd";
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_UNIT:
+ *ret = SYSTEM_DATA_UNIT_PATH;
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_PRESET:
+ *ret = ROOTPREFIX_NOSLASH "/lib/systemd/system-preset";
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_UNIT:
+ *ret = USER_DATA_UNIT_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_PRESET:
+ *ret = ROOTPREFIX_NOSLASH "/lib/systemd/user-preset";
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_CONF:
+ *ret = SYSTEM_CONFIG_UNIT_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_CONF:
+ *ret = USER_CONFIG_UNIT_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_GENERATOR:
+ *ret = SYSTEM_GENERATOR_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_GENERATOR:
+ *ret = USER_GENERATOR_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_SLEEP:
+ *ret = ROOTPREFIX_NOSLASH "/lib/systemd/system-sleep";
+ return 0;
+
+ case SD_PATH_SYSTEMD_SHUTDOWN:
+ *ret = ROOTPREFIX_NOSLASH "/lib/systemd/system-shutdown";
+ return 0;
+
+ case SD_PATH_TMPFILES:
+ *ret = "/usr/lib/tmpfiles.d";
+ return 0;
+
+ case SD_PATH_SYSUSERS:
+ *ret = ROOTPREFIX_NOSLASH "/lib/sysusers.d";
+ return 0;
+
+ case SD_PATH_SYSCTL:
+ *ret = ROOTPREFIX_NOSLASH "/lib/sysctl.d";
+ return 0;
+
+ case SD_PATH_BINFMT:
+ *ret = ROOTPREFIX_NOSLASH "/lib/binfmt.d";
+ return 0;
+
+ case SD_PATH_MODULES_LOAD:
+ *ret = ROOTPREFIX_NOSLASH "/lib/modules-load.d";
+ return 0;
+
+ case SD_PATH_CATALOG:
+ *ret = "/usr/lib/systemd/catalog";
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int get_path_alloc(uint64_t type, const char *suffix, char **path) {
+ _cleanup_free_ char *buffer = NULL;
+ char *buffer2 = NULL;
+ const char *ret;
+ int r;
+
+ assert(path);
+
+ r = get_path(type, &buffer, &ret);
+ if (r < 0)
+ return r;
+
+ if (suffix) {
+ suffix += strspn(suffix, "/");
+ buffer2 = path_join(ret, suffix);
+ if (!buffer2)
+ return -ENOMEM;
+ } else if (!buffer) {
+ buffer = strdup(ret);
+ if (!buffer)
+ return -ENOMEM;
+ }
+
+ *path = buffer2 ?: TAKE_PTR(buffer);
+ return 0;
+}
+
+_public_ int sd_path_lookup(uint64_t type, const char *suffix, char **path) {
+ int r;
+
+ assert_return(path, -EINVAL);
+
+ r = get_path_alloc(type, suffix, path);
+ if (r != -EOPNOTSUPP)
+ return r;
+
+ /* Fall back to sd_path_lookup_strv */
+ _cleanup_strv_free_ char **l = NULL;
+ char *buffer;
+
+ r = sd_path_lookup_strv(type, suffix, &l);
+ if (r < 0)
+ return r;
+
+ buffer = strv_join(l, ":");
+ if (!buffer)
+ return -ENOMEM;
+
+ *path = buffer;
+ return 0;
+}
+
+static int search_from_environment(
+ char ***list,
+ const char *env_home,
+ const char *home_suffix,
+ const char *env_search,
+ bool env_search_sufficient,
+ const char *first, ...) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ const char *e;
+ char *h = NULL;
+ int r;
+
+ assert(list);
+
+ if (env_search) {
+ e = secure_getenv(env_search);
+ if (e) {
+ l = strv_split(e, ":");
+ if (!l)
+ return -ENOMEM;
+
+ if (env_search_sufficient) {
+ *list = TAKE_PTR(l);
+ return 0;
+ }
+ }
+ }
+
+ if (!l && first) {
+ va_list ap;
+
+ va_start(ap, first);
+ l = strv_new_ap(first, ap);
+ va_end(ap);
+
+ if (!l)
+ return -ENOMEM;
+ }
+
+ if (env_home) {
+ e = secure_getenv(env_home);
+ if (e && path_is_absolute(e)) {
+ h = strdup(e);
+ if (!h)
+ return -ENOMEM;
+ }
+ }
+
+ if (!h && home_suffix) {
+ e = secure_getenv("HOME");
+ if (e && path_is_absolute(e)) {
+ h = path_join(e, home_suffix);
+ if (!h)
+ return -ENOMEM;
+ }
+ }
+
+ if (h) {
+ r = strv_consume_prepend(&l, h);
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ *list = TAKE_PTR(l);
+ return 0;
+}
+
+#if HAVE_SPLIT_BIN
+# define ARRAY_SBIN_BIN(x) x "sbin", x "bin"
+#else
+# define ARRAY_SBIN_BIN(x) x "bin"
+#endif
+
+static int get_search(uint64_t type, char ***list) {
+ int r;
+
+ assert(list);
+
+ switch(type) {
+
+ case SD_PATH_SEARCH_BINARIES:
+ return search_from_environment(list,
+ NULL,
+ ".local/bin",
+ "PATH",
+ true,
+ ARRAY_SBIN_BIN("/usr/local/"),
+ ARRAY_SBIN_BIN("/usr/"),
+#if HAVE_SPLIT_USR
+ ARRAY_SBIN_BIN("/"),
+#endif
+ NULL);
+
+ case SD_PATH_SEARCH_LIBRARY_PRIVATE:
+ return search_from_environment(list,
+ NULL,
+ ".local/lib",
+ NULL,
+ false,
+ "/usr/local/lib",
+ "/usr/lib",
+#if HAVE_SPLIT_USR
+ "/lib",
+#endif
+ NULL);
+
+ case SD_PATH_SEARCH_LIBRARY_ARCH:
+ return search_from_environment(list,
+ NULL,
+ ".local/lib/" LIB_ARCH_TUPLE,
+ "LD_LIBRARY_PATH",
+ true,
+ LIBDIR,
+#if HAVE_SPLIT_USR
+ ROOTLIBDIR,
+#endif
+ NULL);
+
+ case SD_PATH_SEARCH_SHARED:
+ return search_from_environment(list,
+ "XDG_DATA_HOME",
+ ".local/share",
+ "XDG_DATA_DIRS",
+ false,
+ "/usr/local/share",
+ "/usr/share",
+ NULL);
+
+ case SD_PATH_SEARCH_CONFIGURATION_FACTORY:
+ return search_from_environment(list,
+ NULL,
+ NULL,
+ NULL,
+ false,
+ "/usr/local/share/factory/etc",
+ "/usr/share/factory/etc",
+ NULL);
+
+ case SD_PATH_SEARCH_STATE_FACTORY:
+ return search_from_environment(list,
+ NULL,
+ NULL,
+ NULL,
+ false,
+ "/usr/local/share/factory/var",
+ "/usr/share/factory/var",
+ NULL);
+
+ case SD_PATH_SEARCH_CONFIGURATION:
+ return search_from_environment(list,
+ "XDG_CONFIG_HOME",
+ ".config",
+ "XDG_CONFIG_DIRS",
+ false,
+ "/etc",
+ NULL);
+
+ case SD_PATH_SEARCH_BINARIES_DEFAULT:
+ return strv_from_nulstr(list, DEFAULT_PATH_NULSTR);
+
+ case SD_PATH_SYSTEMD_SEARCH_SYSTEM_UNIT:
+ case SD_PATH_SYSTEMD_SEARCH_USER_UNIT: {
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ const UnitFileScope scope = type == SD_PATH_SYSTEMD_SEARCH_SYSTEM_UNIT ?
+ UNIT_FILE_SYSTEM : UNIT_FILE_USER;
+
+ r = lookup_paths_init(&lp, scope, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *list = TAKE_PTR(lp.search_path);
+ return 0;
+ }
+
+ case SD_PATH_SYSTEMD_SEARCH_SYSTEM_GENERATOR:
+ case SD_PATH_SYSTEMD_SEARCH_USER_GENERATOR: {
+ char **t;
+ const UnitFileScope scope = type == SD_PATH_SYSTEMD_SEARCH_SYSTEM_GENERATOR ?
+ UNIT_FILE_SYSTEM : UNIT_FILE_USER;
+
+ t = generator_binary_paths(scope);
+ if (!t)
+ return -ENOMEM;
+
+ *list = t;
+ return 0;
+ }
+
+ case SD_PATH_SYSTEMD_SEARCH_NETWORK:
+ return strv_from_nulstr(list, NETWORK_DIRS_NULSTR);
+
+ }
+
+ return -EOPNOTSUPP;
+}
+
+_public_ int sd_path_lookup_strv(uint64_t type, const char *suffix, char ***paths) {
+ _cleanup_strv_free_ char **l = NULL, **n = NULL;
+ int r;
+
+ assert_return(paths, -EINVAL);
+
+ r = get_search(type, &l);
+ if (r == -EOPNOTSUPP) {
+ _cleanup_free_ char *t = NULL;
+
+ r = get_path_alloc(type, suffix, &t);
+ if (r < 0)
+ return r;
+
+ l = new(char*, 2);
+ if (!l)
+ return -ENOMEM;
+ l[0] = TAKE_PTR(t);
+ l[1] = NULL;
+
+ *paths = TAKE_PTR(l);
+ return 0;
+
+ } else if (r < 0)
+ return r;
+
+ if (!suffix) {
+ *paths = TAKE_PTR(l);
+ return 0;
+ }
+
+ n = new(char*, strv_length(l)+1);
+ if (!n)
+ return -ENOMEM;
+
+ char **i, **j = n;
+ STRV_FOREACH(i, l) {
+ *j = path_join(*i, suffix);
+ if (!*j)
+ return -ENOMEM;
+
+ j++;
+ }
+ *j = NULL;
+
+ *paths = TAKE_PTR(n);
+ return 0;
+}
diff --git a/src/libsystemd/sd-resolve/resolve-private.h b/src/libsystemd/sd-resolve/resolve-private.h
new file mode 100644
index 0000000..7a339f7
--- /dev/null
+++ b/src/libsystemd/sd-resolve/resolve-private.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-resolve.h"
+
+int resolve_getaddrinfo_with_destroy_callback(
+ sd_resolve *resolve, sd_resolve_query **q,
+ const char *node, const char *service, const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback, void *userdata);
+int resolve_getnameinfo_with_destroy_callback(
+ sd_resolve *resolve, sd_resolve_query **q,
+ const struct sockaddr *sa, socklen_t salen, int flags, uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback, void *userdata);
+
+#define resolve_getaddrinfo(resolve, ret_query, node, service, hints, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_resolve_query*, int, const struct addrinfo*, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ resolve_getaddrinfo_with_destroy_callback( \
+ resolve, ret_query, \
+ node, service, hints, \
+ (sd_resolve_getaddrinfo_handler_t) _callback_, \
+ (sd_resolve_destroy_t) _destroy_, \
+ userdata); \
+ })
+
+#define resolve_getnameinfo(resolve, ret_query, sa, salen, flags, get, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_resolve_query*, int, const char*, const char*, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ resolve_getaddrinfo_with_destroy_callback( \
+ resolve, ret_query, \
+ sa, salen, flags, get, \
+ (sd_resolve_getnameinfo_handler_t) _callback_, \
+ (sd_resolve_destroy_t) _destroy_, \
+ userdata); \
+ })
diff --git a/src/libsystemd/sd-resolve/sd-resolve.c b/src/libsystemd/sd-resolve/sd-resolve.c
new file mode 100644
index 0000000..2cfa22d
--- /dev/null
+++ b/src/libsystemd/sd-resolve/sd-resolve.c
@@ -0,0 +1,1301 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <resolv.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "list.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "process-util.h"
+#include "resolve-private.h"
+#include "socket-util.h"
+
+#define WORKERS_MIN 1U
+#define WORKERS_MAX 16U
+#define QUERIES_MAX 256U
+#define BUFSIZE 10240U
+
+typedef enum {
+ REQUEST_ADDRINFO,
+ RESPONSE_ADDRINFO,
+ REQUEST_NAMEINFO,
+ RESPONSE_NAMEINFO,
+ REQUEST_TERMINATE,
+ RESPONSE_DIED
+} QueryType;
+
+enum {
+ REQUEST_RECV_FD,
+ REQUEST_SEND_FD,
+ RESPONSE_RECV_FD,
+ RESPONSE_SEND_FD,
+ _FD_MAX
+};
+
+struct sd_resolve {
+ unsigned n_ref;
+
+ bool dead:1;
+ pid_t original_pid;
+
+ int fds[_FD_MAX];
+
+ pthread_t workers[WORKERS_MAX];
+ unsigned n_valid_workers;
+
+ unsigned current_id;
+ sd_resolve_query* query_array[QUERIES_MAX];
+ unsigned n_queries, n_done, n_outstanding;
+
+ sd_event_source *event_source;
+ sd_event *event;
+
+ sd_resolve_query *current;
+
+ sd_resolve **default_resolve_ptr;
+ pid_t tid;
+
+ LIST_HEAD(sd_resolve_query, queries);
+};
+
+struct sd_resolve_query {
+ unsigned n_ref;
+
+ sd_resolve *resolve;
+
+ QueryType type:4;
+ bool done:1;
+ bool floating:1;
+ unsigned id;
+
+ int ret;
+ int _errno;
+ int _h_errno;
+ struct addrinfo *addrinfo;
+ char *serv, *host;
+
+ union {
+ sd_resolve_getaddrinfo_handler_t getaddrinfo_handler;
+ sd_resolve_getnameinfo_handler_t getnameinfo_handler;
+ };
+
+ void *userdata;
+ sd_resolve_destroy_t destroy_callback;
+
+ LIST_FIELDS(sd_resolve_query, queries);
+};
+
+typedef struct RHeader {
+ QueryType type;
+ unsigned id;
+ size_t length;
+} RHeader;
+
+typedef struct AddrInfoRequest {
+ struct RHeader header;
+ bool hints_valid;
+ int ai_flags;
+ int ai_family;
+ int ai_socktype;
+ int ai_protocol;
+ size_t node_len, service_len;
+} AddrInfoRequest;
+
+typedef struct AddrInfoResponse {
+ struct RHeader header;
+ int ret;
+ int _errno;
+ int _h_errno;
+ /* followed by addrinfo_serialization[] */
+} AddrInfoResponse;
+
+typedef struct AddrInfoSerialization {
+ int ai_flags;
+ int ai_family;
+ int ai_socktype;
+ int ai_protocol;
+ size_t ai_addrlen;
+ size_t canonname_len;
+ /* Followed by ai_addr amd ai_canonname with variable lengths */
+} AddrInfoSerialization;
+
+typedef struct NameInfoRequest {
+ struct RHeader header;
+ int flags;
+ socklen_t sockaddr_len;
+ bool gethost:1, getserv:1;
+} NameInfoRequest;
+
+typedef struct NameInfoResponse {
+ struct RHeader header;
+ size_t hostlen, servlen;
+ int ret;
+ int _errno;
+ int _h_errno;
+} NameInfoResponse;
+
+typedef union Packet {
+ RHeader rheader;
+ AddrInfoRequest addrinfo_request;
+ AddrInfoResponse addrinfo_response;
+ NameInfoRequest nameinfo_request;
+ NameInfoResponse nameinfo_response;
+} Packet;
+
+static int getaddrinfo_done(sd_resolve_query* q);
+static int getnameinfo_done(sd_resolve_query *q);
+
+static void resolve_query_disconnect(sd_resolve_query *q);
+
+#define RESOLVE_DONT_DESTROY(resolve) \
+ _cleanup_(sd_resolve_unrefp) _unused_ sd_resolve *_dont_destroy_##resolve = sd_resolve_ref(resolve)
+
+static void query_assign_errno(sd_resolve_query *q, int ret, int error, int h_error) {
+ assert(q);
+
+ q->ret = ret;
+ q->_errno = abs(error);
+ q->_h_errno = h_error;
+}
+
+static int send_died(int out_fd) {
+ RHeader rh = {
+ .type = RESPONSE_DIED,
+ .length = sizeof(RHeader),
+ };
+
+ assert(out_fd >= 0);
+
+ if (send(out_fd, &rh, rh.length, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static void *serialize_addrinfo(void *p, const struct addrinfo *ai, size_t *length, size_t maxlength) {
+ AddrInfoSerialization s;
+ size_t cnl, l;
+
+ assert(p);
+ assert(ai);
+ assert(length);
+ assert(*length <= maxlength);
+
+ cnl = ai->ai_canonname ? strlen(ai->ai_canonname)+1 : 0;
+ l = sizeof(AddrInfoSerialization) + ai->ai_addrlen + cnl;
+
+ if (*length + l > maxlength)
+ return NULL;
+
+ s = (AddrInfoSerialization) {
+ .ai_flags = ai->ai_flags,
+ .ai_family = ai->ai_family,
+ .ai_socktype = ai->ai_socktype,
+ .ai_protocol = ai->ai_protocol,
+ .ai_addrlen = ai->ai_addrlen,
+ .canonname_len = cnl,
+ };
+
+ memcpy((uint8_t*) p, &s, sizeof(AddrInfoSerialization));
+ memcpy((uint8_t*) p + sizeof(AddrInfoSerialization), ai->ai_addr, ai->ai_addrlen);
+ memcpy_safe((char*) p + sizeof(AddrInfoSerialization) + ai->ai_addrlen,
+ ai->ai_canonname, cnl);
+
+ *length += l;
+ return (uint8_t*) p + l;
+}
+
+static int send_addrinfo_reply(
+ int out_fd,
+ unsigned id,
+ int ret,
+ struct addrinfo *ai,
+ int _errno,
+ int _h_errno) {
+
+ AddrInfoResponse resp = {};
+ union {
+ AddrInfoSerialization ais;
+ uint8_t space[BUFSIZE];
+ } buffer;
+ struct iovec iov[2];
+ struct msghdr mh;
+
+ assert(out_fd >= 0);
+
+ resp = (AddrInfoResponse) {
+ .header.type = RESPONSE_ADDRINFO,
+ .header.id = id,
+ .header.length = sizeof(AddrInfoResponse),
+ .ret = ret,
+ ._errno = _errno,
+ ._h_errno = _h_errno,
+ };
+
+ msan_unpoison(&resp, sizeof(resp));
+
+ if (ret == 0 && ai) {
+ void *p = &buffer;
+ struct addrinfo *k;
+
+ for (k = ai; k; k = k->ai_next) {
+ p = serialize_addrinfo(p, k, &resp.header.length, (uint8_t*) &buffer + BUFSIZE - (uint8_t*) p);
+ if (!p) {
+ freeaddrinfo(ai);
+ return -ENOBUFS;
+ }
+ }
+ }
+
+ if (ai)
+ freeaddrinfo(ai);
+
+ iov[0] = IOVEC_MAKE(&resp, sizeof(AddrInfoResponse));
+ iov[1] = IOVEC_MAKE(&buffer, resp.header.length - sizeof(AddrInfoResponse));
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(out_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int send_nameinfo_reply(
+ int out_fd,
+ unsigned id,
+ int ret,
+ const char *host,
+ const char *serv,
+ int _errno,
+ int _h_errno) {
+
+ NameInfoResponse resp = {};
+ struct iovec iov[3];
+ struct msghdr mh;
+ size_t hl, sl;
+
+ assert(out_fd >= 0);
+
+ sl = serv ? strlen(serv)+1 : 0;
+ hl = host ? strlen(host)+1 : 0;
+
+ resp = (NameInfoResponse) {
+ .header.type = RESPONSE_NAMEINFO,
+ .header.id = id,
+ .header.length = sizeof(NameInfoResponse) + hl + sl,
+ .hostlen = hl,
+ .servlen = sl,
+ .ret = ret,
+ ._errno = _errno,
+ ._h_errno = _h_errno,
+ };
+
+ msan_unpoison(&resp, sizeof(resp));
+
+ iov[0] = IOVEC_MAKE(&resp, sizeof(NameInfoResponse));
+ iov[1] = IOVEC_MAKE((void*) host, hl);
+ iov[2] = IOVEC_MAKE((void*) serv, sl);
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(out_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int handle_request(int out_fd, const Packet *packet, size_t length) {
+ const RHeader *req;
+
+ assert(out_fd >= 0);
+ assert(packet);
+
+ req = &packet->rheader;
+
+ assert_return(length >= sizeof(RHeader), -EIO);
+ assert_return(length == req->length, -EIO);
+
+ switch (req->type) {
+
+ case REQUEST_ADDRINFO: {
+ const AddrInfoRequest *ai_req = &packet->addrinfo_request;
+ struct addrinfo hints, *result = NULL;
+ const char *node, *service;
+ int ret;
+
+ assert_return(length >= sizeof(AddrInfoRequest), -EBADMSG);
+ assert_return(length == sizeof(AddrInfoRequest) + ai_req->node_len + ai_req->service_len, -EBADMSG);
+
+ hints = (struct addrinfo) {
+ .ai_flags = ai_req->ai_flags,
+ .ai_family = ai_req->ai_family,
+ .ai_socktype = ai_req->ai_socktype,
+ .ai_protocol = ai_req->ai_protocol,
+ };
+
+ msan_unpoison(&hints, sizeof(hints));
+
+ node = ai_req->node_len ? (const char*) ai_req + sizeof(AddrInfoRequest) : NULL;
+ service = ai_req->service_len ? (const char*) ai_req + sizeof(AddrInfoRequest) + ai_req->node_len : NULL;
+
+ ret = getaddrinfo(node, service,
+ ai_req->hints_valid ? &hints : NULL,
+ &result);
+
+ /* send_addrinfo_reply() frees result */
+ return send_addrinfo_reply(out_fd, req->id, ret, result, errno, h_errno);
+ }
+
+ case REQUEST_NAMEINFO: {
+ const NameInfoRequest *ni_req = &packet->nameinfo_request;
+ char hostbuf[NI_MAXHOST], servbuf[NI_MAXSERV];
+ union sockaddr_union sa;
+ int ret;
+
+ assert_return(length >= sizeof(NameInfoRequest), -EBADMSG);
+ assert_return(length == sizeof(NameInfoRequest) + ni_req->sockaddr_len, -EBADMSG);
+ assert_return(ni_req->sockaddr_len <= sizeof(sa), -EBADMSG);
+
+ memcpy(&sa, (const uint8_t *) ni_req + sizeof(NameInfoRequest), ni_req->sockaddr_len);
+
+ ret = getnameinfo(&sa.sa, ni_req->sockaddr_len,
+ ni_req->gethost ? hostbuf : NULL, ni_req->gethost ? sizeof(hostbuf) : 0,
+ ni_req->getserv ? servbuf : NULL, ni_req->getserv ? sizeof(servbuf) : 0,
+ ni_req->flags);
+
+ return send_nameinfo_reply(out_fd, req->id, ret,
+ ret == 0 && ni_req->gethost ? hostbuf : NULL,
+ ret == 0 && ni_req->getserv ? servbuf : NULL,
+ errno, h_errno);
+ }
+
+ case REQUEST_TERMINATE:
+ /* Quit */
+ return -ECONNRESET;
+
+ default:
+ assert_not_reached("Unknown request");
+ }
+
+ return 0;
+}
+
+static void* thread_worker(void *p) {
+ sd_resolve *resolve = p;
+
+ /* Assign a pretty name to this thread */
+ (void) pthread_setname_np(pthread_self(), "sd-resolve");
+
+ while (!resolve->dead) {
+ union {
+ Packet packet;
+ uint8_t space[BUFSIZE];
+ } buf;
+ ssize_t length;
+
+ length = recv(resolve->fds[REQUEST_RECV_FD], &buf, sizeof buf, 0);
+ if (length < 0) {
+ if (errno == EINTR)
+ continue;
+
+ break;
+ }
+ if (length == 0)
+ break;
+
+ if (handle_request(resolve->fds[RESPONSE_SEND_FD], &buf.packet, (size_t) length) < 0)
+ break;
+ }
+
+ send_died(resolve->fds[RESPONSE_SEND_FD]);
+
+ return NULL;
+}
+
+static int start_threads(sd_resolve *resolve, unsigned extra) {
+ sigset_t ss, saved_ss;
+ unsigned n;
+ int r, k;
+
+ assert_se(sigfillset(&ss) >= 0);
+
+ /* No signals in forked off threads please. We set the mask before forking, so that the threads never exist
+ * with a different mask than a fully blocked one */
+ r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
+ if (r > 0)
+ return -r;
+
+ n = resolve->n_outstanding + extra;
+ n = CLAMP(n, WORKERS_MIN, WORKERS_MAX);
+
+ while (resolve->n_valid_workers < n) {
+ r = pthread_create(&resolve->workers[resolve->n_valid_workers], NULL, thread_worker, resolve);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ resolve->n_valid_workers++;
+ }
+
+ r = 0;
+
+finish:
+ k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
+ if (k > 0 && r >= 0)
+ r = -k;
+
+ return r;
+}
+
+static bool resolve_pid_changed(sd_resolve *r) {
+ assert(r);
+
+ /* We don't support people creating a resolver and keeping it
+ * around after fork(). Let's complain. */
+
+ return r->original_pid != getpid_cached();
+}
+
+_public_ int sd_resolve_new(sd_resolve **ret) {
+ _cleanup_(sd_resolve_unrefp) sd_resolve *resolve = NULL;
+ int i;
+
+ assert_return(ret, -EINVAL);
+
+ resolve = new0(sd_resolve, 1);
+ if (!resolve)
+ return -ENOMEM;
+
+ resolve->n_ref = 1;
+ resolve->original_pid = getpid_cached();
+
+ for (i = 0; i < _FD_MAX; i++)
+ resolve->fds[i] = -1;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, resolve->fds + REQUEST_RECV_FD) < 0)
+ return -errno;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, resolve->fds + RESPONSE_RECV_FD) < 0)
+ return -errno;
+
+ for (i = 0; i < _FD_MAX; i++)
+ resolve->fds[i] = fd_move_above_stdio(resolve->fds[i]);
+
+ (void) fd_inc_sndbuf(resolve->fds[REQUEST_SEND_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_inc_rcvbuf(resolve->fds[REQUEST_RECV_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_inc_sndbuf(resolve->fds[RESPONSE_SEND_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_inc_rcvbuf(resolve->fds[RESPONSE_RECV_FD], QUERIES_MAX * BUFSIZE);
+
+ (void) fd_nonblock(resolve->fds[RESPONSE_RECV_FD], true);
+
+ *ret = TAKE_PTR(resolve);
+ return 0;
+}
+
+_public_ int sd_resolve_default(sd_resolve **ret) {
+ static thread_local sd_resolve *default_resolve = NULL;
+ sd_resolve *e = NULL;
+ int r;
+
+ if (!ret)
+ return !!default_resolve;
+
+ if (default_resolve) {
+ *ret = sd_resolve_ref(default_resolve);
+ return 0;
+ }
+
+ r = sd_resolve_new(&e);
+ if (r < 0)
+ return r;
+
+ e->default_resolve_ptr = &default_resolve;
+ e->tid = gettid();
+ default_resolve = e;
+
+ *ret = e;
+ return 1;
+}
+
+_public_ int sd_resolve_get_tid(sd_resolve *resolve, pid_t *tid) {
+ assert_return(resolve, -EINVAL);
+ assert_return(tid, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ if (resolve->tid != 0) {
+ *tid = resolve->tid;
+ return 0;
+ }
+
+ if (resolve->event)
+ return sd_event_get_tid(resolve->event, tid);
+
+ return -ENXIO;
+}
+
+static sd_resolve *resolve_free(sd_resolve *resolve) {
+ PROTECT_ERRNO;
+ sd_resolve_query *q;
+ unsigned i;
+
+ assert(resolve);
+
+ while ((q = resolve->queries)) {
+ assert(q->floating);
+ resolve_query_disconnect(q);
+ sd_resolve_query_unref(q);
+ }
+
+ if (resolve->default_resolve_ptr)
+ *(resolve->default_resolve_ptr) = NULL;
+
+ resolve->dead = true;
+
+ sd_resolve_detach_event(resolve);
+
+ if (resolve->fds[REQUEST_SEND_FD] >= 0) {
+
+ RHeader req = {
+ .type = REQUEST_TERMINATE,
+ .length = sizeof req,
+ };
+
+ /* Send one termination packet for each worker */
+ for (i = 0; i < resolve->n_valid_workers; i++)
+ (void) send(resolve->fds[REQUEST_SEND_FD], &req, req.length, MSG_NOSIGNAL);
+ }
+
+ /* Now terminate them and wait until they are gone.
+ If we get an error than most likely the thread already exited. */
+ for (i = 0; i < resolve->n_valid_workers; i++)
+ (void) pthread_join(resolve->workers[i], NULL);
+
+ /* Close all communication channels */
+ close_many(resolve->fds, _FD_MAX);
+
+ return mfree(resolve);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_resolve, sd_resolve, resolve_free);
+
+_public_ int sd_resolve_get_fd(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ return resolve->fds[RESPONSE_RECV_FD];
+}
+
+_public_ int sd_resolve_get_events(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ return resolve->n_queries > resolve->n_done ? POLLIN : 0;
+}
+
+_public_ int sd_resolve_get_timeout(sd_resolve *resolve, uint64_t *usec) {
+ assert_return(resolve, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ *usec = (uint64_t) -1;
+ return 0;
+}
+
+static sd_resolve_query *lookup_query(sd_resolve *resolve, unsigned id) {
+ sd_resolve_query *q;
+
+ assert(resolve);
+
+ q = resolve->query_array[id % QUERIES_MAX];
+ if (q)
+ if (q->id == id)
+ return q;
+
+ return NULL;
+}
+
+static int complete_query(sd_resolve *resolve, sd_resolve_query *q) {
+ int r;
+
+ assert(q);
+ assert(!q->done);
+ assert(q->resolve == resolve);
+
+ q->done = true;
+ resolve->n_done++;
+
+ resolve->current = sd_resolve_query_ref(q);
+
+ switch (q->type) {
+
+ case REQUEST_ADDRINFO:
+ r = getaddrinfo_done(q);
+ break;
+
+ case REQUEST_NAMEINFO:
+ r = getnameinfo_done(q);
+ break;
+
+ default:
+ assert_not_reached("Cannot complete unknown query type");
+ }
+
+ resolve->current = NULL;
+
+ if (q->floating) {
+ resolve_query_disconnect(q);
+ sd_resolve_query_unref(q);
+ }
+
+ sd_resolve_query_unref(q);
+
+ return r;
+}
+
+static int unserialize_addrinfo(const void **p, size_t *length, struct addrinfo **ret_ai) {
+ AddrInfoSerialization s;
+ struct addrinfo *ai;
+ size_t l;
+
+ assert(p);
+ assert(*p);
+ assert(ret_ai);
+ assert(length);
+
+ if (*length < sizeof(AddrInfoSerialization))
+ return -EBADMSG;
+
+ memcpy(&s, *p, sizeof(s));
+
+ l = sizeof(AddrInfoSerialization) + s.ai_addrlen + s.canonname_len;
+ if (*length < l)
+ return -EBADMSG;
+
+ ai = new(struct addrinfo, 1);
+ if (!ai)
+ return -ENOMEM;
+
+ *ai = (struct addrinfo) {
+ .ai_flags = s.ai_flags,
+ .ai_family = s.ai_family,
+ .ai_socktype = s.ai_socktype,
+ .ai_protocol = s.ai_protocol,
+ .ai_addrlen = s.ai_addrlen,
+ };
+
+ if (s.ai_addrlen > 0) {
+ ai->ai_addr = memdup((const uint8_t*) *p + sizeof(AddrInfoSerialization), s.ai_addrlen);
+ if (!ai->ai_addr) {
+ free(ai);
+ return -ENOMEM;
+ }
+ }
+
+ if (s.canonname_len > 0) {
+ ai->ai_canonname = memdup((const uint8_t*) *p + sizeof(AddrInfoSerialization) + s.ai_addrlen, s.canonname_len);
+ if (!ai->ai_canonname) {
+ free(ai->ai_addr);
+ free(ai);
+ return -ENOMEM;
+ }
+ }
+
+ *length -= l;
+ *ret_ai = ai;
+ *p = ((const uint8_t*) *p) + l;
+
+ return 0;
+}
+
+static int handle_response(sd_resolve *resolve, const Packet *packet, size_t length) {
+ const RHeader *resp;
+ sd_resolve_query *q;
+ int r;
+
+ assert(resolve);
+ assert(packet);
+
+ resp = &packet->rheader;
+ assert_return(length >= sizeof(RHeader), -EIO);
+ assert_return(length == resp->length, -EIO);
+
+ if (resp->type == RESPONSE_DIED) {
+ resolve->dead = true;
+ return 0;
+ }
+
+ assert(resolve->n_outstanding > 0);
+ resolve->n_outstanding--;
+
+ q = lookup_query(resolve, resp->id);
+ if (!q)
+ return 0;
+
+ switch (resp->type) {
+
+ case RESPONSE_ADDRINFO: {
+ const AddrInfoResponse *ai_resp = &packet->addrinfo_response;
+ const void *p;
+ size_t l;
+ struct addrinfo *prev = NULL;
+
+ assert_return(length >= sizeof(AddrInfoResponse), -EBADMSG);
+ assert_return(q->type == REQUEST_ADDRINFO, -EBADMSG);
+
+ query_assign_errno(q, ai_resp->ret, ai_resp->_errno, ai_resp->_h_errno);
+
+ l = length - sizeof(AddrInfoResponse);
+ p = (const uint8_t*) resp + sizeof(AddrInfoResponse);
+
+ while (l > 0 && p) {
+ struct addrinfo *ai = NULL;
+
+ r = unserialize_addrinfo(&p, &l, &ai);
+ if (r < 0) {
+ query_assign_errno(q, EAI_SYSTEM, r, 0);
+ freeaddrinfo(q->addrinfo);
+ q->addrinfo = NULL;
+ break;
+ }
+
+ if (prev)
+ prev->ai_next = ai;
+ else
+ q->addrinfo = ai;
+
+ prev = ai;
+ }
+
+ return complete_query(resolve, q);
+ }
+
+ case RESPONSE_NAMEINFO: {
+ const NameInfoResponse *ni_resp = &packet->nameinfo_response;
+
+ assert_return(length >= sizeof(NameInfoResponse), -EBADMSG);
+ assert_return(q->type == REQUEST_NAMEINFO, -EBADMSG);
+
+ if (ni_resp->hostlen > DNS_HOSTNAME_MAX ||
+ ni_resp->servlen > DNS_HOSTNAME_MAX ||
+ sizeof(NameInfoResponse) + ni_resp->hostlen + ni_resp->servlen > length)
+ query_assign_errno(q, EAI_SYSTEM, EIO, 0);
+ else {
+ query_assign_errno(q, ni_resp->ret, ni_resp->_errno, ni_resp->_h_errno);
+
+ if (ni_resp->hostlen > 0) {
+ q->host = strndup((const char*) ni_resp + sizeof(NameInfoResponse),
+ ni_resp->hostlen-1);
+ if (!q->host)
+ query_assign_errno(q, EAI_MEMORY, ENOMEM, 0);
+ }
+
+ if (ni_resp->servlen > 0) {
+ q->serv = strndup((const char*) ni_resp + sizeof(NameInfoResponse) + ni_resp->hostlen,
+ ni_resp->servlen-1);
+ if (!q->serv)
+ query_assign_errno(q, EAI_MEMORY, ENOMEM, 0);
+ }
+ }
+
+ return complete_query(resolve, q);
+ }
+
+ default:
+ return 0;
+ }
+}
+
+_public_ int sd_resolve_process(sd_resolve *resolve) {
+ RESOLVE_DONT_DESTROY(resolve);
+
+ union {
+ Packet packet;
+ uint8_t space[BUFSIZE];
+ } buf;
+ ssize_t l;
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ /* We don't allow recursively invoking sd_resolve_process(). */
+ assert_return(!resolve->current, -EBUSY);
+
+ l = recv(resolve->fds[RESPONSE_RECV_FD], &buf, sizeof buf, 0);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return -errno;
+ }
+ if (l == 0)
+ return -ECONNREFUSED;
+
+ r = handle_response(resolve, &buf.packet, (size_t) l);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_resolve_wait(sd_resolve *resolve, uint64_t timeout_usec) {
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ if (resolve->n_done >= resolve->n_queries)
+ return 0;
+
+ do {
+ r = fd_wait_for_event(resolve->fds[RESPONSE_RECV_FD], POLLIN, timeout_usec);
+ } while (r == -EINTR);
+
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+
+ return sd_resolve_process(resolve);
+}
+
+static int alloc_query(sd_resolve *resolve, bool floating, sd_resolve_query **_q) {
+ sd_resolve_query *q;
+ int r;
+
+ assert(resolve);
+ assert(_q);
+
+ if (resolve->n_queries >= QUERIES_MAX)
+ return -ENOBUFS;
+
+ r = start_threads(resolve, 1);
+ if (r < 0)
+ return r;
+
+ while (resolve->query_array[resolve->current_id % QUERIES_MAX])
+ resolve->current_id++;
+
+ q = resolve->query_array[resolve->current_id % QUERIES_MAX] = new0(sd_resolve_query, 1);
+ if (!q)
+ return -ENOMEM;
+
+ q->n_ref = 1;
+ q->resolve = resolve;
+ q->floating = floating;
+ q->id = resolve->current_id++;
+
+ if (!floating)
+ sd_resolve_ref(resolve);
+
+ LIST_PREPEND(queries, resolve->queries, q);
+ resolve->n_queries++;
+
+ *_q = q;
+ return 0;
+}
+
+int resolve_getaddrinfo_with_destroy_callback(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const char *node, const char *service,
+ const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback,
+ void *userdata) {
+
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q = NULL;
+ size_t node_len, service_len;
+ AddrInfoRequest req = {};
+ struct iovec iov[3];
+ struct msghdr mh = {};
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(node || service, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ r = alloc_query(resolve, !ret_query, &q);
+ if (r < 0)
+ return r;
+
+ q->type = REQUEST_ADDRINFO;
+ q->getaddrinfo_handler = callback;
+ q->userdata = userdata;
+
+ node_len = node ? strlen(node) + 1 : 0;
+ service_len = service ? strlen(service) + 1 : 0;
+
+ req = (AddrInfoRequest) {
+ .node_len = node_len,
+ .service_len = service_len,
+
+ .header.id = q->id,
+ .header.type = REQUEST_ADDRINFO,
+ .header.length = sizeof(AddrInfoRequest) + node_len + service_len,
+
+ .hints_valid = hints,
+ .ai_flags = hints ? hints->ai_flags : 0,
+ .ai_family = hints ? hints->ai_family : 0,
+ .ai_socktype = hints ? hints->ai_socktype : 0,
+ .ai_protocol = hints ? hints->ai_protocol : 0,
+ };
+
+ msan_unpoison(&req, sizeof(req));
+
+ iov[mh.msg_iovlen++] = IOVEC_MAKE(&req, sizeof(AddrInfoRequest));
+ if (node)
+ iov[mh.msg_iovlen++] = IOVEC_MAKE((void*) node, req.node_len);
+ if (service)
+ iov[mh.msg_iovlen++] = IOVEC_MAKE((void*) service, req.service_len);
+ mh.msg_iov = iov;
+
+ if (sendmsg(resolve->fds[REQUEST_SEND_FD], &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ resolve->n_outstanding++;
+ q->destroy_callback = destroy_callback;
+
+ if (ret_query)
+ *ret_query = q;
+
+ TAKE_PTR(q);
+
+ return 0;
+}
+
+_public_ int sd_resolve_getaddrinfo(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const char *node, const char *service,
+ const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ void *userdata) {
+
+ return resolve_getaddrinfo_with_destroy_callback(resolve, ret_query, node, service, hints, callback, NULL, userdata);
+}
+
+static int getaddrinfo_done(sd_resolve_query* q) {
+ assert(q);
+ assert(q->done);
+ assert(q->getaddrinfo_handler);
+
+ errno = q->_errno;
+ h_errno = q->_h_errno;
+
+ return q->getaddrinfo_handler(q, q->ret, q->addrinfo, q->userdata);
+}
+
+int resolve_getnameinfo_with_destroy_callback(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const struct sockaddr *sa, socklen_t salen,
+ int flags,
+ uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback,
+ void *userdata) {
+
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q = NULL;
+ NameInfoRequest req = {};
+ struct iovec iov[2];
+ struct msghdr mh;
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(sa, -EINVAL);
+ assert_return(salen >= sizeof(struct sockaddr), -EINVAL);
+ assert_return(salen <= sizeof(union sockaddr_union), -EINVAL);
+ assert_return((get & ~SD_RESOLVE_GET_BOTH) == 0, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ r = alloc_query(resolve, !ret_query, &q);
+ if (r < 0)
+ return r;
+
+ q->type = REQUEST_NAMEINFO;
+ q->getnameinfo_handler = callback;
+ q->userdata = userdata;
+
+ req = (NameInfoRequest) {
+ .header.id = q->id,
+ .header.type = REQUEST_NAMEINFO,
+ .header.length = sizeof(NameInfoRequest) + salen,
+
+ .flags = flags,
+ .sockaddr_len = salen,
+ .gethost = !!(get & SD_RESOLVE_GET_HOST),
+ .getserv = !!(get & SD_RESOLVE_GET_SERVICE),
+ };
+
+ msan_unpoison(&req, sizeof(req));
+
+ iov[0] = IOVEC_MAKE(&req, sizeof(NameInfoRequest));
+ iov[1] = IOVEC_MAKE((void*) sa, salen);
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(resolve->fds[REQUEST_SEND_FD], &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ resolve->n_outstanding++;
+ q->destroy_callback = destroy_callback;
+
+ if (ret_query)
+ *ret_query = q;
+
+ TAKE_PTR(q);
+
+ return 0;
+}
+
+_public_ int sd_resolve_getnameinfo(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const struct sockaddr *sa, socklen_t salen,
+ int flags,
+ uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ void *userdata) {
+
+ return resolve_getnameinfo_with_destroy_callback(resolve, ret_query, sa, salen, flags, get, callback, NULL, userdata);
+}
+
+static int getnameinfo_done(sd_resolve_query *q) {
+
+ assert(q);
+ assert(q->done);
+ assert(q->getnameinfo_handler);
+
+ errno = q->_errno;
+ h_errno = q->_h_errno;
+
+ return q->getnameinfo_handler(q, q->ret, q->host, q->serv, q->userdata);
+}
+
+static void resolve_freeaddrinfo(struct addrinfo *ai) {
+ while (ai) {
+ struct addrinfo *next = ai->ai_next;
+
+ free(ai->ai_addr);
+ free(ai->ai_canonname);
+ free(ai);
+ ai = next;
+ }
+}
+
+static void resolve_query_disconnect(sd_resolve_query *q) {
+ sd_resolve *resolve;
+ unsigned i;
+
+ assert(q);
+
+ if (!q->resolve)
+ return;
+
+ resolve = q->resolve;
+ assert(resolve->n_queries > 0);
+
+ if (q->done) {
+ assert(resolve->n_done > 0);
+ resolve->n_done--;
+ }
+
+ i = q->id % QUERIES_MAX;
+ assert(resolve->query_array[i] == q);
+ resolve->query_array[i] = NULL;
+ LIST_REMOVE(queries, resolve->queries, q);
+ resolve->n_queries--;
+
+ q->resolve = NULL;
+ if (!q->floating)
+ sd_resolve_unref(resolve);
+}
+
+static sd_resolve_query *resolve_query_free(sd_resolve_query *q) {
+ assert(q);
+
+ resolve_query_disconnect(q);
+
+ if (q->destroy_callback)
+ q->destroy_callback(q->userdata);
+
+ resolve_freeaddrinfo(q->addrinfo);
+ free(q->host);
+ free(q->serv);
+
+ return mfree(q);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_resolve_query, sd_resolve_query, resolve_query_free);
+
+_public_ int sd_resolve_query_is_done(sd_resolve_query *q) {
+ assert_return(q, -EINVAL);
+ assert_return(!resolve_pid_changed(q->resolve), -ECHILD);
+
+ return q->done;
+}
+
+_public_ void* sd_resolve_query_set_userdata(sd_resolve_query *q, void *userdata) {
+ void *ret;
+
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ ret = q->userdata;
+ q->userdata = userdata;
+
+ return ret;
+}
+
+_public_ void* sd_resolve_query_get_userdata(sd_resolve_query *q) {
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ return q->userdata;
+}
+
+_public_ sd_resolve *sd_resolve_query_get_resolve(sd_resolve_query *q) {
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ return q->resolve;
+}
+
+_public_ int sd_resolve_query_get_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t *destroy_callback) {
+ assert_return(q, -EINVAL);
+
+ if (destroy_callback)
+ *destroy_callback = q->destroy_callback;
+
+ return !!q->destroy_callback;
+}
+
+_public_ int sd_resolve_query_set_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t destroy_callback) {
+ assert_return(q, -EINVAL);
+
+ q->destroy_callback = destroy_callback;
+ return 0;
+}
+
+_public_ int sd_resolve_query_get_floating(sd_resolve_query *q) {
+ assert_return(q, -EINVAL);
+
+ return q->floating;
+}
+
+_public_ int sd_resolve_query_set_floating(sd_resolve_query *q, int b) {
+ assert_return(q, -EINVAL);
+
+ if (q->floating == !!b)
+ return 0;
+
+ if (!q->resolve) /* Already disconnected */
+ return -ESTALE;
+
+ q->floating = b;
+
+ if (b) {
+ sd_resolve_query_ref(q);
+ sd_resolve_unref(q->resolve);
+ } else {
+ sd_resolve_ref(q->resolve);
+ sd_resolve_query_unref(q);
+ }
+
+ return 1;
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_resolve *resolve = userdata;
+ int r;
+
+ assert(resolve);
+
+ r = sd_resolve_process(resolve);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_resolve_attach_event(sd_resolve *resolve, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve->event, -EBUSY);
+
+ assert(!resolve->event_source);
+
+ if (event)
+ resolve->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&resolve->event);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_io(resolve->event, &resolve->event_source, resolve->fds[RESPONSE_RECV_FD], POLLIN, io_callback, resolve);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(resolve->event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_resolve_detach_event(resolve);
+ return r;
+}
+
+_public_ int sd_resolve_detach_event(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+
+ if (!resolve->event)
+ return 0;
+
+ if (resolve->event_source) {
+ sd_event_source_set_enabled(resolve->event_source, SD_EVENT_OFF);
+ resolve->event_source = sd_event_source_unref(resolve->event_source);
+ }
+
+ resolve->event = sd_event_unref(resolve->event);
+ return 1;
+}
+
+_public_ sd_event *sd_resolve_get_event(sd_resolve *resolve) {
+ assert_return(resolve, NULL);
+
+ return resolve->event;
+}
diff --git a/src/libsystemd/sd-resolve/test-resolve.c b/src/libsystemd/sd-resolve/test-resolve.c
new file mode 100644
index 0000000..b973dfd
--- /dev/null
+++ b/src/libsystemd/sd-resolve/test-resolve.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <resolv.h>
+#include <stdio.h>
+
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+#define TEST_TIMEOUT_USEC (20*USEC_PER_SEC)
+
+static int getaddrinfo_handler(sd_resolve_query *q, int ret, const struct addrinfo *ai, void *userdata) {
+ const struct addrinfo *i;
+
+ assert_se(q);
+
+ if (ret != 0) {
+ log_error("getaddrinfo error: %s %i", gai_strerror(ret), ret);
+ return 0;
+ }
+
+ for (i = ai; i; i = i->ai_next) {
+ _cleanup_free_ char *addr = NULL;
+
+ assert_se(sockaddr_pretty(i->ai_addr, i->ai_addrlen, false, true, &addr) == 0);
+ puts(addr);
+ }
+
+ printf("canonical name: %s\n", strna(ai->ai_canonname));
+
+ return 0;
+}
+
+static int getnameinfo_handler(sd_resolve_query *q, int ret, const char *host, const char *serv, void *userdata) {
+ assert_se(q);
+
+ if (ret != 0) {
+ log_error("getnameinfo error: %s %i", gai_strerror(ret), ret);
+ return 0;
+ }
+
+ printf("Host: %s — Serv: %s\n", strna(host), strna(serv));
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q1 = NULL, *q2 = NULL;
+ _cleanup_(sd_resolve_unrefp) sd_resolve *resolve = NULL;
+ int r;
+
+ struct addrinfo hints = {
+ .ai_family = AF_UNSPEC,
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_CANONNAME
+ };
+
+ struct sockaddr_in sa = {
+ .sin_family = AF_INET,
+ .sin_port = htobe16(80)
+ };
+
+ assert_se(sd_resolve_default(&resolve) >= 0);
+
+ /* Test a floating resolver query */
+ r = sd_resolve_getaddrinfo(resolve, NULL, "redhat.com", "http", NULL, getaddrinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getaddrinfo(): %m");
+
+ /* Make a name -> address query */
+ r = sd_resolve_getaddrinfo(resolve, &q1, argc >= 2 ? argv[1] : "www.heise.de", NULL, &hints, getaddrinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getaddrinfo(): %m");
+
+ /* Make an address -> name query */
+ sa.sin_addr.s_addr = inet_addr(argc >= 3 ? argv[2] : "193.99.144.71");
+ r = sd_resolve_getnameinfo(resolve, &q2, (struct sockaddr*) &sa, sizeof(sa), 0, SD_RESOLVE_GET_BOTH, getnameinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getnameinfo(): %m");
+
+ /* Wait until all queries are completed */
+ for (;;) {
+ r = sd_resolve_wait(resolve, TEST_TIMEOUT_USEC);
+ if (r == 0)
+ break;
+ if (r == -ETIMEDOUT) {
+ /* Let's catch timeouts here, so that we can run safely in a CI that has no reliable DNS. Note
+ * that we invoke exit() directly here, as the stuck NSS call will not allow us to exit
+ * cleanly. */
+
+ log_notice_errno(r, "sd_resolve_wait() timed out, but that's OK");
+ exit(EXIT_SUCCESS);
+ }
+ if (r < 0) {
+ log_error_errno(r, "sd_resolve_wait(): %m");
+ assert_not_reached("sd_resolve_wait() failed");
+ }
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-utf8/sd-utf8.c b/src/libsystemd/sd-utf8/sd-utf8.c
new file mode 100644
index 0000000..82fa125
--- /dev/null
+++ b/src/libsystemd/sd-utf8/sd-utf8.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-utf8.h"
+
+#include "utf8.h"
+#include "util.h"
+
+_public_ const char *sd_utf8_is_valid(const char *s) {
+ assert_return(s, NULL);
+
+ return utf8_is_valid(s);
+}
+
+_public_ const char *sd_ascii_is_valid(const char *s) {
+ assert_return(s, NULL);
+
+ return ascii_is_valid(s);
+}
diff --git a/src/libudev/libudev-device-internal.h b/src/libudev/libudev-device-internal.h
new file mode 100644
index 0000000..437d431
--- /dev/null
+++ b/src/libudev/libudev-device-internal.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "libudev.h"
+#include "sd-device.h"
+
+struct udev_device;
+
+struct udev_device *udev_device_new(struct udev *udev, sd_device *device);
+sd_device *udev_device_get_sd_device(struct udev_device *udev_device);
diff --git a/src/libudev/libudev-device.c b/src/libudev/libudev-device.c
new file mode 100644
index 0000000..34543a8
--- /dev/null
+++ b/src/libudev/libudev-device.c
@@ -0,0 +1,905 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "libudev.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "libudev-device-internal.h"
+#include "libudev-list-internal.h"
+#include "parse-util.h"
+#include "time-util.h"
+
+/**
+ * SECTION:libudev-device
+ * @short_description: kernel sys devices
+ *
+ * Representation of kernel sys devices. Devices are uniquely identified
+ * by their syspath, every device has exactly one path in the kernel sys
+ * filesystem. Devices usually belong to a kernel subsystem, and have
+ * a unique name inside that subsystem.
+ */
+
+/**
+ * udev_device:
+ *
+ * Opaque object representing one kernel sys device.
+ */
+struct udev_device {
+ struct udev *udev;
+
+ /* real device object */
+ sd_device *device;
+
+ /* legacy */
+ unsigned n_ref;
+
+ struct udev_device *parent;
+ bool parent_set;
+
+ struct udev_list *properties;
+ uint64_t properties_generation;
+ struct udev_list *all_tags, *current_tags;
+ uint64_t all_tags_generation, current_tags_generation;
+ struct udev_list *devlinks;
+ uint64_t devlinks_generation;
+ bool properties_read:1;
+ bool all_tags_read:1;
+ bool current_tags_read:1;
+ bool devlinks_read:1;
+ struct udev_list *sysattrs;
+ bool sysattrs_read;
+};
+
+/**
+ * udev_device_get_seqnum:
+ * @udev_device: udev device
+ *
+ * This is only valid if the device was received through a monitor. Devices read from
+ * sys do not have a sequence number.
+ *
+ * Returns: the kernel event sequence number, or 0 if there is no sequence number available.
+ **/
+_public_ unsigned long long udev_device_get_seqnum(struct udev_device *udev_device) {
+ uint64_t seqnum;
+
+ assert_return_errno(udev_device, 0, EINVAL);
+
+ if (device_get_seqnum(udev_device->device, &seqnum) < 0)
+ return 0;
+
+ return seqnum;
+}
+
+/**
+ * udev_device_get_devnum:
+ * @udev_device: udev device
+ *
+ * Get the device major/minor number.
+ *
+ * Returns: the dev_t number.
+ **/
+_public_ dev_t udev_device_get_devnum(struct udev_device *udev_device) {
+ dev_t devnum;
+ int r;
+
+ assert_return_errno(udev_device, makedev(0, 0), EINVAL);
+
+ r = sd_device_get_devnum(udev_device->device, &devnum);
+ if (r == -ENOENT)
+ return makedev(0, 0);
+ if (r < 0)
+ return_with_errno(makedev(0, 0), r);
+
+ return devnum;
+}
+
+/**
+ * udev_device_get_driver:
+ * @udev_device: udev device
+ *
+ * Get the kernel driver name.
+ *
+ * Returns: the driver name string, or #NULL if there is no driver attached.
+ **/
+_public_ const char *udev_device_get_driver(struct udev_device *udev_device) {
+ const char *driver;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_driver(udev_device->device, &driver);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return driver;
+}
+
+/**
+ * udev_device_get_devtype:
+ * @udev_device: udev device
+ *
+ * Retrieve the devtype string of the udev device.
+ *
+ * Returns: the devtype name of the udev device, or #NULL if it cannot be determined
+ **/
+_public_ const char *udev_device_get_devtype(struct udev_device *udev_device) {
+ const char *devtype;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_devtype(udev_device->device, &devtype);
+ if (r == -ENOENT)
+ return NULL;
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return devtype;
+}
+
+/**
+ * udev_device_get_subsystem:
+ * @udev_device: udev device
+ *
+ * Retrieve the subsystem string of the udev device. The string does not
+ * contain any "/".
+ *
+ * Returns: the subsystem name of the udev device, or #NULL if it cannot be determined
+ **/
+_public_ const char *udev_device_get_subsystem(struct udev_device *udev_device) {
+ const char *subsystem;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_subsystem(udev_device->device, &subsystem);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return subsystem;
+}
+
+/**
+ * udev_device_get_property_value:
+ * @udev_device: udev device
+ * @key: property name
+ *
+ * Get the value of a given property.
+ *
+ * Returns: the property string, or #NULL if there is no such property.
+ **/
+_public_ const char *udev_device_get_property_value(struct udev_device *udev_device, const char *key) {
+ const char *value;
+ int r;
+
+ assert_return_errno(udev_device && key, NULL, EINVAL);
+
+ r = sd_device_get_property_value(udev_device->device, key, &value);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return value;
+}
+
+struct udev_device *udev_device_new(struct udev *udev, sd_device *device) {
+ _cleanup_(udev_list_freep) struct udev_list *properties = NULL, *all_tags = NULL, *current_tags = NULL, *sysattrs = NULL, *devlinks = NULL;
+ struct udev_device *udev_device;
+
+ assert(device);
+
+ properties = udev_list_new(true);
+ if (!properties)
+ return_with_errno(NULL, ENOMEM);
+ all_tags = udev_list_new(true);
+ if (!all_tags)
+ return_with_errno(NULL, ENOMEM);
+ current_tags = udev_list_new(true);
+ if (!current_tags)
+ return_with_errno(NULL, ENOMEM);
+ sysattrs = udev_list_new(true);
+ if (!sysattrs)
+ return_with_errno(NULL, ENOMEM);
+ devlinks = udev_list_new(true);
+ if (!devlinks)
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device = new(struct udev_device, 1);
+ if (!udev_device)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev_device = (struct udev_device) {
+ .n_ref = 1,
+ .udev = udev,
+ .device = sd_device_ref(device),
+ .properties = TAKE_PTR(properties),
+ .all_tags = TAKE_PTR(all_tags),
+ .current_tags = TAKE_PTR(current_tags),
+ .sysattrs = TAKE_PTR(sysattrs),
+ .devlinks = TAKE_PTR(devlinks),
+ };
+
+ return udev_device;
+}
+
+/**
+ * udev_device_new_from_syspath:
+ * @udev: udev library context
+ * @syspath: sys device path including sys directory
+ *
+ * Create new udev device, and fill in information from the sys
+ * device and the udev database entry. The syspath is the absolute
+ * path to the device, including the sys mount point.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_syspath(struct udev *udev, const char *syspath) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_syspath(&device, syspath);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+/**
+ * udev_device_new_from_devnum:
+ * @udev: udev library context
+ * @type: char or block device
+ * @devnum: device major/minor number
+ *
+ * Create new udev device, and fill in information from the sys
+ * device and the udev database entry. The device is looked-up
+ * by its major/minor number and type. Character and block device
+ * numbers are not unique across the two types.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_devnum(struct udev *udev, char type, dev_t devnum) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_devnum(&device, type, devnum);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+/**
+ * udev_device_new_from_device_id:
+ * @udev: udev library context
+ * @id: text string identifying a kernel device
+ *
+ * Create new udev device, and fill in information from the sys
+ * device and the udev database entry. The device is looked-up
+ * by a special string:
+ * b8:2 - block device major:minor
+ * c128:1 - char device major:minor
+ * n3 - network device ifindex
+ * +sound:card29 - kernel driver core subsystem:device name
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_device_id(struct udev *udev, const char *id) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_device_id(&device, id);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+/**
+ * udev_device_new_from_subsystem_sysname:
+ * @udev: udev library context
+ * @subsystem: the subsystem of the device
+ * @sysname: the name of the device
+ *
+ * Create new udev device, and fill in information from the sys device
+ * and the udev database entry. The device is looked up by the subsystem
+ * and name string of the device, like "mem" / "zero", or "block" / "sda".
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_subsystem_sysname(struct udev *udev, const char *subsystem, const char *sysname) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_subsystem_sysname(&device, subsystem, sysname);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+/**
+ * udev_device_new_from_environment
+ * @udev: udev library context
+ *
+ * Create new udev device, and fill in information from the
+ * current process environment. This only works reliable if
+ * the process is called from a udev rule. It is usually used
+ * for tools executed from IMPORT= rules.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, if it does not exist
+ **/
+_public_ struct udev_device *udev_device_new_from_environment(struct udev *udev) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = device_new_from_strv(&device, environ);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev, device);
+}
+
+static struct udev_device *device_new_from_parent(struct udev_device *child) {
+ sd_device *parent;
+ int r;
+
+ assert_return_errno(child, NULL, EINVAL);
+
+ r = sd_device_get_parent(child->device, &parent);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(child->udev, parent);
+}
+
+/**
+ * udev_device_get_parent:
+ * @udev_device: the device to start searching from
+ *
+ * Find the next parent device, and fill in information from the sys
+ * device and the udev database entry.
+ *
+ * Returned device is not referenced. It is attached to the child
+ * device, and will be cleaned up when the child device is cleaned up.
+ *
+ * It is not necessarily just the upper level directory, empty or not
+ * recognized sys directories are ignored.
+ *
+ * It can be called as many times as needed, without caring about
+ * references.
+ *
+ * Returns: a new udev device, or #NULL, if it no parent exist.
+ **/
+_public_ struct udev_device *udev_device_get_parent(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (!udev_device->parent_set) {
+ udev_device->parent_set = true;
+ udev_device->parent = device_new_from_parent(udev_device);
+ }
+
+ /* TODO: errno will differ here in case parent == NULL */
+ return udev_device->parent;
+}
+
+/**
+ * udev_device_get_parent_with_subsystem_devtype:
+ * @udev_device: udev device to start searching from
+ * @subsystem: the subsystem of the device
+ * @devtype: the type (DEVTYPE) of the device
+ *
+ * Find the next parent device, with a matching subsystem and devtype
+ * value, and fill in information from the sys device and the udev
+ * database entry.
+ *
+ * If devtype is #NULL, only subsystem is checked, and any devtype will
+ * match.
+ *
+ * Returned device is not referenced. It is attached to the child
+ * device, and will be cleaned up when the child device is cleaned up.
+ *
+ * It can be called as many times as needed, without caring about
+ * references.
+ *
+ * Returns: a new udev device, or #NULL if no matching parent exists.
+ **/
+_public_ struct udev_device *udev_device_get_parent_with_subsystem_devtype(struct udev_device *udev_device, const char *subsystem, const char *devtype) {
+ sd_device *parent;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ /* this relies on the fact that finding the subdevice of a parent or the
+ parent of a subdevice commute */
+
+ /* first find the correct sd_device */
+ r = sd_device_get_parent_with_subsystem_devtype(udev_device->device, subsystem, devtype, &parent);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ /* then walk the chain of udev_device parents until the corresponding
+ one is found */
+ while ((udev_device = udev_device_get_parent(udev_device)))
+ if (udev_device->device == parent)
+ return udev_device;
+
+ return_with_errno(NULL, ENOENT);
+}
+
+/**
+ * udev_device_get_udev:
+ * @udev_device: udev device
+ *
+ * Retrieve the udev library context the device was created with.
+ *
+ * Returns: the udev library context
+ **/
+_public_ struct udev *udev_device_get_udev(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ return udev_device->udev;
+}
+
+static struct udev_device *udev_device_free(struct udev_device *udev_device) {
+ assert(udev_device);
+
+ sd_device_unref(udev_device->device);
+ udev_device_unref(udev_device->parent);
+
+ udev_list_free(udev_device->properties);
+ udev_list_free(udev_device->sysattrs);
+ udev_list_free(udev_device->all_tags);
+ udev_list_free(udev_device->current_tags);
+ udev_list_free(udev_device->devlinks);
+
+ return mfree(udev_device);
+}
+
+/**
+ * udev_device_ref:
+ * @udev_device: udev device
+ *
+ * Take a reference of a udev device.
+ *
+ * Returns: the passed udev device
+ **/
+
+/**
+ * udev_device_unref:
+ * @udev_device: udev device
+ *
+ * Drop a reference of a udev device. If the refcount reaches zero,
+ * the resources of the device will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_device, udev_device, udev_device_free);
+
+/**
+ * udev_device_get_devpath:
+ * @udev_device: udev device
+ *
+ * Retrieve the kernel devpath value of the udev device. The path
+ * does not contain the sys mount point, and starts with a '/'.
+ *
+ * Returns: the devpath of the udev device
+ **/
+_public_ const char *udev_device_get_devpath(struct udev_device *udev_device) {
+ const char *devpath;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_devpath(udev_device->device, &devpath);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return devpath;
+}
+
+/**
+ * udev_device_get_syspath:
+ * @udev_device: udev device
+ *
+ * Retrieve the sys path of the udev device. The path is an
+ * absolute path and starts with the sys mount point.
+ *
+ * Returns: the sys path of the udev device
+ **/
+_public_ const char *udev_device_get_syspath(struct udev_device *udev_device) {
+ const char *syspath;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_syspath(udev_device->device, &syspath);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return syspath;
+}
+
+/**
+ * udev_device_get_sysname:
+ * @udev_device: udev device
+ *
+ * Get the kernel device name in /sys.
+ *
+ * Returns: the name string of the device
+ **/
+_public_ const char *udev_device_get_sysname(struct udev_device *udev_device) {
+ const char *sysname;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_sysname(udev_device->device, &sysname);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return sysname;
+}
+
+/**
+ * udev_device_get_sysnum:
+ * @udev_device: udev device
+ *
+ * Get the instance number of the device.
+ *
+ * Returns: the trailing number string of the device name
+ **/
+_public_ const char *udev_device_get_sysnum(struct udev_device *udev_device) {
+ const char *sysnum;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_sysnum(udev_device->device, &sysnum);
+ if (r == -ENOENT)
+ return NULL;
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return sysnum;
+}
+
+/**
+ * udev_device_get_devnode:
+ * @udev_device: udev device
+ *
+ * Retrieve the device node file name belonging to the udev device.
+ * The path is an absolute path, and starts with the device directory.
+ *
+ * Returns: the device node file name of the udev device, or #NULL if no device node exists
+ **/
+_public_ const char *udev_device_get_devnode(struct udev_device *udev_device) {
+ const char *devnode;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_devname(udev_device->device, &devnode);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return devnode;
+}
+
+/**
+ * udev_device_get_devlinks_list_entry:
+ * @udev_device: udev device
+ *
+ * Retrieve the list of device links pointing to the device file of
+ * the udev device. The next list entry can be retrieved with
+ * udev_list_entry_get_next(), which returns #NULL if no more entries exist.
+ * The devlink path can be retrieved from the list entry by
+ * udev_list_entry_get_name(). The path is an absolute path, and starts with
+ * the device directory.
+ *
+ * Returns: the first entry of the device node link list
+ **/
+_public_ struct udev_list_entry *udev_device_get_devlinks_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (device_get_devlinks_generation(udev_device->device) != udev_device->devlinks_generation ||
+ !udev_device->devlinks_read) {
+ const char *devlink;
+
+ udev_list_cleanup(udev_device->devlinks);
+
+ FOREACH_DEVICE_DEVLINK(udev_device->device, devlink)
+ if (!udev_list_entry_add(udev_device->devlinks, devlink, NULL))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->devlinks_read = true;
+ udev_device->devlinks_generation = device_get_devlinks_generation(udev_device->device);
+ }
+
+ return udev_list_get_entry(udev_device->devlinks);
+}
+
+/**
+ * udev_device_get_event_properties_entry:
+ * @udev_device: udev device
+ *
+ * Retrieve the list of key/value device properties of the udev
+ * device. The next list entry can be retrieved with udev_list_entry_get_next(),
+ * which returns #NULL if no more entries exist. The property name
+ * can be retrieved from the list entry by udev_list_entry_get_name(),
+ * the property value by udev_list_entry_get_value().
+ *
+ * Returns: the first entry of the property list
+ **/
+_public_ struct udev_list_entry *udev_device_get_properties_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (device_get_properties_generation(udev_device->device) != udev_device->properties_generation ||
+ !udev_device->properties_read) {
+ const char *key, *value;
+
+ udev_list_cleanup(udev_device->properties);
+
+ FOREACH_DEVICE_PROPERTY(udev_device->device, key, value)
+ if (!udev_list_entry_add(udev_device->properties, key, value))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->properties_read = true;
+ udev_device->properties_generation = device_get_properties_generation(udev_device->device);
+ }
+
+ return udev_list_get_entry(udev_device->properties);
+}
+
+/**
+ * udev_device_get_action:
+ * @udev_device: udev device
+ *
+ * This is only valid if the device was received through a monitor. Devices read from
+ * sys do not have an action string. Usual actions are: add, remove, change, move,
+ * online, offline.
+ *
+ * Returns: the kernel action value, or #NULL if there is no action value available.
+ **/
+_public_ const char *udev_device_get_action(struct udev_device *udev_device) {
+ DeviceAction action;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (device_get_action(udev_device->device, &action) < 0)
+ return NULL;
+
+ return device_action_to_string(action);
+}
+
+/**
+ * udev_device_get_usec_since_initialized:
+ * @udev_device: udev device
+ *
+ * Return the number of microseconds passed since udev set up the
+ * device for the first time.
+ *
+ * This is only implemented for devices with need to store properties
+ * in the udev database. All other devices return 0 here.
+ *
+ * Returns: the number of microseconds since the device was first seen.
+ **/
+_public_ unsigned long long int udev_device_get_usec_since_initialized(struct udev_device *udev_device) {
+ usec_t ts;
+ int r;
+
+ assert_return(udev_device, -EINVAL);
+
+ r = sd_device_get_usec_since_initialized(udev_device->device, &ts);
+ if (r < 0)
+ return_with_errno(0, r);
+
+ return ts;
+}
+
+/**
+ * udev_device_get_sysattr_value:
+ * @udev_device: udev device
+ * @sysattr: attribute name
+ *
+ * The retrieved value is cached in the device. Repeated calls will return the same
+ * value and not open the attribute again.
+ *
+ * Returns: the content of a sys attribute file, or #NULL if there is no sys attribute value.
+ **/
+_public_ const char *udev_device_get_sysattr_value(struct udev_device *udev_device, const char *sysattr) {
+ const char *value;
+ int r;
+
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ r = sd_device_get_sysattr_value(udev_device->device, sysattr, &value);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return value;
+}
+
+/**
+ * udev_device_set_sysattr_value:
+ * @udev_device: udev device
+ * @sysattr: attribute name
+ * @value: new value to be set
+ *
+ * Update the contents of the sys attribute and the cached value of the device.
+ *
+ * Returns: Negative error code on failure or 0 on success.
+ **/
+_public_ int udev_device_set_sysattr_value(struct udev_device *udev_device, const char *sysattr, const char *value) {
+ int r;
+
+ assert_return(udev_device, -EINVAL);
+
+ r = sd_device_set_sysattr_value(udev_device->device, sysattr, value);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/**
+ * udev_device_get_sysattr_list_entry:
+ * @udev_device: udev device
+ *
+ * Retrieve the list of available sysattrs, with value being empty;
+ * This just return all available sysfs attributes for a particular
+ * device without reading their values.
+ *
+ * Returns: the first entry of the property list
+ **/
+_public_ struct udev_list_entry *udev_device_get_sysattr_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (!udev_device->sysattrs_read) {
+ const char *sysattr;
+
+ udev_list_cleanup(udev_device->sysattrs);
+
+ FOREACH_DEVICE_SYSATTR(udev_device->device, sysattr)
+ if (!udev_list_entry_add(udev_device->sysattrs, sysattr, NULL))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->sysattrs_read = true;
+ }
+
+ return udev_list_get_entry(udev_device->sysattrs);
+}
+
+/**
+ * udev_device_get_is_initialized:
+ * @udev_device: udev device
+ *
+ * Check if udev has already handled the device and has set up
+ * device node permissions and context, or has renamed a network
+ * device.
+ *
+ * This is only implemented for devices with a device node
+ * or network interfaces. All other devices return 1 here.
+ *
+ * Returns: 1 if the device is set up. 0 otherwise.
+ **/
+_public_ int udev_device_get_is_initialized(struct udev_device *udev_device) {
+ int r;
+
+ assert_return(udev_device, -EINVAL);
+
+ r = sd_device_get_is_initialized(udev_device->device);
+ if (r < 0)
+ return_with_errno(0, r);
+
+ return r;
+}
+
+/**
+ * udev_device_get_tags_list_entry:
+ * @udev_device: udev device
+ *
+ * Retrieve the list of tags attached to the udev device. The next
+ * list entry can be retrieved with udev_list_entry_get_next(),
+ * which returns #NULL if no more entries exist. The tag string
+ * can be retrieved from the list entry by udev_list_entry_get_name().
+ *
+ * Returns: the first entry of the tag list
+ **/
+_public_ struct udev_list_entry *udev_device_get_tags_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (device_get_tags_generation(udev_device->device) != udev_device->all_tags_generation ||
+ !udev_device->all_tags_read) {
+ const char *tag;
+
+ udev_list_cleanup(udev_device->all_tags);
+
+ FOREACH_DEVICE_TAG(udev_device->device, tag)
+ if (!udev_list_entry_add(udev_device->all_tags, tag, NULL))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->all_tags_read = true;
+ udev_device->all_tags_generation = device_get_tags_generation(udev_device->device);
+ }
+
+ return udev_list_get_entry(udev_device->all_tags);
+}
+
+_public_ struct udev_list_entry *udev_device_get_current_tags_list_entry(struct udev_device *udev_device) {
+ assert_return_errno(udev_device, NULL, EINVAL);
+
+ if (device_get_tags_generation(udev_device->device) != udev_device->current_tags_generation ||
+ !udev_device->current_tags_read) {
+ const char *tag;
+
+ udev_list_cleanup(udev_device->current_tags);
+
+ FOREACH_DEVICE_CURRENT_TAG(udev_device->device, tag)
+ if (!udev_list_entry_add(udev_device->current_tags, tag, NULL))
+ return_with_errno(NULL, ENOMEM);
+
+ udev_device->current_tags_read = true;
+ udev_device->current_tags_generation = device_get_tags_generation(udev_device->device);
+ }
+
+ return udev_list_get_entry(udev_device->current_tags);
+}
+
+/**
+ * udev_device_has_tag:
+ * @udev_device: udev device
+ * @tag: tag name
+ *
+ * Check if a given device has a certain tag associated.
+ *
+ * Returns: 1 if the tag is found. 0 otherwise.
+ **/
+_public_ int udev_device_has_tag(struct udev_device *udev_device, const char *tag) {
+ assert_return(udev_device, 0);
+
+ return sd_device_has_tag(udev_device->device, tag) > 0;
+}
+
+_public_ int udev_device_has_current_tag(struct udev_device *udev_device, const char *tag) {
+ assert_return(udev_device, 0);
+
+ return sd_device_has_current_tag(udev_device->device, tag) > 0;
+}
+
+sd_device *udev_device_get_sd_device(struct udev_device *udev_device) {
+ assert(udev_device);
+
+ return udev_device->device;
+}
diff --git a/src/libudev/libudev-enumerate.c b/src/libudev/libudev-enumerate.c
new file mode 100644
index 0000000..33bd360
--- /dev/null
+++ b/src/libudev/libudev-enumerate.c
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fnmatch.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "libudev.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "device-util.h"
+#include "libudev-device-internal.h"
+#include "libudev-list-internal.h"
+
+/**
+ * SECTION:libudev-enumerate
+ * @short_description: lookup and sort sys devices
+ *
+ * Lookup devices in the sys filesystem, filter devices by properties,
+ * and return a sorted list of devices.
+ */
+
+/**
+ * udev_enumerate:
+ *
+ * Opaque object representing one device lookup/sort context.
+ */
+struct udev_enumerate {
+ struct udev *udev;
+ unsigned n_ref;
+ struct udev_list *devices_list;
+ bool devices_uptodate:1;
+
+ sd_device_enumerator *enumerator;
+};
+
+/**
+ * udev_enumerate_new:
+ * @udev: udev library context
+ *
+ * Create an enumeration context to scan /sys.
+ *
+ * Returns: an enumeration context.
+ **/
+_public_ struct udev_enumerate *udev_enumerate_new(struct udev *udev) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_(udev_list_freep) struct udev_list *list = NULL;
+ struct udev_enumerate *udev_enumerate;
+ int r;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ list = udev_list_new(false);
+ if (!list)
+ return_with_errno(NULL, ENOMEM);
+
+ udev_enumerate = new(struct udev_enumerate, 1);
+ if (!udev_enumerate)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev_enumerate = (struct udev_enumerate) {
+ .udev = udev,
+ .n_ref = 1,
+ .enumerator = TAKE_PTR(e),
+ .devices_list = TAKE_PTR(list),
+ };
+
+ return udev_enumerate;
+}
+
+static struct udev_enumerate *udev_enumerate_free(struct udev_enumerate *udev_enumerate) {
+ assert(udev_enumerate);
+
+ udev_list_free(udev_enumerate->devices_list);
+ sd_device_enumerator_unref(udev_enumerate->enumerator);
+ return mfree(udev_enumerate);
+}
+
+/**
+ * udev_enumerate_ref:
+ * @udev_enumerate: context
+ *
+ * Take a reference of a enumeration context.
+ *
+ * Returns: the passed enumeration context
+ **/
+
+/**
+ * udev_enumerate_unref:
+ * @udev_enumerate: context
+ *
+ * Drop a reference of an enumeration context. If the refcount reaches zero,
+ * all resources of the enumeration context will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_enumerate, udev_enumerate, udev_enumerate_free);
+
+/**
+ * udev_enumerate_get_udev:
+ * @udev_enumerate: context
+ *
+ * Get the udev library context.
+ *
+ * Returns: a pointer to the context.
+ */
+_public_ struct udev *udev_enumerate_get_udev(struct udev_enumerate *udev_enumerate) {
+ assert_return_errno(udev_enumerate, NULL, EINVAL);
+
+ return udev_enumerate->udev;
+}
+
+/**
+ * udev_enumerate_get_list_entry:
+ * @udev_enumerate: context
+ *
+ * Get the first entry of the sorted list of device paths.
+ *
+ * Returns: a udev_list_entry.
+ */
+_public_ struct udev_list_entry *udev_enumerate_get_list_entry(struct udev_enumerate *udev_enumerate) {
+ struct udev_list_entry *e;
+
+ assert_return_errno(udev_enumerate, NULL, EINVAL);
+
+ if (!udev_enumerate->devices_uptodate) {
+ sd_device *device;
+
+ udev_list_cleanup(udev_enumerate->devices_list);
+
+ FOREACH_DEVICE_AND_SUBSYSTEM(udev_enumerate->enumerator, device) {
+ const char *syspath;
+ int r;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ if (!udev_list_entry_add(udev_enumerate->devices_list, syspath, NULL))
+ return_with_errno(NULL, ENOMEM);
+ }
+
+ udev_enumerate->devices_uptodate = true;
+ }
+
+ e = udev_list_get_entry(udev_enumerate->devices_list);
+ if (!e)
+ return_with_errno(NULL, ENODATA);
+
+ return e;
+}
+
+/**
+ * udev_enumerate_add_match_subsystem:
+ * @udev_enumerate: context
+ * @subsystem: filter for a subsystem of the device to include in the list
+ *
+ * Match only devices belonging to a certain kernel subsystem.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_subsystem(struct udev_enumerate *udev_enumerate, const char *subsystem) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!subsystem)
+ return 0;
+
+ r = sd_device_enumerator_add_match_subsystem(udev_enumerate->enumerator, subsystem, true);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_nomatch_subsystem:
+ * @udev_enumerate: context
+ * @subsystem: filter for a subsystem of the device to exclude from the list
+ *
+ * Match only devices not belonging to a certain kernel subsystem.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_nomatch_subsystem(struct udev_enumerate *udev_enumerate, const char *subsystem) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!subsystem)
+ return 0;
+
+ r = sd_device_enumerator_add_match_subsystem(udev_enumerate->enumerator, subsystem, false);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_match_sysattr:
+ * @udev_enumerate: context
+ * @sysattr: filter for a sys attribute at the device to include in the list
+ * @value: optional value of the sys attribute
+ *
+ * Match only devices with a certain /sys device attribute.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_sysattr(struct udev_enumerate *udev_enumerate, const char *sysattr, const char *value) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!sysattr)
+ return 0;
+
+ r = sd_device_enumerator_add_match_sysattr(udev_enumerate->enumerator, sysattr, value, true);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_nomatch_sysattr:
+ * @udev_enumerate: context
+ * @sysattr: filter for a sys attribute at the device to exclude from the list
+ * @value: optional value of the sys attribute
+ *
+ * Match only devices not having a certain /sys device attribute.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_nomatch_sysattr(struct udev_enumerate *udev_enumerate, const char *sysattr, const char *value) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!sysattr)
+ return 0;
+
+ r = sd_device_enumerator_add_match_sysattr(udev_enumerate->enumerator, sysattr, value, false);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_match_property:
+ * @udev_enumerate: context
+ * @property: filter for a property of the device to include in the list
+ * @value: value of the property
+ *
+ * Match only devices with a certain property.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_property(struct udev_enumerate *udev_enumerate, const char *property, const char *value) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!property)
+ return 0;
+
+ r = sd_device_enumerator_add_match_property(udev_enumerate->enumerator, property, value);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_match_tag:
+ * @udev_enumerate: context
+ * @tag: filter for a tag of the device to include in the list
+ *
+ * Match only devices with a certain tag.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_tag(struct udev_enumerate *udev_enumerate, const char *tag) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!tag)
+ return 0;
+
+ r = sd_device_enumerator_add_match_tag(udev_enumerate->enumerator, tag);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_match_parent:
+ * @udev_enumerate: context
+ * @parent: parent device where to start searching
+ *
+ * Return the devices on the subtree of one given device. The parent
+ * itself is included in the list.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_parent(struct udev_enumerate *udev_enumerate, struct udev_device *parent) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!parent)
+ return 0;
+
+ r = sd_device_enumerator_add_match_parent(udev_enumerate->enumerator, udev_device_get_sd_device(parent));
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_match_is_initialized:
+ * @udev_enumerate: context
+ *
+ * Match only devices which udev has set up already. This makes
+ * sure, that the device node permissions and context are properly set
+ * and that network devices are fully renamed.
+ *
+ * Usually, devices which are found in the kernel but not already
+ * handled by udev, have still pending events. Services should subscribe
+ * to monitor events and wait for these devices to become ready, instead
+ * of using uninitialized devices.
+ *
+ * For now, this will not affect devices which do not have a device node
+ * and are not network interfaces.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_is_initialized(struct udev_enumerate *udev_enumerate) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ r = device_enumerator_add_match_is_initialized(udev_enumerate->enumerator);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_match_sysname:
+ * @udev_enumerate: context
+ * @sysname: filter for the name of the device to include in the list
+ *
+ * Match only devices with a given /sys device name.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_match_sysname(struct udev_enumerate *udev_enumerate, const char *sysname) {
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!sysname)
+ return 0;
+
+ r = sd_device_enumerator_add_match_sysname(udev_enumerate->enumerator, sysname);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_add_syspath:
+ * @udev_enumerate: context
+ * @syspath: path of a device
+ *
+ * Add a device to the list of devices, to retrieve it back sorted in dependency order.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_enumerate_add_syspath(struct udev_enumerate *udev_enumerate, const char *syspath) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ assert_return(udev_enumerate, -EINVAL);
+
+ if (!syspath)
+ return 0;
+
+ r = sd_device_new_from_syspath(&device, syspath);
+ if (r < 0)
+ return r;
+
+ r = device_enumerator_add_device(udev_enumerate->enumerator, device);
+ if (r < 0)
+ return r;
+
+ udev_enumerate->devices_uptodate = false;
+ return 0;
+}
+
+/**
+ * udev_enumerate_scan_devices:
+ * @udev_enumerate: udev enumeration context
+ *
+ * Scan /sys for all devices which match the given filters. No matches
+ * will return all currently available devices.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ **/
+_public_ int udev_enumerate_scan_devices(struct udev_enumerate *udev_enumerate) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ return device_enumerator_scan_devices(udev_enumerate->enumerator);
+}
+
+/**
+ * udev_enumerate_scan_subsystems:
+ * @udev_enumerate: udev enumeration context
+ *
+ * Scan /sys for all kernel subsystems, including buses, classes, drivers.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ **/
+_public_ int udev_enumerate_scan_subsystems(struct udev_enumerate *udev_enumerate) {
+ assert_return(udev_enumerate, -EINVAL);
+
+ return device_enumerator_scan_subsystems(udev_enumerate->enumerator);
+}
diff --git a/src/libudev/libudev-hwdb.c b/src/libudev/libudev-hwdb.c
new file mode 100644
index 0000000..8e9ea97
--- /dev/null
+++ b/src/libudev/libudev-hwdb.c
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "hwdb-util.h"
+#include "libudev-list-internal.h"
+
+/**
+ * SECTION:libudev-hwdb
+ * @short_description: retrieve properties from the hardware database
+ *
+ * Libudev hardware database interface.
+ */
+
+/**
+ * udev_hwdb:
+ *
+ * Opaque object representing the hardware database.
+ */
+struct udev_hwdb {
+ unsigned n_ref;
+ sd_hwdb *hwdb;
+ struct udev_list *properties_list;
+};
+
+/**
+ * udev_hwdb_new:
+ * @udev: udev library context (unused)
+ *
+ * Create a hardware database context to query properties for devices.
+ *
+ * Returns: a hwdb context.
+ **/
+_public_ struct udev_hwdb *udev_hwdb_new(struct udev *udev) {
+ _cleanup_(udev_list_freep) struct udev_list *list = NULL;
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb_internal = NULL;
+ struct udev_hwdb *hwdb;
+ int r;
+
+ r = sd_hwdb_new(&hwdb_internal);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ list = udev_list_new(true);
+ if (!list)
+ return_with_errno(NULL, ENOMEM);
+
+ hwdb = new(struct udev_hwdb, 1);
+ if (!hwdb)
+ return_with_errno(NULL, ENOMEM);
+
+ *hwdb = (struct udev_hwdb) {
+ .n_ref = 1,
+ .hwdb = TAKE_PTR(hwdb_internal),
+ .properties_list = TAKE_PTR(list),
+ };
+
+ return hwdb;
+}
+
+static struct udev_hwdb *udev_hwdb_free(struct udev_hwdb *hwdb) {
+ assert(hwdb);
+
+ sd_hwdb_unref(hwdb->hwdb);
+ udev_list_free(hwdb->properties_list);
+ return mfree(hwdb);
+}
+
+/**
+ * udev_hwdb_ref:
+ * @hwdb: context
+ *
+ * Take a reference of a hwdb context.
+ *
+ * Returns: the passed enumeration context
+ **/
+
+/**
+ * udev_hwdb_unref:
+ * @hwdb: context
+ *
+ * Drop a reference of a hwdb context. If the refcount reaches zero,
+ * all resources of the hwdb context will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_hwdb, udev_hwdb, udev_hwdb_free);
+
+/**
+ * udev_hwdb_get_properties_list_entry:
+ * @hwdb: context
+ * @modalias: modalias string
+ * @flags: (unused)
+ *
+ * Lookup a matching device in the hardware database. The lookup key is a
+ * modalias string, whose formats are defined for the Linux kernel modules.
+ * Examples are: pci:v00008086d00001C2D*, usb:v04F2pB221*. The first entry
+ * of a list of retrieved properties is returned.
+ *
+ * Returns: a udev_list_entry.
+ */
+_public_ struct udev_list_entry *udev_hwdb_get_properties_list_entry(struct udev_hwdb *hwdb, const char *modalias, unsigned flags) {
+ const char *key, *value;
+ struct udev_list_entry *e;
+
+ assert_return_errno(hwdb, NULL, EINVAL);
+ assert_return_errno(modalias, NULL, EINVAL);
+
+ udev_list_cleanup(hwdb->properties_list);
+
+ SD_HWDB_FOREACH_PROPERTY(hwdb->hwdb, modalias, key, value)
+ if (!udev_list_entry_add(hwdb->properties_list, key, value))
+ return_with_errno(NULL, ENOMEM);
+
+ e = udev_list_get_entry(hwdb->properties_list);
+ if (!e)
+ return_with_errno(NULL, ENODATA);
+
+ return e;
+}
diff --git a/src/libudev/libudev-list-internal.h b/src/libudev/libudev-list-internal.h
new file mode 100644
index 0000000..c23735e
--- /dev/null
+++ b/src/libudev/libudev-list-internal.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "libudev.h"
+
+#include "macro.h"
+
+struct udev_list;
+
+struct udev_list *udev_list_new(bool unique);
+void udev_list_cleanup(struct udev_list *list);
+struct udev_list *udev_list_free(struct udev_list *list);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_list *, udev_list_free);
+
+struct udev_list_entry *udev_list_get_entry(struct udev_list *list);
+struct udev_list_entry *udev_list_entry_add(struct udev_list *list, const char *name, const char *value);
diff --git a/src/libudev/libudev-list.c b/src/libudev/libudev-list.c
new file mode 100644
index 0000000..3b2a2cd
--- /dev/null
+++ b/src/libudev/libudev-list.c
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "libudev-list-internal.h"
+#include "list.h"
+#include "sort-util.h"
+
+/**
+ * SECTION:libudev-list
+ * @short_description: list operation
+ *
+ * Libudev list operations.
+ */
+
+/**
+ * udev_list_entry:
+ *
+ * Opaque object representing one entry in a list. An entry contains
+ * contains a name, and optionally a value.
+ */
+struct udev_list_entry {
+ struct udev_list *list;
+ char *name;
+ char *value;
+
+ LIST_FIELDS(struct udev_list_entry, entries);
+};
+
+struct udev_list {
+ Hashmap *unique_entries;
+ LIST_HEAD(struct udev_list_entry, entries);
+ bool unique:1;
+ bool uptodate:1;
+};
+
+static struct udev_list_entry *udev_list_entry_free(struct udev_list_entry *entry) {
+ if (!entry)
+ return NULL;
+
+ if (entry->list) {
+ if (entry->list->unique)
+ hashmap_remove(entry->list->unique_entries, entry->name);
+ else
+ LIST_REMOVE(entries, entry->list->entries, entry);
+ }
+
+ free(entry->name);
+ free(entry->value);
+
+ return mfree(entry);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_list_entry *, udev_list_entry_free);
+
+struct udev_list *udev_list_new(bool unique) {
+ struct udev_list *list;
+
+ list = new(struct udev_list, 1);
+ if (!list)
+ return NULL;
+
+ *list = (struct udev_list) {
+ .unique = unique,
+ };
+
+ return list;
+}
+
+struct udev_list_entry *udev_list_entry_add(struct udev_list *list, const char *_name, const char *_value) {
+ _cleanup_(udev_list_entry_freep) struct udev_list_entry *entry = NULL;
+ _cleanup_free_ char *name = NULL, *value = NULL;
+ int r;
+
+ assert(list);
+
+ name = strdup(_name);
+ if (!name)
+ return NULL;
+
+ if (_value) {
+ value = strdup(_value);
+ if (!value)
+ return NULL;
+ }
+
+ entry = new(struct udev_list_entry, 1);
+ if (!entry)
+ return NULL;
+
+ *entry = (struct udev_list_entry) {
+ .list = list,
+ .name = TAKE_PTR(name),
+ .value = TAKE_PTR(value),
+ };
+
+ if (list->unique) {
+ r = hashmap_ensure_allocated(&list->unique_entries, &string_hash_ops);
+ if (r < 0)
+ return NULL;
+
+ udev_list_entry_free(hashmap_get(list->unique_entries, entry->name));
+
+ r = hashmap_put(list->unique_entries, entry->name, entry);
+ if (r < 0)
+ return NULL;
+
+ list->uptodate = false;
+ } else
+ LIST_APPEND(entries, list->entries, entry);
+
+ return TAKE_PTR(entry);
+}
+
+void udev_list_cleanup(struct udev_list *list) {
+ struct udev_list_entry *i, *n;
+
+ if (!list)
+ return;
+
+ if (list->unique) {
+ hashmap_clear_with_destructor(list->unique_entries, udev_list_entry_free);
+ list->uptodate = false;
+ } else
+ LIST_FOREACH_SAFE(entries, i, n, list->entries)
+ udev_list_entry_free(i);
+}
+
+struct udev_list *udev_list_free(struct udev_list *list) {
+ if (!list)
+ return NULL;
+
+ udev_list_cleanup(list);
+ hashmap_free(list->unique_entries);
+
+ return mfree(list);
+}
+
+static int udev_list_entry_compare_func(struct udev_list_entry * const *a, struct udev_list_entry * const *b) {
+ return strcmp((*a)->name, (*b)->name);
+}
+
+struct udev_list_entry *udev_list_get_entry(struct udev_list *list) {
+ if (!list)
+ return NULL;
+
+ if (list->unique && !list->uptodate) {
+ size_t n;
+
+ LIST_HEAD_INIT(list->entries);
+
+ n = hashmap_size(list->unique_entries);
+ if (n == 0)
+ ;
+ else if (n == 1)
+ LIST_PREPEND(entries, list->entries, hashmap_first(list->unique_entries));
+ else {
+ _cleanup_free_ struct udev_list_entry **buf = NULL;
+ struct udev_list_entry *entry, **p;
+ size_t j;
+
+ buf = new(struct udev_list_entry *, n);
+ if (!buf)
+ return NULL;
+
+ p = buf;
+ HASHMAP_FOREACH(entry, list->unique_entries)
+ *p++ = entry;
+
+ typesafe_qsort(buf, n, udev_list_entry_compare_func);
+
+ for (j = n; j > 0; j--)
+ LIST_PREPEND(entries, list->entries, buf[j-1]);
+ }
+
+ list->uptodate = true;
+ }
+
+ return list->entries;
+}
+
+/**
+ * udev_list_entry_get_next:
+ * @list_entry: current entry
+ *
+ * Get the next entry from the list.
+ *
+ * Returns: udev_list_entry, #NULL if no more entries are available.
+ */
+_public_ struct udev_list_entry *udev_list_entry_get_next(struct udev_list_entry *list_entry) {
+ if (!list_entry)
+ return NULL;
+ if (list_entry->list->unique && !list_entry->list->uptodate)
+ return NULL;
+ return list_entry->entries_next;
+}
+
+/**
+ * udev_list_entry_get_by_name:
+ * @list_entry: current entry
+ * @name: name string to match
+ *
+ * Lookup an entry in the list with a certain name.
+ *
+ * Returns: udev_list_entry, #NULL if no matching entry is found.
+ */
+_public_ struct udev_list_entry *udev_list_entry_get_by_name(struct udev_list_entry *list_entry, const char *name) {
+ if (!list_entry)
+ return NULL;
+ if (!list_entry->list->unique || !list_entry->list->uptodate)
+ return NULL;
+ return hashmap_get(list_entry->list->unique_entries, name);
+}
+
+/**
+ * udev_list_entry_get_name:
+ * @list_entry: current entry
+ *
+ * Get the name of a list entry.
+ *
+ * Returns: the name string of this entry.
+ */
+_public_ const char *udev_list_entry_get_name(struct udev_list_entry *list_entry) {
+ if (!list_entry)
+ return NULL;
+ return list_entry->name;
+}
+
+/**
+ * udev_list_entry_get_value:
+ * @list_entry: current entry
+ *
+ * Get the value of list entry.
+ *
+ * Returns: the value string of this entry.
+ */
+_public_ const char *udev_list_entry_get_value(struct udev_list_entry *list_entry) {
+ if (!list_entry)
+ return NULL;
+ return list_entry->value;
+}
diff --git a/src/libudev/libudev-monitor.c b/src/libudev/libudev-monitor.c
new file mode 100644
index 0000000..a93adbd
--- /dev/null
+++ b/src/libudev/libudev-monitor.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+
+#include "libudev.h"
+
+#include "alloc-util.h"
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "io-util.h"
+#include "libudev-device-internal.h"
+#include "string-util.h"
+
+/**
+ * SECTION:libudev-monitor
+ * @short_description: device event source
+ *
+ * Connects to a device event source.
+ */
+
+/**
+ * udev_monitor:
+ *
+ * Opaque object handling an event source.
+ */
+struct udev_monitor {
+ struct udev *udev;
+ unsigned n_ref;
+ sd_device_monitor *monitor;
+};
+
+static MonitorNetlinkGroup monitor_netlink_group_from_string(const char *name) {
+ if (!name)
+ return MONITOR_GROUP_NONE;
+ if (streq(name, "udev"))
+ return MONITOR_GROUP_UDEV;
+ if (streq(name, "kernel"))
+ return MONITOR_GROUP_KERNEL;
+ return _MONITOR_NETLINK_GROUP_INVALID;
+}
+
+/**
+ * udev_monitor_new_from_netlink:
+ * @udev: udev library context
+ * @name: name of event source
+ *
+ * Create new udev monitor and connect to a specified event
+ * source. Valid sources identifiers are "udev" and "kernel".
+ *
+ * Applications should usually not connect directly to the
+ * "kernel" events, because the devices might not be usable
+ * at that time, before udev has configured them, and created
+ * device nodes. Accessing devices at the same time as udev,
+ * might result in unpredictable behavior. The "udev" events
+ * are sent out after udev has finished its event processing,
+ * all rules have been processed, and needed device nodes are
+ * created.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev monitor.
+ *
+ * Returns: a new udev monitor, or #NULL, in case of an error
+ **/
+_public_ struct udev_monitor *udev_monitor_new_from_netlink(struct udev *udev, const char *name) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
+ struct udev_monitor *udev_monitor;
+ MonitorNetlinkGroup g;
+ int r;
+
+ g = monitor_netlink_group_from_string(name);
+ if (g < 0)
+ return_with_errno(NULL, EINVAL);
+
+ r = device_monitor_new_full(&m, g, -1);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ udev_monitor = new(struct udev_monitor, 1);
+ if (!udev_monitor)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev_monitor = (struct udev_monitor) {
+ .udev = udev,
+ .n_ref = 1,
+ .monitor = TAKE_PTR(m),
+ };
+
+ return udev_monitor;
+}
+
+/**
+ * udev_monitor_filter_update:
+ * @udev_monitor: monitor
+ *
+ * Update the installed socket filter. This is only needed,
+ * if the filter was removed or changed.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_filter_update(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_filter_update(udev_monitor->monitor);
+}
+
+/**
+ * udev_monitor_enable_receiving:
+ * @udev_monitor: the monitor which should receive events
+ *
+ * Binds the @udev_monitor socket to the event source.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_enable_receiving(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return device_monitor_enable_receiving(udev_monitor->monitor);
+}
+
+/**
+ * udev_monitor_set_receive_buffer_size:
+ * @udev_monitor: the monitor which should receive events
+ * @size: the size in bytes
+ *
+ * Set the size of the kernel socket buffer. This call needs the
+ * appropriate privileges to succeed.
+ *
+ * Returns: 0 on success, otherwise -1 on error.
+ */
+_public_ int udev_monitor_set_receive_buffer_size(struct udev_monitor *udev_monitor, int size) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_set_receive_buffer_size(udev_monitor->monitor, (size_t) size);
+}
+
+static struct udev_monitor *udev_monitor_free(struct udev_monitor *udev_monitor) {
+ assert(udev_monitor);
+
+ sd_device_monitor_unref(udev_monitor->monitor);
+ return mfree(udev_monitor);
+}
+
+/**
+ * udev_monitor_ref:
+ * @udev_monitor: udev monitor
+ *
+ * Take a reference of a udev monitor.
+ *
+ * Returns: the passed udev monitor
+ **/
+
+/**
+ * udev_monitor_unref:
+ * @udev_monitor: udev monitor
+ *
+ * Drop a reference of a udev monitor. If the refcount reaches zero,
+ * the bound socket will be closed, and the resources of the monitor
+ * will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_monitor, udev_monitor, udev_monitor_free);
+
+/**
+ * udev_monitor_get_udev:
+ * @udev_monitor: udev monitor
+ *
+ * Retrieve the udev library context the monitor was created with.
+ *
+ * Returns: the udev library context
+ **/
+_public_ struct udev *udev_monitor_get_udev(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, NULL);
+
+ return udev_monitor->udev;
+}
+
+/**
+ * udev_monitor_get_fd:
+ * @udev_monitor: udev monitor
+ *
+ * Retrieve the socket file descriptor associated with the monitor.
+ *
+ * Returns: the socket file descriptor
+ **/
+_public_ int udev_monitor_get_fd(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return device_monitor_get_fd(udev_monitor->monitor);
+}
+
+static int udev_monitor_receive_sd_device(struct udev_monitor *udev_monitor, sd_device **ret) {
+ int r;
+
+ assert(udev_monitor);
+ assert(ret);
+
+ for (;;) {
+ /* r == 0 means a device is received but it does not pass the current filter. */
+ r = device_monitor_receive_device(udev_monitor->monitor, ret);
+ if (r != 0)
+ return r;
+
+ for (;;) {
+ /* Wait for next message */
+ r = fd_wait_for_event(device_monitor_get_fd(udev_monitor->monitor), POLLIN, 0);
+ if (r < 0) {
+ if (IN_SET(r, -EINTR, -EAGAIN))
+ continue;
+
+ return r;
+ }
+ if (r == 0)
+ return -EAGAIN;
+
+ /* Receive next message */
+ break;
+ }
+ }
+}
+
+/**
+ * udev_monitor_receive_device:
+ * @udev_monitor: udev monitor
+ *
+ * Receive data from the udev monitor socket, allocate a new udev
+ * device, fill in the received data, and return the device.
+ *
+ * Only socket connections with uid=0 are accepted.
+ *
+ * The monitor socket is by default set to NONBLOCK. A variant of poll() on
+ * the file descriptor returned by udev_monitor_get_fd() should to be used to
+ * wake up when new devices arrive, or alternatively the file descriptor
+ * switched into blocking mode.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev device.
+ *
+ * Returns: a new udev device, or #NULL, in case of an error
+ **/
+_public_ struct udev_device *udev_monitor_receive_device(struct udev_monitor *udev_monitor) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ assert_return(udev_monitor, NULL);
+
+ r = udev_monitor_receive_sd_device(udev_monitor, &device);
+ if (r < 0)
+ return_with_errno(NULL, r);
+
+ return udev_device_new(udev_monitor->udev, device);
+}
+
+/**
+ * udev_monitor_filter_add_match_subsystem_devtype:
+ * @udev_monitor: the monitor
+ * @subsystem: the subsystem value to match the incoming devices against
+ * @devtype: the devtype value to match the incoming devices against
+ *
+ * This filter is efficiently executed inside the kernel, and libudev subscribers
+ * will usually not be woken up for devices which do not match.
+ *
+ * The filter must be installed before the monitor is switched to listening mode.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_filter_add_match_subsystem_devtype(struct udev_monitor *udev_monitor, const char *subsystem, const char *devtype) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_filter_add_match_subsystem_devtype(udev_monitor->monitor, subsystem, devtype);
+}
+
+/**
+ * udev_monitor_filter_add_match_tag:
+ * @udev_monitor: the monitor
+ * @tag: the name of a tag
+ *
+ * This filter is efficiently executed inside the kernel, and libudev subscribers
+ * will usually not be woken up for devices which do not match.
+ *
+ * The filter must be installed before the monitor is switched to listening mode.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_filter_add_match_tag(struct udev_monitor *udev_monitor, const char *tag) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_filter_add_match_tag(udev_monitor->monitor, tag);
+}
+
+/**
+ * udev_monitor_filter_remove:
+ * @udev_monitor: monitor
+ *
+ * Remove all filters from monitor.
+ *
+ * Returns: 0 on success, otherwise a negative error value.
+ */
+_public_ int udev_monitor_filter_remove(struct udev_monitor *udev_monitor) {
+ assert_return(udev_monitor, -EINVAL);
+
+ return sd_device_monitor_filter_remove(udev_monitor->monitor);
+}
diff --git a/src/libudev/libudev-queue.c b/src/libudev/libudev-queue.c
new file mode 100644
index 0000000..01b237f
--- /dev/null
+++ b/src/libudev/libudev-queue.c
@@ -0,0 +1,236 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2009 Alan Jenkins <alan-jenkins@tuffmail.co.uk>
+***/
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "libudev.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+
+/**
+ * SECTION:libudev-queue
+ * @short_description: access to currently active events
+ *
+ * This exports the current state of the udev processing queue.
+ */
+
+/**
+ * udev_queue:
+ *
+ * Opaque object representing the current event queue in the udev daemon.
+ */
+struct udev_queue {
+ struct udev *udev;
+ unsigned n_ref;
+ int fd;
+};
+
+/**
+ * udev_queue_new:
+ * @udev: udev library context
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev queue context.
+ *
+ * Returns: the udev queue context, or #NULL on error.
+ **/
+_public_ struct udev_queue *udev_queue_new(struct udev *udev) {
+ struct udev_queue *udev_queue;
+
+ udev_queue = new(struct udev_queue, 1);
+ if (!udev_queue)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev_queue = (struct udev_queue) {
+ .udev = udev,
+ .n_ref = 1,
+ .fd = -1,
+ };
+
+ return udev_queue;
+}
+
+static struct udev_queue *udev_queue_free(struct udev_queue *udev_queue) {
+ assert(udev_queue);
+
+ safe_close(udev_queue->fd);
+ return mfree(udev_queue);
+}
+
+/**
+ * udev_queue_ref:
+ * @udev_queue: udev queue context
+ *
+ * Take a reference of a udev queue context.
+ *
+ * Returns: the same udev queue context.
+ **/
+
+/**
+ * udev_queue_unref:
+ * @udev_queue: udev queue context
+ *
+ * Drop a reference of a udev queue context. If the refcount reaches zero,
+ * the resources of the queue context will be released.
+ *
+ * Returns: #NULL
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(struct udev_queue, udev_queue, udev_queue_free);
+
+/**
+ * udev_queue_get_udev:
+ * @udev_queue: udev queue context
+ *
+ * Retrieve the udev library context the queue context was created with.
+ *
+ * Returns: the udev library context.
+ **/
+_public_ struct udev *udev_queue_get_udev(struct udev_queue *udev_queue) {
+ assert_return_errno(udev_queue, NULL, EINVAL);
+
+ return udev_queue->udev;
+}
+
+/**
+ * udev_queue_get_kernel_seqnum:
+ * @udev_queue: udev queue context
+ *
+ * This function is deprecated.
+ *
+ * Returns: 0.
+ **/
+_public_ unsigned long long int udev_queue_get_kernel_seqnum(struct udev_queue *udev_queue) {
+ return 0;
+}
+
+/**
+ * udev_queue_get_udev_seqnum:
+ * @udev_queue: udev queue context
+ *
+ * This function is deprecated.
+ *
+ * Returns: 0.
+ **/
+_public_ unsigned long long int udev_queue_get_udev_seqnum(struct udev_queue *udev_queue) {
+ return 0;
+}
+
+/**
+ * udev_queue_get_udev_is_active:
+ * @udev_queue: udev queue context
+ *
+ * Check if udev is active on the system.
+ *
+ * Returns: a flag indicating if udev is active.
+ **/
+_public_ int udev_queue_get_udev_is_active(struct udev_queue *udev_queue) {
+ return access("/run/udev/control", F_OK) >= 0;
+}
+
+/**
+ * udev_queue_get_queue_is_empty:
+ * @udev_queue: udev queue context
+ *
+ * Check if udev is currently processing any events.
+ *
+ * Returns: a flag indicating if udev is currently handling events.
+ **/
+_public_ int udev_queue_get_queue_is_empty(struct udev_queue *udev_queue) {
+ return access("/run/udev/queue", F_OK) < 0;
+}
+
+/**
+ * udev_queue_get_seqnum_sequence_is_finished:
+ * @udev_queue: udev queue context
+ * @start: first event sequence number
+ * @end: last event sequence number
+ *
+ * This function is deprecated, it just returns the result of
+ * udev_queue_get_queue_is_empty().
+ *
+ * Returns: a flag indicating if udev is currently handling events.
+ **/
+_public_ int udev_queue_get_seqnum_sequence_is_finished(struct udev_queue *udev_queue,
+ unsigned long long int start, unsigned long long int end) {
+ return udev_queue_get_queue_is_empty(udev_queue);
+}
+
+/**
+ * udev_queue_get_seqnum_is_finished:
+ * @udev_queue: udev queue context
+ * @seqnum: sequence number
+ *
+ * This function is deprecated, it just returns the result of
+ * udev_queue_get_queue_is_empty().
+ *
+ * Returns: a flag indicating if udev is currently handling events.
+ **/
+_public_ int udev_queue_get_seqnum_is_finished(struct udev_queue *udev_queue, unsigned long long int seqnum) {
+ return udev_queue_get_queue_is_empty(udev_queue);
+}
+
+/**
+ * udev_queue_get_queued_list_entry:
+ * @udev_queue: udev queue context
+ *
+ * This function is deprecated.
+ *
+ * Returns: NULL.
+ **/
+_public_ struct udev_list_entry *udev_queue_get_queued_list_entry(struct udev_queue *udev_queue) {
+ return_with_errno(NULL, ENODATA);
+}
+
+/**
+ * udev_queue_get_fd:
+ * @udev_queue: udev queue context
+ *
+ * Returns: a file descriptor to watch for a queue to become empty.
+ */
+_public_ int udev_queue_get_fd(struct udev_queue *udev_queue) {
+ _cleanup_close_ int fd = -1;
+
+ assert_return(udev_queue, -EINVAL);
+
+ if (udev_queue->fd >= 0)
+ return udev_queue->fd;
+
+ fd = inotify_init1(IN_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (inotify_add_watch(fd, "/run/udev" , IN_DELETE) < 0)
+ return -errno;
+
+ udev_queue->fd = TAKE_FD(fd);
+ return udev_queue->fd;
+}
+
+/**
+ * udev_queue_flush:
+ * @udev_queue: udev queue context
+ *
+ * Returns: the result of clearing the watch for queue changes.
+ */
+_public_ int udev_queue_flush(struct udev_queue *udev_queue) {
+ int r;
+
+ assert_return(udev_queue, -EINVAL);
+
+ if (udev_queue->fd < 0)
+ return -EINVAL;
+
+ r = flush_fd(udev_queue->fd);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c
new file mode 100644
index 0000000..bbb2879
--- /dev/null
+++ b/src/libudev/libudev-util.c
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+
+#include "sd-device.h"
+
+#include "device-nodes.h"
+#include "libudev-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "utf8.h"
+
+/**
+ * SECTION:libudev-util
+ * @short_description: utils
+ *
+ * Utilities useful when dealing with devices and device node names.
+ */
+
+/* handle "[<SUBSYSTEM>/<KERNEL>]<attribute>" format */
+int util_resolve_subsys_kernel(const char *string, char *result, size_t maxsize, bool read_value) {
+ char temp[UTIL_PATH_SIZE], *subsys, *sysname, *attr;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *val;
+ int r;
+
+ if (string[0] != '[')
+ return -EINVAL;
+
+ strscpy(temp, sizeof(temp), string);
+
+ subsys = &temp[1];
+
+ sysname = strchr(subsys, '/');
+ if (!sysname)
+ return -EINVAL;
+ sysname[0] = '\0';
+ sysname = &sysname[1];
+
+ attr = strchr(sysname, ']');
+ if (!attr)
+ return -EINVAL;
+ attr[0] = '\0';
+ attr = &attr[1];
+ if (attr[0] == '/')
+ attr = &attr[1];
+ if (attr[0] == '\0')
+ attr = NULL;
+
+ if (read_value && !attr)
+ return -EINVAL;
+
+ r = sd_device_new_from_subsystem_sysname(&dev, subsys, sysname);
+ if (r < 0)
+ return r;
+
+ if (read_value) {
+ r = sd_device_get_sysattr_value(dev, attr, &val);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r == -ENOENT)
+ result[0] = '\0';
+ else
+ strscpy(result, maxsize, val);
+ log_debug("value '[%s/%s]%s' is '%s'", subsys, sysname, attr, result);
+ } else {
+ r = sd_device_get_syspath(dev, &val);
+ if (r < 0)
+ return r;
+
+ strscpyl(result, maxsize, val, attr ? "/" : NULL, attr ?: NULL, NULL);
+ log_debug("path '[%s/%s]%s' is '%s'", subsys, sysname, strempty(attr), result);
+ }
+ return 0;
+}
+
+size_t util_path_encode(const char *src, char *dest, size_t size) {
+ size_t i, j;
+
+ assert(src);
+ assert(dest);
+
+ for (i = 0, j = 0; src[i] != '\0'; i++) {
+ if (src[i] == '/') {
+ if (j+4 >= size) {
+ j = 0;
+ break;
+ }
+ memcpy(&dest[j], "\\x2f", 4);
+ j += 4;
+ } else if (src[i] == '\\') {
+ if (j+4 >= size) {
+ j = 0;
+ break;
+ }
+ memcpy(&dest[j], "\\x5c", 4);
+ j += 4;
+ } else {
+ if (j+1 >= size) {
+ j = 0;
+ break;
+ }
+ dest[j] = src[i];
+ j++;
+ }
+ }
+ dest[j] = '\0';
+ return j;
+}
+
+/*
+ * Copy from 'str' to 'to', while removing all leading and trailing whitespace,
+ * and replacing each run of consecutive whitespace with a single underscore.
+ * The chars from 'str' are copied up to the \0 at the end of the string, or
+ * at most 'len' chars. This appends \0 to 'to', at the end of the copied
+ * characters.
+ *
+ * If 'len' chars are copied into 'to', the final \0 is placed at len+1
+ * (i.e. 'to[len] = \0'), so the 'to' buffer must have at least len+1
+ * chars available.
+ *
+ * Note this may be called with 'str' == 'to', i.e. to replace whitespace
+ * in-place in a buffer. This function can handle that situation.
+ *
+ * Note that only 'len' characters are read from 'str'.
+ */
+size_t util_replace_whitespace(const char *str, char *to, size_t len) {
+ bool is_space = false;
+ size_t i, j;
+
+ assert(str);
+ assert(to);
+
+ i = strspn(str, WHITESPACE);
+
+ for (j = 0; j < len && i < len && str[i] != '\0'; i++) {
+ if (isspace(str[i])) {
+ is_space = true;
+ continue;
+ }
+
+ if (is_space) {
+ if (j + 1 >= len)
+ break;
+
+ to[j++] = '_';
+ is_space = false;
+ }
+ to[j++] = str[i];
+ }
+
+ to[j] = '\0';
+ return j;
+}
+
+/* allow chars in allow list, plain ascii, hex-escaping and valid utf8 */
+size_t util_replace_chars(char *str, const char *allow) {
+ size_t i = 0, replaced = 0;
+
+ assert(str);
+
+ while (str[i] != '\0') {
+ int len;
+
+ if (allow_listed_char_for_devnode(str[i], allow)) {
+ i++;
+ continue;
+ }
+
+ /* accept hex encoding */
+ if (str[i] == '\\' && str[i+1] == 'x') {
+ i += 2;
+ continue;
+ }
+
+ /* accept valid utf8 */
+ len = utf8_encoded_valid_unichar(str + i, (size_t) -1);
+ if (len > 1) {
+ i += len;
+ continue;
+ }
+
+ /* if space is allowed, replace whitespace with ordinary space */
+ if (isspace(str[i]) && allow && strchr(allow, ' ')) {
+ str[i] = ' ';
+ i++;
+ replaced++;
+ continue;
+ }
+
+ /* everything else is replaced with '_' */
+ str[i] = '_';
+ i++;
+ replaced++;
+ }
+ return replaced;
+}
+
+/**
+ * udev_util_encode_string:
+ * @str: input string to be encoded
+ * @str_enc: output string to store the encoded input string
+ * @len: maximum size of the output string, which may be
+ * four times as long as the input string
+ *
+ * Encode all potentially unsafe characters of a string to the
+ * corresponding 2 char hex value prefixed by '\x'.
+ *
+ * Returns: 0 if the entire string was copied, non-zero otherwise.
+ **/
+_public_ int udev_util_encode_string(const char *str, char *str_enc, size_t len) {
+ return encode_devnode_name(str, str_enc, len);
+}
diff --git a/src/libudev/libudev-util.h b/src/libudev/libudev-util.h
new file mode 100644
index 0000000..15e6214
--- /dev/null
+++ b/src/libudev/libudev-util.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "libudev.h"
+
+#include "macro.h"
+
+/* libudev-util.c */
+#define UTIL_PATH_SIZE 1024
+#define UTIL_NAME_SIZE 512
+#define UTIL_LINE_SIZE 16384
+#define UDEV_ALLOWED_CHARS_INPUT "/ $%?,"
+size_t util_path_encode(const char *src, char *dest, size_t size);
+size_t util_replace_whitespace(const char *str, char *to, size_t len);
+size_t util_replace_chars(char *str, const char *white);
+int util_resolve_subsys_kernel(const char *string, char *result, size_t maxsize, bool read_value);
+
+/* Cleanup functions */
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev*, udev_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_device*, udev_device_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_enumerate*, udev_enumerate_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_monitor*, udev_monitor_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_hwdb*, udev_hwdb_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_queue*, udev_queue_unref);
diff --git a/src/libudev/libudev.c b/src/libudev/libudev.c
new file mode 100644
index 0000000..7357487
--- /dev/null
+++ b/src/libudev/libudev.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libudev.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "string-util.h"
+
+/**
+ * SECTION:libudev
+ * @short_description: libudev context
+ */
+
+/**
+ * udev:
+ *
+ * Opaque object representing the library context.
+ */
+struct udev {
+ unsigned n_ref;
+ void *userdata;
+};
+
+/**
+ * udev_get_userdata:
+ * @udev: udev library context
+ *
+ * Retrieve stored data pointer from library context. This might be useful
+ * to access from callbacks.
+ *
+ * Returns: stored userdata
+ **/
+_public_ void *udev_get_userdata(struct udev *udev) {
+ assert_return(udev, NULL);
+
+ return udev->userdata;
+}
+
+/**
+ * udev_set_userdata:
+ * @udev: udev library context
+ * @userdata: data pointer
+ *
+ * Store custom @userdata in the library context.
+ **/
+_public_ void udev_set_userdata(struct udev *udev, void *userdata) {
+ if (!udev)
+ return;
+
+ udev->userdata = userdata;
+}
+
+/**
+ * udev_new:
+ *
+ * Create udev library context. This only allocates the basic data structure.
+ *
+ * The initial refcount is 1, and needs to be decremented to
+ * release the resources of the udev library context.
+ *
+ * Returns: a new udev library context
+ **/
+_public_ struct udev *udev_new(void) {
+ struct udev *udev;
+
+ udev = new(struct udev, 1);
+ if (!udev)
+ return_with_errno(NULL, ENOMEM);
+
+ *udev = (struct udev) {
+ .n_ref = 1,
+ };
+
+ return udev;
+}
+
+/**
+ * udev_ref:
+ * @udev: udev library context
+ *
+ * Take a reference of the udev library context.
+ *
+ * Returns: the passed udev library context
+ **/
+DEFINE_PUBLIC_TRIVIAL_REF_FUNC(struct udev, udev);
+
+/**
+ * udev_unref:
+ * @udev: udev library context
+ *
+ * Drop a reference of the udev library context. If the refcount
+ * reaches zero, the resources of the context will be released.
+ *
+ * Returns: the passed udev library context if it has still an active reference, or #NULL otherwise.
+ **/
+_public_ struct udev *udev_unref(struct udev *udev) {
+ if (!udev)
+ return NULL;
+
+ assert(udev->n_ref > 0);
+ udev->n_ref--;
+ if (udev->n_ref > 0)
+ /* This is different from our convention, but let's keep backward
+ * compatibility. So, do not use DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC()
+ * macro to define this function. */
+ return udev;
+
+ return mfree(udev);
+}
+
+/**
+ * udev_set_log_fn:
+ * @udev: udev library context
+ * @log_fn: function to be called for log messages
+ *
+ * This function is deprecated.
+ *
+ **/
+_public_ void udev_set_log_fn(
+ struct udev *udev,
+ void (*log_fn)(struct udev *udev,
+ int priority, const char *file, int line, const char *fn,
+ const char *format, va_list args)) {
+ return;
+}
+
+/**
+ * udev_get_log_priority:
+ * @udev: udev library context
+ *
+ * This function is deprecated.
+ *
+ **/
+_public_ int udev_get_log_priority(struct udev *udev) {
+ return log_get_max_level();
+}
+
+/**
+ * udev_set_log_priority:
+ * @udev: udev library context
+ * @priority: the new log priority
+ *
+ * This function is deprecated.
+ *
+ **/
+_public_ void udev_set_log_priority(struct udev *udev, int priority) {
+ log_set_max_level(priority);
+}
diff --git a/src/libudev/libudev.h b/src/libudev/libudev.h
new file mode 100644
index 0000000..55036de
--- /dev/null
+++ b/src/libudev/libudev.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#ifndef _LIBUDEV_H_
+#define _LIBUDEV_H_
+
+#include <stdarg.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * udev - library context
+ *
+ * reads the udev config and system environment
+ * allows custom logging
+ */
+struct udev;
+struct udev *udev_ref(struct udev *udev);
+struct udev *udev_unref(struct udev *udev);
+struct udev *udev_new(void);
+void udev_set_log_fn(struct udev *udev,
+ void (*log_fn)(struct udev *udev,
+ int priority, const char *file, int line, const char *fn,
+ const char *format, va_list args)) __attribute__((__deprecated__));
+int udev_get_log_priority(struct udev *udev) __attribute__((__deprecated__));
+void udev_set_log_priority(struct udev *udev, int priority) __attribute__((__deprecated__));
+void *udev_get_userdata(struct udev *udev);
+void udev_set_userdata(struct udev *udev, void *userdata);
+
+/*
+ * udev_list
+ *
+ * access to libudev generated lists
+ */
+struct udev_list_entry;
+struct udev_list_entry *udev_list_entry_get_next(struct udev_list_entry *list_entry);
+struct udev_list_entry *udev_list_entry_get_by_name(struct udev_list_entry *list_entry, const char *name);
+const char *udev_list_entry_get_name(struct udev_list_entry *list_entry);
+const char *udev_list_entry_get_value(struct udev_list_entry *list_entry);
+/**
+ * udev_list_entry_foreach:
+ * @list_entry: entry to store the current position
+ * @first_entry: first entry to start with
+ *
+ * Helper to iterate over all entries of a list.
+ */
+#define udev_list_entry_foreach(list_entry, first_entry) \
+ for (list_entry = first_entry; \
+ list_entry; \
+ list_entry = udev_list_entry_get_next(list_entry))
+
+/*
+ * udev_device
+ *
+ * access to sysfs/kernel devices
+ */
+struct udev_device;
+struct udev_device *udev_device_ref(struct udev_device *udev_device);
+struct udev_device *udev_device_unref(struct udev_device *udev_device);
+struct udev *udev_device_get_udev(struct udev_device *udev_device);
+struct udev_device *udev_device_new_from_syspath(struct udev *udev, const char *syspath);
+struct udev_device *udev_device_new_from_devnum(struct udev *udev, char type, dev_t devnum);
+struct udev_device *udev_device_new_from_subsystem_sysname(struct udev *udev, const char *subsystem, const char *sysname);
+struct udev_device *udev_device_new_from_device_id(struct udev *udev, const char *id);
+struct udev_device *udev_device_new_from_environment(struct udev *udev);
+/* udev_device_get_parent_*() does not take a reference on the returned device, it is automatically unref'd with the parent */
+struct udev_device *udev_device_get_parent(struct udev_device *udev_device);
+struct udev_device *udev_device_get_parent_with_subsystem_devtype(struct udev_device *udev_device,
+ const char *subsystem, const char *devtype);
+/* retrieve device properties */
+const char *udev_device_get_devpath(struct udev_device *udev_device);
+const char *udev_device_get_subsystem(struct udev_device *udev_device);
+const char *udev_device_get_devtype(struct udev_device *udev_device);
+const char *udev_device_get_syspath(struct udev_device *udev_device);
+const char *udev_device_get_sysname(struct udev_device *udev_device);
+const char *udev_device_get_sysnum(struct udev_device *udev_device);
+const char *udev_device_get_devnode(struct udev_device *udev_device);
+int udev_device_get_is_initialized(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_devlinks_list_entry(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_properties_list_entry(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_tags_list_entry(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_current_tags_list_entry(struct udev_device *udev_device);
+struct udev_list_entry *udev_device_get_sysattr_list_entry(struct udev_device *udev_device);
+const char *udev_device_get_property_value(struct udev_device *udev_device, const char *key);
+const char *udev_device_get_driver(struct udev_device *udev_device);
+dev_t udev_device_get_devnum(struct udev_device *udev_device);
+const char *udev_device_get_action(struct udev_device *udev_device);
+unsigned long long int udev_device_get_seqnum(struct udev_device *udev_device);
+unsigned long long int udev_device_get_usec_since_initialized(struct udev_device *udev_device);
+const char *udev_device_get_sysattr_value(struct udev_device *udev_device, const char *sysattr);
+int udev_device_set_sysattr_value(struct udev_device *udev_device, const char *sysattr, const char *value);
+int udev_device_has_tag(struct udev_device *udev_device, const char *tag);
+int udev_device_has_current_tag(struct udev_device *udev_device, const char *tag);
+
+/*
+ * udev_monitor
+ *
+ * access to kernel uevents and udev events
+ */
+struct udev_monitor;
+struct udev_monitor *udev_monitor_ref(struct udev_monitor *udev_monitor);
+struct udev_monitor *udev_monitor_unref(struct udev_monitor *udev_monitor);
+struct udev *udev_monitor_get_udev(struct udev_monitor *udev_monitor);
+/* kernel and udev generated events over netlink */
+struct udev_monitor *udev_monitor_new_from_netlink(struct udev *udev, const char *name);
+/* bind socket */
+int udev_monitor_enable_receiving(struct udev_monitor *udev_monitor);
+int udev_monitor_set_receive_buffer_size(struct udev_monitor *udev_monitor, int size);
+int udev_monitor_get_fd(struct udev_monitor *udev_monitor);
+struct udev_device *udev_monitor_receive_device(struct udev_monitor *udev_monitor);
+/* in-kernel socket filters to select messages that get delivered to a listener */
+int udev_monitor_filter_add_match_subsystem_devtype(struct udev_monitor *udev_monitor,
+ const char *subsystem, const char *devtype);
+int udev_monitor_filter_add_match_tag(struct udev_monitor *udev_monitor, const char *tag);
+int udev_monitor_filter_update(struct udev_monitor *udev_monitor);
+int udev_monitor_filter_remove(struct udev_monitor *udev_monitor);
+
+/*
+ * udev_enumerate
+ *
+ * search sysfs for specific devices and provide a sorted list
+ */
+struct udev_enumerate;
+struct udev_enumerate *udev_enumerate_ref(struct udev_enumerate *udev_enumerate);
+struct udev_enumerate *udev_enumerate_unref(struct udev_enumerate *udev_enumerate);
+struct udev *udev_enumerate_get_udev(struct udev_enumerate *udev_enumerate);
+struct udev_enumerate *udev_enumerate_new(struct udev *udev);
+/* device properties filter */
+int udev_enumerate_add_match_subsystem(struct udev_enumerate *udev_enumerate, const char *subsystem);
+int udev_enumerate_add_nomatch_subsystem(struct udev_enumerate *udev_enumerate, const char *subsystem);
+int udev_enumerate_add_match_sysattr(struct udev_enumerate *udev_enumerate, const char *sysattr, const char *value);
+int udev_enumerate_add_nomatch_sysattr(struct udev_enumerate *udev_enumerate, const char *sysattr, const char *value);
+int udev_enumerate_add_match_property(struct udev_enumerate *udev_enumerate, const char *property, const char *value);
+int udev_enumerate_add_match_sysname(struct udev_enumerate *udev_enumerate, const char *sysname);
+int udev_enumerate_add_match_tag(struct udev_enumerate *udev_enumerate, const char *tag);
+int udev_enumerate_add_match_parent(struct udev_enumerate *udev_enumerate, struct udev_device *parent);
+int udev_enumerate_add_match_is_initialized(struct udev_enumerate *udev_enumerate);
+int udev_enumerate_add_syspath(struct udev_enumerate *udev_enumerate, const char *syspath);
+/* run enumeration with active filters */
+int udev_enumerate_scan_devices(struct udev_enumerate *udev_enumerate);
+int udev_enumerate_scan_subsystems(struct udev_enumerate *udev_enumerate);
+/* return device list */
+struct udev_list_entry *udev_enumerate_get_list_entry(struct udev_enumerate *udev_enumerate);
+
+/*
+ * udev_queue
+ *
+ * access to the currently running udev events
+ */
+struct udev_queue;
+struct udev_queue *udev_queue_ref(struct udev_queue *udev_queue);
+struct udev_queue *udev_queue_unref(struct udev_queue *udev_queue);
+struct udev *udev_queue_get_udev(struct udev_queue *udev_queue);
+struct udev_queue *udev_queue_new(struct udev *udev);
+unsigned long long int udev_queue_get_kernel_seqnum(struct udev_queue *udev_queue) __attribute__((__deprecated__));
+ unsigned long long int udev_queue_get_udev_seqnum(struct udev_queue *udev_queue) __attribute__((__deprecated__));
+int udev_queue_get_udev_is_active(struct udev_queue *udev_queue);
+int udev_queue_get_queue_is_empty(struct udev_queue *udev_queue);
+int udev_queue_get_seqnum_is_finished(struct udev_queue *udev_queue, unsigned long long int seqnum) __attribute__((__deprecated__));
+int udev_queue_get_seqnum_sequence_is_finished(struct udev_queue *udev_queue,
+ unsigned long long int start, unsigned long long int end) __attribute__((__deprecated__));
+int udev_queue_get_fd(struct udev_queue *udev_queue);
+int udev_queue_flush(struct udev_queue *udev_queue);
+struct udev_list_entry *udev_queue_get_queued_list_entry(struct udev_queue *udev_queue) __attribute__((__deprecated__));
+
+/*
+ * udev_hwdb
+ *
+ * access to the static hardware properties database
+ */
+struct udev_hwdb;
+struct udev_hwdb *udev_hwdb_new(struct udev *udev);
+struct udev_hwdb *udev_hwdb_ref(struct udev_hwdb *hwdb);
+struct udev_hwdb *udev_hwdb_unref(struct udev_hwdb *hwdb);
+struct udev_list_entry *udev_hwdb_get_properties_list_entry(struct udev_hwdb *hwdb, const char *modalias, unsigned flags);
+
+/*
+ * udev_util
+ *
+ * udev specific utilities
+ */
+int udev_util_encode_string(const char *str, char *str_enc, size_t len);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/src/libudev/libudev.pc.in b/src/libudev/libudev.pc.in
new file mode 100644
index 0000000..b657b7d
--- /dev/null
+++ b/src/libudev/libudev.pc.in
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@rootlibdir@
+includedir=@includedir@
+
+Name: libudev
+Description: Library to access udev device information
+Version: @PROJECT_VERSION@
+Libs: -L${libdir} -ludev
+Cflags: -I${includedir}
diff --git a/src/libudev/libudev.sym b/src/libudev/libudev.sym
new file mode 100644
index 0000000..6aa6768
--- /dev/null
+++ b/src/libudev/libudev.sym
@@ -0,0 +1,126 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+LIBUDEV_183 {
+global:
+ udev_device_get_action;
+ udev_device_get_devlinks_list_entry;
+ udev_device_get_devnode;
+ udev_device_get_devnum;
+ udev_device_get_devpath;
+ udev_device_get_devtype;
+ udev_device_get_driver;
+ udev_device_get_is_initialized;
+ udev_device_get_parent;
+ udev_device_get_parent_with_subsystem_devtype;
+ udev_device_get_properties_list_entry;
+ udev_device_get_property_value;
+ udev_device_get_seqnum;
+ udev_device_get_subsystem;
+ udev_device_get_sysattr_list_entry;
+ udev_device_get_sysattr_value;
+ udev_device_get_sysname;
+ udev_device_get_sysnum;
+ udev_device_get_syspath;
+ udev_device_get_tags_list_entry;
+ udev_device_get_udev;
+ udev_device_get_usec_since_initialized;
+ udev_device_has_tag;
+ udev_device_new_from_devnum;
+ udev_device_new_from_environment;
+ udev_device_new_from_subsystem_sysname;
+ udev_device_new_from_syspath;
+ udev_device_ref;
+ udev_device_unref;
+ udev_enumerate_add_match_is_initialized;
+ udev_enumerate_add_match_parent;
+ udev_enumerate_add_match_property;
+ udev_enumerate_add_match_subsystem;
+ udev_enumerate_add_match_sysattr;
+ udev_enumerate_add_match_sysname;
+ udev_enumerate_add_match_tag;
+ udev_enumerate_add_nomatch_subsystem;
+ udev_enumerate_add_nomatch_sysattr;
+ udev_enumerate_add_syspath;
+ udev_enumerate_get_list_entry;
+ udev_enumerate_get_udev;
+ udev_enumerate_new;
+ udev_enumerate_ref;
+ udev_enumerate_scan_devices;
+ udev_enumerate_scan_subsystems;
+ udev_enumerate_unref;
+ udev_get_log_priority;
+ udev_get_userdata;
+ udev_list_entry_get_by_name;
+ udev_list_entry_get_name;
+ udev_list_entry_get_next;
+ udev_list_entry_get_value;
+ udev_monitor_enable_receiving;
+ udev_monitor_filter_add_match_subsystem_devtype;
+ udev_monitor_filter_add_match_tag;
+ udev_monitor_filter_remove;
+ udev_monitor_filter_update;
+ udev_monitor_get_fd;
+ udev_monitor_get_udev;
+ udev_monitor_new_from_netlink;
+ udev_monitor_receive_device;
+ udev_monitor_ref;
+ udev_monitor_set_receive_buffer_size;
+ udev_monitor_unref;
+ udev_new;
+ udev_queue_get_kernel_seqnum;
+ udev_queue_get_queue_is_empty;
+ udev_queue_get_queued_list_entry;
+ udev_queue_get_seqnum_is_finished;
+ udev_queue_get_seqnum_sequence_is_finished;
+ udev_queue_get_udev;
+ udev_queue_get_udev_is_active;
+ udev_queue_get_udev_seqnum;
+ udev_queue_new;
+ udev_queue_ref;
+ udev_queue_unref;
+ udev_ref;
+ udev_set_log_fn;
+ udev_set_log_priority;
+ udev_set_userdata;
+ udev_unref;
+ udev_util_encode_string;
+local:
+ *;
+};
+
+LIBUDEV_189 {
+global:
+ udev_device_new_from_device_id;
+} LIBUDEV_183;
+
+LIBUDEV_196 {
+global:
+ udev_hwdb_new;
+ udev_hwdb_ref;
+ udev_hwdb_unref;
+ udev_hwdb_get_properties_list_entry;
+} LIBUDEV_189;
+
+LIBUDEV_199 {
+global:
+ udev_device_set_sysattr_value;
+} LIBUDEV_196;
+
+LIBUDEV_215 {
+global:
+ udev_queue_flush;
+ udev_queue_get_fd;
+} LIBUDEV_199;
+
+LIBUDEV_247 {
+global:
+ udev_device_has_current_tag;
+ udev_device_get_current_tags_list_entry;
+} LIBUDEV_215;
diff --git a/src/libudev/meson.build b/src/libudev/meson.build
new file mode 100644
index 0000000..3bd00ff
--- /dev/null
+++ b/src/libudev/meson.build
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+libudev_sources = files('''
+ libudev.c
+ libudev-device.c
+ libudev-device-internal.h
+ libudev-enumerate.c
+ libudev-hwdb.c
+ libudev-list.c
+ libudev-list-internal.h
+ libudev-monitor.c
+ libudev-queue.c
+ libudev-util.c
+ libudev-util.h
+'''.split())
+
+############################################################
+
+libudev_sym = files('libudev.sym')
+libudev_sym_path = meson.current_source_dir() + '/libudev.sym'
+
+install_headers('libudev.h')
+libudev_h_path = '@0@/libudev.h'.format(meson.current_source_dir())
+
+configure_file(
+ input : 'libudev.pc.in',
+ output : 'libudev.pc',
+ configuration : substs,
+ install_dir : pkgconfiglibdir == 'no' ? '' : pkgconfiglibdir)
diff --git a/src/locale/kbd-model-map b/src/locale/kbd-model-map
new file mode 100644
index 0000000..8fa984f
--- /dev/null
+++ b/src/locale/kbd-model-map
@@ -0,0 +1,68 @@
+# Generated from system-config-keyboard's model list
+# consolelayout xlayout xmodel xvariant xoptions
+sg ch pc105 de_nodeadkeys terminate:ctrl_alt_bksp
+nl nl pc105 - terminate:ctrl_alt_bksp
+mk-utf mk,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+trq tr pc105 - terminate:ctrl_alt_bksp
+uk gb pc105 - terminate:ctrl_alt_bksp
+is-latin1 is pc105 - terminate:ctrl_alt_bksp
+de de pc105 - terminate:ctrl_alt_bksp
+la-latin1 latam pc105 - terminate:ctrl_alt_bksp
+us us pc105+inet - terminate:ctrl_alt_bksp
+ko kr pc105 - terminate:ctrl_alt_bksp
+ro-std ro pc105 std terminate:ctrl_alt_bksp
+de-latin1 de pc105 - terminate:ctrl_alt_bksp
+slovene si pc105 - terminate:ctrl_alt_bksp
+hu101 hu pc105 qwerty terminate:ctrl_alt_bksp
+jp106 jp jp106 - terminate:ctrl_alt_bksp
+croat hr pc105 - terminate:ctrl_alt_bksp
+it2 it pc105 - terminate:ctrl_alt_bksp
+hu hu pc105 - terminate:ctrl_alt_bksp
+sr-latin rs pc105 latin terminate:ctrl_alt_bksp
+fi fi pc105 - terminate:ctrl_alt_bksp
+fr_CH ch pc105 fr terminate:ctrl_alt_bksp
+dk-latin1 dk pc105 - terminate:ctrl_alt_bksp
+fr fr pc105 - terminate:ctrl_alt_bksp
+it it pc105 - terminate:ctrl_alt_bksp
+ua-utf ua,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+fr-latin1 fr pc105 - terminate:ctrl_alt_bksp
+sg-latin1 ch pc105 de_nodeadkeys terminate:ctrl_alt_bksp
+be-latin1 be pc105 - terminate:ctrl_alt_bksp
+dk dk pc105 - terminate:ctrl_alt_bksp
+fr-pc fr pc105 - terminate:ctrl_alt_bksp
+bg_pho-utf8 bg,us pc105 ,phonetic terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+it-ibm it pc105 - terminate:ctrl_alt_bksp
+cz-us-qwertz cz,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+br-abnt2 br abnt2 - terminate:ctrl_alt_bksp
+ro ro pc105 - terminate:ctrl_alt_bksp
+us-acentos us pc105 intl terminate:ctrl_alt_bksp
+pt-latin1 pt pc105 - terminate:ctrl_alt_bksp
+ro-std-cedilla ro pc105 std_cedilla terminate:ctrl_alt_bksp
+tj_alt-UTF8 tj pc105 - terminate:ctrl_alt_bksp
+de-latin1-nodeadkeys de pc105 nodeadkeys terminate:ctrl_alt_bksp
+no no pc105 - terminate:ctrl_alt_bksp
+bg_bds-utf8 bg,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+dvorak us pc105 dvorak terminate:ctrl_alt_bksp
+dvorak us pc105 dvorak-alt-intl terminate:ctrl_alt_bksp
+ru ru,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+cz-lat2 cz pc105 qwerty terminate:ctrl_alt_bksp
+pl2 pl pc105 - terminate:ctrl_alt_bksp
+es es pc105 - terminate:ctrl_alt_bksp
+ro-cedilla ro pc105 cedilla terminate:ctrl_alt_bksp
+ie ie pc105 - terminate:ctrl_alt_bksp
+et ee pc105 - terminate:ctrl_alt_bksp
+sk-qwerty sk pc105 - terminate:ctrl_alt_bksp,qwerty
+sk-qwertz sk pc105 - terminate:ctrl_alt_bksp
+fr-latin9 fr pc105 latin9 terminate:ctrl_alt_bksp
+fr_CH-latin1 ch pc105 fr terminate:ctrl_alt_bksp
+cf ca pc105 - terminate:ctrl_alt_bksp
+sv-latin1 se pc105 - terminate:ctrl_alt_bksp
+sr-cy rs pc105 - terminate:ctrl_alt_bksp
+gr gr,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+by by,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+il il pc105 - terminate:ctrl_alt_bksp
+kazakh kz,us pc105 - terminate:ctrl_alt_bksp,grp:shifts_toggle,grp_led:scroll
+lt.baltic lt pc105 - terminate:ctrl_alt_bksp
+lt.l4 lt pc105 - terminate:ctrl_alt_bksp
+lt lt pc105 - terminate:ctrl_alt_bksp
+khmer kh,us pc105 - terminate:ctrl_alt_bksp
diff --git a/src/locale/keymap-util.c b/src/locale/keymap-util.c
new file mode 100644
index 0000000..cb8153f
--- /dev/null
+++ b/src/locale/keymap-util.c
@@ -0,0 +1,782 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "bus-polkit.h"
+#include "env-file-label.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "kbd-util.h"
+#include "keymap-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+static bool startswith_comma(const char *s, const char *prefix) {
+ s = startswith(s, prefix);
+ if (!s)
+ return false;
+
+ return IN_SET(*s, ',', '\0');
+}
+
+static const char* systemd_kbd_model_map(void) {
+ const char* s;
+
+ s = getenv("SYSTEMD_KBD_MODEL_MAP");
+ if (s)
+ return s;
+
+ return SYSTEMD_KBD_MODEL_MAP;
+}
+
+static const char* systemd_language_fallback_map(void) {
+ const char* s;
+
+ s = getenv("SYSTEMD_LANGUAGE_FALLBACK_MAP");
+ if (s)
+ return s;
+
+ return SYSTEMD_LANGUAGE_FALLBACK_MAP;
+}
+
+static void context_free_x11(Context *c) {
+ c->x11_layout = mfree(c->x11_layout);
+ c->x11_options = mfree(c->x11_options);
+ c->x11_model = mfree(c->x11_model);
+ c->x11_variant = mfree(c->x11_variant);
+}
+
+static void context_free_vconsole(Context *c) {
+ c->vc_keymap = mfree(c->vc_keymap);
+ c->vc_keymap_toggle = mfree(c->vc_keymap_toggle);
+}
+
+static void context_free_locale(Context *c) {
+ int p;
+
+ for (p = 0; p < _VARIABLE_LC_MAX; p++)
+ c->locale[p] = mfree(c->locale[p]);
+}
+
+void context_clear(Context *c) {
+ context_free_locale(c);
+ context_free_x11(c);
+ context_free_vconsole(c);
+
+ sd_bus_message_unref(c->locale_cache);
+ sd_bus_message_unref(c->x11_cache);
+ sd_bus_message_unref(c->vc_cache);
+
+ bus_verify_polkit_async_registry_free(c->polkit_registry);
+};
+
+void locale_simplify(char *locale[_VARIABLE_LC_MAX]) {
+ int p;
+
+ for (p = VARIABLE_LANG+1; p < _VARIABLE_LC_MAX; p++)
+ if (isempty(locale[p]) || streq_ptr(locale[VARIABLE_LANG], locale[p]))
+ locale[p] = mfree(locale[p]);
+}
+
+int locale_read_data(Context *c, sd_bus_message *m) {
+ struct stat st;
+ int r;
+
+ /* Do not try to re-read the file within single bus operation. */
+ if (m) {
+ if (m == c->locale_cache)
+ return 0;
+
+ sd_bus_message_unref(c->locale_cache);
+ c->locale_cache = sd_bus_message_ref(m);
+ }
+
+ r = stat("/etc/locale.conf", &st);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ if (r >= 0) {
+ usec_t t;
+
+ /* If mtime is not changed, then we do not need to re-read the file. */
+ t = timespec_load(&st.st_mtim);
+ if (c->locale_mtime != USEC_INFINITY && t == c->locale_mtime)
+ return 0;
+
+ c->locale_mtime = t;
+ context_free_locale(c);
+
+ r = parse_env_file(NULL, "/etc/locale.conf",
+ "LANG", &c->locale[VARIABLE_LANG],
+ "LANGUAGE", &c->locale[VARIABLE_LANGUAGE],
+ "LC_CTYPE", &c->locale[VARIABLE_LC_CTYPE],
+ "LC_NUMERIC", &c->locale[VARIABLE_LC_NUMERIC],
+ "LC_TIME", &c->locale[VARIABLE_LC_TIME],
+ "LC_COLLATE", &c->locale[VARIABLE_LC_COLLATE],
+ "LC_MONETARY", &c->locale[VARIABLE_LC_MONETARY],
+ "LC_MESSAGES", &c->locale[VARIABLE_LC_MESSAGES],
+ "LC_PAPER", &c->locale[VARIABLE_LC_PAPER],
+ "LC_NAME", &c->locale[VARIABLE_LC_NAME],
+ "LC_ADDRESS", &c->locale[VARIABLE_LC_ADDRESS],
+ "LC_TELEPHONE", &c->locale[VARIABLE_LC_TELEPHONE],
+ "LC_MEASUREMENT", &c->locale[VARIABLE_LC_MEASUREMENT],
+ "LC_IDENTIFICATION", &c->locale[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0)
+ return r;
+ } else {
+ int p;
+
+ c->locale_mtime = USEC_INFINITY;
+ context_free_locale(c);
+
+ /* Fill in what we got passed from systemd. */
+ for (p = 0; p < _VARIABLE_LC_MAX; p++) {
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ r = free_and_strdup(&c->locale[p], empty_to_null(getenv(name)));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ locale_simplify(c->locale);
+ return 0;
+}
+
+int vconsole_read_data(Context *c, sd_bus_message *m) {
+ struct stat st;
+ usec_t t;
+ int r;
+
+ /* Do not try to re-read the file within single bus operation. */
+ if (m) {
+ if (m == c->vc_cache)
+ return 0;
+
+ sd_bus_message_unref(c->vc_cache);
+ c->vc_cache = sd_bus_message_ref(m);
+ }
+
+ if (stat("/etc/vconsole.conf", &st) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ c->vc_mtime = USEC_INFINITY;
+ context_free_vconsole(c);
+ return 0;
+ }
+
+ /* If mtime is not changed, then we do not need to re-read */
+ t = timespec_load(&st.st_mtim);
+ if (c->vc_mtime != USEC_INFINITY && t == c->vc_mtime)
+ return 0;
+
+ c->vc_mtime = t;
+ context_free_vconsole(c);
+
+ r = parse_env_file(NULL, "/etc/vconsole.conf",
+ "KEYMAP", &c->vc_keymap,
+ "KEYMAP_TOGGLE", &c->vc_keymap_toggle);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int x11_read_data(Context *c, sd_bus_message *m) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool in_section = false;
+ struct stat st;
+ usec_t t;
+ int r;
+
+ /* Do not try to re-read the file within single bus operation. */
+ if (m) {
+ if (m == c->x11_cache)
+ return 0;
+
+ sd_bus_message_unref(c->x11_cache);
+ c->x11_cache = sd_bus_message_ref(m);
+ }
+
+ if (stat("/etc/X11/xorg.conf.d/00-keyboard.conf", &st) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ c->x11_mtime = USEC_INFINITY;
+ context_free_x11(c);
+ return 0;
+ }
+
+ /* If mtime is not changed, then we do not need to re-read */
+ t = timespec_load(&st.st_mtim);
+ if (c->x11_mtime != USEC_INFINITY && t == c->x11_mtime)
+ return 0;
+
+ c->x11_mtime = t;
+ context_free_x11(c);
+
+ f = fopen("/etc/X11/xorg.conf.d/00-keyboard.conf", "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+ if (IN_SET(l[0], 0, '#'))
+ continue;
+
+ if (in_section && first_word(l, "Option")) {
+ _cleanup_strv_free_ char **a = NULL;
+
+ r = strv_split_full(&a, l, WHITESPACE, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return r;
+
+ if (strv_length(a) == 3) {
+ char **p = NULL;
+
+ if (streq(a[1], "XkbLayout"))
+ p = &c->x11_layout;
+ else if (streq(a[1], "XkbModel"))
+ p = &c->x11_model;
+ else if (streq(a[1], "XkbVariant"))
+ p = &c->x11_variant;
+ else if (streq(a[1], "XkbOptions"))
+ p = &c->x11_options;
+
+ if (p)
+ free_and_replace(*p, a[2]);
+ }
+
+ } else if (!in_section && first_word(l, "Section")) {
+ _cleanup_strv_free_ char **a = NULL;
+
+ r = strv_split_full(&a, l, WHITESPACE, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return -ENOMEM;
+
+ if (strv_length(a) == 2 && streq(a[1], "InputClass"))
+ in_section = true;
+
+ } else if (in_section && first_word(l, "EndSection"))
+ in_section = false;
+ }
+
+ return 0;
+}
+
+int locale_write_data(Context *c, char ***settings) {
+ _cleanup_strv_free_ char **l = NULL;
+ struct stat st;
+ int r, p;
+
+ /* Set values will be returned as strv in *settings on success. */
+
+ for (p = 0; p < _VARIABLE_LC_MAX; p++) {
+ _cleanup_free_ char *t = NULL;
+ char **u;
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ if (isempty(c->locale[p]))
+ continue;
+
+ if (asprintf(&t, "%s=%s", name, c->locale[p]) < 0)
+ return -ENOMEM;
+
+ u = strv_env_set(l, t);
+ if (!u)
+ return -ENOMEM;
+
+ strv_free_and_replace(l, u);
+ }
+
+ if (strv_isempty(l)) {
+ if (unlink("/etc/locale.conf") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ c->locale_mtime = USEC_INFINITY;
+ return 0;
+ }
+
+ r = write_env_file_label("/etc/locale.conf", l);
+ if (r < 0)
+ return r;
+
+ *settings = TAKE_PTR(l);
+
+ if (stat("/etc/locale.conf", &st) >= 0)
+ c->locale_mtime = timespec_load(&st.st_mtim);
+
+ return 0;
+}
+
+int vconsole_write_data(Context *c) {
+ _cleanup_strv_free_ char **l = NULL;
+ struct stat st;
+ int r;
+
+ r = load_env_file(NULL, "/etc/vconsole.conf", &l);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ if (isempty(c->vc_keymap))
+ l = strv_env_unset(l, "KEYMAP");
+ else {
+ _cleanup_free_ char *s = NULL;
+ char **u;
+
+ s = strjoin("KEYMAP=", c->vc_keymap);
+ if (!s)
+ return -ENOMEM;
+
+ u = strv_env_set(l, s);
+ if (!u)
+ return -ENOMEM;
+
+ strv_free_and_replace(l, u);
+ }
+
+ if (isempty(c->vc_keymap_toggle))
+ l = strv_env_unset(l, "KEYMAP_TOGGLE");
+ else {
+ _cleanup_free_ char *s = NULL;
+ char **u;
+
+ s = strjoin("KEYMAP_TOGGLE=", c->vc_keymap_toggle);
+ if (!s)
+ return -ENOMEM;
+
+ u = strv_env_set(l, s);
+ if (!u)
+ return -ENOMEM;
+
+ strv_free_and_replace(l, u);
+ }
+
+ if (strv_isempty(l)) {
+ if (unlink("/etc/vconsole.conf") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ c->vc_mtime = USEC_INFINITY;
+ return 0;
+ }
+
+ r = write_env_file_label("/etc/vconsole.conf", l);
+ if (r < 0)
+ return r;
+
+ if (stat("/etc/vconsole.conf", &st) >= 0)
+ c->vc_mtime = timespec_load(&st.st_mtim);
+
+ return 0;
+}
+
+int x11_write_data(Context *c) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *temp_path = NULL;
+ struct stat st;
+ int r;
+
+ if (isempty(c->x11_layout) &&
+ isempty(c->x11_model) &&
+ isempty(c->x11_variant) &&
+ isempty(c->x11_options)) {
+
+ if (unlink("/etc/X11/xorg.conf.d/00-keyboard.conf") < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ c->vc_mtime = USEC_INFINITY;
+ return 0;
+ }
+
+ (void) mkdir_p_label("/etc/X11/xorg.conf.d", 0755);
+ r = fopen_temporary("/etc/X11/xorg.conf.d/00-keyboard.conf", &f, &temp_path);
+ if (r < 0)
+ return r;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fputs("# Written by systemd-localed(8), read by systemd-localed and Xorg. It's\n"
+ "# probably wise not to edit this file manually. Use localectl(1) to\n"
+ "# instruct systemd-localed to update it.\n"
+ "Section \"InputClass\"\n"
+ " Identifier \"system-keyboard\"\n"
+ " MatchIsKeyboard \"on\"\n", f);
+
+ if (!isempty(c->x11_layout))
+ fprintf(f, " Option \"XkbLayout\" \"%s\"\n", c->x11_layout);
+
+ if (!isempty(c->x11_model))
+ fprintf(f, " Option \"XkbModel\" \"%s\"\n", c->x11_model);
+
+ if (!isempty(c->x11_variant))
+ fprintf(f, " Option \"XkbVariant\" \"%s\"\n", c->x11_variant);
+
+ if (!isempty(c->x11_options))
+ fprintf(f, " Option \"XkbOptions\" \"%s\"\n", c->x11_options);
+
+ fputs("EndSection\n", f);
+
+ r = fflush_sync_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, "/etc/X11/xorg.conf.d/00-keyboard.conf") < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (stat("/etc/X11/xorg.conf.d/00-keyboard.conf", &st) >= 0)
+ c->x11_mtime = timespec_load(&st.st_mtim);
+
+ return 0;
+
+fail:
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return r;
+}
+
+static int read_next_mapping(const char* filename,
+ unsigned min_fields, unsigned max_fields,
+ FILE *f, unsigned *n, char ***a) {
+ assert(f);
+ assert(n);
+ assert(a);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t length;
+ char *l, **b;
+ int r;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ (*n)++;
+
+ l = strstrip(line);
+ if (IN_SET(l[0], 0, '#'))
+ continue;
+
+ r = strv_split_full(&b, l, WHITESPACE, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return r;
+
+ length = strv_length(b);
+ if (length < min_fields || length > max_fields) {
+ log_error("Invalid line %s:%u, ignoring.", filename, *n);
+ strv_free(b);
+ continue;
+
+ }
+
+ *a = b;
+ return 1;
+ }
+
+ return 0;
+}
+
+int vconsole_convert_to_x11(Context *c) {
+ const char *map;
+ int modified = -1;
+
+ map = systemd_kbd_model_map();
+
+ if (isempty(c->vc_keymap)) {
+ modified =
+ !isempty(c->x11_layout) ||
+ !isempty(c->x11_model) ||
+ !isempty(c->x11_variant) ||
+ !isempty(c->x11_options);
+
+ context_free_x11(c);
+ } else {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned n = 0;
+
+ f = fopen(map, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_strv_free_ char **a = NULL;
+ int r;
+
+ r = read_next_mapping(map, 5, UINT_MAX, f, &n, &a);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!streq(c->vc_keymap, a[0]))
+ continue;
+
+ if (!streq_ptr(c->x11_layout, empty_or_dash_to_null(a[1])) ||
+ !streq_ptr(c->x11_model, empty_or_dash_to_null(a[2])) ||
+ !streq_ptr(c->x11_variant, empty_or_dash_to_null(a[3])) ||
+ !streq_ptr(c->x11_options, empty_or_dash_to_null(a[4]))) {
+
+ if (free_and_strdup(&c->x11_layout, empty_or_dash_to_null(a[1])) < 0 ||
+ free_and_strdup(&c->x11_model, empty_or_dash_to_null(a[2])) < 0 ||
+ free_and_strdup(&c->x11_variant, empty_or_dash_to_null(a[3])) < 0 ||
+ free_and_strdup(&c->x11_options, empty_or_dash_to_null(a[4])) < 0)
+ return -ENOMEM;
+
+ modified = true;
+ }
+
+ break;
+ }
+ }
+
+ if (modified > 0)
+ log_info("Changing X11 keyboard layout to '%s' model '%s' variant '%s' options '%s'",
+ strempty(c->x11_layout),
+ strempty(c->x11_model),
+ strempty(c->x11_variant),
+ strempty(c->x11_options));
+ else if (modified < 0)
+ log_notice("X11 keyboard layout was not modified: no conversion found for \"%s\".",
+ c->vc_keymap);
+ else
+ log_debug("X11 keyboard layout did not need to be modified.");
+
+ return modified > 0;
+}
+
+int find_converted_keymap(const char *x11_layout, const char *x11_variant, char **new_keymap) {
+ const char *dir;
+ _cleanup_free_ char *n;
+
+ if (x11_variant)
+ n = strjoin(x11_layout, "-", x11_variant);
+ else
+ n = strdup(x11_layout);
+ if (!n)
+ return -ENOMEM;
+
+ NULSTR_FOREACH(dir, KBD_KEYMAP_DIRS) {
+ _cleanup_free_ char *p = NULL, *pz = NULL;
+ bool uncompressed;
+
+ p = strjoin(dir, "xkb/", n, ".map");
+ pz = strjoin(dir, "xkb/", n, ".map.gz");
+ if (!p || !pz)
+ return -ENOMEM;
+
+ uncompressed = access(p, F_OK) == 0;
+ if (uncompressed || access(pz, F_OK) == 0) {
+ log_debug("Found converted keymap %s at %s",
+ n, uncompressed ? p : pz);
+
+ *new_keymap = TAKE_PTR(n);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int find_legacy_keymap(Context *c, char **ret) {
+ const char *map;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *new_keymap = NULL;
+ unsigned n = 0;
+ unsigned best_matching = 0;
+ int r;
+
+ assert(!isempty(c->x11_layout));
+
+ map = systemd_kbd_model_map();
+
+ f = fopen(map, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_strv_free_ char **a = NULL;
+ unsigned matching = 0;
+
+ r = read_next_mapping(map, 5, UINT_MAX, f, &n, &a);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Determine how well matching this entry is */
+ if (streq(c->x11_layout, a[1]))
+ /* If we got an exact match, this is best */
+ matching = 10;
+ else {
+ /* We have multiple X layouts, look for an
+ * entry that matches our key with everything
+ * but the first layout stripped off. */
+ if (startswith_comma(c->x11_layout, a[1]))
+ matching = 5;
+ else {
+ _cleanup_free_ char *x = NULL;
+
+ /* If that didn't work, strip off the
+ * other layouts from the entry, too */
+ x = strndup(a[1], strcspn(a[1], ","));
+ if (startswith_comma(c->x11_layout, x))
+ matching = 1;
+ }
+ }
+
+ if (matching > 0) {
+ if (isempty(c->x11_model) || streq_ptr(c->x11_model, a[2])) {
+ matching++;
+
+ if (streq_ptr(c->x11_variant, a[3])) {
+ matching++;
+
+ if (streq_ptr(c->x11_options, a[4]))
+ matching++;
+ }
+ }
+ }
+
+ /* The best matching entry so far, then let's save that */
+ if (matching >= MAX(best_matching, 1u)) {
+ log_debug("Found legacy keymap %s with score %u",
+ a[0], matching);
+
+ if (matching > best_matching) {
+ best_matching = matching;
+
+ r = free_and_strdup(&new_keymap, a[0]);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ if (best_matching < 10 && c->x11_layout) {
+ /* The best match is only the first part of the X11
+ * keymap. Check if we have a converted map which
+ * matches just the first layout.
+ */
+ char *l, *v = NULL, *converted;
+
+ l = strndupa(c->x11_layout, strcspn(c->x11_layout, ","));
+ if (c->x11_variant)
+ v = strndupa(c->x11_variant, strcspn(c->x11_variant, ","));
+ r = find_converted_keymap(l, v, &converted);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ free_and_replace(new_keymap, converted);
+ }
+
+ *ret = TAKE_PTR(new_keymap);
+ return (bool) *ret;
+}
+
+int find_language_fallback(const char *lang, char **language) {
+ const char *map;
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned n = 0;
+
+ assert(lang);
+ assert(language);
+
+ map = systemd_language_fallback_map();
+
+ f = fopen(map, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_strv_free_ char **a = NULL;
+ int r;
+
+ r = read_next_mapping(map, 2, 2, f, &n, &a);
+ if (r <= 0)
+ return r;
+
+ if (streq(lang, a[0])) {
+ assert(strv_length(a) == 2);
+ *language = TAKE_PTR(a[1]);
+ return 1;
+ }
+ }
+
+ assert_not_reached("should not be here");
+}
+
+int x11_convert_to_vconsole(Context *c) {
+ bool modified = false;
+
+ if (isempty(c->x11_layout)) {
+ modified =
+ !isempty(c->vc_keymap) ||
+ !isempty(c->vc_keymap_toggle);
+
+ context_free_vconsole(c);
+ } else {
+ _cleanup_free_ char *new_keymap = NULL;
+ int r;
+
+ r = find_converted_keymap(c->x11_layout, c->x11_variant, &new_keymap);
+ if (r < 0)
+ return r;
+ else if (r == 0) {
+ r = find_legacy_keymap(c, &new_keymap);
+ if (r < 0)
+ return r;
+ }
+ if (r == 0)
+ /* We search for layout-variant match first, but then we also look
+ * for anything which matches just the layout. So it's accurate to say
+ * that we couldn't find anything which matches the layout. */
+ log_notice("No conversion to virtual console map found for \"%s\".",
+ c->x11_layout);
+
+ if (!streq_ptr(c->vc_keymap, new_keymap)) {
+ free_and_replace(c->vc_keymap, new_keymap);
+ c->vc_keymap_toggle = mfree(c->vc_keymap_toggle);
+ modified = true;
+ }
+ }
+
+ if (modified)
+ log_info("Changing virtual console keymap to '%s' toggle '%s'",
+ strempty(c->vc_keymap), strempty(c->vc_keymap_toggle));
+ else
+ log_debug("Virtual console keymap was not modified.");
+
+ return modified;
+}
diff --git a/src/locale/keymap-util.h b/src/locale/keymap-util.h
new file mode 100644
index 0000000..4997647
--- /dev/null
+++ b/src/locale/keymap-util.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+#include "locale-util.h"
+#include "time-util.h"
+
+typedef struct Context {
+ sd_bus_message *locale_cache;
+ usec_t locale_mtime;
+ char *locale[_VARIABLE_LC_MAX];
+
+ sd_bus_message *x11_cache;
+ usec_t x11_mtime;
+ char *x11_layout;
+ char *x11_model;
+ char *x11_variant;
+ char *x11_options;
+
+ sd_bus_message *vc_cache;
+ usec_t vc_mtime;
+ char *vc_keymap;
+ char *vc_keymap_toggle;
+
+ Hashmap *polkit_registry;
+} Context;
+
+int find_converted_keymap(const char *x11_layout, const char *x11_variant, char **new_keymap);
+int find_legacy_keymap(Context *c, char **new_keymap);
+int find_language_fallback(const char *lang, char **language);
+
+int locale_read_data(Context *c, sd_bus_message *m);
+int vconsole_read_data(Context *c, sd_bus_message *m);
+int x11_read_data(Context *c, sd_bus_message *m);
+
+void context_clear(Context *c);
+int vconsole_convert_to_x11(Context *c);
+int vconsole_write_data(Context *c);
+int x11_convert_to_vconsole(Context *c);
+int x11_write_data(Context *c);
+void locale_simplify(char *locale[_VARIABLE_LC_MAX]);
+int locale_write_data(Context *c, char ***settings);
diff --git a/src/locale/language-fallback-map b/src/locale/language-fallback-map
new file mode 100644
index 0000000..d0b02a6
--- /dev/null
+++ b/src/locale/language-fallback-map
@@ -0,0 +1,13 @@
+csb_PL csb:pl
+en_AU en_AU:en_GB
+en_IE en_IE:en_GB
+en_NZ en_NZ:en_GB
+en_ZA en_ZA:en_GB
+fr_BE fr_BE:fr_FR
+fr_CA fr_CA:fr_FR
+fr_CH fr_CH:fr_FR
+fr_LU fr_LU:fr_FR
+it_CH it_CH:it_IT
+mai_IN mai:hi
+nds_DE nds:de
+szl_PL szl:pl
diff --git a/src/locale/localectl.c b/src/locale/localectl.c
new file mode 100644
index 0000000..7d2e887
--- /dev/null
+++ b/src/locale/localectl.c
@@ -0,0 +1,519 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ftw.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "kbd-util.h"
+#include "locale-util.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "pager.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "set.h"
+#include "spawn-polkit-agent.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "verbs.h"
+#include "virt.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_ask_password = true;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_convert = true;
+
+typedef struct StatusInfo {
+ char **locale;
+ const char *vconsole_keymap;
+ const char *vconsole_keymap_toggle;
+ const char *x11_layout;
+ const char *x11_model;
+ const char *x11_variant;
+ const char *x11_options;
+} StatusInfo;
+
+static void status_info_clear(StatusInfo *info) {
+ if (info) {
+ strv_free(info->locale);
+ zero(*info);
+ }
+}
+
+static void print_overridden_variables(void) {
+ _cleanup_(locale_variables_freep) char *variables[_VARIABLE_LC_MAX] = {};
+ bool print_warning = true;
+ LocaleVariable j;
+ int r;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return;
+
+ r = proc_cmdline_get_key_many(
+ PROC_CMDLINE_STRIP_RD_PREFIX,
+ "locale.LANG", &variables[VARIABLE_LANG],
+ "locale.LANGUAGE", &variables[VARIABLE_LANGUAGE],
+ "locale.LC_CTYPE", &variables[VARIABLE_LC_CTYPE],
+ "locale.LC_NUMERIC", &variables[VARIABLE_LC_NUMERIC],
+ "locale.LC_TIME", &variables[VARIABLE_LC_TIME],
+ "locale.LC_COLLATE", &variables[VARIABLE_LC_COLLATE],
+ "locale.LC_MONETARY", &variables[VARIABLE_LC_MONETARY],
+ "locale.LC_MESSAGES", &variables[VARIABLE_LC_MESSAGES],
+ "locale.LC_PAPER", &variables[VARIABLE_LC_PAPER],
+ "locale.LC_NAME", &variables[VARIABLE_LC_NAME],
+ "locale.LC_ADDRESS", &variables[VARIABLE_LC_ADDRESS],
+ "locale.LC_TELEPHONE", &variables[VARIABLE_LC_TELEPHONE],
+ "locale.LC_MEASUREMENT", &variables[VARIABLE_LC_MEASUREMENT],
+ "locale.LC_IDENTIFICATION", &variables[VARIABLE_LC_IDENTIFICATION]);
+ if (r < 0 && r != -ENOENT) {
+ log_warning_errno(r, "Failed to read /proc/cmdline: %m");
+ return;
+ }
+
+ for (j = 0; j < _VARIABLE_LC_MAX; j++)
+ if (variables[j]) {
+ if (print_warning) {
+ log_warning("Warning: Settings on kernel command line override system locale settings in /etc/locale.conf.\n"
+ " Command Line: %s=%s", locale_variable_to_string(j), variables[j]);
+
+ print_warning = false;
+ } else
+ log_warning(" %s=%s", locale_variable_to_string(j), variables[j]);
+ }
+}
+
+static void print_status_info(StatusInfo *i) {
+ assert(i);
+
+ if (strv_isempty(i->locale))
+ puts(" System Locale: n/a");
+ else {
+ char **j;
+
+ printf(" System Locale: %s\n", i->locale[0]);
+ STRV_FOREACH(j, i->locale + 1)
+ printf(" %s\n", *j);
+ }
+
+ printf(" VC Keymap: %s\n", strna(i->vconsole_keymap));
+ if (!isempty(i->vconsole_keymap_toggle))
+ printf("VC Toggle Keymap: %s\n", i->vconsole_keymap_toggle);
+
+ printf(" X11 Layout: %s\n", strna(i->x11_layout));
+ if (!isempty(i->x11_model))
+ printf(" X11 Model: %s\n", i->x11_model);
+ if (!isempty(i->x11_variant))
+ printf(" X11 Variant: %s\n", i->x11_variant);
+ if (!isempty(i->x11_options))
+ printf(" X11 Options: %s\n", i->x11_options);
+}
+
+static int show_status(int argc, char **argv, void *userdata) {
+ _cleanup_(status_info_clear) StatusInfo info = {};
+ static const struct bus_properties_map map[] = {
+ { "VConsoleKeymap", "s", NULL, offsetof(StatusInfo, vconsole_keymap) },
+ { "VConsoleKeymapToggle", "s", NULL, offsetof(StatusInfo, vconsole_keymap_toggle) },
+ { "X11Layout", "s", NULL, offsetof(StatusInfo, x11_layout) },
+ { "X11Model", "s", NULL, offsetof(StatusInfo, x11_model) },
+ { "X11Variant", "s", NULL, offsetof(StatusInfo, x11_variant) },
+ { "X11Options", "s", NULL, offsetof(StatusInfo, x11_options) },
+ { "Locale", "as", NULL, offsetof(StatusInfo, locale) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.locale1",
+ "/org/freedesktop/locale1",
+ map,
+ 0,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ print_overridden_variables();
+ print_status_info(&info);
+
+ return r;
+}
+
+static int set_locale(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_message_new_method_call(bus, &m, bus_locale, "SetLocale");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, argv + 1);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "b", arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int list_locales(int argc, char **argv, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ r = get_locales(&l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read list of locales: %m");
+
+ (void) pager_open(arg_pager_flags);
+ strv_print(l);
+
+ return 0;
+}
+
+static int set_vconsole_keymap(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *map, *toggle_map;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ map = argv[1];
+ toggle_map = argc > 2 ? argv[2] : "";
+
+ r = bus_call_method(
+ bus,
+ bus_locale,
+ "SetVConsoleKeyboard",
+ &error,
+ NULL,
+ "ssbb", map, toggle_map, arg_convert, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set keymap: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int list_vconsole_keymaps(int argc, char **argv, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ r = get_keymaps(&l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read list of keymaps: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ strv_print(l);
+
+ return 0;
+}
+
+static int set_x11_keymap(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *layout, *model, *variant, *options;
+ sd_bus *bus = userdata;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ layout = argv[1];
+ model = argc > 2 ? argv[2] : "";
+ variant = argc > 3 ? argv[3] : "";
+ options = argc > 4 ? argv[4] : "";
+
+ r = bus_call_method(
+ bus,
+ bus_locale,
+ "SetX11Keyboard",
+ &error,
+ NULL,
+ "ssssbb", layout, model, variant, options,
+ arg_convert, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set keymap: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int list_x11_keymaps(int argc, char **argv, void *userdata) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ enum {
+ NONE,
+ MODELS,
+ LAYOUTS,
+ VARIANTS,
+ OPTIONS
+ } state = NONE, look_for;
+ int r;
+
+ f = fopen("/usr/share/X11/xkb/rules/base.lst", "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open keyboard mapping list. %m");
+
+ if (streq(argv[0], "list-x11-keymap-models"))
+ look_for = MODELS;
+ else if (streq(argv[0], "list-x11-keymap-layouts"))
+ look_for = LAYOUTS;
+ else if (streq(argv[0], "list-x11-keymap-variants"))
+ look_for = VARIANTS;
+ else if (streq(argv[0], "list-x11-keymap-options"))
+ look_for = OPTIONS;
+ else
+ assert_not_reached("Wrong parameter");
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l, *w;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read keyboard mapping list: %m");
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+
+ if (isempty(l))
+ continue;
+
+ if (l[0] == '!') {
+ if (startswith(l, "! model"))
+ state = MODELS;
+ else if (startswith(l, "! layout"))
+ state = LAYOUTS;
+ else if (startswith(l, "! variant"))
+ state = VARIANTS;
+ else if (startswith(l, "! option"))
+ state = OPTIONS;
+ else
+ state = NONE;
+
+ continue;
+ }
+
+ if (state != look_for)
+ continue;
+
+ w = l + strcspn(l, WHITESPACE);
+
+ if (argc > 1) {
+ char *e;
+
+ if (*w == 0)
+ continue;
+
+ *w = 0;
+ w++;
+ w += strspn(w, WHITESPACE);
+
+ e = strchr(w, ':');
+ if (!e)
+ continue;
+
+ *e = 0;
+
+ if (!streq(w, argv[1]))
+ continue;
+ } else
+ *w = 0;
+
+ r = strv_extend(&list, l);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (strv_isempty(list))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Couldn't find any entries.");
+
+ strv_sort(list);
+ strv_uniq(list);
+
+ (void) pager_open(arg_pager_flags);
+
+ strv_print(list);
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("localectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sQuery or change system locale and keyboard settings.%s\n"
+ "\nCommands:\n"
+ " status Show current locale settings\n"
+ " set-locale LOCALE... Set system locale\n"
+ " list-locales Show known locales\n"
+ " set-keymap MAP [MAP] Set console and X11 keyboard mappings\n"
+ " list-keymaps Show known virtual console keyboard mappings\n"
+ " set-x11-keymap LAYOUT [MODEL [VARIANT [OPTIONS]]]\n"
+ " Set X11 and console keyboard mappings\n"
+ " list-x11-keymap-models Show known X11 keyboard mapping models\n"
+ " list-x11-keymap-layouts Show known X11 keyboard mapping layouts\n"
+ " list-x11-keymap-variants [LAYOUT]\n"
+ " Show known X11 keyboard mapping variants\n"
+ " list-x11-keymap-options Show known X11 keyboard mapping options\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-ask-password Do not prompt for password\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --no-convert Don't convert keyboard mappings\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_CONVERT,
+ ARG_NO_ASK_PASSWORD
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "no-convert", no_argument, NULL, ARG_NO_CONVERT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_CONVERT:
+ arg_convert = false;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int localectl_main(sd_bus *bus, int argc, char *argv[]) {
+
+ static const Verb verbs[] = {
+ { "status", VERB_ANY, 1, VERB_DEFAULT, show_status },
+ { "set-locale", 2, VERB_ANY, 0, set_locale },
+ { "list-locales", VERB_ANY, 1, 0, list_locales },
+ { "set-keymap", 2, 3, 0, set_vconsole_keymap },
+ { "list-keymaps", VERB_ANY, 1, 0, list_vconsole_keymaps },
+ { "set-x11-keymap", 2, 5, 0, set_x11_keymap },
+ { "list-x11-keymap-models", VERB_ANY, 1, 0, list_x11_keymaps },
+ { "list-x11-keymap-layouts", VERB_ANY, 1, 0, list_x11_keymaps },
+ { "list-x11-keymap-variants", VERB_ANY, 2, 0, list_x11_keymaps },
+ { "list-x11-keymap-options", VERB_ANY, 1, 0, list_x11_keymaps },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help }, /* Not documented, but supported since it is created. */
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ return localectl_main(bus, argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/locale/localed.c b/src/locale/localed.c
new file mode 100644
index 0000000..736dacd
--- /dev/null
+++ b/src/locale/localed.c
@@ -0,0 +1,823 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if HAVE_XKBCOMMON
+#include <xkbcommon/xkbcommon.h>
+#include <dlfcn.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-log-control-api.h"
+#include "bus-message.h"
+#include "bus-polkit.h"
+#include "def.h"
+#include "dlfcn-util.h"
+#include "keymap-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "main-func.h"
+#include "missing_capability.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "service-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int locale_update_system_manager(Context *c, sd_bus *bus) {
+ _cleanup_free_ char **l_unset = NULL;
+ _cleanup_strv_free_ char **l_set = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ size_t c_set, c_unset;
+ LocaleVariable p;
+ int r;
+
+ assert(bus);
+
+ l_unset = new0(char*, _VARIABLE_LC_MAX);
+ if (!l_unset)
+ return log_oom();
+
+ l_set = new0(char*, _VARIABLE_LC_MAX);
+ if (!l_set)
+ return log_oom();
+
+ for (p = 0, c_set = 0, c_unset = 0; p < _VARIABLE_LC_MAX; p++) {
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ if (isempty(c->locale[p]))
+ l_unset[c_set++] = (char*) name;
+ else {
+ char *s;
+
+ s = strjoin(name, "=", c->locale[p]);
+ if (!s)
+ return log_oom();
+
+ l_set[c_unset++] = s;
+ }
+ }
+
+ assert(c_set + c_unset == _VARIABLE_LC_MAX);
+ r = sd_bus_message_new_method_call(bus, &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UnsetAndSetEnvironment");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, l_unset);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, l_set);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update the manager environment: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int vconsole_reload(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "RestartUnit",
+ &error,
+ NULL,
+ "ss", "systemd-vconsole-setup.service", "replace");
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int vconsole_convert_to_x11_and_emit(Context *c, sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = x11_read_data(c, m);
+ if (r < 0)
+ return r;
+
+ r = vconsole_convert_to_x11(c);
+ if (r <= 0)
+ return r;
+
+ /* modified */
+ r = x11_write_data(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write X11 keyboard layout: %m");
+
+ sd_bus_emit_properties_changed(sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "X11Layout", "X11Model", "X11Variant", "X11Options", NULL);
+
+ return 1;
+}
+
+static int x11_convert_to_vconsole_and_emit(Context *c, sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = vconsole_read_data(c, m);
+ if (r < 0)
+ return r;
+
+ r = x11_convert_to_vconsole(c);
+ if (r <= 0)
+ return r;
+
+ /* modified */
+ r = vconsole_write_data(c);
+ if (r < 0)
+ log_error_errno(r, "Failed to save virtual console keymap: %m");
+
+ sd_bus_emit_properties_changed(sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "VConsoleKeymap", "VConsoleKeymapToggle", NULL);
+
+ return vconsole_reload(sd_bus_message_get_bus(m));
+}
+
+static int property_get_locale(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ int p, q, r;
+
+ r = locale_read_data(c, reply);
+ if (r < 0)
+ return r;
+
+ l = new0(char*, _VARIABLE_LC_MAX+1);
+ if (!l)
+ return -ENOMEM;
+
+ for (p = 0, q = 0; p < _VARIABLE_LC_MAX; p++) {
+ char *t;
+ const char *name;
+
+ name = locale_variable_to_string(p);
+ assert(name);
+
+ if (isempty(c->locale[p]))
+ continue;
+
+ if (asprintf(&t, "%s=%s", name, c->locale[p]) < 0)
+ return -ENOMEM;
+
+ l[q++] = t;
+ }
+
+ return sd_bus_message_append_strv(reply, l);
+}
+
+static int property_get_vconsole(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ int r;
+
+ r = vconsole_read_data(c, reply);
+ if (r < 0)
+ return r;
+
+ if (streq(property, "VConsoleKeymap"))
+ return sd_bus_message_append_basic(reply, 's', c->vc_keymap);
+ else if (streq(property, "VConsoleKeymapToggle"))
+ return sd_bus_message_append_basic(reply, 's', c->vc_keymap_toggle);
+
+ return -EINVAL;
+}
+
+static int property_get_xkb(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ int r;
+
+ r = x11_read_data(c, reply);
+ if (r < 0)
+ return r;
+
+ if (streq(property, "X11Layout"))
+ return sd_bus_message_append_basic(reply, 's', c->x11_layout);
+ else if (streq(property, "X11Model"))
+ return sd_bus_message_append_basic(reply, 's', c->x11_model);
+ else if (streq(property, "X11Variant"))
+ return sd_bus_message_append_basic(reply, 's', c->x11_variant);
+ else if (streq(property, "X11Options"))
+ return sd_bus_message_append_basic(reply, 's', c->x11_options);
+
+ return -EINVAL;
+}
+
+static int process_locale_list_item(
+ const char *assignment,
+ char *new_locale[static _VARIABLE_LC_MAX],
+ sd_bus_error *error) {
+
+ assert(assignment);
+ assert(new_locale);
+
+ for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++) {
+ const char *name, *e;
+
+ assert_se(name = locale_variable_to_string(p));
+
+ e = startswith(assignment, name);
+ if (!e)
+ continue;
+
+ if (*e != '=')
+ continue;
+
+ e++;
+
+ if (!locale_is_valid(e))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Locale %s is not valid, refusing.", e);
+ if (locale_is_installed(e) <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Locale %s not installed, refusing.", e);
+ if (new_locale[p])
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Locale variable %s set twice, refusing.", name);
+
+ new_locale[p] = strdup(e);
+ if (!new_locale[p])
+ return -ENOMEM;
+
+ return 0;
+ }
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Locale assignment %s not valid, refusing.", assignment);
+}
+
+static int method_set_locale(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ _cleanup_(locale_variables_freep) char *new_locale[_VARIABLE_LC_MAX] = {};
+ _cleanup_strv_free_ char **settings = NULL, **l = NULL;
+ Context *c = userdata;
+ bool modified = false;
+ int interactive, r;
+ char **i;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read_strv(m, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(m, 'b', &interactive);
+ if (r < 0)
+ return r;
+
+ /* If single locale without variable name is provided, then we assume it is LANG=. */
+ if (strv_length(l) == 1 && !strchr(l[0], '=')) {
+ if (!locale_is_valid(l[0]))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid locale specification: %s", l[0]);
+ if (locale_is_installed(l[0]) <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Specified locale is not installed: %s", l[0]);
+
+ new_locale[VARIABLE_LANG] = strdup(l[0]);
+ if (!new_locale[VARIABLE_LANG])
+ return -ENOMEM;
+
+ l = strv_free(l);
+ }
+
+ /* Check whether a variable is valid */
+ STRV_FOREACH(i, l) {
+ r = process_locale_list_item(*i, new_locale, error);
+ if (r < 0)
+ return r;
+ }
+
+ /* If LANG was specified, but not LANGUAGE, check if we should
+ * set it based on the language fallback table. */
+ if (!isempty(new_locale[VARIABLE_LANG]) &&
+ isempty(new_locale[VARIABLE_LANGUAGE])) {
+ _cleanup_free_ char *language = NULL;
+
+ (void) find_language_fallback(new_locale[VARIABLE_LANG], &language);
+ if (language) {
+ log_debug("Converted LANG=%s to LANGUAGE=%s", new_locale[VARIABLE_LANG], language);
+ free_and_replace(new_locale[VARIABLE_LANGUAGE], language);
+ }
+ }
+
+ r = locale_read_data(c, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read locale data: %m");
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Failed to read locale data");
+ }
+
+ /* Merge with the current settings */
+ for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++)
+ if (!isempty(c->locale[p]) && isempty(new_locale[p])) {
+ new_locale[p] = strdup(c->locale[p]);
+ if (!new_locale[p])
+ return -ENOMEM;
+ }
+
+ locale_simplify(new_locale);
+
+ for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++)
+ if (!streq_ptr(c->locale[p], new_locale[p])) {
+ modified = true;
+ break;
+ }
+
+ if (!modified) {
+ log_debug("Locale settings were not modified.");
+ return sd_bus_reply_method_return(m, NULL);
+ }
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.locale1.set-locale",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++)
+ free_and_replace(c->locale[p], new_locale[p]);
+
+ r = locale_write_data(c, &settings);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set locale: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set locale: %m");
+ }
+
+ (void) locale_update_system_manager(c, sd_bus_message_get_bus(m));
+
+ if (settings) {
+ _cleanup_free_ char *line;
+
+ line = strv_join(settings, ", ");
+ log_info("Changed locale to %s.", strnull(line));
+ } else
+ log_info("Changed locale to unset.");
+
+ (void) sd_bus_emit_properties_changed(
+ sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "Locale", NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_vc_keyboard(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ const char *keymap, *keymap_toggle;
+ int convert, interactive, r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "ssbb", &keymap, &keymap_toggle, &convert, &interactive);
+ if (r < 0)
+ return r;
+
+ keymap = empty_to_null(keymap);
+ keymap_toggle = empty_to_null(keymap_toggle);
+
+ r = vconsole_read_data(c, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read virtual console keymap data: %m");
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Failed to read virtual console keymap data");
+ }
+
+ if (streq_ptr(keymap, c->vc_keymap) &&
+ streq_ptr(keymap_toggle, c->vc_keymap_toggle))
+ return sd_bus_reply_method_return(m, NULL);
+
+ if ((keymap && (!filename_is_valid(keymap) || !string_is_safe(keymap))) ||
+ (keymap_toggle && (!filename_is_valid(keymap_toggle) || !string_is_safe(keymap_toggle))))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Received invalid keymap data");
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.locale1.set-keyboard",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ if (free_and_strdup(&c->vc_keymap, keymap) < 0 ||
+ free_and_strdup(&c->vc_keymap_toggle, keymap_toggle) < 0)
+ return -ENOMEM;
+
+ r = vconsole_write_data(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set virtual console keymap: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set virtual console keymap: %m");
+ }
+
+ log_info("Changed virtual console keymap to '%s' toggle '%s'",
+ strempty(c->vc_keymap), strempty(c->vc_keymap_toggle));
+
+ (void) vconsole_reload(sd_bus_message_get_bus(m));
+
+ (void) sd_bus_emit_properties_changed(
+ sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "VConsoleKeymap", "VConsoleKeymapToggle", NULL);
+
+ if (convert) {
+ r = vconsole_convert_to_x11_and_emit(c, m);
+ if (r < 0)
+ log_error_errno(r, "Failed to convert keymap data: %m");
+ }
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+#if HAVE_XKBCOMMON
+
+_printf_(3, 0)
+static void log_xkb(struct xkb_context *ctx, enum xkb_log_level lvl, const char *format, va_list args) {
+ const char *fmt;
+
+ fmt = strjoina("libxkbcommon: ", format);
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_internalv(LOG_DEBUG, 0, __FILE__, __LINE__, __func__, fmt, args);
+ REENABLE_WARNING;
+}
+
+#define LOAD_SYMBOL(symbol, dl, name) \
+ ({ \
+ (symbol) = (typeof(symbol)) dlvsym((dl), (name), "V_0.5.0"); \
+ (symbol) ? 0 : -EOPNOTSUPP; \
+ })
+
+static int verify_xkb_rmlvo(const char *model, const char *layout, const char *variant, const char *options) {
+
+ /* We dlopen() the library in order to make the dependency soft. The library (and what it pulls in) is huge
+ * after all, hence let's support XKB maps when the library is around, and refuse otherwise. The function
+ * pointers to the shared library are below: */
+
+ struct xkb_context* (*symbol_xkb_context_new)(enum xkb_context_flags flags) = NULL;
+ void (*symbol_xkb_context_unref)(struct xkb_context *context) = NULL;
+ void (*symbol_xkb_context_set_log_fn)(struct xkb_context *context, void (*log_fn)(struct xkb_context *context, enum xkb_log_level level, const char *format, va_list args)) = NULL;
+ struct xkb_keymap* (*symbol_xkb_keymap_new_from_names)(struct xkb_context *context, const struct xkb_rule_names *names, enum xkb_keymap_compile_flags flags) = NULL;
+ void (*symbol_xkb_keymap_unref)(struct xkb_keymap *keymap) = NULL;
+
+ const struct xkb_rule_names rmlvo = {
+ .model = model,
+ .layout = layout,
+ .variant = variant,
+ .options = options,
+ };
+ struct xkb_context *ctx = NULL;
+ struct xkb_keymap *km = NULL;
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ /* Compile keymap from RMLVO information to check out its validity */
+
+ dl = dlopen("libxkbcommon.so.0", RTLD_LAZY);
+ if (!dl)
+ return -EOPNOTSUPP;
+
+ r = LOAD_SYMBOL(symbol_xkb_context_new, dl, "xkb_context_new");
+ if (r < 0)
+ goto finish;
+
+ r = LOAD_SYMBOL(symbol_xkb_context_unref, dl, "xkb_context_unref");
+ if (r < 0)
+ goto finish;
+
+ r = LOAD_SYMBOL(symbol_xkb_context_set_log_fn, dl, "xkb_context_set_log_fn");
+ if (r < 0)
+ goto finish;
+
+ r = LOAD_SYMBOL(symbol_xkb_keymap_new_from_names, dl, "xkb_keymap_new_from_names");
+ if (r < 0)
+ goto finish;
+
+ r = LOAD_SYMBOL(symbol_xkb_keymap_unref, dl, "xkb_keymap_unref");
+ if (r < 0)
+ goto finish;
+
+ ctx = symbol_xkb_context_new(XKB_CONTEXT_NO_ENVIRONMENT_NAMES);
+ if (!ctx) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ symbol_xkb_context_set_log_fn(ctx, log_xkb);
+
+ km = symbol_xkb_keymap_new_from_names(ctx, &rmlvo, XKB_KEYMAP_COMPILE_NO_FLAGS);
+ if (!km) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ if (symbol_xkb_keymap_unref && km)
+ symbol_xkb_keymap_unref(km);
+
+ if (symbol_xkb_context_unref && ctx)
+ symbol_xkb_context_unref(ctx);
+
+ return r;
+}
+
+#else
+
+static int verify_xkb_rmlvo(const char *model, const char *layout, const char *variant, const char *options) {
+ return 0;
+}
+
+#endif
+
+static int method_set_x11_keyboard(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ const char *layout, *model, *variant, *options;
+ int convert, interactive, r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "ssssbb", &layout, &model, &variant, &options, &convert, &interactive);
+ if (r < 0)
+ return r;
+
+ layout = empty_to_null(layout);
+ model = empty_to_null(model);
+ variant = empty_to_null(variant);
+ options = empty_to_null(options);
+
+ r = x11_read_data(c, m);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read x11 keyboard layout data: %m");
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Failed to read x11 keyboard layout data");
+ }
+
+ if (streq_ptr(layout, c->x11_layout) &&
+ streq_ptr(model, c->x11_model) &&
+ streq_ptr(variant, c->x11_variant) &&
+ streq_ptr(options, c->x11_options))
+ return sd_bus_reply_method_return(m, NULL);
+
+ if ((layout && !string_is_safe(layout)) ||
+ (model && !string_is_safe(model)) ||
+ (variant && !string_is_safe(variant)) ||
+ (options && !string_is_safe(options)))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Received invalid keyboard data");
+
+ r = verify_xkb_rmlvo(model, layout, variant, options);
+ if (r < 0) {
+ log_error_errno(r, "Cannot compile XKB keymap for new x11 keyboard layout ('%s' / '%s' / '%s' / '%s'): %m",
+ strempty(model), strempty(layout), strempty(variant), strempty(options));
+
+ if (r == -EOPNOTSUPP)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Local keyboard configuration not supported on this system.");
+
+ return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Specified keymap cannot be compiled, refusing as invalid.");
+ }
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.locale1.set-keyboard",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ if (free_and_strdup(&c->x11_layout, layout) < 0 ||
+ free_and_strdup(&c->x11_model, model) < 0 ||
+ free_and_strdup(&c->x11_variant, variant) < 0 ||
+ free_and_strdup(&c->x11_options, options) < 0)
+ return -ENOMEM;
+
+ r = x11_write_data(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set X11 keyboard layout: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set X11 keyboard layout: %m");
+ }
+
+ log_info("Changed X11 keyboard layout to '%s' model '%s' variant '%s' options '%s'",
+ strempty(c->x11_layout),
+ strempty(c->x11_model),
+ strempty(c->x11_variant),
+ strempty(c->x11_options));
+
+ (void) sd_bus_emit_properties_changed(
+ sd_bus_message_get_bus(m),
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ "X11Layout", "X11Model", "X11Variant", "X11Options", NULL);
+
+ if (convert) {
+ r = x11_convert_to_vconsole_and_emit(c, m);
+ if (r < 0)
+ log_error_errno(r, "Failed to convert keymap data: %m");
+ }
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static const sd_bus_vtable locale_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Locale", "as", property_get_locale, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("X11Layout", "s", property_get_xkb, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("X11Model", "s", property_get_xkb, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("X11Variant", "s", property_get_xkb, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("X11Options", "s", property_get_xkb, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("VConsoleKeymap", "s", property_get_vconsole, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("VConsoleKeymapToggle", "s", property_get_vconsole, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_METHOD_WITH_NAMES("SetLocale",
+ "asb",
+ SD_BUS_PARAM(locale)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_locale,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetVConsoleKeyboard",
+ "ssbb",
+ SD_BUS_PARAM(keymap)
+ SD_BUS_PARAM(keymap_toggle)
+ SD_BUS_PARAM(convert)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_vc_keyboard,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetX11Keyboard",
+ "ssssbb",
+ SD_BUS_PARAM(layout)
+ SD_BUS_PARAM(model)
+ SD_BUS_PARAM(variant)
+ SD_BUS_PARAM(options)
+ SD_BUS_PARAM(convert)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_x11_keyboard,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+static const BusObjectImplementation manager_object = {
+ "/org/freedesktop/locale1",
+ "org.freedesktop.locale1",
+ .vtables = BUS_VTABLES(locale_vtable),
+};
+
+static int connect_bus(Context *c, sd_event *event, sd_bus **_bus) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(c);
+ assert(event);
+ assert(_bus);
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get system bus connection: %m");
+
+ r = bus_add_implementation(bus, &manager_object, c);
+ if (r < 0)
+ return r;
+
+ r = bus_log_control_api_register(bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(bus, NULL, "org.freedesktop.locale1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context context = {
+ .locale_mtime = USEC_INFINITY,
+ .vc_mtime = USEC_INFINITY,
+ .x11_mtime = USEC_INFINITY,
+ };
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_setup_service();
+
+ r = service_parse_argv("systemd-localed.service",
+ "Manage system locale settings and key mappings.",
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object),
+ argc, argv);
+ if (r <= 0)
+ return r;
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ r = sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGINT handler: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGTERM handler: %m");
+
+ r = connect_bus(&context, event, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_event_loop_with_idle(event, bus, "org.freedesktop.locale1", DEFAULT_EXIT_USEC, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/locale/meson.build b/src/locale/meson.build
new file mode 100644
index 0000000..6e3500d
--- /dev/null
+++ b/src/locale/meson.build
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_localed_sources = files('''
+ localed.c
+ keymap-util.c
+ keymap-util.h
+'''.split())
+
+localectl_sources = files('localectl.c')
+
+if conf.get('ENABLE_LOCALED') == 1
+ install_data('org.freedesktop.locale1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.locale1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.locale1.policy',
+ install_dir : polkitpolicydir)
+endif
+
+# If you know a way that allows the same variables to be used
+# in sources list and concatenated to a string for test_env,
+# let me know.
+kbd_model_map = join_paths(meson.current_source_dir(), 'kbd-model-map')
+language_fallback_map = join_paths(meson.current_source_dir(), 'language-fallback-map')
+
+if conf.get('ENABLE_LOCALED') == 1
+ install_data('kbd-model-map',
+ 'language-fallback-map',
+ install_dir : pkgdatadir)
+endif
+
+tests += [
+ [['src/locale/test-keymap-util.c',
+ 'src/locale/keymap-util.c',
+ 'src/locale/keymap-util.h'],
+ [libshared],
+ []],
+]
diff --git a/src/locale/org.freedesktop.locale1.conf b/src/locale/org.freedesktop.locale1.conf
new file mode 100644
index 0000000..a090557
--- /dev/null
+++ b/src/locale/org.freedesktop.locale1.conf
@@ -0,0 +1,29 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.locale1"/>
+ <allow send_destination="org.freedesktop.locale1"/>
+ <allow receive_sender="org.freedesktop.locale1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.locale1"/>
+ <allow receive_sender="org.freedesktop.locale1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/locale/org.freedesktop.locale1.policy b/src/locale/org.freedesktop.locale1.policy
new file mode 100644
index 0000000..f12ca09
--- /dev/null
+++ b/src/locale/org.freedesktop.locale1.policy
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.locale1.set-locale">
+ <description gettext-domain="systemd">Set system locale</description>
+ <message gettext-domain="systemd">Authentication is required to set the system locale.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.locale1.set-keyboard</annotate>
+ </action>
+
+ <action id="org.freedesktop.locale1.set-keyboard">
+ <description gettext-domain="systemd">Set system keyboard settings</description>
+ <message gettext-domain="systemd">Authentication is required to set the system keyboard settings.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/locale/org.freedesktop.locale1.service b/src/locale/org.freedesktop.locale1.service
new file mode 100644
index 0000000..2d812cb
--- /dev/null
+++ b/src/locale/org.freedesktop.locale1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.locale1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.locale1.service
diff --git a/src/locale/test-keymap-util.c b/src/locale/test-keymap-util.c
new file mode 100644
index 0000000..bded122
--- /dev/null
+++ b/src/locale/test-keymap-util.c
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "keymap-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_find_language_fallback(void) {
+ _cleanup_free_ char *ans = NULL, *ans2 = NULL;
+
+ log_info("/*** %s ***/", __func__);
+
+ assert_se(find_language_fallback("foobar", &ans) == 0);
+ assert_se(ans == NULL);
+
+ assert_se(find_language_fallback("csb", &ans) == 0);
+ assert_se(ans == NULL);
+
+ assert_se(find_language_fallback("csb_PL", &ans) == 1);
+ assert_se(streq(ans, "csb:pl"));
+
+ assert_se(find_language_fallback("szl_PL", &ans2) == 1);
+ assert_se(streq(ans2, "szl:pl"));
+}
+
+static void test_find_converted_keymap(void) {
+ _cleanup_free_ char *ans = NULL, *ans2 = NULL;
+ int r;
+
+ log_info("/*** %s ***/", __func__);
+
+ assert_se(find_converted_keymap("pl", "foobar", &ans) == 0);
+ assert_se(ans == NULL);
+
+ r = find_converted_keymap("pl", NULL, &ans);
+ if (r == 0) {
+ log_info("Skipping rest of %s: keymaps are not installed", __func__);
+ return;
+ }
+
+ assert_se(r == 1);
+ assert_se(streq(ans, "pl"));
+
+ assert_se(find_converted_keymap("pl", "dvorak", &ans2) == 1);
+ assert_se(streq(ans2, "pl-dvorak"));
+}
+
+static void test_find_legacy_keymap(void) {
+ Context c = {};
+ _cleanup_free_ char *ans = NULL, *ans2 = NULL;
+
+ log_info("/*** %s ***/", __func__);
+
+ c.x11_layout = (char*) "foobar";
+ assert_se(find_legacy_keymap(&c, &ans) == 0);
+ assert_se(ans == NULL);
+
+ c.x11_layout = (char*) "pl";
+ assert_se(find_legacy_keymap(&c, &ans) == 1);
+ assert_se(streq(ans, "pl2"));
+
+ c.x11_layout = (char*) "pl,ru";
+ assert_se(find_legacy_keymap(&c, &ans2) == 1);
+ assert_se(streq(ans, "pl2"));
+}
+
+static void test_vconsole_convert_to_x11(void) {
+ _cleanup_(context_clear) Context c = {};
+
+ log_info("/*** %s ***/", __func__);
+
+ log_info("/* test emptying first (:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "foo") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, "bar") >= 0);
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(c.x11_layout == NULL);
+ assert_se(c.x11_variant == NULL);
+
+ log_info("/* test emptying second (:) */");
+
+ assert_se(vconsole_convert_to_x11(&c) == 0);
+ assert_se(c.x11_layout == NULL);
+ assert_se(c.x11_variant == NULL);
+
+ log_info("/* test without variant, new mapping (es:) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "es") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(streq(c.x11_layout, "es"));
+ assert_se(c.x11_variant == NULL);
+
+ log_info("/* test with known variant, new mapping (es:dvorak) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "es-dvorak") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 0); // FIXME
+ assert_se(streq(c.x11_layout, "es"));
+ assert_se(c.x11_variant == NULL); // FIXME: "dvorak"
+
+ log_info("/* test with old mapping (fr:latin9) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "fr-latin9") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(streq(c.x11_layout, "fr"));
+ assert_se(streq(c.x11_variant, "latin9"));
+
+ log_info("/* test with a compound mapping (ru,us) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "ru") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(streq(c.x11_layout, "ru,us"));
+ assert_se(c.x11_variant == NULL);
+
+ log_info("/* test with a simple mapping (us) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "us") >= 0);
+
+ assert_se(vconsole_convert_to_x11(&c) == 1);
+ assert_se(streq(c.x11_layout, "us"));
+ assert_se(c.x11_variant == NULL);
+}
+
+static void test_x11_convert_to_vconsole(void) {
+ _cleanup_(context_clear) Context c = {};
+ int r;
+
+ log_info("/*** %s ***/", __func__);
+
+ log_info("/* test emptying first (:) */");
+ assert_se(free_and_strdup(&c.vc_keymap, "foobar") >= 0);
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(c.vc_keymap == NULL);
+
+ log_info("/* test emptying second (:) */");
+
+ assert_se(x11_convert_to_vconsole(&c) == 0);
+ assert_se(c.vc_keymap == NULL);
+
+ log_info("/* test without variant, new mapping (es:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "es") >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(streq(c.vc_keymap, "es"));
+
+ log_info("/* test with unknown variant, new mapping (es:foobar) */");
+ assert_se(free_and_strdup(&c.x11_variant, "foobar") >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 0);
+ assert_se(streq(c.vc_keymap, "es"));
+
+ log_info("/* test with known variant, new mapping (es:dvorak) */");
+ assert_se(free_and_strdup(&c.x11_variant, "dvorak") >= 0);
+
+ r = x11_convert_to_vconsole(&c);
+ if (r == 0) {
+ log_info("Skipping rest of %s: keymaps are not installed", __func__);
+ return;
+ }
+
+ assert_se(r == 1);
+ assert_se(streq(c.vc_keymap, "es-dvorak"));
+
+ log_info("/* test with old mapping (fr:latin9) */");
+ assert_se(free_and_strdup(&c.x11_layout, "fr") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, "latin9") >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(streq(c.vc_keymap, "fr-latin9"));
+
+ log_info("/* test with a compound mapping (us,ru:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "us,ru") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, NULL) >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(streq(c.vc_keymap, "us"));
+
+ log_info("/* test with a compound mapping (ru,us:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "ru,us") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, NULL) >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 1);
+ assert_se(streq(c.vc_keymap, "ru"));
+
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1333998 */
+ log_info("/* test with a simple new mapping (ru:) */");
+ assert_se(free_and_strdup(&c.x11_layout, "ru") >= 0);
+ assert_se(free_and_strdup(&c.x11_variant, NULL) >= 0);
+
+ assert_se(x11_convert_to_vconsole(&c) == 0);
+ assert_se(streq(c.vc_keymap, "ru"));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_find_language_fallback();
+ test_find_converted_keymap();
+ test_find_legacy_keymap();
+
+ test_vconsole_convert_to_x11();
+ test_x11_convert_to_vconsole();
+
+ return 0;
+}
diff --git a/src/login/70-power-switch.rules b/src/login/70-power-switch.rules
new file mode 100644
index 0000000..3fb954a
--- /dev/null
+++ b/src/login/70-power-switch.rules
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+ACTION=="remove", GOTO="power_switch_end"
+
+SUBSYSTEM=="input", KERNEL=="event*", ENV{ID_INPUT_SWITCH}=="1", TAG+="power-switch"
+SUBSYSTEM=="input", KERNEL=="event*", ENV{ID_INPUT_KEY}=="1", TAG+="power-switch"
+
+LABEL="power_switch_end"
diff --git a/src/login/70-uaccess.rules.m4 b/src/login/70-uaccess.rules.m4
new file mode 100644
index 0000000..45e4adf
--- /dev/null
+++ b/src/login/70-uaccess.rules.m4
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+ACTION=="remove", GOTO="uaccess_end"
+ENV{MAJOR}=="", GOTO="uaccess_end"
+
+# PTP/MTP protocol devices, cameras, portable media players
+SUBSYSTEM=="usb", ENV{ID_USB_INTERFACES}=="*:060101:*", TAG+="uaccess"
+
+# Digicams with proprietary protocol
+ENV{ID_GPHOTO2}=="?*", TAG+="uaccess"
+
+# SCSI and USB scanners
+ENV{libsane_matched}=="yes", TAG+="uaccess"
+
+# HPLIP devices (necessary for ink level check and HP tool maintenance)
+ENV{ID_HPLIP}=="1", TAG+="uaccess"
+
+# optical drives
+SUBSYSTEM=="block", ENV{ID_CDROM}=="1", TAG+="uaccess"
+SUBSYSTEM=="scsi_generic", SUBSYSTEMS=="scsi", ATTRS{type}=="4|5", TAG+="uaccess"
+
+# Sound devices
+SUBSYSTEM=="sound", TAG+="uaccess", \
+ OPTIONS+="static_node=snd/timer", OPTIONS+="static_node=snd/seq"
+
+# ffado is an userspace driver for firewire sound cards
+SUBSYSTEM=="firewire", ENV{ID_FFADO}=="1", TAG+="uaccess"
+
+# Webcams, frame grabber, TV cards
+SUBSYSTEM=="video4linux", TAG+="uaccess"
+SUBSYSTEM=="dvb", TAG+="uaccess"
+
+# IIDC devices: industrial cameras and some webcams
+SUBSYSTEM=="firewire", ATTR{units}=="*0x00a02d:0x00010*", TAG+="uaccess"
+SUBSYSTEM=="firewire", ATTR{units}=="*0x00b09d:0x00010*", TAG+="uaccess"
+# AV/C devices: camcorders, set-top boxes, TV sets, audio devices, and more
+SUBSYSTEM=="firewire", ATTR{units}=="*0x00a02d:0x010001*", TAG+="uaccess"
+SUBSYSTEM=="firewire", ATTR{units}=="*0x00a02d:0x014001*", TAG+="uaccess"
+
+# DRI video devices
+SUBSYSTEM=="drm", KERNEL=="card*", TAG+="uaccess"
+m4_ifdef(`GROUP_RENDER_UACCESS',``
+# DRI render nodes
+SUBSYSTEM=="drm", KERNEL=="renderD*", TAG+="uaccess"''
+)m4_dnl
+m4_ifdef(`DEV_KVM_UACCESS',``
+# KVM
+SUBSYSTEM=="misc", KERNEL=="kvm", TAG+="uaccess"''
+)m4_dnl
+
+# smart-card readers
+ENV{ID_SMARTCARD_READER}=="?*", TAG+="uaccess"
+
+# (USB) authentication devices
+ENV{ID_SECURITY_TOKEN}=="?*", TAG+="uaccess"
+
+# PDA devices
+ENV{ID_PDA}=="?*", TAG+="uaccess"
+
+# Programmable remote control
+ENV{ID_REMOTE_CONTROL}=="1", TAG+="uaccess"
+
+# joysticks
+SUBSYSTEM=="input", ENV{ID_INPUT_JOYSTICK}=="?*", TAG+="uaccess"
+
+# color measurement devices
+ENV{COLOR_MEASUREMENT_DEVICE}=="?*", TAG+="uaccess"
+
+# DDC/CI device, usually high-end monitors such as the DreamColor
+ENV{DDC_DEVICE}=="?*", TAG+="uaccess"
+
+# media player raw devices (for user-mode drivers, Android SDK, etc.)
+SUBSYSTEM=="usb", ENV{ID_MEDIA_PLAYER}=="?*", TAG+="uaccess"
+
+# software-defined radio communication devices
+ENV{ID_SOFTWARE_RADIO}=="?*", TAG+="uaccess"
+
+# 3D printers, CNC machines, laser cutters, 3D scanners, etc.
+ENV{ID_MAKER_TOOL}=="?*", TAG+="uaccess"
+
+LABEL="uaccess_end"
diff --git a/src/login/71-seat.rules.in b/src/login/71-seat.rules.in
new file mode 100644
index 0000000..3ccd524
--- /dev/null
+++ b/src/login/71-seat.rules.in
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+ACTION=="remove", GOTO="seat_end"
+
+TAG=="uaccess", SUBSYSTEM!="sound", TAG+="seat"
+SUBSYSTEM=="sound", KERNEL=="card*", TAG+="seat"
+SUBSYSTEM=="input", KERNEL=="input*", TAG+="seat"
+SUBSYSTEM=="graphics", KERNEL=="fb[0-9]*", TAG+="seat"
+
+# Assign keyboard and LCD backlights to the seat
+SUBSYSTEM=="leds", TAG+="seat"
+SUBSYSTEM=="backlight", TAG+="seat"
+
+# HyperV currently doesn't do DRM, hence we need to synthesize for HyperV's fb device instead
+SUBSYSTEM=="graphics", KERNEL=="fb[0-9]", DRIVERS=="hyperv_fb", TAG+="master-of-seat"
+
+# Allow efifb / uvesafb to be a master if KMS is disabled
+SUBSYSTEM=="graphics", KERNEL=="fb[0-9]", IMPORT{cmdline}="nomodeset", TAG+="master-of-seat"
+
+# Allow any PCI graphics device to be a master and synthesize a seat if KMS
+# is disabled and the kernel doesn't have a driver that would work with this device.
+SUBSYSTEM=="pci", ENV{ID_PCI_CLASS_FROM_DATABASE}=="Display controller", \
+ ENV{DRIVER}=="", IMPORT{cmdline}="nomodeset", TAG+="seat", TAG+="master-of-seat"
+
+SUBSYSTEM=="drm", KERNEL=="card[0-9]*", TAG+="seat", TAG+="master-of-seat"
+
+# Allow individual USB ports to be assigned to a seat
+SUBSYSTEM=="usb", ATTR{bDeviceClass}=="00", TAG+="seat"
+
+# Allow USB hubs (and all downstream ports) to be assigned to a seat
+SUBSYSTEM=="usb", ATTR{bDeviceClass}=="09", TAG+="seat"
+
+# 'Plugable' USB hub, sound, network, graphics adapter
+SUBSYSTEM=="usb", ATTR{idVendor}=="2230", ATTR{idProduct}=="000[13]", ENV{ID_AUTOSEAT}="1"
+
+# qemu (version 2.4+) has a PCI-PCI bridge (-device pci-bridge-seat) to group
+# devices belonging to one seat. See:
+# http://git.qemu.org/?p=qemu.git;a=blob;f=docs/multiseat.txt
+SUBSYSTEM=="pci", ATTR{vendor}=="0x1b36", ATTR{device}=="0x000a", TAG+="seat", ENV{ID_AUTOSEAT}="1"
+
+# Video adapter of Parallels virtualization platform
+# Seat should be synthesized for it. But there's no in-kernel driver for this
+# device so matching by vid/pid.
+SUBSYSTEM=="pci", ATTRS{vendor}=="0x1ab8", ATTRS{device}=="0x4005", TAG+="seat", TAG+="master-of-seat"
+
+# Mimo 720, with integrated USB hub, displaylink graphics, and e2i
+# touchscreen. This device carries no proper VID/PID in the USB hub,
+# but it does carry good ID data in the graphics component, hence we
+# check it from the parent. There's a bit of a race here however,
+# given that the child devices might not exist yet at the time this
+# rule is executed. To work around this we'll trigger the parent from
+# the child if we notice that the parent wasn't recognized yet.
+
+# Match parent
+SUBSYSTEM=="usb", ATTR{idVendor}=="058f", ATTR{idProduct}=="6254", \
+ ATTR{%k.2/idVendor}=="17e9", ATTR{%k.2/idProduct}=="401a", ATTR{%k.2/product}=="mimo inc", \
+ ENV{ID_AUTOSEAT}="1", ENV{ID_AVOID_LOOP}="1"
+
+# Match child, look for parent's ID_AVOID_LOOP
+SUBSYSTEM=="usb", ATTR{idVendor}=="17e9", ATTR{idProduct}=="401a", ATTR{product}=="mimo inc", \
+ ATTR{../idVendor}=="058f", ATTR{../idProduct}=="6254", \
+ IMPORT{parent}="ID_AVOID_LOOP"
+
+# Match child, retrigger parent
+SUBSYSTEM=="usb", ATTR{idVendor}=="17e9", ATTR{idProduct}=="401a", ATTR{product}=="mimo inc", \
+ ATTR{../idVendor}=="058f", ATTR{../idProduct}=="6254", \
+ ENV{ID_AVOID_LOOP}=="", \
+ RUN+="@rootbindir@/udevadm trigger --parent-match=%p/.."
+
+TAG=="seat", ENV{ID_PATH}=="", IMPORT{builtin}="path_id"
+TAG=="seat", ENV{ID_FOR_SEAT}=="", ENV{ID_PATH_TAG}!="", ENV{ID_FOR_SEAT}="$env{SUBSYSTEM}-$env{ID_PATH_TAG}"
+
+SUBSYSTEM=="input", ATTR{name}=="Wiebetech LLC Wiebetech", RUN+="@rootbindir@/loginctl lock-sessions"
+
+LABEL="seat_end"
diff --git a/src/login/73-seat-late.rules.m4 b/src/login/73-seat-late.rules.m4
new file mode 100644
index 0000000..e56cd61
--- /dev/null
+++ b/src/login/73-seat-late.rules.m4
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+ACTION=="remove", GOTO="seat_late_end"
+
+ENV{ID_SEAT}=="", ENV{ID_AUTOSEAT}=="1", ENV{ID_FOR_SEAT}!="", ENV{ID_SEAT}="seat-$env{ID_FOR_SEAT}"
+ENV{ID_SEAT}=="", IMPORT{parent}="ID_SEAT"
+
+ENV{ID_SEAT}!="", TAG+="$env{ID_SEAT}"
+m4_ifdef(`HAVE_ACL',``
+TAG=="uaccess", ENV{MAJOR}!="", RUN{builtin}+="uaccess"''
+)m4_dnl
+
+LABEL="seat_late_end"
diff --git a/src/login/inhibit.c b/src/login/inhibit.c
new file mode 100644
index 0000000..e18dbc5
--- /dev/null
+++ b/src/login/inhibit.c
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "main-func.h"
+#include "pager.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "util.h"
+
+static const char* arg_what = "idle:sleep:shutdown";
+static const char* arg_who = NULL;
+static const char* arg_why = "Unknown reason";
+static const char* arg_mode = NULL;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+
+static enum {
+ ACTION_INHIBIT,
+ ACTION_LIST
+} arg_action = ACTION_INHIBIT;
+
+static int inhibit(sd_bus *bus, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+ int fd;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "Inhibit",
+ error,
+ &reply,
+ "ssss", arg_what, arg_who, arg_why, arg_mode);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(reply, SD_BUS_TYPE_UNIX_FD, &fd);
+ if (r < 0)
+ return r;
+
+ r = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+static int print_inhibitors(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ListInhibitors",
+ &error,
+ &reply,
+ "");
+ if (r < 0)
+ return log_error_errno(r, "Could not get active inhibitors: %s", bus_error_message(&error, r));
+
+ table = table_new("who", "uid", "user", "pid", "comm", "what", "why", "mode");
+ if (!table)
+ return log_oom();
+
+ /* If there's not enough space, shorten the "WHY" column, as it's little more than an explaining comment. */
+ (void) table_set_weight(table, TABLE_HEADER_CELL(6), 20);
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssuu)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *comm = NULL, *u = NULL;
+ const char *what, *who, *why, *mode;
+ uint32_t uid, pid;
+
+ r = sd_bus_message_read(reply, "(ssssuu)", &what, &who, &why, &mode, &uid, &pid);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (arg_mode && !streq(mode, arg_mode))
+ continue;
+
+ (void) get_process_comm(pid, &comm);
+ u = uid_to_name(uid);
+
+ r = table_add_many(table,
+ TABLE_STRING, who,
+ TABLE_UID, (uid_t) uid,
+ TABLE_STRING, strna(u),
+ TABLE_PID, (pid_t) pid,
+ TABLE_STRING, strna(comm),
+ TABLE_STRING, what,
+ TABLE_STRING, why,
+ TABLE_STRING, mode);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (table_get_rows(table) > 1) {
+ r = table_set_sort(table, (size_t) 1, (size_t) 0, (size_t) 5, (size_t) 6, (size_t) -1);
+ if (r < 0)
+ return table_log_sort_error(r);
+
+ table_set_header(table, arg_legend);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ if (arg_legend) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu inhibitors listed.\n", table_get_rows(table) - 1);
+ else
+ printf("No inhibitors.\n");
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-inhibit", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n"
+ "\n%sExecute a process while inhibiting shutdown/sleep/idle.%s\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --what=WHAT Operations to inhibit, colon separated list of:\n"
+ " shutdown, sleep, idle, handle-power-key,\n"
+ " handle-suspend-key, handle-hibernate-key,\n"
+ " handle-lid-switch\n"
+ " --who=STRING A descriptive string who is inhibiting\n"
+ " --why=STRING A descriptive string why is being inhibited\n"
+ " --mode=MODE One of block or delay\n"
+ " --list List active inhibitors\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_WHAT,
+ ARG_WHO,
+ ARG_WHY,
+ ARG_MODE,
+ ARG_LIST,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "what", required_argument, NULL, ARG_WHAT },
+ { "who", required_argument, NULL, ARG_WHO },
+ { "why", required_argument, NULL, ARG_WHY },
+ { "mode", required_argument, NULL, ARG_MODE },
+ { "list", no_argument, NULL, ARG_LIST },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_WHAT:
+ arg_what = optarg;
+ break;
+
+ case ARG_WHO:
+ arg_who = optarg;
+ break;
+
+ case ARG_WHY:
+ arg_why = optarg;
+ break;
+
+ case ARG_MODE:
+ arg_mode = optarg;
+ break;
+
+ case ARG_LIST:
+ arg_action = ACTION_LIST;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_action == ACTION_INHIBIT && optind == argc)
+ arg_action = ACTION_LIST;
+
+ else if (arg_action == ACTION_INHIBIT && optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing command line to execute.");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ if (arg_action == ACTION_LIST)
+ return print_inhibitors(bus);
+ else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_strv_free_ char **arguments = NULL;
+ _cleanup_free_ char *w = NULL;
+ _cleanup_close_ int fd = -1;
+ pid_t pid;
+
+ /* Ignore SIGINT and allow the forked process to receive it */
+ (void) ignore_signals(SIGINT, -1);
+
+ if (!arg_who) {
+ w = strv_join(argv + optind, " ");
+ if (!w)
+ return log_oom();
+
+ arg_who = w;
+ }
+
+ if (!arg_mode)
+ arg_mode = "block";
+
+ fd = inhibit(bus, &error);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to inhibit: %s", bus_error_message(&error, fd));
+
+ arguments = strv_copy(argv + optind);
+ if (!arguments)
+ return log_oom();
+
+ r = safe_fork("(inhibit)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execvp(arguments[0], arguments);
+ log_open();
+ log_error_errno(errno, "Failed to execute %s: %m", argv[optind]);
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(argv[optind], pid, WAIT_LOG);
+ }
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/login/loginctl.c b/src/login/loginctl.c
new file mode 100644
index 0000000..695d18b
--- /dev/null
+++ b/src/login/loginctl.c
@@ -0,0 +1,1488 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <locale.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "bus-print-properties.h"
+#include "bus-unit-procs.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "format-table.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "spawn-polkit-agent.h"
+#include "string-table.h"
+#include "strv.h"
+#include "sysfs-show.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "verbs.h"
+
+static char **arg_property = NULL;
+static bool arg_all = false;
+static bool arg_value = false;
+static bool arg_full = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static const char *arg_kill_who = NULL;
+static int arg_signal = SIGTERM;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static char *arg_host = NULL;
+static bool arg_ask_password = true;
+static unsigned arg_lines = 10;
+static OutputMode arg_output = OUTPUT_SHORT;
+
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+
+static OutputFlags get_output_flags(void) {
+
+ return
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full || !on_tty() || pager_have()) * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR;
+}
+
+static int get_session_path(sd_bus *bus, const char *session_id, sd_bus_error *error, char **path) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+ char *ans;
+
+ r = bus_call_method(bus, bus_login_mgr, "GetSession", error, &reply, "s", session_id);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "o", &ans);
+ if (r < 0)
+ return r;
+
+ ans = strdup(ans);
+ if (!ans)
+ return -ENOMEM;
+
+ *path = ans;
+ return 0;
+}
+
+static int show_table(Table *table, const char *word) {
+ int r;
+
+ assert(table);
+ assert(word);
+
+ if (table_get_rows(table) > 1 || OUTPUT_MODE_IS_JSON(arg_output)) {
+ r = table_set_sort(table, (size_t) 0, (size_t) -1);
+ if (r < 0)
+ return table_log_sort_error(r);
+
+ table_set_header(table, arg_legend);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ r = table_print_json(table, NULL, output_mode_to_json_format_flags(arg_output) | JSON_FORMAT_COLOR_AUTO);
+ else
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ if (arg_legend) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu %s listed.\n", table_get_rows(table) - 1, word);
+ else
+ printf("No %s.\n", word);
+ }
+
+ return 0;
+}
+
+static int list_sessions(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_call_method(bus, bus_login_mgr, "ListSessions", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list sessions: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(susso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ table = table_new("session", "uid", "user", "seat", "tty");
+ if (!table)
+ return log_oom();
+
+ /* Right-align the first two fields (since they are numeric) */
+ (void) table_set_align_percent(table, TABLE_HEADER_CELL(0), 100);
+ (void) table_set_align_percent(table, TABLE_HEADER_CELL(1), 100);
+
+ for (;;) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_tty = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply_tty = NULL;
+ const char *id, *user, *seat, *object, *tty = NULL;
+ uint32_t uid;
+
+ r = sd_bus_message_read(reply, "(susso)", &id, &uid, &user, &seat, &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.login1",
+ object,
+ "org.freedesktop.login1.Session",
+ "TTY",
+ &error_tty,
+ &reply_tty,
+ "s");
+ if (r < 0)
+ log_warning_errno(r, "Failed to get TTY for session %s: %s", id, bus_error_message(&error_tty, r));
+ else {
+ r = sd_bus_message_read(reply_tty, "s", &tty);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = table_add_many(table,
+ TABLE_STRING, id,
+ TABLE_UID, (uid_t) uid,
+ TABLE_STRING, user,
+ TABLE_STRING, seat,
+ TABLE_STRING, strna(tty));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "sessions");
+}
+
+static int list_users(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_call_method(bus, bus_login_mgr, "ListUsers", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list users: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(uso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ table = table_new("uid", "user");
+ if (!table)
+ return log_oom();
+
+ (void) table_set_align_percent(table, TABLE_HEADER_CELL(0), 100);
+
+ for (;;) {
+ const char *user;
+ uint32_t uid;
+
+ r = sd_bus_message_read(reply, "(uso)", &uid, &user, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = table_add_many(table,
+ TABLE_UID, (uid_t) uid,
+ TABLE_STRING, user);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "users");
+}
+
+static int list_seats(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_call_method(bus, bus_login_mgr, "ListSeats", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list seats: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(so)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ table = table_new("seat");
+ if (!table)
+ return log_oom();
+
+ for (;;) {
+ const char *seat;
+
+ r = sd_bus_message_read(reply, "(so)", &seat, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = table_add_cell(table, NULL, TABLE_STRING, seat);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "seats");
+}
+
+static int show_unit_cgroup(sd_bus *bus, const char *interface, const char *unit, pid_t leader) {
+ _cleanup_free_ char *cgroup = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ unsigned c;
+ int r;
+
+ assert(bus);
+ assert(unit);
+
+ r = show_cgroup_get_unit_path_and_warn(bus, unit, &cgroup);
+ if (r < 0)
+ return r;
+
+ if (isempty(cgroup))
+ return 0;
+
+ c = columns();
+ if (c > 18)
+ c -= 18;
+ else
+ c = 0;
+
+ r = unit_show_processes(bus, unit, cgroup, "\t\t ", c, get_output_flags(), &error);
+ if (r == -EBADR) {
+
+ if (arg_transport == BUS_TRANSPORT_REMOTE)
+ return 0;
+
+ /* Fallback for older systemd versions where the GetUnitProcesses() call is not yet available */
+
+ if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup) != 0 && leader <= 0)
+ return 0;
+
+ show_cgroup_and_extra(SYSTEMD_CGROUP_CONTROLLER, cgroup, "\t\t ", c, &leader, leader > 0, get_output_flags());
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to dump process list: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+typedef struct SessionStatusInfo {
+ const char *id;
+ uid_t uid;
+ const char *name;
+ struct dual_timestamp timestamp;
+ unsigned vtnr;
+ const char *seat;
+ const char *tty;
+ const char *display;
+ bool remote;
+ const char *remote_host;
+ const char *remote_user;
+ const char *service;
+ pid_t leader;
+ const char *type;
+ const char *class;
+ const char *state;
+ const char *scope;
+ const char *desktop;
+} SessionStatusInfo;
+
+typedef struct UserStatusInfo {
+ uid_t uid;
+ bool linger;
+ const char *name;
+ struct dual_timestamp timestamp;
+ const char *state;
+ char **sessions;
+ const char *display;
+ const char *slice;
+} UserStatusInfo;
+
+typedef struct SeatStatusInfo {
+ const char *id;
+ const char *active_session;
+ char **sessions;
+} SeatStatusInfo;
+
+static void user_status_info_clear(UserStatusInfo *info) {
+ if (info) {
+ strv_free(info->sessions);
+ zero(*info);
+ }
+}
+
+static void seat_status_info_clear(SeatStatusInfo *info) {
+ if (info) {
+ strv_free(info->sessions);
+ zero(*info);
+ }
+}
+
+static int prop_map_first_of_struct(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ const char *contents;
+ int r;
+
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_STRUCT, contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(m, contents[0], userdata);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_skip(m, contents+1);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int prop_map_sessions_strv(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ const char *name;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, 'a', "(so)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(so)", &name, NULL)) > 0) {
+ r = strv_extend(userdata, name);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+static int print_session_status_info(sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Id", "s", NULL, offsetof(SessionStatusInfo, id) },
+ { "Name", "s", NULL, offsetof(SessionStatusInfo, name) },
+ { "TTY", "s", NULL, offsetof(SessionStatusInfo, tty) },
+ { "Display", "s", NULL, offsetof(SessionStatusInfo, display) },
+ { "RemoteHost", "s", NULL, offsetof(SessionStatusInfo, remote_host) },
+ { "RemoteUser", "s", NULL, offsetof(SessionStatusInfo, remote_user) },
+ { "Service", "s", NULL, offsetof(SessionStatusInfo, service) },
+ { "Desktop", "s", NULL, offsetof(SessionStatusInfo, desktop) },
+ { "Type", "s", NULL, offsetof(SessionStatusInfo, type) },
+ { "Class", "s", NULL, offsetof(SessionStatusInfo, class) },
+ { "Scope", "s", NULL, offsetof(SessionStatusInfo, scope) },
+ { "State", "s", NULL, offsetof(SessionStatusInfo, state) },
+ { "VTNr", "u", NULL, offsetof(SessionStatusInfo, vtnr) },
+ { "Leader", "u", NULL, offsetof(SessionStatusInfo, leader) },
+ { "Remote", "b", NULL, offsetof(SessionStatusInfo, remote) },
+ { "Timestamp", "t", NULL, offsetof(SessionStatusInfo, timestamp.realtime) },
+ { "TimestampMonotonic", "t", NULL, offsetof(SessionStatusInfo, timestamp.monotonic) },
+ { "User", "(uo)", prop_map_first_of_struct, offsetof(SessionStatusInfo, uid) },
+ { "Seat", "(so)", prop_map_first_of_struct, offsetof(SessionStatusInfo, seat) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX];
+ char since2[FORMAT_TIMESTAMP_MAX];
+ const char *s1, *s2;
+ SessionStatusInfo i = {};
+ int r;
+
+ r = bus_map_all_properties(bus, "org.freedesktop.login1", path, map, BUS_MAP_BOOLEAN_AS_BOOL, &error, &m, &i);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ printf("%s - ", strna(i.id));
+
+ if (i.name)
+ printf("%s (%"PRIu32")\n", i.name, i.uid);
+ else
+ printf("%"PRIu32"\n", i.uid);
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), i.timestamp.realtime);
+ s2 = format_timestamp(since2, sizeof(since2), i.timestamp.realtime);
+
+ if (s1)
+ printf("\t Since: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\t Since: %s\n", s2);
+
+ if (i.leader > 0) {
+ _cleanup_free_ char *t = NULL;
+
+ printf("\t Leader: %"PRIu32, i.leader);
+
+ (void) get_process_comm(i.leader, &t);
+ if (t)
+ printf(" (%s)", t);
+
+ printf("\n");
+ }
+
+ if (!isempty(i.seat)) {
+ printf("\t Seat: %s", i.seat);
+
+ if (i.vtnr > 0)
+ printf("; vc%u", i.vtnr);
+
+ printf("\n");
+ }
+
+ if (i.tty)
+ printf("\t TTY: %s\n", i.tty);
+ else if (i.display)
+ printf("\t Display: %s\n", i.display);
+
+ if (i.remote_host && i.remote_user)
+ printf("\t Remote: %s@%s\n", i.remote_user, i.remote_host);
+ else if (i.remote_host)
+ printf("\t Remote: %s\n", i.remote_host);
+ else if (i.remote_user)
+ printf("\t Remote: user %s\n", i.remote_user);
+ else if (i.remote)
+ printf("\t Remote: Yes\n");
+
+ if (i.service) {
+ printf("\t Service: %s", i.service);
+
+ if (i.type)
+ printf("; type %s", i.type);
+
+ if (i.class)
+ printf("; class %s", i.class);
+
+ printf("\n");
+ } else if (i.type) {
+ printf("\t Type: %s", i.type);
+
+ if (i.class)
+ printf("; class %s", i.class);
+
+ printf("\n");
+ } else if (i.class)
+ printf("\t Class: %s\n", i.class);
+
+ if (!isempty(i.desktop))
+ printf("\t Desktop: %s\n", i.desktop);
+
+ if (i.state)
+ printf("\t State: %s\n", i.state);
+
+ if (i.scope) {
+ printf("\t Unit: %s\n", i.scope);
+ show_unit_cgroup(bus, "org.freedesktop.systemd1.Scope", i.scope, i.leader);
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL)
+ show_journal_by_unit(
+ stdout,
+ i.scope,
+ NULL,
+ arg_output,
+ 0,
+ i.timestamp.monotonic,
+ arg_lines,
+ 0,
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ SD_JOURNAL_LOCAL_ONLY,
+ true,
+ NULL);
+ }
+
+ return 0;
+}
+
+static int print_user_status_info(sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Name", "s", NULL, offsetof(UserStatusInfo, name) },
+ { "Linger", "b", NULL, offsetof(UserStatusInfo, linger) },
+ { "Slice", "s", NULL, offsetof(UserStatusInfo, slice) },
+ { "State", "s", NULL, offsetof(UserStatusInfo, state) },
+ { "UID", "u", NULL, offsetof(UserStatusInfo, uid) },
+ { "Timestamp", "t", NULL, offsetof(UserStatusInfo, timestamp.realtime) },
+ { "TimestampMonotonic", "t", NULL, offsetof(UserStatusInfo, timestamp.monotonic) },
+ { "Display", "(so)", prop_map_first_of_struct, offsetof(UserStatusInfo, display) },
+ { "Sessions", "a(so)", prop_map_sessions_strv, offsetof(UserStatusInfo, sessions) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX];
+ char since2[FORMAT_TIMESTAMP_MAX];
+ const char *s1, *s2;
+ _cleanup_(user_status_info_clear) UserStatusInfo i = {};
+ int r;
+
+ r = bus_map_all_properties(bus, "org.freedesktop.login1", path, map, BUS_MAP_BOOLEAN_AS_BOOL, &error, &m, &i);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ if (i.name)
+ printf("%s (%"PRIu32")\n", i.name, i.uid);
+ else
+ printf("%"PRIu32"\n", i.uid);
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), i.timestamp.realtime);
+ s2 = format_timestamp(since2, sizeof(since2), i.timestamp.realtime);
+
+ if (s1)
+ printf("\t Since: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\t Since: %s\n", s2);
+
+ if (!isempty(i.state))
+ printf("\t State: %s\n", i.state);
+
+ if (!strv_isempty(i.sessions)) {
+ char **l;
+ printf("\tSessions:");
+
+ STRV_FOREACH(l, i.sessions)
+ printf(" %s%s",
+ streq_ptr(*l, i.display) ? "*" : "",
+ *l);
+
+ printf("\n");
+ }
+
+ printf("\t Linger: %s\n", yes_no(i.linger));
+
+ if (i.slice) {
+ printf("\t Unit: %s\n", i.slice);
+ show_unit_cgroup(bus, "org.freedesktop.systemd1.Slice", i.slice, 0);
+
+ show_journal_by_unit(
+ stdout,
+ i.slice,
+ NULL,
+ arg_output,
+ 0,
+ i.timestamp.monotonic,
+ arg_lines,
+ 0,
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ SD_JOURNAL_LOCAL_ONLY,
+ true,
+ NULL);
+ }
+
+ return 0;
+}
+
+static int print_seat_status_info(sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Id", "s", NULL, offsetof(SeatStatusInfo, id) },
+ { "ActiveSession", "(so)", prop_map_first_of_struct, offsetof(SeatStatusInfo, active_session) },
+ { "Sessions", "a(so)", prop_map_sessions_strv, offsetof(SeatStatusInfo, sessions) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(seat_status_info_clear) SeatStatusInfo i = {};
+ int r;
+
+ r = bus_map_all_properties(bus, "org.freedesktop.login1", path, map, 0, &error, &m, &i);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ printf("%s\n", strna(i.id));
+
+ if (!strv_isempty(i.sessions)) {
+ char **l;
+ printf("\tSessions:");
+
+ STRV_FOREACH(l, i.sessions) {
+ if (streq_ptr(*l, i.active_session))
+ printf(" *%s", *l);
+ else
+ printf(" %s", *l);
+ }
+
+ printf("\n");
+ }
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ unsigned c;
+
+ c = columns();
+ if (c > 21)
+ c -= 21;
+ else
+ c = 0;
+
+ printf("\t Devices:\n");
+
+ show_sysfs(i.id, "\t\t ", c, get_output_flags());
+ }
+
+ return 0;
+}
+
+static int print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRUCT:
+
+ if (contents[0] == SD_BUS_TYPE_STRING && STR_IN_SET(name, "Display", "Seat", "ActiveSession")) {
+ const char *s;
+
+ r = sd_bus_message_read(m, "(so)", &s, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(s))
+ bus_print_property_value(name, expected_value, value, s);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_UINT32 && streq(name, "User")) {
+ uint32_t uid;
+
+ r = sd_bus_message_read(m, "(uo)", &uid, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!uid_is_valid(uid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid user ID: " UID_FMT,
+ uid);
+
+ bus_print_property_valuef(name, expected_value, value, UID_FMT, uid);
+ return 1;
+ }
+ break;
+
+ case SD_BUS_TYPE_ARRAY:
+
+ if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "Sessions")) {
+ const char *s;
+ bool space = false;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(so)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!value)
+ printf("%s=", name);
+
+ while ((r = sd_bus_message_read(m, "(so)", &s, NULL)) > 0) {
+ printf("%s%s", space ? " " : "", s);
+ space = true;
+ }
+
+ if (space || !value)
+ printf("\n");
+
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int show_properties(sd_bus *bus, const char *path, bool *new_line) {
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ r = bus_print_all_properties(
+ bus,
+ "org.freedesktop.login1",
+ path,
+ print_property,
+ arg_property,
+ arg_value,
+ arg_all,
+ NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int show_session(int argc, char *argv[], void *userdata) {
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+
+ assert(bus);
+ assert(argv);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (argc <= 1) {
+ /* If no argument is specified inspect the manager itself */
+ if (properties)
+ return show_properties(bus, "/org/freedesktop/login1", &new_line);
+
+ return print_session_status_info(bus, "/org/freedesktop/login1/session/auto", &new_line);
+ }
+
+ for (int i = 1; i < argc; i++) {
+ r = get_session_path(bus, argv[i], &error, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get session path: %s", bus_error_message(&error, r));
+
+ if (properties)
+ r = show_properties(bus, path, &new_line);
+ else
+ r = print_session_status_info(bus, path, &new_line);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int show_user(int argc, char *argv[], void *userdata) {
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (argc <= 1) {
+ /* If no argument is specified inspect the manager itself */
+ if (properties)
+ return show_properties(bus, "/org/freedesktop/login1", &new_line);
+
+ return print_user_status_info(bus, "/org/freedesktop/login1/user/self", &new_line);
+ }
+
+ for (int i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message * reply = NULL;
+ const char *path = NULL;
+ uid_t uid;
+
+ r = get_user_creds((const char**) (argv+i), &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look up user %s: %m", argv[i]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "GetUser",
+ &error, &reply,
+ "u", (uint32_t) uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get user: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (properties)
+ r = show_properties(bus, path, &new_line);
+ else
+ r = print_user_status_info(bus, path, &new_line);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int show_seat(int argc, char *argv[], void *userdata) {
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (argc <= 1) {
+ /* If no argument is specified inspect the manager itself */
+ if (properties)
+ return show_properties(bus, "/org/freedesktop/login1", &new_line);
+
+ return print_seat_status_info(bus, "/org/freedesktop/login1/seat/auto", &new_line);
+ }
+
+ for (int i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message * reply = NULL;
+ const char *path = NULL;
+
+ r = bus_call_method(bus, bus_login_mgr, "GetSeat", &error, &reply, "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get seat: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (properties)
+ r = show_properties(bus, path, &new_line);
+ else
+ r = print_seat_status_info(bus, path, &new_line);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int activate(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (argc < 2) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1/session/auto",
+ "org.freedesktop.login1.Session",
+ streq(argv[0], "lock-session") ? "Lock" :
+ streq(argv[0], "unlock-session") ? "Unlock" :
+ streq(argv[0], "terminate-session") ? "Terminate" :
+ "Activate",
+ &error, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+ }
+
+ for (int i = 1; i < argc; i++) {
+
+ r = bus_call_method(
+ bus,
+ bus_login_mgr,
+ streq(argv[0], "lock-session") ? "LockSession" :
+ streq(argv[0], "unlock-session") ? "UnlockSession" :
+ streq(argv[0], "terminate-session") ? "TerminateSession" :
+ "ActivateSession",
+ &error, NULL,
+ "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int kill_session(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (!arg_kill_who)
+ arg_kill_who = "all";
+
+ for (int i = 1; i < argc; i++) {
+
+ r = bus_call_method(
+ bus,
+ bus_login_mgr,
+ "KillSession",
+ &error, NULL,
+ "ssi", argv[i], arg_kill_who, arg_signal);
+ if (r < 0)
+ return log_error_errno(r, "Could not kill session: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int enable_linger(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ char* short_argv[3];
+ bool b;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ b = streq(argv[0], "enable-linger");
+
+ if (argc < 2) {
+ /* No argument? Let's use an empty user name,
+ * then logind will use our user. */
+
+ short_argv[0] = argv[0];
+ short_argv[1] = (char*) "";
+ short_argv[2] = NULL;
+ argv = short_argv;
+ argc = 2;
+ }
+
+ for (int i = 1; i < argc; i++) {
+ uid_t uid;
+
+ if (isempty(argv[i]))
+ uid = UID_INVALID;
+ else {
+ r = get_user_creds((const char**) (argv+i), &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look up user %s: %m", argv[i]);
+ }
+
+ r = bus_call_method(
+ bus,
+ bus_login_mgr,
+ "SetUserLinger",
+ &error, NULL,
+ "ubb", (uint32_t) uid, b, true);
+ if (r < 0)
+ return log_error_errno(r, "Could not enable linger: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int terminate_user(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (int i = 1; i < argc; i++) {
+ uid_t uid;
+
+ r = get_user_creds((const char**) (argv+i), &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look up user %s: %m", argv[i]);
+
+ r = bus_call_method(bus, bus_login_mgr, "TerminateUser", &error, NULL, "u", (uint32_t) uid);
+ if (r < 0)
+ return log_error_errno(r, "Could not terminate user: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int kill_user(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (!arg_kill_who)
+ arg_kill_who = "all";
+
+ for (int i = 1; i < argc; i++) {
+ uid_t uid;
+
+ r = get_user_creds((const char**) (argv+i), &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to look up user %s: %m", argv[i]);
+
+ r = bus_call_method(
+ bus,
+ bus_login_mgr,
+ "KillUser",
+ &error, NULL,
+ "ui", (uint32_t) uid, arg_signal);
+ if (r < 0)
+ return log_error_errno(r, "Could not kill user: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int attach(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (int i = 2; i < argc; i++) {
+
+ r = bus_call_method(
+ bus,
+ bus_login_mgr,
+ "AttachDevice",
+ &error, NULL,
+ "ssb", argv[1], argv[i], true);
+ if (r < 0)
+ return log_error_errno(r, "Could not attach device: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int flush_devices(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_call_method(bus, bus_login_mgr, "FlushDevices", &error, NULL, "b", true);
+ if (r < 0)
+ return log_error_errno(r, "Could not flush devices: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int lock_sessions(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_call_method(
+ bus,
+ bus_login_mgr,
+ streq(argv[0], "lock-sessions") ? "LockSessions" : "UnlockSessions",
+ &error, NULL,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not lock sessions: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int terminate_seat(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+ assert(argv);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (int i = 1; i < argc; i++) {
+
+ r = bus_call_method(bus, bus_login_mgr, "TerminateSeat", &error, NULL, "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Could not terminate seat: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("loginctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sSend control commands to or query the login manager.%s\n"
+ "\nSession Commands:\n"
+ " list-sessions List sessions\n"
+ " session-status [ID...] Show session status\n"
+ " show-session [ID...] Show properties of sessions or the manager\n"
+ " activate [ID] Activate a session\n"
+ " lock-session [ID...] Screen lock one or more sessions\n"
+ " unlock-session [ID...] Screen unlock one or more sessions\n"
+ " lock-sessions Screen lock all current sessions\n"
+ " unlock-sessions Screen unlock all current sessions\n"
+ " terminate-session ID... Terminate one or more sessions\n"
+ " kill-session ID... Send signal to processes of a session\n"
+ "\nUser Commands:\n"
+ " list-users List users\n"
+ " user-status [USER...] Show user status\n"
+ " show-user [USER...] Show properties of users or the manager\n"
+ " enable-linger [USER...] Enable linger state of one or more users\n"
+ " disable-linger [USER...] Disable linger state of one or more users\n"
+ " terminate-user USER... Terminate all sessions of one or more users\n"
+ " kill-user USER... Send signal to processes of a user\n"
+ "\nSeat Commands:\n"
+ " list-seats List seats\n"
+ " seat-status [NAME...] Show seat status\n"
+ " show-seat [NAME...] Show properties of seats or the manager\n"
+ " attach NAME DEVICE... Attach one or more devices to a seat\n"
+ " flush-devices Flush all device associations\n"
+ " terminate-seat NAME... Terminate all sessions on one or more seats\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --no-ask-password Don't prompt for password\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -p --property=NAME Show only properties by this name\n"
+ " -P NAME Equivalent to --value --property=NAME\n"
+ " -a --all Show all properties, including empty ones\n"
+ " --value When showing properties, only print the value\n"
+ " -l --full Do not ellipsize output\n"
+ " --kill-who=WHO Who to send signal to\n"
+ " -s --signal=SIGNAL Which signal to send\n"
+ " -n --lines=INTEGER Number of journal entries to show\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-iso-precise, short-full,\n"
+ " short-monotonic, short-unix, verbose, export,\n"
+ " json, json-pretty, json-sse, json-seq, cat,\n"
+ " with-unit)\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_VALUE,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_KILL_WHO,
+ ARG_NO_ASK_PASSWORD,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "property", required_argument, NULL, 'p' },
+ { "all", no_argument, NULL, 'a' },
+ { "value", no_argument, NULL, ARG_VALUE },
+ { "full", no_argument, NULL, 'l' },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "kill-who", required_argument, NULL, ARG_KILL_WHO },
+ { "signal", required_argument, NULL, 's' },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "lines", required_argument, NULL, 'n' },
+ { "output", required_argument, NULL, 'o' },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hp:P:als:H:M:n:o:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case 'P':
+ arg_value = true;
+ _fallthrough_;
+
+ case 'p': {
+ r = strv_extend(&arg_property, optarg);
+ if (r < 0)
+ return log_oom();
+
+ /* If the user asked for a particular
+ * property, show it to them, even if it is
+ * empty. */
+ arg_all = true;
+ break;
+ }
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case ARG_VALUE:
+ arg_value = true;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case 'n':
+ if (safe_atou(optarg, &arg_lines) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse lines '%s'", optarg);
+ break;
+
+ case 'o':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX);
+ return 0;
+ }
+
+ arg_output = output_mode_from_string(optarg);
+ if (arg_output < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown output '%s'.", optarg);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ arg_legend = false;
+
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case ARG_KILL_WHO:
+ arg_kill_who = optarg;
+ break;
+
+ case 's':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
+ arg_signal = signal_from_string(optarg);
+ if (arg_signal < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse signal string %s.", optarg);
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int loginctl_main(int argc, char *argv[], sd_bus *bus) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "list-sessions", VERB_ANY, 1, VERB_DEFAULT, list_sessions },
+ { "session-status", VERB_ANY, VERB_ANY, 0, show_session },
+ { "show-session", VERB_ANY, VERB_ANY, 0, show_session },
+ { "activate", VERB_ANY, 2, 0, activate },
+ { "lock-session", VERB_ANY, VERB_ANY, 0, activate },
+ { "unlock-session", VERB_ANY, VERB_ANY, 0, activate },
+ { "lock-sessions", VERB_ANY, 1, 0, lock_sessions },
+ { "unlock-sessions", VERB_ANY, 1, 0, lock_sessions },
+ { "terminate-session", 2, VERB_ANY, 0, activate },
+ { "kill-session", 2, VERB_ANY, 0, kill_session },
+ { "list-users", VERB_ANY, 1, 0, list_users },
+ { "user-status", VERB_ANY, VERB_ANY, 0, show_user },
+ { "show-user", VERB_ANY, VERB_ANY, 0, show_user },
+ { "enable-linger", VERB_ANY, VERB_ANY, 0, enable_linger },
+ { "disable-linger", VERB_ANY, VERB_ANY, 0, enable_linger },
+ { "terminate-user", 2, VERB_ANY, 0, terminate_user },
+ { "kill-user", 2, VERB_ANY, 0, kill_user },
+ { "list-seats", VERB_ANY, 1, 0, list_seats },
+ { "seat-status", VERB_ANY, VERB_ANY, 0, show_seat },
+ { "show-seat", VERB_ANY, VERB_ANY, 0, show_seat },
+ { "attach", 3, VERB_ANY, 0, attach },
+ { "flush-devices", VERB_ANY, 1, 0, flush_devices },
+ { "terminate-seat", 2, VERB_ANY, 0, terminate_seat },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_setup_cli();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ sigbus_install();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ (void) sd_bus_set_allow_interactive_authorization(bus, arg_ask_password);
+
+ return loginctl_main(argc, argv, bus);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/login/logind-acl.c b/src/login/logind-acl.c
new file mode 100644
index 0000000..ed615e2
--- /dev/null
+++ b/src/login/logind-acl.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-device.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "logind-acl.h"
+#include "set.h"
+#include "string-util.h"
+#include "util.h"
+
+static int flush_acl(acl_t acl) {
+ acl_entry_t i;
+ int found;
+ bool changed = false;
+
+ assert(acl);
+
+ for (found = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ found > 0;
+ found = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) {
+
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag != ACL_USER)
+ continue;
+
+ if (acl_delete_entry(acl, i) < 0)
+ return -errno;
+
+ changed = true;
+ }
+
+ if (found < 0)
+ return -errno;
+
+ return changed;
+}
+
+int devnode_acl(const char *path,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid) {
+
+ acl_t acl;
+ int r = 0;
+ bool changed = false;
+
+ assert(path);
+
+ acl = acl_get_file(path, ACL_TYPE_ACCESS);
+ if (!acl)
+ return -errno;
+
+ if (flush) {
+
+ r = flush_acl(acl);
+ if (r < 0)
+ goto finish;
+ if (r > 0)
+ changed = true;
+
+ } else if (del && old_uid > 0) {
+ acl_entry_t entry;
+
+ r = acl_find_uid(acl, old_uid, &entry);
+ if (r < 0)
+ goto finish;
+
+ if (r > 0) {
+ if (acl_delete_entry(acl, entry) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ changed = true;
+ }
+ }
+
+ if (add && new_uid > 0) {
+ acl_entry_t entry;
+ acl_permset_t permset;
+ int rd, wt;
+
+ r = acl_find_uid(acl, new_uid, &entry);
+ if (r < 0)
+ goto finish;
+
+ if (r == 0) {
+ if (acl_create_entry(&acl, &entry) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (acl_set_tag_type(entry, ACL_USER) < 0 ||
+ acl_set_qualifier(entry, &new_uid) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+
+ if (acl_get_permset(entry, &permset) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ rd = acl_get_perm(permset, ACL_READ);
+ if (rd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ wt = acl_get_perm(permset, ACL_WRITE);
+ if (wt < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (!rd || !wt) {
+
+ if (acl_add_perm(permset, ACL_READ|ACL_WRITE) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ changed = true;
+ }
+ }
+
+ if (!changed)
+ goto finish;
+
+ if (acl_calc_mask(&acl) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (acl_set_file(path, ACL_TYPE_ACCESS, acl) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ acl_free(acl);
+
+ return r;
+}
+
+int devnode_acl_all(const char *seat,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid) {
+
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_set_free_free_ Set *nodes = NULL;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ sd_device *d;
+ char *n;
+ int r;
+
+ nodes = set_new(&path_hash_ops);
+ if (!nodes)
+ return -ENOMEM;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ if (isempty(seat))
+ seat = "seat0";
+
+ /* We can only match by one tag in libudev. We choose
+ * "uaccess" for that. If we could match for two tags here we
+ * could add the seat name as second match tag, but this would
+ * be hardly optimizable in libudev, and hence checking the
+ * second tag manually in our loop is a good solution. */
+ r = sd_device_enumerator_add_match_tag(e, "uaccess");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ const char *node, *sn;
+
+ /* Make sure the tag is still in place */
+ if (sd_device_has_current_tag(d, "uaccess") <= 0)
+ continue;
+
+ if (sd_device_get_property_value(d, "ID_SEAT", &sn) < 0 || isempty(sn))
+ sn = "seat0";
+
+ if (!streq(seat, sn))
+ continue;
+
+ /* In case people mistag devices with nodes, we need to ignore this */
+ if (sd_device_get_devname(d, &node) < 0)
+ continue;
+
+ log_device_debug(d, "Found udev node %s for seat %s", node, seat);
+ r = set_put_strdup(&nodes, node);
+ if (r < 0)
+ return r;
+ }
+
+ /* udev exports "dead" device nodes to allow module on-demand loading,
+ * these devices are not known to the kernel at this moment */
+ dir = opendir("/run/udev/static_node-tags/uaccess");
+ if (dir) {
+ FOREACH_DIRENT(dent, dir, return -errno) {
+ _cleanup_free_ char *unescaped_devname = NULL;
+
+ if (cunescape(dent->d_name, UNESCAPE_RELAX, &unescaped_devname) < 0)
+ return -ENOMEM;
+
+ n = path_join("/dev", unescaped_devname);
+ if (!n)
+ return -ENOMEM;
+
+ log_debug("Found static node %s for seat %s", n, seat);
+ r = set_consume(nodes, n);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = 0;
+ SET_FOREACH(n, nodes) {
+ int k;
+
+ log_debug("Changing ACLs at %s for seat %s (uid "UID_FMT"→"UID_FMT"%s%s)",
+ n, seat, old_uid, new_uid,
+ del ? " del" : "", add ? " add" : "");
+
+ k = devnode_acl(n, flush, del, old_uid, add, new_uid);
+ if (k == -ENOENT)
+ log_debug("Device %s disappeared while setting ACLs", n);
+ else if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/login/logind-acl.h b/src/login/logind-acl.h
new file mode 100644
index 0000000..c88f3c0
--- /dev/null
+++ b/src/login/logind-acl.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#if HAVE_ACL
+
+int devnode_acl(const char *path,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid);
+
+int devnode_acl_all(const char *seat,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid);
+#else
+
+static inline int devnode_acl(const char *path,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid) {
+ return 0;
+}
+
+static inline int devnode_acl_all(const char *seat,
+ bool flush,
+ bool del, uid_t old_uid,
+ bool add, uid_t new_uid) {
+ return 0;
+}
+
+#endif
diff --git a/src/login/logind-action.c b/src/login/logind-action.c
new file mode 100644
index 0000000..1a017c8
--- /dev/null
+++ b/src/login/logind-action.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "conf-parser.h"
+#include "format-util.h"
+#include "logind-action.h"
+#include "logind-dbus.h"
+#include "logind-session-dbus.h"
+#include "process-util.h"
+#include "sleep-config.h"
+#include "special.h"
+#include "string-table.h"
+#include "terminal-util.h"
+#include "user-util.h"
+
+const char* manager_target_for_action(HandleAction handle) {
+ static const char * const target_table[_HANDLE_ACTION_MAX] = {
+ [HANDLE_POWEROFF] = SPECIAL_POWEROFF_TARGET,
+ [HANDLE_REBOOT] = SPECIAL_REBOOT_TARGET,
+ [HANDLE_HALT] = SPECIAL_HALT_TARGET,
+ [HANDLE_KEXEC] = SPECIAL_KEXEC_TARGET,
+ [HANDLE_SUSPEND] = SPECIAL_SUSPEND_TARGET,
+ [HANDLE_HIBERNATE] = SPECIAL_HIBERNATE_TARGET,
+ [HANDLE_HYBRID_SLEEP] = SPECIAL_HYBRID_SLEEP_TARGET,
+ [HANDLE_SUSPEND_THEN_HIBERNATE] = SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET,
+ };
+
+ assert(handle >= 0);
+ if (handle < (ssize_t) ELEMENTSOF(target_table))
+ return target_table[handle];
+ return NULL;
+}
+
+int manager_handle_action(
+ Manager *m,
+ InhibitWhat inhibit_key,
+ HandleAction handle,
+ bool ignore_inhibited,
+ bool is_edge) {
+
+ static const char * const message_table[_HANDLE_ACTION_MAX] = {
+ [HANDLE_POWEROFF] = "Powering Off...",
+ [HANDLE_REBOOT] = "Rebooting...",
+ [HANDLE_HALT] = "Halting...",
+ [HANDLE_KEXEC] = "Rebooting via kexec...",
+ [HANDLE_SUSPEND] = "Suspending...",
+ [HANDLE_HIBERNATE] = "Hibernating...",
+ [HANDLE_HYBRID_SLEEP] = "Hibernating and suspending...",
+ [HANDLE_SUSPEND_THEN_HIBERNATE] = "Suspending, then hibernating...",
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ InhibitWhat inhibit_operation;
+ Inhibitor *offending = NULL;
+ bool supported;
+ const char *target;
+ int r;
+
+ assert(m);
+
+ /* If the key handling is turned off, don't do anything */
+ if (handle == HANDLE_IGNORE) {
+ log_debug("Handling of %s (%s) is disabled, taking no action.",
+ inhibit_key == 0 ? "idle timeout" : inhibit_what_to_string(inhibit_key),
+ is_edge ? "edge" : "level");
+ return 0;
+ }
+
+ if (inhibit_key == INHIBIT_HANDLE_LID_SWITCH) {
+ /* If the last system suspend or startup is too close,
+ * let's not suspend for now, to give USB docking
+ * stations some time to settle so that we can
+ * properly watch its displays. */
+ if (m->lid_switch_ignore_event_source) {
+ log_debug("Ignoring lid switch request, system startup or resume too close.");
+ return 0;
+ }
+ }
+
+ /* If the key handling is inhibited, don't do anything */
+ if (inhibit_key > 0) {
+ if (manager_is_inhibited(m, inhibit_key, INHIBIT_BLOCK, NULL, true, false, 0, NULL)) {
+ log_debug("Refusing %s operation, %s is inhibited.",
+ handle_action_to_string(handle),
+ inhibit_what_to_string(inhibit_key));
+ return 0;
+ }
+ }
+
+ /* Locking is handled differently from the rest. */
+ if (handle == HANDLE_LOCK) {
+ if (!is_edge)
+ return 0;
+
+ log_info("Locking sessions...");
+ session_send_lock_all(m, true);
+ return 1;
+ }
+
+ if (handle == HANDLE_SUSPEND)
+ supported = can_sleep("suspend") > 0;
+ else if (handle == HANDLE_HIBERNATE)
+ supported = can_sleep("hibernate") > 0;
+ else if (handle == HANDLE_HYBRID_SLEEP)
+ supported = can_sleep("hybrid-sleep") > 0;
+ else if (handle == HANDLE_SUSPEND_THEN_HIBERNATE)
+ supported = can_sleep("suspend-then-hibernate") > 0;
+ else if (handle == HANDLE_KEXEC)
+ supported = access(KEXEC, X_OK) >= 0;
+ else
+ supported = true;
+
+ if (!supported && IN_SET(handle, HANDLE_HIBERNATE, HANDLE_HYBRID_SLEEP, HANDLE_SUSPEND_THEN_HIBERNATE)) {
+ supported = can_sleep("suspend") > 0;
+ if (supported) {
+ log_notice("Requested %s operation is not supported, using regular suspend instead.",
+ handle_action_to_string(handle));
+ handle = HANDLE_SUSPEND;
+ }
+ }
+
+ if (!supported)
+ return log_warning_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Requested %s operation not supported, ignoring.", handle_action_to_string(handle));
+
+ if (m->action_what > 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EALREADY),
+ "Action already in progress (%s), ignoring requested %s operation.",
+ inhibit_what_to_string(m->action_what),
+ handle_action_to_string(handle));
+
+ assert_se(target = manager_target_for_action(handle));
+
+ inhibit_operation = IN_SET(handle, HANDLE_SUSPEND, HANDLE_HIBERNATE,
+ HANDLE_HYBRID_SLEEP,
+ HANDLE_SUSPEND_THEN_HIBERNATE) ? INHIBIT_SLEEP : INHIBIT_SHUTDOWN;
+
+ /* If the actual operation is inhibited, warn and fail */
+ if (!ignore_inhibited &&
+ manager_is_inhibited(m, inhibit_operation, INHIBIT_BLOCK, NULL, false, false, 0, &offending)) {
+ _cleanup_free_ char *comm = NULL, *u = NULL;
+
+ (void) get_process_comm(offending->pid, &comm);
+ u = uid_to_name(offending->uid);
+
+ /* If this is just a recheck of the lid switch then don't warn about anything */
+ log_full(is_edge ? LOG_ERR : LOG_DEBUG,
+ "Refusing %s operation, %s is inhibited by UID "UID_FMT"/%s, PID "PID_FMT"/%s.",
+ handle_action_to_string(handle),
+ inhibit_what_to_string(inhibit_operation),
+ offending->uid, strna(u),
+ offending->pid, strna(comm));
+
+ return is_edge ? -EPERM : 0;
+ }
+
+ log_info("%s", message_table[handle]);
+
+ r = bus_manager_shutdown_or_sleep_now_or_later(m, target, inhibit_operation, &error);
+ if (r < 0)
+ return log_error_errno(r, "Failed to execute %s operation: %s",
+ handle_action_to_string(handle),
+ bus_error_message(&error, r));
+
+ return 1;
+}
+
+static const char* const handle_action_table[_HANDLE_ACTION_MAX] = {
+ [HANDLE_IGNORE] = "ignore",
+ [HANDLE_POWEROFF] = "poweroff",
+ [HANDLE_REBOOT] = "reboot",
+ [HANDLE_HALT] = "halt",
+ [HANDLE_KEXEC] = "kexec",
+ [HANDLE_SUSPEND] = "suspend",
+ [HANDLE_HIBERNATE] = "hibernate",
+ [HANDLE_HYBRID_SLEEP] = "hybrid-sleep",
+ [HANDLE_SUSPEND_THEN_HIBERNATE] = "suspend-then-hibernate",
+ [HANDLE_LOCK] = "lock",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(handle_action, HandleAction);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_handle_action, handle_action, HandleAction, "Failed to parse handle action setting");
diff --git a/src/login/logind-action.h b/src/login/logind-action.h
new file mode 100644
index 0000000..73aa20b
--- /dev/null
+++ b/src/login/logind-action.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+
+typedef enum HandleAction {
+ HANDLE_IGNORE,
+ HANDLE_POWEROFF,
+ HANDLE_REBOOT,
+ HANDLE_HALT,
+ HANDLE_KEXEC,
+ HANDLE_SUSPEND,
+ HANDLE_HIBERNATE,
+ HANDLE_HYBRID_SLEEP,
+ HANDLE_SUSPEND_THEN_HIBERNATE,
+ HANDLE_LOCK,
+ _HANDLE_ACTION_MAX,
+ _HANDLE_ACTION_INVALID = -1
+} HandleAction;
+
+#include "logind-inhibit.h"
+#include "logind.h"
+
+int manager_handle_action(
+ Manager *m,
+ InhibitWhat inhibit_key,
+ HandleAction handle,
+ bool ignore_inhibited,
+ bool is_edge);
+
+const char* handle_action_to_string(HandleAction h) _const_;
+HandleAction handle_action_from_string(const char *s) _pure_;
+
+const char* manager_target_for_action(HandleAction handle);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_handle_action);
diff --git a/src/login/logind-brightness.c b/src/login/logind-brightness.c
new file mode 100644
index 0000000..a6a1603
--- /dev/null
+++ b/src/login/logind-brightness.c
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-util.h"
+#include "device-util.h"
+#include "hash-funcs.h"
+#include "logind-brightness.h"
+#include "logind.h"
+#include "process-util.h"
+#include "stdio-util.h"
+
+/* Brightness and LED devices tend to be very slow to write to (often being I2C and such). Writes to the
+ * sysfs attributes are synchronous, and hence will freeze our process on access. We can't really have that,
+ * hence we add some complexity: whenever we need to write to the brightness attribute, we do so in a forked
+ * off process, which terminates when it is done. Watching that process allows us to watch completion of the
+ * write operation.
+ *
+ * To make this even more complex: clients are likely to send us many write requests in a short time-frame
+ * (because they implement reactive brightness sliders on screen). Let's coalesce writes to make this
+ * efficient: whenever we get requests to change brightness while we are still writing to the brightness
+ * attribute, let's remember the request and restart a new one when the initial operation finished. When we
+ * get another request while one is ongoing and one is pending we'll replace the pending one with the new
+ * one.
+ *
+ * The bus messages are answered when the first write operation finishes that started either due to the
+ * request or due to a later request that overrode the requested one.
+ *
+ * Yes, this is complex, but I don't see an easier way if we want to be both efficient and still support
+ * completion notification. */
+
+typedef struct BrightnessWriter {
+ Manager *manager;
+
+ sd_device *device;
+ char *path;
+
+ pid_t child;
+
+ uint32_t brightness;
+ bool again;
+
+ Set *current_messages;
+ Set *pending_messages;
+
+ sd_event_source* child_event_source;
+} BrightnessWriter;
+
+static void brightness_writer_free(BrightnessWriter *w) {
+ if (!w)
+ return;
+
+ if (w->manager && w->path)
+ (void) hashmap_remove_value(w->manager->brightness_writers, w->path, w);
+
+ sd_device_unref(w->device);
+ free(w->path);
+
+ set_free(w->current_messages);
+ set_free(w->pending_messages);
+
+ w->child_event_source = sd_event_source_unref(w->child_event_source);
+
+ free(w);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BrightnessWriter*, brightness_writer_free);
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+ brightness_writer_hash_ops,
+ char,
+ string_hash_func,
+ string_compare_func,
+ BrightnessWriter,
+ brightness_writer_free);
+
+static void brightness_writer_reply(BrightnessWriter *w, int error) {
+ int r;
+
+ assert(w);
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ m = set_steal_first(w->current_messages);
+ if (!m)
+ break;
+
+ if (error == 0)
+ r = sd_bus_reply_method_return(m, NULL);
+ else
+ r = sd_bus_reply_method_errnof(m, error, "Failed to write to brightness device: %m");
+ if (r < 0)
+ log_warning_errno(r, "Failed to send method reply, ignoring: %m");
+ }
+}
+
+static int brightness_writer_fork(BrightnessWriter *w);
+
+static int on_brightness_writer_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ BrightnessWriter *w = userdata;
+ int r;
+
+ assert(s);
+ assert(si);
+ assert(w);
+
+ assert(si->si_pid == w->child);
+ w->child = 0;
+ w->child_event_source = sd_event_source_unref(w->child_event_source);
+
+ brightness_writer_reply(w,
+ si->si_code == CLD_EXITED &&
+ si->si_status == EXIT_SUCCESS ? 0 : -EPROTO);
+
+ if (w->again) {
+ /* Another request to change the brightness has been queued. Act on it, but make the pending
+ * messages the current ones. */
+ w->again = false;
+ set_free(w->current_messages);
+ w->current_messages = TAKE_PTR(w->pending_messages);
+
+ r = brightness_writer_fork(w);
+ if (r >= 0)
+ return 0;
+
+ brightness_writer_reply(w, r);
+ }
+
+ brightness_writer_free(w);
+ return 0;
+}
+
+static int brightness_writer_fork(BrightnessWriter *w) {
+ int r;
+
+ assert(w);
+ assert(w->manager);
+ assert(w->child == 0);
+ assert(!w->child_event_source);
+
+ r = safe_fork("(sd-bright)", FORK_DEATHSIG|FORK_NULL_STDIO|FORK_CLOSE_ALL_FDS|FORK_LOG, &w->child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char brs[DECIMAL_STR_MAX(uint32_t)+1];
+
+ /* Child */
+ xsprintf(brs, "%" PRIu32, w->brightness);
+
+ r = sd_device_set_sysattr_value(w->device, "brightness", brs);
+ if (r < 0) {
+ log_device_error_errno(w->device, r, "Failed to write brightness to device: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ r = sd_event_add_child(w->manager->event, &w->child_event_source, w->child, WEXITED, on_brightness_writer_exit, w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch brightness writer child " PID_FMT ": %m", w->child);
+
+ return 0;
+}
+
+static int set_add_message(Set **set, sd_bus_message *message) {
+ int r;
+
+ assert(set);
+
+ if (!message)
+ return 0;
+
+ r = sd_bus_message_get_expect_reply(message);
+ if (r <= 0)
+ return r;
+
+ r = set_ensure_put(set, &bus_message_hash_ops, message);
+ if (r <= 0)
+ return r;
+ sd_bus_message_ref(message);
+
+ return 1;
+}
+
+int manager_write_brightness(
+ Manager *m,
+ sd_device *device,
+ uint32_t brightness,
+ sd_bus_message *message) {
+
+ _cleanup_(brightness_writer_freep) BrightnessWriter *w = NULL;
+ BrightnessWriter *existing;
+ const char *path;
+ int r;
+
+ assert(m);
+ assert(device);
+
+ r = sd_device_get_syspath(device, &path);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to get sysfs path for brightness device: %m");
+
+ existing = hashmap_get(m->brightness_writers, path);
+ if (existing) {
+ /* There's already a writer for this device. Let's update it with the new brightness, and add
+ * our message to the set of message to reply when done. */
+
+ r = set_add_message(&existing->pending_messages, message);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add message to set: %m");
+
+ /* We override any previously requested brightness here: we coalesce writes, and the newest
+ * requested brightness is the one we'll put into effect. */
+ existing->brightness = brightness;
+ existing->again = true; /* request another iteration of the writer when the current one is
+ * complete */
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&m->brightness_writers, &brightness_writer_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ w = new(BrightnessWriter, 1);
+ if (!w)
+ return log_oom();
+
+ *w = (BrightnessWriter) {
+ .device = sd_device_ref(device),
+ .path = strdup(path),
+ .brightness = brightness,
+ };
+
+ if (!w->path)
+ return log_oom();
+
+ r = hashmap_put(m->brightness_writers, w->path, w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add brightness writer to hashmap: %m");
+ w->manager = m;
+
+ r = set_add_message(&w->current_messages, message);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add message to set: %m");
+
+ r = brightness_writer_fork(w);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(w);
+ return 0;
+}
diff --git a/src/login/logind-brightness.h b/src/login/logind-brightness.h
new file mode 100644
index 0000000..f1c7775
--- /dev/null
+++ b/src/login/logind-brightness.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-device.h"
+
+#include "logind.h"
+
+int manager_write_brightness(Manager *m, sd_device *device, uint32_t brightness, sd_bus_message *message);
diff --git a/src/login/logind-button.c b/src/login/logind-button.c
new file mode 100644
index 0000000..0e38b5f
--- /dev/null
+++ b/src/login/logind-button.c
@@ -0,0 +1,380 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "async.h"
+#include "fd-util.h"
+#include "logind-button.h"
+#include "missing_input.h"
+#include "string-util.h"
+#include "util.h"
+
+#define CONST_MAX5(a, b, c, d, e) CONST_MAX(CONST_MAX(a, b), CONST_MAX(CONST_MAX(c, d), e))
+
+#define ULONG_BITS (sizeof(unsigned long)*8)
+
+static bool bitset_get(const unsigned long *bits, unsigned i) {
+ return (bits[i / ULONG_BITS] >> (i % ULONG_BITS)) & 1UL;
+}
+
+static void bitset_put(unsigned long *bits, unsigned i) {
+ bits[i / ULONG_BITS] |= (unsigned long) 1 << (i % ULONG_BITS);
+}
+
+Button* button_new(Manager *m, const char *name) {
+ Button *b;
+
+ assert(m);
+ assert(name);
+
+ b = new0(Button, 1);
+ if (!b)
+ return NULL;
+
+ b->name = strdup(name);
+ if (!b->name)
+ return mfree(b);
+
+ if (hashmap_put(m->buttons, b->name, b) < 0) {
+ free(b->name);
+ return mfree(b);
+ }
+
+ b->manager = m;
+ b->fd = -1;
+
+ return b;
+}
+
+void button_free(Button *b) {
+ assert(b);
+
+ hashmap_remove(b->manager->buttons, b->name);
+
+ sd_event_source_unref(b->io_event_source);
+ sd_event_source_unref(b->check_event_source);
+
+ asynchronous_close(b->fd);
+
+ free(b->name);
+ free(b->seat);
+ free(b);
+}
+
+int button_set_seat(Button *b, const char *sn) {
+ char *s;
+
+ assert(b);
+ assert(sn);
+
+ s = strdup(sn);
+ if (!s)
+ return -ENOMEM;
+
+ free(b->seat);
+ b->seat = s;
+
+ return 0;
+}
+
+static void button_lid_switch_handle_action(Manager *manager, bool is_edge) {
+ HandleAction handle_action;
+
+ assert(manager);
+
+ /* If we are docked or on external power, handle the lid switch
+ * differently */
+ if (manager_is_docked_or_external_displays(manager))
+ handle_action = manager->handle_lid_switch_docked;
+ else if (manager->handle_lid_switch_ep != _HANDLE_ACTION_INVALID &&
+ manager_is_on_external_power())
+ handle_action = manager->handle_lid_switch_ep;
+ else
+ handle_action = manager->handle_lid_switch;
+
+ manager_handle_action(manager, INHIBIT_HANDLE_LID_SWITCH, handle_action, manager->lid_switch_ignore_inhibited, is_edge);
+}
+
+static int button_recheck(sd_event_source *e, void *userdata) {
+ Button *b = userdata;
+
+ assert(b);
+ assert(b->lid_closed);
+
+ button_lid_switch_handle_action(b->manager, false);
+ return 1;
+}
+
+static int button_install_check_event_source(Button *b) {
+ int r;
+ assert(b);
+
+ /* Install a post handler, so that we keep rechecking as long as the lid is closed. */
+
+ if (b->check_event_source)
+ return 0;
+
+ r = sd_event_add_post(b->manager->event, &b->check_event_source, button_recheck, b);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_priority(b->check_event_source, SD_EVENT_PRIORITY_IDLE+1);
+}
+
+static int button_dispatch(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Button *b = userdata;
+ struct input_event ev;
+ ssize_t l;
+
+ assert(s);
+ assert(fd == b->fd);
+ assert(b);
+
+ l = read(b->fd, &ev, sizeof(ev));
+ if (l < 0)
+ return errno != EAGAIN ? -errno : 0;
+ if ((size_t) l < sizeof(ev))
+ return -EIO;
+
+ if (ev.type == EV_KEY && ev.value > 0) {
+
+ switch (ev.code) {
+
+ case KEY_POWER:
+ case KEY_POWER2:
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Power key pressed."),
+ "MESSAGE_ID=" SD_MESSAGE_POWER_KEY_STR);
+
+ manager_handle_action(b->manager, INHIBIT_HANDLE_POWER_KEY, b->manager->handle_power_key, b->manager->power_key_ignore_inhibited, true);
+ break;
+
+ /* The kernel naming is a bit confusing here:
+ KEY_RESTART was probably introduced for media playback purposes, but
+ is now being predominantly used to indicate device reboot.
+ */
+
+ case KEY_RESTART:
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Reboot key pressed."),
+ "MESSAGE_ID=" SD_MESSAGE_REBOOT_KEY_STR);
+
+ manager_handle_action(b->manager, INHIBIT_HANDLE_REBOOT_KEY, b->manager->handle_reboot_key, b->manager->reboot_key_ignore_inhibited, true);
+ break;
+
+ /* The kernel naming is a bit confusing here:
+
+ KEY_SLEEP = suspend-to-ram, which everybody else calls "suspend"
+ KEY_SUSPEND = suspend-to-disk, which everybody else calls "hibernate"
+ */
+
+ case KEY_SLEEP:
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Suspend key pressed."),
+ "MESSAGE_ID=" SD_MESSAGE_SUSPEND_KEY_STR);
+
+ manager_handle_action(b->manager, INHIBIT_HANDLE_SUSPEND_KEY, b->manager->handle_suspend_key, b->manager->suspend_key_ignore_inhibited, true);
+ break;
+
+ case KEY_SUSPEND:
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Hibernate key pressed."),
+ "MESSAGE_ID=" SD_MESSAGE_HIBERNATE_KEY_STR);
+
+ manager_handle_action(b->manager, INHIBIT_HANDLE_HIBERNATE_KEY, b->manager->handle_hibernate_key, b->manager->hibernate_key_ignore_inhibited, true);
+ break;
+ }
+
+ } else if (ev.type == EV_SW && ev.value > 0) {
+
+ if (ev.code == SW_LID) {
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Lid closed."),
+ "MESSAGE_ID=" SD_MESSAGE_LID_CLOSED_STR);
+
+ b->lid_closed = true;
+ button_lid_switch_handle_action(b->manager, true);
+ button_install_check_event_source(b);
+
+ } else if (ev.code == SW_DOCK) {
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("System docked."),
+ "MESSAGE_ID=" SD_MESSAGE_SYSTEM_DOCKED_STR);
+
+ b->docked = true;
+ }
+
+ } else if (ev.type == EV_SW && ev.value == 0) {
+
+ if (ev.code == SW_LID) {
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("Lid opened."),
+ "MESSAGE_ID=" SD_MESSAGE_LID_OPENED_STR);
+
+ b->lid_closed = false;
+ b->check_event_source = sd_event_source_unref(b->check_event_source);
+
+ } else if (ev.code == SW_DOCK) {
+ log_struct(LOG_INFO,
+ LOG_MESSAGE("System undocked."),
+ "MESSAGE_ID=" SD_MESSAGE_SYSTEM_UNDOCKED_STR);
+
+ b->docked = false;
+ }
+ }
+
+ return 0;
+}
+
+static int button_suitable(int fd) {
+ unsigned long types[CONST_MAX(EV_KEY, EV_SW)/ULONG_BITS+1];
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, EVIOCGBIT(EV_SYN, sizeof types), types) < 0)
+ return -errno;
+
+ if (bitset_get(types, EV_KEY)) {
+ unsigned long keys[CONST_MAX5(KEY_POWER, KEY_POWER2, KEY_SLEEP, KEY_SUSPEND, KEY_RESTART)/ULONG_BITS+1];
+
+ if (ioctl(fd, EVIOCGBIT(EV_KEY, sizeof keys), keys) < 0)
+ return -errno;
+
+ if (bitset_get(keys, KEY_POWER) ||
+ bitset_get(keys, KEY_POWER2) ||
+ bitset_get(keys, KEY_SLEEP) ||
+ bitset_get(keys, KEY_SUSPEND) ||
+ bitset_get(keys, KEY_RESTART))
+ return true;
+ }
+
+ if (bitset_get(types, EV_SW)) {
+ unsigned long switches[CONST_MAX(SW_LID, SW_DOCK)/ULONG_BITS+1];
+
+ if (ioctl(fd, EVIOCGBIT(EV_SW, sizeof switches), switches) < 0)
+ return -errno;
+
+ if (bitset_get(switches, SW_LID) ||
+ bitset_get(switches, SW_DOCK))
+ return true;
+ }
+
+ return false;
+}
+
+static int button_set_mask(const char *name, int fd) {
+ unsigned long
+ types[CONST_MAX(EV_KEY, EV_SW)/ULONG_BITS+1] = {},
+ keys[CONST_MAX5(KEY_POWER, KEY_POWER2, KEY_SLEEP, KEY_SUSPEND, KEY_RESTART)/ULONG_BITS+1] = {},
+ switches[CONST_MAX(SW_LID, SW_DOCK)/ULONG_BITS+1] = {};
+ struct input_mask mask;
+
+ assert(name);
+ assert(fd >= 0);
+
+ bitset_put(types, EV_KEY);
+ bitset_put(types, EV_SW);
+
+ mask = (struct input_mask) {
+ .type = EV_SYN,
+ .codes_size = sizeof(types),
+ .codes_ptr = PTR_TO_UINT64(types),
+ };
+
+ if (ioctl(fd, EVIOCSMASK, &mask) < 0)
+ /* Log only at debug level if the kernel doesn't do EVIOCSMASK yet */
+ return log_full_errno(IN_SET(errno, ENOTTY, EOPNOTSUPP, EINVAL) ? LOG_DEBUG : LOG_WARNING,
+ errno, "Failed to set EV_SYN event mask on /dev/input/%s: %m", name);
+
+ bitset_put(keys, KEY_POWER);
+ bitset_put(keys, KEY_POWER2);
+ bitset_put(keys, KEY_SLEEP);
+ bitset_put(keys, KEY_SUSPEND);
+ bitset_put(keys, KEY_RESTART);
+
+ mask = (struct input_mask) {
+ .type = EV_KEY,
+ .codes_size = sizeof(keys),
+ .codes_ptr = PTR_TO_UINT64(keys),
+ };
+
+ if (ioctl(fd, EVIOCSMASK, &mask) < 0)
+ return log_warning_errno(errno, "Failed to set EV_KEY event mask on /dev/input/%s: %m", name);
+
+ bitset_put(switches, SW_LID);
+ bitset_put(switches, SW_DOCK);
+
+ mask = (struct input_mask) {
+ .type = EV_SW,
+ .codes_size = sizeof(switches),
+ .codes_ptr = PTR_TO_UINT64(switches),
+ };
+
+ if (ioctl(fd, EVIOCSMASK, &mask) < 0)
+ return log_warning_errno(errno, "Failed to set EV_SW event mask on /dev/input/%s: %m", name);
+
+ return 0;
+}
+
+int button_open(Button *b) {
+ _cleanup_(asynchronous_closep) int fd = -1;
+ const char *p;
+ char name[256];
+ int r;
+
+ assert(b);
+
+ b->fd = asynchronous_close(b->fd);
+
+ p = strjoina("/dev/input/", b->name);
+
+ fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (fd < 0)
+ return log_warning_errno(errno, "Failed to open %s: %m", p);
+
+ r = button_suitable(fd);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to determine whether input device %s is relevant to us: %m", p);
+ if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL),
+ "Device %s does not expose keys or switches relevant to us, ignoring.", p);
+
+ if (ioctl(fd, EVIOCGNAME(sizeof name), name) < 0)
+ return log_error_errno(errno, "Failed to get input name for %s: %m", p);
+
+ (void) button_set_mask(b->name, fd);
+
+ b->io_event_source = sd_event_source_unref(b->io_event_source);
+ r = sd_event_add_io(b->manager->event, &b->io_event_source, fd, EPOLLIN, button_dispatch, b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add button event for %s: %m", p);
+
+ b->fd = TAKE_FD(fd);
+ log_info("Watching system buttons on %s (%s)", p, name);
+ return 0;
+}
+
+int button_check_switches(Button *b) {
+ unsigned long switches[CONST_MAX(SW_LID, SW_DOCK)/ULONG_BITS+1] = {};
+ assert(b);
+
+ if (b->fd < 0)
+ return -EINVAL;
+
+ if (ioctl(b->fd, EVIOCGSW(sizeof(switches)), switches) < 0)
+ return -errno;
+
+ b->lid_closed = bitset_get(switches, SW_LID);
+ b->docked = bitset_get(switches, SW_DOCK);
+
+ if (b->lid_closed)
+ button_install_check_event_source(b);
+
+ return 0;
+}
diff --git a/src/login/logind-button.h b/src/login/logind-button.h
new file mode 100644
index 0000000..041d665
--- /dev/null
+++ b/src/login/logind-button.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Button Button;
+
+#include "logind.h"
+
+struct Button {
+ Manager *manager;
+
+ sd_event_source *io_event_source;
+ sd_event_source *check_event_source;
+
+ char *name;
+ char *seat;
+ int fd;
+
+ bool lid_closed;
+ bool docked;
+};
+
+Button* button_new(Manager *m, const char *name);
+void button_free(Button *b);
+int button_open(Button *b);
+int button_set_seat(Button *b, const char *sn);
+int button_check_switches(Button *b);
diff --git a/src/login/logind-core.c b/src/login/logind-core.c
new file mode 100644
index 0000000..3595d7a
--- /dev/null
+++ b/src/login/logind-core.c
@@ -0,0 +1,844 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <linux/vt.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "conf-parser.h"
+#include "device-util.h"
+#include "efi-loader.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "limits-util.h"
+#include "logind.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "udev-util.h"
+#include "user-util.h"
+#include "userdb.h"
+#include "utmp-wtmp.h"
+
+void manager_reset_config(Manager *m) {
+ assert(m);
+
+ m->n_autovts = 6;
+ m->reserve_vt = 6;
+ m->remove_ipc = true;
+ m->inhibit_delay_max = 5 * USEC_PER_SEC;
+ m->user_stop_delay = 10 * USEC_PER_SEC;
+
+ m->handle_power_key = HANDLE_POWEROFF;
+ m->handle_suspend_key = HANDLE_SUSPEND;
+ m->handle_hibernate_key = HANDLE_HIBERNATE;
+ m->handle_lid_switch = HANDLE_SUSPEND;
+ m->handle_lid_switch_ep = _HANDLE_ACTION_INVALID;
+ m->handle_lid_switch_docked = HANDLE_IGNORE;
+ m->handle_reboot_key = HANDLE_REBOOT;
+ m->power_key_ignore_inhibited = false;
+ m->suspend_key_ignore_inhibited = false;
+ m->hibernate_key_ignore_inhibited = false;
+ m->lid_switch_ignore_inhibited = true;
+ m->reboot_key_ignore_inhibited = false;
+
+ m->holdoff_timeout_usec = 30 * USEC_PER_SEC;
+
+ m->idle_action_usec = 30 * USEC_PER_MINUTE;
+ m->idle_action = HANDLE_IGNORE;
+
+ m->runtime_dir_size = physical_memory_scale(10U, 100U); /* 10% */
+ m->runtime_dir_inodes = DIV_ROUND_UP(m->runtime_dir_size, 4096); /* 4k per inode */
+ m->sessions_max = 8192;
+ m->inhibitors_max = 8192;
+
+ m->kill_user_processes = KILL_USER_PROCESSES;
+
+ m->kill_only_users = strv_free(m->kill_only_users);
+ m->kill_exclude_users = strv_free(m->kill_exclude_users);
+}
+
+int manager_parse_config_file(Manager *m) {
+ assert(m);
+
+ return config_parse_many_nulstr(
+ PKGSYSCONFDIR "/logind.conf",
+ CONF_PATHS_NULSTR("systemd/logind.conf.d"),
+ "Login\0",
+ config_item_perf_lookup, logind_gperf_lookup,
+ CONFIG_PARSE_WARN, m,
+ NULL);
+}
+
+int manager_add_device(Manager *m, const char *sysfs, bool master, Device **ret_device) {
+ Device *d;
+
+ assert(m);
+ assert(sysfs);
+
+ d = hashmap_get(m->devices, sysfs);
+ if (d)
+ /* we support adding master-flags, but not removing them */
+ d->master = d->master || master;
+ else {
+ d = device_new(m, sysfs, master);
+ if (!d)
+ return -ENOMEM;
+ }
+
+ if (ret_device)
+ *ret_device = d;
+
+ return 0;
+}
+
+int manager_add_seat(Manager *m, const char *id, Seat **ret_seat) {
+ Seat *s;
+ int r;
+
+ assert(m);
+ assert(id);
+
+ s = hashmap_get(m->seats, id);
+ if (!s) {
+ r = seat_new(&s, m, id);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_seat)
+ *ret_seat = s;
+
+ return 0;
+}
+
+int manager_add_session(Manager *m, const char *id, Session **ret_session) {
+ Session *s;
+ int r;
+
+ assert(m);
+ assert(id);
+
+ s = hashmap_get(m->sessions, id);
+ if (!s) {
+ r = session_new(&s, m, id);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_session)
+ *ret_session = s;
+
+ return 0;
+}
+
+int manager_add_user(
+ Manager *m,
+ UserRecord *ur,
+ User **ret_user) {
+
+ User *u;
+ int r;
+
+ assert(m);
+ assert(ur);
+
+ u = hashmap_get(m->users, UID_TO_PTR(ur->uid));
+ if (!u) {
+ r = user_new(&u, m, ur);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_user)
+ *ret_user = u;
+
+ return 0;
+}
+
+int manager_add_user_by_name(
+ Manager *m,
+ const char *name,
+ User **ret_user) {
+
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ r = userdb_by_name(name, USERDB_AVOID_SHADOW, &ur);
+ if (r < 0)
+ return r;
+
+ return manager_add_user(m, ur, ret_user);
+}
+
+int manager_add_user_by_uid(
+ Manager *m,
+ uid_t uid,
+ User **ret_user) {
+
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ int r;
+
+ assert(m);
+ assert(uid_is_valid(uid));
+
+ r = userdb_by_uid(uid, USERDB_AVOID_SHADOW, &ur);
+ if (r < 0)
+ return r;
+
+ return manager_add_user(m, ur, ret_user);
+}
+
+int manager_add_inhibitor(Manager *m, const char* id, Inhibitor **ret) {
+ Inhibitor *i;
+ int r;
+
+ assert(m);
+ assert(id);
+
+ i = hashmap_get(m->inhibitors, id);
+ if (!i) {
+ r = inhibitor_new(&i, m, id);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = i;
+
+ return 0;
+}
+
+int manager_add_button(Manager *m, const char *name, Button **ret_button) {
+ Button *b;
+
+ assert(m);
+ assert(name);
+
+ b = hashmap_get(m->buttons, name);
+ if (!b) {
+ b = button_new(m, name);
+ if (!b)
+ return -ENOMEM;
+ }
+
+ if (ret_button)
+ *ret_button = b;
+
+ return 0;
+}
+
+int manager_process_seat_device(Manager *m, sd_device *d) {
+ Device *device;
+ int r;
+
+ assert(m);
+
+ if (device_for_action(d, DEVICE_ACTION_REMOVE) ||
+ sd_device_has_current_tag(d, "seat") <= 0) {
+ const char *syspath;
+
+ r = sd_device_get_syspath(d, &syspath);
+ if (r < 0)
+ return 0;
+
+ device = hashmap_get(m->devices, syspath);
+ if (!device)
+ return 0;
+
+ seat_add_to_gc_queue(device->seat);
+ device_free(device);
+
+ } else {
+ const char *sn, *syspath;
+ bool master;
+ Seat *seat;
+
+ if (sd_device_get_property_value(d, "ID_SEAT", &sn) < 0 || isempty(sn))
+ sn = "seat0";
+
+ if (!seat_name_is_valid(sn)) {
+ log_device_warning(d, "Device with invalid seat name %s found, ignoring.", sn);
+ return 0;
+ }
+
+ seat = hashmap_get(m->seats, sn);
+ master = sd_device_has_current_tag(d, "master-of-seat") > 0;
+
+ /* Ignore non-master devices for unknown seats */
+ if (!master && !seat)
+ return 0;
+
+ r = sd_device_get_syspath(d, &syspath);
+ if (r < 0)
+ return r;
+
+ r = manager_add_device(m, syspath, master, &device);
+ if (r < 0)
+ return r;
+
+ if (!seat) {
+ r = manager_add_seat(m, sn, &seat);
+ if (r < 0) {
+ if (!device->seat)
+ device_free(device);
+
+ return r;
+ }
+ }
+
+ device_attach(device, seat);
+ seat_start(seat);
+ }
+
+ return 0;
+}
+
+int manager_process_button_device(Manager *m, sd_device *d) {
+ const char *sysname;
+ Button *b;
+ int r;
+
+ assert(m);
+
+ r = sd_device_get_sysname(d, &sysname);
+ if (r < 0)
+ return r;
+
+ if (device_for_action(d, DEVICE_ACTION_REMOVE) ||
+ sd_device_has_current_tag(d, "power-switch") <= 0) {
+
+ b = hashmap_get(m->buttons, sysname);
+ if (!b)
+ return 0;
+
+ button_free(b);
+
+ } else {
+ const char *sn;
+
+ r = manager_add_button(m, sysname, &b);
+ if (r < 0)
+ return r;
+
+ if (sd_device_get_property_value(d, "ID_SEAT", &sn) < 0 || isempty(sn))
+ sn = "seat0";
+
+ button_set_seat(b, sn);
+
+ r = button_open(b);
+ if (r < 0) /* event device doesn't have any keys or switches relevant to us? (or any other error
+ * opening the device?) let's close the button again. */
+ button_free(b);
+ }
+
+ return 0;
+}
+
+int manager_get_session_by_pid(Manager *m, pid_t pid, Session **ret) {
+ _cleanup_free_ char *unit = NULL;
+ Session *s;
+ int r;
+
+ assert(m);
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ s = hashmap_get(m->sessions_by_leader, PID_TO_PTR(pid));
+ if (!s) {
+ r = cg_pid_get_unit(pid, &unit);
+ if (r >= 0)
+ s = hashmap_get(m->session_units, unit);
+ }
+
+ if (ret)
+ *ret = s;
+
+ return !!s;
+}
+
+int manager_get_user_by_pid(Manager *m, pid_t pid, User **ret) {
+ _cleanup_free_ char *unit = NULL;
+ User *u = NULL;
+ int r;
+
+ assert(m);
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ r = cg_pid_get_slice(pid, &unit);
+ if (r >= 0)
+ u = hashmap_get(m->user_units, unit);
+
+ if (ret)
+ *ret = u;
+
+ return !!u;
+}
+
+int manager_get_idle_hint(Manager *m, dual_timestamp *t) {
+ Session *s;
+ bool idle_hint;
+ dual_timestamp ts = DUAL_TIMESTAMP_NULL;
+
+ assert(m);
+
+ idle_hint = !manager_is_inhibited(m, INHIBIT_IDLE, INHIBIT_BLOCK, t, false, false, 0, NULL);
+
+ HASHMAP_FOREACH(s, m->sessions) {
+ dual_timestamp k;
+ int ih;
+
+ ih = session_get_idle_hint(s, &k);
+ if (ih < 0)
+ return ih;
+
+ if (!ih) {
+ if (!idle_hint) {
+ if (k.monotonic < ts.monotonic)
+ ts = k;
+ } else {
+ idle_hint = false;
+ ts = k;
+ }
+ } else if (idle_hint) {
+
+ if (k.monotonic > ts.monotonic)
+ ts = k;
+ }
+ }
+
+ if (t)
+ *t = ts;
+
+ return idle_hint;
+}
+
+bool manager_shall_kill(Manager *m, const char *user) {
+ assert(m);
+ assert(user);
+
+ if (!m->kill_exclude_users && streq(user, "root"))
+ return false;
+
+ if (strv_contains(m->kill_exclude_users, user))
+ return false;
+
+ if (!strv_isempty(m->kill_only_users))
+ return strv_contains(m->kill_only_users, user);
+
+ return m->kill_user_processes;
+}
+
+int config_parse_n_autovts(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned *n = data;
+ unsigned o;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou(rvalue, &o);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse number of autovts, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (o > 15) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "A maximum of 15 autovts are supported, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *n = o;
+ return 0;
+}
+
+static int vt_is_busy(unsigned vtnr) {
+ struct vt_stat vt_stat;
+ int r;
+ _cleanup_close_ int fd;
+
+ assert(vtnr >= 1);
+
+ /* VT_GETSTATE "cannot return state for more than 16 VTs, since v_state is short" */
+ assert(vtnr <= 15);
+
+ /* We explicitly open /dev/tty1 here instead of /dev/tty0. If
+ * we'd open the latter we'd open the foreground tty which
+ * hence would be unconditionally busy. By opening /dev/tty1
+ * we avoid this. Since tty1 is special and needs to be an
+ * explicitly loaded getty or DM this is safe. */
+
+ fd = open_terminal("/dev/tty1", O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ioctl(fd, VT_GETSTATE, &vt_stat) < 0)
+ r = -errno;
+ else
+ r = !!(vt_stat.v_state & (1 << vtnr));
+
+ return r;
+}
+
+int manager_spawn_autovt(Manager *m, unsigned vtnr) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char name[sizeof("autovt@tty.service") + DECIMAL_STR_MAX(unsigned)];
+ int r;
+
+ assert(m);
+ assert(vtnr >= 1);
+
+ if (vtnr > m->n_autovts &&
+ vtnr != m->reserve_vt)
+ return 0;
+
+ if (vtnr != m->reserve_vt) {
+ /* If this is the reserved TTY, we'll start the getty
+ * on it in any case, but otherwise only if it is not
+ * busy. */
+
+ r = vt_is_busy(vtnr);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return -EBUSY;
+ }
+
+ xsprintf(name, "autovt@tty%u.service", vtnr);
+ r = sd_bus_call_method(
+ m->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ &error,
+ NULL,
+ "ss", name, "fail");
+ if (r < 0)
+ return log_error_errno(r, "Failed to start %s: %s", name, bus_error_message(&error, r));
+
+ return 0;
+}
+
+bool manager_is_lid_closed(Manager *m) {
+ Button *b;
+
+ HASHMAP_FOREACH(b, m->buttons)
+ if (b->lid_closed)
+ return true;
+
+ return false;
+}
+
+static bool manager_is_docked(Manager *m) {
+ Button *b;
+
+ HASHMAP_FOREACH(b, m->buttons)
+ if (b->docked)
+ return true;
+
+ return false;
+}
+
+static int manager_count_external_displays(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r, n = 0;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "drm", true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ const char *status, *enabled, *dash, *nn, *subsys;
+ sd_device *p;
+
+ if (sd_device_get_parent(d, &p) < 0)
+ continue;
+
+ /* If the parent shares the same subsystem as the
+ * device we are looking at then it is a connector,
+ * which is what we are interested in. */
+ if (sd_device_get_subsystem(p, &subsys) < 0 || !streq(subsys, "drm"))
+ continue;
+
+ if (sd_device_get_sysname(d, &nn) < 0)
+ continue;
+
+ /* Ignore internal displays: the type is encoded in the sysfs name, as the second dash
+ * separated item (the first is the card name, the last the connector number). We implement a
+ * deny list of external displays here, rather than an allow list of internal ones, to ensure
+ * we don't block suspends too eagerly. */
+ dash = strchr(nn, '-');
+ if (!dash)
+ continue;
+
+ dash++;
+ if (!STARTSWITH_SET(dash,
+ "VGA-", "DVI-I-", "DVI-D-", "DVI-A-"
+ "Composite-", "SVIDEO-", "Component-",
+ "DIN-", "DP-", "HDMI-A-", "HDMI-B-", "TV-"))
+ continue;
+
+ /* Ignore ports that are not enabled */
+ if (sd_device_get_sysattr_value(d, "enabled", &enabled) < 0 || !streq(enabled, "enabled"))
+ continue;
+
+ /* We count any connector which is not explicitly
+ * "disconnected" as connected. */
+ if (sd_device_get_sysattr_value(d, "status", &status) < 0 || !streq(status, "disconnected"))
+ n++;
+ }
+
+ return n;
+}
+
+bool manager_is_docked_or_external_displays(Manager *m) {
+ int n;
+
+ /* If we are docked don't react to lid closing */
+ if (manager_is_docked(m)) {
+ log_debug("System is docked.");
+ return true;
+ }
+
+ /* If we have more than one display connected,
+ * assume that we are docked. */
+ n = manager_count_external_displays(m);
+ if (n < 0)
+ log_warning_errno(n, "Display counting failed: %m");
+ else if (n >= 1) {
+ log_debug("External (%i) displays connected.", n);
+ return true;
+ }
+
+ return false;
+}
+
+bool manager_is_on_external_power(void) {
+ int r;
+
+ /* For now we only check for AC power, but 'external power' can apply to anything that isn't an internal
+ * battery */
+ r = on_ac_power();
+ if (r < 0)
+ log_warning_errno(r, "Failed to read AC power status: %m");
+
+ return r != 0; /* Treat failure as 'on AC' */
+}
+
+bool manager_all_buttons_ignored(Manager *m) {
+ assert(m);
+
+ if (m->handle_power_key != HANDLE_IGNORE)
+ return false;
+ if (m->handle_suspend_key != HANDLE_IGNORE)
+ return false;
+ if (m->handle_hibernate_key != HANDLE_IGNORE)
+ return false;
+ if (m->handle_lid_switch != HANDLE_IGNORE)
+ return false;
+ if (!IN_SET(m->handle_lid_switch_ep, _HANDLE_ACTION_INVALID, HANDLE_IGNORE))
+ return false;
+ if (m->handle_lid_switch_docked != HANDLE_IGNORE)
+ return false;
+ if (m->handle_reboot_key != HANDLE_IGNORE)
+ return false;
+
+ return true;
+}
+
+int manager_read_utmp(Manager *m) {
+#if ENABLE_UTMP
+ int r;
+ _cleanup_(utxent_cleanup) bool utmpx = false;
+
+ assert(m);
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return log_error_errno(errno, "Failed to set utmp path to " _PATH_UTMPX ": %m");
+
+ utmpx = utxent_start();
+
+ for (;;) {
+ _cleanup_free_ char *t = NULL;
+ struct utmpx *u;
+ const char *c;
+ Session *s;
+
+ errno = 0;
+ u = getutxent();
+ if (!u) {
+ if (errno != 0)
+ log_warning_errno(errno, "Failed to read " _PATH_UTMPX ", ignoring: %m");
+ return 0;
+ }
+
+ if (u->ut_type != USER_PROCESS)
+ continue;
+
+ if (!pid_is_valid(u->ut_pid))
+ continue;
+
+ t = strndup(u->ut_line, sizeof(u->ut_line));
+ if (!t)
+ return log_oom();
+
+ c = path_startswith(t, "/dev/");
+ if (c) {
+ r = free_and_strdup(&t, c);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (isempty(t))
+ continue;
+
+ s = hashmap_get(m->sessions_by_leader, PID_TO_PTR(u->ut_pid));
+ if (!s)
+ continue;
+
+ if (s->tty_validity == TTY_FROM_UTMP && !streq_ptr(s->tty, t)) {
+ /* This may happen on multiplexed SSH connection (i.e. 'SSH connection sharing'). In
+ * this case PAM and utmp sessions don't match. In such a case let's invalidate the TTY
+ * information and never acquire it again. */
+
+ s->tty = mfree(s->tty);
+ s->tty_validity = TTY_UTMP_INCONSISTENT;
+ log_debug("Session '%s' has inconsistent TTY information, dropping TTY information.", s->id);
+ continue;
+ }
+
+ /* Never override what we figured out once */
+ if (s->tty || s->tty_validity >= 0)
+ continue;
+
+ s->tty = TAKE_PTR(t);
+ s->tty_validity = TTY_FROM_UTMP;
+ log_debug("Acquired TTY information '%s' from utmp for session '%s'.", s->tty, s->id);
+ }
+
+#else
+ return 0;
+#endif
+}
+
+#if ENABLE_UTMP
+static int manager_dispatch_utmp(sd_event_source *s, const struct inotify_event *event, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ /* If there's indication the file itself might have been removed or became otherwise unavailable, then let's
+ * reestablish the watch on whatever there's now. */
+ if ((event->mask & (IN_ATTRIB|IN_DELETE_SELF|IN_MOVE_SELF|IN_Q_OVERFLOW|IN_UNMOUNT)) != 0)
+ manager_connect_utmp(m);
+
+ (void) manager_read_utmp(m);
+ return 0;
+}
+#endif
+
+void manager_connect_utmp(Manager *m) {
+#if ENABLE_UTMP
+ sd_event_source *s = NULL;
+ int r;
+
+ assert(m);
+
+ /* Watch utmp for changes via inotify. We do this to deal with tools such as ssh, which will register the PAM
+ * session early, and acquire a TTY only much later for the connection. Thus during PAM the TTY won't be known
+ * yet. ssh will register itself with utmp when it finally acquired the TTY. Hence, let's make use of this, and
+ * watch utmp for the TTY asynchronously. We use the PAM session's leader PID as key, to find the right entry.
+ *
+ * Yes, relying on utmp is pretty ugly, but it's good enough for informational purposes, as well as idle
+ * detection (which, for tty sessions, relies on the TTY used) */
+
+ r = sd_event_add_inotify(m->event, &s, _PATH_UTMPX, IN_MODIFY|IN_MOVE_SELF|IN_DELETE_SELF|IN_ATTRIB, manager_dispatch_utmp, m);
+ if (r < 0)
+ log_full_errno(r == -ENOENT ? LOG_DEBUG: LOG_WARNING, r, "Failed to create inotify watch on " _PATH_UTMPX ", ignoring: %m");
+ else {
+ r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ log_warning_errno(r, "Failed to adjust utmp event source priority, ignoring: %m");
+
+ (void) sd_event_source_set_description(s, "utmp");
+ }
+
+ sd_event_source_unref(m->utmp_event_source);
+ m->utmp_event_source = s;
+#endif
+}
+
+void manager_reconnect_utmp(Manager *m) {
+#if ENABLE_UTMP
+ assert(m);
+
+ if (m->utmp_event_source)
+ return;
+
+ manager_connect_utmp(m);
+#endif
+}
+
+int manager_read_efi_boot_loader_entries(Manager *m) {
+#if ENABLE_EFI
+ int r;
+
+ assert(m);
+ if (m->efi_boot_loader_entries_set)
+ return 0;
+
+ r = efi_loader_get_entries(&m->efi_boot_loader_entries);
+ if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r)) {
+ log_debug_errno(r, "Boot loader reported no entries.");
+ m->efi_boot_loader_entries_set = true;
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine entries reported by boot loader: %m");
+
+ m->efi_boot_loader_entries_set = true;
+ return 1;
+#else
+ return 0;
+#endif
+}
diff --git a/src/login/logind-dbus.c b/src/login/logind-dbus.c
new file mode 100644
index 0000000..b95af1a
--- /dev/null
+++ b/src/login/logind-dbus.c
@@ -0,0 +1,4223 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bootspec.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-get-properties.h"
+#include "bus-locator.h"
+#include "bus-polkit.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "efi-loader.h"
+#include "efivars.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "logind-dbus.h"
+#include "logind-seat-dbus.h"
+#include "logind-session-dbus.h"
+#include "logind-user-dbus.h"
+#include "logind.h"
+#include "missing_capability.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "reboot-util.h"
+#include "selinux-util.h"
+#include "sleep-config.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "utmp-wtmp.h"
+#include "virt.h"
+
+static int get_sender_session(
+ Manager *m,
+ sd_bus_message *message,
+ bool consult_display,
+ sd_bus_error *error,
+ Session **ret) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Session *session = NULL;
+ const char *name;
+ int r;
+
+ /* Acquire the sender's session. This first checks if the sending process is inside a session itself,
+ * and returns that. If not and 'consult_display' is true, this returns the display session of the
+ * owning user of the caller. */
+
+ r = sd_bus_query_sender_creds(message,
+ SD_BUS_CREDS_SESSION|SD_BUS_CREDS_AUGMENT|
+ (consult_display ? SD_BUS_CREDS_OWNER_UID : 0), &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_session(creds, &name);
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+
+ if (consult_display) {
+ uid_t uid;
+
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+ } else {
+ User *user;
+
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (user)
+ session = user->display;
+ }
+ }
+ } else
+ session = hashmap_get(m->sessions, name);
+
+ if (!session)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SESSION_FOR_PID,
+ consult_display ?
+ "Caller does not belong to any known session and doesn't own any suitable session." :
+ "Caller does not belong to any known session.");
+
+ *ret = session;
+ return 0;
+}
+
+int manager_get_session_from_creds(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name,
+ sd_bus_error *error,
+ Session **ret) {
+
+ Session *session;
+
+ assert(m);
+ assert(ret);
+
+ if (SEAT_IS_SELF(name)) /* the caller's own session */
+ return get_sender_session(m, message, false, error, ret);
+ if (SEAT_IS_AUTO(name)) /* The caller's own session if they have one, otherwise their user's display session */
+ return get_sender_session(m, message, true, error, ret);
+
+ session = hashmap_get(m->sessions, name);
+ if (!session)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SESSION, "No session '%s' known", name);
+
+ *ret = session;
+ return 0;
+}
+
+static int get_sender_user(Manager *m, sd_bus_message *message, sd_bus_error *error, User **ret) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t uid;
+ User *user;
+ int r;
+
+ /* Note that we get the owner UID of the session, not the actual client UID here! */
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_OWNER_UID|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+
+ user = NULL;
+ } else
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+
+ if (!user)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_USER_FOR_PID,
+ "Caller does not belong to any logged in or lingering user");
+
+ *ret = user;
+ return 0;
+}
+
+int manager_get_user_from_creds(Manager *m, sd_bus_message *message, uid_t uid, sd_bus_error *error, User **ret) {
+ User *user;
+
+ assert(m);
+ assert(ret);
+
+ if (!uid_is_valid(uid))
+ return get_sender_user(m, message, error, ret);
+
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (!user)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER,
+ "User ID "UID_FMT" is not logged in or lingering", uid);
+
+ *ret = user;
+ return 0;
+}
+
+int manager_get_seat_from_creds(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name,
+ sd_bus_error *error,
+ Seat **ret) {
+
+ Seat *seat;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ if (SEAT_IS_SELF(name) || SEAT_IS_AUTO(name)) {
+ Session *session;
+
+ /* Use these special seat names as session names */
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ seat = session->seat;
+ if (!seat)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SEAT, "Session '%s' has no seat.", session->id);
+ } else {
+ seat = hashmap_get(m->seats, name);
+ if (!seat)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SEAT, "No seat '%s' known", name);
+ }
+
+ *ret = seat;
+ return 0;
+}
+
+static int return_test_polkit(
+ sd_bus_message *message,
+ int capability,
+ const char *action,
+ const char **details,
+ uid_t good_user,
+ sd_bus_error *e) {
+
+ const char *result;
+ bool challenge;
+ int r;
+
+ r = bus_test_polkit(message, capability, action, details, good_user, &challenge, e);
+ if (r < 0)
+ return r;
+
+ if (r > 0)
+ result = "yes";
+ else if (challenge)
+ result = "challenge";
+ else
+ result = "no";
+
+ return sd_bus_reply_method_return(message, "s", result);
+}
+
+static int property_get_idle_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ return sd_bus_message_append(reply, "b", manager_get_idle_hint(m, NULL) > 0);
+}
+
+static int property_get_idle_since_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ dual_timestamp t = DUAL_TIMESTAMP_NULL;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ manager_get_idle_hint(m, &t);
+
+ return sd_bus_message_append(reply, "t", streq(property, "IdleSinceHint") ? t.realtime : t.monotonic);
+}
+
+static int property_get_inhibited(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ InhibitWhat w;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ w = manager_inhibit_what(m, streq(property, "BlockInhibited") ? INHIBIT_BLOCK : INHIBIT_DELAY);
+
+ return sd_bus_message_append(reply, "s", inhibit_what_to_string(w));
+}
+
+static int property_get_preparing(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ bool b;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ if (streq(property, "PreparingForShutdown"))
+ b = m->action_what & INHIBIT_SHUTDOWN;
+ else
+ b = m->action_what & INHIBIT_SLEEP;
+
+ return sd_bus_message_append(reply, "b", b);
+}
+
+static int property_get_scheduled_shutdown(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'r', "st");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "st", m->scheduled_shutdown_type, m->scheduled_shutdown_timeout);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_handle_action, handle_action, HandleAction);
+static BUS_DEFINE_PROPERTY_GET(property_get_docked, "b", Manager, manager_is_docked_or_external_displays);
+static BUS_DEFINE_PROPERTY_GET(property_get_lid_closed, "b", Manager, manager_is_lid_closed);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_on_external_power, "b", manager_is_on_external_power);
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_compat_user_tasks_max, "t", CGROUP_LIMIT_MAX);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_hashmap_size, "t", Hashmap *, (uint64_t) hashmap_size);
+
+static int method_get_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+/* Get login session of a process. This is not what you are looking for these days,
+ * as apps may instead belong to a user service unit. This includes terminal
+ * emulators and hence command-line apps. */
+static int method_get_session_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Session *session = NULL;
+ Manager *m = userdata;
+ pid_t pid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+
+ r = sd_bus_message_read(message, "u", &pid);
+ if (r < 0)
+ return r;
+ if (pid < 0)
+ return -EINVAL;
+
+ if (pid == 0) {
+ r = manager_get_session_from_creds(m, message, NULL, error, &session);
+ if (r < 0)
+ return r;
+ } else {
+ r = manager_get_session_by_pid(m, pid, &session);
+ if (r < 0)
+ return r;
+
+ if (!session)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SESSION_FOR_PID,
+ "PID "PID_FMT" does not belong to any known session", pid);
+ }
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ uint32_t uid;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+
+ r = manager_get_user_from_creds(m, message, uid, error, &user);
+ if (r < 0)
+ return r;
+
+ p = user_bus_path(user);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_user_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ User *user = NULL;
+ pid_t pid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+
+ r = sd_bus_message_read(message, "u", &pid);
+ if (r < 0)
+ return r;
+ if (pid < 0)
+ return -EINVAL;
+
+ if (pid == 0) {
+ r = manager_get_user_from_creds(m, message, UID_INVALID, error, &user);
+ if (r < 0)
+ return r;
+ } else {
+ r = manager_get_user_by_pid(m, pid, &user);
+ if (r < 0)
+ return r;
+ if (!user)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_USER_FOR_PID,
+ "PID "PID_FMT" does not belong to any logged in user or lingering user",
+ pid);
+ }
+
+ p = user_bus_path(user);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_seat(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Seat *seat;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_seat_from_creds(m, message, name, error, &seat);
+ if (r < 0)
+ return r;
+
+ p = seat_bus_path(seat);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_list_sessions(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(susso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(session, m->sessions) {
+ _cleanup_free_ char *p = NULL;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(susso)",
+ session->id,
+ (uint32_t) session->user->user_record->uid,
+ session->user->user_record->user_name,
+ session->seat ? session->seat->id : "",
+ p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_list_users(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(uso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(user, m->users) {
+ _cleanup_free_ char *p = NULL;
+
+ p = user_bus_path(user);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(uso)",
+ (uint32_t) user->user_record->uid,
+ user->user_record->user_name,
+ p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_list_seats(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Seat *seat;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(so)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(seat, m->seats) {
+ _cleanup_free_ char *p = NULL;
+
+ p = seat_bus_path(seat);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(so)", seat->id, p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_list_inhibitors(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Inhibitor *inhibitor;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssssuu)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(inhibitor, m->inhibitors) {
+
+ r = sd_bus_message_append(reply, "(ssssuu)",
+ strempty(inhibit_what_to_string(inhibitor->what)),
+ strempty(inhibitor->who),
+ strempty(inhibitor->why),
+ strempty(inhibit_mode_to_string(inhibitor->mode)),
+ (uint32_t) inhibitor->uid,
+ (uint32_t) inhibitor->pid);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_create_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *service, *type, *class, *cseat, *tty, *display, *remote_user, *remote_host, *desktop;
+ _cleanup_free_ char *id = NULL;
+ Session *session = NULL;
+ uint32_t audit_id = 0;
+ Manager *m = userdata;
+ User *user = NULL;
+ Seat *seat = NULL;
+ pid_t leader;
+ uid_t uid;
+ int remote;
+ uint32_t vtnr = 0;
+ SessionType t;
+ SessionClass c;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+
+ r = sd_bus_message_read(message, "uusssssussbss",
+ &uid, &leader, &service, &type, &class, &desktop, &cseat,
+ &vtnr, &tty, &display, &remote, &remote_user, &remote_host);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid UID");
+ if (leader < 0 || leader == 1 || leader == getpid_cached())
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid leader PID");
+
+ if (isempty(type))
+ t = _SESSION_TYPE_INVALID;
+ else {
+ t = session_type_from_string(type);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid session type %s", type);
+ }
+
+ if (isempty(class))
+ c = _SESSION_CLASS_INVALID;
+ else {
+ c = session_class_from_string(class);
+ if (c < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid session class %s", class);
+ }
+
+ if (isempty(desktop))
+ desktop = NULL;
+ else {
+ if (!string_is_safe(desktop))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid desktop string %s", desktop);
+ }
+
+ if (isempty(cseat))
+ seat = NULL;
+ else {
+ seat = hashmap_get(m->seats, cseat);
+ if (!seat)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SEAT,
+ "No seat '%s' known", cseat);
+ }
+
+ if (tty_is_vc(tty)) {
+ int v;
+
+ if (!seat)
+ seat = m->seat0;
+ else if (seat != m->seat0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "TTY %s is virtual console but seat %s is not seat0", tty, seat->id);
+
+ v = vtnr_from_tty(tty);
+ if (v <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Cannot determine VT number from virtual console TTY %s", tty);
+
+ if (vtnr == 0)
+ vtnr = (uint32_t) v;
+ else if (vtnr != (uint32_t) v)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Specified TTY and VT number do not match");
+
+ } else if (tty_is_console(tty)) {
+
+ if (!seat)
+ seat = m->seat0;
+ else if (seat != m->seat0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Console TTY specified but seat is not seat0");
+
+ if (vtnr != 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Console TTY specified but VT number is not 0");
+ }
+
+ if (seat) {
+ if (seat_has_vts(seat)) {
+ if (vtnr <= 0 || vtnr > 63)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "VT number out of range");
+ } else {
+ if (vtnr != 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Seat has no VTs but VT number not 0");
+ }
+ }
+
+ if (t == _SESSION_TYPE_INVALID) {
+ if (!isempty(display))
+ t = SESSION_X11;
+ else if (!isempty(tty))
+ t = SESSION_TTY;
+ else
+ t = SESSION_UNSPECIFIED;
+ }
+
+ if (c == _SESSION_CLASS_INVALID) {
+ if (t == SESSION_UNSPECIFIED)
+ c = SESSION_BACKGROUND;
+ else
+ c = SESSION_USER;
+ }
+
+ if (leader == 0) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, (pid_t*) &leader);
+ if (r < 0)
+ return r;
+ }
+
+ /* Check if we are already in a logind session. Or if we are in user@.service
+ * which is a special PAM session that avoids creating a logind session. */
+ r = manager_get_user_by_pid(m, leader, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_SESSION_BUSY,
+ "Already running in a session or user slice");
+
+ /*
+ * Old gdm and lightdm start the user-session on the same VT as
+ * the greeter session. But they destroy the greeter session
+ * after the user-session and want the user-session to take
+ * over the VT. We need to support this for
+ * backwards-compatibility, so make sure we allow new sessions
+ * on a VT that a greeter is running on. Furthermore, to allow
+ * re-logins, we have to allow a greeter to take over a used VT for
+ * the exact same reasons.
+ */
+ if (c != SESSION_GREETER &&
+ vtnr > 0 &&
+ vtnr < m->seat0->position_count &&
+ m->seat0->positions[vtnr] &&
+ m->seat0->positions[vtnr]->class != SESSION_GREETER)
+ return sd_bus_error_setf(error, BUS_ERROR_SESSION_BUSY, "Already occupied by a session");
+
+ if (hashmap_size(m->sessions) >= m->sessions_max)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED,
+ "Maximum number of sessions (%" PRIu64 ") reached, refusing further sessions.",
+ m->sessions_max);
+
+ (void) audit_session_from_pid(leader, &audit_id);
+ if (audit_session_is_valid(audit_id)) {
+ /* Keep our session IDs and the audit session IDs in sync */
+
+ if (asprintf(&id, "%"PRIu32, audit_id) < 0)
+ return -ENOMEM;
+
+ /* Wut? There's already a session by this name and we didn't find it above? Weird, then let's
+ * not trust the audit data and let's better register a new ID */
+ if (hashmap_contains(m->sessions, id)) {
+ log_warning("Existing logind session ID %s used by new audit session, ignoring.", id);
+ audit_id = AUDIT_SESSION_INVALID;
+ id = mfree(id);
+ }
+ }
+
+ if (!id) {
+ do {
+ id = mfree(id);
+
+ if (asprintf(&id, "c%lu", ++m->session_counter) < 0)
+ return -ENOMEM;
+
+ } while (hashmap_contains(m->sessions, id));
+ }
+
+ /* The generated names should not clash with 'auto' or 'self' */
+ assert(!SESSION_IS_SELF(id));
+ assert(!SESSION_IS_AUTO(id));
+
+ /* If we are not watching utmp already, try again */
+ manager_reconnect_utmp(m);
+
+ r = manager_add_user_by_uid(m, uid, &user);
+ if (r < 0)
+ goto fail;
+
+ r = manager_add_session(m, id, &session);
+ if (r < 0)
+ goto fail;
+
+ session_set_user(session, user);
+ r = session_set_leader(session, leader);
+ if (r < 0)
+ goto fail;
+
+ session->original_type = session->type = t;
+ session->class = c;
+ session->remote = remote;
+ session->vtnr = vtnr;
+
+ if (!isempty(tty)) {
+ session->tty = strdup(tty);
+ if (!session->tty) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ session->tty_validity = TTY_FROM_PAM;
+ }
+
+ if (!isempty(display)) {
+ session->display = strdup(display);
+ if (!session->display) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(remote_user)) {
+ session->remote_user = strdup(remote_user);
+ if (!session->remote_user) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(remote_host)) {
+ session->remote_host = strdup(remote_host);
+ if (!session->remote_host) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(service)) {
+ session->service = strdup(service);
+ if (!session->service) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(desktop)) {
+ session->desktop = strdup(desktop);
+ if (!session->desktop) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (seat) {
+ r = seat_attach_session(seat, session);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = sd_bus_message_enter_container(message, 'a', "(sv)");
+ if (r < 0)
+ goto fail;
+
+ r = session_start(session, message, error);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ goto fail;
+
+ session->create_message = sd_bus_message_ref(message);
+
+ /* Now, let's wait until the slice unit and stuff got created. We send the reply back from
+ * session_send_create_reply(). */
+
+ return 1;
+
+fail:
+ if (session)
+ session_add_to_gc_queue(session);
+
+ if (user)
+ user_add_to_gc_queue(user);
+
+ return r;
+}
+
+static int method_release_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ r = session_release(session);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_activate_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ /* PolicyKit is done by bus_session_method_activate() */
+
+ return bus_session_method_activate(message, session, error);
+}
+
+static int method_activate_session_on_seat(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *session_name, *seat_name;
+ Manager *m = userdata;
+ Session *session;
+ Seat *seat;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Same as ActivateSession() but refuses to work if the seat doesn't match */
+
+ r = sd_bus_message_read(message, "ss", &session_name, &seat_name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, session_name, error, &session);
+ if (r < 0)
+ return r;
+
+ r = manager_get_seat_from_creds(m, message, seat_name, error, &seat);
+ if (r < 0)
+ return r;
+
+ if (session->seat != seat)
+ return sd_bus_error_setf(error, BUS_ERROR_SESSION_NOT_ON_SEAT,
+ "Session %s not on seat %s", session_name, seat_name);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.chvt",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_activate(session);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_lock_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ return bus_session_method_lock(message, session, error);
+}
+
+static int method_lock_sessions(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.lock-sessions",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_send_lock_all(m, streq(sd_bus_message_get_member(message), "LockSessions"));
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_kill_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *name;
+ Manager *m = userdata;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ return bus_session_method_kill(message, session, error);
+}
+
+static int method_kill_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint32_t uid;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+
+ r = manager_get_user_from_creds(m, message, uid, error, &user);
+ if (r < 0)
+ return r;
+
+ return bus_user_method_kill(message, user, error);
+}
+
+static int method_terminate_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_session_from_creds(m, message, name, error, &session);
+ if (r < 0)
+ return r;
+
+ return bus_session_method_terminate(message, session, error);
+}
+
+static int method_terminate_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint32_t uid;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+
+ r = manager_get_user_from_creds(m, message, uid, error, &user);
+ if (r < 0)
+ return r;
+
+ return bus_user_method_terminate(message, user, error);
+}
+
+static int method_terminate_seat(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Seat *seat;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_get_seat_from_creds(m, message, name, error, &seat);
+ if (r < 0)
+ return r;
+
+ return bus_seat_method_terminate(message, seat, error);
+}
+
+static int method_set_user_linger(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_free_ char *cc = NULL;
+ Manager *m = userdata;
+ int r, b, interactive;
+ struct passwd *pw;
+ const char *path;
+ uint32_t uid, auth_uid;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "ubb", &uid, &b, &interactive);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID |
+ SD_BUS_CREDS_OWNER_UID|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid)) {
+ /* Note that we get the owner UID of the session or user unit,
+ * not the actual client UID here! */
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r < 0)
+ return r;
+ }
+
+ /* owner_uid is racy, so for authorization we must use euid */
+ r = sd_bus_creds_get_euid(creds, &auth_uid);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ pw = getpwuid(uid);
+ if (!pw)
+ return errno_or_else(ENOENT);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ uid == auth_uid ? "org.freedesktop.login1.set-self-linger" :
+ "org.freedesktop.login1.set-user-linger",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ (void) mkdir_p_label("/var/lib/systemd", 0755);
+ r = mkdir_safe_label("/var/lib/systemd/linger", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return r;
+
+ cc = cescape(pw->pw_name);
+ if (!cc)
+ return -ENOMEM;
+
+ path = strjoina("/var/lib/systemd/linger/", cc);
+ if (b) {
+ User *u;
+
+ r = touch(path);
+ if (r < 0)
+ return r;
+
+ if (manager_add_user_by_uid(m, uid, &u) >= 0)
+ user_start(u);
+
+ } else {
+ User *u;
+
+ r = unlink(path);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ u = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (u)
+ user_add_to_gc_queue(u);
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int trigger_device(Manager *m, sd_device *d) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ int r;
+
+ assert(m);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ if (d) {
+ r = sd_device_enumerator_add_match_parent(e, d);
+ if (r < 0)
+ return r;
+ }
+
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *t = NULL;
+ const char *p;
+
+ r = sd_device_get_syspath(d, &p);
+ if (r < 0)
+ return r;
+
+ t = path_join(p, "uevent");
+ if (!t)
+ return -ENOMEM;
+
+ (void) write_string_file(t, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+ }
+
+ return 0;
+}
+
+static int attach_device(Manager *m, const char *seat, const char *sysfs) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *rule = NULL, *file = NULL;
+ const char *id_for_seat;
+ int r;
+
+ assert(m);
+ assert(seat);
+ assert(sysfs);
+
+ r = sd_device_new_from_syspath(&d, sysfs);
+ if (r < 0)
+ return r;
+
+ if (sd_device_has_current_tag(d, "seat") <= 0)
+ return -ENODEV;
+
+ if (sd_device_get_property_value(d, "ID_FOR_SEAT", &id_for_seat) < 0)
+ return -ENODEV;
+
+ if (asprintf(&file, "/etc/udev/rules.d/72-seat-%s.rules", id_for_seat) < 0)
+ return -ENOMEM;
+
+ if (asprintf(&rule, "TAG==\"seat\", ENV{ID_FOR_SEAT}==\"%s\", ENV{ID_SEAT}=\"%s\"", id_for_seat, seat) < 0)
+ return -ENOMEM;
+
+ (void) mkdir_p_label("/etc/udev/rules.d", 0755);
+ r = write_string_file_atomic_label(file, rule);
+ if (r < 0)
+ return r;
+
+ return trigger_device(m, d);
+}
+
+static int flush_devices(Manager *m) {
+ _cleanup_closedir_ DIR *d;
+
+ assert(m);
+
+ d = opendir("/etc/udev/rules.d");
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open /etc/udev/rules.d: %m");
+ } else {
+ struct dirent *de;
+
+ FOREACH_DIRENT_ALL(de, d, break) {
+ dirent_ensure_type(d, de);
+ if (!dirent_is_file(de))
+ continue;
+
+ if (!startswith(de->d_name, "72-seat-"))
+ continue;
+
+ if (!endswith(de->d_name, ".rules"))
+ continue;
+
+ if (unlinkat(dirfd(d), de->d_name, 0) < 0)
+ log_warning_errno(errno, "Failed to unlink %s: %m", de->d_name);
+ }
+ }
+
+ return trigger_device(m, NULL);
+}
+
+static int method_attach_device(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *sysfs, *seat;
+ Manager *m = userdata;
+ int interactive, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "ssb", &seat, &sysfs, &interactive);
+ if (r < 0)
+ return r;
+
+ if (!path_is_normalized(sysfs))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not normalized", sysfs);
+ if (!path_startswith(sysfs, "/sys"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not in /sys", sysfs);
+
+ if (SEAT_IS_SELF(seat) || SEAT_IS_AUTO(seat)) {
+ Seat *found;
+
+ r = manager_get_seat_from_creds(m, message, seat, error, &found);
+ if (r < 0)
+ return r;
+
+ seat = found->id;
+
+ } else if (!seat_name_is_valid(seat)) /* Note that a seat does not have to exist yet for this operation to succeed */
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Seat name %s is not valid", seat);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.attach-device",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = attach_device(m, seat, sysfs);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_flush_devices(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ int interactive, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &interactive);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.flush-devices",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = flush_devices(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int have_multiple_sessions(
+ Manager *m,
+ uid_t uid) {
+
+ Session *session;
+
+ assert(m);
+
+ /* Check for other users' sessions. Greeter sessions do not
+ * count, and non-login sessions do not count either. */
+ HASHMAP_FOREACH(session, m->sessions)
+ if (session->class == SESSION_USER &&
+ session->user->user_record->uid != uid)
+ return true;
+
+ return false;
+}
+
+static int bus_manager_log_shutdown(
+ Manager *m,
+ const char *unit_name) {
+
+ const char *p, *q;
+
+ assert(m);
+ assert(unit_name);
+
+ if (streq(unit_name, SPECIAL_POWEROFF_TARGET)) {
+ p = "MESSAGE=System is powering down";
+ q = "SHUTDOWN=power-off";
+ } else if (streq(unit_name, SPECIAL_REBOOT_TARGET)) {
+ p = "MESSAGE=System is rebooting";
+ q = "SHUTDOWN=reboot";
+ } else if (streq(unit_name, SPECIAL_HALT_TARGET)) {
+ p = "MESSAGE=System is halting";
+ q = "SHUTDOWN=halt";
+ } else if (streq(unit_name, SPECIAL_KEXEC_TARGET)) {
+ p = "MESSAGE=System is rebooting with kexec";
+ q = "SHUTDOWN=kexec";
+ } else {
+ p = "MESSAGE=System is shutting down";
+ q = NULL;
+ }
+
+ if (isempty(m->wall_message))
+ p = strjoina(p, ".");
+ else
+ p = strjoina(p, " (", m->wall_message, ").");
+
+ return log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_SHUTDOWN_STR,
+ p,
+ q);
+}
+
+static int lid_switch_ignore_handler(sd_event_source *e, uint64_t usec, void *userdata) {
+ Manager *m = userdata;
+
+ assert(e);
+ assert(m);
+
+ m->lid_switch_ignore_event_source = sd_event_source_unref(m->lid_switch_ignore_event_source);
+ return 0;
+}
+
+int manager_set_lid_switch_ignore(Manager *m, usec_t until) {
+ int r;
+
+ assert(m);
+
+ if (until <= now(CLOCK_MONOTONIC))
+ return 0;
+
+ /* We want to ignore the lid switch for a while after each
+ * suspend, and after boot-up. Hence let's install a timer for
+ * this. As long as the event source exists we ignore the lid
+ * switch. */
+
+ if (m->lid_switch_ignore_event_source) {
+ usec_t u;
+
+ r = sd_event_source_get_time(m->lid_switch_ignore_event_source, &u);
+ if (r < 0)
+ return r;
+
+ if (until <= u)
+ return 0;
+
+ r = sd_event_source_set_time(m->lid_switch_ignore_event_source, until);
+ } else
+ r = sd_event_add_time(
+ m->event,
+ &m->lid_switch_ignore_event_source,
+ CLOCK_MONOTONIC,
+ until, 0,
+ lid_switch_ignore_handler, m);
+
+ return r;
+}
+
+static int send_prepare_for(Manager *m, InhibitWhat w, bool _active) {
+ int active = _active;
+
+ assert(m);
+ assert(IN_SET(w, INHIBIT_SHUTDOWN, INHIBIT_SLEEP));
+
+ return sd_bus_emit_signal(m->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ w == INHIBIT_SHUTDOWN ? "PrepareForShutdown" : "PrepareForSleep",
+ "b",
+ active);
+}
+
+static int execute_shutdown_or_sleep(
+ Manager *m,
+ InhibitWhat w,
+ const char *unit_name,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *p;
+ int r;
+
+ assert(m);
+ assert(w > 0);
+ assert(w < _INHIBIT_WHAT_MAX);
+ assert(unit_name);
+
+ if (w == INHIBIT_SHUTDOWN)
+ bus_manager_log_shutdown(m, unit_name);
+
+ r = bus_call_method(
+ m->bus,
+ bus_systemd_mgr,
+ "StartUnit",
+ error,
+ &reply,
+ "ss", unit_name, "replace-irreversibly");
+ if (r < 0)
+ goto error;
+
+ r = sd_bus_message_read(reply, "o", &p);
+ if (r < 0)
+ goto error;
+
+ r = free_and_strdup(&m->action_job, p);
+ if (r < 0)
+ goto error;
+
+ m->action_unit = unit_name;
+ m->action_what = w;
+
+ /* Make sure the lid switch is ignored for a while */
+ manager_set_lid_switch_ignore(m, now(CLOCK_MONOTONIC) + m->holdoff_timeout_usec);
+
+ return 0;
+
+error:
+ /* Tell people that they now may take a lock again */
+ (void) send_prepare_for(m, w, false);
+
+ return r;
+}
+
+int manager_dispatch_delayed(Manager *manager, bool timeout) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Inhibitor *offending = NULL;
+ int r;
+
+ assert(manager);
+
+ if (manager->action_what == 0 || manager->action_job)
+ return 0;
+
+ if (manager_is_inhibited(manager, manager->action_what, INHIBIT_DELAY, NULL, false, false, 0, &offending)) {
+ _cleanup_free_ char *comm = NULL, *u = NULL;
+
+ if (!timeout)
+ return 0;
+
+ (void) get_process_comm(offending->pid, &comm);
+ u = uid_to_name(offending->uid);
+
+ log_notice("Delay lock is active (UID "UID_FMT"/%s, PID "PID_FMT"/%s) but inhibitor timeout is reached.",
+ offending->uid, strna(u),
+ offending->pid, strna(comm));
+ }
+
+ /* Actually do the operation */
+ r = execute_shutdown_or_sleep(manager, manager->action_what, manager->action_unit, &error);
+ if (r < 0) {
+ log_warning("Error during inhibitor-delayed operation (already returned success to client): %s",
+ bus_error_message(&error, r));
+
+ manager->action_unit = NULL;
+ manager->action_what = 0;
+ return r;
+ }
+
+ return 1;
+}
+
+static int manager_inhibit_timeout_handler(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ Manager *manager = userdata;
+ int r;
+
+ assert(manager);
+ assert(manager->inhibit_timeout_source == s);
+
+ r = manager_dispatch_delayed(manager, true);
+ return (r < 0) ? r : 0;
+}
+
+static int delay_shutdown_or_sleep(
+ Manager *m,
+ InhibitWhat w,
+ const char *unit_name) {
+
+ int r;
+
+ assert(m);
+ assert(w >= 0);
+ assert(w < _INHIBIT_WHAT_MAX);
+ assert(unit_name);
+
+ if (m->inhibit_timeout_source) {
+ r = sd_event_source_set_time_relative(m->inhibit_timeout_source, m->inhibit_delay_max);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time_relative() failed: %m");
+
+ r = sd_event_source_set_enabled(m->inhibit_timeout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed: %m");
+ } else {
+ r = sd_event_add_time_relative(
+ m->event,
+ &m->inhibit_timeout_source,
+ CLOCK_MONOTONIC, m->inhibit_delay_max, 0,
+ manager_inhibit_timeout_handler, m);
+ if (r < 0)
+ return r;
+ }
+
+ m->action_unit = unit_name;
+ m->action_what = w;
+
+ return 0;
+}
+
+int bus_manager_shutdown_or_sleep_now_or_later(
+ Manager *m,
+ const char *unit_name,
+ InhibitWhat w,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *load_state = NULL;
+ bool delayed;
+ int r;
+
+ assert(m);
+ assert(unit_name);
+ assert(w > 0);
+ assert(w < _INHIBIT_WHAT_MAX);
+ assert(!m->action_job);
+
+ r = unit_load_state(m->bus, unit_name, &load_state);
+ if (r < 0)
+ return r;
+
+ if (!streq(load_state, "loaded"))
+ return log_notice_errno(SYNTHETIC_ERRNO(EACCES),
+ "Unit %s is %s, refusing operation.",
+ unit_name, load_state);
+
+ /* Tell everybody to prepare for shutdown/sleep */
+ (void) send_prepare_for(m, w, true);
+
+ delayed =
+ m->inhibit_delay_max > 0 &&
+ manager_is_inhibited(m, w, INHIBIT_DELAY, NULL, false, false, 0, NULL);
+
+ if (delayed)
+ /* Shutdown is delayed, keep in mind what we
+ * want to do, and start a timeout */
+ r = delay_shutdown_or_sleep(m, w, unit_name);
+ else
+ /* Shutdown is not delayed, execute it
+ * immediately */
+ r = execute_shutdown_or_sleep(m, w, unit_name, error);
+
+ return r;
+}
+
+static int verify_shutdown_creds(
+ Manager *m,
+ sd_bus_message *message,
+ InhibitWhat w,
+ bool interactive,
+ const char *action,
+ const char *action_multiple_sessions,
+ const char *action_ignore_inhibit,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ bool multiple_sessions, blocked;
+ uid_t uid;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(w >= 0);
+ assert(w <= _INHIBIT_WHAT_MAX);
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ r = have_multiple_sessions(m, uid);
+ if (r < 0)
+ return r;
+
+ multiple_sessions = r > 0;
+ blocked = manager_is_inhibited(m, w, INHIBIT_BLOCK, NULL, false, true, uid, NULL);
+
+ if (multiple_sessions && action_multiple_sessions) {
+ r = bus_verify_polkit_async(message, CAP_SYS_BOOT, action_multiple_sessions, NULL, interactive, UID_INVALID, &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+ }
+
+ if (blocked && action_ignore_inhibit) {
+ r = bus_verify_polkit_async(message, CAP_SYS_BOOT, action_ignore_inhibit, NULL, interactive, UID_INVALID, &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+ }
+
+ if (!multiple_sessions && !blocked && action) {
+ r = bus_verify_polkit_async(message, CAP_SYS_BOOT, action, NULL, interactive, UID_INVALID, &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+ }
+
+ return 0;
+}
+
+static int method_do_shutdown_or_sleep(
+ Manager *m,
+ sd_bus_message *message,
+ const char *unit_name,
+ InhibitWhat w,
+ const char *action,
+ const char *action_multiple_sessions,
+ const char *action_ignore_inhibit,
+ const char *sleep_verb,
+ sd_bus_error *error) {
+
+ int interactive, r;
+
+ assert(m);
+ assert(message);
+ assert(unit_name);
+ assert(w >= 0);
+ assert(w <= _INHIBIT_WHAT_MAX);
+
+ r = sd_bus_message_read(message, "b", &interactive);
+ if (r < 0)
+ return r;
+
+ /* Don't allow multiple jobs being executed at the same time */
+ if (m->action_what > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_OPERATION_IN_PROGRESS,
+ "There's already a shutdown or sleep operation in progress");
+
+ if (sleep_verb) {
+ r = can_sleep(sleep_verb);
+ if (r == -ENOSPC)
+ return sd_bus_error_set(error, BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED,
+ "Not enough swap space for hibernation");
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED,
+ "Sleep verb \"%s\" not supported", sleep_verb);
+ if (r < 0)
+ return r;
+ }
+
+ r = verify_shutdown_creds(m, message, w, interactive, action, action_multiple_sessions,
+ action_ignore_inhibit, error);
+ if (r != 0)
+ return r;
+
+ r = bus_manager_shutdown_or_sleep_now_or_later(m, unit_name, w, error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_poweroff(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_POWEROFF_TARGET,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.power-off",
+ "org.freedesktop.login1.power-off-multiple-sessions",
+ "org.freedesktop.login1.power-off-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_reboot(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_REBOOT_TARGET,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.reboot",
+ "org.freedesktop.login1.reboot-multiple-sessions",
+ "org.freedesktop.login1.reboot-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_halt(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_HALT_TARGET,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.halt",
+ "org.freedesktop.login1.halt-multiple-sessions",
+ "org.freedesktop.login1.halt-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_suspend(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_SUSPEND_TARGET,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.suspend",
+ "org.freedesktop.login1.suspend-multiple-sessions",
+ "org.freedesktop.login1.suspend-ignore-inhibit",
+ "suspend",
+ error);
+}
+
+static int method_hibernate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_HIBERNATE_TARGET,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hibernate",
+ error);
+}
+
+static int method_hybrid_sleep(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_HYBRID_SLEEP_TARGET,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hybrid-sleep",
+ error);
+}
+
+static int method_suspend_then_hibernate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_do_shutdown_or_sleep(
+ m, message,
+ SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hybrid-sleep",
+ error);
+}
+
+static int nologin_timeout_handler(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ Manager *m = userdata;
+
+ log_info("Creating /run/nologin, blocking further logins...");
+
+ m->unlink_nologin =
+ create_shutdown_run_nologin_or_warn() >= 0;
+
+ return 0;
+}
+
+static int update_schedule_file(Manager *m) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(m);
+
+ r = mkdir_safe_label("/run/systemd/shutdown", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create shutdown subdirectory: %m");
+
+ r = fopen_temporary("/run/systemd/shutdown/scheduled", &f, &temp_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to save information about scheduled shutdowns: %m");
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "USEC="USEC_FMT"\n"
+ "WARN_WALL=%i\n"
+ "MODE=%s\n",
+ m->scheduled_shutdown_timeout,
+ m->enable_wall_messages,
+ m->scheduled_shutdown_type);
+
+ if (!isempty(m->wall_message)) {
+ _cleanup_free_ char *t;
+
+ t = cescape(m->wall_message);
+ if (!t) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "WALL_MESSAGE=%s\n", t);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, "/run/systemd/shutdown/scheduled") < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(temp_path);
+ (void) unlink("/run/systemd/shutdown/scheduled");
+
+ return log_error_errno(r, "Failed to write information about scheduled shutdowns: %m");
+}
+
+static void reset_scheduled_shutdown(Manager *m) {
+ assert(m);
+
+ m->scheduled_shutdown_timeout_source = sd_event_source_unref(m->scheduled_shutdown_timeout_source);
+ m->wall_message_timeout_source = sd_event_source_unref(m->wall_message_timeout_source);
+ m->nologin_timeout_source = sd_event_source_unref(m->nologin_timeout_source);
+
+ m->scheduled_shutdown_type = mfree(m->scheduled_shutdown_type);
+ m->scheduled_shutdown_timeout = 0;
+ m->shutdown_dry_run = false;
+
+ if (m->unlink_nologin) {
+ (void) unlink_or_warn("/run/nologin");
+ m->unlink_nologin = false;
+ }
+
+ (void) unlink("/run/systemd/shutdown/scheduled");
+}
+
+static int manager_scheduled_shutdown_handler(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Manager *m = userdata;
+ const char *target;
+ int r;
+
+ assert(m);
+
+ if (isempty(m->scheduled_shutdown_type))
+ return 0;
+
+ if (streq(m->scheduled_shutdown_type, "poweroff"))
+ target = SPECIAL_POWEROFF_TARGET;
+ else if (streq(m->scheduled_shutdown_type, "reboot"))
+ target = SPECIAL_REBOOT_TARGET;
+ else if (streq(m->scheduled_shutdown_type, "halt"))
+ target = SPECIAL_HALT_TARGET;
+ else
+ assert_not_reached("unexpected shutdown type");
+
+ /* Don't allow multiple jobs being executed at the same time */
+ if (m->action_what > 0) {
+ r = -EALREADY;
+ log_error("Scheduled shutdown to %s failed: shutdown or sleep operation already in progress", target);
+ goto error;
+ }
+
+ if (m->shutdown_dry_run) {
+ /* We do not process delay inhibitors here. Otherwise, we
+ * would have to be considered "in progress" (like the check
+ * above) for some seconds after our admin has seen the final
+ * wall message. */
+
+ bus_manager_log_shutdown(m, target);
+ log_info("Running in dry run, suppressing action.");
+ reset_scheduled_shutdown(m);
+
+ return 0;
+ }
+
+ r = bus_manager_shutdown_or_sleep_now_or_later(m, target, INHIBIT_SHUTDOWN, &error);
+ if (r < 0) {
+ log_error_errno(r, "Scheduled shutdown to %s failed: %m", target);
+ goto error;
+ }
+
+ return 0;
+
+error:
+ reset_scheduled_shutdown(m);
+ return r;
+}
+
+static int method_schedule_shutdown(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *action_multiple_sessions = NULL;
+ const char *action_ignore_inhibit = NULL;
+ const char *action = NULL;
+ uint64_t elapse;
+ char *type;
+ int r;
+ bool dry_run = false;
+
+ assert(m);
+ assert(message);
+
+ r = sd_bus_message_read(message, "st", &type, &elapse);
+ if (r < 0)
+ return r;
+
+ if (startswith(type, "dry-")) {
+ type += 4;
+ dry_run = true;
+ }
+
+ if (streq(type, "poweroff")) {
+ action = "org.freedesktop.login1.power-off";
+ action_multiple_sessions = "org.freedesktop.login1.power-off-multiple-sessions";
+ action_ignore_inhibit = "org.freedesktop.login1.power-off-ignore-inhibit";
+ } else if (streq(type, "reboot")) {
+ action = "org.freedesktop.login1.reboot";
+ action_multiple_sessions = "org.freedesktop.login1.reboot-multiple-sessions";
+ action_ignore_inhibit = "org.freedesktop.login1.reboot-ignore-inhibit";
+ } else if (streq(type, "halt")) {
+ action = "org.freedesktop.login1.halt";
+ action_multiple_sessions = "org.freedesktop.login1.halt-multiple-sessions";
+ action_ignore_inhibit = "org.freedesktop.login1.halt-ignore-inhibit";
+ } else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unsupported shutdown type");
+
+ r = verify_shutdown_creds(m, message, INHIBIT_SHUTDOWN, false,
+ action, action_multiple_sessions, action_ignore_inhibit, error);
+ if (r != 0)
+ return r;
+
+ if (m->scheduled_shutdown_timeout_source) {
+ r = sd_event_source_set_time(m->scheduled_shutdown_timeout_source, elapse);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed: %m");
+
+ r = sd_event_source_set_enabled(m->scheduled_shutdown_timeout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed: %m");
+ } else {
+ r = sd_event_add_time(m->event, &m->scheduled_shutdown_timeout_source,
+ CLOCK_REALTIME, elapse, 0, manager_scheduled_shutdown_handler, m);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_add_time() failed: %m");
+ }
+
+ r = free_and_strdup(&m->scheduled_shutdown_type, type);
+ if (r < 0) {
+ m->scheduled_shutdown_timeout_source = sd_event_source_unref(m->scheduled_shutdown_timeout_source);
+ return log_oom();
+ }
+
+ m->shutdown_dry_run = dry_run;
+
+ if (m->nologin_timeout_source) {
+ r = sd_event_source_set_time(m->nologin_timeout_source, elapse);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed: %m");
+
+ r = sd_event_source_set_enabled(m->nologin_timeout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed: %m");
+ } else {
+ r = sd_event_add_time(m->event, &m->nologin_timeout_source,
+ CLOCK_REALTIME, elapse - 5 * USEC_PER_MINUTE, 0, nologin_timeout_handler, m);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_add_time() failed: %m");
+ }
+
+ m->scheduled_shutdown_timeout = elapse;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_AUGMENT|SD_BUS_CREDS_TTY|SD_BUS_CREDS_UID, &creds);
+ if (r >= 0) {
+ const char *tty = NULL;
+
+ (void) sd_bus_creds_get_uid(creds, &m->scheduled_shutdown_uid);
+ (void) sd_bus_creds_get_tty(creds, &tty);
+
+ r = free_and_strdup(&m->scheduled_shutdown_tty, tty);
+ if (r < 0) {
+ m->scheduled_shutdown_timeout_source = sd_event_source_unref(m->scheduled_shutdown_timeout_source);
+ return log_oom();
+ }
+ }
+
+ r = manager_setup_wall_message_timer(m);
+ if (r < 0)
+ return r;
+
+ r = update_schedule_file(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_cancel_scheduled_shutdown(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ bool cancelled;
+
+ assert(m);
+ assert(message);
+
+ cancelled = m->scheduled_shutdown_type != NULL;
+ reset_scheduled_shutdown(m);
+
+ if (cancelled && m->enable_wall_messages) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_free_ char *username = NULL;
+ const char *tty = NULL;
+ uid_t uid = 0;
+ int r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_AUGMENT|SD_BUS_CREDS_TTY|SD_BUS_CREDS_UID, &creds);
+ if (r >= 0) {
+ (void) sd_bus_creds_get_uid(creds, &uid);
+ (void) sd_bus_creds_get_tty(creds, &tty);
+ }
+
+ username = uid_to_name(uid);
+ utmp_wall("The system shutdown has been cancelled",
+ username, tty, logind_wall_tty_filter, m);
+ }
+
+ return sd_bus_reply_method_return(message, "b", cancelled);
+}
+
+static int method_can_shutdown_or_sleep(
+ Manager *m,
+ sd_bus_message *message,
+ InhibitWhat w,
+ const char *action,
+ const char *action_multiple_sessions,
+ const char *action_ignore_inhibit,
+ const char *sleep_verb,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ HandleAction handle;
+ bool multiple_sessions, challenge, blocked;
+ const char *result = NULL;
+ uid_t uid;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(w >= 0);
+ assert(w <= _INHIBIT_WHAT_MAX);
+ assert(action);
+ assert(action_multiple_sessions);
+ assert(action_ignore_inhibit);
+
+ if (sleep_verb) {
+ r = can_sleep(sleep_verb);
+ if (IN_SET(r, 0, -ENOSPC))
+ return sd_bus_reply_method_return(message, "s", "na");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ r = have_multiple_sessions(m, uid);
+ if (r < 0)
+ return r;
+
+ multiple_sessions = r > 0;
+ blocked = manager_is_inhibited(m, w, INHIBIT_BLOCK, NULL, false, true, uid, NULL);
+
+ handle = handle_action_from_string(sleep_verb);
+ if (handle >= 0) {
+ const char *target;
+
+ target = manager_target_for_action(handle);
+ if (target) {
+ _cleanup_free_ char *load_state = NULL;
+
+ r = unit_load_state(m->bus, target, &load_state);
+ if (r < 0)
+ return r;
+
+ if (!streq(load_state, "loaded")) {
+ result = "no";
+ goto finish;
+ }
+ }
+ }
+
+ if (multiple_sessions) {
+ r = bus_test_polkit(message, CAP_SYS_BOOT, action_multiple_sessions, NULL, UID_INVALID, &challenge, error);
+ if (r < 0)
+ return r;
+
+ if (r > 0)
+ result = "yes";
+ else if (challenge)
+ result = "challenge";
+ else
+ result = "no";
+ }
+
+ if (blocked) {
+ r = bus_test_polkit(message, CAP_SYS_BOOT, action_ignore_inhibit, NULL, UID_INVALID, &challenge, error);
+ if (r < 0)
+ return r;
+
+ if (r > 0) {
+ if (!result)
+ result = "yes";
+ } else if (challenge) {
+ if (!result || streq(result, "yes"))
+ result = "challenge";
+ } else
+ result = "no";
+ }
+
+ if (!multiple_sessions && !blocked) {
+ /* If neither inhibit nor multiple sessions
+ * apply then just check the normal policy */
+
+ r = bus_test_polkit(message, CAP_SYS_BOOT, action, NULL, UID_INVALID, &challenge, error);
+ if (r < 0)
+ return r;
+
+ if (r > 0)
+ result = "yes";
+ else if (challenge)
+ result = "challenge";
+ else
+ result = "no";
+ }
+
+ finish:
+ return sd_bus_reply_method_return(message, "s", result);
+}
+
+static int method_can_poweroff(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.power-off",
+ "org.freedesktop.login1.power-off-multiple-sessions",
+ "org.freedesktop.login1.power-off-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_can_reboot(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.reboot",
+ "org.freedesktop.login1.reboot-multiple-sessions",
+ "org.freedesktop.login1.reboot-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_can_halt(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SHUTDOWN,
+ "org.freedesktop.login1.halt",
+ "org.freedesktop.login1.halt-multiple-sessions",
+ "org.freedesktop.login1.halt-ignore-inhibit",
+ NULL,
+ error);
+}
+
+static int method_can_suspend(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.suspend",
+ "org.freedesktop.login1.suspend-multiple-sessions",
+ "org.freedesktop.login1.suspend-ignore-inhibit",
+ "suspend",
+ error);
+}
+
+static int method_can_hibernate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hibernate",
+ error);
+}
+
+static int method_can_hybrid_sleep(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "hybrid-sleep",
+ error);
+}
+
+static int method_can_suspend_then_hibernate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ return method_can_shutdown_or_sleep(
+ m, message,
+ INHIBIT_SLEEP,
+ "org.freedesktop.login1.hibernate",
+ "org.freedesktop.login1.hibernate-multiple-sessions",
+ "org.freedesktop.login1.hibernate-ignore-inhibit",
+ "suspend-then-hibernate",
+ error);
+}
+
+static int property_get_reboot_parameter(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ _cleanup_free_ char *parameter = NULL;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ r = read_reboot_parameter(&parameter);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "s", parameter);
+}
+
+static int method_set_reboot_parameter(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ const char *arg;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &arg);
+ if (r < 0)
+ return r;
+
+ r = detect_container();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED,
+ "Reboot parameter not supported in containers, refusing.");
+
+ r = bus_verify_polkit_async(message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-parameter",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = update_reboot_parameter_and_warn(arg, false);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_can_reboot_parameter(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _unused_ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = detect_container();
+ if (r < 0)
+ return r;
+ if (r > 0) /* Inside containers, specifying a reboot parameter, doesn't make much sense */
+ return sd_bus_reply_method_return(message, "s", "na");
+
+ return return_test_polkit(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-parameter",
+ NULL,
+ UID_INVALID,
+ error);
+}
+
+static int property_get_reboot_to_firmware_setup(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_FIRMWARE_SETUP");
+ if (r == -ENXIO) {
+ /* EFI case: let's see what is currently configured in the EFI variables */
+ r = efi_get_reboot_to_firmware();
+ if (r < 0 && r != -EOPNOTSUPP)
+ log_warning_errno(r, "Failed to determine reboot-to-firmware-setup state: %m");
+ } else if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_FIRMWARE_SETUP: %m");
+ else if (r > 0) {
+ /* Non-EFI case: let's see whether /run/systemd/reboot-to-firmware-setup exists. */
+ if (access("/run/systemd/reboot-to-firmware-setup", F_OK) < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to check whether /run/systemd/reboot-to-firmware-setup exists: %m");
+
+ r = false;
+ } else
+ r = true;
+ }
+
+ return sd_bus_message_append(reply, "b", r > 0);
+}
+
+static int method_set_reboot_to_firmware_setup(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ bool use_efi;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_FIRMWARE_SETUP");
+ if (r == -ENXIO) {
+ /* EFI case: let's see what the firmware supports */
+
+ r = efi_reboot_to_firmware_supported();
+ if (r == -EOPNOTSUPP)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Firmware does not support boot into firmware.");
+ if (r < 0)
+ return r;
+
+ use_efi = true;
+
+ } else if (r <= 0) {
+ /* non-EFI case: $SYSTEMD_REBOOT_TO_FIRMWARE_SETUP is set to off */
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_FIRMWARE_SETUP: %m");
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Firmware does not support boot into firmware.");
+ } else
+ /* non-EFI case: $SYSTEMD_REBOOT_TO_FIRMWARE_SETUP is set to on */
+ use_efi = false;
+
+ r = bus_verify_polkit_async(message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-to-firmware-setup",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ if (use_efi) {
+ r = efi_set_reboot_to_firmware(b);
+ if (r < 0)
+ return r;
+ } else {
+ if (b) {
+ r = touch("/run/systemd/reboot-to-firmware-setup");
+ if (r < 0)
+ return r;
+ } else {
+ if (unlink("/run/systemd/reboot-to-firmware-setup") < 0 && errno != ENOENT)
+ return -errno;
+ }
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_can_reboot_to_firmware_setup(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _unused_ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_FIRMWARE_SETUP");
+ if (r == -ENXIO) {
+ /* EFI case: let's see what the firmware supports */
+
+ r = efi_reboot_to_firmware_supported();
+ if (r < 0) {
+ if (r != -EOPNOTSUPP)
+ log_warning_errno(r, "Failed to determine whether reboot to firmware is supported: %m");
+
+ return sd_bus_reply_method_return(message, "s", "na");
+ }
+
+ } else if (r <= 0) {
+ /* Non-EFI case: let's trust $SYSTEMD_REBOOT_TO_FIRMWARE_SETUP */
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_FIRMWARE_SETUP: %m");
+
+ return sd_bus_reply_method_return(message, "s", "na");
+ }
+
+ return return_test_polkit(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-to-firmware-setup",
+ NULL,
+ UID_INVALID,
+ error);
+}
+
+static int property_get_reboot_to_boot_loader_menu(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t x = UINT64_MAX;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU");
+ if (r == -ENXIO) {
+ /* EFI case: returns the current value of LoaderConfigTimeoutOneShot. Three cases are distuingished:
+ *
+ * 1. Variable not set, boot into boot loader menu is not enabled (we return UINT64_MAX to the user)
+ * 2. Variable set to "0", boot into boot loader menu is enabled with no timeout (we return 0 to the user)
+ * 3. Variable set to numeric value formatted in ASCII, boot into boot loader menu with the specified timeout in seconds
+ */
+
+ r = efi_loader_get_config_timeout_one_shot(&x);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_warning_errno(r, "Failed to read LoaderConfigTimeoutOneShot variable, ignoring: %m");
+ }
+
+ } else if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU: %m");
+ else if (r > 0) {
+ _cleanup_free_ char *v = NULL;
+
+ /* Non-EFI case, let's process /run/systemd/reboot-to-boot-loader-menu. */
+
+ r = read_one_line_file("/run/systemd/reboot-to-boot-loader-menu", &v);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_warning_errno(r, "Failed to read /run/systemd/reboot-to-boot-loader-menu: %m");
+ } else {
+ r = safe_atou64(v, &x);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse /run/systemd/reboot-to-boot-loader-menu: %m");
+ }
+ }
+
+ return sd_bus_message_append(reply, "t", x);
+}
+
+static int method_set_reboot_to_boot_loader_menu(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ bool use_efi;
+ uint64_t x;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "t", &x);
+ if (r < 0)
+ return r;
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU");
+ if (r == -ENXIO) {
+ uint64_t features;
+
+ /* EFI case: let's see if booting into boot loader menu is supported. */
+
+ r = efi_loader_get_features(&features);
+ if (r < 0)
+ log_warning_errno(r, "Failed to determine whether reboot to boot loader menu is supported: %m");
+ if (r < 0 || !FLAGS_SET(features, EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Boot loader does not support boot into boot loader menu.");
+
+ use_efi = true;
+
+ } else if (r <= 0) {
+ /* non-EFI case: $SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU is set to off */
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU: %m");
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Boot loader does not support boot into boot loader menu.");
+ } else
+ /* non-EFI case: $SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU is set to on */
+ use_efi = false;
+
+ r = bus_verify_polkit_async(message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-to-boot-loader-menu",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ if (use_efi) {
+ if (x == UINT64_MAX)
+ r = efi_set_variable(EFI_VENDOR_LOADER, "LoaderConfigTimeoutOneShot", NULL, 0);
+ else {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+ xsprintf(buf, "%" PRIu64, DIV_ROUND_UP(x, USEC_PER_SEC)); /* second granularity */
+
+ r = efi_set_variable_string(EFI_VENDOR_LOADER, "LoaderConfigTimeoutOneShot", buf);
+ }
+ if (r < 0)
+ return r;
+ } else {
+ if (x == UINT64_MAX) {
+ if (unlink("/run/systemd/reboot-to-boot-loader-menu") < 0 && errno != ENOENT)
+ return -errno;
+ } else {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+
+ xsprintf(buf, "%" PRIu64, x); /* µs granularity */
+
+ r = write_string_file_atomic_label("/run/systemd/reboot-to-boot-loader-menu", buf);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_can_reboot_to_boot_loader_menu(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _unused_ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU");
+ if (r == -ENXIO) {
+ uint64_t features = 0;
+
+ /* EFI case, let's see if booting into boot loader menu is supported. */
+
+ r = efi_loader_get_features(&features);
+ if (r < 0)
+ log_warning_errno(r, "Failed to determine whether reboot to boot loader menu is supported: %m");
+ if (r < 0 || !FLAGS_SET(features, EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT))
+ return sd_bus_reply_method_return(message, "s", "na");
+
+ } else if (r <= 0) {
+ /* Non-EFI case: let's trust $SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU */
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_BOOT_LOADER_MENU: %m");
+
+ return sd_bus_reply_method_return(message, "s", "na");
+ }
+
+ return return_test_polkit(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-to-boot-loader-menu",
+ NULL,
+ UID_INVALID,
+ error);
+}
+
+static int property_get_reboot_to_boot_loader_entry(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *v = NULL;
+ Manager *m = userdata;
+ const char *x = NULL;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY");
+ if (r == -ENXIO) {
+ /* EFI case: let's read the LoaderEntryOneShot variable */
+
+ r = efi_loader_update_entry_one_shot_cache(&m->efi_loader_entry_one_shot, &m->efi_loader_entry_one_shot_stat);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_warning_errno(r, "Failed to read LoaderEntryOneShot variable, ignoring: %m");
+ } else
+ x = m->efi_loader_entry_one_shot;
+
+ } else if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY: %m");
+ else if (r > 0) {
+
+ /* Non-EFI case, let's process /run/systemd/reboot-to-boot-loader-entry. */
+
+ r = read_one_line_file("/run/systemd/reboot-to-boot-loader-entry", &v);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_warning_errno(r, "Failed to read /run/systemd/reboot-to-boot-loader-entry, ignoring: %m");
+ } else if (!efi_loader_entry_name_valid(v))
+ log_warning("/run/systemd/reboot-to-boot-loader-entry is not valid, ignoring.");
+ else
+ x = v;
+ }
+
+ return sd_bus_message_append(reply, "s", x);
+}
+
+static int boot_loader_entry_exists(Manager *m, const char *id) {
+ _cleanup_(boot_config_free) BootConfig config = {};
+ int r;
+
+ assert(m);
+ assert(id);
+
+ r = boot_entries_load_config_auto(NULL, NULL, &config);
+ if (r < 0 && r != -ENOKEY) /* don't complain if no GPT is found, hence skip ENOKEY */
+ return r;
+
+ r = manager_read_efi_boot_loader_entries(m);
+ if (r >= 0)
+ (void) boot_entries_augment_from_loader(&config, m->efi_boot_loader_entries, true);
+
+ return boot_config_has_entry(&config, id);
+}
+
+static int method_set_reboot_to_boot_loader_entry(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ bool use_efi;
+ const char *v;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &v);
+ if (r < 0)
+ return r;
+
+ if (isempty(v))
+ v = NULL;
+ else if (efi_loader_entry_name_valid(v)) {
+ r = boot_loader_entry_exists(m, v);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Boot loader entry '%s' is not known.", v);
+ } else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Boot loader entry name '%s' is not valid, refusing.", v);
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY");
+ if (r == -ENXIO) {
+ uint64_t features;
+
+ /* EFI case: let's see if booting into boot loader entry is supported. */
+
+ r = efi_loader_get_features(&features);
+ if (r < 0)
+ log_warning_errno(r, "Failed to determine whether reboot into boot loader entry is supported: %m");
+ if (r < 0 || !FLAGS_SET(features, EFI_LOADER_FEATURE_ENTRY_ONESHOT))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Loader does not support boot into boot loader entry.");
+
+ use_efi = true;
+
+ } else if (r <= 0) {
+ /* non-EFI case: $SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY is set to off */
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY: %m");
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Loader does not support boot into boot loader entry.");
+ } else
+ /* non-EFI case: $SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY is set to on */
+ use_efi = false;
+
+ r = bus_verify_polkit_async(message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-to-boot-loader-entry",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ if (use_efi) {
+ if (isempty(v))
+ /* Delete item */
+ r = efi_set_variable(EFI_VENDOR_LOADER, "LoaderEntryOneShot", NULL, 0);
+ else
+ r = efi_set_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", v);
+ if (r < 0)
+ return r;
+ } else {
+ if (isempty(v)) {
+ if (unlink("/run/systemd/reboot-to-boot-loader-entry") < 0 && errno != ENOENT)
+ return -errno;
+ } else {
+ r = write_string_file_atomic_label("/run/systemd/reboot-boot-to-loader-entry", v);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_can_reboot_to_boot_loader_entry(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _unused_ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = getenv_bool("SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY");
+ if (r == -ENXIO) {
+ uint64_t features = 0;
+
+ /* EFI case, let's see if booting into boot loader entry is supported. */
+
+ r = efi_loader_get_features(&features);
+ if (r < 0)
+ log_warning_errno(r, "Failed to determine whether reboot to boot loader entry is supported: %m");
+ if (r < 0 || !FLAGS_SET(features, EFI_LOADER_FEATURE_ENTRY_ONESHOT))
+ return sd_bus_reply_method_return(message, "s", "na");
+
+ } else if (r <= 0) {
+ /* Non-EFI case: let's trust $SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY */
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REBOOT_TO_BOOT_LOADER_ENTRY: %m");
+
+ return sd_bus_reply_method_return(message, "s", "na");
+ }
+
+ return return_test_polkit(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-reboot-to-boot-loader-entry",
+ NULL,
+ UID_INVALID,
+ error);
+}
+
+static int property_get_boot_loader_entries(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(boot_config_free) BootConfig config = {};
+ Manager *m = userdata;
+ size_t i;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ r = boot_entries_load_config_auto(NULL, NULL, &config);
+ if (r < 0 && r != -ENOKEY) /* don't complain if there's no GPT found */
+ return r;
+
+ r = manager_read_efi_boot_loader_entries(m);
+ if (r >= 0)
+ (void) boot_entries_augment_from_loader(&config, m->efi_boot_loader_entries, true);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < config.n_entries; i++) {
+ BootEntry *e = config.entries + i;
+
+ r = sd_bus_message_append(reply, "s", e->id);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int method_set_wall_message(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int r;
+ Manager *m = userdata;
+ char *wall_message;
+ unsigned enable_wall_messages;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "sb", &wall_message, &enable_wall_messages);
+ if (r < 0)
+ return r;
+
+ /* Short-circuit the operation if the desired state is already in place, to
+ * avoid an unnecessary polkit permission check. */
+ if (streq_ptr(m->wall_message, empty_to_null(wall_message)) &&
+ m->enable_wall_messages == enable_wall_messages)
+ goto done;
+
+ r = bus_verify_polkit_async(message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.set-wall-message",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = free_and_strdup(&m->wall_message, empty_to_null(wall_message));
+ if (r < 0)
+ return log_oom();
+
+ m->enable_wall_messages = enable_wall_messages;
+
+ done:
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_inhibit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ const char *who, *why, *what, *mode;
+ _cleanup_free_ char *id = NULL;
+ _cleanup_close_ int fifo_fd = -1;
+ Manager *m = userdata;
+ InhibitMode mm;
+ InhibitWhat w;
+ pid_t pid;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "ssss", &what, &who, &why, &mode);
+ if (r < 0)
+ return r;
+
+ w = inhibit_what_from_string(what);
+ if (w <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid what specification %s", what);
+
+ mm = inhibit_mode_from_string(mode);
+ if (mm < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid mode specification %s", mode);
+
+ /* Delay is only supported for shutdown/sleep */
+ if (mm == INHIBIT_DELAY && (w & ~(INHIBIT_SHUTDOWN|INHIBIT_SLEEP)))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Delay inhibitors only supported for shutdown and sleep");
+
+ /* Don't allow taking delay locks while we are already
+ * executing the operation. We shouldn't create the impression
+ * that the lock was successful if the machine is about to go
+ * down/suspend any moment. */
+ if (m->action_what & w)
+ return sd_bus_error_setf(error, BUS_ERROR_OPERATION_IN_PROGRESS,
+ "The operation inhibition has been requested for is already running");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_BOOT,
+ w == INHIBIT_SHUTDOWN ? (mm == INHIBIT_BLOCK ? "org.freedesktop.login1.inhibit-block-shutdown" : "org.freedesktop.login1.inhibit-delay-shutdown") :
+ w == INHIBIT_SLEEP ? (mm == INHIBIT_BLOCK ? "org.freedesktop.login1.inhibit-block-sleep" : "org.freedesktop.login1.inhibit-delay-sleep") :
+ w == INHIBIT_IDLE ? "org.freedesktop.login1.inhibit-block-idle" :
+ w == INHIBIT_HANDLE_POWER_KEY ? "org.freedesktop.login1.inhibit-handle-power-key" :
+ w == INHIBIT_HANDLE_SUSPEND_KEY ? "org.freedesktop.login1.inhibit-handle-suspend-key" :
+ w == INHIBIT_HANDLE_REBOOT_KEY ? "org.freedesktop.login1.inhibit-handle-reboot-key" :
+ w == INHIBIT_HANDLE_HIBERNATE_KEY ? "org.freedesktop.login1.inhibit-handle-hibernate-key" :
+ "org.freedesktop.login1.inhibit-handle-lid-switch",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ if (hashmap_size(m->inhibitors) >= m->inhibitors_max)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED,
+ "Maximum number of inhibitors (%" PRIu64 ") reached, refusing further inhibitors.",
+ m->inhibitors_max);
+
+ do {
+ id = mfree(id);
+
+ if (asprintf(&id, "%lu", ++m->inhibit_counter) < 0)
+ return -ENOMEM;
+
+ } while (hashmap_get(m->inhibitors, id));
+
+ _cleanup_(inhibitor_freep) Inhibitor *i = NULL;
+ r = manager_add_inhibitor(m, id, &i);
+ if (r < 0)
+ return r;
+
+ i->what = w;
+ i->mode = mm;
+ i->pid = pid;
+ i->uid = uid;
+ i->why = strdup(why);
+ i->who = strdup(who);
+
+ if (!i->why || !i->who)
+ return -ENOMEM;
+
+ fifo_fd = inhibitor_create_fifo(i);
+ if (fifo_fd < 0)
+ return fifo_fd;
+
+ r = inhibitor_start(i);
+ if (r < 0)
+ return r;
+ TAKE_PTR(i);
+
+ return sd_bus_reply_method_return(message, "h", fifo_fd);
+}
+
+static const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_WRITABLE_PROPERTY("EnableWallMessages", "b", NULL, NULL, offsetof(Manager, enable_wall_messages), 0),
+ SD_BUS_WRITABLE_PROPERTY("WallMessage", "s", NULL, NULL, offsetof(Manager, wall_message), 0),
+
+ SD_BUS_PROPERTY("NAutoVTs", "u", NULL, offsetof(Manager, n_autovts), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KillOnlyUsers", "as", NULL, offsetof(Manager, kill_only_users), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KillExcludeUsers", "as", NULL, offsetof(Manager, kill_exclude_users), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("KillUserProcesses", "b", NULL, offsetof(Manager, kill_user_processes), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RebootParameter", "s", property_get_reboot_parameter, 0, 0),
+ SD_BUS_PROPERTY("RebootToFirmwareSetup", "b", property_get_reboot_to_firmware_setup, 0, 0),
+ SD_BUS_PROPERTY("RebootToBootLoaderMenu", "t", property_get_reboot_to_boot_loader_menu, 0, 0),
+ SD_BUS_PROPERTY("RebootToBootLoaderEntry", "s", property_get_reboot_to_boot_loader_entry, 0, 0),
+ SD_BUS_PROPERTY("BootLoaderEntries", "as", property_get_boot_loader_entries, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IdleHint", "b", property_get_idle_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHint", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHintMonotonic", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("BlockInhibited", "s", property_get_inhibited, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("DelayInhibited", "s", property_get_inhibited, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("InhibitDelayMaxUSec", "t", NULL, offsetof(Manager, inhibit_delay_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UserStopDelayUSec", "t", NULL, offsetof(Manager, user_stop_delay), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandlePowerKey", "s", property_get_handle_action, offsetof(Manager, handle_power_key), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleSuspendKey", "s", property_get_handle_action, offsetof(Manager, handle_suspend_key), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleHibernateKey", "s", property_get_handle_action, offsetof(Manager, handle_hibernate_key), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleLidSwitch", "s", property_get_handle_action, offsetof(Manager, handle_lid_switch), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleLidSwitchExternalPower", "s", property_get_handle_action, offsetof(Manager, handle_lid_switch_ep), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HandleLidSwitchDocked", "s", property_get_handle_action, offsetof(Manager, handle_lid_switch_docked), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("HoldoffTimeoutUSec", "t", NULL, offsetof(Manager, holdoff_timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IdleAction", "s", property_get_handle_action, offsetof(Manager, idle_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("IdleActionUSec", "t", NULL, offsetof(Manager, idle_action_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PreparingForShutdown", "b", property_get_preparing, 0, 0),
+ SD_BUS_PROPERTY("PreparingForSleep", "b", property_get_preparing, 0, 0),
+ SD_BUS_PROPERTY("ScheduledShutdown", "(st)", property_get_scheduled_shutdown, 0, 0),
+ SD_BUS_PROPERTY("Docked", "b", property_get_docked, 0, 0),
+ SD_BUS_PROPERTY("LidClosed", "b", property_get_lid_closed, 0, 0),
+ SD_BUS_PROPERTY("OnExternalPower", "b", property_get_on_external_power, 0, 0),
+ SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(Manager, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectorySize", "t", NULL, offsetof(Manager, runtime_dir_size), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimeDirectoryInodesMax", "t", NULL, offsetof(Manager, runtime_dir_inodes), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InhibitorsMax", "t", NULL, offsetof(Manager, inhibitors_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NCurrentInhibitors", "t", property_get_hashmap_size, offsetof(Manager, inhibitors), 0),
+ SD_BUS_PROPERTY("SessionsMax", "t", NULL, offsetof(Manager, sessions_max), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NCurrentSessions", "t", property_get_hashmap_size, offsetof(Manager, sessions), 0),
+ SD_BUS_PROPERTY("UserTasksMax", "t", property_get_compat_user_tasks_max, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+
+ SD_BUS_METHOD_WITH_NAMES("GetSession",
+ "s",
+ SD_BUS_PARAM(session_id),
+ "o",
+ SD_BUS_PARAM(object_path),
+ method_get_session,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetSessionByPID",
+ "u",
+ SD_BUS_PARAM(pid),
+ "o",
+ SD_BUS_PARAM(object_path),
+ method_get_session_by_pid,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUser",
+ "u",
+ SD_BUS_PARAM(uid),
+ "o",
+ SD_BUS_PARAM(object_path),
+ method_get_user,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUserByPID",
+ "u",
+ SD_BUS_PARAM(pid),
+ "o",
+ SD_BUS_PARAM(object_path),
+ method_get_user_by_pid,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetSeat",
+ "s",
+ SD_BUS_PARAM(seat_id),
+ "o",
+ SD_BUS_PARAM(object_path),
+ method_get_seat,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListSessions",
+ NULL,,
+ "a(susso)",
+ SD_BUS_PARAM(sessions),
+ method_list_sessions,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListUsers",
+ NULL,,
+ "a(uso)",
+ SD_BUS_PARAM(users),
+ method_list_users,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListSeats",
+ NULL,,
+ "a(so)",
+ SD_BUS_PARAM(seats),
+ method_list_seats,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListInhibitors",
+ NULL,,
+ "a(ssssuu)",
+ SD_BUS_PARAM(inhibitors),
+ method_list_inhibitors,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CreateSession",
+ "uusssssussbssa(sv)",
+ SD_BUS_PARAM(uid)
+ SD_BUS_PARAM(pid)
+ SD_BUS_PARAM(service)
+ SD_BUS_PARAM(type)
+ SD_BUS_PARAM(class)
+ SD_BUS_PARAM(desktop)
+ SD_BUS_PARAM(seat_id)
+ SD_BUS_PARAM(vtnr)
+ SD_BUS_PARAM(tty)
+ SD_BUS_PARAM(display)
+ SD_BUS_PARAM(remote)
+ SD_BUS_PARAM(remote_user)
+ SD_BUS_PARAM(remote_host)
+ SD_BUS_PARAM(properties),
+ "soshusub",
+ SD_BUS_PARAM(session_id)
+ SD_BUS_PARAM(object_path)
+ SD_BUS_PARAM(runtime_path)
+ SD_BUS_PARAM(fifo_fd)
+ SD_BUS_PARAM(uid)
+ SD_BUS_PARAM(seat_id)
+ SD_BUS_PARAM(vtnr)
+ SD_BUS_PARAM(existing),
+ method_create_session,
+ 0),
+ SD_BUS_METHOD_WITH_NAMES("ReleaseSession",
+ "s",
+ SD_BUS_PARAM(session_id),
+ NULL,,
+ method_release_session,
+ 0),
+ SD_BUS_METHOD_WITH_NAMES("ActivateSession",
+ "s",
+ SD_BUS_PARAM(session_id),
+ NULL,,
+ method_activate_session,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ActivateSessionOnSeat",
+ "ss",
+ SD_BUS_PARAM(session_id)
+ SD_BUS_PARAM(seat_id),
+ NULL,,
+ method_activate_session_on_seat,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("LockSession",
+ "s",
+ SD_BUS_PARAM(session_id),
+ NULL,,
+ method_lock_session,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("UnlockSession",
+ "s",
+ SD_BUS_PARAM(session_id),
+ NULL,,
+ method_lock_session,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LockSessions",
+ NULL,
+ NULL,
+ method_lock_sessions,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnlockSessions",
+ NULL,
+ NULL,
+ method_lock_sessions,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("KillSession",
+ "ssi",
+ SD_BUS_PARAM(session_id)
+ SD_BUS_PARAM(who)
+ SD_BUS_PARAM(signal_number),
+ NULL,,
+ method_kill_session,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("KillUser",
+ "ui",
+ SD_BUS_PARAM(uid)
+ SD_BUS_PARAM(signal_number),
+ NULL,,
+ method_kill_user,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("TerminateSession",
+ "s",
+ SD_BUS_PARAM(session_id),
+ NULL,,
+ method_terminate_session,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("TerminateUser",
+ "u",
+ SD_BUS_PARAM(uid),
+ NULL,,
+ method_terminate_user,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("TerminateSeat",
+ "s",
+ SD_BUS_PARAM(seat_id),
+ NULL,,
+ method_terminate_seat,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetUserLinger",
+ "ubb",
+ SD_BUS_PARAM(uid)
+ SD_BUS_PARAM(enable)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_user_linger,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("AttachDevice",
+ "ssb",
+ SD_BUS_PARAM(seat_id)
+ SD_BUS_PARAM(sysfs_path)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_attach_device,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("FlushDevices",
+ "b",
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_flush_devices,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("PowerOff",
+ "b",
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_poweroff,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Reboot",
+ "b",
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_reboot,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Halt",
+ "b",
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_halt,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Suspend",
+ "b",
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_suspend,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Hibernate",
+ "b",
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_hibernate,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("HybridSleep",
+ "b",
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_hybrid_sleep,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SuspendThenHibernate",
+ "b",
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_suspend_then_hibernate,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanPowerOff",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_poweroff,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanReboot",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_reboot,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanHalt",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_halt,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanSuspend",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_suspend,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanHibernate",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_hibernate,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanHybridSleep",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_hybrid_sleep,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanSuspendThenHibernate",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_suspend_then_hibernate,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ScheduleShutdown",
+ "st",
+ SD_BUS_PARAM(type)
+ SD_BUS_PARAM(usec),
+ NULL,,
+ method_schedule_shutdown,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CancelScheduledShutdown",
+ NULL,,
+ "b",
+ SD_BUS_PARAM(cancelled),
+ method_cancel_scheduled_shutdown,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Inhibit",
+ "ssss",
+ SD_BUS_PARAM(what)
+ SD_BUS_PARAM(who)
+ SD_BUS_PARAM(why)
+ SD_BUS_PARAM(mode),
+ "h",
+ SD_BUS_PARAM(pipe_fd),
+ method_inhibit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanRebootParameter",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_reboot_parameter,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetRebootParameter",
+ "s",
+ SD_BUS_PARAM(parameter),
+ NULL,,
+ method_set_reboot_parameter,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanRebootToFirmwareSetup",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_reboot_to_firmware_setup,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetRebootToFirmwareSetup",
+ "b",
+ SD_BUS_PARAM(enable),
+ NULL,,
+ method_set_reboot_to_firmware_setup,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanRebootToBootLoaderMenu",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_reboot_to_boot_loader_menu,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetRebootToBootLoaderMenu",
+ "t",
+ SD_BUS_PARAM(timeout),
+ NULL,,
+ method_set_reboot_to_boot_loader_menu,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CanRebootToBootLoaderEntry",
+ NULL,,
+ "s",
+ SD_BUS_PARAM(result),
+ method_can_reboot_to_boot_loader_entry,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetRebootToBootLoaderEntry",
+ "s",
+ SD_BUS_PARAM(boot_loader_entry),
+ NULL,,
+ method_set_reboot_to_boot_loader_entry,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetWallMessage",
+ "sb",
+ SD_BUS_PARAM(wall_message)
+ SD_BUS_PARAM(enable),
+ NULL,,
+ method_set_wall_message,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL_WITH_NAMES("SessionNew",
+ "so",
+ SD_BUS_PARAM(session_id)
+ SD_BUS_PARAM(object_path),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("SessionRemoved",
+ "so",
+ SD_BUS_PARAM(session_id)
+ SD_BUS_PARAM(object_path),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("UserNew",
+ "uo",
+ SD_BUS_PARAM(uid)
+ SD_BUS_PARAM(object_path),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("UserRemoved",
+ "uo",
+ SD_BUS_PARAM(uid)
+ SD_BUS_PARAM(object_path),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("SeatNew",
+ "so",
+ SD_BUS_PARAM(seat_id)
+ SD_BUS_PARAM(object_path),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("SeatRemoved",
+ "so",
+ SD_BUS_PARAM(seat_id)
+ SD_BUS_PARAM(object_path),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("PrepareForShutdown",
+ "b",
+ SD_BUS_PARAM(start),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("PrepareForSleep",
+ "b",
+ SD_BUS_PARAM(start),
+ 0),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation manager_object = {
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ .vtables = BUS_VTABLES(manager_vtable),
+ .children = BUS_IMPLEMENTATIONS(&seat_object,
+ &session_object,
+ &user_object),
+};
+
+static int session_jobs_reply(Session *s, uint32_t jid, const char *unit, const char *result) {
+ assert(s);
+ assert(unit);
+
+ if (!s->started)
+ return 0;
+
+ if (result && !streq(result, "done")) {
+ _cleanup_(sd_bus_error_free) sd_bus_error e = SD_BUS_ERROR_NULL;
+
+ sd_bus_error_setf(&e, BUS_ERROR_JOB_FAILED,
+ "Job %u for unit '%s' failed with '%s'", jid, unit, result);
+ return session_send_create_reply(s, &e);
+ }
+
+ return session_send_create_reply(s, NULL);
+}
+
+int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *path, *result, *unit;
+ Manager *m = userdata;
+ Session *session;
+ uint32_t id;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "uoss", &id, &path, &unit, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (m->action_job && streq(m->action_job, path)) {
+ log_info("Operation '%s' finished.", inhibit_what_to_string(m->action_what));
+
+ /* Tell people that they now may take a lock again */
+ (void) send_prepare_for(m, m->action_what, false);
+
+ m->action_job = mfree(m->action_job);
+ m->action_unit = NULL;
+ m->action_what = 0;
+ return 0;
+ }
+
+ session = hashmap_get(m->session_units, unit);
+ if (session) {
+ if (streq_ptr(path, session->scope_job)) {
+ session->scope_job = mfree(session->scope_job);
+ (void) session_jobs_reply(session, id, unit, result);
+
+ session_save(session);
+ user_save(session->user);
+ }
+
+ session_add_to_gc_queue(session);
+ }
+
+ user = hashmap_get(m->user_units, unit);
+ if (user) {
+ if (streq_ptr(path, user->service_job)) {
+ user->service_job = mfree(user->service_job);
+
+ LIST_FOREACH(sessions_by_user, session, user->sessions)
+ (void) session_jobs_reply(session, id, unit, NULL /* don't propagate user service failures to the client */);
+
+ user_save(user);
+ }
+
+ user_add_to_gc_queue(user);
+ }
+
+ return 0;
+}
+
+int match_unit_removed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *path, *unit;
+ Manager *m = userdata;
+ Session *session;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "so", &unit, &path);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ session = hashmap_get(m->session_units, unit);
+ if (session)
+ session_add_to_gc_queue(session);
+
+ user = hashmap_get(m->user_units, unit);
+ if (user)
+ user_add_to_gc_queue(user);
+
+ return 0;
+}
+
+int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *unit = NULL;
+ Manager *m = userdata;
+ const char *path;
+ Session *session;
+ User *user;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ path = sd_bus_message_get_path(message);
+ if (!path)
+ return 0;
+
+ r = unit_name_from_dbus_path(path, &unit);
+ if (r == -EINVAL) /* not a unit */
+ return 0;
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ session = hashmap_get(m->session_units, unit);
+ if (session)
+ session_add_to_gc_queue(session);
+
+ user = hashmap_get(m->user_units, unit);
+ if (user)
+ user_add_to_gc_queue(user);
+
+ return 0;
+}
+
+int match_reloading(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Session *session;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (b)
+ return 0;
+
+ /* systemd finished reloading, let's recheck all our sessions */
+ log_debug("System manager has been reloaded, rechecking sessions...");
+
+ HASHMAP_FOREACH(session, m->sessions)
+ session_add_to_gc_queue(session);
+
+ return 0;
+}
+
+int manager_send_changed(Manager *manager, const char *property, ...) {
+ char **l;
+
+ assert(manager);
+
+ l = strv_from_stdarg_alloca(property);
+
+ return sd_bus_emit_properties_changed_strv(
+ manager->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ l);
+}
+
+static int strdup_job(sd_bus_message *reply, char **job) {
+ const char *j;
+ char *copy;
+ int r;
+
+ r = sd_bus_message_read(reply, "o", &j);
+ if (r < 0)
+ return r;
+
+ copy = strdup(j);
+ if (!copy)
+ return -ENOMEM;
+
+ *job = copy;
+ return 1;
+}
+
+int manager_start_scope(
+ Manager *manager,
+ const char *scope,
+ pid_t pid,
+ const char *slice,
+ const char *description,
+ char **wants,
+ char **after,
+ const char *requires_mounts_for,
+ sd_bus_message *more_properties,
+ sd_bus_error *error,
+ char **job) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ char **i;
+ int r;
+
+ assert(manager);
+ assert(scope);
+ assert(pid > 1);
+ assert(job);
+
+ r = bus_message_new_method_call(manager->bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "ss", strempty(scope), "fail");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return r;
+
+ if (!isempty(slice)) {
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
+ if (r < 0)
+ return r;
+ }
+
+ if (!isempty(description)) {
+ r = sd_bus_message_append(m, "(sv)", "Description", "s", description);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(i, wants) {
+ r = sd_bus_message_append(m, "(sv)", "Wants", "as", 1, *i);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(i, after) {
+ r = sd_bus_message_append(m, "(sv)", "After", "as", 1, *i);
+ if (r < 0)
+ return r;
+ }
+
+ if (!empty_or_root(requires_mounts_for)) {
+ r = sd_bus_message_append(m, "(sv)", "RequiresMountsFor", "as", 1, requires_mounts_for);
+ if (r < 0)
+ return r;
+ }
+
+ /* Make sure that the session shells are terminated with SIGHUP since bash and friends tend to ignore
+ * SIGTERM */
+ r = sd_bus_message_append(m, "(sv)", "SendSIGHUP", "b", true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, pid);
+ if (r < 0)
+ return r;
+
+ /* disable TasksMax= for the session scope, rely on the slice setting for it */
+ r = sd_bus_message_append(m, "(sv)", "TasksMax", "t", (uint64_t)-1);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (more_properties) {
+ /* If TasksMax also appears here, it will overwrite the default value set above */
+ r = sd_bus_message_copy(m, more_properties, true);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(manager->bus, m, 0, error, &reply);
+ if (r < 0)
+ return r;
+
+ return strdup_job(reply, job);
+}
+
+int manager_start_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(unit);
+ assert(job);
+
+ r = bus_call_method(
+ manager->bus,
+ bus_systemd_mgr,
+ "StartUnit",
+ error,
+ &reply,
+ "ss", unit, "replace");
+ if (r < 0)
+ return r;
+
+ return strdup_job(reply, job);
+}
+
+int manager_stop_unit(Manager *manager, const char *unit, const char *job_mode, sd_bus_error *error, char **ret_job) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(unit);
+ assert(ret_job);
+
+ r = bus_call_method(
+ manager->bus,
+ bus_systemd_mgr,
+ "StopUnit",
+ error,
+ &reply,
+ "ss", unit, job_mode ?: "fail");
+ if (r < 0) {
+ if (sd_bus_error_has_names(error, BUS_ERROR_NO_SUCH_UNIT,
+ BUS_ERROR_LOAD_FAILED)) {
+
+ *ret_job = NULL;
+ sd_bus_error_free(error);
+ return 0;
+ }
+
+ return r;
+ }
+
+ return strdup_job(reply, ret_job);
+}
+
+int manager_abandon_scope(Manager *manager, const char *scope, sd_bus_error *ret_error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(manager);
+ assert(scope);
+
+ path = unit_dbus_path_from_name(scope);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_call_method(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Scope",
+ "Abandon",
+ &error,
+ NULL,
+ NULL);
+ if (r < 0) {
+ if (sd_bus_error_has_names(&error, BUS_ERROR_NO_SUCH_UNIT,
+ BUS_ERROR_LOAD_FAILED,
+ BUS_ERROR_SCOPE_NOT_RUNNING))
+ return 0;
+
+ sd_bus_error_move(ret_error, &error);
+ return r;
+ }
+
+ return 1;
+}
+
+int manager_kill_unit(Manager *manager, const char *unit, KillWho who, int signo, sd_bus_error *error) {
+ assert(manager);
+ assert(unit);
+
+ return bus_call_method(
+ manager->bus,
+ bus_systemd_mgr,
+ "KillUnit",
+ error,
+ NULL,
+ "ssi", unit, who == KILL_LEADER ? "main" : "all", signo);
+}
+
+int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *ret_error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *state;
+ int r;
+
+ assert(manager);
+ assert(unit);
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ /* systemd might have dropped off momentarily, let's
+ * not make this an error */
+ if (sd_bus_error_has_names(&error, SD_BUS_ERROR_NO_REPLY,
+ SD_BUS_ERROR_DISCONNECTED))
+ return true;
+
+ /* If the unit is already unloaded then it's not
+ * active */
+ if (sd_bus_error_has_names(&error, BUS_ERROR_NO_SUCH_UNIT,
+ BUS_ERROR_LOAD_FAILED))
+ return false;
+
+ sd_bus_error_move(ret_error, &error);
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "s", &state);
+ if (r < 0)
+ return r;
+
+ return !STR_IN_SET(state, "inactive", "failed");
+}
+
+int manager_job_is_active(Manager *manager, const char *path, sd_bus_error *ret_error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(path);
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Job",
+ "State",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ if (sd_bus_error_has_names(&error, SD_BUS_ERROR_NO_REPLY,
+ SD_BUS_ERROR_DISCONNECTED))
+ return true;
+
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_OBJECT))
+ return false;
+
+ sd_bus_error_move(ret_error, &error);
+ return r;
+ }
+
+ /* We don't actually care about the state really. The fact
+ * that we could read the job state is enough for us */
+
+ return true;
+}
diff --git a/src/login/logind-dbus.h b/src/login/logind-dbus.h
new file mode 100644
index 0000000..6b5d3ab
--- /dev/null
+++ b/src/login/logind-dbus.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-object.h"
+#include "logind-session.h"
+#include "logind-user.h"
+#include "logind.h"
+
+int manager_get_session_from_creds(Manager *m, sd_bus_message *message, const char *name, sd_bus_error *error, Session **ret);
+int manager_get_user_from_creds(Manager *m, sd_bus_message *message, uid_t uid, sd_bus_error *error, User **ret);
+int manager_get_seat_from_creds(Manager *m, sd_bus_message *message, const char *name, sd_bus_error *error, Seat **ret);
+
+int manager_dispatch_delayed(Manager *manager, bool timeout);
+
+int bus_manager_shutdown_or_sleep_now_or_later(Manager *m, const char *unit_name, InhibitWhat w, sd_bus_error *error);
+
+int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_unit_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_reloading(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int manager_send_changed(Manager *manager, const char *property, ...) _sentinel_;
+
+int manager_start_scope(Manager *manager, const char *scope, pid_t pid, const char *slice, const char *description, char **wants, char **after, const char *requires_mounts_for, sd_bus_message *more_properties, sd_bus_error *error, char **job);
+int manager_start_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job);
+int manager_stop_unit(Manager *manager, const char *unit, const char *job_mode, sd_bus_error *error, char **job);
+int manager_abandon_scope(Manager *manager, const char *scope, sd_bus_error *error);
+int manager_kill_unit(Manager *manager, const char *unit, KillWho who, int signo, sd_bus_error *error);
+int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *error);
+int manager_job_is_active(Manager *manager, const char *path, sd_bus_error *error);
+
+extern const BusObjectImplementation manager_object;
diff --git a/src/login/logind-device.c b/src/login/logind-device.c
new file mode 100644
index 0000000..982a772
--- /dev/null
+++ b/src/login/logind-device.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <string.h>
+
+#include "alloc-util.h"
+#include "logind-device.h"
+#include "logind-seat-dbus.h"
+#include "util.h"
+
+Device* device_new(Manager *m, const char *sysfs, bool master) {
+ Device *d;
+
+ assert(m);
+ assert(sysfs);
+
+ d = new0(Device, 1);
+ if (!d)
+ return NULL;
+
+ d->sysfs = strdup(sysfs);
+ if (!d->sysfs)
+ return mfree(d);
+
+ if (hashmap_put(m->devices, d->sysfs, d) < 0) {
+ free(d->sysfs);
+ return mfree(d);
+ }
+
+ d->manager = m;
+ d->master = master;
+ dual_timestamp_get(&d->timestamp);
+
+ return d;
+}
+
+static void device_detach(Device *d) {
+ Seat *s;
+ SessionDevice *sd;
+
+ assert(d);
+
+ if (!d->seat)
+ return;
+
+ while ((sd = d->session_devices))
+ session_device_free(sd);
+
+ s = d->seat;
+ LIST_REMOVE(devices, d->seat->devices, d);
+ d->seat = NULL;
+
+ if (!seat_has_master_device(s)) {
+ seat_add_to_gc_queue(s);
+ seat_send_changed(s, "CanGraphical", NULL);
+ }
+}
+
+void device_free(Device *d) {
+ assert(d);
+
+ device_detach(d);
+
+ hashmap_remove(d->manager->devices, d->sysfs);
+
+ free(d->sysfs);
+ free(d);
+}
+
+void device_attach(Device *d, Seat *s) {
+ Device *i;
+ bool had_master;
+
+ assert(d);
+ assert(s);
+
+ if (d->seat == s)
+ return;
+
+ if (d->seat)
+ device_detach(d);
+
+ d->seat = s;
+ had_master = seat_has_master_device(s);
+
+ /* We keep the device list sorted by the "master" flag. That is, master
+ * devices are at the front, other devices at the tail. As there is no
+ * way to easily add devices at the list-tail, we need to iterate the
+ * list to find the first non-master device when adding non-master
+ * devices. We assume there is only a few (normally 1) master devices
+ * per seat, so we iterate only a few times. */
+
+ if (d->master || !s->devices)
+ LIST_PREPEND(devices, s->devices, d);
+ else
+ LIST_FOREACH(devices, i, s->devices) {
+ if (!i->devices_next || !i->master) {
+ LIST_INSERT_AFTER(devices, s->devices, i, d);
+ break;
+ }
+ }
+
+ if (!had_master && d->master && s->started) {
+ seat_save(s);
+ seat_send_changed(s, "CanGraphical", NULL);
+ }
+}
diff --git a/src/login/logind-device.h b/src/login/logind-device.h
new file mode 100644
index 0000000..0d89613
--- /dev/null
+++ b/src/login/logind-device.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Device Device;
+
+#include "list.h"
+#include "logind-seat.h"
+#include "logind-session-device.h"
+
+struct Device {
+ Manager *manager;
+
+ char *sysfs;
+ Seat *seat;
+ bool master;
+
+ dual_timestamp timestamp;
+
+ LIST_FIELDS(struct Device, devices);
+ LIST_HEAD(SessionDevice, session_devices);
+};
+
+Device* device_new(Manager *m, const char *sysfs, bool master);
+void device_free(Device *d);
+void device_attach(Device *d, Seat *s);
diff --git a/src/login/logind-gperf.gperf b/src/login/logind-gperf.gperf
new file mode 100644
index 0000000..2c152d2
--- /dev/null
+++ b/src/login/logind-gperf.gperf
@@ -0,0 +1,47 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "logind.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name logind_gperf_hash
+%define lookup-function-name logind_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Login.NAutoVTs, config_parse_n_autovts, 0, offsetof(Manager, n_autovts)
+Login.ReserveVT, config_parse_unsigned, 0, offsetof(Manager, reserve_vt)
+Login.KillUserProcesses, config_parse_bool, 0, offsetof(Manager, kill_user_processes)
+Login.KillOnlyUsers, config_parse_strv, 0, offsetof(Manager, kill_only_users)
+Login.KillExcludeUsers, config_parse_strv, 0, offsetof(Manager, kill_exclude_users)
+Login.InhibitDelayMaxSec, config_parse_sec, 0, offsetof(Manager, inhibit_delay_max)
+Login.UserStopDelaySec, config_parse_sec, 0, offsetof(Manager, user_stop_delay)
+Login.HandlePowerKey, config_parse_handle_action, 0, offsetof(Manager, handle_power_key)
+Login.HandleSuspendKey, config_parse_handle_action, 0, offsetof(Manager, handle_suspend_key)
+Login.HandleHibernateKey, config_parse_handle_action, 0, offsetof(Manager, handle_hibernate_key)
+Login.HandleLidSwitch, config_parse_handle_action, 0, offsetof(Manager, handle_lid_switch)
+Login.HandleLidSwitchExternalPower, config_parse_handle_action, 0, offsetof(Manager, handle_lid_switch_ep)
+Login.HandleLidSwitchDocked, config_parse_handle_action, 0, offsetof(Manager, handle_lid_switch_docked)
+Login.HandleRebootKey, config_parse_handle_action, 0, offsetof(Manager, handle_reboot_key)
+Login.PowerKeyIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, power_key_ignore_inhibited)
+Login.SuspendKeyIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, suspend_key_ignore_inhibited)
+Login.HibernateKeyIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, hibernate_key_ignore_inhibited)
+Login.LidSwitchIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, lid_switch_ignore_inhibited)
+Login.RebootKeyIgnoreInhibited, config_parse_bool, 0, offsetof(Manager, reboot_key_ignore_inhibited)
+Login.HoldoffTimeoutSec, config_parse_sec, 0, offsetof(Manager, holdoff_timeout_usec)
+Login.IdleAction, config_parse_handle_action, 0, offsetof(Manager, idle_action)
+Login.IdleActionSec, config_parse_sec, 0, offsetof(Manager, idle_action_usec)
+Login.RuntimeDirectorySize, config_parse_tmpfs_size, 0, offsetof(Manager, runtime_dir_size)
+Login.RuntimeDirectoryInodesMax, config_parse_uint64, 0, offsetof(Manager, runtime_dir_inodes)
+Login.RemoveIPC, config_parse_bool, 0, offsetof(Manager, remove_ipc)
+Login.InhibitorsMax, config_parse_uint64, 0, offsetof(Manager, inhibitors_max)
+Login.SessionsMax, config_parse_uint64, 0, offsetof(Manager, sessions_max)
+Login.UserTasksMax, config_parse_compat_user_tasks_max, 0, 0
diff --git a/src/login/logind-inhibit.c b/src/login/logind-inhibit.c
new file mode 100644
index 0000000..57198ce
--- /dev/null
+++ b/src/login/logind-inhibit.c
@@ -0,0 +1,537 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "errno-list.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "logind-dbus.h"
+#include "logind-inhibit.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+static void inhibitor_remove_fifo(Inhibitor *i);
+
+int inhibitor_new(Inhibitor **ret, Manager *m, const char* id) {
+ _cleanup_(inhibitor_freep) Inhibitor *i = NULL;
+ int r;
+
+ assert(ret);
+ assert(m);
+ assert(id);
+
+ i = new(Inhibitor, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (Inhibitor) {
+ .manager = m,
+ .what = _INHIBIT_WHAT_INVALID,
+ .mode = _INHIBIT_MODE_INVALID,
+ .uid = UID_INVALID,
+ .fifo_fd = -1,
+ };
+
+ i->state_file = path_join("/run/systemd/inhibit", id);
+ if (!i->state_file)
+ return -ENOMEM;
+
+ i->id = basename(i->state_file);
+
+ r = hashmap_put(m->inhibitors, i->id, i);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(i);
+ return 0;
+}
+
+Inhibitor* inhibitor_free(Inhibitor *i) {
+
+ if (!i)
+ return NULL;
+
+ free(i->who);
+ free(i->why);
+
+ sd_event_source_unref(i->event_source);
+ safe_close(i->fifo_fd);
+
+ hashmap_remove(i->manager->inhibitors, i->id);
+
+ /* Note that we don't remove neither the state file nor the fifo path here, since we want both to
+ * survive daemon restarts */
+ free(i->state_file);
+ free(i->fifo_path);
+
+ return mfree(i);
+}
+
+static int inhibitor_save(Inhibitor *i) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(i);
+
+ r = mkdir_safe_label("/run/systemd/inhibit", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(i->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "WHAT=%s\n"
+ "MODE=%s\n"
+ "UID="UID_FMT"\n"
+ "PID="PID_FMT"\n",
+ inhibit_what_to_string(i->what),
+ inhibit_mode_to_string(i->mode),
+ i->uid,
+ i->pid);
+
+ if (i->who) {
+ _cleanup_free_ char *cc = NULL;
+
+ cc = cescape(i->who);
+ if (!cc) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "WHO=%s\n", cc);
+ }
+
+ if (i->why) {
+ _cleanup_free_ char *cc = NULL;
+
+ cc = cescape(i->why);
+ if (!cc) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "WHY=%s\n", cc);
+ }
+
+ if (i->fifo_path)
+ fprintf(f, "FIFO=%s\n", i->fifo_path);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, i->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(i->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save inhibit data %s: %m", i->state_file);
+}
+
+static int bus_manager_send_inhibited_change(Inhibitor *i) {
+ const char *property;
+
+ assert(i);
+
+ property = i->mode == INHIBIT_BLOCK ? "BlockInhibited" : "DelayInhibited";
+
+ return manager_send_changed(i->manager, property, NULL);
+}
+
+int inhibitor_start(Inhibitor *i) {
+ assert(i);
+
+ if (i->started)
+ return 0;
+
+ dual_timestamp_get(&i->since);
+
+ log_debug("Inhibitor %s (%s) pid="PID_FMT" uid="UID_FMT" mode=%s started.",
+ strna(i->who), strna(i->why),
+ i->pid, i->uid,
+ inhibit_mode_to_string(i->mode));
+
+ i->started = true;
+
+ inhibitor_save(i);
+
+ bus_manager_send_inhibited_change(i);
+
+ return 0;
+}
+
+void inhibitor_stop(Inhibitor *i) {
+ assert(i);
+
+ if (i->started)
+ log_debug("Inhibitor %s (%s) pid="PID_FMT" uid="UID_FMT" mode=%s stopped.",
+ strna(i->who), strna(i->why),
+ i->pid, i->uid,
+ inhibit_mode_to_string(i->mode));
+
+ inhibitor_remove_fifo(i);
+
+ if (i->state_file)
+ (void) unlink(i->state_file);
+
+ i->started = false;
+
+ bus_manager_send_inhibited_change(i);
+}
+
+int inhibitor_load(Inhibitor *i) {
+
+ _cleanup_free_ char
+ *what = NULL,
+ *uid = NULL,
+ *pid = NULL,
+ *who = NULL,
+ *why = NULL,
+ *mode = NULL;
+
+ InhibitWhat w;
+ InhibitMode mm;
+ char *cc;
+ int r;
+
+ r = parse_env_file(NULL, i->state_file,
+ "WHAT", &what,
+ "UID", &uid,
+ "PID", &pid,
+ "WHO", &who,
+ "WHY", &why,
+ "MODE", &mode,
+ "FIFO", &i->fifo_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", i->state_file);
+
+ w = what ? inhibit_what_from_string(what) : 0;
+ if (w >= 0)
+ i->what = w;
+
+ mm = mode ? inhibit_mode_from_string(mode) : INHIBIT_BLOCK;
+ if (mm >= 0)
+ i->mode = mm;
+
+ if (uid) {
+ r = parse_uid(uid, &i->uid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse UID of inhibitor: %s", uid);
+ }
+
+ if (pid) {
+ r = parse_pid(pid, &i->pid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse PID of inhibitor: %s", pid);
+ }
+
+ if (who) {
+ r = cunescape(who, 0, &cc);
+ if (r < 0)
+ return log_oom();
+
+ free_and_replace(i->who, cc);
+ }
+
+ if (why) {
+ r = cunescape(why, 0, &cc);
+ if (r < 0)
+ return log_oom();
+
+ free_and_replace(i->why, cc);
+ }
+
+ if (i->fifo_path) {
+ _cleanup_close_ int fd = -1;
+
+ /* Let's re-open the FIFO on both sides, and close the writing side right away */
+ fd = inhibitor_create_fifo(i);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to reopen FIFO: %m");
+ }
+
+ return 0;
+}
+
+static int inhibitor_dispatch_fifo(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Inhibitor *i = userdata;
+
+ assert(s);
+ assert(fd == i->fifo_fd);
+ assert(i);
+
+ inhibitor_stop(i);
+ inhibitor_free(i);
+
+ return 0;
+}
+
+int inhibitor_create_fifo(Inhibitor *i) {
+ int r;
+
+ assert(i);
+
+ /* Create FIFO */
+ if (!i->fifo_path) {
+ r = mkdir_safe_label("/run/systemd/inhibit", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return r;
+
+ i->fifo_path = strjoin("/run/systemd/inhibit/", i->id, ".ref");
+ if (!i->fifo_path)
+ return -ENOMEM;
+
+ if (mkfifo(i->fifo_path, 0600) < 0 && errno != EEXIST)
+ return -errno;
+ }
+
+ /* Open reading side */
+ if (i->fifo_fd < 0) {
+ i->fifo_fd = open(i->fifo_path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (i->fifo_fd < 0)
+ return -errno;
+ }
+
+ if (!i->event_source) {
+ r = sd_event_add_io(i->manager->event, &i->event_source, i->fifo_fd, 0, inhibitor_dispatch_fifo, i);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(i->event_source, SD_EVENT_PRIORITY_IDLE-10);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(i->event_source, "inhibitor-ref");
+ }
+
+ /* Open writing side */
+ r = open(i->fifo_path, O_WRONLY|O_CLOEXEC|O_NONBLOCK);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+static void inhibitor_remove_fifo(Inhibitor *i) {
+ assert(i);
+
+ i->event_source = sd_event_source_unref(i->event_source);
+ i->fifo_fd = safe_close(i->fifo_fd);
+
+ if (i->fifo_path) {
+ (void) unlink(i->fifo_path);
+ i->fifo_path = mfree(i->fifo_path);
+ }
+}
+
+bool inhibitor_is_orphan(Inhibitor *i) {
+ assert(i);
+
+ if (!i->started)
+ return true;
+
+ if (!i->fifo_path)
+ return true;
+
+ if (i->fifo_fd < 0)
+ return true;
+
+ if (pipe_eof(i->fifo_fd) != 0)
+ return true;
+
+ return false;
+}
+
+InhibitWhat manager_inhibit_what(Manager *m, InhibitMode mm) {
+ Inhibitor *i;
+ InhibitWhat what = 0;
+
+ assert(m);
+
+ HASHMAP_FOREACH(i, m->inhibitors)
+ if (i->mode == mm && i->started)
+ what |= i->what;
+
+ return what;
+}
+
+static int pid_is_active(Manager *m, pid_t pid) {
+ Session *s;
+ int r;
+
+ /* Get client session. This is not what you are looking for these days.
+ * FIXME #6852 */
+ r = manager_get_session_by_pid(m, pid, &s);
+ if (r < 0)
+ return r;
+
+ /* If there's no session assigned to it, then it's globally
+ * active on all ttys */
+ if (r == 0)
+ return 1;
+
+ return session_is_active(s);
+}
+
+bool manager_is_inhibited(
+ Manager *m,
+ InhibitWhat w,
+ InhibitMode mm,
+ dual_timestamp *since,
+ bool ignore_inactive,
+ bool ignore_uid,
+ uid_t uid,
+ Inhibitor **offending) {
+
+ Inhibitor *i;
+ struct dual_timestamp ts = DUAL_TIMESTAMP_NULL;
+ bool inhibited = false;
+
+ assert(m);
+ assert(w > 0 && w < _INHIBIT_WHAT_MAX);
+
+ HASHMAP_FOREACH(i, m->inhibitors) {
+ if (!i->started)
+ continue;
+
+ if (!(i->what & w))
+ continue;
+
+ if (i->mode != mm)
+ continue;
+
+ if (ignore_inactive && pid_is_active(m, i->pid) <= 0)
+ continue;
+
+ if (ignore_uid && i->uid == uid)
+ continue;
+
+ if (!inhibited ||
+ i->since.monotonic < ts.monotonic)
+ ts = i->since;
+
+ inhibited = true;
+
+ if (offending)
+ *offending = i;
+ }
+
+ if (since)
+ *since = ts;
+
+ return inhibited;
+}
+
+const char *inhibit_what_to_string(InhibitWhat w) {
+ static thread_local char buffer[STRLEN(
+ "shutdown:"
+ "sleep:"
+ "idle:"
+ "handle-power-key:"
+ "handle-suspend-key:"
+ "handle-hibernate-key:"
+ "handle-lid-switch:"
+ "handle-reboot-key")+1];
+ char *p;
+
+ if (w < 0 || w >= _INHIBIT_WHAT_MAX)
+ return NULL;
+
+ p = buffer;
+ if (w & INHIBIT_SHUTDOWN)
+ p = stpcpy(p, "shutdown:");
+ if (w & INHIBIT_SLEEP)
+ p = stpcpy(p, "sleep:");
+ if (w & INHIBIT_IDLE)
+ p = stpcpy(p, "idle:");
+ if (w & INHIBIT_HANDLE_POWER_KEY)
+ p = stpcpy(p, "handle-power-key:");
+ if (w & INHIBIT_HANDLE_SUSPEND_KEY)
+ p = stpcpy(p, "handle-suspend-key:");
+ if (w & INHIBIT_HANDLE_HIBERNATE_KEY)
+ p = stpcpy(p, "handle-hibernate-key:");
+ if (w & INHIBIT_HANDLE_LID_SWITCH)
+ p = stpcpy(p, "handle-lid-switch:");
+ if (w & INHIBIT_HANDLE_REBOOT_KEY)
+ p = stpcpy(p, "handle-reboot-key:");
+
+ if (p > buffer)
+ *(p-1) = 0;
+ else
+ *p = 0;
+
+ return buffer;
+}
+
+int inhibit_what_from_string(const char *s) {
+ InhibitWhat what = 0;
+
+ for (const char *p = s;;) {
+ _cleanup_free_ char *word = NULL;
+ int r;
+
+ /* A sanity check that our return values fit in an int */
+ assert_cc((int) _INHIBIT_WHAT_MAX == _INHIBIT_WHAT_MAX);
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return what;
+
+ if (streq(word, "shutdown"))
+ what |= INHIBIT_SHUTDOWN;
+ else if (streq(word, "sleep"))
+ what |= INHIBIT_SLEEP;
+ else if (streq(word, "idle"))
+ what |= INHIBIT_IDLE;
+ else if (streq(word, "handle-power-key"))
+ what |= INHIBIT_HANDLE_POWER_KEY;
+ else if (streq(word, "handle-suspend-key"))
+ what |= INHIBIT_HANDLE_SUSPEND_KEY;
+ else if (streq(word, "handle-hibernate-key"))
+ what |= INHIBIT_HANDLE_HIBERNATE_KEY;
+ else if (streq(word, "handle-lid-switch"))
+ what |= INHIBIT_HANDLE_LID_SWITCH;
+ else if (streq(word, "handle-reboot-key"))
+ what |= INHIBIT_HANDLE_REBOOT_KEY;
+ else
+ return _INHIBIT_WHAT_INVALID;
+ }
+}
+
+static const char* const inhibit_mode_table[_INHIBIT_MODE_MAX] = {
+ [INHIBIT_BLOCK] = "block",
+ [INHIBIT_DELAY] = "delay"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(inhibit_mode, InhibitMode);
diff --git a/src/login/logind-inhibit.h b/src/login/logind-inhibit.h
new file mode 100644
index 0000000..124bdb6
--- /dev/null
+++ b/src/login/logind-inhibit.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Inhibitor Inhibitor;
+
+typedef enum InhibitWhat {
+ INHIBIT_SHUTDOWN = 1 << 0,
+ INHIBIT_SLEEP = 1 << 1,
+ INHIBIT_IDLE = 1 << 2,
+ INHIBIT_HANDLE_POWER_KEY = 1 << 3,
+ INHIBIT_HANDLE_SUSPEND_KEY = 1 << 4,
+ INHIBIT_HANDLE_HIBERNATE_KEY = 1 << 5,
+ INHIBIT_HANDLE_LID_SWITCH = 1 << 6,
+ INHIBIT_HANDLE_REBOOT_KEY = 1 << 7,
+ _INHIBIT_WHAT_MAX = 1 << 8,
+ _INHIBIT_WHAT_INVALID = -1
+} InhibitWhat;
+
+typedef enum InhibitMode {
+ INHIBIT_BLOCK,
+ INHIBIT_DELAY,
+ _INHIBIT_MODE_MAX,
+ _INHIBIT_MODE_INVALID = -1
+} InhibitMode;
+
+#include "logind.h"
+
+struct Inhibitor {
+ Manager *manager;
+
+ sd_event_source *event_source;
+
+ const char *id;
+ char *state_file;
+
+ bool started;
+
+ InhibitWhat what;
+ char *who;
+ char *why;
+ InhibitMode mode;
+
+ pid_t pid;
+ uid_t uid;
+
+ dual_timestamp since;
+
+ char *fifo_path;
+ int fifo_fd;
+};
+
+int inhibitor_new(Inhibitor **ret, Manager *m, const char* id);
+Inhibitor* inhibitor_free(Inhibitor *i);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Inhibitor*, inhibitor_free);
+
+int inhibitor_load(Inhibitor *i);
+
+int inhibitor_start(Inhibitor *i);
+void inhibitor_stop(Inhibitor *i);
+
+int inhibitor_create_fifo(Inhibitor *i);
+
+bool inhibitor_is_orphan(Inhibitor *i);
+
+InhibitWhat manager_inhibit_what(Manager *m, InhibitMode mm);
+bool manager_is_inhibited(Manager *m, InhibitWhat w, InhibitMode mm, dual_timestamp *since, bool ignore_inactive, bool ignore_uid, uid_t uid, Inhibitor **offending);
+
+const char *inhibit_what_to_string(InhibitWhat k);
+int inhibit_what_from_string(const char *s);
+
+const char *inhibit_mode_to_string(InhibitMode k);
+InhibitMode inhibit_mode_from_string(const char *s);
diff --git a/src/login/logind-seat-dbus.c b/src/login/logind-seat-dbus.c
new file mode 100644
index 0000000..a60ed2d
--- /dev/null
+++ b/src/login/logind-seat-dbus.c
@@ -0,0 +1,487 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-label.h"
+#include "bus-polkit.h"
+#include "bus-util.h"
+#include "logind-dbus.h"
+#include "logind-seat-dbus.h"
+#include "logind-seat.h"
+#include "logind-session-dbus.h"
+#include "logind.h"
+#include "missing_capability.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_const_true, "b", true);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_tty, "b", Seat, seat_can_tty);
+static BUS_DEFINE_PROPERTY_GET(property_get_can_graphical, "b", Seat, seat_can_graphical);
+
+static int property_get_active_session(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Seat *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ p = s->active ? session_bus_path(s->active) : strdup("/");
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(so)", s->active ? s->active->id : "", p);
+}
+
+static int property_get_sessions(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Seat *s = userdata;
+ Session *session;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = sd_bus_message_open_container(reply, 'a', "(so)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(sessions_by_seat, session, s->sessions) {
+ _cleanup_free_ char *p = NULL;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(so)", session->id, p);
+ if (r < 0)
+ return r;
+
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int property_get_idle_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Seat *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ return sd_bus_message_append(reply, "b", seat_get_idle_hint(s, NULL) > 0);
+}
+
+static int property_get_idle_since_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Seat *s = userdata;
+ dual_timestamp t;
+ uint64_t u;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = seat_get_idle_hint(s, &t);
+ if (r < 0)
+ return r;
+
+ u = streq(property, "IdleSinceHint") ? t.realtime : t.monotonic;
+
+ return sd_bus_message_append(reply, "t", u);
+}
+
+int bus_seat_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ UID_INVALID,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = seat_stop_sessions(s, /* force = */ true);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_activate_session(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ const char *name;
+ Session *session;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ session = hashmap_get(s->manager->sessions, name);
+ if (!session)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_SESSION, "No session '%s' known", name);
+
+ if (session->seat != s)
+ return sd_bus_error_setf(error, BUS_ERROR_SESSION_NOT_ON_SEAT, "Session %s not on seat %s", name, s->id);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.chvt",
+ NULL,
+ false,
+ UID_INVALID,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_activate(session);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_switch_to(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ unsigned to;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "u", &to);
+ if (r < 0)
+ return r;
+
+ if (to <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid virtual terminal");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.chvt",
+ NULL,
+ false,
+ UID_INVALID,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = seat_switch_to(s, to);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_switch_to_next(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.chvt",
+ NULL,
+ false,
+ UID_INVALID,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = seat_switch_to_next(s);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_switch_to_previous(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Seat *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.chvt",
+ NULL,
+ false,
+ UID_INVALID,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = seat_switch_to_previous(s);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int seat_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *e = NULL;
+ sd_bus_message *message;
+ Manager *m = userdata;
+ const char *p;
+ Seat *seat;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ p = startswith(path, "/org/freedesktop/login1/seat/");
+ if (!p)
+ return 0;
+
+ e = bus_label_unescape(p);
+ if (!e)
+ return -ENOMEM;
+
+ message = sd_bus_get_current_message(bus);
+
+ r = manager_get_seat_from_creds(m, message, e, error, &seat);
+ if (r == -ENXIO) {
+ sd_bus_error_free(error);
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ *found = seat;
+ return 1;
+}
+
+char *seat_bus_path(Seat *s) {
+ _cleanup_free_ char *t = NULL;
+
+ assert(s);
+
+ t = bus_label_escape(s->id);
+ if (!t)
+ return NULL;
+
+ return strjoin("/org/freedesktop/login1/seat/", t);
+}
+
+static int seat_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ sd_bus_message *message;
+ Manager *m = userdata;
+ Seat *seat;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ HASHMAP_FOREACH(seat, m->seats) {
+ char *p;
+
+ p = seat_bus_path(seat);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ message = sd_bus_get_current_message(bus);
+ if (message) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r >= 0) {
+ bool may_auto = false;
+ const char *name;
+
+ r = sd_bus_creds_get_session(creds, &name);
+ if (r >= 0) {
+ Session *session;
+
+ session = hashmap_get(m->sessions, name);
+ if (session && session->seat) {
+ r = strv_extend(&l, "/org/freedesktop/login1/seat/self");
+ if (r < 0)
+ return r;
+
+ may_auto = true;
+ }
+ }
+
+ if (!may_auto) {
+ uid_t uid;
+
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r >= 0) {
+ User *user;
+
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ may_auto = user && user->display && user->display->seat;
+ }
+ }
+
+ if (may_auto) {
+ r = strv_extend(&l, "/org/freedesktop/login1/seat/auto");
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ *nodes = TAKE_PTR(l);
+ return 1;
+}
+
+int seat_send_signal(Seat *s, bool new_seat) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(s);
+
+ p = seat_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ s->manager->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ new_seat ? "SeatNew" : "SeatRemoved",
+ "so", s->id, p);
+}
+
+int seat_send_changed(Seat *s, const char *properties, ...) {
+ _cleanup_free_ char *p = NULL;
+ char **l;
+
+ assert(s);
+
+ if (!s->started)
+ return 0;
+
+ p = seat_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ l = strv_from_stdarg_alloca(properties);
+
+ return sd_bus_emit_properties_changed_strv(s->manager->bus, p, "org.freedesktop.login1.Seat", l);
+}
+
+static const sd_bus_vtable seat_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Id", "s", NULL, offsetof(Seat, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ActiveSession", "(so)", property_get_active_session, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CanMultiSession", "b", property_get_const_true, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("CanTTY", "b", property_get_can_tty, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CanGraphical", "b", property_get_can_graphical, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Sessions", "a(so)", property_get_sessions, 0, 0),
+ SD_BUS_PROPERTY("IdleHint", "b", property_get_idle_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHint", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHintMonotonic", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_METHOD("Terminate", NULL, NULL, bus_seat_method_terminate, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_METHOD_WITH_NAMES("ActivateSession",
+ "s",
+ SD_BUS_PARAM(session_id),
+ NULL,,
+ method_activate_session,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SwitchTo",
+ "u",
+ SD_BUS_PARAM(vtnr),
+ NULL,,
+ method_switch_to,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_METHOD("SwitchToNext", NULL, NULL, method_switch_to_next, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SwitchToPrevious", NULL, NULL, method_switch_to_previous, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation seat_object = {
+ "/org/freedesktop/login1/seat",
+ "org.freedesktop.login1.Seat",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({seat_vtable, seat_object_find}),
+ .node_enumerator = seat_node_enumerator,
+};
diff --git a/src/login/logind-seat-dbus.h b/src/login/logind-seat-dbus.h
new file mode 100644
index 0000000..258db91
--- /dev/null
+++ b/src/login/logind-seat-dbus.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-object.h"
+#include "logind-seat.h"
+
+extern const BusObjectImplementation seat_object;
+
+char *seat_bus_path(Seat *s);
+
+int seat_send_signal(Seat *s, bool new_seat);
+int seat_send_changed(Seat *s, const char *properties, ...) _sentinel_;
+
+int bus_seat_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/login/logind-seat.c b/src/login/logind-seat.c
new file mode 100644
index 0000000..10cc7d9
--- /dev/null
+++ b/src/login/logind-seat.c
@@ -0,0 +1,664 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "logind-acl.h"
+#include "logind-seat-dbus.h"
+#include "logind-seat.h"
+#include "logind-session-dbus.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+int seat_new(Seat** ret, Manager *m, const char *id) {
+ _cleanup_(seat_freep) Seat *s = NULL;
+ int r;
+
+ assert(ret);
+ assert(m);
+ assert(id);
+
+ if (!seat_name_is_valid(id))
+ return -EINVAL;
+
+ s = new(Seat, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (Seat) {
+ .manager = m,
+ };
+
+ s->state_file = path_join("/run/systemd/seats", id);
+ if (!s->state_file)
+ return -ENOMEM;
+
+ s->id = basename(s->state_file);
+
+ r = hashmap_put(m->seats, s->id, s);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+Seat* seat_free(Seat *s) {
+ if (!s)
+ return NULL;
+
+ if (s->in_gc_queue)
+ LIST_REMOVE(gc_queue, s->manager->seat_gc_queue, s);
+
+ while (s->sessions)
+ session_free(s->sessions);
+
+ assert(!s->active);
+
+ while (s->devices)
+ device_free(s->devices);
+
+ hashmap_remove(s->manager->seats, s->id);
+
+ free(s->positions);
+ free(s->state_file);
+
+ return mfree(s);
+}
+
+int seat_save(Seat *s) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(s);
+
+ if (!s->started)
+ return 0;
+
+ r = mkdir_safe_label("/run/systemd/seats", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(s->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "IS_SEAT0=%i\n"
+ "CAN_MULTI_SESSION=1\n"
+ "CAN_TTY=%i\n"
+ "CAN_GRAPHICAL=%i\n",
+ seat_is_seat0(s),
+ seat_can_tty(s),
+ seat_can_graphical(s));
+
+ if (s->active) {
+ assert(s->active->user);
+
+ fprintf(f,
+ "ACTIVE=%s\n"
+ "ACTIVE_UID="UID_FMT"\n",
+ s->active->id,
+ s->active->user->user_record->uid);
+ }
+
+ if (s->sessions) {
+ Session *i;
+
+ fputs("SESSIONS=", f);
+ LIST_FOREACH(sessions_by_seat, i, s->sessions) {
+ fprintf(f,
+ "%s%c",
+ i->id,
+ i->sessions_by_seat_next ? ' ' : '\n');
+ }
+
+ fputs("UIDS=", f);
+ LIST_FOREACH(sessions_by_seat, i, s->sessions)
+ fprintf(f,
+ UID_FMT"%c",
+ i->user->user_record->uid,
+ i->sessions_by_seat_next ? ' ' : '\n');
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, s->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(s->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save seat data %s: %m", s->state_file);
+}
+
+int seat_load(Seat *s) {
+ assert(s);
+
+ /* There isn't actually anything to read here ... */
+
+ return 0;
+}
+
+static int vt_allocate(unsigned vtnr) {
+ char p[sizeof("/dev/tty") + DECIMAL_STR_MAX(unsigned)];
+ _cleanup_close_ int fd = -1;
+
+ assert(vtnr >= 1);
+
+ xsprintf(p, "/dev/tty%u", vtnr);
+ fd = open_terminal(p, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ return 0;
+}
+
+int seat_preallocate_vts(Seat *s) {
+ int r = 0;
+ unsigned i;
+
+ assert(s);
+ assert(s->manager);
+
+ if (s->manager->n_autovts <= 0)
+ return 0;
+
+ if (!seat_has_vts(s))
+ return 0;
+
+ log_debug("Preallocating VTs...");
+
+ for (i = 1; i <= s->manager->n_autovts; i++) {
+ int q;
+
+ q = vt_allocate(i);
+ if (q < 0)
+ r = log_error_errno(q, "Failed to preallocate VT %u: %m", i);
+ }
+
+ return r;
+}
+
+int seat_apply_acls(Seat *s, Session *old_active) {
+ int r;
+
+ assert(s);
+
+ r = devnode_acl_all(s->id,
+ false,
+ !!old_active, old_active ? old_active->user->user_record->uid : 0,
+ !!s->active, s->active ? s->active->user->user_record->uid : 0);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply ACLs: %m");
+
+ return 0;
+}
+
+int seat_set_active(Seat *s, Session *session) {
+ Session *old_active;
+
+ assert(s);
+ assert(!session || session->seat == s);
+
+ if (session == s->active)
+ return 0;
+
+ old_active = s->active;
+ s->active = session;
+
+ if (old_active) {
+ session_device_pause_all(old_active);
+ session_send_changed(old_active, "Active", NULL);
+ }
+
+ (void) seat_apply_acls(s, old_active);
+
+ if (session && session->started) {
+ session_send_changed(session, "Active", NULL);
+ session_device_resume_all(session);
+ }
+
+ if (!session || session->started)
+ seat_send_changed(s, "ActiveSession", NULL);
+
+ seat_save(s);
+
+ if (session) {
+ session_save(session);
+ user_save(session->user);
+ }
+
+ if (old_active) {
+ session_save(old_active);
+ if (!session || session->user != old_active->user)
+ user_save(old_active->user);
+ }
+
+ return 0;
+}
+
+int seat_switch_to(Seat *s, unsigned num) {
+ /* Public session positions skip 0 (there is only F1-F12). Maybe it
+ * will get reassigned in the future, so return error for now. */
+ if (num == 0)
+ return -EINVAL;
+
+ if (num >= s->position_count || !s->positions[num]) {
+ /* allow switching to unused VTs to trigger auto-activate */
+ if (seat_has_vts(s) && num < 64)
+ return chvt(num);
+
+ return -EINVAL;
+ }
+
+ return session_activate(s->positions[num]);
+}
+
+int seat_switch_to_next(Seat *s) {
+ unsigned start, i;
+
+ if (s->position_count == 0)
+ return -EINVAL;
+
+ start = 1;
+ if (s->active && s->active->position > 0)
+ start = s->active->position;
+
+ for (i = start + 1; i < s->position_count; ++i)
+ if (s->positions[i])
+ return session_activate(s->positions[i]);
+
+ for (i = 1; i < start; ++i)
+ if (s->positions[i])
+ return session_activate(s->positions[i]);
+
+ return -EINVAL;
+}
+
+int seat_switch_to_previous(Seat *s) {
+ unsigned start, i;
+
+ if (s->position_count == 0)
+ return -EINVAL;
+
+ start = 1;
+ if (s->active && s->active->position > 0)
+ start = s->active->position;
+
+ for (i = start - 1; i > 0; --i)
+ if (s->positions[i])
+ return session_activate(s->positions[i]);
+
+ for (i = s->position_count - 1; i > start; --i)
+ if (s->positions[i])
+ return session_activate(s->positions[i]);
+
+ return -EINVAL;
+}
+
+int seat_active_vt_changed(Seat *s, unsigned vtnr) {
+ Session *i, *new_active = NULL;
+ int r;
+
+ assert(s);
+ assert(vtnr >= 1);
+
+ if (!seat_has_vts(s))
+ return -EINVAL;
+
+ log_debug("VT changed to %u", vtnr);
+
+ /* we might have earlier closing sessions on the same VT, so try to
+ * find a running one first */
+ LIST_FOREACH(sessions_by_seat, i, s->sessions)
+ if (i->vtnr == vtnr && !i->stopping) {
+ new_active = i;
+ break;
+ }
+
+ if (!new_active) {
+ /* no running one? then we can't decide which one is the
+ * active one, let the first one win */
+ LIST_FOREACH(sessions_by_seat, i, s->sessions)
+ if (i->vtnr == vtnr) {
+ new_active = i;
+ break;
+ }
+ }
+
+ r = seat_set_active(s, new_active);
+ manager_spawn_autovt(s->manager, vtnr);
+
+ return r;
+}
+
+int seat_read_active_vt(Seat *s) {
+ char t[64];
+ ssize_t k;
+ int vtnr;
+
+ assert(s);
+
+ if (!seat_has_vts(s))
+ return 0;
+
+ if (lseek(s->manager->console_active_fd, SEEK_SET, 0) < 0)
+ return log_error_errno(errno, "lseek on console_active_fd failed: %m");
+
+ k = read(s->manager->console_active_fd, t, sizeof(t)-1);
+ if (k <= 0) {
+ log_error("Failed to read current console: %s", k < 0 ? strerror_safe(errno) : "EOF");
+ return k < 0 ? -errno : -EIO;
+ }
+
+ t[k] = 0;
+ truncate_nl(t);
+
+ vtnr = vtnr_from_tty(t);
+ if (vtnr < 0) {
+ log_error_errno(vtnr, "Hm, /sys/class/tty/tty0/active is badly formatted: %m");
+ return -EIO;
+ }
+
+ return seat_active_vt_changed(s, vtnr);
+}
+
+int seat_start(Seat *s) {
+ assert(s);
+
+ if (s->started)
+ return 0;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SEAT_START_STR,
+ "SEAT_ID=%s", s->id,
+ LOG_MESSAGE("New seat %s.", s->id));
+
+ /* Initialize VT magic stuff */
+ seat_preallocate_vts(s);
+
+ /* Read current VT */
+ seat_read_active_vt(s);
+
+ s->started = true;
+
+ /* Save seat data */
+ seat_save(s);
+
+ seat_send_signal(s, true);
+
+ return 0;
+}
+
+int seat_stop(Seat *s, bool force) {
+ int r;
+
+ assert(s);
+
+ if (s->started)
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SEAT_STOP_STR,
+ "SEAT_ID=%s", s->id,
+ LOG_MESSAGE("Removed seat %s.", s->id));
+
+ r = seat_stop_sessions(s, force);
+
+ (void) unlink(s->state_file);
+ seat_add_to_gc_queue(s);
+
+ if (s->started)
+ seat_send_signal(s, false);
+
+ s->started = false;
+
+ return r;
+}
+
+int seat_stop_sessions(Seat *s, bool force) {
+ Session *session;
+ int r = 0, k;
+
+ assert(s);
+
+ LIST_FOREACH(sessions_by_seat, session, s->sessions) {
+ k = session_stop(session, force);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+void seat_evict_position(Seat *s, Session *session) {
+ Session *iter;
+ unsigned pos = session->position;
+
+ session->position = 0;
+
+ if (pos == 0)
+ return;
+
+ if (pos < s->position_count && s->positions[pos] == session) {
+ s->positions[pos] = NULL;
+
+ /* There might be another session claiming the same
+ * position (eg., during gdm->session transition), so let's look
+ * for it and set it on the free slot. */
+ LIST_FOREACH(sessions_by_seat, iter, s->sessions) {
+ if (iter->position == pos && session_get_state(iter) != SESSION_CLOSING) {
+ s->positions[pos] = iter;
+ break;
+ }
+ }
+ }
+}
+
+void seat_claim_position(Seat *s, Session *session, unsigned pos) {
+ /* with VTs, the position is always the same as the VTnr */
+ if (seat_has_vts(s))
+ pos = session->vtnr;
+
+ if (!GREEDY_REALLOC0(s->positions, s->position_count, pos + 1))
+ return;
+
+ seat_evict_position(s, session);
+
+ session->position = pos;
+ if (pos > 0)
+ s->positions[pos] = session;
+}
+
+static void seat_assign_position(Seat *s, Session *session) {
+ unsigned pos;
+
+ if (session->position > 0)
+ return;
+
+ for (pos = 1; pos < s->position_count; ++pos)
+ if (!s->positions[pos])
+ break;
+
+ seat_claim_position(s, session, pos);
+}
+
+int seat_attach_session(Seat *s, Session *session) {
+ assert(s);
+ assert(session);
+ assert(!session->seat);
+
+ if (!seat_has_vts(s) != !session->vtnr)
+ return -EINVAL;
+
+ session->seat = s;
+ LIST_PREPEND(sessions_by_seat, s->sessions, session);
+ seat_assign_position(s, session);
+
+ /* On seats with VTs, the VT logic defines which session is active. On
+ * seats without VTs, we automatically activate new sessions. */
+ if (!seat_has_vts(s))
+ seat_set_active(s, session);
+
+ return 0;
+}
+
+void seat_complete_switch(Seat *s) {
+ Session *session;
+
+ assert(s);
+
+ /* if no session-switch is pending or if it got canceled, do nothing */
+ if (!s->pending_switch)
+ return;
+
+ session = TAKE_PTR(s->pending_switch);
+
+ seat_set_active(s, session);
+}
+
+bool seat_has_vts(Seat *s) {
+ assert(s);
+
+ return seat_is_seat0(s) && s->manager->console_active_fd >= 0;
+}
+
+bool seat_is_seat0(Seat *s) {
+ assert(s);
+
+ return s->manager->seat0 == s;
+}
+
+bool seat_can_tty(Seat *s) {
+ assert(s);
+
+ return seat_has_vts(s);
+}
+
+bool seat_has_master_device(Seat *s) {
+ assert(s);
+
+ /* device list is ordered by "master" flag */
+ return !!s->devices && s->devices->master;
+}
+
+bool seat_can_graphical(Seat *s) {
+ assert(s);
+
+ return seat_has_master_device(s);
+}
+
+int seat_get_idle_hint(Seat *s, dual_timestamp *t) {
+ Session *session;
+ bool idle_hint = true;
+ dual_timestamp ts = DUAL_TIMESTAMP_NULL;
+
+ assert(s);
+
+ LIST_FOREACH(sessions_by_seat, session, s->sessions) {
+ dual_timestamp k;
+ int ih;
+
+ ih = session_get_idle_hint(session, &k);
+ if (ih < 0)
+ return ih;
+
+ if (!ih) {
+ if (!idle_hint) {
+ if (k.monotonic > ts.monotonic)
+ ts = k;
+ } else {
+ idle_hint = false;
+ ts = k;
+ }
+ } else if (idle_hint) {
+
+ if (k.monotonic > ts.monotonic)
+ ts = k;
+ }
+ }
+
+ if (t)
+ *t = ts;
+
+ return idle_hint;
+}
+
+bool seat_may_gc(Seat *s, bool drop_not_started) {
+ assert(s);
+
+ if (drop_not_started && !s->started)
+ return true;
+
+ if (seat_is_seat0(s))
+ return false;
+
+ return !seat_has_master_device(s);
+}
+
+void seat_add_to_gc_queue(Seat *s) {
+ assert(s);
+
+ if (s->in_gc_queue)
+ return;
+
+ LIST_PREPEND(gc_queue, s->manager->seat_gc_queue, s);
+ s->in_gc_queue = true;
+}
+
+static bool seat_name_valid_char(char c) {
+ return
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ IN_SET(c, '-', '_');
+}
+
+bool seat_name_is_valid(const char *name) {
+ const char *p;
+
+ assert(name);
+
+ if (!startswith(name, "seat"))
+ return false;
+
+ if (!name[4])
+ return false;
+
+ for (p = name; *p; p++)
+ if (!seat_name_valid_char(*p))
+ return false;
+
+ if (strlen(name) > 255)
+ return false;
+
+ return true;
+}
diff --git a/src/login/logind-seat.h b/src/login/logind-seat.h
new file mode 100644
index 0000000..ddc45f4
--- /dev/null
+++ b/src/login/logind-seat.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Seat Seat;
+
+#include "list.h"
+#include "logind-session.h"
+
+struct Seat {
+ Manager *manager;
+ char *id;
+
+ char *state_file;
+
+ LIST_HEAD(Device, devices);
+
+ Session *active;
+ Session *pending_switch;
+ LIST_HEAD(Session, sessions);
+
+ Session **positions;
+ size_t position_count;
+
+ bool in_gc_queue:1;
+ bool started:1;
+
+ LIST_FIELDS(Seat, gc_queue);
+};
+
+int seat_new(Seat **ret, Manager *m, const char *id);
+Seat* seat_free(Seat *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Seat *, seat_free);
+
+int seat_save(Seat *s);
+int seat_load(Seat *s);
+
+int seat_apply_acls(Seat *s, Session *old_active);
+int seat_set_active(Seat *s, Session *session);
+int seat_switch_to(Seat *s, unsigned num);
+int seat_switch_to_next(Seat *s);
+int seat_switch_to_previous(Seat *s);
+int seat_active_vt_changed(Seat *s, unsigned vtnr);
+int seat_read_active_vt(Seat *s);
+int seat_preallocate_vts(Seat *s);
+
+int seat_attach_session(Seat *s, Session *session);
+void seat_complete_switch(Seat *s);
+void seat_evict_position(Seat *s, Session *session);
+void seat_claim_position(Seat *s, Session *session, unsigned pos);
+
+bool seat_has_vts(Seat *s);
+bool seat_is_seat0(Seat *s);
+bool seat_can_tty(Seat *s);
+bool seat_has_master_device(Seat *s);
+bool seat_can_graphical(Seat *s);
+
+int seat_get_idle_hint(Seat *s, dual_timestamp *t);
+
+int seat_start(Seat *s);
+int seat_stop(Seat *s, bool force);
+int seat_stop_sessions(Seat *s, bool force);
+
+bool seat_may_gc(Seat *s, bool drop_not_started);
+void seat_add_to_gc_queue(Seat *s);
+
+bool seat_name_is_valid(const char *name);
+
+static inline bool SEAT_IS_SELF(const char *name) {
+ return isempty(name) || streq(name, "self");
+}
+
+static inline bool SEAT_IS_AUTO(const char *name) {
+ return streq_ptr(name, "auto");
+}
diff --git a/src/login/logind-session-dbus.c b/src/login/logind-session-dbus.c
new file mode 100644
index 0000000..b5d240b
--- /dev/null
+++ b/src/login/logind-session-dbus.c
@@ -0,0 +1,972 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-label.h"
+#include "bus-polkit.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "logind-brightness.h"
+#include "logind-dbus.h"
+#include "logind-seat-dbus.h"
+#include "logind-session-dbus.h"
+#include "logind-session-device.h"
+#include "logind-session.h"
+#include "logind-user-dbus.h"
+#include "logind.h"
+#include "missing_capability.h"
+#include "path-util.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static int property_get_user(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ p = user_bus_path(s->user);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(uo)", (uint32_t) s->user->user_record->uid, p);
+}
+
+static int property_get_name(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ return sd_bus_message_append(reply, "s", s->user->user_record->user_name);
+}
+
+static int property_get_seat(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ p = s->seat ? seat_bus_path(s->seat) : strdup("/");
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(so)", s->seat ? s->seat->id : "", p);
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, session_type, SessionType);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_class, session_class, SessionClass);
+static BUS_DEFINE_PROPERTY_GET(property_get_active, "b", Session, session_is_active);
+static BUS_DEFINE_PROPERTY_GET2(property_get_state, "s", Session, session_get_state, session_state_to_string);
+
+static int property_get_idle_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ return sd_bus_message_append(reply, "b", session_get_idle_hint(s, NULL) > 0);
+}
+
+static int property_get_idle_since_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Session *s = userdata;
+ dual_timestamp t = DUAL_TIMESTAMP_NULL;
+ uint64_t u;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ r = session_get_idle_hint(s, &t);
+ if (r < 0)
+ return r;
+
+ u = streq(property, "IdleSinceHint") ? t.realtime : t.monotonic;
+
+ return sd_bus_message_append(reply, "t", u);
+}
+
+static int property_get_locked_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Session *s = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(s);
+
+ return sd_bus_message_append(reply, "b", session_get_locked_hint(s) > 0);
+}
+
+int bus_session_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ s->user->user_record->uid,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_stop(s, /* force = */ true);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_session_method_activate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.chvt",
+ NULL,
+ false,
+ UID_INVALID,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_activate(s);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_session_method_lock(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.login1.lock-sessions",
+ NULL,
+ false,
+ s->user->user_record->uid,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_send_lock(s, strstr(sd_bus_message_get_member(message), "Lock"));
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_idle_hint(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Session *s = userdata;
+ uid_t uid;
+ int r, b;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ if (uid != 0 && uid != s->user->user_record->uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Only owner of session may set idle hint");
+
+ r = session_set_idle_hint(s, b);
+ if (r == -ENOTTY)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Idle hint control is not supported on non-graphical sessions.");
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_locked_hint(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Session *s = userdata;
+ uid_t uid;
+ int r, b;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ if (uid != 0 && uid != s->user->user_record->uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Only owner of session may set locked hint");
+
+ session_set_locked_hint(s, b);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_session_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ const char *swho;
+ int32_t signo;
+ KillWho who;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "si", &swho, &signo);
+ if (r < 0)
+ return r;
+
+ if (isempty(swho))
+ who = KILL_ALL;
+ else {
+ who = kill_who_from_string(swho);
+ if (who < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid kill parameter '%s'", swho);
+ }
+
+ if (!SIGNAL_VALID(signo))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid signal %i", signo);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ s->user->user_record->uid,
+ &s->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = session_kill(s, who, signo);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_take_control(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ Session *s = userdata;
+ int r, force;
+ uid_t uid;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "b", &force);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ if (uid != 0 && (force || uid != s->user->user_record->uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Only owner of session may take control");
+
+ r = session_set_controller(s, sd_bus_message_get_sender(message), force, true);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_release_control(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+
+ assert(message);
+ assert(s);
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You are not in control of this session");
+
+ session_drop_controller(s);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_type(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ const char *t;
+ SessionType type;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "s", &t);
+ if (r < 0)
+ return r;
+
+ type = session_type_from_string(t);
+ if (type < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid session type '%s'", t);
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You must be in control of this session to set type");
+
+ session_set_type(s, type);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_take_device(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ uint32_t major, minor;
+ SessionDevice *sd;
+ dev_t dev;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "uu", &major, &minor);
+ if (r < 0)
+ return r;
+
+ if (!DEVICE_MAJOR_VALID(major) || !DEVICE_MINOR_VALID(minor))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Device major/minor is not valid.");
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You are not in control of this session");
+
+ dev = makedev(major, minor);
+ sd = hashmap_get(s->devices, &dev);
+ if (sd)
+ /* We don't allow retrieving a device multiple times.
+ * The related ReleaseDevice call is not ref-counted.
+ * The caller should use dup() if it requires more
+ * than one fd (it would be functionally
+ * equivalent). */
+ return sd_bus_error_setf(error, BUS_ERROR_DEVICE_IS_TAKEN, "Device already taken");
+
+ r = session_device_new(s, dev, true, &sd);
+ if (r < 0)
+ return r;
+
+ r = session_device_save(sd);
+ if (r < 0)
+ goto error;
+
+ r = sd_bus_reply_method_return(message, "hb", sd->fd, !sd->active);
+ if (r < 0)
+ goto error;
+
+ session_save(s);
+ return 1;
+
+error:
+ session_device_free(sd);
+ return r;
+}
+
+static int method_release_device(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ uint32_t major, minor;
+ SessionDevice *sd;
+ dev_t dev;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "uu", &major, &minor);
+ if (r < 0)
+ return r;
+
+ if (!DEVICE_MAJOR_VALID(major) || !DEVICE_MINOR_VALID(minor))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Device major/minor is not valid.");
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You are not in control of this session");
+
+ dev = makedev(major, minor);
+ sd = hashmap_get(s->devices, &dev);
+ if (!sd)
+ return sd_bus_error_setf(error, BUS_ERROR_DEVICE_NOT_TAKEN, "Device not taken");
+
+ session_device_free(sd);
+ session_save(s);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_pause_device_complete(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Session *s = userdata;
+ uint32_t major, minor;
+ SessionDevice *sd;
+ dev_t dev;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "uu", &major, &minor);
+ if (r < 0)
+ return r;
+
+ if (!DEVICE_MAJOR_VALID(major) || !DEVICE_MINOR_VALID(minor))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Device major/minor is not valid.");
+
+ if (!session_is_controller(s, sd_bus_message_get_sender(message)))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_IN_CONTROL, "You are not in control of this session");
+
+ dev = makedev(major, minor);
+ sd = hashmap_get(s->devices, &dev);
+ if (!sd)
+ return sd_bus_error_setf(error, BUS_ERROR_DEVICE_NOT_TAKEN, "Device not taken");
+
+ session_device_complete_pause(sd);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_brightness(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ const char *subsystem, *name, *seat;
+ Session *s = userdata;
+ uint32_t brightness;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ r = sd_bus_message_read(message, "ssu", &subsystem, &name, &brightness);
+ if (r < 0)
+ return r;
+
+ if (!STR_IN_SET(subsystem, "backlight", "leds"))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Subsystem type %s not supported, must be one of 'backlight' or 'leds'.", subsystem);
+ if (!filename_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Not a valid device name %s, refusing.", name);
+
+ if (!s->seat)
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_YOUR_DEVICE, "Your session has no seat, refusing.");
+ if (s->seat->active != s)
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_YOUR_DEVICE, "Session is not in foreground, refusing.");
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &uid);
+ if (r < 0)
+ return r;
+
+ if (uid != 0 && uid != s->user->user_record->uid)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Only owner of session may change brightness.");
+
+ r = sd_device_new_from_subsystem_sysname(&d, subsystem, name);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to open device %s:%s: %m", subsystem, name);
+
+ if (sd_device_get_property_value(d, "ID_SEAT", &seat) >= 0 && !streq_ptr(seat, s->seat->id))
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_YOUR_DEVICE, "Device %s:%s does not belong to your seat %s, refusing.", subsystem, name, s->seat->id);
+
+ r = manager_write_brightness(s->manager, d, brightness, message);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int session_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *e = NULL;
+ sd_bus_message *message;
+ Manager *m = userdata;
+ Session *session;
+ const char *p;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ p = startswith(path, "/org/freedesktop/login1/session/");
+ if (!p)
+ return 0;
+
+ e = bus_label_unescape(p);
+ if (!e)
+ return -ENOMEM;
+
+ message = sd_bus_get_current_message(bus);
+
+ r = manager_get_session_from_creds(m, message, e, error, &session);
+ if (r == -ENXIO) {
+ sd_bus_error_free(error);
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ *found = session;
+ return 1;
+}
+
+char *session_bus_path(Session *s) {
+ _cleanup_free_ char *t = NULL;
+
+ assert(s);
+
+ t = bus_label_escape(s->id);
+ if (!t)
+ return NULL;
+
+ return strjoin("/org/freedesktop/login1/session/", t);
+}
+
+static int session_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ sd_bus_message *message;
+ Manager *m = userdata;
+ Session *session;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ HASHMAP_FOREACH(session, m->sessions) {
+ char *p;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ message = sd_bus_get_current_message(bus);
+ if (message) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r >= 0) {
+ bool may_auto = false;
+ const char *name;
+
+ r = sd_bus_creds_get_session(creds, &name);
+ if (r >= 0) {
+ session = hashmap_get(m->sessions, name);
+ if (session) {
+ r = strv_extend(&l, "/org/freedesktop/login1/session/self");
+ if (r < 0)
+ return r;
+
+ may_auto = true;
+ }
+ }
+
+ if (!may_auto) {
+ uid_t uid;
+
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r >= 0) {
+ User *user;
+
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ may_auto = user && user->display;
+ }
+ }
+
+ if (may_auto) {
+ r = strv_extend(&l, "/org/freedesktop/login1/session/auto");
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ *nodes = TAKE_PTR(l);
+ return 1;
+}
+
+int session_send_signal(Session *s, bool new_session) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(s);
+
+ p = session_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ s->manager->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ new_session ? "SessionNew" : "SessionRemoved",
+ "so", s->id, p);
+}
+
+int session_send_changed(Session *s, const char *properties, ...) {
+ _cleanup_free_ char *p = NULL;
+ char **l;
+
+ assert(s);
+
+ if (!s->started)
+ return 0;
+
+ p = session_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ l = strv_from_stdarg_alloca(properties);
+
+ return sd_bus_emit_properties_changed_strv(s->manager->bus, p, "org.freedesktop.login1.Session", l);
+}
+
+int session_send_lock(Session *s, bool lock) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(s);
+
+ p = session_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ s->manager->bus,
+ p,
+ "org.freedesktop.login1.Session",
+ lock ? "Lock" : "Unlock",
+ NULL);
+}
+
+int session_send_lock_all(Manager *m, bool lock) {
+ Session *session;
+ int r = 0;
+
+ assert(m);
+
+ HASHMAP_FOREACH(session, m->sessions) {
+ int k;
+
+ k = session_send_lock(session, lock);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool session_ready(Session *s) {
+ assert(s);
+
+ /* Returns true when the session is ready, i.e. all jobs we enqueued for it are done (regardless if successful or not) */
+
+ return !s->scope_job &&
+ !s->user->service_job;
+}
+
+int session_send_create_reply(Session *s, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *c = NULL;
+ _cleanup_close_ int fifo_fd = -1;
+ _cleanup_free_ char *p = NULL;
+
+ assert(s);
+
+ /* This is called after the session scope and the user service were successfully created, and finishes where
+ * bus_manager_create_session() left off. */
+
+ if (!s->create_message)
+ return 0;
+
+ if (!sd_bus_error_is_set(error) && !session_ready(s))
+ return 0;
+
+ c = TAKE_PTR(s->create_message);
+ if (error)
+ return sd_bus_reply_method_error(c, error);
+
+ fifo_fd = session_create_fifo(s);
+ if (fifo_fd < 0)
+ return fifo_fd;
+
+ /* Update the session state file before we notify the client about the result. */
+ session_save(s);
+
+ p = session_bus_path(s);
+ if (!p)
+ return -ENOMEM;
+
+ log_debug("Sending reply about created session: "
+ "id=%s object_path=%s uid=%u runtime_path=%s "
+ "session_fd=%d seat=%s vtnr=%u",
+ s->id,
+ p,
+ (uint32_t) s->user->user_record->uid,
+ s->user->runtime_path,
+ fifo_fd,
+ s->seat ? s->seat->id : "",
+ (uint32_t) s->vtnr);
+
+ return sd_bus_reply_method_return(
+ c, "soshusub",
+ s->id,
+ p,
+ s->user->runtime_path,
+ fifo_fd,
+ (uint32_t) s->user->user_record->uid,
+ s->seat ? s->seat->id : "",
+ (uint32_t) s->vtnr,
+ false);
+}
+
+static const sd_bus_vtable session_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Id", "s", NULL, offsetof(Session, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("User", "(uo)", property_get_user, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Name", "s", property_get_name, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("Timestamp", offsetof(Session, timestamp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("VTNr", "u", NULL, offsetof(Session, vtnr), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Seat", "(so)", property_get_seat, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("TTY", "s", NULL, offsetof(Session, tty), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Display", "s", NULL, offsetof(Session, display), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Remote", "b", bus_property_get_bool, offsetof(Session, remote), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoteHost", "s", NULL, offsetof(Session, remote_host), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoteUser", "s", NULL, offsetof(Session, remote_user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Service", "s", NULL, offsetof(Session, service), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Desktop", "s", NULL, offsetof(Session, desktop), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Scope", "s", NULL, offsetof(Session, scope), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Leader", "u", bus_property_get_pid, offsetof(Session, leader), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Audit", "u", NULL, offsetof(Session, audit_id), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Session, type), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Class", "s", property_get_class, offsetof(Session, class), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Active", "b", property_get_active, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("State", "s", property_get_state, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleHint", "b", property_get_idle_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHint", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHintMonotonic", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("LockedHint", "b", property_get_locked_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_METHOD("Terminate",
+ NULL,
+ NULL,
+ bus_session_method_terminate,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Activate",
+ NULL,
+ NULL,
+ bus_session_method_activate,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Lock",
+ NULL,
+ NULL,
+ bus_session_method_lock,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Unlock",
+ NULL,
+ NULL,
+ bus_session_method_lock,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetIdleHint",
+ "b",
+ SD_BUS_PARAM(idle),
+ NULL,,
+ method_set_idle_hint,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetLockedHint",
+ "b",
+ SD_BUS_PARAM(locked),
+ NULL,,
+ method_set_locked_hint,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Kill",
+ "si",
+ SD_BUS_PARAM(who)
+ SD_BUS_PARAM(signal_number),
+ NULL,,
+ bus_session_method_kill,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("TakeControl",
+ "b",
+ SD_BUS_PARAM(force),
+ NULL,,
+ method_take_control,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReleaseControl",
+ NULL,
+ NULL,
+ method_release_control,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetType",
+ "s",
+ SD_BUS_PARAM(type),
+ NULL,,
+ method_set_type,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("TakeDevice",
+ "uu",
+ SD_BUS_PARAM(major)
+ SD_BUS_PARAM(minor),
+ "hb",
+ SD_BUS_PARAM(fd)
+ SD_BUS_PARAM(inactive),
+ method_take_device,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ReleaseDevice",
+ "uu",
+ SD_BUS_PARAM(major)
+ SD_BUS_PARAM(minor),
+ NULL,,
+ method_release_device,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("PauseDeviceComplete",
+ "uu",
+ SD_BUS_PARAM(major)
+ SD_BUS_PARAM(minor),
+ NULL,,
+ method_pause_device_complete,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetBrightness",
+ "ssu",
+ SD_BUS_PARAM(subsystem)
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(brightness),
+ NULL,,
+ method_set_brightness,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL_WITH_NAMES("PauseDevice",
+ "uus",
+ SD_BUS_PARAM(major)
+ SD_BUS_PARAM(minor)
+ SD_BUS_PARAM(type),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("ResumeDevice",
+ "uuh",
+ SD_BUS_PARAM(major)
+ SD_BUS_PARAM(minor)
+ SD_BUS_PARAM(fd),
+ 0),
+ SD_BUS_SIGNAL("Lock", NULL, 0),
+ SD_BUS_SIGNAL("Unlock", NULL, 0),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation session_object = {
+ "/org/freedesktop/login1/session",
+ "org.freedesktop.login1.Session",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({session_vtable, session_object_find}),
+ .node_enumerator = session_node_enumerator,
+};
diff --git a/src/login/logind-session-dbus.h b/src/login/logind-session-dbus.h
new file mode 100644
index 0000000..751ca86
--- /dev/null
+++ b/src/login/logind-session-dbus.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-object.h"
+#include "logind-session.h"
+
+extern const BusObjectImplementation session_object;
+
+char *session_bus_path(Session *s);
+
+int session_send_signal(Session *s, bool new_session);
+int session_send_changed(Session *s, const char *properties, ...) _sentinel_;
+int session_send_lock(Session *s, bool lock);
+int session_send_lock_all(Manager *m, bool lock);
+
+int session_send_create_reply(Session *s, sd_bus_error *error);
+
+int bus_session_method_activate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_session_method_lock(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_session_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_session_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/login/logind-session-device.c b/src/login/logind-session-device.c
new file mode 100644
index 0000000..f2adb96
--- /dev/null
+++ b/src/login/logind-session-device.c
@@ -0,0 +1,518 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "logind-session-dbus.h"
+#include "logind-session-device.h"
+#include "missing_drm.h"
+#include "missing_input.h"
+#include "parse-util.h"
+#include "util.h"
+
+enum SessionDeviceNotifications {
+ SESSION_DEVICE_RESUME,
+ SESSION_DEVICE_TRY_PAUSE,
+ SESSION_DEVICE_PAUSE,
+ SESSION_DEVICE_RELEASE,
+};
+
+static int session_device_notify(SessionDevice *sd, enum SessionDeviceNotifications type) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *t = NULL;
+ uint32_t major, minor;
+ int r;
+
+ assert(sd);
+
+ major = major(sd->dev);
+ minor = minor(sd->dev);
+
+ if (!sd->session->controller)
+ return 0;
+
+ path = session_bus_path(sd->session);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_message_new_signal(
+ sd->session->manager->bus,
+ &m, path,
+ "org.freedesktop.login1.Session",
+ (type == SESSION_DEVICE_RESUME) ? "ResumeDevice" : "PauseDevice");
+ if (!m)
+ return r;
+
+ r = sd_bus_message_set_destination(m, sd->session->controller);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SESSION_DEVICE_RESUME:
+ r = sd_bus_message_append(m, "uuh", major, minor, sd->fd);
+ if (r < 0)
+ return r;
+ break;
+
+ case SESSION_DEVICE_TRY_PAUSE:
+ t = "pause";
+ break;
+
+ case SESSION_DEVICE_PAUSE:
+ t = "force";
+ break;
+
+ case SESSION_DEVICE_RELEASE:
+ t = "gone";
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (t) {
+ r = sd_bus_message_append(m, "uus", major, minor, t);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(sd->session->manager->bus, m, NULL);
+}
+
+static void sd_eviocrevoke(int fd) {
+ static bool warned = false;
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, EVIOCREVOKE, NULL) < 0) {
+
+ if (errno == EINVAL && !warned) {
+ log_warning_errno(errno, "Kernel does not support evdev-revocation: %m");
+ warned = true;
+ }
+ }
+}
+
+static int sd_drmsetmaster(int fd) {
+ assert(fd >= 0);
+
+ if (ioctl(fd, DRM_IOCTL_SET_MASTER, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int sd_drmdropmaster(int fd) {
+ assert(fd >= 0);
+
+ if (ioctl(fd, DRM_IOCTL_DROP_MASTER, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int session_device_open(SessionDevice *sd, bool active) {
+ int fd, r;
+
+ assert(sd);
+ assert(sd->type != DEVICE_TYPE_UNKNOWN);
+ assert(sd->node);
+
+ /* open device and try to get an udev_device from it */
+ fd = open(sd->node, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ switch (sd->type) {
+
+ case DEVICE_TYPE_DRM:
+ if (active) {
+ /* Weird legacy DRM semantics might return an error even though we're master. No way to detect
+ * that so fail at all times and let caller retry in inactive state. */
+ r = sd_drmsetmaster(fd);
+ if (r < 0) {
+ close_nointr(fd);
+ return r;
+ }
+ } else
+ /* DRM-Master is granted to the first user who opens a device automatically (ughh,
+ * racy!). Hence, we just drop DRM-Master in case we were the first. */
+ (void) sd_drmdropmaster(fd);
+ break;
+
+ case DEVICE_TYPE_EVDEV:
+ if (!active)
+ sd_eviocrevoke(fd);
+ break;
+
+ case DEVICE_TYPE_UNKNOWN:
+ default:
+ /* fallback for devices without synchronizations */
+ break;
+ }
+
+ return fd;
+}
+
+static int session_device_start(SessionDevice *sd) {
+ int r;
+
+ assert(sd);
+ assert(session_is_active(sd->session));
+
+ if (sd->active)
+ return 0;
+
+ switch (sd->type) {
+
+ case DEVICE_TYPE_DRM:
+ if (sd->fd < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADF),
+ "Failed to re-activate DRM fd, as the fd was lost (maybe logind restart went wrong?)");
+
+ /* Device is kept open. Simply call drmSetMaster() and hope there is no-one else. In case it fails, we
+ * keep the device paused. Maybe at some point we have a drmStealMaster(). */
+ r = sd_drmsetmaster(sd->fd);
+ if (r < 0)
+ return r;
+ break;
+
+ case DEVICE_TYPE_EVDEV:
+ /* Evdev devices are revoked while inactive. Reopen it and we are fine. */
+ r = session_device_open(sd, true);
+ if (r < 0)
+ return r;
+
+ /* For evdev devices, the file descriptor might be left uninitialized. This might happen while resuming
+ * into a session and logind has been restarted right before. */
+ CLOSE_AND_REPLACE(sd->fd, r);
+ break;
+
+ case DEVICE_TYPE_UNKNOWN:
+ default:
+ /* fallback for devices without synchronizations */
+ break;
+ }
+
+ sd->active = true;
+ return 0;
+}
+
+static void session_device_stop(SessionDevice *sd) {
+ assert(sd);
+
+ if (!sd->active)
+ return;
+
+ switch (sd->type) {
+
+ case DEVICE_TYPE_DRM:
+ if (sd->fd < 0) {
+ log_error("Failed to de-activate DRM fd, as the fd was lost (maybe logind restart went wrong?)");
+ return;
+ }
+
+ /* On DRM devices we simply drop DRM-Master but keep it open.
+ * This allows the user to keep resources allocated. The
+ * CAP_SYS_ADMIN restriction to DRM-Master prevents users from
+ * circumventing this. */
+ sd_drmdropmaster(sd->fd);
+ break;
+
+ case DEVICE_TYPE_EVDEV:
+ /* Revoke access on evdev file-descriptors during deactivation.
+ * This will basically prevent any operations on the fd and
+ * cannot be undone. Good side is: it needs no CAP_SYS_ADMIN
+ * protection this way. */
+ sd_eviocrevoke(sd->fd);
+ break;
+
+ case DEVICE_TYPE_UNKNOWN:
+ default:
+ /* fallback for devices without synchronization */
+ break;
+ }
+
+ sd->active = false;
+}
+
+static DeviceType detect_device_type(sd_device *dev) {
+ const char *sysname, *subsystem;
+ DeviceType type = DEVICE_TYPE_UNKNOWN;
+
+ if (sd_device_get_sysname(dev, &sysname) < 0 ||
+ sd_device_get_subsystem(dev, &subsystem) < 0)
+ return type;
+
+ if (streq(subsystem, "drm")) {
+ if (startswith(sysname, "card"))
+ type = DEVICE_TYPE_DRM;
+ } else if (streq(subsystem, "input")) {
+ if (startswith(sysname, "event"))
+ type = DEVICE_TYPE_EVDEV;
+ }
+
+ return type;
+}
+
+static int session_device_verify(SessionDevice *sd) {
+ _cleanup_(sd_device_unrefp) sd_device *p = NULL;
+ const char *sp, *node;
+ sd_device *dev;
+ int r;
+
+ r = sd_device_new_from_devnum(&p, 'c', sd->dev);
+ if (r < 0)
+ return r;
+
+ dev = p;
+
+ if (sd_device_get_syspath(dev, &sp) < 0 ||
+ sd_device_get_devname(dev, &node) < 0)
+ return -EINVAL;
+
+ /* detect device type so we can find the correct sysfs parent */
+ sd->type = detect_device_type(dev);
+ if (sd->type == DEVICE_TYPE_UNKNOWN)
+ return -ENODEV;
+
+ else if (sd->type == DEVICE_TYPE_EVDEV) {
+ /* for evdev devices we need the parent node as device */
+ if (sd_device_get_parent_with_subsystem_devtype(p, "input", NULL, &dev) < 0)
+ return -ENODEV;
+ if (sd_device_get_syspath(dev, &sp) < 0)
+ return -ENODEV;
+
+ } else if (sd->type != DEVICE_TYPE_DRM)
+ /* Prevent opening unsupported devices. Especially devices of
+ * subsystem "input" must be opened via the evdev node as
+ * we require EVIOCREVOKE. */
+ return -ENODEV;
+
+ /* search for an existing seat device and return it if available */
+ sd->device = hashmap_get(sd->session->manager->devices, sp);
+ if (!sd->device) {
+ /* The caller might have gotten the udev event before we were
+ * able to process it. Hence, fake the "add" event and let the
+ * logind-manager handle the new device. */
+ r = manager_process_seat_device(sd->session->manager, dev);
+ if (r < 0)
+ return r;
+
+ /* if it's still not available, then the device is invalid */
+ sd->device = hashmap_get(sd->session->manager->devices, sp);
+ if (!sd->device)
+ return -ENODEV;
+ }
+
+ if (sd->device->seat != sd->session->seat)
+ return -EPERM;
+
+ sd->node = strdup(node);
+ if (!sd->node)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int session_device_new(Session *s, dev_t dev, bool open_device, SessionDevice **out) {
+ SessionDevice *sd;
+ int r;
+
+ assert(s);
+ assert(out);
+
+ if (!s->seat)
+ return -EPERM;
+
+ sd = new0(SessionDevice, 1);
+ if (!sd)
+ return -ENOMEM;
+
+ sd->session = s;
+ sd->dev = dev;
+ sd->fd = -1;
+ sd->type = DEVICE_TYPE_UNKNOWN;
+
+ r = session_device_verify(sd);
+ if (r < 0)
+ goto error;
+
+ r = hashmap_put(s->devices, &sd->dev, sd);
+ if (r < 0)
+ goto error;
+
+ if (open_device) {
+ /* Open the device for the first time. We need a valid fd to pass back
+ * to the caller. If the session is not active, this _might_ immediately
+ * revoke access and thus invalidate the fd. But this is still needed
+ * to pass a valid fd back. */
+ sd->active = session_is_active(s);
+ r = session_device_open(sd, sd->active);
+ if (r < 0) {
+ /* EINVAL _may_ mean a master is active; retry inactive */
+ if (sd->active && r == -EINVAL) {
+ sd->active = false;
+ r = session_device_open(sd, false);
+ }
+ if (r < 0)
+ goto error;
+ }
+ sd->fd = r;
+ }
+
+ LIST_PREPEND(sd_by_device, sd->device->session_devices, sd);
+
+ *out = sd;
+ return 0;
+
+error:
+ hashmap_remove(s->devices, &sd->dev);
+ free(sd->node);
+ free(sd);
+ return r;
+}
+
+void session_device_free(SessionDevice *sd) {
+ assert(sd);
+
+ /* Make sure to remove the pushed fd. */
+ if (sd->pushed_fd)
+ (void) sd_notifyf(false,
+ "FDSTOREREMOVE=1\n"
+ "FDNAME=session-%s-device-%u-%u",
+ sd->session->id, major(sd->dev), minor(sd->dev));
+
+ session_device_stop(sd);
+ session_device_notify(sd, SESSION_DEVICE_RELEASE);
+ safe_close(sd->fd);
+
+ LIST_REMOVE(sd_by_device, sd->device->session_devices, sd);
+
+ hashmap_remove(sd->session->devices, &sd->dev);
+
+ free(sd->node);
+ free(sd);
+}
+
+void session_device_complete_pause(SessionDevice *sd) {
+ SessionDevice *iter;
+
+ if (!sd->active)
+ return;
+
+ session_device_stop(sd);
+
+ /* if not all devices are paused, wait for further completion events */
+ HASHMAP_FOREACH(iter, sd->session->devices)
+ if (iter->active)
+ return;
+
+ /* complete any pending session switch */
+ seat_complete_switch(sd->session->seat);
+}
+
+void session_device_resume_all(Session *s) {
+ SessionDevice *sd;
+
+ assert(s);
+
+ HASHMAP_FOREACH(sd, s->devices) {
+ if (sd->active)
+ continue;
+
+ if (session_device_start(sd) < 0)
+ continue;
+ if (session_device_save(sd) < 0)
+ continue;
+
+ session_device_notify(sd, SESSION_DEVICE_RESUME);
+ }
+}
+
+void session_device_pause_all(Session *s) {
+ SessionDevice *sd;
+
+ assert(s);
+
+ HASHMAP_FOREACH(sd, s->devices) {
+ if (!sd->active)
+ continue;
+
+ session_device_stop(sd);
+ session_device_notify(sd, SESSION_DEVICE_PAUSE);
+ }
+}
+
+unsigned session_device_try_pause_all(Session *s) {
+ unsigned num_pending = 0;
+ SessionDevice *sd;
+
+ assert(s);
+
+ HASHMAP_FOREACH(sd, s->devices) {
+ if (!sd->active)
+ continue;
+
+ session_device_notify(sd, SESSION_DEVICE_TRY_PAUSE);
+ num_pending++;
+ }
+
+ return num_pending;
+}
+
+int session_device_save(SessionDevice *sd) {
+ _cleanup_free_ char *m = NULL;
+ const char *id;
+ int r;
+
+ assert(sd);
+
+ /* Store device fd in PID1. It will send it back to us on restart so revocation will continue to work. To make
+ * things simple, send fds for all type of devices even if they don't support the revocation mechanism so we
+ * don't have to handle them differently later.
+ *
+ * Note: for device supporting revocation, PID1 will drop a stored fd automatically if the corresponding device
+ * is revoked. */
+
+ if (sd->pushed_fd)
+ return 0;
+
+ /* Session ID does not contain separators. */
+ id = sd->session->id;
+ assert(*(id + strcspn(id, "-\n")) == '\0');
+
+ r = asprintf(&m, "FDSTORE=1\n"
+ "FDNAME=session-%s-device-%u-%u\n",
+ id, major(sd->dev), minor(sd->dev));
+ if (r < 0)
+ return r;
+
+ r = sd_pid_notify_with_fds(0, false, m, &sd->fd, 1);
+ if (r < 0)
+ return r;
+
+ sd->pushed_fd = true;
+ return 1;
+}
+
+void session_device_attach_fd(SessionDevice *sd, int fd, bool active) {
+ assert(fd >= 0);
+ assert(sd);
+ assert(sd->fd < 0);
+ assert(!sd->active);
+
+ sd->fd = fd;
+ sd->pushed_fd = true;
+ sd->active = active;
+}
diff --git a/src/login/logind-session-device.h b/src/login/logind-session-device.h
new file mode 100644
index 0000000..4da3fe6
--- /dev/null
+++ b/src/login/logind-session-device.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef enum DeviceType DeviceType;
+typedef struct SessionDevice SessionDevice;
+
+#include "list.h"
+#include "logind.h"
+
+enum DeviceType {
+ DEVICE_TYPE_UNKNOWN,
+ DEVICE_TYPE_DRM,
+ DEVICE_TYPE_EVDEV,
+};
+
+struct SessionDevice {
+ Session *session;
+ Device *device;
+
+ dev_t dev;
+ char *node;
+ int fd;
+ DeviceType type:3;
+ bool active:1;
+ bool pushed_fd:1;
+
+ LIST_FIELDS(struct SessionDevice, sd_by_device);
+};
+
+int session_device_new(Session *s, dev_t dev, bool open_device, SessionDevice **out);
+void session_device_free(SessionDevice *sd);
+void session_device_complete_pause(SessionDevice *sd);
+
+void session_device_resume_all(Session *s);
+void session_device_pause_all(Session *s);
+unsigned session_device_try_pause_all(Session *s);
+
+int session_device_save(SessionDevice *sd);
+void session_device_attach_fd(SessionDevice *sd, int fd, bool active);
diff --git a/src/login/logind-session.c b/src/login/logind-session.c
new file mode 100644
index 0000000..34fcde9
--- /dev/null
+++ b/src/login/logind-session.c
@@ -0,0 +1,1461 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kd.h>
+#include <linux/vt.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "logind-dbus.h"
+#include "logind-seat-dbus.h"
+#include "logind-session-dbus.h"
+#include "logind-session.h"
+#include "logind-user-dbus.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+#define RELEASE_USEC (20*USEC_PER_SEC)
+
+static void session_remove_fifo(Session *s);
+static void session_restore_vt(Session *s);
+
+int session_new(Session **ret, Manager *m, const char *id) {
+ _cleanup_(session_freep) Session *s = NULL;
+ int r;
+
+ assert(ret);
+ assert(m);
+ assert(id);
+
+ if (!session_id_valid(id))
+ return -EINVAL;
+
+ s = new(Session, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (Session) {
+ .manager = m,
+ .fifo_fd = -1,
+ .vtfd = -1,
+ .audit_id = AUDIT_SESSION_INVALID,
+ .tty_validity = _TTY_VALIDITY_INVALID,
+ };
+
+ s->state_file = path_join("/run/systemd/sessions", id);
+ if (!s->state_file)
+ return -ENOMEM;
+
+ s->id = basename(s->state_file);
+
+ s->devices = hashmap_new(&devt_hash_ops);
+ if (!s->devices)
+ return -ENOMEM;
+
+ r = hashmap_put(m->sessions, s->id, s);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+Session* session_free(Session *s) {
+ SessionDevice *sd;
+
+ if (!s)
+ return NULL;
+
+ if (s->in_gc_queue)
+ LIST_REMOVE(gc_queue, s->manager->session_gc_queue, s);
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ session_drop_controller(s);
+
+ while ((sd = hashmap_first(s->devices)))
+ session_device_free(sd);
+
+ hashmap_free(s->devices);
+
+ if (s->user) {
+ LIST_REMOVE(sessions_by_user, s->user->sessions, s);
+
+ if (s->user->display == s)
+ s->user->display = NULL;
+
+ user_update_last_session_timer(s->user);
+ }
+
+ if (s->seat) {
+ if (s->seat->active == s)
+ s->seat->active = NULL;
+ if (s->seat->pending_switch == s)
+ s->seat->pending_switch = NULL;
+
+ seat_evict_position(s->seat, s);
+ LIST_REMOVE(sessions_by_seat, s->seat->sessions, s);
+ }
+
+ if (s->scope) {
+ hashmap_remove(s->manager->session_units, s->scope);
+ free(s->scope);
+ }
+
+ if (pid_is_valid(s->leader))
+ (void) hashmap_remove_value(s->manager->sessions_by_leader, PID_TO_PTR(s->leader), s);
+
+ free(s->scope_job);
+
+ sd_bus_message_unref(s->create_message);
+
+ free(s->tty);
+ free(s->display);
+ free(s->remote_host);
+ free(s->remote_user);
+ free(s->service);
+ free(s->desktop);
+
+ hashmap_remove(s->manager->sessions, s->id);
+
+ sd_event_source_unref(s->fifo_event_source);
+ safe_close(s->fifo_fd);
+
+ /* Note that we remove neither the state file nor the fifo path here, since we want both to survive
+ * daemon restarts */
+ free(s->state_file);
+ free(s->fifo_path);
+
+ return mfree(s);
+}
+
+void session_set_user(Session *s, User *u) {
+ assert(s);
+ assert(!s->user);
+
+ s->user = u;
+ LIST_PREPEND(sessions_by_user, u->sessions, s);
+
+ user_update_last_session_timer(u);
+}
+
+int session_set_leader(Session *s, pid_t pid) {
+ int r;
+
+ assert(s);
+
+ if (!pid_is_valid(pid))
+ return -EINVAL;
+
+ if (s->leader == pid)
+ return 0;
+
+ r = hashmap_put(s->manager->sessions_by_leader, PID_TO_PTR(pid), s);
+ if (r < 0)
+ return r;
+
+ if (pid_is_valid(s->leader))
+ (void) hashmap_remove_value(s->manager->sessions_by_leader, PID_TO_PTR(s->leader), s);
+
+ s->leader = pid;
+ (void) audit_session_from_pid(pid, &s->audit_id);
+
+ return 1;
+}
+
+static void session_save_devices(Session *s, FILE *f) {
+ SessionDevice *sd;
+
+ if (!hashmap_isempty(s->devices)) {
+ fprintf(f, "DEVICES=");
+ HASHMAP_FOREACH(sd, s->devices)
+ fprintf(f, "%u:%u ", major(sd->dev), minor(sd->dev));
+ fprintf(f, "\n");
+ }
+}
+
+int session_save(Session *s) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(s);
+
+ if (!s->user)
+ return -ESTALE;
+
+ if (!s->started)
+ return 0;
+
+ r = mkdir_safe_label("/run/systemd/sessions", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(s->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "UID="UID_FMT"\n"
+ "USER=%s\n"
+ "ACTIVE=%i\n"
+ "IS_DISPLAY=%i\n"
+ "STATE=%s\n"
+ "REMOTE=%i\n",
+ s->user->user_record->uid,
+ s->user->user_record->user_name,
+ session_is_active(s),
+ s->user->display == s,
+ session_state_to_string(session_get_state(s)),
+ s->remote);
+
+ if (s->type >= 0)
+ fprintf(f, "TYPE=%s\n", session_type_to_string(s->type));
+
+ if (s->original_type >= 0)
+ fprintf(f, "ORIGINAL_TYPE=%s\n", session_type_to_string(s->original_type));
+
+ if (s->class >= 0)
+ fprintf(f, "CLASS=%s\n", session_class_to_string(s->class));
+
+ if (s->scope)
+ fprintf(f, "SCOPE=%s\n", s->scope);
+ if (s->scope_job)
+ fprintf(f, "SCOPE_JOB=%s\n", s->scope_job);
+
+ if (s->fifo_path)
+ fprintf(f, "FIFO=%s\n", s->fifo_path);
+
+ if (s->seat)
+ fprintf(f, "SEAT=%s\n", s->seat->id);
+
+ if (s->tty)
+ fprintf(f, "TTY=%s\n", s->tty);
+
+ if (s->tty_validity >= 0)
+ fprintf(f, "TTY_VALIDITY=%s\n", tty_validity_to_string(s->tty_validity));
+
+ if (s->display)
+ fprintf(f, "DISPLAY=%s\n", s->display);
+
+ if (s->remote_host) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->remote_host);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "REMOTE_HOST=%s\n", escaped);
+ }
+
+ if (s->remote_user) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->remote_user);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "REMOTE_USER=%s\n", escaped);
+ }
+
+ if (s->service) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->service);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "SERVICE=%s\n", escaped);
+ }
+
+ if (s->desktop) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->desktop);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "DESKTOP=%s\n", escaped);
+ }
+
+ if (s->seat && seat_has_vts(s->seat))
+ fprintf(f, "VTNR=%u\n", s->vtnr);
+
+ if (!s->vtnr)
+ fprintf(f, "POSITION=%u\n", s->position);
+
+ if (pid_is_valid(s->leader))
+ fprintf(f, "LEADER="PID_FMT"\n", s->leader);
+
+ if (audit_session_is_valid(s->audit_id))
+ fprintf(f, "AUDIT=%"PRIu32"\n", s->audit_id);
+
+ if (dual_timestamp_is_set(&s->timestamp))
+ fprintf(f,
+ "REALTIME="USEC_FMT"\n"
+ "MONOTONIC="USEC_FMT"\n",
+ s->timestamp.realtime,
+ s->timestamp.monotonic);
+
+ if (s->controller) {
+ fprintf(f, "CONTROLLER=%s\n", s->controller);
+ session_save_devices(s, f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, s->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(s->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save session data %s: %m", s->state_file);
+}
+
+static int session_load_devices(Session *s, const char *devices) {
+ const char *p;
+ int r = 0;
+
+ assert(s);
+
+ for (p = devices;;) {
+ _cleanup_free_ char *word = NULL;
+ SessionDevice *sd;
+ dev_t dev;
+ int k;
+
+ k = extract_first_word(&p, &word, NULL, 0);
+ if (k == 0)
+ break;
+ if (k < 0) {
+ r = k;
+ break;
+ }
+
+ k = parse_dev(word, &dev);
+ if (k < 0) {
+ r = k;
+ continue;
+ }
+
+ /* The file descriptors for loaded devices will be reattached later. */
+ k = session_device_new(s, dev, false, &sd);
+ if (k < 0)
+ r = k;
+ }
+
+ if (r < 0)
+ log_error_errno(r, "Loading session devices for session %s failed: %m", s->id);
+
+ return r;
+}
+
+int session_load(Session *s) {
+ _cleanup_free_ char *remote = NULL,
+ *seat = NULL,
+ *tty_validity = NULL,
+ *vtnr = NULL,
+ *state = NULL,
+ *position = NULL,
+ *leader = NULL,
+ *type = NULL,
+ *original_type = NULL,
+ *class = NULL,
+ *uid = NULL,
+ *realtime = NULL,
+ *monotonic = NULL,
+ *controller = NULL,
+ *active = NULL,
+ *devices = NULL,
+ *is_display = NULL;
+
+ int k, r;
+
+ assert(s);
+
+ r = parse_env_file(NULL, s->state_file,
+ "REMOTE", &remote,
+ "SCOPE", &s->scope,
+ "SCOPE_JOB", &s->scope_job,
+ "FIFO", &s->fifo_path,
+ "SEAT", &seat,
+ "TTY", &s->tty,
+ "TTY_VALIDITY", &tty_validity,
+ "DISPLAY", &s->display,
+ "REMOTE_HOST", &s->remote_host,
+ "REMOTE_USER", &s->remote_user,
+ "SERVICE", &s->service,
+ "DESKTOP", &s->desktop,
+ "VTNR", &vtnr,
+ "STATE", &state,
+ "POSITION", &position,
+ "LEADER", &leader,
+ "TYPE", &type,
+ "ORIGINAL_TYPE", &original_type,
+ "CLASS", &class,
+ "UID", &uid,
+ "REALTIME", &realtime,
+ "MONOTONIC", &monotonic,
+ "CONTROLLER", &controller,
+ "ACTIVE", &active,
+ "DEVICES", &devices,
+ "IS_DISPLAY", &is_display);
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", s->state_file);
+
+ if (!s->user) {
+ uid_t u;
+ User *user;
+
+ if (!uid)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "UID not specified for session %s",
+ s->id);
+
+ r = parse_uid(uid, &u);
+ if (r < 0) {
+ log_error("Failed to parse UID value %s for session %s.", uid, s->id);
+ return r;
+ }
+
+ user = hashmap_get(s->manager->users, UID_TO_PTR(u));
+ if (!user)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "User of session %s not known.",
+ s->id);
+
+ session_set_user(s, user);
+ }
+
+ if (remote) {
+ k = parse_boolean(remote);
+ if (k >= 0)
+ s->remote = k;
+ }
+
+ if (vtnr)
+ safe_atou(vtnr, &s->vtnr);
+
+ if (seat && !s->seat) {
+ Seat *o;
+
+ o = hashmap_get(s->manager->seats, seat);
+ if (o)
+ r = seat_attach_session(o, s);
+ if (!o || r < 0)
+ log_error("Cannot attach session %s to seat %s", s->id, seat);
+ }
+
+ if (!s->seat || !seat_has_vts(s->seat))
+ s->vtnr = 0;
+
+ if (position && s->seat) {
+ unsigned npos;
+
+ safe_atou(position, &npos);
+ seat_claim_position(s->seat, s, npos);
+ }
+
+ if (tty_validity) {
+ TTYValidity v;
+
+ v = tty_validity_from_string(tty_validity);
+ if (v < 0)
+ log_debug("Failed to parse TTY validity: %s", tty_validity);
+ else
+ s->tty_validity = v;
+ }
+
+ if (leader) {
+ pid_t pid;
+
+ r = parse_pid(leader, &pid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse leader PID of session: %s", leader);
+ else {
+ r = session_set_leader(s, pid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set session leader PID, ignoring: %m");
+ }
+ }
+
+ if (type) {
+ SessionType t;
+
+ t = session_type_from_string(type);
+ if (t >= 0)
+ s->type = t;
+ }
+
+ if (original_type) {
+ SessionType ot;
+
+ ot = session_type_from_string(original_type);
+ if (ot >= 0)
+ s->original_type = ot;
+ } else
+ /* Pre-v246 compat: initialize original_type if not set in the state file */
+ s->original_type = s->type;
+
+ if (class) {
+ SessionClass c;
+
+ c = session_class_from_string(class);
+ if (c >= 0)
+ s->class = c;
+ }
+
+ if (state && streq(state, "closing"))
+ s->stopping = true;
+
+ if (s->fifo_path) {
+ int fd;
+
+ /* If we open an unopened pipe for reading we will not
+ get an EOF. to trigger an EOF we hence open it for
+ writing, but close it right away which then will
+ trigger the EOF. This will happen immediately if no
+ other process has the FIFO open for writing, i. e.
+ when the session died before logind (re)started. */
+
+ fd = session_create_fifo(s);
+ safe_close(fd);
+ }
+
+ if (realtime)
+ (void) deserialize_usec(realtime, &s->timestamp.realtime);
+ if (monotonic)
+ (void) deserialize_usec(monotonic, &s->timestamp.monotonic);
+
+ if (active) {
+ k = parse_boolean(active);
+ if (k >= 0)
+ s->was_active = k;
+ }
+
+ if (is_display) {
+ /* Note that when enumerating users are loaded before sessions, hence the display session to use is
+ * something we have to store along with the session and not the user, as in that case we couldn't
+ * apply it at the time we load the user. */
+
+ k = parse_boolean(is_display);
+ if (k < 0)
+ log_warning_errno(k, "Failed to parse IS_DISPLAY session property: %m");
+ else if (k > 0)
+ s->user->display = s;
+ }
+
+ if (controller) {
+ if (bus_name_has_owner(s->manager->bus, controller, NULL) > 0) {
+ session_set_controller(s, controller, false, false);
+ session_load_devices(s, devices);
+ } else
+ session_restore_vt(s);
+ }
+
+ return r;
+}
+
+int session_activate(Session *s) {
+ unsigned num_pending;
+
+ assert(s);
+ assert(s->user);
+
+ if (!s->seat)
+ return -EOPNOTSUPP;
+
+ if (s->seat->active == s)
+ return 0;
+
+ /* on seats with VTs, we let VTs manage session-switching */
+ if (seat_has_vts(s->seat)) {
+ if (s->vtnr == 0)
+ return -EOPNOTSUPP;
+
+ return chvt(s->vtnr);
+ }
+
+ /* On seats without VTs, we implement session-switching in logind. We
+ * try to pause all session-devices and wait until the session
+ * controller acknowledged them. Once all devices are asleep, we simply
+ * switch the active session and be done.
+ * We save the session we want to switch to in seat->pending_switch and
+ * seat_complete_switch() will perform the final switch. */
+
+ s->seat->pending_switch = s;
+
+ /* if no devices are running, immediately perform the session switch */
+ num_pending = session_device_try_pause_all(s);
+ if (!num_pending)
+ seat_complete_switch(s->seat);
+
+ return 0;
+}
+
+static int session_start_scope(Session *s, sd_bus_message *properties, sd_bus_error *error) {
+ int r;
+
+ assert(s);
+ assert(s->user);
+
+ if (!s->scope) {
+ _cleanup_free_ char *scope = NULL;
+ const char *description;
+
+ s->scope_job = mfree(s->scope_job);
+
+ scope = strjoin("session-", s->id, ".scope");
+ if (!scope)
+ return log_oom();
+
+ description = strjoina("Session ", s->id, " of user ", s->user->user_record->user_name);
+
+ r = manager_start_scope(
+ s->manager,
+ scope,
+ s->leader,
+ s->user->slice,
+ description,
+ /* These two have StopWhenUnneeded= set, hence add a dep towards them */
+ STRV_MAKE(s->user->runtime_dir_service,
+ s->user->service),
+ /* And order us after some more */
+ STRV_MAKE("systemd-logind.service",
+ "systemd-user-sessions.service",
+ s->user->runtime_dir_service,
+ s->user->service),
+ user_record_home_directory(s->user->user_record),
+ properties,
+ error,
+ &s->scope_job);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start session scope %s: %s",
+ scope, bus_error_message(error, r));
+
+ s->scope = TAKE_PTR(scope);
+ }
+
+ (void) hashmap_put(s->manager->session_units, s->scope, s);
+
+ return 0;
+}
+
+int session_start(Session *s, sd_bus_message *properties, sd_bus_error *error) {
+ int r;
+
+ assert(s);
+
+ if (!s->user)
+ return -ESTALE;
+
+ if (s->stopping)
+ return -EINVAL;
+
+ if (s->started)
+ return 0;
+
+ r = user_start(s->user);
+ if (r < 0)
+ return r;
+
+ r = session_start_scope(s, properties, error);
+ if (r < 0)
+ return r;
+
+ log_struct(s->class == SESSION_BACKGROUND ? LOG_DEBUG : LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SESSION_START_STR,
+ "SESSION_ID=%s", s->id,
+ "USER_ID=%s", s->user->user_record->user_name,
+ "LEADER="PID_FMT, s->leader,
+ LOG_MESSAGE("New session %s of user %s.", s->id, s->user->user_record->user_name));
+
+ if (!dual_timestamp_is_set(&s->timestamp))
+ dual_timestamp_get(&s->timestamp);
+
+ if (s->seat)
+ seat_read_active_vt(s->seat);
+
+ s->started = true;
+
+ user_elect_display(s->user);
+
+ /* Save data */
+ session_save(s);
+ user_save(s->user);
+ if (s->seat)
+ seat_save(s->seat);
+
+ /* Send signals */
+ session_send_signal(s, true);
+ user_send_changed(s->user, "Display", NULL);
+
+ if (s->seat && s->seat->active == s)
+ seat_send_changed(s->seat, "ActiveSession", NULL);
+
+ return 0;
+}
+
+static int session_stop_scope(Session *s, bool force) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(s);
+
+ if (!s->scope)
+ return 0;
+
+ /* Let's always abandon the scope first. This tells systemd that we are not interested anymore, and everything
+ * that is left in the scope is "left-over". Informing systemd about this has the benefit that it will log
+ * when killing any processes left after this point. */
+ r = manager_abandon_scope(s->manager, s->scope, &error);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to abandon session scope, ignoring: %s", bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ }
+
+ s->scope_job = mfree(s->scope_job);
+
+ /* Optionally, let's kill everything that's left now. */
+ if (force ||
+ (s->user->user_record->kill_processes != 0 &&
+ (s->user->user_record->kill_processes > 0 ||
+ manager_shall_kill(s->manager, s->user->user_record->user_name)))) {
+
+ r = manager_stop_unit(s->manager, s->scope, force ? "replace" : "fail", &error, &s->scope_job);
+ if (r < 0) {
+ if (force)
+ return log_error_errno(r, "Failed to stop session scope: %s", bus_error_message(&error, r));
+
+ log_warning_errno(r, "Failed to stop session scope, ignoring: %s", bus_error_message(&error, r));
+ }
+ } else {
+
+ /* With no killing, this session is allowed to persist in "closing" state indefinitely.
+ * Therefore session stop and session removal may be two distinct events.
+ * Session stop is quite significant on its own, let's log it. */
+ log_struct(s->class == SESSION_BACKGROUND ? LOG_DEBUG : LOG_INFO,
+ "SESSION_ID=%s", s->id,
+ "USER_ID=%s", s->user->user_record->user_name,
+ "LEADER="PID_FMT, s->leader,
+ LOG_MESSAGE("Session %s logged out. Waiting for processes to exit.", s->id));
+ }
+
+ return 0;
+}
+
+int session_stop(Session *s, bool force) {
+ int r;
+
+ assert(s);
+
+ /* This is called whenever we begin with tearing down a session record. It's called in four cases: explicit API
+ * request via the bus (either directly for the session object or for the seat or user object this session
+ * belongs to; 'force' is true), or due to automatic GC (i.e. scope vanished; 'force' is false), or because the
+ * session FIFO saw an EOF ('force' is false), or because the release timer hit ('force' is false). */
+
+ if (!s->user)
+ return -ESTALE;
+ if (!s->started)
+ return 0;
+ if (s->stopping)
+ return 0;
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ if (s->seat)
+ seat_evict_position(s->seat, s);
+
+ /* We are going down, don't care about FIFOs anymore */
+ session_remove_fifo(s);
+
+ /* Kill cgroup */
+ r = session_stop_scope(s, force);
+
+ s->stopping = true;
+
+ user_elect_display(s->user);
+
+ session_save(s);
+ user_save(s->user);
+
+ return r;
+}
+
+int session_finalize(Session *s) {
+ SessionDevice *sd;
+
+ assert(s);
+
+ if (!s->user)
+ return -ESTALE;
+
+ if (s->started)
+ log_struct(s->class == SESSION_BACKGROUND ? LOG_DEBUG : LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SESSION_STOP_STR,
+ "SESSION_ID=%s", s->id,
+ "USER_ID=%s", s->user->user_record->user_name,
+ "LEADER="PID_FMT, s->leader,
+ LOG_MESSAGE("Removed session %s.", s->id));
+
+ s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+ if (s->seat)
+ seat_evict_position(s->seat, s);
+
+ /* Kill session devices */
+ while ((sd = hashmap_first(s->devices)))
+ session_device_free(sd);
+
+ (void) unlink(s->state_file);
+ session_add_to_gc_queue(s);
+ user_add_to_gc_queue(s->user);
+
+ if (s->started) {
+ session_send_signal(s, false);
+ s->started = false;
+ }
+
+ if (s->seat) {
+ if (s->seat->active == s)
+ seat_set_active(s->seat, NULL);
+
+ seat_save(s->seat);
+ }
+
+ user_save(s->user);
+ user_send_changed(s->user, "Display", NULL);
+
+ return 0;
+}
+
+static int release_timeout_callback(sd_event_source *es, uint64_t usec, void *userdata) {
+ Session *s = userdata;
+
+ assert(es);
+ assert(s);
+
+ session_stop(s, /* force = */ false);
+ return 0;
+}
+
+int session_release(Session *s) {
+ assert(s);
+
+ if (!s->started || s->stopping)
+ return 0;
+
+ if (s->timer_event_source)
+ return 0;
+
+ return sd_event_add_time_relative(
+ s->manager->event,
+ &s->timer_event_source,
+ CLOCK_MONOTONIC,
+ RELEASE_USEC, 0,
+ release_timeout_callback, s);
+}
+
+bool session_is_active(Session *s) {
+ assert(s);
+
+ if (!s->seat)
+ return true;
+
+ return s->seat->active == s;
+}
+
+static int get_tty_atime(const char *tty, usec_t *atime) {
+ _cleanup_free_ char *p = NULL;
+ struct stat st;
+
+ assert(tty);
+ assert(atime);
+
+ if (!path_is_absolute(tty)) {
+ p = path_join("/dev", tty);
+ if (!p)
+ return -ENOMEM;
+
+ tty = p;
+ } else if (!path_startswith(tty, "/dev/"))
+ return -ENOENT;
+
+ if (lstat(tty, &st) < 0)
+ return -errno;
+
+ *atime = timespec_load(&st.st_atim);
+ return 0;
+}
+
+static int get_process_ctty_atime(pid_t pid, usec_t *atime) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(pid > 0);
+ assert(atime);
+
+ r = get_ctty(pid, NULL, &p);
+ if (r < 0)
+ return r;
+
+ return get_tty_atime(p, atime);
+}
+
+int session_get_idle_hint(Session *s, dual_timestamp *t) {
+ usec_t atime = 0;
+ int r;
+
+ assert(s);
+
+ /* Graphical sessions have an explicit idle hint */
+ if (SESSION_TYPE_IS_GRAPHICAL(s->type)) {
+ if (t)
+ *t = s->idle_hint_timestamp;
+
+ return s->idle_hint;
+ }
+
+ /* For sessions with an explicitly configured tty, let's check its atime */
+ if (s->tty) {
+ r = get_tty_atime(s->tty, &atime);
+ if (r >= 0)
+ goto found_atime;
+ }
+
+ /* For sessions with a leader but no explicitly configured tty, let's check the controlling tty of
+ * the leader */
+ if (pid_is_valid(s->leader)) {
+ r = get_process_ctty_atime(s->leader, &atime);
+ if (r >= 0)
+ goto found_atime;
+ }
+
+ if (t)
+ *t = DUAL_TIMESTAMP_NULL;
+
+ return false;
+
+found_atime:
+ if (t)
+ dual_timestamp_from_realtime(t, atime);
+
+ if (s->manager->idle_action_usec <= 0)
+ return false;
+
+ return usec_add(atime, s->manager->idle_action_usec) <= now(CLOCK_REALTIME);
+}
+
+int session_set_idle_hint(Session *s, bool b) {
+ assert(s);
+
+ if (!SESSION_TYPE_IS_GRAPHICAL(s->type))
+ return -ENOTTY;
+
+ if (s->idle_hint == b)
+ return 0;
+
+ s->idle_hint = b;
+ dual_timestamp_get(&s->idle_hint_timestamp);
+
+ session_send_changed(s, "IdleHint", "IdleSinceHint", "IdleSinceHintMonotonic", NULL);
+
+ if (s->seat)
+ seat_send_changed(s->seat, "IdleHint", "IdleSinceHint", "IdleSinceHintMonotonic", NULL);
+
+ user_send_changed(s->user, "IdleHint", "IdleSinceHint", "IdleSinceHintMonotonic", NULL);
+ manager_send_changed(s->manager, "IdleHint", "IdleSinceHint", "IdleSinceHintMonotonic", NULL);
+
+ return 1;
+}
+
+int session_get_locked_hint(Session *s) {
+ assert(s);
+
+ return s->locked_hint;
+}
+
+void session_set_locked_hint(Session *s, bool b) {
+ assert(s);
+
+ if (s->locked_hint == b)
+ return;
+
+ s->locked_hint = b;
+
+ session_send_changed(s, "LockedHint", NULL);
+}
+
+void session_set_type(Session *s, SessionType t) {
+ assert(s);
+
+ if (s->type == t)
+ return;
+
+ s->type = t;
+ session_save(s);
+
+ session_send_changed(s, "Type", NULL);
+}
+
+static int session_dispatch_fifo(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Session *s = userdata;
+
+ assert(s);
+ assert(s->fifo_fd == fd);
+
+ /* EOF on the FIFO means the session died abnormally. */
+
+ session_remove_fifo(s);
+ session_stop(s, /* force = */ false);
+
+ return 1;
+}
+
+int session_create_fifo(Session *s) {
+ int r;
+
+ assert(s);
+
+ /* Create FIFO */
+ if (!s->fifo_path) {
+ r = mkdir_safe_label("/run/systemd/sessions", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return r;
+
+ s->fifo_path = strjoin("/run/systemd/sessions/", s->id, ".ref");
+ if (!s->fifo_path)
+ return -ENOMEM;
+
+ if (mkfifo(s->fifo_path, 0600) < 0 && errno != EEXIST)
+ return -errno;
+ }
+
+ /* Open reading side */
+ if (s->fifo_fd < 0) {
+ s->fifo_fd = open(s->fifo_path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (s->fifo_fd < 0)
+ return -errno;
+ }
+
+ if (!s->fifo_event_source) {
+ r = sd_event_add_io(s->manager->event, &s->fifo_event_source, s->fifo_fd, 0, session_dispatch_fifo, s);
+ if (r < 0)
+ return r;
+
+ /* Let's make sure we noticed dead sessions before we process new bus requests (which might
+ * create new sessions). */
+ r = sd_event_source_set_priority(s->fifo_event_source, SD_EVENT_PRIORITY_NORMAL-10);
+ if (r < 0)
+ return r;
+ }
+
+ /* Open writing side */
+ r = open(s->fifo_path, O_WRONLY|O_CLOEXEC|O_NONBLOCK);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+static void session_remove_fifo(Session *s) {
+ assert(s);
+
+ s->fifo_event_source = sd_event_source_unref(s->fifo_event_source);
+ s->fifo_fd = safe_close(s->fifo_fd);
+
+ if (s->fifo_path) {
+ (void) unlink(s->fifo_path);
+ s->fifo_path = mfree(s->fifo_path);
+ }
+}
+
+bool session_may_gc(Session *s, bool drop_not_started) {
+ int r;
+
+ assert(s);
+
+ if (drop_not_started && !s->started)
+ return true;
+
+ if (!s->user)
+ return true;
+
+ if (s->fifo_fd >= 0) {
+ if (pipe_eof(s->fifo_fd) <= 0)
+ return false;
+ }
+
+ if (s->scope_job) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_job_is_active(s->manager, s->scope_job, &error);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether job '%s' is pending, ignoring: %s", s->scope_job, bus_error_message(&error, r));
+ if (r != 0)
+ return false;
+ }
+
+ if (s->scope) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_unit_is_active(s->manager, s->scope, &error);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether unit '%s' is active, ignoring: %s", s->scope, bus_error_message(&error, r));
+ if (r != 0)
+ return false;
+ }
+
+ return true;
+}
+
+void session_add_to_gc_queue(Session *s) {
+ assert(s);
+
+ if (s->in_gc_queue)
+ return;
+
+ LIST_PREPEND(gc_queue, s->manager->session_gc_queue, s);
+ s->in_gc_queue = true;
+}
+
+SessionState session_get_state(Session *s) {
+ assert(s);
+
+ /* always check closing first */
+ if (s->stopping || s->timer_event_source)
+ return SESSION_CLOSING;
+
+ if (s->scope_job || s->fifo_fd < 0)
+ return SESSION_OPENING;
+
+ if (session_is_active(s))
+ return SESSION_ACTIVE;
+
+ return SESSION_ONLINE;
+}
+
+int session_kill(Session *s, KillWho who, int signo) {
+ assert(s);
+
+ if (!s->scope)
+ return -ESRCH;
+
+ return manager_kill_unit(s->manager, s->scope, who, signo, NULL);
+}
+
+static int session_open_vt(Session *s) {
+ char path[sizeof("/dev/tty") + DECIMAL_STR_MAX(s->vtnr)];
+
+ if (s->vtnr < 1)
+ return -ENODEV;
+
+ if (s->vtfd >= 0)
+ return s->vtfd;
+
+ sprintf(path, "/dev/tty%u", s->vtnr);
+ s->vtfd = open_terminal(path, O_RDWR | O_CLOEXEC | O_NONBLOCK | O_NOCTTY);
+ if (s->vtfd < 0)
+ return log_error_errno(s->vtfd, "cannot open VT %s of session %s: %m", path, s->id);
+
+ return s->vtfd;
+}
+
+static int session_prepare_vt(Session *s) {
+ int vt, r;
+ struct vt_mode mode = {};
+
+ if (s->vtnr < 1)
+ return 0;
+
+ vt = session_open_vt(s);
+ if (vt < 0)
+ return vt;
+
+ r = fchown(vt, s->user->user_record->uid, -1);
+ if (r < 0) {
+ r = log_error_errno(errno,
+ "Cannot change owner of /dev/tty%u: %m",
+ s->vtnr);
+ goto error;
+ }
+
+ r = ioctl(vt, KDSKBMODE, K_OFF);
+ if (r < 0) {
+ r = log_error_errno(errno,
+ "Cannot set K_OFF on /dev/tty%u: %m",
+ s->vtnr);
+ goto error;
+ }
+
+ r = ioctl(vt, KDSETMODE, KD_GRAPHICS);
+ if (r < 0) {
+ r = log_error_errno(errno,
+ "Cannot set KD_GRAPHICS on /dev/tty%u: %m",
+ s->vtnr);
+ goto error;
+ }
+
+ /* Oh, thanks to the VT layer, VT_AUTO does not work with KD_GRAPHICS.
+ * So we need a dummy handler here which just acknowledges *all* VT
+ * switch requests. */
+ mode.mode = VT_PROCESS;
+ mode.relsig = SIGRTMIN;
+ mode.acqsig = SIGRTMIN + 1;
+ r = ioctl(vt, VT_SETMODE, &mode);
+ if (r < 0) {
+ r = log_error_errno(errno,
+ "Cannot set VT_PROCESS on /dev/tty%u: %m",
+ s->vtnr);
+ goto error;
+ }
+
+ return 0;
+
+error:
+ session_restore_vt(s);
+ return r;
+}
+
+static void session_restore_vt(Session *s) {
+ int r;
+
+ r = vt_restore(s->vtfd);
+ if (r == -EIO) {
+ int vt, old_fd;
+
+ /* It might happen if the controlling process exited before or while we were
+ * restoring the VT as it would leave the old file-descriptor in a hung-up
+ * state. In this case let's retry with a fresh handle to the virtual terminal. */
+
+ /* We do a little dance to avoid having the terminal be available
+ * for reuse before we've cleaned it up. */
+ old_fd = TAKE_FD(s->vtfd);
+
+ vt = session_open_vt(s);
+ safe_close(old_fd);
+
+ if (vt >= 0)
+ r = vt_restore(vt);
+ }
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to restore VT, ignoring: %m");
+
+ s->vtfd = safe_close(s->vtfd);
+}
+
+void session_leave_vt(Session *s) {
+ int r;
+
+ assert(s);
+
+ /* This is called whenever we get a VT-switch signal from the kernel.
+ * We acknowledge all of them unconditionally. Note that session are
+ * free to overwrite those handlers and we only register them for
+ * sessions with controllers. Legacy sessions are not affected.
+ * However, if we switch from a non-legacy to a legacy session, we must
+ * make sure to pause all device before acknowledging the switch. We
+ * process the real switch only after we are notified via sysfs, so the
+ * legacy session might have already started using the devices. If we
+ * don't pause the devices before the switch, we might confuse the
+ * session we switch to. */
+
+ if (s->vtfd < 0)
+ return;
+
+ session_device_pause_all(s);
+ r = vt_release(s->vtfd, false);
+ if (r < 0)
+ log_debug_errno(r, "Cannot release VT of session %s: %m", s->id);
+}
+
+bool session_is_controller(Session *s, const char *sender) {
+ assert(s);
+
+ return streq_ptr(s->controller, sender);
+}
+
+static void session_release_controller(Session *s, bool notify) {
+ _cleanup_free_ char *name = NULL;
+ SessionDevice *sd;
+
+ if (!s->controller)
+ return;
+
+ name = s->controller;
+
+ /* By resetting the controller before releasing the devices, we won't send notification signals.
+ * This avoids sending useless notifications if the controller is released on disconnects. */
+ if (!notify)
+ s->controller = NULL;
+
+ while ((sd = hashmap_first(s->devices)))
+ session_device_free(sd);
+
+ s->controller = NULL;
+ s->track = sd_bus_track_unref(s->track);
+}
+
+static int on_bus_track(sd_bus_track *track, void *userdata) {
+ Session *s = userdata;
+
+ assert(track);
+ assert(s);
+
+ session_drop_controller(s);
+
+ return 0;
+}
+
+int session_set_controller(Session *s, const char *sender, bool force, bool prepare) {
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ assert(s);
+ assert(sender);
+
+ if (session_is_controller(s, sender))
+ return 0;
+ if (s->controller && !force)
+ return -EBUSY;
+
+ name = strdup(sender);
+ if (!name)
+ return -ENOMEM;
+
+ s->track = sd_bus_track_unref(s->track);
+ r = sd_bus_track_new(s->manager->bus, &s->track, on_bus_track, s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_add_name(s->track, name);
+ if (r < 0)
+ return r;
+
+ /* When setting a session controller, we forcibly mute the VT and set
+ * it into graphics-mode. Applications can override that by changing
+ * VT state after calling TakeControl(). However, this serves as a good
+ * default and well-behaving controllers can now ignore VTs entirely.
+ * Note that we reset the VT on ReleaseControl() and if the controller
+ * exits.
+ * If logind crashes/restarts, we restore the controller during restart
+ * (without preparing the VT since the controller has probably overridden
+ * VT state by now) or reset the VT in case it crashed/exited, too. */
+ if (prepare) {
+ r = session_prepare_vt(s);
+ if (r < 0) {
+ s->track = sd_bus_track_unref(s->track);
+ return r;
+ }
+ }
+
+ session_release_controller(s, true);
+ s->controller = TAKE_PTR(name);
+ session_save(s);
+
+ return 0;
+}
+
+void session_drop_controller(Session *s) {
+ assert(s);
+
+ if (!s->controller)
+ return;
+
+ s->track = sd_bus_track_unref(s->track);
+ session_set_type(s, s->original_type);
+ session_release_controller(s, false);
+ session_save(s);
+ session_restore_vt(s);
+}
+
+static const char* const session_state_table[_SESSION_STATE_MAX] = {
+ [SESSION_OPENING] = "opening",
+ [SESSION_ONLINE] = "online",
+ [SESSION_ACTIVE] = "active",
+ [SESSION_CLOSING] = "closing",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(session_state, SessionState);
+
+static const char* const session_type_table[_SESSION_TYPE_MAX] = {
+ [SESSION_UNSPECIFIED] = "unspecified",
+ [SESSION_TTY] = "tty",
+ [SESSION_X11] = "x11",
+ [SESSION_WAYLAND] = "wayland",
+ [SESSION_MIR] = "mir",
+ [SESSION_WEB] = "web",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(session_type, SessionType);
+
+static const char* const session_class_table[_SESSION_CLASS_MAX] = {
+ [SESSION_USER] = "user",
+ [SESSION_GREETER] = "greeter",
+ [SESSION_LOCK_SCREEN] = "lock-screen",
+ [SESSION_BACKGROUND] = "background",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(session_class, SessionClass);
+
+static const char* const kill_who_table[_KILL_WHO_MAX] = {
+ [KILL_LEADER] = "leader",
+ [KILL_ALL] = "all",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);
+
+static const char* const tty_validity_table[_TTY_VALIDITY_MAX] = {
+ [TTY_FROM_PAM] = "from-pam",
+ [TTY_FROM_UTMP] = "from-utmp",
+ [TTY_UTMP_INCONSISTENT] = "utmp-inconsistent",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(tty_validity, TTYValidity);
diff --git a/src/login/logind-session.h b/src/login/logind-session.h
new file mode 100644
index 0000000..1b59bdb
--- /dev/null
+++ b/src/login/logind-session.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Session Session;
+typedef enum KillWho KillWho;
+
+#include "list.h"
+#include "login-util.h"
+#include "logind-user.h"
+#include "string-util.h"
+
+typedef enum SessionState {
+ SESSION_OPENING, /* Session scope is being created */
+ SESSION_ONLINE, /* Logged in */
+ SESSION_ACTIVE, /* Logged in and in the fg */
+ SESSION_CLOSING, /* Logged out, but scope is still there */
+ _SESSION_STATE_MAX,
+ _SESSION_STATE_INVALID = -1
+} SessionState;
+
+typedef enum SessionClass {
+ SESSION_USER,
+ SESSION_GREETER,
+ SESSION_LOCK_SCREEN,
+ SESSION_BACKGROUND,
+ _SESSION_CLASS_MAX,
+ _SESSION_CLASS_INVALID = -1
+} SessionClass;
+
+typedef enum SessionType {
+ SESSION_UNSPECIFIED,
+ SESSION_TTY,
+ SESSION_X11,
+ SESSION_WAYLAND,
+ SESSION_MIR,
+ SESSION_WEB,
+ _SESSION_TYPE_MAX,
+ _SESSION_TYPE_INVALID = -1
+} SessionType;
+
+#define SESSION_TYPE_IS_GRAPHICAL(type) IN_SET(type, SESSION_X11, SESSION_WAYLAND, SESSION_MIR)
+
+enum KillWho {
+ KILL_LEADER,
+ KILL_ALL,
+ _KILL_WHO_MAX,
+ _KILL_WHO_INVALID = -1
+};
+
+typedef enum TTYValidity {
+ TTY_FROM_PAM,
+ TTY_FROM_UTMP,
+ TTY_UTMP_INCONSISTENT, /* may happen on ssh sessions with multiplexed TTYs */
+ _TTY_VALIDITY_MAX,
+ _TTY_VALIDITY_INVALID = -1,
+} TTYValidity;
+
+struct Session {
+ Manager *manager;
+
+ const char *id;
+ unsigned position;
+ SessionType type;
+ SessionType original_type;
+ SessionClass class;
+
+ char *state_file;
+
+ User *user;
+
+ dual_timestamp timestamp;
+
+ char *display;
+ char *tty;
+ TTYValidity tty_validity;
+
+ bool remote;
+ char *remote_user;
+ char *remote_host;
+ char *service;
+ char *desktop;
+
+ char *scope;
+ char *scope_job;
+
+ Seat *seat;
+ unsigned vtnr;
+ int vtfd;
+
+ pid_t leader;
+ uint32_t audit_id;
+
+ int fifo_fd;
+ char *fifo_path;
+
+ sd_event_source *fifo_event_source;
+
+ bool idle_hint;
+ dual_timestamp idle_hint_timestamp;
+
+ bool locked_hint;
+
+ bool in_gc_queue:1;
+ bool started:1;
+ bool stopping:1;
+
+ bool was_active:1;
+
+ sd_bus_message *create_message;
+
+ /* Set up when a client requested to release the session via the bus */
+ sd_event_source *timer_event_source;
+
+ char *controller;
+ Hashmap *devices;
+ sd_bus_track *track;
+
+ LIST_FIELDS(Session, sessions_by_user);
+ LIST_FIELDS(Session, sessions_by_seat);
+
+ LIST_FIELDS(Session, gc_queue);
+};
+
+int session_new(Session **ret, Manager *m, const char *id);
+Session* session_free(Session *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Session *, session_free);
+
+void session_set_user(Session *s, User *u);
+int session_set_leader(Session *s, pid_t pid);
+bool session_may_gc(Session *s, bool drop_not_started);
+void session_add_to_gc_queue(Session *s);
+int session_activate(Session *s);
+bool session_is_active(Session *s);
+int session_get_idle_hint(Session *s, dual_timestamp *t);
+int session_set_idle_hint(Session *s, bool b);
+int session_get_locked_hint(Session *s);
+void session_set_locked_hint(Session *s, bool b);
+void session_set_type(Session *s, SessionType t);
+int session_create_fifo(Session *s);
+int session_start(Session *s, sd_bus_message *properties, sd_bus_error *error);
+int session_stop(Session *s, bool force);
+int session_finalize(Session *s);
+int session_release(Session *s);
+int session_save(Session *s);
+int session_load(Session *s);
+int session_kill(Session *s, KillWho who, int signo);
+
+SessionState session_get_state(Session *u);
+
+const char* session_state_to_string(SessionState t) _const_;
+SessionState session_state_from_string(const char *s) _pure_;
+
+const char* session_type_to_string(SessionType t) _const_;
+SessionType session_type_from_string(const char *s) _pure_;
+
+const char* session_class_to_string(SessionClass t) _const_;
+SessionClass session_class_from_string(const char *s) _pure_;
+
+const char *kill_who_to_string(KillWho k) _const_;
+KillWho kill_who_from_string(const char *s) _pure_;
+
+const char* tty_validity_to_string(TTYValidity t) _const_;
+TTYValidity tty_validity_from_string(const char *s) _pure_;
+
+void session_leave_vt(Session *s);
+
+bool session_is_controller(Session *s, const char *sender);
+int session_set_controller(Session *s, const char *sender, bool force, bool prepare);
+void session_drop_controller(Session *s);
+
+static inline bool SESSION_IS_SELF(const char *name) {
+ return isempty(name) || streq(name, "self");
+}
+
+static inline bool SESSION_IS_AUTO(const char *name) {
+ return streq_ptr(name, "auto");
+}
diff --git a/src/login/logind-user-dbus.c b/src/login/logind-user-dbus.c
new file mode 100644
index 0000000..414d431
--- /dev/null
+++ b/src/login/logind-user-dbus.c
@@ -0,0 +1,434 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "bus-polkit.h"
+#include "bus-util.h"
+#include "format-util.h"
+#include "logind-dbus.h"
+#include "logind-session-dbus.h"
+#include "logind-user-dbus.h"
+#include "logind-user.h"
+#include "logind.h"
+#include "missing_capability.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int property_get_uid(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ return sd_bus_message_append(reply, "u", (uint32_t) u->user_record->uid);
+}
+
+static int property_get_gid(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ return sd_bus_message_append(reply, "u", (uint32_t) u->user_record->gid);
+}
+
+static int property_get_name(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ return sd_bus_message_append(reply, "s", u->user_record->user_name);
+}
+
+static BUS_DEFINE_PROPERTY_GET2(property_get_state, "s", User, user_get_state, user_state_to_string);
+
+static int property_get_display(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *p = NULL;
+ User *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ p = u->display ? session_bus_path(u->display) : strdup("/");
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_message_append(reply, "(so)", u->display ? u->display->id : "", p);
+}
+
+static int property_get_sessions(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+ Session *session;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = sd_bus_message_open_container(reply, 'a', "(so)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(sessions_by_user, session, u->sessions) {
+ _cleanup_free_ char *p = NULL;
+
+ p = session_bus_path(session);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(so)", session->id, p);
+ if (r < 0)
+ return r;
+
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_idle_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ return sd_bus_message_append(reply, "b", user_get_idle_hint(u, NULL) > 0);
+}
+
+static int property_get_idle_since_hint(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+ dual_timestamp t = DUAL_TIMESTAMP_NULL;
+ uint64_t k;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ (void) user_get_idle_hint(u, &t);
+ k = streq(property, "IdleSinceHint") ? t.realtime : t.monotonic;
+
+ return sd_bus_message_append(reply, "t", k);
+}
+
+static int property_get_linger(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ User *u = userdata;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(u);
+
+ r = user_check_linger_file(u);
+
+ return sd_bus_message_append(reply, "b", r > 0);
+}
+
+int bus_user_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ User *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ u->user_record->uid,
+ &u->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = user_stop(u, /* force */ true);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_user_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ User *u = userdata;
+ int32_t signo;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.login1.manage",
+ NULL,
+ false,
+ u->user_record->uid,
+ &u->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = sd_bus_message_read(message, "i", &signo);
+ if (r < 0)
+ return r;
+
+ if (!SIGNAL_VALID(signo))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid signal %i", signo);
+
+ r = user_kill(u, signo);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int user_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ uid_t uid;
+ User *user;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ if (streq(path, "/org/freedesktop/login1/user/self")) {
+ sd_bus_message *message;
+
+ message = sd_bus_get_current_message(bus);
+
+ r = manager_get_user_from_creds(m, message, UID_INVALID, error, &user);
+ if (r == -ENXIO) {
+ sd_bus_error_free(error);
+ return 0;
+ }
+ if (r < 0)
+ return r;
+ } else {
+ const char *p;
+
+ p = startswith(path, "/org/freedesktop/login1/user/_");
+ if (!p)
+ return 0;
+
+ r = parse_uid(p, &uid);
+ if (r < 0)
+ return 0;
+
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (!user)
+ return 0;
+ }
+
+ *found = user;
+ return 1;
+}
+
+char *user_bus_path(User *u) {
+ char *s;
+
+ assert(u);
+
+ if (asprintf(&s, "/org/freedesktop/login1/user/_"UID_FMT, u->user_record->uid) < 0)
+ return NULL;
+
+ return s;
+}
+
+static int user_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ sd_bus_message *message;
+ Manager *m = userdata;
+ User *user;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ HASHMAP_FOREACH(user, m->users) {
+ char *p;
+
+ p = user_bus_path(user);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ message = sd_bus_get_current_message(bus);
+ if (message) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_OWNER_UID|SD_BUS_CREDS_AUGMENT, &creds);
+ if (r >= 0) {
+ uid_t uid;
+
+ r = sd_bus_creds_get_owner_uid(creds, &uid);
+ if (r >= 0) {
+ user = hashmap_get(m->users, UID_TO_PTR(uid));
+ if (user) {
+ r = strv_extend(&l, "/org/freedesktop/login1/user/self");
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+static const sd_bus_vtable user_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("UID", "u", property_get_uid, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("GID", "u", property_get_gid, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Name", "s", property_get_name, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("Timestamp", offsetof(User, timestamp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RuntimePath", "s", NULL, offsetof(User, runtime_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Service", "s", NULL, offsetof(User, service), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Slice", "s", NULL, offsetof(User, slice), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Display", "(so)", property_get_display, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("State", "s", property_get_state, 0, 0),
+ SD_BUS_PROPERTY("Sessions", "a(so)", property_get_sessions, 0, 0),
+ SD_BUS_PROPERTY("IdleHint", "b", property_get_idle_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHint", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("IdleSinceHintMonotonic", "t", property_get_idle_since_hint, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Linger", "b", property_get_linger, 0, 0),
+
+ SD_BUS_METHOD("Terminate", NULL, NULL, bus_user_method_terminate, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Kill",
+ "i",
+ SD_BUS_PARAM(signal_number),
+ NULL,,
+ bus_user_method_kill,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation user_object = {
+ "/org/freedesktop/login1/user",
+ "org.freedesktop.login1.User",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({user_vtable, user_object_find}),
+ .node_enumerator = user_node_enumerator,
+};
+
+int user_send_signal(User *u, bool new_user) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(u);
+
+ p = user_bus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ u->manager->bus,
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ new_user ? "UserNew" : "UserRemoved",
+ "uo", (uint32_t) u->user_record->uid, p);
+}
+
+int user_send_changed(User *u, const char *properties, ...) {
+ _cleanup_free_ char *p = NULL;
+ char **l;
+
+ assert(u);
+
+ if (!u->started)
+ return 0;
+
+ p = user_bus_path(u);
+ if (!p)
+ return -ENOMEM;
+
+ l = strv_from_stdarg_alloca(properties);
+
+ return sd_bus_emit_properties_changed_strv(u->manager->bus, p, "org.freedesktop.login1.User", l);
+}
diff --git a/src/login/logind-user-dbus.h b/src/login/logind-user-dbus.h
new file mode 100644
index 0000000..d2f24ce
--- /dev/null
+++ b/src/login/logind-user-dbus.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "logind-user.h"
+
+extern const BusObjectImplementation user_object;
+
+char *user_bus_path(User *s);
+
+int user_send_signal(User *u, bool new_user);
+int user_send_changed(User *u, const char *properties, ...) _sentinel_;
+
+int bus_user_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_user_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/login/logind-user.c b/src/login/logind-user.c
new file mode 100644
index 0000000..9b3ec07
--- /dev/null
+++ b/src/login/logind-user.c
@@ -0,0 +1,956 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-util.h"
+#include "clean-ipc.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "label.h"
+#include "limits-util.h"
+#include "logind-dbus.h"
+#include "logind-user.h"
+#include "logind-user-dbus.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "serialize.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+
+int user_new(User **ret,
+ Manager *m,
+ UserRecord *ur) {
+
+ _cleanup_(user_freep) User *u = NULL;
+ char lu[DECIMAL_STR_MAX(uid_t) + 1];
+ int r;
+
+ assert(ret);
+ assert(m);
+ assert(ur);
+
+ if (!ur->user_name)
+ return -EINVAL;
+
+ if (!uid_is_valid(ur->uid))
+ return -EINVAL;
+
+ u = new(User, 1);
+ if (!u)
+ return -ENOMEM;
+
+ *u = (User) {
+ .manager = m,
+ .user_record = user_record_ref(ur),
+ .last_session_timestamp = USEC_INFINITY,
+ };
+
+ if (asprintf(&u->state_file, "/run/systemd/users/" UID_FMT, ur->uid) < 0)
+ return -ENOMEM;
+
+ if (asprintf(&u->runtime_path, "/run/user/" UID_FMT, ur->uid) < 0)
+ return -ENOMEM;
+
+ xsprintf(lu, UID_FMT, ur->uid);
+ r = slice_build_subslice(SPECIAL_USER_SLICE, lu, &u->slice);
+ if (r < 0)
+ return r;
+
+ r = unit_name_build("user", lu, ".service", &u->service);
+ if (r < 0)
+ return r;
+
+ r = unit_name_build("user-runtime-dir", lu, ".service", &u->runtime_dir_service);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->users, UID_TO_PTR(ur->uid), u);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->user_units, u->slice, u);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->user_units, u->service, u);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->user_units, u->runtime_dir_service, u);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(u);
+ return 0;
+}
+
+User *user_free(User *u) {
+ if (!u)
+ return NULL;
+
+ if (u->in_gc_queue)
+ LIST_REMOVE(gc_queue, u->manager->user_gc_queue, u);
+
+ while (u->sessions)
+ session_free(u->sessions);
+
+ if (u->service)
+ hashmap_remove_value(u->manager->user_units, u->service, u);
+
+ if (u->runtime_dir_service)
+ hashmap_remove_value(u->manager->user_units, u->runtime_dir_service, u);
+
+ if (u->slice)
+ hashmap_remove_value(u->manager->user_units, u->slice, u);
+
+ hashmap_remove_value(u->manager->users, UID_TO_PTR(u->user_record->uid), u);
+
+ sd_event_source_unref(u->timer_event_source);
+
+ u->service_job = mfree(u->service_job);
+
+ u->service = mfree(u->service);
+ u->runtime_dir_service = mfree(u->runtime_dir_service);
+ u->slice = mfree(u->slice);
+ u->runtime_path = mfree(u->runtime_path);
+ u->state_file = mfree(u->state_file);
+
+ user_record_unref(u->user_record);
+
+ return mfree(u);
+}
+
+static int user_save_internal(User *u) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(u);
+ assert(u->state_file);
+
+ r = mkdir_safe_label("/run/systemd/users", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(u->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "NAME=%s\n"
+ "STATE=%s\n" /* friendly user-facing state */
+ "STOPPING=%s\n", /* low-level state */
+ u->user_record->user_name,
+ user_state_to_string(user_get_state(u)),
+ yes_no(u->stopping));
+
+ /* LEGACY: no-one reads RUNTIME= anymore, drop it at some point */
+ if (u->runtime_path)
+ fprintf(f, "RUNTIME=%s\n", u->runtime_path);
+
+ if (u->service_job)
+ fprintf(f, "SERVICE_JOB=%s\n", u->service_job);
+
+ if (u->display)
+ fprintf(f, "DISPLAY=%s\n", u->display->id);
+
+ if (dual_timestamp_is_set(&u->timestamp))
+ fprintf(f,
+ "REALTIME="USEC_FMT"\n"
+ "MONOTONIC="USEC_FMT"\n",
+ u->timestamp.realtime,
+ u->timestamp.monotonic);
+
+ if (u->last_session_timestamp != USEC_INFINITY)
+ fprintf(f, "LAST_SESSION_TIMESTAMP=" USEC_FMT "\n",
+ u->last_session_timestamp);
+
+ if (u->sessions) {
+ Session *i;
+ bool first;
+
+ fputs("SESSIONS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->id, f);
+ }
+
+ fputs("\nSEATS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (!i->seat)
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->seat->id, f);
+ }
+
+ fputs("\nACTIVE_SESSIONS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (!session_is_active(i))
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->id, f);
+ }
+
+ fputs("\nONLINE_SESSIONS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (session_get_state(i) == SESSION_CLOSING)
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->id, f);
+ }
+
+ fputs("\nACTIVE_SEATS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (!session_is_active(i) || !i->seat)
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->seat->id, f);
+ }
+
+ fputs("\nONLINE_SEATS=", f);
+ first = true;
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ if (session_get_state(i) == SESSION_CLOSING || !i->seat)
+ continue;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ fputs(i->seat->id, f);
+ }
+ fputc('\n', f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, u->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(u->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save user data %s: %m", u->state_file);
+}
+
+int user_save(User *u) {
+ assert(u);
+
+ if (!u->started)
+ return 0;
+
+ return user_save_internal(u);
+}
+
+int user_load(User *u) {
+ _cleanup_free_ char *realtime = NULL, *monotonic = NULL, *stopping = NULL, *last_session_timestamp = NULL;
+ int r;
+
+ assert(u);
+
+ r = parse_env_file(NULL, u->state_file,
+ "SERVICE_JOB", &u->service_job,
+ "STOPPING", &stopping,
+ "REALTIME", &realtime,
+ "MONOTONIC", &monotonic,
+ "LAST_SESSION_TIMESTAMP", &last_session_timestamp);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s: %m", u->state_file);
+
+ if (stopping) {
+ r = parse_boolean(stopping);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse 'STOPPING' boolean: %s", stopping);
+ else
+ u->stopping = r;
+ }
+
+ if (realtime)
+ (void) deserialize_usec(realtime, &u->timestamp.realtime);
+ if (monotonic)
+ (void) deserialize_usec(monotonic, &u->timestamp.monotonic);
+ if (last_session_timestamp)
+ (void) deserialize_usec(last_session_timestamp, &u->last_session_timestamp);
+
+ return 0;
+}
+
+static void user_start_service(User *u) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(u);
+
+ /* Start the service containing the "systemd --user" instance (user@.service). Note that we don't explicitly
+ * start the per-user slice or the systemd-runtime-dir@.service instance, as those are pulled in both by
+ * user@.service and the session scopes as dependencies. */
+
+ u->service_job = mfree(u->service_job);
+
+ r = manager_start_unit(u->manager, u->service, &error, &u->service_job);
+ if (r < 0)
+ log_full_errno(sd_bus_error_has_name(&error, BUS_ERROR_UNIT_MASKED) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to start user service '%s', ignoring: %s", u->service, bus_error_message(&error, r));
+}
+
+static int update_slice_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ _cleanup_(user_record_unrefp) UserRecord *ur = userdata;
+
+ assert(m);
+ assert(ur);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_warning_errno(sd_bus_message_get_errno(m),
+ "Failed to update slice of %s, ignoring: %s",
+ ur->user_name,
+ sd_bus_message_get_error(m)->message);
+
+ return 0;
+ }
+
+ log_debug("Successfully set slice parameters of %s.", ur->user_name);
+ return 0;
+}
+
+static int user_update_slice(User *u) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(u);
+
+ if (u->user_record->tasks_max == UINT64_MAX &&
+ u->user_record->memory_high == UINT64_MAX &&
+ u->user_record->memory_max == UINT64_MAX &&
+ u->user_record->cpu_weight == UINT64_MAX &&
+ u->user_record->io_weight == UINT64_MAX)
+ return 0;
+
+ r = sd_bus_message_new_method_call(
+ u->manager->bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "SetUnitProperties");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "sb", u->slice, true);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ const struct {
+ const char *name;
+ uint64_t value;
+ } settings[] = {
+ { "TasksMax", u->user_record->tasks_max },
+ { "MemoryMax", u->user_record->memory_max },
+ { "MemoryHigh", u->user_record->memory_high },
+ { "CPUWeight", u->user_record->cpu_weight },
+ { "IOWeight", u->user_record->io_weight },
+ };
+
+ for (size_t i = 0; i < ELEMENTSOF(settings); i++)
+ if (settings[i].value != UINT64_MAX) {
+ r = sd_bus_message_append(m, "(sv)", settings[i].name, "t", settings[i].value);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call_async(u->manager->bus, NULL, m, update_slice_callback, u->user_record, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change user slice properties: %m");
+
+ /* Ref the user record pointer, so that the slot keeps it pinned */
+ user_record_ref(u->user_record);
+
+ return 0;
+}
+
+int user_start(User *u) {
+ assert(u);
+
+ if (u->started && !u->stopping)
+ return 0;
+
+ /* If u->stopping is set, the user is marked for removal and service stop-jobs are queued. We have to clear
+ * that flag before queueing the start-jobs again. If they succeed, the user object can be re-used just fine
+ * (pid1 takes care of job-ordering and proper restart), but if they fail, we want to force another user_stop()
+ * so possibly pending units are stopped. */
+ u->stopping = false;
+
+ if (!u->started)
+ log_debug("Starting services for new user %s.", u->user_record->user_name);
+
+ /* Save the user data so far, because pam_systemd will read the XDG_RUNTIME_DIR out of it while starting up
+ * systemd --user. We need to do user_save_internal() because we have not "officially" started yet. */
+ user_save_internal(u);
+
+ /* Set slice parameters */
+ (void) user_update_slice(u);
+
+ /* Start user@UID.service */
+ user_start_service(u);
+
+ if (!u->started) {
+ if (!dual_timestamp_is_set(&u->timestamp))
+ dual_timestamp_get(&u->timestamp);
+ user_send_signal(u, true);
+ u->started = true;
+ }
+
+ /* Save new user data */
+ user_save(u);
+
+ return 0;
+}
+
+static void user_stop_service(User *u, bool force) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(u);
+ assert(u->service);
+
+ /* The reverse of user_start_service(). Note that we only stop user@UID.service here, and let StopWhenUnneeded=
+ * deal with the slice and the user-runtime-dir@.service instance. */
+
+ u->service_job = mfree(u->service_job);
+
+ r = manager_stop_unit(u->manager, u->service, force ? "replace" : "fail", &error, &u->service_job);
+ if (r < 0)
+ log_warning_errno(r, "Failed to stop user service '%s', ignoring: %s", u->service, bus_error_message(&error, r));
+}
+
+int user_stop(User *u, bool force) {
+ Session *s;
+ int r = 0;
+ assert(u);
+
+ /* This is called whenever we begin with tearing down a user record. It's called in two cases: explicit API
+ * request to do so via the bus (in which case 'force' is true) and automatically due to GC, if there's no
+ * session left pinning it (in which case 'force' is false). Note that this just initiates tearing down of the
+ * user, the User object will remain in memory until user_finalize() is called, see below. */
+
+ if (!u->started)
+ return 0;
+
+ if (u->stopping) { /* Stop jobs have already been queued */
+ user_save(u);
+ return 0;
+ }
+
+ LIST_FOREACH(sessions_by_user, s, u->sessions) {
+ int k;
+
+ k = session_stop(s, force);
+ if (k < 0)
+ r = k;
+ }
+
+ user_stop_service(u, force);
+
+ u->stopping = true;
+
+ user_save(u);
+
+ return r;
+}
+
+int user_finalize(User *u) {
+ Session *s;
+ int r = 0, k;
+
+ assert(u);
+
+ /* Called when the user is really ready to be freed, i.e. when all unit stop jobs and suchlike for it are
+ * done. This is called as a result of an earlier user_done() when all jobs are completed. */
+
+ if (u->started)
+ log_debug("User %s logged out.", u->user_record->user_name);
+
+ LIST_FOREACH(sessions_by_user, s, u->sessions) {
+ k = session_finalize(s);
+ if (k < 0)
+ r = k;
+ }
+
+ /* Clean SysV + POSIX IPC objects, but only if this is not a system user. Background: in many setups cronjobs
+ * are run in full PAM and thus logind sessions, even if the code run doesn't belong to actual users but to
+ * system components. Since enable RemoveIPC= globally for all users, we need to be a bit careful with such
+ * cases, as we shouldn't accidentally remove a system service's IPC objects while it is running, just because
+ * a cronjob running as the same user just finished. Hence: exclude system users generally from IPC clean-up,
+ * and do it only for normal users. */
+ if (u->manager->remove_ipc && !uid_is_system(u->user_record->uid)) {
+ k = clean_ipc_by_uid(u->user_record->uid);
+ if (k < 0)
+ r = k;
+ }
+
+ (void) unlink(u->state_file);
+ user_add_to_gc_queue(u);
+
+ if (u->started) {
+ user_send_signal(u, false);
+ u->started = false;
+ }
+
+ return r;
+}
+
+int user_get_idle_hint(User *u, dual_timestamp *t) {
+ Session *s;
+ bool idle_hint = true;
+ dual_timestamp ts = DUAL_TIMESTAMP_NULL;
+
+ assert(u);
+
+ LIST_FOREACH(sessions_by_user, s, u->sessions) {
+ dual_timestamp k;
+ int ih;
+
+ ih = session_get_idle_hint(s, &k);
+ if (ih < 0)
+ return ih;
+
+ if (!ih) {
+ if (!idle_hint) {
+ if (k.monotonic < ts.monotonic)
+ ts = k;
+ } else {
+ idle_hint = false;
+ ts = k;
+ }
+ } else if (idle_hint) {
+
+ if (k.monotonic > ts.monotonic)
+ ts = k;
+ }
+ }
+
+ if (t)
+ *t = ts;
+
+ return idle_hint;
+}
+
+int user_check_linger_file(User *u) {
+ _cleanup_free_ char *cc = NULL;
+ char *p = NULL;
+
+ cc = cescape(u->user_record->user_name);
+ if (!cc)
+ return -ENOMEM;
+
+ p = strjoina("/var/lib/systemd/linger/", cc);
+ if (access(p, F_OK) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ return false;
+ }
+
+ return true;
+}
+
+static bool user_unit_active(User *u) {
+ const char *i;
+ int r;
+
+ assert(u->service);
+ assert(u->runtime_dir_service);
+ assert(u->slice);
+
+ FOREACH_STRING(i, u->service, u->runtime_dir_service, u->slice) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_unit_is_active(u->manager, i, &error);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether unit '%s' is active, ignoring: %s", u->service, bus_error_message(&error, r));
+ if (r != 0)
+ return true;
+ }
+
+ return false;
+}
+
+static usec_t user_get_stop_delay(User *u) {
+ assert(u);
+
+ if (u->user_record->stop_delay_usec != UINT64_MAX)
+ return u->user_record->stop_delay_usec;
+
+ if (user_record_removable(u->user_record) > 0)
+ return 0; /* For removable users lower the stop delay to zero */
+
+ return u->manager->user_stop_delay;
+}
+
+bool user_may_gc(User *u, bool drop_not_started) {
+ int r;
+
+ assert(u);
+
+ if (drop_not_started && !u->started)
+ return true;
+
+ if (u->sessions)
+ return false;
+
+ if (u->last_session_timestamp != USEC_INFINITY) {
+ usec_t user_stop_delay;
+
+ /* All sessions have been closed. Let's see if we shall leave the user record around for a bit */
+
+ user_stop_delay = user_get_stop_delay(u);
+
+ if (user_stop_delay == USEC_INFINITY)
+ return false; /* Leave it around forever! */
+ if (user_stop_delay > 0 &&
+ now(CLOCK_MONOTONIC) < usec_add(u->last_session_timestamp, user_stop_delay))
+ return false; /* Leave it around for a bit longer. */
+ }
+
+ /* Is this a user that shall stay around forever ("linger")? Before we say "no" to GC'ing for lingering users, let's check
+ * if any of the three units that we maintain for this user is still around. If none of them is,
+ * there's no need to keep this user around even if lingering is enabled. */
+ if (user_check_linger_file(u) > 0 && user_unit_active(u))
+ return false;
+
+ /* Check if our job is still pending */
+ if (u->service_job) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = manager_job_is_active(u->manager, u->service_job, &error);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether job '%s' is pending, ignoring: %s", u->service_job, bus_error_message(&error, r));
+ if (r != 0)
+ return false;
+ }
+
+ /* Note that we don't care if the three units we manage for each user object are up or not, as we are managing
+ * their state rather than tracking it. */
+
+ return true;
+}
+
+void user_add_to_gc_queue(User *u) {
+ assert(u);
+
+ if (u->in_gc_queue)
+ return;
+
+ LIST_PREPEND(gc_queue, u->manager->user_gc_queue, u);
+ u->in_gc_queue = true;
+}
+
+UserState user_get_state(User *u) {
+ Session *i;
+
+ assert(u);
+
+ if (u->stopping)
+ return USER_CLOSING;
+
+ if (!u->started || u->service_job)
+ return USER_OPENING;
+
+ if (u->sessions) {
+ bool all_closing = true;
+
+ LIST_FOREACH(sessions_by_user, i, u->sessions) {
+ SessionState state;
+
+ state = session_get_state(i);
+ if (state == SESSION_ACTIVE)
+ return USER_ACTIVE;
+ if (state != SESSION_CLOSING)
+ all_closing = false;
+ }
+
+ return all_closing ? USER_CLOSING : USER_ONLINE;
+ }
+
+ if (user_check_linger_file(u) > 0 && user_unit_active(u))
+ return USER_LINGERING;
+
+ return USER_CLOSING;
+}
+
+int user_kill(User *u, int signo) {
+ assert(u);
+
+ return manager_kill_unit(u->manager, u->slice, KILL_ALL, signo, NULL);
+}
+
+static bool elect_display_filter(Session *s) {
+ /* Return true if the session is a candidate for the user’s ‘primary session’ or ‘display’. */
+ assert(s);
+
+ return IN_SET(s->class, SESSION_USER, SESSION_GREETER) && s->started && !s->stopping;
+}
+
+static int elect_display_compare(Session *s1, Session *s2) {
+ /* Indexed by SessionType. Lower numbers mean more preferred. */
+ static const int type_ranks[_SESSION_TYPE_MAX] = {
+ [SESSION_UNSPECIFIED] = 0,
+ [SESSION_TTY] = -2,
+ [SESSION_X11] = -3,
+ [SESSION_WAYLAND] = -3,
+ [SESSION_MIR] = -3,
+ [SESSION_WEB] = -1,
+ };
+
+ /* Calculate the partial order relationship between s1 and s2,
+ * returning < 0 if s1 is preferred as the user’s ‘primary session’,
+ * 0 if s1 and s2 are equally preferred or incomparable, or > 0 if s2
+ * is preferred.
+ *
+ * s1 or s2 may be NULL. */
+ if (!s1 && !s2)
+ return 0;
+
+ if ((s1 == NULL) != (s2 == NULL))
+ return (s1 == NULL) - (s2 == NULL);
+
+ if (s1->stopping != s2->stopping)
+ return s1->stopping - s2->stopping;
+
+ if ((s1->class != SESSION_USER) != (s2->class != SESSION_USER))
+ return (s1->class != SESSION_USER) - (s2->class != SESSION_USER);
+
+ if ((s1->type == _SESSION_TYPE_INVALID) != (s2->type == _SESSION_TYPE_INVALID))
+ return (s1->type == _SESSION_TYPE_INVALID) - (s2->type == _SESSION_TYPE_INVALID);
+
+ if (s1->type != s2->type)
+ return type_ranks[s1->type] - type_ranks[s2->type];
+
+ return 0;
+}
+
+void user_elect_display(User *u) {
+ Session *s;
+
+ assert(u);
+
+ /* This elects a primary session for each user, which we call the "display". We try to keep the assignment
+ * stable, but we "upgrade" to better choices. */
+ log_debug("Electing new display for user %s", u->user_record->user_name);
+
+ LIST_FOREACH(sessions_by_user, s, u->sessions) {
+ if (!elect_display_filter(s)) {
+ log_debug("Ignoring session %s", s->id);
+ continue;
+ }
+
+ if (elect_display_compare(s, u->display) < 0) {
+ log_debug("Choosing session %s in preference to %s", s->id, u->display ? u->display->id : "-");
+ u->display = s;
+ }
+ }
+}
+
+static int user_stop_timeout_callback(sd_event_source *es, uint64_t usec, void *userdata) {
+ User *u = userdata;
+
+ assert(u);
+ user_add_to_gc_queue(u);
+
+ return 0;
+}
+
+void user_update_last_session_timer(User *u) {
+ usec_t user_stop_delay;
+ int r;
+
+ assert(u);
+
+ if (u->sessions) {
+ /* There are sessions, turn off the timer */
+ u->last_session_timestamp = USEC_INFINITY;
+ u->timer_event_source = sd_event_source_unref(u->timer_event_source);
+ return;
+ }
+
+ if (u->last_session_timestamp != USEC_INFINITY)
+ return; /* Timer already started */
+
+ u->last_session_timestamp = now(CLOCK_MONOTONIC);
+
+ assert(!u->timer_event_source);
+
+ user_stop_delay = user_get_stop_delay(u);
+ if (IN_SET(user_stop_delay, 0, USEC_INFINITY))
+ return;
+
+ if (sd_event_get_state(u->manager->event) == SD_EVENT_FINISHED) {
+ log_debug("Not allocating user stop timeout, since we are already exiting.");
+ return;
+ }
+
+ r = sd_event_add_time(u->manager->event,
+ &u->timer_event_source,
+ CLOCK_MONOTONIC,
+ usec_add(u->last_session_timestamp, user_stop_delay), 0,
+ user_stop_timeout_callback, u);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enqueue user stop event source, ignoring: %m");
+
+ if (DEBUG_LOGGING) {
+ char s[FORMAT_TIMESPAN_MAX];
+
+ log_debug("Last session of user '%s' logged out, terminating user context in %s.",
+ u->user_record->user_name,
+ format_timespan(s, sizeof(s), user_stop_delay, USEC_PER_MSEC));
+ }
+}
+
+static const char* const user_state_table[_USER_STATE_MAX] = {
+ [USER_OFFLINE] = "offline",
+ [USER_OPENING] = "opening",
+ [USER_LINGERING] = "lingering",
+ [USER_ONLINE] = "online",
+ [USER_ACTIVE] = "active",
+ [USER_CLOSING] = "closing"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(user_state, UserState);
+
+int config_parse_tmpfs_size(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *sz = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* First, try to parse as percentage */
+ r = parse_permille(rvalue);
+ if (r > 0 && r < 1000)
+ *sz = physical_memory_scale(r, 1000U);
+ else {
+ uint64_t k;
+
+ /* If the passed argument was not a percentage, or out of range, parse as byte size */
+
+ r = parse_size(rvalue, 1024, &k);
+ if (r >= 0 && (k <= 0 || (uint64_t) (size_t) k != k))
+ r = -ERANGE;
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *sz = PAGE_ALIGN((size_t) k);
+ }
+
+ return 0;
+}
+
+int config_parse_compat_user_tasks_max(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ log_syntax(unit, LOG_NOTICE, filename, line, 0,
+ "Support for option %s= has been removed.",
+ lvalue);
+ log_info("Hint: try creating /etc/systemd/system/user-.slice.d/50-limits.conf with:\n"
+ " [Slice]\n"
+ " TasksMax=%s",
+ rvalue);
+ return 0;
+}
diff --git a/src/login/logind-user.h b/src/login/logind-user.h
new file mode 100644
index 0000000..2c5f993
--- /dev/null
+++ b/src/login/logind-user.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct User User;
+
+#include "conf-parser.h"
+#include "list.h"
+#include "logind.h"
+#include "user-record.h"
+
+typedef enum UserState {
+ USER_OFFLINE, /* Not logged in at all */
+ USER_OPENING, /* Is logging in */
+ USER_LINGERING, /* Lingering has been enabled by the admin for this user */
+ USER_ONLINE, /* User logged in */
+ USER_ACTIVE, /* User logged in and has a session in the fg */
+ USER_CLOSING, /* User logged out, but processes still remain and lingering is not enabled */
+ _USER_STATE_MAX,
+ _USER_STATE_INVALID = -1
+} UserState;
+
+struct User {
+ Manager *manager;
+
+ UserRecord *user_record;
+
+ char *state_file;
+ char *runtime_path;
+
+ char *slice; /* user-UID.slice */
+ char *service; /* user@UID.service */
+ char *runtime_dir_service; /* user-runtime-dir@UID.service */
+
+ char *service_job;
+
+ Session *display;
+
+ dual_timestamp timestamp; /* When this User object was 'started' the first time */
+ usec_t last_session_timestamp; /* When the number of sessions of this user went from 1 to 0 the last time */
+
+ /* Set up when the last session of the user logs out */
+ sd_event_source *timer_event_source;
+
+ bool in_gc_queue:1;
+
+ bool started:1; /* Whenever the user being started, has been started or is being stopped again. */
+ bool stopping:1; /* Whenever the user is being stopped or has been stopped. */
+
+ LIST_HEAD(Session, sessions);
+ LIST_FIELDS(User, gc_queue);
+};
+
+int user_new(User **out, Manager *m, UserRecord *ur);
+User *user_free(User *u);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(User *, user_free);
+
+bool user_may_gc(User *u, bool drop_not_started);
+void user_add_to_gc_queue(User *u);
+int user_start(User *u);
+int user_stop(User *u, bool force);
+int user_finalize(User *u);
+UserState user_get_state(User *u);
+int user_get_idle_hint(User *u, dual_timestamp *t);
+int user_save(User *u);
+int user_load(User *u);
+int user_kill(User *u, int signo);
+int user_check_linger_file(User *u);
+void user_elect_display(User *u);
+void user_update_last_session_timer(User *u);
+
+const char* user_state_to_string(UserState s) _const_;
+UserState user_state_from_string(const char *s) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_compat_user_tasks_max);
diff --git a/src/login/logind-utmp.c b/src/login/logind-utmp.c
new file mode 100644
index 0000000..dfbbb64
--- /dev/null
+++ b/src/login/logind-utmp.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "format-util.h"
+#include "logind.h"
+#include "path-util.h"
+#include "special.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "utmp-wtmp.h"
+
+_const_ static usec_t when_wall(usec_t n, usec_t elapse) {
+
+ usec_t left;
+ unsigned i;
+ static const int wall_timers[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 25, 40, 55, 70, 100, 130, 150, 180,
+ };
+
+ /* If the time is already passed, then don't announce */
+ if (n >= elapse)
+ return 0;
+
+ left = elapse - n;
+
+ for (i = 1; i < ELEMENTSOF(wall_timers); i++)
+ if (wall_timers[i] * USEC_PER_MINUTE >= left)
+ return left - wall_timers[i-1] * USEC_PER_MINUTE;
+
+ return left % USEC_PER_HOUR;
+}
+
+bool logind_wall_tty_filter(const char *tty, void *userdata) {
+ Manager *m = userdata;
+ const char *p;
+
+ assert(m);
+
+ if (!m->scheduled_shutdown_tty)
+ return true;
+
+ p = path_startswith(tty, "/dev/");
+ if (!p)
+ return true;
+
+ return !streq(p, m->scheduled_shutdown_tty);
+}
+
+static int warn_wall(Manager *m, usec_t n) {
+ char date[FORMAT_TIMESTAMP_MAX] = {};
+ _cleanup_free_ char *l = NULL, *username = NULL;
+ usec_t left;
+ int r;
+
+ assert(m);
+
+ if (!m->enable_wall_messages)
+ return 0;
+
+ left = m->scheduled_shutdown_timeout > n;
+
+ r = asprintf(&l, "%s%sThe system is going down for %s %s%s!",
+ strempty(m->wall_message),
+ isempty(m->wall_message) ? "" : "\n",
+ m->scheduled_shutdown_type,
+ left ? "at " : "NOW",
+ left ? format_timestamp(date, sizeof(date), m->scheduled_shutdown_timeout) : "");
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ username = uid_to_name(m->scheduled_shutdown_uid);
+ utmp_wall(l, username, m->scheduled_shutdown_tty, logind_wall_tty_filter, m);
+
+ return 1;
+}
+
+static int wall_message_timeout_handler(
+ sd_event_source *s,
+ uint64_t usec,
+ void *userdata) {
+
+ Manager *m = userdata;
+ usec_t n, next;
+ int r;
+
+ assert(m);
+ assert(s == m->wall_message_timeout_source);
+
+ n = now(CLOCK_REALTIME);
+
+ r = warn_wall(m, n);
+ if (r == 0)
+ return 0;
+
+ next = when_wall(n, m->scheduled_shutdown_timeout);
+ if (next > 0) {
+ r = sd_event_source_set_time(s, n + next);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed. %m");
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed. %m");
+ }
+
+ return 0;
+}
+
+int manager_setup_wall_message_timer(Manager *m) {
+
+ usec_t n, elapse;
+ int r;
+
+ assert(m);
+
+ n = now(CLOCK_REALTIME);
+ elapse = m->scheduled_shutdown_timeout;
+
+ /* wall message handling */
+
+ if (isempty(m->scheduled_shutdown_type)) {
+ warn_wall(m, n);
+ return 0;
+ }
+
+ if (elapse < n)
+ return 0;
+
+ /* Warn immediately if less than 15 minutes are left */
+ if (elapse - n < 15 * USEC_PER_MINUTE) {
+ r = warn_wall(m, n);
+ if (r == 0)
+ return 0;
+ }
+
+ elapse = when_wall(n, elapse);
+ if (elapse == 0)
+ return 0;
+
+ if (m->wall_message_timeout_source) {
+ r = sd_event_source_set_time(m->wall_message_timeout_source, n + elapse);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_time() failed. %m");
+
+ r = sd_event_source_set_enabled(m->wall_message_timeout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_source_set_enabled() failed. %m");
+ } else {
+ r = sd_event_add_time(m->event, &m->wall_message_timeout_source,
+ CLOCK_REALTIME, n + elapse, 0, wall_message_timeout_handler, m);
+ if (r < 0)
+ return log_error_errno(r, "sd_event_add_time() failed. %m");
+ }
+
+ return 0;
+}
diff --git a/src/login/logind.c b/src/login/logind.c
new file mode 100644
index 0000000..3ddc7a0
--- /dev/null
+++ b/src/login/logind.c
@@ -0,0 +1,1202 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-log-control-api.h"
+#include "bus-polkit.h"
+#include "cgroup-util.h"
+#include "daemon-util.h"
+#include "def.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "logind-dbus.h"
+#include "logind-seat-dbus.h"
+#include "logind-session-dbus.h"
+#include "logind-user-dbus.h"
+#include "logind.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "service-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "udev-util.h"
+#include "user-util.h"
+
+static Manager* manager_unref(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
+
+static int manager_new(Manager **ret) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .console_active_fd = -1,
+ .reserve_vt_fd = -1,
+ .idle_action_not_before_usec = now(CLOCK_MONOTONIC),
+ };
+
+ m->devices = hashmap_new(&string_hash_ops);
+ m->seats = hashmap_new(&string_hash_ops);
+ m->sessions = hashmap_new(&string_hash_ops);
+ m->sessions_by_leader = hashmap_new(NULL);
+ m->users = hashmap_new(NULL);
+ m->inhibitors = hashmap_new(&string_hash_ops);
+ m->buttons = hashmap_new(&string_hash_ops);
+
+ m->user_units = hashmap_new(&string_hash_ops);
+ m->session_units = hashmap_new(&string_hash_ops);
+
+ if (!m->devices || !m->seats || !m->sessions || !m->sessions_by_leader || !m->users || !m->inhibitors || !m->buttons || !m->user_units || !m->session_units)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ manager_reset_config(m);
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static Manager* manager_unref(Manager *m) {
+ Session *session;
+ User *u;
+ Device *d;
+ Seat *s;
+ Inhibitor *i;
+ Button *b;
+
+ if (!m)
+ return NULL;
+
+ while ((session = hashmap_first(m->sessions)))
+ session_free(session);
+
+ while ((u = hashmap_first(m->users)))
+ user_free(u);
+
+ while ((d = hashmap_first(m->devices)))
+ device_free(d);
+
+ while ((s = hashmap_first(m->seats)))
+ seat_free(s);
+
+ while ((i = hashmap_first(m->inhibitors)))
+ inhibitor_free(i);
+
+ while ((b = hashmap_first(m->buttons)))
+ button_free(b);
+
+ hashmap_free(m->devices);
+ hashmap_free(m->seats);
+ hashmap_free(m->sessions);
+ hashmap_free(m->sessions_by_leader);
+ hashmap_free(m->users);
+ hashmap_free(m->inhibitors);
+ hashmap_free(m->buttons);
+ hashmap_free(m->brightness_writers);
+
+ hashmap_free(m->user_units);
+ hashmap_free(m->session_units);
+
+ sd_event_source_unref(m->idle_action_event_source);
+ sd_event_source_unref(m->inhibit_timeout_source);
+ sd_event_source_unref(m->scheduled_shutdown_timeout_source);
+ sd_event_source_unref(m->nologin_timeout_source);
+ sd_event_source_unref(m->wall_message_timeout_source);
+
+ sd_event_source_unref(m->console_active_event_source);
+ sd_event_source_unref(m->lid_switch_ignore_event_source);
+
+#if ENABLE_UTMP
+ sd_event_source_unref(m->utmp_event_source);
+#endif
+
+ safe_close(m->console_active_fd);
+
+ sd_device_monitor_unref(m->device_seat_monitor);
+ sd_device_monitor_unref(m->device_monitor);
+ sd_device_monitor_unref(m->device_vcsa_monitor);
+ sd_device_monitor_unref(m->device_button_monitor);
+
+ if (m->unlink_nologin)
+ (void) unlink_or_warn("/run/nologin");
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ safe_close(m->reserve_vt_fd);
+
+ strv_free(m->kill_only_users);
+ strv_free(m->kill_exclude_users);
+
+ free(m->scheduled_shutdown_type);
+ free(m->scheduled_shutdown_tty);
+ free(m->wall_message);
+ free(m->action_job);
+
+ strv_free(m->efi_boot_loader_entries);
+ free(m->efi_loader_entry_one_shot);
+
+ return mfree(m);
+}
+
+static int manager_enumerate_devices(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(m);
+
+ /* Loads devices from udev and creates seats for them as
+ * necessary */
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_tag(e, "master-of-seat");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ int k;
+
+ k = manager_process_seat_device(m, d);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_enumerate_buttons(Manager *m) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(m);
+
+ /* Loads buttons from udev */
+
+ if (manager_all_buttons_ignored(m))
+ return 0;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "input", true);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_tag(e, "power-switch");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ int k;
+
+ k = manager_process_button_device(m, d);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_enumerate_seats(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(m);
+
+ /* This loads data about seats stored on disk, but does not
+ * actually create any seats. Removes data of seats that no
+ * longer exist. */
+
+ d = opendir("/run/systemd/seats");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/seats: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ Seat *s;
+ int k;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ s = hashmap_get(m->seats, de->d_name);
+ if (!s) {
+ if (unlinkat(dirfd(d), de->d_name, 0) < 0)
+ log_warning("Failed to remove /run/systemd/seats/%s: %m",
+ de->d_name);
+ continue;
+ }
+
+ k = seat_load(s);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_enumerate_linger_users(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(m);
+
+ d = opendir("/var/lib/systemd/linger");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /var/lib/systemd/linger/: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int k;
+
+ dirent_ensure_type(d, de);
+ if (!dirent_is_file(de))
+ continue;
+
+ k = manager_add_user_by_name(m, de->d_name, NULL);
+ if (k < 0)
+ r = log_warning_errno(k, "Couldn't add lingering user %s, ignoring: %m", de->d_name);
+ }
+
+ return r;
+}
+
+static int manager_enumerate_users(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r, k;
+
+ assert(m);
+
+ /* Add lingering users */
+ r = manager_enumerate_linger_users(m);
+
+ /* Read in user data stored on disk */
+ d = opendir("/run/systemd/users");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/users: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ User *u;
+ uid_t uid;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = parse_uid(de->d_name, &uid);
+ if (k < 0) {
+ r = log_warning_errno(k, "Failed to parse filename /run/systemd/users/%s as UID.", de->d_name);
+ continue;
+ }
+
+ k = manager_add_user_by_uid(m, uid, &u);
+ if (k < 0) {
+ r = log_warning_errno(k, "Failed to add user by file name %s, ignoring: %m", de->d_name);
+ continue;
+ }
+
+ user_add_to_gc_queue(u);
+
+ k = user_load(u);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int parse_fdname(const char *fdname, char **session_id, dev_t *dev) {
+ _cleanup_strv_free_ char **parts = NULL;
+ _cleanup_free_ char *id = NULL;
+ unsigned major, minor;
+ int r;
+
+ parts = strv_split(fdname, "-");
+ if (!parts)
+ return -ENOMEM;
+ if (strv_length(parts) != 5)
+ return -EINVAL;
+
+ if (!streq(parts[0], "session"))
+ return -EINVAL;
+
+ id = strdup(parts[1]);
+ if (!id)
+ return -ENOMEM;
+
+ if (!streq(parts[2], "device"))
+ return -EINVAL;
+
+ r = safe_atou(parts[3], &major);
+ if (r < 0)
+ return r;
+ r = safe_atou(parts[4], &minor);
+ if (r < 0)
+ return r;
+
+ *dev = makedev(major, minor);
+ *session_id = TAKE_PTR(id);
+
+ return 0;
+}
+
+static int deliver_fd(Manager *m, const char *fdname, int fd) {
+ _cleanup_free_ char *id = NULL;
+ SessionDevice *sd;
+ struct stat st;
+ Session *s;
+ dev_t dev;
+ int r;
+
+ assert(m);
+ assert(fd >= 0);
+
+ r = parse_fdname(fdname, &id, &dev);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse fd name %s: %m", fdname);
+
+ s = hashmap_get(m->sessions, id);
+ if (!s)
+ /* If the session doesn't exist anymore, the associated session device attached to this fd
+ * doesn't either. Let's simply close this fd. */
+ return log_debug_errno(SYNTHETIC_ERRNO(ENXIO), "Failed to attach fd for unknown session: %s", id);
+
+ if (fstat(fd, &st) < 0)
+ /* The device is allowed to go away at a random point, in which case fstat() failing is
+ * expected. */
+ return log_debug_errno(errno, "Failed to stat device fd for session %s: %m", id);
+
+ if (!S_ISCHR(st.st_mode) || st.st_rdev != dev)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), "Device fd doesn't point to the expected character device node");
+
+ sd = hashmap_get(s->devices, &dev);
+ if (!sd)
+ /* Weird, we got an fd for a session device which wasn't recorded in the session state
+ * file... */
+ return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "Got fd for missing session device [%u:%u] in session %s",
+ major(dev), minor(dev), s->id);
+
+ log_debug("Attaching fd to session device [%u:%u] for session %s",
+ major(dev), minor(dev), s->id);
+
+ session_device_attach_fd(sd, fd, s->was_active);
+ return 0;
+}
+
+static int manager_attach_fds(Manager *m) {
+ _cleanup_strv_free_ char **fdnames = NULL;
+ int n;
+
+ /* Upon restart, PID1 will send us back all fds of session devices that we previously opened. Each
+ * file descriptor is associated with a given session. The session ids are passed through FDNAMES. */
+
+ n = sd_listen_fds_with_names(true, &fdnames);
+ if (n < 0)
+ return log_warning_errno(n, "Failed to acquire passed fd list: %m");
+ if (n == 0)
+ return 0;
+
+ for (int i = 0; i < n; i++) {
+ int fd = SD_LISTEN_FDS_START + i;
+
+ if (deliver_fd(m, fdnames[i], fd) >= 0)
+ continue;
+
+ /* Hmm, we couldn't deliver the fd to any session device object? If so, let's close the fd */
+ safe_close(fd);
+
+ /* Remove from fdstore as well */
+ (void) sd_notifyf(false,
+ "FDSTOREREMOVE=1\n"
+ "FDNAME=%s", fdnames[i]);
+ }
+
+ return 0;
+}
+
+static int manager_enumerate_sessions(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0, k;
+
+ assert(m);
+
+ /* Read in session data stored on disk */
+ d = opendir("/run/systemd/sessions");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/sessions: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ struct Session *s;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = manager_add_session(m, de->d_name, &s);
+ if (k < 0) {
+ r = log_warning_errno(k, "Failed to add session by file name %s, ignoring: %m", de->d_name);
+ continue;
+ }
+
+ session_add_to_gc_queue(s);
+
+ k = session_load(s);
+ if (k < 0)
+ r = k;
+ }
+
+ /* We might be restarted and PID1 could have sent us back the session device fds we previously
+ * saved. */
+ (void) manager_attach_fds(m);
+
+ return r;
+}
+
+static int manager_enumerate_inhibitors(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(m);
+
+ d = opendir("/run/systemd/inhibit");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/inhibit: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int k;
+ Inhibitor *i;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = manager_add_inhibitor(m, de->d_name, &i);
+ if (k < 0) {
+ r = log_warning_errno(k, "Couldn't add inhibitor %s, ignoring: %m", de->d_name);
+ continue;
+ }
+
+ k = inhibitor_load(i);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_dispatch_seat_udev(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(device);
+
+ manager_process_seat_device(m, device);
+ return 0;
+}
+
+static int manager_dispatch_device_udev(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(device);
+
+ manager_process_seat_device(m, device);
+ return 0;
+}
+
+static int manager_dispatch_vcsa_udev(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+ const char *name;
+
+ assert(m);
+ assert(device);
+
+ /* Whenever a VCSA device is removed try to reallocate our
+ * VTs, to make sure our auto VTs never go away. */
+
+ if (sd_device_get_sysname(device, &name) >= 0 &&
+ startswith(name, "vcsa") &&
+ device_for_action(device, DEVICE_ACTION_REMOVE))
+ seat_preallocate_vts(m->seat0);
+
+ return 0;
+}
+
+static int manager_dispatch_button_udev(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(device);
+
+ manager_process_button_device(m, device);
+ return 0;
+}
+
+static int manager_dispatch_console(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ assert(m->seat0);
+ assert(m->console_active_fd == fd);
+
+ seat_read_active_vt(m->seat0);
+ return 0;
+}
+
+static int manager_reserve_vt(Manager *m) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(m);
+
+ if (m->reserve_vt <= 0)
+ return 0;
+
+ if (asprintf(&p, "/dev/tty%u", m->reserve_vt) < 0)
+ return log_oom();
+
+ m->reserve_vt_fd = open(p, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (m->reserve_vt_fd < 0) {
+
+ /* Don't complain on VT-less systems */
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to pin reserved VT: %m");
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->bus);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = bus_add_implementation(m->bus, &manager_object, m);
+ if (r < 0)
+ return r;
+
+ r = bus_log_control_api_register(m->bus);
+ if (r < 0)
+ return r;
+
+ r = bus_match_signal_async(m->bus, NULL, bus_systemd_mgr, "JobRemoved", match_job_removed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for JobRemoved: %m");
+
+ r = bus_match_signal_async(m->bus, NULL, bus_systemd_mgr, "UnitRemoved", match_unit_removed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for UnitRemoved: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ NULL,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ match_properties_changed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for PropertiesChanged: %m");
+
+ r = bus_match_signal_async(m->bus, NULL, bus_systemd_mgr, "Reloading", match_reloading, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for Reloading: %m");
+
+ r = bus_call_method_async(m->bus, NULL, bus_systemd_mgr, "Subscribe", NULL, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable subscription: %m");
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.login1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
+
+static int manager_vt_switch(sd_event_source *src, const struct signalfd_siginfo *si, void *data) {
+ Manager *m = data;
+ Session *active, *iter;
+
+ /*
+ * We got a VT-switch signal and we have to acknowledge it immediately.
+ * Preferably, we'd just use m->seat0->active->vtfd, but unfortunately,
+ * old user-space might run multiple sessions on a single VT, *sigh*.
+ * Therefore, we have to iterate all sessions and find one with a vtfd
+ * on the requested VT.
+ * As only VTs with active controllers have VT_PROCESS set, our current
+ * notion of the active VT might be wrong (for instance if the switch
+ * happens while we setup VT_PROCESS). Therefore, read the current VT
+ * first and then use s->active->vtnr as reference. Note that this is
+ * not racy, as no further VT-switch can happen as long as we're in
+ * synchronous VT_PROCESS mode.
+ */
+
+ assert(m->seat0);
+ seat_read_active_vt(m->seat0);
+
+ active = m->seat0->active;
+ if (!active || active->vtnr < 1) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ /* We are requested to acknowledge the VT-switch signal by the kernel but
+ * there's no registered sessions for the current VT. Normally this
+ * shouldn't happen but something wrong might have happened when we tried
+ * to release the VT. Better be safe than sorry, and try to release the VT
+ * one more time otherwise the user will be locked with the current VT. */
+
+ log_warning("Received VT_PROCESS signal without a registered session, restoring VT.");
+
+ /* At this point we only have the kernel mapping for referring to the
+ * current VT. */
+ fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ log_warning_errno(fd, "Failed to open, ignoring: %m");
+ return 0;
+ }
+
+ r = vt_release(fd, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to release VT, ignoring: %m");
+
+ return 0;
+ }
+
+ if (active->vtfd >= 0) {
+ session_leave_vt(active);
+ } else {
+ LIST_FOREACH(sessions_by_seat, iter, m->seat0->sessions) {
+ if (iter->vtnr == active->vtnr && iter->vtfd >= 0) {
+ session_leave_vt(iter);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int manager_connect_console(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(m->console_active_fd < 0);
+
+ /* On certain systems (such as S390, Xen, and containers) /dev/tty0 does not exist (as there is no VC), so
+ * don't fail if we can't open it. */
+
+ if (access("/dev/tty0", F_OK) < 0)
+ return 0;
+
+ m->console_active_fd = open("/sys/class/tty/tty0/active", O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (m->console_active_fd < 0) {
+
+ /* On some systems /dev/tty0 may exist even though /sys/class/tty/tty0 does not. These are broken, but
+ * common. Let's complain but continue anyway. */
+ if (errno == ENOENT) {
+ log_warning_errno(errno, "System has /dev/tty0 but not /sys/class/tty/tty0/active which is broken, ignoring: %m");
+ return 0;
+ }
+
+ return log_error_errno(errno, "Failed to open /sys/class/tty/tty0/active: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->console_active_event_source, m->console_active_fd, 0, manager_dispatch_console, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch foreground console: %m");
+
+ /*
+ * SIGRTMIN is used as global VT-release signal, SIGRTMIN + 1 is used
+ * as VT-acquire signal. We ignore any acquire-events (yes, we still
+ * have to provide a valid signal-number for it!) and acknowledge all
+ * release events immediately.
+ */
+
+ if (SIGRTMIN + 1 > SIGRTMAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough real-time signals available: %u-%u",
+ SIGRTMIN, SIGRTMAX);
+
+ assert_se(ignore_signals(SIGRTMIN + 1, -1) >= 0);
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN, -1) >= 0);
+
+ r = sd_event_add_signal(m->event, NULL, SIGRTMIN, manager_vt_switch, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to subscribe to signal: %m");
+
+ return 0;
+}
+
+static int manager_connect_udev(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->device_seat_monitor);
+ assert(!m->device_monitor);
+ assert(!m->device_vcsa_monitor);
+ assert(!m->device_button_monitor);
+
+ r = sd_device_monitor_new(&m->device_seat_monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_tag(m->device_seat_monitor, "master-of-seat");
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(m->device_seat_monitor, m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_start(m->device_seat_monitor, manager_dispatch_seat_udev, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(m->device_seat_monitor), "logind-seat-monitor");
+
+ r = sd_device_monitor_new(&m->device_monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "input", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "graphics", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "drm", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(m->device_monitor, m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_start(m->device_monitor, manager_dispatch_device_udev, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(m->device_monitor), "logind-device-monitor");
+
+ /* Don't watch keys if nobody cares */
+ if (!manager_all_buttons_ignored(m)) {
+ r = sd_device_monitor_new(&m->device_button_monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_tag(m->device_button_monitor, "power-switch");
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_button_monitor, "input", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(m->device_button_monitor, m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_start(m->device_button_monitor, manager_dispatch_button_udev, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(m->device_button_monitor), "logind-button-monitor");
+ }
+
+ /* Don't bother watching VCSA devices, if nobody cares */
+ if (m->n_autovts > 0 && m->console_active_fd >= 0) {
+
+ r = sd_device_monitor_new(&m->device_vcsa_monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_vcsa_monitor, "vc", NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(m->device_vcsa_monitor, m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_start(m->device_vcsa_monitor, manager_dispatch_vcsa_udev, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(m->device_vcsa_monitor), "logind-vcsa-monitor");
+ }
+
+ return 0;
+}
+
+static void manager_gc(Manager *m, bool drop_not_started) {
+ Seat *seat;
+ Session *session;
+ User *user;
+
+ assert(m);
+
+ while ((seat = m->seat_gc_queue)) {
+ LIST_REMOVE(gc_queue, m->seat_gc_queue, seat);
+ seat->in_gc_queue = false;
+
+ if (seat_may_gc(seat, drop_not_started)) {
+ seat_stop(seat, /* force = */ false);
+ seat_free(seat);
+ }
+ }
+
+ while ((session = m->session_gc_queue)) {
+ LIST_REMOVE(gc_queue, m->session_gc_queue, session);
+ session->in_gc_queue = false;
+
+ /* First, if we are not closing yet, initiate stopping. */
+ if (session_may_gc(session, drop_not_started) &&
+ session_get_state(session) != SESSION_CLOSING)
+ (void) session_stop(session, /* force = */ false);
+
+ /* Normally, this should make the session referenced again, if it doesn't then let's get rid
+ * of it immediately. */
+ if (session_may_gc(session, drop_not_started)) {
+ (void) session_finalize(session);
+ session_free(session);
+ }
+ }
+
+ while ((user = m->user_gc_queue)) {
+ LIST_REMOVE(gc_queue, m->user_gc_queue, user);
+ user->in_gc_queue = false;
+
+ /* First step: queue stop jobs */
+ if (user_may_gc(user, drop_not_started))
+ (void) user_stop(user, false);
+
+ /* Second step: finalize user */
+ if (user_may_gc(user, drop_not_started)) {
+ (void) user_finalize(user);
+ user_free(user);
+ }
+ }
+}
+
+static int manager_dispatch_idle_action(sd_event_source *s, uint64_t t, void *userdata) {
+ Manager *m = userdata;
+ struct dual_timestamp since;
+ usec_t n, elapse;
+ int r;
+
+ assert(m);
+
+ if (m->idle_action == HANDLE_IGNORE ||
+ m->idle_action_usec <= 0)
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+
+ r = manager_get_idle_hint(m, &since);
+ if (r <= 0)
+ /* Not idle. Let's check if after a timeout it might be idle then. */
+ elapse = n + m->idle_action_usec;
+ else {
+ /* Idle! Let's see if it's time to do something, or if
+ * we shall sleep for longer. */
+
+ if (n >= since.monotonic + m->idle_action_usec &&
+ (m->idle_action_not_before_usec <= 0 || n >= m->idle_action_not_before_usec + m->idle_action_usec)) {
+ log_info("System idle. Doing %s operation.", handle_action_to_string(m->idle_action));
+
+ manager_handle_action(m, 0, m->idle_action, false, false);
+ m->idle_action_not_before_usec = n;
+ }
+
+ elapse = MAX(since.monotonic, m->idle_action_not_before_usec) + m->idle_action_usec;
+ }
+
+ if (!m->idle_action_event_source) {
+
+ r = sd_event_add_time(
+ m->event,
+ &m->idle_action_event_source,
+ CLOCK_MONOTONIC,
+ elapse, USEC_PER_SEC*30,
+ manager_dispatch_idle_action, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add idle event source: %m");
+
+ r = sd_event_source_set_priority(m->idle_action_event_source, SD_EVENT_PRIORITY_IDLE+10);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set idle event source priority: %m");
+ } else {
+ r = sd_event_source_set_time(m->idle_action_event_source, elapse);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set idle event timer: %m");
+
+ r = sd_event_source_set_enabled(m->idle_action_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable idle event timer: %m");
+ }
+
+ return 0;
+}
+
+static int manager_dispatch_reload_signal(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+ int r;
+
+ manager_reset_config(m);
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse config file, using defaults: %m");
+ else
+ log_info("Config file reloaded.");
+
+ return 0;
+}
+
+static int manager_startup(Manager *m) {
+ int r;
+ Seat *seat;
+ Session *session;
+ User *user;
+ Button *button;
+ Inhibitor *inhibitor;
+
+ assert(m);
+
+ r = sd_event_add_signal(m->event, NULL, SIGHUP, manager_dispatch_reload_signal, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register SIGHUP handler: %m");
+
+ /* Connect to utmp */
+ manager_connect_utmp(m);
+
+ /* Connect to console */
+ r = manager_connect_console(m);
+ if (r < 0)
+ return r;
+
+ /* Connect to udev */
+ r = manager_connect_udev(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create udev watchers: %m");
+
+ /* Connect to the bus */
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ /* Instantiate magic seat 0 */
+ r = manager_add_seat(m, "seat0", &m->seat0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add seat0: %m");
+
+ r = manager_set_lid_switch_ignore(m, 0 + m->holdoff_timeout_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set up lid switch ignore event source: %m");
+
+ /* Deserialize state */
+ r = manager_enumerate_devices(m);
+ if (r < 0)
+ log_warning_errno(r, "Device enumeration failed: %m");
+
+ r = manager_enumerate_seats(m);
+ if (r < 0)
+ log_warning_errno(r, "Seat enumeration failed: %m");
+
+ r = manager_enumerate_users(m);
+ if (r < 0)
+ log_warning_errno(r, "User enumeration failed: %m");
+
+ r = manager_enumerate_sessions(m);
+ if (r < 0)
+ log_warning_errno(r, "Session enumeration failed: %m");
+
+ r = manager_enumerate_inhibitors(m);
+ if (r < 0)
+ log_warning_errno(r, "Inhibitor enumeration failed: %m");
+
+ r = manager_enumerate_buttons(m);
+ if (r < 0)
+ log_warning_errno(r, "Button enumeration failed: %m");
+
+ /* Remove stale objects before we start them */
+ manager_gc(m, false);
+
+ /* Reserve the special reserved VT */
+ manager_reserve_vt(m);
+
+ /* Read in utmp if it exists */
+ manager_read_utmp(m);
+
+ /* And start everything */
+ HASHMAP_FOREACH(seat, m->seats)
+ (void) seat_start(seat);
+
+ HASHMAP_FOREACH(user, m->users)
+ (void) user_start(user);
+
+ HASHMAP_FOREACH(session, m->sessions)
+ (void) session_start(session, NULL, NULL);
+
+ HASHMAP_FOREACH(inhibitor, m->inhibitors) {
+ (void) inhibitor_start(inhibitor);
+
+ /* Let's see if the inhibitor is dead now, then remove it */
+ if (inhibitor_is_orphan(inhibitor)) {
+ inhibitor_stop(inhibitor);
+ inhibitor_free(inhibitor);
+ }
+ }
+
+ HASHMAP_FOREACH(button, m->buttons)
+ button_check_switches(button);
+
+ manager_dispatch_idle_action(NULL, 0, m);
+
+ return 0;
+}
+
+static int manager_run(Manager *m) {
+ int r;
+
+ assert(m);
+
+ for (;;) {
+ r = sd_event_get_state(m->event);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ return 0;
+
+ manager_gc(m, true);
+
+ r = manager_dispatch_delayed(m, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = sd_event_run(m->event, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ int r;
+
+ log_set_facility(LOG_AUTH);
+ log_setup_service();
+
+ r = service_parse_argv("systemd-logind.service",
+ "Manager for user logins and devices and privileged operations.",
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object),
+ argc, argv);
+ if (r <= 0)
+ return r;
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ /* Always create the directories people can create inotify watches in. Note that some applications
+ * might check for the existence of /run/systemd/seats/ to determine whether logind is available, so
+ * please always make sure these directories are created early on and unconditionally. */
+ (void) mkdir_label("/run/systemd/seats", 0755);
+ (void) mkdir_label("/run/systemd/users", 0755);
+ (void) mkdir_label("/run/systemd/sessions", 0755);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGHUP, SIGTERM, SIGINT, SIGCHLD, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager object: %m");
+
+ (void) manager_parse_config_file(m);
+
+ r = manager_startup(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fully start up daemon: %m");
+
+ notify_message = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+ return manager_run(m);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/login/logind.conf.in b/src/login/logind.conf.in
new file mode 100644
index 0000000..8b22026
--- /dev/null
+++ b/src/login/logind.conf.in
@@ -0,0 +1,41 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See logind.conf(5) for details.
+
+[Login]
+#NAutoVTs=6
+#ReserveVT=6
+#KillUserProcesses=@KILL_USER_PROCESSES@
+#KillOnlyUsers=
+#KillExcludeUsers=root
+#InhibitDelayMaxSec=5
+#UserStopDelaySec=10
+#HandlePowerKey=poweroff
+#HandleSuspendKey=suspend
+#HandleHibernateKey=hibernate
+#HandleLidSwitch=suspend
+#HandleLidSwitchExternalPower=suspend
+#HandleLidSwitchDocked=ignore
+#HandleRebootKey=reboot
+#PowerKeyIgnoreInhibited=no
+#SuspendKeyIgnoreInhibited=no
+#HibernateKeyIgnoreInhibited=no
+#LidSwitchIgnoreInhibited=yes
+#RebootKeyIgnoreInhibited=no
+#HoldoffTimeoutSec=30s
+#IdleAction=ignore
+#IdleActionSec=30min
+#RuntimeDirectorySize=10%
+#RuntimeDirectoryInodes=400k
+#RemoveIPC=yes
+#InhibitorsMax=8192
+#SessionsMax=8192
diff --git a/src/login/logind.h b/src/login/logind.h
new file mode 100644
index 0000000..49d1f5f
--- /dev/null
+++ b/src/login/logind.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/stat.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "list.h"
+#include "set.h"
+#include "time-util.h"
+#include "user-record.h"
+
+typedef struct Manager Manager;
+
+#include "logind-action.h"
+#include "logind-button.h"
+#include "logind-device.h"
+#include "logind-inhibit.h"
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ Hashmap *devices;
+ Hashmap *seats;
+ Hashmap *sessions;
+ Hashmap *sessions_by_leader;
+ Hashmap *users; /* indexed by UID */
+ Hashmap *inhibitors;
+ Hashmap *buttons;
+ Hashmap *brightness_writers;
+
+ LIST_HEAD(Seat, seat_gc_queue);
+ LIST_HEAD(Session, session_gc_queue);
+ LIST_HEAD(User, user_gc_queue);
+
+ sd_device_monitor *device_seat_monitor, *device_monitor, *device_vcsa_monitor, *device_button_monitor;
+
+ sd_event_source *console_active_event_source;
+
+#if ENABLE_UTMP
+ sd_event_source *utmp_event_source;
+#endif
+
+ int console_active_fd;
+
+ unsigned n_autovts;
+
+ unsigned reserve_vt;
+ int reserve_vt_fd;
+
+ Seat *seat0;
+
+ char **kill_only_users, **kill_exclude_users;
+ bool kill_user_processes;
+
+ unsigned long session_counter;
+ unsigned long inhibit_counter;
+
+ Hashmap *session_units;
+ Hashmap *user_units;
+
+ usec_t inhibit_delay_max;
+ usec_t user_stop_delay;
+
+ /* If an action is currently being executed or is delayed,
+ * this is != 0 and encodes what is being done */
+ InhibitWhat action_what;
+
+ /* If a shutdown/suspend was delayed due to a inhibitor this
+ contains the unit name we are supposed to start after the
+ delay is over */
+ const char *action_unit;
+
+ /* If a shutdown/suspend is currently executed, then this is
+ * the job of it */
+ char *action_job;
+ sd_event_source *inhibit_timeout_source;
+
+ char *scheduled_shutdown_type;
+ usec_t scheduled_shutdown_timeout;
+ sd_event_source *scheduled_shutdown_timeout_source;
+ uid_t scheduled_shutdown_uid;
+ char *scheduled_shutdown_tty;
+ sd_event_source *nologin_timeout_source;
+ bool unlink_nologin;
+
+ char *wall_message;
+ unsigned enable_wall_messages;
+ sd_event_source *wall_message_timeout_source;
+
+ bool shutdown_dry_run;
+
+ sd_event_source *idle_action_event_source;
+ usec_t idle_action_usec;
+ usec_t idle_action_not_before_usec;
+ HandleAction idle_action;
+
+ HandleAction handle_power_key;
+ HandleAction handle_suspend_key;
+ HandleAction handle_hibernate_key;
+ HandleAction handle_lid_switch;
+ HandleAction handle_lid_switch_ep;
+ HandleAction handle_lid_switch_docked;
+ HandleAction handle_reboot_key;
+
+ bool power_key_ignore_inhibited;
+ bool suspend_key_ignore_inhibited;
+ bool hibernate_key_ignore_inhibited;
+ bool lid_switch_ignore_inhibited;
+ bool reboot_key_ignore_inhibited;
+
+ bool remove_ipc;
+
+ Hashmap *polkit_registry;
+
+ usec_t holdoff_timeout_usec;
+ sd_event_source *lid_switch_ignore_event_source;
+
+ uint64_t runtime_dir_size;
+ uint64_t runtime_dir_inodes;
+ uint64_t sessions_max;
+ uint64_t inhibitors_max;
+
+ char **efi_boot_loader_entries;
+ bool efi_boot_loader_entries_set;
+
+ char *efi_loader_entry_one_shot;
+ struct stat efi_loader_entry_one_shot_stat;
+};
+
+void manager_reset_config(Manager *m);
+int manager_parse_config_file(Manager *m);
+
+int manager_add_device(Manager *m, const char *sysfs, bool master, Device **ret_device);
+int manager_add_button(Manager *m, const char *name, Button **ret_button);
+int manager_add_seat(Manager *m, const char *id, Seat **ret_seat);
+int manager_add_session(Manager *m, const char *id, Session **ret_session);
+int manager_add_user(Manager *m, UserRecord *ur, User **ret_user);
+int manager_add_user_by_name(Manager *m, const char *name, User **ret_user);
+int manager_add_user_by_uid(Manager *m, uid_t uid, User **ret_user);
+int manager_add_inhibitor(Manager *m, const char* id, Inhibitor **ret_inhibitor);
+
+int manager_process_seat_device(Manager *m, sd_device *d);
+int manager_process_button_device(Manager *m, sd_device *d);
+
+int manager_spawn_autovt(Manager *m, unsigned vtnr);
+
+bool manager_shall_kill(Manager *m, const char *user);
+
+int manager_get_idle_hint(Manager *m, dual_timestamp *t);
+
+int manager_get_user_by_pid(Manager *m, pid_t pid, User **user);
+int manager_get_session_by_pid(Manager *m, pid_t pid, Session **session);
+
+bool manager_is_lid_closed(Manager *m);
+bool manager_is_docked_or_external_displays(Manager *m);
+bool manager_is_on_external_power(void);
+bool manager_all_buttons_ignored(Manager *m);
+
+int manager_read_utmp(Manager *m);
+void manager_connect_utmp(Manager *m);
+void manager_reconnect_utmp(Manager *m);
+
+/* gperf lookup function */
+const struct ConfigPerfItem* logind_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+int manager_set_lid_switch_ignore(Manager *m, usec_t until);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_n_autovts);
+CONFIG_PARSER_PROTOTYPE(config_parse_tmpfs_size);
+
+int manager_setup_wall_message_timer(Manager *m);
+bool logind_wall_tty_filter(const char *tty, void *userdata);
+
+int manager_read_efi_boot_loader_entries(Manager *m);
diff --git a/src/login/meson.build b/src/login/meson.build
new file mode 100644
index 0000000..e096109
--- /dev/null
+++ b/src/login/meson.build
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_logind_sources = files('''
+ logind.c
+ logind.h
+'''.split())
+
+logind_gperf_c = custom_target(
+ 'logind_gperf.c',
+ input : 'logind-gperf.gperf',
+ output : 'logind-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+liblogind_core_sources = files('''
+ logind-acl.h
+ logind-action.c
+ logind-action.h
+ logind-brightness.c
+ logind-brightness.h
+ logind-button.c
+ logind-button.h
+ logind-core.c
+ logind-dbus.c
+ logind-dbus.h
+ logind-device.c
+ logind-device.h
+ logind-inhibit.c
+ logind-inhibit.h
+ logind-seat-dbus.c
+ logind-seat-dbus.h
+ logind-seat.c
+ logind-seat.h
+ logind-session-dbus.c
+ logind-session-dbus.h
+ logind-session-device.c
+ logind-session-device.h
+ logind-session.c
+ logind-session.h
+ logind-user-dbus.c
+ logind-user-dbus.h
+ logind-user.c
+ logind-user.h
+ logind-utmp.c
+'''.split())
+
+liblogind_core_sources += [logind_gperf_c]
+
+logind_acl_c = files('logind-acl.c')
+if conf.get('HAVE_ACL') == 1
+ liblogind_core_sources += logind_acl_c
+endif
+
+liblogind_core = static_library(
+ 'logind-core',
+ liblogind_core_sources,
+ include_directories : includes,
+ dependencies : [libacl])
+
+loginctl_sources = files('''
+ loginctl.c
+ sysfs-show.h
+ sysfs-show.c
+'''.split())
+
+user_runtime_dir_sources = files('''
+ user-runtime-dir.c
+'''.split())
+
+pam_systemd_sym = 'src/login/pam_systemd.sym'
+pam_systemd_c = files('pam_systemd.c')
+
+if conf.get('ENABLE_LOGIND') == 1
+ logind_conf = configure_file(
+ input : 'logind.conf.in',
+ output : 'logind.conf',
+ configuration : substs)
+ if install_sysconfdir
+ install_data(logind_conf,
+ install_dir : pkgsysconfdir)
+ endif
+
+ install_data('org.freedesktop.login1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.login1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.login1.policy',
+ install_dir : polkitpolicydir)
+
+ install_data('70-power-switch.rules', install_dir : udevrulesdir)
+
+ seat_rules = configure_file(
+ input : '71-seat.rules.in',
+ output : '71-seat.rules',
+ configuration : substs)
+ install_data(seat_rules,
+ install_dir : udevrulesdir)
+
+ custom_target(
+ '70-uaccess.rules',
+ input : '70-uaccess.rules.m4',
+ output: '70-uaccess.rules',
+ command : [meson_apply_m4, config_h, '@INPUT@'],
+ capture : true,
+ install : conf.get('HAVE_ACL') == 1,
+ install_dir : udevrulesdir)
+
+ custom_target(
+ '73-seat-late.rules',
+ input : '73-seat-late.rules.m4',
+ output: '73-seat-late.rules',
+ command : [meson_apply_m4, config_h, '@INPUT@'],
+ capture : true,
+ install : true,
+ install_dir : udevrulesdir)
+
+ custom_target(
+ 'systemd-user',
+ input : 'systemd-user.m4',
+ output: 'systemd-user',
+ command : [meson_apply_m4, config_h, '@INPUT@'],
+ capture : true,
+ install : pamconfdir != 'no',
+ install_dir : pamconfdir)
+endif
diff --git a/src/login/org.freedesktop.login1.conf b/src/login/org.freedesktop.login1.conf
new file mode 100644
index 0000000..ac14942
--- /dev/null
+++ b/src/login/org.freedesktop.login1.conf
@@ -0,0 +1,324 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.login1"/>
+ <allow send_destination="org.freedesktop.login1"/>
+ <allow receive_sender="org.freedesktop.login1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.login1"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetSessionByPID"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetUser"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetUserByPID"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="GetSeat"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ListSessions"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ListUsers"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ListSeats"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ListInhibitors"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Inhibit"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetUserLinger"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ActivateSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ActivateSessionOnSeat"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="LockSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="UnlockSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="LockSessions"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="UnlockSessions"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="KillSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="KillUser"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="TerminateSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="TerminateUser"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="TerminateSeat"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="PowerOff"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Reboot"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Halt"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Suspend"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="Hibernate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="HybridSleep"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SuspendThenHibernate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanPowerOff"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanReboot"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanHalt"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanSuspend"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanHibernate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanHybridSleep"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanSuspendThenHibernate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="ScheduleShutdown"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CancelScheduledShutdown"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanRebootParameter"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetRebootParameter"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanRebootToFirmwareSetup"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetRebootToFirmwareSetup"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanRebootToBootLoaderMenu"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetRebootToBootLoaderMenu"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="CanRebootToBootLoaderEntry"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetRebootToBootLoaderEntry"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="SetWallMessage"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="AttachDevice"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Manager"
+ send_member="FlushDevices"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="Terminate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="ActivateSession"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="SwitchTo"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="SwitchToPrevious"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Seat"
+ send_member="SwitchToNext"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Terminate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Activate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Lock"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Unlock"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="SetIdleHint"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="SetLockedHint"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="Kill"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="TakeControl"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="ReleaseControl"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="SetType"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="TakeDevice"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="ReleaseDevice"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="PauseDeviceComplete"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.Session"
+ send_member="SetBrightness"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.User"
+ send_member="Terminate"/>
+
+ <allow send_destination="org.freedesktop.login1"
+ send_interface="org.freedesktop.login1.User"
+ send_member="Kill"/>
+
+ <allow receive_sender="org.freedesktop.login1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/login/org.freedesktop.login1.policy b/src/login/org.freedesktop.login1.policy
new file mode 100644
index 0000000..80ebb39
--- /dev/null
+++ b/src/login/org.freedesktop.login1.policy
@@ -0,0 +1,415 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.login1.inhibit-block-shutdown">
+ <description gettext-domain="systemd">Allow applications to inhibit system shutdown</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system shutdown.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-delay-shutdown org.freedesktop.login1.inhibit-block-sleep org.freedesktop.login1.inhibit-delay-sleep org.freedesktop.login1.inhibit-block-idle</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-delay-shutdown">
+ <description gettext-domain="systemd">Allow applications to delay system shutdown</description>
+ <message gettext-domain="systemd">Authentication is required for an application to delay system shutdown.</message>
+ <defaults>
+ <allow_any>yes</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-delay-sleep</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-block-sleep">
+ <description gettext-domain="systemd">Allow applications to inhibit system sleep</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system sleep.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-delay-sleep org.freedesktop.login1.inhibit-block-idle</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-delay-sleep">
+ <description gettext-domain="systemd">Allow applications to delay system sleep</description>
+ <message gettext-domain="systemd">Authentication is required for an application to delay system sleep.</message>
+ <defaults>
+ <allow_any>yes</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-block-idle">
+ <description gettext-domain="systemd">Allow applications to inhibit automatic system suspend</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit automatic system suspend.</message>
+ <defaults>
+ <allow_any>yes</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-power-key">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the power key</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the power key.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-handle-suspend-key org.freedesktop.login1.inhibit-handle-hibernate-key org.freedesktop.login1.inhibit-handle-lid-switch</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-suspend-key">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the suspend key</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the suspend key.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-handle-hibernate-key org.freedesktop.login1.inhibit-handle-lid-switch</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-hibernate-key">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the hibernate key</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the hibernate key.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-lid-switch">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the lid switch</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the lid switch.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.inhibit-handle-reboot-key">
+ <description gettext-domain="systemd">Allow applications to inhibit system handling of the reboot key</description>
+ <message gettext-domain="systemd">Authentication is required for an application to inhibit system handling of the reboot key.</message>
+ <defaults>
+ <allow_any>no</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.inhibit-handle-suspend-key org.freedesktop.login1.inhibit-handle-hibernate-key org.freedesktop.login1.inhibit-handle-lid-switch</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.set-self-linger">
+ <description gettext-domain="systemd">Allow non-logged-in user to run programs</description>
+ <message gettext-domain="systemd">Explicit request is required to run programs as a non-logged-in user.</message>
+ <defaults>
+ <allow_any>yes</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.set-user-linger">
+ <description gettext-domain="systemd">Allow non-logged-in users to run programs</description>
+ <message gettext-domain="systemd">Authentication is required to run programs as a non-logged-in user.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.attach-device">
+ <description gettext-domain="systemd">Allow attaching devices to seats</description>
+ <message gettext-domain="systemd">Authentication is required to attach a device to a seat.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.flush-devices</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.flush-devices">
+ <description gettext-domain="systemd">Flush device to seat attachments</description>
+ <message gettext-domain="systemd">Authentication is required to reset how devices are attached to seats.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.power-off">
+ <description gettext-domain="systemd">Power off the system</description>
+ <message gettext-domain="systemd">Authentication is required to power off the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.set-wall-message</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.power-off-multiple-sessions">
+ <description gettext-domain="systemd">Power off the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required to power off the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.power-off</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.power-off-ignore-inhibit">
+ <description gettext-domain="systemd">Power off the system while an application is inhibiting this</description>
+ <message gettext-domain="systemd">Authentication is required to power off the system while an application is inhibiting this.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.power-off</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.reboot">
+ <description gettext-domain="systemd">Reboot the system</description>
+ <message gettext-domain="systemd">Authentication is required to reboot the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.set-wall-message</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.reboot-multiple-sessions">
+ <description gettext-domain="systemd">Reboot the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required to reboot the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.reboot-ignore-inhibit">
+ <description gettext-domain="systemd">Reboot the system while an application is inhibiting this</description>
+ <message gettext-domain="systemd">Authentication is required to reboot the system while an application is inhibiting this.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.halt">
+ <description gettext-domain="systemd">Halt the system</description>
+ <message gettext-domain="systemd">Authentication is required to halt the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.set-wall-message</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.halt-multiple-sessions">
+ <description gettext-domain="systemd">Halt the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required to halt the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.halt</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.halt-ignore-inhibit">
+ <description gettext-domain="systemd">Halt the system while an application is inhibiting this</description>
+ <message gettext-domain="systemd">Authentication is required to halt the system while an application is inhibiting this.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.halt</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.suspend">
+ <description gettext-domain="systemd">Suspend the system</description>
+ <message gettext-domain="systemd">Authentication is required to suspend the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.suspend-multiple-sessions">
+ <description gettext-domain="systemd">Suspend the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required to suspend the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.suspend</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.suspend-ignore-inhibit">
+ <description gettext-domain="systemd">Suspend the system while an application is inhibiting this</description>
+ <message gettext-domain="systemd">Authentication is required to suspend the system while an application is inhibiting this.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.suspend</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.hibernate">
+ <description gettext-domain="systemd">Hibernate the system</description>
+ <message gettext-domain="systemd">Authentication is required to hibernate the system.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.hibernate-multiple-sessions">
+ <description gettext-domain="systemd">Hibernate the system while other users are logged in</description>
+ <message gettext-domain="systemd">Authentication is required to hibernate the system while other users are logged in.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.hibernate</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.hibernate-ignore-inhibit">
+ <description gettext-domain="systemd">Hibernate the system while an application is inhibiting this</description>
+ <message gettext-domain="systemd">Authentication is required to hibernate the system while an application is inhibiting this.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.hibernate</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.manage">
+ <description gettext-domain="systemd">Manage active sessions, users and seats</description>
+ <message gettext-domain="systemd">Authentication is required to manage active sessions, users and seats.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.lock-sessions">
+ <description gettext-domain="systemd">Lock or unlock active sessions</description>
+ <message gettext-domain="systemd">Authentication is required to lock or unlock active sessions.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.set-reboot-parameter">
+ <description gettext-domain="systemd">Set the reboot "reason" in the kernel</description>
+ <message gettext-domain="systemd">Authentication is required to set the reboot "reason" in the kernel.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.set-reboot-to-firmware-setup">
+ <description gettext-domain="systemd">Indicate to the firmware to boot to setup interface</description>
+ <message gettext-domain="systemd">Authentication is required to indicate to the firmware to boot to setup interface.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.set-reboot-to-boot-loader-menu">
+ <description gettext-domain="systemd">Indicate to the boot loader to boot to the boot loader menu</description>
+ <message gettext-domain="systemd">Authentication is required to indicate to the boot loader to boot to the boot loader menu.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.set-reboot-to-boot-loader-entry">
+ <description gettext-domain="systemd">Indicate to the boot loader to boot a specific entry</description>
+ <message gettext-domain="systemd">Authentication is required to indicate to the boot loader to boot into a specific boot loader entry.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.reboot</annotate>
+ </action>
+
+ <action id="org.freedesktop.login1.set-wall-message">
+ <description gettext-domain="systemd">Set a wall message</description>
+ <message gettext-domain="systemd">Authentication is required to set a wall message</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.login1.chvt">
+ <description gettext-domain="systemd">Change Session</description>
+ <message gettext-domain="systemd">Authentication is required to change the virtual terminal.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>yes</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/login/org.freedesktop.login1.service b/src/login/org.freedesktop.login1.service
new file mode 100644
index 0000000..6d443cf
--- /dev/null
+++ b/src/login/org.freedesktop.login1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.login1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.login1.service
diff --git a/src/login/pam_systemd.c b/src/login/pam_systemd.c
new file mode 100644
index 0000000..8e7a94d
--- /dev/null
+++ b/src/login/pam_systemd.c
@@ -0,0 +1,1024 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <security/_pam_macros.h>
+#include <security/pam_ext.h>
+#include <security/pam_misc.h>
+#include <security/pam_modules.h>
+#include <security/pam_modutil.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-internal.h"
+#include "bus-locator.h"
+#include "cgroup-setup.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "locale-util.h"
+#include "login-util.h"
+#include "macro.h"
+#include "pam-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "userdb.h"
+
+#define LOGIN_SLOW_BUS_CALL_TIMEOUT_USEC (2*USEC_PER_MINUTE)
+
+static int parse_argv(
+ pam_handle_t *handle,
+ int argc, const char **argv,
+ const char **class,
+ const char **type,
+ const char **desktop,
+ bool *debug) {
+
+ unsigned i;
+
+ assert(argc >= 0);
+ assert(argc == 0 || argv);
+
+ for (i = 0; i < (unsigned) argc; i++) {
+ const char *p;
+
+ if ((p = startswith(argv[i], "class="))) {
+ if (class)
+ *class = p;
+
+ } else if ((p = startswith(argv[i], "type="))) {
+ if (type)
+ *type = p;
+
+ } else if ((p = startswith(argv[i], "desktop="))) {
+ if (desktop)
+ *desktop = p;
+
+ } else if (streq(argv[i], "debug")) {
+ if (debug)
+ *debug = true;
+
+ } else if ((p = startswith(argv[i], "debug="))) {
+ int k;
+
+ k = parse_boolean(p);
+ if (k < 0)
+ pam_syslog(handle, LOG_WARNING, "Failed to parse debug= argument, ignoring: %s", p);
+ else if (debug)
+ *debug = k;
+
+ } else
+ pam_syslog(handle, LOG_WARNING, "Unknown parameter '%s', ignoring", argv[i]);
+ }
+
+ return 0;
+}
+
+static int acquire_user_record(
+ pam_handle_t *handle,
+ UserRecord **ret_record) {
+
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ const char *username = NULL, *json = NULL;
+ _cleanup_free_ char *field = NULL;
+ int r;
+
+ assert(handle);
+
+ r = pam_get_user(handle, &username, NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to get user name: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ if (isempty(username)) {
+ pam_syslog(handle, LOG_ERR, "User name not valid.");
+ return PAM_SERVICE_ERR;
+ }
+
+ /* If pam_systemd_homed (or some other module) already acquired the user record we can reuse it
+ * here. */
+ field = strjoin("systemd-user-record-", username);
+ if (!field)
+ return pam_log_oom(handle);
+
+ r = pam_get_data(handle, field, (const void**) &json);
+ if (!IN_SET(r, PAM_SUCCESS, PAM_NO_MODULE_DATA)) {
+ pam_syslog(handle, LOG_ERR, "Failed to get PAM user record data: %s", pam_strerror(handle, r));
+ return r;
+ }
+ if (r == PAM_SUCCESS && json) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ /* Parse cached record */
+ r = json_parse(json, JSON_PARSE_SENSITIVE, &v, NULL, NULL);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to parse JSON user record: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ ur = user_record_new();
+ if (!ur)
+ return pam_log_oom(handle);
+
+ r = user_record_load(ur, v, USER_RECORD_LOAD_REFUSE_SECRET);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to load user record: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ /* Safety check if cached record actually matches what we are looking for */
+ if (!streq_ptr(username, ur->user_name)) {
+ pam_syslog(handle, LOG_ERR, "Acquired user record does not match user name.");
+ return PAM_SERVICE_ERR;
+ }
+ } else {
+ _cleanup_free_ char *formatted = NULL;
+
+ /* Request the record ourselves */
+ r = userdb_by_name(username, 0, &ur);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to get user record: %s", strerror_safe(r));
+ return PAM_USER_UNKNOWN;
+ }
+
+ r = json_variant_format(ur->json, 0, &formatted);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to format user JSON: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ /* And cache it for everyone else */
+ r = pam_set_data(handle, field, formatted, pam_cleanup_free);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM user record data '%s': %s",
+ field, pam_strerror(handle, r));
+ return r;
+ }
+
+ TAKE_PTR(formatted);
+ }
+
+ if (!uid_is_valid(ur->uid)) {
+ pam_syslog(handle, LOG_ERR, "Acquired user record does not have a UID.");
+ return PAM_SERVICE_ERR;
+ }
+
+ if (ret_record)
+ *ret_record = TAKE_PTR(ur);
+
+ return PAM_SUCCESS;
+}
+
+static bool display_is_local(const char *display) {
+ assert(display);
+
+ return
+ display[0] == ':' &&
+ display[1] >= '0' &&
+ display[1] <= '9';
+}
+
+static int socket_from_display(const char *display, char **path) {
+ size_t k;
+ char *f, *c;
+
+ assert(display);
+ assert(path);
+
+ if (!display_is_local(display))
+ return -EINVAL;
+
+ k = strspn(display+1, "0123456789");
+
+ f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
+ if (!f)
+ return -ENOMEM;
+
+ c = stpcpy(f, "/tmp/.X11-unix/X");
+ memcpy(c, display+1, k);
+ c[k] = 0;
+
+ *path = f;
+
+ return 0;
+}
+
+static int get_seat_from_display(const char *display, const char **seat, uint32_t *vtnr) {
+ union sockaddr_union sa;
+ socklen_t sa_len;
+ _cleanup_free_ char *p = NULL, *sys_path = NULL, *tty = NULL;
+ _cleanup_close_ int fd = -1;
+ struct ucred ucred;
+ int v, r;
+ dev_t display_ctty;
+
+ assert(display);
+ assert(vtnr);
+
+ /* We deduce the X11 socket from the display name, then use
+ * SO_PEERCRED to determine the X11 server process, ask for
+ * the controlling tty of that and if it's a VC then we know
+ * the seat and the virtual terminal. Sounds ugly, is only
+ * semi-ugly. */
+
+ r = socket_from_display(display, &p);
+ if (r < 0)
+ return r;
+ r = sockaddr_un_set_path(&sa.un, p);
+ if (r < 0)
+ return r;
+ sa_len = r;
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (connect(fd, &sa.sa, sa_len) < 0)
+ return -errno;
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ r = get_ctty_devnr(ucred.pid, &display_ctty);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&sys_path, "/sys/dev/char/%d:%d", major(display_ctty), minor(display_ctty)) < 0)
+ return -ENOMEM;
+ r = readlink_value(sys_path, &tty);
+ if (r < 0)
+ return r;
+
+ v = vtnr_from_tty(tty);
+ if (v < 0)
+ return v;
+ else if (v == 0)
+ return -ENOENT;
+
+ if (seat)
+ *seat = "seat0";
+ *vtnr = (uint32_t) v;
+
+ return 0;
+}
+
+static int export_legacy_dbus_address(
+ pam_handle_t *handle,
+ const char *runtime) {
+
+ const char *s;
+ _cleanup_free_ char *t = NULL;
+ int r = PAM_BUF_ERR;
+
+ /* We need to export $DBUS_SESSION_BUS_ADDRESS because various applications will not connect
+ * correctly to the bus without it. This setting matches what dbus.socket does for the user
+ * session using 'systemctl --user set-environment'. We want to have the same configuration
+ * in processes started from the PAM session.
+ *
+ * The setting of the address is guarded by the access() check because it is also possible to compile
+ * dbus without --enable-user-session, in which case this socket is not used, and
+ * $DBUS_SESSION_BUS_ADDRESS should not be set. An alternative approach would to not do the access()
+ * check here, and let applications try on their own, by using "unix:path=%s/bus;autolaunch:". But we
+ * expect the socket to be present by the time we do this check, so we can just as well check once
+ * here. */
+
+ s = strjoina(runtime, "/bus");
+ if (access(s, F_OK) < 0)
+ return PAM_SUCCESS;
+
+ if (asprintf(&t, DEFAULT_USER_BUS_ADDRESS_FMT, runtime) < 0)
+ return pam_log_oom(handle);
+
+ r = pam_misc_setenv(handle, "DBUS_SESSION_BUS_ADDRESS", t, 0);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set bus variable: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ return PAM_SUCCESS;
+}
+
+static int append_session_memory_max(pam_handle_t *handle, sd_bus_message *m, const char *limit) {
+ uint64_t val;
+ int r;
+
+ if (isempty(limit))
+ return PAM_SUCCESS;
+
+ if (streq(limit, "infinity")) {
+ r = sd_bus_message_append(m, "(sv)", "MemoryMax", "t", (uint64_t)-1);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ return PAM_SUCCESS;
+ }
+
+ r = parse_permille(limit);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", "MemoryMaxScale", "u", (uint32_t) (((uint64_t) r * UINT32_MAX) / 1000U));
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ return PAM_SUCCESS;
+ }
+
+ r = parse_size(limit, 1024, &val);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", "MemoryMax", "t", val);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ return PAM_SUCCESS;
+ }
+
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.memory_max, ignoring: %s", limit);
+ return PAM_SUCCESS;
+}
+
+static int append_session_runtime_max_sec(pam_handle_t *handle, sd_bus_message *m, const char *limit) {
+ usec_t val;
+ int r;
+
+ /* No need to parse "infinity" here, it will be set by default later in scope_init() */
+ if (isempty(limit) || streq(limit, "infinity"))
+ return PAM_SUCCESS;
+
+ r = parse_sec(limit, &val);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", "RuntimeMaxUSec", "t", (uint64_t) val);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+ } else
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.runtime_max_sec: %s, ignoring.", limit);
+
+ return PAM_SUCCESS;
+}
+
+static int append_session_tasks_max(pam_handle_t *handle, sd_bus_message *m, const char *limit) {
+ uint64_t val;
+ int r;
+
+ /* No need to parse "infinity" here, it will be set unconditionally later in manager_start_scope() */
+ if (isempty(limit) || streq(limit, "infinity"))
+ return PAM_SUCCESS;
+
+ r = safe_atou64(limit, &val);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", "TasksMax", "t", val);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+ } else
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.tasks_max, ignoring: %s", limit);
+
+ return PAM_SUCCESS;
+}
+
+static int append_session_cg_weight(pam_handle_t *handle, sd_bus_message *m, const char *limit, const char *field) {
+ uint64_t val;
+ int r;
+
+ if (isempty(limit))
+ return PAM_SUCCESS;
+
+ r = cg_weight_parse(limit, &val);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "(sv)", field, "t", val);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+ } else if (streq(field, "CPUWeight"))
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.cpu_weight, ignoring: %s", limit);
+ else
+ pam_syslog(handle, LOG_WARNING, "Failed to parse systemd.io_weight, ignoring: %s", limit);
+
+ return PAM_SUCCESS;
+}
+
+static const char* getenv_harder(pam_handle_t *handle, const char *key, const char *fallback) {
+ const char *v;
+
+ assert(handle);
+ assert(key);
+
+ /* Looks for an environment variable, preferably in the environment block associated with the
+ * specified PAM handle, falling back to the process' block instead. Why check both? Because we want
+ * to permit configuration of session properties from unit files that invoke PAM services, so that
+ * PAM services don't have to be reworked to set systemd-specific properties, but these properties
+ * can still be set from the unit file Environment= block. */
+
+ v = pam_getenv(handle, key);
+ if (!isempty(v))
+ return v;
+
+ /* We use secure_getenv() here, since we might get loaded into su/sudo, which are SUID. Ideally
+ * they'd clean up the environment before invoking foreign code (such as PAM modules), but alas they
+ * currently don't (to be precise, they clean up the environment they pass to their children, but
+ * not their own environ[]). */
+ v = secure_getenv(key);
+ if (!isempty(v))
+ return v;
+
+ return fallback;
+}
+
+static int update_environment(pam_handle_t *handle, const char *key, const char *value) {
+ int r;
+
+ assert(handle);
+ assert(key);
+
+ /* Updates the environment, but only if there's actually a value set. Also, log about errors */
+
+ if (isempty(value))
+ return PAM_SUCCESS;
+
+ r = pam_misc_setenv(handle, key, value, 0);
+ if (r != PAM_SUCCESS)
+ pam_syslog(handle, LOG_ERR, "Failed to set environment variable %s: %s", key, pam_strerror(handle, r));
+
+ return r;
+}
+
+static bool validate_runtime_directory(pam_handle_t *handle, const char *path, uid_t uid) {
+ struct stat st;
+
+ assert(handle);
+ assert(path);
+
+ /* Some extra paranoia: let's not set $XDG_RUNTIME_DIR if the directory we'd set it to isn't actually
+ * set up properly for us. This is supposed to provide a careful safety net for supporting su/sudo
+ * type transitions: in that case the UID changes, but the session and thus the user owning it
+ * doesn't change. Since the $XDG_RUNTIME_DIR lifecycle is bound to the session's user being logged
+ * in at least once we should be particularly careful when setting the environment variable, since
+ * otherwise we might end up setting $XDG_RUNTIME_DIR to some directory owned by the wrong user. */
+
+ if (!path_is_absolute(path)) {
+ pam_syslog(handle, LOG_ERR, "Provided runtime directory '%s' is not absolute.", path);
+ goto fail;
+ }
+
+ if (lstat(path, &st) < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to stat() runtime directory '%s': %s", path, strerror_safe(errno));
+ goto fail;
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ pam_syslog(handle, LOG_ERR, "Runtime directory '%s' is not actually a directory.", path);
+ goto fail;
+ }
+
+ if (st.st_uid != uid) {
+ pam_syslog(handle, LOG_ERR, "Runtime directory '%s' is not owned by UID " UID_FMT ", as it should.", path, uid);
+ goto fail;
+ }
+
+ return true;
+
+fail:
+ pam_syslog(handle, LOG_WARNING, "Not setting $XDG_RUNTIME_DIR, as the directory is not in order.");
+ return false;
+}
+
+static int pam_putenv_and_log(pam_handle_t *handle, const char *e, bool debug) {
+ int r;
+
+ assert(handle);
+ assert(e);
+
+ r = pam_putenv(handle, e);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM environment variable %s: %s", e, pam_strerror(handle, r));
+ return r;
+ }
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "PAM environment variable %s set based on user record.", e);
+
+ return PAM_SUCCESS;
+}
+
+static int apply_user_record_settings(pam_handle_t *handle, UserRecord *ur, bool debug) {
+ char **i;
+ int r;
+
+ assert(handle);
+ assert(ur);
+
+ if (ur->umask != MODE_INVALID) {
+ umask(ur->umask);
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Set user umask to %04o based on user record.", ur->umask);
+ }
+
+ STRV_FOREACH(i, ur->environment) {
+ _cleanup_free_ char *n = NULL;
+ const char *e;
+
+ assert_se(e = strchr(*i, '=')); /* environment was already validated while parsing JSON record, this thus must hold */
+
+ n = strndup(*i, e - *i);
+ if (!n)
+ return pam_log_oom(handle);
+
+ if (pam_getenv(handle, n)) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "PAM environment variable $%s already set, not changing based on record.", *i);
+ continue;
+ }
+
+ r = pam_putenv_and_log(handle, *i, debug);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+
+ if (ur->email_address) {
+ if (pam_getenv(handle, "EMAIL")) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "PAM environment variable $EMAIL already set, not changing based on user record.");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ joined = strjoin("EMAIL=", ur->email_address);
+ if (!joined)
+ return pam_log_oom(handle);
+
+ r = pam_putenv_and_log(handle, joined, debug);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+ }
+
+ if (ur->time_zone) {
+ if (pam_getenv(handle, "TZ")) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "PAM environment variable $TZ already set, not changing based on user record.");
+ } else if (!timezone_is_valid(ur->time_zone, LOG_DEBUG)) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Time zone specified in user record is not valid locally, not setting $TZ.");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ joined = strjoin("TZ=:", ur->time_zone);
+ if (!joined)
+ return pam_log_oom(handle);
+
+ r = pam_putenv_and_log(handle, joined, debug);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+ }
+
+ if (ur->preferred_language) {
+ if (pam_getenv(handle, "LANG")) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "PAM environment variable $LANG already set, not changing based on user record.");
+ } else if (locale_is_installed(ur->preferred_language) <= 0) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Preferred language specified in user record is not valid or not installed, not setting $LANG.");
+ } else {
+ _cleanup_free_ char *joined = NULL;
+
+ joined = strjoin("LANG=", ur->preferred_language);
+ if (!joined)
+ return pam_log_oom(handle);
+
+ r = pam_putenv_and_log(handle, joined, debug);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+ }
+
+ if (nice_is_valid(ur->nice_level)) {
+ if (nice(ur->nice_level) < 0)
+ pam_syslog(handle, LOG_ERR, "Failed to set nice level to %i, ignoring: %s", ur->nice_level, strerror_safe(errno));
+ else if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Nice level set, based on user record.");
+ }
+
+ for (int rl = 0; rl < _RLIMIT_MAX; rl++) {
+
+ if (!ur->rlimits[rl])
+ continue;
+
+ r = setrlimit_closest(rl, ur->rlimits[rl]);
+ if (r < 0)
+ pam_syslog(handle, LOG_ERR, "Failed to set resource limit %s, ignoring: %s", rlimit_to_string(rl), strerror_safe(r));
+ else if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Resource limit %s set, based on user record.", rlimit_to_string(rl));
+ }
+
+ return PAM_SUCCESS;
+}
+
+static int configure_runtime_directory(
+ pam_handle_t *handle,
+ UserRecord *ur,
+ const char *rt) {
+
+ int r;
+
+ assert(handle);
+ assert(ur);
+ assert(rt);
+
+ if (!validate_runtime_directory(handle, rt, ur->uid))
+ return PAM_SUCCESS;
+
+ r = pam_misc_setenv(handle, "XDG_RUNTIME_DIR", rt, 0);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set runtime dir: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ return export_legacy_dbus_address(handle, rt);
+}
+
+_public_ PAM_EXTERN int pam_sm_open_session(
+ pam_handle_t *handle,
+ int flags,
+ int argc, const char **argv) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ const char
+ *id, *object_path, *runtime_path,
+ *service = NULL,
+ *tty = NULL, *display = NULL,
+ *remote_user = NULL, *remote_host = NULL,
+ *seat = NULL,
+ *type = NULL, *class = NULL,
+ *class_pam = NULL, *type_pam = NULL, *cvtnr = NULL, *desktop = NULL, *desktop_pam = NULL,
+ *memory_max = NULL, *tasks_max = NULL, *cpu_weight = NULL, *io_weight = NULL, *runtime_max_sec = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ int session_fd = -1, existing, r;
+ bool debug = false, remote;
+ uint32_t vtnr = 0;
+ uid_t original_uid;
+
+ assert(handle);
+
+ if (parse_argv(handle,
+ argc, argv,
+ &class_pam,
+ &type_pam,
+ &desktop_pam,
+ &debug) < 0)
+ return PAM_SESSION_ERR;
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "pam-systemd initializing");
+
+ r = acquire_user_record(handle, &ur);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ /* Make most of this a NOP on non-logind systems */
+ if (!logind_running())
+ goto success;
+
+ /* Make sure we don't enter a loop by talking to
+ * systemd-logind when it is actually waiting for the
+ * background to finish start-up. If the service is
+ * "systemd-user" we simply set XDG_RUNTIME_DIR and
+ * leave. */
+
+ (void) pam_get_item(handle, PAM_SERVICE, (const void**) &service);
+ if (streq_ptr(service, "systemd-user")) {
+ char rt[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
+
+ xsprintf(rt, "/run/user/"UID_FMT, ur->uid);
+ r = configure_runtime_directory(handle, ur, rt);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ goto success;
+ }
+
+ /* Otherwise, we ask logind to create a session for us */
+
+ (void) pam_get_item(handle, PAM_XDISPLAY, (const void**) &display);
+ (void) pam_get_item(handle, PAM_TTY, (const void**) &tty);
+ (void) pam_get_item(handle, PAM_RUSER, (const void**) &remote_user);
+ (void) pam_get_item(handle, PAM_RHOST, (const void**) &remote_host);
+
+ seat = getenv_harder(handle, "XDG_SEAT", NULL);
+ cvtnr = getenv_harder(handle, "XDG_VTNR", NULL);
+ type = getenv_harder(handle, "XDG_SESSION_TYPE", type_pam);
+ class = getenv_harder(handle, "XDG_SESSION_CLASS", class_pam);
+ desktop = getenv_harder(handle, "XDG_SESSION_DESKTOP", desktop_pam);
+
+ tty = strempty(tty);
+
+ if (strchr(tty, ':')) {
+ /* A tty with a colon is usually an X11 display, placed there to show up in utmp. We rearrange things
+ * and don't pretend that an X display was a tty. */
+ if (isempty(display))
+ display = tty;
+ tty = NULL;
+
+ } else if (streq(tty, "cron")) {
+ /* cron is setting PAM_TTY to "cron" for some reason (the commit carries no information why, but
+ * probably because it wants to set it to something as pam_time/pam_access/… require PAM_TTY to be set
+ * (as they otherwise even try to update it!) — but cron doesn't actually allocate a TTY for its forked
+ * off processes.) */
+ type = "unspecified";
+ class = "background";
+ tty = NULL;
+
+ } else if (streq(tty, "ssh")) {
+ /* ssh has been setting PAM_TTY to "ssh" (for the same reason as cron does this, see above. For further
+ * details look for "PAM_TTY_KLUDGE" in the openssh sources). */
+ type ="tty";
+ class = "user";
+ tty = NULL; /* This one is particularly sad, as this means that ssh sessions — even though usually
+ * associated with a pty — won't be tracked by their tty in logind. This is because ssh
+ * does the PAM session registration early for new connections, and registers a pty only
+ * much later (this is because it doesn't know yet if it needs one at all, as whether to
+ * register a pty or not is negotiated much later in the protocol). */
+
+ } else
+ /* Chop off leading /dev prefix that some clients specify, but others do not. */
+ tty = skip_dev_prefix(tty);
+
+ /* If this fails vtnr will be 0, that's intended */
+ if (!isempty(cvtnr))
+ (void) safe_atou32(cvtnr, &vtnr);
+
+ if (!isempty(display) && !vtnr) {
+ if (isempty(seat))
+ (void) get_seat_from_display(display, &seat, &vtnr);
+ else if (streq(seat, "seat0"))
+ (void) get_seat_from_display(display, NULL, &vtnr);
+ }
+
+ if (seat && !streq(seat, "seat0") && vtnr != 0) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Ignoring vtnr %"PRIu32" for %s which is not seat0", vtnr, seat);
+ vtnr = 0;
+ }
+
+ if (isempty(type))
+ type = !isempty(display) ? "x11" :
+ !isempty(tty) ? "tty" : "unspecified";
+
+ if (isempty(class))
+ class = streq(type, "unspecified") ? "background" : "user";
+
+ remote = !isempty(remote_host) && !is_localhost(remote_host);
+
+ (void) pam_get_data(handle, "systemd.memory_max", (const void **)&memory_max);
+ (void) pam_get_data(handle, "systemd.tasks_max", (const void **)&tasks_max);
+ (void) pam_get_data(handle, "systemd.cpu_weight", (const void **)&cpu_weight);
+ (void) pam_get_data(handle, "systemd.io_weight", (const void **)&io_weight);
+ (void) pam_get_data(handle, "systemd.runtime_max_sec", (const void **)&runtime_max_sec);
+
+ /* Talk to logind over the message bus */
+
+ r = pam_acquire_bus_connection(handle, &bus);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ if (debug) {
+ pam_syslog(handle, LOG_DEBUG, "Asking logind to create session: "
+ "uid="UID_FMT" pid="PID_FMT" service=%s type=%s class=%s desktop=%s seat=%s vtnr=%"PRIu32" tty=%s display=%s remote=%s remote_user=%s remote_host=%s",
+ ur->uid, getpid_cached(),
+ strempty(service),
+ type, class, strempty(desktop),
+ strempty(seat), vtnr, strempty(tty), strempty(display),
+ yes_no(remote), strempty(remote_user), strempty(remote_host));
+ pam_syslog(handle, LOG_DEBUG, "Session limits: "
+ "memory_max=%s tasks_max=%s cpu_weight=%s io_weight=%s runtime_max_sec=%s",
+ strna(memory_max), strna(tasks_max), strna(cpu_weight), strna(io_weight), strna(runtime_max_sec));
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_login_mgr, "CreateSession");
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_message_append(m, "uusssssussbss",
+ (uint32_t) ur->uid,
+ 0,
+ service,
+ type,
+ class,
+ desktop,
+ seat,
+ vtnr,
+ tty,
+ display,
+ remote,
+ remote_user,
+ remote_host);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = append_session_memory_max(handle, m, memory_max);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = append_session_runtime_max_sec(handle, m, runtime_max_sec);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = append_session_tasks_max(handle, m, tasks_max);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = append_session_cg_weight(handle, m, cpu_weight, "CPUWeight");
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = append_session_cg_weight(handle, m, io_weight, "IOWeight");
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return pam_bus_log_create_error(handle, r);
+
+ r = sd_bus_call(bus, m, LOGIN_SLOW_BUS_CALL_TIMEOUT_USEC, &error, &reply);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_SESSION_BUSY)) {
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Not creating session: %s", bus_error_message(&error, r));
+
+ /* We are already in a session, don't do anything */
+ goto success;
+ } else {
+ pam_syslog(handle, LOG_ERR, "Failed to create session: %s", bus_error_message(&error, r));
+ return PAM_SESSION_ERR;
+ }
+ }
+
+ r = sd_bus_message_read(reply,
+ "soshusub",
+ &id,
+ &object_path,
+ &runtime_path,
+ &session_fd,
+ &original_uid,
+ &seat,
+ &vtnr,
+ &existing);
+ if (r < 0)
+ return pam_bus_log_parse_error(handle, r);
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "Reply from logind: "
+ "id=%s object_path=%s runtime_path=%s session_fd=%d seat=%s vtnr=%u original_uid=%u",
+ id, object_path, runtime_path, session_fd, seat, vtnr, original_uid);
+
+ r = update_environment(handle, "XDG_SESSION_ID", id);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ if (original_uid == ur->uid) {
+ /* Don't set $XDG_RUNTIME_DIR if the user we now authenticated for does not match the
+ * original user of the session. We do this in order not to result in privileged apps
+ * clobbering the runtime directory unnecessarily. */
+
+ r = configure_runtime_directory(handle, ur, runtime_path);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+
+ /* Most likely we got the session/type/class from environment variables, but might have gotten the data
+ * somewhere else (for example PAM module parameters). Let's now update the environment variables, so that this
+ * data is inherited into the session processes, and programs can rely on them to be initialized. */
+
+ r = update_environment(handle, "XDG_SESSION_TYPE", type);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = update_environment(handle, "XDG_SESSION_CLASS", class);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = update_environment(handle, "XDG_SESSION_DESKTOP", desktop);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = update_environment(handle, "XDG_SEAT", seat);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ if (vtnr > 0) {
+ char buf[DECIMAL_STR_MAX(vtnr)];
+ sprintf(buf, "%u", vtnr);
+
+ r = update_environment(handle, "XDG_VTNR", buf);
+ if (r != PAM_SUCCESS)
+ return r;
+ }
+
+ r = pam_set_data(handle, "systemd.existing", INT_TO_PTR(!!existing), NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to install existing flag: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ if (session_fd >= 0) {
+ session_fd = fcntl(session_fd, F_DUPFD_CLOEXEC, 3);
+ if (session_fd < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to dup session fd: %m");
+ return PAM_SESSION_ERR;
+ }
+
+ r = pam_set_data(handle, "systemd.session-fd", FD_TO_PTR(session_fd), NULL);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to install session fd: %s", pam_strerror(handle, r));
+ safe_close(session_fd);
+ return r;
+ }
+ }
+
+success:
+ r = apply_user_record_settings(handle, ur, debug);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ /* Let's release the D-Bus connection, after all the session might live quite a long time, and we are
+ * not going to use the bus connection in that time, so let's better close before the daemon kicks us
+ * off because we are not processing anything. */
+ (void) pam_release_bus_connection(handle);
+ return PAM_SUCCESS;
+}
+
+_public_ PAM_EXTERN int pam_sm_close_session(
+ pam_handle_t *handle,
+ int flags,
+ int argc, const char **argv) {
+
+ const void *existing = NULL;
+ bool debug = false;
+ const char *id;
+ int r;
+
+ assert(handle);
+
+ if (parse_argv(handle,
+ argc, argv,
+ NULL,
+ NULL,
+ NULL,
+ &debug) < 0)
+ return PAM_SESSION_ERR;
+
+ if (debug)
+ pam_syslog(handle, LOG_DEBUG, "pam-systemd shutting down");
+
+ /* Only release session if it wasn't pre-existing when we
+ * tried to create it */
+ (void) pam_get_data(handle, "systemd.existing", &existing);
+
+ id = pam_getenv(handle, "XDG_SESSION_ID");
+ if (id && !existing) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+
+ /* Before we go and close the FIFO we need to tell logind that this is a clean session
+ * shutdown, so that it doesn't just go and slaughter us immediately after closing the fd */
+
+ r = pam_acquire_bus_connection(handle, &bus);
+ if (r != PAM_SUCCESS)
+ return r;
+
+ r = bus_call_method(bus, bus_login_mgr, "ReleaseSession", &error, NULL, "s", id);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to release session: %s", bus_error_message(&error, r));
+ return PAM_SESSION_ERR;
+ }
+ }
+
+ /* Note that we are knowingly leaking the FIFO fd here. This way, logind can watch us die. If we
+ * closed it here it would not have any clue when that is completed. Given that one cannot really
+ * have multiple PAM sessions open from the same process this means we will leak one FD at max. */
+
+ return PAM_SUCCESS;
+}
diff --git a/src/login/pam_systemd.sym b/src/login/pam_systemd.sym
new file mode 100644
index 0000000..130cf6a
--- /dev/null
+++ b/src/login/pam_systemd.sym
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+{
+global:
+ pam_sm_close_session;
+ pam_sm_open_session;
+local: *;
+};
diff --git a/src/login/sysfs-show.c b/src/login/sysfs-show.c
new file mode 100644
index 0000000..5a19dbf
--- /dev/null
+++ b/src/login/sysfs-show.c
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "locale-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "sysfs-show.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static int show_sysfs_one(
+ const char *seat,
+ sd_device **dev_list,
+ size_t *i_dev,
+ size_t n_dev,
+ const char *sub,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ size_t max_width;
+ int r;
+
+ assert(seat);
+ assert(dev_list);
+ assert(i_dev);
+ assert(prefix);
+
+ if (flags & OUTPUT_FULL_WIDTH)
+ max_width = (size_t) -1;
+ else if (n_columns < 10)
+ max_width = 10;
+ else
+ max_width = n_columns;
+
+ while (*i_dev < n_dev) {
+ const char *sysfs, *sn, *name = NULL, *subsystem, *sysname;
+ _cleanup_free_ char *k = NULL, *l = NULL;
+ size_t lookahead;
+ bool is_master;
+
+ if (sd_device_get_syspath(dev_list[*i_dev], &sysfs) < 0 ||
+ !path_startswith(sysfs, sub))
+ return 0;
+
+ if (sd_device_get_property_value(dev_list[*i_dev], "ID_SEAT", &sn) < 0 || isempty(sn))
+ sn = "seat0";
+
+ /* Explicitly also check for tag 'seat' here */
+ if (!streq(seat, sn) ||
+ sd_device_has_current_tag(dev_list[*i_dev], "seat") <= 0 ||
+ sd_device_get_subsystem(dev_list[*i_dev], &subsystem) < 0 ||
+ sd_device_get_sysname(dev_list[*i_dev], &sysname) < 0) {
+ (*i_dev)++;
+ continue;
+ }
+
+ is_master = sd_device_has_current_tag(dev_list[*i_dev], "master-of-seat") > 0;
+
+ if (sd_device_get_sysattr_value(dev_list[*i_dev], "name", &name) < 0)
+ (void) sd_device_get_sysattr_value(dev_list[*i_dev], "id", &name);
+
+ /* Look if there's more coming after this */
+ for (lookahead = *i_dev + 1; lookahead < n_dev; lookahead++) {
+ const char *lookahead_sysfs;
+
+ if (sd_device_get_syspath(dev_list[lookahead], &lookahead_sysfs) < 0)
+ continue;
+
+ if (path_startswith(lookahead_sysfs, sub) &&
+ !path_startswith(lookahead_sysfs, sysfs)) {
+ const char *lookahead_sn;
+
+ if (sd_device_get_property_value(dev_list[lookahead], "ID_SEAT", &lookahead_sn) < 0 ||
+ isempty(lookahead_sn))
+ lookahead_sn = "seat0";
+
+ if (streq(seat, lookahead_sn) && sd_device_has_current_tag(dev_list[lookahead], "seat") > 0)
+ break;
+ }
+ }
+
+ k = ellipsize(sysfs, max_width, 20);
+ if (!k)
+ return -ENOMEM;
+
+ printf("%s%s%s\n", prefix, special_glyph(lookahead < n_dev ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT), k);
+
+ if (asprintf(&l,
+ "%s%s:%s%s%s%s",
+ is_master ? "[MASTER] " : "",
+ subsystem, sysname,
+ name ? " \"" : "", strempty(name), name ? "\"" : "") < 0)
+ return -ENOMEM;
+
+ free(k);
+ k = ellipsize(l, max_width, 70);
+ if (!k)
+ return -ENOMEM;
+
+ printf("%s%s%s\n", prefix, lookahead < n_dev ? special_glyph(SPECIAL_GLYPH_TREE_VERTICAL) : " ", k);
+
+ if (++(*i_dev) < n_dev) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin(prefix, lookahead < n_dev ? special_glyph(SPECIAL_GLYPH_TREE_VERTICAL) : " ");
+ if (!p)
+ return -ENOMEM;
+
+ r = show_sysfs_one(seat, dev_list, i_dev, n_dev, sysfs, p,
+ n_columns == (unsigned) -1 || n_columns < 2 ? n_columns : n_columns - 2,
+ flags);
+ if (r < 0)
+ return r;
+ }
+
+ }
+
+ return 0;
+}
+
+int show_sysfs(const char *seat, const char *prefix, unsigned n_columns, OutputFlags flags) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ size_t n_dev = 0, i = 0;
+ sd_device **dev_list;
+ int r;
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ if (isempty(seat))
+ seat = "seat0";
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_tag(e, streq(seat, "seat0") ? "seat" : seat);
+ if (r < 0)
+ return r;
+
+ r = device_enumerator_scan_devices(e);
+ if (r < 0)
+ return r;
+
+ dev_list = device_enumerator_get_devices(e, &n_dev);
+
+ if (dev_list && n_dev > 0)
+ show_sysfs_one(seat, dev_list, &i, n_dev, "/", prefix, n_columns, flags);
+ else
+ printf("%s%s%s\n", prefix, special_glyph(SPECIAL_GLYPH_TREE_RIGHT), "(none)");
+
+ return 0;
+}
diff --git a/src/login/sysfs-show.h b/src/login/sysfs-show.h
new file mode 100644
index 0000000..32ccbf3
--- /dev/null
+++ b/src/login/sysfs-show.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "output-mode.h"
+
+int show_sysfs(const char *seat, const char *prefix, unsigned columns, OutputFlags flags);
diff --git a/src/login/systemd-user.m4 b/src/login/systemd-user.m4
new file mode 100644
index 0000000..f6313f7
--- /dev/null
+++ b/src/login/systemd-user.m4
@@ -0,0 +1,20 @@
+# This file is part of systemd.
+#
+# Used by systemd --user instances.
+
+m4_ifdef(`ENABLE_HOMED',
+-account sufficient pam_systemd_home.so
+)m4_dnl
+account sufficient pam_unix.so
+account required pam_permit.so
+
+m4_ifdef(`HAVE_SELINUX',
+session required pam_selinux.so close
+session required pam_selinux.so nottys open
+)m4_dnl
+session required pam_loginuid.so
+session optional pam_keyinit.so force revoke
+m4_ifdef(`ENABLE_HOMED',
+-session optional pam_systemd_home.so
+)m4_dnl
+session optional pam_systemd.so
diff --git a/src/login/test-inhibit.c b/src/login/test-inhibit.c
new file mode 100644
index 0000000..dbe79c7
--- /dev/null
+++ b/src/login/test-inhibit.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "sd-bus.h"
+
+#include "bus-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "util.h"
+
+static int inhibit(sd_bus *bus, const char *what) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *who = "Test Tool", *reason = "Just because!", *mode = "block";
+ int fd;
+ int r;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "Inhibit",
+ &error,
+ &reply,
+ "ssss", what, who, reason, mode);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read_basic(reply, SD_BUS_TYPE_UNIX_FD, &fd);
+ assert_se(r >= 0);
+ assert_se(fd >= 0);
+
+ return fcntl(fd, F_DUPFD_CLOEXEC, 3);
+}
+
+static void print_inhibitors(sd_bus *bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *what, *who, *why, *mode;
+ uint32_t uid, pid;
+ unsigned n = 0;
+ int r;
+
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "ListInhibitors",
+ &error,
+ &reply,
+ "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssuu)");
+ assert_se(r >= 0);
+
+ while ((r = sd_bus_message_read(reply, "(ssssuu)", &what, &who, &why, &mode, &uid, &pid)) > 0) {
+ printf("what=<%s> who=<%s> why=<%s> mode=<%s> uid=<%"PRIu32"> pid=<%"PRIu32">\n",
+ what, who, why, mode, uid, pid);
+
+ n++;
+ }
+ assert_se(r >= 0);
+
+ printf("%u inhibitors\n", n);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ int fd1, fd2;
+ int r;
+
+ r = sd_bus_open_system(&bus);
+ assert_se(r >= 0);
+
+ print_inhibitors(bus);
+
+ fd1 = inhibit(bus, "sleep");
+ assert_se(fd1 >= 0);
+ print_inhibitors(bus);
+
+ fd2 = inhibit(bus, "idle:shutdown");
+ assert_se(fd2 >= 0);
+ print_inhibitors(bus);
+
+ safe_close(fd1);
+ sleep(1);
+ print_inhibitors(bus);
+
+ safe_close(fd2);
+ sleep(1);
+ print_inhibitors(bus);
+
+ return 0;
+}
diff --git a/src/login/test-login-shared.c b/src/login/test-login-shared.c
new file mode 100644
index 0000000..d3de9c4
--- /dev/null
+++ b/src/login/test-login-shared.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "login-util.h"
+#include "macro.h"
+
+static void test_session_id_valid(void) {
+ assert_se(session_id_valid("c1"));
+ assert_se(session_id_valid("1234"));
+
+ assert_se(!session_id_valid("1-2"));
+ assert_se(!session_id_valid(""));
+ assert_se(!session_id_valid("\tid"));
+}
+
+int main(int argc, char* argv[]) {
+ log_parse_environment();
+ log_open();
+
+ test_session_id_valid();
+
+ return 0;
+}
diff --git a/src/login/test-login-tables.c b/src/login/test-login-tables.c
new file mode 100644
index 0000000..9d16685
--- /dev/null
+++ b/src/login/test-login-tables.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "logind-action.h"
+#include "logind-session.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ test_table(handle_action, HANDLE_ACTION);
+ test_table(inhibit_mode, INHIBIT_MODE);
+ test_table(kill_who, KILL_WHO);
+ test_table(session_class, SESSION_CLASS);
+ test_table(session_state, SESSION_STATE);
+ test_table(session_type, SESSION_TYPE);
+ test_table(user_state, USER_STATE);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/login/user-runtime-dir.c b/src/login/user-runtime-dir.c
new file mode 100644
index 0000000..9b5bdcc
--- /dev/null
+++ b/src/login/user-runtime-dir.c
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdint.h>
+#include <sys/mount.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "dev-setup.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "label.h"
+#include "limits-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int acquire_runtime_dir_properties(uint64_t *size, uint64_t *inodes) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_get_property_trivial(bus, "org.freedesktop.login1", "/org/freedesktop/login1", "org.freedesktop.login1.Manager", "RuntimeDirectorySize", &error, 't', size);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to acquire runtime directory size, ignoring: %s", bus_error_message(&error, r));
+ *size = physical_memory_scale(10U, 100U); /* 10% */
+ }
+
+ r = sd_bus_get_property_trivial(bus, "org.freedesktop.login1", "/org/freedesktop/login1", "org.freedesktop.login1.Manager", "RuntimeDirectoryInodesMax", &error, 't', inodes);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to acquire number of inodes for runtime directory, ignoring: %s", bus_error_message(&error, r));
+ *inodes = DIV_ROUND_UP(*size, 4096);
+ }
+
+ return 0;
+}
+
+static int user_mkdir_runtime_path(
+ const char *runtime_path,
+ uid_t uid,
+ gid_t gid,
+ uint64_t runtime_dir_size,
+ uint64_t runtime_dir_inodes) {
+
+ int r;
+
+ assert(runtime_path);
+ assert(path_is_absolute(runtime_path));
+ assert(uid_is_valid(uid));
+ assert(gid_is_valid(gid));
+
+ r = mkdir_safe_label("/run/user", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/user: %m");
+
+ if (path_is_mount_point(runtime_path, NULL, 0) >= 0)
+ log_debug("%s is already a mount point", runtime_path);
+ else {
+ char options[sizeof("mode=0700,uid=,gid=,size=,nr_inodes=,smackfsroot=*")
+ + DECIMAL_STR_MAX(uid_t)
+ + DECIMAL_STR_MAX(gid_t)
+ + DECIMAL_STR_MAX(uint64_t)
+ + DECIMAL_STR_MAX(uint64_t)];
+
+ xsprintf(options,
+ "mode=0700,uid=" UID_FMT ",gid=" GID_FMT ",size=%" PRIu64 ",nr_inodes=%" PRIu64 "%s",
+ uid, gid, runtime_dir_size, runtime_dir_inodes,
+ mac_smack_use() ? ",smackfsroot=*" : "");
+
+ (void) mkdir_label(runtime_path, 0700);
+
+ r = mount_nofollow_verbose(LOG_DEBUG, "tmpfs", runtime_path, "tmpfs", MS_NODEV|MS_NOSUID, options);
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r)) {
+ log_error_errno(r, "Failed to mount per-user tmpfs directory %s: %m", runtime_path);
+ goto fail;
+ }
+
+ log_debug_errno(r,
+ "Failed to mount per-user tmpfs directory %s.\n"
+ "Assuming containerized execution, ignoring: %m", runtime_path);
+
+ r = chmod_and_chown(runtime_path, 0700, uid, gid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to change ownership and mode of \"%s\": %m", runtime_path);
+ goto fail;
+ }
+ }
+
+ r = label_fix(runtime_path, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to fix label of \"%s\", ignoring: %m", runtime_path);
+ }
+
+ return 0;
+
+fail:
+ /* Try to clean up, but ignore errors */
+ (void) rmdir(runtime_path);
+ return r;
+}
+
+static int user_remove_runtime_path(const char *runtime_path) {
+ int r;
+
+ assert(runtime_path);
+ assert(path_is_absolute(runtime_path));
+
+ r = rm_rf(runtime_path, 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to remove runtime directory %s (before unmounting), ignoring: %m", runtime_path);
+
+ /* Ignore cases where the directory isn't mounted, as that's quite possible, if we lacked the permissions to
+ * mount something */
+ r = umount2(runtime_path, MNT_DETACH);
+ if (r < 0 && !IN_SET(errno, EINVAL, ENOENT))
+ log_debug_errno(errno, "Failed to unmount user runtime directory %s, ignoring: %m", runtime_path);
+
+ r = rm_rf(runtime_path, REMOVE_ROOT);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to remove runtime directory %s (after unmounting): %m", runtime_path);
+
+ return 0;
+}
+
+static int do_mount(const char *user) {
+ char runtime_path[sizeof("/run/user") + DECIMAL_STR_MAX(uid_t)];
+ uint64_t runtime_dir_size, runtime_dir_inodes;
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r,
+ r == -ESRCH ? "No such user \"%s\"" :
+ r == -ENOMSG ? "UID \"%s\" is invalid or has an invalid main group"
+ : "Failed to look up user \"%s\": %m",
+ user);
+
+ r = acquire_runtime_dir_properties(&runtime_dir_size, &runtime_dir_inodes);
+ if (r < 0)
+ return r;
+
+ xsprintf(runtime_path, "/run/user/" UID_FMT, uid);
+
+ log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, runtime_path, uid, gid);
+ return user_mkdir_runtime_path(runtime_path, uid, gid, runtime_dir_size, runtime_dir_inodes);
+}
+
+static int do_umount(const char *user) {
+ char runtime_path[sizeof("/run/user") + DECIMAL_STR_MAX(uid_t)];
+ uid_t uid;
+ int r;
+
+ /* The user may be already removed. So, first try to parse the string by parse_uid(),
+ * and if it fails, fall back to get_user_creds().*/
+ if (parse_uid(user, &uid) < 0) {
+ r = get_user_creds(&user, &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r,
+ r == -ESRCH ? "No such user \"%s\"" :
+ r == -ENOMSG ? "UID \"%s\" is invalid or has an invalid main group"
+ : "Failed to look up user \"%s\": %m",
+ user);
+ }
+
+ xsprintf(runtime_path, "/run/user/" UID_FMT, uid);
+
+ log_debug("Will remove %s", runtime_path);
+ return user_remove_runtime_path(runtime_path);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ if (argc != 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes two arguments.");
+ if (!STR_IN_SET(argv[1], "start", "stop"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "First argument must be either \"start\" or \"stop\".");
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ if (streq(argv[1], "start"))
+ return do_mount(argv[2]);
+ if (streq(argv[1], "stop"))
+ return do_umount(argv[2]);
+ assert_not_reached("Unknown verb!");
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/machine-id-setup/machine-id-setup-main.c b/src/machine-id-setup/machine-id-setup-main.c
new file mode 100644
index 0000000..c35da05
--- /dev/null
+++ b/src/machine-id-setup/machine-id-setup-main.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "id128-util.h"
+#include "log.h"
+#include "machine-id-setup.h"
+#include "main-func.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "util.h"
+
+static char *arg_root = NULL;
+static bool arg_commit = false;
+static bool arg_print = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-machine-id-setup", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Initialize /etc/machine-id from a random source.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --root=ROOT Filesystem root\n"
+ " --commit Commit transient ID\n"
+ " --print Print used machine ID\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ROOT,
+ ARG_COMMIT,
+ ARG_PRINT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "commit", no_argument, NULL, ARG_COMMIT },
+ { "print", no_argument, NULL, ARG_PRINT },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, true, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_COMMIT:
+ arg_commit = true;
+ break;
+
+ case ARG_PRINT:
+ arg_print = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Extraneous arguments");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ char buf[SD_ID128_STRING_MAX];
+ sd_id128_t id;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_commit) {
+ const char *etc_machine_id;
+
+ r = machine_id_commit(arg_root);
+ if (r < 0)
+ return r;
+
+ etc_machine_id = prefix_roota(arg_root, "/etc/machine-id");
+ r = id128_read(etc_machine_id, ID128_PLAIN, &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read machine ID back: %m");
+ } else {
+ r = machine_id_setup(arg_root, false, SD_ID128_NULL, &id);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_print)
+ puts(sd_id128_to_string(id, buf));
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c
new file mode 100644
index 0000000..c157aaf
--- /dev/null
+++ b/src/machine/image-dbus.c
@@ -0,0 +1,505 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/file.h>
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "bus-label.h"
+#include "bus-polkit.h"
+#include "copy.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "image-dbus.h"
+#include "io-util.h"
+#include "loop-util.h"
+#include "machine-image.h"
+#include "missing_capability.h"
+#include "mount-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "strv.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, image_type, ImageType);
+
+int bus_image_method_remove(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ pid_t child;
+ int r;
+
+ assert(message);
+ assert(image);
+
+ if (m->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations.");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ r = safe_fork("(sd-imgrm)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ r = image_remove(image);
+ if (r < 0) {
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ r = operation_new(m, NULL, child, message, errno_pipe_fd[0], NULL);
+ if (r < 0) {
+ (void) sigkill_wait(child);
+ return r;
+ }
+
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+int bus_image_method_rename(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ const char *new_name;
+ int r;
+
+ assert(message);
+ assert(image);
+
+ r = sd_bus_message_read(message, "s", &new_name);
+ if (r < 0)
+ return r;
+
+ if (!image_name_is_valid(new_name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is invalid.", new_name);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_rename(image, new_name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_image_method_clone(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ const char *new_name;
+ int r, read_only;
+ pid_t child;
+
+ assert(message);
+ assert(image);
+ assert(m);
+
+ if (m->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations.");
+
+ r = sd_bus_message_read(message, "sb", &new_name, &read_only);
+ if (r < 0)
+ return r;
+
+ if (!image_name_is_valid(new_name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is invalid.", new_name);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ r = safe_fork("(sd-imgclone)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ r = image_clone(image, new_name, read_only);
+ if (r < 0) {
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ r = operation_new(m, NULL, child, message, errno_pipe_fd[0], NULL);
+ if (r < 0) {
+ (void) sigkill_wait(child);
+ return r;
+ }
+
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+int bus_image_method_mark_read_only(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ int r, read_only;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "b", &read_only);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_read_only(image, read_only);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_image_method_set_limit(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ uint64_t limit;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "t", &limit);
+ if (r < 0)
+ return r;
+ if (!FILE_SIZE_VALID_OR_INFINITY(limit))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New limit out of range");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_set_limit(image, limit);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_image_method_get_hostname(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ int r;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ return sd_bus_reply_method_return(message, "s", image->hostname);
+}
+
+int bus_image_method_get_machine_id(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Image *image = userdata;
+ int r;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_is_null(image->machine_id)) /* Add an empty array if the ID is zero */
+ r = sd_bus_message_append(reply, "ay", 0);
+ else
+ r = sd_bus_message_append_array(reply, 'y', image->machine_id.bytes, 16);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+int bus_image_method_get_machine_info(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ int r;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ return bus_reply_pair_array(message, image->machine_info);
+}
+
+int bus_image_method_get_os_release(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ int r;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ return bus_reply_pair_array(message, image->os_release);
+}
+
+static int image_flush_cache(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(m);
+
+ hashmap_clear(m->image_cache);
+ return 0;
+}
+
+static int image_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *e = NULL;
+ Manager *m = userdata;
+ Image *image = NULL;
+ const char *p;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+
+ p = startswith(path, "/org/freedesktop/machine1/image/");
+ if (!p)
+ return 0;
+
+ e = bus_label_unescape(p);
+ if (!e)
+ return -ENOMEM;
+
+ image = hashmap_get(m->image_cache, e);
+ if (image) {
+ *found = image;
+ return 1;
+ }
+
+ r = hashmap_ensure_allocated(&m->image_cache, &image_hash_ops);
+ if (r < 0)
+ return r;
+
+ if (!m->image_cache_defer_event) {
+ r = sd_event_add_defer(m->event, &m->image_cache_defer_event, image_flush_cache, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->image_cache_defer_event, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_enabled(m->image_cache_defer_event, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, e, &image);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ image->userdata = m;
+
+ r = hashmap_put(m->image_cache, image->name, image);
+ if (r < 0) {
+ image_unref(image);
+ return r;
+ }
+
+ *found = image;
+ return 1;
+}
+
+char *image_bus_path(const char *name) {
+ _cleanup_free_ char *e = NULL;
+
+ assert(name);
+
+ e = bus_label_escape(name);
+ if (!e)
+ return NULL;
+
+ return strjoin("/org/freedesktop/machine1/image/", e);
+}
+
+static int image_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ Image *image;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images)
+ return -ENOMEM;
+
+ r = image_discover(IMAGE_MACHINE, images);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images) {
+ char *p;
+
+ p = image_bus_path(image->name);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+const sd_bus_vtable image_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Image, name), 0),
+ SD_BUS_PROPERTY("Path", "s", NULL, offsetof(Image, path), 0),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Image, type), 0),
+ SD_BUS_PROPERTY("ReadOnly", "b", bus_property_get_bool, offsetof(Image, read_only), 0),
+ SD_BUS_PROPERTY("CreationTimestamp", "t", NULL, offsetof(Image, crtime), 0),
+ SD_BUS_PROPERTY("ModificationTimestamp", "t", NULL, offsetof(Image, mtime), 0),
+ SD_BUS_PROPERTY("Usage", "t", NULL, offsetof(Image, usage), 0),
+ SD_BUS_PROPERTY("Limit", "t", NULL, offsetof(Image, limit), 0),
+ SD_BUS_PROPERTY("UsageExclusive", "t", NULL, offsetof(Image, usage_exclusive), 0),
+ SD_BUS_PROPERTY("LimitExclusive", "t", NULL, offsetof(Image, limit_exclusive), 0),
+ SD_BUS_METHOD("Remove", NULL, NULL, bus_image_method_remove, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Rename", "s", NULL, bus_image_method_rename, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Clone", "sb", NULL, bus_image_method_clone, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MarkReadOnly", "b", NULL, bus_image_method_mark_read_only, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLimit", "t", NULL, bus_image_method_set_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetHostname", NULL, "s", bus_image_method_get_hostname, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMachineID", NULL, "ay", bus_image_method_get_machine_id, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMachineInfo", NULL, "a{ss}", bus_image_method_get_machine_info, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetOSRelease", NULL, "a{ss}", bus_image_method_get_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation image_object = {
+ "/org/freedesktop/machine1/image",
+ "org.freedesktop.machine1.Image",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({image_vtable, image_object_find}),
+ .node_enumerator = image_node_enumerator,
+};
diff --git a/src/machine/image-dbus.h b/src/machine/image-dbus.h
new file mode 100644
index 0000000..4b00203
--- /dev/null
+++ b/src/machine/image-dbus.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-object.h"
+#include "machined.h"
+
+extern const BusObjectImplementation image_object;
+
+char *image_bus_path(const char *name);
+
+int bus_image_method_remove(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_rename(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_clone(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_mark_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_set_limit(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_get_hostname(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_get_machine_id(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_get_machine_info(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_image_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c
new file mode 100644
index 0000000..bb67beb
--- /dev/null
+++ b/src/machine/machine-dbus.c
@@ -0,0 +1,1563 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-internal.h"
+#include "bus-label.h"
+#include "bus-locator.h"
+#include "bus-polkit.h"
+#include "copy.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "local-addresses.h"
+#include "machine-dbus.h"
+#include "machine.h"
+#include "missing_capability.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "namespace-util.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_class, machine_class, MachineClass);
+static BUS_DEFINE_PROPERTY_GET2(property_get_state, "s", Machine, machine_get_state, machine_state_to_string);
+
+static int property_get_netif(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Machine *m = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ return sd_bus_message_append_array(reply, 'i', m->netif, m->n_netif * sizeof(int));
+}
+
+int bus_machine_method_unregister(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_finalize(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_machine_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_stop(m);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_machine_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Machine *m = userdata;
+ const char *swho;
+ int32_t signo;
+ KillWho who;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "si", &swho, &signo);
+ if (r < 0)
+ return r;
+
+ if (isempty(swho))
+ who = KILL_ALL;
+ else {
+ who = kill_who_from_string(swho);
+ if (who < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid kill parameter '%s'", swho);
+ }
+
+ if (!SIGNAL_VALID(signo))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid signal %i", signo);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_KILL,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_kill(m, who, signo);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_machine_method_get_addresses(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iay)");
+ if (r < 0)
+ return r;
+
+ switch (m->class) {
+
+ case MACHINE_HOST: {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ struct local_address *a;
+ int n, i;
+
+ n = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
+ if (n < 0)
+ return n;
+
+ for (a = addresses, i = 0; i < n; a++, i++) {
+
+ r = sd_bus_message_open_container(reply, 'r', "iay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", addresses[i].family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &addresses[i].address, FAMILY_ADDRESS_SIZE(addresses[i].family));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+ }
+
+ case MACHINE_CONTAINER: {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_free_ char *us = NULL, *them = NULL;
+ _cleanup_close_ int netns_fd = -1;
+ const char *p;
+ pid_t child;
+
+ r = readlink_malloc("/proc/self/ns/net", &us);
+ if (r < 0)
+ return r;
+
+ p = procfs_file_alloca(m->leader, "ns/net");
+ r = readlink_malloc(p, &them);
+ if (r < 0)
+ return r;
+
+ if (streq(us, them))
+ return sd_bus_error_setf(error, BUS_ERROR_NO_PRIVATE_NETWORKING, "Machine %s does not use private networking", m->name);
+
+ r = namespace_open(m->leader, NULL, NULL, &netns_fd, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-addrns)", "(sd-addr)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ -1, -1, netns_fd, -1, -1, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ struct local_address *a;
+ int i, n;
+
+ pair[0] = safe_close(pair[0]);
+
+ n = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
+ if (n < 0)
+ _exit(EXIT_FAILURE);
+
+ for (a = addresses, i = 0; i < n; a++, i++) {
+ struct iovec iov[2] = {
+ { .iov_base = &a->family, .iov_len = sizeof(a->family) },
+ { .iov_base = &a->address, .iov_len = FAMILY_ADDRESS_SIZE(a->family) },
+ };
+
+ r = writev(pair[1], iov, 2);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ for (;;) {
+ int family;
+ ssize_t n;
+ union in_addr_union in_addr;
+ struct iovec iov[2];
+ struct msghdr mh = {
+ .msg_iov = iov,
+ .msg_iovlen = 2,
+ };
+
+ iov[0] = IOVEC_MAKE(&family, sizeof(family));
+ iov[1] = IOVEC_MAKE(&in_addr, sizeof(in_addr));
+
+ n = recvmsg(pair[0], &mh, 0);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n < sizeof(family))
+ break;
+
+ r = sd_bus_message_open_container(reply, 'r', "iay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", family);
+ if (r < 0)
+ return r;
+
+ switch (family) {
+
+ case AF_INET:
+ if (n != sizeof(struct in_addr) + sizeof(family))
+ return -EIO;
+
+ r = sd_bus_message_append_array(reply, 'y', &in_addr.in, sizeof(in_addr.in));
+ break;
+
+ case AF_INET6:
+ if (n != sizeof(struct in6_addr) + sizeof(family))
+ return -EIO;
+
+ r = sd_bus_message_append_array(reply, 'y', &in_addr.in6, sizeof(in_addr.in6));
+ break;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = wait_for_terminate_and_check("(sd-addrns)", child, 0);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
+ if (r != EXIT_SUCCESS)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+ break;
+ }
+
+ default:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Requesting IP address data is only supported on container machines.");
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+#define EXIT_NOT_FOUND 2
+
+int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+ r = load_os_release_pairs(NULL, &l);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case MACHINE_CONTAINER: {
+ _cleanup_close_ int mntns_fd = -1, root_fd = -1, pidns_fd = -1;
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t child;
+
+ r = namespace_open(m->leader, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-osrelns)", "(sd-osrel)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidns_fd, mntns_fd, -1, -1, root_fd,
+ &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ int fd = -1;
+
+ pair[0] = safe_close(pair[0]);
+
+ r = open_os_release(NULL, NULL, &fd);
+ if (r == -ENOENT)
+ _exit(EXIT_NOT_FOUND);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ r = copy_bytes(fd, pair[1], (uint64_t) -1, 0);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ f = take_fdopen(&pair[0], "r");
+ if (!f)
+ return -errno;
+
+ r = load_env_file_pairs(f, "/etc/os-release", &l);
+ if (r < 0)
+ return r;
+
+ r = wait_for_terminate_and_check("(sd-osrelns)", child, 0);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
+ if (r == EXIT_NOT_FOUND)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Machine does not contain OS release information");
+ if (r != EXIT_SUCCESS)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+
+ break;
+ }
+
+ default:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Requesting OS release data is only supported on container machines.");
+ }
+
+ return bus_reply_pair_array(message, l);
+}
+
+int bus_machine_method_open_pty(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *pty_name = NULL;
+ _cleanup_close_ int master = -1;
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ m->class == MACHINE_HOST ? "org.freedesktop.machine1.host-open-pty" : "org.freedesktop.machine1.open-pty",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC, &pty_name);
+ if (master < 0)
+ return master;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "hs", master, pty_name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int container_bus_new(Machine *m, sd_bus_error *error, sd_bus **ret) {
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+ *ret = NULL;
+ break;
+
+ case MACHINE_CONTAINER: {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ char *address;
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&address, "x-machine-kernel:pid=%1$" PID_PRI ";x-machine-unix:pid=%1$" PID_PRI, m->leader) < 0)
+ return -ENOMEM;
+
+ bus->address = address;
+ bus->bus_client = true;
+ bus->trusted = false;
+ bus->is_system = true;
+
+ r = sd_bus_start(bus);
+ if (r == -ENOENT)
+ return sd_bus_error_set_errnof(error, r, "There is no system bus in container %s.", m->name);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(bus);
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int bus_machine_method_open_login(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *pty_name = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *allocated_bus = NULL;
+ _cleanup_close_ int master = -1;
+ sd_bus *container_bus = NULL;
+ Machine *m = userdata;
+ const char *p, *getty;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ m->class == MACHINE_HOST ? "org.freedesktop.machine1.host-login" : "org.freedesktop.machine1.login",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC, &pty_name);
+ if (master < 0)
+ return master;
+
+ p = path_startswith(pty_name, "/dev/pts/");
+ assert(p);
+
+ r = container_bus_new(m, error, &allocated_bus);
+ if (r < 0)
+ return r;
+
+ container_bus = allocated_bus ?: m->manager->bus;
+
+ getty = strjoina("container-getty@", p, ".service");
+
+ r = bus_call_method(container_bus, bus_systemd_mgr, "StartUnit", error, NULL, "ss", getty, "replace");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "hs", master, pty_name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *tm = NULL;
+ _cleanup_free_ char *pty_name = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *allocated_bus = NULL;
+ sd_bus *container_bus = NULL;
+ _cleanup_close_ int master = -1, slave = -1;
+ _cleanup_strv_free_ char **env = NULL, **args_wire = NULL, **args = NULL;
+ Machine *m = userdata;
+ const char *p, *unit, *user, *path, *description, *utmp_id;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "ss", &user, &path);
+ if (r < 0)
+ return r;
+ user = isempty(user) ? "root" : user;
+ r = sd_bus_message_read_strv(message, &args_wire);
+ if (r < 0)
+ return r;
+ if (isempty(path)) {
+ path = "/bin/sh";
+
+ args = new0(char*, 3 + 1);
+ if (!args)
+ return -ENOMEM;
+ args[0] = strdup("sh");
+ if (!args[0])
+ return -ENOMEM;
+ args[1] = strdup("-c");
+ if (!args[1])
+ return -ENOMEM;
+ r = asprintf(&args[2],
+ "shell=$(getent passwd %s 2>/dev/null | { IFS=: read _ _ _ _ _ _ x; echo \"$x\"; })\n"\
+ "exec \"${shell:-/bin/sh}\" -l", /* -l is means --login */
+ user);
+ if (r < 0) {
+ args[2] = NULL;
+ return -ENOMEM;
+ }
+ } else {
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Specified path '%s' is not absolute", path);
+ args = TAKE_PTR(args_wire);
+ if (strv_isempty(args)) {
+ args = strv_free(args);
+
+ args = strv_new(path);
+ if (!args)
+ return -ENOMEM;
+ }
+ }
+
+ r = sd_bus_message_read_strv(message, &env);
+ if (r < 0)
+ return r;
+ if (!strv_env_is_valid(env))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid environment assignments");
+
+ const char *details[] = {
+ "machine", m->name,
+ "user", user,
+ "program", path,
+ NULL
+ };
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ m->class == MACHINE_HOST ? "org.freedesktop.machine1.host-shell" : "org.freedesktop.machine1.shell",
+ details,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC, &pty_name);
+ if (master < 0)
+ return master;
+
+ p = path_startswith(pty_name, "/dev/pts/");
+ assert(p);
+
+ slave = machine_open_terminal(m, pty_name, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (slave < 0)
+ return slave;
+
+ utmp_id = path_startswith(pty_name, "/dev/");
+ assert(utmp_id);
+
+ r = container_bus_new(m, error, &allocated_bus);
+ if (r < 0)
+ return r;
+
+ container_bus = allocated_bus ?: m->manager->bus;
+
+ r = bus_message_new_method_call(container_bus, &tm, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return r;
+
+ /* Name and mode */
+ unit = strjoina("container-shell@", p, ".service");
+ r = sd_bus_message_append(tm, "ss", unit, "fail");
+ if (r < 0)
+ return r;
+
+ /* Properties */
+ r = sd_bus_message_open_container(tm, 'a', "(sv)");
+ if (r < 0)
+ return r;
+
+ description = strjoina("Shell for User ", user);
+ r = sd_bus_message_append(tm,
+ "(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)(sv)",
+ "Description", "s", description,
+ "StandardInputFileDescriptor", "h", slave,
+ "StandardOutputFileDescriptor", "h", slave,
+ "StandardErrorFileDescriptor", "h", slave,
+ "SendSIGHUP", "b", true,
+ "IgnoreSIGPIPE", "b", false,
+ "KillMode", "s", "mixed",
+ "TTYReset", "b", true,
+ "UtmpIdentifier", "s", utmp_id,
+ "UtmpMode", "s", "user",
+ "PAMName", "s", "login",
+ "WorkingDirectory", "s", "-~");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "(sv)", "User", "s", user);
+ if (r < 0)
+ return r;
+
+ if (!strv_isempty(env)) {
+ r = sd_bus_message_open_container(tm, 'r', "sv");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "s", "Environment");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(tm, 'v', "as");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(tm, env);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+ }
+
+ /* Exec container */
+ r = sd_bus_message_open_container(tm, 'r', "sv");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "s", "ExecStart");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(tm, 'v', "a(sasb)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(tm, 'a', "(sasb)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(tm, 'r', "sasb");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "s", path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(tm, args);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(tm, "b", true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(tm);
+ if (r < 0)
+ return r;
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(tm, "a(sa(sv))", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(container_bus, tm, 0, error, NULL);
+ if (r < 0)
+ return r;
+
+ slave = safe_close(slave);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "hs", master, pty_name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
+ bool mount_slave_created = false, mount_slave_mounted = false,
+ mount_tmp_created = false, mount_tmp_mounted = false,
+ mount_outside_created = false, mount_outside_mounted = false;
+ _cleanup_free_ char *chased_src = NULL;
+ int read_only, make_file_or_directory;
+ const char *dest, *src;
+ Machine *m = userdata;
+ struct stat st;
+ pid_t child;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ if (m->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Bind mounting is only supported on container machines.");
+
+ r = sd_bus_message_read(message, "ssbb", &src, &dest, &read_only, &make_file_or_directory);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(src) || !path_is_normalized(src))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute and not contain ../.");
+
+ if (isempty(dest))
+ dest = src;
+ else if (!path_is_absolute(dest) || !path_is_normalized(dest))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute and not contain ../.");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_get_uid_shift(m, &uid);
+ if (r < 0)
+ return r;
+ if (uid != 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Can't bind mount on container with user namespacing applied.");
+
+ /* One day, when bind mounting /proc/self/fd/n works across
+ * namespace boundaries we should rework this logic to make
+ * use of it... */
+
+ p = strjoina("/run/systemd/nspawn/propagate/", m->name, "/");
+ if (laccess(p, F_OK) < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Container does not allow propagation of mount points.");
+
+ r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, &chased_src, NULL);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to resolve source path: %m");
+
+ if (lstat(chased_src, &st) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to stat() source path: %m");
+ if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Source directory can't be a symbolic link");
+
+ /* Our goal is to install a new bind mount into the container,
+ possibly read-only. This is irritatingly complex
+ unfortunately, currently.
+
+ First, we start by creating a private playground in /tmp,
+ that we can mount MS_SLAVE. (Which is necessary, since
+ MS_MOVE cannot be applied to mounts with MS_SHARED parent
+ mounts.) */
+
+ if (!mkdtemp(mount_slave))
+ return sd_bus_error_set_errnof(error, errno, "Failed to create playground %s: %m", mount_slave);
+
+ mount_slave_created = true;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Failed to make bind mount %s: %m", mount_slave);
+ goto finish;
+ }
+
+ mount_slave_mounted = true;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Failed to remount slave %s: %m", mount_slave);
+ goto finish;
+ }
+
+ /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */
+ mount_tmp = strjoina(mount_slave, "/mount");
+ if (S_ISDIR(st.st_mode))
+ r = mkdir_errno_wrapper(mount_tmp, 0700);
+ else
+ r = touch(mount_tmp);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Failed to create temporary mount point %s: %m", mount_tmp);
+ goto finish;
+ }
+
+ mount_tmp_created = true;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Failed to mount %s: %m", chased_src);
+ goto finish;
+ }
+
+ mount_tmp_mounted = true;
+
+ /* Third, we remount the new bind mount read-only if requested. */
+ if (read_only) {
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Failed to remount read-only %s: %m", mount_tmp);
+ goto finish;
+ }
+ }
+
+ /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only
+ * right-away. */
+
+ mount_outside = strjoina("/run/systemd/nspawn/propagate/", m->name, "/XXXXXX");
+ if (S_ISDIR(st.st_mode))
+ r = mkdtemp(mount_outside) ? 0 : -errno;
+ else {
+ r = mkostemp_safe(mount_outside);
+ safe_close(r);
+ }
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Cannot create propagation file or directory %s: %m", mount_outside);
+ goto finish;
+ }
+
+ mount_outside_created = true;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Failed to move %s to %s: %m", mount_tmp, mount_outside);
+ goto finish;
+ }
+
+ mount_outside_mounted = true;
+ mount_tmp_mounted = false;
+
+ if (S_ISDIR(st.st_mode))
+ (void) rmdir(mount_tmp);
+ else
+ (void) unlink(mount_tmp);
+ mount_tmp_created = false;
+
+ (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
+ mount_slave_mounted = false;
+
+ (void) rmdir(mount_slave);
+ mount_slave_created = false;
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) {
+ r = sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+ goto finish;
+ }
+
+ r = safe_fork("(sd-bindmnt)", FORK_RESET_SIGNALS, &child);
+ if (r < 0) {
+ sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ goto finish;
+ }
+ if (r == 0) {
+ const char *mount_inside, *q;
+ int mntfd;
+
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ q = procfs_file_alloca(m->leader, "ns/mnt");
+ mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (mntfd < 0) {
+ r = log_error_errno(errno, "Failed to open mount namespace of leader: %m");
+ goto child_fail;
+ }
+
+ if (setns(mntfd, CLONE_NEWNS) < 0) {
+ r = log_error_errno(errno, "Failed to join namespace of leader: %m");
+ goto child_fail;
+ }
+
+ if (make_file_or_directory) {
+ if (S_ISDIR(st.st_mode))
+ (void) mkdir_p(dest, 0755);
+ else {
+ (void) mkdir_parents(dest, 0755);
+ (void) mknod(dest, S_IFREG|0600, 0);
+ }
+ }
+
+ mount_inside = strjoina("/run/host/incoming/", basename(mount_outside));
+ r = mount_nofollow_verbose(LOG_ERR, mount_inside, dest, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto child_fail;
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0);
+ if (r < 0) {
+ r = sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
+ goto finish;
+ }
+ if (r != EXIT_SUCCESS) {
+ if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r))
+ r = sd_bus_error_set_errnof(error, r, "Failed to mount: %m");
+ else
+ r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child failed.");
+ goto finish;
+ }
+
+ r = sd_bus_reply_method_return(message, NULL);
+
+finish:
+ if (mount_outside_mounted)
+ (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW);
+ if (mount_outside_created) {
+ if (S_ISDIR(st.st_mode))
+ (void) rmdir(mount_outside);
+ else
+ (void) unlink(mount_outside);
+ }
+
+ if (mount_tmp_mounted)
+ (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW);
+ if (mount_tmp_created) {
+ if (S_ISDIR(st.st_mode))
+ (void) rmdir(mount_tmp);
+ else
+ (void) unlink(mount_tmp);
+ }
+
+ if (mount_slave_mounted)
+ (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
+ if (mount_slave_created)
+ (void) rmdir(mount_slave);
+
+ return r;
+}
+
+int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *src, *dest, *host_path, *container_path, *host_basename, *container_basename, *container_dirname;
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ CopyFlags copy_flags = COPY_REFLINK|COPY_MERGE|COPY_HARDLINKS;
+ _cleanup_close_ int hostfd = -1;
+ Machine *m = userdata;
+ bool copy_from;
+ pid_t child;
+ uid_t uid_shift;
+ char *t;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ if (m->manager->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing copies.");
+
+ if (m->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Copying files is only supported on container machines.");
+
+ r = sd_bus_message_read(message, "ss", &src, &dest);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(src))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path must be absolute.");
+
+ if (isempty(dest))
+ dest = src;
+ else if (!path_is_absolute(dest))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path must be absolute.");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = machine_get_uid_shift(m, &uid_shift);
+ if (r < 0)
+ return r;
+
+ copy_from = strstr(sd_bus_message_get_member(message), "CopyFrom");
+
+ if (copy_from) {
+ container_path = src;
+ host_path = dest;
+ } else {
+ host_path = src;
+ container_path = dest;
+ }
+
+ host_basename = basename(host_path);
+
+ container_basename = basename(container_path);
+ t = strdupa(container_path);
+ container_dirname = dirname(t);
+
+ hostfd = open_parent(host_path, O_CLOEXEC, 0);
+ if (hostfd < 0)
+ return sd_bus_error_set_errnof(error, hostfd, "Failed to open host directory %s: %m", host_path);
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ r = safe_fork("(sd-copy)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ int containerfd;
+ const char *q;
+ int mntfd;
+
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ q = procfs_file_alloca(m->leader, "ns/mnt");
+ mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (mntfd < 0) {
+ r = log_error_errno(errno, "Failed to open mount namespace of leader: %m");
+ goto child_fail;
+ }
+
+ if (setns(mntfd, CLONE_NEWNS) < 0) {
+ r = log_error_errno(errno, "Failed to join namespace of leader: %m");
+ goto child_fail;
+ }
+
+ containerfd = open(container_dirname, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_DIRECTORY);
+ if (containerfd < 0) {
+ r = log_error_errno(errno, "Failed to open destination directory: %m");
+ goto child_fail;
+ }
+
+ /* Run the actual copy operation. Note that when an UID shift is set we'll either clamp the UID/GID to
+ * 0 or to the actual UID shift depending on the direction we copy. If no UID shift is set we'll copy
+ * the UID/GIDs as they are. */
+ if (copy_from)
+ r = copy_tree_at(containerfd, container_basename, hostfd, host_basename, uid_shift == 0 ? UID_INVALID : 0, uid_shift == 0 ? GID_INVALID : 0, copy_flags);
+ else
+ r = copy_tree_at(hostfd, host_basename, containerfd, container_basename, uid_shift == 0 ? UID_INVALID : uid_shift, uid_shift == 0 ? GID_INVALID : uid_shift, copy_flags);
+
+ hostfd = safe_close(hostfd);
+ containerfd = safe_close(containerfd);
+
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to copy tree: %m");
+ goto child_fail;
+ }
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ /* Copying might take a while, hence install a watch on the child, and return */
+
+ r = operation_new(m->manager, m, child, message, errno_pipe_fd[0], NULL);
+ if (r < 0) {
+ (void) sigkill_wait(child);
+ return r;
+ }
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+int bus_machine_method_open_root_directory(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_close_ int fd = -1;
+ Machine *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->manager->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+ fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ break;
+
+ case MACHINE_CONTAINER: {
+ _cleanup_close_ int mntns_fd = -1, root_fd = -1;
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ pid_t child;
+
+ r = namespace_open(m->leader, NULL, &mntns_fd, NULL, NULL, &root_fd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-openrootns)", "(sd-openroot)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ -1, mntns_fd, -1, -1, root_fd, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ _cleanup_close_ int dfd = -1;
+
+ pair[0] = safe_close(pair[0]);
+
+ dfd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (dfd < 0)
+ _exit(EXIT_FAILURE);
+
+ r = send_one_fd(pair[1], dfd, 0);
+ dfd = safe_close(dfd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-openrootns)", child, 0);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
+ if (r != EXIT_SUCCESS)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+
+ fd = receive_one_fd(pair[0], MSG_DONTWAIT);
+ if (fd < 0)
+ return fd;
+
+ break;
+ }
+
+ default:
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Opening the root directory is only supported on container machines.");
+ }
+
+ return sd_bus_reply_method_return(message, "h", fd);
+}
+
+int bus_machine_method_get_uid_shift(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Machine *m = userdata;
+ uid_t shift = 0;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* You wonder why this is a method and not a property? Well, properties are not supposed to return errors, but
+ * we kinda have to for this. */
+
+ if (m->class == MACHINE_HOST)
+ return sd_bus_reply_method_return(message, "u", UINT32_C(0));
+
+ if (m->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "UID/GID shift may only be determined for container machines.");
+
+ r = machine_get_uid_shift(m, &shift);
+ if (r == -ENXIO)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Machine %s uses a complex UID/GID mapping, cannot determine shift", m->name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) shift);
+}
+
+static int machine_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Machine *machine;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ if (streq(path, "/org/freedesktop/machine1/machine/self")) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ sd_bus_message *message;
+ pid_t pid;
+
+ message = sd_bus_get_current_message(bus);
+ if (!message)
+ return 0;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ r = manager_get_machine_by_pid(m, pid, &machine);
+ if (r <= 0)
+ return 0;
+ } else {
+ _cleanup_free_ char *e = NULL;
+ const char *p;
+
+ p = startswith(path, "/org/freedesktop/machine1/machine/");
+ if (!p)
+ return 0;
+
+ e = bus_label_unescape(p);
+ if (!e)
+ return -ENOMEM;
+
+ machine = hashmap_get(m->machines, e);
+ if (!machine)
+ return 0;
+ }
+
+ *found = machine;
+ return 1;
+}
+
+char *machine_bus_path(Machine *m) {
+ _cleanup_free_ char *e = NULL;
+
+ assert(m);
+
+ e = bus_label_escape(m->name);
+ if (!e)
+ return NULL;
+
+ return strjoin("/org/freedesktop/machine1/machine/", e);
+}
+
+static int machine_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Machine *machine = NULL;
+ Manager *m = userdata;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ HASHMAP_FOREACH(machine, m->machines) {
+ char *p;
+
+ p = machine_bus_path(machine);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+static const sd_bus_vtable machine_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Machine, name), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Id", "ay", bus_property_get_id128, offsetof(Machine, id), SD_BUS_VTABLE_PROPERTY_CONST),
+ BUS_PROPERTY_DUAL_TIMESTAMP("Timestamp", offsetof(Machine, timestamp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Service", "s", NULL, offsetof(Machine, service), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Unit", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Scope", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("Leader", "u", NULL, offsetof(Machine, leader), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Class", "s", property_get_class, offsetof(Machine, class), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RootDirectory", "s", NULL, offsetof(Machine, root_directory), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NetworkInterfaces", "ai", property_get_netif, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("State", "s", property_get_state, 0, 0),
+
+ SD_BUS_METHOD("Terminate",
+ NULL,
+ NULL,
+ bus_machine_method_terminate,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("Kill",
+ "si",
+ SD_BUS_PARAM(who)
+ SD_BUS_PARAM(signal),
+ NULL,,
+ bus_machine_method_kill,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetAddresses",
+ NULL,,
+ "a(iay)",
+ SD_BUS_PARAM(addresses),
+ bus_machine_method_get_addresses,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetOSRelease",
+ NULL,,
+ "a{ss}",
+ SD_BUS_PARAM(fields),
+ bus_machine_method_get_os_release,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetUIDShift",
+ NULL,,
+ "u",
+ SD_BUS_PARAM(shift),
+ bus_machine_method_get_uid_shift,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("OpenPTY",
+ NULL,,
+ "hs",
+ SD_BUS_PARAM(pty)
+ SD_BUS_PARAM(pty_path),
+ bus_machine_method_open_pty,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("OpenLogin",
+ NULL,,
+ "hs",
+ SD_BUS_PARAM(pty)
+ SD_BUS_PARAM(pty_path),
+ bus_machine_method_open_login,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("OpenShell",
+ "ssasas",
+ SD_BUS_PARAM(user)
+ SD_BUS_PARAM(path)
+ SD_BUS_PARAM(args)
+ SD_BUS_PARAM(environment),
+ "hs",
+ SD_BUS_PARAM(pty)
+ SD_BUS_PARAM(pty_path),
+ bus_machine_method_open_shell,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("BindMount",
+ "ssbb",
+ SD_BUS_PARAM(source)
+ SD_BUS_PARAM(destination)
+ SD_BUS_PARAM(read_only)
+ SD_BUS_PARAM(mkdir),
+ NULL,,
+ bus_machine_method_bind_mount,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CopyFrom",
+ "ss",
+ SD_BUS_PARAM(source)
+ SD_BUS_PARAM(destination),
+ NULL,,
+ bus_machine_method_copy,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CopyTo",
+ "ss",
+ SD_BUS_PARAM(source)
+ SD_BUS_PARAM(destination),
+ NULL,,
+ bus_machine_method_copy,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("OpenRootDirectory",
+ NULL,,
+ "h",
+ SD_BUS_PARAM(fd),
+ bus_machine_method_open_root_directory,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation machine_object = {
+ "/org/freedesktop/machine1/machine",
+ "org.freedesktop.machine1.Machine",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({machine_vtable, machine_object_find}),
+ .node_enumerator = machine_node_enumerator,
+};
+
+int machine_send_signal(Machine *m, bool new_machine) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(m);
+
+ p = machine_bus_path(m);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_signal(
+ m->manager->bus,
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ new_machine ? "MachineNew" : "MachineRemoved",
+ "so", m->name, p);
+}
+
+int machine_send_create_reply(Machine *m, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *c = NULL;
+ _cleanup_free_ char *p = NULL;
+
+ assert(m);
+
+ if (!m->create_message)
+ return 0;
+
+ c = TAKE_PTR(m->create_message);
+
+ if (error)
+ return sd_bus_reply_method_error(c, error);
+
+ /* Update the machine state file before we notify the client
+ * about the result. */
+ machine_save(m);
+
+ p = machine_bus_path(m);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(c, "o", p);
+}
diff --git a/src/machine/machine-dbus.h b/src/machine/machine-dbus.h
new file mode 100644
index 0000000..1c114f4
--- /dev/null
+++ b/src/machine/machine-dbus.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-util.h"
+#include "machine.h"
+
+extern const BusObjectImplementation machine_object;
+
+char *machine_bus_path(Machine *s);
+
+int bus_machine_method_unregister(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_terminate(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_get_addresses(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_open_pty(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_open_login(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_open_root_directory(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_machine_method_get_uid_shift(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int machine_send_signal(Machine *m, bool new_machine);
+int machine_send_create_reply(Machine *m, sd_bus_error *error);
diff --git a/src/machine/machine.c b/src/machine/machine.c
new file mode 100644
index 0000000..537b0cd
--- /dev/null
+++ b/src/machine/machine.c
@@ -0,0 +1,907 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "env-file.h"
+#include "errno-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "machine-dbus.h"
+#include "machine.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+
+Machine* machine_new(Manager *manager, MachineClass class, const char *name) {
+ Machine *m;
+
+ assert(manager);
+ assert(class < _MACHINE_CLASS_MAX);
+ assert(name);
+
+ /* Passing class == _MACHINE_CLASS_INVALID here is fine. It
+ * means as much as "we don't know yet", and that we'll figure
+ * it out later when loading the state file. */
+
+ m = new0(Machine, 1);
+ if (!m)
+ return NULL;
+
+ m->name = strdup(name);
+ if (!m->name)
+ goto fail;
+
+ if (class != MACHINE_HOST) {
+ m->state_file = path_join("/run/systemd/machines", m->name);
+ if (!m->state_file)
+ goto fail;
+ }
+
+ m->class = class;
+
+ if (hashmap_put(manager->machines, m->name, m) < 0)
+ goto fail;
+
+ m->manager = manager;
+
+ return m;
+
+fail:
+ free(m->state_file);
+ free(m->name);
+ return mfree(m);
+}
+
+Machine* machine_free(Machine *m) {
+ if (!m)
+ return NULL;
+
+ while (m->operations)
+ operation_free(m->operations);
+
+ if (m->in_gc_queue)
+ LIST_REMOVE(gc_queue, m->manager->machine_gc_queue, m);
+
+ machine_release_unit(m);
+
+ free(m->scope_job);
+
+ (void) hashmap_remove(m->manager->machines, m->name);
+
+ if (m->manager->host_machine == m)
+ m->manager->host_machine = NULL;
+
+ if (m->leader > 0)
+ (void) hashmap_remove_value(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
+
+ sd_bus_message_unref(m->create_message);
+
+ free(m->name);
+ free(m->state_file);
+ free(m->service);
+ free(m->root_directory);
+ free(m->netif);
+ return mfree(m);
+}
+
+int machine_save(Machine *m) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(m);
+
+ if (!m->state_file)
+ return 0;
+
+ if (!m->started)
+ return 0;
+
+ r = mkdir_safe_label("/run/systemd/machines", 0755, 0, 0, MKDIR_WARN_MODE);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(m->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "NAME=%s\n",
+ m->name);
+
+ if (m->unit) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(m->unit);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "SCOPE=%s\n", escaped); /* We continue to call this "SCOPE=" because it is internal only, and we want to stay compatible with old files */
+ }
+
+ if (m->scope_job)
+ fprintf(f, "SCOPE_JOB=%s\n", m->scope_job);
+
+ if (m->service) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(m->service);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ fprintf(f, "SERVICE=%s\n", escaped);
+ }
+
+ if (m->root_directory) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(m->root_directory);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ fprintf(f, "ROOT=%s\n", escaped);
+ }
+
+ if (!sd_id128_is_null(m->id))
+ fprintf(f, "ID=" SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(m->id));
+
+ if (m->leader != 0)
+ fprintf(f, "LEADER="PID_FMT"\n", m->leader);
+
+ if (m->class != _MACHINE_CLASS_INVALID)
+ fprintf(f, "CLASS=%s\n", machine_class_to_string(m->class));
+
+ if (dual_timestamp_is_set(&m->timestamp))
+ fprintf(f,
+ "REALTIME="USEC_FMT"\n"
+ "MONOTONIC="USEC_FMT"\n",
+ m->timestamp.realtime,
+ m->timestamp.monotonic);
+
+ if (m->n_netif > 0) {
+ size_t i;
+
+ fputs("NETIF=", f);
+
+ for (i = 0; i < m->n_netif; i++) {
+ if (i != 0)
+ fputc(' ', f);
+
+ fprintf(f, "%i", m->netif[i]);
+ }
+
+ fputc('\n', f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, m->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (m->unit) {
+ char *sl;
+
+ /* Create a symlink from the unit name to the machine
+ * name, so that we can quickly find the machine for
+ * each given unit. Ignore error. */
+ sl = strjoina("/run/systemd/machines/unit:", m->unit);
+ (void) symlink(m->name, sl);
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(m->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save machine data %s: %m", m->state_file);
+}
+
+static void machine_unlink(Machine *m) {
+ assert(m);
+
+ if (m->unit) {
+ char *sl;
+
+ sl = strjoina("/run/systemd/machines/unit:", m->unit);
+ (void) unlink(sl);
+ }
+
+ if (m->state_file)
+ (void) unlink(m->state_file);
+}
+
+int machine_load(Machine *m) {
+ _cleanup_free_ char *realtime = NULL, *monotonic = NULL, *id = NULL, *leader = NULL, *class = NULL, *netif = NULL;
+ int r;
+
+ assert(m);
+
+ if (!m->state_file)
+ return 0;
+
+ r = parse_env_file(NULL, m->state_file,
+ "SCOPE", &m->unit,
+ "SCOPE_JOB", &m->scope_job,
+ "SERVICE", &m->service,
+ "ROOT", &m->root_directory,
+ "ID", &id,
+ "LEADER", &leader,
+ "CLASS", &class,
+ "REALTIME", &realtime,
+ "MONOTONIC", &monotonic,
+ "NETIF", &netif);
+ if (r < 0) {
+ if (r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to read %s: %m", m->state_file);
+ }
+
+ if (id)
+ sd_id128_from_string(id, &m->id);
+
+ if (leader)
+ parse_pid(leader, &m->leader);
+
+ if (class) {
+ MachineClass c;
+
+ c = machine_class_from_string(class);
+ if (c >= 0)
+ m->class = c;
+ }
+
+ if (realtime)
+ (void) deserialize_usec(realtime, &m->timestamp.realtime);
+ if (monotonic)
+ (void) deserialize_usec(monotonic, &m->timestamp.monotonic);
+
+ if (netif) {
+ size_t allocated = 0, nr = 0;
+ const char *p;
+ _cleanup_free_ int *ni = NULL;
+
+ p = netif;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse NETIF: %s", netif);
+ break;
+ }
+
+ r = parse_ifindex(word);
+ if (r < 0)
+ continue;
+
+ if (!GREEDY_REALLOC(ni, allocated, nr + 1))
+ return log_oom();
+
+ ni[nr++] = r;
+ }
+
+ free(m->netif);
+ m->netif = TAKE_PTR(ni);
+ m->n_netif = nr;
+ }
+
+ return r;
+}
+
+static int machine_start_scope(
+ Machine *machine,
+ sd_bus_message *more_properties,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_free_ char *escaped = NULL, *unit = NULL;
+ const char *description;
+ int r;
+
+ assert(machine);
+ assert(machine->leader > 0);
+ assert(!machine->unit);
+
+ escaped = unit_name_escape(machine->name);
+ if (!escaped)
+ return log_oom();
+
+ unit = strjoin("machine-", escaped, ".scope");
+ if (!unit)
+ return log_oom();
+
+ r = sd_bus_message_new_method_call(
+ machine->manager->bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartTransientUnit");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "ss", unit, "fail");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", SPECIAL_MACHINE_SLICE);
+ if (r < 0)
+ return r;
+
+ description = strjoina(machine->class == MACHINE_VM ? "Virtual Machine " : "Container ", machine->name);
+ r = sd_bus_message_append(m, "(sv)", "Description", "s", description);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)(sv)",
+ "PIDs", "au", 1, machine->leader,
+ "Delegate", "b", 1,
+ "CollectMode", "s", "inactive-or-failed",
+ "AddRef", "b", 1,
+ "TasksMax", "t", UINT64_C(16384));
+ if (r < 0)
+ return r;
+
+ if (more_properties) {
+ r = sd_bus_message_copy(m, more_properties, true);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(NULL, m, 0, error, &reply);
+ if (r < 0)
+ return r;
+
+ machine->unit = TAKE_PTR(unit);
+ machine->referenced = true;
+
+ const char *job;
+ r = sd_bus_message_read(reply, "o", &job);
+ if (r < 0)
+ return r;
+
+ return free_and_strdup(&machine->scope_job, job);
+}
+
+static int machine_ensure_scope(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
+ int r;
+
+ assert(m);
+ assert(m->class != MACHINE_HOST);
+
+ if (!m->unit) {
+ r = machine_start_scope(m, properties, error);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start machine scope: %s", bus_error_message(error, r));
+ }
+
+ assert(m->unit);
+ hashmap_put(m->manager->machine_units, m->unit, m);
+
+ return 0;
+}
+
+int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error) {
+ int r;
+
+ assert(m);
+
+ if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
+ return -EOPNOTSUPP;
+
+ if (m->started)
+ return 0;
+
+ r = hashmap_put(m->manager->machine_leaders, PID_TO_PTR(m->leader), m);
+ if (r < 0)
+ return r;
+
+ /* Create cgroup */
+ r = machine_ensure_scope(m, properties, error);
+ if (r < 0)
+ return r;
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_MACHINE_START_STR,
+ "NAME=%s", m->name,
+ "LEADER="PID_FMT, m->leader,
+ LOG_MESSAGE("New machine %s.", m->name));
+
+ if (!dual_timestamp_is_set(&m->timestamp))
+ dual_timestamp_get(&m->timestamp);
+
+ m->started = true;
+
+ /* Save new machine data */
+ machine_save(m);
+
+ machine_send_signal(m, true);
+ (void) manager_enqueue_nscd_cache_flush(m->manager);
+
+ return 0;
+}
+
+int machine_stop(Machine *m) {
+ int r;
+
+ assert(m);
+
+ if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
+ return -EOPNOTSUPP;
+
+ if (m->unit) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char *job = NULL;
+
+ r = manager_stop_unit(m->manager, m->unit, &error, &job);
+ if (r < 0)
+ return log_error_errno(r, "Failed to stop machine scope: %s", bus_error_message(&error, r));
+
+ free_and_replace(m->scope_job, job);
+ }
+
+ m->stopping = true;
+
+ machine_save(m);
+ (void) manager_enqueue_nscd_cache_flush(m->manager);
+
+ return 0;
+}
+
+int machine_finalize(Machine *m) {
+ assert(m);
+
+ if (m->started) {
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_MACHINE_STOP_STR,
+ "NAME=%s", m->name,
+ "LEADER="PID_FMT, m->leader,
+ LOG_MESSAGE("Machine %s terminated.", m->name));
+
+ m->stopping = true; /* The machine is supposed to be going away. Don't try to kill it. */
+ }
+
+ machine_unlink(m);
+ machine_add_to_gc_queue(m);
+
+ if (m->started) {
+ machine_send_signal(m, false);
+ m->started = false;
+ }
+
+ return 0;
+}
+
+bool machine_may_gc(Machine *m, bool drop_not_started) {
+ assert(m);
+
+ if (m->class == MACHINE_HOST)
+ return false;
+
+ if (drop_not_started && !m->started)
+ return true;
+
+ if (m->scope_job && manager_job_is_active(m->manager, m->scope_job))
+ return false;
+
+ if (m->unit && manager_unit_is_active(m->manager, m->unit))
+ return false;
+
+ return true;
+}
+
+void machine_add_to_gc_queue(Machine *m) {
+ assert(m);
+
+ if (m->in_gc_queue)
+ return;
+
+ LIST_PREPEND(gc_queue, m->manager->machine_gc_queue, m);
+ m->in_gc_queue = true;
+}
+
+MachineState machine_get_state(Machine *s) {
+ assert(s);
+
+ if (s->class == MACHINE_HOST)
+ return MACHINE_RUNNING;
+
+ if (s->stopping)
+ return MACHINE_CLOSING;
+
+ if (s->scope_job)
+ return MACHINE_OPENING;
+
+ return MACHINE_RUNNING;
+}
+
+int machine_kill(Machine *m, KillWho who, int signo) {
+ assert(m);
+
+ if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER))
+ return -EOPNOTSUPP;
+
+ if (!m->unit)
+ return -ESRCH;
+
+ if (who == KILL_LEADER) {
+ /* If we shall simply kill the leader, do so directly */
+
+ if (kill(m->leader, signo) < 0)
+ return -errno;
+
+ return 0;
+ }
+
+ /* Otherwise, make PID 1 do it for us, for the entire cgroup */
+ return manager_kill_unit(m->manager, m->unit, signo, NULL);
+}
+
+int machine_openpt(Machine *m, int flags, char **ret_slave) {
+ assert(m);
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+
+ return openpt_allocate(flags, ret_slave);
+
+ case MACHINE_CONTAINER:
+ if (m->leader <= 0)
+ return -EINVAL;
+
+ return openpt_allocate_in_namespace(m->leader, flags, ret_slave);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int machine_open_terminal(Machine *m, const char *path, int mode) {
+ assert(m);
+
+ switch (m->class) {
+
+ case MACHINE_HOST:
+ return open_terminal(path, mode);
+
+ case MACHINE_CONTAINER:
+ if (m->leader <= 0)
+ return -EINVAL;
+
+ return open_terminal_in_namespace(m->leader, path, mode);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+void machine_release_unit(Machine *m) {
+ assert(m);
+
+ if (!m->unit)
+ return;
+
+ if (m->referenced) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = manager_unref_unit(m->manager, m->unit, &error);
+ if (r < 0)
+ log_warning_errno(r, "Failed to drop reference to machine scope, ignoring: %s",
+ bus_error_message(&error, r));
+
+ m->referenced = false;
+ }
+
+ (void) hashmap_remove(m->manager->machine_units, m->unit);
+ m->unit = mfree(m->unit);
+}
+
+int machine_get_uid_shift(Machine *m, uid_t *ret) {
+ char p[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1];
+ uid_t uid_base, uid_shift, uid_range;
+ gid_t gid_base, gid_shift, gid_range;
+ _cleanup_fclose_ FILE *f = NULL;
+ int k, r;
+
+ assert(m);
+ assert(ret);
+
+ /* Return the base UID/GID of the specified machine. Note that this only works for containers with simple
+ * mappings. In most cases setups should be simple like this, and administrators should only care about the
+ * basic offset a container has relative to the host. This is what this function exposes.
+ *
+ * If we encounter any more complex mappings we politely refuse this with ENXIO. */
+
+ if (m->class == MACHINE_HOST) {
+ *ret = 0;
+ return 0;
+ }
+
+ if (m->class != MACHINE_CONTAINER)
+ return -EOPNOTSUPP;
+
+ xsprintf(p, "/proc/" PID_FMT "/uid_map", m->leader);
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT) {
+ /* If the file doesn't exist, user namespacing is off in the kernel, return a zero mapping hence. */
+ *ret = 0;
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ /* Read the first line. There's at least one. */
+ errno = 0;
+ k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &uid_base, &uid_shift, &uid_range);
+ if (k != 3) {
+ if (ferror(f))
+ return errno_or_else(EIO);
+
+ return -EBADMSG;
+ }
+
+ /* Not a mapping starting at 0? Then it's a complex mapping we can't expose here. */
+ if (uid_base != 0)
+ return -ENXIO;
+ /* Insist that at least the nobody user is mapped, everything else is weird, and hence complex, and we don't support it */
+ if (uid_range < UID_NOBODY)
+ return -ENXIO;
+
+ /* If there's more than one line, then we don't support this mapping. */
+ r = safe_fgetc(f, NULL);
+ if (r < 0)
+ return r;
+ if (r != 0) /* Insist on EOF */
+ return -ENXIO;
+
+ fclose(f);
+
+ xsprintf(p, "/proc/" PID_FMT "/gid_map", m->leader);
+ f = fopen(p, "re");
+ if (!f)
+ return -errno;
+
+ /* Read the first line. There's at least one. */
+ errno = 0;
+ k = fscanf(f, GID_FMT " " GID_FMT " " GID_FMT "\n", &gid_base, &gid_shift, &gid_range);
+ if (k != 3) {
+ if (ferror(f))
+ return errno_or_else(EIO);
+
+ return -EBADMSG;
+ }
+
+ /* If there's more than one line, then we don't support this file. */
+ r = safe_fgetc(f, NULL);
+ if (r < 0)
+ return r;
+ if (r != 0) /* Insist on EOF */
+ return -ENXIO;
+
+ /* If the UID and GID mapping doesn't match, we don't support this mapping. */
+ if (uid_base != (uid_t) gid_base)
+ return -ENXIO;
+ if (uid_shift != (uid_t) gid_shift)
+ return -ENXIO;
+ if (uid_range != (uid_t) gid_range)
+ return -ENXIO;
+
+ *ret = uid_shift;
+ return 0;
+}
+
+static int machine_owns_uid_internal(
+ Machine *machine,
+ const char *map_file, /* "uid_map" or "gid_map" */
+ uid_t uid,
+ uid_t *ret_internal_uid) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+
+ /* This is a generic implementation for both uids and gids, under the assumptions they have the same types and semantics. */
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+
+ assert(machine);
+
+ /* Checks if the specified host UID is owned by the machine, and returns the UID it maps to
+ * internally in the machine */
+
+ if (machine->class != MACHINE_CONTAINER)
+ goto negative;
+
+ p = procfs_file_alloca(machine->leader, map_file);
+ f = fopen(p, "re");
+ if (!f) {
+ log_debug_errno(errno, "Failed to open %s, ignoring.", p);
+ goto negative;
+ }
+
+ for (;;) {
+ uid_t uid_base, uid_shift, uid_range, converted;
+ int k;
+
+ errno = 0;
+ k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT, &uid_base, &uid_shift, &uid_range);
+ if (k < 0 && feof(f))
+ break;
+ if (k != 3) {
+ if (ferror(f))
+ return errno_or_else(EIO);
+
+ return -EIO;
+ }
+
+ /* The private user namespace is disabled, ignoring. */
+ if (uid_shift == 0)
+ continue;
+
+ if (uid < uid_shift || uid >= uid_shift + uid_range)
+ continue;
+
+ converted = (uid - uid_shift + uid_base);
+ if (!uid_is_valid(converted))
+ return -EINVAL;
+
+ if (ret_internal_uid)
+ *ret_internal_uid = converted;
+
+ return true;
+ }
+
+negative:
+ if (ret_internal_uid)
+ *ret_internal_uid = UID_INVALID;
+
+ return false;
+}
+
+int machine_owns_uid(Machine *machine, uid_t uid, uid_t *ret_internal_uid) {
+ return machine_owns_uid_internal(machine, "uid_map", uid, ret_internal_uid);
+}
+
+int machine_owns_gid(Machine *machine, gid_t gid, gid_t *ret_internal_gid) {
+ return machine_owns_uid_internal(machine, "gid_map", (uid_t) gid, (uid_t*) ret_internal_gid);
+}
+
+static int machine_translate_uid_internal(
+ Machine *machine,
+ const char *map_file, /* "uid_map" or "gid_map" */
+ uid_t uid,
+ uid_t *ret_host_uid) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+
+ /* This is a generic implementation for both uids and gids, under the assumptions they have the same types and semantics. */
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+
+ assert(machine);
+ assert(uid_is_valid(uid));
+
+ if (machine->class != MACHINE_CONTAINER)
+ return -ESRCH;
+
+ /* Translates a machine UID into a host UID */
+
+ p = procfs_file_alloca(machine->leader, map_file);
+ f = fopen(p, "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ uid_t uid_base, uid_shift, uid_range, converted;
+ int k;
+
+ errno = 0;
+ k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT, &uid_base, &uid_shift, &uid_range);
+ if (k < 0 && feof(f))
+ break;
+ if (k != 3) {
+ if (ferror(f))
+ return errno_or_else(EIO);
+
+ return -EIO;
+ }
+
+ if (uid < uid_base || uid >= uid_base + uid_range)
+ continue;
+
+ converted = uid - uid_base + uid_shift;
+ if (!uid_is_valid(converted))
+ return -EINVAL;
+
+ if (ret_host_uid)
+ *ret_host_uid = converted;
+ return 0;
+ }
+
+ return -ESRCH;
+}
+
+int machine_translate_uid(Machine *machine, gid_t uid, gid_t *ret_host_uid) {
+ return machine_translate_uid_internal(machine, "uid_map", uid, ret_host_uid);
+}
+
+int machine_translate_gid(Machine *machine, gid_t gid, gid_t *ret_host_gid) {
+ return machine_translate_uid_internal(machine, "gid_map", (uid_t) gid, (uid_t*) ret_host_gid);
+}
+
+static const char* const machine_class_table[_MACHINE_CLASS_MAX] = {
+ [MACHINE_CONTAINER] = "container",
+ [MACHINE_VM] = "vm",
+ [MACHINE_HOST] = "host",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(machine_class, MachineClass);
+
+static const char* const machine_state_table[_MACHINE_STATE_MAX] = {
+ [MACHINE_OPENING] = "opening",
+ [MACHINE_RUNNING] = "running",
+ [MACHINE_CLOSING] = "closing"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(machine_state, MachineState);
+
+static const char* const kill_who_table[_KILL_WHO_MAX] = {
+ [KILL_LEADER] = "leader",
+ [KILL_ALL] = "all"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);
diff --git a/src/machine/machine.h b/src/machine/machine.h
new file mode 100644
index 0000000..2f62715
--- /dev/null
+++ b/src/machine/machine.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Machine Machine;
+typedef enum KillWho KillWho;
+
+#include "list.h"
+#include "machined.h"
+#include "operation.h"
+#include "time-util.h"
+
+typedef enum MachineState {
+ MACHINE_OPENING, /* Machine is being registered */
+ MACHINE_RUNNING, /* Machine is running */
+ MACHINE_CLOSING, /* Machine is terminating */
+ _MACHINE_STATE_MAX,
+ _MACHINE_STATE_INVALID = -1
+} MachineState;
+
+typedef enum MachineClass {
+ MACHINE_CONTAINER,
+ MACHINE_VM,
+ MACHINE_HOST,
+ _MACHINE_CLASS_MAX,
+ _MACHINE_CLASS_INVALID = -1
+} MachineClass;
+
+enum KillWho {
+ KILL_LEADER,
+ KILL_ALL,
+ _KILL_WHO_MAX,
+ _KILL_WHO_INVALID = -1
+};
+
+struct Machine {
+ Manager *manager;
+
+ char *name;
+ sd_id128_t id;
+
+ MachineClass class;
+
+ char *state_file;
+ char *service;
+ char *root_directory;
+
+ char *unit;
+ char *scope_job;
+
+ pid_t leader;
+
+ dual_timestamp timestamp;
+
+ bool in_gc_queue:1;
+ bool started:1;
+ bool stopping:1;
+ bool referenced:1;
+
+ sd_bus_message *create_message;
+
+ int *netif;
+ size_t n_netif;
+
+ LIST_HEAD(Operation, operations);
+
+ LIST_FIELDS(Machine, gc_queue);
+};
+
+Machine* machine_new(Manager *manager, MachineClass class, const char *name);
+Machine* machine_free(Machine *m);
+bool machine_may_gc(Machine *m, bool drop_not_started);
+void machine_add_to_gc_queue(Machine *m);
+int machine_start(Machine *m, sd_bus_message *properties, sd_bus_error *error);
+int machine_stop(Machine *m);
+int machine_finalize(Machine *m);
+int machine_save(Machine *m);
+int machine_load(Machine *m);
+int machine_kill(Machine *m, KillWho who, int signo);
+
+void machine_release_unit(Machine *m);
+
+MachineState machine_get_state(Machine *u);
+
+const char* machine_class_to_string(MachineClass t) _const_;
+MachineClass machine_class_from_string(const char *s) _pure_;
+
+const char* machine_state_to_string(MachineState t) _const_;
+MachineState machine_state_from_string(const char *s) _pure_;
+
+const char *kill_who_to_string(KillWho k) _const_;
+KillWho kill_who_from_string(const char *s) _pure_;
+
+int machine_openpt(Machine *m, int flags, char **ret_slave);
+int machine_open_terminal(Machine *m, const char *path, int mode);
+
+int machine_get_uid_shift(Machine *m, uid_t *ret);
+
+int machine_owns_uid(Machine *m, uid_t host_uid, uid_t *ret_internal_uid);
+int machine_owns_gid(Machine *m, gid_t host_gid, gid_t *ret_internal_gid);
+
+int machine_translate_uid(Machine *m, uid_t internal_uid, uid_t *ret_host_uid);
+int machine_translate_gid(Machine *m, gid_t internal_gid, gid_t *ret_host_gid);
diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c
new file mode 100644
index 0000000..4a3279d
--- /dev/null
+++ b/src/machine/machinectl.c
@@ -0,0 +1,2897 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <math.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "bus-print-properties.h"
+#include "bus-unit-procs.h"
+#include "bus-unit-util.h"
+#include "bus-wait-for-jobs.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "copy.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "format-table.h"
+#include "hostname-util.h"
+#include "import-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "ptyfwd.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "sort-util.h"
+#include "spawn-ask-password-agent.h"
+#include "spawn-polkit-agent.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "verbs.h"
+#include "web-util.h"
+
+#define ALL_ADDRESSES -1
+
+static char **arg_property = NULL;
+static bool arg_all = false;
+static bool arg_value = false;
+static bool arg_full = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static const char *arg_kill_who = NULL;
+static int arg_signal = SIGTERM;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_read_only = false;
+static bool arg_mkdir = false;
+static bool arg_quiet = false;
+static bool arg_ask_password = true;
+static unsigned arg_lines = 10;
+static OutputMode arg_output = OUTPUT_SHORT;
+static bool arg_force = false;
+static ImportVerify arg_verify = IMPORT_VERIFY_SIGNATURE;
+static const char* arg_format = NULL;
+static const char *arg_uid = NULL;
+static char **arg_setenv = NULL;
+static int arg_max_addresses = 1;
+
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_setenv, strv_freep);
+
+static OutputFlags get_output_flags(void) {
+ return
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full || !on_tty() || pager_have()) * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR |
+ !arg_quiet * OUTPUT_WARN_CUTOFF;
+}
+
+static int call_get_os_release(sd_bus *bus, const char *method, const char *name, const char *query, ...) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *k, *v, *iter, **query_res = NULL;
+ size_t count = 0, awaited_args = 0;
+ va_list ap;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(query);
+
+ NULSTR_FOREACH(iter, query)
+ awaited_args++;
+ query_res = newa0(const char *, awaited_args);
+
+ r = bus_call_method(bus, bus_machine_mgr, method, &error, &reply, "s", name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to call '%s()': %s", method, bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "{ss}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "{ss}", &k, &v)) > 0) {
+ count = 0;
+ NULSTR_FOREACH(iter, query) {
+ if (streq(k, iter)) {
+ query_res[count] = v;
+ break;
+ }
+ count++;
+ }
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ va_start(ap, query);
+ for (count = 0; count < awaited_args; count++) {
+ char *val, **out;
+
+ out = va_arg(ap, char **);
+ assert(out);
+ if (query_res[count]) {
+ val = strdup(query_res[count]);
+ if (!val) {
+ va_end(ap);
+ return -ENOMEM;
+ }
+ *out = val;
+ }
+ }
+ va_end(ap);
+
+ return 0;
+}
+
+static int call_get_addresses(
+ sd_bus *bus,
+ const char *name,
+ int ifi,
+ const char *prefix,
+ const char *prefix2,
+ char **ret) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *addresses = NULL;
+ unsigned n = 0;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(prefix);
+ assert(prefix2);
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetMachineAddresses", NULL, &reply, "s", name);
+ if (r < 0)
+ return log_debug_errno(r, "Could not get addresses: %s", bus_error_message(&error, r));
+
+ addresses = strdup(prefix);
+ if (!addresses)
+ return log_oom();
+ prefix = "";
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iay)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iay")) > 0) {
+ int family;
+ const void *a;
+ size_t sz;
+ char buf_ifi[DECIMAL_STR_MAX(int) + 2], buffer[MAX(INET6_ADDRSTRLEN, INET_ADDRSTRLEN)];
+
+ r = sd_bus_message_read(reply, "i", &family);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (family == AF_INET6 && ifi > 0)
+ xsprintf(buf_ifi, "%%%i", ifi);
+ else
+ strcpy(buf_ifi, "");
+
+ if (!strextend(&addresses, prefix, inet_ntop(family, a, buffer, sizeof(buffer)), buf_ifi, NULL))
+ return log_oom();
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ prefix = prefix2;
+
+ n++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ *ret = TAKE_PTR(addresses);
+ return (int) n;
+}
+
+static int show_table(Table *table, const char *word) {
+ int r;
+
+ assert(table);
+ assert(word);
+
+ if (table_get_rows(table) > 1 || OUTPUT_MODE_IS_JSON(arg_output)) {
+ r = table_set_sort(table, (size_t) 0, (size_t) -1);
+ if (r < 0)
+ return table_log_sort_error(r);
+
+ table_set_header(table, arg_legend);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ r = table_print_json(table, NULL, output_mode_to_json_format_flags(arg_output) | JSON_FORMAT_COLOR_AUTO);
+ else
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ if (arg_legend) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu %s listed.\n", table_get_rows(table) - 1, word);
+ else
+ printf("No %s.\n", word);
+ }
+
+ return 0;
+}
+
+static int list_machines(int argc, char *argv[], void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_call_method(bus, bus_machine_mgr, "ListMachines", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not get machines: %s", bus_error_message(&error, r));
+
+ table = table_new("machine", "class", "service", "os", "version", "addresses");
+ if (!table)
+ return log_oom();
+
+ table_set_empty_string(table, "-");
+ if (!arg_full && arg_max_addresses != ALL_ADDRESSES)
+ table_set_cell_height_max(table, arg_max_addresses);
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ r = sd_bus_message_enter_container(reply, 'a', "(ssso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *os = NULL, *version_id = NULL, *addresses = NULL;
+ const char *name, *class, *service;
+
+ r = sd_bus_message_read(reply, "(ssso)", &name, &class, &service, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (name[0] == '.' && !arg_all)
+ continue;
+
+ (void) call_get_os_release(
+ bus,
+ "GetMachineOSRelease",
+ name,
+ "ID\0"
+ "VERSION_ID\0",
+ &os,
+ &version_id);
+
+ (void) call_get_addresses(
+ bus,
+ name,
+ 0,
+ "",
+ "\n",
+ &addresses);
+
+ r = table_add_many(table,
+ TABLE_STRING, empty_to_null(name),
+ TABLE_STRING, empty_to_null(class),
+ TABLE_STRING, empty_to_null(service),
+ TABLE_STRING, empty_to_null(os),
+ TABLE_STRING, empty_to_null(version_id),
+ TABLE_STRING, empty_to_null(addresses));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "machines");
+}
+
+static int list_images(int argc, char *argv[], void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_call_method(bus, bus_machine_mgr, "ListImages", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not get images: %s", bus_error_message(&error, r));
+
+ table = table_new("name", "type", "ro", "usage", "created", "modified");
+ if (!table)
+ return log_oom();
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ (void) table_set_align_percent(table, TABLE_HEADER_CELL(3), 100);
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssbttto)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ uint64_t crtime, mtime, size;
+ const char *name, *type;
+ int ro_int;
+
+ r = sd_bus_message_read(reply, "(ssbttto)", &name, &type, &ro_int, &crtime, &mtime, &size, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (name[0] == '.' && !arg_all)
+ continue;
+
+ r = table_add_many(table,
+ TABLE_STRING, name,
+ TABLE_STRING, type,
+ TABLE_BOOLEAN, ro_int,
+ TABLE_SET_COLOR, ro_int ? ansi_highlight_red() : NULL,
+ TABLE_SIZE, size,
+ TABLE_TIMESTAMP, crtime,
+ TABLE_TIMESTAMP, mtime);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return show_table(table, "images");
+}
+
+static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) {
+ _cleanup_free_ char *cgroup = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+ unsigned c;
+
+ assert(bus);
+ assert(unit);
+
+ r = show_cgroup_get_unit_path_and_warn(bus, unit, &cgroup);
+ if (r < 0)
+ return r;
+
+ if (isempty(cgroup))
+ return 0;
+
+ c = columns();
+ if (c > 18)
+ c -= 18;
+ else
+ c = 0;
+
+ r = unit_show_processes(bus, unit, cgroup, "\t\t ", c, get_output_flags(), &error);
+ if (r == -EBADR) {
+
+ if (arg_transport == BUS_TRANSPORT_REMOTE)
+ return 0;
+
+ /* Fallback for older systemd versions where the GetUnitProcesses() call is not yet available */
+
+ if (cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup) != 0 && leader <= 0)
+ return 0;
+
+ show_cgroup_and_extra(SYSTEMD_CGROUP_CONTROLLER, cgroup, "\t\t ", c, &leader, leader > 0, get_output_flags());
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to dump process list: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int print_os_release(sd_bus *bus, const char *method, const char *name, const char *prefix) {
+ _cleanup_free_ char *pretty = NULL;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(prefix);
+
+ r = call_get_os_release(bus, method, name, "PRETTY_NAME\0", &pretty, NULL);
+ if (r < 0)
+ return r;
+
+ if (pretty)
+ printf("%s%s\n", prefix, pretty);
+
+ return 0;
+}
+
+static int print_uid_shift(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ uint32_t shift;
+ int r;
+
+ assert(bus);
+ assert(name);
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetMachineUIDShift", &error, &reply, "s", name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to query UID/GID shift: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "u", &shift);
+ if (r < 0)
+ return r;
+
+ if (shift == 0) /* Don't show trivial mappings */
+ return 0;
+
+ printf(" UID Shift: %" PRIu32 "\n", shift);
+ return 0;
+}
+
+typedef struct MachineStatusInfo {
+ const char *name;
+ sd_id128_t id;
+ const char *class;
+ const char *service;
+ const char *unit;
+ const char *root_directory;
+ pid_t leader;
+ struct dual_timestamp timestamp;
+ int *netif;
+ size_t n_netif;
+} MachineStatusInfo;
+
+static void machine_status_info_clear(MachineStatusInfo *info) {
+ if (info) {
+ free(info->netif);
+ zero(*info);
+ }
+}
+
+static void print_machine_status_info(sd_bus *bus, MachineStatusInfo *i) {
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX];
+ char since2[FORMAT_TIMESTAMP_MAX];
+ _cleanup_free_ char *addresses = NULL;
+ const char *s1, *s2;
+ int ifi = -1;
+
+ assert(bus);
+ assert(i);
+
+ fputs(strna(i->name), stdout);
+
+ if (!sd_id128_is_null(i->id))
+ printf("(" SD_ID128_FORMAT_STR ")\n", SD_ID128_FORMAT_VAL(i->id));
+ else
+ putchar('\n');
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), i->timestamp.realtime);
+ s2 = format_timestamp(since2, sizeof(since2), i->timestamp.realtime);
+
+ if (s1)
+ printf("\t Since: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\t Since: %s\n", s2);
+
+ if (i->leader > 0) {
+ _cleanup_free_ char *t = NULL;
+
+ printf("\t Leader: %u", (unsigned) i->leader);
+
+ get_process_comm(i->leader, &t);
+ if (t)
+ printf(" (%s)", t);
+
+ putchar('\n');
+ }
+
+ if (i->service) {
+ printf("\t Service: %s", i->service);
+
+ if (i->class)
+ printf("; class %s", i->class);
+
+ putchar('\n');
+ } else if (i->class)
+ printf("\t Class: %s\n", i->class);
+
+ if (i->root_directory)
+ printf("\t Root: %s\n", i->root_directory);
+
+ if (i->n_netif > 0) {
+ fputs("\t Iface:", stdout);
+
+ for (size_t c = 0; c < i->n_netif; c++) {
+ char name[IF_NAMESIZE+1];
+
+ if (format_ifname(i->netif[c], name)) {
+ fputc(' ', stdout);
+ fputs(name, stdout);
+
+ if (ifi < 0)
+ ifi = i->netif[c];
+ else
+ ifi = 0;
+ } else
+ printf(" %i", i->netif[c]);
+ }
+
+ fputc('\n', stdout);
+ }
+
+ if (call_get_addresses(bus, i->name, ifi,
+ "\t Address: ", "\n\t ",
+ &addresses) > 0) {
+ fputs(addresses, stdout);
+ fputc('\n', stdout);
+ }
+
+ print_os_release(bus, "GetMachineOSRelease", i->name, "\t OS: ");
+
+ print_uid_shift(bus, i->name);
+
+ if (i->unit) {
+ printf("\t Unit: %s\n", i->unit);
+ show_unit_cgroup(bus, i->unit, i->leader);
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL)
+
+ show_journal_by_unit(
+ stdout,
+ i->unit,
+ NULL,
+ arg_output,
+ 0,
+ i->timestamp.monotonic,
+ arg_lines,
+ 0,
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ SD_JOURNAL_LOCAL_ONLY,
+ true,
+ NULL);
+ }
+}
+
+static int map_netif(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ MachineStatusInfo *i = userdata;
+ size_t l;
+ const void *v;
+ int r;
+
+ assert_cc(sizeof(int32_t) == sizeof(int));
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_INT32, &v, &l);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBADMSG;
+
+ i->n_netif = l / sizeof(int32_t);
+ i->netif = memdup(v, l);
+ if (!i->netif)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int show_machine_info(const char *verb, sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Name", "s", NULL, offsetof(MachineStatusInfo, name) },
+ { "Class", "s", NULL, offsetof(MachineStatusInfo, class) },
+ { "Service", "s", NULL, offsetof(MachineStatusInfo, service) },
+ { "Unit", "s", NULL, offsetof(MachineStatusInfo, unit) },
+ { "RootDirectory", "s", NULL, offsetof(MachineStatusInfo, root_directory) },
+ { "Leader", "u", NULL, offsetof(MachineStatusInfo, leader) },
+ { "Timestamp", "t", NULL, offsetof(MachineStatusInfo, timestamp.realtime) },
+ { "TimestampMonotonic", "t", NULL, offsetof(MachineStatusInfo, timestamp.monotonic) },
+ { "Id", "ay", bus_map_id128, offsetof(MachineStatusInfo, id) },
+ { "NetworkInterfaces", "ai", map_netif, 0 },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(machine_status_info_clear) MachineStatusInfo info = {};
+ int r;
+
+ assert(verb);
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.machine1",
+ path,
+ map,
+ 0,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+ *new_line = true;
+
+ print_machine_status_info(bus, &info);
+
+ return r;
+}
+
+static int show_machine_properties(sd_bus *bus, const char *path, bool *new_line) {
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ r = bus_print_all_properties(bus, "org.freedesktop.machine1", path, NULL, arg_property, arg_value, arg_all, NULL);
+ if (r < 0)
+ log_error_errno(r, "Could not get properties: %m");
+
+ return r;
+}
+
+static int show_machine(int argc, char *argv[], void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r = 0;
+
+ assert(bus);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (properties && argc <= 1) {
+
+ /* If no argument is specified, inspect the manager
+ * itself */
+ r = show_machine_properties(bus, "/org/freedesktop/machine1", &new_line);
+ if (r < 0)
+ return r;
+ }
+
+ for (int i = 1; i < argc; i++) {
+ const char *path = NULL;
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetMachine", &error, &reply, "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Could not get path to machine: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (properties)
+ r = show_machine_properties(bus, path, &new_line);
+ else
+ r = show_machine_info(argv[0], bus, path, &new_line);
+ }
+
+ return r;
+}
+
+static int print_image_hostname(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *hn;
+ int r;
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetImageHostname", NULL, &reply, "s", name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "s", &hn);
+ if (r < 0)
+ return r;
+
+ if (!isempty(hn))
+ printf("\tHostname: %s\n", hn);
+
+ return 0;
+}
+
+static int print_image_machine_id(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_id128_t id = SD_ID128_NULL;
+ const void *p;
+ size_t size;
+ int r;
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetImageMachineID", NULL, &reply, "s", name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(reply, 'y', &p, &size);
+ if (r < 0)
+ return r;
+
+ if (size == sizeof(sd_id128_t))
+ memcpy(&id, p, size);
+
+ if (!sd_id128_is_null(id))
+ printf(" Machine ID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(id));
+
+ return 0;
+}
+
+static int print_image_machine_info(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetImageMachineInfo", NULL, &reply, "s", name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *p, *q;
+
+ r = sd_bus_message_read(reply, "{ss}", &p, &q);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (streq(p, "DEPLOYMENT"))
+ printf(" Deployment: %s\n", q);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+typedef struct ImageStatusInfo {
+ const char *name;
+ const char *path;
+ const char *type;
+ bool read_only;
+ usec_t crtime;
+ usec_t mtime;
+ uint64_t usage;
+ uint64_t limit;
+ uint64_t usage_exclusive;
+ uint64_t limit_exclusive;
+} ImageStatusInfo;
+
+static void print_image_status_info(sd_bus *bus, ImageStatusInfo *i) {
+ char ts_relative[FORMAT_TIMESTAMP_RELATIVE_MAX];
+ char ts_absolute[FORMAT_TIMESTAMP_MAX];
+ char bs[FORMAT_BYTES_MAX];
+ char bs_exclusive[FORMAT_BYTES_MAX];
+ const char *s1, *s2, *s3, *s4;
+
+ assert(bus);
+ assert(i);
+
+ if (i->name) {
+ fputs(i->name, stdout);
+ putchar('\n');
+ }
+
+ if (i->type)
+ printf("\t Type: %s\n", i->type);
+
+ if (i->path)
+ printf("\t Path: %s\n", i->path);
+
+ (void) print_image_hostname(bus, i->name);
+ (void) print_image_machine_id(bus, i->name);
+ (void) print_image_machine_info(bus, i->name);
+
+ print_os_release(bus, "GetImageOSRelease", i->name, "\t OS: ");
+
+ printf("\t RO: %s%s%s\n",
+ i->read_only ? ansi_highlight_red() : "",
+ i->read_only ? "read-only" : "writable",
+ i->read_only ? ansi_normal() : "");
+
+ s1 = format_timestamp_relative(ts_relative, sizeof(ts_relative), i->crtime);
+ s2 = format_timestamp(ts_absolute, sizeof(ts_absolute), i->crtime);
+ if (s1 && s2)
+ printf("\t Created: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\t Created: %s\n", s2);
+
+ s1 = format_timestamp_relative(ts_relative, sizeof(ts_relative), i->mtime);
+ s2 = format_timestamp(ts_absolute, sizeof(ts_absolute), i->mtime);
+ if (s1 && s2)
+ printf("\tModified: %s; %s\n", s2, s1);
+ else if (s2)
+ printf("\tModified: %s\n", s2);
+
+ s3 = format_bytes(bs, sizeof(bs), i->usage);
+ s4 = i->usage_exclusive != i->usage ? format_bytes(bs_exclusive, sizeof(bs_exclusive), i->usage_exclusive) : NULL;
+ if (s3 && s4)
+ printf("\t Usage: %s (exclusive: %s)\n", s3, s4);
+ else if (s3)
+ printf("\t Usage: %s\n", s3);
+
+ s3 = format_bytes(bs, sizeof(bs), i->limit);
+ s4 = i->limit_exclusive != i->limit ? format_bytes(bs_exclusive, sizeof(bs_exclusive), i->limit_exclusive) : NULL;
+ if (s3 && s4)
+ printf("\t Limit: %s (exclusive: %s)\n", s3, s4);
+ else if (s3)
+ printf("\t Limit: %s\n", s3);
+}
+
+static int show_image_info(sd_bus *bus, const char *path, bool *new_line) {
+
+ static const struct bus_properties_map map[] = {
+ { "Name", "s", NULL, offsetof(ImageStatusInfo, name) },
+ { "Path", "s", NULL, offsetof(ImageStatusInfo, path) },
+ { "Type", "s", NULL, offsetof(ImageStatusInfo, type) },
+ { "ReadOnly", "b", NULL, offsetof(ImageStatusInfo, read_only) },
+ { "CreationTimestamp", "t", NULL, offsetof(ImageStatusInfo, crtime) },
+ { "ModificationTimestamp", "t", NULL, offsetof(ImageStatusInfo, mtime) },
+ { "Usage", "t", NULL, offsetof(ImageStatusInfo, usage) },
+ { "Limit", "t", NULL, offsetof(ImageStatusInfo, limit) },
+ { "UsageExclusive", "t", NULL, offsetof(ImageStatusInfo, usage_exclusive) },
+ { "LimitExclusive", "t", NULL, offsetof(ImageStatusInfo, limit_exclusive) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ ImageStatusInfo info = {};
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.machine1",
+ path,
+ map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ if (*new_line)
+ printf("\n");
+ *new_line = true;
+
+ print_image_status_info(bus, &info);
+
+ return r;
+}
+
+typedef struct PoolStatusInfo {
+ const char *path;
+ uint64_t usage;
+ uint64_t limit;
+} PoolStatusInfo;
+
+static void print_pool_status_info(sd_bus *bus, PoolStatusInfo *i) {
+ char bs[FORMAT_BYTES_MAX], *s;
+
+ if (i->path)
+ printf("\t Path: %s\n", i->path);
+
+ s = format_bytes(bs, sizeof(bs), i->usage);
+ if (s)
+ printf("\t Usage: %s\n", s);
+
+ s = format_bytes(bs, sizeof(bs), i->limit);
+ if (s)
+ printf("\t Limit: %s\n", s);
+}
+
+static int show_pool_info(sd_bus *bus) {
+
+ static const struct bus_properties_map map[] = {
+ { "PoolPath", "s", NULL, offsetof(PoolStatusInfo, path) },
+ { "PoolUsage", "t", NULL, offsetof(PoolStatusInfo, usage) },
+ { "PoolLimit", "t", NULL, offsetof(PoolStatusInfo, limit) },
+ {}
+ };
+
+ PoolStatusInfo info = {
+ .usage = (uint64_t) -1,
+ .limit = (uint64_t) -1,
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ map,
+ 0,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Could not get properties: %s", bus_error_message(&error, r));
+
+ print_pool_status_info(bus, &info);
+
+ return 0;
+}
+
+static int show_image_properties(sd_bus *bus, const char *path, bool *new_line) {
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(new_line);
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ r = bus_print_all_properties(bus, "org.freedesktop.machine1", path, NULL, arg_property, arg_value, arg_all, NULL);
+ if (r < 0)
+ log_error_errno(r, "Could not get properties: %m");
+
+ return r;
+}
+
+static int show_image(int argc, char *argv[], void *userdata) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ bool properties, new_line = false;
+ sd_bus *bus = userdata;
+ int r = 0;
+
+ assert(bus);
+
+ properties = !strstr(argv[0], "status");
+
+ (void) pager_open(arg_pager_flags);
+
+ if (argc <= 1) {
+
+ /* If no argument is specified, inspect the manager
+ * itself */
+
+ if (properties)
+ r = show_image_properties(bus, "/org/freedesktop/machine1", &new_line);
+ else
+ r = show_pool_info(bus);
+ if (r < 0)
+ return r;
+ }
+
+ for (int i = 1; i < argc; i++) {
+ const char *path = NULL;
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetImage", &error, &reply, "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Could not get path to image: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (properties)
+ r = show_image_properties(bus, path, &new_line);
+ else
+ r = show_image_info(bus, path, &new_line);
+ }
+
+ return r;
+}
+
+static int kill_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (!arg_kill_who)
+ arg_kill_who = "all";
+
+ for (int i = 1; i < argc; i++) {
+ r = bus_call_method(
+ bus,
+ bus_machine_mgr,
+ "KillMachine",
+ &error,
+ NULL,
+ "ssi", argv[i], arg_kill_who, arg_signal);
+ if (r < 0)
+ return log_error_errno(r, "Could not kill machine: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int reboot_machine(int argc, char *argv[], void *userdata) {
+ arg_kill_who = "leader";
+ arg_signal = SIGINT; /* sysvinit + systemd */
+
+ return kill_machine(argc, argv, userdata);
+}
+
+static int poweroff_machine(int argc, char *argv[], void *userdata) {
+ arg_kill_who = "leader";
+ arg_signal = SIGRTMIN+4; /* only systemd */
+
+ return kill_machine(argc, argv, userdata);
+}
+
+static int terminate_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (int i = 1; i < argc; i++) {
+ r = bus_call_method(bus, bus_machine_mgr, "TerminateMachine", &error, NULL, "s", argv[i]);
+ if (r < 0)
+ return log_error_errno(r, "Could not terminate machine: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int copy_files(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *abs_host_path = NULL;
+ char *dest, *host_path, *container_path;
+ sd_bus *bus = userdata;
+ bool copy_from;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ copy_from = streq(argv[0], "copy-from");
+ dest = argv[3] ?: argv[2];
+ host_path = copy_from ? dest : argv[2];
+ container_path = copy_from ? argv[2] : dest;
+
+ if (!path_is_absolute(host_path)) {
+ r = path_make_absolute_cwd(host_path, &abs_host_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make path absolute: %m");
+
+ host_path = abs_host_path;
+ }
+
+ r = bus_message_new_method_call(
+ bus,
+ &m,
+ bus_machine_mgr,
+ copy_from ? "CopyFromMachine" : "CopyToMachine");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sss",
+ argv[1],
+ copy_from ? container_path : host_path,
+ copy_from ? host_path : container_path);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence turn off any method call timeouts */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int bind_mount(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_call_method(
+ bus,
+ bus_machine_mgr,
+ "BindMountMachine",
+ &error,
+ NULL,
+ "sssbb",
+ argv[1],
+ argv[2],
+ argv[3],
+ arg_read_only,
+ arg_mkdir);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind mount: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int on_machine_removed(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ PTYForward ** forward = (PTYForward**) userdata;
+ int r;
+
+ assert(m);
+ assert(forward);
+
+ if (*forward) {
+ /* If the forwarder is already initialized, tell it to
+ * exit on the next vhangup(), so that we still flush
+ * out what might be queued and exit then. */
+
+ r = pty_forward_set_ignore_vhangup(*forward, false);
+ if (r >= 0)
+ return 0;
+
+ log_error_errno(r, "Failed to set ignore_vhangup flag: %m");
+ }
+
+ /* On error, or when the forwarder is not initialized yet, quit immediately */
+ sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), EXIT_FAILURE);
+ return 0;
+}
+
+static int process_forward(sd_event *event, PTYForward **forward, int master, PTYForwardFlags flags, const char *name) {
+ char last_char = 0;
+ bool machine_died;
+ int r;
+
+ assert(event);
+ assert(master >= 0);
+ assert(name);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
+
+ if (!arg_quiet) {
+ if (streq(name, ".host"))
+ log_info("Connected to the local host. Press ^] three times within 1s to exit session.");
+ else
+ log_info("Connected to machine %s. Press ^] three times within 1s to exit session.", name);
+ }
+
+ (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+
+ r = pty_forward_new(event, master, flags, forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PTY forwarder: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ pty_forward_get_last_char(*forward, &last_char);
+
+ machine_died =
+ (flags & PTY_FORWARD_IGNORE_VHANGUP) &&
+ pty_forward_get_ignore_vhangup(*forward) == 0;
+
+ *forward = pty_forward_free(*forward);
+
+ if (last_char != '\n')
+ fputc('\n', stdout);
+
+ if (!arg_quiet) {
+ if (machine_died)
+ log_info("Machine %s terminated.", name);
+ else if (streq(name, ".host"))
+ log_info("Connection to the local host terminated.");
+ else
+ log_info("Connection to machine %s terminated.", name);
+ }
+
+ return 0;
+}
+
+static int parse_machine_uid(const char *spec, const char **machine, char **uid) {
+ /*
+ * Whatever is specified in the spec takes priority over global arguments.
+ */
+ char *_uid = NULL;
+ const char *_machine = NULL;
+
+ if (spec) {
+ const char *at;
+
+ at = strchr(spec, '@');
+ if (at) {
+ if (at == spec)
+ /* Do the same as ssh and refuse "@host". */
+ return -EINVAL;
+
+ _machine = at + 1;
+ _uid = strndup(spec, at - spec);
+ if (!_uid)
+ return -ENOMEM;
+ } else
+ _machine = spec;
+ };
+
+ if (arg_uid && !_uid) {
+ _uid = strdup(arg_uid);
+ if (!_uid)
+ return -ENOMEM;
+ }
+
+ *uid = _uid;
+ *machine = isempty(_machine) ? ".host" : _machine;
+ return 0;
+}
+
+static int login_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ int master = -1, r;
+ sd_bus *bus = userdata;
+ const char *match, *machine;
+
+ assert(bus);
+
+ if (!strv_isempty(arg_setenv) || arg_uid)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--setenv= and --uid= are not supported for 'login'. Use 'shell' instead.");
+
+ if (!IN_SET(arg_transport, BUS_TRANSPORT_LOCAL, BUS_TRANSPORT_MACHINE))
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Login only supported on local machines.");
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ machine = argc < 2 || isempty(argv[1]) ? ".host" : argv[1];
+
+ match = strjoina("type='signal',"
+ "sender='org.freedesktop.machine1',"
+ "path='/org/freedesktop/machine1',",
+ "interface='org.freedesktop.machine1.Manager',"
+ "member='MachineRemoved',"
+ "arg0='", machine, "'");
+
+ r = sd_bus_add_match_async(bus, &slot, match, on_machine_removed, NULL, &forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request machine removal match: %m");
+
+ r = bus_call_method(bus, bus_machine_mgr, "OpenMachineLogin", &error, &reply, "s", machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get login PTY: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "hs", &master, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return process_forward(event, &forward, master, PTY_FORWARD_IGNORE_VHANGUP, machine);
+}
+
+static int shell_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ int master = -1, r;
+ sd_bus *bus = userdata;
+ const char *match, *machine, *path;
+ _cleanup_free_ char *uid = NULL;
+
+ assert(bus);
+
+ if (!IN_SET(arg_transport, BUS_TRANSPORT_LOCAL, BUS_TRANSPORT_MACHINE))
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Shell only supported on local machines.");
+
+ /* Pass $TERM to shell session, if not explicitly specified. */
+ if (!strv_find_prefix(arg_setenv, "TERM=")) {
+ const char *t;
+
+ t = strv_find_prefix(environ, "TERM=");
+ if (t) {
+ if (strv_extend(&arg_setenv, t) < 0)
+ return log_oom();
+ }
+ }
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = parse_machine_uid(argc >= 2 ? argv[1] : NULL, &machine, &uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse machine specification: %m");
+
+ match = strjoina("type='signal',"
+ "sender='org.freedesktop.machine1',"
+ "path='/org/freedesktop/machine1',",
+ "interface='org.freedesktop.machine1.Manager',"
+ "member='MachineRemoved',"
+ "arg0='", machine, "'");
+
+ r = sd_bus_add_match_async(bus, &slot, match, on_machine_removed, NULL, &forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request machine removal match: %m");
+
+ r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "OpenMachineShell");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ path = argc < 3 || isempty(argv[2]) ? NULL : argv[2];
+
+ r = sd_bus_message_append(m, "sss", machine, uid, path);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, strv_length(argv) <= 3 ? NULL : argv + 2);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_setenv);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get shell PTY: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "hs", &master, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return process_forward(event, &forward, master, 0, machine);
+}
+
+static int remove_image(int argc, char *argv[], void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (int i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "RemoveImage");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", argv[i]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence turn off any method call timeouts */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not remove image: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int rename_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_call_method(
+ bus,
+ bus_machine_mgr,
+ "RenameImage",
+ &error,
+ NULL,
+ "ss", argv[1], argv[2]);
+ if (r < 0)
+ return log_error_errno(r, "Could not rename image: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int clone_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "CloneImage");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ssb", argv[1], argv[2], arg_read_only);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence turn off any method call timeouts */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not clone image: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int read_only_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int b = true, r;
+
+ assert(bus);
+
+ if (argc > 2) {
+ b = parse_boolean(argv[2]);
+ if (b < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse boolean argument: %s",
+ argv[2]);
+ }
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_call_method(bus, bus_machine_mgr, "MarkImageReadOnly", &error, NULL, "sb", argv[1], b);
+ if (r < 0)
+ return log_error_errno(r, "Could not mark image read-only: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int image_exists(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(name);
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetImage", &error, NULL, "s", name);
+ if (r < 0) {
+ if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_IMAGE))
+ return 0;
+
+ return log_error_errno(r, "Failed to check whether image %s exists: %s", name, bus_error_message(&error, r));
+ }
+
+ return 1;
+}
+
+static int make_service_name(const char *name, char **ret) {
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ if (!machine_name_is_valid(name))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid machine name %s.", name);
+
+ r = unit_name_build("systemd-nspawn", name, ".service", ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build unit name: %m");
+
+ return 0;
+}
+
+static int start_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+ ask_password_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_oom();
+
+ for (int i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *unit = NULL;
+ const char *object;
+
+ r = make_service_name(argv[i], &unit);
+ if (r < 0)
+ return r;
+
+ r = image_exists(bus, argv[i]);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENXIO),
+ "Machine image '%s' does not exist.",
+ argv[i]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ &error,
+ &reply,
+ "ss", unit, "fail");
+ if (r < 0)
+ return log_error_errno(r, "Failed to start unit: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_add(w, object);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = bus_wait_for_jobs(w, arg_quiet, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int enable_machine(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *method = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ method = streq(argv[0], "enable") ? "EnableUnitFiles" : "DisableUnitFiles";
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (int i = 1; i < argc; i++) {
+ _cleanup_free_ char *unit = NULL;
+
+ r = make_service_name(argv[i], &unit);
+ if (r < 0)
+ return r;
+
+ r = image_exists(bus, argv[i]);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENXIO),
+ "Machine image '%s' does not exist.",
+ argv[i]);
+
+ r = sd_bus_message_append(m, "s", unit);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(argv[0], "enable"))
+ r = sd_bus_message_append(m, "bb", false, false);
+ else
+ r = sd_bus_message_append(m, "b", false);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable or disable unit: %s", bus_error_message(&error, r));
+
+ if (streq(argv[0], "enable")) {
+ r = sd_bus_message_read(reply, "b", NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reload",
+ &error,
+ NULL,
+ NULL);
+ if (r < 0) {
+ log_error("Failed to reload daemon: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
+
+static int match_log_message(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char **our_path = userdata, *line;
+ unsigned priority;
+ int r;
+
+ assert(m);
+ assert(our_path);
+
+ r = sd_bus_message_read(m, "us", &priority, &line);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (!streq_ptr(*our_path, sd_bus_message_get_path(m)))
+ return 0;
+
+ if (arg_quiet && LOG_PRI(priority) >= LOG_INFO)
+ return 0;
+
+ log_full(priority, "%s", line);
+ return 0;
+}
+
+static int match_transfer_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char **our_path = userdata, *path, *result;
+ uint32_t id;
+ int r;
+
+ assert(m);
+ assert(our_path);
+
+ r = sd_bus_message_read(m, "uos", &id, &path, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (!streq_ptr(*our_path, path))
+ return 0;
+
+ sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), !streq_ptr(result, "done"));
+ return 0;
+}
+
+static int transfer_signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ assert(s);
+ assert(si);
+
+ if (!arg_quiet)
+ log_info("Continuing download in the background. Use \"machinectl cancel-transfer %" PRIu32 "\" to abort transfer.", PTR_TO_UINT32(userdata));
+
+ sd_event_exit(sd_event_source_get_event(s), EINTR);
+ return 0;
+}
+
+static int transfer_image_common(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot_job_removed = NULL, *slot_log_message = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_event_unrefp) sd_event* event = NULL;
+ const char *path = NULL;
+ uint32_t id;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = bus_match_signal_async(
+ bus,
+ &slot_job_removed,
+ bus_import_mgr,
+ "TransferRemoved",
+ match_transfer_removed, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match: %m");
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &slot_log_message,
+ "org.freedesktop.import1",
+ NULL,
+ "org.freedesktop.import1.Transfer",
+ "LogMessage",
+ match_log_message, NULL, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match: %m");
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to transfer image: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "uo", &id, &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ if (!arg_quiet)
+ log_info("Enqueued transfer job %u. Press C-c to continue download in background.", id);
+
+ (void) sd_event_add_signal(event, NULL, SIGINT, transfer_signal_handler, UINT32_TO_PTR(id));
+ (void) sd_event_add_signal(event, NULL, SIGTERM, transfer_signal_handler, UINT32_TO_PTR(id));
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return -r;
+}
+
+static int import_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *ll = NULL, *fn = NULL;
+ const char *local = NULL, *path = NULL;
+ _cleanup_close_ int fd = -1;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2)
+ path = empty_or_dash_to_null(argv[1]);
+
+ if (argc >= 3)
+ local = empty_or_dash_to_null(argv[2]);
+ else if (path) {
+ r = path_extract_filename(path, &fn);
+ if (r < 0)
+ return log_error_errno(r, "Cannot extract container name from filename: %m");
+
+ local = fn;
+ }
+ if (!local)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Need either path or local name.");
+
+ r = tar_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local name %s is not a suitable machine name.",
+ local);
+
+ if (path) {
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_import_mgr, "ImportTar");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "hsbb",
+ fd >= 0 ? fd : STDIN_FILENO,
+ local,
+ arg_force,
+ arg_read_only);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int import_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *ll = NULL, *fn = NULL;
+ const char *local = NULL, *path = NULL;
+ _cleanup_close_ int fd = -1;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2)
+ path = empty_or_dash_to_null(argv[1]);
+
+ if (argc >= 3)
+ local = empty_or_dash_to_null(argv[2]);
+ else if (path) {
+ r = path_extract_filename(path, &fn);
+ if (r < 0)
+ return log_error_errno(r, "Cannot extract container name from filename: %m");
+
+ local = fn;
+ }
+ if (!local)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Need either path or local name.");
+
+ r = raw_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local name %s is not a suitable machine name.",
+ local);
+
+ if (path) {
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_import_mgr, "ImportRaw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "hsbb",
+ fd >= 0 ? fd : STDIN_FILENO,
+ local,
+ arg_force,
+ arg_read_only);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int import_fs(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ const char *local = NULL, *path = NULL;
+ _cleanup_free_ char *fn = NULL;
+ _cleanup_close_ int fd = -1;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2)
+ path = empty_or_dash_to_null(argv[1]);
+
+ if (argc >= 3)
+ local = empty_or_dash_to_null(argv[2]);
+ else if (path) {
+ r = path_extract_filename(path, &fn);
+ if (r < 0)
+ return log_error_errno(r, "Cannot extract container name from filename: %m");
+
+ local = fn;
+ }
+ if (!local)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Need either path or local name.");
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local name %s is not a suitable machine name.",
+ local);
+
+ if (path) {
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open directory '%s': %m", path);
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_import_mgr, "ImportFileSystem");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "hsbb",
+ fd >= 0 ? fd : STDIN_FILENO,
+ local,
+ arg_force,
+ arg_read_only);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static void determine_compression_from_filename(const char *p) {
+ if (arg_format)
+ return;
+
+ if (!p)
+ return;
+
+ if (endswith(p, ".xz"))
+ arg_format = "xz";
+ else if (endswith(p, ".gz"))
+ arg_format = "gzip";
+ else if (endswith(p, ".bz2"))
+ arg_format = "bzip2";
+}
+
+static int export_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *local = NULL, *path = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ local = argv[1];
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Machine name %s is not valid.", local);
+
+ if (argc >= 3)
+ path = argv[2];
+ path = empty_or_dash_to_null(path);
+
+ if (path) {
+ determine_compression_from_filename(path);
+
+ fd = open(path, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_NOCTTY, 0666);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_import_mgr, "ExportTar");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "shs",
+ local,
+ fd >= 0 ? fd : STDOUT_FILENO,
+ arg_format);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int export_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *local = NULL, *path = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ local = argv[1];
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Machine name %s is not valid.", local);
+
+ if (argc >= 3)
+ path = argv[2];
+ path = empty_or_dash_to_null(path);
+
+ if (path) {
+ determine_compression_from_filename(path);
+
+ fd = open(path, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC|O_NOCTTY, 0666);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", path);
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_import_mgr, "ExportRaw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "shs",
+ local,
+ fd >= 0 ? fd : STDOUT_FILENO,
+ arg_format);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int pull_tar(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *l = NULL, *ll = NULL;
+ const char *local, *remote;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ remote = argv[1];
+ if (!http_url_is_valid(remote))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "URL '%s' is not valid.", remote);
+
+ if (argc >= 3)
+ local = argv[2];
+ else {
+ r = import_url_last_component(remote, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get final component of URL: %m");
+
+ local = l;
+ }
+
+ local = empty_or_dash_to_null(local);
+
+ if (local) {
+ r = tar_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local name %s is not a suitable machine name.",
+ local);
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_import_mgr, "PullTar");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sssb",
+ remote,
+ local,
+ import_verify_to_string(arg_verify),
+ arg_force);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+static int pull_raw(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *l = NULL, *ll = NULL;
+ const char *local, *remote;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ remote = argv[1];
+ if (!http_url_is_valid(remote))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "URL '%s' is not valid.", remote);
+
+ if (argc >= 3)
+ local = argv[2];
+ else {
+ r = import_url_last_component(remote, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get final component of URL: %m");
+
+ local = l;
+ }
+
+ local = empty_or_dash_to_null(local);
+
+ if (local) {
+ r = raw_strip_suffixes(local, &ll);
+ if (r < 0)
+ return log_oom();
+
+ local = ll;
+
+ if (!machine_name_is_valid(local))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Local name %s is not a suitable machine name.",
+ local);
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_import_mgr, "PullRaw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sssb",
+ remote,
+ local,
+ import_verify_to_string(arg_verify),
+ arg_force);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return transfer_image_common(bus, m);
+}
+
+typedef struct TransferInfo {
+ uint32_t id;
+ const char *type;
+ const char *remote;
+ const char *local;
+ double progress;
+} TransferInfo;
+
+static int compare_transfer_info(const TransferInfo *a, const TransferInfo *b) {
+ return strcmp(a->local, b->local);
+}
+
+static int list_transfers(int argc, char *argv[], void *userdata) {
+ size_t max_type = STRLEN("TYPE"), max_local = STRLEN("LOCAL"), max_remote = STRLEN("REMOTE");
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ TransferInfo *transfers = NULL;
+ size_t n_transfers = 0, n_allocated = 0;
+ const char *type, *remote, *local;
+ sd_bus *bus = userdata;
+ uint32_t id, max_id = 0;
+ double progress;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_call_method(bus, bus_import_mgr, "ListTransfers", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not get transfers: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(usssdo)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(usssdo)", &id, &type, &remote, &local, &progress, NULL)) > 0) {
+ size_t l;
+
+ if (!GREEDY_REALLOC(transfers, n_allocated, n_transfers + 1))
+ return log_oom();
+
+ transfers[n_transfers].id = id;
+ transfers[n_transfers].type = type;
+ transfers[n_transfers].remote = remote;
+ transfers[n_transfers].local = local;
+ transfers[n_transfers].progress = progress;
+
+ l = strlen(type);
+ if (l > max_type)
+ max_type = l;
+
+ l = strlen(remote);
+ if (l > max_remote)
+ max_remote = l;
+
+ l = strlen(local);
+ if (l > max_local)
+ max_local = l;
+
+ if (id > max_id)
+ max_id = id;
+
+ n_transfers++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ typesafe_qsort(transfers, n_transfers, compare_transfer_info);
+
+ if (arg_legend && n_transfers > 0)
+ printf("%-*s %-*s %-*s %-*s %-*s\n",
+ (int) MAX(2U, DECIMAL_STR_WIDTH(max_id)), "ID",
+ (int) 7, "PERCENT",
+ (int) max_type, "TYPE",
+ (int) max_local, "LOCAL",
+ (int) max_remote, "REMOTE");
+
+ for (size_t j = 0; j < n_transfers; j++)
+
+ if (transfers[j].progress < 0)
+ printf("%*" PRIu32 " %*s %-*s %-*s %-*s\n",
+ (int) MAX(2U, DECIMAL_STR_WIDTH(max_id)), transfers[j].id,
+ (int) 7, "n/a",
+ (int) max_type, transfers[j].type,
+ (int) max_local, transfers[j].local,
+ (int) max_remote, transfers[j].remote);
+ else
+ printf("%*" PRIu32 " %*u%% %-*s %-*s %-*s\n",
+ (int) MAX(2U, DECIMAL_STR_WIDTH(max_id)), transfers[j].id,
+ (int) 6, (unsigned) (transfers[j].progress * 100),
+ (int) max_type, transfers[j].type,
+ (int) max_local, transfers[j].local,
+ (int) max_remote, transfers[j].remote);
+
+ if (arg_legend) {
+ if (n_transfers > 0)
+ printf("\n%zu transfers listed.\n", n_transfers);
+ else
+ printf("No transfers.\n");
+ }
+
+ return 0;
+}
+
+static int cancel_transfer(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (int i = 1; i < argc; i++) {
+ uint32_t id;
+
+ r = safe_atou32(argv[i], &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse transfer id: %s", argv[i]);
+
+ r = bus_call_method(bus, bus_import_mgr, "CancelTransfer", &error, NULL, "u", id);
+ if (r < 0)
+ return log_error_errno(r, "Could not cancel transfer: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int set_limit(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ uint64_t limit;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (STR_IN_SET(argv[argc-1], "-", "none", "infinity"))
+ limit = (uint64_t) -1;
+ else {
+ r = parse_size(argv[argc-1], 1024, &limit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse size: %s", argv[argc-1]);
+ }
+
+ if (argc > 2)
+ /* With two arguments changes the quota limit of the
+ * specified image */
+ r = bus_call_method(bus, bus_machine_mgr, "SetImageLimit", &error, NULL, "st", argv[1], limit);
+ else
+ /* With one argument changes the pool quota limit */
+ r = bus_call_method(bus, bus_machine_mgr, "SetPoolLimit", &error, NULL, "t", limit);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not set limit: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int clean_images(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ uint64_t usage, total = 0;
+ char fb[FORMAT_BYTES_MAX];
+ sd_bus *bus = userdata;
+ const char *name;
+ unsigned c = 0;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "CleanPool");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", arg_all ? "all" : "hidden");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence permit a longer time for completion. */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Could not clean pool: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(st)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(st)", &name, &usage)) > 0) {
+ if (usage == UINT64_MAX) {
+ log_info("Removed image '%s'", name);
+ total = UINT64_MAX;
+ } else {
+ log_info("Removed image '%s'. Freed exclusive disk space: %s",
+ name, format_bytes(fb, sizeof(fb), usage));
+ if (total != UINT64_MAX)
+ total += usage;
+ }
+ c++;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (total == UINT64_MAX)
+ log_info("Removed %u images in total.", c);
+ else
+ log_info("Removed %u images in total. Total freed exclusive disk space: %s.",
+ c, format_bytes(fb, sizeof(fb), total));
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("machinectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sSend control commands to or query the virtual machine and container%s\n"
+ "%sregistration manager.%s\n"
+ "\nMachine Commands:\n"
+ " list List running VMs and containers\n"
+ " status NAME... Show VM/container details\n"
+ " show [NAME...] Show properties of one or more VMs/containers\n"
+ " start NAME... Start container as a service\n"
+ " login [NAME] Get a login prompt in a container or on the\n"
+ " local host\n"
+ " shell [[USER@]NAME [COMMAND...]]\n"
+ " Invoke a shell (or other command) in a container\n"
+ " or on the local host\n"
+ " enable NAME... Enable automatic container start at boot\n"
+ " disable NAME... Disable automatic container start at boot\n"
+ " poweroff NAME... Power off one or more containers\n"
+ " reboot NAME... Reboot one or more containers\n"
+ " terminate NAME... Terminate one or more VMs/containers\n"
+ " kill NAME... Send signal to processes of a VM/container\n"
+ " copy-to NAME PATH [PATH] Copy files from the host to a container\n"
+ " copy-from NAME PATH [PATH] Copy files from a container to the host\n"
+ " bind NAME PATH [PATH] Bind mount a path from the host into a container\n\n"
+ "Image Commands:\n"
+ " list-images Show available container and VM images\n"
+ " image-status [NAME...] Show image details\n"
+ " show-image [NAME...] Show properties of image\n"
+ " clone NAME NAME Clone an image\n"
+ " rename NAME NAME Rename an image\n"
+ " read-only NAME [BOOL] Mark or unmark image read-only\n"
+ " remove NAME... Remove an image\n"
+ " set-limit [NAME] BYTES Set image or pool size limit (disk quota)\n"
+ " clean Remove hidden (or all) images\n\n"
+ "Image Transfer Commands:\n"
+ " pull-tar URL [NAME] Download a TAR container image\n"
+ " pull-raw URL [NAME] Download a RAW container or VM image\n"
+ " import-tar FILE [NAME] Import a local TAR container image\n"
+ " import-raw FILE [NAME] Import a local RAW container or VM image\n"
+ " import-fs DIRECTORY [NAME] Import a local directory container image\n"
+ " export-tar NAME [FILE] Export a TAR container image locally\n"
+ " export-raw NAME [FILE] Export a RAW container or VM image locally\n"
+ " list-transfers Show list of downloads in progress\n"
+ " cancel-transfer Cancel a download\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --no-ask-password Do not ask for system passwords\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -p --property=NAME Show only properties by this name\n"
+ " -q --quiet Suppress output\n"
+ " -a --all Show all properties, including empty ones\n"
+ " --value When showing properties, only print the value\n"
+ " -l --full Do not ellipsize output\n"
+ " --kill-who=WHO Who to send signal to\n"
+ " -s --signal=SIGNAL Which signal to send\n"
+ " --uid=USER Specify user ID to invoke shell as\n"
+ " -E --setenv=VAR=VALUE Add an environment variable for shell\n"
+ " --read-only Create read-only bind mount\n"
+ " --mkdir Create directory before bind mounting, if missing\n"
+ " -n --lines=INTEGER Number of journal entries to show\n"
+ " --max-addresses=INTEGER Number of internet addresses to show at most\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-iso-precise, short-full,\n"
+ " short-monotonic, short-unix, verbose, export,\n"
+ " json, json-pretty, json-sse, json-seq, cat,\n"
+ " with-unit)\n"
+ " --verify=MODE Verification mode for downloaded images (no,\n"
+ " checksum, signature)\n"
+ " --force Download image even if already exists\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_VALUE,
+ ARG_KILL_WHO,
+ ARG_READ_ONLY,
+ ARG_MKDIR,
+ ARG_NO_ASK_PASSWORD,
+ ARG_VERIFY,
+ ARG_FORCE,
+ ARG_FORMAT,
+ ARG_UID,
+ ARG_MAX_ADDRESSES,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "property", required_argument, NULL, 'p' },
+ { "all", no_argument, NULL, 'a' },
+ { "value", no_argument, NULL, ARG_VALUE },
+ { "full", no_argument, NULL, 'l' },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "kill-who", required_argument, NULL, ARG_KILL_WHO },
+ { "signal", required_argument, NULL, 's' },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ { "mkdir", no_argument, NULL, ARG_MKDIR },
+ { "quiet", no_argument, NULL, 'q' },
+ { "lines", required_argument, NULL, 'n' },
+ { "output", required_argument, NULL, 'o' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "verify", required_argument, NULL, ARG_VERIFY },
+ { "force", no_argument, NULL, ARG_FORCE },
+ { "format", required_argument, NULL, ARG_FORMAT },
+ { "uid", required_argument, NULL, ARG_UID },
+ { "setenv", required_argument, NULL, 'E' },
+ { "max-addresses", required_argument, NULL, ARG_MAX_ADDRESSES },
+ {}
+ };
+
+ bool reorder = false;
+ int c, r, shell = -1;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ for (;;) {
+ static const char option_string[] = "-hp:als:H:M:qn:o:E:";
+
+ c = getopt_long(argc, argv, option_string + reorder, options, NULL);
+ if (c < 0)
+ break;
+
+ switch (c) {
+
+ case 1: /* getopt_long() returns 1 if "-" was the first character of the option string, and a
+ * non-option argument was discovered. */
+
+ assert(!reorder);
+
+ /* We generally are fine with the fact that getopt_long() reorders the command line, and looks
+ * for switches after the main verb. However, for "shell" we really don't want that, since we
+ * want that switches specified after the machine name are passed to the program to execute,
+ * and not processed by us. To make this possible, we'll first invoke getopt_long() with
+ * reordering disabled (i.e. with the "-" prefix in the option string), looking for the first
+ * non-option parameter. If it's the verb "shell" we remember its position and continue
+ * processing options. In this case, as soon as we hit the next non-option argument we found
+ * the machine name, and stop further processing. If the first non-option argument is any other
+ * verb than "shell" we switch to normal reordering mode and continue processing arguments
+ * normally. */
+
+ if (shell >= 0) {
+ /* If we already found the "shell" verb on the command line, and now found the next
+ * non-option argument, then this is the machine name and we should stop processing
+ * further arguments. */
+ optind --; /* don't process this argument, go one step back */
+ goto done;
+ }
+ if (streq(optarg, "shell"))
+ /* Remember the position of the "shell" verb, and continue processing normally. */
+ shell = optind - 1;
+ else {
+ int saved_optind;
+
+ /* OK, this is some other verb. In this case, turn on reordering again, and continue
+ * processing normally. */
+ reorder = true;
+
+ /* We changed the option string. getopt_long() only looks at it again if we invoke it
+ * at least once with a reset option index. Hence, let's reset the option index here,
+ * then invoke getopt_long() again (ignoring what it has to say, after all we most
+ * likely already processed it), and the bump the option index so that we read the
+ * intended argument again. */
+ saved_optind = optind;
+ optind = 0;
+ (void) getopt_long(argc, argv, option_string + reorder, options, NULL);
+ optind = saved_optind - 1; /* go one step back, process this argument again */
+ }
+
+ break;
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case 'p':
+ r = strv_extend(&arg_property, optarg);
+ if (r < 0)
+ return log_oom();
+
+ /* If the user asked for a particular
+ * property, show it to them, even if it is
+ * empty. */
+ arg_all = true;
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case ARG_VALUE:
+ arg_value = true;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case 'n':
+ if (safe_atou(optarg, &arg_lines) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse lines '%s'", optarg);
+ break;
+
+ case 'o':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX);
+ return 0;
+ }
+
+ arg_output = output_mode_from_string(optarg);
+ if (arg_output < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown output '%s'.", optarg);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ arg_legend = false;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_KILL_WHO:
+ arg_kill_who = optarg;
+ break;
+
+ case 's':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
+ arg_signal = signal_from_string(optarg);
+ if (arg_signal < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse signal string %s.", optarg);
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_READ_ONLY:
+ arg_read_only = true;
+ break;
+
+ case ARG_MKDIR:
+ arg_mkdir = true;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_VERIFY:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(import_verify, ImportVerify, _IMPORT_VERIFY_MAX);
+ return 0;
+ }
+
+ arg_verify = import_verify_from_string(optarg);
+ if (arg_verify < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --verify= setting: %s", optarg);
+ break;
+
+ case ARG_FORCE:
+ arg_force = true;
+ break;
+
+ case ARG_FORMAT:
+ if (!STR_IN_SET(optarg, "uncompressed", "xz", "gzip", "bzip2"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown format: %s", optarg);
+
+ arg_format = optarg;
+ break;
+
+ case ARG_UID:
+ arg_uid = optarg;
+ break;
+
+ case 'E':
+ if (!env_assignment_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Environment assignment invalid: %s", optarg);
+
+ r = strv_extend(&arg_setenv, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case ARG_MAX_ADDRESSES:
+ if (streq(optarg, "all"))
+ arg_max_addresses = ALL_ADDRESSES;
+ else if (safe_atoi(optarg, &arg_max_addresses) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid number of addresses: %s", optarg);
+ else if (arg_max_addresses <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Number of IPs cannot be negative or zero: %s", optarg);
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+done:
+ if (shell >= 0) {
+ char *t;
+
+ /* We found the "shell" verb while processing the argument list. Since we turned off reordering of the
+ * argument list initially let's readjust it now, and move the "shell" verb to the back. */
+
+ optind -= 1; /* place the option index where the "shell" verb will be placed */
+
+ t = argv[shell];
+ for (int i = shell; i < optind; i++)
+ argv[i] = argv[i+1];
+ argv[optind] = t;
+ }
+
+ return 1;
+}
+
+static int machinectl_main(int argc, char *argv[], sd_bus *bus) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "list", VERB_ANY, 1, VERB_DEFAULT, list_machines },
+ { "list-images", VERB_ANY, 1, 0, list_images },
+ { "status", 2, VERB_ANY, 0, show_machine },
+ { "image-status", VERB_ANY, VERB_ANY, 0, show_image },
+ { "show", VERB_ANY, VERB_ANY, 0, show_machine },
+ { "show-image", VERB_ANY, VERB_ANY, 0, show_image },
+ { "terminate", 2, VERB_ANY, 0, terminate_machine },
+ { "reboot", 2, VERB_ANY, 0, reboot_machine },
+ { "poweroff", 2, VERB_ANY, 0, poweroff_machine },
+ { "stop", 2, VERB_ANY, 0, poweroff_machine }, /* Convenience alias */
+ { "kill", 2, VERB_ANY, 0, kill_machine },
+ { "login", VERB_ANY, 2, 0, login_machine },
+ { "shell", VERB_ANY, VERB_ANY, 0, shell_machine },
+ { "bind", 3, 4, 0, bind_mount },
+ { "copy-to", 3, 4, 0, copy_files },
+ { "copy-from", 3, 4, 0, copy_files },
+ { "remove", 2, VERB_ANY, 0, remove_image },
+ { "rename", 3, 3, 0, rename_image },
+ { "clone", 3, 3, 0, clone_image },
+ { "read-only", 2, 3, 0, read_only_image },
+ { "start", 2, VERB_ANY, 0, start_machine },
+ { "enable", 2, VERB_ANY, 0, enable_machine },
+ { "disable", 2, VERB_ANY, 0, enable_machine },
+ { "import-tar", 2, 3, 0, import_tar },
+ { "import-raw", 2, 3, 0, import_raw },
+ { "import-fs", 2, 3, 0, import_fs },
+ { "export-tar", 2, 3, 0, export_tar },
+ { "export-raw", 2, 3, 0, export_raw },
+ { "pull-tar", 2, 3, 0, pull_tar },
+ { "pull-raw", 2, 3, 0, pull_raw },
+ { "list-transfers", VERB_ANY, 1, 0, list_transfers },
+ { "cancel-transfer", 2, VERB_ANY, 0, cancel_transfer },
+ { "set-limit", 2, 3, 0, set_limit },
+ { "clean", VERB_ANY, 1, 0, clean_images },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_setup_cli();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ sigbus_install();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ (void) sd_bus_set_allow_interactive_authorization(bus, arg_ask_password);
+
+ return machinectl_main(argc, argv, bus);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/machine/machined-core.c b/src/machine/machined-core.c
new file mode 100644
index 0000000..1416fbf
--- /dev/null
+++ b/src/machine/machined-core.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "machined.h"
+#include "nscd-flush.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int on_nscd_cache_flush_event(sd_event_source *s, void *userdata) {
+ /* Let's ask glibc's nscd daemon to flush its caches. We request this for the three database machines may show
+ * up in: the hosts database (for resolvable machine names) and the user and group databases (for the user ns
+ * ranges). */
+
+ (void) nscd_flush_cache(STRV_MAKE("passwd", "group", "hosts"));
+ return 0;
+}
+
+int manager_enqueue_nscd_cache_flush(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (!m->nscd_cache_flush_event) {
+ r = sd_event_add_defer(m->event, &m->nscd_cache_flush_event, on_nscd_cache_flush_event, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate NSCD cache flush event: %m");
+
+ sd_event_source_set_description(m->nscd_cache_flush_event, "nscd-cache-flush");
+ }
+
+ r = sd_event_source_set_enabled(m->nscd_cache_flush_event, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ m->nscd_cache_flush_event = sd_event_source_unref(m->nscd_cache_flush_event);
+ return log_error_errno(r, "Failed to enable NSCD cache flush event: %m");
+ }
+
+ return 0;
+}
+
+int manager_find_machine_for_uid(Manager *m, uid_t uid, Machine **ret_machine, uid_t *ret_internal_uid) {
+ Machine *machine;
+ int r;
+
+ assert(m);
+ assert(uid_is_valid(uid));
+
+ /* Finds the machine for the specified host UID and returns it along with the UID translated into the
+ * internal UID inside the machine */
+
+ HASHMAP_FOREACH(machine, m->machines) {
+ uid_t converted;
+
+ r = machine_owns_uid(machine, uid, &converted);
+ if (r < 0)
+ return r;
+ if (r) {
+ if (ret_machine)
+ *ret_machine = machine;
+
+ if (ret_internal_uid)
+ *ret_internal_uid = converted;
+
+ return true;
+ }
+ }
+
+ if (ret_machine)
+ *ret_machine = NULL;
+ if (ret_internal_uid)
+ *ret_internal_uid = UID_INVALID;
+
+ return false;
+}
+
+int manager_find_machine_for_gid(Manager *m, gid_t gid, Machine **ret_machine, gid_t *ret_internal_gid) {
+ Machine *machine;
+ int r;
+
+ assert(m);
+ assert(gid_is_valid(gid));
+
+ HASHMAP_FOREACH(machine, m->machines) {
+ gid_t converted;
+
+ r = machine_owns_gid(machine, gid, &converted);
+ if (r < 0)
+ return r;
+ if (r) {
+ if (ret_machine)
+ *ret_machine = machine;
+
+ if (ret_internal_gid)
+ *ret_internal_gid = converted;
+
+ return true;
+ }
+ }
+
+ if (ret_machine)
+ *ret_machine = NULL;
+ if (ret_internal_gid)
+ *ret_internal_gid = GID_INVALID;
+
+ return false;
+}
diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c
new file mode 100644
index 0000000..494813e
--- /dev/null
+++ b/src/machine/machined-dbus.c
@@ -0,0 +1,1617 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-locator.h"
+#include "bus-polkit.h"
+#include "cgroup-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hostname-util.h"
+#include "image-dbus.h"
+#include "io-util.h"
+#include "machine-dbus.h"
+#include "machine-image.h"
+#include "machine-pool.h"
+#include "machined.h"
+#include "missing_capability.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_pool_path, "s", "/var/lib/machines");
+
+static int property_get_pool_usage(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ uint64_t usage = (uint64_t) -1;
+
+ assert(bus);
+ assert(reply);
+
+ fd = open("/var/lib/machines", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd >= 0) {
+ BtrfsQuotaInfo q;
+
+ if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0)
+ usage = q.referenced;
+ }
+
+ return sd_bus_message_append(reply, "t", usage);
+}
+
+static int property_get_pool_limit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ uint64_t size = (uint64_t) -1;
+
+ assert(bus);
+ assert(reply);
+
+ fd = open("/var/lib/machines", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd >= 0) {
+ BtrfsQuotaInfo q;
+
+ if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0)
+ size = q.referenced_max;
+ }
+
+ return sd_bus_message_append(reply, "t", size);
+}
+
+static int method_get_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ Machine *machine;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name);
+
+ p = machine_bus_path(machine);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ _unused_ Manager *m = userdata;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, name, NULL);
+ if (r == -ENOENT)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_IMAGE, "No image '%s' known", name);
+ if (r < 0)
+ return r;
+
+ p = image_bus_path(name);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_get_machine_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ Machine *machine = NULL;
+ pid_t pid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+
+ r = sd_bus_message_read(message, "u", &pid);
+ if (r < 0)
+ return r;
+
+ if (pid < 0)
+ return -EINVAL;
+
+ if (pid == 0) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+ }
+
+ r = manager_get_machine_by_pid(m, pid, &machine);
+ if (r < 0)
+ return r;
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_MACHINE_FOR_PID, "PID "PID_FMT" does not belong to any known machine", pid);
+
+ p = machine_bus_path(machine);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_list_machines(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *m = userdata;
+ Machine *machine;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssso)");
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ HASHMAP_FOREACH(machine, m->machines) {
+ _cleanup_free_ char *p = NULL;
+
+ p = machine_bus_path(machine);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(ssso)",
+ machine->name,
+ strempty(machine_class_to_string(machine->class)),
+ machine->service,
+ p);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_create_or_register_machine(Manager *manager, sd_bus_message *message, bool read_network, Machine **_m, sd_bus_error *error) {
+ const char *name, *service, *class, *root_directory;
+ const int32_t *netif = NULL;
+ MachineClass c;
+ uint32_t leader;
+ sd_id128_t id;
+ const void *v;
+ Machine *m;
+ size_t n, n_netif = 0;
+ int r;
+
+ assert(manager);
+ assert(message);
+ assert(_m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+ if (!machine_name_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid machine name");
+
+ r = sd_bus_message_read_array(message, 'y', &v, &n);
+ if (r < 0)
+ return r;
+ if (n == 0)
+ id = SD_ID128_NULL;
+ else if (n == 16)
+ memcpy(&id, v, n);
+ else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid machine ID parameter");
+
+ r = sd_bus_message_read(message, "ssus", &service, &class, &leader, &root_directory);
+ if (r < 0)
+ return r;
+
+ if (read_network) {
+ size_t i;
+
+ r = sd_bus_message_read_array(message, 'i', (const void**) &netif, &n_netif);
+ if (r < 0)
+ return r;
+
+ n_netif /= sizeof(int32_t);
+
+ for (i = 0; i < n_netif; i++) {
+ if (netif[i] <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid network interface index %i", netif[i]);
+ }
+ }
+
+ if (isempty(class))
+ c = _MACHINE_CLASS_INVALID;
+ else {
+ c = machine_class_from_string(class);
+ if (c < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid machine class parameter");
+ }
+
+ if (leader == 1)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid leader PID");
+
+ if (!isempty(root_directory) && !path_is_absolute(root_directory))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Root directory must be empty or an absolute path");
+
+ if (leader == 0) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ assert_cc(sizeof(uint32_t) == sizeof(pid_t));
+
+ r = sd_bus_creds_get_pid(creds, (pid_t*) &leader);
+ if (r < 0)
+ return r;
+ }
+
+ if (hashmap_get(manager->machines, name))
+ return sd_bus_error_setf(error, BUS_ERROR_MACHINE_EXISTS, "Machine '%s' already exists", name);
+
+ r = manager_add_machine(manager, name, &m);
+ if (r < 0)
+ return r;
+
+ m->leader = leader;
+ m->class = c;
+ m->id = id;
+
+ if (!isempty(service)) {
+ m->service = strdup(service);
+ if (!m->service) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!isempty(root_directory)) {
+ m->root_directory = strdup(root_directory);
+ if (!m->root_directory) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (n_netif > 0) {
+ assert_cc(sizeof(int32_t) == sizeof(int));
+ m->netif = memdup(netif, sizeof(int32_t) * n_netif);
+ if (!m->netif) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->n_netif = n_netif;
+ }
+
+ *_m = m;
+
+ return 1;
+
+fail:
+ machine_add_to_gc_queue(m);
+ return r;
+}
+
+static int method_create_machine_internal(sd_bus_message *message, bool read_network, void *userdata, sd_bus_error *error) {
+ Manager *manager = userdata;
+ Machine *m = NULL;
+ int r;
+
+ assert(message);
+ assert(manager);
+
+ r = method_create_or_register_machine(manager, message, read_network, &m, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "(sv)");
+ if (r < 0)
+ goto fail;
+
+ r = machine_start(m, message, error);
+ if (r < 0)
+ goto fail;
+
+ m->create_message = sd_bus_message_ref(message);
+ return 1;
+
+fail:
+ machine_add_to_gc_queue(m);
+ return r;
+}
+
+static int method_create_machine_with_network(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_create_machine_internal(message, true, userdata, error);
+}
+
+static int method_create_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_create_machine_internal(message, false, userdata, error);
+}
+
+static int method_register_machine_internal(sd_bus_message *message, bool read_network, void *userdata, sd_bus_error *error) {
+ Manager *manager = userdata;
+ _cleanup_free_ char *p = NULL;
+ Machine *m = NULL;
+ int r;
+
+ assert(message);
+ assert(manager);
+
+ r = method_create_or_register_machine(manager, message, read_network, &m, error);
+ if (r < 0)
+ return r;
+
+ r = cg_pid_get_unit(m->leader, &m->unit);
+ if (r < 0) {
+ r = sd_bus_error_set_errnof(error, r,
+ "Failed to determine unit of process "PID_FMT" : %m",
+ m->leader);
+ goto fail;
+ }
+
+ r = machine_start(m, NULL, error);
+ if (r < 0)
+ goto fail;
+
+ p = machine_bus_path(m);
+ if (!p) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ return sd_bus_reply_method_return(message, "o", p);
+
+fail:
+ machine_add_to_gc_queue(m);
+ return r;
+}
+
+static int method_register_machine_with_network(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_register_machine_internal(message, true, userdata, error);
+}
+
+static int method_register_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return method_register_machine_internal(message, false, userdata, error);
+}
+
+static int redirect_method_to_machine(sd_bus_message *message, Manager *m, sd_bus_error *error, sd_bus_message_handler_t method) {
+ Machine *machine;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+ assert(method);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name);
+
+ return method(message, machine, error);
+}
+
+static int method_unregister_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_unregister);
+}
+
+static int method_terminate_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_terminate);
+}
+
+static int method_kill_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_kill);
+}
+
+static int method_get_machine_addresses(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_get_addresses);
+}
+
+static int method_get_machine_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_get_os_release);
+}
+
+static int method_list_images(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ _unused_ Manager *m = userdata;
+ Image *image;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images)
+ return -ENOMEM;
+
+ r = image_discover(IMAGE_MACHINE, images);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssbttto)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images) {
+ _cleanup_free_ char *p = NULL;
+
+ p = image_bus_path(image->name);
+ if (!p)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "(ssbttto)",
+ image->name,
+ image_type_to_string(image->type),
+ image->read_only,
+ image->crtime,
+ image->mtime,
+ image->usage,
+ p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_open_machine_pty(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_open_pty);
+}
+
+static int method_open_machine_login(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_open_login);
+}
+
+static int method_open_machine_shell(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_open_shell);
+}
+
+static int method_bind_mount_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_bind_mount);
+}
+
+static int method_copy_machine(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_copy);
+}
+
+static int method_open_machine_root_directory(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_open_root_directory);
+}
+
+static int method_get_machine_uid_shift(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_machine(message, userdata, error, bus_machine_method_get_uid_shift);
+}
+
+static int redirect_method_to_image(sd_bus_message *message, Manager *m, sd_bus_error *error, sd_bus_message_handler_t method) {
+ _cleanup_(image_unrefp) Image* i = NULL;
+ const char *name;
+ int r;
+
+ assert(message);
+ assert(m);
+ assert(method);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ if (!image_name_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is invalid.", name);
+
+ r = image_find(IMAGE_MACHINE, name, &i);
+ if (r == -ENOENT)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_IMAGE, "No image '%s' known", name);
+ if (r < 0)
+ return r;
+
+ i->userdata = m;
+ return method(message, i, error);
+}
+
+static int method_remove_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_remove);
+}
+
+static int method_rename_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_rename);
+}
+
+static int method_clone_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_clone);
+}
+
+static int method_mark_image_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_mark_read_only);
+}
+
+static int method_get_image_hostname(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_get_hostname);
+}
+
+static int method_get_image_machine_id(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_get_machine_id);
+}
+
+static int method_get_image_machine_info(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_get_machine_info);
+}
+
+static int method_get_image_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_get_os_release);
+}
+
+static int clean_pool_done(Operation *operation, int ret, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ bool success;
+ size_t n;
+ int r;
+
+ assert(operation);
+ assert(operation->extra_fd >= 0);
+
+ if (lseek(operation->extra_fd, 0, SEEK_SET) == (off_t) -1)
+ return -errno;
+
+ f = take_fdopen(&operation->extra_fd, "r");
+ if (!f)
+ return -errno;
+
+ /* The resulting temporary file starts with a boolean value that indicates success or not. */
+ errno = 0;
+ n = fread(&success, 1, sizeof(success), f);
+ if (n != sizeof(success))
+ return ret < 0 ? ret : errno_or_else(EIO);
+
+ if (ret < 0) {
+ _cleanup_free_ char *name = NULL;
+
+ /* The clean-up operation failed. In this case the resulting temporary file should contain a boolean
+ * set to false followed by the name of the failed image. Let's try to read this and use it for the
+ * error message. If we can't read it, don't mind, and return the naked error. */
+
+ if (success) /* The resulting temporary file could not be updated, ignore it. */
+ return ret;
+
+ r = read_nul_string(f, LONG_LINE_MAX, &name);
+ if (r <= 0) /* Same here... */
+ return ret;
+
+ return sd_bus_error_set_errnof(error, ret, "Failed to remove image %s: %m", name);
+ }
+
+ assert(success);
+
+ r = sd_bus_message_new_method_return(operation->message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ /* On success the resulting temporary file will contain a list of image names that were removed followed by
+ * their size on disk. Let's read that and turn it into a bus message. */
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+ uint64_t size;
+
+ r = read_nul_string(f, LONG_LINE_MAX, &name);
+ if (r < 0)
+ return r;
+ if (r == 0) /* reached the end */
+ break;
+
+ errno = 0;
+ n = fread(&size, 1, sizeof(size), f);
+ if (n != sizeof(size))
+ return errno_or_else(EIO);
+
+ r = sd_bus_message_append(reply, "(st)", name, size);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_clean_pool(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ enum {
+ REMOVE_ALL,
+ REMOVE_HIDDEN,
+ } mode;
+
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ _cleanup_close_ int result_fd = -1;
+ Manager *m = userdata;
+ Operation *operation;
+ const char *mm;
+ pid_t child;
+ int r;
+
+ assert(message);
+
+ if (m->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations.");
+
+ r = sd_bus_message_read(message, "s", &mm);
+ if (r < 0)
+ return r;
+
+ if (streq(mm, "all"))
+ mode = REMOVE_ALL;
+ else if (streq(mm, "hidden"))
+ mode = REMOVE_HIDDEN;
+ else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mode '%s'.", mm);
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ /* Create a temporary file we can dump information about deleted images into. We use a temporary file for this
+ * instead of a pipe or so, since this might grow quit large in theory and we don't want to process this
+ * continuously */
+ result_fd = open_tmpfile_unlinkable(NULL, O_RDWR|O_CLOEXEC);
+ if (result_fd < 0)
+ return -errno;
+
+ /* This might be a slow operation, run it asynchronously in a background process */
+ r = safe_fork("(sd-clean)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ bool success = true;
+ Image *image;
+ ssize_t l;
+
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images) {
+ r = -ENOMEM;
+ goto child_fail;
+ }
+
+ r = image_discover(IMAGE_MACHINE, images);
+ if (r < 0)
+ goto child_fail;
+
+ l = write(result_fd, &success, sizeof(success));
+ if (l < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ HASHMAP_FOREACH(image, images) {
+
+ /* We can't remove vendor images (i.e. those in /usr) */
+ if (IMAGE_IS_VENDOR(image))
+ continue;
+
+ if (IMAGE_IS_HOST(image))
+ continue;
+
+ if (mode == REMOVE_HIDDEN && !IMAGE_IS_HIDDEN(image))
+ continue;
+
+ r = image_remove(image);
+ if (r == -EBUSY) /* keep images that are currently being used. */
+ continue;
+ if (r < 0) {
+ /* If the operation failed, let's override everything we wrote, and instead write there at which image we failed. */
+ success = false;
+ (void) ftruncate(result_fd, 0);
+ (void) lseek(result_fd, 0, SEEK_SET);
+ (void) write(result_fd, &success, sizeof(success));
+ (void) write(result_fd, image->name, strlen(image->name)+1);
+ goto child_fail;
+ }
+
+ l = write(result_fd, image->name, strlen(image->name)+1);
+ if (l < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ l = write(result_fd, &image->usage_exclusive, sizeof(image->usage_exclusive));
+ if (l < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ }
+
+ result_fd = safe_close(result_fd);
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ /* The clean-up might take a while, hence install a watch on the child and return */
+
+ r = operation_new(m, NULL, child, message, errno_pipe_fd[0], &operation);
+ if (r < 0) {
+ (void) sigkill_wait(child);
+ return r;
+ }
+
+ operation->extra_fd = result_fd;
+ operation->done = clean_pool_done;
+
+ result_fd = -1;
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+static int method_set_pool_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint64_t limit;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "t", &limit);
+ if (r < 0)
+ return r;
+ if (!FILE_SIZE_VALID_OR_INFINITY(limit))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New limit out of range");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.machine1.manage-machines",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ /* Set up the machine directory if necessary */
+ r = setup_machine_directory(error);
+ if (r < 0)
+ return r;
+
+ (void) btrfs_qgroup_set_limit("/var/lib/machines", 0, limit);
+
+ r = btrfs_subvol_set_subtree_quota_limit("/var/lib/machines", 0, limit);
+ if (r == -ENOTTY)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Quota is only supported on btrfs.");
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to adjust quota limit: %m");
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int method_set_image_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(message, userdata, error, bus_image_method_set_limit);
+}
+
+static int method_map_from_machine_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Machine *machine;
+ uint32_t uid;
+ uid_t converted;
+ int r;
+
+ r = sd_bus_message_read(message, "su", &name, &uid);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user ID " UID_FMT, uid);
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name);
+
+ if (machine->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Not supported for non-container machines.");
+
+ r = machine_translate_uid(machine, uid, &converted);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER_MAPPING, "Machine '%s' has no matching user mappings.", name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) converted);
+}
+
+static int method_map_to_machine_user(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *o = NULL;
+ Manager *m = userdata;
+ Machine *machine;
+ uid_t uid, converted;
+ int r;
+
+ r = sd_bus_message_read(message, "u", &uid);
+ if (r < 0)
+ return r;
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user ID " UID_FMT, uid);
+ if (uid < 0x10000)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER_MAPPING, "User " UID_FMT " belongs to host UID range", uid);
+
+ r = manager_find_machine_for_uid(m, uid, &machine, &converted);
+ if (r < 0)
+ return r;
+ if (!r)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER_MAPPING, "No matching user mapping for " UID_FMT ".", uid);
+
+ o = machine_bus_path(machine);
+ if (!o)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "sou", machine->name, o, (uint32_t) converted);
+}
+
+static int method_map_from_machine_group(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Machine *machine;
+ gid_t converted;
+ uint32_t gid;
+ int r;
+
+ r = sd_bus_message_read(message, "su", &name, &gid);
+ if (r < 0)
+ return r;
+
+ if (!gid_is_valid(gid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group ID " GID_FMT, gid);
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_MACHINE, "No machine '%s' known", name);
+
+ if (machine->class != MACHINE_CONTAINER)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Not supported for non-container machines.");
+
+ r = machine_translate_gid(machine, gid, &converted);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_USER_MAPPING, "Machine '%s' has no matching group mappings.", name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) converted);
+}
+
+static int method_map_to_machine_group(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *o = NULL;
+ Manager *m = userdata;
+ Machine *machine;
+ gid_t gid, converted;
+ int r;
+
+ r = sd_bus_message_read(message, "u", &gid);
+ if (r < 0)
+ return r;
+ if (!gid_is_valid(gid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group ID " GID_FMT, gid);
+ if (gid < 0x10000)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_GROUP_MAPPING, "Group " GID_FMT " belongs to host GID range", gid);
+
+ r = manager_find_machine_for_gid(m, gid, &machine, &converted);
+ if (r < 0)
+ return r;
+ if (!r)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_GROUP_MAPPING, "No matching group mapping for " GID_FMT ".", gid);
+
+ o = machine_bus_path(machine);
+ if (!o)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "sou", machine->name, o, (uint32_t) converted);
+}
+
+const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("PoolPath", "s", property_get_pool_path, 0, 0),
+ SD_BUS_PROPERTY("PoolUsage", "t", property_get_pool_usage, 0, 0),
+ SD_BUS_PROPERTY("PoolLimit", "t", property_get_pool_limit, 0, 0),
+
+ SD_BUS_METHOD_WITH_NAMES("GetMachine",
+ "s",
+ SD_BUS_PARAM(name),
+ "o",
+ SD_BUS_PARAM(machine),
+ method_get_machine,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetImage",
+ "s",
+ SD_BUS_PARAM(name),
+ "o",
+ SD_BUS_PARAM(image),
+ method_get_image,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetMachineByPID",
+ "u",
+ SD_BUS_PARAM(pid),
+ "o",
+ SD_BUS_PARAM(machine),
+ method_get_machine_by_pid,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListMachines",
+ NULL,,
+ "a(ssso)",
+ SD_BUS_PARAM(machines),
+ method_list_machines,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListImages",
+ NULL,,
+ "a(ssbttto)",
+ SD_BUS_PARAM(images),
+ method_list_images,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CreateMachine",
+ "sayssusa(sv)",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(id)
+ SD_BUS_PARAM(service)
+ SD_BUS_PARAM(class)
+ SD_BUS_PARAM(leader)
+ SD_BUS_PARAM(root_directory)
+ SD_BUS_PARAM(scope_properties),
+ "o",
+ SD_BUS_PARAM(path),
+ method_create_machine, 0),
+ SD_BUS_METHOD_WITH_NAMES("CreateMachineWithNetwork",
+ "sayssusaia(sv)",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(id)
+ SD_BUS_PARAM(service)
+ SD_BUS_PARAM(class)
+ SD_BUS_PARAM(leader)
+ SD_BUS_PARAM(root_directory)
+ SD_BUS_PARAM(ifindices)
+ SD_BUS_PARAM(scope_properties),
+ "o",
+ SD_BUS_PARAM(path),
+ method_create_machine_with_network, 0),
+ SD_BUS_METHOD_WITH_NAMES("RegisterMachine",
+ "sayssus",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(id)
+ SD_BUS_PARAM(service)
+ SD_BUS_PARAM(class)
+ SD_BUS_PARAM(leader)
+ SD_BUS_PARAM(root_directory),
+ "o",
+ SD_BUS_PARAM(path),
+ method_register_machine, 0),
+ SD_BUS_METHOD_WITH_NAMES("RegisterMachineWithNetwork",
+ "sayssusai",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(id)
+ SD_BUS_PARAM(service)
+ SD_BUS_PARAM(class)
+ SD_BUS_PARAM(leader)
+ SD_BUS_PARAM(root_directory)
+ SD_BUS_PARAM(ifindices),
+ "o",
+ SD_BUS_PARAM(path),
+ method_register_machine_with_network, 0),
+ SD_BUS_METHOD_WITH_NAMES("UnregisterMachine",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_unregister_machine,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("TerminateMachine",
+ "s",
+ SD_BUS_PARAM(id),
+ NULL,,
+ method_terminate_machine,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("KillMachine",
+ "ssi",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(who)
+ SD_BUS_PARAM(signal),
+ NULL,,
+ method_kill_machine,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetMachineAddresses",
+ "s",
+ SD_BUS_PARAM(name),
+ "a(iay)",
+ SD_BUS_PARAM(addresses),
+ method_get_machine_addresses,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetMachineOSRelease",
+ "s",
+ SD_BUS_PARAM(name),
+ "a{ss}",
+ SD_BUS_PARAM(fields),
+ method_get_machine_os_release,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("OpenMachinePTY",
+ "s",
+ SD_BUS_PARAM(name),
+ "hs",
+ SD_BUS_PARAM(pty)
+ SD_BUS_PARAM(pty_path),
+ method_open_machine_pty,
+ 0),
+ SD_BUS_METHOD_WITH_NAMES("OpenMachineLogin",
+ "s",
+ SD_BUS_PARAM(name),
+ "hs",
+ SD_BUS_PARAM(pty)
+ SD_BUS_PARAM(pty_path),
+ method_open_machine_login,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("OpenMachineShell",
+ "sssasas",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(user)
+ SD_BUS_PARAM(path)
+ SD_BUS_PARAM(args)
+ SD_BUS_PARAM(environment),
+ "hs",
+ SD_BUS_PARAM(pty)
+ SD_BUS_PARAM(pty_path),
+ method_open_machine_shell,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("BindMountMachine",
+ "sssbb",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(source)
+ SD_BUS_PARAM(destination)
+ SD_BUS_PARAM(read_only)
+ SD_BUS_PARAM(mkdir),
+ NULL,,
+ method_bind_mount_machine,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CopyFromMachine",
+ "sss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(source)
+ SD_BUS_PARAM(destination),
+ NULL,,
+ method_copy_machine,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CopyToMachine",
+ "sss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(source)
+ SD_BUS_PARAM(destination),
+ NULL,,
+ method_copy_machine,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("OpenMachineRootDirectory",
+ "s",
+ SD_BUS_PARAM(name),
+ "h",
+ SD_BUS_PARAM(fd),
+ method_open_machine_root_directory,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetMachineUIDShift",
+ "s",
+ SD_BUS_PARAM(name),
+ "u",
+ SD_BUS_PARAM(shift),
+ method_get_machine_uid_shift,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("RemoveImage",
+ "s",
+ SD_BUS_PARAM(name),
+ NULL,,
+ method_remove_image,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("RenameImage",
+ "ss",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(new_name),
+ NULL,,
+ method_rename_image,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CloneImage",
+ "ssb",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(new_name)
+ SD_BUS_PARAM(read_only),
+ NULL,,
+ method_clone_image,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("MarkImageReadOnly",
+ "sb",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(read_only),
+ NULL,,
+ method_mark_image_read_only,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetImageHostname",
+ "s",
+ SD_BUS_PARAM(name),
+ "s",
+ SD_BUS_PARAM(hostname),
+ method_get_image_hostname,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetImageMachineID",
+ "s",
+ SD_BUS_PARAM(name),
+ "ay",
+ SD_BUS_PARAM(id),
+ method_get_image_machine_id,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetImageMachineInfo",
+ "s",
+ SD_BUS_PARAM(name),
+ "a{ss}",
+ SD_BUS_PARAM(machine_info),
+ method_get_image_machine_info,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("GetImageOSRelease",
+ "s",
+ SD_BUS_PARAM(name),
+ "a{ss}",
+ SD_BUS_PARAM(os_release),
+ method_get_image_os_release,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetPoolLimit",
+ "t",
+ SD_BUS_PARAM(size),
+ NULL,,
+ method_set_pool_limit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetImageLimit",
+ "st",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(size),
+ NULL,,
+ method_set_image_limit,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("CleanPool",
+ "s",
+ SD_BUS_PARAM(mode),
+ "a(st)",
+ SD_BUS_PARAM(images),
+ method_clean_pool,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("MapFromMachineUser",
+ "su",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(uid_inner),
+ "u",
+ SD_BUS_PARAM(uid_outer),
+ method_map_from_machine_user,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("MapToMachineUser",
+ "u",
+ SD_BUS_PARAM(uid_outer),
+ "sou",
+ SD_BUS_PARAM(machine_name)
+ SD_BUS_PARAM(machine_path)
+ SD_BUS_PARAM(uid_inner),
+ method_map_to_machine_user,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("MapFromMachineGroup",
+ "su",
+ SD_BUS_PARAM(name)
+ SD_BUS_PARAM(gid_inner),
+ "u",
+ SD_BUS_PARAM(gid_outer),
+ method_map_from_machine_group,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("MapToMachineGroup",
+ "u",
+ SD_BUS_PARAM(gid_outer),
+ "sou",
+ SD_BUS_PARAM(machine_name)
+ SD_BUS_PARAM(machine_path)
+ SD_BUS_PARAM(gid_inner),
+ method_map_to_machine_group,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_SIGNAL_WITH_NAMES("MachineNew",
+ "so",
+ SD_BUS_PARAM(machine)
+ SD_BUS_PARAM(path),
+ 0),
+ SD_BUS_SIGNAL_WITH_NAMES("MachineRemoved",
+ "so",
+ SD_BUS_PARAM(machine)
+ SD_BUS_PARAM(path),
+ 0),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation manager_object = {
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ .vtables = BUS_VTABLES(manager_vtable),
+ .children = BUS_IMPLEMENTATIONS( &machine_object,
+ &image_object ),
+};
+
+int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *path, *result, *unit;
+ Manager *m = userdata;
+ Machine *machine;
+ uint32_t id;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "uoss", &id, &path, &unit, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ machine = hashmap_get(m->machine_units, unit);
+ if (!machine)
+ return 0;
+
+ if (streq_ptr(path, machine->scope_job)) {
+ machine->scope_job = mfree(machine->scope_job);
+
+ if (machine->started) {
+ if (streq(result, "done"))
+ machine_send_create_reply(machine, NULL);
+ else {
+ _cleanup_(sd_bus_error_free) sd_bus_error e = SD_BUS_ERROR_NULL;
+
+ sd_bus_error_setf(&e, BUS_ERROR_JOB_FAILED, "Start job for unit %s failed with '%s'", unit, result);
+
+ machine_send_create_reply(machine, &e);
+ }
+ }
+
+ machine_save(machine);
+ }
+
+ machine_add_to_gc_queue(machine);
+ return 0;
+}
+
+int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *unit = NULL;
+ const char *path;
+ Manager *m = userdata;
+ Machine *machine;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ path = sd_bus_message_get_path(message);
+ if (!path)
+ return 0;
+
+ r = unit_name_from_dbus_path(path, &unit);
+ if (r == -EINVAL) /* not for a unit */
+ return 0;
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ machine = hashmap_get(m->machine_units, unit);
+ if (!machine)
+ return 0;
+
+ machine_add_to_gc_queue(machine);
+ return 0;
+}
+
+int match_unit_removed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *path, *unit;
+ Manager *m = userdata;
+ Machine *machine;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "so", &unit, &path);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ machine = hashmap_get(m->machine_units, unit);
+ if (!machine)
+ return 0;
+
+ machine_add_to_gc_queue(machine);
+ return 0;
+}
+
+int match_reloading(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ Machine *machine;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+ if (b)
+ return 0;
+
+ /* systemd finished reloading, let's recheck all our machines */
+ log_debug("System manager has been reloaded, rechecking machines...");
+
+ HASHMAP_FOREACH(machine, m->machines)
+ machine_add_to_gc_queue(machine);
+
+ return 0;
+}
+
+int manager_unref_unit(
+ Manager *m,
+ const char *unit,
+ sd_bus_error *error) {
+
+ assert(m);
+ assert(unit);
+
+ return bus_call_method(m->bus, bus_systemd_mgr, "UnrefUnit", error, NULL, "s", unit);
+}
+
+int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(unit);
+
+ r = bus_call_method(manager->bus, bus_systemd_mgr, "StopUnit", error, &reply, "ss", unit, "fail");
+ if (r < 0) {
+ if (sd_bus_error_has_names(error, BUS_ERROR_NO_SUCH_UNIT,
+ BUS_ERROR_LOAD_FAILED)) {
+
+ if (job)
+ *job = NULL;
+
+ sd_bus_error_free(error);
+ return 0;
+ }
+
+ return r;
+ }
+
+ if (job) {
+ const char *j;
+ char *copy;
+
+ r = sd_bus_message_read(reply, "o", &j);
+ if (r < 0)
+ return r;
+
+ copy = strdup(j);
+ if (!copy)
+ return -ENOMEM;
+
+ *job = copy;
+ }
+
+ return 1;
+}
+
+int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error) {
+ assert(manager);
+ assert(unit);
+
+ return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnit", error, NULL, "ssi", unit, "all", signo);
+}
+
+int manager_unit_is_active(Manager *manager, const char *unit) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *state;
+ int r;
+
+ assert(manager);
+ assert(unit);
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ if (sd_bus_error_has_names(&error, SD_BUS_ERROR_NO_REPLY,
+ SD_BUS_ERROR_DISCONNECTED))
+ return true;
+
+ if (sd_bus_error_has_names(&error, BUS_ERROR_NO_SUCH_UNIT,
+ BUS_ERROR_LOAD_FAILED))
+ return false;
+
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "s", &state);
+ if (r < 0)
+ return -EINVAL;
+
+ return !STR_IN_SET(state, "inactive", "failed");
+}
+
+int manager_job_is_active(Manager *manager, const char *path) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(manager);
+ assert(path);
+
+ r = sd_bus_get_property(
+ manager->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Job",
+ "State",
+ &error,
+ &reply,
+ "s");
+ if (r < 0) {
+ if (sd_bus_error_has_names(&error, SD_BUS_ERROR_NO_REPLY,
+ SD_BUS_ERROR_DISCONNECTED))
+ return true;
+
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_OBJECT))
+ return false;
+
+ return r;
+ }
+
+ /* We don't actually care about the state really. The fact
+ * that we could read the job state is enough for us */
+
+ return true;
+}
+
+int manager_get_machine_by_pid(Manager *m, pid_t pid, Machine **machine) {
+ Machine *mm;
+ int r;
+
+ assert(m);
+ assert(pid >= 1);
+ assert(machine);
+
+ mm = hashmap_get(m->machine_leaders, PID_TO_PTR(pid));
+ if (!mm) {
+ _cleanup_free_ char *unit = NULL;
+
+ r = cg_pid_get_unit(pid, &unit);
+ if (r >= 0)
+ mm = hashmap_get(m->machine_units, unit);
+ }
+ if (!mm)
+ return 0;
+
+ *machine = mm;
+ return 1;
+}
+
+int manager_add_machine(Manager *m, const char *name, Machine **_machine) {
+ Machine *machine;
+
+ assert(m);
+ assert(name);
+
+ machine = hashmap_get(m->machines, name);
+ if (!machine) {
+ machine = machine_new(m, _MACHINE_CLASS_INVALID, name);
+ if (!machine)
+ return -ENOMEM;
+ }
+
+ if (_machine)
+ *_machine = machine;
+
+ return 0;
+}
diff --git a/src/machine/machined-varlink.c b/src/machine/machined-varlink.c
new file mode 100644
index 0000000..2d6c199
--- /dev/null
+++ b/src/machine/machined-varlink.c
@@ -0,0 +1,423 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "format-util.h"
+#include "machined-varlink.h"
+#include "mkdir.h"
+#include "user-util.h"
+#include "varlink.h"
+
+typedef struct LookupParameters {
+ const char *user_name;
+ const char *group_name;
+ union {
+ uid_t uid;
+ gid_t gid;
+ };
+ const char *service;
+} LookupParameters;
+
+static int build_user_json(const char *user_name, uid_t uid, const char *real_name, JsonVariant **ret) {
+ assert(user_name);
+ assert(uid_is_valid(uid));
+ assert(ret);
+
+ return json_build(ret, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("record", JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(user_name)),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(uid)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(GID_NOBODY)),
+ JSON_BUILD_PAIR_CONDITION(!isempty(real_name), "realName", JSON_BUILD_STRING(real_name)),
+ JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING("/")),
+ JSON_BUILD_PAIR("shell", JSON_BUILD_STRING(NOLOGIN)),
+ JSON_BUILD_PAIR("locked", JSON_BUILD_BOOLEAN(true)),
+ JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.Machine")),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("container"))))));
+}
+
+static bool user_match_lookup_parameters(LookupParameters *p, const char *name, uid_t uid) {
+ assert(p);
+
+ if (p->user_name && !streq(name, p->user_name))
+ return false;
+
+ if (uid_is_valid(p->uid) && uid != p->uid)
+ return false;
+
+ return true;
+}
+
+static int user_lookup_uid(Manager *m, uid_t uid, char **ret_name, char **ret_real_name) {
+ _cleanup_free_ char *n = NULL, *rn = NULL;
+ uid_t converted_uid;
+ Machine *machine;
+ int r;
+
+ assert(m);
+ assert(uid_is_valid(uid));
+ assert(ret_name);
+ assert(ret_real_name);
+
+ if (uid < 0x10000) /* Host UID range */
+ return -ESRCH;
+
+ r = manager_find_machine_for_uid(m, uid, &machine, &converted_uid);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ESRCH;
+
+ if (asprintf(&n, "vu-%s-" UID_FMT, machine->name, converted_uid) < 0)
+ return -ENOMEM;
+
+ /* Don't synthesize invalid user/group names (too long...) */
+ if (!valid_user_group_name(n, 0))
+ return -ESRCH;
+
+ if (asprintf(&rn, "UID " UID_FMT " of Container %s", converted_uid, machine->name) < 0)
+ return -ENOMEM;
+
+ /* Don't synthesize invalid real names either, but since this field doesn't matter much, simply invalidate things */
+ if (!valid_gecos(rn))
+ rn = mfree(rn);
+
+ *ret_name = TAKE_PTR(n);
+ *ret_real_name = TAKE_PTR(rn);
+ return 0;
+}
+
+static int user_lookup_name(Manager *m, const char *name, uid_t *ret_uid, char **ret_real_name) {
+ _cleanup_free_ char *mn = NULL, *rn = NULL;
+ uid_t uid, converted_uid;
+ Machine *machine;
+ const char *e, *d;
+ int r;
+
+ assert(m);
+ assert(ret_uid);
+ assert(ret_real_name);
+
+ if (!valid_user_group_name(name, 0))
+ return -ESRCH;
+
+ e = startswith(name, "vu-");
+ if (!e)
+ return -ESRCH;
+
+ d = strrchr(e, '-');
+ if (!d)
+ return -ESRCH;
+
+ if (parse_uid(d + 1, &uid) < 0)
+ return -ESRCH;
+
+ mn = strndup(e, d - e);
+ if (!mn)
+ return -ENOMEM;
+
+ machine = hashmap_get(m->machines, mn);
+ if (!machine)
+ return -ESRCH;
+
+ if (machine->class != MACHINE_CONTAINER)
+ return -ESRCH;
+
+ r = machine_translate_uid(machine, uid, &converted_uid);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&rn, "UID " UID_FMT " of Container %s", uid, machine->name) < 0)
+ return -ENOMEM;
+ if (!valid_gecos(rn))
+ rn = mfree(rn);
+
+ *ret_uid = converted_uid;
+ *ret_real_name = TAKE_PTR(rn);
+ return 0;
+}
+
+static int vl_method_get_user_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(LookupParameters, uid), 0 },
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, user_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ LookupParameters p = {
+ .uid = UID_INVALID,
+ };
+ _cleanup_free_ char *found_name = NULL, *found_real_name = NULL;
+ uid_t found_uid = UID_INVALID, uid;
+ Manager *m = userdata;
+ const char *un;
+ int r;
+
+ assert(parameters);
+ assert(m);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, "io.systemd.Machine"))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ if (uid_is_valid(p.uid))
+ r = user_lookup_uid(m, p.uid, &found_name, &found_real_name);
+ else if (p.user_name)
+ r = user_lookup_name(m, p.user_name, &found_uid, &found_real_name);
+ else
+ return varlink_error(link, "io.systemd.UserDatabase.EnumerationNotSupported", NULL);
+ if (r == -ESRCH)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ if (r < 0)
+ return r;
+
+ uid = uid_is_valid(found_uid) ? found_uid : p.uid;
+ un = found_name ?: p.user_name;
+
+ if (!user_match_lookup_parameters(&p, un, uid))
+ return varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
+
+ r = build_user_json(un, uid, found_real_name, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, v);
+}
+
+static int build_group_json(const char *group_name, gid_t gid, const char *description, JsonVariant **ret) {
+ assert(group_name);
+ assert(gid_is_valid(gid));
+ assert(ret);
+
+ return json_build(ret, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("record", JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(group_name)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(gid)),
+ JSON_BUILD_PAIR_CONDITION(!isempty(description), "description", JSON_BUILD_STRING(description)),
+ JSON_BUILD_PAIR("service", JSON_BUILD_STRING("io.systemd.Machine")),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("container"))))));
+ }
+
+static bool group_match_lookup_parameters(LookupParameters *p, const char *name, gid_t gid) {
+ assert(p);
+
+ if (p->group_name && !streq(name, p->group_name))
+ return false;
+
+ if (gid_is_valid(p->gid) && gid != p->gid)
+ return false;
+
+ return true;
+}
+
+static int group_lookup_gid(Manager *m, gid_t gid, char **ret_name, char **ret_description) {
+ _cleanup_free_ char *n = NULL, *d = NULL;
+ gid_t converted_gid;
+ Machine *machine;
+ int r;
+
+ assert(m);
+ assert(gid_is_valid(gid));
+ assert(ret_name);
+ assert(ret_description);
+
+ if (gid < 0x10000) /* Host GID range */
+ return -ESRCH;
+
+ r = manager_find_machine_for_gid(m, gid, &machine, &converted_gid);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -ESRCH;
+
+ if (asprintf(&n, "vg-%s-" GID_FMT, machine->name, converted_gid) < 0)
+ return -ENOMEM;
+
+ if (!valid_user_group_name(n, 0))
+ return -ESRCH;
+
+ if (asprintf(&d, "GID " GID_FMT " of Container %s", converted_gid, machine->name) < 0)
+ return -ENOMEM;
+ if (!valid_gecos(d))
+ d = mfree(d);
+
+ *ret_name = TAKE_PTR(n);
+ *ret_description = TAKE_PTR(d);
+
+ return 0;
+}
+
+static int group_lookup_name(Manager *m, const char *name, gid_t *ret_gid, char **ret_description) {
+ _cleanup_free_ char *mn = NULL, *desc = NULL;
+ gid_t gid, converted_gid;
+ Machine *machine;
+ const char *e, *d;
+ int r;
+
+ assert(m);
+ assert(ret_gid);
+ assert(ret_description);
+
+ if (!valid_user_group_name(name, 0))
+ return -ESRCH;
+
+ e = startswith(name, "vg-");
+ if (!e)
+ return -ESRCH;
+
+ d = strrchr(e, '-');
+ if (!d)
+ return -ESRCH;
+
+ if (parse_gid(d + 1, &gid) < 0)
+ return -ESRCH;
+
+ mn = strndup(e, d - e);
+ if (!mn)
+ return -ENOMEM;
+
+ machine = hashmap_get(m->machines, mn);
+ if (!machine)
+ return -ESRCH;
+
+ if (machine->class != MACHINE_CONTAINER)
+ return -ESRCH;
+
+ r = machine_translate_gid(machine, gid, &converted_gid);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&desc, "GID " GID_FMT " of Container %s", gid, machine->name) < 0)
+ return -ENOMEM;
+ if (!valid_gecos(desc))
+ desc = mfree(desc);
+
+ *ret_gid = converted_gid;
+ *ret_description = desc;
+ return 0;
+}
+
+static int vl_method_get_group_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(LookupParameters, gid), 0 },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, group_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ LookupParameters p = {
+ .gid = GID_INVALID,
+ };
+ _cleanup_free_ char *found_name = NULL, *found_description = NULL;
+ uid_t found_gid = GID_INVALID, gid;
+ Manager *m = userdata;
+ const char *gn;
+ int r;
+
+ assert(parameters);
+ assert(m);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, "io.systemd.Machine"))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ if (gid_is_valid(p.gid))
+ r = group_lookup_gid(m, p.gid, &found_name, &found_description);
+ else if (p.group_name)
+ r = group_lookup_name(m, p.group_name, (uid_t*) &found_gid, &found_description);
+ else
+ return varlink_error(link, "io.systemd.UserDatabase.EnumerationNotSupported", NULL);
+ if (r == -ESRCH)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ if (r < 0)
+ return r;
+
+ gid = gid_is_valid(found_gid) ? found_gid : p.gid;
+ gn = found_name ?: p.group_name;
+
+ if (!group_match_lookup_parameters(&p, gn, gid))
+ return varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
+
+ r = build_group_json(gn, gid, found_description, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, v);
+}
+
+static int vl_method_get_memberships(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, user_name), JSON_SAFE },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, group_name), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ LookupParameters p = {};
+ int r;
+
+ assert(parameters);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (!streq_ptr(p.service, "io.systemd.Machine"))
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+
+ /* We don't support auxiliary groups for machines. */
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+}
+
+int manager_varlink_init(Manager *m) {
+ _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
+ int r;
+
+ assert(m);
+
+ if (m->varlink_server)
+ return 0;
+
+ r = varlink_server_new(&s, VARLINK_SERVER_ACCOUNT_UID);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate varlink server object: %m");
+
+ varlink_server_set_userdata(s, m);
+
+ r = varlink_server_bind_method_many(
+ s,
+ "io.systemd.UserDatabase.GetUserRecord", vl_method_get_user_record,
+ "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
+ "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register varlink methods: %m");
+
+ (void) mkdir_p("/run/systemd/userdb", 0755);
+
+ r = varlink_server_listen_address(s, "/run/systemd/userdb/io.systemd.Machine", 0666);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind to varlink socket: %m");
+
+ r = varlink_server_attach_event(s, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ m->varlink_server = TAKE_PTR(s);
+ return 0;
+}
+
+void manager_varlink_done(Manager *m) {
+ assert(m);
+
+ m->varlink_server = varlink_server_unref(m->varlink_server);
+}
diff --git a/src/machine/machined-varlink.h b/src/machine/machined-varlink.h
new file mode 100644
index 0000000..f26bbe5
--- /dev/null
+++ b/src/machine/machined-varlink.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "machined.h"
+
+int manager_varlink_init(Manager *m);
+void manager_varlink_done(Manager *m);
diff --git a/src/machine/machined.c b/src/machine/machined.c
new file mode 100644
index 0000000..c3c08d1
--- /dev/null
+++ b/src/machine/machined.c
@@ -0,0 +1,367 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-log-control-api.h"
+#include "bus-polkit.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hostname-util.h"
+#include "label.h"
+#include "machine-image.h"
+#include "machined-varlink.h"
+#include "machined.h"
+#include "main-func.h"
+#include "process-util.h"
+#include "service-util.h"
+#include "signal-util.h"
+#include "special.h"
+
+static Manager* manager_unref(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(machine_hash_ops, char, string_hash_func, string_compare_func, Machine, machine_free);
+
+static int manager_new(Manager **ret) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ m->machines = hashmap_new(&machine_hash_ops);
+ m->machine_units = hashmap_new(&string_hash_ops);
+ m->machine_leaders = hashmap_new(NULL);
+
+ if (!m->machines || !m->machine_units || !m->machine_leaders)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static Manager* manager_unref(Manager *m) {
+ if (!m)
+ return NULL;
+
+ while (m->operations)
+ operation_free(m->operations);
+
+ assert(m->n_operations == 0);
+
+ hashmap_free(m->machines); /* This will free all machines, so that the machine_units/machine_leaders is empty */
+ hashmap_free(m->machine_units);
+ hashmap_free(m->machine_leaders);
+ hashmap_free(m->image_cache);
+
+ sd_event_source_unref(m->image_cache_defer_event);
+ sd_event_source_unref(m->nscd_cache_flush_event);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ manager_varlink_done(m);
+
+ sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ return mfree(m);
+}
+
+static int manager_add_host_machine(Manager *m) {
+ _cleanup_free_ char *rd = NULL, *unit = NULL;
+ sd_id128_t mid;
+ Machine *t;
+ int r;
+
+ if (m->host_machine)
+ return 0;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine ID: %m");
+
+ rd = strdup("/");
+ if (!rd)
+ return log_oom();
+
+ unit = strdup(SPECIAL_ROOT_SLICE);
+ if (!unit)
+ return log_oom();
+
+ t = machine_new(m, MACHINE_HOST, ".host");
+ if (!t)
+ return log_oom();
+
+ t->leader = 1;
+ t->id = mid;
+
+ t->root_directory = TAKE_PTR(rd);
+ t->unit = TAKE_PTR(unit);
+
+ dual_timestamp_from_boottime_or_monotonic(&t->timestamp, 0);
+
+ m->host_machine = t;
+
+ return 0;
+}
+
+static int manager_enumerate_machines(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r;
+
+ assert(m);
+
+ r = manager_add_host_machine(m);
+ if (r < 0)
+ return r;
+
+ /* Read in machine data stored on disk */
+ d = opendir("/run/systemd/machines");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/machines: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ struct Machine *machine;
+ int k;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ /* Ignore symlinks that map the unit name to the machine */
+ if (startswith(de->d_name, "unit:"))
+ continue;
+
+ if (!machine_name_is_valid(de->d_name))
+ continue;
+
+ k = manager_add_machine(m, de->d_name, &machine);
+ if (k < 0) {
+ r = log_error_errno(k, "Failed to add machine by file name %s: %m", de->d_name);
+ continue;
+ }
+
+ machine_add_to_gc_queue(machine);
+
+ k = machine_load(machine);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->bus);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = bus_add_implementation(m->bus, &manager_object, m);
+ if (r < 0)
+ return r;
+
+ r = bus_match_signal_async(m->bus, NULL, bus_systemd_mgr, "JobRemoved", match_job_removed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match for JobRemoved: %m");
+
+ r = bus_match_signal_async(m->bus, NULL, bus_systemd_mgr, "UnitRemoved", match_unit_removed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for UnitRemoved: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ NULL,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ match_properties_changed, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for PropertiesChanged: %m");
+
+ r = bus_match_signal_async(m->bus, NULL, bus_systemd_mgr, "Reloading", match_reloading, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for Reloading: %m");
+
+ r = bus_call_method_async(m->bus, NULL, bus_systemd_mgr, "Subscribe", NULL, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable subscription: %m");
+
+ r = bus_log_control_api_register(m->bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.machine1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
+
+static void manager_gc(Manager *m, bool drop_not_started) {
+ Machine *machine;
+
+ assert(m);
+
+ while ((machine = m->machine_gc_queue)) {
+ LIST_REMOVE(gc_queue, m->machine_gc_queue, machine);
+ machine->in_gc_queue = false;
+
+ /* First, if we are not closing yet, initiate stopping */
+ if (machine_may_gc(machine, drop_not_started) &&
+ machine_get_state(machine) != MACHINE_CLOSING)
+ machine_stop(machine);
+
+ /* Now, the stop probably made this referenced
+ * again, but if it didn't, then it's time to let it
+ * go entirely. */
+ if (machine_may_gc(machine, drop_not_started)) {
+ machine_finalize(machine);
+ machine_free(machine);
+ }
+ }
+}
+
+static int manager_startup(Manager *m) {
+ Machine *machine;
+ int r;
+
+ assert(m);
+
+ /* Connect to the bus */
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ /* Set up Varlink service */
+ r = manager_varlink_init(m);
+ if (r < 0)
+ return r;
+
+ /* Deserialize state */
+ manager_enumerate_machines(m);
+
+ /* Remove stale objects before we start them */
+ manager_gc(m, false);
+
+ /* And start everything */
+ HASHMAP_FOREACH(machine, m->machines)
+ machine_start(machine, NULL, NULL);
+
+ return 0;
+}
+
+static bool check_idle(void *userdata) {
+ Manager *m = userdata;
+
+ if (m->operations)
+ return false;
+
+ if (varlink_server_current_connections(m->varlink_server) > 0)
+ return false;
+
+ manager_gc(m, true);
+
+ return hashmap_isempty(m->machines);
+}
+
+static int manager_run(Manager *m) {
+ assert(m);
+
+ return bus_event_loop_with_idle(
+ m->event,
+ m->bus,
+ "org.freedesktop.machine1",
+ DEFAULT_EXIT_USEC,
+ check_idle, m);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ log_set_facility(LOG_AUTH);
+ log_setup_service();
+
+ r = service_parse_argv("systemd-machined.service",
+ "Manage registrations of local VMs and containers.",
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object),
+ argc, argv);
+ if (r <= 0)
+ return r;
+
+ umask(0022);
+
+ /* Always create the directories people can create inotify watches in. Note that some applications might check
+ * for the existence of /run/systemd/machines/ to determine whether machined is available, so please always
+ * make sure this check stays in. */
+ (void) mkdir_label("/run/systemd/machines", 0755);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager object: %m");
+
+ r = manager_startup(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fully start up daemon: %m");
+
+ log_debug("systemd-machined running as pid "PID_FMT, getpid_cached());
+ (void) sd_notify(false,
+ "READY=1\n"
+ "STATUS=Processing requests...");
+
+ r = manager_run(m);
+
+ log_debug("systemd-machined stopped as pid "PID_FMT, getpid_cached());
+ (void) sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down...");
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/machine/machined.h b/src/machine/machined.h
new file mode 100644
index 0000000..6e4182b
--- /dev/null
+++ b/src/machine/machined.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+typedef struct Manager Manager;
+
+#include "hashmap.h"
+#include "image-dbus.h"
+#include "list.h"
+#include "machine-dbus.h"
+#include "machine.h"
+#include "operation.h"
+#include "varlink.h"
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ Hashmap *machines;
+ Hashmap *machine_units;
+ Hashmap *machine_leaders;
+
+ Hashmap *polkit_registry;
+
+ Hashmap *image_cache;
+ sd_event_source *image_cache_defer_event;
+
+ LIST_HEAD(Machine, machine_gc_queue);
+
+ Machine *host_machine;
+
+ LIST_HEAD(Operation, operations);
+ unsigned n_operations;
+
+ sd_event_source *nscd_cache_flush_event;
+
+ VarlinkServer *varlink_server;
+};
+
+int manager_add_machine(Manager *m, const char *name, Machine **_machine);
+int manager_get_machine_by_pid(Manager *m, pid_t pid, Machine **machine);
+
+extern const BusObjectImplementation manager_object;
+
+int match_reloading(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_unit_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
+
+int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job);
+int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error);
+int manager_unref_unit(Manager *m, const char *unit, sd_bus_error *error);
+int manager_unit_is_active(Manager *manager, const char *unit);
+int manager_job_is_active(Manager *manager, const char *path);
+
+int manager_enqueue_nscd_cache_flush(Manager *m);
+
+int manager_find_machine_for_uid(Manager *m, uid_t host_uid, Machine **ret_machine, uid_t *ret_internal_uid);
+int manager_find_machine_for_gid(Manager *m, gid_t host_gid, Machine **ret_machine, gid_t *ret_internal_gid);
diff --git a/src/machine/meson.build b/src/machine/meson.build
new file mode 100644
index 0000000..ebbd46d
--- /dev/null
+++ b/src/machine/meson.build
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_machined_sources = files('''
+ machined.c
+ machined.h
+'''.split())
+
+libmachine_core_sources = files('''
+ image-dbus.c
+ image-dbus.h
+ machine-dbus.c
+ machine-dbus.h
+ machine.c
+ machine.h
+ machined-core.c
+ machined-dbus.c
+ machined-varlink.c
+ machined-varlink.h
+ operation.c
+ operation.h
+'''.split())
+
+libmachine_core = static_library(
+ 'machine-core',
+ libmachine_core_sources,
+ include_directories : includes,
+ dependencies : [threads])
+
+if conf.get('ENABLE_MACHINED') == 1
+ install_data('org.freedesktop.machine1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.machine1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.machine1.policy',
+ install_dir : polkitpolicydir)
+endif
+
+tests += [
+ [['src/machine/test-machine-tables.c'],
+ [libmachine_core,
+ libshared],
+ [threads],
+ 'ENABLE_MACHINED'],
+]
diff --git a/src/machine/operation.c b/src/machine/operation.c
new file mode 100644
index 0000000..34565e3
--- /dev/null
+++ b/src/machine/operation.c
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/wait.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "operation.h"
+#include "process-util.h"
+
+static int operation_done(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Operation *o = userdata;
+ int r;
+
+ assert(o);
+ assert(si);
+
+ log_debug("Operating " PID_FMT " is now complete with code=%s status=%i",
+ o->pid,
+ sigchld_code_to_string(si->si_code), si->si_status);
+
+ o->pid = 0;
+
+ if (si->si_code != CLD_EXITED) {
+ r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+ goto fail;
+ }
+
+ if (si->si_status == EXIT_SUCCESS)
+ r = 0;
+ else if (read(o->errno_fd, &r, sizeof(r)) != sizeof(r)) { /* Try to acquire error code for failed operation */
+ r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child failed.");
+ goto fail;
+ }
+
+ if (o->done) {
+ /* A completion routine is set for this operation, call it. */
+ r = o->done(o, r, &error);
+ if (r < 0) {
+ if (!sd_bus_error_is_set(&error))
+ sd_bus_error_set_errno(&error, r);
+
+ goto fail;
+ }
+
+ } else {
+ /* The default operation when done is to simply return an error on failure or an empty success
+ * message on success. */
+ if (r < 0) {
+ sd_bus_error_set_errno(&error, r);
+ goto fail;
+ }
+
+ r = sd_bus_reply_method_return(o->message, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to reply to message: %m");
+ }
+
+ operation_free(o);
+ return 0;
+
+fail:
+ r = sd_bus_reply_method_error(o->message, &error);
+ if (r < 0)
+ log_error_errno(r, "Failed to reply to message: %m");
+
+ operation_free(o);
+ return 0;
+}
+
+int operation_new(Manager *manager, Machine *machine, pid_t child, sd_bus_message *message, int errno_fd, Operation **ret) {
+ Operation *o;
+ int r;
+
+ assert(manager);
+ assert(child > 1);
+ assert(message);
+ assert(errno_fd >= 0);
+
+ o = new0(Operation, 1);
+ if (!o)
+ return -ENOMEM;
+
+ o->extra_fd = -1;
+
+ r = sd_event_add_child(manager->event, &o->event_source, child, WEXITED, operation_done, o);
+ if (r < 0) {
+ free(o);
+ return r;
+ }
+
+ o->pid = child;
+ o->message = sd_bus_message_ref(message);
+ o->errno_fd = errno_fd;
+
+ LIST_PREPEND(operations, manager->operations, o);
+ manager->n_operations++;
+ o->manager = manager;
+
+ if (machine) {
+ LIST_PREPEND(operations_by_machine, machine->operations, o);
+ o->machine = machine;
+ }
+
+ log_debug("Started new operation " PID_FMT ".", child);
+
+ /* At this point we took ownership of both the child and the errno file descriptor! */
+
+ if (ret)
+ *ret = o;
+
+ return 0;
+}
+
+Operation *operation_free(Operation *o) {
+ if (!o)
+ return NULL;
+
+ sd_event_source_unref(o->event_source);
+
+ safe_close(o->errno_fd);
+ safe_close(o->extra_fd);
+
+ if (o->pid > 1)
+ (void) sigkill_wait(o->pid);
+
+ sd_bus_message_unref(o->message);
+
+ if (o->manager) {
+ LIST_REMOVE(operations, o->manager->operations, o);
+ o->manager->n_operations--;
+ }
+
+ if (o->machine)
+ LIST_REMOVE(operations_by_machine, o->machine->operations, o);
+
+ return mfree(o);
+}
diff --git a/src/machine/operation.h b/src/machine/operation.h
new file mode 100644
index 0000000..fd48288
--- /dev/null
+++ b/src/machine/operation.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "list.h"
+
+typedef struct Operation Operation;
+
+#include "machined.h"
+
+#define OPERATIONS_MAX 64
+
+struct Operation {
+ Manager *manager;
+ Machine *machine;
+ pid_t pid;
+ sd_bus_message *message;
+ int errno_fd;
+ int extra_fd;
+ sd_event_source *event_source;
+ int (*done)(Operation *o, int ret, sd_bus_error *error);
+ LIST_FIELDS(Operation, operations);
+ LIST_FIELDS(Operation, operations_by_machine);
+};
+
+int operation_new(Manager *manager, Machine *machine, pid_t child, sd_bus_message *message, int errno_fd, Operation **ret);
+Operation *operation_free(Operation *o);
diff --git a/src/machine/org.freedesktop.machine1.conf b/src/machine/org.freedesktop.machine1.conf
new file mode 100644
index 0000000..eafbf6b
--- /dev/null
+++ b/src/machine/org.freedesktop.machine1.conf
@@ -0,0 +1,242 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.machine1"/>
+ <allow send_destination="org.freedesktop.machine1"/>
+ <allow receive_sender="org.freedesktop.machine1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.machine1"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="ListMachines"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="ListImages"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachineByPID"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImage"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachineAddresses"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachineOSRelease"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetMachineUIDShift"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="OpenMachineLogin"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="OpenMachineShell"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="UnregisterMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="TerminateMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="KillMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="BindMountMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="CopyFromMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="CopyToMachine"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="RemoveImage"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="RenameImage"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="CloneImage"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MarkImageReadOnly"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="SetPoolLimit"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="SetImageLimit"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImageHostname"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImageMachineID"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImageMachineInfo"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="GetImageOSRelease"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="CleanPool"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MapFromMachineUser"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MapToMachineUser"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MapFromMachineGroup"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Manager"
+ send_member="MapToMachineGroup"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="GetAddresses"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="GetOSRelease"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="GetUIDShift"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="OpenLogin"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="OpenShell"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="Terminate"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="Kill"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="BindMount"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="CopyFrom"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Machine"
+ send_member="CopyTo"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="Remove"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="Rename"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="Clone"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="SetLimit"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="MarkReadOnly"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="GetHostname"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="GetMachineID"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="GetMachineInfo"/>
+
+ <allow send_destination="org.freedesktop.machine1"
+ send_interface="org.freedesktop.machine1.Image"
+ send_member="GetOSRelease"/>
+
+ <allow receive_sender="org.freedesktop.machine1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/machine/org.freedesktop.machine1.policy b/src/machine/org.freedesktop.machine1.policy
new file mode 100644
index 0000000..ddf5ec0
--- /dev/null
+++ b/src/machine/org.freedesktop.machine1.policy
@@ -0,0 +1,104 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.machine1.login">
+ <description gettext-domain="systemd">Log into a local container</description>
+ <message gettext-domain="systemd">Authentication is required to log into a local container.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.machine1.host-login">
+ <description gettext-domain="systemd">Log into the local host</description>
+ <message gettext-domain="systemd">Authentication is required to log into the local host.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>yes</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.machine1.shell">
+ <description gettext-domain="systemd">Acquire a shell in a local container</description>
+ <message gettext-domain="systemd">Authentication is required to acquire a shell in a local container.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.login</annotate>
+ </action>
+
+ <action id="org.freedesktop.machine1.host-shell">
+ <description gettext-domain="systemd">Acquire a shell on the local host</description>
+ <message gettext-domain="systemd">Authentication is required to acquire a shell on the local host.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.host-login</annotate>
+ </action>
+
+ <action id="org.freedesktop.machine1.open-pty">
+ <description gettext-domain="systemd">Acquire a pseudo TTY in a local container</description>
+ <message gettext-domain="systemd">Authentication is required to acquire a pseudo TTY in a local container.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.machine1.host-open-pty">
+ <description gettext-domain="systemd">Acquire a pseudo TTY on the local host</description>
+ <message gettext-domain="systemd">Authentication is required to acquire a pseudo TTY on the local host.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.machine1.manage-machines">
+ <description gettext-domain="systemd">Manage local virtual machines and containers</description>
+ <message gettext-domain="systemd">Authentication is required to manage local virtual machines and containers.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.login1.shell org.freedesktop.login1.login</annotate>
+ </action>
+
+ <action id="org.freedesktop.machine1.manage-images">
+ <description gettext-domain="systemd">Manage local virtual machine and container images</description>
+ <message gettext-domain="systemd">Authentication is required to manage local virtual machine and container images.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/machine/org.freedesktop.machine1.service b/src/machine/org.freedesktop.machine1.service
new file mode 100644
index 0000000..64b73c1
--- /dev/null
+++ b/src/machine/org.freedesktop.machine1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.machine1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.machine1.service
diff --git a/src/machine/test-machine-tables.c b/src/machine/test-machine-tables.c
new file mode 100644
index 0000000..0e51755
--- /dev/null
+++ b/src/machine/test-machine-tables.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "machine.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ test_table(kill_who, KILL_WHO);
+ test_table(machine_class, MACHINE_CLASS);
+ test_table(machine_state, MACHINE_STATE);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/modules-load/modules-load.c b/src/modules-load/modules-load.c
new file mode 100644
index 0000000..2f0cef5
--- /dev/null
+++ b/src/modules-load/modules-load.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <sys/stat.h>
+
+#include "conf-files.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "main-func.h"
+#include "module-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static char **arg_proc_cmdline_modules = NULL;
+static const char conf_file_dirs[] = CONF_PATHS_NULSTR("modules-load.d");
+
+STATIC_DESTRUCTOR_REGISTER(arg_proc_cmdline_modules, strv_freep);
+
+static void systemd_kmod_log(void *data, int priority, const char *file, int line,
+ const char *fn, const char *format, va_list args) {
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_internalv(priority, 0, file, line, fn, format, args);
+ REENABLE_WARNING;
+}
+
+static int add_modules(const char *p) {
+ _cleanup_strv_free_ char **k = NULL;
+
+ k = strv_split(p, ",");
+ if (!k)
+ return log_oom();
+
+ if (strv_extend_strv(&arg_proc_cmdline_modules, k, true) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ if (proc_cmdline_key_streq(key, "modules_load")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = add_modules(value);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int apply_file(struct kmod_ctx *ctx, const char *path, bool ignore_enoent) {
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(ctx);
+ assert(path);
+
+ r = search_and_fopen_nulstr(path, "re", NULL, conf_file_dirs, &f);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to open %s: %m", path);
+ }
+
+ log_debug("apply: %s", path);
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read file '%s': %m", path);
+ if (k == 0)
+ break;
+
+ l = strstrip(line);
+ if (isempty(l))
+ continue;
+ if (strchr(COMMENTS, *l))
+ continue;
+
+ k = module_load_and_warn(ctx, l, true);
+ if (k == -ENOENT)
+ continue;
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-modules-load.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Loads statically configured kernel modules.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
+ int r, k;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ ctx = kmod_new(NULL, NULL);
+ if (!ctx) {
+ log_error("Failed to allocate memory for kmod.");
+ return -ENOMEM;
+ }
+
+ kmod_load_resources(ctx);
+ kmod_set_log_fn(ctx, systemd_kmod_log, NULL);
+
+ r = 0;
+
+ if (argc > optind) {
+ int i;
+
+ for (i = optind; i < argc; i++) {
+ k = apply_file(ctx, argv[i], false);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ } else {
+ _cleanup_strv_free_ char **files = NULL;
+ char **fn, **i;
+
+ STRV_FOREACH(i, arg_proc_cmdline_modules) {
+ k = module_load_and_warn(ctx, *i, true);
+ if (k == -ENOENT)
+ continue;
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ k = conf_files_list_nulstr(&files, ".conf", NULL, 0, conf_file_dirs);
+ if (k < 0) {
+ log_error_errno(k, "Failed to enumerate modules-load.d files: %m");
+ if (r == 0)
+ r = k;
+ return r;
+ }
+
+ STRV_FOREACH(fn, files) {
+ k = apply_file(ctx, *fn, true);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/mount/mount-tool.c b/src/mount/mount-tool.c
new file mode 100644
index 0000000..673f855
--- /dev/null
+++ b/src/mount/mount-tool.c
@@ -0,0 +1,1539 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-unit-util.h"
+#include "bus-wait-for-jobs.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "fstab-util.h"
+#include "libmount-util.h"
+#include "main-func.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "sort-util.h"
+#include "spawn-polkit-agent.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+enum {
+ ACTION_DEFAULT,
+ ACTION_MOUNT,
+ ACTION_AUTOMOUNT,
+ ACTION_UMOUNT,
+ ACTION_LIST,
+} arg_action = ACTION_DEFAULT;
+
+static bool arg_no_block = false;
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static bool arg_full = false;
+static bool arg_ask_password = true;
+static bool arg_quiet = false;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static bool arg_user = false;
+static const char *arg_host = NULL;
+static bool arg_discover = false;
+static char *arg_mount_what = NULL;
+static char *arg_mount_where = NULL;
+static char *arg_mount_type = NULL;
+static char *arg_mount_options = NULL;
+static char *arg_description = NULL;
+static char **arg_property = NULL;
+static usec_t arg_timeout_idle = USEC_INFINITY;
+static bool arg_timeout_idle_set = false;
+static char **arg_automount_property = NULL;
+static int arg_bind_device = -1;
+static uid_t arg_uid = UID_INVALID;
+static gid_t arg_gid = GID_INVALID;
+static bool arg_fsck = true;
+static bool arg_aggressive_gc = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_mount_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_mount_where, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_mount_type, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_mount_options, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_description, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_automount_property, strv_freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-mount", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("systemd-mount [OPTIONS...] WHAT [WHERE]\n"
+ "systemd-mount [OPTIONS...] --list\n"
+ "%s [OPTIONS...] %sWHAT|WHERE...\n\n"
+ "Establish a mount or auto-mount point transiently.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-block Do not wait until operation finished\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers\n"
+ " -l --full Do not ellipsize output\n"
+ " --no-ask-password Do not prompt for password\n"
+ " -q --quiet Suppress information messages during runtime\n"
+ " --user Run as user unit\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --discover Discover mount device metadata\n"
+ " -t --type=TYPE File system type\n"
+ " -o --options=OPTIONS Mount options\n"
+ " --owner=USER Add uid= and gid= options for USER\n"
+ " --fsck=no Don't run file system check before mount\n"
+ " --description=TEXT Description for unit\n"
+ " -p --property=NAME=VALUE Set mount unit property\n"
+ " -A --automount=BOOL Create an auto-mount point\n"
+ " --timeout-idle-sec=SEC Specify automount idle timeout\n"
+ " --automount-property=NAME=VALUE\n"
+ " Set automount unit property\n"
+ " --bind-device Bind automount unit to device\n"
+ " --list List mountable block devices\n"
+ " -u --umount Unmount mount points\n"
+ " -G --collect Unload unit after it stopped, even when failed\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , streq(program_invocation_short_name, "systemd-umount") ? "" : "--umount "
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_BLOCK,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_NO_ASK_PASSWORD,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_DISCOVER,
+ ARG_MOUNT_TYPE,
+ ARG_MOUNT_OPTIONS,
+ ARG_OWNER,
+ ARG_FSCK,
+ ARG_DESCRIPTION,
+ ARG_TIMEOUT_IDLE,
+ ARG_AUTOMOUNT,
+ ARG_AUTOMOUNT_PROPERTY,
+ ARG_BIND_DEVICE,
+ ARG_LIST,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-block", no_argument, NULL, ARG_NO_BLOCK },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "full", no_argument, NULL, 'l' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "quiet", no_argument, NULL, 'q' },
+ { "user", no_argument, NULL, ARG_USER },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "discover", no_argument, NULL, ARG_DISCOVER },
+ { "type", required_argument, NULL, 't' },
+ { "options", required_argument, NULL, 'o' },
+ { "owner", required_argument, NULL, ARG_OWNER },
+ { "fsck", required_argument, NULL, ARG_FSCK },
+ { "description", required_argument, NULL, ARG_DESCRIPTION },
+ { "property", required_argument, NULL, 'p' },
+ { "automount", required_argument, NULL, ARG_AUTOMOUNT },
+ { "timeout-idle-sec", required_argument, NULL, ARG_TIMEOUT_IDLE },
+ { "automount-property", required_argument, NULL, ARG_AUTOMOUNT_PROPERTY },
+ { "bind-device", no_argument, NULL, ARG_BIND_DEVICE },
+ { "list", no_argument, NULL, ARG_LIST },
+ { "umount", no_argument, NULL, 'u' },
+ { "unmount", no_argument, NULL, 'u' },
+ { "collect", no_argument, NULL, 'G' },
+ {},
+ };
+
+ int r, c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ if (strstr(program_invocation_short_name, "systemd-umount"))
+ arg_action = ACTION_UMOUNT;
+
+ while ((c = getopt_long(argc, argv, "hqH:M:t:o:p:AuGl", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_BLOCK:
+ arg_no_block = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_USER:
+ arg_user = true;
+ break;
+
+ case ARG_SYSTEM:
+ arg_user = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_DISCOVER:
+ arg_discover = true;
+ break;
+
+ case 't':
+ if (free_and_strdup(&arg_mount_type, optarg) < 0)
+ return log_oom();
+ break;
+
+ case 'o':
+ if (free_and_strdup(&arg_mount_options, optarg) < 0)
+ return log_oom();
+ break;
+
+ case ARG_OWNER: {
+ const char *user = optarg;
+
+ r = get_user_creds(&user, &arg_uid, &arg_gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r,
+ r == -EBADMSG ? "UID or GID of user %s are invalid."
+ : "Cannot use \"%s\" as owner: %m",
+ optarg);
+ break;
+ }
+
+ case ARG_FSCK:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --fsck= argument: %s", optarg);
+
+ arg_fsck = r;
+ break;
+
+ case ARG_DESCRIPTION:
+ if (free_and_strdup(&arg_description, optarg) < 0)
+ return log_oom();
+ break;
+
+ case 'p':
+ if (strv_extend(&arg_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case 'A':
+ arg_action = ACTION_AUTOMOUNT;
+ break;
+
+ case ARG_AUTOMOUNT:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "--automount= expects a valid boolean parameter: %s", optarg);
+
+ arg_action = r ? ACTION_AUTOMOUNT : ACTION_MOUNT;
+ break;
+
+ case ARG_TIMEOUT_IDLE:
+ r = parse_sec(optarg, &arg_timeout_idle);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timeout: %s", optarg);
+
+ break;
+
+ case ARG_AUTOMOUNT_PROPERTY:
+ if (strv_extend(&arg_automount_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_BIND_DEVICE:
+ arg_bind_device = true;
+ break;
+
+ case ARG_LIST:
+ arg_action = ACTION_LIST;
+ break;
+
+ case 'u':
+ arg_action = ACTION_UMOUNT;
+ break;
+
+ case 'G':
+ arg_aggressive_gc = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_user && arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Execution in user context is not supported on non-local systems.");
+
+ if (arg_action == ACTION_LIST) {
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Listing devices only supported locally.");
+ } else if (arg_action == ACTION_UMOUNT) {
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "At least one argument required.");
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL) {
+ int i;
+
+ for (i = optind; i < argc; i++)
+ if (!path_is_absolute(argv[i]) )
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only absolute path is supported: %s", argv[i]);
+ }
+ } else {
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "At least one argument required.");
+
+ if (argc > optind+2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "At most two arguments required.");
+
+ if (arg_mount_type && !fstype_is_blockdev_backed(arg_mount_type)) {
+ arg_mount_what = strdup(argv[optind]);
+ if (!arg_mount_what)
+ return log_oom();
+
+ } else if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ _cleanup_free_ char *u = NULL;
+
+ u = fstab_node_to_udev_node(argv[optind]);
+ if (!u)
+ return log_oom();
+
+ r = chase_symlinks(u, NULL, 0, &arg_mount_what, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make path %s absolute: %m", u);
+ } else {
+ arg_mount_what = strdup(argv[optind]);
+ if (!arg_mount_what)
+ return log_oom();
+
+ path_simplify(arg_mount_what, false);
+
+ if (!path_is_absolute(arg_mount_what))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only absolute path is supported: %s", arg_mount_what);
+ }
+
+ if (argc > optind+1) {
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ r = chase_symlinks(argv[optind+1], NULL, CHASE_NONEXISTENT, &arg_mount_where, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make path %s absolute: %m", argv[optind+1]);
+ } else {
+ arg_mount_where = strdup(argv[optind+1]);
+ if (!arg_mount_where)
+ return log_oom();
+
+ path_simplify(arg_mount_where, false);
+
+ if (!path_is_absolute(arg_mount_where))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only absolute path is supported: %s", arg_mount_where);
+ }
+ } else
+ arg_discover = true;
+
+ if (arg_discover && arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Automatic mount location discovery is only supported locally.");
+ }
+
+ return 1;
+}
+
+static int transient_unit_set_properties(sd_bus_message *m, UnitType t, char **properties) {
+ int r;
+
+ if (!isempty(arg_description)) {
+ r = sd_bus_message_append(m, "(sv)", "Description", "s", arg_description);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_bind_device && is_device_path(arg_mount_what)) {
+ _cleanup_free_ char *device_unit = NULL;
+
+ r = unit_name_from_path(arg_mount_what, ".device", &device_unit);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "(sv)(sv)",
+ "After", "as", 1, device_unit,
+ "BindsTo", "as", 1, device_unit);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_aggressive_gc) {
+ r = sd_bus_message_append(m, "(sv)", "CollectMode", "s", "inactive-or-failed");
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_append_unit_property_assignment_many(m, t, properties);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int transient_mount_set_properties(sd_bus_message *m) {
+ _cleanup_free_ char *options = NULL;
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_MOUNT, arg_property);
+ if (r < 0)
+ return r;
+
+ if (arg_mount_what) {
+ r = sd_bus_message_append(m, "(sv)", "What", "s", arg_mount_what);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_mount_type) {
+ r = sd_bus_message_append(m, "(sv)", "Type", "s", arg_mount_type);
+ if (r < 0)
+ return r;
+ }
+
+ /* Prepend uid=…,gid=… if arg_uid is set */
+ if (arg_uid != UID_INVALID) {
+ r = asprintf(&options,
+ "uid=" UID_FMT ",gid=" GID_FMT "%s%s",
+ arg_uid, arg_gid,
+ arg_mount_options ? "," : "", strempty(arg_mount_options));
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ if (options || arg_mount_options) {
+ log_debug("Using mount options: %s", options ?: arg_mount_options);
+
+ r = sd_bus_message_append(m, "(sv)", "Options", "s", options ?: arg_mount_options);
+ if (r < 0)
+ return r;
+ } else
+ log_debug("Not using any mount options");
+
+ if (arg_fsck) {
+ _cleanup_free_ char *fsck = NULL;
+
+ r = unit_name_from_path_instance("systemd-fsck", arg_mount_what, ".service", &fsck);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m,
+ "(sv)(sv)",
+ "Requires", "as", 1, fsck,
+ "After", "as", 1, fsck);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int transient_automount_set_properties(sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_AUTOMOUNT, arg_automount_property);
+ if (r < 0)
+ return r;
+
+ if (arg_timeout_idle != USEC_INFINITY) {
+ r = sd_bus_message_append(m, "(sv)", "TimeoutIdleUSec", "t", arg_timeout_idle);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int start_transient_mount(
+ sd_bus *bus,
+ char **argv) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *mount_unit = NULL;
+ int r;
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ r = unit_name_from_path(arg_mount_where, ".mount", &mount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make mount unit name: %m");
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and mode */
+ r = sd_bus_message_append(m, "ss", mount_unit, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_mount_set_properties(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient mount unit: %s", bus_error_message(&error, r));
+
+ if (w) {
+ const char *object;
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_quiet)
+ log_info("Started unit %s%s%s for mount point: %s%s%s",
+ ansi_highlight(), mount_unit, ansi_normal(),
+ ansi_highlight(), arg_mount_where, ansi_normal());
+
+ return 0;
+}
+
+static int start_transient_automount(
+ sd_bus *bus,
+ char **argv) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *automount_unit = NULL, *mount_unit = NULL;
+ int r;
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ r = unit_name_from_path(arg_mount_where, ".automount", &automount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make automount unit name: %m");
+
+ r = unit_name_from_path(arg_mount_where, ".mount", &mount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make mount unit name: %m");
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and mode */
+ r = sd_bus_message_append(m, "ss", automount_unit, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_automount_set_properties(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_open_container(m, 'a', "(sa(sv))");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "sa(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", mount_unit);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_mount_set_properties(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient automount unit: %s", bus_error_message(&error, r));
+
+ if (w) {
+ const char *object;
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_quiet)
+ log_info("Started unit %s%s%s for mount point: %s%s%s",
+ ansi_highlight(), automount_unit, ansi_normal(),
+ ansi_highlight(), arg_mount_where, ansi_normal());
+
+ return 0;
+}
+
+static int find_mount_points(const char *what, char ***list) {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t bufsize = 0, n = 0;
+ int r;
+
+ assert(what);
+ assert(list);
+
+ /* Returns all mount points obtained from /proc/self/mountinfo in *list,
+ * and the number of mount points as return value. */
+
+ r = libmount_parse(NULL, NULL, &table, &iter);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *source, *target;
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
+
+ source = mnt_fs_get_source(fs);
+ target = mnt_fs_get_target(fs);
+ if (!source || !target)
+ continue;
+
+ if (!path_equal(source, what))
+ continue;
+
+ /* one extra slot is needed for the terminating NULL */
+ if (!GREEDY_REALLOC0(l, bufsize, n + 2))
+ return log_oom();
+
+ l[n] = strdup(target);
+ if (!l[n])
+ return log_oom();
+ n++;
+ }
+
+ if (!GREEDY_REALLOC0(l, bufsize, n + 1))
+ return log_oom();
+
+ *list = TAKE_PTR(l);
+ return n;
+}
+
+static int find_loop_device(const char *backing_file, char **loop_dev) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ _cleanup_free_ char *l = NULL;
+
+ assert(backing_file);
+ assert(loop_dev);
+
+ d = opendir("/sys/devices/virtual/block");
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_free_ char *sys = NULL, *fname = NULL;
+ int r;
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type != DT_DIR)
+ continue;
+
+ if (!startswith(de->d_name, "loop"))
+ continue;
+
+ sys = path_join("/sys/devices/virtual/block", de->d_name, "loop/backing_file");
+ if (!sys)
+ return -ENOMEM;
+
+ r = read_one_line_file(sys, &fname);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read %s, ignoring: %m", sys);
+ continue;
+ }
+
+ if (files_same(fname, backing_file, 0) <= 0)
+ continue;
+
+ l = path_join("/dev", de->d_name);
+ if (!l)
+ return -ENOMEM;
+
+ break;
+ }
+
+ if (!l)
+ return -ENXIO;
+
+ *loop_dev = TAKE_PTR(l);
+
+ return 0;
+}
+
+static int stop_mount(
+ sd_bus *bus,
+ const char *where,
+ const char *suffix) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *mount_unit = NULL;
+ int r;
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ r = unit_name_from_path(where, suffix, &mount_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make %s unit name from path %s: %m", suffix + 1, where);
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StopUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and mode */
+ r = sd_bus_message_append(m, "ss", mount_unit, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0) {
+ if (streq(suffix, ".automount") &&
+ sd_bus_error_has_name(&error, "org.freedesktop.systemd1.NoSuchUnit"))
+ return 0;
+ return log_error_errno(r, "Failed to stop %s unit: %s", suffix + 1, bus_error_message(&error, r));
+ }
+
+ if (w) {
+ const char *object;
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_quiet)
+ log_info("Stopped unit %s%s%s for mount point: %s%s%s",
+ ansi_highlight(), mount_unit, ansi_normal(),
+ ansi_highlight(), where, ansi_normal());
+
+ return 0;
+}
+
+static int stop_mounts(
+ sd_bus *bus,
+ const char *where) {
+
+ int r;
+
+ if (path_equal(where, "/"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Refusing to operate on root directory: %s", where);
+
+ if (!path_is_normalized(where))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path contains non-normalized components: %s", where);
+
+ r = stop_mount(bus, where, ".mount");
+ if (r < 0)
+ return r;
+
+ r = stop_mount(bus, where, ".automount");
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int umount_by_device(sd_bus *bus, const char *what) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_strv_free_ char **list = NULL;
+ struct stat st;
+ const char *v;
+ char **l;
+ int r, r2 = 0;
+
+ assert(what);
+
+ if (stat(what, &st) < 0)
+ return log_error_errno(errno, "Can't stat %s: %m", what);
+
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK),
+ "Not a block device: %s", what);
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ r = sd_device_get_property_value(d, "ID_FS_USAGE", &v);
+ if (r < 0)
+ return log_device_error_errno(d, r, "Failed to get device property: %m");
+
+ if (!streq(v, "filesystem"))
+ return log_device_error_errno(d, SYNTHETIC_ERRNO(EINVAL),
+ "%s does not contain a known file system.", what);
+
+ if (sd_device_get_property_value(d, "SYSTEMD_MOUNT_WHERE", &v) >= 0)
+ r2 = stop_mounts(bus, v);
+
+ r = find_mount_points(what, &list);
+ if (r < 0)
+ return r;
+
+ for (l = list; *l; l++) {
+ r = stop_mounts(bus, *l);
+ if (r < 0)
+ r2 = r;
+ }
+
+ return r2;
+}
+
+static int umount_loop(sd_bus *bus, const char *backing_file) {
+ _cleanup_free_ char *loop_dev = NULL;
+ int r;
+
+ assert(backing_file);
+
+ r = find_loop_device(backing_file, &loop_dev);
+ if (r < 0)
+ return log_error_errno(r, r == -ENXIO ? "File %s is not mounted." : "Can't get loop device for %s: %m", backing_file);
+
+ return umount_by_device(bus, loop_dev);
+}
+
+static int action_umount(
+ sd_bus *bus,
+ int argc,
+ char **argv) {
+
+ int i, r, r2 = 0;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL) {
+ for (i = optind; i < argc; i++) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strdup(argv[i]);
+ if (!p)
+ return log_oom();
+
+ path_simplify(p, false);
+
+ r = stop_mounts(bus, p);
+ if (r < 0)
+ r2 = r;
+ }
+ return r2;
+ }
+
+ for (i = optind; i < argc; i++) {
+ _cleanup_free_ char *u = NULL, *p = NULL;
+ struct stat st;
+
+ u = fstab_node_to_udev_node(argv[i]);
+ if (!u)
+ return log_oom();
+
+ r = chase_symlinks(u, NULL, 0, &p, NULL);
+ if (r < 0) {
+ r2 = log_error_errno(r, "Failed to make path %s absolute: %m", argv[i]);
+ continue;
+ }
+
+ if (stat(p, &st) < 0)
+ return log_error_errno(errno, "Can't stat %s (from %s): %m", p, argv[i]);
+
+ if (S_ISBLK(st.st_mode))
+ r = umount_by_device(bus, p);
+ else if (S_ISREG(st.st_mode))
+ r = umount_loop(bus, p);
+ else if (S_ISDIR(st.st_mode))
+ r = stop_mounts(bus, p);
+ else {
+ log_error("Invalid file type: %s (from %s)", p, argv[i]);
+ r = -EINVAL;
+ }
+
+ if (r < 0)
+ r2 = r;
+ }
+
+ return r2;
+}
+
+static int acquire_mount_type(sd_device *d) {
+ const char *v;
+
+ assert(d);
+
+ if (arg_mount_type)
+ return 0;
+
+ if (sd_device_get_property_value(d, "ID_FS_TYPE", &v) < 0)
+ return 0;
+
+ arg_mount_type = strdup(v);
+ if (!arg_mount_type)
+ return log_oom();
+
+ log_debug("Discovered type=%s", arg_mount_type);
+ return 1;
+}
+
+static int acquire_mount_options(sd_device *d) {
+ const char *v;
+
+ assert(d);
+
+ if (arg_mount_options)
+ return 0;
+
+ if (sd_device_get_property_value(d, "SYSTEMD_MOUNT_OPTIONS", &v) < 0)
+ return 0;
+
+ arg_mount_options = strdup(v);
+ if (!arg_mount_options)
+ return log_oom();
+
+ log_debug("Discovered options=%s", arg_mount_options);
+ return 1;
+}
+
+static const char *get_model(sd_device *d) {
+ const char *model;
+
+ assert(d);
+
+ if (sd_device_get_property_value(d, "ID_MODEL_FROM_DATABASE", &model) >= 0)
+ return model;
+
+ if (sd_device_get_property_value(d, "ID_MODEL", &model) >= 0)
+ return model;
+
+ return NULL;
+}
+
+static const char* get_label(sd_device *d) {
+ const char *label;
+
+ assert(d);
+
+ if (sd_device_get_property_value(d, "ID_FS_LABEL", &label) >= 0)
+ return label;
+
+ if (sd_device_get_property_value(d, "ID_PART_ENTRY_NAME", &label) >= 0)
+ return label;
+
+ return NULL;
+}
+
+static int acquire_mount_where(sd_device *d) {
+ const char *v;
+
+ if (arg_mount_where)
+ return 0;
+
+ if (sd_device_get_property_value(d, "SYSTEMD_MOUNT_WHERE", &v) < 0) {
+ _cleanup_free_ char *escaped = NULL;
+ const char *name;
+
+ name = get_label(d);
+ if (!name)
+ name = get_model(d);
+ if (!name) {
+ const char *dn;
+
+ if (sd_device_get_devname(d, &dn) < 0)
+ return 0;
+
+ name = basename(dn);
+ }
+
+ escaped = xescape(name, "\\");
+ if (!escaped)
+ return log_oom();
+ if (!filename_is_valid(escaped))
+ return 0;
+
+ arg_mount_where = path_join("/run/media/system", escaped);
+ } else
+ arg_mount_where = strdup(v);
+
+ if (!arg_mount_where)
+ return log_oom();
+
+ log_debug("Discovered where=%s", arg_mount_where);
+ return 1;
+}
+
+static int acquire_mount_where_for_loop_dev(const char *loop_dev) {
+ _cleanup_strv_free_ char **list = NULL;
+ int r;
+
+ if (arg_mount_where)
+ return 0;
+
+ r = find_mount_points(loop_dev, &list);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Can't find mount point of %s. It is expected that %s is already mounted on a place.",
+ loop_dev, loop_dev);
+ else if (r >= 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s is mounted on %d places. It is expected that %s is mounted on a place.",
+ loop_dev, r, loop_dev);
+
+ arg_mount_where = strdup(list[0]);
+ if (!arg_mount_where)
+ return log_oom();
+
+ log_debug("Discovered where=%s", arg_mount_where);
+ return 1;
+}
+
+static int acquire_description(sd_device *d) {
+ const char *model, *label;
+
+ if (arg_description)
+ return 0;
+
+ model = get_model(d);
+
+ label = get_label(d);
+ if (!label)
+ (void) sd_device_get_property_value(d, "ID_PART_ENTRY_NUMBER", &label);
+
+ if (model && label)
+ arg_description = strjoin(model, " ", label);
+ else if (label)
+ arg_description = strdup(label);
+ else if (model)
+ arg_description = strdup(model);
+ else
+ return 0;
+
+ if (!arg_description)
+ return log_oom();
+
+ log_debug("Discovered description=%s", arg_description);
+ return 1;
+}
+
+static int acquire_removable(sd_device *d) {
+ const char *v;
+
+ /* Shortcut this if there's no reason to check it */
+ if (arg_action != ACTION_DEFAULT && arg_timeout_idle_set && arg_bind_device >= 0)
+ return 0;
+
+ for (;;) {
+ if (sd_device_get_sysattr_value(d, "removable", &v) > 0)
+ break;
+
+ if (sd_device_get_parent(d, &d) < 0)
+ return 0;
+
+ if (sd_device_get_subsystem(d, &v) < 0 || !streq(v, "block"))
+ return 0;
+ }
+
+ if (parse_boolean(v) <= 0)
+ return 0;
+
+ log_debug("Discovered removable device.");
+
+ if (arg_action == ACTION_DEFAULT) {
+ log_debug("Automatically turning on automount.");
+ arg_action = ACTION_AUTOMOUNT;
+ }
+
+ if (!arg_timeout_idle_set) {
+ log_debug("Setting idle timeout to 1s.");
+ arg_timeout_idle = USEC_PER_SEC;
+ }
+
+ if (arg_bind_device < 0) {
+ log_debug("Binding automount unit to device.");
+ arg_bind_device = true;
+ }
+
+ return 1;
+}
+
+static int discover_loop_backing_file(void) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *loop_dev = NULL;
+ struct stat st;
+ const char *v;
+ int r;
+
+ r = find_loop_device(arg_mount_what, &loop_dev);
+ if (r < 0 && r != -ENXIO)
+ return log_error_errno(errno, "Can't get loop device for %s: %m", arg_mount_what);
+
+ if (r == -ENXIO) {
+ _cleanup_free_ char *escaped = NULL;
+
+ if (arg_mount_where)
+ return 0;
+
+ escaped = xescape(basename(arg_mount_what), "\\");
+ if (!escaped)
+ return log_oom();
+ if (!filename_is_valid(escaped))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Escaped name %s is not a valid filename.",
+ escaped);
+
+ arg_mount_where = path_join("/run/media/system", escaped);
+ if (!arg_mount_where)
+ return log_oom();
+
+ log_debug("Discovered where=%s", arg_mount_where);
+ return 0;
+ }
+
+ if (stat(loop_dev, &st) < 0)
+ return log_error_errno(errno, "Can't stat %s: %m", loop_dev);
+
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid file type: %s", loop_dev);
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ if (sd_device_get_property_value(d, "ID_FS_USAGE", &v) < 0 || !streq(v, "filesystem"))
+ return log_device_error_errno(d, SYNTHETIC_ERRNO(EINVAL),
+ "%s does not contain a known file system.", arg_mount_what);
+
+ r = acquire_mount_type(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_mount_options(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_mount_where_for_loop_dev(loop_dev);
+ if (r < 0)
+ return r;
+
+ r = acquire_description(d);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int discover_device(void) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ struct stat st;
+ const char *v;
+ int r;
+
+ if (stat(arg_mount_what, &st) < 0)
+ return log_error_errno(errno, "Can't stat %s: %m", arg_mount_what);
+
+ if (S_ISREG(st.st_mode))
+ return discover_loop_backing_file();
+
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid file type: %s",
+ arg_mount_what);
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ if (sd_device_get_property_value(d, "ID_FS_USAGE", &v) < 0 || !streq(v, "filesystem"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s does not contain a known file system.",
+ arg_mount_what);
+
+ r = acquire_mount_type(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_mount_options(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_mount_where(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_description(d);
+ if (r < 0)
+ return r;
+
+ r = acquire_removable(d);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+enum {
+ COLUMN_NODE,
+ COLUMN_PATH,
+ COLUMN_MODEL,
+ COLUMN_WWN,
+ COLUMN_FSTYPE,
+ COLUMN_LABEL,
+ COLUMN_UUID,
+ _COLUMN_MAX,
+};
+
+static int list_devices(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_device *d;
+ unsigned c;
+ int r;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add block match: %m");
+
+ r = sd_device_enumerator_add_match_property(e, "ID_FS_USAGE", "filesystem");
+ if (r < 0)
+ return log_error_errno(r, "Failed to add property match: %m");
+
+ table = table_new("NODE", "PATH", "MODEL", "WWN", "TYPE", "LABEL", "UUID");
+ if (!table)
+ return log_oom();
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ r = table_set_sort(table, (size_t) 0, (size_t) SIZE_MAX);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set sort index: %m");
+
+ table_set_header(table, arg_legend);
+
+ FOREACH_DEVICE(e, d) {
+ for (c = 0; c < _COLUMN_MAX; c++) {
+ const char *x = NULL;
+
+ switch (c) {
+
+ case COLUMN_NODE:
+ (void) sd_device_get_devname(d, &x);
+ break;
+
+ case COLUMN_PATH:
+ (void) sd_device_get_property_value(d, "ID_PATH", &x);
+ break;
+
+ case COLUMN_MODEL:
+ x = get_model(d);
+ break;
+
+ case COLUMN_WWN:
+ (void) sd_device_get_property_value(d, "ID_WWN", &x);
+ break;
+
+ case COLUMN_FSTYPE:
+ (void) sd_device_get_property_value(d, "ID_FS_TYPE", &x);
+ break;
+
+ case COLUMN_LABEL:
+ x = get_label(d);
+ break;
+
+ case COLUMN_UUID:
+ (void) sd_device_get_property_value(d, "ID_FS_UUID", &x);
+ break;
+ }
+
+ r = table_add_cell(table, NULL, c == COLUMN_NODE ? TABLE_PATH : TABLE_STRING, strna(x));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return 0;
+}
+
+static int run(int argc, char* argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_action == ACTION_LIST)
+ return list_devices();
+
+ r = bus_connect_transport_systemd(arg_transport, arg_host, arg_user, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ if (arg_action == ACTION_UMOUNT)
+ return action_umount(bus, argc, argv);
+
+ if ((!arg_mount_type || fstype_is_blockdev_backed(arg_mount_type))
+ && !path_is_normalized(arg_mount_what))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path contains non-normalized components: %s",
+ arg_mount_what);
+
+ if (arg_discover) {
+ r = discover_device();
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_mount_where)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Can't figure out where to mount %s.",
+ arg_mount_what);
+
+ if (path_equal(arg_mount_where, "/"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Refusing to operate on root directory.");
+
+ if (!path_is_normalized(arg_mount_where))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path contains non-normalized components: %s",
+ arg_mount_where);
+
+ if (streq_ptr(arg_mount_type, "auto"))
+ arg_mount_type = mfree(arg_mount_type);
+ if (streq_ptr(arg_mount_options, "defaults"))
+ arg_mount_options = mfree(arg_mount_options);
+
+ if (!is_device_path(arg_mount_what))
+ arg_fsck = false;
+
+ if (arg_fsck && arg_mount_type && arg_transport == BUS_TRANSPORT_LOCAL) {
+ r = fsck_exists(arg_mount_type);
+ if (r < 0)
+ log_warning_errno(r, "Couldn't determine whether fsck for %s exists, proceeding anyway.", arg_mount_type);
+ else if (r == 0) {
+ log_debug("Disabling file system check as fsck for %s doesn't exist.", arg_mount_type);
+ arg_fsck = false; /* fsck doesn't exist, let's not attempt it */
+ }
+ }
+
+ /* The kernel (properly) refuses mounting file systems with unknown uid=,gid= options,
+ * but not for all filesystem types. Let's try to catch the cases where the option
+ * would be used if the file system does not support it. It is also possible to
+ * autodetect the file system, but that's only possible with disk-based file systems
+ * which incidentally seem to be implemented more carefully and reject unknown options,
+ * so it's probably OK that we do the check only when the type is specified.
+ */
+ if (arg_mount_type &&
+ !streq(arg_mount_type, "auto") &&
+ arg_uid != UID_INVALID &&
+ !fstype_can_uid_gid(arg_mount_type))
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "File system type %s is not known to support uid=/gid=, refusing.",
+ arg_mount_type);
+
+ switch (arg_action) {
+
+ case ACTION_MOUNT:
+ case ACTION_DEFAULT:
+ r = start_transient_mount(bus, argv + optind);
+ break;
+
+ case ACTION_AUTOMOUNT:
+ r = start_transient_automount(bus, argv + optind);
+ break;
+
+ default:
+ assert_not_reached("Unexpected action.");
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/network/fuzz-netdev-parser.c b/src/network/fuzz-netdev-parser.c
new file mode 100644
index 0000000..ddabe1c
--- /dev/null
+++ b/src/network/fuzz-netdev-parser.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "networkd-manager.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(unlink_tempfilep) char netdev_config[] = "/tmp/fuzz-networkd.XXXXXX";
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(fmkostemp_safe(netdev_config, "r+", &f) == 0);
+ if (size != 0)
+ assert_se(fwrite(data, size, 1, f) == 1);
+
+ fflush(f);
+ assert_se(manager_new(&manager) >= 0);
+ (void) netdev_load_one(manager, netdev_config);
+ return 0;
+}
diff --git a/src/network/fuzz-network-parser.c b/src/network/fuzz-network-parser.c
new file mode 100644
index 0000000..1292eba
--- /dev/null
+++ b/src/network/fuzz-network-parser.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "networkd-manager.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(unlink_tempfilep) char network_config[] = "/tmp/fuzz-networkd.XXXXXX";
+
+ if (size > 65535)
+ return 0;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(fmkostemp_safe(network_config, "r+", &f) == 0);
+ if (size != 0)
+ assert_se(fwrite(data, size, 1, f) == 1);
+
+ fflush(f);
+ assert_se(manager_new(&manager) >= 0);
+ (void) network_load_one(manager, &manager->networks, network_config);
+ return 0;
+}
diff --git a/src/network/fuzz-network-parser.options b/src/network/fuzz-network-parser.options
new file mode 100644
index 0000000..0824b19
--- /dev/null
+++ b/src/network/fuzz-network-parser.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65535
diff --git a/src/network/generator/main.c b/src/network/generator/main.c
new file mode 100644
index 0000000..f9cace7
--- /dev/null
+++ b/src/network/generator/main.c
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "fd-util.h"
+#include "generator.h"
+#include "macro.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "network-generator.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+
+#define NETWORKD_UNIT_DIRECTORY "/run/systemd/network"
+
+static const char *arg_root = NULL;
+
+static int network_save(Network *network, const char *dest_dir) {
+ _cleanup_free_ char *filename = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(network);
+
+ r = asprintf(&filename, "%s-%s.network",
+ isempty(network->ifname) ? "91" : "90",
+ isempty(network->ifname) ? "default" : network->ifname);
+ if (r < 0)
+ return log_oom();
+
+ r = generator_open_unit_file(dest_dir, "kernel command line", filename, &f);
+ if (r < 0)
+ return r;
+
+ network_dump(network, f);
+
+ return 0;
+}
+
+static int netdev_save(NetDev *netdev, const char *dest_dir) {
+ _cleanup_free_ char *filename = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(netdev);
+
+ r = asprintf(&filename, "90-%s.netdev",
+ netdev->ifname);
+ if (r < 0)
+ return log_oom();
+
+ r = generator_open_unit_file(dest_dir, "kernel command line", filename, &f);
+ if (r < 0)
+ return r;
+
+ netdev_dump(netdev, f);
+
+ return 0;
+}
+
+static int link_save(Link *link, const char *dest_dir) {
+ _cleanup_free_ char *filename = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(link);
+
+ r = asprintf(&filename, "90-%s.link",
+ link->ifname);
+ if (r < 0)
+ return log_oom();
+
+ r = generator_open_unit_file(dest_dir, "kernel command line", filename, &f);
+ if (r < 0)
+ return r;
+
+ link_dump(link, f);
+
+ return 0;
+}
+
+static int context_save(Context *context) {
+ Network *network;
+ NetDev *netdev;
+ Link *link;
+ int k, r;
+ const char *p;
+
+ p = prefix_roota(arg_root, NETWORKD_UNIT_DIRECTORY);
+
+ r = mkdir_p(p, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create directory " NETWORKD_UNIT_DIRECTORY ": %m");
+
+ HASHMAP_FOREACH(network, context->networks_by_name) {
+ k = network_save(network, p);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ HASHMAP_FOREACH(netdev, context->netdevs_by_name) {
+ k = netdev_save(netdev, p);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ HASHMAP_FOREACH(link, context->links_by_name) {
+ k = link_save(link, p);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int help(void) {
+ printf("%s [OPTIONS...] [-- KERNEL_CMDLINE]\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --root=PATH Operate on an alternate filesystem root\n"
+ , program_invocation_short_name
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ROOT,
+ };
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "root", required_argument, NULL, ARG_ROOT },
+ {},
+ };
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_ROOT:
+ arg_root = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context context = {};
+ int i, r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (optind >= argc) {
+ r = proc_cmdline_parse(parse_cmdline_item, &context, 0);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse kernel command line: %m");
+ } else {
+ for (i = optind; i < argc; i++) {
+ _cleanup_free_ char *word = NULL;
+ char *value;
+
+ word = strdup(argv[i]);
+ if (!word)
+ return log_oom();
+
+ value = strchr(word, '=');
+ if (value)
+ *(value++) = 0;
+
+ r = parse_cmdline_item(word, value, &context);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse command line \"%s%s%s\": %m",
+ word, value ? "=" : "", strempty(value));
+ }
+ }
+
+ r = context_merge_networks(&context);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to merge multiple command line options: %m");
+
+ return context_save(&context);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/network/generator/network-generator.c b/src/network/generator/network-generator.c
new file mode 100644
index 0000000..2fa21a0
--- /dev/null
+++ b/src/network/generator/network-generator.c
@@ -0,0 +1,1233 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "ether-addr-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "macro.h"
+#include "network-generator.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+
+/*
+ # .network
+ ip={dhcp|on|any|dhcp6|auto6|either6}
+ ip=<interface>:{dhcp|on|any|dhcp6|auto6}[:[<mtu>][:<macaddr>]]
+ ip=<client-IP>:[<peer>]:<gateway-IP>:<netmask>:<client_hostname>:<interface>:{none|off|dhcp|on|any|dhcp6|auto6|ibft}[:[<mtu>][:<macaddr>]]
+ ip=<client-IP>:[<peer>]:<gateway-IP>:<netmask>:<client_hostname>:<interface>:{none|off|dhcp|on|any|dhcp6|auto6|ibft}[:[<dns1>][:<dns2>]]
+ rd.route=<net>/<netmask>:<gateway>[:<interface>]
+ nameserver=<IP> [nameserver=<IP> ...]
+ rd.peerdns=0
+
+ # .link
+ ifname=<interface>:<MAC>
+
+ # .netdev
+ vlan=<vlanname>:<phydevice>
+ bond=<bondname>[:<bondslaves>:[:<options>[:<mtu>]]]
+ team=<teammaster>:<teamslaves> # not supported
+ bridge=<bridgename>:<ethnames>
+
+ # ignored
+ bootdev=<interface>
+ BOOTIF=<MAC>
+ rd.bootif=0
+ biosdevname=0
+ rd.neednet=1
+*/
+
+static const char * const dracut_dhcp_type_table[_DHCP_TYPE_MAX] = {
+ [DHCP_TYPE_NONE] = "none",
+ [DHCP_TYPE_OFF] = "off",
+ [DHCP_TYPE_ON] = "on",
+ [DHCP_TYPE_ANY] = "any",
+ [DHCP_TYPE_DHCP] = "dhcp",
+ [DHCP_TYPE_DHCP6] = "dhcp6",
+ [DHCP_TYPE_AUTO6] = "auto6",
+ [DHCP_TYPE_EITHER6] = "either6",
+ [DHCP_TYPE_IBFT] = "ibft",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(dracut_dhcp_type, DHCPType);
+
+static const char * const networkd_dhcp_type_table[_DHCP_TYPE_MAX] = {
+ [DHCP_TYPE_NONE] = "no",
+ [DHCP_TYPE_OFF] = "no",
+ [DHCP_TYPE_ON] = "yes",
+ [DHCP_TYPE_ANY] = "yes",
+ [DHCP_TYPE_DHCP] = "ipv4",
+ [DHCP_TYPE_DHCP6] = "ipv6",
+ [DHCP_TYPE_AUTO6] = "no", /* TODO: enable other setting? */
+ [DHCP_TYPE_EITHER6] = "ipv6", /* TODO: enable other setting? */
+ [DHCP_TYPE_IBFT] = "no",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(networkd_dhcp_type, DHCPType);
+
+static Address *address_free(Address *address) {
+ if (!address)
+ return NULL;
+
+ if (address->network)
+ LIST_REMOVE(addresses, address->network->addresses, address);
+
+ return mfree(address);
+}
+
+static int address_new(Network *network, int family, unsigned char prefixlen,
+ union in_addr_union *addr, union in_addr_union *peer, Address **ret) {
+ Address *address;
+
+ assert(network);
+
+ address = new(Address, 1);
+ if (!address)
+ return -ENOMEM;
+
+ *address = (Address) {
+ .family = family,
+ .prefixlen = prefixlen,
+ .address = *addr,
+ .peer = *peer,
+ };
+
+ LIST_PREPEND(addresses, network->addresses, address);
+
+ address->network = network;
+
+ if (ret)
+ *ret = address;
+ return 0;
+}
+
+static Route *route_free(Route *route) {
+ if (!route)
+ return NULL;
+
+ if (route->network)
+ LIST_REMOVE(routes, route->network->routes, route);
+
+ return mfree(route);
+}
+
+static int route_new(Network *network, int family, unsigned char prefixlen,
+ union in_addr_union *dest, union in_addr_union *gateway, Route **ret) {
+ Route *route;
+
+ assert(network);
+
+ route = new(Route, 1);
+ if (!route)
+ return -ENOMEM;
+
+ *route = (Route) {
+ .family = family,
+ .prefixlen = prefixlen,
+ .dest = dest ? *dest : IN_ADDR_NULL,
+ .gateway = *gateway,
+ };
+
+ LIST_PREPEND(routes, network->routes, route);
+
+ route->network = network;
+
+ if (ret)
+ *ret = route;
+ return 0;
+}
+
+static Network *network_free(Network *network) {
+ Address *address;
+ Route *route;
+
+ if (!network)
+ return NULL;
+
+ free(network->ifname);
+ free(network->hostname);
+ strv_free(network->dns);
+ free(network->vlan);
+ free(network->bridge);
+ free(network->bond);
+
+ while ((address = network->addresses))
+ address_free(address);
+
+ while ((route = network->routes))
+ route_free(route);
+
+ return mfree(network);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Network*, network_free);
+
+static int network_new(Context *context, const char *name, Network **ret) {
+ _cleanup_(network_freep) Network *network = NULL;
+ _cleanup_free_ char *ifname = NULL;
+ int r;
+
+ assert(context);
+
+ if (!isempty(name) && !ifname_valid(name))
+ return -EINVAL;
+
+ ifname = strdup(name);
+ if (!ifname)
+ return -ENOMEM;
+
+ network = new(Network, 1);
+ if (!network)
+ return -ENOMEM;
+
+ *network = (Network) {
+ .ifname = TAKE_PTR(ifname),
+ .dhcp_type = _DHCP_TYPE_INVALID,
+ .dhcp_use_dns = -1,
+ };
+
+ r = hashmap_ensure_allocated(&context->networks_by_name, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(context->networks_by_name, network->ifname, network);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = network;
+
+ TAKE_PTR(network);
+ return 0;
+}
+
+Network *network_get(Context *context, const char *ifname) {
+ return hashmap_get(context->networks_by_name, ifname);
+}
+
+static NetDev *netdev_free(NetDev *netdev) {
+ if (!netdev)
+ return NULL;
+
+ free(netdev->ifname);
+ free(netdev->kind);
+ return mfree(netdev);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(NetDev*, netdev_free);
+
+static int netdev_new(Context *context, const char *_kind, const char *_ifname, NetDev **ret) {
+ _cleanup_(netdev_freep) NetDev *netdev = NULL;
+ _cleanup_free_ char *kind = NULL, *ifname = NULL;
+ int r;
+
+ assert(context);
+
+ if (!ifname_valid(_ifname))
+ return -EINVAL;
+
+ kind = strdup(_kind);
+ if (!kind)
+ return -ENOMEM;
+
+ ifname = strdup(_ifname);
+ if (!ifname)
+ return -ENOMEM;
+
+ netdev = new(NetDev, 1);
+ if (!netdev)
+ return -ENOMEM;
+
+ *netdev = (NetDev) {
+ .kind = TAKE_PTR(kind),
+ .ifname = TAKE_PTR(ifname),
+ };
+
+ r = hashmap_ensure_allocated(&context->netdevs_by_name, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(context->netdevs_by_name, netdev->ifname, netdev);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = netdev;
+
+ TAKE_PTR(netdev);
+ return 0;
+}
+
+NetDev *netdev_get(Context *context, const char *ifname) {
+ return hashmap_get(context->netdevs_by_name, ifname);
+}
+
+static Link *link_free(Link *link) {
+ if (!link)
+ return NULL;
+
+ free(link->ifname);
+ return mfree(link);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_free);
+
+static int link_new(Context *context, const char *name, struct ether_addr *mac, Link **ret) {
+ _cleanup_(link_freep) Link *link = NULL;
+ _cleanup_free_ char *ifname = NULL;
+ int r;
+
+ assert(context);
+
+ if (!ifname_valid(name))
+ return -EINVAL;
+
+ ifname = strdup(name);
+ if (!ifname)
+ return -ENOMEM;
+
+ link = new(Link, 1);
+ if (!link)
+ return -ENOMEM;
+
+ *link = (Link) {
+ .ifname = TAKE_PTR(ifname),
+ .mac = *mac,
+ };
+
+ r = hashmap_ensure_allocated(&context->links_by_name, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(context->links_by_name, link->ifname, link);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = link;
+
+ TAKE_PTR(link);
+ return 0;
+}
+
+Link *link_get(Context *context, const char *ifname) {
+ return hashmap_get(context->links_by_name, ifname);
+}
+
+static int network_set_dhcp_type(Context *context, const char *ifname, const char *dhcp_type) {
+ Network *network;
+ DHCPType t;
+ int r;
+
+ t = dracut_dhcp_type_from_string(dhcp_type);
+ if (t < 0)
+ return -EINVAL;
+
+ network = network_get(context, ifname);
+ if (!network) {
+ r = network_new(context, ifname, &network);
+ if (r < 0)
+ return r;
+ }
+
+ network->dhcp_type = t;
+ return 0;
+}
+
+static int network_set_hostname(Context *context, const char *ifname, const char *hostname) {
+ Network *network;
+
+ network = network_get(context, ifname);
+ if (!network)
+ return -ENODEV;
+
+ return free_and_strdup(&network->hostname, hostname);
+}
+
+static int network_set_mtu(Context *context, const char *ifname, int family, const char *mtu) {
+ Network *network;
+
+ network = network_get(context, ifname);
+ if (!network)
+ return -ENODEV;
+
+ return parse_mtu(family, mtu, &network->mtu);
+}
+
+static int network_set_mac_address(Context *context, const char *ifname, const char *mac) {
+ Network *network;
+
+ network = network_get(context, ifname);
+ if (!network)
+ return -ENODEV;
+
+ return ether_addr_from_string(mac, &network->mac);
+}
+
+static int network_set_address(Context *context, const char *ifname, int family, unsigned char prefixlen,
+ union in_addr_union *addr, union in_addr_union *peer) {
+ Network *network;
+
+ if (in_addr_is_null(family, addr) != 0)
+ return 0;
+
+ network = network_get(context, ifname);
+ if (!network)
+ return -ENODEV;
+
+ return address_new(network, family, prefixlen, addr, peer, NULL);
+}
+
+static int network_set_route(Context *context, const char *ifname, int family, unsigned char prefixlen,
+ union in_addr_union *dest, union in_addr_union *gateway) {
+ Network *network;
+ int r;
+
+ if (in_addr_is_null(family, gateway) != 0)
+ return 0;
+
+ network = network_get(context, ifname);
+ if (!network) {
+ r = network_new(context, ifname, &network);
+ if (r < 0)
+ return r;
+ }
+
+ return route_new(network, family, prefixlen, dest, gateway, NULL);
+}
+
+static int network_set_dns(Context *context, const char *ifname, const char *dns) {
+ union in_addr_union a;
+ Network *network;
+ int family, r;
+
+ r = in_addr_from_string_auto(dns, &family, &a);
+ if (r < 0)
+ return r;
+
+ network = network_get(context, ifname);
+ if (!network) {
+ r = network_new(context, ifname, &network);
+ if (r < 0)
+ return r;
+ }
+
+ return strv_extend(&network->dns, dns);
+}
+
+static int network_set_dhcp_use_dns(Context *context, const char *ifname, bool value) {
+ Network *network;
+ int r;
+
+ network = network_get(context, ifname);
+ if (!network) {
+ r = network_new(context, ifname, &network);
+ if (r < 0)
+ return r;
+ }
+
+ network->dhcp_use_dns = value;
+
+ return 0;
+}
+
+static int network_set_vlan(Context *context, const char *ifname, const char *value) {
+ Network *network;
+ int r;
+
+ network = network_get(context, ifname);
+ if (!network) {
+ r = network_new(context, ifname, &network);
+ if (r < 0)
+ return r;
+ }
+
+ return free_and_strdup(&network->vlan, value);
+}
+
+static int network_set_bridge(Context *context, const char *ifname, const char *value) {
+ Network *network;
+ int r;
+
+ network = network_get(context, ifname);
+ if (!network) {
+ r = network_new(context, ifname, &network);
+ if (r < 0)
+ return r;
+ }
+
+ return free_and_strdup(&network->bridge, value);
+}
+
+static int network_set_bond(Context *context, const char *ifname, const char *value) {
+ Network *network;
+ int r;
+
+ network = network_get(context, ifname);
+ if (!network) {
+ r = network_new(context, ifname, &network);
+ if (r < 0)
+ return r;
+ }
+
+ return free_and_strdup(&network->bond, value);
+}
+
+static int parse_cmdline_ip_mtu_mac(Context *context, const char *ifname, int family, const char *value) {
+ const char *mtu, *p;
+ int r;
+
+ /* [<mtu>][:<macaddr>] */
+
+ p = strchr(value, ':');
+ if (!p)
+ mtu = value;
+ else
+ mtu = strndupa(value, p - value);
+
+ r = network_set_mtu(context, ifname, family, mtu);
+ if (r < 0)
+ return r;
+
+ if (!p)
+ return 0;
+
+ r = network_set_mac_address(context, ifname, p + 1);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int parse_ip_address_one(int family, const char **value, union in_addr_union *ret) {
+ const char *p = *value, *q, *buf;
+ int r;
+
+ if (p[0] == ':') {
+ *value = p + 1;
+ return 0;
+ }
+
+ if (family == AF_INET6) {
+ if (p[0] != '[')
+ return -EINVAL;
+
+ q = strchr(p + 1, ']');
+ if (!q)
+ return -EINVAL;
+
+ if (q[1] != ':')
+ return -EINVAL;
+
+ buf = strndupa(p + 1, q - p - 1);
+ p = q + 2;
+ } else {
+ q = strchr(p, ':');
+ if (!q)
+ return -EINVAL;
+
+ buf = strndupa(p, q - p);
+ p = q + 1;
+ }
+
+ r = in_addr_from_string(family, buf, ret);
+ if (r < 0)
+ return r;
+
+ *value = p;
+ return 1;
+}
+
+static int parse_netmask_or_prefixlen(int family, const char **value, unsigned char *ret) {
+ union in_addr_union netmask;
+ const char *p, *q;
+ int r;
+
+ r = parse_ip_address_one(family, value, &netmask);
+ if (r > 0) {
+ if (family == AF_INET6)
+ /* TODO: Not supported yet. */
+ return -EINVAL;
+
+ *ret = in4_addr_netmask_to_prefixlen(&netmask.in);
+ } else if (r == 0)
+ *ret = family == AF_INET6 ? 128 : 32;
+ else {
+ p = strchr(*value, ':');
+ if (!p)
+ return -EINVAL;
+
+ q = strndupa(*value, p - *value);
+ r = safe_atou8(q, ret);
+ if (r < 0)
+ return r;
+
+ *value = p + 1;
+ }
+
+ return 0;
+}
+
+static int parse_cmdline_ip_address(Context *context, int family, const char *value) {
+ union in_addr_union addr = {}, peer = {}, gateway = {};
+ const char *hostname = NULL, *ifname, *dhcp_type, *dns, *p;
+ unsigned char prefixlen;
+ int r;
+
+ /* ip=<client-IP>:[<peer>]:<gateway-IP>:<netmask>:<client_hostname>:<interface>:{none|off|dhcp|on|any|dhcp6|auto6|ibft}[:[<mtu>][:<macaddr>]]
+ * ip=<client-IP>:[<peer>]:<gateway-IP>:<netmask>:<client_hostname>:<interface>:{none|off|dhcp|on|any|dhcp6|auto6|ibft}[:[<dns1>][:<dns2>]] */
+
+ r = parse_ip_address_one(family, &value, &addr);
+ if (r < 0)
+ return r;
+ r = parse_ip_address_one(family, &value, &peer);
+ if (r < 0)
+ return r;
+ r = parse_ip_address_one(family, &value, &gateway);
+ if (r < 0)
+ return r;
+ r = parse_netmask_or_prefixlen(family, &value, &prefixlen);
+ if (r < 0)
+ return r;
+
+ /* hostname */
+ p = strchr(value, ':');
+ if (!p)
+ return -EINVAL;
+
+ if (p != value) {
+ hostname = strndupa(value, p - value);
+ if (!hostname_is_valid(hostname, false))
+ return -EINVAL;
+ }
+
+ value = p + 1;
+
+ /* ifname */
+ p = strchr(value, ':');
+ if (!p)
+ return -EINVAL;
+
+ ifname = strndupa(value, p - value);
+
+ value = p + 1;
+
+ /* dhcp_type */
+ p = strchr(value, ':');
+ if (!p)
+ dhcp_type = value;
+ else
+ dhcp_type = strndupa(value, p - value);
+
+ r = network_set_dhcp_type(context, ifname, dhcp_type);
+ if (r < 0)
+ return r;
+
+ /* set values */
+ r = network_set_hostname(context, ifname, hostname);
+ if (r < 0)
+ return r;
+
+ r = network_set_address(context, ifname, family, prefixlen, &addr, &peer);
+ if (r < 0)
+ return r;
+
+ r = network_set_route(context, ifname, family, 0, NULL, &gateway);
+ if (r < 0)
+ return r;
+
+ if (!p)
+ return 0;
+
+ /* First, try [<mtu>][:<macaddr>] */
+ r = parse_cmdline_ip_mtu_mac(context, ifname, AF_UNSPEC, p + 1);
+ if (r >= 0)
+ return 0;
+
+ /* Next, try [<dns1>][:<dns2>] */
+ value = p + 1;
+ p = strchr(value, ':');
+ if (!p) {
+ r = network_set_dns(context, ifname, value);
+ if (r < 0)
+ return r;
+ } else {
+ dns = strndupa(value, p - value);
+ r = network_set_dns(context, ifname, dns);
+ if (r < 0)
+ return r;
+ r = network_set_dns(context, ifname, p + 1);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int parse_cmdline_ip_interface(Context *context, const char *value) {
+ const char *ifname, *dhcp_type, *p;
+ int r;
+
+ /* ip=<interface>:{dhcp|on|any|dhcp6|auto6}[:[<mtu>][:<macaddr>]] */
+
+ p = strchr(value, ':');
+ if (!p)
+ return -EINVAL;
+
+ ifname = strndupa(value, p - value);
+
+ value = p + 1;
+ p = strchr(value, ':');
+ if (!p)
+ dhcp_type = value;
+ else
+ dhcp_type = strndupa(value, p - value);
+
+ r = network_set_dhcp_type(context, ifname, dhcp_type);
+ if (r < 0)
+ return r;
+
+ if (!p)
+ return 0;
+
+ return parse_cmdline_ip_mtu_mac(context, ifname, AF_UNSPEC, p + 1);
+}
+
+static int parse_cmdline_ip(Context *context, const char *key, const char *value) {
+ const char *p;
+ int r;
+
+ if (proc_cmdline_value_missing(key, value))
+ return -EINVAL;
+
+ p = strchr(value, ':');
+ if (!p)
+ /* ip={dhcp|on|any|dhcp6|auto6|either6} */
+ return network_set_dhcp_type(context, "", value);
+
+ if (value[0] == '[')
+ return parse_cmdline_ip_address(context, AF_INET6, value);
+
+ r = parse_cmdline_ip_address(context, AF_INET, value);
+ if (r < 0)
+ return parse_cmdline_ip_interface(context, value);
+
+ return 0;
+}
+
+static int parse_cmdline_rd_route(Context *context, const char *key, const char *value) {
+ union in_addr_union addr = {}, gateway = {};
+ unsigned char prefixlen;
+ const char *buf, *p;
+ int family, r;
+
+ /* rd.route=<net>/<netmask>:<gateway>[:<interface>] */
+
+ if (proc_cmdline_value_missing(key, value))
+ return -EINVAL;
+
+ if (value[0] == '[') {
+ p = strchr(value, ']');
+ if (!p)
+ return -EINVAL;
+
+ if (p[1] != ':')
+ return -EINVAL;
+
+ buf = strndupa(value + 1, p - value - 1);
+ value = p + 2;
+ family = AF_INET6;
+ } else {
+ p = strchr(value, ':');
+ if (!p)
+ return -EINVAL;
+
+ buf = strndupa(value, p - value);
+ value = p + 1;
+ family = AF_INET;
+ }
+
+ r = in_addr_prefix_from_string(buf, family, &addr, &prefixlen);
+ if (r < 0)
+ return r;
+
+ p = strchr(value, ':');
+ if (!p)
+ value = strjoina(value, ":");
+
+ r = parse_ip_address_one(family, &value, &gateway);
+ if (r < 0)
+ return r;
+
+ return network_set_route(context, value, family, prefixlen, &addr, &gateway);
+}
+
+static int parse_cmdline_nameserver(Context *context, const char *key, const char *value) {
+ if (proc_cmdline_value_missing(key, value))
+ return -EINVAL;
+
+ return network_set_dns(context, "", value);
+}
+
+static int parse_cmdline_rd_peerdns(Context *context, const char *key, const char *value) {
+ int r;
+
+ if (proc_cmdline_value_missing(key, value))
+ return network_set_dhcp_use_dns(context, "", true);
+
+ r = parse_boolean(value);
+ if (r < 0)
+ return r;
+
+ return network_set_dhcp_use_dns(context, "", r);
+}
+
+static int parse_cmdline_vlan(Context *context, const char *key, const char *value) {
+ const char *name, *p;
+ NetDev *netdev;
+ int r;
+
+ if (proc_cmdline_value_missing(key, value))
+ return -EINVAL;
+
+ p = strchr(value, ':');
+ if (!p)
+ return -EINVAL;
+
+ name = strndupa(value, p - value);
+
+ netdev = netdev_get(context, name);
+ if (!netdev) {
+ r = netdev_new(context, "vlan", name, &netdev);
+ if (r < 0)
+ return r;
+ }
+
+ return network_set_vlan(context, p + 1, name);
+}
+
+static int parse_cmdline_bridge(Context *context, const char *key, const char *value) {
+ const char *name, *p;
+ NetDev *netdev;
+ int r;
+
+ if (proc_cmdline_value_missing(key, value))
+ return -EINVAL;
+
+ p = strchr(value, ':');
+ if (!p)
+ return -EINVAL;
+
+ name = strndupa(value, p - value);
+
+ netdev = netdev_get(context, name);
+ if (!netdev) {
+ r = netdev_new(context, "bridge", name, &netdev);
+ if (r < 0)
+ return r;
+ }
+
+ p++;
+ if (isempty(p))
+ return -EINVAL;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ",", 0);
+ if (r <= 0)
+ return r;
+
+ r = network_set_bridge(context, word, name);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int parse_cmdline_bond(Context *context, const char *key, const char *value) {
+ const char *name, *slaves, *p;
+ NetDev *netdev;
+ int r;
+
+ if (proc_cmdline_value_missing(key, value))
+ return -EINVAL;
+
+ p = strchr(value, ':');
+ if (!p)
+ return -EINVAL;
+
+ name = strndupa(value, p - value);
+
+ netdev = netdev_get(context, name);
+ if (!netdev) {
+ r = netdev_new(context, "bond", name, &netdev);
+ if (r < 0)
+ return r;
+ }
+
+ value = p + 1;
+ p = strchr(value, ':');
+ if (!p)
+ slaves = value;
+ else
+ slaves = strndupa(value, p - value);
+
+ if (isempty(slaves))
+ return -EINVAL;
+
+ for (const char *q = slaves; ; ) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&q, &word, ",", 0);
+ if (r == 0)
+ break;
+ if (r < 0)
+ return r;
+
+ r = network_set_bond(context, word, name);
+ if (r < 0)
+ return r;
+ }
+
+ if (!p)
+ return 0;
+
+ value = p + 1;
+ p = strchr(value, ':');
+ if (!p)
+ /* TODO: set bonding options */
+ return 0;
+
+ return parse_mtu(AF_UNSPEC, p + 1, &netdev->mtu);
+}
+
+static int parse_cmdline_ifname(Context *context, const char *key, const char *value) {
+ struct ether_addr mac;
+ const char *name, *p;
+ int r;
+
+ /* ifname=<interface>:<MAC> */
+
+ if (proc_cmdline_value_missing(key, value))
+ return -EINVAL;
+
+ p = strchr(value, ':');
+ if (!p)
+ return -EINVAL;
+
+ name = strndupa(value, p - value);
+
+ r = ether_addr_from_string(p + 1, &mac);
+ if (r < 0)
+ return r;
+
+ return link_new(context, name, &mac, NULL);
+}
+
+int parse_cmdline_item(const char *key, const char *value, void *data) {
+ Context *context = data;
+
+ assert(key);
+ assert(data);
+
+ if (streq(key, "ip"))
+ return parse_cmdline_ip(context, key, value);
+ if (streq(key, "rd.route"))
+ return parse_cmdline_rd_route(context, key, value);
+ if (streq(key, "nameserver"))
+ return parse_cmdline_nameserver(context, key, value);
+ if (streq(key, "rd.peerdns"))
+ return parse_cmdline_rd_peerdns(context, key, value);
+ if (streq(key, "vlan"))
+ return parse_cmdline_vlan(context, key, value);
+ if (streq(key, "bridge"))
+ return parse_cmdline_bridge(context, key, value);
+ if (streq(key, "bond"))
+ return parse_cmdline_bond(context, key, value);
+ if (streq(key, "ifname"))
+ return parse_cmdline_ifname(context, key, value);
+
+ return 0;
+}
+
+int context_merge_networks(Context *context) {
+ Network *all, *network;
+ Route *route;
+ int r;
+
+ assert(context);
+
+ /* Copy settings about the following options
+ rd.route=<net>/<netmask>:<gateway>[:<interface>]
+ nameserver=<IP> [nameserver=<IP> ...]
+ rd.peerdns=0 */
+
+ all = network_get(context, "");
+ if (!all)
+ return 0;
+
+ if (hashmap_size(context->networks_by_name) <= 1)
+ return 0;
+
+ HASHMAP_FOREACH(network, context->networks_by_name) {
+ if (network == all)
+ continue;
+
+ network->dhcp_use_dns = all->dhcp_use_dns;
+
+ r = strv_extend_strv(&network->dns, all->dns, false);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(routes, route, all->routes) {
+ r = route_new(network, route->family, route->prefixlen, &route->dest, &route->gateway, NULL);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ assert_se(hashmap_remove(context->networks_by_name, "") == all);
+ network_free(all);
+ return 0;
+}
+
+void context_clear(Context *context) {
+ if (!context)
+ return;
+
+ hashmap_free_with_destructor(context->networks_by_name, network_free);
+ hashmap_free_with_destructor(context->netdevs_by_name, netdev_free);
+ hashmap_free_with_destructor(context->links_by_name, link_free);
+}
+
+static int address_dump(Address *address, FILE *f) {
+ _cleanup_free_ char *addr = NULL, *peer = NULL;
+ int r;
+
+ r = in_addr_prefix_to_string(address->family, &address->address, address->prefixlen, &addr);
+ if (r < 0)
+ return r;
+
+ if (in_addr_is_null(address->family, &address->peer) == 0) {
+ r = in_addr_to_string(address->family, &address->peer, &peer);
+ if (r < 0)
+ return r;
+ }
+
+ fprintf(f,
+ "\n[Address]\n"
+ "Address=%s\n",
+ addr);
+
+ if (peer)
+ fprintf(f, "Peer=%s\n", peer);
+
+ return 0;
+}
+
+static int route_dump(Route *route, FILE *f) {
+ _cleanup_free_ char *dest = NULL, *gateway = NULL;
+ int r;
+
+ if (in_addr_is_null(route->family, &route->dest) == 0) {
+ r = in_addr_prefix_to_string(route->family, &route->dest, route->prefixlen, &dest);
+ if (r < 0)
+ return r;
+ }
+
+ r = in_addr_to_string(route->family, &route->gateway, &gateway);
+ if (r < 0)
+ return r;
+
+ fputs("\n[Route]\n", f);
+ if (dest)
+ fprintf(f, "Destination=%s\n", dest);
+ fprintf(f, "Gateway=%s\n", gateway);
+
+ return 0;
+}
+
+void network_dump(Network *network, FILE *f) {
+ char mac[ETHER_ADDR_TO_STRING_MAX];
+ Address *address;
+ Route *route;
+ const char *dhcp;
+ char **dns;
+
+ assert(network);
+ assert(f);
+
+ fprintf(f,
+ "[Match]\n"
+ "Name=%s\n",
+ isempty(network->ifname) ? "*" : network->ifname);
+
+ fputs("\n[Link]\n", f);
+
+ if (!ether_addr_is_null(&network->mac))
+ fprintf(f, "MACAddress=%s\n", ether_addr_to_string(&network->mac, mac));
+ if (network->mtu > 0)
+ fprintf(f, "MTUBytes=%" PRIu32 "\n", network->mtu);
+
+ fputs("\n[Network]\n", f);
+
+ dhcp = networkd_dhcp_type_to_string(network->dhcp_type);
+ if (dhcp)
+ fprintf(f, "DHCP=%s\n", dhcp);
+
+ if (!strv_isempty(network->dns))
+ STRV_FOREACH(dns, network->dns)
+ fprintf(f, "DNS=%s\n", *dns);
+
+ if (network->vlan)
+ fprintf(f, "VLAN=%s\n", network->vlan);
+
+ if (network->bridge)
+ fprintf(f, "Bridge=%s\n", network->bridge);
+
+ if (network->bond)
+ fprintf(f, "Bond=%s\n", network->bond);
+
+ fputs("\n[DHCP]\n", f);
+
+ if (!isempty(network->hostname))
+ fprintf(f, "Hostname=%s\n", network->hostname);
+
+ if (network->dhcp_use_dns >= 0)
+ fprintf(f, "UseDNS=%s\n", yes_no(network->dhcp_use_dns));
+
+ LIST_FOREACH(addresses, address, network->addresses)
+ (void) address_dump(address, f);
+
+ LIST_FOREACH(routes, route, network->routes)
+ (void) route_dump(route, f);
+}
+
+void netdev_dump(NetDev *netdev, FILE *f) {
+ assert(netdev);
+ assert(f);
+
+ fprintf(f,
+ "[NetDev]\n"
+ "Kind=%s\n"
+ "Name=%s\n",
+ netdev->kind,
+ netdev->ifname);
+
+ if (netdev->mtu > 0)
+ fprintf(f, "MTUBytes=%" PRIu32 "\n", netdev->mtu);
+}
+
+void link_dump(Link *link, FILE *f) {
+ char mac[ETHER_ADDR_TO_STRING_MAX];
+
+ assert(link);
+ assert(f);
+
+ fputs("[Match]\n", f);
+
+ if (!ether_addr_is_null(&link->mac))
+ fprintf(f, "MACAddress=%s\n", ether_addr_to_string(&link->mac, mac));
+
+ fprintf(f,
+ "\n[Link]\n"
+ "Name=%s\n",
+ link->ifname);
+}
+
+int network_format(Network *network, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t sz = 0;
+ int r;
+
+ assert(network);
+ assert(ret);
+
+ {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = open_memstream_unlocked(&s, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ network_dump(network, f);
+
+ /* Add terminating 0, so that the output buffer is a valid string. */
+ fputc('\0', f);
+
+ r = fflush_and_check(f);
+ }
+ if (r < 0)
+ return r;
+
+ assert(s);
+ *ret = TAKE_PTR(s);
+ assert(sz > 0);
+ return (int) sz - 1;
+}
+
+int netdev_format(NetDev *netdev, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t sz = 0;
+ int r;
+
+ assert(netdev);
+ assert(ret);
+
+ {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = open_memstream_unlocked(&s, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ netdev_dump(netdev, f);
+
+ /* Add terminating 0, so that the output buffer is a valid string. */
+ fputc('\0', f);
+
+ r = fflush_and_check(f);
+ }
+ if (r < 0)
+ return r;
+
+ assert(s);
+ *ret = TAKE_PTR(s);
+ assert(sz > 0);
+ return (int) sz - 1;
+}
+
+int link_format(Link *link, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t sz = 0;
+ int r;
+
+ assert(link);
+ assert(ret);
+
+ {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = open_memstream_unlocked(&s, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ link_dump(link, f);
+
+ /* Add terminating 0, so that the output buffer is a valid string. */
+ fputc('\0', f);
+
+ r = fflush_and_check(f);
+ }
+ if (r < 0)
+ return r;
+
+ assert(s);
+ *ret = TAKE_PTR(s);
+ assert(sz > 0);
+ return (int) sz - 1;
+}
diff --git a/src/network/generator/network-generator.h b/src/network/generator/network-generator.h
new file mode 100644
index 0000000..86bcaec
--- /dev/null
+++ b/src/network/generator/network-generator.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <net/ethernet.h>
+#include <stdio.h>
+
+#include "hashmap.h"
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef enum DHCPType {
+ DHCP_TYPE_NONE,
+ DHCP_TYPE_OFF,
+ DHCP_TYPE_ON,
+ DHCP_TYPE_ANY,
+ DHCP_TYPE_DHCP,
+ DHCP_TYPE_DHCP6,
+ DHCP_TYPE_AUTO6,
+ DHCP_TYPE_EITHER6,
+ DHCP_TYPE_IBFT,
+ _DHCP_TYPE_MAX,
+ _DHCP_TYPE_INVALID = -1,
+} DHCPType;
+
+typedef struct Address Address;
+typedef struct Link Link;
+typedef struct NetDev NetDev;
+typedef struct Network Network;
+typedef struct Route Route;
+typedef struct Context Context;
+
+struct Address {
+ Network *network;
+
+ union in_addr_union address, peer;
+ unsigned char prefixlen;
+ int family;
+
+ LIST_FIELDS(Address, addresses);
+};
+
+struct Route {
+ Network *network;
+
+ union in_addr_union dest, gateway;
+ unsigned char prefixlen;
+ int family;
+
+ LIST_FIELDS(Route, routes);
+};
+
+struct Network {
+ /* [Match] */
+ char *ifname;
+
+ /* [Link] */
+ struct ether_addr mac;
+ uint32_t mtu;
+
+ /* [Network] */
+ DHCPType dhcp_type;
+ char **dns;
+ char *vlan;
+ char *bridge;
+ char *bond;
+
+ /* [DHCP] */
+ char *hostname;
+ int dhcp_use_dns;
+
+ LIST_HEAD(Address, addresses);
+ LIST_HEAD(Route, routes);
+};
+
+struct NetDev {
+ /* [NetDev] */
+ char *ifname;
+ char *kind;
+ uint32_t mtu;
+};
+
+struct Link {
+ /* [Match] */
+ char *ifname;
+ struct ether_addr mac;
+};
+
+typedef struct Context {
+ Hashmap *networks_by_name;
+ Hashmap *netdevs_by_name;
+ Hashmap *links_by_name;
+} Context;
+
+int parse_cmdline_item(const char *key, const char *value, void *data);
+int context_merge_networks(Context *context);
+void context_clear(Context *context);
+
+Network *network_get(Context *context, const char *ifname);
+void network_dump(Network *network, FILE *f);
+int network_format(Network *network, char **ret);
+
+NetDev *netdev_get(Context *context, const char *ifname);
+void netdev_dump(NetDev *netdev, FILE *f);
+int netdev_format(NetDev *netdev, char **ret);
+
+Link *link_get(Context *context, const char *ifname);
+void link_dump(Link *link, FILE *f);
+int link_format(Link *link, char **ret);
diff --git a/src/network/generator/test-network-generator.c b/src/network/generator/test-network-generator.c
new file mode 100644
index 0000000..e658d89
--- /dev/null
+++ b/src/network/generator/test-network-generator.c
@@ -0,0 +1,438 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "macro.h"
+#include "network-generator.h"
+#include "string-util.h"
+
+static void test_network_one(const char *ifname, const char *key, const char *value, const char *expected) {
+ _cleanup_(context_clear) Context context = {};
+ _cleanup_free_ char *output = NULL;
+ Network *network;
+
+ printf("# %s=%s\n", key, value);
+ assert_se(parse_cmdline_item(key, value, &context) >= 0);
+ assert_se(network = network_get(&context, ifname));
+ assert_se(network_format(network, &output) >= 0);
+ puts(output);
+ assert_se(streq(output, expected));
+}
+
+static void test_network_two(const char *ifname,
+ const char *key1, const char *value1,
+ const char *key2, const char *value2,
+ const char *expected) {
+ _cleanup_(context_clear) Context context = {};
+ _cleanup_free_ char *output = NULL;
+ Network *network;
+
+ printf("# %s=%s\n", key1, value1);
+ printf("# %s=%s\n", key2, value2);
+ assert_se(parse_cmdline_item(key1, value1, &context) >= 0);
+ assert_se(parse_cmdline_item(key2, value2, &context) >= 0);
+ assert_se(context_merge_networks(&context) >= 0);
+ assert_se(network = network_get(&context, ifname));
+ assert_se(network_format(network, &output) >= 0);
+ puts(output);
+ assert_se(streq(output, expected));
+}
+
+static void test_netdev_one(const char *ifname, const char *key, const char *value, const char *expected) {
+ _cleanup_(context_clear) Context context = {};
+ _cleanup_free_ char *output = NULL;
+ NetDev *netdev;
+
+ printf("# %s=%s\n", key, value);
+ assert_se(parse_cmdline_item(key, value, &context) >= 0);
+ assert_se(netdev = netdev_get(&context, ifname));
+ assert_se(netdev_format(netdev, &output) >= 0);
+ puts(output);
+ assert_se(streq(output, expected));
+}
+
+static void test_link_one(const char *ifname, const char *key, const char *value, const char *expected) {
+ _cleanup_(context_clear) Context context = {};
+ _cleanup_free_ char *output = NULL;
+ Link *link;
+
+ printf("# %s=%s\n", key, value);
+ assert_se(parse_cmdline_item(key, value, &context) >= 0);
+ assert_se(link = link_get(&context, ifname));
+ assert_se(link_format(link, &output) >= 0);
+ puts(output);
+ assert_se(streq(output, expected));
+}
+
+int main(int argc, char *argv[]) {
+ test_network_one("", "ip", "dhcp6",
+ "[Match]\n"
+ "Name=*\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=ipv6\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("eth0", "ip", "eth0:dhcp",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=ipv4\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("eth0", "ip", "eth0:dhcp:1530",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "MTUBytes=1530\n"
+ "\n[Network]\n"
+ "DHCP=ipv4\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("eth0", "ip", "eth0:dhcp:1530:00:11:22:33:44:55",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "MACAddress=00:11:22:33:44:55\n"
+ "MTUBytes=1530\n"
+ "\n[Network]\n"
+ "DHCP=ipv4\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("eth0", "ip", "192.168.0.10::192.168.0.1:255.255.255.0:hogehoge:eth0:on",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_one("eth0", "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_one("eth0", "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on:1530",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "MTUBytes=1530\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_one("eth0", "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on:1530:00:11:22:33:44:55",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "MACAddress=00:11:22:33:44:55\n"
+ "MTUBytes=1530\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_one("eth0", "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on:10.10.10.10",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "DNS=10.10.10.10\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_one("eth0", "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on:10.10.10.10:10.10.10.11",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "DNS=10.10.10.10\n"
+ "DNS=10.10.10.11\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_one("eth0", "ip", "[2001:1234:56:8f63::10]::[2001:1234:56:8f63::1]:64:hogehoge:eth0:on",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=2001:1234:56:8f63::10/64\n"
+ "\n[Route]\n"
+ "Gateway=2001:1234:56:8f63::1\n"
+ );
+
+ test_network_one("eth0", "ip", "[2001:1234:56:8f63::10]:[2001:1234:56:8f63::2]:[2001:1234:56:8f63::1]:64:hogehoge:eth0:on",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=2001:1234:56:8f63::10/64\n"
+ "Peer=2001:1234:56:8f63::2\n"
+ "\n[Route]\n"
+ "Gateway=2001:1234:56:8f63::1\n"
+ );
+
+ test_network_one("", "rd.route", "10.1.2.3/16:10.0.2.3",
+ "[Match]\n"
+ "Name=*\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "\n[DHCP]\n"
+ "\n[Route]\n"
+ "Destination=10.1.2.3/16\n"
+ "Gateway=10.0.2.3\n"
+ );
+
+ test_network_one("eth0", "rd.route", "10.1.2.3/16:10.0.2.3:eth0",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "\n[DHCP]\n"
+ "\n[Route]\n"
+ "Destination=10.1.2.3/16\n"
+ "Gateway=10.0.2.3\n"
+ );
+
+ test_network_one("", "nameserver", "10.1.2.3",
+ "[Match]\n"
+ "Name=*\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DNS=10.1.2.3\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("", "rd.peerdns", "0",
+ "[Match]\n"
+ "Name=*\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "\n[DHCP]\n"
+ "UseDNS=no\n"
+ );
+
+ test_network_one("", "rd.peerdns", "1",
+ "[Match]\n"
+ "Name=*\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "\n[DHCP]\n"
+ "UseDNS=yes\n"
+ );
+
+ test_network_one("eth0", "vlan", "vlan99:eth0",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "VLAN=vlan99\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("eth0", "bridge", "bridge99:eth0,eth1",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "Bridge=bridge99\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("eth1", "bridge", "bridge99:eth0,eth1",
+ "[Match]\n"
+ "Name=eth1\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "Bridge=bridge99\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("eth0", "bond", "bond99:eth0,eth1",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "Bond=bond99\n"
+ "\n[DHCP]\n"
+ );
+
+ test_network_one("eth1", "bond", "bond99:eth0,eth1::1530",
+ "[Match]\n"
+ "Name=eth1\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "Bond=bond99\n"
+ "\n[DHCP]\n"
+ );
+
+ test_netdev_one("bond99", "bond", "bond99:eth0,eth1::1530",
+ "[NetDev]\n"
+ "Kind=bond\n"
+ "Name=bond99\n"
+ "MTUBytes=1530\n"
+ );
+
+ test_link_one("hogehoge", "ifname", "hogehoge:00:11:22:33:44:55",
+ "[Match]\n"
+ "MACAddress=00:11:22:33:44:55\n"
+ "\n[Link]\n"
+ "Name=hogehoge\n"
+ );
+
+ test_network_two("eth0",
+ "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on:10.10.10.10:10.10.10.11",
+ "rd.route", "10.1.2.3/16:10.0.2.3",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "DNS=10.10.10.10\n"
+ "DNS=10.10.10.11\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Destination=10.1.2.3/16\n"
+ "Gateway=10.0.2.3\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_two("eth0",
+ "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on",
+ "nameserver", "10.1.2.3",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "DNS=10.1.2.3\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_two("eth0",
+ "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on:10.10.10.10:10.10.10.11",
+ "nameserver", "10.1.2.3",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "DNS=10.10.10.10\n"
+ "DNS=10.10.10.11\n"
+ "DNS=10.1.2.3\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_two("eth0",
+ "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on:10.10.10.10:10.10.10.11",
+ "rd.peerdns", "1",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "DNS=10.10.10.10\n"
+ "DNS=10.10.10.11\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "UseDNS=yes\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ test_network_two("eth0",
+ "ip", "192.168.0.10:192.168.0.2:192.168.0.1:255.255.255.0:hogehoge:eth0:on:10.10.10.10:10.10.10.11",
+ "bridge", "bridge99:eth0,eth1",
+ "[Match]\n"
+ "Name=eth0\n"
+ "\n[Link]\n"
+ "\n[Network]\n"
+ "DHCP=yes\n"
+ "DNS=10.10.10.10\n"
+ "DNS=10.10.10.11\n"
+ "Bridge=bridge99\n"
+ "\n[DHCP]\n"
+ "Hostname=hogehoge\n"
+ "\n[Address]\n"
+ "Address=192.168.0.10/24\n"
+ "Peer=192.168.0.2\n"
+ "\n[Route]\n"
+ "Gateway=192.168.0.1\n"
+ );
+
+ return 0;
+}
diff --git a/src/network/meson.build b/src/network/meson.build
new file mode 100644
index 0000000..f5ca183
--- /dev/null
+++ b/src/network/meson.build
@@ -0,0 +1,303 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+sources = files('''
+ netdev/bareudp.c
+ netdev/bareudp.h
+ netdev/bond.c
+ netdev/bond.h
+ netdev/bridge.c
+ netdev/bridge.h
+ netdev/dummy.c
+ netdev/dummy.h
+ netdev/ifb.c
+ netdev/ifb.h
+ netdev/ipvlan.c
+ netdev/ipvlan.h
+ netdev/macvlan.c
+ netdev/macvlan.h
+ netdev/netdev.c
+ netdev/netdev.h
+ netdev/nlmon.c
+ netdev/nlmon.h
+ netdev/tunnel.c
+ netdev/tunnel.h
+ netdev/tuntap.c
+ netdev/tuntap.h
+ netdev/vcan.c
+ netdev/vcan.h
+ netdev/veth.c
+ netdev/veth.h
+ netdev/vlan.c
+ netdev/vlan.h
+ netdev/vrf.c
+ netdev/vrf.h
+ netdev/vxlan.c
+ netdev/vxlan.h
+ netdev/geneve.c
+ netdev/geneve.h
+ netdev/vxcan.c
+ netdev/vxcan.h
+ netdev/wireguard.c
+ netdev/wireguard.h
+ netdev/netdevsim.c
+ netdev/netdevsim.h
+ netdev/fou-tunnel.c
+ netdev/fou-tunnel.h
+ netdev/l2tp-tunnel.c
+ netdev/l2tp-tunnel.h
+ netdev/macsec.c
+ netdev/macsec.h
+ netdev/xfrm.c
+ netdev/xfrm.h
+ networkd-address-label.c
+ networkd-address-label.h
+ networkd-address-pool.c
+ networkd-address-pool.h
+ networkd-address.c
+ networkd-address.h
+ networkd-brvlan.c
+ networkd-brvlan.h
+ networkd-can.c
+ networkd-can.h
+ networkd-conf.c
+ networkd-conf.h
+ networkd-dhcp-common.c
+ networkd-dhcp-common.h
+ networkd-dhcp-server-bus.c
+ networkd-dhcp-server-bus.h
+ networkd-dhcp-server.c
+ networkd-dhcp-server.h
+ networkd-dhcp4.c
+ networkd-dhcp4.h
+ networkd-dhcp6.c
+ networkd-dhcp6.h
+ networkd-fdb.c
+ networkd-fdb.h
+ networkd-ipv4ll.c
+ networkd-ipv4ll.h
+ networkd-ipv6-proxy-ndp.c
+ networkd-ipv6-proxy-ndp.h
+ networkd-link-bus.c
+ networkd-link-bus.h
+ networkd-link.c
+ networkd-link.h
+ networkd-lldp-rx.c
+ networkd-lldp-rx.h
+ networkd-lldp-tx.c
+ networkd-lldp-tx.h
+ networkd-manager-bus.c
+ networkd-manager-bus.h
+ networkd-manager.c
+ networkd-manager.h
+ networkd-mdb.c
+ networkd-mdb.h
+ networkd-ndisc.c
+ networkd-ndisc.h
+ networkd-neighbor.c
+ networkd-neighbor.h
+ networkd-radv.c
+ networkd-radv.h
+ networkd-network-bus.c
+ networkd-network-bus.h
+ networkd-network.c
+ networkd-network.h
+ networkd-nexthop.c
+ networkd-nexthop.h
+ networkd-route.c
+ networkd-route.h
+ networkd-routing-policy-rule.c
+ networkd-routing-policy-rule.h
+ networkd-speed-meter.c
+ networkd-speed-meter.h
+ networkd-sriov.c
+ networkd-sriov.h
+ networkd-sysctl.c
+ networkd-sysctl.h
+ networkd-util.c
+ networkd-util.h
+ networkd-wifi.c
+ networkd-wifi.h
+ tc/cake.c
+ tc/cake.h
+ tc/codel.c
+ tc/codel.h
+ tc/drr.c
+ tc/drr.h
+ tc/ets.c
+ tc/ets.h
+ tc/fifo.c
+ tc/fifo.h
+ tc/fq.c
+ tc/fq.h
+ tc/fq-codel.c
+ tc/fq-codel.h
+ tc/fq-pie.c
+ tc/fq-pie.h
+ tc/gred.c
+ tc/gred.h
+ tc/hhf.c
+ tc/hhf.h
+ tc/htb.c
+ tc/htb.h
+ tc/netem.c
+ tc/netem.h
+ tc/pie.c
+ tc/pie.h
+ tc/qdisc.c
+ tc/qdisc.h
+ tc/qfq.c
+ tc/qfq.h
+ tc/sfb.c
+ tc/sfb.h
+ tc/sfq.c
+ tc/sfq.h
+ tc/tbf.c
+ tc/tbf.h
+ tc/tc-util.c
+ tc/tc-util.h
+ tc/tc.c
+ tc/tc.h
+ tc/tclass.c
+ tc/tclass.h
+ tc/teql.c
+ tc/teql.h
+'''.split())
+
+systemd_networkd_sources = files('networkd.c')
+
+systemd_networkd_wait_online_sources = files('''
+ wait-online/link.c
+ wait-online/link.h
+ wait-online/manager.c
+ wait-online/manager.h
+ wait-online/wait-online.c
+'''.split()) + network_internal_h
+
+networkctl_sources = files('networkctl.c')
+
+network_generator_sources = files('''
+ generator/main.c
+ generator/network-generator.c
+ generator/network-generator.h
+'''.split())
+
+network_include_dir = [includes, include_directories(['.', 'netdev', 'tc'])]
+
+if conf.get('ENABLE_NETWORKD') == 1
+ if get_option('link-networkd-shared')
+ networkd_link_with = [libshared]
+ else
+ networkd_link_with = [libsystemd_static,
+ libshared_static,
+ libjournal_client,
+ libbasic_gcrypt]
+ endif
+
+ networkd_gperf_c = custom_target(
+ 'networkd-gperf.c',
+ input : 'networkd-gperf.gperf',
+ output : 'networkd-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+ networkd_network_gperf_c = custom_target(
+ 'networkd-network-gperf.c',
+ input : 'networkd-network-gperf.gperf',
+ output : 'networkd-network-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+ netdev_gperf_c = custom_target(
+ 'netdev-gperf.c',
+ input : 'netdev/netdev-gperf.gperf',
+ output : 'netdev-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+ libnetworkd_core = static_library(
+ 'networkd-core',
+ sources,
+ network_internal_h,
+ networkd_gperf_c,
+ networkd_network_gperf_c,
+ netdev_gperf_c,
+ include_directories : network_include_dir,
+ link_with : [networkd_link_with])
+
+ install_data('org.freedesktop.network1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.network1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.network1.policy',
+ install_dir : polkitpolicydir)
+ if install_polkit
+ install_data('systemd-networkd.rules',
+ install_dir : polkitrulesdir)
+ endif
+ if install_polkit_pkla
+ install_data('systemd-networkd.pkla',
+ install_dir : polkitpkladir)
+ endif
+
+ if install_sysconfdir
+ install_data('networkd.conf',
+ install_dir : pkgsysconfdir)
+ endif
+
+ fuzzers += [
+ [['src/network/fuzz-netdev-parser.c',
+ 'src/fuzz/fuzz.h'],
+ [libnetworkd_core,
+ libudev_static,
+ libsystemd_network,
+ networkd_link_with],
+ [threads],
+ [],
+ network_include_dir],
+
+ [['src/network/fuzz-network-parser.c',
+ 'src/fuzz/fuzz.h'],
+ [libnetworkd_core,
+ libudev_static,
+ libsystemd_network,
+ networkd_link_with],
+ [threads],
+ [],
+ network_include_dir],
+ ]
+
+ tests += [
+ [['src/network/test-networkd-conf.c'],
+ [libnetworkd_core,
+ libsystemd_network,
+ libudev],
+ [], '', '', [], network_include_dir],
+
+ [['src/network/test-network.c'],
+ [libnetworkd_core,
+ libudev_static,
+ libsystemd_network,
+ networkd_link_with],
+ [threads],
+ '', '', [], network_include_dir],
+
+ [['src/network/test-routing-policy-rule.c'],
+ [libnetworkd_core,
+ libsystemd_network,
+ libudev],
+ [], '', '', [], network_include_dir],
+
+ [['src/network/test-network-tables.c',
+ test_tables_h],
+ [libnetworkd_core,
+ libudev_static,
+ libsystemd_network,
+ networkd_link_with],
+ [threads],
+ '', '', [],
+ [network_include_dir]],
+
+ [['src/network/generator/test-network-generator.c',
+ 'src/network/generator/network-generator.c',
+ 'src/network/generator/network-generator.h'],
+ [networkd_link_with],
+ [], '', '', [], network_include_dir],
+ ]
+endif
diff --git a/src/network/netdev/bareudp.c b/src/network/netdev/bareudp.c
new file mode 100644
index 0000000..22c0e49
--- /dev/null
+++ b/src/network/netdev/bareudp.c
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include "bareudp.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "string-table.h"
+
+static const char* const bare_udp_protocol_table[_BARE_UDP_PROTOCOL_MAX] = {
+ [BARE_UDP_PROTOCOL_IPV4] = "ipv4",
+ [BARE_UDP_PROTOCOL_IPV6] = "ipv6",
+ [BARE_UDP_PROTOCOL_MPLS_UC] = "mpls-uc",
+ [BARE_UDP_PROTOCOL_MPLS_MC] = "mpls-mc",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bare_udp_protocol, BareUDPProtocol);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bare_udp_iftype, bare_udp_protocol, BareUDPProtocol,
+ "Failed to parse EtherType=");
+
+/* callback for bareudp netdev's created without a backing Link */
+static int bare_udp_netdev_create_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev, "BareUDP netdev exists, using existing without changing its parameters.");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "BareUDP netdev could not be created: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "BareUDP created.");
+
+ return 1;
+}
+
+static int netdev_bare_udp_create(NetDev *netdev) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ BareUDP *u;
+ int r;
+
+ assert(netdev);
+
+ u = BAREUDP(netdev);
+
+ assert(u);
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_NEWLINK message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, netdev->ifname);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, netdev_kind_to_string(netdev->kind));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_BAREUDP_ETHERTYPE, htobe16(u->iftype));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BAREUDP_ETHERTYPE attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_BAREUDP_PORT, htobe16(u->dest_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BAREUDP_PORT attribute: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = netlink_call_async(netdev->manager->rtnl, NULL, m, bare_udp_netdev_create_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ netdev_ref(netdev);
+ netdev->state = NETDEV_STATE_CREATING;
+
+ log_netdev_debug(netdev, "Creating");
+
+ return r;
+}
+
+static int netdev_bare_udp_verify(NetDev *netdev, const char *filename) {
+ BareUDP *u;
+
+ assert(netdev);
+ assert(filename);
+
+ u = BAREUDP(netdev);
+
+ assert(u);
+
+ if (u->dest_port == 0)
+ return log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: BareUDP DesinationPort= is not set. Ignoring.", filename);
+
+ if (u->iftype == _BARE_UDP_PROTOCOL_INVALID)
+ return log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: BareUDP EtherType= is not set. Ignoring.", filename);
+
+ return 0;
+}
+
+static void bare_udp_init(NetDev *netdev) {
+ BareUDP *u;
+
+ assert(netdev);
+
+ u = BAREUDP(netdev);
+
+ assert(u);
+
+ u->iftype = _BARE_UDP_PROTOCOL_INVALID;
+}
+
+const NetDevVTable bare_udp_vtable = {
+ .object_size = sizeof(BareUDP),
+ .sections = NETDEV_COMMON_SECTIONS "BareUDP\0",
+ .init = bare_udp_init,
+ .config_verify = netdev_bare_udp_verify,
+ .create = netdev_bare_udp_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
diff --git a/src/network/netdev/bareudp.h b/src/network/netdev/bareudp.h
new file mode 100644
index 0000000..ea80bbf
--- /dev/null
+++ b/src/network/netdev/bareudp.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+typedef struct BareUDP BareUDP;
+
+#include <linux/if_ether.h>
+
+#include "conf-parser.h"
+#include "netdev.h"
+
+typedef enum BareUDPProtocol {
+ BARE_UDP_PROTOCOL_IPV4 = ETH_P_IP,
+ BARE_UDP_PROTOCOL_IPV6 = ETH_P_IPV6,
+ BARE_UDP_PROTOCOL_MPLS_UC = ETH_P_MPLS_UC,
+ BARE_UDP_PROTOCOL_MPLS_MC = ETH_P_MPLS_MC,
+ _BARE_UDP_PROTOCOL_MAX,
+ _BARE_UDP_PROTOCOL_INVALID = -1
+} BareUDPProtocol;
+
+struct BareUDP {
+ NetDev meta;
+
+ BareUDPProtocol iftype;
+ uint16_t dest_port;
+};
+
+DEFINE_NETDEV_CAST(BAREUDP, BareUDP);
+extern const NetDevVTable bare_udp_vtable;
+
+const char *bare_udp_protocol_to_string(BareUDPProtocol d) _const_;
+BareUDPProtocol bare_udp_protocol_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_bare_udp_iftype);
diff --git a/src/network/netdev/bond.c b/src/network/netdev/bond.c
new file mode 100644
index 0000000..e27f360
--- /dev/null
+++ b/src/network/netdev/bond.c
@@ -0,0 +1,527 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bond.h"
+#include "bond-util.h"
+#include "conf-parser.h"
+#include "ether-addr-util.h"
+#include "extract-word.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "string-table.h"
+
+/*
+ * Number of seconds between instances where the bonding
+ * driver sends learning packets to each slaves peer switch
+ */
+#define LEARNING_PACKETS_INTERVAL_MIN_SEC (1 * USEC_PER_SEC)
+#define LEARNING_PACKETS_INTERVAL_MAX_SEC (0x7fffffff * USEC_PER_SEC)
+
+/* Number of IGMP membership reports to be issued after
+ * a failover event.
+ */
+#define RESEND_IGMP_MIN 0
+#define RESEND_IGMP_MAX 255
+#define RESEND_IGMP_DEFAULT 1
+
+/*
+ * Number of packets to transmit through a slave before
+ * moving to the next one.
+ */
+#define PACKETS_PER_SLAVE_MIN 0
+#define PACKETS_PER_SLAVE_MAX 65535
+#define PACKETS_PER_SLAVE_DEFAULT 1
+
+/*
+ * Number of peer notifications (gratuitous ARPs and
+ * unsolicited IPv6 Neighbor Advertisements) to be issued after a
+ * failover event.
+ */
+#define GRATUITOUS_ARP_MIN 0
+#define GRATUITOUS_ARP_MAX 255
+#define GRATUITOUS_ARP_DEFAULT 1
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_mode, bond_mode, BondMode, "Failed to parse bond mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_xmit_hash_policy,
+ bond_xmit_hash_policy,
+ BondXmitHashPolicy,
+ "Failed to parse bond transmit hash policy");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_lacp_rate, bond_lacp_rate, BondLacpRate, "Failed to parse bond lacp rate");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_ad_select, bond_ad_select, BondAdSelect, "Failed to parse bond AD select");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_fail_over_mac, bond_fail_over_mac, BondFailOverMac, "Failed to parse bond fail over MAC");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_arp_validate, bond_arp_validate, BondArpValidate, "Failed to parse bond arp validate");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_arp_all_targets, bond_arp_all_targets, BondArpAllTargets, "Failed to parse bond Arp all targets");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_bond_primary_reselect, bond_primary_reselect, BondPrimaryReselect, "Failed to parse bond primary reselect");
+
+static int netdev_bond_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Bond *b;
+ int r;
+
+ assert(netdev);
+ assert(!link);
+ assert(m);
+
+ b = BOND(netdev);
+
+ assert(b);
+
+ if (b->mode != _NETDEV_BOND_MODE_INVALID) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_MODE, b->mode);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_MODE attribute: %m");
+ }
+
+ if (b->xmit_hash_policy != _NETDEV_BOND_XMIT_HASH_POLICY_INVALID) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_XMIT_HASH_POLICY, b->xmit_hash_policy);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_XMIT_HASH_POLICY attribute: %m");
+ }
+
+ if (b->lacp_rate != _NETDEV_BOND_LACP_RATE_INVALID &&
+ b->mode == NETDEV_BOND_MODE_802_3AD) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_AD_LACP_RATE, b->lacp_rate);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_LACP_RATE attribute: %m");
+ }
+
+ if (b->miimon != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_MIIMON, b->miimon / USEC_PER_MSEC);
+ if (r < 0)
+ log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_BOND_MIIMON attribute: %m");
+ }
+
+ if (b->downdelay != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_DOWNDELAY, b->downdelay / USEC_PER_MSEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_DOWNDELAY attribute: %m");
+ }
+
+ if (b->updelay != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_UPDELAY, b->updelay / USEC_PER_MSEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_UPDELAY attribute: %m");
+ }
+
+ if (b->arp_interval != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_ARP_INTERVAL, b->arp_interval / USEC_PER_MSEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_INTERVAL attribute: %m");
+
+ if (b->lp_interval >= LEARNING_PACKETS_INTERVAL_MIN_SEC &&
+ b->lp_interval <= LEARNING_PACKETS_INTERVAL_MAX_SEC) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_LP_INTERVAL, b->lp_interval / USEC_PER_SEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_LP_INTERVAL attribute: %m");
+ }
+ }
+
+ if (b->ad_select != _NETDEV_BOND_AD_SELECT_INVALID &&
+ b->mode == NETDEV_BOND_MODE_802_3AD) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_AD_SELECT, b->ad_select);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_SELECT attribute: %m");
+ }
+
+ if (b->fail_over_mac != _NETDEV_BOND_FAIL_OVER_MAC_INVALID &&
+ b->mode == NETDEV_BOND_MODE_ACTIVE_BACKUP) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_FAIL_OVER_MAC, b->fail_over_mac);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_FAIL_OVER_MAC attribute: %m");
+ }
+
+ if (b->arp_validate != _NETDEV_BOND_ARP_VALIDATE_INVALID) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_ARP_VALIDATE, b->arp_validate);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_VALIDATE attribute: %m");
+ }
+
+ if (b->arp_all_targets != _NETDEV_BOND_ARP_ALL_TARGETS_INVALID) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_ARP_ALL_TARGETS, b->arp_all_targets);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_ALL_TARGETS attribute: %m");
+ }
+
+ if (b->primary_reselect != _NETDEV_BOND_PRIMARY_RESELECT_INVALID) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_PRIMARY_RESELECT, b->primary_reselect);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_PRIMARY_RESELECT attribute: %m");
+ }
+
+ if (b->resend_igmp <= RESEND_IGMP_MAX) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_RESEND_IGMP, b->resend_igmp);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_RESEND_IGMP attribute: %m");
+ }
+
+ if (b->packets_per_slave <= PACKETS_PER_SLAVE_MAX &&
+ b->mode == NETDEV_BOND_MODE_BALANCE_RR) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_PACKETS_PER_SLAVE, b->packets_per_slave);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_PACKETS_PER_SLAVE attribute: %m");
+ }
+
+ if (b->num_grat_arp <= GRATUITOUS_ARP_MAX) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_NUM_PEER_NOTIF, b->num_grat_arp);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_NUM_PEER_NOTIF attribute: %m");
+ }
+
+ if (b->min_links != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_BOND_MIN_LINKS, b->min_links);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_MIN_LINKS attribute: %m");
+ }
+
+ if (b->ad_actor_sys_prio != 0) {
+ r = sd_netlink_message_append_u16(m, IFLA_BOND_AD_ACTOR_SYS_PRIO, b->ad_actor_sys_prio);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_ACTOR_SYS_PRIO attribute: %m");
+ }
+
+ if (b->ad_user_port_key != 0) {
+ r = sd_netlink_message_append_u16(m, IFLA_BOND_AD_USER_PORT_KEY, b->ad_user_port_key);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_USER_PORT_KEY attribute: %m");
+ }
+
+ if (!ether_addr_is_null(&b->ad_actor_system)) {
+ r = sd_netlink_message_append_ether_addr(m, IFLA_BOND_AD_ACTOR_SYSTEM, &b->ad_actor_system);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_AD_ACTOR_SYSTEM attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_ALL_SLAVES_ACTIVE, b->all_slaves_active);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ALL_SLAVES_ACTIVE attribute: %m");
+
+ if (b->tlb_dynamic_lb >= 0) {
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_TLB_DYNAMIC_LB, b->tlb_dynamic_lb);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_TLB_DYNAMIC_LB attribute: %m");
+ }
+
+ if (b->arp_interval > 0 && !ordered_set_isempty(b->arp_ip_targets)) {
+ void *val;
+ int n = 0;
+
+ r = sd_netlink_message_open_container(m, IFLA_BOND_ARP_IP_TARGET);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not open contaniner IFLA_BOND_ARP_IP_TARGET : %m");
+
+ ORDERED_SET_FOREACH(val, b->arp_ip_targets) {
+ r = sd_netlink_message_append_u32(m, n++, PTR_TO_UINT32(val));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_ALL_TARGETS attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not close contaniner IFLA_BOND_ARP_IP_TARGET : %m");
+ }
+
+ return 0;
+}
+
+static int link_set_bond_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set bonding interface: %m");
+ return 1;
+ }
+
+ return 1;
+}
+
+int link_set_bond(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_NEWLINK, link->network->bond->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_set_flags(req, NLM_F_REQUEST | NLM_F_ACK);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set netlink flags: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_LINKINFO);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_PROTINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(req, IFLA_INFO_DATA, "bond");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ if (link->network->active_slave) {
+ r = sd_netlink_message_append_u32(req, IFLA_BOND_ACTIVE_SLAVE, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BOND_ACTIVE_SLAVE attribute: %m");
+ }
+
+ if (link->network->primary_slave) {
+ r = sd_netlink_message_append_u32(req, IFLA_BOND_PRIMARY, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BOND_PRIMARY attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_set_bond_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return r;
+}
+
+int config_parse_arp_ip_target_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Bond *b = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ b->arp_ip_targets = ordered_set_free(b->arp_ip_targets);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *n = NULL;
+ union in_addr_union ip;
+
+ r = extract_first_word(&p, &n, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse Bond ARP IP target address, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = in_addr_from_string(AF_INET, n, &ip);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Bond ARP IP target address is invalid, ignoring assignment: %s", n);
+ continue;
+ }
+
+ r = ordered_set_ensure_allocated(&b->arp_ip_targets, NULL);
+ if (r < 0)
+ return log_oom();
+
+ if (ordered_set_size(b->arp_ip_targets) >= NETDEV_BOND_ARP_TARGETS_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Too many ARP IP targets are specified. The maximum number is %d. Ignoring assignment: %s",
+ NETDEV_BOND_ARP_TARGETS_MAX, n);
+ continue;
+ }
+
+ r = ordered_set_put(b->arp_ip_targets, UINT32_TO_PTR(ip.in.s_addr));
+ if (r == -EEXIST)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Bond ARP IP target address is duplicated, ignoring assignment: %s", n);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store bond ARP IP target address '%s', ignoring assignment: %m", n);
+ }
+}
+
+int config_parse_ad_actor_sys_prio(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Bond *b = userdata;
+ uint16_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou16(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse actor system priority '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (v == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse actor system priority '%s'. Range is [1,65535], ignoring.",
+ rvalue);
+ return 0;
+ }
+
+ b->ad_actor_sys_prio = v;
+
+ return 0;
+}
+
+int config_parse_ad_user_port_key(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Bond *b = userdata;
+ uint16_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou16(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse user port key '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (v > 1023) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse user port key '%s'. Range is [0…1023], ignoring.", rvalue);
+ return 0;
+ }
+
+ b->ad_user_port_key = v;
+
+ return 0;
+}
+
+int config_parse_ad_actor_system(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Bond *b = userdata;
+ struct ether_addr n;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = ether_addr_from_string(rvalue, &n);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Not a valid MAC address %s. Ignoring assignment: %m",
+ rvalue);
+ return 0;
+ }
+ if (ether_addr_is_null(&n) || (n.ether_addr_octet[0] & 0x01)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Not an appropriate MAC address %s, cannot be null or multicast. Ignoring assignment.",
+ rvalue);
+ return 0;
+ }
+
+ b->ad_actor_system = n;
+
+ return 0;
+}
+
+static void bond_done(NetDev *netdev) {
+ Bond *b;
+
+ assert(netdev);
+ b = BOND(netdev);
+ assert(b);
+
+ ordered_set_free(b->arp_ip_targets);
+}
+
+static void bond_init(NetDev *netdev) {
+ Bond *b;
+
+ assert(netdev);
+
+ b = BOND(netdev);
+
+ assert(b);
+
+ b->mode = _NETDEV_BOND_MODE_INVALID;
+ b->xmit_hash_policy = _NETDEV_BOND_XMIT_HASH_POLICY_INVALID;
+ b->lacp_rate = _NETDEV_BOND_LACP_RATE_INVALID;
+ b->ad_select = _NETDEV_BOND_AD_SELECT_INVALID;
+ b->fail_over_mac = _NETDEV_BOND_FAIL_OVER_MAC_INVALID;
+ b->arp_validate = _NETDEV_BOND_ARP_VALIDATE_INVALID;
+ b->arp_all_targets = _NETDEV_BOND_ARP_ALL_TARGETS_INVALID;
+ b->primary_reselect = _NETDEV_BOND_PRIMARY_RESELECT_INVALID;
+
+ b->all_slaves_active = false;
+ b->tlb_dynamic_lb = -1;
+
+ b->resend_igmp = RESEND_IGMP_DEFAULT;
+ b->packets_per_slave = PACKETS_PER_SLAVE_DEFAULT;
+ b->num_grat_arp = GRATUITOUS_ARP_DEFAULT;
+ b->lp_interval = LEARNING_PACKETS_INTERVAL_MIN_SEC;
+}
+
+const NetDevVTable bond_vtable = {
+ .object_size = sizeof(Bond),
+ .init = bond_init,
+ .done = bond_done,
+ .sections = NETDEV_COMMON_SECTIONS "Bond\0",
+ .fill_message_create = netdev_bond_fill_message_create,
+ .create_type = NETDEV_CREATE_MASTER,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/bond.h b/src/network/netdev/bond.h
new file mode 100644
index 0000000..11d3e9b
--- /dev/null
+++ b/src/network/netdev/bond.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_bonding.h>
+
+#include "bond-util.h"
+#include "macro.h"
+#include "netdev.h"
+#include "ordered-set.h"
+
+typedef struct Bond {
+ NetDev meta;
+
+ BondMode mode;
+ BondXmitHashPolicy xmit_hash_policy;
+ BondLacpRate lacp_rate;
+ BondAdSelect ad_select;
+ BondFailOverMac fail_over_mac;
+ BondArpValidate arp_validate;
+ BondArpAllTargets arp_all_targets;
+ BondPrimaryReselect primary_reselect;
+
+ int tlb_dynamic_lb;
+
+ bool all_slaves_active;
+
+ unsigned resend_igmp;
+ unsigned packets_per_slave;
+ unsigned num_grat_arp;
+ unsigned min_links;
+
+ uint16_t ad_actor_sys_prio;
+ uint16_t ad_user_port_key;
+ struct ether_addr ad_actor_system;
+
+ usec_t miimon;
+ usec_t updelay;
+ usec_t downdelay;
+ usec_t arp_interval;
+ usec_t lp_interval;
+
+ OrderedSet *arp_ip_targets;
+} Bond;
+
+DEFINE_NETDEV_CAST(BOND, Bond);
+extern const NetDevVTable bond_vtable;
+
+int link_set_bond(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_xmit_hash_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_lacp_rate);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_ad_select);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_fail_over_mac);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_arp_validate);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_arp_all_targets);
+CONFIG_PARSER_PROTOTYPE(config_parse_bond_primary_reselect);
+CONFIG_PARSER_PROTOTYPE(config_parse_arp_ip_target_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_ad_actor_sys_prio);
+CONFIG_PARSER_PROTOTYPE(config_parse_ad_user_port_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_ad_actor_system);
diff --git a/src/network/netdev/bridge.c b/src/network/netdev/bridge.c
new file mode 100644
index 0000000..1f59cd8
--- /dev/null
+++ b/src/network/netdev/bridge.c
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "bridge.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "networkd-manager.h"
+#include "string-table.h"
+#include "vlan-util.h"
+
+static const char* const multicast_router_table[_MULTICAST_ROUTER_MAX] = {
+ [MULTICAST_ROUTER_NONE] = "no",
+ [MULTICAST_ROUTER_TEMPORARY_QUERY] = "query",
+ [MULTICAST_ROUTER_PERMANENT] = "permanent",
+ [MULTICAST_ROUTER_TEMPORARY] = "temporary",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(multicast_router, MulticastRouter, _MULTICAST_ROUTER_INVALID);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_multicast_router, multicast_router, MulticastRouter,
+ "Failed to parse bridge multicast router setting");
+
+/* callback for bridge netdev's parameter set */
+static int netdev_bridge_set_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(m);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "Bridge parameters could not be set: %m");
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Bridge parameters set success");
+
+ return 1;
+}
+
+static int netdev_bridge_post_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ Bridge *b;
+ int r;
+
+ assert(netdev);
+
+ b = BRIDGE(netdev);
+
+ assert(b);
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &req, RTM_NEWLINK, netdev->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_set_flags(req, NLM_F_REQUEST | NLM_F_ACK);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set netlink flags: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(req, IFLA_INFO_DATA, netdev_kind_to_string(netdev->kind));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ /* convert to jiffes */
+ if (b->forward_delay != USEC_INFINITY) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_FORWARD_DELAY, usec_to_jiffies(b->forward_delay));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_FORWARD_DELAY attribute: %m");
+ }
+
+ if (b->hello_time > 0) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_HELLO_TIME, usec_to_jiffies(b->hello_time));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_HELLO_TIME attribute: %m");
+ }
+
+ if (b->max_age > 0) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_MAX_AGE, usec_to_jiffies(b->max_age));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MAX_AGE attribute: %m");
+ }
+
+ if (b->ageing_time != USEC_INFINITY) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_AGEING_TIME, usec_to_jiffies(b->ageing_time));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_AGEING_TIME attribute: %m");
+ }
+
+ if (b->priority > 0) {
+ r = sd_netlink_message_append_u16(req, IFLA_BR_PRIORITY, b->priority);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_PRIORITY attribute: %m");
+ }
+
+ if (b->group_fwd_mask > 0) {
+ r = sd_netlink_message_append_u16(req, IFLA_BR_GROUP_FWD_MASK, b->group_fwd_mask);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_GROUP_FWD_MASK attribute: %m");
+ }
+
+ if (b->default_pvid != VLANID_INVALID) {
+ r = sd_netlink_message_append_u16(req, IFLA_BR_VLAN_DEFAULT_PVID, b->default_pvid);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_VLAN_DEFAULT_PVID attribute: %m");
+ }
+
+ if (b->mcast_querier >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BR_MCAST_QUERIER, b->mcast_querier);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MCAST_QUERIER attribute: %m");
+ }
+
+ if (b->mcast_snooping >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BR_MCAST_SNOOPING, b->mcast_snooping);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MCAST_SNOOPING attribute: %m");
+ }
+
+ if (b->vlan_filtering >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BR_VLAN_FILTERING, b->vlan_filtering);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_VLAN_FILTERING attribute: %m");
+ }
+
+ if (b->vlan_protocol >= 0) {
+ r = sd_netlink_message_append_u16(req, IFLA_BR_VLAN_PROTOCOL, b->vlan_protocol);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_VLAN_PROTOCOL attribute: %m");
+ }
+
+ if (b->stp >= 0) {
+ r = sd_netlink_message_append_u32(req, IFLA_BR_STP_STATE, b->stp);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_STP_STATE attribute: %m");
+ }
+
+ if (b->igmp_version > 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BR_MCAST_IGMP_VERSION, b->igmp_version);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MCAST_IGMP_VERSION attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = netlink_call_async(netdev->manager->rtnl, NULL, req, netdev_bridge_set_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ netdev_ref(netdev);
+
+ return r;
+}
+
+static int link_set_bridge_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not set bridge interface: %m");
+ return 1;
+ }
+
+ return 1;
+}
+
+int link_set_bridge(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_family(req, AF_BRIDGE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set message family: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_PROTINFO);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_PROTINFO attribute: %m");
+
+ if (link->network->use_bpdu >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_GUARD, link->network->use_bpdu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_GUARD attribute: %m");
+ }
+
+ if (link->network->hairpin >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_MODE, link->network->hairpin);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_MODE attribute: %m");
+ }
+
+ if (link->network->fast_leave >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_FAST_LEAVE, link->network->fast_leave);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_FAST_LEAVE attribute: %m");
+ }
+
+ if (link->network->allow_port_to_be_root >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_PROTECT, link->network->allow_port_to_be_root);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_PROTECT attribute: %m");
+ }
+
+ if (link->network->unicast_flood >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_UNICAST_FLOOD, link->network->unicast_flood);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_UNICAST_FLOOD attribute: %m");
+ }
+
+ if (link->network->multicast_flood >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_MCAST_FLOOD, link->network->multicast_flood);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_MCAST_FLOOD attribute: %m");
+ }
+
+ if (link->network->multicast_to_unicast >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_MCAST_TO_UCAST, link->network->multicast_to_unicast);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_MCAST_TO_UCAST attribute: %m");
+ }
+
+ if (link->network->neighbor_suppression >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_NEIGH_SUPPRESS, link->network->neighbor_suppression);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_NEIGH_SUPPRESS attribute: %m");
+ }
+
+ if (link->network->learning >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_LEARNING, link->network->learning);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_LEARNING attribute: %m");
+ }
+
+ if (link->network->bridge_proxy_arp >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_PROXYARP, link->network->bridge_proxy_arp);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_PROXYARP attribute: %m");
+ }
+
+ if (link->network->bridge_proxy_arp_wifi >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_PROXYARP_WIFI, link->network->bridge_proxy_arp_wifi);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_PROXYARP_WIFI attribute: %m");
+ }
+
+ if (link->network->cost != 0) {
+ r = sd_netlink_message_append_u32(req, IFLA_BRPORT_COST, link->network->cost);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_COST attribute: %m");
+ }
+
+ if (link->network->priority != LINK_BRIDGE_PORT_PRIORITY_INVALID) {
+ r = sd_netlink_message_append_u16(req, IFLA_BRPORT_PRIORITY, link->network->priority);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_PRIORITY attribute: %m");
+ }
+
+ if (link->network->multicast_router != _MULTICAST_ROUTER_INVALID) {
+ r = sd_netlink_message_append_u8(req, IFLA_BRPORT_MULTICAST_ROUTER, link->network->multicast_router);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRPORT_MULTICAST_ROUTER attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_set_bridge_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return r;
+}
+
+int config_parse_bridge_igmp_version(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Bridge *b = userdata;
+ uint8_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ b->igmp_version = 0; /* 0 means unset. */
+ return 0;
+ }
+
+ r = safe_atou8(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse bridge's multicast IGMP version number '%s', ignoring assignment: %m",
+ rvalue);
+ return 0;
+ }
+ if (!IN_SET(u, 2, 3)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid bridge's multicast IGMP version number '%s', ignoring assignment.", rvalue);
+ return 0;
+ }
+
+ b->igmp_version = u;
+
+ return 0;
+}
+
+static void bridge_init(NetDev *n) {
+ Bridge *b;
+
+ b = BRIDGE(n);
+
+ assert(b);
+
+ b->mcast_querier = -1;
+ b->mcast_snooping = -1;
+ b->vlan_filtering = -1;
+ b->vlan_protocol = -1;
+ b->stp = -1;
+ b->default_pvid = VLANID_INVALID;
+ b->forward_delay = USEC_INFINITY;
+ b->ageing_time = USEC_INFINITY;
+}
+
+const NetDevVTable bridge_vtable = {
+ .object_size = sizeof(Bridge),
+ .init = bridge_init,
+ .sections = NETDEV_COMMON_SECTIONS "Bridge\0",
+ .post_create = netdev_bridge_post_create,
+ .create_type = NETDEV_CREATE_MASTER,
+};
diff --git a/src/network/netdev/bridge.h b/src/network/netdev/bridge.h
new file mode 100644
index 0000000..d6abda9
--- /dev/null
+++ b/src/network/netdev/bridge.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_bridge.h>
+
+#include "conf-parser.h"
+#include "netdev.h"
+
+typedef struct Bridge {
+ NetDev meta;
+
+ int mcast_querier;
+ int mcast_snooping;
+ int vlan_filtering;
+ int vlan_protocol;
+ int stp;
+ uint16_t priority;
+ uint16_t group_fwd_mask;
+ uint16_t default_pvid;
+ uint8_t igmp_version;
+
+ usec_t forward_delay;
+ usec_t hello_time;
+ usec_t max_age;
+ usec_t ageing_time;
+} Bridge;
+
+typedef enum MulticastRouter {
+ MULTICAST_ROUTER_NONE = MDB_RTR_TYPE_DISABLED,
+ MULTICAST_ROUTER_TEMPORARY_QUERY = MDB_RTR_TYPE_TEMP_QUERY,
+ MULTICAST_ROUTER_PERMANENT = MDB_RTR_TYPE_PERM,
+ MULTICAST_ROUTER_TEMPORARY = MDB_RTR_TYPE_TEMP,
+ _MULTICAST_ROUTER_MAX,
+ _MULTICAST_ROUTER_INVALID = -1,
+} MulticastRouter;
+
+DEFINE_NETDEV_CAST(BRIDGE, Bridge);
+extern const NetDevVTable bridge_vtable;
+
+int link_set_bridge(Link *link);
+
+const char* multicast_router_to_string(MulticastRouter i) _const_;
+MulticastRouter multicast_router_from_string(const char *s) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_multicast_router);
+CONFIG_PARSER_PROTOTYPE(config_parse_bridge_igmp_version);
diff --git a/src/network/netdev/dummy.c b/src/network/netdev/dummy.c
new file mode 100644
index 0000000..754ee98
--- /dev/null
+++ b/src/network/netdev/dummy.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dummy.h"
+
+const NetDevVTable dummy_vtable = {
+ .object_size = sizeof(Dummy),
+ .sections = NETDEV_COMMON_SECTIONS,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/dummy.h b/src/network/netdev/dummy.h
new file mode 100644
index 0000000..eafdf4b
--- /dev/null
+++ b/src/network/netdev/dummy.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "netdev.h"
+
+typedef struct Dummy {
+ NetDev meta;
+} Dummy;
+
+DEFINE_NETDEV_CAST(DUMMY, Dummy);
+extern const NetDevVTable dummy_vtable;
diff --git a/src/network/netdev/fou-tunnel.c b/src/network/netdev/fou-tunnel.c
new file mode 100644
index 0000000..6863257
--- /dev/null
+++ b/src/network/netdev/fou-tunnel.c
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/fou.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <linux/ip.h>
+
+#include "conf-parser.h"
+#include "fou-tunnel.h"
+#include "ip-protocol-list.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+static const char* const fou_encap_type_table[_NETDEV_FOO_OVER_UDP_ENCAP_MAX] = {
+ [NETDEV_FOO_OVER_UDP_ENCAP_DIRECT] = "FooOverUDP",
+ [NETDEV_FOO_OVER_UDP_ENCAP_GUE] = "GenericUDPEncapsulation",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(fou_encap_type, FooOverUDPEncapType);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_fou_encap_type, fou_encap_type, FooOverUDPEncapType,
+ "Failed to parse Encapsulation=");
+
+static int netdev_fill_fou_tunnel_message(NetDev *netdev, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ FouTunnel *t;
+ uint8_t encap_type;
+ int r;
+
+ assert(netdev);
+
+ t = FOU(netdev);
+
+ assert(t);
+
+ r = sd_genl_message_new(netdev->manager->genl, SD_GENL_FOU, FOU_CMD_ADD, &m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to allocate generic netlink message: %m");
+
+ r = sd_netlink_message_append_u16(m, FOU_ATTR_PORT, htobe16(t->port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_PORT attribute: %m");
+
+ if (IN_SET(t->peer_family, AF_INET, AF_INET6)) {
+ r = sd_netlink_message_append_u16(m, FOU_ATTR_PEER_PORT, htobe16(t->peer_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_PEER_PORT attribute: %m");
+ }
+
+ switch (t->fou_encap_type) {
+ case NETDEV_FOO_OVER_UDP_ENCAP_DIRECT:
+ encap_type = FOU_ENCAP_DIRECT;
+ break;
+ case NETDEV_FOO_OVER_UDP_ENCAP_GUE:
+ encap_type = FOU_ENCAP_GUE;
+ break;
+ default:
+ assert_not_reached("invalid encap type");
+ }
+
+ r = sd_netlink_message_append_u8(m, FOU_ATTR_TYPE, encap_type);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_TYPE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, FOU_ATTR_AF, AF_INET);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_AF attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, FOU_ATTR_IPPROTO, t->fou_protocol);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_IPPROTO attribute: %m");
+
+ if (t->local_family == AF_INET) {
+ r = sd_netlink_message_append_in_addr(m, FOU_ATTR_LOCAL_V4, &t->local.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_LOCAL_V4 attribute: %m");
+ } else if (t->local_family == AF_INET6) {
+ r = sd_netlink_message_append_in6_addr(m, FOU_ATTR_LOCAL_V6, &t->local.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_LOCAL_V6 attribute: %m");
+ }
+
+ if (t->peer_family == AF_INET) {
+ r = sd_netlink_message_append_in_addr(m, FOU_ATTR_PEER_V4, &t->peer.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_PEER_V4 attribute: %m");
+ } else if (t->peer_family == AF_INET6){
+ r = sd_netlink_message_append_in6_addr(m, FOU_ATTR_PEER_V6, &t->peer.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append FOU_ATTR_PEER_V6 attribute: %m");
+ }
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static int fou_tunnel_create_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev, "netdev exists, using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "netdev could not be created: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "FooOverUDP tunnel is created");
+ return 1;
+}
+
+static int netdev_fou_tunnel_create(NetDev *netdev) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(netdev);
+ assert(FOU(netdev));
+
+ r = netdev_fill_fou_tunnel_message(netdev, &m);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(netdev->manager->genl, NULL, m, fou_tunnel_create_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to create FooOverUDP tunnel: %m");
+
+ netdev_ref(netdev);
+ return 0;
+}
+
+int config_parse_ip_protocol(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint8_t *ret = data;
+ unsigned protocol;
+ /* linux/fou.h defines the netlink field as one byte, so we need to reject protocols numbers that
+ * don't fit in one byte. */
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_ip_protocol(rvalue);
+ if (r >= 0)
+ protocol = r;
+ else {
+ r = safe_atou(rvalue, &protocol);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse IP protocol '%s' for FooOverUDP tunnel, "
+ "ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (protocol > UINT8_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "IP protocol '%s' for FooOverUDP tunnel out of range, "
+ "ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ *ret = protocol;
+ return 0;
+}
+
+int config_parse_fou_tunnel_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ union in_addr_union *addr = data;
+ FouTunnel *t = userdata;
+ int r, *f;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(lvalue, "Local"))
+ f = &t->local_family;
+ else
+ f = &t->peer_family;
+
+ r = in_addr_from_string_auto(rvalue, f, addr);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "FooOverUDP tunnel '%s' address is invalid, ignoring assignment: %s",
+ lvalue, rvalue);
+
+ return 0;
+}
+
+static int netdev_fou_tunnel_verify(NetDev *netdev, const char *filename) {
+ FouTunnel *t;
+
+ assert(netdev);
+ assert(filename);
+
+ t = FOU(netdev);
+
+ assert(t);
+
+ switch (t->fou_encap_type) {
+ case NETDEV_FOO_OVER_UDP_ENCAP_DIRECT:
+ if (t->fou_protocol <= 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "FooOverUDP protocol not configured in %s. Rejecting configuration.",
+ filename);
+ break;
+ case NETDEV_FOO_OVER_UDP_ENCAP_GUE:
+ if (t->fou_protocol > 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "FooOverUDP GUE can't be set with protocol configured in %s. Rejecting configuration.",
+ filename);
+ break;
+ default:
+ assert_not_reached("Invalid fou encap type");
+ }
+
+ if (t->peer_family == AF_UNSPEC && t->peer_port > 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "FooOverUDP peer port is set but peer address not configured in %s. Rejecting configuration.",
+ filename);
+ else if (t->peer_family != AF_UNSPEC && t->peer_port == 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "FooOverUDP peer port not set but peer address is configured in %s. Rejecting configuration.",
+ filename);
+ return 0;
+}
+
+static void fou_tunnel_init(NetDev *netdev) {
+ FouTunnel *t;
+
+ assert(netdev);
+
+ t = FOU(netdev);
+
+ assert(t);
+
+ t->fou_encap_type = NETDEV_FOO_OVER_UDP_ENCAP_DIRECT;
+}
+
+const NetDevVTable foutnl_vtable = {
+ .object_size = sizeof(FouTunnel),
+ .init = fou_tunnel_init,
+ .sections = NETDEV_COMMON_SECTIONS "FooOverUDP\0",
+ .create = netdev_fou_tunnel_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_fou_tunnel_verify,
+};
diff --git a/src/network/netdev/fou-tunnel.h b/src/network/netdev/fou-tunnel.h
new file mode 100644
index 0000000..a6f10df
--- /dev/null
+++ b/src/network/netdev/fou-tunnel.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/fou.h>
+
+#include "in-addr-util.h"
+#include "netdev.h"
+
+typedef enum FooOverUDPEncapType {
+ NETDEV_FOO_OVER_UDP_ENCAP_UNSPEC = FOU_ENCAP_UNSPEC,
+ NETDEV_FOO_OVER_UDP_ENCAP_DIRECT = FOU_ENCAP_DIRECT,
+ NETDEV_FOO_OVER_UDP_ENCAP_GUE = FOU_ENCAP_GUE,
+ _NETDEV_FOO_OVER_UDP_ENCAP_MAX,
+ _NETDEV_FOO_OVER_UDP_ENCAP_INVALID = -1,
+} FooOverUDPEncapType;
+
+typedef struct FouTunnel {
+ NetDev meta;
+
+ uint8_t fou_protocol;
+
+ uint16_t port;
+ uint16_t peer_port;
+
+ int local_family;
+ int peer_family;
+
+ FooOverUDPEncapType fou_encap_type;
+ union in_addr_union local;
+ union in_addr_union peer;
+} FouTunnel;
+
+DEFINE_NETDEV_CAST(FOU, FouTunnel);
+extern const NetDevVTable foutnl_vtable;
+
+const char *fou_encap_type_to_string(FooOverUDPEncapType d) _const_;
+FooOverUDPEncapType fou_encap_type_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_fou_encap_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_protocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_fou_tunnel_address);
diff --git a/src/network/netdev/geneve.c b/src/network/netdev/geneve.c
new file mode 100644
index 0000000..edf92ec
--- /dev/null
+++ b/src/network/netdev/geneve.c
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "extract-word.h"
+#include "geneve.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+
+#define GENEVE_FLOW_LABEL_MAX_MASK 0xFFFFFU
+#define DEFAULT_GENEVE_DESTINATION_PORT 6081
+
+static const char* const geneve_df_table[_NETDEV_GENEVE_DF_MAX] = {
+ [NETDEV_GENEVE_DF_NO] = "no",
+ [NETDEV_GENEVE_DF_YES] = "yes",
+ [NETDEV_GENEVE_DF_INHERIT] = "inherit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(geneve_df, GeneveDF, NETDEV_GENEVE_DF_YES);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_geneve_df, geneve_df, GeneveDF, "Failed to parse Geneve IPDoNotFragment= setting");
+
+/* callback for geneve netdev's created without a backing Link */
+static int geneve_netdev_create_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev, "Geneve netdev exists, using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "Geneve netdev could not be created: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Geneve created");
+
+ return 1;
+}
+
+static int netdev_geneve_create(NetDev *netdev) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ Geneve *v;
+ int r;
+
+ assert(netdev);
+
+ v = GENEVE(netdev);
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_NEWLINK message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, netdev->ifname);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m");
+
+ if (netdev->mac) {
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, netdev->mac);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m");
+ }
+
+ if (netdev->mtu != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_MTU, netdev->mtu);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MTU attribute: %m");
+ }
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, netdev_kind_to_string(netdev->kind));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ if (v->id <= GENEVE_VID_MAX) {
+ r = sd_netlink_message_append_u32(m, IFLA_GENEVE_ID, v->id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_ID attribute: %m");
+ }
+
+ if (in_addr_is_null(v->remote_family, &v->remote) == 0) {
+ if (v->remote_family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, IFLA_GENEVE_REMOTE, &v->remote.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, IFLA_GENEVE_REMOTE6, &v->remote.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_REMOTE/IFLA_GENEVE_REMOTE6 attribute: %m");
+ }
+
+ if (v->inherit) {
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_TTL_INHERIT, 1);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_TTL_INHERIT attribute: %m");
+ } else {
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_TTL, v->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_TTL attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_TOS, v->tos);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_TOS attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_UDP_CSUM, v->udpcsum);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_UDP_CSUM attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, v->udp6zerocsumtx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_UDP_ZERO_CSUM6_TX attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, v->udp6zerocsumrx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_UDP_ZERO_CSUM6_RX attribute: %m");
+
+ if (v->dest_port != DEFAULT_GENEVE_DESTINATION_PORT) {
+ r = sd_netlink_message_append_u16(m, IFLA_GENEVE_PORT, htobe16(v->dest_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_PORT attribute: %m");
+ }
+
+ if (v->flow_label > 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_GENEVE_LABEL, htobe32(v->flow_label));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_LABEL attribute: %m");
+ }
+
+ if (v->geneve_df != _NETDEV_GENEVE_DF_INVALID) {
+ r = sd_netlink_message_append_u8(m, IFLA_GENEVE_DF, v->geneve_df);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GENEVE_DF attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = netlink_call_async(netdev->manager->rtnl, NULL, m, geneve_netdev_create_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ netdev_ref(netdev);
+ netdev->state = NETDEV_STATE_CREATING;
+
+ log_netdev_debug(netdev, "Creating");
+
+ return r;
+}
+
+int config_parse_geneve_vni(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Geneve *v = userdata;
+ uint32_t f;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse Geneve VNI '%s'.", rvalue);
+ return 0;
+ }
+
+ if (f > GENEVE_VID_MAX){
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Geneve VNI out is of range '%s'.", rvalue);
+ return 0;
+ }
+
+ v->id = f;
+
+ return 0;
+}
+
+int config_parse_geneve_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Geneve *v = userdata;
+ union in_addr_union *addr = data, buffer;
+ int r, f;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = in_addr_from_string_auto(rvalue, &f, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "geneve '%s' address is invalid, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ r = in_addr_is_multicast(f, &buffer);
+ if (r > 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "geneve invalid multicast '%s' address, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ v->remote_family = f;
+ *addr = buffer;
+
+ return 0;
+}
+
+int config_parse_geneve_flow_label(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Geneve *v = userdata;
+ uint32_t f;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse Geneve flow label '%s'.", rvalue);
+ return 0;
+ }
+
+ if (f & ~GENEVE_FLOW_LABEL_MAX_MASK) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Geneve flow label '%s' not valid. Flow label range should be [0-1048575].", rvalue);
+ return 0;
+ }
+
+ v->flow_label = f;
+
+ return 0;
+}
+
+int config_parse_geneve_ttl(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Geneve *v = userdata;
+ unsigned f;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "inherit"))
+ v->inherit = true;
+ else {
+ r = safe_atou(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse Geneve TTL '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (f > 255) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid Geneve TTL '%s'. TTL must be <= 255. Ignoring assignment.", rvalue);
+ return 0;
+ }
+
+ v->ttl = f;
+ }
+
+ return 0;
+}
+
+static int netdev_geneve_verify(NetDev *netdev, const char *filename) {
+ Geneve *v = GENEVE(netdev);
+
+ assert(netdev);
+ assert(v);
+ assert(filename);
+
+ if (v->id > GENEVE_VID_MAX)
+ return log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: Geneve without valid VNI (or Virtual Network Identifier) configured. Ignoring.",
+ filename);
+
+ return 0;
+}
+
+static void geneve_init(NetDev *netdev) {
+ Geneve *v;
+
+ assert(netdev);
+
+ v = GENEVE(netdev);
+
+ assert(v);
+
+ v->id = GENEVE_VID_MAX + 1;
+ v->geneve_df = _NETDEV_GENEVE_DF_INVALID;
+ v->dest_port = DEFAULT_GENEVE_DESTINATION_PORT;
+ v->udpcsum = false;
+ v->udp6zerocsumtx = false;
+ v->udp6zerocsumrx = false;
+}
+
+const NetDevVTable geneve_vtable = {
+ .object_size = sizeof(Geneve),
+ .init = geneve_init,
+ .sections = NETDEV_COMMON_SECTIONS "GENEVE\0",
+ .create = netdev_geneve_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_geneve_verify,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/geneve.h b/src/network/netdev/geneve.h
new file mode 100644
index 0000000..b62eb7b
--- /dev/null
+++ b/src/network/netdev/geneve.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Geneve Geneve;
+
+#include "in-addr-util.h"
+#include "netdev.h"
+#include "networkd-network.h"
+
+#define GENEVE_VID_MAX (1u << 24) - 1
+
+typedef enum GeneveDF {
+ NETDEV_GENEVE_DF_NO = GENEVE_DF_UNSET,
+ NETDEV_GENEVE_DF_YES = GENEVE_DF_SET,
+ NETDEV_GENEVE_DF_INHERIT = GENEVE_DF_INHERIT,
+ _NETDEV_GENEVE_DF_MAX,
+ _NETDEV_GENEVE_DF_INVALID = -1
+} GeneveDF;
+
+struct Geneve {
+ NetDev meta;
+
+ uint32_t id;
+ uint32_t flow_label;
+
+ int remote_family;
+
+ uint8_t tos;
+ uint8_t ttl;
+
+ uint16_t dest_port;
+
+ bool udpcsum;
+ bool udp6zerocsumtx;
+ bool udp6zerocsumrx;
+ bool inherit;
+
+ GeneveDF geneve_df;
+ union in_addr_union remote;
+};
+
+DEFINE_NETDEV_CAST(GENEVE, Geneve);
+extern const NetDevVTable geneve_vtable;
+
+const char *geneve_df_to_string(GeneveDF d) _const_;
+GeneveDF geneve_df_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_geneve_vni);
+CONFIG_PARSER_PROTOTYPE(config_parse_geneve_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_geneve_flow_label);
+CONFIG_PARSER_PROTOTYPE(config_parse_geneve_df);
+CONFIG_PARSER_PROTOTYPE(config_parse_geneve_ttl);
diff --git a/src/network/netdev/ifb.c b/src/network/netdev/ifb.c
new file mode 100644
index 0000000..16ff49d
--- /dev/null
+++ b/src/network/netdev/ifb.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include "ifb.h"
+
+const NetDevVTable ifb_vtable = {
+ .object_size = sizeof(IntermediateFunctionalBlock),
+ .sections = NETDEV_COMMON_SECTIONS,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/ifb.h b/src/network/netdev/ifb.h
new file mode 100644
index 0000000..badfb4a
--- /dev/null
+++ b/src/network/netdev/ifb.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#pragma once
+
+#include "netdev.h"
+
+typedef struct IntermediateFunctionalBlock {
+ NetDev meta;
+} IntermediateFunctionalBlock;
+
+DEFINE_NETDEV_CAST(IFB, IntermediateFunctionalBlock);
+extern const NetDevVTable ifb_vtable;
diff --git a/src/network/netdev/ipvlan.c b/src/network/netdev/ipvlan.c
new file mode 100644
index 0000000..92a8f58
--- /dev/null
+++ b/src/network/netdev/ipvlan.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "conf-parser.h"
+#include "ipvlan.h"
+#include "ipvlan-util.h"
+#include "networkd-link.h"
+#include "string-util.h"
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipvlan_mode, ipvlan_mode, IPVlanMode, "Failed to parse ipvlan mode");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipvlan_flags, ipvlan_flags, IPVlanFlags, "Failed to parse ipvlan flags");
+
+static int netdev_ipvlan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *req) {
+ IPVlan *m;
+ int r;
+
+ assert(netdev);
+ assert(link);
+ assert(netdev->ifname);
+
+ if (netdev->kind == NETDEV_KIND_IPVLAN)
+ m = IPVLAN(netdev);
+ else
+ m = IPVTAP(netdev);
+
+ assert(m);
+
+ if (m->mode != _NETDEV_IPVLAN_MODE_INVALID) {
+ r = sd_netlink_message_append_u16(req, IFLA_IPVLAN_MODE, m->mode);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPVLAN_MODE attribute: %m");
+ }
+
+ if (m->flags != _NETDEV_IPVLAN_FLAGS_INVALID) {
+ r = sd_netlink_message_append_u16(req, IFLA_IPVLAN_FLAGS, m->flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPVLAN_FLAGS attribute: %m");
+ }
+
+ return 0;
+}
+
+static void ipvlan_init(NetDev *n) {
+ IPVlan *m;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_IPVLAN)
+ m = IPVLAN(n);
+ else
+ m = IPVTAP(n);
+
+ assert(m);
+
+ m->mode = _NETDEV_IPVLAN_MODE_INVALID;
+ m->flags = _NETDEV_IPVLAN_FLAGS_INVALID;
+}
+
+const NetDevVTable ipvlan_vtable = {
+ .object_size = sizeof(IPVlan),
+ .init = ipvlan_init,
+ .sections = NETDEV_COMMON_SECTIONS "IPVLAN\0",
+ .fill_message_create = netdev_ipvlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .generate_mac = true,
+};
+
+const NetDevVTable ipvtap_vtable = {
+ .object_size = sizeof(IPVlan),
+ .init = ipvlan_init,
+ .sections = NETDEV_COMMON_SECTIONS "IPVTAP\0",
+ .fill_message_create = netdev_ipvlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .generate_mac = true,
+};
+
+IPVlanMode link_get_ipvlan_mode(Link *link) {
+ NetDev *netdev;
+
+ if (!streq_ptr(link->kind, "ipvlan"))
+ return _NETDEV_IPVLAN_MODE_INVALID;
+
+ if (netdev_get(link->manager, link->ifname, &netdev) < 0)
+ return _NETDEV_IPVLAN_MODE_INVALID;
+
+ if (netdev->kind != NETDEV_KIND_IPVLAN)
+ return _NETDEV_IPVLAN_MODE_INVALID;
+
+ return IPVLAN(netdev)->mode;
+}
diff --git a/src/network/netdev/ipvlan.h b/src/network/netdev/ipvlan.h
new file mode 100644
index 0000000..633b0bd
--- /dev/null
+++ b/src/network/netdev/ipvlan.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_link.h>
+
+#include "ipvlan-util.h"
+#include "netdev.h"
+
+typedef struct IPVlan {
+ NetDev meta;
+
+ IPVlanMode mode;
+ IPVlanFlags flags;
+} IPVlan;
+
+DEFINE_NETDEV_CAST(IPVLAN, IPVlan);
+DEFINE_NETDEV_CAST(IPVTAP, IPVlan);
+extern const NetDevVTable ipvlan_vtable;
+extern const NetDevVTable ipvtap_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipvlan_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipvlan_flags);
+
+IPVlanMode link_get_ipvlan_mode(Link *link);
diff --git a/src/network/netdev/l2tp-tunnel.c b/src/network/netdev/l2tp-tunnel.c
new file mode 100644
index 0000000..eeea197
--- /dev/null
+++ b/src/network/netdev/l2tp-tunnel.c
@@ -0,0 +1,728 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/l2tp.h>
+#include <linux/genetlink.h>
+
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "l2tp-tunnel.h"
+#include "netlink-util.h"
+#include "networkd-address.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+static const char* const l2tp_l2spec_type_table[_NETDEV_L2TP_L2SPECTYPE_MAX] = {
+ [NETDEV_L2TP_L2SPECTYPE_NONE] = "none",
+ [NETDEV_L2TP_L2SPECTYPE_DEFAULT] = "default",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(l2tp_l2spec_type, L2tpL2specType);
+
+static const char* const l2tp_encap_type_table[_NETDEV_L2TP_ENCAPTYPE_MAX] = {
+ [NETDEV_L2TP_ENCAPTYPE_UDP] = "udp",
+ [NETDEV_L2TP_ENCAPTYPE_IP] = "ip",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(l2tp_encap_type, L2tpEncapType);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_l2tp_encap_type, l2tp_encap_type, L2tpEncapType, "Failed to parse L2TP Encapsulation Type");
+
+static const char* const l2tp_local_address_type_table[_NETDEV_L2TP_LOCAL_ADDRESS_MAX] = {
+ [NETDEV_L2TP_LOCAL_ADDRESS_AUTO] = "auto",
+ [NETDEV_L2TP_LOCAL_ADDRESS_STATIC] = "static",
+ [NETDEV_L2TP_LOCAL_ADDRESS_DYNAMIC] = "dynamic",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(l2tp_local_address_type, L2tpLocalAddressType);
+
+static void l2tp_session_free(L2tpSession *s) {
+ if (!s)
+ return;
+
+ if (s->tunnel && s->section)
+ ordered_hashmap_remove(s->tunnel->sessions_by_section, s->section);
+
+ network_config_section_free(s->section);
+
+ free(s->name);
+
+ free(s);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(L2tpSession, l2tp_session_free);
+
+static int l2tp_session_new_static(L2tpTunnel *t, const char *filename, unsigned section_line, L2tpSession **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(l2tp_session_freep) L2tpSession *s = NULL;
+ int r;
+
+ assert(t);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ s = ordered_hashmap_get(t->sessions_by_section, n);
+ if (s) {
+ *ret = TAKE_PTR(s);
+ return 0;
+ }
+
+ s = new(L2tpSession, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (L2tpSession) {
+ .l2tp_l2spec_type = NETDEV_L2TP_L2SPECTYPE_DEFAULT,
+ .tunnel = t,
+ .section = TAKE_PTR(n),
+ };
+
+ r = ordered_hashmap_ensure_allocated(&t->sessions_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(t->sessions_by_section, s->section, s);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+static int netdev_l2tp_fill_message_tunnel(NetDev *netdev, union in_addr_union *local_address, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ uint16_t encap_type;
+ L2tpTunnel *t;
+ int r;
+
+ assert(netdev);
+ assert(local_address);
+
+ t = L2TP(netdev);
+
+ assert(t);
+
+ r = sd_genl_message_new(netdev->manager->genl, SD_GENL_L2TP, L2TP_CMD_TUNNEL_CREATE, &m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to create generic netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, L2TP_ATTR_CONN_ID, t->tunnel_id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_CONN_ID attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, L2TP_ATTR_PEER_CONN_ID, t->peer_tunnel_id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_PEER_CONN_ID attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, L2TP_ATTR_PROTO_VERSION, 3);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_PROTO_VERSION attribute: %m");
+
+ switch(t->l2tp_encap_type) {
+ case NETDEV_L2TP_ENCAPTYPE_IP:
+ encap_type = L2TP_ENCAPTYPE_IP;
+ break;
+ case NETDEV_L2TP_ENCAPTYPE_UDP:
+ default:
+ encap_type = L2TP_ENCAPTYPE_UDP;
+ break;
+ }
+
+ r = sd_netlink_message_append_u16(m, L2TP_ATTR_ENCAP_TYPE, encap_type);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_ENCAP_TYPE attribute: %m");
+
+ if (t->family == AF_INET) {
+ r = sd_netlink_message_append_in_addr(m, L2TP_ATTR_IP_SADDR, &local_address->in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_IP_SADDR attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, L2TP_ATTR_IP_DADDR, &t->remote.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_IP_DADDR attribute: %m");
+ } else {
+ r = sd_netlink_message_append_in6_addr(m, L2TP_ATTR_IP6_SADDR, &local_address->in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_IP6_SADDR attribute: %m");
+
+ r = sd_netlink_message_append_in6_addr(m, L2TP_ATTR_IP6_DADDR, &t->remote.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_IP6_DADDR attribute: %m");
+ }
+
+ if (encap_type == L2TP_ENCAPTYPE_UDP) {
+ r = sd_netlink_message_append_u16(m, L2TP_ATTR_UDP_SPORT, t->l2tp_udp_sport);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_UDP_SPORT, attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, L2TP_ATTR_UDP_DPORT, t->l2tp_udp_dport);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_UDP_DPORT attribute: %m");
+
+ if (t->udp_csum) {
+ r = sd_netlink_message_append_u8(m, L2TP_ATTR_UDP_CSUM, t->udp_csum);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_UDP_CSUM attribute: %m");
+ }
+
+ if (t->udp6_csum_tx) {
+ r = sd_netlink_message_append_flag(m, L2TP_ATTR_UDP_ZERO_CSUM6_TX);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_UDP_ZERO_CSUM6_TX attribute: %m");
+ }
+
+ if (t->udp6_csum_rx) {
+ r = sd_netlink_message_append_flag(m, L2TP_ATTR_UDP_ZERO_CSUM6_RX);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_UDP_ZERO_CSUM6_RX attribute: %m");
+ }
+ }
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+static int netdev_l2tp_fill_message_session(NetDev *netdev, L2tpSession *session, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ uint16_t l2_spec_len;
+ uint8_t l2_spec_type;
+ int r;
+
+ assert(netdev);
+ assert(session);
+ assert(session->tunnel);
+
+ r = sd_genl_message_new(netdev->manager->genl, SD_GENL_L2TP, L2TP_CMD_SESSION_CREATE, &m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to create generic netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, L2TP_ATTR_CONN_ID, session->tunnel->tunnel_id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_CONN_ID attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, L2TP_ATTR_PEER_CONN_ID, session->tunnel->peer_tunnel_id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_PEER_CONN_ID attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, L2TP_ATTR_SESSION_ID, session->session_id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_SESSION_ID attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_PEER_SESSION_ID attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, L2TP_ATTR_PW_TYPE, L2TP_PWTYPE_ETH);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_PW_TYPE attribute: %m");
+
+ switch (session->l2tp_l2spec_type) {
+ case NETDEV_L2TP_L2SPECTYPE_NONE:
+ l2_spec_type = L2TP_L2SPECTYPE_NONE;
+ l2_spec_len = 0;
+ break;
+ case NETDEV_L2TP_L2SPECTYPE_DEFAULT:
+ default:
+ l2_spec_type = L2TP_L2SPECTYPE_DEFAULT;
+ l2_spec_len = 4;
+ break;
+ }
+
+ r = sd_netlink_message_append_u8(m, L2TP_ATTR_L2SPEC_TYPE, l2_spec_type);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_L2SPEC_TYPE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, L2TP_ATTR_L2SPEC_LEN, l2_spec_len);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_L2SPEC_LEN attribute: %m");
+
+ r = sd_netlink_message_append_string(m, L2TP_ATTR_IFNAME, session->name);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append L2TP_ATTR_IFNAME attribute: %m");
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+static int l2tp_acquire_local_address_one(L2tpTunnel *t, Address *a, union in_addr_union *ret) {
+ if (a->family != t->family)
+ return -EINVAL;
+
+ if (in_addr_is_null(a->family, &a->in_addr_peer) <= 0)
+ return -EINVAL;
+
+ if (t->local_address_type == NETDEV_L2TP_LOCAL_ADDRESS_STATIC &&
+ !FLAGS_SET(a->flags, IFA_F_PERMANENT))
+ return -EINVAL;
+
+ if (t->local_address_type == NETDEV_L2TP_LOCAL_ADDRESS_DYNAMIC &&
+ FLAGS_SET(a->flags, IFA_F_PERMANENT))
+ return -EINVAL;
+
+ *ret = a->in_addr;
+ return 0;
+}
+
+static int l2tp_acquire_local_address(L2tpTunnel *t, Link *link, union in_addr_union *ret) {
+ Address *a;
+
+ assert(t);
+ assert(link);
+ assert(ret);
+ assert(IN_SET(t->family, AF_INET, AF_INET6));
+
+ if (!in_addr_is_null(t->family, &t->local)) {
+ /* local address is explicitly specified. */
+ *ret = t->local;
+ return 0;
+ }
+
+ SET_FOREACH(a, link->addresses)
+ if (l2tp_acquire_local_address_one(t, a, ret) >= 0)
+ return 1;
+
+ SET_FOREACH(a, link->addresses_foreign)
+ if (l2tp_acquire_local_address_one(t, a, ret) >= 0)
+ return 1;
+
+ return -ENODATA;
+}
+
+static void l2tp_session_destroy_callback(L2tpSession *session) {
+ if (!session)
+ return;
+
+ netdev_unref(NETDEV(session->tunnel));
+}
+
+static int l2tp_create_session_handler(sd_netlink *rtnl, sd_netlink_message *m, L2tpSession *session) {
+ NetDev *netdev;
+ int r;
+
+ assert(session);
+ assert(session->tunnel);
+
+ netdev = NETDEV(session->tunnel);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev, "L2TP session %s exists, using existing without changing its parameters",
+ session->name);
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "L2TP session %s could not be created: %m", session->name);
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "L2TP session %s created", session->name);
+ return 1;
+}
+
+static int l2tp_create_session(NetDev *netdev, L2tpSession *session) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *n = NULL;
+ int r;
+
+ r = netdev_l2tp_fill_message_session(netdev, session, &n);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(netdev->manager->genl, NULL, n, l2tp_create_session_handler,
+ l2tp_session_destroy_callback, session);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to create L2TP session %s: %m", session->name);
+
+ netdev_ref(netdev);
+ return 0;
+}
+
+static int l2tp_create_tunnel_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ L2tpSession *session;
+ L2tpTunnel *t;
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ t = L2TP(netdev);
+
+ assert(t);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev, "netdev exists, using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "netdev could not be created: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "L2TP tunnel is created");
+
+ ORDERED_HASHMAP_FOREACH(session, t->sessions_by_section)
+ (void) l2tp_create_session(netdev, session);
+
+ return 1;
+}
+
+static int l2tp_create_tunnel(NetDev *netdev, Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ union in_addr_union local_address;
+ L2tpTunnel *t;
+ int r;
+
+ assert(netdev);
+
+ t = L2TP(netdev);
+
+ assert(t);
+
+ r = l2tp_acquire_local_address(t, link, &local_address);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not find local address.");
+
+ if (r > 0 && DEBUG_LOGGING) {
+ _cleanup_free_ char *str = NULL;
+
+ (void) in_addr_to_string(t->family, &local_address, &str);
+ log_netdev_debug(netdev, "Local address %s acquired.", strna(str));
+ }
+
+ r = netdev_l2tp_fill_message_tunnel(netdev, &local_address, &m);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(netdev->manager->genl, NULL, m, l2tp_create_tunnel_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to create L2TP tunnel: %m");
+
+ netdev_ref(netdev);
+
+ return 0;
+}
+
+int config_parse_l2tp_tunnel_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ L2tpTunnel *t = userdata;
+ union in_addr_union *addr = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(lvalue, "Local")) {
+ L2tpLocalAddressType addr_type;
+
+ if (isempty(rvalue))
+ addr_type = NETDEV_L2TP_LOCAL_ADDRESS_AUTO;
+ else
+ addr_type = l2tp_local_address_type_from_string(rvalue);
+
+ if (addr_type >= 0) {
+ if (in_addr_is_null(t->family, &t->remote) != 0)
+ /* If Remote= is not specified yet, then also clear family. */
+ t->family = AF_UNSPEC;
+
+ t->local = IN_ADDR_NULL;
+ t->local_address_type = addr_type;
+
+ return 0;
+ }
+ }
+
+ if (t->family == AF_UNSPEC)
+ r = in_addr_from_string_auto(rvalue, &t->family, addr);
+ else
+ r = in_addr_from_string(t->family, rvalue, addr);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid L2TP Tunnel address specified in %s='%s', ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_l2tp_tunnel_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint32_t *id = data, k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse L2TP tunnel id. Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (k == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid L2TP tunnel id. Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ *id = k;
+
+ return 0;
+}
+
+int config_parse_l2tp_session_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(l2tp_session_free_or_set_invalidp) L2tpSession *session = NULL;
+ L2tpTunnel *t = userdata;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = l2tp_session_new_static(t, filename, section_line, &session);
+ if (r < 0)
+ return log_oom();
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse L2TP session id. Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (k == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid L2TP session id. Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "SessionId"))
+ session->session_id = k;
+ else
+ session->peer_session_id = k;
+
+ session = NULL;
+ return 0;
+}
+
+int config_parse_l2tp_session_l2spec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(l2tp_session_free_or_set_invalidp) L2tpSession *session = NULL;
+ L2tpTunnel *t = userdata;
+ L2tpL2specType spec;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = l2tp_session_new_static(t, filename, section_line, &session);
+ if (r < 0)
+ return log_oom();
+
+ spec = l2tp_l2spec_type_from_string(rvalue);
+ if (spec < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse layer2 specific header type. Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ session->l2tp_l2spec_type = spec;
+
+ session = NULL;
+ return 0;
+}
+
+int config_parse_l2tp_session_name(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(l2tp_session_free_or_set_invalidp) L2tpSession *session = NULL;
+ L2tpTunnel *t = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = l2tp_session_new_static(t, filename, section_line, &session);
+ if (r < 0)
+ return log_oom();
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse L2TP tunnel session name. Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = free_and_strdup(&session->name, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ session = NULL;
+ return 0;
+}
+
+static void l2tp_tunnel_init(NetDev *netdev) {
+ L2tpTunnel *t;
+
+ assert(netdev);
+
+ t = L2TP(netdev);
+
+ assert(t);
+
+ t->l2tp_encap_type = NETDEV_L2TP_ENCAPTYPE_UDP;
+ t->udp6_csum_rx = true;
+ t->udp6_csum_tx = true;
+}
+
+static int l2tp_session_verify(L2tpSession *session) {
+ NetDev *netdev;
+
+ assert(session);
+ assert(session->tunnel);
+
+ netdev = NETDEV(session->tunnel);
+
+ if (section_is_invalid(session->section))
+ return -EINVAL;
+
+ if (!session->name)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: L2TP session without name configured. "
+ "Ignoring [L2TPSession] section from line %u",
+ session->section->filename, session->section->line);
+
+ if (session->session_id == 0 || session->peer_session_id == 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: L2TP session without session IDs configured. "
+ "Ignoring [L2TPSession] section from line %u",
+ session->section->filename, session->section->line);
+
+ return 0;
+}
+
+static int netdev_l2tp_tunnel_verify(NetDev *netdev, const char *filename) {
+ L2tpTunnel *t;
+ L2tpSession *session;
+
+ assert(netdev);
+ assert(filename);
+
+ t = L2TP(netdev);
+
+ assert(t);
+
+ if (!IN_SET(t->family, AF_INET, AF_INET6))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: L2TP tunnel with invalid address family configured. Ignoring",
+ filename);
+
+ if (in_addr_is_null(t->family, &t->remote))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: L2TP tunnel without a remote address configured. Ignoring",
+ filename);
+
+ if (t->tunnel_id == 0 || t->peer_tunnel_id == 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: L2TP tunnel without tunnel IDs configured. Ignoring",
+ filename);
+
+ ORDERED_HASHMAP_FOREACH(session, t->sessions_by_section)
+ if (l2tp_session_verify(session) < 0)
+ l2tp_session_free(session);
+
+ return 0;
+}
+
+static void l2tp_tunnel_done(NetDev *netdev) {
+ L2tpTunnel *t;
+
+ assert(netdev);
+
+ t = L2TP(netdev);
+
+ assert(t);
+
+ ordered_hashmap_free_with_destructor(t->sessions_by_section, l2tp_session_free);
+}
+
+const NetDevVTable l2tptnl_vtable = {
+ .object_size = sizeof(L2tpTunnel),
+ .init = l2tp_tunnel_init,
+ .sections = NETDEV_COMMON_SECTIONS "L2TP\0L2TPSession\0",
+ .create_after_configured = l2tp_create_tunnel,
+ .done = l2tp_tunnel_done,
+ .create_type = NETDEV_CREATE_AFTER_CONFIGURED,
+ .config_verify = netdev_l2tp_tunnel_verify,
+};
diff --git a/src/network/netdev/l2tp-tunnel.h b/src/network/netdev/l2tp-tunnel.h
new file mode 100644
index 0000000..048318d
--- /dev/null
+++ b/src/network/netdev/l2tp-tunnel.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/l2tp.h>
+
+#include "in-addr-util.h"
+#include "netdev.h"
+#include "networkd-util.h"
+
+typedef enum L2tpL2specType {
+ NETDEV_L2TP_L2SPECTYPE_NONE = L2TP_L2SPECTYPE_NONE,
+ NETDEV_L2TP_L2SPECTYPE_DEFAULT = L2TP_L2SPECTYPE_DEFAULT,
+ _NETDEV_L2TP_L2SPECTYPE_MAX,
+ _NETDEV_L2TP_L2SPECTYPE_INVALID = -1,
+} L2tpL2specType;
+
+typedef enum L2tpEncapType {
+ NETDEV_L2TP_ENCAPTYPE_UDP = L2TP_ENCAPTYPE_UDP,
+ NETDEV_L2TP_ENCAPTYPE_IP = L2TP_ENCAPTYPE_IP,
+ _NETDEV_L2TP_ENCAPTYPE_MAX,
+ _NETDEV_L2TP_ENCAPTYPE_INVALID = -1,
+} L2tpEncapType;
+
+typedef enum L2tpLocalAddressType {
+ NETDEV_L2TP_LOCAL_ADDRESS_AUTO,
+ NETDEV_L2TP_LOCAL_ADDRESS_STATIC,
+ NETDEV_L2TP_LOCAL_ADDRESS_DYNAMIC,
+ _NETDEV_L2TP_LOCAL_ADDRESS_MAX,
+ _NETDEV_L2TP_LOCAL_ADDRESS_INVALID = -1,
+} L2tpLocalAddressType;
+
+typedef struct L2tpTunnel L2tpTunnel;
+
+typedef struct L2tpSession {
+ L2tpTunnel *tunnel;
+ NetworkConfigSection *section;
+
+ char *name;
+
+ uint32_t session_id;
+ uint32_t peer_session_id;
+ L2tpL2specType l2tp_l2spec_type;
+} L2tpSession;
+
+struct L2tpTunnel {
+ NetDev meta;
+
+ uint16_t l2tp_udp_sport;
+ uint16_t l2tp_udp_dport;
+
+ uint32_t tunnel_id;
+ uint32_t peer_tunnel_id;
+
+ int family;
+
+ bool udp_csum;
+ bool udp6_csum_rx;
+ bool udp6_csum_tx;
+
+ L2tpLocalAddressType local_address_type;
+ union in_addr_union local;
+ union in_addr_union remote;
+
+ L2tpEncapType l2tp_encap_type;
+
+ OrderedHashmap *sessions_by_section;
+};
+
+DEFINE_NETDEV_CAST(L2TP, L2tpTunnel);
+extern const NetDevVTable l2tptnl_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_l2tp_tunnel_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_l2tp_tunnel_id);
+CONFIG_PARSER_PROTOTYPE(config_parse_l2tp_encap_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_l2tp_session_l2spec);
+CONFIG_PARSER_PROTOTYPE(config_parse_l2tp_session_id);
+CONFIG_PARSER_PROTOTYPE(config_parse_l2tp_session_name);
diff --git a/src/network/netdev/macsec.c b/src/network/netdev/macsec.c
new file mode 100644
index 0000000..82e71c3
--- /dev/null
+++ b/src/network/netdev/macsec.c
@@ -0,0 +1,1252 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_macsec.h>
+#include <linux/genetlink.h>
+
+#include "conf-parser.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "macsec.h"
+#include "memory-util.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "networkd-manager.h"
+#include "path-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+static void security_association_clear(SecurityAssociation *sa) {
+ if (!sa)
+ return;
+
+ explicit_bzero_safe(sa->key, sa->key_len);
+ free(sa->key);
+ free(sa->key_file);
+}
+
+static void security_association_init(SecurityAssociation *sa) {
+ assert(sa);
+
+ sa->activate = -1;
+ sa->use_for_encoding = -1;
+}
+
+static void macsec_receive_association_free(ReceiveAssociation *c) {
+ if (!c)
+ return;
+
+ if (c->macsec && c->section)
+ ordered_hashmap_remove(c->macsec->receive_associations_by_section, c->section);
+
+ network_config_section_free(c->section);
+ security_association_clear(&c->sa);
+
+ free(c);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(ReceiveAssociation, macsec_receive_association_free);
+
+static int macsec_receive_association_new_static(MACsec *s, const char *filename, unsigned section_line, ReceiveAssociation **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(macsec_receive_association_freep) ReceiveAssociation *c = NULL;
+ int r;
+
+ assert(s);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ c = ordered_hashmap_get(s->receive_associations_by_section, n);
+ if (c) {
+ *ret = TAKE_PTR(c);
+ return 0;
+ }
+
+ c = new(ReceiveAssociation, 1);
+ if (!c)
+ return -ENOMEM;
+
+ *c = (ReceiveAssociation) {
+ .macsec = s,
+ .section = TAKE_PTR(n),
+ };
+
+ security_association_init(&c->sa);
+
+ r = ordered_hashmap_ensure_allocated(&s->receive_associations_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(s->receive_associations_by_section, c->section, c);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(c);
+
+ return 0;
+}
+
+static void macsec_receive_channel_free(ReceiveChannel *c) {
+ if (!c)
+ return;
+
+ if (c->macsec) {
+ if (c->sci.as_uint64 > 0)
+ ordered_hashmap_remove_value(c->macsec->receive_channels, &c->sci.as_uint64, c);
+
+ if (c->section)
+ ordered_hashmap_remove(c->macsec->receive_channels_by_section, c->section);
+ }
+
+ network_config_section_free(c->section);
+
+ free(c);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(ReceiveChannel, macsec_receive_channel_free);
+
+static int macsec_receive_channel_new(MACsec *s, uint64_t sci, ReceiveChannel **ret) {
+ ReceiveChannel *c;
+
+ assert(s);
+
+ c = new(ReceiveChannel, 1);
+ if (!c)
+ return -ENOMEM;
+
+ *c = (ReceiveChannel) {
+ .macsec = s,
+ .sci.as_uint64 = sci,
+ };
+
+ *ret = c;
+ return 0;
+}
+
+static int macsec_receive_channel_new_static(MACsec *s, const char *filename, unsigned section_line, ReceiveChannel **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(macsec_receive_channel_freep) ReceiveChannel *c = NULL;
+ int r;
+
+ assert(s);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ c = ordered_hashmap_get(s->receive_channels_by_section, n);
+ if (c) {
+ *ret = TAKE_PTR(c);
+ return 0;
+ }
+
+ r = macsec_receive_channel_new(s, 0, &c);
+ if (r < 0)
+ return r;
+
+ c->section = TAKE_PTR(n);
+
+ r = ordered_hashmap_ensure_allocated(&s->receive_channels_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(s->receive_channels_by_section, c->section, c);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(c);
+
+ return 0;
+}
+
+static void macsec_transmit_association_free(TransmitAssociation *a) {
+ if (!a)
+ return;
+
+ if (a->macsec && a->section)
+ ordered_hashmap_remove(a->macsec->transmit_associations_by_section, a->section);
+
+ network_config_section_free(a->section);
+ security_association_clear(&a->sa);
+
+ free(a);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(TransmitAssociation, macsec_transmit_association_free);
+
+static int macsec_transmit_association_new_static(MACsec *s, const char *filename, unsigned section_line, TransmitAssociation **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(macsec_transmit_association_freep) TransmitAssociation *a = NULL;
+ int r;
+
+ assert(s);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ a = ordered_hashmap_get(s->transmit_associations_by_section, n);
+ if (a) {
+ *ret = TAKE_PTR(a);
+ return 0;
+ }
+
+ a = new(TransmitAssociation, 1);
+ if (!a)
+ return -ENOMEM;
+
+ *a = (TransmitAssociation) {
+ .macsec = s,
+ .section = TAKE_PTR(n),
+ };
+
+ security_association_init(&a->sa);
+
+ r = ordered_hashmap_ensure_allocated(&s->transmit_associations_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(s->transmit_associations_by_section, a->section, a);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(a);
+
+ return 0;
+}
+
+static int netdev_macsec_fill_message(NetDev *netdev, int command, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(netdev);
+ assert(netdev->ifindex > 0);
+
+ r = sd_genl_message_new(netdev->manager->genl, SD_GENL_MACSEC, command, &m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to create generic netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, MACSEC_ATTR_IFINDEX, netdev->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_ATTR_IFINDEX attribute: %m");
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+static int netdev_macsec_fill_message_sci(NetDev *netdev, MACsecSCI *sci, sd_netlink_message *m) {
+ int r;
+
+ assert(netdev);
+ assert(m);
+ assert(sci);
+
+ r = sd_netlink_message_open_container(m, MACSEC_ATTR_RXSC_CONFIG);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_ATTR_RXSC_CONFIG attribute: %m");
+
+ r = sd_netlink_message_append_u64(m, MACSEC_RXSC_ATTR_SCI, sci->as_uint64);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_RXSC_ATTR_SCI attribute: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_ATTR_RXSC_CONFIG attribute: %m");
+
+ return 0;
+}
+
+static int netdev_macsec_fill_message_sa(NetDev *netdev, SecurityAssociation *a, sd_netlink_message *m) {
+ int r;
+
+ assert(netdev);
+ assert(a);
+ assert(m);
+
+ r = sd_netlink_message_open_container(m, MACSEC_ATTR_SA_CONFIG);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_ATTR_SA_CONFIG attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, MACSEC_SA_ATTR_AN, a->association_number);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_SA_ATTR_AN attribute: %m");
+
+ if (a->packet_number > 0) {
+ r = sd_netlink_message_append_u32(m, MACSEC_SA_ATTR_PN, a->packet_number);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_SA_ATTR_PN attribute: %m");
+ }
+
+ if (a->key_len > 0) {
+ r = sd_netlink_message_append_data(m, MACSEC_SA_ATTR_KEYID, a->key_id, MACSEC_KEYID_LEN);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_SA_ATTR_KEYID attribute: %m");
+
+ r = sd_netlink_message_append_data(m, MACSEC_SA_ATTR_KEY, a->key, a->key_len);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_SA_ATTR_KEY attribute: %m");
+ }
+
+ if (a->activate >= 0) {
+ r = sd_netlink_message_append_u8(m, MACSEC_SA_ATTR_ACTIVE, a->activate);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_SA_ATTR_ACTIVE attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append MACSEC_ATTR_SA_CONFIG attribute: %m");
+
+ return 0;
+}
+
+static int macsec_receive_association_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev,
+ "MACsec receive secure association exists, "
+ "using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r,
+ "Failed to add receive secure association: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Receive secure association is configured");
+
+ return 1;
+}
+
+static int netdev_macsec_configure_receive_association(NetDev *netdev, ReceiveAssociation *a) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(netdev);
+ assert(a);
+
+ r = netdev_macsec_fill_message(netdev, MACSEC_CMD_ADD_RXSA, &m);
+ if (r < 0)
+ return r;
+
+ r = netdev_macsec_fill_message_sa(netdev, &a->sa, m);
+ if (r < 0)
+ return r;
+
+ r = netdev_macsec_fill_message_sci(netdev, &a->sci, m);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(netdev->manager->genl, NULL, m, macsec_receive_association_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to configure receive secure association: %m");
+
+ netdev_ref(netdev);
+
+ return 0;
+}
+
+static int macsec_receive_channel_handler(sd_netlink *rtnl, sd_netlink_message *m, ReceiveChannel *c) {
+ NetDev *netdev;
+ unsigned i;
+ int r;
+
+ assert(c);
+ assert(c->macsec);
+
+ netdev = NETDEV(c->macsec);
+
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_debug(netdev,
+ "MACsec receive channel exists, "
+ "using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r,
+ "Failed to add receive secure channel: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Receive channel is configured");
+
+ for (i = 0; i < c->n_rxsa; i++) {
+ r = netdev_macsec_configure_receive_association(netdev, c->rxsa[i]);
+ if (r < 0) {
+ log_netdev_warning_errno(netdev, r,
+ "Failed to configure receive security association: %m");
+ netdev_drop(netdev);
+ return 1;
+ }
+ }
+
+ return 1;
+}
+
+static void receive_channel_destroy_callback(ReceiveChannel *c) {
+ assert(c);
+ assert(c->macsec);
+
+ netdev_unref(NETDEV(c->macsec));
+}
+
+static int netdev_macsec_configure_receive_channel(NetDev *netdev, ReceiveChannel *c) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(netdev);
+ assert(c);
+
+ r = netdev_macsec_fill_message(netdev, MACSEC_CMD_ADD_RXSC, &m);
+ if (r < 0)
+ return r;
+
+ r = netdev_macsec_fill_message_sci(netdev, &c->sci, m);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(netdev->manager->genl, NULL, m, macsec_receive_channel_handler,
+ receive_channel_destroy_callback, c);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to configure receive channel: %m");
+
+ netdev_ref(netdev);
+
+ return 0;
+}
+
+static int macsec_transmit_association_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev,
+ "MACsec transmit secure association exists, "
+ "using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r,
+ "Failed to add transmit secure association: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Transmit secure association is configured");
+
+ return 1;
+}
+
+static int netdev_macsec_configure_transmit_association(NetDev *netdev, TransmitAssociation *a) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(netdev);
+ assert(a);
+
+ r = netdev_macsec_fill_message(netdev, MACSEC_CMD_ADD_TXSA, &m);
+ if (r < 0)
+ return r;
+
+ r = netdev_macsec_fill_message_sa(netdev, &a->sa, m);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(netdev->manager->genl, NULL, m, macsec_transmit_association_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to configure transmit secure association: %m");
+
+ netdev_ref(netdev);
+
+ return 0;
+}
+
+static int netdev_macsec_configure(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ TransmitAssociation *a;
+ ReceiveChannel *c;
+ MACsec *s;
+ int r;
+
+ assert(netdev);
+ s = MACSEC(netdev);
+ assert(s);
+
+ ORDERED_HASHMAP_FOREACH(a, s->transmit_associations_by_section) {
+ r = netdev_macsec_configure_transmit_association(netdev, a);
+ if (r < 0)
+ return r;
+ }
+
+ ORDERED_HASHMAP_FOREACH(c, s->receive_channels) {
+ r = netdev_macsec_configure_receive_channel(netdev, c);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int netdev_macsec_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ MACsec *v;
+ int r;
+
+ assert(netdev);
+ assert(m);
+
+ v = MACSEC(netdev);
+
+ if (v->port > 0) {
+ r = sd_netlink_message_append_u16(m, IFLA_MACSEC_PORT, v->port);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MACSEC_PORT attribute: %m");
+ }
+
+ if (v->encrypt >= 0) {
+ r = sd_netlink_message_append_u8(m, IFLA_MACSEC_ENCRYPT, v->encrypt);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MACSEC_ENCRYPT attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_MACSEC_ENCODING_SA, v->encoding_an);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MACSEC_ENCODING_SA attribute: %m");
+
+ return r;
+}
+
+int config_parse_macsec_port(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(macsec_receive_association_free_or_set_invalidp) ReceiveAssociation *b = NULL;
+ _cleanup_(macsec_receive_channel_free_or_set_invalidp) ReceiveChannel *c = NULL;
+ MACsec *s = userdata;
+ uint16_t port;
+ void *dest;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* This parses port used to make Secure Channel Identifier (SCI) */
+
+ if (streq(section, "MACsec"))
+ dest = &s->port;
+ else if (streq(section, "MACsecReceiveChannel")) {
+ r = macsec_receive_channel_new_static(s, filename, section_line, &c);
+ if (r < 0)
+ return log_oom();
+
+ dest = &c->sci.port;
+ } else {
+ assert(streq(section, "MACsecReceiveAssociation"));
+
+ r = macsec_receive_association_new_static(s, filename, section_line, &b);
+ if (r < 0)
+ return log_oom();
+
+ dest = &b->sci.port;
+ }
+
+ r = parse_ip_port(rvalue, &port);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse port '%s' for secure channel identifier. Ignoring assignment: %m",
+ rvalue);
+ return 0;
+ }
+
+ unaligned_write_be16(dest, port);
+
+ TAKE_PTR(b);
+ TAKE_PTR(c);
+
+ return 0;
+}
+
+int config_parse_macsec_hw_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(macsec_receive_association_free_or_set_invalidp) ReceiveAssociation *b = NULL;
+ _cleanup_(macsec_receive_channel_free_or_set_invalidp) ReceiveChannel *c = NULL;
+ MACsec *s = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "MACsecReceiveChannel"))
+ r = macsec_receive_channel_new_static(s, filename, section_line, &c);
+ else
+ r = macsec_receive_association_new_static(s, filename, section_line, &b);
+ if (r < 0)
+ return log_oom();
+
+ r = ether_addr_from_string(rvalue, b ? &b->sci.mac : &c->sci.mac);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse MAC address for secure channel identifier. "
+ "Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(b);
+ TAKE_PTR(c);
+
+ return 0;
+}
+
+int config_parse_macsec_packet_number(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(macsec_transmit_association_free_or_set_invalidp) TransmitAssociation *a = NULL;
+ _cleanup_(macsec_receive_association_free_or_set_invalidp) ReceiveAssociation *b = NULL;
+ MACsec *s = userdata;
+ uint32_t val, *dest;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "MACsecTransmitAssociation"))
+ r = macsec_transmit_association_new_static(s, filename, section_line, &a);
+ else
+ r = macsec_receive_association_new_static(s, filename, section_line, &b);
+ if (r < 0)
+ return log_oom();
+
+ dest = a ? &a->sa.packet_number : &b->sa.packet_number;
+
+ r = safe_atou32(rvalue, &val);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse packet number. Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+ if (streq(section, "MACsecTransmitAssociation") && val == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid packet number. Ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ *dest = val;
+ TAKE_PTR(a);
+ TAKE_PTR(b);
+
+ return 0;
+}
+
+int config_parse_macsec_key(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(macsec_transmit_association_free_or_set_invalidp) TransmitAssociation *a = NULL;
+ _cleanup_(macsec_receive_association_free_or_set_invalidp) ReceiveAssociation *b = NULL;
+ _cleanup_(erase_and_freep) void *p = NULL;
+ MACsec *s = userdata;
+ SecurityAssociation *dest;
+ size_t l;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ (void) warn_file_is_world_accessible(filename, NULL, unit, line);
+
+ if (streq(section, "MACsecTransmitAssociation"))
+ r = macsec_transmit_association_new_static(s, filename, section_line, &a);
+ else
+ r = macsec_receive_association_new_static(s, filename, section_line, &b);
+ if (r < 0)
+ return log_oom();
+
+ dest = a ? &a->sa : &b->sa;
+
+ r = unhexmem_full(rvalue, strlen(rvalue), true, &p, &l);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse key. Ignoring assignment: %m");
+ return 0;
+ }
+
+ if (l != 16) {
+ /* See DEFAULT_SAK_LEN in drivers/net/macsec.c */
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid key length (%zu). Ignoring assignment", l);
+ return 0;
+ }
+
+ explicit_bzero_safe(dest->key, dest->key_len);
+ free_and_replace(dest->key, p);
+ dest->key_len = l;
+
+ TAKE_PTR(a);
+ TAKE_PTR(b);
+
+ return 0;
+}
+
+int config_parse_macsec_key_file(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(macsec_transmit_association_free_or_set_invalidp) TransmitAssociation *a = NULL;
+ _cleanup_(macsec_receive_association_free_or_set_invalidp) ReceiveAssociation *b = NULL;
+ _cleanup_free_ char *path = NULL;
+ MACsec *s = userdata;
+ char **dest;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "MACsecTransmitAssociation"))
+ r = macsec_transmit_association_new_static(s, filename, section_line, &a);
+ else
+ r = macsec_receive_association_new_static(s, filename, section_line, &b);
+ if (r < 0)
+ return log_oom();
+
+ dest = a ? &a->sa.key_file : &b->sa.key_file;
+
+ if (isempty(rvalue)) {
+ *dest = mfree(*dest);
+ return 0;
+ }
+
+ path = strdup(rvalue);
+ if (!path)
+ return log_oom();
+
+ if (path_simplify_and_warn(path, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue) < 0)
+ return 0;
+
+ free_and_replace(*dest, path);
+ TAKE_PTR(a);
+ TAKE_PTR(b);
+
+ return 0;
+}
+
+int config_parse_macsec_key_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(macsec_transmit_association_free_or_set_invalidp) TransmitAssociation *a = NULL;
+ _cleanup_(macsec_receive_association_free_or_set_invalidp) ReceiveAssociation *b = NULL;
+ _cleanup_free_ void *p = NULL;
+ MACsec *s = userdata;
+ uint8_t *dest;
+ size_t l;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "MACsecTransmitAssociation"))
+ r = macsec_transmit_association_new_static(s, filename, section_line, &a);
+ else
+ r = macsec_receive_association_new_static(s, filename, section_line, &b);
+ if (r < 0)
+ return log_oom();
+
+ r = unhexmem(rvalue, strlen(rvalue), &p, &l);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse KeyId=%s, ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ if (l > MACSEC_KEYID_LEN) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Specified KeyId= is larger then the allowed maximum (%zu > %u), ignoring: %s",
+ l, MACSEC_KEYID_LEN, rvalue);
+ return 0;
+ }
+
+ dest = a ? a->sa.key_id : b->sa.key_id;
+ memcpy_safe(dest, p, l);
+ memzero(dest + l, MACSEC_KEYID_LEN - l);
+
+ TAKE_PTR(a);
+ TAKE_PTR(b);
+
+ return 0;
+}
+
+int config_parse_macsec_sa_activate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(macsec_transmit_association_free_or_set_invalidp) TransmitAssociation *a = NULL;
+ _cleanup_(macsec_receive_association_free_or_set_invalidp) ReceiveAssociation *b = NULL;
+ MACsec *s = userdata;
+ int *dest;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "MACsecTransmitAssociation"))
+ r = macsec_transmit_association_new_static(s, filename, section_line, &a);
+ else
+ r = macsec_receive_association_new_static(s, filename, section_line, &b);
+ if (r < 0)
+ return log_oom();
+
+ dest = a ? &a->sa.activate : &b->sa.activate;
+
+ if (isempty(rvalue))
+ r = -1;
+ else {
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse activation mode of %s security association. "
+ "Ignoring assignment: %s",
+ streq(section, "MACsecTransmitAssociation") ? "transmit" : "receive",
+ rvalue);
+ return 0;
+ }
+ }
+
+ *dest = r;
+ TAKE_PTR(a);
+ TAKE_PTR(b);
+
+ return 0;
+}
+
+int config_parse_macsec_use_for_encoding(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(macsec_transmit_association_free_or_set_invalidp) TransmitAssociation *a = NULL;
+ MACsec *s = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = macsec_transmit_association_new_static(s, filename, section_line, &a);
+ if (r < 0)
+ return log_oom();
+
+ if (isempty(rvalue)) {
+ a->sa.use_for_encoding = -1;
+ TAKE_PTR(a);
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s= setting. Ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ a->sa.use_for_encoding = r;
+ if (a->sa.use_for_encoding > 0)
+ a->sa.activate = true;
+
+ TAKE_PTR(a);
+
+ return 0;
+}
+
+static int macsec_read_key_file(NetDev *netdev, SecurityAssociation *sa) {
+ _cleanup_(erase_and_freep) uint8_t *key = NULL;
+ size_t key_len;
+ int r;
+
+ assert(netdev);
+ assert(sa);
+
+ if (!sa->key_file)
+ return 0;
+
+ (void) warn_file_is_world_accessible(sa->key_file, NULL, NULL, 0);
+
+ r = read_full_file_full(
+ AT_FDCWD, sa->key_file,
+ READ_FULL_FILE_SECURE | READ_FULL_FILE_UNHEX | READ_FULL_FILE_WARN_WORLD_READABLE | READ_FULL_FILE_CONNECT_SOCKET,
+ NULL, (char **) &key, &key_len);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r,
+ "Failed to read key from '%s', ignoring: %m",
+ sa->key_file);
+
+ if (key_len != 16)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "Invalid key length (%zu bytes), ignoring: %m", key_len);
+
+ explicit_bzero_safe(sa->key, sa->key_len);
+ free_and_replace(sa->key, key);
+ sa->key_len = key_len;
+
+ return 0;
+}
+
+static int macsec_receive_channel_verify(ReceiveChannel *c) {
+ NetDev *netdev;
+ int r;
+
+ assert(c);
+ assert(c->macsec);
+
+ netdev = NETDEV(c->macsec);
+
+ if (section_is_invalid(c->section))
+ return -EINVAL;
+
+ if (ether_addr_is_null(&c->sci.mac))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: MACsec receive channel without MAC address configured. "
+ "Ignoring [MACsecReceiveChannel] section from line %u",
+ c->section->filename, c->section->line);
+
+ if (c->sci.port == 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: MACsec receive channel without port configured. "
+ "Ignoring [MACsecReceiveChannel] section from line %u",
+ c->section->filename, c->section->line);
+
+ r = ordered_hashmap_ensure_allocated(&c->macsec->receive_channels, &uint64_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_hashmap_put(c->macsec->receive_channels, &c->sci.as_uint64, c);
+ if (r == -EEXIST)
+ return log_netdev_error_errno(netdev, r,
+ "%s: Multiple [MACsecReceiveChannel] sections have same SCI, "
+ "Ignoring [MACsecReceiveChannel] section from line %u",
+ c->section->filename, c->section->line);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r,
+ "%s: Failed to store [MACsecReceiveChannel] section at hashmap, "
+ "Ignoring [MACsecReceiveChannel] section from line %u",
+ c->section->filename, c->section->line);
+ return 0;
+}
+
+static int macsec_transmit_association_verify(TransmitAssociation *t) {
+ NetDev *netdev;
+ int r;
+
+ assert(t);
+ assert(t->macsec);
+
+ netdev = NETDEV(t->macsec);
+
+ if (section_is_invalid(t->section))
+ return -EINVAL;
+
+ if (t->sa.packet_number == 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: MACsec transmit secure association without PacketNumber= configured. "
+ "Ignoring [MACsecTransmitAssociation] section from line %u",
+ t->section->filename, t->section->line);
+
+ r = macsec_read_key_file(netdev, &t->sa);
+ if (r < 0)
+ return r;
+
+ if (t->sa.key_len <= 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: MACsec transmit secure association without key configured. "
+ "Ignoring [MACsecTransmitAssociation] section from line %u",
+ t->section->filename, t->section->line);
+
+ return 0;
+}
+
+static int macsec_receive_association_verify(ReceiveAssociation *a) {
+ ReceiveChannel *c;
+ NetDev *netdev;
+ int r;
+
+ assert(a);
+ assert(a->macsec);
+
+ netdev = NETDEV(a->macsec);
+
+ if (section_is_invalid(a->section))
+ return -EINVAL;
+
+ r = macsec_read_key_file(netdev, &a->sa);
+ if (r < 0)
+ return r;
+
+ if (a->sa.key_len <= 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: MACsec receive secure association without key configured. "
+ "Ignoring [MACsecReceiveAssociation] section from line %u",
+ a->section->filename, a->section->line);
+
+ if (ether_addr_is_null(&a->sci.mac))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: MACsec receive secure association without MAC address configured. "
+ "Ignoring [MACsecReceiveAssociation] section from line %u",
+ a->section->filename, a->section->line);
+
+ if (a->sci.port == 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: MACsec receive secure association without port configured. "
+ "Ignoring [MACsecReceiveAssociation] section from line %u",
+ a->section->filename, a->section->line);
+
+ c = ordered_hashmap_get(a->macsec->receive_channels, &a->sci.as_uint64);
+ if (!c) {
+ _cleanup_(macsec_receive_channel_freep) ReceiveChannel *new_channel = NULL;
+
+ r = macsec_receive_channel_new(a->macsec, a->sci.as_uint64, &new_channel);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_hashmap_ensure_allocated(&a->macsec->receive_channels, &uint64_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_hashmap_put(a->macsec->receive_channels, &new_channel->sci.as_uint64, new_channel);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r,
+ "%s: Failed to store receive channel at hashmap, "
+ "Ignoring [MACsecReceiveAssociation] section from line %u",
+ a->section->filename, a->section->line);
+ c = TAKE_PTR(new_channel);
+ }
+ if (c->n_rxsa >= MACSEC_MAX_ASSOCIATION_NUMBER)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(ERANGE),
+ "%s: Too many [MACsecReceiveAssociation] sections for the same receive channel, "
+ "Ignoring [MACsecReceiveAssociation] section from line %u",
+ a->section->filename, a->section->line);
+
+ a->sa.association_number = c->n_rxsa;
+ c->rxsa[c->n_rxsa++] = a;
+
+ return 0;
+}
+
+static int netdev_macsec_verify(NetDev *netdev, const char *filename) {
+ MACsec *v = MACSEC(netdev);
+ TransmitAssociation *a;
+ ReceiveAssociation *n;
+ ReceiveChannel *c;
+ uint8_t an, encoding_an;
+ bool use_for_encoding;
+ int r;
+
+ assert(netdev);
+ assert(v);
+ assert(filename);
+
+ ORDERED_HASHMAP_FOREACH(c, v->receive_channels_by_section) {
+ r = macsec_receive_channel_verify(c);
+ if (r < 0)
+ macsec_receive_channel_free(c);
+ }
+
+ an = 0;
+ use_for_encoding = false;
+ encoding_an = 0;
+ ORDERED_HASHMAP_FOREACH(a, v->transmit_associations_by_section) {
+ r = macsec_transmit_association_verify(a);
+ if (r < 0) {
+ macsec_transmit_association_free(a);
+ continue;
+ }
+
+ if (an >= MACSEC_MAX_ASSOCIATION_NUMBER) {
+ log_netdev_error(netdev,
+ "%s: Too many [MACsecTransmitAssociation] sections configured. "
+ "Ignoring [MACsecTransmitAssociation] section from line %u",
+ a->section->filename, a->section->line);
+ macsec_transmit_association_free(a);
+ continue;
+ }
+
+ a->sa.association_number = an++;
+
+ if (a->sa.use_for_encoding > 0) {
+ if (use_for_encoding) {
+ log_netdev_warning(netdev,
+ "%s: Multiple security associations are set to be used for transmit channel."
+ "Disabling UseForEncoding= in [MACsecTransmitAssociation] section from line %u",
+ a->section->filename, a->section->line);
+ a->sa.use_for_encoding = false;
+ } else {
+ encoding_an = a->sa.association_number;
+ use_for_encoding = true;
+ }
+ }
+ }
+
+ assert(encoding_an < MACSEC_MAX_ASSOCIATION_NUMBER);
+ v->encoding_an = encoding_an;
+
+ ORDERED_HASHMAP_FOREACH(n, v->receive_associations_by_section) {
+ r = macsec_receive_association_verify(n);
+ if (r < 0)
+ macsec_receive_association_free(n);
+ }
+
+ return 0;
+}
+
+static void macsec_init(NetDev *netdev) {
+ MACsec *v;
+
+ assert(netdev);
+
+ v = MACSEC(netdev);
+
+ assert(v);
+
+ v->encrypt = -1;
+}
+
+static void macsec_done(NetDev *netdev) {
+ MACsec *t;
+
+ assert(netdev);
+
+ t = MACSEC(netdev);
+
+ assert(t);
+
+ ordered_hashmap_free_with_destructor(t->receive_channels, macsec_receive_channel_free);
+ ordered_hashmap_free_with_destructor(t->receive_channels_by_section, macsec_receive_channel_free);
+ ordered_hashmap_free_with_destructor(t->transmit_associations_by_section, macsec_transmit_association_free);
+ ordered_hashmap_free_with_destructor(t->receive_associations_by_section, macsec_receive_association_free);
+}
+
+const NetDevVTable macsec_vtable = {
+ .object_size = sizeof(MACsec),
+ .init = macsec_init,
+ .sections = NETDEV_COMMON_SECTIONS "MACsec\0MACsecReceiveChannel\0MACsecTransmitAssociation\0MACsecReceiveAssociation\0",
+ .fill_message_create = netdev_macsec_fill_message_create,
+ .post_create = netdev_macsec_configure,
+ .done = macsec_done,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_macsec_verify,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/macsec.h b/src/network/netdev/macsec.h
new file mode 100644
index 0000000..4d88e49
--- /dev/null
+++ b/src/network/netdev/macsec.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_macsec.h>
+
+#include "ether-addr-util.h"
+#include "in-addr-util.h"
+#include "netdev.h"
+#include "networkd-util.h"
+#include "sparse-endian.h"
+
+/* See the definition of MACSEC_NUM_AN in kernel's drivers/net/macsec.c */
+#define MACSEC_MAX_ASSOCIATION_NUMBER 4
+
+typedef struct MACsec MACsec;
+
+typedef union MACsecSCI {
+ uint64_t as_uint64;
+
+ struct {
+ struct ether_addr mac;
+ be16_t port;
+ } _packed_;
+} MACsecSCI;
+
+assert_cc(sizeof(MACsecSCI) == sizeof(uint64_t));
+
+typedef struct SecurityAssociation {
+ uint8_t association_number;
+ uint32_t packet_number;
+ uint8_t key_id[MACSEC_KEYID_LEN];
+ uint8_t *key;
+ uint32_t key_len;
+ char *key_file;
+ int activate;
+ int use_for_encoding;
+} SecurityAssociation;
+
+typedef struct TransmitAssociation {
+ MACsec *macsec;
+ NetworkConfigSection *section;
+
+ SecurityAssociation sa;
+} TransmitAssociation;
+
+typedef struct ReceiveAssociation {
+ MACsec *macsec;
+ NetworkConfigSection *section;
+
+ MACsecSCI sci;
+ SecurityAssociation sa;
+} ReceiveAssociation;
+
+typedef struct ReceiveChannel {
+ MACsec *macsec;
+ NetworkConfigSection *section;
+
+ MACsecSCI sci;
+ ReceiveAssociation *rxsa[MACSEC_MAX_ASSOCIATION_NUMBER];
+ unsigned n_rxsa;
+} ReceiveChannel;
+
+struct MACsec {
+ NetDev meta;
+
+ uint16_t port;
+ int encrypt;
+ uint8_t encoding_an;
+
+ OrderedHashmap *receive_channels;
+ OrderedHashmap *receive_channels_by_section;
+ OrderedHashmap *transmit_associations_by_section;
+ OrderedHashmap *receive_associations_by_section;
+};
+
+DEFINE_NETDEV_CAST(MACSEC, MACsec);
+extern const NetDevVTable macsec_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_macsec_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_macsec_hw_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_macsec_packet_number);
+CONFIG_PARSER_PROTOTYPE(config_parse_macsec_key_id);
+CONFIG_PARSER_PROTOTYPE(config_parse_macsec_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_macsec_key_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_macsec_sa_activate);
+CONFIG_PARSER_PROTOTYPE(config_parse_macsec_use_for_encoding);
diff --git a/src/network/netdev/macvlan.c b/src/network/netdev/macvlan.c
new file mode 100644
index 0000000..9bdcf62
--- /dev/null
+++ b/src/network/netdev/macvlan.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "conf-parser.h"
+#include "macvlan.h"
+#include "macvlan-util.h"
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_macvlan_mode, macvlan_mode, MacVlanMode, "Failed to parse macvlan mode");
+
+static int netdev_macvlan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *req) {
+ MacVlan *m;
+ int r;
+
+ assert(netdev);
+ assert(link);
+ assert(netdev->ifname);
+
+ if (netdev->kind == NETDEV_KIND_MACVLAN)
+ m = MACVLAN(netdev);
+ else
+ m = MACVTAP(netdev);
+
+ assert(m);
+
+ if (m->mode == NETDEV_MACVLAN_MODE_SOURCE && !set_isempty(m->match_source_mac)) {
+ const struct ether_addr *mac_addr;
+
+ r = sd_netlink_message_append_u32(req, IFLA_MACVLAN_MACADDR_MODE, MACVLAN_MACADDR_SET);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MACVLAN_MACADDR_MODE attribute: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_MACVLAN_MACADDR_DATA);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not open IFLA_MACVLAN_MACADDR_DATA container: %m");
+
+ SET_FOREACH(mac_addr, m->match_source_mac) {
+ r = sd_netlink_message_append_ether_addr(req, IFLA_MACVLAN_MACADDR, mac_addr);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MACVLAN_MACADDR attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not close IFLA_MACVLAN_MACADDR_DATA container: %m");
+ }
+
+ if (m->mode != _NETDEV_MACVLAN_MODE_INVALID) {
+ r = sd_netlink_message_append_u32(req, IFLA_MACVLAN_MODE, m->mode);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MACVLAN_MODE attribute: %m");
+ }
+
+ return 0;
+}
+
+static void macvlan_done(NetDev *n) {
+ MacVlan *m;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_MACVLAN)
+ m = MACVLAN(n);
+ else
+ m = MACVTAP(n);
+
+ assert(m);
+
+ set_free_free(m->match_source_mac);
+}
+
+static void macvlan_init(NetDev *n) {
+ MacVlan *m;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_MACVLAN)
+ m = MACVLAN(n);
+ else
+ m = MACVTAP(n);
+
+ assert(m);
+
+ m->mode = _NETDEV_MACVLAN_MODE_INVALID;
+}
+
+const NetDevVTable macvtap_vtable = {
+ .object_size = sizeof(MacVlan),
+ .init = macvlan_init,
+ .done = macvlan_done,
+ .sections = NETDEV_COMMON_SECTIONS "MACVTAP\0",
+ .fill_message_create = netdev_macvlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .generate_mac = true,
+};
+
+const NetDevVTable macvlan_vtable = {
+ .object_size = sizeof(MacVlan),
+ .init = macvlan_init,
+ .done = macvlan_done,
+ .sections = NETDEV_COMMON_SECTIONS "MACVLAN\0",
+ .fill_message_create = netdev_macvlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/macvlan.h b/src/network/netdev/macvlan.h
new file mode 100644
index 0000000..cb7eece
--- /dev/null
+++ b/src/network/netdev/macvlan.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct MacVlan MacVlan;
+
+#include "macvlan-util.h"
+#include "netdev.h"
+#include "set.h"
+
+struct MacVlan {
+ NetDev meta;
+
+ MacVlanMode mode;
+ Set *match_source_mac;
+};
+
+DEFINE_NETDEV_CAST(MACVLAN, MacVlan);
+DEFINE_NETDEV_CAST(MACVTAP, MacVlan);
+extern const NetDevVTable macvlan_vtable;
+extern const NetDevVTable macvtap_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_macvlan_mode);
diff --git a/src/network/netdev/netdev-gperf.gperf b/src/network/netdev/netdev-gperf.gperf
new file mode 100644
index 0000000..4e89761
--- /dev/null
+++ b/src/network/netdev/netdev-gperf.gperf
@@ -0,0 +1,232 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "bareudp.h"
+#include "bond.h"
+#include "bridge.h"
+#include "conf-parser.h"
+#include "geneve.h"
+#include "ipvlan.h"
+#include "macsec.h"
+#include "macvlan.h"
+#include "tunnel.h"
+#include "tuntap.h"
+#include "veth.h"
+#include "vlan-util.h"
+#include "vlan.h"
+#include "vxlan.h"
+#include "vrf.h"
+#include "netdev.h"
+#include "network-internal.h"
+#include "vxcan.h"
+#include "wireguard.h"
+#include "fou-tunnel.h"
+#include "l2tp-tunnel.h"
+#include "xfrm.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name network_netdev_gperf_hash
+%define lookup-function-name network_netdev_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Match.Host, config_parse_net_condition, CONDITION_HOST, offsetof(NetDev, conditions)
+Match.Virtualization, config_parse_net_condition, CONDITION_VIRTUALIZATION, offsetof(NetDev, conditions)
+Match.KernelCommandLine, config_parse_net_condition, CONDITION_KERNEL_COMMAND_LINE, offsetof(NetDev, conditions)
+Match.KernelVersion, config_parse_net_condition, CONDITION_KERNEL_VERSION, offsetof(NetDev, conditions)
+Match.Architecture, config_parse_net_condition, CONDITION_ARCHITECTURE, offsetof(NetDev, conditions)
+NetDev.Description, config_parse_string, 0, offsetof(NetDev, description)
+NetDev.Name, config_parse_ifname, 0, offsetof(NetDev, ifname)
+NetDev.Kind, config_parse_netdev_kind, 0, offsetof(NetDev, kind)
+NetDev.MTUBytes, config_parse_mtu, AF_UNSPEC, offsetof(NetDev, mtu)
+NetDev.MACAddress, config_parse_hwaddr, 0, offsetof(NetDev, mac)
+VLAN.Id, config_parse_vlanid, 0, offsetof(VLan, id)
+VLAN.GVRP, config_parse_tristate, 0, offsetof(VLan, gvrp)
+VLAN.MVRP, config_parse_tristate, 0, offsetof(VLan, mvrp)
+VLAN.LooseBinding, config_parse_tristate, 0, offsetof(VLan, loose_binding)
+VLAN.ReorderHeader, config_parse_tristate, 0, offsetof(VLan, reorder_hdr)
+MACVLAN.Mode, config_parse_macvlan_mode, 0, offsetof(MacVlan, mode)
+MACVLAN.SourceMACAddress, config_parse_hwaddrs, 0, offsetof(MacVlan, match_source_mac)
+MACVTAP.Mode, config_parse_macvlan_mode, 0, offsetof(MacVlan, mode)
+MACVTAP.SourceMACAddress, config_parse_hwaddrs, 0, offsetof(MacVlan, match_source_mac)
+IPVLAN.Mode, config_parse_ipvlan_mode, 0, offsetof(IPVlan, mode)
+IPVLAN.Flags, config_parse_ipvlan_flags, 0, offsetof(IPVlan, flags)
+IPVTAP.Mode, config_parse_ipvlan_mode, 0, offsetof(IPVlan, mode)
+IPVTAP.Flags, config_parse_ipvlan_flags, 0, offsetof(IPVlan, flags)
+Tunnel.Local, config_parse_tunnel_address, 0, offsetof(Tunnel, local)
+Tunnel.Remote, config_parse_tunnel_address, 0, offsetof(Tunnel, remote)
+Tunnel.TOS, config_parse_unsigned, 0, offsetof(Tunnel, tos)
+Tunnel.TTL, config_parse_unsigned, 0, offsetof(Tunnel, ttl)
+Tunnel.Key, config_parse_tunnel_key, 0, offsetof(Tunnel, key)
+Tunnel.InputKey, config_parse_tunnel_key, 0, offsetof(Tunnel, ikey)
+Tunnel.OutputKey, config_parse_tunnel_key, 0, offsetof(Tunnel, okey)
+Tunnel.DiscoverPathMTU, config_parse_bool, 0, offsetof(Tunnel, pmtudisc)
+Tunnel.Mode, config_parse_ip6tnl_mode, 0, offsetof(Tunnel, ip6tnl_mode)
+Tunnel.IPv6FlowLabel, config_parse_ipv6_flowlabel, 0, offsetof(Tunnel, ipv6_flowlabel)
+Tunnel.CopyDSCP, config_parse_bool, 0, offsetof(Tunnel, copy_dscp)
+Tunnel.EncapsulationLimit, config_parse_encap_limit, 0, offsetof(Tunnel, encap_limit)
+Tunnel.Independent, config_parse_bool, 0, offsetof(Tunnel, independent)
+Tunnel.AssignToLoopback, config_parse_bool, 0, offsetof(Tunnel, assign_to_loopback)
+Tunnel.AllowLocalRemote, config_parse_tristate, 0, offsetof(Tunnel, allow_localremote)
+Tunnel.FooOverUDP, config_parse_bool, 0, offsetof(Tunnel, fou_tunnel)
+Tunnel.FOUDestinationPort, config_parse_ip_port, 0, offsetof(Tunnel, fou_destination_port)
+Tunnel.FOUSourcePort, config_parse_ip_port, 0, offsetof(Tunnel, encap_src_port)
+Tunnel.Encapsulation, config_parse_fou_encap_type, 0, offsetof(Tunnel, fou_encap_type)
+Tunnel.IPv6RapidDeploymentPrefix, config_parse_6rd_prefix, 0, 0
+Tunnel.ERSPANIndex, config_parse_uint32, 0, offsetof(Tunnel, erspan_index)
+Tunnel.SerializeTunneledPackets, config_parse_tristate, 0, offsetof(Tunnel, gre_erspan_sequence)
+Tunnel.ISATAP, config_parse_tristate, 0, offsetof(Tunnel, isatap)
+FooOverUDP.Protocol, config_parse_ip_protocol, 0, offsetof(FouTunnel, fou_protocol)
+FooOverUDP.Encapsulation, config_parse_fou_encap_type, 0, offsetof(FouTunnel, fou_encap_type)
+FooOverUDP.Port, config_parse_ip_port, 0, offsetof(FouTunnel, port)
+FooOverUDP.PeerPort, config_parse_ip_port, 0, offsetof(FouTunnel, peer_port)
+FooOverUDP.Local, config_parse_fou_tunnel_address, 0, offsetof(FouTunnel, local)
+FooOverUDP.Peer, config_parse_fou_tunnel_address, 0, offsetof(FouTunnel, peer)
+L2TP.TunnelId, config_parse_l2tp_tunnel_id, 0, offsetof(L2tpTunnel, tunnel_id)
+L2TP.PeerTunnelId, config_parse_l2tp_tunnel_id, 0, offsetof(L2tpTunnel, peer_tunnel_id)
+L2TP.UDPSourcePort, config_parse_ip_port, 0, offsetof(L2tpTunnel, l2tp_udp_sport)
+L2TP.UDPDestinationPort, config_parse_ip_port, 0, offsetof(L2tpTunnel, l2tp_udp_dport)
+L2TP.Local, config_parse_l2tp_tunnel_address, 0, offsetof(L2tpTunnel, local)
+L2TP.Remote, config_parse_l2tp_tunnel_address, 0, offsetof(L2tpTunnel, remote)
+L2TP.EncapsulationType, config_parse_l2tp_encap_type, 0, offsetof(L2tpTunnel, l2tp_encap_type)
+L2TP.UDPCheckSum, config_parse_bool, 0, offsetof(L2tpTunnel, udp_csum)
+L2TP.UDP6CheckSumRx, config_parse_bool, 0, offsetof(L2tpTunnel, udp6_csum_rx)
+L2TP.UDP6CheckSumTx, config_parse_bool, 0, offsetof(L2tpTunnel, udp6_csum_tx)
+L2TPSession.SessionId, config_parse_l2tp_session_id, 0, 0
+L2TPSession.PeerSessionId, config_parse_l2tp_session_id, 0, 0
+L2TPSession.Layer2SpecificHeader, config_parse_l2tp_session_l2spec, 0, 0
+L2TPSession.Name, config_parse_l2tp_session_name, 0, 0
+Peer.Name, config_parse_ifname, 0, offsetof(Veth, ifname_peer)
+Peer.MACAddress, config_parse_hwaddr, 0, offsetof(Veth, mac_peer)
+VXCAN.Peer, config_parse_ifname, 0, offsetof(VxCan, ifname_peer)
+VXLAN.VNI, config_parse_uint32, 0, offsetof(VxLan, vni)
+VXLAN.Id, config_parse_uint32, 0, offsetof(VxLan, vni) /* deprecated */
+VXLAN.Group, config_parse_vxlan_address, 0, offsetof(VxLan, group)
+VXLAN.Local, config_parse_vxlan_address, 0, offsetof(VxLan, local)
+VXLAN.Remote, config_parse_vxlan_address, 0, offsetof(VxLan, remote)
+VXLAN.TOS, config_parse_unsigned, 0, offsetof(VxLan, tos)
+VXLAN.TTL, config_parse_vxlan_ttl, 0, offsetof(VxLan, ttl)
+VXLAN.MacLearning, config_parse_bool, 0, offsetof(VxLan, learning)
+VXLAN.ARPProxy, config_parse_bool, 0, offsetof(VxLan, arp_proxy)
+VXLAN.ReduceARPProxy, config_parse_bool, 0, offsetof(VxLan, arp_proxy)
+VXLAN.L2MissNotification, config_parse_bool, 0, offsetof(VxLan, l2miss)
+VXLAN.L3MissNotification, config_parse_bool, 0, offsetof(VxLan, l3miss)
+VXLAN.RouteShortCircuit, config_parse_bool, 0, offsetof(VxLan, route_short_circuit)
+VXLAN.UDPCheckSum, config_parse_bool, 0, offsetof(VxLan, udpcsum)
+VXLAN.UDPChecksum, config_parse_bool, 0, offsetof(VxLan, udpcsum)
+VXLAN.UDP6ZeroCheckSumRx, config_parse_bool, 0, offsetof(VxLan, udp6zerocsumrx)
+VXLAN.UDP6ZeroChecksumRx, config_parse_bool, 0, offsetof(VxLan, udp6zerocsumrx)
+VXLAN.UDP6ZeroCheckSumTx, config_parse_bool, 0, offsetof(VxLan, udp6zerocsumtx)
+VXLAN.UDP6ZeroChecksumTx, config_parse_bool, 0, offsetof(VxLan, udp6zerocsumtx)
+VXLAN.RemoteChecksumTx, config_parse_bool, 0, offsetof(VxLan, remote_csum_tx)
+VXLAN.RemoteChecksumRx, config_parse_bool, 0, offsetof(VxLan, remote_csum_rx)
+VXLAN.FDBAgeingSec, config_parse_sec, 0, offsetof(VxLan, fdb_ageing)
+VXLAN.GroupPolicyExtension, config_parse_bool, 0, offsetof(VxLan, group_policy)
+VXLAN.GenericProtocolExtension, config_parse_bool, 0, offsetof(VxLan, generic_protocol_extension)
+VXLAN.MaximumFDBEntries, config_parse_unsigned, 0, offsetof(VxLan, max_fdb)
+VXLAN.PortRange, config_parse_port_range, 0, 0
+VXLAN.DestinationPort, config_parse_ip_port, 0, offsetof(VxLan, dest_port)
+VXLAN.FlowLabel, config_parse_flow_label, 0, 0
+VXLAN.IPDoNotFragment, config_parse_df, 0, offsetof(VxLan, df)
+VXLAN.Independent, config_parse_bool, 0, offsetof(VxLan, independent)
+GENEVE.Id, config_parse_geneve_vni, 0, offsetof(Geneve, id)
+GENEVE.Remote, config_parse_geneve_address, 0, offsetof(Geneve, remote)
+GENEVE.TOS, config_parse_uint8, 0, offsetof(Geneve, tos)
+GENEVE.TTL, config_parse_geneve_ttl, 0, offsetof(Geneve, ttl)
+GENEVE.UDPChecksum, config_parse_bool, 0, offsetof(Geneve, udpcsum)
+GENEVE.UDP6ZeroCheckSumRx, config_parse_bool, 0, offsetof(Geneve, udp6zerocsumrx)
+GENEVE.UDP6ZeroChecksumRx, config_parse_bool, 0, offsetof(Geneve, udp6zerocsumrx)
+GENEVE.UDP6ZeroCheckSumTx, config_parse_bool, 0, offsetof(Geneve, udp6zerocsumtx)
+GENEVE.UDP6ZeroChecksumTx, config_parse_bool, 0, offsetof(Geneve, udp6zerocsumtx)
+GENEVE.DestinationPort, config_parse_ip_port, 0, offsetof(Geneve, dest_port)
+GENEVE.IPDoNotFragment, config_parse_geneve_df, 0, offsetof(Geneve, geneve_df)
+GENEVE.FlowLabel, config_parse_geneve_flow_label, 0, 0
+MACsec.Port, config_parse_macsec_port, 0, 0
+MACsec.Encrypt, config_parse_tristate, 0, offsetof(MACsec, encrypt)
+MACsecReceiveChannel.Port, config_parse_macsec_port, 0, 0
+MACsecReceiveChannel.MACAddress, config_parse_macsec_hw_address, 0, 0
+MACsecTransmitAssociation.PacketNumber, config_parse_macsec_packet_number, 0, 0
+MACsecTransmitAssociation.KeyId, config_parse_macsec_key_id, 0, 0
+MACsecTransmitAssociation.Key, config_parse_macsec_key, 0, 0
+MACsecTransmitAssociation.KeyFile, config_parse_macsec_key_file, 0, 0
+MACsecTransmitAssociation.Activate, config_parse_macsec_sa_activate, 0, 0
+MACsecTransmitAssociation.UseForEncoding, config_parse_macsec_use_for_encoding, 0, 0
+MACsecReceiveAssociation.Port, config_parse_macsec_port, 0, 0
+MACsecReceiveAssociation.MACAddress, config_parse_macsec_hw_address, 0, 0
+MACsecReceiveAssociation.PacketNumber, config_parse_macsec_packet_number, 0, 0
+MACsecReceiveAssociation.KeyId, config_parse_macsec_key_id, 0, 0
+MACsecReceiveAssociation.Key, config_parse_macsec_key, 0, 0
+MACsecReceiveAssociation.KeyFile, config_parse_macsec_key_file, 0, 0
+MACsecReceiveAssociation.Activate, config_parse_macsec_sa_activate, 0, 0
+Tun.OneQueue, config_parse_warn_compat, DISABLED_LEGACY, 0
+Tun.MultiQueue, config_parse_bool, 0, offsetof(TunTap, multi_queue)
+Tun.PacketInfo, config_parse_bool, 0, offsetof(TunTap, packet_info)
+Tun.VNetHeader, config_parse_bool, 0, offsetof(TunTap, vnet_hdr)
+Tun.User, config_parse_string, 0, offsetof(TunTap, user_name)
+Tun.Group, config_parse_string, 0, offsetof(TunTap, group_name)
+Tap.OneQueue, config_parse_warn_compat, DISABLED_LEGACY, 0
+Tap.MultiQueue, config_parse_bool, 0, offsetof(TunTap, multi_queue)
+Tap.PacketInfo, config_parse_bool, 0, offsetof(TunTap, packet_info)
+Tap.VNetHeader, config_parse_bool, 0, offsetof(TunTap, vnet_hdr)
+Tap.User, config_parse_string, 0, offsetof(TunTap, user_name)
+Tap.Group, config_parse_string, 0, offsetof(TunTap, group_name)
+Bond.Mode, config_parse_bond_mode, 0, offsetof(Bond, mode)
+Bond.TransmitHashPolicy, config_parse_bond_xmit_hash_policy, 0, offsetof(Bond, xmit_hash_policy)
+Bond.LACPTransmitRate, config_parse_bond_lacp_rate, 0, offsetof(Bond, lacp_rate)
+Bond.AdSelect, config_parse_bond_ad_select, 0, offsetof(Bond, ad_select)
+Bond.FailOverMACPolicy, config_parse_bond_fail_over_mac, 0, offsetof(Bond, fail_over_mac)
+Bond.ARPIPTargets, config_parse_arp_ip_target_address, 0, 0
+Bond.ARPValidate, config_parse_bond_arp_validate, 0, offsetof(Bond, arp_validate)
+Bond.ARPAllTargets, config_parse_bond_arp_all_targets, 0, offsetof(Bond, arp_all_targets)
+Bond.PrimaryReselectPolicy, config_parse_bond_primary_reselect, 0, offsetof(Bond, primary_reselect)
+Bond.ResendIGMP, config_parse_unsigned, 0, offsetof(Bond, resend_igmp)
+Bond.PacketsPerSlave, config_parse_unsigned, 0, offsetof(Bond, packets_per_slave)
+Bond.GratuitousARP, config_parse_unsigned, 0, offsetof(Bond, num_grat_arp)
+Bond.AllSlavesActive, config_parse_bool, 0, offsetof(Bond, all_slaves_active)
+Bond.DynamicTransmitLoadBalancing, config_parse_tristate, 0, offsetof(Bond, tlb_dynamic_lb)
+Bond.MinLinks, config_parse_unsigned, 0, offsetof(Bond, min_links)
+Bond.MIIMonitorSec, config_parse_sec, 0, offsetof(Bond, miimon)
+Bond.UpDelaySec, config_parse_sec, 0, offsetof(Bond, updelay)
+Bond.DownDelaySec, config_parse_sec, 0, offsetof(Bond, downdelay)
+Bond.ARPIntervalSec, config_parse_sec, 0, offsetof(Bond, arp_interval)
+Bond.LearnPacketIntervalSec, config_parse_sec, 0, offsetof(Bond, lp_interval)
+Bond.AdActorSystemPriority, config_parse_ad_actor_sys_prio, 0, offsetof(Bond, ad_actor_sys_prio)
+Bond.AdUserPortKey, config_parse_ad_user_port_key, 0, offsetof(Bond, ad_user_port_key)
+Bond.AdActorSystem, config_parse_ad_actor_system, 0, offsetof(Bond, ad_actor_system)
+Bridge.HelloTimeSec, config_parse_sec, 0, offsetof(Bridge, hello_time)
+Bridge.MaxAgeSec, config_parse_sec, 0, offsetof(Bridge, max_age)
+Bridge.AgeingTimeSec, config_parse_sec, 0, offsetof(Bridge, ageing_time)
+Bridge.ForwardDelaySec, config_parse_sec, 0, offsetof(Bridge, forward_delay)
+Bridge.Priority, config_parse_uint16, 0, offsetof(Bridge, priority)
+Bridge.GroupForwardMask, config_parse_uint16, 0, offsetof(Bridge, group_fwd_mask)
+Bridge.DefaultPVID, config_parse_default_port_vlanid, 0, offsetof(Bridge, default_pvid)
+Bridge.MulticastQuerier, config_parse_tristate, 0, offsetof(Bridge, mcast_querier)
+Bridge.MulticastSnooping, config_parse_tristate, 0, offsetof(Bridge, mcast_snooping)
+Bridge.VLANFiltering, config_parse_tristate, 0, offsetof(Bridge, vlan_filtering)
+Bridge.VLANProtocol, config_parse_vlanprotocol, 0, offsetof(Bridge, vlan_protocol)
+Bridge.STP, config_parse_tristate, 0, offsetof(Bridge, stp)
+Bridge.MulticastIGMPVersion, config_parse_uint8, 0, offsetof(Bridge, igmp_version)
+VRF.TableId, config_parse_uint32, 0, offsetof(Vrf, table) /* deprecated */
+VRF.Table, config_parse_uint32, 0, offsetof(Vrf, table)
+BareUDP.DestinationPort, config_parse_ip_port, 0, offsetof(BareUDP, dest_port)
+BareUDP.EtherType, config_parse_bare_udp_iftype, 0, offsetof(BareUDP, iftype)
+WireGuard.FirewallMark, config_parse_unsigned, 0, offsetof(Wireguard, fwmark)
+WireGuard.FwMark, config_parse_unsigned, 0, offsetof(Wireguard, fwmark) /* deprecated */
+WireGuard.ListenPort, config_parse_wireguard_listen_port, 0, offsetof(Wireguard, port)
+WireGuard.PrivateKey, config_parse_wireguard_private_key, 0, 0
+WireGuard.PrivateKeyFile, config_parse_wireguard_private_key_file, 0, 0
+WireGuardPeer.AllowedIPs, config_parse_wireguard_allowed_ips, 0, 0
+WireGuardPeer.Endpoint, config_parse_wireguard_endpoint, 0, 0
+WireGuardPeer.PublicKey, config_parse_wireguard_peer_key, 0, 0
+WireGuardPeer.PresharedKey, config_parse_wireguard_peer_key, 0, 0
+WireGuardPeer.PresharedKeyFile, config_parse_wireguard_preshared_key_file, 0, 0
+WireGuardPeer.PersistentKeepalive, config_parse_wireguard_keepalive, 0, 0
+Xfrm.InterfaceId, config_parse_uint32, 0, offsetof(Xfrm, if_id)
+Xfrm.Independent, config_parse_bool, 0, offsetof(Xfrm, independent)
diff --git a/src/network/netdev/netdev.c b/src/network/netdev/netdev.c
new file mode 100644
index 0000000..9f390b5
--- /dev/null
+++ b/src/network/netdev/netdev.c
@@ -0,0 +1,868 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bareudp.h"
+#include "bond.h"
+#include "bridge.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "dummy.h"
+#include "fd-util.h"
+#include "fou-tunnel.h"
+#include "geneve.h"
+#include "ifb.h"
+#include "ipvlan.h"
+#include "l2tp-tunnel.h"
+#include "list.h"
+#include "macsec.h"
+#include "macvlan.h"
+#include "netdev.h"
+#include "netdevsim.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "networkd-manager.h"
+#include "nlmon.h"
+#include "path-lookup.h"
+#include "siphash24.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tunnel.h"
+#include "tuntap.h"
+#include "vcan.h"
+#include "veth.h"
+#include "vlan.h"
+#include "vrf.h"
+#include "vxcan.h"
+#include "vxlan.h"
+#include "wireguard.h"
+#include "xfrm.h"
+
+const NetDevVTable * const netdev_vtable[_NETDEV_KIND_MAX] = {
+ [NETDEV_KIND_BRIDGE] = &bridge_vtable,
+ [NETDEV_KIND_BOND] = &bond_vtable,
+ [NETDEV_KIND_VLAN] = &vlan_vtable,
+ [NETDEV_KIND_MACVLAN] = &macvlan_vtable,
+ [NETDEV_KIND_MACVTAP] = &macvtap_vtable,
+ [NETDEV_KIND_IPVLAN] = &ipvlan_vtable,
+ [NETDEV_KIND_IPVTAP] = &ipvtap_vtable,
+ [NETDEV_KIND_VXLAN] = &vxlan_vtable,
+ [NETDEV_KIND_IPIP] = &ipip_vtable,
+ [NETDEV_KIND_GRE] = &gre_vtable,
+ [NETDEV_KIND_GRETAP] = &gretap_vtable,
+ [NETDEV_KIND_IP6GRE] = &ip6gre_vtable,
+ [NETDEV_KIND_IP6GRETAP] = &ip6gretap_vtable,
+ [NETDEV_KIND_SIT] = &sit_vtable,
+ [NETDEV_KIND_VTI] = &vti_vtable,
+ [NETDEV_KIND_VTI6] = &vti6_vtable,
+ [NETDEV_KIND_VETH] = &veth_vtable,
+ [NETDEV_KIND_DUMMY] = &dummy_vtable,
+ [NETDEV_KIND_TUN] = &tun_vtable,
+ [NETDEV_KIND_TAP] = &tap_vtable,
+ [NETDEV_KIND_IP6TNL] = &ip6tnl_vtable,
+ [NETDEV_KIND_VRF] = &vrf_vtable,
+ [NETDEV_KIND_VCAN] = &vcan_vtable,
+ [NETDEV_KIND_GENEVE] = &geneve_vtable,
+ [NETDEV_KIND_VXCAN] = &vxcan_vtable,
+ [NETDEV_KIND_WIREGUARD] = &wireguard_vtable,
+ [NETDEV_KIND_NETDEVSIM] = &netdevsim_vtable,
+ [NETDEV_KIND_FOU] = &foutnl_vtable,
+ [NETDEV_KIND_ERSPAN] = &erspan_vtable,
+ [NETDEV_KIND_L2TP] = &l2tptnl_vtable,
+ [NETDEV_KIND_MACSEC] = &macsec_vtable,
+ [NETDEV_KIND_NLMON] = &nlmon_vtable,
+ [NETDEV_KIND_XFRM] = &xfrm_vtable,
+ [NETDEV_KIND_IFB] = &ifb_vtable,
+ [NETDEV_KIND_BAREUDP] = &bare_udp_vtable,
+};
+
+static const char* const netdev_kind_table[_NETDEV_KIND_MAX] = {
+ [NETDEV_KIND_BAREUDP] = "bareudp",
+ [NETDEV_KIND_BRIDGE] = "bridge",
+ [NETDEV_KIND_BOND] = "bond",
+ [NETDEV_KIND_VLAN] = "vlan",
+ [NETDEV_KIND_MACVLAN] = "macvlan",
+ [NETDEV_KIND_MACVTAP] = "macvtap",
+ [NETDEV_KIND_IPVLAN] = "ipvlan",
+ [NETDEV_KIND_IPVTAP] = "ipvtap",
+ [NETDEV_KIND_VXLAN] = "vxlan",
+ [NETDEV_KIND_IPIP] = "ipip",
+ [NETDEV_KIND_GRE] = "gre",
+ [NETDEV_KIND_GRETAP] = "gretap",
+ [NETDEV_KIND_IP6GRE] = "ip6gre",
+ [NETDEV_KIND_IP6GRETAP] = "ip6gretap",
+ [NETDEV_KIND_SIT] = "sit",
+ [NETDEV_KIND_VETH] = "veth",
+ [NETDEV_KIND_VTI] = "vti",
+ [NETDEV_KIND_VTI6] = "vti6",
+ [NETDEV_KIND_DUMMY] = "dummy",
+ [NETDEV_KIND_TUN] = "tun",
+ [NETDEV_KIND_TAP] = "tap",
+ [NETDEV_KIND_IP6TNL] = "ip6tnl",
+ [NETDEV_KIND_VRF] = "vrf",
+ [NETDEV_KIND_VCAN] = "vcan",
+ [NETDEV_KIND_GENEVE] = "geneve",
+ [NETDEV_KIND_VXCAN] = "vxcan",
+ [NETDEV_KIND_WIREGUARD] = "wireguard",
+ [NETDEV_KIND_NETDEVSIM] = "netdevsim",
+ [NETDEV_KIND_FOU] = "fou",
+ [NETDEV_KIND_ERSPAN] = "erspan",
+ [NETDEV_KIND_L2TP] = "l2tp",
+ [NETDEV_KIND_MACSEC] = "macsec",
+ [NETDEV_KIND_NLMON] = "nlmon",
+ [NETDEV_KIND_XFRM] = "xfrm",
+ [NETDEV_KIND_IFB] = "ifb",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(netdev_kind, NetDevKind);
+
+int config_parse_netdev_kind(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ NetDevKind k, *kind = data;
+
+ assert(rvalue);
+ assert(data);
+
+ k = netdev_kind_from_string(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse netdev kind, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (*kind != _NETDEV_KIND_INVALID && *kind != k) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Specified netdev kind is different from the previous value '%s', ignoring assignment: %s",
+ netdev_kind_to_string(*kind), rvalue);
+ return 0;
+ }
+
+ *kind = k;
+
+ return 0;
+}
+
+static void netdev_callbacks_clear(NetDev *netdev) {
+ netdev_join_callback *callback;
+
+ if (!netdev)
+ return;
+
+ while ((callback = netdev->callbacks)) {
+ LIST_REMOVE(callbacks, netdev->callbacks, callback);
+ link_unref(callback->link);
+ free(callback);
+ }
+}
+
+bool netdev_is_managed(NetDev *netdev) {
+ if (!netdev || !netdev->manager || !netdev->ifname)
+ return false;
+
+ return hashmap_get(netdev->manager->netdevs, netdev->ifname) == netdev;
+}
+
+static void netdev_detach_from_manager(NetDev *netdev) {
+ if (netdev->ifname && netdev->manager)
+ hashmap_remove(netdev->manager->netdevs, netdev->ifname);
+}
+
+static NetDev *netdev_free(NetDev *netdev) {
+ assert(netdev);
+
+ netdev_callbacks_clear(netdev);
+
+ netdev_detach_from_manager(netdev);
+
+ free(netdev->filename);
+
+ free(netdev->description);
+ free(netdev->ifname);
+ free(netdev->mac);
+ condition_free_list(netdev->conditions);
+
+ /* Invoke the per-kind done() destructor, but only if the state field is initialized. We conditionalize that
+ * because we parse .netdev files twice: once to determine the kind (with a short, minimal NetDev structure
+ * allocation, with no room for per-kind fields), and once to read the kind's properties (with a full,
+ * comprehensive NetDev structure allocation with enough space for whatever the specific kind needs). Now, in
+ * the first case we shouldn't try to destruct the per-kind NetDev fields on destruction, in the second case we
+ * should. We use the state field to discern the two cases: it's _NETDEV_STATE_INVALID on the first "raw"
+ * call. */
+ if (netdev->state != _NETDEV_STATE_INVALID &&
+ NETDEV_VTABLE(netdev) &&
+ NETDEV_VTABLE(netdev)->done)
+ NETDEV_VTABLE(netdev)->done(netdev);
+
+ return mfree(netdev);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(NetDev, netdev, netdev_free);
+
+void netdev_drop(NetDev *netdev) {
+ if (!netdev || netdev->state == NETDEV_STATE_LINGER)
+ return;
+
+ netdev->state = NETDEV_STATE_LINGER;
+
+ log_netdev_debug(netdev, "netdev removed");
+
+ netdev_callbacks_clear(netdev);
+
+ netdev_detach_from_manager(netdev);
+
+ netdev_unref(netdev);
+
+ return;
+}
+
+int netdev_get(Manager *manager, const char *name, NetDev **ret) {
+ NetDev *netdev;
+
+ assert(manager);
+ assert(name);
+ assert(ret);
+
+ netdev = hashmap_get(manager->netdevs, name);
+ if (!netdev) {
+ *ret = NULL;
+ return -ENOENT;
+ }
+
+ *ret = netdev;
+
+ return 0;
+}
+
+static int netdev_enter_failed(NetDev *netdev) {
+ netdev->state = NETDEV_STATE_FAILED;
+
+ netdev_callbacks_clear(netdev);
+
+ return 0;
+}
+
+static int netdev_enslave_ready(NetDev *netdev, Link* link, link_netlink_message_handler_t callback) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(netdev);
+ assert(netdev->state == NETDEV_STATE_READY);
+ assert(netdev->manager);
+ assert(netdev->manager->rtnl);
+ assert(IN_SET(netdev->kind, NETDEV_KIND_BRIDGE, NETDEV_KIND_BOND, NETDEV_KIND_VRF));
+ assert(link);
+ assert(callback);
+
+ if (link->flags & IFF_UP && netdev->kind == NETDEV_KIND_BOND) {
+ log_netdev_debug(netdev, "Link '%s' was up when attempting to enslave it. Bringing link down.", link->ifname);
+ r = link_down(link, NULL);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not bring link down: %m");
+ }
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_append_u32(req, IFLA_MASTER, netdev->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MASTER attribute: %m");
+
+ r = netlink_call_async(netdev->manager->rtnl, NULL, req, callback,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ log_netdev_debug(netdev, "Enslaving link '%s'", link->ifname);
+
+ return 0;
+}
+
+static int netdev_enter_ready(NetDev *netdev) {
+ netdev_join_callback *callback, *callback_next;
+ int r;
+
+ assert(netdev);
+ assert(netdev->ifname);
+
+ if (netdev->state != NETDEV_STATE_CREATING)
+ return 0;
+
+ netdev->state = NETDEV_STATE_READY;
+
+ log_netdev_info(netdev, "netdev ready");
+
+ LIST_FOREACH_SAFE(callbacks, callback, callback_next, netdev->callbacks) {
+ /* enslave the links that were attempted to be enslaved before the
+ * link was ready */
+ r = netdev_enslave_ready(netdev, callback->link, callback->callback);
+ if (r < 0)
+ return r;
+
+ LIST_REMOVE(callbacks, netdev->callbacks, callback);
+ link_unref(callback->link);
+ free(callback);
+ }
+
+ if (NETDEV_VTABLE(netdev)->post_create)
+ NETDEV_VTABLE(netdev)->post_create(netdev, NULL, NULL);
+
+ return 0;
+}
+
+/* callback for netdev's created without a backing Link */
+static int netdev_create_handler(sd_netlink *rtnl, sd_netlink_message *m, NetDev *netdev) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->state != _NETDEV_STATE_INVALID);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EEXIST)
+ log_netdev_info(netdev, "netdev exists, using existing without changing its parameters");
+ else if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "netdev could not be created: %m");
+ netdev_drop(netdev);
+
+ return 1;
+ }
+
+ log_netdev_debug(netdev, "Created");
+
+ return 1;
+}
+
+static int netdev_enslave(NetDev *netdev, Link *link, link_netlink_message_handler_t callback) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->manager);
+ assert(netdev->manager->rtnl);
+ assert(IN_SET(netdev->kind, NETDEV_KIND_BRIDGE, NETDEV_KIND_BOND, NETDEV_KIND_VRF));
+
+ if (netdev->state == NETDEV_STATE_READY) {
+ r = netdev_enslave_ready(netdev, link, callback);
+ if (r < 0)
+ return r;
+ } else if (IN_SET(netdev->state, NETDEV_STATE_LINGER, NETDEV_STATE_FAILED)) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ r = rtnl_message_new_synthetic_error(netdev->manager->rtnl, -ENODEV, 0, &m);
+ if (r >= 0)
+ callback(netdev->manager->rtnl, m, link);
+ } else {
+ /* the netdev is not yet ready, save this request for when it is */
+ netdev_join_callback *cb;
+
+ cb = new(netdev_join_callback, 1);
+ if (!cb)
+ return log_oom();
+
+ *cb = (netdev_join_callback) {
+ .callback = callback,
+ .link = link_ref(link),
+ };
+
+ LIST_PREPEND(callbacks, netdev->callbacks, cb);
+
+ log_netdev_debug(netdev, "Will enslave '%s', when ready", link->ifname);
+ }
+
+ return 0;
+}
+
+int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *message) {
+ uint16_t type;
+ const char *kind;
+ const char *received_kind;
+ const char *received_name;
+ int r, ifindex;
+
+ assert(netdev);
+ assert(message);
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not get rtnl message type: %m");
+
+ if (type != RTM_NEWLINK) {
+ log_netdev_error(netdev, "Cannot set ifindex from unexpected rtnl message type.");
+ return -EINVAL;
+ }
+
+ r = sd_rtnl_message_link_get_ifindex(message, &ifindex);
+ if (r < 0) {
+ log_netdev_error_errno(netdev, r, "Could not get ifindex: %m");
+ netdev_enter_failed(netdev);
+ return r;
+ } else if (ifindex <= 0) {
+ log_netdev_error(netdev, "Got invalid ifindex: %d", ifindex);
+ netdev_enter_failed(netdev);
+ return -EINVAL;
+ }
+
+ if (netdev->ifindex > 0) {
+ if (netdev->ifindex != ifindex) {
+ log_netdev_error(netdev, "Could not set ifindex to %d, already set to %d",
+ ifindex, netdev->ifindex);
+ netdev_enter_failed(netdev);
+ return -EEXIST;
+ } else
+ /* ifindex already set to the same for this netdev */
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(message, IFLA_IFNAME, &received_name);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not get IFNAME: %m");
+
+ if (!streq(netdev->ifname, received_name)) {
+ log_netdev_error(netdev, "Received newlink with wrong IFNAME %s", received_name);
+ netdev_enter_failed(netdev);
+ return r;
+ }
+
+ r = sd_netlink_message_enter_container(message, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not get LINKINFO: %m");
+
+ r = sd_netlink_message_read_string(message, IFLA_INFO_KIND, &received_kind);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not get KIND: %m");
+
+ r = sd_netlink_message_exit_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not exit container: %m");
+
+ if (netdev->kind == NETDEV_KIND_TAP)
+ /* the kernel does not distinguish between tun and tap */
+ kind = "tun";
+ else {
+ kind = netdev_kind_to_string(netdev->kind);
+ if (!kind) {
+ log_netdev_error(netdev, "Could not get kind");
+ netdev_enter_failed(netdev);
+ return -EINVAL;
+ }
+ }
+
+ if (!streq(kind, received_kind)) {
+ log_netdev_error(netdev,
+ "Received newlink with wrong KIND %s, "
+ "expected %s", received_kind, kind);
+ netdev_enter_failed(netdev);
+ return r;
+ }
+
+ netdev->ifindex = ifindex;
+
+ log_netdev_debug(netdev, "netdev has index %d", netdev->ifindex);
+
+ netdev_enter_ready(netdev);
+
+ return 0;
+}
+
+#define HASH_KEY SD_ID128_MAKE(52,e1,45,bd,00,6f,29,96,21,c6,30,6d,83,71,04,48)
+
+int netdev_get_mac(const char *ifname, struct ether_addr **ret) {
+ _cleanup_free_ struct ether_addr *mac = NULL;
+ uint64_t result;
+ size_t l, sz;
+ uint8_t *v;
+ int r;
+
+ assert(ifname);
+ assert(ret);
+
+ mac = new0(struct ether_addr, 1);
+ if (!mac)
+ return -ENOMEM;
+
+ l = strlen(ifname);
+ sz = sizeof(sd_id128_t) + l;
+ v = newa(uint8_t, sz);
+
+ /* fetch some persistent data unique to the machine */
+ r = sd_id128_get_machine((sd_id128_t*) v);
+ if (r < 0)
+ return r;
+
+ /* combine with some data unique (on this machine) to this
+ * netdev */
+ memcpy(v + sizeof(sd_id128_t), ifname, l);
+
+ /* Let's hash the host machine ID plus the container name. We
+ * use a fixed, but originally randomly created hash key here. */
+ result = siphash24(v, sz, HASH_KEY.bytes);
+
+ assert_cc(ETH_ALEN <= sizeof(result));
+ memcpy(mac->ether_addr_octet, &result, ETH_ALEN);
+
+ /* see eth_random_addr in the kernel */
+ mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
+ mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
+
+ *ret = TAKE_PTR(mac);
+
+ return 0;
+}
+
+static int netdev_create(NetDev *netdev, Link *link, link_netlink_message_handler_t callback) {
+ int r;
+
+ assert(netdev);
+ assert(!link || callback);
+
+ /* create netdev */
+ if (NETDEV_VTABLE(netdev)->create) {
+ assert(!link);
+
+ r = NETDEV_VTABLE(netdev)->create(netdev);
+ if (r < 0)
+ return r;
+
+ log_netdev_debug(netdev, "Created");
+ } else {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ r = sd_rtnl_message_new_link(netdev->manager->rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not allocate RTM_NEWLINK message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, netdev->ifname);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IFNAME, attribute: %m");
+
+ if (netdev->mac) {
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, netdev->mac);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m");
+ }
+
+ if (netdev->mtu) {
+ r = sd_netlink_message_append_u32(m, IFLA_MTU, netdev->mtu);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_MTU attribute: %m");
+ }
+
+ if (link) {
+ r = sd_netlink_message_append_u32(m, IFLA_LINK, link->ifindex);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, netdev_kind_to_string(netdev->kind));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ if (NETDEV_VTABLE(netdev)->fill_message_create) {
+ r = NETDEV_VTABLE(netdev)->fill_message_create(netdev, link, m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
+
+ if (link) {
+ r = netlink_call_async(netdev->manager->rtnl, NULL, m, callback,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ } else {
+ r = netlink_call_async(netdev->manager->rtnl, NULL, m, netdev_create_handler,
+ netdev_destroy_callback, netdev);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not send rtnetlink message: %m");
+
+ netdev_ref(netdev);
+ }
+
+ netdev->state = NETDEV_STATE_CREATING;
+
+ log_netdev_debug(netdev, "Creating");
+ }
+
+ return 0;
+}
+
+static int netdev_create_after_configured(NetDev *netdev, Link *link) {
+ assert(netdev);
+ assert(link);
+ assert(NETDEV_VTABLE(netdev)->create_after_configured);
+
+ return NETDEV_VTABLE(netdev)->create_after_configured(netdev, link);
+}
+
+/* the callback must be called, possibly after a timeout, as otherwise the Link will hang */
+int netdev_join(NetDev *netdev, Link *link, link_netlink_message_handler_t callback) {
+ int r;
+
+ assert(netdev);
+ assert(netdev->manager);
+ assert(netdev->manager->rtnl);
+
+ switch (netdev_get_create_type(netdev)) {
+ case NETDEV_CREATE_MASTER:
+ r = netdev_enslave(netdev, link, callback);
+ if (r < 0)
+ return r;
+
+ break;
+ case NETDEV_CREATE_STACKED:
+ r = netdev_create(netdev, link, callback);
+ if (r < 0)
+ return r;
+
+ break;
+ case NETDEV_CREATE_AFTER_CONFIGURED:
+ r = netdev_create_after_configured(netdev, link);
+ if (r < 0)
+ return r;
+ break;
+ default:
+ assert_not_reached("Cannot join independent netdev");
+ }
+
+ return 0;
+}
+
+int netdev_load_one(Manager *manager, const char *filename) {
+ _cleanup_(netdev_unrefp) NetDev *netdev_raw = NULL, *netdev = NULL;
+ _cleanup_fclose_ FILE *file = NULL;
+ const char *dropin_dirname;
+ bool independent = false;
+ int r;
+
+ assert(manager);
+ assert(filename);
+
+ file = fopen(filename, "re");
+ if (!file) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (null_or_empty_fd(fileno(file))) {
+ log_debug("Skipping empty file: %s", filename);
+ return 0;
+ }
+
+ netdev_raw = new(NetDev, 1);
+ if (!netdev_raw)
+ return log_oom();
+
+ *netdev_raw = (NetDev) {
+ .n_ref = 1,
+ .kind = _NETDEV_KIND_INVALID,
+ .state = _NETDEV_STATE_INVALID, /* an invalid state means done() of the implementation won't be called on destruction */
+ };
+
+ dropin_dirname = strjoina(basename(filename), ".d");
+ r = config_parse_many(
+ filename, NETWORK_DIRS, dropin_dirname,
+ NETDEV_COMMON_SECTIONS NETDEV_OTHER_SECTIONS,
+ config_item_perf_lookup, network_netdev_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ netdev_raw,
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* skip out early if configuration does not match the environment */
+ if (!condition_test_list(netdev_raw->conditions, environ, NULL, NULL, NULL)) {
+ log_debug("%s: Conditions in the file do not match the system environment, skipping.", filename);
+ return 0;
+ }
+
+ if (netdev_raw->kind == _NETDEV_KIND_INVALID) {
+ log_warning("NetDev has no Kind= configured in %s. Ignoring", filename);
+ return 0;
+ }
+
+ if (!netdev_raw->ifname) {
+ log_warning("NetDev without Name= configured in %s. Ignoring", filename);
+ return 0;
+ }
+
+ r = fseek(file, 0, SEEK_SET);
+ if (r < 0)
+ return -errno;
+
+ netdev = malloc0(NETDEV_VTABLE(netdev_raw)->object_size);
+ if (!netdev)
+ return log_oom();
+
+ netdev->n_ref = 1;
+ netdev->manager = manager;
+ netdev->kind = netdev_raw->kind;
+ netdev->state = NETDEV_STATE_LOADING; /* we initialize the state here for the first time,
+ so that done() will be called on destruction */
+
+ if (NETDEV_VTABLE(netdev)->init)
+ NETDEV_VTABLE(netdev)->init(netdev);
+
+ r = config_parse_many(
+ filename, NETWORK_DIRS, dropin_dirname,
+ NETDEV_VTABLE(netdev)->sections,
+ config_item_perf_lookup, network_netdev_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ netdev, NULL);
+ if (r < 0)
+ return r;
+
+ /* verify configuration */
+ if (NETDEV_VTABLE(netdev)->config_verify) {
+ r = NETDEV_VTABLE(netdev)->config_verify(netdev, filename);
+ if (r < 0)
+ return 0;
+ }
+
+ netdev->filename = strdup(filename);
+ if (!netdev->filename)
+ return log_oom();
+
+ if (!netdev->mac && NETDEV_VTABLE(netdev)->generate_mac) {
+ r = netdev_get_mac(netdev->ifname, &netdev->mac);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r,
+ "Failed to generate predictable MAC address for %s: %m",
+ netdev->ifname);
+ }
+
+ r = hashmap_ensure_allocated(&netdev->manager->netdevs, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(netdev->manager->netdevs, netdev->ifname, netdev);
+ if (r == -EEXIST) {
+ NetDev *n = hashmap_get(netdev->manager->netdevs, netdev->ifname);
+
+ assert(n);
+ if (!streq(netdev->filename, n->filename))
+ log_netdev_warning_errno(netdev, r,
+ "The setting Name=%s in %s conflicts with the one in %s, ignoring",
+ netdev->ifname, netdev->filename, n->filename);
+
+ /* Clear ifname before netdev_free() is called. Otherwise, the NetDev object 'n' is
+ * removed from the hashmap 'manager->netdevs'. */
+ netdev->ifname = mfree(netdev->ifname);
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ LIST_HEAD_INIT(netdev->callbacks);
+
+ log_netdev_debug(netdev, "loaded %s", netdev_kind_to_string(netdev->kind));
+
+ if (IN_SET(netdev_get_create_type(netdev), NETDEV_CREATE_MASTER, NETDEV_CREATE_INDEPENDENT)) {
+ r = netdev_create(netdev, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ switch (netdev->kind) {
+ case NETDEV_KIND_IPIP:
+ independent = IPIP(netdev)->independent;
+ break;
+ case NETDEV_KIND_GRE:
+ independent = GRE(netdev)->independent;
+ break;
+ case NETDEV_KIND_GRETAP:
+ independent = GRETAP(netdev)->independent;
+ break;
+ case NETDEV_KIND_IP6GRE:
+ independent = IP6GRE(netdev)->independent;
+ break;
+ case NETDEV_KIND_IP6GRETAP:
+ independent = IP6GRETAP(netdev)->independent;
+ break;
+ case NETDEV_KIND_SIT:
+ independent = SIT(netdev)->independent;
+ break;
+ case NETDEV_KIND_VTI:
+ independent = VTI(netdev)->independent;
+ break;
+ case NETDEV_KIND_VTI6:
+ independent = VTI6(netdev)->independent;
+ break;
+ case NETDEV_KIND_IP6TNL:
+ independent = IP6TNL(netdev)->independent;
+ break;
+ case NETDEV_KIND_ERSPAN:
+ independent = ERSPAN(netdev)->independent;
+ break;
+ case NETDEV_KIND_XFRM:
+ independent = XFRM(netdev)->independent;
+ break;
+ case NETDEV_KIND_VXLAN:
+ independent = VXLAN(netdev)->independent;
+ break;
+ default:
+ break;
+ }
+
+ if (independent) {
+ r = netdev_create(netdev, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ netdev = NULL;
+
+ return 0;
+}
+
+int netdev_load(Manager *manager, bool reload) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(manager);
+
+ if (!reload)
+ hashmap_clear_with_destructor(manager->netdevs, netdev_unref);
+
+ r = conf_files_list_strv(&files, ".netdev", NULL, 0, NETWORK_DIRS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate netdev files: %m");
+
+ STRV_FOREACH(f, files) {
+ r = netdev_load_one(manager, *f);
+ if (r < 0)
+ log_error_errno(r, "Failed to load %s, ignoring: %m", *f);
+ }
+
+ return 0;
+}
diff --git a/src/network/netdev/netdev.h b/src/network/netdev/netdev.h
new file mode 100644
index 0000000..468fae5
--- /dev/null
+++ b/src/network/netdev/netdev.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "list.h"
+#include "networkd-link.h"
+#include "time-util.h"
+
+#define NETDEV_COMMON_SECTIONS "Match\0NetDev\0"
+/* This is the list of known sections. We need to ignore them in the initial parsing phase. */
+#define NETDEV_OTHER_SECTIONS \
+ "-BareUDP\0" \
+ "-Bond\0" \
+ "-Bridge\0" \
+ "-FooOverUDP\0" \
+ "-GENEVE\0" \
+ "-IPVLAN\0" \
+ "-IPVTAP\0" \
+ "-L2TP\0" \
+ "-L2TPSession\0" \
+ "-MACsec\0" \
+ "-MACsecReceiveChannel\0" \
+ "-MACsecTransmitAssociation\0" \
+ "-MACsecReceiveAssociation\0" \
+ "-MACVTAP\0" \
+ "-MACVLAN\0" \
+ "-Tunnel\0" \
+ "-Tun\0" \
+ "-Tap\0" \
+ "-Peer\0" \
+ "-VLAN\0" \
+ "-VRF\0" \
+ "-VXCAN\0" \
+ "-VXLAN\0" \
+ "-WireGuard\0" \
+ "-WireGuardPeer\0" \
+ "-Xfrm\0"
+
+typedef struct netdev_join_callback netdev_join_callback;
+
+struct netdev_join_callback {
+ link_netlink_message_handler_t callback;
+ Link *link;
+
+ LIST_FIELDS(netdev_join_callback, callbacks);
+};
+
+typedef enum NetDevKind {
+ NETDEV_KIND_BRIDGE,
+ NETDEV_KIND_BOND,
+ NETDEV_KIND_VLAN,
+ NETDEV_KIND_MACVLAN,
+ NETDEV_KIND_MACVTAP,
+ NETDEV_KIND_IPVLAN,
+ NETDEV_KIND_IPVTAP,
+ NETDEV_KIND_VXLAN,
+ NETDEV_KIND_IPIP,
+ NETDEV_KIND_GRE,
+ NETDEV_KIND_GRETAP,
+ NETDEV_KIND_IP6GRE,
+ NETDEV_KIND_IP6GRETAP,
+ NETDEV_KIND_SIT,
+ NETDEV_KIND_VETH,
+ NETDEV_KIND_VTI,
+ NETDEV_KIND_VTI6,
+ NETDEV_KIND_IP6TNL,
+ NETDEV_KIND_DUMMY,
+ NETDEV_KIND_TUN,
+ NETDEV_KIND_TAP,
+ NETDEV_KIND_VRF,
+ NETDEV_KIND_VCAN,
+ NETDEV_KIND_GENEVE,
+ NETDEV_KIND_VXCAN,
+ NETDEV_KIND_WIREGUARD,
+ NETDEV_KIND_NETDEVSIM,
+ NETDEV_KIND_FOU,
+ NETDEV_KIND_ERSPAN,
+ NETDEV_KIND_L2TP,
+ NETDEV_KIND_MACSEC,
+ NETDEV_KIND_NLMON,
+ NETDEV_KIND_XFRM,
+ NETDEV_KIND_IFB,
+ NETDEV_KIND_BAREUDP,
+ _NETDEV_KIND_MAX,
+ _NETDEV_KIND_TUNNEL, /* Used by config_parse_stacked_netdev() */
+ _NETDEV_KIND_INVALID = -1
+} NetDevKind;
+
+typedef enum NetDevState {
+ NETDEV_STATE_LOADING,
+ NETDEV_STATE_FAILED,
+ NETDEV_STATE_CREATING,
+ NETDEV_STATE_READY,
+ NETDEV_STATE_LINGER,
+ _NETDEV_STATE_MAX,
+ _NETDEV_STATE_INVALID = -1,
+} NetDevState;
+
+typedef enum NetDevCreateType {
+ NETDEV_CREATE_INDEPENDENT,
+ NETDEV_CREATE_MASTER,
+ NETDEV_CREATE_STACKED,
+ NETDEV_CREATE_AFTER_CONFIGURED,
+ _NETDEV_CREATE_MAX,
+ _NETDEV_CREATE_INVALID = -1,
+} NetDevCreateType;
+
+typedef struct Manager Manager;
+typedef struct Condition Condition;
+
+typedef struct NetDev {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ char *filename;
+
+ LIST_HEAD(Condition, conditions);
+
+ NetDevState state;
+ NetDevKind kind;
+ char *description;
+ char *ifname;
+ struct ether_addr *mac;
+ uint32_t mtu;
+ int ifindex;
+
+ LIST_HEAD(netdev_join_callback, callbacks);
+} NetDev;
+
+typedef struct NetDevVTable {
+ /* How much memory does an object of this unit type need */
+ size_t object_size;
+
+ /* Config file sections this netdev kind understands, separated
+ * by NUL chars */
+ const char *sections;
+
+ /* This should reset all type-specific variables. This should
+ * not allocate memory, and is called with zero-initialized
+ * data. It should hence only initialize variables that need
+ * to be set != 0. */
+ void (*init)(NetDev *n);
+
+ /* This should free all kind-specific variables. It should be
+ * idempotent. */
+ void (*done)(NetDev *n);
+
+ /* fill in message to create netdev */
+ int (*fill_message_create)(NetDev *netdev, Link *link, sd_netlink_message *message);
+
+ /* specifies if netdev is independent, or a master device or a stacked device */
+ NetDevCreateType create_type;
+
+ /* create netdev, if not done via rtnl */
+ int (*create)(NetDev *netdev);
+
+ /* create netdev after link is fully configured */
+ int (*create_after_configured)(NetDev *netdev, Link *link);
+
+ /* perform additional configuration after netdev has been createad */
+ int (*post_create)(NetDev *netdev, Link *link, sd_netlink_message *message);
+
+ /* verify that compulsory configuration options were specified */
+ int (*config_verify)(NetDev *netdev, const char *filename);
+
+ /* Generate MAC address or not When MACAddress= is not specified. */
+ bool generate_mac;
+} NetDevVTable;
+
+extern const NetDevVTable * const netdev_vtable[_NETDEV_KIND_MAX];
+
+#define NETDEV_VTABLE(n) ((n)->kind != _NETDEV_KIND_INVALID ? netdev_vtable[(n)->kind] : NULL)
+
+/* For casting a netdev into the various netdev kinds */
+#define DEFINE_NETDEV_CAST(UPPERCASE, MixedCase) \
+ static inline MixedCase* UPPERCASE(NetDev *n) { \
+ if (_unlikely_(!n || \
+ n->kind != NETDEV_KIND_##UPPERCASE) || \
+ n->state == _NETDEV_STATE_INVALID) \
+ return NULL; \
+ \
+ return (MixedCase*) n; \
+ }
+
+/* For casting the various netdev kinds into a netdev */
+#define NETDEV(n) (&(n)->meta)
+
+int netdev_load(Manager *manager, bool reload);
+int netdev_load_one(Manager *manager, const char *filename);
+void netdev_drop(NetDev *netdev);
+
+NetDev *netdev_unref(NetDev *netdev);
+NetDev *netdev_ref(NetDev *netdev);
+DEFINE_TRIVIAL_DESTRUCTOR(netdev_destroy_callback, NetDev, netdev_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(NetDev*, netdev_unref);
+
+bool netdev_is_managed(NetDev *netdev);
+int netdev_get(Manager *manager, const char *name, NetDev **ret);
+int netdev_set_ifindex(NetDev *netdev, sd_netlink_message *newlink);
+int netdev_get_mac(const char *ifname, struct ether_addr **ret);
+int netdev_join(NetDev *netdev, Link *link, link_netlink_message_handler_t cb);
+int netdev_join_after_configured(NetDev *netdev, Link *link, link_netlink_message_handler_t callback);
+
+const char *netdev_kind_to_string(NetDevKind d) _const_;
+NetDevKind netdev_kind_from_string(const char *d) _pure_;
+
+static inline NetDevCreateType netdev_get_create_type(NetDev *netdev) {
+ assert(netdev);
+ assert(NETDEV_VTABLE(netdev));
+
+ return NETDEV_VTABLE(netdev)->create_type;
+}
+
+CONFIG_PARSER_PROTOTYPE(config_parse_netdev_kind);
+
+/* gperf */
+const struct ConfigPerfItem* network_netdev_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+/* Macros which append INTERFACE= to the message */
+
+#define log_netdev_full(netdev, level, error, ...) \
+ ({ \
+ const NetDev *_n = (netdev); \
+ _n ? log_object_internal(level, error, PROJECT_FILE, __LINE__, __func__, "INTERFACE=", _n->ifname, NULL, NULL, ##__VA_ARGS__) : \
+ log_internal(level, error, PROJECT_FILE, __LINE__, __func__, ##__VA_ARGS__); \
+ })
+
+#define log_netdev_debug(netdev, ...) log_netdev_full(netdev, LOG_DEBUG, 0, ##__VA_ARGS__)
+#define log_netdev_info(netdev, ...) log_netdev_full(netdev, LOG_INFO, 0, ##__VA_ARGS__)
+#define log_netdev_notice(netdev, ...) log_netdev_full(netdev, LOG_NOTICE, 0, ##__VA_ARGS__)
+#define log_netdev_warning(netdev, ...) log_netdev_full(netdev, LOG_WARNING, 0, ## __VA_ARGS__)
+#define log_netdev_error(netdev, ...) log_netdev_full(netdev, LOG_ERR, 0, ##__VA_ARGS__)
+
+#define log_netdev_debug_errno(netdev, error, ...) log_netdev_full(netdev, LOG_DEBUG, error, ##__VA_ARGS__)
+#define log_netdev_info_errno(netdev, error, ...) log_netdev_full(netdev, LOG_INFO, error, ##__VA_ARGS__)
+#define log_netdev_notice_errno(netdev, error, ...) log_netdev_full(netdev, LOG_NOTICE, error, ##__VA_ARGS__)
+#define log_netdev_warning_errno(netdev, error, ...) log_netdev_full(netdev, LOG_WARNING, error, ##__VA_ARGS__)
+#define log_netdev_error_errno(netdev, error, ...) log_netdev_full(netdev, LOG_ERR, error, ##__VA_ARGS__)
+
+#define LOG_NETDEV_MESSAGE(netdev, fmt, ...) "MESSAGE=%s: " fmt, (netdev)->ifname, ##__VA_ARGS__
+#define LOG_NETDEV_INTERFACE(netdev) "INTERFACE=%s", (netdev)->ifname
diff --git a/src/network/netdev/netdevsim.c b/src/network/netdev/netdevsim.c
new file mode 100644
index 0000000..b281428
--- /dev/null
+++ b/src/network/netdev/netdevsim.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "netdevsim.h"
+
+const NetDevVTable netdevsim_vtable = {
+ .object_size = sizeof(NetDevSim),
+ .sections = NETDEV_COMMON_SECTIONS,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/netdevsim.h b/src/network/netdev/netdevsim.h
new file mode 100644
index 0000000..27adc59
--- /dev/null
+++ b/src/network/netdev/netdevsim.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct NetDevSim NetDevSim;
+
+#include "netdev.h"
+
+struct NetDevSim {
+ NetDev meta;
+};
+
+DEFINE_NETDEV_CAST(NETDEVSIM, NetDevSim);
+extern const NetDevVTable netdevsim_vtable;
diff --git a/src/network/netdev/nlmon.c b/src/network/netdev/nlmon.c
new file mode 100644
index 0000000..a8faed5
--- /dev/null
+++ b/src/network/netdev/nlmon.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "nlmon.h"
+
+static int netdev_nlmon_verify(NetDev *netdev, const char *filename) {
+ assert(netdev);
+ assert(filename);
+
+ if (netdev->mac) {
+ log_netdev_warning(netdev, "%s: MACAddress= is not supported. Ignoring", filename);
+ netdev->mac = mfree(netdev->mac);
+ }
+
+ return 0;
+}
+
+const NetDevVTable nlmon_vtable = {
+ .object_size = sizeof(NLMon),
+ .sections = NETDEV_COMMON_SECTIONS,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_nlmon_verify,
+};
diff --git a/src/network/netdev/nlmon.h b/src/network/netdev/nlmon.h
new file mode 100644
index 0000000..edfc504
--- /dev/null
+++ b/src/network/netdev/nlmon.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct NLMon NLMon;
+
+#include "netdev.h"
+
+struct NLMon {
+ NetDev meta;
+};
+
+DEFINE_NETDEV_CAST(NLMON, NLMon);
+
+extern const NetDevVTable nlmon_vtable;
diff --git a/src/network/netdev/tunnel.c b/src/network/netdev/tunnel.c
new file mode 100644
index 0000000..66e8868
--- /dev/null
+++ b/src/network/netdev/tunnel.c
@@ -0,0 +1,903 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/fou.h>
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#include "conf-parser.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "tunnel.h"
+#include "util.h"
+
+#define DEFAULT_TNL_HOP_LIMIT 64
+#define IP6_FLOWINFO_FLOWLABEL htobe32(0x000FFFFF)
+#define IP6_TNL_F_ALLOW_LOCAL_REMOTE 0x40
+
+static const char* const ip6tnl_mode_table[_NETDEV_IP6_TNL_MODE_MAX] = {
+ [NETDEV_IP6_TNL_MODE_IP6IP6] = "ip6ip6",
+ [NETDEV_IP6_TNL_MODE_IPIP6] = "ipip6",
+ [NETDEV_IP6_TNL_MODE_ANYIP6] = "any",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ip6tnl_mode, Ip6TnlMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ip6tnl_mode, ip6tnl_mode, Ip6TnlMode, "Failed to parse ip6 tunnel Mode");
+
+static int netdev_ipip_sit_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t;
+ int r;
+
+ assert(netdev);
+
+ if (netdev->kind == NETDEV_KIND_IPIP)
+ t = IPIP(netdev);
+ else
+ t = SIT(netdev);
+
+ assert(m);
+ assert(t);
+
+ if (link || t->assign_to_loopback) {
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_LINK, link ? link->ifindex : LOOPBACK_IFINDEX);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_IPTUN_LOCAL, &t->local.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_IPTUN_REMOTE, &t->remote.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_TTL attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_PMTUDISC, t->pmtudisc);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_PMTUDISC attribute: %m");
+
+ if (t->fou_tunnel) {
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_ENCAP_TYPE, t->fou_encap_type);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_ENCAP_TYPE attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_ENCAP_SPORT, htobe16(t->encap_src_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_ENCAP_SPORT attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_ENCAP_DPORT, htobe16(t->fou_destination_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_ENCAP_DPORT attribute: %m");
+ }
+
+ if (netdev->kind == NETDEV_KIND_SIT) {
+ if (t->sixrd_prefixlen > 0) {
+ r = sd_netlink_message_append_in6_addr(m, IFLA_IPTUN_6RD_PREFIX, &t->sixrd_prefix);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_6RD_PREFIX attribute: %m");
+
+ /* u16 is deliberate here, even though we're passing a netmask that can never be >128. The kernel is
+ * expecting to receive the prefixlen as a u16.
+ */
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_6RD_PREFIXLEN, t->sixrd_prefixlen);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_6RD_PREFIXLEN attribute: %m");
+ }
+
+ if (t->isatap >= 0) {
+ uint16_t flags = 0;
+
+ SET_FLAG(flags, SIT_ISATAP, t->isatap);
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPTUN_FLAGS, flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_FLAGS attribute: %m");
+ }
+ }
+
+ return r;
+}
+
+static int netdev_gre_erspan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ uint32_t ikey = 0;
+ uint32_t okey = 0;
+ uint16_t iflags = 0;
+ uint16_t oflags = 0;
+ Tunnel *t;
+ int r;
+
+ assert(netdev);
+ assert(m);
+
+ switch (netdev->kind) {
+ case NETDEV_KIND_GRE:
+ t = GRE(netdev);
+ break;
+ case NETDEV_KIND_ERSPAN:
+ t = ERSPAN(netdev);
+ break;
+ case NETDEV_KIND_GRETAP:
+ t = GRETAP(netdev);
+ break;
+ default:
+ assert_not_reached("invalid netdev kind");
+ }
+
+ assert(t);
+
+ if (link || t->assign_to_loopback) {
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_LINK, link ? link->ifindex : LOOPBACK_IFINDEX);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LINK attribute: %m");
+ }
+
+ if (netdev->kind == NETDEV_KIND_ERSPAN) {
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_ERSPAN_INDEX, t->erspan_index);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_ERSPAN_INDEX attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_GRE_LOCAL, &t->local.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in_addr(m, IFLA_GRE_REMOTE, &t->remote.in);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GRE_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_TTL attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GRE_TOS, t->tos);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_TOS attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GRE_PMTUDISC, t->pmtudisc);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_PMTUDISC attribute: %m");
+
+ if (t->key != 0) {
+ ikey = okey = htobe32(t->key);
+ iflags |= GRE_KEY;
+ oflags |= GRE_KEY;
+ }
+
+ if (t->ikey != 0) {
+ ikey = htobe32(t->ikey);
+ iflags |= GRE_KEY;
+ }
+
+ if (t->okey != 0) {
+ okey = htobe32(t->okey);
+ oflags |= GRE_KEY;
+ }
+
+ if (t->gre_erspan_sequence > 0) {
+ iflags |= GRE_SEQ;
+ oflags |= GRE_SEQ;
+ } else if (t->gre_erspan_sequence == 0) {
+ iflags &= ~GRE_SEQ;
+ oflags &= ~GRE_SEQ;
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_IKEY, ikey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_IKEY attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_OKEY, okey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_OKEY attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_IFLAGS, iflags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_IFLAGS attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_OFLAGS, oflags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_OFLAGS, attribute: %m");
+
+ if (t->fou_tunnel) {
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_ENCAP_TYPE, t->fou_encap_type);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_ENCAP_TYPE attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_ENCAP_SPORT, htobe16(t->encap_src_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_ENCAP_SPORT attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_ENCAP_DPORT, htobe16(t->fou_destination_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_ENCAP_DPORT attribute: %m");
+ }
+
+ return r;
+}
+
+static int netdev_ip6gre_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ uint32_t ikey = 0;
+ uint32_t okey = 0;
+ uint16_t iflags = 0;
+ uint16_t oflags = 0;
+ Tunnel *t;
+ int r;
+
+ assert(netdev);
+
+ if (netdev->kind == NETDEV_KIND_IP6GRE)
+ t = IP6GRE(netdev);
+ else
+ t = IP6GRETAP(netdev);
+
+ assert(t);
+ assert(m);
+
+ if (link || t->assign_to_loopback) {
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_LINK, link ? link->ifindex : LOOPBACK_IFINDEX);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_GRE_LOCAL, &t->local.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_GRE_REMOTE, &t->remote.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_GRE_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_TTL attribute: %m");
+
+ if (t->ipv6_flowlabel != _NETDEV_IPV6_FLOWLABEL_INVALID) {
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_FLOWINFO, t->ipv6_flowlabel);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_FLOWINFO attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_FLAGS, t->flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_FLAGS attribute: %m");
+
+ if (t->key != 0) {
+ ikey = okey = htobe32(t->key);
+ iflags |= GRE_KEY;
+ oflags |= GRE_KEY;
+ }
+
+ if (t->ikey != 0) {
+ ikey = htobe32(t->ikey);
+ iflags |= GRE_KEY;
+ }
+
+ if (t->okey != 0) {
+ okey = htobe32(t->okey);
+ oflags |= GRE_KEY;
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_IKEY, ikey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_IKEY attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_GRE_OKEY, okey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_OKEY attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_IFLAGS, iflags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_IFLAGS attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_GRE_OFLAGS, oflags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_GRE_OFLAGS, attribute: %m");
+
+ return r;
+}
+
+static int netdev_vti_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ uint32_t ikey, okey;
+ Tunnel *t;
+ int r;
+
+ assert(netdev);
+ assert(m);
+
+ if (netdev->kind == NETDEV_KIND_VTI)
+ t = VTI(netdev);
+ else
+ t = VTI6(netdev);
+
+ assert(t);
+
+ if (link || t->assign_to_loopback) {
+ r = sd_netlink_message_append_u32(m, IFLA_VTI_LINK, link ? link->ifindex : LOOPBACK_IFINDEX);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VTI_LINK attribute: %m");
+ }
+
+ if (t->key != 0)
+ ikey = okey = htobe32(t->key);
+ else {
+ ikey = htobe32(t->ikey);
+ okey = htobe32(t->okey);
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_VTI_IKEY, ikey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VTI_IKEY attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_VTI_OKEY, okey);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VTI_OKEY attribute: %m");
+
+ r = netlink_message_append_in_addr_union(m, IFLA_VTI_LOCAL, t->family, &t->local);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VTI_LOCAL attribute: %m");
+
+ r = netlink_message_append_in_addr_union(m, IFLA_VTI_REMOTE, t->family, &t->remote);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VTI_REMOTE attribute: %m");
+
+ return r;
+}
+
+static int netdev_ip6tnl_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Tunnel *t = IP6TNL(netdev);
+ uint8_t proto;
+ int r;
+
+ assert(netdev);
+ assert(m);
+ assert(t);
+
+ if (link || t->assign_to_loopback) {
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_LINK, link ? link->ifindex : LOOPBACK_IFINDEX);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LINK attribute: %m");
+ }
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_IPTUN_LOCAL, &t->local.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_LOCAL attribute: %m");
+
+ r = sd_netlink_message_append_in6_addr(m, IFLA_IPTUN_REMOTE, &t->remote.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_REMOTE attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_TTL, t->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_TTL attribute: %m");
+
+ if (t->ipv6_flowlabel != _NETDEV_IPV6_FLOWLABEL_INVALID) {
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_FLOWINFO, t->ipv6_flowlabel);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_FLOWINFO attribute: %m");
+ }
+
+ if (t->copy_dscp)
+ t->flags |= IP6_TNL_F_RCV_DSCP_COPY;
+
+ if (t->allow_localremote >= 0)
+ SET_FLAG(t->flags, IP6_TNL_F_ALLOW_LOCAL_REMOTE, t->allow_localremote);
+
+ if (t->encap_limit != IPV6_DEFAULT_TNL_ENCAP_LIMIT) {
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_ENCAP_LIMIT, t->encap_limit);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_ENCAP_LIMIT attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_IPTUN_FLAGS, t->flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_FLAGS attribute: %m");
+
+ switch (t->ip6tnl_mode) {
+ case NETDEV_IP6_TNL_MODE_IP6IP6:
+ proto = IPPROTO_IPV6;
+ break;
+ case NETDEV_IP6_TNL_MODE_IPIP6:
+ proto = IPPROTO_IPIP;
+ break;
+ case NETDEV_IP6_TNL_MODE_ANYIP6:
+ default:
+ proto = 0;
+ break;
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_IPTUN_PROTO, proto);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_IPTUN_PROTO attribute: %m");
+
+ return r;
+}
+
+static int netdev_tunnel_verify(NetDev *netdev, const char *filename) {
+ Tunnel *t = NULL;
+
+ assert(netdev);
+ assert(filename);
+
+ switch (netdev->kind) {
+ case NETDEV_KIND_IPIP:
+ t = IPIP(netdev);
+ break;
+ case NETDEV_KIND_SIT:
+ t = SIT(netdev);
+ break;
+ case NETDEV_KIND_GRE:
+ t = GRE(netdev);
+ break;
+ case NETDEV_KIND_GRETAP:
+ t = GRETAP(netdev);
+ break;
+ case NETDEV_KIND_IP6GRE:
+ t = IP6GRE(netdev);
+ break;
+ case NETDEV_KIND_IP6GRETAP:
+ t = IP6GRETAP(netdev);
+ break;
+ case NETDEV_KIND_VTI:
+ t = VTI(netdev);
+ break;
+ case NETDEV_KIND_VTI6:
+ t = VTI6(netdev);
+ break;
+ case NETDEV_KIND_IP6TNL:
+ t = IP6TNL(netdev);
+ break;
+ case NETDEV_KIND_ERSPAN:
+ t = ERSPAN(netdev);
+ break;
+ default:
+ assert_not_reached("Invalid tunnel kind");
+ }
+
+ assert(t);
+
+ if (IN_SET(netdev->kind, NETDEV_KIND_VTI, NETDEV_KIND_IPIP, NETDEV_KIND_SIT, NETDEV_KIND_GRE) &&
+ !IN_SET(t->family, AF_UNSPEC, AF_INET))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "vti/ipip/sit/gre tunnel without a local/remote IPv4 address configured in %s. Ignoring", filename);
+
+ if (IN_SET(netdev->kind, NETDEV_KIND_GRETAP, NETDEV_KIND_ERSPAN) &&
+ (t->family != AF_INET || in_addr_is_null(t->family, &t->remote)))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "gretap/erspan tunnel without a remote IPv4 address configured in %s. Ignoring", filename);
+
+ if ((IN_SET(netdev->kind, NETDEV_KIND_VTI6, NETDEV_KIND_IP6TNL) && t->family != AF_INET6) ||
+ (netdev->kind == NETDEV_KIND_IP6GRE && !IN_SET(t->family, AF_UNSPEC, AF_INET6)))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "vti6/ip6tnl/ip6gre tunnel without a local/remote IPv6 address configured in %s. Ignoring", filename);
+
+ if (netdev->kind == NETDEV_KIND_IP6GRETAP &&
+ (t->family != AF_INET6 || in_addr_is_null(t->family, &t->remote)))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "ip6gretap tunnel without a remote IPv6 address configured in %s. Ignoring", filename);
+
+ if (netdev->kind == NETDEV_KIND_IP6TNL &&
+ t->ip6tnl_mode == _NETDEV_IP6_TNL_MODE_INVALID)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "ip6tnl without mode configured in %s. Ignoring", filename);
+
+ if (t->fou_tunnel && t->fou_destination_port <= 0)
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "FooOverUDP missing port configured in %s. Ignoring", filename);
+
+ if (netdev->kind == NETDEV_KIND_ERSPAN && (t->erspan_index >= (1 << 20) || t->erspan_index == 0))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL), "Invalid erspan index %d. Ignoring", t->erspan_index);
+
+ /* netlink_message_append_in_addr_union() is used for vti/vti6. So, t->family cannot be AF_UNSPEC. */
+ if (netdev->kind == NETDEV_KIND_VTI)
+ t->family = AF_INET;
+
+ return 0;
+}
+
+int config_parse_tunnel_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Tunnel *t = userdata;
+ union in_addr_union *addr = data, buffer;
+ int r, f;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* This is used to parse addresses on both local and remote ends of the tunnel.
+ * Address families must match.
+ *
+ * "any" is a special value which means that the address is unspecified.
+ */
+
+ if (streq(rvalue, "any")) {
+ *addr = IN_ADDR_NULL;
+
+ /* As a special case, if both the local and remote addresses are
+ * unspecified, also clear the address family.
+ */
+ if (t->family != AF_UNSPEC &&
+ in_addr_is_null(t->family, &t->local) != 0 &&
+ in_addr_is_null(t->family, &t->remote) != 0)
+ t->family = AF_UNSPEC;
+ return 0;
+ }
+
+ r = in_addr_from_string_auto(rvalue, &f, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Tunnel address \"%s\" invalid, ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (t->family != AF_UNSPEC && t->family != f) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Tunnel addresses incompatible, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ t->family = f;
+ *addr = buffer;
+ return 0;
+}
+
+int config_parse_tunnel_key(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ union in_addr_union buffer;
+ Tunnel *t = userdata;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = in_addr_from_string(AF_INET, rvalue, &buffer);
+ if (r < 0) {
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse tunnel key ignoring assignment: %s", rvalue);
+ return 0;
+ }
+ } else
+ k = be32toh(buffer.in.s_addr);
+
+ if (streq(lvalue, "Key"))
+ t->key = k;
+ else if (streq(lvalue, "InputKey"))
+ t->ikey = k;
+ else
+ t->okey = k;
+
+ return 0;
+}
+
+int config_parse_ipv6_flowlabel(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ IPv6FlowLabel *ipv6_flowlabel = data;
+ Tunnel *t = userdata;
+ int k = 0;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(ipv6_flowlabel);
+
+ if (streq(rvalue, "inherit")) {
+ *ipv6_flowlabel = IP6_FLOWINFO_FLOWLABEL;
+ t->flags |= IP6_TNL_F_USE_ORIG_FLOWLABEL;
+ } else {
+ r = config_parse_int(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &k, userdata);
+ if (r < 0)
+ return r;
+
+ if (k > 0xFFFFF)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse IPv6 flowlabel option, ignoring: %s", rvalue);
+ else {
+ *ipv6_flowlabel = htobe32(k) & IP6_FLOWINFO_FLOWLABEL;
+ t->flags &= ~IP6_TNL_F_USE_ORIG_FLOWLABEL;
+ }
+ }
+
+ return 0;
+}
+
+int config_parse_encap_limit(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Tunnel *t = userdata;
+ int k = 0;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (streq(rvalue, "none"))
+ t->flags |= IP6_TNL_F_IGN_ENCAP_LIMIT;
+ else {
+ r = safe_atoi(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse Tunnel Encapsulation Limit option, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (k > 255 || k < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid Tunnel Encapsulation value, ignoring: %d", k);
+ else {
+ t->encap_limit = k;
+ t->flags &= ~IP6_TNL_F_IGN_ENCAP_LIMIT;
+ }
+ }
+
+ return 0;
+}
+
+int config_parse_6rd_prefix(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Tunnel *t = userdata;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ union in_addr_union p;
+ uint8_t l;
+ int r;
+
+ r = in_addr_prefix_from_string(rvalue, AF_INET6, &p, &l);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse 6rd prefix \"%s\", ignoring: %m", rvalue);
+ return 0;
+ }
+ if (l == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "6rd prefix length of \"%s\" must be greater than zero, ignoring", rvalue);
+ return 0;
+ }
+
+ t->sixrd_prefix = p.in6;
+ t->sixrd_prefixlen = l;
+
+ return 0;
+}
+
+static void ipip_sit_init(NetDev *n) {
+ Tunnel *t;
+
+ assert(n);
+
+ switch (n->kind) {
+ case NETDEV_KIND_IPIP:
+ t = IPIP(n);
+ break;
+ case NETDEV_KIND_SIT:
+ t = SIT(n);
+ break;
+ default:
+ assert_not_reached("invalid netdev kind");
+ }
+
+ assert(t);
+
+ t->pmtudisc = true;
+ t->fou_encap_type = NETDEV_FOO_OVER_UDP_ENCAP_DIRECT;
+ t->isatap = -1;
+}
+
+static void vti_init(NetDev *n) {
+ Tunnel *t;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_VTI)
+ t = VTI(n);
+ else
+ t = VTI6(n);
+
+ assert(t);
+
+ t->pmtudisc = true;
+}
+
+static void gre_erspan_init(NetDev *n) {
+ Tunnel *t;
+
+ assert(n);
+
+ switch (n->kind) {
+ case NETDEV_KIND_GRE:
+ t = GRE(n);
+ break;
+ case NETDEV_KIND_ERSPAN:
+ t = ERSPAN(n);
+ break;
+ case NETDEV_KIND_GRETAP:
+ t = GRETAP(n);
+ break;
+ default:
+ assert_not_reached("invalid netdev kind");
+ }
+
+ assert(t);
+
+ t->pmtudisc = true;
+ t->gre_erspan_sequence = -1;
+ t->fou_encap_type = NETDEV_FOO_OVER_UDP_ENCAP_DIRECT;
+}
+
+static void ip6gre_init(NetDev *n) {
+ Tunnel *t;
+
+ assert(n);
+
+ if (n->kind == NETDEV_KIND_IP6GRE)
+ t = IP6GRE(n);
+ else
+ t = IP6GRETAP(n);
+
+ assert(t);
+
+ t->ttl = DEFAULT_TNL_HOP_LIMIT;
+}
+
+static void ip6tnl_init(NetDev *n) {
+ Tunnel *t = IP6TNL(n);
+
+ assert(n);
+ assert(t);
+
+ t->ttl = DEFAULT_TNL_HOP_LIMIT;
+ t->encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
+ t->ip6tnl_mode = _NETDEV_IP6_TNL_MODE_INVALID;
+ t->ipv6_flowlabel = _NETDEV_IPV6_FLOWLABEL_INVALID;
+ t->allow_localremote = -1;
+}
+
+const NetDevVTable ipip_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ipip_sit_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_ipip_sit_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable sit_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ipip_sit_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_ipip_sit_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable vti_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = vti_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_vti_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable vti6_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = vti_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_vti_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable gre_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = gre_erspan_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_gre_erspan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable gretap_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = gre_erspan_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_gre_erspan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable ip6gre_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ip6gre_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_ip6gre_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable ip6gretap_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ip6gre_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_ip6gre_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable ip6tnl_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = ip6tnl_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_ip6tnl_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
+
+const NetDevVTable erspan_vtable = {
+ .object_size = sizeof(Tunnel),
+ .init = gre_erspan_init,
+ .sections = NETDEV_COMMON_SECTIONS "Tunnel\0",
+ .fill_message_create = netdev_gre_erspan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_tunnel_verify,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/tunnel.h b/src/network/netdev/tunnel.h
new file mode 100644
index 0000000..d58ded7
--- /dev/null
+++ b/src/network/netdev/tunnel.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "in-addr-util.h"
+
+#include "conf-parser.h"
+#include "fou-tunnel.h"
+#include "netdev.h"
+
+typedef enum Ip6TnlMode {
+ NETDEV_IP6_TNL_MODE_IP6IP6,
+ NETDEV_IP6_TNL_MODE_IPIP6,
+ NETDEV_IP6_TNL_MODE_ANYIP6,
+ _NETDEV_IP6_TNL_MODE_MAX,
+ _NETDEV_IP6_TNL_MODE_INVALID = -1,
+} Ip6TnlMode;
+
+typedef enum IPv6FlowLabel {
+ NETDEV_IPV6_FLOWLABEL_INHERIT = 0xFFFFF + 1,
+ _NETDEV_IPV6_FLOWLABEL_MAX,
+ _NETDEV_IPV6_FLOWLABEL_INVALID = -1,
+} IPv6FlowLabel;
+
+typedef struct Tunnel {
+ NetDev meta;
+
+ uint8_t encap_limit;
+
+ int family;
+ int ipv6_flowlabel;
+ int allow_localremote;
+ int gre_erspan_sequence;
+ int isatap;
+
+ unsigned ttl;
+ unsigned tos;
+ unsigned flags;
+
+ uint32_t key;
+ uint32_t ikey;
+ uint32_t okey;
+ uint32_t erspan_index;
+
+ union in_addr_union local;
+ union in_addr_union remote;
+
+ Ip6TnlMode ip6tnl_mode;
+ FooOverUDPEncapType fou_encap_type;
+
+ bool pmtudisc;
+ bool copy_dscp;
+ bool independent;
+ bool fou_tunnel;
+ bool assign_to_loopback;
+
+ uint16_t encap_src_port;
+ uint16_t fou_destination_port;
+
+ struct in6_addr sixrd_prefix;
+ uint8_t sixrd_prefixlen;
+} Tunnel;
+
+DEFINE_NETDEV_CAST(IPIP, Tunnel);
+DEFINE_NETDEV_CAST(GRE, Tunnel);
+DEFINE_NETDEV_CAST(GRETAP, Tunnel);
+DEFINE_NETDEV_CAST(IP6GRE, Tunnel);
+DEFINE_NETDEV_CAST(IP6GRETAP, Tunnel);
+DEFINE_NETDEV_CAST(SIT, Tunnel);
+DEFINE_NETDEV_CAST(VTI, Tunnel);
+DEFINE_NETDEV_CAST(VTI6, Tunnel);
+DEFINE_NETDEV_CAST(IP6TNL, Tunnel);
+DEFINE_NETDEV_CAST(ERSPAN, Tunnel);
+extern const NetDevVTable ipip_vtable;
+extern const NetDevVTable sit_vtable;
+extern const NetDevVTable vti_vtable;
+extern const NetDevVTable vti6_vtable;
+extern const NetDevVTable gre_vtable;
+extern const NetDevVTable gretap_vtable;
+extern const NetDevVTable ip6gre_vtable;
+extern const NetDevVTable ip6gretap_vtable;
+extern const NetDevVTable ip6tnl_vtable;
+extern const NetDevVTable erspan_vtable;
+
+const char *ip6tnl_mode_to_string(Ip6TnlMode d) _const_;
+Ip6TnlMode ip6tnl_mode_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ip6tnl_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_tunnel_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_flowlabel);
+CONFIG_PARSER_PROTOTYPE(config_parse_encap_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_tunnel_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_6rd_prefix);
diff --git a/src/network/netdev/tuntap.c b/src/network/netdev/tuntap.c
new file mode 100644
index 0000000..d9d6544
--- /dev/null
+++ b/src/network/netdev/tuntap.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/if_tun.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "tuntap.h"
+#include "user-util.h"
+
+#define TUN_DEV "/dev/net/tun"
+
+static int netdev_fill_tuntap_message(NetDev *netdev, struct ifreq *ifr) {
+ TunTap *t;
+
+ assert(netdev);
+ assert(netdev->ifname);
+ assert(ifr);
+
+ if (netdev->kind == NETDEV_KIND_TAP) {
+ t = TAP(netdev);
+ ifr->ifr_flags |= IFF_TAP;
+ } else {
+ t = TUN(netdev);
+ ifr->ifr_flags |= IFF_TUN;
+ }
+
+ if (!t->packet_info)
+ ifr->ifr_flags |= IFF_NO_PI;
+
+ if (t->multi_queue)
+ ifr->ifr_flags |= IFF_MULTI_QUEUE;
+
+ if (t->vnet_hdr)
+ ifr->ifr_flags |= IFF_VNET_HDR;
+
+ strncpy(ifr->ifr_name, netdev->ifname, IFNAMSIZ-1);
+
+ return 0;
+}
+
+static int netdev_tuntap_add(NetDev *netdev, struct ifreq *ifr) {
+ _cleanup_close_ int fd;
+ TunTap *t = NULL;
+ const char *user;
+ const char *group;
+ uid_t uid;
+ gid_t gid;
+ int r;
+
+ assert(netdev);
+ assert(ifr);
+
+ fd = open(TUN_DEV, O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return log_netdev_error_errno(netdev, -errno, "Failed to open tun dev: %m");
+
+ if (ioctl(fd, TUNSETIFF, ifr) < 0)
+ return log_netdev_error_errno(netdev, -errno, "TUNSETIFF failed on tun dev: %m");
+
+ if (netdev->kind == NETDEV_KIND_TAP)
+ t = TAP(netdev);
+ else
+ t = TUN(netdev);
+
+ assert(t);
+
+ if (t->user_name) {
+ user = t->user_name;
+
+ r = get_user_creds(&user, &uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Cannot resolve user name %s: %m", t->user_name);
+
+ if (ioctl(fd, TUNSETOWNER, uid) < 0)
+ return log_netdev_error_errno(netdev, -errno, "TUNSETOWNER failed on tun dev: %m");
+ }
+
+ if (t->group_name) {
+ group = t->group_name;
+
+ r = get_group_creds(&group, &gid, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Cannot resolve group name %s: %m", t->group_name);
+
+ if (ioctl(fd, TUNSETGROUP, gid) < 0)
+ return log_netdev_error_errno(netdev, -errno, "TUNSETGROUP failed on tun dev: %m");
+
+ }
+
+ if (ioctl(fd, TUNSETPERSIST, 1) < 0)
+ return log_netdev_error_errno(netdev, -errno, "TUNSETPERSIST failed on tun dev: %m");
+
+ return 0;
+}
+
+static int netdev_create_tuntap(NetDev *netdev) {
+ struct ifreq ifr = {};
+ int r;
+
+ r = netdev_fill_tuntap_message(netdev, &ifr);
+ if (r < 0)
+ return r;
+
+ return netdev_tuntap_add(netdev, &ifr);
+}
+
+static void tuntap_done(NetDev *netdev) {
+ TunTap *t = NULL;
+
+ assert(netdev);
+
+ if (netdev->kind == NETDEV_KIND_TUN)
+ t = TUN(netdev);
+ else
+ t = TAP(netdev);
+
+ assert(t);
+
+ t->user_name = mfree(t->user_name);
+ t->group_name = mfree(t->group_name);
+}
+
+static int tuntap_verify(NetDev *netdev, const char *filename) {
+ assert(netdev);
+
+ if (netdev->mtu != 0)
+ log_netdev_warning(netdev,
+ "MTUBytes= configured for %s device in %s will be ignored.\n"
+ "Please set it in the corresponding .network file.",
+ netdev_kind_to_string(netdev->kind), filename);
+
+ if (netdev->mac)
+ log_netdev_warning(netdev,
+ "MACAddress= configured for %s device in %s will be ignored.\n"
+ "Please set it in the corresponding .network file.",
+ netdev_kind_to_string(netdev->kind), filename);
+
+ return 0;
+}
+
+const NetDevVTable tun_vtable = {
+ .object_size = sizeof(TunTap),
+ .sections = NETDEV_COMMON_SECTIONS "Tun\0",
+ .config_verify = tuntap_verify,
+ .done = tuntap_done,
+ .create = netdev_create_tuntap,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
+
+const NetDevVTable tap_vtable = {
+ .object_size = sizeof(TunTap),
+ .sections = NETDEV_COMMON_SECTIONS "Tap\0",
+ .config_verify = tuntap_verify,
+ .done = tuntap_done,
+ .create = netdev_create_tuntap,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+};
diff --git a/src/network/netdev/tuntap.h b/src/network/netdev/tuntap.h
new file mode 100644
index 0000000..4d1e643
--- /dev/null
+++ b/src/network/netdev/tuntap.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct TunTap TunTap;
+
+#include "netdev.h"
+
+struct TunTap {
+ NetDev meta;
+
+ char *user_name;
+ char *group_name;
+ bool multi_queue;
+ bool packet_info;
+ bool vnet_hdr;
+};
+
+DEFINE_NETDEV_CAST(TUN, TunTap);
+DEFINE_NETDEV_CAST(TAP, TunTap);
+extern const NetDevVTable tun_vtable;
+extern const NetDevVTable tap_vtable;
diff --git a/src/network/netdev/vcan.c b/src/network/netdev/vcan.c
new file mode 100644
index 0000000..3621d4c
--- /dev/null
+++ b/src/network/netdev/vcan.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "vcan.h"
+
+const NetDevVTable vcan_vtable = {
+ .object_size = sizeof(VCan),
+ .sections = NETDEV_COMMON_SECTIONS,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/vcan.h b/src/network/netdev/vcan.h
new file mode 100644
index 0000000..843984f
--- /dev/null
+++ b/src/network/netdev/vcan.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct VCan VCan;
+
+#include <netinet/in.h>
+#include <linux/can/netlink.h>
+
+#include "netdev.h"
+
+struct VCan {
+ NetDev meta;
+};
+
+DEFINE_NETDEV_CAST(VCAN, VCan);
+
+extern const NetDevVTable vcan_vtable;
diff --git a/src/network/netdev/veth.c b/src/network/netdev/veth.c
new file mode 100644
index 0000000..840a327
--- /dev/null
+++ b/src/network/netdev/veth.c
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <net/if.h>
+#include <linux/veth.h>
+
+#include "veth.h"
+
+static int netdev_veth_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Veth *v;
+ int r;
+
+ assert(netdev);
+ assert(!link);
+ assert(m);
+
+ v = VETH(netdev);
+
+ assert(v);
+
+ r = sd_netlink_message_open_container(m, VETH_INFO_PEER);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append VETH_INFO_PEER attribute: %m");
+
+ if (v->ifname_peer) {
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, v->ifname_peer);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to add netlink interface name: %m");
+ }
+
+ if (v->mac_peer) {
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, v->mac_peer);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_ADDRESS attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ return r;
+}
+
+static int netdev_veth_verify(NetDev *netdev, const char *filename) {
+ Veth *v;
+ int r;
+
+ assert(netdev);
+ assert(filename);
+
+ v = VETH(netdev);
+
+ assert(v);
+
+ if (!v->ifname_peer) {
+ log_netdev_warning(netdev, "Veth NetDev without peer name configured in %s. Ignoring",
+ filename);
+ return -EINVAL;
+ }
+
+ if (!v->mac_peer) {
+ r = netdev_get_mac(v->ifname_peer, &v->mac_peer);
+ if (r < 0) {
+ log_netdev_warning(netdev,
+ "Failed to generate predictable MAC address for %s. Ignoring",
+ v->ifname_peer);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void veth_done(NetDev *n) {
+ Veth *v;
+
+ assert(n);
+
+ v = VETH(n);
+
+ assert(v);
+
+ free(v->ifname_peer);
+ free(v->mac_peer);
+}
+
+const NetDevVTable veth_vtable = {
+ .object_size = sizeof(Veth),
+ .sections = NETDEV_COMMON_SECTIONS "Peer\0",
+ .done = veth_done,
+ .fill_message_create = netdev_veth_fill_message_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_veth_verify,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/veth.h b/src/network/netdev/veth.h
new file mode 100644
index 0000000..643f737
--- /dev/null
+++ b/src/network/netdev/veth.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Veth Veth;
+
+#include "netdev.h"
+
+struct Veth {
+ NetDev meta;
+
+ char *ifname_peer;
+ struct ether_addr *mac_peer;
+};
+
+DEFINE_NETDEV_CAST(VETH, Veth);
+extern const NetDevVTable veth_vtable;
diff --git a/src/network/netdev/vlan.c b/src/network/netdev/vlan.c
new file mode 100644
index 0000000..e7f03f0
--- /dev/null
+++ b/src/network/netdev/vlan.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <net/if.h>
+#include <linux/if_vlan.h>
+
+#include "vlan-util.h"
+#include "vlan.h"
+
+static int netdev_vlan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *req) {
+ struct ifla_vlan_flags flags = {};
+ VLan *v;
+ int r;
+
+ assert(netdev);
+ assert(link);
+ assert(req);
+
+ v = VLAN(netdev);
+
+ assert(v);
+
+ r = sd_netlink_message_append_u16(req, IFLA_VLAN_ID, v->id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VLAN_ID attribute: %m");
+
+ if (v->gvrp != -1) {
+ flags.mask |= VLAN_FLAG_GVRP;
+ SET_FLAG(flags.flags, VLAN_FLAG_GVRP, v->gvrp);
+ }
+
+ if (v->mvrp != -1) {
+ flags.mask |= VLAN_FLAG_MVRP;
+ SET_FLAG(flags.flags, VLAN_FLAG_MVRP, v->mvrp);
+ }
+
+ if (v->reorder_hdr != -1) {
+ flags.mask |= VLAN_FLAG_REORDER_HDR;
+ SET_FLAG(flags.flags, VLAN_FLAG_REORDER_HDR, v->reorder_hdr);
+ }
+
+ if (v->loose_binding != -1) {
+ flags.mask |= VLAN_FLAG_LOOSE_BINDING;
+ SET_FLAG(flags.flags, VLAN_FLAG_LOOSE_BINDING, v->loose_binding);
+ }
+
+ r = sd_netlink_message_append_data(req, IFLA_VLAN_FLAGS, &flags, sizeof(struct ifla_vlan_flags));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VLAN_FLAGS attribute: %m");
+
+ return 0;
+}
+
+static int netdev_vlan_verify(NetDev *netdev, const char *filename) {
+ VLan *v;
+
+ assert(netdev);
+ assert(filename);
+
+ v = VLAN(netdev);
+
+ assert(v);
+
+ if (v->id == VLANID_INVALID) {
+ log_netdev_warning(netdev, "VLAN without valid Id (%"PRIu16") configured in %s.", v->id, filename);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vlan_init(NetDev *netdev) {
+ VLan *v = VLAN(netdev);
+
+ assert(netdev);
+ assert(v);
+
+ v->id = VLANID_INVALID;
+ v->gvrp = -1;
+ v->mvrp = -1;
+ v->loose_binding = -1;
+ v->reorder_hdr = -1;
+}
+
+const NetDevVTable vlan_vtable = {
+ .object_size = sizeof(VLan),
+ .init = vlan_init,
+ .sections = NETDEV_COMMON_SECTIONS "VLAN\0",
+ .fill_message_create = netdev_vlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_vlan_verify,
+};
diff --git a/src/network/netdev/vlan.h b/src/network/netdev/vlan.h
new file mode 100644
index 0000000..9dff924
--- /dev/null
+++ b/src/network/netdev/vlan.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct VLan VLan;
+
+#include "netdev.h"
+
+struct VLan {
+ NetDev meta;
+
+ uint16_t id;
+
+ int gvrp;
+ int mvrp;
+ int loose_binding;
+ int reorder_hdr;
+};
+
+DEFINE_NETDEV_CAST(VLAN, VLan);
+extern const NetDevVTable vlan_vtable;
diff --git a/src/network/netdev/vrf.c b/src/network/netdev/vrf.c
new file mode 100644
index 0000000..ae71ae9
--- /dev/null
+++ b/src/network/netdev/vrf.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "vrf.h"
+
+static int netdev_vrf_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ Vrf *v;
+ int r;
+
+ assert(netdev);
+ assert(!link);
+ assert(m);
+
+ v = VRF(netdev);
+
+ assert(v);
+
+ r = sd_netlink_message_append_u32(m, IFLA_VRF_TABLE, v->table);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IPLA_VRF_TABLE attribute: %m");
+
+ return r;
+}
+
+const NetDevVTable vrf_vtable = {
+ .object_size = sizeof(Vrf),
+ .sections = NETDEV_COMMON_SECTIONS "VRF\0",
+ .fill_message_create = netdev_vrf_fill_message_create,
+ .create_type = NETDEV_CREATE_MASTER,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/vrf.h b/src/network/netdev/vrf.h
new file mode 100644
index 0000000..87977e2
--- /dev/null
+++ b/src/network/netdev/vrf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct Vrf Vrf;
+
+#include "netdev.h"
+
+struct Vrf {
+ NetDev meta;
+
+ uint32_t table;
+};
+
+DEFINE_NETDEV_CAST(VRF, Vrf);
+extern const NetDevVTable vrf_vtable;
diff --git a/src/network/netdev/vxcan.c b/src/network/netdev/vxcan.c
new file mode 100644
index 0000000..e4e32ff
--- /dev/null
+++ b/src/network/netdev/vxcan.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/can/vxcan.h>
+
+#include "vxcan.h"
+
+static int netdev_vxcan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ VxCan *v;
+ int r;
+
+ assert(netdev);
+ assert(!link);
+ assert(m);
+
+ v = VXCAN(netdev);
+
+ assert(v);
+
+ r = sd_netlink_message_open_container(m, VXCAN_INFO_PEER);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append VXCAN_INFO_PEER attribute: %m");
+
+ if (v->ifname_peer) {
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, v->ifname_peer);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to add vxcan netlink interface peer name: %m");
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append VXCAN_INFO_PEER attribute: %m");
+
+ return r;
+}
+
+static int netdev_vxcan_verify(NetDev *netdev, const char *filename) {
+ VxCan *v;
+
+ assert(netdev);
+ assert(filename);
+
+ v = VXCAN(netdev);
+
+ assert(v);
+
+ if (!v->ifname_peer) {
+ log_netdev_warning(netdev, "VxCan NetDev without peer name configured in %s. Ignoring", filename);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vxcan_done(NetDev *n) {
+ VxCan *v;
+
+ assert(n);
+
+ v = VXCAN(n);
+
+ assert(v);
+
+ free(v->ifname_peer);
+}
+
+const NetDevVTable vxcan_vtable = {
+ .object_size = sizeof(VxCan),
+ .sections = NETDEV_COMMON_SECTIONS "VXCAN\0",
+ .done = vxcan_done,
+ .fill_message_create = netdev_vxcan_fill_message_create,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = netdev_vxcan_verify,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/vxcan.h b/src/network/netdev/vxcan.h
new file mode 100644
index 0000000..47be3f0
--- /dev/null
+++ b/src/network/netdev/vxcan.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct VxCan VxCan;
+
+#include "netdev.h"
+
+struct VxCan {
+ NetDev meta;
+
+ char *ifname_peer;
+};
+
+DEFINE_NETDEV_CAST(VXCAN, VxCan);
+
+extern const NetDevVTable vxcan_vtable;
diff --git a/src/network/netdev/vxlan.c b/src/network/netdev/vxlan.c
new file mode 100644
index 0000000..6748f67
--- /dev/null
+++ b/src/network/netdev/vxlan.c
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "conf-parser.h"
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "parse-util.h"
+#include "vxlan.h"
+
+static const char* const df_table[_NETDEV_VXLAN_DF_MAX] = {
+ [NETDEV_VXLAN_DF_NO] = "no",
+ [NETDEV_VXLAN_DF_YES] = "yes",
+ [NETDEV_VXLAN_DF_INHERIT] = "inherit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(df, VxLanDF, NETDEV_VXLAN_DF_YES);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_df, df, VxLanDF, "Failed to parse VXLAN IPDoNotFragment= setting");
+
+static int netdev_vxlan_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ VxLan *v;
+ int r;
+
+ assert(netdev);
+ assert(m);
+
+ v = VXLAN(netdev);
+
+ assert(v);
+
+ if (v->vni <= VXLAN_VID_MAX) {
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_ID, v->vni);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_ID attribute: %m");
+ }
+
+ if (in_addr_is_null(v->group_family, &v->group) == 0) {
+ if (v->group_family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, IFLA_VXLAN_GROUP, &v->group.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, IFLA_VXLAN_GROUP6, &v->group.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_GROUP attribute: %m");
+ } else if (in_addr_is_null(v->remote_family, &v->remote) == 0) {
+ if (v->remote_family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, IFLA_VXLAN_GROUP, &v->remote.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, IFLA_VXLAN_GROUP6, &v->remote.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_GROUP attribute: %m");
+ }
+
+ if (in_addr_is_null(v->local_family, &v->local) == 0) {
+ if (v->local_family == AF_INET)
+ r = sd_netlink_message_append_in_addr(m, IFLA_VXLAN_LOCAL, &v->local.in);
+ else
+ r = sd_netlink_message_append_in6_addr(m, IFLA_VXLAN_LOCAL6, &v->local.in6);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LOCAL attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_LINK, link ? link->ifindex : 0);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LINK attribute: %m");
+
+ if (v->inherit) {
+ r = sd_netlink_message_append_flag(m, IFLA_VXLAN_TTL_INHERIT);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_TTL_INHERIT attribute: %m");
+ } else {
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_TTL, v->ttl);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_TTL attribute: %m");
+ }
+
+ if (v->tos != 0) {
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_TOS, v->tos);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_TOS attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_LEARNING, v->learning);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LEARNING attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_RSC, v->route_short_circuit);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_RSC attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_PROXY, v->arp_proxy);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_PROXY attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_L2MISS, v->l2miss);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_L2MISS attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_L3MISS, v->l3miss);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_L3MISS attribute: %m");
+
+ if (v->fdb_ageing != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_AGEING, v->fdb_ageing / USEC_PER_SEC);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_AGEING attribute: %m");
+ }
+
+ if (v->max_fdb != 0) {
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_LIMIT, v->max_fdb);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LIMIT attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_UDP_CSUM, v->udpcsum);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_UDP_CSUM attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, v->udp6zerocsumtx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_UDP_ZERO_CSUM6_TX attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, v->udp6zerocsumrx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_UDP_ZERO_CSUM6_RX attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_REMCSUM_TX, v->remote_csum_tx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_REMCSUM_TX attribute: %m");
+
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_REMCSUM_RX, v->remote_csum_rx);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_REMCSUM_RX attribute: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_VXLAN_PORT, htobe16(v->dest_port));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_PORT attribute: %m");
+
+ if (v->port_range.low != 0 || v->port_range.high != 0) {
+ struct ifla_vxlan_port_range port_range;
+
+ port_range.low = htobe16(v->port_range.low);
+ port_range.high = htobe16(v->port_range.high);
+
+ r = sd_netlink_message_append_data(m, IFLA_VXLAN_PORT_RANGE, &port_range, sizeof(port_range));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_PORT_RANGE attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, IFLA_VXLAN_LABEL, htobe32(v->flow_label));
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_LABEL attribute: %m");
+
+ if (v->group_policy) {
+ r = sd_netlink_message_append_flag(m, IFLA_VXLAN_GBP);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_GBP attribute: %m");
+ }
+
+ if (v->generic_protocol_extension) {
+ r = sd_netlink_message_append_flag(m, IFLA_VXLAN_GPE);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_GPE attribute: %m");
+ }
+
+ if (v->df != _NETDEV_VXLAN_DF_INVALID) {
+ r = sd_netlink_message_append_u8(m, IFLA_VXLAN_DF, v->df);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_VXLAN_DF attribute: %m");
+ }
+
+ return r;
+}
+
+int config_parse_vxlan_address(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ VxLan *v = userdata;
+ union in_addr_union *addr = data, buffer;
+ int r, f;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = in_addr_from_string_auto(rvalue, &f, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "vxlan '%s' address is invalid, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ r = in_addr_is_multicast(f, &buffer);
+
+ if (streq(lvalue, "Group")) {
+ if (r <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "vxlan %s invalid multicast address, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ v->group_family = f;
+ } else {
+ if (r > 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "vxlan %s cannot be a multicast address, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "Remote"))
+ v->remote_family = f;
+ else
+ v->local_family = f;
+ }
+
+ *addr = buffer;
+
+ return 0;
+}
+
+int config_parse_port_range(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ VxLan *v = userdata;
+ uint16_t low, high;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_ip_port_range(rvalue, &low, &high);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse VXLAN port range '%s'. Port should be greater than 0 and less than 65535.", rvalue);
+ return 0;
+ }
+
+ v->port_range.low = low;
+ v->port_range.high = high;
+
+ return 0;
+}
+
+int config_parse_flow_label(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ VxLan *v = userdata;
+ unsigned f;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse VXLAN flow label '%s'.", rvalue);
+ return 0;
+ }
+
+ if (f & ~VXLAN_FLOW_LABEL_MAX_MASK) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "VXLAN flow label '%s' not valid. Flow label range should be [0-1048575].", rvalue);
+ return 0;
+ }
+
+ v->flow_label = f;
+
+ return 0;
+}
+
+int config_parse_vxlan_ttl(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ VxLan *v = userdata;
+ unsigned f;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "inherit"))
+ v->inherit = true;
+ else {
+ r = safe_atou(rvalue, &f);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse VXLAN TTL '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (f > 255) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid VXLAN TTL '%s'. TTL must be <= 255. Ignoring assignment.", rvalue);
+ return 0;
+ }
+
+ v->ttl = f;
+ }
+
+ return 0;
+}
+
+static int netdev_vxlan_verify(NetDev *netdev, const char *filename) {
+ VxLan *v = VXLAN(netdev);
+
+ assert(netdev);
+ assert(v);
+ assert(filename);
+
+ if (v->vni > VXLAN_VID_MAX)
+ return log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: VXLAN without valid VNI (or VXLAN Segment ID) configured. Ignoring.",
+ filename);
+
+ if (v->ttl > 255)
+ return log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: VXLAN TTL must be <= 255. Ignoring.",
+ filename);
+
+ if (!v->dest_port && v->generic_protocol_extension)
+ v->dest_port = 4790;
+
+ if (in_addr_is_null(v->group_family, &v->group) == 0 && in_addr_is_null(v->remote_family, &v->remote) == 0)
+ return log_netdev_warning_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: VXLAN both 'Group=' and 'Remote=' cannot be specified. Ignoring.",
+ filename);
+
+ return 0;
+}
+
+static void vxlan_init(NetDev *netdev) {
+ VxLan *v;
+
+ assert(netdev);
+
+ v = VXLAN(netdev);
+
+ assert(v);
+
+ v->vni = VXLAN_VID_MAX + 1;
+ v->df = _NETDEV_VXLAN_DF_INVALID;
+ v->learning = true;
+ v->udpcsum = false;
+ v->udp6zerocsumtx = false;
+ v->udp6zerocsumrx = false;
+}
+
+const NetDevVTable vxlan_vtable = {
+ .object_size = sizeof(VxLan),
+ .init = vxlan_init,
+ .sections = NETDEV_COMMON_SECTIONS "VXLAN\0",
+ .fill_message_create = netdev_vxlan_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED,
+ .config_verify = netdev_vxlan_verify,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/vxlan.h b/src/network/netdev/vxlan.h
new file mode 100644
index 0000000..371653c
--- /dev/null
+++ b/src/network/netdev/vxlan.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct VxLan VxLan;
+
+#include <linux/if_link.h>
+
+#include "in-addr-util.h"
+#include "netdev.h"
+
+#define VXLAN_VID_MAX (1u << 24) - 1
+#define VXLAN_FLOW_LABEL_MAX_MASK 0xFFFFFU
+
+typedef enum VxLanDF {
+ NETDEV_VXLAN_DF_NO = VXLAN_DF_UNSET,
+ NETDEV_VXLAN_DF_YES = VXLAN_DF_SET,
+ NETDEV_VXLAN_DF_INHERIT = VXLAN_DF_INHERIT,
+ _NETDEV_VXLAN_DF_MAX,
+ _NETDEV_VXLAN_DF_INVALID = -1
+} VxLanDF;
+
+struct VxLan {
+ NetDev meta;
+
+ uint32_t vni;
+
+ int remote_family;
+ int local_family;
+ int group_family;
+
+ VxLanDF df;
+
+ union in_addr_union remote;
+ union in_addr_union local;
+ union in_addr_union group;
+
+ unsigned tos;
+ unsigned ttl;
+ unsigned max_fdb;
+ unsigned flow_label;
+
+ uint16_t dest_port;
+
+ usec_t fdb_ageing;
+
+ bool learning;
+ bool arp_proxy;
+ bool route_short_circuit;
+ bool l2miss;
+ bool l3miss;
+ bool udpcsum;
+ bool udp6zerocsumtx;
+ bool udp6zerocsumrx;
+ bool remote_csum_tx;
+ bool remote_csum_rx;
+ bool group_policy;
+ bool generic_protocol_extension;
+ bool inherit;
+ bool independent;
+
+ struct ifla_vxlan_port_range port_range;
+};
+
+DEFINE_NETDEV_CAST(VXLAN, VxLan);
+extern const NetDevVTable vxlan_vtable;
+
+const char *df_to_string(VxLanDF d) _const_;
+VxLanDF df_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_vxlan_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_port_range);
+CONFIG_PARSER_PROTOTYPE(config_parse_flow_label);
+CONFIG_PARSER_PROTOTYPE(config_parse_df);
+CONFIG_PARSER_PROTOTYPE(config_parse_vxlan_ttl);
diff --git a/src/network/netdev/wireguard.c b/src/network/netdev/wireguard.c
new file mode 100644
index 0000000..416e9b9
--- /dev/null
+++ b/src/network/netdev/wireguard.c
@@ -0,0 +1,946 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+***/
+
+#include <sys/ioctl.h>
+#include <net/if.h>
+
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "memory-util.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "networkd-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "resolve-private.h"
+#include "string-util.h"
+#include "strv.h"
+#include "wireguard.h"
+
+static void resolve_endpoints(NetDev *netdev);
+
+static void wireguard_peer_free(WireguardPeer *peer) {
+ WireguardIPmask *mask;
+
+ if (!peer)
+ return;
+
+ if (peer->wireguard) {
+ LIST_REMOVE(peers, peer->wireguard->peers, peer);
+
+ set_remove(peer->wireguard->peers_with_unresolved_endpoint, peer);
+ set_remove(peer->wireguard->peers_with_failed_endpoint, peer);
+
+ if (peer->section)
+ hashmap_remove(peer->wireguard->peers_by_section, peer->section);
+ }
+
+ network_config_section_free(peer->section);
+
+ while ((mask = peer->ipmasks)) {
+ LIST_REMOVE(ipmasks, peer->ipmasks, mask);
+ free(mask);
+ }
+
+ free(peer->endpoint_host);
+ free(peer->endpoint_port);
+ free(peer->preshared_key_file);
+ explicit_bzero_safe(peer->preshared_key, WG_KEY_LEN);
+
+ free(peer);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(WireguardPeer, wireguard_peer_free);
+
+static int wireguard_peer_new_static(Wireguard *w, const char *filename, unsigned section_line, WireguardPeer **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(wireguard_peer_freep) WireguardPeer *peer = NULL;
+ int r;
+
+ assert(w);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ peer = hashmap_get(w->peers_by_section, n);
+ if (peer) {
+ *ret = TAKE_PTR(peer);
+ return 0;
+ }
+
+ peer = new(WireguardPeer, 1);
+ if (!peer)
+ return -ENOMEM;
+
+ *peer = (WireguardPeer) {
+ .flags = WGPEER_F_REPLACE_ALLOWEDIPS,
+ .wireguard = w,
+ .section = TAKE_PTR(n),
+ };
+
+ LIST_PREPEND(peers, w->peers, peer);
+
+ r = hashmap_ensure_allocated(&w->peers_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(w->peers_by_section, peer->section, peer);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(peer);
+ return 0;
+}
+
+static int wireguard_set_ipmask_one(NetDev *netdev, sd_netlink_message *message, const WireguardIPmask *mask, uint16_t index) {
+ int r;
+
+ assert(message);
+ assert(mask);
+ assert(index > 0);
+
+ /* This returns 1 on success, 0 on recoverable error, and negative errno on failure. */
+
+ r = sd_netlink_message_open_array(message, index);
+ if (r < 0)
+ return 0;
+
+ r = sd_netlink_message_append_u16(message, WGALLOWEDIP_A_FAMILY, mask->family);
+ if (r < 0)
+ goto cancel;
+
+ r = netlink_message_append_in_addr_union(message, WGALLOWEDIP_A_IPADDR, mask->family, &mask->ip);
+ if (r < 0)
+ goto cancel;
+
+ r = sd_netlink_message_append_u8(message, WGALLOWEDIP_A_CIDR_MASK, mask->cidr);
+ if (r < 0)
+ goto cancel;
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not add wireguard allowed ip: %m");
+
+ return 1;
+
+cancel:
+ r = sd_netlink_message_cancel_array(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not cancel wireguard allowed ip message attribute: %m");
+
+ return 0;
+}
+
+static int wireguard_set_peer_one(NetDev *netdev, sd_netlink_message *message, const WireguardPeer *peer, uint16_t index, WireguardIPmask **mask_start) {
+ WireguardIPmask *mask, *start;
+ uint16_t j = 0;
+ int r;
+
+ assert(message);
+ assert(peer);
+ assert(index > 0);
+ assert(mask_start);
+
+ /* This returns 1 on success, 0 on recoverable error, and negative errno on failure. */
+
+ start = *mask_start ?: peer->ipmasks;
+
+ r = sd_netlink_message_open_array(message, index);
+ if (r < 0)
+ return 0;
+
+ r = sd_netlink_message_append_data(message, WGPEER_A_PUBLIC_KEY, &peer->public_key, sizeof(peer->public_key));
+ if (r < 0)
+ goto cancel;
+
+ if (!*mask_start) {
+ r = sd_netlink_message_append_data(message, WGPEER_A_PRESHARED_KEY, &peer->preshared_key, WG_KEY_LEN);
+ if (r < 0)
+ goto cancel;
+
+ r = sd_netlink_message_append_u32(message, WGPEER_A_FLAGS, peer->flags);
+ if (r < 0)
+ goto cancel;
+
+ r = sd_netlink_message_append_u16(message, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, peer->persistent_keepalive_interval);
+ if (r < 0)
+ goto cancel;
+
+ if (IN_SET(peer->endpoint.sa.sa_family, AF_INET, AF_INET6)) {
+ r = netlink_message_append_sockaddr_union(message, WGPEER_A_ENDPOINT, &peer->endpoint);
+ if (r < 0)
+ goto cancel;
+ }
+ }
+
+ r = sd_netlink_message_open_container(message, WGPEER_A_ALLOWEDIPS);
+ if (r < 0)
+ goto cancel;
+
+ LIST_FOREACH(ipmasks, mask, start) {
+ r = wireguard_set_ipmask_one(netdev, message, mask, ++j);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not add wireguard allowed ip: %m");
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not add wireguard peer: %m");
+
+ *mask_start = mask; /* Start next cycle from this mask. */
+ return !mask;
+
+cancel:
+ r = sd_netlink_message_cancel_array(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not cancel wireguard peers: %m");
+
+ return 0;
+}
+
+static int wireguard_set_interface(NetDev *netdev) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ WireguardIPmask *mask_start = NULL;
+ WireguardPeer *peer, *peer_start;
+ bool sent_once = false;
+ uint32_t serial;
+ Wireguard *w;
+ int r;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ for (peer_start = w->peers; peer_start || !sent_once; ) {
+ uint16_t i = 0;
+
+ message = sd_netlink_message_unref(message);
+
+ r = sd_genl_message_new(netdev->manager->genl, SD_GENL_WIREGUARD, WG_CMD_SET_DEVICE, &message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Failed to allocate generic netlink message: %m");
+
+ r = sd_netlink_message_append_string(message, WGDEVICE_A_IFNAME, netdev->ifname);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard interface name: %m");
+
+ if (peer_start == w->peers) {
+ r = sd_netlink_message_append_data(message, WGDEVICE_A_PRIVATE_KEY, &w->private_key, WG_KEY_LEN);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard private key: %m");
+
+ r = sd_netlink_message_append_u16(message, WGDEVICE_A_LISTEN_PORT, w->port);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard port: %m");
+
+ r = sd_netlink_message_append_u32(message, WGDEVICE_A_FWMARK, w->fwmark);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard fwmark: %m");
+
+ r = sd_netlink_message_append_u32(message, WGDEVICE_A_FLAGS, w->flags);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard flags: %m");
+ }
+
+ r = sd_netlink_message_open_container(message, WGDEVICE_A_PEERS);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append wireguard peer attributes: %m");
+
+ LIST_FOREACH(peers, peer, peer_start) {
+ r = wireguard_set_peer_one(netdev, message, peer, ++i, &mask_start);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+ peer_start = peer; /* Start next cycle from this peer. */
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not close wireguard container: %m");
+
+ r = sd_netlink_send(netdev->manager->genl, message, &serial);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not set wireguard device: %m");
+
+ sent_once = true;
+ }
+
+ return 0;
+}
+
+static void wireguard_peer_destroy_callback(WireguardPeer *peer) {
+ NetDev *netdev;
+
+ assert(peer);
+ assert(peer->wireguard);
+
+ netdev = NETDEV(peer->wireguard);
+
+ if (section_is_invalid(peer->section))
+ wireguard_peer_free(peer);
+
+ netdev_unref(netdev);
+}
+
+static int on_resolve_retry(sd_event_source *s, usec_t usec, void *userdata) {
+ NetDev *netdev = userdata;
+ Wireguard *w;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ if (!netdev_is_managed(netdev))
+ return 0;
+
+ assert(set_isempty(w->peers_with_unresolved_endpoint));
+
+ SWAP_TWO(w->peers_with_unresolved_endpoint, w->peers_with_failed_endpoint);
+
+ resolve_endpoints(netdev);
+
+ return 0;
+}
+
+/*
+ * Given the number of retries this function will return will an exponential
+ * increasing time in milliseconds to wait starting at 200ms and capped at 25 seconds.
+ */
+static int exponential_backoff_milliseconds(unsigned n_retries) {
+ return (2 << MIN(n_retries, 7U)) * 100 * USEC_PER_MSEC;
+}
+
+static int wireguard_resolve_handler(sd_resolve_query *q,
+ int ret,
+ const struct addrinfo *ai,
+ WireguardPeer *peer) {
+ NetDev *netdev;
+ Wireguard *w;
+ int r;
+
+ assert(peer);
+ assert(peer->wireguard);
+
+ w = peer->wireguard;
+ netdev = NETDEV(w);
+
+ if (!netdev_is_managed(netdev))
+ return 0;
+
+ if (ret != 0) {
+ log_netdev_error(netdev, "Failed to resolve host '%s:%s': %s", peer->endpoint_host, peer->endpoint_port, gai_strerror(ret));
+
+ r = set_ensure_put(&w->peers_with_failed_endpoint, NULL, peer);
+ if (r < 0) {
+ log_netdev_error(netdev, "Failed to save a peer, dropping the peer: %m");
+ peer->section->invalid = true;
+ goto resolve_next;
+ }
+
+ } else if ((ai->ai_family == AF_INET && ai->ai_addrlen == sizeof(struct sockaddr_in)) ||
+ (ai->ai_family == AF_INET6 && ai->ai_addrlen == sizeof(struct sockaddr_in6)))
+ memcpy(&peer->endpoint, ai->ai_addr, ai->ai_addrlen);
+ else
+ log_netdev_error(netdev, "Neither IPv4 nor IPv6 address found for peer endpoint %s:%s, ignoring the address.",
+ peer->endpoint_host, peer->endpoint_port);
+
+resolve_next:
+ if (!set_isempty(w->peers_with_unresolved_endpoint)) {
+ resolve_endpoints(netdev);
+ return 0;
+ }
+
+ (void) wireguard_set_interface(netdev);
+
+ if (!set_isempty(w->peers_with_failed_endpoint)) {
+ usec_t usec;
+
+ w->n_retries++;
+ usec = usec_add(now(CLOCK_MONOTONIC), exponential_backoff_milliseconds(w->n_retries));
+ r = event_reset_time(netdev->manager->event, &w->resolve_retry_event_source,
+ CLOCK_MONOTONIC, usec, 0, on_resolve_retry, netdev,
+ 0, "wireguard-resolve-retry", true);
+ if (r < 0) {
+ log_netdev_warning_errno(netdev, r, "Could not arm resolve retry handler: %m");
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static void resolve_endpoints(NetDev *netdev) {
+ static const struct addrinfo hints = {
+ .ai_family = AF_UNSPEC,
+ .ai_socktype = SOCK_DGRAM,
+ .ai_protocol = IPPROTO_UDP
+ };
+ WireguardPeer *peer;
+ Wireguard *w;
+ int r;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ SET_FOREACH(peer, w->peers_with_unresolved_endpoint) {
+ r = resolve_getaddrinfo(netdev->manager->resolve,
+ NULL,
+ peer->endpoint_host,
+ peer->endpoint_port,
+ &hints,
+ wireguard_resolve_handler,
+ wireguard_peer_destroy_callback,
+ peer);
+ if (r == -ENOBUFS)
+ break;
+ if (r < 0) {
+ log_netdev_error_errno(netdev, r, "Failed to create resolver: %m");
+ continue;
+ }
+
+ /* Avoid freeing netdev. It will be unrefed by the destroy callback. */
+ netdev_ref(netdev);
+
+ (void) set_remove(w->peers_with_unresolved_endpoint, peer);
+ }
+}
+
+static int netdev_wireguard_post_create(NetDev *netdev, Link *link, sd_netlink_message *m) {
+ assert(netdev);
+ assert(WIREGUARD(netdev));
+
+ (void) wireguard_set_interface(netdev);
+ resolve_endpoints(netdev);
+ return 0;
+}
+
+int config_parse_wireguard_listen_port(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t *s = data;
+ int r;
+
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue) || streq(rvalue, "auto")) {
+ *s = 0;
+ return 0;
+ }
+
+ r = parse_ip_port(rvalue, s);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid port specification, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+static int wireguard_decode_key_and_warn(
+ const char *rvalue,
+ uint8_t ret[static WG_KEY_LEN],
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue) {
+
+ _cleanup_(erase_and_freep) void *key = NULL;
+ size_t len;
+ int r;
+
+ assert(rvalue);
+ assert(ret);
+ assert(filename);
+ assert(lvalue);
+
+ if (isempty(rvalue)) {
+ memzero(ret, WG_KEY_LEN);
+ return 0;
+ }
+
+ if (!streq(lvalue, "PublicKey"))
+ (void) warn_file_is_world_accessible(filename, NULL, unit, line);
+
+ r = unbase64mem_full(rvalue, strlen(rvalue), true, &key, &len);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to decode wireguard key provided by %s=, ignoring assignment: %m", lvalue);
+ return 0;
+ }
+ if (len != WG_KEY_LEN) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Wireguard key provided by %s= has invalid length (%zu bytes), ignoring assignment.",
+ lvalue, len);
+ return 0;
+ }
+
+ memcpy(ret, key, WG_KEY_LEN);
+ return 0;
+}
+
+int config_parse_wireguard_private_key(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Wireguard *w;
+
+ assert(data);
+ w = WIREGUARD(data);
+ assert(w);
+
+ return wireguard_decode_key_and_warn(rvalue, w->private_key, unit, filename, line, lvalue);
+}
+
+int config_parse_wireguard_private_key_file(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL;
+ Wireguard *w;
+
+ assert(data);
+ w = WIREGUARD(data);
+ assert(w);
+
+ if (isempty(rvalue)) {
+ w->private_key_file = mfree(w->private_key_file);
+ return 0;
+ }
+
+ path = strdup(rvalue);
+ if (!path)
+ return log_oom();
+
+ if (path_simplify_and_warn(path, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue) < 0)
+ return 0;
+
+ return free_and_replace(w->private_key_file, path);
+}
+
+int config_parse_wireguard_peer_key(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(wireguard_peer_free_or_set_invalidp) WireguardPeer *peer = NULL;
+ Wireguard *w;
+ int r;
+
+ assert(data);
+ w = WIREGUARD(data);
+ assert(w);
+
+ r = wireguard_peer_new_static(w, filename, section_line, &peer);
+ if (r < 0)
+ return log_oom();
+
+ r = wireguard_decode_key_and_warn(rvalue,
+ streq(lvalue, "PublicKey") ? peer->public_key : peer->preshared_key,
+ unit, filename, line, lvalue);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(peer);
+ return 0;
+}
+
+int config_parse_wireguard_preshared_key_file(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(wireguard_peer_free_or_set_invalidp) WireguardPeer *peer = NULL;
+ _cleanup_free_ char *path = NULL;
+ Wireguard *w;
+ int r;
+
+ assert(data);
+ w = WIREGUARD(data);
+ assert(w);
+
+ r = wireguard_peer_new_static(w, filename, section_line, &peer);
+ if (r < 0)
+ return log_oom();
+
+ if (isempty(rvalue)) {
+ peer->preshared_key_file = mfree(peer->preshared_key_file);
+ TAKE_PTR(peer);
+ return 0;
+ }
+
+ path = strdup(rvalue);
+ if (!path)
+ return log_oom();
+
+ if (path_simplify_and_warn(path, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue) < 0)
+ return 0;
+
+ free_and_replace(peer->preshared_key_file, path);
+ TAKE_PTR(peer);
+ return 0;
+}
+
+int config_parse_wireguard_allowed_ips(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(wireguard_peer_free_or_set_invalidp) WireguardPeer *peer = NULL;
+ union in_addr_union addr;
+ unsigned char prefixlen;
+ int r, family;
+ Wireguard *w;
+ WireguardIPmask *ipmask;
+
+ assert(rvalue);
+ assert(data);
+
+ w = WIREGUARD(data);
+ assert(w);
+
+ r = wireguard_peer_new_static(w, filename, section_line, &peer);
+ if (r < 0)
+ return log_oom();
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, "," WHITESPACE, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to split allowed ips \"%s\" option: %m", rvalue);
+ break;
+ }
+
+ r = in_addr_prefix_from_string_auto(word, &family, &addr, &prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Network address is invalid, ignoring assignment: %s", word);
+ continue;
+ }
+
+ ipmask = new(WireguardIPmask, 1);
+ if (!ipmask)
+ return log_oom();
+
+ *ipmask = (WireguardIPmask) {
+ .family = family,
+ .ip.in6 = addr.in6,
+ .cidr = prefixlen,
+ };
+
+ LIST_PREPEND(ipmasks, peer->ipmasks, ipmask);
+ }
+
+ TAKE_PTR(peer);
+ return 0;
+}
+
+int config_parse_wireguard_endpoint(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(wireguard_peer_free_or_set_invalidp) WireguardPeer *peer = NULL;
+ const char *begin, *end;
+ Wireguard *w;
+ size_t len;
+ int r;
+
+ assert(data);
+ assert(rvalue);
+
+ w = WIREGUARD(data);
+ assert(w);
+
+ if (rvalue[0] == '[') {
+ begin = &rvalue[1];
+ end = strchr(rvalue, ']');
+ if (!end) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Unable to find matching brace of endpoint, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ len = end - begin;
+ ++end;
+ if (*end != ':' || !*(end + 1)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Unable to find port of endpoint, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ ++end;
+ } else {
+ begin = rvalue;
+ end = strrchr(rvalue, ':');
+ if (!end || !*(end + 1)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Unable to find port of endpoint, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ len = end - begin;
+ ++end;
+ }
+
+ r = wireguard_peer_new_static(w, filename, section_line, &peer);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strndup(&peer->endpoint_host, begin, len);
+ if (r < 0)
+ return log_oom();
+
+ r = free_and_strdup(&peer->endpoint_port, end);
+ if (r < 0)
+ return log_oom();
+
+ r = set_ensure_put(&w->peers_with_unresolved_endpoint, NULL, peer);
+ if (r < 0)
+ return log_oom();
+ TAKE_PTR(peer); /* The peer may already have been in the hash map, that is fine too. */
+
+ return 0;
+}
+
+int config_parse_wireguard_keepalive(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(wireguard_peer_free_or_set_invalidp) WireguardPeer *peer = NULL;
+ uint16_t keepalive = 0;
+ Wireguard *w;
+ int r;
+
+ assert(rvalue);
+ assert(data);
+
+ w = WIREGUARD(data);
+ assert(w);
+
+ r = wireguard_peer_new_static(w, filename, section_line, &peer);
+ if (r < 0)
+ return log_oom();
+
+ if (streq(rvalue, "off"))
+ keepalive = 0;
+ else {
+ r = safe_atou16(rvalue, &keepalive);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse \"%s\" as keepalive interval (range 0–65535), ignoring assignment: %m",
+ rvalue);
+ return 0;
+ }
+ }
+
+ peer->persistent_keepalive_interval = keepalive;
+
+ TAKE_PTR(peer);
+ return 0;
+}
+
+static void wireguard_init(NetDev *netdev) {
+ Wireguard *w;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ w->flags = WGDEVICE_F_REPLACE_PEERS;
+}
+
+static void wireguard_done(NetDev *netdev) {
+ Wireguard *w;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ sd_event_source_unref(w->resolve_retry_event_source);
+
+ explicit_bzero_safe(w->private_key, WG_KEY_LEN);
+ free(w->private_key_file);
+
+ hashmap_free_with_destructor(w->peers_by_section, wireguard_peer_free);
+ set_free(w->peers_with_unresolved_endpoint);
+ set_free(w->peers_with_failed_endpoint);
+}
+
+static int wireguard_read_key_file(const char *filename, uint8_t dest[static WG_KEY_LEN]) {
+ _cleanup_(erase_and_freep) char *key = NULL;
+ size_t key_len;
+ int r;
+
+ if (!filename)
+ return 0;
+
+ assert(dest);
+
+ (void) warn_file_is_world_accessible(filename, NULL, NULL, 0);
+
+ r = read_full_file_full(
+ AT_FDCWD, filename,
+ READ_FULL_FILE_SECURE | READ_FULL_FILE_UNBASE64 | READ_FULL_FILE_WARN_WORLD_READABLE | READ_FULL_FILE_CONNECT_SOCKET,
+ NULL, &key, &key_len);
+ if (r < 0)
+ return r;
+
+ if (key_len != WG_KEY_LEN)
+ return -EINVAL;
+
+ memcpy(dest, key, WG_KEY_LEN);
+ return 0;
+}
+
+static int wireguard_peer_verify(WireguardPeer *peer) {
+ NetDev *netdev = NETDEV(peer->wireguard);
+ int r;
+
+ if (section_is_invalid(peer->section))
+ return -EINVAL;
+
+ if (eqzero(peer->public_key))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: WireGuardPeer section without PublicKey= configured. "
+ "Ignoring [WireGuardPeer] section from line %u.",
+ peer->section->filename, peer->section->line);
+
+ r = wireguard_read_key_file(peer->preshared_key_file, peer->preshared_key);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r,
+ "%s: Failed to read preshared key from '%s'. "
+ "Ignoring [WireGuardPeer] section from line %u.",
+ peer->section->filename, peer->preshared_key_file,
+ peer->section->line);
+
+ return 0;
+}
+
+static int wireguard_verify(NetDev *netdev, const char *filename) {
+ WireguardPeer *peer, *peer_next;
+ Wireguard *w;
+ int r;
+
+ assert(netdev);
+ w = WIREGUARD(netdev);
+ assert(w);
+
+ r = wireguard_read_key_file(w->private_key_file, w->private_key);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r,
+ "Failed to read private key from %s. Dropping network device %s.",
+ w->private_key_file, netdev->ifname);
+
+ if (eqzero(w->private_key))
+ return log_netdev_error_errno(netdev, SYNTHETIC_ERRNO(EINVAL),
+ "%s: Missing PrivateKey= or PrivateKeyFile=, "
+ "Dropping network device %s.",
+ filename, netdev->ifname);
+
+ LIST_FOREACH_SAFE(peers, peer, peer_next, w->peers)
+ if (wireguard_peer_verify(peer) < 0)
+ wireguard_peer_free(peer);
+
+ return 0;
+}
+
+const NetDevVTable wireguard_vtable = {
+ .object_size = sizeof(Wireguard),
+ .sections = NETDEV_COMMON_SECTIONS "WireGuard\0WireGuardPeer\0",
+ .post_create = netdev_wireguard_post_create,
+ .init = wireguard_init,
+ .done = wireguard_done,
+ .create_type = NETDEV_CREATE_INDEPENDENT,
+ .config_verify = wireguard_verify,
+ .generate_mac = true,
+};
diff --git a/src/network/netdev/wireguard.h b/src/network/netdev/wireguard.h
new file mode 100644
index 0000000..b9b5ae9
--- /dev/null
+++ b/src/network/netdev/wireguard.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+typedef struct Wireguard Wireguard;
+
+#include <netinet/in.h>
+#include <linux/wireguard.h>
+
+#include "in-addr-util.h"
+#include "netdev.h"
+#include "socket-util.h"
+
+typedef struct WireguardIPmask {
+ uint16_t family;
+ union in_addr_union ip;
+ uint8_t cidr;
+
+ LIST_FIELDS(struct WireguardIPmask, ipmasks);
+} WireguardIPmask;
+
+typedef struct WireguardPeer {
+ Wireguard *wireguard;
+ NetworkConfigSection *section;
+
+ uint8_t public_key[WG_KEY_LEN];
+ uint8_t preshared_key[WG_KEY_LEN];
+ char *preshared_key_file;
+ uint32_t flags;
+ uint16_t persistent_keepalive_interval;
+
+ union sockaddr_union endpoint;
+ char *endpoint_host;
+ char *endpoint_port;
+
+ LIST_HEAD(WireguardIPmask, ipmasks);
+ LIST_FIELDS(struct WireguardPeer, peers);
+} WireguardPeer;
+
+struct Wireguard {
+ NetDev meta;
+ unsigned last_peer_section;
+
+ uint32_t flags;
+ uint8_t private_key[WG_KEY_LEN];
+ char *private_key_file;
+ uint16_t port;
+ uint32_t fwmark;
+
+ Hashmap *peers_by_section;
+ Set *peers_with_unresolved_endpoint;
+ Set *peers_with_failed_endpoint;
+
+ LIST_HEAD(WireguardPeer, peers);
+
+ unsigned n_retries;
+ sd_event_source *resolve_retry_event_source;
+};
+
+DEFINE_NETDEV_CAST(WIREGUARD, Wireguard);
+extern const NetDevVTable wireguard_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_allowed_ips);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_endpoint);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_listen_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_peer_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_private_key);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_private_key_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_preshared_key_file);
+CONFIG_PARSER_PROTOTYPE(config_parse_wireguard_keepalive);
diff --git a/src/network/netdev/xfrm.c b/src/network/netdev/xfrm.c
new file mode 100644
index 0000000..a407c54
--- /dev/null
+++ b/src/network/netdev/xfrm.c
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "missing_network.h"
+#include "xfrm.h"
+
+static int xfrm_fill_message_create(NetDev *netdev, Link *link, sd_netlink_message *message) {
+ Xfrm *x;
+ int r;
+
+ assert(netdev);
+ assert(message);
+
+ x = XFRM(netdev);
+
+ assert(link || x->independent);
+
+ r = sd_netlink_message_append_u32(message, IFLA_XFRM_LINK, link ? link->ifindex : LOOPBACK_IFINDEX);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_XFRM_LINK: %m");
+
+ r = sd_netlink_message_append_u32(message, IFLA_XFRM_IF_ID, x->if_id);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_XFRM_IF_ID: %m");
+
+ return 0;
+}
+
+const NetDevVTable xfrm_vtable = {
+ .object_size = sizeof(Xfrm),
+ .sections = NETDEV_COMMON_SECTIONS "Xfrm\0",
+ .fill_message_create = xfrm_fill_message_create,
+ .create_type = NETDEV_CREATE_STACKED
+};
diff --git a/src/network/netdev/xfrm.h b/src/network/netdev/xfrm.h
new file mode 100644
index 0000000..f56c4f2
--- /dev/null
+++ b/src/network/netdev/xfrm.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "netdev.h"
+
+typedef struct Xfrm {
+ NetDev meta;
+
+ uint32_t if_id;
+ bool independent;
+} Xfrm;
+
+DEFINE_NETDEV_CAST(XFRM, Xfrm);
+extern const NetDevVTable xfrm_vtable;
diff --git a/src/network/networkctl.c b/src/network/networkctl.c
new file mode 100644
index 0000000..63a90bc
--- /dev/null
+++ b/src/network/networkctl.c
@@ -0,0 +1,2830 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <getopt.h>
+#include <linux/if_addrlabel.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/if_bridge.h>
+#include <linux/if_tunnel.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-dhcp-client.h"
+#include "sd-hwdb.h"
+#include "sd-lldp.h"
+#include "sd-netlink.h"
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "bond-util.h"
+#include "bridge-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "device-util.h"
+#include "escape.h"
+#include "ether-addr-util.h"
+#include "ethtool-util.h"
+#include "fd-util.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "geneve-util.h"
+#include "glob-util.h"
+#include "hwdb-util.h"
+#include "ipvlan-util.h"
+#include "local-addresses.h"
+#include "locale-util.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "macvlan-util.h"
+#include "main-func.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "set.h"
+#include "socket-netlink.h"
+#include "socket-util.h"
+#include "sort-util.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "verbs.h"
+#include "wifi-util.h"
+
+/* Kernel defines MODULE_NAME_LEN as 64 - sizeof(unsigned long). So, 64 is enough. */
+#define NETDEV_KIND_MAX 64
+
+/* use 128 kB for receive socket kernel queue, we shouldn't need more here */
+#define RCVBUF_SIZE (128*1024)
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static bool arg_all = false;
+static bool arg_stats = false;
+static bool arg_full = false;
+static unsigned arg_lines = 10;
+
+static void operational_state_to_color(const char *name, const char *state, const char **on, const char **off) {
+ assert(on);
+ assert(off);
+
+ if (STRPTR_IN_SET(state, "routable", "enslaved") ||
+ (streq_ptr(name, "lo") && streq_ptr(state, "carrier"))) {
+ *on = ansi_highlight_green();
+ *off = ansi_normal();
+ } else if (streq_ptr(state, "degraded")) {
+ *on = ansi_highlight_yellow();
+ *off = ansi_normal();
+ } else
+ *on = *off = "";
+}
+
+static void setup_state_to_color(const char *state, const char **on, const char **off) {
+ assert(on);
+ assert(off);
+
+ if (streq_ptr(state, "configured")) {
+ *on = ansi_highlight_green();
+ *off = ansi_normal();
+ } else if (streq_ptr(state, "configuring")) {
+ *on = ansi_highlight_yellow();
+ *off = ansi_normal();
+ } else if (STRPTR_IN_SET(state, "failed", "linger")) {
+ *on = ansi_highlight_red();
+ *off = ansi_normal();
+ } else
+ *on = *off = "";
+}
+
+typedef struct VxLanInfo {
+ uint32_t vni;
+ uint32_t link;
+
+ int local_family;
+ int group_family;
+
+ union in_addr_union local;
+ union in_addr_union group;
+
+ uint16_t dest_port;
+
+ uint8_t proxy;
+ uint8_t learning;
+ uint8_t inerit;
+ uint8_t rsc;
+ uint8_t l2miss;
+ uint8_t l3miss;
+ uint8_t tos;
+ uint8_t ttl;
+} VxLanInfo;
+
+typedef struct LinkInfo {
+ char name[IFNAMSIZ+1];
+ char netdev_kind[NETDEV_KIND_MAX];
+ sd_device *sd_device;
+ int ifindex;
+ unsigned short iftype;
+ hw_addr_data hw_address;
+ struct ether_addr permanent_mac_address;
+ uint32_t master;
+ uint32_t mtu;
+ uint32_t min_mtu;
+ uint32_t max_mtu;
+ uint32_t tx_queues;
+ uint32_t rx_queues;
+ uint8_t addr_gen_mode;
+ char *qdisc;
+ char **alternative_names;
+
+ union {
+ struct rtnl_link_stats64 stats64;
+ struct rtnl_link_stats stats;
+ };
+
+ uint64_t tx_bitrate;
+ uint64_t rx_bitrate;
+
+ /* bridge info */
+ uint32_t forward_delay;
+ uint32_t hello_time;
+ uint32_t max_age;
+ uint32_t ageing_time;
+ uint32_t stp_state;
+ uint32_t cost;
+ uint16_t priority;
+ uint8_t mcast_igmp_version;
+ uint8_t port_state;
+
+ /* vxlan info */
+ VxLanInfo vxlan_info;
+
+ /* vlan info */
+ uint16_t vlan_id;
+
+ /* tunnel info */
+ uint8_t ttl;
+ uint8_t tos;
+ uint8_t inherit;
+ uint8_t df;
+ uint8_t csum;
+ uint8_t csum6_tx;
+ uint8_t csum6_rx;
+ uint16_t tunnel_port;
+ uint32_t vni;
+ uint32_t label;
+ union in_addr_union local;
+ union in_addr_union remote;
+
+ /* bonding info */
+ uint8_t mode;
+ uint32_t miimon;
+ uint32_t updelay;
+ uint32_t downdelay;
+
+ /* macvlan and macvtap info */
+ uint32_t macvlan_mode;
+
+ /* ipvlan info */
+ uint16_t ipvlan_mode;
+ uint16_t ipvlan_flags;
+
+ /* ethtool info */
+ int autonegotiation;
+ uint64_t speed;
+ Duplex duplex;
+ NetDevPort port;
+
+ /* wlan info */
+ enum nl80211_iftype wlan_iftype;
+ char *ssid;
+ struct ether_addr bssid;
+
+ bool has_mac_address:1;
+ bool has_permanent_mac_address:1;
+ bool has_tx_queues:1;
+ bool has_rx_queues:1;
+ bool has_stats64:1;
+ bool has_stats:1;
+ bool has_bitrates:1;
+ bool has_ethtool_link_info:1;
+ bool has_wlan_link_info:1;
+ bool has_tunnel_ipv4:1;
+ bool has_ipv6_address_generation_mode:1;
+
+ bool needs_freeing:1;
+} LinkInfo;
+
+static int link_info_compare(const LinkInfo *a, const LinkInfo *b) {
+ return CMP(a->ifindex, b->ifindex);
+}
+
+static const LinkInfo* link_info_array_free(LinkInfo *array) {
+ for (unsigned i = 0; array && array[i].needs_freeing; i++) {
+ sd_device_unref(array[i].sd_device);
+ free(array[i].ssid);
+ free(array[i].qdisc);
+ strv_free(array[i].alternative_names);
+ }
+
+ return mfree(array);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(LinkInfo*, link_info_array_free);
+
+static int decode_netdev(sd_netlink_message *m, LinkInfo *info) {
+ const char *received_kind;
+ int r;
+
+ assert(m);
+ assert(info);
+
+ r = sd_netlink_message_enter_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string(m, IFLA_INFO_KIND, &received_kind);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_enter_container(m, IFLA_INFO_DATA);
+ if (r < 0)
+ return r;
+
+ if (streq(received_kind, "bridge")) {
+ (void) sd_netlink_message_read_u32(m, IFLA_BR_FORWARD_DELAY, &info->forward_delay);
+ (void) sd_netlink_message_read_u32(m, IFLA_BR_HELLO_TIME, &info->hello_time);
+ (void) sd_netlink_message_read_u32(m, IFLA_BR_MAX_AGE, &info->max_age);
+ (void) sd_netlink_message_read_u32(m, IFLA_BR_AGEING_TIME, &info->ageing_time);
+ (void) sd_netlink_message_read_u32(m, IFLA_BR_STP_STATE, &info->stp_state);
+ (void) sd_netlink_message_read_u32(m, IFLA_BRPORT_COST, &info->cost);
+ (void) sd_netlink_message_read_u16(m, IFLA_BR_PRIORITY, &info->priority);
+ (void) sd_netlink_message_read_u8(m, IFLA_BR_MCAST_IGMP_VERSION, &info->mcast_igmp_version);
+ (void) sd_netlink_message_read_u8(m, IFLA_BRPORT_STATE, &info->port_state);
+ } if (streq(received_kind, "bond")) {
+ (void) sd_netlink_message_read_u8(m, IFLA_BOND_MODE, &info->mode);
+ (void) sd_netlink_message_read_u32(m, IFLA_BOND_MIIMON, &info->miimon);
+ (void) sd_netlink_message_read_u32(m, IFLA_BOND_DOWNDELAY, &info->downdelay);
+ (void) sd_netlink_message_read_u32(m, IFLA_BOND_UPDELAY, &info->updelay);
+ } else if (streq(received_kind, "vxlan")) {
+ (void) sd_netlink_message_read_u32(m, IFLA_VXLAN_ID, &info->vxlan_info.vni);
+
+ r = sd_netlink_message_read_in_addr(m, IFLA_VXLAN_GROUP, &info->vxlan_info.group.in);
+ if (r >= 0)
+ info->vxlan_info.group_family = AF_INET;
+ else {
+ r = sd_netlink_message_read_in6_addr(m, IFLA_VXLAN_GROUP6, &info->vxlan_info.group.in6);
+ if (r >= 0)
+ info->vxlan_info.group_family = AF_INET6;
+ }
+
+ r = sd_netlink_message_read_in_addr(m, IFLA_VXLAN_LOCAL, &info->vxlan_info.local.in);
+ if (r >= 0)
+ info->vxlan_info.local_family = AF_INET;
+ else {
+ r = sd_netlink_message_read_in6_addr(m, IFLA_VXLAN_LOCAL6, &info->vxlan_info.local.in6);
+ if (r >= 0)
+ info->vxlan_info.local_family = AF_INET6;
+ }
+
+ (void) sd_netlink_message_read_u32(m, IFLA_VXLAN_LINK, &info->vxlan_info.link);
+ (void) sd_netlink_message_read_u16(m, IFLA_VXLAN_PORT, &info->vxlan_info.dest_port);
+ (void) sd_netlink_message_read_u8(m, IFLA_VXLAN_PROXY, &info->vxlan_info.proxy);
+ (void) sd_netlink_message_read_u8(m, IFLA_VXLAN_LEARNING, &info->vxlan_info.learning);
+ (void) sd_netlink_message_read_u8(m, IFLA_VXLAN_RSC, &info->vxlan_info.rsc);
+ (void) sd_netlink_message_read_u8(m, IFLA_VXLAN_L3MISS, &info->vxlan_info.l3miss);
+ (void) sd_netlink_message_read_u8(m, IFLA_VXLAN_L2MISS, &info->vxlan_info.l2miss);
+ (void) sd_netlink_message_read_u8(m, IFLA_VXLAN_TOS, &info->vxlan_info.tos);
+ (void) sd_netlink_message_read_u8(m, IFLA_VXLAN_TTL, &info->vxlan_info.ttl);
+ } else if (streq(received_kind, "vlan"))
+ (void) sd_netlink_message_read_u16(m, IFLA_VLAN_ID, &info->vlan_id);
+ else if (STR_IN_SET(received_kind, "ipip", "sit")) {
+ (void) sd_netlink_message_read_in_addr(m, IFLA_IPTUN_LOCAL, &info->local.in);
+ (void) sd_netlink_message_read_in_addr(m, IFLA_IPTUN_REMOTE, &info->remote.in);
+ } else if (streq(received_kind, "geneve")) {
+ (void) sd_netlink_message_read_u32(m, IFLA_GENEVE_ID, &info->vni);
+
+ r = sd_netlink_message_read_in_addr(m, IFLA_GENEVE_REMOTE, &info->remote.in);
+ if (r >= 0)
+ info->has_tunnel_ipv4 = true;
+ else
+ (void) sd_netlink_message_read_in6_addr(m, IFLA_GENEVE_REMOTE6, &info->remote.in6);
+
+ (void) sd_netlink_message_read_u8(m, IFLA_GENEVE_TTL, &info->ttl);
+ (void) sd_netlink_message_read_u8(m, IFLA_GENEVE_TTL_INHERIT, &info->inherit);
+ (void) sd_netlink_message_read_u8(m, IFLA_GENEVE_TOS, &info->tos);
+ (void) sd_netlink_message_read_u8(m, IFLA_GENEVE_DF, &info->df);
+ (void) sd_netlink_message_read_u8(m, IFLA_GENEVE_UDP_CSUM, &info->csum);
+ (void) sd_netlink_message_read_u8(m, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, &info->csum6_tx);
+ (void) sd_netlink_message_read_u8(m, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, &info->csum6_rx);
+ (void) sd_netlink_message_read_u16(m, IFLA_GENEVE_PORT, &info->tunnel_port);
+ (void) sd_netlink_message_read_u32(m, IFLA_GENEVE_LABEL, &info->label);
+ } else if (STR_IN_SET(received_kind, "gre", "gretap", "erspan")) {
+ (void) sd_netlink_message_read_in_addr(m, IFLA_GRE_LOCAL, &info->local.in);
+ (void) sd_netlink_message_read_in_addr(m, IFLA_GRE_REMOTE, &info->remote.in);
+ } else if (STR_IN_SET(received_kind, "ip6gre", "ip6gretap", "ip6erspan")) {
+ (void) sd_netlink_message_read_in6_addr(m, IFLA_GRE_LOCAL, &info->local.in6);
+ (void) sd_netlink_message_read_in6_addr(m, IFLA_GRE_REMOTE, &info->remote.in6);
+ } else if (streq(received_kind, "vti")) {
+ (void) sd_netlink_message_read_in_addr(m, IFLA_VTI_LOCAL, &info->local.in);
+ (void) sd_netlink_message_read_in_addr(m, IFLA_VTI_REMOTE, &info->remote.in);
+ } else if (streq(received_kind, "vti6")) {
+ (void) sd_netlink_message_read_in6_addr(m, IFLA_VTI_LOCAL, &info->local.in6);
+ (void) sd_netlink_message_read_in6_addr(m, IFLA_VTI_REMOTE, &info->remote.in6);
+ } else if (STR_IN_SET(received_kind, "macvlan", "macvtap"))
+ (void) sd_netlink_message_read_u32(m, IFLA_MACVLAN_MODE, &info->macvlan_mode);
+ else if (streq(received_kind, "ipvlan")) {
+ (void) sd_netlink_message_read_u16(m, IFLA_IPVLAN_MODE, &info->ipvlan_mode);
+ (void) sd_netlink_message_read_u16(m, IFLA_IPVLAN_FLAGS, &info->ipvlan_flags);
+ }
+
+ strncpy(info->netdev_kind, received_kind, IFNAMSIZ);
+
+ (void) sd_netlink_message_exit_container(m);
+ (void) sd_netlink_message_exit_container(m);
+
+ return 0;
+}
+
+static int decode_link(sd_netlink_message *m, LinkInfo *info, char **patterns, bool matched_patterns[]) {
+ _cleanup_strv_free_ char **altnames = NULL;
+ const char *name, *qdisc;
+ int ifindex, r;
+ uint16_t type;
+
+ assert(m);
+ assert(info);
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ if (type != RTM_NEWLINK)
+ return 0;
+
+ r = sd_rtnl_message_link_get_ifindex(m, &ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string(m, IFLA_IFNAME, &name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_strv(m, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &altnames);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ if (patterns) {
+ char str[DECIMAL_STR_MAX(int)];
+ size_t pos;
+
+ assert(matched_patterns);
+
+ xsprintf(str, "%i", ifindex);
+ if (!strv_fnmatch_full(patterns, str, 0, &pos) &&
+ !strv_fnmatch_full(patterns, name, 0, &pos)) {
+ bool match = false;
+ char **p;
+
+ STRV_FOREACH(p, altnames)
+ if (strv_fnmatch_full(patterns, *p, 0, &pos)) {
+ match = true;
+ break;
+ }
+ if (!match)
+ return 0;
+ }
+
+ matched_patterns[pos] = true;
+ }
+
+ r = sd_rtnl_message_link_get_type(m, &info->iftype);
+ if (r < 0)
+ return r;
+
+ strscpy(info->name, sizeof info->name, name);
+ info->ifindex = ifindex;
+ info->alternative_names = TAKE_PTR(altnames);
+
+ info->has_mac_address =
+ netlink_message_read_hw_addr(m, IFLA_ADDRESS, &info->hw_address) >= 0 &&
+ memcmp(&info->hw_address, &HW_ADDR_NULL, sizeof(hw_addr_data)) != 0;
+
+ info->has_permanent_mac_address =
+ ethtool_get_permanent_macaddr(NULL, info->name, &info->permanent_mac_address) >= 0 &&
+ memcmp(&info->permanent_mac_address, &ETHER_ADDR_NULL, sizeof(struct ether_addr)) != 0 &&
+ (info->hw_address.length != sizeof(struct ether_addr) ||
+ memcmp(&info->permanent_mac_address, info->hw_address.addr.bytes, sizeof(struct ether_addr)) != 0);
+
+ (void) sd_netlink_message_read_u32(m, IFLA_MTU, &info->mtu);
+ (void) sd_netlink_message_read_u32(m, IFLA_MIN_MTU, &info->min_mtu);
+ (void) sd_netlink_message_read_u32(m, IFLA_MAX_MTU, &info->max_mtu);
+
+ info->has_rx_queues =
+ sd_netlink_message_read_u32(m, IFLA_NUM_RX_QUEUES, &info->rx_queues) >= 0 &&
+ info->rx_queues > 0;
+
+ info->has_tx_queues =
+ sd_netlink_message_read_u32(m, IFLA_NUM_TX_QUEUES, &info->tx_queues) >= 0 &&
+ info->tx_queues > 0;
+
+ if (sd_netlink_message_read(m, IFLA_STATS64, sizeof info->stats64, &info->stats64) >= 0)
+ info->has_stats64 = true;
+ else if (sd_netlink_message_read(m, IFLA_STATS, sizeof info->stats, &info->stats) >= 0)
+ info->has_stats = true;
+
+ r = sd_netlink_message_read_string(m, IFLA_QDISC, &qdisc);
+ if (r >= 0) {
+ info->qdisc = strdup(qdisc);
+ if (!info->qdisc)
+ return log_oom();
+ }
+
+ (void) sd_netlink_message_read_u32(m, IFLA_MASTER, &info->master);
+
+ r = sd_netlink_message_enter_container(m, IFLA_AF_SPEC);
+ if (r >= 0) {
+ r = sd_netlink_message_enter_container(m, AF_INET6);
+ if (r >= 0) {
+ r = sd_netlink_message_read_u8(m, IFLA_INET6_ADDR_GEN_MODE, &info->addr_gen_mode);
+ if (r >= 0 && IN_SET(info->addr_gen_mode,
+ IN6_ADDR_GEN_MODE_EUI64,
+ IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM))
+ info->has_ipv6_address_generation_mode = true;
+
+ (void) sd_netlink_message_exit_container(m);
+ }
+ (void) sd_netlink_message_exit_container(m);
+ }
+
+ /* fill kind info */
+ (void) decode_netdev(m, info);
+
+ return 1;
+}
+
+static int link_get_property(
+ sd_bus *bus,
+ const LinkInfo *link,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *iface,
+ const char *propname) {
+ _cleanup_free_ char *path = NULL, *ifindex_str = NULL;
+ int r;
+
+ if (asprintf(&ifindex_str, "%i", link->ifindex) < 0)
+ return -ENOMEM;
+
+ r = sd_bus_path_encode("/org/freedesktop/network1/link", ifindex_str, &path);
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_method(
+ bus,
+ "org.freedesktop.network1",
+ path,
+ "org.freedesktop.DBus.Properties",
+ "Get",
+ error,
+ reply,
+ "ss",
+ iface,
+ propname);
+}
+
+static int acquire_link_bitrates(sd_bus *bus, LinkInfo *link) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = link_get_property(bus, link, &error, &reply, "org.freedesktop.network1.Link", "BitRates");
+ if (r < 0) {
+ bool quiet = sd_bus_error_has_names(&error, SD_BUS_ERROR_UNKNOWN_PROPERTY,
+ BUS_ERROR_SPEED_METER_INACTIVE);
+
+ return log_full_errno(quiet ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to query link bit rates: %s", bus_error_message(&error, r));
+ }
+
+ r = sd_bus_message_enter_container(reply, 'v', "(tt)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "(tt)", &link->tx_bitrate, &link->rx_bitrate);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ link->has_bitrates = link->tx_bitrate != UINT64_MAX && link->rx_bitrate != UINT64_MAX;
+
+ return 0;
+}
+
+static void acquire_ether_link_info(int *fd, LinkInfo *link) {
+ if (ethtool_get_link_info(fd, link->name,
+ &link->autonegotiation,
+ &link->speed,
+ &link->duplex,
+ &link->port) >= 0)
+ link->has_ethtool_link_info = true;
+}
+
+static void acquire_wlan_link_info(LinkInfo *link) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *genl = NULL;
+ const char *type = NULL;
+ int r, k = 0;
+
+ if (link->sd_device)
+ (void) sd_device_get_devtype(link->sd_device, &type);
+ if (!streq_ptr(type, "wlan"))
+ return;
+
+ r = sd_genl_socket_open(&genl);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open generic netlink socket: %m");
+ return;
+ }
+
+ (void) sd_netlink_inc_rcvbuf(genl, RCVBUF_SIZE);
+
+ r = wifi_get_interface(genl, link->ifindex, &link->wlan_iftype, &link->ssid);
+ if (r < 0)
+ log_debug_errno(r, "%s: failed to query ssid: %m", link->name);
+
+ if (link->wlan_iftype == NL80211_IFTYPE_STATION) {
+ k = wifi_get_station(genl, link->ifindex, &link->bssid);
+ if (k < 0)
+ log_debug_errno(k, "%s: failed to query bssid: %m", link->name);
+ }
+
+ link->has_wlan_link_info = r > 0 || k > 0;
+}
+
+static int acquire_link_info(sd_bus *bus, sd_netlink *rtnl, char **patterns, LinkInfo **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(link_info_array_freep) LinkInfo *links = NULL;
+ _cleanup_close_ int fd = -1;
+ size_t allocated = 0, c = 0, j;
+ sd_netlink_message *i;
+ int r;
+
+ assert(rtnl);
+ assert(ret);
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate links: %m");
+
+ _cleanup_free_ bool *matched_patterns = NULL;
+ if (patterns) {
+ matched_patterns = new0(bool, strv_length(patterns));
+ if (!matched_patterns)
+ return log_oom();
+ }
+
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ if (!GREEDY_REALLOC0(links, allocated, c + 2)) /* We keep one trailing one as marker */
+ return -ENOMEM;
+
+ r = decode_link(i, links + c, patterns, matched_patterns);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ links[c].needs_freeing = true;
+
+ char devid[2 + DECIMAL_STR_MAX(int)];
+ xsprintf(devid, "n%i", links[c].ifindex);
+ (void) sd_device_new_from_device_id(&links[c].sd_device, devid);
+
+ acquire_ether_link_info(&fd, &links[c]);
+ acquire_wlan_link_info(&links[c]);
+
+ c++;
+ }
+
+ /* Look if we matched all our arguments that are not globs. It
+ * is OK for a glob to match nothing, but not for an exact argument. */
+ for (size_t pos = 0; pos < strv_length(patterns); pos++) {
+ if (matched_patterns[pos])
+ continue;
+
+ if (string_is_glob(patterns[pos]))
+ log_debug("Pattern \"%s\" doesn't match any interface, ignoring.",
+ patterns[pos]);
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
+ "Interface \"%s\" not found.", patterns[pos]);
+ }
+
+ typesafe_qsort(links, c, link_info_compare);
+
+ if (bus)
+ for (j = 0; j < c; j++)
+ (void) acquire_link_bitrates(bus, links + j);
+
+ *ret = TAKE_PTR(links);
+
+ if (patterns && c == 0)
+ log_warning("No interfaces matched.");
+
+ return (int) c;
+}
+
+static int list_links(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(link_info_array_freep) LinkInfo *links = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ TableCell *cell;
+ int c, i, r;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ c = acquire_link_info(NULL, rtnl, argc > 1 ? argv + 1 : NULL, &links);
+ if (c < 0)
+ return c;
+
+ (void) pager_open(arg_pager_flags);
+
+ table = table_new("idx", "link", "type", "operational", "setup");
+ if (!table)
+ return log_oom();
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ table_set_header(table, arg_legend);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ (void) table_set_minimum_width(table, cell, 3);
+ (void) table_set_weight(table, cell, 0);
+ (void) table_set_ellipsize_percent(table, cell, 100);
+ (void) table_set_align_percent(table, cell, 100);
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ (void) table_set_ellipsize_percent(table, cell, 100);
+
+ for (i = 0; i < c; i++) {
+ _cleanup_free_ char *setup_state = NULL, *operational_state = NULL;
+ const char *on_color_operational, *off_color_operational,
+ *on_color_setup, *off_color_setup;
+ _cleanup_free_ char *t = NULL;
+
+ (void) sd_network_link_get_operational_state(links[i].ifindex, &operational_state);
+ operational_state_to_color(links[i].name, operational_state, &on_color_operational, &off_color_operational);
+
+ r = sd_network_link_get_setup_state(links[i].ifindex, &setup_state);
+ if (r == -ENODATA) /* If there's no info available about this iface, it's unmanaged by networkd */
+ setup_state = strdup("unmanaged");
+ setup_state_to_color(setup_state, &on_color_setup, &off_color_setup);
+
+ t = link_get_type_string(links[i].iftype, links[i].sd_device);
+
+ r = table_add_many(table,
+ TABLE_INT, links[i].ifindex,
+ TABLE_STRING, links[i].name,
+ TABLE_STRING, strna(t),
+ TABLE_STRING, strna(operational_state),
+ TABLE_SET_COLOR, on_color_operational,
+ TABLE_STRING, strna(setup_state),
+ TABLE_SET_COLOR, on_color_setup);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ if (arg_legend)
+ printf("\n%i links listed.\n", c);
+
+ return 0;
+}
+
+/* IEEE Organizationally Unique Identifier vendor string */
+static int ieee_oui(sd_hwdb *hwdb, const struct ether_addr *mac, char **ret) {
+ const char *description;
+ char modalias[STRLEN("OUI:XXYYXXYYXXYY") + 1], *desc;
+ int r;
+
+ assert(ret);
+
+ if (!hwdb)
+ return -EINVAL;
+
+ if (!mac)
+ return -EINVAL;
+
+ /* skip commonly misused 00:00:00 (Xerox) prefix */
+ if (memcmp(mac, "\0\0\0", 3) == 0)
+ return -EINVAL;
+
+ xsprintf(modalias, "OUI:" ETHER_ADDR_FORMAT_STR,
+ ETHER_ADDR_FORMAT_VAL(*mac));
+
+ r = sd_hwdb_get(hwdb, modalias, "ID_OUI_FROM_DATABASE", &description);
+ if (r < 0)
+ return r;
+
+ desc = strdup(description);
+ if (!desc)
+ return -ENOMEM;
+
+ *ret = desc;
+
+ return 0;
+}
+
+static int get_gateway_description(
+ sd_netlink *rtnl,
+ sd_hwdb *hwdb,
+ int ifindex,
+ int family,
+ union in_addr_union *gateway,
+ char **gateway_description) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *m;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex >= 0);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ assert(gateway);
+ assert(gateway_description);
+
+ r = sd_rtnl_message_new_neigh(rtnl, &req, RTM_GETNEIGH, ifindex, family);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ union in_addr_union gw = IN_ADDR_NULL;
+ struct ether_addr mac = ETHER_ADDR_NULL;
+ uint16_t type;
+ int ifi, fam;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_error_errno(r, "got error: %m");
+ continue;
+ }
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0) {
+ log_error_errno(r, "could not get type: %m");
+ continue;
+ }
+
+ if (type != RTM_NEWNEIGH) {
+ log_error("type is not RTM_NEWNEIGH");
+ continue;
+ }
+
+ r = sd_rtnl_message_neigh_get_family(m, &fam);
+ if (r < 0) {
+ log_error_errno(r, "could not get family: %m");
+ continue;
+ }
+
+ if (fam != family) {
+ log_error("family is not correct");
+ continue;
+ }
+
+ r = sd_rtnl_message_neigh_get_ifindex(m, &ifi);
+ if (r < 0) {
+ log_error_errno(r, "could not get ifindex: %m");
+ continue;
+ }
+
+ if (ifindex > 0 && ifi != ifindex)
+ continue;
+
+ switch (fam) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(m, NDA_DST, &gw.in);
+ if (r < 0)
+ continue;
+
+ break;
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(m, NDA_DST, &gw.in6);
+ if (r < 0)
+ continue;
+
+ break;
+ default:
+ continue;
+ }
+
+ if (!in_addr_equal(fam, &gw, gateway))
+ continue;
+
+ r = sd_netlink_message_read(m, NDA_LLADDR, sizeof(mac), &mac);
+ if (r < 0)
+ continue;
+
+ r = ieee_oui(hwdb, &mac, gateway_description);
+ if (r < 0)
+ continue;
+
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+static int dump_list(Table *table, const char *prefix, char * const *l) {
+ int r;
+
+ if (strv_isempty(l))
+ return 0;
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, prefix,
+ TABLE_STRV, l);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ return 0;
+}
+
+static int dump_gateways(
+ sd_netlink *rtnl,
+ sd_hwdb *hwdb,
+ Table *table,
+ int ifindex) {
+ _cleanup_free_ struct local_address *local = NULL;
+ _cleanup_strv_free_ char **buf = NULL;
+ int r, n, i;
+
+ assert(rtnl);
+ assert(table);
+
+ n = local_gateways(rtnl, ifindex, AF_UNSPEC, &local);
+ if (n <= 0)
+ return n;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *gateway = NULL, *description = NULL, *with_description = NULL;
+ char name[IF_NAMESIZE+1];
+
+ r = in_addr_to_string(local[i].family, &local[i].address, &gateway);
+ if (r < 0)
+ return r;
+
+ r = get_gateway_description(rtnl, hwdb, local[i].ifindex, local[i].family, &local[i].address, &description);
+ if (r < 0)
+ log_debug_errno(r, "Could not get description of gateway, ignoring: %m");
+
+ if (description) {
+ with_description = strjoin(gateway, " (", description, ")");
+ if (!with_description)
+ return log_oom();
+ }
+
+ /* Show interface name for the entry if we show entries for all interfaces */
+ r = strv_extendf(&buf, "%s%s%s",
+ with_description ?: gateway,
+ ifindex <= 0 ? " on " : "",
+ ifindex <= 0 ? format_ifname_full(local[i].ifindex, name, FORMAT_IFNAME_IFINDEX_WITH_PERCENT) : "");
+ if (r < 0)
+ return log_oom();
+ }
+
+ return dump_list(table, "Gateway:", buf);
+}
+
+static int dump_addresses(
+ sd_netlink *rtnl,
+ sd_dhcp_lease *lease,
+ Table *table,
+ int ifindex) {
+
+ _cleanup_free_ struct local_address *local = NULL;
+ _cleanup_strv_free_ char **buf = NULL;
+ struct in_addr dhcp4_address = {};
+ int r, n, i;
+
+ assert(rtnl);
+ assert(table);
+
+ n = local_addresses(rtnl, ifindex, AF_UNSPEC, &local);
+ if (n <= 0)
+ return n;
+
+ if (lease)
+ (void) sd_dhcp_lease_get_address(lease, &dhcp4_address);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *pretty = NULL;
+ char name[IF_NAMESIZE+1];
+
+ r = in_addr_to_string(local[i].family, &local[i].address, &pretty);
+ if (r < 0)
+ return r;
+
+ if (local[i].family == AF_INET && in4_addr_equal(&local[i].address.in, &dhcp4_address)) {
+ struct in_addr server_address;
+ char *p, s[INET_ADDRSTRLEN];
+
+ r = sd_dhcp_lease_get_server_identifier(lease, &server_address);
+ if (r >= 0 && inet_ntop(AF_INET, &server_address, s, sizeof(s)))
+ p = strjoin(pretty, " (DHCP4 via ", s, ")");
+ else
+ p = strjoin(pretty, " (DHCP4)");
+ if (!p)
+ return log_oom();
+
+ free_and_replace(pretty, p);
+ }
+
+ r = strv_extendf(&buf, "%s%s%s",
+ pretty,
+ ifindex <= 0 ? " on " : "",
+ ifindex <= 0 ? format_ifname_full(local[i].ifindex, name, FORMAT_IFNAME_IFINDEX_WITH_PERCENT) : "");
+ if (r < 0)
+ return log_oom();
+ }
+
+ return dump_list(table, "Address:", buf);
+}
+
+static int dump_address_labels(sd_netlink *rtnl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_netlink_message *m;
+ TableCell *cell;
+ int r;
+
+ assert(rtnl);
+
+ r = sd_rtnl_message_new_addrlabel(rtnl, &req, RTM_GETADDRLABEL, 0, AF_INET6);
+ if (r < 0)
+ return log_error_errno(r, "Could not allocate RTM_GETADDRLABEL message: %m");
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ table = table_new("label", "prefix/prefixlen");
+ if (!table)
+ return log_oom();
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ r = table_set_sort(table, (size_t) 0, (size_t) SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ (void) table_set_align_percent(table, cell, 100);
+ (void) table_set_ellipsize_percent(table, cell, 100);
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ (void) table_set_align_percent(table, cell, 100);
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ _cleanup_free_ char *pretty = NULL;
+ union in_addr_union prefix = IN_ADDR_NULL;
+ uint8_t prefixlen;
+ uint32_t label;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_error_errno(r, "got error: %m");
+ continue;
+ }
+
+ r = sd_netlink_message_read_u32(m, IFAL_LABEL, &label);
+ if (r < 0 && r != -ENODATA) {
+ log_error_errno(r, "Could not read IFAL_LABEL, ignoring: %m");
+ continue;
+ }
+
+ r = sd_netlink_message_read_in6_addr(m, IFAL_ADDRESS, &prefix.in6);
+ if (r < 0)
+ continue;
+
+ r = in_addr_to_string(AF_INET6, &prefix, &pretty);
+ if (r < 0)
+ continue;
+
+ r = sd_rtnl_message_addrlabel_get_prefixlen(m, &prefixlen);
+ if (r < 0)
+ continue;
+
+ r = table_add_cell(table, NULL, TABLE_UINT32, &label);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%s/%u", pretty, prefixlen);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return 0;
+}
+
+static int list_address_labels(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int r;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ dump_address_labels(rtnl);
+
+ return 0;
+}
+
+static int open_lldp_neighbors(int ifindex, FILE **ret) {
+ _cleanup_free_ char *p = NULL;
+ FILE *f;
+
+ if (asprintf(&p, "/run/systemd/netif/lldp/%i", ifindex) < 0)
+ return -ENOMEM;
+
+ f = fopen(p, "re");
+ if (!f)
+ return -errno;
+
+ *ret = f;
+ return 0;
+}
+
+static int next_lldp_neighbor(FILE *f, sd_lldp_neighbor **ret) {
+ _cleanup_free_ void *raw = NULL;
+ size_t l;
+ le64_t u;
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ l = fread(&u, 1, sizeof(u), f);
+ if (l == 0 && feof(f))
+ return 0;
+ if (l != sizeof(u))
+ return -EBADMSG;
+
+ /* each LLDP packet is at most MTU size, but let's allow up to 4KiB just in case */
+ if (le64toh(u) >= 4096)
+ return -EBADMSG;
+
+ raw = new(uint8_t, le64toh(u));
+ if (!raw)
+ return -ENOMEM;
+
+ if (fread(raw, 1, le64toh(u), f) != le64toh(u))
+ return -EBADMSG;
+
+ r = sd_lldp_neighbor_from_raw(ret, raw, le64toh(u));
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int dump_lldp_neighbors(Table *table, const char *prefix, int ifindex) {
+ _cleanup_strv_free_ char **buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(table);
+ assert(prefix);
+ assert(ifindex > 0);
+
+ r = open_lldp_neighbors(ifindex, &f);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *system_name = NULL, *port_id = NULL, *port_description = NULL;
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+
+ r = next_lldp_neighbor(f, &n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ (void) sd_lldp_neighbor_get_system_name(n, &system_name);
+ (void) sd_lldp_neighbor_get_port_id_as_string(n, &port_id);
+ (void) sd_lldp_neighbor_get_port_description(n, &port_description);
+
+ r = strv_extendf(&buf, "%s on port %s%s%s%s",
+ strna(system_name),
+ strna(port_id),
+ isempty(port_description) ? "" : " (",
+ strempty(port_description),
+ isempty(port_description) ? "" : ")");
+ if (r < 0)
+ return log_oom();
+ }
+
+ return dump_list(table, prefix, buf);
+}
+
+static int dump_dhcp_leases(Table *table, const char *prefix, sd_bus *bus, const LinkInfo *link) {
+ _cleanup_strv_free_ char **buf = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = link_get_property(bus, link, &error, &reply, "org.freedesktop.network1.DHCPServer", "Leases");
+ if (r < 0) {
+ bool quiet = sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_PROPERTY);
+
+ log_full_errno(quiet ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to query link DHCP leases: %s", bus_error_message(&error, r));
+ return 0;
+ }
+
+ r = sd_bus_message_enter_container(reply, 'v', "a(uayayayayt)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(reply, 'a', "(uayayayayt)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "uayayayayt")) > 0) {
+ _cleanup_free_ char *id = NULL, *ip = NULL;
+ const void *client_id, *addr, *gtw, *hwaddr;
+ size_t client_id_sz, sz;
+ uint64_t expiration;
+ uint32_t family;
+
+ r = sd_bus_message_read(reply, "u", &family);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &client_id, &client_id_sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &addr, &sz);
+ if (r < 0 || sz != 4)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &gtw, &sz);
+ if (r < 0 || sz != 4)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &hwaddr, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_basic(reply, 't', &expiration);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_dhcp_client_id_to_string(client_id, client_id_sz, &id);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = in_addr_to_string(family, addr, &ip);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = strv_extendf(&buf, "%s (to %s)", ip, id);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (strv_isempty(buf)) {
+ r = strv_extendf(&buf, "none");
+ if (r < 0)
+ return log_oom();
+ }
+
+ return dump_list(table, prefix, buf);
+}
+
+static int dump_ifindexes(Table *table, const char *prefix, const int *ifindexes) {
+ unsigned c;
+ int r;
+
+ assert(prefix);
+
+ if (!ifindexes || ifindexes[0] <= 0)
+ return 0;
+
+ for (c = 0; ifindexes[c] > 0; c++) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, c == 0 ? prefix : "",
+ TABLE_IFINDEX, ifindexes[c]);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ return 0;
+}
+
+#define DUMP_STATS_ONE(name, val_name) \
+ r = table_add_many(table, \
+ TABLE_EMPTY, \
+ TABLE_STRING, name ":"); \
+ if (r < 0) \
+ return table_log_add_error(r); \
+ r = table_add_cell(table, NULL, \
+ info->has_stats64 ? TABLE_UINT64 : TABLE_UINT32, \
+ info->has_stats64 ? (void*) &info->stats64.val_name : (void*) &info->stats.val_name); \
+ if (r < 0) \
+ return table_log_add_error(r);
+
+static int dump_statistics(Table *table, const LinkInfo *info) {
+ int r;
+
+ if (!arg_stats)
+ return 0;
+
+ if (!info->has_stats64 && !info->has_stats)
+ return 0;
+
+ DUMP_STATS_ONE("Rx Packets", rx_packets);
+ DUMP_STATS_ONE("Tx Packets", tx_packets);
+ DUMP_STATS_ONE("Rx Bytes", rx_bytes);
+ DUMP_STATS_ONE("Tx Bytes", tx_bytes);
+ DUMP_STATS_ONE("Rx Errors", rx_errors);
+ DUMP_STATS_ONE("Tx Errors", tx_errors);
+ DUMP_STATS_ONE("Rx Dropped", rx_dropped);
+ DUMP_STATS_ONE("Tx Dropped", tx_dropped);
+ DUMP_STATS_ONE("Multicast Packets", multicast);
+ DUMP_STATS_ONE("Collisions", collisions);
+
+ return 0;
+}
+
+static OutputFlags get_output_flags(void) {
+ return
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full || !on_tty() || pager_have()) * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR;
+}
+
+static int show_logs(const LinkInfo *info) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ if (arg_lines == 0)
+ return 0;
+
+ r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journal: %m");
+
+ r = add_match_this_boot(j, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add boot matches: %m");
+
+ if (info) {
+ char m1[STRLEN("_KERNEL_DEVICE=n") + DECIMAL_STR_MAX(int)];
+ const char *m2, *m3;
+
+ /* kernel */
+ xsprintf(m1, "_KERNEL_DEVICE=n%i", info->ifindex);
+ /* networkd */
+ m2 = strjoina("INTERFACE=", info->name);
+ /* udevd */
+ m3 = strjoina("DEVICE=", info->name);
+
+ (void)(
+ (r = sd_journal_add_match(j, m1, 0)) ||
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m3, 0))
+ );
+ if (r < 0)
+ return log_error_errno(r, "Failed to add link matches: %m");
+ } else {
+ r = add_matches_for_unit(j, "systemd-networkd.service");
+ if (r < 0)
+ return log_error_errno(r, "Failed to add unit matches: %m");
+
+ r = add_matches_for_unit(j, "systemd-networkd-wait-online.service");
+ if (r < 0)
+ return log_error_errno(r, "Failed to add unit matches: %m");
+ }
+
+ return show_journal(
+ stdout,
+ j,
+ OUTPUT_SHORT,
+ 0,
+ 0,
+ arg_lines,
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ NULL);
+}
+
+static int link_status_one(
+ sd_bus *bus,
+ sd_netlink *rtnl,
+ sd_hwdb *hwdb,
+ const LinkInfo *info) {
+
+ _cleanup_strv_free_ char **dns = NULL, **ntp = NULL, **sip = NULL, **search_domains = NULL, **route_domains = NULL;
+ _cleanup_free_ char *t = NULL, *network = NULL, *iaid = NULL, *duid = NULL,
+ *setup_state = NULL, *operational_state = NULL, *lease_file = NULL;
+ const char *driver = NULL, *path = NULL, *vendor = NULL, *model = NULL, *link = NULL,
+ *on_color_operational, *off_color_operational, *on_color_setup, *off_color_setup;
+ _cleanup_free_ int *carrier_bound_to = NULL, *carrier_bound_by = NULL;
+ _cleanup_(sd_dhcp_lease_unrefp) sd_dhcp_lease *lease = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ TableCell *cell;
+ int r;
+
+ assert(rtnl);
+ assert(info);
+
+ (void) sd_network_link_get_operational_state(info->ifindex, &operational_state);
+ operational_state_to_color(info->name, operational_state, &on_color_operational, &off_color_operational);
+
+ r = sd_network_link_get_setup_state(info->ifindex, &setup_state);
+ if (r == -ENODATA) /* If there's no info available about this iface, it's unmanaged by networkd */
+ setup_state = strdup("unmanaged");
+ setup_state_to_color(setup_state, &on_color_setup, &off_color_setup);
+
+ (void) sd_network_link_get_dns(info->ifindex, &dns);
+ (void) sd_network_link_get_search_domains(info->ifindex, &search_domains);
+ (void) sd_network_link_get_route_domains(info->ifindex, &route_domains);
+ (void) sd_network_link_get_ntp(info->ifindex, &ntp);
+ (void) sd_network_link_get_sip(info->ifindex, &sip);
+
+ if (info->sd_device) {
+ (void) sd_device_get_property_value(info->sd_device, "ID_NET_LINK_FILE", &link);
+ (void) sd_device_get_property_value(info->sd_device, "ID_NET_DRIVER", &driver);
+ (void) sd_device_get_property_value(info->sd_device, "ID_PATH", &path);
+
+ if (sd_device_get_property_value(info->sd_device, "ID_VENDOR_FROM_DATABASE", &vendor) < 0)
+ (void) sd_device_get_property_value(info->sd_device, "ID_VENDOR", &vendor);
+
+ if (sd_device_get_property_value(info->sd_device, "ID_MODEL_FROM_DATABASE", &model) < 0)
+ (void) sd_device_get_property_value(info->sd_device, "ID_MODEL", &model);
+ }
+
+ t = link_get_type_string(info->iftype, info->sd_device);
+
+ (void) sd_network_link_get_network_file(info->ifindex, &network);
+
+ (void) sd_network_link_get_carrier_bound_to(info->ifindex, &carrier_bound_to);
+ (void) sd_network_link_get_carrier_bound_by(info->ifindex, &carrier_bound_by);
+
+ if (asprintf(&lease_file, "/run/systemd/netif/leases/%d", info->ifindex) < 0)
+ return log_oom();
+
+ (void) dhcp_lease_load(&lease, lease_file);
+
+ table = table_new("dot", "key", "value");
+ if (!table)
+ return log_oom();
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ (void) table_set_ellipsize_percent(table, cell, 100);
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ (void) table_set_ellipsize_percent(table, cell, 100);
+
+ table_set_header(table, false);
+
+ r = table_add_many(table,
+ TABLE_STRING, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE),
+ TABLE_SET_COLOR, on_color_operational);
+ if (r < 0)
+ return table_log_add_error(r);
+ r = table_add_cell_stringf(table, &cell, "%i: %s", info->ifindex, info->name);
+ if (r < 0)
+ return table_log_add_error(r);
+ (void) table_set_align_percent(table, cell, 0);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_STRING, "Link File:",
+ TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_STRING, strna(link),
+ TABLE_EMPTY,
+ TABLE_STRING, "Network File:",
+ TABLE_STRING, strna(network),
+ TABLE_EMPTY,
+ TABLE_STRING, "Type:",
+ TABLE_STRING, strna(t),
+ TABLE_EMPTY,
+ TABLE_STRING, "State:");
+ if (r < 0)
+ return table_log_add_error(r);
+ r = table_add_cell_stringf(table, NULL, "%s%s%s (%s%s%s)",
+ on_color_operational, strna(operational_state), off_color_operational,
+ on_color_setup, strna(setup_state), off_color_setup);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ strv_sort(info->alternative_names);
+ r = dump_list(table, "Alternative Names:", info->alternative_names);
+ if (r < 0)
+ return r;
+
+ if (path) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Path:",
+ TABLE_STRING, path);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ if (driver) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Driver:",
+ TABLE_STRING, driver);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ if (vendor) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Vendor:",
+ TABLE_STRING, vendor);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ if (model) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Model:",
+ TABLE_STRING, model);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->has_mac_address) {
+ _cleanup_free_ char *description = NULL;
+
+ if (info->hw_address.length == ETH_ALEN)
+ (void) ieee_oui(hwdb, &info->hw_address.addr.ether, &description);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "HW Address:");
+ if (r < 0)
+ return table_log_add_error(r);
+ r = table_add_cell_stringf(table, NULL, "%s%s%s%s",
+ HW_ADDR_TO_STR(&info->hw_address),
+ description ? " (" : "",
+ strempty(description),
+ description ? ")" : "");
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->has_permanent_mac_address) {
+ _cleanup_free_ char *description = NULL;
+ char ea[ETHER_ADDR_TO_STRING_MAX];
+
+ (void) ieee_oui(hwdb, &info->permanent_mac_address, &description);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "HW Permanent Address:");
+ if (r < 0)
+ return table_log_add_error(r);
+ r = table_add_cell_stringf(table, NULL, "%s%s%s%s",
+ ether_addr_to_string(&info->permanent_mac_address, ea),
+ description ? " (" : "",
+ strempty(description),
+ description ? ")" : "");
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->mtu > 0) {
+ char min_str[DECIMAL_STR_MAX(uint32_t)], max_str[DECIMAL_STR_MAX(uint32_t)];
+
+ xsprintf(min_str, "%" PRIu32, info->min_mtu);
+ xsprintf(max_str, "%" PRIu32, info->max_mtu);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "MTU:");
+ if (r < 0)
+ return table_log_add_error(r);
+ r = table_add_cell_stringf(table, NULL, "%" PRIu32 "%s%s%s%s%s%s%s",
+ info->mtu,
+ info->min_mtu > 0 || info->max_mtu > 0 ? " (" : "",
+ info->min_mtu > 0 ? "min: " : "",
+ info->min_mtu > 0 ? min_str : "",
+ info->min_mtu > 0 && info->max_mtu > 0 ? ", " : "",
+ info->max_mtu > 0 ? "max: " : "",
+ info->max_mtu > 0 ? max_str : "",
+ info->min_mtu > 0 || info->max_mtu > 0 ? ")" : "");
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->qdisc) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "QDisc:",
+ TABLE_STRING, info->qdisc);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->master > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Master:",
+ TABLE_IFINDEX, info->master);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->has_ipv6_address_generation_mode) {
+ static const struct {
+ const char *mode;
+ } mode_table[] = {
+ { "eui64" },
+ { "none" },
+ { "stable-privacy" },
+ { "random" },
+ };
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "IPv6 Address Generation Mode:",
+ TABLE_STRING, mode_table[info->addr_gen_mode]);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (streq_ptr(info->netdev_kind, "bridge")) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Forward Delay:",
+ TABLE_TIMESPAN_MSEC, jiffies_to_usec(info->forward_delay),
+ TABLE_EMPTY,
+ TABLE_STRING, "Hello Time:",
+ TABLE_TIMESPAN_MSEC, jiffies_to_usec(info->hello_time),
+ TABLE_EMPTY,
+ TABLE_STRING, "Max Age:",
+ TABLE_TIMESPAN_MSEC, jiffies_to_usec(info->max_age),
+ TABLE_EMPTY,
+ TABLE_STRING, "Ageing Time:",
+ TABLE_TIMESPAN_MSEC, jiffies_to_usec(info->ageing_time),
+ TABLE_EMPTY,
+ TABLE_STRING, "Priority:",
+ TABLE_UINT16, info->priority,
+ TABLE_EMPTY,
+ TABLE_STRING, "STP:",
+ TABLE_BOOLEAN, info->stp_state > 0,
+ TABLE_EMPTY,
+ TABLE_STRING, "Multicast IGMP Version:",
+ TABLE_UINT8, info->mcast_igmp_version,
+ TABLE_EMPTY,
+ TABLE_STRING, "Cost:",
+ TABLE_UINT32, info->cost);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (info->port_state <= BR_STATE_BLOCKING)
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Port State:",
+ TABLE_STRING, bridge_state_to_string(info->port_state));
+ } else if (streq_ptr(info->netdev_kind, "bond")) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Mode:",
+ TABLE_STRING, bond_mode_to_string(info->mode),
+ TABLE_EMPTY,
+ TABLE_STRING, "Miimon:",
+ TABLE_TIMESPAN_MSEC, jiffies_to_usec(info->miimon),
+ TABLE_EMPTY,
+ TABLE_STRING, "Updelay:",
+ TABLE_TIMESPAN_MSEC, jiffies_to_usec(info->updelay),
+ TABLE_EMPTY,
+ TABLE_STRING, "Downdelay:",
+ TABLE_TIMESPAN_MSEC, jiffies_to_usec(info->downdelay));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ } else if (streq_ptr(info->netdev_kind, "vxlan")) {
+ char ttl[CONST_MAX(STRLEN("auto") + 1, DECIMAL_STR_MAX(uint8_t))];
+
+ if (info->vxlan_info.vni > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "VNI:",
+ TABLE_UINT32, info->vxlan_info.vni);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (IN_SET(info->vxlan_info.group_family, AF_INET, AF_INET6)) {
+ const char *p;
+
+ r = in_addr_is_multicast(info->vxlan_info.group_family, &info->vxlan_info.group);
+ if (r <= 0)
+ p = "Remote:";
+ else
+ p = "Group:";
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, p,
+ info->vxlan_info.group_family == AF_INET ? TABLE_IN_ADDR : TABLE_IN6_ADDR,
+ &info->vxlan_info.group);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (IN_SET(info->vxlan_info.local_family, AF_INET, AF_INET6)) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Local:",
+ info->vxlan_info.local_family == AF_INET ? TABLE_IN_ADDR : TABLE_IN6_ADDR,
+ &info->vxlan_info.local);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->vxlan_info.dest_port > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Destination Port:",
+ TABLE_UINT16, be16toh(info->vxlan_info.dest_port));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->vxlan_info.link > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Underlying Device:",
+ TABLE_IFINDEX, info->vxlan_info.link);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Learning:",
+ TABLE_BOOLEAN, info->vxlan_info.learning);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "RSC:",
+ TABLE_BOOLEAN, info->vxlan_info.rsc);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "L3MISS:",
+ TABLE_BOOLEAN, info->vxlan_info.l3miss);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "L2MISS:",
+ TABLE_BOOLEAN, info->vxlan_info.l2miss);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (info->vxlan_info.tos > 1) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "TOS:",
+ TABLE_UINT8, info->vxlan_info.tos);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->vxlan_info.ttl > 0)
+ xsprintf(ttl, "%" PRIu8, info->vxlan_info.ttl);
+ else
+ strcpy(ttl, "auto");
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "TTL:",
+ TABLE_STRING, ttl);
+ if (r < 0)
+ return table_log_add_error(r);
+ } else if (streq_ptr(info->netdev_kind, "vlan") && info->vlan_id > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "VLan Id:",
+ TABLE_UINT16, info->vlan_id);
+ if (r < 0)
+ return table_log_add_error(r);
+ } else if (STRPTR_IN_SET(info->netdev_kind, "ipip", "sit", "gre", "gretap", "erspan", "vti")) {
+ if (!in_addr_is_null(AF_INET, &info->local)) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Local:",
+ TABLE_IN_ADDR, &info->local);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (!in_addr_is_null(AF_INET, &info->remote)) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Remote:",
+ TABLE_IN_ADDR, &info->remote);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ } else if (STRPTR_IN_SET(info->netdev_kind, "ip6gre", "ip6gretap", "ip6erspan", "vti6")) {
+ if (!in_addr_is_null(AF_INET6, &info->local)) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Local:",
+ TABLE_IN6_ADDR, &info->local);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (!in_addr_is_null(AF_INET6, &info->remote)) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Remote:",
+ TABLE_IN6_ADDR, &info->remote);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ } else if (streq_ptr(info->netdev_kind, "geneve")) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "VNI:",
+ TABLE_UINT32, info->vni);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (info->has_tunnel_ipv4 && !in_addr_is_null(AF_INET, &info->remote)) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Remote:",
+ TABLE_IN_ADDR, &info->remote);
+ if (r < 0)
+ return table_log_add_error(r);
+ } else if (!in_addr_is_null(AF_INET6, &info->remote)) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Remote:",
+ TABLE_IN6_ADDR, &info->remote);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->ttl > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "TTL:",
+ TABLE_UINT8, info->ttl);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->tos > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "TOS:",
+ TABLE_UINT8, info->tos);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Port:",
+ TABLE_UINT16, info->tunnel_port);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Inherit:",
+ TABLE_STRING, geneve_df_to_string(info->inherit));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (info->df > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "IPDoNotFragment:",
+ TABLE_UINT8, info->df);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "UDPChecksum:",
+ TABLE_BOOLEAN, info->csum);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "UDP6ZeroChecksumTx:",
+ TABLE_BOOLEAN, info->csum6_tx);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "UDP6ZeroChecksumRx:",
+ TABLE_BOOLEAN, info->csum6_rx);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (info->label > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "FlowLabel:",
+ TABLE_UINT32, info->label);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ } else if (STRPTR_IN_SET(info->netdev_kind, "macvlan", "macvtap")) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Mode:",
+ TABLE_STRING, macvlan_mode_to_string(info->macvlan_mode));
+ if (r < 0)
+ return table_log_add_error(r);
+ } else if (streq_ptr(info->netdev_kind, "ipvlan")) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+
+ if (info->ipvlan_flags & IPVLAN_F_PRIVATE)
+ p = strdup("private");
+ else if (info->ipvlan_flags & IPVLAN_F_VEPA)
+ p = strdup("vepa");
+ else
+ p = strdup("bridge");
+ if (!p)
+ log_oom();
+
+ s = strjoin(ipvlan_mode_to_string(info->ipvlan_mode), " (", p, ")");
+ if (!s)
+ return log_oom();
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Mode:",
+ TABLE_STRING, s);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->has_wlan_link_info) {
+ _cleanup_free_ char *esc = NULL;
+ char buf[ETHER_ADDR_TO_STRING_MAX];
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "WiFi access point:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (info->ssid)
+ esc = cescape(info->ssid);
+
+ r = table_add_cell_stringf(table, NULL, "%s (%s)",
+ strnull(esc),
+ ether_addr_to_string(&info->bssid, buf));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->has_bitrates) {
+ char tx[FORMAT_BYTES_MAX], rx[FORMAT_BYTES_MAX];
+
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Bit Rate (Tx/Rx):");
+ if (r < 0)
+ return table_log_add_error(r);
+ r = table_add_cell_stringf(table, NULL, "%sbps/%sbps",
+ format_bytes_full(tx, sizeof tx, info->tx_bitrate, 0),
+ format_bytes_full(rx, sizeof rx, info->rx_bitrate, 0));
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->has_tx_queues || info->has_rx_queues) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Queue Length (Tx/Rx):");
+ if (r < 0)
+ return table_log_add_error(r);
+ r = table_add_cell_stringf(table, NULL, "%" PRIu32 "/%" PRIu32, info->tx_queues, info->rx_queues);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->has_ethtool_link_info) {
+ const char *duplex = duplex_to_string(info->duplex);
+ const char *port = port_to_string(info->port);
+
+ if (IN_SET(info->autonegotiation, AUTONEG_DISABLE, AUTONEG_ENABLE)) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Auto negotiation:",
+ TABLE_BOOLEAN, info->autonegotiation == AUTONEG_ENABLE);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (info->speed > 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Speed:",
+ TABLE_BPS, info->speed);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (duplex) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Duplex:",
+ TABLE_STRING, duplex);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (port) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Port:",
+ TABLE_STRING, port);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ }
+
+ r = dump_addresses(rtnl, lease, table, info->ifindex);
+ if (r < 0)
+ return r;
+ r = dump_gateways(rtnl, hwdb, table, info->ifindex);
+ if (r < 0)
+ return r;
+ r = dump_list(table, "DNS:", dns);
+ if (r < 0)
+ return r;
+ r = dump_list(table, "Search Domains:", search_domains);
+ if (r < 0)
+ return r;
+ r = dump_list(table, "Route Domains:", route_domains);
+ if (r < 0)
+ return r;
+ r = dump_list(table, "NTP:", ntp);
+ if (r < 0)
+ return r;
+ r = dump_list(table, "SIP:", sip);
+ if (r < 0)
+ return r;
+ r = dump_ifindexes(table, "Carrier Bound To:", carrier_bound_to);
+ if (r < 0)
+ return r;
+ r = dump_ifindexes(table, "Carrier Bound By:", carrier_bound_by);
+ if (r < 0)
+ return r;
+
+ if (lease) {
+ const void *client_id;
+ size_t client_id_len;
+ const char *tz;
+
+ r = sd_dhcp_lease_get_timezone(lease, &tz);
+ if (r >= 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "Time Zone:",
+ TABLE_STRING, tz);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_dhcp_lease_get_client_id(lease, &client_id, &client_id_len);
+ if (r >= 0) {
+ _cleanup_free_ char *id = NULL;
+
+ r = sd_dhcp_client_id_to_string(client_id, client_id_len, &id);
+ if (r >= 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "DHCP4 Client ID:",
+ TABLE_STRING, id);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+ }
+ }
+
+ r = sd_network_link_get_dhcp6_client_iaid_string(info->ifindex, &iaid);
+ if (r >= 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "DHCP6 Client IAID:",
+ TABLE_STRING, iaid);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_network_link_get_dhcp6_client_duid_string(info->ifindex, &duid);
+ if (r >= 0) {
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_STRING, "DHCP6 Client DUID:",
+ TABLE_STRING, duid);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = dump_lldp_neighbors(table, "Connected To:", info->ifindex);
+ if (r < 0)
+ return r;
+
+ r = dump_dhcp_leases(table, "Offered DHCP leases:", bus, info);
+ if (r < 0)
+ return r;
+
+ r = dump_statistics(table, info);
+ if (r < 0)
+ return r;
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return show_logs(info);
+}
+
+static int system_status(sd_netlink *rtnl, sd_hwdb *hwdb) {
+ _cleanup_free_ char *operational_state = NULL;
+ _cleanup_strv_free_ char **dns = NULL, **ntp = NULL, **search_domains = NULL, **route_domains = NULL;
+ const char *on_color_operational, *off_color_operational;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ TableCell *cell;
+ int r;
+
+ assert(rtnl);
+
+ (void) sd_network_get_operational_state(&operational_state);
+ operational_state_to_color(NULL, operational_state, &on_color_operational, &off_color_operational);
+
+ table = table_new("dot", "key", "value");
+ if (!table)
+ return log_oom();
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ (void) table_set_ellipsize_percent(table, cell, 100);
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ (void) table_set_align_percent(table, cell, 100);
+ (void) table_set_ellipsize_percent(table, cell, 100);
+
+ table_set_header(table, false);
+
+ r = table_add_many(table,
+ TABLE_STRING, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE),
+ TABLE_SET_COLOR, on_color_operational,
+ TABLE_STRING, "State:",
+ TABLE_STRING, strna(operational_state),
+ TABLE_SET_COLOR, on_color_operational);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = dump_addresses(rtnl, NULL, table, 0);
+ if (r < 0)
+ return r;
+ r = dump_gateways(rtnl, hwdb, table, 0);
+ if (r < 0)
+ return r;
+
+ (void) sd_network_get_dns(&dns);
+ r = dump_list(table, "DNS:", dns);
+ if (r < 0)
+ return r;
+
+ (void) sd_network_get_search_domains(&search_domains);
+ r = dump_list(table, "Search Domains:", search_domains);
+ if (r < 0)
+ return r;
+
+ (void) sd_network_get_route_domains(&route_domains);
+ r = dump_list(table, "Route Domains:", route_domains);
+ if (r < 0)
+ return r;
+
+ (void) sd_network_get_ntp(&ntp);
+ r = dump_list(table, "NTP:", ntp);
+ if (r < 0)
+ return r;
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return show_logs(NULL);
+}
+
+static int link_status(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ _cleanup_(link_info_array_freep) LinkInfo *links = NULL;
+ int r, c, i;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect system bus: %m");
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ r = sd_hwdb_new(&hwdb);
+ if (r < 0)
+ log_debug_errno(r, "Failed to open hardware database: %m");
+
+ if (arg_all)
+ c = acquire_link_info(bus, rtnl, NULL, &links);
+ else if (argc <= 1)
+ return system_status(rtnl, hwdb);
+ else
+ c = acquire_link_info(bus, rtnl, argv + 1, &links);
+ if (c < 0)
+ return c;
+
+ for (i = 0; i < c; i++) {
+ if (i > 0)
+ fputc('\n', stdout);
+
+ link_status_one(bus, rtnl, hwdb, links + i);
+ }
+
+ return 0;
+}
+
+static char *lldp_capabilities_to_string(uint16_t x) {
+ static const char characters[] = {
+ 'o', 'p', 'b', 'w', 'r', 't', 'd', 'a', 'c', 's', 'm',
+ };
+ char *ret;
+ unsigned i;
+
+ ret = new(char, ELEMENTSOF(characters) + 1);
+ if (!ret)
+ return NULL;
+
+ for (i = 0; i < ELEMENTSOF(characters); i++)
+ ret[i] = (x & (1U << i)) ? characters[i] : '.';
+
+ ret[i] = 0;
+ return ret;
+}
+
+static void lldp_capabilities_legend(uint16_t x) {
+ unsigned w, i, cols = columns();
+ static const char* const table[] = {
+ "o - Other",
+ "p - Repeater",
+ "b - Bridge",
+ "w - WLAN Access Point",
+ "r - Router",
+ "t - Telephone",
+ "d - DOCSIS cable device",
+ "a - Station",
+ "c - Customer VLAN",
+ "s - Service VLAN",
+ "m - Two-port MAC Relay (TPMR)",
+ };
+
+ if (x == 0)
+ return;
+
+ printf("\nCapability Flags:\n");
+ for (w = 0, i = 0; i < ELEMENTSOF(table); i++)
+ if (x & (1U << i) || arg_all) {
+ bool newline;
+
+ newline = w + strlen(table[i]) + (w == 0 ? 0 : 2) > cols;
+ if (newline)
+ w = 0;
+ w += printf("%s%s%s", newline ? "\n" : "", w == 0 ? "" : "; ", table[i]);
+ }
+ puts("");
+}
+
+static int link_lldp_status(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(link_info_array_freep) LinkInfo *links = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int i, r, c, m = 0;
+ uint16_t all = 0;
+ TableCell *cell;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ c = acquire_link_info(NULL, rtnl, argc > 1 ? argv + 1 : NULL, &links);
+ if (c < 0)
+ return c;
+
+ (void) pager_open(arg_pager_flags);
+
+ table = table_new("link",
+ "chassis id",
+ "system name",
+ "caps",
+ "port id",
+ "port description");
+ if (!table)
+ return log_oom();
+
+ if (arg_full)
+ table_set_width(table, 0);
+
+ table_set_header(table, arg_legend);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ table_set_minimum_width(table, cell, 16);
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ table_set_minimum_width(table, cell, 17);
+
+ assert_se(cell = table_get_cell(table, 0, 2));
+ table_set_minimum_width(table, cell, 16);
+
+ assert_se(cell = table_get_cell(table, 0, 3));
+ table_set_minimum_width(table, cell, 11);
+
+ assert_se(cell = table_get_cell(table, 0, 4));
+ table_set_minimum_width(table, cell, 17);
+
+ assert_se(cell = table_get_cell(table, 0, 5));
+ table_set_minimum_width(table, cell, 16);
+
+ for (i = 0; i < c; i++) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ r = open_lldp_neighbors(links[i].ifindex, &f);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0) {
+ log_warning_errno(r, "Failed to open LLDP data for %i, ignoring: %m", links[i].ifindex);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *cid = NULL, *pid = NULL, *sname = NULL, *pdesc = NULL, *capabilities = NULL;
+ const char *chassis_id = NULL, *port_id = NULL, *system_name = NULL, *port_description = NULL;
+ _cleanup_(sd_lldp_neighbor_unrefp) sd_lldp_neighbor *n = NULL;
+ uint16_t cc;
+
+ r = next_lldp_neighbor(f, &n);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read neighbor data: %m");
+ break;
+ }
+ if (r == 0)
+ break;
+
+ (void) sd_lldp_neighbor_get_chassis_id_as_string(n, &chassis_id);
+ (void) sd_lldp_neighbor_get_port_id_as_string(n, &port_id);
+ (void) sd_lldp_neighbor_get_system_name(n, &system_name);
+ (void) sd_lldp_neighbor_get_port_description(n, &port_description);
+
+ if (chassis_id) {
+ cid = ellipsize(chassis_id, 17, 100);
+ if (cid)
+ chassis_id = cid;
+ }
+
+ if (port_id) {
+ pid = ellipsize(port_id, 17, 100);
+ if (pid)
+ port_id = pid;
+ }
+
+ if (system_name) {
+ sname = ellipsize(system_name, 16, 100);
+ if (sname)
+ system_name = sname;
+ }
+
+ if (port_description) {
+ pdesc = ellipsize(port_description, 16, 100);
+ if (pdesc)
+ port_description = pdesc;
+ }
+
+ if (sd_lldp_neighbor_get_enabled_capabilities(n, &cc) >= 0) {
+ capabilities = lldp_capabilities_to_string(cc);
+ all |= cc;
+ }
+
+ r = table_add_many(table,
+ TABLE_STRING, links[i].name,
+ TABLE_STRING, strna(chassis_id),
+ TABLE_STRING, strna(system_name),
+ TABLE_STRING, strna(capabilities),
+ TABLE_STRING, strna(port_id),
+ TABLE_STRING, strna(port_description));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ m++;
+ }
+ }
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ if (arg_legend) {
+ lldp_capabilities_legend(all);
+ printf("\n%i neighbors listed.\n", m);
+ }
+
+ return 0;
+}
+
+static int link_delete_send_message(sd_netlink *rtnl, int index) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(rtnl);
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_DELLINK, index);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_call(rtnl, req, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int link_up_down_send_message(sd_netlink *rtnl, char *command, int index) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(rtnl);
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_SETLINK, index);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ if (streq(command, "up"))
+ r = sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP);
+ else
+ r = sd_rtnl_message_link_set_flags(req, 0, IFF_UP);
+ if (r < 0)
+ return log_error_errno(r, "Could not set link flags: %m");
+
+ r = sd_netlink_call(rtnl, req, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int link_up_down(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_set_free_ Set *indexes = NULL;
+ int index, r, i;
+ void *p;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ indexes = set_new(NULL);
+ if (!indexes)
+ return log_oom();
+
+ for (i = 1; i < argc; i++) {
+ index = resolve_interface_or_warn(&rtnl, argv[i]);
+ if (index < 0)
+ return index;
+
+ r = set_put(indexes, INT_TO_PTR(index));
+ if (r < 0)
+ return log_oom();
+ }
+
+ SET_FOREACH(p, indexes) {
+ index = PTR_TO_INT(p);
+ r = link_up_down_send_message(rtnl, argv[0], index);
+ if (r < 0) {
+ char ifname[IF_NAMESIZE + 1];
+
+ return log_error_errno(r, "Failed to %s interface %s: %m",
+ argv[1], format_ifname_full(index, ifname, FORMAT_IFNAME_IFINDEX));
+ }
+ }
+
+ return r;
+}
+
+static int link_delete(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_set_free_ Set *indexes = NULL;
+ int index, r, i;
+ void *p;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ indexes = set_new(NULL);
+ if (!indexes)
+ return log_oom();
+
+ for (i = 1; i < argc; i++) {
+ index = resolve_interface_or_warn(&rtnl, argv[i]);
+ if (index < 0)
+ return index;
+
+ r = set_put(indexes, INT_TO_PTR(index));
+ if (r < 0)
+ return log_oom();
+ }
+
+ SET_FOREACH(p, indexes) {
+ index = PTR_TO_INT(p);
+ r = link_delete_send_message(rtnl, index);
+ if (r < 0) {
+ char ifname[IF_NAMESIZE + 1];
+
+ return log_error_errno(r, "Failed to delete interface %s: %m",
+ format_ifname_full(index, ifname, FORMAT_IFNAME_IFINDEX));
+ }
+ }
+
+ return r;
+}
+
+static int link_renew_one(sd_bus *bus, int index, const char *name) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = bus_call_method(bus, bus_network_mgr, "RenewLink", &error, NULL, "i", index);
+ if (r < 0)
+ return log_error_errno(r, "Failed to renew dynamic configuration of interface %s: %s",
+ name, bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int link_renew(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int index, i, k = 0, r;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect system bus: %m");
+
+ for (i = 1; i < argc; i++) {
+ index = resolve_interface_or_warn(&rtnl, argv[i]);
+ if (index < 0)
+ return index;
+
+ r = link_renew_one(bus, index, argv[i]);
+ if (r < 0 && k >= 0)
+ k = r;
+ }
+
+ return k;
+}
+
+static int link_force_renew_one(sd_bus *bus, int index, const char *name) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = bus_call_method(bus, bus_network_mgr, "ForceRenewLink", &error, NULL, "i", index);
+ if (r < 0)
+ return log_error_errno(r, "Failed to force renew dynamic configuration of interface %s: %s",
+ name, bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int link_force_renew(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int index, i, k = 0, r;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect system bus: %m");
+
+ for (i = 1; i < argc; i++) {
+ index = resolve_interface_or_warn(&rtnl, argv[i]);
+ if (index < 0)
+ return index;
+
+ r = link_force_renew_one(bus, index, argv[i]);
+ if (r < 0 && k >= 0)
+ k = r;
+ }
+
+ return k;
+}
+
+static int verb_reload(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect system bus: %m");
+
+ r = bus_call_method(bus, bus_network_mgr, "Reload", &error, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reload network settings: %m");
+
+ return 0;
+}
+
+static int verb_reconfigure(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_set_free_ Set *indexes = NULL;
+ int index, i, r;
+ void *p;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect system bus: %m");
+
+ indexes = set_new(NULL);
+ if (!indexes)
+ return log_oom();
+
+ for (i = 1; i < argc; i++) {
+ index = resolve_interface_or_warn(&rtnl, argv[i]);
+ if (index < 0)
+ return index;
+
+ r = set_put(indexes, INT_TO_PTR(index));
+ if (r < 0)
+ return log_oom();
+ }
+
+ SET_FOREACH(p, indexes) {
+ index = PTR_TO_INT(p);
+ r = bus_call_method(bus, bus_network_mgr, "ReconfigureLink", &error, NULL, "i", index);
+ if (r < 0) {
+ char ifname[IF_NAMESIZE + 1];
+
+ return log_error_errno(r, "Failed to reconfigure network interface %s: %m",
+ format_ifname_full(index, ifname, FORMAT_IFNAME_IFINDEX));
+ }
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("networkctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND\n\n"
+ "%sQuery and control the networking subsystem.%s\n"
+ "\nCommands:\n"
+ " list [PATTERN...] List links\n"
+ " status [PATTERN...] Show link status\n"
+ " lldp [PATTERN...] Show LLDP neighbors\n"
+ " label Show current address label entries in the kernel\n"
+ " delete DEVICES... Delete virtual netdevs\n"
+ " up DEVICES... Bring devices up\n"
+ " down DEVICES... Bring devices down\n"
+ " renew DEVICES... Renew dynamic configurations\n"
+ " forcerenew DEVICES... Trigger DHCP reconfiguration of all connected clients\n"
+ " reconfigure DEVICES... Reconfigure interfaces\n"
+ " reload Reload .network and .netdev files\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " -a --all Show status for all links\n"
+ " -s --stats Show detailed link statics\n"
+ " -l --full Do not ellipsize output\n"
+ " -n --lines=INTEGER Number of journal entries to show\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "all", no_argument, NULL, 'a' },
+ { "stats", no_argument, NULL, 's' },
+ { "full", no_argument, NULL, 'l' },
+ { "lines", required_argument, NULL, 'n' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hasln:", options, NULL)) >= 0) {
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case 's':
+ arg_stats = true;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case 'n':
+ if (safe_atou(optarg, &arg_lines) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse lines '%s'", optarg);
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+ return 1;
+}
+
+static int networkctl_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "list", VERB_ANY, VERB_ANY, VERB_DEFAULT, list_links },
+ { "status", VERB_ANY, VERB_ANY, 0, link_status },
+ { "lldp", VERB_ANY, VERB_ANY, 0, link_lldp_status },
+ { "label", 1, 1, 0, list_address_labels },
+ { "delete", 2, VERB_ANY, 0, link_delete },
+ { "up", 2, VERB_ANY, 0, link_up_down },
+ { "down", 2, VERB_ANY, 0, link_up_down },
+ { "renew", 2, VERB_ANY, 0, link_renew },
+ { "forcerenew", 2, VERB_ANY, 0, link_force_renew },
+ { "reconfigure", 2, VERB_ANY, 0, verb_reconfigure },
+ { "reload", 1, 1, 0, verb_reload },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static void warn_networkd_missing(void) {
+
+ if (access("/run/systemd/netif/state", F_OK) >= 0)
+ return;
+
+ fprintf(stderr, "WARNING: systemd-networkd is not running, output will be incomplete.\n\n");
+}
+
+static int run(int argc, char* argv[]) {
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ warn_networkd_missing();
+
+ return networkctl_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/network/networkd-address-label.c b/src/network/networkd-address-label.c
new file mode 100644
index 0000000..f933a1d
--- /dev/null
+++ b/src/network/networkd-address-label.c
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <linux/if_addrlabel.h>
+
+#include "alloc-util.h"
+#include "netlink-util.h"
+#include "networkd-address-label.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+
+AddressLabel *address_label_free(AddressLabel *label) {
+ if (!label)
+ return NULL;
+
+ if (label->network) {
+ assert(label->section);
+ hashmap_remove(label->network->address_labels_by_section, label->section);
+ }
+
+ network_config_section_free(label->section);
+ return mfree(label);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(AddressLabel, address_label_free);
+
+static int address_label_new_static(Network *network, const char *filename, unsigned section_line, AddressLabel **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(address_label_freep) AddressLabel *label = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ label = hashmap_get(network->address_labels_by_section, n);
+ if (label) {
+ *ret = TAKE_PTR(label);
+ return 0;
+ }
+
+ label = new(AddressLabel, 1);
+ if (!label)
+ return -ENOMEM;
+
+ *label = (AddressLabel) {
+ .network = network,
+ .section = TAKE_PTR(n),
+ };
+
+ r = hashmap_ensure_allocated(&network->address_labels_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->address_labels_by_section, label->section, label);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(label);
+ return 0;
+}
+
+static int address_label_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(rtnl);
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+ assert(link->address_label_messages > 0);
+
+ link->address_label_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not set address label");
+ link_enter_failed(link);
+ return 1;
+ } else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->address_label_messages == 0)
+ log_link_debug(link, "Addresses label set");
+
+ return 1;
+}
+
+static int address_label_configure(AddressLabel *label, Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(label);
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ r = sd_rtnl_message_new_addrlabel(link->manager->rtnl, &req, RTM_NEWADDRLABEL,
+ link->ifindex, AF_INET6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_NEWADDR message: %m");
+
+ r = sd_rtnl_message_addrlabel_set_prefixlen(req, label->prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set prefixlen: %m");
+
+ r = sd_netlink_message_append_u32(req, IFAL_LABEL, label->label);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFAL_LABEL attribute: %m");
+
+ r = sd_netlink_message_append_in6_addr(req, IFA_ADDRESS, &label->in_addr.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFA_ADDRESS attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req,
+ address_label_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+int link_set_address_labels(Link *link) {
+ AddressLabel *label;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ HASHMAP_FOREACH(label, link->network->address_labels_by_section) {
+ r = address_label_configure(label, link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set address label: %m");
+
+ link->address_label_messages++;
+ }
+
+ return 0;
+}
+
+void network_drop_invalid_address_labels(Network *network) {
+ AddressLabel *label;
+
+ assert(network);
+
+ HASHMAP_FOREACH(label, network->address_labels_by_section)
+ if (section_is_invalid(label->section))
+ address_label_free(label);
+}
+
+int config_parse_address_label_prefix(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(address_label_free_or_set_invalidp) AddressLabel *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_label_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = in_addr_prefix_from_string(rvalue, AF_INET6, &n->in_addr, &n->prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Address label is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_address_label(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(address_label_free_or_set_invalidp) AddressLabel *n = NULL;
+ Network *network = userdata;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_label_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse address label, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (k == 0xffffffffUL) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Address label is invalid, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n->label = k;
+ n = NULL;
+
+ return 0;
+}
diff --git a/src/network/networkd-address-label.h b/src/network/networkd-address-label.h
new file mode 100644
index 0000000..11fdd9a
--- /dev/null
+++ b/src/network/networkd-address-label.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "networkd-util.h"
+
+typedef struct Network Network;
+typedef struct Link Link;
+
+typedef struct AddressLabel {
+ Network *network;
+ NetworkConfigSection *section;
+
+ unsigned char prefixlen;
+ uint32_t label;
+ union in_addr_union in_addr;
+} AddressLabel;
+
+AddressLabel *address_label_free(AddressLabel *label);
+
+void network_drop_invalid_address_labels(Network *network);
+
+int link_set_address_labels(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_address_label);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_label_prefix);
diff --git a/src/network/networkd-address-pool.c b/src/network/networkd-address-pool.c
new file mode 100644
index 0000000..7e27db6
--- /dev/null
+++ b/src/network/networkd-address-pool.c
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "networkd-address-pool.h"
+#include "networkd-address.h"
+#include "networkd-manager.h"
+#include "set.h"
+#include "string-util.h"
+
+#define RANDOM_PREFIX_TRIAL_MAX 1024
+
+static int address_pool_new(
+ Manager *m,
+ int family,
+ const union in_addr_union *u,
+ unsigned prefixlen) {
+
+ _cleanup_free_ AddressPool *p = NULL;
+ int r;
+
+ assert(m);
+ assert(u);
+
+ p = new(AddressPool, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (AddressPool) {
+ .manager = m,
+ .family = family,
+ .prefixlen = prefixlen,
+ .in_addr = *u,
+ };
+
+ r = ordered_set_ensure_put(&m->address_pools, NULL, p);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(p);
+ return 0;
+}
+
+static int address_pool_new_from_string(
+ Manager *m,
+ int family,
+ const char *p,
+ unsigned prefixlen) {
+
+ union in_addr_union u;
+ int r;
+
+ assert(m);
+ assert(p);
+
+ r = in_addr_from_string(family, p, &u);
+ if (r < 0)
+ return r;
+
+ return address_pool_new(m, family, &u, prefixlen);
+}
+
+int address_pool_setup_default(Manager *m) {
+ int r;
+
+ assert(m);
+
+ /* Add in the well-known private address ranges. */
+ r = address_pool_new_from_string(m, AF_INET6, "fd00::", 8);
+ if (r < 0)
+ return r;
+
+ r = address_pool_new_from_string(m, AF_INET, "192.168.0.0", 16);
+ if (r < 0)
+ return r;
+
+ r = address_pool_new_from_string(m, AF_INET, "172.16.0.0", 12);
+ if (r < 0)
+ return r;
+
+ r = address_pool_new_from_string(m, AF_INET, "10.0.0.0", 8);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool address_pool_prefix_is_taken(
+ AddressPool *p,
+ const union in_addr_union *u,
+ unsigned prefixlen) {
+
+ Link *l;
+ Network *n;
+
+ assert(p);
+ assert(u);
+
+ HASHMAP_FOREACH(l, p->manager->links) {
+ Address *a;
+
+ /* Don't clash with assigned addresses */
+ SET_FOREACH(a, l->addresses) {
+ if (a->family != p->family)
+ continue;
+
+ if (in_addr_prefix_intersect(p->family, u, prefixlen, &a->in_addr, a->prefixlen))
+ return true;
+ }
+
+ /* Don't clash with addresses already pulled from the pool, but not assigned yet */
+ SET_FOREACH(a, l->pool_addresses) {
+ if (a->family != p->family)
+ continue;
+
+ if (in_addr_prefix_intersect(p->family, u, prefixlen, &a->in_addr, a->prefixlen))
+ return true;
+ }
+ }
+
+ /* And don't clash with configured but un-assigned addresses either */
+ ORDERED_HASHMAP_FOREACH(n, p->manager->networks) {
+ Address *a;
+
+ ORDERED_HASHMAP_FOREACH(a, n->addresses_by_section) {
+ if (a->family != p->family)
+ continue;
+
+ if (in_addr_prefix_intersect(p->family, u, prefixlen, &a->in_addr, a->prefixlen))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int address_pool_acquire_one(AddressPool *p, int family, unsigned prefixlen, union in_addr_union *found) {
+ union in_addr_union u;
+ unsigned i;
+ int r;
+
+ assert(p);
+ assert(prefixlen > 0);
+ assert(found);
+
+ if (p->family != family)
+ return 0;
+
+ if (p->prefixlen >= prefixlen)
+ return 0;
+
+ u = p->in_addr;
+
+ for (i = 0; i < RANDOM_PREFIX_TRIAL_MAX; i++) {
+ r = in_addr_random_prefix(p->family, &u, p->prefixlen, prefixlen);
+ if (r <= 0)
+ return r;
+
+ if (!address_pool_prefix_is_taken(p, &u, prefixlen)) {
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *s = NULL;
+
+ (void) in_addr_to_string(p->family, &u, &s);
+ log_debug("Found range %s/%u", strna(s), prefixlen);
+ }
+
+ *found = u;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int address_pool_acquire(Manager *m, int family, unsigned prefixlen, union in_addr_union *found) {
+ AddressPool *p;
+ int r;
+
+ assert(m);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ assert(prefixlen > 0);
+ assert(found);
+
+ ORDERED_SET_FOREACH(p, m->address_pools) {
+ r = address_pool_acquire_one(p, family, prefixlen, found);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/network/networkd-address-pool.h b/src/network/networkd-address-pool.h
new file mode 100644
index 0000000..93bdec8
--- /dev/null
+++ b/src/network/networkd-address-pool.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "in-addr-util.h"
+
+typedef struct Manager Manager;
+
+typedef struct AddressPool {
+ Manager *manager;
+
+ int family;
+ unsigned prefixlen;
+ union in_addr_union in_addr;
+} AddressPool;
+
+int address_pool_setup_default(Manager *m);
+int address_pool_acquire(Manager *m, int family, unsigned prefixlen, union in_addr_union *found);
diff --git a/src/network/networkd-address.c b/src/network/networkd-address.c
new file mode 100644
index 0000000..961b248
--- /dev/null
+++ b/src/network/networkd-address.c
@@ -0,0 +1,1922 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <net/if_arp.h>
+
+#include "alloc-util.h"
+#include "firewall-util.h"
+#include "memory-util.h"
+#include "netlink-util.h"
+#include "networkd-address-pool.h"
+#include "networkd-address.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+#define ADDRESSES_PER_LINK_MAX 2048U
+#define STATIC_ADDRESSES_PER_NETWORK_MAX 1024U
+
+int generate_ipv6_eui_64_address(const Link *link, struct in6_addr *ret) {
+ assert(link);
+ assert(ret);
+
+ if (link->iftype == ARPHRD_INFINIBAND) {
+ /* see RFC4391 section 8 */
+ memcpy(&ret->s6_addr[8], &link->hw_addr.addr.infiniband[12], 8);
+ ret->s6_addr[8] ^= 1 << 1;
+
+ return 0;
+ }
+
+ /* see RFC4291 section 2.5.1 */
+ ret->s6_addr[8] = link->hw_addr.addr.ether.ether_addr_octet[0];
+ ret->s6_addr[8] ^= 1 << 1;
+ ret->s6_addr[9] = link->hw_addr.addr.ether.ether_addr_octet[1];
+ ret->s6_addr[10] = link->hw_addr.addr.ether.ether_addr_octet[2];
+ ret->s6_addr[11] = 0xff;
+ ret->s6_addr[12] = 0xfe;
+ ret->s6_addr[13] = link->hw_addr.addr.ether.ether_addr_octet[3];
+ ret->s6_addr[14] = link->hw_addr.addr.ether.ether_addr_octet[4];
+ ret->s6_addr[15] = link->hw_addr.addr.ether.ether_addr_octet[5];
+
+ return 0;
+}
+
+int address_new(Address **ret) {
+ _cleanup_(address_freep) Address *address = NULL;
+
+ address = new(Address, 1);
+ if (!address)
+ return -ENOMEM;
+
+ *address = (Address) {
+ .family = AF_UNSPEC,
+ .scope = RT_SCOPE_UNIVERSE,
+ .cinfo.ifa_prefered = CACHE_INFO_INFINITY_LIFE_TIME,
+ .cinfo.ifa_valid = CACHE_INFO_INFINITY_LIFE_TIME,
+ .duplicate_address_detection = ADDRESS_FAMILY_IPV6,
+ };
+
+ *ret = TAKE_PTR(address);
+
+ return 0;
+}
+
+static int address_new_static(Network *network, const char *filename, unsigned section_line, Address **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(address_freep) Address *address = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ address = ordered_hashmap_get(network->addresses_by_section, n);
+ if (address) {
+ *ret = TAKE_PTR(address);
+ return 0;
+ }
+
+ if (ordered_hashmap_size(network->addresses_by_section) >= STATIC_ADDRESSES_PER_NETWORK_MAX)
+ return -E2BIG;
+
+ r = address_new(&address);
+ if (r < 0)
+ return r;
+
+ address->network = network;
+ address->section = TAKE_PTR(n);
+
+ r = ordered_hashmap_ensure_allocated(&network->addresses_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(network->addresses_by_section, address->section, address);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(address);
+ return 0;
+}
+
+Address *address_free(Address *address) {
+ if (!address)
+ return NULL;
+
+ if (address->network) {
+ assert(address->section);
+ ordered_hashmap_remove(address->network->addresses_by_section, address->section);
+ }
+
+ if (address->link) {
+ NDiscAddress *n;
+
+ set_remove(address->link->addresses, address);
+ set_remove(address->link->addresses_foreign, address);
+ set_remove(address->link->static_addresses, address);
+ if (address->link->dhcp_address == address)
+ address->link->dhcp_address = NULL;
+ if (address->link->dhcp_address_old == address)
+ address->link->dhcp_address_old = NULL;
+ set_remove(address->link->dhcp6_addresses, address);
+ set_remove(address->link->dhcp6_addresses_old, address);
+ set_remove(address->link->dhcp6_pd_addresses, address);
+ set_remove(address->link->dhcp6_pd_addresses_old, address);
+ SET_FOREACH(n, address->link->ndisc_addresses)
+ if (n->address == address)
+ free(set_remove(address->link->ndisc_addresses, n));
+
+ if (in_addr_equal(AF_INET6, &address->in_addr, (const union in_addr_union *) &address->link->ipv6ll_address))
+ memzero(&address->link->ipv6ll_address, sizeof(struct in6_addr));
+ }
+
+ sd_ipv4acd_unref(address->acd);
+
+ network_config_section_free(address->section);
+ free(address->label);
+ return mfree(address);
+}
+
+static bool address_may_have_broadcast(const Address *a) {
+ assert(a);
+
+ /* A /31 or /32 IPv4 address does not have a broadcast address.
+ * See https://tools.ietf.org/html/rfc3021 */
+
+ return a->family == AF_INET && in4_addr_is_null(&a->in_addr_peer.in) && a->prefixlen <= 30;
+}
+
+static uint32_t address_prefix(const Address *a) {
+ assert(a);
+
+ /* make sure we don't try to shift by 32.
+ * See ISO/IEC 9899:TC3 § 6.5.7.3. */
+ if (a->prefixlen == 0)
+ return 0;
+
+ if (a->in_addr_peer.in.s_addr != 0)
+ return be32toh(a->in_addr_peer.in.s_addr) >> (32 - a->prefixlen);
+ else
+ return be32toh(a->in_addr.in.s_addr) >> (32 - a->prefixlen);
+}
+
+void address_hash_func(const Address *a, struct siphash *state) {
+ assert(a);
+
+ siphash24_compress(&a->family, sizeof(a->family), state);
+
+ switch (a->family) {
+ case AF_INET:
+ siphash24_compress(&a->prefixlen, sizeof(a->prefixlen), state);
+
+ uint32_t prefix = address_prefix(a);
+ siphash24_compress(&prefix, sizeof(prefix), state);
+
+ _fallthrough_;
+ case AF_INET6:
+ siphash24_compress(&a->in_addr, FAMILY_ADDRESS_SIZE(a->family), state);
+ break;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ break;
+ }
+}
+
+int address_compare_func(const Address *a1, const Address *a2) {
+ int r;
+
+ r = CMP(a1->family, a2->family);
+ if (r != 0)
+ return r;
+
+ switch (a1->family) {
+ case AF_INET:
+ /* See kernel's find_matching_ifa() in net/ipv4/devinet.c */
+ r = CMP(a1->prefixlen, a2->prefixlen);
+ if (r != 0)
+ return r;
+
+ r = CMP(address_prefix(a1), address_prefix(a2));
+ if (r != 0)
+ return r;
+
+ _fallthrough_;
+ case AF_INET6:
+ /* See kernel's ipv6_get_ifaddr() in net/ipv6/addrconf.c */
+ return memcmp(&a1->in_addr, &a2->in_addr, FAMILY_ADDRESS_SIZE(a1->family));
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ return 0;
+ }
+}
+
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(address_hash_ops, Address, address_hash_func, address_compare_func, address_free);
+
+bool address_equal(const Address *a1, const Address *a2) {
+ if (a1 == a2)
+ return true;
+
+ if (!a1 || !a2)
+ return false;
+
+ return address_compare_func(a1, a2) == 0;
+}
+
+static int address_copy(Address *dest, const Address *src) {
+ int r;
+
+ assert(dest);
+ assert(src);
+
+ if (src->family == AF_INET) {
+ r = free_and_strdup(&dest->label, src->label);
+ if (r < 0)
+ return r;
+ }
+
+ dest->family = src->family;
+ dest->prefixlen = src->prefixlen;
+ dest->scope = src->scope;
+ dest->flags = src->flags;
+ dest->cinfo = src->cinfo;
+ dest->in_addr = src->in_addr;
+ dest->in_addr_peer = src->in_addr_peer;
+ if (address_may_have_broadcast(src))
+ dest->broadcast = src->broadcast;
+ dest->duplicate_address_detection = src->duplicate_address_detection;
+
+ return 0;
+}
+
+static int address_set_masquerade(Address *address, bool add) {
+ union in_addr_union masked;
+ int r;
+
+ assert(address);
+ assert(address->link);
+
+ if (!address->link->network)
+ return 0;
+
+ if (!address->link->network->ip_masquerade)
+ return 0;
+
+ if (address->family != AF_INET)
+ return 0;
+
+ if (address->scope >= RT_SCOPE_LINK)
+ return 0;
+
+ if (address->ip_masquerade_done == add)
+ return 0;
+
+ masked = address->in_addr;
+ r = in_addr_mask(address->family, &masked, address->prefixlen);
+ if (r < 0)
+ return r;
+
+ r = fw_add_masquerade(add, AF_INET, 0, &masked, address->prefixlen, NULL, NULL, 0);
+ if (r < 0)
+ return r;
+
+ address->ip_masquerade_done = add;
+
+ return 0;
+}
+
+static int address_add_internal(Link *link, Set **addresses, const Address *in, Address **ret) {
+ _cleanup_(address_freep) Address *address = NULL;
+ int r;
+
+ assert(link);
+ assert(addresses);
+ assert(in);
+
+ r = address_new(&address);
+ if (r < 0)
+ return r;
+
+ r = address_copy(address, in);
+ if (r < 0)
+ return r;
+
+ /* Consider address tentative until we get the real flags from the kernel */
+ address->flags = IFA_F_TENTATIVE;
+
+ r = set_ensure_put(addresses, &address_hash_ops, address);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EEXIST;
+
+ address->link = link;
+
+ if (ret)
+ *ret = address;
+ TAKE_PTR(address);
+ return 0;
+}
+
+static int address_add_foreign(Link *link, const Address *in, Address **ret) {
+ return address_add_internal(link, &link->addresses_foreign, in, ret);
+}
+
+static int address_add(Link *link, const Address *in, Address **ret) {
+ Address *address;
+ int r;
+
+ assert(link);
+ assert(in);
+
+ r = address_get(link, in, &address);
+ if (r == -ENOENT) {
+ /* Address does not exist, create a new one */
+ r = address_add_internal(link, &link->addresses, in, &address);
+ if (r < 0)
+ return r;
+ } else if (r == 0) {
+ /* Take over a foreign address */
+ r = set_ensure_put(&link->addresses, &address_hash_ops, address);
+ if (r < 0)
+ return r;
+
+ set_remove(link->addresses_foreign, address);
+ } else if (r == 1) {
+ /* Already exists, do nothing */
+ ;
+ } else
+ return r;
+
+ if (ret)
+ *ret = address;
+
+ return 0;
+}
+
+static int address_update(Address *address, const Address *src) {
+ bool ready;
+ int r;
+
+ assert(address);
+ assert(address->link);
+ assert(src);
+
+ ready = address_is_ready(address);
+
+ address->flags = src->flags;
+ address->scope = src->scope;
+ address->cinfo = src->cinfo;
+
+ if (IN_SET(address->link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 0;
+
+ link_update_operstate(address->link, true);
+ link_check_ready(address->link);
+
+ if (!ready && address_is_ready(address)) {
+ if (address->callback) {
+ r = address->callback(address);
+ if (r < 0)
+ return r;
+ }
+
+ if (address->family == AF_INET6 &&
+ in_addr_is_link_local(AF_INET6, &address->in_addr) > 0 &&
+ IN6_IS_ADDR_UNSPECIFIED(&address->link->ipv6ll_address) > 0) {
+
+ r = link_ipv6ll_gained(address->link, &address->in_addr.in6);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int address_drop(Address *address) {
+ Link *link;
+ bool ready;
+ int r;
+
+ assert(address);
+
+ ready = address_is_ready(address);
+ link = address->link;
+
+ r = address_set_masquerade(address, false);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to disable IP masquerading, ignoring: %m");
+
+ address_free(address);
+
+ link_update_operstate(link, true);
+
+ if (link && !ready)
+ link_check_ready(link);
+
+ return 0;
+}
+
+int address_get(Link *link, const Address *in, Address **ret) {
+ Address *existing;
+
+ assert(link);
+ assert(in);
+
+ existing = set_get(link->addresses, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(link->addresses_foreign, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static bool address_exists_internal(Set *addresses, int family, const union in_addr_union *in_addr) {
+ Address *address;
+
+ SET_FOREACH(address, addresses) {
+ if (address->family != family)
+ continue;
+ if (in_addr_equal(address->family, &address->in_addr, in_addr))
+ return true;
+ }
+
+ return false;
+}
+
+bool address_exists(Link *link, int family, const union in_addr_union *in_addr) {
+ assert(link);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ assert(in_addr);
+
+ if (address_exists_internal(link->addresses, family, in_addr))
+ return true;
+ if (address_exists_internal(link->addresses_foreign, family, in_addr))
+ return true;
+ return false;
+}
+
+static int address_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EADDRNOTAVAIL)
+ log_link_message_warning_errno(link, m, r, "Could not drop address");
+ else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ return 1;
+}
+
+int address_remove(
+ const Address *address,
+ Link *link,
+ link_netlink_message_handler_t callback) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(address);
+ assert(IN_SET(address->family, AF_INET, AF_INET6));
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *b = NULL;
+
+ (void) in_addr_to_string(address->family, &address->in_addr, &b);
+ log_link_debug(link, "Removing address %s", strna(b));
+ }
+
+ r = sd_rtnl_message_new_addr(link->manager->rtnl, &req, RTM_DELADDR,
+ link->ifindex, address->family);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_DELADDR message: %m");
+
+ r = sd_rtnl_message_addr_set_prefixlen(req, address->prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set prefixlen: %m");
+
+ r = netlink_message_append_in_addr_union(req, IFA_LOCAL, address->family, &address->in_addr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFA_LOCAL attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req,
+ callback ?: address_remove_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static bool link_is_static_address_configured(const Link *link, const Address *address) {
+ Address *net_address;
+
+ assert(link);
+ assert(address);
+
+ if (!link->network)
+ return false;
+
+ ORDERED_HASHMAP_FOREACH(net_address, link->network->addresses_by_section)
+ if (address_equal(net_address, address))
+ return true;
+
+ return false;
+}
+
+static bool link_address_is_dynamic(const Link *link, const Address *address) {
+ Route *route;
+
+ assert(link);
+ assert(address);
+
+ if (address->cinfo.ifa_prefered != CACHE_INFO_INFINITY_LIFE_TIME)
+ return true;
+
+ /* Even when the address is leased from a DHCP server, networkd assign the address
+ * without lifetime when KeepConfiguration=dhcp. So, let's check that we have
+ * corresponding routes with RTPROT_DHCP. */
+ SET_FOREACH(route, link->routes_foreign) {
+ if (route->protocol != RTPROT_DHCP)
+ continue;
+
+ if (address->family != route->family)
+ continue;
+
+ if (in_addr_equal(address->family, &address->in_addr, &route->prefsrc))
+ return true;
+ }
+
+ return false;
+}
+
+static int link_enumerate_ipv6_tentative_addresses(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *addr;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ r = sd_rtnl_message_new_addr(link->manager->rtnl, &req, RTM_GETADDR, 0, AF_INET6);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(link->manager->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (addr = reply; addr; addr = sd_netlink_message_next(addr)) {
+ unsigned char flags;
+ int ifindex;
+
+ r = sd_rtnl_message_addr_get_ifindex(addr, &ifindex);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: invalid ifindex, ignoring: %m");
+ continue;
+ } else if (link->ifindex != ifindex)
+ continue;
+
+ r = sd_rtnl_message_addr_get_flags(addr, &flags);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address message with invalid flags, ignoring: %m");
+ continue;
+ } else if (!(flags & IFA_F_TENTATIVE))
+ continue;
+
+ log_link_debug(link, "Found tentative ipv6 link-local address");
+ (void) manager_rtnl_process_address(link->manager->rtnl, addr, link->manager);
+ }
+
+ return 0;
+}
+
+int link_drop_foreign_addresses(Link *link) {
+ Address *address;
+ int k, r = 0;
+
+ assert(link);
+
+ /* The kernel doesn't notify us about tentative addresses;
+ * so if ipv6ll is disabled, we need to enumerate them now so we can drop them below */
+ if (!link_ipv6ll_enabled(link)) {
+ r = link_enumerate_ipv6_tentative_addresses(link);
+ if (r < 0)
+ return r;
+ }
+
+ SET_FOREACH(address, link->addresses_foreign) {
+ /* we consider IPv6LL addresses to be managed by the kernel */
+ if (address->family == AF_INET6 && in_addr_is_link_local(AF_INET6, &address->in_addr) == 1 && link_ipv6ll_enabled(link))
+ continue;
+
+ if (link_address_is_dynamic(link, address)) {
+ if (link->network && FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_DHCP))
+ continue;
+ } else if (link->network && FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_STATIC))
+ continue;
+
+ if (link_is_static_address_configured(link, address)) {
+ k = address_add(link, address, NULL);
+ if (k < 0) {
+ log_link_error_errno(link, k, "Failed to add address: %m");
+ if (r >= 0)
+ r = k;
+ }
+ } else {
+ k = address_remove(address, link, NULL);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+static int remove_static_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+ assert(link->address_remove_messages > 0);
+
+ link->address_remove_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EADDRNOTAVAIL)
+ log_link_message_warning_errno(link, m, r, "Could not drop address");
+ else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->address_remove_messages == 0 && link->request_static_addresses) {
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ r = link_set_addresses(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+int link_drop_addresses(Link *link) {
+ Address *address, *pool_address;
+ int k, r = 0;
+
+ assert(link);
+
+ SET_FOREACH(address, link->addresses) {
+ /* we consider IPv6LL addresses to be managed by the kernel */
+ if (address->family == AF_INET6 && in_addr_is_link_local(AF_INET6, &address->in_addr) == 1 && link_ipv6ll_enabled(link))
+ continue;
+
+ k = address_remove(address, link, remove_static_address_handler);
+ if (k < 0 && r >= 0) {
+ r = k;
+ continue;
+ }
+
+ link->address_remove_messages++;
+
+ SET_FOREACH(pool_address, link->pool_addresses)
+ if (address_equal(address, pool_address))
+ address_free(set_remove(link->pool_addresses, pool_address));
+ }
+
+ return r;
+}
+
+static int address_acquire(Link *link, const Address *original, Address **ret) {
+ union in_addr_union in_addr = IN_ADDR_NULL;
+ struct in_addr broadcast = {};
+ _cleanup_(address_freep) Address *na = NULL;
+ int r;
+
+ assert(link);
+ assert(original);
+ assert(ret);
+
+ /* Something useful was configured? just use it */
+ r = in_addr_is_null(original->family, &original->in_addr);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ret = NULL;
+ return 0;
+ }
+
+ /* The address is configured to be 0.0.0.0 or [::] by the user?
+ * Then let's acquire something more useful from the pool. */
+ r = address_pool_acquire(link->manager, original->family, original->prefixlen, &in_addr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+
+ if (original->family == AF_INET) {
+ /* Pick first address in range for ourselves ... */
+ in_addr.in.s_addr = in_addr.in.s_addr | htobe32(1);
+
+ /* .. and use last as broadcast address */
+ if (original->prefixlen > 30)
+ broadcast.s_addr = 0;
+ else
+ broadcast.s_addr = in_addr.in.s_addr | htobe32(0xFFFFFFFFUL >> original->prefixlen);
+ } else if (original->family == AF_INET6)
+ in_addr.in6.s6_addr[15] |= 1;
+
+ r = address_new(&na);
+ if (r < 0)
+ return r;
+
+ r = address_copy(na, original);
+ if (r < 0)
+ return r;
+
+ na->broadcast = broadcast;
+ na->in_addr = in_addr;
+
+ r = set_ensure_put(&link->pool_addresses, &address_hash_ops, na);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EEXIST;
+
+ *ret = TAKE_PTR(na);
+ return 1;
+}
+
+static int ipv4_dad_configure(Address *address);
+
+int address_configure(
+ const Address *address,
+ Link *link,
+ link_netlink_message_handler_t callback,
+ bool update,
+ Address **ret) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ Address *acquired_address, *a;
+ uint32_t flags;
+ int r;
+
+ assert(address);
+ assert(IN_SET(address->family, AF_INET, AF_INET6));
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(callback);
+
+ /* If this is a new address, then refuse adding more than the limit */
+ if (address_get(link, address, NULL) <= 0 &&
+ set_size(link->addresses) >= ADDRESSES_PER_LINK_MAX)
+ return log_link_error_errno(link, SYNTHETIC_ERRNO(E2BIG),
+ "Too many addresses are configured, refusing: %m");
+
+ r = address_acquire(link, address, &acquired_address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to acquire an address from pool: %m");
+ if (acquired_address)
+ address = acquired_address;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *str = NULL;
+
+ (void) in_addr_to_string(address->family, &address->in_addr, &str);
+ log_link_debug(link, "%s address: %s", update ? "Updating" : "Configuring", strna(str));
+ }
+
+ if (update)
+ r = sd_rtnl_message_new_addr_update(link->manager->rtnl, &req,
+ link->ifindex, address->family);
+ else
+ r = sd_rtnl_message_new_addr(link->manager->rtnl, &req, RTM_NEWADDR,
+ link->ifindex, address->family);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_NEWADDR message: %m");
+
+ r = sd_rtnl_message_addr_set_prefixlen(req, address->prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set prefixlen: %m");
+
+ flags = address->flags | IFA_F_PERMANENT;
+ r = sd_rtnl_message_addr_set_flags(req, flags & 0xff);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set flags: %m");
+
+ if (flags & ~0xff) {
+ r = sd_netlink_message_append_u32(req, IFA_FLAGS, flags);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set extended flags: %m");
+ }
+
+ r = sd_rtnl_message_addr_set_scope(req, address->scope);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set scope: %m");
+
+ r = netlink_message_append_in_addr_union(req, IFA_LOCAL, address->family, &address->in_addr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFA_LOCAL attribute: %m");
+
+ if (in_addr_is_null(address->family, &address->in_addr_peer) == 0) {
+ r = netlink_message_append_in_addr_union(req, IFA_ADDRESS, address->family, &address->in_addr_peer);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFA_ADDRESS attribute: %m");
+ } else if (address_may_have_broadcast(address)) {
+ r = sd_netlink_message_append_in_addr(req, IFA_BROADCAST, &address->broadcast);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFA_BROADCAST attribute: %m");
+ }
+
+ if (address->family == AF_INET && address->label) {
+ r = sd_netlink_message_append_string(req, IFA_LABEL, address->label);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFA_LABEL attribute: %m");
+ }
+
+ r = sd_netlink_message_append_cache_info(req, IFA_CACHEINFO, &address->cinfo);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFA_CACHEINFO attribute: %m");
+
+ r = address_add(link, address, &a);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not add address: %m");
+
+ r = address_set_masquerade(a, true);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not enable IP masquerading, ignoring: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, callback, link_netlink_destroy_callback, link);
+ if (r < 0) {
+ (void) address_set_masquerade(a, false);
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+ }
+
+ link_ref(link);
+
+ if (FLAGS_SET(address->duplicate_address_detection, ADDRESS_FAMILY_IPV4)) {
+ r = ipv4_dad_configure(a);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to start IPv4ACD client, ignoring: %m");
+ }
+
+ if (ret)
+ *ret = a;
+
+ return 1;
+}
+
+static int static_address_ready_callback(Address *address) {
+ Address *a;
+ Link *link;
+
+ assert(address);
+ assert(address->link);
+
+ link = address->link;
+
+ if (!link->addresses_configured)
+ return 0;
+
+ SET_FOREACH(a, link->static_addresses)
+ if (!address_is_ready(a)) {
+ _cleanup_free_ char *str = NULL;
+
+ (void) in_addr_to_string(a->family, &a->in_addr, &str);
+ log_link_debug(link, "an address %s/%u is not ready", strnull(str), a->prefixlen);
+ return 0;
+ }
+
+ /* This should not be called again */
+ SET_FOREACH(a, link->static_addresses)
+ a->callback = NULL;
+
+ link->addresses_ready = true;
+
+ return link_set_routes(link);
+}
+
+static int address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(rtnl);
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+ assert(link->address_messages > 0);
+
+ link->address_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not set address");
+ link_enter_failed(link);
+ return 1;
+ } else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->address_messages == 0) {
+ Address *a;
+
+ log_link_debug(link, "Addresses set");
+ link->addresses_configured = true;
+
+ /* When all static addresses are already ready, then static_address_ready_callback()
+ * will not be called automatically. So, call it here. */
+ a = set_first(link->static_addresses);
+ if (!a) {
+ log_link_debug(link, "No static address is stored. Already removed?");
+ return 1;
+ }
+
+ r = static_address_ready_callback(a);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+static int static_address_configure(const Address *address, Link *link, bool update) {
+ Address *ret;
+ int r;
+
+ assert(address);
+ assert(link);
+
+ r = address_configure(address, link, address_handler, update, &ret);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not configure static address: %m");
+
+ link->address_messages++;
+
+ r = set_ensure_put(&link->static_addresses, &address_hash_ops, ret);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to store static address: %m");
+
+ ret->callback = static_address_ready_callback;
+
+ return 0;
+}
+
+int link_set_addresses(Link *link) {
+ Address *ad;
+ Prefix *p;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (link->address_remove_messages != 0) {
+ log_link_debug(link, "Removing old addresses, new addresses will be configured later.");
+ link->request_static_addresses = true;
+ return 0;
+ }
+
+ ORDERED_HASHMAP_FOREACH(ad, link->network->addresses_by_section) {
+ bool update;
+
+ update = address_get(link, ad, NULL) > 0;
+ r = static_address_configure(ad, link, update);
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(p, link->network->prefixes_by_section) {
+ _cleanup_(address_freep) Address *address = NULL;
+
+ if (!p->assign)
+ continue;
+
+ r = address_new(&address);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_radv_prefix_get_prefix(p->radv_prefix, &address->in_addr.in6, &address->prefixlen);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not get RA prefix: %m");
+
+ r = generate_ipv6_eui_64_address(link, &address->in_addr.in6);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not generate EUI64 address: %m");
+
+ address->family = AF_INET6;
+ r = static_address_configure(address, link, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (link->address_messages == 0) {
+ link->addresses_configured = true;
+ link->addresses_ready = true;
+ r = link_set_routes(link);
+ if (r < 0)
+ return r;
+ } else {
+ log_link_debug(link, "Setting addresses");
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ }
+
+ return 0;
+}
+
+int manager_rtnl_process_address(sd_netlink *rtnl, sd_netlink_message *message, Manager *m) {
+ _cleanup_(address_freep) Address *tmp = NULL;
+ _cleanup_free_ char *buf = NULL, *buf_peer = NULL;
+ Link *link = NULL;
+ uint16_t type;
+ unsigned char flags;
+ Address *address = NULL;
+ char valid_buf[FORMAT_TIMESPAN_MAX];
+ const char *valid_str = NULL;
+ int ifindex, r;
+ bool has_peer = false;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_message_warning_errno(message, r, "rtnl: failed to receive address message, ignoring");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWADDR, RTM_DELADDR)) {
+ log_warning("rtnl: received unexpected message type %u when processing address, ignoring.", type);
+ return 0;
+ }
+
+ r = sd_rtnl_message_addr_get_ifindex(message, &ifindex);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get ifindex from message, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received address message with invalid ifindex %d, ignoring.", ifindex);
+ return 0;
+ }
+
+ r = link_get(m, ifindex, &link);
+ if (r < 0 || !link) {
+ /* when enumerating we might be out of sync, but we will get the address again, so just
+ * ignore it */
+ if (!m->enumerating)
+ log_warning("rtnl: received address for link '%d' we don't know about, ignoring.", ifindex);
+ return 0;
+ }
+
+ r = address_new(&tmp);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_rtnl_message_addr_get_family(message, &tmp->family);
+ if (r < 0) {
+ log_link_warning(link, "rtnl: received address message without family, ignoring.");
+ return 0;
+ } else if (!IN_SET(tmp->family, AF_INET, AF_INET6)) {
+ log_link_debug(link, "rtnl: received address message with invalid family '%i', ignoring.", tmp->family);
+ return 0;
+ }
+
+ r = sd_rtnl_message_addr_get_prefixlen(message, &tmp->prefixlen);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address message without prefixlen, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_addr_get_scope(message, &tmp->scope);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address message without scope, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_addr_get_flags(message, &flags);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address message without flags, ignoring: %m");
+ return 0;
+ }
+ tmp->flags = flags;
+
+ switch (tmp->family) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(message, IFA_LOCAL, &tmp->in_addr.in);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address message without valid address, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in_addr(message, IFA_ADDRESS, &tmp->in_addr_peer.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: could not get peer address from address message, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ if (in4_addr_equal(&tmp->in_addr.in, &tmp->in_addr_peer.in))
+ tmp->in_addr_peer = IN_ADDR_NULL;
+ else
+ has_peer = true;
+ }
+
+ r = sd_netlink_message_read_in_addr(message, IFA_BROADCAST, &tmp->broadcast);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: could not get broadcast from address message, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string_strdup(message, IFA_LABEL, &tmp->label);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: could not get label from address message, ignoring: %m");
+ return 0;
+ } else if (r >= 0 && streq_ptr(tmp->label, link->ifname))
+ tmp->label = mfree(tmp->label);
+
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(message, IFA_LOCAL, &tmp->in_addr.in6);
+ if (r >= 0) {
+ /* Have peer address. */
+ r = sd_netlink_message_read_in6_addr(message, IFA_ADDRESS, &tmp->in_addr_peer.in6);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: could not get peer address from address message, ignoring: %m");
+ return 0;
+ }
+ has_peer = true;
+ } else if (r == -ENODATA) {
+ /* Does not have peer address. */
+ r = sd_netlink_message_read_in6_addr(message, IFA_ADDRESS, &tmp->in_addr.in6);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received address message without valid address, ignoring: %m");
+ return 0;
+ }
+ } else {
+ log_link_warning_errno(link, r, "rtnl: could not get local address from address message, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Received unsupported address family");
+ }
+
+ (void) in_addr_to_string(tmp->family, &tmp->in_addr, &buf);
+ (void) in_addr_to_string(tmp->family, &tmp->in_addr_peer, &buf_peer);
+
+ r = sd_netlink_message_read_cache_info(message, IFA_CACHEINFO, &tmp->cinfo);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: cannot get IFA_CACHEINFO attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0 && tmp->cinfo.ifa_valid != CACHE_INFO_INFINITY_LIFE_TIME)
+ valid_str = format_timespan(valid_buf, FORMAT_TIMESPAN_MAX,
+ tmp->cinfo.ifa_valid * USEC_PER_SEC,
+ USEC_PER_SEC);
+
+ (void) address_get(link, tmp, &address);
+
+ switch (type) {
+ case RTM_NEWADDR:
+ if (address)
+ log_link_debug(link, "Remembering updated address: %s%s%s/%u (valid %s%s)",
+ strnull(buf), has_peer ? " peer " : "",
+ has_peer ? strnull(buf_peer) : "", tmp->prefixlen,
+ valid_str ? "for " : "forever", strempty(valid_str));
+ else {
+ /* An address appeared that we did not request */
+ r = address_add_foreign(link, tmp, &address);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to remember foreign address %s/%u, ignoring: %m",
+ strnull(buf), tmp->prefixlen);
+ return 0;
+ } else
+ log_link_debug(link, "Remembering foreign address: %s%s%s/%u (valid %s%s)",
+ strnull(buf), has_peer ? " peer " : "",
+ has_peer ? strnull(buf_peer) : "", tmp->prefixlen,
+ valid_str ? "for " : "forever", strempty(valid_str));
+ }
+
+ /* address_update() logs internally, so we don't need to here. */
+ r = address_update(address, tmp);
+ if (r < 0)
+ link_enter_failed(link);
+
+ break;
+
+ case RTM_DELADDR:
+ if (address) {
+ log_link_debug(link, "Forgetting address: %s%s%s/%u (valid %s%s)",
+ strnull(buf), has_peer ? " peer " : "",
+ has_peer ? strnull(buf_peer) : "", tmp->prefixlen,
+ valid_str ? "for " : "forever", strempty(valid_str));
+ (void) address_drop(address);
+ } else
+ log_link_debug(link, "Kernel removed an address we don't remember: %s%s%s/%u (valid %s%s), ignoring.",
+ strnull(buf), has_peer ? " peer " : "",
+ has_peer ? strnull(buf_peer) : "", tmp->prefixlen,
+ valid_str ? "for " : "forever", strempty(valid_str));
+
+ break;
+
+ default:
+ assert_not_reached("Received invalid RTNL message type");
+ }
+
+ return 1;
+}
+
+int link_serialize_addresses(Link *link, FILE *f) {
+ bool space = false;
+ Address *a;
+
+ assert(link);
+
+ fputs("ADDRESSES=", f);
+ SET_FOREACH(a, link->addresses) {
+ _cleanup_free_ char *address_str = NULL;
+
+ if (in_addr_to_string(a->family, &a->in_addr, &address_str) < 0)
+ continue;
+
+ fprintf(f, "%s%s/%u", space ? " " : "", address_str, a->prefixlen);
+ space = true;
+ }
+ fputc('\n', f);
+
+ return 0;
+}
+
+int link_deserialize_addresses(Link *link, const char *addresses) {
+ int r;
+
+ assert(link);
+
+ for (const char *p = addresses;; ) {
+ _cleanup_(address_freep) Address *tmp = NULL;
+ _cleanup_free_ char *address_str = NULL;
+
+ r = extract_first_word(&p, &address_str, NULL, 0);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to parse ADDRESSES=: %m");
+ if (r == 0)
+ return 0;
+
+ r = address_new(&tmp);
+ if (r < 0)
+ return log_oom();
+
+ r = in_addr_prefix_from_string_auto(address_str, &tmp->family, &tmp->in_addr, &tmp->prefixlen);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to parse address, ignoring: %s", address_str);
+ continue;
+ }
+
+ r = address_add(link, tmp, NULL);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Failed to add address %s, ignoring: %m", address_str);
+ }
+
+ return 0;
+}
+
+static void static_address_on_acd(sd_ipv4acd *acd, int event, void *userdata) {
+ _cleanup_free_ char *pretty = NULL;
+ Address *address;
+ Link *link;
+ int r;
+
+ assert(acd);
+ assert(userdata);
+
+ address = (Address *) userdata;
+ link = address->link;
+
+ (void) in_addr_to_string(address->family, &address->in_addr, &pretty);
+ switch (event) {
+ case SD_IPV4ACD_EVENT_STOP:
+ log_link_debug(link, "Stopping ACD client...");
+ return;
+
+ case SD_IPV4ACD_EVENT_BIND:
+ log_link_debug(link, "Successfully claimed address %s", strna(pretty));
+ link_check_ready(link);
+ break;
+
+ case SD_IPV4ACD_EVENT_CONFLICT:
+ log_link_warning(link, "DAD conflict. Dropping address %s", strna(pretty));
+ r = address_remove(address, link, NULL);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to drop DAD conflicted address %s", strna(pretty));;
+
+ link_check_ready(link);
+ break;
+
+ default:
+ assert_not_reached("Invalid IPv4ACD event.");
+ }
+
+ (void) sd_ipv4acd_stop(acd);
+
+ return;
+}
+
+static int ipv4_dad_configure(Address *address) {
+ int r;
+
+ assert(address);
+ assert(address->link);
+
+ if (address->family != AF_INET)
+ return 0;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *pretty = NULL;
+
+ (void) in_addr_to_string(address->family, &address->in_addr, &pretty);
+ log_link_debug(address->link, "Starting IPv4ACD client. Probing address %s", strna(pretty));
+ }
+
+ if (!address->acd) {
+ r = sd_ipv4acd_new(&address->acd);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_attach_event(address->acd, address->link->manager->event, 0);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_ipv4acd_set_ifindex(address->acd, address->link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_mac(address->acd, &address->link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_address(address->acd, &address->in_addr.in);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_callback(address->acd, static_address_on_acd, address);
+ if (r < 0)
+ return r;
+
+ return sd_ipv4acd_start(address->acd, true);
+}
+
+static int ipv4_dad_update_mac_one(Address *address) {
+ bool running;
+ int r;
+
+ assert(address);
+
+ if (!address->acd)
+ return 0;
+
+ running = sd_ipv4acd_is_running(address->acd);
+
+ r = sd_ipv4acd_stop(address->acd);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_mac(address->acd, &address->link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ if (running) {
+ r = sd_ipv4acd_start(address->acd, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int ipv4_dad_update_mac(Link *link) {
+ Address *address;
+ int k, r = 0;
+
+ assert(link);
+
+ SET_FOREACH(address, link->addresses) {
+ k = ipv4_dad_update_mac_one(address);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+int ipv4_dad_stop(Link *link) {
+ Address *address;
+ int k, r = 0;
+
+ assert(link);
+
+ SET_FOREACH(address, link->addresses) {
+ k = sd_ipv4acd_stop(address->acd);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+void ipv4_dad_unref(Link *link) {
+ Address *address;
+
+ assert(link);
+
+ SET_FOREACH(address, link->addresses)
+ address->acd = sd_ipv4acd_unref(address->acd);
+}
+
+int config_parse_broadcast(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(address_free_or_set_invalidp) Address *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate new address, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (n->family == AF_INET6) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Broadcast is not valid for IPv6 addresses, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = in_addr_from_string(AF_INET, rvalue, (union in_addr_union*) &n->broadcast);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Broadcast is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n->family = AF_INET;
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(address_free_or_set_invalidp) Address *n = NULL;
+ union in_addr_union buffer;
+ unsigned char prefixlen;
+ int r, f;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "Network"))
+ /* we are not in an Address section, so use line number instead. */
+ r = address_new_static(network, filename, line, &n);
+ else
+ r = address_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate new address, ignoring assignment: %m");
+ return 0;
+ }
+
+ /* Address=address/prefixlen */
+ r = in_addr_prefix_from_string_auto_internal(rvalue, PREFIXLEN_REFUSE, &f, &buffer, &prefixlen);
+ if (r == -ENOANO) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "An address '%s' is specified without prefix length. "
+ "The behavior of parsing addresses without prefix length will be changed in the future release. "
+ "Please specify prefix length explicitly.", rvalue);
+
+ r = in_addr_prefix_from_string_auto_internal(rvalue, PREFIXLEN_LEGACY, &f, &buffer, &prefixlen);
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid address '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (n->family != AF_UNSPEC && f != n->family) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Address is incompatible, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (in_addr_is_null(f, &buffer)) {
+ /* Will use address from address pool. Note that for ipv6 case, prefix of the address
+ * pool is 8, but 40 bit is used by the global ID and 16 bit by the subnet ID. So,
+ * let's limit the prefix length to 64 or larger. See RFC4193. */
+ if ((f == AF_INET && prefixlen < 8) ||
+ (f == AF_INET6 && prefixlen < 64)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Null address with invalid prefixlen='%u', ignoring assignment: %s",
+ prefixlen, rvalue);
+ return 0;
+ }
+ }
+
+ n->family = f;
+ n->prefixlen = prefixlen;
+
+ if (streq(lvalue, "Address"))
+ n->in_addr = buffer;
+ else
+ n->in_addr_peer = buffer;
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_label(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(address_free_or_set_invalidp) Address *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate new address, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (!address_label_valid(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Interface label is too long or invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = free_and_strdup(&n->label, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ n = NULL;
+ return 0;
+}
+
+int config_parse_lifetime(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(address_free_or_set_invalidp) Address *n = NULL;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate new address, ignoring assignment: %m");
+ return 0;
+ }
+
+ /* We accept only "forever", "infinity", empty, or "0". */
+ if (STR_IN_SET(rvalue, "forever", "infinity", ""))
+ k = CACHE_INFO_INFINITY_LIFE_TIME;
+ else if (streq(rvalue, "0"))
+ k = 0;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid PreferredLifetime= value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n->cinfo.ifa_prefered = k;
+ TAKE_PTR(n);
+
+ return 0;
+}
+
+int config_parse_address_flags(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(address_free_or_set_invalidp) Address *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate new address, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s=, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "AddPrefixRoute"))
+ r = !r;
+
+ SET_FLAG(n->flags, ltype, r);
+
+ n = NULL;
+ return 0;
+}
+
+int config_parse_address_scope(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(address_free_or_set_invalidp) Address *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate new address, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (streq(rvalue, "host"))
+ n->scope = RT_SCOPE_HOST;
+ else if (streq(rvalue, "link"))
+ n->scope = RT_SCOPE_LINK;
+ else if (streq(rvalue, "global"))
+ n->scope = RT_SCOPE_UNIVERSE;
+ else {
+ r = safe_atou8(rvalue , &n->scope);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Could not parse address scope \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ }
+
+ n->scope_set = true;
+ n = NULL;
+ return 0;
+}
+
+int config_parse_duplicate_address_detection(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(address_free_or_set_invalidp) Address *n = NULL;
+ AddressFamily a;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = address_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate new address, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r >= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "For historical reasons, %s=%s means %s=%s. "
+ "Please use 'both', 'ipv4', 'ipv6' or 'none' instead.",
+ lvalue, rvalue, lvalue, r ? "none" : "both");
+ n->duplicate_address_detection = r ? ADDRESS_FAMILY_NO : ADDRESS_FAMILY_YES;
+ n = NULL;
+ return 0;
+ }
+
+ a = duplicate_address_detection_address_family_from_string(rvalue);
+ if (a < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s=, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ n->duplicate_address_detection = a;
+ n = NULL;
+ return 0;
+}
+
+bool address_is_ready(const Address *a) {
+ assert(a);
+
+ return !(a->flags & IFA_F_TENTATIVE);
+}
+
+static int address_section_verify(Address *address) {
+ if (section_is_invalid(address->section))
+ return -EINVAL;
+
+ if (address->family == AF_UNSPEC) {
+ assert(address->section);
+
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Address section without Address= field configured. "
+ "Ignoring [Address] section from line %u.",
+ address->section->filename, address->section->line);
+ }
+
+ if (address_may_have_broadcast(address)) {
+ if (address->broadcast.s_addr == 0)
+ address->broadcast.s_addr = address->in_addr.in.s_addr | htobe32(0xfffffffflu >> address->prefixlen);
+ } else if (address->broadcast.s_addr != 0) {
+ log_warning("%s: broadcast address is set for IPv6 address or IPv4 address with prefixlength larger than 30. "
+ "Ignoring Broadcast= setting in the [Address] section from line %u.",
+ address->section->filename, address->section->line);
+
+ address->broadcast.s_addr = 0;
+ }
+
+ if (address->family == AF_INET6 && address->label) {
+ log_warning("%s: address label is set for IPv6 address in the [Address] section from line %u. "
+ "Ignoring Label= setting.",
+ address->section->filename, address->section->line);
+
+ address->label = mfree(address->label);
+ }
+
+ if (in_addr_is_localhost(address->family, &address->in_addr) > 0 &&
+ (address->family == AF_INET || !address->scope_set)) {
+ /* For IPv4, scope must be always RT_SCOPE_HOST.
+ * For IPv6, use RT_SCOPE_HOST only when it is not explicitly specified. */
+
+ if (address->scope_set && address->scope != RT_SCOPE_HOST)
+ log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: non-host scope is set in the [Address] section from line %u. "
+ "Ignoring Scope= setting.",
+ address->section->filename, address->section->line);
+
+ address->scope = RT_SCOPE_HOST;
+ }
+
+ if (!FLAGS_SET(address->duplicate_address_detection, ADDRESS_FAMILY_IPV6))
+ address->flags |= IFA_F_NODAD;
+
+ return 0;
+}
+
+void network_drop_invalid_addresses(Network *network) {
+ Address *address;
+
+ assert(network);
+
+ ORDERED_HASHMAP_FOREACH(address, network->addresses_by_section)
+ if (address_section_verify(address) < 0)
+ address_free(address);
+}
diff --git a/src/network/networkd-address.h b/src/network/networkd-address.h
new file mode 100644
index 0000000..56e81da
--- /dev/null
+++ b/src/network/networkd-address.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-ipv4acd.h"
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "networkd-link.h"
+#include "networkd-util.h"
+
+#define CACHE_INFO_INFINITY_LIFE_TIME 0xFFFFFFFFU
+
+typedef struct Manager Manager;
+typedef struct Network Network;
+typedef int (*address_ready_callback_t)(Address *address);
+
+typedef struct Address {
+ Network *network;
+ NetworkConfigSection *section;
+
+ Link *link;
+
+ int family;
+ unsigned char prefixlen;
+ unsigned char scope;
+ uint32_t flags;
+ char *label;
+
+ struct in_addr broadcast;
+ struct ifa_cacheinfo cinfo;
+
+ union in_addr_union in_addr;
+ union in_addr_union in_addr_peer;
+
+ bool scope_set:1;
+ bool ip_masquerade_done:1;
+ AddressFamily duplicate_address_detection;
+
+ /* Called when address become ready */
+ address_ready_callback_t callback;
+
+ sd_ipv4acd *acd;
+} Address;
+
+int address_new(Address **ret);
+Address *address_free(Address *address);
+int address_get(Link *link, const Address *in, Address **ret);
+bool address_exists(Link *link, int family, const union in_addr_union *in_addr);
+int address_configure(const Address *address, Link *link, link_netlink_message_handler_t callback, bool update, Address **ret);
+int address_remove(const Address *address, Link *link, link_netlink_message_handler_t callback);
+bool address_equal(const Address *a1, const Address *a2);
+bool address_is_ready(const Address *a);
+
+int generate_ipv6_eui_64_address(const Link *link, struct in6_addr *ret);
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(Address, address_free);
+
+int link_set_addresses(Link *link);
+int link_drop_addresses(Link *link);
+int link_drop_foreign_addresses(Link *link);
+int link_serialize_addresses(Link *link, FILE *f);
+int link_deserialize_addresses(Link *link, const char *addresses);
+
+void ipv4_dad_unref(Link *link);
+int ipv4_dad_stop(Link *link);
+int ipv4_dad_update_mac(Link *link);
+
+int manager_rtnl_process_address(sd_netlink *nl, sd_netlink_message *message, Manager *m);
+
+void network_drop_invalid_addresses(Network *network);
+
+void address_hash_func(const Address *a, struct siphash *state);
+int address_compare_func(const Address *a1, const Address *a2);
+extern const struct hash_ops address_hash_ops;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_broadcast);
+CONFIG_PARSER_PROTOTYPE(config_parse_label);
+CONFIG_PARSER_PROTOTYPE(config_parse_lifetime);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_flags);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_scope);
+CONFIG_PARSER_PROTOTYPE(config_parse_duplicate_address_detection);
+
+#define IPV4_ADDRESS_FMT_STR "%u.%u.%u.%u"
+#define IPV4_ADDRESS_FMT_VAL(address) \
+ be32toh((address).s_addr) >> 24, \
+ (be32toh((address).s_addr) >> 16) & 0xFFu, \
+ (be32toh((address).s_addr) >> 8) & 0xFFu, \
+ be32toh((address).s_addr) & 0xFFu
diff --git a/src/network/networkd-brvlan.c b/src/network/networkd-brvlan.c
new file mode 100644
index 0000000..e53c73c
--- /dev/null
+++ b/src/network/networkd-brvlan.c
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2016 BISDN GmbH. All rights reserved.
+***/
+
+#include <netinet/in.h>
+#include <linux/if_bridge.h>
+#include <stdbool.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netlink-util.h"
+#include "networkd-brvlan.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+#include "vlan-util.h"
+
+static bool is_bit_set(unsigned bit, uint32_t scope) {
+ assert(bit < sizeof(scope)*8);
+ return scope & (UINT32_C(1) << bit);
+}
+
+static void set_bit(unsigned nr, uint32_t *addr) {
+ if (nr < BRIDGE_VLAN_BITMAP_MAX)
+ addr[nr / 32] |= (UINT32_C(1) << (nr % 32));
+}
+
+static int find_next_bit(int i, uint32_t x) {
+ int j;
+
+ if (i >= 32)
+ return -1;
+
+ /* find first bit */
+ if (i < 0)
+ return BUILTIN_FFS_U32(x);
+
+ /* mask off prior finds to get next */
+ j = __builtin_ffs(x >> i);
+ return j ? j + i : 0;
+}
+
+static int append_vlan_info_data(Link *const link, sd_netlink_message *req, uint16_t pvid, const uint32_t *br_vid_bitmap, const uint32_t *br_untagged_bitmap) {
+ struct bridge_vlan_info br_vlan;
+ int i, j, k, r, cnt;
+ uint16_t begin, end;
+ bool done, untagged = false;
+
+ assert(link);
+ assert(req);
+ assert(br_vid_bitmap);
+ assert(br_untagged_bitmap);
+
+ cnt = 0;
+
+ begin = end = UINT16_MAX;
+ for (k = 0; k < BRIDGE_VLAN_BITMAP_LEN; k++) {
+ unsigned base_bit;
+ uint32_t vid_map = br_vid_bitmap[k];
+ uint32_t untagged_map = br_untagged_bitmap[k];
+
+ base_bit = k * 32;
+ i = -1;
+ done = false;
+ do {
+ j = find_next_bit(i, vid_map);
+ if (j > 0) {
+ /* first hit of any bit */
+ if (begin == UINT16_MAX && end == UINT16_MAX) {
+ begin = end = j - 1 + base_bit;
+ untagged = is_bit_set(j - 1, untagged_map);
+ goto next;
+ }
+
+ /* this bit is a continuation of prior bits */
+ if (j - 2 + base_bit == end && untagged == is_bit_set(j - 1, untagged_map) && (uint16_t)j - 1 + base_bit != pvid && (uint16_t)begin != pvid) {
+ end++;
+ goto next;
+ }
+ } else
+ done = true;
+
+ if (begin != UINT16_MAX) {
+ cnt++;
+ if (done && k < BRIDGE_VLAN_BITMAP_LEN - 1)
+ break;
+
+ br_vlan.flags = 0;
+ if (untagged)
+ br_vlan.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+ if (begin == end) {
+ br_vlan.vid = begin;
+
+ if (begin == pvid)
+ br_vlan.flags |= BRIDGE_VLAN_INFO_PVID;
+
+ r = sd_netlink_message_append_data(req, IFLA_BRIDGE_VLAN_INFO, &br_vlan, sizeof(br_vlan));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRIDGE_VLAN_INFO attribute: %m");
+ } else {
+ br_vlan.vid = begin;
+ br_vlan.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
+
+ r = sd_netlink_message_append_data(req, IFLA_BRIDGE_VLAN_INFO, &br_vlan, sizeof(br_vlan));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRIDGE_VLAN_INFO attribute: %m");
+
+ br_vlan.vid = end;
+ br_vlan.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
+ br_vlan.flags |= BRIDGE_VLAN_INFO_RANGE_END;
+
+ r = sd_netlink_message_append_data(req, IFLA_BRIDGE_VLAN_INFO, &br_vlan, sizeof(br_vlan));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_BRIDGE_VLAN_INFO attribute: %m");
+ }
+
+ if (done)
+ break;
+ }
+ if (j > 0) {
+ begin = end = j - 1 + base_bit;
+ untagged = is_bit_set(j - 1, untagged_map);
+ }
+
+ next:
+ i = j;
+ } while (!done);
+ }
+
+ assert(cnt > 0);
+ return cnt;
+}
+
+static int set_brvlan_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_message_warning_errno(link, m, r, "Could not add VLAN to bridge port");
+
+ return 1;
+}
+
+int link_set_bridge_vlan(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->network);
+
+ if (!link->network->use_br_vlan)
+ return 0;
+
+ if (!link->network->bridge && !streq_ptr(link->kind, "bridge"))
+ return 0;
+
+ /* pvid might not be in br_vid_bitmap yet */
+ if (link->network->pvid)
+ set_bit(link->network->pvid, link->network->br_vid_bitmap);
+
+ /* create new RTM message */
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_family(req, AF_BRIDGE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set message family: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_AF_SPEC);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_AF_SPEC container: %m");
+
+ /* master needs flag self */
+ if (!link->network->bridge) {
+ uint16_t flags = BRIDGE_FLAGS_SELF;
+ r = sd_netlink_message_append_data(req, IFLA_BRIDGE_FLAGS, &flags, sizeof(flags));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_BRIDGE_FLAGS: %m");
+ }
+
+ /* add vlan info */
+ r = append_vlan_info_data(link, req, link->network->pvid, link->network->br_vid_bitmap, link->network->br_untagged_bitmap);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append VLANs: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close IFLA_AF_SPEC container: %m");
+
+ /* send message to the kernel */
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_brvlan_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+int config_parse_brvlan_pvid(const char *unit, const char *filename,
+ unsigned line, const char *section,
+ unsigned section_line, const char *lvalue,
+ int ltype, const char *rvalue, void *data,
+ void *userdata) {
+ Network *network = userdata;
+ uint16_t pvid;
+ int r;
+
+ r = parse_vlanid(rvalue, &pvid);
+ if (r < 0)
+ return r;
+
+ network->pvid = pvid;
+ network->use_br_vlan = true;
+
+ return 0;
+}
+
+int config_parse_brvlan_vlan(const char *unit, const char *filename,
+ unsigned line, const char *section,
+ unsigned section_line, const char *lvalue,
+ int ltype, const char *rvalue, void *data,
+ void *userdata) {
+ Network *network = userdata;
+ uint16_t vid, vid_end;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_vid_range(rvalue, &vid, &vid_end);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse VLAN, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ for (; vid <= vid_end; vid++)
+ set_bit(vid, network->br_vid_bitmap);
+
+ network->use_br_vlan = true;
+ return 0;
+}
+
+int config_parse_brvlan_untagged(const char *unit, const char *filename,
+ unsigned line, const char *section,
+ unsigned section_line, const char *lvalue,
+ int ltype, const char *rvalue, void *data,
+ void *userdata) {
+ Network *network = userdata;
+ int r;
+ uint16_t vid, vid_end;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_vid_range(rvalue, &vid, &vid_end);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Could not parse VLAN: %s", rvalue);
+ return 0;
+ }
+
+ for (; vid <= vid_end; vid++) {
+ set_bit(vid, network->br_vid_bitmap);
+ set_bit(vid, network->br_untagged_bitmap);
+ }
+
+ network->use_br_vlan = true;
+ return 0;
+}
diff --git a/src/network/networkd-brvlan.h b/src/network/networkd-brvlan.h
new file mode 100644
index 0000000..938b790
--- /dev/null
+++ b/src/network/networkd-brvlan.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2016 BISDN GmbH. All rights reserved.
+***/
+
+#include "conf-parser.h"
+
+#define BRIDGE_VLAN_BITMAP_MAX 4096
+#define BRIDGE_VLAN_BITMAP_LEN (BRIDGE_VLAN_BITMAP_MAX / 32)
+
+typedef struct Link Link;
+
+int link_set_bridge_vlan(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_brvlan_pvid);
+CONFIG_PARSER_PROTOTYPE(config_parse_brvlan_vlan);
+CONFIG_PARSER_PROTOTYPE(config_parse_brvlan_untagged);
diff --git a/src/network/networkd-can.c b/src/network/networkd-can.c
new file mode 100644
index 0000000..7e31d2f
--- /dev/null
+++ b/src/network/networkd-can.c
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <linux/can/netlink.h>
+
+#include "netlink-util.h"
+#include "networkd-can.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+#define CAN_TERMINATION_OHM_VALUE 120
+
+int config_parse_can_bitrate(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint32_t *br = data;
+ uint64_t sz;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1000, &sz);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse can bitrate '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ /* Linux uses __u32 for bitrates, so the value should not exceed that. */
+ if (sz <= 0 || sz > UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Bit rate out of permitted range 1...4294967295");
+ return 0;
+ }
+
+ *br = (uint32_t) sz;
+
+ return 0;
+}
+
+static int link_up_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ /* we warn but don't fail the link, as it may be brought up later */
+ log_link_message_warning_errno(link, m, r, "Could not bring up interface");
+
+ return 1;
+}
+
+static int link_up_can(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+
+ log_link_debug(link, "Bringing CAN link up");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link flags: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_up_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_set_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ log_link_debug(link, "Set link");
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Failed to configure CAN link");
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+static int link_set_can(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ struct can_ctrlmode cm = {};
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ log_link_debug(link, "Configuring CAN link.");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &m, RTM_NEWLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_set_flags(m, NLM_F_REQUEST | NLM_F_ACK);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set netlink flags: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, link->kind);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INFO_DATA attribute: %m");
+
+ if (link->network->can_bitrate > 0 || link->network->can_sample_point > 0) {
+ struct can_bittiming bt = {
+ .bitrate = link->network->can_bitrate,
+ .sample_point = link->network->can_sample_point,
+ };
+
+ log_link_debug(link, "Setting bitrate = %d bit/s", bt.bitrate);
+ if (link->network->can_sample_point > 0)
+ log_link_debug(link, "Setting sample point = %d.%d%%", bt.sample_point / 10, bt.sample_point % 10);
+ else
+ log_link_debug(link, "Using default sample point");
+
+ r = sd_netlink_message_append_data(m, IFLA_CAN_BITTIMING, &bt, sizeof(bt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_CAN_BITTIMING attribute: %m");
+ }
+
+ if (link->network->can_data_bitrate > 0 || link->network->can_data_sample_point > 0) {
+ struct can_bittiming bt = {
+ .bitrate = link->network->can_data_bitrate,
+ .sample_point = link->network->can_data_sample_point,
+ };
+
+ log_link_debug(link, "Setting data bitrate = %d bit/s", bt.bitrate);
+ if (link->network->can_data_sample_point > 0)
+ log_link_debug(link, "Setting data sample point = %d.%d%%", bt.sample_point / 10, bt.sample_point % 10);
+ else
+ log_link_debug(link, "Using default data sample point");
+
+ r = sd_netlink_message_append_data(m, IFLA_CAN_DATA_BITTIMING, &bt, sizeof(bt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_CAN_DATA_BITTIMING attribute: %m");
+ }
+
+ if (link->network->can_fd_mode >= 0) {
+ cm.mask |= CAN_CTRLMODE_FD;
+ SET_FLAG(cm.flags, CAN_CTRLMODE_FD, link->network->can_fd_mode > 0);
+ log_link_debug(link, "%sabling FD mode", link->network->can_fd_mode > 0 ? "En" : "Dis");
+ }
+
+ if (link->network->can_non_iso >= 0) {
+ cm.mask |= CAN_CTRLMODE_FD_NON_ISO;
+ SET_FLAG(cm.flags, CAN_CTRLMODE_FD_NON_ISO, link->network->can_non_iso > 0);
+ log_link_debug(link, "%sabling FD non-ISO mode", link->network->can_non_iso > 0 ? "En" : "Dis");
+ }
+
+ if (link->network->can_restart_us > 0) {
+ char time_string[FORMAT_TIMESPAN_MAX];
+ uint64_t restart_ms;
+
+ if (link->network->can_restart_us == USEC_INFINITY)
+ restart_ms = 0;
+ else
+ restart_ms = DIV_ROUND_UP(link->network->can_restart_us, USEC_PER_MSEC);
+
+ format_timespan(time_string, FORMAT_TIMESPAN_MAX, restart_ms * 1000, MSEC_PER_SEC);
+
+ if (restart_ms > UINT32_MAX) {
+ log_link_error(link, "restart timeout (%s) too big.", time_string);
+ return -ERANGE;
+ }
+
+ log_link_debug(link, "Setting restart = %s", time_string);
+
+ r = sd_netlink_message_append_u32(m, IFLA_CAN_RESTART_MS, restart_ms);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_CAN_RESTART_MS attribute: %m");
+ }
+
+ if (link->network->can_triple_sampling >= 0) {
+ cm.mask |= CAN_CTRLMODE_3_SAMPLES;
+ SET_FLAG(cm.flags, CAN_CTRLMODE_3_SAMPLES, link->network->can_triple_sampling);
+ log_link_debug(link, "%sabling triple-sampling", link->network->can_triple_sampling ? "En" : "Dis");
+ }
+
+ if (link->network->can_listen_only >= 0) {
+ cm.mask |= CAN_CTRLMODE_LISTENONLY;
+ SET_FLAG(cm.flags, CAN_CTRLMODE_LISTENONLY, link->network->can_listen_only);
+ log_link_debug(link, "%sabling listen-only mode", link->network->can_listen_only ? "En" : "Dis");
+ }
+
+ if (cm.mask != 0) {
+ r = sd_netlink_message_append_data(m, IFLA_CAN_CTRLMODE, &cm, sizeof(cm));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_CAN_CTRLMODE attribute: %m");
+ }
+
+ if (link->network->can_termination >= 0) {
+
+ log_link_debug(link, "%sabling can-termination", link->network->can_termination ? "En" : "Dis");
+
+ r = sd_netlink_message_append_u16(m, IFLA_CAN_TERMINATION,
+ link->network->can_termination ? CAN_TERMINATION_OHM_VALUE : 0);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_CAN_TERMINATION attribute: %m");
+
+ }
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to close netlink container: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, m, link_set_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ if (!(link->flags & IFF_UP))
+ return link_up_can(link);
+
+ return 0;
+}
+
+static int link_down_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_link_message_warning_errno(link, m, r, "Could not bring down interface");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ r = link_set_can(link);
+ if (r < 0)
+ link_enter_failed(link);
+
+ return 1;
+}
+
+int link_configure_can(Link *link) {
+ int r;
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ if (streq_ptr(link->kind, "can")) {
+ /* The CAN interface must be down to configure bitrate, etc... */
+ if ((link->flags & IFF_UP)) {
+ r = link_down(link, link_down_handler);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ } else {
+ r = link_set_can(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ return 0;
+ }
+
+ if (!(link->flags & IFF_UP)) {
+ r = link_up_can(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/network/networkd-can.h b/src/network/networkd-can.h
new file mode 100644
index 0000000..7a2705b
--- /dev/null
+++ b/src/network/networkd-can.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+
+typedef struct Link Link;
+
+int link_configure_can(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_can_bitrate);
diff --git a/src/network/networkd-conf.c b/src/network/networkd-conf.c
new file mode 100644
index 0000000..bf51624
--- /dev/null
+++ b/src/network/networkd-conf.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Vinay Kulkarni <kulkarniv@vmware.com>
+ ***/
+
+#include <ctype.h>
+#include <netinet/ip.h>
+
+#include "conf-parser.h"
+#include "def.h"
+#include "dhcp-identifier.h"
+#include "extract-word.h"
+#include "hexdecoct.h"
+#include "networkd-conf.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "networkd-speed-meter.h"
+#include "networkd-dhcp4.h"
+#include "string-table.h"
+
+int manager_parse_config_file(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = config_parse_many_nulstr(
+ PKGSYSCONFDIR "/networkd.conf",
+ CONF_PATHS_NULSTR("systemd/networkd.conf.d"),
+ "Network\0"
+ "DHCP\0",
+ config_item_perf_lookup, networkd_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ m,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (m->use_speed_meter && m->speed_meter_interval_usec < SPEED_METER_MINIMUM_TIME_INTERVAL) {
+ char buf[FORMAT_TIMESPAN_MAX];
+
+ log_warning("SpeedMeterIntervalSec= is too small, using %s.",
+ format_timespan(buf, sizeof buf, SPEED_METER_MINIMUM_TIME_INTERVAL, USEC_PER_SEC));
+ m->speed_meter_interval_usec = SPEED_METER_MINIMUM_TIME_INTERVAL;
+ }
+
+ return 0;
+}
+
+static const char* const duid_type_table[_DUID_TYPE_MAX] = {
+ [DUID_TYPE_LLT] = "link-layer-time",
+ [DUID_TYPE_EN] = "vendor",
+ [DUID_TYPE_LL] = "link-layer",
+ [DUID_TYPE_UUID] = "uuid",
+};
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(duid_type, DUIDType);
+
+int config_parse_duid_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *type_string = NULL;
+ const char *p = rvalue;
+ DUID *duid = data;
+ DUIDType type;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(duid);
+
+ r = extract_first_word(&p, &type_string, ":", 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to extract DUID type from '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ type = duid_type_from_string(type_string);
+ if (type < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse DUID type '%s', ignoring.", type_string);
+ return 0;
+ }
+
+ if (!isempty(p)) {
+ usec_t u;
+
+ if (type != DUID_TYPE_LLT) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = parse_timestamp(p, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse timestamp, ignoring: %s", p);
+ return 0;
+ }
+
+ duid->llt_time = u;
+ }
+
+ duid->type = type;
+
+ return 0;
+}
+
+int config_parse_duid_rawdata(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ DUID *ret = data;
+ uint8_t raw_data[MAX_DUID_LEN];
+ unsigned count = 0;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(ret);
+
+ /* RawData contains DUID in format "NN:NN:NN..." */
+ for (const char *p = rvalue;;) {
+ int n1, n2, len, r;
+ uint32_t byte;
+ _cleanup_free_ char *cbyte = NULL;
+
+ r = extract_first_word(&p, &cbyte, ":", 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to read DUID, ignoring assignment: %s.", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ if (count >= MAX_DUID_LEN) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Max DUID length exceeded, ignoring assignment: %s.", rvalue);
+ return 0;
+ }
+
+ len = strlen(cbyte);
+ if (!IN_SET(len, 1, 2)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid length - DUID byte: %s, ignoring assignment: %s.", cbyte, rvalue);
+ return 0;
+ }
+ n1 = unhexchar(cbyte[0]);
+ if (len == 2)
+ n2 = unhexchar(cbyte[1]);
+ else
+ n2 = 0;
+
+ if (n1 < 0 || n2 < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid DUID byte: %s. Ignoring assignment: %s.", cbyte, rvalue);
+ return 0;
+ }
+
+ byte = ((uint8_t) n1 << (4 * (len-1))) | (uint8_t) n2;
+ raw_data[count++] = byte;
+ }
+
+ assert_cc(sizeof(raw_data) == sizeof(ret->raw_data));
+ memcpy(ret->raw_data, raw_data, count);
+ ret->raw_data_len = count;
+ return 0;
+}
diff --git a/src/network/networkd-conf.h b/src/network/networkd-conf.h
new file mode 100644
index 0000000..b485e9e
--- /dev/null
+++ b/src/network/networkd-conf.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014 Vinay Kulkarni <kulkarniv@vmware.com>
+***/
+
+#include "conf-parser.h"
+
+typedef struct Manager Manager;
+
+int manager_parse_config_file(Manager *m);
+
+const struct ConfigPerfItem* networkd_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_duid_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_duid_rawdata);
diff --git a/src/network/networkd-dhcp-common.c b/src/network/networkd-dhcp-common.c
new file mode 100644
index 0000000..9f58121
--- /dev/null
+++ b/src/network/networkd-dhcp-common.c
@@ -0,0 +1,935 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/if_arp.h>
+
+#include "dhcp-internal.h"
+#include "dhcp6-internal.h"
+#include "escape.h"
+#include "in-addr-util.h"
+#include "networkd-dhcp-common.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "strv.h"
+
+bool link_dhcp_enabled(Link *link, int family) {
+ assert(link);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+
+ if (family == AF_INET6 && !socket_ipv6_is_supported())
+ return false;
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (link->iftype == ARPHRD_CAN)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ return link->network->dhcp & (family == AF_INET ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6);
+}
+
+void network_adjust_dhcp(Network *network) {
+ assert(network);
+ assert(network->dhcp >= 0);
+
+ if (network->dhcp == ADDRESS_FAMILY_NO)
+ return;
+
+ /* Bonding slave does not support addressing. */
+ if (network->bond) {
+ log_warning("%s: Cannot enable DHCP= when Bond= is specified, disabling DHCP=.",
+ network->filename);
+ network->dhcp = ADDRESS_FAMILY_NO;
+ return;
+ }
+
+ if (!FLAGS_SET(network->link_local, ADDRESS_FAMILY_IPV6) &&
+ FLAGS_SET(network->dhcp, ADDRESS_FAMILY_IPV6)) {
+ log_warning("%s: DHCPv6 client is enabled but IPv6 link local addressing is disabled. "
+ "Disabling DHCPv6 client.", network->filename);
+ SET_FLAG(network->dhcp, ADDRESS_FAMILY_IPV6, false);
+ }
+}
+
+static struct DUID fallback_duid = { .type = DUID_TYPE_EN };
+DUID* link_get_duid(Link *link) {
+ if (link->network->duid.type != _DUID_TYPE_INVALID)
+ return &link->network->duid;
+ else if (link->hw_addr.length == 0 &&
+ (link->manager->duid.type == DUID_TYPE_LLT ||
+ link->manager->duid.type == DUID_TYPE_LL))
+ /* Fallback to DUID that works without mac addresses.
+ * This is useful for tunnel devices without mac address. */
+ return &fallback_duid;
+ else
+ return &link->manager->duid;
+}
+
+static int duid_set_uuid(DUID *duid, sd_id128_t uuid) {
+ assert(duid);
+
+ if (duid->raw_data_len > 0)
+ return 0;
+
+ if (duid->type != DUID_TYPE_UUID)
+ return -EINVAL;
+
+ memcpy(&duid->raw_data, &uuid, sizeof(sd_id128_t));
+ duid->raw_data_len = sizeof(sd_id128_t);
+
+ return 1;
+}
+
+static int get_product_uuid_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ Manager *manager = userdata;
+ const sd_bus_error *e;
+ const void *a;
+ size_t sz;
+ DUID *duid;
+ Link *link;
+ int r;
+
+ assert(m);
+ assert(manager);
+
+ e = sd_bus_message_get_error(m);
+ if (e) {
+ log_error_errno(sd_bus_error_get_errno(e),
+ "Could not get product UUID. Falling back to use machine-app-specific ID as DUID-UUID: %s",
+ e->message);
+ goto configure;
+ }
+
+ r = sd_bus_message_read_array(m, 'y', &a, &sz);
+ if (r < 0)
+ goto configure;
+
+ if (sz != sizeof(sd_id128_t)) {
+ log_error("Invalid product UUID. Falling back to use machine-app-specific ID as DUID-UUID.");
+ goto configure;
+ }
+
+ memcpy(&manager->product_uuid, a, sz);
+ while ((duid = set_steal_first(manager->duids_requesting_uuid)))
+ (void) duid_set_uuid(duid, manager->product_uuid);
+
+ manager->duids_requesting_uuid = set_free(manager->duids_requesting_uuid);
+
+configure:
+ while ((link = set_steal_first(manager->links_requesting_uuid))) {
+ link_unref(link);
+
+ r = link_configure(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ manager->links_requesting_uuid = set_free(manager->links_requesting_uuid);
+
+ /* To avoid calling GetProductUUID() bus method so frequently, set the flag below
+ * even if the method fails. */
+ manager->has_product_uuid = true;
+
+ return 1;
+}
+
+int manager_request_product_uuid(Manager *m, Link *link) {
+ int r;
+
+ assert(m);
+
+ if (m->has_product_uuid)
+ return 0;
+
+ log_debug("Requesting product UUID");
+
+ if (link) {
+ DUID *duid;
+
+ assert_se(duid = link_get_duid(link));
+
+ r = set_ensure_put(&m->links_requesting_uuid, NULL, link);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ link_ref(link);
+
+ r = set_ensure_put(&m->duids_requesting_uuid, NULL, duid);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (!m->bus || sd_bus_is_ready(m->bus) <= 0) {
+ log_debug("Not connected to system bus, requesting product UUID later.");
+ return 0;
+ }
+
+ r = sd_bus_call_method_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ "GetProductUUID",
+ get_product_uuid_handler,
+ m,
+ "b",
+ false);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to get product UUID: %m");
+
+ return 0;
+}
+
+static bool link_requires_uuid(Link *link) {
+ const DUID *duid;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->network);
+
+ duid = link_get_duid(link);
+ if (duid->type != DUID_TYPE_UUID || duid->raw_data_len != 0)
+ return false;
+
+ if (link_dhcp4_enabled(link) && IN_SET(link->network->dhcp_client_identifier, DHCP_CLIENT_ID_DUID, DHCP_CLIENT_ID_DUID_ONLY))
+ return true;
+
+ if (link_dhcp6_enabled(link) || link_ipv6_accept_ra_enabled(link))
+ return true;
+
+ return false;
+}
+
+int link_configure_duid(Link *link) {
+ Manager *m;
+ DUID *duid;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->network);
+
+ m = link->manager;
+ duid = link_get_duid(link);
+
+ if (!link_requires_uuid(link))
+ return 1;
+
+ if (m->has_product_uuid) {
+ (void) duid_set_uuid(duid, m->product_uuid);
+ return 1;
+ }
+
+ if (!m->links_requesting_uuid) {
+ r = manager_request_product_uuid(m, link);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ return r;
+
+ log_link_warning_errno(link, r,
+ "Failed to get product UUID. Falling back to use machine-app-specific ID as DUID-UUID: %m");
+ return 1;
+ }
+ } else {
+ r = set_put(m->links_requesting_uuid, link);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ link_ref(link);
+
+ r = set_put(m->duids_requesting_uuid, duid);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int config_parse_dhcp(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ AddressFamily *dhcp = data, s;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* Note that this is mostly like
+ * config_parse_address_family(), except that it
+ * understands some old names for the enum values */
+
+ s = address_family_from_string(rvalue);
+ if (s < 0) {
+
+ /* Previously, we had a slightly different enum here,
+ * support its values for compatibility. */
+
+ if (streq(rvalue, "none"))
+ s = ADDRESS_FAMILY_NO;
+ else if (streq(rvalue, "v4"))
+ s = ADDRESS_FAMILY_IPV4;
+ else if (streq(rvalue, "v6"))
+ s = ADDRESS_FAMILY_IPV6;
+ else if (streq(rvalue, "both"))
+ s = ADDRESS_FAMILY_YES;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse DHCP option, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "DHCP=%s is deprecated, please use DHCP=%s instead.",
+ rvalue, address_family_to_string(s));
+ }
+
+ *dhcp = s;
+ return 0;
+}
+
+int config_parse_dhcp_route_metric(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ uint32_t metric;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &metric);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse RouteMetric=%s, ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (streq_ptr(section, "DHCPv4")) {
+ network->dhcp_route_metric = metric;
+ network->dhcp_route_metric_set = true;
+ } else if (streq_ptr(section, "DHCPv6")) {
+ network->dhcp6_route_metric = metric;
+ network->dhcp6_route_metric_set = true;
+ } else { /* [DHCP] section */
+ if (!network->dhcp_route_metric_set)
+ network->dhcp_route_metric = metric;
+ if (!network->dhcp6_route_metric_set)
+ network->dhcp6_route_metric = metric;
+ }
+
+ return 0;
+}
+
+int config_parse_dhcp_use_dns(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse UseDNS=%s, ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (streq_ptr(section, "DHCPv4")) {
+ network->dhcp_use_dns = r;
+ network->dhcp_use_dns_set = true;
+ } else if (streq_ptr(section, "DHCPv6")) {
+ network->dhcp6_use_dns = r;
+ network->dhcp6_use_dns_set = true;
+ } else { /* [DHCP] section */
+ if (!network->dhcp_use_dns_set)
+ network->dhcp_use_dns = r;
+ if (!network->dhcp6_use_dns_set)
+ network->dhcp6_use_dns = r;
+ }
+
+ return 0;
+}
+
+int config_parse_dhcp_use_ntp(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse UseNTP=%s, ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (streq_ptr(section, "DHCPv4")) {
+ network->dhcp_use_ntp = r;
+ network->dhcp_use_ntp_set = true;
+ } else if (streq_ptr(section, "DHCPv6")) {
+ network->dhcp6_use_ntp = r;
+ network->dhcp6_use_ntp_set = true;
+ } else { /* [DHCP] section */
+ if (!network->dhcp_use_ntp_set)
+ network->dhcp_use_ntp = r;
+ if (!network->dhcp6_use_ntp_set)
+ network->dhcp6_use_ntp = r;
+ }
+
+ return 0;
+}
+
+int config_parse_section_route_table(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ uint32_t rt;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &rt);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse RouteTable=%s, ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (STRPTR_IN_SET(section, "DHCP", "DHCPv4")) {
+ network->dhcp_route_table = rt;
+ network->dhcp_route_table_set = true;
+ } else { /* section is IPv6AcceptRA */
+ network->ipv6_accept_ra_route_table = rt;
+ network->ipv6_accept_ra_route_table_set = true;
+ }
+
+ return 0;
+}
+
+int config_parse_iaid(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = data;
+ uint32_t iaid;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(network);
+
+ r = safe_atou32(rvalue, &iaid);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Unable to read IAID, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ network->iaid = iaid;
+ network->iaid_set = true;
+
+ return 0;
+}
+
+int config_parse_dhcp_user_class(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***l = data;
+ int r;
+
+ assert(l);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+ size_t len;
+
+ r = extract_first_word(&p, &w, NULL, EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to split user classes option, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ len = strlen(w);
+ if (ltype == AF_INET) {
+ if (len > UINT8_MAX || len == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "%s length is not in the range 1-255, ignoring.", w);
+ continue;
+ }
+ } else {
+ if (len > UINT16_MAX || len == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "%s length is not in the range 1-65535, ignoring.", w);
+ continue;
+ }
+ }
+
+ r = strv_consume(l, TAKE_PTR(w));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_dhcp_vendor_class(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ char ***l = data;
+ int r;
+
+ assert(l);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+
+ r = extract_first_word(&p, &w, NULL, EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to split vendor classes option, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ if (strlen(w) > UINT8_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "%s length is not in the range 1-255, ignoring.", w);
+ continue;
+ }
+
+ r = strv_push(l, w);
+ if (r < 0)
+ return log_oom();
+
+ w = NULL;
+ }
+}
+
+int config_parse_dhcp_send_option(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sd_dhcp_option_unrefp) sd_dhcp_option *opt4 = NULL, *old4 = NULL;
+ _cleanup_(sd_dhcp6_option_unrefp) sd_dhcp6_option *opt6 = NULL, *old6 = NULL;
+ uint32_t uint32_data, enterprise_identifier = 0;
+ _cleanup_free_ char *word = NULL, *q = NULL;
+ OrderedHashmap **options = data;
+ uint16_t u16, uint16_data;
+ union in_addr_union addr;
+ DHCPOptionDataType type;
+ uint8_t u8, uint8_data;
+ const void *udata;
+ const char *p;
+ ssize_t sz;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *options = ordered_hashmap_free(*options);
+ return 0;
+ }
+
+ p = rvalue;
+ if (ltype == AF_INET6 && streq(lvalue, "SendVendorOption")) {
+ r = extract_first_word(&p, &word, ":", 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r <= 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid DHCP option, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = safe_atou32(word, &enterprise_identifier);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCPv6 enterprise identifier data, ignoring assignment: %s", p);
+ return 0;
+ }
+ word = mfree(word);
+ }
+
+ r = extract_first_word(&p, &word, ":", 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r <= 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid DHCP option, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ if (ltype == AF_INET6) {
+ r = safe_atou16(word, &u16);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid DHCP option, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+ if (u16 < 1 || u16 >= UINT16_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid DHCP option, valid range is 1-65535, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+ } else {
+ r = safe_atou8(word, &u8);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid DHCP option, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+ if (u8 < 1 || u8 >= UINT8_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid DHCP option, valid range is 1-254, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+ }
+
+ word = mfree(word);
+ r = extract_first_word(&p, &word, ":", 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r <= 0 || isempty(p)) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid DHCP option, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ type = dhcp_option_data_type_from_string(word);
+ if (type < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid DHCP option data type, ignoring assignment: %s", p);
+ return 0;
+ }
+
+ switch(type) {
+ case DHCP_OPTION_DATA_UINT8:{
+ r = safe_atou8(p, &uint8_data);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCP uint8 data, ignoring assignment: %s", p);
+ return 0;
+ }
+
+ udata = &uint8_data;
+ sz = sizeof(uint8_t);
+ break;
+ }
+ case DHCP_OPTION_DATA_UINT16:{
+ r = safe_atou16(p, &uint16_data);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCP uint16 data, ignoring assignment: %s", p);
+ return 0;
+ }
+
+ udata = &uint16_data;
+ sz = sizeof(uint16_t);
+ break;
+ }
+ case DHCP_OPTION_DATA_UINT32: {
+ r = safe_atou32(p, &uint32_data);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCP uint32 data, ignoring assignment: %s", p);
+ return 0;
+ }
+
+ udata = &uint32_data;
+ sz = sizeof(uint32_t);
+
+ break;
+ }
+ case DHCP_OPTION_DATA_IPV4ADDRESS: {
+ r = in_addr_from_string(AF_INET, p, &addr);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCP ipv4address data, ignoring assignment: %s", p);
+ return 0;
+ }
+
+ udata = &addr.in;
+ sz = sizeof(addr.in.s_addr);
+ break;
+ }
+ case DHCP_OPTION_DATA_IPV6ADDRESS: {
+ r = in_addr_from_string(AF_INET6, p, &addr);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCP ipv6address data, ignoring assignment: %s", p);
+ return 0;
+ }
+
+ udata = &addr.in6;
+ sz = sizeof(addr.in6.s6_addr);
+ break;
+ }
+ case DHCP_OPTION_DATA_STRING:
+ sz = cunescape(p, UNESCAPE_ACCEPT_NUL, &q);
+ if (sz < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, sz,
+ "Failed to decode DHCP option data, ignoring assignment: %s", p);
+
+ udata = q;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ltype == AF_INET6) {
+ r = sd_dhcp6_option_new(u16, udata, sz, enterprise_identifier, &opt6);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store DHCP option '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ r = ordered_hashmap_ensure_allocated(options, &dhcp6_option_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ /* Overwrite existing option */
+ old6 = ordered_hashmap_get(*options, UINT_TO_PTR(u16));
+ r = ordered_hashmap_replace(*options, UINT_TO_PTR(u16), opt6);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store DHCP option '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ TAKE_PTR(opt6);
+ } else {
+ r = sd_dhcp_option_new(u8, udata, sz, &opt4);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store DHCP option '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ r = ordered_hashmap_ensure_allocated(options, &dhcp_option_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ /* Overwrite existing option */
+ old4 = ordered_hashmap_get(*options, UINT_TO_PTR(u8));
+ r = ordered_hashmap_replace(*options, UINT_TO_PTR(u8), opt4);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store DHCP option '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ TAKE_PTR(opt4);
+ }
+ return 0;
+}
+
+int config_parse_dhcp_request_options(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ if (ltype == AF_INET)
+ network->dhcp_request_options = set_free(network->dhcp_request_options);
+ else
+ network->dhcp6_request_options = set_free(network->dhcp6_request_options);
+
+ return 0;
+ }
+
+ for (p = rvalue;;) {
+ _cleanup_free_ char *n = NULL;
+ uint32_t i;
+
+ r = extract_first_word(&p, &n, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCP request option, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = safe_atou32(n, &i);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "DHCP request option is invalid, ignoring assignment: %s", n);
+ continue;
+ }
+
+ if (i < 1 || i >= UINT8_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "DHCP request option is invalid, valid range is 1-254, ignoring assignment: %s", n);
+ continue;
+ }
+
+ r = set_ensure_put(ltype == AF_INET ? &network->dhcp_request_options : &network->dhcp6_request_options,
+ NULL, UINT32_TO_PTR(i));
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store DHCP request option '%s', ignoring assignment: %m", n);
+ }
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dhcp_use_domains, dhcp_use_domains, DHCPUseDomains,
+ "Failed to parse DHCP use domains setting");
+
+static const char* const dhcp_use_domains_table[_DHCP_USE_DOMAINS_MAX] = {
+ [DHCP_USE_DOMAINS_NO] = "no",
+ [DHCP_USE_DOMAINS_ROUTE] = "route",
+ [DHCP_USE_DOMAINS_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dhcp_use_domains, DHCPUseDomains, DHCP_USE_DOMAINS_YES);
+
+static const char * const dhcp_option_data_type_table[_DHCP_OPTION_DATA_MAX] = {
+ [DHCP_OPTION_DATA_UINT8] = "uint8",
+ [DHCP_OPTION_DATA_UINT16] = "uint16",
+ [DHCP_OPTION_DATA_UINT32] = "uint32",
+ [DHCP_OPTION_DATA_STRING] = "string",
+ [DHCP_OPTION_DATA_IPV4ADDRESS] = "ipv4address",
+ [DHCP_OPTION_DATA_IPV6ADDRESS] = "ipv6address",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(dhcp_option_data_type, DHCPOptionDataType);
diff --git a/src/network/networkd-dhcp-common.h b/src/network/networkd-dhcp-common.h
new file mode 100644
index 0000000..78c149e
--- /dev/null
+++ b/src/network/networkd-dhcp-common.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "dhcp-identifier.h"
+#include "time-util.h"
+
+#define DHCP_ROUTE_METRIC 1024
+
+typedef struct Link Link;
+typedef struct Manager Manager;
+typedef struct Network Network;
+
+typedef enum DHCPUseDomains {
+ DHCP_USE_DOMAINS_NO,
+ DHCP_USE_DOMAINS_YES,
+ DHCP_USE_DOMAINS_ROUTE,
+ _DHCP_USE_DOMAINS_MAX,
+ _DHCP_USE_DOMAINS_INVALID = -1,
+} DHCPUseDomains;
+
+typedef enum DHCPOptionDataType {
+ DHCP_OPTION_DATA_UINT8,
+ DHCP_OPTION_DATA_UINT16,
+ DHCP_OPTION_DATA_UINT32,
+ DHCP_OPTION_DATA_STRING,
+ DHCP_OPTION_DATA_IPV4ADDRESS,
+ DHCP_OPTION_DATA_IPV6ADDRESS,
+ _DHCP_OPTION_DATA_MAX,
+ _DHCP_OPTION_DATA_INVALID,
+} DHCPOptionDataType;
+
+typedef struct DUID {
+ /* Value of Type in [DHCP] section */
+ DUIDType type;
+
+ uint8_t raw_data_len;
+ uint8_t raw_data[MAX_DUID_LEN];
+ usec_t llt_time;
+} DUID;
+
+bool link_dhcp_enabled(Link *link, int family);
+static inline bool link_dhcp4_enabled(Link *link) {
+ return link_dhcp_enabled(link, AF_INET);
+}
+static inline bool link_dhcp6_enabled(Link *link) {
+ return link_dhcp_enabled(link, AF_INET6);
+}
+
+void network_adjust_dhcp(Network *network);
+
+DUID* link_get_duid(Link *link);
+int link_configure_duid(Link *link);
+int manager_request_product_uuid(Manager *m, Link *link);
+
+const char* dhcp_use_domains_to_string(DHCPUseDomains p) _const_;
+DHCPUseDomains dhcp_use_domains_from_string(const char *s) _pure_;
+
+const char *dhcp_option_data_type_to_string(DHCPOptionDataType d) _const_;
+DHCPOptionDataType dhcp_option_data_type_from_string(const char *d) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_route_metric);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_use_dns);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_use_domains);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_use_ntp);
+CONFIG_PARSER_PROTOTYPE(config_parse_iaid);
+CONFIG_PARSER_PROTOTYPE(config_parse_section_route_table);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_user_class);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_vendor_class);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_send_option);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_request_options);
diff --git a/src/network/networkd-dhcp-server-bus.c b/src/network/networkd-dhcp-server-bus.c
new file mode 100644
index 0000000..32f4bae
--- /dev/null
+++ b/src/network/networkd-dhcp-server-bus.c
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-dhcp-server.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-util.h"
+#include "dhcp-server-internal.h"
+#include "networkd-dhcp-server-bus.h"
+#include "networkd-link-bus.h"
+#include "networkd-manager.h"
+#include "strv.h"
+
+static int property_get_leases(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ Link *l = userdata;
+ sd_dhcp_server *s;
+ DHCPLease *lease;
+ int r;
+
+ assert(reply);
+ assert(l);
+
+ s = l->dhcp_server;
+ if (!s)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Link %s has no DHCP server.", l->ifname);
+
+ r = sd_bus_message_open_container(reply, 'a', "(uayayayayt)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(lease, s->leases_by_client_id) {
+ r = sd_bus_message_open_container(reply, 'r', "uayayayayt");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "u", (uint32_t)AF_INET);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', lease->client_id.data, lease->client_id.length);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &lease->address, sizeof(lease->address));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &lease->gateway, sizeof(lease->gateway));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &lease->chaddr, sizeof(lease->chaddr));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(reply, 't', &lease->expiration);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int dhcp_server_emit_changed(Link *link, const char *property, ...) {
+ _cleanup_free_ char *path = NULL;
+ char **l;
+
+ assert(link);
+
+ path = link_bus_path(link);
+ if (!path)
+ return log_oom();
+
+ l = strv_from_stdarg_alloca(property);
+
+ return sd_bus_emit_properties_changed_strv(
+ link->manager->bus,
+ path,
+ "org.freedesktop.network1.DHCPServer",
+ l);
+}
+
+void dhcp_server_callback(sd_dhcp_server *s, uint64_t event, void *data) {
+ Link *l = data;
+
+ assert(l);
+
+ if (event & SD_DHCP_SERVER_EVENT_LEASE_CHANGED)
+ (void) dhcp_server_emit_changed(l, "Leases", NULL);
+}
+
+
+const sd_bus_vtable dhcp_server_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Leases", "a(uayayayayt)", property_get_leases, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_VTABLE_END
+};
diff --git a/src/network/networkd-dhcp-server-bus.h b/src/network/networkd-dhcp-server-bus.h
new file mode 100644
index 0000000..7191478
--- /dev/null
+++ b/src/network/networkd-dhcp-server-bus.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "networkd-link.h"
+
+extern const sd_bus_vtable dhcp_server_vtable[];
+
+void dhcp_server_callback(sd_dhcp_server *server, uint64_t event, void *data);
diff --git a/src/network/networkd-dhcp-server.c b/src/network/networkd-dhcp-server.c
new file mode 100644
index 0000000..cf279c6
--- /dev/null
+++ b/src/network/networkd-dhcp-server.c
@@ -0,0 +1,439 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/if_arp.h>
+#include <linux/if.h>
+
+#include "sd-dhcp-server.h"
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "networkd-address.h"
+#include "networkd-dhcp-server.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+#include "socket-netlink.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+
+static bool link_dhcp4_server_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->network->bond)
+ return false;
+
+ if (link->iftype == ARPHRD_CAN)
+ return false;
+
+ return link->network->dhcp_server;
+}
+
+static Address* link_find_dhcp_server_address(Link *link) {
+ Address *address;
+
+ assert(link);
+ assert(link->network);
+
+ /* The first statically configured address if there is any */
+ ORDERED_HASHMAP_FOREACH(address, link->network->addresses_by_section)
+ if (address->family == AF_INET &&
+ !in_addr_is_null(address->family, &address->in_addr))
+ return address;
+
+ /* If that didn't work, find a suitable address we got from the pool */
+ SET_FOREACH(address, link->pool_addresses)
+ if (address->family == AF_INET)
+ return address;
+
+ return NULL;
+}
+
+static int link_push_uplink_to_dhcp_server(
+ Link *link,
+ sd_dhcp_lease_server_type what,
+ sd_dhcp_server *s) {
+
+ _cleanup_free_ struct in_addr *addresses = NULL;
+ size_t n_addresses = 0, n_allocated = 0;
+ bool use_dhcp_lease_data = true;
+
+ assert(link);
+
+ if (!link->network)
+ return 0;
+ assert(link->network);
+
+ log_link_debug(link, "Copying %s from link", dhcp_lease_server_type_to_string(what));
+
+ switch (what) {
+
+ case SD_DHCP_LEASE_DNS:
+ /* For DNS we have a special case. We the data configured explicitly locally along with the
+ * data from the DHCP lease. */
+
+ for (unsigned i = 0; i < link->network->n_dns; i++) {
+ struct in_addr ia;
+
+ /* Only look for IPv4 addresses */
+ if (link->network->dns[i]->family != AF_INET)
+ continue;
+
+ ia = link->network->dns[i]->address.in;
+
+ /* Never propagate obviously borked data */
+ if (in4_addr_is_null(&ia) || in4_addr_is_localhost(&ia))
+ continue;
+
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + 1))
+ return log_oom();
+
+ addresses[n_addresses++] = ia;
+ }
+
+ use_dhcp_lease_data = link->network->dhcp_use_dns;
+ break;
+
+ case SD_DHCP_LEASE_NTP: {
+ char **i;
+
+ /* For NTP things are similar, but for NTP hostnames can be configured too, which we cannot
+ * propagate via DHCP. Hence let's only propagate those which are IP addresses. */
+
+ STRV_FOREACH(i, link->network->ntp) {
+ union in_addr_union ia;
+
+ if (in_addr_from_string(AF_INET, *i, &ia) < 0)
+ continue;
+
+ /* Never propagate obviously borked data */
+ if (in4_addr_is_null(&ia.in) || in4_addr_is_localhost(&ia.in))
+ continue;
+
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + 1))
+ return log_oom();
+
+ addresses[n_addresses++] = ia.in;
+ }
+
+ use_dhcp_lease_data = link->network->dhcp_use_ntp;
+ break;
+ }
+
+ case SD_DHCP_LEASE_SIP:
+
+ /* For SIP we don't allow explicit, local configuration, but there's control whether to use the data */
+ use_dhcp_lease_data = link->network->dhcp_use_sip;
+ break;
+
+ case SD_DHCP_LEASE_POP3:
+ case SD_DHCP_LEASE_SMTP:
+ case SD_DHCP_LEASE_LPR:
+ /* For the other server types we currently do not allow local configuration of server data,
+ * since there are typically no local consumers of the data. */
+ break;
+
+ default:
+ assert_not_reached("Unexpected server type");
+ }
+
+ if (use_dhcp_lease_data && link->dhcp_lease) {
+ const struct in_addr *da;
+
+ int n = sd_dhcp_lease_get_servers(link->dhcp_lease, what, &da);
+ if (n > 0) {
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + n))
+ return log_oom();
+
+ for (int j = 0; j < n; j++)
+ if (in4_addr_is_non_local(&da[j]))
+ addresses[n_addresses++] = da[j];
+ }
+ }
+
+ if (n_addresses <= 0)
+ return 0;
+
+ return sd_dhcp_server_set_servers(s, what, addresses, n_addresses);
+}
+
+static int dhcp4_server_parse_dns_server_string_and_warn(Link *l, const char *string, struct in_addr **addresses, size_t *n_allocated, size_t *n_addresses) {
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *server_name = NULL;
+ union in_addr_union address;
+ int family, r, ifindex = 0;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = in_addr_ifindex_name_from_string_auto(word, &family, &address, &ifindex, &server_name);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse DNS server address '%s', ignoring: %m", word);
+ continue;
+ }
+
+ /* Only look for IPv4 addresses */
+ if (family != AF_INET)
+ continue;
+
+ /* Never propagate obviously borked data */
+ if (in4_addr_is_null(&address.in) || in4_addr_is_localhost(&address.in))
+ continue;
+
+ if (!GREEDY_REALLOC(*addresses, *n_allocated, *n_addresses + 1))
+ return log_oom();
+
+ (*addresses)[(*n_addresses)++] = address.in;
+ }
+
+ return 0;
+}
+
+static int dhcp4_server_set_dns_from_resolve_conf(Link *link) {
+ _cleanup_free_ struct in_addr *addresses = NULL;
+ size_t n_addresses = 0, n_allocated = 0;
+ _cleanup_fclose_ FILE *f = NULL;
+ int n = 0, r;
+
+ f = fopen(PRIVATE_UPLINK_RESOLV_CONF, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open " PRIVATE_UPLINK_RESOLV_CONF ": %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *a;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read " PRIVATE_UPLINK_RESOLV_CONF ": %m");
+ if (r == 0)
+ break;
+
+ n++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, '#', ';', 0))
+ continue;
+
+ a = first_word(l, "nameserver");
+ if (!a)
+ continue;
+
+ r = dhcp4_server_parse_dns_server_string_and_warn(link, a, &addresses, &n_allocated, &n_addresses);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse DNS server address '%s', ignoring.", a);
+ }
+
+ if (n_addresses <= 0)
+ return 0;
+
+ return sd_dhcp_server_set_dns(link->dhcp_server, addresses, n_addresses);
+}
+
+int dhcp4_server_configure(Link *link) {
+ bool acquired_uplink = false;
+ sd_dhcp_option *p;
+ Link *uplink = NULL;
+ Address *address;
+ int r;
+
+ assert(link);
+
+ if (!link_dhcp4_server_enabled(link))
+ return 0;
+
+ if (!(link->flags & IFF_UP))
+ return 0;
+
+ if (!link->dhcp_server) {
+ r = sd_dhcp_server_new(&link->dhcp_server, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_dhcp_server_attach_event(link->dhcp_server, link->manager->event, 0);
+ if (r < 0)
+ return r;
+ }
+
+ address = link_find_dhcp_server_address(link);
+ if (!address)
+ return log_link_error_errno(link, SYNTHETIC_ERRNO(EBUSY),
+ "Failed to find suitable address for DHCPv4 server instance.");
+
+ /* use the server address' subnet as the pool */
+ r = sd_dhcp_server_configure_pool(link->dhcp_server, &address->in_addr.in, address->prefixlen,
+ link->network->dhcp_server_pool_offset, link->network->dhcp_server_pool_size);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to configure address pool for DHCPv4 server instance: %m");
+
+ /* TODO:
+ r = sd_dhcp_server_set_router(link->dhcp_server, &main_address->in_addr.in);
+ if (r < 0)
+ return r;
+ */
+
+ if (link->network->dhcp_server_max_lease_time_usec > 0) {
+ r = sd_dhcp_server_set_max_lease_time(link->dhcp_server,
+ DIV_ROUND_UP(link->network->dhcp_server_max_lease_time_usec, USEC_PER_SEC));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set maximum lease time for DHCPv4 server instance: %m");
+ }
+
+ if (link->network->dhcp_server_default_lease_time_usec > 0) {
+ r = sd_dhcp_server_set_default_lease_time(link->dhcp_server,
+ DIV_ROUND_UP(link->network->dhcp_server_default_lease_time_usec, USEC_PER_SEC));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set default lease time for DHCPv4 server instance: %m");
+ }
+
+ for (sd_dhcp_lease_server_type type = 0; type < _SD_DHCP_LEASE_SERVER_TYPE_MAX; type ++) {
+
+ if (!link->network->dhcp_server_emit[type].emit)
+ continue;
+
+ if (link->network->dhcp_server_emit[type].n_addresses > 0)
+ /* Explicitly specified servers to emit */
+ r = sd_dhcp_server_set_servers(
+ link->dhcp_server,
+ type,
+ link->network->dhcp_server_emit[type].addresses,
+ link->network->dhcp_server_emit[type].n_addresses);
+ else {
+ /* Emission is requested, but nothing explicitly configured. Let's find a suitable upling */
+ if (!acquired_uplink) {
+ uplink = manager_find_uplink(link->manager, link);
+ acquired_uplink = true;
+ }
+
+ if (uplink && uplink->network)
+ r = link_push_uplink_to_dhcp_server(uplink, type, link->dhcp_server);
+ else if (type == SD_DHCP_LEASE_DNS)
+ r = dhcp4_server_set_dns_from_resolve_conf(link);
+ else {
+ log_link_debug(link,
+ "Not emitting %s on link, couldn't find suitable uplink.",
+ dhcp_lease_server_type_to_string(type));
+ continue;
+ }
+ }
+
+ if (r < 0)
+ log_link_warning_errno(link, r,
+ "Failed to set %s for DHCP server, ignoring: %m",
+ dhcp_lease_server_type_to_string(type));
+ }
+
+ r = sd_dhcp_server_set_emit_router(link->dhcp_server, link->network->dhcp_server_emit_router);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set router emission for DHCP server: %m");
+
+ if (link->network->dhcp_server_emit_timezone) {
+ _cleanup_free_ char *buffer = NULL;
+ const char *tz;
+
+ if (link->network->dhcp_server_timezone)
+ tz = link->network->dhcp_server_timezone;
+ else {
+ r = get_timezone(&buffer);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to determine timezone: %m");
+
+ tz = buffer;
+ }
+
+ r = sd_dhcp_server_set_timezone(link->dhcp_server, tz);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set timezone for DHCP server: %m");
+ }
+
+ ORDERED_HASHMAP_FOREACH(p, link->network->dhcp_server_send_options) {
+ r = sd_dhcp_server_add_option(link->dhcp_server, p);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set DHCPv4 option: %m");
+ }
+
+ ORDERED_HASHMAP_FOREACH(p, link->network->dhcp_server_send_vendor_options) {
+ r = sd_dhcp_server_add_vendor_option(link->dhcp_server, p);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set DHCPv4 option: %m");
+ }
+
+ if (!sd_dhcp_server_is_running(link->dhcp_server)) {
+ r = sd_dhcp_server_start(link->dhcp_server);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not start DHCPv4 server instance: %m");
+
+ log_link_debug(link, "Offering DHCPv4 leases");
+ }
+
+ return 0;
+}
+
+int config_parse_dhcp_server_emit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ NetworkDHCPServerEmitAddress *emit = data;
+
+ assert(emit);
+ assert(rvalue);
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+ union in_addr_union a;
+ int r;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract word, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = in_addr_from_string(AF_INET, w, &a);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s= address '%s', ignoring: %m", lvalue, w);
+ continue;
+ }
+
+ struct in_addr *m = reallocarray(emit->addresses, emit->n_addresses + 1, sizeof(struct in_addr));
+ if (!m)
+ return log_oom();
+
+ emit->addresses = m;
+ emit->addresses[emit->n_addresses++] = a.in;
+ }
+}
diff --git a/src/network/networkd-dhcp-server.h b/src/network/networkd-dhcp-server.h
new file mode 100644
index 0000000..4bd5120
--- /dev/null
+++ b/src/network/networkd-dhcp-server.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "networkd-link.h"
+#include "networkd-util.h"
+
+typedef struct Link Link;
+
+int dhcp4_server_configure(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_server_emit);
diff --git a/src/network/networkd-dhcp4.c b/src/network/networkd-dhcp4.c
new file mode 100644
index 0000000..f3c1e5f
--- /dev/null
+++ b/src/network/networkd-dhcp4.c
@@ -0,0 +1,1761 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+
+#include "escape.h"
+#include "alloc-util.h"
+#include "dhcp-client-internal.h"
+#include "hostname-util.h"
+#include "parse-util.h"
+#include "network-internal.h"
+#include "networkd-address.h"
+#include "networkd-dhcp4.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "sysctl-util.h"
+#include "web-util.h"
+
+static int dhcp4_update_address(Link *link, bool announce);
+static int dhcp4_remove_all(Link *link);
+
+static int dhcp4_release_old_lease(Link *link) {
+ Route *route;
+ int k, r = 0;
+
+ assert(link);
+
+ if (!link->dhcp_address_old && set_isempty(link->dhcp_routes_old))
+ return 0;
+
+ log_link_debug(link, "Removing old DHCPv4 address and routes.");
+
+ link_dirty(link);
+
+ SET_FOREACH(route, link->dhcp_routes_old) {
+ k = route_remove(route, NULL, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ if (link->dhcp_address_old) {
+ k = address_remove(link->dhcp_address_old, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static void dhcp4_check_ready(Link *link) {
+ int r;
+
+ if (link->network->dhcp_send_decline && !link->dhcp4_address_bind)
+ return;
+
+ if (link->dhcp4_messages > 0)
+ return;
+
+ link->dhcp4_configured = true;
+
+ /* New address and routes are configured now. Let's release old lease. */
+ r = dhcp4_release_old_lease(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+
+ link_check_ready(link);
+}
+
+static int dhcp4_route_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->dhcp4_messages > 0);
+
+ link->dhcp4_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -ENETUNREACH && !link->dhcp4_route_retrying) {
+
+ /* It seems kernel does not support that the prefix route cannot be configured with
+ * route table. Let's once drop the config and reconfigure them later. */
+
+ log_link_message_debug_errno(link, m, r, "Could not set DHCPv4 route, retrying later");
+ link->dhcp4_route_failed = true;
+ link->manager->dhcp4_prefix_root_cannot_set_table = true;
+ } else if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not set DHCPv4 route");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->dhcp4_messages == 0 && link->dhcp4_route_failed) {
+ link->dhcp4_route_failed = false;
+ link->dhcp4_route_retrying = true;
+
+ r = dhcp4_remove_all(link);
+ if (r < 0)
+ link_enter_failed(link);
+ return 1;
+ }
+
+ dhcp4_check_ready(link);
+
+ return 1;
+}
+
+static int route_scope_from_address(const Route *route, const struct in_addr *self_addr) {
+ assert(route);
+ assert(self_addr);
+
+ if (in4_addr_is_localhost(&route->dst.in) ||
+ (!in4_addr_is_null(self_addr) && in4_addr_equal(&route->dst.in, self_addr)))
+ return RT_SCOPE_HOST;
+ else if (in4_addr_is_null(&route->gw.in))
+ return RT_SCOPE_LINK;
+ else
+ return RT_SCOPE_UNIVERSE;
+}
+
+static bool link_prefixroute(Link *link) {
+ return !link->network->dhcp_route_table_set ||
+ link->network->dhcp_route_table == RT_TABLE_MAIN ||
+ link->manager->dhcp4_prefix_root_cannot_set_table;
+}
+
+static int dhcp_route_configure(Route *route, Link *link) {
+ Route *ret;
+ int r;
+
+ assert(route);
+ assert(link);
+
+ r = route_configure(route, link, dhcp4_route_handler, &ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set DHCPv4 route: %m");
+
+ link->dhcp4_messages++;
+
+ r = set_ensure_put(&link->dhcp_routes, &route_hash_ops, ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store DHCPv4 route: %m");
+
+ (void) set_remove(link->dhcp_routes_old, ret);
+
+ return 0;
+}
+
+static int link_set_dns_routes(Link *link, const struct in_addr *address) {
+ const struct in_addr *dns;
+ uint32_t table;
+ int i, n, r;
+
+ assert(link);
+ assert(link->dhcp_lease);
+ assert(link->network);
+
+ if (!link->network->dhcp_use_dns ||
+ !link->network->dhcp_routes_to_dns)
+ return 0;
+
+ n = sd_dhcp_lease_get_dns(link->dhcp_lease, &dns);
+ if (IN_SET(n, 0, -ENODATA))
+ return 0;
+ if (n < 0)
+ return log_link_warning_errno(link, n, "DHCP error: could not get DNS servers: %m");
+
+ table = link_get_dhcp_route_table(link);
+
+ for (i = 0; i < n; i ++) {
+ _cleanup_(route_freep) Route *route = NULL;
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ /* Set routes to DNS servers. */
+
+ route->family = AF_INET;
+ route->dst.in = dns[i];
+ route->dst_prefixlen = 32;
+ route->prefsrc.in = *address;
+ route->scope = RT_SCOPE_LINK;
+ route->protocol = RTPROT_DHCP;
+ route->priority = link->network->dhcp_route_metric;
+ route->table = table;
+
+ r = dhcp_route_configure(route, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set route to DNS server: %m");
+ }
+
+ return 0;
+}
+
+static int dhcp_prefix_route_from_lease(
+ const sd_dhcp_lease *lease,
+ uint32_t table,
+ const struct in_addr *address,
+ Route **ret_route) {
+
+ Route *route;
+ struct in_addr netmask;
+ int r;
+
+ r = sd_dhcp_lease_get_netmask((sd_dhcp_lease*) lease, &netmask);
+ if (r < 0)
+ return r;
+
+ r = route_new(&route);
+ if (r < 0)
+ return r;
+
+ route->family = AF_INET;
+ route->dst.in.s_addr = address->s_addr & netmask.s_addr;
+ route->dst_prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
+ route->prefsrc.in = *address;
+ route->scope = RT_SCOPE_LINK;
+ route->protocol = RTPROT_DHCP;
+ route->table = table;
+ *ret_route = route;
+ return 0;
+}
+
+static int link_set_dhcp_routes(Link *link) {
+ _cleanup_free_ sd_dhcp_route **static_routes = NULL;
+ bool classless_route = false, static_route = false;
+ struct in_addr address;
+ uint32_t table;
+ Route *rt;
+ int r, n;
+
+ assert(link);
+
+ if (!link->dhcp_lease) /* link went down while we configured the IP addresses? */
+ return 0;
+
+ if (!link->network) /* link went down while we configured the IP addresses? */
+ return 0;
+
+ if (!link_has_carrier(link) && !link->network->configure_without_carrier)
+ /* During configuring addresses, the link lost its carrier. As networkd is dropping
+ * the addresses now, let's not configure the routes either. */
+ return 0;
+
+ while ((rt = set_steal_first(link->dhcp_routes))) {
+ r = set_ensure_put(&link->dhcp_routes_old, &route_hash_ops, rt);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store old DHCPv4 route: %m");
+ }
+
+ table = link_get_dhcp_route_table(link);
+
+ r = sd_dhcp_lease_get_address(link->dhcp_lease, &address);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: could not get address: %m");
+
+ if (!link_prefixroute(link)) {
+ _cleanup_(route_freep) Route *prefix_route = NULL;
+
+ r = dhcp_prefix_route_from_lease(link->dhcp_lease, table, &address, &prefix_route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create prefix route: %m");
+
+ r = dhcp_route_configure(prefix_route, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set prefix route: %m");
+ }
+
+ n = sd_dhcp_lease_get_routes(link->dhcp_lease, &static_routes);
+ if (n == -ENODATA)
+ log_link_debug_errno(link, n, "DHCP: No routes received from DHCP server: %m");
+ else if (n < 0)
+ return log_link_error_errno(link, n, "DHCP: could not get routes: %m");
+
+ for (int i = 0; i < n; i++) {
+ switch (sd_dhcp_route_get_option(static_routes[i])) {
+ case SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE:
+ classless_route = true;
+ break;
+ case SD_DHCP_OPTION_STATIC_ROUTE:
+ static_route = true;
+ break;
+ }
+ }
+
+ if (link->network->dhcp_use_routes) {
+ /* if the DHCP server returns both a Classless Static Routes option and a Static Routes option,
+ * the DHCP client MUST ignore the Static Routes option. */
+ if (classless_route && static_route)
+ log_link_warning(link, "Classless static routes received from DHCP server: ignoring static-route option");
+
+ for (int i = 0; i < n; i++) {
+ _cleanup_(route_freep) Route *route = NULL;
+
+ if (classless_route &&
+ sd_dhcp_route_get_option(static_routes[i]) != SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE)
+ continue;
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ route->family = AF_INET;
+ route->protocol = RTPROT_DHCP;
+ route->gw_family = AF_INET;
+ assert_se(sd_dhcp_route_get_gateway(static_routes[i], &route->gw.in) >= 0);
+ assert_se(sd_dhcp_route_get_destination(static_routes[i], &route->dst.in) >= 0);
+ assert_se(sd_dhcp_route_get_destination_prefix_length(static_routes[i], &route->dst_prefixlen) >= 0);
+ route->priority = link->network->dhcp_route_metric;
+ route->table = table;
+ route->mtu = link->network->dhcp_route_mtu;
+ route->scope = route_scope_from_address(route, &address);
+ if (IN_SET(route->scope, RT_SCOPE_LINK, RT_SCOPE_UNIVERSE))
+ route->prefsrc.in = address;
+
+ if (set_contains(link->dhcp_routes, route))
+ continue;
+
+ r = dhcp_route_configure(route, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set route: %m");
+ }
+ }
+
+ if (link->network->dhcp_use_gateway) {
+ const struct in_addr *router;
+
+ r = sd_dhcp_lease_get_router(link->dhcp_lease, &router);
+ if (IN_SET(r, 0, -ENODATA))
+ log_link_info(link, "DHCP: No gateway received from DHCP server.");
+ else if (r < 0)
+ return log_link_error_errno(link, r, "DHCP error: could not get gateway: %m");
+ else if (in4_addr_is_null(&router[0]))
+ log_link_info(link, "DHCP: Received gateway is null.");
+ else if (classless_route)
+ /* According to RFC 3442: If the DHCP server returns both a Classless Static Routes option and
+ * a Router option, the DHCP client MUST ignore the Router option. */
+ log_link_warning(link, "Classless static routes received from DHCP server: ignoring router option");
+ else {
+ _cleanup_(route_freep) Route *route = NULL, *route_gw = NULL;
+
+ r = route_new(&route_gw);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ /* The dhcp netmask may mask out the gateway. Add an explicit
+ * route for the gw host so that we can route no matter the
+ * netmask or existing kernel route tables. */
+ route_gw->family = AF_INET;
+ route_gw->dst.in = router[0];
+ route_gw->dst_prefixlen = 32;
+ route_gw->prefsrc.in = address;
+ route_gw->scope = RT_SCOPE_LINK;
+ route_gw->protocol = RTPROT_DHCP;
+ route_gw->priority = link->network->dhcp_route_metric;
+ route_gw->table = table;
+ route_gw->mtu = link->network->dhcp_route_mtu;
+
+ r = dhcp_route_configure(route_gw, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set host route: %m");
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate route: %m");
+
+ route->family = AF_INET;
+ route->gw_family = AF_INET;
+ route->gw.in = router[0];
+ route->prefsrc.in = address;
+ route->protocol = RTPROT_DHCP;
+ route->priority = link->network->dhcp_route_metric;
+ route->table = table;
+ route->mtu = link->network->dhcp_route_mtu;
+
+ r = dhcp_route_configure(route, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set router: %m");
+
+ HASHMAP_FOREACH(rt, link->network->routes_by_section) {
+ if (!rt->gateway_from_dhcp_or_ra)
+ continue;
+
+ if (rt->gw_family != AF_INET)
+ continue;
+
+ rt->gw.in = router[0];
+ if (!rt->protocol_set)
+ rt->protocol = RTPROT_DHCP;
+ if (!rt->priority_set)
+ rt->priority = link->network->dhcp_route_metric;
+ if (!rt->table_set)
+ rt->table = table;
+ if (rt->mtu == 0)
+ rt->mtu = link->network->dhcp_route_mtu;
+
+ r = dhcp_route_configure(rt, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set gateway: %m");
+ }
+ }
+ }
+
+ return link_set_dns_routes(link, &address);
+}
+
+static int dhcp_reset_mtu(Link *link) {
+ uint16_t mtu;
+ int r;
+
+ assert(link);
+
+ if (!link->network->dhcp_use_mtu)
+ return 0;
+
+ r = sd_dhcp_lease_get_mtu(link->dhcp_lease, &mtu);
+ if (r == -ENODATA)
+ return 0;
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP error: failed to get MTU from lease: %m");
+
+ if (link->original_mtu == mtu)
+ return 0;
+
+ r = link_set_mtu(link, link->original_mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP error: could not reset MTU: %m");
+
+ return 0;
+}
+
+static int dhcp_reset_hostname(Link *link) {
+ const char *hostname;
+ int r;
+
+ assert(link);
+
+ if (!link->network->dhcp_use_hostname)
+ return 0;
+
+ hostname = link->network->dhcp_hostname;
+ if (!hostname)
+ (void) sd_dhcp_lease_get_hostname(link->dhcp_lease, &hostname);
+
+ if (!hostname)
+ return 0;
+
+ /* If a hostname was set due to the lease, then unset it now. */
+ r = manager_set_hostname(link->manager, NULL);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP error: Failed to reset transient hostname: %m");
+
+ return 0;
+}
+
+static int dhcp4_remove_route_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->dhcp4_remove_messages > 0);
+
+ link->dhcp4_remove_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -ESRCH)
+ log_link_message_warning_errno(link, m, r, "Failed to remove DHCPv4 route, ignoring");
+
+ if (link->dhcp4_remove_messages == 0) {
+ r = dhcp4_update_address(link, false);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+static int dhcp4_remove_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->dhcp4_remove_messages > 0);
+
+ link->dhcp4_remove_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EADDRNOTAVAIL)
+ log_link_message_warning_errno(link, m, r, "Failed to remove DHCPv4 address, ignoring");
+ else
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->dhcp4_remove_messages == 0) {
+ r = dhcp4_update_address(link, false);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+static int dhcp4_remove_all(Link *link) {
+ Route *route;
+ int k, r = 0;
+
+ assert(link);
+
+ SET_FOREACH(route, link->dhcp_routes) {
+ k = route_remove(route, NULL, link, dhcp4_remove_route_handler);
+ if (k < 0)
+ r = k;
+ else
+ link->dhcp4_remove_messages++;
+ }
+
+ if (link->dhcp_address) {
+ k = address_remove(link->dhcp_address, link, dhcp4_remove_address_handler);
+ if (k < 0)
+ r = k;
+ else
+ link->dhcp4_remove_messages++;
+ }
+
+ return r;
+}
+
+static int dhcp_lease_lost(Link *link) {
+ int k, r = 0;
+
+ assert(link);
+ assert(link->dhcp_lease);
+
+ log_link_info(link, "DHCP lease lost");
+
+ link->dhcp4_configured = false;
+
+ /* dhcp_lease_lost() may be called during renewing IP address. */
+ k = dhcp4_release_old_lease(link);
+ if (k < 0)
+ r = k;
+
+ k = dhcp4_remove_all(link);
+ if (k < 0)
+ r = k;
+
+ k = dhcp_reset_mtu(link);
+ if (k < 0)
+ r = k;
+
+ k = dhcp_reset_hostname(link);
+ if (k < 0)
+ r = k;
+
+ link->dhcp_lease = sd_dhcp_lease_unref(link->dhcp_lease);
+ link_dirty(link);
+
+ (void) sd_ipv4acd_stop(link->dhcp_acd);
+
+ return r;
+}
+
+static void dhcp_address_on_acd(sd_ipv4acd *acd, int event, void *userdata) {
+ _cleanup_free_ char *pretty = NULL;
+ union in_addr_union address = {};
+ Link *link;
+ int r;
+
+ assert(acd);
+ assert(userdata);
+
+ link = userdata;
+
+ switch (event) {
+ case SD_IPV4ACD_EVENT_STOP:
+ log_link_debug(link, "Stopping ACD client for DHCP4...");
+ return;
+
+ case SD_IPV4ACD_EVENT_BIND:
+ if (DEBUG_LOGGING) {
+ (void) sd_dhcp_lease_get_address(link->dhcp_lease, &address.in);
+ (void) in_addr_to_string(AF_INET, &address, &pretty);
+ log_link_debug(link, "Successfully claimed DHCP4 address %s", strna(pretty));
+ }
+ link->dhcp4_address_bind = true;
+ dhcp4_check_ready(link);
+ break;
+
+ case SD_IPV4ACD_EVENT_CONFLICT:
+ (void) sd_dhcp_lease_get_address(link->dhcp_lease, &address.in);
+ (void) in_addr_to_string(AF_INET, &address, &pretty);
+ log_link_warning(link, "DAD conflict. Dropping DHCP4 address %s", strna(pretty));
+
+ r = sd_dhcp_client_send_decline(link->dhcp_client);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to send DHCP DECLINE, ignoring: %m");
+
+ if (link->dhcp_lease) {
+ r = dhcp_lease_lost(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+ break;
+
+ default:
+ assert_not_reached("Invalid IPv4ACD event.");
+ }
+
+ (void) sd_ipv4acd_stop(acd);
+
+ return;
+}
+
+static int dhcp4_configure_dad(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->network);
+
+ if (!link->network->dhcp_send_decline)
+ return 0;
+
+ if (!link->dhcp_acd) {
+ r = sd_ipv4acd_new(&link->dhcp_acd);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_attach_event(link->dhcp_acd, link->manager->event, 0);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_ipv4acd_set_ifindex(link->dhcp_acd, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_mac(link->dhcp_acd, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int dhcp4_dad_update_mac(Link *link) {
+ bool running;
+ int r;
+
+ assert(link);
+
+ if (!link->dhcp_acd)
+ return 0;
+
+ running = sd_ipv4acd_is_running(link->dhcp_acd);
+
+ r = sd_ipv4acd_stop(link->dhcp_acd);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_mac(link->dhcp_acd, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ if (running) {
+ r = sd_ipv4acd_start(link->dhcp_acd, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dhcp4_start_acd(Link *link) {
+ union in_addr_union addr;
+ struct in_addr old;
+ int r;
+
+ if (!link->network->dhcp_send_decline)
+ return 0;
+
+ if (!link->dhcp_lease)
+ return 0;
+
+ (void) sd_ipv4acd_stop(link->dhcp_acd);
+
+ link->dhcp4_address_bind = false;
+
+ r = sd_dhcp_lease_get_address(link->dhcp_lease, &addr.in);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_get_address(link->dhcp_acd, &old);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_address(link->dhcp_acd, &addr.in);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4acd_set_callback(link->dhcp_acd, dhcp_address_on_acd, link);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *pretty = NULL;
+
+ (void) in_addr_to_string(AF_INET, &addr, &pretty);
+ log_link_debug(link, "Starting IPv4ACD client. Probing DHCPv4 address %s", strna(pretty));
+ }
+
+ r = sd_ipv4acd_start(link->dhcp_acd, !in4_addr_equal(&addr.in, &old));
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int dhcp4_address_ready_callback(Address *address) {
+ Link *link;
+ int r;
+
+ assert(address);
+
+ link = address->link;
+
+ /* Do not call this again. */
+ address->callback = NULL;
+
+ r = link_set_dhcp_routes(link);
+ if (r < 0)
+ return r;
+
+ /* Reconfigure static routes as kernel may remove some routes when lease expires. */
+ r = link_set_routes(link);
+ if (r < 0)
+ return r;
+
+ r = dhcp4_start_acd(link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to start IPv4ACD for DHCP4 address: %m");
+
+ dhcp4_check_ready(link);
+ return 0;
+}
+
+static int dhcp4_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not set DHCPv4 address");
+ link_enter_failed(link);
+ return 1;
+ } else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (address_is_ready(link->dhcp_address)) {
+ r = dhcp4_address_ready_callback(link->dhcp_address);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+ } else
+ link->dhcp_address->callback = dhcp4_address_ready_callback;
+
+ return 1;
+}
+
+static int dhcp4_update_address(Link *link, bool announce) {
+ _cleanup_(address_freep) Address *addr = NULL;
+ uint32_t lifetime = CACHE_INFO_INFINITY_LIFE_TIME;
+ struct in_addr address, netmask;
+ unsigned prefixlen;
+ Address *ret;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (!link->dhcp_lease)
+ return 0;
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ link->dhcp4_configured = false;
+
+ /* address_handler calls link_set_routes() and link_set_nexthop(). Before they are called, the
+ * related flags must be cleared. Otherwise, the link becomes configured state before routes
+ * are configured. */
+ link->static_routes_configured = false;
+ link->static_nexthops_configured = false;
+
+ r = sd_dhcp_lease_get_address(link->dhcp_lease, &address);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no address: %m");
+
+ r = sd_dhcp_lease_get_netmask(link->dhcp_lease, &netmask);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no netmask: %m");
+
+ if (!FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_DHCP)) {
+ r = sd_dhcp_lease_get_lifetime(link->dhcp_lease, &lifetime);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no lifetime: %m");
+ }
+
+ prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
+
+ if (announce) {
+ const struct in_addr *router;
+
+ r = sd_dhcp_lease_get_router(link->dhcp_lease, &router);
+ if (r < 0 && r != -ENODATA)
+ return log_link_error_errno(link, r, "DHCP error: Could not get gateway: %m");
+
+ if (r > 0 && !in4_addr_is_null(&router[0]))
+ log_struct(LOG_INFO,
+ LOG_LINK_INTERFACE(link),
+ LOG_LINK_MESSAGE(link, "DHCPv4 address "IPV4_ADDRESS_FMT_STR"/%u via "IPV4_ADDRESS_FMT_STR,
+ IPV4_ADDRESS_FMT_VAL(address),
+ prefixlen,
+ IPV4_ADDRESS_FMT_VAL(router[0])),
+ "ADDRESS="IPV4_ADDRESS_FMT_STR, IPV4_ADDRESS_FMT_VAL(address),
+ "PREFIXLEN=%u", prefixlen,
+ "GATEWAY="IPV4_ADDRESS_FMT_STR, IPV4_ADDRESS_FMT_VAL(router[0]));
+ else
+ log_struct(LOG_INFO,
+ LOG_LINK_INTERFACE(link),
+ LOG_LINK_MESSAGE(link, "DHCPv4 address "IPV4_ADDRESS_FMT_STR"/%u",
+ IPV4_ADDRESS_FMT_VAL(address),
+ prefixlen),
+ "ADDRESS="IPV4_ADDRESS_FMT_STR, IPV4_ADDRESS_FMT_VAL(address),
+ "PREFIXLEN=%u", prefixlen);
+ }
+
+ r = address_new(&addr);
+ if (r < 0)
+ return log_oom();
+
+ addr->family = AF_INET;
+ addr->in_addr.in.s_addr = address.s_addr;
+ addr->cinfo.ifa_prefered = lifetime;
+ addr->cinfo.ifa_valid = lifetime;
+ addr->prefixlen = prefixlen;
+ if (prefixlen <= 30)
+ addr->broadcast.s_addr = address.s_addr | ~netmask.s_addr;
+ SET_FLAG(addr->flags, IFA_F_NOPREFIXROUTE, !link_prefixroute(link));
+
+ /* allow reusing an existing address and simply update its lifetime
+ * in case it already exists */
+ r = address_configure(addr, link, dhcp4_address_handler, true, &ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set DHCPv4 address: %m");
+
+ if (!address_equal(link->dhcp_address, ret))
+ link->dhcp_address_old = link->dhcp_address;
+ link->dhcp_address = ret;
+
+ return 0;
+}
+
+static int dhcp_lease_renew(sd_dhcp_client *client, Link *link) {
+ sd_dhcp_lease *lease;
+ int r;
+
+ assert(link);
+ assert(client);
+
+ r = sd_dhcp_client_get_lease(client, &lease);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "DHCP error: no lease: %m");
+
+ sd_dhcp_lease_unref(link->dhcp_lease);
+ link->dhcp_lease = sd_dhcp_lease_ref(lease);
+ link_dirty(link);
+
+ return dhcp4_update_address(link, false);
+}
+
+static int dhcp_lease_acquired(sd_dhcp_client *client, Link *link) {
+ sd_dhcp_lease *lease;
+ int r;
+
+ assert(client);
+ assert(link);
+
+ r = sd_dhcp_client_get_lease(client, &lease);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP error: No lease: %m");
+
+ sd_dhcp_lease_unref(link->dhcp_lease);
+ link->dhcp_lease = sd_dhcp_lease_ref(lease);
+ link_dirty(link);
+
+ if (link->network->dhcp_use_mtu) {
+ uint16_t mtu;
+
+ r = sd_dhcp_lease_get_mtu(lease, &mtu);
+ if (r >= 0) {
+ r = link_set_mtu(link, mtu);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to set MTU to %" PRIu16 ": %m", mtu);
+ }
+ }
+
+ if (link->network->dhcp_use_hostname) {
+ const char *dhcpname = NULL;
+ _cleanup_free_ char *hostname = NULL;
+
+ if (link->network->dhcp_hostname)
+ dhcpname = link->network->dhcp_hostname;
+ else
+ (void) sd_dhcp_lease_get_hostname(lease, &dhcpname);
+
+ if (dhcpname) {
+ r = shorten_overlong(dhcpname, &hostname);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Unable to shorten overlong DHCP hostname '%s', ignoring: %m", dhcpname);
+ if (r == 1)
+ log_link_notice(link, "Overlong DHCP hostname received, shortened from '%s' to '%s'", dhcpname, hostname);
+ }
+
+ if (hostname) {
+ r = manager_set_hostname(link->manager, hostname);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to set transient hostname to '%s': %m", hostname);
+ }
+ }
+
+ if (link->network->dhcp_use_timezone) {
+ const char *tz = NULL;
+
+ (void) sd_dhcp_lease_get_timezone(link->dhcp_lease, &tz);
+
+ if (tz) {
+ r = manager_set_timezone(link->manager, tz);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to set timezone to '%s': %m", tz);
+ }
+ }
+
+ if (link->dhcp4_remove_messages == 0) {
+ r = dhcp4_update_address(link, true);
+ if (r < 0)
+ return r;
+ } else
+ log_link_debug(link,
+ "The link has previously assigned DHCPv4 address or routes. "
+ "The newly assigned address and routes will set up after old ones are removed.");
+
+ return 0;
+}
+
+static int dhcp_lease_ip_change(sd_dhcp_client *client, Link *link) {
+ int r;
+
+ r = dhcp_lease_acquired(client, link);
+ if (r < 0)
+ (void) dhcp_lease_lost(link);
+
+ return r;
+}
+
+static int dhcp_server_is_deny_listed(Link *link, sd_dhcp_client *client) {
+ sd_dhcp_lease *lease;
+ struct in_addr addr;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(client);
+
+ r = sd_dhcp_client_get_lease(client, &lease);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get DHCP lease: %m");
+
+ r = sd_dhcp_lease_get_server_identifier(lease, &addr);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to get DHCP server IP address: %m");
+
+ if (set_contains(link->network->dhcp_deny_listed_ip, UINT32_TO_PTR(addr.s_addr))) {
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_LINK_MESSAGE(link, "DHCPv4 server IP address "IPV4_ADDRESS_FMT_STR" found in deny-list, ignoring offer",
+ IPV4_ADDRESS_FMT_VAL(addr)));
+ return true;
+ }
+
+ return false;
+}
+
+static int dhcp_server_is_allow_listed(Link *link, sd_dhcp_client *client) {
+ sd_dhcp_lease *lease;
+ struct in_addr addr;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(client);
+
+ r = sd_dhcp_client_get_lease(client, &lease);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get DHCP lease: %m");
+
+ r = sd_dhcp_lease_get_server_identifier(lease, &addr);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to get DHCP server IP address: %m");
+
+ if (set_contains(link->network->dhcp_allow_listed_ip, UINT32_TO_PTR(addr.s_addr))) {
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_LINK_MESSAGE(link, "DHCPv4 server IP address "IPV4_ADDRESS_FMT_STR" found in allow-list, accepting offer",
+ IPV4_ADDRESS_FMT_VAL(addr)));
+ return true;
+ }
+
+ return false;
+}
+
+static int dhcp4_handler(sd_dhcp_client *client, int event, void *userdata) {
+ Link *link = userdata;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 0;
+
+ switch (event) {
+ case SD_DHCP_CLIENT_EVENT_STOP:
+
+ if (link_ipv4ll_enabled(link, ADDRESS_FAMILY_FALLBACK_IPV4)) {
+ assert(link->ipv4ll);
+
+ log_link_debug(link, "DHCP client is stopped. Acquiring IPv4 link-local address");
+
+ r = sd_ipv4ll_start(link->ipv4ll);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not acquire IPv4 link-local address: %m");
+ }
+
+ if (FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_DHCP)) {
+ log_link_notice(link, "DHCPv4 connection considered critical, ignoring request to reconfigure it.");
+ return 0;
+ }
+
+ if (link->dhcp_lease) {
+ if (link->network->dhcp_send_release) {
+ r = sd_dhcp_client_send_release(client);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to send DHCP RELEASE, ignoring: %m");
+ }
+
+ r = dhcp_lease_lost(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ break;
+ case SD_DHCP_CLIENT_EVENT_EXPIRED:
+ if (FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_DHCP)) {
+ log_link_notice(link, "DHCPv4 connection considered critical, ignoring request to reconfigure it.");
+ return 0;
+ }
+
+ if (link->dhcp_lease) {
+ r = dhcp_lease_lost(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ break;
+ case SD_DHCP_CLIENT_EVENT_IP_CHANGE:
+ if (FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_DHCP)) {
+ log_link_notice(link, "DHCPv4 connection considered critical, ignoring request to reconfigure it.");
+ return 0;
+ }
+
+ r = dhcp_lease_ip_change(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+
+ break;
+ case SD_DHCP_CLIENT_EVENT_RENEW:
+ r = dhcp_lease_renew(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ break;
+ case SD_DHCP_CLIENT_EVENT_IP_ACQUIRE:
+ r = dhcp_lease_acquired(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ break;
+ case SD_DHCP_CLIENT_EVENT_SELECTING:
+ if (!set_isempty(link->network->dhcp_allow_listed_ip)) {
+ r = dhcp_server_is_allow_listed(link, client);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOMSG;
+ } else {
+ r = dhcp_server_is_deny_listed(link, client);
+ if (r < 0)
+ return r;
+ if (r != 0)
+ return -ENOMSG;
+ }
+ break;
+ default:
+ if (event < 0)
+ log_link_warning_errno(link, event, "DHCP error: Client failed: %m");
+ else
+ log_link_warning(link, "DHCP unknown event: %i", event);
+ break;
+ }
+
+ return 0;
+}
+
+static int dhcp4_set_hostname(Link *link) {
+ _cleanup_free_ char *hostname = NULL;
+ const char *hn;
+ int r;
+
+ assert(link);
+
+ if (!link->network->dhcp_send_hostname)
+ hn = NULL;
+ else if (link->network->dhcp_hostname)
+ hn = link->network->dhcp_hostname;
+ else {
+ r = gethostname_strict(&hostname);
+ if (r < 0 && r != -ENXIO) /* ENXIO: no hostname set or hostname is "localhost" */
+ return r;
+
+ hn = hostname;
+ }
+
+ r = sd_dhcp_client_set_hostname(link->dhcp_client, hn);
+ if (r == -EINVAL && hostname)
+ /* Ignore error when the machine's hostname is not suitable to send in DHCP packet. */
+ log_link_warning_errno(link, r, "DHCP4 CLIENT: Failed to set hostname from kernel hostname, ignoring: %m");
+ else if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set hostname: %m");
+
+ return 0;
+}
+
+static bool promote_secondaries_enabled(const char *ifname) {
+ _cleanup_free_ char *promote_secondaries_sysctl = NULL;
+ char *promote_secondaries_path;
+ int r;
+
+ promote_secondaries_path = strjoina("net/ipv4/conf/", ifname, "/promote_secondaries");
+ r = sysctl_read(promote_secondaries_path, &promote_secondaries_sysctl);
+ if (r < 0) {
+ log_debug_errno(r, "Cannot read sysctl %s", promote_secondaries_path);
+ return false;
+ }
+
+ truncate_nl(promote_secondaries_sysctl);
+ r = parse_boolean(promote_secondaries_sysctl);
+ if (r < 0)
+ log_warning_errno(r, "Cannot parse sysctl %s with content %s as boolean", promote_secondaries_path, promote_secondaries_sysctl);
+ return r > 0;
+}
+
+/* dhcp4_set_promote_secondaries will ensure this interface has
+ * the "promote_secondaries" option in the kernel set. If this sysctl
+ * is not set DHCP will work only as long as the IP address does not
+ * changes between leases. The kernel will remove all secondary IP
+ * addresses of an interface otherwise. The way systemd-network works
+ * is that the new IP of a lease is added as a secondary IP and when
+ * the primary one expires it relies on the kernel to promote the
+ * secondary IP. See also https://github.com/systemd/systemd/issues/7163
+ */
+static int dhcp4_set_promote_secondaries(Link *link) {
+ int r;
+
+ assert(link);
+
+ /* check if the kernel has promote_secondaries enabled for our
+ * interface. If it is not globally enabled or enabled for the
+ * specific interface we must either enable it.
+ */
+ if (!(promote_secondaries_enabled("all") || promote_secondaries_enabled(link->ifname))) {
+ char *promote_secondaries_path = NULL;
+
+ log_link_debug(link, "promote_secondaries is unset, setting it");
+ promote_secondaries_path = strjoina("net/ipv4/conf/", link->ifname, "/promote_secondaries");
+ r = sysctl_write(promote_secondaries_path, "1");
+ if (r < 0)
+ log_link_warning_errno(link, r, "cannot set sysctl %s to 1", promote_secondaries_path);
+ return r > 0;
+ }
+
+ return 0;
+}
+
+static int dhcp4_set_client_identifier(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->dhcp_client);
+
+ switch (link->network->dhcp_client_identifier) {
+ case DHCP_CLIENT_ID_DUID: {
+ /* If configured, apply user specified DUID and IAID */
+ const DUID *duid = link_get_duid(link);
+
+ if (duid->type == DUID_TYPE_LLT && duid->raw_data_len == 0)
+ r = sd_dhcp_client_set_iaid_duid_llt(link->dhcp_client,
+ link->network->iaid_set,
+ link->network->iaid,
+ duid->llt_time);
+ else
+ r = sd_dhcp_client_set_iaid_duid(link->dhcp_client,
+ link->network->iaid_set,
+ link->network->iaid,
+ duid->type,
+ duid->raw_data_len > 0 ? duid->raw_data : NULL,
+ duid->raw_data_len);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set IAID+DUID: %m");
+ break;
+ }
+ case DHCP_CLIENT_ID_DUID_ONLY: {
+ /* If configured, apply user specified DUID */
+ const DUID *duid = link_get_duid(link);
+
+ if (duid->type == DUID_TYPE_LLT && duid->raw_data_len == 0)
+ r = sd_dhcp_client_set_duid_llt(link->dhcp_client,
+ duid->llt_time);
+ else
+ r = sd_dhcp_client_set_duid(link->dhcp_client,
+ duid->type,
+ duid->raw_data_len > 0 ? duid->raw_data : NULL,
+ duid->raw_data_len);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set DUID: %m");
+ break;
+ }
+ case DHCP_CLIENT_ID_MAC: {
+ const uint8_t *hw_addr = link->hw_addr.addr.bytes;
+ size_t hw_addr_len = link->hw_addr.length;
+
+ if (link->iftype == ARPHRD_INFINIBAND && hw_addr_len == INFINIBAND_ALEN) {
+ /* set_client_id expects only last 8 bytes of an IB address */
+ hw_addr += INFINIBAND_ALEN - 8;
+ hw_addr_len -= INFINIBAND_ALEN - 8;
+ }
+
+ r = sd_dhcp_client_set_client_id(link->dhcp_client,
+ link->iftype,
+ hw_addr,
+ hw_addr_len);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set client ID: %m");
+ break;
+ }
+ default:
+ assert_not_reached("Unknown client identifier type.");
+ }
+
+ return 0;
+}
+
+static int dhcp4_init(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (link->dhcp_client)
+ return 0;
+
+ r = sd_dhcp_client_new(&link->dhcp_client, link->network->dhcp_anonymize);
+ if (r < 0)
+ return r;
+
+ r = sd_dhcp_client_attach_event(link->dhcp_client, link->manager->event, 0);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int dhcp4_configure(Link *link) {
+ sd_dhcp_option *send_option;
+ void *request_options;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (!link_dhcp4_enabled(link))
+ return 0;
+
+ r = dhcp4_set_promote_secondaries(link);
+ if (r < 0)
+ return r;
+
+ r = dhcp4_init(link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to initialize DHCP4 client: %m");
+
+ r = sd_dhcp_client_set_mac(link->dhcp_client,
+ link->hw_addr.addr.bytes,
+ link->bcast_addr.length > 0 ? link->bcast_addr.addr.bytes : NULL,
+ link->hw_addr.length, link->iftype);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set MAC address: %m");
+
+ r = sd_dhcp_client_set_ifindex(link->dhcp_client, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set ifindex: %m");
+
+ r = sd_dhcp_client_set_callback(link->dhcp_client, dhcp4_handler, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set callback: %m");
+
+ r = sd_dhcp_client_set_request_broadcast(link->dhcp_client,
+ link->network->dhcp_broadcast);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for broadcast: %m");
+
+ if (link->mtu) {
+ r = sd_dhcp_client_set_mtu(link->dhcp_client, link->mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set MTU: %m");
+ }
+
+ if (link->network->dhcp_use_mtu) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client,
+ SD_DHCP_OPTION_INTERFACE_MTU);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for MTU: %m");
+ }
+
+ /* NOTE: even if this variable is called "use", it also "sends" PRL
+ * options, maybe there should be a different configuration variable
+ * to send or not route options?. */
+ /* NOTE: when using Anonymize=yes, routes PRL options are sent
+ * by default, so they don't need to be added here. */
+ if (link->network->dhcp_use_routes && !link->network->dhcp_anonymize) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client,
+ SD_DHCP_OPTION_STATIC_ROUTE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for static route: %m");
+
+ r = sd_dhcp_client_set_request_option(link->dhcp_client,
+ SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for classless static route: %m");
+ }
+
+ if (link->network->dhcp_use_domains != DHCP_USE_DOMAINS_NO && !link->network->dhcp_anonymize) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client, SD_DHCP_OPTION_DOMAIN_SEARCH_LIST);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for domain search list: %m");
+ }
+
+ if (link->network->dhcp_use_ntp) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client, SD_DHCP_OPTION_NTP_SERVER);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for NTP server: %m");
+ }
+
+ if (link->network->dhcp_use_sip) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client, SD_DHCP_OPTION_SIP_SERVER);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for SIP server: %m");
+ }
+
+ if (link->network->dhcp_use_timezone) {
+ r = sd_dhcp_client_set_request_option(link->dhcp_client, SD_DHCP_OPTION_NEW_TZDB_TIMEZONE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for timezone: %m");
+ }
+
+ SET_FOREACH(request_options, link->network->dhcp_request_options) {
+ uint32_t option = PTR_TO_UINT32(request_options);
+
+ r = sd_dhcp_client_set_request_option(link->dhcp_client, option);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set request flag for '%u': %m", option);
+ }
+
+ ORDERED_HASHMAP_FOREACH(send_option, link->network->dhcp_client_send_options) {
+ r = sd_dhcp_client_add_option(link->dhcp_client, send_option);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set send option: %m");
+ }
+
+ ORDERED_HASHMAP_FOREACH(send_option, link->network->dhcp_client_send_vendor_options) {
+ r = sd_dhcp_client_add_vendor_option(link->dhcp_client, send_option);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set send option: %m");
+ }
+
+ r = dhcp4_set_hostname(link);
+ if (r < 0)
+ return r;
+
+ if (link->network->dhcp_vendor_class_identifier) {
+ r = sd_dhcp_client_set_vendor_class_identifier(link->dhcp_client,
+ link->network->dhcp_vendor_class_identifier);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set vendor class identifier: %m");
+ }
+
+ if (link->network->dhcp_mudurl) {
+ r = sd_dhcp_client_set_mud_url(link->dhcp_client,
+ link->network->dhcp_mudurl);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set MUD URL: %m");
+ }
+
+ if (link->network->dhcp_user_class) {
+ r = sd_dhcp_client_set_user_class(link->dhcp_client, link->network->dhcp_user_class);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set user class: %m");
+ }
+
+ if (link->network->dhcp_client_port) {
+ r = sd_dhcp_client_set_client_port(link->dhcp_client, link->network->dhcp_client_port);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set listen port: %m");
+ }
+
+ if (link->network->dhcp_max_attempts > 0) {
+ r = sd_dhcp_client_set_max_attempts(link->dhcp_client, link->network->dhcp_max_attempts);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set max attempts: %m");
+ }
+
+ if (link->network->dhcp_ip_service_type > 0) {
+ r = sd_dhcp_client_set_service_type(link->dhcp_client, link->network->dhcp_ip_service_type);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to set IP service type: %m");
+ }
+
+ if (link->network->dhcp_fallback_lease_lifetime > 0) {
+ r = sd_dhcp_client_set_fallback_lease_lifetime(link->dhcp_client, link->network->dhcp_fallback_lease_lifetime);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed set to lease lifetime: %m");
+ }
+
+ r = dhcp4_configure_dad(link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP4 CLIENT: Failed to configure service type: %m");
+
+ return dhcp4_set_client_identifier(link);
+}
+
+int dhcp4_update_mac(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (!link->dhcp_client)
+ return 0;
+
+ r = sd_dhcp_client_set_mac(link->dhcp_client, link->hw_addr.addr.bytes,
+ link->bcast_addr.length > 0 ? link->bcast_addr.addr.bytes : NULL,
+ link->hw_addr.length, link->iftype);
+ if (r < 0)
+ return r;
+
+ r = dhcp4_set_client_identifier(link);
+ if (r < 0)
+ return r;
+
+ r = dhcp4_dad_update_mac(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int link_deserialize_dhcp4(Link *link, const char *dhcp4_address) {
+ union in_addr_union address;
+ int r;
+
+ assert(link);
+
+ if (isempty(dhcp4_address))
+ return 0;
+
+ r = in_addr_from_string(AF_INET, dhcp4_address, &address);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to parse DHCPv4 address: %s", dhcp4_address);
+
+ r = dhcp4_init(link);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to initialize DHCPv4 client: %m");
+
+ r = sd_dhcp_client_set_request_address(link->dhcp_client, &address.in);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to set initial DHCPv4 address %s: %m", dhcp4_address);
+
+ return 0;
+}
+
+int config_parse_dhcp_max_attempts(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ uint64_t a;
+ int r;
+
+ assert(network);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ network->dhcp_max_attempts = 0;
+ return 0;
+ }
+
+ if (streq(rvalue, "infinity")) {
+ network->dhcp_max_attempts = (uint64_t) -1;
+ return 0;
+ }
+
+ r = safe_atou64(rvalue, &a);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCP maximum attempts, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (a == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "%s= must be positive integer or 'infinity', ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ network->dhcp_max_attempts = a;
+
+ return 0;
+}
+
+int config_parse_dhcp_acl_ip_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ Set **acl;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ acl = STR_IN_SET(lvalue, "DenyList", "BlackList") ? &network->dhcp_deny_listed_ip : &network->dhcp_allow_listed_ip;
+
+ if (isempty(rvalue)) {
+ *acl = set_free(*acl);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *n = NULL;
+ union in_addr_union ip;
+
+ r = extract_first_word(&p, &n, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCP '%s=' IP address, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = in_addr_from_string(AF_INET, n, &ip);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "DHCP '%s=' IP address is invalid, ignoring assignment: %s", lvalue, n);
+ continue;
+ }
+
+ r = set_ensure_put(acl, NULL, UINT32_TO_PTR(ip.in.s_addr));
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store DHCP '%s=' IP address '%s', ignoring assignment: %m", lvalue, n);
+ }
+}
+
+int config_parse_dhcp_ip_service_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (streq(rvalue, "CS4"))
+ *((int *)data) = IPTOS_CLASS_CS4;
+ else if (streq(rvalue, "CS6"))
+ *((int *)data) = IPTOS_CLASS_CS6;
+ else
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse IPServiceType type '%s', ignoring.", rvalue);
+
+ return 0;
+}
+
+int config_parse_dhcp_mud_url(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *unescaped = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ network->dhcp_mudurl = mfree(network->dhcp_mudurl);
+ return 0;
+ }
+
+ r = cunescape(rvalue, 0, &unescaped);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to Failed to unescape MUD URL, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (!http_url_is_valid(unescaped) || strlen(unescaped) > 255) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse MUD URL '%s', ignoring: %m", rvalue);
+
+ return 0;
+ }
+
+ return free_and_strdup_warn(&network->dhcp_mudurl, unescaped);
+}
+
+int config_parse_dhcp_fallback_lease_lifetime(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ Network *network = userdata;
+ uint32_t k;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ network->dhcp_fallback_lease_lifetime = 0;
+ return 0;
+ }
+
+ /* We accept only "forever" or "infinity". */
+ if (STR_IN_SET(rvalue, "forever", "infinity"))
+ k = CACHE_INFO_INFINITY_LIFE_TIME;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid LeaseLifetime= value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ network->dhcp_fallback_lease_lifetime = k;
+
+ return 0;
+}
+
+static const char* const dhcp_client_identifier_table[_DHCP_CLIENT_ID_MAX] = {
+ [DHCP_CLIENT_ID_MAC] = "mac",
+ [DHCP_CLIENT_ID_DUID] = "duid",
+ [DHCP_CLIENT_ID_DUID_ONLY] = "duid-only",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(dhcp_client_identifier, DHCPClientIdentifier);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dhcp_client_identifier, dhcp_client_identifier, DHCPClientIdentifier,
+ "Failed to parse client identifier type");
diff --git a/src/network/networkd-dhcp4.h b/src/network/networkd-dhcp4.h
new file mode 100644
index 0000000..daab5b1
--- /dev/null
+++ b/src/network/networkd-dhcp4.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+
+typedef struct Link Link;
+
+typedef enum DHCPClientIdentifier {
+ DHCP_CLIENT_ID_MAC,
+ DHCP_CLIENT_ID_DUID,
+ /* The following option may not be good for RFC regarding DHCP (3315 and 4361).
+ * But some setups require this. E.g., Sky Broadband, the second largest provider in the UK
+ * requires the client id to be set to a custom string, reported at
+ * https://github.com/systemd/systemd/issues/7828 */
+ DHCP_CLIENT_ID_DUID_ONLY,
+ _DHCP_CLIENT_ID_MAX,
+ _DHCP_CLIENT_ID_INVALID = -1,
+} DHCPClientIdentifier;
+
+int dhcp4_configure(Link *link);
+int dhcp4_update_mac(Link *link);
+
+int link_deserialize_dhcp4(Link *link, const char *dhcp4_address);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_client_identifier);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_acl_ip_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_max_attempts);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_ip_service_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_mud_url);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp_fallback_lease_lifetime);
diff --git a/src/network/networkd-dhcp6.c b/src/network/networkd-dhcp6.c
new file mode 100644
index 0000000..d4d4182
--- /dev/null
+++ b/src/network/networkd-dhcp6.c
@@ -0,0 +1,1719 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/in.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+
+#include "sd-dhcp6-client.h"
+
+#include "escape.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "missing_network.h"
+#include "network-internal.h"
+#include "networkd-address.h"
+#include "networkd-dhcp6.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-radv.h"
+#include "siphash24.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "radv-internal.h"
+#include "web-util.h"
+
+bool link_dhcp6_pd_is_enabled(Link *link) {
+ assert(link);
+
+ if (!link->network)
+ return false;
+
+ return link->network->dhcp6_pd;
+}
+
+static bool dhcp6_lease_has_pd_prefix(sd_dhcp6_lease *lease) {
+ uint32_t lifetime_preferred, lifetime_valid;
+ union in_addr_union pd_prefix;
+ uint8_t pd_prefix_len;
+
+ if (!lease)
+ return false;
+
+ sd_dhcp6_lease_reset_pd_prefix_iter(lease);
+
+ return sd_dhcp6_lease_get_pd(lease, &pd_prefix.in6, &pd_prefix_len, &lifetime_preferred, &lifetime_valid) >= 0;
+}
+
+DHCP6DelegatedPrefix *dhcp6_pd_free(DHCP6DelegatedPrefix *p) {
+ if (!p)
+ return NULL;
+
+ if (p->link && p->link->manager) {
+ hashmap_remove(p->link->manager->dhcp6_prefixes, &p->prefix);
+ set_remove(p->link->manager->dhcp6_pd_prefixes, p);
+ }
+
+ link_unref(p->link);
+ return mfree(p);
+}
+
+static void dhcp6_pd_hash_func(const DHCP6DelegatedPrefix *p, struct siphash *state) {
+ assert(p);
+
+ siphash24_compress(&p->pd_prefix, sizeof(p->pd_prefix), state);
+ siphash24_compress(&p->link, sizeof(p->link), state);
+}
+
+static int dhcp6_pd_compare_func(const DHCP6DelegatedPrefix *a, const DHCP6DelegatedPrefix *b) {
+ int r;
+
+ r = memcmp(&a->pd_prefix, &b->pd_prefix, sizeof(a->pd_prefix));
+ if (r != 0)
+ return r;
+
+ return CMP(a->link, b->link);
+}
+
+DEFINE_HASH_OPS(dhcp6_pd_hash_ops, DHCP6DelegatedPrefix, dhcp6_pd_hash_func, dhcp6_pd_compare_func);
+
+static Link *dhcp6_pd_get_link_by_prefix(Link *link, const union in_addr_union *prefix) {
+ DHCP6DelegatedPrefix *pd;
+
+ assert(link);
+ assert(link->manager);
+ assert(prefix);
+
+ pd = hashmap_get(link->manager->dhcp6_prefixes, &prefix->in6);
+ if (!pd)
+ return NULL;
+
+ return pd->link;
+}
+
+static int dhcp6_pd_get_assigned_prefix(Link *link, const union in_addr_union *pd_prefix, union in_addr_union *ret_prefix) {
+ DHCP6DelegatedPrefix *pd, in;
+
+ assert(link);
+ assert(link->manager);
+ assert(pd_prefix);
+ assert(ret_prefix);
+
+ in = (DHCP6DelegatedPrefix) {
+ .pd_prefix = pd_prefix->in6,
+ .link = link,
+ };
+
+ pd = set_get(link->manager->dhcp6_pd_prefixes, &in);
+ if (!pd)
+ return -ENOENT;
+
+ ret_prefix->in6 = pd->prefix;
+ return 0;
+}
+
+static int dhcp6_pd_remove_old(Link *link, bool force);
+
+static int dhcp6_pd_address_callback(Address *address) {
+ Address *a;
+
+ assert(address);
+ assert(address->link);
+
+ /* Make this called only once */
+ SET_FOREACH(a, address->link->dhcp6_pd_addresses)
+ a->callback = NULL;
+
+ return dhcp6_pd_remove_old(address->link, true);
+}
+
+static int dhcp6_pd_remove_old(Link *link, bool force) {
+ Address *address;
+ Route *route;
+ int k, r = 0;
+
+ assert(link);
+ assert(link->manager);
+
+ if (!force && (link->dhcp6_pd_address_messages != 0 || link->dhcp6_pd_route_configured != 0))
+ return 0;
+
+ if (set_isempty(link->dhcp6_pd_addresses_old) && set_isempty(link->dhcp6_pd_routes_old))
+ return 0;
+
+ if (!force) {
+ bool set_callback = !set_isempty(link->dhcp6_pd_addresses);
+
+ SET_FOREACH(address, link->dhcp6_pd_addresses)
+ if (address_is_ready(address)) {
+ set_callback = false;
+ break;
+ }
+
+ if (set_callback) {
+ SET_FOREACH(address, link->dhcp6_pd_addresses)
+ address->callback = dhcp6_pd_address_callback;
+ return 0;
+ }
+ }
+
+ log_link_debug(link, "Removing old DHCPv6 Prefix Delegation addresses and routes.");
+
+ link_dirty(link);
+
+ SET_FOREACH(route, link->dhcp6_pd_routes_old) {
+ k = route_remove(route, NULL, link, NULL);
+ if (k < 0)
+ r = k;
+
+ if (link->radv)
+ (void) sd_radv_remove_prefix(link->radv, &route->dst.in6, 64);
+ dhcp6_pd_free(hashmap_get(link->manager->dhcp6_prefixes, &route->dst.in6));
+ }
+
+ SET_FOREACH(address, link->dhcp6_pd_addresses_old) {
+ k = address_remove(address, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+int dhcp6_pd_remove(Link *link) {
+ Address *address;
+ Route *route;
+ int k, r = 0;
+
+ assert(link);
+ assert(link->manager);
+
+ if (!link_dhcp6_pd_is_enabled(link))
+ return 0;
+
+ link->dhcp6_pd_address_configured = false;
+ link->dhcp6_pd_route_configured = false;
+
+ k = dhcp6_pd_remove_old(link, true);
+ if (k < 0)
+ r = k;
+
+ if (set_isempty(link->dhcp6_pd_addresses) && set_isempty(link->dhcp6_pd_routes))
+ return r;
+
+ log_link_debug(link, "Removing DHCPv6 Prefix Delegation addresses and routes.");
+
+ link_dirty(link);
+
+ SET_FOREACH(route, link->dhcp6_pd_routes) {
+ k = route_remove(route, NULL, link, NULL);
+ if (k < 0)
+ r = k;
+
+ if (link->radv)
+ (void) sd_radv_remove_prefix(link->radv, &route->dst.in6, 64);
+ dhcp6_pd_free(hashmap_get(link->manager->dhcp6_prefixes, &route->dst.in6));
+ }
+
+ SET_FOREACH(address, link->dhcp6_pd_addresses) {
+ k = address_remove(address, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int dhcp6_pd_route_handler(sd_netlink *nl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->dhcp6_pd_route_messages > 0);
+
+ link->dhcp6_pd_route_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Failed to add DHCPv6 Prefix Delegation route");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->dhcp6_pd_route_messages == 0) {
+ log_link_debug(link, "DHCPv6 prefix delegation routes set");
+ if (link->dhcp6_pd_prefixes_assigned)
+ link->dhcp6_pd_route_configured = true;
+
+ r = dhcp6_pd_remove_old(link, false);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int dhcp6_set_pd_route(Link *link, const union in_addr_union *prefix, const union in_addr_union *pd_prefix) {
+ _cleanup_(dhcp6_pd_freep) DHCP6DelegatedPrefix *pd = NULL;
+ _cleanup_(route_freep) Route *route = NULL;
+ Link *assigned_link;
+ Route *ret;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(prefix);
+ assert(pd_prefix);
+
+ r = route_new(&route);
+ if (r < 0)
+ return r;
+
+ route->family = AF_INET6;
+ route->dst = *prefix;
+ route->dst_prefixlen = 64;
+
+ r = route_configure(route, link, dhcp6_pd_route_handler, &ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set DHCPv6 prefix route: %m");
+
+ link->dhcp6_pd_route_messages++;
+
+ r = set_ensure_put(&link->dhcp6_pd_routes, &route_hash_ops, ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store DHCPv6 prefix route: %m");
+
+ (void) set_remove(link->dhcp6_pd_routes_old, ret);
+
+ assigned_link = dhcp6_pd_get_link_by_prefix(link, prefix);
+ if (assigned_link) {
+ assert(assigned_link == link);
+ return 0;
+ }
+
+ pd = new(DHCP6DelegatedPrefix, 1);
+ if (!pd)
+ return log_oom();
+
+ *pd = (DHCP6DelegatedPrefix) {
+ .prefix = prefix->in6,
+ .pd_prefix = pd_prefix->in6,
+ .link = link_ref(link),
+ };
+
+ r = hashmap_ensure_allocated(&link->manager->dhcp6_prefixes, &in6_addr_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = hashmap_put(link->manager->dhcp6_prefixes, &pd->prefix, pd);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store DHCPv6 prefix route at manager: %m");
+
+ r = set_ensure_put(&link->manager->dhcp6_pd_prefixes, &dhcp6_pd_hash_ops, pd);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store DHCPv6 prefix route at manager: %m");
+
+ TAKE_PTR(pd);
+ return 0;
+}
+
+static int dhcp6_pd_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->dhcp6_pd_address_messages > 0);
+
+ link->dhcp6_pd_address_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not set DHCPv6 delegated prefix address");
+ link_enter_failed(link);
+ return 1;
+ } else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->dhcp6_pd_address_messages == 0) {
+ log_link_debug(link, "DHCPv6 delegated prefix addresses set");
+ if (link->dhcp6_pd_prefixes_assigned)
+ link->dhcp6_pd_address_configured = true;
+
+ r = dhcp6_pd_remove_old(link, false);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+
+ r = link_set_routes(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+ }
+
+ return 1;
+}
+
+static int dhcp6_set_pd_address(Link *link,
+ const union in_addr_union *prefix,
+ uint8_t prefix_len,
+ uint32_t lifetime_preferred,
+ uint32_t lifetime_valid) {
+
+ _cleanup_(address_freep) Address *address = NULL;
+ Address *ret;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(prefix);
+
+ if (!link->network->dhcp6_pd_assign)
+ return 0;
+
+ r = address_new(&address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to allocate address for DHCPv6 delegated prefix: %m");
+
+ address->in_addr = *prefix;
+
+ if (!in_addr_is_null(AF_INET6, &link->network->dhcp6_pd_token))
+ memcpy(address->in_addr.in6.s6_addr + 8, link->network->dhcp6_pd_token.in6.s6_addr + 8, 8);
+ else {
+ r = generate_ipv6_eui_64_address(link, &address->in_addr.in6);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to generate EUI64 address for acquired DHCPv6 delegated prefix: %m");
+ }
+
+ address->prefixlen = prefix_len;
+ address->family = AF_INET6;
+ address->cinfo.ifa_prefered = lifetime_preferred;
+ address->cinfo.ifa_valid = lifetime_valid;
+
+ r = address_configure(address, link, dhcp6_pd_address_handler, true, &ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set DHCPv6 delegated prefix address: %m");
+
+ link->dhcp6_pd_address_messages++;
+
+ r = set_ensure_put(&link->dhcp6_pd_addresses, &address_hash_ops, ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store DHCPv6 delegated prefix address: %m");
+
+ (void) set_remove(link->dhcp6_pd_addresses_old, ret);
+
+ return 0;
+}
+
+static int dhcp6_pd_assign_prefix(Link *link, const union in_addr_union *prefix, const union in_addr_union *pd_prefix,
+ uint8_t prefix_len, uint32_t lifetime_preferred, uint32_t lifetime_valid) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(prefix);
+
+ if (link->network->dhcp6_pd_announce) {
+ r = radv_add_prefix(link, &prefix->in6, prefix_len, lifetime_preferred, lifetime_valid);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp6_set_pd_route(link, prefix, pd_prefix);
+ if (r < 0)
+ return r;
+
+ r = dhcp6_set_pd_address(link, prefix, prefix_len, lifetime_preferred, lifetime_valid);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool link_has_preferred_subnet_id(Link *link) {
+ if (!link->network)
+ return false;
+
+ return link->network->dhcp6_pd_subnet_id >= 0;
+}
+
+static int dhcp6_get_preferred_delegated_prefix(
+ Link *link,
+ const union in_addr_union *masked_pd_prefix,
+ uint8_t pd_prefix_len,
+ union in_addr_union *ret) {
+
+ /* We start off with the original PD prefix we have been assigned and iterate from there */
+ union in_addr_union prefix;
+ uint64_t n_prefixes;
+ Link *assigned_link;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(masked_pd_prefix);
+ assert(pd_prefix_len <= 64);
+
+ n_prefixes = UINT64_C(1) << (64 - pd_prefix_len);
+ prefix = *masked_pd_prefix;
+
+ if (link_has_preferred_subnet_id(link)) {
+ uint64_t subnet_id = link->network->dhcp6_pd_subnet_id;
+
+ /* If the link has a preference for a particular subnet id try to allocate that */
+ if (subnet_id >= n_prefixes)
+ return log_link_warning_errno(link, SYNTHETIC_ERRNO(ERANGE),
+ "subnet id %" PRIu64 " is out of range. Only have %" PRIu64 " subnets.",
+ subnet_id, n_prefixes);
+
+ r = in_addr_prefix_nth(AF_INET6, &prefix, 64, subnet_id);
+ if (r < 0)
+ return log_link_warning_errno(link, r,
+ "subnet id %" PRIu64 " is out of range. Only have %" PRIu64 " subnets.",
+ subnet_id, n_prefixes);
+
+ /* Verify that the prefix we did calculate fits in the pd prefix.
+ * This should not fail as we checked the prefix size beforehand */
+ assert_se(in_addr_prefix_covers(AF_INET6, masked_pd_prefix, pd_prefix_len, &prefix) > 0);
+
+ assigned_link = dhcp6_pd_get_link_by_prefix(link, &prefix);
+ if (assigned_link && assigned_link != link) {
+ _cleanup_free_ char *assigned_buf = NULL;
+
+ (void) in_addr_to_string(AF_INET6, &prefix, &assigned_buf);
+ return log_link_warning_errno(link, SYNTHETIC_ERRNO(EAGAIN),
+ "The requested prefix %s is already assigned to another link.",
+ strna(assigned_buf));
+ }
+
+ *ret = prefix;
+ return 0;
+ }
+
+ for (uint64_t n = 0; n < n_prefixes; n++) {
+ /* If we do not have an allocation preference just iterate
+ * through the address space and return the first free prefix. */
+ assigned_link = dhcp6_pd_get_link_by_prefix(link, &prefix);
+ if (!assigned_link || assigned_link == link) {
+ *ret = prefix;
+ return 0;
+ }
+
+ r = in_addr_prefix_next(AF_INET6, &prefix, 64);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Can't allocate another prefix. Out of address space?: %m");
+ }
+
+ return log_link_warning_errno(link, SYNTHETIC_ERRNO(ERANGE), "Couldn't find a suitable prefix. Ran out of address space.");
+}
+
+static void dhcp6_pd_prefix_distribute(Link *dhcp6_link,
+ const union in_addr_union *masked_pd_prefix,
+ uint8_t pd_prefix_len,
+ uint32_t lifetime_preferred,
+ uint32_t lifetime_valid,
+ bool assign_preferred_subnet_id) {
+
+ Link *link;
+ int r;
+
+ assert(dhcp6_link);
+ assert(dhcp6_link->manager);
+ assert(masked_pd_prefix);
+ assert(pd_prefix_len <= 64);
+
+ HASHMAP_FOREACH(link, dhcp6_link->manager->links) {
+ _cleanup_free_ char *assigned_buf = NULL;
+ union in_addr_union assigned_prefix;
+
+ if (link == dhcp6_link)
+ continue;
+
+ if (!link_dhcp6_pd_is_enabled(link))
+ continue;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ continue;
+
+ if (assign_preferred_subnet_id != link_has_preferred_subnet_id(link))
+ continue;
+
+ r = dhcp6_pd_get_assigned_prefix(link, masked_pd_prefix, &assigned_prefix);
+ if (r < 0) {
+ r = dhcp6_get_preferred_delegated_prefix(link, masked_pd_prefix, pd_prefix_len, &assigned_prefix);
+ if (r < 0) {
+ link->dhcp6_pd_prefixes_assigned = false;
+ continue;
+ }
+ }
+
+ (void) in_addr_to_string(AF_INET6, &assigned_prefix, &assigned_buf);
+ r = dhcp6_pd_assign_prefix(link, &assigned_prefix, masked_pd_prefix, 64,
+ lifetime_preferred, lifetime_valid);
+ if (r < 0) {
+ log_link_error_errno(link, r, "Unable to assign/update prefix %s/64: %m",
+ strna(assigned_buf));
+ link_enter_failed(link);
+ } else
+ log_link_debug(link, "Assigned prefix %s/64", strna(assigned_buf));
+ }
+}
+
+static int dhcp6_pd_prepare(Link *link) {
+ Address *address;
+ Route *route;
+ int r;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 0;
+
+ if (!link_dhcp6_pd_is_enabled(link))
+ return 0;
+
+ link_dirty(link);
+
+ link->dhcp6_pd_address_configured = false;
+ link->dhcp6_pd_route_configured = false;
+ link->dhcp6_pd_prefixes_assigned = true;
+
+ while ((address = set_steal_first(link->dhcp6_pd_addresses))) {
+ r = set_ensure_put(&link->dhcp6_pd_addresses_old, &address_hash_ops, address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store old DHCPv6 Prefix Delegation address: %m");
+ }
+
+ while ((route = set_steal_first(link->dhcp6_pd_routes))) {
+ r = set_ensure_put(&link->dhcp6_pd_routes_old, &route_hash_ops, route);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store old DHCPv6 Prefix Delegation route: %m");
+ }
+
+ return 0;
+}
+
+static int dhcp6_pd_finalize(Link *link) {
+ int r;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 0;
+
+ if (!link_dhcp6_pd_is_enabled(link))
+ return 0;
+
+ if (link->dhcp6_pd_address_messages == 0) {
+ if (link->dhcp6_pd_prefixes_assigned)
+ link->dhcp6_pd_address_configured = true;
+ } else {
+ log_link_debug(link, "Setting DHCPv6 PD addresses");
+ /* address_handler calls link_set_routes() and link_set_nexthop(). Before they are
+ * called, the related flags must be cleared. Otherwise, the link becomes configured
+ * state before routes are configured. */
+ link->static_routes_configured = false;
+ link->static_nexthops_configured = false;
+ }
+
+ if (link->dhcp6_pd_route_messages == 0) {
+ if (link->dhcp6_pd_prefixes_assigned)
+ link->dhcp6_pd_route_configured = true;
+ } else
+ log_link_debug(link, "Setting DHCPv6 PD routes");
+
+ r = dhcp6_pd_remove_old(link, false);
+ if (r < 0)
+ return r;
+
+ if (link->dhcp6_pd_address_configured && link->dhcp6_pd_route_configured)
+ link_check_ready(link);
+ else
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ return 0;
+}
+
+static void dhcp6_pd_prefix_lost(Link *dhcp6_link) {
+ Link *link;
+ int r;
+
+ assert(dhcp6_link);
+ assert(dhcp6_link->manager);
+
+ HASHMAP_FOREACH(link, dhcp6_link->manager->links) {
+ if (link == dhcp6_link)
+ continue;
+
+ r = dhcp6_pd_remove(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+}
+
+static int dhcp6_remove_old(Link *link, bool force);
+
+static int dhcp6_address_callback(Address *address) {
+ Address *a;
+
+ assert(address);
+ assert(address->link);
+
+ /* Make this called only once */
+ SET_FOREACH(a, address->link->dhcp6_addresses)
+ a->callback = NULL;
+
+ return dhcp6_remove_old(address->link, true);
+}
+
+static int dhcp6_remove_old(Link *link, bool force) {
+ Address *address;
+ Route *route;
+ int k, r = 0;
+
+ assert(link);
+
+ if (!force && (!link->dhcp6_address_configured || !link->dhcp6_route_configured))
+ return 0;
+
+ if (set_isempty(link->dhcp6_addresses_old) && set_isempty(link->dhcp6_routes_old))
+ return 0;
+
+ if (!force) {
+ bool set_callback = !set_isempty(link->dhcp6_addresses);
+
+ SET_FOREACH(address, link->dhcp6_addresses)
+ if (address_is_ready(address)) {
+ set_callback = false;
+ break;
+ }
+
+ if (set_callback) {
+ SET_FOREACH(address, link->dhcp6_addresses)
+ address->callback = dhcp6_address_callback;
+ return 0;
+ }
+ }
+
+ log_link_debug(link, "Removing old DHCPv6 addresses and routes.");
+
+ link_dirty(link);
+
+ SET_FOREACH(route, link->dhcp6_routes_old) {
+ k = route_remove(route, NULL, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ SET_FOREACH(address, link->dhcp6_addresses_old) {
+ k = address_remove(address, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int dhcp6_remove(Link *link) {
+ Address *address;
+ Route *route;
+ int k, r = 0;
+
+ assert(link);
+
+ link->dhcp6_address_configured = false;
+ link->dhcp6_route_configured = false;
+
+ k = dhcp6_remove_old(link, true);
+ if (k < 0)
+ r = k;
+
+ if (set_isempty(link->dhcp6_addresses) && set_isempty(link->dhcp6_routes))
+ return r;
+
+ log_link_debug(link, "Removing DHCPv6 addresses and routes.");
+
+ link_dirty(link);
+
+ SET_FOREACH(route, link->dhcp6_routes) {
+ k = route_remove(route, NULL, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ SET_FOREACH(address, link->dhcp6_addresses) {
+ k = address_remove(address, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int dhcp6_route_handler(sd_netlink *nl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->dhcp6_route_messages > 0);
+
+ link->dhcp6_route_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Failed to add unreachable route for DHCPv6 delegated subnet");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->dhcp6_route_messages == 0) {
+ log_link_debug(link, "Unreachable routes for DHCPv6 delegated subnets set");
+ link->dhcp6_route_configured = true;
+
+ r = dhcp6_remove_old(link, false);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int dhcp6_set_unreachable_route(Link *link, const union in_addr_union *addr, uint8_t prefixlen) {
+ _cleanup_(route_freep) Route *route = NULL;
+ _cleanup_free_ char *buf = NULL;
+ Route *ret;
+ int r;
+
+ assert(link);
+ assert(addr);
+
+ (void) in_addr_to_string(AF_INET6, addr, &buf);
+
+ if (prefixlen > 64) {
+ log_link_debug(link, "PD Prefix length > 64, ignoring prefix %s/%u",
+ strna(buf), prefixlen);
+ return 0;
+ }
+
+ if (prefixlen == 64) {
+ log_link_debug(link, "Not adding a blocking route for DHCPv6 delegated subnet %s/64 since distributed prefix is 64",
+ strna(buf));
+ return 1;
+ }
+
+ if (prefixlen < 48)
+ log_link_warning(link, "PD Prefix length < 48, looks unusual %s/%u",
+ strna(buf), prefixlen);
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_oom();
+
+ route->family = AF_INET6;
+ route->dst = *addr;
+ route->dst_prefixlen = prefixlen;
+ route->table = link_get_dhcp_route_table(link);
+ route->type = RTN_UNREACHABLE;
+
+ r = route_configure(route, link, dhcp6_route_handler, &ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set unreachable route for DHCPv6 delegated subnet %s/%u: %m",
+ strna(buf), prefixlen);
+
+ link->dhcp6_route_messages++;
+
+ r = set_ensure_put(&link->dhcp6_routes, &route_hash_ops, ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store unreachable route for DHCPv6 delegated subnet %s/%u: %m",
+ strna(buf), prefixlen);
+
+ (void) set_remove(link->dhcp6_routes_old, ret);
+
+ return 1;
+}
+
+static int dhcp6_pd_prefix_acquired(Link *dhcp6_link) {
+ Link *link;
+ int r;
+
+ assert(dhcp6_link);
+ assert(dhcp6_link->dhcp6_lease);
+
+ HASHMAP_FOREACH(link, dhcp6_link->manager->links) {
+ if (link == dhcp6_link)
+ continue;
+
+ r = dhcp6_pd_prepare(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ for (sd_dhcp6_lease_reset_pd_prefix_iter(dhcp6_link->dhcp6_lease);;) {
+ uint32_t lifetime_preferred, lifetime_valid;
+ union in_addr_union pd_prefix, prefix;
+ uint8_t pd_prefix_len;
+
+ r = sd_dhcp6_lease_get_pd(dhcp6_link->dhcp6_lease, &pd_prefix.in6, &pd_prefix_len,
+ &lifetime_preferred, &lifetime_valid);
+ if (r < 0)
+ break;
+
+ r = dhcp6_set_unreachable_route(dhcp6_link, &pd_prefix, pd_prefix_len);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We are doing prefix allocation in two steps:
+ * 1. all those links that have a preferred subnet id will be assigned their subnet
+ * 2. all those links that remain will receive prefixes in sequential order. Prefixes
+ * that were previously already allocated to another link will be skipped.
+ * The assignment has to be split in two phases since subnet id
+ * preferences should be honored. Meaning that any subnet id should be
+ * handed out to the requesting link and not to some link that didn't
+ * specify any preference. */
+
+ assert(pd_prefix_len <= 64);
+
+ prefix = pd_prefix;
+ r = in_addr_mask(AF_INET6, &prefix, pd_prefix_len);
+ if (r < 0)
+ return log_link_error_errno(dhcp6_link, r, "Failed to mask DHCPv6 PD prefix: %m");
+
+ if (DEBUG_LOGGING) {
+ uint64_t n_prefixes = UINT64_C(1) << (64 - pd_prefix_len);
+ _cleanup_free_ char *buf = NULL;
+
+ (void) in_addr_to_string(AF_INET6, &prefix, &buf);
+ log_link_debug(dhcp6_link, "Assigning up to %" PRIu64 " prefixes from %s/%u",
+ n_prefixes, strna(buf), pd_prefix_len);
+ }
+
+ dhcp6_pd_prefix_distribute(dhcp6_link,
+ &prefix,
+ pd_prefix_len,
+ lifetime_preferred,
+ lifetime_valid,
+ true);
+
+ dhcp6_pd_prefix_distribute(dhcp6_link,
+ &prefix,
+ pd_prefix_len,
+ lifetime_preferred,
+ lifetime_valid,
+ false);
+ }
+
+ HASHMAP_FOREACH(link, dhcp6_link->manager->links) {
+ if (link == dhcp6_link)
+ continue;
+
+ r = dhcp6_pd_finalize(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ return 0;
+}
+
+static int dhcp6_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->dhcp6_address_messages > 0);
+
+ link->dhcp6_address_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not set DHCPv6 address");
+ link_enter_failed(link);
+ return 1;
+ } else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->dhcp6_address_messages == 0) {
+ log_link_debug(link, "DHCPv6 addresses set");
+ link->dhcp6_address_configured = true;
+
+ r = dhcp6_remove_old(link, false);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+
+ r = link_set_routes(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+ }
+
+ return 1;
+}
+
+static int dhcp6_update_address(
+ Link *link,
+ const struct in6_addr *ip6_addr,
+ uint32_t lifetime_preferred,
+ uint32_t lifetime_valid) {
+
+ _cleanup_(address_freep) Address *addr = NULL;
+ _cleanup_free_ char *buffer = NULL;
+ Address *ret;
+ int r;
+
+ r = address_new(&addr);
+ if (r < 0)
+ return log_oom();
+
+ addr->family = AF_INET6;
+ addr->in_addr.in6 = *ip6_addr;
+ addr->flags = IFA_F_NOPREFIXROUTE;
+ addr->prefixlen = 128;
+ addr->cinfo.ifa_prefered = lifetime_preferred;
+ addr->cinfo.ifa_valid = lifetime_valid;
+
+ (void) in_addr_to_string(addr->family, &addr->in_addr, &buffer);
+ log_link_full(link, set_contains(link->dhcp6_addresses, addr) ? LOG_DEBUG : LOG_INFO,
+ "DHCPv6 address %s/%u timeout preferred %d valid %d",
+ strna(buffer), addr->prefixlen, lifetime_preferred, lifetime_valid);
+
+ r = address_configure(addr, link, dhcp6_address_handler, true, &ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set DHCPv6 address %s/%u: %m",
+ strna(buffer), addr->prefixlen);
+
+ link->dhcp6_address_messages++;
+
+ r = set_ensure_put(&link->dhcp6_addresses, &address_hash_ops, ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store DHCPv6 address %s/%u: %m",
+ strna(buffer), addr->prefixlen);
+
+ (void) set_remove(link->dhcp6_addresses_old, ret);
+
+ return 0;
+}
+
+static int dhcp6_address_acquired(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->dhcp6_lease);
+
+ for (sd_dhcp6_lease_reset_address_iter(link->dhcp6_lease);;) {
+ uint32_t lifetime_preferred, lifetime_valid;
+ struct in6_addr ip6_addr;
+
+ r = sd_dhcp6_lease_get_address(link->dhcp6_lease, &ip6_addr, &lifetime_preferred, &lifetime_valid);
+ if (r < 0)
+ break;
+
+ r = dhcp6_update_address(link, &ip6_addr, lifetime_preferred, lifetime_valid);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dhcp6_lease_ip_acquired(sd_dhcp6_client *client, Link *link) {
+ _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease_old = NULL;
+ sd_dhcp6_lease *lease;
+ Address *a;
+ Route *rt;
+ int r;
+
+ link->dhcp6_address_configured = false;
+ link->dhcp6_route_configured = false;
+
+ link_dirty(link);
+
+ while ((a = set_steal_first(link->dhcp6_addresses))) {
+ r = set_ensure_put(&link->dhcp6_addresses_old, &address_hash_ops, a);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store old DHCPv6 address: %m");
+ }
+
+ while ((rt = set_steal_first(link->dhcp6_routes))) {
+ r = set_ensure_put(&link->dhcp6_routes_old, &route_hash_ops, rt);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store old DHCPv6 route: %m");
+ }
+
+ r = sd_dhcp6_client_get_lease(client, &lease);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get DHCPv6 lease: %m");
+
+ lease_old = TAKE_PTR(link->dhcp6_lease);
+ link->dhcp6_lease = sd_dhcp6_lease_ref(lease);
+
+ r = dhcp6_address_acquired(link);
+ if (r < 0)
+ return r;
+
+ if (dhcp6_lease_has_pd_prefix(lease)) {
+ r = dhcp6_pd_prefix_acquired(link);
+ if (r < 0)
+ return r;
+ } else if (dhcp6_lease_has_pd_prefix(lease_old))
+ /* When we had PD prefixes but not now, we need to remove them. */
+ dhcp6_pd_prefix_lost(link);
+
+ if (link->dhcp6_address_messages == 0)
+ link->dhcp6_address_configured = true;
+ else {
+ log_link_debug(link, "Setting DHCPv6 addresses");
+ /* address_handler calls link_set_routes() and link_set_nexthop(). Before they are
+ * called, the related flags must be cleared. Otherwise, the link becomes configured
+ * state before routes are configured. */
+ link->static_routes_configured = false;
+ link->static_nexthops_configured = false;
+ }
+
+ if (link->dhcp6_route_messages == 0)
+ link->dhcp6_route_configured = true;
+ else
+ log_link_debug(link, "Setting unreachable routes for DHCPv6 delegated subnets");
+
+ r = dhcp6_remove_old(link, false);
+ if (r < 0)
+ return r;
+
+ if (link->dhcp6_address_configured && link->dhcp6_route_configured)
+ link_check_ready(link);
+ else
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ return 0;
+}
+
+static int dhcp6_lease_information_acquired(sd_dhcp6_client *client, Link *link) {
+ return 0;
+}
+
+static int dhcp6_lease_lost(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->manager);
+
+ log_link_info(link, "DHCPv6 lease lost");
+
+ if (dhcp6_lease_has_pd_prefix(link->dhcp6_lease))
+ dhcp6_pd_prefix_lost(link);
+
+ link->dhcp6_lease = sd_dhcp6_lease_unref(link->dhcp6_lease);
+
+ r = dhcp6_remove(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void dhcp6_handler(sd_dhcp6_client *client, int event, void *userdata) {
+ Link *link = userdata;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ switch (event) {
+ case SD_DHCP6_CLIENT_EVENT_STOP:
+ case SD_DHCP6_CLIENT_EVENT_RESEND_EXPIRE:
+ case SD_DHCP6_CLIENT_EVENT_RETRANS_MAX:
+ r = dhcp6_lease_lost(link);
+ if (r < 0)
+ link_enter_failed(link);
+ break;
+
+ case SD_DHCP6_CLIENT_EVENT_IP_ACQUIRE:
+ r = dhcp6_lease_ip_acquired(client, link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+
+ _fallthrough_;
+ case SD_DHCP6_CLIENT_EVENT_INFORMATION_REQUEST:
+ r = dhcp6_lease_information_acquired(client, link);
+ if (r < 0)
+ link_enter_failed(link);
+ break;
+
+ default:
+ if (event < 0)
+ log_link_warning_errno(link, event, "DHCPv6 error: %m");
+ else
+ log_link_warning(link, "DHCPv6 unknown event: %d", event);
+ return;
+ }
+}
+
+int dhcp6_request_address(Link *link, int ir) {
+ int r, inf_req, pd;
+ bool running;
+
+ assert(link);
+ assert(link->dhcp6_client);
+ assert(link->network);
+ assert(in_addr_is_link_local(AF_INET6, (const union in_addr_union*) &link->ipv6ll_address) > 0);
+
+ r = sd_dhcp6_client_is_running(link->dhcp6_client);
+ if (r < 0)
+ return r;
+ running = r;
+
+ r = sd_dhcp6_client_get_prefix_delegation(link->dhcp6_client, &pd);
+ if (r < 0)
+ return r;
+
+ if (pd && ir && link->network->dhcp6_force_pd_other_information) {
+ log_link_debug(link, "Enabling managed mode to request DHCPv6 PD with 'Other Information' set");
+
+ r = sd_dhcp6_client_set_address_request(link->dhcp6_client, false);
+ if (r < 0)
+ return r;
+
+ ir = false;
+ }
+
+ if (running) {
+ r = sd_dhcp6_client_get_information_request(link->dhcp6_client, &inf_req);
+ if (r < 0)
+ return r;
+
+ if (inf_req == ir)
+ return 0;
+
+ r = sd_dhcp6_client_stop(link->dhcp6_client);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_dhcp6_client_set_local_address(link->dhcp6_client, &link->ipv6ll_address);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_dhcp6_client_set_information_request(link->dhcp6_client, ir);
+ if (r < 0)
+ return r;
+
+ r = sd_dhcp6_client_start(link->dhcp6_client);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int dhcp6_request_prefix_delegation(Link *link) {
+ Link *l;
+
+ assert(link);
+ assert(link->manager);
+
+ if (!link_dhcp6_pd_is_enabled(link))
+ return 0;
+
+ log_link_debug(link, "Requesting DHCPv6 prefixes to be delegated for new link");
+
+ HASHMAP_FOREACH(l, link->manager->links) {
+ int r, enabled;
+
+ if (l == link)
+ continue;
+
+ if (!l->dhcp6_client)
+ continue;
+
+ r = sd_dhcp6_client_get_prefix_delegation(l->dhcp6_client, &enabled);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Cannot get prefix delegation when adding new link: %m");
+ link_enter_failed(l);
+ continue;
+ }
+
+ if (enabled == 0) {
+ r = sd_dhcp6_client_set_prefix_delegation(l->dhcp6_client, 1);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Cannot enable prefix delegation when adding new link: %m");
+ link_enter_failed(l);
+ continue;
+ }
+ }
+
+ r = sd_dhcp6_client_is_running(l->dhcp6_client);
+ if (r <= 0)
+ continue;
+
+ if (enabled != 0) {
+ if (dhcp6_lease_has_pd_prefix(l->dhcp6_lease)) {
+ log_link_debug(l, "Requesting re-assignment of delegated prefixes after adding new link");
+ r = dhcp6_pd_prefix_acquired(l);
+ if (r < 0)
+ link_enter_failed(l);
+ }
+ continue;
+ }
+
+ r = sd_dhcp6_client_stop(l->dhcp6_client);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Cannot stop DHCPv6 prefix delegation client after adding new link: %m");
+ link_enter_failed(l);
+ continue;
+ }
+
+ r = sd_dhcp6_client_start(l->dhcp6_client);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Cannot restart DHCPv6 prefix delegation client after adding new link: %m");
+ link_enter_failed(l);
+ continue;
+ }
+
+ log_link_debug(l, "Restarted DHCPv6 client to acquire prefix delegations after adding new link");
+ }
+
+ /* dhcp6_pd_prefix_acquired() may make the link in failed state. */
+ if (link->state == LINK_STATE_FAILED)
+ return -ENOANO;
+
+ return 0;
+}
+
+static int dhcp6_set_hostname(sd_dhcp6_client *client, Link *link) {
+ _cleanup_free_ char *hostname = NULL;
+ const char *hn;
+ int r;
+
+ assert(link);
+
+ if (!link->network->dhcp_send_hostname)
+ hn = NULL;
+ else if (link->network->dhcp_hostname)
+ hn = link->network->dhcp_hostname;
+ else {
+ r = gethostname_strict(&hostname);
+ if (r < 0 && r != -ENXIO) /* ENXIO: no hostname set or hostname is "localhost" */
+ return r;
+
+ hn = hostname;
+ }
+
+ r = sd_dhcp6_client_set_fqdn(client, hn);
+ if (r == -EINVAL && hostname)
+ /* Ignore error when the machine's hostname is not suitable to send in DHCP packet. */
+ log_link_warning_errno(link, r, "DHCP6 CLIENT: Failed to set hostname from kernel hostname, ignoring: %m");
+ else if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set hostname: %m");
+
+ return 0;
+}
+
+static bool dhcp6_enable_prefix_delegation(Link *dhcp6_link) {
+ Link *link;
+
+ assert(dhcp6_link);
+ assert(dhcp6_link->manager);
+
+ HASHMAP_FOREACH(link, dhcp6_link->manager->links) {
+ if (link == dhcp6_link)
+ continue;
+
+ if (!link_dhcp6_pd_is_enabled(link))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+static int dhcp6_set_identifier(Link *link, sd_dhcp6_client *client) {
+ const DUID *duid;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(client);
+
+ r = sd_dhcp6_client_set_mac(client, link->hw_addr.addr.bytes, link->hw_addr.length, link->iftype);
+ if (r < 0)
+ return r;
+
+ if (link->network->iaid_set) {
+ r = sd_dhcp6_client_set_iaid(client, link->network->iaid);
+ if (r < 0)
+ return r;
+ }
+
+ duid = link_get_duid(link);
+ if (duid->type == DUID_TYPE_LLT && duid->raw_data_len == 0)
+ r = sd_dhcp6_client_set_duid_llt(client, duid->llt_time);
+ else
+ r = sd_dhcp6_client_set_duid(client,
+ duid->type,
+ duid->raw_data_len > 0 ? duid->raw_data : NULL,
+ duid->raw_data_len);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int dhcp6_configure(Link *link) {
+ _cleanup_(sd_dhcp6_client_unrefp) sd_dhcp6_client *client = NULL;
+ sd_dhcp6_option *vendor_option;
+ sd_dhcp6_option *send_option;
+ void *request_options;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (!link_dhcp6_enabled(link) && !link_ipv6_accept_ra_enabled(link))
+ return 0;
+
+ if (link->dhcp6_client)
+ return 0;
+
+ r = sd_dhcp6_client_new(&client);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to create DHCP6 client: %m");
+
+ r = sd_dhcp6_client_attach_event(client, link->manager->event, 0);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to attach event: %m");
+
+ r = dhcp6_set_identifier(link, client);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set identifier: %m");
+
+ ORDERED_HASHMAP_FOREACH(send_option, link->network->dhcp6_client_send_options) {
+ r = sd_dhcp6_client_add_option(client, send_option);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set option: %m");
+ }
+
+ r = dhcp6_set_hostname(client, link);
+ if (r < 0)
+ return r;
+
+ r = sd_dhcp6_client_set_ifindex(client, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set ifindex: %m");
+
+ if (link->network->dhcp6_rapid_commit) {
+ r = sd_dhcp6_client_set_request_option(client, SD_DHCP6_OPTION_RAPID_COMMIT);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set request flag for rapid commit: %m");
+ }
+
+ if (link->network->dhcp6_mudurl) {
+ r = sd_dhcp6_client_set_request_mud_url(client, link->network->dhcp6_mudurl);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set MUD URL: %m");
+ }
+
+ SET_FOREACH(request_options, link->network->dhcp6_request_options) {
+ uint32_t option = PTR_TO_UINT32(request_options);
+
+ r = sd_dhcp6_client_set_request_option(client, option);
+ if (r == -EEXIST) {
+ log_link_debug(link, "DHCP6 CLIENT: Failed to set request flag for '%u' already exists, ignoring.", option);
+ continue;
+ }
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set request flag for '%u': %m", option);
+ }
+
+ if (link->network->dhcp6_user_class) {
+ r = sd_dhcp6_client_set_request_user_class(client, link->network->dhcp6_user_class);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set user class: %m");
+ }
+
+ if (link->network->dhcp6_vendor_class) {
+ r = sd_dhcp6_client_set_request_vendor_class(client, link->network->dhcp6_vendor_class);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set vendor class: %m");
+ }
+
+ ORDERED_HASHMAP_FOREACH(vendor_option, link->network->dhcp6_client_send_vendor_options) {
+ r = sd_dhcp6_client_add_vendor_option(client, vendor_option);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set vendor option: %m");
+ }
+
+ r = sd_dhcp6_client_set_callback(client, dhcp6_handler, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set callback: %m");
+
+ if (dhcp6_enable_prefix_delegation(link)) {
+ r = sd_dhcp6_client_set_prefix_delegation(client, true);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set prefix delegation: %m");
+ }
+
+ if (link->network->dhcp6_pd_length > 0) {
+ r = sd_dhcp6_client_set_prefix_delegation_hint(client, link->network->dhcp6_pd_length, &link->network->dhcp6_pd_address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "DHCP6 CLIENT: Failed to set prefix hint: %m");
+ }
+
+ link->dhcp6_client = TAKE_PTR(client);
+
+ return 0;
+}
+
+int dhcp6_update_mac(Link *link) {
+ bool restart;
+ int r;
+
+ assert(link);
+
+ if (!link->dhcp6_client)
+ return 0;
+
+ restart = sd_dhcp6_client_is_running(link->dhcp6_client) > 0;
+
+ if (restart) {
+ r = sd_dhcp6_client_stop(link->dhcp6_client);
+ if (r < 0)
+ return r;
+ }
+
+ r = dhcp6_set_identifier(link, link->dhcp6_client);
+ if (r < 0)
+ return r;
+
+ if (restart) {
+ r = sd_dhcp6_client_start(link->dhcp6_client);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not restart DHCPv6 client: %m");
+ }
+
+ return 0;
+}
+
+int link_serialize_dhcp6_client(Link *link, FILE *f) {
+ _cleanup_free_ char *duid = NULL;
+ uint32_t iaid;
+ int r;
+
+ assert(link);
+
+ if (!link->dhcp6_client)
+ return 0;
+
+ r = sd_dhcp6_client_get_iaid(link->dhcp6_client, &iaid);
+ if (r >= 0)
+ fprintf(f, "DHCP6_CLIENT_IAID=0x%x\n", iaid);
+
+ r = sd_dhcp6_client_duid_as_string(link->dhcp6_client, &duid);
+ if (r >= 0)
+ fprintf(f, "DHCP6_CLIENT_DUID=%s\n", duid);
+
+ return 0;
+}
+
+int config_parse_dhcp6_pd_hint(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = in_addr_prefix_from_string(rvalue, AF_INET6, (union in_addr_union *) &network->dhcp6_pd_address, &network->dhcp6_pd_length);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse PrefixDelegationHint=%s, ignoring assignment", rvalue);
+ return 0;
+ }
+
+ if (network->dhcp6_pd_length < 1 || network->dhcp6_pd_length > 128) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid prefix length='%d', ignoring assignment", network->dhcp6_pd_length);
+ network->dhcp6_pd_length = 0;
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_dhcp6_mud_url(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *unescaped = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ network->dhcp6_mudurl = mfree(network->dhcp6_mudurl);
+ return 0;
+ }
+
+ r = cunescape(rvalue, 0, &unescaped);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to Failed to unescape MUD URL, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (!http_url_is_valid(unescaped) || strlen(unescaped) > UINT8_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse MUD URL '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ return free_and_replace(network->dhcp6_mudurl, unescaped);
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dhcp6_client_start_mode, dhcp6_client_start_mode, DHCP6ClientStartMode,
+ "Failed to parse WithoutRA= setting");
+
+static const char* const dhcp6_client_start_mode_table[_DHCP6_CLIENT_START_MODE_MAX] = {
+ [DHCP6_CLIENT_START_MODE_NO] = "no",
+ [DHCP6_CLIENT_START_MODE_INFORMATION_REQUEST] = "information-request",
+ [DHCP6_CLIENT_START_MODE_SOLICIT] = "solicit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(dhcp6_client_start_mode, DHCP6ClientStartMode);
+
+int config_parse_dhcp6_pd_subnet_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int64_t *p = data;
+ uint64_t t;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue) || streq(rvalue, "auto")) {
+ *p = -1;
+ return 0;
+ }
+
+ r = safe_atoux64(rvalue, &t);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s=, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (t > INT64_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid subnet id '%s', ignoring assignment.",
+ rvalue);
+ return 0;
+ }
+
+ *p = (int64_t) t;
+
+ return 0;
+}
+
+int config_parse_dhcp6_pd_token(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ union in_addr_union *addr = data, tmp;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *addr = IN_ADDR_NULL;
+ return 0;
+ }
+
+ r = in_addr_from_string(AF_INET6, rvalue, &tmp);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DHCPv6 Prefix Delegation token, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (in_addr_is_null(AF_INET6, &tmp)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "DHCPv6 Prefix Delegation token cannot be the ANY address, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *addr = tmp;
+
+ return 0;
+}
diff --git a/src/network/networkd-dhcp6.h b/src/network/networkd-dhcp6.h
new file mode 100644
index 0000000..65b35fd
--- /dev/null
+++ b/src/network/networkd-dhcp6.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-dhcp6-client.h"
+
+#include "conf-parser.h"
+#include "macro.h"
+
+typedef enum DHCP6ClientStartMode {
+ DHCP6_CLIENT_START_MODE_NO,
+ DHCP6_CLIENT_START_MODE_INFORMATION_REQUEST,
+ DHCP6_CLIENT_START_MODE_SOLICIT,
+ _DHCP6_CLIENT_START_MODE_MAX,
+ _DHCP6_CLIENT_START_MODE_INVALID = -1,
+} DHCP6ClientStartMode;
+
+typedef struct Link Link;
+typedef struct Manager Manager;
+
+typedef struct DHCP6DelegatedPrefix {
+ struct in6_addr prefix; /* Prefix assigned to the link */
+ struct in6_addr pd_prefix; /* PD prefix provided by DHCP6 lease */
+ Link *link;
+} DHCP6DelegatedPrefix;
+
+DHCP6DelegatedPrefix *dhcp6_pd_free(DHCP6DelegatedPrefix *p);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DHCP6DelegatedPrefix*, dhcp6_pd_free);
+
+bool link_dhcp6_pd_is_enabled(Link *link);
+int dhcp6_pd_remove(Link *link);
+int dhcp6_configure(Link *link);
+int dhcp6_update_mac(Link *link);
+int dhcp6_request_address(Link *link, int ir);
+int dhcp6_request_prefix_delegation(Link *link);
+
+int link_serialize_dhcp6_client(Link *link, FILE *f);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp6_pd_hint);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp6_mud_url);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp6_client_start_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp6_pd_subnet_id);
+CONFIG_PARSER_PROTOTYPE(config_parse_dhcp6_pd_token);
+
+const char* dhcp6_client_start_mode_to_string(DHCP6ClientStartMode i) _const_;
+DHCP6ClientStartMode dhcp6_client_start_mode_from_string(const char *s) _pure_;
diff --git a/src/network/networkd-fdb.c b/src/network/networkd-fdb.c
new file mode 100644
index 0000000..283dece
--- /dev/null
+++ b/src/network/networkd-fdb.c
@@ -0,0 +1,409 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <net/ethernet.h>
+#include <net/if.h>
+
+#include "alloc-util.h"
+#include "bridge.h"
+#include "netlink-util.h"
+#include "networkd-fdb.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "vlan-util.h"
+#include "vxlan.h"
+
+#define STATIC_FDB_ENTRIES_PER_NETWORK_MAX 1024U
+
+/* remove and FDB entry. */
+FdbEntry *fdb_entry_free(FdbEntry *fdb_entry) {
+ if (!fdb_entry)
+ return NULL;
+
+ if (fdb_entry->network) {
+ assert(fdb_entry->section);
+ hashmap_remove(fdb_entry->network->fdb_entries_by_section, fdb_entry->section);
+ }
+
+ network_config_section_free(fdb_entry->section);
+ return mfree(fdb_entry);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(FdbEntry, fdb_entry_free);
+
+/* create a new FDB entry or get an existing one. */
+static int fdb_entry_new_static(
+ Network *network,
+ const char *filename,
+ unsigned section_line,
+ FdbEntry **ret) {
+
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(fdb_entry_freep) FdbEntry *fdb_entry = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ /* search entry in hashmap first. */
+ fdb_entry = hashmap_get(network->fdb_entries_by_section, n);
+ if (fdb_entry) {
+ *ret = TAKE_PTR(fdb_entry);
+ return 0;
+ }
+
+ if (hashmap_size(network->fdb_entries_by_section) >= STATIC_FDB_ENTRIES_PER_NETWORK_MAX)
+ return -E2BIG;
+
+ /* allocate space for and FDB entry. */
+ fdb_entry = new(FdbEntry, 1);
+ if (!fdb_entry)
+ return -ENOMEM;
+
+ /* init FDB structure. */
+ *fdb_entry = (FdbEntry) {
+ .network = network,
+ .section = TAKE_PTR(n),
+ .vni = VXLAN_VID_MAX + 1,
+ .fdb_ntf_flags = NEIGHBOR_CACHE_ENTRY_FLAGS_SELF,
+ };
+
+ r = hashmap_ensure_allocated(&network->fdb_entries_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->fdb_entries_by_section, fdb_entry->section, fdb_entry);
+ if (r < 0)
+ return r;
+
+ /* return allocated FDB structure. */
+ *ret = TAKE_PTR(fdb_entry);
+
+ return 0;
+}
+
+static int set_fdb_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not add FDB entry");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ return 1;
+}
+
+/* send a request to the kernel to add a FDB entry in its static MAC table. */
+static int fdb_entry_configure(Link *link, FdbEntry *fdb_entry) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(fdb_entry);
+
+ /* create new RTM message */
+ r = sd_rtnl_message_new_neigh(link->manager->rtnl, &req, RTM_NEWNEIGH, link->ifindex, AF_BRIDGE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_NEWNEIGH message: %m");
+
+ r = sd_rtnl_message_neigh_set_flags(req, fdb_entry->fdb_ntf_flags);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set neighbor flags: %m");
+
+ /* only NUD_PERMANENT state supported. */
+ r = sd_rtnl_message_neigh_set_state(req, NUD_NOARP | NUD_PERMANENT);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set neighbor state: %m");
+
+ r = sd_netlink_message_append_data(req, NDA_LLADDR, &fdb_entry->mac_addr, sizeof(fdb_entry->mac_addr));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NDA_LLADDR attribute: %m");
+
+ /* VLAN Id is optional. We'll add VLAN Id only if it's specified. */
+ if (fdb_entry->vlan_id > 0) {
+ r = sd_netlink_message_append_u16(req, NDA_VLAN, fdb_entry->vlan_id);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NDA_VLAN attribute: %m");
+ }
+
+ if (!in_addr_is_null(fdb_entry->family, &fdb_entry->destination_addr)) {
+ r = netlink_message_append_in_addr_union(req, NDA_DST, fdb_entry->family, &fdb_entry->destination_addr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NDA_DST attribute: %m");
+ }
+
+ if (fdb_entry->vni <= VXLAN_VID_MAX) {
+ r = sd_netlink_message_append_u32(req, NDA_VNI, fdb_entry->vni);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NDA_VNI attribute: %m");
+ }
+
+ /* send message to the kernel to update its internal static MAC table. */
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_fdb_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 1;
+}
+
+int link_set_bridge_fdb(Link *link) {
+ FdbEntry *fdb_entry;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ HASHMAP_FOREACH(fdb_entry, link->network->fdb_entries_by_section) {
+ r = fdb_entry_configure(link, fdb_entry);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to add MAC entry to static MAC table: %m");
+ }
+
+ return 0;
+}
+
+void network_drop_invalid_fdb_entries(Network *network) {
+ FdbEntry *fdb_entry;
+
+ assert(network);
+
+ HASHMAP_FOREACH(fdb_entry, network->fdb_entries_by_section)
+ if (section_is_invalid(fdb_entry->section))
+ fdb_entry_free(fdb_entry);
+}
+
+/* parse the HW address from config files. */
+int config_parse_fdb_hwaddr(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(fdb_entry_free_or_set_invalidp) FdbEntry *fdb_entry = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = fdb_entry_new_static(network, filename, section_line, &fdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ r = ether_addr_from_string(rvalue, &fdb_entry->mac_addr);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Not a valid MAC address, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ fdb_entry = NULL;
+
+ return 0;
+}
+
+/* parse the VLAN Id from config files. */
+int config_parse_fdb_vlan_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(fdb_entry_free_or_set_invalidp) FdbEntry *fdb_entry = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = fdb_entry_new_static(network, filename, section_line, &fdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ r = config_parse_vlanid(unit, filename, line, section,
+ section_line, lvalue, ltype,
+ rvalue, &fdb_entry->vlan_id, userdata);
+ if (r < 0)
+ return r;
+
+ fdb_entry = NULL;
+
+ return 0;
+}
+
+int config_parse_fdb_destination(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(fdb_entry_free_or_set_invalidp) FdbEntry *fdb_entry = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = fdb_entry_new_static(network, filename, section_line, &fdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ r = in_addr_from_string_auto(rvalue, &fdb_entry->family, &fdb_entry->destination_addr);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "FDB destination IP address is invalid, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ fdb_entry = NULL;
+
+ return 0;
+}
+
+int config_parse_fdb_vxlan_vni(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(fdb_entry_free_or_set_invalidp) FdbEntry *fdb_entry = NULL;
+ Network *network = userdata;
+ uint32_t vni;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = fdb_entry_new_static(network, filename, section_line, &fdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ r = safe_atou32(rvalue, &vni);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse VXLAN Network Identifier (VNI), ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ if (vni > VXLAN_VID_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "FDB invalid VXLAN Network Identifier (VNI), ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ fdb_entry->vni = vni;
+ fdb_entry = NULL;
+
+ return 0;
+}
+
+static const char* const fdb_ntf_flags_table[_NEIGHBOR_CACHE_ENTRY_FLAGS_MAX] = {
+ [NEIGHBOR_CACHE_ENTRY_FLAGS_USE] = "use",
+ [NEIGHBOR_CACHE_ENTRY_FLAGS_SELF] = "self",
+ [NEIGHBOR_CACHE_ENTRY_FLAGS_MASTER] = "master",
+ [NEIGHBOR_CACHE_ENTRY_FLAGS_ROUTER] = "router",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(fdb_ntf_flags, NeighborCacheEntryFlags);
+
+int config_parse_fdb_ntf_flags(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(fdb_entry_free_or_set_invalidp) FdbEntry *fdb_entry = NULL;
+ Network *network = userdata;
+ NeighborCacheEntryFlags f;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = fdb_entry_new_static(network, filename, section_line, &fdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ f = fdb_ntf_flags_from_string(rvalue);
+ if (f < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, SYNTHETIC_ERRNO(EINVAL),
+ "FDB failed to parse AssociatedWith=, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ fdb_entry->fdb_ntf_flags = f;
+ fdb_entry = NULL;
+
+ return 0;
+}
diff --git a/src/network/networkd-fdb.h b/src/network/networkd-fdb.h
new file mode 100644
index 0000000..48f4e40
--- /dev/null
+++ b/src/network/networkd-fdb.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <inttypes.h>
+#include <linux/neighbour.h>
+
+#include "conf-parser.h"
+#include "ether-addr-util.h"
+#include "in-addr-util.h"
+#include "networkd-util.h"
+
+typedef struct Network Network;
+typedef struct Link Link;
+
+typedef enum NeighborCacheEntryFlags {
+ NEIGHBOR_CACHE_ENTRY_FLAGS_USE = NTF_USE,
+ NEIGHBOR_CACHE_ENTRY_FLAGS_SELF = NTF_SELF,
+ NEIGHBOR_CACHE_ENTRY_FLAGS_MASTER = NTF_MASTER,
+ NEIGHBOR_CACHE_ENTRY_FLAGS_ROUTER = NTF_ROUTER,
+ _NEIGHBOR_CACHE_ENTRY_FLAGS_MAX,
+ _NEIGHBOR_CACHE_ENTRY_FLAGS_INVALID = -1,
+} NeighborCacheEntryFlags;
+
+typedef struct FdbEntry {
+ Network *network;
+ NetworkConfigSection *section;
+
+ uint32_t vni;
+
+ int family;
+ uint16_t vlan_id;
+
+ struct ether_addr mac_addr;
+ union in_addr_union destination_addr;
+ NeighborCacheEntryFlags fdb_ntf_flags;
+} FdbEntry;
+
+FdbEntry *fdb_entry_free(FdbEntry *fdb_entry);
+
+void network_drop_invalid_fdb_entries(Network *network);
+
+int link_set_bridge_fdb(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_fdb_hwaddr);
+CONFIG_PARSER_PROTOTYPE(config_parse_fdb_vlan_id);
+CONFIG_PARSER_PROTOTYPE(config_parse_fdb_destination);
+CONFIG_PARSER_PROTOTYPE(config_parse_fdb_vxlan_vni);
+CONFIG_PARSER_PROTOTYPE(config_parse_fdb_ntf_flags);
diff --git a/src/network/networkd-gperf.gperf b/src/network/networkd-gperf.gperf
new file mode 100644
index 0000000..aaabb3d
--- /dev/null
+++ b/src/network/networkd-gperf.gperf
@@ -0,0 +1,25 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "networkd-conf.h"
+#include "networkd-manager.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name networkd_gperf_hash
+%define lookup-function-name networkd_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Network.SpeedMeter, config_parse_bool, 0, offsetof(Manager, use_speed_meter)
+Network.SpeedMeterIntervalSec, config_parse_sec, 0, offsetof(Manager, speed_meter_interval_usec)
+Network.ManageForeignRoutes, config_parse_bool, 0, offsetof(Manager, manage_foreign_routes)
+DHCP.DUIDType, config_parse_duid_type, 0, offsetof(Manager, duid)
+DHCP.DUIDRawData, config_parse_duid_rawdata, 0, offsetof(Manager, duid)
diff --git a/src/network/networkd-ipv4ll.c b/src/network/networkd-ipv4ll.c
new file mode 100644
index 0000000..295abe8
--- /dev/null
+++ b/src/network/networkd-ipv4ll.c
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/if.h>
+
+#include "network-internal.h"
+#include "networkd-address.h"
+#include "networkd-ipv4ll.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+
+static int ipv4ll_address_lost(Link *link) {
+ _cleanup_(address_freep) Address *address = NULL;
+ struct in_addr addr;
+ int r;
+
+ assert(link);
+
+ link->ipv4ll_address_configured = false;
+
+ r = sd_ipv4ll_get_address(link->ipv4ll, &addr);
+ if (r < 0)
+ return 0;
+
+ log_link_debug(link, "IPv4 link-local release "IPV4_ADDRESS_FMT_STR, IPV4_ADDRESS_FMT_VAL(addr));
+
+ r = address_new(&address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate address: %m");
+
+ address->family = AF_INET;
+ address->in_addr.in = addr;
+ address->prefixlen = 16;
+ address->scope = RT_SCOPE_LINK;
+
+ r = address_remove(address, link, NULL);
+ if (r < 0)
+ return r;
+
+ link_check_ready(link);
+
+ return 0;
+}
+
+static int ipv4ll_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(!link->ipv4ll_address_configured);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "could not set ipv4ll address");
+ link_enter_failed(link);
+ return 1;
+ } else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ link->ipv4ll_address_configured = true;
+ link_check_ready(link);
+
+ return 1;
+}
+
+static int ipv4ll_address_claimed(sd_ipv4ll *ll, Link *link) {
+ _cleanup_(address_freep) Address *ll_addr = NULL;
+ struct in_addr address;
+ int r;
+
+ assert(ll);
+ assert(link);
+
+ link->ipv4ll_address_configured = false;
+
+ r = sd_ipv4ll_get_address(ll, &address);
+ if (r == -ENOENT)
+ return 0;
+ else if (r < 0)
+ return r;
+
+ log_link_debug(link, "IPv4 link-local claim "IPV4_ADDRESS_FMT_STR,
+ IPV4_ADDRESS_FMT_VAL(address));
+
+ r = address_new(&ll_addr);
+ if (r < 0)
+ return r;
+
+ ll_addr->family = AF_INET;
+ ll_addr->in_addr.in = address;
+ ll_addr->prefixlen = 16;
+ ll_addr->broadcast.s_addr = ll_addr->in_addr.in.s_addr | htobe32(0xfffffffflu >> ll_addr->prefixlen);
+ ll_addr->scope = RT_SCOPE_LINK;
+
+ r = address_configure(ll_addr, link, ipv4ll_address_handler, false, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void ipv4ll_handler(sd_ipv4ll *ll, int event, void *userdata) {
+ Link *link = userdata;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ switch(event) {
+ case SD_IPV4LL_EVENT_STOP:
+ r = ipv4ll_address_lost(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+ break;
+ case SD_IPV4LL_EVENT_CONFLICT:
+ r = ipv4ll_address_lost(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+
+ r = sd_ipv4ll_restart(ll);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not acquire IPv4 link-local address: %m");
+ break;
+ case SD_IPV4LL_EVENT_BIND:
+ r = ipv4ll_address_claimed(ll, link);
+ if (r < 0) {
+ log_link_error(link, "Failed to configure ipv4ll address: %m");
+ link_enter_failed(link);
+ return;
+ }
+ break;
+ default:
+ log_link_warning(link, "IPv4 link-local unknown event: %d", event);
+ break;
+ }
+}
+
+static int ipv4ll_init(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (link->ipv4ll)
+ return 0;
+
+ r = sd_ipv4ll_new(&link->ipv4ll);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4ll_attach_event(link->ipv4ll, link->manager->event, 0);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int ipv4ll_configure(Link *link) {
+ uint64_t seed;
+ int r;
+
+ assert(link);
+
+ if (!link_ipv4ll_enabled(link, ADDRESS_FAMILY_IPV4 | ADDRESS_FAMILY_FALLBACK_IPV4))
+ return 0;
+
+ r = ipv4ll_init(link);
+ if (r < 0)
+ return r;
+
+ if (link->sd_device &&
+ net_get_unique_predictable_data(link->sd_device, true, &seed) >= 0) {
+ r = sd_ipv4ll_set_address_seed(link->ipv4ll, seed);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_ipv4ll_set_mac(link->ipv4ll, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4ll_set_ifindex(link->ipv4ll, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4ll_set_callback(link->ipv4ll, ipv4ll_handler, link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int ipv4ll_update_mac(Link *link) {
+ bool restart;
+ int r;
+
+ assert(link);
+
+ if (!link->ipv4ll)
+ return 0;
+
+ restart = sd_ipv4ll_is_running(link->ipv4ll) > 0;
+
+ r = sd_ipv4ll_stop(link->ipv4ll);
+ if (r < 0)
+ return r;
+
+ r = sd_ipv4ll_set_mac(link->ipv4ll, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ if (restart) {
+ r = sd_ipv4ll_start(link->ipv4ll);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int link_serialize_ipv4ll(Link *link, FILE *f) {
+ struct in_addr address;
+ int r;
+
+ assert(link);
+
+ if (!link->ipv4ll)
+ return 0;
+
+ r = sd_ipv4ll_get_address(link->ipv4ll, &address);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ fputs("IPV4LL_ADDRESS=", f);
+ serialize_in_addrs(f, &address, 1, false, NULL);
+ fputc('\n', f);
+
+ return 0;
+}
+
+int link_deserialize_ipv4ll(Link *link, const char *ipv4ll_address) {
+ union in_addr_union address;
+ int r;
+
+ assert(link);
+
+ if (isempty(ipv4ll_address))
+ return 0;
+
+ r = in_addr_from_string(AF_INET, ipv4ll_address, &address);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to parse IPv4LL address: %s", ipv4ll_address);
+
+ r = ipv4ll_init(link);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to initialize IPv4LL client: %m");
+
+ r = sd_ipv4ll_set_address(link->ipv4ll, &address.in);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to set initial IPv4LL address %s: %m", ipv4ll_address);
+
+ return 0;
+}
+
+int config_parse_ipv4ll(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ AddressFamily *link_local = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* Note that this is mostly like
+ * config_parse_address_family(), except that it
+ * applies only to IPv4 */
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s=%s, ignoring assignment. "
+ "Note that the setting %s= is deprecated, please use LinkLocalAddressing= instead.",
+ lvalue, rvalue, lvalue);
+ return 0;
+ }
+
+ SET_FLAG(*link_local, ADDRESS_FAMILY_IPV4, r);
+
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "%s=%s is deprecated, please use LinkLocalAddressing=%s instead.",
+ lvalue, rvalue, address_family_to_string(*link_local));
+
+ return 0;
+}
diff --git a/src/network/networkd-ipv4ll.h b/src/network/networkd-ipv4ll.h
new file mode 100644
index 0000000..fae48cd
--- /dev/null
+++ b/src/network/networkd-ipv4ll.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+
+#define IPV4LL_ROUTE_METRIC 2048
+
+typedef struct Link Link;
+
+int ipv4ll_configure(Link *link);
+int ipv4ll_update_mac(Link *link);
+int link_serialize_ipv4ll(Link *link, FILE *f);
+int link_deserialize_ipv4ll(Link *link, const char *ipv4ll_address);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv4ll);
diff --git a/src/network/networkd-ipv6-proxy-ndp.c b/src/network/networkd-ipv6-proxy-ndp.c
new file mode 100644
index 0000000..7a370e9
--- /dev/null
+++ b/src/network/networkd-ipv6-proxy-ndp.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/if.h>
+
+#include "netlink-util.h"
+#include "networkd-ipv6-proxy-ndp.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "sysctl-util.h"
+
+static int set_ipv6_proxy_ndp_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ log_link_message_warning_errno(link, m, r, "Could not add IPv6 proxy ndp address entry, ignoring");
+
+ return 1;
+}
+
+/* send a request to the kernel to add a IPv6 Proxy entry to the neighbour table */
+static int ipv6_proxy_ndp_address_configure(Link *link, const struct in6_addr *address) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(address);
+
+ /* create new netlink message */
+ r = sd_rtnl_message_new_neigh(link->manager->rtnl, &req, RTM_NEWNEIGH, link->ifindex, AF_INET6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_NEWNEIGH message: %m");
+
+ r = sd_rtnl_message_neigh_set_flags(req, NTF_PROXY);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set neighbor flags: %m");
+
+ r = sd_netlink_message_append_in6_addr(req, NDA_DST, address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NDA_DST attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_ipv6_proxy_ndp_address_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static bool ipv6_proxy_ndp_is_needed(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->network->ipv6_proxy_ndp >= 0)
+ return link->network->ipv6_proxy_ndp;
+
+ return !set_isempty(link->network->ipv6_proxy_ndp_addresses);
+}
+
+static int ipv6_proxy_ndp_set(Link *link) {
+ bool v;
+ int r;
+
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ v = ipv6_proxy_ndp_is_needed(link);
+
+ r = sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "proxy_ndp", v);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Cannot configure proxy NDP for the interface, ignoring: %m");
+
+ return v;
+}
+
+/* configure all ipv6 proxy ndp addresses */
+int link_set_ipv6_proxy_ndp_addresses(Link *link) {
+ struct in6_addr *address;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ /* enable or disable proxy_ndp itself depending on whether ipv6_proxy_ndp_addresses are set or not */
+ r = ipv6_proxy_ndp_set(link);
+ if (r <= 0)
+ return 0;
+
+ SET_FOREACH(address, link->network->ipv6_proxy_ndp_addresses) {
+ r = ipv6_proxy_ndp_address_configure(link, address);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int config_parse_ipv6_proxy_ndp_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ struct in6_addr *address = NULL;
+ Network *network = userdata;
+ union in_addr_union buffer;
+ int r;
+
+ assert(filename);
+ assert(rvalue);
+ assert(network);
+
+ if (isempty(rvalue)) {
+ network->ipv6_proxy_ndp_addresses = set_free_free(network->ipv6_proxy_ndp_addresses);
+ return 0;
+ }
+
+ r = in_addr_from_string(AF_INET6, rvalue, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse IPv6 proxy NDP address, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (in_addr_is_null(AF_INET6, &buffer)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "IPv6 proxy NDP address cannot be the ANY address, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ address = newdup(struct in6_addr, &buffer.in6, 1);
+ if (!address)
+ return log_oom();
+
+ r = set_ensure_put(&network->ipv6_proxy_ndp_addresses, &in6_addr_hash_ops, address);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ TAKE_PTR(address);
+
+ return 0;
+}
diff --git a/src/network/networkd-ipv6-proxy-ndp.h b/src/network/networkd-ipv6-proxy-ndp.h
new file mode 100644
index 0000000..27313ef
--- /dev/null
+++ b/src/network/networkd-ipv6-proxy-ndp.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+
+typedef struct Link Link;
+
+int link_set_ipv6_proxy_ndp_addresses(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_proxy_ndp_address);
diff --git a/src/network/networkd-link-bus.c b/src/network/networkd-link-bus.c
new file mode 100644
index 0000000..4df31df
--- /dev/null
+++ b/src/network/networkd-link-bus.c
@@ -0,0 +1,816 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/capability.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-message-util.h"
+#include "bus-polkit.h"
+#include "dns-domain.h"
+#include "networkd-link-bus.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "resolve-util.h"
+#include "socket-netlink.h"
+#include "strv.h"
+#include "user-util.h"
+
+BUS_DEFINE_PROPERTY_GET_ENUM(property_get_operational_state, link_operstate, LinkOperationalState);
+BUS_DEFINE_PROPERTY_GET_ENUM(property_get_carrier_state, link_carrier_state, LinkCarrierState);
+BUS_DEFINE_PROPERTY_GET_ENUM(property_get_address_state, link_address_state, LinkAddressState);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_administrative_state, link_state, LinkState);
+
+static int property_get_bit_rates(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *link = userdata;
+ Manager *manager;
+ double interval_sec;
+ uint64_t tx, rx;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ manager = link->manager;
+
+ if (!manager->use_speed_meter ||
+ manager->speed_meter_usec_old == 0 ||
+ !link->stats_updated)
+ return sd_bus_message_append(reply, "(tt)", UINT64_MAX, UINT64_MAX);
+
+ assert(manager->speed_meter_usec_new > manager->speed_meter_usec_old);
+ interval_sec = (manager->speed_meter_usec_new - manager->speed_meter_usec_old) / USEC_PER_SEC;
+
+ if (link->stats_new.tx_bytes > link->stats_old.tx_bytes)
+ tx = (uint64_t) ((link->stats_new.tx_bytes - link->stats_old.tx_bytes) / interval_sec);
+ else
+ tx = (uint64_t) ((UINT64_MAX - (link->stats_old.tx_bytes - link->stats_new.tx_bytes)) / interval_sec);
+
+ if (link->stats_new.rx_bytes > link->stats_old.rx_bytes)
+ rx = (uint64_t) ((link->stats_new.rx_bytes - link->stats_old.rx_bytes) / interval_sec);
+ else
+ rx = (uint64_t) ((UINT64_MAX - (link->stats_old.rx_bytes - link->stats_new.rx_bytes)) / interval_sec);
+
+ return sd_bus_message_append(reply, "(tt)", tx, rx);
+}
+
+static int verify_managed_link(Link *l, sd_bus_error *error) {
+ assert(l);
+
+ if (l->flags & IFF_LOOPBACK)
+ return sd_bus_error_setf(error, BUS_ERROR_LINK_BUSY, "Link %s is loopback device.", l->ifname);
+
+ return 0;
+}
+
+int bus_link_method_set_ntp_servers(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_strv_free_ char **ntp = NULL;
+ Link *l = userdata;
+ int r;
+ char **i;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &ntp);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, ntp) {
+ r = dns_name_is_valid_or_address(*i);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NTP server: %s", *i);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-ntp-servers",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ strv_free_and_replace(l->ntp, ntp);
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int bus_link_method_set_dns_servers_internal(sd_bus_message *message, void *userdata, sd_bus_error *error, bool extended) {
+ struct in_addr_full **dns;
+ Link *l = userdata;
+ size_t n;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = bus_message_read_dns_servers(message, error, extended, &dns, &n);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-dns-servers",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ goto finalize;
+ if (r == 0) {
+ r = 1; /* Polkit will call us back */
+ goto finalize;
+ }
+
+ if (l->n_dns != (unsigned) -1)
+ for (unsigned i = 0; i < l->n_dns; i++)
+ in_addr_full_free(l->dns[i]);
+
+ free_and_replace(l->dns, dns);
+ l->n_dns = n;
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+
+finalize:
+ for (size_t i = 0; i < n; i++)
+ in_addr_full_free(dns[i]);
+ free(dns);
+
+ return r;
+}
+
+int bus_link_method_set_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_link_method_set_dns_servers_internal(message, userdata, error, false);
+}
+
+int bus_link_method_set_dns_servers_ex(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_link_method_set_dns_servers_internal(message, userdata, error, true);
+}
+
+int bus_link_method_set_domains(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(ordered_set_freep) OrderedSet *search_domains = NULL, *route_domains = NULL;
+ Link *l = userdata;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *str = NULL;
+ OrderedSet **domains;
+ const char *name;
+ int route_only;
+
+ r = sd_bus_message_read(message, "(sb)", &name, &route_only);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = dns_name_is_valid(name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid search domain %s", name);
+ if (!route_only && dns_name_is_root(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Root domain is not suitable as search domain");
+
+ r = dns_name_normalize(name, 0, &str);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid search domain %s", name);
+
+ domains = route_only ? &route_domains : &search_domains;
+ r = ordered_set_ensure_allocated(domains, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_put(*domains, str);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(str);
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-domains",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ ordered_set_free_free(l->search_domains);
+ ordered_set_free_free(l->route_domains);
+ l->search_domains = TAKE_PTR(search_domains);
+ l->route_domains = TAKE_PTR(route_domains);
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r, b;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-default-route",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ if (l->dns_default_route != b) {
+ l->dns_default_route = b;
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ ResolveSupport mode;
+ const char *llmnr;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &llmnr);
+ if (r < 0)
+ return r;
+
+ if (isempty(llmnr))
+ mode = RESOLVE_SUPPORT_YES;
+ else {
+ mode = resolve_support_from_string(llmnr);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid LLMNR setting: %s", llmnr);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-llmnr",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ if (l->llmnr != mode) {
+ l->llmnr = mode;
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ ResolveSupport mode;
+ const char *mdns;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &mdns);
+ if (r < 0)
+ return r;
+
+ if (isempty(mdns))
+ mode = RESOLVE_SUPPORT_NO;
+ else {
+ mode = resolve_support_from_string(mdns);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid MulticastDNS setting: %s", mdns);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-mdns",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ if (l->mdns != mode) {
+ l->mdns = mode;
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ const char *dns_over_tls;
+ DnsOverTlsMode mode;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &dns_over_tls);
+ if (r < 0)
+ return r;
+
+ if (isempty(dns_over_tls))
+ mode = _DNS_OVER_TLS_MODE_INVALID;
+ else {
+ mode = dns_over_tls_mode_from_string(dns_over_tls);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNSOverTLS setting: %s", dns_over_tls);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-dns-over-tls",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ if (l->dns_over_tls_mode != mode) {
+ l->dns_over_tls_mode = mode;
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ const char *dnssec;
+ DnssecMode mode;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &dnssec);
+ if (r < 0)
+ return r;
+
+ if (isempty(dnssec))
+ mode = _DNSSEC_MODE_INVALID;
+ else {
+ mode = dnssec_mode_from_string(dnssec);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNSSEC setting: %s", dnssec);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-dnssec",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ if (l->dnssec_mode != mode) {
+ l->dnssec_mode = mode;
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_set_free_free_ Set *ns = NULL;
+ _cleanup_strv_free_ char **ntas = NULL;
+ Link *l = userdata;
+ int r;
+ char **i;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &ntas);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, ntas) {
+ r = dns_name_is_valid(*i);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid negative trust anchor domain: %s", *i);
+ }
+
+ ns = set_new(&dns_name_hash_ops);
+ if (!ns)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, ntas) {
+ r = set_put_strdup(&ns, *i);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.set-dnssec-negative-trust-anchors",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ set_free_free(l->dnssec_negative_trust_anchors);
+ l->dnssec_negative_trust_anchors = TAKE_PTR(ns);
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_revert_ntp(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.revert-ntp",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ link_ntp_settings_clear(l);
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_revert_dns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_managed_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.revert-dns",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ link_dns_settings_clear(l);
+
+ link_dirty(l);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_force_renew(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(l);
+
+ if (!l->network)
+ return sd_bus_error_setf(error, BUS_ERROR_UNMANAGED_INTERFACE,
+ "Interface %s is not managed by systemd-networkd",
+ l->ifname);
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.forcerenew",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ if (l->dhcp_server) {
+ r = sd_dhcp_server_forcerenew(l->dhcp_server);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_renew(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(l);
+
+ if (!l->network)
+ return sd_bus_error_setf(error, BUS_ERROR_UNMANAGED_INTERFACE,
+ "Interface %s is not managed by systemd-networkd",
+ l->ifname);
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.renew",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ if (l->dhcp_client) {
+ r = sd_dhcp_client_send_renew(l->dhcp_client);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_reconfigure(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.reconfigure",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ r = link_reconfigure(l, true);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ link_set_state(l, LINK_STATE_INITIALIZED);
+ r = link_save_and_clean(l);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable link_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("OperationalState", "s", property_get_operational_state, offsetof(Link, operstate), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CarrierState", "s", property_get_carrier_state, offsetof(Link, carrier_state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("AddressState", "s", property_get_address_state, offsetof(Link, address_state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("AdministrativeState", "s", property_get_administrative_state, offsetof(Link, state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("BitRates", "(tt)", property_get_bit_rates, 0, 0),
+
+ SD_BUS_METHOD("SetNTP", "as", NULL, bus_link_method_set_ntp_servers, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDNS", "a(iay)", NULL, bus_link_method_set_dns_servers, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDNSEx", "a(iayqs)", NULL, bus_link_method_set_dns_servers_ex, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDomains", "a(sb)", NULL, bus_link_method_set_domains, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDefaultRoute", "b", NULL, bus_link_method_set_default_route, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLLMNR", "s", NULL, bus_link_method_set_llmnr, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetMulticastDNS", "s", NULL, bus_link_method_set_mdns, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDNSOverTLS", "s", NULL, bus_link_method_set_dns_over_tls, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDNSSEC", "s", NULL, bus_link_method_set_dnssec, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetDNSSECNegativeTrustAnchors", "as", NULL, bus_link_method_set_dnssec_negative_trust_anchors, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RevertNTP", NULL, NULL, bus_link_method_revert_ntp, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RevertDNS", NULL, NULL, bus_link_method_revert_dns, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Renew", NULL, NULL, bus_link_method_renew, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ForceRenew", NULL, NULL, bus_link_method_force_renew, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Reconfigure", NULL, NULL, bus_link_method_reconfigure, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+char *link_bus_path(Link *link) {
+ _cleanup_free_ char *ifindex = NULL;
+ char *p;
+ int r;
+
+ assert(link);
+ assert(link->ifindex > 0);
+
+ if (asprintf(&ifindex, "%d", link->ifindex) < 0)
+ return NULL;
+
+ r = sd_bus_path_encode("/org/freedesktop/network1/link", ifindex, &p);
+ if (r < 0)
+ return NULL;
+
+ return p;
+}
+
+int link_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ unsigned c = 0;
+ Link *link;
+
+ assert(bus);
+ assert(path);
+ assert(m);
+ assert(nodes);
+
+ l = new0(char*, hashmap_size(m->links) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(link, m->links) {
+ char *p;
+
+ p = link_bus_path(link);
+ if (!p)
+ return -ENOMEM;
+
+ l[c++] = p;
+ }
+
+ l[c] = NULL;
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+int link_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *identifier = NULL;
+ Manager *m = userdata;
+ Link *link;
+ int ifindex, r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(m);
+ assert(found);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/network1/link", &identifier);
+ if (r <= 0)
+ return 0;
+
+ ifindex = parse_ifindex(identifier);
+ if (ifindex < 0)
+ return 0;
+
+ r = link_get(m, ifindex, &link);
+ if (r < 0)
+ return 0;
+
+ if (streq(interface, "org.freedesktop.network1.DHCPServer") && !link->dhcp_server)
+ return 0;
+
+ *found = link;
+
+ return 1;
+}
+
+int link_send_changed_strv(Link *link, char **properties) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(link);
+ assert(link->manager);
+ assert(properties);
+
+ if (!link->manager->bus)
+ return 0;
+
+ p = link_bus_path(link);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_emit_properties_changed_strv(
+ link->manager->bus,
+ p,
+ "org.freedesktop.network1.Link",
+ properties);
+}
+
+int link_send_changed(Link *link, const char *property, ...) {
+ char **properties;
+
+ properties = strv_from_stdarg_alloca(property);
+
+ return link_send_changed_strv(link, properties);
+}
diff --git a/src/network/networkd-link-bus.h b/src/network/networkd-link-bus.h
new file mode 100644
index 0000000..45594df
--- /dev/null
+++ b/src/network/networkd-link-bus.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "macro.h"
+
+typedef struct Link Link;
+
+extern const sd_bus_vtable link_vtable[];
+
+char *link_bus_path(Link *link);
+int link_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+int link_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+int link_send_changed_strv(Link *link, char **properties);
+int link_send_changed(Link *link, const char *property, ...) _sentinel_;
+
+int property_get_operational_state(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int property_get_carrier_state(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int property_get_address_state(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+int bus_link_method_set_ntp_servers(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dns_servers_ex(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_domains(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_revert_ntp(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_revert_dns(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_renew(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_force_renew(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_reconfigure(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c
new file mode 100644
index 0000000..8120343
--- /dev/null
+++ b/src/network/networkd-link.c
@@ -0,0 +1,3263 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+#include <linux/if_link.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bond.h"
+#include "bridge.h"
+#include "bus-util.h"
+#include "dhcp-identifier.h"
+#include "dhcp-lease-internal.h"
+#include "env-file.h"
+#include "ethtool-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "ipvlan.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "networkd-address-label.h"
+#include "networkd-address.h"
+#include "networkd-can.h"
+#include "networkd-dhcp-server.h"
+#include "networkd-dhcp4.h"
+#include "networkd-dhcp6.h"
+#include "networkd-fdb.h"
+#include "networkd-ipv4ll.h"
+#include "networkd-ipv6-proxy-ndp.h"
+#include "networkd-link-bus.h"
+#include "networkd-link.h"
+#include "networkd-lldp-tx.h"
+#include "networkd-manager.h"
+#include "networkd-mdb.h"
+#include "networkd-ndisc.h"
+#include "networkd-neighbor.h"
+#include "networkd-nexthop.h"
+#include "networkd-sriov.h"
+#include "networkd-sysctl.h"
+#include "networkd-radv.h"
+#include "networkd-routing-policy-rule.h"
+#include "networkd-wifi.h"
+#include "set.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "sysctl-util.h"
+#include "tc.h"
+#include "tmpfile-util.h"
+#include "udev-util.h"
+#include "util.h"
+#include "vrf.h"
+
+bool link_ipv4ll_enabled(Link *link, AddressFamily mask) {
+ assert(link);
+ assert((mask & ~(ADDRESS_FAMILY_IPV4 | ADDRESS_FAMILY_FALLBACK_IPV4)) == 0);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->iftype == ARPHRD_CAN)
+ return false;
+
+ if (STRPTR_IN_SET(link->kind,
+ "vrf", "wireguard", "ipip", "gre", "ip6gre","ip6tnl", "sit", "vti",
+ "vti6", "nlmon", "xfrm", "bareudp"))
+ return false;
+
+ /* L3 or L3S mode do not support ARP. */
+ if (IN_SET(link_get_ipvlan_mode(link), NETDEV_IPVLAN_MODE_L3, NETDEV_IPVLAN_MODE_L3S))
+ return false;
+
+ if (link->network->bond)
+ return false;
+
+ return link->network->link_local & mask;
+}
+
+bool link_ipv6ll_enabled(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return false;
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->iftype == ARPHRD_CAN)
+ return false;
+
+ if (STRPTR_IN_SET(link->kind, "vrf", "wireguard", "ipip", "gre", "sit", "vti", "nlmon"))
+ return false;
+
+ if (link->network->bond)
+ return false;
+
+ return link->network->link_local & ADDRESS_FAMILY_IPV6;
+}
+
+bool link_ipv6_enabled(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return false;
+
+ if (link->network->bond)
+ return false;
+
+ if (link->iftype == ARPHRD_CAN)
+ return false;
+
+ /* DHCPv6 client will not be started if no IPv6 link-local address is configured. */
+ if (link_ipv6ll_enabled(link))
+ return true;
+
+ if (network_has_static_ipv6_configurations(link->network))
+ return true;
+
+ return false;
+}
+
+static bool link_is_enslaved(Link *link) {
+ if (link->flags & IFF_SLAVE)
+ /* Even if the link is not managed by networkd, honor IFF_SLAVE flag. */
+ return true;
+
+ if (!link->network)
+ return false;
+
+ if (link->master_ifindex > 0 && link->network->bridge)
+ return true;
+
+ /* TODO: add conditions for other netdevs. */
+
+ return false;
+}
+
+static void link_update_master_operstate(Link *link, NetDev *netdev) {
+ Link *master;
+
+ if (!netdev)
+ return;
+
+ if (netdev->ifindex <= 0)
+ return;
+
+ if (link_get(link->manager, netdev->ifindex, &master) < 0)
+ return;
+
+ link_update_operstate(master, true);
+}
+
+void link_update_operstate(Link *link, bool also_update_master) {
+ LinkOperationalState operstate;
+ LinkCarrierState carrier_state;
+ LinkAddressState address_state;
+ _cleanup_strv_free_ char **p = NULL;
+ uint8_t scope = RT_SCOPE_NOWHERE;
+ bool changed = false;
+ Address *address;
+
+ assert(link);
+
+ if (link->kernel_operstate == IF_OPER_DORMANT)
+ carrier_state = LINK_CARRIER_STATE_DORMANT;
+ else if (link_has_carrier(link)) {
+ if (link_is_enslaved(link))
+ carrier_state = LINK_CARRIER_STATE_ENSLAVED;
+ else
+ carrier_state = LINK_CARRIER_STATE_CARRIER;
+ } else if (link->flags & IFF_UP)
+ carrier_state = LINK_CARRIER_STATE_NO_CARRIER;
+ else
+ carrier_state = LINK_CARRIER_STATE_OFF;
+
+ if (carrier_state >= LINK_CARRIER_STATE_CARRIER) {
+ Link *slave;
+
+ SET_FOREACH(slave, link->slaves) {
+ link_update_operstate(slave, false);
+
+ if (slave->carrier_state < LINK_CARRIER_STATE_CARRIER)
+ carrier_state = LINK_CARRIER_STATE_DEGRADED_CARRIER;
+ }
+ }
+
+ SET_FOREACH(address, link->addresses) {
+ if (!address_is_ready(address))
+ continue;
+
+ if (address->scope < scope)
+ scope = address->scope;
+ }
+
+ /* for operstate we also take foreign addresses into account */
+ SET_FOREACH(address, link->addresses_foreign) {
+ if (!address_is_ready(address))
+ continue;
+
+ if (address->scope < scope)
+ scope = address->scope;
+ }
+
+ if (scope < RT_SCOPE_SITE)
+ /* universally accessible addresses found */
+ address_state = LINK_ADDRESS_STATE_ROUTABLE;
+ else if (scope < RT_SCOPE_HOST)
+ /* only link or site local addresses found */
+ address_state = LINK_ADDRESS_STATE_DEGRADED;
+ else
+ /* no useful addresses found */
+ address_state = LINK_ADDRESS_STATE_OFF;
+
+ /* Mapping of address and carrier state vs operational state
+ * carrier state
+ * | off | no-carrier | dormant | degraded-carrier | carrier | enslaved
+ * ------------------------------------------------------------------------------
+ * off | off | no-carrier | dormant | degraded-carrier | carrier | enslaved
+ * address_state degraded | off | no-carrier | dormant | degraded-carrier | degraded | enslaved
+ * routable | off | no-carrier | dormant | degraded-carrier | routable | routable
+ */
+
+ if (carrier_state < LINK_CARRIER_STATE_CARRIER || address_state == LINK_ADDRESS_STATE_OFF)
+ operstate = (LinkOperationalState) carrier_state;
+ else if (address_state == LINK_ADDRESS_STATE_ROUTABLE)
+ operstate = LINK_OPERSTATE_ROUTABLE;
+ else if (carrier_state == LINK_CARRIER_STATE_CARRIER)
+ operstate = LINK_OPERSTATE_DEGRADED;
+ else
+ operstate = LINK_OPERSTATE_ENSLAVED;
+
+ if (link->carrier_state != carrier_state) {
+ link->carrier_state = carrier_state;
+ changed = true;
+ if (strv_extend(&p, "CarrierState") < 0)
+ log_oom();
+ }
+
+ if (link->address_state != address_state) {
+ link->address_state = address_state;
+ changed = true;
+ if (strv_extend(&p, "AddressState") < 0)
+ log_oom();
+ }
+
+ if (link->operstate != operstate) {
+ link->operstate = operstate;
+ changed = true;
+ if (strv_extend(&p, "OperationalState") < 0)
+ log_oom();
+ }
+
+ if (p)
+ link_send_changed_strv(link, p);
+ if (changed)
+ link_dirty(link);
+
+ if (also_update_master && link->network) {
+ link_update_master_operstate(link, link->network->bond);
+ link_update_master_operstate(link, link->network->bridge);
+ }
+}
+
+#define FLAG_STRING(string, flag, old, new) \
+ (((old ^ new) & flag) \
+ ? ((old & flag) ? (" -" string) : (" +" string)) \
+ : "")
+
+static int link_update_flags(Link *link, sd_netlink_message *m, bool force_update_operstate) {
+ unsigned flags, unknown_flags_added, unknown_flags_removed, unknown_flags;
+ uint8_t operstate;
+ int r;
+
+ assert(link);
+
+ r = sd_rtnl_message_link_get_flags(m, &flags);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not get link flags: %m");
+
+ r = sd_netlink_message_read_u8(m, IFLA_OPERSTATE, &operstate);
+ if (r < 0)
+ /* if we got a message without operstate, take it to mean
+ the state was unchanged */
+ operstate = link->kernel_operstate;
+
+ if (!force_update_operstate && (link->flags == flags) && (link->kernel_operstate == operstate))
+ return 0;
+
+ if (link->flags != flags) {
+ log_link_debug(link, "Flags change:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ FLAG_STRING("LOOPBACK", IFF_LOOPBACK, link->flags, flags),
+ FLAG_STRING("MASTER", IFF_MASTER, link->flags, flags),
+ FLAG_STRING("SLAVE", IFF_SLAVE, link->flags, flags),
+ FLAG_STRING("UP", IFF_UP, link->flags, flags),
+ FLAG_STRING("DORMANT", IFF_DORMANT, link->flags, flags),
+ FLAG_STRING("LOWER_UP", IFF_LOWER_UP, link->flags, flags),
+ FLAG_STRING("RUNNING", IFF_RUNNING, link->flags, flags),
+ FLAG_STRING("MULTICAST", IFF_MULTICAST, link->flags, flags),
+ FLAG_STRING("BROADCAST", IFF_BROADCAST, link->flags, flags),
+ FLAG_STRING("POINTOPOINT", IFF_POINTOPOINT, link->flags, flags),
+ FLAG_STRING("PROMISC", IFF_PROMISC, link->flags, flags),
+ FLAG_STRING("ALLMULTI", IFF_ALLMULTI, link->flags, flags),
+ FLAG_STRING("PORTSEL", IFF_PORTSEL, link->flags, flags),
+ FLAG_STRING("AUTOMEDIA", IFF_AUTOMEDIA, link->flags, flags),
+ FLAG_STRING("DYNAMIC", IFF_DYNAMIC, link->flags, flags),
+ FLAG_STRING("NOARP", IFF_NOARP, link->flags, flags),
+ FLAG_STRING("NOTRAILERS", IFF_NOTRAILERS, link->flags, flags),
+ FLAG_STRING("DEBUG", IFF_DEBUG, link->flags, flags),
+ FLAG_STRING("ECHO", IFF_ECHO, link->flags, flags));
+
+ unknown_flags = ~(IFF_LOOPBACK | IFF_MASTER | IFF_SLAVE | IFF_UP |
+ IFF_DORMANT | IFF_LOWER_UP | IFF_RUNNING |
+ IFF_MULTICAST | IFF_BROADCAST | IFF_POINTOPOINT |
+ IFF_PROMISC | IFF_ALLMULTI | IFF_PORTSEL |
+ IFF_AUTOMEDIA | IFF_DYNAMIC | IFF_NOARP |
+ IFF_NOTRAILERS | IFF_DEBUG | IFF_ECHO);
+ unknown_flags_added = ((link->flags ^ flags) & flags & unknown_flags);
+ unknown_flags_removed = ((link->flags ^ flags) & link->flags & unknown_flags);
+
+ /* link flags are currently at most 18 bits, let's align to
+ * printing 20 */
+ if (unknown_flags_added)
+ log_link_debug(link,
+ "Unknown link flags gained: %#.5x (ignoring)",
+ unknown_flags_added);
+
+ if (unknown_flags_removed)
+ log_link_debug(link,
+ "Unknown link flags lost: %#.5x (ignoring)",
+ unknown_flags_removed);
+ }
+
+ link->flags = flags;
+ link->kernel_operstate = operstate;
+
+ link_update_operstate(link, true);
+
+ return 0;
+}
+
+static int link_new(Manager *manager, sd_netlink_message *message, Link **ret) {
+ _cleanup_(link_unrefp) Link *link = NULL;
+ const char *ifname, *kind = NULL;
+ unsigned short iftype;
+ int r, ifindex;
+ uint16_t type;
+
+ assert(manager);
+ assert(message);
+ assert(ret);
+
+ /* check for link kind */
+ r = sd_netlink_message_enter_container(message, IFLA_LINKINFO);
+ if (r == 0) {
+ (void) sd_netlink_message_read_string(message, IFLA_INFO_KIND, &kind);
+ r = sd_netlink_message_exit_container(message);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0)
+ return r;
+ else if (type != RTM_NEWLINK)
+ return -EINVAL;
+
+ r = sd_rtnl_message_link_get_ifindex(message, &ifindex);
+ if (r < 0)
+ return r;
+ else if (ifindex <= 0)
+ return -EINVAL;
+
+ r = sd_rtnl_message_link_get_type(message, &iftype);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string(message, IFLA_IFNAME, &ifname);
+ if (r < 0)
+ return r;
+
+ link = new(Link, 1);
+ if (!link)
+ return -ENOMEM;
+
+ *link = (Link) {
+ .n_ref = 1,
+ .manager = manager,
+ .state = LINK_STATE_PENDING,
+ .ifindex = ifindex,
+ .iftype = iftype,
+
+ .n_dns = (unsigned) -1,
+ .dns_default_route = -1,
+ .llmnr = _RESOLVE_SUPPORT_INVALID,
+ .mdns = _RESOLVE_SUPPORT_INVALID,
+ .dnssec_mode = _DNSSEC_MODE_INVALID,
+ .dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID,
+ };
+
+ link->ifname = strdup(ifname);
+ if (!link->ifname)
+ return -ENOMEM;
+
+ if (kind) {
+ link->kind = strdup(kind);
+ if (!link->kind)
+ return -ENOMEM;
+ }
+
+ r = sd_netlink_message_read_u32(message, IFLA_MASTER, (uint32_t *)&link->master_ifindex);
+ if (r < 0)
+ log_link_debug_errno(link, r, "New device has no master, continuing without");
+
+ r = netlink_message_read_hw_addr(message, IFLA_ADDRESS, &link->hw_addr);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Hardware address not found for new device, continuing without");
+
+ r = netlink_message_read_hw_addr(message, IFLA_BROADCAST, &link->bcast_addr);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Broadcast address not found for new device, continuing without");
+
+ r = ethtool_get_permanent_macaddr(&manager->ethtool_fd, link->ifname, &link->permanent_mac);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Permanent MAC address not found for new device, continuing without: %m");
+
+ r = ethtool_get_driver(&manager->ethtool_fd, link->ifname, &link->driver);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Failed to get driver, continuing without: %m");
+
+ r = sd_netlink_message_read_strv(message, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &link->alternative_names);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ if (asprintf(&link->state_file, "/run/systemd/netif/links/%d", link->ifindex) < 0)
+ return -ENOMEM;
+
+ if (asprintf(&link->lease_file, "/run/systemd/netif/leases/%d", link->ifindex) < 0)
+ return -ENOMEM;
+
+ if (asprintf(&link->lldp_file, "/run/systemd/netif/lldp/%d", link->ifindex) < 0)
+ return -ENOMEM;
+
+ r = hashmap_ensure_allocated(&manager->links, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(manager->links, INT_TO_PTR(link->ifindex), link);
+ if (r < 0)
+ return r;
+
+ r = link_update_flags(link, message, false);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(link);
+
+ return 0;
+}
+
+void link_ntp_settings_clear(Link *link) {
+ link->ntp = strv_free(link->ntp);
+}
+
+void link_dns_settings_clear(Link *link) {
+ if (link->n_dns != (unsigned) -1)
+ for (unsigned i = 0; i < link->n_dns; i++)
+ in_addr_full_free(link->dns[i]);
+ link->dns = mfree(link->dns);
+ link->n_dns = (unsigned) -1;
+
+ link->search_domains = ordered_set_free_free(link->search_domains);
+ link->route_domains = ordered_set_free_free(link->route_domains);
+
+ link->dns_default_route = -1;
+ link->llmnr = _RESOLVE_SUPPORT_INVALID;
+ link->mdns = _RESOLVE_SUPPORT_INVALID;
+ link->dnssec_mode = _DNSSEC_MODE_INVALID;
+ link->dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID;
+
+ link->dnssec_negative_trust_anchors = set_free_free(link->dnssec_negative_trust_anchors);
+}
+
+static void link_free_engines(Link *link) {
+ if (!link)
+ return;
+
+ link->dhcp_server = sd_dhcp_server_unref(link->dhcp_server);
+ link->dhcp_client = sd_dhcp_client_unref(link->dhcp_client);
+ link->dhcp_lease = sd_dhcp_lease_unref(link->dhcp_lease);
+ link->dhcp_acd = sd_ipv4acd_unref(link->dhcp_acd);
+
+ link->lldp = sd_lldp_unref(link->lldp);
+ link_lldp_emit_stop(link);
+
+ ndisc_flush(link);
+
+ link->ipv4ll = sd_ipv4ll_unref(link->ipv4ll);
+ link->dhcp6_client = sd_dhcp6_client_unref(link->dhcp6_client);
+ link->dhcp6_lease = sd_dhcp6_lease_unref(link->dhcp6_lease);
+ link->ndisc = sd_ndisc_unref(link->ndisc);
+ link->radv = sd_radv_unref(link->radv);
+
+ ipv4_dad_unref(link);
+}
+
+static Link *link_free(Link *link) {
+ assert(link);
+
+ link_ntp_settings_clear(link);
+ link_dns_settings_clear(link);
+
+ link->routes = set_free(link->routes);
+ link->routes_foreign = set_free(link->routes_foreign);
+ link->dhcp_routes = set_free(link->dhcp_routes);
+ link->dhcp_routes_old = set_free(link->dhcp_routes_old);
+ link->dhcp6_routes = set_free(link->dhcp6_routes);
+ link->dhcp6_routes_old = set_free(link->dhcp6_routes_old);
+ link->dhcp6_pd_routes = set_free(link->dhcp6_pd_routes);
+ link->dhcp6_pd_routes_old = set_free(link->dhcp6_pd_routes_old);
+ link->ndisc_routes = set_free(link->ndisc_routes);
+
+ link->nexthops = set_free(link->nexthops);
+ link->nexthops_foreign = set_free(link->nexthops_foreign);
+
+ link->neighbors = set_free(link->neighbors);
+ link->neighbors_foreign = set_free(link->neighbors_foreign);
+
+ link->addresses = set_free(link->addresses);
+ link->addresses_foreign = set_free(link->addresses_foreign);
+ link->pool_addresses = set_free(link->pool_addresses);
+ link->static_addresses = set_free(link->static_addresses);
+ link->dhcp6_addresses = set_free(link->dhcp6_addresses);
+ link->dhcp6_addresses_old = set_free(link->dhcp6_addresses_old);
+ link->dhcp6_pd_addresses = set_free(link->dhcp6_pd_addresses);
+ link->dhcp6_pd_addresses_old = set_free(link->dhcp6_pd_addresses_old);
+ link->ndisc_addresses = set_free(link->ndisc_addresses);
+
+ link_free_engines(link);
+ free(link->lease_file);
+ free(link->lldp_file);
+
+ free(link->ifname);
+ strv_free(link->alternative_names);
+ free(link->kind);
+ free(link->ssid);
+ free(link->driver);
+
+ (void) unlink(link->state_file);
+ free(link->state_file);
+
+ sd_device_unref(link->sd_device);
+
+ hashmap_free(link->bound_to_links);
+ hashmap_free(link->bound_by_links);
+
+ set_free_with_destructor(link->slaves, link_unref);
+
+ network_unref(link->network);
+
+ return mfree(link);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Link, link, link_free);
+
+int link_get(Manager *m, int ifindex, Link **ret) {
+ Link *link;
+
+ assert(m);
+ assert(ifindex > 0);
+ assert(ret);
+
+ link = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!link)
+ return -ENODEV;
+
+ *ret = link;
+
+ return 0;
+}
+
+void link_set_state(Link *link, LinkState state) {
+ assert(link);
+
+ if (link->state == state)
+ return;
+
+ log_link_debug(link, "State changed: %s -> %s",
+ link_state_to_string(link->state),
+ link_state_to_string(state));
+
+ link->state = state;
+
+ link_send_changed(link, "AdministrativeState", NULL);
+}
+
+static void link_enter_unmanaged(Link *link) {
+ assert(link);
+
+ link_set_state(link, LINK_STATE_UNMANAGED);
+
+ link_dirty(link);
+}
+
+int link_stop_engines(Link *link, bool may_keep_dhcp) {
+ int r = 0, k;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->event);
+
+ bool keep_dhcp = may_keep_dhcp &&
+ link->network &&
+ (link->manager->restarting ||
+ FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_DHCP_ON_STOP));
+
+ if (!keep_dhcp) {
+ k = sd_dhcp_client_stop(link->dhcp_client);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop DHCPv4 client: %m");
+ }
+
+ k = sd_ipv4acd_stop(link->dhcp_acd);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop IPv4 ACD client for DHCPv4: %m");
+
+ k = sd_dhcp_server_stop(link->dhcp_server);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop DHCPv4 server: %m");
+
+ k = sd_lldp_stop(link->lldp);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop LLDP: %m");
+
+ k = sd_ipv4ll_stop(link->ipv4ll);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop IPv4 link-local: %m");
+
+ k = ipv4_dad_stop(link);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop IPv4 ACD client: %m");
+
+ k = sd_dhcp6_client_stop(link->dhcp6_client);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop DHCPv6 client: %m");
+
+ k = dhcp6_pd_remove(link);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not remove DHCPv6 PD addresses and routes: %m");
+
+ k = sd_ndisc_stop(link->ndisc);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop IPv6 Router Discovery: %m");
+
+ k = sd_radv_stop(link->radv);
+ if (k < 0)
+ r = log_link_warning_errno(link, k, "Could not stop IPv6 Router Advertisement: %m");
+
+ link_lldp_emit_stop(link);
+ return r;
+}
+
+void link_enter_failed(Link *link) {
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ log_link_warning(link, "Failed");
+
+ link_set_state(link, LINK_STATE_FAILED);
+
+ (void) link_stop_engines(link, false);
+
+ link_dirty(link);
+}
+
+static int link_join_netdevs_after_configured(Link *link) {
+ NetDev *netdev;
+ int r;
+
+ HASHMAP_FOREACH(netdev, link->network->stacked_netdevs) {
+ if (netdev->ifindex > 0)
+ /* Assume already enslaved. */
+ continue;
+
+ if (netdev_get_create_type(netdev) != NETDEV_CREATE_AFTER_CONFIGURED)
+ continue;
+
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(netdev),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", netdev->ifname));
+
+ r = netdev_join(netdev, link, NULL);
+ if (r < 0)
+ return log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(netdev),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", netdev->ifname));
+ }
+
+ return 0;
+}
+
+static void link_enter_configured(Link *link) {
+ assert(link);
+ assert(link->network);
+
+ if (link->state != LINK_STATE_CONFIGURING)
+ return;
+
+ link_set_state(link, LINK_STATE_CONFIGURED);
+
+ (void) link_join_netdevs_after_configured(link);
+
+ link_dirty(link);
+}
+
+void link_check_ready(Link *link) {
+ Address *a;
+
+ assert(link);
+
+ if (link->state == LINK_STATE_CONFIGURED)
+ return;
+
+ if (link->state != LINK_STATE_CONFIGURING) {
+ log_link_debug(link, "%s(): link is in %s state.", __func__, link_state_to_string(link->state));
+ return;
+ }
+
+ if (!link->network)
+ return;
+
+ if (!link->addresses_configured) {
+ log_link_debug(link, "%s(): static addresses are not configured.", __func__);
+ return;
+ }
+
+ if (!link->neighbors_configured) {
+ log_link_debug(link, "%s(): static neighbors are not configured.", __func__);
+ return;
+ }
+
+ SET_FOREACH(a, link->addresses)
+ if (!address_is_ready(a)) {
+ _cleanup_free_ char *str = NULL;
+
+ (void) in_addr_to_string(a->family, &a->in_addr, &str);
+ log_link_debug(link, "%s(): an address %s/%d is not ready.", __func__, strnull(str), a->prefixlen);
+ return;
+ }
+
+ if (!link->static_routes_configured) {
+ log_link_debug(link, "%s(): static routes are not configured.", __func__);
+ return;
+ }
+
+ if (!link->static_nexthops_configured) {
+ log_link_debug(link, "%s(): static nexthops are not configured.", __func__);
+ return;
+ }
+
+ if (!link->routing_policy_rules_configured) {
+ log_link_debug(link, "%s(): static routing policy rules are not configured.", __func__);
+ return;
+ }
+
+ if (!link->tc_configured) {
+ log_link_debug(link, "%s(): traffic controls are not configured.", __func__);
+ return;
+ }
+
+ if (!link->sr_iov_configured) {
+ log_link_debug(link, "%s(): SR-IOV is not configured.", __func__);
+ return;
+ }
+
+ if (!link->bridge_mdb_configured) {
+ log_link_debug(link, "%s(): Bridge MDB is not configured.", __func__);
+ return;
+ }
+
+ if (link_has_carrier(link) || !link->network->configure_without_carrier) {
+ bool has_ndisc_address = false;
+ NDiscAddress *n;
+
+ if (link_ipv4ll_enabled(link, ADDRESS_FAMILY_IPV4) && !link->ipv4ll_address_configured) {
+ log_link_debug(link, "%s(): IPv4LL is not configured.", __func__);
+ return;
+ }
+
+ if (link_ipv6ll_enabled(link) &&
+ in_addr_is_null(AF_INET6, (const union in_addr_union*) &link->ipv6ll_address)) {
+ log_link_debug(link, "%s(): IPv6LL is not configured.", __func__);
+ return;
+ }
+
+ SET_FOREACH(n, link->ndisc_addresses)
+ if (!n->marked) {
+ has_ndisc_address = true;
+ break;
+ }
+
+ if ((link_dhcp4_enabled(link) || link_dhcp6_enabled(link)) &&
+ !link->dhcp_address && set_isempty(link->dhcp6_addresses) && !has_ndisc_address &&
+ !(link_ipv4ll_enabled(link, ADDRESS_FAMILY_FALLBACK_IPV4) && link->ipv4ll_address_configured)) {
+ log_link_debug(link, "%s(): DHCP4 or DHCP6 is enabled but no dynamic address is assigned yet.", __func__);
+ return;
+ }
+
+ if (link_dhcp4_enabled(link) || link_dhcp6_enabled(link) || link_dhcp6_pd_is_enabled(link) || link_ipv6_accept_ra_enabled(link)) {
+ if (!link->dhcp4_configured &&
+ !(link->dhcp6_address_configured && link->dhcp6_route_configured) &&
+ !(link->dhcp6_pd_address_configured && link->dhcp6_pd_route_configured) &&
+ !(link->ndisc_addresses_configured && link->ndisc_routes_configured) &&
+ !(link_ipv4ll_enabled(link, ADDRESS_FAMILY_FALLBACK_IPV4) && link->ipv4ll_address_configured)) {
+ /* When DHCP or RA is enabled, at least one protocol must provide an address, or
+ * an IPv4ll fallback address must be configured. */
+ log_link_debug(link, "%s(): dynamic addresses or routes are not configured.", __func__);
+ return;
+ }
+
+ log_link_debug(link, "%s(): dhcp4:%s dhcp6_addresses:%s dhcp_routes:%s dhcp_pd_addresses:%s dhcp_pd_routes:%s ndisc_addresses:%s ndisc_routes:%s",
+ __func__,
+ yes_no(link->dhcp4_configured),
+ yes_no(link->dhcp6_address_configured),
+ yes_no(link->dhcp6_route_configured),
+ yes_no(link->dhcp6_pd_address_configured),
+ yes_no(link->dhcp6_pd_route_configured),
+ yes_no(link->ndisc_addresses_configured),
+ yes_no(link->ndisc_routes_configured));
+ }
+ }
+
+ link_enter_configured(link);
+
+ return;
+}
+
+static int link_set_static_configs(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state != _LINK_STATE_INVALID);
+
+ /* Reset all *_configured flags we are configuring. */
+ link->request_static_addresses = false;
+ link->addresses_configured = false;
+ link->addresses_ready = false;
+ link->neighbors_configured = false;
+ link->static_routes_configured = false;
+ link->static_nexthops_configured = false;
+ link->routing_policy_rules_configured = false;
+
+ r = link_set_bridge_fdb(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_bridge_mdb(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_neighbors(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_addresses(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_address_labels(link);
+ if (r < 0)
+ return r;
+
+ /* now that we can figure out a default address for the dhcp server, start it */
+ r = dhcp4_server_configure(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int link_configure_continue(Link *link);
+
+static int link_mac_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_message_warning_errno(link, m, r, "Could not set MAC address, ignoring");
+ else
+ log_link_debug(link, "Setting MAC address done.");
+
+ return 1;
+}
+
+static int link_set_mac(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (!link->network->mac)
+ return 0;
+
+ log_link_debug(link, "Setting MAC address");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_append_ether_addr(req, IFLA_ADDRESS, link->network->mac);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set MAC address: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_mac_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_nomaster_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_message_warning_errno(link, m, r, "Could not set nomaster, ignoring");
+ else
+ log_link_debug(link, "Setting nomaster done.");
+
+ return 1;
+}
+
+static int link_set_nomaster(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ /* set it free if not enslaved with networkd */
+ if (link->network->bridge || link->network->bond || link->network->vrf)
+ return 0;
+
+ log_link_debug(link, "Setting nomaster");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_append_u32(req, IFLA_MASTER, 0);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_MASTER attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_nomaster_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int set_mtu_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ link->setting_mtu = false;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_message_warning_errno(link, m, r, "Could not set MTU, ignoring");
+ else
+ log_link_debug(link, "Setting MTU done.");
+
+ if (link->state == LINK_STATE_INITIALIZED) {
+ r = link_configure_continue(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+int link_set_mtu(Link *link, uint32_t mtu) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (mtu == 0 || link->setting_mtu)
+ return 0;
+
+ if (link->mtu == mtu)
+ return 0;
+
+ log_link_debug(link, "Setting MTU: %" PRIu32, mtu);
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ /* IPv6 protocol requires a minimum MTU of IPV6_MTU_MIN(1280) bytes
+ * on the interface. Bump up MTU bytes to IPV6_MTU_MIN. */
+ if (link_ipv6_enabled(link) && mtu < IPV6_MIN_MTU) {
+
+ log_link_warning(link, "Bumping MTU to " STRINGIFY(IPV6_MIN_MTU) ", as "
+ "IPv6 is requested and requires a minimum MTU of " STRINGIFY(IPV6_MIN_MTU) " bytes");
+
+ mtu = IPV6_MIN_MTU;
+ }
+
+ r = sd_netlink_message_append_u32(req, IFLA_MTU, mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append MTU: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_mtu_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ link->setting_mtu = true;
+
+ return 0;
+}
+
+static bool link_reduces_vlan_mtu(Link *link) {
+ /* See netif_reduces_vlan_mtu() in kernel. */
+ return streq_ptr(link->kind, "macsec");
+}
+
+static uint32_t link_get_requested_mtu_by_stacked_netdevs(Link *link) {
+ uint32_t mtu = 0;
+ NetDev *dev;
+
+ HASHMAP_FOREACH(dev, link->network->stacked_netdevs)
+ if (dev->kind == NETDEV_KIND_VLAN && dev->mtu > 0)
+ /* See vlan_dev_change_mtu() in kernel. */
+ mtu = MAX(mtu, link_reduces_vlan_mtu(link) ? dev->mtu + 4 : dev->mtu);
+
+ else if (dev->kind == NETDEV_KIND_MACVLAN && dev->mtu > mtu)
+ /* See macvlan_change_mtu() in kernel. */
+ mtu = dev->mtu;
+
+ return mtu;
+}
+
+static int link_configure_mtu(Link *link) {
+ uint32_t mtu;
+
+ assert(link);
+ assert(link->network);
+
+ if (link->network->mtu > 0)
+ return link_set_mtu(link, link->network->mtu);
+
+ mtu = link_get_requested_mtu_by_stacked_netdevs(link);
+ if (link->mtu >= mtu)
+ return 0;
+
+ log_link_notice(link, "Bumping MTU bytes from %"PRIu32" to %"PRIu32" because of stacked device. "
+ "If it is not desired, then please explicitly specify MTUBytes= setting.",
+ link->mtu, mtu);
+
+ return link_set_mtu(link, mtu);
+}
+
+static int set_flags_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_message_warning_errno(link, m, r, "Could not set link flags, ignoring");
+
+ return 1;
+}
+
+static int link_set_flags(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ unsigned ifi_change = 0;
+ unsigned ifi_flags = 0;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ if (link->network->arp < 0 && link->network->multicast < 0 && link->network->allmulticast < 0)
+ return 0;
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ if (link->network->arp >= 0) {
+ ifi_change |= IFF_NOARP;
+ SET_FLAG(ifi_flags, IFF_NOARP, link->network->arp == 0);
+ }
+
+ if (link->network->multicast >= 0) {
+ ifi_change |= IFF_MULTICAST;
+ SET_FLAG(ifi_flags, IFF_MULTICAST, link->network->multicast);
+ }
+
+ if (link->network->allmulticast >= 0) {
+ ifi_change |= IFF_ALLMULTI;
+ SET_FLAG(ifi_flags, IFF_ALLMULTI, link->network->allmulticast);
+ }
+
+ r = sd_rtnl_message_link_set_flags(req, ifi_flags, ifi_change);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link flags: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_flags_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_acquire_ipv6_conf(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (link->ndisc) {
+ log_link_debug(link, "Discovering IPv6 routers");
+
+ r = sd_ndisc_start(link->ndisc);
+ if (r < 0 && r != -EBUSY)
+ return log_link_warning_errno(link, r, "Could not start IPv6 Router Discovery: %m");
+ }
+
+ if (link->radv) {
+ assert(link->radv);
+ assert(in_addr_is_link_local(AF_INET6, (const union in_addr_union*)&link->ipv6ll_address) > 0);
+
+ log_link_debug(link, "Starting IPv6 Router Advertisements");
+
+ r = radv_emit_dns(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to configure DNS or Domains in IPv6 Router Advertisement: %m");
+
+ r = sd_radv_start(link->radv);
+ if (r < 0 && r != -EBUSY)
+ return log_link_warning_errno(link, r, "Could not start IPv6 Router Advertisement: %m");
+ }
+
+ if (link_dhcp6_enabled(link) && IN_SET(link->network->dhcp6_without_ra,
+ DHCP6_CLIENT_START_MODE_INFORMATION_REQUEST,
+ DHCP6_CLIENT_START_MODE_SOLICIT)) {
+ assert(link->dhcp6_client);
+ assert(in_addr_is_link_local(AF_INET6, (const union in_addr_union*)&link->ipv6ll_address) > 0);
+
+ r = dhcp6_request_address(link, link->network->dhcp6_without_ra == DHCP6_CLIENT_START_MODE_INFORMATION_REQUEST);
+ if (r < 0 && r != -EBUSY)
+ return log_link_warning_errno(link, r, "Could not acquire DHCPv6 lease: %m");
+ else
+ log_link_debug(link, "Acquiring DHCPv6 lease");
+ }
+
+ r = dhcp6_request_prefix_delegation(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to request DHCPv6 prefix delegation: %m");
+
+ return 0;
+}
+
+static int link_acquire_ipv4_conf(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->event);
+
+ if (link_ipv4ll_enabled(link, ADDRESS_FAMILY_IPV4)) {
+ assert(link->ipv4ll);
+
+ log_link_debug(link, "Acquiring IPv4 link-local address");
+
+ r = sd_ipv4ll_start(link->ipv4ll);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not acquire IPv4 link-local address: %m");
+ }
+
+ if (link->dhcp_client) {
+ log_link_debug(link, "Acquiring DHCPv4 lease");
+
+ r = sd_dhcp_client_start(link->dhcp_client);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not acquire DHCPv4 lease: %m");
+ }
+
+ return 0;
+}
+
+static int link_acquire_conf(Link *link) {
+ int r;
+
+ assert(link);
+
+ r = link_acquire_ipv4_conf(link);
+ if (r < 0)
+ return r;
+
+ if (!in_addr_is_null(AF_INET6, (const union in_addr_union*) &link->ipv6ll_address)) {
+ r = link_acquire_ipv6_conf(link);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_lldp_emit_start(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to start LLDP transmission: %m");
+
+ return 0;
+}
+
+bool link_has_carrier(Link *link) {
+ /* see Documentation/networking/operstates.txt in the kernel sources */
+
+ if (link->kernel_operstate == IF_OPER_UP)
+ return true;
+
+ if (link->kernel_operstate == IF_OPER_UNKNOWN)
+ /* operstate may not be implemented, so fall back to flags */
+ if (FLAGS_SET(link->flags, IFF_LOWER_UP | IFF_RUNNING) &&
+ !FLAGS_SET(link->flags, IFF_DORMANT))
+ return true;
+
+ return false;
+}
+
+static int link_address_genmode_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ link->setting_genmode = false;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_message_warning_errno(link, m, r, "Could not set address genmode for interface, ignoring");
+ else
+ log_link_debug(link, "Setting address genmode done.");
+
+ if (link->state == LINK_STATE_INITIALIZED) {
+ r = link_configure_continue(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+static int link_configure_addrgen_mode(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ uint8_t ipv6ll_mode;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (!socket_ipv6_is_supported() || link->setting_genmode)
+ return 0;
+
+ log_link_debug(link, "Setting address genmode for link");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_AF_SPEC);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_AF_SPEC container: %m");
+
+ r = sd_netlink_message_open_container(req, AF_INET6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open AF_INET6 container: %m");
+
+ if (!link_ipv6ll_enabled(link))
+ ipv6ll_mode = IN6_ADDR_GEN_MODE_NONE;
+ else if (link->network->ipv6ll_address_gen_mode < 0) {
+ r = sysctl_read_ip_property(AF_INET6, link->ifname, "stable_secret", NULL);
+ if (r < 0) {
+ /* The file may not exist. And even if it exists, when stable_secret is unset,
+ * reading the file fails with EIO. */
+ log_link_debug_errno(link, r, "Failed to read sysctl property stable_secret: %m");
+
+ ipv6ll_mode = IN6_ADDR_GEN_MODE_EUI64;
+ } else
+ ipv6ll_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ } else
+ ipv6ll_mode = link->network->ipv6ll_address_gen_mode;
+
+ r = sd_netlink_message_append_u8(req, IFLA_INET6_ADDR_GEN_MODE, ipv6ll_mode);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_INET6_ADDR_GEN_MODE: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close AF_INET6 container: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close IFLA_AF_SPEC container: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_address_genmode_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ link->setting_genmode = true;
+
+ return 0;
+}
+
+static int link_up_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ /* we warn but don't fail the link, as it may be brought up later */
+ log_link_message_warning_errno(link, m, r, "Could not bring up interface");
+
+ return 1;
+}
+
+static int link_up(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ log_link_debug(link, "Bringing link up");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link flags: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_up_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_down_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_message_warning_errno(link, m, r, "Could not bring down interface");
+
+ return 1;
+}
+
+int link_down(Link *link, link_netlink_message_handler_t callback) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ log_link_debug(link, "Bringing link down");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req,
+ RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_rtnl_message_link_set_flags(req, 0, IFF_UP);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link flags: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req,
+ callback ?: link_down_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_group_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_message_warning_errno(link, m, r, "Could not set group for the interface");
+
+ return 1;
+}
+
+static int link_set_group(Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (link->network->group <= 0)
+ return 0;
+
+ log_link_debug(link, "Setting group");
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_append_u32(req, IFLA_GROUP, link->network->group);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set link group: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_group_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_handle_bound_to_list(Link *link) {
+ Link *l;
+ int r;
+ bool required_up = false;
+ bool link_is_up = false;
+
+ assert(link);
+
+ if (hashmap_isempty(link->bound_to_links))
+ return 0;
+
+ if (link->flags & IFF_UP)
+ link_is_up = true;
+
+ HASHMAP_FOREACH (l, link->bound_to_links)
+ if (link_has_carrier(l)) {
+ required_up = true;
+ break;
+ }
+
+ if (!required_up && link_is_up) {
+ r = link_down(link, NULL);
+ if (r < 0)
+ return r;
+ } else if (required_up && !link_is_up) {
+ r = link_up(link);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int link_handle_bound_by_list(Link *link) {
+ Link *l;
+ int r;
+
+ assert(link);
+
+ if (hashmap_isempty(link->bound_by_links))
+ return 0;
+
+ HASHMAP_FOREACH (l, link->bound_by_links) {
+ r = link_handle_bound_to_list(l);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int link_put_carrier(Link *link, Link *carrier, Hashmap **h) {
+ int r;
+
+ assert(link);
+ assert(carrier);
+
+ if (link == carrier)
+ return 0;
+
+ if (hashmap_get(*h, INT_TO_PTR(carrier->ifindex)))
+ return 0;
+
+ r = hashmap_ensure_allocated(h, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(*h, INT_TO_PTR(carrier->ifindex), carrier);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int link_new_bound_by_list(Link *link) {
+ Manager *m;
+ Link *carrier;
+ int r;
+ bool list_updated = false;
+
+ assert(link);
+ assert(link->manager);
+
+ m = link->manager;
+
+ HASHMAP_FOREACH(carrier, m->links) {
+ if (!carrier->network)
+ continue;
+
+ if (strv_isempty(carrier->network->bind_carrier))
+ continue;
+
+ if (strv_fnmatch(carrier->network->bind_carrier, link->ifname)) {
+ r = link_put_carrier(link, carrier, &link->bound_by_links);
+ if (r < 0)
+ return r;
+
+ list_updated = true;
+ }
+ }
+
+ if (list_updated)
+ link_dirty(link);
+
+ HASHMAP_FOREACH(carrier, link->bound_by_links) {
+ r = link_put_carrier(carrier, link, &carrier->bound_to_links);
+ if (r < 0)
+ return r;
+
+ link_dirty(carrier);
+ }
+
+ return 0;
+}
+
+static int link_new_bound_to_list(Link *link) {
+ Manager *m;
+ Link *carrier;
+ int r;
+ bool list_updated = false;
+
+ assert(link);
+ assert(link->manager);
+
+ if (!link->network)
+ return 0;
+
+ if (strv_isempty(link->network->bind_carrier))
+ return 0;
+
+ m = link->manager;
+
+ HASHMAP_FOREACH (carrier, m->links) {
+ if (strv_fnmatch(link->network->bind_carrier, carrier->ifname)) {
+ r = link_put_carrier(link, carrier, &link->bound_to_links);
+ if (r < 0)
+ return r;
+
+ list_updated = true;
+ }
+ }
+
+ if (list_updated)
+ link_dirty(link);
+
+ HASHMAP_FOREACH (carrier, link->bound_to_links) {
+ r = link_put_carrier(carrier, link, &carrier->bound_by_links);
+ if (r < 0)
+ return r;
+
+ link_dirty(carrier);
+ }
+
+ return 0;
+}
+
+static int link_new_carrier_maps(Link *link) {
+ int r;
+
+ r = link_new_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ r = link_handle_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ r = link_new_bound_to_list(link);
+ if (r < 0)
+ return r;
+
+ r = link_handle_bound_to_list(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void link_free_bound_to_list(Link *link) {
+ Link *bound_to;
+
+ HASHMAP_FOREACH (bound_to, link->bound_to_links) {
+ hashmap_remove(link->bound_to_links, INT_TO_PTR(bound_to->ifindex));
+
+ if (hashmap_remove(bound_to->bound_by_links, INT_TO_PTR(link->ifindex)))
+ link_dirty(bound_to);
+ }
+
+ return;
+}
+
+static void link_free_bound_by_list(Link *link) {
+ Link *bound_by;
+
+ HASHMAP_FOREACH (bound_by, link->bound_by_links) {
+ hashmap_remove(link->bound_by_links, INT_TO_PTR(bound_by->ifindex));
+
+ if (hashmap_remove(bound_by->bound_to_links, INT_TO_PTR(link->ifindex))) {
+ link_dirty(bound_by);
+ link_handle_bound_to_list(bound_by);
+ }
+ }
+
+ return;
+}
+
+static void link_free_carrier_maps(Link *link) {
+ bool list_updated = false;
+
+ assert(link);
+
+ if (!hashmap_isempty(link->bound_to_links)) {
+ link_free_bound_to_list(link);
+ list_updated = true;
+ }
+
+ if (!hashmap_isempty(link->bound_by_links)) {
+ link_free_bound_by_list(link);
+ list_updated = true;
+ }
+
+ if (list_updated)
+ link_dirty(link);
+
+ return;
+}
+
+static int link_append_to_master(Link *link, NetDev *netdev) {
+ Link *master;
+ int r;
+
+ assert(link);
+ assert(netdev);
+
+ r = link_get(link->manager, netdev->ifindex, &master);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_put(&master->slaves, NULL, link);
+ if (r <= 0)
+ return r;
+
+ link_ref(link);
+ return 0;
+}
+
+static void link_drop_from_master(Link *link, NetDev *netdev) {
+ Link *master;
+
+ assert(link);
+
+ if (!link->manager || !netdev)
+ return;
+
+ if (link_get(link->manager, netdev->ifindex, &master) < 0)
+ return;
+
+ link_unref(set_remove(master->slaves, link));
+}
+
+static void link_detach_from_manager(Link *link) {
+ if (!link || !link->manager)
+ return;
+
+ link_unref(set_remove(link->manager->links_requesting_uuid, link));
+ link_clean(link);
+
+ /* The following must be called at last. */
+ assert_se(hashmap_remove(link->manager->links, INT_TO_PTR(link->ifindex)) == link);
+ link_unref(link);
+}
+
+void link_drop(Link *link) {
+ if (!link || link->state == LINK_STATE_LINGER)
+ return;
+
+ link_set_state(link, LINK_STATE_LINGER);
+
+ link_free_carrier_maps(link);
+
+ if (link->network) {
+ link_drop_from_master(link, link->network->bridge);
+ link_drop_from_master(link, link->network->bond);
+ }
+
+ log_link_debug(link, "Link removed");
+
+ (void) unlink(link->state_file);
+ link_detach_from_manager(link);
+}
+
+static int link_joined(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (!hashmap_isempty(link->bound_to_links)) {
+ r = link_handle_bound_to_list(link);
+ if (r < 0)
+ return r;
+ } else if (!(link->flags & IFF_UP)) {
+ r = link_up(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ if (link->network->bridge) {
+ r = link_set_bridge(link);
+ if (r < 0)
+ log_link_error_errno(link, r, "Could not set bridge message: %m");
+
+ r = link_append_to_master(link, link->network->bridge);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to add to bridge master's slave list: %m");
+ }
+
+ if (link->network->bond) {
+ r = link_set_bond(link);
+ if (r < 0)
+ log_link_error_errno(link, r, "Could not set bond message: %m");
+
+ r = link_append_to_master(link, link->network->bond);
+ if (r < 0)
+ log_link_error_errno(link, r, "Failed to add to bond master's slave list: %m");
+ }
+
+ r = link_set_bridge_vlan(link);
+ if (r < 0)
+ log_link_error_errno(link, r, "Could not set bridge vlan: %m");
+
+ /* Skip setting up addresses until it gets carrier,
+ or it would try to set addresses twice,
+ which is bad for non-idempotent steps. */
+ if (!link_has_carrier(link) && !link->network->configure_without_carrier)
+ return 0;
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ r = link_acquire_conf(link);
+ if (r < 0)
+ return r;
+
+ return link_set_static_configs(link);
+}
+
+static int netdev_join_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->enslaving > 0);
+
+ link->enslaving--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not join netdev");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ log_link_debug(link, "Joined netdev");
+
+ if (link->enslaving == 0) {
+ r = link_joined(link);
+ if (r < 0)
+ link_enter_failed(link);
+ }
+
+ return 1;
+}
+
+static int link_enter_join_netdev(Link *link) {
+ NetDev *netdev;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state == LINK_STATE_INITIALIZED);
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ link_dirty(link);
+ link->enslaving = 0;
+
+ if (link->network->bond) {
+ if (link->network->bond->state == NETDEV_STATE_READY &&
+ link->network->bond->ifindex == link->master_ifindex)
+ return link_joined(link);
+
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->bond),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", link->network->bond->ifname));
+
+ link->enslaving++;
+
+ r = netdev_join(link->network->bond, link, netdev_join_handler);
+ if (r < 0) {
+ log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->bond),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", link->network->bond->ifname));
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ if (link->network->bridge) {
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->bridge),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", link->network->bridge->ifname));
+
+ link->enslaving++;
+
+ r = netdev_join(link->network->bridge, link, netdev_join_handler);
+ if (r < 0) {
+ log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->bridge),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", link->network->bridge->ifname));
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ if (link->network->vrf) {
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->vrf),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", link->network->vrf->ifname));
+
+ link->enslaving++;
+
+ r = netdev_join(link->network->vrf, link, netdev_join_handler);
+ if (r < 0) {
+ log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(link->network->vrf),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", link->network->vrf->ifname));
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ HASHMAP_FOREACH(netdev, link->network->stacked_netdevs) {
+
+ if (netdev->ifindex > 0)
+ /* Assume already enslaved. */
+ continue;
+
+ if (netdev_get_create_type(netdev) != NETDEV_CREATE_STACKED)
+ continue;
+
+ log_struct(LOG_DEBUG,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(netdev),
+ LOG_LINK_MESSAGE(link, "Enslaving by '%s'", netdev->ifname));
+
+ link->enslaving++;
+
+ r = netdev_join(netdev, link, netdev_join_handler);
+ if (r < 0) {
+ log_struct_errno(LOG_WARNING, r,
+ LOG_LINK_INTERFACE(link),
+ LOG_NETDEV_INTERFACE(netdev),
+ LOG_LINK_MESSAGE(link, "Could not join netdev '%s': %m", netdev->ifname));
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ if (link->enslaving == 0)
+ return link_joined(link);
+
+ return 0;
+}
+
+static int link_drop_foreign_config(Link *link) {
+ int r;
+
+ r = link_drop_foreign_addresses(link);
+ if (r < 0)
+ return r;
+
+ r = link_drop_foreign_neighbors(link);
+ if (r < 0)
+ return r;
+
+ return link_drop_foreign_routes(link);
+}
+
+static int link_drop_config(Link *link) {
+ int r;
+
+ r = link_drop_addresses(link);
+ if (r < 0)
+ return r;
+
+ r = link_drop_neighbors(link);
+ if (r < 0)
+ return r;
+
+ r = link_drop_routes(link);
+ if (r < 0)
+ return r;
+
+ ndisc_flush(link);
+
+ return 0;
+}
+
+int link_configure(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state == LINK_STATE_INITIALIZED);
+
+ r = link_configure_traffic_control(link);
+ if (r < 0)
+ return r;
+
+ r = link_configure_sr_iov(link);
+ if (r < 0)
+ return r;
+
+ if (link->iftype == ARPHRD_CAN)
+ return link_configure_can(link);
+
+ r = link_set_sysctl(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_ipv6_proxy_ndp_addresses(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_mac(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_nomaster(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_flags(link);
+ if (r < 0)
+ return r;
+
+ r = link_set_group(link);
+ if (r < 0)
+ return r;
+
+ r = ipv4ll_configure(link);
+ if (r < 0)
+ return r;
+
+ r = dhcp4_configure(link);
+ if (r < 0)
+ return r;
+
+ r = dhcp6_configure(link);
+ if (r < 0)
+ return r;
+
+ r = ndisc_configure(link);
+ if (r < 0)
+ return r;
+
+ r = radv_configure(link);
+ if (r < 0)
+ return r;
+
+ r = link_lldp_rx_configure(link);
+ if (r < 0)
+ return r;
+
+ r = link_configure_mtu(link);
+ if (r < 0)
+ return r;
+
+ r = link_configure_addrgen_mode(link);
+ if (r < 0)
+ return r;
+
+ return link_configure_continue(link);
+}
+
+/* The configuration continues in this separate function, instead of
+ * including this in the above link_configure() function, for two
+ * reasons:
+ * 1) some devices reset the link when the mtu is set, which caused
+ * an infinite loop here in networkd; see:
+ * https://github.com/systemd/systemd/issues/6593
+ * https://github.com/systemd/systemd/issues/9831
+ * 2) if ipv6ll is disabled, then bringing the interface up must be
+ * delayed until after we get confirmation from the kernel that
+ * the addr_gen_mode parameter has been set (via netlink), see:
+ * https://github.com/systemd/systemd/issues/13882
+ */
+static int link_configure_continue(Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state == LINK_STATE_INITIALIZED);
+
+ if (link->setting_mtu || link->setting_genmode)
+ return 0;
+
+ /* Drop foreign config, but ignore loopback or critical devices.
+ * We do not want to remove loopback address or addresses used for root NFS. */
+ if (!(link->flags & IFF_LOOPBACK) &&
+ link->network->keep_configuration != KEEP_CONFIGURATION_YES) {
+ r = link_drop_foreign_config(link);
+ if (r < 0)
+ return r;
+ }
+
+ /* The kernel resets ipv6 mtu after changing device mtu;
+ * we must set this here, after we've set device mtu */
+ r = link_set_ipv6_mtu(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot set IPv6 MTU for interface, ignoring: %m");
+
+ return link_enter_join_netdev(link);
+}
+
+static int link_reconfigure_internal(Link *link, sd_netlink_message *m, bool force) {
+ _cleanup_strv_free_ char **s = NULL;
+ Network *network;
+ int r;
+
+ assert(m);
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_strv(m, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &s);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ strv_free_and_replace(link->alternative_names, s);
+
+ r = network_get(link->manager, link->iftype, link->sd_device,
+ link->ifname, link->alternative_names, link->driver,
+ &link->hw_addr.addr.ether, &link->permanent_mac,
+ link->wlan_iftype, link->ssid, &link->bssid, &network);
+ if (r == -ENOENT) {
+ link_enter_unmanaged(link);
+ return 0;
+ } else if (r == 0 && network->unmanaged) {
+ link_enter_unmanaged(link);
+ return 0;
+ } else if (r < 0)
+ return r;
+
+ if (link->network == network && !force)
+ return 0;
+
+ log_link_info(link, "Re-configuring with %s", network->filename);
+
+ /* Dropping old .network file */
+ r = link_stop_engines(link, false);
+ if (r < 0)
+ return r;
+
+ r = link_drop_config(link);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(link->state, LINK_STATE_UNMANAGED, LINK_STATE_PENDING, LINK_STATE_INITIALIZED)) {
+ log_link_debug(link, "State is %s, dropping config", link_state_to_string(link->state));
+ r = link_drop_foreign_config(link);
+ if (r < 0)
+ return r;
+ }
+
+ link_free_carrier_maps(link);
+ link_free_engines(link);
+ link->network = network_unref(link->network);
+
+ /* Then, apply new .network file */
+ r = network_apply(network, link);
+ if (r < 0)
+ return r;
+
+ r = link_new_carrier_maps(link);
+ if (r < 0)
+ return r;
+
+ link_set_state(link, LINK_STATE_INITIALIZED);
+ link_dirty(link);
+
+ /* link_configure_duid() returns 0 if it requests product UUID. In that case,
+ * link_configure() is called later asynchronously. */
+ r = link_configure_duid(link);
+ if (r <= 0)
+ return r;
+
+ r = link_configure(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int link_reconfigure_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ r = link_reconfigure_internal(link, m, false);
+ if (r < 0)
+ link_enter_failed(link);
+
+ return 1;
+}
+
+static int link_force_reconfigure_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ r = link_reconfigure_internal(link, m, true);
+ if (r < 0)
+ link_enter_failed(link);
+
+ return 1;
+}
+
+int link_reconfigure(Link *link, bool force) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ /* When link in pending or initialized state, then link_configure() will be called. To prevent
+ * the function be called multiple times simultaneously, refuse to reconfigure the interface in
+ * these case. */
+ if (IN_SET(link->state, LINK_STATE_PENDING, LINK_STATE_INITIALIZED, LINK_STATE_LINGER))
+ return 0; /* o means no-op. */
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_GETLINK,
+ link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req,
+ force ? link_force_reconfigure_handler : link_reconfigure_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return r;
+
+ link_ref(link);
+
+ return 1; /* 1 means the interface will be reconfigured. */
+}
+
+static int link_initialized_and_synced(Link *link) {
+ Network *network;
+ int r;
+
+ assert(link);
+ assert(link->ifname);
+ assert(link->manager);
+
+ /* We may get called either from the asynchronous netlink callback,
+ * or directly for link_add() if running in a container. See link_add(). */
+ if (!IN_SET(link->state, LINK_STATE_PENDING, LINK_STATE_INITIALIZED))
+ return 0;
+
+ log_link_debug(link, "Link state is up-to-date");
+ link_set_state(link, LINK_STATE_INITIALIZED);
+
+ r = link_new_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ r = link_handle_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ if (!link->network) {
+ r = wifi_get_info(link);
+ if (r < 0)
+ return r;
+
+ r = network_get(link->manager, link->iftype, link->sd_device,
+ link->ifname, link->alternative_names, link->driver,
+ &link->hw_addr.addr.ether, &link->permanent_mac,
+ link->wlan_iftype, link->ssid, &link->bssid, &network);
+ if (r == -ENOENT) {
+ link_enter_unmanaged(link);
+ return 0;
+ } else if (r == 0 && network->unmanaged) {
+ link_enter_unmanaged(link);
+ return 0;
+ } else if (r < 0)
+ return r;
+
+ if (link->flags & IFF_LOOPBACK) {
+ if (network->link_local != ADDRESS_FAMILY_NO)
+ log_link_debug(link, "Ignoring link-local autoconfiguration for loopback link");
+
+ if (network->dhcp != ADDRESS_FAMILY_NO)
+ log_link_debug(link, "Ignoring DHCP clients for loopback link");
+
+ if (network->dhcp_server)
+ log_link_debug(link, "Ignoring DHCP server for loopback link");
+ }
+
+ r = network_apply(network, link);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_new_bound_to_list(link);
+ if (r < 0)
+ return r;
+
+ /* link_configure_duid() returns 0 if it requests product UUID. In that case,
+ * link_configure() is called later asynchronously. */
+ r = link_configure_duid(link);
+ if (r <= 0)
+ return r;
+
+ r = link_configure(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int link_initialized_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ _cleanup_strv_free_ char **s = NULL;
+ int r;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to wait for the interface to be initialized: %m");
+ link_enter_failed(link);
+ return 0;
+ }
+
+ r = sd_netlink_message_read_strv(m, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &s);
+ if (r < 0 && r != -ENODATA) {
+ link_enter_failed(link);
+ return 0;
+ }
+
+ strv_free_and_replace(link->alternative_names, s);
+
+ r = link_initialized_and_synced(link);
+ if (r < 0)
+ link_enter_failed(link);
+ return 1;
+}
+
+int link_initialized(Link *link, sd_device *device) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(device);
+
+ if (link->state != LINK_STATE_PENDING)
+ return 0;
+
+ if (link->sd_device)
+ return 0;
+
+ log_link_debug(link, "udev initialized link");
+ link_set_state(link, LINK_STATE_INITIALIZED);
+
+ link->sd_device = sd_device_ref(device);
+
+ /* udev has initialized the link, but we don't know if we have yet
+ * processed the NEWLINK messages with the latest state. Do a GETLINK,
+ * when it returns we know that the pending NEWLINKs have already been
+ * processed and that we are up-to-date */
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_GETLINK,
+ link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, link_initialized_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return r;
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int link_load(Link *link) {
+ _cleanup_free_ char *network_file = NULL,
+ *addresses = NULL,
+ *routes = NULL,
+ *dhcp4_address = NULL,
+ *ipv4ll_address = NULL;
+ int r;
+
+ assert(link);
+
+ r = parse_env_file(NULL, link->state_file,
+ "NETWORK_FILE", &network_file,
+ "ADDRESSES", &addresses,
+ "ROUTES", &routes,
+ "DHCP4_ADDRESS", &dhcp4_address,
+ "IPV4LL_ADDRESS", &ipv4ll_address);
+ if (r < 0 && r != -ENOENT)
+ return log_link_error_errno(link, r, "Failed to read %s: %m", link->state_file);
+
+ if (network_file) {
+ Network *network;
+ char *suffix;
+
+ /* drop suffix */
+ suffix = strrchr(network_file, '.');
+ if (!suffix) {
+ log_link_debug(link, "Failed to get network name from %s", network_file);
+ goto network_file_fail;
+ }
+ *suffix = '\0';
+
+ r = network_get_by_name(link->manager, basename(network_file), &network);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to get network %s: %m", basename(network_file));
+ goto network_file_fail;
+ }
+
+ r = network_apply(network, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to apply network %s: %m", basename(network_file));
+ }
+
+network_file_fail:
+
+ r = link_deserialize_addresses(link, addresses);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to load addresses from %s, ignoring: %m", link->state_file);
+
+ r = link_deserialize_routes(link, routes);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to load routes from %s, ignoring: %m", link->state_file);
+
+ r = link_deserialize_dhcp4(link, dhcp4_address);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to load DHCPv4 address from %s, ignoring: %m", link->state_file);
+
+ r = link_deserialize_ipv4ll(link, ipv4ll_address);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to load IPv4LL address from %s, ignoring: %m", link->state_file);
+
+ return 0;
+}
+
+int link_add(Manager *m, sd_netlink_message *message, Link **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ char ifindex_str[2 + DECIMAL_STR_MAX(int)];
+ Link *link;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+ assert(message);
+ assert(ret);
+
+ r = link_new(m, message, ret);
+ if (r < 0)
+ return r;
+
+ link = *ret;
+
+ log_link_debug(link, "Link %d added", link->ifindex);
+
+ r = link_load(link);
+ if (r < 0)
+ return r;
+
+ if (path_is_read_only_fs("/sys") <= 0) {
+ /* udev should be around */
+ sprintf(ifindex_str, "n%d", link->ifindex);
+ r = sd_device_new_from_device_id(&device, ifindex_str);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not find device, waiting for device initialization: %m");
+ return 0;
+ }
+
+ r = sd_device_get_is_initialized(device);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not determine whether the device is initialized: %m");
+ goto failed;
+ }
+ if (r == 0) {
+ /* not yet ready */
+ log_link_debug(link, "link pending udev initialization...");
+ return 0;
+ }
+
+ r = device_is_renaming(device);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to determine the device is being renamed: %m");
+ goto failed;
+ }
+ if (r > 0) {
+ log_link_debug(link, "Interface is being renamed, pending initialization.");
+ return 0;
+ }
+
+ r = link_initialized(link, device);
+ if (r < 0)
+ goto failed;
+ } else {
+ r = link_initialized_and_synced(link);
+ if (r < 0)
+ goto failed;
+ }
+
+ return 0;
+failed:
+ link_enter_failed(link);
+ return r;
+}
+
+int link_ipv6ll_gained(Link *link, const struct in6_addr *address) {
+ int r;
+
+ assert(link);
+
+ log_link_info(link, "Gained IPv6LL");
+
+ link->ipv6ll_address = *address;
+ link_check_ready(link);
+
+ if (IN_SET(link->state, LINK_STATE_CONFIGURING, LINK_STATE_CONFIGURED)) {
+ r = link_acquire_ipv6_conf(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int link_carrier_gained(Link *link) {
+ int r;
+
+ assert(link);
+
+ r = wifi_get_info(link);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = link_reconfigure(link, false);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+ }
+
+ if (IN_SET(link->state, LINK_STATE_CONFIGURING, LINK_STATE_CONFIGURED)) {
+ r = link_acquire_conf(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ r = link_set_static_configs(link);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_handle_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ if (!link->bridge_mdb_configured) {
+ r = link_set_bridge_mdb(link);
+ if (r < 0)
+ return r;
+ }
+
+ if (streq_ptr(link->kind, "bridge")) {
+ Link *slave;
+
+ SET_FOREACH(slave, link->slaves) {
+ if (slave->bridge_mdb_configured)
+ continue;
+
+ r = link_set_bridge_mdb(slave);
+ if (r < 0)
+ link_enter_failed(slave);
+ }
+ }
+
+ return 0;
+}
+
+static int link_carrier_lost(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (link->network && link->network->ignore_carrier_loss)
+ return 0;
+
+ /* Some devices reset itself while setting the MTU. This causes the DHCP client fall into a loop.
+ * setting_mtu keep track whether the device got reset because of setting MTU and does not drop the
+ * configuration and stop the clients as well. */
+ if (link->setting_mtu)
+ return 0;
+
+ r = link_stop_engines(link, false);
+ if (r < 0) {
+ link_enter_failed(link);
+ return r;
+ }
+
+ r = link_drop_config(link);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(link->state, LINK_STATE_UNMANAGED, LINK_STATE_PENDING, LINK_STATE_INITIALIZED)) {
+ log_link_debug(link, "State is %s, dropping config", link_state_to_string(link->state));
+ r = link_drop_foreign_config(link);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_handle_bound_by_list(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int link_carrier_reset(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (link_has_carrier(link)) {
+ r = link_carrier_lost(link);
+ if (r < 0)
+ return r;
+
+ r = link_carrier_gained(link);
+ if (r < 0)
+ return r;
+
+ log_link_info(link, "Reset carrier");
+ }
+
+ return 0;
+}
+
+/* This is called every time an interface admin state changes to up;
+ * specifically, when IFF_UP flag changes from unset to set */
+static int link_admin_state_up(Link *link) {
+ int r;
+
+ /* We set the ipv6 mtu after the device mtu, but the kernel resets
+ * ipv6 mtu on NETDEV_UP, so we need to reset it. The check for
+ * ipv6_mtu_set prevents this from trying to set it too early before
+ * the link->network has been setup; we only need to reset it
+ * here if we've already set it during normal initialization. */
+ if (link->ipv6_mtu_set) {
+ r = link_set_ipv6_mtu(link);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int link_update(Link *link, sd_netlink_message *m) {
+ _cleanup_strv_free_ char **s = NULL;
+ hw_addr_data hw_addr;
+ const char *ifname;
+ uint32_t mtu;
+ bool had_carrier, carrier_gained, carrier_lost, link_was_admin_up;
+ int old_master, r;
+
+ assert(link);
+ assert(link->ifname);
+ assert(m);
+
+ if (link->state == LINK_STATE_LINGER) {
+ log_link_info(link, "Link re-added");
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ r = link_new_carrier_maps(link);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_message_read_string(m, IFLA_IFNAME, &ifname);
+ if (r >= 0 && !streq(ifname, link->ifname)) {
+ Manager *manager = link->manager;
+
+ log_link_info(link, "Interface name change detected, %s has been renamed to %s.", link->ifname, ifname);
+
+ link_drop(link);
+ r = link_add(manager, m, &link);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_message_read_strv(m, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &s);
+ if (r >= 0)
+ strv_free_and_replace(link->alternative_names, s);
+
+ r = sd_netlink_message_read_u32(m, IFLA_MTU, &mtu);
+ if (r >= 0 && mtu > 0) {
+ link->mtu = mtu;
+ if (link->original_mtu == 0) {
+ link->original_mtu = mtu;
+ log_link_debug(link, "Saved original MTU: %" PRIu32, link->original_mtu);
+ }
+
+ if (link->dhcp_client) {
+ r = sd_dhcp_client_set_mtu(link->dhcp_client,
+ link->mtu);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MTU in DHCP client: %m");
+ }
+
+ if (link->radv) {
+ r = sd_radv_set_mtu(link->radv, link->mtu);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set MTU for Router Advertisement: %m");
+ }
+ }
+
+ /* The kernel may broadcast NEWLINK messages without the MAC address
+ set, simply ignore them. */
+ r = netlink_message_read_hw_addr(m, IFLA_ADDRESS, &hw_addr);
+ if (r >= 0 && (link->hw_addr.length != hw_addr.length ||
+ memcmp(link->hw_addr.addr.bytes, hw_addr.addr.bytes, hw_addr.length) != 0)) {
+
+ memcpy(link->hw_addr.addr.bytes, hw_addr.addr.bytes, hw_addr.length);
+
+ log_link_debug(link, "Gained new hardware address: %s", HW_ADDR_TO_STR(&hw_addr));
+
+ r = ipv4ll_update_mac(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC address in IPv4LL client: %m");
+
+ r = dhcp4_update_mac(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC address in DHCP client: %m");
+
+ r = dhcp6_update_mac(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC address in DHCPv6 client: %m");
+
+ r = radv_update_mac(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC address for Router Advertisement: %m");
+
+ if (link->ndisc) {
+ r = sd_ndisc_set_mac(link->ndisc, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC for NDisc: %m");
+ }
+
+ r = ipv4_dad_update_mac(link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not update MAC address in IPv4 ACD client: %m");
+ }
+
+ old_master = link->master_ifindex;
+ (void) sd_netlink_message_read_u32(m, IFLA_MASTER, (uint32_t *) &link->master_ifindex);
+
+ link_was_admin_up = link->flags & IFF_UP;
+ had_carrier = link_has_carrier(link);
+
+ r = link_update_flags(link, m, old_master != link->master_ifindex);
+ if (r < 0)
+ return r;
+
+ if (!link_was_admin_up && (link->flags & IFF_UP)) {
+ log_link_info(link, "Link UP");
+
+ r = link_admin_state_up(link);
+ if (r < 0)
+ return r;
+ } else if (link_was_admin_up && !(link->flags & IFF_UP))
+ log_link_info(link, "Link DOWN");
+
+ r = link_update_lldp(link);
+ if (r < 0)
+ return r;
+
+ carrier_gained = !had_carrier && link_has_carrier(link);
+ carrier_lost = had_carrier && !link_has_carrier(link);
+
+ if (carrier_gained) {
+ log_link_info(link, "Gained carrier");
+
+ r = link_carrier_gained(link);
+ if (r < 0)
+ return r;
+ } else if (carrier_lost) {
+ log_link_info(link, "Lost carrier");
+
+ r = link_carrier_lost(link);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void print_link_hashmap(FILE *f, const char *prefix, Hashmap* h) {
+ bool space = false;
+ Link *link;
+
+ assert(f);
+ assert(prefix);
+
+ if (hashmap_isempty(h))
+ return;
+
+ fputs(prefix, f);
+ HASHMAP_FOREACH(link, h) {
+ if (space)
+ fputc(' ', f);
+
+ fprintf(f, "%i", link->ifindex);
+ space = true;
+ }
+
+ fputc('\n', f);
+}
+
+static void link_save_dns(Link *link, FILE *f, struct in_addr_full **dns, unsigned n_dns, bool *space) {
+ for (unsigned j = 0; j < n_dns; j++) {
+ const char *str;
+
+ if (dns[j]->ifindex != 0 && dns[j]->ifindex != link->ifindex)
+ continue;
+
+ str = in_addr_full_to_string(dns[j]);
+ if (!str)
+ continue;
+
+ if (*space)
+ fputc(' ', f);
+ fputs(str, f);
+ *space = true;
+ }
+}
+
+static void serialize_addresses(
+ FILE *f,
+ const char *lvalue,
+ bool *space,
+ char **addresses,
+ sd_dhcp_lease *lease,
+ bool conditional,
+ sd_dhcp_lease_server_type what,
+ sd_dhcp6_lease *lease6,
+ bool conditional6,
+ int (*lease6_get_addr)(sd_dhcp6_lease*, const struct in6_addr**),
+ int (*lease6_get_fqdn)(sd_dhcp6_lease*, char ***)) {
+ int r;
+
+ bool _space = false;
+ if (!space)
+ space = &_space;
+
+ if (lvalue)
+ fprintf(f, "%s=", lvalue);
+ fputstrv(f, addresses, NULL, space);
+
+ if (lease && conditional) {
+ const struct in_addr *lease_addresses;
+
+ r = sd_dhcp_lease_get_servers(lease, what, &lease_addresses);
+ if (r > 0)
+ serialize_in_addrs(f, lease_addresses, r, space, in4_addr_is_non_local);
+ }
+
+ if (lease6 && conditional6 && lease6_get_addr) {
+ const struct in6_addr *in6_addrs;
+
+ r = lease6_get_addr(lease6, &in6_addrs);
+ if (r > 0)
+ serialize_in6_addrs(f, in6_addrs, r, space);
+ }
+
+ if (lease6 && conditional6 && lease6_get_fqdn) {
+ char **in6_hosts;
+
+ r = lease6_get_fqdn(lease6, &in6_hosts);
+ if (r > 0)
+ fputstrv(f, in6_hosts, NULL, space);
+ }
+
+ if (lvalue)
+ fputc('\n', f);
+}
+
+int link_save(Link *link) {
+ const char *admin_state, *oper_state, *carrier_state, *address_state;
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(link);
+ assert(link->state_file);
+ assert(link->lease_file);
+ assert(link->manager);
+
+ if (link->state == LINK_STATE_LINGER) {
+ (void) unlink(link->state_file);
+ return 0;
+ }
+
+ link_lldp_save(link);
+
+ admin_state = link_state_to_string(link->state);
+ assert(admin_state);
+
+ oper_state = link_operstate_to_string(link->operstate);
+ assert(oper_state);
+
+ carrier_state = link_carrier_state_to_string(link->carrier_state);
+ assert(carrier_state);
+
+ address_state = link_address_state_to_string(link->address_state);
+ assert(address_state);
+
+ r = fopen_temporary(link->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "ADMIN_STATE=%s\n"
+ "OPER_STATE=%s\n"
+ "CARRIER_STATE=%s\n"
+ "ADDRESS_STATE=%s\n",
+ admin_state, oper_state, carrier_state, address_state);
+
+ if (link->network) {
+ char **dhcp6_domains = NULL, **dhcp_domains = NULL;
+ const char *dhcp_domainname = NULL, *p;
+ bool space;
+
+ fprintf(f, "REQUIRED_FOR_ONLINE=%s\n",
+ yes_no(link->network->required_for_online));
+
+ LinkOperationalStateRange st = link->network->required_operstate_for_online;
+ fprintf(f, "REQUIRED_OPER_STATE_FOR_ONLINE=%s%s%s\n",
+ strempty(link_operstate_to_string(st.min)),
+ st.max != LINK_OPERSTATE_RANGE_DEFAULT.max ? ":" : "",
+ st.max != LINK_OPERSTATE_RANGE_DEFAULT.max ? strempty(link_operstate_to_string(st.max)) : "");
+
+ fprintf(f, "NETWORK_FILE=%s\n", link->network->filename);
+
+ /************************************************************/
+
+ fputs("DNS=", f);
+ space = false;
+ if (link->n_dns != (unsigned) -1)
+ link_save_dns(link, f, link->dns, link->n_dns, &space);
+ else
+ link_save_dns(link, f, link->network->dns, link->network->n_dns, &space);
+
+ serialize_addresses(f, NULL, &space,
+ NULL,
+ link->dhcp_lease,
+ link->network->dhcp_use_dns,
+ SD_DHCP_LEASE_DNS,
+ link->dhcp6_lease,
+ link->network->dhcp6_use_dns,
+ sd_dhcp6_lease_get_dns,
+ NULL);
+
+ /* Make sure to flush out old entries before we use the NDisc data */
+ ndisc_vacuum(link);
+
+ if (link->network->ipv6_accept_ra_use_dns && link->ndisc_rdnss) {
+ NDiscRDNSS *dd;
+
+ SET_FOREACH(dd, link->ndisc_rdnss)
+ serialize_in6_addrs(f, &dd->address, 1, &space);
+ }
+
+ fputc('\n', f);
+
+ /************************************************************/
+
+ serialize_addresses(f, "NTP", NULL,
+ link->ntp ?: link->network->ntp,
+ link->dhcp_lease,
+ link->network->dhcp_use_ntp,
+ SD_DHCP_LEASE_NTP,
+ link->dhcp6_lease,
+ link->network->dhcp6_use_ntp,
+ sd_dhcp6_lease_get_ntp_addrs,
+ sd_dhcp6_lease_get_ntp_fqdn);
+
+ serialize_addresses(f, "SIP", NULL,
+ NULL,
+ link->dhcp_lease,
+ link->network->dhcp_use_sip,
+ SD_DHCP_LEASE_SIP,
+ NULL, false, NULL, NULL);
+
+ /************************************************************/
+
+ if (link->network->dhcp_use_domains != DHCP_USE_DOMAINS_NO) {
+ if (link->dhcp_lease) {
+ (void) sd_dhcp_lease_get_domainname(link->dhcp_lease, &dhcp_domainname);
+ (void) sd_dhcp_lease_get_search_domains(link->dhcp_lease, &dhcp_domains);
+ }
+ if (link->dhcp6_lease)
+ (void) sd_dhcp6_lease_get_domains(link->dhcp6_lease, &dhcp6_domains);
+ }
+
+ fputs("DOMAINS=", f);
+ space = false;
+ ORDERED_SET_FOREACH(p, link->search_domains ?: link->network->search_domains)
+ fputs_with_space(f, p, NULL, &space);
+
+ if (link->network->dhcp_use_domains == DHCP_USE_DOMAINS_YES) {
+ if (dhcp_domainname)
+ fputs_with_space(f, dhcp_domainname, NULL, &space);
+ if (dhcp_domains)
+ fputstrv(f, dhcp_domains, NULL, &space);
+ if (dhcp6_domains)
+ fputstrv(f, dhcp6_domains, NULL, &space);
+ }
+
+ if (link->network->ipv6_accept_ra_use_domains == DHCP_USE_DOMAINS_YES) {
+ NDiscDNSSL *dd;
+
+ SET_FOREACH(dd, link->ndisc_dnssl)
+ fputs_with_space(f, NDISC_DNSSL_DOMAIN(dd), NULL, &space);
+ }
+
+ fputc('\n', f);
+
+ /************************************************************/
+
+ fputs("ROUTE_DOMAINS=", f);
+ space = false;
+ ORDERED_SET_FOREACH(p, link->route_domains ?: link->network->route_domains)
+ fputs_with_space(f, p, NULL, &space);
+
+ if (link->network->dhcp_use_domains == DHCP_USE_DOMAINS_ROUTE) {
+ if (dhcp_domainname)
+ fputs_with_space(f, dhcp_domainname, NULL, &space);
+ if (dhcp_domains)
+ fputstrv(f, dhcp_domains, NULL, &space);
+ if (dhcp6_domains)
+ fputstrv(f, dhcp6_domains, NULL, &space);
+ }
+
+ if (link->network->ipv6_accept_ra_use_domains == DHCP_USE_DOMAINS_ROUTE) {
+ NDiscDNSSL *dd;
+
+ SET_FOREACH(dd, link->ndisc_dnssl)
+ fputs_with_space(f, NDISC_DNSSL_DOMAIN(dd), NULL, &space);
+ }
+
+ fputc('\n', f);
+
+ /************************************************************/
+
+ fprintf(f, "LLMNR=%s\n",
+ resolve_support_to_string(link->llmnr >= 0 ? link->llmnr : link->network->llmnr));
+
+ /************************************************************/
+
+ fprintf(f, "MDNS=%s\n",
+ resolve_support_to_string(link->mdns >= 0 ? link->mdns : link->network->mdns));
+
+ /************************************************************/
+
+ int dns_default_route =
+ link->dns_default_route >= 0 ? link->dns_default_route :
+ link->network->dns_default_route;
+ if (dns_default_route >= 0)
+ fprintf(f, "DNS_DEFAULT_ROUTE=%s\n", yes_no(dns_default_route));
+
+ /************************************************************/
+
+ DnsOverTlsMode dns_over_tls_mode =
+ link->dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID ? link->dns_over_tls_mode :
+ link->network->dns_over_tls_mode;
+ if (dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID)
+ fprintf(f, "DNS_OVER_TLS=%s\n", dns_over_tls_mode_to_string(dns_over_tls_mode));
+
+ /************************************************************/
+
+ DnssecMode dnssec_mode =
+ link->dnssec_mode != _DNSSEC_MODE_INVALID ? link->dnssec_mode :
+ link->network->dnssec_mode;
+ if (dnssec_mode != _DNSSEC_MODE_INVALID)
+ fprintf(f, "DNSSEC=%s\n", dnssec_mode_to_string(dnssec_mode));
+
+ /************************************************************/
+
+ Set *nta_anchors = link->dnssec_negative_trust_anchors;
+ if (set_isempty(nta_anchors))
+ nta_anchors = link->network->dnssec_negative_trust_anchors;
+
+ if (!set_isempty(nta_anchors)) {
+ const char *n;
+
+ fputs("DNSSEC_NTA=", f);
+ space = false;
+ SET_FOREACH(n, nta_anchors)
+ fputs_with_space(f, n, NULL, &space);
+ fputc('\n', f);
+ }
+
+ /************************************************************/
+
+ r = link_serialize_addresses(link, f);
+ if (r < 0)
+ goto fail;
+
+ /************************************************************/
+
+ r = link_serialize_routes(link, f);
+ if (r < 0)
+ goto fail;
+ }
+
+ print_link_hashmap(f, "CARRIER_BOUND_TO=", link->bound_to_links);
+ print_link_hashmap(f, "CARRIER_BOUND_BY=", link->bound_by_links);
+
+ if (link->dhcp_lease) {
+ r = dhcp_lease_save(link->dhcp_lease, link->lease_file);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f,
+ "DHCP_LEASE=%s\n",
+ link->lease_file);
+ } else
+ (void) unlink(link->lease_file);
+
+ r = link_serialize_ipv4ll(link, f);
+ if (r < 0)
+ goto fail;
+
+ r = link_serialize_dhcp6_client(link, f);
+ if (r < 0)
+ goto fail;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, link->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(link->state_file);
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_link_error_errno(link, r, "Failed to save link data to %s: %m", link->state_file);
+}
+
+/* The serialized state in /run is no longer up-to-date. */
+void link_dirty(Link *link) {
+ int r;
+
+ assert(link);
+
+ /* mark manager dirty as link is dirty */
+ manager_dirty(link->manager);
+
+ r = set_ensure_put(&link->manager->dirty_links, NULL, link);
+ if (r <= 0)
+ /* Ignore allocation errors and don't take another ref if the link was already dirty */
+ return;
+ link_ref(link);
+}
+
+/* The serialized state in /run is up-to-date */
+void link_clean(Link *link) {
+ assert(link);
+ assert(link->manager);
+
+ link_unref(set_remove(link->manager->dirty_links, link));
+}
+
+int link_save_and_clean(Link *link) {
+ int r;
+
+ r = link_save(link);
+ if (r < 0)
+ return r;
+
+ link_clean(link);
+ return 0;
+}
+
+static const char* const link_state_table[_LINK_STATE_MAX] = {
+ [LINK_STATE_PENDING] = "pending",
+ [LINK_STATE_INITIALIZED] = "initialized",
+ [LINK_STATE_CONFIGURING] = "configuring",
+ [LINK_STATE_CONFIGURED] = "configured",
+ [LINK_STATE_UNMANAGED] = "unmanaged",
+ [LINK_STATE_FAILED] = "failed",
+ [LINK_STATE_LINGER] = "linger",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_state, LinkState);
+
+int log_link_message_full_errno(Link *link, sd_netlink_message *m, int level, int err, const char *msg) {
+ const char *err_msg = NULL;
+
+ (void) sd_netlink_message_read_string(m, NLMSGERR_ATTR_MSG, &err_msg);
+ return log_link_full_errno(link, level, err,
+ "%s: %s%s%s%m",
+ msg,
+ strempty(err_msg),
+ err_msg && !endswith(err_msg, ".") ? "." : "",
+ err_msg ? " " : "");
+}
diff --git a/src/network/networkd-link.h b/src/network/networkd-link.h
new file mode 100644
index 0000000..cd54192
--- /dev/null
+++ b/src/network/networkd-link.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <endian.h>
+#include <linux/nl80211.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-dhcp-client.h"
+#include "sd-dhcp-server.h"
+#include "sd-dhcp6-client.h"
+#include "sd-ipv4acd.h"
+#include "sd-ipv4ll.h"
+#include "sd-lldp.h"
+#include "sd-ndisc.h"
+#include "sd-radv.h"
+#include "sd-netlink.h"
+
+#include "ether-addr-util.h"
+#include "log-link.h"
+#include "network-util.h"
+#include "networkd-util.h"
+#include "ordered-set.h"
+#include "resolve-util.h"
+#include "set.h"
+
+typedef enum LinkState {
+ LINK_STATE_PENDING, /* udev has not initialized the link */
+ LINK_STATE_INITIALIZED, /* udev has initialized the link */
+ LINK_STATE_CONFIGURING, /* configuring addresses, routes, etc. */
+ LINK_STATE_CONFIGURED, /* everything is configured */
+ LINK_STATE_UNMANAGED, /* Unmanaged=yes is set */
+ LINK_STATE_FAILED, /* at least one configuration process failed */
+ LINK_STATE_LINGER, /* RTM_DELLINK for the link has been received */
+ _LINK_STATE_MAX,
+ _LINK_STATE_INVALID = -1
+} LinkState;
+
+typedef struct Manager Manager;
+typedef struct Network Network;
+typedef struct Address Address;
+typedef struct DUID DUID;
+
+typedef struct Link {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ int ifindex;
+ int master_ifindex;
+ char *ifname;
+ char **alternative_names;
+ char *kind;
+ unsigned short iftype;
+ char *state_file;
+ hw_addr_data hw_addr;
+ hw_addr_data bcast_addr;
+ struct ether_addr permanent_mac;
+ struct in6_addr ipv6ll_address;
+ uint32_t mtu;
+ sd_device *sd_device;
+ char *driver;
+
+ /* wlan */
+ enum nl80211_iftype wlan_iftype;
+ char *ssid;
+ struct ether_addr bssid;
+
+ unsigned flags;
+ uint8_t kernel_operstate;
+
+ Network *network;
+
+ LinkState state;
+ LinkOperationalState operstate;
+ LinkCarrierState carrier_state;
+ LinkAddressState address_state;
+
+ unsigned address_messages;
+ unsigned address_remove_messages;
+ unsigned address_label_messages;
+ unsigned neighbor_messages;
+ unsigned route_messages;
+ unsigned nexthop_messages;
+ unsigned routing_policy_rule_messages;
+ unsigned routing_policy_rule_remove_messages;
+ unsigned tc_messages;
+ unsigned sr_iov_messages;
+ unsigned enslaving;
+ unsigned bridge_mdb_messages;
+
+ Set *addresses;
+ Set *addresses_foreign;
+ Set *pool_addresses;
+ Set *static_addresses;
+ Set *neighbors;
+ Set *neighbors_foreign;
+ Set *routes;
+ Set *routes_foreign;
+ Set *nexthops;
+ Set *nexthops_foreign;
+
+ sd_dhcp_client *dhcp_client;
+ sd_dhcp_lease *dhcp_lease;
+ Address *dhcp_address, *dhcp_address_old;
+ Set *dhcp_routes, *dhcp_routes_old;
+ char *lease_file;
+ uint32_t original_mtu;
+ unsigned dhcp4_messages;
+ unsigned dhcp4_remove_messages;
+ sd_ipv4acd *dhcp_acd;
+ bool dhcp4_route_failed:1;
+ bool dhcp4_route_retrying:1;
+ bool dhcp4_configured:1;
+ bool dhcp4_address_bind:1;
+
+ sd_ipv4ll *ipv4ll;
+ bool ipv4ll_address_configured:1;
+
+ bool request_static_addresses:1;
+ bool addresses_configured:1;
+ bool addresses_ready:1;
+ bool neighbors_configured:1;
+ bool static_routes_configured:1;
+ bool static_nexthops_configured:1;
+ bool routing_policy_rules_configured:1;
+ bool tc_configured:1;
+ bool sr_iov_configured:1;
+ bool setting_mtu:1;
+ bool setting_genmode:1;
+ bool ipv6_mtu_set:1;
+ bool bridge_mdb_configured:1;
+
+ sd_dhcp_server *dhcp_server;
+
+ sd_ndisc *ndisc;
+ Set *ndisc_rdnss;
+ Set *ndisc_dnssl;
+ Set *ndisc_addresses;
+ Set *ndisc_routes;
+ unsigned ndisc_addresses_messages;
+ unsigned ndisc_routes_messages;
+ bool ndisc_addresses_configured:1;
+ bool ndisc_routes_configured:1;
+
+ sd_radv *radv;
+
+ sd_dhcp6_client *dhcp6_client;
+ sd_dhcp6_lease *dhcp6_lease;
+ Set *dhcp6_addresses, *dhcp6_addresses_old;
+ Set *dhcp6_routes, *dhcp6_routes_old;
+ Set *dhcp6_pd_addresses, *dhcp6_pd_addresses_old;
+ Set *dhcp6_pd_routes, *dhcp6_pd_routes_old;
+ unsigned dhcp6_address_messages;
+ unsigned dhcp6_route_messages;
+ unsigned dhcp6_pd_address_messages;
+ unsigned dhcp6_pd_route_messages;
+ bool dhcp6_address_configured:1;
+ bool dhcp6_route_configured:1;
+ bool dhcp6_pd_address_configured:1;
+ bool dhcp6_pd_route_configured:1;
+ bool dhcp6_pd_prefixes_assigned:1;
+
+ /* This is about LLDP reception */
+ sd_lldp *lldp;
+ char *lldp_file;
+
+ /* This is about LLDP transmission */
+ unsigned lldp_tx_fast; /* The LLDP txFast counter (See 802.1ab-2009, section 9.2.5.18) */
+ sd_event_source *lldp_emit_event_source;
+
+ Hashmap *bound_by_links;
+ Hashmap *bound_to_links;
+ Set *slaves;
+
+ /* For speed meter */
+ struct rtnl_link_stats64 stats_old, stats_new;
+ bool stats_updated;
+
+ /* All kinds of DNS configuration the user configured via D-Bus */
+ struct in_addr_full **dns;
+ unsigned n_dns;
+ OrderedSet *search_domains, *route_domains;
+
+ int dns_default_route;
+ ResolveSupport llmnr;
+ ResolveSupport mdns;
+ DnssecMode dnssec_mode;
+ DnsOverTlsMode dns_over_tls_mode;
+ Set *dnssec_negative_trust_anchors;
+
+ /* Similar, but NTP server configuration */
+ char **ntp;
+} Link;
+
+typedef int (*link_netlink_message_handler_t)(sd_netlink*, sd_netlink_message*, Link*);
+
+void link_ntp_settings_clear(Link *link);
+void link_dns_settings_clear(Link *link);
+Link *link_unref(Link *link);
+Link *link_ref(Link *link);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_unref);
+DEFINE_TRIVIAL_DESTRUCTOR(link_netlink_destroy_callback, Link, link_unref);
+
+int link_get(Manager *m, int ifindex, Link **ret);
+int link_add(Manager *manager, sd_netlink_message *message, Link **ret);
+void link_drop(Link *link);
+
+int link_down(Link *link, link_netlink_message_handler_t callback);
+
+void link_enter_failed(Link *link);
+int link_initialized(Link *link, sd_device *device);
+
+void link_set_state(Link *link, LinkState state);
+void link_check_ready(Link *link);
+
+void link_update_operstate(Link *link, bool also_update_bond_master);
+int link_update(Link *link, sd_netlink_message *message);
+
+void link_dirty(Link *link);
+void link_clean(Link *link);
+int link_save(Link *link);
+int link_save_and_clean(Link *link);
+
+int link_carrier_reset(Link *link);
+bool link_has_carrier(Link *link);
+
+bool link_ipv6_enabled(Link *link);
+bool link_ipv6ll_enabled(Link *link);
+int link_ipv6ll_gained(Link *link, const struct in6_addr *address);
+
+int link_set_mtu(Link *link, uint32_t mtu);
+
+bool link_ipv4ll_enabled(Link *link, AddressFamily mask);
+
+int link_stop_engines(Link *link, bool may_keep_dhcp);
+
+const char* link_state_to_string(LinkState s) _const_;
+LinkState link_state_from_string(const char *s) _pure_;
+
+int link_configure(Link *link);
+int link_reconfigure(Link *link, bool force);
+
+int log_link_message_full_errno(Link *link, sd_netlink_message *m, int level, int err, const char *msg);
+#define log_link_message_error_errno(link, m, err, msg) log_link_message_full_errno(link, m, LOG_ERR, err, msg)
+#define log_link_message_warning_errno(link, m, err, msg) log_link_message_full_errno(link, m, LOG_WARNING, err, msg)
+#define log_link_message_notice_errno(link, m, err, msg) log_link_message_full_errno(link, m, LOG_NOTICE, err, msg)
+#define log_link_message_info_errno(link, m, err, msg) log_link_message_full_errno(link, m, LOG_INFO, err, msg)
+#define log_link_message_debug_errno(link, m, err, msg) log_link_message_full_errno(link, m, LOG_DEBUG, err, msg)
diff --git a/src/network/networkd-lldp-rx.c b/src/network/networkd-lldp-rx.c
new file mode 100644
index 0000000..c22852f
--- /dev/null
+++ b/src/network/networkd-lldp-rx.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "networkd-link.h"
+#include "networkd-lldp-rx.h"
+#include "networkd-lldp-tx.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_lldp_mode, lldp_mode, LLDPMode, "Failed to parse LLDP= setting.");
+
+static const char* const lldp_mode_table[_LLDP_MODE_MAX] = {
+ [LLDP_MODE_NO] = "no",
+ [LLDP_MODE_YES] = "yes",
+ [LLDP_MODE_ROUTERS_ONLY] = "routers-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(lldp_mode, LLDPMode, LLDP_MODE_YES);
+
+static bool link_lldp_rx_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (link->iftype != ARPHRD_ETHER)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ /* LLDP should be handled on bridge and bond slaves as those have a direct connection to their peers,
+ * not on the bridge/bond master. Linux doesn't even (by default) forward lldp packets to the bridge
+ * master.*/
+ if (link->kind && STR_IN_SET(link->kind, "bridge", "bond"))
+ return false;
+
+ return link->network->lldp_mode != LLDP_MODE_NO;
+}
+
+static void lldp_handler(sd_lldp *lldp, sd_lldp_event event, sd_lldp_neighbor *n, void *userdata) {
+ Link *link = userdata;
+ int r;
+
+ assert(link);
+
+ (void) link_lldp_save(link);
+
+ if (link_lldp_emit_enabled(link) && event == SD_LLDP_EVENT_ADDED) {
+ /* If we received information about a new neighbor, restart the LLDP "fast" logic */
+
+ log_link_debug(link, "Received LLDP datagram from previously unknown neighbor, restarting 'fast' LLDP transmission.");
+
+ r = link_lldp_emit_start(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Failed to restart LLDP transmission: %m");
+ }
+}
+
+int link_lldp_rx_configure(Link *link) {
+ int r;
+
+ if (!link_lldp_rx_enabled(link))
+ return 0;
+
+ if (!link->lldp) {
+ r = sd_lldp_new(&link->lldp);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_attach_event(link->lldp, link->manager->event, 0);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_lldp_set_ifindex(link->lldp, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_match_capabilities(link->lldp,
+ link->network->lldp_mode == LLDP_MODE_ROUTERS_ONLY ?
+ SD_LLDP_SYSTEM_CAPABILITIES_ALL_ROUTERS :
+ SD_LLDP_SYSTEM_CAPABILITIES_ALL);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_filter_address(link->lldp, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ r = sd_lldp_set_callback(link->lldp, lldp_handler, link);
+ if (r < 0)
+ return r;
+
+ r = link_update_lldp(link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int link_update_lldp(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (!link->lldp)
+ return 0;
+
+ if (link->flags & IFF_UP) {
+ r = sd_lldp_start(link->lldp);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to start LLDP: %m");
+ if (r > 0)
+ log_link_debug(link, "Started LLDP.");
+ } else {
+ r = sd_lldp_stop(link->lldp);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Failed to stop LLDP: %m");
+ if (r > 0)
+ log_link_debug(link, "Stopped LLDP.");
+ }
+
+ return r;
+}
+
+int link_lldp_save(Link *link) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ sd_lldp_neighbor **l = NULL;
+ int n = 0, r, i;
+
+ assert(link);
+ assert(link->lldp_file);
+
+ if (!link->lldp) {
+ (void) unlink(link->lldp_file);
+ return 0;
+ }
+
+ r = sd_lldp_get_neighbors(link->lldp, &l);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ (void) unlink(link->lldp_file);
+ goto finish;
+ }
+
+ n = r;
+
+ r = fopen_temporary(link->lldp_file, &f, &temp_path);
+ if (r < 0)
+ goto finish;
+
+ fchmod(fileno(f), 0644);
+
+ for (i = 0; i < n; i++) {
+ const void *p;
+ le64_t u;
+ size_t sz;
+
+ r = sd_lldp_neighbor_get_raw(l[i], &p, &sz);
+ if (r < 0)
+ goto finish;
+
+ u = htole64(sz);
+ (void) fwrite(&u, 1, sizeof(u), f);
+ (void) fwrite(p, 1, sz, f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto finish;
+
+ if (rename(temp_path, link->lldp_file) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+finish:
+ if (r < 0) {
+ (void) unlink(link->lldp_file);
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ log_link_error_errno(link, r, "Failed to save LLDP data to %s: %m", link->lldp_file);
+ }
+
+ if (l) {
+ for (i = 0; i < n; i++)
+ sd_lldp_neighbor_unref(l[i]);
+ free(l);
+ }
+
+ return r;
+}
diff --git a/src/network/networkd-lldp-rx.h b/src/network/networkd-lldp-rx.h
new file mode 100644
index 0000000..78c5228
--- /dev/null
+++ b/src/network/networkd-lldp-rx.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+
+typedef struct Link Link;
+
+typedef enum LLDPMode {
+ LLDP_MODE_NO = 0,
+ LLDP_MODE_YES = 1,
+ LLDP_MODE_ROUTERS_ONLY = 2,
+ _LLDP_MODE_MAX,
+ _LLDP_MODE_INVALID = -1,
+} LLDPMode;
+
+int link_lldp_rx_configure(Link *link);
+int link_update_lldp(Link *link);
+int link_lldp_save(Link *link);
+
+const char* lldp_mode_to_string(LLDPMode m) _const_;
+LLDPMode lldp_mode_from_string(const char *s) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_lldp_mode);
diff --git a/src/network/networkd-lldp-tx.c b/src/network/networkd-lldp-tx.c
new file mode 100644
index 0000000..b03d948
--- /dev/null
+++ b/src/network/networkd-lldp-tx.c
@@ -0,0 +1,493 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <inttypes.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "missing_network.h"
+#include "networkd-link.h"
+#include "networkd-lldp-tx.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unaligned.h"
+#include "web-util.h"
+
+/* The LLDP spec calls this "txFastInit", see 9.2.5.19 */
+#define LLDP_TX_FAST_INIT 4U
+
+/* The LLDP spec calls this "msgTxHold", see 9.2.5.6 */
+#define LLDP_TX_HOLD 4U
+
+/* The jitter range to add, see 9.2.2. */
+#define LLDP_JITTER_USEC (400U * USEC_PER_MSEC)
+
+/* The LLDP spec calls this msgTxInterval, but we subtract half the jitter off it. */
+#define LLDP_TX_INTERVAL_USEC (30U * USEC_PER_SEC - LLDP_JITTER_USEC / 2)
+
+/* The LLDP spec calls this msgFastTx, but we subtract half the jitter off it. */
+#define LLDP_FAST_TX_USEC (1U * USEC_PER_SEC - LLDP_JITTER_USEC / 2)
+
+static const struct ether_addr lldp_multicast_addr[_LLDP_EMIT_MAX] = {
+ [LLDP_EMIT_NEAREST_BRIDGE] = {{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e }},
+ [LLDP_EMIT_NON_TPMR_BRIDGE] = {{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03 }},
+ [LLDP_EMIT_CUSTOMER_BRIDGE] = {{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }},
+};
+
+bool link_lldp_emit_enabled(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (link->iftype != ARPHRD_ETHER)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (link->kind && STR_IN_SET(link->kind, "bridge", "bond"))
+ return false;
+
+ return link->network->lldp_emit != LLDP_EMIT_NO;
+}
+
+static int lldp_write_tlv_header(uint8_t **p, uint8_t id, size_t sz) {
+ assert(p);
+
+ if (id > 127)
+ return -EBADMSG;
+ if (sz > 511)
+ return -ENOBUFS;
+
+ (*p)[0] = (id << 1) | !!(sz & 256);
+ (*p)[1] = sz & 255;
+
+ *p = *p + 2;
+ return 0;
+}
+
+static int lldp_make_packet(
+ LLDPEmit mode,
+ const struct ether_addr *hwaddr,
+ const char *machine_id,
+ const char *ifname,
+ uint16_t ttl,
+ const char *port_description,
+ const char *hostname,
+ const char *pretty_hostname,
+ uint16_t system_capabilities,
+ uint16_t enabled_capabilities,
+ char *mud,
+ void **ret, size_t *sz) {
+
+ size_t machine_id_length, ifname_length, port_description_length = 0, hostname_length = 0,
+ pretty_hostname_length = 0, mud_length = 0;
+ _cleanup_free_ void *packet = NULL;
+ struct ether_header *h;
+ uint8_t *p;
+ size_t l;
+ int r;
+
+ assert(mode > LLDP_EMIT_NO);
+ assert(mode < _LLDP_EMIT_MAX);
+ assert(hwaddr);
+ assert(machine_id);
+ assert(ifname);
+ assert(ret);
+ assert(sz);
+
+ machine_id_length = strlen(machine_id);
+ ifname_length = strlen(ifname);
+
+ if (port_description)
+ port_description_length = strlen(port_description);
+
+ if (hostname)
+ hostname_length = strlen(hostname);
+
+ if (pretty_hostname)
+ pretty_hostname_length = strlen(pretty_hostname);
+
+ if (mud)
+ mud_length = strlen(mud);
+
+ l = sizeof(struct ether_header) +
+ /* Chassis ID */
+ 2 + 1 + machine_id_length +
+ /* Port ID */
+ 2 + 1 + ifname_length +
+ /* TTL */
+ 2 + 2 +
+ /* System Capabilities */
+ 2 + 4 +
+ /* End */
+ 2;
+
+ /* Port Description */
+ if (port_description)
+ l += 2 + port_description_length;
+
+ /* System Name */
+ if (hostname)
+ l += 2 + hostname_length;
+
+ /* System Description */
+ if (pretty_hostname)
+ l += 2 + pretty_hostname_length;
+
+ /* MUD URL */
+ if (mud)
+ l += 2 + sizeof(SD_LLDP_OUI_MUD) + 1 + mud_length;
+
+ packet = malloc(l);
+ if (!packet)
+ return -ENOMEM;
+
+ h = (struct ether_header*) packet;
+ h->ether_type = htobe16(ETHERTYPE_LLDP);
+ memcpy(h->ether_dhost, lldp_multicast_addr + mode, ETH_ALEN);
+ memcpy(h->ether_shost, hwaddr, ETH_ALEN);
+
+ p = (uint8_t*) packet + sizeof(struct ether_header);
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_CHASSIS_ID, 1 + machine_id_length);
+ if (r < 0)
+ return r;
+ *(p++) = SD_LLDP_CHASSIS_SUBTYPE_LOCALLY_ASSIGNED;
+ p = mempcpy(p, machine_id, machine_id_length);
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_PORT_ID, 1 + ifname_length);
+ if (r < 0)
+ return r;
+ *(p++) = SD_LLDP_PORT_SUBTYPE_INTERFACE_NAME;
+ p = mempcpy(p, ifname, ifname_length);
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_TTL, 2);
+ if (r < 0)
+ return r;
+ unaligned_write_be16(p, ttl);
+ p += 2;
+
+ if (port_description) {
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_PORT_DESCRIPTION, port_description_length);
+ if (r < 0)
+ return r;
+ p = mempcpy(p, port_description, port_description_length);
+ }
+
+ if (hostname) {
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_SYSTEM_NAME, hostname_length);
+ if (r < 0)
+ return r;
+ p = mempcpy(p, hostname, hostname_length);
+ }
+
+ if (pretty_hostname) {
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_SYSTEM_DESCRIPTION, pretty_hostname_length);
+ if (r < 0)
+ return r;
+ p = mempcpy(p, pretty_hostname, pretty_hostname_length);
+ }
+
+ if (mud) {
+ uint8_t oui_mud[sizeof(SD_LLDP_OUI_MUD)] = {0x00, 0x00, 0x5E};
+ /*
+ * +--------+--------+----------+---------+--------------
+ * |TLV Type| len | OUI |subtype | MUDString
+ * | =127 | |= 00 00 5E| = 1 |
+ * |(7 bits)|(9 bits)|(3 octets)|(1 octet)|(1-255 octets)
+ * +--------+--------+----------+---------+--------------
+ * where:
+
+ * o TLV Type = 127 indicates a vendor-specific TLV
+ * o len = indicates the TLV string length
+ * o OUI = 00 00 5E is the organizationally unique identifier of IANA
+ * o subtype = 1 (as assigned by IANA for the MUDstring)
+ * o MUDstring = the length MUST NOT exceed 255 octets
+ */
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_PRIVATE, sizeof(SD_LLDP_OUI_MUD) + 1 + mud_length);
+ if (r < 0)
+ return r;
+
+ p = mempcpy(p, &oui_mud, sizeof(SD_LLDP_OUI_MUD));
+ *(p++) = SD_LLDP_OUI_SUBTYPE_MUD_USAGE_DESCRIPTION;
+ p = mempcpy(p, mud, mud_length);
+ }
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_SYSTEM_CAPABILITIES, 4);
+ if (r < 0)
+ return r;
+ unaligned_write_be16(p, system_capabilities);
+ p += 2;
+ unaligned_write_be16(p, enabled_capabilities);
+ p += 2;
+
+ r = lldp_write_tlv_header(&p, SD_LLDP_TYPE_END, 0);
+ if (r < 0)
+ return r;
+
+ assert(p == (uint8_t*) packet + l);
+
+ *ret = TAKE_PTR(packet);
+ *sz = l;
+
+ return 0;
+}
+
+static int lldp_send_packet(
+ int ifindex,
+ const struct ether_addr *address,
+ const void *packet,
+ size_t packet_size) {
+
+ union sockaddr_union sa = {
+ .ll.sll_family = AF_PACKET,
+ .ll.sll_protocol = htobe16(ETHERTYPE_LLDP),
+ .ll.sll_ifindex = ifindex,
+ .ll.sll_halen = ETH_ALEN,
+ };
+
+ _cleanup_close_ int fd = -1;
+ ssize_t l;
+
+ assert(ifindex > 0);
+ assert(address);
+ assert(packet || packet_size <= 0);
+
+ memcpy(sa.ll.sll_addr, address, ETH_ALEN);
+
+ fd = socket(AF_PACKET, SOCK_RAW|SOCK_CLOEXEC, IPPROTO_RAW);
+ if (fd < 0)
+ return -errno;
+
+ l = sendto(fd, packet, packet_size, MSG_NOSIGNAL, &sa.sa, sizeof(sa.ll));
+ if (l < 0)
+ return -errno;
+
+ if ((size_t) l != packet_size)
+ return -EIO;
+
+ return 0;
+}
+
+static int link_send_lldp(Link *link) {
+ char machine_id_string[SD_ID128_STRING_MAX];
+ _cleanup_free_ char *hostname = NULL, *pretty_hostname = NULL;
+ _cleanup_free_ void *packet = NULL;
+ size_t packet_size = 0;
+ sd_id128_t machine_id;
+ uint16_t caps;
+ usec_t ttl;
+ int r;
+
+ assert(link);
+
+ if (!link->network || link->network->lldp_emit == LLDP_EMIT_NO)
+ return 0;
+
+ assert(link->network->lldp_emit < _LLDP_EMIT_MAX);
+
+ r = sd_id128_get_machine(&machine_id);
+ if (r < 0)
+ return r;
+
+ (void) gethostname_strict(&hostname);
+ (void) parse_env_file(NULL, "/etc/machine-info", "PRETTY_HOSTNAME", &pretty_hostname);
+
+ assert_cc(LLDP_TX_INTERVAL_USEC * LLDP_TX_HOLD + 1 <= (UINT16_MAX - 1) * USEC_PER_SEC);
+ ttl = DIV_ROUND_UP(LLDP_TX_INTERVAL_USEC * LLDP_TX_HOLD + 1, USEC_PER_SEC);
+
+ caps = (link->network && link->network->ip_forward != ADDRESS_FAMILY_NO) ?
+ SD_LLDP_SYSTEM_CAPABILITIES_ROUTER :
+ SD_LLDP_SYSTEM_CAPABILITIES_STATION;
+
+ r = lldp_make_packet(link->network->lldp_emit,
+ &link->hw_addr.addr.ether,
+ sd_id128_to_string(machine_id, machine_id_string),
+ link->ifname,
+ (uint16_t) ttl,
+ link->network ? link->network->description : NULL,
+ hostname,
+ pretty_hostname,
+ SD_LLDP_SYSTEM_CAPABILITIES_STATION|SD_LLDP_SYSTEM_CAPABILITIES_BRIDGE|SD_LLDP_SYSTEM_CAPABILITIES_ROUTER,
+ caps,
+ link->network ? link->network->lldp_mud : NULL,
+ &packet, &packet_size);
+ if (r < 0)
+ return r;
+
+ return lldp_send_packet(link->ifindex, lldp_multicast_addr + link->network->lldp_emit, packet, packet_size);
+}
+
+static int on_lldp_timer(sd_event_source *s, usec_t t, void *userdata) {
+ Link *link = userdata;
+ usec_t delay;
+ int r;
+
+ assert(s);
+ assert(userdata);
+
+ log_link_debug(link, "Sending LLDP packet...");
+
+ r = link_send_lldp(link);
+ if (r < 0)
+ log_link_debug_errno(link, r, "Failed to send LLDP packet, ignoring: %m");
+
+ if (link->lldp_tx_fast > 0)
+ link->lldp_tx_fast--;
+
+ delay = link->lldp_tx_fast > 0 ? LLDP_FAST_TX_USEC : LLDP_TX_INTERVAL_USEC;
+ delay = usec_add(delay, (usec_t) random_u64() % LLDP_JITTER_USEC);
+
+ r = sd_event_source_set_time_relative(s, delay);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to restart LLDP timer: %m");
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to enable LLDP timer: %m");
+
+ return 0;
+}
+
+int link_lldp_emit_start(Link *link) {
+ usec_t next;
+ int r;
+
+ assert(link);
+
+ if (!link_lldp_emit_enabled(link)) {
+ link_lldp_emit_stop(link);
+ return 0;
+ }
+
+ /* Starts the LLDP transmission in "fast" mode. If it is already started, turns "fast" mode back on again. */
+
+ link->lldp_tx_fast = LLDP_TX_FAST_INIT;
+
+ next = usec_add(usec_add(now(clock_boottime_or_monotonic()), LLDP_FAST_TX_USEC),
+ (usec_t) random_u64() % LLDP_JITTER_USEC);
+
+ if (link->lldp_emit_event_source) {
+ usec_t old;
+
+ /* Lower the timeout, maybe */
+ r = sd_event_source_get_time(link->lldp_emit_event_source, &old);
+ if (r < 0)
+ return r;
+
+ if (old <= next)
+ return 0;
+
+ return sd_event_source_set_time(link->lldp_emit_event_source, next);
+ } else {
+ r = sd_event_add_time(
+ link->manager->event,
+ &link->lldp_emit_event_source,
+ clock_boottime_or_monotonic(),
+ next,
+ 0,
+ on_lldp_timer,
+ link);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(link->lldp_emit_event_source, "lldp-tx");
+ }
+
+ return 0;
+}
+
+void link_lldp_emit_stop(Link *link) {
+ assert(link);
+
+ link->lldp_emit_event_source = sd_event_source_unref(link->lldp_emit_event_source);
+}
+
+int config_parse_lldp_emit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ LLDPEmit *emit = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue))
+ *emit = LLDP_EMIT_NO;
+ else if (streq(rvalue, "nearest-bridge"))
+ *emit = LLDP_EMIT_NEAREST_BRIDGE;
+ else if (streq(rvalue, "non-tpmr-bridge"))
+ *emit = LLDP_EMIT_NON_TPMR_BRIDGE;
+ else if (streq(rvalue, "customer-bridge"))
+ *emit = LLDP_EMIT_CUSTOMER_BRIDGE;
+ else {
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse LLDP emission setting, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *emit = r ? LLDP_EMIT_NEAREST_BRIDGE : LLDP_EMIT_NO;
+ }
+
+ return 0;
+}
+
+int config_parse_lldp_mud(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *unescaped = NULL;
+ Network *n = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = cunescape(rvalue, 0, &unescaped);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to Failed to unescape LLDP MUD URL, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (!http_url_is_valid(unescaped) || strlen(unescaped) > 255) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse LLDP MUD URL '%s', ignoring: %m", rvalue);
+
+ return 0;
+ }
+
+ return free_and_replace(n->lldp_mud, unescaped);
+}
diff --git a/src/network/networkd-lldp-tx.h b/src/network/networkd-lldp-tx.h
new file mode 100644
index 0000000..aae30cb
--- /dev/null
+++ b/src/network/networkd-lldp-tx.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "conf-parser.h"
+
+typedef struct Link Link;
+
+typedef enum LLDPEmit {
+ LLDP_EMIT_NO,
+ LLDP_EMIT_NEAREST_BRIDGE,
+ LLDP_EMIT_NON_TPMR_BRIDGE,
+ LLDP_EMIT_CUSTOMER_BRIDGE,
+ _LLDP_EMIT_MAX,
+} LLDPEmit;
+
+bool link_lldp_emit_enabled(Link *link);
+int link_lldp_emit_start(Link *link);
+void link_lldp_emit_stop(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_lldp_emit);
+CONFIG_PARSER_PROTOTYPE(config_parse_lldp_mud);
diff --git a/src/network/networkd-manager-bus.c b/src/network/networkd-manager-bus.c
new file mode 100644
index 0000000..a0ac8b5
--- /dev/null
+++ b/src/network/networkd-manager-bus.c
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/capability.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-message-util.h"
+#include "bus-polkit.h"
+#include "networkd-link-bus.h"
+#include "networkd-link.h"
+#include "networkd-manager-bus.h"
+#include "networkd-manager.h"
+#include "path-util.h"
+#include "socket-netlink.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int method_list_links(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Manager *manager = userdata;
+ Link *link;
+ int r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(link, manager->links) {
+ _cleanup_free_ char *path = NULL;
+
+ path = link_bus_path(link);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(
+ reply, "(iso)",
+ link->ifindex,
+ link->ifname,
+ empty_to_root(path));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_get_link_by_name(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *path = NULL;
+ Manager *manager = userdata;
+ const char *name;
+ int index, r;
+ Link *link;
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ index = resolve_ifname(&manager->rtnl, name);
+ if (index < 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_LINK, "Link %s cannot be resolved", name);
+
+ link = hashmap_get(manager->links, INT_TO_PTR(index));
+ if (!link)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_LINK, "Link %s not known", name);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ path = link_bus_path(link);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "io", link->ifindex, empty_to_root(path));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int method_get_link_by_index(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *path = NULL;
+ Manager *manager = userdata;
+ int ifindex, r;
+ Link *link;
+
+ r = bus_message_read_ifindex(message, error, &ifindex);
+ if (r < 0)
+ return r;
+
+ link = hashmap_get(manager->links, INT_TO_PTR(ifindex));
+ if (!link)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_LINK, "Link %i not known", ifindex);
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ path = link_bus_path(link);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(reply, "so", link->ifname, empty_to_root(path));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int call_link_method(Manager *m, sd_bus_message *message, sd_bus_message_handler_t handler, sd_bus_error *error) {
+ int ifindex, r;
+ Link *l;
+
+ assert(m);
+ assert(message);
+ assert(handler);
+
+ r = bus_message_read_ifindex(message, error, &ifindex);
+ if (r < 0)
+ return r;
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!l)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_LINK, "Link %i not known", ifindex);
+
+ return handler(message, l, error);
+}
+
+static int bus_method_set_link_ntp_servers(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_ntp_servers, error);
+}
+
+static int bus_method_set_link_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dns_servers, error);
+}
+
+static int bus_method_set_link_dns_servers_ex(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dns_servers_ex, error);
+}
+
+static int bus_method_set_link_domains(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_domains, error);
+}
+
+static int bus_method_set_link_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_default_route, error);
+}
+
+static int bus_method_set_link_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_llmnr, error);
+}
+
+static int bus_method_set_link_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_mdns, error);
+}
+
+static int bus_method_set_link_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dns_over_tls, error);
+}
+
+static int bus_method_set_link_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dnssec, error);
+}
+
+static int bus_method_set_link_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dnssec_negative_trust_anchors, error);
+}
+
+static int bus_method_revert_link_ntp(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_revert_ntp, error);
+}
+
+static int bus_method_revert_link_dns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_revert_dns, error);
+}
+
+static int bus_method_renew_link(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_renew, error);
+}
+
+static int bus_method_force_renew_link(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_force_renew, error);
+}
+
+static int bus_method_reconfigure_link(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_reconfigure, error);
+}
+
+static int bus_method_reload(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *manager = userdata;
+ Link *link;
+ int r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.network1.reload",
+ NULL, true, UID_INVALID,
+ &manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ r = netdev_load(manager, true);
+ if (r < 0)
+ return r;
+
+ r = network_reload(manager);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(link, manager->links) {
+ r = link_reconfigure(link, false);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("OperationalState", "s", property_get_operational_state, offsetof(Manager, operational_state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CarrierState", "s", property_get_carrier_state, offsetof(Manager, carrier_state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("AddressState", "s", property_get_address_state, offsetof(Manager, address_state), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
+ SD_BUS_METHOD("ListLinks", NULL, "a(iso)", method_list_links, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetLinkByName", "s", "io", method_get_link_by_name, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetLinkByIndex", "i", "so", method_get_link_by_index, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkNTP", "ias", NULL, bus_method_set_link_ntp_servers, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkDNS", "ia(iay)", NULL, bus_method_set_link_dns_servers, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkDNSEx", "ia(iayqs)", NULL, bus_method_set_link_dns_servers_ex, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkDomains", "ia(sb)", NULL, bus_method_set_link_domains, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkDefaultRoute", "ib", NULL, bus_method_set_link_default_route, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkLLMNR", "is", NULL, bus_method_set_link_llmnr, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkMulticastDNS", "is", NULL, bus_method_set_link_mdns, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkDNSOverTLS", "is", NULL, bus_method_set_link_dns_over_tls, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkDNSSEC", "is", NULL, bus_method_set_link_dnssec, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLinkDNSSECNegativeTrustAnchors", "ias", NULL, bus_method_set_link_dnssec_negative_trust_anchors, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RevertLinkNTP", "i", NULL, bus_method_revert_link_ntp, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RevertLinkDNS", "i", NULL, bus_method_revert_link_dns, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RenewLink", "i", NULL, bus_method_renew_link, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ForceRenewLink", "i", NULL, bus_method_force_renew_link, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ReconfigureLink", "i", NULL, bus_method_reconfigure_link, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Reload", NULL, NULL, bus_method_reload, SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+int manager_send_changed_strv(Manager *manager, char **properties) {
+ assert(manager);
+ assert(properties);
+
+ if (!manager->bus)
+ return 0;
+
+ return sd_bus_emit_properties_changed_strv(
+ manager->bus,
+ "/org/freedesktop/network1",
+ "org.freedesktop.network1.Manager",
+ properties);
+}
diff --git a/src/network/networkd-manager-bus.h b/src/network/networkd-manager-bus.h
new file mode 100644
index 0000000..08ddfbd
--- /dev/null
+++ b/src/network/networkd-manager-bus.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef struct Manager Manager;
+
+extern const sd_bus_vtable manager_vtable[];
+
+int manager_send_changed_strv(Manager *m, char **properties);
diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c
new file mode 100644
index 0000000..19c3cc6
--- /dev/null
+++ b/src/network/networkd-manager.c
@@ -0,0 +1,1254 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/fib_rules.h>
+#include <linux/nexthop.h>
+
+#include "sd-daemon.h"
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "bus-log-control-api.h"
+#include "bus-polkit.h"
+#include "bus-util.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "local-addresses.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "networkd-address-pool.h"
+#include "networkd-dhcp-server-bus.h"
+#include "networkd-dhcp6.h"
+#include "networkd-link-bus.h"
+#include "networkd-manager-bus.h"
+#include "networkd-manager.h"
+#include "networkd-neighbor.h"
+#include "networkd-network-bus.h"
+#include "networkd-nexthop.h"
+#include "networkd-routing-policy-rule.h"
+#include "networkd-speed-meter.h"
+#include "ordered-set.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "sysctl-util.h"
+#include "tmpfile-util.h"
+#include "udev-util.h"
+
+/* use 128 MB for receive socket kernel queue. */
+#define RCVBUF_SIZE (128*1024*1024)
+
+static int manager_reset_all(Manager *m) {
+ Link *link;
+ int r;
+
+ assert(m);
+
+ HASHMAP_FOREACH(link, m->links) {
+ r = link_carrier_reset(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not reset carrier: %m");
+ }
+
+ return 0;
+}
+
+static int match_prepare_for_sleep(sd_bus_message *message, void *userdata, sd_bus_error *ret_error) {
+ Manager *m = userdata;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (b)
+ return 0;
+
+ log_debug("Coming back from suspend, resetting all connections...");
+
+ (void) manager_reset_all(m);
+
+ return 0;
+}
+
+static int on_connected(sd_bus_message *message, void *userdata, sd_bus_error *ret_error) {
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ /* Did we get a timezone or transient hostname from DHCP while D-Bus wasn't up yet? */
+ if (m->dynamic_hostname)
+ (void) manager_set_hostname(m, m->dynamic_hostname);
+ if (m->dynamic_timezone)
+ (void) manager_set_timezone(m, m->dynamic_timezone);
+ if (m->links_requesting_uuid)
+ (void) manager_request_product_uuid(m, NULL);
+
+ return 0;
+}
+
+int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->bus)
+ return 0;
+
+ r = bus_open_system_watch_bind_with_description(&m->bus, "bus-api-network");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/network1", "org.freedesktop.network1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add manager object vtable: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/network1/link", "org.freedesktop.network1.Link", link_vtable, link_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add link object vtable: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/network1/link", "org.freedesktop.network1.DHCPServer", dhcp_server_vtable, link_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add link object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/network1/link", link_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add link enumerator: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/network1/network", "org.freedesktop.network1.Network", network_vtable, network_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add network object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/network1/network", network_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add network enumerator: %m");
+
+ r = bus_log_control_api_register(m->bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.network1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.DBus.Local",
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "Connected",
+ on_connected, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match on Connected signal: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "PrepareForSleep",
+ match_prepare_for_sleep, NULL, m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to request match for PrepareForSleep, ignoring: %m");
+
+ return 0;
+}
+
+static int manager_udev_process_link(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ Manager *m = userdata;
+ DeviceAction action;
+ Link *link = NULL;
+ int r, ifindex;
+
+ assert(m);
+ assert(device);
+
+ r = device_get_action(device, &action);
+ if (r < 0) {
+ log_device_debug_errno(device, r, "Failed to get udev action, ignoring device: %m");
+ return 0;
+ }
+
+ /* Ignore the "remove" uevent — let's remove a device only if rtnetlink says so. All other uevents
+ * are "positive" events in some form, i.e. inform us about a changed or new network interface, that
+ * still exists — and we are interested in that. */
+ if (action == DEVICE_ACTION_REMOVE)
+ return 0;
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0) {
+ log_device_debug_errno(device, r, "Ignoring udev %s event for device without ifindex or with invalid ifindex: %m",
+ device_action_to_string(action));
+ return 0;
+ }
+
+ r = device_is_renaming(device);
+ if (r < 0) {
+ log_device_error_errno(device, r, "Failed to determine the device is renamed or not, ignoring '%s' uevent: %m",
+ device_action_to_string(action));
+ return 0;
+ }
+ if (r > 0) {
+ log_device_debug(device, "Interface is under renaming, wait for the interface to be renamed.");
+ return 0;
+ }
+
+ r = link_get(m, ifindex, &link);
+ if (r < 0) {
+ if (r != -ENODEV)
+ log_debug_errno(r, "Failed to get link from ifindex %i, ignoring: %m", ifindex);
+ return 0;
+ }
+
+ (void) link_initialized(link, device);
+
+ return 0;
+}
+
+static int manager_connect_udev(Manager *m) {
+ int r;
+
+ /* udev does not initialize devices inside containers, so we rely on them being already
+ * initialized before entering the container. */
+ if (path_is_read_only_fs("/sys") > 0)
+ return 0;
+
+ r = sd_device_monitor_new(&m->device_monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize device monitor: %m");
+
+ r = sd_device_monitor_set_receive_buffer_size(m->device_monitor, RCVBUF_SIZE);
+ if (r < 0)
+ log_warning_errno(r, "Failed to increase buffer size for device monitor, ignoring: %m");
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(m->device_monitor, "net", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not add device monitor filter: %m");
+
+ r = sd_device_monitor_attach_event(m->device_monitor, m->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(m->device_monitor, manager_udev_process_link, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ return 0;
+}
+
+static int manager_rtnl_process_link(sd_netlink *rtnl, sd_netlink_message *message, Manager *m) {
+ Link *link = NULL;
+ NetDev *netdev = NULL;
+ uint16_t type;
+ const char *name;
+ int r, ifindex;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_message_warning_errno(message, r, "rtnl: Could not receive link message, ignoring");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWLINK, RTM_DELLINK)) {
+ log_warning("rtnl: Received unexpected message type %u when processing link, ignoring.", type);
+ return 0;
+ }
+
+ r = sd_rtnl_message_link_get_ifindex(message, &ifindex);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Could not get ifindex from link message, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received link message with invalid ifindex %d, ignoring.", ifindex);
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(message, IFLA_IFNAME, &name);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Received link message without ifname, ignoring: %m");
+ return 0;
+ }
+
+ (void) link_get(m, ifindex, &link);
+ (void) netdev_get(m, name, &netdev);
+
+ switch (type) {
+ case RTM_NEWLINK:
+ if (!link) {
+ /* link is new, so add it */
+ r = link_add(m, message, &link);
+ if (r < 0) {
+ log_warning_errno(r, "Could not process new link message, ignoring: %m");
+ return 0;
+ }
+ }
+
+ if (netdev) {
+ /* netdev exists, so make sure the ifindex matches */
+ r = netdev_set_ifindex(netdev, message);
+ if (r < 0) {
+ log_warning_errno(r, "Could not process new link message for netdev, ignoring: %m");
+ return 0;
+ }
+ }
+
+ r = link_update(link, message);
+ if (r < 0) {
+ log_warning_errno(r, "Could not process link message, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case RTM_DELLINK:
+ link_drop(link);
+ netdev_drop(netdev);
+
+ break;
+
+ default:
+ assert_not_reached("Received link message with invalid RTNL message type.");
+ }
+
+ return 1;
+}
+
+static int systemd_netlink_fd(void) {
+ int n, fd, rtnl_fd = -EINVAL;
+
+ n = sd_listen_fds(true);
+ if (n <= 0)
+ return -EINVAL;
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) {
+ if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
+ if (rtnl_fd >= 0)
+ return -EINVAL;
+
+ rtnl_fd = fd;
+ }
+ }
+
+ return rtnl_fd;
+}
+
+static int manager_connect_genl(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = sd_genl_socket_open(&m->genl);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_inc_rcvbuf(m->genl, RCVBUF_SIZE);
+ if (r < 0)
+ log_warning_errno(r, "Failed to increase receive buffer size for general netlink socket, ignoring: %m");
+
+ r = sd_netlink_attach_event(m->genl, m->event, 0);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int manager_connect_rtnl(Manager *m) {
+ int fd, r;
+
+ assert(m);
+
+ fd = systemd_netlink_fd();
+ if (fd < 0)
+ r = sd_netlink_open(&m->rtnl);
+ else
+ r = sd_netlink_open_fd(&m->rtnl, fd);
+ if (r < 0)
+ return r;
+
+ /* Bump receiver buffer, but only if we are not called via socket activation, as in that
+ * case systemd sets the receive buffer size for us, and the value in the .socket unit
+ * should take full effect. */
+ if (fd < 0) {
+ r = sd_netlink_inc_rcvbuf(m->rtnl, RCVBUF_SIZE);
+ if (r < 0)
+ log_warning_errno(r, "Failed to increase receive buffer size for rtnl socket, ignoring: %m");
+ }
+
+ r = sd_netlink_attach_event(m->rtnl, m->event, 0);
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_NEWLINK, &manager_rtnl_process_link, NULL, m, "network-rtnl_process_link");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_DELLINK, &manager_rtnl_process_link, NULL, m, "network-rtnl_process_link");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_NEWADDR, &manager_rtnl_process_address, NULL, m, "network-rtnl_process_address");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_DELADDR, &manager_rtnl_process_address, NULL, m, "network-rtnl_process_address");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_NEWNEIGH, &manager_rtnl_process_neighbor, NULL, m, "network-rtnl_process_neighbor");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_DELNEIGH, &manager_rtnl_process_neighbor, NULL, m, "network-rtnl_process_neighbor");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_NEWROUTE, &manager_rtnl_process_route, NULL, m, "network-rtnl_process_route");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_DELROUTE, &manager_rtnl_process_route, NULL, m, "network-rtnl_process_route");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_NEWRULE, &manager_rtnl_process_rule, NULL, m, "network-rtnl_process_rule");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_DELRULE, &manager_rtnl_process_rule, NULL, m, "network-rtnl_process_rule");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_NEWNEXTHOP, &manager_rtnl_process_nexthop, NULL, m, "network-rtnl_process_nexthop");
+ if (r < 0)
+ return r;
+
+ r = netlink_add_match(m->rtnl, NULL, RTM_DELNEXTHOP, &manager_rtnl_process_nexthop, NULL, m, "network-rtnl_process_nexthop");
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int ordered_set_put_dns_server(OrderedSet *s, int ifindex, struct in_addr_full *dns) {
+ const char *p;
+ int r;
+
+ assert(s);
+ assert(dns);
+
+ if (dns->ifindex != 0 && dns->ifindex != ifindex)
+ return 0;
+
+ p = in_addr_full_to_string(dns);
+ if (!p)
+ return 0;
+
+ r = ordered_set_put_strdup(s, p);
+ if (r == -EEXIST)
+ return 0;
+
+ return r;
+}
+
+static int ordered_set_put_dns_servers(OrderedSet *s, int ifindex, struct in_addr_full **dns, unsigned n) {
+ int r, c = 0;
+ unsigned i;
+
+ assert(s);
+ assert(dns || n == 0);
+
+ for (i = 0; i < n; i++) {
+ r = ordered_set_put_dns_server(s, ifindex, dns[i]);
+ if (r < 0)
+ return r;
+
+ c += r;
+ }
+
+ return c;
+}
+
+static int ordered_set_put_in4_addr(OrderedSet *s, const struct in_addr *address) {
+ char *p;
+ int r;
+
+ assert(s);
+ assert(address);
+
+ r = in_addr_to_string(AF_INET, (const union in_addr_union*) address, &p);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_consume(s, p);
+ if (r == -EEXIST)
+ return 0;
+
+ return r;
+}
+
+static int ordered_set_put_in4_addrv(OrderedSet *s,
+ const struct in_addr *addresses,
+ size_t n,
+ bool (*predicate)(const struct in_addr *addr)) {
+ int r, c = 0;
+ size_t i;
+
+ assert(s);
+ assert(n == 0 || addresses);
+
+ for (i = 0; i < n; i++) {
+ if (predicate && !predicate(&addresses[i]))
+ continue;
+ r = ordered_set_put_in4_addr(s, addresses+i);
+ if (r < 0)
+ return r;
+
+ c += r;
+ }
+
+ return c;
+}
+
+static int manager_save(Manager *m) {
+ _cleanup_ordered_set_free_free_ OrderedSet *dns = NULL, *ntp = NULL, *sip = NULL, *search_domains = NULL, *route_domains = NULL;
+ const char *operstate_str, *carrier_state_str, *address_state_str;
+ LinkOperationalState operstate = LINK_OPERSTATE_OFF;
+ LinkCarrierState carrier_state = LINK_CARRIER_STATE_OFF;
+ LinkAddressState address_state = LINK_ADDRESS_STATE_OFF;
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_strv_free_ char **p = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ Link *link;
+ int r;
+
+ assert(m);
+ assert(m->state_file);
+
+ /* We add all NTP and DNS server to a set, to filter out duplicates */
+ dns = ordered_set_new(&string_hash_ops);
+ if (!dns)
+ return -ENOMEM;
+
+ ntp = ordered_set_new(&string_hash_ops);
+ if (!ntp)
+ return -ENOMEM;
+
+ sip = ordered_set_new(&string_hash_ops);
+ if (!sip)
+ return -ENOMEM;
+
+ search_domains = ordered_set_new(&dns_name_hash_ops);
+ if (!search_domains)
+ return -ENOMEM;
+
+ route_domains = ordered_set_new(&dns_name_hash_ops);
+ if (!route_domains)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(link, m->links) {
+ const struct in_addr *addresses;
+
+ if (link->flags & IFF_LOOPBACK)
+ continue;
+
+ if (link->operstate > operstate)
+ operstate = link->operstate;
+
+ if (link->carrier_state > carrier_state)
+ carrier_state = link->carrier_state;
+
+ if (link->address_state > address_state)
+ address_state = link->address_state;
+
+ if (!link->network)
+ continue;
+
+ /* First add the static configured entries */
+ if (link->n_dns != (unsigned) -1)
+ r = ordered_set_put_dns_servers(dns, link->ifindex, link->dns, link->n_dns);
+ else
+ r = ordered_set_put_dns_servers(dns, link->ifindex, link->network->dns, link->network->n_dns);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_put_strdupv(ntp, link->ntp ?: link->network->ntp);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_put_string_set(search_domains, link->search_domains ?: link->network->search_domains);
+ if (r < 0)
+ return r;
+
+ r = ordered_set_put_string_set(route_domains, link->route_domains ?: link->network->route_domains);
+ if (r < 0)
+ return r;
+
+ if (!link->dhcp_lease)
+ continue;
+
+ /* Secondly, add the entries acquired via DHCP */
+ if (link->network->dhcp_use_dns) {
+ r = sd_dhcp_lease_get_dns(link->dhcp_lease, &addresses);
+ if (r > 0) {
+ r = ordered_set_put_in4_addrv(dns, addresses, r, in4_addr_is_non_local);
+ if (r < 0)
+ return r;
+ } else if (r < 0 && r != -ENODATA)
+ return r;
+ }
+
+ if (link->network->dhcp_use_ntp) {
+ r = sd_dhcp_lease_get_ntp(link->dhcp_lease, &addresses);
+ if (r > 0) {
+ r = ordered_set_put_in4_addrv(ntp, addresses, r, in4_addr_is_non_local);
+ if (r < 0)
+ return r;
+ } else if (r < 0 && r != -ENODATA)
+ return r;
+ }
+
+ if (link->network->dhcp_use_sip) {
+ r = sd_dhcp_lease_get_sip(link->dhcp_lease, &addresses);
+ if (r > 0) {
+ r = ordered_set_put_in4_addrv(sip, addresses, r, in4_addr_is_non_local);
+ if (r < 0)
+ return r;
+ } else if (r < 0 && r != -ENODATA)
+ return r;
+ }
+
+ if (link->network->dhcp_use_domains != DHCP_USE_DOMAINS_NO) {
+ const char *domainname;
+ char **domains = NULL;
+
+ OrderedSet *target_domains = (link->network->dhcp_use_domains == DHCP_USE_DOMAINS_YES) ? search_domains : route_domains;
+ r = sd_dhcp_lease_get_domainname(link->dhcp_lease, &domainname);
+ if (r >= 0) {
+ r = ordered_set_put_strdup(target_domains, domainname);
+ if (r < 0)
+ return r;
+ } else if (r != -ENODATA)
+ return r;
+
+ r = sd_dhcp_lease_get_search_domains(link->dhcp_lease, &domains);
+ if (r >= 0) {
+ r = ordered_set_put_strdupv(target_domains, domains);
+ if (r < 0)
+ return r;
+ } else if (r != -ENODATA)
+ return r;
+ }
+ }
+
+ if (carrier_state >= LINK_CARRIER_STATE_ENSLAVED)
+ carrier_state = LINK_CARRIER_STATE_CARRIER;
+
+ operstate_str = link_operstate_to_string(operstate);
+ assert(operstate_str);
+
+ carrier_state_str = link_carrier_state_to_string(carrier_state);
+ assert(carrier_state_str);
+
+ address_state_str = link_address_state_to_string(address_state);
+ assert(address_state_str);
+
+ r = fopen_temporary(m->state_file, &f, &temp_path);
+ if (r < 0)
+ return r;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fprintf(f,
+ "# This is private data. Do not parse.\n"
+ "OPER_STATE=%s\n"
+ "CARRIER_STATE=%s\n"
+ "ADDRESS_STATE=%s\n",
+ operstate_str, carrier_state_str, address_state_str);
+
+ ordered_set_print(f, "DNS=", dns);
+ ordered_set_print(f, "NTP=", ntp);
+ ordered_set_print(f, "SIP=", sip);
+ ordered_set_print(f, "DOMAINS=", search_domains);
+ ordered_set_print(f, "ROUTE_DOMAINS=", route_domains);
+
+ r = routing_policy_serialize_rules(m->rules, f);
+ if (r < 0)
+ goto fail;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, m->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (m->operational_state != operstate) {
+ m->operational_state = operstate;
+ if (strv_extend(&p, "OperationalState") < 0)
+ log_oom();
+ }
+
+ if (m->carrier_state != carrier_state) {
+ m->carrier_state = carrier_state;
+ if (strv_extend(&p, "CarrierState") < 0)
+ log_oom();
+ }
+
+ if (m->address_state != address_state) {
+ m->address_state = address_state;
+ if (strv_extend(&p, "AddressState") < 0)
+ log_oom();
+ }
+
+ if (p) {
+ r = manager_send_changed_strv(m, p);
+ if (r < 0)
+ log_error_errno(r, "Could not emit changed properties: %m");
+ }
+
+ m->dirty = false;
+
+ return 0;
+
+fail:
+ (void) unlink(m->state_file);
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save network state to %s: %m", m->state_file);
+}
+
+static int manager_dirty_handler(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+ Link *link;
+
+ assert(m);
+
+ if (m->dirty)
+ manager_save(m);
+
+ SET_FOREACH(link, m->dirty_links)
+ (void) link_save_and_clean(link);
+
+ return 1;
+}
+
+static int signal_terminate_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ m->restarting = false;
+
+ log_debug("Terminate operation initiated.");
+
+ return sd_event_exit(sd_event_source_get_event(s), 0);
+}
+
+static int signal_restart_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+ m->restarting = true;
+
+ log_debug("Restart operation initiated.");
+
+ return sd_event_exit(sd_event_source_get_event(s), 0);
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .speed_meter_interval_usec = SPEED_METER_DEFAULT_TIME_INTERVAL,
+ .manage_foreign_routes = true,
+ .ethtool_fd = -1,
+ };
+
+ m->state_file = strdup("/run/systemd/netif/state");
+ if (!m->state_file)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR2, -1) >= 0);
+
+ (void) sd_event_set_watchdog(m->event, true);
+ (void) sd_event_add_signal(m->event, NULL, SIGTERM, signal_terminate_callback, m);
+ (void) sd_event_add_signal(m->event, NULL, SIGINT, signal_terminate_callback, m);
+ (void) sd_event_add_signal(m->event, NULL, SIGUSR2, signal_restart_callback, m);
+
+ r = sd_event_add_post(m->event, NULL, manager_dirty_handler, m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_rtnl(m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_genl(m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_udev(m);
+ if (r < 0)
+ return r;
+
+ r = sd_resolve_default(&m->resolve);
+ if (r < 0)
+ return r;
+
+ r = sd_resolve_attach_event(m->resolve, m->event, 0);
+ if (r < 0)
+ return r;
+
+ r = address_pool_setup_default(m);
+ if (r < 0)
+ return r;
+
+ m->duid.type = DUID_TYPE_EN;
+
+ (void) routing_policy_load_rules(m->state_file, &m->rules_saved);
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+void manager_free(Manager *m) {
+ Link *link;
+
+ if (!m)
+ return;
+
+ free(m->state_file);
+
+ HASHMAP_FOREACH(link, m->links)
+ (void) link_stop_engines(link, true);
+
+ m->dhcp6_prefixes = hashmap_free_with_destructor(m->dhcp6_prefixes, dhcp6_pd_free);
+ m->dhcp6_pd_prefixes = set_free_with_destructor(m->dhcp6_pd_prefixes, dhcp6_pd_free);
+
+ m->dirty_links = set_free_with_destructor(m->dirty_links, link_unref);
+ m->links_requesting_uuid = set_free_with_destructor(m->links_requesting_uuid, link_unref);
+ m->links = hashmap_free_with_destructor(m->links, link_unref);
+
+ m->duids_requesting_uuid = set_free(m->duids_requesting_uuid);
+ m->networks = ordered_hashmap_free_with_destructor(m->networks, network_unref);
+
+ m->netdevs = hashmap_free_with_destructor(m->netdevs, netdev_unref);
+
+ ordered_set_free_free(m->address_pools);
+
+ /* routing_policy_rule_free() access m->rules and m->rules_foreign.
+ * So, it is necessary to set NULL after the sets are freed. */
+ m->rules = set_free(m->rules);
+ m->rules_foreign = set_free(m->rules_foreign);
+ set_free(m->rules_saved);
+
+ sd_netlink_unref(m->rtnl);
+ sd_netlink_unref(m->genl);
+ sd_resolve_unref(m->resolve);
+
+ /* reject (e.g. unreachable) type routes are managed by Manager, but may be referenced by a
+ * link. E.g., DHCP6 with prefix delegation creates unreachable routes, and they are referenced
+ * by the upstream link. And the links may be referenced by netlink slots. Hence, two
+ * set_free() must be called after the above sd_netlink_unref(). */
+ m->routes = set_free(m->routes);
+ m->routes_foreign = set_free(m->routes_foreign);
+
+ sd_event_source_unref(m->speed_meter_event_source);
+ sd_event_unref(m->event);
+
+ sd_device_monitor_unref(m->device_monitor);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+ sd_bus_flush_close_unref(m->bus);
+
+ free(m->dynamic_timezone);
+ free(m->dynamic_hostname);
+
+ safe_close(m->ethtool_fd);
+
+ free(m);
+}
+
+int manager_start(Manager *m) {
+ Link *link;
+ int r;
+
+ assert(m);
+
+ r = manager_start_speed_meter(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize speed meter: %m");
+
+ /* The dirty handler will deal with future serialization, but the first one
+ must be done explicitly. */
+
+ manager_save(m);
+
+ HASHMAP_FOREACH(link, m->links)
+ (void) link_save(link);
+
+ return 0;
+}
+
+int manager_load_config(Manager *m) {
+ int r;
+
+ /* update timestamp */
+ paths_check_timestamp(NETWORK_DIRS, &m->network_dirs_ts_usec, true);
+
+ r = netdev_load(m, false);
+ if (r < 0)
+ return r;
+
+ r = network_load(m, &m->networks);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+bool manager_should_reload(Manager *m) {
+ return paths_check_timestamp(NETWORK_DIRS, &m->network_dirs_ts_usec, false);
+}
+
+static int manager_enumerate_internal(
+ Manager *m,
+ sd_netlink_message *req,
+ int (*process)(sd_netlink *, sd_netlink_message *, Manager *),
+ const char *name) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *reply = NULL;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+ assert(req);
+ assert(process);
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0) {
+ if (name && (r == -EOPNOTSUPP || (r == -EINVAL && mac_selinux_enforcing()))) {
+ log_debug_errno(r, "%s are not supported by the kernel. Ignoring.", name);
+ return 0;
+ }
+
+ return r;
+ }
+
+ for (sd_netlink_message *reply_one = reply; reply_one; reply_one = sd_netlink_message_next(reply_one)) {
+ int k;
+
+ m->enumerating = true;
+
+ k = process(m->rtnl, reply_one, m);
+ if (k < 0 && r >= 0)
+ r = k;
+
+ m->enumerating = false;
+ }
+
+ return r;
+}
+
+static int manager_enumerate_links(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_link(m->rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return r;
+
+ return manager_enumerate_internal(m, req, manager_rtnl_process_link, NULL);
+}
+
+static int manager_enumerate_addresses(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_addr(m->rtnl, &req, RTM_GETADDR, 0, 0);
+ if (r < 0)
+ return r;
+
+ return manager_enumerate_internal(m, req, manager_rtnl_process_address, NULL);
+}
+
+static int manager_enumerate_neighbors(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_neigh(m->rtnl, &req, RTM_GETNEIGH, 0, AF_UNSPEC);
+ if (r < 0)
+ return r;
+
+ return manager_enumerate_internal(m, req, manager_rtnl_process_neighbor, NULL);
+}
+
+static int manager_enumerate_routes(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ if (!m->manage_foreign_routes)
+ return 0;
+
+ r = sd_rtnl_message_new_route(m->rtnl, &req, RTM_GETROUTE, 0, 0);
+ if (r < 0)
+ return r;
+
+ return manager_enumerate_internal(m, req, manager_rtnl_process_route, NULL);
+}
+
+static int manager_enumerate_rules(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_routing_policy_rule(m->rtnl, &req, RTM_GETRULE, 0);
+ if (r < 0)
+ return r;
+
+ return manager_enumerate_internal(m, req, manager_rtnl_process_rule, "Routing policy rules");
+}
+
+static int manager_enumerate_nexthop(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(m);
+ assert(m->rtnl);
+
+ r = sd_rtnl_message_new_nexthop(m->rtnl, &req, RTM_GETNEXTHOP, 0, 0);
+ if (r < 0)
+ return r;
+
+ return manager_enumerate_internal(m, req, manager_rtnl_process_nexthop, "Nexthop rules");
+}
+
+int manager_enumerate(Manager *m) {
+ int r;
+
+ r = manager_enumerate_links(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate links: %m");
+
+ r = manager_enumerate_addresses(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate addresses: %m");
+
+ r = manager_enumerate_neighbors(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate neighbors: %m");
+
+ r = manager_enumerate_routes(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate routes: %m");
+
+ r = manager_enumerate_rules(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate routing policy rules: %m");
+
+ r = manager_enumerate_nexthop(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not enumerate nexthop rules: %m");
+
+ return 0;
+}
+
+Link* manager_find_uplink(Manager *m, Link *exclude) {
+ _cleanup_free_ struct local_address *gateways = NULL;
+ int n, i;
+
+ assert(m);
+
+ /* Looks for a suitable "uplink", via black magic: an
+ * interface that is up and where the default route with the
+ * highest priority points to. */
+
+ n = local_gateways(m->rtnl, 0, AF_UNSPEC, &gateways);
+ if (n < 0) {
+ log_warning_errno(n, "Failed to determine list of default gateways: %m");
+ return NULL;
+ }
+
+ for (i = 0; i < n; i++) {
+ Link *link;
+
+ link = hashmap_get(m->links, INT_TO_PTR(gateways[i].ifindex));
+ if (!link) {
+ log_debug("Weird, found a gateway for a link we don't know. Ignoring.");
+ continue;
+ }
+
+ if (link == exclude)
+ continue;
+
+ if (link->operstate < LINK_OPERSTATE_ROUTABLE)
+ continue;
+
+ return link;
+ }
+
+ return NULL;
+}
+
+void manager_dirty(Manager *manager) {
+ assert(manager);
+
+ /* the serialized state in /run is no longer up-to-date */
+ manager->dirty = true;
+}
+
+static int set_hostname_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ _unused_ Manager *manager = userdata;
+ const sd_bus_error *e;
+
+ assert(m);
+ assert(manager);
+
+ e = sd_bus_message_get_error(m);
+ if (e)
+ log_warning_errno(sd_bus_error_get_errno(e), "Could not set hostname: %s", e->message);
+
+ return 1;
+}
+
+int manager_set_hostname(Manager *m, const char *hostname) {
+ int r;
+
+ log_debug("Setting transient hostname: '%s'", strna(hostname));
+
+ if (free_and_strdup(&m->dynamic_hostname, hostname) < 0)
+ return log_oom();
+
+ if (!m->bus || sd_bus_is_ready(m->bus) <= 0) {
+ log_debug("Not connected to system bus, setting hostname later.");
+ return 0;
+ }
+
+ r = sd_bus_call_method_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.hostname1",
+ "/org/freedesktop/hostname1",
+ "org.freedesktop.hostname1",
+ "SetHostname",
+ set_hostname_handler,
+ m,
+ "sb",
+ hostname,
+ false);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not set transient hostname: %m");
+
+ return 0;
+}
+
+static int set_timezone_handler(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ _unused_ Manager *manager = userdata;
+ const sd_bus_error *e;
+
+ assert(m);
+ assert(manager);
+
+ e = sd_bus_message_get_error(m);
+ if (e)
+ log_warning_errno(sd_bus_error_get_errno(e), "Could not set timezone: %s", e->message);
+
+ return 1;
+}
+
+int manager_set_timezone(Manager *m, const char *tz) {
+ int r;
+
+ assert(m);
+ assert(tz);
+
+ log_debug("Setting system timezone: '%s'", tz);
+ if (free_and_strdup(&m->dynamic_timezone, tz) < 0)
+ return log_oom();
+
+ if (!m->bus || sd_bus_is_ready(m->bus) <= 0) {
+ log_debug("Not connected to system bus, setting timezone later.");
+ return 0;
+ }
+
+ r = sd_bus_call_method_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ "org.freedesktop.timedate1",
+ "SetTimezone",
+ set_timezone_handler,
+ m,
+ "sb",
+ tz,
+ false);
+ if (r < 0)
+ return log_error_errno(r, "Could not set timezone: %m");
+
+ return 0;
+}
diff --git a/src/network/networkd-manager.h b/src/network/networkd-manager.h
new file mode 100644
index 0000000..b67116b
--- /dev/null
+++ b/src/network/networkd-manager.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-device.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+#include "sd-netlink.h"
+#include "sd-resolve.h"
+
+#include "dhcp-identifier.h"
+#include "hashmap.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+#include "ordered-set.h"
+#include "set.h"
+#include "time-util.h"
+
+struct Manager {
+ sd_netlink *rtnl;
+ /* lazy initialized */
+ sd_netlink *genl;
+ sd_event *event;
+ sd_resolve *resolve;
+ sd_bus *bus;
+ sd_device_monitor *device_monitor;
+ Hashmap *polkit_registry;
+ int ethtool_fd;
+
+ bool enumerating:1;
+ bool dirty:1;
+ bool restarting:1;
+ bool manage_foreign_routes;
+
+ Set *dirty_links;
+
+ char *state_file;
+ LinkOperationalState operational_state;
+ LinkCarrierState carrier_state;
+ LinkAddressState address_state;
+
+ Hashmap *links;
+ Hashmap *netdevs;
+ OrderedHashmap *networks;
+ Hashmap *dhcp6_prefixes;
+ Set *dhcp6_pd_prefixes;
+ OrderedSet *address_pools;
+
+ usec_t network_dirs_ts_usec;
+
+ DUID duid;
+ sd_id128_t product_uuid;
+ bool has_product_uuid;
+ Set *links_requesting_uuid;
+ Set *duids_requesting_uuid;
+
+ char* dynamic_hostname;
+ char* dynamic_timezone;
+
+ Set *rules;
+ Set *rules_foreign;
+ Set *rules_saved;
+
+ /* Manager stores routes without RTA_OIF attribute. */
+ Set *routes;
+ Set *routes_foreign;
+
+ /* For link speed meter*/
+ bool use_speed_meter;
+ sd_event_source *speed_meter_event_source;
+ usec_t speed_meter_interval_usec;
+ usec_t speed_meter_usec_new;
+ usec_t speed_meter_usec_old;
+
+ bool dhcp4_prefix_root_cannot_set_table:1;
+ bool bridge_mdb_on_master_not_supported:1;
+};
+
+int manager_new(Manager **ret);
+void manager_free(Manager *m);
+
+int manager_connect_bus(Manager *m);
+int manager_start(Manager *m);
+
+int manager_load_config(Manager *m);
+bool manager_should_reload(Manager *m);
+
+int manager_enumerate(Manager *m);
+
+void manager_dirty(Manager *m);
+
+Link* manager_find_uplink(Manager *m, Link *exclude);
+
+int manager_set_hostname(Manager *m, const char *hostname);
+int manager_set_timezone(Manager *m, const char *timezone);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
diff --git a/src/network/networkd-mdb.c b/src/network/networkd-mdb.c
new file mode 100644
index 0000000..0300dce
--- /dev/null
+++ b/src/network/networkd-mdb.c
@@ -0,0 +1,365 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "netlink-util.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-mdb.h"
+#include "networkd-network.h"
+#include "string-util.h"
+#include "vlan-util.h"
+
+#define STATIC_MDB_ENTRIES_PER_NETWORK_MAX 1024U
+
+/* remove MDB entry. */
+MdbEntry *mdb_entry_free(MdbEntry *mdb_entry) {
+ if (!mdb_entry)
+ return NULL;
+
+ if (mdb_entry->network) {
+ assert(mdb_entry->section);
+ hashmap_remove(mdb_entry->network->mdb_entries_by_section, mdb_entry->section);
+ }
+
+ network_config_section_free(mdb_entry->section);
+
+ return mfree(mdb_entry);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(MdbEntry, mdb_entry_free);
+
+/* create a new MDB entry or get an existing one. */
+static int mdb_entry_new_static(
+ Network *network,
+ const char *filename,
+ unsigned section_line,
+ MdbEntry **ret) {
+
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(mdb_entry_freep) MdbEntry *mdb_entry = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ /* search entry in hashmap first. */
+ mdb_entry = hashmap_get(network->mdb_entries_by_section, n);
+ if (mdb_entry) {
+ *ret = TAKE_PTR(mdb_entry);
+ return 0;
+ }
+
+ if (hashmap_size(network->mdb_entries_by_section) >= STATIC_MDB_ENTRIES_PER_NETWORK_MAX)
+ return -E2BIG;
+
+ /* allocate space for an MDB entry. */
+ mdb_entry = new(MdbEntry, 1);
+ if (!mdb_entry)
+ return -ENOMEM;
+
+ /* init MDB structure. */
+ *mdb_entry = (MdbEntry) {
+ .network = network,
+ .section = TAKE_PTR(n),
+ };
+
+ r = hashmap_ensure_allocated(&network->mdb_entries_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->mdb_entries_by_section, mdb_entry->section, mdb_entry);
+ if (r < 0)
+ return r;
+
+ /* return allocated MDB structure. */
+ *ret = TAKE_PTR(mdb_entry);
+ return 0;
+}
+
+static int set_mdb_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->bridge_mdb_messages > 0);
+
+ link->bridge_mdb_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r == -EINVAL && streq_ptr(link->kind, "bridge") && (!link->network || !link->network->bridge)) {
+ /* To configure bridge MDB entries on bridge master, 1bc844ee0faa1b92e3ede00bdd948021c78d7088 (v5.4) is required. */
+ if (!link->manager->bridge_mdb_on_master_not_supported) {
+ log_link_warning_errno(link, r, "Kernel seems not to support configuring bridge MDB entries on bridge master, ignoring: %m");
+ link->manager->bridge_mdb_on_master_not_supported = true;
+ }
+ } else if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not add MDB entry");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->bridge_mdb_messages == 0) {
+ link->bridge_mdb_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int link_get_bridge_master_ifindex(Link *link) {
+ assert(link);
+
+ if (link->network && link->network->bridge)
+ return link->network->bridge->ifindex;
+
+ if (streq_ptr(link->kind, "bridge"))
+ return link->ifindex;
+
+ return 0;
+}
+
+/* send a request to the kernel to add an MDB entry */
+static int mdb_entry_configure(Link *link, MdbEntry *mdb_entry) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ struct br_mdb_entry entry;
+ int master, r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(mdb_entry);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *a = NULL;
+
+ (void) in_addr_to_string(mdb_entry->family, &mdb_entry->group_addr, &a);
+ log_link_debug(link, "Configuring bridge MDB entry: MulticastGroupAddress=%s, VLANId=%u",
+ strna(a), mdb_entry->vlan_id);
+ }
+
+ master = link_get_bridge_master_ifindex(link);
+ if (master <= 0)
+ return log_link_error_errno(link, SYNTHETIC_ERRNO(EINVAL), "Invalid bridge master ifindex %i", master);
+
+ entry = (struct br_mdb_entry) {
+ /* If MDB entry is added on bridge master, then the state must be MDB_TEMPORARY.
+ * See br_mdb_add_group() in net/bridge/br_mdb.c of kernel. */
+ .state = master == link->ifindex ? MDB_TEMPORARY : MDB_PERMANENT,
+ .ifindex = link->ifindex,
+ .vid = mdb_entry->vlan_id,
+ };
+
+ /* create new RTM message */
+ r = sd_rtnl_message_new_mdb(link->manager->rtnl, &req, RTM_NEWMDB, master);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_NEWMDB message: %m");
+
+ switch (mdb_entry->family) {
+ case AF_INET:
+ entry.addr.u.ip4 = mdb_entry->group_addr.in.s_addr;
+ entry.addr.proto = htobe16(ETH_P_IP);
+ break;
+
+ case AF_INET6:
+ entry.addr.u.ip6 = mdb_entry->group_addr.in6;
+ entry.addr.proto = htobe16(ETH_P_IPV6);
+ break;
+
+ default:
+ assert_not_reached("Invalid address family");
+ }
+
+ r = sd_netlink_message_append_data(req, MDBA_SET_ENTRY, &entry, sizeof(entry));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append MDBA_SET_ENTRY attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, set_mdb_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 1;
+}
+
+int link_set_bridge_mdb(Link *link) {
+ MdbEntry *mdb_entry;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+
+ link->bridge_mdb_configured = false;
+
+ if (!link->network)
+ return 0;
+
+ if (hashmap_isempty(link->network->mdb_entries_by_section))
+ goto finish;
+
+ if (!link_has_carrier(link))
+ return log_link_debug(link, "Link does not have carrier yet, setting MDB entries later.");
+
+ if (link->network->bridge) {
+ Link *master;
+
+ r = link_get(link->manager, link->network->bridge->ifindex, &master);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get Link object for Bridge=%s", link->network->bridge->ifname);
+
+ if (!link_has_carrier(master))
+ return log_link_debug(link, "Bridge interface %s does not have carrier yet, setting MDB entries later.", link->network->bridge->ifname);
+
+ } else if (!streq_ptr(link->kind, "bridge")) {
+ log_link_warning(link, "Link is neither a bridge master nor a bridge port, ignoring [BridgeMDB] sections.");
+ goto finish;
+ } else if (link->manager->bridge_mdb_on_master_not_supported) {
+ log_link_debug(link, "Kernel seems not to support configuring bridge MDB entries on bridge master, ignoring [BridgeMDB] sections.");
+ goto finish;
+ }
+
+ HASHMAP_FOREACH(mdb_entry, link->network->mdb_entries_by_section) {
+ r = mdb_entry_configure(link, mdb_entry);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to add MDB entry to multicast group database: %m");
+
+ link->bridge_mdb_messages++;
+ }
+
+finish:
+ if (link->bridge_mdb_messages == 0) {
+ link->bridge_mdb_configured = true;
+ link_check_ready(link);
+ }
+
+ return 0;
+}
+
+static int mdb_entry_verify(MdbEntry *mdb_entry) {
+ if (section_is_invalid(mdb_entry->section))
+ return -EINVAL;
+
+ if (mdb_entry->family == AF_UNSPEC)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: [BridgeMDB] section without MulticastGroupAddress= field configured. "
+ "Ignoring [BridgeMDB] section from line %u.",
+ mdb_entry->section->filename, mdb_entry->section->line);
+
+ if (!in_addr_is_multicast(mdb_entry->family, &mdb_entry->group_addr))
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: MulticastGroupAddress= is not a multicast address. "
+ "Ignoring [BridgeMDB] section from line %u.",
+ mdb_entry->section->filename, mdb_entry->section->line);
+
+ if (mdb_entry->family == AF_INET) {
+ if (in4_addr_is_local_multicast(&mdb_entry->group_addr.in))
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: MulticastGroupAddress= is a local multicast address. "
+ "Ignoring [BridgeMDB] section from line %u.",
+ mdb_entry->section->filename, mdb_entry->section->line);
+ } else {
+ if (in6_addr_is_link_local_all_nodes(&mdb_entry->group_addr.in6))
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: MulticastGroupAddress= is the multicast all nodes address. "
+ "Ignoring [BridgeMDB] section from line %u.",
+ mdb_entry->section->filename, mdb_entry->section->line);
+ }
+
+ return 0;
+}
+
+void network_drop_invalid_mdb_entries(Network *network) {
+ MdbEntry *mdb_entry;
+
+ assert(network);
+
+ HASHMAP_FOREACH(mdb_entry, network->mdb_entries_by_section)
+ if (mdb_entry_verify(mdb_entry) < 0)
+ mdb_entry_free(mdb_entry);
+}
+
+/* parse the VLAN Id from config files. */
+int config_parse_mdb_vlan_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(mdb_entry_free_or_set_invalidp) MdbEntry *mdb_entry = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = mdb_entry_new_static(network, filename, section_line, &mdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ r = config_parse_vlanid(unit, filename, line, section,
+ section_line, lvalue, ltype,
+ rvalue, &mdb_entry->vlan_id, userdata);
+ if (r < 0)
+ return r;
+
+ mdb_entry = NULL;
+
+ return 0;
+}
+
+/* parse the multicast group from config files. */
+int config_parse_mdb_group_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(mdb_entry_free_or_set_invalidp) MdbEntry *mdb_entry = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = mdb_entry_new_static(network, filename, section_line, &mdb_entry);
+ if (r < 0)
+ return log_oom();
+
+ r = in_addr_from_string_auto(rvalue, &mdb_entry->family, &mdb_entry->group_addr);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Cannot parse multicast group address: %m");
+ return 0;
+ }
+
+ mdb_entry = NULL;
+
+ return 0;
+}
diff --git a/src/network/networkd-mdb.h b/src/network/networkd-mdb.h
new file mode 100644
index 0000000..ea88412
--- /dev/null
+++ b/src/network/networkd-mdb.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "networkd-util.h"
+
+typedef struct Network Network;
+typedef struct Link Link;
+
+typedef struct MdbEntry {
+ Network *network;
+ NetworkConfigSection *section;
+
+ int family;
+ union in_addr_union group_addr;
+ uint16_t vlan_id;
+} MdbEntry;
+
+MdbEntry *mdb_entry_free(MdbEntry *mdb_entry);
+
+void network_drop_invalid_mdb_entries(Network *network);
+
+int link_set_bridge_mdb(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_mdb_group_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_mdb_vlan_id);
diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c
new file mode 100644
index 0000000..d2aa3db
--- /dev/null
+++ b/src/network/networkd-ndisc.c
@@ -0,0 +1,1516 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+***/
+
+#include <arpa/inet.h>
+#include <netinet/icmp6.h>
+#include <net/if_arp.h>
+#include <linux/if.h>
+
+#include "sd-ndisc.h"
+
+#include "missing_network.h"
+#include "networkd-address.h"
+#include "networkd-dhcp6.h"
+#include "networkd-manager.h"
+#include "networkd-ndisc.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+
+#define NDISC_DNSSL_MAX 64U
+#define NDISC_RDNSS_MAX 64U
+#define NDISC_PREFIX_LFT_MIN 7200U
+
+#define DAD_CONFLICTS_IDGEN_RETRIES_RFC7217 3
+
+/* https://tools.ietf.org/html/rfc5453 */
+/* https://www.iana.org/assignments/ipv6-interface-ids/ipv6-interface-ids.xml */
+
+#define SUBNET_ROUTER_ANYCAST_ADDRESS_RFC4291 ((struct in6_addr) { .s6_addr = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } })
+#define SUBNET_ROUTER_ANYCAST_PREFIXLEN 8
+#define RESERVED_IPV6_INTERFACE_IDENTIFIERS_ADDRESS_RFC4291 ((struct in6_addr) { .s6_addr = { 0x02, 0x00, 0x5E, 0xFF, 0xFE } })
+#define RESERVED_IPV6_INTERFACE_IDENTIFIERS_PREFIXLEN 5
+#define RESERVED_SUBNET_ANYCAST_ADDRESSES_RFC4291 ((struct in6_addr) { .s6_addr = { 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF } })
+#define RESERVED_SUBNET_ANYCAST_PREFIXLEN 7
+
+#define NDISC_APP_ID SD_ID128_MAKE(13,ac,81,a7,d5,3f,49,78,92,79,5d,0c,29,3a,bc,7e)
+
+bool link_ipv6_accept_ra_enabled(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return false;
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ if (!link_ipv6ll_enabled(link))
+ return false;
+
+ assert(link->network->ipv6_accept_ra >= 0);
+ return link->network->ipv6_accept_ra;
+}
+
+void network_adjust_ipv6_accept_ra(Network *network) {
+ assert(network);
+
+ if (!FLAGS_SET(network->link_local, ADDRESS_FAMILY_IPV6)) {
+ if (network->ipv6_accept_ra > 0)
+ log_warning("%s: IPv6AcceptRA= is enabled but IPv6 link local addressing is disabled or not supported. "
+ "Disabling IPv6AcceptRA=.", network->filename);
+ network->ipv6_accept_ra = false;
+ }
+
+ if (network->ipv6_accept_ra < 0)
+ /* default to accept RA if ip_forward is disabled and ignore RA if ip_forward is enabled */
+ network->ipv6_accept_ra = !FLAGS_SET(network->ip_forward, ADDRESS_FAMILY_IPV6);
+}
+
+static int ndisc_remove_old_one(Link *link, const struct in6_addr *router, bool force);
+
+static int ndisc_address_callback(Address *address) {
+ struct in6_addr router = {};
+ NDiscAddress *n;
+
+ assert(address);
+ assert(address->link);
+
+ SET_FOREACH(n, address->link->ndisc_addresses)
+ if (n->address == address) {
+ router = n->router;
+ break;
+ }
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&router)) {
+ _cleanup_free_ char *buf = NULL;
+
+ (void) in_addr_to_string(address->family, &address->in_addr, &buf);
+ log_link_debug(address->link, "%s is called for %s/%u, but it is already removed, ignoring.",
+ __func__, strna(buf), address->prefixlen);
+ return 0;
+ }
+
+ /* Make this called only once */
+ SET_FOREACH(n, address->link->ndisc_addresses)
+ if (IN6_ARE_ADDR_EQUAL(&n->router, &router))
+ n->address->callback = NULL;
+
+ return ndisc_remove_old_one(address->link, &router, true);
+}
+
+static int ndisc_remove_old_one(Link *link, const struct in6_addr *router, bool force) {
+ NDiscAddress *na;
+ NDiscRoute *nr;
+ NDiscDNSSL *dnssl;
+ NDiscRDNSS *rdnss;
+ int k, r = 0;
+
+ assert(link);
+ assert(router);
+
+ if (!force) {
+ bool set_callback = false;
+
+ if (!link->ndisc_addresses_configured || !link->ndisc_routes_configured)
+ return 0;
+
+ SET_FOREACH(na, link->ndisc_addresses)
+ if (!na->marked && IN6_ARE_ADDR_EQUAL(&na->router, router)) {
+ set_callback = true;
+ break;
+ }
+
+ if (set_callback)
+ SET_FOREACH(na, link->ndisc_addresses)
+ if (!na->marked && address_is_ready(na->address)) {
+ set_callback = false;
+ break;
+ }
+
+ if (set_callback) {
+ SET_FOREACH(na, link->ndisc_addresses)
+ if (!na->marked && IN6_ARE_ADDR_EQUAL(&na->router, router))
+ na->address->callback = ndisc_address_callback;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *buf = NULL;
+
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union *) router, &buf);
+ log_link_debug(link, "No SLAAC address obtained from %s is ready. "
+ "The old NDisc information will be removed later.",
+ strna(buf));
+ }
+ return 0;
+ }
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *buf = NULL;
+
+ (void) in_addr_to_string(AF_INET6, (union in_addr_union *) router, &buf);
+ log_link_debug(link, "Removing old NDisc information obtained from %s.", strna(buf));
+ }
+
+ link_dirty(link);
+
+ SET_FOREACH(na, link->ndisc_addresses)
+ if (na->marked && IN6_ARE_ADDR_EQUAL(&na->router, router)) {
+ k = address_remove(na->address, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ SET_FOREACH(nr, link->ndisc_routes)
+ if (nr->marked && IN6_ARE_ADDR_EQUAL(&nr->router, router)) {
+ k = route_remove(nr->route, NULL, link, NULL);
+ if (k < 0)
+ r = k;
+ }
+
+ SET_FOREACH(rdnss, link->ndisc_rdnss)
+ if (rdnss->marked && IN6_ARE_ADDR_EQUAL(&rdnss->router, router))
+ free(set_remove(link->ndisc_rdnss, rdnss));
+
+ SET_FOREACH(dnssl, link->ndisc_dnssl)
+ if (dnssl->marked && IN6_ARE_ADDR_EQUAL(&dnssl->router, router))
+ free(set_remove(link->ndisc_dnssl, dnssl));
+
+ return r;
+}
+
+static int ndisc_remove_old(Link *link) {
+ _cleanup_set_free_free_ Set *routers = NULL;
+ _cleanup_free_ struct in6_addr *router = NULL;
+ struct in6_addr *a;
+ NDiscAddress *na;
+ NDiscRoute *nr;
+ NDiscDNSSL *dnssl;
+ NDiscRDNSS *rdnss;
+ int k, r;
+
+ assert(link);
+
+ routers = set_new(&in6_addr_hash_ops);
+ if (!routers)
+ return -ENOMEM;
+
+ SET_FOREACH(na, link->ndisc_addresses)
+ if (!set_contains(routers, &na->router)) {
+ router = newdup(struct in6_addr, &na->router, 1);
+ if (!router)
+ return -ENOMEM;
+
+ r = set_put(routers, router);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+ TAKE_PTR(router);
+ }
+
+ SET_FOREACH(nr, link->ndisc_routes)
+ if (!set_contains(routers, &nr->router)) {
+ router = newdup(struct in6_addr, &nr->router, 1);
+ if (!router)
+ return -ENOMEM;
+
+ r = set_put(routers, router);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+ TAKE_PTR(router);
+ }
+
+ SET_FOREACH(rdnss, link->ndisc_rdnss)
+ if (!set_contains(routers, &rdnss->router)) {
+ router = newdup(struct in6_addr, &rdnss->router, 1);
+ if (!router)
+ return -ENOMEM;
+
+ r = set_put(routers, router);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+ TAKE_PTR(router);
+ }
+
+ SET_FOREACH(dnssl, link->ndisc_dnssl)
+ if (!set_contains(routers, &dnssl->router)) {
+ router = newdup(struct in6_addr, &dnssl->router, 1);
+ if (!router)
+ return -ENOMEM;
+
+ r = set_put(routers, router);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+ TAKE_PTR(router);
+ }
+
+ r = 0;
+ SET_FOREACH(a, routers) {
+ k = ndisc_remove_old_one(link, a, false);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static void ndisc_route_hash_func(const NDiscRoute *x, struct siphash *state) {
+ route_hash_func(x->route, state);
+}
+
+static int ndisc_route_compare_func(const NDiscRoute *a, const NDiscRoute *b) {
+ return route_compare_func(a->route, b->route);
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ ndisc_route_hash_ops,
+ NDiscRoute,
+ ndisc_route_hash_func,
+ ndisc_route_compare_func,
+ free);
+
+static int ndisc_route_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->ndisc_routes_messages > 0);
+
+ link->ndisc_routes_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_error_errno(link, m, r, "Could not set NDisc route");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->ndisc_routes_messages == 0) {
+ log_link_debug(link, "NDisc routes set.");
+ link->ndisc_routes_configured = true;
+
+ r = ndisc_remove_old(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int ndisc_route_configure(Route *route, Link *link, sd_ndisc_router *rt) {
+ _cleanup_free_ NDiscRoute *nr = NULL;
+ NDiscRoute *nr_exist;
+ struct in6_addr router;
+ Route *ret;
+ int r;
+
+ assert(route);
+ assert(link);
+ assert(rt);
+
+ r = route_configure(route, link, ndisc_route_handler, &ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set NDisc route: %m");
+
+ link->ndisc_routes_messages++;
+
+ r = sd_ndisc_router_get_address(rt, &router);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get router address from RA: %m");
+
+ nr = new(NDiscRoute, 1);
+ if (!nr)
+ return log_oom();
+
+ *nr = (NDiscRoute) {
+ .router = router,
+ .route = ret,
+ };
+
+ nr_exist = set_get(link->ndisc_routes, nr);
+ if (nr_exist) {
+ nr_exist->marked = false;
+ nr_exist->router = router;
+ return 0;
+ }
+
+ r = set_ensure_put(&link->ndisc_routes, &ndisc_route_hash_ops, nr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store NDisc SLAAC route: %m");
+ assert(r > 0);
+ TAKE_PTR(nr);
+
+ return 0;
+}
+
+static void ndisc_address_hash_func(const NDiscAddress *x, struct siphash *state) {
+ address_hash_func(x->address, state);
+}
+
+static int ndisc_address_compare_func(const NDiscAddress *a, const NDiscAddress *b) {
+ return address_compare_func(a->address, b->address);
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ ndisc_address_hash_ops,
+ NDiscAddress,
+ ndisc_address_hash_func,
+ ndisc_address_compare_func,
+ free);
+
+static int ndisc_address_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->ndisc_addresses_messages > 0);
+
+ link->ndisc_addresses_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_error_errno(link, m, r, "Could not set NDisc address");
+ link_enter_failed(link);
+ return 1;
+ } else if (r >= 0)
+ (void) manager_rtnl_process_address(rtnl, m, link->manager);
+
+ if (link->ndisc_addresses_messages == 0) {
+ log_link_debug(link, "NDisc SLAAC addresses set.");
+ link->ndisc_addresses_configured = true;
+
+ r = ndisc_remove_old(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+
+ r = link_set_routes(link);
+ if (r < 0) {
+ link_enter_failed(link);
+ return 1;
+ }
+ }
+
+ return 1;
+}
+
+static int ndisc_address_configure(Address *address, Link *link, sd_ndisc_router *rt) {
+ _cleanup_free_ NDiscAddress *na = NULL;
+ NDiscAddress *na_exist;
+ struct in6_addr router;
+ Address *ret;
+ int r;
+
+ assert(address);
+ assert(link);
+ assert(rt);
+
+ r = address_configure(address, link, ndisc_address_handler, true, &ret);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to set NDisc SLAAC address: %m");
+
+ link->ndisc_addresses_messages++;
+
+ r = sd_ndisc_router_get_address(rt, &router);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get router address from RA: %m");
+
+ na = new(NDiscAddress, 1);
+ if (!na)
+ return log_oom();
+
+ *na = (NDiscAddress) {
+ .router = router,
+ .address = ret,
+ };
+
+ na_exist = set_get(link->ndisc_addresses, na);
+ if (na_exist) {
+ na_exist->marked = false;
+ na_exist->router = router;
+ return 0;
+ }
+
+ r = set_ensure_put(&link->ndisc_addresses, &ndisc_address_hash_ops, na);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store NDisc SLAAC address: %m");
+ assert(r > 0);
+ TAKE_PTR(na);
+
+ return 0;
+}
+
+static int ndisc_router_process_default(Link *link, sd_ndisc_router *rt) {
+ _cleanup_(route_freep) Route *route = NULL;
+ union in_addr_union gateway;
+ uint16_t lifetime;
+ unsigned preference;
+ uint32_t table, mtu;
+ usec_t time_now;
+ int r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get gateway lifetime from RA: %m");
+
+ if (lifetime == 0) /* not a default router */
+ return 0;
+
+ r = sd_ndisc_router_get_address(rt, &gateway.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get gateway address from RA: %m");
+
+ if (address_exists(link, AF_INET6, &gateway)) {
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *buffer = NULL;
+
+ (void) in_addr_to_string(AF_INET6, &gateway, &buffer);
+ log_link_debug(link, "No NDisc route added, gateway %s matches local address",
+ strnull(buffer));
+ }
+ return 0;
+ }
+
+ r = sd_ndisc_router_get_preference(rt, &preference);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get default router preference from RA: %m");
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_get_mtu(rt, &mtu);
+ if (r == -ENODATA)
+ mtu = 0;
+ else if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get default router MTU from RA: %m");
+
+ table = link_get_ipv6_accept_ra_route_table(link);
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_oom();
+
+ route->family = AF_INET6;
+ route->table = table;
+ route->priority = link->network->dhcp6_route_metric;
+ route->protocol = RTPROT_RA;
+ route->pref = preference;
+ route->gw_family = AF_INET6;
+ route->gw = gateway;
+ route->lifetime = time_now + lifetime * USEC_PER_SEC;
+ route->mtu = mtu;
+
+ r = ndisc_route_configure(route, link, rt);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set default route: %m");
+
+ Route *route_gw;
+ HASHMAP_FOREACH(route_gw, link->network->routes_by_section) {
+ if (!route_gw->gateway_from_dhcp_or_ra)
+ continue;
+
+ if (route_gw->gw_family != AF_INET6)
+ continue;
+
+ route_gw->gw = gateway;
+ if (!route_gw->table_set)
+ route_gw->table = table;
+ if (!route_gw->priority_set)
+ route_gw->priority = link->network->dhcp6_route_metric;
+ if (!route_gw->protocol_set)
+ route_gw->protocol = RTPROT_RA;
+ if (!route_gw->pref_set)
+ route->pref = preference;
+ route_gw->lifetime = time_now + lifetime * USEC_PER_SEC;
+ if (route_gw->mtu == 0)
+ route_gw->mtu = mtu;
+
+ r = ndisc_route_configure(route_gw, link, rt);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set gateway: %m");
+ }
+
+ return 0;
+}
+
+static bool stableprivate_address_is_valid(const struct in6_addr *addr) {
+ assert(addr);
+
+ /* According to rfc4291, generated address should not be in the following ranges. */
+
+ if (memcmp(addr, &SUBNET_ROUTER_ANYCAST_ADDRESS_RFC4291, SUBNET_ROUTER_ANYCAST_PREFIXLEN) == 0)
+ return false;
+
+ if (memcmp(addr, &RESERVED_IPV6_INTERFACE_IDENTIFIERS_ADDRESS_RFC4291, RESERVED_IPV6_INTERFACE_IDENTIFIERS_PREFIXLEN) == 0)
+ return false;
+
+ if (memcmp(addr, &RESERVED_SUBNET_ANYCAST_ADDRESSES_RFC4291, RESERVED_SUBNET_ANYCAST_PREFIXLEN) == 0)
+ return false;
+
+ return true;
+}
+
+static int make_stableprivate_address(Link *link, const struct in6_addr *prefix, uint8_t prefix_len, uint8_t dad_counter, struct in6_addr **ret) {
+ _cleanup_free_ struct in6_addr *addr = NULL;
+ sd_id128_t secret_key;
+ struct siphash state;
+ uint64_t rid;
+ size_t l;
+ int r;
+
+ /* According to rfc7217 section 5.1
+ * RID = F(Prefix, Net_Iface, Network_ID, DAD_Counter, secret_key) */
+
+ r = sd_id128_get_machine_app_specific(NDISC_APP_ID, &secret_key);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate key: %m");
+
+ siphash24_init(&state, secret_key.bytes);
+
+ l = MAX(DIV_ROUND_UP(prefix_len, 8), 8);
+ siphash24_compress(prefix, l, &state);
+ siphash24_compress_string(link->ifname, &state);
+ /* Only last 8 bytes of IB MAC are stable */
+ if (link->iftype == ARPHRD_INFINIBAND)
+ siphash24_compress(&link->hw_addr.addr.infiniband[12], 8, &state);
+ else
+ siphash24_compress(link->hw_addr.addr.bytes, link->hw_addr.length, &state);
+ siphash24_compress(&dad_counter, sizeof(uint8_t), &state);
+
+ rid = htole64(siphash24_finalize(&state));
+
+ addr = new(struct in6_addr, 1);
+ if (!addr)
+ return log_oom();
+
+ memcpy(addr->s6_addr, prefix->s6_addr, l);
+ memcpy(addr->s6_addr + l, &rid, 16 - l);
+
+ if (!stableprivate_address_is_valid(addr)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ *ret = TAKE_PTR(addr);
+ return 1;
+}
+
+static int ndisc_router_generate_addresses(Link *link, struct in6_addr *address, uint8_t prefixlen, Set **ret) {
+ _cleanup_set_free_free_ Set *addresses = NULL;
+ IPv6Token *j;
+ int r;
+
+ assert(link);
+ assert(address);
+ assert(ret);
+
+ addresses = set_new(&in6_addr_hash_ops);
+ if (!addresses)
+ return log_oom();
+
+ ORDERED_SET_FOREACH(j, link->network->ipv6_tokens) {
+ _cleanup_free_ struct in6_addr *new_address = NULL;
+
+ if (j->address_generation_type == IPV6_TOKEN_ADDRESS_GENERATION_PREFIXSTABLE
+ && (IN6_IS_ADDR_UNSPECIFIED(&j->prefix) || IN6_ARE_ADDR_EQUAL(&j->prefix, address))) {
+ /* While this loop uses dad_counter and a retry limit as specified in RFC 7217, the loop
+ does not actually attempt Duplicate Address Detection; the counter will be incremented
+ only when the address generation algorithm produces an invalid address, and the loop
+ may exit with an address which ends up being unusable due to duplication on the link.
+ */
+ for (; j->dad_counter < DAD_CONFLICTS_IDGEN_RETRIES_RFC7217; j->dad_counter++) {
+ r = make_stableprivate_address(link, address, prefixlen, j->dad_counter, &new_address);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ break;
+ }
+ } else if (j->address_generation_type == IPV6_TOKEN_ADDRESS_GENERATION_STATIC) {
+ new_address = new(struct in6_addr, 1);
+ if (!new_address)
+ return log_oom();
+
+ memcpy(new_address->s6_addr, address->s6_addr, 8);
+ memcpy(new_address->s6_addr + 8, j->prefix.s6_addr + 8, 8);
+ }
+
+ if (new_address) {
+ r = set_put(addresses, new_address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store SLAAC address: %m");
+ else if (r == 0)
+ log_link_debug_errno(link, r, "Generated SLAAC address is duplicated, ignoring.");
+ else
+ TAKE_PTR(new_address);
+ }
+ }
+
+ /* fall back to EUI-64 if no tokens provided addresses */
+ if (set_isempty(addresses)) {
+ _cleanup_free_ struct in6_addr *new_address = NULL;
+
+ new_address = newdup(struct in6_addr, address, 1);
+ if (!new_address)
+ return log_oom();
+
+ r = generate_ipv6_eui_64_address(link, new_address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to generate EUI64 address: %m");
+
+ r = set_put(addresses, new_address);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to store SLAAC address: %m");
+
+ TAKE_PTR(new_address);
+ }
+
+ *ret = TAKE_PTR(addresses);
+
+ return 0;
+}
+
+static int ndisc_router_process_autonomous_prefix(Link *link, sd_ndisc_router *rt) {
+ uint32_t lifetime_valid, lifetime_preferred, lifetime_remaining;
+ _cleanup_set_free_free_ Set *addresses = NULL;
+ _cleanup_(address_freep) Address *address = NULL;
+ struct in6_addr addr, *a;
+ unsigned prefixlen;
+ usec_t time_now;
+ int r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_prefix_get_prefixlen(rt, &prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix length: %m");
+
+ r = sd_ndisc_router_prefix_get_valid_lifetime(rt, &lifetime_valid);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix valid lifetime: %m");
+
+ r = sd_ndisc_router_prefix_get_preferred_lifetime(rt, &lifetime_preferred);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix preferred lifetime: %m");
+
+ /* The preferred lifetime is never greater than the valid lifetime */
+ if (lifetime_preferred > lifetime_valid)
+ return 0;
+
+ r = sd_ndisc_router_prefix_get_address(rt, &addr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix address: %m");
+
+ r = ndisc_router_generate_addresses(link, &addr, prefixlen, &addresses);
+ if (r < 0)
+ return r;
+
+ r = address_new(&address);
+ if (r < 0)
+ return log_oom();
+
+ address->family = AF_INET6;
+ address->prefixlen = prefixlen;
+ address->flags = IFA_F_NOPREFIXROUTE|IFA_F_MANAGETEMPADDR;
+ address->cinfo.ifa_prefered = lifetime_preferred;
+
+ SET_FOREACH(a, addresses) {
+ Address *existing_address;
+
+ address->in_addr.in6 = *a;
+
+ /* see RFC4862 section 5.5.3.e */
+ r = address_get(link, address, &existing_address);
+ if (r > 0) {
+ lifetime_remaining = existing_address->cinfo.tstamp / 100 + existing_address->cinfo.ifa_valid - time_now / USEC_PER_SEC;
+ if (lifetime_valid > NDISC_PREFIX_LFT_MIN || lifetime_valid > lifetime_remaining)
+ address->cinfo.ifa_valid = lifetime_valid;
+ else if (lifetime_remaining <= NDISC_PREFIX_LFT_MIN)
+ address->cinfo.ifa_valid = lifetime_remaining;
+ else
+ address->cinfo.ifa_valid = NDISC_PREFIX_LFT_MIN;
+ } else if (lifetime_valid > 0)
+ address->cinfo.ifa_valid = lifetime_valid;
+ else
+ continue; /* see RFC4862 section 5.5.3.d */
+
+ if (address->cinfo.ifa_valid == 0)
+ continue;
+
+ r = ndisc_address_configure(address, link, rt);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set SLAAC address: %m");
+ }
+
+ return 0;
+}
+
+static int ndisc_router_process_onlink_prefix(Link *link, sd_ndisc_router *rt) {
+ _cleanup_(route_freep) Route *route = NULL;
+ usec_t time_now;
+ uint32_t lifetime;
+ unsigned prefixlen;
+ int r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_prefix_get_prefixlen(rt, &prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix length: %m");
+
+ r = sd_ndisc_router_prefix_get_valid_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix lifetime: %m");
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_oom();
+
+ route->family = AF_INET6;
+ route->table = link_get_ipv6_accept_ra_route_table(link);
+ route->priority = link->network->dhcp6_route_metric;
+ route->protocol = RTPROT_RA;
+ route->flags = RTM_F_PREFIX;
+ route->dst_prefixlen = prefixlen;
+ route->lifetime = time_now + lifetime * USEC_PER_SEC;
+
+ r = sd_ndisc_router_prefix_get_address(rt, &route->dst.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix address: %m");
+
+ r = ndisc_route_configure(route, link, rt);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set prefix route: %m");;
+
+ return 0;
+}
+
+static int ndisc_router_process_route(Link *link, sd_ndisc_router *rt) {
+ _cleanup_(route_freep) Route *route = NULL;
+ struct in6_addr gateway;
+ uint32_t lifetime;
+ unsigned preference, prefixlen;
+ usec_t time_now;
+ int r;
+
+ assert(link);
+
+ r = sd_ndisc_router_route_get_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get gateway lifetime from RA: %m");
+
+ if (lifetime == 0)
+ return 0;
+
+ r = sd_ndisc_router_get_address(rt, &gateway);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get gateway address from RA: %m");
+
+ r = sd_ndisc_router_route_get_prefixlen(rt, &prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get route prefix length: %m");
+
+ r = sd_ndisc_router_route_get_preference(rt, &preference);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get default router preference from RA: %m");
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = route_new(&route);
+ if (r < 0)
+ return log_oom();
+
+ route->family = AF_INET6;
+ route->table = link_get_ipv6_accept_ra_route_table(link);
+ route->priority = link->network->dhcp6_route_metric;
+ route->protocol = RTPROT_RA;
+ route->pref = preference;
+ route->gw.in6 = gateway;
+ route->gw_family = AF_INET6;
+ route->dst_prefixlen = prefixlen;
+ route->lifetime = time_now + lifetime * USEC_PER_SEC;
+
+ r = sd_ndisc_router_route_get_address(rt, &route->dst.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get route address: %m");
+
+ r = ndisc_route_configure(route, link, rt);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set additional route: %m");
+
+ return 0;
+}
+
+static void ndisc_rdnss_hash_func(const NDiscRDNSS *x, struct siphash *state) {
+ siphash24_compress(&x->address, sizeof(x->address), state);
+}
+
+static int ndisc_rdnss_compare_func(const NDiscRDNSS *a, const NDiscRDNSS *b) {
+ return memcmp(&a->address, &b->address, sizeof(a->address));
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ ndisc_rdnss_hash_ops,
+ NDiscRDNSS,
+ ndisc_rdnss_hash_func,
+ ndisc_rdnss_compare_func,
+ free);
+
+static int ndisc_router_process_rdnss(Link *link, sd_ndisc_router *rt) {
+ uint32_t lifetime;
+ const struct in6_addr *a;
+ struct in6_addr router;
+ NDiscRDNSS *rdnss;
+ usec_t time_now;
+ int n, r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_address(rt, &router);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get router address from RA: %m");
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_rdnss_get_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RDNSS lifetime: %m");
+
+ n = sd_ndisc_router_rdnss_get_addresses(rt, &a);
+ if (n < 0)
+ return log_link_error_errno(link, n, "Failed to get RDNSS addresses: %m");
+
+ SET_FOREACH(rdnss, link->ndisc_rdnss)
+ if (IN6_ARE_ADDR_EQUAL(&rdnss->router, &router))
+ rdnss->marked = true;
+
+ if (lifetime == 0)
+ return 0;
+
+ if (n >= (int) NDISC_RDNSS_MAX) {
+ log_link_warning(link, "Too many RDNSS records per link. Only first %i records will be used.", NDISC_RDNSS_MAX);
+ n = NDISC_RDNSS_MAX;
+ }
+
+ for (int j = 0; j < n; j++) {
+ _cleanup_free_ NDiscRDNSS *x = NULL;
+ NDiscRDNSS d = {
+ .address = a[j],
+ };
+
+ rdnss = set_get(link->ndisc_rdnss, &d);
+ if (rdnss) {
+ rdnss->marked = false;
+ rdnss->router = router;
+ rdnss->valid_until = time_now + lifetime * USEC_PER_SEC;
+ continue;
+ }
+
+ x = new(NDiscRDNSS, 1);
+ if (!x)
+ return log_oom();
+
+ *x = (NDiscRDNSS) {
+ .address = a[j],
+ .router = router,
+ .valid_until = time_now + lifetime * USEC_PER_SEC,
+ };
+
+ r = set_ensure_consume(&link->ndisc_rdnss, &ndisc_rdnss_hash_ops, TAKE_PTR(x));
+ if (r < 0)
+ return log_oom();
+ assert(r > 0);
+ }
+
+ return 0;
+}
+
+static void ndisc_dnssl_hash_func(const NDiscDNSSL *x, struct siphash *state) {
+ siphash24_compress_string(NDISC_DNSSL_DOMAIN(x), state);
+}
+
+static int ndisc_dnssl_compare_func(const NDiscDNSSL *a, const NDiscDNSSL *b) {
+ return strcmp(NDISC_DNSSL_DOMAIN(a), NDISC_DNSSL_DOMAIN(b));
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ ndisc_dnssl_hash_ops,
+ NDiscDNSSL,
+ ndisc_dnssl_hash_func,
+ ndisc_dnssl_compare_func,
+ free);
+
+static int ndisc_router_process_dnssl(Link *link, sd_ndisc_router *rt) {
+ _cleanup_strv_free_ char **l = NULL;
+ struct in6_addr router;
+ uint32_t lifetime;
+ usec_t time_now;
+ NDiscDNSSL *dnssl;
+ char **j;
+ int r;
+
+ assert(link);
+ assert(rt);
+
+ r = sd_ndisc_router_get_address(rt, &router);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get router address from RA: %m");
+
+ r = sd_ndisc_router_get_timestamp(rt, clock_boottime_or_monotonic(), &time_now);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA timestamp: %m");
+
+ r = sd_ndisc_router_dnssl_get_lifetime(rt, &lifetime);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get DNSSL lifetime: %m");
+
+ r = sd_ndisc_router_dnssl_get_domains(rt, &l);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get DNSSL addresses: %m");
+
+ SET_FOREACH(dnssl, link->ndisc_dnssl)
+ if (IN6_ARE_ADDR_EQUAL(&dnssl->router, &router))
+ dnssl->marked = true;
+
+ if (lifetime == 0)
+ return 0;
+
+ if (strv_length(l) >= NDISC_DNSSL_MAX) {
+ log_link_warning(link, "Too many DNSSL records per link. Only first %i records will be used.", NDISC_DNSSL_MAX);
+ STRV_FOREACH(j, l + NDISC_DNSSL_MAX)
+ *j = mfree(*j);
+ }
+
+ STRV_FOREACH(j, l) {
+ _cleanup_free_ NDiscDNSSL *s = NULL;
+
+ s = malloc0(ALIGN(sizeof(NDiscDNSSL)) + strlen(*j) + 1);
+ if (!s)
+ return log_oom();
+
+ strcpy(NDISC_DNSSL_DOMAIN(s), *j);
+
+ dnssl = set_get(link->ndisc_dnssl, s);
+ if (dnssl) {
+ dnssl->marked = false;
+ dnssl->router = router;
+ dnssl->valid_until = time_now + lifetime * USEC_PER_SEC;
+ continue;
+ }
+
+ s->router = router;
+ s->valid_until = time_now + lifetime * USEC_PER_SEC;
+
+ r = set_ensure_consume(&link->ndisc_dnssl, &ndisc_dnssl_hash_ops, TAKE_PTR(s));
+ if (r < 0)
+ return log_oom();
+ assert(r > 0);
+ }
+
+ return 0;
+}
+
+static int ndisc_router_process_options(Link *link, sd_ndisc_router *rt) {
+ assert(link);
+ assert(link->network);
+ assert(rt);
+
+ for (int r = sd_ndisc_router_option_rewind(rt); ; r = sd_ndisc_router_option_next(rt)) {
+ uint8_t type;
+
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to iterate through options: %m");
+ if (r == 0) /* EOF */
+ return 0;
+
+ r = sd_ndisc_router_option_get_type(rt, &type);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA option type: %m");
+
+ switch (type) {
+
+ case SD_NDISC_OPTION_PREFIX_INFORMATION: {
+ union in_addr_union a;
+ uint8_t flags;
+
+ r = sd_ndisc_router_prefix_get_address(rt, &a.in6);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get prefix address: %m");
+
+ if (set_contains(link->network->ndisc_deny_listed_prefix, &a.in6)) {
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *b = NULL;
+
+ (void) in_addr_to_string(AF_INET6, &a, &b);
+ log_link_debug(link, "Prefix '%s' is deny-listed, ignoring", strna(b));
+ }
+ break;
+ }
+
+ r = sd_ndisc_router_prefix_get_flags(rt, &flags);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA prefix flags: %m");
+
+ if (link->network->ipv6_accept_ra_use_onlink_prefix &&
+ FLAGS_SET(flags, ND_OPT_PI_FLAG_ONLINK)) {
+ r = ndisc_router_process_onlink_prefix(link, rt);
+ if (r < 0)
+ return r;
+ }
+
+ if (link->network->ipv6_accept_ra_use_autonomous_prefix &&
+ FLAGS_SET(flags, ND_OPT_PI_FLAG_AUTO)) {
+ r = ndisc_router_process_autonomous_prefix(link, rt);
+ if (r < 0)
+ return r;
+ }
+ break;
+ }
+
+ case SD_NDISC_OPTION_ROUTE_INFORMATION:
+ r = ndisc_router_process_route(link, rt);
+ if (r < 0)
+ return r;
+ break;
+
+ case SD_NDISC_OPTION_RDNSS:
+ if (link->network->ipv6_accept_ra_use_dns) {
+ r = ndisc_router_process_rdnss(link, rt);
+ if (r < 0)
+ return r;
+ }
+ break;
+
+ case SD_NDISC_OPTION_DNSSL:
+ if (link->network->ipv6_accept_ra_use_dns) {
+ r = ndisc_router_process_dnssl(link, rt);
+ if (r < 0)
+ return r;
+ }
+ break;
+ }
+ }
+}
+
+static int ndisc_router_handler(Link *link, sd_ndisc_router *rt) {
+ struct in6_addr router;
+ uint64_t flags;
+ NDiscAddress *na;
+ NDiscRoute *nr;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->manager);
+ assert(rt);
+
+ link->ndisc_addresses_configured = false;
+ link->ndisc_routes_configured = false;
+
+ link_dirty(link);
+
+ r = sd_ndisc_router_get_address(rt, &router);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get router address from RA: %m");
+
+ SET_FOREACH(na, link->ndisc_addresses)
+ if (IN6_ARE_ADDR_EQUAL(&na->router, &router))
+ na->marked = true;
+
+ SET_FOREACH(nr, link->ndisc_routes)
+ if (IN6_ARE_ADDR_EQUAL(&nr->router, &router))
+ nr->marked = true;
+
+ r = sd_ndisc_router_get_flags(rt, &flags);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get RA flags: %m");
+
+ if ((flags & (ND_RA_FLAG_MANAGED | ND_RA_FLAG_OTHER) &&
+ link->network->ipv6_accept_ra_start_dhcp6_client != IPV6_ACCEPT_RA_START_DHCP6_CLIENT_NO) ||
+ link->network->ipv6_accept_ra_start_dhcp6_client == IPV6_ACCEPT_RA_START_DHCP6_CLIENT_ALWAYS) {
+
+ if (flags & (ND_RA_FLAG_MANAGED | ND_RA_FLAG_OTHER))
+ /* (re)start DHCPv6 client in stateful or stateless mode according to RA flags */
+ r = dhcp6_request_address(link, !(flags & ND_RA_FLAG_MANAGED));
+ else
+ /* When IPv6AcceptRA.DHCPv6Client=always, start dhcp6 client in managed mode
+ * even if router does not have M or O flag. */
+ r = dhcp6_request_address(link, false);
+ if (r < 0 && r != -EBUSY)
+ return log_link_error_errno(link, r, "Could not acquire DHCPv6 lease on NDisc request: %m");
+ else
+ log_link_debug(link, "Acquiring DHCPv6 lease on NDisc request");
+ }
+
+ r = ndisc_router_process_default(link, rt);
+ if (r < 0)
+ return r;
+ r = ndisc_router_process_options(link, rt);
+ if (r < 0)
+ return r;
+
+ if (link->ndisc_addresses_messages == 0)
+ link->ndisc_addresses_configured = true;
+ else {
+ log_link_debug(link, "Setting SLAAC addresses.");
+
+ /* address_handler calls link_set_routes() and link_set_nexthop(). Before they are
+ * called, the related flags must be cleared. Otherwise, the link becomes configured
+ * state before routes are configured. */
+ link->static_routes_configured = false;
+ link->static_nexthops_configured = false;
+ }
+
+ if (link->ndisc_routes_messages == 0)
+ link->ndisc_routes_configured = true;
+ else
+ log_link_debug(link, "Setting NDisc routes.");
+
+ r = ndisc_remove_old(link);
+ if (r < 0)
+ return r;
+
+ if (link->ndisc_addresses_configured && link->ndisc_routes_configured)
+ link_check_ready(link);
+ else
+ link_set_state(link, LINK_STATE_CONFIGURING);
+
+ return 0;
+}
+
+static void ndisc_handler(sd_ndisc *nd, sd_ndisc_event event, sd_ndisc_router *rt, void *userdata) {
+ Link *link = userdata;
+ int r;
+
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return;
+
+ switch (event) {
+
+ case SD_NDISC_EVENT_ROUTER:
+ r = ndisc_router_handler(link, rt);
+ if (r < 0) {
+ link_enter_failed(link);
+ return;
+ }
+ break;
+
+ case SD_NDISC_EVENT_TIMEOUT:
+ log_link_debug(link, "NDisc handler get timeout event");
+ if (link->ndisc_addresses_messages == 0 && link->ndisc_routes_messages == 0) {
+ link->ndisc_addresses_configured = true;
+ link->ndisc_routes_configured = true;
+ link_check_ready(link);
+ }
+ break;
+ default:
+ assert_not_reached("Unknown NDisc event");
+ }
+}
+
+int ndisc_configure(Link *link) {
+ int r;
+
+ assert(link);
+
+ if (!link_ipv6_accept_ra_enabled(link))
+ return 0;
+
+ if (!link->ndisc) {
+ r = sd_ndisc_new(&link->ndisc);
+ if (r < 0)
+ return r;
+
+ r = sd_ndisc_attach_event(link->ndisc, link->manager->event, 0);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_ndisc_set_mac(link->ndisc, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ r = sd_ndisc_set_ifindex(link->ndisc, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_ndisc_set_callback(link->ndisc, ndisc_handler, link);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+void ndisc_vacuum(Link *link) {
+ NDiscRDNSS *r;
+ NDiscDNSSL *d;
+ usec_t time_now;
+ bool updated = false;
+
+ assert(link);
+
+ /* Removes all RDNSS and DNSSL entries whose validity time has passed */
+
+ time_now = now(clock_boottime_or_monotonic());
+
+ SET_FOREACH(r, link->ndisc_rdnss)
+ if (r->valid_until < time_now) {
+ free(set_remove(link->ndisc_rdnss, r));
+ updated = true;
+ }
+
+ SET_FOREACH(d, link->ndisc_dnssl)
+ if (d->valid_until < time_now) {
+ free(set_remove(link->ndisc_dnssl, d));
+ updated = true;
+ }
+
+ if (updated)
+ link_dirty(link);
+}
+
+void ndisc_flush(Link *link) {
+ assert(link);
+
+ /* Removes all RDNSS and DNSSL entries, without exception */
+
+ link->ndisc_rdnss = set_free(link->ndisc_rdnss);
+ link->ndisc_dnssl = set_free(link->ndisc_dnssl);
+}
+
+int ipv6token_new(IPv6Token **ret) {
+ IPv6Token *p;
+
+ p = new(IPv6Token, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (IPv6Token) {
+ .address_generation_type = IPV6_TOKEN_ADDRESS_GENERATION_NONE,
+ };
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static void ipv6_token_hash_func(const IPv6Token *p, struct siphash *state) {
+ siphash24_compress(&p->address_generation_type, sizeof(p->address_generation_type), state);
+ siphash24_compress(&p->prefix, sizeof(p->prefix), state);
+}
+
+static int ipv6_token_compare_func(const IPv6Token *a, const IPv6Token *b) {
+ int r;
+
+ r = CMP(a->address_generation_type, b->address_generation_type);
+ if (r != 0)
+ return r;
+
+ return memcmp(&a->prefix, &b->prefix, sizeof(struct in6_addr));
+}
+
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ ipv6_token_hash_ops,
+ IPv6Token,
+ ipv6_token_hash_func,
+ ipv6_token_compare_func,
+ free);
+
+int config_parse_ndisc_deny_listed_prefix(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ network->ndisc_deny_listed_prefix = set_free_free(network->ndisc_deny_listed_prefix);
+ return 0;
+ }
+
+ for (p = rvalue;;) {
+ _cleanup_free_ char *n = NULL;
+ _cleanup_free_ struct in6_addr *a = NULL;
+ union in_addr_union ip;
+
+ r = extract_first_word(&p, &n, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse NDisc deny-listed prefix, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = in_addr_from_string(AF_INET6, n, &ip);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "NDisc deny-listed prefix is invalid, ignoring assignment: %s", n);
+ continue;
+ }
+
+ if (set_contains(network->ndisc_deny_listed_prefix, &ip.in6))
+ continue;
+
+ a = newdup(struct in6_addr, &ip.in6, 1);
+ if (!a)
+ return log_oom();
+
+ r = set_ensure_consume(&network->ndisc_deny_listed_prefix, &in6_addr_hash_ops, TAKE_PTR(a));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_address_generation_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ IPv6Token *token = NULL;
+ union in_addr_union buffer;
+ Network *network = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ network->ipv6_tokens = ordered_set_free(network->ipv6_tokens);
+ return 0;
+ }
+
+ r = ipv6token_new(&token);
+ if (r < 0)
+ return log_oom();
+
+ if ((p = startswith(rvalue, "prefixstable"))) {
+ token->address_generation_type = IPV6_TOKEN_ADDRESS_GENERATION_PREFIXSTABLE;
+ if (*p == ':')
+ p++;
+ else if (*p == '\0')
+ p = NULL;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid IPv6 token mode in %s=, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ } else {
+ token->address_generation_type = IPV6_TOKEN_ADDRESS_GENERATION_STATIC;
+ p = startswith(rvalue, "static:");
+ if (!p)
+ p = rvalue;
+ }
+
+ if (p) {
+ r = in_addr_from_string(AF_INET6, p, &buffer);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse IP address in %s=, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (token->address_generation_type == IPV6_TOKEN_ADDRESS_GENERATION_STATIC &&
+ in_addr_is_null(AF_INET6, &buffer)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "IPv6 address in %s= cannot be the ANY address, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ token->prefix = buffer.in6;
+ }
+
+ r = ordered_set_ensure_allocated(&network->ipv6_tokens, &ipv6_token_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_set_put(network->ipv6_tokens, token);
+ if (r == -EEXIST)
+ log_syntax(unit, LOG_DEBUG, filename, line, r,
+ "IPv6 token '%s' is duplicated, ignoring: %m", rvalue);
+ else if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store IPv6 token '%s', ignoring: %m", rvalue);
+ else
+ TAKE_PTR(token);
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipv6_accept_ra_start_dhcp6_client, ipv6_accept_ra_start_dhcp6_client, IPv6AcceptRAStartDHCP6Client,
+ "Failed to parse DHCPv6Client= setting")
+static const char* const ipv6_accept_ra_start_dhcp6_client_table[_IPV6_ACCEPT_RA_START_DHCP6_CLIENT_MAX] = {
+ [IPV6_ACCEPT_RA_START_DHCP6_CLIENT_NO] = "no",
+ [IPV6_ACCEPT_RA_START_DHCP6_CLIENT_ALWAYS] = "always",
+ [IPV6_ACCEPT_RA_START_DHCP6_CLIENT_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(ipv6_accept_ra_start_dhcp6_client, IPv6AcceptRAStartDHCP6Client, IPV6_ACCEPT_RA_START_DHCP6_CLIENT_YES);
diff --git a/src/network/networkd-ndisc.h b/src/network/networkd-ndisc.h
new file mode 100644
index 0000000..1562411
--- /dev/null
+++ b/src/network/networkd-ndisc.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "networkd-address.h"
+#include "networkd-link.h"
+#include "networkd-route.h"
+#include "time-util.h"
+
+typedef struct IPv6Token IPv6Token;
+
+typedef enum IPv6TokenAddressGeneration {
+ IPV6_TOKEN_ADDRESS_GENERATION_NONE,
+ IPV6_TOKEN_ADDRESS_GENERATION_STATIC,
+ IPV6_TOKEN_ADDRESS_GENERATION_PREFIXSTABLE,
+ _IPV6_TOKEN_ADDRESS_GENERATION_MAX,
+ _IPV6_TOKEN_ADDRESS_GENERATION_INVALID = -1,
+} IPv6TokenAddressGeneration;
+
+typedef enum IPv6AcceptRAStartDHCP6Client {
+ IPV6_ACCEPT_RA_START_DHCP6_CLIENT_NO,
+ IPV6_ACCEPT_RA_START_DHCP6_CLIENT_ALWAYS,
+ IPV6_ACCEPT_RA_START_DHCP6_CLIENT_YES,
+ _IPV6_ACCEPT_RA_START_DHCP6_CLIENT_MAX,
+ _IPV6_ACCEPT_RA_START_DHCP6_CLIENT_INVALID = -1,
+} IPv6AcceptRAStartDHCP6Client;
+
+typedef struct NDiscAddress {
+ /* Used when GC'ing old DNS servers when configuration changes. */
+ bool marked;
+ struct in6_addr router;
+ Address *address;
+} NDiscAddress;
+
+typedef struct NDiscRoute {
+ /* Used when GC'ing old DNS servers when configuration changes. */
+ bool marked;
+ struct in6_addr router;
+ Route *route;
+} NDiscRoute;
+
+typedef struct NDiscRDNSS {
+ /* Used when GC'ing old DNS servers when configuration changes. */
+ bool marked;
+ struct in6_addr router;
+ usec_t valid_until;
+ struct in6_addr address;
+} NDiscRDNSS;
+
+typedef struct NDiscDNSSL {
+ /* Used when GC'ing old domains when configuration changes. */
+ bool marked;
+ struct in6_addr router;
+ usec_t valid_until;
+ /* The domain name follows immediately. */
+} NDiscDNSSL;
+
+struct IPv6Token {
+ IPv6TokenAddressGeneration address_generation_type;
+
+ uint8_t dad_counter;
+ struct in6_addr prefix;
+};
+
+int ipv6token_new(IPv6Token **ret);
+DEFINE_TRIVIAL_CLEANUP_FUNC(IPv6Token *, freep);
+
+static inline char* NDISC_DNSSL_DOMAIN(const NDiscDNSSL *n) {
+ return ((char*) n) + ALIGN(sizeof(NDiscDNSSL));
+}
+
+bool link_ipv6_accept_ra_enabled(Link *link);
+
+void network_adjust_ipv6_accept_ra(Network *network);
+
+int ndisc_configure(Link *link);
+void ndisc_vacuum(Link *link);
+void ndisc_flush(Link *link);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ndisc_deny_listed_prefix);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_generation_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_accept_ra_start_dhcp6_client);
+
+const char* ipv6_accept_ra_start_dhcp6_client_to_string(IPv6AcceptRAStartDHCP6Client i) _const_;
+IPv6AcceptRAStartDHCP6Client ipv6_accept_ra_start_dhcp6_client_from_string(const char *s) _pure_;
diff --git a/src/network/networkd-neighbor.c b/src/network/networkd-neighbor.c
new file mode 100644
index 0000000..c805d52
--- /dev/null
+++ b/src/network/networkd-neighbor.c
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "netlink-util.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-neighbor.h"
+#include "networkd-network.h"
+#include "set.h"
+
+Neighbor *neighbor_free(Neighbor *neighbor) {
+ if (!neighbor)
+ return NULL;
+
+ if (neighbor->network) {
+ assert(neighbor->section);
+ hashmap_remove(neighbor->network->neighbors_by_section, neighbor->section);
+ }
+
+ network_config_section_free(neighbor->section);
+
+ if (neighbor->link) {
+ set_remove(neighbor->link->neighbors, neighbor);
+ set_remove(neighbor->link->neighbors_foreign, neighbor);
+ }
+
+ return mfree(neighbor);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(Neighbor, neighbor_free);
+
+static int neighbor_new_static(Network *network, const char *filename, unsigned section_line, Neighbor **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(neighbor_freep) Neighbor *neighbor = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ neighbor = hashmap_get(network->neighbors_by_section, n);
+ if (neighbor) {
+ *ret = TAKE_PTR(neighbor);
+ return 0;
+ }
+
+ neighbor = new(Neighbor, 1);
+ if (!neighbor)
+ return -ENOMEM;
+
+ *neighbor = (Neighbor) {
+ .network = network,
+ .family = AF_UNSPEC,
+ .section = TAKE_PTR(n),
+ };
+
+ r = hashmap_ensure_allocated(&network->neighbors_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->neighbors_by_section, neighbor->section, neighbor);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(neighbor);
+ return 0;
+}
+
+static void neighbor_hash_func(const Neighbor *neighbor, struct siphash *state) {
+ assert(neighbor);
+
+ siphash24_compress(&neighbor->family, sizeof(neighbor->family), state);
+ siphash24_compress(&neighbor->lladdr_size, sizeof(neighbor->lladdr_size), state);
+
+ switch (neighbor->family) {
+ case AF_INET:
+ case AF_INET6:
+ /* Equality of neighbors are given by the pair (addr,lladdr) */
+ siphash24_compress(&neighbor->in_addr, FAMILY_ADDRESS_SIZE(neighbor->family), state);
+ break;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ break;
+ }
+
+ siphash24_compress(&neighbor->lladdr, neighbor->lladdr_size, state);
+}
+
+static int neighbor_compare_func(const Neighbor *a, const Neighbor *b) {
+ int r;
+
+ r = CMP(a->family, b->family);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->lladdr_size, b->lladdr_size);
+ if (r != 0)
+ return r;
+
+ switch (a->family) {
+ case AF_INET:
+ case AF_INET6:
+ r = memcmp(&a->in_addr, &b->in_addr, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+ }
+
+ return memcmp(&a->lladdr, &b->lladdr, a->lladdr_size);
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(neighbor_hash_ops, Neighbor, neighbor_hash_func, neighbor_compare_func, neighbor_free);
+
+static int neighbor_get(Link *link, const Neighbor *in, Neighbor **ret) {
+ Neighbor *existing;
+
+ assert(link);
+ assert(in);
+
+ existing = set_get(link->neighbors, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(link->neighbors_foreign, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int neighbor_add_internal(Link *link, Set **neighbors, const Neighbor *in, Neighbor **ret) {
+ _cleanup_(neighbor_freep) Neighbor *neighbor = NULL;
+ int r;
+
+ assert(link);
+ assert(neighbors);
+ assert(in);
+
+ neighbor = new(Neighbor, 1);
+ if (!neighbor)
+ return -ENOMEM;
+
+ *neighbor = (Neighbor) {
+ .family = in->family,
+ .in_addr = in->in_addr,
+ .lladdr = in->lladdr,
+ .lladdr_size = in->lladdr_size,
+ };
+
+ r = set_ensure_put(neighbors, &neighbor_hash_ops, neighbor);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EEXIST;
+
+ neighbor->link = link;
+
+ if (ret)
+ *ret = neighbor;
+
+ TAKE_PTR(neighbor);
+ return 0;
+}
+
+static int neighbor_add(Link *link, const Neighbor *in, Neighbor **ret) {
+ Neighbor *neighbor;
+ int r;
+
+ r = neighbor_get(link, in, &neighbor);
+ if (r == -ENOENT) {
+ /* Neighbor doesn't exist, make a new one */
+ r = neighbor_add_internal(link, &link->neighbors, in, &neighbor);
+ if (r < 0)
+ return r;
+ } else if (r == 0) {
+ /* Neighbor is foreign, claim it as recognized */
+ r = set_ensure_put(&link->neighbors, &neighbor_hash_ops, neighbor);
+ if (r < 0)
+ return r;
+
+ set_remove(link->neighbors_foreign, neighbor);
+ } else if (r == 1) {
+ /* Neighbor already exists */
+ } else
+ return r;
+
+ if (ret)
+ *ret = neighbor;
+ return 0;
+}
+
+static int neighbor_add_foreign(Link *link, const Neighbor *in, Neighbor **ret) {
+ return neighbor_add_internal(link, &link->neighbors_foreign, in, ret);
+}
+
+static bool neighbor_equal(const Neighbor *n1, const Neighbor *n2) {
+ if (n1 == n2)
+ return true;
+
+ if (!n1 || !n2)
+ return false;
+
+ return neighbor_compare_func(n1, n2) == 0;
+}
+
+static int neighbor_configure_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->neighbor_messages > 0);
+
+ link->neighbor_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST)
+ /* Neighbor may not exist yet. So, do not enter failed state here. */
+ log_link_message_warning_errno(link, m, r, "Could not set neighbor, ignoring");
+
+ if (link->neighbor_messages == 0) {
+ log_link_debug(link, "Neighbors set");
+ link->neighbors_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int neighbor_configure(Neighbor *neighbor, Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(neighbor);
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ r = sd_rtnl_message_new_neigh(link->manager->rtnl, &req, RTM_NEWNEIGH,
+ link->ifindex, neighbor->family);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_NEWNEIGH message: %m");
+
+ r = sd_rtnl_message_neigh_set_state(req, NUD_PERMANENT);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set state: %m");
+
+ r = sd_netlink_message_set_flags(req, NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_REPLACE);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set flags: %m");
+
+ r = sd_netlink_message_append_data(req, NDA_LLADDR, &neighbor->lladdr, neighbor->lladdr_size);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NDA_LLADDR attribute: %m");
+
+ r = netlink_message_append_in_addr_union(req, NDA_DST, neighbor->family, &neighbor->in_addr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NDA_DST attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, neighbor_configure_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link->neighbor_messages++;
+ link_ref(link);
+
+ r = neighbor_add(link, neighbor, NULL);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not add neighbor: %m");
+
+ return 0;
+}
+
+int link_set_neighbors(Link *link) {
+ Neighbor *neighbor;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state != _LINK_STATE_INVALID);
+
+ link->neighbors_configured = false;
+
+ HASHMAP_FOREACH(neighbor, link->network->neighbors_by_section) {
+ r = neighbor_configure(neighbor, link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set neighbor: %m");
+ }
+
+ if (link->neighbor_messages == 0) {
+ link->neighbors_configured = true;
+ link_check_ready(link);
+ } else {
+ log_link_debug(link, "Setting neighbors");
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ }
+
+ return 0;
+}
+
+static int neighbor_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -ESRCH)
+ /* Neighbor may not exist because it already got deleted, ignore that. */
+ log_link_message_warning_errno(link, m, r, "Could not remove neighbor");
+
+ return 1;
+}
+
+static int neighbor_remove(Neighbor *neighbor, Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(neighbor);
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ r = sd_rtnl_message_new_neigh(link->manager->rtnl, &req, RTM_DELNEIGH,
+ link->ifindex, neighbor->family);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_DELNEIGH message: %m");
+
+ r = netlink_message_append_in_addr_union(req, NDA_DST, neighbor->family, &neighbor->in_addr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NDA_DST attribute: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, neighbor_remove_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static bool link_is_neighbor_configured(Link *link, Neighbor *neighbor) {
+ Neighbor *net_neighbor;
+
+ assert(link);
+ assert(neighbor);
+
+ if (!link->network)
+ return false;
+
+ HASHMAP_FOREACH(net_neighbor, link->network->neighbors_by_section)
+ if (neighbor_equal(net_neighbor, neighbor))
+ return true;
+
+ return false;
+}
+
+int link_drop_foreign_neighbors(Link *link) {
+ Neighbor *neighbor;
+ int r;
+
+ assert(link);
+
+ SET_FOREACH(neighbor, link->neighbors_foreign)
+ if (link_is_neighbor_configured(link, neighbor)) {
+ r = neighbor_add(link, neighbor, NULL);
+ if (r < 0)
+ return r;
+ } else {
+ r = neighbor_remove(neighbor, link);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int link_drop_neighbors(Link *link) {
+ Neighbor *neighbor;
+ int k, r = 0;
+
+ assert(link);
+
+ SET_FOREACH(neighbor, link->neighbors) {
+ k = neighbor_remove(neighbor, link);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_rtnl_process_neighbor_lladdr(sd_netlink_message *message, union lladdr_union *lladdr, size_t *size, char **str) {
+ int r;
+
+ assert(message);
+ assert(lladdr);
+ assert(size);
+ assert(str);
+
+ *str = NULL;
+
+ r = sd_netlink_message_read(message, NDA_LLADDR, sizeof(lladdr->ip.in6), &lladdr->ip.in6);
+ if (r >= 0) {
+ *size = sizeof(lladdr->ip.in6);
+ if (in_addr_to_string(AF_INET6, &lladdr->ip, str) < 0)
+ log_warning_errno(r, "Could not print lower address: %m");
+ return r;
+ }
+
+ r = sd_netlink_message_read(message, NDA_LLADDR, sizeof(lladdr->mac), &lladdr->mac);
+ if (r >= 0) {
+ *size = sizeof(lladdr->mac);
+ *str = new(char, ETHER_ADDR_TO_STRING_MAX);
+ if (!*str) {
+ log_oom();
+ return r;
+ }
+ ether_addr_to_string(&lladdr->mac, *str);
+ return r;
+ }
+
+ r = sd_netlink_message_read(message, NDA_LLADDR, sizeof(lladdr->ip.in), &lladdr->ip.in);
+ if (r >= 0) {
+ *size = sizeof(lladdr->ip.in);
+ if (in_addr_to_string(AF_INET, &lladdr->ip, str) < 0)
+ log_warning_errno(r, "Could not print lower address: %m");
+ return r;
+ }
+
+ return r;
+}
+
+int manager_rtnl_process_neighbor(sd_netlink *rtnl, sd_netlink_message *message, Manager *m) {
+ _cleanup_(neighbor_freep) Neighbor *tmp = NULL;
+ _cleanup_free_ char *addr_str = NULL, *lladdr_str = NULL;
+ Neighbor *neighbor = NULL;
+ uint16_t type, state;
+ int ifindex, r;
+ Link *link;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_message_warning_errno(message, r, "rtnl: failed to receive neighbor message, ignoring");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWNEIGH, RTM_DELNEIGH)) {
+ log_warning("rtnl: received unexpected message type %u when processing neighbor, ignoring.", type);
+ return 0;
+ }
+
+ r = sd_rtnl_message_neigh_get_state(message, &state);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: received neighbor message with invalid state, ignoring: %m");
+ return 0;
+ } else if (!FLAGS_SET(state, NUD_PERMANENT)) {
+ log_debug("rtnl: received non-static neighbor, ignoring.");
+ return 0;
+ }
+
+ r = sd_rtnl_message_neigh_get_ifindex(message, &ifindex);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get ifindex from message, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received neighbor message with invalid ifindex %d, ignoring.", ifindex);
+ return 0;
+ }
+
+ r = link_get(m, ifindex, &link);
+ if (r < 0 || !link) {
+ /* when enumerating we might be out of sync, but we will get the neighbor again, so just
+ * ignore it */
+ if (!m->enumerating)
+ log_warning("rtnl: received neighbor for link '%d' we don't know about, ignoring.", ifindex);
+ return 0;
+ }
+
+ tmp = new0(Neighbor, 1);
+
+ r = sd_rtnl_message_neigh_get_family(message, &tmp->family);
+ if (r < 0) {
+ log_link_warning(link, "rtnl: received neighbor message without family, ignoring.");
+ return 0;
+ } else if (!IN_SET(tmp->family, AF_INET, AF_INET6)) {
+ log_link_debug(link, "rtnl: received neighbor message with invalid family '%i', ignoring.", tmp->family);
+ return 0;
+ }
+
+ r = netlink_message_read_in_addr_union(message, NDA_DST, tmp->family, &tmp->in_addr);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received neighbor message without valid address, ignoring: %m");
+ return 0;
+ }
+
+ if (in_addr_to_string(tmp->family, &tmp->in_addr, &addr_str) < 0)
+ log_link_warning_errno(link, r, "Could not print address: %m");
+
+ r = manager_rtnl_process_neighbor_lladdr(message, &tmp->lladdr, &tmp->lladdr_size, &lladdr_str);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received neighbor message with invalid lladdr, ignoring: %m");
+ return 0;
+ }
+
+ (void) neighbor_get(link, tmp, &neighbor);
+
+ switch (type) {
+ case RTM_NEWNEIGH:
+ if (neighbor)
+ log_link_debug(link, "Received remembered neighbor: %s->%s",
+ strnull(addr_str), strnull(lladdr_str));
+ else {
+ /* A neighbor appeared that we did not request */
+ r = neighbor_add_foreign(link, tmp, NULL);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to remember foreign neighbor %s->%s, ignoring: %m",
+ strnull(addr_str), strnull(lladdr_str));
+ return 0;
+ } else
+ log_link_debug(link, "Remembering foreign neighbor: %s->%s",
+ strnull(addr_str), strnull(lladdr_str));
+ }
+
+ break;
+
+ case RTM_DELNEIGH:
+ if (neighbor) {
+ log_link_debug(link, "Forgetting neighbor: %s->%s",
+ strnull(addr_str), strnull(lladdr_str));
+ (void) neighbor_free(neighbor);
+ } else
+ log_link_debug(link, "Kernel removed a neighbor we don't remember: %s->%s, ignoring.",
+ strnull(addr_str), strnull(lladdr_str));
+
+ break;
+
+ default:
+ assert_not_reached("Received invalid RTNL message type");
+ }
+
+ return 1;
+}
+
+static int neighbor_section_verify(Neighbor *neighbor) {
+ if (section_is_invalid(neighbor->section))
+ return -EINVAL;
+
+ if (neighbor->family == AF_UNSPEC)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Neighbor section without Address= configured. "
+ "Ignoring [Neighbor] section from line %u.",
+ neighbor->section->filename, neighbor->section->line);
+
+ if (neighbor->lladdr_size == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Neighbor section without LinkLayerAddress= configured. "
+ "Ignoring [Neighbor] section from line %u.",
+ neighbor->section->filename, neighbor->section->line);
+
+ return 0;
+}
+
+void network_drop_invalid_neighbors(Network *network) {
+ Neighbor *neighbor;
+
+ assert(network);
+
+ HASHMAP_FOREACH(neighbor, network->neighbors_by_section)
+ if (neighbor_section_verify(neighbor) < 0)
+ neighbor_free(neighbor);
+}
+
+
+int config_parse_neighbor_address(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(neighbor_free_or_set_invalidp) Neighbor *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = neighbor_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = in_addr_from_string_auto(rvalue, &n->family, &n->in_addr);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Neighbor Address is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+
+ return 0;
+}
+
+int config_parse_neighbor_lladdr(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(neighbor_free_or_set_invalidp) Neighbor *n = NULL;
+ int family, r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = neighbor_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = ether_addr_from_string(rvalue, &n->lladdr.mac);
+ if (r >= 0)
+ n->lladdr_size = sizeof(n->lladdr.mac);
+ else {
+ r = in_addr_from_string_auto(rvalue, &family, &n->lladdr.ip);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Neighbor LinkLayerAddress= is invalid, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ n->lladdr_size = family == AF_INET ? sizeof(n->lladdr.ip.in) : sizeof(n->lladdr.ip.in6);
+ }
+
+ TAKE_PTR(n);
+
+ return 0;
+}
+
+int config_parse_neighbor_hwaddr(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(neighbor_free_or_set_invalidp) Neighbor *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = neighbor_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = ether_addr_from_string(rvalue, &n->lladdr.mac);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Neighbor MACAddress= is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n->lladdr_size = sizeof(n->lladdr.mac);
+ TAKE_PTR(n);
+
+ return 0;
+}
diff --git a/src/network/networkd-neighbor.h b/src/network/networkd-neighbor.h
new file mode 100644
index 0000000..8ad790b
--- /dev/null
+++ b/src/network/networkd-neighbor.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "ether-addr-util.h"
+#include "in-addr-util.h"
+#include "networkd-util.h"
+
+typedef Manager Manager;
+typedef Network Network;
+typedef Link Link;
+
+union lladdr_union {
+ struct ether_addr mac;
+ union in_addr_union ip;
+};
+
+typedef struct Neighbor {
+ Network *network;
+ Link *link;
+ NetworkConfigSection *section;
+
+ int family;
+ union in_addr_union in_addr;
+ union lladdr_union lladdr;
+ size_t lladdr_size;
+} Neighbor;
+
+Neighbor *neighbor_free(Neighbor *neighbor);
+
+void network_drop_invalid_neighbors(Network *network);
+
+int link_set_neighbors(Link *link);
+int link_drop_neighbors(Link *link);
+int link_drop_foreign_neighbors(Link *link);
+
+int manager_rtnl_process_neighbor(sd_netlink *rtnl, sd_netlink_message *message, Manager *m);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_neighbor_address);
+CONFIG_PARSER_PROTOTYPE(config_parse_neighbor_hwaddr);
+CONFIG_PARSER_PROTOTYPE(config_parse_neighbor_lladdr);
diff --git a/src/network/networkd-network-bus.c b/src/network/networkd-network-bus.c
new file mode 100644
index 0000000..0e5f148
--- /dev/null
+++ b/src/network/networkd-network-bus.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "networkd-manager.h"
+#include "networkd-network-bus.h"
+#include "string-util.h"
+#include "strv.h"
+
+static int property_get_ether_addrs(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ char buf[ETHER_ADDR_TO_STRING_MAX];
+ const struct ether_addr *p;
+ Set *s;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ s = *(Set **) userdata;
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(p, s) {
+ r = sd_bus_message_append(reply, "s", ether_addr_to_string(p, buf));
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+const sd_bus_vtable network_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Description", "s", NULL, offsetof(Network, description), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SourcePath", "s", NULL, offsetof(Network, filename), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchMAC", "as", property_get_ether_addrs, offsetof(Network, match_mac), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchPath", "as", NULL, offsetof(Network, match_path), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchDriver", "as", NULL, offsetof(Network, match_driver), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchType", "as", NULL, offsetof(Network, match_type), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MatchName", "as", NULL, offsetof(Network, match_name), SD_BUS_VTABLE_PROPERTY_CONST),
+
+ SD_BUS_VTABLE_END
+};
+
+static char *network_bus_path(Network *network) {
+ _cleanup_free_ char *name = NULL;
+ char *networkname, *d, *path;
+ int r;
+
+ assert(network);
+ assert(network->filename);
+
+ name = strdup(network->filename);
+ if (!name)
+ return NULL;
+
+ networkname = basename(name);
+
+ d = strrchr(networkname, '.');
+ if (!d)
+ return NULL;
+
+ assert(streq(d, ".network"));
+
+ *d = '\0';
+
+ r = sd_bus_path_encode("/org/freedesktop/network1/network", networkname, &path);
+ if (r < 0)
+ return NULL;
+
+ return path;
+}
+
+int network_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ Network *network;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(m);
+ assert(nodes);
+
+ ORDERED_HASHMAP_FOREACH(network, m->networks) {
+ char *p;
+
+ p = network_bus_path(network);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_consume(&l, p);
+ if (r < 0)
+ return r;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+int network_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ Manager *m = userdata;
+ Network *network;
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(m);
+ assert(found);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/network1/network", &name);
+ if (r < 0)
+ return 0;
+
+ r = network_get_by_name(m, name, &network);
+ if (r < 0)
+ return 0;
+
+ *found = network;
+
+ return 1;
+}
diff --git a/src/network/networkd-network-bus.h b/src/network/networkd-network-bus.h
new file mode 100644
index 0000000..cca1e0a
--- /dev/null
+++ b/src/network/networkd-network-bus.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef struct Link Link;
+
+extern const sd_bus_vtable network_vtable[];
+
+int network_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
+int network_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf
new file mode 100644
index 0000000..5cc9e3e
--- /dev/null
+++ b/src/network/networkd-network-gperf.gperf
@@ -0,0 +1,482 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "netem.h"
+#include "network-internal.h"
+#include "networkd-address-label.h"
+#include "networkd-address.h"
+#include "networkd-can.h"
+#include "networkd-conf.h"
+#include "networkd-dhcp-common.h"
+#include "networkd-dhcp-server.h"
+#include "networkd-dhcp4.h"
+#include "networkd-dhcp6.h"
+#include "networkd-fdb.h"
+#include "networkd-ipv4ll.h"
+#include "networkd-ipv6-proxy-ndp.h"
+#include "networkd-mdb.h"
+#include "networkd-ndisc.h"
+#include "networkd-network.h"
+#include "networkd-neighbor.h"
+#include "networkd-nexthop.h"
+#include "networkd-radv.h"
+#include "networkd-route.h"
+#include "networkd-routing-policy-rule.h"
+#include "networkd-sriov.h"
+#include "qdisc.h"
+#include "tclass.h"
+#include "vlan-util.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name network_network_gperf_hash
+%define lookup-function-name network_network_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Match.MACAddress, config_parse_hwaddrs, 0, offsetof(Network, match_mac)
+Match.PermanentMACAddress, config_parse_hwaddrs, 0, offsetof(Network, match_permanent_mac)
+Match.Path, config_parse_match_strv, 0, offsetof(Network, match_path)
+Match.Driver, config_parse_match_strv, 0, offsetof(Network, match_driver)
+Match.Type, config_parse_match_strv, 0, offsetof(Network, match_type)
+Match.WLANInterfaceType, config_parse_match_strv, 0, offsetof(Network, match_wlan_iftype)
+Match.SSID, config_parse_match_strv, 0, offsetof(Network, match_ssid)
+Match.BSSID, config_parse_hwaddrs, 0, offsetof(Network, match_bssid)
+Match.Name, config_parse_match_ifnames, IFNAME_VALID_ALTERNATIVE, offsetof(Network, match_name)
+Match.Property, config_parse_match_property, 0, offsetof(Network, match_property)
+Match.Host, config_parse_net_condition, CONDITION_HOST, offsetof(Network, conditions)
+Match.Virtualization, config_parse_net_condition, CONDITION_VIRTUALIZATION, offsetof(Network, conditions)
+Match.KernelCommandLine, config_parse_net_condition, CONDITION_KERNEL_COMMAND_LINE, offsetof(Network, conditions)
+Match.KernelVersion, config_parse_net_condition, CONDITION_KERNEL_VERSION, offsetof(Network, conditions)
+Match.Architecture, config_parse_net_condition, CONDITION_ARCHITECTURE, offsetof(Network, conditions)
+Link.MACAddress, config_parse_hwaddr, 0, offsetof(Network, mac)
+Link.MTUBytes, config_parse_mtu, AF_UNSPEC, offsetof(Network, mtu)
+Link.Group, config_parse_uint32, 0, offsetof(Network, group)
+Link.ARP, config_parse_tristate, 0, offsetof(Network, arp)
+Link.Multicast, config_parse_tristate, 0, offsetof(Network, multicast)
+Link.AllMulticast, config_parse_tristate, 0, offsetof(Network, allmulticast)
+Link.Unmanaged, config_parse_bool, 0, offsetof(Network, unmanaged)
+Link.RequiredForOnline, config_parse_required_for_online, 0, 0
+SR-IOV.VirtualFunction, config_parse_sr_iov_uint32, 0, 0
+SR-IOV.VLANId, config_parse_sr_iov_uint32, 0, 0
+SR-IOV.QualityOfService, config_parse_sr_iov_uint32, 0, 0
+SR-IOV.VLANProtocol, config_parse_sr_iov_vlan_proto, 0, 0
+SR-IOV.MACSpoofCheck, config_parse_sr_iov_boolean, 0, 0
+SR-IOV.QueryReceiveSideScaling, config_parse_sr_iov_boolean, 0, 0
+SR-IOV.Trust, config_parse_sr_iov_boolean, 0, 0
+SR-IOV.LinkState, config_parse_sr_iov_link_state, 0, 0
+SR-IOV.MACAddress, config_parse_sr_iov_mac, 0, 0
+Network.Description, config_parse_string, 0, offsetof(Network, description)
+Network.Bridge, config_parse_ifname, 0, offsetof(Network, bridge_name)
+Network.Bond, config_parse_ifname, 0, offsetof(Network, bond_name)
+Network.VLAN, config_parse_stacked_netdev, NETDEV_KIND_VLAN, offsetof(Network, stacked_netdev_names)
+Network.MACVLAN, config_parse_stacked_netdev, NETDEV_KIND_MACVLAN, offsetof(Network, stacked_netdev_names)
+Network.MACVTAP, config_parse_stacked_netdev, NETDEV_KIND_MACVTAP, offsetof(Network, stacked_netdev_names)
+Network.IPVLAN, config_parse_stacked_netdev, NETDEV_KIND_IPVLAN, offsetof(Network, stacked_netdev_names)
+Network.IPVTAP, config_parse_stacked_netdev, NETDEV_KIND_IPVTAP, offsetof(Network, stacked_netdev_names)
+Network.VXLAN, config_parse_stacked_netdev, NETDEV_KIND_VXLAN, offsetof(Network, stacked_netdev_names)
+Network.L2TP, config_parse_stacked_netdev, NETDEV_KIND_L2TP, offsetof(Network, stacked_netdev_names)
+Network.MACsec, config_parse_stacked_netdev, NETDEV_KIND_MACSEC, offsetof(Network, stacked_netdev_names)
+Network.Tunnel, config_parse_stacked_netdev, _NETDEV_KIND_TUNNEL, offsetof(Network, stacked_netdev_names)
+Network.Xfrm, config_parse_stacked_netdev, NETDEV_KIND_XFRM, offsetof(Network, stacked_netdev_names)
+Network.VRF, config_parse_ifname, 0, offsetof(Network, vrf_name)
+Network.DHCP, config_parse_dhcp, 0, offsetof(Network, dhcp)
+Network.DHCPServer, config_parse_bool, 0, offsetof(Network, dhcp_server)
+Network.LinkLocalAddressing, config_parse_link_local_address_family, 0, offsetof(Network, link_local)
+Network.IPv6LinkLocalAddressGenerationMode, config_parse_ipv6_link_local_address_gen_mode, 0, offsetof(Network, ipv6ll_address_gen_mode)
+Network.IPv4LLRoute, config_parse_bool, 0, offsetof(Network, ipv4ll_route)
+Network.DefaultRouteOnDevice, config_parse_bool, 0, offsetof(Network, default_route_on_device)
+Network.IPv6Token, config_parse_address_generation_type, 0, 0
+Network.LLDP, config_parse_lldp_mode, 0, offsetof(Network, lldp_mode)
+Network.EmitLLDP, config_parse_lldp_emit, 0, offsetof(Network, lldp_emit)
+Network.Address, config_parse_address, 0, 0
+Network.Gateway, config_parse_gateway, 0, 0
+Network.Domains, config_parse_domains, 0, 0
+Network.DNS, config_parse_dns, 0, 0
+Network.DNSDefaultRoute, config_parse_tristate, 0, offsetof(Network, dns_default_route)
+Network.LLMNR, config_parse_resolve_support, 0, offsetof(Network, llmnr)
+Network.MulticastDNS, config_parse_resolve_support, 0, offsetof(Network, mdns)
+Network.DNSOverTLS, config_parse_dns_over_tls_mode, 0, offsetof(Network, dns_over_tls_mode)
+Network.DNSSEC, config_parse_dnssec_mode, 0, offsetof(Network, dnssec_mode)
+Network.DNSSECNegativeTrustAnchors, config_parse_dnssec_negative_trust_anchors, 0, 0
+Network.NTP, config_parse_ntp, 0, offsetof(Network, ntp)
+Network.IPForward, config_parse_address_family_with_kernel, 0, offsetof(Network, ip_forward)
+Network.IPMasquerade, config_parse_bool, 0, offsetof(Network, ip_masquerade)
+Network.IPv6PrivacyExtensions, config_parse_ipv6_privacy_extensions, 0, offsetof(Network, ipv6_privacy_extensions)
+Network.IPv6AcceptRA, config_parse_tristate, 0, offsetof(Network, ipv6_accept_ra)
+Network.IPv6AcceptRouterAdvertisements, config_parse_tristate, 0, offsetof(Network, ipv6_accept_ra)
+Network.IPv6DuplicateAddressDetection, config_parse_int, 0, offsetof(Network, ipv6_dad_transmits)
+Network.IPv6HopLimit, config_parse_int, 0, offsetof(Network, ipv6_hop_limit)
+Network.IPv6ProxyNDP, config_parse_tristate, 0, offsetof(Network, ipv6_proxy_ndp)
+Network.IPv6MTUBytes, config_parse_mtu, AF_INET6, offsetof(Network, ipv6_mtu)
+Network.IPv4AcceptLocal, config_parse_tristate, 0, offsetof(Network, ipv4_accept_local)
+Network.ActiveSlave, config_parse_bool, 0, offsetof(Network, active_slave)
+Network.PrimarySlave, config_parse_bool, 0, offsetof(Network, primary_slave)
+Network.IPv4ProxyARP, config_parse_tristate, 0, offsetof(Network, proxy_arp)
+Network.ProxyARP, config_parse_tristate, 0, offsetof(Network, proxy_arp)
+Network.IPv6ProxyNDPAddress, config_parse_ipv6_proxy_ndp_address, 0, 0
+Network.BindCarrier, config_parse_strv, 0, offsetof(Network, bind_carrier)
+Network.ConfigureWithoutCarrier, config_parse_bool, 0, offsetof(Network, configure_without_carrier)
+Network.IgnoreCarrierLoss, config_parse_tristate, 0, offsetof(Network, ignore_carrier_loss)
+Network.KeepConfiguration, config_parse_keep_configuration, 0, offsetof(Network, keep_configuration)
+Network.IPv6SendRA, config_parse_router_prefix_delegation, 0, offsetof(Network, router_prefix_delegation)
+Network.DHCPv6PrefixDelegation, config_parse_tristate, 0, offsetof(Network, dhcp6_pd)
+Address.Address, config_parse_address, 0, 0
+Address.Peer, config_parse_address, 0, 0
+Address.Broadcast, config_parse_broadcast, 0, 0
+Address.Label, config_parse_label, 0, 0
+Address.PreferredLifetime, config_parse_lifetime, 0, 0
+Address.HomeAddress, config_parse_address_flags, IFA_F_HOMEADDRESS, 0
+Address.ManageTemporaryAddress, config_parse_address_flags, IFA_F_MANAGETEMPADDR, 0
+Address.PrefixRoute, config_parse_address_flags, IFA_F_NOPREFIXROUTE, 0 /* deprecated */
+Address.AddPrefixRoute, config_parse_address_flags, IFA_F_NOPREFIXROUTE, 0
+Address.AutoJoin, config_parse_address_flags, IFA_F_MCAUTOJOIN, 0
+Address.DuplicateAddressDetection, config_parse_duplicate_address_detection, 0, 0
+Address.Scope, config_parse_address_scope, 0, 0
+IPv6AddressLabel.Prefix, config_parse_address_label_prefix, 0, 0
+IPv6AddressLabel.Label, config_parse_address_label, 0, 0
+Neighbor.Address, config_parse_neighbor_address, 0, 0
+Neighbor.LinkLayerAddress, config_parse_neighbor_lladdr, 0, 0
+Neighbor.MACAddress, config_parse_neighbor_hwaddr, 0, 0 /* deprecated */
+RoutingPolicyRule.TypeOfService, config_parse_routing_policy_rule_tos, 0, 0
+RoutingPolicyRule.Priority, config_parse_routing_policy_rule_priority, 0, 0
+RoutingPolicyRule.Table, config_parse_routing_policy_rule_table, 0, 0
+RoutingPolicyRule.FirewallMark, config_parse_routing_policy_rule_fwmark_mask, 0, 0
+RoutingPolicyRule.From, config_parse_routing_policy_rule_prefix, 0, 0
+RoutingPolicyRule.To, config_parse_routing_policy_rule_prefix, 0, 0
+RoutingPolicyRule.IncomingInterface, config_parse_routing_policy_rule_device, 0, 0
+RoutingPolicyRule.OutgoingInterface, config_parse_routing_policy_rule_device, 0, 0
+RoutingPolicyRule.IPProtocol, config_parse_routing_policy_rule_ip_protocol, 0, 0
+RoutingPolicyRule.SourcePort, config_parse_routing_policy_rule_port_range, 0, 0
+RoutingPolicyRule.DestinationPort, config_parse_routing_policy_rule_port_range, 0, 0
+RoutingPolicyRule.InvertRule, config_parse_routing_policy_rule_invert, 0, 0
+RoutingPolicyRule.Family, config_parse_routing_policy_rule_family, 0, 0
+RoutingPolicyRule.User, config_parse_routing_policy_rule_uid_range, 0, 0
+RoutingPolicyRule.SuppressPrefixLength, config_parse_routing_policy_rule_suppress_prefixlen, 0, 0
+Route.Gateway, config_parse_gateway, 0, 0
+Route.Destination, config_parse_destination, 0, 0
+Route.Source, config_parse_destination, 0, 0
+Route.Metric, config_parse_route_priority, 0, 0
+Route.Scope, config_parse_route_scope, 0, 0
+Route.PreferredSource, config_parse_preferred_src, 0, 0
+Route.Table, config_parse_route_table, 0, 0
+Route.MTUBytes, config_parse_route_mtu, AF_UNSPEC, 0
+Route.GatewayOnLink, config_parse_route_boolean, 0, 0
+Route.GatewayOnlink, config_parse_route_boolean, 0, 0
+Route.IPv6Preference, config_parse_ipv6_route_preference, 0, 0
+Route.Protocol, config_parse_route_protocol, 0, 0
+Route.Type, config_parse_route_type, 0, 0
+Route.InitialCongestionWindow, config_parse_tcp_window, 0, 0
+Route.InitialAdvertisedReceiveWindow, config_parse_tcp_window, 0, 0
+Route.QuickAck, config_parse_route_boolean, 0, 0
+Route.FastOpenNoCookie, config_parse_route_boolean, 0, 0
+Route.TTLPropagate, config_parse_route_boolean, 0, 0
+Route.MultiPathRoute, config_parse_multipath_route, 0, 0
+NextHop.Id, config_parse_nexthop_id, 0, 0
+NextHop.Gateway, config_parse_nexthop_gateway, 0, 0
+DHCPv4.ClientIdentifier, config_parse_dhcp_client_identifier, 0, offsetof(Network, dhcp_client_identifier)
+DHCPv4.UseDNS, config_parse_dhcp_use_dns, 0, 0
+DHCPv4.RoutesToDNS, config_parse_bool, 0, offsetof(Network, dhcp_routes_to_dns)
+DHCPv4.UseNTP, config_parse_dhcp_use_ntp, 0, 0
+DHCPv4.UseSIP, config_parse_bool, 0, offsetof(Network, dhcp_use_sip)
+DHCPv4.UseMTU, config_parse_bool, 0, offsetof(Network, dhcp_use_mtu)
+DHCPv4.UseHostname, config_parse_bool, 0, offsetof(Network, dhcp_use_hostname)
+DHCPv4.UseDomains, config_parse_dhcp_use_domains, 0, offsetof(Network, dhcp_use_domains)
+DHCPv4.UseRoutes, config_parse_bool, 0, offsetof(Network, dhcp_use_routes)
+DHCPv4.UseGateway, config_parse_tristate, 0, offsetof(Network, dhcp_use_gateway)
+DHCPv4.RequestOptions, config_parse_dhcp_request_options, AF_INET, 0
+DHCPv4.Anonymize, config_parse_bool, 0, offsetof(Network, dhcp_anonymize)
+DHCPv4.SendHostname, config_parse_bool, 0, offsetof(Network, dhcp_send_hostname)
+DHCPv4.Hostname, config_parse_hostname, 0, offsetof(Network, dhcp_hostname)
+DHCPv4.RequestBroadcast, config_parse_bool, 0, offsetof(Network, dhcp_broadcast)
+DHCPv4.VendorClassIdentifier, config_parse_string, 0, offsetof(Network, dhcp_vendor_class_identifier)
+DHCPv4.MUDURL, config_parse_dhcp_mud_url, 0, 0
+DHCPv4.MaxAttempts, config_parse_dhcp_max_attempts, 0, 0
+DHCPv4.UserClass, config_parse_dhcp_user_class, AF_INET, offsetof(Network, dhcp_user_class)
+DHCPv4.DUIDType, config_parse_duid_type, 0, offsetof(Network, duid)
+DHCPv4.DUIDRawData, config_parse_duid_rawdata, 0, offsetof(Network, duid)
+DHCPv4.RouteMetric, config_parse_dhcp_route_metric, 0, 0
+DHCPv4.RouteTable, config_parse_section_route_table, 0, 0
+DHCPv4.UseTimezone, config_parse_bool, 0, offsetof(Network, dhcp_use_timezone)
+DHCPv4.IAID, config_parse_iaid, 0, 0
+DHCPv4.ListenPort, config_parse_uint16, 0, offsetof(Network, dhcp_client_port)
+DHCPv4.SendRelease, config_parse_bool, 0, offsetof(Network, dhcp_send_release)
+DHCPv4.SendDecline, config_parse_bool, 0, offsetof(Network, dhcp_send_decline)
+DHCPv4.DenyList, config_parse_dhcp_acl_ip_address, 0, 0
+DHCPv4.AllowList, config_parse_dhcp_acl_ip_address, 0, 0
+DHCPv4.IPServiceType, config_parse_dhcp_ip_service_type, 0, offsetof(Network, dhcp_ip_service_type)
+DHCPv4.SendOption, config_parse_dhcp_send_option, AF_INET, offsetof(Network, dhcp_client_send_options)
+DHCPv4.SendVendorOption, config_parse_dhcp_send_option, 0, offsetof(Network, dhcp_client_send_vendor_options)
+DHCPv4.RouteMTUBytes, config_parse_mtu, AF_INET, offsetof(Network, dhcp_route_mtu)
+DHCPv4.FallbackLeaseLifetimeSec, config_parse_dhcp_fallback_lease_lifetime, 0, 0
+DHCPv6.UseDNS, config_parse_dhcp_use_dns, 0, 0
+DHCPv6.UseNTP, config_parse_dhcp_use_ntp, 0, 0
+DHCPv6.RapidCommit, config_parse_bool, 0, offsetof(Network, dhcp6_rapid_commit)
+DHCPv6.MUDURL, config_parse_dhcp6_mud_url, 0, 0
+DHCPv6.RequestOptions, config_parse_dhcp_request_options, AF_INET6, 0
+DHCPv6.UserClass, config_parse_dhcp_user_class, AF_INET6, offsetof(Network, dhcp6_user_class)
+DHCPv6.VendorClass, config_parse_dhcp_vendor_class, 0, offsetof(Network, dhcp6_vendor_class)
+DHCPv6.SendVendorOption, config_parse_dhcp_send_option, AF_INET6, offsetof(Network, dhcp6_client_send_vendor_options)
+DHCPv6.ForceDHCPv6PDOtherInformation, config_parse_bool, 0, offsetof(Network, dhcp6_force_pd_other_information)
+DHCPv6.PrefixDelegationHint, config_parse_dhcp6_pd_hint, 0, 0
+DHCPv6.WithoutRA, config_parse_dhcp6_client_start_mode, 0, offsetof(Network, dhcp6_without_ra)
+DHCPv6.SendOption, config_parse_dhcp_send_option, AF_INET6, offsetof(Network, dhcp6_client_send_options)
+DHCPv6.RouteMetric, config_parse_dhcp_route_metric, 0, 0
+IPv6AcceptRA.UseAutonomousPrefix, config_parse_bool, 0, offsetof(Network, ipv6_accept_ra_use_autonomous_prefix)
+IPv6AcceptRA.UseOnLinkPrefix, config_parse_bool, 0, offsetof(Network, ipv6_accept_ra_use_onlink_prefix)
+IPv6AcceptRA.UseDNS, config_parse_bool, 0, offsetof(Network, ipv6_accept_ra_use_dns)
+IPv6AcceptRA.UseDomains, config_parse_dhcp_use_domains, 0, offsetof(Network, ipv6_accept_ra_use_domains)
+IPv6AcceptRA.DHCPv6Client, config_parse_ipv6_accept_ra_start_dhcp6_client, 0, offsetof(Network, ipv6_accept_ra_start_dhcp6_client)
+IPv6AcceptRA.RouteTable, config_parse_section_route_table, 0, 0
+IPv6AcceptRA.DenyList, config_parse_ndisc_deny_listed_prefix, 0, 0
+IPv6AcceptRA.BlackList, config_parse_ndisc_deny_listed_prefix, 0, 0
+DHCPServer.MaxLeaseTimeSec, config_parse_sec, 0, offsetof(Network, dhcp_server_max_lease_time_usec)
+DHCPServer.DefaultLeaseTimeSec, config_parse_sec, 0, offsetof(Network, dhcp_server_default_lease_time_usec)
+DHCPServer.EmitDNS, config_parse_bool, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_DNS].emit)
+DHCPServer.DNS, config_parse_dhcp_server_emit, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_DNS])
+DHCPServer.EmitNTP, config_parse_bool, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_NTP].emit)
+DHCPServer.NTP, config_parse_dhcp_server_emit, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_NTP])
+DHCPServer.EmitSIP, config_parse_bool, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_SIP].emit)
+DHCPServer.SIP, config_parse_dhcp_server_emit, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_SIP])
+DHCPServer.EmitPOP3, config_parse_bool, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_POP3].emit)
+DHCPServer.POP3, config_parse_dhcp_server_emit, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_POP3])
+DHCPServer.EmitSMTP, config_parse_bool, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_SMTP].emit)
+DHCPServer.SMTP, config_parse_dhcp_server_emit, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_SMTP])
+DHCPServer.EmitLPR, config_parse_bool, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_LPR].emit)
+DHCPServer.LPR, config_parse_dhcp_server_emit, 0, offsetof(Network, dhcp_server_emit[SD_DHCP_LEASE_LPR])
+DHCPServer.EmitRouter, config_parse_bool, 0, offsetof(Network, dhcp_server_emit_router)
+DHCPServer.EmitTimezone, config_parse_bool, 0, offsetof(Network, dhcp_server_emit_timezone)
+DHCPServer.Timezone, config_parse_timezone, 0, offsetof(Network, dhcp_server_timezone)
+DHCPServer.PoolOffset, config_parse_uint32, 0, offsetof(Network, dhcp_server_pool_offset)
+DHCPServer.PoolSize, config_parse_uint32, 0, offsetof(Network, dhcp_server_pool_size)
+DHCPServer.SendVendorOption, config_parse_dhcp_send_option, 0, offsetof(Network, dhcp_server_send_vendor_options)
+DHCPServer.SendOption, config_parse_dhcp_send_option, 0, offsetof(Network, dhcp_server_send_options)
+Bridge.Cost, config_parse_uint32, 0, offsetof(Network, cost)
+Bridge.UseBPDU, config_parse_tristate, 0, offsetof(Network, use_bpdu)
+Bridge.HairPin, config_parse_tristate, 0, offsetof(Network, hairpin)
+Bridge.FastLeave, config_parse_tristate, 0, offsetof(Network, fast_leave)
+Bridge.AllowPortToBeRoot, config_parse_tristate, 0, offsetof(Network, allow_port_to_be_root)
+Bridge.UnicastFlood, config_parse_tristate, 0, offsetof(Network, unicast_flood)
+Bridge.MulticastFlood, config_parse_tristate, 0, offsetof(Network, multicast_flood)
+Bridge.MulticastToUnicast, config_parse_tristate, 0, offsetof(Network, multicast_to_unicast)
+Bridge.NeighborSuppression, config_parse_tristate, 0, offsetof(Network, neighbor_suppression)
+Bridge.Learning, config_parse_tristate, 0, offsetof(Network, learning)
+Bridge.ProxyARP, config_parse_tristate, 0, offsetof(Network, bridge_proxy_arp)
+Bridge.ProxyARPWiFi, config_parse_tristate, 0, offsetof(Network, bridge_proxy_arp_wifi)
+Bridge.Priority, config_parse_bridge_port_priority, 0, offsetof(Network, priority)
+Bridge.MulticastRouter, config_parse_multicast_router, 0, offsetof(Network, multicast_router)
+BridgeFDB.MACAddress, config_parse_fdb_hwaddr, 0, 0
+BridgeFDB.VLANId, config_parse_fdb_vlan_id, 0, 0
+BridgeFDB.Destination, config_parse_fdb_destination, 0, 0
+BridgeFDB.VNI, config_parse_fdb_vxlan_vni, 0, 0
+BridgeFDB.AssociatedWith, config_parse_fdb_ntf_flags, 0, 0
+BridgeMDB.MulticastGroupAddress, config_parse_mdb_group_address, 0, 0
+BridgeMDB.VLANId, config_parse_mdb_vlan_id, 0, 0
+BridgeVLAN.PVID, config_parse_brvlan_pvid, 0, 0
+BridgeVLAN.VLAN, config_parse_brvlan_vlan, 0, 0
+BridgeVLAN.EgressUntagged, config_parse_brvlan_untagged, 0, 0
+DHCPv6PrefixDelegation.SubnetId, config_parse_dhcp6_pd_subnet_id, 0, offsetof(Network, dhcp6_pd_subnet_id)
+DHCPv6PrefixDelegation.Announce, config_parse_bool, 0, offsetof(Network, dhcp6_pd_announce)
+DHCPv6PrefixDelegation.Assign, config_parse_bool, 0, offsetof(Network, dhcp6_pd_assign)
+DHCPv6PrefixDelegation.Token, config_parse_dhcp6_pd_token, 0, offsetof(Network, dhcp6_pd_token)
+IPv6SendRA.RouterLifetimeSec, config_parse_sec, 0, offsetof(Network, router_lifetime_usec)
+IPv6SendRA.Managed, config_parse_bool, 0, offsetof(Network, router_managed)
+IPv6SendRA.OtherInformation, config_parse_bool, 0, offsetof(Network, router_other_information)
+IPv6SendRA.RouterPreference, config_parse_router_preference, 0, 0
+IPv6SendRA.EmitDNS, config_parse_bool, 0, offsetof(Network, router_emit_dns)
+IPv6SendRA.DNS, config_parse_radv_dns, 0, 0
+IPv6SendRA.EmitDomains, config_parse_bool, 0, offsetof(Network, router_emit_domains)
+IPv6SendRA.Domains, config_parse_radv_search_domains, 0, 0
+IPv6SendRA.DNSLifetimeSec, config_parse_sec, 0, offsetof(Network, router_dns_lifetime_usec)
+IPv6Prefix.Prefix, config_parse_prefix, 0, 0
+IPv6Prefix.OnLink, config_parse_prefix_flags, 0, 0
+IPv6Prefix.AddressAutoconfiguration, config_parse_prefix_flags, 0, 0
+IPv6Prefix.ValidLifetimeSec, config_parse_prefix_lifetime, 0, 0
+IPv6Prefix.PreferredLifetimeSec, config_parse_prefix_lifetime, 0, 0
+IPv6Prefix.Assign, config_parse_prefix_assign, 0, 0
+IPv6RoutePrefix.Route, config_parse_route_prefix, 0, 0
+IPv6RoutePrefix.LifetimeSec, config_parse_route_prefix_lifetime, 0, 0
+LLDP.MUDURL, config_parse_lldp_mud, 0, 0
+CAN.BitRate, config_parse_can_bitrate, 0, offsetof(Network, can_bitrate)
+CAN.SamplePoint, config_parse_permille, 0, offsetof(Network, can_sample_point)
+CAN.DataBitRate, config_parse_can_bitrate, 0, offsetof(Network, can_data_bitrate)
+CAN.DataSamplePoint, config_parse_permille, 0, offsetof(Network, can_data_sample_point)
+CAN.FDMode, config_parse_tristate, 0, offsetof(Network, can_fd_mode)
+CAN.FDNonISO, config_parse_tristate, 0, offsetof(Network, can_non_iso)
+CAN.RestartSec, config_parse_sec, 0, offsetof(Network, can_restart_us)
+CAN.TripleSampling, config_parse_tristate, 0, offsetof(Network, can_triple_sampling)
+CAN.Termination, config_parse_tristate, 0, offsetof(Network, can_termination)
+CAN.ListenOnly, config_parse_tristate, 0, offsetof(Network, can_listen_only)
+QDisc.Parent, config_parse_qdisc_parent, _QDISC_KIND_INVALID, 0
+QDisc.Handle, config_parse_qdisc_handle, _QDISC_KIND_INVALID, 0
+BFIFO.Parent, config_parse_qdisc_parent, QDISC_KIND_BFIFO, 0
+BFIFO.Handle, config_parse_qdisc_handle, QDISC_KIND_BFIFO, 0
+BFIFO.LimitBytes, config_parse_bfifo_size, QDISC_KIND_BFIFO, 0
+CAKE.Parent, config_parse_qdisc_parent, QDISC_KIND_CAKE, 0
+CAKE.Handle, config_parse_qdisc_handle, QDISC_KIND_CAKE, 0
+CAKE.Bandwidth, config_parse_cake_bandwidth, QDISC_KIND_CAKE, 0
+CAKE.OverheadBytes, config_parse_cake_overhead, QDISC_KIND_CAKE, 0
+ControlledDelay.Parent, config_parse_qdisc_parent, QDISC_KIND_CODEL, 0
+ControlledDelay.Handle, config_parse_qdisc_handle, QDISC_KIND_CODEL, 0
+ControlledDelay.PacketLimit, config_parse_controlled_delay_u32, QDISC_KIND_CODEL, 0
+ControlledDelay.TargetSec, config_parse_controlled_delay_usec, QDISC_KIND_CODEL, 0
+ControlledDelay.IntervalSec, config_parse_controlled_delay_usec, QDISC_KIND_CODEL, 0
+ControlledDelay.CEThresholdSec, config_parse_controlled_delay_usec, QDISC_KIND_CODEL, 0
+ControlledDelay.ECN, config_parse_controlled_delay_bool, QDISC_KIND_CODEL, 0
+DeficitRoundRobinScheduler.Parent, config_parse_qdisc_parent, QDISC_KIND_DRR, 0
+DeficitRoundRobinScheduler.Handle, config_parse_qdisc_handle, QDISC_KIND_DRR, 0
+DeficitRoundRobinSchedulerClass.Parent, config_parse_tclass_parent, TCLASS_KIND_DRR, 0
+DeficitRoundRobinSchedulerClass.ClassId, config_parse_tclass_classid, TCLASS_KIND_DRR, 0
+DeficitRoundRobinSchedulerClass.QuantumBytes, config_parse_drr_size, TCLASS_KIND_DRR, 0
+EnhancedTransmissionSelection.Parent, config_parse_qdisc_parent, QDISC_KIND_ETS, 0
+EnhancedTransmissionSelection.Handle, config_parse_qdisc_handle, QDISC_KIND_ETS, 0
+EnhancedTransmissionSelection.Bands, config_parse_ets_u8, QDISC_KIND_ETS, 0
+EnhancedTransmissionSelection.StrictBands, config_parse_ets_u8, QDISC_KIND_ETS, 0
+EnhancedTransmissionSelection.QuantumBytes, config_parse_ets_quanta, QDISC_KIND_ETS, 0
+EnhancedTransmissionSelection.PriorityMap, config_parse_ets_prio, QDISC_KIND_ETS, 0
+PFIFO.Parent, config_parse_qdisc_parent, QDISC_KIND_PFIFO, 0
+PFIFO.Handle, config_parse_qdisc_handle, QDISC_KIND_PFIFO, 0
+PFIFO.PacketLimit, config_parse_pfifo_size, QDISC_KIND_PFIFO, 0
+PFIFOFast.Parent, config_parse_qdisc_parent, QDISC_KIND_PFIFO_FAST, 0
+PFIFOFast.Handle, config_parse_qdisc_handle, QDISC_KIND_PFIFO_FAST, 0
+PFIFOHeadDrop.Parent, config_parse_qdisc_parent, QDISC_KIND_PFIFO_HEAD_DROP, 0
+PFIFOHeadDrop.Handle, config_parse_qdisc_handle, QDISC_KIND_PFIFO_HEAD_DROP, 0
+PFIFOHeadDrop.PacketLimit, config_parse_pfifo_size, QDISC_KIND_PFIFO_HEAD_DROP, 0
+QuickFairQueueing.Parent, config_parse_qdisc_parent, QDISC_KIND_QFQ, 0
+QuickFairQueueing.Handle, config_parse_qdisc_handle, QDISC_KIND_QFQ, 0
+QuickFairQueueingClass.Parent, config_parse_tclass_parent, TCLASS_KIND_QFQ, 0
+QuickFairQueueingClass.ClassId, config_parse_tclass_classid, TCLASS_KIND_QFQ, 0
+QuickFairQueueingClass.Weight, config_parse_quick_fair_queueing_weight, TCLASS_KIND_QFQ, 0
+QuickFairQueueingClass.MaxPacketBytes, config_parse_quick_fair_queueing_max_packet, TCLASS_KIND_QFQ, 0
+FairQueueing.Parent, config_parse_qdisc_parent, QDISC_KIND_FQ, 0
+FairQueueing.Handle, config_parse_qdisc_handle, QDISC_KIND_FQ, 0
+FairQueueing.PacketLimit, config_parse_fair_queueing_u32, QDISC_KIND_FQ, 0
+FairQueueing.FlowLimit, config_parse_fair_queueing_u32, QDISC_KIND_FQ, 0
+FairQueueing.QuantumBytes, config_parse_fair_queueing_size, QDISC_KIND_FQ, 0
+FairQueueing.InitialQuantumBytes, config_parse_fair_queueing_size, QDISC_KIND_FQ, 0
+FairQueueing.MaximumRate, config_parse_fair_queueing_max_rate, QDISC_KIND_FQ, 0
+FairQueueing.Buckets, config_parse_fair_queueing_u32, QDISC_KIND_FQ, 0
+FairQueueing.OrphanMask, config_parse_fair_queueing_u32, QDISC_KIND_FQ, 0
+FairQueueing.Pacing, config_parse_fair_queueing_bool, QDISC_KIND_FQ, 0
+FairQueueing.CEThresholdSec, config_parse_fair_queueing_usec, QDISC_KIND_FQ, 0
+FairQueueingControlledDelay.Parent, config_parse_qdisc_parent, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.Handle, config_parse_qdisc_handle, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.PacketLimit, config_parse_fair_queueing_controlled_delay_u32, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.MemoryLimitBytes, config_parse_fair_queueing_controlled_delay_size, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.Flows, config_parse_fair_queueing_controlled_delay_u32, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.QuantumBytes, config_parse_fair_queueing_controlled_delay_size, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.TargetSec, config_parse_fair_queueing_controlled_delay_usec, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.IntervalSec, config_parse_fair_queueing_controlled_delay_usec, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.CEThresholdSec, config_parse_fair_queueing_controlled_delay_usec, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.ECN, config_parse_fair_queueing_controlled_delay_bool, QDISC_KIND_FQ_CODEL, 0
+FlowQueuePIE.Parent, config_parse_qdisc_parent, QDISC_KIND_FQ_PIE, 0
+FlowQueuePIE.Handle, config_parse_qdisc_handle, QDISC_KIND_FQ_PIE, 0
+FlowQueuePIE.PacketLimit, config_parse_fq_pie_packet_limit, QDISC_KIND_FQ_PIE, 0
+GenericRandomEarlyDetection.Parent, config_parse_qdisc_parent, QDISC_KIND_GRED, 0
+GenericRandomEarlyDetection.Handle, config_parse_qdisc_handle, QDISC_KIND_GRED, 0
+GenericRandomEarlyDetection.VirtualQueues, config_parse_generic_random_early_detection_u32, QDISC_KIND_GRED, 0
+GenericRandomEarlyDetection.DefaultVirtualQueue, config_parse_generic_random_early_detection_u32, QDISC_KIND_GRED, 0
+GenericRandomEarlyDetection.GenericRIO, config_parse_generic_random_early_detection_bool, QDISC_KIND_GRED, 0
+HeavyHitterFilter.Parent, config_parse_qdisc_parent, QDISC_KIND_HHF, 0
+HeavyHitterFilter.Handle, config_parse_qdisc_handle, QDISC_KIND_HHF, 0
+HeavyHitterFilter.PacketLimit, config_parse_heavy_hitter_filter_packet_limit, QDISC_KIND_HHF, 0
+HierarchyTokenBucket.Parent, config_parse_qdisc_parent, QDISC_KIND_HTB, 0
+HierarchyTokenBucket.Handle, config_parse_qdisc_handle, QDISC_KIND_HTB, 0
+HierarchyTokenBucket.DefaultClass, config_parse_hierarchy_token_bucket_default_class, QDISC_KIND_HTB, 0
+HierarchyTokenBucket.RateToQuantum, config_parse_hierarchy_token_bucket_u32, QDISC_KIND_HTB, 0
+HierarchyTokenBucketClass.Parent, config_parse_tclass_parent, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.ClassId, config_parse_tclass_classid, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.Priority, config_parse_hierarchy_token_bucket_class_u32, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.QuantumBytes, config_parse_hierarchy_token_bucket_class_size, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.MTUBytes, config_parse_hierarchy_token_bucket_class_size, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.OverheadBytes, config_parse_hierarchy_token_bucket_class_size, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.Rate, config_parse_hierarchy_token_bucket_class_rate, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.CeilRate, config_parse_hierarchy_token_bucket_class_rate, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.BufferBytes, config_parse_hierarchy_token_bucket_class_size, TCLASS_KIND_HTB, 0
+HierarchyTokenBucketClass.CeilBufferBytes, config_parse_hierarchy_token_bucket_class_size, TCLASS_KIND_HTB, 0
+NetworkEmulator.Parent, config_parse_qdisc_parent, QDISC_KIND_NETEM, 0
+NetworkEmulator.Handle, config_parse_qdisc_handle, QDISC_KIND_NETEM, 0
+NetworkEmulator.DelaySec, config_parse_network_emulator_delay, QDISC_KIND_NETEM, 0
+NetworkEmulator.DelayJitterSec, config_parse_network_emulator_delay, QDISC_KIND_NETEM, 0
+NetworkEmulator.LossRate, config_parse_network_emulator_rate, QDISC_KIND_NETEM, 0
+NetworkEmulator.DuplicateRate, config_parse_network_emulator_rate, QDISC_KIND_NETEM, 0
+NetworkEmulator.PacketLimit, config_parse_network_emulator_packet_limit, QDISC_KIND_NETEM, 0
+PIE.Parent, config_parse_qdisc_parent, QDISC_KIND_PIE, 0
+PIE.Handle, config_parse_qdisc_handle, QDISC_KIND_PIE, 0
+PIE.PacketLimit, config_parse_pie_packet_limit, QDISC_KIND_PIE, 0
+StochasticFairBlue.Parent, config_parse_qdisc_parent, QDISC_KIND_SFB, 0
+StochasticFairBlue.Handle, config_parse_qdisc_handle, QDISC_KIND_SFB, 0
+StochasticFairBlue.PacketLimit, config_parse_stochastic_fair_blue_u32, QDISC_KIND_SFB, 0
+StochasticFairnessQueueing.Parent, config_parse_qdisc_parent, QDISC_KIND_SFQ, 0
+StochasticFairnessQueueing.Handle, config_parse_qdisc_handle, QDISC_KIND_SFQ, 0
+StochasticFairnessQueueing.PerturbPeriodSec, config_parse_stochastic_fairness_queueing_perturb_period, QDISC_KIND_SFQ, 0
+TokenBucketFilter.Parent, config_parse_qdisc_parent, QDISC_KIND_TBF, 0
+TokenBucketFilter.Handle, config_parse_qdisc_handle, QDISC_KIND_TBF, 0
+TokenBucketFilter.Rate, config_parse_token_bucket_filter_rate, QDISC_KIND_TBF, 0
+TokenBucketFilter.BurstBytes, config_parse_token_bucket_filter_size, QDISC_KIND_TBF, 0
+TokenBucketFilter.LimitBytes, config_parse_token_bucket_filter_size, QDISC_KIND_TBF, 0
+TokenBucketFilter.MTUBytes, config_parse_token_bucket_filter_size, QDISC_KIND_TBF, 0
+TokenBucketFilter.MPUBytes, config_parse_token_bucket_filter_size, QDISC_KIND_TBF, 0
+TokenBucketFilter.PeakRate, config_parse_token_bucket_filter_rate, QDISC_KIND_TBF, 0
+TokenBucketFilter.LatencySec, config_parse_token_bucket_filter_latency, QDISC_KIND_TBF, 0
+TrivialLinkEqualizer.Parent, config_parse_qdisc_parent, QDISC_KIND_TEQL, 0
+TrivialLinkEqualizer.Handle, config_parse_qdisc_handle, QDISC_KIND_TEQL, 0
+TrivialLinkEqualizer.Id, config_parse_trivial_link_equalizer_id, QDISC_KIND_TEQL, 0
+/* backwards compatibility: do not add new entries to this section */
+Network.IPv4LL, config_parse_ipv4ll, 0, offsetof(Network, link_local)
+Network.IPv6PrefixDelegation, config_parse_router_prefix_delegation, 0, offsetof(Network, router_prefix_delegation)
+IPv6PrefixDelegation.RouterLifetimeSec, config_parse_sec, 0, offsetof(Network, router_lifetime_usec)
+IPv6PrefixDelegation.Managed, config_parse_bool, 0, offsetof(Network, router_managed)
+IPv6PrefixDelegation.OtherInformation, config_parse_bool, 0, offsetof(Network, router_other_information)
+IPv6PrefixDelegation.RouterPreference, config_parse_router_preference, 0, 0
+IPv6PrefixDelegation.EmitDNS, config_parse_bool, 0, offsetof(Network, router_emit_dns)
+IPv6PrefixDelegation.DNS, config_parse_radv_dns, 0, 0
+IPv6PrefixDelegation.EmitDomains, config_parse_bool, 0, offsetof(Network, router_emit_domains)
+IPv6PrefixDelegation.Domains, config_parse_radv_search_domains, 0, 0
+IPv6PrefixDelegation.DNSLifetimeSec, config_parse_sec, 0, offsetof(Network, router_dns_lifetime_usec)
+DHCPv4.BlackList, config_parse_dhcp_acl_ip_address, 0, 0
+DHCP.ClientIdentifier, config_parse_dhcp_client_identifier, 0, offsetof(Network, dhcp_client_identifier)
+DHCP.UseDNS, config_parse_dhcp_use_dns, 0, 0
+DHCP.UseNTP, config_parse_dhcp_use_ntp, 0, 0
+DHCP.UseMTU, config_parse_bool, 0, offsetof(Network, dhcp_use_mtu)
+DHCP.UseHostname, config_parse_bool, 0, offsetof(Network, dhcp_use_hostname)
+DHCP.UseDomains, config_parse_dhcp_use_domains, 0, offsetof(Network, dhcp_use_domains)
+DHCP.UseDomainName, config_parse_dhcp_use_domains, 0, offsetof(Network, dhcp_use_domains)
+DHCP.UseRoutes, config_parse_bool, 0, offsetof(Network, dhcp_use_routes)
+DHCP.Anonymize, config_parse_bool, 0, offsetof(Network, dhcp_anonymize)
+DHCP.SendHostname, config_parse_bool, 0, offsetof(Network, dhcp_send_hostname)
+DHCP.Hostname, config_parse_hostname, 0, offsetof(Network, dhcp_hostname)
+DHCP.RequestBroadcast, config_parse_bool, 0, offsetof(Network, dhcp_broadcast)
+DHCP.CriticalConnection, config_parse_tristate, 0, offsetof(Network, dhcp_critical)
+DHCP.VendorClassIdentifier, config_parse_string, 0, offsetof(Network, dhcp_vendor_class_identifier)
+DHCP.UserClass, config_parse_dhcp_user_class, AF_INET, offsetof(Network, dhcp_user_class)
+DHCP.DUIDType, config_parse_duid_type, 0, offsetof(Network, duid)
+DHCP.DUIDRawData, config_parse_duid_rawdata, 0, offsetof(Network, duid)
+DHCP.RouteMetric, config_parse_dhcp_route_metric, 0, 0
+DHCP.RouteTable, config_parse_section_route_table, 0, 0
+DHCP.UseTimezone, config_parse_bool, 0, offsetof(Network, dhcp_use_timezone)
+DHCP.IAID, config_parse_iaid, 0, 0
+DHCP.ListenPort, config_parse_uint16, 0, offsetof(Network, dhcp_client_port)
+DHCP.RapidCommit, config_parse_bool, 0, offsetof(Network, dhcp6_rapid_commit)
+DHCP.ForceDHCPv6PDOtherInformation, config_parse_bool, 0, offsetof(Network, dhcp6_force_pd_other_information)
+DHCPv4.UseDomainName, config_parse_dhcp_use_domains, 0, offsetof(Network, dhcp_use_domains)
+DHCPv4.CriticalConnection, config_parse_tristate, 0, offsetof(Network, dhcp_critical)
+TrafficControlQueueingDiscipline.Parent, config_parse_qdisc_parent, _QDISC_KIND_INVALID, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorDelaySec, config_parse_network_emulator_delay, 0, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorDelayJitterSec, config_parse_network_emulator_delay, 0, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorLossRate, config_parse_network_emulator_rate, 0, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorDuplicateRate, config_parse_network_emulator_rate, 0, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorPacketLimit, config_parse_network_emulator_packet_limit, 0, 0
+FairQueueing.Quantum, config_parse_fair_queueing_size, QDISC_KIND_FQ, 0
+FairQueueing.InitialQuantum, config_parse_fair_queueing_size, QDISC_KIND_FQ, 0
+FairQueueingControlledDelay.MemoryLimit, config_parse_fair_queueing_controlled_delay_size, QDISC_KIND_FQ_CODEL, 0
+FairQueueingControlledDelay.Quantum, config_parse_fair_queueing_controlled_delay_size, QDISC_KIND_FQ_CODEL, 0
+TokenBucketFilter.Burst, config_parse_token_bucket_filter_size, QDISC_KIND_TBF, 0
+TokenBucketFilter.LimitSize, config_parse_token_bucket_filter_size, QDISC_KIND_TBF, 0
diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c
new file mode 100644
index 0000000..3254641
--- /dev/null
+++ b/src/network/networkd-network.c
@@ -0,0 +1,1238 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <linux/netdevice.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "networkd-dhcp-server.h"
+#include "network-internal.h"
+#include "networkd-address-label.h"
+#include "networkd-address.h"
+#include "networkd-dhcp-common.h"
+#include "networkd-fdb.h"
+#include "networkd-manager.h"
+#include "networkd-mdb.h"
+#include "networkd-ndisc.h"
+#include "networkd-neighbor.h"
+#include "networkd-network.h"
+#include "networkd-nexthop.h"
+#include "networkd-radv.h"
+#include "networkd-routing-policy-rule.h"
+#include "networkd-sriov.h"
+#include "parse-util.h"
+#include "path-lookup.h"
+#include "set.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tc.h"
+#include "util.h"
+
+/* Let's assume that anything above this number is a user misconfiguration. */
+#define MAX_NTP_SERVERS 128
+
+/* Set defaults following RFC7844 */
+void network_apply_anonymize_if_set(Network *network) {
+ if (!network->dhcp_anonymize)
+ return;
+ /* RFC7844 3.7
+ SHOULD NOT send the Host Name option */
+ network->dhcp_send_hostname = false;
+ /* RFC7844 section 3.:
+ MAY contain the Client Identifier option
+ Section 3.5:
+ clients MUST use client identifiers based solely
+ on the link-layer address */
+ /* NOTE: Using MAC, as it does not reveal extra information,
+ * and some servers might not answer if this option is not sent */
+ network->dhcp_client_identifier = DHCP_CLIENT_ID_MAC;
+ /* RFC 7844 3.10:
+ SHOULD NOT use the Vendor Class Identifier option */
+ network->dhcp_vendor_class_identifier = mfree(network->dhcp_vendor_class_identifier);
+ /* RFC7844 section 3.6.:
+ The client intending to protect its privacy SHOULD only request a
+ minimal number of options in the PRL and SHOULD also randomly shuffle
+ the ordering of option codes in the PRL. If this random ordering
+ cannot be implemented, the client MAY order the option codes in the
+ PRL by option code number (lowest to highest).
+ */
+ /* NOTE: dhcp_use_mtu is false by default,
+ * though it was not initiallized to any value in network_load_one.
+ * Maybe there should be another var called *send*?
+ * (to use the MTU sent by the server but to do not send
+ * the option in the PRL). */
+ network->dhcp_use_mtu = false;
+ /* NOTE: when Anonymize=yes, the PRL route options are sent by default,
+ * but this is needed to use them. */
+ network->dhcp_use_routes = true;
+ /* RFC7844 section 3.6.
+ * same comments as previous option */
+ network->dhcp_use_timezone = false;
+}
+
+static int network_resolve_netdev_one(Network *network, const char *name, NetDevKind kind, NetDev **ret_netdev) {
+ const char *kind_string;
+ NetDev *netdev;
+ int r;
+
+ /* For test-networkd-conf, the check must be earlier than the assertions. */
+ if (!name)
+ return 0;
+
+ assert(network);
+ assert(network->manager);
+ assert(network->filename);
+ assert(ret_netdev);
+
+ if (kind == _NETDEV_KIND_TUNNEL)
+ kind_string = "tunnel";
+ else {
+ kind_string = netdev_kind_to_string(kind);
+ if (!kind_string)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Invalid NetDev kind of %s, ignoring assignment.",
+ network->filename, name);
+ }
+
+ r = netdev_get(network->manager, name, &netdev);
+ if (r < 0)
+ return log_error_errno(r, "%s: %s NetDev could not be found, ignoring assignment.",
+ network->filename, name);
+
+ if (netdev->kind != kind && !(kind == _NETDEV_KIND_TUNNEL &&
+ IN_SET(netdev->kind,
+ NETDEV_KIND_IPIP,
+ NETDEV_KIND_SIT,
+ NETDEV_KIND_GRE,
+ NETDEV_KIND_GRETAP,
+ NETDEV_KIND_IP6GRE,
+ NETDEV_KIND_IP6GRETAP,
+ NETDEV_KIND_VTI,
+ NETDEV_KIND_VTI6,
+ NETDEV_KIND_IP6TNL,
+ NETDEV_KIND_ERSPAN)))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: NetDev %s is not a %s, ignoring assignment",
+ network->filename, name, kind_string);
+
+ *ret_netdev = netdev_ref(netdev);
+ return 1;
+}
+
+static int network_resolve_stacked_netdevs(Network *network) {
+ void *name, *kind;
+ int r;
+
+ assert(network);
+
+ HASHMAP_FOREACH_KEY(kind, name, network->stacked_netdev_names) {
+ _cleanup_(netdev_unrefp) NetDev *netdev = NULL;
+
+ r = network_resolve_netdev_one(network, name, PTR_TO_INT(kind), &netdev);
+ if (r <= 0)
+ continue;
+
+ r = hashmap_ensure_allocated(&network->stacked_netdevs, &string_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = hashmap_put(network->stacked_netdevs, netdev->ifname, netdev);
+ if (r < 0)
+ return log_error_errno(r, "%s: Failed to add NetDev '%s' to network: %m",
+ network->filename, (const char *) name);
+
+ netdev = NULL;
+ }
+
+ return 0;
+}
+
+int network_verify(Network *network) {
+ assert(network);
+ assert(network->filename);
+
+ if (set_isempty(network->match_mac) && set_isempty(network->match_permanent_mac) &&
+ strv_isempty(network->match_path) && strv_isempty(network->match_driver) &&
+ strv_isempty(network->match_type) && strv_isempty(network->match_name) &&
+ strv_isempty(network->match_property) && strv_isempty(network->match_wlan_iftype) &&
+ strv_isempty(network->match_ssid) && !network->conditions)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: No valid settings found in the [Match] section, ignoring file. "
+ "To match all interfaces, add Name=* in the [Match] section.",
+ network->filename);
+
+ /* skip out early if configuration does not match the environment */
+ if (!condition_test_list(network->conditions, environ, NULL, NULL, NULL))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Conditions in the file do not match the system environment, skipping.",
+ network->filename);
+
+ (void) network_resolve_netdev_one(network, network->bond_name, NETDEV_KIND_BOND, &network->bond);
+ (void) network_resolve_netdev_one(network, network->bridge_name, NETDEV_KIND_BRIDGE, &network->bridge);
+ (void) network_resolve_netdev_one(network, network->vrf_name, NETDEV_KIND_VRF, &network->vrf);
+ (void) network_resolve_stacked_netdevs(network);
+
+ /* Free unnecessary entries. */
+ network->bond_name = mfree(network->bond_name);
+ network->bridge_name = mfree(network->bridge_name);
+ network->vrf_name = mfree(network->vrf_name);
+ network->stacked_netdev_names = hashmap_free_free_key(network->stacked_netdev_names);
+
+ if (network->bond) {
+ /* Bonding slave does not support addressing. */
+ if (network->link_local >= 0 && network->link_local != ADDRESS_FAMILY_NO) {
+ log_warning("%s: Cannot enable LinkLocalAddressing= when Bond= is specified, disabling LinkLocalAddressing=.",
+ network->filename);
+ network->link_local = ADDRESS_FAMILY_NO;
+ }
+ if (network->dhcp_server) {
+ log_warning("%s: Cannot enable DHCPServer= when Bond= is specified, disabling DHCPServer=.",
+ network->filename);
+ network->dhcp_server = false;
+ }
+ if (!ordered_hashmap_isempty(network->addresses_by_section))
+ log_warning("%s: Cannot set addresses when Bond= is specified, ignoring addresses.",
+ network->filename);
+ if (!hashmap_isempty(network->routes_by_section))
+ log_warning("%s: Cannot set routes when Bond= is specified, ignoring routes.",
+ network->filename);
+
+ network->addresses_by_section = ordered_hashmap_free_with_destructor(network->addresses_by_section, address_free);
+ network->routes_by_section = hashmap_free_with_destructor(network->routes_by_section, route_free);
+ }
+
+ if (network->link_local < 0)
+ network->link_local = network->bridge ? ADDRESS_FAMILY_NO : ADDRESS_FAMILY_IPV6;
+
+ if (FLAGS_SET(network->link_local, ADDRESS_FAMILY_FALLBACK_IPV4) &&
+ !FLAGS_SET(network->dhcp, ADDRESS_FAMILY_IPV4)) {
+ log_warning("%s: fallback assignment of IPv4 link local address is enabled but DHCPv4 is disabled. "
+ "Disabling the fallback assignment.", network->filename);
+ SET_FLAG(network->link_local, ADDRESS_FAMILY_FALLBACK_IPV4, false);
+ }
+
+ /* IPMasquerade=yes implies IPForward=yes */
+ if (network->ip_masquerade)
+ network->ip_forward |= ADDRESS_FAMILY_IPV4;
+
+ network_adjust_ipv6_accept_ra(network);
+ network_adjust_dhcp(network);
+ network_adjust_radv(network);
+
+ if (network->mtu > 0 && network->dhcp_use_mtu) {
+ log_warning("%s: MTUBytes= in [Link] section and UseMTU= in [DHCP] section are set. "
+ "Disabling UseMTU=.", network->filename);
+ network->dhcp_use_mtu = false;
+ }
+
+ if (network->dhcp_use_gateway < 0)
+ network->dhcp_use_gateway = network->dhcp_use_routes;
+
+ if (network->ignore_carrier_loss < 0)
+ network->ignore_carrier_loss = network->configure_without_carrier;
+
+ if (network->dhcp_critical >= 0) {
+ if (network->keep_configuration >= 0)
+ log_warning("%s: Both KeepConfiguration= and deprecated CriticalConnection= are set. "
+ "Ignoring CriticalConnection=.", network->filename);
+ else if (network->dhcp_critical)
+ /* CriticalConnection=yes also preserve foreign static configurations. */
+ network->keep_configuration = KEEP_CONFIGURATION_YES;
+ else
+ network->keep_configuration = KEEP_CONFIGURATION_NO;
+ }
+
+ if (network->keep_configuration < 0)
+ network->keep_configuration = KEEP_CONFIGURATION_NO;
+
+ if (network->ipv6_proxy_ndp == 0 && !set_isempty(network->ipv6_proxy_ndp_addresses)) {
+ log_warning("%s: IPv6ProxyNDP= is disabled. Ignoring IPv6ProxyNDPAddress=.", network->filename);
+ network->ipv6_proxy_ndp_addresses = set_free_free(network->ipv6_proxy_ndp_addresses);
+ }
+
+ network_drop_invalid_addresses(network);
+ network_drop_invalid_routes(network);
+ network_drop_invalid_nexthops(network);
+ network_drop_invalid_fdb_entries(network);
+ network_drop_invalid_mdb_entries(network);
+ network_drop_invalid_neighbors(network);
+ network_drop_invalid_address_labels(network);
+ network_drop_invalid_prefixes(network);
+ network_drop_invalid_route_prefixes(network);
+ network_drop_invalid_routing_policy_rules(network);
+ network_drop_invalid_traffic_control(network);
+ network_drop_invalid_sr_iov(network);
+
+ return 0;
+}
+
+int network_load_one(Manager *manager, OrderedHashmap **networks, const char *filename) {
+ _cleanup_free_ char *fname = NULL, *name = NULL;
+ _cleanup_(network_unrefp) Network *network = NULL;
+ _cleanup_fclose_ FILE *file = NULL;
+ const char *dropin_dirname;
+ char *d;
+ int r;
+
+ assert(manager);
+ assert(filename);
+
+ file = fopen(filename, "re");
+ if (!file) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ if (null_or_empty_fd(fileno(file))) {
+ log_debug("Skipping empty file: %s", filename);
+ return 0;
+ }
+
+ fname = strdup(filename);
+ if (!fname)
+ return log_oom();
+
+ name = strdup(basename(filename));
+ if (!name)
+ return log_oom();
+
+ d = strrchr(name, '.');
+ if (!d)
+ return -EINVAL;
+
+ *d = '\0';
+
+ dropin_dirname = strjoina(name, ".network.d");
+
+ network = new(Network, 1);
+ if (!network)
+ return log_oom();
+
+ *network = (Network) {
+ .filename = TAKE_PTR(fname),
+ .name = TAKE_PTR(name),
+
+ .manager = manager,
+ .n_ref = 1,
+
+ .required_for_online = true,
+ .required_operstate_for_online = LINK_OPERSTATE_RANGE_DEFAULT,
+ .arp = -1,
+ .multicast = -1,
+ .allmulticast = -1,
+
+ .configure_without_carrier = false,
+ .ignore_carrier_loss = -1,
+ .keep_configuration = _KEEP_CONFIGURATION_INVALID,
+
+ .dhcp = ADDRESS_FAMILY_NO,
+ .duid.type = _DUID_TYPE_INVALID,
+ .dhcp_critical = -1,
+ .dhcp_use_ntp = true,
+ .dhcp_use_sip = true,
+ .dhcp_use_dns = true,
+ .dhcp_use_hostname = true,
+ .dhcp_use_routes = true,
+ .dhcp_use_gateway = -1,
+ /* NOTE: this var might be overwritten by network_apply_anonymize_if_set */
+ .dhcp_send_hostname = true,
+ .dhcp_send_release = true,
+ /* To enable/disable RFC7844 Anonymity Profiles */
+ .dhcp_anonymize = false,
+ .dhcp_route_metric = DHCP_ROUTE_METRIC,
+ /* NOTE: this var might be overwritten by network_apply_anonymize_if_set */
+ .dhcp_client_identifier = DHCP_CLIENT_ID_DUID,
+ .dhcp_route_table = RT_TABLE_MAIN,
+ .dhcp_route_table_set = false,
+ /* NOTE: from man: UseMTU=... Defaults to false*/
+ .dhcp_use_mtu = false,
+ /* NOTE: from man: UseTimezone=... Defaults to "no".*/
+ .dhcp_use_timezone = false,
+ .dhcp_ip_service_type = -1,
+
+ .dhcp6_rapid_commit = true,
+ .dhcp6_route_metric = DHCP_ROUTE_METRIC,
+ .dhcp6_use_ntp = true,
+ .dhcp6_use_dns = true,
+
+ .dhcp6_pd = -1,
+ .dhcp6_pd_announce = true,
+ .dhcp6_pd_assign = true,
+ .dhcp6_pd_subnet_id = -1,
+
+ .dhcp_server_emit[SD_DHCP_LEASE_DNS].emit = true,
+ .dhcp_server_emit[SD_DHCP_LEASE_NTP].emit = true,
+ .dhcp_server_emit[SD_DHCP_LEASE_SIP].emit = true,
+
+ .dhcp_server_emit_router = true,
+ .dhcp_server_emit_timezone = true,
+
+ .router_lifetime_usec = 30 * USEC_PER_MINUTE,
+ .router_emit_dns = true,
+ .router_emit_domains = true,
+
+ .use_bpdu = -1,
+ .hairpin = -1,
+ .fast_leave = -1,
+ .allow_port_to_be_root = -1,
+ .unicast_flood = -1,
+ .multicast_flood = -1,
+ .multicast_to_unicast = -1,
+ .neighbor_suppression = -1,
+ .learning = -1,
+ .bridge_proxy_arp = -1,
+ .bridge_proxy_arp_wifi = -1,
+ .priority = LINK_BRIDGE_PORT_PRIORITY_INVALID,
+ .multicast_router = _MULTICAST_ROUTER_INVALID,
+
+ .lldp_mode = LLDP_MODE_ROUTERS_ONLY,
+
+ .dns_default_route = -1,
+ .llmnr = RESOLVE_SUPPORT_YES,
+ .mdns = RESOLVE_SUPPORT_NO,
+ .dnssec_mode = _DNSSEC_MODE_INVALID,
+ .dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID,
+
+ /* If LinkLocalAddressing= is not set, then set to ADDRESS_FAMILY_IPV6 later. */
+ .link_local = _ADDRESS_FAMILY_INVALID,
+ .ipv6ll_address_gen_mode = _IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_INVALID,
+
+ .ipv4_accept_local = -1,
+ .ipv6_privacy_extensions = IPV6_PRIVACY_EXTENSIONS_NO,
+ .ipv6_accept_ra = -1,
+ .ipv6_dad_transmits = -1,
+ .ipv6_hop_limit = -1,
+ .ipv6_proxy_ndp = -1,
+ .proxy_arp = -1,
+
+ .ipv6_accept_ra_use_dns = true,
+ .ipv6_accept_ra_use_autonomous_prefix = true,
+ .ipv6_accept_ra_use_onlink_prefix = true,
+ .ipv6_accept_ra_route_table = RT_TABLE_MAIN,
+ .ipv6_accept_ra_route_table_set = false,
+ .ipv6_accept_ra_start_dhcp6_client = IPV6_ACCEPT_RA_START_DHCP6_CLIENT_YES,
+
+ .can_triple_sampling = -1,
+ .can_termination = -1,
+ .can_listen_only = -1,
+ .can_fd_mode = -1,
+ .can_non_iso = -1,
+ };
+
+ r = config_parse_many(
+ filename, NETWORK_DIRS, dropin_dirname,
+ "Match\0"
+ "Link\0"
+ "SR-IOV\0"
+ "Network\0"
+ "Address\0"
+ "Neighbor\0"
+ "IPv6AddressLabel\0"
+ "RoutingPolicyRule\0"
+ "Route\0"
+ "NextHop\0"
+ "DHCP\0" /* compat */
+ "DHCPv4\0"
+ "DHCPv6\0"
+ "DHCPv6PrefixDelegation\0"
+ "DHCPServer\0"
+ "IPv6AcceptRA\0"
+ "IPv6NDPProxyAddress\0"
+ "Bridge\0"
+ "BridgeFDB\0"
+ "BridgeMDB\0"
+ "BridgeVLAN\0"
+ "IPv6SendRA\0"
+ "IPv6PrefixDelegation\0"
+ "IPv6Prefix\0"
+ "IPv6RoutePrefix\0"
+ "LLDP\0"
+ "TrafficControlQueueingDiscipline\0"
+ "CAN\0"
+ "QDisc\0"
+ "BFIFO\0"
+ "CAKE\0"
+ "ControlledDelay\0"
+ "DeficitRoundRobinScheduler\0"
+ "DeficitRoundRobinSchedulerClass\0"
+ "EnhancedTransmissionSelection\0"
+ "FairQueueing\0"
+ "FairQueueingControlledDelay\0"
+ "FlowQueuePIE\0"
+ "GenericRandomEarlyDetection\0"
+ "HeavyHitterFilter\0"
+ "HierarchyTokenBucket\0"
+ "HierarchyTokenBucketClass\0"
+ "NetworkEmulator\0"
+ "PFIFO\0"
+ "PFIFOFast\0"
+ "PFIFOHeadDrop\0"
+ "PIE\0"
+ "QuickFairQueueing\0"
+ "QuickFairQueueingClass\0"
+ "StochasticFairBlue\0"
+ "StochasticFairnessQueueing\0"
+ "TokenBucketFilter\0"
+ "TrivialLinkEqualizer\0",
+ config_item_perf_lookup, network_network_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ network,
+ &network->timestamp);
+ if (r < 0)
+ return r;
+
+ network_apply_anonymize_if_set(network);
+
+ r = network_add_ipv4ll_route(network);
+ if (r < 0)
+ log_warning_errno(r, "%s: Failed to add IPv4LL route, ignoring: %m", network->filename);
+
+ r = network_add_default_route_on_device(network);
+ if (r < 0)
+ log_warning_errno(r, "%s: Failed to add default route on device, ignoring: %m",
+ network->filename);
+
+ if (network_verify(network) < 0)
+ /* Ignore .network files that do not match the conditions. */
+ return 0;
+
+ r = ordered_hashmap_ensure_allocated(networks, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(*networks, network->name, network);
+ if (r < 0)
+ return r;
+
+ network = NULL;
+ return 0;
+}
+
+int network_load(Manager *manager, OrderedHashmap **networks) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(manager);
+
+ ordered_hashmap_clear_with_destructor(*networks, network_unref);
+
+ r = conf_files_list_strv(&files, ".network", NULL, 0, NETWORK_DIRS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate network files: %m");
+
+ STRV_FOREACH(f, files) {
+ r = network_load_one(manager, networks, *f);
+ if (r < 0)
+ log_error_errno(r, "Failed to load %s, ignoring: %m", *f);
+ }
+
+ return 0;
+}
+
+int network_reload(Manager *manager) {
+ OrderedHashmap *new_networks = NULL;
+ Network *n, *old;
+ int r;
+
+ assert(manager);
+
+ r = network_load(manager, &new_networks);
+ if (r < 0)
+ goto failure;
+
+ ORDERED_HASHMAP_FOREACH(n, new_networks) {
+ r = network_get_by_name(manager, n->name, &old);
+ if (r < 0)
+ continue; /* The .network file is new. */
+
+ if (n->timestamp != old->timestamp)
+ continue; /* The .network file is modified. */
+
+ if (!streq(n->filename, old->filename))
+ continue;
+
+ r = ordered_hashmap_replace(new_networks, old->name, old);
+ if (r < 0)
+ goto failure;
+
+ network_ref(old);
+ network_unref(n);
+ }
+
+ ordered_hashmap_free_with_destructor(manager->networks, network_unref);
+ manager->networks = new_networks;
+
+ return 0;
+
+failure:
+ ordered_hashmap_free_with_destructor(new_networks, network_unref);
+
+ return r;
+}
+
+static Network *network_free(Network *network) {
+ if (!network)
+ return NULL;
+
+ free(network->filename);
+
+ set_free_free(network->match_mac);
+ set_free_free(network->match_permanent_mac);
+ strv_free(network->match_path);
+ strv_free(network->match_driver);
+ strv_free(network->match_type);
+ strv_free(network->match_name);
+ strv_free(network->match_property);
+ strv_free(network->match_wlan_iftype);
+ strv_free(network->match_ssid);
+ set_free_free(network->match_bssid);
+ condition_free_list(network->conditions);
+
+ free(network->description);
+ free(network->dhcp_vendor_class_identifier);
+ free(network->dhcp_mudurl);
+ strv_free(network->dhcp_user_class);
+ free(network->dhcp_hostname);
+ set_free(network->dhcp_deny_listed_ip);
+ set_free(network->dhcp_allow_listed_ip);
+ set_free(network->dhcp_request_options);
+ set_free(network->dhcp6_request_options);
+ free(network->mac);
+ free(network->dhcp6_mudurl);
+ strv_free(network->dhcp6_user_class);
+ strv_free(network->dhcp6_vendor_class);
+
+ strv_free(network->ntp);
+ for (unsigned i = 0; i < network->n_dns; i++)
+ in_addr_full_free(network->dns[i]);
+ free(network->dns);
+ ordered_set_free(network->search_domains);
+ ordered_set_free(network->route_domains);
+ strv_free(network->bind_carrier);
+
+ ordered_set_free(network->router_search_domains);
+ free(network->router_dns);
+ set_free_free(network->ndisc_deny_listed_prefix);
+
+ free(network->bridge_name);
+ free(network->bond_name);
+ free(network->vrf_name);
+ hashmap_free_free_key(network->stacked_netdev_names);
+ netdev_unref(network->bridge);
+ netdev_unref(network->bond);
+ netdev_unref(network->vrf);
+ hashmap_free_with_destructor(network->stacked_netdevs, netdev_unref);
+
+ set_free_free(network->ipv6_proxy_ndp_addresses);
+ ordered_hashmap_free_with_destructor(network->addresses_by_section, address_free);
+ hashmap_free_with_destructor(network->routes_by_section, route_free);
+ hashmap_free_with_destructor(network->nexthops_by_section, nexthop_free);
+ hashmap_free_with_destructor(network->fdb_entries_by_section, fdb_entry_free);
+ hashmap_free_with_destructor(network->mdb_entries_by_section, mdb_entry_free);
+ hashmap_free_with_destructor(network->neighbors_by_section, neighbor_free);
+ hashmap_free_with_destructor(network->address_labels_by_section, address_label_free);
+ hashmap_free_with_destructor(network->prefixes_by_section, prefix_free);
+ hashmap_free_with_destructor(network->route_prefixes_by_section, route_prefix_free);
+ hashmap_free_with_destructor(network->rules_by_section, routing_policy_rule_free);
+ ordered_hashmap_free_with_destructor(network->sr_iov_by_section, sr_iov_free);
+ ordered_hashmap_free_with_destructor(network->tc_by_section, traffic_control_free);
+
+ if (network->manager &&
+ network->manager->duids_requesting_uuid)
+ set_remove(network->manager->duids_requesting_uuid, &network->duid);
+
+ free(network->name);
+
+ free(network->dhcp_server_timezone);
+
+ for (sd_dhcp_lease_server_type t = 0; t < _SD_DHCP_LEASE_SERVER_TYPE_MAX; t++)
+ free(network->dhcp_server_emit[t].addresses);
+
+ set_free_free(network->dnssec_negative_trust_anchors);
+
+ free(network->lldp_mud);
+
+ ordered_hashmap_free(network->dhcp_client_send_options);
+ ordered_hashmap_free(network->dhcp_client_send_vendor_options);
+ ordered_hashmap_free(network->dhcp_server_send_options);
+ ordered_hashmap_free(network->dhcp_server_send_vendor_options);
+ ordered_set_free(network->ipv6_tokens);
+ ordered_hashmap_free(network->dhcp6_client_send_options);
+ ordered_hashmap_free(network->dhcp6_client_send_vendor_options);
+
+ return mfree(network);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Network, network, network_free);
+
+int network_get_by_name(Manager *manager, const char *name, Network **ret) {
+ Network *network;
+
+ assert(manager);
+ assert(name);
+ assert(ret);
+
+ network = ordered_hashmap_get(manager->networks, name);
+ if (!network)
+ return -ENOENT;
+
+ *ret = network;
+
+ return 0;
+}
+
+int network_get(Manager *manager, unsigned short iftype, sd_device *device,
+ const char *ifname, char * const *alternative_names, const char *driver,
+ const struct ether_addr *mac, const struct ether_addr *permanent_mac,
+ enum nl80211_iftype wlan_iftype, const char *ssid, const struct ether_addr *bssid,
+ Network **ret) {
+ Network *network;
+
+ assert(manager);
+ assert(ret);
+
+ ORDERED_HASHMAP_FOREACH(network, manager->networks)
+ if (net_match_config(network->match_mac, network->match_permanent_mac,
+ network->match_path, network->match_driver,
+ network->match_type, network->match_name, network->match_property,
+ network->match_wlan_iftype, network->match_ssid, network->match_bssid,
+ device, mac, permanent_mac, driver, iftype,
+ ifname, alternative_names, wlan_iftype, ssid, bssid)) {
+ if (network->match_name && device) {
+ const char *attr;
+ uint8_t name_assign_type = NET_NAME_UNKNOWN;
+
+ if (sd_device_get_sysattr_value(device, "name_assign_type", &attr) >= 0)
+ (void) safe_atou8(attr, &name_assign_type);
+
+ if (name_assign_type == NET_NAME_ENUM)
+ log_warning("%s: found matching network '%s', based on potentially unpredictable ifname",
+ ifname, network->filename);
+ else
+ log_debug("%s: found matching network '%s'", ifname, network->filename);
+ } else
+ log_debug("%s: found matching network '%s'", ifname, network->filename);
+
+ *ret = network;
+ return 0;
+ }
+
+ *ret = NULL;
+
+ return -ENOENT;
+}
+
+int network_apply(Network *network, Link *link) {
+ assert(network);
+ assert(link);
+
+ link->network = network_ref(network);
+
+ if (network->n_dns > 0 ||
+ !strv_isempty(network->ntp) ||
+ !ordered_set_isempty(network->search_domains) ||
+ !ordered_set_isempty(network->route_domains))
+ link_dirty(link);
+
+ return 0;
+}
+
+bool network_has_static_ipv6_configurations(Network *network) {
+ Address *address;
+ Route *route;
+ FdbEntry *fdb;
+ MdbEntry *mdb;
+ Neighbor *neighbor;
+
+ assert(network);
+
+ ORDERED_HASHMAP_FOREACH(address, network->addresses_by_section)
+ if (address->family == AF_INET6)
+ return true;
+
+ HASHMAP_FOREACH(route, network->routes_by_section)
+ if (route->family == AF_INET6)
+ return true;
+
+ HASHMAP_FOREACH(fdb, network->fdb_entries_by_section)
+ if (fdb->family == AF_INET6)
+ return true;
+
+ HASHMAP_FOREACH(mdb, network->mdb_entries_by_section)
+ if (mdb->family == AF_INET6)
+ return true;
+
+ HASHMAP_FOREACH(neighbor, network->neighbors_by_section)
+ if (neighbor->family == AF_INET6)
+ return true;
+
+ if (!hashmap_isempty(network->address_labels_by_section))
+ return true;
+
+ if (!hashmap_isempty(network->prefixes_by_section))
+ return true;
+
+ if (!hashmap_isempty(network->route_prefixes_by_section))
+ return true;
+
+ return false;
+}
+
+int config_parse_stacked_netdev(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ _cleanup_free_ char *name = NULL;
+ NetDevKind kind = ltype;
+ Hashmap **h = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+ assert(IN_SET(kind,
+ NETDEV_KIND_VLAN, NETDEV_KIND_MACVLAN, NETDEV_KIND_MACVTAP,
+ NETDEV_KIND_IPVLAN, NETDEV_KIND_IPVTAP, NETDEV_KIND_VXLAN,
+ NETDEV_KIND_L2TP, NETDEV_KIND_MACSEC, _NETDEV_KIND_TUNNEL,
+ NETDEV_KIND_XFRM));
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid netdev name in %s=, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ name = strdup(rvalue);
+ if (!name)
+ return log_oom();
+
+ r = hashmap_ensure_allocated(h, &string_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = hashmap_put(*h, name, INT_TO_PTR(kind));
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Cannot add NetDev '%s' to network, ignoring assignment: %m", name);
+ else if (r == 0)
+ log_syntax(unit, LOG_DEBUG, filename, line, r,
+ "NetDev '%s' specified twice, ignoring.", name);
+ else
+ name = NULL;
+
+ return 0;
+}
+
+int config_parse_domains(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = data;
+ int r;
+
+ assert(n);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ n->search_domains = ordered_set_free(n->search_domains);
+ n->route_domains = ordered_set_free(n->route_domains);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *w = NULL, *normalized = NULL;
+ const char *domain;
+ bool is_route;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract search or route domain, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ is_route = w[0] == '~';
+ domain = is_route ? w + 1 : w;
+
+ if (dns_name_is_root(domain) || streq(domain, "*")) {
+ /* If the root domain appears as is, or the special token "*" is found, we'll
+ * consider this as routing domain, unconditionally. */
+ is_route = true;
+ domain = "."; /* make sure we don't allow empty strings, thus write the root
+ * domain as "." */
+ } else {
+ r = dns_name_normalize(domain, 0, &normalized);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "'%s' is not a valid domain name, ignoring.", domain);
+ continue;
+ }
+
+ domain = normalized;
+
+ if (is_localhost(domain)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "'localhost' domain may not be configured as search or route domain, ignoring assignment: %s",
+ domain);
+ continue;
+ }
+ }
+
+ OrderedSet **set = is_route ? &n->route_domains : &n->search_domains;
+ r = ordered_set_ensure_allocated(set, &string_hash_ops_free);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_set_put_strdup(*set, domain);
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_hostname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *hn = NULL;
+ char **hostname = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &hn, userdata);
+ if (r < 0)
+ return r;
+
+ if (!hostname_is_valid(hn, false)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Hostname is not valid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = dns_name_is_valid(hn);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to check validity of hostname '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Hostname is not a valid DNS domain name, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ return free_and_replace(*hostname, hn);
+}
+
+int config_parse_timezone(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *tz = NULL;
+ char **datap = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &tz, userdata);
+ if (r < 0)
+ return r;
+
+ if (!timezone_is_valid(tz, LOG_WARNING)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Timezone is not valid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ return free_and_replace(*datap, tz);
+}
+
+int config_parse_dns(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ for (unsigned i = 0; i < n->n_dns; i++)
+ in_addr_full_free(n->dns[i]);
+ n->dns = mfree(n->dns);
+ n->n_dns = 0;
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_(in_addr_full_freep) struct in_addr_full *dns = NULL;
+ _cleanup_free_ char *w = NULL;
+ struct in_addr_full **m;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = in_addr_full_new_from_string(w, &dns);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse dns server address, ignoring: %s", w);
+ continue;
+ }
+
+ if (IN_SET(dns->port, 53, 853))
+ dns->port = 0;
+
+ m = reallocarray(n->dns, n->n_dns + 1, sizeof(struct in_addr_full*));
+ if (!m)
+ return log_oom();
+
+ m[n->n_dns++] = TAKE_PTR(dns);
+ n->dns = m;
+ }
+}
+
+int config_parse_dnssec_negative_trust_anchors(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = data;
+ int r;
+
+ assert(n);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ n->dnssec_negative_trust_anchors = set_free_free(n->dnssec_negative_trust_anchors);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract negative trust anchor domain, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = dns_name_is_valid(w);
+ if (r <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "%s is not a valid domain name, ignoring.", w);
+ continue;
+ }
+
+ r = set_ensure_consume(&n->dnssec_negative_trust_anchors, &dns_name_hash_ops, TAKE_PTR(w));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_ntp(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***l = data;
+ int r;
+
+ assert(l);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract NTP server name, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = dns_name_is_valid_or_address(w);
+ if (r <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "%s is not a valid domain name or IP address, ignoring.", w);
+ continue;
+ }
+
+ if (strv_length(*l) > MAX_NTP_SERVERS) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "More than %u NTP servers specified, ignoring \"%s\" and any subsequent entries.",
+ MAX_NTP_SERVERS, w);
+ return 0;
+ }
+
+ r = strv_consume(l, TAKE_PTR(w));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_required_for_online(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = data;
+ LinkOperationalStateRange range;
+ bool required = true;
+ int r;
+
+ if (isempty(rvalue)) {
+ network->required_for_online = true;
+ network->required_operstate_for_online = LINK_OPERSTATE_RANGE_DEFAULT;
+ return 0;
+ }
+
+ r = parse_operational_state_range(rvalue, &range);
+ if (r < 0) {
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s= setting, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ required = r;
+ range = LINK_OPERSTATE_RANGE_DEFAULT;
+ }
+
+ network->required_for_online = required;
+ network->required_operstate_for_online = range;
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_keep_configuration, keep_configuration, KeepConfiguration,
+ "Failed to parse KeepConfiguration= setting");
+
+static const char* const keep_configuration_table[_KEEP_CONFIGURATION_MAX] = {
+ [KEEP_CONFIGURATION_NO] = "no",
+ [KEEP_CONFIGURATION_DHCP_ON_STOP] = "dhcp-on-stop",
+ [KEEP_CONFIGURATION_DHCP] = "dhcp",
+ [KEEP_CONFIGURATION_STATIC] = "static",
+ [KEEP_CONFIGURATION_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(keep_configuration, KeepConfiguration, KEEP_CONFIGURATION_YES);
+
+static const char* const ipv6_link_local_address_gen_mode_table[_IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_MAX] = {
+ [IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_EUI64] = "eui64",
+ [IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_NONE] = "none",
+ [IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_STABLE_PRIVACY] = "stable-privacy",
+ [IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_RANDOM] = "random",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ipv6_link_local_address_gen_mode, IPv6LinkLocalAddressGenMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_ipv6_link_local_address_gen_mode, ipv6_link_local_address_gen_mode, IPv6LinkLocalAddressGenMode, "Failed to parse IPv6 link local address generation mode");
diff --git a/src/network/networkd-network.h b/src/network/networkd-network.h
new file mode 100644
index 0000000..fd0fe05
--- /dev/null
+++ b/src/network/networkd-network.h
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/nl80211.h>
+
+#include "sd-bus.h"
+#include "sd-device.h"
+
+#include "bridge.h"
+#include "condition.h"
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "netdev.h"
+#include "networkd-brvlan.h"
+#include "networkd-dhcp-common.h"
+#include "networkd-dhcp4.h"
+#include "networkd-dhcp6.h"
+#include "networkd-dhcp-server.h"
+#include "networkd-lldp-rx.h"
+#include "networkd-lldp-tx.h"
+#include "networkd-ndisc.h"
+#include "networkd-radv.h"
+#include "networkd-sysctl.h"
+#include "networkd-util.h"
+#include "ordered-set.h"
+#include "resolve-util.h"
+#include "socket-netlink.h"
+
+typedef enum KeepConfiguration {
+ KEEP_CONFIGURATION_NO = 0,
+ KEEP_CONFIGURATION_DHCP_ON_START = 1 << 0,
+ KEEP_CONFIGURATION_DHCP_ON_STOP = 1 << 1,
+ KEEP_CONFIGURATION_DHCP = KEEP_CONFIGURATION_DHCP_ON_START | KEEP_CONFIGURATION_DHCP_ON_STOP,
+ KEEP_CONFIGURATION_STATIC = 1 << 2,
+ KEEP_CONFIGURATION_YES = KEEP_CONFIGURATION_DHCP | KEEP_CONFIGURATION_STATIC,
+ _KEEP_CONFIGURATION_MAX,
+ _KEEP_CONFIGURATION_INVALID = -1,
+} KeepConfiguration;
+
+typedef enum IPv6LinkLocalAddressGenMode {
+ IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_EUI64 = IN6_ADDR_GEN_MODE_EUI64,
+ IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_NONE = IN6_ADDR_GEN_MODE_NONE,
+ IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_STABLE_PRIVACY = IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_RANDOM = IN6_ADDR_GEN_MODE_RANDOM,
+ _IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_MAX,
+ _IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_INVALID = -1
+} IPv6LinkLocalAddressGenMode;
+
+typedef struct Manager Manager;
+
+typedef struct NetworkDHCPServerEmitAddress {
+ bool emit;
+ struct in_addr *addresses;
+ size_t n_addresses;
+} NetworkDHCPServerEmitAddress;
+
+struct Network {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ char *name;
+ char *filename;
+ usec_t timestamp;
+ char *description;
+
+ /* [Match] section */
+ Set *match_mac;
+ Set *match_permanent_mac;
+ char **match_path;
+ char **match_driver;
+ char **match_type;
+ char **match_name;
+ char **match_property;
+ char **match_wlan_iftype;
+ char **match_ssid;
+ Set *match_bssid;
+ LIST_HEAD(Condition, conditions);
+
+ /* Master or stacked netdevs */
+ NetDev *bridge;
+ NetDev *bond;
+ NetDev *vrf;
+ NetDev *xfrm;
+ Hashmap *stacked_netdevs;
+ char *bridge_name;
+ char *bond_name;
+ char *vrf_name;
+ Hashmap *stacked_netdev_names;
+
+ /* [Link] section */
+ struct ether_addr *mac;
+ uint32_t mtu;
+ uint32_t group;
+ int arp;
+ int multicast;
+ int allmulticast;
+ bool unmanaged;
+ bool required_for_online; /* Is this network required to be considered online? */
+ LinkOperationalStateRange required_operstate_for_online;
+
+ /* misc settings */
+ bool configure_without_carrier;
+ int ignore_carrier_loss;
+ KeepConfiguration keep_configuration;
+ char **bind_carrier;
+ bool default_route_on_device;
+ bool ip_masquerade;
+
+ /* DHCP Client Support */
+ AddressFamily dhcp;
+ DHCPClientIdentifier dhcp_client_identifier;
+ DUID duid;
+ uint32_t iaid;
+ bool iaid_set;
+ char *dhcp_vendor_class_identifier;
+ char *dhcp_mudurl;
+ char **dhcp_user_class;
+ char *dhcp_hostname;
+ uint64_t dhcp_max_attempts;
+ uint32_t dhcp_route_metric;
+ bool dhcp_route_metric_set;
+ uint32_t dhcp_route_table;
+ uint32_t dhcp_fallback_lease_lifetime;
+ uint32_t dhcp_route_mtu;
+ uint16_t dhcp_client_port;
+ int dhcp_critical;
+ int dhcp_ip_service_type;
+ bool dhcp_anonymize;
+ bool dhcp_send_hostname;
+ bool dhcp_broadcast;
+ bool dhcp_use_dns;
+ bool dhcp_use_dns_set;
+ bool dhcp_routes_to_dns;
+ bool dhcp_use_ntp;
+ bool dhcp_use_ntp_set;
+ bool dhcp_use_sip;
+ bool dhcp_use_mtu;
+ bool dhcp_use_routes;
+ int dhcp_use_gateway;
+ bool dhcp_use_timezone;
+ bool dhcp_use_hostname;
+ bool dhcp_route_table_set;
+ bool dhcp_send_release;
+ bool dhcp_send_decline;
+ DHCPUseDomains dhcp_use_domains;
+ Set *dhcp_deny_listed_ip;
+ Set *dhcp_allow_listed_ip;
+ Set *dhcp_request_options;
+ OrderedHashmap *dhcp_client_send_options;
+ OrderedHashmap *dhcp_client_send_vendor_options;
+
+ /* DHCPv6 Client support*/
+ bool dhcp6_use_dns;
+ bool dhcp6_use_dns_set;
+ bool dhcp6_use_ntp;
+ bool dhcp6_use_ntp_set;
+ bool dhcp6_rapid_commit;
+ uint8_t dhcp6_pd_length;
+ uint32_t dhcp6_route_metric;
+ bool dhcp6_route_metric_set;
+ char *dhcp6_mudurl;
+ char **dhcp6_user_class;
+ char **dhcp6_vendor_class;
+ struct in6_addr dhcp6_pd_address;
+ DHCP6ClientStartMode dhcp6_without_ra;
+ OrderedHashmap *dhcp6_client_send_options;
+ OrderedHashmap *dhcp6_client_send_vendor_options;
+ Set *dhcp6_request_options;
+ /* Start DHCPv6 PD also when 'O' RA flag is set, see RFC 7084, WPD-4 */
+ bool dhcp6_force_pd_other_information;
+
+ /* DHCP Server Support */
+ bool dhcp_server;
+ NetworkDHCPServerEmitAddress dhcp_server_emit[_SD_DHCP_LEASE_SERVER_TYPE_MAX];
+ bool dhcp_server_emit_router;
+ bool dhcp_server_emit_timezone;
+ char *dhcp_server_timezone;
+ usec_t dhcp_server_default_lease_time_usec, dhcp_server_max_lease_time_usec;
+ uint32_t dhcp_server_pool_offset;
+ uint32_t dhcp_server_pool_size;
+ OrderedHashmap *dhcp_server_send_options;
+ OrderedHashmap *dhcp_server_send_vendor_options;
+
+ /* link local addressing support */
+ AddressFamily link_local;
+ IPv6LinkLocalAddressGenMode ipv6ll_address_gen_mode;
+ bool ipv4ll_route;
+
+ /* IPv6 RA support */
+ RADVPrefixDelegation router_prefix_delegation;
+ usec_t router_lifetime_usec;
+ uint8_t router_preference;
+ bool router_managed;
+ bool router_other_information;
+ bool router_emit_dns;
+ bool router_emit_domains;
+ usec_t router_dns_lifetime_usec;
+ struct in6_addr *router_dns;
+ unsigned n_router_dns;
+ OrderedSet *router_search_domains;
+
+ /* DHCPv6 Prefix Delegation support */
+ int dhcp6_pd;
+ bool dhcp6_pd_announce;
+ bool dhcp6_pd_assign;
+ int64_t dhcp6_pd_subnet_id;
+ union in_addr_union dhcp6_pd_token;
+
+ /* Bridge Support */
+ int use_bpdu;
+ int hairpin;
+ int fast_leave;
+ int allow_port_to_be_root;
+ int unicast_flood;
+ int multicast_flood;
+ int multicast_to_unicast;
+ int neighbor_suppression;
+ int learning;
+ int bridge_proxy_arp;
+ int bridge_proxy_arp_wifi;
+ uint32_t cost;
+ uint16_t priority;
+ MulticastRouter multicast_router;
+
+ /* Bridge VLAN */
+ bool use_br_vlan;
+ uint16_t pvid;
+ uint32_t br_vid_bitmap[BRIDGE_VLAN_BITMAP_LEN];
+ uint32_t br_untagged_bitmap[BRIDGE_VLAN_BITMAP_LEN];
+
+ /* CAN support */
+ uint32_t can_bitrate;
+ unsigned can_sample_point;
+ uint32_t can_data_bitrate;
+ unsigned can_data_sample_point;
+ usec_t can_restart_us;
+ int can_triple_sampling;
+ int can_termination;
+ int can_listen_only;
+ int can_fd_mode;
+ int can_non_iso;
+
+ /* sysctl settings */
+ AddressFamily ip_forward;
+ int ipv4_accept_local;
+ int ipv6_dad_transmits;
+ int ipv6_hop_limit;
+ int proxy_arp;
+ uint32_t ipv6_mtu;
+ IPv6PrivacyExtensions ipv6_privacy_extensions;
+ int ipv6_proxy_ndp;
+ Set *ipv6_proxy_ndp_addresses;
+
+ /* IPv6 accept RA */
+ int ipv6_accept_ra;
+ bool ipv6_accept_ra_use_dns;
+ bool ipv6_accept_ra_use_autonomous_prefix;
+ bool ipv6_accept_ra_use_onlink_prefix;
+ bool active_slave;
+ bool primary_slave;
+ bool ipv6_accept_ra_route_table_set;
+ DHCPUseDomains ipv6_accept_ra_use_domains;
+ IPv6AcceptRAStartDHCP6Client ipv6_accept_ra_start_dhcp6_client;
+ uint32_t ipv6_accept_ra_route_table;
+ Set *ndisc_deny_listed_prefix;
+ OrderedSet *ipv6_tokens;
+
+ /* LLDP support */
+ LLDPMode lldp_mode; /* LLDP reception */
+ LLDPEmit lldp_emit; /* LLDP transmission */
+ char *lldp_mud; /* LLDP MUD URL */
+
+ OrderedHashmap *addresses_by_section;
+ Hashmap *routes_by_section;
+ Hashmap *nexthops_by_section;
+ Hashmap *fdb_entries_by_section;
+ Hashmap *mdb_entries_by_section;
+ Hashmap *neighbors_by_section;
+ Hashmap *address_labels_by_section;
+ Hashmap *prefixes_by_section;
+ Hashmap *route_prefixes_by_section;
+ Hashmap *rules_by_section;
+ OrderedHashmap *tc_by_section;
+ OrderedHashmap *sr_iov_by_section;
+
+ /* All kinds of DNS configuration */
+ struct in_addr_full **dns;
+ unsigned n_dns;
+ OrderedSet *search_domains, *route_domains;
+ int dns_default_route;
+ ResolveSupport llmnr;
+ ResolveSupport mdns;
+ DnssecMode dnssec_mode;
+ DnsOverTlsMode dns_over_tls_mode;
+ Set *dnssec_negative_trust_anchors;
+
+ /* NTP */
+ char **ntp;
+};
+
+Network *network_ref(Network *network);
+Network *network_unref(Network *network);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Network*, network_unref);
+
+int network_load(Manager *manager, OrderedHashmap **networks);
+int network_reload(Manager *manager);
+int network_load_one(Manager *manager, OrderedHashmap **networks, const char *filename);
+int network_verify(Network *network);
+
+int network_get_by_name(Manager *manager, const char *name, Network **ret);
+int network_get(Manager *manager, unsigned short iftype, sd_device *device,
+ const char *ifname, char * const *alternative_names, const char *driver,
+ const struct ether_addr *mac, const struct ether_addr *permanent_mac,
+ enum nl80211_iftype wlan_iftype, const char *ssid, const struct ether_addr *bssid,
+ Network **ret);
+int network_apply(Network *network, Link *link);
+void network_apply_anonymize_if_set(Network *network);
+
+bool network_has_static_ipv6_configurations(Network *network);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_stacked_netdev);
+CONFIG_PARSER_PROTOTYPE(config_parse_tunnel);
+CONFIG_PARSER_PROTOTYPE(config_parse_domains);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns);
+CONFIG_PARSER_PROTOTYPE(config_parse_hostname);
+CONFIG_PARSER_PROTOTYPE(config_parse_timezone);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssec_negative_trust_anchors);
+CONFIG_PARSER_PROTOTYPE(config_parse_ntp);
+CONFIG_PARSER_PROTOTYPE(config_parse_required_for_online);
+CONFIG_PARSER_PROTOTYPE(config_parse_keep_configuration);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_link_local_address_gen_mode);
+
+const struct ConfigPerfItem* network_network_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+const char* keep_configuration_to_string(KeepConfiguration i) _const_;
+KeepConfiguration keep_configuration_from_string(const char *s) _pure_;
+
+const char* ipv6_link_local_address_gen_mode_to_string(IPv6LinkLocalAddressGenMode s) _const_;
+IPv6LinkLocalAddressGenMode ipv6_link_local_address_gen_mode_from_string(const char *s) _pure_;
diff --git a/src/network/networkd-nexthop.c b/src/network/networkd-nexthop.c
new file mode 100644
index 0000000..4a09b4c
--- /dev/null
+++ b/src/network/networkd-nexthop.c
@@ -0,0 +1,534 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc.
+ */
+
+#include <linux/nexthop.h>
+
+#include "alloc-util.h"
+#include "netlink-util.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "networkd-nexthop.h"
+#include "parse-util.h"
+#include "set.h"
+#include "string-util.h"
+
+NextHop *nexthop_free(NextHop *nexthop) {
+ if (!nexthop)
+ return NULL;
+
+ if (nexthop->network) {
+ assert(nexthop->section);
+ hashmap_remove(nexthop->network->nexthops_by_section, nexthop->section);
+ }
+
+ network_config_section_free(nexthop->section);
+
+ if (nexthop->link) {
+ set_remove(nexthop->link->nexthops, nexthop);
+ set_remove(nexthop->link->nexthops_foreign, nexthop);
+ }
+
+ return mfree(nexthop);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(NextHop, nexthop_free);
+
+static int nexthop_new(NextHop **ret) {
+ _cleanup_(nexthop_freep) NextHop *nexthop = NULL;
+
+ nexthop = new(NextHop, 1);
+ if (!nexthop)
+ return -ENOMEM;
+
+ *nexthop = (NextHop) {
+ .family = AF_UNSPEC,
+ };
+
+ *ret = TAKE_PTR(nexthop);
+
+ return 0;
+}
+
+static int nexthop_new_static(Network *network, const char *filename, unsigned section_line, NextHop **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(nexthop_freep) NextHop *nexthop = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ nexthop = hashmap_get(network->nexthops_by_section, n);
+ if (nexthop) {
+ *ret = TAKE_PTR(nexthop);
+ return 0;
+ }
+
+ r = nexthop_new(&nexthop);
+ if (r < 0)
+ return r;
+
+ nexthop->protocol = RTPROT_STATIC;
+ nexthop->network = network;
+ nexthop->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->nexthops_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->nexthops_by_section, nexthop->section, nexthop);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(nexthop);
+ return 0;
+}
+
+static void nexthop_hash_func(const NextHop *nexthop, struct siphash *state) {
+ assert(nexthop);
+
+ siphash24_compress(&nexthop->id, sizeof(nexthop->id), state);
+ siphash24_compress(&nexthop->family, sizeof(nexthop->family), state);
+
+ switch (nexthop->family) {
+ case AF_INET:
+ case AF_INET6:
+ siphash24_compress(&nexthop->gw, FAMILY_ADDRESS_SIZE(nexthop->family), state);
+
+ break;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ break;
+ }
+}
+
+static int nexthop_compare_func(const NextHop *a, const NextHop *b) {
+ int r;
+
+ r = CMP(a->id, b->id);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->family, b->family);
+ if (r != 0)
+ return r;
+
+ if (IN_SET(a->family, AF_INET, AF_INET6))
+ return memcmp(&a->gw, &b->gw, FAMILY_ADDRESS_SIZE(a->family));
+
+ return 0;
+}
+
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ nexthop_hash_ops,
+ NextHop,
+ nexthop_hash_func,
+ nexthop_compare_func,
+ nexthop_free);
+
+static int nexthop_get(Link *link, NextHop *in, NextHop **ret) {
+ NextHop *existing;
+
+ assert(link);
+ assert(in);
+
+ existing = set_get(link->nexthops, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(link->nexthops_foreign, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int nexthop_add_internal(Link *link, Set **nexthops, NextHop *in, NextHop **ret) {
+ _cleanup_(nexthop_freep) NextHop *nexthop = NULL;
+ int r;
+
+ assert(link);
+ assert(nexthops);
+ assert(in);
+
+ r = nexthop_new(&nexthop);
+ if (r < 0)
+ return r;
+
+ nexthop->id = in->id;
+ nexthop->family = in->family;
+ nexthop->gw = in->gw;
+
+ r = set_ensure_put(nexthops, &nexthop_hash_ops, nexthop);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EEXIST;
+
+ nexthop->link = link;
+
+ if (ret)
+ *ret = nexthop;
+
+ nexthop = NULL;
+
+ return 0;
+}
+
+static int nexthop_add_foreign(Link *link, NextHop *in, NextHop **ret) {
+ return nexthop_add_internal(link, &link->nexthops_foreign, in, ret);
+}
+
+static int nexthop_add(Link *link, NextHop *in, NextHop **ret) {
+ NextHop *nexthop;
+ int r;
+
+ r = nexthop_get(link, in, &nexthop);
+ if (r == -ENOENT) {
+ /* NextHop does not exist, create a new one */
+ r = nexthop_add_internal(link, &link->nexthops, in, &nexthop);
+ if (r < 0)
+ return r;
+ } else if (r == 0) {
+ /* Take over a foreign nexthop */
+ r = set_ensure_put(&link->nexthops, &nexthop_hash_ops, nexthop);
+ if (r < 0)
+ return r;
+
+ set_remove(link->nexthops_foreign, nexthop);
+ } else if (r == 1) {
+ /* NextHop exists, do nothing */
+ ;
+ } else
+ return r;
+
+ if (ret)
+ *ret = nexthop;
+
+ return 0;
+}
+
+static int nexthop_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->nexthop_messages > 0);
+
+ link->nexthop_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not set nexthop");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->nexthop_messages == 0) {
+ log_link_debug(link, "Nexthop set");
+ link->static_nexthops_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int nexthop_configure(NextHop *nexthop, Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+ assert(IN_SET(nexthop->family, AF_INET, AF_INET6));
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *gw = NULL;
+
+ if (!in_addr_is_null(nexthop->family, &nexthop->gw))
+ (void) in_addr_to_string(nexthop->family, &nexthop->gw, &gw);
+
+ log_link_debug(link, "Configuring nexthop: gw: %s", strna(gw));
+ }
+
+ r = sd_rtnl_message_new_nexthop(link->manager->rtnl, &req,
+ RTM_NEWNEXTHOP, nexthop->family,
+ nexthop->protocol);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_NEWNEXTHOP message: %m");
+
+ r = sd_netlink_message_append_u32(req, NHA_ID, nexthop->id);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NHA_ID attribute: %m");
+
+ r = sd_netlink_message_append_u32(req, NHA_OIF, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NHA_OIF attribute: %m");
+
+ if (in_addr_is_null(nexthop->family, &nexthop->gw) == 0) {
+ r = netlink_message_append_in_addr_union(req, NHA_GATEWAY, nexthop->family, &nexthop->gw);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append NHA_GATEWAY attribute: %m");
+
+ r = sd_rtnl_message_nexthop_set_family(req, nexthop->family);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set nexthop family: %m");
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, nexthop_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ r = nexthop_add(link, nexthop, &nexthop);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not add nexthop: %m");
+
+ return 1;
+}
+
+int link_set_nexthop(Link *link) {
+ NextHop *nh;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ link->static_nexthops_configured = false;
+
+ HASHMAP_FOREACH(nh, link->network->nexthops_by_section) {
+ r = nexthop_configure(nh, link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set nexthop: %m");
+
+ link->nexthop_messages++;
+ }
+
+ if (link->nexthop_messages == 0) {
+ link->static_nexthops_configured = true;
+ link_check_ready(link);
+ } else {
+ log_link_debug(link, "Setting nexthop");
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ }
+
+ return 1;
+}
+
+int manager_rtnl_process_nexthop(sd_netlink *rtnl, sd_netlink_message *message, Manager *m) {
+ _cleanup_(nexthop_freep) NextHop *tmp = NULL;
+ _cleanup_free_ char *gateway = NULL;
+ NextHop *nexthop = NULL;
+ uint32_t ifindex;
+ uint16_t type;
+ Link *link;
+ int r;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_message_warning_errno(message, r, "rtnl: failed to receive rule message, ignoring");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWNEXTHOP, RTM_DELNEXTHOP)) {
+ log_warning("rtnl: received unexpected message type %u when processing nexthop, ignoring.", type);
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, NHA_OIF, &ifindex);
+ if (r == -ENODATA) {
+ log_warning_errno(r, "rtnl: received nexthop message without NHA_OIF attribute, ignoring: %m");
+ return 0;
+ } else if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get NHA_OIF attribute, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received nexthop message with invalid ifindex %"PRIu32", ignoring.", ifindex);
+ return 0;
+ }
+
+ r = link_get(m, ifindex, &link);
+ if (r < 0 || !link) {
+ if (!m->enumerating)
+ log_warning("rtnl: received nexthop message for link (%"PRIu32") we do not know about, ignoring", ifindex);
+ return 0;
+ }
+
+ r = nexthop_new(&tmp);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_rtnl_message_get_family(message, &tmp->family);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: could not get nexthop family, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(tmp->family, AF_INET, AF_INET6))
+ return log_link_debug(link, "rtnl: received nexthop message with invalid family %d, ignoring.", tmp->family);
+
+ r = netlink_message_read_in_addr_union(message, NHA_GATEWAY, tmp->family, &tmp->gw);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: could not get NHA_GATEWAY attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, NHA_ID, &tmp->id);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: could not get NHA_ID attribute, ignoring: %m");
+ return 0;
+ }
+
+ (void) nexthop_get(link, tmp, &nexthop);
+
+ if (DEBUG_LOGGING)
+ (void) in_addr_to_string(tmp->family, &tmp->gw, &gateway);
+
+ switch (type) {
+ case RTM_NEWNEXTHOP:
+ if (nexthop)
+ log_link_debug(link, "Received remembered nexthop: %s, id: %d", strna(gateway), tmp->id);
+ else {
+ log_link_debug(link, "Remembering foreign nexthop: %s, id: %d", strna(gateway), tmp->id);
+ r = nexthop_add_foreign(link, tmp, &nexthop);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Could not remember foreign nexthop, ignoring: %m");
+ return 0;
+ }
+ }
+ break;
+ case RTM_DELNEXTHOP:
+ if (nexthop) {
+ log_link_debug(link, "Forgetting nexthop: %s, id: %d", strna(gateway), tmp->id);
+ nexthop_free(nexthop);
+ } else
+ log_link_debug(link, "Kernel removed a nexthop we don't remember: %s, id: %d, ignoring.",
+ strna(gateway), tmp->id);
+ break;
+
+ default:
+ assert_not_reached("Received invalid RTNL message type");
+ }
+
+ return 1;
+}
+
+static int nexthop_section_verify(NextHop *nh) {
+ if (section_is_invalid(nh->section))
+ return -EINVAL;
+
+ if (in_addr_is_null(nh->family, &nh->gw) < 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+void network_drop_invalid_nexthops(Network *network) {
+ NextHop *nh;
+
+ assert(network);
+
+ HASHMAP_FOREACH(nh, network->nexthops_by_section)
+ if (nexthop_section_verify(nh) < 0)
+ nexthop_free(nh);
+}
+
+int config_parse_nexthop_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(nexthop_free_or_set_invalidp) NextHop *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = nexthop_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = safe_atou32(rvalue, &n->id);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Could not parse nexthop id \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_nexthop_gateway(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(nexthop_free_or_set_invalidp) NextHop *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = nexthop_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = in_addr_from_string_auto(rvalue, &n->family, &n->gw);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid %s='%s', ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
diff --git a/src/network/networkd-nexthop.h b/src/network/networkd-nexthop.h
new file mode 100644
index 0000000..75714e7
--- /dev/null
+++ b/src/network/networkd-nexthop.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc.
+ */
+
+#pragma once
+
+#include <inttypes.h>
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "networkd-util.h"
+
+typedef struct Link Link;
+typedef struct Manager Manager;
+typedef struct Network Network;
+
+typedef struct NextHop {
+ Network *network;
+ NetworkConfigSection *section;
+
+ Link *link;
+
+ unsigned char protocol;
+
+ uint32_t id;
+ int family;
+ union in_addr_union gw;
+} NextHop;
+
+NextHop *nexthop_free(NextHop *nexthop);
+
+void network_drop_invalid_nexthops(Network *network);
+
+int link_set_nexthop(Link *link);
+
+int manager_rtnl_process_nexthop(sd_netlink *rtnl, sd_netlink_message *message, Manager *m);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_nexthop_id);
+CONFIG_PARSER_PROTOTYPE(config_parse_nexthop_gateway);
diff --git a/src/network/networkd-radv.c b/src/network/networkd-radv.c
new file mode 100644
index 0000000..a8e1b2b
--- /dev/null
+++ b/src/network/networkd-radv.c
@@ -0,0 +1,999 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include <netinet/icmp6.h>
+#include <arpa/inet.h>
+
+#include "dns-domain.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "networkd-radv.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "string-table.h"
+#include "strv.h"
+
+Prefix *prefix_free(Prefix *prefix) {
+ if (!prefix)
+ return NULL;
+
+ if (prefix->network) {
+ assert(prefix->section);
+ hashmap_remove(prefix->network->prefixes_by_section, prefix->section);
+ }
+
+ network_config_section_free(prefix->section);
+ sd_radv_prefix_unref(prefix->radv_prefix);
+
+ return mfree(prefix);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(Prefix, prefix_free);
+
+static int prefix_new(Prefix **ret) {
+ _cleanup_(prefix_freep) Prefix *prefix = NULL;
+
+ prefix = new0(Prefix, 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ if (sd_radv_prefix_new(&prefix->radv_prefix) < 0)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+static int prefix_new_static(Network *network, const char *filename, unsigned section_line, Prefix **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(prefix_freep) Prefix *prefix = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ prefix = hashmap_get(network->prefixes_by_section, n);
+ if (prefix) {
+ *ret = TAKE_PTR(prefix);
+ return 0;
+ }
+
+ r = prefix_new(&prefix);
+ if (r < 0)
+ return r;
+
+ prefix->network = network;
+ prefix->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->prefixes_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->prefixes_by_section, prefix->section, prefix);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+RoutePrefix *route_prefix_free(RoutePrefix *prefix) {
+ if (!prefix)
+ return NULL;
+
+ if (prefix->network) {
+ assert(prefix->section);
+ hashmap_remove(prefix->network->route_prefixes_by_section, prefix->section);
+ }
+
+ network_config_section_free(prefix->section);
+ sd_radv_route_prefix_unref(prefix->radv_route_prefix);
+
+ return mfree(prefix);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(RoutePrefix, route_prefix_free);
+
+static int route_prefix_new(RoutePrefix **ret) {
+ _cleanup_(route_prefix_freep) RoutePrefix *prefix = NULL;
+
+ prefix = new0(RoutePrefix, 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ if (sd_radv_route_prefix_new(&prefix->radv_route_prefix) < 0)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+static int route_prefix_new_static(Network *network, const char *filename, unsigned section_line, RoutePrefix **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(route_prefix_freep) RoutePrefix *prefix = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ prefix = hashmap_get(network->route_prefixes_by_section, n);
+ if (prefix) {
+ *ret = TAKE_PTR(prefix);
+ return 0;
+ }
+
+ r = route_prefix_new(&prefix);
+ if (r < 0)
+ return r;
+
+ prefix->network = network;
+ prefix->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->route_prefixes_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->route_prefixes_by_section, prefix->section, prefix);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+void network_drop_invalid_prefixes(Network *network) {
+ Prefix *prefix;
+
+ assert(network);
+
+ HASHMAP_FOREACH(prefix, network->prefixes_by_section)
+ if (section_is_invalid(prefix->section))
+ prefix_free(prefix);
+}
+
+void network_drop_invalid_route_prefixes(Network *network) {
+ RoutePrefix *prefix;
+
+ assert(network);
+
+ HASHMAP_FOREACH(prefix, network->route_prefixes_by_section)
+ if (section_is_invalid(prefix->section))
+ route_prefix_free(prefix);
+}
+
+void network_adjust_radv(Network *network) {
+ assert(network);
+
+ /* After this function is called, network->router_prefix_delegation can be treated as a boolean. */
+
+ if (network->dhcp6_pd < 0)
+ /* For backward compatibility. */
+ network->dhcp6_pd = FLAGS_SET(network->router_prefix_delegation, RADV_PREFIX_DELEGATION_DHCP6);
+
+ if (!FLAGS_SET(network->link_local, ADDRESS_FAMILY_IPV6)) {
+ if (network->router_prefix_delegation != RADV_PREFIX_DELEGATION_NONE)
+ log_warning("%s: IPv6PrefixDelegation= is enabled but IPv6 link local addressing is disabled. "
+ "Disabling IPv6PrefixDelegation=.", network->filename);
+
+ network->router_prefix_delegation = RADV_PREFIX_DELEGATION_NONE;
+ }
+
+ if (network->router_prefix_delegation == RADV_PREFIX_DELEGATION_NONE) {
+ network->n_router_dns = 0;
+ network->router_dns = mfree(network->router_dns);
+ network->router_search_domains = ordered_set_free(network->router_search_domains);
+ }
+
+ if (!FLAGS_SET(network->router_prefix_delegation, RADV_PREFIX_DELEGATION_STATIC)) {
+ network->prefixes_by_section = hashmap_free_with_destructor(network->prefixes_by_section, prefix_free);
+ network->route_prefixes_by_section = hashmap_free_with_destructor(network->route_prefixes_by_section, route_prefix_free);
+ }
+}
+
+int config_parse_prefix(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(prefix_free_or_set_invalidp) Prefix *p = NULL;
+ uint8_t prefixlen = 64;
+ union in_addr_union in6addr;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return log_oom();
+
+ r = in_addr_prefix_from_string(rvalue, AF_INET6, &in6addr, &prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Prefix is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = sd_radv_prefix_set_prefix(p->radv_prefix, &in6addr.in6, prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to set radv prefix, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_prefix_flags(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(prefix_free_or_set_invalidp) Prefix *p = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse %s=, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "OnLink"))
+ r = sd_radv_prefix_set_onlink(p->radv_prefix, r);
+ else if (streq(lvalue, "AddressAutoconfiguration"))
+ r = sd_radv_prefix_set_address_autoconfiguration(p->radv_prefix, r);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to set %s=, ignoring assignment: %m", lvalue);
+ return 0;
+ }
+
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_prefix_lifetime(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(prefix_free_or_set_invalidp) Prefix *p = NULL;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_sec(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Lifetime is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ /* a value of 0xffffffff represents infinity */
+ if (streq(lvalue, "PreferredLifetimeSec"))
+ r = sd_radv_prefix_set_preferred_lifetime(p->radv_prefix,
+ DIV_ROUND_UP(usec, USEC_PER_SEC));
+ else if (streq(lvalue, "ValidLifetimeSec"))
+ r = sd_radv_prefix_set_valid_lifetime(p->radv_prefix,
+ DIV_ROUND_UP(usec, USEC_PER_SEC));
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to set %s=, ignoring assignment: %m", lvalue);
+ return 0;
+ }
+
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_prefix_assign(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(prefix_free_or_set_invalidp) Prefix *p = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse %s=, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ p->assign = r;
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_route_prefix(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_prefix_free_or_set_invalidp) RoutePrefix *p = NULL;
+ uint8_t prefixlen = 64;
+ union in_addr_union in6addr;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return log_oom();
+
+ r = in_addr_prefix_from_string(rvalue, AF_INET6, &in6addr, &prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Route prefix is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = sd_radv_prefix_set_route_prefix(p->radv_route_prefix, &in6addr.in6, prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to set route prefix, ignoring assignment: %m");
+ return 0;
+ }
+
+ p = NULL;
+
+ return 0;
+}
+
+int config_parse_route_prefix_lifetime(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_prefix_free_or_set_invalidp) RoutePrefix *p = NULL;
+ usec_t usec;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_prefix_new_static(network, filename, section_line, &p);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_sec(rvalue, &usec);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Route lifetime is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ /* a value of 0xffffffff represents infinity */
+ r = sd_radv_route_prefix_set_lifetime(p->radv_route_prefix, DIV_ROUND_UP(usec, USEC_PER_SEC));
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to set route lifetime, ignoring assignment: %m");
+ return 0;
+ }
+
+ p = NULL;
+
+ return 0;
+}
+
+static int network_get_ipv6_dns(Network *network, struct in6_addr **ret_addresses, size_t *ret_size) {
+ _cleanup_free_ struct in6_addr *addresses = NULL;
+ size_t n_addresses = 0, n_allocated = 0;
+
+ assert(network);
+ assert(ret_addresses);
+ assert(ret_size);
+
+ for (size_t i = 0; i < network->n_dns; i++) {
+ union in_addr_union *addr;
+
+ if (network->dns[i]->family != AF_INET6)
+ continue;
+
+ addr = &network->dns[i]->address;
+
+ if (in_addr_is_null(AF_INET6, addr) ||
+ in_addr_is_link_local(AF_INET6, addr) ||
+ in_addr_is_localhost(AF_INET6, addr))
+ continue;
+
+ if (!GREEDY_REALLOC(addresses, n_allocated, n_addresses + 1))
+ return -ENOMEM;
+
+ addresses[n_addresses++] = addr->in6;
+ }
+
+ *ret_addresses = TAKE_PTR(addresses);
+ *ret_size = n_addresses;
+
+ return n_addresses;
+}
+
+static int radv_set_dns(Link *link, Link *uplink) {
+ _cleanup_free_ struct in6_addr *dns = NULL;
+ usec_t lifetime_usec;
+ size_t n_dns;
+ int r;
+
+ if (!link->network->router_emit_dns)
+ return 0;
+
+ if (link->network->router_dns) {
+ struct in6_addr *p;
+
+ dns = new(struct in6_addr, link->network->n_router_dns);
+ if (!dns)
+ return -ENOMEM;
+
+ p = dns;
+ for (size_t i = 0; i < link->network->n_router_dns; i++)
+ if (IN6_IS_ADDR_UNSPECIFIED(&link->network->router_dns[i])) {
+ if (!IN6_IS_ADDR_UNSPECIFIED(&link->ipv6ll_address))
+ *(p++) = link->ipv6ll_address;
+ } else
+ *(p++) = link->network->router_dns[i];
+
+ n_dns = p - dns;
+ lifetime_usec = link->network->router_dns_lifetime_usec;
+
+ goto set_dns;
+ }
+
+ lifetime_usec = SD_RADV_DEFAULT_DNS_LIFETIME_USEC;
+
+ r = network_get_ipv6_dns(link->network, &dns, &n_dns);
+ if (r > 0)
+ goto set_dns;
+
+ if (uplink) {
+ if (!uplink->network) {
+ log_link_debug(uplink, "Cannot fetch DNS servers as uplink interface is not managed by us");
+ return 0;
+ }
+
+ r = network_get_ipv6_dns(uplink->network, &dns, &n_dns);
+ if (r > 0)
+ goto set_dns;
+ }
+
+ return 0;
+
+ set_dns:
+ return sd_radv_set_rdnss(link->radv,
+ DIV_ROUND_UP(lifetime_usec, USEC_PER_SEC),
+ dns, n_dns);
+}
+
+static int radv_set_domains(Link *link, Link *uplink) {
+ OrderedSet *search_domains;
+ usec_t lifetime_usec;
+ _cleanup_free_ char **s = NULL; /* just free() because the strings are owned by the set */
+
+ if (!link->network->router_emit_domains)
+ return 0;
+
+ search_domains = link->network->router_search_domains;
+ lifetime_usec = link->network->router_dns_lifetime_usec;
+
+ if (search_domains)
+ goto set_domains;
+
+ lifetime_usec = SD_RADV_DEFAULT_DNS_LIFETIME_USEC;
+
+ search_domains = link->network->search_domains;
+ if (search_domains)
+ goto set_domains;
+
+ if (uplink) {
+ if (!uplink->network) {
+ log_link_debug(uplink, "Cannot fetch DNS search domains as uplink interface is not managed by us");
+ return 0;
+ }
+
+ search_domains = uplink->network->search_domains;
+ if (search_domains)
+ goto set_domains;
+ }
+
+ return 0;
+
+ set_domains:
+ s = ordered_set_get_strv(search_domains);
+ if (!s)
+ return log_oom();
+
+ return sd_radv_set_dnssl(link->radv,
+ DIV_ROUND_UP(lifetime_usec, USEC_PER_SEC),
+ s);
+
+}
+
+int radv_emit_dns(Link *link) {
+ Link *uplink;
+ int r;
+
+ uplink = manager_find_uplink(link->manager, link);
+
+ r = radv_set_dns(link, uplink);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not set RA DNS: %m");
+
+ r = radv_set_domains(link, uplink);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Could not set RA Domains: %m");
+
+ return 0;
+}
+
+static bool link_radv_enabled(Link *link) {
+ assert(link);
+
+ if (!link_ipv6ll_enabled(link))
+ return false;
+
+ return link->network->router_prefix_delegation;
+}
+
+int radv_configure(Link *link) {
+ uint16_t router_lifetime;
+ RoutePrefix *q;
+ Prefix *p;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ if (!link_radv_enabled(link))
+ return 0;
+
+ r = sd_radv_new(&link->radv);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_attach_event(link->radv, link->manager->event, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_mac(link->radv, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_ifindex(link->radv, link->ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_managed_information(link->radv, link->network->router_managed);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_other_information(link->radv, link->network->router_other_information);
+ if (r < 0)
+ return r;
+
+ /* a value of UINT16_MAX represents infinity, 0x0 means this host is not a router */
+ if (link->network->router_lifetime_usec == USEC_INFINITY)
+ router_lifetime = UINT16_MAX;
+ else if (link->network->router_lifetime_usec > (UINT16_MAX - 1) * USEC_PER_SEC)
+ router_lifetime = UINT16_MAX - 1;
+ else
+ router_lifetime = DIV_ROUND_UP(link->network->router_lifetime_usec, USEC_PER_SEC);
+
+ r = sd_radv_set_router_lifetime(link->radv, router_lifetime);
+ if (r < 0)
+ return r;
+
+ if (router_lifetime > 0) {
+ r = sd_radv_set_preference(link->radv, link->network->router_preference);
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(p, link->network->prefixes_by_section) {
+ r = sd_radv_add_prefix(link->radv, p->radv_prefix, false);
+ if (r == -EEXIST)
+ continue;
+ if (r == -ENOEXEC) {
+ log_link_warning_errno(link, r, "[IPv6Prefix] section configured without Prefix= setting, ignoring section.");
+ continue;
+ }
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(q, link->network->route_prefixes_by_section) {
+ r = sd_radv_add_route_prefix(link->radv, q->radv_route_prefix, false);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int radv_update_mac(Link *link) {
+ bool restart;
+ int r;
+
+ assert(link);
+
+ if (!link->radv)
+ return 0;
+
+ restart = sd_radv_is_running(link->radv);
+
+ r = sd_radv_stop(link->radv);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_set_mac(link->radv, &link->hw_addr.addr.ether);
+ if (r < 0)
+ return r;
+
+ if (restart) {
+ r = sd_radv_start(link->radv);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int radv_add_prefix(
+ Link *link,
+ const struct in6_addr *prefix,
+ uint8_t prefix_len,
+ uint32_t lifetime_preferred,
+ uint32_t lifetime_valid) {
+
+ _cleanup_(sd_radv_prefix_unrefp) sd_radv_prefix *p = NULL;
+ int r;
+
+ assert(link);
+
+ if (!link->radv)
+ return 0;
+
+ r = sd_radv_prefix_new(&p);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_prefix_set_prefix(p, prefix, prefix_len);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_prefix_set_preferred_lifetime(p, lifetime_preferred);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_prefix_set_valid_lifetime(p, lifetime_valid);
+ if (r < 0)
+ return r;
+
+ r = sd_radv_add_prefix(link->radv, p, true);
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ return 0;
+}
+
+int config_parse_radv_dns(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ n->n_router_dns = 0;
+ n->router_dns = mfree(n->router_dns);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+ union in_addr_union a;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract word, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ if (streq(w, "_link_local"))
+ a = IN_ADDR_NULL;
+ else {
+ r = in_addr_from_string(AF_INET6, w, &a);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DNS server address, ignoring: %s", w);
+ continue;
+ }
+
+ if (in_addr_is_null(AF_INET6, &a)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "DNS server address is null, ignoring: %s", w);
+ continue;
+ }
+ }
+
+ struct in6_addr *m;
+ m = reallocarray(n->router_dns, n->n_router_dns + 1, sizeof(struct in6_addr));
+ if (!m)
+ return log_oom();
+
+ m[n->n_router_dns++] = a.in6;
+ n->router_dns = m;
+ }
+}
+
+int config_parse_radv_search_domains(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *n = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ n->router_search_domains = ordered_set_free(n->router_search_domains);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *w = NULL, *idna = NULL;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract word, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ r = dns_name_apply_idna(w, &idna);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to apply IDNA to domain name '%s', ignoring: %m", w);
+ continue;
+ } else if (r == 0)
+ /* transfer ownership to simplify subsequent operations */
+ idna = TAKE_PTR(w);
+
+ r = ordered_set_ensure_allocated(&n->router_search_domains, &string_hash_ops_free);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_set_consume(n->router_search_domains, TAKE_PTR(idna));
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+static const char * const radv_prefix_delegation_table[_RADV_PREFIX_DELEGATION_MAX] = {
+ [RADV_PREFIX_DELEGATION_NONE] = "no",
+ [RADV_PREFIX_DELEGATION_STATIC] = "static",
+ [RADV_PREFIX_DELEGATION_DHCP6] = "dhcpv6",
+ [RADV_PREFIX_DELEGATION_BOTH] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(
+ radv_prefix_delegation,
+ RADVPrefixDelegation,
+ RADV_PREFIX_DELEGATION_BOTH);
+
+int config_parse_router_prefix_delegation(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ RADVPrefixDelegation val, *ra = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(lvalue, "IPv6SendRA")) {
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid %s= setting, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ /* When IPv6SendRA= is enabled, only static prefixes are sent by default, and users
+ * need to explicitly enable DHCPv6PrefixDelegation=. */
+ *ra = r ? RADV_PREFIX_DELEGATION_STATIC : RADV_PREFIX_DELEGATION_NONE;
+ return 0;
+ }
+
+ /* For backward compatibility */
+ val = radv_prefix_delegation_from_string(rvalue);
+ if (val < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid %s= setting, ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ *ra = val;
+ return 0;
+}
+
+int config_parse_router_preference(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "high"))
+ network->router_preference = SD_NDISC_PREFERENCE_HIGH;
+ else if (STR_IN_SET(rvalue, "medium", "normal", "default"))
+ network->router_preference = SD_NDISC_PREFERENCE_MEDIUM;
+ else if (streq(rvalue, "low"))
+ network->router_preference = SD_NDISC_PREFERENCE_LOW;
+ else
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid router preference, ignoring assignment: %s", rvalue);
+
+ return 0;
+}
diff --git a/src/network/networkd-radv.h b/src/network/networkd-radv.h
new file mode 100644
index 0000000..4dfbefe
--- /dev/null
+++ b/src/network/networkd-radv.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+***/
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "sd-radv.h"
+
+#include "in-addr-util.h"
+#include "conf-parser.h"
+#include "networkd-util.h"
+
+typedef struct Network Network;
+typedef struct Link Link;
+
+typedef enum RADVPrefixDelegation {
+ RADV_PREFIX_DELEGATION_NONE = 0,
+ RADV_PREFIX_DELEGATION_STATIC = 1 << 0,
+ RADV_PREFIX_DELEGATION_DHCP6 = 1 << 1,
+ RADV_PREFIX_DELEGATION_BOTH = RADV_PREFIX_DELEGATION_STATIC | RADV_PREFIX_DELEGATION_DHCP6,
+ _RADV_PREFIX_DELEGATION_MAX,
+ _RADV_PREFIX_DELEGATION_INVALID = -1,
+} RADVPrefixDelegation;
+
+typedef struct Prefix {
+ Network *network;
+ NetworkConfigSection *section;
+
+ sd_radv_prefix *radv_prefix;
+
+ bool assign;
+} Prefix;
+
+typedef struct RoutePrefix {
+ Network *network;
+ NetworkConfigSection *section;
+
+ sd_radv_route_prefix *radv_route_prefix;
+} RoutePrefix;
+
+Prefix *prefix_free(Prefix *prefix);
+RoutePrefix *route_prefix_free(RoutePrefix *prefix);
+
+void network_drop_invalid_prefixes(Network *network);
+void network_drop_invalid_route_prefixes(Network *network);
+void network_adjust_radv(Network *network);
+
+int radv_emit_dns(Link *link);
+int radv_configure(Link *link);
+int radv_update_mac(Link *link);
+int radv_add_prefix(Link *link, const struct in6_addr *prefix, uint8_t prefix_len,
+ uint32_t lifetime_preferred, uint32_t lifetime_valid);
+
+const char* radv_prefix_delegation_to_string(RADVPrefixDelegation i) _const_;
+RADVPrefixDelegation radv_prefix_delegation_from_string(const char *s) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_router_prefix_delegation);
+CONFIG_PARSER_PROTOTYPE(config_parse_router_preference);
+CONFIG_PARSER_PROTOTYPE(config_parse_prefix);
+CONFIG_PARSER_PROTOTYPE(config_parse_prefix_flags);
+CONFIG_PARSER_PROTOTYPE(config_parse_prefix_lifetime);
+CONFIG_PARSER_PROTOTYPE(config_parse_prefix_assign);
+CONFIG_PARSER_PROTOTYPE(config_parse_radv_dns);
+CONFIG_PARSER_PROTOTYPE(config_parse_radv_search_domains);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_prefix);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_prefix_lifetime);
diff --git a/src/network/networkd-route.c b/src/network/networkd-route.c
new file mode 100644
index 0000000..0ed8958
--- /dev/null
+++ b/src/network/networkd-route.c
@@ -0,0 +1,2537 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/icmpv6.h>
+#include <linux/ipv6_route.h>
+
+#include "alloc-util.h"
+#include "netlink-util.h"
+#include "networkd-ipv4ll.h"
+#include "networkd-manager.h"
+#include "networkd-network.h"
+#include "networkd-nexthop.h"
+#include "networkd-route.h"
+#include "networkd-routing-policy-rule.h"
+#include "parse-util.h"
+#include "socket-netlink.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "sysctl-util.h"
+#include "vrf.h"
+
+#define ROUTES_DEFAULT_MAX_PER_FAMILY 4096U
+
+static uint32_t link_get_vrf_table(const Link *link) {
+ return link->network->vrf ? VRF(link->network->vrf)->table : RT_TABLE_MAIN;
+}
+
+uint32_t link_get_dhcp_route_table(const Link *link) {
+ /* When the interface is part of an VRF use the VRFs routing table, unless
+ * another table is explicitly specified. */
+ if (link->network->dhcp_route_table_set)
+ return link->network->dhcp_route_table;
+ return link_get_vrf_table(link);
+}
+
+uint32_t link_get_ipv6_accept_ra_route_table(const Link *link) {
+ if (link->network->ipv6_accept_ra_route_table_set)
+ return link->network->ipv6_accept_ra_route_table;
+ return link_get_vrf_table(link);
+}
+
+static const char * const route_type_table[__RTN_MAX] = {
+ [RTN_UNICAST] = "unicast",
+ [RTN_LOCAL] = "local",
+ [RTN_BROADCAST] = "broadcast",
+ [RTN_ANYCAST] = "anycast",
+ [RTN_MULTICAST] = "multicast",
+ [RTN_BLACKHOLE] = "blackhole",
+ [RTN_UNREACHABLE] = "unreachable",
+ [RTN_PROHIBIT] = "prohibit",
+ [RTN_THROW] = "throw",
+ [RTN_NAT] = "nat",
+ [RTN_XRESOLVE] = "xresolve",
+};
+
+assert_cc(__RTN_MAX <= UCHAR_MAX);
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(route_type, int);
+
+static const char * const route_scope_table[] = {
+ [RT_SCOPE_UNIVERSE] = "global",
+ [RT_SCOPE_SITE] = "site",
+ [RT_SCOPE_LINK] = "link",
+ [RT_SCOPE_HOST] = "host",
+ [RT_SCOPE_NOWHERE] = "nowhere",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(route_scope, int);
+
+#define ROUTE_SCOPE_STR_MAX CONST_MAX(DECIMAL_STR_MAX(int), STRLEN("nowhere") + 1)
+static const char *format_route_scope(int scope, char *buf, size_t size) {
+ const char *s;
+ char *p = buf;
+
+ s = route_scope_to_string(scope);
+ if (s)
+ strpcpy(&p, size, s);
+ else
+ strpcpyf(&p, size, "%d", scope);
+
+ return buf;
+}
+
+static const char * const route_table_table[] = {
+ [RT_TABLE_DEFAULT] = "default",
+ [RT_TABLE_MAIN] = "main",
+ [RT_TABLE_LOCAL] = "local",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(route_table, int);
+
+#define ROUTE_TABLE_STR_MAX CONST_MAX(DECIMAL_STR_MAX(int), STRLEN("default") + 1)
+static const char *format_route_table(int table, char *buf, size_t size) {
+ const char *s;
+ char *p = buf;
+
+ s = route_table_to_string(table);
+ if (s)
+ strpcpy(&p, size, s);
+ else
+ strpcpyf(&p, size, "%d", table);
+
+ return buf;
+}
+
+static const char * const route_protocol_table[] = {
+ [RTPROT_KERNEL] = "kernel",
+ [RTPROT_BOOT] = "boot",
+ [RTPROT_STATIC] = "static",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(route_protocol, int);
+
+static const char * const route_protocol_full_table[] = {
+ [RTPROT_REDIRECT] = "redirect",
+ [RTPROT_KERNEL] = "kernel",
+ [RTPROT_BOOT] = "boot",
+ [RTPROT_STATIC] = "static",
+ [RTPROT_GATED] = "gated",
+ [RTPROT_RA] = "ra",
+ [RTPROT_MRT] = "mrt",
+ [RTPROT_ZEBRA] = "zebra",
+ [RTPROT_BIRD] = "bird",
+ [RTPROT_DNROUTED] = "dnrouted",
+ [RTPROT_XORP] = "xorp",
+ [RTPROT_NTK] = "ntk",
+ [RTPROT_DHCP] = "dhcp",
+ [RTPROT_MROUTED] = "mrouted",
+ [RTPROT_BABEL] = "babel",
+ [RTPROT_BGP] = "bgp",
+ [RTPROT_ISIS] = "isis",
+ [RTPROT_OSPF] = "ospf",
+ [RTPROT_RIP] = "rip",
+ [RTPROT_EIGRP] = "eigrp",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(route_protocol_full, int);
+
+#define ROUTE_PROTOCOL_STR_MAX CONST_MAX(DECIMAL_STR_MAX(int), STRLEN("redirect") + 1)
+static const char *format_route_protocol(int protocol, char *buf, size_t size) {
+ const char *s;
+ char *p = buf;
+
+ s = route_protocol_full_to_string(protocol);
+ if (s)
+ strpcpy(&p, size, s);
+ else
+ strpcpyf(&p, size, "%d", protocol);
+
+ return buf;
+}
+
+static unsigned routes_max(void) {
+ static thread_local unsigned cached = 0;
+
+ _cleanup_free_ char *s4 = NULL, *s6 = NULL;
+ unsigned val4 = ROUTES_DEFAULT_MAX_PER_FAMILY, val6 = ROUTES_DEFAULT_MAX_PER_FAMILY;
+
+ if (cached > 0)
+ return cached;
+
+ if (sysctl_read("net/ipv4/route/max_size", &s4) >= 0) {
+ truncate_nl(s4);
+ if (safe_atou(s4, &val4) >= 0 &&
+ val4 == 2147483647U)
+ /* This is the default "no limit" value in the kernel */
+ val4 = ROUTES_DEFAULT_MAX_PER_FAMILY;
+ }
+
+ if (sysctl_read("net/ipv6/route/max_size", &s6) >= 0) {
+ truncate_nl(s6);
+ (void) safe_atou(s6, &val6);
+ }
+
+ cached = MAX(ROUTES_DEFAULT_MAX_PER_FAMILY, val4) +
+ MAX(ROUTES_DEFAULT_MAX_PER_FAMILY, val6);
+ return cached;
+}
+
+int route_new(Route **ret) {
+ _cleanup_(route_freep) Route *route = NULL;
+
+ route = new(Route, 1);
+ if (!route)
+ return -ENOMEM;
+
+ *route = (Route) {
+ .family = AF_UNSPEC,
+ .scope = RT_SCOPE_UNIVERSE,
+ .protocol = RTPROT_UNSPEC,
+ .type = RTN_UNICAST,
+ .table = RT_TABLE_MAIN,
+ .lifetime = USEC_INFINITY,
+ .quickack = -1,
+ .fast_open_no_cookie = -1,
+ .gateway_onlink = -1,
+ .ttl_propagate = -1,
+ };
+
+ *ret = TAKE_PTR(route);
+
+ return 0;
+}
+
+static int route_new_static(Network *network, const char *filename, unsigned section_line, Route **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(route_freep) Route *route = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ route = hashmap_get(network->routes_by_section, n);
+ if (route) {
+ *ret = TAKE_PTR(route);
+ return 0;
+ }
+
+ if (hashmap_size(network->routes_by_section) >= routes_max())
+ return -E2BIG;
+
+ r = route_new(&route);
+ if (r < 0)
+ return r;
+
+ route->protocol = RTPROT_STATIC;
+ route->network = network;
+ route->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->routes_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->routes_by_section, route->section, route);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(route);
+ return 0;
+}
+
+Route *route_free(Route *route) {
+ if (!route)
+ return NULL;
+
+ if (route->network) {
+ assert(route->section);
+ hashmap_remove(route->network->routes_by_section, route->section);
+ }
+
+ network_config_section_free(route->section);
+
+ if (route->link) {
+ NDiscRoute *n;
+
+ set_remove(route->link->routes, route);
+ set_remove(route->link->routes_foreign, route);
+ set_remove(route->link->dhcp_routes, route);
+ set_remove(route->link->dhcp_routes_old, route);
+ set_remove(route->link->dhcp6_routes, route);
+ set_remove(route->link->dhcp6_routes_old, route);
+ set_remove(route->link->dhcp6_pd_routes, route);
+ set_remove(route->link->dhcp6_pd_routes_old, route);
+ SET_FOREACH(n, route->link->ndisc_routes)
+ if (n->route == route)
+ free(set_remove(route->link->ndisc_routes, n));
+ }
+
+ if (route->manager) {
+ set_remove(route->manager->routes, route);
+ set_remove(route->manager->routes_foreign, route);
+ }
+
+ ordered_set_free_free(route->multipath_routes);
+
+ sd_event_source_unref(route->expire);
+
+ return mfree(route);
+}
+
+void route_hash_func(const Route *route, struct siphash *state) {
+ assert(route);
+
+ siphash24_compress(&route->family, sizeof(route->family), state);
+
+ switch (route->family) {
+ case AF_INET:
+ case AF_INET6:
+ siphash24_compress(&route->dst_prefixlen, sizeof(route->dst_prefixlen), state);
+ siphash24_compress(&route->dst, FAMILY_ADDRESS_SIZE(route->family), state);
+
+ siphash24_compress(&route->src_prefixlen, sizeof(route->src_prefixlen), state);
+ siphash24_compress(&route->src, FAMILY_ADDRESS_SIZE(route->family), state);
+
+ siphash24_compress(&route->gw_family, sizeof(route->gw_family), state);
+ if (IN_SET(route->gw_family, AF_INET, AF_INET6)) {
+ siphash24_compress(&route->gw, FAMILY_ADDRESS_SIZE(route->gw_family), state);
+ siphash24_compress(&route->gw_weight, sizeof(route->gw_weight), state);
+ }
+
+ siphash24_compress(&route->prefsrc, FAMILY_ADDRESS_SIZE(route->family), state);
+
+ siphash24_compress(&route->tos, sizeof(route->tos), state);
+ siphash24_compress(&route->priority, sizeof(route->priority), state);
+ siphash24_compress(&route->table, sizeof(route->table), state);
+ siphash24_compress(&route->protocol, sizeof(route->protocol), state);
+ siphash24_compress(&route->scope, sizeof(route->scope), state);
+ siphash24_compress(&route->type, sizeof(route->type), state);
+
+ siphash24_compress(&route->initcwnd, sizeof(route->initcwnd), state);
+ siphash24_compress(&route->initrwnd, sizeof(route->initrwnd), state);
+
+ break;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ break;
+ }
+}
+
+int route_compare_func(const Route *a, const Route *b) {
+ int r;
+
+ r = CMP(a->family, b->family);
+ if (r != 0)
+ return r;
+
+ switch (a->family) {
+ case AF_INET:
+ case AF_INET6:
+ r = CMP(a->dst_prefixlen, b->dst_prefixlen);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->dst, &b->dst, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+
+ r = CMP(a->src_prefixlen, b->src_prefixlen);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->src, &b->src, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+
+ r = CMP(a->gw_family, b->gw_family);
+ if (r != 0)
+ return r;
+
+ if (IN_SET(a->gw_family, AF_INET, AF_INET6)) {
+ r = memcmp(&a->gw, &b->gw, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+
+ r = CMP(a->gw_weight, b->gw_weight);
+ if (r != 0)
+ return r;
+ }
+
+ r = memcmp(&a->prefsrc, &b->prefsrc, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+
+ r = CMP(a->tos, b->tos);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->priority, b->priority);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->table, b->table);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->protocol, b->protocol);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->scope, b->scope);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->type, b->type);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->initcwnd, b->initcwnd);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->initrwnd, b->initrwnd);
+ if (r != 0)
+ return r;
+
+ return 0;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ return 0;
+ }
+}
+
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ route_hash_ops,
+ Route,
+ route_hash_func,
+ route_compare_func,
+ route_free);
+
+static bool route_equal(const Route *r1, const Route *r2) {
+ if (r1 == r2)
+ return true;
+
+ if (!r1 || !r2)
+ return false;
+
+ return route_compare_func(r1, r2) == 0;
+}
+
+static int route_get(const Manager *manager, const Link *link, const Route *in, Route **ret) {
+ Route *existing;
+
+ assert(manager || link);
+ assert(in);
+
+ if (link) {
+ existing = set_get(link->routes, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(link->routes_foreign, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+ } else {
+ existing = set_get(manager->routes, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(manager->routes_foreign, in);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static void route_copy(Route *dest, const Route *src, const MultipathRoute *m) {
+ assert(dest);
+ assert(src);
+
+ dest->family = src->family;
+ dest->src = src->src;
+ dest->src_prefixlen = src->src_prefixlen;
+ dest->dst = src->dst;
+ dest->dst_prefixlen = src->dst_prefixlen;
+ dest->prefsrc = src->prefsrc;
+ dest->scope = src->scope;
+ dest->protocol = src->protocol;
+ dest->type = src->type;
+ dest->tos = src->tos;
+ dest->priority = src->priority;
+ dest->table = src->table;
+ dest->initcwnd = src->initcwnd;
+ dest->initrwnd = src->initrwnd;
+ dest->lifetime = src->lifetime;
+
+ if (m) {
+ dest->gw_family = m->gateway.family;
+ dest->gw = m->gateway.address;
+ dest->gw_weight = m->weight;
+ } else {
+ dest->gw_family = src->gw_family;
+ dest->gw = src->gw;
+ dest->gw_weight = src->gw_weight;
+ }
+}
+
+static int route_add_internal(Manager *manager, Link *link, Set **routes, const Route *in, Route **ret) {
+ _cleanup_(route_freep) Route *route = NULL;
+ int r;
+
+ assert(manager || link);
+ assert(routes);
+ assert(in);
+
+ r = route_new(&route);
+ if (r < 0)
+ return r;
+
+ route_copy(route, in, NULL);
+
+ r = set_ensure_put(routes, &route_hash_ops, route);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EEXIST;
+
+ route->link = link;
+ route->manager = manager;
+
+ if (ret)
+ *ret = route;
+
+ route = NULL;
+
+ return 0;
+}
+
+static int route_add_foreign(Manager *manager, Link *link, const Route *in, Route **ret) {
+ assert(manager || link);
+ return route_add_internal(manager, link, link ? &link->routes_foreign : &manager->routes_foreign, in, ret);
+}
+
+static int route_add(Manager *manager, Link *link, const Route *in, const MultipathRoute *m, Route **ret) {
+ _cleanup_(route_freep) Route *tmp = NULL;
+ Route *route;
+ int r;
+
+ assert(manager || link);
+ assert(in);
+
+ if (m) {
+ assert(link && (m->ifindex == 0 || m->ifindex == link->ifindex));
+
+ r = route_new(&tmp);
+ if (r < 0)
+ return r;
+
+ route_copy(tmp, in, m);
+ in = tmp;
+ }
+
+ r = route_get(manager, link, in, &route);
+ if (r == -ENOENT) {
+ /* Route does not exist, create a new one */
+ r = route_add_internal(manager, link, link ? &link->routes : &manager->routes, in, &route);
+ if (r < 0)
+ return r;
+ } else if (r == 0) {
+ /* Take over a foreign route */
+ if (link) {
+ r = set_ensure_put(&link->routes, &route_hash_ops, route);
+ if (r < 0)
+ return r;
+
+ set_remove(link->routes_foreign, route);
+ } else {
+ r = set_ensure_put(&manager->routes, &route_hash_ops, route);
+ if (r < 0)
+ return r;
+
+ set_remove(manager->routes_foreign, route);
+ }
+ } else if (r == 1) {
+ /* Route exists, do nothing */
+ ;
+ } else
+ return r;
+
+ if (ret)
+ *ret = route;
+
+ return 0;
+}
+
+static int route_set_netlink_message(const Route *route, sd_netlink_message *req, Link *link) {
+ unsigned flags;
+ int r;
+
+ assert(route);
+ assert(req);
+
+ /* link may be NULL */
+
+ if (in_addr_is_null(route->gw_family, &route->gw) == 0) {
+ if (route->gw_family == route->family) {
+ r = netlink_message_append_in_addr_union(req, RTA_GATEWAY, route->gw_family, &route->gw);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_GATEWAY attribute: %m");
+ } else {
+ RouteVia rtvia = {
+ .family = route->gw_family,
+ .address = route->gw,
+ };
+
+ r = sd_netlink_message_append_data(req, RTA_VIA, &rtvia, sizeof(rtvia));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_VIA attribute: %m");
+ }
+ }
+
+ if (route->dst_prefixlen > 0) {
+ r = netlink_message_append_in_addr_union(req, RTA_DST, route->family, &route->dst);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_DST attribute: %m");
+
+ r = sd_rtnl_message_route_set_dst_prefixlen(req, route->dst_prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set destination prefix length: %m");
+ }
+
+ if (route->src_prefixlen > 0) {
+ r = netlink_message_append_in_addr_union(req, RTA_SRC, route->family, &route->src);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_SRC attribute: %m");
+
+ r = sd_rtnl_message_route_set_src_prefixlen(req, route->src_prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set source prefix length: %m");
+ }
+
+ if (in_addr_is_null(route->family, &route->prefsrc) == 0) {
+ r = netlink_message_append_in_addr_union(req, RTA_PREFSRC, route->family, &route->prefsrc);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_PREFSRC attribute: %m");
+ }
+
+ r = sd_rtnl_message_route_set_scope(req, route->scope);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set scope: %m");
+
+ flags = route->flags;
+ if (route->gateway_onlink >= 0)
+ SET_FLAG(flags, RTNH_F_ONLINK, route->gateway_onlink);
+
+ r = sd_rtnl_message_route_set_flags(req, flags);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set flags: %m");
+
+ if (route->table != RT_TABLE_MAIN) {
+ if (route->table < 256) {
+ r = sd_rtnl_message_route_set_table(req, route->table);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set route table: %m");
+ } else {
+ r = sd_rtnl_message_route_set_table(req, RT_TABLE_UNSPEC);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set route table: %m");
+
+ /* Table attribute to allow more than 256. */
+ r = sd_netlink_message_append_data(req, RTA_TABLE, &route->table, sizeof(route->table));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_TABLE attribute: %m");
+ }
+ }
+
+ r = sd_rtnl_message_route_set_type(req, route->type);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set route type: %m");
+
+ if (!IN_SET(route->type, RTN_UNREACHABLE, RTN_PROHIBIT, RTN_BLACKHOLE, RTN_THROW)) {
+ assert(link); /* Those routes must be attached to a specific link */
+
+ r = sd_netlink_message_append_u32(req, RTA_OIF, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_OIF attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(req, RTA_PREF, route->pref);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_PREF attribute: %m");
+
+ r = sd_netlink_message_append_u32(req, RTA_PRIORITY, route->priority);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_PRIORITY attribute: %m");
+
+ return 0;
+}
+
+static int route_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+
+ /* Note that link may be NULL. */
+ if (link && IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -ESRCH)
+ log_link_message_warning_errno(link, m, r, "Could not drop route, ignoring");
+
+ return 1;
+}
+
+int route_remove(
+ const Route *route,
+ Manager *manager,
+ Link *link,
+ link_netlink_message_handler_t callback) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link || manager);
+ assert(IN_SET(route->family, AF_INET, AF_INET6));
+
+ if (!manager)
+ manager = link->manager;
+ /* link may be NULL! */
+
+ r = sd_rtnl_message_new_route(manager->rtnl, &req,
+ RTM_DELROUTE, route->family,
+ route->protocol);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_DELROUTE message: %m");
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *dst = NULL, *dst_prefixlen = NULL, *src = NULL, *gw = NULL, *prefsrc = NULL;
+ char scope[ROUTE_SCOPE_STR_MAX], table[ROUTE_TABLE_STR_MAX], protocol[ROUTE_PROTOCOL_STR_MAX];
+
+ if (!in_addr_is_null(route->family, &route->dst)) {
+ (void) in_addr_to_string(route->family, &route->dst, &dst);
+ (void) asprintf(&dst_prefixlen, "/%u", route->dst_prefixlen);
+ }
+ if (!in_addr_is_null(route->family, &route->src))
+ (void) in_addr_to_string(route->family, &route->src, &src);
+ if (!in_addr_is_null(route->gw_family, &route->gw))
+ (void) in_addr_to_string(route->gw_family, &route->gw, &gw);
+ if (!in_addr_is_null(route->family, &route->prefsrc))
+ (void) in_addr_to_string(route->family, &route->prefsrc, &prefsrc);
+
+ log_link_debug(link, "Removing route: dst: %s%s, src: %s, gw: %s, prefsrc: %s, scope: %s, table: %s, proto: %s, type: %s",
+ strna(dst), strempty(dst_prefixlen), strna(src), strna(gw), strna(prefsrc),
+ format_route_scope(route->scope, scope, sizeof(scope)),
+ format_route_table(route->table, table, sizeof(table)),
+ format_route_protocol(route->protocol, protocol, sizeof(protocol)),
+ strna(route_type_to_string(route->type)));
+ }
+
+ r = route_set_netlink_message(route, req, link);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(manager->rtnl, NULL, req,
+ callback ?: route_remove_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link); /* link may be NULL, link_ref() is OK with that */
+
+ return 0;
+}
+
+static bool link_has_route(const Link *link, const Route *route) {
+ Route *net_route;
+
+ assert(link);
+ assert(route);
+
+ if (!link->network)
+ return false;
+
+ HASHMAP_FOREACH(net_route, link->network->routes_by_section)
+ if (route_equal(net_route, route))
+ return true;
+
+ return false;
+}
+
+static bool links_have_route(Manager *manager, const Route *route, const Link *except) {
+ Link *link;
+
+ assert(manager);
+
+ HASHMAP_FOREACH(link, manager->links) {
+ if (link == except)
+ continue;
+
+ if (link_has_route(link, route))
+ return true;
+ }
+
+ return false;
+}
+
+static int manager_drop_foreign_routes(Manager *manager) {
+ Route *route;
+ int k, r = 0;
+
+ assert(manager);
+
+ SET_FOREACH(route, manager->routes_foreign) {
+ /* do not touch routes managed by the kernel */
+ if (route->protocol == RTPROT_KERNEL)
+ continue;
+
+ if (links_have_route(manager, route, NULL))
+ /* The route will be configured later. */
+ continue;
+
+ /* The existing links do not have the route. Let's drop this now. It may by
+ * re-configured later. */
+ k = route_remove(route, manager, NULL, NULL);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int manager_drop_routes(Manager *manager, Link *except) {
+ Route *route;
+ int k, r = 0;
+
+ assert(manager);
+
+ SET_FOREACH(route, manager->routes) {
+ /* do not touch routes managed by the kernel */
+ if (route->protocol == RTPROT_KERNEL)
+ continue;
+
+ if (links_have_route(manager, route, except))
+ /* The route will be configured later. */
+ continue;
+
+ /* The existing links do not have the route. Let's drop this now. It may by
+ * re-configured later. */
+ k = route_remove(route, manager, NULL, NULL);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+int link_drop_foreign_routes(Link *link) {
+ Route *route;
+ int k, r = 0;
+
+ assert(link);
+ assert(link->manager);
+
+ SET_FOREACH(route, link->routes_foreign) {
+ /* do not touch routes managed by the kernel */
+ if (route->protocol == RTPROT_KERNEL)
+ continue;
+
+ /* do not touch multicast route added by kernel */
+ /* FIXME: Why the kernel adds this route with protocol RTPROT_BOOT??? We need to investigate that.
+ * https://tools.ietf.org/html/rfc4862#section-5.4 may explain why. */
+ if (route->protocol == RTPROT_BOOT &&
+ route->family == AF_INET6 &&
+ route->dst_prefixlen == 8 &&
+ in_addr_equal(AF_INET6, &route->dst, &(union in_addr_union) { .in6 = {{{ 0xff,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 }}} }))
+ continue;
+
+ if (route->protocol == RTPROT_STATIC && link->network &&
+ FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_STATIC))
+ continue;
+
+ if (route->protocol == RTPROT_DHCP && link->network &&
+ FLAGS_SET(link->network->keep_configuration, KEEP_CONFIGURATION_DHCP))
+ continue;
+
+ if (link_has_route(link, route))
+ k = route_add(NULL, link, route, NULL, NULL);
+ else
+ k = route_remove(route, NULL, link, NULL);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ k = manager_drop_foreign_routes(link->manager);
+ if (k < 0 && r >= 0)
+ r = k;
+
+ return r;
+}
+
+int link_drop_routes(Link *link) {
+ Route *route;
+ int k, r = 0;
+
+ assert(link);
+
+ SET_FOREACH(route, link->routes) {
+ /* do not touch routes managed by the kernel */
+ if (route->protocol == RTPROT_KERNEL)
+ continue;
+
+ k = route_remove(route, NULL, link, NULL);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ k = manager_drop_routes(link->manager, link);
+ if (k < 0 && r >= 0)
+ r = k;
+
+ return r;
+}
+
+static int route_expire_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+ Route *route = userdata;
+ int r;
+
+ assert(route);
+
+ r = route_remove(route, route->manager, route->link, NULL);
+ if (r < 0) {
+ log_link_warning_errno(route->link, r, "Could not remove route: %m");
+ route_free(route);
+ }
+
+ return 1;
+}
+
+static int route_add_and_setup_timer(Link *link, const Route *route, const MultipathRoute *m, Route **ret) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *expire = NULL;
+ Route *nr;
+ int r;
+
+ assert(link);
+ assert(route);
+
+ if (IN_SET(route->type, RTN_UNREACHABLE, RTN_PROHIBIT, RTN_BLACKHOLE, RTN_THROW))
+ r = route_add(link->manager, NULL, route, NULL, &nr);
+ else if (!m || m->ifindex == 0 || m->ifindex == link->ifindex)
+ r = route_add(NULL, link, route, m, &nr);
+ else {
+ Link *link_gw;
+
+ r = link_get(link->manager, m->ifindex, &link_gw);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to get link with ifindex %d: %m", m->ifindex);
+
+ r = route_add(NULL, link_gw, route, m, &nr);
+ }
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not add route: %m");
+
+ /* TODO: drop expiration handling once it can be pushed into the kernel */
+ if (nr->lifetime != USEC_INFINITY && !kernel_route_expiration_supported()) {
+ r = sd_event_add_time(link->manager->event, &expire, clock_boottime_or_monotonic(),
+ nr->lifetime, 0, route_expire_handler, nr);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not arm expiration timer: %m");
+ }
+
+ sd_event_source_unref(nr->expire);
+ nr->expire = TAKE_PTR(expire);
+
+ if (ret)
+ *ret = nr;
+
+ return 0;
+}
+
+static int append_nexthop_one(const Route *route, const MultipathRoute *m, struct rtattr **rta, size_t offset) {
+ struct rtnexthop *rtnh;
+ struct rtattr *new_rta;
+ int r;
+
+ assert(route);
+ assert(m);
+ assert(rta);
+ assert(*rta);
+
+ new_rta = realloc(*rta, RTA_ALIGN((*rta)->rta_len) + RTA_SPACE(sizeof(struct rtnexthop)));
+ if (!new_rta)
+ return -ENOMEM;
+ *rta = new_rta;
+
+ rtnh = (struct rtnexthop *)((uint8_t *) *rta + offset);
+ *rtnh = (struct rtnexthop) {
+ .rtnh_len = sizeof(*rtnh),
+ .rtnh_ifindex = m->ifindex,
+ .rtnh_hops = m->weight > 0 ? m->weight - 1 : 0,
+ };
+
+ (*rta)->rta_len += sizeof(struct rtnexthop);
+
+ if (route->family == m->gateway.family) {
+ r = rtattr_append_attribute(rta, RTA_GATEWAY, &m->gateway.address, FAMILY_ADDRESS_SIZE(m->gateway.family));
+ if (r < 0)
+ goto clear;
+ rtnh = (struct rtnexthop *)((uint8_t *) *rta + offset);
+ rtnh->rtnh_len += RTA_SPACE(FAMILY_ADDRESS_SIZE(m->gateway.family));
+ } else {
+ r = rtattr_append_attribute(rta, RTA_VIA, &m->gateway, FAMILY_ADDRESS_SIZE(m->gateway.family) + sizeof(m->gateway.family));
+ if (r < 0)
+ goto clear;
+ rtnh = (struct rtnexthop *)((uint8_t *) *rta + offset);
+ rtnh->rtnh_len += RTA_SPACE(FAMILY_ADDRESS_SIZE(m->gateway.family) + sizeof(m->gateway.family));
+ }
+
+ return 0;
+
+clear:
+ (*rta)->rta_len -= sizeof(struct rtnexthop);
+ return r;
+}
+
+static int append_nexthops(const Route *route, sd_netlink_message *req) {
+ _cleanup_free_ struct rtattr *rta = NULL;
+ struct rtnexthop *rtnh;
+ MultipathRoute *m;
+ size_t offset;
+ int r;
+
+ if (ordered_set_isempty(route->multipath_routes))
+ return 0;
+
+ rta = new(struct rtattr, 1);
+ if (!rta)
+ return -ENOMEM;
+
+ *rta = (struct rtattr) {
+ .rta_type = RTA_MULTIPATH,
+ .rta_len = RTA_LENGTH(0),
+ };
+ offset = (uint8_t *) RTA_DATA(rta) - (uint8_t *) rta;
+
+ ORDERED_SET_FOREACH(m, route->multipath_routes) {
+ r = append_nexthop_one(route, m, &rta, offset);
+ if (r < 0)
+ return r;
+
+ rtnh = (struct rtnexthop *)((uint8_t *) rta + offset);
+ offset = (uint8_t *) RTNH_NEXT(rtnh) - (uint8_t *) rta;
+ }
+
+ r = sd_netlink_message_append_data(req, RTA_MULTIPATH, RTA_DATA(rta), RTA_PAYLOAD(rta));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int route_configure(
+ const Route *route,
+ Link *link,
+ link_netlink_message_handler_t callback,
+ Route **ret) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+ assert(IN_SET(route->family, AF_INET, AF_INET6));
+ assert(callback);
+
+ if (route_get(link->manager, link, route, NULL) <= 0 &&
+ set_size(link->routes) >= routes_max())
+ return log_link_error_errno(link, SYNTHETIC_ERRNO(E2BIG),
+ "Too many routes are configured, refusing: %m");
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *dst = NULL, *dst_prefixlen = NULL, *src = NULL, *gw = NULL, *prefsrc = NULL;
+ char scope[ROUTE_SCOPE_STR_MAX], table[ROUTE_TABLE_STR_MAX], protocol[ROUTE_PROTOCOL_STR_MAX];
+
+ if (!in_addr_is_null(route->family, &route->dst)) {
+ (void) in_addr_to_string(route->family, &route->dst, &dst);
+ (void) asprintf(&dst_prefixlen, "/%u", route->dst_prefixlen);
+ }
+ if (!in_addr_is_null(route->family, &route->src))
+ (void) in_addr_to_string(route->family, &route->src, &src);
+ if (!in_addr_is_null(route->gw_family, &route->gw))
+ (void) in_addr_to_string(route->gw_family, &route->gw, &gw);
+ if (!in_addr_is_null(route->family, &route->prefsrc))
+ (void) in_addr_to_string(route->family, &route->prefsrc, &prefsrc);
+
+ log_link_debug(link, "Configuring route: dst: %s%s, src: %s, gw: %s, prefsrc: %s, scope: %s, table: %s, proto: %s, type: %s",
+ strna(dst), strempty(dst_prefixlen), strna(src), strna(gw), strna(prefsrc),
+ format_route_scope(route->scope, scope, sizeof(scope)),
+ format_route_table(route->table, table, sizeof(table)),
+ format_route_protocol(route->protocol, protocol, sizeof(protocol)),
+ strna(route_type_to_string(route->type)));
+ }
+
+ r = sd_rtnl_message_new_route(link->manager->rtnl, &req,
+ RTM_NEWROUTE, route->family,
+ route->protocol);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_NEWROUTE message: %m");
+
+ r = route_set_netlink_message(route, req, link);
+ if (r < 0)
+ return r;
+
+ if (route->lifetime != USEC_INFINITY && kernel_route_expiration_supported()) {
+ r = sd_netlink_message_append_u32(req, RTA_EXPIRES,
+ DIV_ROUND_UP(usec_sub_unsigned(route->lifetime, now(clock_boottime_or_monotonic())), USEC_PER_SEC));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_EXPIRES attribute: %m");
+ }
+
+ if (route->ttl_propagate >= 0) {
+ r = sd_netlink_message_append_u8(req, RTA_TTL_PROPAGATE, route->ttl_propagate);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_TTL_PROPAGATE attribute: %m");
+ }
+
+ r = sd_netlink_message_open_container(req, RTA_METRICS);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_METRICS attribute: %m");
+
+ if (route->mtu > 0) {
+ r = sd_netlink_message_append_u32(req, RTAX_MTU, route->mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTAX_MTU attribute: %m");
+ }
+
+ if (route->initcwnd > 0) {
+ r = sd_netlink_message_append_u32(req, RTAX_INITCWND, route->initcwnd);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTAX_INITCWND attribute: %m");
+ }
+
+ if (route->initrwnd > 0) {
+ r = sd_netlink_message_append_u32(req, RTAX_INITRWND, route->initrwnd);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTAX_INITRWND attribute: %m");
+ }
+
+ if (route->quickack >= 0) {
+ r = sd_netlink_message_append_u32(req, RTAX_QUICKACK, route->quickack);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTAX_QUICKACK attribute: %m");
+ }
+
+ if (route->fast_open_no_cookie >= 0) {
+ r = sd_netlink_message_append_u32(req, RTAX_FASTOPEN_NO_COOKIE, route->fast_open_no_cookie);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTAX_FASTOPEN_NO_COOKIE attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_METRICS attribute: %m");
+
+ r = append_nexthops(route, req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append RTA_MULTIPATH attribute: %m");
+
+ if (ordered_set_isempty(route->multipath_routes)) {
+ Route *nr;
+
+ r = route_add_and_setup_timer(link, route, NULL, &nr);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = nr;
+ } else {
+ MultipathRoute *m;
+
+ assert(!ret);
+
+ ORDERED_SET_FOREACH(m, route->multipath_routes) {
+ r = route_add_and_setup_timer(link, route, m, NULL);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, callback,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int route_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->route_messages > 0);
+
+ link->route_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not set route");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->route_messages == 0) {
+ log_link_debug(link, "Routes set");
+ link->static_routes_configured = true;
+ link_set_nexthop(link);
+ }
+
+ return 1;
+}
+
+int link_set_routes(Link *link) {
+ enum {
+ PHASE_NON_GATEWAY, /* First phase: Routes without a gateway */
+ PHASE_GATEWAY, /* Second phase: Routes with a gateway */
+ _PHASE_MAX
+ } phase;
+ Route *rt;
+ int r;
+
+ assert(link);
+ assert(link->network);
+ assert(link->state != _LINK_STATE_INVALID);
+
+ link->static_routes_configured = false;
+
+ if (!link->addresses_ready)
+ return 0;
+
+ if (!link_has_carrier(link) && !link->network->configure_without_carrier)
+ /* During configuring addresses, the link lost its carrier. As networkd is dropping
+ * the addresses now, let's not configure the routes either. */
+ return 0;
+
+ r = link_set_routing_policy_rules(link);
+ if (r < 0)
+ return r;
+
+ /* First add the routes that enable us to talk to gateways, then add in the others that need a gateway. */
+ for (phase = 0; phase < _PHASE_MAX; phase++)
+ HASHMAP_FOREACH(rt, link->network->routes_by_section) {
+ if (rt->gateway_from_dhcp_or_ra)
+ continue;
+
+ if ((in_addr_is_null(rt->gw_family, &rt->gw) && ordered_set_isempty(rt->multipath_routes)) != (phase == PHASE_NON_GATEWAY))
+ continue;
+
+ r = route_configure(rt, link, route_handler, NULL);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set routes: %m");
+
+ link->route_messages++;
+ }
+
+ if (link->route_messages == 0) {
+ link->static_routes_configured = true;
+ link_set_nexthop(link);
+ } else {
+ log_link_debug(link, "Setting routes");
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ }
+
+ return 0;
+}
+
+static int process_route_one(Manager *manager, Link *link, uint16_t type, const Route *tmp, const MultipathRoute *m) {
+ _cleanup_(route_freep) Route *nr = NULL;
+ Route *route = NULL;
+ int r;
+
+ assert(manager);
+ assert(tmp);
+ assert(IN_SET(type, RTM_NEWROUTE, RTM_DELROUTE));
+
+ if (m) {
+ if (link)
+ return log_link_warning_errno(link, SYNTHETIC_ERRNO(EINVAL),
+ "rtnl: received route contains both RTA_OIF and RTA_MULTIPATH, ignoring.");
+
+ if (m->ifindex <= 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "rtnl: received multipath route with invalid ifindex, ignoring.");
+
+ r = link_get(manager, m->ifindex, &link);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: received multipath route for link (%d) we do not know, ignoring: %m", m->ifindex);
+ return 0;
+ }
+
+ r = route_new(&nr);
+ if (r < 0)
+ return log_oom();
+
+ route_copy(nr, tmp, m);
+
+ tmp = nr;
+ }
+
+ (void) route_get(manager, link, tmp, &route);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *buf_dst = NULL, *buf_dst_prefixlen = NULL,
+ *buf_src = NULL, *buf_gw = NULL, *buf_prefsrc = NULL;
+ char buf_scope[ROUTE_SCOPE_STR_MAX], buf_table[ROUTE_TABLE_STR_MAX],
+ buf_protocol[ROUTE_PROTOCOL_STR_MAX];
+
+ if (!in_addr_is_null(tmp->family, &tmp->dst)) {
+ (void) in_addr_to_string(tmp->family, &tmp->dst, &buf_dst);
+ (void) asprintf(&buf_dst_prefixlen, "/%u", tmp->dst_prefixlen);
+ }
+ if (!in_addr_is_null(tmp->family, &tmp->src))
+ (void) in_addr_to_string(tmp->family, &tmp->src, &buf_src);
+ if (!in_addr_is_null(tmp->gw_family, &tmp->gw))
+ (void) in_addr_to_string(tmp->gw_family, &tmp->gw, &buf_gw);
+ if (!in_addr_is_null(tmp->family, &tmp->prefsrc))
+ (void) in_addr_to_string(tmp->family, &tmp->prefsrc, &buf_prefsrc);
+
+ log_link_debug(link,
+ "%s route: dst: %s%s, src: %s, gw: %s, prefsrc: %s, scope: %s, table: %s, proto: %s, type: %s",
+ (!route && !manager->manage_foreign_routes) ? "Ignoring received foreign" :
+ type == RTM_DELROUTE ? "Forgetting" :
+ route ? "Received remembered" : "Remembering",
+ strna(buf_dst), strempty(buf_dst_prefixlen),
+ strna(buf_src), strna(buf_gw), strna(buf_prefsrc),
+ format_route_scope(tmp->scope, buf_scope, sizeof buf_scope),
+ format_route_table(tmp->table, buf_table, sizeof buf_table),
+ format_route_protocol(tmp->protocol, buf_protocol, sizeof buf_protocol),
+ strna(route_type_to_string(tmp->type)));
+ }
+
+ switch (type) {
+ case RTM_NEWROUTE:
+ if (!route && manager->manage_foreign_routes) {
+ /* A route appeared that we did not request */
+ r = route_add_foreign(manager, link, tmp, NULL);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "Failed to remember foreign route, ignoring: %m");
+ return 0;
+ }
+ }
+
+ break;
+
+ case RTM_DELROUTE:
+ route_free(route);
+ break;
+
+ default:
+ assert_not_reached("Received route message with invalid RTNL message type");
+ }
+
+ return 1;
+}
+
+int manager_rtnl_process_route(sd_netlink *rtnl, sd_netlink_message *message, Manager *m) {
+ _cleanup_ordered_set_free_free_ OrderedSet *multipath_routes = NULL;
+ _cleanup_(route_freep) Route *tmp = NULL;
+ _cleanup_free_ void *rta_multipath = NULL;
+ Link *link = NULL;
+ uint32_t ifindex;
+ uint16_t type;
+ unsigned char table;
+ RouteVia via;
+ size_t rta_len;
+ int r;
+
+ assert(rtnl);
+ assert(message);
+ assert(m);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_message_warning_errno(message, r, "rtnl: failed to receive route message, ignoring");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWROUTE, RTM_DELROUTE)) {
+ log_warning("rtnl: received unexpected message type %u when processing route, ignoring.", type);
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, RTA_OIF, &ifindex);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get ifindex from route message, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ if (ifindex <= 0) {
+ log_warning("rtnl: received route message with invalid ifindex %d, ignoring.", ifindex);
+ return 0;
+ }
+
+ r = link_get(m, ifindex, &link);
+ if (r < 0 || !link) {
+ /* when enumerating we might be out of sync, but we will
+ * get the route again, so just ignore it */
+ if (!m->enumerating)
+ log_warning("rtnl: received route message for link (%d) we do not know about, ignoring", ifindex);
+ return 0;
+ }
+ }
+
+ r = route_new(&tmp);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_rtnl_message_route_get_family(message, &tmp->family);
+ if (r < 0) {
+ log_link_warning(link, "rtnl: received route message without family, ignoring");
+ return 0;
+ } else if (!IN_SET(tmp->family, AF_INET, AF_INET6)) {
+ log_link_debug(link, "rtnl: received route message with invalid family '%i', ignoring", tmp->family);
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_protocol(message, &tmp->protocol);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: received route message without route protocol: %m");
+ return 0;
+ }
+
+ switch (tmp->family) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(message, RTA_DST, &tmp->dst.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid destination, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in_addr(message, RTA_GATEWAY, &tmp->gw.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid gateway, ignoring: %m");
+ return 0;
+ } else if (r >= 0)
+ tmp->gw_family = AF_INET;
+
+ r = sd_netlink_message_read(message, RTA_VIA, sizeof(via), &via);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid gateway, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ tmp->gw_family = via.family;
+ tmp->gw = via.address;
+ }
+
+ r = sd_netlink_message_read_in_addr(message, RTA_SRC, &tmp->src.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid source, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in_addr(message, RTA_PREFSRC, &tmp->prefsrc.in);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid preferred source, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(message, RTA_DST, &tmp->dst.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid destination, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in6_addr(message, RTA_GATEWAY, &tmp->gw.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid gateway, ignoring: %m");
+ return 0;
+ } else if (r >= 0)
+ tmp->gw_family = AF_INET6;
+
+ r = sd_netlink_message_read_in6_addr(message, RTA_SRC, &tmp->src.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid source, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_in6_addr(message, RTA_PREFSRC, &tmp->prefsrc.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message without valid preferred source, ignoring: %m");
+ return 0;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Received route message with unsupported address family");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_dst_prefixlen(message, &tmp->dst_prefixlen);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid destination prefixlen, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_src_prefixlen(message, &tmp->src_prefixlen);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid source prefixlen, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_scope(message, &tmp->scope);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid scope, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_tos(message, &tmp->tos);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid tos, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_type(message, &tmp->type);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid type, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_route_get_table(message, &table);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid table, ignoring: %m");
+ return 0;
+ }
+ tmp->table = table;
+
+ r = sd_netlink_message_read_u32(message, RTA_PRIORITY, &tmp->priority);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid priority, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_enter_container(message, RTA_METRICS);
+ if (r < 0 && r != -ENODATA) {
+ log_link_error_errno(link, r, "rtnl: Could not enter RTA_METRICS container: %m");
+ return 0;
+ }
+ if (r >= 0) {
+ r = sd_netlink_message_read_u32(message, RTAX_INITCWND, &tmp->initcwnd);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid initcwnd, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, RTAX_INITRWND, &tmp->initrwnd);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: received route message with invalid initrwnd, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_exit_container(message);
+ if (r < 0) {
+ log_link_error_errno(link, r, "rtnl: Could not exit from RTA_METRICS container: %m");
+ return 0;
+ }
+ }
+
+ r = sd_netlink_message_read_data(message, RTA_MULTIPATH, &rta_len, &rta_multipath);
+ if (r < 0 && r != -ENODATA) {
+ log_link_warning_errno(link, r, "rtnl: failed to read RTA_MULTIPATH attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = rtattr_read_nexthop(rta_multipath, rta_len, tmp->family, &multipath_routes);
+ if (r < 0) {
+ log_link_warning_errno(link, r, "rtnl: failed to parse RTA_MULTIPATH attribute, ignoring: %m");
+ return 0;
+ }
+ }
+
+ if (ordered_set_isempty(multipath_routes))
+ (void) process_route_one(m, link, type, tmp, NULL);
+ else {
+ MultipathRoute *mr;
+
+ ORDERED_SET_FOREACH(mr, multipath_routes) {
+ r = process_route_one(m, link, type, tmp, mr);
+ if (r < 0)
+ break;
+ }
+ }
+
+ return 1;
+}
+
+int link_serialize_routes(const Link *link, FILE *f) {
+ bool space = false;
+ Route *route;
+
+ assert(link);
+ assert(link->network);
+ assert(f);
+
+ fputs("ROUTES=", f);
+ SET_FOREACH(route, link->routes) {
+ _cleanup_free_ char *route_str = NULL;
+
+ if (in_addr_to_string(route->family, &route->dst, &route_str) < 0)
+ continue;
+
+ fprintf(f, "%s%s/%hhu/%hhu/%"PRIu32"/%"PRIu32"/"USEC_FMT,
+ space ? " " : "", route_str,
+ route->dst_prefixlen, route->tos, route->priority, route->table, route->lifetime);
+ space = true;
+ }
+ fputc('\n', f);
+
+ return 0;
+}
+
+int link_deserialize_routes(Link *link, const char *routes) {
+ int r;
+
+ assert(link);
+
+ for (const char *p = routes;; ) {
+ _cleanup_(route_freep) Route *tmp = NULL;
+ _cleanup_free_ char *route_str = NULL;
+ char *prefixlen_str;
+
+ r = extract_first_word(&p, &route_str, NULL, 0);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to parse ROUTES=: %m");
+ if (r == 0)
+ return 0;
+
+ prefixlen_str = strchr(route_str, '/');
+ if (!prefixlen_str) {
+ log_link_debug(link, "Failed to parse route, ignoring: %s", route_str);
+ continue;
+ }
+ *prefixlen_str++ = '\0';
+
+ r = route_new(&tmp);
+ if (r < 0)
+ return log_oom();
+
+ r = sscanf(prefixlen_str,
+ "%hhu/%hhu/%"SCNu32"/%"PRIu32"/"USEC_FMT,
+ &tmp->dst_prefixlen,
+ &tmp->tos,
+ &tmp->priority,
+ &tmp->table,
+ &tmp->lifetime);
+ if (r != 5) {
+ log_link_debug(link,
+ "Failed to parse destination prefix length, tos, priority, table or expiration: %s",
+ prefixlen_str);
+ continue;
+ }
+
+ r = in_addr_from_string_auto(route_str, &tmp->family, &tmp->dst);
+ if (r < 0) {
+ log_link_debug_errno(link, r, "Failed to parse route destination %s: %m", route_str);
+ continue;
+ }
+
+ r = route_add_and_setup_timer(link, tmp, NULL, NULL);
+ if (r < 0)
+ return log_link_debug_errno(link, r, "Failed to add route: %m");
+ }
+}
+
+int network_add_ipv4ll_route(Network *network) {
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ unsigned section_line;
+ int r;
+
+ assert(network);
+
+ if (!network->ipv4ll_route)
+ return 0;
+
+ section_line = hashmap_find_free_section_line(network->routes_by_section);
+
+ /* IPv4LLRoute= is in [Network] section. */
+ r = route_new_static(network, network->filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ r = in_addr_from_string(AF_INET, "169.254.0.0", &n->dst);
+ if (r < 0)
+ return r;
+
+ n->family = AF_INET;
+ n->dst_prefixlen = 16;
+ n->scope = RT_SCOPE_LINK;
+ n->scope_set = true;
+ n->table_set = true;
+ n->priority = IPV4LL_ROUTE_METRIC;
+ n->protocol = RTPROT_STATIC;
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int network_add_default_route_on_device(Network *network) {
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ unsigned section_line;
+ int r;
+
+ assert(network);
+
+ if (!network->default_route_on_device)
+ return 0;
+
+ section_line = hashmap_find_free_section_line(network->routes_by_section);
+
+ /* DefaultRouteOnDevice= is in [Network] section. */
+ r = route_new_static(network, network->filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ n->family = AF_INET;
+ n->scope = RT_SCOPE_LINK;
+ n->scope_set = true;
+ n->protocol = RTPROT_STATIC;
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_gateway(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(section, "Network")) {
+ /* we are not in an Route section, so use line number instead */
+ r = route_new_static(network, filename, line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+ } else {
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (isempty(rvalue)) {
+ n->gateway_from_dhcp_or_ra = false;
+ n->gw_family = AF_UNSPEC;
+ n->gw = IN_ADDR_NULL;
+ TAKE_PTR(n);
+ return 0;
+ }
+
+ if (streq(rvalue, "_dhcp")) {
+ n->gateway_from_dhcp_or_ra = true;
+ TAKE_PTR(n);
+ return 0;
+ }
+
+ if (streq(rvalue, "_dhcp4")) {
+ n->gw_family = AF_INET;
+ n->gateway_from_dhcp_or_ra = true;
+ TAKE_PTR(n);
+ return 0;
+ }
+
+ if (streq(rvalue, "_ipv6ra")) {
+ n->gw_family = AF_INET6;
+ n->gateway_from_dhcp_or_ra = true;
+ TAKE_PTR(n);
+ return 0;
+ }
+ }
+
+ r = in_addr_from_string_auto(rvalue, &n->gw_family, &n->gw);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid %s='%s', ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ n->gateway_from_dhcp_or_ra = false;
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_preferred_src(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (n->family == AF_UNSPEC)
+ r = in_addr_from_string_auto(rvalue, &n->family, &n->prefsrc);
+ else
+ r = in_addr_from_string(n->family, rvalue, &n->prefsrc);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, EINVAL,
+ "Invalid %s='%s', ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_destination(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ union in_addr_union *buffer;
+ unsigned char *prefixlen;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (streq(lvalue, "Destination")) {
+ buffer = &n->dst;
+ prefixlen = &n->dst_prefixlen;
+ } else if (streq(lvalue, "Source")) {
+ buffer = &n->src;
+ prefixlen = &n->src_prefixlen;
+ } else
+ assert_not_reached(lvalue);
+
+ if (n->family == AF_UNSPEC)
+ r = in_addr_prefix_from_string_auto(rvalue, &n->family, buffer, prefixlen);
+ else
+ r = in_addr_prefix_from_string(rvalue, n->family, buffer, prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, EINVAL,
+ "Invalid %s='%s', ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_priority(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &n->priority);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Could not parse route priority \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ n->priority_set = true;
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_scope(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = route_scope_from_string(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown route scope: %s", rvalue);
+ return 0;
+ }
+
+ n->scope = r;
+ n->scope_set = true;
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_table(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = route_table_from_string(rvalue);
+ if (r >= 0)
+ n->table = r;
+ else {
+ r = safe_atou32(rvalue, &n->table);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Could not parse route table number \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ }
+
+ n->table_set = true;
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_boolean(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Could not parse %s=\"%s\", ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ if (STR_IN_SET(lvalue, "GatewayOnLink", "GatewayOnlink"))
+ n->gateway_onlink = r;
+ else if (streq(lvalue, "QuickAck"))
+ n->quickack = r;
+ else if (streq(lvalue, "FastOpenNoCookie"))
+ n->fast_open_no_cookie = r;
+ else if (streq(lvalue, "TTLPropagate"))
+ n->ttl_propagate = r;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_ipv6_route_preference(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int r;
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (streq(rvalue, "low"))
+ n->pref = ICMPV6_ROUTER_PREF_LOW;
+ else if (streq(rvalue, "medium"))
+ n->pref = ICMPV6_ROUTER_PREF_MEDIUM;
+ else if (streq(rvalue, "high"))
+ n->pref = ICMPV6_ROUTER_PREF_HIGH;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown route preference: %s", rvalue);
+ return 0;
+ }
+
+ n->pref_set = true;
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_protocol(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int r;
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = route_protocol_from_string(rvalue);
+ if (r >= 0)
+ n->protocol = r;
+ else {
+ r = safe_atou8(rvalue , &n->protocol);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Could not parse route protocol \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ }
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int t, r;
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ t = route_type_from_string(rvalue);
+ if (t < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Could not parse route type \"%s\", ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ n->type = (unsigned char) t;
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_tcp_window(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ Network *network = userdata;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Could not parse TCP %s \"%s\", ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+ if (k >= 1024) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Specified TCP %s \"%s\" is too large, ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "InitialCongestionWindow"))
+ n->initcwnd = k;
+ else if (streq(lvalue, "InitialAdvertisedReceiveWindow"))
+ n->initrwnd = k;
+ else
+ assert_not_reached("Invalid TCP window type.");
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_route_mtu(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Network *network = userdata;
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ r = config_parse_mtu(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &n->mtu, userdata);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(n);
+ return 0;
+}
+
+int config_parse_multipath_route(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(route_free_or_set_invalidp) Route *n = NULL;
+ _cleanup_free_ char *word = NULL, *buf = NULL;
+ _cleanup_free_ MultipathRoute *m = NULL;
+ Network *network = userdata;
+ const char *p, *ip, *dev;
+ union in_addr_union a;
+ int family, r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = route_new_static(network, filename, section_line, &n);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to allocate route, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (isempty(rvalue)) {
+ n->multipath_routes = ordered_set_free_free(n->multipath_routes);
+ return 0;
+ }
+
+ m = new0(MultipathRoute, 1);
+ if (!m)
+ return log_oom();
+
+ p = rvalue;
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid multipath route option, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ dev = strchr(word, '@');
+ if (dev) {
+ buf = strndup(word, dev - word);
+ if (!buf)
+ return log_oom();
+ ip = buf;
+ dev++;
+ } else
+ ip = word;
+
+ r = in_addr_from_string_auto(ip, &family, &a);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid multipath route gateway '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ m->gateway.address = a;
+ m->gateway.family = family;
+
+ if (dev) {
+ r = resolve_interface(NULL, dev);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid interface name or index, ignoring assignment: %s", dev);
+ return 0;
+ }
+ m->ifindex = r;
+ }
+
+ if (!isempty(p)) {
+ r = safe_atou32(p, &m->weight);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid multipath route weight, ignoring assignment: %s", p);
+ return 0;
+ }
+ if (m->weight == 0 || m->weight > 256) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid multipath route weight, ignoring assignment: %s", p);
+ return 0;
+ }
+ }
+
+ r = ordered_set_ensure_allocated(&n->multipath_routes, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_set_put(n->multipath_routes, m);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store multipath route, ignoring assignment: %m");
+ return 0;
+ }
+
+ TAKE_PTR(m);
+ TAKE_PTR(n);
+ return 0;
+}
+
+static int route_section_verify(Route *route, Network *network) {
+ if (section_is_invalid(route->section))
+ return -EINVAL;
+
+ if (route->gateway_from_dhcp_or_ra) {
+ if (route->gw_family == AF_UNSPEC) {
+ /* When deprecated Gateway=_dhcp is set, then assume gateway family based on other settings. */
+ switch (route->family) {
+ case AF_UNSPEC:
+ log_warning("%s: Deprecated value \"_dhcp\" is specified for Gateway= in [Route] section from line %u. "
+ "Please use \"_dhcp4\" or \"_ipv6ra\" instead. Assuming \"_dhcp4\".",
+ route->section->filename, route->section->line);
+ route->family = AF_INET;
+ break;
+ case AF_INET:
+ case AF_INET6:
+ log_warning("%s: Deprecated value \"_dhcp\" is specified for Gateway= in [Route] section from line %u. "
+ "Assuming \"%s\" based on Destination=, Source=, or PreferredSource= setting.",
+ route->section->filename, route->section->line, route->family == AF_INET ? "_dhcp4" : "_ipv6ra");
+ break;
+ default:
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Invalid route family. Ignoring [Route] section from line %u.",
+ route->section->filename, route->section->line);
+ }
+ route->gw_family = route->family;
+ }
+
+ if (route->gw_family == AF_INET && !FLAGS_SET(network->dhcp, ADDRESS_FAMILY_IPV4))
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Gateway=\"_dhcp4\" is specified but DHCPv4 client is disabled. "
+ "Ignoring [Route] section from line %u.",
+ route->section->filename, route->section->line);
+
+ if (route->gw_family == AF_INET6 && !network->ipv6_accept_ra)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Gateway=\"_ipv6ra\" is specified but IPv6AcceptRA= is disabled. "
+ "Ignoring [Route] section from line %u.",
+ route->section->filename, route->section->line);
+ }
+
+ /* When only Gateway= is specified, assume the route family based on the Gateway address. */
+ if (route->family == AF_UNSPEC)
+ route->family = route->gw_family;
+
+ if (route->family == AF_UNSPEC) {
+ assert(route->section);
+
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Route section without Gateway=, Destination=, Source=, "
+ "or PreferredSource= field configured. "
+ "Ignoring [Route] section from line %u.",
+ route->section->filename, route->section->line);
+ }
+
+ if (route->family == AF_INET6 && route->gw_family == AF_INET)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: IPv4 gateway is configured for IPv6 route. "
+ "Ignoring [Route] section from line %u.",
+ route->section->filename, route->section->line);
+
+ if (!route->table_set && network->vrf) {
+ route->table = VRF(network->vrf)->table;
+ route->table_set = true;
+ }
+
+ if (!route->table_set && IN_SET(route->type, RTN_LOCAL, RTN_BROADCAST, RTN_ANYCAST, RTN_NAT))
+ route->table = RT_TABLE_LOCAL;
+
+ if (!route->scope_set && route->family != AF_INET6) {
+ if (IN_SET(route->type, RTN_LOCAL, RTN_NAT))
+ route->scope = RT_SCOPE_HOST;
+ else if (IN_SET(route->type, RTN_BROADCAST, RTN_ANYCAST, RTN_MULTICAST))
+ route->scope = RT_SCOPE_LINK;
+ }
+
+ if (route->scope != RT_SCOPE_UNIVERSE && route->family == AF_INET6) {
+ log_warning("%s: Scope= is specified for IPv6 route. It will be ignored.", route->section->filename);
+ route->scope = RT_SCOPE_UNIVERSE;
+ }
+
+ if (route->family == AF_INET6 && route->priority == 0)
+ route->priority = IP6_RT_PRIO_USER;
+
+ if (ordered_hashmap_isempty(network->addresses_by_section) &&
+ in_addr_is_null(route->gw_family, &route->gw) == 0 &&
+ route->gateway_onlink < 0) {
+ log_warning("%s: Gateway= without static address configured. "
+ "Enabling GatewayOnLink= option.",
+ network->filename);
+ route->gateway_onlink = true;
+ }
+
+ return 0;
+}
+
+void network_drop_invalid_routes(Network *network) {
+ Route *route;
+
+ assert(network);
+
+ HASHMAP_FOREACH(route, network->routes_by_section)
+ if (route_section_verify(route, network) < 0)
+ route_free(route);
+}
diff --git a/src/network/networkd-route.h b/src/network/networkd-route.h
new file mode 100644
index 0000000..f593693
--- /dev/null
+++ b/src/network/networkd-route.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "networkd-link.h"
+#include "networkd-util.h"
+
+typedef struct Manager Manager;
+typedef struct Network Network;
+
+typedef struct Route {
+ Network *network;
+ NetworkConfigSection *section;
+
+ Link *link;
+ Manager *manager;
+
+ int family;
+ int gw_family;
+ uint32_t gw_weight;
+ int quickack;
+ int fast_open_no_cookie;
+ int ttl_propagate;
+
+ unsigned char dst_prefixlen;
+ unsigned char src_prefixlen;
+ unsigned char scope;
+ unsigned char protocol; /* RTPROT_* */
+ unsigned char type; /* RTN_* */
+ unsigned char tos;
+ uint32_t priority; /* note that ip(8) calls this 'metric' */
+ uint32_t table;
+ uint32_t mtu;
+ uint32_t initcwnd;
+ uint32_t initrwnd;
+ unsigned char pref;
+ unsigned flags;
+ int gateway_onlink;
+
+ bool scope_set:1;
+ bool table_set:1;
+ bool priority_set:1;
+ bool protocol_set:1;
+ bool pref_set:1;
+ bool gateway_from_dhcp_or_ra:1;
+
+ union in_addr_union gw;
+ union in_addr_union dst;
+ union in_addr_union src;
+ union in_addr_union prefsrc;
+ OrderedSet *multipath_routes;
+
+ usec_t lifetime;
+ sd_event_source *expire;
+} Route;
+
+void route_hash_func(const Route *route, struct siphash *state);
+int route_compare_func(const Route *a, const Route *b);
+extern const struct hash_ops route_hash_ops;
+
+int route_new(Route **ret);
+Route *route_free(Route *route);
+DEFINE_NETWORK_SECTION_FUNCTIONS(Route, route_free);
+
+int route_configure(const Route *route, Link *link, link_netlink_message_handler_t callback, Route **ret);
+int route_remove(const Route *route, Manager *manager, Link *link, link_netlink_message_handler_t callback);
+
+int link_set_routes(Link *link);
+int link_drop_routes(Link *link);
+int link_drop_foreign_routes(Link *link);
+int link_serialize_routes(const Link *link, FILE *f);
+int link_deserialize_routes(Link *link, const char *routes);
+
+uint32_t link_get_dhcp_route_table(const Link *link);
+uint32_t link_get_ipv6_accept_ra_route_table(const Link *link);
+
+int manager_rtnl_process_route(sd_netlink *rtnl, sd_netlink_message *message, Manager *m);
+
+int network_add_ipv4ll_route(Network *network);
+int network_add_default_route_on_device(Network *network);
+void network_drop_invalid_routes(Network *network);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_gateway);
+CONFIG_PARSER_PROTOTYPE(config_parse_preferred_src);
+CONFIG_PARSER_PROTOTYPE(config_parse_destination);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_priority);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_scope);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_table);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_boolean);
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_route_preference);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_protocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_tcp_window);
+CONFIG_PARSER_PROTOTYPE(config_parse_route_mtu);
+CONFIG_PARSER_PROTOTYPE(config_parse_multipath_route);
diff --git a/src/network/networkd-routing-policy-rule.c b/src/network/networkd-routing-policy-rule.c
new file mode 100644
index 0000000..e44ecb4
--- /dev/null
+++ b/src/network/networkd-routing-policy-rule.c
@@ -0,0 +1,1810 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <linux/fib_rules.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "ip-protocol-list.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "networkd-routing-policy-rule.h"
+#include "networkd-util.h"
+#include "parse-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+RoutingPolicyRule *routing_policy_rule_free(RoutingPolicyRule *rule) {
+ if (!rule)
+ return NULL;
+
+ if (rule->network) {
+ assert(rule->section);
+ hashmap_remove(rule->network->rules_by_section, rule->section);
+ }
+
+ if (rule->manager) {
+ if (set_get(rule->manager->rules, rule) == rule)
+ set_remove(rule->manager->rules, rule);
+ if (set_get(rule->manager->rules_foreign, rule) == rule)
+ set_remove(rule->manager->rules_foreign, rule);
+ }
+
+ network_config_section_free(rule->section);
+ free(rule->iif);
+ free(rule->oif);
+
+ return mfree(rule);
+}
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(RoutingPolicyRule, routing_policy_rule_free);
+
+static int routing_policy_rule_new(RoutingPolicyRule **ret) {
+ RoutingPolicyRule *rule;
+
+ rule = new(RoutingPolicyRule, 1);
+ if (!rule)
+ return -ENOMEM;
+
+ *rule = (RoutingPolicyRule) {
+ .table = RT_TABLE_MAIN,
+ .uid_range.start = UID_INVALID,
+ .uid_range.end = UID_INVALID,
+ .suppress_prefixlen = -1,
+ };
+
+ *ret = rule;
+ return 0;
+}
+
+static int routing_policy_rule_new_static(Network *network, const char *filename, unsigned section_line, RoutingPolicyRule **ret) {
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *rule = NULL;
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ rule = hashmap_get(network->rules_by_section, n);
+ if (rule) {
+ *ret = TAKE_PTR(rule);
+ return 0;
+ }
+
+ r = routing_policy_rule_new(&rule);
+ if (r < 0)
+ return r;
+
+ rule->network = network;
+ rule->section = TAKE_PTR(n);
+
+ r = hashmap_ensure_allocated(&network->rules_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(network->rules_by_section, rule->section, rule);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(rule);
+ return 0;
+}
+
+static int routing_policy_rule_copy(RoutingPolicyRule *dest, RoutingPolicyRule *src) {
+ _cleanup_free_ char *iif = NULL, *oif = NULL;
+
+ assert(dest);
+ assert(src);
+
+ if (src->iif) {
+ iif = strdup(src->iif);
+ if (!iif)
+ return -ENOMEM;
+ }
+
+ if (src->oif) {
+ oif = strdup(src->oif);
+ if (!oif)
+ return -ENOMEM;
+ }
+
+ dest->family = src->family;
+ dest->from = src->from;
+ dest->from_prefixlen = src->from_prefixlen;
+ dest->to = src->to;
+ dest->to_prefixlen = src->to_prefixlen;
+ dest->invert_rule = src->invert_rule;
+ dest->tos = src->tos;
+ dest->fwmark = src->fwmark;
+ dest->fwmask = src->fwmask;
+ dest->priority = src->priority;
+ dest->table = src->table;
+ dest->iif = TAKE_PTR(iif);
+ dest->oif = TAKE_PTR(oif);
+ dest->protocol = src->protocol;
+ dest->sport = src->sport;
+ dest->dport = src->dport;
+ dest->uid_range = src->uid_range;
+ dest->suppress_prefixlen = src->suppress_prefixlen;
+
+ return 0;
+}
+
+static void routing_policy_rule_hash_func(const RoutingPolicyRule *rule, struct siphash *state) {
+ assert(rule);
+
+ siphash24_compress(&rule->family, sizeof(rule->family), state);
+
+ switch (rule->family) {
+ case AF_INET:
+ case AF_INET6:
+ siphash24_compress(&rule->from, FAMILY_ADDRESS_SIZE(rule->family), state);
+ siphash24_compress(&rule->from_prefixlen, sizeof(rule->from_prefixlen), state);
+
+ siphash24_compress(&rule->to, FAMILY_ADDRESS_SIZE(rule->family), state);
+ siphash24_compress(&rule->to_prefixlen, sizeof(rule->to_prefixlen), state);
+
+ siphash24_compress_boolean(rule->invert_rule, state);
+
+ siphash24_compress(&rule->tos, sizeof(rule->tos), state);
+ siphash24_compress(&rule->fwmark, sizeof(rule->fwmark), state);
+ siphash24_compress(&rule->fwmask, sizeof(rule->fwmask), state);
+ siphash24_compress(&rule->priority, sizeof(rule->priority), state);
+ siphash24_compress(&rule->table, sizeof(rule->table), state);
+ siphash24_compress(&rule->suppress_prefixlen, sizeof(rule->suppress_prefixlen), state);
+
+ siphash24_compress(&rule->protocol, sizeof(rule->protocol), state);
+ siphash24_compress(&rule->sport, sizeof(rule->sport), state);
+ siphash24_compress(&rule->dport, sizeof(rule->dport), state);
+ siphash24_compress(&rule->uid_range, sizeof(rule->uid_range), state);
+
+ siphash24_compress_string(rule->iif, state);
+ siphash24_compress_string(rule->oif, state);
+
+ break;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ break;
+ }
+}
+
+static int routing_policy_rule_compare_func(const RoutingPolicyRule *a, const RoutingPolicyRule *b) {
+ int r;
+
+ r = CMP(a->family, b->family);
+ if (r != 0)
+ return r;
+
+ switch (a->family) {
+ case AF_INET:
+ case AF_INET6:
+ r = CMP(a->from_prefixlen, b->from_prefixlen);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->from, &b->from, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+
+ r = CMP(a->to_prefixlen, b->to_prefixlen);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->to, &b->to, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+
+ r = CMP(a->invert_rule, b->invert_rule);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->tos, b->tos);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->fwmark, b->fwmark);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->fwmask, b->fwmask);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->priority, b->priority);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->table, b->table);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->suppress_prefixlen, b->suppress_prefixlen);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->protocol, b->protocol);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->sport, &b->sport, sizeof(a->sport));
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->dport, &b->dport, sizeof(a->dport));
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->uid_range, &b->uid_range, sizeof(a->uid_range));
+ if (r != 0)
+ return r;
+
+ r = strcmp_ptr(a->iif, b->iif);
+ if (r != 0)
+ return r;
+
+ r = strcmp_ptr(a->oif, b->oif);
+ if (r != 0)
+ return r;
+
+ return 0;
+ default:
+ /* treat any other address family as AF_UNSPEC */
+ return 0;
+ }
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ routing_policy_rule_hash_ops,
+ RoutingPolicyRule,
+ routing_policy_rule_hash_func,
+ routing_policy_rule_compare_func,
+ routing_policy_rule_free);
+
+static int routing_policy_rule_get(Manager *m, RoutingPolicyRule *rule, RoutingPolicyRule **ret) {
+
+ RoutingPolicyRule *existing;
+
+ assert(m);
+
+ existing = set_get(m->rules, rule);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 1;
+ }
+
+ existing = set_get(m->rules_foreign, rule);
+ if (existing) {
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int routing_policy_rule_add_internal(Manager *m, Set **rules, RoutingPolicyRule *in, int family, RoutingPolicyRule **ret) {
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *rule = NULL;
+ int r;
+
+ assert(m);
+ assert(rules);
+ assert(in);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ assert(in->family == AF_UNSPEC || in->family == family);
+
+ r = routing_policy_rule_new(&rule);
+ if (r < 0)
+ return r;
+
+ r = routing_policy_rule_copy(rule, in);
+ if (r < 0)
+ return r;
+
+ rule->family = family;
+
+ r = set_ensure_put(rules, &routing_policy_rule_hash_ops, rule);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EEXIST;
+
+ rule->manager = m;
+
+ if (ret)
+ *ret = rule;
+
+ TAKE_PTR(rule);
+ return 0;
+}
+
+static int routing_policy_rule_add(Manager *m, RoutingPolicyRule *rule, int family, RoutingPolicyRule **ret) {
+ return routing_policy_rule_add_internal(m, &m->rules, rule, family, ret);
+}
+
+static int routing_policy_rule_add_foreign(Manager *m, RoutingPolicyRule *rule, RoutingPolicyRule **ret) {
+ return routing_policy_rule_add_internal(m, &m->rules_foreign, rule, rule->family, ret);
+}
+
+static int routing_policy_rule_set_netlink_message(RoutingPolicyRule *rule, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(rule);
+ assert(m);
+ assert(link);
+
+ if (in_addr_is_null(rule->family, &rule->from) == 0) {
+ r = netlink_message_append_in_addr_union(m, FRA_SRC, rule->family, &rule->from);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_SRC attribute: %m");
+
+ r = sd_rtnl_message_routing_policy_rule_set_rtm_src_prefixlen(m, rule->from_prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set source prefix length: %m");
+ }
+
+ if (in_addr_is_null(rule->family, &rule->to) == 0) {
+ r = netlink_message_append_in_addr_union(m, FRA_DST, rule->family, &rule->to);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_DST attribute: %m");
+
+ r = sd_rtnl_message_routing_policy_rule_set_rtm_dst_prefixlen(m, rule->to_prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set destination prefix length: %m");
+ }
+
+ r = sd_netlink_message_append_u32(m, FRA_PRIORITY, rule->priority);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_PRIORITY attribute: %m");
+
+ if (rule->tos > 0) {
+ r = sd_rtnl_message_routing_policy_rule_set_tos(m, rule->tos);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set IP rule TOS: %m");
+ }
+
+ if (rule->table < 256) {
+ r = sd_rtnl_message_routing_policy_rule_set_table(m, rule->table);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set IP rule table: %m");
+ } else {
+ r = sd_rtnl_message_routing_policy_rule_set_table(m, RT_TABLE_UNSPEC);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set IP rule table: %m");
+
+ r = sd_netlink_message_append_u32(m, FRA_TABLE, rule->table);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_TABLE attribute: %m");
+ }
+
+ if (rule->fwmark > 0) {
+ r = sd_netlink_message_append_u32(m, FRA_FWMARK, rule->fwmark);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_FWMARK attribute: %m");
+
+ r = sd_netlink_message_append_u32(m, FRA_FWMASK, rule->fwmask);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_FWMASK attribute: %m");
+ }
+
+ if (rule->iif) {
+ r = sd_netlink_message_append_string(m, FRA_IIFNAME, rule->iif);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_IIFNAME attribute: %m");
+ }
+
+ if (rule->oif) {
+ r = sd_netlink_message_append_string(m, FRA_OIFNAME, rule->oif);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_OIFNAME attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u8(m, FRA_IP_PROTO, rule->protocol);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_IP_PROTO attribute: %m");
+
+ if (rule->sport.start != 0 || rule->sport.end != 0) {
+ r = sd_netlink_message_append_data(m, FRA_SPORT_RANGE, &rule->sport, sizeof(rule->sport));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_SPORT_RANGE attribute: %m");
+ }
+
+ if (rule->dport.start != 0 || rule->dport.end != 0) {
+ r = sd_netlink_message_append_data(m, FRA_DPORT_RANGE, &rule->dport, sizeof(rule->dport));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_DPORT_RANGE attribute: %m");
+ }
+
+ if (rule->uid_range.start != UID_INVALID && rule->uid_range.end != UID_INVALID) {
+ r = sd_netlink_message_append_data(m, FRA_UID_RANGE, &rule->uid_range, sizeof(rule->uid_range));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_UID_RANGE attribute: %m");
+ }
+
+ if (rule->invert_rule) {
+ r = sd_rtnl_message_routing_policy_rule_set_flags(m, FIB_RULE_INVERT);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FIB_RULE_INVERT attribute: %m");
+ }
+
+ if (rule->suppress_prefixlen >= 0) {
+ r = sd_netlink_message_append_u32(m, FRA_SUPPRESS_PREFIXLEN, (uint32_t) rule->suppress_prefixlen);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append FRA_SUPPRESS_PREFIXLEN attribute: %m");
+ }
+
+ return 0;
+}
+
+static int routing_policy_rule_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+
+ link->routing_policy_rule_remove_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ log_link_message_warning_errno(link, m, r, "Could not drop routing policy rule");
+
+ return 1;
+}
+
+static int routing_policy_rule_remove(RoutingPolicyRule *rule, Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(rule);
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+ assert(IN_SET(rule->family, AF_INET, AF_INET6));
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *from = NULL, *to = NULL;
+
+ (void) in_addr_to_string(rule->family, &rule->from, &from);
+ (void) in_addr_to_string(rule->family, &rule->to, &to);
+
+ log_link_debug(link,
+ "Removing routing policy rule: priority: %"PRIu32", %s/%u -> %s/%u, iif: %s, oif: %s, table: %"PRIu32,
+ rule->priority, strna(from), rule->from_prefixlen, strna(to), rule->to_prefixlen, strna(rule->iif), strna(rule->oif), rule->table);
+ }
+
+ r = sd_rtnl_message_new_routing_policy_rule(link->manager->rtnl, &m, RTM_DELRULE, rule->family);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_DELRULE message: %m");
+
+ r = routing_policy_rule_set_netlink_message(rule, m, link);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(link->manager->rtnl, NULL, m,
+ routing_policy_rule_remove_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+
+ return 0;
+}
+
+static int routing_policy_rule_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(rtnl);
+ assert(m);
+ assert(link);
+ assert(link->ifname);
+ assert(link->routing_policy_rule_messages > 0);
+
+ link->routing_policy_rule_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_warning_errno(link, m, r, "Could not add routing policy rule");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->routing_policy_rule_messages == 0) {
+ log_link_debug(link, "Routing policy rule configured");
+ link->routing_policy_rules_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int routing_policy_rule_configure_internal(RoutingPolicyRule *rule, int family, Link *link) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(rule);
+ assert(link);
+ assert(link->ifindex > 0);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *from = NULL, *to = NULL;
+
+ (void) in_addr_to_string(family, &rule->from, &from);
+ (void) in_addr_to_string(family, &rule->to, &to);
+
+ log_link_debug(link,
+ "Configuring routing policy rule: priority: %"PRIu32", %s/%u -> %s/%u, iif: %s, oif: %s, table: %"PRIu32,
+ rule->priority, strna(from), rule->from_prefixlen, strna(to), rule->to_prefixlen, strna(rule->iif), strna(rule->oif), rule->table);
+ }
+
+ r = sd_rtnl_message_new_routing_policy_rule(link->manager->rtnl, &m, RTM_NEWRULE, family);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_NEWRULE message: %m");
+
+ r = routing_policy_rule_set_netlink_message(rule, m, link);
+ if (r < 0)
+ return r;
+
+ r = netlink_call_async(link->manager->rtnl, NULL, m,
+ routing_policy_rule_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ link->routing_policy_rule_messages++;
+
+ r = routing_policy_rule_add(link->manager, rule, family, NULL);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not add rule: %m");
+
+ return 1;
+}
+
+static int routing_policy_rule_configure(RoutingPolicyRule *rule, Link *link) {
+ int r;
+
+ if (IN_SET(rule->family, AF_INET, AF_INET6))
+ return routing_policy_rule_configure_internal(rule, rule->family, link);
+
+ if (FLAGS_SET(rule->address_family, ADDRESS_FAMILY_IPV4)) {
+ r = routing_policy_rule_configure_internal(rule, AF_INET, link);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(rule->address_family, ADDRESS_FAMILY_IPV6)) {
+ r = routing_policy_rule_configure_internal(rule, AF_INET6, link);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static bool manager_links_have_routing_policy_rule(Manager *m, RoutingPolicyRule *rule) {
+ Link *link;
+
+ assert(m);
+ assert(rule);
+
+ HASHMAP_FOREACH(link, m->links) {
+ RoutingPolicyRule *link_rule;
+
+ if (!link->network)
+ continue;
+
+ HASHMAP_FOREACH(link_rule, link->network->rules_by_section)
+ if (routing_policy_rule_compare_func(link_rule, rule) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static void routing_policy_rule_purge(Manager *m, Link *link) {
+ RoutingPolicyRule *rule;
+ int r;
+
+ assert(m);
+ assert(link);
+
+ SET_FOREACH(rule, m->rules_saved) {
+ RoutingPolicyRule *existing;
+
+ existing = set_get(m->rules_foreign, rule);
+ if (!existing)
+ continue; /* Saved rule does not exist anymore. */
+
+ if (manager_links_have_routing_policy_rule(m, existing))
+ continue; /* Existing links have the saved rule. */
+
+ /* Existing links do not have the saved rule. Let's drop the rule now, and re-configure it
+ * later when it is requested. */
+
+ r = routing_policy_rule_remove(existing, link);
+ if (r < 0) {
+ log_warning_errno(r, "Could not remove routing policy rules: %m");
+ continue;
+ }
+
+ link->routing_policy_rule_remove_messages++;
+
+ assert_se(set_remove(m->rules_foreign, existing) == existing);
+ routing_policy_rule_free(existing);
+ }
+}
+
+int link_set_routing_policy_rules(Link *link) {
+ RoutingPolicyRule *rule;
+ int r;
+
+ assert(link);
+ assert(link->network);
+
+ link->routing_policy_rules_configured = false;
+
+ HASHMAP_FOREACH(rule, link->network->rules_by_section) {
+ RoutingPolicyRule *existing;
+
+ r = routing_policy_rule_get(link->manager, rule, &existing);
+ if (r > 0)
+ continue;
+ if (r == 0) {
+ r = set_ensure_put(&link->manager->rules, &routing_policy_rule_hash_ops, existing);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not store existing routing policy rule: %m");
+
+ set_remove(link->manager->rules_foreign, existing);
+ continue;
+ }
+
+ r = routing_policy_rule_configure(rule, link);
+ if (r < 0)
+ return log_link_warning_errno(link, r, "Could not set routing policy rule: %m");
+ }
+
+ routing_policy_rule_purge(link->manager, link);
+ if (link->routing_policy_rule_messages == 0)
+ link->routing_policy_rules_configured = true;
+ else {
+ log_link_debug(link, "Setting routing policy rules");
+ link_set_state(link, LINK_STATE_CONFIGURING);
+ }
+
+ return 0;
+}
+
+int manager_rtnl_process_rule(sd_netlink *rtnl, sd_netlink_message *message, Manager *m) {
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *tmp = NULL;
+ _cleanup_free_ char *from = NULL, *to = NULL;
+ RoutingPolicyRule *rule = NULL;
+ const char *iif = NULL, *oif = NULL;
+ uint32_t suppress_prefixlen;
+ unsigned flags;
+ uint16_t type;
+ int r;
+
+ assert(rtnl);
+ assert(message);
+
+ if (sd_netlink_message_is_error(message)) {
+ r = sd_netlink_message_get_errno(message);
+ if (r < 0)
+ log_message_warning_errno(message, r, "rtnl: failed to receive rule message, ignoring");
+
+ return 0;
+ }
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get message type, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(type, RTM_NEWRULE, RTM_DELRULE)) {
+ log_warning("rtnl: received unexpected message type %u when processing rule, ignoring.", type);
+ return 0;
+ }
+
+ r = routing_policy_rule_new(&tmp);
+ if (r < 0) {
+ log_oom();
+ return 0;
+ }
+
+ r = sd_rtnl_message_get_family(message, &tmp->family);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: could not get rule family, ignoring: %m");
+ return 0;
+ } else if (!IN_SET(tmp->family, AF_INET, AF_INET6)) {
+ log_debug("rtnl: received rule message with invalid family %d, ignoring.", tmp->family);
+ return 0;
+ }
+
+ switch (tmp->family) {
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(message, FRA_SRC, &tmp->from.in);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_SRC attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(message, &tmp->from_prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: received rule message without valid source prefix length, ignoring: %m");
+ return 0;
+ }
+ }
+
+ r = sd_netlink_message_read_in_addr(message, FRA_DST, &tmp->to.in);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_DST attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = sd_rtnl_message_routing_policy_rule_get_rtm_dst_prefixlen(message, &tmp->to_prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: received rule message without valid destination prefix length, ignoring: %m");
+ return 0;
+ }
+ }
+
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(message, FRA_SRC, &tmp->from.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_SRC attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(message, &tmp->from_prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: received rule message without valid source prefix length, ignoring: %m");
+ return 0;
+ }
+ }
+
+ r = sd_netlink_message_read_in6_addr(message, FRA_DST, &tmp->to.in6);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_DST attribute, ignoring: %m");
+ return 0;
+ } else if (r >= 0) {
+ r = sd_rtnl_message_routing_policy_rule_get_rtm_dst_prefixlen(message, &tmp->to_prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: received rule message without valid destination prefix length, ignoring: %m");
+ return 0;
+ }
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Received rule message with unsupported address family");
+ }
+
+ r = sd_rtnl_message_routing_policy_rule_get_flags(message, &flags);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: received rule message without valid flag, ignoring: %m");
+ return 0;
+ }
+ tmp->invert_rule = flags & FIB_RULE_INVERT;
+
+ r = sd_netlink_message_read_u32(message, FRA_FWMARK, &tmp->fwmark);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_FWMARK attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, FRA_FWMASK, &tmp->fwmask);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_FWMASK attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, FRA_PRIORITY, &tmp->priority);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_PRIORITY attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, FRA_TABLE, &tmp->table);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_TABLE attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_routing_policy_rule_get_tos(message, &tmp->tos);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get ip rule TOS, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(message, FRA_IIFNAME, &iif);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_IIFNAME attribute, ignoring: %m");
+ return 0;
+ }
+ r = free_and_strdup(&tmp->iif, iif);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_netlink_message_read_string(message, FRA_OIFNAME, &oif);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_OIFNAME attribute, ignoring: %m");
+ return 0;
+ }
+ r = free_and_strdup(&tmp->oif, oif);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_netlink_message_read_u8(message, FRA_IP_PROTO, &tmp->protocol);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_IP_PROTO attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read(message, FRA_SPORT_RANGE, sizeof(tmp->sport), &tmp->sport);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_SPORT_RANGE attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read(message, FRA_DPORT_RANGE, sizeof(tmp->dport), &tmp->dport);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_DPORT_RANGE attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read(message, FRA_UID_RANGE, sizeof(tmp->uid_range), &tmp->uid_range);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_UID_RANGE attribute, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_read_u32(message, FRA_SUPPRESS_PREFIXLEN, &suppress_prefixlen);
+ if (r < 0 && r != -ENODATA) {
+ log_warning_errno(r, "rtnl: could not get FRA_SUPPRESS_PREFIXLEN attribute, ignoring: %m");
+ return 0;
+ }
+ if (r >= 0)
+ tmp->suppress_prefixlen = (int) suppress_prefixlen;
+
+ (void) routing_policy_rule_get(m, tmp, &rule);
+
+ if (DEBUG_LOGGING) {
+ (void) in_addr_to_string(tmp->family, &tmp->from, &from);
+ (void) in_addr_to_string(tmp->family, &tmp->to, &to);
+ }
+
+ switch (type) {
+ case RTM_NEWRULE:
+ if (rule)
+ log_debug("Received remembered routing policy rule: priority: %"PRIu32", %s/%u -> %s/%u, iif: %s, oif: %s, table: %"PRIu32,
+ tmp->priority, strna(from), tmp->from_prefixlen, strna(to), tmp->to_prefixlen, strna(tmp->iif), strna(tmp->oif), tmp->table);
+ else {
+ log_debug("Remembering foreign routing policy rule: priority: %"PRIu32", %s/%u -> %s/%u, iif: %s, oif: %s, table: %"PRIu32,
+ tmp->priority, strna(from), tmp->from_prefixlen, strna(to), tmp->to_prefixlen, strna(tmp->iif), strna(tmp->oif), tmp->table);
+ r = routing_policy_rule_add_foreign(m, tmp, &rule);
+ if (r < 0) {
+ log_warning_errno(r, "Could not remember foreign rule, ignoring: %m");
+ return 0;
+ }
+ }
+ break;
+ case RTM_DELRULE:
+ if (rule) {
+ log_debug("Forgetting routing policy rule: priority: %"PRIu32", %s/%u -> %s/%u, iif: %s, oif: %s, table: %"PRIu32,
+ tmp->priority, strna(from), tmp->from_prefixlen, strna(to), tmp->to_prefixlen, strna(tmp->iif), strna(tmp->oif), tmp->table);
+ routing_policy_rule_free(rule);
+ } else
+ log_debug("Kernel removed a routing policy rule we don't remember: priority: %"PRIu32", %s/%u -> %s/%u, iif: %s, oif: %s, table: %"PRIu32", ignoring.",
+ tmp->priority, strna(from), tmp->from_prefixlen, strna(to), tmp->to_prefixlen, strna(tmp->iif), strna(tmp->oif), tmp->table);
+ break;
+
+ default:
+ assert_not_reached("Received invalid RTNL message type");
+ }
+
+ return 1;
+}
+
+static int parse_fwmark_fwmask(const char *s, uint32_t *ret_fwmark, uint32_t *ret_fwmask) {
+ _cleanup_free_ char *fwmark_str = NULL;
+ uint32_t fwmark, fwmask = 0;
+ const char *slash;
+ int r;
+
+ assert(s);
+ assert(ret_fwmark);
+ assert(ret_fwmask);
+
+ slash = strchr(s, '/');
+ if (slash) {
+ fwmark_str = strndup(s, slash - s);
+ if (!fwmark_str)
+ return -ENOMEM;
+ }
+
+ r = safe_atou32(fwmark_str ?: s, &fwmark);
+ if (r < 0)
+ return r;
+
+ if (fwmark > 0) {
+ if (slash) {
+ r = safe_atou32(slash + 1, &fwmask);
+ if (r < 0)
+ return r;
+ } else
+ fwmask = UINT32_MAX;
+ }
+
+ *ret_fwmark = fwmark;
+ *ret_fwmask = fwmask;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_tos(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = safe_atou8(rvalue, &n->tos);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse RPDB rule TOS, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_priority(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = safe_atou32(rvalue, &n->priority);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse RPDB rule priority, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_table(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = safe_atou32(rvalue, &n->table);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse RPDB rule table, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_fwmark_mask(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_fwmark_fwmask(rvalue, &n->fwmark, &n->fwmask);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse RPDB rule firewall mark or mask, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_prefix(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ union in_addr_union *buffer;
+ uint8_t *prefixlen;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ if (streq(lvalue, "To")) {
+ buffer = &n->to;
+ prefixlen = &n->to_prefixlen;
+ } else {
+ buffer = &n->from;
+ prefixlen = &n->from_prefixlen;
+ }
+
+ if (n->family == AF_UNSPEC)
+ r = in_addr_prefix_from_string_auto(rvalue, &n->family, buffer, prefixlen);
+ else
+ r = in_addr_prefix_from_string(rvalue, n->family, buffer, prefixlen);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "RPDB rule prefix is invalid, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_device(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse '%s' interface name, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "IncomingInterface")) {
+ r = free_and_strdup(&n->iif, rvalue);
+ if (r < 0)
+ return log_oom();
+ } else {
+ r = free_and_strdup(&n->oif, rvalue);
+ if (r < 0)
+ return log_oom();
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_port_range(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ uint16_t low, high;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_ip_port_range(rvalue, &low, &high);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse routing policy rule port range '%s'", rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "SourcePort")) {
+ n->sport.start = low;
+ n->sport.end = high;
+ } else {
+ n->dport.start = low;
+ n->dport.end = high;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_ip_protocol(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_ip_protocol(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse IP protocol '%s' for routing policy rule, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ n->protocol = r;
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_invert(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse RPDB rule invert, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n->invert_rule = r;
+
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_family(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ AddressFamily a;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ a = routing_policy_rule_address_family_from_string(rvalue);
+ if (a < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid address family '%s', ignoring.", rvalue);
+ return 0;
+ }
+
+ n->address_family = a;
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_uid_range(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ uid_t start, end;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = get_user_creds(&rvalue, &start, NULL, NULL, NULL, 0);
+ if (r >= 0)
+ end = start;
+ else {
+ r = parse_uid_range(rvalue, &start, &end);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid uid or uid range '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+ }
+
+ n->uid_range.start = start;
+ n->uid_range.end = end;
+ n = NULL;
+
+ return 0;
+}
+
+int config_parse_routing_policy_rule_suppress_prefixlen(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(routing_policy_rule_free_or_set_invalidp) RoutingPolicyRule *n = NULL;
+ Network *network = userdata;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = routing_policy_rule_new_static(network, filename, section_line, &n);
+ if (r < 0)
+ return log_oom();
+
+ r = parse_ip_prefix_length(rvalue, &n->suppress_prefixlen);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Prefix length outside of valid range 0-128, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse RPDB rule suppress_prefixlen, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ n = NULL;
+
+ return 0;
+}
+
+static int routing_policy_rule_section_verify(RoutingPolicyRule *rule) {
+ if (section_is_invalid(rule->section))
+ return -EINVAL;
+
+ if ((rule->family == AF_INET && FLAGS_SET(rule->address_family, ADDRESS_FAMILY_IPV6)) ||
+ (rule->family == AF_INET6 && FLAGS_SET(rule->address_family, ADDRESS_FAMILY_IPV4)))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: address family specified by Family= conflicts with the address "
+ "specified by To= or From=. Ignoring [RoutingPolicyRule] section from line %u.",
+ rule->section->filename, rule->section->line);
+
+ if (rule->family == AF_UNSPEC && rule->address_family == ADDRESS_FAMILY_NO)
+ rule->family = AF_INET;
+
+ return 0;
+}
+
+void network_drop_invalid_routing_policy_rules(Network *network) {
+ RoutingPolicyRule *rule;
+
+ assert(network);
+
+ HASHMAP_FOREACH(rule, network->rules_by_section)
+ if (routing_policy_rule_section_verify(rule) < 0)
+ routing_policy_rule_free(rule);
+}
+
+int routing_policy_serialize_rules(Set *rules, FILE *f) {
+ RoutingPolicyRule *rule;
+ int r;
+
+ assert(f);
+
+ SET_FOREACH(rule, rules) {
+ const char *family_str;
+ bool space = false;
+
+ fputs("RULE=", f);
+
+ family_str = af_to_name(rule->family);
+ if (family_str) {
+ fprintf(f, "family=%s",
+ family_str);
+ space = true;
+ }
+
+ if (!in_addr_is_null(rule->family, &rule->from)) {
+ _cleanup_free_ char *str = NULL;
+
+ r = in_addr_to_string(rule->family, &rule->from, &str);
+ if (r < 0)
+ return r;
+
+ fprintf(f, "%sfrom=%s/%hhu",
+ space ? " " : "",
+ str, rule->from_prefixlen);
+ space = true;
+ }
+
+ if (!in_addr_is_null(rule->family, &rule->to)) {
+ _cleanup_free_ char *str = NULL;
+
+ r = in_addr_to_string(rule->family, &rule->to, &str);
+ if (r < 0)
+ return r;
+
+ fprintf(f, "%sto=%s/%hhu",
+ space ? " " : "",
+ str, rule->to_prefixlen);
+ space = true;
+ }
+
+ if (rule->tos != 0) {
+ fprintf(f, "%stos=%hhu",
+ space ? " " : "",
+ rule->tos);
+ space = true;
+ }
+
+ if (rule->priority != 0) {
+ fprintf(f, "%spriority=%"PRIu32,
+ space ? " " : "",
+ rule->priority);
+ space = true;
+ }
+
+ if (rule->fwmark != 0) {
+ fprintf(f, "%sfwmark=%"PRIu32,
+ space ? " " : "",
+ rule->fwmark);
+ if (rule->fwmask != UINT32_MAX)
+ fprintf(f, "/%"PRIu32, rule->fwmask);
+ space = true;
+ }
+
+ if (rule->iif) {
+ fprintf(f, "%siif=%s",
+ space ? " " : "",
+ rule->iif);
+ space = true;
+ }
+
+ if (rule->oif) {
+ fprintf(f, "%soif=%s",
+ space ? " " : "",
+ rule->oif);
+ space = true;
+ }
+
+ if (rule->protocol != 0) {
+ fprintf(f, "%sprotocol=%hhu",
+ space ? " " : "",
+ rule->protocol);
+ space = true;
+ }
+
+ if (rule->sport.start != 0 || rule->sport.end != 0) {
+ fprintf(f, "%ssourcesport=%"PRIu16"-%"PRIu16,
+ space ? " " : "",
+ rule->sport.start, rule->sport.end);
+ space = true;
+ }
+
+ if (rule->dport.start != 0 || rule->dport.end != 0) {
+ fprintf(f, "%sdestinationport=%"PRIu16"-%"PRIu16,
+ space ? " " : "",
+ rule->dport.start, rule->dport.end);
+ space = true;
+ }
+
+ if (rule->uid_range.start != UID_INVALID && rule->uid_range.end != UID_INVALID) {
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+ fprintf(f, "%suidrange="UID_FMT"-"UID_FMT,
+ space ? " " : "",
+ rule->uid_range.start, rule->uid_range.end);
+ space = true;
+ }
+
+ if (rule->suppress_prefixlen >= 0) {
+ fprintf(f, "%ssuppress_prefixlen=%d",
+ space ? " " : "",
+ rule->suppress_prefixlen);
+ space = true;
+ }
+
+ fprintf(f, "%sinvert_rule=%s table=%"PRIu32"\n",
+ space ? " " : "",
+ yes_no(rule->invert_rule),
+ rule->table);
+ }
+
+ return 0;
+}
+
+static int routing_policy_rule_read_full_file(const char *state_file, char ***ret) {
+ _cleanup_strv_free_ char **lines = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert(state_file);
+
+ r = read_full_file(state_file, &s, NULL);
+ if (r == -ENOENT) {
+ *ret = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ lines = strv_split_newlines(s);
+ if (!lines)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(lines);
+ return 0;
+}
+
+int routing_policy_load_rules(const char *state_file, Set **rules) {
+ _cleanup_strv_free_ char **data = NULL;
+ char **i;
+ int r;
+
+ assert(state_file);
+ assert(rules);
+
+ r = routing_policy_rule_read_full_file(state_file, &data);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read %s, ignoring: %m", state_file);
+
+ STRV_FOREACH(i, data) {
+ _cleanup_(routing_policy_rule_freep) RoutingPolicyRule *rule = NULL;
+ const char *p;
+
+ p = startswith(*i, "RULE=");
+ if (!p)
+ continue;
+
+ r = routing_policy_rule_new(&rule);
+ if (r < 0)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_free_ char *a = NULL;
+ char *b;
+
+ r = extract_first_word(&p, &a, NULL, 0);
+ if (r < 0)
+ return log_oom();
+ if (r == 0)
+ break;
+
+ b = strchr(a, '=');
+ if (!b) {
+ log_warning_errno(r, "Failed to parse RPDB rule, ignoring: %s", a);
+ continue;
+ }
+ *b++ = '\0';
+
+ if (streq(a, "family")) {
+ r = af_from_name(b);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse RPDB rule family, ignoring: %s", b);
+ continue;
+ }
+ if (rule->family != AF_UNSPEC && rule->family != r) {
+ log_warning("RPDB rule family is already specified, ignoring assignment: %s", b);
+ continue;
+ }
+ rule->family = r;
+ } else if (STR_IN_SET(a, "from", "to")) {
+ union in_addr_union *buffer;
+ uint8_t *prefixlen;
+
+ if (streq(a, "to")) {
+ buffer = &rule->to;
+ prefixlen = &rule->to_prefixlen;
+ } else {
+ buffer = &rule->from;
+ prefixlen = &rule->from_prefixlen;
+ }
+
+ if (rule->family == AF_UNSPEC)
+ r = in_addr_prefix_from_string_auto(b, &rule->family, buffer, prefixlen);
+ else
+ r = in_addr_prefix_from_string(b, rule->family, buffer, prefixlen);
+ if (r < 0) {
+ log_warning_errno(r, "RPDB rule prefix is invalid, ignoring assignment: %s", b);
+ continue;
+ }
+ } else if (streq(a, "tos")) {
+ r = safe_atou8(b, &rule->tos);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse RPDB rule TOS, ignoring: %s", b);
+ continue;
+ }
+ } else if (streq(a, "table")) {
+ r = safe_atou32(b, &rule->table);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse RPDB rule table, ignoring: %s", b);
+ continue;
+ }
+ } else if (streq(a, "priority")) {
+ r = safe_atou32(b, &rule->priority);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse RPDB rule priority, ignoring: %s", b);
+ continue;
+ }
+ } else if (streq(a, "fwmark")) {
+ r = parse_fwmark_fwmask(b, &rule->fwmark, &rule->fwmask);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse RPDB rule firewall mark or mask, ignoring: %s", a);
+ continue;
+ }
+ } else if (streq(a, "iif")) {
+ if (free_and_strdup(&rule->iif, b) < 0)
+ return log_oom();
+
+ } else if (streq(a, "oif")) {
+
+ if (free_and_strdup(&rule->oif, b) < 0)
+ return log_oom();
+ } else if (streq(a, "protocol")) {
+ r = safe_atou8(b, &rule->protocol);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse RPDB rule protocol, ignoring: %s", b);
+ continue;
+ }
+ } else if (streq(a, "sourceport")) {
+ uint16_t low, high;
+
+ r = parse_ip_port_range(b, &low, &high);
+ if (r < 0) {
+ log_warning_errno(r, "Invalid routing policy rule source port range, ignoring assignment: '%s'", b);
+ continue;
+ }
+
+ rule->sport.start = low;
+ rule->sport.end = high;
+ } else if (streq(a, "destinationport")) {
+ uint16_t low, high;
+
+ r = parse_ip_port_range(b, &low, &high);
+ if (r < 0) {
+ log_warning_errno(r, "Invalid routing policy rule destination port range, ignoring assignment: '%s'", b);
+ continue;
+ }
+
+ rule->dport.start = low;
+ rule->dport.end = high;
+ } else if (streq(a, "uidrange")) {
+ uid_t lower, upper;
+
+ r = parse_uid_range(b, &lower, &upper);
+ if (r < 0) {
+ log_warning_errno(r, "Invalid routing policy rule uid range, ignoring assignment: '%s'", b);
+ continue;
+ }
+
+ rule->uid_range.start = lower;
+ rule->uid_range.end = upper;
+ } else if (streq(a, "suppress_prefixlen")) {
+ r = parse_ip_prefix_length(b, &rule->suppress_prefixlen);
+ if (r == -ERANGE) {
+ log_warning_errno(r, "Prefix length outside of valid range 0-128, ignoring: %s", b);
+ continue;
+ }
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse RPDB rule suppress_prefixlen, ignoring: %s", b);
+ continue;
+ }
+ } else if (streq(a, "invert_rule")) {
+ r = parse_boolean(b);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse RPDB rule invert_rule, ignoring: %s", b);
+ continue;
+ }
+ rule->invert_rule = r;
+ } else
+ log_warning("Unknown RPDB rule, ignoring: %s", a);
+ }
+
+ r = set_ensure_put(rules, &routing_policy_rule_hash_ops, rule);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to add RPDB rule to saved DB, ignoring: %s", *i);
+ continue;
+ }
+ if (r > 0)
+ rule = NULL;
+ }
+
+ return 0;
+}
diff --git a/src/network/networkd-routing-policy-rule.h b/src/network/networkd-routing-policy-rule.h
new file mode 100644
index 0000000..baf086f
--- /dev/null
+++ b/src/network/networkd-routing-policy-rule.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <linux/fib_rules.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "networkd-util.h"
+#include "set.h"
+
+typedef struct Network Network;
+typedef struct Link Link;
+typedef struct Manager Manager;
+
+typedef struct RoutingPolicyRule {
+ Manager *manager;
+ Network *network;
+ NetworkConfigSection *section;
+
+ bool invert_rule;
+
+ uint8_t tos;
+ uint8_t protocol;
+
+ uint32_t table;
+ uint32_t fwmark;
+ uint32_t fwmask;
+ uint32_t priority;
+
+ AddressFamily address_family; /* Specified by Family= */
+ int family; /* Automatically determined by From= or To= */
+ unsigned char to_prefixlen;
+ unsigned char from_prefixlen;
+
+ char *iif;
+ char *oif;
+
+ union in_addr_union to;
+ union in_addr_union from;
+
+ struct fib_rule_port_range sport;
+ struct fib_rule_port_range dport;
+ struct fib_rule_uid_range uid_range;
+
+ int suppress_prefixlen;
+} RoutingPolicyRule;
+
+RoutingPolicyRule *routing_policy_rule_free(RoutingPolicyRule *rule);
+
+void network_drop_invalid_routing_policy_rules(Network *network);
+
+int link_set_routing_policy_rules(Link *link);
+
+int manager_rtnl_process_rule(sd_netlink *rtnl, sd_netlink_message *message, Manager *m);
+
+int routing_policy_serialize_rules(Set *rules, FILE *f);
+int routing_policy_load_rules(const char *state_file, Set **rules);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_tos);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_table);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_fwmark_mask);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_prefix);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_priority);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_device);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_port_range);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_ip_protocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_invert);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_family);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_uid_range);
+CONFIG_PARSER_PROTOTYPE(config_parse_routing_policy_rule_suppress_prefixlen);
diff --git a/src/network/networkd-speed-meter.c b/src/network/networkd-speed-meter.c
new file mode 100644
index 0000000..e7f0682
--- /dev/null
+++ b/src/network/networkd-speed-meter.c
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-event.h"
+#include "sd-netlink.h"
+
+#include "networkd-link-bus.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-speed-meter.h"
+
+static int process_message(Manager *manager, sd_netlink_message *message) {
+ uint16_t type;
+ int ifindex, r;
+ Link *link;
+
+ r = sd_netlink_message_get_type(message, &type);
+ if (r < 0)
+ return r;
+
+ if (type != RTM_NEWLINK)
+ return 0;
+
+ r = sd_rtnl_message_link_get_ifindex(message, &ifindex);
+ if (r < 0)
+ return r;
+
+ link = hashmap_get(manager->links, INT_TO_PTR(ifindex));
+ if (!link)
+ return -ENODEV;
+
+ link->stats_old = link->stats_new;
+
+ r = sd_netlink_message_read(message, IFLA_STATS64, sizeof link->stats_new, &link->stats_new);
+ if (r < 0)
+ return r;
+
+ link->stats_updated = true;
+
+ return 0;
+}
+
+static int speed_meter_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ Manager *manager = userdata;
+ sd_netlink_message *i;
+ usec_t usec_now;
+ Link *link;
+ int r;
+
+ assert(s);
+ assert(userdata);
+
+ r = sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &usec_now);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_time(s, usec_now + manager->speed_meter_interval_usec);
+ if (r < 0)
+ return r;
+
+ manager->speed_meter_usec_old = manager->speed_meter_usec_new;
+ manager->speed_meter_usec_new = usec_now;
+
+ HASHMAP_FOREACH(link, manager->links)
+ link->stats_updated = false;
+
+ r = sd_rtnl_message_new_link(manager->rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to allocate RTM_GETLINK netlink message, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to set dump flag, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_netlink_call(manager->rtnl, req, 0, &reply);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to call RTM_GETLINK, ignoring: %m");
+ return 0;
+ }
+
+ for (i = reply; i; i = sd_netlink_message_next(i))
+ (void) process_message(manager, i);
+
+ return 0;
+}
+
+int manager_start_speed_meter(Manager *manager) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+ int r;
+
+ assert(manager);
+ assert(manager->event);
+
+ if (!manager->use_speed_meter)
+ return 0;
+
+ r = sd_event_add_time(manager->event, &s, CLOCK_MONOTONIC, 0, 0, speed_meter_handler, manager);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ON);
+ if (r < 0)
+ return r;
+
+ manager->speed_meter_event_source = TAKE_PTR(s);
+ return 0;
+}
diff --git a/src/network/networkd-speed-meter.h b/src/network/networkd-speed-meter.h
new file mode 100644
index 0000000..4dd024b
--- /dev/null
+++ b/src/network/networkd-speed-meter.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* Default interval is 10sec. The speed meter periodically make networkd
+ * to be woke up. So, too small interval value is not desired.
+ * We set the minimum value 100msec = 0.1sec. */
+#define SPEED_METER_DEFAULT_TIME_INTERVAL (10 * USEC_PER_SEC)
+#define SPEED_METER_MINIMUM_TIME_INTERVAL (100 * USEC_PER_MSEC)
+
+typedef struct Manager Manager;
+
+int manager_start_speed_meter(Manager *m);
diff --git a/src/network/networkd-sriov.c b/src/network/networkd-sriov.c
new file mode 100644
index 0000000..68f43b5
--- /dev/null
+++ b/src/network/networkd-sriov.c
@@ -0,0 +1,532 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include "alloc-util.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "networkd-sriov.h"
+#include "parse-util.h"
+#include "set.h"
+#include "string-util.h"
+
+static int sr_iov_new(SRIOV **ret) {
+ SRIOV *sr_iov;
+
+ sr_iov = new(SRIOV, 1);
+ if (!sr_iov)
+ return -ENOMEM;
+
+ *sr_iov = (SRIOV) {
+ .vf = (uint32_t) -1,
+ .vlan_proto = ETH_P_8021Q,
+ .vf_spoof_check_setting = -1,
+ .trust = -1,
+ .query_rss = -1,
+ .link_state = _SR_IOV_LINK_STATE_INVALID,
+ };
+
+ *ret = TAKE_PTR(sr_iov);
+
+ return 0;
+}
+
+static int sr_iov_new_static(Network *network, const char *filename, unsigned section_line, SRIOV **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(sr_iov_freep) SRIOV *sr_iov = NULL;
+ SRIOV *existing = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ existing = ordered_hashmap_get(network->sr_iov_by_section, n);
+ if (existing) {
+ *ret = existing;
+ return 0;
+ }
+
+ r = sr_iov_new(&sr_iov);
+ if (r < 0)
+ return r;
+
+ sr_iov->network = network;
+ sr_iov->section = TAKE_PTR(n);
+
+ r = ordered_hashmap_ensure_allocated(&network->sr_iov_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(network->sr_iov_by_section, sr_iov->section, sr_iov);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(sr_iov);
+ return 0;
+}
+
+SRIOV *sr_iov_free(SRIOV *sr_iov) {
+ if (!sr_iov)
+ return NULL;
+
+ if (sr_iov->network && sr_iov->section)
+ ordered_hashmap_remove(sr_iov->network->sr_iov_by_section, sr_iov->section);
+
+ network_config_section_free(sr_iov->section);
+
+ return mfree(sr_iov);
+}
+
+static int sr_iov_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->sr_iov_messages > 0);
+ link->sr_iov_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_error_errno(link, m, r, "Could not set up SR-IOV");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->sr_iov_messages == 0) {
+ log_link_debug(link, "SR-IOV configured");
+ link->sr_iov_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+static int sr_iov_configure(Link *link, SRIOV *sr_iov) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+
+ log_link_debug(link, "Setting SR-IOV virtual function %"PRIu32, sr_iov->vf);
+
+ r = sd_rtnl_message_new_link(link->manager->rtnl, &req, RTM_SETLINK, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not allocate RTM_SETLINK message: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_VFINFO_LIST);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_VFINFO_LIST container: %m");
+
+ r = sd_netlink_message_open_container(req, IFLA_VF_INFO);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_VF_INFO container: %m");
+
+ if (!ether_addr_is_null(&sr_iov->mac)) {
+ struct ifla_vf_mac ivm = {
+ .vf = sr_iov->vf,
+ };
+
+ memcpy(ivm.mac, &sr_iov->mac, ETH_ALEN);
+ r = sd_netlink_message_append_data(req, IFLA_VF_MAC, &ivm, sizeof(struct ifla_vf_mac));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_VF_MAC: %m");
+ }
+
+ if (sr_iov->vf_spoof_check_setting >= 0) {
+ struct ifla_vf_spoofchk ivs = {
+ .vf = sr_iov->vf,
+ .setting = sr_iov->vf_spoof_check_setting,
+ };
+
+ r = sd_netlink_message_append_data(req, IFLA_VF_SPOOFCHK, &ivs, sizeof(struct ifla_vf_spoofchk));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_VF_SPOOFCHK: %m");
+ }
+
+ if (sr_iov->query_rss >= 0) {
+ struct ifla_vf_rss_query_en ivs = {
+ .vf = sr_iov->vf,
+ .setting = sr_iov->query_rss,
+ };
+
+ r = sd_netlink_message_append_data(req, IFLA_VF_RSS_QUERY_EN, &ivs, sizeof(struct ifla_vf_rss_query_en));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_VF_RSS_QUERY_EN: %m");
+ }
+
+ if (sr_iov->trust >= 0) {
+ struct ifla_vf_trust ivt = {
+ .vf = sr_iov->vf,
+ .setting = sr_iov->trust,
+ };
+
+ r = sd_netlink_message_append_data(req, IFLA_VF_TRUST, &ivt, sizeof(struct ifla_vf_trust));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_VF_TRUST: %m");
+ }
+
+ if (sr_iov->link_state >= 0) {
+ struct ifla_vf_link_state ivl = {
+ .vf = sr_iov->vf,
+ .link_state = sr_iov->link_state,
+ };
+
+ r = sd_netlink_message_append_data(req, IFLA_VF_LINK_STATE, &ivl, sizeof(struct ifla_vf_link_state));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_VF_LINK_STATE: %m");
+ }
+
+ if (sr_iov->vlan > 0) {
+ /* Because of padding, first the buffer must be initialized with 0. */
+ struct ifla_vf_vlan_info ivvi = {};
+ ivvi.vf = sr_iov->vf;
+ ivvi.vlan = sr_iov->vlan;
+ ivvi.qos = sr_iov->qos;
+ ivvi.vlan_proto = htobe16(sr_iov->vlan_proto);
+
+ r = sd_netlink_message_open_container(req, IFLA_VF_VLAN_LIST);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open IFLA_VF_VLAN_LIST container: %m");
+
+ r = sd_netlink_message_append_data(req, IFLA_VF_VLAN_INFO, &ivvi, sizeof(struct ifla_vf_vlan_info));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append IFLA_VF_VLAN_INFO: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close IFLA_VF_VLAN_LIST container: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close IFLA_VF_INFO container: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close IFLA_VFINFO_LIST container: %m");
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, sr_iov_handler,
+ link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ link->sr_iov_messages++;
+
+ return 0;
+}
+
+int link_configure_sr_iov(Link *link) {
+ SRIOV *sr_iov;
+ int r;
+
+ link->sr_iov_configured = false;
+ link->sr_iov_messages = 0;
+
+ ORDERED_HASHMAP_FOREACH(sr_iov, link->network->sr_iov_by_section) {
+ r = sr_iov_configure(link, sr_iov);
+ if (r < 0)
+ return r;
+ }
+
+ if (link->sr_iov_messages == 0)
+ link->sr_iov_configured = true;
+ else
+ log_link_debug(link, "Configuring SR-IOV");
+
+ return 0;
+}
+
+static int sr_iov_section_verify(SRIOV *sr_iov) {
+ assert(sr_iov);
+
+ if (section_is_invalid(sr_iov->section))
+ return -EINVAL;
+
+ if (sr_iov->vf == (uint32_t) -1)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: [SRIOV] section without VirtualFunction= field configured. "
+ "Ignoring [SRIOV] section from line %u.",
+ sr_iov->section->filename, sr_iov->section->line);
+
+ return 0;
+}
+
+void network_drop_invalid_sr_iov(Network *network) {
+ SRIOV *sr_iov;
+
+ assert(network);
+
+ ORDERED_HASHMAP_FOREACH(sr_iov, network->sr_iov_by_section)
+ if (sr_iov_section_verify(sr_iov) < 0)
+ sr_iov_free(sr_iov);
+}
+
+int config_parse_sr_iov_uint32(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sr_iov_free_or_set_invalidp) SRIOV *sr_iov = NULL;
+ Network *network = data;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = sr_iov_new_static(network, filename, section_line, &sr_iov);
+ if (r < 0)
+ return r;
+
+ if (isempty(rvalue)) {
+ if (streq(lvalue, "VirtualFunction"))
+ sr_iov->vf = (uint32_t) -1;
+ else if (streq(lvalue, "VLANId"))
+ sr_iov->vlan = 0;
+ else if (streq(lvalue, "QualityOfService"))
+ sr_iov->qos = 0;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ TAKE_PTR(sr_iov);
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse SR-IOV '%s=', ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "VLANId")) {
+ if (k == 0 || k > 4095) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid SR-IOV VLANId: %d", k);
+ return 0;
+ }
+ sr_iov->vlan = k;
+ } else if (streq(lvalue, "VirtualFunction")) {
+ if (k >= INT_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid SR-IOV virtual function: %d", k);
+ return 0;
+ }
+ sr_iov->vf = k;
+ } else if (streq(lvalue, "QualityOfService"))
+ sr_iov->qos = k;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ TAKE_PTR(sr_iov);
+ return 0;
+}
+
+int config_parse_sr_iov_vlan_proto(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sr_iov_free_or_set_invalidp) SRIOV *sr_iov = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = sr_iov_new_static(network, filename, section_line, &sr_iov);
+ if (r < 0)
+ return r;
+
+ if (isempty(rvalue) || streq(rvalue, "802.1Q"))
+ sr_iov->vlan_proto = ETH_P_8021Q;
+ else if (streq(rvalue, "802.1ad"))
+ sr_iov->vlan_proto = ETH_P_8021AD;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid SR-IOV '%s=', ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(sr_iov);
+ return 0;
+}
+
+int config_parse_sr_iov_link_state(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sr_iov_free_or_set_invalidp) SRIOV *sr_iov = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = sr_iov_new_static(network, filename, section_line, &sr_iov);
+ if (r < 0)
+ return r;
+
+ /* Unfortunately, SR_IOV_LINK_STATE_DISABLE is 2, not 0. So, we cannot use
+ * DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN() macro. */
+
+ if (isempty(rvalue)) {
+ sr_iov->link_state = _SR_IOV_LINK_STATE_INVALID;
+ TAKE_PTR(sr_iov);
+ return 0;
+ }
+
+ if (streq(rvalue, "auto")) {
+ sr_iov->link_state = SR_IOV_LINK_STATE_AUTO;
+ TAKE_PTR(sr_iov);
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse SR-IOV '%s=', ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ sr_iov->link_state = r ? SR_IOV_LINK_STATE_ENABLE : SR_IOV_LINK_STATE_DISABLE;
+ TAKE_PTR(sr_iov);
+ return 0;
+}
+
+int config_parse_sr_iov_boolean(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sr_iov_free_or_set_invalidp) SRIOV *sr_iov = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = sr_iov_new_static(network, filename, section_line, &sr_iov);
+ if (r < 0)
+ return r;
+
+ if (isempty(rvalue)) {
+ if (streq(lvalue, "MACSpoofCheck"))
+ sr_iov->vf_spoof_check_setting = -1;
+ else if (streq(lvalue, "QueryReceiveSideScaling"))
+ sr_iov->query_rss = -1;
+ else if (streq(lvalue, "Trust"))
+ sr_iov->trust = -1;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ TAKE_PTR(sr_iov);
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse '%s=', ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "MACSpoofCheck"))
+ sr_iov->vf_spoof_check_setting = r;
+ else if (streq(lvalue, "QueryReceiveSideScaling"))
+ sr_iov->query_rss = r;
+ else if (streq(lvalue, "Trust"))
+ sr_iov->trust = r;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ TAKE_PTR(sr_iov);
+ return 0;
+}
+
+int config_parse_sr_iov_mac(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(sr_iov_free_or_set_invalidp) SRIOV *sr_iov = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = sr_iov_new_static(network, filename, section_line, &sr_iov);
+ if (r < 0)
+ return r;
+
+ if (isempty(rvalue)) {
+ sr_iov->mac = ETHER_ADDR_NULL;
+ TAKE_PTR(sr_iov);
+ return 0;
+ }
+
+ r = ether_addr_from_string(rvalue, &sr_iov->mac);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse SR-IOV '%s=', ignoring assignment: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(sr_iov);
+ return 0;
+}
diff --git a/src/network/networkd-sriov.h b/src/network/networkd-sriov.h
new file mode 100644
index 0000000..dae5ff0
--- /dev/null
+++ b/src/network/networkd-sriov.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include <linux/if_link.h>
+
+#include "conf-parser.h"
+#include "ether-addr-util.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+#include "networkd-util.h"
+
+typedef enum SRIOVLinkState {
+ SR_IOV_LINK_STATE_AUTO = IFLA_VF_LINK_STATE_AUTO,
+ SR_IOV_LINK_STATE_ENABLE = IFLA_VF_LINK_STATE_ENABLE,
+ SR_IOV_LINK_STATE_DISABLE = IFLA_VF_LINK_STATE_DISABLE,
+ _SR_IOV_LINK_STATE_MAX,
+ _SR_IOV_LINK_STATE_INVALID = -1,
+} SRIOVLinkState;
+
+typedef struct SRIOV {
+ NetworkConfigSection *section;
+ Network *network;
+
+ uint32_t vf; /* 0 - 2147483646 */
+ uint32_t vlan; /* 0 - 4095, 0 disables VLAN filter */
+ uint32_t qos;
+ uint16_t vlan_proto; /* ETH_P_8021Q or ETH_P_8021AD */
+ int vf_spoof_check_setting;
+ int query_rss;
+ int trust;
+ SRIOVLinkState link_state;
+ struct ether_addr mac;
+} SRIOV;
+
+SRIOV *sr_iov_free(SRIOV *sr_iov);
+int link_configure_sr_iov(Link *link);
+void network_drop_invalid_sr_iov(Network *network);
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(SRIOV, sr_iov_free);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_sr_iov_uint32);
+CONFIG_PARSER_PROTOTYPE(config_parse_sr_iov_boolean);
+CONFIG_PARSER_PROTOTYPE(config_parse_sr_iov_link_state);
+CONFIG_PARSER_PROTOTYPE(config_parse_sr_iov_vlan_proto);
+CONFIG_PARSER_PROTOTYPE(config_parse_sr_iov_mac);
diff --git a/src/network/networkd-sysctl.c b/src/network/networkd-sysctl.c
new file mode 100644
index 0000000..bde0cec
--- /dev/null
+++ b/src/network/networkd-sysctl.c
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/if.h>
+
+#include "missing_network.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+#include "networkd-sysctl.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "sysctl-util.h"
+
+static int link_update_ipv6_sysctl(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link_ipv6_enabled(link))
+ return 0;
+
+ return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "disable_ipv6", false);
+}
+
+static int link_set_proxy_arp(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ if (link->network->proxy_arp < 0)
+ return 0;
+
+ return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp", link->network->proxy_arp > 0);
+}
+
+static bool link_ip_forward_enabled(Link *link, int family) {
+ assert(link);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+
+ if (family == AF_INET6 && !socket_ipv6_is_supported())
+ return false;
+
+ if (link->flags & IFF_LOOPBACK)
+ return false;
+
+ if (!link->network)
+ return false;
+
+ return link->network->ip_forward & (family == AF_INET ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6);
+}
+
+static int link_set_ipv4_forward(Link *link) {
+ assert(link);
+
+ if (!link_ip_forward_enabled(link, AF_INET))
+ return 0;
+
+ /* We propagate the forwarding flag from one interface to the
+ * global setting one way. This means: as long as at least one
+ * interface was configured at any time that had IP forwarding
+ * enabled the setting will stay on for good. We do this
+ * primarily to keep IPv4 and IPv6 packet forwarding behaviour
+ * somewhat in sync (see below). */
+
+ return sysctl_write_ip_property(AF_INET, NULL, "ip_forward", "1");
+}
+
+static int link_set_ipv6_forward(Link *link) {
+ assert(link);
+
+ if (!link_ip_forward_enabled(link, AF_INET6))
+ return 0;
+
+ /* On Linux, the IPv6 stack does not know a per-interface
+ * packet forwarding setting: either packet forwarding is on
+ * for all, or off for all. We hence don't bother with a
+ * per-interface setting, but simply propagate the interface
+ * flag, if it is set, to the global flag, one-way. Note that
+ * while IPv4 would allow a per-interface flag, we expose the
+ * same behaviour there and also propagate the setting from
+ * one to all, to keep things simple (see above). */
+
+ return sysctl_write_ip_property(AF_INET6, "all", "forwarding", "1");
+}
+
+static int link_set_ipv6_privacy_extensions(Link *link) {
+ assert(link);
+
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ // this is the special "kernel" value
+ if (link->network->ipv6_privacy_extensions == _IPV6_PRIVACY_EXTENSIONS_INVALID)
+ return 0;
+
+ return sysctl_write_ip_property_int(AF_INET6, link->ifname, "use_tempaddr", (int) link->network->ipv6_privacy_extensions);
+}
+
+static int link_set_ipv6_accept_ra(Link *link) {
+ assert(link);
+
+ /* Make this a NOP if IPv6 is not available */
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ return sysctl_write_ip_property(AF_INET6, link->ifname, "accept_ra", "0");
+}
+
+static int link_set_ipv6_dad_transmits(Link *link) {
+ assert(link);
+
+ /* Make this a NOP if IPv6 is not available */
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ if (link->network->ipv6_dad_transmits < 0)
+ return 0;
+
+ return sysctl_write_ip_property_int(AF_INET6, link->ifname, "dad_transmits", link->network->ipv6_dad_transmits);
+}
+
+static int link_set_ipv6_hop_limit(Link *link) {
+ assert(link);
+
+ /* Make this a NOP if IPv6 is not available */
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (!link->network)
+ return 0;
+
+ if (link->network->ipv6_hop_limit < 0)
+ return 0;
+
+ return sysctl_write_ip_property_int(AF_INET6, link->ifname, "hop_limit", link->network->ipv6_hop_limit);
+}
+
+static int link_set_ipv4_accept_local(Link *link) {
+ assert(link);
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (link->network->ipv4_accept_local < 0)
+ return 0;
+
+ return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "accept_local", link->network->ipv4_accept_local > 0);
+}
+
+int link_set_sysctl(Link *link) {
+ int r;
+
+ assert(link);
+
+ /* If IPv6 configured that is static IPv6 address and IPv6LL autoconfiguration is enabled
+ * for this interface, then enable IPv6 */
+ r = link_update_ipv6_sysctl(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot enable IPv6, ignoring: %m");
+
+ r = link_set_proxy_arp(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot configure proxy ARP for interface, ignoring: %m");
+
+ r = link_set_ipv4_forward(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot turn on IPv4 packet forwarding, ignoring: %m");
+
+ r = link_set_ipv6_forward(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot configure IPv6 packet forwarding, ignoring: %m");;
+
+ r = link_set_ipv6_privacy_extensions(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot configure IPv6 privacy extensions for interface, ignoring: %m");
+
+ r = link_set_ipv6_accept_ra(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot disable kernel IPv6 accept_ra for interface, ignoring: %m");
+
+ r = link_set_ipv6_dad_transmits(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot set IPv6 dad transmits for interface, ignoring: %m");
+
+ r = link_set_ipv6_hop_limit(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot set IPv6 hop limit for interface, ignoring: %m");
+
+ r = link_set_ipv4_accept_local(link);
+ if (r < 0)
+ log_link_warning_errno(link, r, "Cannot set IPv4 accept_local flag for interface, ignoring: %m");
+
+ return 0;
+}
+
+int link_set_ipv6_mtu(Link *link) {
+ int r;
+
+ assert(link);
+
+ /* Make this a NOP if IPv6 is not available */
+ if (!socket_ipv6_is_supported())
+ return 0;
+
+ if (link->flags & IFF_LOOPBACK)
+ return 0;
+
+ if (link->network->ipv6_mtu == 0)
+ return 0;
+
+ r = sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "mtu", link->network->ipv6_mtu);
+ if (r < 0)
+ return r;
+
+ link->ipv6_mtu_set = true;
+
+ return 0;
+}
+
+static const char* const ipv6_privacy_extensions_table[_IPV6_PRIVACY_EXTENSIONS_MAX] = {
+ [IPV6_PRIVACY_EXTENSIONS_NO] = "no",
+ [IPV6_PRIVACY_EXTENSIONS_PREFER_PUBLIC] = "prefer-public",
+ [IPV6_PRIVACY_EXTENSIONS_YES] = "yes",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(ipv6_privacy_extensions, IPv6PrivacyExtensions,
+ IPV6_PRIVACY_EXTENSIONS_YES);
+
+int config_parse_ipv6_privacy_extensions(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ IPv6PrivacyExtensions s, *ipv6_privacy_extensions = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(ipv6_privacy_extensions);
+
+ s = ipv6_privacy_extensions_from_string(rvalue);
+ if (s < 0) {
+ if (streq(rvalue, "kernel"))
+ s = _IPV6_PRIVACY_EXTENSIONS_INVALID;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse IPv6 privacy extensions option, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ *ipv6_privacy_extensions = s;
+
+ return 0;
+}
diff --git a/src/network/networkd-sysctl.h b/src/network/networkd-sysctl.h
new file mode 100644
index 0000000..3568900
--- /dev/null
+++ b/src/network/networkd-sysctl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "conf-parser.h"
+
+typedef struct Link Link;
+
+typedef enum IPv6PrivacyExtensions {
+ /* The values map to the kernel's /proc/sys/net/ipv6/conf/xxx/use_tempaddr values */
+ IPV6_PRIVACY_EXTENSIONS_NO,
+ IPV6_PRIVACY_EXTENSIONS_PREFER_PUBLIC,
+ IPV6_PRIVACY_EXTENSIONS_YES, /* aka prefer-temporary */
+ _IPV6_PRIVACY_EXTENSIONS_MAX,
+ _IPV6_PRIVACY_EXTENSIONS_INVALID = -1,
+} IPv6PrivacyExtensions;
+
+int link_set_sysctl(Link *link);
+int link_set_ipv6_mtu(Link *link);
+
+const char* ipv6_privacy_extensions_to_string(IPv6PrivacyExtensions i) _const_;
+IPv6PrivacyExtensions ipv6_privacy_extensions_from_string(const char *s) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ipv6_privacy_extensions);
diff --git a/src/network/networkd-util.c b/src/network/networkd-util.c
new file mode 100644
index 0000000..8ddcbb2
--- /dev/null
+++ b/src/network/networkd-util.c
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "condition.h"
+#include "conf-parser.h"
+#include "networkd-util.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+static const char* const address_family_table[_ADDRESS_FAMILY_MAX] = {
+ [ADDRESS_FAMILY_NO] = "no",
+ [ADDRESS_FAMILY_YES] = "yes",
+ [ADDRESS_FAMILY_IPV4] = "ipv4",
+ [ADDRESS_FAMILY_IPV6] = "ipv6",
+};
+
+static const char* const link_local_address_family_table[_ADDRESS_FAMILY_MAX] = {
+ [ADDRESS_FAMILY_NO] = "no",
+ [ADDRESS_FAMILY_YES] = "yes",
+ [ADDRESS_FAMILY_IPV4] = "ipv4",
+ [ADDRESS_FAMILY_IPV6] = "ipv6",
+ [ADDRESS_FAMILY_FALLBACK] = "fallback",
+ [ADDRESS_FAMILY_FALLBACK_IPV4] = "ipv4-fallback",
+};
+
+static const char* const routing_policy_rule_address_family_table[_ADDRESS_FAMILY_MAX] = {
+ [ADDRESS_FAMILY_YES] = "both",
+ [ADDRESS_FAMILY_IPV4] = "ipv4",
+ [ADDRESS_FAMILY_IPV6] = "ipv6",
+};
+
+static const char* const duplicate_address_detection_address_family_table[_ADDRESS_FAMILY_MAX] = {
+ [ADDRESS_FAMILY_NO] = "none",
+ [ADDRESS_FAMILY_YES] = "both",
+ [ADDRESS_FAMILY_IPV4] = "ipv4",
+ [ADDRESS_FAMILY_IPV6] = "ipv6",
+};
+
+static const char* const dhcp_lease_server_type_table[_SD_DHCP_LEASE_SERVER_TYPE_MAX] = {
+ [SD_DHCP_LEASE_DNS] = "DNS servers",
+ [SD_DHCP_LEASE_NTP] = "NTP servers",
+ [SD_DHCP_LEASE_SIP] = "SIP servers",
+ [SD_DHCP_LEASE_POP3] = "POP3 servers",
+ [SD_DHCP_LEASE_SMTP] = "SMTP servers",
+ [SD_DHCP_LEASE_LPR] = "LPR servers",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(address_family, AddressFamily, ADDRESS_FAMILY_YES);
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(link_local_address_family, AddressFamily, ADDRESS_FAMILY_YES);
+DEFINE_STRING_TABLE_LOOKUP(routing_policy_rule_address_family, AddressFamily);
+DEFINE_STRING_TABLE_LOOKUP(duplicate_address_detection_address_family, AddressFamily);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_link_local_address_family, link_local_address_family,
+ AddressFamily, "Failed to parse option");
+DEFINE_STRING_TABLE_LOOKUP(dhcp_lease_server_type, sd_dhcp_lease_server_type);
+
+int config_parse_address_family_with_kernel(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ AddressFamily *fwd = data, s;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* This function is mostly obsolete now. It simply redirects
+ * "kernel" to "no". In older networkd versions we used to
+ * distinguish IPForward=off from IPForward=kernel, where the
+ * former would explicitly turn off forwarding while the
+ * latter would simply not touch the setting. But that logic
+ * is gone, hence silently accept the old setting, but turn it
+ * to "no". */
+
+ s = address_family_from_string(rvalue);
+ if (s < 0) {
+ if (streq(rvalue, "kernel"))
+ s = ADDRESS_FAMILY_NO;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse IPForward= option, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ *fwd = s;
+
+ return 0;
+}
+
+/* Router lifetime can be set with netlink interface since kernel >= 4.5
+ * so for the supported kernel we don't need to expire routes in userspace */
+int kernel_route_expiration_supported(void) {
+ static int cached = -1;
+ int r;
+
+ if (cached < 0) {
+ Condition c = {
+ .type = CONDITION_KERNEL_VERSION,
+ .parameter = (char *) ">= 4.5"
+ };
+ r = condition_test(&c, NULL);
+ if (r < 0)
+ return r;
+
+ cached = r;
+ }
+ return cached;
+}
+
+static void network_config_hash_func(const NetworkConfigSection *c, struct siphash *state) {
+ siphash24_compress_string(c->filename, state);
+ siphash24_compress(&c->line, sizeof(c->line), state);
+}
+
+static int network_config_compare_func(const NetworkConfigSection *x, const NetworkConfigSection *y) {
+ int r;
+
+ r = strcmp(x->filename, y->filename);
+ if (r != 0)
+ return r;
+
+ return CMP(x->line, y->line);
+}
+
+DEFINE_HASH_OPS(network_config_hash_ops, NetworkConfigSection, network_config_hash_func, network_config_compare_func);
+
+int network_config_section_new(const char *filename, unsigned line, NetworkConfigSection **s) {
+ NetworkConfigSection *cs;
+
+ cs = malloc0(offsetof(NetworkConfigSection, filename) + strlen(filename) + 1);
+ if (!cs)
+ return -ENOMEM;
+
+ strcpy(cs->filename, filename);
+ cs->line = line;
+
+ *s = TAKE_PTR(cs);
+
+ return 0;
+}
+
+void network_config_section_free(NetworkConfigSection *cs) {
+ free(cs);
+}
+
+unsigned hashmap_find_free_section_line(Hashmap *hashmap) {
+ NetworkConfigSection *cs;
+ unsigned n = 0;
+ void *entry;
+
+ HASHMAP_FOREACH_KEY(entry, cs, hashmap)
+ if (n < cs->line)
+ n = cs->line;
+
+ return n + 1;
+}
diff --git a/src/network/networkd-util.h b/src/network/networkd-util.h
new file mode 100644
index 0000000..6100a00
--- /dev/null
+++ b/src/network/networkd-util.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-dhcp-lease.h"
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+
+typedef enum AddressFamily {
+ /* This is a bitmask, though it usually doesn't feel that way! */
+ ADDRESS_FAMILY_NO = 0,
+ ADDRESS_FAMILY_IPV4 = 1 << 0,
+ ADDRESS_FAMILY_IPV6 = 1 << 1,
+ ADDRESS_FAMILY_YES = ADDRESS_FAMILY_IPV4 | ADDRESS_FAMILY_IPV6,
+ ADDRESS_FAMILY_FALLBACK_IPV4 = 1 << 2,
+ ADDRESS_FAMILY_FALLBACK = ADDRESS_FAMILY_FALLBACK_IPV4 | ADDRESS_FAMILY_IPV6,
+ _ADDRESS_FAMILY_MAX,
+ _ADDRESS_FAMILY_INVALID = -1,
+} AddressFamily;
+
+typedef struct NetworkConfigSection {
+ unsigned line;
+ bool invalid;
+ char filename[];
+} NetworkConfigSection;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_link_local_address_family);
+CONFIG_PARSER_PROTOTYPE(config_parse_address_family_with_kernel);
+
+const char *address_family_to_string(AddressFamily b) _const_;
+AddressFamily address_family_from_string(const char *s) _pure_;
+
+const char *link_local_address_family_to_string(AddressFamily b) _const_;
+AddressFamily link_local_address_family_from_string(const char *s) _pure_;
+
+const char *routing_policy_rule_address_family_to_string(AddressFamily b) _const_;
+AddressFamily routing_policy_rule_address_family_from_string(const char *s) _pure_;
+
+const char *duplicate_address_detection_address_family_to_string(AddressFamily b) _const_;
+AddressFamily duplicate_address_detection_address_family_from_string(const char *s) _pure_;
+
+const char *dhcp_lease_server_type_to_string(sd_dhcp_lease_server_type t) _const_;
+sd_dhcp_lease_server_type dhcp_lease_server_type_from_string(const char *s) _pure_;
+
+int kernel_route_expiration_supported(void);
+
+int network_config_section_new(const char *filename, unsigned line, NetworkConfigSection **s);
+void network_config_section_free(NetworkConfigSection *network);
+DEFINE_TRIVIAL_CLEANUP_FUNC(NetworkConfigSection*, network_config_section_free);
+extern const struct hash_ops network_config_hash_ops;
+unsigned hashmap_find_free_section_line(Hashmap *hashmap);
+
+static inline bool section_is_invalid(NetworkConfigSection *section) {
+ /* If this returns false, then it does _not_ mean the section is valid. */
+
+ if (!section)
+ return false;
+
+ return section->invalid;
+}
+
+#define DEFINE_NETWORK_SECTION_FUNCTIONS(type, free_func) \
+ static inline void free_func##_or_set_invalid(type *p) { \
+ assert(p); \
+ \
+ if (p->section) \
+ p->section->invalid = true; \
+ else \
+ free_func(p); \
+ } \
+ DEFINE_TRIVIAL_CLEANUP_FUNC(type*, free_func); \
+ DEFINE_TRIVIAL_CLEANUP_FUNC(type*, free_func##_or_set_invalid);
+
+static inline int log_message_warning_errno(sd_netlink_message *m, int err, const char *msg) {
+ const char *err_msg = NULL;
+
+ (void) sd_netlink_message_read_string(m, NLMSGERR_ATTR_MSG, &err_msg);
+ return log_warning_errno(err, "%s: %s%s%m", msg, strempty(err_msg), err_msg ? " " : "");
+}
diff --git a/src/network/networkd-wifi.c b/src/network/networkd-wifi.c
new file mode 100644
index 0000000..0f2def7
--- /dev/null
+++ b/src/network/networkd-wifi.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/ethernet.h>
+#include <linux/nl80211.h>
+
+#include "sd-bus.h"
+
+#include "bus-util.h"
+#include "ether-addr-util.h"
+#include "netlink-internal.h"
+#include "netlink-util.h"
+#include "networkd-link.h"
+#include "networkd-manager.h"
+#include "networkd-wifi.h"
+#include "string-util.h"
+#include "wifi-util.h"
+
+int wifi_get_info(Link *link) {
+ const char *type;
+ int r, s = 0;
+
+ assert(link);
+
+ if (!link->sd_device)
+ return 0;
+
+ r = sd_device_get_devtype(link->sd_device, &type);
+ if (r == -ENOENT)
+ return 0;
+ else if (r < 0)
+ return r;
+
+ if (!streq(type, "wlan"))
+ return 0;
+
+ _cleanup_free_ char *ssid = NULL;
+ r = wifi_get_interface(link->manager->genl, link->ifindex, &link->wlan_iftype, &ssid);
+ if (r < 0)
+ return r;
+ if (r > 0 && streq_ptr(link->ssid, ssid))
+ r = 0;
+ free_and_replace(link->ssid, ssid);
+
+ if (link->wlan_iftype == NL80211_IFTYPE_STATION) {
+ struct ether_addr old_bssid = link->bssid;
+ s = wifi_get_station(link->manager->genl, link->ifindex, &link->bssid);
+ if (s < 0)
+ return s;
+ if (s > 0 && memcmp(&old_bssid, &link->bssid, sizeof old_bssid) == 0)
+ s = 0;
+ }
+
+ if (r > 0 || s > 0) {
+ char buf[ETHER_ADDR_TO_STRING_MAX];
+
+ if (link->wlan_iftype == NL80211_IFTYPE_STATION && link->ssid)
+ log_link_info(link, "Connected WiFi access point: %s (%s)",
+ link->ssid, ether_addr_to_string(&link->bssid, buf));
+ return 1;
+ }
+ return 0;
+}
diff --git a/src/network/networkd-wifi.h b/src/network/networkd-wifi.h
new file mode 100644
index 0000000..ab868eb
--- /dev/null
+++ b/src/network/networkd-wifi.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef struct Link Link;
+
+int wifi_get_info(Link *link);
diff --git a/src/network/networkd.c b/src/network/networkd.c
new file mode 100644
index 0000000..b448d9b
--- /dev/null
+++ b/src/network/networkd.c
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "capability-util.h"
+#include "daemon-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "networkd-conf.h"
+#include "networkd-manager.h"
+#include "signal-util.h"
+#include "user-util.h"
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ /* Drop privileges, but only if we have been started as root. If we are not running as root we assume all
+ * privileges are already dropped and we can't create our runtime directory. */
+ if (geteuid() == 0) {
+ const char *user = "systemd-network";
+ uid_t uid;
+ gid_t gid;
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Cannot resolve user name %s: %m", user);
+
+ /* Create runtime directory. This is not necessary when networkd is
+ * started with "RuntimeDirectory=systemd/netif", or after
+ * systemd-tmpfiles-setup.service. */
+ r = mkdir_safe_label("/run/systemd/netif", 0755, uid, gid, MKDIR_WARN_MODE);
+ if (r < 0)
+ log_warning_errno(r, "Could not create runtime directory: %m");
+
+ r = drop_privileges(uid, gid,
+ (1ULL << CAP_NET_ADMIN) |
+ (1ULL << CAP_NET_BIND_SERVICE) |
+ (1ULL << CAP_NET_BROADCAST) |
+ (1ULL << CAP_NET_RAW));
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileges: %m");
+ }
+
+ /* Always create the directories people can create inotify watches in.
+ * It is necessary to create the following subdirectories after drop_privileges()
+ * to support old kernels not supporting AmbientCapabilities=. */
+ r = mkdir_safe_label("/run/systemd/netif/links", 0755, UID_INVALID, GID_INVALID, MKDIR_WARN_MODE);
+ if (r < 0)
+ log_warning_errno(r, "Could not create runtime directory 'links': %m");
+
+ r = mkdir_safe_label("/run/systemd/netif/leases", 0755, UID_INVALID, GID_INVALID, MKDIR_WARN_MODE);
+ if (r < 0)
+ log_warning_errno(r, "Could not create runtime directory 'leases': %m");
+
+ r = mkdir_safe_label("/run/systemd/netif/lldp", 0755, UID_INVALID, GID_INVALID, MKDIR_WARN_MODE);
+ if (r < 0)
+ log_warning_errno(r, "Could not create runtime directory 'lldp': %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Could not create manager: %m");
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not connect to bus: %m");
+
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse configuration file: %m");
+
+ r = manager_load_config(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not load configuration files: %m");
+
+ r = manager_enumerate(m);
+ if (r < 0)
+ return r;
+
+ r = manager_start(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not start manager: %m");
+
+ log_info("Enumeration completed");
+
+ notify_message = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/network/networkd.conf b/src/network/networkd.conf
new file mode 100644
index 0000000..5339e5e
--- /dev/null
+++ b/src/network/networkd.conf
@@ -0,0 +1,21 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See networkd.conf(5) for details
+
+[Network]
+#SpeedMeter=no
+#SpeedMeterIntervalSec=10sec
+#ManageForeignRoutes=yes
+
+[DHCP]
+#DUIDType=vendor
+#DUIDRawData=
diff --git a/src/network/org.freedesktop.network1.conf b/src/network/org.freedesktop.network1.conf
new file mode 100644
index 0000000..366c630
--- /dev/null
+++ b/src/network/org.freedesktop.network1.conf
@@ -0,0 +1,27 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="systemd-network">
+ <allow own="org.freedesktop.network1"/>
+ <allow send_destination="org.freedesktop.network1"/>
+ <allow receive_sender="org.freedesktop.network1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.network1"/>
+ <allow receive_sender="org.freedesktop.network1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/network/org.freedesktop.network1.policy b/src/network/org.freedesktop.network1.policy
new file mode 100644
index 0000000..9e27f72
--- /dev/null
+++ b/src/network/org.freedesktop.network1.policy
@@ -0,0 +1,186 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.network1.set-ntp-servers">
+ <description gettext-domain="systemd">Set NTP servers</description>
+ <message gettext-domain="systemd">Authentication is required to set NTP servers.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.set-dns-servers">
+ <description gettext-domain="systemd">Set DNS servers</description>
+ <message gettext-domain="systemd">Authentication is required to set DNS servers.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.set-domains">
+ <description gettext-domain="systemd">Set domains</description>
+ <message gettext-domain="systemd">Authentication is required to set domains.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.set-default-route">
+ <description gettext-domain="systemd">Set default route</description>
+ <message gettext-domain="systemd">Authentication is required to set default route.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.set-llmnr">
+ <description gettext-domain="systemd">Enable/disable LLMNR</description>
+ <message gettext-domain="systemd">Authentication is required to enable or disable LLMNR.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.set-mdns">
+ <description gettext-domain="systemd">Enable/disable multicast DNS</description>
+ <message gettext-domain="systemd">Authentication is required to enable or disable multicast DNS.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.set-dns-over-tls">
+ <description gettext-domain="systemd">Enable/disable DNS over TLS</description>
+ <message gettext-domain="systemd">Authentication is required to enable or disable DNS over TLS.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.set-dnssec">
+ <description gettext-domain="systemd">Enable/disable DNSSEC</description>
+ <message gettext-domain="systemd">Authentication is required to enable or disable DNSSEC.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.set-dnssec-negative-trust-anchors">
+ <description gettext-domain="systemd">Set DNSSEC Negative Trust Anchors</description>
+ <message gettext-domain="systemd">Authentication is required to set DNSSEC Negative Trust Anchors.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.revert-ntp">
+ <description gettext-domain="systemd">Revert NTP settings</description>
+ <message gettext-domain="systemd">Authentication is required to reset NTP settings.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.revert-dns">
+ <description gettext-domain="systemd">Revert DNS settings</description>
+ <message gettext-domain="systemd">Authentication is required to reset DNS settings.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.forcerenew">
+ <description gettext-domain="systemd">DHCP server sends force renew message</description>
+ <message gettext-domain="systemd">Authentication is required to send force renew message.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.renew">
+ <description gettext-domain="systemd">Renew dynamic addresses</description>
+ <message gettext-domain="systemd">Authentication is required to renew dynamic addresses.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.reload">
+ <description gettext-domain="systemd">Reload network settings</description>
+ <message gettext-domain="systemd">Authentication is required to reload network settings.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+ <action id="org.freedesktop.network1.reconfigure">
+ <description gettext-domain="systemd">Reconfigure network interface</description>
+ <message gettext-domain="systemd">Authentication is required to reconfigure network interface.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-network</annotate>
+ </action>
+
+</policyconfig>
diff --git a/src/network/org.freedesktop.network1.service b/src/network/org.freedesktop.network1.service
new file mode 100644
index 0000000..ddbf3eb
--- /dev/null
+++ b/src/network/org.freedesktop.network1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.network1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.network1.service
diff --git a/src/network/systemd-networkd.pkla b/src/network/systemd-networkd.pkla
new file mode 100644
index 0000000..4d1bb45
--- /dev/null
+++ b/src/network/systemd-networkd.pkla
@@ -0,0 +1,4 @@
+[Allow systemd-networkd to set timezone and transient hostname]
+Identity=unix-user:systemd-network
+Action=org.freedesktop.hostname1.set-hostname;org.freedesktop.hostname1.get-product-uuid;org.freedesktop.timedate1.set-timezone;
+ResultAny=yes
diff --git a/src/network/systemd-networkd.rules b/src/network/systemd-networkd.rules
new file mode 100644
index 0000000..b9077c1
--- /dev/null
+++ b/src/network/systemd-networkd.rules
@@ -0,0 +1,10 @@
+// Allow systemd-networkd to set timezone, get product UUID,
+// and transient hostname
+polkit.addRule(function(action, subject) {
+ if ((action.id == "org.freedesktop.hostname1.set-hostname" ||
+ action.id == "org.freedesktop.hostname1.get-product-uuid" ||
+ action.id == "org.freedesktop.timedate1.set-timezone") &&
+ subject.user == "systemd-network") {
+ return polkit.Result.YES;
+ }
+});
diff --git a/src/network/tc/cake.c b/src/network/tc/cake.c
new file mode 100644
index 0000000..76fb718
--- /dev/null
+++ b/src/network/tc/cake.c
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "cake.h"
+#include "conf-parser.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "string-util.h"
+
+static int cake_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ CommonApplicationsKeptEnhanced *c;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ c = CAKE(qdisc);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "cake");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (c->bandwidth > 0) {
+ r = sd_netlink_message_append_u64(req, TCA_CAKE_BASE_RATE64, c->bandwidth);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_CAKE_BASE_RATE64 attribute: %m");
+ }
+
+ r = sd_netlink_message_append_s32(req, TCA_CAKE_OVERHEAD, c->overhead);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_CAKE_OVERHEAD attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_cake_bandwidth(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ CommonApplicationsKeptEnhanced *c;
+ Network *network = data;
+ uint64_t k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ c = CAKE(qdisc);
+
+ if (isempty(rvalue)) {
+ c->bandwidth = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1000, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ c->bandwidth = k/8;
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_cake_overhead(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ CommonApplicationsKeptEnhanced *c;
+ Network *network = data;
+ int32_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_CAKE, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ c = CAKE(qdisc);
+
+ if (isempty(rvalue)) {
+ c->overhead = 0;
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atoi32(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (v < -64 || v > 256) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ c->overhead = v;
+ qdisc = NULL;
+ return 0;
+}
+
+const QDiscVTable cake_vtable = {
+ .object_size = sizeof(CommonApplicationsKeptEnhanced),
+ .tca_kind = "cake",
+ .fill_message = cake_fill_message,
+};
diff --git a/src/network/tc/cake.h b/src/network/tc/cake.h
new file mode 100644
index 0000000..1da28b7
--- /dev/null
+++ b/src/network/tc/cake.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct CommonApplicationsKeptEnhanced {
+ QDisc meta;
+
+ int overhead;
+ uint64_t bandwidth;
+
+} CommonApplicationsKeptEnhanced;
+
+DEFINE_QDISC_CAST(CAKE, CommonApplicationsKeptEnhanced);
+extern const QDiscVTable cake_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_cake_bandwidth);
+CONFIG_PARSER_PROTOTYPE(config_parse_cake_overhead);
diff --git a/src/network/tc/codel.c b/src/network/tc/codel.c
new file mode 100644
index 0000000..807c247
--- /dev/null
+++ b/src/network/tc/codel.c
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "string-util.h"
+
+static int controlled_delay_init(QDisc *qdisc) {
+ ControlledDelay *cd;
+
+ assert(qdisc);
+
+ cd = CODEL(qdisc);
+
+ cd->ce_threshold_usec = USEC_INFINITY;
+ cd->ecn = -1;
+
+ return 0;
+}
+
+static int controlled_delay_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ ControlledDelay *cd;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ cd = CODEL(qdisc);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "codel");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (cd->packet_limit > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_CODEL_LIMIT, cd->packet_limit);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_CODEL_LIMIT attribute: %m");
+ }
+
+ if (cd->interval_usec > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_CODEL_INTERVAL, cd->interval_usec);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_CODEL_INTERVAL attribute: %m");
+ }
+
+ if (cd->target_usec > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_CODEL_TARGET, cd->target_usec);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_CODEL_TARGET attribute: %m");
+ }
+
+ if (cd->ecn >= 0) {
+ r = sd_netlink_message_append_u32(req, TCA_CODEL_ECN, cd->ecn);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_CODEL_ECN attribute: %m");
+ }
+
+ if (cd->ce_threshold_usec != USEC_INFINITY) {
+ r = sd_netlink_message_append_u32(req, TCA_CODEL_CE_THRESHOLD, cd->ce_threshold_usec);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_CODEL_CE_THRESHOLD attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_controlled_delay_u32(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ ControlledDelay *cd;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_CODEL, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ cd = CODEL(qdisc);
+
+ if (isempty(rvalue)) {
+ cd->packet_limit = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &cd->packet_limit);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_controlled_delay_usec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ ControlledDelay *cd;
+ Network *network = data;
+ usec_t *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_CODEL, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ cd = CODEL(qdisc);
+
+ if (streq(lvalue, "TargetSec"))
+ p = &cd->target_usec;
+ else if (streq(lvalue, "IntervalSec"))
+ p = &cd->interval_usec;
+ else if (streq(lvalue, "CEThresholdSec"))
+ p = &cd->ce_threshold_usec;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ if (isempty(rvalue)) {
+ if (streq(lvalue, "CEThresholdSec"))
+ *p = USEC_INFINITY;
+ else
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_sec(rvalue, p);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_controlled_delay_bool(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ ControlledDelay *cd;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_CODEL, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ cd = CODEL(qdisc);
+
+ if (isempty(rvalue)) {
+ cd->ecn = -1;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ cd->ecn = r;
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable codel_vtable = {
+ .object_size = sizeof(ControlledDelay),
+ .tca_kind = "codel",
+ .init = controlled_delay_init,
+ .fill_message = controlled_delay_fill_message,
+};
diff --git a/src/network/tc/codel.h b/src/network/tc/codel.h
new file mode 100644
index 0000000..4fe5283
--- /dev/null
+++ b/src/network/tc/codel.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+#include "time-util.h"
+
+typedef struct ControlledDelay {
+ QDisc meta;
+
+ uint32_t packet_limit;
+ usec_t interval_usec;
+ usec_t target_usec;
+ usec_t ce_threshold_usec;
+ int ecn;
+} ControlledDelay;
+
+DEFINE_QDISC_CAST(CODEL, ControlledDelay);
+extern const QDiscVTable codel_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_controlled_delay_u32);
+CONFIG_PARSER_PROTOTYPE(config_parse_controlled_delay_usec);
+CONFIG_PARSER_PROTOTYPE(config_parse_controlled_delay_bool);
diff --git a/src/network/tc/drr.c b/src/network/tc/drr.c
new file mode 100644
index 0000000..86b7f43
--- /dev/null
+++ b/src/network/tc/drr.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "drr.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+const QDiscVTable drr_vtable = {
+ .object_size = sizeof(DeficitRoundRobinScheduler),
+ .tca_kind = "drr",
+};
+
+static int drr_class_fill_message(Link *link, TClass *tclass, sd_netlink_message *req) {
+ DeficitRoundRobinSchedulerClass *drr;
+ int r;
+
+ assert(link);
+ assert(tclass);
+ assert(req);
+
+ drr = TCLASS_TO_DRR(tclass);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "drr");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (drr->quantum > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_DRR_QUANTUM, drr->quantum);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_DRR_QUANTUM, attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_drr_size(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL;
+ DeficitRoundRobinSchedulerClass *drr;
+ Network *network = data;
+ uint64_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = tclass_new_static(TCLASS_KIND_DRR, network, filename, section_line, &tclass);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create traffic control class, ignoring assignment: %m");
+ return 0;
+ }
+
+ drr = TCLASS_TO_DRR(tclass);
+
+ if (isempty(rvalue)) {
+ drr->quantum = 0;
+
+ tclass = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1024, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (u > UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ drr->quantum = (uint32_t) u;
+
+ tclass = NULL;
+ return 0;
+}
+
+const TClassVTable drr_tclass_vtable = {
+ .object_size = sizeof(DeficitRoundRobinSchedulerClass),
+ .tca_kind = "drr",
+ .fill_message = drr_class_fill_message,
+};
diff --git a/src/network/tc/drr.h b/src/network/tc/drr.h
new file mode 100644
index 0000000..c96cc4d
--- /dev/null
+++ b/src/network/tc/drr.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "qdisc.h"
+
+typedef struct DeficitRoundRobinScheduler {
+ QDisc meta;
+} DeficitRoundRobinScheduler;
+
+DEFINE_QDISC_CAST(DRR, DeficitRoundRobinScheduler);
+extern const QDiscVTable drr_vtable;
+
+typedef struct DeficitRoundRobinSchedulerClass {
+ TClass meta;
+
+ uint32_t quantum;
+} DeficitRoundRobinSchedulerClass;
+
+DEFINE_TCLASS_CAST(DRR, DeficitRoundRobinSchedulerClass);
+extern const TClassVTable drr_tclass_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_drr_size);
diff --git a/src/network/tc/ets.c b/src/network/tc/ets.c
new file mode 100644
index 0000000..8214a57
--- /dev/null
+++ b/src/network/tc/ets.c
@@ -0,0 +1,344 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "ets.h"
+#include "memory-util.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "string-util.h"
+#include "tc-util.h"
+
+static int enhanced_transmission_selection_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ EnhancedTransmissionSelection *ets;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ ets = ETS(qdisc);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "ets");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ r = sd_netlink_message_append_u8(req, TCA_ETS_NBANDS, ets->n_bands);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_ETS_NBANDS attribute: %m");
+
+ if (ets->n_strict > 0) {
+ r = sd_netlink_message_append_u8(req, TCA_ETS_NSTRICT, ets->n_strict);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_ETS_NSTRICT attribute: %m");
+ }
+
+ if (ets->n_quanta > 0) {
+ r = sd_netlink_message_open_container(req, TCA_ETS_QUANTA);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_ETS_QUANTA: %m");
+
+ for (unsigned i = 0; i < ets->n_quanta; i++) {
+ r = sd_netlink_message_append_u32(req, TCA_ETS_QUANTA_BAND, ets->quanta[i]);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_ETS_QUANTA_BAND attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_ETS_QUANTA: %m");
+ }
+
+ if (ets->n_prio > 0) {
+ r = sd_netlink_message_open_container(req, TCA_ETS_PRIOMAP);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_ETS_PRIOMAP: %m");
+
+ for (unsigned i = 0; i < ets->n_prio; i++) {
+ r = sd_netlink_message_append_u8(req, TCA_ETS_PRIOMAP_BAND, ets->prio[i]);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_ETS_PRIOMAP_BAND attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_ETS_PRIOMAP: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_ets_u8(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ EnhancedTransmissionSelection *ets;
+ Network *network = data;
+ uint8_t v, *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_ETS, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ ets = ETS(qdisc);
+ if (streq(lvalue, "Bands"))
+ p = &ets->n_bands;
+ else if (streq(lvalue, "StrictBands"))
+ p = &ets->n_strict;
+ else
+ assert_not_reached("Invalid lvalue.");
+
+ if (isempty(rvalue)) {
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou8(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (v > TCQ_ETS_MAX_BANDS) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid '%s='. The value must be <= %d, ignoring assignment: %s",
+ lvalue, TCQ_ETS_MAX_BANDS, rvalue);
+ return 0;
+ }
+
+ *p = v;
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_ets_quanta(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ EnhancedTransmissionSelection *ets;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_ETS, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ ets = ETS(qdisc);
+
+ if (isempty(rvalue)) {
+ memzero(ets->quanta, sizeof(uint32_t) * TCQ_ETS_MAX_BANDS);
+ ets->n_quanta = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+ uint64_t v;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract next value, ignoring: %m");
+ break;
+ }
+ if (r == 0)
+ break;
+
+ r = parse_size(word, 1024, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, word);
+ continue;
+ }
+ if (v == 0 || v > UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid '%s=', ignoring assignment: %s",
+ lvalue, word);
+ continue;
+ }
+ if (ets->n_quanta >= TCQ_ETS_MAX_BANDS) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Too many quanta in '%s=', ignoring assignment: %s",
+ lvalue, word);
+ continue;
+ }
+
+ ets->quanta[ets->n_quanta++] = v;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_ets_prio(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ EnhancedTransmissionSelection *ets;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_ETS, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ ets = ETS(qdisc);
+
+ if (isempty(rvalue)) {
+ memzero(ets->prio, sizeof(uint8_t) * (TC_PRIO_MAX + 1));
+ ets->n_prio = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+ uint8_t v;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract next value, ignoring: %m");
+ break;
+ }
+ if (r == 0)
+ break;
+
+ r = safe_atou8(word, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, word);
+ continue;
+ }
+ if (ets->n_prio > TC_PRIO_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Too many priomap in '%s=', ignoring assignment: %s",
+ lvalue, word);
+ continue;
+ }
+
+ ets->prio[ets->n_prio++] = v;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+static int enhanced_transmission_selection_verify(QDisc *qdisc) {
+ EnhancedTransmissionSelection *ets;
+
+ assert(qdisc);
+
+ ets = ETS(qdisc);
+
+ if (ets->n_bands == 0)
+ ets->n_bands = ets->n_strict + ets->n_quanta;
+
+ if (ets->n_bands == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: At least one of Band=, Strict=, or Quanta= must be specified. "
+ "Ignoring [EnhancedTransmissionSelection] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ if (ets->n_bands < ets->n_strict + ets->n_quanta)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Not enough total bands to cover all the strict bands and quanta. "
+ "Ignoring [EnhancedTransmissionSelection] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ for (unsigned i = 0; i < ets->n_prio; i++)
+ if (ets->prio[i] >= ets->n_bands)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: PriorityMap= element is out of bands. "
+ "Ignoring [EnhancedTransmissionSelection] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ return 0;
+}
+
+const QDiscVTable ets_vtable = {
+ .object_size = sizeof(EnhancedTransmissionSelection),
+ .tca_kind = "ets",
+ .fill_message = enhanced_transmission_selection_fill_message,
+ .verify = enhanced_transmission_selection_verify,
+};
diff --git a/src/network/tc/ets.h b/src/network/tc/ets.h
new file mode 100644
index 0000000..b6dd428
--- /dev/null
+++ b/src/network/tc/ets.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/pkt_sched.h>
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct EnhancedTransmissionSelection {
+ QDisc meta;
+
+ uint8_t n_bands;
+ uint8_t n_strict;
+ unsigned n_quanta;
+ uint32_t quanta[TCQ_ETS_MAX_BANDS];
+ unsigned n_prio;
+ uint8_t prio[TC_PRIO_MAX + 1];
+} EnhancedTransmissionSelection;
+
+DEFINE_QDISC_CAST(ETS, EnhancedTransmissionSelection);
+extern const QDiscVTable ets_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_ets_u8);
+CONFIG_PARSER_PROTOTYPE(config_parse_ets_quanta);
+CONFIG_PARSER_PROTOTYPE(config_parse_ets_prio);
diff --git a/src/network/tc/fifo.c b/src/network/tc/fifo.c
new file mode 100644
index 0000000..8b1fa6e
--- /dev/null
+++ b/src/network/tc/fifo.c
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "fifo.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+static int fifo_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ struct tc_fifo_qopt opt = {};
+ FirstInFirstOut *fifo;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ switch(qdisc->kind) {
+ case QDISC_KIND_PFIFO:
+ fifo = PFIFO(qdisc);
+ break;
+ case QDISC_KIND_BFIFO:
+ fifo = BFIFO(qdisc);
+ break;
+ case QDISC_KIND_PFIFO_HEAD_DROP:
+ fifo = PFIFO_HEAD_DROP(qdisc);
+ break;
+ default:
+ assert_not_reached("Invalid QDisc kind.");
+ }
+
+ opt.limit = fifo->limit;
+
+ r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(struct tc_fifo_qopt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_OPTIONS attribute: %m");
+
+ return 0;
+}
+
+int config_parse_pfifo_size(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ FirstInFirstOut *fifo;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(ltype, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ switch(qdisc->kind) {
+ case QDISC_KIND_PFIFO:
+ fifo = PFIFO(qdisc);
+ break;
+ case QDISC_KIND_PFIFO_HEAD_DROP:
+ fifo = PFIFO_HEAD_DROP(qdisc);
+ break;
+ default:
+ assert_not_reached("Invalid QDisc kind.");
+ }
+
+ if (isempty(rvalue)) {
+ fifo->limit = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &fifo->limit);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+ return 0;
+}
+
+int config_parse_bfifo_size(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ FirstInFirstOut *fifo;
+ uint64_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_BFIFO, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fifo = BFIFO(qdisc);
+
+ if (isempty(rvalue)) {
+ fifo->limit = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1024, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (u > UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ fifo->limit = (uint32_t) u;
+
+ qdisc = NULL;
+ return 0;
+}
+
+const QDiscVTable pfifo_vtable = {
+ .object_size = sizeof(FirstInFirstOut),
+ .tca_kind = "pfifo",
+ .fill_message = fifo_fill_message,
+};
+
+const QDiscVTable bfifo_vtable = {
+ .object_size = sizeof(FirstInFirstOut),
+ .tca_kind = "bfifo",
+ .fill_message = fifo_fill_message,
+};
+
+const QDiscVTable pfifo_head_drop_vtable = {
+ .object_size = sizeof(FirstInFirstOut),
+ .tca_kind = "pfifo_head_drop",
+ .fill_message = fifo_fill_message,
+};
+
+const QDiscVTable pfifo_fast_vtable = {
+ .object_size = sizeof(FirstInFirstOut),
+ .tca_kind = "pfifo_fast",
+};
diff --git a/src/network/tc/fifo.h b/src/network/tc/fifo.h
new file mode 100644
index 0000000..b9bbd09
--- /dev/null
+++ b/src/network/tc/fifo.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct FirstInFirstOut {
+ QDisc meta;
+
+ uint32_t limit;
+} FirstInFirstOut;
+
+DEFINE_QDISC_CAST(PFIFO, FirstInFirstOut);
+DEFINE_QDISC_CAST(BFIFO, FirstInFirstOut);
+DEFINE_QDISC_CAST(PFIFO_HEAD_DROP, FirstInFirstOut);
+DEFINE_QDISC_CAST(PFIFO_FAST, FirstInFirstOut);
+
+extern const QDiscVTable pfifo_vtable;
+extern const QDiscVTable bfifo_vtable;
+extern const QDiscVTable pfifo_head_drop_vtable;
+extern const QDiscVTable pfifo_fast_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_pfifo_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_bfifo_size);
diff --git a/src/network/tc/fq-codel.c b/src/network/tc/fq-codel.c
new file mode 100644
index 0000000..958f65a
--- /dev/null
+++ b/src/network/tc/fq-codel.c
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "string-util.h"
+#include "strv.h"
+
+static int fair_queueing_controlled_delay_init(QDisc *qdisc) {
+ FairQueueingControlledDelay *fqcd;
+
+ assert(qdisc);
+
+ fqcd = FQ_CODEL(qdisc);
+
+ fqcd->memory_limit = UINT32_MAX;
+ fqcd->ce_threshold_usec = USEC_INFINITY;
+ fqcd->ecn = -1;
+
+ return 0;
+}
+
+static int fair_queueing_controlled_delay_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ FairQueueingControlledDelay *fqcd;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ fqcd = FQ_CODEL(qdisc);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "fq_codel");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (fqcd->packet_limit > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_LIMIT, fqcd->packet_limit);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_LIMIT attribute: %m");
+ }
+
+ if (fqcd->flows > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_FLOWS, fqcd->flows);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_FLOWS attribute: %m");
+ }
+
+ if (fqcd->quantum > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_QUANTUM, fqcd->quantum);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_QUANTUM attribute: %m");
+ }
+
+ if (fqcd->interval_usec > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_INTERVAL, fqcd->interval_usec);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_INTERVAL attribute: %m");
+ }
+
+ if (fqcd->target_usec > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_TARGET, fqcd->target_usec);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_TARGET attribute: %m");
+ }
+
+ if (fqcd->ecn >= 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_ECN, fqcd->ecn);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_ECN attribute: %m");
+ }
+
+ if (fqcd->ce_threshold_usec != USEC_INFINITY) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_CE_THRESHOLD, fqcd->ce_threshold_usec);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_CE_THRESHOLD attribute: %m");
+ }
+
+ if (fqcd->memory_limit != UINT32_MAX) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_MEMORY_LIMIT, fqcd->memory_limit);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_MEMORY_LIMIT attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_fair_queueing_controlled_delay_u32(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueingControlledDelay *fqcd;
+ Network *network = data;
+ uint32_t *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fqcd = FQ_CODEL(qdisc);
+
+ if (streq(lvalue, "PacketLimit"))
+ p = &fqcd->packet_limit;
+ else if (streq(lvalue, "Flows"))
+ p = &fqcd->flows;
+ else
+ assert_not_reached("Invalid lvalue.");
+
+ if (isempty(rvalue)) {
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, p);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_fair_queueing_controlled_delay_usec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueingControlledDelay *fqcd;
+ Network *network = data;
+ usec_t *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fqcd = FQ_CODEL(qdisc);
+
+ if (streq(lvalue, "TargetSec"))
+ p = &fqcd->target_usec;
+ else if (streq(lvalue, "IntervalSec"))
+ p = &fqcd->interval_usec;
+ else if (streq(lvalue, "CEThresholdSec"))
+ p = &fqcd->ce_threshold_usec;
+ else
+ assert_not_reached("Invalid lvalue.");
+
+ if (isempty(rvalue)) {
+ if (streq(lvalue, "CEThresholdSec"))
+ *p = USEC_INFINITY;
+ else
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_sec(rvalue, p);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_fair_queueing_controlled_delay_bool(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueingControlledDelay *fqcd;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fqcd = FQ_CODEL(qdisc);
+
+ if (isempty(rvalue)) {
+ fqcd->ecn = -1;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ fqcd->ecn = r;
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_fair_queueing_controlled_delay_size(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueingControlledDelay *fqcd;
+ Network *network = data;
+ uint64_t sz;
+ uint32_t *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fqcd = FQ_CODEL(qdisc);
+
+ if (STR_IN_SET(lvalue, "MemoryLimitBytes", "MemoryLimit"))
+ p = &fqcd->memory_limit;
+ else if (STR_IN_SET(lvalue, "QuantumBytes", "Quantum"))
+ p = &fqcd->quantum;
+ else
+ assert_not_reached("Invalid lvalue.");
+
+ if (isempty(rvalue)) {
+ if (STR_IN_SET(lvalue, "MemoryLimitBytes", "MemoryLimit"))
+ *p = UINT32_MAX;
+ else
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1024, &sz);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (sz >= UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Specified '%s=' is too large, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ *p = sz;
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable fq_codel_vtable = {
+ .object_size = sizeof(FairQueueingControlledDelay),
+ .tca_kind = "fq_codel",
+ .init = fair_queueing_controlled_delay_init,
+ .fill_message = fair_queueing_controlled_delay_fill_message,
+};
diff --git a/src/network/tc/fq-codel.h b/src/network/tc/fq-codel.h
new file mode 100644
index 0000000..2553c59
--- /dev/null
+++ b/src/network/tc/fq-codel.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+#include "time-util.h"
+
+typedef struct FairQueueingControlledDelay {
+ QDisc meta;
+
+ uint32_t packet_limit;
+ uint32_t flows;
+ uint32_t quantum;
+ uint32_t memory_limit;
+ usec_t target_usec;
+ usec_t interval_usec;
+ usec_t ce_threshold_usec;
+ int ecn;
+} FairQueueingControlledDelay;
+
+DEFINE_QDISC_CAST(FQ_CODEL, FairQueueingControlledDelay);
+extern const QDiscVTable fq_codel_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_controlled_delay_u32);
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_controlled_delay_usec);
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_controlled_delay_bool);
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_controlled_delay_size);
diff --git a/src/network/tc/fq-pie.c b/src/network/tc/fq-pie.c
new file mode 100644
index 0000000..c7d7623
--- /dev/null
+++ b/src/network/tc/fq-pie.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "fq-pie.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+static int fq_pie_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ FlowQueuePIE *fq_pie;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ fq_pie = FQ_PIE(qdisc);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "fq_pie");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (fq_pie->packet_limit > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_PIE_LIMIT, fq_pie->packet_limit);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_PIE_PLIMIT attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_fq_pie_packet_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FlowQueuePIE *fq_pie;
+ Network *network = data;
+ uint32_t val;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ_PIE, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+
+ fq_pie = FQ_PIE(qdisc);
+
+ if (isempty(rvalue)) {
+ fq_pie->packet_limit = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &val);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (val == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ fq_pie->packet_limit = val;
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable fq_pie_vtable = {
+ .object_size = sizeof(FlowQueuePIE),
+ .tca_kind = "fq_pie",
+ .fill_message = fq_pie_fill_message,
+};
diff --git a/src/network/tc/fq-pie.h b/src/network/tc/fq-pie.h
new file mode 100644
index 0000000..51fb626
--- /dev/null
+++ b/src/network/tc/fq-pie.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct FlowQueuePIE {
+ QDisc meta;
+
+ uint32_t packet_limit;
+} FlowQueuePIE;
+
+DEFINE_QDISC_CAST(FQ_PIE, FlowQueuePIE);
+extern const QDiscVTable fq_pie_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_fq_pie_packet_limit);
diff --git a/src/network/tc/fq.c b/src/network/tc/fq.c
new file mode 100644
index 0000000..d48aea8
--- /dev/null
+++ b/src/network/tc/fq.c
@@ -0,0 +1,420 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "fq.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+static int fair_queueing_init(QDisc *qdisc) {
+ FairQueueing *fq;
+
+ assert(qdisc);
+
+ fq = FQ(qdisc);
+
+ fq->pacing = -1;
+ fq->ce_threshold_usec = USEC_INFINITY;
+
+ return 0;
+}
+
+static int fair_queueing_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ FairQueueing *fq;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ fq = FQ(qdisc);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "fq");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (fq->packet_limit > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_PLIMIT, fq->packet_limit);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_PLIMIT attribute: %m");
+ }
+
+ if (fq->flow_limit > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_FLOW_PLIMIT, fq->flow_limit);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_FLOW_PLIMIT attribute: %m");
+ }
+
+ if (fq->quantum > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_QUANTUM, fq->quantum);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_QUANTUM attribute: %m");
+ }
+
+ if (fq->initial_quantum > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_INITIAL_QUANTUM, fq->initial_quantum);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_INITIAL_QUANTUM attribute: %m");
+ }
+
+ if (fq->pacing >= 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_RATE_ENABLE, fq->pacing);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_RATE_ENABLE attribute: %m");
+ }
+
+ if (fq->max_rate > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_FLOW_MAX_RATE, fq->max_rate);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_FLOW_MAX_RATE attribute: %m");
+ }
+
+ if (fq->buckets > 0) {
+ uint32_t l;
+
+ l = log2u(fq->buckets);
+ r = sd_netlink_message_append_u32(req, TCA_FQ_BUCKETS_LOG, l);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_BUCKETS_LOG attribute: %m");
+ }
+
+ if (fq->orphan_mask > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_ORPHAN_MASK, fq->orphan_mask);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_ORPHAN_MASK attribute: %m");
+ }
+
+ if (fq->ce_threshold_usec != USEC_INFINITY) {
+ r = sd_netlink_message_append_u32(req, TCA_FQ_CE_THRESHOLD, fq->ce_threshold_usec);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_FQ_CE_THRESHOLD attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_fair_queueing_u32(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueing *fq;
+ Network *network = data;
+ uint32_t *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fq = FQ(qdisc);
+
+ if (streq(lvalue, "PacketLimit"))
+ p = &fq->packet_limit;
+ else if (streq(lvalue, "FlowLimit"))
+ p = &fq->flow_limit;
+ else if (streq(lvalue, "Buckets"))
+ p = &fq->buckets;
+ else if (streq(lvalue, "OrphanMask"))
+ p = &fq->orphan_mask;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ if (isempty(rvalue)) {
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, p);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_fair_queueing_size(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueing *fq;
+ Network *network = data;
+ uint64_t sz;
+ uint32_t *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fq = FQ(qdisc);
+
+ if (STR_IN_SET(lvalue, "QuantumBytes", "Quantum"))
+ p = &fq->quantum;
+ else if (STR_IN_SET(lvalue, "InitialQuantumBytes", "InitialQuantum"))
+ p = &fq->initial_quantum;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ if (isempty(rvalue)) {
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1024, &sz);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (sz > UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Specified '%s=' is too large, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ *p = sz;
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_fair_queueing_bool(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueing *fq;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fq = FQ(qdisc);
+
+ if (isempty(rvalue)) {
+ fq->pacing = -1;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ fq->pacing = r;
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_fair_queueing_usec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueing *fq;
+ Network *network = data;
+ usec_t sec;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fq = FQ(qdisc);
+
+ if (isempty(rvalue)) {
+ fq->ce_threshold_usec = USEC_INFINITY;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_sec(rvalue, &sec);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (sec > UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Specified '%s=' is too large, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ fq->ce_threshold_usec = sec;
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_fair_queueing_max_rate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ FairQueueing *fq;
+ Network *network = data;
+ uint64_t sz;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_FQ, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ fq = FQ(qdisc);
+
+ if (isempty(rvalue)) {
+ fq->max_rate = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1000, &sz);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (sz / 8 > UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Specified '%s=' is too large, ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ fq->max_rate = sz / 8;
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable fq_vtable = {
+ .init = fair_queueing_init,
+ .object_size = sizeof(FairQueueing),
+ .tca_kind = "fq",
+ .fill_message = fair_queueing_fill_message,
+};
diff --git a/src/network/tc/fq.h b/src/network/tc/fq.h
new file mode 100644
index 0000000..77469c4
--- /dev/null
+++ b/src/network/tc/fq.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct FairQueueing {
+ QDisc meta;
+
+ uint32_t packet_limit;
+ uint32_t flow_limit;
+ uint32_t quantum;
+ uint32_t initial_quantum;
+ uint32_t max_rate;
+ uint32_t buckets;
+ uint32_t orphan_mask;
+ int pacing;
+ usec_t ce_threshold_usec;
+} FairQueueing;
+
+DEFINE_QDISC_CAST(FQ, FairQueueing);
+extern const QDiscVTable fq_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_u32);
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_bool);
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_usec);
+CONFIG_PARSER_PROTOTYPE(config_parse_fair_queueing_max_rate);
diff --git a/src/network/tc/gred.c b/src/network/tc/gred.c
new file mode 100644
index 0000000..46a9ead
--- /dev/null
+++ b/src/network/tc/gred.c
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "string-util.h"
+
+static int generic_random_early_detection_init(QDisc *qdisc) {
+ GenericRandomEarlyDetection *gred;
+
+ assert(qdisc);
+
+ gred = GRED(qdisc);
+
+ gred->grio = -1;
+
+ return 0;
+}
+
+static int generic_random_early_detection_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ GenericRandomEarlyDetection *gred;
+ struct tc_gred_sopt opt = {};
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ gred = GRED(qdisc);
+
+ opt.DPs = gred->virtual_queues;
+ opt.def_DP = gred->default_virtual_queue;
+
+ if (gred->grio >= 0)
+ opt.grio = gred->grio;
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "gred");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ r = sd_netlink_message_append_data(req, TCA_GRED_DPS, &opt, sizeof(struct tc_gred_sopt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_GRED_DPS attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+static int generic_random_early_detection_verify(QDisc *qdisc) {
+ GenericRandomEarlyDetection *gred = GRED(qdisc);
+
+ if (gred->default_virtual_queue >= gred->virtual_queues)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: DefaultVirtualQueue= must be less than VirtualQueues=. "
+ "Ignoring [GenericRandomEarlyDetection] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ return 0;
+}
+
+int config_parse_generic_random_early_detection_u32(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ GenericRandomEarlyDetection *gred;
+ Network *network = data;
+ uint32_t *p;
+ uint32_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_GRED, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ gred = GRED(qdisc);
+
+ if (streq(lvalue, "VirtualQueues"))
+ p = &gred->virtual_queues;
+ else if (streq(lvalue, "DefaultVirtualQueue"))
+ p = &gred->default_virtual_queue;
+ else
+ assert_not_reached("Invalid lvalue.");
+
+ if (isempty(rvalue)) {
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ if (v > MAX_DPs)
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+
+ *p = v;
+ qdisc = NULL;
+
+ return 0;
+}
+int config_parse_generic_random_early_detection_bool(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ GenericRandomEarlyDetection *gred;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_GRED, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ gred = GRED(qdisc);
+
+ if (isempty(rvalue)) {
+ gred->grio = -1;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ gred->grio = r;
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable gred_vtable = {
+ .object_size = sizeof(GenericRandomEarlyDetection),
+ .tca_kind = "gred",
+ .init = generic_random_early_detection_init,
+ .fill_message = generic_random_early_detection_fill_message,
+ .verify = generic_random_early_detection_verify,
+};
diff --git a/src/network/tc/gred.h b/src/network/tc/gred.h
new file mode 100644
index 0000000..c084ff1
--- /dev/null
+++ b/src/network/tc/gred.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct GenericRandomEarlyDetection {
+ QDisc meta;
+
+ uint32_t virtual_queues;
+ uint32_t default_virtual_queue;
+ int grio;
+} GenericRandomEarlyDetection;
+
+DEFINE_QDISC_CAST(GRED, GenericRandomEarlyDetection);
+extern const QDiscVTable gred_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_generic_random_early_detection_u32);
+CONFIG_PARSER_PROTOTYPE(config_parse_generic_random_early_detection_bool);
diff --git a/src/network/tc/hhf.c b/src/network/tc/hhf.c
new file mode 100644
index 0000000..69c02f4
--- /dev/null
+++ b/src/network/tc/hhf.c
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "hhf.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static int heavy_hitter_filter_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ HeavyHitterFilter *hhf;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ hhf = HHF(qdisc);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "hhf");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (hhf->packet_limit > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_HHF_BACKLOG_LIMIT, hhf->packet_limit);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_HHF_BACKLOG_LIMIT attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_heavy_hitter_filter_packet_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ HeavyHitterFilter *hhf;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_HHF, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ hhf = HHF(qdisc);
+
+ if (isempty(rvalue)) {
+ hhf->packet_limit = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &hhf->packet_limit);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable hhf_vtable = {
+ .object_size = sizeof(HeavyHitterFilter),
+ .tca_kind = "hhf",
+ .fill_message = heavy_hitter_filter_fill_message,
+};
diff --git a/src/network/tc/hhf.h b/src/network/tc/hhf.h
new file mode 100644
index 0000000..04caaa8
--- /dev/null
+++ b/src/network/tc/hhf.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct HeavyHitterFilter {
+ QDisc meta;
+
+ uint32_t packet_limit;
+} HeavyHitterFilter;
+
+DEFINE_QDISC_CAST(HHF, HeavyHitterFilter);
+extern const QDiscVTable hhf_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_heavy_hitter_filter_packet_limit);
diff --git a/src/network/tc/htb.c b/src/network/tc/htb.c
new file mode 100644
index 0000000..0969587
--- /dev/null
+++ b/src/network/tc/htb.c
@@ -0,0 +1,489 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "htb.h"
+#include "string-util.h"
+#include "tc-util.h"
+
+#define HTB_DEFAULT_RATE_TO_QUANTUM 10
+#define HTB_DEFAULT_MTU 1600 /* Ethernet packet length */
+
+static int hierarchy_token_bucket_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ HierarchyTokenBucket *htb;
+ struct tc_htb_glob opt = {
+ .version = 3,
+ };
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ htb = HTB(qdisc);
+
+ opt.rate2quantum = htb->rate_to_quantum;
+ opt.defcls = htb->default_class;
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "htb");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ r = sd_netlink_message_append_data(req, TCA_HTB_INIT, &opt, sizeof(opt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_HTB_INIT attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+ return 0;
+}
+
+int config_parse_hierarchy_token_bucket_default_class(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ HierarchyTokenBucket *htb;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_HTB, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ htb = HTB(qdisc);
+
+ if (isempty(rvalue)) {
+ htb->default_class = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32_full(rvalue, 16, &htb->default_class);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_hierarchy_token_bucket_u32(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ HierarchyTokenBucket *htb;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_HTB, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ htb = HTB(qdisc);
+
+ if (isempty(rvalue)) {
+ htb->rate_to_quantum = HTB_DEFAULT_RATE_TO_QUANTUM;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &htb->rate_to_quantum);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+static int hierarchy_token_bucket_init(QDisc *qdisc) {
+ HierarchyTokenBucket *htb;
+
+ assert(qdisc);
+
+ htb = HTB(qdisc);
+
+ htb->rate_to_quantum = HTB_DEFAULT_RATE_TO_QUANTUM;
+
+ return 0;
+}
+
+const QDiscVTable htb_vtable = {
+ .object_size = sizeof(HierarchyTokenBucket),
+ .tca_kind = "htb",
+ .fill_message = hierarchy_token_bucket_fill_message,
+ .init = hierarchy_token_bucket_init,
+};
+
+static int hierarchy_token_bucket_class_fill_message(Link *link, TClass *tclass, sd_netlink_message *req) {
+ HierarchyTokenBucketClass *htb;
+ struct tc_htb_opt opt = {};
+ uint32_t rtab[256], ctab[256];
+ int r;
+
+ assert(link);
+ assert(tclass);
+ assert(req);
+
+ htb = TCLASS_TO_HTB(tclass);
+
+ opt.prio = htb->priority;
+ opt.quantum = htb->quantum;
+ opt.rate.rate = (htb->rate >= (1ULL << 32)) ? ~0U : htb->rate;
+ opt.ceil.rate = (htb->ceil_rate >= (1ULL << 32)) ? ~0U : htb->ceil_rate;
+ opt.rate.overhead = htb->overhead;
+ opt.ceil.overhead = htb->overhead;
+
+ r = tc_transmit_time(htb->rate, htb->buffer, &opt.buffer);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate buffer size: %m");
+
+ r = tc_transmit_time(htb->ceil_rate, htb->ceil_buffer, &opt.cbuffer);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate ceil buffer size: %m");
+
+ r = tc_fill_ratespec_and_table(&opt.rate, rtab, htb->mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate rate table: %m");
+
+ r = tc_fill_ratespec_and_table(&opt.ceil, ctab, htb->mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate ceil rate table: %m");
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "htb");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ r = sd_netlink_message_append_data(req, TCA_HTB_PARMS, &opt, sizeof(opt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_HTB_PARMS attribute: %m");
+
+ if (htb->rate >= (1ULL << 32)) {
+ r = sd_netlink_message_append_u64(req, TCA_HTB_RATE64, htb->rate);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_HTB_RATE64 attribute: %m");
+ }
+
+ if (htb->ceil_rate >= (1ULL << 32)) {
+ r = sd_netlink_message_append_u64(req, TCA_HTB_CEIL64, htb->ceil_rate);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_HTB_CEIL64 attribute: %m");
+ }
+
+ r = sd_netlink_message_append_data(req, TCA_HTB_RTAB, rtab, sizeof(rtab));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_HTB_RTAB attribute: %m");
+
+ r = sd_netlink_message_append_data(req, TCA_HTB_CTAB, ctab, sizeof(ctab));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_HTB_CTAB attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+ return 0;
+}
+
+int config_parse_hierarchy_token_bucket_class_u32(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL;
+ HierarchyTokenBucketClass *htb;
+ Network *network = data;
+ uint32_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = tclass_new_static(TCLASS_KIND_HTB, network, filename, section_line, &tclass);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create traffic control class, ignoring assignment: %m");
+ return 0;
+ }
+
+ htb = TCLASS_TO_HTB(tclass);
+
+ if (isempty(rvalue)) {
+ htb->priority = 0;
+ tclass = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ htb->priority = v;
+ tclass = NULL;
+
+ return 0;
+}
+
+int config_parse_hierarchy_token_bucket_class_size(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL;
+ HierarchyTokenBucketClass *htb;
+ Network *network = data;
+ uint64_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = tclass_new_static(TCLASS_KIND_HTB, network, filename, section_line, &tclass);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create traffic control class, ignoring assignment: %m");
+ return 0;
+ }
+
+ htb = TCLASS_TO_HTB(tclass);
+
+ if (isempty(rvalue)) {
+ if (streq(lvalue, "QuantumBytes"))
+ htb->quantum = 0;
+ else if (streq(lvalue, "MTUBytes"))
+ htb->mtu = HTB_DEFAULT_MTU;
+ else if (streq(lvalue, "OverheadBytes"))
+ htb->overhead = 0;
+ else if (streq(lvalue, "BufferBytes"))
+ htb->buffer = 0;
+ else if (streq(lvalue, "CeilBufferBytes"))
+ htb->ceil_buffer = 0;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ tclass = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1024, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if ((streq(lvalue, "OverheadBytes") && v > UINT16_MAX) || v > UINT32_MAX) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "QuantumBytes"))
+ htb->quantum = v;
+ else if (streq(lvalue, "OverheadBytes"))
+ htb->overhead = v;
+ else if (streq(lvalue, "MTUBytes"))
+ htb->mtu = v;
+ else if (streq(lvalue, "BufferBytes"))
+ htb->buffer = v;
+ else if (streq(lvalue, "CeilBufferBytes"))
+ htb->ceil_buffer = v;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ tclass = NULL;
+
+ return 0;
+}
+
+int config_parse_hierarchy_token_bucket_class_rate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL;
+ HierarchyTokenBucketClass *htb;
+ Network *network = data;
+ uint64_t *v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = tclass_new_static(TCLASS_KIND_HTB, network, filename, section_line, &tclass);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create traffic control class, ignoring assignment: %m");
+ return 0;
+ }
+
+ htb = TCLASS_TO_HTB(tclass);
+ if (streq(lvalue, "Rate"))
+ v = &htb->rate;
+ else if (streq(lvalue, "CeilRate"))
+ v = &htb->ceil_rate;
+ else
+ assert_not_reached("Invalid lvalue");
+
+ if (isempty(rvalue)) {
+ *v = 0;
+
+ tclass = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1000, v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ *v /= 8;
+ tclass = NULL;
+
+ return 0;
+}
+
+static int hierarchy_token_bucket_class_init(TClass *tclass) {
+ HierarchyTokenBucketClass *htb;
+
+ assert(tclass);
+
+ htb = TCLASS_TO_HTB(tclass);
+
+ htb->mtu = HTB_DEFAULT_MTU;
+
+ return 0;
+}
+
+static int hierarchy_token_bucket_class_verify(TClass *tclass) {
+ HierarchyTokenBucketClass *htb;
+ uint32_t hz;
+ int r;
+
+ assert(tclass);
+
+ htb = TCLASS_TO_HTB(tclass);
+
+ if (htb->rate == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Rate= is mandatory. "
+ "Ignoring [HierarchyTokenBucketClass] section from line %u.",
+ tclass->section->filename, tclass->section->line);
+
+ /* if CeilRate= setting is missing, use the same as Rate= */
+ if (htb->ceil_rate == 0)
+ htb->ceil_rate = htb->rate;
+
+ r = tc_init(NULL, &hz);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read /proc/net/psched: %m");
+
+ if (htb->buffer == 0)
+ htb->buffer = htb->rate / hz + htb->mtu;
+ if (htb->ceil_buffer == 0)
+ htb->ceil_buffer = htb->ceil_rate / hz + htb->mtu;
+
+ return 0;
+}
+
+const TClassVTable htb_tclass_vtable = {
+ .object_size = sizeof(HierarchyTokenBucketClass),
+ .tca_kind = "htb",
+ .fill_message = hierarchy_token_bucket_class_fill_message,
+ .init = hierarchy_token_bucket_class_init,
+ .verify = hierarchy_token_bucket_class_verify,
+};
diff --git a/src/network/tc/htb.h b/src/network/tc/htb.h
new file mode 100644
index 0000000..55644db
--- /dev/null
+++ b/src/network/tc/htb.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+#include "tclass.h"
+
+typedef struct HierarchyTokenBucket {
+ QDisc meta;
+
+ uint32_t default_class;
+ uint32_t rate_to_quantum;
+} HierarchyTokenBucket;
+
+DEFINE_QDISC_CAST(HTB, HierarchyTokenBucket);
+extern const QDiscVTable htb_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_default_class);
+CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_u32);
+
+typedef struct HierarchyTokenBucketClass {
+ TClass meta;
+
+ uint32_t priority;
+ uint32_t quantum;
+ uint32_t mtu;
+ uint16_t overhead;
+ uint64_t rate;
+ uint32_t buffer;
+ uint64_t ceil_rate;
+ uint32_t ceil_buffer;
+} HierarchyTokenBucketClass;
+
+DEFINE_TCLASS_CAST(HTB, HierarchyTokenBucketClass);
+extern const TClassVTable htb_tclass_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_class_u32);
+CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_class_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_hierarchy_token_bucket_class_rate);
diff --git a/src/network/tc/netem.c b/src/network/tc/netem.c
new file mode 100644
index 0000000..454e556
--- /dev/null
+++ b/src/network/tc/netem.c
@@ -0,0 +1,236 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netem.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "strv.h"
+#include "tc-util.h"
+
+static int network_emulator_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ struct tc_netem_qopt opt = {
+ .limit = 1000,
+ };
+ NetworkEmulator *ne;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ ne = NETEM(qdisc);
+
+ if (ne->limit > 0)
+ opt.limit = ne->limit;
+
+ if (ne->loss > 0)
+ opt.loss = ne->loss;
+
+ if (ne->duplicate > 0)
+ opt.duplicate = ne->duplicate;
+
+ if (ne->delay != USEC_INFINITY) {
+ r = tc_time_to_tick(ne->delay, &opt.latency);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate latency in TCA_OPTION: %m");
+ }
+
+ if (ne->jitter != USEC_INFINITY) {
+ r = tc_time_to_tick(ne->jitter, &opt.jitter);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate jitter in TCA_OPTION: %m");
+ }
+
+ r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(struct tc_netem_qopt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_OPTION attribute: %m");
+
+ return 0;
+}
+
+int config_parse_network_emulator_delay(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ NetworkEmulator *ne;
+ usec_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_NETEM, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ ne = NETEM(qdisc);
+
+ if (isempty(rvalue)) {
+ if (STR_IN_SET(lvalue, "DelaySec", "NetworkEmulatorDelaySec"))
+ ne->delay = USEC_INFINITY;
+ else if (STR_IN_SET(lvalue, "DelayJitterSec", "NetworkEmulatorDelayJitterSec"))
+ ne->jitter = USEC_INFINITY;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_sec(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ if (STR_IN_SET(lvalue, "DelaySec", "NetworkEmulatorDelaySec"))
+ ne->delay = u;
+ else if (STR_IN_SET(lvalue, "DelayJitterSec", "NetworkEmulatorDelayJitterSec"))
+ ne->jitter = u;
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_network_emulator_rate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ NetworkEmulator *ne;
+ uint32_t rate;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_NETEM, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ ne = NETEM(qdisc);
+
+ if (isempty(rvalue)) {
+ if (STR_IN_SET(lvalue, "LossRate", "NetworkEmulatorLossRate"))
+ ne->loss = 0;
+ else if (STR_IN_SET(lvalue, "DuplicateRate", "NetworkEmulatorDuplicateRate"))
+ ne->duplicate = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_tc_percent(rvalue, &rate);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ if (STR_IN_SET(lvalue, "LossRate", "NetworkEmulatorLossRate"))
+ ne->loss = rate;
+ else if (STR_IN_SET(lvalue, "DuplicateRate", "NetworkEmulatorDuplicateRate"))
+ ne->duplicate = rate;
+
+ qdisc = NULL;
+ return 0;
+}
+
+int config_parse_network_emulator_packet_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ NetworkEmulator *ne;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_NETEM, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ ne = NETEM(qdisc);
+
+ if (isempty(rvalue)) {
+ ne->limit = 0;
+ qdisc = NULL;
+
+ return 0;
+ }
+
+ r = safe_atou(rvalue, &ne->limit);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+ return 0;
+}
+
+const QDiscVTable netem_vtable = {
+ .object_size = sizeof(NetworkEmulator),
+ .tca_kind = "netem",
+ .fill_message = network_emulator_fill_message,
+};
diff --git a/src/network/tc/netem.h b/src/network/tc/netem.h
new file mode 100644
index 0000000..d58d5ac
--- /dev/null
+++ b/src/network/tc/netem.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+#include "time-util.h"
+
+typedef struct NetworkEmulator {
+ QDisc meta;
+
+ usec_t delay;
+ usec_t jitter;
+
+ uint32_t limit;
+ uint32_t loss;
+ uint32_t duplicate;
+} NetworkEmulator;
+
+DEFINE_QDISC_CAST(NETEM, NetworkEmulator);
+extern const QDiscVTable netem_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_network_emulator_delay);
+CONFIG_PARSER_PROTOTYPE(config_parse_network_emulator_rate);
+CONFIG_PARSER_PROTOTYPE(config_parse_network_emulator_packet_limit);
diff --git a/src/network/tc/pie.c b/src/network/tc/pie.c
new file mode 100644
index 0000000..695a381
--- /dev/null
+++ b/src/network/tc/pie.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "pie.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+static int pie_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ ProportionalIntegralControllerEnhanced *pie;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ pie = PIE(qdisc);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "pie");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (pie->packet_limit > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_PIE_LIMIT, pie->packet_limit);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_PIE_PLIMIT attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_pie_packet_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ ProportionalIntegralControllerEnhanced *pie;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_PIE, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ pie = PIE(qdisc);
+
+ if (isempty(rvalue)) {
+ pie->packet_limit = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &pie->packet_limit);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable pie_vtable = {
+ .object_size = sizeof(ProportionalIntegralControllerEnhanced),
+ .tca_kind = "pie",
+ .fill_message = pie_fill_message,
+};
diff --git a/src/network/tc/pie.h b/src/network/tc/pie.h
new file mode 100644
index 0000000..40a114e
--- /dev/null
+++ b/src/network/tc/pie.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct ProportionalIntegralControllerEnhanced {
+ QDisc meta;
+
+ uint32_t packet_limit;
+} ProportionalIntegralControllerEnhanced;
+
+DEFINE_QDISC_CAST(PIE, ProportionalIntegralControllerEnhanced);
+extern const QDiscVTable pie_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_pie_packet_limit);
diff --git a/src/network/tc/qdisc.c b/src/network/tc/qdisc.c
new file mode 100644
index 0000000..2add128
--- /dev/null
+++ b/src/network/tc/qdisc.c
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tc-util.h"
+
+const QDiscVTable * const qdisc_vtable[_QDISC_KIND_MAX] = {
+ [QDISC_KIND_BFIFO] = &bfifo_vtable,
+ [QDISC_KIND_CAKE] = &cake_vtable,
+ [QDISC_KIND_CODEL] = &codel_vtable,
+ [QDISC_KIND_DRR] = &drr_vtable,
+ [QDISC_KIND_ETS] = &ets_vtable,
+ [QDISC_KIND_FQ] = &fq_vtable,
+ [QDISC_KIND_FQ_CODEL] = &fq_codel_vtable,
+ [QDISC_KIND_FQ_PIE] = &fq_pie_vtable,
+ [QDISC_KIND_GRED] = &gred_vtable,
+ [QDISC_KIND_HHF] = &hhf_vtable,
+ [QDISC_KIND_HTB] = &htb_vtable,
+ [QDISC_KIND_NETEM] = &netem_vtable,
+ [QDISC_KIND_PIE] = &pie_vtable,
+ [QDISC_KIND_QFQ] = &qfq_vtable,
+ [QDISC_KIND_PFIFO] = &pfifo_vtable,
+ [QDISC_KIND_PFIFO_FAST] = &pfifo_fast_vtable,
+ [QDISC_KIND_PFIFO_HEAD_DROP] = &pfifo_head_drop_vtable,
+ [QDISC_KIND_SFB] = &sfb_vtable,
+ [QDISC_KIND_SFQ] = &sfq_vtable,
+ [QDISC_KIND_TBF] = &tbf_vtable,
+ [QDISC_KIND_TEQL] = &teql_vtable,
+};
+
+static int qdisc_new(QDiscKind kind, QDisc **ret) {
+ _cleanup_(qdisc_freep) QDisc *qdisc = NULL;
+ int r;
+
+ if (kind == _QDISC_KIND_INVALID) {
+ qdisc = new(QDisc, 1);
+ if (!qdisc)
+ return -ENOMEM;
+
+ *qdisc = (QDisc) {
+ .meta.kind = TC_KIND_QDISC,
+ .family = AF_UNSPEC,
+ .parent = TC_H_ROOT,
+ .kind = kind,
+ };
+ } else {
+ qdisc = malloc0(qdisc_vtable[kind]->object_size);
+ if (!qdisc)
+ return -ENOMEM;
+
+ qdisc->meta.kind = TC_KIND_QDISC,
+ qdisc->family = AF_UNSPEC;
+ qdisc->parent = TC_H_ROOT;
+ qdisc->kind = kind;
+
+ if (QDISC_VTABLE(qdisc)->init) {
+ r = QDISC_VTABLE(qdisc)->init(qdisc);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *ret = TAKE_PTR(qdisc);
+
+ return 0;
+}
+
+int qdisc_new_static(QDiscKind kind, Network *network, const char *filename, unsigned section_line, QDisc **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(qdisc_freep) QDisc *qdisc = NULL;
+ TrafficControl *existing;
+ QDisc *q = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ existing = ordered_hashmap_get(network->tc_by_section, n);
+ if (existing) {
+ if (existing->kind != TC_KIND_QDISC)
+ return -EINVAL;
+
+ q = TC_TO_QDISC(existing);
+
+ if (q->kind != _QDISC_KIND_INVALID &&
+ kind != _QDISC_KIND_INVALID &&
+ q->kind != kind)
+ return -EINVAL;
+
+ if (q->kind == kind || kind == _QDISC_KIND_INVALID) {
+ *ret = q;
+ return 0;
+ }
+ }
+
+ r = qdisc_new(kind, &qdisc);
+ if (r < 0)
+ return r;
+
+ if (q) {
+ qdisc->family = q->family;
+ qdisc->handle = q->handle;
+ qdisc->parent = q->parent;
+ qdisc->tca_kind = TAKE_PTR(q->tca_kind);
+
+ qdisc_free(q);
+ }
+
+ qdisc->network = network;
+ qdisc->section = TAKE_PTR(n);
+
+ r = ordered_hashmap_ensure_allocated(&network->tc_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(network->tc_by_section, qdisc->section, TC(qdisc));
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(qdisc);
+ return 0;
+}
+
+void qdisc_free(QDisc *qdisc) {
+ if (!qdisc)
+ return;
+
+ if (qdisc->network && qdisc->section)
+ ordered_hashmap_remove(qdisc->network->tc_by_section, qdisc->section);
+
+ network_config_section_free(qdisc->section);
+
+ free(qdisc->tca_kind);
+ free(qdisc);
+}
+
+static int qdisc_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->tc_messages > 0);
+ link->tc_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_error_errno(link, m, r, "Could not set QDisc");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->tc_messages == 0) {
+ log_link_debug(link, "Traffic control configured");
+ link->tc_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+int qdisc_configure(Link *link, QDisc *qdisc) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+
+ r = sd_rtnl_message_new_qdisc(link->manager->rtnl, &req, RTM_NEWQDISC, qdisc->family, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_NEWQDISC message: %m");
+
+ r = sd_rtnl_message_set_qdisc_parent(req, qdisc->parent);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create tcm_parent message: %m");
+
+ if (qdisc->handle != TC_H_UNSPEC) {
+ r = sd_rtnl_message_set_qdisc_handle(req, qdisc->handle);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set tcm_handle message: %m");
+ }
+
+ if (QDISC_VTABLE(qdisc)) {
+ if (QDISC_VTABLE(qdisc)->fill_tca_kind) {
+ r = QDISC_VTABLE(qdisc)->fill_tca_kind(link, qdisc, req);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_netlink_message_append_string(req, TCA_KIND, QDISC_VTABLE(qdisc)->tca_kind);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_KIND attribute: %m");
+ }
+
+ if (QDISC_VTABLE(qdisc)->fill_message) {
+ r = QDISC_VTABLE(qdisc)->fill_message(link, qdisc, req);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ r = sd_netlink_message_append_string(req, TCA_KIND, qdisc->tca_kind);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_KIND attribute: %m");
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, qdisc_handler, link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ link->tc_messages++;
+
+ return 0;
+}
+
+int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact) {
+ int r;
+
+ assert(qdisc);
+ assert(has_root);
+ assert(has_clsact);
+
+ if (section_is_invalid(qdisc->section))
+ return -EINVAL;
+
+ if (QDISC_VTABLE(qdisc) && QDISC_VTABLE(qdisc)->verify) {
+ r = QDISC_VTABLE(qdisc)->verify(qdisc);
+ if (r < 0)
+ return r;
+ }
+
+ if (qdisc->parent == TC_H_ROOT) {
+ if (*has_root)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: More than one root qdisc section is defined. "
+ "Ignoring the qdisc section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+ *has_root = true;
+ } else if (qdisc->parent == TC_H_CLSACT) { /* TC_H_CLSACT == TC_H_INGRESS */
+ if (*has_clsact)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: More than one clsact or ingress qdisc section is defined. "
+ "Ignoring the qdisc section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+ *has_clsact = true;
+ }
+
+ return 0;
+}
+
+int config_parse_qdisc_parent(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(ltype, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (streq(rvalue, "root")) {
+ qdisc->parent = TC_H_ROOT;
+ if (qdisc->handle == 0)
+ qdisc->handle = TC_H_UNSPEC;
+ } else if (streq(rvalue, "clsact")) {
+ qdisc->parent = TC_H_CLSACT;
+ qdisc->handle = TC_H_MAKE(TC_H_CLSACT, 0);
+ } else if (streq(rvalue, "ingress")) {
+ qdisc->parent = TC_H_INGRESS;
+ qdisc->handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ } else {
+ r = parse_handle(rvalue, &qdisc->parent);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse 'Parent=', ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ }
+
+ if (STR_IN_SET(rvalue, "clsact", "ingress")) {
+ r = free_and_strdup(&qdisc->tca_kind, rvalue);
+ if (r < 0)
+ return log_oom();
+ } else
+ qdisc->tca_kind = mfree(qdisc->tca_kind);
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_qdisc_handle(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ uint16_t n;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(ltype, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (isempty(rvalue)) {
+ qdisc->handle = TC_H_UNSPEC;
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou16_full(rvalue, 16, &n);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse 'Handle=', ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ qdisc->handle = (uint32_t) n << 16;
+ qdisc = NULL;
+
+ return 0;
+}
diff --git a/src/network/tc/qdisc.h b/src/network/tc/qdisc.h
new file mode 100644
index 0000000..f9a9954
--- /dev/null
+++ b/src/network/tc/qdisc.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+#include "networkd-util.h"
+#include "tc.h"
+
+typedef enum QDiscKind {
+ QDISC_KIND_BFIFO,
+ QDISC_KIND_CAKE,
+ QDISC_KIND_CODEL,
+ QDISC_KIND_DRR,
+ QDISC_KIND_ETS,
+ QDISC_KIND_FQ,
+ QDISC_KIND_FQ_CODEL,
+ QDISC_KIND_FQ_PIE,
+ QDISC_KIND_GRED,
+ QDISC_KIND_HHF,
+ QDISC_KIND_HTB,
+ QDISC_KIND_NETEM,
+ QDISC_KIND_PFIFO,
+ QDISC_KIND_PFIFO_FAST,
+ QDISC_KIND_PFIFO_HEAD_DROP,
+ QDISC_KIND_PIE,
+ QDISC_KIND_QFQ,
+ QDISC_KIND_SFB,
+ QDISC_KIND_SFQ,
+ QDISC_KIND_TBF,
+ QDISC_KIND_TEQL,
+ _QDISC_KIND_MAX,
+ _QDISC_KIND_INVALID = -1,
+} QDiscKind;
+
+typedef struct QDisc {
+ TrafficControl meta;
+
+ NetworkConfigSection *section;
+ Network *network;
+
+ int family;
+ uint32_t handle;
+ uint32_t parent;
+
+ char *tca_kind;
+ QDiscKind kind;
+} QDisc;
+
+typedef struct QDiscVTable {
+ size_t object_size;
+ const char *tca_kind;
+ /* called in qdisc_new() */
+ int (*init)(QDisc *qdisc);
+ int (*fill_tca_kind)(Link *link, QDisc *qdisc, sd_netlink_message *m);
+ int (*fill_message)(Link *link, QDisc *qdisc, sd_netlink_message *m);
+ int (*verify)(QDisc *qdisc);
+} QDiscVTable;
+
+extern const QDiscVTable * const qdisc_vtable[_QDISC_KIND_MAX];
+
+#define QDISC_VTABLE(q) ((q)->kind != _QDISC_KIND_INVALID ? qdisc_vtable[(q)->kind] : NULL)
+
+/* For casting a qdisc into the various qdisc kinds */
+#define DEFINE_QDISC_CAST(UPPERCASE, MixedCase) \
+ static inline MixedCase* UPPERCASE(QDisc *q) { \
+ if (_unlikely_(!q || q->kind != QDISC_KIND_##UPPERCASE)) \
+ return NULL; \
+ \
+ return (MixedCase*) q; \
+ }
+
+/* For casting the various qdisc kinds into a qdisc */
+#define QDISC(q) (&(q)->meta)
+
+void qdisc_free(QDisc *qdisc);
+int qdisc_new_static(QDiscKind kind, Network *network, const char *filename, unsigned section_line, QDisc **ret);
+
+int qdisc_configure(Link *link, QDisc *qdisc);
+int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact);
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(QDisc, qdisc_free);
+
+DEFINE_TC_CAST(QDISC, QDisc);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_qdisc_parent);
+CONFIG_PARSER_PROTOTYPE(config_parse_qdisc_handle);
+
+#include "cake.h"
+#include "codel.h"
+#include "ets.h"
+#include "fifo.h"
+#include "fq-codel.h"
+#include "fq-pie.h"
+#include "fq.h"
+#include "gred.h"
+#include "hhf.h"
+#include "htb.h"
+#include "pie.h"
+#include "qfq.h"
+#include "netem.h"
+#include "drr.h"
+#include "sfb.h"
+#include "sfq.h"
+#include "tbf.h"
+#include "teql.h"
diff --git a/src/network/tc/qfq.c b/src/network/tc/qfq.c
new file mode 100644
index 0000000..320f2c1
--- /dev/null
+++ b/src/network/tc/qfq.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "parse-util.h"
+#include "qdisc.h"
+#include "qfq.h"
+#include "string-util.h"
+
+#define QFQ_MAX_WEIGHT (1 << 10)
+#define QFQ_MIN_MAX_PACKET 512
+#define QFQ_MAX_MAX_PACKET (1 << 16)
+
+const QDiscVTable qfq_vtable = {
+ .object_size = sizeof(QuickFairQueueing),
+ .tca_kind = "qfq",
+};
+
+static int quick_fair_queueing_class_fill_message(Link *link, TClass *tclass, sd_netlink_message *req) {
+ QuickFairQueueingClass *qfq;
+ int r;
+
+ assert(link);
+ assert(tclass);
+ assert(req);
+
+ qfq = TCLASS_TO_QFQ(tclass);
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "qfq");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ if (qfq->weight > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_QFQ_WEIGHT, qfq->weight);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_QFQ_WEIGHT attribute: %m");
+ }
+
+ if (qfq->max_packet > 0) {
+ r = sd_netlink_message_append_u32(req, TCA_QFQ_LMAX, qfq->max_packet);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_QFQ_LMAX attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+ return 0;
+}
+
+int config_parse_quick_fair_queueing_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL;
+ QuickFairQueueingClass *qfq;
+ Network *network = data;
+ uint32_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = tclass_new_static(TCLASS_KIND_QFQ, network, filename, section_line, &tclass);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create traffic control class, ignoring assignment: %m");
+ return 0;
+ }
+
+ qfq = TCLASS_TO_QFQ(tclass);
+
+ if (isempty(rvalue)) {
+ qfq->weight = 0;
+ tclass = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ if (v == 0 || v > QFQ_MAX_WEIGHT) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qfq->weight = v;
+ tclass = NULL;
+
+ return 0;
+}
+
+int config_parse_quick_fair_queueing_max_packet(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL;
+ QuickFairQueueingClass *qfq;
+ Network *network = data;
+ uint64_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = tclass_new_static(TCLASS_KIND_QFQ, network, filename, section_line, &tclass);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create traffic control class, ignoring assignment: %m");
+ return 0;
+ }
+
+ qfq = TCLASS_TO_QFQ(tclass);
+
+ if (isempty(rvalue)) {
+ qfq->max_packet = 0;
+ tclass = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1024, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ if (v < QFQ_MIN_MAX_PACKET || v > QFQ_MAX_MAX_PACKET) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qfq->max_packet = (uint32_t) v;
+ tclass = NULL;
+
+ return 0;
+}
+
+const TClassVTable qfq_tclass_vtable = {
+ .object_size = sizeof(QuickFairQueueingClass),
+ .tca_kind = "qfq",
+ .fill_message = quick_fair_queueing_class_fill_message,
+};
diff --git a/src/network/tc/qfq.h b/src/network/tc/qfq.h
new file mode 100644
index 0000000..0f013a9
--- /dev/null
+++ b/src/network/tc/qfq.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct QuickFairQueueing {
+ QDisc meta;
+} QuickFairQueueing;
+
+DEFINE_QDISC_CAST(QFQ, QuickFairQueueing);
+extern const QDiscVTable qfq_vtable;
+
+typedef struct QuickFairQueueingClass {
+ TClass meta;
+
+ uint32_t weight;
+ uint32_t max_packet;
+} QuickFairQueueingClass;
+
+DEFINE_TCLASS_CAST(QFQ, QuickFairQueueingClass);
+extern const TClassVTable qfq_tclass_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_quick_fair_queueing_weight);
+CONFIG_PARSER_PROTOTYPE(config_parse_quick_fair_queueing_max_packet);
diff --git a/src/network/tc/sfb.c b/src/network/tc/sfb.c
new file mode 100644
index 0000000..674fdf6
--- /dev/null
+++ b/src/network/tc/sfb.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "sfb.h"
+#include "string-util.h"
+
+static int stochastic_fair_blue_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ StochasticFairBlue *sfb;
+ struct tc_sfb_qopt opt = {
+ .rehash_interval = 600*1000,
+ .warmup_time = 60*1000,
+ .penalty_rate = 10,
+ .penalty_burst = 20,
+ .increment = (SFB_MAX_PROB + 1000) / 2000,
+ .decrement = (SFB_MAX_PROB + 10000) / 20000,
+ .max = 25,
+ .bin_size = 20,
+ };
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ sfb = SFB(qdisc);
+
+ opt.limit = sfb->packet_limit;
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "sfb");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ r = sd_netlink_message_append_data(req, TCA_SFB_PARMS, &opt, sizeof(struct tc_sfb_qopt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_SFB_PARMS attribute: %m");
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_stochastic_fair_blue_u32(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ StochasticFairBlue *sfb;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_SFB, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ sfb = SFB(qdisc);
+
+ if (isempty(rvalue)) {
+ sfb->packet_limit = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou32(rvalue, &sfb->packet_limit);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable sfb_vtable = {
+ .object_size = sizeof(StochasticFairBlue),
+ .tca_kind = "sfb",
+ .fill_message = stochastic_fair_blue_fill_message,
+};
diff --git a/src/network/tc/sfb.h b/src/network/tc/sfb.h
new file mode 100644
index 0000000..628df35
--- /dev/null
+++ b/src/network/tc/sfb.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2020 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct StochasticFairBlue {
+ QDisc meta;
+
+ uint32_t packet_limit;
+} StochasticFairBlue;
+
+DEFINE_QDISC_CAST(SFB, StochasticFairBlue);
+extern const QDiscVTable sfb_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_stochastic_fair_blue_u32);
diff --git a/src/network/tc/sfq.c b/src/network/tc/sfq.c
new file mode 100644
index 0000000..387be83
--- /dev/null
+++ b/src/network/tc/sfq.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "sfq.h"
+#include "string-util.h"
+
+static int stochastic_fairness_queueing_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ StochasticFairnessQueueing *sfq;
+ struct tc_sfq_qopt_v1 opt = {};
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ sfq = SFQ(qdisc);
+
+ opt.v0.perturb_period = sfq->perturb_period / USEC_PER_SEC;
+
+ r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(struct tc_sfq_qopt_v1));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_OPTIONS attribute: %m");
+
+ return 0;
+}
+
+int config_parse_stochastic_fairness_queueing_perturb_period(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ StochasticFairnessQueueing *sfq;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_SFQ, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ sfq = SFQ(qdisc);
+
+ if (isempty(rvalue)) {
+ sfq->perturb_period = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_sec(rvalue, &sfq->perturb_period);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+const QDiscVTable sfq_vtable = {
+ .object_size = sizeof(StochasticFairnessQueueing),
+ .tca_kind = "sfq",
+ .fill_message = stochastic_fairness_queueing_fill_message,
+};
diff --git a/src/network/tc/sfq.h b/src/network/tc/sfq.h
new file mode 100644
index 0000000..1626775
--- /dev/null
+++ b/src/network/tc/sfq.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+#include "time-util.h"
+
+typedef struct StochasticFairnessQueueing {
+ QDisc meta;
+
+ usec_t perturb_period;
+} StochasticFairnessQueueing;
+
+DEFINE_QDISC_CAST(SFQ, StochasticFairnessQueueing);
+extern const QDiscVTable sfq_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_stochastic_fairness_queueing_perturb_period);
diff --git a/src/network/tc/tbf.c b/src/network/tc/tbf.c
new file mode 100644
index 0000000..2d84c5a
--- /dev/null
+++ b/src/network/tc/tbf.c
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+#include <math.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "netem.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tc-util.h"
+
+static int token_bucket_filter_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ uint32_t rtab[256], ptab[256];
+ struct tc_tbf_qopt opt = {};
+ TokenBucketFilter *tbf;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ tbf = TBF(qdisc);
+
+ opt.rate.rate = tbf->rate >= (1ULL << 32) ? ~0U : tbf->rate;
+ opt.peakrate.rate = tbf->peak_rate >= (1ULL << 32) ? ~0U : tbf->peak_rate;
+
+ if (tbf->limit > 0)
+ opt.limit = tbf->limit;
+ else {
+ double lim, lim2;
+
+ lim = tbf->rate * (double) tbf->latency / USEC_PER_SEC + tbf->burst;
+ if (tbf->peak_rate > 0) {
+ lim2 = tbf->peak_rate * (double) tbf->latency / USEC_PER_SEC + tbf->mtu;
+ lim = MIN(lim, lim2);
+ }
+ opt.limit = lim;
+ }
+
+ opt.rate.mpu = tbf->mpu;
+
+ r = tc_fill_ratespec_and_table(&opt.rate, rtab, tbf->mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate ratespec: %m");
+
+ r = tc_transmit_time(opt.rate.rate, tbf->burst, &opt.buffer);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate buffer size: %m");
+
+ if (opt.peakrate.rate > 0) {
+ opt.peakrate.mpu = tbf->mpu;
+
+ r = tc_fill_ratespec_and_table(&opt.peakrate, ptab, tbf->mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate ratespec: %m");
+
+ r = tc_transmit_time(opt.peakrate.rate, tbf->mtu, &opt.mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate mtu size: %m");
+ }
+
+ r = sd_netlink_message_open_container_union(req, TCA_OPTIONS, "tbf");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
+
+ r = sd_netlink_message_append_data(req, TCA_TBF_PARMS, &opt, sizeof(struct tc_tbf_qopt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_TBF_PARMS attribute: %m");
+
+ r = sd_netlink_message_append_data(req, TCA_TBF_BURST, &tbf->burst, sizeof(tbf->burst));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_TBF_BURST attribute: %m");
+
+ if (tbf->rate >= (1ULL << 32)) {
+ r = sd_netlink_message_append_u64(req, TCA_TBF_RATE64, tbf->rate);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_TBF_RATE64 attribute: %m");
+ }
+
+ r = sd_netlink_message_append_data(req, TCA_TBF_RTAB, rtab, sizeof(rtab));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_TBF_RTAB attribute: %m");
+
+ if (opt.peakrate.rate > 0) {
+ if (tbf->peak_rate >= (1ULL << 32)) {
+ r = sd_netlink_message_append_u64(req, TCA_TBF_PRATE64, tbf->peak_rate);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_TBF_PRATE64 attribute: %m");
+ }
+
+ r = sd_netlink_message_append_u32(req, TCA_TBF_PBURST, tbf->mtu);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_TBF_PBURST attribute: %m");
+
+ r = sd_netlink_message_append_data(req, TCA_TBF_PTAB, ptab, sizeof(ptab));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_TBF_PTAB attribute: %m");
+ }
+
+ r = sd_netlink_message_close_container(req);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
+
+ return 0;
+}
+
+int config_parse_token_bucket_filter_size(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ TokenBucketFilter *tbf;
+ uint64_t k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_TBF, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ tbf = TBF(qdisc);
+
+ if (isempty(rvalue)) {
+ if (STR_IN_SET(lvalue, "BurstBytes", "Burst"))
+ tbf->burst = 0;
+ else if (STR_IN_SET(lvalue, "LimitBytes", "LimitSize"))
+ tbf->limit = 0;
+ else if (streq(lvalue, "MTUBytes"))
+ tbf->mtu = 0;
+ else if (streq(lvalue, "MPUBytes"))
+ tbf->mpu = 0;
+ else
+ assert_not_reached("unknown lvalue");
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1024, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ if (STR_IN_SET(lvalue, "BurstBytes", "Burst"))
+ tbf->burst = k;
+ else if (STR_IN_SET(lvalue, "LimitBytes", "LimitSize"))
+ tbf->limit = k;
+ else if (streq(lvalue, "MPUBytes"))
+ tbf->mpu = k;
+ else if (streq(lvalue, "MTUBytes"))
+ tbf->mtu = k;
+ else
+ assert_not_reached("unknown lvalue");
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_token_bucket_filter_rate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ TokenBucketFilter *tbf;
+ uint64_t k, *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_TBF, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ tbf = TBF(qdisc);
+ if (streq(lvalue, "Rate"))
+ p = &tbf->rate;
+ else if (streq(lvalue, "PeakRate"))
+ p = &tbf->peak_rate;
+ else
+ assert_not_reached("unknown lvalue");
+
+ if (isempty(rvalue)) {
+ *p = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_size(rvalue, 1000, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ *p = k / 8;
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_token_bucket_filter_latency(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ Network *network = data;
+ TokenBucketFilter *tbf;
+ usec_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_TBF, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ tbf = TBF(qdisc);
+
+ if (isempty(rvalue)) {
+ tbf->latency = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_sec(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ tbf->latency = u;
+
+ qdisc = NULL;
+
+ return 0;
+}
+
+static int token_bucket_filter_verify(QDisc *qdisc) {
+ TokenBucketFilter *tbf = TBF(qdisc);
+
+ if (tbf->limit > 0 && tbf->latency > 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Specifying both LimitSize= and LatencySec= is not allowed. "
+ "Ignoring [TokenBucketFilter] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ if (tbf->limit == 0 && tbf->latency == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Either LimitSize= or LatencySec= is required. "
+ "Ignoring [TokenBucketFilter] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ if (tbf->rate == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Rate= is mandatory. "
+ "Ignoring [TokenBucketFilter] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ if (tbf->burst == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: Burst= is mandatory. "
+ "Ignoring [TokenBucketFilter] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ if (tbf->peak_rate > 0 && tbf->mtu == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: MTUBytes= is mandatory when PeakRate= is specified. "
+ "Ignoring [TokenBucketFilter] section from line %u.",
+ qdisc->section->filename, qdisc->section->line);
+
+ return 0;
+}
+
+const QDiscVTable tbf_vtable = {
+ .object_size = sizeof(TokenBucketFilter),
+ .tca_kind = "tbf",
+ .fill_message = token_bucket_filter_fill_message,
+ .verify = token_bucket_filter_verify
+};
diff --git a/src/network/tc/tbf.h b/src/network/tc/tbf.h
new file mode 100644
index 0000000..6b4b017
--- /dev/null
+++ b/src/network/tc/tbf.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+#include "time-util.h"
+
+typedef struct TokenBucketFilter {
+ QDisc meta;
+
+ uint64_t rate;
+ uint64_t peak_rate;
+ uint32_t burst;
+ uint32_t mtu;
+ usec_t latency;
+ size_t limit;
+ size_t mpu;
+} TokenBucketFilter;
+
+DEFINE_QDISC_CAST(TBF, TokenBucketFilter);
+extern const QDiscVTable tbf_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_token_bucket_filter_latency);
+CONFIG_PARSER_PROTOTYPE(config_parse_token_bucket_filter_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_token_bucket_filter_rate);
diff --git a/src/network/tc/tc-util.c b/src/network/tc/tc-util.c
new file mode 100644
index 0000000..3e10b50
--- /dev/null
+++ b/src/network/tc/tc-util.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "tc-util.h"
+#include "time-util.h"
+
+int tc_init(double *ret_ticks_in_usec, uint32_t *ret_hz) {
+ static double ticks_in_usec = -1;
+ static uint32_t hz;
+
+ if (ticks_in_usec < 0) {
+ uint32_t clock_resolution, ticks_to_usec, usec_to_ticks;
+ _cleanup_free_ char *line = NULL;
+ double clock_factor;
+ int r;
+
+ r = read_one_line_file("/proc/net/psched", &line);
+ if (r < 0)
+ return r;
+
+ r = sscanf(line, "%08x%08x%08x%08x", &ticks_to_usec, &usec_to_ticks, &clock_resolution, &hz);
+ if (r < 4)
+ return -EIO;
+
+ clock_factor = (double) clock_resolution / USEC_PER_SEC;
+ ticks_in_usec = (double) ticks_to_usec / usec_to_ticks * clock_factor;
+ }
+
+ if (ret_ticks_in_usec)
+ *ret_ticks_in_usec = ticks_in_usec;
+ if (ret_hz)
+ *ret_hz = hz;
+
+ return 0;
+}
+
+int tc_time_to_tick(usec_t t, uint32_t *ret) {
+ double ticks_in_usec;
+ usec_t a;
+ int r;
+
+ assert(ret);
+
+ r = tc_init(&ticks_in_usec, NULL);
+ if (r < 0)
+ return r;
+
+ a = t * ticks_in_usec;
+ if (a > UINT32_MAX)
+ return -ERANGE;
+
+ *ret = a;
+ return 0;
+}
+
+int parse_tc_percent(const char *s, uint32_t *percent) {
+ int r;
+
+ assert(s);
+ assert(percent);
+
+ r = parse_permille(s);
+ if (r < 0)
+ return r;
+
+ *percent = (double) r / 1000 * UINT32_MAX;
+ return 0;
+}
+
+int tc_transmit_time(uint64_t rate, uint32_t size, uint32_t *ret) {
+ return tc_time_to_tick(USEC_PER_SEC * ((double)size / (double)rate), ret);
+}
+
+int tc_fill_ratespec_and_table(struct tc_ratespec *rate, uint32_t *rtab, uint32_t mtu) {
+ uint32_t cell_log = 0;
+ int r;
+
+ if (mtu == 0)
+ mtu = 2047;
+
+ while ((mtu >> cell_log) > 255)
+ cell_log++;
+
+ for (size_t i = 0; i < 256; i++) {
+ uint32_t sz;
+
+ sz = (i + 1) << cell_log;
+ if (sz < rate->mpu)
+ sz = rate->mpu;
+ r = tc_transmit_time(rate->rate, sz, &rtab[i]);
+ if (r < 0)
+ return r;
+ }
+
+ rate->cell_align = -1;
+ rate->cell_log = cell_log;
+ rate->linklayer = TC_LINKLAYER_ETHERNET;
+ return 0;
+}
+
+int parse_handle(const char *t, uint32_t *ret) {
+ _cleanup_free_ char *word = NULL;
+ uint16_t major, minor;
+ int r;
+
+ assert(t);
+ assert(ret);
+
+ /* Extract the major number. */
+ r = extract_first_word(&t, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (!t)
+ return -EINVAL;
+
+ r = safe_atou16_full(word, 16, &major);
+ if (r < 0)
+ return r;
+
+ r = safe_atou16_full(t, 16, &minor);
+ if (r < 0)
+ return r;
+
+ *ret = ((uint32_t) major << 16) | minor;
+ return 0;
+}
diff --git a/src/network/tc/tc-util.h b/src/network/tc/tc-util.h
new file mode 100644
index 0000000..83bad8e
--- /dev/null
+++ b/src/network/tc/tc-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include <linux/pkt_sched.h>
+
+#include "time-util.h"
+
+int tc_init(double *ret_ticks_in_usec, uint32_t *ret_hz);
+int tc_time_to_tick(usec_t t, uint32_t *ret);
+int parse_tc_percent(const char *s, uint32_t *percent);
+int tc_transmit_time(uint64_t rate, uint32_t size, uint32_t *ret);
+int tc_fill_ratespec_and_table(struct tc_ratespec *rate, uint32_t *rtab, uint32_t mtu);
+int parse_handle(const char *t, uint32_t *ret);
diff --git a/src/network/tc/tc.c b/src/network/tc/tc.c
new file mode 100644
index 0000000..c32b040
--- /dev/null
+++ b/src/network/tc/tc.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "macro.h"
+#include "qdisc.h"
+#include "tc.h"
+#include "tclass.h"
+
+void traffic_control_free(TrafficControl *tc) {
+ if (!tc)
+ return;
+
+ switch (tc->kind) {
+ case TC_KIND_QDISC:
+ qdisc_free(TC_TO_QDISC(tc));
+ break;
+ case TC_KIND_TCLASS:
+ tclass_free(TC_TO_TCLASS(tc));
+ break;
+ default:
+ assert_not_reached("Invalid traffic control type");
+ }
+}
+
+static int traffic_control_configure(Link *link, TrafficControl *tc) {
+ assert(link);
+ assert(tc);
+
+ switch(tc->kind) {
+ case TC_KIND_QDISC:
+ return qdisc_configure(link, TC_TO_QDISC(tc));
+ case TC_KIND_TCLASS:
+ return tclass_configure(link, TC_TO_TCLASS(tc));
+ default:
+ assert_not_reached("Invalid traffic control type");
+ }
+}
+
+int link_configure_traffic_control(Link *link) {
+ TrafficControl *tc;
+ int r;
+
+ link->tc_configured = false;
+ link->tc_messages = 0;
+
+ ORDERED_HASHMAP_FOREACH(tc, link->network->tc_by_section) {
+ r = traffic_control_configure(link, tc);
+ if (r < 0)
+ return r;
+ }
+
+ if (link->tc_messages == 0)
+ link->tc_configured = true;
+ else
+ log_link_debug(link, "Configuring traffic control");
+
+ return 0;
+}
+
+static int traffic_control_section_verify(TrafficControl *tc, bool *qdisc_has_root, bool *qdisc_has_clsact) {
+ assert(tc);
+
+ switch(tc->kind) {
+ case TC_KIND_QDISC:
+ return qdisc_section_verify(TC_TO_QDISC(tc), qdisc_has_root, qdisc_has_clsact);
+ case TC_KIND_TCLASS:
+ return tclass_section_verify(TC_TO_TCLASS(tc));
+ default:
+ assert_not_reached("Invalid traffic control type");
+ }
+}
+
+void network_drop_invalid_traffic_control(Network *network) {
+ bool has_root = false, has_clsact = false;
+ TrafficControl *tc;
+
+ assert(network);
+
+ ORDERED_HASHMAP_FOREACH(tc, network->tc_by_section)
+ if (traffic_control_section_verify(tc, &has_root, &has_clsact) < 0)
+ traffic_control_free(tc);
+}
diff --git a/src/network/tc/tc.h b/src/network/tc/tc.h
new file mode 100644
index 0000000..7fbd744
--- /dev/null
+++ b/src/network/tc/tc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "networkd-link.h"
+
+typedef enum TrafficControlKind {
+ TC_KIND_QDISC,
+ TC_KIND_TCLASS,
+ TC_KIND_FILTER,
+ _TC_KIND_MAX,
+ _TC_KIND_INVALID = -1,
+} TrafficControlKind;
+
+typedef struct TrafficControl {
+ TrafficControlKind kind;
+} TrafficControl;
+
+/* For casting a tc into the various tc kinds */
+#define DEFINE_TC_CAST(UPPERCASE, MixedCase) \
+ static inline MixedCase* TC_TO_##UPPERCASE(TrafficControl *tc) { \
+ if (_unlikely_(!tc || tc->kind != TC_KIND_##UPPERCASE)) \
+ return NULL; \
+ \
+ return (MixedCase*) tc; \
+ }
+
+/* For casting the various tc kinds into a tc */
+#define TC(tc) (&(tc)->meta)
+
+void traffic_control_free(TrafficControl *tc);
+int link_configure_traffic_control(Link *link);
+void network_drop_invalid_traffic_control(Network *network);
diff --git a/src/network/tc/tclass.c b/src/network/tc/tclass.c
new file mode 100644
index 0000000..21b26b0
--- /dev/null
+++ b/src/network/tc/tclass.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tc-util.h"
+#include "tclass.h"
+
+const TClassVTable * const tclass_vtable[_TCLASS_KIND_MAX] = {
+ [TCLASS_KIND_DRR] = &drr_tclass_vtable,
+ [TCLASS_KIND_HTB] = &htb_tclass_vtable,
+ [TCLASS_KIND_QFQ] = &qfq_tclass_vtable,
+};
+
+static int tclass_new(TClassKind kind, TClass **ret) {
+ _cleanup_(tclass_freep) TClass *tclass = NULL;
+ int r;
+
+ tclass = malloc0(tclass_vtable[kind]->object_size);
+ if (!tclass)
+ return -ENOMEM;
+
+ tclass->meta.kind = TC_KIND_TCLASS,
+ tclass->parent = TC_H_ROOT;
+ tclass->kind = kind;
+
+ if (TCLASS_VTABLE(tclass)->init) {
+ r = TCLASS_VTABLE(tclass)->init(tclass);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(tclass);
+
+ return 0;
+}
+
+int tclass_new_static(TClassKind kind, Network *network, const char *filename, unsigned section_line, TClass **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(tclass_freep) TClass *tclass = NULL;
+ TrafficControl *existing;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(filename);
+ assert(section_line > 0);
+
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ existing = ordered_hashmap_get(network->tc_by_section, n);
+ if (existing) {
+ TClass *t;
+
+ if (existing->kind != TC_KIND_TCLASS)
+ return -EINVAL;
+
+ t = TC_TO_TCLASS(existing);
+
+ if (t->kind != kind)
+ return -EINVAL;
+
+ *ret = t;
+ return 0;
+ }
+
+ r = tclass_new(kind, &tclass);
+ if (r < 0)
+ return r;
+
+ tclass->network = network;
+ tclass->section = TAKE_PTR(n);
+
+ r = ordered_hashmap_ensure_allocated(&network->tc_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(network->tc_by_section, tclass->section, tclass);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(tclass);
+ return 0;
+}
+
+void tclass_free(TClass *tclass) {
+ if (!tclass)
+ return;
+
+ if (tclass->network && tclass->section)
+ ordered_hashmap_remove(tclass->network->tc_by_section, tclass->section);
+
+ network_config_section_free(tclass->section);
+
+ free(tclass);
+}
+
+static int tclass_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->tc_messages > 0);
+ link->tc_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_message_error_errno(link, m, r, "Could not set TClass");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->tc_messages == 0) {
+ log_link_debug(link, "Traffic control configured");
+ link->tc_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+int tclass_configure(Link *link, TClass *tclass) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+
+ r = sd_rtnl_message_new_tclass(link->manager->rtnl, &req, RTM_NEWTCLASS, AF_UNSPEC, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_NEWTCLASS message: %m");
+
+ r = sd_rtnl_message_set_tclass_parent(req, tclass->parent);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create tcm_parent message: %m");
+
+ if (tclass->classid != TC_H_UNSPEC) {
+ r = sd_rtnl_message_set_tclass_handle(req, tclass->classid);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set tcm_handle message: %m");
+ }
+
+ r = sd_netlink_message_append_string(req, TCA_KIND, TCLASS_VTABLE(tclass)->tca_kind);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_KIND attribute: %m");
+
+ if (TCLASS_VTABLE(tclass)->fill_message) {
+ r = TCLASS_VTABLE(tclass)->fill_message(link, tclass, req);
+ if (r < 0)
+ return r;
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, tclass_handler, link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ link->tc_messages++;
+
+ return 0;
+}
+
+int tclass_section_verify(TClass *tclass) {
+ int r;
+
+ assert(tclass);
+
+ if (section_is_invalid(tclass->section))
+ return -EINVAL;
+
+ if (TCLASS_VTABLE(tclass)->verify) {
+ r = TCLASS_VTABLE(tclass)->verify(tclass);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int config_parse_tclass_parent(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = tclass_new_static(ltype, network, filename, section_line, &tclass);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create traffic control class, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (streq(rvalue, "root"))
+ tclass->parent = TC_H_ROOT;
+ else {
+ r = parse_handle(rvalue, &tclass->parent);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse 'Parent=', ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ }
+
+ tclass = NULL;
+
+ return 0;
+}
+
+int config_parse_tclass_classid(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(tclass_free_or_set_invalidp) TClass *tclass = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = tclass_new_static(ltype, network, filename, section_line, &tclass);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to create traffic control class, ignoring assignment: %m");
+ return 0;
+ }
+
+ if (isempty(rvalue)) {
+ tclass->classid = TC_H_UNSPEC;
+ tclass = NULL;
+ return 0;
+ }
+
+ r = parse_handle(rvalue, &tclass->classid);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse 'ClassId=', ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ tclass = NULL;
+
+ return 0;
+}
diff --git a/src/network/tc/tclass.h b/src/network/tc/tclass.h
new file mode 100644
index 0000000..f02a6a7
--- /dev/null
+++ b/src/network/tc/tclass.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+#include "networkd-util.h"
+#include "tc.h"
+
+typedef enum TClassKind {
+ TCLASS_KIND_DRR,
+ TCLASS_KIND_HTB,
+ TCLASS_KIND_QFQ,
+ _TCLASS_KIND_MAX,
+ _TCLASS_KIND_INVALID = -1,
+} TClassKind;
+
+typedef struct TClass {
+ TrafficControl meta;
+
+ NetworkConfigSection *section;
+ Network *network;
+
+ uint32_t classid;
+ uint32_t parent;
+
+ TClassKind kind;
+} TClass;
+
+typedef struct TClassVTable {
+ size_t object_size;
+ const char *tca_kind;
+ /* called in tclass_new() */
+ int (*init)(TClass *tclass);
+ int (*fill_message)(Link *link, TClass *tclass, sd_netlink_message *m);
+ int (*verify)(TClass *tclass);
+} TClassVTable;
+
+extern const TClassVTable * const tclass_vtable[_TCLASS_KIND_MAX];
+
+#define TCLASS_VTABLE(t) ((t)->kind != _TCLASS_KIND_INVALID ? tclass_vtable[(t)->kind] : NULL)
+
+/* For casting a tclass into the various tclass kinds */
+#define DEFINE_TCLASS_CAST(UPPERCASE, MixedCase) \
+ static inline MixedCase* TCLASS_TO_##UPPERCASE(TClass *t) { \
+ if (_unlikely_(!t || t->kind != TCLASS_KIND_##UPPERCASE)) \
+ return NULL; \
+ \
+ return (MixedCase*) t; \
+ }
+
+/* For casting the various tclass kinds into a tclass */
+#define TCLASS(t) (&(t)->meta)
+
+void tclass_free(TClass *tclass);
+int tclass_new_static(TClassKind kind, Network *network, const char *filename, unsigned section_line, TClass **ret);
+
+int tclass_configure(Link *link, TClass *tclass);
+int tclass_section_verify(TClass *tclass);
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(TClass, tclass_free);
+
+DEFINE_TC_CAST(TCLASS, TClass);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_tclass_parent);
+CONFIG_PARSER_PROTOTYPE(config_parse_tclass_classid);
+
+#include "drr.h"
+#include "htb.h"
+#include "qfq.h"
diff --git a/src/network/tc/teql.c b/src/network/tc/teql.c
new file mode 100644
index 0000000..0da2fc3
--- /dev/null
+++ b/src/network/tc/teql.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "macro.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "teql.h"
+
+static int trivial_link_equalizer_fill_tca_kind(Link *link, QDisc *qdisc, sd_netlink_message *req) {
+ char kind[STRLEN("teql") + DECIMAL_STR_MAX(unsigned)];
+ TrivialLinkEqualizer *teql;
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ teql = TEQL(qdisc);
+
+ xsprintf(kind, "teql%u", teql->id);
+ r = sd_netlink_message_append_string(req, TCA_KIND, kind);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_KIND attribute: %m");
+
+ return 0;
+}
+
+const QDiscVTable teql_vtable = {
+ .object_size = sizeof(TrivialLinkEqualizer),
+ .fill_tca_kind = trivial_link_equalizer_fill_tca_kind,
+};
+
+int config_parse_trivial_link_equalizer_id(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
+ TrivialLinkEqualizer *teql;
+ Network *network = data;
+ unsigned id;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(QDISC_KIND_TEQL, network, filename, section_line, &qdisc);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "More than one kind of queueing discipline, ignoring assignment: %m");
+ return 0;
+ }
+
+ teql = TEQL(qdisc);
+
+ if (isempty(rvalue)) {
+ teql->id = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = safe_atou(rvalue, &id);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+ if (id > INT_MAX)
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "'%s=' is too large, ignoring assignment: %s",
+ lvalue, rvalue);
+
+ teql->id = id;
+
+ qdisc = NULL;
+ return 0;
+}
diff --git a/src/network/tc/teql.h b/src/network/tc/teql.h
new file mode 100644
index 0000000..8d0085e
--- /dev/null
+++ b/src/network/tc/teql.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "qdisc.h"
+
+typedef struct TrivialLinkEqualizer {
+ QDisc meta;
+
+ unsigned id;
+} TrivialLinkEqualizer;
+
+DEFINE_QDISC_CAST(TEQL, TrivialLinkEqualizer);
+extern const QDiscVTable teql_vtable;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_trivial_link_equalizer_id);
diff --git a/src/network/test-network-tables.c b/src/network/test-network-tables.c
new file mode 100644
index 0000000..475cac7
--- /dev/null
+++ b/src/network/test-network-tables.c
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bond.h"
+#include "dhcp6-internal.h"
+#include "dhcp6-protocol.h"
+#include "ethtool-util.h"
+#include "ipvlan.h"
+#include "lldp-internal.h"
+#include "macvlan.h"
+#include "ndisc-internal.h"
+#include "netlink-internal.h"
+#include "networkd-link.h"
+#include "networkd-network.h"
+#include "networkd-util.h"
+#include "test-tables.h"
+#include "tunnel.h"
+
+int main(int argc, char **argv) {
+ test_table(bond_ad_select, NETDEV_BOND_AD_SELECT);
+ test_table(bond_arp_all_targets, NETDEV_BOND_ARP_ALL_TARGETS);
+ test_table(bond_arp_validate, NETDEV_BOND_ARP_VALIDATE);
+ test_table(bond_fail_over_mac, NETDEV_BOND_FAIL_OVER_MAC);
+ test_table(bond_lacp_rate, NETDEV_BOND_LACP_RATE);
+ test_table(bond_mode, NETDEV_BOND_MODE);
+ test_table(bond_primary_reselect, NETDEV_BOND_PRIMARY_RESELECT);
+ test_table(bond_xmit_hash_policy, NETDEV_BOND_XMIT_HASH_POLICY);
+ test_table(dhcp6_message_status, DHCP6_STATUS);
+ test_table_sparse(dhcp6_message_type, DHCP6_MESSAGE); /* enum starts from 1 */
+ test_table(dhcp_use_domains, DHCP_USE_DOMAINS);
+ test_table(duplex, DUP);
+ test_table(ip6tnl_mode, NETDEV_IP6_TNL_MODE);
+ test_table(ipv6_privacy_extensions, IPV6_PRIVACY_EXTENSIONS);
+ test_table(ipvlan_flags, NETDEV_IPVLAN_FLAGS);
+ test_table(link_operstate, LINK_OPERSTATE);
+ /* test_table(link_state, LINK_STATE); — not a reversible mapping */
+ test_table(lldp_mode, LLDP_MODE);
+ test_table(netdev_kind, NETDEV_KIND);
+ test_table(nl_union_link_info_data, NL_UNION_LINK_INFO_DATA);
+ test_table(radv_prefix_delegation, RADV_PREFIX_DELEGATION);
+ test_table(wol, WOL);
+ test_table(lldp_event, SD_LLDP_EVENT);
+ test_table(ndisc_event, SD_NDISC_EVENT);
+
+ test_table_sparse(ipvlan_mode, NETDEV_IPVLAN_MODE);
+ test_table_sparse(macvlan_mode, NETDEV_MACVLAN_MODE);
+ test_table_sparse(address_family, ADDRESS_FAMILY);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/network/test-network.c b/src/network/test-network.c
new file mode 100644
index 0000000..03c9440
--- /dev/null
+++ b/src/network/test-network.c
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <sys/param.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "dhcp-lease-internal.h"
+#include "ether-addr-util.h"
+#include "hostname-util.h"
+#include "network-internal.h"
+#include "networkd-manager.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_deserialize_in_addr(void) {
+ _cleanup_free_ struct in_addr *addresses = NULL;
+ _cleanup_free_ struct in6_addr *addresses6 = NULL;
+ union in_addr_union a, b, c, d, e, f;
+ int size;
+ const char *addresses_string = "192.168.0.1 0:0:0:0:0:FFFF:204.152.189.116 192.168.0.2 ::1 192.168.0.3 1:0:0:0:0:0:0:8";
+
+ assert_se(in_addr_from_string(AF_INET, "0:0:0:0:0:FFFF:204.152.189.116", &a) < 0);
+ assert_se(in_addr_from_string(AF_INET6, "192.168.0.1", &d) < 0);
+
+ assert_se(in_addr_from_string(AF_INET, "192.168.0.1", &a) >= 0);
+ assert_se(in_addr_from_string(AF_INET, "192.168.0.2", &b) >= 0);
+ assert_se(in_addr_from_string(AF_INET, "192.168.0.3", &c) >= 0);
+ assert_se(in_addr_from_string(AF_INET6, "0:0:0:0:0:FFFF:204.152.189.116", &d) >= 0);
+ assert_se(in_addr_from_string(AF_INET6, "::1", &e) >= 0);
+ assert_se(in_addr_from_string(AF_INET6, "1:0:0:0:0:0:0:8", &f) >= 0);
+
+ assert_se((size = deserialize_in_addrs(&addresses, addresses_string)) >= 0);
+ assert_se(size == 3);
+ assert_se(in_addr_equal(AF_INET, &a, (union in_addr_union *) &addresses[0]));
+ assert_se(in_addr_equal(AF_INET, &b, (union in_addr_union *) &addresses[1]));
+ assert_se(in_addr_equal(AF_INET, &c, (union in_addr_union *) &addresses[2]));
+
+ assert_se((size = deserialize_in6_addrs(&addresses6, addresses_string)) >= 0);
+ assert_se(size == 3);
+ assert_se(in_addr_equal(AF_INET6, &d, (union in_addr_union *) &addresses6[0]));
+ assert_se(in_addr_equal(AF_INET6, &e, (union in_addr_union *) &addresses6[1]));
+ assert_se(in_addr_equal(AF_INET6, &f, (union in_addr_union *) &addresses6[2]));
+}
+
+static void test_deserialize_dhcp_routes(void) {
+ size_t size, allocated;
+
+ {
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ assert_se(deserialize_dhcp_routes(&routes, &size, &allocated, "") >= 0);
+ assert_se(size == 0);
+ }
+
+ {
+ /* no errors */
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ const char *routes_string = "192.168.0.0/16,192.168.0.1 10.1.2.0/24,10.1.2.1 0.0.0.0/0,10.0.1.1";
+
+ assert_se(deserialize_dhcp_routes(&routes, &size, &allocated, routes_string) >= 0);
+
+ assert_se(size == 3);
+ assert_se(routes[0].dst_addr.s_addr == inet_addr("192.168.0.0"));
+ assert_se(routes[0].gw_addr.s_addr == inet_addr("192.168.0.1"));
+ assert_se(routes[0].dst_prefixlen == 16);
+
+ assert_se(routes[1].dst_addr.s_addr == inet_addr("10.1.2.0"));
+ assert_se(routes[1].gw_addr.s_addr == inet_addr("10.1.2.1"));
+ assert_se(routes[1].dst_prefixlen == 24);
+
+ assert_se(routes[2].dst_addr.s_addr == inet_addr("0.0.0.0"));
+ assert_se(routes[2].gw_addr.s_addr == inet_addr("10.0.1.1"));
+ assert_se(routes[2].dst_prefixlen == 0);
+ }
+
+ {
+ /* error in second word */
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ const char *routes_string = "192.168.0.0/16,192.168.0.1 10.1.2.0#24,10.1.2.1 0.0.0.0/0,10.0.1.1";
+
+ assert_se(deserialize_dhcp_routes(&routes, &size, &allocated, routes_string) >= 0);
+
+ assert_se(size == 2);
+ assert_se(routes[0].dst_addr.s_addr == inet_addr("192.168.0.0"));
+ assert_se(routes[0].gw_addr.s_addr == inet_addr("192.168.0.1"));
+ assert_se(routes[0].dst_prefixlen == 16);
+
+ assert_se(routes[1].dst_addr.s_addr == inet_addr("0.0.0.0"));
+ assert_se(routes[1].gw_addr.s_addr == inet_addr("10.0.1.1"));
+ assert_se(routes[1].dst_prefixlen == 0);
+ }
+
+ {
+ /* error in every word */
+ _cleanup_free_ struct sd_dhcp_route *routes = NULL;
+ const char *routes_string = "192.168.0.0/55,192.168.0.1 10.1.2.0#24,10.1.2.1 0.0.0.0/0,10.0.1.X";
+
+ assert_se(deserialize_dhcp_routes(&routes, &size, &allocated, routes_string) >= 0);
+ assert_se(size == 0);
+ }
+}
+
+static int test_load_config(Manager *manager) {
+ int r;
+/* TODO: should_reload, is false if the config dirs do not exist, so
+ * so we can't do this test here, move it to a test for paths_check_timestamps
+ * directly
+ *
+ * assert_se(network_should_reload(manager) == true);
+*/
+
+ r = manager_load_config(manager);
+ if (r == -EPERM)
+ return r;
+ assert_se(r >= 0);
+
+ assert_se(manager_should_reload(manager) == false);
+
+ return 0;
+}
+
+static void test_network_get(Manager *manager, sd_device *loopback) {
+ Network *network;
+ const struct ether_addr mac = ETHER_ADDR_NULL;
+ int r;
+
+ /* Let's hope that the test machine does not have a .network file that applies to loopback device…
+ * But it is still possible, so let's allow that case too. */
+ r = network_get(manager, 0, loopback, "lo", NULL, NULL, &mac, &mac, 0, NULL, NULL, &network);
+ if (r == -ENOENT)
+ /* The expected case */
+ assert_se(!network);
+ else if (r >= 0)
+ assert_se(network);
+ else
+ assert_not_reached("bad error!");
+}
+
+static void test_address_equality(void) {
+ _cleanup_(address_freep) Address *a1 = NULL, *a2 = NULL;
+
+ assert_se(address_new(&a1) >= 0);
+ assert_se(address_new(&a2) >= 0);
+
+ assert_se(address_equal(NULL, NULL));
+ assert_se(!address_equal(a1, NULL));
+ assert_se(!address_equal(NULL, a2));
+ assert_se(address_equal(a1, a2));
+
+ a1->family = AF_INET;
+ assert_se(!address_equal(a1, a2));
+
+ a2->family = AF_INET;
+ assert_se(address_equal(a1, a2));
+
+ assert_se(in_addr_from_string(AF_INET, "192.168.3.9", &a1->in_addr) >= 0);
+ assert_se(!address_equal(a1, a2));
+ assert_se(in_addr_from_string(AF_INET, "192.168.3.9", &a2->in_addr) >= 0);
+ assert_se(address_equal(a1, a2));
+ assert_se(in_addr_from_string(AF_INET, "192.168.3.10", &a1->in_addr_peer) >= 0);
+ assert_se(address_equal(a1, a2));
+ assert_se(in_addr_from_string(AF_INET, "192.168.3.11", &a2->in_addr_peer) >= 0);
+ assert_se(address_equal(a1, a2));
+ a1->prefixlen = 10;
+ assert_se(!address_equal(a1, a2));
+ a2->prefixlen = 10;
+ assert_se(address_equal(a1, a2));
+
+ a1->family = AF_INET6;
+ assert_se(!address_equal(a1, a2));
+
+ a2->family = AF_INET6;
+ assert_se(in_addr_from_string(AF_INET6, "2001:4ca0:4f01::2", &a1->in_addr) >= 0);
+ assert_se(in_addr_from_string(AF_INET6, "2001:4ca0:4f01::2", &a2->in_addr) >= 0);
+ assert_se(address_equal(a1, a2));
+
+ a2->prefixlen = 8;
+ assert_se(address_equal(a1, a2));
+
+ assert_se(in_addr_from_string(AF_INET6, "2001:4ca0:4f01::1", &a2->in_addr) >= 0);
+ assert_se(!address_equal(a1, a2));
+}
+
+static void test_dhcp_hostname_shorten_overlong(void) {
+ int r;
+
+ {
+ /* simple hostname, no actions, no errors */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong("name1", &shortened);
+ assert_se(r == 0);
+ assert_se(streq("name1", shortened));
+ }
+
+ {
+ /* simple fqdn, no actions, no errors */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong("name1.example.com", &shortened);
+ assert_se(r == 0);
+ assert_se(streq("name1.example.com", shortened));
+ }
+
+ {
+ /* overlong fqdn, cut to first dot, no errors */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong("name1.test-dhcp-this-one-here-is-a-very-very-long-domain.example.com", &shortened);
+ assert_se(r == 1);
+ assert_se(streq("name1", shortened));
+ }
+
+ {
+ /* overlong hostname, cut to HOST_MAX_LEN, no errors */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong("test-dhcp-this-one-here-is-a-very-very-long-hostname-without-domainname", &shortened);
+ assert_se(r == 1);
+ assert_se(streq("test-dhcp-this-one-here-is-a-very-very-long-hostname-without-dom", shortened));
+ }
+
+ {
+ /* overlong fqdn, cut to first dot, empty result error */
+ _cleanup_free_ char *shortened = NULL;
+ r = shorten_overlong(".test-dhcp-this-one-here-is-a-very-very-long-hostname.example.com", &shortened);
+ assert_se(r == -EDOM);
+ assert_se(shortened == NULL);
+ }
+
+}
+
+int main(void) {
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *loopback = NULL;
+ int ifindex, r;
+
+ test_setup_logging(LOG_INFO);
+
+ test_deserialize_in_addr();
+ test_deserialize_dhcp_routes();
+ test_address_equality();
+ test_dhcp_hostname_shorten_overlong();
+
+ assert_se(manager_new(&manager) >= 0);
+
+ r = test_load_config(manager);
+ if (r == -EPERM)
+ return log_tests_skipped("Cannot load configuration");
+ assert_se(r == 0);
+
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+ assert_se(loopback);
+ assert_se(sd_device_get_ifindex(loopback, &ifindex) >= 0);
+ assert_se(ifindex == 1);
+
+ test_network_get(manager, loopback);
+
+ assert_se(manager_enumerate(manager) >= 0);
+ return 0;
+}
diff --git a/src/network/test-networkd-conf.c b/src/network/test-networkd-conf.c
new file mode 100644
index 0000000..c771007
--- /dev/null
+++ b/src/network/test-networkd-conf.c
@@ -0,0 +1,260 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "ether-addr-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "macro.h"
+#include "set.h"
+#include "string-util.h"
+
+#include "network-internal.h"
+#include "networkd-conf.h"
+#include "networkd-network.h"
+
+static void test_config_parse_duid_type_one(const char *rvalue, int ret, DUIDType expected, usec_t expected_time) {
+ DUID actual = {};
+ int r;
+
+ r = config_parse_duid_type("network", "filename", 1, "section", 1, "lvalue", 0, rvalue, &actual, NULL);
+ log_info_errno(r, "\"%s\" → %d (%m)", rvalue, actual.type);
+ assert_se(r == ret);
+ assert_se(expected == actual.type);
+ if (expected == DUID_TYPE_LLT)
+ assert_se(expected_time == actual.llt_time);
+}
+
+static void test_config_parse_duid_type(void) {
+ test_config_parse_duid_type_one("", 0, 0, 0);
+ test_config_parse_duid_type_one("link-layer-time", 0, DUID_TYPE_LLT, 0);
+ test_config_parse_duid_type_one("link-layer-time:2000-01-01 00:00:00 UTC", 0, DUID_TYPE_LLT, (usec_t) 946684800000000);
+ test_config_parse_duid_type_one("vendor", 0, DUID_TYPE_EN, 0);
+ test_config_parse_duid_type_one("vendor:2000-01-01 00:00:00 UTC", 0, 0, 0);
+ test_config_parse_duid_type_one("link-layer", 0, DUID_TYPE_LL, 0);
+ test_config_parse_duid_type_one("link-layer:2000-01-01 00:00:00 UTC", 0, 0, 0);
+ test_config_parse_duid_type_one("uuid", 0, DUID_TYPE_UUID, 0);
+ test_config_parse_duid_type_one("uuid:2000-01-01 00:00:00 UTC", 0, 0, 0);
+ test_config_parse_duid_type_one("foo", 0, 0, 0);
+ test_config_parse_duid_type_one("foo:2000-01-01 00:00:00 UTC", 0, 0, 0);
+}
+
+static void test_config_parse_duid_rawdata_one(const char *rvalue, int ret, const DUID* expected) {
+ DUID actual = {};
+ int r;
+ _cleanup_free_ char *d = NULL;
+
+ r = config_parse_duid_rawdata("network", "filename", 1, "section", 1, "lvalue", 0, rvalue, &actual, NULL);
+ d = hexmem(actual.raw_data, actual.raw_data_len);
+ log_info_errno(r, "\"%s\" → \"%s\" (%m)",
+ rvalue, strnull(d));
+ assert_se(r == ret);
+ if (expected) {
+ assert_se(actual.raw_data_len == expected->raw_data_len);
+ assert_se(memcmp(actual.raw_data, expected->raw_data, expected->raw_data_len) == 0);
+ }
+}
+
+static void test_config_parse_hwaddr_one(const char *rvalue, int ret, const struct ether_addr* expected) {
+ struct ether_addr *actual = NULL;
+ int r;
+
+ r = config_parse_hwaddr("network", "filename", 1, "section", 1, "lvalue", 0, rvalue, &actual, NULL);
+ assert_se(ret == r);
+ if (expected) {
+ assert_se(actual);
+ assert_se(ether_addr_equal(expected, actual));
+ } else
+ assert_se(actual == NULL);
+
+ free(actual);
+}
+
+static void test_config_parse_hwaddrs_one(const char *rvalue, const struct ether_addr* list, size_t n) {
+ _cleanup_set_free_free_ Set *s = NULL;
+ size_t m;
+
+ assert_se(config_parse_hwaddrs("network", "filename", 1, "section", 1, "lvalue", 0, rvalue, &s, NULL) == 0);
+ assert_se(set_size(s) == n);
+
+ for (m = 0; m < n; m++) {
+ _cleanup_free_ struct ether_addr *q = NULL;
+
+ assert_se(q = set_remove(s, &list[m]));
+ }
+
+ assert_se(set_size(s) == 0);
+}
+
+#define BYTES_0_128 "0:1:2:3:4:5:6:7:8:9:a:b:c:d:e:f:10:11:12:13:14:15:16:17:18:19:1a:1b:1c:1d:1e:1f:20:21:22:23:24:25:26:27:28:29:2a:2b:2c:2d:2e:2f:30:31:32:33:34:35:36:37:38:39:3a:3b:3c:3d:3e:3f:40:41:42:43:44:45:46:47:48:49:4a:4b:4c:4d:4e:4f:50:51:52:53:54:55:56:57:58:59:5a:5b:5c:5d:5e:5f:60:61:62:63:64:65:66:67:68:69:6a:6b:6c:6d:6e:6f:70:71:72:73:74:75:76:77:78:79:7a:7b:7c:7d:7e:7f:80"
+
+#define BYTES_1_128 {0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,0xa,0xb,0xc,0xd,0xe,0xf,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80}
+
+static void test_config_parse_duid_rawdata(void) {
+ test_config_parse_duid_rawdata_one("", 0, &(DUID){});
+ test_config_parse_duid_rawdata_one("00:11:22:33:44:55:66:77", 0,
+ &(DUID){0, 8, {0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77}});
+ test_config_parse_duid_rawdata_one("00:11:22:", 0,
+ &(DUID){0, 3, {0x00,0x11,0x22}});
+ test_config_parse_duid_rawdata_one("000:11:22", 0, &(DUID){}); /* error, output is all zeros */
+ test_config_parse_duid_rawdata_one("00:111:22", 0, &(DUID){});
+ test_config_parse_duid_rawdata_one("0:1:2:3:4:5:6:7", 0,
+ &(DUID){0, 8, {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7}});
+ test_config_parse_duid_rawdata_one("11::", 0, &(DUID){0, 1, {0x11}}); /* FIXME: should this be an error? */
+ test_config_parse_duid_rawdata_one("abcdef", 0, &(DUID){});
+ test_config_parse_duid_rawdata_one(BYTES_0_128, 0, &(DUID){});
+ test_config_parse_duid_rawdata_one(&BYTES_0_128[2], 0, &(DUID){0, 128, BYTES_1_128});
+}
+
+static void test_config_parse_hwaddr(void) {
+ const struct ether_addr t[] = {
+ { .ether_addr_octet = { 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff } },
+ { .ether_addr_octet = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab } },
+ };
+
+ test_config_parse_hwaddr_one("", 0, NULL);
+ test_config_parse_hwaddr_one("no:ta:ma:ca:dd:re", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:fx", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:ff", 0, &t[0]);
+ test_config_parse_hwaddr_one(" aa:bb:cc:dd:ee:ff", 0, &t[0]);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:ff \t\n", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:ff \t\nxxx", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc: dd:ee:ff", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:d d:ee:ff", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee", 0, NULL);
+ test_config_parse_hwaddr_one("9:aa:bb:cc:dd:ee:ff", 0, NULL);
+ test_config_parse_hwaddr_one("aa:bb:cc:dd:ee:ff:gg", 0, NULL);
+ test_config_parse_hwaddr_one("aa:Bb:CC:dd:ee:ff", 0, &t[0]);
+ test_config_parse_hwaddr_one("01:23:45:67:89:aB", 0, &t[1]);
+ test_config_parse_hwaddr_one("1:23:45:67:89:aB", 0, &t[1]);
+ test_config_parse_hwaddr_one("aa-bb-cc-dd-ee-ff", 0, &t[0]);
+ test_config_parse_hwaddr_one("AA-BB-CC-DD-EE-FF", 0, &t[0]);
+ test_config_parse_hwaddr_one("01-23-45-67-89-ab", 0, &t[1]);
+ test_config_parse_hwaddr_one("aabb.ccdd.eeff", 0, &t[0]);
+ test_config_parse_hwaddr_one("0123.4567.89ab", 0, &t[1]);
+ test_config_parse_hwaddr_one("123.4567.89ab.", 0, NULL);
+ test_config_parse_hwaddr_one("aabbcc.ddeeff", 0, NULL);
+ test_config_parse_hwaddr_one("aabbccddeeff", 0, NULL);
+ test_config_parse_hwaddr_one("aabbccddee:ff", 0, NULL);
+ test_config_parse_hwaddr_one("012345.6789ab", 0, NULL);
+ test_config_parse_hwaddr_one("123.4567.89ab", 0, &t[1]);
+
+ test_config_parse_hwaddrs_one("", t, 0);
+ test_config_parse_hwaddrs_one("no:ta:ma:ca:dd:re", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:fx", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:ff", t, 1);
+ test_config_parse_hwaddrs_one(" aa:bb:cc:dd:ee:ff", t, 1);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:ff \t\n", t, 1);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:ff \t\nxxx", t, 1);
+ test_config_parse_hwaddrs_one("aa:bb:cc: dd:ee:ff", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:d d:ee:ff", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee", t, 0);
+ test_config_parse_hwaddrs_one("9:aa:bb:cc:dd:ee:ff", t, 0);
+ test_config_parse_hwaddrs_one("aa:bb:cc:dd:ee:ff:gg", t, 0);
+ test_config_parse_hwaddrs_one("aa:Bb:CC:dd:ee:ff", t, 1);
+ test_config_parse_hwaddrs_one("01:23:45:67:89:aB", &t[1], 1);
+ test_config_parse_hwaddrs_one("1:23:45:67:89:aB", &t[1], 1);
+ test_config_parse_hwaddrs_one("aa-bb-cc-dd-ee-ff", t, 1);
+ test_config_parse_hwaddrs_one("AA-BB-CC-DD-EE-FF", t, 1);
+ test_config_parse_hwaddrs_one("01-23-45-67-89-ab", &t[1], 1);
+ test_config_parse_hwaddrs_one("aabb.ccdd.eeff", t, 1);
+ test_config_parse_hwaddrs_one("0123.4567.89ab", &t[1], 1);
+ test_config_parse_hwaddrs_one("123.4567.89ab.", t, 0);
+ test_config_parse_hwaddrs_one("aabbcc.ddeeff", t, 0);
+ test_config_parse_hwaddrs_one("aabbccddeeff", t, 0);
+ test_config_parse_hwaddrs_one("aabbccddee:ff", t, 0);
+ test_config_parse_hwaddrs_one("012345.6789ab", t, 0);
+ test_config_parse_hwaddrs_one("123.4567.89ab", &t[1], 1);
+
+ test_config_parse_hwaddrs_one("123.4567.89ab aa:bb:cc:dd:ee:ff 01-23-45-67-89-ab aa:Bb:CC:dd:ee:ff", t, 2);
+ test_config_parse_hwaddrs_one("123.4567.89ab aa:bb:cc:dd:ee:fx hogehoge 01-23-45-67-89-ab aaaa aa:Bb:CC:dd:ee:ff", t, 2);
+}
+
+static void test_config_parse_address_one(const char *rvalue, int family, unsigned n_addresses, const union in_addr_union *u, unsigned char prefixlen) {
+ _cleanup_(network_unrefp) Network *network = NULL;
+
+ assert_se(network = new0(Network, 1));
+ network->n_ref = 1;
+ assert_se(network->filename = strdup("hogehoge.network"));
+ assert_se(config_parse_match_ifnames("network", "filename", 1, "section", 1, "Name", 0, "*", &network->match_name, network) == 0);
+ assert_se(config_parse_address("network", "filename", 1, "section", 1, "Address", 0, rvalue, network, network) == 0);
+ assert_se(ordered_hashmap_size(network->addresses_by_section) == 1);
+ assert_se(network_verify(network) >= 0);
+ assert_se(ordered_hashmap_size(network->addresses_by_section) == n_addresses);
+ if (n_addresses > 0) {
+ Address *a;
+
+ assert_se(a = ordered_hashmap_first(network->addresses_by_section));
+ assert_se(a->prefixlen == prefixlen);
+ assert_se(a->family == family);
+ assert_se(in_addr_equal(family, &a->in_addr, u));
+ /* TODO: check Address.in_addr and Address.broadcast */
+ }
+}
+
+static void test_config_parse_address(void) {
+ test_config_parse_address_one("", AF_INET, 0, NULL, 0);
+ test_config_parse_address_one("/", AF_INET, 0, NULL, 0);
+ test_config_parse_address_one("/8", AF_INET, 0, NULL, 0);
+ test_config_parse_address_one("1.2.3.4", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 8);
+ test_config_parse_address_one("1.2.3.4/0", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 0);
+ test_config_parse_address_one("1.2.3.4/1", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 1);
+ test_config_parse_address_one("1.2.3.4/2", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 2);
+ test_config_parse_address_one("1.2.3.4/32", AF_INET, 1, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
+ test_config_parse_address_one("1.2.3.4/33", AF_INET, 0, NULL, 0);
+ test_config_parse_address_one("1.2.3.4/-1", AF_INET, 0, NULL, 0);
+
+ test_config_parse_address_one("", AF_INET6, 0, NULL, 0);
+ test_config_parse_address_one("/", AF_INET6, 0, NULL, 0);
+ test_config_parse_address_one("/8", AF_INET6, 0, NULL, 0);
+ test_config_parse_address_one("::1", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0);
+ test_config_parse_address_one("::1/0", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0);
+ test_config_parse_address_one("::1/1", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 1);
+ test_config_parse_address_one("::1/2", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 2);
+ test_config_parse_address_one("::1/32", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 32);
+ test_config_parse_address_one("::1/33", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 33);
+ test_config_parse_address_one("::1/64", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 64);
+ test_config_parse_address_one("::1/128", AF_INET6, 1, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
+ test_config_parse_address_one("::1/129", AF_INET6, 0, NULL, 0);
+ test_config_parse_address_one("::1/-1", AF_INET6, 0, NULL, 0);
+}
+
+static void test_config_parse_match_ifnames(void) {
+ _cleanup_strv_free_ char **names = NULL;
+
+ assert_se(config_parse_match_ifnames("network", "filename", 1, "section", 1, "Name", 0, "!hoge hogehoge foo", &names, NULL) == 0);
+ assert_se(config_parse_match_ifnames("network", "filename", 1, "section", 1, "Name", 0, "!baz", &names, NULL) == 0);
+ assert_se(config_parse_match_ifnames("network", "filename", 1, "section", 1, "Name", 0, "aaa bbb ccc", &names, NULL) == 0);
+
+ assert_se(strv_equal(names, STRV_MAKE("!hoge", "!hogehoge", "!foo", "!baz", "aaa", "bbb", "ccc")));
+}
+
+static void test_config_parse_match_strv(void) {
+ _cleanup_strv_free_ char **names = NULL;
+
+ assert_se(config_parse_match_strv("network", "filename", 1, "section", 1, "Name", 0, "!hoge hogehoge foo", &names, NULL) == 0);
+ assert_se(config_parse_match_strv("network", "filename", 1, "section", 1, "Name", 0, "!baz", &names, NULL) == 0);
+ assert_se(config_parse_match_strv("network", "filename", 1, "section", 1, "Name", 0,
+ "KEY=val \"KEY2=val with space\" \"KEY3=val with \\\"quotation\\\"\"", &names, NULL) == 0);
+
+ assert_se(strv_equal(names,
+ STRV_MAKE("!hoge",
+ "!hogehoge",
+ "!foo",
+ "!baz",
+ "KEY=val",
+ "KEY2=val with space",
+ "KEY3=val with \\quotation\\")));
+}
+
+int main(int argc, char **argv) {
+ log_parse_environment();
+ log_open();
+
+ test_config_parse_duid_type();
+ test_config_parse_duid_rawdata();
+ test_config_parse_hwaddr();
+ test_config_parse_address();
+ test_config_parse_match_ifnames();
+ test_config_parse_match_strv();
+
+ return 0;
+}
diff --git a/src/network/test-routing-policy-rule.c b/src/network/test-routing-policy-rule.c
new file mode 100644
index 0000000..8d87cdf
--- /dev/null
+++ b/src/network/test-routing-policy-rule.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "networkd-routing-policy-rule.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static void test_rule_serialization(const char *title, const char *ruleset, const char *expected) {
+ char pattern[] = "/tmp/systemd-test-routing-policy-rule.XXXXXX",
+ pattern2[] = "/tmp/systemd-test-routing-policy-rule.XXXXXX",
+ pattern3[] = "/tmp/systemd-test-routing-policy-rule.XXXXXX";
+ const char *cmd;
+ int fd, fd2, fd3;
+ _cleanup_fclose_ FILE *f = NULL, *f2 = NULL, *f3 = NULL;
+ Set *rules = NULL;
+ _cleanup_free_ char *buf = NULL;
+ size_t buf_size;
+
+ log_info("========== %s ==========", title);
+ log_info("put:\n%s\n", ruleset);
+
+ fd = mkostemp_safe(pattern);
+ assert_se(fd >= 0);
+ assert_se(f = fdopen(fd, "a+"));
+ assert_se(write_string_stream(f, ruleset, 0) == 0);
+
+ assert_se(routing_policy_load_rules(pattern, &rules) == 0);
+
+ fd2 = mkostemp_safe(pattern2);
+ assert_se(fd2 >= 0);
+ assert_se(f2 = fdopen(fd2, "a+"));
+
+ assert_se(routing_policy_serialize_rules(rules, f2) == 0);
+ assert_se(fflush_and_check(f2) == 0);
+
+ assert_se(read_full_file(pattern2, &buf, &buf_size) == 0);
+
+ log_info("got:\n%s", buf);
+
+ fd3 = mkostemp_safe(pattern3);
+ assert_se(fd3 >= 0);
+ assert_se(f3 = fdopen(fd3, "w"));
+ assert_se(write_string_stream(f3, expected ?: ruleset, 0) == 0);
+
+ cmd = strjoina("diff -u ", pattern3, " ", pattern2);
+ log_info("$ %s", cmd);
+ assert_se(system(cmd) == 0);
+
+ set_free(rules);
+}
+
+int main(int argc, char **argv) {
+ _cleanup_free_ char *p = NULL;
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_rule_serialization("basic parsing",
+ "RULE=family=AF_INET from=1.2.3.4/32 to=2.3.4.5/32 tos=5 priority=10 fwmark=1/2 invert_rule=yes table=10", NULL);
+
+ test_rule_serialization("ignored values",
+ "RULE=something=to=ignore from=1.2.3.4/32 from=1.2.3.4/32"
+ " \t to=2.3.4.5/24 to=2.3.4.5/32 tos=5 fwmark=2 fwmark=1 table=10 table=20",
+ "RULE=family=AF_INET from=1.2.3.4/32 to=2.3.4.5/32 tos=5 fwmark=1 invert_rule=no table=20");
+
+ test_rule_serialization("ipv6",
+ "RULE=family=AF_INET6 from=1::2/64 to=2::3/64 invert_rule=yes table=6", NULL);
+
+ assert_se(asprintf(&p, "RULE=family=AF_INET6 from=1::2/64 to=2::3/64 invert_rule=no table=%d", RT_TABLE_MAIN) >= 0);
+ test_rule_serialization("default table",
+ "RULE=from=1::2/64 to=2::3/64", p);
+
+ test_rule_serialization("incoming interface",
+ "RULE=from=1::2/64 to=2::3/64 table=1 iif=lo",
+ "RULE=family=AF_INET6 from=1::2/64 to=2::3/64 iif=lo invert_rule=no table=1");
+
+ test_rule_serialization("outgoing interface",
+ "RULE=family=AF_INET6 from=1::2/64 to=2::3/64 oif=eth0 invert_rule=no table=1", NULL);
+
+ test_rule_serialization("freeing interface names",
+ "RULE=from=1::2/64 to=2::3/64 family=AF_INET6 iif=e0 iif=e1 oif=e0 oif=e1 table=1",
+ "RULE=family=AF_INET6 from=1::2/64 to=2::3/64 iif=e1 oif=e1 invert_rule=no table=1");
+
+ test_rule_serialization("ignoring invalid family",
+ "RULE=from=1::2/64 to=2::3/64 family=AF_UNSEPC family=AF_INET table=1",
+ "RULE=family=AF_INET6 from=1::2/64 to=2::3/64 invert_rule=no table=1");
+
+ return 0;
+}
diff --git a/src/network/wait-online/link.c b/src/network/wait-online/link.c
new file mode 100644
index 0000000..529fc9f
--- /dev/null
+++ b/src/network/wait-online/link.c
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "link.h"
+#include "manager.h"
+#include "string-util.h"
+
+int link_new(Manager *m, Link **ret, int ifindex, const char *ifname) {
+ _cleanup_(link_freep) Link *l = NULL;
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(m);
+ assert(ifindex > 0);
+
+ r = hashmap_ensure_allocated(&m->links, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&m->links_by_name, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = strdup(ifname);
+ if (!n)
+ return -ENOMEM;
+
+ l = new(Link, 1);
+ if (!l)
+ return -ENOMEM;
+
+ *l = (Link) {
+ .manager = m,
+ .ifname = TAKE_PTR(n),
+ .ifindex = ifindex,
+ .required_operstate = LINK_OPERSTATE_RANGE_DEFAULT,
+ };
+
+ r = hashmap_put(m->links_by_name, l->ifname, l);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->links, INT_TO_PTR(ifindex), l);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = l;
+
+ TAKE_PTR(l);
+ return 0;
+}
+
+Link *link_free(Link *l) {
+
+ if (!l)
+ return NULL;
+
+ if (l->manager) {
+ hashmap_remove(l->manager->links, INT_TO_PTR(l->ifindex));
+ hashmap_remove(l->manager->links_by_name, l->ifname);
+ }
+
+ free(l->state);
+ free(l->ifname);
+ return mfree(l);
+ }
+
+int link_update_rtnl(Link *l, sd_netlink_message *m) {
+ const char *ifname;
+ int r;
+
+ assert(l);
+ assert(l->manager);
+ assert(m);
+
+ r = sd_rtnl_message_link_get_flags(m, &l->flags);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string(m, IFLA_IFNAME, &ifname);
+ if (r < 0)
+ return r;
+
+ if (!streq(l->ifname, ifname)) {
+ char *new_ifname;
+
+ new_ifname = strdup(ifname);
+ if (!new_ifname)
+ return -ENOMEM;
+
+ assert_se(hashmap_remove(l->manager->links_by_name, l->ifname) == l);
+ free_and_replace(l->ifname, new_ifname);
+
+ r = hashmap_put(l->manager->links_by_name, l->ifname, l);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int link_update_monitor(Link *l) {
+ _cleanup_free_ char *operstate = NULL, *required_operstate = NULL, *state = NULL;
+ int r, ret = 0;
+
+ assert(l);
+ assert(l->ifname);
+
+ r = sd_network_link_get_required_for_online(l->ifindex);
+ if (r < 0)
+ ret = log_link_debug_errno(l, r, "Failed to determine whether the link is required for online or not, "
+ "ignoring: %m");
+ else
+ l->required_for_online = r > 0;
+
+ r = sd_network_link_get_required_operstate_for_online(l->ifindex, &required_operstate);
+ if (r < 0)
+ ret = log_link_debug_errno(l, r, "Failed to get required operational state, ignoring: %m");
+ else if (isempty(required_operstate))
+ l->required_operstate = LINK_OPERSTATE_RANGE_DEFAULT;
+ else {
+ r = parse_operational_state_range(required_operstate, &l->required_operstate);
+ if (r < 0)
+ ret = log_link_debug_errno(l, SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse required operational state, ignoring: %m");
+ }
+
+ r = sd_network_link_get_operational_state(l->ifindex, &operstate);
+ if (r < 0)
+ ret = log_link_debug_errno(l, r, "Failed to get operational state, ignoring: %m");
+ else {
+ LinkOperationalState s;
+
+ s = link_operstate_from_string(operstate);
+ if (s < 0)
+ ret = log_link_debug_errno(l, SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse operational state, ignoring: %m");
+ else
+ l->operational_state = s;
+ }
+
+ r = sd_network_link_get_setup_state(l->ifindex, &state);
+ if (r < 0)
+ ret = log_link_debug_errno(l, r, "Failed to get setup state, ignoring: %m");
+ else
+ free_and_replace(l->state, state);
+
+ return ret;
+}
diff --git a/src/network/wait-online/link.h b/src/network/wait-online/link.h
new file mode 100644
index 0000000..3aa8357
--- /dev/null
+++ b/src/network/wait-online/link.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "log-link.h"
+#include "network-util.h"
+
+typedef struct Link Link;
+typedef struct Manager Manager;
+
+struct Link {
+ Manager *manager;
+
+ int ifindex;
+ char *ifname;
+ unsigned flags;
+
+ bool required_for_online;
+ LinkOperationalStateRange required_operstate;
+ LinkOperationalState operational_state;
+ char *state;
+};
+
+int link_new(Manager *m, Link **ret, int ifindex, const char *ifname);
+Link *link_free(Link *l);
+int link_update_rtnl(Link *l, sd_netlink_message *m);
+int link_update_monitor(Link *l);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_free);
diff --git a/src/network/wait-online/manager.c b/src/network/wait-online/manager.c
new file mode 100644
index 0000000..79994bd
--- /dev/null
+++ b/src/network/wait-online/manager.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/ether.h>
+#include <linux/if.h>
+#include <fnmatch.h>
+
+#include "alloc-util.h"
+#include "link.h"
+#include "manager.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "strv.h"
+#include "time-util.h"
+#include "util.h"
+
+static bool manager_ignore_link(Manager *m, Link *link) {
+ assert(m);
+ assert(link);
+
+ /* always ignore the loopback interface */
+ if (link->flags & IFF_LOOPBACK)
+ return true;
+
+ /* if interfaces are given on the command line, ignore all others */
+ if (m->interfaces && !hashmap_contains(m->interfaces, link->ifname))
+ return true;
+
+ if (!link->required_for_online)
+ return true;
+
+ /* ignore interfaces we explicitly are asked to ignore */
+ return strv_fnmatch(m->ignore, link->ifname);
+}
+
+static int manager_link_is_online(Manager *m, Link *l, LinkOperationalStateRange s) {
+ /* This returns the following:
+ * -EAGAIN: not processed by udev or networkd
+ * 0: operstate is not enough
+ * 1: online */
+
+ if (!l->state)
+ return log_link_debug_errno(l, SYNTHETIC_ERRNO(EAGAIN),
+ "link has not yet been processed by udev");
+
+ if (STR_IN_SET(l->state, "configuring", "pending"))
+ return log_link_debug_errno(l, SYNTHETIC_ERRNO(EAGAIN),
+ "link is being processed by networkd");
+
+ if (s.min < 0)
+ s.min = m->required_operstate.min >= 0 ? m->required_operstate.min
+ : l->required_operstate.min;
+
+ if (s.max < 0)
+ s.max = m->required_operstate.max >= 0 ? m->required_operstate.max
+ : l->required_operstate.max;
+
+ if (l->operational_state < s.min || l->operational_state > s.max) {
+ log_link_debug(l, "Operational state '%s' is not in range ['%s':'%s']",
+ link_operstate_to_string(l->operational_state),
+ link_operstate_to_string(s.min), link_operstate_to_string(s.max));
+ return 0;
+ }
+
+ return 1;
+}
+
+bool manager_configured(Manager *m) {
+ bool one_ready = false;
+ const char *ifname;
+ void *p;
+ Link *l;
+ int r;
+
+ if (!hashmap_isempty(m->interfaces)) {
+ /* wait for all the links given on the command line to appear */
+ HASHMAP_FOREACH_KEY(p, ifname, m->interfaces) {
+ LinkOperationalStateRange *range = p;
+
+ l = hashmap_get(m->links_by_name, ifname);
+ if (!l && range->min == LINK_OPERSTATE_MISSING) {
+ one_ready = true;
+ continue;
+ }
+
+ if (!l) {
+ log_debug("still waiting for %s", ifname);
+ if (!m->any)
+ return false;
+ continue;
+ }
+
+ if (manager_link_is_online(m, l, *range) <= 0) {
+ if (!m->any)
+ return false;
+ continue;
+ }
+
+ one_ready = true;
+ }
+
+ /* all interfaces given by the command line are online, or
+ * one of the specified interfaces is online. */
+ return one_ready;
+ }
+
+ /* wait for all links networkd manages to be in admin state 'configured'
+ * and at least one link to gain a carrier */
+ HASHMAP_FOREACH(l, m->links) {
+ if (manager_ignore_link(m, l)) {
+ log_link_debug(l, "link is ignored");
+ continue;
+ }
+
+ r = manager_link_is_online(m, l,
+ (LinkOperationalStateRange) { _LINK_OPERSTATE_INVALID,
+ _LINK_OPERSTATE_INVALID });
+ if (r < 0 && !m->any)
+ return false;
+ if (r > 0)
+ /* we wait for at least one link to be ready,
+ * regardless of who manages it */
+ one_ready = true;
+ }
+
+ return one_ready;
+}
+
+static int manager_process_link(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
+ Manager *m = userdata;
+ uint16_t type;
+ Link *l;
+ const char *ifname;
+ int ifindex, r;
+
+ assert(rtnl);
+ assert(m);
+ assert(mm);
+
+ r = sd_netlink_message_get_type(mm, &type);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Could not get message type, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_rtnl_message_link_get_ifindex(mm, &ifindex);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Could not get ifindex from link, ignoring: %m");
+ return 0;
+ } else if (ifindex <= 0) {
+ log_warning("rtnl: received link message with invalid ifindex %d, ignoring", ifindex);
+ return 0;
+ }
+
+ r = sd_netlink_message_read_string(mm, IFLA_IFNAME, &ifname);
+ if (r < 0) {
+ log_warning_errno(r, "rtnl: Received link message without ifname, ignoring: %m");
+ return 0;
+ }
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+
+ switch (type) {
+
+ case RTM_NEWLINK:
+ if (!l) {
+ log_debug("Found link %i", ifindex);
+
+ r = link_new(m, &l, ifindex, ifname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create link object: %m");
+ }
+
+ r = link_update_rtnl(l, mm);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to process RTNL link message, ignoring: %m");
+
+ r = link_update_monitor(l);
+ if (r < 0 && r != -ENODATA)
+ log_link_warning_errno(l, r, "Failed to update link state, ignoring: %m");
+
+ break;
+
+ case RTM_DELLINK:
+ if (l) {
+ log_link_debug(l, "Removing link");
+ link_free(l);
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+static int on_rtnl_event(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
+ Manager *m = userdata;
+ int r;
+
+ r = manager_process_link(rtnl, mm, m);
+ if (r < 0)
+ return r;
+
+ if (manager_configured(m))
+ sd_event_exit(m->event, 0);
+
+ return 1;
+}
+
+static int manager_rtnl_listen(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *i;
+ int r;
+
+ assert(m);
+
+ /* First, subscribe to interfaces coming and going */
+ r = sd_netlink_open(&m->rtnl);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_attach_event(m->rtnl, m->event, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWLINK, on_rtnl_event, NULL, m, "wait-online-on-NEWLINK");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELLINK, on_rtnl_event, NULL, m, "wait-online-on-DELLINK");
+ if (r < 0)
+ return r;
+
+ /* Then, enumerate all links */
+ r = sd_rtnl_message_new_link(m->rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ r = manager_process_link(m->rtnl, i, m);
+ if (r < 0)
+ return r;
+ }
+
+ return r;
+}
+
+static int on_network_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ Link *l;
+ int r;
+
+ assert(m);
+
+ sd_network_monitor_flush(m->network_monitor);
+
+ HASHMAP_FOREACH(l, m->links) {
+ r = link_update_monitor(l);
+ if (r < 0 && r != -ENODATA)
+ log_link_warning_errno(l, r, "Failed to update link state, ignoring: %m");
+ }
+
+ if (manager_configured(m))
+ sd_event_exit(m->event, 0);
+
+ return 0;
+}
+
+static int manager_network_monitor_listen(Manager *m) {
+ int r, fd, events;
+
+ assert(m);
+
+ r = sd_network_monitor_new(&m->network_monitor, NULL);
+ if (r < 0)
+ return r;
+
+ fd = sd_network_monitor_get_fd(m->network_monitor);
+ if (fd < 0)
+ return fd;
+
+ events = sd_network_monitor_get_events(m->network_monitor);
+ if (events < 0)
+ return events;
+
+ r = sd_event_add_io(m->event, &m->network_monitor_event_source,
+ fd, events, &on_network_event, m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int manager_new(Manager **ret, Hashmap *interfaces, char **ignore,
+ LinkOperationalStateRange required_operstate,
+ bool any, usec_t timeout) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .interfaces = interfaces,
+ .ignore = ignore,
+ .required_operstate = required_operstate,
+ .any = any,
+ };
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+
+ if (timeout > 0) {
+ usec_t usec;
+
+ usec = now(clock_boottime_or_monotonic()) + timeout;
+
+ r = sd_event_add_time(m->event, NULL, clock_boottime_or_monotonic(), usec, 0, NULL, INT_TO_PTR(-ETIMEDOUT));
+ if (r < 0)
+ return r;
+ }
+
+ sd_event_set_watchdog(m->event, true);
+
+ r = manager_network_monitor_listen(m);
+ if (r < 0)
+ return r;
+
+ r = manager_rtnl_listen(m);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+void manager_free(Manager *m) {
+ if (!m)
+ return;
+
+ hashmap_free_with_destructor(m->links, link_free);
+ hashmap_free(m->links_by_name);
+
+ sd_event_source_unref(m->network_monitor_event_source);
+ sd_network_monitor_unref(m->network_monitor);
+
+ sd_event_source_unref(m->rtnl_event_source);
+ sd_netlink_unref(m->rtnl);
+
+ sd_event_unref(m->event);
+ free(m);
+
+ return;
+}
diff --git a/src/network/wait-online/manager.h b/src/network/wait-online/manager.h
new file mode 100644
index 0000000..f5e8353
--- /dev/null
+++ b/src/network/wait-online/manager.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+#include "sd-netlink.h"
+#include "sd-network.h"
+
+#include "hashmap.h"
+#include "network-util.h"
+#include "time-util.h"
+
+typedef struct Manager Manager;
+typedef struct Link Link;
+
+struct Manager {
+ Hashmap *links;
+ Hashmap *links_by_name;
+
+ /* Do not free the two members below. */
+ Hashmap *interfaces;
+ char **ignore;
+
+ LinkOperationalStateRange required_operstate;
+ bool any;
+
+ sd_netlink *rtnl;
+ sd_event_source *rtnl_event_source;
+
+ sd_network_monitor *network_monitor;
+ sd_event_source *network_monitor_event_source;
+
+ sd_event *event;
+};
+
+void manager_free(Manager *m);
+int manager_new(Manager **ret, Hashmap *interfaces, char **ignore,
+ LinkOperationalStateRange required_operstate,
+ bool any, usec_t timeout);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+bool manager_configured(Manager *m);
diff --git a/src/network/wait-online/wait-online.c b/src/network/wait-online/wait-online.c
new file mode 100644
index 0000000..c2bdcd4
--- /dev/null
+++ b/src/network/wait-online/wait-online.c
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-daemon.h"
+
+#include "daemon-util.h"
+#include "main-func.h"
+#include "manager.h"
+#include "pretty-print.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "strv.h"
+
+static bool arg_quiet = false;
+static usec_t arg_timeout = 120 * USEC_PER_SEC;
+static Hashmap *arg_interfaces = NULL;
+static char **arg_ignore = NULL;
+static LinkOperationalStateRange arg_required_operstate = { _LINK_OPERSTATE_INVALID, _LINK_OPERSTATE_INVALID };
+static bool arg_any = false;
+
+STATIC_DESTRUCTOR_REGISTER(arg_interfaces, hashmap_free_free_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_ignore, strv_freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-networkd-wait-online.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Block until network is configured.\n\n"
+ " -h --help Show this help\n"
+ " --version Print version string\n"
+ " -q --quiet Do not show status information\n"
+ " -i --interface=INTERFACE[:MIN_OPERSTATE[:MAX_OPERSTATE]]\n"
+ " Block until at least these interfaces have appeared\n"
+ " --ignore=INTERFACE Don't take these interfaces into account\n"
+ " -o --operational-state=MIN_OPERSTATE[:MAX_OPERSTATE]\n"
+ " Required operational state\n"
+ " --any Wait until at least one of the interfaces is online\n"
+ " --timeout=SECS Maximum time to wait for network connectivity\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_interface_with_operstate_range(const char *str) {
+ _cleanup_free_ char *ifname = NULL;
+ _cleanup_free_ LinkOperationalStateRange *range;
+ const char *p;
+ int r;
+
+ assert(str);
+
+ range = new(LinkOperationalStateRange, 1);
+ if (!range)
+ return log_oom();
+
+ p = strchr(str, ':');
+ if (p) {
+ r = parse_operational_state_range(p + 1, range);
+ if (r < 0)
+ log_error_errno(r, "Invalid operational state range '%s'", p + 1);
+
+ ifname = strndup(optarg, p - optarg);
+ } else {
+ range->min = _LINK_OPERSTATE_INVALID;
+ range->max = _LINK_OPERSTATE_INVALID;
+ ifname = strdup(str);
+ }
+ if (!ifname)
+ return log_oom();
+
+ if (!ifname_valid(ifname))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid interface name '%s'", ifname);
+
+ r = hashmap_ensure_allocated(&arg_interfaces, &string_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ r = hashmap_put(arg_interfaces, ifname, TAKE_PTR(range));
+ if (r < 0)
+ return log_error_errno(r, "Failed to store interface name: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Interface name %s is already specified", ifname);
+
+ TAKE_PTR(ifname);
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_IGNORE,
+ ARG_ANY,
+ ARG_TIMEOUT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "quiet", no_argument, NULL, 'q' },
+ { "interface", required_argument, NULL, 'i' },
+ { "ignore", required_argument, NULL, ARG_IGNORE },
+ { "operational-state", required_argument, NULL, 'o' },
+ { "any", no_argument, NULL, ARG_ANY },
+ { "timeout", required_argument, NULL, ARG_TIMEOUT },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hi:qo:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ help();
+ return 0;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_VERSION:
+ return version();
+
+ case 'i':
+ r = parse_interface_with_operstate_range(optarg);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_IGNORE:
+ if (strv_extend(&arg_ignore, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case 'o': {
+ LinkOperationalStateRange range;
+
+ r = parse_operational_state_range(optarg, &range);
+ if (r < 0)
+ return log_error_errno(r, "Invalid operational state range '%s'", optarg);
+
+ arg_required_operstate = range;
+
+ break;
+ }
+ case ARG_ANY:
+ arg_any = true;
+ break;
+
+ case ARG_TIMEOUT:
+ r = parse_sec(optarg, &arg_timeout);
+ if (r < 0)
+ return r;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_quiet)
+ log_set_max_level(LOG_ERR);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m, arg_interfaces, arg_ignore, arg_required_operstate, arg_any, arg_timeout);
+ if (r < 0)
+ return log_error_errno(r, "Could not create manager: %m");
+
+ if (manager_configured(m))
+ goto success;
+
+ notify_message = notify_start("READY=1\n"
+ "STATUS=Waiting for network connections...",
+ "STATUS=Failed to wait for network connectivity...");
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+success:
+ notify_message = "STATUS=All interfaces configured...";
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/notify/notify.c b/src/notify/notify.c
new file mode 100644
index 0000000..6a506db
--- /dev/null
+++ b/src/notify/notify.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "user-util.h"
+#include "util.h"
+
+static bool arg_ready = false;
+static pid_t arg_pid = 0;
+static const char *arg_status = NULL;
+static bool arg_booted = false;
+static uid_t arg_uid = UID_INVALID;
+static gid_t arg_gid = GID_INVALID;
+static bool arg_no_block = false;
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-notify", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [VARIABLE=VALUE...]\n"
+ "\n%sNotify the init system about service status updates.%s\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --ready Inform the init system about service start-up completion\n"
+ " --pid[=PID] Set main PID of daemon\n"
+ " --uid=USER Set user to send from\n"
+ " --status=TEXT Set status text\n"
+ " --booted Check if the system was booted up with systemd\n"
+ " --no-block Do not wait until operation finished\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static pid_t manager_pid(void) {
+ const char *e;
+ pid_t pid;
+ int r;
+
+ /* If we run as a service managed by systemd --user the $MANAGERPID environment variable points to
+ * the service manager's PID. */
+ e = getenv("MANAGERPID");
+ if (!e)
+ return 0;
+
+ r = parse_pid(e, &pid);
+ if (r < 0) {
+ log_warning_errno(r, "$MANAGERPID is set to an invalid PID, ignoring: %s", e);
+ return 0;
+ }
+
+ return pid;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_READY = 0x100,
+ ARG_VERSION,
+ ARG_PID,
+ ARG_STATUS,
+ ARG_BOOTED,
+ ARG_UID,
+ ARG_NO_BLOCK
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "ready", no_argument, NULL, ARG_READY },
+ { "pid", optional_argument, NULL, ARG_PID },
+ { "status", required_argument, NULL, ARG_STATUS },
+ { "booted", no_argument, NULL, ARG_BOOTED },
+ { "uid", required_argument, NULL, ARG_UID },
+ { "no-block", no_argument, NULL, ARG_NO_BLOCK },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) {
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_READY:
+ arg_ready = true;
+ break;
+
+ case ARG_PID:
+ if (isempty(optarg) || streq(optarg, "auto")) {
+ arg_pid = getppid();
+
+ if (arg_pid <= 1 ||
+ arg_pid == manager_pid()) /* Don't send from PID 1 or the service
+ * manager's PID (which might be distinct from
+ * 1, if we are a --user instance), that'd just
+ * be confusing for the service manager */
+ arg_pid = getpid();
+ } else if (streq(optarg, "parent"))
+ arg_pid = getppid();
+ else if (streq(optarg, "self"))
+ arg_pid = getpid();
+ else {
+ r = parse_pid(optarg, &arg_pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PID %s.", optarg);
+ }
+
+ break;
+
+ case ARG_STATUS:
+ arg_status = optarg;
+ break;
+
+ case ARG_BOOTED:
+ arg_booted = true;
+ break;
+
+ case ARG_UID: {
+ const char *u = optarg;
+
+ r = get_user_creds(&u, &arg_uid, &arg_gid, NULL, NULL, 0);
+ if (r == -ESRCH) /* If the user doesn't exist, then accept it anyway as numeric */
+ r = parse_uid(u, &arg_uid);
+ if (r < 0)
+ return log_error_errno(r, "Can't resolve user %s: %m", optarg);
+
+ break;
+ }
+
+ case ARG_NO_BLOCK:
+ arg_no_block = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+ if (optind >= argc &&
+ !arg_ready &&
+ !arg_status &&
+ !arg_pid &&
+ !arg_booted) {
+ help();
+ return -EINVAL;
+ }
+
+ return 1;
+}
+
+static int run(int argc, char* argv[]) {
+ _cleanup_free_ char *status = NULL, *cpid = NULL, *n = NULL;
+ _cleanup_strv_free_ char **final_env = NULL;
+ char* our_env[4];
+ unsigned i = 0;
+ pid_t source_pid;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_booted)
+ return sd_booted() <= 0;
+
+ if (arg_ready)
+ our_env[i++] = (char*) "READY=1";
+
+ if (arg_status) {
+ status = strjoin("STATUS=", arg_status);
+ if (!status)
+ return log_oom();
+
+ our_env[i++] = status;
+ }
+
+ if (arg_pid > 0) {
+ if (asprintf(&cpid, "MAINPID="PID_FMT, arg_pid) < 0)
+ return log_oom();
+
+ our_env[i++] = cpid;
+ }
+
+ our_env[i++] = NULL;
+
+ final_env = strv_env_merge(2, our_env, argv + optind);
+ if (!final_env)
+ return log_oom();
+
+ if (strv_isempty(final_env))
+ return 0;
+
+ n = strv_join(final_env, "\n");
+ if (!n)
+ return log_oom();
+
+ /* If this is requested change to the requested UID/GID. Note that we only change the real UID here, and leave
+ the effective UID in effect (which is 0 for this to work). That's because we want the privileges to fake the
+ ucred data, and sd_pid_notify() uses the real UID for filling in ucred. */
+
+ if (arg_gid != GID_INVALID &&
+ setregid(arg_gid, (gid_t) -1) < 0)
+ return log_error_errno(errno, "Failed to change GID: %m");
+
+ if (arg_uid != UID_INVALID &&
+ setreuid(arg_uid, (uid_t) -1) < 0)
+ return log_error_errno(errno, "Failed to change UID: %m");
+
+ if (arg_pid > 0)
+ source_pid = arg_pid;
+ else {
+ /* Pretend the message originates from our parent, given that we are typically called from a
+ * shell script, i.e. we are not the main process of a service but only a child of it. */
+ source_pid = getppid();
+ if (source_pid <= 1 ||
+ source_pid == manager_pid()) /* safety check: don't claim we'd send anything from PID 1
+ * or the service manager itself */
+ source_pid = 0;
+ }
+ r = sd_pid_notify(source_pid, false, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to notify init system: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "No status data could be sent: $NOTIFY_SOCKET was not set");
+
+ if (!arg_no_block) {
+ r = sd_notify_barrier(0, 5 * USEC_PER_SEC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to invoke barrier: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "No status data could be sent: $NOTIFY_SOCKET was not set");
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/nspawn/meson.build b/src/nspawn/meson.build
new file mode 100644
index 0000000..539ed56
--- /dev/null
+++ b/src/nspawn/meson.build
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+libnspawn_core_sources = files('''
+ nspawn-cgroup.c
+ nspawn-cgroup.h
+ nspawn-creds.c
+ nspawn-creds.h
+ nspawn-def.h
+ nspawn-expose-ports.c
+ nspawn-expose-ports.h
+ nspawn-mount.c
+ nspawn-mount.h
+ nspawn-network.c
+ nspawn-network.h
+ nspawn-oci.c
+ nspawn-oci.h
+ nspawn-patch-uid.c
+ nspawn-patch-uid.h
+ nspawn-register.c
+ nspawn-register.h
+ nspawn-seccomp.c
+ nspawn-seccomp.h
+ nspawn-settings.c
+ nspawn-settings.h
+ nspawn-setuid.c
+ nspawn-setuid.h
+ nspawn-stub-pid1.c
+ nspawn-stub-pid1.h
+'''.split())
+
+nspawn_gperf_c = custom_target(
+ 'nspawn-gperf.c',
+ input : 'nspawn-gperf.gperf',
+ output : 'nspawn-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+libnspawn_core_sources += [nspawn_gperf_c]
+
+libnspawn_core = static_library(
+ 'nspawn-core',
+ libnspawn_core_sources,
+ include_directories : includes,
+ dependencies : [libacl,
+ libseccomp,
+ libselinux])
+
+systemd_nspawn_sources = files('nspawn.c')
+
+tests += [
+ [['src/nspawn/test-nspawn-tables.c'],
+ [libnspawn_core,
+ libshared],
+ [libseccomp]],
+
+ [['src/nspawn/test-patch-uid.c'],
+ [libnspawn_core,
+ libshared],
+ [libacl],
+ '', 'manual'],
+]
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
new file mode 100644
index 0000000..cb01b25
--- /dev/null
+++ b/src/nspawn/nspawn-cgroup.c
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "cgroup-setup.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "nspawn-cgroup.h"
+#include "nspawn-mount.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static int chown_cgroup_path(const char *path, uid_t uid_shift) {
+ _cleanup_close_ int fd = -1;
+ const char *fn;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ FOREACH_STRING(fn,
+ ".",
+ "cgroup.clone_children",
+ "cgroup.controllers",
+ "cgroup.events",
+ "cgroup.procs",
+ "cgroup.stat",
+ "cgroup.subtree_control",
+ "cgroup.threads",
+ "notify_on_release",
+ "tasks")
+ if (fchownat(fd, fn, uid_shift, uid_shift, 0) < 0)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to chown \"%s/%s\", ignoring: %m", path, fn);
+
+ return 0;
+}
+
+int chown_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) {
+ _cleanup_free_ char *path = NULL, *fs = NULL;
+ int r;
+
+ r = cg_pid_get_path(NULL, pid, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get container cgroup path: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file system path for container cgroup: %m");
+
+ r = chown_cgroup_path(fs, uid_shift);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown() cgroup %s: %m", fs);
+
+ if (unified_requested == CGROUP_UNIFIED_SYSTEMD || (unified_requested == CGROUP_UNIFIED_NONE && cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0)) {
+ _cleanup_free_ char *lfs = NULL;
+ /* Always propagate access rights from unified to legacy controller */
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, NULL, &lfs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file system path for container cgroup: %m");
+
+ r = chown_cgroup_path(lfs, uid_shift);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown() cgroup %s: %m", lfs);
+ }
+
+ return 0;
+}
+
+int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) {
+ _cleanup_free_ char *cgroup = NULL;
+ char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
+ bool undo_mount = false;
+ const char *fn;
+ int r, unified_controller;
+
+ unified_controller = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (unified_controller < 0)
+ return log_error_errno(unified_controller, "Failed to determine whether the systemd hierarchy is unified: %m");
+ if ((unified_controller > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD))
+ return 0;
+
+ /* When the host uses the legacy cgroup setup, but the
+ * container shall use the unified hierarchy, let's make sure
+ * we copy the path from the name=systemd hierarchy into the
+ * unified hierarchy. Similar for the reverse situation. */
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid);
+
+ /* In order to access the unified hierarchy we need to mount it */
+ if (!mkdtemp(tree))
+ return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m");
+
+ if (unified_controller > 0)
+ r = mount_nofollow_verbose(LOG_ERR, "cgroup", tree, "cgroup",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
+ else
+ r = mount_nofollow_verbose(LOG_ERR, "cgroup", tree, "cgroup2",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ if (r < 0)
+ goto finish;
+
+ undo_mount = true;
+
+ /* If nspawn dies abruptly the cgroup hierarchy created below
+ * its unit isn't cleaned up. So, let's remove it
+ * https://github.com/systemd/systemd/pull/4223#issuecomment-252519810 */
+ fn = strjoina(tree, cgroup);
+ (void) rm_rf(fn, REMOVE_ROOT|REMOVE_ONLY_DIRECTORIES);
+
+ fn = strjoina(tree, cgroup, "/cgroup.procs");
+
+ sprintf(pid_string, PID_FMT, pid);
+ r = write_string_file(fn, pid_string, WRITE_STRING_FILE_DISABLE_BUFFER|WRITE_STRING_FILE_MKDIR_0755);
+ if (r < 0) {
+ log_error_errno(r, "Failed to move process: %m");
+ goto finish;
+ }
+
+ fn = strjoina(tree, cgroup);
+ r = chown_cgroup_path(fn, uid_shift);
+ if (r < 0)
+ log_error_errno(r, "Failed to chown() cgroup %s: %m", fn);
+finish:
+ if (undo_mount)
+ (void) umount_verbose(LOG_ERR, tree, UMOUNT_NOFOLLOW);
+
+ (void) rmdir(tree);
+ return r;
+}
+
+int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested) {
+ _cleanup_free_ char *cgroup = NULL;
+ CGroupMask supported;
+ const char *payload;
+ int r;
+
+ assert(pid > 1);
+
+ /* In the unified hierarchy inner nodes may only contain subgroups, but not processes. Hence, if we running in
+ * the unified hierarchy and the container does the same, and we did not create a scope unit for the container
+ * move us and the container into two separate subcgroups.
+ *
+ * Moreover, container payloads such as systemd try to manage the cgroup they run in in full (i.e. including
+ * its attributes), while the host systemd will only delegate cgroups for children of the cgroup created for a
+ * delegation unit, instead of the cgroup itself. This means, if we'd pass on the cgroup allocated from the
+ * host systemd directly to the payload, the host and payload systemd might fight for the cgroup
+ * attributes. Hence, let's insert an intermediary cgroup to cover that case too.
+ *
+ * Note that we only bother with the main hierarchy here, not with any secondary ones. On the unified setup
+ * that's fine because there's only one hierarchy anyway and controllers are enabled directly on it. On the
+ * legacy setup, this is fine too, since delegation of controllers is generally not safe there, hence we won't
+ * do it. */
+
+ r = cg_mask_supported(&supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ if (keep_unit)
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+ else
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get our control group: %m");
+
+ payload = strjoina(cgroup, "/payload");
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, payload, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s subcgroup: %m", payload);
+
+ if (keep_unit) {
+ const char *supervisor;
+
+ supervisor = strjoina(cgroup, "/supervisor");
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, supervisor, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s subcgroup: %m", supervisor);
+ }
+
+ /* Try to enable as many controllers as possible for the new payload. */
+ (void) cg_enable_everywhere(supported, supported, cgroup, NULL);
+ return 0;
+}
+
+/* Retrieve existing subsystems. This function is called in a new cgroup
+ * namespace.
+ */
+static int get_process_controllers(Set **ret) {
+ _cleanup_set_free_ Set *controllers = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(ret);
+
+ f = fopen("/proc/self/cgroup", "re");
+ if (!f)
+ return errno == ENOENT ? -ESRCH : -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *e, *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strchr(line, ':');
+ if (!l)
+ continue;
+
+ l++;
+ e = strchr(l, ':');
+ if (!e)
+ continue;
+
+ *e = 0;
+
+ if (STR_IN_SET(l, "", "name=systemd", "name=unified"))
+ continue;
+
+ r = set_put_strdup(&controllers, l);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(controllers);
+
+ return 0;
+}
+
+static int mount_legacy_cgroup_hierarchy(
+ const char *dest,
+ const char *controller,
+ const char *hierarchy,
+ bool read_only) {
+
+ const char *to, *fstype, *opts;
+ int r;
+
+ to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
+
+ r = path_is_mount_point(to, dest, 0);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
+ if (r > 0)
+ return 0;
+
+ (void) mkdir_p(to, 0755);
+
+ /* The superblock mount options of the mount point need to be
+ * identical to the hosts', and hence writable... */
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) {
+ fstype = "cgroup2";
+ opts = NULL;
+ } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) {
+ fstype = "cgroup";
+ opts = "none,name=systemd,xattr";
+ } else {
+ fstype = "cgroup";
+ opts = controller;
+ }
+
+ r = mount_nofollow_verbose(LOG_ERR, "cgroup", to, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
+ if (r < 0)
+ return r;
+
+ /* ... hence let's only make the bind mount read-only, not the superblock. */
+ if (read_only) {
+ r = mount_nofollow_verbose(LOG_ERR, NULL, to, NULL,
+ MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+/* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */
+static int mount_legacy_cgns_supported(
+ const char *dest,
+ CGroupUnified unified_requested,
+ bool userns,
+ uid_t uid_shift,
+ uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ _cleanup_set_free_ Set *controllers = NULL;
+ const char *cgroup_root = "/sys/fs/cgroup", *c;
+ int r;
+
+ (void) mkdir_p(cgroup_root, 0755);
+
+ /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
+ r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
+ if (r == 0) {
+ _cleanup_free_ char *options = NULL;
+
+ /* When cgroup namespaces are enabled and user namespaces are
+ * used then the mount of the cgroupfs is done *inside* the new
+ * user namespace. We're root in the new user namespace and the
+ * kernel will happily translate our uid/gid to the correct
+ * uid/gid as seen from e.g. /proc/1/mountinfo. So we simply
+ * pass uid 0 and not uid_shift to tmpfs_patch_options().
+ */
+ r = tmpfs_patch_options("mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, 0, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+
+ r = mount_nofollow_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options);
+ if (r < 0)
+ return r;
+ }
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto skip_controllers;
+
+ r = get_process_controllers(&controllers);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup controllers: %m");
+
+ for (;;) {
+ _cleanup_free_ const char *controller = NULL;
+
+ controller = set_steal_first(controllers);
+ if (!controller)
+ break;
+
+ r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns);
+ if (r < 0)
+ return r;
+
+ /* When multiple hierarchies are co-mounted, make their
+ * constituting individual hierarchies a symlink to the
+ * co-mount.
+ */
+ c = controller;
+ for (;;) {
+ _cleanup_free_ char *target = NULL, *tok = NULL;
+
+ r = extract_first_word(&c, &tok, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract co-mounted cgroup controller: %m");
+ if (r == 0)
+ break;
+
+ if (streq(controller, tok))
+ break;
+
+ target = path_join("/sys/fs/cgroup/", tok);
+ if (!target)
+ return log_oom();
+
+ r = symlink_idempotent(controller, target, false);
+ if (r == -EINVAL)
+ return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
+ }
+ }
+
+skip_controllers:
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
+ if (r < 0)
+ return r;
+
+ if (!userns)
+ return mount_nofollow_verbose(LOG_ERR, NULL, cgroup_root, NULL,
+ MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
+
+ return 0;
+}
+
+/* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */
+static int mount_legacy_cgns_unsupported(
+ const char *dest,
+ CGroupUnified unified_requested,
+ bool userns,
+ uid_t uid_shift,
+ uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ const char *cgroup_root;
+ int r;
+
+ cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
+
+ (void) mkdir_p(cgroup_root, 0755);
+
+ /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
+ r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
+ if (r == 0) {
+ _cleanup_free_ char *options = NULL;
+
+ r = tmpfs_patch_options("mode=755" TMPFS_LIMITS_SYS_FS_CGROUP, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+
+ r = mount_nofollow_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options);
+ if (r < 0)
+ return r;
+ }
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto skip_controllers;
+
+ r = cg_kernel_controllers(&controllers);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup controllers: %m");
+
+ for (;;) {
+ _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
+
+ controller = set_steal_first(controllers);
+ if (!controller)
+ break;
+
+ origin = path_join("/sys/fs/cgroup/", controller);
+ if (!origin)
+ return log_oom();
+
+ r = readlink_malloc(origin, &combined);
+ if (r == -EINVAL) {
+ /* Not a symbolic link, but directly a single cgroup hierarchy */
+
+ r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
+ if (r < 0)
+ return r;
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to read link %s: %m", origin);
+ else {
+ _cleanup_free_ char *target = NULL;
+
+ target = path_join(dest, origin);
+ if (!target)
+ return log_oom();
+
+ /* A symbolic link, a combination of controllers in one hierarchy */
+
+ if (!filename_is_valid(combined)) {
+ log_warning("Ignoring invalid combined hierarchy %s.", combined);
+ continue;
+ }
+
+ r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
+ if (r < 0)
+ return r;
+
+ r = symlink_idempotent(combined, target, false);
+ if (r == -EINVAL)
+ return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
+ }
+ }
+
+skip_controllers:
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
+ if (r < 0)
+ return r;
+
+ return mount_nofollow_verbose(LOG_ERR, NULL, cgroup_root, NULL,
+ MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
+}
+
+static int mount_unified_cgroups(const char *dest) {
+ const char *p;
+ int r;
+
+ assert(dest);
+
+ p = prefix_roota(dest, "/sys/fs/cgroup");
+
+ (void) mkdir_p(p, 0755);
+
+ r = path_is_mount_point(p, dest, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
+ if (r > 0) {
+ p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
+ if (access(p, F_OK) >= 0)
+ return 0;
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
+ }
+
+ return mount_nofollow_verbose(LOG_ERR, "cgroup", p, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+}
+
+int mount_cgroups(
+ const char *dest,
+ CGroupUnified unified_requested,
+ bool userns,
+ uid_t uid_shift,
+ uid_t uid_range,
+ const char *selinux_apifs_context,
+ bool use_cgns) {
+
+ if (unified_requested >= CGROUP_UNIFIED_ALL)
+ return mount_unified_cgroups(dest);
+ if (use_cgns)
+ return mount_legacy_cgns_supported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
+
+ return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
+}
+
+static int mount_systemd_cgroup_writable_one(const char *root, const char *own) {
+ int r;
+
+ assert(root);
+ assert(own);
+
+ /* Make our own cgroup a (writable) bind mount */
+ r = mount_nofollow_verbose(LOG_ERR, own, own, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ /* And then remount the systemd cgroup root read-only */
+ return mount_nofollow_verbose(LOG_ERR, NULL, root, NULL,
+ MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+}
+
+int mount_systemd_cgroup_writable(
+ const char *dest,
+ CGroupUnified unified_requested) {
+
+ _cleanup_free_ char *own_cgroup_path = NULL;
+ const char *root, *own;
+ int r;
+
+ assert(dest);
+
+ r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine our own cgroup path: %m");
+
+ /* If we are living in the top-level, then there's nothing to do... */
+ if (path_equal(own_cgroup_path, "/"))
+ return 0;
+
+ if (unified_requested >= CGROUP_UNIFIED_ALL) {
+
+ root = prefix_roota(dest, "/sys/fs/cgroup");
+ own = strjoina(root, own_cgroup_path);
+
+ } else {
+
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ root = prefix_roota(dest, "/sys/fs/cgroup/unified");
+ own = strjoina(root, own_cgroup_path);
+
+ r = mount_systemd_cgroup_writable_one(root, own);
+ if (r < 0)
+ return r;
+ }
+
+ root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
+ own = strjoina(root, own_cgroup_path);
+ }
+
+ return mount_systemd_cgroup_writable_one(root, own);
+}
diff --git a/src/nspawn/nspawn-cgroup.h b/src/nspawn/nspawn-cgroup.h
new file mode 100644
index 0000000..3f5ba62
--- /dev/null
+++ b/src/nspawn/nspawn-cgroup.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "cgroup-util.h"
+
+int chown_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift);
+int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift);
+int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested);
+
+int mount_cgroups(const char *dest, CGroupUnified unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool use_cgns);
+int mount_systemd_cgroup_writable(const char *dest, CGroupUnified unified_requested);
diff --git a/src/nspawn/nspawn-creds.c b/src/nspawn/nspawn-creds.c
new file mode 100644
index 0000000..0900d8c
--- /dev/null
+++ b/src/nspawn/nspawn-creds.c
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "nspawn-creds.h"
+
+static void credential_free(Credential *cred) {
+ assert(cred);
+
+ cred->id = mfree(cred->id);
+ cred->data = erase_and_free(cred->data);
+ cred->size = 0;
+}
+
+void credential_free_all(Credential *creds, size_t n) {
+ size_t i;
+
+ assert(creds || n == 0);
+
+ for (i = 0; i < n; i++)
+ credential_free(creds + i);
+
+ free(creds);
+}
diff --git a/src/nspawn/nspawn-creds.h b/src/nspawn/nspawn-creds.h
new file mode 100644
index 0000000..de0661b
--- /dev/null
+++ b/src/nspawn/nspawn-creds.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+typedef struct Credential {
+ char *id;
+ void *data;
+ size_t size;
+} Credential;
+
+void credential_free_all(Credential *creds, size_t n);
diff --git a/src/nspawn/nspawn-def.h b/src/nspawn/nspawn-def.h
new file mode 100644
index 0000000..32a20aa
--- /dev/null
+++ b/src/nspawn/nspawn-def.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+/* While we are chmod()ing a directory tree, we set the top-level UID base to this "busy" base, so that we can always
+ * recognize trees we are were chmod()ing recursively and got interrupted in */
+#define UID_BUSY_BASE ((uid_t) UINT32_C(0xFFFE0000))
+#define UID_BUSY_MASK ((uid_t) UINT32_C(0xFFFF0000))
diff --git a/src/nspawn/nspawn-expose-ports.c b/src/nspawn/nspawn-expose-ports.c
new file mode 100644
index 0000000..d8a37a3
--- /dev/null
+++ b/src/nspawn/nspawn-expose-ports.c
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "firewall-util.h"
+#include "in-addr-util.h"
+#include "local-addresses.h"
+#include "netlink-util.h"
+#include "nspawn-expose-ports.h"
+#include "parse-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+int expose_port_parse(ExposePort **l, const char *s) {
+
+ const char *split, *e;
+ uint16_t container_port, host_port;
+ int protocol;
+ ExposePort *p;
+ int r;
+
+ assert(l);
+ assert(s);
+
+ if ((e = startswith(s, "tcp:")))
+ protocol = IPPROTO_TCP;
+ else if ((e = startswith(s, "udp:")))
+ protocol = IPPROTO_UDP;
+ else {
+ e = s;
+ protocol = IPPROTO_TCP;
+ }
+
+ split = strchr(e, ':');
+ if (split) {
+ char v[split - e + 1];
+
+ memcpy(v, e, split - e);
+ v[split - e] = 0;
+
+ r = parse_ip_port(v, &host_port);
+ if (r < 0)
+ return -EINVAL;
+
+ r = parse_ip_port(split + 1, &container_port);
+ } else {
+ r = parse_ip_port(e, &container_port);
+ host_port = container_port;
+ }
+
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(ports, p, *l)
+ if (p->protocol == protocol && p->host_port == host_port)
+ return -EEXIST;
+
+ p = new(ExposePort, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (ExposePort) {
+ .protocol = protocol,
+ .host_port = host_port,
+ .container_port = container_port,
+ };
+
+ LIST_PREPEND(ports, *l, p);
+
+ return 0;
+}
+
+void expose_port_free_all(ExposePort *p) {
+
+ while (p) {
+ ExposePort *q = p;
+ LIST_REMOVE(ports, p, q);
+ free(q);
+ }
+}
+
+int expose_port_flush(ExposePort* l, union in_addr_union *exposed) {
+ ExposePort *p;
+ int r, af = AF_INET;
+
+ assert(exposed);
+
+ if (!l)
+ return 0;
+
+ if (in_addr_is_null(af, exposed))
+ return 0;
+
+ log_debug("Lost IP address.");
+
+ LIST_FOREACH(ports, p, l) {
+ r = fw_add_local_dnat(false,
+ af,
+ p->protocol,
+ NULL,
+ NULL, 0,
+ NULL, 0,
+ p->host_port,
+ exposed,
+ p->container_port,
+ NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to modify firewall: %m");
+ }
+
+ *exposed = IN_ADDR_NULL;
+ return 0;
+}
+
+int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ union in_addr_union new_exposed;
+ ExposePort *p;
+ bool add;
+ int af = AF_INET, r;
+
+ assert(exposed);
+
+ /* Invoked each time an address is added or removed inside the
+ * container */
+
+ if (!l)
+ return 0;
+
+ r = local_addresses(rtnl, 0, af, &addresses);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate local addresses: %m");
+
+ add = r > 0 &&
+ addresses[0].family == af &&
+ addresses[0].scope < RT_SCOPE_LINK;
+
+ if (!add)
+ return expose_port_flush(l, exposed);
+
+ new_exposed = addresses[0].address;
+ if (in_addr_equal(af, exposed, &new_exposed))
+ return 0;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *pretty = NULL;
+ in_addr_to_string(af, &new_exposed, &pretty);
+ log_debug("New container IP is %s.", strna(pretty));
+ }
+
+ LIST_FOREACH(ports, p, l) {
+
+ r = fw_add_local_dnat(true,
+ af,
+ p->protocol,
+ NULL,
+ NULL, 0,
+ NULL, 0,
+ p->host_port,
+ &new_exposed,
+ p->container_port,
+ in_addr_is_null(af, exposed) ? NULL : exposed);
+ if (r < 0)
+ log_warning_errno(r, "Failed to modify firewall: %m");
+ }
+
+ *exposed = new_exposed;
+ return 0;
+}
+
+int expose_port_send_rtnl(int send_fd) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(send_fd >= 0);
+
+ fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_ROUTE);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate container netlink: %m");
+
+ /* Store away the fd in the socket, so that it stays open as
+ * long as we run the child */
+ r = send_one_fd(send_fd, fd, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send netlink fd: %m");
+
+ return 0;
+}
+
+int expose_port_watch_rtnl(
+ sd_event *event,
+ int recv_fd,
+ sd_netlink_message_handler_t handler,
+ union in_addr_union *exposed,
+ sd_netlink **ret) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int fd, r;
+
+ assert(event);
+ assert(recv_fd >= 0);
+ assert(ret);
+
+ fd = receive_one_fd(recv_fd, 0);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to recv netlink fd: %m");
+
+ r = sd_netlink_open_fd(&rtnl, fd);
+ if (r < 0) {
+ safe_close(fd);
+ return log_error_errno(r, "Failed to create rtnl object: %m");
+ }
+
+ r = sd_netlink_add_match(rtnl, NULL, RTM_NEWADDR, handler, NULL, exposed, "nspawn-NEWADDR");
+ if (r < 0)
+ return log_error_errno(r, "Failed to subscribe to RTM_NEWADDR messages: %m");
+
+ r = sd_netlink_add_match(rtnl, NULL, RTM_DELADDR, handler, NULL, exposed, "nspawn-DELADDR");
+ if (r < 0)
+ return log_error_errno(r, "Failed to subscribe to RTM_DELADDR messages: %m");
+
+ r = sd_netlink_attach_event(rtnl, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add to event loop: %m");
+
+ *ret = TAKE_PTR(rtnl);
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-expose-ports.h b/src/nspawn/nspawn-expose-ports.h
new file mode 100644
index 0000000..cc834a4
--- /dev/null
+++ b/src/nspawn/nspawn-expose-ports.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "sd-event.h"
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef struct ExposePort {
+ int protocol;
+ uint16_t host_port;
+ uint16_t container_port;
+ LIST_FIELDS(struct ExposePort, ports);
+} ExposePort;
+
+void expose_port_free_all(ExposePort *p);
+int expose_port_parse(ExposePort **l, const char *s);
+
+int expose_port_watch_rtnl(sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, union in_addr_union *exposed, sd_netlink **ret);
+int expose_port_send_rtnl(int send_fd);
+
+int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed);
+int expose_port_flush(ExposePort* l, union in_addr_union *exposed);
diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf
new file mode 100644
index 0000000..79304d2
--- /dev/null
+++ b/src/nspawn/nspawn-gperf.gperf
@@ -0,0 +1,77 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "nspawn-settings.h"
+#include "nspawn-expose-ports.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name nspawn_gperf_hash
+%define lookup-function-name nspawn_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Exec.Boot, config_parse_boot, 0, 0
+Exec.Ephemeral, config_parse_bool, 0, offsetof(Settings, ephemeral)
+Exec.ProcessTwo, config_parse_pid2, 0, 0
+Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters)
+Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment)
+Exec.User, config_parse_string, 0, offsetof(Settings, user)
+Exec.Capability, config_parse_capability, 0, offsetof(Settings, capability)
+Exec.DropCapability, config_parse_capability, 0, offsetof(Settings, drop_capability)
+Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, kill_signal)
+Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality)
+Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id)
+Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory)
+Exec.PivotRoot, config_parse_pivot_root, 0, 0
+Exec.PrivateUsers, config_parse_private_users, 0, 0
+Exec.NotifyReady, config_parse_bool, 0, offsetof(Settings, notify_ready)
+Exec.SystemCallFilter, config_parse_syscall_filter, 0, 0,
+Exec.LimitCPU, config_parse_rlimit, RLIMIT_CPU, offsetof(Settings, rlimit)
+Exec.LimitFSIZE, config_parse_rlimit, RLIMIT_FSIZE, offsetof(Settings, rlimit)
+Exec.LimitDATA, config_parse_rlimit, RLIMIT_DATA, offsetof(Settings, rlimit)
+Exec.LimitSTACK, config_parse_rlimit, RLIMIT_STACK, offsetof(Settings, rlimit)
+Exec.LimitCORE, config_parse_rlimit, RLIMIT_CORE, offsetof(Settings, rlimit)
+Exec.LimitRSS, config_parse_rlimit, RLIMIT_RSS, offsetof(Settings, rlimit)
+Exec.LimitNOFILE, config_parse_rlimit, RLIMIT_NOFILE, offsetof(Settings, rlimit)
+Exec.LimitAS, config_parse_rlimit, RLIMIT_AS, offsetof(Settings, rlimit)
+Exec.LimitNPROC, config_parse_rlimit, RLIMIT_NPROC, offsetof(Settings, rlimit)
+Exec.LimitMEMLOCK, config_parse_rlimit, RLIMIT_MEMLOCK, offsetof(Settings, rlimit)
+Exec.LimitLOCKS, config_parse_rlimit, RLIMIT_LOCKS, offsetof(Settings, rlimit)
+Exec.LimitSIGPENDING, config_parse_rlimit, RLIMIT_SIGPENDING, offsetof(Settings, rlimit)
+Exec.LimitMSGQUEUE, config_parse_rlimit, RLIMIT_MSGQUEUE, offsetof(Settings, rlimit)
+Exec.LimitNICE, config_parse_rlimit, RLIMIT_NICE, offsetof(Settings, rlimit)
+Exec.LimitRTPRIO, config_parse_rlimit, RLIMIT_RTPRIO, offsetof(Settings, rlimit)
+Exec.LimitRTTIME, config_parse_rlimit, RLIMIT_RTTIME, offsetof(Settings, rlimit)
+Exec.Hostname, config_parse_hostname, 0, offsetof(Settings, hostname)
+Exec.NoNewPrivileges, config_parse_tristate, 0, offsetof(Settings, no_new_privileges)
+Exec.OOMScoreAdjust, config_parse_oom_score_adjust, 0, 0
+Exec.CPUAffinity, config_parse_cpu_affinity, 0, 0
+Exec.ResolvConf, config_parse_resolv_conf, 0, offsetof(Settings, resolv_conf)
+Exec.LinkJournal, config_parse_link_journal, 0, 0
+Exec.Timezone, config_parse_timezone, 0, offsetof(Settings, timezone)
+Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)
+Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode)
+Files.Bind, config_parse_bind, 0, 0
+Files.BindReadOnly, config_parse_bind, 1, 0
+Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0
+Files.Inaccessible, config_parse_inaccessible, 0, 0
+Files.Overlay, config_parse_overlay, 0, 0
+Files.OverlayReadOnly, config_parse_overlay, 1, 0
+Files.PrivateUsersChown, config_parse_tristate, 0, offsetof(Settings, userns_chown)
+Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network)
+Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces)
+Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan)
+Network.IPVLAN, config_parse_strv, 0, offsetof(Settings, network_ipvlan)
+Network.VirtualEthernet, config_parse_tristate, 0, offsetof(Settings, network_veth)
+Network.VirtualEthernetExtra, config_parse_veth_extra, 0, 0
+Network.Bridge, config_parse_ifname, 0, offsetof(Settings, network_bridge)
+Network.Zone, config_parse_network_zone, 0, 0
+Network.Port, config_parse_expose_port, 0, 0
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
new file mode 100644
index 0000000..2ea1bed
--- /dev/null
+++ b/src/nspawn/nspawn-mount.c
@@ -0,0 +1,1312 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+#include <linux/magic.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "label.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "nspawn-mount.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "set.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t) {
+ CustomMount *c, *ret;
+
+ assert(l);
+ assert(n);
+ assert(t >= 0);
+ assert(t < _CUSTOM_MOUNT_TYPE_MAX);
+
+ c = reallocarray(*l, *n + 1, sizeof(CustomMount));
+ if (!c)
+ return NULL;
+
+ *l = c;
+ ret = *l + *n;
+ (*n)++;
+
+ *ret = (CustomMount) {
+ .type = t
+ };
+
+ return ret;
+}
+
+void custom_mount_free_all(CustomMount *l, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ CustomMount *m = l + i;
+
+ free(m->source);
+ free(m->destination);
+ free(m->options);
+
+ if (m->work_dir) {
+ (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ free(m->work_dir);
+ }
+
+ if (m->rm_rf_tmpdir) {
+ (void) rm_rf(m->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ free(m->rm_rf_tmpdir);
+ }
+
+ strv_free(m->lower);
+ free(m->type_argument);
+ }
+
+ free(l);
+}
+
+static int custom_mount_compare(const CustomMount *a, const CustomMount *b) {
+ int r;
+
+ r = path_compare(a->destination, b->destination);
+ if (r != 0)
+ return r;
+
+ return CMP(a->type, b->type);
+}
+
+static bool source_path_is_valid(const char *p) {
+ assert(p);
+
+ if (*p == '+')
+ p++;
+
+ return path_is_absolute(p);
+}
+
+static char *resolve_source_path(const char *dest, const char *source) {
+
+ if (!source)
+ return NULL;
+
+ if (source[0] == '+')
+ return path_join(dest, source + 1);
+
+ return strdup(source);
+}
+
+static int allocate_temporary_source(CustomMount *m) {
+ assert(m);
+ assert(!m->source);
+ assert(!m->rm_rf_tmpdir);
+
+ m->rm_rf_tmpdir = strdup("/var/tmp/nspawn-temp-XXXXXX");
+ if (!m->rm_rf_tmpdir)
+ return log_oom();
+
+ if (!mkdtemp(m->rm_rf_tmpdir)) {
+ m->rm_rf_tmpdir = mfree(m->rm_rf_tmpdir);
+ return log_error_errno(errno, "Failed to acquire temporary directory: %m");
+ }
+
+ m->source = path_join(m->rm_rf_tmpdir, "src");
+ if (!m->source)
+ return log_oom();
+
+ if (mkdir(m->source, 0755) < 0)
+ return log_error_errno(errno, "Failed to create %s: %m", m->source);
+
+ return 0;
+}
+
+int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
+ size_t i;
+ int r;
+
+ /* Prepare all custom mounts. This will make source we know all temporary directories. This is called in the
+ * parent process, so that we know the temporary directories to remove on exit before we fork off the
+ * children. */
+
+ assert(l || n == 0);
+
+ /* Order the custom mounts, and make sure we have a working directory */
+ typesafe_qsort(l, n, custom_mount_compare);
+
+ for (i = 0; i < n; i++) {
+ CustomMount *m = l + i;
+
+ /* /proc we mount in the inner child, i.e. when we acquired CLONE_NEWPID. All other mounts we mount
+ * already in the outer child, so that the mounts are already established before CLONE_NEWPID and in
+ * particular CLONE_NEWUSER. This also means any custom mounts below /proc also need to be mounted in
+ * the inner child, not the outer one. Determine this here. */
+ m->in_userns = path_startswith(m->destination, "/proc");
+
+ if (m->type == CUSTOM_MOUNT_BIND) {
+ if (m->source) {
+ char *s;
+
+ s = resolve_source_path(dest, m->source);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(m->source, s);
+ } else {
+ /* No source specified? In that case, use a throw-away temporary directory in /var/tmp */
+
+ r = allocate_temporary_source(m);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (m->type == CUSTOM_MOUNT_OVERLAY) {
+ char **j;
+
+ STRV_FOREACH(j, m->lower) {
+ char *s;
+
+ s = resolve_source_path(dest, *j);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(*j, s);
+ }
+
+ if (m->source) {
+ char *s;
+
+ s = resolve_source_path(dest, m->source);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(m->source, s);
+ } else {
+ r = allocate_temporary_source(m);
+ if (r < 0)
+ return r;
+ }
+
+ if (m->work_dir) {
+ char *s;
+
+ s = resolve_source_path(dest, m->work_dir);
+ if (!s)
+ return log_oom();
+
+ free_and_replace(m->work_dir, s);
+ } else {
+ r = tempfn_random(m->source, NULL, &m->work_dir);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire working directory: %m");
+ }
+
+ (void) mkdir_label(m->work_dir, 0700);
+ }
+ }
+
+ return 0;
+}
+
+int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
+ _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
+ const char *p = s;
+ CustomMount *m;
+ int r;
+
+ assert(l);
+ assert(n);
+
+ r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (r == 1) {
+ destination = strdup(source[0] == '+' ? source+1 : source);
+ if (!destination)
+ return -ENOMEM;
+ }
+ if (r == 2 && !isempty(p)) {
+ opts = strdup(p);
+ if (!opts)
+ return -ENOMEM;
+ }
+
+ if (isempty(source))
+ source = mfree(source);
+ else if (!source_path_is_valid(source))
+ return -EINVAL;
+
+ if (!path_is_absolute(destination))
+ return -EINVAL;
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
+ if (!m)
+ return -ENOMEM;
+
+ m->source = TAKE_PTR(source);
+ m->destination = TAKE_PTR(destination);
+ m->read_only = read_only;
+ m->options = TAKE_PTR(opts);
+
+ return 0;
+}
+
+int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
+ _cleanup_free_ char *path = NULL, *opts = NULL;
+ const char *p = s;
+ CustomMount *m;
+ int r;
+
+ assert(l);
+ assert(n);
+ assert(s);
+
+ r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ if (isempty(p))
+ opts = strdup("mode=0755");
+ else
+ opts = strdup(p);
+ if (!opts)
+ return -ENOMEM;
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
+ if (!m)
+ return -ENOMEM;
+
+ m->destination = TAKE_PTR(path);
+ m->options = TAKE_PTR(opts);
+
+ return 0;
+}
+
+int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
+ _cleanup_free_ char *upper = NULL, *destination = NULL;
+ _cleanup_strv_free_ char **lower = NULL;
+ CustomMount *m;
+ int k;
+
+ k = strv_split_full(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (k < 0)
+ return k;
+ if (k < 2)
+ return -EADDRNOTAVAIL;
+ if (k == 2) {
+ /* If two parameters are specified, the first one is the lower, the second one the upper directory. And
+ * we'll also define the destination mount point the same as the upper. */
+
+ if (!source_path_is_valid(lower[0]) ||
+ !source_path_is_valid(lower[1]))
+ return -EINVAL;
+
+ upper = TAKE_PTR(lower[1]);
+
+ destination = strdup(upper[0] == '+' ? upper+1 : upper); /* take the destination without "+" prefix */
+ if (!destination)
+ return -ENOMEM;
+ } else {
+ char **i;
+
+ /* If more than two parameters are specified, the last one is the destination, the second to last one
+ * the "upper", and all before that the "lower" directories. */
+
+ destination = lower[k - 1];
+ upper = TAKE_PTR(lower[k - 2]);
+
+ STRV_FOREACH(i, lower)
+ if (!source_path_is_valid(*i))
+ return -EINVAL;
+
+ /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory
+ * in /var/tmp */
+ if (isempty(upper))
+ upper = mfree(upper);
+ else if (!source_path_is_valid(upper))
+ return -EINVAL;
+
+ if (!path_is_absolute(destination))
+ return -EINVAL;
+ }
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
+ if (!m)
+ return -ENOMEM;
+
+ m->destination = TAKE_PTR(destination);
+ m->source = TAKE_PTR(upper);
+ m->lower = TAKE_PTR(lower);
+ m->read_only = read_only;
+
+ return 0;
+}
+
+int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s) {
+ _cleanup_free_ char *path = NULL;
+ CustomMount *m;
+
+ assert(l);
+ assert(n);
+ assert(s);
+
+ if (!path_is_absolute(s))
+ return -EINVAL;
+
+ path = strdup(s);
+ if (!path)
+ return -ENOMEM;
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_INACCESSIBLE);
+ if (!m)
+ return -ENOMEM;
+
+ m->destination = TAKE_PTR(path);
+ return 0;
+}
+
+int tmpfs_patch_options(
+ const char *options,
+ uid_t uid_shift,
+ const char *selinux_apifs_context,
+ char **ret) {
+
+ char *buf = NULL;
+
+ if (uid_shift != UID_INVALID) {
+ if (asprintf(&buf, "%s%suid=" UID_FMT ",gid=" UID_FMT,
+ strempty(options), options ? "," : "",
+ uid_shift, uid_shift) < 0)
+ return -ENOMEM;
+
+ options = buf;
+ }
+
+#if HAVE_SELINUX
+ if (selinux_apifs_context) {
+ char *t;
+
+ t = strjoin(strempty(options), options ? "," : "",
+ "context=\"", selinux_apifs_context, "\"");
+ free(buf);
+ if (!t)
+ return -ENOMEM;
+
+ buf = t;
+ }
+#endif
+
+ if (!buf && options) {
+ buf = strdup(options);
+ if (!buf)
+ return -ENOMEM;
+ }
+ *ret = buf;
+
+ return !!buf;
+}
+
+int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
+ const char *full, *top, *x;
+ int r;
+ unsigned long extra_flags = 0;
+
+ top = prefix_roota(dest, "/sys");
+ r = path_is_fs_type(top, SYSFS_MAGIC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
+ /* /sys might already be mounted as sysfs by the outer child in the
+ * !netns case. In this case, it's all good. Don't touch it because we
+ * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
+ */
+ if (r > 0)
+ return 0;
+
+ full = prefix_roota(top, "/full");
+
+ (void) mkdir(full, 0755);
+
+ if (FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO))
+ extra_flags |= MS_RDONLY;
+
+ r = mount_nofollow_verbose(LOG_ERR, "sysfs", full, "sysfs",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL);
+ if (r < 0)
+ return r;
+
+ FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
+ _cleanup_free_ char *from = NULL, *to = NULL;
+
+ from = path_join(full, x);
+ if (!from)
+ return log_oom();
+
+ to = path_join(top, x);
+ if (!to)
+ return log_oom();
+
+ (void) mkdir(to, 0755);
+
+ r = mount_nofollow_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ r = mount_nofollow_verbose(LOG_ERR, NULL, to, NULL,
+ MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = umount_verbose(LOG_ERR, full, UMOUNT_NOFOLLOW);
+ if (r < 0)
+ return r;
+
+ if (rmdir(full) < 0)
+ return log_error_errno(errno, "Failed to remove %s: %m", full);
+
+ /* Create mountpoint for cgroups. Otherwise we are not allowed since we
+ * remount /sys read-only.
+ */
+ x = prefix_roota(top, "/fs/cgroup");
+ (void) mkdir_p(x, 0755);
+
+ return mount_nofollow_verbose(LOG_ERR, NULL, top, NULL,
+ MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
+}
+
+int mount_all(const char *dest,
+ MountSettingsMask mount_settings,
+ uid_t uid_shift,
+ const char *selinux_apifs_context) {
+
+#define PROC_INACCESSIBLE_REG(path) \
+ { "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
+ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
+ { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
+ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
+
+#define PROC_READ_ONLY(path) \
+ { (path), (path), NULL, NULL, MS_BIND, \
+ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
+ { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
+ MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
+
+ typedef struct MountPoint {
+ const char *what;
+ const char *where;
+ const char *type;
+ const char *options;
+ unsigned long flags;
+ MountSettingsMask mount_settings;
+ } MountPoint;
+
+ static const MountPoint mount_table[] = {
+ /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing) */
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_MKDIR|MOUNT_FOLLOW_SYMLINKS }, /* we follow symlinks here since not following them requires /proc/ already being mounted, which we don't have here. */
+
+ { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND,
+ MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */
+
+ { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND,
+ MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
+
+ { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
+ MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */
+
+ /* Make these files inaccessible to container payloads: they potentially leak information about kernel
+ * internals or the host's execution environment to the container */
+ PROC_INACCESSIBLE_REG("/proc/kallsyms"),
+ PROC_INACCESSIBLE_REG("/proc/kcore"),
+ PROC_INACCESSIBLE_REG("/proc/keys"),
+ PROC_INACCESSIBLE_REG("/proc/sysrq-trigger"),
+ PROC_INACCESSIBLE_REG("/proc/timer_list"),
+
+ /* Make these directories read-only to container payloads: they show hardware information, and in some
+ * cases contain tunables the container really shouldn't have access to. */
+ PROC_READ_ONLY("/proc/acpi"),
+ PROC_READ_ONLY("/proc/apm"),
+ PROC_READ_ONLY("/proc/asound"),
+ PROC_READ_ONLY("/proc/bus"),
+ PROC_READ_ONLY("/proc/fs"),
+ PROC_READ_ONLY("/proc/irq"),
+ PROC_READ_ONLY("/proc/scsi"),
+
+ { "mqueue", "/dev/mqueue", "mqueue", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_IN_USERNS|MOUNT_MKDIR },
+
+ /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */
+ { "tmpfs", "/tmp", "tmpfs", "mode=1777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR },
+ { "tmpfs", "/sys", "tmpfs", "mode=555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR },
+ { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR }, /* skipped if above was mounted */
+ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ MOUNT_FATAL|MOUNT_MKDIR }, /* skipped if above was mounted */
+ { "tmpfs", "/dev", "tmpfs", "mode=755" TMPFS_LIMITS_DEV, MS_NOSUID|MS_STRICTATIME,
+ MOUNT_FATAL|MOUNT_MKDIR },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ MOUNT_FATAL|MOUNT_MKDIR },
+ { "tmpfs", "/run", "tmpfs", "mode=755" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ MOUNT_FATAL|MOUNT_MKDIR },
+ { "/run/host", "/run/host", NULL, NULL, MS_BIND,
+ MOUNT_FATAL|MOUNT_MKDIR|MOUNT_PREFIX_ROOT }, /* Prepare this so that we can make it read-only when we are done */
+ { "/etc/os-release", "/run/host/os-release", NULL, NULL, MS_BIND,
+ MOUNT_TOUCH }, /* As per kernel interface requirements, bind mount first (creating mount points) and make read-only later */
+ { "/usr/lib/os-release", "/run/host/os-release", NULL, NULL, MS_BIND,
+ MOUNT_FATAL }, /* If /etc/os-release doesn't exist use the version in /usr/lib as fallback */
+ { NULL, "/run/host/os-release", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
+ MOUNT_FATAL },
+ { NULL, "/run/host", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
+ MOUNT_FATAL|MOUNT_IN_USERNS },
+#if HAVE_SELINUX
+ { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
+ MOUNT_MKDIR }, /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */
+ { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
+ 0 }, /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */
+#endif
+ };
+
+ bool use_userns = FLAGS_SET(mount_settings, MOUNT_USE_USERNS);
+ bool netns = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_NETNS);
+ bool ro = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO);
+ bool in_userns = FLAGS_SET(mount_settings, MOUNT_IN_USERNS);
+ bool tmpfs_tmp = FLAGS_SET(mount_settings, MOUNT_APPLY_TMPFS_TMP);
+ size_t k;
+ int r;
+
+ for (k = 0; k < ELEMENTSOF(mount_table); k++) {
+ _cleanup_free_ char *where = NULL, *options = NULL, *prefixed = NULL;
+ bool fatal = FLAGS_SET(mount_table[k].mount_settings, MOUNT_FATAL);
+ const char *o;
+
+ if (in_userns != FLAGS_SET(mount_table[k].mount_settings, MOUNT_IN_USERNS))
+ continue;
+
+ if (!netns && FLAGS_SET(mount_table[k].mount_settings, MOUNT_APPLY_APIVFS_NETNS))
+ continue;
+
+ if (!ro && FLAGS_SET(mount_table[k].mount_settings, MOUNT_APPLY_APIVFS_RO))
+ continue;
+
+ if (!tmpfs_tmp && FLAGS_SET(mount_table[k].mount_settings, MOUNT_APPLY_TMPFS_TMP))
+ continue;
+
+ r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where);
+
+ /* Skip this entry if it is not a remount. */
+ if (mount_table[k].what) {
+ r = path_is_mount_point(where, NULL, 0);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
+ if (r > 0)
+ continue;
+ }
+
+ if ((mount_table[k].mount_settings & (MOUNT_MKDIR|MOUNT_TOUCH)) != 0) {
+ uid_t u = (use_userns && !in_userns) ? uid_shift : UID_INVALID;
+
+ if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_TOUCH))
+ r = mkdir_parents_safe(dest, where, 0755, u, u, 0);
+ else
+ r = mkdir_p_safe(dest, where, 0755, u, u, 0);
+ if (r < 0 && r != -EEXIST) {
+ if (fatal && r != -EROFS)
+ return log_error_errno(r, "Failed to create directory %s: %m", where);
+
+ log_debug_errno(r, "Failed to create directory %s: %m", where);
+
+ /* If we failed mkdir() or chown() due to the root directory being read only,
+ * attempt to mount this fs anyway and let mount_verbose log any errors */
+ if (r != -EROFS)
+ continue;
+ }
+ }
+
+ if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_TOUCH)) {
+ r = touch(where);
+ if (r < 0 && r != -EEXIST) {
+ if (fatal && r != -EROFS)
+ return log_error_errno(r, "Failed to create file %s: %m", where);
+
+ log_debug_errno(r, "Failed to create file %s: %m", where);
+ if (r != -EROFS)
+ continue;
+ }
+ }
+
+ o = mount_table[k].options;
+ if (streq_ptr(mount_table[k].type, "tmpfs")) {
+ r = tmpfs_patch_options(o, in_userns ? 0 : uid_shift, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ o = options;
+ }
+
+ if (FLAGS_SET(mount_table[k].mount_settings, MOUNT_PREFIX_ROOT)) {
+ /* Optionally prefix the mount source with the root dir. This is useful in bind
+ * mounts to be created within the container image before we transition into it. Note
+ * that MOUNT_IN_USERNS is run after we transitioned hence prefixing is not ncessary
+ * for those. */
+ r = chase_symlinks(mount_table[k].what, dest, CHASE_PREFIX_ROOT, &prefixed, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].what);
+ }
+
+ r = mount_verbose_full(
+ fatal ? LOG_ERR : LOG_DEBUG,
+ prefixed ?: mount_table[k].what,
+ where,
+ mount_table[k].type,
+ mount_table[k].flags,
+ o,
+ FLAGS_SET(mount_table[k].mount_settings, MOUNT_FOLLOW_SYMLINKS));
+ if (r < 0 && fatal)
+ return r;
+ }
+
+ return 0;
+}
+
+static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
+ const char *p = options;
+ unsigned long flags = *mount_flags;
+ char *opts = NULL;
+ int r;
+
+ assert(options);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract mount option: %m");
+ if (r == 0)
+ break;
+
+ if (streq(word, "rbind"))
+ flags |= MS_REC;
+ else if (streq(word, "norbind"))
+ flags &= ~MS_REC;
+ else {
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid bind mount option: %s",
+ word);
+ }
+ }
+
+ *mount_flags = flags;
+ /* in the future mount_opts will hold string options for mount(2) */
+ *mount_opts = opts;
+
+ return 0;
+}
+
+static int mount_bind(const char *dest, CustomMount *m) {
+ _cleanup_free_ char *mount_opts = NULL, *where = NULL;
+ unsigned long mount_flags = MS_BIND | MS_REC;
+ struct stat source_st, dest_st;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ if (m->options) {
+ r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
+ if (r < 0)
+ return r;
+ }
+
+ if (stat(m->source, &source_st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", m->source);
+
+ r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+ if (r > 0) { /* Path exists already? */
+
+ if (stat(where, &dest_st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", where);
+
+ if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot bind mount directory %s on file %s.",
+ m->source, where);
+
+ if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot bind mount file %s on directory %s.",
+ m->source, where);
+
+ } else { /* Path doesn't exist yet? */
+ r = mkdir_parents_label(where, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make parents of %s: %m", where);
+
+ /* Create the mount point. Any non-directory file can be
+ * mounted on any non-directory file (regular, fifo, socket,
+ * char, block).
+ */
+ if (S_ISDIR(source_st.st_mode))
+ r = mkdir_label(where, 0755);
+ else
+ r = touch(where);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create mount point %s: %m", where);
+ }
+
+ r = mount_nofollow_verbose(LOG_ERR, m->source, where, NULL, mount_flags, mount_opts);
+ if (r < 0)
+ return r;
+
+ if (m->read_only) {
+ r = bind_remount_recursive(where, MS_RDONLY, MS_RDONLY, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Read-only bind mount failed: %m");
+ }
+
+ return 0;
+}
+
+static int mount_tmpfs(const char *dest, CustomMount *m, uid_t uid_shift, const char *selinux_apifs_context) {
+
+ const char *options;
+ _cleanup_free_ char *buf = NULL, *where = NULL;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+ if (r == 0) { /* Doesn't exist yet? */
+ r = mkdir_p_label(where, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
+ }
+
+ r = tmpfs_patch_options(m->options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
+ if (r < 0)
+ return log_oom();
+ options = r > 0 ? buf : m->options;
+
+ return mount_nofollow_verbose(LOG_ERR, "tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options);
+}
+
+static char *joined_and_escaped_lower_dirs(char **lower) {
+ _cleanup_strv_free_ char **sv = NULL;
+
+ sv = strv_copy(lower);
+ if (!sv)
+ return NULL;
+
+ strv_reverse(sv);
+
+ if (!strv_shell_escape(sv, ",:"))
+ return NULL;
+
+ return strv_join(sv, ":");
+}
+
+static int mount_overlay(const char *dest, CustomMount *m) {
+ _cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL;
+ const char *options;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+ if (r == 0) { /* Doesn't exist yet? */
+ r = mkdir_label(where, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
+ }
+
+ (void) mkdir_p_label(m->source, 0755);
+
+ lower = joined_and_escaped_lower_dirs(m->lower);
+ if (!lower)
+ return log_oom();
+
+ escaped_source = shell_escape(m->source, ",:");
+ if (!escaped_source)
+ return log_oom();
+
+ if (m->read_only)
+ options = strjoina("lowerdir=", escaped_source, ":", lower);
+ else {
+ _cleanup_free_ char *escaped_work_dir = NULL;
+
+ escaped_work_dir = shell_escape(m->work_dir, ",:");
+ if (!escaped_work_dir)
+ return log_oom();
+
+ options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
+ }
+
+ return mount_nofollow_verbose(LOG_ERR, "overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options);
+}
+
+static int mount_inaccessible(const char *dest, CustomMount *m) {
+ _cleanup_free_ char *where = NULL, *source = NULL;
+ struct stat st;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ r = chase_symlinks_and_stat(m->destination, dest, CHASE_PREFIX_ROOT, &where, &st, NULL);
+ if (r < 0) {
+ log_full_errno(m->graceful ? LOG_DEBUG : LOG_ERR, r, "Failed to resolve %s/%s: %m", dest, m->destination);
+ return m->graceful ? 0 : r;
+ }
+
+ r = mode_to_inaccessible_node(NULL, st.st_mode, &source);
+ if (r < 0)
+ return m->graceful ? 0 : r;
+
+ r = mount_nofollow_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, source, where, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return m->graceful ? 0 : r;
+
+ r = mount_nofollow_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, NULL, where, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
+ if (r < 0) {
+ (void) umount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, where, UMOUNT_NOFOLLOW);
+ return m->graceful ? 0 : r;
+ }
+
+ return 0;
+}
+
+static int mount_arbitrary(const char *dest, CustomMount *m) {
+ _cleanup_free_ char *where = NULL;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
+ if (r == 0) { /* Doesn't exist yet? */
+ r = mkdir_p_label(where, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Creating mount point for mount %s failed: %m", where);
+ }
+
+ return mount_nofollow_verbose(LOG_ERR, m->source, where, m->type_argument, 0, m->options);
+}
+
+int mount_custom(
+ const char *dest,
+ CustomMount *mounts, size_t n,
+ uid_t uid_shift,
+ const char *selinux_apifs_context,
+ MountSettingsMask mount_settings) {
+
+ size_t i;
+ int r;
+
+ assert(dest);
+
+ for (i = 0; i < n; i++) {
+ CustomMount *m = mounts + i;
+
+ if (FLAGS_SET(mount_settings, MOUNT_IN_USERNS) != m->in_userns)
+ continue;
+
+ if (FLAGS_SET(mount_settings, MOUNT_ROOT_ONLY) && !path_equal(m->destination, "/"))
+ continue;
+
+ if (FLAGS_SET(mount_settings, MOUNT_NON_ROOT_ONLY) && path_equal(m->destination, "/"))
+ continue;
+
+ switch (m->type) {
+
+ case CUSTOM_MOUNT_BIND:
+ r = mount_bind(dest, m);
+ break;
+
+ case CUSTOM_MOUNT_TMPFS:
+ r = mount_tmpfs(dest, m, uid_shift, selinux_apifs_context);
+ break;
+
+ case CUSTOM_MOUNT_OVERLAY:
+ r = mount_overlay(dest, m);
+ break;
+
+ case CUSTOM_MOUNT_INACCESSIBLE:
+ r = mount_inaccessible(dest, m);
+ break;
+
+ case CUSTOM_MOUNT_ARBITRARY:
+ r = mount_arbitrary(dest, m);
+ break;
+
+ default:
+ assert_not_reached("Unknown custom mount type");
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+bool has_custom_root_mount(const CustomMount *mounts, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ const CustomMount *m = mounts + i;
+
+ if (path_equal(m->destination, "/"))
+ return true;
+ }
+
+ return false;
+}
+
+static int setup_volatile_state(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
+
+ _cleanup_free_ char *buf = NULL;
+ const char *p, *options;
+ int r;
+
+ assert(directory);
+
+ /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */
+
+ r = bind_remount_recursive(directory, MS_RDONLY, MS_RDONLY, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
+
+ p = prefix_roota(directory, "/var");
+ r = mkdir(p, 0755);
+ if (r < 0 && errno != EEXIST)
+ return log_error_errno(errno, "Failed to create %s: %m", directory);
+
+ options = "mode=755" TMPFS_LIMITS_VOLATILE_STATE;
+ r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ options = buf;
+
+ return mount_nofollow_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
+}
+
+static int setup_volatile_yes(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
+
+ bool tmpfs_mounted = false, bind_mounted = false;
+ char template[] = "/tmp/nspawn-volatile-XXXXXX";
+ _cleanup_free_ char *buf = NULL, *bindir = NULL;
+ const char *f, *t, *options;
+ struct stat st;
+ int r;
+
+ assert(directory);
+
+ /* --volatile=yes means we mount a tmpfs to the root dir, and the original /usr to use inside it, and
+ * that read-only. Before we start setting this up let's validate if the image has the /usr merge
+ * implemented, and let's output a friendly log message if it hasn't. */
+
+ bindir = path_join(directory, "/bin");
+ if (!bindir)
+ return log_oom();
+ if (lstat(bindir, &st) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat /bin directory below image: %m");
+
+ /* ENOENT is fine, just means the image is probably just a naked /usr and we can create the
+ * rest. */
+ } else if (S_ISDIR(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EISDIR),
+ "Sorry, --volatile=yes mode is not supported with OS images that have not merged /bin/, /sbin/, /lib/, /lib64/ into /usr/. "
+ "Please work with your distribution and help them adopt the merged /usr scheme.");
+ else if (!S_ISLNK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Error starting image: if --volatile=yes is used /bin must be a symlink (for merged /usr support) or non-existent (in which case a symlink is created automatically).");
+
+ if (!mkdtemp(template))
+ return log_error_errno(errno, "Failed to create temporary directory: %m");
+
+ options = "mode=755" TMPFS_LIMITS_ROOTFS;
+ r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
+ if (r < 0)
+ goto fail;
+ if (r > 0)
+ options = buf;
+
+ r = mount_nofollow_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
+ if (r < 0)
+ goto fail;
+
+ tmpfs_mounted = true;
+
+ f = prefix_roota(directory, "/usr");
+ t = prefix_roota(template, "/usr");
+
+ r = mkdir(t, 0755);
+ if (r < 0 && errno != EEXIST) {
+ r = log_error_errno(errno, "Failed to create %s: %m", t);
+ goto fail;
+ }
+
+ r = mount_nofollow_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ goto fail;
+
+ bind_mounted = true;
+
+ r = bind_remount_recursive(t, MS_RDONLY, MS_RDONLY, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to remount %s read-only: %m", t);
+ goto fail;
+ }
+
+ r = mount_nofollow_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto fail;
+
+ (void) rmdir(template);
+
+ return 0;
+
+fail:
+ if (bind_mounted)
+ (void) umount_verbose(LOG_ERR, t, UMOUNT_NOFOLLOW);
+
+ if (tmpfs_mounted)
+ (void) umount_verbose(LOG_ERR, template, UMOUNT_NOFOLLOW);
+
+ (void) rmdir(template);
+ return r;
+}
+
+static int setup_volatile_overlay(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
+
+ _cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL;
+ char template[] = "/tmp/nspawn-volatile-XXXXXX";
+ const char *upper, *work, *options;
+ bool tmpfs_mounted = false;
+ int r;
+
+ assert(directory);
+
+ /* --volatile=overlay means we mount an overlayfs to the root dir. */
+
+ if (!mkdtemp(template))
+ return log_error_errno(errno, "Failed to create temporary directory: %m");
+
+ options = "mode=755" TMPFS_LIMITS_ROOTFS;
+ r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
+ if (r < 0)
+ goto finish;
+ if (r > 0)
+ options = buf;
+
+ r = mount_nofollow_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
+ if (r < 0)
+ goto finish;
+
+ tmpfs_mounted = true;
+
+ upper = strjoina(template, "/upper");
+ work = strjoina(template, "/work");
+
+ if (mkdir(upper, 0755) < 0) {
+ r = log_error_errno(errno, "Failed to create %s: %m", upper);
+ goto finish;
+ }
+ if (mkdir(work, 0755) < 0) {
+ r = log_error_errno(errno, "Failed to create %s: %m", work);
+ goto finish;
+ }
+
+ /* And now, let's overmount the root dir with an overlayfs that uses the root dir as lower dir. It's kinda nice
+ * that the kernel allows us to do that without going through some mount point rearrangements. */
+
+ escaped_directory = shell_escape(directory, ",:");
+ escaped_upper = shell_escape(upper, ",:");
+ escaped_work = shell_escape(work, ",:");
+ if (!escaped_directory || !escaped_upper || !escaped_work) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ options = strjoina("lowerdir=", escaped_directory, ",upperdir=", escaped_upper, ",workdir=", escaped_work);
+ r = mount_nofollow_verbose(LOG_ERR, "overlay", directory, "overlay", 0, options);
+
+finish:
+ if (tmpfs_mounted)
+ (void) umount_verbose(LOG_ERR, template, UMOUNT_NOFOLLOW);
+
+ (void) rmdir(template);
+ return r;
+}
+
+int setup_volatile_mode(
+ const char *directory,
+ VolatileMode mode,
+ uid_t uid_shift,
+ const char *selinux_apifs_context) {
+
+ switch (mode) {
+
+ case VOLATILE_YES:
+ return setup_volatile_yes(directory, uid_shift, selinux_apifs_context);
+
+ case VOLATILE_STATE:
+ return setup_volatile_state(directory, uid_shift, selinux_apifs_context);
+
+ case VOLATILE_OVERLAY:
+ return setup_volatile_overlay(directory, uid_shift, selinux_apifs_context);
+
+ default:
+ return 0;
+ }
+}
+
+/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
+int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
+ _cleanup_free_ char *root_new = NULL, *root_old = NULL;
+ const char *p = s;
+ int r;
+
+ assert(pivot_root_new);
+ assert(pivot_root_old);
+
+ r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ if (isempty(p))
+ root_old = NULL;
+ else {
+ root_old = strdup(p);
+ if (!root_old)
+ return -ENOMEM;
+ }
+
+ if (!path_is_absolute(root_new))
+ return -EINVAL;
+ if (root_old && !path_is_absolute(root_old))
+ return -EINVAL;
+
+ free_and_replace(*pivot_root_new, root_new);
+ free_and_replace(*pivot_root_old, root_old);
+
+ return 0;
+}
+
+int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
+ _cleanup_free_ char *directory_pivot_root_new = NULL;
+ _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
+ char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX";
+ bool remove_pivot_tmp = false;
+ int r;
+
+ assert(directory);
+
+ if (!pivot_root_new)
+ return 0;
+
+ /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
+ * If pivot_root_old is NULL, the existing / disappears.
+ * This requires a temporary directory, pivot_tmp, which is
+ * not a child of either.
+ *
+ * This is typically used for OSTree-style containers, where
+ * the root partition contains several sysroots which could be
+ * run. Normally, one would be chosen by the bootloader and
+ * pivoted to / by initramfs.
+ *
+ * For example, for an OSTree deployment, pivot_root_new
+ * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
+ * code doesn’t do the /var mount which OSTree expects: use
+ * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
+ *
+ * So in the OSTree case, we’ll end up with something like:
+ * - directory = /tmp/nspawn-root-123456
+ * - pivot_root_new = /ostree/deploy/os/deploy/123abc
+ * - pivot_root_old = /sysroot
+ * - directory_pivot_root_new =
+ * /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
+ * - pivot_tmp = /tmp/nspawn-pivot-123456
+ * - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
+ *
+ * Requires all file systems at directory and below to be mounted
+ * MS_PRIVATE or MS_SLAVE so they can be moved.
+ */
+ directory_pivot_root_new = path_join(directory, pivot_root_new);
+ if (!directory_pivot_root_new)
+ return log_oom();
+
+ /* Remount directory_pivot_root_new to make it movable. */
+ r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
+ if (r < 0)
+ goto done;
+
+ if (pivot_root_old) {
+ if (!mkdtemp(pivot_tmp)) {
+ r = log_error_errno(errno, "Failed to create temporary directory: %m");
+ goto done;
+ }
+
+ remove_pivot_tmp = true;
+ pivot_tmp_pivot_root_old = path_join(pivot_tmp, pivot_root_old);
+ if (!pivot_tmp_pivot_root_old) {
+ r = log_oom();
+ goto done;
+ }
+
+ r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto done;
+
+ r = mount_nofollow_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto done;
+
+ r = mount_nofollow_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto done;
+ } else {
+ r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
+ if (r < 0)
+ goto done;
+ }
+
+done:
+ if (remove_pivot_tmp)
+ (void) rmdir(pivot_tmp);
+
+ return r;
+}
diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h
new file mode 100644
index 0000000..e8b75fb
--- /dev/null
+++ b/src/nspawn/nspawn-mount.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "cgroup-util.h"
+#include "volatile-util.h"
+
+typedef enum MountSettingsMask {
+ MOUNT_FATAL = 1 << 0, /* if set, a mount error is considered fatal */
+ MOUNT_USE_USERNS = 1 << 1, /* if set, mounts are patched considering uid/gid shifts in a user namespace */
+ MOUNT_IN_USERNS = 1 << 2, /* if set, the mount is executed in the inner child, otherwise in the outer child */
+ MOUNT_APPLY_APIVFS_RO = 1 << 3, /* if set, /proc/sys, and /sys will be mounted read-only, otherwise read-write. */
+ MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write.
+ Works only if MOUNT_APPLY_APIVFS_RO is also set. */
+ MOUNT_APPLY_TMPFS_TMP = 1 << 5, /* if set, /tmp will be mounted as tmpfs */
+ MOUNT_ROOT_ONLY = 1 << 6, /* if set, only root mounts are mounted */
+ MOUNT_NON_ROOT_ONLY = 1 << 7, /* if set, only non-root mounts are mounted */
+ MOUNT_MKDIR = 1 << 8, /* if set, make directory to mount over first */
+ MOUNT_TOUCH = 1 << 9, /* if set, touch file to mount over first */
+ MOUNT_PREFIX_ROOT = 1 << 10,/* if set, prefix the source path with the container's root directory */
+ MOUNT_FOLLOW_SYMLINKS = 1 << 11,/* if set, we'll follow symlinks for the mount target */
+} MountSettingsMask;
+
+typedef enum CustomMountType {
+ CUSTOM_MOUNT_BIND,
+ CUSTOM_MOUNT_TMPFS,
+ CUSTOM_MOUNT_OVERLAY,
+ CUSTOM_MOUNT_INACCESSIBLE,
+ CUSTOM_MOUNT_ARBITRARY,
+ _CUSTOM_MOUNT_TYPE_MAX,
+ _CUSTOM_MOUNT_TYPE_INVALID = -1
+} CustomMountType;
+
+typedef struct CustomMount {
+ CustomMountType type;
+ bool read_only;
+ char *source; /* for overlayfs this is the upper directory */
+ char *destination;
+ char *options;
+ char *work_dir;
+ char **lower;
+ char *rm_rf_tmpdir;
+ char *type_argument; /* only for CUSTOM_MOUNT_ARBITRARY */
+ bool graceful;
+ bool in_userns;
+} CustomMount;
+
+CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t);
+void custom_mount_free_all(CustomMount *l, size_t n);
+int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n);
+
+int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
+int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s);
+int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
+int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s);
+
+int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context);
+int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
+
+int mount_custom(const char *dest, CustomMount *mounts, size_t n, uid_t uid_shift, const char *selinux_apifs_context, MountSettingsMask mount_settings);
+bool has_custom_root_mount(const CustomMount *mounts, size_t n);
+
+int setup_volatile_mode(const char *directory, VolatileMode mode, uid_t uid_shift, const char *selinux_apifs_context);
+
+int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
+int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);
+
+int tmpfs_patch_options(const char *options,uid_t uid_shift, const char *selinux_apifs_context, char **ret);
diff --git a/src/nspawn/nspawn-network.c b/src/nspawn/nspawn-network.c
new file mode 100644
index 0000000..d6b7d8e
--- /dev/null
+++ b/src/nspawn/nspawn-network.c
@@ -0,0 +1,765 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <linux/if.h>
+#include <linux/veth.h>
+#include <sys/file.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "lockfile-util.h"
+#include "missing_network.h"
+#include "netif-naming-scheme.h"
+#include "netlink-util.h"
+#include "nspawn-network.h"
+#include "parse-util.h"
+#include "siphash24.h"
+#include "socket-netlink.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "udev-util.h"
+#include "util.h"
+
+#define HOST_HASH_KEY SD_ID128_MAKE(1a,37,6f,c7,46,ec,45,0b,ad,a3,d5,31,06,60,5d,b1)
+#define CONTAINER_HASH_KEY SD_ID128_MAKE(c3,c4,f9,19,b5,57,b2,1c,e6,cf,14,27,03,9c,ee,a2)
+#define VETH_EXTRA_HOST_HASH_KEY SD_ID128_MAKE(48,c7,f6,b7,ea,9d,4c,9e,b7,28,d4,de,91,d5,bf,66)
+#define VETH_EXTRA_CONTAINER_HASH_KEY SD_ID128_MAKE(af,50,17,61,ce,f9,4d,35,84,0d,2b,20,54,be,ce,59)
+#define MACVLAN_HASH_KEY SD_ID128_MAKE(00,13,6d,bc,66,83,44,81,bb,0c,f9,51,1f,24,a6,6f)
+#define SHORTEN_IFNAME_HASH_KEY SD_ID128_MAKE(e1,90,a4,04,a8,ef,4b,51,8c,cc,c3,3a,9f,11,fc,a2)
+
+static int remove_one_link(sd_netlink *rtnl, const char *name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ if (isempty(name))
+ return 0;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_DELLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r == -ENODEV) /* Already gone */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to remove interface %s: %m", name);
+
+ return 1;
+}
+
+static int generate_mac(
+ const char *machine_name,
+ struct ether_addr *mac,
+ sd_id128_t hash_key,
+ uint64_t idx) {
+
+ uint64_t result;
+ size_t l, sz;
+ uint8_t *v, *i;
+ int r;
+
+ l = strlen(machine_name);
+ sz = sizeof(sd_id128_t) + l;
+ if (idx > 0)
+ sz += sizeof(idx);
+
+ v = newa(uint8_t, sz);
+
+ /* fetch some persistent data unique to the host */
+ r = sd_id128_get_machine((sd_id128_t*) v);
+ if (r < 0)
+ return r;
+
+ /* combine with some data unique (on this host) to this
+ * container instance */
+ i = mempcpy(v + sizeof(sd_id128_t), machine_name, l);
+ if (idx > 0) {
+ idx = htole64(idx);
+ memcpy(i, &idx, sizeof(idx));
+ }
+
+ /* Let's hash the host machine ID plus the container name. We
+ * use a fixed, but originally randomly created hash key here. */
+ result = htole64(siphash24(v, sz, hash_key.bytes));
+
+ assert_cc(ETH_ALEN <= sizeof(result));
+ memcpy(mac->ether_addr_octet, &result, ETH_ALEN);
+
+ /* see eth_random_addr in the kernel */
+ mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
+ mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
+
+ return 0;
+}
+
+static int set_alternative_ifname(sd_netlink *rtnl, const char *ifname, const char *altifname) {
+ int r;
+
+ assert(rtnl);
+ assert(ifname);
+
+ if (!altifname)
+ return 0;
+
+ if (strlen(altifname) >= ALTIFNAMSIZ)
+ return log_warning_errno(SYNTHETIC_ERRNO(ERANGE),
+ "Alternative interface name '%s' for '%s' is too long, ignoring",
+ altifname, ifname);
+
+ r = rtnl_set_link_alternative_names_by_ifname(&rtnl, ifname, STRV_MAKE(altifname));
+ if (r < 0)
+ return log_warning_errno(r,
+ "Failed to set alternative interface name '%s' to '%s', ignoring: %m",
+ altifname, ifname);
+
+ return 0;
+}
+
+static int add_veth(
+ sd_netlink *rtnl,
+ pid_t pid,
+ const char *ifname_host,
+ const char *altifname_host,
+ const struct ether_addr *mac_host,
+ const char *ifname_container,
+ const struct ether_addr *mac_container) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifname_host);
+ assert(mac_host);
+ assert(ifname_container);
+ assert(mac_container);
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, ifname_host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, mac_host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "veth");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container(m, VETH_INFO_PEER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, ifname_container);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, mac_container);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new veth interfaces (%s:%s): %m", ifname_host, ifname_container);
+
+ (void) set_alternative_ifname(rtnl, ifname_host, altifname_host);
+
+ return 0;
+}
+
+/* This is almost base64char(), but not entirely, as it uses the "url and filename safe" alphabet, since we
+ * don't want "/" appear in interface names (since interfaces appear in sysfs as filenames). See section #5
+ * of RFC 4648. */
+static char urlsafe_base64char(int x) {
+ static const char table[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789-_";
+ return table[x & 63];
+}
+
+static int shorten_ifname(char *ifname) {
+ char new_ifname[IFNAMSIZ];
+
+ assert(ifname);
+
+ if (strlen(ifname) < IFNAMSIZ) /* Name is short enough */
+ return 0;
+
+ if (naming_scheme_has(NAMING_NSPAWN_LONG_HASH)) {
+ uint64_t h;
+
+ /* Calculate 64bit hash value */
+ h = siphash24(ifname, strlen(ifname), SHORTEN_IFNAME_HASH_KEY.bytes);
+
+ /* Set the final four bytes (i.e. 32bit) to the lower 24bit of the hash, encoded in url-safe base64 */
+ memcpy(new_ifname, ifname, IFNAMSIZ - 5);
+ new_ifname[IFNAMSIZ - 5] = urlsafe_base64char(h >> 18);
+ new_ifname[IFNAMSIZ - 4] = urlsafe_base64char(h >> 12);
+ new_ifname[IFNAMSIZ - 3] = urlsafe_base64char(h >> 6);
+ new_ifname[IFNAMSIZ - 2] = urlsafe_base64char(h);
+ } else
+ /* On old nspawn versions we just truncated the name, provide compatibility */
+ memcpy(new_ifname, ifname, IFNAMSIZ-1);
+
+ new_ifname[IFNAMSIZ - 1] = 0;
+
+ /* Log the incident to make it more discoverable */
+ log_warning("Network interface name '%s' has been changed to '%s' to fit length constraints.", ifname, new_ifname);
+
+ strcpy(ifname, new_ifname);
+ return 1;
+}
+
+int setup_veth(const char *machine_name,
+ pid_t pid,
+ char iface_name[IFNAMSIZ],
+ bool bridge) {
+
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ struct ether_addr mac_host, mac_container;
+ unsigned u;
+ char *n, *a = NULL;
+ int r;
+
+ assert(machine_name);
+ assert(pid > 0);
+ assert(iface_name);
+
+ /* Use two different interface name prefixes depending whether
+ * we are in bridge mode or not. */
+ n = strjoina(bridge ? "vb-" : "ve-", machine_name);
+ r = shorten_ifname(n);
+ if (r > 0)
+ a = strjoina(bridge ? "vb-" : "ve-", machine_name);
+
+ r = generate_mac(machine_name, &mac_container, CONTAINER_HASH_KEY, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for container side: %m");
+
+ r = generate_mac(machine_name, &mac_host, HOST_HASH_KEY, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for host side: %m");
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ r = add_veth(rtnl, pid, n, a, &mac_host, "host0", &mac_container);
+ if (r < 0)
+ return r;
+
+ u = if_nametoindex(n); /* We don't need to use resolve_ifname() here because the
+ * name we assigned is always the main name. */
+ if (u == 0)
+ return log_error_errno(errno, "Failed to resolve interface %s: %m", n);
+
+ strcpy(iface_name, n);
+ return (int) u;
+}
+
+int setup_veth_extra(
+ const char *machine_name,
+ pid_t pid,
+ char **pairs) {
+
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ uint64_t idx = 0;
+ char **a, **b;
+ int r;
+
+ assert(machine_name);
+ assert(pid > 0);
+
+ if (strv_isempty(pairs))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ STRV_FOREACH_PAIR(a, b, pairs) {
+ struct ether_addr mac_host, mac_container;
+
+ r = generate_mac(machine_name, &mac_container, VETH_EXTRA_CONTAINER_HASH_KEY, idx);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for container side of extra veth link: %m");
+
+ r = generate_mac(machine_name, &mac_host, VETH_EXTRA_HOST_HASH_KEY, idx);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for host side of extra veth link: %m");
+
+ r = add_veth(rtnl, pid, *a, NULL, &mac_host, *b, &mac_container);
+ if (r < 0)
+ return r;
+
+ idx++;
+ }
+
+ return 0;
+}
+
+static int join_bridge(sd_netlink *rtnl, const char *veth_name, const char *bridge_name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r, bridge_ifi;
+
+ assert(rtnl);
+ assert(veth_name);
+ assert(bridge_name);
+
+ bridge_ifi = resolve_interface(&rtnl, bridge_name);
+ if (bridge_ifi < 0)
+ return bridge_ifi;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_link_set_flags(m, IFF_UP, IFF_UP);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, veth_name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_u32(m, IFLA_MASTER, bridge_ifi);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return bridge_ifi;
+}
+
+static int create_bridge(sd_netlink *rtnl, const char *bridge_name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, bridge_name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "bridge");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int setup_bridge(const char *veth_name, const char *bridge_name, bool create) {
+ _cleanup_(release_lock_file) LockFile bridge_lock = LOCK_FILE_INIT;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int r, bridge_ifi;
+ unsigned n = 0;
+
+ assert(veth_name);
+ assert(bridge_name);
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ if (create) {
+ /* We take a system-wide lock here, so that we can safely check whether there's still a member in the
+ * bridge before removing it, without risking interference from other nspawn instances. */
+
+ r = make_lock_file("/run/systemd/nspawn-network-zone", LOCK_EX, &bridge_lock);
+ if (r < 0)
+ return log_error_errno(r, "Failed to take network zone lock: %m");
+ }
+
+ for (;;) {
+ bridge_ifi = join_bridge(rtnl, veth_name, bridge_name);
+ if (bridge_ifi >= 0)
+ return bridge_ifi;
+ if (bridge_ifi != -ENODEV || !create || n > 10)
+ return log_error_errno(bridge_ifi, "Failed to add interface %s to bridge %s: %m", veth_name, bridge_name);
+
+ /* Count attempts, so that we don't enter an endless loop here. */
+ n++;
+
+ /* The bridge doesn't exist yet. Let's create it */
+ r = create_bridge(rtnl, bridge_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create bridge interface %s: %m", bridge_name);
+
+ /* Try again, now that the bridge exists */
+ }
+}
+
+int remove_bridge(const char *bridge_name) {
+ _cleanup_(release_lock_file) LockFile bridge_lock = LOCK_FILE_INIT;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ const char *path;
+ int r;
+
+ /* Removes the specified bridge, but only if it is currently empty */
+
+ if (isempty(bridge_name))
+ return 0;
+
+ r = make_lock_file("/run/systemd/nspawn-network-zone", LOCK_EX, &bridge_lock);
+ if (r < 0)
+ return log_error_errno(r, "Failed to take network zone lock: %m");
+
+ path = strjoina("/sys/class/net/", bridge_name, "/brif");
+
+ r = dir_is_empty(path);
+ if (r == -ENOENT) /* Already gone? */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Can't detect if bridge %s is empty: %m", bridge_name);
+ if (r == 0) /* Still populated, leave it around */
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ return remove_one_link(rtnl, bridge_name);
+}
+
+int test_network_interface_initialized(const char *name) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ int ifi, r;
+ char ifi_str[2 + DECIMAL_STR_MAX(int)];
+
+ if (path_is_read_only_fs("/sys"))
+ return 0;
+
+ /* udev should be around. */
+
+ ifi = resolve_interface_or_warn(NULL, name);
+ if (ifi < 0)
+ return ifi;
+
+ sprintf(ifi_str, "n%i", ifi);
+ r = sd_device_new_from_device_id(&d, ifi_str);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device %s: %m", name);
+
+ r = sd_device_get_is_initialized(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether interface %s is initialized: %m", name);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Network interface %s is not initialized yet.", name);
+
+ r = device_is_renaming(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine the interface %s is being renamed: %m", name);
+ if (r > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Interface %s is being renamed.", name);
+
+ return 0;
+}
+
+int move_network_interfaces(int netns_fd, char **ifaces) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int ifi;
+
+ ifi = resolve_interface_or_warn(&rtnl, *i);
+ if (ifi < 0)
+ return ifi;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_FD, netns_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append namespace fd to netlink message: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move interface %s to namespace: %m", *i);
+ }
+
+ return 0;
+}
+
+int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ unsigned idx = 0;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ _cleanup_free_ char *n = NULL, *a = NULL;
+ struct ether_addr mac;
+ int ifi;
+
+ ifi = resolve_interface_or_warn(&rtnl, *i);
+ if (ifi < 0)
+ return ifi;
+
+ r = generate_mac(machine_name, &mac, MACVLAN_HASH_KEY, idx++);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create MACVLAN MAC address: %m");
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_LINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface index: %m");
+
+ n = strjoin("mv-", *i);
+ if (!n)
+ return log_oom();
+
+ r = shorten_ifname(n);
+ if (r > 0) {
+ a = strjoin("mv-", *i);
+ if (!a)
+ return log_oom();
+ }
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "macvlan");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_MACVLAN_MODE, MACVLAN_MODE_BRIDGE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append macvlan mode: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new macvlan interfaces: %m");
+
+ (void) set_alternative_ifname(rtnl, n, a);
+ }
+
+ return 0;
+}
+
+int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ _cleanup_free_ char *n = NULL, *a = NULL;
+ int ifi;
+
+ ifi = resolve_interface_or_warn(&rtnl, *i);
+ if (ifi < 0)
+ return ifi;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_LINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface index: %m");
+
+ n = strjoin("iv-", *i);
+ if (!n)
+ return log_oom();
+
+ r = shorten_ifname(n);
+ if (r > 0) {
+ a = strjoin("iv-", *i);
+ if (!a)
+ return log_oom();
+ }
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "ipvlan");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPVLAN_MODE, IPVLAN_MODE_L2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add ipvlan mode: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new ipvlan interfaces: %m");
+
+ (void) set_alternative_ifname(rtnl, n, a);
+ }
+
+ return 0;
+}
+
+int veth_extra_parse(char ***l, const char *p) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+ int r;
+
+ r = extract_first_word(&p, &a, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0 || !ifname_valid(a))
+ return -EINVAL;
+
+ r = extract_first_word(&p, &b, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0 || !ifname_valid(b)) {
+ free(b);
+ b = strdup(a);
+ if (!b)
+ return -ENOMEM;
+ }
+
+ if (p)
+ return -EINVAL;
+
+ r = strv_push_pair(l, a, b);
+ if (r < 0)
+ return -ENOMEM;
+
+ a = b = NULL;
+ return 0;
+}
+
+int remove_veth_links(const char *primary, char **pairs) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ char **a, **b;
+ int r;
+
+ /* In some cases the kernel might pin the veth links between host and container even after the namespace
+ * died. Hence, let's better remove them explicitly too. */
+
+ if (isempty(primary) && strv_isempty(pairs))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ remove_one_link(rtnl, primary);
+
+ STRV_FOREACH_PAIR(a, b, pairs)
+ remove_one_link(rtnl, *a);
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-network.h b/src/nspawn/nspawn-network.h
new file mode 100644
index 0000000..5c2d983
--- /dev/null
+++ b/src/nspawn/nspawn-network.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <net/if.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+int test_network_interface_initialized(const char *name);
+
+int setup_veth(const char *machine_name, pid_t pid, char iface_name[IFNAMSIZ], bool bridge);
+int setup_veth_extra(const char *machine_name, pid_t pid, char **pairs);
+
+int setup_bridge(const char *veth_name, const char *bridge_name, bool create);
+int remove_bridge(const char *bridge_name);
+
+int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces);
+int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces);
+
+int move_network_interfaces(int netns_fd, char **ifaces);
+
+int veth_extra_parse(char ***l, const char *p);
+
+int remove_veth_links(const char *primary, char **pairs);
diff --git a/src/nspawn/nspawn-oci.c b/src/nspawn/nspawn-oci.c
new file mode 100644
index 0000000..ca708be
--- /dev/null
+++ b/src/nspawn/nspawn-oci.c
@@ -0,0 +1,2258 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/oom.h>
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#include "bus-util.h"
+#include "cap-list.h"
+#include "cpu-set-util.h"
+#include "env-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "json.h"
+#include "missing_sched.h"
+#include "nspawn-oci.h"
+#include "path-util.h"
+#include "rlimit-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+/* TODO:
+ * OCI runtime tool implementation
+ * hooks
+ *
+ * Spec issues:
+ *
+ * How is RLIM_INFINITY supposed to be encoded?
+ * configured effective caps is bullshit, as execv() corrupts it anyway
+ * pipes bind mounted is *very* different from pipes newly created, comments regarding bind mount or not are bogus
+ * annotation values structured? or string?
+ * configurable file system namespace path, but then also root path? wtf?
+ * apply sysctl inside of the container? or outside?
+ * how is unlimited pids tasks limit to be encoded?
+ * what are the defaults for caps if not specified?
+ * what are the default uid/gid mappings if one is missing but the other set, or when user ns is on but no namespace configured
+ * the source field of "mounts" is really weird, as it cannot realistically be relative to the bundle, since we never know if that's what the fs wants
+ * spec contradicts itself on the mount "type" field, as the example uses "bind" as type, but it's not listed in /proc/filesystem, and is something made up by /bin/mount
+ * if type of mount is left out, what shall be assumed? "bind"?
+ * readonly mounts is entirely redundant?
+ * should escaping be applied when joining mount options with ","?
+ * devices cgroup support is bogus, "allow" and "deny" on the kernel level is about adding/removing entries, not about access
+ * spec needs to say that "rwm" devices cgroup combination can't be the empty string
+ * cgrouspv1 crap: kernel, kernelTCP, swapiness, disableOOMKiller, swap, devices, leafWeight
+ * general: it shouldn't leak lower level abstractions this obviously
+ * unmanagable cgroups stuff: realtimeRuntime/realtimePeriod
+ * needs to say what happense when some option is not specified, i.e. which defaults apply
+ * no architecture? no personality?
+ * seccomp example and logic is simply broken: there's no constant "SCMP_ACT_ERRNO".
+ * spec should say what to do with unknown props
+ * /bin/mount regarding NFS and FUSE required?
+ * what does terminal=false mean?
+ * sysctl inside or outside? allow-listing?
+ * swapiness typo -> swappiness
+ *
+ * Unsupported:
+ *
+ * apparmorProfile
+ * selinuxLabel + mountLabel
+ * hugepageLimits
+ * network
+ * rdma
+ * intelRdt
+ * swappiness, disableOOMKiller, kernel, kernelTCP, leafWeight (because it's dead, cgroupsv2 can't do it and hence systemd neither)
+ *
+ * Non-slice cgroup paths
+ * Propagation that is not slave + shared
+ * more than one uid/gid mapping, mappings with a container base != 0, or non-matching uid/gid mappings
+ * device cgroups access = false items that are not catchall
+ * device cgroups matches where minor is specified, but major isn't. similar where major is specified but char/block is not. also, any match that only has a type set that has less than "rwm" set. also, any entry that has none of rwm set.
+ *
+ */
+
+static int oci_unexpected(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected OCI element '%s' of type '%s'.", name, json_variant_type_to_string(json_variant_type(v)));
+}
+
+static int oci_unsupported(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Unsupported OCI element '%s' of type '%s'.", name, json_variant_type_to_string(json_variant_type(v)));
+}
+
+static int oci_terminal(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+
+ /* If not specified, or set to true, we'll default to either an interactive or a read-only
+ * console. If specified as false, we'll forcibly move to "pipe" mode though. */
+ s->console_mode = json_variant_boolean(v) ? _CONSOLE_MODE_INVALID : CONSOLE_PIPE;
+ return 0;
+}
+
+static int oci_console_dimension(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ unsigned *u = userdata;
+ uintmax_t k;
+
+ assert(u);
+
+ k = json_variant_unsigned(variant);
+ if (k == 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Console size field '%s' is too small.", strna(name));
+ if (k > USHRT_MAX) /* TIOCSWINSZ's struct winsize uses "unsigned short" for width and height */
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Console size field '%s' is too large.", strna(name));
+
+ *u = (unsigned) k;
+ return 0;
+}
+
+static int oci_console_size(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "height", JSON_VARIANT_UNSIGNED, oci_console_dimension, offsetof(Settings, console_height), JSON_MANDATORY },
+ { "width", JSON_VARIANT_UNSIGNED, oci_console_dimension, offsetof(Settings, console_width), JSON_MANDATORY },
+ {}
+ };
+
+ return json_dispatch(v, table, oci_unexpected, flags, userdata);
+}
+
+static int oci_absolute_path(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ char **p = userdata;
+ const char *n;
+
+ assert(p);
+
+ n = json_variant_string(v);
+
+ if (!path_is_absolute(n))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Path in JSON field '%s' is not absolute: %s", strna(name), n);
+
+ return free_and_strdup_warn(p, n);
+}
+
+static int oci_env(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ char ***l = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(l);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ const char *n;
+
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Environment array contains non-string.");
+
+ assert_se(n = json_variant_string(e));
+
+ if (!env_assignment_is_valid(n))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Environment assignment not valid: %s", n);
+
+ r = strv_extend(l, n);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int oci_args(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***value = userdata;
+ int r;
+
+ assert(value);
+
+ r = json_variant_strv(v, &l);
+ if (r < 0)
+ return json_log(v, flags, r, "Cannot parse arguments as list of strings: %m");
+
+ if (strv_isempty(l))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Argument list empty, refusing.");
+
+ if (isempty(l[0]))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Executable name is empty, refusing.");
+
+ return strv_free_and_replace(*value, l);
+}
+
+static int oci_rlimit_type(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ const char *z;
+ int t, *type = userdata;
+
+ assert_se(type);
+
+ z = startswith(json_variant_string(v), "RLIMIT_");
+ if (!z)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "rlimit entry's name does not begin with 'RLIMIT_', refusing: %s",
+ json_variant_string(v));
+
+ t = rlimit_from_string(z);
+ if (t < 0)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "rlimit name unknown: %s", json_variant_string(v));
+
+ *type = t;
+ return 0;
+}
+
+static int oci_rlimit_value(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ rlim_t z, *value = userdata;
+
+ assert(value);
+
+ if (json_variant_is_negative(v))
+ z = RLIM_INFINITY;
+ else {
+ if (!json_variant_is_unsigned(v))
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "rlimits limit not unsigned, refusing.");
+
+ z = (rlim_t) json_variant_unsigned(v);
+
+ if ((uintmax_t) z != json_variant_unsigned(v))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "rlimits limit out of range, refusing.");
+ }
+
+ *value = z;
+ return 0;
+}
+
+static int oci_rlimits(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ Settings *s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+
+ struct rlimit_data {
+ int type;
+ rlim_t soft;
+ rlim_t hard;
+ } data = {
+ .type = -1,
+ .soft = RLIM_INFINITY,
+ .hard = RLIM_INFINITY,
+ };
+
+ static const JsonDispatch table[] = {
+ { "soft", JSON_VARIANT_NUMBER, oci_rlimit_value, offsetof(struct rlimit_data, soft), JSON_MANDATORY },
+ { "hard", JSON_VARIANT_NUMBER, oci_rlimit_value, offsetof(struct rlimit_data, hard), JSON_MANDATORY },
+ { "type", JSON_VARIANT_STRING, oci_rlimit_type, offsetof(struct rlimit_data, type), JSON_MANDATORY },
+ {}
+ };
+
+ r = json_dispatch(e, table, oci_unexpected, flags, &data);
+ if (r < 0)
+ return r;
+
+ assert(data.type >= 0);
+ assert(data.type < _RLIMIT_MAX);
+
+ if (s->rlimit[data.type])
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "rlimits array contains duplicate entry, refusing.");
+
+ s->rlimit[data.type] = new(struct rlimit, 1);
+ if (!s->rlimit[data.type])
+ return log_oom();
+
+ *s->rlimit[data.type] = (struct rlimit) {
+ .rlim_cur = data.soft,
+ .rlim_max = data.hard,
+ };
+
+ }
+ return 0;
+}
+
+static int oci_capability_array(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *mask = userdata, m = 0;
+ JsonVariant *e;
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ const char *n;
+ int cap;
+
+ if (!json_variant_is_string(e))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Entry in capabilities array is not a string.");
+
+ assert_se(n = json_variant_string(e));
+
+ cap = capability_from_name(n);
+ if (cap < 0)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Unknown capability: %s", n);
+
+ m |= UINT64_C(1) << cap;
+ }
+
+ if (*mask == (uint64_t) -1)
+ *mask = m;
+ else
+ *mask |= m;
+
+ return 0;
+}
+
+static int oci_capabilities(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "effective", JSON_VARIANT_ARRAY, oci_capability_array, offsetof(CapabilityQuintet, effective) },
+ { "bounding", JSON_VARIANT_ARRAY, oci_capability_array, offsetof(CapabilityQuintet, bounding) },
+ { "inheritable", JSON_VARIANT_ARRAY, oci_capability_array, offsetof(CapabilityQuintet, inheritable) },
+ { "permitted", JSON_VARIANT_ARRAY, oci_capability_array, offsetof(CapabilityQuintet, permitted) },
+ { "ambient", JSON_VARIANT_ARRAY, oci_capability_array, offsetof(CapabilityQuintet, ambient) },
+ {}
+ };
+
+ Settings *s = userdata;
+ int r;
+
+ assert(s);
+
+ r = json_dispatch(v, table, oci_unexpected, flags, &s->full_capabilities);
+ if (r < 0)
+ return r;
+
+ if (s->full_capabilities.bounding != (uint64_t) -1) {
+ s->capability = s->full_capabilities.bounding;
+ s->drop_capability = ~s->full_capabilities.bounding;
+ }
+
+ return 0;
+}
+
+static int oci_oom_score_adj(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ intmax_t k;
+
+ assert(s);
+
+ k = json_variant_integer(v);
+ if (k < OOM_SCORE_ADJ_MIN || k > OOM_SCORE_ADJ_MAX)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "oomScoreAdj value out of range: %ji", k);
+
+ s->oom_score_adjust = (int) k;
+ s->oom_score_adjust_set = true;
+
+ return 0;
+}
+
+static int oci_uid_gid(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ uid_t *uid = userdata, u;
+ uintmax_t k;
+
+ assert(uid);
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+
+ k = json_variant_unsigned(v);
+ u = (uid_t) k;
+ if ((uintmax_t) u != k)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "UID/GID out of range: %ji", k);
+
+ if (!uid_is_valid(u))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "UID/GID is not valid: " UID_FMT, u);
+
+ *uid = u;
+ return 0;
+}
+
+static int oci_supplementary_gids(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ gid_t gid, *a;
+
+ if (!json_variant_is_unsigned(e))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Supplementary GID entry is not a UID.");
+
+ r = oci_uid_gid(name, e, flags, &gid);
+ if (r < 0)
+ return r;
+
+ a = reallocarray(s->supplementary_gids, s->n_supplementary_gids + 1, sizeof(gid_t));
+ if (!a)
+ return log_oom();
+
+ s->supplementary_gids = a;
+ s->supplementary_gids[s->n_supplementary_gids++] = gid;
+ }
+
+ return 0;
+}
+
+static int oci_user(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ static const JsonDispatch table[] = {
+ { "uid", JSON_VARIANT_UNSIGNED, oci_uid_gid, offsetof(Settings, uid), JSON_MANDATORY },
+ { "gid", JSON_VARIANT_UNSIGNED, oci_uid_gid, offsetof(Settings, gid), JSON_MANDATORY },
+ { "additionalGids", JSON_VARIANT_ARRAY, oci_supplementary_gids, 0, 0 },
+ {}
+ };
+
+ return json_dispatch(v, table, oci_unexpected, flags, userdata);
+}
+
+static int oci_process(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "terminal", JSON_VARIANT_BOOLEAN, oci_terminal, 0, 0 },
+ { "consoleSize", JSON_VARIANT_OBJECT, oci_console_size, 0, 0 },
+ { "cwd", JSON_VARIANT_STRING, oci_absolute_path, offsetof(Settings, working_directory), 0 },
+ { "env", JSON_VARIANT_ARRAY, oci_env, offsetof(Settings, environment), 0 },
+ { "args", JSON_VARIANT_ARRAY, oci_args, offsetof(Settings, parameters), 0 },
+ { "rlimits", JSON_VARIANT_ARRAY, oci_rlimits, 0, 0 },
+ { "apparmorProfile", JSON_VARIANT_STRING, oci_unsupported, 0, JSON_PERMISSIVE },
+ { "capabilities", JSON_VARIANT_OBJECT, oci_capabilities, 0, 0 },
+ { "noNewPrivileges", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(Settings, no_new_privileges), 0 },
+ { "oomScoreAdj", JSON_VARIANT_INTEGER, oci_oom_score_adj, 0, 0 },
+ { "selinuxLabel", JSON_VARIANT_STRING, oci_unsupported, 0, JSON_PERMISSIVE },
+ { "user", JSON_VARIANT_OBJECT, oci_user, 0, 0 },
+ {}
+ };
+
+ return json_dispatch(v, table, oci_unexpected, flags, userdata);
+}
+
+static int oci_root(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(Settings, root) },
+ { "readonly", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(Settings, read_only) },
+ {}
+ };
+
+ return json_dispatch(v, table, oci_unexpected, flags, userdata);
+}
+
+static int oci_hostname(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ const char *n;
+
+ assert(s);
+
+ assert_se(n = json_variant_string(v));
+
+ if (!hostname_is_valid(n, false))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Hostname string is not a valid hostname: %s", n);
+
+ return free_and_strdup_warn(&s->hostname, n);
+}
+
+static bool oci_exclude_mount(const char *path) {
+
+ /* Returns "true" for all mounts we insist to mount on our own, and hence ignore the OCI data. */
+
+ if (PATH_IN_SET(path,
+ "/dev",
+ "/dev/mqueue",
+ "/dev/pts",
+ "/dev/shm",
+ "/proc",
+ "/proc/acpi",
+ "/proc/apm",
+ "/proc/asound",
+ "/proc/bus",
+ "/proc/fs",
+ "/proc/irq",
+ "/proc/kallsyms",
+ "/proc/kcore",
+ "/proc/keys",
+ "/proc/scsi",
+ "/proc/sys",
+ "/proc/sys/net",
+ "/proc/sysrq-trigger",
+ "/proc/timer_list",
+ "/run",
+ "/sys",
+ "/sys",
+ "/sys/fs/selinux",
+ "/tmp"))
+ return true;
+
+ /* Similar, skip the whole /sys/fs/cgroups subtree */
+ if (path_startswith(path, "/sys/fs/cgroup"))
+ return true;
+
+ return false;
+}
+
+typedef struct oci_mount_data {
+ char *destination;
+ char *source;
+ char *type;
+ char **options;
+} oci_mount_data;
+
+static void cleanup_oci_mount_data(oci_mount_data *data) {
+ free(data->destination);
+ free(data->source);
+ strv_free(data->options);
+ free(data->type);
+}
+
+static int oci_mounts(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ static const JsonDispatch table[] = {
+ { "destination", JSON_VARIANT_STRING, oci_absolute_path, offsetof(oci_mount_data, destination), JSON_MANDATORY },
+ { "source", JSON_VARIANT_STRING, json_dispatch_string, offsetof(oci_mount_data, source), 0 },
+ { "options", JSON_VARIANT_ARRAY, json_dispatch_strv, offsetof(oci_mount_data, options), 0, },
+ { "type", JSON_VARIANT_STRING, json_dispatch_string, offsetof(oci_mount_data, type), 0 },
+ {}
+ };
+
+ _cleanup_free_ char *joined_options = NULL;
+ CustomMount *m;
+ _cleanup_(cleanup_oci_mount_data) oci_mount_data data = {};
+
+ r = json_dispatch(e, table, oci_unexpected, flags, &data);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(data.destination))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Mount destination not an absolute path: %s", data.destination);
+
+ if (oci_exclude_mount(data.destination))
+ continue;
+
+ if (data.options) {
+ joined_options = strv_join(data.options, ",");
+ if (!joined_options)
+ return log_oom();
+ }
+
+ if (!data.type || streq(data.type, "bind")) {
+ if (data.source && !path_is_absolute(data.source)) {
+ char *joined;
+
+ joined = path_join(s->bundle, data.source);
+ if (!joined)
+ return log_oom();
+
+ free_and_replace(data.source, joined);
+ }
+
+ data.type = mfree(data.type);
+
+ m = custom_mount_add(&s->custom_mounts, &s->n_custom_mounts, CUSTOM_MOUNT_BIND);
+ } else
+ m = custom_mount_add(&s->custom_mounts, &s->n_custom_mounts, CUSTOM_MOUNT_ARBITRARY);
+ if (!m)
+ return log_oom();
+
+ m->destination = TAKE_PTR(data.destination);
+ m->source = TAKE_PTR(data.source);
+ m->options = TAKE_PTR(joined_options);
+ m->type_argument = TAKE_PTR(data.type);
+ }
+
+ return 0;
+}
+
+static int oci_namespace_type(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ unsigned long *nsflags = userdata;
+ const char *n;
+
+ assert(nsflags);
+ assert_se(n = json_variant_string(v));
+
+ /* We don't use namespace_flags_from_string() here, as the OCI spec uses slightly different names than the
+ * kernel here. */
+ if (streq(n, "pid"))
+ *nsflags = CLONE_NEWPID;
+ else if (streq(n, "network"))
+ *nsflags = CLONE_NEWNET;
+ else if (streq(n, "mount"))
+ *nsflags = CLONE_NEWNS;
+ else if (streq(n, "ipc"))
+ *nsflags = CLONE_NEWIPC;
+ else if (streq(n, "uts"))
+ *nsflags = CLONE_NEWUTS;
+ else if (streq(n, "user"))
+ *nsflags = CLONE_NEWUSER;
+ else if (streq(n, "cgroup"))
+ *nsflags = CLONE_NEWCGROUP;
+ else
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Unknown cgroup type, refusing: %s", n);
+
+ return 0;
+}
+
+static int oci_namespaces(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ unsigned long n = 0;
+ JsonVariant *e;
+ int r;
+
+ assert_se(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+
+ struct namespace_data {
+ unsigned long type;
+ char *path;
+ } data = {};
+
+ static const JsonDispatch table[] = {
+ { "type", JSON_VARIANT_STRING, oci_namespace_type, offsetof(struct namespace_data, type), JSON_MANDATORY },
+ { "path", JSON_VARIANT_STRING, oci_absolute_path, offsetof(struct namespace_data, path), 0 },
+ {}
+ };
+
+ r = json_dispatch(e, table, oci_unexpected, flags, &data);
+ if (r < 0) {
+ free(data.path);
+ return r;
+ }
+
+ if (data.path) {
+ if (data.type != CLONE_NEWNET) {
+ free(data.path);
+ return json_log(e, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Specifying namespace path for non-network namespace is not supported.");
+ }
+
+ if (s->network_namespace_path) {
+ free(data.path);
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Network namespace path specified more than once, refusing.");
+ }
+
+ free(s->network_namespace_path);
+ s->network_namespace_path = data.path;
+ }
+
+ if (FLAGS_SET(n, data.type))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Duplicate namespace specification, refusing.");
+
+ n |= data.type;
+ }
+
+ if (!FLAGS_SET(n, CLONE_NEWNS))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Containers without file system namespace aren't supported.");
+
+ s->private_network = FLAGS_SET(n, CLONE_NEWNET);
+ s->userns_mode = FLAGS_SET(n, CLONE_NEWUSER) ? USER_NAMESPACE_FIXED : USER_NAMESPACE_NO;
+ s->use_cgns = FLAGS_SET(n, CLONE_NEWCGROUP);
+
+ s->clone_ns_flags = n & (CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS);
+
+ return 0;
+}
+
+static int oci_uid_gid_range(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ uid_t *uid = userdata, u;
+ uintmax_t k;
+
+ assert(uid);
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+
+ /* This is very much like oci_uid_gid(), except the checks are a bit different, as this is a UID range rather
+ * than a specific UID, and hence (uid_t) -1 has no special significance. OTOH a range of zero makes no
+ * sense. */
+
+ k = json_variant_unsigned(v);
+ u = (uid_t) k;
+ if ((uintmax_t) u != k)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "UID/GID out of range: %ji", k);
+ if (u == 0)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "UID/GID range can't be zero.");
+
+ *uid = u;
+ return 0;
+}
+
+static int oci_uid_gid_mappings(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ struct mapping_data {
+ uid_t host_id;
+ uid_t container_id;
+ uid_t range;
+ } data = {
+ .host_id = UID_INVALID,
+ .container_id = UID_INVALID,
+ .range = 0,
+ };
+
+ static const JsonDispatch table[] = {
+ { "containerID", JSON_VARIANT_UNSIGNED, oci_uid_gid, offsetof(struct mapping_data, container_id), JSON_MANDATORY },
+ { "hostID", JSON_VARIANT_UNSIGNED, oci_uid_gid, offsetof(struct mapping_data, host_id), JSON_MANDATORY },
+ { "size", JSON_VARIANT_UNSIGNED, oci_uid_gid_range, offsetof(struct mapping_data, range), JSON_MANDATORY },
+ {}
+ };
+
+ Settings *s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ if (json_variant_elements(v) == 0)
+ return 0;
+
+ if (json_variant_elements(v) > 1)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "UID/GID mappings with more than one entry are not supported.");
+
+ assert_se(e = json_variant_by_index(v, 0));
+
+ r = json_dispatch(e, table, oci_unexpected, flags, &data);
+ if (r < 0)
+ return r;
+
+ if (data.host_id + data.range < data.host_id ||
+ data.container_id + data.range < data.container_id)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "UID/GID range goes beyond UID/GID validity range, refusing.");
+
+ if (data.container_id != 0)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "UID/GID mappings with a non-zero container base are not supported.");
+
+ if (data.range < 0x10000)
+ json_log(v, flags|JSON_WARNING, 0,
+ "UID/GID mapping with less than 65536 UID/GIDS set up, you are looking for trouble.");
+
+ if (s->uid_range != UID_INVALID &&
+ (s->uid_shift != data.host_id || s->uid_range != data.range))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Non-matching UID and GID mappings are not supported.");
+
+ s->uid_shift = data.host_id;
+ s->uid_range = data.range;
+
+ return 0;
+}
+
+static int oci_device_type(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ mode_t *mode = userdata;
+ const char *t;
+
+ assert(mode);
+ assert_se(t = json_variant_string(v));
+
+ if (STR_IN_SET(t, "c", "u"))
+ *mode = (*mode & ~S_IFMT) | S_IFCHR;
+ else if (streq(t, "b"))
+ *mode = (*mode & ~S_IFMT) | S_IFBLK;
+ else if (streq(t, "p"))
+ *mode = (*mode & ~S_IFMT) | S_IFIFO;
+ else
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Unknown device type: %s", t);
+
+ return 0;
+}
+
+static int oci_device_major(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ unsigned *u = userdata;
+ uintmax_t k;
+
+ assert_se(u);
+
+ k = json_variant_unsigned(v);
+ if (!DEVICE_MAJOR_VALID(k))
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Device major %ji out of range.", k);
+
+ *u = (unsigned) k;
+ return 0;
+}
+
+static int oci_device_minor(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ unsigned *u = userdata;
+ uintmax_t k;
+
+ assert_se(u);
+
+ k = json_variant_unsigned(v);
+ if (!DEVICE_MINOR_VALID(k))
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Device minor %ji out of range.", k);
+
+ *u = (unsigned) k;
+ return 0;
+}
+
+static int oci_device_file_mode(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ mode_t *mode = userdata, m;
+ uintmax_t k;
+
+ assert(mode);
+
+ k = json_variant_unsigned(v);
+ m = (mode_t) k;
+
+ if ((m & ~07777) != 0 || (uintmax_t) m != k)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "fileMode out of range, refusing.");
+
+ *mode = m;
+ return 0;
+}
+
+static int oci_devices(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+
+ static const JsonDispatch table[] = {
+ { "type", JSON_VARIANT_STRING, oci_device_type, offsetof(DeviceNode, mode), JSON_MANDATORY },
+ { "path", JSON_VARIANT_STRING, oci_absolute_path, offsetof(DeviceNode, path), JSON_MANDATORY },
+ { "major", JSON_VARIANT_UNSIGNED, oci_device_major, offsetof(DeviceNode, major), 0 },
+ { "minor", JSON_VARIANT_UNSIGNED, oci_device_minor, offsetof(DeviceNode, minor), 0 },
+ { "fileMode", JSON_VARIANT_UNSIGNED, oci_device_file_mode, offsetof(DeviceNode, mode), 0 },
+ { "uid", JSON_VARIANT_UNSIGNED, oci_uid_gid, offsetof(DeviceNode, uid), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, oci_uid_gid, offsetof(DeviceNode, gid), 0 },
+ {}
+ };
+
+ DeviceNode *node, *nodes;
+
+ nodes = reallocarray(s->extra_nodes, s->n_extra_nodes + 1, sizeof(DeviceNode));
+ if (!nodes)
+ return log_oom();
+
+ s->extra_nodes = nodes;
+
+ node = nodes + s->n_extra_nodes;
+ *node = (DeviceNode) {
+ .uid = UID_INVALID,
+ .gid = GID_INVALID,
+ .major = (unsigned) -1,
+ .minor = (unsigned) -1,
+ .mode = 0644,
+ };
+
+ r = json_dispatch(e, table, oci_unexpected, flags, node);
+ if (r < 0)
+ goto fail_element;
+
+ if (S_ISCHR(node->mode) || S_ISBLK(node->mode)) {
+ _cleanup_free_ char *path = NULL;
+
+ if (node->major == (unsigned) -1 || node->minor == (unsigned) -1) {
+ r = json_log(e, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Major/minor required when device node is device node");
+ goto fail_element;
+ }
+
+ /* Suppress a couple of implicit device nodes */
+ r = device_path_make_canonical(node->mode, makedev(node->major, node->minor), &path);
+ if (r < 0)
+ json_log(e, flags|JSON_DEBUG, 0, "Failed to resolve device node %u:%u, ignoring: %m", node->major, node->minor);
+ else {
+ if (PATH_IN_SET(path,
+ "/dev/null",
+ "/dev/zero",
+ "/dev/full",
+ "/dev/random",
+ "/dev/urandom",
+ "/dev/tty",
+ "/dev/net/tun",
+ "/dev/ptmx",
+ "/dev/pts/ptmx",
+ "/dev/console")) {
+
+ json_log(e, flags|JSON_DEBUG, 0, "Ignoring devices item for device '%s', as it is implicitly created anyway.", path);
+ free(node->path);
+ continue;
+ }
+ }
+ }
+
+ s->n_extra_nodes++;
+ continue;
+
+ fail_element:
+ free(node->path);
+ return r;
+ }
+
+ return 0;
+}
+
+static int oci_cgroups_path(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_free_ char *slice = NULL, *backwards = NULL;
+ Settings *s = userdata;
+ const char *p;
+ int r;
+
+ assert(s);
+
+ assert_se(p = json_variant_string(v));
+
+ r = cg_path_get_slice(p, &slice);
+ if (r < 0)
+ return json_log(v, flags, r, "Couldn't derive slice unit name from path '%s': %m", p);
+
+ r = cg_slice_to_path(slice, &backwards);
+ if (r < 0)
+ return json_log(v, flags, r, "Couldn't convert slice unit name '%s' back to path: %m", slice);
+
+ if (!path_equal(backwards, p))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Control group path '%s' does not refer to slice unit, refusing.", p);
+
+ free_and_replace(s->slice, slice);
+ return 0;
+}
+
+static int oci_cgroup_device_type(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ mode_t *mode = userdata;
+ const char *n;
+
+ assert_se(n = json_variant_string(v));
+
+ if (streq(n, "c"))
+ *mode = S_IFCHR;
+ else if (streq(n, "b"))
+ *mode = S_IFBLK;
+ else
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Control group device type unknown: %s", n);
+
+ return 0;
+}
+
+struct device_data {
+ bool allow;
+ bool r;
+ bool w;
+ bool m;
+ mode_t type;
+ unsigned major;
+ unsigned minor;
+};
+
+static int oci_cgroup_device_access(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ struct device_data *d = userdata;
+ bool r = false, w = false, m = false;
+ const char *s;
+ size_t i;
+
+ assert_se(s = json_variant_string(v));
+
+ for (i = 0; s[i]; i++)
+ if (s[i] == 'r')
+ r = true;
+ else if (s[i] == 'w')
+ w = true;
+ else if (s[i] == 'm')
+ m = true;
+ else
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Unknown device access character '%c'.", s[i]);
+
+ d->r = r;
+ d->w = w;
+ d->m = m;
+
+ return 0;
+}
+
+static int oci_cgroup_devices(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ _cleanup_free_ struct device_data *list = NULL;
+ Settings *s = userdata;
+ size_t n_list = 0, i;
+ bool noop = false;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+
+ struct device_data data = {
+ .major = (unsigned) -1,
+ .minor = (unsigned) -1,
+ }, *a;
+
+ static const JsonDispatch table[] = {
+ { "allow", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(struct device_data, allow), JSON_MANDATORY },
+ { "type", JSON_VARIANT_STRING, oci_cgroup_device_type, offsetof(struct device_data, type), 0 },
+ { "major", JSON_VARIANT_UNSIGNED, oci_device_major, offsetof(struct device_data, major), 0 },
+ { "minor", JSON_VARIANT_UNSIGNED, oci_device_minor, offsetof(struct device_data, minor), 0 },
+ { "access", JSON_VARIANT_STRING, oci_cgroup_device_access, 0, 0 },
+ {}
+ };
+
+ r = json_dispatch(e, table, oci_unexpected, flags, &data);
+ if (r < 0)
+ return r;
+
+ if (!data.allow) {
+ /* The fact that OCI allows 'deny' entries makes really no sense, as 'allow'
+ * vs. 'deny' for the devices cgroup controller is really not about allow-listing and
+ * deny-listing but about adding and removing entries from the allow list. Since we
+ * always start out with an empty allow list we hence ignore the whole thing, as
+ * removing entries which don't exist make no sense. We'll log about this, since this
+ * is really borked in the spec, with one exception: the entry that's supposed to
+ * drop the kernel's default we ignore silently */
+
+ if (!data.r || !data.w || !data.m || data.type != 0 || data.major != (unsigned) -1 || data.minor != (unsigned) -1)
+ json_log(v, flags|JSON_WARNING, 0, "Devices cgroup allow list with arbitrary 'allow' entries not supported, ignoring.");
+
+ /* We ignore the 'deny' entry as for us that's implied */
+ continue;
+ }
+
+ if (!data.r && !data.w && !data.m) {
+ json_log(v, flags|LOG_WARNING, 0, "Device cgroup allow list entry with no effect found, ignoring.");
+ continue;
+ }
+
+ if (data.minor != (unsigned) -1 && data.major == (unsigned) -1)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Device cgroup allow list entries with minors but no majors not supported.");
+
+ if (data.major != (unsigned) -1 && data.type == 0)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Device cgroup allow list entries with majors but no device node type not supported.");
+
+ if (data.type == 0) {
+ if (data.r && data.w && data.m) /* a catchall allow list entry means we are looking at a noop */
+ noop = true;
+ else
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Device cgroup allow list entries with no type not supported.");
+ }
+
+ a = reallocarray(list, n_list + 1, sizeof(struct device_data));
+ if (!a)
+ return log_oom();
+
+ list = a;
+ list[n_list++] = data;
+ }
+
+ if (noop)
+ return 0;
+
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(s->properties, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(s->properties, "s", "DeviceAllow");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(s->properties, 'v', "a(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(s->properties, 'a', "(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (i = 0; i < n_list; i++) {
+ _cleanup_free_ char *pattern = NULL;
+ char access[4];
+ size_t n = 0;
+
+ if (list[i].minor == (unsigned) -1) {
+ const char *t;
+
+ if (list[i].type == S_IFBLK)
+ t = "block";
+ else {
+ assert(list[i].type == S_IFCHR);
+ t = "char";
+ }
+
+ if (list[i].major == (unsigned) -1) {
+ pattern = strjoin(t, "-*");
+ if (!pattern)
+ return log_oom();
+ } else {
+ if (asprintf(&pattern, "%s-%u", t, list[i].major) < 0)
+ return log_oom();
+ }
+
+ } else {
+ assert(list[i].major != (unsigned) -1); /* If a minor is specified, then a major also needs to be specified */
+
+ r = device_path_make_major_minor(list[i].type, makedev(list[i].major, list[i].minor), &pattern);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (list[i].r)
+ access[n++] = 'r';
+ if (list[i].w)
+ access[n++] = 'w';
+ if (list[i].m)
+ access[n++] = 'm';
+ access[n] = 0;
+
+ assert(n > 0);
+
+ r = sd_bus_message_append(s->properties, "(ss)", pattern, access);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(s->properties);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(s->properties);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(s->properties);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+static int oci_cgroup_memory_limit(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *m = userdata;
+ uintmax_t k;
+
+ assert(m);
+
+ if (json_variant_is_negative(v)) {
+ *m = UINT64_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(v))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Memory limit is not an unsigned integer");
+
+ k = json_variant_unsigned(v);
+ if (k >= UINT64_MAX)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Memory limit too large: %ji", k);
+
+ *m = (uint64_t) k;
+ return 0;
+}
+
+static int oci_cgroup_memory(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ struct memory_data {
+ uint64_t limit;
+ uint64_t reservation;
+ uint64_t swap;
+ } data = {
+ .limit = UINT64_MAX,
+ .reservation = UINT64_MAX,
+ .swap = UINT64_MAX,
+ };
+
+ static const JsonDispatch table[] = {
+ { "limit", JSON_VARIANT_NUMBER, oci_cgroup_memory_limit, offsetof(struct memory_data, limit), 0 },
+ { "reservation", JSON_VARIANT_NUMBER, oci_cgroup_memory_limit, offsetof(struct memory_data, reservation), 0 },
+ { "swap", JSON_VARIANT_NUMBER, oci_cgroup_memory_limit, offsetof(struct memory_data, swap), 0 },
+ { "kernel", JSON_VARIANT_NUMBER, oci_unsupported, 0, JSON_PERMISSIVE },
+ { "kernelTCP", JSON_VARIANT_NUMBER, oci_unsupported, 0, JSON_PERMISSIVE },
+ { "swapiness", JSON_VARIANT_NUMBER, oci_unsupported, 0, JSON_PERMISSIVE },
+ { "disableOOMKiller", JSON_VARIANT_NUMBER, oci_unsupported, 0, JSON_PERMISSIVE },
+ {}
+ };
+
+ Settings *s = userdata;
+ int r;
+
+ r = json_dispatch(v, table, oci_unexpected, flags, &data);
+ if (r < 0)
+ return r;
+
+ if (data.swap != UINT64_MAX) {
+ if (data.limit == UINT64_MAX)
+ json_log(v, flags|LOG_WARNING, 0, "swap limit without memory limit is not supported, ignoring.");
+ else if (data.swap < data.limit)
+ json_log(v, flags|LOG_WARNING, 0, "swap limit is below memory limit, ignoring.");
+ else {
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", "MemorySwapMax", "t", data.swap - data.limit);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ if (data.limit != UINT64_MAX) {
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", "MemoryMax", "t", data.limit);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (data.reservation != UINT64_MAX) {
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", "MemoryLow", "t", data.reservation);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+struct cpu_data {
+ uint64_t shares;
+ uint64_t quota;
+ uint64_t period;
+ CPUSet cpu_set;
+};
+
+static int oci_cgroup_cpu_shares(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *u = userdata;
+ uintmax_t k;
+
+ assert(u);
+
+ k = json_variant_unsigned(v);
+ if (k < CGROUP_CPU_SHARES_MIN || k > CGROUP_CPU_SHARES_MAX)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "shares value out of range.");
+
+ *u = (uint64_t) k;
+ return 0;
+}
+
+static int oci_cgroup_cpu_quota(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *u = userdata;
+ uintmax_t k;
+
+ assert(u);
+
+ k = json_variant_unsigned(v);
+ if (k <= 0 || k >= UINT64_MAX)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "period/quota value out of range.");
+
+ *u = (uint64_t) k;
+ return 0;
+}
+
+static int oci_cgroup_cpu_cpus(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ struct cpu_data *data = userdata;
+ CPUSet set;
+ const char *n;
+ int r;
+
+ assert(data);
+
+ assert_se(n = json_variant_string(v));
+
+ r = parse_cpu_set(n, &set);
+ if (r < 0)
+ return json_log(v, flags, r, "Failed to parse CPU set specification: %s", n);
+
+ cpu_set_reset(&data->cpu_set);
+ data->cpu_set = set;
+
+ return 0;
+}
+
+static int oci_cgroup_cpu(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "shares", JSON_VARIANT_UNSIGNED, oci_cgroup_cpu_shares, offsetof(struct cpu_data, shares), 0 },
+ { "quota", JSON_VARIANT_UNSIGNED, oci_cgroup_cpu_quota, offsetof(struct cpu_data, quota), 0 },
+ { "period", JSON_VARIANT_UNSIGNED, oci_cgroup_cpu_quota, offsetof(struct cpu_data, period), 0 },
+ { "realtimeRuntime", JSON_VARIANT_UNSIGNED, oci_unsupported, 0, 0 },
+ { "realtimePeriod", JSON_VARIANT_UNSIGNED, oci_unsupported, 0, 0 },
+ { "cpus", JSON_VARIANT_STRING, oci_cgroup_cpu_cpus, 0, 0 },
+ { "mems", JSON_VARIANT_STRING, oci_unsupported, 0, 0 },
+ {}
+ };
+
+ struct cpu_data data = {
+ .shares = UINT64_MAX,
+ .quota = UINT64_MAX,
+ .period = UINT64_MAX,
+ };
+
+ Settings *s = userdata;
+ int r;
+
+ r = json_dispatch(v, table, oci_unexpected, flags, &data);
+ if (r < 0) {
+ cpu_set_reset(&data.cpu_set);
+ return r;
+ }
+
+ cpu_set_reset(&s->cpu_set);
+ s->cpu_set = data.cpu_set;
+
+ if (data.shares != UINT64_MAX) {
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", "CPUShares", "t", data.shares);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (data.quota != UINT64_MAX && data.period != UINT64_MAX) {
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", "CPUQuotaPerSecUSec", "t", (uint64_t) (data.quota * USEC_PER_SEC / data.period));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if ((data.quota != UINT64_MAX) != (data.period != UINT64_MAX))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "CPU quota and period not used together.");
+
+ return 0;
+}
+
+static int oci_cgroup_block_io_weight(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ uintmax_t k;
+ int r;
+
+ assert(s);
+
+ k = json_variant_unsigned(v);
+ if (k < CGROUP_BLKIO_WEIGHT_MIN || k > CGROUP_BLKIO_WEIGHT_MAX)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Block I/O weight out of range.");
+
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", "BlockIOWeight", "t", (uint64_t) k);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+static int oci_cgroup_block_io_weight_device(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ struct device_data {
+ unsigned major;
+ unsigned minor;
+ uintmax_t weight;
+ } data = {
+ .major = (unsigned) -1,
+ .minor = (unsigned) -1,
+ .weight = UINTMAX_MAX,
+ };
+
+ static const JsonDispatch table[] = {
+ { "major", JSON_VARIANT_UNSIGNED, oci_device_major, offsetof(struct device_data, major), JSON_MANDATORY },
+ { "minor", JSON_VARIANT_UNSIGNED, oci_device_minor, offsetof(struct device_data, minor), JSON_MANDATORY },
+ { "weight", JSON_VARIANT_UNSIGNED, json_dispatch_unsigned, offsetof(struct device_data, weight), 0 },
+ { "leafWeight", JSON_VARIANT_INTEGER, oci_unsupported, 0, JSON_PERMISSIVE },
+ {}
+ };
+
+ _cleanup_free_ char *path = NULL;
+
+ r = json_dispatch(e, table, oci_unexpected, flags, &data);
+ if (r < 0)
+ return r;
+
+ if (data.weight == UINTMAX_MAX)
+ continue;
+
+ if (data.weight < CGROUP_BLKIO_WEIGHT_MIN || data.weight > CGROUP_BLKIO_WEIGHT_MAX)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Block I/O device weight out of range.");
+
+ r = device_path_make_major_minor(S_IFBLK, makedev(data.major, data.minor), &path);
+ if (r < 0)
+ return json_log(v, flags, r, "Failed to build device path: %m");
+
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", "BlockIODeviceWeight", "a(st)", 1, path, (uint64_t) data.weight);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int oci_cgroup_block_io_throttle(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ const char *pname;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ pname = streq(name, "throttleReadBpsDevice") ? "IOReadBandwidthMax" :
+ streq(name, "throttleWriteBpsDevice") ? "IOWriteBandwidthMax" :
+ streq(name, "throttleReadIOPSDevice") ? "IOReadIOPSMax" :
+ "IOWriteIOPSMax";
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ struct device_data {
+ unsigned major;
+ unsigned minor;
+ uintmax_t rate;
+ } data = {
+ .major = (unsigned) -1,
+ .minor = (unsigned) -1,
+ };
+
+ static const JsonDispatch table[] = {
+ { "major", JSON_VARIANT_UNSIGNED, oci_device_major, offsetof(struct device_data, major), JSON_MANDATORY },
+ { "minor", JSON_VARIANT_UNSIGNED, oci_device_minor, offsetof(struct device_data, minor), JSON_MANDATORY },
+ { "rate", JSON_VARIANT_UNSIGNED, json_dispatch_unsigned, offsetof(struct device_data, rate), JSON_MANDATORY },
+ {}
+ };
+
+ _cleanup_free_ char *path = NULL;
+
+ r = json_dispatch(e, table, oci_unexpected, flags, &data);
+ if (r < 0)
+ return r;
+
+ if (data.rate >= UINT64_MAX)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Block I/O device rate out of range.");
+
+ r = device_path_make_major_minor(S_IFBLK, makedev(data.major, data.minor), &path);
+ if (r < 0)
+ return json_log(v, flags, r, "Failed to build device path: %m");
+
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", pname, "a(st)", 1, path, (uint64_t) data.rate);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int oci_cgroup_block_io(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "weight", JSON_VARIANT_UNSIGNED, oci_cgroup_block_io_weight, 0, 0 },
+ { "leafWeight", JSON_VARIANT_UNSIGNED, oci_unsupported, 0, JSON_PERMISSIVE },
+ { "weightDevice", JSON_VARIANT_ARRAY, oci_cgroup_block_io_weight_device, 0, 0 },
+ { "throttleReadBpsDevice", JSON_VARIANT_ARRAY, oci_cgroup_block_io_throttle, 0, 0 },
+ { "throttleWriteBpsDevice", JSON_VARIANT_ARRAY, oci_cgroup_block_io_throttle, 0, 0 },
+ { "throttleReadIOPSDevice", JSON_VARIANT_ARRAY, oci_cgroup_block_io_throttle, 0, 0 },
+ { "throttleWriteIOPSDevice", JSON_VARIANT_ARRAY, oci_cgroup_block_io_throttle, 0, 0 },
+ {}
+ };
+
+ return json_dispatch(v, table, oci_unexpected, flags, userdata);
+}
+
+static int oci_cgroup_pids(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "limit", JSON_VARIANT_NUMBER, json_dispatch_variant, 0, JSON_MANDATORY },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *k = NULL;
+ Settings *s = userdata;
+ uint64_t m;
+ int r;
+
+ assert(s);
+
+ r = json_dispatch(v, table, oci_unexpected, flags, &k);
+ if (r < 0)
+ return r;
+
+ if (json_variant_is_negative(k))
+ m = UINT64_MAX;
+ else {
+ if (!json_variant_is_unsigned(k))
+ return json_log(k, flags, SYNTHETIC_ERRNO(EINVAL),
+ "pids limit not unsigned integer, refusing.");
+
+ m = (uint64_t) json_variant_unsigned(k);
+
+ if ((uintmax_t) m != json_variant_unsigned(k))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "pids limit out of range, refusing.");
+ }
+
+ r = settings_allocate_properties(s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(s->properties, "(sv)", "TasksMax", "t", m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+static int oci_resources(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "devices", JSON_VARIANT_ARRAY, oci_cgroup_devices, 0, 0 },
+ { "memory", JSON_VARIANT_OBJECT, oci_cgroup_memory, 0, 0 },
+ { "cpu", JSON_VARIANT_OBJECT, oci_cgroup_cpu, 0, 0 },
+ { "blockIO", JSON_VARIANT_OBJECT, oci_cgroup_block_io, 0, 0 },
+ { "hugepageLimits", JSON_VARIANT_ARRAY, oci_unsupported, 0, 0 },
+ { "network", JSON_VARIANT_OBJECT, oci_unsupported, 0, 0 },
+ { "pids", JSON_VARIANT_OBJECT, oci_cgroup_pids, 0, 0 },
+ { "rdma", JSON_VARIANT_OBJECT, oci_unsupported, 0, 0 },
+ {}
+ };
+
+ return json_dispatch(v, table, oci_unexpected, flags, userdata);
+}
+
+static bool sysctl_key_valid(const char *s) {
+ bool dot = true;
+
+ /* Note that we are a bit stricter here than in systemd-sysctl, as that inherited semantics from the old sysctl
+ * tool, which were really weird (as it swaps / and . in both ways) */
+
+ if (isempty(s))
+ return false;
+
+ for (; *s; s++) {
+
+ if (*s <= ' ' || *s >= 127)
+ return false;
+ if (*s == '/')
+ return false;
+ if (*s == '.') {
+
+ if (dot) /* Don't allow two dots next to each other (or at the beginning) */
+ return false;
+
+ dot = true;
+ } else
+ dot = false;
+ }
+
+ if (dot) /* don't allow a dot at the end */
+ return false;
+
+ return true;
+}
+
+static int oci_sysctl(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ JsonVariant *w;
+ const char *k;
+ int r;
+
+ assert(s);
+
+ JSON_VARIANT_OBJECT_FOREACH(k, w, v) {
+ const char *m;
+
+ if (!json_variant_is_string(w))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "sysctl parameter is not a string, refusing.");
+
+ assert_se(m = json_variant_string(w));
+
+ if (sysctl_key_valid(k))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "sysctl key invalid, refusing: %s", k);
+
+ r = strv_extend_strv(&s->sysctl, STRV_MAKE(k, m), false);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+#if HAVE_SECCOMP
+static int oci_seccomp_action_from_string(const char *name, uint32_t *ret) {
+
+ static const struct {
+ const char *name;
+ uint32_t action;
+ } table[] = {
+ { "SCMP_ACT_ALLOW", SCMP_ACT_ALLOW },
+ { "SCMP_ACT_ERRNO", SCMP_ACT_ERRNO(EPERM) }, /* the OCI spec doesn't document the error, but it appears EPERM is supposed to be used */
+ { "SCMP_ACT_KILL", SCMP_ACT_KILL },
+#ifdef SCMP_ACT_KILL_PROCESS
+ { "SCMP_ACT_KILL_PROCESS", SCMP_ACT_KILL_PROCESS },
+#endif
+#ifdef SCMP_ACT_KILL_THREAD
+ { "SCMP_ACT_KILL_THREAD", SCMP_ACT_KILL_THREAD },
+#endif
+#ifdef SCMP_ACT_LOG
+ { "SCMP_ACT_LOG", SCMP_ACT_LOG },
+#endif
+ { "SCMP_ACT_TRAP", SCMP_ACT_TRAP },
+
+ /* We don't support SCMP_ACT_TRACE because that requires a tracer, and that doesn't really make sense
+ * here */
+ };
+
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(table); i++)
+ if (streq_ptr(name, table[i].name)) {
+ *ret = table[i].action;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int oci_seccomp_arch_from_string(const char *name, uint32_t *ret) {
+
+ static const struct {
+ const char *name;
+ uint32_t arch;
+ } table[] = {
+ { "SCMP_ARCH_AARCH64", SCMP_ARCH_AARCH64 },
+ { "SCMP_ARCH_ARM", SCMP_ARCH_ARM },
+ { "SCMP_ARCH_MIPS", SCMP_ARCH_MIPS },
+ { "SCMP_ARCH_MIPS64", SCMP_ARCH_MIPS64 },
+ { "SCMP_ARCH_MIPS64N32", SCMP_ARCH_MIPS64N32 },
+ { "SCMP_ARCH_MIPSEL", SCMP_ARCH_MIPSEL },
+ { "SCMP_ARCH_MIPSEL64", SCMP_ARCH_MIPSEL64 },
+ { "SCMP_ARCH_MIPSEL64N32", SCMP_ARCH_MIPSEL64N32 },
+ { "SCMP_ARCH_NATIVE", SCMP_ARCH_NATIVE },
+#ifdef SCMP_ARCH_PARISC
+ { "SCMP_ARCH_PARISC", SCMP_ARCH_PARISC },
+#endif
+#ifdef SCMP_ARCH_PARISC64
+ { "SCMP_ARCH_PARISC64", SCMP_ARCH_PARISC64 },
+#endif
+ { "SCMP_ARCH_PPC", SCMP_ARCH_PPC },
+ { "SCMP_ARCH_PPC64", SCMP_ARCH_PPC64 },
+ { "SCMP_ARCH_PPC64LE", SCMP_ARCH_PPC64LE },
+#ifdef SCMP_ARCH_RISCV64
+ { "SCMP_ARCH_RISCV64", SCMP_ARCH_RISCV64 },
+#endif
+ { "SCMP_ARCH_S390", SCMP_ARCH_S390 },
+ { "SCMP_ARCH_S390X", SCMP_ARCH_S390X },
+ { "SCMP_ARCH_X32", SCMP_ARCH_X32 },
+ { "SCMP_ARCH_X86", SCMP_ARCH_X86 },
+ { "SCMP_ARCH_X86_64", SCMP_ARCH_X86_64 },
+ };
+
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(table); i++)
+ if (streq_ptr(table[i].name, name)) {
+ *ret = table[i].arch;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int oci_seccomp_compare_from_string(const char *name, enum scmp_compare *ret) {
+
+ static const struct {
+ const char *name;
+ enum scmp_compare op;
+ } table[] = {
+ { "SCMP_CMP_NE", SCMP_CMP_NE },
+ { "SCMP_CMP_LT", SCMP_CMP_LT },
+ { "SCMP_CMP_LE", SCMP_CMP_LE },
+ { "SCMP_CMP_EQ", SCMP_CMP_EQ },
+ { "SCMP_CMP_GE", SCMP_CMP_GE },
+ { "SCMP_CMP_GT", SCMP_CMP_GT },
+ { "SCMP_CMP_MASKED_EQ", SCMP_CMP_MASKED_EQ },
+ };
+
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(table); i++)
+ if (streq_ptr(table[i].name, name)) {
+ *ret = table[i].op;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int oci_seccomp_archs(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ scmp_filter_ctx *sc = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(sc);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ uint32_t a;
+
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Architecture entry is not a string");
+
+ r = oci_seccomp_arch_from_string(json_variant_string(e), &a);
+ if (r < 0)
+ return json_log(e, flags, r, "Unknown architecture: %s", json_variant_string(e));
+
+ r = seccomp_arch_add(sc, a);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return json_log(e, flags, r, "Failed to add architecture to seccomp filter: %m");
+ }
+
+ return 0;
+}
+
+struct syscall_rule {
+ char **names;
+ uint32_t action;
+ struct scmp_arg_cmp *arguments;
+ size_t n_arguments;
+};
+
+static void syscall_rule_free(struct syscall_rule *rule) {
+ assert(rule);
+
+ strv_free(rule->names);
+ free(rule->arguments);
+};
+
+static int oci_seccomp_action(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ uint32_t *action = userdata;
+ int r;
+
+ assert(action);
+
+ r = oci_seccomp_action_from_string(json_variant_string(v), action);
+ if (r < 0)
+ return json_log(v, flags, r, "Unknown system call action '%s': %m", json_variant_string(v));
+
+ return 0;
+}
+
+static int oci_seccomp_op(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ enum scmp_compare *op = userdata;
+ int r;
+
+ assert(op);
+
+ r = oci_seccomp_compare_from_string(json_variant_string(v), op);
+ if (r < 0)
+ return json_log(v, flags, r, "Unknown seccomp operator '%s': %m", json_variant_string(v));
+
+ return 0;
+}
+
+static int oci_seccomp_args(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ struct syscall_rule *rule = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(rule);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ static const struct JsonDispatch table[] = {
+ { "index", JSON_VARIANT_UNSIGNED, json_dispatch_uint32, offsetof(struct scmp_arg_cmp, arg), JSON_MANDATORY },
+ { "value", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(struct scmp_arg_cmp, datum_a), JSON_MANDATORY },
+ { "valueTwo", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(struct scmp_arg_cmp, datum_b), 0 },
+ { "op", JSON_VARIANT_STRING, oci_seccomp_op, offsetof(struct scmp_arg_cmp, op), JSON_MANDATORY },
+ {},
+ };
+
+ struct scmp_arg_cmp *a, *p;
+ int expected;
+
+ a = reallocarray(rule->arguments, rule->n_arguments + 1, sizeof(struct syscall_rule));
+ if (!a)
+ return log_oom();
+
+ rule->arguments = a;
+ p = rule->arguments + rule->n_arguments;
+
+ *p = (struct scmp_arg_cmp) {
+ .arg = 0,
+ .datum_a = 0,
+ .datum_b = 0,
+ .op = 0,
+ };
+
+ r = json_dispatch(e, table, oci_unexpected, flags, p);
+ if (r < 0)
+ return r;
+
+ expected = p->op == SCMP_CMP_MASKED_EQ ? 4 : 3;
+ if (r != expected)
+ json_log(e, flags|JSON_WARNING, 0, "Wrong number of system call arguments for JSON data data, ignoring.");
+
+ /* Note that we are a bit sloppy here and do not insist that SCMP_CMP_MASKED_EQ gets two datum values,
+ * and the other only one. That's because buildah for example by default calls things with
+ * SCMP_CMP_MASKED_EQ but only one argument. We use 0 when the value is not specified. */
+
+ rule->n_arguments++;
+ }
+
+ return 0;
+}
+
+static int oci_seccomp_syscalls(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ scmp_filter_ctx *sc = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(sc);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ static const JsonDispatch table[] = {
+ { "names", JSON_VARIANT_ARRAY, json_dispatch_strv, offsetof(struct syscall_rule, names), JSON_MANDATORY },
+ { "action", JSON_VARIANT_STRING, oci_seccomp_action, offsetof(struct syscall_rule, action), JSON_MANDATORY },
+ { "args", JSON_VARIANT_ARRAY, oci_seccomp_args, 0, 0 },
+ };
+ struct syscall_rule rule = {
+ .action = (uint32_t) -1,
+ };
+ char **i;
+
+ r = json_dispatch(e, table, oci_unexpected, flags, &rule);
+ if (r < 0)
+ goto fail_rule;
+
+ if (strv_isempty(rule.names)) {
+ json_log(e, flags, 0, "System call name list is empty.");
+ r = -EINVAL;
+ goto fail_rule;
+ }
+
+ STRV_FOREACH(i, rule.names) {
+ int nr;
+
+ nr = seccomp_syscall_resolve_name(*i);
+ if (nr == __NR_SCMP_ERROR) {
+ log_debug("Unknown syscall %s, skipping.", *i);
+ continue;
+ }
+
+ r = seccomp_rule_add_array(sc, rule.action, nr, rule.n_arguments, rule.arguments);
+ if (r < 0)
+ goto fail_rule;
+ }
+
+ syscall_rule_free(&rule);
+ continue;
+
+ fail_rule:
+ syscall_rule_free(&rule);
+ return r;
+ }
+
+ return 0;
+}
+#endif
+
+static int oci_seccomp(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+#if HAVE_SECCOMP
+ static const JsonDispatch table[] = {
+ { "defaultAction", JSON_VARIANT_STRING, NULL, 0, JSON_MANDATORY },
+ { "architectures", JSON_VARIANT_ARRAY, oci_seccomp_archs, 0, 0 },
+ { "syscalls", JSON_VARIANT_ARRAY, oci_seccomp_syscalls, 0, 0 },
+ {}
+ };
+
+ _cleanup_(seccomp_releasep) scmp_filter_ctx sc = NULL;
+ Settings *s = userdata;
+ JsonVariant *def;
+ uint32_t d;
+ int r;
+
+ assert(s);
+
+ def = json_variant_by_key(v, "defaultAction");
+ if (!def)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL), "defaultAction element missing.");
+
+ if (!json_variant_is_string(def))
+ return json_log(def, flags, SYNTHETIC_ERRNO(EINVAL), "defaultAction is not a string.");
+
+ r = oci_seccomp_action_from_string(json_variant_string(def), &d);
+ if (r < 0)
+ return json_log(def, flags, r, "Unknown default action: %s", json_variant_string(def));
+
+ sc = seccomp_init(d);
+ if (!sc)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ENOMEM), "Couldn't allocate seccomp object.");
+
+ r = json_dispatch(v, table, oci_unexpected, flags, sc);
+ if (r < 0)
+ return r;
+
+ seccomp_release(s->seccomp);
+ s->seccomp = TAKE_PTR(sc);
+ return 0;
+#else
+ return json_log(v, flags, SYNTHETIC_ERRNO(EOPNOTSUPP), "libseccomp support not enabled, can't parse seccomp object.");
+#endif
+}
+
+static int oci_rootfs_propagation(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ const char *s;
+
+ s = json_variant_string(v);
+
+ if (streq(s, "shared"))
+ return 0;
+
+ json_log(v, flags|JSON_DEBUG, 0, "Ignoring rootfsPropagation setting '%s'.", s);
+ return 0;
+}
+
+static int oci_masked_paths(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ JsonVariant *e;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ _cleanup_free_ char *destination = NULL;
+ CustomMount *m;
+ const char *p;
+
+ if (!json_variant_is_string(e))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Path is not a string, refusing.");
+
+ assert_se(p = json_variant_string(e));
+
+ if (!path_is_absolute(p))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Path is not not absolute, refusing: %s", p);
+
+ if (oci_exclude_mount(p))
+ continue;
+
+ destination = strdup(p);
+ if (!destination)
+ return log_oom();
+
+ m = custom_mount_add(&s->custom_mounts, &s->n_custom_mounts, CUSTOM_MOUNT_INACCESSIBLE);
+ if (!m)
+ return log_oom();
+
+ m->destination = TAKE_PTR(destination);
+
+ /* The spec doesn't say this, but apparently pre-existing implementations are lenient towards
+ * non-existing paths to mask. Let's hence be too. */
+ m->graceful = true;
+ }
+
+ return 0;
+}
+
+static int oci_readonly_paths(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ JsonVariant *e;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+ _cleanup_free_ char *source = NULL, *destination = NULL;
+ CustomMount *m;
+ const char *p;
+
+ if (!json_variant_is_string(e))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Path is not a string, refusing.");
+
+ assert_se(p = json_variant_string(e));
+
+ if (!path_is_absolute(p))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Path is not not absolute, refusing: %s", p);
+
+ if (oci_exclude_mount(p))
+ continue;
+
+ source = strjoin("+", p);
+ if (!source)
+ return log_oom();
+
+ destination = strdup(p);
+ if (!destination)
+ return log_oom();
+
+ m = custom_mount_add(&s->custom_mounts, &s->n_custom_mounts, CUSTOM_MOUNT_BIND);
+ if (!m)
+ return log_oom();
+
+ m->source = TAKE_PTR(source);
+ m->destination = TAKE_PTR(destination);
+ m->read_only = true;
+ }
+
+ return 0;
+}
+
+static int oci_linux(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "namespaces", JSON_VARIANT_ARRAY, oci_namespaces, 0, 0 },
+ { "uidMappings", JSON_VARIANT_ARRAY, oci_uid_gid_mappings, 0, 0 },
+ { "gidMappings", JSON_VARIANT_ARRAY, oci_uid_gid_mappings, 0, 0 },
+ { "devices", JSON_VARIANT_ARRAY, oci_devices, 0, 0 },
+ { "cgroupsPath", JSON_VARIANT_STRING, oci_cgroups_path, 0, 0 },
+ { "resources", JSON_VARIANT_OBJECT, oci_resources, 0, 0 },
+ { "intelRdt", JSON_VARIANT_OBJECT, oci_unsupported, 0, JSON_PERMISSIVE },
+ { "sysctl", JSON_VARIANT_OBJECT, oci_sysctl, 0, 0 },
+ { "seccomp", JSON_VARIANT_OBJECT, oci_seccomp, 0, 0 },
+ { "rootfsPropagation", JSON_VARIANT_STRING, oci_rootfs_propagation, 0, 0 },
+ { "maskedPaths", JSON_VARIANT_ARRAY, oci_masked_paths, 0, 0 },
+ { "readonlyPaths", JSON_VARIANT_ARRAY, oci_readonly_paths, 0, 0 },
+ { "mountLabel", JSON_VARIANT_STRING, oci_unsupported, 0, JSON_PERMISSIVE },
+ {}
+ };
+
+ return json_dispatch(v, table, oci_unexpected, flags, userdata);
+}
+
+static int oci_hook_timeout(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ usec_t *u = userdata;
+ uintmax_t k;
+
+ k = json_variant_unsigned(v);
+ if (k == 0 || k > (UINT64_MAX-1)/USEC_PER_SEC)
+ return json_log(v, flags, SYNTHETIC_ERRNO(ERANGE),
+ "Hook timeout value out of range.");
+
+ *u = k * USEC_PER_SEC;
+ return 0;
+}
+
+static int oci_hooks_array(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ Settings *s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(s);
+
+ JSON_VARIANT_ARRAY_FOREACH(e, v) {
+
+ static const JsonDispatch table[] = {
+ { "path", JSON_VARIANT_STRING, oci_absolute_path, offsetof(OciHook, path), JSON_MANDATORY },
+ { "args", JSON_VARIANT_ARRAY, oci_args, offsetof(OciHook, args), 0 },
+ { "env", JSON_VARIANT_ARRAY, oci_env, offsetof(OciHook, env), 0 },
+ { "timeout", JSON_VARIANT_UNSIGNED, oci_hook_timeout, offsetof(OciHook, timeout), 0 },
+ {}
+ };
+
+ OciHook *a, **array, *new_item;
+ size_t *n_array;
+
+ if (streq(name, "prestart")) {
+ array = &s->oci_hooks_prestart;
+ n_array = &s->n_oci_hooks_prestart;
+ } else if (streq(name, "poststart")) {
+ array = &s->oci_hooks_poststart;
+ n_array = &s->n_oci_hooks_poststart;
+ } else {
+ assert(streq(name, "poststop"));
+ array = &s->oci_hooks_poststop;
+ n_array = &s->n_oci_hooks_poststop;
+ }
+
+ a = reallocarray(*array, *n_array + 1, sizeof(OciHook));
+ if (!a)
+ return log_oom();
+
+ *array = a;
+ new_item = a + *n_array;
+
+ *new_item = (OciHook) {
+ .timeout = USEC_INFINITY,
+ };
+
+ r = json_dispatch(e, table, oci_unexpected, flags, userdata);
+ if (r < 0) {
+ free(new_item->path);
+ strv_free(new_item->args);
+ strv_free(new_item->env);
+ return r;
+ }
+
+ (*n_array) ++;
+ }
+
+ return 0;
+}
+
+static int oci_hooks(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch table[] = {
+ { "prestart", JSON_VARIANT_OBJECT, oci_hooks_array, 0, 0 },
+ { "poststart", JSON_VARIANT_OBJECT, oci_hooks_array, 0, 0 },
+ { "poststop", JSON_VARIANT_OBJECT, oci_hooks_array, 0, 0 },
+ {}
+ };
+
+ return json_dispatch(v, table, oci_unexpected, flags, userdata);
+}
+
+static int oci_annotations(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ JsonVariant *w;
+ const char *k;
+
+ JSON_VARIANT_OBJECT_FOREACH(k, w, v) {
+
+ if (isempty(k))
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Annotation with empty key, refusing.");
+
+ if (!json_variant_is_string(w))
+ return json_log(w, flags, SYNTHETIC_ERRNO(EINVAL),
+ "Annotation has non-string value, refusing.");
+
+ json_log(w, flags|JSON_DEBUG, 0, "Ignoring annotation '%s' with value '%s'.", k, json_variant_string(w));
+ }
+
+ return 0;
+}
+
+int oci_load(FILE *f, const char *bundle, Settings **ret) {
+
+ static const JsonDispatch table[] = {
+ { "ociVersion", JSON_VARIANT_STRING, NULL, 0, JSON_MANDATORY },
+ { "process", JSON_VARIANT_OBJECT, oci_process, 0, 0 },
+ { "root", JSON_VARIANT_OBJECT, oci_root, 0, 0 },
+ { "hostname", JSON_VARIANT_STRING, oci_hostname, 0, 0 },
+ { "mounts", JSON_VARIANT_ARRAY, oci_mounts, 0, 0 },
+ { "linux", JSON_VARIANT_OBJECT, oci_linux, 0, 0 },
+ { "hooks", JSON_VARIANT_OBJECT, oci_hooks, 0, 0 },
+ { "annotations", JSON_VARIANT_OBJECT, oci_annotations, 0, 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *oci = NULL;
+ _cleanup_(settings_freep) Settings *s = NULL;
+ unsigned line = 0, column = 0;
+ JsonVariant *v;
+ const char *path;
+ int r;
+
+ assert_se(bundle);
+
+ path = strjoina(bundle, "/config.json");
+
+ r = json_parse_file(f, path, 0, &oci, &line, &column);
+ if (r < 0) {
+ if (line != 0 && column != 0)
+ return log_error_errno(r, "Failed to parse '%s' at %u:%u: %m", path, line, column);
+ else
+ return log_error_errno(r, "Failed to parse '%s': %m", path);
+ }
+
+ v = json_variant_by_key(oci, "ociVersion");
+ if (!v)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "JSON file '%s' is not an OCI bundle configuration file. Refusing.",
+ path);
+ if (!streq_ptr(json_variant_string(v), "1.0.0"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "OCI bundle version not supported: %s",
+ strna(json_variant_string(v)));
+
+ // {
+ // _cleanup_free_ char *formatted = NULL;
+ // assert_se(json_variant_format(oci, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR, &formatted) >= 0);
+ // fputs(formatted, stdout);
+ // }
+
+ s = settings_new();
+ if (!s)
+ return log_oom();
+
+ s->start_mode = START_PID1;
+ s->resolv_conf = RESOLV_CONF_OFF;
+ s->link_journal = LINK_NO;
+ s->timezone = TIMEZONE_OFF;
+
+ s->bundle = strdup(bundle);
+ if (!s->bundle)
+ return log_oom();
+
+ r = json_dispatch(oci, table, oci_unexpected, 0, s);
+ if (r < 0)
+ return r;
+
+ if (s->properties) {
+ r = sd_bus_message_seal(s->properties, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Cannot seal properties bus message: %m");
+ }
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
diff --git a/src/nspawn/nspawn-oci.h b/src/nspawn/nspawn-oci.h
new file mode 100644
index 0000000..ee72c91
--- /dev/null
+++ b/src/nspawn/nspawn-oci.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "nspawn-settings.h"
+
+int oci_load(FILE *f, const char *path, Settings **ret);
diff --git a/src/nspawn/nspawn-patch-uid.c b/src/nspawn/nspawn-patch-uid.c
new file mode 100644
index 0000000..785332e
--- /dev/null
+++ b/src/nspawn/nspawn-patch-uid.c
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/statvfs.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include "acl-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "missing_magic.h"
+#include "nspawn-def.h"
+#include "nspawn-patch-uid.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+#if HAVE_ACL
+
+static int get_acl(int fd, const char *name, acl_type_t type, acl_t *ret) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ acl_t acl;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ if (name) {
+ _cleanup_close_ int child_fd = -1;
+
+ child_fd = openat(fd, name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (child_fd < 0)
+ return -errno;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", child_fd);
+ acl = acl_get_file(procfs_path, type);
+ } else if (type == ACL_TYPE_ACCESS)
+ acl = acl_get_fd(fd);
+ else {
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ acl = acl_get_file(procfs_path, type);
+ }
+ if (!acl)
+ return -errno;
+
+ *ret = acl;
+ return 0;
+}
+
+static int set_acl(int fd, const char *name, acl_type_t type, acl_t acl) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ int r;
+
+ assert(fd >= 0);
+ assert(acl);
+
+ if (name) {
+ _cleanup_close_ int child_fd = -1;
+
+ child_fd = openat(fd, name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (child_fd < 0)
+ return -errno;
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", child_fd);
+ r = acl_set_file(procfs_path, type, acl);
+ } else if (type == ACL_TYPE_ACCESS)
+ r = acl_set_fd(fd, acl);
+ else {
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+ r = acl_set_file(procfs_path, type, acl);
+ }
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int shift_acl(acl_t acl, uid_t shift, acl_t *ret) {
+ _cleanup_(acl_freep) acl_t copy = NULL;
+ acl_entry_t i;
+ int r;
+
+ assert(acl);
+ assert(ret);
+
+ r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ if (r < 0)
+ return -errno;
+ while (r > 0) {
+ uid_t *old_uid, new_uid;
+ bool modify = false;
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (IN_SET(tag, ACL_USER, ACL_GROUP)) {
+
+ /* We don't distinguish here between uid_t and gid_t, let's make sure the compiler checks that
+ * this is actually OK */
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+
+ old_uid = acl_get_qualifier(i);
+ if (!old_uid)
+ return -errno;
+
+ new_uid = shift | (*old_uid & UINT32_C(0xFFFF));
+ if (!uid_is_valid(new_uid))
+ return -EINVAL;
+
+ modify = new_uid != *old_uid;
+ if (modify && !copy) {
+ int n;
+
+ /* There's no copy of the ACL yet? if so, let's create one, and start the loop from the
+ * beginning, so that we copy all entries, starting from the first, this time. */
+
+ n = acl_entries(acl);
+ if (n < 0)
+ return -errno;
+
+ copy = acl_init(n);
+ if (!copy)
+ return -errno;
+
+ /* Seek back to the beginning */
+ r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ if (r < 0)
+ return -errno;
+ continue;
+ }
+ }
+
+ if (copy) {
+ acl_entry_t new_entry;
+
+ if (acl_create_entry(&copy, &new_entry) < 0)
+ return -errno;
+
+ if (acl_copy_entry(new_entry, i) < 0)
+ return -errno;
+
+ if (modify)
+ if (acl_set_qualifier(new_entry, &new_uid) < 0)
+ return -errno;
+ }
+
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i);
+ if (r < 0)
+ return -errno;
+ }
+
+ *ret = TAKE_PTR(copy);
+
+ return !!*ret;
+}
+
+static int patch_acls(int fd, const char *name, const struct stat *st, uid_t shift) {
+ _cleanup_(acl_freep) acl_t acl = NULL, shifted = NULL;
+ bool changed = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ /* ACLs are not supported on symlinks, there's no point in trying */
+ if (S_ISLNK(st->st_mode))
+ return 0;
+
+ r = get_acl(fd, name, ACL_TYPE_ACCESS, &acl);
+ if (r == -EOPNOTSUPP)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = shift_acl(acl, shift, &shifted);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = set_acl(fd, name, ACL_TYPE_ACCESS, shifted);
+ if (r < 0)
+ return r;
+
+ changed = true;
+ }
+
+ if (S_ISDIR(st->st_mode)) {
+ acl_free(acl);
+ acl_free(shifted);
+
+ acl = shifted = NULL;
+
+ r = get_acl(fd, name, ACL_TYPE_DEFAULT, &acl);
+ if (r < 0)
+ return r;
+
+ r = shift_acl(acl, shift, &shifted);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = set_acl(fd, name, ACL_TYPE_DEFAULT, shifted);
+ if (r < 0)
+ return r;
+
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+#else
+
+static int patch_acls(int fd, const char *name, const struct stat *st, uid_t shift) {
+ return 0;
+}
+
+#endif
+
+static int patch_fd(int fd, const char *name, const struct stat *st, uid_t shift) {
+ uid_t new_uid;
+ gid_t new_gid;
+ bool changed = false;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ new_uid = shift | (st->st_uid & UINT32_C(0xFFFF));
+ new_gid = (gid_t) shift | (st->st_gid & UINT32_C(0xFFFF));
+
+ if (!uid_is_valid(new_uid) || !gid_is_valid(new_gid))
+ return -EINVAL;
+
+ if (st->st_uid != new_uid || st->st_gid != new_gid) {
+ if (name)
+ r = fchownat(fd, name, new_uid, new_gid, AT_SYMLINK_NOFOLLOW);
+ else
+ r = fchown(fd, new_uid, new_gid);
+ if (r < 0)
+ return -errno;
+
+ /* The Linux kernel alters the mode in some cases of chown(). Let's undo this. */
+ if (name) {
+ if (!S_ISLNK(st->st_mode))
+ r = fchmodat(fd, name, st->st_mode, 0);
+ else /* AT_SYMLINK_NOFOLLOW is not available for fchmodat() */
+ r = 0;
+ } else
+ r = fchmod(fd, st->st_mode);
+ if (r < 0)
+ return -errno;
+
+ changed = true;
+ }
+
+ r = patch_acls(fd, name, st, shift);
+ if (r < 0)
+ return r;
+
+ return r > 0 || changed;
+}
+
+/*
+ * Check if the filesystem is fully compatible with user namespaces or
+ * UID/GID patching. Some filesystems in this list can be fully mounted inside
+ * user namespaces, however their inodes may relate to host resources or only
+ * valid in the global user namespace, therefore no patching should be applied.
+ */
+static int is_fs_fully_userns_compatible(const struct statfs *sfs) {
+
+ assert(sfs);
+
+ return F_TYPE_EQUAL(sfs->f_type, BINFMTFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, CGROUP_SUPER_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, CGROUP2_SUPER_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, DEBUGFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, DEVPTS_SUPER_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, EFIVARFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, HUGETLBFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, MQUEUE_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, PROC_SUPER_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, PSTOREFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, SELINUX_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, SMACK_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, SECURITYFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, BPF_FS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, TRACEFS_MAGIC) ||
+ F_TYPE_EQUAL(sfs->f_type, SYSFS_MAGIC);
+}
+
+static int recurse_fd(int fd, bool donate_fd, const struct stat *st, uid_t shift, bool is_toplevel) {
+ _cleanup_closedir_ DIR *d = NULL;
+ bool changed = false;
+ struct statfs sfs;
+ int r;
+
+ assert(fd >= 0);
+
+ if (fstatfs(fd, &sfs) < 0)
+ return -errno;
+
+ /* We generally want to permit crossing of mount boundaries when patching the UIDs/GIDs. However, we probably
+ * shouldn't do this for /proc and /sys if that is already mounted into place. Hence, let's stop the recursion
+ * when we hit procfs, sysfs or some other special file systems. */
+
+ r = is_fs_fully_userns_compatible(&sfs);
+ if (r < 0)
+ goto finish;
+ if (r > 0) {
+ r = 0; /* don't recurse */
+ goto finish;
+ }
+
+ /* Also, if we hit a read-only file system, then don't bother, skip the whole subtree */
+ if ((sfs.f_flags & ST_RDONLY) ||
+ access_fd(fd, W_OK) == -EROFS)
+ goto read_only;
+
+ if (S_ISDIR(st->st_mode)) {
+ struct dirent *de;
+
+ if (!donate_fd) {
+ int copy;
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ fd = copy;
+ donate_fd = true;
+ }
+
+ d = take_fdopendir(&fd);
+ if (!d) {
+ r = -errno;
+ goto finish;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, r = -errno; goto finish) {
+ struct stat fst;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(d), de->d_name, &fst, AT_SYMLINK_NOFOLLOW) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (S_ISDIR(fst.st_mode)) {
+ int subdir_fd;
+
+ subdir_fd = openat(dirfd(d), de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (subdir_fd < 0) {
+ r = -errno;
+ goto finish;
+
+ }
+
+ r = recurse_fd(subdir_fd, true, &fst, shift, false);
+ if (r < 0)
+ goto finish;
+ if (r > 0)
+ changed = true;
+
+ } else {
+ r = patch_fd(dirfd(d), de->d_name, &fst, shift);
+ if (r < 0)
+ goto finish;
+ if (r > 0)
+ changed = true;
+ }
+ }
+ }
+
+ /* After we descended, also patch the directory itself. It's key to do this in this order so that the top-level
+ * directory is patched as very last object in the tree, so that we can use it as quick indicator whether the
+ * tree is properly chown()ed already. */
+ r = patch_fd(d ? dirfd(d) : fd, NULL, st, shift);
+ if (r == -EROFS)
+ goto read_only;
+ if (r > 0)
+ changed = true;
+
+ r = changed;
+ goto finish;
+
+read_only:
+ if (!is_toplevel) {
+ _cleanup_free_ char *name = NULL;
+
+ /* When we hit a ready-only subtree we simply skip it, but log about it. */
+ (void) fd_get_path(fd, &name);
+ log_debug("Skipping read-only file or directory %s.", strna(name));
+ r = changed;
+ }
+
+finish:
+ if (donate_fd)
+ safe_close(fd);
+
+ return r;
+}
+
+static int fd_patch_uid_internal(int fd, bool donate_fd, uid_t shift, uid_t range) {
+ struct stat st;
+ int r;
+
+ assert(fd >= 0);
+
+ /* Recursively adjusts the UID/GIDs of all files of a directory tree. This is used to automatically fix up an
+ * OS tree to the used user namespace UID range. Note that this automatic adjustment only works for UID ranges
+ * following the concept that the upper 16bit of a UID identify the container, and the lower 16bit are the actual
+ * UID within the container. */
+
+ if ((shift & 0xFFFF) != 0) {
+ /* We only support containers where the shift starts at a 2^16 boundary */
+ r = -EOPNOTSUPP;
+ goto finish;
+ }
+
+ if (shift == UID_BUSY_BASE) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (range != 0x10000) {
+ /* We only support containers with 16bit UID ranges for the patching logic */
+ r = -EOPNOTSUPP;
+ goto finish;
+ }
+
+ if (fstat(fd, &st) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if ((uint32_t) st.st_uid >> 16 != (uint32_t) st.st_gid >> 16) {
+ /* We only support containers where the uid/gid container ID match */
+ r = -EBADE;
+ goto finish;
+ }
+
+ /* Try to detect if the range is already right. Of course, this a pretty drastic optimization, as we assume
+ * that if the top-level dir has the right upper 16bit assigned, then everything below will have too... */
+ if (((uint32_t) (st.st_uid ^ shift) >> 16) == 0)
+ return 0;
+
+ /* Before we start recursively chowning, mark the top-level dir as "busy" by chowning it to the "busy"
+ * range. Should we be interrupted in the middle of our work, we'll see it owned by this user and will start
+ * chown()ing it again, unconditionally, as the busy UID is not a valid UID we'd everpick for ourselves. */
+
+ if ((st.st_uid & UID_BUSY_MASK) != UID_BUSY_BASE) {
+ if (fchown(fd,
+ UID_BUSY_BASE | (st.st_uid & ~UID_BUSY_MASK),
+ (gid_t) UID_BUSY_BASE | (st.st_gid & ~(gid_t) UID_BUSY_MASK)) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+
+ return recurse_fd(fd, donate_fd, &st, shift, true);
+
+finish:
+ if (donate_fd)
+ safe_close(fd);
+
+ return r;
+}
+
+int path_patch_uid(const char *path, uid_t shift, uid_t range) {
+ int fd;
+
+ fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (fd < 0)
+ return -errno;
+
+ return fd_patch_uid_internal(fd, true, shift, range);
+}
diff --git a/src/nspawn/nspawn-patch-uid.h b/src/nspawn/nspawn-patch-uid.h
new file mode 100644
index 0000000..5c7349b
--- /dev/null
+++ b/src/nspawn/nspawn-patch-uid.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <sys/types.h>
+
+int path_patch_uid(const char *path, uid_t shift, uid_t range);
diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c
new file mode 100644
index 0000000..2e6c12b
--- /dev/null
+++ b/src/nspawn/nspawn-register.c
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "bus-wait-for-jobs.h"
+#include "nspawn-register.h"
+#include "special.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "util.h"
+
+static int append_machine_properties(
+ sd_bus_message *m,
+ CustomMount *mounts,
+ unsigned n_mounts,
+ int kill_signal) {
+
+ unsigned j;
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* If you make changes here, also make sure to update systemd-nspawn@.service, to keep the device policies in
+ * sync regardless if we are run with or without the --keep-unit switch. */
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
+ /* Allow the container to
+ * access and create the API
+ * device nodes, so that
+ * PrivateDevices= in the
+ * container can work
+ * fine */
+ "/dev/net/tun", "rwm",
+ /* Allow the container
+ * access to ptys. However,
+ * do not permit the
+ * container to ever create
+ * these device nodes. */
+ "char-pts", "rw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (j = 0; j < n_mounts; j++) {
+ CustomMount *cm = mounts + j;
+
+ if (cm->type != CUSTOM_MOUNT_BIND)
+ continue;
+
+ r = is_device_node(cm->source);
+ if (r == -ENOENT) {
+ /* The bind source might only appear as the image is put together, hence don't complain */
+ log_debug_errno(r, "Bind mount source %s not found, ignoring: %m", cm->source);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to stat %s: %m", cm->source);
+
+ if (r) {
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
+ cm->source, cm->read_only ? "r" : "rw");
+ if (r < 0)
+ return log_error_errno(r, "Failed to append message arguments: %m");
+ }
+ }
+
+ if (kill_signal != 0) {
+ r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int append_controller_property(sd_bus *bus, sd_bus_message *m) {
+ const char *unique;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unique name: %m");
+
+ r = sd_bus_message_append(m, "(sv)", "Controller", "s", unique);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+int register_machine(
+ sd_bus *bus,
+ const char *machine_name,
+ pid_t pid,
+ const char *directory,
+ sd_id128_t uuid,
+ int local_ifindex,
+ const char *slice,
+ CustomMount *mounts,
+ unsigned n_mounts,
+ int kill_signal,
+ char **properties,
+ sd_bus_message *properties_message,
+ bool keep_unit,
+ const char *service) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+
+ if (keep_unit) {
+ r = bus_call_method(
+ bus,
+ bus_machine_mgr,
+ "RegisterMachineWithNetwork",
+ &error,
+ NULL,
+ "sayssusai",
+ machine_name,
+ SD_BUS_MESSAGE_APPEND_ID128(uuid),
+ service,
+ "container",
+ (uint32_t) pid,
+ strempty(directory),
+ local_ifindex > 0 ? 1 : 0, local_ifindex);
+ } else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_machine_mgr, "CreateMachineWithNetwork");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sayssusai",
+ machine_name,
+ SD_BUS_MESSAGE_APPEND_ID128(uuid),
+ service,
+ "container",
+ (uint32_t) pid,
+ strempty(directory),
+ local_ifindex > 0 ? 1 : 0, local_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!isempty(slice)) {
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = append_controller_property(bus, m);
+ if (r < 0)
+ return r;
+
+ r = append_machine_properties(
+ m,
+ mounts,
+ n_mounts,
+ kill_signal);
+ if (r < 0)
+ return r;
+
+ if (properties_message) {
+ r = sd_bus_message_copy(m, properties_message, true);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = bus_append_unit_property_assignment_many(m, UNIT_SERVICE, properties);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to register machine: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+int unregister_machine(
+ sd_bus *bus,
+ const char *machine_name) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+
+ r = bus_call_method(bus, bus_machine_mgr, "UnregisterMachine", &error, NULL, "s", machine_name);
+ if (r < 0)
+ log_debug("Failed to unregister machine: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+int allocate_scope(
+ sd_bus *bus,
+ const char *machine_name,
+ pid_t pid,
+ const char *slice,
+ CustomMount *mounts,
+ unsigned n_mounts,
+ int kill_signal,
+ char **properties,
+ sd_bus_message *properties_message) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *scope = NULL;
+ const char *description, *object;
+ int r;
+
+ assert(bus);
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch job: %m");
+
+ r = unit_name_mangle_with_suffix(machine_name, "as machine name", 0, ".scope", &scope);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle scope name: %m");
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ss", scope, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ description = strjoina("Container ", machine_name);
+
+ r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)(sv)(sv)",
+ "PIDs", "au", 1, pid,
+ "Description", "s", description,
+ "Delegate", "b", 1,
+ "CollectMode", "s", "inactive-or-failed",
+ "AddRef", "b", 1,
+ "Slice", "s", isempty(slice) ? SPECIAL_MACHINE_SLICE : slice);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = append_controller_property(bus, m);
+ if (r < 0)
+ return r;
+
+ if (properties_message) {
+ r = sd_bus_message_copy(m, properties_message, true);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = append_machine_properties(
+ m,
+ mounts,
+ n_mounts,
+ kill_signal);
+ if (r < 0)
+ return r;
+
+ r = bus_append_unit_property_assignment_many(m, UNIT_SCOPE, properties);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* No auxiliary units */
+ r = sd_bus_message_append(
+ m,
+ "a(sa(sv))",
+ 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate scope: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, false);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int terminate_scope(
+ sd_bus *bus,
+ const char *machine_name) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *scope = NULL;
+ int r;
+
+ r = unit_name_mangle_with_suffix(machine_name, "to terminate", 0, ".scope", &scope);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle scope name: %m");
+
+ r = bus_call_method(bus, bus_systemd_mgr, "AbandonScope", &error, NULL, "s", scope);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to abandon scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ }
+
+ r = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ "KillUnit",
+ &error,
+ NULL,
+ "ssi",
+ scope,
+ "all",
+ (int32_t) SIGKILL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to SIGKILL scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
+ sd_bus_error_free(&error);
+ }
+
+ r = bus_call_method(bus, bus_systemd_mgr, "UnrefUnit", &error, NULL, "s", scope);
+ if (r < 0)
+ log_debug_errno(r, "Failed to drop reference to scope '%s', ignoring: %s", scope, bus_error_message(&error, r));
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h
new file mode 100644
index 0000000..59fdd1b
--- /dev/null
+++ b/src/nspawn/nspawn-register.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "nspawn-mount.h"
+
+int register_machine(sd_bus *bus, const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message, bool keep_unit, const char *service);
+int unregister_machine(sd_bus *bus, const char *machine_name);
+
+int allocate_scope(sd_bus *bus, const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message);
+int terminate_scope(sd_bus *bus, const char *machine_name);
diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c
new file mode 100644
index 0000000..1da7191
--- /dev/null
+++ b/src/nspawn/nspawn-seccomp.c
@@ -0,0 +1,260 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <linux/netlink.h>
+#include <sys/capability.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#include "alloc-util.h"
+#include "log.h"
+#include "nspawn-seccomp.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "string-util.h"
+#include "strv.h"
+
+#if HAVE_SECCOMP
+
+static int add_syscall_filters(
+ scmp_filter_ctx ctx,
+ uint32_t arch,
+ uint64_t cap_list_retain,
+ char **syscall_allow_list,
+ char **syscall_deny_list) {
+
+ static const struct {
+ uint64_t capability;
+ const char* name;
+ } allow_list[] = {
+ /* Let's use set names where we can */
+ { 0, "@aio" },
+ { 0, "@basic-io" },
+ { 0, "@chown" },
+ { 0, "@default" },
+ { 0, "@file-system" },
+ { 0, "@io-event" },
+ { 0, "@ipc" },
+ { 0, "@mount" },
+ { 0, "@network-io" },
+ { 0, "@process" },
+ { 0, "@resources" },
+ { 0, "@setuid" },
+ { 0, "@signal" },
+ { 0, "@sync" },
+ { 0, "@timer" },
+
+ /* The following four are sets we optionally enable, in case the caps have been configured for it */
+ { CAP_SYS_TIME, "@clock" },
+ { CAP_SYS_MODULE, "@module" },
+ { CAP_SYS_RAWIO, "@raw-io" },
+ { CAP_IPC_LOCK, "@memlock" },
+
+ /* Plus a good set of additional syscalls which are not part of any of the groups above */
+ { 0, "brk" },
+ { 0, "capget" },
+ { 0, "capset" },
+ { 0, "copy_file_range" },
+ { 0, "fadvise64" },
+ { 0, "fadvise64_64" },
+ { 0, "flock" },
+ { 0, "get_mempolicy" },
+ { 0, "getcpu" },
+ { 0, "getpriority" },
+ { 0, "getrandom" },
+ { 0, "ioctl" },
+ { 0, "ioprio_get" },
+ { 0, "kcmp" },
+ { 0, "madvise" },
+ { 0, "mincore" },
+ { 0, "mprotect" },
+ { 0, "mremap" },
+ { 0, "name_to_handle_at" },
+ { 0, "oldolduname" },
+ { 0, "olduname" },
+ { 0, "personality" },
+ { 0, "readahead" },
+ { 0, "readdir" },
+ { 0, "remap_file_pages" },
+ { 0, "sched_get_priority_max" },
+ { 0, "sched_get_priority_min" },
+ { 0, "sched_getaffinity" },
+ { 0, "sched_getattr" },
+ { 0, "sched_getparam" },
+ { 0, "sched_getscheduler" },
+ { 0, "sched_rr_get_interval" },
+ { 0, "sched_yield" },
+ { 0, "seccomp" },
+ { 0, "sendfile" },
+ { 0, "sendfile64" },
+ { 0, "setdomainname" },
+ { 0, "setfsgid" },
+ { 0, "setfsgid32" },
+ { 0, "setfsuid" },
+ { 0, "setfsuid32" },
+ { 0, "sethostname" },
+ { 0, "setpgid" },
+ { 0, "setsid" },
+ { 0, "splice" },
+ { 0, "sysinfo" },
+ { 0, "tee" },
+ { 0, "umask" },
+ { 0, "uname" },
+ { 0, "userfaultfd" },
+ { 0, "vmsplice" },
+
+ /* The following individual syscalls are added depending on specified caps */
+ { CAP_SYS_PACCT, "acct" },
+ { CAP_SYS_PTRACE, "process_vm_readv" },
+ { CAP_SYS_PTRACE, "process_vm_writev" },
+ { CAP_SYS_PTRACE, "ptrace" },
+ { CAP_SYS_BOOT, "reboot" },
+ { CAP_SYSLOG, "syslog" },
+ { CAP_SYS_TTY_CONFIG, "vhangup" },
+
+ /*
+ * The following syscalls and groups are knowingly excluded:
+ *
+ * @cpu-emulation
+ * @keyring (NB: keyring is not namespaced!)
+ * @obsolete
+ * @pkey
+ * @swap
+ *
+ * bpf (NB: bpffs is not namespaced!)
+ * fanotify_init
+ * fanotify_mark
+ * kexec_file_load
+ * kexec_load
+ * lookup_dcookie
+ * nfsservctl
+ * open_by_handle_at
+ * perf_event_open
+ * quotactl
+ */
+ };
+
+ _cleanup_strv_free_ char **added = NULL;
+ char **p;
+ int r;
+
+ for (size_t i = 0; i < ELEMENTSOF(allow_list); i++) {
+ if (allow_list[i].capability != 0 && (cap_list_retain & (1ULL << allow_list[i].capability)) == 0)
+ continue;
+
+ r = seccomp_add_syscall_filter_item(ctx,
+ allow_list[i].name,
+ SCMP_ACT_ALLOW,
+ syscall_deny_list,
+ false,
+ &added);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add syscall filter item %s: %m", allow_list[i].name);
+ }
+
+ STRV_FOREACH(p, syscall_allow_list) {
+ r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ALLOW, syscall_deny_list, true, &added);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m",
+ *p, seccomp_arch_to_string(arch));
+ }
+
+ /* The default action is ENOSYS. Respond with EPERM to all other "known" but not allow-listed
+ * syscalls. */
+ r = seccomp_add_syscall_filter_item(ctx, "@known", SCMP_ACT_ERRNO(EPERM), added, true, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add rule for @known set on %s, ignoring: %m",
+ seccomp_arch_to_string(arch));
+
+#if (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 5) || SCMP_VER_MAJOR > 2
+ /* We have a large filter here, so let's turn on the binary tree mode if possible. */
+ r = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_OPTIMIZE, 2);
+ if (r < 0)
+ return r;
+#endif
+
+ return 0;
+}
+
+int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list) {
+ uint32_t arch;
+ int r;
+
+ if (!is_seccomp_available()) {
+ log_debug("SECCOMP features not detected in the kernel or disabled at runtime, disabling SECCOMP filtering");
+ return 0;
+ }
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Applying allow list on architecture: %s", seccomp_arch_to_string(arch));
+
+ /* We install ENOSYS as the default action, but it will only apply to syscalls which are not
+ * in the @known set, see above. */
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ERRNO(ENOSYS));
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate seccomp object: %m");
+
+ r = add_syscall_filters(seccomp, arch, cap_list_retain, syscall_allow_list, syscall_deny_list);
+ if (r < 0)
+ return r;
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return log_error_errno(r, "Failed to install seccomp filter: %m");
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Applying NETLINK_AUDIT mask on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate seccomp object: %m");
+
+ /*
+ Audit is broken in containers, much of the userspace audit hookup will fail if running inside a
+ container. We don't care and just turn off creation of audit sockets.
+
+ This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail with EAFNOSUPPORT which audit userspace uses
+ as indication that audit is disabled in the kernel.
+ */
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 2,
+ SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
+ SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add audit seccomp rule, ignoring: %m");
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return log_error_errno(r, "Failed to install seccomp audit filter: %m");
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+#else
+
+int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list) {
+ return 0;
+}
+
+#endif
diff --git a/src/nspawn/nspawn-seccomp.h b/src/nspawn/nspawn-seccomp.h
new file mode 100644
index 0000000..2690fba
--- /dev/null
+++ b/src/nspawn/nspawn-seccomp.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list);
diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c
new file mode 100644
index 0000000..92bb512
--- /dev/null
+++ b/src/nspawn/nspawn-settings.c
@@ -0,0 +1,868 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "cap-list.h"
+#include "conf-parser.h"
+#include "cpu-set-util.h"
+#include "hostname-util.h"
+#include "nspawn-network.h"
+#include "nspawn-settings.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+Settings *settings_new(void) {
+ Settings *s;
+
+ s = new(Settings, 1);
+ if (!s)
+ return NULL;
+
+ *s = (Settings) {
+ .start_mode = _START_MODE_INVALID,
+ .personality = PERSONALITY_INVALID,
+
+ .resolv_conf = _RESOLV_CONF_MODE_INVALID,
+ .link_journal = _LINK_JOURNAL_INVALID,
+ .timezone = _TIMEZONE_MODE_INVALID,
+
+ .userns_mode = _USER_NAMESPACE_MODE_INVALID,
+ .userns_chown = -1,
+ .uid_shift = UID_INVALID,
+ .uid_range = UID_INVALID,
+
+ .no_new_privileges = -1,
+
+ .read_only = -1,
+ .volatile_mode = _VOLATILE_MODE_INVALID,
+
+ .private_network = -1,
+ .network_veth = -1,
+
+ .full_capabilities = CAPABILITY_QUINTET_NULL,
+
+ .uid = UID_INVALID,
+ .gid = GID_INVALID,
+
+ .console_mode = _CONSOLE_MODE_INVALID,
+ .console_width = (unsigned) -1,
+ .console_height = (unsigned) -1,
+
+ .clone_ns_flags = (unsigned long) -1,
+ .use_cgns = -1,
+ };
+
+ return s;
+}
+
+int settings_load(FILE *f, const char *path, Settings **ret) {
+ _cleanup_(settings_freep) Settings *s = NULL;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ s = settings_new();
+ if (!s)
+ return -ENOMEM;
+
+ r = config_parse(NULL, path, f,
+ "Exec\0"
+ "Network\0"
+ "Files\0",
+ config_item_perf_lookup, nspawn_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ s, NULL);
+ if (r < 0)
+ return r;
+
+ /* Make sure that if userns_mode is set, userns_chown is set to something appropriate, and vice versa. Either
+ * both fields shall be initialized or neither. */
+ if (s->userns_mode == USER_NAMESPACE_PICK)
+ s->userns_chown = true;
+ else if (s->userns_mode != _USER_NAMESPACE_MODE_INVALID && s->userns_chown < 0)
+ s->userns_chown = false;
+
+ if (s->userns_chown >= 0 && s->userns_mode == _USER_NAMESPACE_MODE_INVALID)
+ s->userns_mode = USER_NAMESPACE_NO;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+static void free_oci_hooks(OciHook *h, size_t n) {
+ size_t i;
+
+ assert(h || n == 0);
+
+ for (i = 0; i < n; i++) {
+ free(h[i].path);
+ strv_free(h[i].args);
+ strv_free(h[i].env);
+ }
+
+ free(h);
+}
+
+void device_node_array_free(DeviceNode *node, size_t n) {
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ free(node[i].path);
+
+ free(node);
+}
+
+Settings* settings_free(Settings *s) {
+ if (!s)
+ return NULL;
+
+ strv_free(s->parameters);
+ strv_free(s->environment);
+ free(s->user);
+ free(s->pivot_root_new);
+ free(s->pivot_root_old);
+ free(s->working_directory);
+ strv_free(s->syscall_allow_list);
+ strv_free(s->syscall_deny_list);
+ rlimit_free_all(s->rlimit);
+ free(s->hostname);
+ cpu_set_reset(&s->cpu_set);
+
+ strv_free(s->network_interfaces);
+ strv_free(s->network_macvlan);
+ strv_free(s->network_ipvlan);
+ strv_free(s->network_veth_extra);
+ free(s->network_bridge);
+ free(s->network_zone);
+ expose_port_free_all(s->expose_ports);
+
+ custom_mount_free_all(s->custom_mounts, s->n_custom_mounts);
+
+ free(s->bundle);
+ free(s->root);
+
+ free_oci_hooks(s->oci_hooks_prestart, s->n_oci_hooks_prestart);
+ free_oci_hooks(s->oci_hooks_poststart, s->n_oci_hooks_poststart);
+ free_oci_hooks(s->oci_hooks_poststop, s->n_oci_hooks_poststop);
+
+ free(s->slice);
+ sd_bus_message_unref(s->properties);
+
+ free(s->supplementary_gids);
+ device_node_array_free(s->extra_nodes, s->n_extra_nodes);
+ free(s->network_namespace_path);
+
+ strv_free(s->sysctl);
+
+#if HAVE_SECCOMP
+ seccomp_release(s->seccomp);
+#endif
+
+ return mfree(s);
+}
+
+bool settings_private_network(Settings *s) {
+ assert(s);
+
+ return
+ s->private_network > 0 ||
+ s->network_veth > 0 ||
+ s->network_bridge ||
+ s->network_zone ||
+ s->network_interfaces ||
+ s->network_macvlan ||
+ s->network_ipvlan ||
+ s->network_veth_extra;
+}
+
+bool settings_network_veth(Settings *s) {
+ assert(s);
+
+ return
+ s->network_veth > 0 ||
+ s->network_bridge ||
+ s->network_zone;
+}
+
+int settings_allocate_properties(Settings *s) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(s);
+
+ if (s->properties)
+ return 0;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new(bus, &s->properties, SD_BUS_MESSAGE_METHOD_CALL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_volatile_mode, volatile_mode, VolatileMode, "Failed to parse volatile mode");
+
+int config_parse_expose_port(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = expose_port_parse(&s->expose_ports, rvalue);
+ if (r == -EEXIST)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Duplicate port specification, ignoring: %s", rvalue);
+ else if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse host port %s: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_capability(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t u = 0, *result = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&rvalue, &word, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to extract capability string, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ if (streq(word, "all"))
+ u = (uint64_t) -1;
+ else {
+ r = capability_from_name(word);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse capability, ignoring: %s", word);
+ continue;
+ }
+
+ u |= UINT64_C(1) << r;
+ }
+ }
+
+ if (u == 0)
+ return 0;
+
+ *result |= u;
+ return 0;
+}
+
+int config_parse_pivot_root(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = pivot_root_parse(&settings->pivot_root_new, &settings->pivot_root_old, rvalue);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid pivot root mount specification %s: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_bind(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = bind_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue, ltype);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid bind mount specification %s: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_tmpfs(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = tmpfs_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid temporary file system specification %s: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_inaccessible(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = inaccessible_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid inaccessible file system specification %s: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_overlay(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = overlay_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue, ltype);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid overlay file system specification %s, ignoring: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_veth_extra(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = veth_extra_parse(&settings->network_veth_extra, rvalue);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid extra virtual Ethernet link specification %s: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_network_zone(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ _cleanup_free_ char *j = NULL;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ j = strjoin("vz-", rvalue);
+ if (!ifname_valid(j)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid network zone name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return free_and_replace(settings->network_zone, j);
+}
+
+int config_parse_boot(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse Boot= parameter %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (r) {
+ if (settings->start_mode == START_PID2)
+ goto conflict;
+
+ settings->start_mode = START_BOOT;
+ } else {
+ if (settings->start_mode == START_BOOT)
+ goto conflict;
+
+ if (settings->start_mode < 0)
+ settings->start_mode = START_PID1;
+ }
+
+ return 0;
+
+conflict:
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Conflicting Boot= or ProcessTwo= setting found. Ignoring.");
+ return 0;
+}
+
+int config_parse_pid2(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse ProcessTwo= parameter %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (r) {
+ if (settings->start_mode == START_BOOT)
+ goto conflict;
+
+ settings->start_mode = START_PID2;
+ } else {
+ if (settings->start_mode == START_PID2)
+ goto conflict;
+
+ if (settings->start_mode < 0)
+ settings->start_mode = START_PID1;
+ }
+
+ return 0;
+
+conflict:
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Conflicting Boot= or ProcessTwo= setting found. Ignoring.");
+ return 0;
+}
+
+int config_parse_private_users(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r == 0) {
+ /* no: User namespacing off */
+ settings->userns_mode = USER_NAMESPACE_NO;
+ settings->uid_shift = UID_INVALID;
+ settings->uid_range = UINT32_C(0x10000);
+ } else if (r > 0) {
+ /* yes: User namespacing on, UID range is read from root dir */
+ settings->userns_mode = USER_NAMESPACE_FIXED;
+ settings->uid_shift = UID_INVALID;
+ settings->uid_range = UINT32_C(0x10000);
+ } else if (streq(rvalue, "pick")) {
+ /* pick: User namespacing on, UID range is picked randomly */
+ settings->userns_mode = USER_NAMESPACE_PICK;
+ settings->uid_shift = UID_INVALID;
+ settings->uid_range = UINT32_C(0x10000);
+ } else {
+ const char *range, *shift;
+ uid_t sh, rn;
+
+ /* anything else: User namespacing on, UID range is explicitly configured */
+
+ range = strchr(rvalue, ':');
+ if (range) {
+ shift = strndupa(rvalue, range - rvalue);
+ range++;
+
+ r = safe_atou32(range, &rn);
+ if (r < 0 || rn <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "UID/GID range invalid, ignoring: %s", range);
+ return 0;
+ }
+ } else {
+ shift = rvalue;
+ rn = UINT32_C(0x10000);
+ }
+
+ r = parse_uid(shift, &sh);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "UID/GID shift invalid, ignoring: %s", range);
+ return 0;
+ }
+
+ settings->userns_mode = USER_NAMESPACE_FIXED;
+ settings->uid_shift = sh;
+ settings->uid_range = rn;
+ }
+
+ return 0;
+}
+
+int config_parse_syscall_filter(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ bool negative;
+ const char *items;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ negative = rvalue[0] == '~';
+ items = negative ? rvalue + 1 : rvalue;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&items, &word, NULL, 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse SystemCallFilter= parameter %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (negative)
+ r = strv_extend(&settings->syscall_deny_list, word);
+ else
+ r = strv_extend(&settings->syscall_allow_list, word);
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_hostname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+
+ assert(rvalue);
+ assert(s);
+
+ if (!hostname_is_valid(rvalue, false)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid hostname, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (free_and_strdup(s, empty_to_null(rvalue)) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_oom_score_adjust(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int oa, r;
+
+ assert(rvalue);
+ assert(settings);
+
+ if (isempty(rvalue)) {
+ settings->oom_score_adjust_set = false;
+ return 0;
+ }
+
+ r = parse_oom_score_adjust(rvalue, &oa);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "OOM score adjust value out of range, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse the OOM score adjust value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ settings->oom_score_adjust = oa;
+ settings->oom_score_adjust_set = true;
+
+ return 0;
+}
+
+int config_parse_cpu_affinity(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+
+ assert(rvalue);
+ assert(settings);
+
+ return parse_cpu_set_extend(rvalue, &settings->cpu_set, true, unit, filename, line, lvalue);
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_resolv_conf, resolv_conf_mode, ResolvConfMode, "Failed to parse resolv.conf mode");
+
+static const char *const resolv_conf_mode_table[_RESOLV_CONF_MODE_MAX] = {
+ [RESOLV_CONF_OFF] = "off",
+ [RESOLV_CONF_COPY_HOST] = "copy-host",
+ [RESOLV_CONF_COPY_STATIC] = "copy-static",
+ [RESOLV_CONF_COPY_UPLINK] = "copy-uplink",
+ [RESOLV_CONF_COPY_STUB] = "copy-stub",
+ [RESOLV_CONF_REPLACE_HOST] = "replace-host",
+ [RESOLV_CONF_REPLACE_STATIC] = "replace-static",
+ [RESOLV_CONF_REPLACE_UPLINK] = "replace-uplink",
+ [RESOLV_CONF_REPLACE_STUB] = "replace-stub",
+ [RESOLV_CONF_BIND_HOST] = "bind-host",
+ [RESOLV_CONF_BIND_STATIC] = "bind-static",
+ [RESOLV_CONF_BIND_UPLINK] = "bind-uplink",
+ [RESOLV_CONF_BIND_STUB] = "bind-stub",
+ [RESOLV_CONF_DELETE] = "delete",
+ [RESOLV_CONF_AUTO] = "auto",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(resolv_conf_mode, ResolvConfMode, RESOLV_CONF_AUTO);
+
+int parse_link_journal(const char *s, LinkJournal *ret_mode, bool *ret_try) {
+ int r;
+
+ assert(s);
+ assert(ret_mode);
+ assert(ret_try);
+
+ if (streq(s, "auto")) {
+ *ret_mode = LINK_AUTO;
+ *ret_try = false;
+ } else if (streq(s, "guest")) {
+ *ret_mode = LINK_GUEST;
+ *ret_try = false;
+ } else if (streq(s, "host")) {
+ *ret_mode = LINK_HOST;
+ *ret_try = false;
+ } else if (streq(s, "try-guest")) {
+ *ret_mode = LINK_GUEST;
+ *ret_try = true;
+ } else if (streq(s, "try-host")) {
+ *ret_mode = LINK_HOST;
+ *ret_try = true;
+ } else {
+ /* Also support boolean values, to make things less confusing. */
+ r = parse_boolean(s);
+ if (r < 0)
+ return r;
+
+ /* Let's consider "true" to be equivalent to "auto". */
+ *ret_mode = r ? LINK_AUTO : LINK_NO;
+ *ret_try = false;
+ }
+
+ return 0;
+}
+
+int config_parse_link_journal(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(rvalue);
+ assert(settings);
+
+ r = parse_link_journal(rvalue, &settings->link_journal, &settings->link_journal_try);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse link journal mode, ignoring: %s", rvalue);
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_timezone, timezone_mode, TimezoneMode, "Failed to parse timezone mode");
+
+static const char *const timezone_mode_table[_TIMEZONE_MODE_MAX] = {
+ [TIMEZONE_OFF] = "off",
+ [TIMEZONE_COPY] = "copy",
+ [TIMEZONE_BIND] = "bind",
+ [TIMEZONE_SYMLINK] = "symlink",
+ [TIMEZONE_DELETE] = "delete",
+ [TIMEZONE_AUTO] = "auto",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(timezone_mode, TimezoneMode, TIMEZONE_AUTO);
diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h
new file mode 100644
index 0000000..4a83e55
--- /dev/null
+++ b/src/nspawn/nspawn-settings.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sched.h>
+#include <stdio.h>
+
+#if HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#include "sd-bus.h"
+#include "sd-id128.h"
+
+#include "capability-util.h"
+#include "conf-parser.h"
+#include "cpu-set-util.h"
+#include "macro.h"
+#include "missing_resource.h"
+#include "nspawn-expose-ports.h"
+#include "nspawn-mount.h"
+#include "time-util.h"
+
+typedef enum StartMode {
+ START_PID1, /* Run parameters as command line as process 1 */
+ START_PID2, /* Use stub init process as PID 1, run parameters as command line as process 2 */
+ START_BOOT, /* Search for init system, pass arguments as parameters */
+ _START_MODE_MAX,
+ _START_MODE_INVALID = -1
+} StartMode;
+
+typedef enum UserNamespaceMode {
+ USER_NAMESPACE_NO,
+ USER_NAMESPACE_FIXED,
+ USER_NAMESPACE_PICK,
+ _USER_NAMESPACE_MODE_MAX,
+ _USER_NAMESPACE_MODE_INVALID = -1,
+} UserNamespaceMode;
+
+typedef enum ResolvConfMode {
+ RESOLV_CONF_OFF,
+ RESOLV_CONF_COPY_HOST, /* /etc/resolv.conf */
+ RESOLV_CONF_COPY_STATIC, /* /usr/lib/systemd/resolv.conf */
+ RESOLV_CONF_COPY_UPLINK, /* /run/systemd/resolve/resolv.conf */
+ RESOLV_CONF_COPY_STUB, /* /run/systemd/resolve/stub-resolv.conf */
+ RESOLV_CONF_REPLACE_HOST,
+ RESOLV_CONF_REPLACE_STATIC,
+ RESOLV_CONF_REPLACE_UPLINK,
+ RESOLV_CONF_REPLACE_STUB,
+ RESOLV_CONF_BIND_HOST,
+ RESOLV_CONF_BIND_STATIC,
+ RESOLV_CONF_BIND_UPLINK,
+ RESOLV_CONF_BIND_STUB,
+ RESOLV_CONF_DELETE,
+ RESOLV_CONF_AUTO,
+ _RESOLV_CONF_MODE_MAX,
+ _RESOLV_CONF_MODE_INVALID = -1
+} ResolvConfMode;
+
+typedef enum LinkJournal {
+ LINK_NO,
+ LINK_AUTO,
+ LINK_HOST,
+ LINK_GUEST,
+ _LINK_JOURNAL_MAX,
+ _LINK_JOURNAL_INVALID = -1
+} LinkJournal;
+
+typedef enum TimezoneMode {
+ TIMEZONE_OFF,
+ TIMEZONE_COPY,
+ TIMEZONE_BIND,
+ TIMEZONE_SYMLINK,
+ TIMEZONE_DELETE,
+ TIMEZONE_AUTO,
+ _TIMEZONE_MODE_MAX,
+ _TIMEZONE_MODE_INVALID = -1
+} TimezoneMode;
+
+typedef enum ConsoleMode {
+ CONSOLE_INTERACTIVE,
+ CONSOLE_READ_ONLY,
+ CONSOLE_PASSIVE,
+ CONSOLE_PIPE,
+ _CONSOLE_MODE_MAX,
+ _CONSOLE_MODE_INVALID = -1,
+} ConsoleMode;
+
+typedef enum SettingsMask {
+ SETTING_START_MODE = UINT64_C(1) << 0,
+ SETTING_ENVIRONMENT = UINT64_C(1) << 1,
+ SETTING_USER = UINT64_C(1) << 2,
+ SETTING_CAPABILITY = UINT64_C(1) << 3,
+ SETTING_KILL_SIGNAL = UINT64_C(1) << 4,
+ SETTING_PERSONALITY = UINT64_C(1) << 5,
+ SETTING_MACHINE_ID = UINT64_C(1) << 6,
+ SETTING_NETWORK = UINT64_C(1) << 7,
+ SETTING_EXPOSE_PORTS = UINT64_C(1) << 8,
+ SETTING_READ_ONLY = UINT64_C(1) << 9,
+ SETTING_VOLATILE_MODE = UINT64_C(1) << 10,
+ SETTING_CUSTOM_MOUNTS = UINT64_C(1) << 11,
+ SETTING_WORKING_DIRECTORY = UINT64_C(1) << 12,
+ SETTING_USERNS = UINT64_C(1) << 13,
+ SETTING_NOTIFY_READY = UINT64_C(1) << 14,
+ SETTING_PIVOT_ROOT = UINT64_C(1) << 15,
+ SETTING_SYSCALL_FILTER = UINT64_C(1) << 16,
+ SETTING_HOSTNAME = UINT64_C(1) << 17,
+ SETTING_NO_NEW_PRIVILEGES = UINT64_C(1) << 18,
+ SETTING_OOM_SCORE_ADJUST = UINT64_C(1) << 19,
+ SETTING_CPU_AFFINITY = UINT64_C(1) << 20,
+ SETTING_RESOLV_CONF = UINT64_C(1) << 21,
+ SETTING_LINK_JOURNAL = UINT64_C(1) << 22,
+ SETTING_TIMEZONE = UINT64_C(1) << 23,
+ SETTING_EPHEMERAL = UINT64_C(1) << 24,
+ SETTING_SLICE = UINT64_C(1) << 25,
+ SETTING_DIRECTORY = UINT64_C(1) << 26,
+ SETTING_USE_CGNS = UINT64_C(1) << 27,
+ SETTING_CLONE_NS_FLAGS = UINT64_C(1) << 28,
+ SETTING_CONSOLE_MODE = UINT64_C(1) << 29,
+ SETTING_CREDENTIALS = UINT64_C(1) << 30,
+ SETTING_RLIMIT_FIRST = UINT64_C(1) << 31, /* we define one bit per resource limit here */
+ SETTING_RLIMIT_LAST = UINT64_C(1) << (31 + _RLIMIT_MAX - 1),
+ _SETTINGS_MASK_ALL = (UINT64_C(1) << (31 + _RLIMIT_MAX)) -1,
+ _SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
+} SettingsMask;
+
+/* We want to use SETTING_RLIMIT_FIRST in shifts, so make sure it is really 64 bits
+ * when used in expressions. */
+#define SETTING_RLIMIT_FIRST ((uint64_t) SETTING_RLIMIT_FIRST)
+#define SETTING_RLIMIT_LAST ((uint64_t) SETTING_RLIMIT_LAST)
+
+assert_cc(sizeof(SettingsMask) == 8);
+assert_cc(sizeof(SETTING_RLIMIT_FIRST) == 8);
+assert_cc(sizeof(SETTING_RLIMIT_LAST) == 8);
+
+typedef struct DeviceNode {
+ char *path;
+ unsigned major;
+ unsigned minor;
+ mode_t mode;
+ uid_t uid;
+ gid_t gid;
+} DeviceNode;
+
+typedef struct OciHook {
+ char *path;
+ char **args;
+ char **env;
+ usec_t timeout;
+} OciHook;
+
+typedef struct Settings {
+ /* [Run] */
+ StartMode start_mode;
+ bool ephemeral;
+ char **parameters;
+ char **environment;
+ char *user;
+ uint64_t capability;
+ uint64_t drop_capability;
+ int kill_signal;
+ unsigned long personality;
+ sd_id128_t machine_id;
+ char *working_directory;
+ char *pivot_root_new;
+ char *pivot_root_old;
+ UserNamespaceMode userns_mode;
+ uid_t uid_shift, uid_range;
+ bool notify_ready;
+ char **syscall_allow_list;
+ char **syscall_deny_list;
+ struct rlimit *rlimit[_RLIMIT_MAX];
+ char *hostname;
+ int no_new_privileges;
+ int oom_score_adjust;
+ bool oom_score_adjust_set;
+ CPUSet cpu_set;
+ ResolvConfMode resolv_conf;
+ LinkJournal link_journal;
+ bool link_journal_try;
+ TimezoneMode timezone;
+
+ /* [Image] */
+ int read_only;
+ VolatileMode volatile_mode;
+ CustomMount *custom_mounts;
+ size_t n_custom_mounts;
+ int userns_chown;
+
+ /* [Network] */
+ int private_network;
+ int network_veth;
+ char *network_bridge;
+ char *network_zone;
+ char **network_interfaces;
+ char **network_macvlan;
+ char **network_ipvlan;
+ char **network_veth_extra;
+ ExposePort *expose_ports;
+
+ /* Additional fields, that are specific to OCI runtime case */
+ char *bundle;
+ char *root;
+ OciHook *oci_hooks_prestart, *oci_hooks_poststart, *oci_hooks_poststop;
+ size_t n_oci_hooks_prestart, n_oci_hooks_poststart, n_oci_hooks_poststop;
+ char *slice;
+ sd_bus_message *properties;
+ CapabilityQuintet full_capabilities;
+ uid_t uid;
+ gid_t gid;
+ gid_t *supplementary_gids;
+ size_t n_supplementary_gids;
+ unsigned console_width, console_height;
+ ConsoleMode console_mode;
+ DeviceNode *extra_nodes;
+ size_t n_extra_nodes;
+ unsigned long clone_ns_flags;
+ char *network_namespace_path;
+ int use_cgns;
+ char **sysctl;
+#if HAVE_SECCOMP
+ scmp_filter_ctx seccomp;
+#endif
+} Settings;
+
+Settings *settings_new(void);
+int settings_load(FILE *f, const char *path, Settings **ret);
+Settings* settings_free(Settings *s);
+
+bool settings_network_veth(Settings *s);
+bool settings_private_network(Settings *s);
+int settings_allocate_properties(Settings *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Settings*, settings_free);
+
+const struct ConfigPerfItem* nspawn_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_capability);
+CONFIG_PARSER_PROTOTYPE(config_parse_expose_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_volatile_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_pivot_root);
+CONFIG_PARSER_PROTOTYPE(config_parse_bind);
+CONFIG_PARSER_PROTOTYPE(config_parse_tmpfs);
+CONFIG_PARSER_PROTOTYPE(config_parse_overlay);
+CONFIG_PARSER_PROTOTYPE(config_parse_inaccessible);
+CONFIG_PARSER_PROTOTYPE(config_parse_veth_extra);
+CONFIG_PARSER_PROTOTYPE(config_parse_network_zone);
+CONFIG_PARSER_PROTOTYPE(config_parse_boot);
+CONFIG_PARSER_PROTOTYPE(config_parse_pid2);
+CONFIG_PARSER_PROTOTYPE(config_parse_private_users);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_filter);
+CONFIG_PARSER_PROTOTYPE(config_parse_hostname);
+CONFIG_PARSER_PROTOTYPE(config_parse_oom_score_adjust);
+CONFIG_PARSER_PROTOTYPE(config_parse_cpu_affinity);
+CONFIG_PARSER_PROTOTYPE(config_parse_resolv_conf);
+CONFIG_PARSER_PROTOTYPE(config_parse_link_journal);
+CONFIG_PARSER_PROTOTYPE(config_parse_timezone);
+
+const char *resolv_conf_mode_to_string(ResolvConfMode a) _const_;
+ResolvConfMode resolv_conf_mode_from_string(const char *s) _pure_;
+
+const char *timezone_mode_to_string(TimezoneMode a) _const_;
+TimezoneMode timezone_mode_from_string(const char *s) _pure_;
+
+int parse_link_journal(const char *s, LinkJournal *ret_mode, bool *ret_try);
+
+void device_node_array_free(DeviceNode *node, size_t n);
diff --git a/src/nspawn/nspawn-setuid.c b/src/nspawn/nspawn-setuid.c
new file mode 100644
index 0000000..c224fd0
--- /dev/null
+++ b/src/nspawn/nspawn-setuid.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "errno.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "mkdir.h"
+#include "nspawn-setuid.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
+ int pipe_fds[2], r;
+ pid_t pid;
+
+ assert(database);
+ assert(key);
+ assert(rpid);
+
+ if (pipe2(pipe_fds, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to allocate pipe: %m");
+
+ r = safe_fork("(getent)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0) {
+ safe_close_pair(pipe_fds);
+ return r;
+ }
+ if (r == 0) {
+ char *empty_env = NULL;
+
+ safe_close(pipe_fds[0]);
+
+ if (rearrange_stdio(-1, pipe_fds[1], -1) < 0)
+ _exit(EXIT_FAILURE);
+
+ (void) close_all_fds(NULL, 0);
+
+ (void) rlimit_nofile_safe();
+
+ execle("/usr/bin/getent", "getent", database, key, NULL, &empty_env);
+ execle("/bin/getent", "getent", database, key, NULL, &empty_env);
+ _exit(EXIT_FAILURE);
+ }
+
+ pipe_fds[1] = safe_close(pipe_fds[1]);
+
+ *rpid = pid;
+
+ return pipe_fds[0];
+}
+
+int change_uid_gid_raw(
+ uid_t uid,
+ gid_t gid,
+ const gid_t *supplementary_gids,
+ size_t n_supplementary_gids,
+ bool chown_stdio) {
+
+ if (!uid_is_valid(uid))
+ uid = 0;
+ if (!gid_is_valid(gid))
+ gid = 0;
+
+ if (chown_stdio) {
+ (void) fchown(STDIN_FILENO, uid, gid);
+ (void) fchown(STDOUT_FILENO, uid, gid);
+ (void) fchown(STDERR_FILENO, uid, gid);
+ }
+
+ if (setgroups(n_supplementary_gids, supplementary_gids) < 0)
+ return log_error_errno(errno, "Failed to set auxiliary groups: %m");
+
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "setresgid() failed: %m");
+
+ if (setresuid(uid, uid, uid) < 0)
+ return log_error_errno(errno, "setresuid() failed: %m");
+
+ return 0;
+}
+
+int change_uid_gid(const char *user, bool chown_stdio, char **ret_home) {
+ char *x, *u, *g, *h;
+ _cleanup_free_ gid_t *gids = NULL;
+ _cleanup_free_ char *home = NULL, *line = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ unsigned n_gids = 0;
+ size_t sz = 0;
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+ int r;
+
+ assert(ret_home);
+
+ if (!user || STR_IN_SET(user, "root", "0")) {
+ /* Reset everything fully to 0, just in case */
+
+ r = reset_uid_gid();
+ if (r < 0)
+ return log_error_errno(r, "Failed to become root: %m");
+
+ *ret_home = NULL;
+ return 0;
+ }
+
+ /* First, get user credentials */
+ fd = spawn_getent("passwd", user, &pid);
+ if (fd < 0)
+ return fd;
+
+ f = take_fdopen(&fd, "r");
+ if (!f)
+ return log_oom();
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "Failed to resolve user %s.", user);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from getent: %m");
+
+ (void) wait_for_terminate_and_check("getent passwd", pid, WAIT_LOG);
+
+ x = strchr(line, ':');
+ if (!x)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid user field.");
+
+ u = strchr(x+1, ':');
+ if (!u)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid password field.");
+
+ u++;
+ g = strchr(u, ':');
+ if (!g)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid UID field.");
+
+ *g = 0;
+ g++;
+ x = strchr(g, ':');
+ if (!x)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid GID field.");
+
+ *x = 0;
+ h = strchr(x+1, ':');
+ if (!h)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid GECOS field.");
+
+ h++;
+ x = strchr(h, ':');
+ if (!x)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "/etc/passwd entry has invalid home directory field.");
+
+ *x = 0;
+
+ r = parse_uid(u, &uid);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to parse UID of user.");
+
+ r = parse_gid(g, &gid);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to parse GID of user.");
+
+ home = strdup(h);
+ if (!home)
+ return log_oom();
+
+ f = safe_fclose(f);
+ line = mfree(line);
+
+ /* Second, get group memberships */
+ fd = spawn_getent("initgroups", user, &pid);
+ if (fd < 0)
+ return fd;
+
+ f = take_fdopen(&fd, "r");
+ if (!f)
+ return log_oom();
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "Failed to resolve user %s.", user);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from getent: %m");
+
+ (void) wait_for_terminate_and_check("getent initgroups", pid, WAIT_LOG);
+
+ /* Skip over the username and subsequent separator whitespace */
+ x = line;
+ x += strcspn(x, WHITESPACE);
+ x += strspn(x, WHITESPACE);
+
+ for (const char *p = x;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse group data from getent: %m");
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(gids, sz, n_gids+1))
+ return log_oom();
+
+ r = parse_gid(word, &gids[n_gids++]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse group data from getent: %m");
+ }
+
+ r = mkdir_parents(home, 0775);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make home root directory: %m");
+
+ r = mkdir_safe(home, 0755, uid, gid, 0);
+ if (r < 0 && !IN_SET(r, -EEXIST, -ENOTDIR))
+ return log_error_errno(r, "Failed to make home directory: %m");
+
+ r = change_uid_gid_raw(uid, gid, gids, n_gids, chown_stdio);
+ if (r < 0)
+ return r;
+
+ if (ret_home)
+ *ret_home = TAKE_PTR(home);
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-setuid.h b/src/nspawn/nspawn-setuid.h
new file mode 100644
index 0000000..1924711
--- /dev/null
+++ b/src/nspawn/nspawn-setuid.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int change_uid_gid_raw(uid_t uid, gid_t gid, const gid_t *supplementary_gids, size_t n_supplementary_gids, bool chown_stdio);
+int change_uid_gid(const char *user, bool chown_stdio, char **ret_home);
diff --git a/src/nspawn/nspawn-stub-pid1.c b/src/nspawn/nspawn-stub-pid1.c
new file mode 100644
index 0000000..3cbe4ef
--- /dev/null
+++ b/src/nspawn/nspawn-stub-pid1.c
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/ioctl.h>
+#include <sys/reboot.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "def.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "log.h"
+#include "nspawn-stub-pid1.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "time-util.h"
+
+static int reset_environ(const char *new_environment, size_t length) {
+ unsigned long start, end;
+
+ start = (unsigned long) new_environment;
+ end = start + length;
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ENV_START, start, 0, 0) < 0)
+ return -errno;
+
+ if (prctl(PR_SET_MM, PR_SET_MM_ENV_END, end, 0, 0) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int stub_pid1(sd_id128_t uuid) {
+ enum {
+ STATE_RUNNING,
+ STATE_REBOOT,
+ STATE_POWEROFF,
+ } state = STATE_RUNNING;
+
+ sigset_t fullmask, oldmask, waitmask;
+ usec_t quit_usec = USEC_INFINITY;
+ pid_t pid;
+ int r;
+
+ /* The new environment we set up, on the stack. */
+ char new_environment[] =
+ "container=systemd-nspawn\0"
+ "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
+
+ /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful
+ * for allowing arbitrary processes run in a container, and still have all zombies reaped. */
+
+ assert_se(sigfillset(&fullmask) >= 0);
+ assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0);
+
+ pid = fork();
+ if (pid < 0)
+ return log_error_errno(errno, "Failed to fork child pid: %m");
+
+ if (pid == 0) {
+ /* Return in the child */
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0);
+
+ if (setsid() < 0)
+ return log_error_errno(errno, "Failed to become session leader in payload process: %m");
+
+ return 0;
+ }
+
+ reset_all_signal_handlers();
+
+ log_close();
+ (void) close_all_fds(NULL, 0);
+ log_open();
+
+ if (ioctl(STDIN_FILENO, TIOCNOTTY) < 0) {
+ if (errno != ENOTTY)
+ log_warning_errno(errno, "Unexpected error from TIOCNOTTY ioctl in init stub process, ignoring: %m");
+ } else
+ log_warning("Expected TIOCNOTTY to fail, but it succeeded in init stub process, ignoring.");
+
+ /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also,
+ * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ
+ * find them set. */
+ sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX);
+ reset_environ(new_environment, sizeof(new_environment));
+
+ (void) rename_process("(sd-stubinit)");
+
+ assert_se(sigemptyset(&waitmask) >= 0);
+ assert_se(sigset_add_many(&waitmask,
+ SIGCHLD, /* posix: process died */
+ SIGINT, /* sysv: ctrl-alt-del */
+ SIGRTMIN+3, /* systemd: halt */
+ SIGRTMIN+4, /* systemd: poweroff */
+ SIGRTMIN+5, /* systemd: reboot */
+ SIGRTMIN+6, /* systemd: kexec */
+ SIGRTMIN+13, /* systemd: halt */
+ SIGRTMIN+14, /* systemd: poweroff */
+ SIGRTMIN+15, /* systemd: reboot */
+ SIGRTMIN+16, /* systemd: kexec */
+ -1) >= 0);
+
+ /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
+ * support reexec/reloading in this stub process. */
+
+ for (;;) {
+ siginfo_t si;
+ usec_t current_usec;
+
+ si.si_pid = 0;
+ r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
+ if (r < 0) {
+ r = log_error_errno(errno, "Failed to reap children: %m");
+ goto finish;
+ }
+
+ current_usec = now(CLOCK_MONOTONIC);
+
+ if (si.si_pid == pid || current_usec >= quit_usec) {
+
+ /* The child we started ourselves died or we reached a timeout. */
+
+ if (state == STATE_REBOOT) { /* dispatch a queued reboot */
+ (void) reboot(RB_AUTOBOOT);
+ r = log_error_errno(errno, "Failed to reboot: %m");
+ goto finish;
+
+ } else if (state == STATE_POWEROFF)
+ (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */
+
+ if (si.si_pid == pid && si.si_code == CLD_EXITED)
+ r = si.si_status; /* pass on exit code */
+ else
+ r = EXIT_EXCEPTION; /* signal, coredump, timeout, … */
+
+ goto finish;
+ }
+ if (si.si_pid != 0)
+ /* We reaped something. Retry until there's nothing more to reap. */
+ continue;
+
+ if (quit_usec == USEC_INFINITY)
+ r = sigwaitinfo(&waitmask, &si);
+ else {
+ struct timespec ts;
+ r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec));
+ }
+ if (r < 0) {
+ if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */
+ continue;
+ if (errno == EAGAIN) /* timeout reached */
+ continue;
+
+ r = log_error_errno(errno, "Failed to wait for signal: %m");
+ goto finish;
+ }
+
+ if (si.si_signo == SIGCHLD)
+ continue; /* Let's reap this */
+
+ if (state != STATE_RUNNING)
+ continue;
+
+ /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a
+ * constant… */
+
+ if (si.si_signo == SIGRTMIN+3 ||
+ si.si_signo == SIGRTMIN+4 ||
+ si.si_signo == SIGRTMIN+13 ||
+ si.si_signo == SIGRTMIN+14)
+
+ state = STATE_POWEROFF;
+
+ else if (si.si_signo == SIGINT ||
+ si.si_signo == SIGRTMIN+5 ||
+ si.si_signo == SIGRTMIN+6 ||
+ si.si_signo == SIGRTMIN+15 ||
+ si.si_signo == SIGRTMIN+16)
+
+ state = STATE_REBOOT;
+ else
+ assert_not_reached("Got unexpected signal");
+
+ r = kill_and_sigcont(pid, SIGTERM);
+
+ /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We
+ * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those
+ * processes which handle both. That's because services tend to bind configuration reload or something
+ * else to SIGHUP. */
+
+ if (r != -ESRCH)
+ (void) kill(pid, SIGHUP);
+
+ quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
+ }
+
+finish:
+ _exit(r < 0 ? EXIT_FAILURE : r);
+}
diff --git a/src/nspawn/nspawn-stub-pid1.h b/src/nspawn/nspawn-stub-pid1.h
new file mode 100644
index 0000000..e0810fe
--- /dev/null
+++ b/src/nspawn/nspawn-stub-pid1.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-id128.h"
+
+int stub_pid1(sd_id128_t uuid);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
new file mode 100644
index 0000000..7515380
--- /dev/null
+++ b/src/nspawn/nspawn.c
@@ -0,0 +1,5561 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_BLKID
+#endif
+#include <errno.h>
+#include <getopt.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#if HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+#include <stdlib.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "barrier.h"
+#include "base-filesystem.h"
+#include "blkid-util.h"
+#include "btrfs-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "copy.h"
+#include "cpu-set-util.h"
+#include "dev-setup.h"
+#include "dissect-image.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "gpt.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "loop-util.h"
+#include "loopback-setup.h"
+#include "machine-image.h"
+#include "macro.h"
+#include "main-func.h"
+#include "missing_sched.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "namespace-util.h"
+#include "netlink-util.h"
+#include "nspawn-cgroup.h"
+#include "nspawn-creds.h"
+#include "nspawn-def.h"
+#include "nspawn-expose-ports.h"
+#include "nspawn-mount.h"
+#include "nspawn-network.h"
+#include "nspawn-oci.h"
+#include "nspawn-patch-uid.h"
+#include "nspawn-register.h"
+#include "nspawn-seccomp.h"
+#include "nspawn-settings.h"
+#include "nspawn-setuid.h"
+#include "nspawn-stub-pid1.h"
+#include "nulstr-util.h"
+#include "os-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "ptyfwd.h"
+#include "random-util.h"
+#include "raw-clone.h"
+#include "resolve-util.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "sysctl-util.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+#include "util.h"
+
+/* The notify socket inside the container it can use to talk to nspawn using the sd_notify(3) protocol */
+#define NSPAWN_NOTIFY_SOCKET_PATH "/run/host/notify"
+
+#define EXIT_FORCE_RESTART 133
+
+typedef enum ContainerStatus {
+ CONTAINER_TERMINATED,
+ CONTAINER_REBOOTED,
+} ContainerStatus;
+
+static char *arg_directory = NULL;
+static char *arg_template = NULL;
+static char *arg_chdir = NULL;
+static char *arg_pivot_root_new = NULL;
+static char *arg_pivot_root_old = NULL;
+static char *arg_user = NULL;
+static uid_t arg_uid = UID_INVALID;
+static gid_t arg_gid = GID_INVALID;
+static gid_t* arg_supplementary_gids = NULL;
+static size_t arg_n_supplementary_gids = 0;
+static sd_id128_t arg_uuid = {};
+static char *arg_machine = NULL; /* The name used by the host to refer to this */
+static char *arg_hostname = NULL; /* The name the payload sees by default */
+static const char *arg_selinux_context = NULL;
+static const char *arg_selinux_apifs_context = NULL;
+static char *arg_slice = NULL;
+static bool arg_private_network = false;
+static bool arg_read_only = false;
+static StartMode arg_start_mode = START_PID1;
+static bool arg_ephemeral = false;
+static LinkJournal arg_link_journal = LINK_AUTO;
+static bool arg_link_journal_try = false;
+static uint64_t arg_caps_retain =
+ (1ULL << CAP_AUDIT_CONTROL) |
+ (1ULL << CAP_AUDIT_WRITE) |
+ (1ULL << CAP_CHOWN) |
+ (1ULL << CAP_DAC_OVERRIDE) |
+ (1ULL << CAP_DAC_READ_SEARCH) |
+ (1ULL << CAP_FOWNER) |
+ (1ULL << CAP_FSETID) |
+ (1ULL << CAP_IPC_OWNER) |
+ (1ULL << CAP_KILL) |
+ (1ULL << CAP_LEASE) |
+ (1ULL << CAP_LINUX_IMMUTABLE) |
+ (1ULL << CAP_MKNOD) |
+ (1ULL << CAP_NET_BIND_SERVICE) |
+ (1ULL << CAP_NET_BROADCAST) |
+ (1ULL << CAP_NET_RAW) |
+ (1ULL << CAP_SETFCAP) |
+ (1ULL << CAP_SETGID) |
+ (1ULL << CAP_SETPCAP) |
+ (1ULL << CAP_SETUID) |
+ (1ULL << CAP_SYS_ADMIN) |
+ (1ULL << CAP_SYS_BOOT) |
+ (1ULL << CAP_SYS_CHROOT) |
+ (1ULL << CAP_SYS_NICE) |
+ (1ULL << CAP_SYS_PTRACE) |
+ (1ULL << CAP_SYS_RESOURCE) |
+ (1ULL << CAP_SYS_TTY_CONFIG);
+static CapabilityQuintet arg_full_capabilities = CAPABILITY_QUINTET_NULL;
+static CustomMount *arg_custom_mounts = NULL;
+static size_t arg_n_custom_mounts = 0;
+static char **arg_setenv = NULL;
+static bool arg_quiet = false;
+static bool arg_register = true;
+static bool arg_keep_unit = false;
+static char **arg_network_interfaces = NULL;
+static char **arg_network_macvlan = NULL;
+static char **arg_network_ipvlan = NULL;
+static bool arg_network_veth = false;
+static char **arg_network_veth_extra = NULL;
+static char *arg_network_bridge = NULL;
+static char *arg_network_zone = NULL;
+static char *arg_network_namespace_path = NULL;
+static PagerFlags arg_pager_flags = 0;
+static unsigned long arg_personality = PERSONALITY_INVALID;
+static char *arg_image = NULL;
+static char *arg_oci_bundle = NULL;
+static VolatileMode arg_volatile_mode = VOLATILE_NO;
+static ExposePort *arg_expose_ports = NULL;
+static char **arg_property = NULL;
+static sd_bus_message *arg_property_message = NULL;
+static UserNamespaceMode arg_userns_mode = USER_NAMESPACE_NO;
+static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
+static bool arg_userns_chown = false;
+static int arg_kill_signal = 0;
+static CGroupUnified arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_UNKNOWN;
+static SettingsMask arg_settings_mask = 0;
+static int arg_settings_trusted = -1;
+static char **arg_parameters = NULL;
+static const char *arg_container_service_name = "systemd-nspawn";
+static bool arg_notify_ready = false;
+static bool arg_use_cgns = true;
+static unsigned long arg_clone_ns_flags = CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS;
+static MountSettingsMask arg_mount_settings = MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_TMPFS_TMP;
+static VeritySettings arg_verity_settings = VERITY_SETTINGS_DEFAULT;
+static char **arg_syscall_allow_list = NULL;
+static char **arg_syscall_deny_list = NULL;
+#if HAVE_SECCOMP
+static scmp_filter_ctx arg_seccomp = NULL;
+#endif
+static struct rlimit *arg_rlimit[_RLIMIT_MAX] = {};
+static bool arg_no_new_privileges = false;
+static int arg_oom_score_adjust = 0;
+static bool arg_oom_score_adjust_set = false;
+static CPUSet arg_cpu_set = {};
+static ResolvConfMode arg_resolv_conf = RESOLV_CONF_AUTO;
+static TimezoneMode arg_timezone = TIMEZONE_AUTO;
+static unsigned arg_console_width = (unsigned) -1, arg_console_height = (unsigned) -1;
+static DeviceNode* arg_extra_nodes = NULL;
+static size_t arg_n_extra_nodes = 0;
+static char **arg_sysctl = NULL;
+static ConsoleMode arg_console_mode = _CONSOLE_MODE_INVALID;
+static Credential *arg_credentials = NULL;
+static size_t arg_n_credentials = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_template, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_chdir, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_pivot_root_new, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_pivot_root_old, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_user, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_supplementary_gids, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hostname, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_slice, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_setenv, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_network_interfaces, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_network_macvlan, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_network_ipvlan, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_network_veth_extra, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_network_bridge, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_network_zone, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_network_namespace_path, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_oci_bundle, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_property_message, sd_bus_message_unrefp);
+STATIC_DESTRUCTOR_REGISTER(arg_parameters, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_verity_settings, verity_settings_done);
+STATIC_DESTRUCTOR_REGISTER(arg_syscall_allow_list, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_syscall_deny_list, strv_freep);
+#if HAVE_SECCOMP
+STATIC_DESTRUCTOR_REGISTER(arg_seccomp, seccomp_releasep);
+#endif
+STATIC_DESTRUCTOR_REGISTER(arg_cpu_set, cpu_set_reset);
+STATIC_DESTRUCTOR_REGISTER(arg_sysctl, strv_freep);
+
+static int handle_arg_console(const char *arg) {
+ if (streq(arg, "help")) {
+ puts("autopipe\n"
+ "interactive\n"
+ "passive\n"
+ "pipe\n"
+ "read-only");
+ return 0;
+ }
+
+ if (streq(arg, "interactive"))
+ arg_console_mode = CONSOLE_INTERACTIVE;
+ else if (streq(arg, "read-only"))
+ arg_console_mode = CONSOLE_READ_ONLY;
+ else if (streq(arg, "passive"))
+ arg_console_mode = CONSOLE_PASSIVE;
+ else if (streq(arg, "pipe")) {
+ if (isatty(STDIN_FILENO) > 0 && isatty(STDOUT_FILENO) > 0)
+ log_full(arg_quiet ? LOG_DEBUG : LOG_NOTICE,
+ "Console mode 'pipe' selected, but standard input/output are connected to an interactive TTY. "
+ "Most likely you want to use 'interactive' console mode for proper interactivity and shell job control. "
+ "Proceeding anyway.");
+
+ arg_console_mode = CONSOLE_PIPE;
+ } else if (streq(arg, "autopipe")) {
+ if (isatty(STDIN_FILENO) > 0 && isatty(STDOUT_FILENO) > 0)
+ arg_console_mode = CONSOLE_INTERACTIVE;
+ else
+ arg_console_mode = CONSOLE_PIPE;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown console mode: %s", optarg);
+
+ arg_settings_mask |= SETTING_CONSOLE_MODE;
+ return 1;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("systemd-nspawn", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
+ "%5$sSpawn a command or OS in a light-weight container.%6$s\n\n"
+ " -h --help Show this help\n"
+ " --version Print version string\n"
+ " -q --quiet Do not show status information\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --settings=BOOLEAN Load additional settings from .nspawn file\n\n"
+ "%3$sImage:%4$s\n"
+ " -D --directory=PATH Root directory for the container\n"
+ " --template=PATH Initialize root directory from template directory,\n"
+ " if missing\n"
+ " -x --ephemeral Run container with snapshot of root directory, and\n"
+ " remove it after exit\n"
+ " -i --image=PATH Root file system disk image (or device node) for\n"
+ " the container\n"
+ " --oci-bundle=PATH OCI bundle directory\n"
+ " --read-only Mount the root directory read-only\n"
+ " --volatile[=MODE] Run the system in volatile mode\n"
+ " --root-hash=HASH Specify verity root hash for root disk image\n"
+ " --root-hash-sig=SIG Specify pkcs7 signature of root hash for verity\n"
+ " as a DER encoded PKCS7, either as a path to a file\n"
+ " or as an ASCII base64 encoded string prefixed by\n"
+ " 'base64:'\n"
+ " --verity-data=PATH Specify hash device for verity\n"
+ " --pivot-root=PATH[:PATH]\n"
+ " Pivot root to given directory in the container\n\n"
+ "%3$sExecution:%4$s\n"
+ " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n"
+ " -b --boot Boot up full system (i.e. invoke init)\n"
+ " --chdir=PATH Set working directory in the container\n"
+ " -E --setenv=NAME=VALUE Pass an environment variable to PID 1\n"
+ " -u --user=USER Run the command under specified user or UID\n"
+ " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n"
+ " --notify-ready=BOOLEAN Receive notifications from the child init process\n\n"
+ "%3$sSystem Identity:%4$s\n"
+ " -M --machine=NAME Set the machine name for the container\n"
+ " --hostname=NAME Override the hostname for the container\n"
+ " --uuid=UUID Set a specific machine UUID for the container\n\n"
+ "%3$sProperties:%4$s\n"
+ " -S --slice=SLICE Place the container in the specified slice\n"
+ " --property=NAME=VALUE Set scope unit property\n"
+ " --register=BOOLEAN Register container as machine\n"
+ " --keep-unit Do not register a scope for the machine, reuse\n"
+ " the service unit nspawn is running in\n\n"
+ "%3$sUser Namespacing:%4$s\n"
+ " -U --private-users=pick Run within user namespace, autoselect UID/GID range\n"
+ " --private-users[=UIDBASE[:NUIDS]]\n"
+ " Similar, but with user configured UID/GID range\n"
+ " --private-users-chown Adjust OS tree ownership to private UID/GID range\n\n"
+ "%3$sNetworking:%4$s\n"
+ " --private-network Disable network in container\n"
+ " --network-interface=INTERFACE\n"
+ " Assign an existing network interface to the\n"
+ " container\n"
+ " --network-macvlan=INTERFACE\n"
+ " Create a macvlan network interface based on an\n"
+ " existing network interface to the container\n"
+ " --network-ipvlan=INTERFACE\n"
+ " Create a ipvlan network interface based on an\n"
+ " existing network interface to the container\n"
+ " -n --network-veth Add a virtual Ethernet connection between host\n"
+ " and container\n"
+ " --network-veth-extra=HOSTIF[:CONTAINERIF]\n"
+ " Add an additional virtual Ethernet link between\n"
+ " host and container\n"
+ " --network-bridge=INTERFACE\n"
+ " Add a virtual Ethernet connection to the container\n"
+ " and attach it to an existing bridge on the host\n"
+ " --network-zone=NAME Similar, but attach the new interface to an\n"
+ " an automatically managed bridge interface\n"
+ " --network-namespace-path=PATH\n"
+ " Set network namespace to the one represented by\n"
+ " the specified kernel namespace file node\n"
+ " -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
+ " Expose a container IP port on the host\n\n"
+ "%3$sSecurity:%4$s\n"
+ " --capability=CAP In addition to the default, retain specified\n"
+ " capability\n"
+ " --drop-capability=CAP Drop the specified capability from the default set\n"
+ " --no-new-privileges Set PR_SET_NO_NEW_PRIVS flag for container payload\n"
+ " --system-call-filter=LIST|~LIST\n"
+ " Permit/prohibit specific system calls\n"
+ " -Z --selinux-context=SECLABEL\n"
+ " Set the SELinux security context to be used by\n"
+ " processes in the container\n"
+ " -L --selinux-apifs-context=SECLABEL\n"
+ " Set the SELinux security context to be used by\n"
+ " API/tmpfs file systems in the container\n\n"
+ "%3$sResources:%4$s\n"
+ " --rlimit=NAME=LIMIT Set a resource limit for the payload\n"
+ " --oom-score-adjust=VALUE\n"
+ " Adjust the OOM score value for the payload\n"
+ " --cpu-affinity=CPUS Adjust the CPU affinity of the container\n"
+ " --personality=ARCH Pick personality for this container\n\n"
+ "%3$sIntegration:%4$s\n"
+ " --resolv-conf=MODE Select mode of /etc/resolv.conf initialization\n"
+ " --timezone=MODE Select mode of /etc/localtime initialization\n"
+ " --link-journal=MODE Link up guest journal, one of no, auto, guest, \n"
+ " host, try-guest, try-host\n"
+ " -j Equivalent to --link-journal=try-guest\n\n"
+ "%3$sMounts:%4$s\n"
+ " --bind=PATH[:PATH[:OPTIONS]]\n"
+ " Bind mount a file or directory from the host into\n"
+ " the container\n"
+ " --bind-ro=PATH[:PATH[:OPTIONS]\n"
+ " Similar, but creates a read-only bind mount\n"
+ " --inaccessible=PATH Over-mount file node with inaccessible node to mask\n"
+ " it\n"
+ " --tmpfs=PATH:[OPTIONS] Mount an empty tmpfs to the specified directory\n"
+ " --overlay=PATH[:PATH...]:PATH\n"
+ " Create an overlay mount from the host to \n"
+ " the container\n"
+ " --overlay-ro=PATH[:PATH...]:PATH\n"
+ " Similar, but creates a read-only overlay mount\n\n"
+ "%3$sInput/Output:%4$s\n"
+ " --console=MODE Select how stdin/stdout/stderr and /dev/console are\n"
+ " set up for the container.\n"
+ " -P --pipe Equivalent to --console=pipe\n\n"
+ "%3$sCredentials:%4$s\n"
+ " --set-credential=ID:VALUE\n"
+ " Pass a credential with literal value to container.\n"
+ " --load-credential=ID:PATH\n"
+ " Load credential to pass to container from file or\n"
+ " AF_UNIX stream socket.\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ , ansi_underline(), ansi_normal()
+ , ansi_highlight(), ansi_normal()
+ );
+
+ return 0;
+}
+
+static int custom_mount_check_all(void) {
+ size_t i;
+
+ for (i = 0; i < arg_n_custom_mounts; i++) {
+ CustomMount *m = &arg_custom_mounts[i];
+
+ if (path_equal(m->destination, "/") && arg_userns_mode != USER_NAMESPACE_NO) {
+ if (arg_userns_chown)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--private-users-chown may not be combined with custom root mounts.");
+ else if (arg_uid_shift == UID_INVALID)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--private-users with automatic UID shift may not be combined with custom root mounts.");
+ }
+ }
+
+ return 0;
+}
+
+static int detect_unified_cgroup_hierarchy_from_environment(void) {
+ const char *e, *var = "SYSTEMD_NSPAWN_UNIFIED_HIERARCHY";
+ int r;
+
+ /* Allow the user to control whether the unified hierarchy is used */
+
+ e = getenv(var);
+ if (!e) {
+ /* $UNIFIED_CGROUP_HIERARCHY has been renamed to $SYSTEMD_NSPAWN_UNIFIED_HIERARCHY. */
+ var = "UNIFIED_CGROUP_HIERARCHY";
+ e = getenv(var);
+ }
+
+ if (!isempty(e)) {
+ r = parse_boolean(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse $%s: %m", var);
+ if (r > 0)
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+ else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ }
+
+ return 0;
+}
+
+static int detect_unified_cgroup_hierarchy_from_image(const char *directory) {
+ int r;
+
+ /* Let's inherit the mode to use from the host system, but let's take into consideration what systemd
+ * in the image actually supports. */
+ r = cg_all_unified();
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether we are in all unified mode.");
+ if (r > 0) {
+ /* Unified cgroup hierarchy support was added in 230. Unfortunately the detection
+ * routine only detects 231, so we'll have a false negative here for 230. */
+ r = systemd_installation_has_version(directory, 230);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine systemd version in container: %m");
+ if (r > 0)
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+ else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ } else if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
+ /* Mixed cgroup hierarchy support was added in 233 */
+ r = systemd_installation_has_version(directory, 233);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine systemd version in container: %m");
+ if (r > 0)
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD;
+ else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ } else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+
+ log_debug("Using %s hierarchy for container.",
+ arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_NONE ? "legacy" :
+ arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_SYSTEMD ? "hybrid" : "unified");
+
+ return 0;
+}
+
+static int parse_capability_spec(const char *spec, uint64_t *ret_mask) {
+ uint64_t mask = 0;
+ int r;
+
+ for (;;) {
+ _cleanup_free_ char *t = NULL;
+
+ r = extract_first_word(&spec, &t, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse capability %s.", t);
+ if (r == 0)
+ break;
+
+ if (streq(t, "help")) {
+ for (int i = 0; i < capability_list_length(); i++) {
+ const char *name;
+
+ name = capability_to_name(i);
+ if (name)
+ puts(name);
+ }
+
+ return 0; /* quit */
+ }
+
+ if (streq(t, "all"))
+ mask = (uint64_t) -1;
+ else {
+ r = capability_from_name(t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse capability %s.", t);
+
+ mask |= 1ULL << r;
+ }
+ }
+
+ *ret_mask = mask;
+ return 1; /* continue */
+}
+
+static int parse_share_ns_env(const char *name, unsigned long ns_flag) {
+ int r;
+
+ r = getenv_bool(name);
+ if (r == -ENXIO)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse $%s: %m", name);
+
+ arg_clone_ns_flags = (arg_clone_ns_flags & ~ns_flag) | (r > 0 ? 0 : ns_flag);
+ arg_settings_mask |= SETTING_CLONE_NS_FLAGS;
+ return 0;
+}
+
+static int parse_mount_settings_env(void) {
+ const char *e;
+ int r;
+
+ r = getenv_bool("SYSTEMD_NSPAWN_TMPFS_TMP");
+ if (r < 0 && r != -ENXIO)
+ return log_error_errno(r, "Failed to parse $SYSTEMD_NSPAWN_TMPFS_TMP: %m");
+ if (r >= 0)
+ SET_FLAG(arg_mount_settings, MOUNT_APPLY_TMPFS_TMP, r > 0);
+
+ e = getenv("SYSTEMD_NSPAWN_API_VFS_WRITABLE");
+ if (streq_ptr(e, "network"))
+ arg_mount_settings |= MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS;
+
+ else if (e) {
+ r = parse_boolean(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse $SYSTEMD_NSPAWN_API_VFS_WRITABLE: %m");
+
+ SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_RO, r == 0);
+ SET_FLAG(arg_mount_settings, MOUNT_APPLY_APIVFS_NETNS, false);
+ }
+
+ return 0;
+}
+
+static int parse_environment(void) {
+ const char *e;
+ int r;
+
+ r = parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_IPC", CLONE_NEWIPC);
+ if (r < 0)
+ return r;
+ r = parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_PID", CLONE_NEWPID);
+ if (r < 0)
+ return r;
+ r = parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS);
+ if (r < 0)
+ return r;
+ r = parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_SYSTEM", CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS);
+ if (r < 0)
+ return r;
+
+ r = parse_mount_settings_env();
+ if (r < 0)
+ return r;
+
+ /* SYSTEMD_NSPAWN_USE_CGNS=0 can be used to disable CLONE_NEWCGROUP use,
+ * even if it is supported. If not supported, it has no effect. */
+ if (!cg_ns_supported())
+ arg_use_cgns = false;
+ else {
+ r = getenv_bool("SYSTEMD_NSPAWN_USE_CGNS");
+ if (r < 0) {
+ if (r != -ENXIO)
+ return log_error_errno(r, "Failed to parse $SYSTEMD_NSPAWN_USE_CGNS: %m");
+
+ arg_use_cgns = true;
+ } else {
+ arg_use_cgns = r > 0;
+ arg_settings_mask |= SETTING_USE_CGNS;
+ }
+ }
+
+ e = getenv("SYSTEMD_NSPAWN_CONTAINER_SERVICE");
+ if (e)
+ arg_container_service_name = e;
+
+ return detect_unified_cgroup_hierarchy_from_environment();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_PRIVATE_NETWORK,
+ ARG_UUID,
+ ARG_READ_ONLY,
+ ARG_CAPABILITY,
+ ARG_DROP_CAPABILITY,
+ ARG_LINK_JOURNAL,
+ ARG_BIND,
+ ARG_BIND_RO,
+ ARG_TMPFS,
+ ARG_OVERLAY,
+ ARG_OVERLAY_RO,
+ ARG_INACCESSIBLE,
+ ARG_SHARE_SYSTEM,
+ ARG_REGISTER,
+ ARG_KEEP_UNIT,
+ ARG_NETWORK_INTERFACE,
+ ARG_NETWORK_MACVLAN,
+ ARG_NETWORK_IPVLAN,
+ ARG_NETWORK_BRIDGE,
+ ARG_NETWORK_ZONE,
+ ARG_NETWORK_VETH_EXTRA,
+ ARG_NETWORK_NAMESPACE_PATH,
+ ARG_PERSONALITY,
+ ARG_VOLATILE,
+ ARG_TEMPLATE,
+ ARG_PROPERTY,
+ ARG_PRIVATE_USERS,
+ ARG_KILL_SIGNAL,
+ ARG_SETTINGS,
+ ARG_CHDIR,
+ ARG_PIVOT_ROOT,
+ ARG_PRIVATE_USERS_CHOWN,
+ ARG_NOTIFY_READY,
+ ARG_ROOT_HASH,
+ ARG_ROOT_HASH_SIG,
+ ARG_VERITY_DATA,
+ ARG_SYSTEM_CALL_FILTER,
+ ARG_RLIMIT,
+ ARG_HOSTNAME,
+ ARG_NO_NEW_PRIVILEGES,
+ ARG_OOM_SCORE_ADJUST,
+ ARG_CPU_AFFINITY,
+ ARG_RESOLV_CONF,
+ ARG_TIMEZONE,
+ ARG_CONSOLE,
+ ARG_PIPE,
+ ARG_OCI_BUNDLE,
+ ARG_NO_PAGER,
+ ARG_SET_CREDENTIAL,
+ ARG_LOAD_CREDENTIAL,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "directory", required_argument, NULL, 'D' },
+ { "template", required_argument, NULL, ARG_TEMPLATE },
+ { "ephemeral", no_argument, NULL, 'x' },
+ { "user", required_argument, NULL, 'u' },
+ { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
+ { "as-pid2", no_argument, NULL, 'a' },
+ { "boot", no_argument, NULL, 'b' },
+ { "uuid", required_argument, NULL, ARG_UUID },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ { "capability", required_argument, NULL, ARG_CAPABILITY },
+ { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
+ { "no-new-privileges", required_argument, NULL, ARG_NO_NEW_PRIVILEGES },
+ { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
+ { "bind", required_argument, NULL, ARG_BIND },
+ { "bind-ro", required_argument, NULL, ARG_BIND_RO },
+ { "tmpfs", required_argument, NULL, ARG_TMPFS },
+ { "overlay", required_argument, NULL, ARG_OVERLAY },
+ { "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO },
+ { "inaccessible", required_argument, NULL, ARG_INACCESSIBLE },
+ { "machine", required_argument, NULL, 'M' },
+ { "hostname", required_argument, NULL, ARG_HOSTNAME },
+ { "slice", required_argument, NULL, 'S' },
+ { "setenv", required_argument, NULL, 'E' },
+ { "selinux-context", required_argument, NULL, 'Z' },
+ { "selinux-apifs-context", required_argument, NULL, 'L' },
+ { "quiet", no_argument, NULL, 'q' },
+ { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM }, /* not documented */
+ { "register", required_argument, NULL, ARG_REGISTER },
+ { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
+ { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
+ { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
+ { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN },
+ { "network-veth", no_argument, NULL, 'n' },
+ { "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA },
+ { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
+ { "network-zone", required_argument, NULL, ARG_NETWORK_ZONE },
+ { "network-namespace-path", required_argument, NULL, ARG_NETWORK_NAMESPACE_PATH },
+ { "personality", required_argument, NULL, ARG_PERSONALITY },
+ { "image", required_argument, NULL, 'i' },
+ { "volatile", optional_argument, NULL, ARG_VOLATILE },
+ { "port", required_argument, NULL, 'p' },
+ { "property", required_argument, NULL, ARG_PROPERTY },
+ { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
+ { "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
+ { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
+ { "settings", required_argument, NULL, ARG_SETTINGS },
+ { "chdir", required_argument, NULL, ARG_CHDIR },
+ { "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT },
+ { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
+ { "root-hash", required_argument, NULL, ARG_ROOT_HASH },
+ { "root-hash-sig", required_argument, NULL, ARG_ROOT_HASH_SIG },
+ { "verity-data", required_argument, NULL, ARG_VERITY_DATA },
+ { "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER },
+ { "rlimit", required_argument, NULL, ARG_RLIMIT },
+ { "oom-score-adjust", required_argument, NULL, ARG_OOM_SCORE_ADJUST },
+ { "cpu-affinity", required_argument, NULL, ARG_CPU_AFFINITY },
+ { "resolv-conf", required_argument, NULL, ARG_RESOLV_CONF },
+ { "timezone", required_argument, NULL, ARG_TIMEZONE },
+ { "console", required_argument, NULL, ARG_CONSOLE },
+ { "pipe", no_argument, NULL, ARG_PIPE },
+ { "oci-bundle", required_argument, NULL, ARG_OCI_BUNDLE },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "set-credential", required_argument, NULL, ARG_SET_CREDENTIAL },
+ { "load-credential", required_argument, NULL, ARG_LOAD_CREDENTIAL },
+ {}
+ };
+
+ int c, r;
+ uint64_t plus = 0, minus = 0;
+ bool mask_all_settings = false, mask_no_settings = false;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+hD:u:abL:M:jS:Z:qi:xp:nUE:P", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'D':
+ r = parse_path_argument_and_warn(optarg, false, &arg_directory);
+ if (r < 0)
+ return r;
+
+ arg_settings_mask |= SETTING_DIRECTORY;
+ break;
+
+ case ARG_TEMPLATE:
+ r = parse_path_argument_and_warn(optarg, false, &arg_template);
+ if (r < 0)
+ return r;
+
+ arg_settings_mask |= SETTING_DIRECTORY;
+ break;
+
+ case 'i':
+ r = parse_path_argument_and_warn(optarg, false, &arg_image);
+ if (r < 0)
+ return r;
+
+ arg_settings_mask |= SETTING_DIRECTORY;
+ break;
+
+ case ARG_OCI_BUNDLE:
+ r = parse_path_argument_and_warn(optarg, false, &arg_oci_bundle);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case 'x':
+ arg_ephemeral = true;
+ arg_settings_mask |= SETTING_EPHEMERAL;
+ break;
+
+ case 'u':
+ r = free_and_strdup(&arg_user, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_settings_mask |= SETTING_USER;
+ break;
+
+ case ARG_NETWORK_ZONE: {
+ char *j;
+
+ j = strjoin("vz-", optarg);
+ if (!j)
+ return log_oom();
+
+ if (!ifname_valid(j)) {
+ log_error("Network zone name not valid: %s", j);
+ free(j);
+ return -EINVAL;
+ }
+
+ free_and_replace(arg_network_zone, j);
+
+ arg_network_veth = true;
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+ }
+
+ case ARG_NETWORK_BRIDGE:
+
+ if (!ifname_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Bridge interface name not valid: %s", optarg);
+
+ r = free_and_strdup(&arg_network_bridge, optarg);
+ if (r < 0)
+ return log_oom();
+
+ _fallthrough_;
+ case 'n':
+ arg_network_veth = true;
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_VETH_EXTRA:
+ r = veth_extra_parse(&arg_network_veth_extra, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --network-veth-extra= parameter: %s", optarg);
+
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_INTERFACE:
+ if (!ifname_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Network interface name not valid: %s", optarg);
+
+ r = test_network_interface_initialized(optarg);
+ if (r < 0)
+ return r;
+
+ if (strv_extend(&arg_network_interfaces, optarg) < 0)
+ return log_oom();
+
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_MACVLAN:
+
+ if (!ifname_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "MACVLAN network interface name not valid: %s", optarg);
+
+ r = test_network_interface_initialized(optarg);
+ if (r < 0)
+ return r;
+
+ if (strv_extend(&arg_network_macvlan, optarg) < 0)
+ return log_oom();
+
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_IPVLAN:
+
+ if (!ifname_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "IPVLAN network interface name not valid: %s", optarg);
+
+ r = test_network_interface_initialized(optarg);
+ if (r < 0)
+ return r;
+
+ if (strv_extend(&arg_network_ipvlan, optarg) < 0)
+ return log_oom();
+
+ _fallthrough_;
+ case ARG_PRIVATE_NETWORK:
+ arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case ARG_NETWORK_NAMESPACE_PATH:
+ r = parse_path_argument_and_warn(optarg, false, &arg_network_namespace_path);
+ if (r < 0)
+ return r;
+
+ arg_settings_mask |= SETTING_NETWORK;
+ break;
+
+ case 'b':
+ if (arg_start_mode == START_PID2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--boot and --as-pid2 may not be combined.");
+
+ arg_start_mode = START_BOOT;
+ arg_settings_mask |= SETTING_START_MODE;
+ break;
+
+ case 'a':
+ if (arg_start_mode == START_BOOT)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--boot and --as-pid2 may not be combined.");
+
+ arg_start_mode = START_PID2;
+ arg_settings_mask |= SETTING_START_MODE;
+ break;
+
+ case ARG_UUID:
+ r = sd_id128_from_string(optarg, &arg_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Invalid UUID: %s", optarg);
+
+ if (sd_id128_is_null(arg_uuid))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Machine UUID may not be all zeroes.");
+
+ arg_settings_mask |= SETTING_MACHINE_ID;
+ break;
+
+ case 'S': {
+ _cleanup_free_ char *mangled = NULL;
+
+ r = unit_name_mangle_with_suffix(optarg, NULL, UNIT_NAME_MANGLE_WARN, ".slice", &mangled);
+ if (r < 0)
+ return log_oom();
+
+ free_and_replace(arg_slice, mangled);
+ arg_settings_mask |= SETTING_SLICE;
+ break;
+ }
+
+ case 'M':
+ if (isempty(optarg))
+ arg_machine = mfree(arg_machine);
+ else {
+ if (!machine_name_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid machine name: %s", optarg);
+
+ r = free_and_strdup(&arg_machine, optarg);
+ if (r < 0)
+ return log_oom();
+ }
+ break;
+
+ case ARG_HOSTNAME:
+ if (isempty(optarg))
+ arg_hostname = mfree(arg_hostname);
+ else {
+ if (!hostname_is_valid(optarg, false))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid hostname: %s", optarg);
+
+ r = free_and_strdup(&arg_hostname, optarg);
+ if (r < 0)
+ return log_oom();
+ }
+
+ arg_settings_mask |= SETTING_HOSTNAME;
+ break;
+
+ case 'Z':
+ arg_selinux_context = optarg;
+ break;
+
+ case 'L':
+ arg_selinux_apifs_context = optarg;
+ break;
+
+ case ARG_READ_ONLY:
+ arg_read_only = true;
+ arg_settings_mask |= SETTING_READ_ONLY;
+ break;
+
+ case ARG_CAPABILITY:
+ case ARG_DROP_CAPABILITY: {
+ uint64_t m;
+ r = parse_capability_spec(optarg, &m);
+ if (r <= 0)
+ return r;
+
+ if (c == ARG_CAPABILITY)
+ plus |= m;
+ else
+ minus |= m;
+ arg_settings_mask |= SETTING_CAPABILITY;
+ break;
+ }
+ case ARG_NO_NEW_PRIVILEGES:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --no-new-privileges= argument: %s", optarg);
+
+ arg_no_new_privileges = r;
+ arg_settings_mask |= SETTING_NO_NEW_PRIVILEGES;
+ break;
+
+ case 'j':
+ arg_link_journal = LINK_GUEST;
+ arg_link_journal_try = true;
+ arg_settings_mask |= SETTING_LINK_JOURNAL;
+ break;
+
+ case ARG_LINK_JOURNAL:
+ r = parse_link_journal(optarg, &arg_link_journal, &arg_link_journal_try);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse link journal mode %s", optarg);
+
+ arg_settings_mask |= SETTING_LINK_JOURNAL;
+ break;
+
+ case ARG_BIND:
+ case ARG_BIND_RO:
+ r = bind_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg, c == ARG_BIND_RO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --bind(-ro)= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
+ break;
+
+ case ARG_TMPFS:
+ r = tmpfs_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --tmpfs= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
+ break;
+
+ case ARG_OVERLAY:
+ case ARG_OVERLAY_RO:
+ r = overlay_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg, c == ARG_OVERLAY_RO);
+ if (r == -EADDRNOTAVAIL)
+ return log_error_errno(r, "--overlay(-ro)= needs at least two colon-separated directories specified.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --overlay(-ro)= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
+ break;
+
+ case ARG_INACCESSIBLE:
+ r = inaccessible_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --inaccessible= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
+ break;
+
+ case 'E': {
+ char **n;
+
+ if (!env_assignment_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Environment variable assignment '%s' is not valid.", optarg);
+
+ n = strv_env_set(arg_setenv, optarg);
+ if (!n)
+ return log_oom();
+
+ strv_free_and_replace(arg_setenv, n);
+ arg_settings_mask |= SETTING_ENVIRONMENT;
+ break;
+ }
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_SHARE_SYSTEM:
+ /* We don't officially support this anymore, except for compat reasons. People should use the
+ * $SYSTEMD_NSPAWN_SHARE_* environment variables instead. */
+ log_warning("Please do not use --share-system anymore, use $SYSTEMD_NSPAWN_SHARE_* instead.");
+ arg_clone_ns_flags = 0;
+ break;
+
+ case ARG_REGISTER:
+ r = parse_boolean(optarg);
+ if (r < 0) {
+ log_error("Failed to parse --register= argument: %s", optarg);
+ return r;
+ }
+
+ arg_register = r;
+ break;
+
+ case ARG_KEEP_UNIT:
+ arg_keep_unit = true;
+ break;
+
+ case ARG_PERSONALITY:
+
+ arg_personality = personality_from_string(optarg);
+ if (arg_personality == PERSONALITY_INVALID)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown or unsupported personality '%s'.", optarg);
+
+ arg_settings_mask |= SETTING_PERSONALITY;
+ break;
+
+ case ARG_VOLATILE:
+
+ if (!optarg)
+ arg_volatile_mode = VOLATILE_YES;
+ else if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(volatile_mode, VolatileMode, _VOLATILE_MODE_MAX);
+ return 0;
+ } else {
+ VolatileMode m;
+
+ m = volatile_mode_from_string(optarg);
+ if (m < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --volatile= argument: %s", optarg);
+ else
+ arg_volatile_mode = m;
+ }
+
+ arg_settings_mask |= SETTING_VOLATILE_MODE;
+ break;
+
+ case 'p':
+ r = expose_port_parse(&arg_expose_ports, optarg);
+ if (r == -EEXIST)
+ return log_error_errno(r, "Duplicate port specification: %s", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse host port %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_EXPOSE_PORTS;
+ break;
+
+ case ARG_PROPERTY:
+ if (strv_extend(&arg_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_PRIVATE_USERS: {
+ int boolean = -1;
+
+ if (!optarg)
+ boolean = true;
+ else if (!in_charset(optarg, DIGITS))
+ /* do *not* parse numbers as booleans */
+ boolean = parse_boolean(optarg);
+
+ if (boolean == false) {
+ /* no: User namespacing off */
+ arg_userns_mode = USER_NAMESPACE_NO;
+ arg_uid_shift = UID_INVALID;
+ arg_uid_range = UINT32_C(0x10000);
+ } else if (boolean == true) {
+ /* yes: User namespacing on, UID range is read from root dir */
+ arg_userns_mode = USER_NAMESPACE_FIXED;
+ arg_uid_shift = UID_INVALID;
+ arg_uid_range = UINT32_C(0x10000);
+ } else if (streq(optarg, "pick")) {
+ /* pick: User namespacing on, UID range is picked randomly */
+ arg_userns_mode = USER_NAMESPACE_PICK;
+ arg_uid_shift = UID_INVALID;
+ arg_uid_range = UINT32_C(0x10000);
+ } else {
+ _cleanup_free_ char *buffer = NULL;
+ const char *range, *shift;
+
+ /* anything else: User namespacing on, UID range is explicitly configured */
+
+ range = strchr(optarg, ':');
+ if (range) {
+ buffer = strndup(optarg, range - optarg);
+ if (!buffer)
+ return log_oom();
+ shift = buffer;
+
+ range++;
+ r = safe_atou32(range, &arg_uid_range);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
+ } else
+ shift = optarg;
+
+ r = parse_uid(shift, &arg_uid_shift);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID \"%s\": %m", optarg);
+
+ arg_userns_mode = USER_NAMESPACE_FIXED;
+ }
+
+ if (arg_uid_range <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "UID range cannot be 0.");
+
+ arg_settings_mask |= SETTING_USERNS;
+ break;
+ }
+
+ case 'U':
+ if (userns_supported()) {
+ arg_userns_mode = USER_NAMESPACE_PICK;
+ arg_uid_shift = UID_INVALID;
+ arg_uid_range = UINT32_C(0x10000);
+
+ arg_settings_mask |= SETTING_USERNS;
+ }
+
+ break;
+
+ case ARG_PRIVATE_USERS_CHOWN:
+ arg_userns_chown = true;
+
+ arg_settings_mask |= SETTING_USERNS;
+ break;
+
+ case ARG_KILL_SIGNAL:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
+ arg_kill_signal = signal_from_string(optarg);
+ if (arg_kill_signal < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot parse signal: %s", optarg);
+
+ arg_settings_mask |= SETTING_KILL_SIGNAL;
+ break;
+
+ case ARG_SETTINGS:
+
+ /* no → do not read files
+ * yes → read files, do not override cmdline, trust only subset
+ * override → read files, override cmdline, trust only subset
+ * trusted → read files, do not override cmdline, trust all
+ */
+
+ r = parse_boolean(optarg);
+ if (r < 0) {
+ if (streq(optarg, "trusted")) {
+ mask_all_settings = false;
+ mask_no_settings = false;
+ arg_settings_trusted = true;
+
+ } else if (streq(optarg, "override")) {
+ mask_all_settings = false;
+ mask_no_settings = true;
+ arg_settings_trusted = -1;
+ } else
+ return log_error_errno(r, "Failed to parse --settings= argument: %s", optarg);
+ } else if (r > 0) {
+ /* yes */
+ mask_all_settings = false;
+ mask_no_settings = false;
+ arg_settings_trusted = -1;
+ } else {
+ /* no */
+ mask_all_settings = true;
+ mask_no_settings = false;
+ arg_settings_trusted = false;
+ }
+
+ break;
+
+ case ARG_CHDIR:
+ if (!path_is_absolute(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Working directory %s is not an absolute path.", optarg);
+
+ r = free_and_strdup(&arg_chdir, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_settings_mask |= SETTING_WORKING_DIRECTORY;
+ break;
+
+ case ARG_PIVOT_ROOT:
+ r = pivot_root_parse(&arg_pivot_root_new, &arg_pivot_root_old, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --pivot-root= argument %s: %m", optarg);
+
+ arg_settings_mask |= SETTING_PIVOT_ROOT;
+ break;
+
+ case ARG_NOTIFY_READY:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s is not a valid notify mode. Valid modes are: yes, no, and ready.", optarg);
+ arg_notify_ready = r;
+ arg_settings_mask |= SETTING_NOTIFY_READY;
+ break;
+
+ case ARG_ROOT_HASH: {
+ _cleanup_free_ void *k = NULL;
+ size_t l;
+
+ r = unhexmem(optarg, strlen(optarg), &k, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash: %s", optarg);
+ if (l < sizeof(sd_id128_t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Root hash must be at least 128bit long: %s", optarg);
+
+ free_and_replace(arg_verity_settings.root_hash, k);
+ arg_verity_settings.root_hash_size = l;
+ break;
+ }
+
+ case ARG_ROOT_HASH_SIG: {
+ char *value;
+ size_t l;
+ void *p;
+
+ if ((value = startswith(optarg, "base64:"))) {
+ r = unbase64mem(value, strlen(value), &p, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash signature '%s': %m", optarg);
+
+ } else {
+ r = read_full_file(optarg, (char**) &p, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed parse root hash signature file '%s': %m", optarg);
+ }
+
+ free_and_replace(arg_verity_settings.root_hash_sig, p);
+ arg_verity_settings.root_hash_sig_size = l;
+ break;
+ }
+
+ case ARG_VERITY_DATA:
+ r = parse_path_argument_and_warn(optarg, false, &arg_verity_settings.data_path);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_SYSTEM_CALL_FILTER: {
+ bool negative;
+ const char *items;
+
+ negative = optarg[0] == '~';
+ items = negative ? optarg + 1 : optarg;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&items, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse system call filter: %m");
+
+ if (negative)
+ r = strv_extend(&arg_syscall_deny_list, word);
+ else
+ r = strv_extend(&arg_syscall_allow_list, word);
+ if (r < 0)
+ return log_oom();
+ }
+
+ arg_settings_mask |= SETTING_SYSCALL_FILTER;
+ break;
+ }
+
+ case ARG_RLIMIT: {
+ const char *eq;
+ _cleanup_free_ char *name = NULL;
+ int rl;
+
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(rlimit, int, _RLIMIT_MAX);
+ return 0;
+ }
+
+ eq = strchr(optarg, '=');
+ if (!eq)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--rlimit= expects an '=' assignment.");
+
+ name = strndup(optarg, eq - optarg);
+ if (!name)
+ return log_oom();
+
+ rl = rlimit_from_string_harder(name);
+ if (rl < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown resource limit: %s", name);
+
+ if (!arg_rlimit[rl]) {
+ arg_rlimit[rl] = new0(struct rlimit, 1);
+ if (!arg_rlimit[rl])
+ return log_oom();
+ }
+
+ r = rlimit_parse(rl, eq + 1, arg_rlimit[rl]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse resource limit: %s", eq + 1);
+
+ arg_settings_mask |= SETTING_RLIMIT_FIRST << rl;
+ break;
+ }
+
+ case ARG_OOM_SCORE_ADJUST:
+ r = parse_oom_score_adjust(optarg, &arg_oom_score_adjust);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --oom-score-adjust= parameter: %s", optarg);
+
+ arg_oom_score_adjust_set = true;
+ arg_settings_mask |= SETTING_OOM_SCORE_ADJUST;
+ break;
+
+ case ARG_CPU_AFFINITY: {
+ CPUSet cpuset;
+
+ r = parse_cpu_set(optarg, &cpuset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse CPU affinity mask %s: %m", optarg);
+
+ cpu_set_reset(&arg_cpu_set);
+ arg_cpu_set = cpuset;
+ arg_settings_mask |= SETTING_CPU_AFFINITY;
+ break;
+ }
+
+ case ARG_RESOLV_CONF:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(resolv_conf_mode, ResolvConfMode, _RESOLV_CONF_MODE_MAX);
+ return 0;
+ }
+
+ arg_resolv_conf = resolv_conf_mode_from_string(optarg);
+ if (arg_resolv_conf < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse /etc/resolv.conf mode: %s", optarg);
+
+ arg_settings_mask |= SETTING_RESOLV_CONF;
+ break;
+
+ case ARG_TIMEZONE:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(timezone_mode, TimezoneMode, _TIMEZONE_MODE_MAX);
+ return 0;
+ }
+
+ arg_timezone = timezone_mode_from_string(optarg);
+ if (arg_timezone < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse /etc/localtime mode: %s", optarg);
+
+ arg_settings_mask |= SETTING_TIMEZONE;
+ break;
+
+ case ARG_CONSOLE:
+ r = handle_arg_console(optarg);
+ if (r <= 0)
+ return r;
+ break;
+
+ case 'P':
+ case ARG_PIPE:
+ r = handle_arg_console("pipe");
+ if (r <= 0)
+ return r;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_SET_CREDENTIAL: {
+ _cleanup_free_ char *word = NULL, *data = NULL;
+ const char *p = optarg;
+ Credential *a;
+ size_t i;
+ int l;
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --set-credential= parameter: %m");
+ if (r == 0 || !p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing value for --set-credential=: %s", optarg);
+
+ if (!credential_name_valid(word))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Credential name is not valid: %s", word);
+
+ for (i = 0; i < arg_n_credentials; i++)
+ if (streq(arg_credentials[i].id, word))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Duplicate credential '%s', refusing.", word);
+
+ l = cunescape(p, UNESCAPE_ACCEPT_NUL, &data);
+ if (l < 0)
+ return log_error_errno(l, "Failed to unescape credential data: %s", p);
+
+ a = reallocarray(arg_credentials, arg_n_credentials + 1, sizeof(Credential));
+ if (!a)
+ return log_oom();
+
+ a[arg_n_credentials++] = (Credential) {
+ .id = TAKE_PTR(word),
+ .data = TAKE_PTR(data),
+ .size = l,
+ };
+
+ arg_credentials = a;
+
+ arg_settings_mask |= SETTING_CREDENTIALS;
+ break;
+ }
+
+ case ARG_LOAD_CREDENTIAL: {
+ ReadFullFileFlags flags = READ_FULL_FILE_SECURE;
+ _cleanup_(erase_and_freep) char *data = NULL;
+ _cleanup_free_ char *word = NULL, *j = NULL;
+ const char *p = optarg;
+ Credential *a;
+ size_t size, i;
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --set-credential= parameter: %m");
+ if (r == 0 || !p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing value for --set-credential=: %s", optarg);
+
+ if (!credential_name_valid(word))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Credential name is not valid: %s", word);
+
+ for (i = 0; i < arg_n_credentials; i++)
+ if (streq(arg_credentials[i].id, word))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Duplicate credential '%s', refusing.", word);
+
+ if (path_is_absolute(p))
+ flags |= READ_FULL_FILE_CONNECT_SOCKET;
+ else {
+ const char *e;
+
+ e = getenv("CREDENTIALS_DIRECTORY");
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Credential not available (no credentials passed at all): %s", word);
+
+ j = path_join(e, p);
+ if (!j)
+ return log_oom();
+ }
+
+ r = read_full_file_full(AT_FDCWD, j ?: p, flags, NULL, &data, &size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read credential '%s': %m", j ?: p);
+
+ a = reallocarray(arg_credentials, arg_n_credentials + 1, sizeof(Credential));
+ if (!a)
+ return log_oom();
+
+ a[arg_n_credentials++] = (Credential) {
+ .id = TAKE_PTR(word),
+ .data = TAKE_PTR(data),
+ .size = size,
+ };
+
+ arg_credentials = a;
+
+ arg_settings_mask |= SETTING_CREDENTIALS;
+ break;
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc > optind) {
+ strv_free(arg_parameters);
+ arg_parameters = strv_copy(argv + optind);
+ if (!arg_parameters)
+ return log_oom();
+
+ arg_settings_mask |= SETTING_START_MODE;
+ }
+
+ if (arg_ephemeral && arg_template && !arg_directory)
+ /* User asked for ephemeral execution but specified --template= instead of --directory=. Semantically
+ * such an invocation makes some sense, see https://github.com/systemd/systemd/issues/3667. Let's
+ * accept this here, and silently make "--ephemeral --template=" equivalent to "--ephemeral
+ * --directory=". */
+ arg_directory = TAKE_PTR(arg_template);
+
+ arg_caps_retain = (arg_caps_retain | plus | (arg_private_network ? UINT64_C(1) << CAP_NET_ADMIN : 0)) & ~minus;
+
+ /* Make sure to parse environment before we reset the settings mask below */
+ r = parse_environment();
+ if (r < 0)
+ return r;
+
+ /* Load all settings from .nspawn files */
+ if (mask_no_settings)
+ arg_settings_mask = 0;
+
+ /* Don't load any settings from .nspawn files */
+ if (mask_all_settings)
+ arg_settings_mask = _SETTINGS_MASK_ALL;
+
+ return 1;
+}
+
+static int verify_arguments(void) {
+ int r;
+
+ if (arg_start_mode == START_PID2 && arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
+ /* If we are running the stub init in the container, we don't need to look at what the init
+ * in the container supports, because we are not using it. Let's immediately pick the right
+ * setting based on the host system configuration.
+ *
+ * We only do this, if the user didn't use an environment variable to override the detection.
+ */
+
+ r = cg_all_unified();
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether we are in all unified mode.");
+ if (r > 0)
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+ else if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0)
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD;
+ else
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ }
+
+ if (arg_userns_mode != USER_NAMESPACE_NO)
+ arg_mount_settings |= MOUNT_USE_USERNS;
+
+ if (arg_private_network)
+ arg_mount_settings |= MOUNT_APPLY_APIVFS_NETNS;
+
+ if (!(arg_clone_ns_flags & CLONE_NEWPID) ||
+ !(arg_clone_ns_flags & CLONE_NEWUTS)) {
+ arg_register = false;
+ if (arg_start_mode != START_PID1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--boot cannot be used without namespacing.");
+ }
+
+ if (arg_userns_mode == USER_NAMESPACE_PICK)
+ arg_userns_chown = true;
+
+ if (arg_start_mode == START_BOOT && arg_kill_signal <= 0)
+ arg_kill_signal = SIGRTMIN+3;
+
+ if (arg_volatile_mode != VOLATILE_NO) /* Make sure all file systems contained in the image are mounted read-only if we are in volatile mode */
+ arg_read_only = true;
+
+ if (has_custom_root_mount(arg_custom_mounts, arg_n_custom_mounts))
+ arg_read_only = true;
+
+ if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0)
+ /* Save the user from accidentally registering either user-$SESSION.scope or user@.service.
+ * The latter is not technically a user session, but we don't need to labour the point. */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--keep-unit --register=yes may not be used when invoked from a user session.");
+
+ if (arg_directory && arg_image)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--directory= and --image= may not be combined.");
+
+ if (arg_template && arg_image)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--template= and --image= may not be combined.");
+
+ if (arg_template && !(arg_directory || arg_machine))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--template= needs --directory= or --machine=.");
+
+ if (arg_ephemeral && arg_template)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--ephemeral and --template= may not be combined.");
+
+ if (arg_ephemeral && !IN_SET(arg_link_journal, LINK_NO, LINK_AUTO))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--ephemeral and --link-journal= may not be combined.");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO && !userns_supported())
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "--private-users= is not supported, kernel compiled without user namespace support.");
+
+ if (arg_userns_chown && arg_read_only)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--read-only and --private-users-chown may not be combined.");
+
+ /* We don't support --private-users-chown together with any of the volatile modes since we couldn't
+ * change the read-only part of the tree (i.e. /usr) anyway, or because it would trigger a massive
+ * copy-up (in case of overlay) making the entire exercise pointless. */
+ if (arg_userns_chown && arg_volatile_mode != VOLATILE_NO)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--volatile= and --private-users-chown may not be combined.");
+
+ /* If --network-namespace-path is given with any other network-related option (except --private-network),
+ * we need to error out, to avoid conflicts between different network options. */
+ if (arg_network_namespace_path &&
+ (arg_network_interfaces || arg_network_macvlan ||
+ arg_network_ipvlan || arg_network_veth_extra ||
+ arg_network_bridge || arg_network_zone ||
+ arg_network_veth))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--network-namespace-path= cannot be combined with other network options.");
+
+ if (arg_network_bridge && arg_network_zone)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--network-bridge= and --network-zone= may not be combined.");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO && (arg_mount_settings & MOUNT_APPLY_APIVFS_NETNS) && !arg_private_network)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid namespacing settings. Mounting sysfs with --private-users requires --private-network.");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO && !(arg_mount_settings & MOUNT_APPLY_APIVFS_RO))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --private-users with read-write mounts.");
+
+ if (arg_expose_ports && !arg_private_network)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --port= without private networking.");
+
+#if ! HAVE_LIBIPTC
+ if (arg_expose_ports)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "--port= is not supported, compiled without libiptc support.");
+#endif
+
+ r = custom_mount_check_all();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int userns_lchown(const char *p, uid_t uid, gid_t gid) {
+ assert(p);
+
+ if (arg_userns_mode == USER_NAMESPACE_NO)
+ return 0;
+
+ if (uid == UID_INVALID && gid == GID_INVALID)
+ return 0;
+
+ if (uid != UID_INVALID) {
+ uid += arg_uid_shift;
+
+ if (uid < arg_uid_shift || uid >= arg_uid_shift + arg_uid_range)
+ return -EOVERFLOW;
+ }
+
+ if (gid != GID_INVALID) {
+ gid += (gid_t) arg_uid_shift;
+
+ if (gid < (gid_t) arg_uid_shift || gid >= (gid_t) (arg_uid_shift + arg_uid_range))
+ return -EOVERFLOW;
+ }
+
+ if (lchown(p, uid, gid) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int userns_mkdir(const char *root, const char *path, mode_t mode, uid_t uid, gid_t gid) {
+ const char *q;
+ int r;
+
+ q = prefix_roota(root, path);
+ r = mkdir_errno_wrapper(q, mode);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return userns_lchown(q, uid, gid);
+}
+
+static const char *timezone_from_path(const char *path) {
+ return PATH_STARTSWITH_SET(
+ path,
+ "../usr/share/zoneinfo/",
+ "/usr/share/zoneinfo/");
+}
+
+static bool etc_writable(void) {
+ return !arg_read_only || IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_OVERLAY);
+}
+
+static int setup_timezone(const char *dest) {
+ _cleanup_free_ char *p = NULL, *etc = NULL;
+ const char *where, *check;
+ TimezoneMode m;
+ int r;
+
+ assert(dest);
+
+ if (IN_SET(arg_timezone, TIMEZONE_AUTO, TIMEZONE_SYMLINK)) {
+ r = readlink_malloc("/etc/localtime", &p);
+ if (r == -ENOENT && arg_timezone == TIMEZONE_AUTO)
+ m = etc_writable() ? TIMEZONE_DELETE : TIMEZONE_OFF;
+ else if (r == -EINVAL && arg_timezone == TIMEZONE_AUTO) /* regular file? */
+ m = etc_writable() ? TIMEZONE_COPY : TIMEZONE_BIND;
+ else if (r < 0) {
+ log_warning_errno(r, "Failed to read host's /etc/localtime symlink, not updating container timezone: %m");
+ /* To handle warning, delete /etc/localtime and replace it with a symbolic link to a time zone data
+ * file.
+ *
+ * Example:
+ * ln -s /usr/share/zoneinfo/UTC /etc/localtime
+ */
+ return 0;
+ } else if (arg_timezone == TIMEZONE_AUTO)
+ m = etc_writable() ? TIMEZONE_SYMLINK : TIMEZONE_BIND;
+ else
+ m = arg_timezone;
+ } else
+ m = arg_timezone;
+
+ if (m == TIMEZONE_OFF)
+ return 0;
+
+ r = chase_symlinks("/etc", dest, CHASE_PREFIX_ROOT, &etc, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to resolve /etc path in container, ignoring: %m");
+ return 0;
+ }
+
+ where = strjoina(etc, "/localtime");
+
+ switch (m) {
+
+ case TIMEZONE_DELETE:
+ if (unlink(where) < 0)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, "Failed to remove '%s', ignoring: %m", where);
+
+ return 0;
+
+ case TIMEZONE_SYMLINK: {
+ _cleanup_free_ char *q = NULL;
+ const char *z, *what;
+
+ z = timezone_from_path(p);
+ if (!z) {
+ log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
+ return 0;
+ }
+
+ r = readlink_malloc(where, &q);
+ if (r >= 0 && streq_ptr(timezone_from_path(q), z))
+ return 0; /* Already pointing to the right place? Then do nothing .. */
+
+ check = strjoina(dest, "/usr/share/zoneinfo/", z);
+ r = chase_symlinks(check, dest, 0, NULL, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Timezone %s does not exist (or is not accessible) in container, not creating symlink: %m", z);
+ else {
+ if (unlink(where) < 0 && errno != ENOENT) {
+ log_full_errno(IN_SET(errno, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING, /* Don't complain on read-only images */
+ errno, "Failed to remove existing timezone info %s in container, ignoring: %m", where);
+ return 0;
+ }
+
+ what = strjoina("../usr/share/zoneinfo/", z);
+ if (symlink(what, where) < 0) {
+ log_full_errno(IN_SET(errno, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING,
+ errno, "Failed to correct timezone of container, ignoring: %m");
+ return 0;
+ }
+
+ break;
+ }
+
+ _fallthrough_;
+ }
+
+ case TIMEZONE_BIND: {
+ _cleanup_free_ char *resolved = NULL;
+ int found;
+
+ found = chase_symlinks(where, dest, CHASE_NONEXISTENT, &resolved, NULL);
+ if (found < 0) {
+ log_warning_errno(found, "Failed to resolve /etc/localtime path in container, ignoring: %m");
+ return 0;
+ }
+
+ if (found == 0) /* missing? */
+ (void) touch(resolved);
+
+ r = mount_nofollow_verbose(LOG_WARNING, "/etc/localtime", resolved, NULL, MS_BIND, NULL);
+ if (r >= 0)
+ return mount_nofollow_verbose(LOG_ERR, NULL, resolved, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NODEV, NULL);
+
+ _fallthrough_;
+ }
+
+ case TIMEZONE_COPY:
+ /* If mounting failed, try to copy */
+ r = copy_file_atomic("/etc/localtime", where, 0644, 0, 0, COPY_REFLINK|COPY_REPLACE);
+ if (r < 0) {
+ log_full_errno(IN_SET(r, -EROFS, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to copy /etc/localtime to %s, ignoring: %m", where);
+ return 0;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("unexpected mode");
+ }
+
+ /* Fix permissions of the symlink or file copy we just created */
+ r = userns_lchown(where, 0, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to chown /etc/localtime, ignoring: %m");
+
+ return 0;
+}
+
+static int have_resolv_conf(const char *path) {
+ assert(path);
+
+ if (access(path, F_OK) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_debug_errno(errno, "Failed to determine whether '%s' is available: %m", path);
+ }
+
+ return 1;
+}
+
+static int resolved_listening(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *dns_stub_listener_mode = NULL;
+ int r;
+
+ /* Check if resolved is listening */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open system bus: %m");
+
+ r = bus_name_has_owner(bus, "org.freedesktop.resolve1", NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to check whether the 'org.freedesktop.resolve1' bus name is taken: %m");
+ if (r == 0)
+ return 0;
+
+ r = sd_bus_get_property_string(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "DNSStubListener",
+ &error,
+ &dns_stub_listener_mode);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to query DNSStubListener property: %s", bus_error_message(&error, r));
+
+ return STR_IN_SET(dns_stub_listener_mode, "udp", "yes");
+}
+
+static int setup_resolv_conf(const char *dest) {
+ _cleanup_free_ char *etc = NULL;
+ const char *where, *what;
+ ResolvConfMode m;
+ int r;
+
+ assert(dest);
+
+ if (arg_resolv_conf == RESOLV_CONF_AUTO) {
+ if (arg_private_network)
+ m = RESOLV_CONF_OFF;
+ else if (have_resolv_conf(PRIVATE_STUB_RESOLV_CONF) > 0 && resolved_listening() > 0)
+ m = etc_writable() ? RESOLV_CONF_COPY_STUB : RESOLV_CONF_BIND_STUB;
+ else if (have_resolv_conf("/etc/resolv.conf") > 0)
+ m = etc_writable() ? RESOLV_CONF_COPY_HOST : RESOLV_CONF_BIND_HOST;
+ else
+ m = etc_writable() ? RESOLV_CONF_DELETE : RESOLV_CONF_OFF;
+
+ } else
+ m = arg_resolv_conf;
+
+ if (m == RESOLV_CONF_OFF)
+ return 0;
+
+ r = chase_symlinks("/etc", dest, CHASE_PREFIX_ROOT, &etc, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to resolve /etc path in container, ignoring: %m");
+ return 0;
+ }
+
+ where = strjoina(etc, "/resolv.conf");
+
+ if (m == RESOLV_CONF_DELETE) {
+ if (unlink(where) < 0)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, "Failed to remove '%s', ignoring: %m", where);
+
+ return 0;
+ }
+
+ if (IN_SET(m, RESOLV_CONF_BIND_STATIC, RESOLV_CONF_REPLACE_STATIC, RESOLV_CONF_COPY_STATIC))
+ what = PRIVATE_STATIC_RESOLV_CONF;
+ else if (IN_SET(m, RESOLV_CONF_BIND_UPLINK, RESOLV_CONF_REPLACE_UPLINK, RESOLV_CONF_COPY_UPLINK))
+ what = PRIVATE_UPLINK_RESOLV_CONF;
+ else if (IN_SET(m, RESOLV_CONF_BIND_STUB, RESOLV_CONF_REPLACE_STUB, RESOLV_CONF_COPY_STUB))
+ what = PRIVATE_STUB_RESOLV_CONF;
+ else
+ what = "/etc/resolv.conf";
+
+ if (IN_SET(m, RESOLV_CONF_BIND_HOST, RESOLV_CONF_BIND_STATIC, RESOLV_CONF_BIND_UPLINK, RESOLV_CONF_BIND_STUB)) {
+ _cleanup_free_ char *resolved = NULL;
+ int found;
+
+ found = chase_symlinks(where, dest, CHASE_NONEXISTENT, &resolved, NULL);
+ if (found < 0) {
+ log_warning_errno(found, "Failed to resolve /etc/resolv.conf path in container, ignoring: %m");
+ return 0;
+ }
+
+ if (found == 0) /* missing? */
+ (void) touch(resolved);
+
+ r = mount_nofollow_verbose(LOG_WARNING, what, resolved, NULL, MS_BIND, NULL);
+ if (r >= 0)
+ return mount_nofollow_verbose(LOG_ERR, NULL, resolved, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NODEV, NULL);
+
+ /* If that didn't work, let's copy the file */
+ }
+
+ if (IN_SET(m, RESOLV_CONF_REPLACE_HOST, RESOLV_CONF_REPLACE_STATIC, RESOLV_CONF_REPLACE_UPLINK, RESOLV_CONF_REPLACE_STUB))
+ r = copy_file_atomic(what, where, 0644, 0, 0, COPY_REFLINK|COPY_REPLACE);
+ else
+ r = copy_file(what, where, O_TRUNC|O_NOFOLLOW, 0644, 0, 0, COPY_REFLINK);
+ if (r < 0) {
+ /* If the file already exists as symlink, let's suppress the warning, under the assumption that
+ * resolved or something similar runs inside and the symlink points there.
+ *
+ * If the disk image is read-only, there's also no point in complaining.
+ */
+ log_full_errno(!IN_SET(RESOLV_CONF_COPY_HOST, RESOLV_CONF_COPY_STATIC, RESOLV_CONF_COPY_UPLINK, RESOLV_CONF_COPY_STUB) &&
+ IN_SET(r, -ELOOP, -EROFS, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to copy /etc/resolv.conf to %s, ignoring: %m", where);
+ return 0;
+ }
+
+ r = userns_lchown(where, 0, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to chown /etc/resolv.conf, ignoring: %m");
+
+ return 0;
+}
+
+static int setup_boot_id(void) {
+ _cleanup_(unlink_and_freep) char *from = NULL;
+ _cleanup_free_ char *path = NULL;
+ sd_id128_t rnd = SD_ID128_NULL;
+ const char *to;
+ int r;
+
+ /* Generate a new randomized boot ID, so that each boot-up of the container gets a new one */
+
+ r = tempfn_random_child("/run", "proc-sys-kernel-random-boot-id", &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate random boot ID path: %m");
+
+ r = sd_id128_randomize(&rnd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate random boot id: %m");
+
+ r = id128_write(path, ID128_UUID, rnd, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write boot id: %m");
+
+ from = TAKE_PTR(path);
+ to = "/proc/sys/kernel/random/boot_id";
+
+ r = mount_nofollow_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ return mount_nofollow_verbose(LOG_ERR, NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+}
+
+static int copy_devnodes(const char *dest) {
+ static const char devnodes[] =
+ "null\0"
+ "zero\0"
+ "full\0"
+ "random\0"
+ "urandom\0"
+ "tty\0"
+ "net/tun\0";
+
+ _cleanup_umask_ mode_t u;
+ const char *d;
+ int r = 0;
+
+ assert(dest);
+
+ u = umask(0000);
+
+ /* Create /dev/net, so that we can create /dev/net/tun in it */
+ if (userns_mkdir(dest, "/dev/net", 0755, 0, 0) < 0)
+ return log_error_errno(r, "Failed to create /dev/net directory: %m");
+
+ NULSTR_FOREACH(d, devnodes) {
+ _cleanup_free_ char *from = NULL, *to = NULL;
+ struct stat st;
+
+ from = path_join("/dev/", d);
+ if (!from)
+ return log_oom();
+
+ to = path_join(dest, from);
+ if (!to)
+ return log_oom();
+
+ if (stat(from, &st) < 0) {
+
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat %s: %m", from);
+
+ } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "%s is not a char or block device, cannot copy.", from);
+ else {
+ _cleanup_free_ char *sl = NULL, *prefixed = NULL, *dn = NULL, *t = NULL;
+
+ if (mknod(to, st.st_mode, st.st_rdev) < 0) {
+ /* Explicitly warn the user when /dev is already populated. */
+ if (errno == EEXIST)
+ log_notice("%s/dev is pre-mounted and pre-populated. If a pre-mounted /dev is provided it needs to be an unpopulated file system.", dest);
+ if (errno != EPERM)
+ return log_error_errno(errno, "mknod(%s) failed: %m", to);
+
+ /* Some systems abusively restrict mknod but allow bind mounts. */
+ r = touch(to);
+ if (r < 0)
+ return log_error_errno(r, "touch (%s) failed: %m", to);
+ r = mount_nofollow_verbose(LOG_DEBUG, from, to, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Both mknod and bind mount (%s) failed: %m", to);
+ }
+
+ r = userns_lchown(to, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "chown() of device node %s failed: %m", to);
+
+ dn = path_join("/dev", S_ISCHR(st.st_mode) ? "char" : "block");
+ if (!dn)
+ return log_oom();
+
+ r = userns_mkdir(dest, dn, 0755, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create '%s': %m", dn);
+
+ if (asprintf(&sl, "%s/%u:%u", dn, major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return log_oom();
+
+ prefixed = path_join(dest, sl);
+ if (!prefixed)
+ return log_oom();
+
+ t = path_join("..", d);
+ if (!t)
+ return log_oom();
+
+ if (symlink(t, prefixed) < 0)
+ log_debug_errno(errno, "Failed to symlink '%s' to '%s': %m", t, prefixed);
+ }
+ }
+
+ return r;
+}
+
+static int make_extra_nodes(const char *dest) {
+ _cleanup_umask_ mode_t u;
+ size_t i;
+ int r;
+
+ u = umask(0000);
+
+ for (i = 0; i < arg_n_extra_nodes; i++) {
+ _cleanup_free_ char *path = NULL;
+ DeviceNode *n = arg_extra_nodes + i;
+
+ path = path_join(dest, n->path);
+ if (!path)
+ return log_oom();
+
+ if (mknod(path, n->mode, S_ISCHR(n->mode) || S_ISBLK(n->mode) ? makedev(n->major, n->minor) : 0) < 0)
+ return log_error_errno(errno, "Failed to create device node '%s': %m", path);
+
+ r = chmod_and_chown(path, n->mode, n->uid, n->gid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust device node ownership of '%s': %m", path);
+ }
+
+ return 0;
+}
+
+static int setup_pts(const char *dest) {
+ _cleanup_free_ char *options = NULL;
+ const char *p;
+ int r;
+
+#if HAVE_SELINUX
+ if (arg_selinux_apifs_context)
+ (void) asprintf(&options,
+ "newinstance,ptmxmode=0666,mode=620,gid=" GID_FMT ",context=\"%s\"",
+ arg_uid_shift + TTY_GID,
+ arg_selinux_apifs_context);
+ else
+#endif
+ (void) asprintf(&options,
+ "newinstance,ptmxmode=0666,mode=620,gid=" GID_FMT,
+ arg_uid_shift + TTY_GID);
+
+ if (!options)
+ return log_oom();
+
+ /* Mount /dev/pts itself */
+ p = prefix_roota(dest, "/dev/pts");
+ r = mkdir_errno_wrapper(p, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /dev/pts: %m");
+
+ r = mount_nofollow_verbose(LOG_ERR, "devpts", p, "devpts", MS_NOSUID|MS_NOEXEC, options);
+ if (r < 0)
+ return r;
+ r = userns_lchown(p, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown /dev/pts: %m");
+
+ /* Create /dev/ptmx symlink */
+ p = prefix_roota(dest, "/dev/ptmx");
+ if (symlink("pts/ptmx", p) < 0)
+ return log_error_errno(errno, "Failed to create /dev/ptmx symlink: %m");
+ r = userns_lchown(p, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown /dev/ptmx: %m");
+
+ /* And fix /dev/pts/ptmx ownership */
+ p = prefix_roota(dest, "/dev/pts/ptmx");
+ r = userns_lchown(p, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown /dev/pts/ptmx: %m");
+
+ return 0;
+}
+
+static int setup_stdio_as_dev_console(void) {
+ _cleanup_close_ int terminal = -1;
+ int r;
+
+ /* We open the TTY in O_NOCTTY mode, so that we do not become controller yet. We'll do that later
+ * explicitly, if we are configured to. */
+ terminal = open_terminal("/dev/console", O_RDWR|O_NOCTTY);
+ if (terminal < 0)
+ return log_error_errno(terminal, "Failed to open console: %m");
+
+ /* Make sure we can continue logging to the original stderr, even if
+ * stderr points elsewhere now */
+ r = log_dup_console();
+ if (r < 0)
+ return log_error_errno(r, "Failed to duplicate stderr: %m");
+
+ /* invalidates 'terminal' on success and failure */
+ r = rearrange_stdio(terminal, terminal, terminal);
+ TAKE_FD(terminal);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move console to stdin/stdout/stderr: %m");
+
+ return 0;
+}
+
+static int setup_dev_console(const char *console) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ /* Create /dev/console symlink */
+ r = path_make_relative("/dev", console, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create relative path: %m");
+
+ if (symlink(p, "/dev/console") < 0)
+ return log_error_errno(errno, "Failed to create /dev/console symlink: %m");
+
+ return 0;
+}
+
+static int setup_keyring(void) {
+ key_serial_t keyring;
+
+ /* Allocate a new session keyring for the container. This makes sure the keyring of the session
+ * systemd-nspawn was invoked from doesn't leak into the container. Note that by default we block
+ * keyctl() and request_key() anyway via seccomp so doing this operation isn't strictly necessary,
+ * but in case people explicitly allow-list these system calls let's make sure we don't leak anything
+ * into the container. */
+
+ keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
+ if (keyring == -1) {
+ if (errno == ENOSYS)
+ log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
+ else if (ERRNO_IS_PRIVILEGE(errno))
+ log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
+ else
+ return log_error_errno(errno, "Setting up kernel keyring failed: %m");
+ }
+
+ return 0;
+}
+
+static int setup_credentials(const char *root) {
+ const char *q;
+ int r;
+
+ if (arg_n_credentials <= 0)
+ return 0;
+
+ r = userns_mkdir(root, "/run/host", 0755, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/host: %m");
+
+ r = userns_mkdir(root, "/run/host/credentials", 0700, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/host/credentials: %m");
+
+ q = prefix_roota(root, "/run/host/credentials");
+ r = mount_nofollow_verbose(LOG_ERR, NULL, q, "ramfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, "mode=0700");
+ if (r < 0)
+ return r;
+
+ for (size_t i = 0; i < arg_n_credentials; i++) {
+ _cleanup_free_ char *j = NULL;
+ _cleanup_close_ int fd = -1;
+
+ j = path_join(q, arg_credentials[i].id);
+ if (!j)
+ return log_oom();
+
+ fd = open(j, O_CREAT|O_EXCL|O_WRONLY|O_CLOEXEC|O_NOFOLLOW, 0600);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to create credential file %s: %m", j);
+
+ r = loop_write(fd, arg_credentials[i].data, arg_credentials[i].size, /* do_poll= */ false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write credential to file %s: %m", j);
+
+ if (fchmod(fd, 0400) < 0)
+ return log_error_errno(errno, "Failed to adjust access mode of %s: %m", j);
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ if (fchown(fd, arg_uid_shift, arg_uid_shift) < 0)
+ return log_error_errno(errno, "Failed to adjust ownership of %s: %m", j);
+ }
+ }
+
+ if (chmod(q, 0500) < 0)
+ return log_error_errno(errno, "Failed to adjust access mode of %s: %m", q);
+
+ r = userns_lchown(q, 0, 0);
+ if (r < 0)
+ return r;
+
+ /* Make both mount and superblock read-only now */
+ r = mount_nofollow_verbose(LOG_ERR, NULL, q, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ if (r < 0)
+ return r;
+
+ return mount_nofollow_verbose(LOG_ERR, NULL, q, NULL, MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, "mode=0500");
+}
+
+static int setup_kmsg(int kmsg_socket) {
+ _cleanup_(unlink_and_freep) char *from = NULL;
+ _cleanup_free_ char *fifo = NULL;
+ _cleanup_close_ int fd = -1;
+ _cleanup_umask_ mode_t u;
+ int r;
+
+ assert(kmsg_socket >= 0);
+
+ u = umask(0000);
+
+ /* We create the kmsg FIFO as as temporary file in /run, but immediately delete it after bind mounting it to
+ * /proc/kmsg. While FIFOs on the reading side behave very similar to /proc/kmsg, their writing side behaves
+ * differently from /dev/kmsg in that writing blocks when nothing is reading. In order to avoid any problems
+ * with containers deadlocking due to this we simply make /dev/kmsg unavailable to the container. */
+
+ r = tempfn_random_child("/run", "proc-kmsg", &fifo);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate kmsg path: %m");
+
+ if (mkfifo(fifo, 0600) < 0)
+ return log_error_errno(errno, "mkfifo() for /run/kmsg failed: %m");
+
+ from = TAKE_PTR(fifo);
+
+ r = mount_nofollow_verbose(LOG_ERR, from, "/proc/kmsg", NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ fd = open(from, O_RDWR|O_NONBLOCK|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open fifo: %m");
+
+ /* Store away the fd in the socket, so that it stays open as long as we run the child */
+ r = send_one_fd(kmsg_socket, fd, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send FIFO fd: %m");
+
+ return 0;
+}
+
+static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ union in_addr_union *exposed = userdata;
+
+ assert(rtnl);
+ assert(m);
+ assert(exposed);
+
+ expose_port_execute(rtnl, arg_expose_ports, exposed);
+ return 0;
+}
+
+static int setup_hostname(void) {
+ int r;
+
+ if ((arg_clone_ns_flags & CLONE_NEWUTS) == 0)
+ return 0;
+
+ r = sethostname_idempotent(arg_hostname ?: arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set hostname: %m");
+
+ return 0;
+}
+
+static int setup_journal(const char *directory) {
+ _cleanup_free_ char *d = NULL;
+ char id[SD_ID128_STRING_MAX];
+ const char *dirname, *p, *q;
+ sd_id128_t this_id;
+ bool try;
+ int r;
+
+ /* Don't link journals in ephemeral mode */
+ if (arg_ephemeral)
+ return 0;
+
+ if (arg_link_journal == LINK_NO)
+ return 0;
+
+ try = arg_link_journal_try || arg_link_journal == LINK_AUTO;
+
+ r = sd_id128_get_machine(&this_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve machine ID: %m");
+
+ if (sd_id128_equal(arg_uuid, this_id)) {
+ log_full(try ? LOG_WARNING : LOG_ERR,
+ "Host and machine ids are equal (%s): refusing to link journals", sd_id128_to_string(arg_uuid, id));
+ if (try)
+ return 0;
+ return -EEXIST;
+ }
+
+ FOREACH_STRING(dirname, "/var", "/var/log", "/var/log/journal") {
+ r = userns_mkdir(directory, dirname, 0755, 0, 0);
+ if (r < 0) {
+ bool ignore = r == -EROFS && try;
+ log_full_errno(ignore ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to create %s%s: %m", dirname, ignore ? ", ignoring" : "");
+ return ignore ? 0 : r;
+ }
+ }
+
+ (void) sd_id128_to_string(arg_uuid, id);
+
+ p = strjoina("/var/log/journal/", id);
+ q = prefix_roota(directory, p);
+
+ if (path_is_mount_point(p, NULL, 0) > 0) {
+ if (try)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: already a mount point, refusing to use for journal", p);
+ }
+
+ if (path_is_mount_point(q, NULL, 0) > 0) {
+ if (try)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: already a mount point, refusing to use for journal", q);
+ }
+
+ r = readlink_and_make_absolute(p, &d);
+ if (r >= 0) {
+ if (IN_SET(arg_link_journal, LINK_GUEST, LINK_AUTO) &&
+ path_equal(d, q)) {
+
+ r = userns_mkdir(directory, p, 0755, 0, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to create directory %s: %m", q);
+ return 0;
+ }
+
+ if (unlink(p) < 0)
+ return log_error_errno(errno, "Failed to remove symlink %s: %m", p);
+ } else if (r == -EINVAL) {
+
+ if (arg_link_journal == LINK_GUEST &&
+ rmdir(p) < 0) {
+
+ if (errno == ENOTDIR) {
+ log_error("%s already exists and is neither a symlink nor a directory", p);
+ return r;
+ } else
+ return log_error_errno(errno, "Failed to remove %s: %m", p);
+ }
+ } else if (r != -ENOENT)
+ return log_error_errno(r, "readlink(%s) failed: %m", p);
+
+ if (arg_link_journal == LINK_GUEST) {
+
+ if (symlink(q, p) < 0) {
+ if (try) {
+ log_debug_errno(errno, "Failed to symlink %s to %s, skipping journal setup: %m", q, p);
+ return 0;
+ } else
+ return log_error_errno(errno, "Failed to symlink %s to %s: %m", q, p);
+ }
+
+ r = userns_mkdir(directory, p, 0755, 0, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to create directory %s: %m", q);
+ return 0;
+ }
+
+ if (arg_link_journal == LINK_HOST) {
+ /* don't create parents here — if the host doesn't have
+ * permanent journal set up, don't force it here */
+
+ r = mkdir_errno_wrapper(p, 0755);
+ if (r < 0 && r != -EEXIST) {
+ if (try) {
+ log_debug_errno(r, "Failed to create %s, skipping journal setup: %m", p);
+ return 0;
+ } else
+ return log_error_errno(r, "Failed to create %s: %m", p);
+ }
+
+ } else if (access(p, F_OK) < 0)
+ return 0;
+
+ if (dir_is_empty(q) == 0)
+ log_warning("%s is not empty, proceeding anyway.", q);
+
+ r = userns_mkdir(directory, p, 0755, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s: %m", q);
+
+ r = mount_nofollow_verbose(LOG_DEBUG, p, q, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to bind mount journal from host into guest: %m");
+
+ return 0;
+}
+
+static int drop_capabilities(uid_t uid) {
+ CapabilityQuintet q;
+
+ /* Let's initialize all five capability sets to something valid. If the quintet was configured via
+ * OCI use that, but fill in missing bits. If it wasn't then derive the quintet in full from
+ * arg_caps_retain. */
+
+ if (capability_quintet_is_set(&arg_full_capabilities)) {
+ q = arg_full_capabilities;
+
+ if (q.bounding == (uint64_t) -1)
+ q.bounding = uid == 0 ? arg_caps_retain : 0;
+
+ if (q.effective == (uint64_t) -1)
+ q.effective = uid == 0 ? q.bounding : 0;
+
+ if (q.inheritable == (uint64_t) -1)
+ q.inheritable = uid == 0 ? q.bounding : 0;
+
+ if (q.permitted == (uint64_t) -1)
+ q.permitted = uid == 0 ? q.bounding : 0;
+
+ if (q.ambient == (uint64_t) -1 && ambient_capabilities_supported())
+ q.ambient = 0;
+
+ if (capability_quintet_mangle(&q))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Cannot set capabilities that are not in the current bounding set.");
+
+ } else {
+ q = (CapabilityQuintet) {
+ .bounding = arg_caps_retain,
+ .effective = uid == 0 ? arg_caps_retain : 0,
+ .inheritable = uid == 0 ? arg_caps_retain : 0,
+ .permitted = uid == 0 ? arg_caps_retain : 0,
+ .ambient = ambient_capabilities_supported() ? 0 : (uint64_t) -1,
+ };
+
+ /* If we're not using OCI, proceed with mangled capabilities (so we don't error out)
+ * in order to maintain the same behavior as systemd < 242. */
+ if (capability_quintet_mangle(&q))
+ log_full(arg_quiet ? LOG_DEBUG : LOG_WARNING,
+ "Some capabilities will not be set because they are not in the current bounding set.");
+
+ }
+
+ return capability_quintet_enforce(&q);
+}
+
+static int reset_audit_loginuid(void) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if ((arg_clone_ns_flags & CLONE_NEWPID) == 0)
+ return 0;
+
+ r = read_one_line_file("/proc/self/loginuid", &p);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to read /proc/self/loginuid: %m");
+
+ /* Already reset? */
+ if (streq(p, "4294967295"))
+ return 0;
+
+ r = write_string_file("/proc/self/loginuid", "4294967295", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_error_errno(r,
+ "Failed to reset audit login UID. This probably means that your kernel is too\n"
+ "old and you have audit enabled. Note that the auditing subsystem is known to\n"
+ "be incompatible with containers on old kernels. Please make sure to upgrade\n"
+ "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
+ "using systemd-nspawn. Sleeping for 5s... (%m)");
+
+ sleep(5);
+ }
+
+ return 0;
+}
+
+static int setup_propagate(const char *root) {
+ const char *p, *q;
+ int r;
+
+ (void) mkdir_p("/run/systemd/nspawn/", 0755);
+ (void) mkdir_p("/run/systemd/nspawn/propagate", 0600);
+ p = strjoina("/run/systemd/nspawn/propagate/", arg_machine);
+ (void) mkdir_p(p, 0600);
+
+ r = userns_mkdir(root, "/run/host", 0755, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/host: %m");
+
+ r = userns_mkdir(root, "/run/host/incoming", 0600, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/host/incoming: %m");
+
+ q = prefix_roota(root, "/run/host/incoming");
+ r = mount_nofollow_verbose(LOG_ERR, p, q, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ r = mount_nofollow_verbose(LOG_ERR, NULL, q, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
+ if (r < 0)
+ return r;
+
+ /* machined will MS_MOVE into that directory, and that's only supported for non-shared mounts. */
+ return mount_nofollow_verbose(LOG_ERR, NULL, q, NULL, MS_SLAVE, NULL);
+}
+
+static int setup_machine_id(const char *directory) {
+ const char *etc_machine_id;
+ sd_id128_t id;
+ int r;
+
+ /* If the UUID in the container is already set, then that's what counts, and we use. If it isn't set, and the
+ * caller passed --uuid=, then we'll pass it in the $container_uuid env var to PID 1 of the container. The
+ * assumption is that PID 1 will then write it to /etc/machine-id to make it persistent. If --uuid= is not
+ * passed we generate a random UUID, and pass it via $container_uuid. In effect this means that /etc/machine-id
+ * in the container and our idea of the container UUID will always be in sync (at least if PID 1 in the
+ * container behaves nicely). */
+
+ etc_machine_id = prefix_roota(directory, "/etc/machine-id");
+
+ r = id128_read(etc_machine_id, ID128_PLAIN_OR_UNINIT, &id);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOENT, -ENOMEDIUM)) /* If the file is missing or empty, we don't mind */
+ return log_error_errno(r, "Failed to read machine ID from container image: %m");
+
+ if (sd_id128_is_null(arg_uuid)) {
+ r = sd_id128_randomize(&arg_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire randomized machine UUID: %m");
+ }
+ } else {
+ if (sd_id128_is_null(id))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Machine ID in container image is zero, refusing.");
+
+ arg_uuid = id;
+ }
+
+ return 0;
+}
+
+static int recursive_chown(const char *directory, uid_t shift, uid_t range) {
+ int r;
+
+ assert(directory);
+
+ if (arg_userns_mode == USER_NAMESPACE_NO || !arg_userns_chown)
+ return 0;
+
+ r = path_patch_uid(directory, arg_uid_shift, arg_uid_range);
+ if (r == -EOPNOTSUPP)
+ return log_error_errno(r, "Automatic UID/GID adjusting is only supported for UID/GID ranges starting at multiples of 2^16 with a range of 2^16.");
+ if (r == -EBADE)
+ return log_error_errno(r, "Upper 16 bits of root directory UID and GID do not match.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust UID/GID shift of OS tree: %m");
+ if (r == 0)
+ log_debug("Root directory of image is already owned by the right UID/GID range, skipping recursive chown operation.");
+ else
+ log_debug("Patched directory tree to match UID/GID range.");
+
+ return r;
+}
+
+/*
+ * Return values:
+ * < 0 : wait_for_terminate() failed to get the state of the
+ * container, the container was terminated by a signal, or
+ * failed for an unknown reason. No change is made to the
+ * container argument.
+ * > 0 : The program executed in the container terminated with an
+ * error. The exit code of the program executed in the
+ * container is returned. The container argument has been set
+ * to CONTAINER_TERMINATED.
+ * 0 : The container is being rebooted, has been shut down or exited
+ * successfully. The container argument has been set to either
+ * CONTAINER_TERMINATED or CONTAINER_REBOOTED.
+ *
+ * That is, success is indicated by a return value of zero, and an
+ * error is indicated by a non-zero value.
+ */
+static int wait_for_container(pid_t pid, ContainerStatus *container) {
+ siginfo_t status;
+ int r;
+
+ r = wait_for_terminate(pid, &status);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to wait for container: %m");
+
+ switch (status.si_code) {
+
+ case CLD_EXITED:
+ if (status.si_status == 0)
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s exited successfully.", arg_machine);
+ else
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s failed with error code %i.", arg_machine, status.si_status);
+
+ *container = CONTAINER_TERMINATED;
+ return status.si_status;
+
+ case CLD_KILLED:
+ if (status.si_status == SIGINT) {
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s has been shut down.", arg_machine);
+ *container = CONTAINER_TERMINATED;
+ return 0;
+
+ } else if (status.si_status == SIGHUP) {
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s is being rebooted.", arg_machine);
+ *container = CONTAINER_REBOOTED;
+ return 0;
+ }
+
+ _fallthrough_;
+ case CLD_DUMPED:
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Container %s failed due to unknown reason.", arg_machine);
+ }
+}
+
+static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ pid_t pid;
+
+ pid = PTR_TO_PID(userdata);
+ if (pid > 0) {
+ if (kill(pid, arg_kill_signal) >= 0) {
+ log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination.");
+ sd_event_source_set_userdata(s, NULL);
+ return 0;
+ }
+ }
+
+ sd_event_exit(sd_event_source_get_event(s), 0);
+ return 0;
+}
+
+static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *ssi, void *userdata) {
+ pid_t pid;
+
+ assert(s);
+ assert(ssi);
+
+ pid = PTR_TO_PID(userdata);
+
+ for (;;) {
+ siginfo_t si = {};
+
+ if (waitid(P_ALL, 0, &si, WNOHANG|WNOWAIT|WEXITED) < 0)
+ return log_error_errno(errno, "Failed to waitid(): %m");
+ if (si.si_pid == 0) /* No pending children. */
+ break;
+ if (si.si_pid == pid) {
+ /* The main process we care for has exited. Return from
+ * signal handler but leave the zombie. */
+ sd_event_exit(sd_event_source_get_event(s), 0);
+ break;
+ }
+
+ /* Reap all other children. */
+ (void) waitid(P_PID, si.si_pid, &si, WNOHANG|WEXITED);
+ }
+
+ return 0;
+}
+
+static int on_request_stop(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ pid_t pid;
+
+ assert(m);
+
+ pid = PTR_TO_PID(userdata);
+
+ if (arg_kill_signal > 0) {
+ log_info("Container termination requested. Attempting to halt container.");
+ (void) kill(pid, arg_kill_signal);
+ } else {
+ log_info("Container termination requested. Exiting.");
+ sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0);
+ }
+
+ return 0;
+}
+
+static int determine_names(void) {
+ int r;
+
+ if (arg_template && !arg_directory && arg_machine) {
+
+ /* If --template= was specified then we should not
+ * search for a machine, but instead create a new one
+ * in /var/lib/machine. */
+
+ arg_directory = path_join("/var/lib/machines", arg_machine);
+ if (!arg_directory)
+ return log_oom();
+ }
+
+ if (!arg_image && !arg_directory) {
+ if (arg_machine) {
+ _cleanup_(image_unrefp) Image *i = NULL;
+
+ r = image_find(IMAGE_MACHINE, arg_machine, &i);
+ if (r == -ENOENT)
+ return log_error_errno(r, "No image for machine '%s'.", arg_machine);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find image for machine '%s': %m", arg_machine);
+
+ if (IN_SET(i->type, IMAGE_RAW, IMAGE_BLOCK))
+ r = free_and_strdup(&arg_image, i->path);
+ else
+ r = free_and_strdup(&arg_directory, i->path);
+ if (r < 0)
+ return log_oom();
+
+ if (!arg_ephemeral)
+ arg_read_only = arg_read_only || i->read_only;
+ } else {
+ r = safe_getcwd(&arg_directory);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine current directory: %m");
+ }
+
+ if (!arg_directory && !arg_image)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine path, please use -D or -i.");
+ }
+
+ if (!arg_machine) {
+ if (arg_directory && path_equal(arg_directory, "/"))
+ arg_machine = gethostname_malloc();
+ else {
+ if (arg_image) {
+ char *e;
+
+ arg_machine = strdup(basename(arg_image));
+
+ /* Truncate suffix if there is one */
+ e = endswith(arg_machine, ".raw");
+ if (e)
+ *e = 0;
+ } else
+ arg_machine = strdup(basename(arg_directory));
+ }
+ if (!arg_machine)
+ return log_oom();
+
+ hostname_cleanup(arg_machine);
+ if (!machine_name_is_valid(arg_machine))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine machine name automatically, please use -M.");
+
+ if (arg_ephemeral) {
+ char *b;
+
+ /* Add a random suffix when this is an
+ * ephemeral machine, so that we can run many
+ * instances at once without manually having
+ * to specify -M each time. */
+
+ if (asprintf(&b, "%s-%016" PRIx64, arg_machine, random_u64()) < 0)
+ return log_oom();
+
+ free(arg_machine);
+ arg_machine = b;
+ }
+ }
+
+ return 0;
+}
+
+static int chase_symlinks_and_update(char **p, unsigned flags) {
+ char *chased;
+ int r;
+
+ assert(p);
+
+ if (!*p)
+ return 0;
+
+ r = chase_symlinks(*p, NULL, flags, &chased, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve path %s: %m", *p);
+
+ return free_and_replace(*p, chased);
+}
+
+static int determine_uid_shift(const char *directory) {
+ int r;
+
+ if (arg_userns_mode == USER_NAMESPACE_NO) {
+ arg_uid_shift = 0;
+ return 0;
+ }
+
+ if (arg_uid_shift == UID_INVALID) {
+ struct stat st;
+
+ r = stat(directory, &st);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to determine UID base of %s: %m", directory);
+
+ arg_uid_shift = st.st_uid & UINT32_C(0xffff0000);
+
+ if (arg_uid_shift != (st.st_gid & UINT32_C(0xffff0000)))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "UID and GID base of %s don't match.", directory);
+
+ arg_uid_range = UINT32_C(0x10000);
+ }
+
+ if (arg_uid_shift > (uid_t) -1 - arg_uid_range)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "UID base too high for UID range.");
+
+ return 0;
+}
+
+static unsigned long effective_clone_ns_flags(void) {
+ unsigned long flags = arg_clone_ns_flags;
+
+ if (arg_private_network)
+ flags |= CLONE_NEWNET;
+ if (arg_use_cgns)
+ flags |= CLONE_NEWCGROUP;
+ if (arg_userns_mode != USER_NAMESPACE_NO)
+ flags |= CLONE_NEWUSER;
+
+ return flags;
+}
+
+static int patch_sysctl(void) {
+
+ /* This table is inspired by runc's sysctl() function */
+ static const struct {
+ const char *key;
+ bool prefix;
+ unsigned long clone_flags;
+ } safe_sysctl[] = {
+ { "kernel.hostname", false, CLONE_NEWUTS },
+ { "kernel.domainname", false, CLONE_NEWUTS },
+ { "kernel.msgmax", false, CLONE_NEWIPC },
+ { "kernel.msgmnb", false, CLONE_NEWIPC },
+ { "kernel.msgmni", false, CLONE_NEWIPC },
+ { "kernel.sem", false, CLONE_NEWIPC },
+ { "kernel.shmall", false, CLONE_NEWIPC },
+ { "kernel.shmmax", false, CLONE_NEWIPC },
+ { "kernel.shmmni", false, CLONE_NEWIPC },
+ { "fs.mqueue.", true, CLONE_NEWIPC },
+ { "net.", true, CLONE_NEWNET },
+ };
+
+ unsigned long flags;
+ char **k, **v;
+ int r;
+
+ flags = effective_clone_ns_flags();
+
+ STRV_FOREACH_PAIR(k, v, arg_sysctl) {
+ bool good = false;
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(safe_sysctl); i++) {
+
+ if (!FLAGS_SET(flags, safe_sysctl[i].clone_flags))
+ continue;
+
+ if (safe_sysctl[i].prefix)
+ good = startswith(*k, safe_sysctl[i].key);
+ else
+ good = streq(*k, safe_sysctl[i].key);
+
+ if (good)
+ break;
+ }
+
+ if (!good)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Refusing to write to sysctl '%s', as it is not safe in the selected namespaces.", *k);
+
+ r = sysctl_write(*k, *v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write sysctl '%s': %m", *k);
+ }
+
+ return 0;
+}
+
+static int inner_child(
+ Barrier *barrier,
+ const char *directory,
+ bool secondary,
+ int kmsg_socket,
+ int rtnl_socket,
+ int master_pty_socket,
+ FDSet *fds,
+ char **os_release_pairs) {
+
+ _cleanup_free_ char *home = NULL;
+ char as_uuid[ID128_UUID_STRING_MAX];
+ size_t n_env = 1;
+ const char *envp[] = {
+ "PATH=" DEFAULT_PATH_COMPAT,
+ NULL, /* container */
+ NULL, /* TERM */
+ NULL, /* HOME */
+ NULL, /* USER */
+ NULL, /* LOGNAME */
+ NULL, /* container_uuid */
+ NULL, /* LISTEN_FDS */
+ NULL, /* LISTEN_PID */
+ NULL, /* NOTIFY_SOCKET */
+ NULL, /* CREDENTIALS_DIRECTORY */
+ NULL
+ };
+ const char *exec_target;
+ _cleanup_strv_free_ char **env_use = NULL;
+ int r, which_failed;
+
+ /* This is the "inner" child process, i.e. the one forked off by the "outer" child process, which is the one
+ * the container manager itself forked off. At the time of clone() it gained its own CLONE_NEWNS, CLONE_NEWPID,
+ * CLONE_NEWUTS, CLONE_NEWIPC, CLONE_NEWUSER namespaces. Note that it has its own CLONE_NEWNS namespace,
+ * separate from the CLONE_NEWNS created for the "outer" child, and also separate from the host's CLONE_NEWNS
+ * namespace. The reason for having two levels of CLONE_NEWNS namespaces is that the "inner" one is owned by
+ * the CLONE_NEWUSER namespace of the container, while the "outer" one is owned by the host's CLONE_NEWUSER
+ * namespace.
+ *
+ * Note at this point we have no CLONE_NEWNET namespace yet. We'll acquire that one later through
+ * unshare(). See below. */
+
+ assert(barrier);
+ assert(directory);
+ assert(kmsg_socket >= 0);
+
+ log_debug("Inner child is initializing.");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ /* Tell the parent, that it now can write the UID map. */
+ (void) barrier_place(barrier); /* #1 */
+
+ /* Wait until the parent wrote the UID map */
+ if (!barrier_place_and_sync(barrier)) /* #2 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Parent died too early");
+
+ /* Become the new root user inside our namespace */
+ r = reset_uid_gid();
+ if (r < 0)
+ return log_error_errno(r, "Couldn't become new root: %m");
+
+ /* Creating a new user namespace means all MS_SHARED mounts become MS_SLAVE. Let's put them
+ * back to MS_SHARED here, since that's what we want as defaults. (This will not reconnect
+ * propagation, but simply create new peer groups for all our mounts). */
+ r = mount_follow_verbose(LOG_ERR, NULL, "/", NULL, MS_SHARED|MS_REC, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_all(NULL,
+ arg_mount_settings | MOUNT_IN_USERNS,
+ arg_uid_shift,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+
+ if (!arg_network_namespace_path && arg_private_network) {
+ r = unshare(CLONE_NEWNET);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to unshare network namespace: %m");
+
+ /* Tell the parent that it can setup network interfaces. */
+ (void) barrier_place(barrier); /* #3 */
+ }
+
+ r = mount_sysfs(NULL, arg_mount_settings);
+ if (r < 0)
+ return r;
+
+ /* Wait until we are cgroup-ified, so that we
+ * can mount the right cgroup path writable */
+ if (!barrier_place_and_sync(barrier)) /* #4 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "Parent died too early");
+
+ if (arg_use_cgns) {
+ r = unshare(CLONE_NEWCGROUP);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to unshare cgroup namespace: %m");
+ r = mount_cgroups(
+ "",
+ arg_unified_cgroup_hierarchy,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_apifs_context,
+ true);
+ } else
+ r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy);
+ if (r < 0)
+ return r;
+
+ r = setup_boot_id();
+ if (r < 0)
+ return r;
+
+ r = setup_kmsg(kmsg_socket);
+ if (r < 0)
+ return r;
+ kmsg_socket = safe_close(kmsg_socket);
+
+ r = mount_custom(
+ "/",
+ arg_custom_mounts,
+ arg_n_custom_mounts,
+ 0,
+ arg_selinux_apifs_context,
+ MOUNT_NON_ROOT_ONLY | MOUNT_IN_USERNS);
+ if (r < 0)
+ return r;
+
+ if (setsid() < 0)
+ return log_error_errno(errno, "setsid() failed: %m");
+
+ if (arg_private_network)
+ (void) loopback_setup();
+
+ if (arg_expose_ports) {
+ r = expose_port_send_rtnl(rtnl_socket);
+ if (r < 0)
+ return r;
+ rtnl_socket = safe_close(rtnl_socket);
+ }
+
+ if (arg_console_mode != CONSOLE_PIPE) {
+ _cleanup_close_ int master = -1;
+ _cleanup_free_ char *console = NULL;
+
+ /* Allocate a pty and make it available as /dev/console. */
+ master = openpt_allocate(O_RDWR|O_NONBLOCK, &console);
+ if (master < 0)
+ return log_error_errno(master, "Failed to allocate a pty: %m");
+
+ r = setup_dev_console(console);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up /dev/console: %m");
+
+ r = send_one_fd(master_pty_socket, master, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send master fd: %m");
+ master_pty_socket = safe_close(master_pty_socket);
+
+ r = setup_stdio_as_dev_console();
+ if (r < 0)
+ return r;
+ }
+
+ r = patch_sysctl();
+ if (r < 0)
+ return r;
+
+ if (arg_oom_score_adjust_set) {
+ r = set_oom_score_adjust(arg_oom_score_adjust);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust OOM score: %m");
+ }
+
+ if (arg_cpu_set.set)
+ if (sched_setaffinity(0, arg_cpu_set.allocated, arg_cpu_set.set) < 0)
+ return log_error_errno(errno, "Failed to set CPU affinity: %m");
+
+ (void) setup_hostname();
+
+ if (arg_personality != PERSONALITY_INVALID) {
+ r = safe_personality(arg_personality);
+ if (r < 0)
+ return log_error_errno(r, "personality() failed: %m");
+ } else if (secondary) {
+ r = safe_personality(PER_LINUX32);
+ if (r < 0)
+ return log_error_errno(r, "personality() failed: %m");
+ }
+
+ r = setrlimit_closest_all((const struct rlimit *const*) arg_rlimit, &which_failed);
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
+
+#if HAVE_SECCOMP
+ if (arg_seccomp) {
+
+ if (is_seccomp_available()) {
+
+ r = seccomp_load(arg_seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return log_error_errno(r, "Failed to install seccomp filter: %m");
+ if (r < 0)
+ log_debug_errno(r, "Failed to install seccomp filter: %m");
+ }
+ } else
+#endif
+ {
+ r = setup_seccomp(arg_caps_retain, arg_syscall_allow_list, arg_syscall_deny_list);
+ if (r < 0)
+ return r;
+ }
+
+#if HAVE_SELINUX
+ if (arg_selinux_context)
+ if (setexeccon(arg_selinux_context) < 0)
+ return log_error_errno(errno, "setexeccon(\"%s\") failed: %m", arg_selinux_context);
+#endif
+
+ /* Make sure we keep the caps across the uid/gid dropping, so that we can retain some selected caps
+ * if we need to later on. */
+ if (prctl(PR_SET_KEEPCAPS, 1) < 0)
+ return log_error_errno(errno, "Failed to set PR_SET_KEEPCAPS: %m");
+
+ if (uid_is_valid(arg_uid) || gid_is_valid(arg_gid))
+ r = change_uid_gid_raw(arg_uid, arg_gid, arg_supplementary_gids, arg_n_supplementary_gids, arg_console_mode != CONSOLE_PIPE);
+ else
+ r = change_uid_gid(arg_user, arg_console_mode != CONSOLE_PIPE, &home);
+ if (r < 0)
+ return r;
+
+ r = drop_capabilities(getuid());
+ if (r < 0)
+ return log_error_errno(r, "Dropping capabilities failed: %m");
+
+ if (arg_no_new_privileges)
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
+ return log_error_errno(errno, "Failed to disable new privileges: %m");
+
+ /* LXC sets container=lxc, so follow the scheme here */
+ envp[n_env++] = strjoina("container=", arg_container_service_name);
+
+ envp[n_env] = strv_find_prefix(environ, "TERM=");
+ if (envp[n_env])
+ n_env++;
+
+ if (home || !uid_is_valid(arg_uid) || arg_uid == 0)
+ if (asprintf((char**)(envp + n_env++), "HOME=%s", home ?: "/root") < 0)
+ return log_oom();
+
+ if (arg_user || !uid_is_valid(arg_uid) || arg_uid == 0)
+ if (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ?: "root") < 0 ||
+ asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)
+ return log_oom();
+
+ assert(!sd_id128_is_null(arg_uuid));
+
+ if (asprintf((char**)(envp + n_env++), "container_uuid=%s", id128_to_uuid_string(arg_uuid, as_uuid)) < 0)
+ return log_oom();
+
+ if (fdset_size(fds) > 0) {
+ r = fdset_cloexec(fds, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unset O_CLOEXEC for file descriptors.");
+
+ if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", fdset_size(fds)) < 0) ||
+ (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0))
+ return log_oom();
+ }
+ if (asprintf((char **)(envp + n_env++), "NOTIFY_SOCKET=%s", NSPAWN_NOTIFY_SOCKET_PATH) < 0)
+ return log_oom();
+
+ if (arg_n_credentials > 0) {
+ envp[n_env] = strdup("CREDENTIALS_DIRECTORY=/run/host/credentials");
+ if (!envp[n_env])
+ return log_oom();
+ n_env++;
+ }
+
+ env_use = strv_env_merge(3, envp, os_release_pairs, arg_setenv);
+ if (!env_use)
+ return log_oom();
+
+ /* Let the parent know that we are ready and
+ * wait until the parent is ready with the
+ * setup, too... */
+ if (!barrier_place_and_sync(barrier)) /* #5 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Parent died too early");
+
+ if (arg_chdir)
+ if (chdir(arg_chdir) < 0)
+ return log_error_errno(errno, "Failed to change to specified working directory %s: %m", arg_chdir);
+
+ if (arg_start_mode == START_PID2) {
+ r = stub_pid1(arg_uuid);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_console_mode != CONSOLE_PIPE) {
+ /* So far our pty wasn't controlled by any process. Finally, it's time to change that, if we
+ * are configured for that. Acquire it as controlling tty. */
+ if (ioctl(STDIN_FILENO, TIOCSCTTY) < 0)
+ return log_error_errno(errno, "Failed to acquire controlling TTY: %m");
+ }
+
+ log_debug("Inner child completed, invoking payload.");
+
+ /* Now, explicitly close the log, so that we then can close all remaining fds. Closing the log explicitly first
+ * has the benefit that the logging subsystem knows about it, and is thus ready to be reopened should we need
+ * it again. Note that the other fds closed here are at least the locking and barrier fds. */
+ log_close();
+ log_set_open_when_needed(true);
+
+ (void) fdset_close_others(fds);
+
+ if (arg_start_mode == START_BOOT) {
+ char **a;
+ size_t m;
+
+ /* Automatically search for the init system */
+
+ m = strv_length(arg_parameters);
+ a = newa(char*, m + 2);
+ memcpy_safe(a + 1, arg_parameters, m * sizeof(char*));
+ a[1 + m] = NULL;
+
+ a[0] = (char*) "/usr/lib/systemd/systemd";
+ execve(a[0], a, env_use);
+
+ a[0] = (char*) "/lib/systemd/systemd";
+ execve(a[0], a, env_use);
+
+ a[0] = (char*) "/sbin/init";
+ execve(a[0], a, env_use);
+
+ exec_target = "/usr/lib/systemd/systemd, /lib/systemd/systemd, /sbin/init";
+ } else if (!strv_isempty(arg_parameters)) {
+ const char *dollar_path;
+
+ exec_target = arg_parameters[0];
+
+ /* Use the user supplied search $PATH if there is one, or DEFAULT_PATH_COMPAT if not to search the
+ * binary. */
+ dollar_path = strv_env_get(env_use, "PATH");
+ if (dollar_path) {
+ if (setenv("PATH", dollar_path, 1) < 0)
+ return log_error_errno(errno, "Failed to update $PATH: %m");
+ }
+
+ execvpe(arg_parameters[0], arg_parameters, env_use);
+ } else {
+ if (!arg_chdir)
+ /* If we cannot change the directory, we'll end up in /, that is expected. */
+ (void) chdir(home ?: "/root");
+
+ execle("/bin/bash", "-bash", NULL, env_use);
+ execle("/bin/sh", "-sh", NULL, env_use);
+
+ exec_target = "/bin/bash, /bin/sh";
+ }
+
+ return log_error_errno(errno, "execv(%s) failed: %m", exec_target);
+}
+
+static int setup_notify_child(void) {
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = NSPAWN_NOTIFY_SOCKET_PATH,
+ };
+ int r;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate notification socket: %m");
+
+ (void) mkdir_parents(NSPAWN_NOTIFY_SOCKET_PATH, 0755);
+ (void) sockaddr_un_unlink(&sa.un);
+
+ r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(" NSPAWN_NOTIFY_SOCKET_PATH ") failed: %m");
+
+ r = userns_lchown(NSPAWN_NOTIFY_SOCKET_PATH, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown " NSPAWN_NOTIFY_SOCKET_PATH ": %m");
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+ return TAKE_FD(fd);
+}
+
+static int outer_child(
+ Barrier *barrier,
+ const char *directory,
+ DissectedImage *dissected_image,
+ bool secondary,
+ int pid_socket,
+ int uuid_socket,
+ int notify_socket,
+ int kmsg_socket,
+ int rtnl_socket,
+ int uid_shift_socket,
+ int master_pty_socket,
+ int unified_cgroup_hierarchy_socket,
+ FDSet *fds,
+ int netns_fd) {
+
+ _cleanup_strv_free_ char **os_release_pairs = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ pid_t pid;
+ ssize_t l;
+ int r;
+
+ /* This is the "outer" child process, i.e the one forked off by the container manager itself. It already has
+ * its own CLONE_NEWNS namespace (which was created by the clone()). It still lives in the host's CLONE_NEWPID,
+ * CLONE_NEWUTS, CLONE_NEWIPC, CLONE_NEWUSER and CLONE_NEWNET namespaces. After it completed a number of
+ * initializations a second child (the "inner" one) is forked off it, and it exits. */
+
+ assert(barrier);
+ assert(directory);
+ assert(pid_socket >= 0);
+ assert(uuid_socket >= 0);
+ assert(notify_socket >= 0);
+ assert(master_pty_socket >= 0);
+ assert(kmsg_socket >= 0);
+
+ log_debug("Outer child is initializing.");
+
+ r = load_os_release_pairs_with_prefix("/", "container_host_", &os_release_pairs);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read os-release from host for container, ignoring: %m");
+
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+ return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
+
+ r = reset_audit_loginuid();
+ if (r < 0)
+ return r;
+
+ /* Mark everything as slave, so that we still receive mounts from the real root, but don't propagate
+ * mounts to the real root. */
+ r = mount_follow_verbose(LOG_ERR, NULL, "/", NULL, MS_SLAVE|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ if (dissected_image) {
+ /* If we are operating on a disk image, then mount its root directory now, but leave out the rest. We
+ * can read the UID shift from it if we need to. Further down we'll mount the rest, but then with the
+ * uid shift known. That way we can mount VFAT file systems shifted to the right place right away. This
+ * makes sure ESP partitions and userns are compatible. */
+
+ r = dissected_image_mount_and_warn(
+ dissected_image, directory, arg_uid_shift,
+ DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_DISCARD_ON_LOOP|
+ (arg_read_only ? DISSECT_IMAGE_READ_ONLY : DISSECT_IMAGE_FSCK)|
+ (arg_start_mode == START_BOOT ? DISSECT_IMAGE_VALIDATE_OS : 0));
+ if (r < 0)
+ return r;
+ }
+
+ r = determine_uid_shift(directory);
+ if (r < 0)
+ return r;
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ /* Let the parent know which UID shift we read from the image */
+ l = send(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send UID shift: %m");
+ if (l != sizeof(arg_uid_shift))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short write while sending UID shift.");
+
+ if (arg_userns_mode == USER_NAMESPACE_PICK) {
+ /* When we are supposed to pick the UID shift, the parent will check now whether the UID shift
+ * we just read from the image is available. If yes, it will send the UID shift back to us, if
+ * not it will pick a different one, and send it back to us. */
+
+ l = recv(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to recv UID shift: %m");
+ if (l != sizeof(arg_uid_shift))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short read while receiving UID shift.");
+ }
+
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO,
+ "Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range);
+ }
+
+ if (path_equal(directory, "/")) {
+ /* If the directory we shall boot is the host, let's operate on a bind mount at a different
+ * place, so that we can make changes to its mount structure (for example, to implement
+ * --volatile=) without this interfering with our ability to access files such as
+ * /etc/localtime to copy into the container. Note that we use a fixed place for this
+ * (instead of a temporary directory, since we are living in our own mount namspace here
+ * already, and thus don't need to be afraid of colliding with anyone else's mounts).*/
+ (void) mkdir_p("/run/systemd/nspawn-root", 0755);
+
+ r = mount_nofollow_verbose(LOG_ERR, "/", "/run/systemd/nspawn-root", NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ directory = "/run/systemd/nspawn-root";
+ }
+
+ r = setup_pivot_root(
+ directory,
+ arg_pivot_root_new,
+ arg_pivot_root_old);
+ if (r < 0)
+ return r;
+
+ r = setup_volatile_mode(
+ directory,
+ arg_volatile_mode,
+ arg_uid_shift,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+
+ r = mount_custom(
+ directory,
+ arg_custom_mounts,
+ arg_n_custom_mounts,
+ arg_uid_shift,
+ arg_selinux_apifs_context,
+ MOUNT_ROOT_ONLY);
+ if (r < 0)
+ return r;
+
+ /* Make sure we always have a mount that we can move to root later on. */
+ if (!path_is_mount_point(directory, NULL, 0)) {
+ r = mount_nofollow_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ if (dissected_image) {
+ /* Now we know the uid shift, let's now mount everything else that might be in the image. */
+ r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
+ DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY|DISSECT_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECT_IMAGE_READ_ONLY : DISSECT_IMAGE_FSCK));
+ if (r == -EUCLEAN)
+ return log_error_errno(r, "File system check for image failed: %m");
+ if (r < 0)
+ return log_error_errno(r, "Failed to mount image file system: %m");
+ }
+
+ if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
+ /* OK, we don't know yet which cgroup mode to use yet. Let's figure it out, and tell the parent. */
+
+ r = detect_unified_cgroup_hierarchy_from_image(directory);
+ if (r < 0)
+ return r;
+
+ l = send(unified_cgroup_hierarchy_socket, &arg_unified_cgroup_hierarchy, sizeof(arg_unified_cgroup_hierarchy), MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send cgroup mode: %m");
+ if (l != sizeof(arg_unified_cgroup_hierarchy))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short write while sending cgroup mode.");
+
+ unified_cgroup_hierarchy_socket = safe_close(unified_cgroup_hierarchy_socket);
+ }
+
+ /* Mark everything as shared so our mounts get propagated down. This is
+ * required to make new bind mounts available in systemd services
+ * inside the container that create a new mount namespace.
+ * See https://github.com/systemd/systemd/issues/3860
+ * Further submounts (such as /dev) done after this will inherit the
+ * shared propagation mode.
+ *
+ * IMPORTANT: Do not overmount the root directory anymore from now on to
+ * enable moving the root directory mount to root later on.
+ * https://github.com/systemd/systemd/issues/3847#issuecomment-562735251
+ */
+ r = mount_nofollow_verbose(LOG_ERR, NULL, directory, NULL, MS_SHARED|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ r = recursive_chown(directory, arg_uid_shift, arg_uid_range);
+ if (r < 0)
+ return r;
+
+ r = base_filesystem_create(directory, arg_uid_shift, (gid_t) arg_uid_shift);
+ if (r < 0)
+ return r;
+
+ if (arg_read_only && arg_volatile_mode == VOLATILE_NO &&
+ !has_custom_root_mount(arg_custom_mounts, arg_n_custom_mounts)) {
+ r = bind_remount_recursive(directory, MS_RDONLY, MS_RDONLY, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make tree read-only: %m");
+ }
+
+ r = mount_all(directory,
+ arg_mount_settings,
+ arg_uid_shift,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+
+ r = copy_devnodes(directory);
+ if (r < 0)
+ return r;
+
+ r = make_extra_nodes(directory);
+ if (r < 0)
+ return r;
+
+ (void) dev_setup(directory, arg_uid_shift, arg_uid_shift);
+
+ p = prefix_roota(directory, "/run/host");
+ (void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift);
+
+ r = setup_pts(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_propagate(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_keyring();
+ if (r < 0)
+ return r;
+
+ r = setup_credentials(directory);
+ if (r < 0)
+ return r;
+
+ r = mount_custom(
+ directory,
+ arg_custom_mounts,
+ arg_n_custom_mounts,
+ arg_uid_shift,
+ arg_selinux_apifs_context,
+ MOUNT_NON_ROOT_ONLY);
+ if (r < 0)
+ return r;
+
+ r = setup_timezone(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_resolv_conf(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_machine_id(directory);
+ if (r < 0)
+ return r;
+
+ r = setup_journal(directory);
+ if (r < 0)
+ return r;
+
+ /* The same stuff as the $container env var, but nicely readable for the entire payload */
+ p = prefix_roota(directory, "/run/host/container-manager");
+ (void) write_string_file(p, arg_container_service_name, WRITE_STRING_FILE_CREATE);
+
+ /* The same stuff as the $container_uuid env var */
+ p = prefix_roota(directory, "/run/host/container-uuid");
+ (void) write_string_filef(p, WRITE_STRING_FILE_CREATE, SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid));
+
+ if (!arg_use_cgns) {
+ r = mount_cgroups(
+ directory,
+ arg_unified_cgroup_hierarchy,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_apifs_context,
+ false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_move_root(directory);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move root directory: %m");
+
+ fd = setup_notify_child();
+ if (fd < 0)
+ return fd;
+
+ pid = raw_clone(SIGCHLD|CLONE_NEWNS|
+ arg_clone_ns_flags |
+ (arg_userns_mode != USER_NAMESPACE_NO ? CLONE_NEWUSER : 0));
+ if (pid < 0)
+ return log_error_errno(errno, "Failed to fork inner child: %m");
+ if (pid == 0) {
+ pid_socket = safe_close(pid_socket);
+ uuid_socket = safe_close(uuid_socket);
+ notify_socket = safe_close(notify_socket);
+ uid_shift_socket = safe_close(uid_shift_socket);
+
+ /* The inner child has all namespaces that are requested, so that we all are owned by the
+ * user if user namespaces are turned on. */
+
+ if (arg_network_namespace_path) {
+ r = namespace_enter(-1, -1, netns_fd, -1, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to join network namespace: %m");
+ }
+
+ r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds, os_release_pairs);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ l = send(pid_socket, &pid, sizeof(pid), MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send PID: %m");
+ if (l != sizeof(pid))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short write while sending PID.");
+
+ l = send(uuid_socket, &arg_uuid, sizeof(arg_uuid), MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send machine ID: %m");
+ if (l != sizeof(arg_uuid))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Short write while sending machine ID.");
+
+ l = send_one_fd(notify_socket, fd, 0);
+ if (l < 0)
+ return log_error_errno(l, "Failed to send notify fd: %m");
+
+ pid_socket = safe_close(pid_socket);
+ uuid_socket = safe_close(uuid_socket);
+ notify_socket = safe_close(notify_socket);
+ master_pty_socket = safe_close(master_pty_socket);
+ kmsg_socket = safe_close(kmsg_socket);
+ rtnl_socket = safe_close(rtnl_socket);
+ netns_fd = safe_close(netns_fd);
+
+ return 0;
+}
+
+static int uid_shift_pick(uid_t *shift, LockFile *ret_lock_file) {
+ bool tried_hashed = false;
+ unsigned n_tries = 100;
+ uid_t candidate;
+ int r;
+
+ assert(shift);
+ assert(ret_lock_file);
+ assert(arg_userns_mode == USER_NAMESPACE_PICK);
+ assert(arg_uid_range == 0x10000U);
+
+ candidate = *shift;
+
+ (void) mkdir("/run/systemd/nspawn-uid", 0755);
+
+ for (;;) {
+ char lock_path[STRLEN("/run/systemd/nspawn-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_(release_lock_file) LockFile lf = LOCK_FILE_INIT;
+
+ if (--n_tries <= 0)
+ return -EBUSY;
+
+ if (candidate < CONTAINER_UID_BASE_MIN || candidate > CONTAINER_UID_BASE_MAX)
+ goto next;
+ if ((candidate & UINT32_C(0xFFFF)) != 0)
+ goto next;
+
+ xsprintf(lock_path, "/run/systemd/nspawn-uid/" UID_FMT, candidate);
+ r = make_lock_file(lock_path, LOCK_EX|LOCK_NB, &lf);
+ if (r == -EBUSY) /* Range already taken by another nspawn instance */
+ goto next;
+ if (r < 0)
+ return r;
+
+ /* Make some superficial checks whether the range is currently known in the user database */
+ if (getpwuid(candidate))
+ goto next;
+ if (getpwuid(candidate + UINT32_C(0xFFFE)))
+ goto next;
+ if (getgrgid(candidate))
+ goto next;
+ if (getgrgid(candidate + UINT32_C(0xFFFE)))
+ goto next;
+
+ *ret_lock_file = lf;
+ lf = (struct LockFile) LOCK_FILE_INIT;
+ *shift = candidate;
+ return 0;
+
+ next:
+ if (arg_machine && !tried_hashed) {
+ /* Try to hash the base from the container name */
+
+ static const uint8_t hash_key[] = {
+ 0xe1, 0x56, 0xe0, 0xf0, 0x4a, 0xf0, 0x41, 0xaf,
+ 0x96, 0x41, 0xcf, 0x41, 0x33, 0x94, 0xff, 0x72
+ };
+
+ candidate = (uid_t) siphash24(arg_machine, strlen(arg_machine), hash_key);
+
+ tried_hashed = true;
+ } else
+ random_bytes(&candidate, sizeof(candidate));
+
+ candidate = (candidate % (CONTAINER_UID_BASE_MAX - CONTAINER_UID_BASE_MIN)) + CONTAINER_UID_BASE_MIN;
+ candidate &= (uid_t) UINT32_C(0xFFFF0000);
+ }
+}
+
+static int setup_uid_map(pid_t pid) {
+ char uid_map[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1], line[DECIMAL_STR_MAX(uid_t)*3+3+1];
+ int r;
+
+ assert(pid > 1);
+
+ xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid);
+ xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, arg_uid_shift, arg_uid_range);
+ r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write UID map: %m");
+
+ /* We always assign the same UID and GID ranges */
+ xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid);
+ r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write GID map: %m");
+
+ return 0;
+}
+
+static int nspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ char buf[NOTIFY_BUFFER_MAX+1];
+ char *p = NULL;
+ struct iovec iovec = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)-1,
+ };
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)) control;
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct ucred *ucred;
+ ssize_t n;
+ pid_t inner_child_pid;
+ _cleanup_strv_free_ char **tags = NULL;
+
+ assert(userdata);
+
+ inner_child_pid = PTR_TO_PID(userdata);
+
+ if (revents != EPOLLIN) {
+ log_warning("Got unexpected poll event for notify fd.");
+ return 0;
+ }
+
+ n = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (IN_SET(n, -EAGAIN, -EINTR))
+ return 0;
+ if (n == -EXFULL) {
+ log_warning("Got message with truncated control data (too many fds sent?), ignoring.");
+ return 0;
+ }
+ if (n < 0)
+ return log_warning_errno(n, "Couldn't read notification socket: %m");
+
+ cmsg_close_all(&msghdr);
+
+ ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
+ if (!ucred || ucred->pid != inner_child_pid) {
+ log_debug("Received notify message without valid credentials. Ignoring.");
+ return 0;
+ }
+
+ if ((size_t) n >= sizeof(buf)) {
+ log_warning("Received notify message exceeded maximum size. Ignoring.");
+ return 0;
+ }
+
+ buf[n] = 0;
+ tags = strv_split(buf, "\n\r");
+ if (!tags)
+ return log_oom();
+
+ if (strv_find(tags, "READY=1"))
+ (void) sd_notifyf(false, "READY=1\n");
+
+ p = strv_find_startswith(tags, "STATUS=");
+ if (p)
+ (void) sd_notifyf(false, "STATUS=Container running: %s", p);
+
+ return 0;
+}
+
+static int setup_notify_parent(sd_event *event, int fd, pid_t *inner_child_pid, sd_event_source **notify_event_source) {
+ int r;
+
+ r = sd_event_add_io(event, notify_event_source, fd, EPOLLIN, nspawn_dispatch_notify_fd, inner_child_pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate notify event source: %m");
+
+ (void) sd_event_source_set_description(*notify_event_source, "nspawn-notify");
+
+ return 0;
+}
+
+static int merge_settings(Settings *settings, const char *path) {
+ int rl;
+
+ assert(settings);
+ assert(path);
+
+ /* Copy over bits from the settings, unless they have been explicitly masked by command line switches. Note
+ * that this steals the fields of the Settings* structure, and hence modifies it. */
+
+ if ((arg_settings_mask & SETTING_START_MODE) == 0 &&
+ settings->start_mode >= 0) {
+ arg_start_mode = settings->start_mode;
+ strv_free_and_replace(arg_parameters, settings->parameters);
+ }
+
+ if ((arg_settings_mask & SETTING_EPHEMERAL) == 0)
+ arg_ephemeral = settings->ephemeral;
+
+ if ((arg_settings_mask & SETTING_DIRECTORY) == 0 &&
+ settings->root) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring root directory setting, file %s is not trusted.", path);
+ else
+ free_and_replace(arg_directory, settings->root);
+ }
+
+ if ((arg_settings_mask & SETTING_PIVOT_ROOT) == 0 &&
+ settings->pivot_root_new) {
+ free_and_replace(arg_pivot_root_new, settings->pivot_root_new);
+ free_and_replace(arg_pivot_root_old, settings->pivot_root_old);
+ }
+
+ if ((arg_settings_mask & SETTING_WORKING_DIRECTORY) == 0 &&
+ settings->working_directory)
+ free_and_replace(arg_chdir, settings->working_directory);
+
+ if ((arg_settings_mask & SETTING_ENVIRONMENT) == 0 &&
+ settings->environment)
+ strv_free_and_replace(arg_setenv, settings->environment);
+
+ if ((arg_settings_mask & SETTING_USER) == 0) {
+
+ if (settings->user)
+ free_and_replace(arg_user, settings->user);
+
+ if (uid_is_valid(settings->uid))
+ arg_uid = settings->uid;
+ if (gid_is_valid(settings->gid))
+ arg_gid = settings->gid;
+ if (settings->n_supplementary_gids > 0) {
+ free_and_replace(arg_supplementary_gids, settings->supplementary_gids);
+ arg_n_supplementary_gids = settings->n_supplementary_gids;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_CAPABILITY) == 0) {
+ uint64_t plus, minus;
+ uint64_t network_minus = 0;
+
+ /* Note that we copy both the simple plus/minus caps here, and the full quintet from the
+ * Settings structure */
+
+ plus = settings->capability;
+ minus = settings->drop_capability;
+
+ if ((arg_settings_mask & SETTING_NETWORK) == 0) {
+ if (settings_private_network(settings))
+ plus |= UINT64_C(1) << CAP_NET_ADMIN;
+ else
+ network_minus |= UINT64_C(1) << CAP_NET_ADMIN;
+ }
+
+ if (!arg_settings_trusted && plus != 0) {
+ if (settings->capability != 0)
+ log_warning("Ignoring Capability= setting, file %s is not trusted.", path);
+ } else {
+ arg_caps_retain &= ~network_minus;
+ arg_caps_retain |= plus;
+ }
+
+ arg_caps_retain &= ~minus;
+
+ /* Copy the full capabilities over too */
+ if (capability_quintet_is_set(&settings->full_capabilities)) {
+ if (!arg_settings_trusted)
+ log_warning("Ignoring capability settings, file %s is not trusted.", path);
+ else
+ arg_full_capabilities = settings->full_capabilities;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_KILL_SIGNAL) == 0 &&
+ settings->kill_signal > 0)
+ arg_kill_signal = settings->kill_signal;
+
+ if ((arg_settings_mask & SETTING_PERSONALITY) == 0 &&
+ settings->personality != PERSONALITY_INVALID)
+ arg_personality = settings->personality;
+
+ if ((arg_settings_mask & SETTING_MACHINE_ID) == 0 &&
+ !sd_id128_is_null(settings->machine_id)) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring MachineID= setting, file %s is not trusted.", path);
+ else
+ arg_uuid = settings->machine_id;
+ }
+
+ if ((arg_settings_mask & SETTING_READ_ONLY) == 0 &&
+ settings->read_only >= 0)
+ arg_read_only = settings->read_only;
+
+ if ((arg_settings_mask & SETTING_VOLATILE_MODE) == 0 &&
+ settings->volatile_mode != _VOLATILE_MODE_INVALID)
+ arg_volatile_mode = settings->volatile_mode;
+
+ if ((arg_settings_mask & SETTING_CUSTOM_MOUNTS) == 0 &&
+ settings->n_custom_mounts > 0) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", path);
+ else {
+ custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
+ arg_custom_mounts = TAKE_PTR(settings->custom_mounts);
+ arg_n_custom_mounts = settings->n_custom_mounts;
+ settings->n_custom_mounts = 0;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_NETWORK) == 0 &&
+ (settings->private_network >= 0 ||
+ settings->network_veth >= 0 ||
+ settings->network_bridge ||
+ settings->network_zone ||
+ settings->network_interfaces ||
+ settings->network_macvlan ||
+ settings->network_ipvlan ||
+ settings->network_veth_extra ||
+ settings->network_namespace_path)) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring network settings, file %s is not trusted.", path);
+ else {
+ arg_network_veth = settings_network_veth(settings);
+ arg_private_network = settings_private_network(settings);
+
+ strv_free_and_replace(arg_network_interfaces, settings->network_interfaces);
+ strv_free_and_replace(arg_network_macvlan, settings->network_macvlan);
+ strv_free_and_replace(arg_network_ipvlan, settings->network_ipvlan);
+ strv_free_and_replace(arg_network_veth_extra, settings->network_veth_extra);
+
+ free_and_replace(arg_network_bridge, settings->network_bridge);
+ free_and_replace(arg_network_zone, settings->network_zone);
+
+ free_and_replace(arg_network_namespace_path, settings->network_namespace_path);
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_EXPOSE_PORTS) == 0 &&
+ settings->expose_ports) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring Port= setting, file %s is not trusted.", path);
+ else {
+ expose_port_free_all(arg_expose_ports);
+ arg_expose_ports = TAKE_PTR(settings->expose_ports);
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_USERNS) == 0 &&
+ settings->userns_mode != _USER_NAMESPACE_MODE_INVALID) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring PrivateUsers= and PrivateUsersChown= settings, file %s is not trusted.", path);
+ else {
+ arg_userns_mode = settings->userns_mode;
+ arg_uid_shift = settings->uid_shift;
+ arg_uid_range = settings->uid_range;
+ arg_userns_chown = settings->userns_chown;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_NOTIFY_READY) == 0)
+ arg_notify_ready = settings->notify_ready;
+
+ if ((arg_settings_mask & SETTING_SYSCALL_FILTER) == 0) {
+
+ if (!arg_settings_trusted && !strv_isempty(settings->syscall_allow_list))
+ log_warning("Ignoring SystemCallFilter= settings, file %s is not trusted.", path);
+ else {
+ strv_free_and_replace(arg_syscall_allow_list, settings->syscall_allow_list);
+ strv_free_and_replace(arg_syscall_deny_list, settings->syscall_deny_list);
+ }
+
+#if HAVE_SECCOMP
+ if (!arg_settings_trusted && settings->seccomp)
+ log_warning("Ignoring SECCOMP filter, file %s is not trusted.", path);
+ else {
+ seccomp_release(arg_seccomp);
+ arg_seccomp = TAKE_PTR(settings->seccomp);
+ }
+#endif
+ }
+
+ for (rl = 0; rl < _RLIMIT_MAX; rl ++) {
+ if ((arg_settings_mask & (SETTING_RLIMIT_FIRST << rl)))
+ continue;
+
+ if (!settings->rlimit[rl])
+ continue;
+
+ if (!arg_settings_trusted) {
+ log_warning("Ignoring Limit%s= setting, file '%s' is not trusted.", rlimit_to_string(rl), path);
+ continue;
+ }
+
+ free_and_replace(arg_rlimit[rl], settings->rlimit[rl]);
+ }
+
+ if ((arg_settings_mask & SETTING_HOSTNAME) == 0 &&
+ settings->hostname)
+ free_and_replace(arg_hostname, settings->hostname);
+
+ if ((arg_settings_mask & SETTING_NO_NEW_PRIVILEGES) == 0 &&
+ settings->no_new_privileges >= 0)
+ arg_no_new_privileges = settings->no_new_privileges;
+
+ if ((arg_settings_mask & SETTING_OOM_SCORE_ADJUST) == 0 &&
+ settings->oom_score_adjust_set) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring OOMScoreAdjust= setting, file '%s' is not trusted.", path);
+ else {
+ arg_oom_score_adjust = settings->oom_score_adjust;
+ arg_oom_score_adjust_set = true;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_CPU_AFFINITY) == 0 &&
+ settings->cpu_set.set) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring CPUAffinity= setting, file '%s' is not trusted.", path);
+ else {
+ cpu_set_reset(&arg_cpu_set);
+ arg_cpu_set = settings->cpu_set;
+ settings->cpu_set = (CPUSet) {};
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_RESOLV_CONF) == 0 &&
+ settings->resolv_conf != _RESOLV_CONF_MODE_INVALID)
+ arg_resolv_conf = settings->resolv_conf;
+
+ if ((arg_settings_mask & SETTING_LINK_JOURNAL) == 0 &&
+ settings->link_journal != _LINK_JOURNAL_INVALID) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring journal link setting, file '%s' is not trusted.", path);
+ else {
+ arg_link_journal = settings->link_journal;
+ arg_link_journal_try = settings->link_journal_try;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_TIMEZONE) == 0 &&
+ settings->timezone != _TIMEZONE_MODE_INVALID)
+ arg_timezone = settings->timezone;
+
+ if ((arg_settings_mask & SETTING_SLICE) == 0 &&
+ settings->slice) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring slice setting, file '%s' is not trusted.", path);
+ else
+ free_and_replace(arg_slice, settings->slice);
+ }
+
+ if ((arg_settings_mask & SETTING_USE_CGNS) == 0 &&
+ settings->use_cgns >= 0) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring cgroup namespace setting, file '%s' is not trusted.", path);
+ else
+ arg_use_cgns = settings->use_cgns;
+ }
+
+ if ((arg_settings_mask & SETTING_CLONE_NS_FLAGS) == 0 &&
+ settings->clone_ns_flags != (unsigned long) -1) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring namespace setting, file '%s' is not trusted.", path);
+ else
+ arg_clone_ns_flags = settings->clone_ns_flags;
+ }
+
+ if ((arg_settings_mask & SETTING_CONSOLE_MODE) == 0 &&
+ settings->console_mode >= 0) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring console mode setting, file '%s' is not trusted.", path);
+ else
+ arg_console_mode = settings->console_mode;
+ }
+
+ /* The following properties can only be set through the OCI settings logic, not from the command line, hence we
+ * don't consult arg_settings_mask for them. */
+
+ sd_bus_message_unref(arg_property_message);
+ arg_property_message = TAKE_PTR(settings->properties);
+
+ arg_console_width = settings->console_width;
+ arg_console_height = settings->console_height;
+
+ device_node_array_free(arg_extra_nodes, arg_n_extra_nodes);
+ arg_extra_nodes = TAKE_PTR(settings->extra_nodes);
+ arg_n_extra_nodes = settings->n_extra_nodes;
+
+ return 0;
+}
+
+static int load_settings(void) {
+ _cleanup_(settings_freep) Settings *settings = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ const char *fn, *i;
+ int r;
+
+ if (arg_oci_bundle)
+ return 0;
+
+ /* If all settings are masked, there's no point in looking for
+ * the settings file */
+ if (FLAGS_SET(arg_settings_mask, _SETTINGS_MASK_ALL))
+ return 0;
+
+ fn = strjoina(arg_machine, ".nspawn");
+
+ /* We first look in the admin's directories in /etc and /run */
+ FOREACH_STRING(i, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
+ _cleanup_free_ char *j = NULL;
+
+ j = path_join(i, fn);
+ if (!j)
+ return log_oom();
+
+ f = fopen(j, "re");
+ if (f) {
+ p = TAKE_PTR(j);
+
+ /* By default, we trust configuration from /etc and /run */
+ if (arg_settings_trusted < 0)
+ arg_settings_trusted = true;
+
+ break;
+ }
+
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open %s: %m", j);
+ }
+
+ if (!f) {
+ /* After that, let's look for a file next to the
+ * actual image we shall boot. */
+
+ if (arg_image) {
+ p = file_in_same_dir(arg_image, fn);
+ if (!p)
+ return log_oom();
+ } else if (arg_directory && !path_equal(arg_directory, "/")) {
+ p = file_in_same_dir(arg_directory, fn);
+ if (!p)
+ return log_oom();
+ }
+
+ if (p) {
+ f = fopen(p, "re");
+ if (!f && errno != ENOENT)
+ return log_error_errno(errno, "Failed to open %s: %m", p);
+
+ /* By default, we do not trust configuration from /var/lib/machines */
+ if (arg_settings_trusted < 0)
+ arg_settings_trusted = false;
+ }
+ }
+
+ if (!f)
+ return 0;
+
+ log_debug("Settings are trusted: %s", yes_no(arg_settings_trusted));
+
+ r = settings_load(f, p, &settings);
+ if (r < 0)
+ return r;
+
+ return merge_settings(settings, p);
+}
+
+static int load_oci_bundle(void) {
+ _cleanup_(settings_freep) Settings *settings = NULL;
+ int r;
+
+ if (!arg_oci_bundle)
+ return 0;
+
+ /* By default let's trust OCI bundles */
+ if (arg_settings_trusted < 0)
+ arg_settings_trusted = true;
+
+ r = oci_load(NULL, arg_oci_bundle, &settings);
+ if (r < 0)
+ return r;
+
+ return merge_settings(settings, arg_oci_bundle);
+}
+
+static int run_container(
+ DissectedImage *dissected_image,
+ bool secondary,
+ FDSet *fds,
+ char veth_name[IFNAMSIZ], bool *veth_created,
+ union in_addr_union *exposed,
+ int *master, pid_t *pid, int *ret) {
+
+ static const struct sigaction sa = {
+ .sa_handler = nop_signal_handler,
+ .sa_flags = SA_NOCLDSTOP|SA_RESTART,
+ };
+
+ _cleanup_(release_lock_file) LockFile uid_shift_lock = LOCK_FILE_INIT;
+ _cleanup_close_ int etc_passwd_lock = -1;
+ _cleanup_close_pair_ int
+ kmsg_socket_pair[2] = { -1, -1 },
+ rtnl_socket_pair[2] = { -1, -1 },
+ pid_socket_pair[2] = { -1, -1 },
+ uuid_socket_pair[2] = { -1, -1 },
+ notify_socket_pair[2] = { -1, -1 },
+ uid_shift_socket_pair[2] = { -1, -1 },
+ master_pty_socket_pair[2] = { -1, -1 },
+ unified_cgroup_hierarchy_socket_pair[2] = { -1, -1};
+
+ _cleanup_close_ int notify_socket = -1;
+ _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ ContainerStatus container_status = 0;
+ int ifi = 0, r;
+ ssize_t l;
+ sigset_t mask_chld;
+ _cleanup_close_ int child_netns_fd = -1;
+
+ assert_se(sigemptyset(&mask_chld) == 0);
+ assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
+
+ if (arg_userns_mode == USER_NAMESPACE_PICK) {
+ /* When we shall pick the UID/GID range, let's first lock /etc/passwd, so that we can safely
+ * check with getpwuid() if the specific user already exists. Note that /etc might be
+ * read-only, in which case this will fail with EROFS. But that's really OK, as in that case we
+ * can be reasonably sure that no users are going to be added. Note that getpwuid() checks are
+ * really just an extra safety net. We kinda assume that the UID range we allocate from is
+ * really ours. */
+
+ etc_passwd_lock = take_etc_passwd_lock(NULL);
+ if (etc_passwd_lock < 0 && etc_passwd_lock != -EROFS)
+ return log_error_errno(etc_passwd_lock, "Failed to take /etc/passwd lock: %m");
+ }
+
+ r = barrier_create(&barrier);
+ if (r < 0)
+ return log_error_errno(r, "Cannot initialize IPC barrier: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create kmsg socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, rtnl_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create rtnl socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pid_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create pid socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uuid_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create id socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, notify_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create notify socket pair: %m");
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, master_pty_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create console socket pair: %m");
+
+ if (arg_userns_mode != USER_NAMESPACE_NO)
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create uid shift socket pair: %m");
+
+ if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN)
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, unified_cgroup_hierarchy_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create unified cgroup socket pair: %m");
+
+ /* Child can be killed before execv(), so handle SIGCHLD in order to interrupt
+ * parent's blocking calls and give it a chance to call wait() and terminate. */
+ r = sigprocmask(SIG_UNBLOCK, &mask_chld, NULL);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to change the signal mask: %m");
+
+ r = sigaction(SIGCHLD, &sa, NULL);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
+
+ if (arg_network_namespace_path) {
+ child_netns_fd = open(arg_network_namespace_path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (child_netns_fd < 0)
+ return log_error_errno(errno, "Cannot open file %s: %m", arg_network_namespace_path);
+
+ r = fd_is_network_ns(child_netns_fd);
+ if (r == -EUCLEAN)
+ log_debug_errno(r, "Cannot determine if passed network namespace path '%s' really refers to a network namespace, assuming it does.", arg_network_namespace_path);
+ else if (r < 0)
+ return log_error_errno(r, "Failed to check %s fs type: %m", arg_network_namespace_path);
+ else if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path %s doesn't refer to a network namespace, refusing.", arg_network_namespace_path);
+ }
+
+ *pid = raw_clone(SIGCHLD|CLONE_NEWNS);
+ if (*pid < 0)
+ return log_error_errno(errno, "clone() failed%s: %m",
+ errno == EINVAL ?
+ ", do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in)" : "");
+
+ if (*pid == 0) {
+ /* The outer child only has a file system namespace. */
+ barrier_set_role(&barrier, BARRIER_CHILD);
+
+ kmsg_socket_pair[0] = safe_close(kmsg_socket_pair[0]);
+ rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
+ pid_socket_pair[0] = safe_close(pid_socket_pair[0]);
+ uuid_socket_pair[0] = safe_close(uuid_socket_pair[0]);
+ notify_socket_pair[0] = safe_close(notify_socket_pair[0]);
+ master_pty_socket_pair[0] = safe_close(master_pty_socket_pair[0]);
+ uid_shift_socket_pair[0] = safe_close(uid_shift_socket_pair[0]);
+ unified_cgroup_hierarchy_socket_pair[0] = safe_close(unified_cgroup_hierarchy_socket_pair[0]);
+
+ (void) reset_all_signal_handlers();
+ (void) reset_signal_mask();
+
+ r = outer_child(&barrier,
+ arg_directory,
+ dissected_image,
+ secondary,
+ pid_socket_pair[1],
+ uuid_socket_pair[1],
+ notify_socket_pair[1],
+ kmsg_socket_pair[1],
+ rtnl_socket_pair[1],
+ uid_shift_socket_pair[1],
+ master_pty_socket_pair[1],
+ unified_cgroup_hierarchy_socket_pair[1],
+ fds,
+ child_netns_fd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ barrier_set_role(&barrier, BARRIER_PARENT);
+
+ fdset_close(fds);
+
+ kmsg_socket_pair[1] = safe_close(kmsg_socket_pair[1]);
+ rtnl_socket_pair[1] = safe_close(rtnl_socket_pair[1]);
+ pid_socket_pair[1] = safe_close(pid_socket_pair[1]);
+ uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]);
+ notify_socket_pair[1] = safe_close(notify_socket_pair[1]);
+ master_pty_socket_pair[1] = safe_close(master_pty_socket_pair[1]);
+ uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]);
+ unified_cgroup_hierarchy_socket_pair[1] = safe_close(unified_cgroup_hierarchy_socket_pair[1]);
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ /* The child just let us know the UID shift it might have read from the image. */
+ l = recv(uid_shift_socket_pair[0], &arg_uid_shift, sizeof arg_uid_shift, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read UID shift: %m");
+ if (l != sizeof arg_uid_shift)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading UID shift.");
+
+ if (arg_userns_mode == USER_NAMESPACE_PICK) {
+ /* If we are supposed to pick the UID shift, let's try to use the shift read from the
+ * image, but if that's already in use, pick a new one, and report back to the child,
+ * which one we now picked. */
+
+ r = uid_shift_pick(&arg_uid_shift, &uid_shift_lock);
+ if (r < 0)
+ return log_error_errno(r, "Failed to pick suitable UID/GID range: %m");
+
+ l = send(uid_shift_socket_pair[0], &arg_uid_shift, sizeof arg_uid_shift, MSG_NOSIGNAL);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to send UID shift: %m");
+ if (l != sizeof arg_uid_shift)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while writing UID shift.");
+ }
+ }
+
+ if (arg_unified_cgroup_hierarchy == CGROUP_UNIFIED_UNKNOWN) {
+ /* The child let us know the support cgroup mode it might have read from the image. */
+ l = recv(unified_cgroup_hierarchy_socket_pair[0], &arg_unified_cgroup_hierarchy, sizeof(arg_unified_cgroup_hierarchy), 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read cgroup mode: %m");
+ if (l != sizeof(arg_unified_cgroup_hierarchy))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading cgroup mode (%zu bytes).%s",
+ l, l == 0 ? " The child is most likely dead." : "");
+ }
+
+ /* Wait for the outer child. */
+ r = wait_for_terminate_and_check("(sd-namespace)", *pid, WAIT_LOG_ABNORMAL);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ /* And now retrieve the PID of the inner child. */
+ l = recv(pid_socket_pair[0], pid, sizeof *pid, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read inner child PID: %m");
+ if (l != sizeof *pid)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading inner child PID.");
+
+ /* We also retrieve container UUID in case it was generated by outer child */
+ l = recv(uuid_socket_pair[0], &arg_uuid, sizeof arg_uuid, 0);
+ if (l < 0)
+ return log_error_errno(errno, "Failed to read container machine ID: %m");
+ if (l != sizeof(arg_uuid))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading container machined ID.");
+
+ /* We also retrieve the socket used for notifications generated by outer child */
+ notify_socket = receive_one_fd(notify_socket_pair[0], 0);
+ if (notify_socket < 0)
+ return log_error_errno(notify_socket,
+ "Failed to receive notification socket from the outer child: %m");
+
+ log_debug("Init process invoked as PID "PID_FMT, *pid);
+
+ if (arg_userns_mode != USER_NAMESPACE_NO) {
+ if (!barrier_place_and_sync(&barrier)) /* #1 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early.");
+
+ r = setup_uid_map(*pid);
+ if (r < 0)
+ return r;
+
+ (void) barrier_place(&barrier); /* #2 */
+ }
+
+ if (arg_private_network) {
+ if (!arg_network_namespace_path) {
+ /* Wait until the child has unshared its network namespace. */
+ if (!barrier_place_and_sync(&barrier)) /* #3 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early");
+ }
+
+ if (child_netns_fd < 0) {
+ /* Make sure we have an open file descriptor to the child's network
+ * namespace so it stays alive even if the child exits. */
+ r = namespace_open(*pid, NULL, NULL, &child_netns_fd, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open child network namespace: %m");
+ }
+
+ r = move_network_interfaces(child_netns_fd, arg_network_interfaces);
+ if (r < 0)
+ return r;
+
+ if (arg_network_veth) {
+ r = setup_veth(arg_machine, *pid, veth_name,
+ arg_network_bridge || arg_network_zone);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ ifi = r;
+
+ if (arg_network_bridge) {
+ /* Add the interface to a bridge */
+ r = setup_bridge(veth_name, arg_network_bridge, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ ifi = r;
+ } else if (arg_network_zone) {
+ /* Add the interface to a bridge, possibly creating it */
+ r = setup_bridge(veth_name, arg_network_zone, true);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ ifi = r;
+ }
+ }
+
+ r = setup_veth_extra(arg_machine, *pid, arg_network_veth_extra);
+ if (r < 0)
+ return r;
+
+ /* We created the primary and extra veth links now; let's remember this, so that we know to
+ remove them later on. Note that we don't bother with removing veth links that were created
+ here when their setup failed half-way, because in that case the kernel should be able to
+ remove them on its own, since they cannot be referenced by anything yet. */
+ *veth_created = true;
+
+ r = setup_macvlan(arg_machine, *pid, arg_network_macvlan);
+ if (r < 0)
+ return r;
+
+ r = setup_ipvlan(arg_machine, *pid, arg_network_ipvlan);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_register || !arg_keep_unit) {
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open system bus: %m");
+
+ r = sd_bus_set_close_on_exit(bus, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable close-on-exit behaviour: %m");
+ }
+
+ if (!arg_keep_unit) {
+ /* When a new scope is created for this container, then we'll be registered as its controller, in which
+ * case PID 1 will send us a friendly RequestStop signal, when it is asked to terminate the
+ * scope. Let's hook into that, and cleanly shut down the container, and print a friendly message. */
+
+ r = sd_bus_match_signal_async(
+ bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ NULL,
+ "org.freedesktop.systemd1.Scope",
+ "RequestStop",
+ on_request_stop, NULL, PID_TO_PTR(*pid));
+ if (r < 0)
+ return log_error_errno(r, "Failed to request RequestStop match: %m");
+ }
+
+ if (arg_register) {
+ r = register_machine(
+ bus,
+ arg_machine,
+ *pid,
+ arg_directory,
+ arg_uuid,
+ ifi,
+ arg_slice,
+ arg_custom_mounts, arg_n_custom_mounts,
+ arg_kill_signal,
+ arg_property,
+ arg_property_message,
+ arg_keep_unit,
+ arg_container_service_name);
+ if (r < 0)
+ return r;
+
+ } else if (!arg_keep_unit) {
+ r = allocate_scope(
+ bus,
+ arg_machine,
+ *pid,
+ arg_slice,
+ arg_custom_mounts, arg_n_custom_mounts,
+ arg_kill_signal,
+ arg_property,
+ arg_property_message);
+ if (r < 0)
+ return r;
+
+ } else if (arg_slice || arg_property)
+ log_notice("Machine and scope registration turned off, --slice= and --property= settings will have no effect.");
+
+ r = create_subcgroup(*pid, arg_keep_unit, arg_unified_cgroup_hierarchy);
+ if (r < 0)
+ return r;
+
+ r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift);
+ if (r < 0)
+ return r;
+
+ r = chown_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift);
+ if (r < 0)
+ return r;
+
+ /* Notify the child that the parent is ready with all
+ * its setup (including cgroup-ification), and that
+ * the child can now hand over control to the code to
+ * run inside the container. */
+ (void) barrier_place(&barrier); /* #4 */
+
+ /* Block SIGCHLD here, before notifying child.
+ * process_pty() will handle it with the other signals. */
+ assert_se(sigprocmask(SIG_BLOCK, &mask_chld, NULL) >= 0);
+
+ /* Reset signal to default */
+ r = default_signals(SIGCHLD, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset SIGCHLD: %m");
+
+ r = sd_event_new(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default event source: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ if (bus) {
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+ }
+
+ r = setup_notify_parent(event, notify_socket, PID_TO_PTR(*pid), &notify_event_source);
+ if (r < 0)
+ return r;
+
+ /* Let the child know that we are ready and wait that the child is completely ready now. */
+ if (!barrier_place_and_sync(&barrier)) /* #5 */
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH), "Child died too early.");
+
+ /* At this point we have made use of the UID we picked, and thus nss-systemd/systemd-machined.service
+ * will make them appear in getpwuid(), thus we can release the /etc/passwd lock. */
+ etc_passwd_lock = safe_close(etc_passwd_lock);
+
+ (void) sd_notifyf(false,
+ "STATUS=Container running.\n"
+ "X_NSPAWN_LEADER_PID=" PID_FMT, *pid);
+ if (!arg_notify_ready)
+ (void) sd_notify(false, "READY=1\n");
+
+ if (arg_kill_signal > 0) {
+ /* Try to kill the init system on SIGINT or SIGTERM */
+ (void) sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(*pid));
+ (void) sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(*pid));
+ } else {
+ /* Immediately exit */
+ (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ }
+
+ /* Exit when the child exits */
+ (void) sd_event_add_signal(event, NULL, SIGCHLD, on_sigchld, PID_TO_PTR(*pid));
+
+ if (arg_expose_ports) {
+ r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, exposed, &rtnl);
+ if (r < 0)
+ return r;
+
+ (void) expose_port_execute(rtnl, arg_expose_ports, exposed);
+ }
+
+ rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
+
+ if (arg_console_mode != CONSOLE_PIPE) {
+ _cleanup_close_ int fd = -1;
+ PTYForwardFlags flags = 0;
+
+ /* Retrieve the master pty allocated by inner child */
+ fd = receive_one_fd(master_pty_socket_pair[0], 0);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to receive master pty from the inner child: %m");
+
+ switch (arg_console_mode) {
+
+ case CONSOLE_READ_ONLY:
+ flags |= PTY_FORWARD_READ_ONLY;
+
+ _fallthrough_;
+
+ case CONSOLE_INTERACTIVE:
+ flags |= PTY_FORWARD_IGNORE_VHANGUP;
+
+ r = pty_forward_new(event, fd, flags, &forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PTY forwarder: %m");
+
+ if (arg_console_width != (unsigned) -1 || arg_console_height != (unsigned) -1)
+ (void) pty_forward_set_width_height(forward,
+ arg_console_width,
+ arg_console_height);
+ break;
+
+ default:
+ assert(arg_console_mode == CONSOLE_PASSIVE);
+ }
+
+ *master = TAKE_FD(fd);
+ }
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ if (forward) {
+ char last_char = 0;
+
+ (void) pty_forward_get_last_char(forward, &last_char);
+ forward = pty_forward_free(forward);
+
+ if (!arg_quiet && last_char != '\n')
+ putc('\n', stdout);
+ }
+
+ /* Kill if it is not dead yet anyway */
+ if (!arg_register && !arg_keep_unit && bus)
+ terminate_scope(bus, arg_machine);
+
+ /* Normally redundant, but better safe than sorry */
+ (void) kill(*pid, SIGKILL);
+
+ if (arg_private_network) {
+ /* Move network interfaces back to the parent network namespace. We use `safe_fork`
+ * to avoid having to move the parent to the child network namespace. */
+ r = safe_fork(NULL, FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_WAIT|FORK_LOG, NULL);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ _cleanup_close_ int parent_netns_fd = -1;
+
+ r = namespace_open(getpid(), NULL, NULL, &parent_netns_fd, NULL, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open parent network namespace: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = namespace_enter(-1, -1, child_netns_fd, -1, -1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to enter child network namespace: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ r = move_network_interfaces(parent_netns_fd, arg_network_interfaces);
+ if (r < 0)
+ log_error_errno(r, "Failed to move network interfaces back to parent network namespace: %m");
+
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+ }
+
+ r = wait_for_container(*pid, &container_status);
+ *pid = 0;
+
+ /* Tell machined that we are gone. */
+ if (bus)
+ (void) unregister_machine(bus, arg_machine);
+
+ if (r < 0)
+ /* We failed to wait for the container, or the container exited abnormally. */
+ return r;
+ if (r > 0 || container_status == CONTAINER_TERMINATED) {
+ /* r > 0 → The container exited with a non-zero status.
+ * As a special case, we need to replace 133 with a different value,
+ * because 133 is special-cased in the service file to reboot the container.
+ * otherwise → The container exited with zero status and a reboot was not requested.
+ */
+ if (r == EXIT_FORCE_RESTART)
+ r = EXIT_FAILURE; /* replace 133 with the general failure code */
+ *ret = r;
+ return 0; /* finito */
+ }
+
+ /* CONTAINER_REBOOTED, loop again */
+
+ if (arg_keep_unit) {
+ /* Special handling if we are running as a service: instead of simply
+ * restarting the machine we want to restart the entire service, so let's
+ * inform systemd about this with the special exit code 133. The service
+ * file uses RestartForceExitStatus=133 so that this results in a full
+ * nspawn restart. This is necessary since we might have cgroup parameters
+ * set we want to have flushed out. */
+ *ret = EXIT_FORCE_RESTART;
+ return 0; /* finito */
+ }
+
+ expose_port_flush(arg_expose_ports, exposed);
+
+ (void) remove_veth_links(veth_name, arg_network_veth_extra);
+ *veth_created = false;
+ return 1; /* loop again */
+}
+
+static int initialize_rlimits(void) {
+ /* The default resource limits the kernel passes to PID 1, as per kernel 4.16. Let's pass our container payload
+ * the same values as the kernel originally passed to PID 1, in order to minimize differences between host and
+ * container execution environments. */
+
+ static const struct rlimit kernel_defaults[_RLIMIT_MAX] = {
+ [RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_CORE] = { 0, RLIM_INFINITY },
+ [RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_MEMLOCK] = { 65536, 65536 },
+ [RLIMIT_MSGQUEUE] = { 819200, 819200 },
+ [RLIMIT_NICE] = { 0, 0 },
+ [RLIMIT_NOFILE] = { 1024, 4096 },
+ [RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_RTPRIO] = { 0, 0 },
+ [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY },
+ [RLIMIT_STACK] = { 8388608, RLIM_INFINITY },
+
+ /* The kernel scales the default for RLIMIT_NPROC and RLIMIT_SIGPENDING based on the system's amount of
+ * RAM. To provide best compatibility we'll read these limits off PID 1 instead of hardcoding them
+ * here. This is safe as we know that PID 1 doesn't change these two limits and thus the original
+ * kernel's initialization should still be valid during runtime — at least if PID 1 is systemd. Note
+ * that PID 1 changes a number of other resource limits during early initialization which is why we
+ * don't read the other limits from PID 1 but prefer the static table above. */
+ };
+
+ int rl;
+
+ for (rl = 0; rl < _RLIMIT_MAX; rl++) {
+ /* Let's only fill in what the user hasn't explicitly configured anyway */
+ if ((arg_settings_mask & (SETTING_RLIMIT_FIRST << rl)) == 0) {
+ const struct rlimit *v;
+ struct rlimit buffer;
+
+ if (IN_SET(rl, RLIMIT_NPROC, RLIMIT_SIGPENDING)) {
+ /* For these two let's read the limits off PID 1. See above for an explanation. */
+
+ if (prlimit(1, rl, NULL, &buffer) < 0)
+ return log_error_errno(errno, "Failed to read resource limit RLIMIT_%s of PID 1: %m", rlimit_to_string(rl));
+
+ v = &buffer;
+ } else
+ v = kernel_defaults + rl;
+
+ arg_rlimit[rl] = newdup(struct rlimit, v, 1);
+ if (!arg_rlimit[rl])
+ return log_oom();
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *k = NULL;
+
+ (void) rlimit_format(arg_rlimit[rl], &k);
+ log_debug("Setting RLIMIT_%s to %s.", rlimit_to_string(rl), k);
+ }
+ }
+
+ return 0;
+}
+
+static int cant_be_in_netns(void) {
+ union sockaddr_union sa = {
+ .un = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/run/udev/control",
+ },
+ };
+ char udev_path[STRLEN("/proc//ns/net") + DECIMAL_STR_MAX(pid_t)];
+ _cleanup_free_ char *udev_ns = NULL, *our_ns = NULL;
+ _cleanup_close_ int fd = -1;
+ struct ucred ucred;
+ int r;
+
+ /* Check if we are in the same netns as udev. If we aren't, then device monitoring (and thus waiting
+ * for loopback block devices) won't work, and we will hang. Detect this case and exit early with a
+ * nice message. */
+
+ if (!arg_image) /* only matters if --image= us used, i.e. we actually need to use loopback devices */
+ return 0;
+
+ fd = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate udev control socket: %m");
+
+ if (connect(fd, &sa.un, SOCKADDR_UN_LEN(sa.un)) < 0) {
+
+ if (errno == ENOENT || ERRNO_IS_DISCONNECT(errno))
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Sorry, but --image= requires access to the host's /run/ hierarchy, since we need access to udev.");
+
+ return log_error_errno(errno, "Failed to connect socket to udev control socket: %m");
+ }
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine peer of udev control socket: %m");
+
+ xsprintf(udev_path, "/proc/" PID_FMT "/ns/net", ucred.pid);
+ r = readlink_malloc(udev_path, &udev_ns);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read network namespace of udev: %m");
+
+ r = readlink_malloc("/proc/self/ns/net", &our_ns);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read our own network namespace: %m");
+
+ if (!streq(our_ns, udev_ns))
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Sorry, but --image= is only supported in the main network namespace, since we need access to udev/AF_NETLINK.");
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ bool secondary = false, remove_directory = false, remove_image = false,
+ veth_created = false, remove_tmprootdir = false;
+ _cleanup_close_ int master = -1;
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ int r, n_fd_passed, ret = EXIT_SUCCESS;
+ char veth_name[IFNAMSIZ] = "";
+ union in_addr_union exposed = {};
+ _cleanup_(release_lock_file) LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT;
+ char tmprootdir[] = "/tmp/nspawn-root-XXXXXX";
+ _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+ pid_t pid = 0;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finish;
+
+ if (geteuid() != 0) {
+ r = log_warning_errno(SYNTHETIC_ERRNO(EPERM),
+ argc >= 2 ? "Need to be root." :
+ "Need to be root (and some arguments are usually required).\nHint: try --help");
+ goto finish;
+ }
+
+ r = cant_be_in_netns();
+ if (r < 0)
+ goto finish;
+
+ r = initialize_rlimits();
+ if (r < 0)
+ goto finish;
+
+ r = load_oci_bundle();
+ if (r < 0)
+ goto finish;
+
+ r = determine_names();
+ if (r < 0)
+ goto finish;
+
+ r = load_settings();
+ if (r < 0)
+ goto finish;
+
+ r = cg_unified();
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
+ goto finish;
+ }
+
+ r = verify_arguments();
+ if (r < 0)
+ goto finish;
+
+ /* Reapply environment settings. */
+ (void) detect_unified_cgroup_hierarchy_from_environment();
+
+ /* Ignore SIGPIPE here, because we use splice() on the ptyfwd stuff and that will generate SIGPIPE if
+ * the result is closed. Note that the container payload child will reset signal mask+handler anyway,
+ * so just turning this off here means we only turn it off in nspawn itself, not any children. */
+ (void) ignore_signals(SIGPIPE, -1);
+
+ n_fd_passed = sd_listen_fds(false);
+ if (n_fd_passed > 0) {
+ r = fdset_new_listen_fds(&fds, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to collect file descriptors: %m");
+ goto finish;
+ }
+ }
+
+ /* The "default" umask. This is appropriate for most file and directory
+ * operations performed by nspawn, and is the umask that will be used for
+ * the child. Functions like copy_devnodes() change the umask temporarily. */
+ umask(0022);
+
+ if (arg_directory) {
+ assert(!arg_image);
+
+ /* Safety precaution: let's not allow running images from the live host OS image, as long as
+ * /var from the host will propagate into container dynamically (because bad things happen if
+ * two systems write to the same /var). Let's allow it for the special cases where /var is
+ * either copied (i.e. --ephemeral) or replaced (i.e. --volatile=yes|state). */
+ if (path_equal(arg_directory, "/") && !(arg_ephemeral || IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_STATE))) {
+ log_error("Spawning container on root directory is not supported. Consider using --ephemeral, --volatile=yes or --volatile=state.");
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (arg_ephemeral) {
+ _cleanup_free_ char *np = NULL;
+
+ r = chase_symlinks_and_update(&arg_directory, 0);
+ if (r < 0)
+ goto finish;
+
+ /* If the specified path is a mount point we generate the new snapshot immediately
+ * inside it under a random name. However if the specified is not a mount point we
+ * create the new snapshot in the parent directory, just next to it. */
+ r = path_is_mount_point(arg_directory, NULL, 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine whether directory %s is mount point: %m", arg_directory);
+ goto finish;
+ }
+ if (r > 0)
+ r = tempfn_random_child(arg_directory, "machine.", &np);
+ else
+ r = tempfn_random(arg_directory, "machine.", &np);
+ if (r < 0) {
+ log_error_errno(r, "Failed to generate name for directory snapshot: %m");
+ goto finish;
+ }
+
+ /* We take an exclusive lock on this image, since it's our private, ephemeral copy
+ * only owned by us and no one else. */
+ r = image_path_lock(np, LOCK_EX|LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r < 0) {
+ log_error_errno(r, "Failed to lock %s: %m", np);
+ goto finish;
+ }
+
+ {
+ BLOCK_SIGNALS(SIGINT);
+ r = btrfs_subvol_snapshot(arg_directory, np,
+ (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA |
+ BTRFS_SNAPSHOT_SIGINT);
+ }
+ if (r == -EINTR) {
+ log_error_errno(r, "Interrupted while copying file system tree to %s, removed again.", np);
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory);
+ goto finish;
+ }
+
+ free_and_replace(arg_directory, np);
+ remove_directory = true;
+ } else {
+ r = chase_symlinks_and_update(&arg_directory, arg_template ? CHASE_NONEXISTENT : 0);
+ if (r < 0)
+ goto finish;
+
+ r = image_path_lock(arg_directory, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r == -EBUSY) {
+ log_error_errno(r, "Directory tree %s is currently busy.", arg_directory);
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to lock %s: %m", arg_directory);
+ goto finish;
+ }
+
+ if (arg_template) {
+ r = chase_symlinks_and_update(&arg_template, 0);
+ if (r < 0)
+ goto finish;
+
+ {
+ BLOCK_SIGNALS(SIGINT);
+ r = btrfs_subvol_snapshot(arg_template, arg_directory,
+ (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA |
+ BTRFS_SNAPSHOT_SIGINT);
+ }
+ if (r == -EEXIST)
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO,
+ "Directory %s already exists, not populating from template %s.", arg_directory, arg_template);
+ else if (r == -EINTR) {
+ log_error_errno(r, "Interrupted while copying file system tree to %s, removed again.", arg_directory);
+ goto finish;
+ } else if (r < 0) {
+ log_error_errno(r, "Couldn't create snapshot %s from %s: %m", arg_directory, arg_template);
+ goto finish;
+ } else
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO,
+ "Populated %s from template %s.", arg_directory, arg_template);
+ }
+ }
+
+ if (arg_start_mode == START_BOOT) {
+ const char *p;
+
+ if (arg_pivot_root_new)
+ p = prefix_roota(arg_directory, arg_pivot_root_new);
+ else
+ p = arg_directory;
+
+ if (path_is_os_tree(p) <= 0) {
+ log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", p);
+ r = -EINVAL;
+ goto finish;
+ }
+ } else {
+ const char *p, *q;
+
+ if (arg_pivot_root_new)
+ p = prefix_roota(arg_directory, arg_pivot_root_new);
+ else
+ p = arg_directory;
+
+ q = strjoina(p, "/usr/");
+
+ if (laccess(q, F_OK) < 0) {
+ log_error("Directory %s doesn't look like it has an OS tree. Refusing.", p);
+ r = -EINVAL;
+ goto finish;
+ }
+ }
+
+ } else {
+ DissectImageFlags dissect_image_flags = DISSECT_IMAGE_REQUIRE_ROOT | DISSECT_IMAGE_RELAX_VAR_CHECK;
+ assert(arg_image);
+ assert(!arg_template);
+
+ r = chase_symlinks_and_update(&arg_image, 0);
+ if (r < 0)
+ goto finish;
+
+ if (arg_ephemeral) {
+ _cleanup_free_ char *np = NULL;
+
+ r = tempfn_random(arg_image, "machine.", &np);
+ if (r < 0) {
+ log_error_errno(r, "Failed to generate name for image snapshot: %m");
+ goto finish;
+ }
+
+ /* Always take an exclusive lock on our own ephemeral copy. */
+ r = image_path_lock(np, LOCK_EX|LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to create image lock: %m");
+ goto finish;
+ }
+
+ {
+ BLOCK_SIGNALS(SIGINT);
+ r = copy_file(arg_image, np, O_EXCL, arg_read_only ? 0400 : 0600, FS_NOCOW_FL, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME|COPY_SIGINT);
+ }
+ if (r == -EINTR) {
+ log_error_errno(r, "Interrupted while copying image file to %s, removed again.", np);
+ goto finish;
+ }
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to copy image file: %m");
+ goto finish;
+ }
+
+ free_and_replace(arg_image, np);
+ remove_image = true;
+ } else {
+ r = image_path_lock(arg_image, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r == -EBUSY) {
+ r = log_error_errno(r, "Disk image %s is currently busy.", arg_image);
+ goto finish;
+ }
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to create image lock: %m");
+ goto finish;
+ }
+
+ r = verity_settings_load(
+ &arg_verity_settings,
+ arg_image, NULL, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read verity artefacts for %s: %m", arg_image);
+ goto finish;
+ }
+
+ if (arg_verity_settings.data_path)
+ dissect_image_flags |= DISSECT_IMAGE_NO_PARTITION_TABLE;
+ }
+
+ if (!mkdtemp(tmprootdir)) {
+ r = log_error_errno(errno, "Failed to create temporary directory: %m");
+ goto finish;
+ }
+
+ remove_tmprootdir = true;
+
+ arg_directory = strdup(tmprootdir);
+ if (!arg_directory) {
+ r = log_oom();
+ goto finish;
+ }
+
+ r = loop_device_make_by_path(
+ arg_image,
+ arg_read_only ? O_RDONLY : O_RDWR,
+ FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
+ &loop);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set up loopback block device: %m");
+ goto finish;
+ }
+
+ r = dissect_image_and_warn(
+ loop->fd,
+ arg_image,
+ &arg_verity_settings,
+ NULL,
+ dissect_image_flags,
+ &dissected_image);
+ if (r == -ENOPKG) {
+ /* dissected_image_and_warn() already printed a brief error message. Extend on that with more details */
+ log_notice("Note that the disk image needs to\n"
+ " a) either contain only a single MBR partition of type 0x83 that is marked bootable\n"
+ " b) or contain a single GPT partition of type 0FC63DAF-8483-4772-8E79-3D69D8477DE4\n"
+ " c) or follow https://systemd.io/DISCOVERABLE_PARTITIONS\n"
+ " d) or contain a file system without a partition table\n"
+ "in order to be bootable with systemd-nspawn.");
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+
+ if (!arg_verity_settings.root_hash && dissected_image->can_verity)
+ log_notice("Note: image %s contains verity information, but no root hash specified! Proceeding without integrity checking.", arg_image);
+
+ r = dissected_image_decrypt_interactively(
+ dissected_image,
+ NULL,
+ &arg_verity_settings,
+ 0,
+ &decrypted_image);
+ if (r < 0)
+ goto finish;
+
+ /* Now that we mounted the image, let's try to remove it again, if it is ephemeral */
+ if (remove_image && unlink(arg_image) >= 0)
+ remove_image = false;
+ }
+
+ r = custom_mount_prepare_all(arg_directory, arg_custom_mounts, arg_n_custom_mounts);
+ if (r < 0)
+ goto finish;
+
+ if (arg_console_mode < 0)
+ arg_console_mode =
+ isatty(STDIN_FILENO) > 0 &&
+ isatty(STDOUT_FILENO) > 0 ? CONSOLE_INTERACTIVE : CONSOLE_READ_ONLY;
+
+ if (arg_console_mode == CONSOLE_PIPE) /* if we pass STDERR on to the container, don't add our own logs into it too */
+ arg_quiet = true;
+
+ if (!arg_quiet)
+ log_info("Spawning container %s on %s.\nPress ^] three times within 1s to kill container.",
+ arg_machine, arg_image ?: arg_directory);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
+
+ if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) < 0) {
+ r = log_error_errno(errno, "Failed to become subreaper: %m");
+ goto finish;
+ }
+
+ for (;;) {
+ r = run_container(dissected_image,
+ secondary,
+ fds,
+ veth_name, &veth_created,
+ &exposed, &master,
+ &pid, &ret);
+ if (r <= 0)
+ break;
+ }
+
+finish:
+ (void) sd_notify(false,
+ r == 0 && ret == EXIT_FORCE_RESTART ? "STOPPING=1\nSTATUS=Restarting..." :
+ "STOPPING=1\nSTATUS=Terminating...");
+
+ if (pid > 0)
+ (void) kill(pid, SIGKILL);
+
+ /* Try to flush whatever is still queued in the pty */
+ if (master >= 0) {
+ (void) copy_bytes(master, STDOUT_FILENO, (uint64_t) -1, 0);
+ master = safe_close(master);
+ }
+
+ if (pid > 0)
+ (void) wait_for_terminate(pid, NULL);
+
+ pager_close();
+
+ if (remove_directory && arg_directory) {
+ int k;
+
+ k = rm_rf(arg_directory, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ if (k < 0)
+ log_warning_errno(k, "Cannot remove '%s', ignoring: %m", arg_directory);
+ }
+
+ if (remove_image && arg_image) {
+ if (unlink(arg_image) < 0)
+ log_warning_errno(errno, "Can't remove image file '%s', ignoring: %m", arg_image);
+ }
+
+ if (remove_tmprootdir) {
+ if (rmdir(tmprootdir) < 0)
+ log_debug_errno(errno, "Can't remove temporary root directory '%s', ignoring: %m", tmprootdir);
+ }
+
+ if (arg_machine) {
+ const char *p;
+
+ p = strjoina("/run/systemd/nspawn/propagate/", arg_machine);
+ (void) rm_rf(p, REMOVE_ROOT);
+ }
+
+ expose_port_flush(arg_expose_ports, &exposed);
+
+ if (veth_created)
+ (void) remove_veth_links(veth_name, arg_network_veth_extra);
+ (void) remove_bridge(arg_network_zone);
+
+ custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
+ expose_port_free_all(arg_expose_ports);
+ rlimit_free_all(arg_rlimit);
+ device_node_array_free(arg_extra_nodes, arg_n_extra_nodes);
+ credential_free_all(arg_credentials, arg_n_credentials);
+
+ if (r < 0)
+ return r;
+
+ return ret;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/nspawn/test-nspawn-tables.c b/src/nspawn/test-nspawn-tables.c
new file mode 100644
index 0000000..3baf5d3
--- /dev/null
+++ b/src/nspawn/test-nspawn-tables.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "nspawn-settings.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ test_table(resolv_conf_mode, RESOLV_CONF_MODE);
+ test_table(timezone_mode, TIMEZONE_MODE);
+
+ return 0;
+}
diff --git a/src/nspawn/test-patch-uid.c b/src/nspawn/test-patch-uid.c
new file mode 100644
index 0000000..f8f44b0
--- /dev/null
+++ b/src/nspawn/test-patch-uid.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+
+#include "log.h"
+#include "nspawn-patch-uid.h"
+#include "user-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ uid_t shift, range;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc != 4) {
+ log_error("Expected PATH SHIFT RANGE parameters.");
+ return EXIT_FAILURE;
+ }
+
+ r = parse_uid(argv[2], &shift);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse UID shift %s.", argv[2]);
+ return EXIT_FAILURE;
+ }
+
+ r = parse_gid(argv[3], &range);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse UID range %s.", argv[3]);
+ return EXIT_FAILURE;
+ }
+
+ r = path_patch_uid(argv[1], shift, range);
+ if (r < 0) {
+ log_error_errno(r, "Failed to patch directory tree: %m");
+ return EXIT_FAILURE;
+ }
+
+ log_info("Changed: %s", yes_no(r));
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/nss-myhostname/nss-myhostname.c b/src/nss-myhostname/nss-myhostname.c
new file mode 100644
index 0000000..ffabc60
--- /dev/null
+++ b/src/nss-myhostname/nss-myhostname.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <nss.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "hostname-util.h"
+#include "local-addresses.h"
+#include "macro.h"
+#include "nss-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+
+/* We use 127.0.0.2 as IPv4 address. This has the advantage over
+ * 127.0.0.1 that it can be translated back to the local hostname. For
+ * IPv6 we use ::1 which unfortunately will not translate back to the
+ * hostname but instead something like "localhost" or so. */
+
+#define LOCALADDRESS_IPV4 (htobe32(0x7F000002))
+#define LOCALADDRESS_IPV6 &in6addr_loopback
+
+NSS_GETHOSTBYNAME_PROTOTYPES(myhostname);
+NSS_GETHOSTBYADDR_PROTOTYPES(myhostname);
+
+enum nss_status _nss_myhostname_gethostbyname4_r(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ struct gaih_addrtuple *r_tuple, *r_tuple_prev = NULL;
+ _cleanup_free_ struct local_address *addresses = NULL;
+ _cleanup_free_ char *hn = NULL;
+ const char *canonical = NULL;
+ int n_addresses = 0;
+ uint32_t local_address_ipv4;
+ struct local_address *a;
+ size_t l, idx, ms;
+ char *r_name;
+ unsigned n;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pat);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (is_localhost(name)) {
+ /* We respond to 'localhost', so that /etc/hosts
+ * is optional */
+
+ canonical = "localhost";
+ local_address_ipv4 = htobe32(INADDR_LOOPBACK);
+
+ } else if (is_gateway_hostname(name)) {
+
+ n_addresses = local_gateways(NULL, 0, AF_UNSPEC, &addresses);
+ if (n_addresses <= 0)
+ goto not_found;
+
+ canonical = "_gateway";
+
+ } else {
+ hn = gethostname_malloc();
+ if (!hn) {
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* We respond to our local hostname, our hostname suffixed with a single dot. */
+ if (!streq(name, hn) && !streq_ptr(startswith(name, hn), "."))
+ goto not_found;
+
+ n_addresses = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
+ if (n_addresses < 0)
+ n_addresses = 0;
+
+ canonical = hn;
+ local_address_ipv4 = LOCALADDRESS_IPV4;
+ }
+
+ l = strlen(canonical);
+ ms = ALIGN(l+1) + ALIGN(sizeof(struct gaih_addrtuple)) * (n_addresses > 0 ? n_addresses : 2);
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, fill in hostname */
+ r_name = buffer;
+ memcpy(r_name, canonical, l+1);
+ idx = ALIGN(l+1);
+
+ assert(n_addresses >= 0);
+ if (n_addresses == 0) {
+ /* Second, fill in IPv6 tuple */
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = r_tuple_prev;
+ r_tuple->name = r_name;
+ r_tuple->family = AF_INET6;
+ memcpy(r_tuple->addr, LOCALADDRESS_IPV6, 16);
+ r_tuple->scopeid = 0;
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ r_tuple_prev = r_tuple;
+
+ /* Third, fill in IPv4 tuple */
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = r_tuple_prev;
+ r_tuple->name = r_name;
+ r_tuple->family = AF_INET;
+ *(uint32_t*) r_tuple->addr = local_address_ipv4;
+ r_tuple->scopeid = 0;
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ r_tuple_prev = r_tuple;
+ }
+
+ /* Fourth, fill actual addresses in, but in backwards order */
+ for (a = addresses + n_addresses - 1, n = 0; (int) n < n_addresses; n++, a--) {
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = r_tuple_prev;
+ r_tuple->name = r_name;
+ r_tuple->family = a->family;
+ r_tuple->scopeid = a->family == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&a->address.in6) ? a->ifindex : 0;
+ memcpy(r_tuple->addr, &a->address, 16);
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ r_tuple_prev = r_tuple;
+ }
+
+ /* Verify the size matches */
+ assert(idx == ms);
+
+ /* Nscd expects us to store the first record in **pat. */
+ if (*pat)
+ **pat = *r_tuple_prev;
+ else
+ *pat = r_tuple_prev;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+not_found:
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+}
+
+static enum nss_status fill_in_hostent(
+ const char *canonical, const char *additional,
+ int af,
+ struct local_address *addresses, unsigned n_addresses,
+ uint32_t local_address_ipv4,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp) {
+
+ size_t l_canonical, l_additional, idx, ms, alen;
+ char *r_addr, *r_name, *r_aliases, *r_alias = NULL, *r_addr_list;
+ struct local_address *a;
+ unsigned n, c;
+
+ assert(canonical);
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ PROTECT_ERRNO;
+
+ alen = FAMILY_ADDRESS_SIZE(af);
+
+ for (a = addresses, n = 0, c = 0; n < n_addresses; a++, n++)
+ if (af == a->family)
+ c++;
+
+ l_canonical = strlen(canonical);
+ l_additional = strlen_ptr(additional);
+ ms = ALIGN(l_canonical+1)+
+ (additional ? ALIGN(l_additional+1) : 0) +
+ sizeof(char*) +
+ (additional ? sizeof(char*) : 0) +
+ (c > 0 ? c : 1) * ALIGN(alen) +
+ (c > 0 ? c+1 : 2) * sizeof(char*);
+
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, fill in hostnames */
+ r_name = buffer;
+ memcpy(r_name, canonical, l_canonical+1);
+ idx = ALIGN(l_canonical+1);
+
+ if (additional) {
+ r_alias = buffer + idx;
+ memcpy(r_alias, additional, l_additional+1);
+ idx += ALIGN(l_additional+1);
+ }
+
+ /* Second, create aliases array */
+ r_aliases = buffer + idx;
+ if (additional) {
+ ((char**) r_aliases)[0] = r_alias;
+ ((char**) r_aliases)[1] = NULL;
+ idx += 2*sizeof(char*);
+ } else {
+ ((char**) r_aliases)[0] = NULL;
+ idx += sizeof(char*);
+ }
+
+ /* Third, add addresses */
+ r_addr = buffer + idx;
+ if (c > 0) {
+ unsigned i = 0;
+
+ for (a = addresses, n = 0; n < n_addresses; a++, n++) {
+ if (af != a->family)
+ continue;
+
+ memcpy(r_addr + i*ALIGN(alen), &a->address, alen);
+ i++;
+ }
+
+ assert(i == c);
+ idx += c*ALIGN(alen);
+ } else {
+ if (af == AF_INET)
+ *(uint32_t*) r_addr = local_address_ipv4;
+ else
+ memcpy(r_addr, LOCALADDRESS_IPV6, 16);
+
+ idx += ALIGN(alen);
+ }
+
+ /* Fourth, add address pointer array */
+ r_addr_list = buffer + idx;
+ if (c > 0) {
+ unsigned i;
+
+ for (i = 0; i < c; i++)
+ ((char**) r_addr_list)[i] = r_addr + i*ALIGN(alen);
+
+ ((char**) r_addr_list)[i] = NULL;
+ idx += (c+1) * sizeof(char*);
+
+ } else {
+ ((char**) r_addr_list)[0] = r_addr;
+ ((char**) r_addr_list)[1] = NULL;
+ idx += 2 * sizeof(char*);
+ }
+
+ /* Verify the size matches */
+ assert(idx == ms);
+
+ result->h_name = r_name;
+ result->h_aliases = (char**) r_aliases;
+ result->h_addrtype = af;
+ result->h_length = alen;
+ result->h_addr_list = (char**) r_addr_list;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ if (canonp)
+ *canonp = r_name;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+}
+
+enum nss_status _nss_myhostname_gethostbyname3_r(
+ const char *name,
+ int af,
+ struct hostent *host,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp) {
+
+ _cleanup_free_ struct local_address *addresses = NULL;
+ const char *canonical, *additional = NULL;
+ _cleanup_free_ char *hn = NULL;
+ uint32_t local_address_ipv4 = 0;
+ int n_addresses = 0;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(host);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (af == AF_UNSPEC)
+ af = AF_INET;
+
+ if (!IN_SET(af, AF_INET, AF_INET6)) {
+ UNPROTECT_ERRNO;
+ *errnop = EAFNOSUPPORT;
+ *h_errnop = NO_DATA;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (is_localhost(name)) {
+ canonical = "localhost";
+ local_address_ipv4 = htobe32(INADDR_LOOPBACK);
+
+ } else if (is_gateway_hostname(name)) {
+
+ n_addresses = local_gateways(NULL, 0, af, &addresses);
+ if (n_addresses <= 0)
+ goto not_found;
+
+ canonical = "_gateway";
+
+ } else {
+ hn = gethostname_malloc();
+ if (!hn) {
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ if (!streq(name, hn) && !streq_ptr(startswith(name, hn), "."))
+ goto not_found;
+
+ n_addresses = local_addresses(NULL, 0, af, &addresses);
+ if (n_addresses < 0)
+ n_addresses = 0;
+
+ canonical = hn;
+ additional = n_addresses <= 0 && af == AF_INET6 ? "localhost" : NULL;
+ local_address_ipv4 = LOCALADDRESS_IPV4;
+ }
+
+ UNPROTECT_ERRNO;
+
+ return fill_in_hostent(
+ canonical, additional,
+ af,
+ addresses, n_addresses,
+ local_address_ipv4,
+ host,
+ buffer, buflen,
+ errnop, h_errnop,
+ ttlp,
+ canonp);
+
+not_found:
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+}
+
+enum nss_status _nss_myhostname_gethostbyaddr2_r(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *host,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ const char *canonical = NULL, *additional = NULL;
+ uint32_t local_address_ipv4 = LOCALADDRESS_IPV4;
+ _cleanup_free_ struct local_address *addresses = NULL;
+ _cleanup_free_ char *hn = NULL;
+ int n_addresses = 0;
+ struct local_address *a;
+ bool additional_from_hostname = false;
+ unsigned n;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(addr);
+ assert(host);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (!IN_SET(af, AF_INET, AF_INET6)) {
+ UNPROTECT_ERRNO;
+ *errnop = EAFNOSUPPORT;
+ *h_errnop = NO_DATA;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (len != FAMILY_ADDRESS_SIZE(af)) {
+ UNPROTECT_ERRNO;
+ *errnop = EINVAL;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (af == AF_INET) {
+ if ((*(uint32_t*) addr) == LOCALADDRESS_IPV4)
+ goto found;
+
+ if ((*(uint32_t*) addr) == htobe32(INADDR_LOOPBACK)) {
+ canonical = "localhost";
+ local_address_ipv4 = htobe32(INADDR_LOOPBACK);
+ goto found;
+ }
+
+ } else {
+ assert(af == AF_INET6);
+
+ if (memcmp(addr, LOCALADDRESS_IPV6, 16) == 0) {
+ canonical = "localhost";
+ additional_from_hostname = true;
+ goto found;
+ }
+ }
+
+ n_addresses = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
+ for (a = addresses, n = 0; (int) n < n_addresses; n++, a++) {
+ if (af != a->family)
+ continue;
+
+ if (memcmp(addr, &a->address, FAMILY_ADDRESS_SIZE(af)) == 0)
+ goto found;
+ }
+
+ addresses = mfree(addresses);
+
+ n_addresses = local_gateways(NULL, 0, AF_UNSPEC, &addresses);
+ for (a = addresses, n = 0; (int) n < n_addresses; n++, a++) {
+ if (af != a->family)
+ continue;
+
+ if (memcmp(addr, &a->address, FAMILY_ADDRESS_SIZE(af)) == 0) {
+ canonical = "_gateway";
+ goto found;
+ }
+ }
+
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+
+found:
+ if (!canonical || additional_from_hostname) {
+ hn = gethostname_malloc();
+ if (!hn) {
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ if (!canonical)
+ canonical = hn;
+ else
+ additional = hn;
+ }
+
+ UNPROTECT_ERRNO;
+ return fill_in_hostent(
+ canonical, additional,
+ af,
+ addresses, n_addresses,
+ local_address_ipv4,
+ host,
+ buffer, buflen,
+ errnop, h_errnop,
+ ttlp,
+ NULL);
+}
+
+NSS_GETHOSTBYNAME_FALLBACKS(myhostname);
+NSS_GETHOSTBYADDR_FALLBACKS(myhostname);
diff --git a/src/nss-myhostname/nss-myhostname.sym b/src/nss-myhostname/nss-myhostname.sym
new file mode 100644
index 0000000..21ab637
--- /dev/null
+++ b/src/nss-myhostname/nss-myhostname.sym
@@ -0,0 +1,19 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ _nss_myhostname_gethostbyname_r;
+ _nss_myhostname_gethostbyname2_r;
+ _nss_myhostname_gethostbyname3_r;
+ _nss_myhostname_gethostbyname4_r;
+ _nss_myhostname_gethostbyaddr_r;
+ _nss_myhostname_gethostbyaddr2_r;
+local: *;
+};
diff --git a/src/nss-mymachines/nss-mymachines.c b/src/nss-mymachines/nss-mymachines.c
new file mode 100644
index 0000000..53f0492
--- /dev/null
+++ b/src/nss-mymachines/nss-mymachines.c
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netdb.h>
+#include <nss.h>
+
+#include "sd-bus.h"
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-locator.h"
+#include "env-util.h"
+#include "errno-util.h"
+#include "format-util.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "nss-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+
+NSS_GETHOSTBYNAME_PROTOTYPES(mymachines);
+NSS_GETPW_PROTOTYPES(mymachines);
+NSS_GETGR_PROTOTYPES(mymachines);
+
+static int count_addresses(sd_bus_message *m, int af, unsigned *ret) {
+ unsigned c = 0;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ while ((r = sd_bus_message_enter_container(m, 'r', "iay")) > 0) {
+ int family;
+
+ r = sd_bus_message_read(m, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_skip(m, "ay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (af != AF_UNSPEC && family != af)
+ continue;
+
+ c++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_rewind(m, false);
+ if (r < 0)
+ return r;
+
+ *ret = c;
+ return 0;
+}
+
+static bool avoid_deadlock(void) {
+
+ /* Check whether this lookup might have a chance of deadlocking because we are called from the service manager
+ * code activating systemd-machined.service. After all, we shouldn't synchronously do lookups to
+ * systemd-machined if we are required to finish before it can be started. This of course won't detect all
+ * possible dead locks of this kind, but it should work for the most obvious cases. */
+
+ if (geteuid() != 0) /* Ignore the env vars unless we are privileged. */
+ return false;
+
+ return streq_ptr(getenv("SYSTEMD_ACTIVATION_UNIT"), "systemd-machined.service") &&
+ streq_ptr(getenv("SYSTEMD_ACTIVATION_SCOPE"), "system");
+}
+
+enum nss_status _nss_mymachines_gethostbyname4_r(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ struct gaih_addrtuple *r_tuple, *r_tuple_first = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ int *ifindices = NULL;
+ _cleanup_free_ char *class = NULL;
+ size_t l, ms, idx;
+ unsigned i = 0, c = 0;
+ char *r_name;
+ int n_ifindices, r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pat);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ r = sd_machine_get_class(name, &class);
+ if (r < 0)
+ goto fail;
+ if (!streq(class, "container")) {
+ r = -ENOTTY;
+ goto fail;
+ }
+
+ n_ifindices = sd_machine_get_ifindices(name, &ifindices);
+ if (n_ifindices < 0) {
+ r = n_ifindices;
+ goto fail;
+ }
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetMachineAddresses", NULL, &reply, "s", name);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iay)");
+ if (r < 0)
+ goto fail;
+
+ r = count_addresses(reply, AF_UNSPEC, &c);
+ if (r < 0)
+ goto fail;
+
+ if (c <= 0) {
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+ }
+
+ l = strlen(name);
+ ms = ALIGN(l+1) + ALIGN(sizeof(struct gaih_addrtuple)) * c;
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, append name */
+ r_name = buffer;
+ memcpy(r_name, name, l+1);
+ idx = ALIGN(l+1);
+
+ /* Second, append addresses */
+ r_tuple_first = (struct gaih_addrtuple*) (buffer + idx);
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iay")) > 0) {
+ int family;
+ const void *a;
+ size_t sz;
+
+ r = sd_bus_message_read(reply, "i", &family);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ if (!IN_SET(family, AF_INET, AF_INET6)) {
+ r = -EAFNOSUPPORT;
+ goto fail;
+ }
+
+ if (sz != FAMILY_ADDRESS_SIZE(family)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = i == c-1 ? NULL : (struct gaih_addrtuple*) ((char*) r_tuple + ALIGN(sizeof(struct gaih_addrtuple)));
+ r_tuple->name = r_name;
+ r_tuple->family = family;
+ r_tuple->scopeid = n_ifindices == 1 ? ifindices[0] : 0;
+ memcpy(r_tuple->addr, a, sz);
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ i++;
+ }
+
+ assert(i == c);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ assert(idx == ms);
+
+ if (*pat)
+ **pat = *r_tuple_first;
+ else
+ *pat = r_tuple_first;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_UNAVAIL;
+}
+
+enum nss_status _nss_mymachines_gethostbyname3_r(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *class = NULL;
+ unsigned c = 0, i = 0;
+ char *r_name, *r_aliases, *r_addr, *r_addr_list;
+ size_t l, idx, ms, alen;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (af == AF_UNSPEC)
+ af = AF_INET;
+
+ if (af != AF_INET && af != AF_INET6) {
+ r = -EAFNOSUPPORT;
+ goto fail;
+ }
+
+ r = sd_machine_get_class(name, &class);
+ if (r < 0)
+ goto fail;
+ if (!streq(class, "container")) {
+ r = -ENOTTY;
+ goto fail;
+ }
+
+ if (avoid_deadlock()) {
+ r = -EDEADLK;
+ goto fail;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ goto fail;
+
+ r = bus_call_method(bus, bus_machine_mgr, "GetMachineAddresses", NULL, &reply, "s", name);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iay)");
+ if (r < 0)
+ goto fail;
+
+ r = count_addresses(reply, af, &c);
+ if (r < 0)
+ goto fail;
+
+ if (c <= 0) {
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+ }
+
+ alen = FAMILY_ADDRESS_SIZE(af);
+ l = strlen(name);
+
+ ms = ALIGN(l+1) + c * ALIGN(alen) + (c+2) * sizeof(char*);
+
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, append name */
+ r_name = buffer;
+ memcpy(r_name, name, l+1);
+ idx = ALIGN(l+1);
+
+ /* Second, create aliases array */
+ r_aliases = buffer + idx;
+ ((char**) r_aliases)[0] = NULL;
+ idx += sizeof(char*);
+
+ /* Third, append addresses */
+ r_addr = buffer + idx;
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iay")) > 0) {
+ int family;
+ const void *a;
+ size_t sz;
+
+ r = sd_bus_message_read(reply, "i", &family);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_array(reply, 'y', &a, &sz);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ if (family != af)
+ continue;
+
+ if (sz != alen) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ memcpy(r_addr + i*ALIGN(alen), a, alen);
+ i++;
+ }
+
+ assert(i == c);
+ idx += c * ALIGN(alen);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto fail;
+
+ /* Third, append address pointer array */
+ r_addr_list = buffer + idx;
+ for (i = 0; i < c; i++)
+ ((char**) r_addr_list)[i] = r_addr + i*ALIGN(alen);
+
+ ((char**) r_addr_list)[i] = NULL;
+ idx += (c+1) * sizeof(char*);
+
+ assert(idx == ms);
+
+ result->h_name = r_name;
+ result->h_aliases = (char**) r_aliases;
+ result->h_addrtype = af;
+ result->h_length = alen;
+ result->h_addr_list = (char**) r_addr_list;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ if (canonp)
+ *canonp = r_name;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_UNAVAIL;
+}
+
+NSS_GETHOSTBYNAME_FALLBACKS(mymachines);
+
+enum nss_status _nss_mymachines_getpwnam_r(
+ const char *name,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ return NSS_STATUS_NOTFOUND;
+}
+
+enum nss_status _nss_mymachines_getpwuid_r(
+ uid_t uid,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ return NSS_STATUS_NOTFOUND;
+}
+
+enum nss_status _nss_mymachines_getgrnam_r(
+ const char *name,
+ struct group *gr,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ return NSS_STATUS_NOTFOUND;
+}
+
+enum nss_status _nss_mymachines_getgrgid_r(
+ gid_t gid,
+ struct group *gr,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ return NSS_STATUS_NOTFOUND;
+}
diff --git a/src/nss-mymachines/nss-mymachines.sym b/src/nss-mymachines/nss-mymachines.sym
new file mode 100644
index 0000000..258244e
--- /dev/null
+++ b/src/nss-mymachines/nss-mymachines.sym
@@ -0,0 +1,21 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ _nss_mymachines_gethostbyname_r;
+ _nss_mymachines_gethostbyname2_r;
+ _nss_mymachines_gethostbyname3_r;
+ _nss_mymachines_gethostbyname4_r;
+ _nss_mymachines_getpwnam_r;
+ _nss_mymachines_getpwuid_r;
+ _nss_mymachines_getgrnam_r;
+ _nss_mymachines_getgrgid_r;
+local: *;
+};
diff --git a/src/nss-resolve/nss-resolve.c b/src/nss-resolve/nss-resolve.c
new file mode 100644
index 0000000..3fee4f5
--- /dev/null
+++ b/src/nss-resolve/nss-resolve.c
@@ -0,0 +1,664 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <netdb.h>
+#include <nss.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "errno-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "nss-util.h"
+#include "resolved-def.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "varlink.h"
+
+NSS_GETHOSTBYNAME_PROTOTYPES(resolve);
+NSS_GETHOSTBYADDR_PROTOTYPES(resolve);
+
+static bool error_shall_fallback(const char *error_id) {
+ return STR_IN_SET(error_id,
+ VARLINK_ERROR_DISCONNECTED,
+ VARLINK_ERROR_TIMEOUT,
+ VARLINK_ERROR_PROTOCOL,
+ VARLINK_ERROR_INTERFACE_NOT_FOUND,
+ VARLINK_ERROR_METHOD_NOT_FOUND,
+ VARLINK_ERROR_METHOD_NOT_IMPLEMENTED);
+}
+
+static int connect_to_resolved(Varlink **ret) {
+ _cleanup_(varlink_unrefp) Varlink *link = NULL;
+ int r;
+
+ r = varlink_connect_address(&link, "/run/systemd/resolve/io.systemd.Resolve");
+ if (r < 0)
+ return r;
+
+ r = varlink_set_relative_timeout(link, SD_RESOLVED_QUERY_TIMEOUT_USEC);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(link);
+ return 0;
+}
+
+static uint32_t ifindex_to_scopeid(int family, const void *a, int ifindex) {
+ struct in6_addr in6;
+
+ if (family != AF_INET6)
+ return 0;
+
+ /* Some apps can't deal with the scope ID attached to non-link-local addresses. Hence, let's suppress that. */
+
+ assert(sizeof(in6) == FAMILY_ADDRESS_SIZE(AF_INET6));
+ memcpy(&in6, a, sizeof(struct in6_addr));
+
+ return IN6_IS_ADDR_LINKLOCAL(&in6) ? ifindex : 0;
+}
+
+static int json_dispatch_ifindex(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int *ifi = userdata;
+ intmax_t t;
+
+ assert(variant);
+ assert(ifi);
+
+ if (!json_variant_is_integer(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name));
+
+ t = json_variant_integer(variant);
+ if (t <= 0 || t > INT_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is out of bounds for an interface index.", strna(name));
+
+ *ifi = (int) t;
+ return 0;
+}
+
+static int json_dispatch_family(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int *family = userdata;
+ intmax_t t;
+
+ assert(variant);
+ assert(family);
+
+ if (!json_variant_is_integer(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name));
+
+ t = json_variant_integer(variant);
+ if (t < 0 || t > INT_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid family.", strna(name));
+
+ *family = (int) t;
+ return 0;
+}
+
+typedef struct ResolveHostnameReply {
+ JsonVariant *addresses;
+ char *name;
+ uint64_t flags;
+} ResolveHostnameReply;
+
+static void resolve_hostname_reply_destroy(ResolveHostnameReply *p) {
+ assert(p);
+
+ json_variant_unref(p->addresses);
+ free(p->name);
+}
+
+static const JsonDispatch resolve_hostname_reply_dispatch_table[] = {
+ { "addresses", JSON_VARIANT_ARRAY, json_dispatch_variant, offsetof(ResolveHostnameReply, addresses), JSON_MANDATORY },
+ { "name", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ResolveHostnameReply, name), 0 },
+ { "flags", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(ResolveHostnameReply, flags), 0 },
+ {}
+};
+
+typedef struct AddressParameters {
+ int ifindex;
+ int family;
+ union in_addr_union address;
+ size_t address_size;
+} AddressParameters;
+
+static int json_dispatch_address(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ AddressParameters *p = userdata;
+ union in_addr_union buf = {};
+ JsonVariant *i;
+ size_t n, k = 0;
+
+ assert(variant);
+ assert(p);
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name));
+
+ n = json_variant_elements(variant);
+ if (!IN_SET(n, 4, 16))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is array of unexpected size.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(i, variant) {
+ intmax_t b;
+
+ if (!json_variant_is_integer(i))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Element %zu of JSON field '%s' is not an integer.", k, strna(name));
+
+ b = json_variant_integer(i);
+ if (b < 0 || b > 0xff)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Element %zu of JSON field '%s' is out of range 0…255.", k, strna(name));
+
+ buf.bytes[k++] = (uint8_t) b;
+ }
+
+ p->address = buf;
+ p->address_size = k;
+
+ return 0;
+}
+
+static const JsonDispatch address_parameters_dispatch_table[] = {
+ { "ifindex", JSON_VARIANT_INTEGER, json_dispatch_ifindex, offsetof(AddressParameters, ifindex), 0 },
+ { "family", JSON_VARIANT_INTEGER, json_dispatch_family, offsetof(AddressParameters, family), JSON_MANDATORY },
+ { "address", JSON_VARIANT_ARRAY, json_dispatch_address, 0, JSON_MANDATORY },
+ {}
+};
+
+enum nss_status _nss_resolve_gethostbyname4_r(
+ const char *name,
+ struct gaih_addrtuple **pat,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ _cleanup_(resolve_hostname_reply_destroy) ResolveHostnameReply p = {};
+ _cleanup_(json_variant_unrefp) JsonVariant *cparams = NULL;
+ struct gaih_addrtuple *r_tuple = NULL, *r_tuple_first = NULL;
+ _cleanup_(varlink_unrefp) Varlink *link = NULL;
+ const char *canonical = NULL, *error_id = NULL;
+ JsonVariant *entry, *rparams;
+ size_t l, ms, idx, c = 0;
+ char *r_name;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pat);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ r = connect_to_resolved(&link);
+ if (r < 0)
+ goto fail;
+
+ r = json_build(&cparams, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("name", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ goto fail;
+
+ /* Return NSS_STATUS_UNAVAIL when communication with systemd-resolved fails, allowing falling
+ * back to other nss modules. Treat all other error conditions as NOTFOUND. This includes
+ * DNSSEC errors and suchlike. (We don't use UNAVAIL in this case so that the nsswitch.conf
+ * configuration can distinguish such executed but negative replies from complete failure to
+ * talk to resolved). */
+ r = varlink_call(link, "io.systemd.Resolve.ResolveHostname", cparams, &rparams, &error_id, NULL);
+ if (r < 0)
+ goto fail;
+ if (!isempty(error_id)) {
+ if (!error_shall_fallback(error_id))
+ goto not_found;
+ goto fail;
+ }
+
+ r = json_dispatch(rparams, resolve_hostname_reply_dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ goto fail;
+ if (json_variant_is_blank_object(p.addresses))
+ goto not_found;
+
+ JSON_VARIANT_ARRAY_FOREACH(entry, p.addresses) {
+ AddressParameters q = {};
+
+ r = json_dispatch(entry, address_parameters_dispatch_table, NULL, 0, &q);
+ if (r < 0)
+ goto fail;
+
+ if (!IN_SET(q.family, AF_INET, AF_INET6))
+ continue;
+
+ if (q.address_size != FAMILY_ADDRESS_SIZE(q.family)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ c++;
+ }
+
+ canonical = p.name ?: name;
+
+ l = strlen(canonical);
+ ms = ALIGN(l+1) + ALIGN(sizeof(struct gaih_addrtuple)) * c;
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, append name */
+ r_name = buffer;
+ memcpy(r_name, canonical, l+1);
+ idx = ALIGN(l+1);
+
+ /* Second, append addresses */
+ r_tuple_first = (struct gaih_addrtuple*) (buffer + idx);
+
+ JSON_VARIANT_ARRAY_FOREACH(entry, p.addresses) {
+ AddressParameters q = {};
+
+ r = json_dispatch(entry, address_parameters_dispatch_table, NULL, 0, &q);
+ if (r < 0)
+ goto fail;
+
+ if (!IN_SET(q.family, AF_INET, AF_INET6))
+ continue;
+
+ r_tuple = (struct gaih_addrtuple*) (buffer + idx);
+ r_tuple->next = (struct gaih_addrtuple*) ((char*) r_tuple + ALIGN(sizeof(struct gaih_addrtuple)));
+ r_tuple->name = r_name;
+ r_tuple->family = q.family;
+ r_tuple->scopeid = ifindex_to_scopeid(q.family, &q.address, q.ifindex);
+ memcpy(r_tuple->addr, &q.address, q.address_size);
+
+ idx += ALIGN(sizeof(struct gaih_addrtuple));
+ }
+
+ assert(r_tuple);
+ r_tuple->next = NULL; /* Override last next pointer */
+
+ assert(idx == ms);
+
+ if (*pat)
+ **pat = *r_tuple_first;
+ else
+ *pat = r_tuple_first;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_UNAVAIL;
+
+not_found:
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+}
+
+enum nss_status _nss_resolve_gethostbyname3_r(
+ const char *name,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp,
+ char **canonp) {
+
+ _cleanup_(resolve_hostname_reply_destroy) ResolveHostnameReply p = {};
+ _cleanup_(json_variant_unrefp) JsonVariant *cparams = NULL;
+ char *r_name, *r_aliases, *r_addr, *r_addr_list;
+ _cleanup_(varlink_unrefp) Varlink *link = NULL;
+ const char *canonical, *error_id = NULL;
+ size_t l, idx, ms, alen, i = 0, c = 0;
+ JsonVariant *entry, *rparams;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (af == AF_UNSPEC)
+ af = AF_INET;
+
+ if (!IN_SET(af, AF_INET, AF_INET6)) {
+ r = -EAFNOSUPPORT;
+ goto fail;
+ }
+
+ r = connect_to_resolved(&link);
+ if (r < 0)
+ goto fail;
+
+ r = json_build(&cparams, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("name", JSON_BUILD_STRING(name)),
+ JSON_BUILD_PAIR("family", JSON_BUILD_INTEGER(af))));
+ if (r < 0)
+ goto fail;
+
+ r = varlink_call(link, "io.systemd.Resolve.ResolveHostname", cparams, &rparams, &error_id, NULL);
+ if (r < 0)
+ goto fail;
+ if (!isempty(error_id)) {
+ if (!error_shall_fallback(error_id))
+ goto not_found;
+ goto fail;
+ }
+
+ r = json_dispatch(rparams, resolve_hostname_reply_dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ goto fail;
+ if (json_variant_is_blank_object(p.addresses))
+ goto not_found;
+
+ JSON_VARIANT_ARRAY_FOREACH(entry, p.addresses) {
+ AddressParameters q = {};
+
+ r = json_dispatch(entry, address_parameters_dispatch_table, NULL, 0, &q);
+ if (r < 0)
+ goto fail;
+
+ if (!IN_SET(q.family, AF_INET, AF_INET6))
+ continue;
+
+ if (q.address_size != FAMILY_ADDRESS_SIZE(q.family)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ c++;
+ }
+
+ canonical = p.name ?: name;
+
+ alen = FAMILY_ADDRESS_SIZE(af);
+ l = strlen(canonical);
+
+ ms = ALIGN(l+1) + c*ALIGN(alen) + (c+2) * sizeof(char*);
+
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, append name */
+ r_name = buffer;
+ memcpy(r_name, canonical, l+1);
+ idx = ALIGN(l+1);
+
+ /* Second, create empty aliases array */
+ r_aliases = buffer + idx;
+ ((char**) r_aliases)[0] = NULL;
+ idx += sizeof(char*);
+
+ /* Third, append addresses */
+ r_addr = buffer + idx;
+
+ JSON_VARIANT_ARRAY_FOREACH(entry, p.addresses) {
+ AddressParameters q = {};
+
+ r = json_dispatch(entry, address_parameters_dispatch_table, NULL, 0, &q);
+ if (r < 0)
+ goto fail;
+
+ if (q.family != af)
+ continue;
+
+ if (q.address_size != alen) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ memcpy(r_addr + i*ALIGN(alen), &q.address, alen);
+ i++;
+ }
+
+ assert(i == c);
+ idx += c * ALIGN(alen);
+
+ /* Fourth, append address pointer array */
+ r_addr_list = buffer + idx;
+ for (i = 0; i < c; i++)
+ ((char**) r_addr_list)[i] = r_addr + i*ALIGN(alen);
+
+ ((char**) r_addr_list)[i] = NULL;
+ idx += (c+1) * sizeof(char*);
+
+ assert(idx == ms);
+
+ result->h_name = r_name;
+ result->h_aliases = (char**) r_aliases;
+ result->h_addrtype = af;
+ result->h_length = alen;
+ result->h_addr_list = (char**) r_addr_list;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ if (canonp)
+ *canonp = r_name;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_UNAVAIL;
+
+not_found:
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+}
+
+typedef struct ResolveAddressReply {
+ JsonVariant *names;
+ uint64_t flags;
+} ResolveAddressReply;
+
+static void resolve_address_reply_destroy(ResolveAddressReply *p) {
+ assert(p);
+
+ json_variant_unref(p->names);
+}
+
+static const JsonDispatch resolve_address_reply_dispatch_table[] = {
+ { "names", JSON_VARIANT_ARRAY, json_dispatch_variant, offsetof(ResolveAddressReply, names), JSON_MANDATORY },
+ { "flags", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(ResolveAddressReply, flags), 0 },
+ {}
+};
+
+typedef struct NameParameters {
+ int ifindex;
+ char *name;
+} NameParameters;
+
+static void name_parameters_destroy(NameParameters *p) {
+ assert(p);
+
+ free(p->name);
+}
+
+static const JsonDispatch name_parameters_dispatch_table[] = {
+ { "ifindex", JSON_VARIANT_INTEGER, json_dispatch_ifindex, offsetof(NameParameters, ifindex), 0 },
+ { "name", JSON_VARIANT_UNSIGNED, json_dispatch_string, offsetof(NameParameters, name), JSON_MANDATORY },
+ {}
+};
+
+enum nss_status _nss_resolve_gethostbyaddr2_r(
+ const void* addr, socklen_t len,
+ int af,
+ struct hostent *result,
+ char *buffer, size_t buflen,
+ int *errnop, int *h_errnop,
+ int32_t *ttlp) {
+
+ _cleanup_(resolve_address_reply_destroy) ResolveAddressReply p = {};
+ _cleanup_(json_variant_unrefp) JsonVariant *cparams = NULL;
+ char *r_name, *r_aliases, *r_addr, *r_addr_list;
+ _cleanup_(varlink_unrefp) Varlink *link = NULL;
+ JsonVariant *entry, *rparams;
+ const char *n, *error_id;
+ unsigned c = 0, i = 0;
+ size_t ms = 0, idx;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(addr);
+ assert(result);
+ assert(buffer);
+ assert(errnop);
+ assert(h_errnop);
+
+ if (!IN_SET(af, AF_INET, AF_INET6)) {
+ UNPROTECT_ERRNO;
+ *errnop = EAFNOSUPPORT;
+ *h_errnop = NO_DATA;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (len != FAMILY_ADDRESS_SIZE(af)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r = connect_to_resolved(&link);
+ if (r < 0)
+ goto fail;
+
+ r = json_build(&cparams, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("address", JSON_BUILD_BYTE_ARRAY(addr, len)),
+ JSON_BUILD_PAIR("family", JSON_BUILD_INTEGER(af))));
+ if (r < 0)
+ goto fail;
+
+ r = varlink_call(link, "io.systemd.Resolve.ResolveAddress", cparams, &rparams, &error_id, NULL);
+ if (r < 0)
+ goto fail;
+ if (!isempty(error_id)) {
+ if (!error_shall_fallback(error_id))
+ goto not_found;
+ goto fail;
+ }
+
+ r = json_dispatch(rparams, resolve_address_reply_dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ goto fail;
+ if (json_variant_is_blank_object(p.names))
+ goto not_found;
+
+ JSON_VARIANT_ARRAY_FOREACH(entry, p.names) {
+ _cleanup_(name_parameters_destroy) NameParameters q = {};
+
+ r = json_dispatch(entry, name_parameters_dispatch_table, NULL, 0, &q);
+ if (r < 0)
+ goto fail;
+
+ ms += ALIGN(strlen(q.name) + 1);
+ }
+
+ ms += ALIGN(len) + /* the address */
+ 2 * sizeof(char*) + /* pointers to the address, plus trailing NULL */
+ json_variant_elements(p.names) * sizeof(char*); /* pointers to aliases, plus trailing NULL */
+
+ if (buflen < ms) {
+ UNPROTECT_ERRNO;
+ *errnop = ERANGE;
+ *h_errnop = NETDB_INTERNAL;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* First, place address */
+ r_addr = buffer;
+ memcpy(r_addr, addr, len);
+ idx = ALIGN(len);
+
+ /* Second, place address list */
+ r_addr_list = buffer + idx;
+ ((char**) r_addr_list)[0] = r_addr;
+ ((char**) r_addr_list)[1] = NULL;
+ idx += sizeof(char*) * 2;
+
+ /* Third, reserve space for the aliases array */
+ r_aliases = buffer + idx;
+ idx += sizeof(char*) * c;
+
+ /* Fourth, place aliases */
+ i = 0;
+ r_name = buffer + idx;
+ JSON_VARIANT_ARRAY_FOREACH(entry, p.names) {
+ _cleanup_(name_parameters_destroy) NameParameters q = {};
+ size_t l;
+ char *z;
+
+ r = json_dispatch(entry, name_parameters_dispatch_table, NULL, 0, &q);
+ if (r < 0)
+ goto fail;
+
+ l = strlen(q.name);
+ z = buffer + idx;
+ memcpy(z, n, l+1);
+
+ if (i > 0)
+ ((char**) r_aliases)[i-1] = z;
+ i++;
+
+ idx += ALIGN(l+1);
+ }
+
+ ((char**) r_aliases)[c-1] = NULL;
+ assert(idx == ms);
+
+ result->h_name = r_name;
+ result->h_aliases = (char**) r_aliases;
+ result->h_addrtype = af;
+ result->h_length = len;
+ result->h_addr_list = (char**) r_addr_list;
+
+ if (ttlp)
+ *ttlp = 0;
+
+ /* Explicitly reset both *h_errnop and h_errno to work around
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1125975 */
+ *h_errnop = NETDB_SUCCESS;
+ h_errno = 0;
+
+ return NSS_STATUS_SUCCESS;
+
+fail:
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ *h_errnop = NO_RECOVERY;
+ return NSS_STATUS_UNAVAIL;
+
+not_found:
+ *h_errnop = HOST_NOT_FOUND;
+ return NSS_STATUS_NOTFOUND;
+}
+
+NSS_GETHOSTBYNAME_FALLBACKS(resolve);
+NSS_GETHOSTBYADDR_FALLBACKS(resolve);
diff --git a/src/nss-resolve/nss-resolve.sym b/src/nss-resolve/nss-resolve.sym
new file mode 100644
index 0000000..ecc958e
--- /dev/null
+++ b/src/nss-resolve/nss-resolve.sym
@@ -0,0 +1,19 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ _nss_resolve_gethostbyname_r;
+ _nss_resolve_gethostbyname2_r;
+ _nss_resolve_gethostbyname3_r;
+ _nss_resolve_gethostbyname4_r;
+ _nss_resolve_gethostbyaddr_r;
+ _nss_resolve_gethostbyaddr2_r;
+local: *;
+};
diff --git a/src/nss-systemd/nss-systemd.c b/src/nss-systemd/nss-systemd.c
new file mode 100644
index 0000000..758f381
--- /dev/null
+++ b/src/nss-systemd/nss-systemd.c
@@ -0,0 +1,640 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <nss.h>
+#include <pthread.h>
+
+#include "env-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "nss-systemd.h"
+#include "nss-util.h"
+#include "pthread-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "user-record-nss.h"
+#include "user-util.h"
+#include "userdb-glue.h"
+#include "userdb.h"
+
+static const struct passwd root_passwd = {
+ .pw_name = (char*) "root",
+ .pw_passwd = (char*) "x", /* see shadow file */
+ .pw_uid = 0,
+ .pw_gid = 0,
+ .pw_gecos = (char*) "Super User",
+ .pw_dir = (char*) "/root",
+ .pw_shell = (char*) "/bin/sh",
+};
+
+static const struct passwd nobody_passwd = {
+ .pw_name = (char*) NOBODY_USER_NAME,
+ .pw_passwd = (char*) "*", /* locked */
+ .pw_uid = UID_NOBODY,
+ .pw_gid = GID_NOBODY,
+ .pw_gecos = (char*) "User Nobody",
+ .pw_dir = (char*) "/",
+ .pw_shell = (char*) NOLOGIN,
+};
+
+static const struct group root_group = {
+ .gr_name = (char*) "root",
+ .gr_gid = 0,
+ .gr_passwd = (char*) "x", /* see shadow file */
+ .gr_mem = (char*[]) { NULL },
+};
+
+static const struct group nobody_group = {
+ .gr_name = (char*) NOBODY_GROUP_NAME,
+ .gr_gid = GID_NOBODY,
+ .gr_passwd = (char*) "*", /* locked */
+ .gr_mem = (char*[]) { NULL },
+};
+
+typedef struct GetentData {
+ /* As explained in NOTES section of getpwent_r(3) as 'getpwent_r() is not really reentrant since it
+ * shares the reading position in the stream with all other threads', we need to protect the data in
+ * UserDBIterator from multithreaded programs which may call setpwent(), getpwent_r(), or endpwent()
+ * simultaneously. So, each function locks the data by using the mutex below. */
+ pthread_mutex_t mutex;
+ UserDBIterator *iterator;
+
+ /* Applies to group iterations only: true while we iterate over groups defined through NSS, false
+ * otherwise. */
+ bool by_membership;
+} GetentData;
+
+static GetentData getpwent_data = {
+ .mutex = PTHREAD_MUTEX_INITIALIZER
+};
+
+static GetentData getgrent_data = {
+ .mutex = PTHREAD_MUTEX_INITIALIZER
+};
+
+NSS_GETPW_PROTOTYPES(systemd);
+NSS_GETGR_PROTOTYPES(systemd);
+NSS_PWENT_PROTOTYPES(systemd);
+NSS_GRENT_PROTOTYPES(systemd);
+NSS_INITGROUPS_PROTOTYPE(systemd);
+
+enum nss_status _nss_systemd_getpwnam_r(
+ const char *name,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ enum nss_status status;
+ int e;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(pwd);
+ assert(errnop);
+
+ /* If the username is not valid, then we don't know it. Ideally libc would filter these for us
+ * anyway. We don't generate EINVAL here, because it isn't really out business to complain about
+ * invalid user names. */
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Synthesize entries for the root and nobody users, in case they are missing in /etc/passwd */
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_SYNTHETIC") <= 0) {
+
+ if (streq(name, root_passwd.pw_name)) {
+ *pwd = root_passwd;
+ return NSS_STATUS_SUCCESS;
+ }
+
+ if (streq(name, nobody_passwd.pw_name)) {
+ if (!synthesize_nobody())
+ return NSS_STATUS_NOTFOUND;
+
+ *pwd = nobody_passwd;
+ return NSS_STATUS_SUCCESS;
+ }
+
+ } else if (STR_IN_SET(name, root_passwd.pw_name, nobody_passwd.pw_name))
+ return NSS_STATUS_NOTFOUND;
+
+ status = userdb_getpwnam(name, pwd, buffer, buflen, &e);
+ if (IN_SET(status, NSS_STATUS_UNAVAIL, NSS_STATUS_TRYAGAIN)) {
+ UNPROTECT_ERRNO;
+ *errnop = e;
+ return status;
+ }
+
+ return status;
+}
+
+enum nss_status _nss_systemd_getpwuid_r(
+ uid_t uid,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ enum nss_status status;
+ int e;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(pwd);
+ assert(errnop);
+
+ if (!uid_is_valid(uid))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Synthesize data for the root user and for nobody in case they are missing from /etc/passwd */
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_SYNTHETIC") <= 0) {
+
+ if (uid == root_passwd.pw_uid) {
+ *pwd = root_passwd;
+ return NSS_STATUS_SUCCESS;
+ }
+
+ if (uid == nobody_passwd.pw_uid) {
+ if (!synthesize_nobody())
+ return NSS_STATUS_NOTFOUND;
+
+ *pwd = nobody_passwd;
+ return NSS_STATUS_SUCCESS;
+ }
+
+ } else if (uid == root_passwd.pw_uid || uid == nobody_passwd.pw_uid)
+ return NSS_STATUS_NOTFOUND;
+
+ status = userdb_getpwuid(uid, pwd, buffer, buflen, &e);
+ if (IN_SET(status, NSS_STATUS_UNAVAIL, NSS_STATUS_TRYAGAIN)) {
+ UNPROTECT_ERRNO;
+ *errnop = e;
+ return status;
+ }
+
+ return status;
+}
+
+#pragma GCC diagnostic ignored "-Wsizeof-pointer-memaccess"
+
+enum nss_status _nss_systemd_getgrnam_r(
+ const char *name,
+ struct group *gr,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ enum nss_status status;
+ int e;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(name);
+ assert(gr);
+ assert(errnop);
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Synthesize records for root and nobody, in case they are missing from /etc/group */
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_SYNTHETIC") <= 0) {
+
+ if (streq(name, root_group.gr_name)) {
+ *gr = root_group;
+ return NSS_STATUS_SUCCESS;
+ }
+
+ if (streq(name, nobody_group.gr_name)) {
+ if (!synthesize_nobody())
+ return NSS_STATUS_NOTFOUND;
+
+ *gr = nobody_group;
+ return NSS_STATUS_SUCCESS;
+ }
+
+ } else if (STR_IN_SET(name, root_group.gr_name, nobody_group.gr_name))
+ return NSS_STATUS_NOTFOUND;
+
+ status = userdb_getgrnam(name, gr, buffer, buflen, &e);
+ if (IN_SET(status, NSS_STATUS_UNAVAIL, NSS_STATUS_TRYAGAIN)) {
+ UNPROTECT_ERRNO;
+ *errnop = e;
+ return status;
+ }
+
+ return status;
+}
+
+enum nss_status _nss_systemd_getgrgid_r(
+ gid_t gid,
+ struct group *gr,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ enum nss_status status;
+ int e;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(gr);
+ assert(errnop);
+
+ if (!gid_is_valid(gid))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Synthesize records for root and nobody, in case they are missing from /etc/group */
+ if (getenv_bool_secure("SYSTEMD_NSS_BYPASS_SYNTHETIC") <= 0) {
+
+ if (gid == root_group.gr_gid) {
+ *gr = root_group;
+ return NSS_STATUS_SUCCESS;
+ }
+
+ if (gid == nobody_group.gr_gid) {
+ if (!synthesize_nobody())
+ return NSS_STATUS_NOTFOUND;
+
+ *gr = nobody_group;
+ return NSS_STATUS_SUCCESS;
+ }
+
+ } else if (gid == root_group.gr_gid || gid == nobody_group.gr_gid)
+ return NSS_STATUS_NOTFOUND;
+
+ status = userdb_getgrgid(gid, gr, buffer, buflen, &e);
+ if (IN_SET(status, NSS_STATUS_UNAVAIL, NSS_STATUS_TRYAGAIN)) {
+ UNPROTECT_ERRNO;
+ *errnop = e;
+ return status;
+ }
+
+ return status;
+}
+
+static enum nss_status nss_systemd_endent(GetentData *p) {
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(p);
+
+ _cleanup_(pthread_mutex_unlock_assertp) pthread_mutex_t *_l = NULL;
+ _l = pthread_mutex_lock_assert(&p->mutex);
+
+ p->iterator = userdb_iterator_free(p->iterator);
+ p->by_membership = false;
+
+ return NSS_STATUS_SUCCESS;
+}
+
+enum nss_status _nss_systemd_endpwent(void) {
+ return nss_systemd_endent(&getpwent_data);
+}
+
+enum nss_status _nss_systemd_endgrent(void) {
+ return nss_systemd_endent(&getgrent_data);
+}
+
+enum nss_status _nss_systemd_setpwent(int stayopen) {
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ _cleanup_(pthread_mutex_unlock_assertp) pthread_mutex_t *_l = NULL;
+ int r;
+
+ _l = pthread_mutex_lock_assert(&getpwent_data.mutex);
+
+ getpwent_data.iterator = userdb_iterator_free(getpwent_data.iterator);
+ getpwent_data.by_membership = false;
+
+ /* Don't synthesize root/nobody when iterating. Let nss-files take care of that. If the two records
+ * are missing there, then that's fine, after all getpwent() is known to be possibly incomplete
+ * (think: LDAP/NIS type situations), and our synthesizing of root/nobody is a robustness fallback
+ * only, which matters for getpwnam()/getpwuid() primarily, which are the main NSS entrypoints to the
+ * user database. */
+ r = userdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE, &getpwent_data.iterator);
+ return r < 0 ? NSS_STATUS_UNAVAIL : NSS_STATUS_SUCCESS;
+}
+
+enum nss_status _nss_systemd_setgrent(int stayopen) {
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ _cleanup_(pthread_mutex_unlock_assertp) pthread_mutex_t *_l = NULL;
+ int r;
+
+ _l = pthread_mutex_lock_assert(&getgrent_data.mutex);
+
+ getgrent_data.iterator = userdb_iterator_free(getgrent_data.iterator);
+ getpwent_data.by_membership = false;
+
+ /* See _nss_systemd_setpwent() for an explanation why we use USERDB_DONT_SYNTHESIZE here */
+ r = groupdb_all(nss_glue_userdb_flags() | USERDB_DONT_SYNTHESIZE, &getgrent_data.iterator);
+ return r < 0 ? NSS_STATUS_UNAVAIL : NSS_STATUS_SUCCESS;
+}
+
+enum nss_status _nss_systemd_getpwent_r(
+ struct passwd *result,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(result);
+ assert(errnop);
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ _cleanup_(pthread_mutex_unlock_assertp) pthread_mutex_t *_l = NULL;
+
+ _l = pthread_mutex_lock_assert(&getpwent_data.mutex);
+
+ if (!getpwent_data.iterator) {
+ UNPROTECT_ERRNO;
+ *errnop = EHOSTDOWN;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ r = userdb_iterator_get(getpwent_data.iterator, &ur);
+ if (r == -ESRCH)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ r = nss_pack_user_record(ur, result, buffer, buflen);
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ return NSS_STATUS_SUCCESS;
+}
+
+enum nss_status _nss_systemd_getgrent_r(
+ struct group *result,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(group_record_unrefp) GroupRecord *gr = NULL;
+ _cleanup_free_ char **members = NULL;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(result);
+ assert(errnop);
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ _cleanup_(pthread_mutex_unlock_assertp) pthread_mutex_t *_l = NULL;
+
+ _l = pthread_mutex_lock_assert(&getgrent_data.mutex);
+
+ if (!getgrent_data.iterator) {
+ UNPROTECT_ERRNO;
+ *errnop = EHOSTDOWN;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (!getgrent_data.by_membership) {
+ r = groupdb_iterator_get(getgrent_data.iterator, &gr);
+ if (r == -ESRCH) {
+ /* So we finished iterating native groups now. Let's now continue with iterating
+ * native memberships, and generate additional group entries for any groups
+ * referenced there that are defined in NSS only. This means for those groups there
+ * will be two or more entries generated during iteration, but this is apparently how
+ * this is supposed to work, and what other implementations do too. Clients are
+ * supposed to merge the group records found during iteration automatically. */
+ getgrent_data.iterator = userdb_iterator_free(getgrent_data.iterator);
+
+ r = membershipdb_all(nss_glue_userdb_flags(), &getgrent_data.iterator);
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ getgrent_data.by_membership = true;
+ } else if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ } else if (!STR_IN_SET(gr->group_name, root_group.gr_name, nobody_group.gr_name)) {
+ r = membershipdb_by_group_strv(gr->group_name, nss_glue_userdb_flags(), &members);
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+ }
+ }
+
+ if (getgrent_data.by_membership) {
+ _cleanup_(_nss_systemd_unblockp) bool blocked = false;
+
+ for (;;) {
+ _cleanup_free_ char *user_name = NULL, *group_name = NULL;
+
+ r = membershipdb_iterator_get(getgrent_data.iterator, &user_name, &group_name);
+ if (r == -ESRCH)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (STR_IN_SET(user_name, root_passwd.pw_name, nobody_passwd.pw_name))
+ continue;
+ if (STR_IN_SET(group_name, root_group.gr_name, nobody_group.gr_name))
+ continue;
+
+ /* We are about to recursively call into NSS, let's make sure we disable recursion into our own code. */
+ if (!blocked) {
+ r = _nss_systemd_block(true);
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ blocked = true;
+ }
+
+ r = nss_group_record_by_name(group_name, false, &gr);
+ if (r == -ESRCH)
+ continue;
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do NSS check for group '%s', ignoring: %m", group_name);
+ continue;
+ }
+
+ members = strv_new(user_name);
+ if (!members) {
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ /* Note that we currently generate one group entry per user that is part of a
+ * group. It's a bit ugly, but equivalent to generating a single entry with a set of
+ * members in them. */
+ break;
+ }
+ }
+
+ r = nss_pack_group_record(gr, members, result, buffer, buflen);
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ return NSS_STATUS_SUCCESS;
+}
+
+enum nss_status _nss_systemd_initgroups_dyn(
+ const char *user_name,
+ gid_t gid,
+ long *start,
+ long *size,
+ gid_t **groupsp,
+ long int limit,
+ int *errnop) {
+
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ bool any = false;
+ int r;
+
+ PROTECT_ERRNO;
+ BLOCK_SIGNALS(NSS_SIGNALS_BLOCK);
+
+ assert(user_name);
+ assert(start);
+ assert(size);
+ assert(groupsp);
+ assert(errnop);
+
+ if (!valid_user_group_name(user_name, VALID_USER_RELAX))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Don't allow extending these two special users, the same as we won't resolve them via getpwnam() */
+ if (STR_IN_SET(user_name, root_passwd.pw_name, nobody_passwd.pw_name))
+ return NSS_STATUS_NOTFOUND;
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ r = membershipdb_by_user(user_name, nss_glue_userdb_flags(), &iterator);
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ for (;;) {
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ _cleanup_free_ char *group_name = NULL;
+
+ r = membershipdb_iterator_get(iterator, NULL, &group_name);
+ if (r == -ESRCH)
+ break;
+ if (r < 0) {
+ UNPROTECT_ERRNO;
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ /* The group might be defined via traditional NSS only, hence let's do a full look-up without
+ * disabling NSS. This means we are operating recursively here. */
+
+ r = groupdb_by_name(group_name, (nss_glue_userdb_flags() & ~USERDB_AVOID_NSS) | USERDB_AVOID_SHADOW, &g);
+ if (r == -ESRCH)
+ continue;
+ if (r < 0) {
+ log_debug_errno(r, "Failed to resolve group '%s', ignoring: %m", group_name);
+ continue;
+ }
+
+ if (g->gid == gid)
+ continue;
+
+ if (*start >= *size) {
+ gid_t *new_groups;
+ long new_size;
+
+ if (limit > 0 && *size >= limit) /* Reached the limit.? */
+ break;
+
+ if (*size > LONG_MAX/2) { /* Check for overflow */
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ new_size = *start * 2;
+ if (limit > 0 && new_size > limit)
+ new_size = limit;
+
+ /* Enlarge buffer */
+ new_groups = reallocarray(*groupsp, new_size, sizeof(**groupsp));
+ if (!new_groups) {
+ UNPROTECT_ERRNO;
+ *errnop = ENOMEM;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ *groupsp = new_groups;
+ *size = new_size;
+ }
+
+ (*groupsp)[(*start)++] = g->gid;
+ any = true;
+ }
+
+ return any ? NSS_STATUS_SUCCESS : NSS_STATUS_NOTFOUND;
+}
+
+static thread_local unsigned _blocked = 0;
+
+_public_ int _nss_systemd_block(bool b) {
+
+ /* This blocks recursively: it's blocked for as many times this function is called with `true` until
+ * it is called an equal time with `false`. */
+
+ if (b) {
+ if (_blocked >= UINT_MAX)
+ return -EOVERFLOW;
+
+ _blocked++;
+ } else {
+ if (_blocked <= 0)
+ return -EOVERFLOW;
+
+ _blocked--;
+ }
+
+ return b; /* Return what is passed in, i.e. the new state from the PoV of the caller */
+}
+
+_public_ bool _nss_systemd_is_blocked(void) {
+ return _blocked > 0;
+}
diff --git a/src/nss-systemd/nss-systemd.h b/src/nss-systemd/nss-systemd.h
new file mode 100644
index 0000000..e97b801
--- /dev/null
+++ b/src/nss-systemd/nss-systemd.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int _nss_systemd_block(bool b);
+bool _nss_systemd_is_blocked(void);
+
+/* For use with the _cleanup_() macro */
+static inline void _nss_systemd_unblockp(bool *b) {
+ if (*b)
+ assert_se(_nss_systemd_block(false) >= 0);
+}
diff --git a/src/nss-systemd/nss-systemd.sym b/src/nss-systemd/nss-systemd.sym
new file mode 100644
index 0000000..7caf217
--- /dev/null
+++ b/src/nss-systemd/nss-systemd.sym
@@ -0,0 +1,28 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+{
+global:
+ _nss_systemd_getpwnam_r;
+ _nss_systemd_getpwuid_r;
+ _nss_systemd_endpwent;
+ _nss_systemd_setpwent;
+ _nss_systemd_getpwent_r;
+ _nss_systemd_getgrnam_r;
+ _nss_systemd_getgrgid_r;
+ _nss_systemd_endgrent;
+ _nss_systemd_setgrent;
+ _nss_systemd_getgrent_r;
+ _nss_systemd_initgroups_dyn;
+
+ /* These two are not used by glibc, but can be used by apps to explicitly disable nss-systemd for the calling thread. */
+ _nss_systemd_block;
+ _nss_systemd_is_blocked;
+local: *;
+};
diff --git a/src/nss-systemd/userdb-glue.c b/src/nss-systemd/userdb-glue.c
new file mode 100644
index 0000000..22af0fd
--- /dev/null
+++ b/src/nss-systemd/userdb-glue.c
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "env-util.h"
+#include "fd-util.h"
+#include "nss-systemd.h"
+#include "strv.h"
+#include "user-record-nss.h"
+#include "user-record.h"
+#include "userdb-glue.h"
+#include "userdb.h"
+
+UserDBFlags nss_glue_userdb_flags(void) {
+ UserDBFlags flags = USERDB_AVOID_NSS;
+
+ /* Make sure that we don't go in circles when allocating a dynamic UID by checking our own database */
+ if (getenv_bool_secure("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
+ flags |= USERDB_AVOID_DYNAMIC_USER;
+
+ return flags;
+}
+
+int nss_pack_user_record(
+ UserRecord *hr,
+ struct passwd *pwd,
+ char *buffer,
+ size_t buflen) {
+
+ const char *rn, *hd, *shell;
+ size_t required;
+
+ assert(hr);
+ assert(pwd);
+
+ assert_se(hr->user_name);
+ required = strlen(hr->user_name) + 1;
+
+ assert_se(rn = user_record_real_name(hr));
+ required += strlen(rn) + 1;
+
+ assert_se(hd = user_record_home_directory(hr));
+ required += strlen(hd) + 1;
+
+ assert_se(shell = user_record_shell(hr));
+ required += strlen(shell) + 1;
+
+ if (buflen < required)
+ return -ERANGE;
+
+ *pwd = (struct passwd) {
+ .pw_name = buffer,
+ .pw_uid = hr->uid,
+ .pw_gid = user_record_gid(hr),
+ .pw_passwd = (char*) "x", /* means: see shadow file */
+ };
+
+ assert(buffer);
+
+ pwd->pw_gecos = stpcpy(pwd->pw_name, hr->user_name) + 1;
+ pwd->pw_dir = stpcpy(pwd->pw_gecos, rn) + 1;
+ pwd->pw_shell = stpcpy(pwd->pw_dir, hd) + 1;
+ strcpy(pwd->pw_shell, shell);
+
+ return 0;
+}
+
+enum nss_status userdb_getpwnam(
+ const char *name,
+ struct passwd *pwd,
+ char *buffer, size_t buflen,
+ int *errnop) {
+
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ int r;
+
+ assert(pwd);
+ assert(errnop);
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ r = userdb_by_name(name, nss_glue_userdb_flags(), &hr);
+ if (r == -ESRCH)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ r = nss_pack_user_record(hr, pwd, buffer, buflen);
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ return NSS_STATUS_SUCCESS;
+}
+
+enum nss_status userdb_getpwuid(
+ uid_t uid,
+ struct passwd *pwd,
+ char *buffer,
+ size_t buflen,
+ int *errnop) {
+
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ int r;
+
+ assert(pwd);
+ assert(errnop);
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ r = userdb_by_uid(uid, nss_glue_userdb_flags(), &hr);
+ if (r == -ESRCH)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ r = nss_pack_user_record(hr, pwd, buffer, buflen);
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ return NSS_STATUS_SUCCESS;
+}
+
+int nss_pack_group_record(
+ GroupRecord *g,
+ char **extra_members,
+ struct group *gr,
+ char *buffer,
+ size_t buflen) {
+
+ char **array = NULL, *p, **m;
+ size_t required, n = 0, i = 0;
+
+ assert(g);
+ assert(gr);
+
+ assert_se(g->group_name);
+ required = strlen(g->group_name) + 1;
+
+ STRV_FOREACH(m, g->members) {
+ required += sizeof(char*); /* space for ptr array entry */
+ required += strlen(*m) + 1;
+ n++;
+ }
+ STRV_FOREACH(m, extra_members) {
+ if (strv_contains(g->members, *m))
+ continue;
+
+ required += sizeof(char*);
+ required += strlen(*m) + 1;
+ n++;
+ }
+
+ required += sizeof(char*); /* trailing NULL in ptr array entry */
+
+ if (buflen < required)
+ return -ERANGE;
+
+ array = (char**) buffer; /* place ptr array at beginning of buffer, under assumption buffer is aligned */
+ p = buffer + sizeof(void*) * (n + 1); /* place member strings right after the ptr array */
+
+ STRV_FOREACH(m, g->members) {
+ array[i++] = p;
+ p = stpcpy(p, *m) + 1;
+ }
+ STRV_FOREACH(m, extra_members) {
+ if (strv_contains(g->members, *m))
+ continue;
+
+ array[i++] = p;
+ p = stpcpy(p, *m) + 1;
+ }
+
+ assert_se(i == n);
+ array[n] = NULL;
+
+ *gr = (struct group) {
+ .gr_name = strcpy(p, g->group_name),
+ .gr_gid = g->gid,
+ .gr_passwd = (char*) "x", /* means: see shadow file */
+ .gr_mem = array,
+ };
+
+ return 0;
+}
+
+enum nss_status userdb_getgrnam(
+ const char *name,
+ struct group *gr,
+ char *buffer,
+ size_t buflen,
+ int *errnop) {
+
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ _cleanup_strv_free_ char **members = NULL;
+ int r;
+
+ assert(gr);
+ assert(errnop);
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ r = groupdb_by_name(name, nss_glue_userdb_flags(), &g);
+ if (r < 0 && r != -ESRCH) {
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ r = membershipdb_by_group_strv(name, nss_glue_userdb_flags(), &members);
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (!g) {
+ _cleanup_(_nss_systemd_unblockp) bool blocked = false;
+
+ if (strv_isempty(members))
+ return NSS_STATUS_NOTFOUND;
+
+ /* Grmbl, so we are supposed to extend a group entry, but the group entry itself is not
+ * accessible via non-NSS. Hence let's do what we have to do, and query NSS after all to
+ * acquire it, so that we can extend it (that's because glibc's group merging feature will
+ * merge groups only if both GID and name match and thus we need to have both first). It
+ * sucks behaving recursively likely this, but it's apparently what everybody does. We break
+ * the recursion for ourselves via the _nss_systemd_block_nss() lock. */
+
+ r = _nss_systemd_block(true);
+ if (r < 0)
+ return r;
+
+ blocked = true;
+
+ r = nss_group_record_by_name(name, false, &g);
+ if (r == -ESRCH)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+ }
+
+ r = nss_pack_group_record(g, members, gr, buffer, buflen);
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ return NSS_STATUS_SUCCESS;
+}
+
+enum nss_status userdb_getgrgid(
+ gid_t gid,
+ struct group *gr,
+ char *buffer,
+ size_t buflen,
+ int *errnop) {
+
+
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ _cleanup_strv_free_ char **members = NULL;
+ bool from_nss;
+ int r;
+
+ assert(gr);
+ assert(errnop);
+
+ if (_nss_systemd_is_blocked())
+ return NSS_STATUS_NOTFOUND;
+
+ r = groupdb_by_gid(gid, nss_glue_userdb_flags(), &g);
+ if (r < 0 && r != -ESRCH) {
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ if (!g) {
+ _cleanup_(_nss_systemd_unblockp) bool blocked = false;
+
+ /* So, quite possibly we have to extend an existing group record with additional members. But
+ * to do this we need to know the group name first. The group didn't exist via non-NSS
+ * queries though, hence let's try to acquire it here recursively via NSS. */
+
+ r = _nss_systemd_block(true);
+ if (r < 0)
+ return r;
+
+ blocked = true;
+
+ r = nss_group_record_by_gid(gid, false, &g);
+ if (r == -ESRCH)
+ return NSS_STATUS_NOTFOUND;
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ from_nss = true;
+ } else
+ from_nss = false;
+
+ r = membershipdb_by_group_strv(g->group_name, nss_glue_userdb_flags(), &members);
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_UNAVAIL;
+ }
+
+ /* If we acquired the record via NSS then there's no reason to respond unless we have to augment the
+ * list of members of the group */
+ if (from_nss && strv_isempty(members))
+ return NSS_STATUS_NOTFOUND;
+
+ r = nss_pack_group_record(g, members, gr, buffer, buflen);
+ if (r < 0) {
+ *errnop = -r;
+ return NSS_STATUS_TRYAGAIN;
+ }
+
+ return NSS_STATUS_SUCCESS;
+}
diff --git a/src/nss-systemd/userdb-glue.h b/src/nss-systemd/userdb-glue.h
new file mode 100644
index 0000000..cb0dcb9
--- /dev/null
+++ b/src/nss-systemd/userdb-glue.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <nss.h>
+#include <pwd.h>
+#include <grp.h>
+#include <sys/types.h>
+
+#include "userdb.h"
+
+UserDBFlags nss_glue_userdb_flags(void);
+
+int nss_pack_user_record(UserRecord *hr, struct passwd *pwd, char *buffer, size_t buflen);
+int nss_pack_group_record(GroupRecord *g, char **extra_members, struct group *gr, char *buffer, size_t buflen);
+
+enum nss_status userdb_getpwnam(const char *name, struct passwd *pwd, char *buffer, size_t buflen, int *errnop);
+enum nss_status userdb_getpwuid(uid_t uid, struct passwd *pwd, char *buffer, size_t buflen, int *errnop);
+
+enum nss_status userdb_getgrnam(const char *name, struct group *gr, char *buffer, size_t buflen, int *errnop);
+enum nss_status userdb_getgrgid(gid_t gid, struct group *gr, char *buffer, size_t buflen, int *errnop);
diff --git a/src/oom/meson.build b/src/oom/meson.build
new file mode 100644
index 0000000..1ea6766
--- /dev/null
+++ b/src/oom/meson.build
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_oomd_sources = files('''
+ oomd-manager-bus.c
+ oomd-manager-bus.h
+ oomd-manager.c
+ oomd-manager.h
+ oomd-util.c
+ oomd-util.h
+ oomd.c
+'''.split())
+
+oomctl_sources = files('''
+ oomctl.c
+'''.split())
+
+if conf.get('ENABLE_OOMD') == 1
+ tests += [
+ [['src/oom/test-oomd-util.c',
+ 'src/oom/oomd-util.c',
+ 'src/oom/oomd-util.h'],
+ [],
+ []]
+ ]
+
+ install_data('org.freedesktop.oom1.conf',
+ install_dir : dbuspolicydir)
+
+ install_data('org.freedesktop.oom1.service',
+ install_dir : dbussystemservicedir)
+
+ if install_sysconfdir
+ install_data('oomd.conf',
+ install_dir : pkgsysconfdir)
+ endif
+endif
diff --git a/src/oom/oomctl.c b/src/oom/oomctl.c
new file mode 100644
index 0000000..dd393fc
--- /dev/null
+++ b/src/oom/oomctl.c
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <unistd.h>
+
+#include "bus-error.h"
+#include "copy.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "terminal-util.h"
+#include "verbs.h"
+
+static PagerFlags arg_pager_flags = 0;
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("oomctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] COMMAND ...\n\n"
+ "%2$sManage or inspect the userspace OOM killer.%3$s\n"
+ "\n%4$sCommands:%5$s\n"
+ " dump Output the current state of systemd-oomd\n"
+ "\n%4$sOptions:%5$s\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nSee the %6$s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , ansi_underline(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int dump_state(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int fd = -1;
+ int r;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect system bus: %m");
+
+ (void) pager_open(arg_pager_flags);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.oom1",
+ "/org/freedesktop/oom1",
+ "org.freedesktop.oom1.Manager",
+ "DumpByFileDescriptor",
+ &error,
+ &reply,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to dump context: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "h", &fd);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ fflush(stdout);
+ return copy_bytes(fd, STDOUT_FILENO, (uint64_t) -1, 0);
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Invalid option passed.");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char* argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "dump", VERB_ANY, 1, VERB_DEFAULT, dump_state },
+ {}
+ };
+
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/oom/oomd-manager-bus.c b/src/oom/oomd-manager-bus.c
new file mode 100644
index 0000000..4ea2a33
--- /dev/null
+++ b/src/oom/oomd-manager-bus.c
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/capability.h>
+
+#include "bus-common-errors.h"
+#include "bus-polkit.h"
+#include "fd-util.h"
+#include "oomd-manager-bus.h"
+#include "oomd-manager.h"
+#include "user-util.h"
+
+static int bus_method_dump_by_fd(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *dump = NULL;
+ _cleanup_close_ int fd = -1;
+ Manager *m = userdata;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = manager_get_dump_string(m, &dump);
+ if (r < 0)
+ return r;
+
+ fd = acquire_data_fd(dump, strlen(dump), 0);
+ if (fd < 0)
+ return fd;
+
+ return sd_bus_reply_method_return(message, "h", fd);
+}
+
+static const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD_WITH_NAMES("DumpByFileDescriptor",
+ NULL,,
+ "h",
+ SD_BUS_PARAM(fd),
+ bus_method_dump_by_fd,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation manager_object = {
+ "/org/freedesktop/oom1",
+ "org.freedesktop.oom1.Manager",
+ .vtables = BUS_VTABLES(manager_vtable),
+};
diff --git a/src/oom/oomd-manager-bus.h b/src/oom/oomd-manager-bus.h
new file mode 100644
index 0000000..7935b35
--- /dev/null
+++ b/src/oom/oomd-manager-bus.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-object.h"
+
+typedef struct Manager Manager;
+
+extern const BusObjectImplementation manager_object;
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c
new file mode 100644
index 0000000..fec9651
--- /dev/null
+++ b/src/oom/oomd-manager.c
@@ -0,0 +1,546 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-log-control-api.h"
+#include "bus-util.h"
+#include "bus-polkit.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "oomd-manager-bus.h"
+#include "oomd-manager.h"
+#include "path-util.h"
+
+typedef struct ManagedOOMReply {
+ ManagedOOMMode mode;
+ char *path;
+ char *property;
+ unsigned limit;
+} ManagedOOMReply;
+
+static void managed_oom_reply_destroy(ManagedOOMReply *reply) {
+ assert(reply);
+ free(reply->path);
+ free(reply->property);
+}
+
+static int managed_oom_mode(const char *name, JsonVariant *v, JsonDispatchFlags flags, void *userdata) {
+ ManagedOOMMode *mode = userdata, m;
+ const char *s;
+
+ assert(mode);
+ assert_se(s = json_variant_string(v));
+
+ m = managed_oom_mode_from_string(s);
+ if (m < 0)
+ return json_log(v, flags, SYNTHETIC_ERRNO(EINVAL), "%s is not a valid ManagedOOMMode", s);
+
+ *mode = m;
+ return 0;
+}
+
+static int process_managed_oom_reply(
+ Varlink *link,
+ JsonVariant *parameters,
+ const char *error_id,
+ VarlinkReplyFlags flags,
+ void *userdata) {
+ JsonVariant *c, *cgroups;
+ Manager *m = userdata;
+ int r = 0;
+
+ assert(m);
+
+ static const JsonDispatch dispatch_table[] = {
+ { "mode", JSON_VARIANT_STRING, managed_oom_mode, offsetof(ManagedOOMReply, mode), JSON_MANDATORY },
+ { "path", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, path), JSON_MANDATORY },
+ { "property", JSON_VARIANT_STRING, json_dispatch_string, offsetof(ManagedOOMReply, property), JSON_MANDATORY },
+ { "limit", JSON_VARIANT_UNSIGNED, json_dispatch_unsigned, offsetof(ManagedOOMReply, limit), 0 },
+ {},
+ };
+
+ if (error_id) {
+ r = -EIO;
+ log_debug("Error getting ManagedOOM cgroups: %s", error_id);
+ goto finish;
+ }
+
+ cgroups = json_variant_by_key(parameters, "cgroups");
+ if (!cgroups) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ /* Skip malformed elements and keep processing in case the others are good */
+ JSON_VARIANT_ARRAY_FOREACH(c, cgroups) {
+ _cleanup_(managed_oom_reply_destroy) ManagedOOMReply reply = {};
+ OomdCGroupContext *ctx;
+ Hashmap *monitor_hm;
+ loadavg_t limit;
+ int ret;
+
+ if (!json_variant_is_object(c))
+ continue;
+
+ ret = json_dispatch(c, dispatch_table, NULL, 0, &reply);
+ if (ret == -ENOMEM) {
+ r = ret;
+ goto finish;
+ } else if (ret < 0)
+ continue;
+
+ monitor_hm = streq(reply.property, "ManagedOOMSwap") ?
+ m->monitored_swap_cgroup_contexts : m->monitored_mem_pressure_cgroup_contexts;
+
+ if (reply.mode == MANAGED_OOM_AUTO) {
+ (void) oomd_cgroup_context_free(hashmap_remove(monitor_hm, reply.path));
+ continue;
+ }
+
+ limit = m->default_mem_pressure_limit;
+
+ if (streq(reply.property, "ManagedOOMMemoryPressure")) {
+ if (reply.limit > 100)
+ continue;
+ else if (reply.limit != 0) {
+ ret = store_loadavg_fixed_point((unsigned long) reply.limit, 0, &limit);
+ if (ret < 0)
+ continue;
+ }
+ }
+
+ ret = oomd_insert_cgroup_context(NULL, monitor_hm, reply.path);
+ if (ret == -ENOMEM) {
+ r = ret;
+ goto finish;
+ }
+
+ /* Always update the limit in case it was changed. For non-memory pressure detection the value is
+ * ignored so always updating it here is not a problem. */
+ ctx = hashmap_get(monitor_hm, reply.path);
+ if (ctx)
+ ctx->mem_pressure_limit = limit;
+ }
+
+finish:
+ if (!FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ m->varlink = varlink_close_unref(link);
+
+ return r;
+}
+
+/* Fill `new_h` with `path`'s descendent OomdCGroupContexts. Only include descendent cgroups that are possible
+ * candidates for action. That is, only leaf cgroups or cgroups with memory.oom.group set to "1".
+ *
+ * This function ignores most errors in order to handle cgroups that may have been cleaned up while populating
+ * the hashmap.
+ *
+ * `new_h` is of the form { key: cgroup paths -> value: OomdCGroupContext } */
+static int recursively_get_cgroup_context(Hashmap *new_h, const char *path) {
+ _cleanup_free_ char *subpath = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ int r;
+
+ assert(new_h);
+ assert(path);
+
+ r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
+ if (r < 0)
+ return r;
+
+ r = cg_read_subgroup(d, &subpath);
+ if (r < 0)
+ return r;
+ else if (r == 0) { /* No subgroups? We're a leaf node */
+ r = oomd_insert_cgroup_context(NULL, new_h, path);
+ return (r == -ENOMEM) ? r : 0;
+ }
+
+ do {
+ _cleanup_free_ char *cg_path = NULL;
+ bool oom_group;
+
+ cg_path = path_join(empty_to_root(path), subpath);
+ if (!cg_path)
+ return -ENOMEM;
+
+ subpath = mfree(subpath);
+
+ r = cg_get_attribute_as_bool("memory", cg_path, "memory.oom.group", &oom_group);
+ /* The cgroup might be gone. Skip it as a candidate since we can't get information on it. */
+ if (r < 0)
+ return (r == -ENOMEM) ? r : 0;
+
+ if (oom_group)
+ r = oomd_insert_cgroup_context(NULL, new_h, cg_path);
+ else
+ r = recursively_get_cgroup_context(new_h, cg_path);
+ if (r == -ENOMEM)
+ return r;
+ } while ((r = cg_read_subgroup(d, &subpath)) > 0);
+
+ return 0;
+}
+
+static int update_monitored_cgroup_contexts(Hashmap **monitored_cgroups) {
+ _cleanup_hashmap_free_ Hashmap *new_base = NULL;
+ OomdCGroupContext *ctx;
+ int r;
+
+ assert(monitored_cgroups);
+
+ new_base = hashmap_new(&oomd_cgroup_ctx_hash_ops);
+ if (!new_base)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(ctx, *monitored_cgroups) {
+ /* Skip most errors since the cgroup we're trying to update might not exist anymore. */
+ r = oomd_insert_cgroup_context(*monitored_cgroups, new_base, ctx->path);
+ if (r == -ENOMEM)
+ return r;
+ }
+
+ hashmap_free(*monitored_cgroups);
+ *monitored_cgroups = TAKE_PTR(new_base);
+
+ return 0;
+}
+
+static int get_monitored_cgroup_contexts_candidates(Hashmap *monitored_cgroups, Hashmap **ret_candidates) {
+ _cleanup_hashmap_free_ Hashmap *candidates = NULL;
+ OomdCGroupContext *ctx;
+ int r;
+
+ assert(monitored_cgroups);
+ assert(ret_candidates);
+
+ candidates = hashmap_new(&oomd_cgroup_ctx_hash_ops);
+ if (!candidates)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(ctx, monitored_cgroups) {
+ r = recursively_get_cgroup_context(candidates, ctx->path);
+ if (r == -ENOMEM)
+ return r;
+ }
+
+ *ret_candidates = TAKE_PTR(candidates);
+
+ return 0;
+}
+
+static int acquire_managed_oom_connect(Manager *m) {
+ _cleanup_(varlink_close_unrefp) Varlink *link = NULL;
+ int r;
+
+ assert(m);
+ assert(m->event);
+
+ r = varlink_connect_address(&link, VARLINK_ADDR_PATH_MANAGED_OOM);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to %s: %m", VARLINK_ADDR_PATH_MANAGED_OOM);
+
+ (void) varlink_set_userdata(link, m);
+ (void) varlink_set_description(link, "oomd");
+ (void) varlink_set_relative_timeout(link, USEC_INFINITY);
+
+ r = varlink_attach_event(link, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ r = varlink_bind_reply(link, process_managed_oom_reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind reply callback: %m");
+
+ r = varlink_observe(link, "io.systemd.ManagedOOM.SubscribeManagedOOMCGroups", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to observe varlink call: %m");
+
+ m->varlink = TAKE_PTR(link);
+ return 0;
+}
+
+static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+ _cleanup_set_free_ Set *targets = NULL;
+ Manager *m = userdata;
+ usec_t usec_now;
+ int r;
+
+ assert(s);
+ assert(userdata);
+
+ /* Reset timer */
+ r = sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &usec_now);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset event timer");
+
+ r = sd_event_source_set_time_relative(s, INTERVAL_USEC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set relative time for timer");
+
+ /* Reconnect if our connection dropped */
+ if (!m->varlink) {
+ r = acquire_managed_oom_connect(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire varlink connection");
+ }
+
+ /* Update the cgroups used for detection/action */
+ r = update_monitored_cgroup_contexts(&m->monitored_swap_cgroup_contexts);
+ if (r == -ENOMEM)
+ return log_error_errno(r, "Failed to update monitored swap cgroup contexts");
+
+ r = update_monitored_cgroup_contexts(&m->monitored_mem_pressure_cgroup_contexts);
+ if (r == -ENOMEM)
+ return log_error_errno(r, "Failed to update monitored memory pressure cgroup contexts");
+
+ r = oomd_system_context_acquire("/proc/swaps", &m->system_context);
+ /* If there aren't units depending on swap actions, the only error we exit on is ENOMEM */
+ if (r == -ENOMEM || (r < 0 && !hashmap_isempty(m->monitored_swap_cgroup_contexts)))
+ return log_error_errno(r, "Failed to acquire system context");
+
+ /* If we're still recovering from a kill, don't try to kill again yet */
+ if (m->post_action_delay_start > 0) {
+ if (m->post_action_delay_start + POST_ACTION_DELAY_USEC > usec_now)
+ return 0;
+ else
+ m->post_action_delay_start = 0;
+ }
+
+ r = oomd_pressure_above(m->monitored_mem_pressure_cgroup_contexts, PRESSURE_DURATION_USEC, &targets);
+ if (r == -ENOMEM)
+ return log_error_errno(r, "Failed to check if memory pressure exceeded limits");
+ else if (r == 1) {
+ /* Check if there was reclaim activity in the last interval. The concern is the following case:
+ * Pressure climbed, a lot of high-frequency pages were reclaimed, and we killed the offending
+ * cgroup. Even after this, well-behaved processes will fault in recently resident pages and
+ * this will cause pressure to remain high. Thus if there isn't any reclaim pressure, no need
+ * to kill something (it won't help anyways). */
+ if (oomd_memory_reclaim(m->monitored_mem_pressure_cgroup_contexts)) {
+ _cleanup_hashmap_free_ Hashmap *candidates = NULL;
+ OomdCGroupContext *t;
+
+ r = get_monitored_cgroup_contexts_candidates(m->monitored_mem_pressure_cgroup_contexts, &candidates);
+ if (r == -ENOMEM)
+ return log_error_errno(r, "Failed to get monitored memory pressure cgroup candidates");
+
+ SET_FOREACH(t, targets) {
+ log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity",
+ t->path, LOAD_INT(t->mem_pressure_limit), PRESSURE_DURATION_USEC / USEC_PER_SEC);
+
+ r = oomd_kill_by_pgscan(candidates, t->path, m->dry_run);
+ if (r == -ENOMEM)
+ return log_error_errno(r, "Failed to kill cgroup processes by pgscan");
+ if (r < 0)
+ log_info("Failed to kill any cgroup(s) under %s based on pressure", t->path);
+ else {
+ /* Don't act on all the high pressure cgroups at once; return as soon as we kill one */
+ m->post_action_delay_start = usec_now;
+ return 0;
+ }
+ }
+ }
+ }
+
+ if (oomd_swap_free_below(&m->system_context, (100 - m->swap_used_limit))) {
+ _cleanup_hashmap_free_ Hashmap *candidates = NULL;
+
+ log_notice("Swap used (%"PRIu64") / total (%"PRIu64") is more than %u%%",
+ m->system_context.swap_used, m->system_context.swap_total, m->swap_used_limit);
+
+ r = get_monitored_cgroup_contexts_candidates(m->monitored_swap_cgroup_contexts, &candidates);
+ if (r == -ENOMEM)
+ return log_error_errno(r, "Failed to get monitored swap cgroup candidates");
+
+ r = oomd_kill_by_swap_usage(candidates, m->dry_run);
+ if (r == -ENOMEM)
+ return log_error_errno(r, "Failed to kill cgroup processes by swap usage");
+ if (r < 0)
+ log_info("Failed to kill any cgroup(s) based on swap");
+ else {
+ m->post_action_delay_start = usec_now;
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int monitor_cgroup_contexts(Manager *m) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+ int r;
+
+ assert(m);
+ assert(m->event);
+
+ r = sd_event_add_time(m->event, &s, CLOCK_MONOTONIC, 0, 0, monitor_cgroup_contexts_handler, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_exit_on_failure(s, true);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ON);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s, "oomd-timer");
+
+ m->cgroup_context_event_source = TAKE_PTR(s);
+ return 0;
+}
+
+void manager_free(Manager *m) {
+ assert(m);
+
+ varlink_close_unref(m->varlink);
+ sd_event_source_unref(m->cgroup_context_event_source);
+ sd_event_unref(m->event);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+ sd_bus_flush_close_unref(m->bus);
+
+ hashmap_free(m->monitored_swap_cgroup_contexts);
+ hashmap_free(m->monitored_mem_pressure_cgroup_contexts);
+
+ free(m);
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ m->monitored_swap_cgroup_contexts = hashmap_new(&oomd_cgroup_ctx_hash_ops);
+ if (!m->monitored_swap_cgroup_contexts)
+ return -ENOMEM;
+
+ m->monitored_mem_pressure_cgroup_contexts = hashmap_new(&oomd_cgroup_ctx_hash_ops);
+ if (!m->monitored_mem_pressure_cgroup_contexts)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->bus);
+
+ r = bus_open_system_watch_bind_with_description(&m->bus, "bus-api-oom");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ r = bus_add_implementation(m->bus, &manager_object, m);
+ if (r < 0)
+ return r;
+
+ r = bus_log_control_api_register(m->bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.oom1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
+
+int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit) {
+ unsigned long l;
+ int r;
+
+ assert(m);
+
+ m->dry_run = dry_run;
+
+ m->swap_used_limit = swap_used_limit != -1 ? swap_used_limit : DEFAULT_SWAP_USED_LIMIT;
+ assert(m->swap_used_limit <= 100);
+
+ l = mem_pressure_limit != -1 ? mem_pressure_limit : DEFAULT_MEM_PRESSURE_LIMIT;
+ r = store_loadavg_fixed_point(l, 0, &m->default_mem_pressure_limit);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ r = acquire_managed_oom_connect(m);
+ if (r < 0)
+ return r;
+
+ r = monitor_cgroup_contexts(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int manager_get_dump_string(Manager *m, char **ret) {
+ _cleanup_free_ char *dump = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ OomdCGroupContext *c;
+ size_t size;
+ char *key;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ f = open_memstream_unlocked(&dump, &size);
+ if (!f)
+ return -errno;
+
+ fprintf(f,
+ "Dry Run: %s\n"
+ "Swap Used Limit: %u%%\n"
+ "Default Memory Pressure Limit: %lu%%\n"
+ "System Context:\n",
+ yes_no(m->dry_run),
+ m->swap_used_limit,
+ LOAD_INT(m->default_mem_pressure_limit));
+ oomd_dump_system_context(&m->system_context, f, "\t");
+
+ fprintf(f, "Swap Monitored CGroups:\n");
+ HASHMAP_FOREACH_KEY(c, key, m->monitored_swap_cgroup_contexts)
+ oomd_dump_swap_cgroup_context(c, f, "\t");
+
+ fprintf(f, "Memory Pressure Monitored CGroups:\n");
+ HASHMAP_FOREACH_KEY(c, key, m->monitored_mem_pressure_cgroup_contexts)
+ oomd_dump_memory_pressure_cgroup_context(c, f, "\t");
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ f = safe_fclose(f);
+
+ *ret = TAKE_PTR(dump);
+ return 0;
+}
diff --git a/src/oom/oomd-manager.h b/src/oom/oomd-manager.h
new file mode 100644
index 0000000..3f3eb5a
--- /dev/null
+++ b/src/oom/oomd-manager.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "oomd-util.h"
+#include "sd-event.h"
+#include "varlink.h"
+
+/* Polling interval for monitoring stats */
+#define INTERVAL_USEC (1 * USEC_PER_SEC)
+
+/* Used to weight the averages */
+#define AVERAGE_SIZE_DECAY 4
+
+/* Take action if 10s of memory pressure > 60 for more than 30s. We use the "full" value from PSI so this is the
+ * percentage of time all tasks were delayed (i.e. unproductive).
+ * Generally 60 or higher might be acceptable for something like system.slice with no memory.high set; processes in
+ * system.slice are assumed to be less latency sensitive. */
+#define PRESSURE_DURATION_USEC (30 * USEC_PER_SEC)
+#define DEFAULT_MEM_PRESSURE_LIMIT 60
+#define DEFAULT_SWAP_USED_LIMIT 90
+
+#define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC)
+
+typedef struct Manager Manager;
+
+struct Manager {
+ sd_bus *bus;
+ sd_event *event;
+
+ Hashmap *polkit_registry;
+
+ bool dry_run;
+ unsigned swap_used_limit;
+ loadavg_t default_mem_pressure_limit;
+
+ /* k: cgroup paths -> v: OomdCGroupContext
+ * Used to detect when to take action. */
+ Hashmap *monitored_swap_cgroup_contexts;
+ Hashmap *monitored_mem_pressure_cgroup_contexts;
+
+ OomdSystemContext system_context;
+
+ usec_t post_action_delay_start;
+
+ sd_event_source *cgroup_context_event_source;
+
+ Varlink *varlink;
+};
+
+void manager_free(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+int manager_new(Manager **ret);
+
+int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit);
+
+int manager_get_dump_string(Manager *m, char **ret);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_oomd_default);
diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c
new file mode 100644
index 0000000..cec656f
--- /dev/null
+++ b/src/oom/oomd-util.c
@@ -0,0 +1,451 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "oomd-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "procfs-util.h"
+#include "signal-util.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+ oomd_cgroup_ctx_hash_ops,
+ char,
+ string_hash_func,
+ string_compare_func,
+ OomdCGroupContext,
+ oomd_cgroup_context_free);
+
+static int log_kill(pid_t pid, int sig, void *userdata) {
+ log_debug("oomd attempting to kill " PID_FMT " with %s", pid, signal_to_string(sig));
+ return 0;
+}
+
+static int increment_oomd_xattr(const char *path, const char *xattr, uint64_t num_procs_killed) {
+ _cleanup_free_ char *value = NULL;
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+ uint64_t curr_count = 0;
+ int r;
+
+ assert(path);
+ assert(xattr);
+
+ r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, path, xattr, &value);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ if (!isempty(value)) {
+ r = safe_atou64(value, &curr_count);
+ if (r < 0)
+ return r;
+ }
+
+ if (curr_count > UINT64_MAX - num_procs_killed)
+ return -EOVERFLOW;
+
+ xsprintf(buf, "%"PRIu64, curr_count + num_procs_killed);
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, path, xattr, buf, strlen(buf), 0);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx) {
+ if (!ctx)
+ return NULL;
+
+ free(ctx->path);
+ return mfree(ctx);
+}
+
+int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret) {
+ _cleanup_set_free_ Set *targets = NULL;
+ OomdCGroupContext *ctx;
+ char *key;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ targets = set_new(NULL);
+ if (!targets)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH_KEY(ctx, key, h) {
+ if (ctx->memory_pressure.avg10 > ctx->mem_pressure_limit) {
+ usec_t diff;
+
+ if (ctx->last_hit_mem_pressure_limit == 0)
+ ctx->last_hit_mem_pressure_limit = now(CLOCK_MONOTONIC);
+
+ diff = now(CLOCK_MONOTONIC) - ctx->last_hit_mem_pressure_limit;
+ if (diff >= duration) {
+ r = set_put(targets, ctx);
+ if (r < 0)
+ return -ENOMEM;
+ }
+ } else
+ ctx->last_hit_mem_pressure_limit = 0;
+ }
+
+ if (!set_isempty(targets)) {
+ *ret = TAKE_PTR(targets);
+ return 1;
+ }
+
+ *ret = NULL;
+ return 0;
+}
+
+bool oomd_memory_reclaim(Hashmap *h) {
+ uint64_t pgscan = 0, pgscan_of = 0, last_pgscan = 0, last_pgscan_of = 0;
+ OomdCGroupContext *ctx;
+
+ assert(h);
+
+ /* If sum of all the current pgscan values are greater than the sum of all the last_pgscan values,
+ * there was reclaim activity. Used along with pressure checks to decide whether to take action. */
+
+ HASHMAP_FOREACH(ctx, h) {
+ uint64_t sum;
+
+ sum = pgscan + ctx->pgscan;
+ if (sum < pgscan || sum < ctx->pgscan)
+ pgscan_of++; /* count overflows */
+ pgscan = sum;
+
+ sum = last_pgscan + ctx->last_pgscan;
+ if (sum < last_pgscan || sum < ctx->last_pgscan)
+ last_pgscan_of++; /* count overflows */
+ last_pgscan = sum;
+ }
+
+ /* overflow counts are the same, return sums comparison */
+ if (last_pgscan_of == pgscan_of)
+ return pgscan > last_pgscan;
+
+ return pgscan_of > last_pgscan_of;
+}
+
+bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent) {
+ uint64_t swap_threshold;
+
+ assert(ctx);
+ assert(threshold_percent <= 100);
+
+ swap_threshold = ctx->swap_total * threshold_percent / ((uint64_t) 100);
+ return (ctx->swap_total - ctx->swap_used) < swap_threshold;
+}
+
+int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const char *prefix, OomdCGroupContext ***ret) {
+ _cleanup_free_ OomdCGroupContext **sorted = NULL;
+ OomdCGroupContext *item;
+ size_t k = 0;
+
+ assert(h);
+ assert(compare_func);
+ assert(ret);
+
+ sorted = new0(OomdCGroupContext*, hashmap_size(h));
+ if (!sorted)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(item, h) {
+ if (item->path && prefix && !path_startswith(item->path, prefix))
+ continue;
+
+ sorted[k++] = item;
+ }
+
+ typesafe_qsort(sorted, k, compare_func);
+
+ *ret = TAKE_PTR(sorted);
+
+ assert(k <= INT_MAX);
+ return (int) k;
+}
+
+int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
+ _cleanup_set_free_ Set *pids_killed = NULL;
+ int r;
+
+ assert(path);
+
+ if (dry_run) {
+ _cleanup_free_ char *cg_path = NULL;
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &cg_path);
+ if (r < 0)
+ return r;
+
+ log_debug("oomd dry-run: Would have tried to kill %s with recurse=%s", cg_path, true_false(recurse));
+ return 0;
+ }
+
+ pids_killed = set_new(NULL);
+ if (!pids_killed)
+ return -ENOMEM;
+
+ if (recurse)
+ r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
+ else
+ r = cg_kill(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
+ if (r < 0)
+ return r;
+
+ r = increment_oomd_xattr(path, "user.systemd_oomd_kill", set_size(pids_killed));
+ if (r < 0)
+ log_debug_errno(r, "Failed to set user.systemd_oomd_kill on kill: %m");
+
+ return set_size(pids_killed) != 0;
+}
+
+int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) {
+ _cleanup_free_ OomdCGroupContext **sorted = NULL;
+ int r;
+
+ assert(h);
+
+ r = oomd_sort_cgroup_contexts(h, compare_pgscan, prefix, &sorted);
+ if (r < 0)
+ return r;
+
+ for (int i = 0; i < r; i++) {
+ if (sorted[i]->pgscan == 0)
+ break;
+
+ r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
+ if (r > 0 || r == -ENOMEM)
+ break;
+ }
+
+ return r;
+}
+
+int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) {
+ _cleanup_free_ OomdCGroupContext **sorted = NULL;
+ int r;
+
+ assert(h);
+
+ r = oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted);
+ if (r < 0)
+ return r;
+
+ /* Try to kill cgroups with non-zero swap usage until we either succeed in
+ * killing or we get to a cgroup with no swap usage. */
+ for (int i = 0; i < r; i++) {
+ if (sorted[i]->swap_usage == 0)
+ break;
+
+ r = oomd_cgroup_kill(sorted[i]->path, true, dry_run);
+ if (r > 0 || r == -ENOMEM)
+ break;
+ }
+
+ return r;
+}
+
+int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
+ _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL;
+ _cleanup_free_ char *p = NULL, *val = NULL;
+ bool is_root;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ ctx = new0(OomdCGroupContext, 1);
+ if (!ctx)
+ return -ENOMEM;
+
+ is_root = empty_or_root(path);
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "memory.pressure", &p);
+ if (r < 0)
+ return log_debug_errno(r, "Error getting cgroup memory pressure path from %s: %m", path);
+
+ r = read_resource_pressure(p, PRESSURE_TYPE_FULL, &ctx->memory_pressure);
+ if (r < 0)
+ return log_debug_errno(r, "Error parsing memory pressure from %s: %m", p);
+
+ if (is_root) {
+ r = procfs_memory_get_used(&ctx->current_memory_usage);
+ if (r < 0)
+ return log_debug_errno(r, "Error getting memory used from procfs: %m");
+ } else {
+ r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.current", &ctx->current_memory_usage);
+ if (r < 0)
+ return log_debug_errno(r, "Error getting memory.current from %s: %m", path);
+
+ r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.min", &ctx->memory_min);
+ if (r < 0)
+ return log_debug_errno(r, "Error getting memory.min from %s: %m", path);
+
+ r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.low", &ctx->memory_low);
+ if (r < 0)
+ return log_debug_errno(r, "Error getting memory.low from %s: %m", path);
+
+ r = cg_get_attribute_as_uint64(SYSTEMD_CGROUP_CONTROLLER, path, "memory.swap.current", &ctx->swap_usage);
+ if (r < 0)
+ return log_debug_errno(r, "Error getting memory.swap.current from %s: %m", path);
+
+ r = cg_get_keyed_attribute(SYSTEMD_CGROUP_CONTROLLER, path, "memory.stat", STRV_MAKE("pgscan"), &val);
+ if (r < 0)
+ return log_debug_errno(r, "Error getting pgscan from memory.stat under %s: %m", path);
+
+ r = safe_atou64(val, &ctx->pgscan);
+ if (r < 0)
+ return log_debug_errno(r, "Error converting pgscan value to uint64_t: %m");
+ }
+
+ ctx->path = strdup(empty_to_root(path));
+ if (!ctx->path)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(ctx);
+ return 0;
+}
+
+int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ OomdSystemContext ctx = {};
+ int r;
+
+ assert(proc_swaps_path);
+ assert(ret);
+
+ f = fopen(proc_swaps_path, "re");
+ if (!f)
+ return -errno;
+
+ (void) fscanf(f, "%*s %*s %*s %*s %*s\n");
+
+ for (;;) {
+ uint64_t total, used;
+
+ r = fscanf(f,
+ "%*s " /* device/file */
+ "%*s " /* type of swap */
+ "%" PRIu64 " " /* swap size */
+ "%" PRIu64 " " /* used */
+ "%*s\n", /* priority */
+ &total, &used);
+
+ if (r == EOF && feof(f))
+ break;
+
+ if (r != 2) {
+ if (ferror(f))
+ return log_debug_errno(errno, "Error reading from %s: %m", proc_swaps_path);
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to parse values from %s: %m", proc_swaps_path);
+ }
+
+ ctx.swap_total += total * 1024U;
+ ctx.swap_used += used * 1024U;
+ }
+
+ *ret = ctx;
+ return 0;
+}
+
+int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path) {
+ _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *curr_ctx = NULL;
+ OomdCGroupContext *old_ctx, *ctx;
+ int r;
+
+ assert(new_h);
+ assert(path);
+
+ r = oomd_cgroup_context_acquire(path, &curr_ctx);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get OomdCGroupContext for %s: %m", path);
+
+ old_ctx = hashmap_get(old_h, path);
+ if (old_ctx) {
+ curr_ctx->last_pgscan = old_ctx->pgscan;
+ curr_ctx->mem_pressure_limit = old_ctx->mem_pressure_limit;
+ curr_ctx->last_hit_mem_pressure_limit = old_ctx->last_hit_mem_pressure_limit;
+ }
+
+ ctx = TAKE_PTR(curr_ctx);
+ r = hashmap_put(new_h, ctx->path, ctx);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+void oomd_dump_swap_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix) {
+ char swap[FORMAT_BYTES_MAX];
+
+ assert(ctx);
+ assert(f);
+
+ if (!empty_or_root(ctx->path))
+ fprintf(f,
+ "%sPath: %s\n"
+ "%s\tSwap Usage: %s\n",
+ strempty(prefix), ctx->path,
+ strempty(prefix), format_bytes(swap, sizeof(swap), ctx->swap_usage));
+ else
+ fprintf(f,
+ "%sPath: %s\n"
+ "%s\tSwap Usage: (see System Context)\n",
+ strempty(prefix), ctx->path,
+ strempty(prefix));
+}
+
+void oomd_dump_memory_pressure_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix) {
+ char tbuf[FORMAT_TIMESPAN_MAX], mem_use[FORMAT_BYTES_MAX];
+ char mem_min[FORMAT_BYTES_MAX], mem_low[FORMAT_BYTES_MAX];
+
+ assert(ctx);
+ assert(f);
+
+ fprintf(f,
+ "%sPath: %s\n"
+ "%s\tMemory Pressure Limit: %lu%%\n"
+ "%s\tPressure: Avg10: %lu.%02lu Avg60: %lu.%02lu Avg300: %lu.%02lu Total: %s\n"
+ "%s\tCurrent Memory Usage: %s\n",
+ strempty(prefix), ctx->path,
+ strempty(prefix), LOAD_INT(ctx->mem_pressure_limit),
+ strempty(prefix),
+ LOAD_INT(ctx->memory_pressure.avg10), LOAD_FRAC(ctx->memory_pressure.avg10),
+ LOAD_INT(ctx->memory_pressure.avg60), LOAD_FRAC(ctx->memory_pressure.avg60),
+ LOAD_INT(ctx->memory_pressure.avg300), LOAD_FRAC(ctx->memory_pressure.avg300),
+ format_timespan(tbuf, sizeof(tbuf), ctx->memory_pressure.total, USEC_PER_SEC),
+ strempty(prefix), format_bytes(mem_use, sizeof(mem_use), ctx->current_memory_usage));
+
+ if (!empty_or_root(ctx->path))
+ fprintf(f,
+ "%s\tMemory Min: %s\n"
+ "%s\tMemory Low: %s\n"
+ "%s\tPgscan: %" PRIu64 "\n",
+ strempty(prefix), format_bytes_cgroup_protection(mem_min, sizeof(mem_min), ctx->memory_min),
+ strempty(prefix), format_bytes_cgroup_protection(mem_low, sizeof(mem_low), ctx->memory_low),
+ strempty(prefix), ctx->pgscan);
+}
+
+void oomd_dump_system_context(const OomdSystemContext *ctx, FILE *f, const char *prefix) {
+ char used[FORMAT_BYTES_MAX], total[FORMAT_BYTES_MAX];
+
+ assert(ctx);
+ assert(f);
+
+ fprintf(f,
+ "%sSwap: Used: %s Total: %s\n",
+ strempty(prefix),
+ format_bytes(used, sizeof(used), ctx->swap_used),
+ format_bytes(total, sizeof(total), ctx->swap_total));
+}
diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h
new file mode 100644
index 0000000..87ecda8
--- /dev/null
+++ b/src/oom/oomd-util.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "psi-util.h"
+
+#define GROWING_SIZE_PERCENTILE 80
+
+extern const struct hash_ops oomd_cgroup_ctx_hash_ops;
+
+typedef struct OomdCGroupContext OomdCGroupContext;
+typedef struct OomdSystemContext OomdSystemContext;
+
+typedef int (oomd_compare_t)(OomdCGroupContext * const *, OomdCGroupContext * const *);
+
+struct OomdCGroupContext {
+ char *path;
+
+ ResourcePressure memory_pressure;
+
+ uint64_t current_memory_usage;
+
+ uint64_t memory_min;
+ uint64_t memory_low;
+ uint64_t swap_usage;
+
+ uint64_t last_pgscan;
+ uint64_t pgscan;
+
+ /* These are only used by oomd_pressure_above for acting on high memory pressure. */
+ loadavg_t mem_pressure_limit;
+ usec_t last_hit_mem_pressure_limit;
+};
+
+struct OomdSystemContext {
+ uint64_t swap_total;
+ uint64_t swap_used;
+};
+
+OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx);
+DEFINE_TRIVIAL_CLEANUP_FUNC(OomdCGroupContext*, oomd_cgroup_context_free);
+
+/* All hashmaps used with these functions are expected to be of the form
+ * key: cgroup paths -> value: OomdCGroupContext. */
+
+/* Scans all the OomdCGroupContexts in `h` and returns 1 and a set of pointers to those OomdCGroupContexts in `ret`
+ * if any of them have exceeded their supplied memory pressure limits for the `duration` length of time.
+ * `last_hit_mem_pressure_limit` is updated accordingly for each entry when the limit is exceeded, and when it returns
+ * below the limit.
+ * Returns 0 and sets `ret` to an empty set if no entries exceeded limits for `duration`.
+ * Returns -ENOMEM for allocation errors. */
+int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret);
+
+/* Sum up current OomdCGroupContexts' pgscan values and last interval's pgscan values in `h`. Returns true if the
+ * current sum is higher than the last interval's sum (there was some reclaim activity). */
+bool oomd_memory_reclaim(Hashmap *h);
+
+/* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. */
+bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent);
+
+static inline int compare_pgscan(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) {
+ assert(c1);
+ assert(c2);
+
+ if ((*c1)->pgscan > (*c2)->pgscan)
+ return -1;
+ else if ((*c1)->pgscan < (*c2)->pgscan)
+ return 1;
+ else
+ return 0;
+}
+
+static inline int compare_swap_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) {
+ assert(c1);
+ assert(c2);
+
+ if ((*c1)->swap_usage > (*c2)->swap_usage)
+ return -1;
+ else if ((*c1)->swap_usage < (*c2)->swap_usage)
+ return 1;
+ else
+ return 0;
+}
+
+/* Get an array of OomdCGroupContexts from `h`, qsorted from largest to smallest values according to `compare_func`.
+ * If `prefix` is not NULL, only include OomdCGroupContexts whose paths start with prefix. Otherwise all paths are sorted.
+ * Returns the number of sorted items; negative on error. */
+int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const char *prefix, OomdCGroupContext ***ret);
+
+/* Returns a negative value on error, 0 if no processes were killed, or 1 if processes were killed. */
+int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);
+
+/* The following oomd_kill_by_* functions return 1 if processes were killed, or negative otherwise. */
+/* If `prefix` is supplied, only cgroups whose paths start with `prefix` are eligible candidates. Otherwise,
+ * everything in `h` is a candidate. */
+int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run);
+int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run);
+
+int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret);
+int oomd_system_context_acquire(const char *proc_swaps_path, OomdSystemContext *ret);
+
+/* Get the OomdCGroupContext of `path` and insert it into `new_h`. The key for the inserted context will be `path`.
+ *
+ * `old_h` is used to get data used to calculate prior interval information. `old_h` can be NULL in which case there
+ * was no prior data to reference. */
+int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path);
+
+void oomd_dump_swap_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix);
+void oomd_dump_memory_pressure_cgroup_context(const OomdCGroupContext *ctx, FILE *f, const char *prefix);
+void oomd_dump_system_context(const OomdSystemContext *ctx, FILE *f, const char *prefix);
diff --git a/src/oom/oomd.c b/src/oom/oomd.c
new file mode 100644
index 0000000..8cf776e
--- /dev/null
+++ b/src/oom/oomd.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "bus-log-control-api.h"
+#include "bus-object.h"
+#include "cgroup-util.h"
+#include "conf-parser.h"
+#include "daemon-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "oomd-manager.h"
+#include "oomd-manager-bus.h"
+#include "parse-util.h"
+#include "pretty-print.c"
+#include "psi-util.h"
+#include "signal-util.h"
+
+static bool arg_dry_run = false;
+static int arg_swap_used_limit = -1;
+static int arg_mem_pressure_limit = -1;
+
+static int parse_config(void) {
+ static const ConfigTableItem items[] = {
+ { "OOM", "SwapUsedLimitPercent", config_parse_percent, 0, &arg_swap_used_limit },
+ { "OOM", "DefaultMemoryPressureLimitPercent", config_parse_percent, 0, &arg_mem_pressure_limit },
+ {}
+ };
+
+ return config_parse_many_nulstr(PKGSYSCONFDIR "/oomd.conf",
+ CONF_PATHS_NULSTR("systemd/oomd.conf.d"),
+ "OOM\0",
+ config_item_table_lookup,
+ items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-oomd", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Run the userspace out-of-memory (OOM) killer.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --dry-run Only print destructive actions instead of doing them\n"
+ " --bus-introspect=PATH Write D-Bus XML introspection data\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_DRY_RUN,
+ ARG_BUS_INTROSPECT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "dry-run", no_argument, NULL, ARG_DRY_RUN },
+ { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_DRY_RUN:
+ arg_dry_run = true;
+ break;
+
+ case ARG_BUS_INTROSPECT:
+ return bus_introspect_implementations(
+ stdout,
+ optarg,
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object));
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option code.");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes no arguments.");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(notify_on_cleanup) const char *notify_msg = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_free_ char *swap = NULL;
+ unsigned long long s = 0;
+ int r;
+
+ log_setup_service();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = parse_config();
+ if (r < 0)
+ return r;
+
+ /* Do some basic requirement checks for running systemd-oomd. It's not exhaustive as some of the other
+ * requirements do not have a reliable means to check for in code. */
+
+ /* SwapTotal is always available in /proc/meminfo and defaults to 0, even on swap-disabled kernels. */
+ r = get_proc_field("/proc/meminfo", "SwapTotal", WHITESPACE, &swap);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get SwapTotal from /proc/meminfo: %m");
+
+ r = safe_atollu(swap, &s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse SwapTotal from /proc/meminfo: %s: %m", swap);
+ if (s == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Requires swap to operate");
+
+ if (!is_pressure_supported())
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Pressure Stall Information (PSI) is not supported");
+
+ r = cg_all_unified();
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Requires the unified cgroups hierarchy");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create manager: %m");
+
+ r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start up daemon: %m");
+
+ notify_msg = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ log_info("systemd-oomd starting%s!", arg_dry_run ? " in dry run mode" : "");
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/oom/oomd.conf b/src/oom/oomd.conf
new file mode 100644
index 0000000..8ac9716
--- /dev/null
+++ b/src/oom/oomd.conf
@@ -0,0 +1,16 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See oomd.conf(5) for details
+
+[OOM]
+#SwapUsedLimitPercent=90%
+#DefaultMemoryPressureLimitPercent=60%
diff --git a/src/oom/org.freedesktop.oom1.conf b/src/oom/org.freedesktop.oom1.conf
new file mode 100644
index 0000000..cc1143a
--- /dev/null
+++ b/src/oom/org.freedesktop.oom1.conf
@@ -0,0 +1,47 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
+
+<busconfig>
+
+ <policy user="systemd-oom">
+ <allow own="org.freedesktop.oom1"/>
+ <allow send_destination="org.freedesktop.oom1"/>
+ <allow receive_sender="org.freedesktop.oom1"/>
+ </policy>
+
+ <policy user="root">
+ <allow send_destination="org.freedesktop.oom1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.oom1"/>
+
+ <!-- Generic interfaces -->
+
+ <allow send_destination="org.freedesktop.oom1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.oom1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.oom1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.oom1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <!-- Manager interface -->
+
+ <allow send_destination="org.freedesktop.oom1"
+ send_interface="org.freedesktop.oom1.Manager"
+ send_member="DumpByFileDescriptor"/>
+
+ <allow receive_sender="org.freedesktop.oom1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/oom/org.freedesktop.oom1.service b/src/oom/org.freedesktop.oom1.service
new file mode 100644
index 0000000..4fd5138
--- /dev/null
+++ b/src/oom/org.freedesktop.oom1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.oom1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.oom1.service
diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c
new file mode 100644
index 0000000..5df5710
--- /dev/null
+++ b/src/oom/test-oomd-util.c
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "oomd-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static int fork_and_sleep(unsigned sleep_min) {
+ usec_t n, timeout, ts;
+
+ pid_t pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ timeout = sleep_min * USEC_PER_MINUTE;
+ ts = now(CLOCK_MONOTONIC);
+ while (true) {
+ n = now(CLOCK_MONOTONIC);
+ if (ts + timeout < n) {
+ log_error("Child timed out waiting to be killed");
+ abort();
+ }
+ sleep(1);
+ }
+ }
+
+ return pid;
+}
+
+static void test_oomd_cgroup_kill(void) {
+ _cleanup_free_ char *cgroup_root = NULL, *cgroup = NULL;
+ int pid[2];
+ int r;
+
+ if (geteuid() != 0)
+ return (void) log_tests_skipped("not root");
+
+ if (cg_all_unified() <= 0)
+ return (void) log_tests_skipped("cgroups are not running in unified mode");
+
+ assert_se(cg_pid_get_path(NULL, 0, &cgroup_root) >= 0);
+
+ /* Create another cgroup below this one for the pids we forked off. We need this to be managed
+ * by the test so that pid1 doesn't delete it before we can read the xattrs. */
+ cgroup = path_join(cgroup_root, "oomdkilltest");
+ assert(cgroup);
+ assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, cgroup) >= 0);
+
+ /* If we don't have permissions to set xattrs we're likely in a userns or missing capabilities */
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_test", "test", 4, 0);
+ if (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))
+ return (void) log_tests_skipped("Cannot set user xattrs");
+
+ /* Do this twice to also check the increment behavior on the xattrs */
+ for (int i = 0; i < 2; i++) {
+ _cleanup_free_ char *v = NULL;
+
+ for (int j = 0; j < 2; j++) {
+ pid[j] = fork_and_sleep(5);
+ assert_se(cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup, pid[j]) >= 0);
+ }
+
+ r = oomd_cgroup_kill(cgroup, false /* recurse */, false /* dry run */);
+ if (r <= 0) {
+ log_debug_errno(r, "Failed to kill processes under %s: %m", cgroup);
+ abort();
+ }
+
+ /* Wait a bit since processes may take some time to be cleaned up. */
+ sleep(2);
+ assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, cgroup) == true);
+
+ assert_se(cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.systemd_oomd_kill", &v) >= 0);
+ assert_se(memcmp(v, i == 0 ? "2" : "4", 2) == 0);
+ }
+}
+
+static void test_oomd_cgroup_context_acquire_and_insert(void) {
+ _cleanup_hashmap_free_ Hashmap *h1 = NULL, *h2 = NULL;
+ _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL;
+ _cleanup_free_ char *cgroup = NULL;
+ OomdCGroupContext *c1, *c2;
+
+ if (geteuid() != 0)
+ return (void) log_tests_skipped("not root");
+
+ if (!is_pressure_supported())
+ return (void) log_tests_skipped("system does not support pressure");
+
+ if (cg_all_unified() <= 0)
+ return (void) log_tests_skipped("cgroups are not running in unified mode");
+
+ assert_se(cg_pid_get_path(NULL, 0, &cgroup) >= 0);
+
+ assert_se(oomd_cgroup_context_acquire(cgroup, &ctx) == 0);
+
+ assert_se(streq(ctx->path, cgroup));
+ assert_se(ctx->current_memory_usage > 0);
+ assert_se(ctx->memory_min == 0);
+ assert_se(ctx->memory_low == 0);
+ assert_se(ctx->swap_usage == 0);
+ assert_se(ctx->last_pgscan == 0);
+ assert_se(ctx->pgscan == 0);
+ ctx = oomd_cgroup_context_free(ctx);
+
+ /* Test the root cgroup */
+ assert_se(oomd_cgroup_context_acquire("", &ctx) == 0);
+ assert_se(streq(ctx->path, "/"));
+ assert_se(ctx->current_memory_usage > 0);
+
+ /* Test hashmap inserts */
+ assert_se(h1 = hashmap_new(&oomd_cgroup_ctx_hash_ops));
+ assert_se(oomd_insert_cgroup_context(NULL, h1, cgroup) == 0);
+ c1 = hashmap_get(h1, cgroup);
+ assert_se(c1);
+
+ /* make sure certain values from h1 get updated in h2 */
+ c1->pgscan = 5555;
+ c1->mem_pressure_limit = 6789;
+ c1->last_hit_mem_pressure_limit = 42;
+ assert_se(h2 = hashmap_new(&oomd_cgroup_ctx_hash_ops));
+ assert_se(oomd_insert_cgroup_context(h1, h2, cgroup) == 0);
+ c1 = hashmap_get(h1, cgroup);
+ c2 = hashmap_get(h2, cgroup);
+ assert_se(c1);
+ assert_se(c2);
+ assert_se(c1 != c2);
+ assert_se(c2->last_pgscan == 5555);
+ assert_se(c2->mem_pressure_limit == 6789);
+ assert_se(c2->last_hit_mem_pressure_limit == 42);
+}
+
+static void test_oomd_system_context_acquire(void) {
+ _cleanup_(unlink_tempfilep) char path[] = "/oomdgetsysctxtestXXXXXX";
+ OomdSystemContext ctx;
+
+ if (geteuid() != 0)
+ return (void) log_tests_skipped("not root");
+
+ assert_se(mkstemp(path));
+
+ assert_se(oomd_system_context_acquire("/verylikelynonexistentpath", &ctx) == -ENOENT);
+
+ assert_se(oomd_system_context_acquire(path, &ctx) == 0);
+ assert_se(ctx.swap_total == 0);
+ assert_se(ctx.swap_used == 0);
+
+ assert_se(write_string_file(path, "some\nwords\nacross\nmultiple\nlines", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(oomd_system_context_acquire(path, &ctx) == 0);
+ assert_se(ctx.swap_total == 0);
+ assert_se(ctx.swap_used == 0);
+
+ assert_se(write_string_file(path, "Filename Type Size Used Priority\n"
+ "/swapvol/swapfile file 18971644 0 -3\n"
+ "/dev/vda2 partition 1999868 993780 -2", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(oomd_system_context_acquire(path, &ctx) == 0);
+ assert_se(ctx.swap_total == 21474828288);
+ assert_se(ctx.swap_used == 1017630720);
+}
+
+static void test_oomd_pressure_above(void) {
+ _cleanup_hashmap_free_ Hashmap *h1 = NULL, *h2 = NULL;
+ _cleanup_set_free_ Set *t1 = NULL, *t2 = NULL, *t3 = NULL;
+ OomdCGroupContext ctx[2], *c;
+ loadavg_t threshold;
+
+ assert_se(store_loadavg_fixed_point(80, 0, &threshold) == 0);
+
+ /* /herp.slice */
+ assert_se(store_loadavg_fixed_point(99, 99, &(ctx[0].memory_pressure.avg10)) == 0);
+ assert_se(store_loadavg_fixed_point(99, 99, &(ctx[0].memory_pressure.avg60)) == 0);
+ assert_se(store_loadavg_fixed_point(99, 99, &(ctx[0].memory_pressure.avg300)) == 0);
+ ctx[0].mem_pressure_limit = threshold;
+
+ /* /derp.slice */
+ assert_se(store_loadavg_fixed_point(1, 11, &(ctx[1].memory_pressure.avg10)) == 0);
+ assert_se(store_loadavg_fixed_point(1, 11, &(ctx[1].memory_pressure.avg60)) == 0);
+ assert_se(store_loadavg_fixed_point(1, 11, &(ctx[1].memory_pressure.avg300)) == 0);
+ ctx[1].mem_pressure_limit = threshold;
+
+
+ /* High memory pressure */
+ assert_se(h1 = hashmap_new(&string_hash_ops));
+ assert_se(hashmap_put(h1, "/herp.slice", &ctx[0]) >= 0);
+ assert_se(oomd_pressure_above(h1, 0 /* duration */, &t1) == 1);
+ assert_se(set_contains(t1, &ctx[0]) == true);
+ assert_se(c = hashmap_get(h1, "/herp.slice"));
+ assert_se(c->last_hit_mem_pressure_limit > 0);
+
+ /* Low memory pressure */
+ assert_se(h2 = hashmap_new(&string_hash_ops));
+ assert_se(hashmap_put(h2, "/derp.slice", &ctx[1]) >= 0);
+ assert_se(oomd_pressure_above(h2, 0 /* duration */, &t2) == 0);
+ assert_se(t2 == NULL);
+ assert_se(c = hashmap_get(h2, "/derp.slice"));
+ assert_se(c->last_hit_mem_pressure_limit == 0);
+
+ /* High memory pressure w/ multiple cgroups */
+ assert_se(hashmap_put(h1, "/derp.slice", &ctx[1]) >= 0);
+ assert_se(oomd_pressure_above(h1, 0 /* duration */, &t3) == 1);
+ assert_se(set_contains(t3, &ctx[0]) == true);
+ assert_se(set_size(t3) == 1);
+ assert_se(c = hashmap_get(h1, "/herp.slice"));
+ assert_se(c->last_hit_mem_pressure_limit > 0);
+ assert_se(c = hashmap_get(h1, "/derp.slice"));
+ assert_se(c->last_hit_mem_pressure_limit == 0);
+}
+
+static void test_oomd_memory_reclaim(void) {
+ _cleanup_hashmap_free_ Hashmap *h1 = NULL;
+ char **paths = STRV_MAKE("/0.slice",
+ "/1.slice",
+ "/2.slice",
+ "/3.slice",
+ "/4.slice");
+
+ OomdCGroupContext ctx[5] = {
+ { .path = paths[0],
+ .last_pgscan = 100,
+ .pgscan = 100 },
+ { .path = paths[1],
+ .last_pgscan = 100,
+ .pgscan = 100 },
+ { .path = paths[2],
+ .last_pgscan = 77,
+ .pgscan = 33 },
+ { .path = paths[3],
+ .last_pgscan = UINT64_MAX,
+ .pgscan = 100 },
+ { .path = paths[4],
+ .last_pgscan = 100,
+ .pgscan = UINT64_MAX },
+ };
+
+ assert_se(h1 = hashmap_new(&string_hash_ops));
+ assert_se(hashmap_put(h1, paths[0], &ctx[0]) >= 0);
+ assert_se(hashmap_put(h1, paths[1], &ctx[1]) >= 0);
+ assert_se(oomd_memory_reclaim(h1) == false);
+
+ assert_se(hashmap_put(h1, paths[2], &ctx[2]) >= 0);
+ assert_se(oomd_memory_reclaim(h1) == false);
+
+ assert_se(hashmap_put(h1, paths[4], &ctx[4]) >= 0);
+ assert_se(oomd_memory_reclaim(h1) == true);
+
+ assert_se(hashmap_put(h1, paths[3], &ctx[3]) >= 0);
+ assert_se(oomd_memory_reclaim(h1) == false);
+}
+
+static void test_oomd_swap_free_below(void) {
+ OomdSystemContext ctx = (OomdSystemContext) {
+ .swap_total = 20971512 * 1024U,
+ .swap_used = 20971440 * 1024U,
+ };
+ assert_se(oomd_swap_free_below(&ctx, 20) == true);
+
+ ctx = (OomdSystemContext) {
+ .swap_total = 20971512 * 1024U,
+ .swap_used = 3310136 * 1024U,
+ };
+ assert_se(oomd_swap_free_below(&ctx, 20) == false);
+}
+
+static void test_oomd_sort_cgroups(void) {
+ _cleanup_hashmap_free_ Hashmap *h = NULL;
+ _cleanup_free_ OomdCGroupContext **sorted_cgroups;
+ char **paths = STRV_MAKE("/herp.slice",
+ "/herp.slice/derp.scope",
+ "/herp.slice/derp.scope/sheep.service",
+ "/zupa.slice");
+
+ OomdCGroupContext ctx[4] = {
+ { .path = paths[0],
+ .swap_usage = 20,
+ .pgscan = 60 },
+ { .path = paths[1],
+ .swap_usage = 60,
+ .pgscan = 40 },
+ { .path = paths[2],
+ .swap_usage = 40,
+ .pgscan = 20 },
+ { .path = paths[3],
+ .swap_usage = 10,
+ .pgscan = 80 },
+ };
+
+ assert_se(h = hashmap_new(&string_hash_ops));
+
+ assert_se(hashmap_put(h, "/herp.slice", &ctx[0]) >= 0);
+ assert_se(hashmap_put(h, "/herp.slice/derp.scope", &ctx[1]) >= 0);
+ assert_se(hashmap_put(h, "/herp.slice/derp.scope/sheep.service", &ctx[2]) >= 0);
+ assert_se(hashmap_put(h, "/zupa.slice", &ctx[3]) >= 0);
+
+ assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 4);
+ assert_se(sorted_cgroups[0] == &ctx[1]);
+ assert_se(sorted_cgroups[1] == &ctx[2]);
+ assert_se(sorted_cgroups[2] == &ctx[0]);
+ assert_se(sorted_cgroups[3] == &ctx[3]);
+ sorted_cgroups = mfree(sorted_cgroups);
+
+ assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan, NULL, &sorted_cgroups) == 4);
+ assert_se(sorted_cgroups[0] == &ctx[3]);
+ assert_se(sorted_cgroups[1] == &ctx[0]);
+ assert_se(sorted_cgroups[2] == &ctx[1]);
+ assert_se(sorted_cgroups[3] == &ctx[2]);
+ sorted_cgroups = mfree(sorted_cgroups);
+
+ assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan, "/herp.slice/derp.scope", &sorted_cgroups) == 2);
+ assert_se(sorted_cgroups[0] == &ctx[1]);
+ assert_se(sorted_cgroups[1] == &ctx[2]);
+ assert_se(sorted_cgroups[2] == 0);
+ assert_se(sorted_cgroups[3] == 0);
+ sorted_cgroups = mfree(sorted_cgroups);
+}
+
+int main(void) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_oomd_system_context_acquire();
+ test_oomd_pressure_above();
+ test_oomd_memory_reclaim();
+ test_oomd_swap_free_below();
+ test_oomd_sort_cgroups();
+
+ /* The following tests operate on live cgroups */
+
+ r = enter_cgroup_root(NULL);
+ if (r < 0)
+ return log_tests_skipped_errno(r, "failed to enter a test cgroup scope");
+
+ test_oomd_cgroup_kill();
+ test_oomd_cgroup_context_acquire_and_insert();
+
+ return 0;
+}
diff --git a/src/partition/growfs.c b/src/partition/growfs.c
new file mode 100644
index 0000000..9406ae8
--- /dev/null
+++ b/src/partition/growfs.c
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/btrfs.h>
+#include <linux/magic.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "cryptsetup-util.h"
+#include "device-nodes.h"
+#include "dissect-image.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "resize-fs.h"
+
+static const char *arg_target = NULL;
+static bool arg_dry_run = false;
+
+#if HAVE_LIBCRYPTSETUP
+static int resize_crypt_luks_device(dev_t devno, const char *fstype, dev_t main_devno) {
+ _cleanup_free_ char *devpath = NULL, *main_devpath = NULL;
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_close_ int main_devfd = -1;
+ uint64_t size;
+ int r;
+
+ r = dlopen_cryptsetup();
+ if (r < 0)
+ return log_error_errno(r, "Cannot resize LUKS device: %m");
+
+ r = device_path_make_major_minor(S_IFBLK, main_devno, &main_devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format device major/minor path: %m");
+
+ main_devfd = open(main_devpath, O_RDONLY|O_CLOEXEC);
+ if (main_devfd < 0)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", main_devpath);
+
+ if (ioctl(main_devfd, BLKGETSIZE64, &size) != 0)
+ return log_error_errno(errno, "Failed to query size of \"%s\" (before resize): %m",
+ main_devpath);
+
+ log_debug("%s is %"PRIu64" bytes", main_devpath, size);
+ r = device_path_make_major_minor(S_IFBLK, devno, &devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor path: %m");
+
+ r = sym_crypt_init(&cd, devpath);
+ if (r < 0)
+ return log_error_errno(r, "crypt_init(\"%s\") failed: %m", devpath);
+
+ cryptsetup_enable_logging(cd);
+
+ r = sym_crypt_load(cd, CRYPT_LUKS, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load LUKS metadata for %s: %m", devpath);
+
+ if (arg_dry_run)
+ return 0;
+
+ r = sym_crypt_resize(cd, main_devpath, 0);
+ if (r < 0)
+ return log_error_errno(r, "crypt_resize() of %s failed: %m", devpath);
+
+ if (ioctl(main_devfd, BLKGETSIZE64, &size) != 0)
+ log_warning_errno(errno, "Failed to query size of \"%s\" (after resize): %m",
+ devpath);
+ else
+ log_debug("%s is now %"PRIu64" bytes", main_devpath, size);
+
+ return 1;
+}
+#endif
+
+static int maybe_resize_underlying_device(const char *mountpath, dev_t main_devno) {
+ _cleanup_free_ char *fstype = NULL, *devpath = NULL;
+ dev_t devno;
+ int r;
+
+#if HAVE_LIBCRYPTSETUP
+ cryptsetup_enable_logging(NULL);
+#endif
+
+ r = get_block_device_harder(mountpath, &devno);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine underlying block device of \"%s\": %m",
+ mountpath);
+ if (devno == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" not backed by block device.", arg_target);
+
+ log_debug("Underlying device %d:%d, main dev %d:%d, %s",
+ major(devno), minor(devno),
+ major(main_devno), minor(main_devno),
+ devno == main_devno ? "same" : "different");
+ if (devno == main_devno)
+ return 0;
+
+ r = device_path_make_major_minor(S_IFBLK, devno, &devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format device major/minor path: %m");
+
+ r = probe_filesystem(devpath, &fstype);
+ if (r == -EUCLEAN)
+ return log_warning_errno(r, "Cannot reliably determine probe \"%s\", refusing to proceed.", devpath);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to probe \"%s\": %m", devpath);
+
+#if HAVE_LIBCRYPTSETUP
+ if (streq_ptr(fstype, "crypto_LUKS"))
+ return resize_crypt_luks_device(devno, fstype, main_devno);
+#endif
+
+ log_debug("Don't know how to resize %s of type %s, ignoring.", devpath, strnull(fstype));
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-growfs@.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] /path/to/mountpoint\n\n"
+ "Grow filesystem or encrypted payload to device size.\n\n"
+ "Options:\n"
+ " -h --help Show this help and exit\n"
+ " --version Print version string and exit\n"
+ " -n --dry-run Just print what would be done\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ int c;
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version" , no_argument, NULL, ARG_VERSION },
+ { "dry-run", no_argument, NULL, 'n' },
+ {}
+ };
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hn", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'n':
+ arg_dry_run = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind + 1 != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s excepts exactly one argument (the mount point).",
+ program_invocation_short_name);
+
+ arg_target = argv[optind];
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_close_ int mountfd = -1, devfd = -1;
+ _cleanup_free_ char *devpath = NULL;
+ uint64_t size, newsize;
+ char fb[FORMAT_BYTES_MAX];
+ dev_t devno;
+ int r;
+
+ log_setup_service();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = path_is_mount_point(arg_target, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if \"%s\" is a mount point: %m", arg_target);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "\"%s\" is not a mount point: %m", arg_target);
+
+ r = get_block_device(arg_target, &devno);
+ if (r == -EUCLEAN)
+ return btrfs_log_dev_root(LOG_ERR, r, arg_target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine block device of \"%s\": %m", arg_target);
+ if (devno == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" not backed by block device.", arg_target);
+
+ r = maybe_resize_underlying_device(arg_target, devno);
+ if (r < 0)
+ return r;
+
+ mountfd = open(arg_target, O_RDONLY|O_CLOEXEC);
+ if (mountfd < 0)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", arg_target);
+
+ r = device_path_make_major_minor(S_IFBLK, devno, &devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format device major/minor path: %m");
+
+ devfd = open(devpath, O_RDONLY|O_CLOEXEC);
+ if (devfd < 0)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", devpath);
+
+ if (ioctl(devfd, BLKGETSIZE64, &size) != 0)
+ return log_error_errno(errno, "Failed to query size of \"%s\": %m", devpath);
+
+ log_debug("Resizing \"%s\" to %"PRIu64" bytes...", arg_target, size);
+ r = resize_fs(mountfd, size, &newsize);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resize \"%s\" to %"PRIu64" bytes: %m",
+ arg_target, size);
+ if (newsize == size)
+ log_info("Successfully resized \"%s\" to %s bytes.",
+ arg_target,
+ format_bytes(fb, sizeof fb, newsize));
+ else
+ log_info("Successfully resized \"%s\" to %s bytes (%"PRIu64" bytes lost due to blocksize).",
+ arg_target,
+ format_bytes(fb, sizeof fb, newsize),
+ size - newsize);
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/partition/makefs.c b/src/partition/makefs.c
new file mode 100644
index 0000000..fd924d2
--- /dev/null
+++ b/src/partition/makefs.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "main-func.h"
+#include "mkfs-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+
+static int run(int argc, char *argv[]) {
+ _cleanup_free_ char *device = NULL, *fstype = NULL, *detected = NULL;
+ _cleanup_close_ int lock_fd = -1;
+ sd_id128_t uuid;
+ struct stat st;
+ int r;
+
+ log_setup_service();
+
+ if (argc != 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program expects two arguments.");
+
+ /* type and device must be copied because makefs calls safe_fork, which clears argv[] */
+ fstype = strdup(argv[1]);
+ if (!fstype)
+ return log_oom();
+
+ device = strdup(argv[2]);
+ if (!device)
+ return log_oom();
+
+ if (stat(device, &st) < 0)
+ return log_error_errno(errno, "Failed to stat \"%s\": %m", device);
+
+ if (S_ISBLK(st.st_mode)) {
+ /* Lock the device so that udev doesn't interfere with our work */
+
+ lock_fd = lock_whole_block_device(st.st_rdev, LOCK_EX);
+ if (lock_fd < 0)
+ return log_error_errno(lock_fd, "Failed to lock whole block device of \"%s\": %m", device);
+ } else
+ log_info("%s is not a block device.", device);
+
+ r = probe_filesystem(device, &detected);
+ if (r == -EUCLEAN)
+ return log_error_errno(r, "Ambiguous results of probing for file system on \"%s\", refusing to proceed.", device);
+ if (r < 0)
+ return log_error_errno(r, "Failed to probe \"%s\": %m", device);
+ if (detected) {
+ log_info("'%s' is not empty (contains file system of type %s), exiting.", device, detected);
+ return 0;
+ }
+
+ r = sd_id128_randomize(&uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate UUID for file system: %m");
+
+ return make_filesystem(device, fstype, basename(device), uuid, true);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/partition/meson.build b/src/partition/meson.build
new file mode 100644
index 0000000..d2729da
--- /dev/null
+++ b/src/partition/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_repart_sources = files('''
+ repart.c
+'''.split())
+
+test_repart_sh = find_program('test-repart.sh')
diff --git a/src/partition/repart.c b/src/partition/repart.c
new file mode 100644
index 0000000..6db413e
--- /dev/null
+++ b/src/partition/repart.c
@@ -0,0 +1,4114 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <libfdisk.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <openssl/hmac.h>
+#include <openssl/sha.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "cryptsetup-util.h"
+#include "def.h"
+#include "efivars.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "gpt.h"
+#include "id128-util.h"
+#include "json.h"
+#include "list.h"
+#include "locale-util.h"
+#include "loop-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mkfs-util.h"
+#include "mount-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "resize-fs.h"
+#include "sort-util.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "utf8.h"
+
+/* If not configured otherwise use a minimal partition size of 10M */
+#define DEFAULT_MIN_SIZE (10*1024*1024)
+
+/* Hard lower limit for new partition sizes */
+#define HARD_MIN_SIZE 4096
+
+/* libfdisk takes off slightly more than 1M of the disk size when creating a GPT disk label */
+#define GPT_METADATA_SIZE (1044*1024)
+
+/* LUKS2 takes off 16M of the partition size with its metadata by default */
+#define LUKS2_METADATA_SIZE (16*1024*1024)
+
+#if !HAVE_LIBCRYPTSETUP
+struct crypt_device;
+static inline void sym_crypt_free(struct crypt_device* cd) {}
+static inline void sym_crypt_freep(struct crypt_device** cd) {}
+#endif
+
+/* Note: When growing and placing new partitions we always align to 4K sector size. It's how newer hard disks
+ * are designed, and if everything is aligned to that performance is best. And for older hard disks with 512B
+ * sector size devices were generally assumed to have an even number of sectors, hence at the worst we'll
+ * waste 3K per partition, which is probably fine. */
+
+static enum {
+ EMPTY_REFUSE, /* refuse empty disks, never create a partition table */
+ EMPTY_ALLOW, /* allow empty disks, create partition table if necessary */
+ EMPTY_REQUIRE, /* require an empty disk, create a partition table */
+ EMPTY_FORCE, /* make disk empty, erase everything, create a partition table always */
+ EMPTY_CREATE, /* create disk as loopback file, create a partition table always */
+} arg_empty = EMPTY_REFUSE;
+
+static bool arg_dry_run = true;
+static const char *arg_node = NULL;
+static char *arg_root = NULL;
+static char *arg_definitions = NULL;
+static bool arg_discard = true;
+static bool arg_can_factory_reset = false;
+static int arg_factory_reset = -1;
+static sd_id128_t arg_seed = SD_ID128_NULL;
+static bool arg_randomize = false;
+static int arg_pretty = -1;
+static uint64_t arg_size = UINT64_MAX;
+static bool arg_size_auto = false;
+static bool arg_json = false;
+static JsonFormatFlags arg_json_format_flags = 0;
+static void *arg_key = NULL;
+static size_t arg_key_size = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_definitions, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_key, erase_and_freep);
+
+typedef struct Partition Partition;
+typedef struct FreeArea FreeArea;
+typedef struct Context Context;
+
+struct Partition {
+ char *definition_path;
+
+ sd_id128_t type_uuid;
+ sd_id128_t current_uuid, new_uuid;
+ char *current_label, *new_label;
+
+ bool dropped;
+ bool factory_reset;
+ int32_t priority;
+
+ uint32_t weight, padding_weight;
+
+ uint64_t current_size, new_size;
+ uint64_t size_min, size_max;
+
+ uint64_t current_padding, new_padding;
+ uint64_t padding_min, padding_max;
+
+ uint64_t partno;
+ uint64_t offset;
+
+ struct fdisk_partition *current_partition;
+ struct fdisk_partition *new_partition;
+ FreeArea *padding_area;
+ FreeArea *allocated_to_area;
+
+ char *copy_blocks_path;
+ int copy_blocks_fd;
+ uint64_t copy_blocks_size;
+
+ char *format;
+ char **copy_files;
+ bool encrypt;
+
+ LIST_FIELDS(Partition, partitions);
+};
+
+#define PARTITION_IS_FOREIGN(p) (!(p)->definition_path)
+#define PARTITION_EXISTS(p) (!!(p)->current_partition)
+
+struct FreeArea {
+ Partition *after;
+ uint64_t size;
+ uint64_t allocated;
+};
+
+struct Context {
+ LIST_HEAD(Partition, partitions);
+ size_t n_partitions;
+
+ FreeArea **free_areas;
+ size_t n_free_areas, n_allocated_free_areas;
+
+ uint64_t start, end, total;
+
+ struct fdisk_context *fdisk_context;
+
+ sd_id128_t seed;
+};
+
+static uint64_t round_down_size(uint64_t v, uint64_t p) {
+ return (v / p) * p;
+}
+
+static uint64_t round_up_size(uint64_t v, uint64_t p) {
+
+ v = DIV_ROUND_UP(v, p);
+
+ if (v > UINT64_MAX / p)
+ return UINT64_MAX; /* overflow */
+
+ return v * p;
+}
+
+static Partition *partition_new(void) {
+ Partition *p;
+
+ p = new(Partition, 1);
+ if (!p)
+ return NULL;
+
+ *p = (Partition) {
+ .weight = 1000,
+ .padding_weight = 0,
+ .current_size = UINT64_MAX,
+ .new_size = UINT64_MAX,
+ .size_min = UINT64_MAX,
+ .size_max = UINT64_MAX,
+ .current_padding = UINT64_MAX,
+ .new_padding = UINT64_MAX,
+ .padding_min = UINT64_MAX,
+ .padding_max = UINT64_MAX,
+ .partno = UINT64_MAX,
+ .offset = UINT64_MAX,
+ .copy_blocks_fd = -1,
+ .copy_blocks_size = UINT64_MAX,
+ };
+
+ return p;
+}
+
+static Partition* partition_free(Partition *p) {
+ if (!p)
+ return NULL;
+
+ free(p->current_label);
+ free(p->new_label);
+ free(p->definition_path);
+
+ if (p->current_partition)
+ fdisk_unref_partition(p->current_partition);
+ if (p->new_partition)
+ fdisk_unref_partition(p->new_partition);
+
+ free(p->copy_blocks_path);
+ safe_close(p->copy_blocks_fd);
+
+ free(p->format);
+ strv_free(p->copy_files);
+
+ return mfree(p);
+}
+
+static Partition* partition_unlink_and_free(Context *context, Partition *p) {
+ if (!p)
+ return NULL;
+
+ LIST_REMOVE(partitions, context->partitions, p);
+
+ assert(context->n_partitions > 0);
+ context->n_partitions--;
+
+ return partition_free(p);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Partition*, partition_free);
+
+static Context *context_new(sd_id128_t seed) {
+ Context *context;
+
+ context = new(Context, 1);
+ if (!context)
+ return NULL;
+
+ *context = (Context) {
+ .start = UINT64_MAX,
+ .end = UINT64_MAX,
+ .total = UINT64_MAX,
+ .seed = seed,
+ };
+
+ return context;
+}
+
+static void context_free_free_areas(Context *context) {
+ assert(context);
+
+ for (size_t i = 0; i < context->n_free_areas; i++)
+ free(context->free_areas[i]);
+
+ context->free_areas = mfree(context->free_areas);
+ context->n_free_areas = 0;
+ context->n_allocated_free_areas = 0;
+}
+
+static Context *context_free(Context *context) {
+ if (!context)
+ return NULL;
+
+ while (context->partitions)
+ partition_unlink_and_free(context, context->partitions);
+ assert(context->n_partitions == 0);
+
+ context_free_free_areas(context);
+
+ if (context->fdisk_context)
+ fdisk_unref_context(context->fdisk_context);
+
+ return mfree(context);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Context*, context_free);
+
+static int context_add_free_area(
+ Context *context,
+ uint64_t size,
+ Partition *after) {
+
+ FreeArea *a;
+
+ assert(context);
+ assert(!after || !after->padding_area);
+
+ if (!GREEDY_REALLOC(context->free_areas, context->n_allocated_free_areas, context->n_free_areas + 1))
+ return -ENOMEM;
+
+ a = new(FreeArea, 1);
+ if (!a)
+ return -ENOMEM;
+
+ *a = (FreeArea) {
+ .size = size,
+ .after = after,
+ };
+
+ context->free_areas[context->n_free_areas++] = a;
+
+ if (after)
+ after->padding_area = a;
+
+ return 0;
+}
+
+static bool context_drop_one_priority(Context *context) {
+ int32_t priority = 0;
+ Partition *p;
+ bool exists = false;
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (p->dropped)
+ continue;
+ if (p->priority < priority)
+ continue;
+ if (p->priority == priority) {
+ exists = exists || PARTITION_EXISTS(p);
+ continue;
+ }
+
+ priority = p->priority;
+ exists = PARTITION_EXISTS(p);
+ }
+
+ /* Refuse to drop partitions with 0 or negative priorities or partitions of priorities that have at
+ * least one existing priority */
+ if (priority <= 0 || exists)
+ return false;
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (p->priority < priority)
+ continue;
+
+ if (p->dropped)
+ continue;
+
+ p->dropped = true;
+ log_info("Can't fit partition %s of priority %" PRIi32 ", dropping.", p->definition_path, p->priority);
+ }
+
+ return true;
+}
+
+static uint64_t partition_min_size(const Partition *p) {
+ uint64_t sz;
+
+ /* Calculate the disk space we really need at minimum for this partition. If the partition already
+ * exists the current size is what we really need. If it doesn't exist yet refuse to allocate less
+ * than 4K.
+ *
+ * DEFAULT_MIN_SIZE is the default SizeMin= we configure if nothing else is specified. */
+
+ if (PARTITION_IS_FOREIGN(p)) {
+ /* Don't allow changing size of partitions not managed by us */
+ assert(p->current_size != UINT64_MAX);
+ return p->current_size;
+ }
+
+ sz = p->current_size != UINT64_MAX ? p->current_size : HARD_MIN_SIZE;
+
+ if (!PARTITION_EXISTS(p)) {
+ uint64_t d = 0;
+
+ if (p->encrypt)
+ d += round_up_size(LUKS2_METADATA_SIZE, 4096);
+
+ if (p->copy_blocks_size != UINT64_MAX)
+ d += round_up_size(p->copy_blocks_size, 4096);
+ else if (p->format || p->encrypt) {
+ uint64_t f;
+
+ /* If we shall synthesize a file system, take minimal fs size into account (assumed to be 4K if not known) */
+ f = p->format ? minimal_size_by_fs_name(p->format) : UINT64_MAX;
+ d += f == UINT64_MAX ? 4096 : f;
+ }
+
+ if (d > sz)
+ sz = d;
+ }
+
+ return MAX(p->size_min != UINT64_MAX ? p->size_min : DEFAULT_MIN_SIZE, sz);
+}
+
+static uint64_t partition_max_size(const Partition *p) {
+ /* Calculate how large the partition may become at max. This is generally the configured maximum
+ * size, except when it already exists and is larger than that. In that case it's the existing size,
+ * since we never want to shrink partitions. */
+
+ if (PARTITION_IS_FOREIGN(p)) {
+ /* Don't allow changing size of partitions not managed by us */
+ assert(p->current_size != UINT64_MAX);
+ return p->current_size;
+ }
+
+ if (p->current_size != UINT64_MAX)
+ return MAX(p->current_size, p->size_max);
+
+ return p->size_max;
+}
+
+static uint64_t partition_min_size_with_padding(const Partition *p) {
+ uint64_t sz;
+
+ /* Calculate the disk space we need for this partition plus any free space coming after it. This
+ * takes user configured padding into account as well as any additional whitespace needed to align
+ * the next partition to 4K again. */
+
+ sz = partition_min_size(p);
+
+ if (p->padding_min != UINT64_MAX)
+ sz += p->padding_min;
+
+ if (PARTITION_EXISTS(p)) {
+ /* If the partition wasn't aligned, add extra space so that any we might add will be aligned */
+ assert(p->offset != UINT64_MAX);
+ return round_up_size(p->offset + sz, 4096) - p->offset;
+ }
+
+ /* If this is a new partition we'll place it aligned, hence we just need to round up the required size here */
+ return round_up_size(sz, 4096);
+}
+
+static uint64_t free_area_available(const FreeArea *a) {
+ assert(a);
+
+ /* Determines how much of this free area is not allocated yet */
+
+ assert(a->size >= a->allocated);
+ return a->size - a->allocated;
+}
+
+static uint64_t free_area_available_for_new_partitions(const FreeArea *a) {
+ uint64_t avail;
+
+ /* Similar to free_area_available(), but takes into account that the required size and padding of the
+ * preceding partition is honoured. */
+
+ avail = free_area_available(a);
+ if (a->after) {
+ uint64_t need, space;
+
+ need = partition_min_size_with_padding(a->after);
+
+ assert(a->after->offset != UINT64_MAX);
+ assert(a->after->current_size != UINT64_MAX);
+
+ space = round_up_size(a->after->offset + a->after->current_size, 4096) - a->after->offset + avail;
+ if (need >= space)
+ return 0;
+
+ return space - need;
+ }
+
+ return avail;
+}
+
+static int free_area_compare(FreeArea *const *a, FreeArea *const*b) {
+ return CMP(free_area_available_for_new_partitions(*a),
+ free_area_available_for_new_partitions(*b));
+}
+
+static uint64_t charge_size(uint64_t total, uint64_t amount) {
+ uint64_t rounded;
+
+ assert(amount <= total);
+
+ /* Subtract the specified amount from total, rounding up to multiple of 4K if there's room */
+ rounded = round_up_size(amount, 4096);
+ if (rounded >= total)
+ return 0;
+
+ return total - rounded;
+}
+
+static uint64_t charge_weight(uint64_t total, uint64_t amount) {
+ assert(amount <= total);
+ return total - amount;
+}
+
+static bool context_allocate_partitions(Context *context) {
+ Partition *p;
+
+ assert(context);
+
+ /* A simple first-fit algorithm, assuming the array of free areas is sorted by size in decreasing
+ * order. */
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ bool fits = false;
+ uint64_t required;
+ FreeArea *a = NULL;
+
+ /* Skip partitions we already dropped or that already exist */
+ if (p->dropped || PARTITION_EXISTS(p))
+ continue;
+
+ /* Sort by size */
+ typesafe_qsort(context->free_areas, context->n_free_areas, free_area_compare);
+
+ /* How much do we need to fit? */
+ required = partition_min_size_with_padding(p);
+ assert(required % 4096 == 0);
+
+ for (size_t i = 0; i < context->n_free_areas; i++) {
+ a = context->free_areas[i];
+
+ if (free_area_available_for_new_partitions(a) >= required) {
+ fits = true;
+ break;
+ }
+ }
+
+ if (!fits)
+ return false; /* 😢 Oh no! We can't fit this partition into any free area! */
+
+ /* Assign the partition to this free area */
+ p->allocated_to_area = a;
+
+ /* Budget the minimal partition size */
+ a->allocated += required;
+ }
+
+ return true;
+}
+
+static int context_sum_weights(Context *context, FreeArea *a, uint64_t *ret) {
+ uint64_t weight_sum = 0;
+ Partition *p;
+
+ assert(context);
+ assert(a);
+ assert(ret);
+
+ /* Determine the sum of the weights of all partitions placed in or before the specified free area */
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (p->padding_area != a && p->allocated_to_area != a)
+ continue;
+
+ if (p->weight > UINT64_MAX - weight_sum)
+ goto overflow_sum;
+ weight_sum += p->weight;
+
+ if (p->padding_weight > UINT64_MAX - weight_sum)
+ goto overflow_sum;
+ weight_sum += p->padding_weight;
+ }
+
+ *ret = weight_sum;
+ return 0;
+
+overflow_sum:
+ return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Combined weight of partition exceeds unsigned 64bit range, refusing.");
+}
+
+static int scale_by_weight(uint64_t value, uint64_t weight, uint64_t weight_sum, uint64_t *ret) {
+ assert(weight_sum >= weight);
+ assert(ret);
+
+ if (weight == 0) {
+ *ret = 0;
+ return 0;
+ }
+
+ if (value > UINT64_MAX / weight)
+ return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Scaling by weight of partition exceeds unsigned 64bit range, refusing.");
+
+ *ret = value * weight / weight_sum;
+ return 0;
+}
+
+typedef enum GrowPartitionPhase {
+ /* The first phase: we charge partitions which need more (according to constraints) than their weight-based share. */
+ PHASE_OVERCHARGE,
+
+ /* The second phase: we charge partitions which need less (according to constraints) than their weight-based share. */
+ PHASE_UNDERCHARGE,
+
+ /* The third phase: we distribute what remains among the remaining partitions, according to the weights */
+ PHASE_DISTRIBUTE,
+} GrowPartitionPhase;
+
+static int context_grow_partitions_phase(
+ Context *context,
+ FreeArea *a,
+ GrowPartitionPhase phase,
+ uint64_t *span,
+ uint64_t *weight_sum) {
+
+ Partition *p;
+ int r;
+
+ assert(context);
+ assert(a);
+
+ /* Now let's look at the intended weights and adjust them taking the minimum space assignments into
+ * account. i.e. if a partition has a small weight but a high minimum space value set it should not
+ * get any additional room from the left-overs. Similar, if two partitions have the same weight they
+ * should get the same space if possible, even if one has a smaller minimum size than the other. */
+ LIST_FOREACH(partitions, p, context->partitions) {
+
+ /* Look only at partitions associated with this free area, i.e. immediately
+ * preceding it, or allocated into it */
+ if (p->allocated_to_area != a && p->padding_area != a)
+ continue;
+
+ if (p->new_size == UINT64_MAX) {
+ bool charge = false, try_again = false;
+ uint64_t share, rsz, xsz;
+
+ /* Calculate how much this space this partition needs if everyone would get
+ * the weight based share */
+ r = scale_by_weight(*span, p->weight, *weight_sum, &share);
+ if (r < 0)
+ return r;
+
+ rsz = partition_min_size(p);
+ xsz = partition_max_size(p);
+
+ if (phase == PHASE_OVERCHARGE && rsz > share) {
+ /* This partition needs more than its calculated share. Let's assign
+ * it that, and take this partition out of all calculations and start
+ * again. */
+
+ p->new_size = rsz;
+ charge = try_again = true;
+
+ } else if (phase == PHASE_UNDERCHARGE && xsz != UINT64_MAX && xsz < share) {
+ /* This partition accepts less than its calculated
+ * share. Let's assign it that, and take this partition out
+ * of all calculations and start again. */
+
+ p->new_size = xsz;
+ charge = try_again = true;
+
+ } else if (phase == PHASE_DISTRIBUTE) {
+ /* This partition can accept its calculated share. Let's
+ * assign it. There's no need to restart things here since
+ * assigning this shouldn't impact the shares of the other
+ * partitions. */
+
+ if (PARTITION_IS_FOREIGN(p))
+ /* Never change of foreign partitions (i.e. those we don't manage) */
+ p->new_size = p->current_size;
+ else
+ p->new_size = MAX(round_down_size(share, 4096), rsz);
+
+ charge = true;
+ }
+
+ if (charge) {
+ *span = charge_size(*span, p->new_size);
+ *weight_sum = charge_weight(*weight_sum, p->weight);
+ }
+
+ if (try_again)
+ return 0; /* try again */
+ }
+
+ if (p->new_padding == UINT64_MAX) {
+ bool charge = false, try_again = false;
+ uint64_t share;
+
+ r = scale_by_weight(*span, p->padding_weight, *weight_sum, &share);
+ if (r < 0)
+ return r;
+
+ if (phase == PHASE_OVERCHARGE && p->padding_min != UINT64_MAX && p->padding_min > share) {
+ p->new_padding = p->padding_min;
+ charge = try_again = true;
+ } else if (phase == PHASE_UNDERCHARGE && p->padding_max != UINT64_MAX && p->padding_max < share) {
+ p->new_padding = p->padding_max;
+ charge = try_again = true;
+ } else if (phase == PHASE_DISTRIBUTE) {
+
+ p->new_padding = round_down_size(share, 4096);
+ if (p->padding_min != UINT64_MAX && p->new_padding < p->padding_min)
+ p->new_padding = p->padding_min;
+
+ charge = true;
+ }
+
+ if (charge) {
+ *span = charge_size(*span, p->new_padding);
+ *weight_sum = charge_weight(*weight_sum, p->padding_weight);
+ }
+
+ if (try_again)
+ return 0; /* try again */
+ }
+ }
+
+ return 1; /* done */
+}
+
+static int context_grow_partitions_on_free_area(Context *context, FreeArea *a) {
+ uint64_t weight_sum = 0, span;
+ int r;
+
+ assert(context);
+ assert(a);
+
+ r = context_sum_weights(context, a, &weight_sum);
+ if (r < 0)
+ return r;
+
+ /* Let's calculate the total area covered by this free area and the partition before it */
+ span = a->size;
+ if (a->after) {
+ assert(a->after->offset != UINT64_MAX);
+ assert(a->after->current_size != UINT64_MAX);
+
+ span += round_up_size(a->after->offset + a->after->current_size, 4096) - a->after->offset;
+ }
+
+ GrowPartitionPhase phase = PHASE_OVERCHARGE;
+ for (;;) {
+ r = context_grow_partitions_phase(context, a, phase, &span, &weight_sum);
+ if (r < 0)
+ return r;
+ if (r == 0) /* not done yet, re-run this phase */
+ continue;
+
+ if (phase == PHASE_OVERCHARGE)
+ phase = PHASE_UNDERCHARGE;
+ else if (phase == PHASE_UNDERCHARGE)
+ phase = PHASE_DISTRIBUTE;
+ else if (phase == PHASE_DISTRIBUTE)
+ break;
+ }
+
+ /* We still have space left over? Donate to preceding partition if we have one */
+ if (span > 0 && a->after && !PARTITION_IS_FOREIGN(a->after)) {
+ uint64_t m, xsz;
+
+ assert(a->after->new_size != UINT64_MAX);
+ m = a->after->new_size + span;
+
+ xsz = partition_max_size(a->after);
+ if (xsz != UINT64_MAX && m > xsz)
+ m = xsz;
+
+ span = charge_size(span, m - a->after->new_size);
+ a->after->new_size = m;
+ }
+
+ /* What? Even still some space left (maybe because there was no preceding partition, or it had a
+ * size limit), then let's donate it to whoever wants it. */
+ if (span > 0) {
+ Partition *p;
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ uint64_t m, xsz;
+
+ if (p->allocated_to_area != a)
+ continue;
+
+ if (PARTITION_IS_FOREIGN(p))
+ continue;
+
+ assert(p->new_size != UINT64_MAX);
+ m = p->new_size + span;
+
+ xsz = partition_max_size(p);
+ if (xsz != UINT64_MAX && m > xsz)
+ m = xsz;
+
+ span = charge_size(span, m - p->new_size);
+ p->new_size = m;
+
+ if (span == 0)
+ break;
+ }
+ }
+
+ /* Yuck, still no one? Then make it padding */
+ if (span > 0 && a->after) {
+ assert(a->after->new_padding != UINT64_MAX);
+ a->after->new_padding += span;
+ }
+
+ return 0;
+}
+
+static int context_grow_partitions(Context *context) {
+ Partition *p;
+ int r;
+
+ assert(context);
+
+ for (size_t i = 0; i < context->n_free_areas; i++) {
+ r = context_grow_partitions_on_free_area(context, context->free_areas[i]);
+ if (r < 0)
+ return r;
+ }
+
+ /* All existing partitions that have no free space after them can't change size */
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (p->dropped)
+ continue;
+
+ if (!PARTITION_EXISTS(p) || p->padding_area) {
+ /* The algorithm above must have initialized this already */
+ assert(p->new_size != UINT64_MAX);
+ continue;
+ }
+
+ assert(p->new_size == UINT64_MAX);
+ p->new_size = p->current_size;
+
+ assert(p->new_padding == UINT64_MAX);
+ p->new_padding = p->current_padding;
+ }
+
+ return 0;
+}
+
+static void context_place_partitions(Context *context) {
+ uint64_t partno = 0;
+ Partition *p;
+
+ assert(context);
+
+ /* Determine next partition number to assign */
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (!PARTITION_EXISTS(p))
+ continue;
+
+ assert(p->partno != UINT64_MAX);
+ if (p->partno >= partno)
+ partno = p->partno + 1;
+ }
+
+ for (size_t i = 0; i < context->n_free_areas; i++) {
+ FreeArea *a = context->free_areas[i];
+ uint64_t start, left;
+
+ if (a->after) {
+ assert(a->after->offset != UINT64_MAX);
+ assert(a->after->new_size != UINT64_MAX);
+ assert(a->after->new_padding != UINT64_MAX);
+
+ start = a->after->offset + a->after->new_size + a->after->new_padding;
+ } else
+ start = context->start;
+
+ start = round_up_size(start, 4096);
+ left = a->size;
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (p->allocated_to_area != a)
+ continue;
+
+ p->offset = start;
+ p->partno = partno++;
+
+ assert(left >= p->new_size);
+ start += p->new_size;
+ left -= p->new_size;
+
+ assert(left >= p->new_padding);
+ start += p->new_padding;
+ left -= p->new_padding;
+ }
+ }
+}
+
+static int config_parse_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ sd_id128_t *type_uuid = data;
+ int r;
+
+ assert(rvalue);
+ assert(type_uuid);
+
+ r = gpt_partition_type_uuid_from_string(rvalue, type_uuid);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse partition type: %s", rvalue);
+
+ return 0;
+}
+
+static const Specifier specifier_table[] = {
+ COMMON_SYSTEM_SPECIFIERS,
+ {}
+};
+
+static int config_parse_label(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char16_t *recoded = NULL;
+ _cleanup_free_ char *resolved = NULL;
+ char **label = data;
+ int r;
+
+ assert(rvalue);
+ assert(label);
+
+ r = specifier_printf(rvalue, specifier_table, NULL, &resolved);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to expand specifiers in Label=, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (!utf8_is_valid(resolved)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Partition label not valid UTF-8, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ recoded = utf8_to_utf16(resolved, strlen(resolved));
+ if (!recoded)
+ return log_oom();
+
+ if (char16_strlen(recoded) > 36) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Partition label too long for GPT table, ignoring: \"%s\" (from \"%s\")",
+ resolved, rvalue);
+ return 0;
+ }
+
+ free_and_replace(*label, resolved);
+ return 0;
+}
+
+static int config_parse_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint32_t *priority = data, v;
+ int r;
+
+ assert(rvalue);
+ assert(priority);
+
+ r = safe_atou32(rvalue, &v);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse weight value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (v > 1000U*1000U) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Weight needs to be in range 0…10000000, ignoring: %" PRIu32, v);
+ return 0;
+ }
+
+ *priority = v;
+ return 0;
+}
+
+static int config_parse_size4096(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *sz = data, parsed;
+ int r;
+
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1024, &parsed);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse size value: %s", rvalue);
+
+ if (ltype > 0)
+ *sz = round_up_size(parsed, 4096);
+ else if (ltype < 0)
+ *sz = round_down_size(parsed, 4096);
+ else
+ *sz = parsed;
+
+ if (*sz != parsed)
+ log_syntax(unit, LOG_NOTICE, filename, line, r, "Rounded %s= size %" PRIu64 " → %" PRIu64 ", a multiple of 4096.", lvalue, parsed, *sz);
+
+ return 0;
+}
+
+static int config_parse_fstype(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **fstype = data;
+
+ assert(rvalue);
+ assert(data);
+
+ if (!filename_is_valid(rvalue))
+ return log_syntax(unit, LOG_ERR, filename, line, 0,
+ "File system type is not valid, refusing: %s", rvalue);
+
+ return free_and_strdup_warn(fstype, rvalue);
+}
+
+static int config_parse_copy_files(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *source = NULL, *buffer = NULL, *resolved_source = NULL, *resolved_target = NULL;
+ const char *p = rvalue, *target;
+ Partition *partition = data;
+ int r;
+
+ assert(rvalue);
+ assert(partition);
+
+ r = extract_first_word(&p, &source, ":", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract source path: %s", rvalue);
+ if (r == 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "No argument specified: %s", rvalue);
+ return 0;
+ }
+
+ r = extract_first_word(&p, &buffer, ":", EXTRACT_CUNESCAPE|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract target path: %s", rvalue);
+ if (r == 0)
+ target = source; /* No target, then it's the same as the source */
+ else
+ target = buffer;
+
+ if (!isempty(p))
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL), "Too many arguments: %s", rvalue);
+
+ r = specifier_printf(source, specifier_table, NULL, &resolved_source);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to expand specifiers in CopyFiles= source, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (!path_is_absolute(resolved_source) || !path_is_normalized(resolved_source)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid path name in CopyFiles= source, ignoring: %s", resolved_source);
+ return 0;
+ }
+
+ r = specifier_printf(target, specifier_table, NULL, &resolved_target);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to expand specifiers in CopyFiles= target, ignoring: %s", resolved_target);
+ return 0;
+ }
+
+ if (!path_is_absolute(resolved_target) || !path_is_normalized(resolved_target)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid path name in CopyFiles= source, ignoring: %s", resolved_target);
+ return 0;
+ }
+
+ r = strv_consume_pair(&partition->copy_files, TAKE_PTR(resolved_source), TAKE_PTR(resolved_target));
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int partition_read_definition(Partition *p, const char *path) {
+
+ ConfigTableItem table[] = {
+ { "Partition", "Type", config_parse_type, 0, &p->type_uuid },
+ { "Partition", "Label", config_parse_label, 0, &p->new_label },
+ { "Partition", "UUID", config_parse_id128, 0, &p->new_uuid },
+ { "Partition", "Priority", config_parse_int32, 0, &p->priority },
+ { "Partition", "Weight", config_parse_weight, 0, &p->weight },
+ { "Partition", "PaddingWeight", config_parse_weight, 0, &p->padding_weight },
+ { "Partition", "SizeMinBytes", config_parse_size4096, 1, &p->size_min },
+ { "Partition", "SizeMaxBytes", config_parse_size4096, -1, &p->size_max },
+ { "Partition", "PaddingMinBytes", config_parse_size4096, 1, &p->padding_min },
+ { "Partition", "PaddingMaxBytes", config_parse_size4096, -1, &p->padding_max },
+ { "Partition", "FactoryReset", config_parse_bool, 0, &p->factory_reset },
+ { "Partition", "CopyBlocks", config_parse_path, 0, &p->copy_blocks_path },
+ { "Partition", "Format", config_parse_fstype, 0, &p->format },
+ { "Partition", "CopyFiles", config_parse_copy_files, 0, p },
+ { "Partition", "Encrypt", config_parse_bool, 0, &p->encrypt },
+ {}
+ };
+ int r;
+
+ r = config_parse(NULL, path, NULL,
+ "Partition\0",
+ config_item_table_lookup, table,
+ CONFIG_PARSE_WARN,
+ p,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (p->size_min != UINT64_MAX && p->size_max != UINT64_MAX && p->size_min > p->size_max)
+ return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
+ "SizeMinBytes= larger than SizeMaxBytes=, refusing.");
+
+ if (p->padding_min != UINT64_MAX && p->padding_max != UINT64_MAX && p->padding_min > p->padding_max)
+ return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
+ "PaddingMinBytes= larger than PaddingMaxBytes=, refusing.");
+
+ if (sd_id128_is_null(p->type_uuid))
+ return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
+ "Type= not defined, refusing.");
+
+ if (p->copy_blocks_path && (p->format || !strv_isempty(p->copy_files)))
+ return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
+ "Format= and CopyBlocks= cannot be combined, refusing.");
+
+ if (!strv_isempty(p->copy_files) && streq_ptr(p->format, "swap"))
+ return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
+ "Format=swap and CopyFiles= cannot be combined, refusing.");
+
+ if (!p->format && (!strv_isempty(p->copy_files) || (p->encrypt && !p->copy_blocks_path))) {
+ /* Pick "ext4" as file system if we are configured to copy files or encrypt the device */
+ p->format = strdup("ext4");
+ if (!p->format)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int context_read_definitions(
+ Context *context,
+ const char *directory,
+ const char *root) {
+
+ _cleanup_strv_free_ char **files = NULL;
+ Partition *last = NULL;
+ char **f;
+ int r;
+
+ assert(context);
+
+ if (directory)
+ r = conf_files_list_strv(&files, ".conf", NULL, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char**) STRV_MAKE(directory));
+ else
+ r = conf_files_list_strv(&files, ".conf", root, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char**) CONF_PATHS_STRV("repart.d"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate *.conf files: %m");
+
+ STRV_FOREACH(f, files) {
+ _cleanup_(partition_freep) Partition *p = NULL;
+
+ p = partition_new();
+ if (!p)
+ return log_oom();
+
+ p->definition_path = strdup(*f);
+ if (!p->definition_path)
+ return log_oom();
+
+ r = partition_read_definition(p, *f);
+ if (r < 0)
+ return r;
+
+ LIST_INSERT_AFTER(partitions, context->partitions, last, p);
+ last = TAKE_PTR(p);
+ context->n_partitions++;
+ }
+
+ return 0;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_context*, fdisk_unref_context);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_partition*, fdisk_unref_partition);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_parttype*, fdisk_unref_parttype);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_table*, fdisk_unref_table);
+
+static int determine_current_padding(
+ struct fdisk_context *c,
+ struct fdisk_table *t,
+ struct fdisk_partition *p,
+ uint64_t *ret) {
+
+ size_t n_partitions;
+ uint64_t offset, next = UINT64_MAX;
+
+ assert(c);
+ assert(t);
+ assert(p);
+
+ if (!fdisk_partition_has_end(p))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition has no end!");
+
+ offset = fdisk_partition_get_end(p);
+ assert(offset < UINT64_MAX / 512);
+ offset *= 512;
+
+ n_partitions = fdisk_table_get_nents(t);
+ for (size_t i = 0; i < n_partitions; i++) {
+ struct fdisk_partition *q;
+ uint64_t start;
+
+ q = fdisk_table_get_partition(t, i);
+ if (!q)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
+
+ if (fdisk_partition_is_used(q) <= 0)
+ continue;
+
+ if (!fdisk_partition_has_start(q))
+ continue;
+
+ start = fdisk_partition_get_start(q);
+ assert(start < UINT64_MAX / 512);
+ start *= 512;
+
+ if (start >= offset && (next == UINT64_MAX || next > start))
+ next = start;
+ }
+
+ if (next == UINT64_MAX) {
+ /* No later partition? In that case check the end of the usable area */
+ next = fdisk_get_last_lba(c);
+ assert(next < UINT64_MAX);
+ next++; /* The last LBA is one sector before the end */
+
+ assert(next < UINT64_MAX / 512);
+ next *= 512;
+
+ if (offset > next)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end.");
+ }
+
+ assert(next >= offset);
+ offset = round_up_size(offset, 4096);
+ next = round_down_size(next, 4096);
+
+ if (next >= offset) /* Check again, rounding might have fucked things up */
+ *ret = next - offset;
+ else
+ *ret = 0;
+
+ return 0;
+}
+
+static int fdisk_ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *data) {
+ _cleanup_free_ char *ids = NULL;
+ int r;
+
+ if (fdisk_ask_get_type(ask) != FDISK_ASKTYPE_STRING)
+ return -EINVAL;
+
+ ids = new(char, ID128_UUID_STRING_MAX);
+ if (!ids)
+ return -ENOMEM;
+
+ r = fdisk_ask_string_set_result(ask, id128_to_uuid_string(*(sd_id128_t*) data, ids));
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(ids);
+ return 0;
+}
+
+static int fdisk_set_disklabel_id_by_uuid(struct fdisk_context *c, sd_id128_t id) {
+ int r;
+
+ r = fdisk_set_ask(c, fdisk_ask_cb, &id);
+ if (r < 0)
+ return r;
+
+ r = fdisk_set_disklabel_id(c);
+ if (r < 0)
+ return r;
+
+ return fdisk_set_ask(c, NULL, NULL);
+}
+
+static int derive_uuid(sd_id128_t base, const char *token, sd_id128_t *ret) {
+ union {
+ unsigned char md[SHA256_DIGEST_LENGTH];
+ sd_id128_t id;
+ } result;
+
+ assert(token);
+ assert(ret);
+
+ /* Derive a new UUID from the specified UUID in a stable and reasonably safe way. Specifically, we
+ * calculate the HMAC-SHA256 of the specified token string, keyed by the supplied base (typically the
+ * machine ID). We use the machine ID as key (and not as cleartext!) of the HMAC operation since it's
+ * the machine ID we don't want to leak. */
+
+ if (!HMAC(EVP_sha256(),
+ &base, sizeof(base),
+ (const unsigned char*) token, strlen(token),
+ result.md, NULL))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "HMAC-SHA256 calculation failed.");
+
+ /* Take the first half, mark it as v4 UUID */
+ assert_cc(sizeof(result.md) == sizeof(result.id) * 2);
+ *ret = id128_make_v4_uuid(result.id);
+ return 0;
+}
+
+static int context_load_partition_table(
+ Context *context,
+ const char *node,
+ int *backing_fd) {
+
+ _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
+ _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
+ uint64_t left_boundary = UINT64_MAX, first_lba, last_lba, nsectors;
+ _cleanup_free_ char *disk_uuid_string = NULL;
+ bool from_scratch = false;
+ sd_id128_t disk_uuid;
+ size_t n_partitions;
+ int r;
+
+ assert(context);
+ assert(node);
+ assert(backing_fd);
+ assert(!context->fdisk_context);
+ assert(!context->free_areas);
+ assert(context->start == UINT64_MAX);
+ assert(context->end == UINT64_MAX);
+ assert(context->total == UINT64_MAX);
+
+ c = fdisk_new_context();
+ if (!c)
+ return log_oom();
+
+ /* libfdisk doesn't have an API to operate on arbitrary fds, hence reopen the fd going via the
+ * /proc/self/fd/ magic path if we have an existing fd. Open the original file otherwise. */
+ if (*backing_fd < 0)
+ r = fdisk_assign_device(c, node, arg_dry_run);
+ else {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ xsprintf(procfs_path, "/proc/self/fd/%i", *backing_fd);
+
+ r = fdisk_assign_device(c, procfs_path, arg_dry_run);
+ }
+ if (r == -EINVAL && arg_size_auto) {
+ struct stat st;
+
+ /* libfdisk returns EINVAL if opening a file of size zero. Let's check for that, and accept
+ * it if automatic sizing is requested. */
+
+ if (*backing_fd < 0)
+ r = stat(node, &st);
+ else
+ r = fstat(*backing_fd, &st);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to stat block device '%s': %m", node);
+
+ if (S_ISREG(st.st_mode) && st.st_size == 0)
+ return /* from_scratch = */ true;
+
+ r = -EINVAL;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to open device '%s': %m", node);
+
+ if (*backing_fd < 0) {
+ /* If we have no fd referencing the device yet, make a copy of the fd now, so that we have one */
+ *backing_fd = fcntl(fdisk_get_devfd(c), F_DUPFD_CLOEXEC, 3);
+ if (*backing_fd < 0)
+ return log_error_errno(errno, "Failed to duplicate fdisk fd: %m");
+ }
+
+ /* Tell udev not to interfere while we are processing the device */
+ if (flock(fdisk_get_devfd(c), arg_dry_run ? LOCK_SH : LOCK_EX) < 0)
+ return log_error_errno(errno, "Failed to lock block device: %m");
+
+ switch (arg_empty) {
+
+ case EMPTY_REFUSE:
+ /* Refuse empty disks, insist on an existing GPT partition table */
+ if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
+ return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has no GPT disk label, not repartitioning.", node);
+
+ break;
+
+ case EMPTY_REQUIRE:
+ /* Require an empty disk, refuse any existing partition table */
+ r = fdisk_has_label(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", node);
+ if (r > 0)
+ return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s already has a disk label, refusing.", node);
+
+ from_scratch = true;
+ break;
+
+ case EMPTY_ALLOW:
+ /* Allow both an empty disk and an existing partition table, but only GPT */
+ r = fdisk_has_label(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", node);
+ if (r > 0) {
+ if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
+ return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has non-GPT disk label, not repartitioning.", node);
+ } else
+ from_scratch = true;
+
+ break;
+
+ case EMPTY_FORCE:
+ case EMPTY_CREATE:
+ /* Always reinitiaize the disk, don't consider what there was on the disk before */
+ from_scratch = true;
+ break;
+ }
+
+ if (from_scratch) {
+ r = fdisk_create_disklabel(c, "gpt");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create GPT disk label: %m");
+
+ r = derive_uuid(context->seed, "disk-uuid", &disk_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire disk GPT uuid: %m");
+
+ r = fdisk_set_disklabel_id_by_uuid(c, disk_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set GPT disk label: %m");
+
+ goto add_initial_free_area;
+ }
+
+ r = fdisk_get_disklabel_id(c, &disk_uuid_string);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get current GPT disk label UUID: %m");
+
+ r = sd_id128_from_string(disk_uuid_string, &disk_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse current GPT disk label UUID: %m");
+
+ if (sd_id128_is_null(disk_uuid)) {
+ r = derive_uuid(context->seed, "disk-uuid", &disk_uuid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire disk GPT uuid: %m");
+
+ r = fdisk_set_disklabel_id(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set GPT disk label: %m");
+ }
+
+ r = fdisk_get_partitions(c, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire partition table: %m");
+
+ n_partitions = fdisk_table_get_nents(t);
+ for (size_t i = 0; i < n_partitions; i++) {
+ _cleanup_free_ char *label_copy = NULL;
+ Partition *pp, *last = NULL;
+ struct fdisk_partition *p;
+ struct fdisk_parttype *pt;
+ const char *pts, *ids, *label;
+ uint64_t sz, start;
+ bool found = false;
+ sd_id128_t ptid, id;
+ size_t partno;
+
+ p = fdisk_table_get_partition(t, i);
+ if (!p)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");
+
+ if (fdisk_partition_is_used(p) <= 0)
+ continue;
+
+ if (fdisk_partition_has_start(p) <= 0 ||
+ fdisk_partition_has_size(p) <= 0 ||
+ fdisk_partition_has_partno(p) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number.");
+
+ pt = fdisk_partition_get_type(p);
+ if (!pt)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire type of partition: %m");
+
+ pts = fdisk_parttype_get_string(pt);
+ if (!pts)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire type of partition as string: %m");
+
+ r = sd_id128_from_string(pts, &ptid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse partition type UUID %s: %m", pts);
+
+ ids = fdisk_partition_get_uuid(p);
+ if (!ids)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a UUID.");
+
+ r = sd_id128_from_string(ids, &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse partition UUID %s: %m", ids);
+
+ label = fdisk_partition_get_name(p);
+ if (!isempty(label)) {
+ label_copy = strdup(label);
+ if (!label_copy)
+ return log_oom();
+ }
+
+ sz = fdisk_partition_get_size(p);
+ assert_se(sz <= UINT64_MAX/512);
+ sz *= 512;
+
+ start = fdisk_partition_get_start(p);
+ assert_se(start <= UINT64_MAX/512);
+ start *= 512;
+
+ partno = fdisk_partition_get_partno(p);
+
+ if (left_boundary == UINT64_MAX || left_boundary > start)
+ left_boundary = start;
+
+ /* Assign this existing partition to the first partition of the right type that doesn't have
+ * an existing one assigned yet. */
+ LIST_FOREACH(partitions, pp, context->partitions) {
+ last = pp;
+
+ if (!sd_id128_equal(pp->type_uuid, ptid))
+ continue;
+
+ if (!pp->current_partition) {
+ pp->current_uuid = id;
+ pp->current_size = sz;
+ pp->offset = start;
+ pp->partno = partno;
+ pp->current_label = TAKE_PTR(label_copy);
+
+ pp->current_partition = p;
+ fdisk_ref_partition(p);
+
+ r = determine_current_padding(c, t, p, &pp->current_padding);
+ if (r < 0)
+ return r;
+
+ if (pp->current_padding > 0) {
+ r = context_add_free_area(context, pp->current_padding, pp);
+ if (r < 0)
+ return r;
+ }
+
+ found = true;
+ break;
+ }
+ }
+
+ /* If we have no matching definition, create a new one. */
+ if (!found) {
+ _cleanup_(partition_freep) Partition *np = NULL;
+
+ np = partition_new();
+ if (!np)
+ return log_oom();
+
+ np->current_uuid = id;
+ np->type_uuid = ptid;
+ np->current_size = sz;
+ np->offset = start;
+ np->partno = partno;
+ np->current_label = TAKE_PTR(label_copy);
+
+ np->current_partition = p;
+ fdisk_ref_partition(p);
+
+ r = determine_current_padding(c, t, p, &np->current_padding);
+ if (r < 0)
+ return r;
+
+ if (np->current_padding > 0) {
+ r = context_add_free_area(context, np->current_padding, np);
+ if (r < 0)
+ return r;
+ }
+
+ LIST_INSERT_AFTER(partitions, context->partitions, last, TAKE_PTR(np));
+ context->n_partitions++;
+ }
+ }
+
+add_initial_free_area:
+ nsectors = fdisk_get_nsectors(c);
+ assert(nsectors <= UINT64_MAX/512);
+ nsectors *= 512;
+
+ first_lba = fdisk_get_first_lba(c);
+ assert(first_lba <= UINT64_MAX/512);
+ first_lba *= 512;
+
+ last_lba = fdisk_get_last_lba(c);
+ assert(last_lba < UINT64_MAX);
+ last_lba++;
+ assert(last_lba <= UINT64_MAX/512);
+ last_lba *= 512;
+
+ assert(last_lba >= first_lba);
+
+ if (left_boundary == UINT64_MAX) {
+ /* No partitions at all? Then the whole disk is up for grabs. */
+
+ first_lba = round_up_size(first_lba, 4096);
+ last_lba = round_down_size(last_lba, 4096);
+
+ if (last_lba > first_lba) {
+ r = context_add_free_area(context, last_lba - first_lba, NULL);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ /* Add space left of first partition */
+ assert(left_boundary >= first_lba);
+
+ first_lba = round_up_size(first_lba, 4096);
+ left_boundary = round_down_size(left_boundary, 4096);
+ last_lba = round_down_size(last_lba, 4096);
+
+ if (left_boundary > first_lba) {
+ r = context_add_free_area(context, left_boundary - first_lba, NULL);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ context->start = first_lba;
+ context->end = last_lba;
+ context->total = nsectors;
+ context->fdisk_context = TAKE_PTR(c);
+
+ return from_scratch;
+}
+
+static void context_unload_partition_table(Context *context) {
+ Partition *p, *next;
+
+ assert(context);
+
+ LIST_FOREACH_SAFE(partitions, p, next, context->partitions) {
+
+ /* Entirely remove partitions that have no configuration */
+ if (PARTITION_IS_FOREIGN(p)) {
+ partition_unlink_and_free(context, p);
+ continue;
+ }
+
+ /* Otherwise drop all data we read off the block device and everything we might have
+ * calculated based on it */
+
+ p->dropped = false;
+ p->current_size = UINT64_MAX;
+ p->new_size = UINT64_MAX;
+ p->current_padding = UINT64_MAX;
+ p->new_padding = UINT64_MAX;
+ p->partno = UINT64_MAX;
+ p->offset = UINT64_MAX;
+
+ if (p->current_partition) {
+ fdisk_unref_partition(p->current_partition);
+ p->current_partition = NULL;
+ }
+
+ if (p->new_partition) {
+ fdisk_unref_partition(p->new_partition);
+ p->new_partition = NULL;
+ }
+
+ p->padding_area = NULL;
+ p->allocated_to_area = NULL;
+
+ p->current_uuid = SD_ID128_NULL;
+ p->current_label = mfree(p->current_label);
+ }
+
+ context->start = UINT64_MAX;
+ context->end = UINT64_MAX;
+ context->total = UINT64_MAX;
+
+ if (context->fdisk_context) {
+ fdisk_unref_context(context->fdisk_context);
+ context->fdisk_context = NULL;
+ }
+
+ context_free_free_areas(context);
+}
+
+static int format_size_change(uint64_t from, uint64_t to, char **ret) {
+ char format_buffer1[FORMAT_BYTES_MAX], format_buffer2[FORMAT_BYTES_MAX], *buf;
+
+ if (from != UINT64_MAX)
+ format_bytes(format_buffer1, sizeof(format_buffer1), from);
+ if (to != UINT64_MAX)
+ format_bytes(format_buffer2, sizeof(format_buffer2), to);
+
+ if (from != UINT64_MAX) {
+ if (from == to || to == UINT64_MAX)
+ buf = strdup(format_buffer1);
+ else
+ buf = strjoin(format_buffer1, " ", special_glyph(SPECIAL_GLYPH_ARROW), " ", format_buffer2);
+ } else if (to != UINT64_MAX)
+ buf = strjoin(special_glyph(SPECIAL_GLYPH_ARROW), " ", format_buffer2);
+ else {
+ *ret = NULL;
+ return 0;
+ }
+
+ if (!buf)
+ return log_oom();
+
+ *ret = TAKE_PTR(buf);
+ return 1;
+}
+
+static const char *partition_label(const Partition *p) {
+ assert(p);
+
+ if (p->new_label)
+ return p->new_label;
+
+ if (p->current_label)
+ return p->current_label;
+
+ return gpt_partition_type_uuid_to_string(p->type_uuid);
+}
+
+static int context_dump_partitions(Context *context, const char *node) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+ uint64_t sum_padding = 0, sum_size = 0;
+ Partition *p;
+ int r;
+
+ if (!arg_json && context->n_partitions == 0) {
+ log_info("Empty partition table.");
+ return 0;
+ }
+
+ t = table_new("type", "label", "uuid", "file", "node", "offset", "old size", "raw size", "size", "old padding", "raw padding", "padding", "activity");
+ if (!t)
+ return log_oom();
+
+ if (!DEBUG_LOGGING) {
+ if (arg_json)
+ (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4,
+ (size_t) 5, (size_t) 6, (size_t) 7, (size_t) 9, (size_t) 10, (size_t) 12, (size_t) -1);
+ else
+ (void) table_set_display(t, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4,
+ (size_t) 8, (size_t) 11, (size_t) -1);
+ }
+
+ (void) table_set_align_percent(t, table_get_cell(t, 0, 4), 100);
+ (void) table_set_align_percent(t, table_get_cell(t, 0, 5), 100);
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ _cleanup_free_ char *size_change = NULL, *padding_change = NULL, *partname = NULL;
+ char uuid_buffer[ID128_UUID_STRING_MAX];
+ const char *label, *activity = NULL;
+
+ if (p->dropped)
+ continue;
+
+ if (p->current_size == UINT64_MAX)
+ activity = "create";
+ else if (p->current_size != p->new_size)
+ activity = "resize";
+
+ label = partition_label(p);
+ partname = p->partno != UINT64_MAX ? fdisk_partname(node, p->partno+1) : NULL;
+
+ r = format_size_change(p->current_size, p->new_size, &size_change);
+ if (r < 0)
+ return r;
+
+ r = format_size_change(p->current_padding, p->new_padding, &padding_change);
+ if (r < 0)
+ return r;
+
+ if (p->new_size != UINT64_MAX)
+ sum_size += p->new_size;
+ if (p->new_padding != UINT64_MAX)
+ sum_padding += p->new_padding;
+
+ r = table_add_many(
+ t,
+ TABLE_STRING, gpt_partition_type_uuid_to_string_harder(p->type_uuid, uuid_buffer),
+ TABLE_STRING, label ?: "-", TABLE_SET_COLOR, label ? NULL : ansi_grey(),
+ TABLE_UUID, sd_id128_is_null(p->new_uuid) ? p->current_uuid : p->new_uuid,
+ TABLE_STRING, p->definition_path ? basename(p->definition_path) : "-", TABLE_SET_COLOR, p->definition_path ? NULL : ansi_grey(),
+ TABLE_STRING, partname ?: "-", TABLE_SET_COLOR, partname ? NULL : ansi_highlight(),
+ TABLE_UINT64, p->offset,
+ TABLE_UINT64, p->current_size == UINT64_MAX ? 0 : p->current_size,
+ TABLE_UINT64, p->new_size,
+ TABLE_STRING, size_change, TABLE_SET_COLOR, !p->partitions_next && sum_size > 0 ? ansi_underline() : NULL,
+ TABLE_UINT64, p->current_padding == UINT64_MAX ? 0 : p->current_padding,
+ TABLE_UINT64, p->new_padding,
+ TABLE_STRING, padding_change, TABLE_SET_COLOR, !p->partitions_next && sum_padding > 0 ? ansi_underline() : NULL,
+ TABLE_STRING, activity ?: "unknown");
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (!arg_json && (sum_padding > 0 || sum_size > 0)) {
+ char s[FORMAT_BYTES_MAX];
+ const char *a, *b;
+
+ a = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", format_bytes(s, sizeof(s), sum_size));
+ b = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", format_bytes(s, sizeof(s), sum_padding));
+
+ r = table_add_many(
+ t,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_STRING, a,
+ TABLE_EMPTY,
+ TABLE_EMPTY,
+ TABLE_STRING, b,
+ TABLE_EMPTY);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (arg_json)
+ r = table_print_json(t, stdout, arg_json_format_flags);
+ else
+ r = table_print(t, stdout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to dump table: %m");
+
+ return 0;
+}
+
+static void context_bar_char_process_partition(
+ Context *context,
+ Partition *bar[],
+ size_t n,
+ Partition *p,
+ size_t *ret_start) {
+
+ uint64_t from, to, total;
+ size_t x, y;
+
+ assert(context);
+ assert(bar);
+ assert(n > 0);
+ assert(p);
+
+ if (p->dropped)
+ return;
+
+ assert(p->offset != UINT64_MAX);
+ assert(p->new_size != UINT64_MAX);
+
+ from = p->offset;
+ to = from + p->new_size;
+
+ assert(context->end >= context->start);
+ total = context->end - context->start;
+
+ assert(from >= context->start);
+ assert(from <= context->end);
+ x = (from - context->start) * n / total;
+
+ assert(to >= context->start);
+ assert(to <= context->end);
+ y = (to - context->start) * n / total;
+
+ assert(x <= y);
+ assert(y <= n);
+
+ for (size_t i = x; i < y; i++)
+ bar[i] = p;
+
+ *ret_start = x;
+}
+
+static int partition_hint(const Partition *p, const char *node, char **ret) {
+ _cleanup_free_ char *buf = NULL;
+ char ids[ID128_UUID_STRING_MAX];
+ const char *label;
+ sd_id128_t id;
+
+ /* Tries really hard to find a suitable description for this partition */
+
+ if (p->definition_path) {
+ buf = strdup(basename(p->definition_path));
+ goto done;
+ }
+
+ label = partition_label(p);
+ if (!isempty(label)) {
+ buf = strdup(label);
+ goto done;
+ }
+
+ if (p->partno != UINT64_MAX) {
+ buf = fdisk_partname(node, p->partno+1);
+ goto done;
+ }
+
+ if (!sd_id128_is_null(p->new_uuid))
+ id = p->new_uuid;
+ else if (!sd_id128_is_null(p->current_uuid))
+ id = p->current_uuid;
+ else
+ id = p->type_uuid;
+
+ buf = strdup(id128_to_uuid_string(id, ids));
+
+done:
+ if (!buf)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(buf);
+ return 0;
+}
+
+static int context_dump_partition_bar(Context *context, const char *node) {
+ _cleanup_free_ Partition **bar = NULL;
+ _cleanup_free_ size_t *start_array = NULL;
+ Partition *p, *last = NULL;
+ bool z = false;
+ size_t c, j = 0;
+
+ assert_se((c = columns()) >= 2);
+ c -= 2; /* We do not use the leftmost and rightmost character cell */
+
+ bar = new0(Partition*, c);
+ if (!bar)
+ return log_oom();
+
+ start_array = new(size_t, context->n_partitions);
+ if (!start_array)
+ return log_oom();
+
+ LIST_FOREACH(partitions, p, context->partitions)
+ context_bar_char_process_partition(context, bar, c, p, start_array + j++);
+
+ putc(' ', stdout);
+
+ for (size_t i = 0; i < c; i++) {
+ if (bar[i]) {
+ if (last != bar[i])
+ z = !z;
+
+ fputs(z ? ansi_green() : ansi_yellow(), stdout);
+ fputs(special_glyph(SPECIAL_GLYPH_DARK_SHADE), stdout);
+ } else {
+ fputs(ansi_normal(), stdout);
+ fputs(special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), stdout);
+ }
+
+ last = bar[i];
+ }
+
+ fputs(ansi_normal(), stdout);
+ putc('\n', stdout);
+
+ for (size_t i = 0; i < context->n_partitions; i++) {
+ _cleanup_free_ char **line = NULL;
+
+ line = new0(char*, c);
+ if (!line)
+ return log_oom();
+
+ j = 0;
+ LIST_FOREACH(partitions, p, context->partitions) {
+ _cleanup_free_ char *d = NULL;
+ j++;
+
+ if (i < context->n_partitions - j) {
+
+ if (line[start_array[j-1]]) {
+ const char *e;
+
+ /* Upgrade final corner to the right with a branch to the right */
+ e = startswith(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_RIGHT));
+ if (e) {
+ d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), e);
+ if (!d)
+ return log_oom();
+ }
+ }
+
+ if (!d) {
+ d = strdup(special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
+ if (!d)
+ return log_oom();
+ }
+
+ } else if (i == context->n_partitions - j) {
+ _cleanup_free_ char *hint = NULL;
+
+ (void) partition_hint(p, node, &hint);
+
+ if (streq_ptr(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_VERTICAL)))
+ d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), " ", strna(hint));
+ else
+ d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_RIGHT), " ", strna(hint));
+
+ if (!d)
+ return log_oom();
+ }
+
+ if (d)
+ free_and_replace(line[start_array[j-1]], d);
+ }
+
+ putc(' ', stdout);
+
+ j = 0;
+ while (j < c) {
+ if (line[j]) {
+ fputs(line[j], stdout);
+ j += utf8_console_width(line[j]);
+ } else {
+ putc(' ', stdout);
+ j++;
+ }
+ }
+
+ putc('\n', stdout);
+
+ for (j = 0; j < c; j++)
+ free(line[j]);
+ }
+
+ return 0;
+}
+
+static bool context_changed(const Context *context) {
+ Partition *p;
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (p->dropped)
+ continue;
+
+ if (p->allocated_to_area)
+ return true;
+
+ if (p->new_size != p->current_size)
+ return true;
+ }
+
+ return false;
+}
+
+static int context_wipe_range(Context *context, uint64_t offset, uint64_t size) {
+ _cleanup_(blkid_free_probep) blkid_probe probe = NULL;
+ int r;
+
+ assert(context);
+ assert(offset != UINT64_MAX);
+ assert(size != UINT64_MAX);
+
+ probe = blkid_new_probe();
+ if (!probe)
+ return log_oom();
+
+ errno = 0;
+ r = blkid_probe_set_device(probe, fdisk_get_devfd(context->fdisk_context), offset, size);
+ if (r < 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to allocate device probe for wiping.");
+
+ errno = 0;
+ if (blkid_probe_enable_superblocks(probe, true) < 0 ||
+ blkid_probe_set_superblocks_flags(probe, BLKID_SUBLKS_MAGIC|BLKID_SUBLKS_BADCSUM) < 0 ||
+ blkid_probe_enable_partitions(probe, true) < 0 ||
+ blkid_probe_set_partitions_flags(probe, BLKID_PARTS_MAGIC) < 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to enable superblock and partition probing.");
+
+ for (;;) {
+ errno = 0;
+ r = blkid_do_probe(probe);
+ if (r < 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe for file systems.");
+ if (r > 0)
+ break;
+
+ errno = 0;
+ if (blkid_do_wipe(probe, false) < 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to wipe file system signature.");
+ }
+
+ return 0;
+}
+
+static int context_wipe_partition(Context *context, Partition *p) {
+ int r;
+
+ assert(context);
+ assert(p);
+ assert(!PARTITION_EXISTS(p)); /* Safety check: never wipe existing partitions */
+
+ assert(p->offset != UINT64_MAX);
+ assert(p->new_size != UINT64_MAX);
+
+ r = context_wipe_range(context, p->offset, p->new_size);
+ if (r < 0)
+ return r;
+
+ log_info("Successfully wiped file system signatures from future partition %" PRIu64 ".", p->partno);
+ return 0;
+}
+
+static int context_discard_range(
+ Context *context,
+ uint64_t offset,
+ uint64_t size) {
+
+ struct stat st;
+ int fd;
+
+ assert(context);
+ assert(offset != UINT64_MAX);
+ assert(size != UINT64_MAX);
+
+ if (size <= 0)
+ return 0;
+
+ assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (S_ISREG(st.st_mode)) {
+ if (fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, size) < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(errno))
+ return -EOPNOTSUPP;
+
+ return -errno;
+ }
+
+ return 1;
+ }
+
+ if (S_ISBLK(st.st_mode)) {
+ uint64_t range[2], end;
+
+ range[0] = round_up_size(offset, 512);
+
+ end = offset + size;
+ if (end <= range[0])
+ return 0;
+
+ range[1] = round_down_size(end - range[0], 512);
+ if (range[1] <= 0)
+ return 0;
+
+ if (ioctl(fd, BLKDISCARD, range) < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(errno))
+ return -EOPNOTSUPP;
+
+ return -errno;
+ }
+
+ return 1;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int context_discard_partition(Context *context, Partition *p) {
+ int r;
+
+ assert(context);
+ assert(p);
+
+ assert(p->offset != UINT64_MAX);
+ assert(p->new_size != UINT64_MAX);
+ assert(!PARTITION_EXISTS(p)); /* Safety check: never discard existing partitions */
+
+ if (!arg_discard)
+ return 0;
+
+ r = context_discard_range(context, p->offset, p->new_size);
+ if (r == -EOPNOTSUPP) {
+ log_info("Storage does not support discard, not discarding data in future partition %" PRIu64 ".", p->partno);
+ return 0;
+ }
+ if (r == 0) {
+ log_info("Partition %" PRIu64 " too short for discard, skipping.", p->partno);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to discard data for future partition %" PRIu64 ".", p->partno);
+
+ log_info("Successfully discarded data from future partition %" PRIu64 ".", p->partno);
+ return 1;
+}
+
+static int context_discard_gap_after(Context *context, Partition *p) {
+ uint64_t gap, next = UINT64_MAX;
+ Partition *q;
+ int r;
+
+ assert(context);
+ assert(!p || (p->offset != UINT64_MAX && p->new_size != UINT64_MAX));
+
+ if (p)
+ gap = p->offset + p->new_size;
+ else
+ gap = context->start;
+
+ LIST_FOREACH(partitions, q, context->partitions) {
+ if (q->dropped)
+ continue;
+
+ assert(q->offset != UINT64_MAX);
+ assert(q->new_size != UINT64_MAX);
+
+ if (q->offset < gap)
+ continue;
+
+ if (next == UINT64_MAX || q->offset < next)
+ next = q->offset;
+ }
+
+ if (next == UINT64_MAX) {
+ next = context->end;
+ if (gap > next)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end.");
+ }
+
+ assert(next >= gap);
+ r = context_discard_range(context, gap, next - gap);
+ if (r == -EOPNOTSUPP) {
+ if (p)
+ log_info("Storage does not support discard, not discarding gap after partition %" PRIu64 ".", p->partno);
+ else
+ log_info("Storage does not support discard, not discarding gap at beginning of disk.");
+ return 0;
+ }
+ if (r == 0) /* Too short */
+ return 0;
+ if (r < 0) {
+ if (p)
+ return log_error_errno(r, "Failed to discard gap after partition %" PRIu64 ".", p->partno);
+ else
+ return log_error_errno(r, "Failed to discard gap at beginning of disk.");
+ }
+
+ if (p)
+ log_info("Successfully discarded gap after partition %" PRIu64 ".", p->partno);
+ else
+ log_info("Successfully discarded gap at beginning of disk.");
+
+ return 0;
+}
+
+static int context_wipe_and_discard(Context *context, bool from_scratch) {
+ Partition *p;
+ int r;
+
+ assert(context);
+
+ /* Wipe and discard the contents of all partitions we are about to create. We skip the discarding if
+ * we were supposed to start from scratch anyway, as in that case we just discard the whole block
+ * device in one go early on. */
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+
+ if (!p->allocated_to_area)
+ continue;
+
+ r = context_wipe_partition(context, p);
+ if (r < 0)
+ return r;
+
+ if (!from_scratch) {
+ r = context_discard_partition(context, p);
+ if (r < 0)
+ return r;
+
+ r = context_discard_gap_after(context, p);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!from_scratch) {
+ r = context_discard_gap_after(context, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int partition_encrypt(
+ Partition *p,
+ const char *node,
+ struct crypt_device **ret_cd,
+ char **ret_volume,
+ int *ret_fd) {
+#if HAVE_LIBCRYPTSETUP
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(erase_and_freep) void *volume_key = NULL;
+ _cleanup_free_ char *dm_name = NULL, *vol = NULL;
+ char suuid[ID128_UUID_STRING_MAX];
+ size_t volume_key_size = 256 / 8;
+ sd_id128_t uuid;
+ int r;
+
+ assert(p);
+ assert(p->encrypt);
+
+ r = dlopen_cryptsetup();
+ if (r < 0)
+ return log_error_errno(r, "libcryptsetup not found, cannot encrypt: %m");
+
+ if (asprintf(&dm_name, "luks-repart-%08" PRIx64, random_u64()) < 0)
+ return log_oom();
+
+ if (ret_volume) {
+ vol = path_join("/dev/mapper/", dm_name);
+ if (!vol)
+ return log_oom();
+ }
+
+ r = derive_uuid(p->new_uuid, "luks-uuid", &uuid);
+ if (r < 0)
+ return r;
+
+ log_info("Encrypting future partition %" PRIu64 "...", p->partno);
+
+ volume_key = malloc(volume_key_size);
+ if (!volume_key)
+ return log_oom();
+
+ r = genuine_random_bytes(volume_key, volume_key_size, RANDOM_BLOCK);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate volume key: %m");
+
+ r = sym_crypt_init(&cd, node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");
+
+ cryptsetup_enable_logging(cd);
+
+ r = sym_crypt_format(cd,
+ CRYPT_LUKS2,
+ "aes",
+ "xts-plain64",
+ id128_to_uuid_string(uuid, suuid),
+ volume_key,
+ volume_key_size,
+ &(struct crypt_params_luks2) {
+ .label = p->new_label,
+ .sector_size = 512U,
+ });
+ if (r < 0)
+ return log_error_errno(r, "Failed to LUKS2 format future partition: %m");
+
+ r = sym_crypt_keyslot_add_by_volume_key(
+ cd,
+ CRYPT_ANY_SLOT,
+ volume_key,
+ volume_key_size,
+ strempty(arg_key),
+ arg_key_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add LUKS2 key: %m");
+
+ r = sym_crypt_activate_by_volume_key(
+ cd,
+ dm_name,
+ volume_key,
+ volume_key_size,
+ arg_discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to activate LUKS superblock: %m");
+
+ log_info("Successfully encrypted future partition %" PRIu64 ".", p->partno);
+
+ if (ret_fd) {
+ _cleanup_close_ int dev_fd = -1;
+
+ dev_fd = open(vol, O_RDWR|O_CLOEXEC|O_NOCTTY);
+ if (dev_fd < 0)
+ return log_error_errno(errno, "Failed to open LUKS volume '%s': %m", vol);
+
+ *ret_fd = TAKE_FD(dev_fd);
+ }
+
+ if (ret_cd)
+ *ret_cd = TAKE_PTR(cd);
+ if (ret_volume)
+ *ret_volume = TAKE_PTR(vol);
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libcryptsetup is not supported, cannot encrypt: %m");
+#endif
+}
+
+static int deactivate_luks(struct crypt_device *cd, const char *node) {
+#if HAVE_LIBCRYPTSETUP
+ int r;
+
+ if (!cd)
+ return 0;
+
+ assert(node);
+
+ /* udev or so might access out block device in the background while we are done. Let's hence force
+ * detach the volume. We sync'ed before, hence this should be safe. */
+
+ r = sym_crypt_deactivate_by_name(cd, basename(node), CRYPT_DEACTIVATE_FORCE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to deactivate LUKS device: %m");
+
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+static int context_copy_blocks(Context *context) {
+ Partition *p;
+ int whole_fd = -1, r;
+
+ assert(context);
+
+ /* Copy in file systems on the block level */
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_free_ char *encrypted = NULL;
+ _cleanup_close_ int encrypted_dev_fd = -1;
+ char buf[FORMAT_BYTES_MAX];
+ int target_fd;
+
+ if (p->copy_blocks_fd < 0)
+ continue;
+
+ if (p->dropped)
+ continue;
+
+ if (PARTITION_EXISTS(p)) /* Never copy over existing partitions */
+ continue;
+
+ assert(p->new_size != UINT64_MAX);
+ assert(p->copy_blocks_size != UINT64_MAX);
+ assert(p->new_size >= p->copy_blocks_size);
+
+ if (whole_fd < 0)
+ assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
+
+ if (p->encrypt) {
+ r = loop_device_make(whole_fd, O_RDWR, p->offset, p->new_size, 0, &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
+
+ r = loop_device_flock(d, LOCK_EX);
+ if (r < 0)
+ return log_error_errno(r, "Failed to lock loopback device: %m");
+
+ r = partition_encrypt(p, d->node, &cd, &encrypted, &encrypted_dev_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to encrypt device: %m");
+
+ if (flock(encrypted_dev_fd, LOCK_EX) < 0)
+ return log_error_errno(errno, "Failed to lock LUKS device: %m");
+
+ target_fd = encrypted_dev_fd;
+ } else {
+ if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek to partition offset: %m");
+
+ target_fd = whole_fd;
+ }
+
+ log_info("Copying in '%s' (%s) on block level into future partition %" PRIu64 ".", p->copy_blocks_path, format_bytes(buf, sizeof(buf), p->copy_blocks_size), p->partno);
+
+ r = copy_bytes_full(p->copy_blocks_fd, target_fd, p->copy_blocks_size, 0, NULL, NULL, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy in data from '%s': %m", p->copy_blocks_path);
+
+ if (fsync(target_fd) < 0)
+ return log_error_errno(r, "Failed to synchronize copied data blocks: %m");
+
+ if (p->encrypt) {
+ encrypted_dev_fd = safe_close(encrypted_dev_fd);
+
+ r = deactivate_luks(cd, encrypted);
+ if (r < 0)
+ return r;
+
+ sym_crypt_free(cd);
+ cd = NULL;
+
+ r = loop_device_sync(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to sync loopback device: %m");
+ }
+
+ log_info("Copying in of '%s' on block level completed.", p->copy_blocks_path);
+ }
+
+ return 0;
+}
+
+static int do_copy_files(Partition *p, const char *fs) {
+ char **source, **target;
+ int r;
+
+ assert(p);
+ assert(fs);
+
+ STRV_FOREACH_PAIR(source, target, p->copy_files) {
+ _cleanup_close_ int sfd = -1, pfd = -1, tfd = -1;
+ _cleanup_free_ char *dn = NULL;
+
+ dn = dirname_malloc(*target);
+ if (!dn)
+ return log_oom();
+
+ sfd = chase_symlinks_and_open(*source, arg_root, CHASE_PREFIX_ROOT|CHASE_WARN, O_CLOEXEC|O_NOCTTY, NULL);
+ if (sfd < 0)
+ return log_error_errno(sfd, "Failed to open source file '%s%s': %m", strempty(arg_root), *source);
+
+ r = fd_verify_regular(sfd);
+ if (r < 0) {
+ if (r != -EISDIR)
+ return log_error_errno(r, "Failed to check type of source file '%s': %m", *source);
+
+ /* We are looking at a directory */
+ tfd = chase_symlinks_and_open(*target, fs, CHASE_PREFIX_ROOT|CHASE_WARN, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
+ if (tfd < 0) {
+ if (tfd != -ENOENT)
+ return log_error_errno(tfd, "Failed to open target directory '%s': %m", *target);
+
+ r = mkdir_p_root(fs, dn, UID_INVALID, GID_INVALID, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create parent directory '%s': %m", dn);
+
+ pfd = chase_symlinks_and_open(dn, fs, CHASE_PREFIX_ROOT|CHASE_WARN, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
+ if (pfd < 0)
+ return log_error_errno(pfd, "Failed to open parent directory of target: %m");
+
+ r = copy_tree_at(
+ sfd, ".",
+ pfd, basename(*target),
+ UID_INVALID, GID_INVALID,
+ COPY_REFLINK|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS);
+ } else
+ r = copy_tree_at(
+ sfd, ".",
+ tfd, ".",
+ UID_INVALID, GID_INVALID,
+ COPY_REFLINK|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy %s%s to %s: %m", strempty(arg_root), *source, *target);
+ } else {
+ /* We are looking at a regular file */
+
+ r = mkdir_p_root(fs, dn, UID_INVALID, GID_INVALID, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create parent directory: %m");
+
+ pfd = chase_symlinks_and_open(dn, fs, CHASE_PREFIX_ROOT|CHASE_WARN, O_RDONLY|O_DIRECTORY|O_CLOEXEC, NULL);
+ if (pfd < 0)
+ return log_error_errno(tfd, "Failed to open parent directory of target: %m");
+
+ tfd = openat(pfd, basename(*target), O_CREAT|O_EXCL|O_WRONLY|O_CLOEXEC, 0700);
+ if (tfd < 0)
+ return log_error_errno(errno, "Failed to create target file '%s': %m", *target);
+
+ r = copy_bytes(sfd, tfd, UINT64_MAX, COPY_REFLINK|COPY_SIGINT);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy '%s%s' to '%s': %m", strempty(arg_root), *source, *target);
+
+ (void) copy_xattr(sfd, tfd);
+ (void) copy_access(sfd, tfd);
+ (void) copy_times(sfd, tfd, 0);
+ }
+ }
+
+ return 0;
+}
+
+static int partition_copy_files(Partition *p, const char *node) {
+ int r;
+
+ assert(p);
+ assert(node);
+
+ if (strv_isempty(p->copy_files))
+ return 0;
+
+ log_info("Populating partition %" PRIu64 " with files.", p->partno);
+
+ /* We copy in a child process, since we have to mount the fs for that, and we don't want that fs to
+ * appear in the host namespace. Hence we fork a child that has its own file system namespace and
+ * detached mount propagation. */
+
+ r = safe_fork("(sd-copy)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ static const char fs[] = "/run/systemd/mount-root";
+ /* This is a child process with its own mount namespace and propagation to host turned off */
+
+ r = mkdir_p(fs, 0700);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create mount point: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (mount_nofollow_verbose(LOG_ERR, node, fs, p->format, MS_NOATIME|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) < 0)
+ _exit(EXIT_FAILURE);
+
+ if (do_copy_files(p, fs) < 0)
+ _exit(EXIT_FAILURE);
+
+ r = syncfs_path(AT_FDCWD, fs);
+ if (r < 0) {
+ log_error_errno(r, "Failed to synchronize written files: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ log_info("Successfully populated partition %" PRIu64 " with files.", p->partno);
+ return 0;
+}
+
+static int context_mkfs(Context *context) {
+ Partition *p;
+ int fd = -1, r;
+
+ assert(context);
+
+ /* Make a file system */
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_free_ char *encrypted = NULL;
+ _cleanup_close_ int encrypted_dev_fd = -1;
+ const char *fsdev;
+ sd_id128_t fs_uuid;
+
+ if (p->dropped)
+ continue;
+
+ if (PARTITION_EXISTS(p)) /* Never format existing partitions */
+ continue;
+
+ if (!p->format)
+ continue;
+
+ assert(p->offset != UINT64_MAX);
+ assert(p->new_size != UINT64_MAX);
+
+ if (fd < 0)
+ assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
+
+ /* Loopback block devices are not only useful to turn regular files into block devices, but
+ * also to cut out sections of block devices into new block devices. */
+
+ r = loop_device_make(fd, O_RDWR, p->offset, p->new_size, 0, &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
+
+ r = loop_device_flock(d, LOCK_EX);
+ if (r < 0)
+ return log_error_errno(r, "Failed to lock loopback device: %m");
+
+ if (p->encrypt) {
+ r = partition_encrypt(p, d->node, &cd, &encrypted, &encrypted_dev_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to encrypt device: %m");
+
+ if (flock(encrypted_dev_fd, LOCK_EX) < 0)
+ return log_error_errno(errno, "Failed to lock LUKS device: %m");
+
+ fsdev = encrypted;
+ } else
+ fsdev = d->node;
+
+ log_info("Formatting future partition %" PRIu64 ".", p->partno);
+
+ /* Calculate the UUID for the file system as HMAC-SHA256 of the string "file-system-uuid",
+ * keyed off the partition UUID. */
+ r = derive_uuid(p->new_uuid, "file-system-uuid", &fs_uuid);
+ if (r < 0)
+ return r;
+
+ r = make_filesystem(fsdev, p->format, p->new_label, fs_uuid, arg_discard);
+ if (r < 0) {
+ encrypted_dev_fd = safe_close(encrypted_dev_fd);
+ (void) deactivate_luks(cd, encrypted);
+ return r;
+ }
+
+ log_info("Successfully formatted future partition %" PRIu64 ".", p->partno);
+
+ /* The file system is now created, no need to delay udev further */
+ if (p->encrypt)
+ if (flock(encrypted_dev_fd, LOCK_UN) < 0)
+ return log_error_errno(errno, "Failed to unlock LUKS device: %m");
+
+ r = partition_copy_files(p, fsdev);
+ if (r < 0) {
+ encrypted_dev_fd = safe_close(encrypted_dev_fd);
+ (void) deactivate_luks(cd, encrypted);
+ return r;
+ }
+
+ /* Note that we always sync explicitly here, since mkfs.fat doesn't do that on its own, and
+ * if we don't sync before detaching a block device the in-flight sectors possibly won't hit
+ * the disk. */
+
+ if (p->encrypt) {
+ if (fsync(encrypted_dev_fd) < 0)
+ return log_error_errno(r, "Failed to synchronize LUKS volume: %m");
+ encrypted_dev_fd = safe_close(encrypted_dev_fd);
+
+ r = deactivate_luks(cd, encrypted);
+ if (r < 0)
+ return r;
+
+ sym_crypt_free(cd);
+ cd = NULL;
+ }
+
+ r = loop_device_sync(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to sync loopback device: %m");
+ }
+
+ return 0;
+}
+
+static int partition_acquire_uuid(Context *context, Partition *p, sd_id128_t *ret) {
+ struct {
+ sd_id128_t type_uuid;
+ uint64_t counter;
+ } _packed_ plaintext = {};
+ union {
+ unsigned char md[SHA256_DIGEST_LENGTH];
+ sd_id128_t id;
+ } result;
+
+ uint64_t k = 0;
+ Partition *q;
+ int r;
+
+ assert(context);
+ assert(p);
+ assert(ret);
+
+ /* Calculate a good UUID for the indicated partition. We want a certain degree of reproducibility,
+ * hence we won't generate the UUIDs randomly. Instead we use a cryptographic hash (precisely:
+ * HMAC-SHA256) to derive them from a single seed. The seed is generally the machine ID of the
+ * installation we are processing, but if random behaviour is desired can be random, too. We use the
+ * seed value as key for the HMAC (since the machine ID is something we generally don't want to leak)
+ * and the partition type as plaintext. The partition type is suffixed with a counter (only for the
+ * second and later partition of the same type) if we have more than one partition of the same
+ * time. Or in other words:
+ *
+ * With:
+ * SEED := /etc/machine-id
+ *
+ * If first partition instance of type TYPE_UUID:
+ * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID)
+ *
+ * For all later partition instances of type TYPE_UUID with INSTANCE being the LE64 encoded instance number:
+ * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID || INSTANCE)
+ */
+
+ LIST_FOREACH(partitions, q, context->partitions) {
+ if (p == q)
+ break;
+
+ if (!sd_id128_equal(p->type_uuid, q->type_uuid))
+ continue;
+
+ k++;
+ }
+
+ plaintext.type_uuid = p->type_uuid;
+ plaintext.counter = htole64(k);
+
+ if (!HMAC(EVP_sha256(),
+ &context->seed, sizeof(context->seed),
+ (const unsigned char*) &plaintext, k == 0 ? sizeof(sd_id128_t) : sizeof(plaintext),
+ result.md, NULL))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "SHA256 calculation failed.");
+
+ /* Take the first half, mark it as v4 UUID */
+ assert_cc(sizeof(result.md) == sizeof(result.id) * 2);
+ result.id = id128_make_v4_uuid(result.id);
+
+ /* Ensure this partition UUID is actually unique, and there's no remaining partition from an earlier run? */
+ LIST_FOREACH(partitions, q, context->partitions) {
+ if (p == q)
+ continue;
+
+ if (sd_id128_equal(q->current_uuid, result.id) ||
+ sd_id128_equal(q->new_uuid, result.id)) {
+ log_warning("Partition UUID calculated from seed for partition %" PRIu64 " exists already, reverting to randomized UUID.", p->partno);
+
+ r = sd_id128_randomize(&result.id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate randomized UUID: %m");
+
+ break;
+ }
+ }
+
+ *ret = result.id;
+ return 0;
+}
+
+static int partition_acquire_label(Context *context, Partition *p, char **ret) {
+ _cleanup_free_ char *label = NULL;
+ const char *prefix;
+ unsigned k = 1;
+
+ assert(context);
+ assert(p);
+ assert(ret);
+
+ prefix = gpt_partition_type_uuid_to_string(p->type_uuid);
+ if (!prefix)
+ prefix = "linux";
+
+ for (;;) {
+ const char *ll = label ?: prefix;
+ bool retry = false;
+ Partition *q;
+
+ LIST_FOREACH(partitions, q, context->partitions) {
+ if (p == q)
+ break;
+
+ if (streq_ptr(ll, q->current_label) ||
+ streq_ptr(ll, q->new_label)) {
+ retry = true;
+ break;
+ }
+ }
+
+ if (!retry)
+ break;
+
+ label = mfree(label);
+
+
+ if (asprintf(&label, "%s-%u", prefix, ++k) < 0)
+ return log_oom();
+ }
+
+ if (!label) {
+ label = strdup(prefix);
+ if (!label)
+ return log_oom();
+ }
+
+ *ret = TAKE_PTR(label);
+ return 0;
+}
+
+static int context_acquire_partition_uuids_and_labels(Context *context) {
+ Partition *p;
+ int r;
+
+ assert(context);
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ /* Never touch foreign partitions */
+ if (PARTITION_IS_FOREIGN(p)) {
+ p->new_uuid = p->current_uuid;
+
+ if (p->current_label) {
+ free(p->new_label);
+ p->new_label = strdup(p->current_label);
+ if (!p->new_label)
+ return log_oom();
+ }
+
+ continue;
+ }
+
+ if (!sd_id128_is_null(p->current_uuid))
+ p->new_uuid = p->current_uuid; /* Never change initialized UUIDs */
+ else if (sd_id128_is_null(p->new_uuid)) {
+ /* Not explicitly set by user! */
+ r = partition_acquire_uuid(context, p, &p->new_uuid);
+ if (r < 0)
+ return r;
+ }
+
+ if (!isempty(p->current_label)) {
+ free(p->new_label);
+ p->new_label = strdup(p->current_label); /* never change initialized labels */
+ if (!p->new_label)
+ return log_oom();
+ } else if (!p->new_label) {
+ /* Not explicitly set by user! */
+
+ r = partition_acquire_label(context, p, &p->new_label);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int context_mangle_partitions(Context *context) {
+ Partition *p;
+ int r;
+
+ assert(context);
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (p->dropped)
+ continue;
+
+ assert(p->new_size != UINT64_MAX);
+ assert(p->offset != UINT64_MAX);
+ assert(p->partno != UINT64_MAX);
+
+ if (PARTITION_EXISTS(p)) {
+ bool changed = false;
+
+ assert(p->current_partition);
+
+ if (p->new_size != p->current_size) {
+ assert(p->new_size >= p->current_size);
+ assert(p->new_size % 512 == 0);
+
+ r = fdisk_partition_size_explicit(p->current_partition, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable explicit sizing: %m");
+
+ r = fdisk_partition_set_size(p->current_partition, p->new_size / 512);
+ if (r < 0)
+ return log_error_errno(r, "Failed to grow partition: %m");
+
+ log_info("Growing existing partition %" PRIu64 ".", p->partno);
+ changed = true;
+ }
+
+ if (!sd_id128_equal(p->new_uuid, p->current_uuid)) {
+ char buf[ID128_UUID_STRING_MAX];
+
+ assert(!sd_id128_is_null(p->new_uuid));
+
+ r = fdisk_partition_set_uuid(p->current_partition, id128_to_uuid_string(p->new_uuid, buf));
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition UUID: %m");
+
+ log_info("Initializing UUID of existing partition %" PRIu64 ".", p->partno);
+ changed = true;
+ }
+
+ if (!streq_ptr(p->new_label, p->current_label)) {
+ assert(!isempty(p->new_label));
+
+ r = fdisk_partition_set_name(p->current_partition, p->new_label);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition label: %m");
+
+ log_info("Setting partition label of existing partition %" PRIu64 ".", p->partno);
+ changed = true;
+ }
+
+ if (changed) {
+ assert(!PARTITION_IS_FOREIGN(p)); /* never touch foreign partitions */
+
+ r = fdisk_set_partition(context->fdisk_context, p->partno, p->current_partition);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update partition: %m");
+ }
+ } else {
+ _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *q = NULL;
+ _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
+ char ids[ID128_UUID_STRING_MAX];
+
+ assert(!p->new_partition);
+ assert(p->offset % 512 == 0);
+ assert(p->new_size % 512 == 0);
+ assert(!sd_id128_is_null(p->new_uuid));
+ assert(!isempty(p->new_label));
+
+ t = fdisk_new_parttype();
+ if (!t)
+ return log_oom();
+
+ r = fdisk_parttype_set_typestr(t, id128_to_uuid_string(p->type_uuid, ids));
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize partition type: %m");
+
+ q = fdisk_new_partition();
+ if (!q)
+ return log_oom();
+
+ r = fdisk_partition_set_type(q, t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition type: %m");
+
+ r = fdisk_partition_size_explicit(q, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable explicit sizing: %m");
+
+ r = fdisk_partition_set_start(q, p->offset / 512);
+ if (r < 0)
+ return log_error_errno(r, "Failed to position partition: %m");
+
+ r = fdisk_partition_set_size(q, p->new_size / 512);
+ if (r < 0)
+ return log_error_errno(r, "Failed to grow partition: %m");
+
+ r = fdisk_partition_set_partno(q, p->partno);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition number: %m");
+
+ r = fdisk_partition_set_uuid(q, id128_to_uuid_string(p->new_uuid, ids));
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition UUID: %m");
+
+ r = fdisk_partition_set_name(q, p->new_label);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set partition label: %m");
+
+ log_info("Adding new partition %" PRIu64 " to partition table.", p->partno);
+
+ r = fdisk_add_partition(context->fdisk_context, q, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add partition: %m");
+
+ assert(!p->new_partition);
+ p->new_partition = TAKE_PTR(q);
+ }
+ }
+
+ return 0;
+}
+
+static int context_write_partition_table(
+ Context *context,
+ const char *node,
+ bool from_scratch) {
+
+ _cleanup_(fdisk_unref_tablep) struct fdisk_table *original_table = NULL;
+ int capable, r;
+
+ assert(context);
+
+ if (arg_pretty > 0 ||
+ (arg_pretty < 0 && isatty(STDOUT_FILENO) > 0) ||
+ arg_json) {
+
+ (void) context_dump_partitions(context, node);
+
+ putc('\n', stdout);
+
+ if (!arg_json)
+ (void) context_dump_partition_bar(context, node);
+ putc('\n', stdout);
+ fflush(stdout);
+ }
+
+ if (!from_scratch && !context_changed(context)) {
+ log_info("No changes.");
+ return 0;
+ }
+
+ if (arg_dry_run) {
+ log_notice("Refusing to repartition, please re-run with --dry-run=no.");
+ return 0;
+ }
+
+ log_info("Applying changes.");
+
+ if (from_scratch) {
+ r = context_wipe_range(context, 0, context->total);
+ if (r < 0)
+ return r;
+
+ log_info("Wiped block device.");
+
+ r = context_discard_range(context, 0, context->total);
+ if (r == -EOPNOTSUPP)
+ log_info("Storage does not support discard, not discarding entire block device data.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to discard entire block device: %m");
+ else if (r > 0)
+ log_info("Discarded entire block device.");
+ }
+
+ r = fdisk_get_partitions(context->fdisk_context, &original_table);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire partition table: %m");
+
+ /* Wipe fs signatures and discard sectors where the new partitions are going to be placed and in the
+ * gaps between partitions, just to be sure. */
+ r = context_wipe_and_discard(context, from_scratch);
+ if (r < 0)
+ return r;
+
+ r = context_copy_blocks(context);
+ if (r < 0)
+ return r;
+
+ r = context_mkfs(context);
+ if (r < 0)
+ return r;
+
+ r = context_mangle_partitions(context);
+ if (r < 0)
+ return r;
+
+ log_info("Writing new partition table.");
+
+ r = fdisk_write_disklabel(context->fdisk_context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write partition table: %m");
+
+ capable = blockdev_partscan_enabled(fdisk_get_devfd(context->fdisk_context));
+ if (capable == -ENOTBLK)
+ log_debug("Not telling kernel to reread partition table, since we are not operating on a block device.");
+ else if (capable < 0)
+ return log_error_errno(capable, "Failed to check if block device supports partition scanning: %m");
+ else if (capable > 0) {
+ log_info("Telling kernel to reread partition table.");
+
+ if (from_scratch)
+ r = fdisk_reread_partition_table(context->fdisk_context);
+ else
+ r = fdisk_reread_changes(context->fdisk_context, original_table);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reread partition table: %m");
+ } else
+ log_notice("Not telling kernel to reread partition table, because selected image does not support kernel partition block devices.");
+
+ log_info("All done.");
+
+ return 0;
+}
+
+static int context_read_seed(Context *context, const char *root) {
+ int r;
+
+ assert(context);
+
+ if (!sd_id128_is_null(context->seed))
+ return 0;
+
+ if (!arg_randomize) {
+ _cleanup_close_ int fd = -1;
+
+ fd = chase_symlinks_and_open("/etc/machine-id", root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC, NULL);
+ if (fd == -ENOENT)
+ log_info("No machine ID set, using randomized partition UUIDs.");
+ else if (fd < 0)
+ return log_error_errno(fd, "Failed to determine machine ID of image: %m");
+ else {
+ r = id128_read_fd(fd, ID128_PLAIN_OR_UNINIT, &context->seed);
+ if (r == -ENOMEDIUM)
+ log_info("No machine ID set, using randomized partition UUIDs.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to parse machine ID of image: %m");
+
+ return 0;
+ }
+ }
+
+ r = sd_id128_randomize(&context->seed);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate randomized seed: %m");
+
+ return 0;
+}
+
+static int context_factory_reset(Context *context, bool from_scratch) {
+ Partition *p;
+ size_t n = 0;
+ int r;
+
+ assert(context);
+
+ if (arg_factory_reset <= 0)
+ return 0;
+
+ if (from_scratch) /* Nothing to reset if we start from scratch */
+ return 0;
+
+ if (arg_dry_run) {
+ log_notice("Refusing to factory reset, please re-run with --dry-run=no.");
+ return 0;
+ }
+
+ log_info("Applying factory reset.");
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+
+ if (!p->factory_reset || !PARTITION_EXISTS(p))
+ continue;
+
+ assert(p->partno != UINT64_MAX);
+
+ log_info("Removing partition %" PRIu64 " for factory reset.", p->partno);
+
+ r = fdisk_delete_partition(context->fdisk_context, p->partno);
+ if (r < 0)
+ return log_error_errno(r, "Failed to remove partition %" PRIu64 ": %m", p->partno);
+
+ n++;
+ }
+
+ if (n == 0) {
+ log_info("Factory reset requested, but no partitions to delete found.");
+ return 0;
+ }
+
+ r = fdisk_write_disklabel(context->fdisk_context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write disk label: %m");
+
+ log_info("Successfully deleted %zu partitions.", n);
+ return 1;
+}
+
+static int context_can_factory_reset(Context *context) {
+ Partition *p;
+
+ assert(context);
+
+ LIST_FOREACH(partitions, p, context->partitions)
+ if (p->factory_reset && PARTITION_EXISTS(p))
+ return true;
+
+ return false;
+}
+
+static int context_open_copy_block_paths(Context *context) {
+ Partition *p;
+ int r;
+
+ assert(context);
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ _cleanup_close_ int source_fd = -1;
+ uint64_t size;
+ struct stat st;
+
+ assert(p->copy_blocks_fd < 0);
+ assert(p->copy_blocks_size == UINT64_MAX);
+
+ if (PARTITION_EXISTS(p)) /* Never copy over partitions that already exist! */
+ continue;
+
+ if (!p->copy_blocks_path)
+ continue;
+
+ source_fd = open(p->copy_blocks_path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (source_fd < 0)
+ return log_error_errno(errno, "Failed to open block copy file '%s': %m", p->copy_blocks_path);
+
+ if (fstat(source_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat block copy file '%s': %m", p->copy_blocks_path);
+
+ if (S_ISDIR(st.st_mode)) {
+ _cleanup_free_ char *bdev = NULL;
+
+ /* If the file is a directory, automatically find the backing block device */
+
+ if (major(st.st_dev) != 0)
+ r = device_path_make_major_minor(S_IFBLK, st.st_dev, &bdev);
+ else {
+ dev_t devt;
+
+ /* Special support for btrfs */
+
+ r = btrfs_get_block_device_fd(source_fd, &devt);
+ if (r == -EUCLEAN)
+ return btrfs_log_dev_root(LOG_ERR, r, p->copy_blocks_path);
+ if (r < 0)
+ return log_error_errno(r, "Unable to determine backing block device of '%s': %m", p->copy_blocks_path);
+
+ r = device_path_make_major_minor(S_IFBLK, devt, &bdev);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine block device path for block device backing '%s': %m", p->copy_blocks_path);
+
+ safe_close(source_fd);
+
+ source_fd = open(bdev, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (source_fd < 0)
+ return log_error_errno(errno, "Failed to open block device '%s': %m", bdev);
+
+ if (fstat(source_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat block device '%s': %m", bdev);
+
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Block device '%s' is not actually a block device, refusing.", bdev);
+ }
+
+ if (S_ISREG(st.st_mode))
+ size = st.st_size;
+ else if (S_ISBLK(st.st_mode)) {
+ if (ioctl(source_fd, BLKGETSIZE64, &size) != 0)
+ return log_error_errno(errno, "Failed to determine size of block device to copy from: %m");
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified path to copy blocks from '%s' is not a regular file, block device or directory, refusing: %m", p->copy_blocks_path);
+
+ if (size <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File to copy bytes from '%s' has zero size, refusing.", p->copy_blocks_path);
+ if (size % 512 != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File to copy bytes from '%s' has size that is not multiple of 512, refusing.", p->copy_blocks_path);
+
+ p->copy_blocks_fd = TAKE_FD(source_fd);
+ p->copy_blocks_size = size;
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-repart", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [DEVICE]\n"
+ "\n%sGrow and add partitions to partition table.%s\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --dry-run=BOOL Whether to run dry-run operation\n"
+ " --empty=MODE One of refuse, allow, require, force, create; controls\n"
+ " how to handle empty disks lacking partition tables\n"
+ " --discard=BOOL Whether to discard backing blocks for new partitions\n"
+ " --pretty=BOOL Whether to show pretty summary before doing changes\n"
+ " --factory-reset=BOOL Whether to remove data partitions before recreating\n"
+ " them\n"
+ " --can-factory-reset Test whether factory reset is defined\n"
+ " --root=PATH Operate relative to root path\n"
+ " --definitions=DIR Find partitions in specified directory\n"
+ " --key-file=PATH Key to use when encrypting partitions\n"
+ " --seed=UUID 128bit seed UUID to derive all UUIDs from\n"
+ " --size=BYTES Grow loopback file to specified size\n"
+ " --json=pretty|short|off\n"
+ " Generate JSON output\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_DRY_RUN,
+ ARG_EMPTY,
+ ARG_DISCARD,
+ ARG_FACTORY_RESET,
+ ARG_CAN_FACTORY_RESET,
+ ARG_ROOT,
+ ARG_SEED,
+ ARG_PRETTY,
+ ARG_DEFINITIONS,
+ ARG_SIZE,
+ ARG_JSON,
+ ARG_KEY_FILE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "dry-run", required_argument, NULL, ARG_DRY_RUN },
+ { "empty", required_argument, NULL, ARG_EMPTY },
+ { "discard", required_argument, NULL, ARG_DISCARD },
+ { "factory-reset", required_argument, NULL, ARG_FACTORY_RESET },
+ { "can-factory-reset", no_argument, NULL, ARG_CAN_FACTORY_RESET },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "seed", required_argument, NULL, ARG_SEED },
+ { "pretty", required_argument, NULL, ARG_PRETTY },
+ { "definitions", required_argument, NULL, ARG_DEFINITIONS },
+ { "size", required_argument, NULL, ARG_SIZE },
+ { "json", required_argument, NULL, ARG_JSON },
+ { "key-file", required_argument, NULL, ARG_KEY_FILE },
+ {}
+ };
+
+ int c, r, dry_run = -1;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_DRY_RUN:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --dry-run= parameter: %s", optarg);
+
+ dry_run = r;
+ break;
+
+ case ARG_EMPTY:
+ if (isempty(optarg) || streq(optarg, "refuse"))
+ arg_empty = EMPTY_REFUSE;
+ else if (streq(optarg, "allow"))
+ arg_empty = EMPTY_ALLOW;
+ else if (streq(optarg, "require"))
+ arg_empty = EMPTY_REQUIRE;
+ else if (streq(optarg, "force"))
+ arg_empty = EMPTY_FORCE;
+ else if (streq(optarg, "create")) {
+ arg_empty = EMPTY_CREATE;
+
+ if (dry_run < 0)
+ dry_run = false; /* Imply --dry-run=no if we create the loopback file
+ * anew. After all we cannot really break anyone's
+ * partition tables that way. */
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --empty= parameter: %s", optarg);
+ break;
+
+ case ARG_DISCARD:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --discard= parameter: %s", optarg);
+
+ arg_discard = r;
+ break;
+
+ case ARG_FACTORY_RESET:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --factory-reset= parameter: %s", optarg);
+
+ arg_factory_reset = r;
+ break;
+
+ case ARG_CAN_FACTORY_RESET:
+ arg_can_factory_reset = true;
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, false, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_SEED:
+ if (isempty(optarg)) {
+ arg_seed = SD_ID128_NULL;
+ arg_randomize = false;
+ } else if (streq(optarg, "random"))
+ arg_randomize = true;
+ else {
+ r = sd_id128_from_string(optarg, &arg_seed);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse seed: %s", optarg);
+
+ arg_randomize = false;
+ }
+
+ break;
+
+ case ARG_PRETTY:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --pretty= parameter: %s", optarg);
+
+ arg_pretty = r;
+ break;
+
+ case ARG_DEFINITIONS:
+ r = parse_path_argument_and_warn(optarg, false, &arg_definitions);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_SIZE: {
+ uint64_t parsed, rounded;
+
+ if (streq(optarg, "auto")) {
+ arg_size = UINT64_MAX;
+ arg_size_auto = true;
+ break;
+ }
+
+ r = parse_size(optarg, 1024, &parsed);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --size= parameter: %s", optarg);
+
+ rounded = round_up_size(parsed, 4096);
+ if (rounded == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too small, refusing.");
+ if (rounded == UINT64_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Specified image size too large, refusing.");
+
+ if (rounded != parsed)
+ log_warning("Specified size is not a multiple of 4096, rounding up automatically. (%" PRIu64 " → %" PRIu64 ")",
+ parsed, rounded);
+
+ arg_size = rounded;
+ arg_size_auto = false;
+ break;
+ }
+
+ case ARG_JSON:
+ if (streq(optarg, "pretty")) {
+ arg_json = true;
+ arg_json_format_flags = JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR_AUTO;
+ } else if (streq(optarg, "short")) {
+ arg_json = true;
+ arg_json_format_flags = JSON_FORMAT_NEWLINE;
+ } else if (streq(optarg, "off")) {
+ arg_json = false;
+ arg_json_format_flags = 0;
+ } else if (streq(optarg, "help")) {
+ puts("pretty\n"
+ "short\n"
+ "off");
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown argument to --json=: %s", optarg);
+
+ break;
+
+ case ARG_KEY_FILE: {
+ _cleanup_(erase_and_freep) char *k = NULL;
+ size_t n = 0;
+
+ r = read_full_file_full(AT_FDCWD, optarg, READ_FULL_FILE_SECURE|READ_FULL_FILE_CONNECT_SOCKET, NULL, &k, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read key file '%s': %m", optarg);
+
+ erase_and_free(arg_key);
+ arg_key = TAKE_PTR(k);
+ arg_key_size = n;
+ break;
+ }
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc - optind > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected at most one argument, the path to the block device.");
+
+ if (arg_factory_reset > 0 && IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Combination of --factory-reset=yes and --empty=force/--empty=require/--empty=create is invalid.");
+
+ if (arg_can_factory_reset)
+ arg_dry_run = true; /* When --can-factory-reset is specified we don't make changes, hence
+ * non-dry-run mode makes no sense. Thus, imply dry run mode so that we
+ * open things strictly read-only. */
+ else if (dry_run >= 0)
+ arg_dry_run = dry_run;
+
+ if (arg_empty == EMPTY_CREATE && (arg_size == UINT64_MAX && !arg_size_auto))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "If --empty=create is specified, --size= must be specified, too.");
+
+ arg_node = argc > optind ? argv[optind] : NULL;
+
+ if (IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE, EMPTY_CREATE) && !arg_node)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "A path to a device node or loopback file must be specified when --empty=force, --empty=require or --empty=create are used.");
+
+ return 1;
+}
+
+static int parse_proc_cmdline_factory_reset(void) {
+ bool b;
+ int r;
+
+ if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */
+ return 0;
+
+ if (!in_initrd()) /* Never honour kernel command line factory reset request outside of the initrd */
+ return 0;
+
+ r = proc_cmdline_get_bool("systemd.factory_reset", &b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse systemd.factory_reset kernel command line argument: %m");
+ if (r > 0) {
+ arg_factory_reset = b;
+
+ if (b)
+ log_notice("Honouring factory reset requested via kernel command line.");
+ }
+
+ return 0;
+}
+
+static int parse_efi_variable_factory_reset(void) {
+ _cleanup_free_ char *value = NULL;
+ int r;
+
+ if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */
+ return 0;
+
+ if (!in_initrd()) /* Never honour EFI variable factory reset request outside of the initrd */
+ return 0;
+
+ r = efi_get_variable_string(EFI_VENDOR_SYSTEMD, "FactoryReset", &value);
+ if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to read EFI variable FactoryReset: %m");
+
+ r = parse_boolean(value);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse EFI variable FactoryReset: %m");
+
+ arg_factory_reset = r;
+ if (r)
+ log_notice("Honouring factory reset requested via EFI variable FactoryReset: %m");
+
+ return 0;
+}
+
+static int remove_efi_variable_factory_reset(void) {
+ int r;
+
+ r = efi_set_variable(EFI_VENDOR_SYSTEMD, "FactoryReset", NULL, 0);
+ if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r))
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to remove EFI variable FactoryReset: %m");
+
+ log_info("Successfully unset EFI variable FactoryReset.");
+ return 0;
+}
+
+static int acquire_root_devno(const char *p, int mode, char **ret, int *ret_fd) {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ dev_t devno, fd_devno = (mode_t) -1;
+ int r;
+
+ assert(p);
+ assert(ret);
+ assert(ret_fd);
+
+ fd = open(p, mode);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (S_ISREG(st.st_mode)) {
+ char *s;
+
+ s = strdup(p);
+ if (!s)
+ return log_oom();
+
+ *ret = s;
+ *ret_fd = TAKE_FD(fd);
+
+ return 0;
+ }
+
+ if (S_ISBLK(st.st_mode))
+ fd_devno = devno = st.st_rdev;
+ else if (S_ISDIR(st.st_mode)) {
+
+ devno = st.st_dev;
+ if (major(devno) == 0) {
+ r = btrfs_get_block_device_fd(fd, &devno);
+ if (r == -ENOTTY) /* not btrfs */
+ return -ENODEV;
+ if (r < 0)
+ return r;
+ }
+ } else
+ return -ENOTBLK;
+
+ /* From dm-crypt to backing partition */
+ r = block_get_originating(devno, &devno);
+ if (r < 0)
+ log_debug_errno(r, "Failed to find underlying block device for '%s', ignoring: %m", p);
+
+ /* From partition to whole disk containing it */
+ r = block_get_whole_disk(devno, &devno);
+ if (r < 0)
+ log_debug_errno(r, "Failed to find whole disk block device for '%s', ignoring: %m", p);
+
+ r = device_path_make_canonical(S_IFBLK, devno, ret);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine canonical path for '%s': %m", p);
+
+ /* Only if we still lock at the same block device we can reuse the fd. Otherwise return an
+ * invalidated fd. */
+ *ret_fd = fd_devno != (mode_t) -1 && fd_devno == devno ? TAKE_FD(fd) : -1;
+ return 0;
+}
+
+static int find_root(char **ret, int *ret_fd) {
+ const char *t;
+ int r;
+
+ assert(ret);
+ assert(ret_fd);
+
+ if (arg_node) {
+ if (arg_empty == EMPTY_CREATE) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *s = NULL;
+
+ s = strdup(arg_node);
+ if (!s)
+ return log_oom();
+
+ fd = open(arg_node, O_RDONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOFOLLOW, 0666);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to create '%s': %m", arg_node);
+
+ *ret = TAKE_PTR(s);
+ *ret_fd = TAKE_FD(fd);
+ return 0;
+ }
+
+ r = acquire_root_devno(arg_node, O_RDONLY|O_CLOEXEC, ret, ret_fd);
+ if (r == -EUCLEAN)
+ return btrfs_log_dev_root(LOG_ERR, r, arg_node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open file or determine backing device of %s: %m", arg_node);
+
+ return 0;
+ }
+
+ assert(IN_SET(arg_empty, EMPTY_REFUSE, EMPTY_ALLOW));
+
+ /* Let's search for the root device. We look for two cases here: first in /, and then in /usr. The
+ * latter we check for cases where / is a tmpfs and only /usr is an actual persistent block device
+ * (think: volatile setups) */
+
+ FOREACH_STRING(t, "/", "/usr") {
+ _cleanup_free_ char *j = NULL;
+ const char *p;
+
+ if (in_initrd()) {
+ j = path_join("/sysroot", t);
+ if (!j)
+ return log_oom();
+
+ p = j;
+ } else
+ p = t;
+
+ r = acquire_root_devno(p, O_RDONLY|O_DIRECTORY|O_CLOEXEC, ret, ret_fd);
+ if (r < 0) {
+ if (r == -EUCLEAN)
+ return btrfs_log_dev_root(LOG_ERR, r, p);
+ if (r != -ENODEV)
+ return log_error_errno(r, "Failed to determine backing device of %s: %m", p);
+ } else
+ return 0;
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "Failed to discover root block device.");
+}
+
+static int resize_backing_fd(const char *node, int *fd) {
+ char buf1[FORMAT_BYTES_MAX], buf2[FORMAT_BYTES_MAX];
+ _cleanup_close_ int writable_fd = -1;
+ struct stat st;
+ int r;
+
+ assert(node);
+ assert(fd);
+
+ if (arg_size == UINT64_MAX) /* Nothing to do */
+ return 0;
+
+ if (*fd < 0) {
+ /* Open the file if we haven't opened it yet. Note that we open it read-only here, just to
+ * keep a reference to the file we can pass around. */
+ *fd = open(node, O_RDONLY|O_CLOEXEC);
+ if (*fd < 0)
+ return log_error_errno(errno, "Failed to open '%s' in order to adjust size: %m", node);
+ }
+
+ if (fstat(*fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat '%s': %m", node);
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return log_error_errno(r, "Specified path '%s' is not a regular file, cannot resize: %m", node);
+
+ assert_se(format_bytes(buf1, sizeof(buf1), st.st_size));
+ assert_se(format_bytes(buf2, sizeof(buf2), arg_size));
+
+ if ((uint64_t) st.st_size >= arg_size) {
+ log_info("File '%s' already is of requested size or larger, not growing. (%s >= %s)", node, buf1, buf2);
+ return 0;
+ }
+
+ /* The file descriptor is read-only. In order to grow the file we need to have a writable fd. We
+ * reopen the file for that temporarily. We keep the writable fd only open for this operation though,
+ * as fdisk can't accept it anyway. */
+
+ writable_fd = fd_reopen(*fd, O_WRONLY|O_CLOEXEC);
+ if (writable_fd < 0)
+ return log_error_errno(writable_fd, "Failed to reopen backing file '%s' writable: %m", node);
+
+ if (!arg_discard) {
+ if (fallocate(writable_fd, 0, 0, arg_size) < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(errno))
+ return log_error_errno(errno, "Failed to grow '%s' from %s to %s by allocation: %m",
+ node, buf1, buf2);
+
+ /* Fallback to truncation, if fallocate() is not supported. */
+ log_debug("Backing file system does not support fallocate(), falling back to ftruncate().");
+ } else {
+ if (st.st_size == 0) /* Likely regular file just created by us */
+ log_info("Allocated %s for '%s'.", buf2, node);
+ else
+ log_info("File '%s' grown from %s to %s by allocation.", node, buf1, buf2);
+
+ return 1;
+ }
+ }
+
+ if (ftruncate(writable_fd, arg_size) < 0)
+ return log_error_errno(errno, "Failed to grow '%s' from %s to %s by truncation: %m",
+ node, buf1, buf2);
+
+ if (st.st_size == 0) /* Likely regular file just created by us */
+ log_info("Sized '%s' to %s.", node, buf2);
+ else
+ log_info("File '%s' grown from %s to %s by truncation.", node, buf1, buf2);
+
+ return 1;
+}
+
+static int determine_auto_size(Context *c) {
+ uint64_t sum = round_up_size(GPT_METADATA_SIZE, 4096);
+ char buf[FORMAT_BYTES_MAX];
+ Partition *p;
+
+ assert_se(c);
+ assert_se(arg_size == UINT64_MAX);
+ assert_se(arg_size_auto);
+
+ LIST_FOREACH(partitions, p, c->partitions) {
+ uint64_t m;
+
+ if (p->dropped)
+ continue;
+
+ m = partition_min_size_with_padding(p);
+ if (m > UINT64_MAX - sum)
+ return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Image would grow too large, refusing.");
+
+ sum += m;
+ }
+
+ assert_se(format_bytes(buf, sizeof(buf), sum));
+ log_info("Automatically determined minimal disk image size as %s.", buf);
+
+ arg_size = sum;
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_freep) Context* context = NULL;
+ _cleanup_free_ char *node = NULL;
+ _cleanup_close_ int backing_fd = -1;
+ bool from_scratch;
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ if (in_initrd()) {
+ /* Default to operation on /sysroot when invoked in the initrd! */
+ arg_root = strdup("/sysroot");
+ if (!arg_root)
+ return log_oom();
+ }
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = parse_proc_cmdline_factory_reset();
+ if (r < 0)
+ return r;
+
+ r = parse_efi_variable_factory_reset();
+ if (r < 0)
+ return r;
+
+ context = context_new(arg_seed);
+ if (!context)
+ return log_oom();
+
+ r = context_read_definitions(context, arg_definitions, arg_root);
+ if (r < 0)
+ return r;
+
+ if (context->n_partitions <= 0 && arg_empty == EMPTY_REFUSE) {
+ log_info("Didn't find any partition definition files, nothing to do.");
+ return 0;
+ }
+
+ r = find_root(&node, &backing_fd);
+ if (r < 0)
+ return r;
+
+ if (arg_size != UINT64_MAX) {
+ r = resize_backing_fd(node, &backing_fd);
+ if (r < 0)
+ return r;
+ }
+
+ r = context_load_partition_table(context, node, &backing_fd);
+ if (r == -EHWPOISON)
+ return 77; /* Special return value which means "Not GPT, so not doing anything". This isn't
+ * really an error when called at boot. */
+ if (r < 0)
+ return r;
+ from_scratch = r > 0; /* Starting from scratch */
+
+ if (arg_can_factory_reset) {
+ r = context_can_factory_reset(context);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return EXIT_FAILURE;
+
+ return 0;
+ }
+
+ r = context_factory_reset(context, from_scratch);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* We actually did a factory reset! */
+ r = remove_efi_variable_factory_reset();
+ if (r < 0)
+ return r;
+
+ /* Reload the reduced partition table */
+ context_unload_partition_table(context);
+ r = context_load_partition_table(context, node, &backing_fd);
+ if (r < 0)
+ return r;
+ }
+
+#if 0
+ (void) context_dump_partitions(context, node);
+ putchar('\n');
+#endif
+
+ r = context_read_seed(context, arg_root);
+ if (r < 0)
+ return r;
+
+ /* Open all files to copy blocks from now, since we want to take their size into consideration */
+ r = context_open_copy_block_paths(context);
+ if (r < 0)
+ return r;
+
+ if (arg_size_auto) {
+ r = determine_auto_size(context);
+ if (r < 0)
+ return r;
+
+ /* Flush out everything again, and let's grow the file first, then start fresh */
+ context_unload_partition_table(context);
+
+ assert_se(arg_size != UINT64_MAX);
+ r = resize_backing_fd(node, &backing_fd);
+ if (r < 0)
+ return r;
+
+ r = context_load_partition_table(context, node, &backing_fd);
+ if (r < 0)
+ return r;
+ }
+
+ /* First try to fit new partitions in, dropping by priority until it fits */
+ for (;;) {
+ if (context_allocate_partitions(context))
+ break; /* Success! */
+
+ if (!context_drop_one_priority(context))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSPC),
+ "Can't fit requested partitions into free space, refusing.");
+ }
+
+ /* Now assign free space according to the weight logic */
+ r = context_grow_partitions(context);
+ if (r < 0)
+ return r;
+
+ /* Now calculate where each partition gets placed */
+ context_place_partitions(context);
+
+ /* Make sure each partition has a unique UUID and unique label */
+ r = context_acquire_partition_uuids_and_labels(context);
+ if (r < 0)
+ return r;
+
+ r = context_write_partition_table(context, node, from_scratch);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/partition/test-repart.sh b/src/partition/test-repart.sh
new file mode 100755
index 0000000..9af3049
--- /dev/null
+++ b/src/partition/test-repart.sh
@@ -0,0 +1,213 @@
+#!/usr/bin/env bash
+set -ex
+
+[[ -f /dev/loop-control ]] || exit 77
+
+repart=$1
+test -x $repart
+
+D=$(mktemp --tmpdir --directory "test-repart.XXXXXXXXXX")
+trap "rm -rf '$D'" EXIT INT QUIT PIPE
+mkdir -p $D/definitions
+
+SEED=e2a40bf9-73f1-4278-9160-49c031e7aef8
+
+echo "### Testing systemd-repart --empty=create ###"
+
+$repart $D/zzz --empty=create --size=1G --seed=$SEED
+
+sfdisk -d $D/zzz | grep -v -e 'sector-size' -e '^$' >$D/empty
+
+cmp $D/empty - <<EOF
+label: gpt
+label-id: EF7F7EE2-47B3-4251-B1A1-09EA8BF12D5D
+device: $D/zzz
+unit: sectors
+first-lba: 2048
+last-lba: 2097118
+EOF
+
+echo "### Testing with root, root2, home, & swap ###"
+
+cat >$D/definitions/root.conf <<EOF
+[Partition]
+Type=root-x86-64
+EOF
+
+ln -s root.conf $D/definitions/root2.conf
+
+cat >$D/definitions/home.conf <<EOF
+[Partition]
+Type=home
+Label=home-first
+Label=home-always-too-long-xxxxxxxxxxxxxx-%v
+EOF
+
+cat >$D/definitions/swap.conf <<EOF
+[Partition]
+Type=swap
+SizeMaxBytes=64M
+PaddingMinBytes=92M
+EOF
+
+$repart $D/zzz --dry-run=no --seed=$SEED --definitions=$D/definitions
+
+sfdisk -d $D/zzz | grep -v -e 'sector-size' -e '^$' >$D/populated
+
+cmp $D/populated - <<EOF
+label: gpt
+label-id: EF7F7EE2-47B3-4251-B1A1-09EA8BF12D5D
+device: $D/zzz
+unit: sectors
+first-lba: 2048
+last-lba: 2097118
+$D/zzz1 : start= 2048, size= 591856, type=933AC7E1-2EB4-4F13-B844-0E14E2AEF915, uuid=A6005774-F558-4330-A8E5-D6D2C01C01D6, name="home-first"
+$D/zzz2 : start= 593904, size= 591856, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=CE9C76EB-A8F1-40FF-813C-11DCA6C0A55B, name="root-x86-64"
+$D/zzz3 : start= 1185760, size= 591864, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=AC60A837-550C-43BD-B5C4-9CB73B884E79, name="root-x86-64-2"
+$D/zzz4 : start= 1777624, size= 131072, type=0657FD6D-A4AB-43C4-84E5-0933C84B4F4F, uuid=2AA78CDB-59C7-4173-AF11-C7453737A5D1, name="swap"
+EOF
+
+echo "### Testing with root, root2, home, swap, & another partition ###"
+
+cat >$D/definitions/swap.conf <<EOF
+[Partition]
+Type=swap
+SizeMaxBytes=64M
+EOF
+
+cat >$D/definitions/extra.conf <<EOF
+[Partition]
+Type=linux-generic
+Label=custom_label
+UUID=a0a1a2a3a4a5a6a7a8a9aaabacadaeaf
+EOF
+
+echo "Label=ignored_label" >>$D/definitions/home.conf
+echo "UUID=b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" >>$D/definitions/home.conf
+
+$repart $D/zzz --dry-run=no --seed=$SEED --definitions=$D/definitions
+
+sfdisk -d $D/zzz | grep -v -e 'sector-size' -e '^$' >$D/populated2
+
+cmp $D/populated2 - <<EOF
+label: gpt
+label-id: EF7F7EE2-47B3-4251-B1A1-09EA8BF12D5D
+device: $D/zzz
+unit: sectors
+first-lba: 2048
+last-lba: 2097118
+$D/zzz1 : start= 2048, size= 591856, type=933AC7E1-2EB4-4F13-B844-0E14E2AEF915, uuid=A6005774-F558-4330-A8E5-D6D2C01C01D6, name="home-first"
+$D/zzz2 : start= 593904, size= 591856, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=CE9C76EB-A8F1-40FF-813C-11DCA6C0A55B, name="root-x86-64"
+$D/zzz3 : start= 1185760, size= 591864, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=AC60A837-550C-43BD-B5C4-9CB73B884E79, name="root-x86-64-2"
+$D/zzz4 : start= 1777624, size= 131072, type=0657FD6D-A4AB-43C4-84E5-0933C84B4F4F, uuid=2AA78CDB-59C7-4173-AF11-C7453737A5D1, name="swap"
+$D/zzz5 : start= 1908696, size= 188416, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, uuid=A0A1A2A3-A4A5-A6A7-A8A9-AAABACADAEAF, name="custom_label"
+EOF
+
+echo "### Resizing to 2G ###"
+
+$repart $D/zzz --size=2G --dry-run=no --seed=$SEED --definitions=$D/definitions
+
+sfdisk -d $D/zzz | grep -v -e 'sector-size' -e '^$' >$D/populated3
+
+cmp $D/populated3 - <<EOF
+label: gpt
+label-id: EF7F7EE2-47B3-4251-B1A1-09EA8BF12D5D
+device: $D/zzz
+unit: sectors
+first-lba: 2048
+last-lba: 4194270
+$D/zzz1 : start= 2048, size= 591856, type=933AC7E1-2EB4-4F13-B844-0E14E2AEF915, uuid=A6005774-F558-4330-A8E5-D6D2C01C01D6, name="home-first"
+$D/zzz2 : start= 593904, size= 591856, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=CE9C76EB-A8F1-40FF-813C-11DCA6C0A55B, name="root-x86-64"
+$D/zzz3 : start= 1185760, size= 591864, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=AC60A837-550C-43BD-B5C4-9CB73B884E79, name="root-x86-64-2"
+$D/zzz4 : start= 1777624, size= 131072, type=0657FD6D-A4AB-43C4-84E5-0933C84B4F4F, uuid=2AA78CDB-59C7-4173-AF11-C7453737A5D1, name="swap"
+$D/zzz5 : start= 1908696, size= 2285568, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, uuid=A0A1A2A3-A4A5-A6A7-A8A9-AAABACADAEAF, name="custom_label"
+EOF
+
+dd if=/dev/urandom of=$D/block-copy bs=4096 count=10240
+
+echo "### Testing with root, root2, home, swap, another partition, & partition copy ###"
+
+cat >$D/definitions/extra2.conf <<EOF
+[Partition]
+Type=linux-generic
+Label=block-copy
+UUID=2a1d97e1d0a346cca26eadc643926617
+CopyBlocks=$D/block-copy
+EOF
+
+$repart $D/zzz --size=3G --dry-run=no --seed=$SEED --definitions=$D/definitions
+
+sfdisk -d $D/zzz | grep -v -e 'sector-size' -e '^$' >$D/populated4
+
+cmp $D/populated4 - <<EOF
+label: gpt
+label-id: EF7F7EE2-47B3-4251-B1A1-09EA8BF12D5D
+device: $D/zzz
+unit: sectors
+first-lba: 2048
+last-lba: 6291422
+$D/zzz1 : start= 2048, size= 591856, type=933AC7E1-2EB4-4F13-B844-0E14E2AEF915, uuid=A6005774-F558-4330-A8E5-D6D2C01C01D6, name="home-first"
+$D/zzz2 : start= 593904, size= 591856, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=CE9C76EB-A8F1-40FF-813C-11DCA6C0A55B, name="root-x86-64"
+$D/zzz3 : start= 1185760, size= 591864, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=AC60A837-550C-43BD-B5C4-9CB73B884E79, name="root-x86-64-2"
+$D/zzz4 : start= 1777624, size= 131072, type=0657FD6D-A4AB-43C4-84E5-0933C84B4F4F, uuid=2AA78CDB-59C7-4173-AF11-C7453737A5D1, name="swap"
+$D/zzz5 : start= 1908696, size= 2285568, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, uuid=A0A1A2A3-A4A5-A6A7-A8A9-AAABACADAEAF, name="custom_label"
+$D/zzz6 : start= 4194264, size= 2097152, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, uuid=2A1D97E1-D0A3-46CC-A26E-ADC643926617, name="block-copy"
+EOF
+
+cmp --bytes=41943040 --ignore-initial=0:$((512*4194264)) $D/block-copy $D/zzz
+
+if [ `id -u` == 0 ] && type -P cryptsetup diff losetup > /dev/null ; then
+ echo "### Testing Format=/Encrypt=/CopyFiles="
+
+ # These tests require privileges unfortunately
+
+ cat >$D/definitions/extra3.conf <<EOF
+[Partition]
+Type=linux-generic
+Label=luks-format-copy
+UUID=7b93d1f2-595d-4ce3-b0b9-837fbd9e63b0
+Format=ext4
+Encrypt=yes
+CopyFiles=$D/definitions:/def
+SizeMinBytes=48M
+EOF
+
+ $repart $D/zzz --size=auto --dry-run=no --seed=$SEED --definitions=$D/definitions
+
+ sfdisk -d $D/zzz | grep -v -e 'sector-size' -e '^$' >$D/populated5
+
+ cmp $D/populated5 - <<EOF
+label: gpt
+label-id: EF7F7EE2-47B3-4251-B1A1-09EA8BF12D5D
+device: $D/zzz
+unit: sectors
+first-lba: 2048
+last-lba: 6389726
+$D/zzz1 : start= 2048, size= 591856, type=933AC7E1-2EB4-4F13-B844-0E14E2AEF915, uuid=A6005774-F558-4330-A8E5-D6D2C01C01D6, name="home-first"
+$D/zzz2 : start= 593904, size= 591856, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=CE9C76EB-A8F1-40FF-813C-11DCA6C0A55B, name="root-x86-64"
+$D/zzz3 : start= 1185760, size= 591864, type=4F68BCE3-E8CD-4DB1-96E7-FBCAF984B709, uuid=AC60A837-550C-43BD-B5C4-9CB73B884E79, name="root-x86-64-2"
+$D/zzz4 : start= 1777624, size= 131072, type=0657FD6D-A4AB-43C4-84E5-0933C84B4F4F, uuid=2AA78CDB-59C7-4173-AF11-C7453737A5D1, name="swap"
+$D/zzz5 : start= 1908696, size= 2285568, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, uuid=A0A1A2A3-A4A5-A6A7-A8A9-AAABACADAEAF, name="custom_label"
+$D/zzz6 : start= 4194264, size= 2097152, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, uuid=2A1D97E1-D0A3-46CC-A26E-ADC643926617, name="block-copy"
+$D/zzz7 : start= 6291416, size= 98304, type=0FC63DAF-8483-4772-8E79-3D69D8477DE4, uuid=7B93D1F2-595D-4CE3-B0B9-837FBD9E63B0, name="luks-format-copy"
+EOF
+
+ LOOP=`losetup -P --show --find $D/zzz`
+ VOLUME=test-repart-$RANDOM
+
+ touch $D/empty-password
+ cryptsetup open --type=luks2 --key-file=$D/empty-password ${LOOP}p7 $VOLUME
+ mkdir $D/mount
+ mount -t ext4 /dev/mapper/$VOLUME $D/mount
+ diff -r $D/mount/def $D/definitions > /dev/null
+ umount $D/mount
+ cryptsetup close $VOLUME
+ losetup -d $LOOP
+else
+ echo "### Skipping Format=/Encrypt=/CopyFiles= test, lacking privileges or missing cryptsetup/diff/losetup"
+fi
+
+echo "### Testing json output ###"
+$repart $D/zzz --size=3G --dry-run=no --seed=$SEED --definitions=$D/definitions --json=help
+$repart $D/zzz --size=3G --dry-run=no --seed=$SEED --definitions=$D/definitions --json=pretty
+$repart $D/zzz --size=3G --dry-run=no --seed=$SEED --definitions=$D/definitions --json=short
diff --git a/src/path/path.c b/src/path/path.c
new file mode 100644
index 0000000..5f1bb14
--- /dev/null
+++ b/src/path/path.c
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-path.h"
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "util.h"
+
+static const char *arg_suffix = NULL;
+
+static const char* const path_table[_SD_PATH_MAX] = {
+ [SD_PATH_TEMPORARY] = "temporary",
+ [SD_PATH_TEMPORARY_LARGE] = "temporary-large",
+ [SD_PATH_SYSTEM_BINARIES] = "system-binaries",
+ [SD_PATH_SYSTEM_INCLUDE] = "system-include",
+ [SD_PATH_SYSTEM_LIBRARY_PRIVATE] = "system-library-private",
+ [SD_PATH_SYSTEM_LIBRARY_ARCH] = "system-library-arch",
+ [SD_PATH_SYSTEM_SHARED] = "system-shared",
+ [SD_PATH_SYSTEM_CONFIGURATION_FACTORY] = "system-configuration-factory",
+ [SD_PATH_SYSTEM_STATE_FACTORY] = "system-state-factory",
+ [SD_PATH_SYSTEM_CONFIGURATION] = "system-configuration",
+ [SD_PATH_SYSTEM_RUNTIME] = "system-runtime",
+ [SD_PATH_SYSTEM_RUNTIME_LOGS] = "system-runtime-logs",
+ [SD_PATH_SYSTEM_STATE_PRIVATE] = "system-state-private",
+ [SD_PATH_SYSTEM_STATE_LOGS] = "system-state-logs",
+ [SD_PATH_SYSTEM_STATE_CACHE] = "system-state-cache",
+ [SD_PATH_SYSTEM_STATE_SPOOL] = "system-state-spool",
+ [SD_PATH_USER_BINARIES] = "user-binaries",
+ [SD_PATH_USER_LIBRARY_PRIVATE] = "user-library-private",
+ [SD_PATH_USER_LIBRARY_ARCH] = "user-library-arch",
+ [SD_PATH_USER_SHARED] = "user-shared",
+ [SD_PATH_USER_CONFIGURATION] = "user-configuration",
+ [SD_PATH_USER_RUNTIME] = "user-runtime",
+ [SD_PATH_USER_STATE_CACHE] = "user-state-cache",
+ [SD_PATH_USER] = "user",
+ [SD_PATH_USER_DOCUMENTS] = "user-documents",
+ [SD_PATH_USER_MUSIC] = "user-music",
+ [SD_PATH_USER_PICTURES] = "user-pictures",
+ [SD_PATH_USER_VIDEOS] = "user-videos",
+ [SD_PATH_USER_DOWNLOAD] = "user-download",
+ [SD_PATH_USER_PUBLIC] = "user-public",
+ [SD_PATH_USER_TEMPLATES] = "user-templates",
+ [SD_PATH_USER_DESKTOP] = "user-desktop",
+ [SD_PATH_SEARCH_BINARIES] = "search-binaries",
+ [SD_PATH_SEARCH_BINARIES_DEFAULT] = "search-binaries-default",
+ [SD_PATH_SEARCH_LIBRARY_PRIVATE] = "search-library-private",
+ [SD_PATH_SEARCH_LIBRARY_ARCH] = "search-library-arch",
+ [SD_PATH_SEARCH_SHARED] = "search-shared",
+ [SD_PATH_SEARCH_CONFIGURATION_FACTORY] = "search-configuration-factory",
+ [SD_PATH_SEARCH_STATE_FACTORY] = "search-state-factory",
+ [SD_PATH_SEARCH_CONFIGURATION] = "search-configuration",
+
+ [SD_PATH_SYSTEMD_UTIL] = "systemd-util",
+ [SD_PATH_SYSTEMD_SYSTEM_UNIT] = "systemd-system-unit",
+ [SD_PATH_SYSTEMD_SYSTEM_PRESET] = "systemd-system-preset",
+ [SD_PATH_SYSTEMD_SYSTEM_CONF] = "systemd-system-conf",
+ [SD_PATH_SYSTEMD_SEARCH_SYSTEM_UNIT] = "systemd-search-system-unit",
+ [SD_PATH_SYSTEMD_SYSTEM_GENERATOR] = "systemd-system-generator",
+ [SD_PATH_SYSTEMD_SEARCH_SYSTEM_GENERATOR] = "systemd-search-system-generator",
+ [SD_PATH_SYSTEMD_USER_UNIT] = "systemd-user-unit",
+ [SD_PATH_SYSTEMD_USER_PRESET] = "systemd-user-preset",
+ [SD_PATH_SYSTEMD_USER_CONF] = "systemd-user-conf",
+ [SD_PATH_SYSTEMD_SEARCH_USER_UNIT] = "systemd-search-user-unit",
+ [SD_PATH_SYSTEMD_SEARCH_USER_GENERATOR] = "systemd-search-user-generator",
+ [SD_PATH_SYSTEMD_USER_GENERATOR] = "systemd-user-generator",
+ [SD_PATH_SYSTEMD_SLEEP] = "systemd-sleep",
+ [SD_PATH_SYSTEMD_SHUTDOWN] = "systemd-shutdown",
+
+ [SD_PATH_TMPFILES] = "tmpfiles",
+ [SD_PATH_SYSUSERS] = "sysusers",
+ [SD_PATH_SYSCTL] = "sysctl",
+ [SD_PATH_BINFMT] = "binfmt",
+ [SD_PATH_MODULES_LOAD] = "modules-load",
+ [SD_PATH_CATALOG] = "catalog",
+
+ [SD_PATH_SYSTEMD_SEARCH_NETWORK] = "systemd-search-network",
+};
+
+static int list_homes(void) {
+ uint64_t i = 0;
+ int r = 0;
+
+ for (i = 0; i < ELEMENTSOF(path_table); i++) {
+ _cleanup_free_ char *p = NULL;
+ int q;
+
+ q = sd_path_lookup(i, arg_suffix, &p);
+ if (q < 0) {
+ log_full_errno(q == -ENXIO ? LOG_DEBUG : LOG_ERR,
+ q, "Failed to query %s: %m", path_table[i]);
+ if (q != -ENXIO)
+ r = q;
+ continue;
+ }
+
+ printf("%s: %s\n", path_table[i], p);
+ }
+
+ return r;
+}
+
+static int print_home(const char *n) {
+ uint64_t i = 0;
+ int r;
+
+ for (i = 0; i < ELEMENTSOF(path_table); i++) {
+ if (streq(path_table[i], n)) {
+ _cleanup_free_ char *p = NULL;
+
+ r = sd_path_lookup(i, arg_suffix, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query %s: %m", n);
+
+ printf("%s\n", p);
+ return 0;
+ }
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Path %s not known.", n);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-path", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [NAME...]\n\n"
+ "Show system and user paths.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --suffix=SUFFIX Suffix to append to paths\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_SUFFIX,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "suffix", required_argument, NULL, ARG_SUFFIX },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_SUFFIX:
+ arg_suffix = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char* argv[]) {
+ int r;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (argc > optind) {
+ int i, q;
+
+ for (i = optind; i < argc; i++) {
+ q = print_home(argv[i]);
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+ } else
+ return list_homes();
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/portable/meson.build b/src/portable/meson.build
new file mode 100644
index 0000000..8f866f5
--- /dev/null
+++ b/src/portable/meson.build
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_portabled_sources = files('''
+ portable.c
+ portable.h
+ portabled-bus.c
+ portabled-image-bus.c
+ portabled-image-bus.h
+ portabled-image.c
+ portabled-image.h
+ portabled-operation.c
+ portabled-operation.h
+ portabled.c
+ portabled.h
+'''.split())
+
+if conf.get('ENABLE_PORTABLED') == 1
+ install_data('org.freedesktop.portable1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.portable1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.portable1.policy',
+ install_dir : polkitpolicydir)
+
+ install_data('profile/default/service.conf', install_dir : join_paths(profiledir, 'default'))
+ install_data('profile/nonetwork/service.conf', install_dir : join_paths(profiledir, 'nonetwork'))
+ install_data('profile/strict/service.conf', install_dir : join_paths(profiledir, 'strict'))
+ install_data('profile/trusted/service.conf', install_dir : join_paths(profiledir, 'trusted'))
+endif
diff --git a/src/portable/org.freedesktop.portable1.conf b/src/portable/org.freedesktop.portable1.conf
new file mode 100644
index 0000000..1343e1d
--- /dev/null
+++ b/src/portable/org.freedesktop.portable1.conf
@@ -0,0 +1,117 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.portable1"/>
+ <allow send_destination="org.freedesktop.portable1"/>
+ <allow receive_sender="org.freedesktop.portable1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.portable1"/>
+
+ <!-- generic interfaces -->
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <!-- Manager object -->
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="GetImage"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="ListImages"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="GetImageOSRelease"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="GetImageMetadata"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="GetImageState"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="AttachImage"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="DetachImage"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="RemoveImage"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="MarkImageReadOnly"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="SetImageLimit"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Manager"
+ send_member="SetPoolLimit"/>
+
+ <!-- Image object -->
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="GetOSRelease"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="GetMetadata"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="GetState"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="Attach"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="Detach"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="Remove"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="MarkReadOnly"/>
+
+ <allow send_destination="org.freedesktop.portable1"
+ send_interface="org.freedesktop.portable1.Image"
+ send_member="SetLimit"/>
+
+ <allow receive_sender="org.freedesktop.portable1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/portable/org.freedesktop.portable1.policy b/src/portable/org.freedesktop.portable1.policy
new file mode 100644
index 0000000..17e22b0
--- /dev/null
+++ b/src/portable/org.freedesktop.portable1.policy
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.portable1.inspect-images">
+ <description gettext-domain="systemd">Inspect a portable service image</description>
+ <message gettext-domain="systemd">Authentication is required to inspect a portable service image.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.portable1.attach-images">
+ <description gettext-domain="systemd">Attach or detach a portable service image</description>
+ <message gettext-domain="systemd">Authentication is required to attach or detach a portable service image.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.systemd1.reload-daemon</annotate>
+ </action>
+
+ <action id="org.freedesktop.portable1.manage-images">
+ <description gettext-domain="systemd">Delete or modify portable service image</description>
+ <message gettext-domain="systemd">Authentication is required to delete or modify a portable service image.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/portable/org.freedesktop.portable1.service b/src/portable/org.freedesktop.portable1.service
new file mode 100644
index 0000000..873746e
--- /dev/null
+++ b/src/portable/org.freedesktop.portable1.service
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+[D-BUS Service]
+Name=org.freedesktop.portable1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.portable1.service
diff --git a/src/portable/portable.c b/src/portable/portable.c
new file mode 100644
index 0000000..ed7eac0
--- /dev/null
+++ b/src/portable/portable.c
@@ -0,0 +1,1426 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/loop.h>
+
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "conf-files.h"
+#include "copy.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "install.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "loop-util.h"
+#include "machine-image.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "os-util.h"
+#include "path-lookup.h"
+#include "portable.h"
+#include "process-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "sort-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+static const char profile_dirs[] = CONF_PATHS_NULSTR("systemd/portable/profile");
+
+/* Markers used in the first line of our 20-portable.conf unit file drop-in to determine, that a) the unit file was
+ * dropped there by the portable service logic and b) for which image it was dropped there. */
+#define PORTABLE_DROPIN_MARKER_BEGIN "# Drop-in created for image '"
+#define PORTABLE_DROPIN_MARKER_END "', do not edit."
+
+static bool prefix_match(const char *unit, const char *prefix) {
+ const char *p;
+
+ p = startswith(unit, prefix);
+ if (!p)
+ return false;
+
+ /* Only respect prefixes followed by dash or dot or when there's a complete match */
+ return IN_SET(*p, '-', '.', '@', 0);
+}
+
+static bool unit_match(const char *unit, char **matches) {
+ const char *dot;
+ char **i;
+
+ dot = strrchr(unit, '.');
+ if (!dot)
+ return false;
+
+ if (!STR_IN_SET(dot, ".service", ".socket", ".target", ".timer", ".path"))
+ return false;
+
+ /* Empty match expression means: everything */
+ if (strv_isempty(matches))
+ return true;
+
+ /* Otherwise, at least one needs to match */
+ STRV_FOREACH(i, matches)
+ if (prefix_match(unit, *i))
+ return true;
+
+ return false;
+}
+
+static PortableMetadata *portable_metadata_new(const char *name, int fd) {
+ PortableMetadata *m;
+
+ m = malloc0(offsetof(PortableMetadata, name) + strlen(name) + 1);
+ if (!m)
+ return NULL;
+
+ strcpy(m->name, name);
+ m->fd = fd;
+
+ return m;
+}
+
+PortableMetadata *portable_metadata_unref(PortableMetadata *i) {
+ if (!i)
+ return NULL;
+
+ safe_close(i->fd);
+ free(i->source);
+
+ return mfree(i);
+}
+
+static int compare_metadata(PortableMetadata *const *x, PortableMetadata *const *y) {
+ return strcmp((*x)->name, (*y)->name);
+}
+
+int portable_metadata_hashmap_to_sorted_array(Hashmap *unit_files, PortableMetadata ***ret) {
+
+ _cleanup_free_ PortableMetadata **sorted = NULL;
+ PortableMetadata *item;
+ size_t k = 0;
+
+ sorted = new(PortableMetadata*, hashmap_size(unit_files));
+ if (!sorted)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(item, unit_files)
+ sorted[k++] = item;
+
+ assert(k == hashmap_size(unit_files));
+
+ typesafe_qsort(sorted, k, compare_metadata);
+
+ *ret = TAKE_PTR(sorted);
+ return 0;
+}
+
+static int send_item(
+ int socket_fd,
+ const char *name,
+ int fd) {
+
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control = {};
+ struct iovec iovec;
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+ _cleanup_close_ int data_fd = -1;
+
+ assert(socket_fd >= 0);
+ assert(name);
+ assert(fd >= 0);
+
+ data_fd = fd_duplicate_data_fd(fd);
+ if (data_fd < 0)
+ return data_fd;
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &data_fd, sizeof(int));
+
+ iovec = IOVEC_MAKE_STRING(name);
+
+ if (sendmsg(socket_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int recv_item(
+ int socket_fd,
+ char **ret_name,
+ int *ret_fd) {
+
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control;
+ char buffer[PATH_MAX+2];
+ struct iovec iov = IOVEC_INIT(buffer, sizeof(buffer)-1);
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+ _cleanup_close_ int found_fd = -1;
+ char *copy;
+ ssize_t n;
+
+ assert(socket_fd >= 0);
+ assert(ret_name);
+ assert(ret_fd);
+
+ n = recvmsg_safe(socket_fd, &mh, MSG_CMSG_CLOEXEC);
+ if (n < 0)
+ return (int) n;
+
+ CMSG_FOREACH(cmsg, &mh) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+
+ if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
+ assert(found_fd < 0);
+ found_fd = *(int*) CMSG_DATA(cmsg);
+ break;
+ }
+
+ cmsg_close_all(&mh);
+ return -EIO;
+ }
+ }
+
+ buffer[n] = 0;
+
+ copy = strdup(buffer);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret_name = copy;
+ *ret_fd = TAKE_FD(found_fd);
+
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(portable_metadata_hash_ops, char, string_hash_func, string_compare_func,
+ PortableMetadata, portable_metadata_unref);
+
+static int extract_now(
+ const char *where,
+ char **matches,
+ int socket_fd,
+ PortableMetadata **ret_os_release,
+ Hashmap **ret_unit_files) {
+
+ _cleanup_hashmap_free_ Hashmap *unit_files = NULL;
+ _cleanup_(portable_metadata_unrefp) PortableMetadata *os_release = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_close_ int os_release_fd = -1;
+ _cleanup_free_ char *os_release_path = NULL;
+ char **i;
+ int r;
+
+ /* Extracts the metadata from a directory tree 'where'. Extracts two kinds of information: the /etc/os-release
+ * data, and all unit files matching the specified expression. Note that this function is called in two very
+ * different but also similar contexts. When the tool gets invoked on a directory tree, we'll process it
+ * directly, and in-process, and thus can return the requested data directly, via 'ret_os_release' and
+ * 'ret_unit_files'. However, if the tool is invoked on a raw disk image — which needs to be mounted first — we
+ * are invoked in a child process with private mounts and then need to send the collected data to our
+ * parent. To handle both cases in one call this function also gets a 'socket_fd' parameter, which when >= 0 is
+ * used to send the data to the parent. */
+
+ assert(where);
+
+ /* First, find /etc/os-release and send it upstream (or just save it). */
+ r = open_os_release(where, &os_release_path, &os_release_fd);
+ if (r < 0)
+ log_debug_errno(r, "Couldn't acquire os-release file, ignoring: %m");
+ else {
+ if (socket_fd >= 0) {
+ r = send_item(socket_fd, "/etc/os-release", os_release_fd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send os-release file: %m");
+ }
+
+ if (ret_os_release) {
+ os_release = portable_metadata_new("/etc/os-release", os_release_fd);
+ if (!os_release)
+ return -ENOMEM;
+
+ os_release_fd = -1;
+ os_release->source = TAKE_PTR(os_release_path);
+ }
+ }
+
+ /* Then, send unit file data to the parent (or/and add it to the hashmap). For that we use our usual unit
+ * discovery logic. Note that we force looking inside of /lib/systemd/system/ for units too, as we mightbe
+ * compiled for a split-usr system but the image might be a legacy-usr one. */
+ r = lookup_paths_init(&paths, UNIT_FILE_SYSTEM, LOOKUP_PATHS_SPLIT_USR, where);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to acquire lookup paths: %m");
+
+ unit_files = hashmap_new(&portable_metadata_hash_ops);
+ if (!unit_files)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, paths.search_path) {
+ _cleanup_free_ char *resolved = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ r = chase_symlinks_and_opendir(*i, where, 0, &resolved, &d);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open unit path '%s', ignoring: %m", *i);
+ continue;
+ }
+
+ FOREACH_DIRENT(de, d, return log_debug_errno(errno, "Failed to read directory: %m")) {
+ _cleanup_(portable_metadata_unrefp) PortableMetadata *m = NULL;
+ _cleanup_close_ int fd = -1;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ if (!unit_match(de->d_name, matches))
+ continue;
+
+ /* Filter out duplicates */
+ if (hashmap_get(unit_files, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ fd = openat(dirfd(d), de->d_name, O_CLOEXEC|O_RDONLY);
+ if (fd < 0) {
+ log_debug_errno(errno, "Failed to open unit file '%s', ignoring: %m", de->d_name);
+ continue;
+ }
+
+ if (socket_fd >= 0) {
+ r = send_item(socket_fd, de->d_name, fd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send unit metadata to parent: %m");
+ }
+
+ m = portable_metadata_new(de->d_name, fd);
+ if (!m)
+ return -ENOMEM;
+ fd = -1;
+
+ m->source = path_join(resolved, de->d_name);
+ if (!m->source)
+ return -ENOMEM;
+
+ r = hashmap_put(unit_files, m->name, m);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add unit to hashmap: %m");
+ m = NULL;
+ }
+ }
+
+ if (ret_os_release)
+ *ret_os_release = TAKE_PTR(os_release);
+ if (ret_unit_files)
+ *ret_unit_files = TAKE_PTR(unit_files);
+
+ return 0;
+}
+
+static int portable_extract_by_path(
+ const char *path,
+ char **matches,
+ PortableMetadata **ret_os_release,
+ Hashmap **ret_unit_files,
+ sd_bus_error *error) {
+
+ _cleanup_hashmap_free_ Hashmap *unit_files = NULL;
+ _cleanup_(portable_metadata_unrefp) PortableMetadata* os_release = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ int r;
+
+ assert(path);
+
+ r = loop_device_make_by_path(path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
+ if (r == -EISDIR) {
+ /* We can't turn this into a loop-back block device, and this returns EISDIR? Then this is a directory
+ * tree and not a raw device. It's easy then. */
+
+ r = extract_now(path, matches, -1, &os_release, &unit_files);
+ if (r < 0)
+ return r;
+
+ } else if (r < 0)
+ return log_debug_errno(r, "Failed to set up loopback device: %m");
+ else {
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ _cleanup_(rmdir_and_freep) char *tmpdir = NULL;
+ _cleanup_(close_pairp) int seq[2] = { -1, -1 };
+ _cleanup_(sigkill_waitp) pid_t child = 0;
+
+ /* We now have a loopback block device, let's fork off a child in its own mount namespace, mount it
+ * there, and extract the metadata we need. The metadata is sent from the child back to us. */
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ r = mkdtemp_malloc("/tmp/inspect-XXXXXX", &tmpdir);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create temporary directory: %m");
+
+ r = dissect_image(d->fd, NULL, NULL, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK, &m);
+ if (r == -ENOPKG)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Couldn't identify a suitable partition table or file system in '%s'.", path);
+ else if (r == -EADDRNOTAVAIL)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "No root partition for specified root hash found in '%s'.", path);
+ else if (r == -ENOTUNIQ)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Multiple suitable root partitions found in image '%s'.", path);
+ else if (r == -ENXIO)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "No suitable root partition found in image '%s'.", path);
+ else if (r == -EPROTONOSUPPORT)
+ sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", path);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, seq) < 0)
+ return log_debug_errno(errno, "Failed to allocated SOCK_SEQPACKET socket: %m");
+
+ r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE|FORK_LOG, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ seq[0] = safe_close(seq[0]);
+
+ r = dissected_image_mount(m, tmpdir, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_VALIDATE_OS);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount dissected image: %m");
+ goto child_finish;
+ }
+
+ r = extract_now(tmpdir, matches, seq[1], NULL, NULL);
+
+ child_finish:
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ seq[1] = safe_close(seq[1]);
+
+ unit_files = hashmap_new(&portable_metadata_hash_ops);
+ if (!unit_files)
+ return -ENOMEM;
+
+ for (;;) {
+ _cleanup_(portable_metadata_unrefp) PortableMetadata *add = NULL;
+ _cleanup_free_ char *name = NULL;
+ _cleanup_close_ int fd = -1;
+
+ r = recv_item(seq[0], &name, &fd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to receive item: %m");
+
+ /* We can't really distinguish a zero-length datagram without any fds from EOF (both are signalled the
+ * same way by recvmsg()). Hence, accept either as end notification. */
+ if (isempty(name) && fd < 0)
+ break;
+
+ if (isempty(name) || fd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid item sent from child.");
+
+ add = portable_metadata_new(name, fd);
+ if (!add)
+ return -ENOMEM;
+ fd = -1;
+
+ /* Note that we do not initialize 'add->source' here, as the source path is not usable here as
+ * it refers to a path only valid in the short-living namespaced child process we forked
+ * here. */
+
+ if (PORTABLE_METADATA_IS_UNIT(add)) {
+ r = hashmap_put(unit_files, add->name, add);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add item to unit file list: %m");
+
+ add = NULL;
+
+ } else if (PORTABLE_METADATA_IS_OS_RELEASE(add)) {
+
+ assert(!os_release);
+ os_release = TAKE_PTR(add);
+ } else
+ assert_not_reached("Unexpected metadata item from child.");
+ }
+
+ r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
+ if (r < 0)
+ return r;
+ child = 0;
+ }
+
+ if (!os_release)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image '%s' lacks os-release data, refusing.", path);
+
+ if (hashmap_isempty(unit_files))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Couldn't find any matching unit files in image '%s', refusing.", path);
+
+ if (ret_unit_files)
+ *ret_unit_files = TAKE_PTR(unit_files);
+
+ if (ret_os_release)
+ *ret_os_release = TAKE_PTR(os_release);
+
+ return 0;
+}
+
+int portable_extract(
+ const char *name_or_path,
+ char **matches,
+ PortableMetadata **ret_os_release,
+ Hashmap **ret_unit_files,
+ sd_bus_error *error) {
+
+ _cleanup_(image_unrefp) Image *image = NULL;
+ int r;
+
+ assert(name_or_path);
+
+ r = image_find_harder(IMAGE_PORTABLE, name_or_path, &image);
+ if (r < 0)
+ return r;
+
+ return portable_extract_by_path(image->path, matches, ret_os_release, ret_unit_files, error);
+}
+
+static int unit_file_is_active(
+ sd_bus *bus,
+ const char *name,
+ sd_bus_error *error) {
+
+ static const char *const active_states[] = {
+ "activating",
+ "active",
+ "reloading",
+ "deactivating",
+ NULL,
+ };
+ int r;
+
+ if (!bus)
+ return false;
+
+ /* If we are looking at a plain or instance things are easy, we can just query the state */
+ if (unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+ _cleanup_free_ char *path = NULL, *buf = NULL;
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ error,
+ &buf);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to retrieve unit state: %s", bus_error_message(error, r));
+
+ return strv_contains((char**) active_states, buf);
+ }
+
+ /* Otherwise we need to enumerate. But let's build the most restricted query we can */
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ const char *at, *prefix, *joined;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "ListUnitsByPatterns");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(m, (char**) active_states);
+ if (r < 0)
+ return r;
+
+ at = strchr(name, '@');
+ assert(at);
+
+ prefix = strndupa(name, at + 1 - name);
+ joined = strjoina(prefix, "*", at + 1);
+
+ r = sd_bus_message_append_strv(m, STRV_MAKE(joined));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(bus, m, 0, error, &reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to list units: %s", bus_error_message(error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_STRUCT, "ssssssouso");
+ if (r < 0)
+ return r;
+
+ return r > 0;
+ }
+
+ return -EINVAL;
+}
+
+static int portable_changes_add(
+ PortableChange **changes,
+ size_t *n_changes,
+ PortableChangeType type,
+ const char *path,
+ const char *source) {
+
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ PortableChange *c;
+
+ assert(path);
+ assert(!changes == !n_changes);
+
+ if (!changes)
+ return 0;
+
+ c = reallocarray(*changes, *n_changes + 1, sizeof(PortableChange));
+ if (!c)
+ return -ENOMEM;
+ *changes = c;
+
+ p = strdup(path);
+ if (!p)
+ return -ENOMEM;
+
+ path_simplify(p, false);
+
+ if (source) {
+ s = strdup(source);
+ if (!s)
+ return -ENOMEM;
+
+ path_simplify(s, false);
+ }
+
+ c[(*n_changes)++] = (PortableChange) {
+ .type = type,
+ .path = TAKE_PTR(p),
+ .source = TAKE_PTR(s),
+ };
+
+ return 0;
+}
+
+static int portable_changes_add_with_prefix(
+ PortableChange **changes,
+ size_t *n_changes,
+ PortableChangeType type,
+ const char *prefix,
+ const char *path,
+ const char *source) {
+
+ assert(path);
+ assert(!changes == !n_changes);
+
+ if (!changes)
+ return 0;
+
+ if (prefix) {
+ path = prefix_roota(prefix, path);
+
+ if (source)
+ source = prefix_roota(prefix, source);
+ }
+
+ return portable_changes_add(changes, n_changes, type, path, source);
+}
+
+void portable_changes_free(PortableChange *changes, size_t n_changes) {
+ size_t i;
+
+ assert(changes || n_changes == 0);
+
+ for (i = 0; i < n_changes; i++) {
+ free(changes[i].path);
+ free(changes[i].source);
+ }
+
+ free(changes);
+}
+
+static int install_chroot_dropin(
+ const char *image_path,
+ ImageType type,
+ const PortableMetadata *m,
+ const char *dropin_dir,
+ char **ret_dropin,
+ PortableChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *text = NULL, *dropin = NULL;
+ int r;
+
+ assert(image_path);
+ assert(m);
+ assert(dropin_dir);
+
+ dropin = path_join(dropin_dir, "20-portable.conf");
+ if (!dropin)
+ return -ENOMEM;
+
+ text = strjoin(PORTABLE_DROPIN_MARKER_BEGIN, image_path, PORTABLE_DROPIN_MARKER_END "\n");
+ if (!text)
+ return -ENOMEM;
+
+ if (endswith(m->name, ".service")) {
+ const char *os_release_source;
+
+ if (access("/etc/os-release", F_OK) < 0) {
+ if (errno != ENOENT)
+ return log_debug_errno(errno, "Failed to check if /etc/os-release exists: %m");
+
+ os_release_source = "/usr/lib/os-release";
+ } else
+ os_release_source = "/etc/os-release";
+
+ if (!strextend(&text,
+ "\n"
+ "[Service]\n",
+ IN_SET(type, IMAGE_DIRECTORY, IMAGE_SUBVOLUME) ? "RootDirectory=" : "RootImage=", image_path, "\n"
+ "Environment=PORTABLE=", basename(image_path), "\n"
+ "BindReadOnlyPaths=", os_release_source, ":/run/host/os-release\n"
+ "LogExtraFields=PORTABLE=", basename(image_path), "\n",
+ NULL))
+
+ return -ENOMEM;
+ }
+
+ r = write_string_file(dropin, text, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to write '%s': %m", dropin);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_WRITE, dropin, NULL);
+
+ if (ret_dropin)
+ *ret_dropin = TAKE_PTR(dropin);
+
+ return 0;
+}
+
+static int find_profile(const char *name, const char *unit, char **ret) {
+ const char *p, *dot;
+
+ assert(name);
+ assert(ret);
+
+ assert_se(dot = strrchr(unit, '.'));
+
+ NULSTR_FOREACH(p, profile_dirs) {
+ _cleanup_free_ char *joined;
+
+ joined = strjoin(p, "/", name, "/", dot + 1, ".conf");
+ if (!joined)
+ return -ENOMEM;
+
+ if (laccess(joined, F_OK) >= 0) {
+ *ret = TAKE_PTR(joined);
+ return 0;
+ }
+
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ return -ENOENT;
+}
+
+static int install_profile_dropin(
+ const char *image_path,
+ const PortableMetadata *m,
+ const char *dropin_dir,
+ const char *profile,
+ PortableFlags flags,
+ char **ret_dropin,
+ PortableChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *dropin = NULL, *from = NULL;
+ int r;
+
+ assert(image_path);
+ assert(m);
+ assert(dropin_dir);
+
+ if (!profile)
+ return 0;
+
+ r = find_profile(profile, m->name, &from);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_debug_errno(errno, "Profile '%s' is not accessible: %m", profile);
+
+ log_debug_errno(errno, "Skipping link to profile '%s', as it does not exist: %m", profile);
+ return 0;
+ }
+
+ dropin = path_join(dropin_dir, "10-profile.conf");
+ if (!dropin)
+ return -ENOMEM;
+
+ if (flags & PORTABLE_PREFER_COPY) {
+
+ r = copy_file_atomic(from, dropin, 0644, 0, 0, COPY_REFLINK);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to copy %s %s %s: %m", from, special_glyph(SPECIAL_GLYPH_ARROW), dropin);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_COPY, dropin, from);
+
+ } else {
+
+ if (symlink(from, dropin) < 0)
+ return log_debug_errno(errno, "Failed to link %s %s %s: %m", from, special_glyph(SPECIAL_GLYPH_ARROW), dropin);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_SYMLINK, dropin, from);
+ }
+
+ if (ret_dropin)
+ *ret_dropin = TAKE_PTR(dropin);
+
+ return 0;
+}
+
+static const char *attached_path(const LookupPaths *paths, PortableFlags flags) {
+ const char *where;
+
+ assert(paths);
+
+ if (flags & PORTABLE_RUNTIME)
+ where = paths->runtime_attached;
+ else
+ where = paths->persistent_attached;
+
+ assert(where);
+ return where;
+}
+
+static int attach_unit_file(
+ const LookupPaths *paths,
+ const char *image_path,
+ ImageType type,
+ const PortableMetadata *m,
+ const char *profile,
+ PortableFlags flags,
+ PortableChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(unlink_and_freep) char *chroot_dropin = NULL, *profile_dropin = NULL;
+ _cleanup_(rmdir_and_freep) char *dropin_dir = NULL;
+ const char *where, *path;
+ int r;
+
+ assert(paths);
+ assert(image_path);
+ assert(m);
+ assert(PORTABLE_METADATA_IS_UNIT(m));
+
+ where = attached_path(paths, flags);
+
+ (void) mkdir_parents(where, 0755);
+ if (mkdir(where, 0755) < 0) {
+ if (errno != EEXIST)
+ return -errno;
+ } else
+ (void) portable_changes_add(changes, n_changes, PORTABLE_MKDIR, where, NULL);
+
+ path = prefix_roota(where, m->name);
+ dropin_dir = strjoin(path, ".d");
+ if (!dropin_dir)
+ return -ENOMEM;
+
+ if (mkdir(dropin_dir, 0755) < 0) {
+ if (errno != EEXIST)
+ return -errno;
+ } else
+ (void) portable_changes_add(changes, n_changes, PORTABLE_MKDIR, dropin_dir, NULL);
+
+ /* We install the drop-ins first, and the actual unit file last to achieve somewhat atomic behaviour if PID 1
+ * is reloaded while we are creating things here: as long as only the drop-ins exist the unit doesn't exist at
+ * all for PID 1. */
+
+ r = install_chroot_dropin(image_path, type, m, dropin_dir, &chroot_dropin, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ r = install_profile_dropin(image_path, m, dropin_dir, profile, flags, &profile_dropin, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ if ((flags & PORTABLE_PREFER_SYMLINK) && m->source) {
+
+ if (symlink(m->source, path) < 0)
+ return log_debug_errno(errno, "Failed to symlink unit file '%s': %m", path);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_SYMLINK, path, m->source);
+
+ } else {
+ _cleanup_(unlink_and_freep) char *tmp = NULL;
+ _cleanup_close_ int fd = -1;
+
+ fd = open_tmpfile_linkable(path, O_WRONLY|O_CLOEXEC, &tmp);
+ if (fd < 0)
+ return log_debug_errno(fd, "Failed to create unit file '%s': %m", path);
+
+ r = copy_bytes(m->fd, fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to copy unit file '%s': %m", path);
+
+ if (fchmod(fd, 0644) < 0)
+ return log_debug_errno(errno, "Failed to change unit file access mode for '%s': %m", path);
+
+ r = link_tmpfile(fd, tmp, path);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to install unit file '%s': %m", path);
+
+ tmp = mfree(tmp);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_COPY, path, m->source);
+ }
+
+ /* All is established now, now let's disable any rollbacks */
+ chroot_dropin = mfree(chroot_dropin);
+ profile_dropin = mfree(profile_dropin);
+ dropin_dir = mfree(dropin_dir);
+
+ return 0;
+}
+
+static int image_symlink(
+ const char *image_path,
+ PortableFlags flags,
+ char **ret) {
+
+ const char *fn, *where;
+ char *joined = NULL;
+
+ assert(image_path);
+ assert(ret);
+
+ fn = last_path_component(image_path);
+
+ if (flags & PORTABLE_RUNTIME)
+ where = "/run/portables/";
+ else
+ where = "/etc/portables/";
+
+ joined = strjoin(where, fn);
+ if (!joined)
+ return -ENOMEM;
+
+ *ret = joined;
+ return 0;
+}
+
+static int install_image_symlink(
+ const char *image_path,
+ PortableFlags flags,
+ PortableChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *sl = NULL;
+ int r;
+
+ assert(image_path);
+
+ /* If the image is outside of the image search also link it into it, so that it can be found with short image
+ * names and is listed among the images. */
+
+ if (image_in_search_path(IMAGE_PORTABLE, image_path))
+ return 0;
+
+ r = image_symlink(image_path, flags, &sl);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to generate image symlink path: %m");
+
+ (void) mkdir_parents(sl, 0755);
+
+ if (symlink(image_path, sl) < 0)
+ return log_debug_errno(errno, "Failed to link %s %s %s: %m", image_path, special_glyph(SPECIAL_GLYPH_ARROW), sl);
+
+ (void) portable_changes_add(changes, n_changes, PORTABLE_SYMLINK, sl, image_path);
+ return 0;
+}
+
+int portable_attach(
+ sd_bus *bus,
+ const char *name_or_path,
+ char **matches,
+ const char *profile,
+ PortableFlags flags,
+ PortableChange **changes,
+ size_t *n_changes,
+ sd_bus_error *error) {
+
+ _cleanup_hashmap_free_ Hashmap *unit_files = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(image_unrefp) Image *image = NULL;
+ PortableMetadata *item;
+ int r;
+
+ assert(name_or_path);
+
+ r = image_find_harder(IMAGE_PORTABLE, name_or_path, &image);
+ if (r < 0)
+ return r;
+
+ r = portable_extract_by_path(image->path, matches, NULL, &unit_files, error);
+ if (r < 0)
+ return r;
+
+ r = lookup_paths_init(&paths, UNIT_FILE_SYSTEM, LOOKUP_PATHS_SPLIT_USR, NULL);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(item, unit_files) {
+ r = unit_file_exists(UNIT_FILE_SYSTEM, &paths, item->name);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether unit '%s' exists on the host: %m", item->name);
+ if (r > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit file '%s' exists on the host already, refusing.", item->name);
+
+ r = unit_file_is_active(bus, item->name, error);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit file '%s' is active already, refusing.", item->name);
+ }
+
+ HASHMAP_FOREACH(item, unit_files) {
+ r = attach_unit_file(&paths, image->path, image->type, item, profile, flags, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+
+ /* We don't care too much for the image symlink, it's just a convenience thing, it's not necessary for proper
+ * operation otherwise. */
+ (void) install_image_symlink(image->path, flags, changes, n_changes);
+
+ return 0;
+}
+
+static bool marker_matches_image(const char *marker, const char *name_or_path) {
+ const char *a;
+
+ assert(marker);
+ assert(name_or_path);
+
+ a = last_path_component(marker);
+
+ if (image_name_is_valid(name_or_path)) {
+ const char *e;
+
+ /* We shall match against an image name. In that case let's compare the last component, and optionally
+ * allow either a suffix of ".raw" or a series of "/". */
+
+ e = startswith(a, name_or_path);
+ if (!e)
+ return false;
+
+ return
+ e[strspn(e, "/")] == 0 ||
+ streq(e, ".raw");
+ } else {
+ const char *b;
+ size_t l;
+
+ /* We shall match against a path. Let's ignore any prefix here though, as often there are many ways to
+ * reach the same file. However, in this mode, let's validate any file suffix. */
+
+ l = strcspn(a, "/");
+ b = last_path_component(name_or_path);
+
+ if (strcspn(b, "/") != l)
+ return false;
+
+ return memcmp(a, b, l) == 0;
+ }
+}
+
+static int test_chroot_dropin(
+ DIR *d,
+ const char *where,
+ const char *fname,
+ const char *name_or_path,
+ char **ret_marker) {
+
+ _cleanup_free_ char *line = NULL, *marker = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *p, *e, *k;
+ int r;
+
+ assert(d);
+ assert(where);
+ assert(fname);
+
+ /* We recognize unis created from portable images via the drop-in we created for them */
+
+ p = strjoina(fname, ".d/20-portable.conf");
+ fd = openat(dirfd(d), p, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_debug_errno(errno, "Failed to open %s/%s: %m", where, p);
+ }
+
+ r = take_fdopen_unlocked(&fd, "r", &f);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to convert file handle: %m");
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read from %s/%s: %m", where, p);
+
+ e = startswith(line, PORTABLE_DROPIN_MARKER_BEGIN);
+ if (!e)
+ return 0;
+
+ k = endswith(e, PORTABLE_DROPIN_MARKER_END);
+ if (!k)
+ return 0;
+
+ marker = strndup(e, k - e);
+ if (!marker)
+ return -ENOMEM;
+
+ if (!name_or_path)
+ r = true;
+ else
+ r = marker_matches_image(marker, name_or_path);
+
+ if (ret_marker)
+ *ret_marker = TAKE_PTR(marker);
+
+ return r;
+}
+
+int portable_detach(
+ sd_bus *bus,
+ const char *name_or_path,
+ PortableFlags flags,
+ PortableChange **changes,
+ size_t *n_changes,
+ sd_bus_error *error) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_set_free_ Set *unit_files = NULL, *markers = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ const char *where, *item;
+ struct dirent *de;
+ int ret = 0;
+ int r;
+
+ assert(name_or_path);
+
+ r = lookup_paths_init(&paths, UNIT_FILE_SYSTEM, LOOKUP_PATHS_SPLIT_USR, NULL);
+ if (r < 0)
+ return r;
+
+ where = attached_path(&paths, flags);
+
+ d = opendir(where);
+ if (!d) {
+ if (errno == ENOENT)
+ goto not_found;
+
+ return log_debug_errno(errno, "Failed to open '%s' directory: %m", where);
+ }
+
+ FOREACH_DIRENT(de, d, return log_debug_errno(errno, "Failed to enumerate '%s' directory: %m", where)) {
+ _cleanup_free_ char *marker = NULL;
+ UnitFileState state;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ /* Filter out duplicates */
+ if (set_contains(unit_files, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ r = test_chroot_dropin(d, where, de->d_name, name_or_path, &marker);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = unit_file_lookup_state(UNIT_FILE_SYSTEM, &paths, de->d_name, &state);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine unit file state of '%s': %m", de->d_name);
+ if (!IN_SET(state, UNIT_FILE_STATIC, UNIT_FILE_DISABLED, UNIT_FILE_LINKED, UNIT_FILE_RUNTIME, UNIT_FILE_LINKED_RUNTIME))
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit file '%s' is in state '%s', can't detach.", de->d_name, unit_file_state_to_string(state));
+
+ r = unit_file_is_active(bus, de->d_name, error);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS, "Unit file '%s' is active, can't detach.", de->d_name);
+
+ r = set_put_strdup(&unit_files, de->d_name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add unit name '%s' to set: %m", de->d_name);
+
+ if (path_is_absolute(marker) &&
+ !image_in_search_path(IMAGE_PORTABLE, marker)) {
+
+ r = set_ensure_consume(&markers, &path_hash_ops_free, TAKE_PTR(marker));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (set_isempty(unit_files))
+ goto not_found;
+
+ SET_FOREACH(item, unit_files) {
+ _cleanup_free_ char *md = NULL;
+ const char *suffix;
+
+ if (unlinkat(dirfd(d), item, 0) < 0) {
+ log_debug_errno(errno, "Can't remove unit file %s/%s: %m", where, item);
+
+ if (errno != ENOENT && ret >= 0)
+ ret = -errno;
+ } else
+ portable_changes_add_with_prefix(changes, n_changes, PORTABLE_UNLINK, where, item, NULL);
+
+ FOREACH_STRING(suffix, ".d/10-profile.conf", ".d/20-portable.conf") {
+ _cleanup_free_ char *dropin = NULL;
+
+ dropin = strjoin(item, suffix);
+ if (!dropin)
+ return -ENOMEM;
+
+ if (unlinkat(dirfd(d), dropin, 0) < 0) {
+ log_debug_errno(errno, "Can't remove drop-in %s/%s: %m", where, dropin);
+
+ if (errno != ENOENT && ret >= 0)
+ ret = -errno;
+ } else
+ portable_changes_add_with_prefix(changes, n_changes, PORTABLE_UNLINK, where, dropin, NULL);
+ }
+
+ md = strjoin(item, ".d");
+ if (!md)
+ return -ENOMEM;
+
+ if (unlinkat(dirfd(d), md, AT_REMOVEDIR) < 0) {
+ log_debug_errno(errno, "Can't remove drop-in directory %s/%s: %m", where, md);
+
+ if (errno != ENOENT && ret >= 0)
+ ret = -errno;
+ } else
+ portable_changes_add_with_prefix(changes, n_changes, PORTABLE_UNLINK, where, md, NULL);
+ }
+
+ /* Now, also drop any image symlink, for images outside of the sarch path */
+ SET_FOREACH(item, markers) {
+ _cleanup_free_ char *sl = NULL;
+ struct stat st;
+
+ r = image_symlink(item, flags, &sl);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine image symlink for '%s', ignoring: %m", item);
+ continue;
+ }
+
+ if (lstat(sl, &st) < 0) {
+ log_debug_errno(errno, "Failed to stat '%s', ignoring: %m", sl);
+ continue;
+ }
+
+ if (!S_ISLNK(st.st_mode)) {
+ log_debug("Image '%s' is not a symlink, ignoring.", sl);
+ continue;
+ }
+
+ if (unlink(sl) < 0) {
+ log_debug_errno(errno, "Can't remove image symlink '%s': %m", sl);
+
+ if (errno != ENOENT && ret >= 0)
+ ret = -errno;
+ } else
+ portable_changes_add(changes, n_changes, PORTABLE_UNLINK, sl, NULL);
+ }
+
+ /* Try to remove the unit file directory, if we can */
+ if (rmdir(where) >= 0)
+ portable_changes_add(changes, n_changes, PORTABLE_UNLINK, where, NULL);
+
+ return ret;
+
+not_found:
+ log_debug("No unit files associated with '%s' found. Image not attached?", name_or_path);
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "No unit files associated with '%s' found. Image not attached?", name_or_path);
+}
+
+static int portable_get_state_internal(
+ sd_bus *bus,
+ const char *name_or_path,
+ PortableFlags flags,
+ PortableState *ret,
+ sd_bus_error *error) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ bool found_enabled = false, found_running = false;
+ _cleanup_set_free_ Set *unit_files = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ const char *where;
+ struct dirent *de;
+ int r;
+
+ assert(name_or_path);
+ assert(ret);
+
+ r = lookup_paths_init(&paths, UNIT_FILE_SYSTEM, LOOKUP_PATHS_SPLIT_USR, NULL);
+ if (r < 0)
+ return r;
+
+ where = attached_path(&paths, flags);
+
+ d = opendir(where);
+ if (!d) {
+ if (errno == ENOENT) {
+ /* If the 'attached' directory doesn't exist at all, then we know for sure this image isn't attached. */
+ *ret = PORTABLE_DETACHED;
+ return 0;
+ }
+
+ return log_debug_errno(errno, "Failed to open '%s' directory: %m", where);
+ }
+
+ FOREACH_DIRENT(de, d, return log_debug_errno(errno, "Failed to enumerate '%s' directory: %m", where)) {
+ UnitFileState state;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ /* Filter out duplicates */
+ if (set_contains(unit_files, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ r = test_chroot_dropin(d, where, de->d_name, name_or_path, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = unit_file_lookup_state(UNIT_FILE_SYSTEM, &paths, de->d_name, &state);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine unit file state of '%s': %m", de->d_name);
+ if (!IN_SET(state, UNIT_FILE_STATIC, UNIT_FILE_DISABLED, UNIT_FILE_LINKED, UNIT_FILE_LINKED_RUNTIME))
+ found_enabled = true;
+
+ r = unit_file_is_active(bus, de->d_name, error);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ found_running = true;
+
+ r = set_put_strdup(&unit_files, de->d_name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add unit name '%s' to set: %m", de->d_name);
+ }
+
+ *ret = found_running ? (!set_isempty(unit_files) && (flags & PORTABLE_RUNTIME) ? PORTABLE_RUNNING_RUNTIME : PORTABLE_RUNNING) :
+ found_enabled ? (flags & PORTABLE_RUNTIME ? PORTABLE_ENABLED_RUNTIME : PORTABLE_ENABLED) :
+ !set_isempty(unit_files) ? (flags & PORTABLE_RUNTIME ? PORTABLE_ATTACHED_RUNTIME : PORTABLE_ATTACHED) : PORTABLE_DETACHED;
+
+ return 0;
+}
+
+int portable_get_state(
+ sd_bus *bus,
+ const char *name_or_path,
+ PortableFlags flags,
+ PortableState *ret,
+ sd_bus_error *error) {
+
+ PortableState state;
+ int r;
+
+ assert(name_or_path);
+ assert(ret);
+
+ /* We look for matching units twice: once in the regular directories, and once in the runtime directories — but
+ * the latter only if we didn't find anything in the former. */
+
+ r = portable_get_state_internal(bus, name_or_path, flags & ~PORTABLE_RUNTIME, &state, error);
+ if (r < 0)
+ return r;
+
+ if (state == PORTABLE_DETACHED) {
+ r = portable_get_state_internal(bus, name_or_path, flags | PORTABLE_RUNTIME, &state, error);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = state;
+ return 0;
+}
+
+int portable_get_profiles(char ***ret) {
+ assert(ret);
+
+ return conf_files_list_nulstr(ret, NULL, NULL, CONF_FILES_DIRECTORY|CONF_FILES_BASENAME|CONF_FILES_FILTER_MASKED, profile_dirs);
+}
+
+static const char* const portable_change_type_table[_PORTABLE_CHANGE_TYPE_MAX] = {
+ [PORTABLE_COPY] = "copy",
+ [PORTABLE_MKDIR] = "mkdir",
+ [PORTABLE_SYMLINK] = "symlink",
+ [PORTABLE_UNLINK] = "unlink",
+ [PORTABLE_WRITE] = "write",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(portable_change_type, PortableChangeType);
+
+static const char* const portable_state_table[_PORTABLE_STATE_MAX] = {
+ [PORTABLE_DETACHED] = "detached",
+ [PORTABLE_ATTACHED] = "attached",
+ [PORTABLE_ATTACHED_RUNTIME] = "attached-runtime",
+ [PORTABLE_ENABLED] = "enabled",
+ [PORTABLE_ENABLED_RUNTIME] = "enabled-runtime",
+ [PORTABLE_RUNNING] = "running",
+ [PORTABLE_RUNNING_RUNTIME] = "running-runtime",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(portable_state, PortableState);
diff --git a/src/portable/portable.h b/src/portable/portable.h
new file mode 100644
index 0000000..fd9605e
--- /dev/null
+++ b/src/portable/portable.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+#include "string-util.h"
+
+typedef struct PortableMetadata {
+ int fd;
+ char *source;
+ char name[];
+} PortableMetadata;
+
+#define PORTABLE_METADATA_IS_OS_RELEASE(m) (streq((m)->name, "/etc/os-release"))
+#define PORTABLE_METADATA_IS_UNIT(m) (!IN_SET((m)->name[0], 0, '/'))
+
+typedef enum PortableFlags {
+ PORTABLE_PREFER_COPY = 1 << 0,
+ PORTABLE_PREFER_SYMLINK = 1 << 1,
+ PORTABLE_RUNTIME = 1 << 2,
+} PortableFlags;
+
+typedef enum PortableChangeType {
+ PORTABLE_COPY,
+ PORTABLE_SYMLINK,
+ PORTABLE_UNLINK,
+ PORTABLE_WRITE,
+ PORTABLE_MKDIR,
+ _PORTABLE_CHANGE_TYPE_MAX,
+ _PORTABLE_CHANGE_TYPE_INVALID = INT_MIN,
+} PortableChangeType;
+
+typedef enum PortableState {
+ PORTABLE_DETACHED,
+ PORTABLE_ATTACHED,
+ PORTABLE_ATTACHED_RUNTIME,
+ PORTABLE_ENABLED,
+ PORTABLE_ENABLED_RUNTIME,
+ PORTABLE_RUNNING,
+ PORTABLE_RUNNING_RUNTIME,
+ _PORTABLE_STATE_MAX,
+ _PORTABLE_STATE_INVALID = -1
+} PortableState;
+
+typedef struct PortableChange {
+ int type; /* PortableFileChangeType or negative error number */
+ char *path;
+ char *source;
+} PortableChange;
+
+PortableMetadata *portable_metadata_unref(PortableMetadata *i);
+DEFINE_TRIVIAL_CLEANUP_FUNC(PortableMetadata*, portable_metadata_unref);
+
+int portable_metadata_hashmap_to_sorted_array(Hashmap *unit_files, PortableMetadata ***ret);
+
+int portable_extract(const char *image, char **matches, PortableMetadata **ret_os_release, Hashmap **ret_unit_files, sd_bus_error *error);
+
+int portable_attach(sd_bus *bus, const char *name_or_path, char **matches, const char *profile, PortableFlags flags, PortableChange **changes, size_t *n_changes, sd_bus_error *error);
+int portable_detach(sd_bus *bus, const char *name_or_path, PortableFlags flags, PortableChange **changes, size_t *n_changes, sd_bus_error *error);
+
+int portable_get_state(sd_bus *bus, const char *name_or_path, PortableFlags flags, PortableState *ret, sd_bus_error *error);
+
+int portable_get_profiles(char ***ret);
+
+void portable_changes_free(PortableChange *changes, size_t n_changes);
+
+const char *portable_change_type_to_string(PortableChangeType t) _const_;
+PortableChangeType portable_change_type_from_string(const char *t) _pure_;
+
+const char *portable_state_to_string(PortableState t) _const_;
+PortableState portable_state_from_string(const char *t) _pure_;
diff --git a/src/portable/portablectl.c b/src/portable/portablectl.c
new file mode 100644
index 0000000..457170e
--- /dev/null
+++ b/src/portable/portablectl.c
@@ -0,0 +1,1126 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-unit-util.h"
+#include "bus-wait-for-jobs.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "fs-util.h"
+#include "locale-util.h"
+#include "machine-image.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "spawn-polkit-agent.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "verbs.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static bool arg_ask_password = true;
+static bool arg_quiet = false;
+static const char *arg_profile = "default";
+static const char* arg_copy_mode = NULL;
+static bool arg_runtime = false;
+static bool arg_reload = true;
+static bool arg_cat = false;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_enable = false;
+static bool arg_now = false;
+static bool arg_no_block = false;
+
+static int determine_image(const char *image, bool permit_non_existing, char **ret) {
+ int r;
+
+ /* If the specified name is a valid image name, we pass it as-is to portabled, which will search for it in the
+ * usual search directories. Otherwise we presume it's a path, and will normalize it on the client's side
+ * (among other things, to make the path independent of the client's working directory) before passing it
+ * over. */
+
+ if (image_name_is_valid(image)) {
+ char *c;
+
+ if (!arg_quiet && laccess(image, F_OK) >= 0)
+ log_warning("Ambiguous invocation: current working directory contains file matching non-path argument '%s', ignoring. "
+ "Prefix argument with './' to force reference to file in current working directory.", image);
+
+ c = strdup(image);
+ if (!c)
+ return log_oom();
+
+ *ret = c;
+ return 0;
+ }
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Operations on images by path not supported when connecting to remote systems.");
+
+ r = chase_symlinks(image, NULL, CHASE_TRAIL_SLASH | (permit_non_existing ? CHASE_NONEXISTENT : 0), ret, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Cannot normalize specified image path '%s': %m", image);
+
+ return 0;
+}
+
+static int extract_prefix(const char *path, char **ret) {
+ _cleanup_free_ char *name = NULL;
+ const char *bn, *underscore;
+ size_t m;
+
+ bn = basename(path);
+
+ underscore = strchr(bn, '_');
+ if (underscore)
+ m = underscore - bn;
+ else {
+ const char *e;
+
+ e = endswith(bn, ".raw");
+ if (!e)
+ e = strchr(bn, 0);
+
+ m = e - bn;
+ }
+
+ name = strndup(bn, m);
+ if (!name)
+ return -ENOMEM;
+
+ /* A slightly reduced version of what's permitted in unit names. With ':' and '\' are removed, as well as '_'
+ * which we use as delimiter for the second part of the image string, which we ignore for now. */
+ if (!in_charset(name, DIGITS LETTERS "-."))
+ return -EINVAL;
+
+ if (!filename_is_valid(name))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(name);
+
+ return 0;
+}
+
+static int determine_matches(const char *image, char **l, bool allow_any, char ***ret) {
+ _cleanup_strv_free_ char **k = NULL;
+ int r;
+
+ /* Determine the matches to apply. If the list is empty we derive the match from the image name. If the list
+ * contains exactly the "-" we return a wildcard list (which is the empty list), but only if this is expressly
+ * permitted. */
+
+ if (strv_isempty(l)) {
+ char *prefix;
+
+ r = extract_prefix(image, &prefix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract prefix of image name '%s': %m", image);
+
+ if (!arg_quiet)
+ log_info("(Matching unit files with prefix '%s'.)", prefix);
+
+ r = strv_consume(&k, prefix);
+ if (r < 0)
+ return log_oom();
+
+ } else if (strv_equal(l, STRV_MAKE("-"))) {
+
+ if (!allow_any)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Refusing all unit file match.");
+
+ if (!arg_quiet)
+ log_info("(Matching all unit files.)");
+ } else {
+
+ k = strv_copy(l);
+ if (!k)
+ return log_oom();
+
+ if (!arg_quiet) {
+ _cleanup_free_ char *joined = NULL;
+
+ joined = strv_join(k, "', '");
+ if (!joined)
+ return log_oom();
+
+ log_info("(Matching unit files with prefixes '%s'.)", joined);
+ }
+ }
+
+ *ret = TAKE_PTR(k);
+
+ return 0;
+}
+
+static int acquire_bus(sd_bus **bus) {
+ int r;
+
+ assert(bus);
+
+ if (*bus)
+ return 0;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ (void) sd_bus_set_allow_interactive_authorization(*bus, arg_ask_password);
+
+ return 0;
+}
+
+static int maybe_reload(sd_bus **bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ if (!arg_reload)
+ return 0;
+
+ r = acquire_bus(bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(
+ *bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reload");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Reloading the daemon may take long, hence set a longer timeout here */
+ r = sd_bus_call(*bus, m, DEFAULT_TIMEOUT_USEC * 2, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reload daemon: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int inspect_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **matches = NULL;
+ _cleanup_free_ char *image = NULL;
+ bool nl = false, header = false;
+ const void *data;
+ const char *path;
+ size_t sz;
+ int r;
+
+ r = determine_image(argv[1], false, &image);
+ if (r < 0)
+ return r;
+
+ r = determine_matches(argv[1], argv + 2, true, &matches);
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = bus_message_new_method_call(bus, &m, bus_portable_mgr, "GetImageMetadata");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", image);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, matches);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to inspect image metadata: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &data, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_cat) {
+ printf("%s-- OS Release: --%s\n", ansi_highlight(), ansi_normal());
+ fwrite(data, sz, 1, stdout);
+ fflush(stdout);
+ nl = true;
+ } else {
+ _cleanup_free_ char *pretty_portable = NULL, *pretty_os = NULL;
+ _cleanup_fclose_ FILE *f;
+
+ f = fmemopen_unlocked((void*) data, sz, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open /etc/os-release buffer: %m");
+
+ r = parse_env_file(f, "/etc/os-release",
+ "PORTABLE_PRETTY_NAME", &pretty_portable,
+ "PRETTY_NAME", &pretty_os);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse /etc/os-release: %m");
+
+ printf("Image:\n\t%s\n"
+ "Portable Service:\n\t%s\n"
+ "Operating System:\n\t%s\n",
+ path,
+ strna(pretty_portable),
+ strna(pretty_os));
+ }
+
+ r = sd_bus_message_enter_container(reply, 'a', "{say}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *name;
+
+ r = sd_bus_message_enter_container(reply, 'e', "say");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(reply, "s", &name);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &data, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (arg_cat) {
+ if (nl)
+ fputc('\n', stdout);
+
+ printf("%s-- Unit file: %s --%s\n", ansi_highlight(), name, ansi_normal());
+ fwrite(data, sz, 1, stdout);
+ fflush(stdout);
+ nl = true;
+ } else {
+ if (!header) {
+ fputs("Unit files:\n", stdout);
+ header = true;
+ }
+
+ fputc('\t', stdout);
+ fputs(name, stdout);
+ fputc('\n', stdout);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int print_changes(sd_bus_message *m) {
+ int r;
+
+ if (arg_quiet)
+ return 0;
+
+ r = sd_bus_message_enter_container(m, 'a', "(sss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *type, *path, *source;
+
+ r = sd_bus_message_read(m, "(sss)", &type, &path, &source);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (streq(type, "symlink"))
+ log_info("Created symlink %s %s %s.", path, special_glyph(SPECIAL_GLYPH_ARROW), source);
+ else if (streq(type, "copy")) {
+ if (isempty(source))
+ log_info("Copied %s.", path);
+ else
+ log_info("Copied %s %s %s.", source, special_glyph(SPECIAL_GLYPH_ARROW), path);
+ } else if (streq(type, "unlink"))
+ log_info("Removed %s.", path);
+ else if (streq(type, "write"))
+ log_info("Written %s.", path);
+ else if (streq(type, "mkdir"))
+ log_info("Created directory %s.", path);
+ else
+ log_error("Unexpected change: %s/%s/%s", type, path, source);
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int maybe_enable_disable(sd_bus *bus, const char *path, bool enable) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_strv_free_ char **names = NULL;
+ UnitFileChange *changes = NULL;
+ const uint64_t flags = UNIT_FILE_PORTABLE | (arg_runtime ? UNIT_FILE_RUNTIME : 0);
+ size_t n_changes = 0;
+ int r;
+
+ if (!arg_enable)
+ return 0;
+
+ names = strv_new(path, NULL);
+ if (!names)
+ return log_oom();
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ enable ? "EnableUnitFilesWithFlags" : "DisableUnitFilesWithFlags");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, names);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "t", flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to %s the portable service %s: %s",
+ enable ? "enable" : "disable", path, bus_error_message(&error, r));
+
+ if (enable) {
+ r = sd_bus_message_skip(reply, "b");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+ (void) bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+
+ return 0;
+}
+
+static int maybe_start_stop(sd_bus *bus, const char *path, bool start, BusWaitForJobs *wait) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char *name = (char *)basename(path), *job = NULL;
+ int r;
+
+ if (!arg_now)
+ return 0;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ start ? "StartUnit" : "StopUnit",
+ &error,
+ &reply,
+ "ss", name, "replace");
+ if (r < 0)
+ return log_error_errno(r, "Failed to %s the portable service %s: %s",
+ start ? "start" : "stop",
+ path,
+ bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &job);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!arg_quiet)
+ log_info("Queued %s to %s portable service %s.", job, start ? "start" : "stop", name);
+
+ if (wait) {
+ r = bus_wait_for_jobs_add(wait, job);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch %s job for %s %s: %m",
+ job, start ? "starting" : "stopping", name);
+ }
+
+ return 0;
+}
+
+static int maybe_enable_start(sd_bus *bus, sd_bus_message *reply) {
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *wait = NULL;
+ int r;
+
+ if (!arg_enable && !arg_now)
+ return 0;
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &wait);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ r = sd_bus_message_rewind(reply, true);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_enter_container(reply, 'a', "(sss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ char *type, *path, *source;
+
+ r = sd_bus_message_read(reply, "(sss)", &type, &path, &source);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ if (STR_IN_SET(type, "symlink", "copy") && ENDSWITH_SET(path, ".service", ".target", ".socket")) {
+ (void) maybe_enable_disable(bus, path, true);
+ (void) maybe_start_stop(bus, path, true, wait);
+ }
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs(wait, arg_quiet, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int maybe_stop_disable(sd_bus *bus, char *image, char *argv[]) {
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *wait = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_strv_free_ char **matches = NULL;
+ int r;
+
+ if (!arg_enable && !arg_now)
+ return 0;
+
+ r = determine_matches(argv[1], argv + 2, true, &matches);
+ if (r < 0)
+ return r;
+
+ r = bus_wait_for_jobs_new(bus, &wait);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+
+ r = bus_message_new_method_call(bus, &m, bus_portable_mgr, "GetImageMetadata");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", image);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, matches);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to inspect image metadata: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_skip(reply, "say");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(reply, 'a', "{say}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *name;
+
+ r = sd_bus_message_enter_container(reply, 'e', "say");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(reply, "s", &name);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_skip(reply, "ay");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ (void) maybe_start_stop(bus, name, false, wait);
+ (void) maybe_enable_disable(bus, name, false);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ /* Stopping must always block or the detach will fail if the unit is still running */
+ r = bus_wait_for_jobs(wait, arg_quiet, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int attach_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **matches = NULL;
+ _cleanup_free_ char *image = NULL;
+ int r;
+
+ r = determine_image(argv[1], false, &image);
+ if (r < 0)
+ return r;
+
+ r = determine_matches(argv[1], argv + 2, false, &matches);
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_message_new_method_call(bus, &m, bus_portable_mgr, "AttachImage");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", image);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, matches);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "sbs", arg_profile, arg_runtime, arg_copy_mode);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach image: %s", bus_error_message(&error, r));
+
+ (void) maybe_reload(&bus);
+
+ print_changes(reply);
+
+ (void) maybe_enable_start(bus, reply);
+
+ return 0;
+}
+
+static int detach_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *image = NULL;
+ int r;
+
+ r = determine_image(argv[1], true, &image);
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ (void) maybe_stop_disable(bus, image, argv);
+
+ r = bus_call_method(bus, bus_portable_mgr, "DetachImage", &error, &reply, "sb", image, arg_runtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to detach image: %s", bus_error_message(&error, r));
+
+ (void) maybe_reload(&bus);
+
+ print_changes(reply);
+ return 0;
+}
+
+static int list_images(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_portable_mgr, "ListImages", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list images: %s", bus_error_message(&error, r));
+
+ table = table_new("name", "type", "ro", "crtime", "mtime", "usage", "state");
+ if (!table)
+ return log_oom();
+
+ r = sd_bus_message_enter_container(reply, 'a', "(ssbtttso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ const char *name, *type, *state;
+ uint64_t crtime, mtime, usage;
+ int ro_int;
+
+ r = sd_bus_message_read(reply, "(ssbtttso)", &name, &type, &ro_int, &crtime, &mtime, &usage, &state, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = table_add_many(table,
+ TABLE_STRING, name,
+ TABLE_STRING, type,
+ TABLE_BOOLEAN, ro_int,
+ TABLE_SET_COLOR, ro_int ? ansi_highlight_red() : NULL,
+ TABLE_TIMESTAMP, crtime,
+ TABLE_TIMESTAMP, mtime,
+ TABLE_SIZE, usage,
+ TABLE_STRING, state,
+ TABLE_SET_COLOR, !streq(state, "detached") ? ansi_highlight_green() : NULL);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (table_get_rows(table) > 1) {
+ r = table_set_sort(table, (size_t) 0, (size_t) -1);
+ if (r < 0)
+ return table_log_sort_error(r);
+
+ table_set_header(table, arg_legend);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ if (arg_legend) {
+ if (table_get_rows(table) > 1)
+ printf("\n%zu images listed.\n", table_get_rows(table) - 1);
+ else
+ printf("No images.\n");
+ }
+
+ return 0;
+}
+
+static int remove_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r, i;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ for (i = 1; i < argc; i++) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = bus_message_new_method_call(bus, &m, bus_portable_mgr, "RemoveImage");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", argv[i]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* This is a slow operation, hence turn off any method call timeouts */
+ r = sd_bus_call(bus, m, USEC_INFINITY, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Could not remove image: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int read_only_image(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int b = true, r;
+
+ if (argc > 2) {
+ b = parse_boolean(argv[2]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse boolean argument: %s", argv[2]);
+ }
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_call_method(bus, bus_portable_mgr, "MarkImageReadOnly", &error, NULL, "sb", argv[1], b);
+ if (r < 0)
+ return log_error_errno(r, "Could not mark image read-only: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_limit(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ uint64_t limit;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ (void) polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ if (STR_IN_SET(argv[argc-1], "-", "none", "infinity"))
+ limit = (uint64_t) -1;
+ else {
+ r = parse_size(argv[argc-1], 1024, &limit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse size: %s", argv[argc-1]);
+ }
+
+ if (argc > 2)
+ /* With two arguments changes the quota limit of the specified image */
+ r = bus_call_method(bus, bus_portable_mgr, "SetImageLimit", &error, NULL, "st", argv[1], limit);
+ else
+ /* With one argument changes the pool quota limit */
+ r = bus_call_method(bus, bus_portable_mgr, "SetPoolLimit", &error, NULL, "t", limit);
+
+ if (r < 0)
+ return log_error_errno(r, "Could not set limit: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int is_image_attached(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *image = NULL;
+ const char *state;
+ int r;
+
+ r = determine_image(argv[1], true, &image);
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_portable_mgr, "GetImageState", &error, &reply, "s", image);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get image state: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &state);
+ if (r < 0)
+ return r;
+
+ if (!arg_quiet)
+ puts(state);
+
+ return streq(state, "detached");
+}
+
+static int dump_profiles(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ char **i;
+ int r;
+
+ r = acquire_bus(&bus);
+ if (r < 0)
+ return r;
+
+ r = bus_get_property_strv(bus, bus_portable_mgr, "Profiles", &error, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire list of profiles: %s", bus_error_message(&error, r));
+
+ if (arg_legend)
+ log_info("Available unit profiles:");
+
+ STRV_FOREACH(i, l) {
+ fputs(*i, stdout);
+ fputc('\n', stdout);
+ }
+
+ return 0;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("portablectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sAttach or detach portable services from the local system.%s\n"
+ "\nCommands:\n"
+ " list List available portable service images\n"
+ " attach NAME|PATH [PREFIX...]\n"
+ " Attach the specified portable service image\n"
+ " detach NAME|PATH [PREFIX...]\n"
+ " Detach the specified portable service image\n"
+ " inspect NAME|PATH [PREFIX...]\n"
+ " Show details of specified portable service image\n"
+ " is-attached NAME|PATH Query if portable service image is attached\n"
+ " read-only NAME|PATH [BOOL] Mark or unmark portable service image read-only\n"
+ " remove NAME|PATH... Remove a portable service image\n"
+ " set-limit [NAME|PATH] Set image or pool size limit (disk quota)\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --no-ask-password Do not ask for system passwords\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " -q --quiet Suppress informational messages\n"
+ " -p --profile=PROFILE Pick security profile for portable service\n"
+ " --copy=copy|auto|symlink Prefer copying or symlinks if possible\n"
+ " --runtime Attach portable service until next reboot only\n"
+ " --no-reload Don't reload the system and service manager\n"
+ " --cat When inspecting include unit and os-release file\n"
+ " contents\n"
+ " --enable Immediately enable/disable the portable service\n"
+ " after attach/detach\n"
+ " --now Immediately start/stop the portable service after\n"
+ " attach/before detach\n"
+ " --no-block Don't block waiting for attach --now to complete\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_NO_ASK_PASSWORD,
+ ARG_COPY,
+ ARG_RUNTIME,
+ ARG_NO_RELOAD,
+ ARG_CAT,
+ ARG_ENABLE,
+ ARG_NOW,
+ ARG_NO_BLOCK,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "quiet", no_argument, NULL, 'q' },
+ { "profile", required_argument, NULL, 'p' },
+ { "copy", required_argument, NULL, ARG_COPY },
+ { "runtime", no_argument, NULL, ARG_RUNTIME },
+ { "no-reload", no_argument, NULL, ARG_NO_RELOAD },
+ { "cat", no_argument, NULL, ARG_CAT },
+ { "enable", no_argument, NULL, ARG_ENABLE },
+ { "now", no_argument, NULL, ARG_NOW },
+ { "no-block", no_argument, NULL, ARG_NO_BLOCK },
+ {}
+ };
+
+ assert(argc >= 0);
+ assert(argv);
+
+ for (;;) {
+ int c;
+
+ c = getopt_long(argc, argv, "hH:M:qp:", options, NULL);
+ if (c < 0)
+ break;
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'p':
+ if (streq(optarg, "help"))
+ return dump_profiles();
+
+ if (!filename_is_valid(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unit profile name not valid: %s", optarg);
+
+ arg_profile = optarg;
+ break;
+
+ case ARG_COPY:
+ if (streq(optarg, "auto"))
+ arg_copy_mode = NULL;
+ else if (STR_IN_SET(optarg, "copy", "symlink"))
+ arg_copy_mode = optarg;
+ else if (streq(optarg, "help")) {
+ puts("auto\n"
+ "copy\n"
+ "symlink");
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse --copy= argument: %s", optarg);
+
+ break;
+
+ case ARG_RUNTIME:
+ arg_runtime = true;
+ break;
+
+ case ARG_NO_RELOAD:
+ arg_reload = false;
+ break;
+
+ case ARG_CAT:
+ arg_cat = true;
+ break;
+
+ case ARG_ENABLE:
+ arg_enable = true;
+ break;
+
+ case ARG_NOW:
+ arg_now = true;
+ break;
+
+ case ARG_NO_BLOCK:
+ arg_no_block = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "list", VERB_ANY, 1, VERB_DEFAULT, list_images },
+ { "attach", 2, VERB_ANY, 0, attach_image },
+ { "detach", 2, VERB_ANY, 0, detach_image },
+ { "inspect", 2, VERB_ANY, 0, inspect_image },
+ { "is-attached", 2, 2, 0, is_image_attached },
+ { "read-only", 2, 3, 0, read_only_image },
+ { "remove", 2, VERB_ANY, 0, remove_image },
+ { "set-limit", 3, 3, 0, set_limit },
+ {}
+ };
+
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/portable/portabled-bus.c b/src/portable/portabled-bus.c
new file mode 100644
index 0000000..cf50d58
--- /dev/null
+++ b/src/portable/portabled-bus.c
@@ -0,0 +1,402 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "bus-common-errors.h"
+#include "bus-polkit.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "machine-image.h"
+#include "missing_capability.h"
+#include "portable.h"
+#include "portabled-bus.h"
+#include "portabled-image-bus.h"
+#include "portabled-image.h"
+#include "portabled.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int property_get_pool_path(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ assert(bus);
+ assert(reply);
+
+ return sd_bus_message_append(reply, "s", "/var/lib/portables");
+}
+
+static int property_get_pool_usage(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ uint64_t usage = (uint64_t) -1;
+
+ assert(bus);
+ assert(reply);
+
+ fd = open("/var/lib/portables", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd >= 0) {
+ BtrfsQuotaInfo q;
+
+ if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0)
+ usage = q.referenced;
+ }
+
+ return sd_bus_message_append(reply, "t", usage);
+}
+
+static int property_get_pool_limit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_close_ int fd = -1;
+ uint64_t size = (uint64_t) -1;
+
+ assert(bus);
+ assert(reply);
+
+ fd = open("/var/lib/portables", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd >= 0) {
+ BtrfsQuotaInfo q;
+
+ if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0)
+ size = q.referenced_max;
+ }
+
+ return sd_bus_message_append(reply, "t", size);
+}
+
+static int property_get_profiles(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = portable_get_profiles(&l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append_strv(reply, l);
+}
+
+static int method_get_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ const char *name;
+ Image *image;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = bus_image_acquire(m, message, name, NULL, BUS_IMAGE_REFUSE_BY_PATH, NULL, &image, error);
+ if (r < 0)
+ return r;
+
+ r = bus_image_path(image, &p);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int method_list_images(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ Manager *m = userdata;
+ Image *image;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images)
+ return -ENOMEM;
+
+ r = manager_image_cache_discover(m, images, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(ssbtttso)");
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_state = SD_BUS_ERROR_NULL;
+ PortableState state = _PORTABLE_STATE_INVALID;
+ _cleanup_free_ char *p = NULL;
+
+ r = bus_image_path(image, &p);
+ if (r < 0)
+ return r;
+
+ r = portable_get_state(
+ sd_bus_message_get_bus(message),
+ image->path,
+ 0,
+ &state,
+ &error_state);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get state of image '%s', ignoring: %s",
+ image->path, bus_error_message(&error_state, r));
+
+ r = sd_bus_message_append(reply, "(ssbtttso)",
+ image->name,
+ image_type_to_string(image->type),
+ image->read_only,
+ image->crtime,
+ image->mtime,
+ image->usage,
+ portable_state_to_string(state),
+ p);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int redirect_method_to_image(
+ Manager *m,
+ sd_bus_message *message,
+ sd_bus_error *error,
+ int (*method)(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error* error)) {
+
+ const char *name_or_path;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(method);
+
+ r = sd_bus_message_read(message, "s", &name_or_path);
+ if (r < 0)
+ return r;
+
+ return method(m, message, name_or_path, NULL, error);
+}
+
+static int method_get_image_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_get_os_release);
+}
+
+static int method_get_image_metadata(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_get_metadata);
+}
+
+static int method_get_image_state(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ const char *name_or_path;
+ PortableState state;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "s", &name_or_path);
+ if (r < 0)
+ return r;
+
+ r = portable_get_state(
+ sd_bus_message_get_bus(message),
+ name_or_path,
+ 0,
+ &state,
+ error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", portable_state_to_string(state));
+}
+
+static int method_attach_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_attach);
+}
+
+static int method_detach_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ PortableChange *changes = NULL;
+ Manager *m = userdata;
+ size_t n_changes = 0;
+ const char *name_or_path;
+ int r, runtime;
+
+ assert(message);
+ assert(m);
+
+ /* Note that we do not redirect detaching to the image object here, because we want to allow that users can
+ * detach already deleted images too, in case the user already deleted an image before properly detaching
+ * it. */
+
+ r = sd_bus_message_read(message, "sb", &name_or_path, &runtime);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.portable1.attach-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = portable_detach(
+ sd_bus_message_get_bus(message),
+ name_or_path,
+ runtime ? PORTABLE_RUNTIME : 0,
+ &changes,
+ &n_changes,
+ error);
+ if (r < 0)
+ goto finish;
+
+ r = reply_portable_changes(message, changes, n_changes);
+
+finish:
+ portable_changes_free(changes, n_changes);
+ return r;
+}
+
+static int method_remove_image(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_remove);
+}
+
+static int method_mark_image_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_mark_read_only);
+}
+
+static int method_set_image_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return redirect_method_to_image(userdata, message, error, bus_image_common_set_limit);
+}
+
+static int method_set_pool_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ uint64_t limit;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "t", &limit);
+ if (r < 0)
+ return r;
+ if (!FILE_SIZE_VALID_OR_INFINITY(limit))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New limit out of range");
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.portable1.manage-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ (void) btrfs_qgroup_set_limit("/var/lib/portables", 0, limit);
+
+ r = btrfs_subvol_set_subtree_quota_limit("/var/lib/portables", 0, limit);
+ if (r == -ENOTTY)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Quota is only supported on btrfs.");
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to adjust quota limit: %m");
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("PoolPath", "s", property_get_pool_path, 0, 0),
+ SD_BUS_PROPERTY("PoolUsage", "t", property_get_pool_usage, 0, 0),
+ SD_BUS_PROPERTY("PoolLimit", "t", property_get_pool_limit, 0, 0),
+ SD_BUS_PROPERTY("Profiles", "as", property_get_profiles, 0, 0),
+ SD_BUS_METHOD("GetImage", "s", "o", method_get_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("ListImages", NULL, "a(ssbtttso)", method_list_images, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageOSRelease", "s", "a{ss}", method_get_image_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageMetadata", "sas", "saya{say}", method_get_image_metadata, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetImageState", "s", "s", method_get_image_state, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("AttachImage", "sassbs", "a(sss)", method_attach_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("DetachImage", "sb", "a(sss)", method_detach_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RemoveImage", "s", NULL, method_remove_image, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MarkImageReadOnly", "sb", NULL, method_mark_image_read_only, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetImageLimit", "st", NULL, method_set_image_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetPoolLimit", "t", NULL, method_set_pool_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+int reply_portable_changes(sd_bus_message *m, const PortableChange *changes, size_t n_changes) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ size_t i;
+ int r;
+
+ assert(m);
+ assert(changes || n_changes == 0);
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(sss)");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n_changes; i++) {
+ r = sd_bus_message_append(reply, "(sss)",
+ portable_change_type_to_string(changes[i].type),
+ changes[i].path,
+ changes[i].source);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
diff --git a/src/portable/portabled-bus.h b/src/portable/portabled-bus.h
new file mode 100644
index 0000000..e8e4c3a
--- /dev/null
+++ b/src/portable/portabled-bus.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "portable.h"
+
+extern const sd_bus_vtable manager_vtable[];
+
+int reply_portable_changes(sd_bus_message *m, const PortableChange *changes, size_t n_changes);
diff --git a/src/portable/portabled-image-bus.c b/src/portable/portabled-image-bus.c
new file mode 100644
index 0000000..eb0786e
--- /dev/null
+++ b/src/portable/portabled-image-bus.c
@@ -0,0 +1,740 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-label.h"
+#include "bus-polkit.h"
+#include "bus-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "machine-image.h"
+#include "missing_capability.h"
+#include "portable.h"
+#include "portabled-bus.h"
+#include "portabled-image-bus.h"
+#include "portabled-image.h"
+#include "portabled.h"
+#include "process-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, image_type, ImageType);
+
+int bus_image_common_get_os_release(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(name_or_path || image);
+ assert(message);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_BY_PATH,
+ "org.freedesktop.portable1.inspect-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Will call us back */
+ return 1;
+
+ if (!image->metadata_valid) {
+ r = image_read_metadata(image);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read image metadata: %m");
+ }
+
+ return bus_reply_pair_array(message, image->os_release);
+}
+
+static int bus_image_method_get_os_release(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_get_os_release(NULL, message, NULL, userdata, error);
+}
+
+static int append_fd(sd_bus_message *m, PortableMetadata *d) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *buf = NULL;
+ size_t n;
+ int r;
+
+ assert(m);
+ assert(d);
+ assert(d->fd >= 0);
+
+ f = take_fdopen(&d->fd, "r");
+ if (!f)
+ return -errno;
+
+ r = read_full_stream(f, &buf, &n);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append_array(m, 'y', buf, n);
+}
+
+int bus_image_common_get_metadata(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ _cleanup_(portable_metadata_unrefp) PortableMetadata *os_release = NULL;
+ _cleanup_hashmap_free_ Hashmap *unit_files = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ PortableMetadata **sorted = NULL;
+ _cleanup_strv_free_ char **matches = NULL;
+ size_t i;
+ int r;
+
+ assert(name_or_path || image);
+ assert(message);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = sd_bus_message_read_strv(message, &matches);
+ if (r < 0)
+ return r;
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_BY_PATH,
+ "org.freedesktop.portable1.inspect-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Will call us back */
+ return 1;
+
+ r = portable_extract(
+ image->path,
+ matches,
+ &os_release,
+ &unit_files,
+ error);
+ if (r < 0)
+ return r;
+
+ r = portable_metadata_hashmap_to_sorted_array(unit_files, &sorted);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", image->path);
+ if (r < 0)
+ return r;
+
+ r = append_fd(reply, os_release);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{say}");
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < hashmap_size(unit_files); i++) {
+
+ r = sd_bus_message_open_container(reply, 'e', "say");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", sorted[i]->name);
+ if (r < 0)
+ return r;
+
+ r = append_fd(reply, sorted[i]);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static int bus_image_method_get_metadata(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_get_metadata(NULL, message, NULL, userdata, error);
+}
+
+static int bus_image_method_get_state(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Image *image = userdata;
+ PortableState state;
+ int r;
+
+ assert(message);
+ assert(image);
+
+ r = portable_get_state(
+ sd_bus_message_get_bus(message),
+ image->path,
+ 0,
+ &state,
+ error);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", portable_state_to_string(state));
+}
+
+int bus_image_common_attach(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ _cleanup_strv_free_ char **matches = NULL;
+ PortableChange *changes = NULL;
+ PortableFlags flags = 0;
+ const char *profile, *copy_mode;
+ size_t n_changes = 0;
+ int runtime, r;
+
+ assert(message);
+ assert(name_or_path || image);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = sd_bus_message_read_strv(message, &matches);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "sbs", &profile, &runtime, &copy_mode);
+ if (r < 0)
+ return r;
+
+ if (streq(copy_mode, "symlink"))
+ flags |= PORTABLE_PREFER_SYMLINK;
+ else if (streq(copy_mode, "copy"))
+ flags |= PORTABLE_PREFER_COPY;
+ else if (!isempty(copy_mode))
+ return sd_bus_reply_method_errorf(message, SD_BUS_ERROR_INVALID_ARGS, "Unknown copy mode '%s'", copy_mode);
+
+ if (runtime)
+ flags |= PORTABLE_RUNTIME;
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_ALL,
+ "org.freedesktop.portable1.attach-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Will call us back */
+ return 1;
+
+ r = portable_attach(
+ sd_bus_message_get_bus(message),
+ image->path,
+ matches,
+ profile,
+ flags,
+ &changes,
+ &n_changes,
+ error);
+ if (r < 0)
+ goto finish;
+
+ r = reply_portable_changes(message, changes, n_changes);
+
+finish:
+ portable_changes_free(changes, n_changes);
+ return r;
+}
+
+static int bus_image_method_attach(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_attach(NULL, message, NULL, userdata, error);
+}
+
+static int bus_image_method_detach(
+ sd_bus_message *message,
+ void *userdata,
+ sd_bus_error *error) {
+
+ PortableChange *changes = NULL;
+ Image *image = userdata;
+ Manager *m = image->userdata;
+ size_t n_changes = 0;
+ int r, runtime;
+
+ assert(message);
+ assert(image);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &runtime);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ "org.freedesktop.portable1.attach-images",
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = portable_detach(
+ sd_bus_message_get_bus(message),
+ image->path,
+ runtime ? PORTABLE_RUNTIME : 0,
+ &changes,
+ &n_changes,
+ error);
+ if (r < 0)
+ goto finish;
+
+ r = reply_portable_changes(message, changes, n_changes);
+
+finish:
+ portable_changes_free(changes, n_changes);
+ return r;
+}
+
+int bus_image_common_remove(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+ _cleanup_(sigkill_waitp) pid_t child = 0;
+ PortableState state;
+ int r;
+
+ assert(message);
+ assert(name_or_path || image);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ if (m->n_operations >= OPERATIONS_MAX)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_LIMITS_EXCEEDED, "Too many ongoing operations.");
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_ALL,
+ "org.freedesktop.portable1.manage-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = portable_get_state(
+ sd_bus_message_get_bus(message),
+ image->path,
+ 0,
+ &state,
+ error);
+ if (r < 0)
+ return r;
+
+ if (state != PORTABLE_DETACHED)
+ return sd_bus_error_set_errnof(error, EBUSY, "Image '%s' is not detached, refusing.", image->path);
+
+ if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
+ return sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
+
+ r = safe_fork("(sd-imgrm)", FORK_RESET_SIGNALS, &child);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
+ if (r == 0) {
+ errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+ r = image_remove(image);
+ if (r < 0) {
+ (void) write(errno_pipe_fd[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+ r = operation_new(m, child, message, errno_pipe_fd[0], NULL);
+ if (r < 0)
+ return r;
+
+ child = 0;
+ errno_pipe_fd[0] = -1;
+
+ return 1;
+}
+
+static int bus_image_method_remove(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_remove(NULL, message, NULL, userdata, error);
+}
+
+int bus_image_common_mark_read_only(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ int r, read_only;
+
+ assert(message);
+ assert(name_or_path || image);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = sd_bus_message_read(message, "b", &read_only);
+ if (r < 0)
+ return r;
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_ALL,
+ "org.freedesktop.portable1.manage-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_read_only(image, read_only);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int bus_image_method_mark_read_only(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_mark_read_only(NULL, message, NULL, userdata, error);
+}
+
+int bus_image_common_set_limit(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ sd_bus_error *error) {
+
+ uint64_t limit;
+ int r;
+
+ assert(message);
+ assert(name_or_path || image);
+
+ if (!m) {
+ assert(image);
+ m = image->userdata;
+ }
+
+ r = sd_bus_message_read(message, "t", &limit);
+ if (r < 0)
+ return r;
+ if (!FILE_SIZE_VALID_OR_INFINITY(limit))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "New limit out of range");
+
+ r = bus_image_acquire(m,
+ message,
+ name_or_path,
+ image,
+ BUS_IMAGE_AUTHENTICATE_ALL,
+ "org.freedesktop.portable1.manage-images",
+ &image,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Will call us back */
+
+ r = image_set_limit(image, limit);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int bus_image_method_set_limit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_image_common_set_limit(NULL, message, NULL, userdata, error);
+}
+
+const sd_bus_vtable image_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Name", "s", NULL, offsetof(Image, name), 0),
+ SD_BUS_PROPERTY("Path", "s", NULL, offsetof(Image, path), 0),
+ SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Image, type), 0),
+ SD_BUS_PROPERTY("ReadOnly", "b", bus_property_get_bool, offsetof(Image, read_only), 0),
+ SD_BUS_PROPERTY("CreationTimestamp", "t", NULL, offsetof(Image, crtime), 0),
+ SD_BUS_PROPERTY("ModificationTimestamp", "t", NULL, offsetof(Image, mtime), 0),
+ SD_BUS_PROPERTY("Usage", "t", NULL, offsetof(Image, usage), 0),
+ SD_BUS_PROPERTY("Limit", "t", NULL, offsetof(Image, limit), 0),
+ SD_BUS_PROPERTY("UsageExclusive", "t", NULL, offsetof(Image, usage_exclusive), 0),
+ SD_BUS_PROPERTY("LimitExclusive", "t", NULL, offsetof(Image, limit_exclusive), 0),
+ SD_BUS_METHOD("GetOSRelease", NULL, "a{ss}", bus_image_method_get_os_release, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetMetadata", "as", "saya{say}", bus_image_method_get_metadata, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetState", NULL, "s", bus_image_method_get_state, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Attach", "assbs", "a(sss)", bus_image_method_attach, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Detach", "b", "a(sss)", bus_image_method_detach, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Remove", NULL, NULL, bus_image_method_remove, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("MarkReadOnly", "b", NULL, bus_image_method_mark_read_only, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("SetLimit", "t", NULL, bus_image_method_set_limit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END
+};
+
+int bus_image_path(Image *image, char **ret) {
+ assert(image);
+ assert(ret);
+
+ if (!image->discoverable)
+ return -EINVAL;
+
+ return sd_bus_path_encode("/org/freedesktop/portable1/image", image->name, ret);
+}
+
+int bus_image_acquire(
+ Manager *m,
+ sd_bus_message *message,
+ const char *name_or_path,
+ Image *image,
+ ImageAcquireMode mode,
+ const char *polkit_action,
+ Image **ret,
+ sd_bus_error *error) {
+
+ _cleanup_(image_unrefp) Image *loaded = NULL;
+ Image *cached;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(name_or_path || image);
+ assert(mode >= 0);
+ assert(mode < _BUS_IMAGE_ACQUIRE_MODE_MAX);
+ assert(polkit_action || mode == BUS_IMAGE_REFUSE_BY_PATH);
+ assert(ret);
+
+ /* Acquires an 'Image' object if not acquired yet, and enforces necessary authentication while doing so. */
+
+ if (mode == BUS_IMAGE_AUTHENTICATE_ALL) {
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ polkit_action,
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) { /* Will call us back */
+ *ret = NULL;
+ return 0;
+ }
+ }
+
+ /* Already passed in? */
+ if (image) {
+ *ret = image;
+ return 1;
+ }
+
+ /* Let's see if this image is already cached? */
+ cached = manager_image_cache_get(m, name_or_path);
+ if (cached) {
+ *ret = cached;
+ return 1;
+ }
+
+ if (image_name_is_valid(name_or_path)) {
+
+ /* If it's a short name, let's search for it */
+ r = image_find(IMAGE_PORTABLE, name_or_path, &loaded);
+ if (r == -ENOENT)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_PORTABLE_IMAGE, "No image '%s' found.", name_or_path);
+
+ /* other errors are handled below… */
+ } else {
+ /* Don't accept path if this is always forbidden */
+ if (mode == BUS_IMAGE_REFUSE_BY_PATH)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Expected image name, not path in place of '%s'.", name_or_path);
+
+ if (!path_is_absolute(name_or_path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image name '%s' is not valid or not a valid path.", name_or_path);
+
+ if (!path_is_normalized(name_or_path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Image path '%s' is not normalized.", name_or_path);
+
+ if (mode == BUS_IMAGE_AUTHENTICATE_BY_PATH) {
+ r = bus_verify_polkit_async(
+ message,
+ CAP_SYS_ADMIN,
+ polkit_action,
+ NULL,
+ false,
+ UID_INVALID,
+ &m->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0) { /* Will call us back */
+ *ret = NULL;
+ return 0;
+ }
+ }
+
+ r = image_from_path(name_or_path, &loaded);
+ }
+ if (r == -EMEDIUMTYPE) {
+ sd_bus_error_setf(error, BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE, "Typ of image '%s' not recognized; supported image types are directories/btrfs subvolumes, block devices, and raw disk image files with suffix '.raw'.", name_or_path);
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ /* Add what we just loaded to the cache. This has as side-effect that the object stays in memory until the
+ * cache is purged again, i.e. at least for the current event loop iteration, which is all we need, and which
+ * means we don't actually need to ref the return object. */
+ r = manager_image_cache_add(m, loaded);
+ if (r < 0)
+ return r;
+
+ *ret = loaded;
+ return 1;
+}
+
+int bus_image_object_find(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ void *userdata,
+ void **found,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *e = NULL;
+ Manager *m = userdata;
+ Image *image = NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/portable1/image", &e);
+ if (r < 0)
+ return 0;
+ if (r == 0)
+ goto not_found;
+
+ r = bus_image_acquire(m, sd_bus_get_current_message(bus), e, NULL, BUS_IMAGE_REFUSE_BY_PATH, NULL, &image, error);
+ if (r == -ENOENT)
+ goto not_found;
+ if (r < 0)
+ return r;
+
+ *found = image;
+ return 1;
+
+not_found:
+ *found = NULL;
+ return 0;
+}
+
+int bus_image_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_hashmap_free_ Hashmap *images = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t n_allocated = 0, n = 0;
+ Manager *m = userdata;
+ Image *image;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(nodes);
+
+ images = hashmap_new(&image_hash_ops);
+ if (!images)
+ return -ENOMEM;
+
+ r = manager_image_cache_discover(m, images, error);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images) {
+ char *p;
+
+ r = bus_image_path(image, &p);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(l, n_allocated, n+2)) {
+ free(p);
+ return -ENOMEM;
+ }
+
+ l[n++] = p;
+ l[n] = NULL;
+ }
+
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
diff --git a/src/portable/portabled-image-bus.h b/src/portable/portabled-image-bus.h
new file mode 100644
index 0000000..aa2a3ad
--- /dev/null
+++ b/src/portable/portabled-image-bus.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "machine-image.h"
+#include "portabled.h"
+
+int bus_image_common_get_os_release(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_get_metadata(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_attach(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_remove(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_mark_read_only(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+int bus_image_common_set_limit(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, sd_bus_error *error);
+
+extern const sd_bus_vtable image_vtable[];
+
+int bus_image_path(Image *image, char **ret);
+
+/* So here's some complexity: some of operations can either take an image name, or a fully qualified file system path
+ * to an image. We need to authenticate differently when processing these two: images referenced via simple image names
+ * mean the images are located in the image search path and thus safe for limited read access for unprivileged
+ * clients. For operations on images located anywhere else we need explicit authentication however, so that
+ * unprivileged clients can't make us open arbitrary files in the file system.
+ *
+ * The "Image" bus objects directly represent images in the image search path, but do not exist for path-referenced
+ * images. Hence, when requesting a bus object we need to refuse references by file system path, but still allow
+ * references by image name. Depending on the operation to execute potentially we need to authenticate in all cases. */
+
+typedef enum ImageAcquireMode {
+ BUS_IMAGE_REFUSE_BY_PATH, /* allow by name + prohibit by path */
+ BUS_IMAGE_AUTHENTICATE_BY_PATH, /* allow by name + polkit by path */
+ BUS_IMAGE_AUTHENTICATE_ALL, /* polkit by name + polkit by path */
+ _BUS_IMAGE_ACQUIRE_MODE_MAX,
+ _BUS_IMAGE_ACQUIRE_MODE_INVALID = -1
+} ImageAcquireMode;
+
+int bus_image_acquire(Manager *m, sd_bus_message *message, const char *name_or_path, Image *image, ImageAcquireMode mode, const char *polkit_action, Image **ret, sd_bus_error *error);
+
+int bus_image_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error);
+int bus_image_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error);
diff --git a/src/portable/portabled-image.c b/src/portable/portabled-image.c
new file mode 100644
index 0000000..b025c20
--- /dev/null
+++ b/src/portable/portabled-image.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "portable.h"
+#include "portabled-image.h"
+#include "portabled.h"
+
+Image *manager_image_cache_get(Manager *m, const char *name_or_path) {
+ assert(m);
+
+ return hashmap_get(m->image_cache, name_or_path);
+}
+
+static int image_cache_flush(sd_event_source *s, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(m);
+
+ hashmap_clear(m->image_cache);
+ return 0;
+}
+
+static int manager_image_cache_initialize(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = hashmap_ensure_allocated(&m->image_cache, &image_hash_ops);
+ if (r < 0)
+ return r;
+
+ /* We flush the cache as soon as we are idle again */
+ if (!m->image_cache_defer_event) {
+ r = sd_event_add_defer(m->event, &m->image_cache_defer_event, image_cache_flush, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->image_cache_defer_event, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_enabled(m->image_cache_defer_event, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int manager_image_cache_add(Manager *m, Image *image) {
+ int r;
+
+ assert(m);
+
+ /* We add the specified image to the cache under two keys.
+ *
+ * 1. Always under its path
+ *
+ * 2. If the image was discovered in the search path (i.e. its discoverable boolean set) we'll also add it
+ * under its short name.
+ */
+
+ r = manager_image_cache_initialize(m);
+ if (r < 0)
+ return r;
+
+ image->userdata = m;
+
+ r = hashmap_put(m->image_cache, image->path, image);
+ if (r < 0)
+ return r;
+
+ image_ref(image);
+
+ if (image->discoverable) {
+ r = hashmap_put(m->image_cache, image->name, image);
+ if (r < 0)
+ return r;
+
+ image_ref(image);
+ }
+
+ return 0;
+}
+
+int manager_image_cache_discover(Manager *m, Hashmap *images, sd_bus_error *error) {
+ Image *image;
+ int r;
+
+ assert(m);
+
+ /* A wrapper around image_discover() (for finding images in search path) and portable_discover_attached() (for
+ * finding attached images). */
+
+ r = image_discover(IMAGE_PORTABLE, images);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(image, images)
+ (void) manager_image_cache_add(m, image);
+
+ return 0;
+}
diff --git a/src/portable/portabled-image.h b/src/portable/portabled-image.h
new file mode 100644
index 0000000..eeefffe
--- /dev/null
+++ b/src/portable/portabled-image.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hashmap.h"
+#include "machine-image.h"
+#include "portabled.h"
+
+Image *manager_image_cache_get(Manager *m, const char *name_or_path);
+
+int manager_image_cache_add(Manager *m, Image *image);
+
+int manager_image_cache_discover(Manager *m, Hashmap *images, sd_bus_error *error);
diff --git a/src/portable/portabled-operation.c b/src/portable/portabled-operation.c
new file mode 100644
index 0000000..848b784
--- /dev/null
+++ b/src/portable/portabled-operation.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "portabled-operation.h"
+#include "process-util.h"
+
+static int operation_done(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ Operation *o = userdata;
+ int r;
+
+ assert(o);
+ assert(si);
+
+ log_debug("Operating " PID_FMT " is now complete with code=%s status=%i",
+ o->pid,
+ sigchld_code_to_string(si->si_code), si->si_status);
+
+ o->pid = 0;
+
+ if (si->si_code != CLD_EXITED) {
+ r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child died abnormally.");
+ goto fail;
+ }
+
+ if (si->si_status == EXIT_SUCCESS)
+ r = 0;
+ else if (read(o->errno_fd, &r, sizeof(r)) != sizeof(r)) { /* Try to acquire error code for failed operation */
+ r = sd_bus_error_setf(&error, SD_BUS_ERROR_FAILED, "Child failed.");
+ goto fail;
+ }
+
+ if (o->done) {
+ /* A completion routine is set for this operation, call it. */
+ r = o->done(o, r, &error);
+ if (r < 0) {
+ if (!sd_bus_error_is_set(&error))
+ sd_bus_error_set_errno(&error, r);
+
+ goto fail;
+ }
+
+ } else {
+ /* The default operation when done is to simply return an error on failure or an empty success
+ * message on success. */
+ if (r < 0) {
+ sd_bus_error_set_errno(&error, r);
+ goto fail;
+ }
+
+ r = sd_bus_reply_method_return(o->message, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to reply to message: %m");
+ }
+
+ operation_free(o);
+ return 0;
+
+fail:
+ r = sd_bus_reply_method_error(o->message, &error);
+ if (r < 0)
+ log_error_errno(r, "Failed to reply to message: %m");
+
+ operation_free(o);
+ return 0;
+}
+
+int operation_new(Manager *manager, pid_t child, sd_bus_message *message, int errno_fd, Operation **ret) {
+ Operation *o;
+ int r;
+
+ assert(manager);
+ assert(child > 1);
+ assert(message);
+ assert(errno_fd >= 0);
+
+ o = new0(Operation, 1);
+ if (!o)
+ return -ENOMEM;
+
+ o->extra_fd = -1;
+
+ r = sd_event_add_child(manager->event, &o->event_source, child, WEXITED, operation_done, o);
+ if (r < 0) {
+ free(o);
+ return r;
+ }
+
+ o->pid = child;
+ o->message = sd_bus_message_ref(message);
+ o->errno_fd = errno_fd;
+
+ LIST_PREPEND(operations, manager->operations, o);
+ manager->n_operations++;
+ o->manager = manager;
+
+ log_debug("Started new operation " PID_FMT ".", child);
+
+ /* At this point we took ownership of both the child and the errno file descriptor! */
+
+ if (ret)
+ *ret = o;
+
+ return 0;
+}
+
+Operation *operation_free(Operation *o) {
+ if (!o)
+ return NULL;
+
+ sd_event_source_unref(o->event_source);
+
+ safe_close(o->errno_fd);
+ safe_close(o->extra_fd);
+
+ if (o->pid > 1)
+ (void) sigkill_wait(o->pid);
+
+ sd_bus_message_unref(o->message);
+
+ if (o->manager) {
+ LIST_REMOVE(operations, o->manager->operations, o);
+ o->manager->n_operations--;
+ }
+
+ return mfree(o);
+}
diff --git a/src/portable/portabled-operation.h b/src/portable/portabled-operation.h
new file mode 100644
index 0000000..f64740e
--- /dev/null
+++ b/src/portable/portabled-operation.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "list.h"
+
+typedef struct Operation Operation;
+
+#include "portabled.h"
+
+#define OPERATIONS_MAX 64
+
+struct Operation {
+ Manager *manager;
+ pid_t pid;
+ sd_bus_message *message;
+ int errno_fd;
+ int extra_fd;
+ sd_event_source *event_source;
+ int (*done)(Operation *o, int ret, sd_bus_error *error);
+ LIST_FIELDS(Operation, operations);
+};
+
+int operation_new(Manager *manager, pid_t child, sd_bus_message *message, int errno_fd, Operation **ret);
+Operation *operation_free(Operation *o);
diff --git a/src/portable/portabled.c b/src/portable/portabled.c
new file mode 100644
index 0000000..f008f84
--- /dev/null
+++ b/src/portable/portabled.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-log-control-api.h"
+#include "bus-polkit.h"
+#include "def.h"
+#include "main-func.h"
+#include "portabled-bus.h"
+#include "portabled-image-bus.h"
+#include "portabled.h"
+#include "process-util.h"
+#include "signal-util.h"
+
+static Manager* manager_unref(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
+
+static int manager_new(Manager **ret) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static Manager* manager_unref(Manager *m) {
+ assert(m);
+
+ hashmap_free(m->image_cache);
+
+ sd_event_source_unref(m->image_cache_defer_event);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ sd_bus_flush_close_unref(m->bus);
+ sd_event_unref(m->event);
+
+ return mfree(m);
+}
+
+static int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+ assert(!m->bus);
+
+ r = sd_bus_default_system(&m->bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/portable1", "org.freedesktop.portable1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add manager object vtable: %m");
+
+ r = sd_bus_add_fallback_vtable(m->bus, NULL, "/org/freedesktop/portable1/image", "org.freedesktop.portable1.Image", image_vtable, bus_image_object_find, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add image object vtable: %m");
+
+ r = sd_bus_add_node_enumerator(m->bus, NULL, "/org/freedesktop/portable1/image", bus_image_node_enumerator, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add image enumerator: %m");
+
+ r = bus_log_control_api_register(m->bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.portable1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ (void) sd_bus_set_exit_on_disconnect(m->bus, true);
+
+ return 0;
+}
+
+static int manager_startup(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool check_idle(void *userdata) {
+ Manager *m = userdata;
+
+ return !m->operations;
+}
+
+static int manager_run(Manager *m) {
+ assert(m);
+
+ return bus_event_loop_with_idle(
+ m->event,
+ m->bus,
+ "org.freedesktop.portable1",
+ DEFAULT_EXIT_USEC,
+ check_idle, m);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_unrefp) Manager *m = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager object: %m");
+
+ r = manager_startup(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fully start up daemon: %m");
+
+ log_debug("systemd-portabled running as pid " PID_FMT, getpid_cached());
+ sd_notify(false,
+ "READY=1\n"
+ "STATUS=Processing requests...");
+
+ r = manager_run(m);
+
+ (void) sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down...");
+ log_debug("systemd-portabled stopped as pid " PID_FMT, getpid_cached());
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/portable/portabled.h b/src/portable/portabled.h
new file mode 100644
index 0000000..03a9996
--- /dev/null
+++ b/src/portable/portabled.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "list.h"
+
+typedef struct Manager Manager;
+
+#include "portabled-operation.h"
+
+struct Manager {
+ sd_event *event;
+ sd_bus *bus;
+
+ Hashmap *polkit_registry;
+
+ Hashmap *image_cache;
+ sd_event_source *image_cache_defer_event;
+
+ LIST_HEAD(Operation, operations);
+ unsigned n_operations;
+};
diff --git a/src/portable/profile/default/service.conf b/src/portable/profile/default/service.conf
new file mode 100644
index 0000000..792be50
--- /dev/null
+++ b/src/portable/profile/default/service.conf
@@ -0,0 +1,32 @@
+# The "default" security profile for services, i.e. a number of useful restrictions
+
+[Service]
+MountAPIVFS=yes
+TemporaryFileSystem=/run
+BindReadOnlyPaths=/run/systemd/notify
+BindReadOnlyPaths=/dev/log /run/systemd/journal/socket /run/systemd/journal/stdout
+BindReadOnlyPaths=/etc/machine-id
+BindReadOnlyPaths=/etc/resolv.conf
+BindReadOnlyPaths=/run/dbus/system_bus_socket
+DynamicUser=yes
+RemoveIPC=yes
+CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_FOWNER \
+ CAP_FSETID CAP_IPC_LOCK CAP_IPC_OWNER CAP_KILL CAP_MKNOD CAP_NET_ADMIN \
+ CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_SETGID CAP_SETPCAP \
+ CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT CAP_SYS_NICE CAP_SYS_RESOURCE
+PrivateTmp=yes
+PrivateDevices=yes
+PrivateUsers=yes
+ProtectSystem=strict
+ProtectHome=yes
+ProtectKernelTunables=yes
+ProtectKernelModules=yes
+ProtectControlGroups=yes
+RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
+LockPersonality=yes
+MemoryDenyWriteExecute=yes
+RestrictRealtime=yes
+RestrictNamespaces=yes
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM
+SystemCallArchitectures=native
diff --git a/src/portable/profile/nonetwork/service.conf b/src/portable/profile/nonetwork/service.conf
new file mode 100644
index 0000000..c81cebe
--- /dev/null
+++ b/src/portable/profile/nonetwork/service.conf
@@ -0,0 +1,32 @@
+# The "nonetwork" security profile for services, i.e. like "default" but without networking
+
+[Service]
+MountAPIVFS=yes
+TemporaryFileSystem=/run
+BindReadOnlyPaths=/run/systemd/notify
+BindReadOnlyPaths=/dev/log /run/systemd/journal/socket /run/systemd/journal/stdout
+BindReadOnlyPaths=/etc/machine-id
+BindReadOnlyPaths=/run/dbus/system_bus_socket
+DynamicUser=yes
+RemoveIPC=yes
+CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_FOWNER \
+ CAP_FSETID CAP_IPC_LOCK CAP_IPC_OWNER CAP_KILL CAP_MKNOD CAP_SETGID CAP_SETPCAP \
+ CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT CAP_SYS_NICE CAP_SYS_RESOURCE
+PrivateTmp=yes
+PrivateDevices=yes
+PrivateUsers=yes
+ProtectSystem=strict
+ProtectHome=yes
+ProtectKernelTunables=yes
+ProtectKernelModules=yes
+ProtectControlGroups=yes
+RestrictAddressFamilies=AF_UNIX AF_NETLINK
+LockPersonality=yes
+MemoryDenyWriteExecute=yes
+RestrictRealtime=yes
+RestrictNamespaces=yes
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM
+SystemCallArchitectures=native
+PrivateNetwork=yes
+IPAddressDeny=any
diff --git a/src/portable/profile/strict/service.conf b/src/portable/profile/strict/service.conf
new file mode 100644
index 0000000..d10fb5a
--- /dev/null
+++ b/src/portable/profile/strict/service.conf
@@ -0,0 +1,31 @@
+# The "strict" security profile for services, all options turned on
+
+[Service]
+MountAPIVFS=yes
+TemporaryFileSystem=/run
+BindReadOnlyPaths=/run/systemd/notify
+BindReadOnlyPaths=/dev/log /run/systemd/journal/socket /run/systemd/journal/stdout
+BindReadOnlyPaths=/etc/machine-id
+DynamicUser=yes
+RemoveIPC=yes
+CapabilityBoundingSet=
+PrivateTmp=yes
+PrivateDevices=yes
+PrivateUsers=yes
+ProtectSystem=strict
+ProtectHome=yes
+ProtectKernelTunables=yes
+ProtectKernelModules=yes
+ProtectControlGroups=yes
+RestrictAddressFamilies=AF_UNIX
+LockPersonality=yes
+NoNewPrivileges=yes
+MemoryDenyWriteExecute=yes
+RestrictRealtime=yes
+RestrictNamespaces=yes
+SystemCallFilter=@system-service
+SystemCallErrorNumber=EPERM
+SystemCallArchitectures=native
+PrivateNetwork=yes
+IPAddressDeny=any
+TasksMax=4
diff --git a/src/portable/profile/trusted/service.conf b/src/portable/profile/trusted/service.conf
new file mode 100644
index 0000000..9a6af70
--- /dev/null
+++ b/src/portable/profile/trusted/service.conf
@@ -0,0 +1,7 @@
+# The "trusted" profile for services, i.e. no restrictions are applied
+
+[Service]
+MountAPIVFS=yes
+BindPaths=/run
+BindReadOnlyPaths=/etc/machine-id
+BindReadOnlyPaths=/etc/resolv.conf
diff --git a/src/pstore/meson.build b/src/pstore/meson.build
new file mode 100644
index 0000000..6c0ab05
--- /dev/null
+++ b/src/pstore/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_pstore_sources = files('''
+ pstore.c
+'''.split())
+
+if conf.get('ENABLE_PSTORE') == 1 and install_sysconfdir
+ install_data('pstore.conf',
+ install_dir : pkgsysconfdir)
+endif
diff --git a/src/pstore/pstore.c b/src/pstore/pstore.c
new file mode 100644
index 0000000..db8a71f
--- /dev/null
+++ b/src/pstore/pstore.c
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* Copyright © 2019 Oracle and/or its affiliates. */
+
+/* Generally speaking, the pstore contains a small number of files
+ * that in turn contain a small amount of data. */
+#include <errno.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <sys/prctl.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-journal.h"
+#include "sd-login.h"
+#include "sd-messages.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "compress.h"
+#include "conf-parser.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "journal-importer.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "special.h"
+#include "sort-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+/* Command line argument handling */
+typedef enum PStoreStorage {
+ PSTORE_STORAGE_NONE,
+ PSTORE_STORAGE_EXTERNAL,
+ PSTORE_STORAGE_JOURNAL,
+ _PSTORE_STORAGE_MAX,
+ _PSTORE_STORAGE_INVALID = -1
+} PStoreStorage;
+
+static const char* const pstore_storage_table[_PSTORE_STORAGE_MAX] = {
+ [PSTORE_STORAGE_NONE] = "none",
+ [PSTORE_STORAGE_EXTERNAL] = "external",
+ [PSTORE_STORAGE_JOURNAL] = "journal",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(pstore_storage, PStoreStorage);
+static DEFINE_CONFIG_PARSE_ENUM(config_parse_pstore_storage, pstore_storage, PStoreStorage, "Failed to parse storage setting");
+
+static PStoreStorage arg_storage = PSTORE_STORAGE_EXTERNAL;
+
+static bool arg_unlink = true;
+static const char *arg_sourcedir = "/sys/fs/pstore";
+static const char *arg_archivedir = "/var/lib/systemd/pstore";
+
+static int parse_config(void) {
+ static const ConfigTableItem items[] = {
+ { "PStore", "Unlink", config_parse_bool, 0, &arg_unlink },
+ { "PStore", "Storage", config_parse_pstore_storage, 0, &arg_storage },
+ {}
+ };
+
+ return config_parse_many_nulstr(
+ PKGSYSCONFDIR "/pstore.conf",
+ CONF_PATHS_NULSTR("systemd/pstore.conf.d"),
+ "PStore\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+}
+
+/* File list handling - PStoreEntry is the struct and
+ * and PStoreEntry is the type that contains all info
+ * about a pstore entry. */
+typedef struct PStoreEntry {
+ struct dirent dirent;
+ bool is_binary;
+ bool handled;
+ char *content;
+ size_t content_size;
+} PStoreEntry;
+
+typedef struct PStoreList {
+ PStoreEntry *entries;
+ size_t n_allocated;
+ size_t n_entries;
+} PStoreList;
+
+static void pstore_entries_reset(PStoreList *list) {
+ for (size_t i = 0; i < list->n_entries; i++)
+ free(list->entries[i].content);
+ free(list->entries);
+ list->n_entries = 0;
+}
+
+static int compare_pstore_entries(const PStoreEntry *a, const PStoreEntry *b) {
+ return strcmp(a->dirent.d_name, b->dirent.d_name);
+}
+
+static int move_file(PStoreEntry *pe, const char *subdir) {
+ _cleanup_free_ char *ifd_path = NULL, *ofd_path = NULL;
+ _cleanup_free_ void *field = NULL;
+ const char *suffix, *message;
+ struct iovec iovec[2];
+ int n_iovec = 0, r;
+
+ if (pe->handled)
+ return 0;
+
+ ifd_path = path_join(arg_sourcedir, pe->dirent.d_name);
+ if (!ifd_path)
+ return log_oom();
+
+ ofd_path = path_join(arg_archivedir, subdir, pe->dirent.d_name);
+ if (!ofd_path)
+ return log_oom();
+
+ /* Always log to the journal */
+ suffix = arg_storage == PSTORE_STORAGE_EXTERNAL ? strjoina(" moved to ", ofd_path) : (char *)".";
+ message = strjoina("MESSAGE=PStore ", pe->dirent.d_name, suffix);
+ iovec[n_iovec++] = IOVEC_MAKE_STRING(message);
+
+ if (pe->content_size > 0) {
+ size_t field_size;
+
+ field_size = strlen("FILE=") + pe->content_size;
+ field = malloc(field_size);
+ if (!field)
+ return log_oom();
+ memcpy(stpcpy(field, "FILE="), pe->content, pe->content_size);
+ iovec[n_iovec++] = IOVEC_MAKE(field, field_size);
+ }
+
+ r = sd_journal_sendv(iovec, n_iovec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to log pstore entry: %m");
+
+ if (arg_storage == PSTORE_STORAGE_EXTERNAL) {
+ /* Move file from pstore to external storage */
+ r = mkdir_parents(ofd_path, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create directory %s: %m", ofd_path);
+ r = copy_file_atomic(ifd_path, ofd_path, 0600, 0, 0, COPY_REPLACE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to copy_file_atomic: %s to %s", ifd_path, ofd_path);
+ }
+
+ /* If file copied properly, remove it from pstore */
+ if (arg_unlink)
+ (void) unlink(ifd_path);
+
+ pe->handled = true;
+
+ return 0;
+}
+
+static int write_dmesg(const char *dmesg, size_t size, const char *id) {
+ _cleanup_(unlink_and_freep) char *tmp_path = NULL;
+ _cleanup_free_ char *ofd_path = NULL;
+ _cleanup_close_ int ofd = -1;
+ ssize_t wr;
+ int r;
+
+ if (size == 0)
+ return 0;
+
+ assert(dmesg);
+
+ ofd_path = path_join(arg_archivedir, id, "dmesg.txt");
+ if (!ofd_path)
+ return log_oom();
+
+ ofd = open_tmpfile_linkable(ofd_path, O_CLOEXEC|O_CREAT|O_TRUNC|O_WRONLY, &tmp_path);
+ if (ofd < 0)
+ return log_error_errno(ofd, "Failed to open temporary file %s: %m", ofd_path);
+ wr = write(ofd, dmesg, size);
+ if (wr < 0)
+ return log_error_errno(errno, "Failed to store dmesg to %s: %m", ofd_path);
+ if (wr != (ssize_t)size)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to store dmesg to %s. %zu bytes are lost.", ofd_path, size - wr);
+ r = link_tmpfile(ofd, tmp_path, ofd_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write temporary file %s: %m", ofd_path);
+ tmp_path = mfree(tmp_path);
+
+ return 0;
+}
+
+static void process_dmesg_files(PStoreList *list) {
+ /* Move files, reconstruct dmesg.txt */
+ _cleanup_free_ char *dmesg = NULL, *dmesg_id = NULL;
+ size_t dmesg_size = 0, dmesg_allocated = 0;
+ bool dmesg_bad = false;
+ PStoreEntry *pe;
+
+ /* Handle each dmesg file: files processed in reverse
+ * order so as to properly reconstruct original dmesg */
+ for (size_t n = list->n_entries; n > 0; n--) {
+ bool move_file_and_continue = false;
+ _cleanup_free_ char *pe_id = NULL;
+ char *p;
+ size_t plen;
+
+ pe = &list->entries[n-1];
+
+ if (pe->handled)
+ continue;
+ if (!startswith(pe->dirent.d_name, "dmesg-"))
+ continue;
+
+ if (endswith(pe->dirent.d_name, ".enc.z")) /* indicates a problem */
+ move_file_and_continue = true;
+ p = strrchr(pe->dirent.d_name, '-');
+ if (!p)
+ move_file_and_continue = true;
+
+ if (move_file_and_continue) {
+ /* A dmesg file on which we do NO additional processing */
+ (void) move_file(pe, NULL);
+ continue;
+ }
+
+ /* See if this file is one of a related group of files
+ * in order to reconstruct dmesg */
+
+ /* When dmesg is written into pstore, it is done so in
+ * small chunks, whatever the exchange buffer size is
+ * with the underlying pstore backend (ie. EFI may be
+ * ~2KiB), which means an example pstore with approximately
+ * 64KB of storage may have up to roughly 32 dmesg files
+ * that could be related, depending upon the size of the
+ * original dmesg.
+ *
+ * Here we look at the dmesg filename and try to discern
+ * if files are part of a related group, meaning the same
+ * original dmesg.
+ *
+ * The two known pstore backends are EFI and ERST. These
+ * backends store data in the Common Platform Error
+ * Record, CPER, format. The dmesg- filename contains the
+ * CPER record id, a 64bit number (in decimal notation).
+ * In Linux, the record id is encoded with two digits for
+ * the dmesg part (chunk) number and 3 digits for the
+ * count number. So allowing an additional digit to
+ * compensate for advancing time, this code ignores the
+ * last six digits of the filename in determining the
+ * record id.
+ *
+ * For the EFI backend, the record id encodes an id in the
+ * upper 32 bits, and a timestamp in the lower 32-bits.
+ * So ignoring the least significant 6 digits has proven
+ * to generally identify related dmesg entries. */
+#define PSTORE_FILENAME_IGNORE 6
+
+ /* determine common portion of record id */
+ ++p; /* move beyond dmesg- */
+ plen = strlen(p);
+ if (plen > PSTORE_FILENAME_IGNORE) {
+ pe_id = memdup_suffix0(p, plen - PSTORE_FILENAME_IGNORE);
+ if (!pe_id) {
+ log_oom();
+ return;
+ }
+ } else
+ pe_id = mfree(pe_id);
+
+ /* Now move file from pstore to archive storage */
+ move_file(pe, pe_id);
+
+ if (dmesg_bad)
+ continue;
+
+ /* If the current record id is NOT the same as the
+ * previous record id, then start a new dmesg.txt file */
+ if (!streq_ptr(pe_id, dmesg_id)) {
+ /* Encountered a new dmesg group, close out old one, open new one */
+ (void) write_dmesg(dmesg, dmesg_size, dmesg_id);
+ dmesg_size = 0;
+
+ /* now point dmesg_id to storage of pe_id */
+ free_and_replace(dmesg_id, pe_id);
+ }
+
+ /* Reconstruction of dmesg is done as a useful courtesy: do not fail, but don't write garbled
+ * output either. */
+ size_t needed = strlen(pe->dirent.d_name) + strlen(":\n") + pe->content_size + 1;
+ if (!GREEDY_REALLOC(dmesg, dmesg_allocated, dmesg_size + needed)) {
+ log_oom();
+ dmesg_bad = true;
+ continue;
+ }
+
+ dmesg_size += sprintf(dmesg + dmesg_size, "%s:\n", pe->dirent.d_name);
+ if (pe->content) {
+ memcpy(dmesg + dmesg_size, pe->content, pe->content_size);
+ dmesg_size += pe->content_size;
+ }
+
+ pe_id = mfree(pe_id);
+ }
+
+ if (!dmesg_bad)
+ (void) write_dmesg(dmesg, dmesg_size, dmesg_id);
+}
+
+static int list_files(PStoreList *list, const char *sourcepath) {
+ _cleanup_(closedirp) DIR *dirp = NULL;
+ struct dirent *de;
+ int r;
+
+ dirp = opendir(sourcepath);
+ if (!dirp)
+ return log_error_errno(errno, "Failed to opendir %s: %m", sourcepath);
+
+ FOREACH_DIRENT(de, dirp, return log_error_errno(errno, "Failed to iterate through %s: %m", sourcepath)) {
+ _cleanup_free_ char *ifd_path = NULL;
+
+ ifd_path = path_join(sourcepath, de->d_name);
+ if (!ifd_path)
+ return log_oom();
+
+ _cleanup_free_ char *buf = NULL;
+ size_t buf_size;
+
+ /* Now read contents of pstore file */
+ r = read_full_file(ifd_path, &buf, &buf_size);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read file %s, skipping: %m", ifd_path);
+ continue;
+ }
+
+ if (!GREEDY_REALLOC(list->entries, list->n_allocated, list->n_entries + 1))
+ return log_oom();
+
+ list->entries[list->n_entries++] = (PStoreEntry) {
+ .dirent = *de,
+ .content = TAKE_PTR(buf),
+ .content_size = buf_size,
+ .is_binary = true,
+ .handled = false,
+ };
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(pstore_entries_reset) PStoreList list = {};
+ int r;
+
+ log_setup_service();
+
+ if (argc == 3) {
+ arg_sourcedir = argv[1];
+ arg_archivedir = argv[2];
+ } else if (argc > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes zero or two arguments.");
+
+ /* Ignore all parse errors */
+ (void) parse_config();
+
+ log_debug("Selected storage: %s.", pstore_storage_to_string(arg_storage));
+ log_debug("Selected unlink: %s.", yes_no(arg_unlink));
+
+ if (arg_storage == PSTORE_STORAGE_NONE)
+ /* Do nothing, intentionally, leaving pstore untouched */
+ return 0;
+
+ /* Obtain list of files in pstore */
+ r = list_files(&list, arg_sourcedir);
+ if (r < 0)
+ return r;
+
+ /* Handle each pstore file */
+ /* Sort files lexigraphically ascending, generally needed by all */
+ typesafe_qsort(list.entries, list.n_entries, compare_pstore_entries);
+
+ /* Process known file types */
+ process_dmesg_files(&list);
+
+ /* Move left over files out of pstore */
+ for (size_t n = 0; n < list.n_entries; n++)
+ move_file(&list.entries[n], NULL);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/pstore/pstore.conf b/src/pstore/pstore.conf
new file mode 100644
index 0000000..93a8b67
--- /dev/null
+++ b/src/pstore/pstore.conf
@@ -0,0 +1,16 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See pstore.conf(5) for details.
+
+[PStore]
+#Storage=external
+#Unlink=yes
diff --git a/src/quotacheck/quotacheck.c b/src/quotacheck/quotacheck.c
new file mode 100644
index 0000000..d617b0b
--- /dev/null
+++ b/src/quotacheck/quotacheck.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "main-func.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static bool arg_skip = false;
+static bool arg_force = false;
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+
+ if (streq(key, "quotacheck.mode")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (streq(value, "auto"))
+ arg_force = arg_skip = false;
+ else if (streq(value, "force"))
+ arg_force = true;
+ else if (streq(value, "skip"))
+ arg_skip = true;
+ else
+ log_warning("Invalid quotacheck.mode= parameter '%s'. Ignoring.", value);
+ }
+
+#if HAVE_SYSV_COMPAT
+ else if (streq(key, "forcequotacheck") && !value) {
+ log_warning("Please use 'quotacheck.mode=force' rather than 'forcequotacheck' on the kernel command line.");
+ arg_force = true;
+ }
+#endif
+
+ return 0;
+}
+
+static void test_files(void) {
+
+#if HAVE_SYSV_COMPAT
+ if (access("/forcequotacheck", F_OK) >= 0) {
+ log_error("Please pass 'quotacheck.mode=force' on the kernel command line rather than creating /forcequotacheck on the root file system.");
+ arg_force = true;
+ }
+#endif
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_setup_service();
+
+ if (argc > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes no arguments.");
+
+ umask(0022);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ test_files();
+
+ if (!arg_force) {
+ if (arg_skip)
+ return 0;
+
+ if (access("/run/systemd/quotacheck", F_OK) < 0)
+ return 0;
+ }
+
+ r = safe_fork("(quotacheck)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_WAIT|FORK_LOG, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ static const char * const cmdline[] = {
+ QUOTACHECK,
+ "-anug",
+ NULL
+ };
+
+ /* Child */
+
+ execv(cmdline[0], (char**) cmdline);
+ _exit(EXIT_FAILURE); /* Operational error */
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/random-seed/random-seed.c b/src/random-seed/random-seed.c
new file mode 100644
index 0000000..8f8766c
--- /dev/null
+++ b/src/random-seed/random-seed.c
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/random.h>
+#include <sys/ioctl.h>
+#if USE_SYS_RANDOM_H
+# include <sys/random.h>
+#endif
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "missing_random.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "util.h"
+#include "xattr-util.h"
+
+typedef enum CreditEntropy {
+ CREDIT_ENTROPY_NO_WAY,
+ CREDIT_ENTROPY_YES_PLEASE,
+ CREDIT_ENTROPY_YES_FORCED,
+} CreditEntropy;
+
+static CreditEntropy may_credit(int seed_fd) {
+ _cleanup_free_ char *creditable = NULL;
+ const char *e;
+ int r;
+
+ assert(seed_fd >= 0);
+
+ e = getenv("SYSTEMD_RANDOM_SEED_CREDIT");
+ if (!e) {
+ log_debug("$SYSTEMD_RANDOM_SEED_CREDIT is not set, not crediting entropy.");
+ return CREDIT_ENTROPY_NO_WAY;
+ }
+ if (streq(e, "force")) {
+ log_debug("$SYSTEMD_RANDOM_SEED_CREDIT is set to 'force', crediting entropy.");
+ return CREDIT_ENTROPY_YES_FORCED;
+ }
+
+ r = parse_boolean(e);
+ if (r <= 0) {
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_RANDOM_SEED_CREDIT, not crediting entropy: %m");
+ else
+ log_debug("Crediting entropy is turned off via $SYSTEMD_RANDOM_SEED_CREDIT, not crediting entropy.");
+
+ return CREDIT_ENTROPY_NO_WAY;
+ }
+
+ /* Determine if the file is marked as creditable */
+ r = fgetxattr_malloc(seed_fd, "user.random-seed-creditable", &creditable);
+ if (r < 0) {
+ if (IN_SET(r, -ENODATA, -ENOSYS, -EOPNOTSUPP))
+ log_debug_errno(r, "Seed file is not marked as creditable, not crediting.");
+ else
+ log_warning_errno(r, "Failed to read extended attribute, ignoring: %m");
+
+ return CREDIT_ENTROPY_NO_WAY;
+ }
+
+ r = parse_boolean(creditable);
+ if (r <= 0) {
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse user.random-seed-creditable extended attribute, ignoring: %s", creditable);
+ else
+ log_debug("Seed file is marked as not creditable, not crediting.");
+
+ return CREDIT_ENTROPY_NO_WAY;
+ }
+
+ /* Don't credit the random seed if we are in first-boot mode, because we are supposed to start from
+ * scratch. This is a safety precaution for cases where we people ship "golden" images with empty
+ * /etc but populated /var that contains a random seed. */
+ if (access("/run/systemd/first-boot", F_OK) < 0) {
+
+ if (errno != ENOENT) {
+ log_warning_errno(errno, "Failed to check whether we are in first-boot mode, not crediting entropy: %m");
+ return CREDIT_ENTROPY_NO_WAY;
+ }
+
+ /* If ENOENT all is good, we are not in first-boot mode. */
+ } else {
+ log_debug("Not crediting entropy, since booted in first-boot mode.");
+ return CREDIT_ENTROPY_NO_WAY;
+ }
+
+ return CREDIT_ENTROPY_YES_PLEASE;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_close_ int seed_fd = -1, random_fd = -1;
+ bool read_seed_file, write_seed_file, synchronous;
+ _cleanup_free_ void* buf = NULL;
+ size_t buf_size;
+ struct stat st;
+ ssize_t k;
+ int r;
+
+ log_setup_service();
+
+ if (argc != 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program requires one argument.");
+
+ umask(0022);
+
+ buf_size = random_pool_size();
+
+ r = mkdir_parents(RANDOM_SEED, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create directory " RANDOM_SEED_DIR ": %m");
+
+ /* When we load the seed we read it and write it to the device and then immediately update the saved seed with
+ * new data, to make sure the next boot gets seeded differently. */
+
+ if (streq(argv[1], "load")) {
+
+ seed_fd = open(RANDOM_SEED, O_RDWR|O_CLOEXEC|O_NOCTTY|O_CREAT, 0600);
+ if (seed_fd < 0) {
+ int open_rw_error = -errno;
+
+ write_seed_file = false;
+
+ seed_fd = open(RANDOM_SEED, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (seed_fd < 0) {
+ bool missing = errno == ENOENT;
+
+ log_full_errno(missing ? LOG_DEBUG : LOG_ERR,
+ open_rw_error, "Failed to open " RANDOM_SEED " for writing: %m");
+ r = log_full_errno(missing ? LOG_DEBUG : LOG_ERR,
+ errno, "Failed to open " RANDOM_SEED " for reading: %m");
+ return missing ? 0 : r;
+ }
+ } else
+ write_seed_file = true;
+
+ random_fd = open("/dev/urandom", O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
+ if (random_fd < 0)
+ return log_error_errno(errno, "Failed to open /dev/urandom: %m");
+
+ read_seed_file = true;
+ synchronous = true; /* make this invocation a synchronous barrier for random pool initialization */
+
+ } else if (streq(argv[1], "save")) {
+
+ random_fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (random_fd < 0)
+ return log_error_errno(errno, "Failed to open /dev/urandom: %m");
+
+ seed_fd = open(RANDOM_SEED, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_CREAT, 0600);
+ if (seed_fd < 0)
+ return log_error_errno(errno, "Failed to open " RANDOM_SEED ": %m");
+
+ read_seed_file = false;
+ write_seed_file = true;
+ synchronous = false;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown verb '%s'.", argv[1]);
+
+ if (fstat(seed_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat() seed file " RANDOM_SEED ": %m");
+
+ /* If the seed file is larger than what we expect, then honour the existing size and save/restore as much as it says */
+ if ((uint64_t) st.st_size > buf_size)
+ buf_size = MIN(st.st_size, RANDOM_POOL_SIZE_MAX);
+
+ buf = malloc(buf_size);
+ if (!buf)
+ return log_oom();
+
+ if (read_seed_file) {
+ sd_id128_t mid;
+
+ /* First, let's write the machine ID into /dev/urandom, not crediting entropy. Why? As an
+ * extra protection against "golden images" that are put together sloppily, i.e. images which
+ * are duplicated on multiple systems but where the random seed file is not properly
+ * reset. Frequently the machine ID is properly reset on those systems however (simply
+ * because it's easier to notice, if it isn't due to address clashes and so on, while random
+ * seed equivalence is generally not noticed easily), hence let's simply write the machined
+ * ID into the random pool too. */
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get machine ID, ignoring: %m");
+ else {
+ r = loop_write(random_fd, &mid, sizeof(mid), false);
+ if (r < 0)
+ log_debug_errno(r, "Failed to write machine ID to /dev/urandom, ignoring: %m");
+ }
+
+ k = loop_read(seed_fd, buf, buf_size, false);
+ if (k < 0)
+ log_error_errno(k, "Failed to read seed from " RANDOM_SEED ": %m");
+ else if (k == 0)
+ log_debug("Seed file " RANDOM_SEED " not yet initialized, proceeding.");
+ else {
+ CreditEntropy lets_credit;
+
+ (void) lseek(seed_fd, 0, SEEK_SET);
+
+ lets_credit = may_credit(seed_fd);
+
+ /* Before we credit or use the entropy, let's make sure to securely drop the
+ * creditable xattr from the file, so that we never credit the same random seed
+ * again. Note that further down we'll write a new seed again, and likely mark it as
+ * credible again, hence this is just paranoia to close the short time window between
+ * the time we upload the random seed into the kernel and download the new one from
+ * it. */
+
+ if (fremovexattr(seed_fd, "user.random-seed-creditable") < 0) {
+ if (!IN_SET(errno, ENODATA, ENOSYS, EOPNOTSUPP))
+ log_warning_errno(errno, "Failed to remove extended attribute, ignoring: %m");
+
+ /* Otherwise, there was no creditable flag set, which is OK. */
+ } else {
+ r = fsync_full(seed_fd);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to synchronize seed to disk, not crediting entropy: %m");
+
+ if (lets_credit == CREDIT_ENTROPY_YES_PLEASE)
+ lets_credit = CREDIT_ENTROPY_NO_WAY;
+ }
+ }
+
+ r = random_write_entropy(random_fd, buf, k,
+ IN_SET(lets_credit, CREDIT_ENTROPY_YES_PLEASE, CREDIT_ENTROPY_YES_FORCED));
+ if (r < 0)
+ log_error_errno(r, "Failed to write seed to /dev/urandom: %m");
+ }
+ }
+
+ if (write_seed_file) {
+ bool getrandom_worked = false;
+
+ /* This is just a safety measure. Given that we are root and most likely created the file
+ * ourselves the mode and owner should be correct anyway. */
+ r = fchmod_and_chown(seed_fd, 0600, 0, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust seed file ownership and access mode.");
+
+ /* Let's make this whole job asynchronous, i.e. let's make ourselves a barrier for
+ * proper initialization of the random pool. */
+ k = getrandom(buf, buf_size, GRND_NONBLOCK);
+ if (k < 0 && errno == EAGAIN && synchronous) {
+ log_notice("Kernel entropy pool is not initialized yet, waiting until it is.");
+ k = getrandom(buf, buf_size, 0); /* retry synchronously */
+ }
+ if (k < 0)
+ log_debug_errno(errno, "Failed to read random data with getrandom(), falling back to /dev/urandom: %m");
+ else if ((size_t) k < buf_size)
+ log_debug("Short read from getrandom(), falling back to /dev/urandom: %m");
+ else
+ getrandom_worked = true;
+
+ if (!getrandom_worked) {
+ /* Retry with classic /dev/urandom */
+ k = loop_read(random_fd, buf, buf_size, false);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read new seed from /dev/urandom: %m");
+ if (k == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Got EOF while reading from /dev/urandom.");
+ }
+
+ r = loop_write(seed_fd, buf, (size_t) k, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write new random seed file: %m");
+
+ if (ftruncate(seed_fd, k) < 0)
+ return log_error_errno(r, "Failed to truncate random seed file: %m");
+
+ r = fsync_full(seed_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synchronize seed file: %m");
+
+ /* If we got this random seed data from getrandom() the data is suitable for crediting
+ * entropy later on. Let's keep that in mind by setting an extended attribute. on the file */
+ if (getrandom_worked)
+ if (fsetxattr(seed_fd, "user.random-seed-creditable", "1", 1, 0) < 0)
+ log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to mark seed file as creditable, ignoring: %m");
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/rc-local-generator/rc-local-generator.c b/src/rc-local-generator/rc-local-generator.c
new file mode 100644
index 0000000..ad8dfab
--- /dev/null
+++ b/src/rc-local-generator/rc-local-generator.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "generator.h"
+#include "log.h"
+#include "mkdir.h"
+#include "string-util.h"
+#include "util.h"
+
+static const char *arg_dest = NULL;
+
+/* So you are reading this, and might wonder: why is this implemented as a generator rather than as a plain, statically
+ * enabled service that carries appropriate ConditionFileIsExecutable= lines? The answer is this: conditions bypass
+ * execution of a service's binary, but they have no influence on unit dependencies. Thus, a service that is
+ * conditioned out will still act as synchronization point in the dependency tree, and we'd rather not have that for
+ * these two legacy scripts. */
+
+static int add_symlink(const char *service, const char *where) {
+ const char *from, *to;
+
+ assert(service);
+ assert(where);
+
+ from = strjoina(SYSTEM_DATA_UNIT_PATH "/", service);
+ to = strjoina(arg_dest, "/", where, ".wants/", service);
+
+ (void) mkdir_parents_label(to, 0755);
+
+ if (symlink(from, to) < 0) {
+ if (errno == EEXIST)
+ return 0;
+
+ return log_error_errno(errno, "Failed to create symlink %s: %m", to);
+ }
+
+ return 1;
+}
+
+static int check_executable(const char *path) {
+ assert(path);
+
+ if (access(path, X_OK) < 0) {
+ if (errno == ENOENT)
+ return log_debug_errno(errno, "%s does not exist, skipping.", path);
+ if (errno == EACCES)
+ return log_info_errno(errno, "%s is not marked executable, skipping.", path);
+
+ return log_warning_errno(errno, "Couldn't determine if %s exists and is executable, skipping: %m", path);
+ }
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r = 0, k = 0;
+
+ assert_se(arg_dest = dest);
+
+ if (check_executable(RC_LOCAL_PATH) >= 0) {
+ log_debug("Automatically adding rc-local.service.");
+
+ r = add_symlink("rc-local.service", "multi-user.target");
+ }
+
+ return r < 0 ? r : k;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/remount-fs/remount-fs.c b/src/remount-fs/remount-fs.c
new file mode 100644
index 0000000..19f5bd0
--- /dev/null
+++ b/src/remount-fs/remount-fs.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <mntent.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "env-util.h"
+#include "exit-status.h"
+#include "fstab-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "mount-setup.h"
+#include "mount-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "strv.h"
+#include "util.h"
+
+/* Goes through /etc/fstab and remounts all API file systems, applying options that are in /etc/fstab that systemd
+ * might not have respected */
+
+static int track_pid(Hashmap **h, const char *path, pid_t pid) {
+ _cleanup_free_ char *c = NULL;
+ int r;
+
+ assert(h);
+ assert(path);
+ assert(pid_is_valid(pid));
+
+ r = hashmap_ensure_allocated(h, NULL);
+ if (r < 0)
+ return log_oom();
+
+ c = strdup(path);
+ if (!c)
+ return log_oom();
+
+ r = hashmap_put(*h, PID_TO_PTR(pid), c);
+ if (r < 0)
+ return log_oom();
+
+ TAKE_PTR(c);
+ return 0;
+}
+
+static int do_remount(const char *path, bool force_rw, Hashmap **pids) {
+ pid_t pid;
+ int r;
+
+ log_debug("Remounting %s...", path);
+
+ r = safe_fork(force_rw ? "(remount-rw)" : "(remount)",
+ FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execv(MOUNT_PATH,
+ STRV_MAKE(MOUNT_PATH,
+ path,
+ "-o",
+ force_rw ? "remount,rw" : "remount"));
+ log_error_errno(errno, "Failed to execute " MOUNT_PATH ": %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Parent */
+ return track_pid(pids, path, pid);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
+ _cleanup_endmntent_ FILE *f = NULL;
+ bool has_root = false;
+ struct mntent* me;
+ int r;
+
+ log_setup_service();
+
+ if (argc > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes no arguments.");
+
+ umask(0022);
+
+ f = setmntent(fstab_path(), "re");
+ if (!f) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open %s: %m", fstab_path());
+ } else
+ while ((me = getmntent(f))) {
+ /* Remount the root fs, /usr, and all API VFSs */
+ if (!mount_point_is_api(me->mnt_dir) &&
+ !PATH_IN_SET(me->mnt_dir, "/", "/usr"))
+ continue;
+
+ if (path_equal(me->mnt_dir, "/"))
+ has_root = true;
+
+ r = do_remount(me->mnt_dir, false, &pids);
+ if (r < 0)
+ return r;
+ }
+
+ if (!has_root) {
+ /* The $SYSTEMD_REMOUNT_ROOT_RW environment variable is set by systemd-gpt-auto-generator to tell us
+ * whether to remount things. We honour it only if there's no explicit line in /etc/fstab configured
+ * which takes precedence. */
+
+ r = getenv_bool("SYSTEMD_REMOUNT_ROOT_RW");
+ if (r < 0 && r != -ENXIO)
+ log_warning_errno(r, "Failed to parse $SYSTEMD_REMOUNT_ROOT_RW, ignoring: %m");
+
+ if (r > 0) {
+ r = do_remount("/", true, &pids);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = 0;
+ while (!hashmap_isempty(pids)) {
+ _cleanup_free_ char *s = NULL;
+ siginfo_t si = {};
+
+ if (waitid(P_ALL, 0, &si, WEXITED) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return log_error_errno(errno, "waitid() failed: %m");
+ }
+
+ s = hashmap_remove(pids, PID_TO_PTR(si.si_pid));
+ if (s &&
+ !is_clean_exit(si.si_code, si.si_status, EXIT_CLEAN_COMMAND, NULL)) {
+ if (si.si_code == CLD_EXITED)
+ log_error(MOUNT_PATH " for %s exited with exit status %i.", s, si.si_status);
+ else
+ log_error(MOUNT_PATH " for %s terminated by signal %s.", s, signal_to_string(si.si_status));
+
+ r = -ENOEXEC;
+ }
+ }
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/reply-password/reply-password.c b/src/reply-password/reply-password.c
new file mode 100644
index 0000000..a73334e
--- /dev/null
+++ b/src/reply-password/reply-password.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <sys/un.h>
+
+#include "alloc-util.h"
+#include "main-func.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static int send_on_socket(int fd, const char *socket_name, const void *packet, size_t size) {
+ union sockaddr_union sa = {};
+ int salen;
+
+ assert(fd >= 0);
+ assert(socket_name);
+ assert(packet);
+
+ salen = sockaddr_un_set_path(&sa.un, socket_name);
+ if (salen < 0)
+ return log_error_errno(salen, "Specified socket path for AF_UNIX socket invalid, refusing: %s", socket_name);
+
+ if (sendto(fd, packet, size, MSG_NOSIGNAL, &sa.sa, salen) < 0)
+ return log_error_errno(errno, "Failed to send: %m");
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(erase_and_freep) char *packet = NULL;
+ _cleanup_close_ int fd = -1;
+ size_t length = 0;
+ int r;
+
+ log_setup_service();
+
+ if (argc != 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Wrong number of arguments.");
+
+ if (streq(argv[1], "1")) {
+ _cleanup_(erase_and_freep) char *line = NULL;
+
+ r = read_line(stdin, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read password: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Got EOF while reading password.");
+
+ packet = strjoin("+", line);
+ if (!packet)
+ return log_oom();
+
+ length = 1 + strlen(line) + 1;
+
+ } else if (streq(argv[1], "0")) {
+ packet = strdup("-");
+ if (!packet)
+ return log_oom();
+
+ length = 1;
+
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid first argument %s", argv[1]);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ return send_on_socket(fd, argv[2], packet, length);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/resolve/RFCs b/src/resolve/RFCs
new file mode 100644
index 0000000..7190c16
--- /dev/null
+++ b/src/resolve/RFCs
@@ -0,0 +1,60 @@
+Y = Comprehensively Implemented, to the point appropriate for resolved
+D = Comprehensively Implemented, by a dependency of resolved
+! = Missing and something we might want to implement
+~ = Needs no explicit support or doesn't apply
+? = Is this relevant today?
+ = We are working on this
+
+Y https://tools.ietf.org/html/rfc1034 → DOMAIN NAMES - CONCEPTS AND FACILITIES
+Y https://tools.ietf.org/html/rfc1035 → DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
+? https://tools.ietf.org/html/rfc1101 → DNS Encoding of Network Names and Other Types
+Y https://tools.ietf.org/html/rfc1123 → Requirements for Internet Hosts — Application and Support
+~ https://tools.ietf.org/html/rfc1464 → Using the Domain Name System To Store Arbitrary String Attributes
+Y https://tools.ietf.org/html/rfc1536 → Common DNS Implementation Errors and Suggested Fixes
+Y https://tools.ietf.org/html/rfc1876 → A Means for Expressing Location Information in the Domain Name System
+Y https://tools.ietf.org/html/rfc2181 → Clarifications to the DNS Specification
+Y https://tools.ietf.org/html/rfc2308 → Negative Caching of DNS Queries (DNS NCACHE)
+Y https://tools.ietf.org/html/rfc2782 → A DNS RR for specifying the location of services (DNS SRV)
+D https://tools.ietf.org/html/rfc3492 → Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA)
+Y https://tools.ietf.org/html/rfc3596 → DNS Extensions to Support IP Version 6
+Y https://tools.ietf.org/html/rfc3597 → Handling of Unknown DNS Resource Record (RR) Types
+Y https://tools.ietf.org/html/rfc4033 → DNS Security Introduction and Requirements
+Y https://tools.ietf.org/html/rfc4034 → Resource Records for the DNS Security Extensions
+Y https://tools.ietf.org/html/rfc4035 → Protocol Modifications for the DNS Security Extensions
+! https://tools.ietf.org/html/rfc4183 → A Suggested Scheme for DNS Resolution of Networks and Gateways
+Y https://tools.ietf.org/html/rfc4255 → Using DNS to Securely Publish Secure Shell (SSH) Key Fingerprints
+Y https://tools.ietf.org/html/rfc4343 → Domain Name System (DNS) Case Insensitivity Clarification
+~ https://tools.ietf.org/html/rfc4470 → Minimally Covering NSEC Records and DNSSEC On-line Signing
+Y https://tools.ietf.org/html/rfc4501 → Domain Name System Uniform Resource Identifiers
+Y https://tools.ietf.org/html/rfc4509 → Use of SHA-256 in DNSSEC Delegation Signer (DS) Resource Records (RRs)
+~ https://tools.ietf.org/html/rfc4592 → The Role of Wildcards in the Domain Name System
+~ https://tools.ietf.org/html/rfc4697 → Observed DNS Resolution Misbehavior
+Y https://tools.ietf.org/html/rfc4795 → Link-Local Multicast Name Resolution (LLMNR)
+Y https://tools.ietf.org/html/rfc5011 → Automated Updates of DNS Security (DNSSEC) Trust Anchors
+Y https://tools.ietf.org/html/rfc5155 → DNS Security (DNSSEC) Hashed Authenticated Denial of Existence
+Y https://tools.ietf.org/html/rfc5452 → Measures for Making DNS More Resilient against Forged Answers
+Y https://tools.ietf.org/html/rfc5702 → Use of SHA-2 Algorithms with RSA in DNSKEY and RRSIG Resource Records for DNSSEC
+Y https://tools.ietf.org/html/rfc5890 → Internationalized Domain Names for Applications (IDNA): Definitions and Document Framework
+Y https://tools.ietf.org/html/rfc5891 → Internationalized Domain Names in Applications (IDNA): Protocol
+Y https://tools.ietf.org/html/rfc5966 → DNS Transport over TCP - Implementation Requirements
+Y https://tools.ietf.org/html/rfc6303 → Locally Served DNS Zones
+Y https://tools.ietf.org/html/rfc6604 → xNAME RCODE and Status Bits Clarification
+Y https://tools.ietf.org/html/rfc6605 → Elliptic Curve Digital Signature Algorithm (DSA) for DNSSEC
+ https://tools.ietf.org/html/rfc6672 → DNAME Redirection in the DNS
+! https://tools.ietf.org/html/rfc6731 → Improved Recursive DNS Server Selection for Multi-Interfaced Nodes
+Y https://tools.ietf.org/html/rfc6761 → Special-Use Domain Names
+ https://tools.ietf.org/html/rfc6762 → Multicast DNS
+ https://tools.ietf.org/html/rfc6763 → DNS-Based Service Discovery
+~ https://tools.ietf.org/html/rfc6781 → DNSSEC Operational Practices, Version 2
+Y https://tools.ietf.org/html/rfc6840 → Clarifications and Implementation Notes for DNS Security (DNSSEC)
+Y https://tools.ietf.org/html/rfc6891 → Extension Mechanisms for DNS (EDNS(0))
+Y https://tools.ietf.org/html/rfc6944 → Applicability Statement: DNS Security (DNSSEC) DNSKEY Algorithm Implementation Status
+Y https://tools.ietf.org/html/rfc6975 → Signaling Cryptographic Algorithm Understanding in DNS Security Extensions (DNSSEC)
+Y https://tools.ietf.org/html/rfc7129 → Authenticated Denial of Existence in the DNS
+Y https://tools.ietf.org/html/rfc7646 → Definition and Use of DNSSEC Negative Trust Anchors
+~ https://tools.ietf.org/html/rfc7719 → DNS Terminology
+Y https://tools.ietf.org/html/rfc8080 → Edwards-Curve Digital Security Algorithm (EdDSA) for DNSSEC
+
+Also relevant:
+
+ https://www.iab.org/documents/correspondence-reports-documents/2013-2/iab-statement-dotless-domains-considered-harmful/
diff --git a/src/resolve/dns-type.c b/src/resolve/dns-type.c
new file mode 100644
index 0000000..1f73347
--- /dev/null
+++ b/src/resolve/dns-type.c
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/socket.h>
+#include <errno.h>
+
+#include "dns-type.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+typedef const struct {
+ uint16_t type;
+ const char *name;
+} dns_type;
+
+static const struct dns_type_name *
+lookup_dns_type (register const char *str, register GPERF_LEN_TYPE len);
+
+#include "dns_type-from-name.h"
+#include "dns_type-to-name.h"
+
+int dns_type_from_string(const char *s) {
+ const struct dns_type_name *sc;
+
+ assert(s);
+
+ sc = lookup_dns_type(s, strlen(s));
+ if (sc)
+ return sc->id;
+
+ s = startswith_no_case(s, "TYPE");
+ if (s) {
+ unsigned x;
+
+ if (safe_atou(s, &x) >= 0 &&
+ x <= UINT16_MAX)
+ return (int) x;
+ }
+
+ return _DNS_TYPE_INVALID;
+}
+
+bool dns_type_is_pseudo(uint16_t type) {
+
+ /* Checks whether the specified type is a "pseudo-type". What
+ * a "pseudo-type" precisely is, is defined only very weakly,
+ * but apparently entails all RR types that are not actually
+ * stored as RRs on the server and should hence also not be
+ * cached. We use this list primarily to validate NSEC type
+ * bitfields, and to verify what to cache. */
+
+ return IN_SET(type,
+ 0, /* A Pseudo RR type, according to RFC 2931 */
+ DNS_TYPE_ANY,
+ DNS_TYPE_AXFR,
+ DNS_TYPE_IXFR,
+ DNS_TYPE_OPT,
+ DNS_TYPE_TSIG,
+ DNS_TYPE_TKEY
+ );
+}
+
+bool dns_class_is_pseudo(uint16_t class) {
+ return class == DNS_TYPE_ANY;
+}
+
+bool dns_type_is_valid_query(uint16_t type) {
+
+ /* The types valid as questions in packets */
+
+ return !IN_SET(type,
+ 0,
+ DNS_TYPE_OPT,
+ DNS_TYPE_TSIG,
+ DNS_TYPE_TKEY,
+
+ /* RRSIG are technically valid as questions, but we refuse doing explicit queries for them, as
+ * they aren't really payload, but signatures for payload, and cannot be validated on their
+ * own. After all they are the signatures, and have no signatures of their own validating
+ * them. */
+ DNS_TYPE_RRSIG);
+}
+
+bool dns_type_is_zone_transer(uint16_t type) {
+
+ /* Zone transfers, either normal or incremental */
+
+ return IN_SET(type,
+ DNS_TYPE_AXFR,
+ DNS_TYPE_IXFR);
+}
+
+bool dns_type_is_valid_rr(uint16_t type) {
+
+ /* The types valid as RR in packets (but not necessarily
+ * stored on servers). */
+
+ return !IN_SET(type,
+ DNS_TYPE_ANY,
+ DNS_TYPE_AXFR,
+ DNS_TYPE_IXFR);
+}
+
+bool dns_class_is_valid_rr(uint16_t class) {
+ return class != DNS_CLASS_ANY;
+}
+
+bool dns_type_may_redirect(uint16_t type) {
+ /* The following record types should never be redirected using
+ * CNAME/DNAME RRs. See
+ * <https://tools.ietf.org/html/rfc4035#section-2.5>. */
+
+ if (dns_type_is_pseudo(type))
+ return false;
+
+ return !IN_SET(type,
+ DNS_TYPE_CNAME,
+ DNS_TYPE_DNAME,
+ DNS_TYPE_NSEC3,
+ DNS_TYPE_NSEC,
+ DNS_TYPE_RRSIG,
+ DNS_TYPE_NXT,
+ DNS_TYPE_SIG,
+ DNS_TYPE_KEY);
+}
+
+bool dns_type_may_wildcard(uint16_t type) {
+
+ /* The following records may not be expanded from wildcard RRsets */
+
+ if (dns_type_is_pseudo(type))
+ return false;
+
+ return !IN_SET(type,
+ DNS_TYPE_NSEC3,
+ DNS_TYPE_SOA,
+
+ /* Prohibited by https://tools.ietf.org/html/rfc4592#section-4.4 */
+ DNS_TYPE_DNAME);
+}
+
+bool dns_type_apex_only(uint16_t type) {
+
+ /* Returns true for all RR types that may only appear signed in a zone apex */
+
+ return IN_SET(type,
+ DNS_TYPE_SOA,
+ DNS_TYPE_NS, /* this one can appear elsewhere, too, but not signed */
+ DNS_TYPE_DNSKEY,
+ DNS_TYPE_NSEC3PARAM);
+}
+
+bool dns_type_is_dnssec(uint16_t type) {
+ return IN_SET(type,
+ DNS_TYPE_DS,
+ DNS_TYPE_DNSKEY,
+ DNS_TYPE_RRSIG,
+ DNS_TYPE_NSEC,
+ DNS_TYPE_NSEC3,
+ DNS_TYPE_NSEC3PARAM);
+}
+
+bool dns_type_is_obsolete(uint16_t type) {
+ return IN_SET(type,
+ /* Obsoleted by RFC 973 */
+ DNS_TYPE_MD,
+ DNS_TYPE_MF,
+ DNS_TYPE_MAILA,
+
+ /* Kinda obsoleted by RFC 2505 */
+ DNS_TYPE_MB,
+ DNS_TYPE_MG,
+ DNS_TYPE_MR,
+ DNS_TYPE_MINFO,
+ DNS_TYPE_MAILB,
+
+ /* RFC1127 kinda obsoleted this by recommending against its use */
+ DNS_TYPE_WKS,
+
+ /* Declared historical by RFC 6563 */
+ DNS_TYPE_A6,
+
+ /* Obsoleted by DNSSEC-bis */
+ DNS_TYPE_NXT,
+
+ /* RFC 1035 removed support for concepts that needed this from RFC 883 */
+ DNS_TYPE_NULL);
+}
+
+bool dns_type_needs_authentication(uint16_t type) {
+
+ /* Returns true for all (non-obsolete) RR types where records are not useful if they aren't
+ * authenticated. I.e. everything that contains crypto keys. */
+
+ return IN_SET(type,
+ DNS_TYPE_CERT,
+ DNS_TYPE_SSHFP,
+ DNS_TYPE_IPSECKEY,
+ DNS_TYPE_DS,
+ DNS_TYPE_DNSKEY,
+ DNS_TYPE_TLSA,
+ DNS_TYPE_CDNSKEY,
+ DNS_TYPE_OPENPGPKEY,
+ DNS_TYPE_CAA);
+}
+
+int dns_type_to_af(uint16_t t) {
+ switch (t) {
+
+ case DNS_TYPE_A:
+ return AF_INET;
+
+ case DNS_TYPE_AAAA:
+ return AF_INET6;
+
+ case DNS_TYPE_ANY:
+ return AF_UNSPEC;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+const char *dns_class_to_string(uint16_t class) {
+
+ switch (class) {
+
+ case DNS_CLASS_IN:
+ return "IN";
+
+ case DNS_CLASS_ANY:
+ return "ANY";
+ }
+
+ return NULL;
+}
+
+int dns_class_from_string(const char *s) {
+
+ if (!s)
+ return _DNS_CLASS_INVALID;
+
+ if (strcaseeq(s, "IN"))
+ return DNS_CLASS_IN;
+ else if (strcaseeq(s, "ANY"))
+ return DNS_CLASS_ANY;
+
+ return _DNS_CLASS_INVALID;
+}
+
+const char* tlsa_cert_usage_to_string(uint8_t cert_usage) {
+
+ switch (cert_usage) {
+
+ case 0:
+ return "CA constraint";
+
+ case 1:
+ return "Service certificate constraint";
+
+ case 2:
+ return "Trust anchor assertion";
+
+ case 3:
+ return "Domain-issued certificate";
+
+ case 4 ... 254:
+ return "Unassigned";
+
+ case 255:
+ return "Private use";
+ }
+
+ return NULL; /* clang cannot count that we covered everything */
+}
+
+const char* tlsa_selector_to_string(uint8_t selector) {
+ switch (selector) {
+
+ case 0:
+ return "Full Certificate";
+
+ case 1:
+ return "SubjectPublicKeyInfo";
+
+ case 2 ... 254:
+ return "Unassigned";
+
+ case 255:
+ return "Private use";
+ }
+
+ return NULL;
+}
+
+const char* tlsa_matching_type_to_string(uint8_t selector) {
+
+ switch (selector) {
+
+ case 0:
+ return "No hash used";
+
+ case 1:
+ return "SHA-256";
+
+ case 2:
+ return "SHA-512";
+
+ case 3 ... 254:
+ return "Unassigned";
+
+ case 255:
+ return "Private use";
+ }
+
+ return NULL;
+}
diff --git a/src/resolve/dns-type.h b/src/resolve/dns-type.h
new file mode 100644
index 0000000..4370db9
--- /dev/null
+++ b/src/resolve/dns-type.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+/* DNS record types, taken from
+ * http://www.iana.org/assignments/dns-parameters/dns-parameters.xhtml.
+ */
+enum {
+ /* Normal records */
+ DNS_TYPE_A = 0x01,
+ DNS_TYPE_NS,
+ DNS_TYPE_MD,
+ DNS_TYPE_MF,
+ DNS_TYPE_CNAME,
+ DNS_TYPE_SOA,
+ DNS_TYPE_MB,
+ DNS_TYPE_MG,
+ DNS_TYPE_MR,
+ DNS_TYPE_NULL,
+ DNS_TYPE_WKS,
+ DNS_TYPE_PTR,
+ DNS_TYPE_HINFO,
+ DNS_TYPE_MINFO,
+ DNS_TYPE_MX,
+ DNS_TYPE_TXT,
+ DNS_TYPE_RP,
+ DNS_TYPE_AFSDB,
+ DNS_TYPE_X25,
+ DNS_TYPE_ISDN,
+ DNS_TYPE_RT,
+ DNS_TYPE_NSAP,
+ DNS_TYPE_NSAP_PTR,
+ DNS_TYPE_SIG,
+ DNS_TYPE_KEY,
+ DNS_TYPE_PX,
+ DNS_TYPE_GPOS,
+ DNS_TYPE_AAAA,
+ DNS_TYPE_LOC,
+ DNS_TYPE_NXT,
+ DNS_TYPE_EID,
+ DNS_TYPE_NIMLOC,
+ DNS_TYPE_SRV,
+ DNS_TYPE_ATMA,
+ DNS_TYPE_NAPTR,
+ DNS_TYPE_KX,
+ DNS_TYPE_CERT,
+ DNS_TYPE_A6,
+ DNS_TYPE_DNAME,
+ DNS_TYPE_SINK,
+ DNS_TYPE_OPT, /* EDNS0 option */
+ DNS_TYPE_APL,
+ DNS_TYPE_DS,
+ DNS_TYPE_SSHFP,
+ DNS_TYPE_IPSECKEY,
+ DNS_TYPE_RRSIG,
+ DNS_TYPE_NSEC,
+ DNS_TYPE_DNSKEY,
+ DNS_TYPE_DHCID,
+ DNS_TYPE_NSEC3,
+ DNS_TYPE_NSEC3PARAM,
+ DNS_TYPE_TLSA,
+
+ DNS_TYPE_HIP = 0x37,
+ DNS_TYPE_NINFO,
+ DNS_TYPE_RKEY,
+ DNS_TYPE_TALINK,
+ DNS_TYPE_CDS,
+ DNS_TYPE_CDNSKEY,
+ DNS_TYPE_OPENPGPKEY,
+
+ DNS_TYPE_SPF = 0x63,
+ DNS_TYPE_NID,
+ DNS_TYPE_L32,
+ DNS_TYPE_L64,
+ DNS_TYPE_LP,
+ DNS_TYPE_EUI48,
+ DNS_TYPE_EUI64,
+
+ DNS_TYPE_TKEY = 0xF9,
+ DNS_TYPE_TSIG,
+ DNS_TYPE_IXFR,
+ DNS_TYPE_AXFR,
+ DNS_TYPE_MAILB,
+ DNS_TYPE_MAILA,
+ DNS_TYPE_ANY,
+ DNS_TYPE_URI,
+ DNS_TYPE_CAA,
+ DNS_TYPE_TA = 0x8000,
+ DNS_TYPE_DLV,
+
+ _DNS_TYPE_MAX,
+ _DNS_TYPE_INVALID = -1
+};
+
+assert_cc(DNS_TYPE_SSHFP == 44);
+assert_cc(DNS_TYPE_TLSA == 52);
+assert_cc(DNS_TYPE_ANY == 255);
+
+/* DNS record classes, see RFC 1035 */
+enum {
+ DNS_CLASS_IN = 0x01,
+ DNS_CLASS_ANY = 0xFF,
+
+ _DNS_CLASS_MAX,
+ _DNS_CLASS_INVALID = -1
+};
+
+#define _DNS_CLASS_STRING_MAX (sizeof "CLASS" + DECIMAL_STR_MAX(uint16_t))
+#define _DNS_TYPE_STRING_MAX (sizeof "CLASS" + DECIMAL_STR_MAX(uint16_t))
+
+bool dns_type_is_pseudo(uint16_t type);
+bool dns_type_is_valid_query(uint16_t type);
+bool dns_type_is_valid_rr(uint16_t type);
+bool dns_type_may_redirect(uint16_t type);
+bool dns_type_is_dnssec(uint16_t type);
+bool dns_type_is_obsolete(uint16_t type);
+bool dns_type_may_wildcard(uint16_t type);
+bool dns_type_apex_only(uint16_t type);
+bool dns_type_needs_authentication(uint16_t type);
+bool dns_type_is_zone_transer(uint16_t type);
+int dns_type_to_af(uint16_t type);
+
+bool dns_class_is_pseudo(uint16_t class);
+bool dns_class_is_valid_rr(uint16_t class);
+
+/* TYPE?? follows http://tools.ietf.org/html/rfc3597#section-5 */
+const char *dns_type_to_string(int type);
+int dns_type_from_string(const char *s);
+
+const char *dns_class_to_string(uint16_t class);
+int dns_class_from_string(const char *name);
+
+/* https://tools.ietf.org/html/draft-ietf-dane-protocol-23#section-7.2 */
+const char *tlsa_cert_usage_to_string(uint8_t cert_usage);
+
+/* https://tools.ietf.org/html/draft-ietf-dane-protocol-23#section-7.3 */
+const char *tlsa_selector_to_string(uint8_t selector);
+
+/* https://tools.ietf.org/html/draft-ietf-dane-protocol-23#section-7.4 */
+const char *tlsa_matching_type_to_string(uint8_t selector);
+
+/* https://tools.ietf.org/html/rfc6844#section-5.1 */
+#define CAA_FLAG_CRITICAL (1u << 7)
diff --git a/src/resolve/dns_type-to-name.awk b/src/resolve/dns_type-to-name.awk
new file mode 100644
index 0000000..badb182
--- /dev/null
+++ b/src/resolve/dns_type-to-name.awk
@@ -0,0 +1,11 @@
+BEGIN{
+ print "const char *dns_type_to_string(int type) {\n\tswitch(type) {"
+}
+{
+ printf " case DNS_TYPE_%s: return ", $1;
+ sub(/_/, "-");
+ printf "\"%s\";\n", $1
+}
+END{
+ print " default: return NULL;\n\t}\n}\n"
+}
diff --git a/src/resolve/generate-dns_type-gperf.py b/src/resolve/generate-dns_type-gperf.py
new file mode 100755
index 0000000..d4f7b94
--- /dev/null
+++ b/src/resolve/generate-dns_type-gperf.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+"""Generate %-from-name.gperf from %-list.txt
+"""
+
+import sys
+
+name, prefix, input = sys.argv[1:]
+
+print("""\
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \\"-Wimplicit-fallthrough\\"")
+#endif
+%}""")
+print("""\
+struct {}_name {{ const char* name; int id; }};
+%null-strings
+%%""".format(name))
+
+for line in open(input):
+ line = line.rstrip()
+ s = line.replace('_', '-')
+ print("{}, {}{}".format(s, prefix, line))
diff --git a/src/resolve/generate-dns_type-list.sed b/src/resolve/generate-dns_type-list.sed
new file mode 100644
index 0000000..b7bc30f
--- /dev/null
+++ b/src/resolve/generate-dns_type-list.sed
@@ -0,0 +1 @@
+s/.* DNS_TYPE_(\w+).*/\1/p
diff --git a/src/resolve/meson.build b/src/resolve/meson.build
new file mode 100644
index 0000000..8e7bad0
--- /dev/null
+++ b/src/resolve/meson.build
@@ -0,0 +1,235 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+basic_dns_sources = files('''
+ resolved-dns-dnssec.c
+ resolved-dns-dnssec.h
+ resolved-dns-packet.c
+ resolved-dns-packet.h
+ resolved-dns-rr.c
+ resolved-dns-rr.h
+ resolved-dns-answer.c
+ resolved-dns-answer.h
+ resolved-dns-question.c
+ resolved-dns-question.h
+ dns-type.c
+'''.split())
+
+dns_type_h = files('dns-type.h')[0]
+
+systemd_resolved_sources = files('''
+ resolved-bus.c
+ resolved-bus.h
+ resolved-conf.c
+ resolved-conf.h
+ resolved-def.h
+ resolved-dns-cache.c
+ resolved-dns-cache.h
+ resolved-dns-query.c
+ resolved-dns-query.h
+ resolved-dns-scope.c
+ resolved-dns-scope.h
+ resolved-dns-search-domain.c
+ resolved-dns-search-domain.h
+ resolved-dns-server.c
+ resolved-dns-server.h
+ resolved-dns-stream.c
+ resolved-dns-stream.h
+ resolved-dns-stub.c
+ resolved-dns-stub.h
+ resolved-dns-synthesize.c
+ resolved-dns-synthesize.h
+ resolved-dns-transaction.c
+ resolved-dns-transaction.h
+ resolved-dns-trust-anchor.c
+ resolved-dns-trust-anchor.h
+ resolved-dns-zone.c
+ resolved-dns-zone.h
+ resolved-dnssd-bus.c
+ resolved-dnssd-bus.h
+ resolved-dnssd.c
+ resolved-dnssd.h
+ resolved-dnstls.h
+ resolved-etc-hosts.c
+ resolved-etc-hosts.h
+ resolved-link-bus.c
+ resolved-link-bus.h
+ resolved-link.c
+ resolved-link.h
+ resolved-llmnr.c
+ resolved-llmnr.h
+ resolved-manager.c
+ resolved-manager.h
+ resolved-mdns.c
+ resolved-mdns.h
+ resolved-resolv-conf.c
+ resolved-resolv-conf.h
+ resolved-varlink.c
+ resolved-varlink.h
+ resolved.c
+'''.split())
+
+resolvectl_sources = files('''
+ resolvconf-compat.c
+ resolvconf-compat.h
+ resolvectl.c
+ resolvectl.h
+'''.split())
+
+############################################################
+
+dns_type_list_txt = custom_target(
+ 'dns_type-list.txt',
+ input : ['generate-dns_type-list.sed', dns_type_h],
+ output : 'dns_type-list.txt',
+ command : [sed, '-n', '-r', '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+generate_dns_type_gperf = find_program('generate-dns_type-gperf.py')
+
+dns_type_headers = [dns_type_h]
+foreach item : [['dns_type', dns_type_list_txt, 'dns_type', 'DNS_TYPE_']]
+
+ fname = '@0@-from-name.gperf'.format(item[0])
+ gperf_file = custom_target(
+ fname,
+ input : item[1],
+ output : fname,
+ command : [generate_dns_type_gperf, item[2], item[3], '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-from-name.h'.format(item[0])
+ target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_@0@'.format(item[2]),
+ '-H', 'hash_@0@_name'.format(item[2]),
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+ fname = '@0@-to-name.h'.format(item[0])
+ awkscript = '@0@-to-name.awk'.format(item[0])
+ target2 = custom_target(
+ fname,
+ input : [awkscript, item[1]],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+ dns_type_headers += [target1, target2]
+endforeach
+
+resolved_gperf_c = custom_target(
+ 'resolved_gperf.c',
+ input : 'resolved-gperf.gperf',
+ output : 'resolved-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+resolved_dnssd_gperf_c = custom_target(
+ 'resolved_dnssd_gperf.c',
+ input : 'resolved-dnssd-gperf.gperf',
+ output : 'resolved-dnssd-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+libsystemd_resolve_core = static_library(
+ 'systemd-resolve-core',
+ basic_dns_sources,
+ dns_type_headers,
+ include_directories : includes)
+
+systemd_resolved_sources += [resolved_gperf_c, resolved_dnssd_gperf_c]
+
+systemd_resolved_dependencies = [threads, libgpg_error, libm]
+if conf.get('ENABLE_DNS_OVER_TLS') == 1
+ if conf.get('DNS_OVER_TLS_USE_GNUTLS') == 1
+ systemd_resolved_sources += files('resolved-dnstls-gnutls.c',
+ 'resolved-dnstls-gnutls.h')
+ systemd_resolved_dependencies += libgnutls
+ elif conf.get('DNS_OVER_TLS_USE_OPENSSL') == 1
+ systemd_resolved_sources += files('resolved-dnstls-openssl.c',
+ 'resolved-dnstls-openssl.h')
+ systemd_resolved_dependencies += libopenssl
+ else
+ error('unknown dependency for supporting DNS-over-TLS')
+ endif
+endif
+
+if conf.get('ENABLE_RESOLVE') == 1
+ install_data('org.freedesktop.resolve1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.resolve1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.resolve1.policy',
+ install_dir : polkitpolicydir)
+
+ resolved_conf = configure_file(
+ input : 'resolved.conf.in',
+ output : 'resolved.conf',
+ configuration : substs)
+ if install_sysconfdir
+ install_data(resolved_conf,
+ install_dir : pkgsysconfdir)
+ endif
+
+ install_data('resolv.conf',
+ install_dir : rootlibexecdir)
+endif
+
+tests += [
+ [['src/resolve/test-resolve-tables.c',
+ dns_type_headers,
+ 'src/shared/test-tables.h'],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-dns-packet.c',
+ dns_type_headers],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-resolved-etc-hosts.c',
+ 'src/resolve/resolved-etc-hosts.c',
+ 'src/resolve/resolved-etc-hosts.h'],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-resolved-packet.c',
+ dns_type_headers],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-dnssec.c',
+ dns_type_headers],
+ [libsystemd_resolve_core,
+ libshared],
+ [libgcrypt,
+ libgpg_error,
+ libm],
+ 'ENABLE_RESOLVE'],
+
+ [['src/resolve/test-dnssec-complex.c',
+ 'src/resolve/dns-type.c',
+ dns_type_headers],
+ [],
+ [],
+ 'ENABLE_RESOLVE', 'manual'],
+]
diff --git a/src/resolve/org.freedesktop.resolve1.conf b/src/resolve/org.freedesktop.resolve1.conf
new file mode 100644
index 0000000..25b0977
--- /dev/null
+++ b/src/resolve/org.freedesktop.resolve1.conf
@@ -0,0 +1,27 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="systemd-resolve">
+ <allow own="org.freedesktop.resolve1"/>
+ <allow send_destination="org.freedesktop.resolve1"/>
+ <allow receive_sender="org.freedesktop.resolve1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.resolve1"/>
+ <allow receive_sender="org.freedesktop.resolve1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/resolve/org.freedesktop.resolve1.policy b/src/resolve/org.freedesktop.resolve1.policy
new file mode 100644
index 0000000..08615ec
--- /dev/null
+++ b/src/resolve/org.freedesktop.resolve1.policy
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.resolve1.register-service">
+ <description gettext-domain="systemd">Register a DNS-SD service</description>
+ <message gettext-domain="systemd">Authentication is required to register a DNS-SD service</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.unregister-service">
+ <description gettext-domain="systemd">Unregister a DNS-SD service</description>
+ <message gettext-domain="systemd">Authentication is required to unregister a DNS-SD service</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.set-dns-servers">
+ <description gettext-domain="systemd">Set DNS servers</description>
+ <message gettext-domain="systemd">Authentication is required to set DNS servers.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.set-domains">
+ <description gettext-domain="systemd">Set domains</description>
+ <message gettext-domain="systemd">Authentication is required to set domains.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.set-default-route">
+ <description gettext-domain="systemd">Set default route</description>
+ <message gettext-domain="systemd">Authentication is required to set default route.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.set-llmnr">
+ <description gettext-domain="systemd">Enable/disable LLMNR</description>
+ <message gettext-domain="systemd">Authentication is required to enable or disable LLMNR.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.set-mdns">
+ <description gettext-domain="systemd">Enable/disable multicast DNS</description>
+ <message gettext-domain="systemd">Authentication is required to enable or disable multicast DNS.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.set-dns-over-tls">
+ <description gettext-domain="systemd">Enable/disable DNS over TLS</description>
+ <message gettext-domain="systemd">Authentication is required to enable or disable DNS over TLS.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.set-dnssec">
+ <description gettext-domain="systemd">Enable/disable DNSSEC</description>
+ <message gettext-domain="systemd">Authentication is required to enable or disable DNSSEC.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.set-dnssec-negative-trust-anchors">
+ <description gettext-domain="systemd">Set DNSSEC Negative Trust Anchors</description>
+ <message gettext-domain="systemd">Authentication is required to set DNSSEC Negative Trust Anchors.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+ <action id="org.freedesktop.resolve1.revert">
+ <description gettext-domain="systemd">Revert name resolution settings</description>
+ <message gettext-domain="systemd">Authentication is required to reset name resolution settings.</message>
+ <defaults>
+ <allow_any>auth_admin</allow_any>
+ <allow_inactive>auth_admin</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.owner">unix-user:systemd-resolve</annotate>
+ </action>
+
+</policyconfig>
diff --git a/src/resolve/org.freedesktop.resolve1.service b/src/resolve/org.freedesktop.resolve1.service
new file mode 100644
index 0000000..32a04f3
--- /dev/null
+++ b/src/resolve/org.freedesktop.resolve1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.resolve1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.resolve1.service
diff --git a/src/resolve/resolv.conf b/src/resolve/resolv.conf
new file mode 100644
index 0000000..b4e9a96
--- /dev/null
+++ b/src/resolve/resolv.conf
@@ -0,0 +1,19 @@
+# This file belongs to man:systemd-resolved(8). Do not edit.
+#
+# This is a static resolv.conf file for connecting local clients to the
+# internal DNS stub resolver of systemd-resolved. This file lists no search
+# domains.
+#
+# Run "resolvectl status" to see details about the uplink DNS servers
+# currently in use.
+#
+# Third party programs must not access this file directly, but only through the
+# symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a different way,
+# replace this symlink by a static file or a different symlink.
+#
+# See man:systemd-resolved.service(8) for details about the supported modes of
+# operation for /etc/resolv.conf.
+
+nameserver 127.0.0.53
+options edns0 trust-ad
+search .
diff --git a/src/resolve/resolvconf-compat.c b/src/resolve/resolvconf-compat.c
new file mode 100644
index 0000000..5bc936f
--- /dev/null
+++ b/src/resolve/resolvconf-compat.c
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <net/if.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "dns-domain.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "resolvconf-compat.h"
+#include "resolvectl.h"
+#include "resolved-def.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static int resolvconf_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("resolvectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s -a INTERFACE < FILE\n"
+ "%1$s -d INTERFACE\n"
+ "\n"
+ "Register DNS server and domain configuration with systemd-resolved.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -a Register per-interface DNS server and domain data\n"
+ " -d Unregister per-interface DNS server and domain data\n"
+ " -f Ignore if specified interface does not exist\n"
+ " -x Send DNS traffic preferably over this interface\n"
+ "\n"
+ "This is a compatibility alias for the resolvectl(1) tool, providing native\n"
+ "command line compatibility with the resolvconf(8) tool of various Linux\n"
+ "distributions and BSD systems. Some options supported by other implementations\n"
+ "are not supported and are ignored: -m, -p. Various options supported by other\n"
+ "implementations are not supported and will cause the invocation to fail: -u,\n"
+ "-I, -i, -l, -R, -r, -v, -V, --enable-updates, --disable-updates,\n"
+ "--updates-are-enabled.\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_nameserver(const char *string) {
+ int r;
+
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (strv_push(&arg_set_dns, word) < 0)
+ return log_oom();
+
+ word = NULL;
+ }
+
+ return 0;
+}
+
+static int parse_search_domain(const char *string) {
+ int r;
+
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&string, &word, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (strv_push(&arg_set_domain, word) < 0)
+ return log_oom();
+
+ word = NULL;
+ }
+
+ return 0;
+}
+
+int resolvconf_parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_ENABLE_UPDATES,
+ ARG_DISABLE_UPDATES,
+ ARG_UPDATES_ARE_ENABLED,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+
+ /* The following are specific to Debian's original resolvconf */
+ { "enable-updates", no_argument, NULL, ARG_ENABLE_UPDATES },
+ { "disable-updates", no_argument, NULL, ARG_DISABLE_UPDATES },
+ { "updates-are-enabled", no_argument, NULL, ARG_UPDATES_ARE_ENABLED },
+ {}
+ };
+
+ enum {
+ TYPE_REGULAR,
+ TYPE_PRIVATE, /* -p: Not supported, treated identically to TYPE_REGULAR */
+ TYPE_EXCLUSIVE, /* -x */
+ } type = TYPE_REGULAR;
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ /* openresolv checks these environment variables */
+ if (getenv("IF_EXCLUSIVE"))
+ type = TYPE_EXCLUSIVE;
+ if (getenv("IF_PRIVATE"))
+ type = TYPE_PRIVATE; /* not actually supported */
+
+ arg_mode = _MODE_INVALID;
+
+ while ((c = getopt_long(argc, argv, "hadxpfm:uIi:l:Rr:vV", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return resolvconf_help();
+
+ case ARG_VERSION:
+ return version();
+
+ /* -a and -d is what everybody can agree on */
+ case 'a':
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case 'd':
+ arg_mode = MODE_REVERT_LINK;
+ break;
+
+ /* The exclusive/private/force stuff is an openresolv invention, we support in some skewed way */
+ case 'x':
+ type = TYPE_EXCLUSIVE;
+ break;
+
+ case 'p':
+ type = TYPE_PRIVATE; /* not actually supported */
+ break;
+
+ case 'f':
+ arg_ifindex_permissive = true;
+ break;
+
+ /* The metrics stuff is an openresolv invention we ignore (and don't really need) */
+ case 'm':
+ log_debug("Switch -%c ignored.", c);
+ break;
+
+ /* Everybody else can agree on the existence of -u but we don't support it. */
+ case 'u':
+
+ /* The following options are openresolv inventions we don't support. */
+ case 'I':
+ case 'i':
+ case 'l':
+ case 'R':
+ case 'r':
+ case 'v':
+ case 'V':
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Switch -%c not supported.", c);
+
+ /* The Debian resolvconf commands we don't support. */
+ case ARG_ENABLE_UPDATES:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Switch --enable-updates not supported.");
+ case ARG_DISABLE_UPDATES:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Switch --disable-updates not supported.");
+ case ARG_UPDATES_ARE_ENABLED:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Switch --updates-are-enabled not supported.");
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_mode == _MODE_INVALID)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected either -a or -d on the command line.");
+
+ if (optind+1 != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected interface name as argument.");
+
+ r = ifname_mangle(argv[optind]);
+ if (r <= 0)
+ return r;
+
+ optind++;
+
+ if (arg_mode == MODE_SET_LINK) {
+ unsigned n = 0;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *a, *l;
+
+ r = read_line(stdin, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from stdin: %m");
+ if (r == 0)
+ break;
+
+ n++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, '#', ';', 0))
+ continue;
+
+ a = first_word(l, "nameserver");
+ if (a) {
+ (void) parse_nameserver(a);
+ continue;
+ }
+
+ a = first_word(l, "domain");
+ if (!a)
+ a = first_word(l, "search");
+ if (a) {
+ (void) parse_search_domain(a);
+ continue;
+ }
+
+ log_syntax(NULL, LOG_DEBUG, "stdin", n, 0, "Ignoring resolv.conf line: %s", l);
+ }
+
+ if (type == TYPE_EXCLUSIVE) {
+
+ /* If -x mode is selected, let's preferably route non-suffixed lookups to this interface. This
+ * somewhat matches the original -x behaviour */
+
+ r = strv_extend(&arg_set_domain, "~.");
+ if (r < 0)
+ return log_oom();
+
+ } else if (type == TYPE_PRIVATE)
+ log_debug("Private DNS server data not supported, ignoring.");
+
+ if (!arg_set_dns)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "No DNS servers specified, refusing operation.");
+ }
+
+ return 1; /* work to do */
+}
diff --git a/src/resolve/resolvconf-compat.h b/src/resolve/resolvconf-compat.h
new file mode 100644
index 0000000..33a5318
--- /dev/null
+++ b/src/resolve/resolvconf-compat.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int resolvconf_parse_argv(int argc, char *argv[]);
diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c
new file mode 100644
index 0000000..b479335
--- /dev/null
+++ b/src/resolve/resolvectl.c
@@ -0,0 +1,3322 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <locale.h>
+#include <net/if.h>
+
+#include "sd-bus.h"
+#include "sd-netlink.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "bus-message-util.h"
+#include "dns-domain.h"
+#include "escape.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "gcrypt-util.h"
+#include "main-func.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "resolvconf-compat.h"
+#include "resolvectl.h"
+#include "resolved-def.h"
+#include "resolved-dns-packet.h"
+#include "socket-netlink.h"
+#include "sort-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "utf8.h"
+#include "verbs.h"
+
+static int arg_family = AF_UNSPEC;
+static int arg_ifindex = 0;
+static char *arg_ifname = NULL;
+static uint16_t arg_type = 0;
+static uint16_t arg_class = 0;
+static bool arg_legend = true;
+static uint64_t arg_flags = 0;
+static PagerFlags arg_pager_flags = 0;
+bool arg_ifindex_permissive = false; /* If true, don't generate an error if the specified interface index doesn't exist */
+static const char *arg_service_family = NULL;
+
+typedef enum RawType {
+ RAW_NONE,
+ RAW_PAYLOAD,
+ RAW_PACKET,
+} RawType;
+static RawType arg_raw = RAW_NONE;
+
+ExecutionMode arg_mode = MODE_RESOLVE_HOST;
+
+char **arg_set_dns = NULL;
+char **arg_set_domain = NULL;
+static const char *arg_set_llmnr = NULL;
+static const char *arg_set_mdns = NULL;
+static const char *arg_set_dns_over_tls = NULL;
+static const char *arg_set_dnssec = NULL;
+static char **arg_set_nta = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_ifname, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_set_dns, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_set_domain, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_set_nta, strv_freep);
+
+typedef enum StatusMode {
+ STATUS_ALL,
+ STATUS_DNS,
+ STATUS_DOMAIN,
+ STATUS_DEFAULT_ROUTE,
+ STATUS_LLMNR,
+ STATUS_MDNS,
+ STATUS_PRIVATE,
+ STATUS_DNSSEC,
+ STATUS_NTA,
+} StatusMode;
+
+typedef struct InterfaceInfo {
+ int index;
+ const char *name;
+} InterfaceInfo;
+
+static int interface_info_compare(const InterfaceInfo *a, const InterfaceInfo *b) {
+ int r;
+
+ r = CMP(a->index, b->index);
+ if (r != 0)
+ return r;
+
+ return strcmp_ptr(a->name, b->name);
+}
+
+int ifname_mangle(const char *s) {
+ _cleanup_free_ char *iface = NULL;
+ const char *dot;
+ int ifi;
+
+ assert(s);
+
+ dot = strchr(s, '.');
+ if (dot) {
+ log_debug("Ignoring protocol specifier '%s'.", dot + 1);
+ iface = strndup(s, dot - s);
+
+ } else
+ iface = strdup(s);
+ if (!iface)
+ return log_oom();
+
+ ifi = resolve_interface(NULL, iface);
+ if (ifi < 0) {
+ if (ifi == -ENODEV && arg_ifindex_permissive) {
+ log_debug("Interface '%s' not found, but -f specified, ignoring.", iface);
+ return 0; /* done */
+ }
+
+ return log_error_errno(ifi, "Failed to resolve interface \"%s\": %m", iface);
+ }
+
+ if (arg_ifindex > 0 && arg_ifindex != ifi)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified multiple different interfaces. Refusing.");
+
+ arg_ifindex = ifi;
+ free_and_replace(arg_ifname, iface);
+
+ return 1;
+}
+
+static void print_source(uint64_t flags, usec_t rtt) {
+ char rtt_str[FORMAT_TIMESTAMP_MAX];
+
+ if (!arg_legend)
+ return;
+
+ if (flags == 0)
+ return;
+
+ printf("\n%s-- Information acquired via", ansi_grey());
+
+ printf(" protocol%s%s%s%s%s",
+ flags & SD_RESOLVED_DNS ? " DNS" :"",
+ flags & SD_RESOLVED_LLMNR_IPV4 ? " LLMNR/IPv4" : "",
+ flags & SD_RESOLVED_LLMNR_IPV6 ? " LLMNR/IPv6" : "",
+ flags & SD_RESOLVED_MDNS_IPV4 ? " mDNS/IPv4" : "",
+ flags & SD_RESOLVED_MDNS_IPV6 ? " mDNS/IPv6" : "");
+
+ assert_se(format_timespan(rtt_str, sizeof(rtt_str), rtt, 100));
+
+ printf(" in %s.%s\n"
+ "%s-- Data is authenticated: %s%s\n",
+ rtt_str, ansi_normal(),
+ ansi_grey(), yes_no(flags & SD_RESOLVED_AUTHENTICATED), ansi_normal());
+}
+
+static void print_ifindex_comment(int printed_so_far, int ifindex) {
+ char ifname[IF_NAMESIZE + 1];
+
+ if (ifindex <= 0)
+ return;
+
+ if (!format_ifname(ifindex, ifname))
+ log_warning_errno(errno, "Failed to resolve interface name for index %i, ignoring: %m", ifindex);
+ else
+ printf("%*s%s-- link: %s%s",
+ 60 > printed_so_far ? 60 - printed_so_far : 0, " ", /* Align comment to the 60th column */
+ ansi_grey(), ifname, ansi_normal());
+}
+
+static int resolve_host(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *canonical = NULL;
+ unsigned c = 0;
+ uint64_t flags;
+ usec_t ts;
+ int r;
+
+ assert(name);
+
+ log_debug("Resolving %s (family %s, interface %s).", name, af_to_name(arg_family) ?: "*", isempty(arg_ifname) ? "*" : arg_ifname);
+
+ r = bus_message_new_method_call(bus, &req, bus_resolve_mgr, "ResolveHostname");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "isit", arg_ifindex, name, arg_family, arg_flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "%s: resolve call failed: %s", name, bus_error_message(&error, r));
+
+ ts = now(CLOCK_MONOTONIC) - ts;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iiay")) > 0) {
+ _cleanup_free_ char *pretty = NULL;
+ int ifindex, family, k;
+ union in_addr_union a;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(reply, "i", &ifindex);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ sd_bus_error_free(&error);
+ r = bus_message_read_in_addr_auto(reply, &error, &family, &a);
+ if (r < 0 && !sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS))
+ return log_error_errno(r, "%s: systemd-resolved returned invalid result: %s", name, bus_error_message(&error, r));
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS)) {
+ log_debug_errno(r, "%s: systemd-resolved returned invalid result, ignoring: %s", name, bus_error_message(&error, r));
+ continue;
+ }
+
+ r = in_addr_ifindex_to_string(family, &a, ifindex, &pretty);
+ if (r < 0)
+ return log_error_errno(r, "Failed to print address for %s: %m", name);
+
+ k = printf("%*s%s %s%s%s",
+ (int) strlen(name), c == 0 ? name : "", c == 0 ? ":" : " ",
+ ansi_highlight(), pretty, ansi_normal());
+
+ print_ifindex_comment(k, ifindex);
+ fputc('\n', stdout);
+
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "st", &canonical, &flags);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!streq(name, canonical))
+ printf("%*s%s (%s)\n",
+ (int) strlen(name), c == 0 ? name : "", c == 0 ? ":" : " ",
+ canonical);
+
+ if (c == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "%s: no addresses found", name);
+
+ print_source(flags, ts);
+
+ return 0;
+}
+
+static int resolve_address(sd_bus *bus, int family, const union in_addr_union *address, int ifindex) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *pretty = NULL;
+ uint64_t flags;
+ unsigned c = 0;
+ usec_t ts;
+ int r;
+
+ assert(bus);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+ assert(address);
+
+ if (ifindex <= 0)
+ ifindex = arg_ifindex;
+
+ r = in_addr_ifindex_to_string(family, address, ifindex, &pretty);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Resolving %s.", pretty);
+
+ r = bus_message_new_method_call(bus, &req, bus_resolve_mgr, "ResolveAddress");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "ii", ifindex, family);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(req, 'y', address, FAMILY_ADDRESS_SIZE(family));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "t", arg_flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "%s: resolve call failed: %s", pretty, bus_error_message(&error, r));
+
+ ts = now(CLOCK_MONOTONIC) - ts;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(is)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "is")) > 0) {
+ const char *n;
+ int k;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(reply, "is", &ifindex, &n);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+
+ k = printf("%*s%s %s%s%s",
+ (int) strlen(pretty), c == 0 ? pretty : "",
+ c == 0 ? ":" : " ",
+ ansi_highlight(), n, ansi_normal());
+
+ print_ifindex_comment(k, ifindex);
+ fputc('\n', stdout);
+
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "t", &flags);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (c == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ESRCH),
+ "%s: no names found", pretty);
+
+ print_source(flags, ts);
+
+ return 0;
+}
+
+static int output_rr_packet(const void *d, size_t l, int ifindex) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ r = dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return log_oom();
+
+ p->refuse_compression = true;
+
+ r = dns_packet_append_blob(p, d, l, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = dns_packet_read_rr(p, &rr, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse RR: %m");
+
+ if (arg_raw == RAW_PAYLOAD) {
+ void *data;
+ ssize_t k;
+
+ k = dns_resource_record_payload(rr, &data);
+ if (k < 0)
+ return log_error_errno(k, "Cannot dump RR: %m");
+ fwrite(data, 1, k, stdout);
+ } else {
+ const char *s;
+ int k;
+
+ s = dns_resource_record_to_string(rr);
+ if (!s)
+ return log_oom();
+
+ k = printf("%s", s);
+ print_ifindex_comment(k, ifindex);
+ fputc('\n', stdout);
+ }
+
+ return 0;
+}
+
+static int resolve_record(sd_bus *bus, const char *name, uint16_t class, uint16_t type, bool warn_missing) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ unsigned n = 0;
+ uint64_t flags;
+ int r;
+ usec_t ts;
+ bool needs_authentication = false;
+
+ assert(name);
+
+ log_debug("Resolving %s %s %s (interface %s).", name, dns_class_to_string(class), dns_type_to_string(type), isempty(arg_ifname) ? "*" : arg_ifname);
+
+ r = bus_message_new_method_call(bus, &req, bus_resolve_mgr, "ResolveRecord");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "isqqt", arg_ifindex, name, class, type, arg_flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0) {
+ if (warn_missing || r != -ENXIO)
+ log_error("%s: resolve call failed: %s", name, bus_error_message(&error, r));
+ return r;
+ }
+
+ ts = now(CLOCK_MONOTONIC) - ts;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iqqay)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iqqay")) > 0) {
+ uint16_t c, t;
+ int ifindex;
+ const void *d;
+ size_t l;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(reply, "iqq", &ifindex, &c, &t);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(reply, 'y', &d, &l);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (arg_raw == RAW_PACKET) {
+ uint64_t u64 = htole64(l);
+
+ fwrite(&u64, sizeof(u64), 1, stdout);
+ fwrite(d, 1, l, stdout);
+ } else {
+ r = output_rr_packet(d, l, ifindex);
+ if (r < 0)
+ return r;
+ }
+
+ if (dns_type_needs_authentication(t))
+ needs_authentication = true;
+
+ n++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "t", &flags);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (n == 0) {
+ if (warn_missing)
+ log_error("%s: no records found", name);
+ return -ESRCH;
+ }
+
+ print_source(flags, ts);
+
+ if ((flags & SD_RESOLVED_AUTHENTICATED) == 0 && needs_authentication) {
+ fflush(stdout);
+
+ fprintf(stderr, "\n%s"
+ "WARNING: The resources shown contain cryptographic key data which could not be\n"
+ " authenticated. It is not suitable to authenticate any communication.\n"
+ " This is usually indication that DNSSEC authentication was not enabled\n"
+ " or is not available for the selected protocol or DNS servers.%s\n",
+ ansi_highlight_red(),
+ ansi_normal());
+ }
+
+ return 0;
+}
+
+static int resolve_rfc4501(sd_bus *bus, const char *name) {
+ uint16_t type = 0, class = 0;
+ const char *p, *q, *n;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(startswith(name, "dns:"));
+
+ /* Parse RFC 4501 dns: URIs */
+
+ p = name + 4;
+
+ if (p[0] == '/') {
+ const char *e;
+
+ if (p[1] != '/')
+ goto invalid;
+
+ e = strchr(p + 2, '/');
+ if (!e)
+ goto invalid;
+
+ if (e != p + 2)
+ log_warning("DNS authority specification not supported; ignoring specified authority.");
+
+ p = e + 1;
+ }
+
+ q = strchr(p, '?');
+ if (q) {
+ n = strndupa(p, q - p);
+ q++;
+
+ for (;;) {
+ const char *f;
+
+ f = startswith_no_case(q, "class=");
+ if (f) {
+ _cleanup_free_ char *t = NULL;
+ const char *e;
+
+ if (class != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "DNS class specified twice.");
+
+ e = strchrnul(f, ';');
+ t = strndup(f, e - f);
+ if (!t)
+ return log_oom();
+
+ r = dns_class_from_string(t);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown DNS class %s.", t);
+
+ class = r;
+
+ if (*e == ';') {
+ q = e + 1;
+ continue;
+ }
+
+ break;
+ }
+
+ f = startswith_no_case(q, "type=");
+ if (f) {
+ _cleanup_free_ char *t = NULL;
+ const char *e;
+
+ if (type != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "DNS type specified twice.");
+
+ e = strchrnul(f, ';');
+ t = strndup(f, e - f);
+ if (!t)
+ return log_oom();
+
+ r = dns_type_from_string(t);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown DNS type %s.", t);
+
+ type = r;
+
+ if (*e == ';') {
+ q = e + 1;
+ continue;
+ }
+
+ break;
+ }
+
+ goto invalid;
+ }
+ } else
+ n = p;
+
+ if (class == 0)
+ class = arg_class ?: DNS_CLASS_IN;
+ if (type == 0)
+ type = arg_type ?: DNS_TYPE_A;
+
+ return resolve_record(bus, n, class, type, true);
+
+invalid:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid DNS URI: %s", name);
+}
+
+static int verb_query(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ char **p;
+ int q, r = 0;
+
+ if (arg_type != 0)
+ STRV_FOREACH(p, argv + 1) {
+ q = resolve_record(bus, *p, arg_class, arg_type, true);
+ if (q < 0)
+ r = q;
+ }
+
+ else
+ STRV_FOREACH(p, argv + 1) {
+ if (startswith(*p, "dns:"))
+ q = resolve_rfc4501(bus, *p);
+ else {
+ int family, ifindex;
+ union in_addr_union a;
+
+ q = in_addr_ifindex_from_string_auto(*p, &family, &a, &ifindex);
+ if (q >= 0)
+ q = resolve_address(bus, family, &a, ifindex);
+ else
+ q = resolve_host(bus, *p);
+ }
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int resolve_service(sd_bus *bus, const char *name, const char *type, const char *domain) {
+ const char *canonical_name, *canonical_type, *canonical_domain;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ size_t indent, sz;
+ uint64_t flags;
+ const char *p;
+ unsigned c;
+ usec_t ts;
+ int r;
+
+ assert(bus);
+ assert(domain);
+
+ name = empty_to_null(name);
+ type = empty_to_null(type);
+
+ if (name)
+ log_debug("Resolving service \"%s\" of type %s in %s (family %s, interface %s).", name, type, domain, af_to_name(arg_family) ?: "*", isempty(arg_ifname) ? "*" : arg_ifname);
+ else if (type)
+ log_debug("Resolving service type %s of %s (family %s, interface %s).", type, domain, af_to_name(arg_family) ?: "*", isempty(arg_ifname) ? "*" : arg_ifname);
+ else
+ log_debug("Resolving service type %s (family %s, interface %s).", domain, af_to_name(arg_family) ?: "*", isempty(arg_ifname) ? "*" : arg_ifname);
+
+ r = bus_message_new_method_call(bus, &req, bus_resolve_mgr, "ResolveService");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "isssit", arg_ifindex, name, type, domain, arg_family, arg_flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Resolve call failed: %s", bus_error_message(&error, r));
+
+ ts = now(CLOCK_MONOTONIC) - ts;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(qqqsa(iiay)s)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ indent =
+ (name ? strlen(name) + 1 : 0) +
+ (type ? strlen(type) + 1 : 0) +
+ strlen(domain) + 2;
+
+ c = 0;
+ while ((r = sd_bus_message_enter_container(reply, 'r', "qqqsa(iiay)s")) > 0) {
+ uint16_t priority, weight, port;
+ const char *hostname, *canonical;
+
+ r = sd_bus_message_read(reply, "qqqs", &priority, &weight, &port, &hostname);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (name)
+ printf("%*s%s", (int) strlen(name), c == 0 ? name : "", c == 0 ? "/" : " ");
+ if (type)
+ printf("%*s%s", (int) strlen(type), c == 0 ? type : "", c == 0 ? "/" : " ");
+
+ printf("%*s%s %s:%u [priority=%u, weight=%u]\n",
+ (int) strlen(domain), c == 0 ? domain : "",
+ c == 0 ? ":" : " ",
+ hostname, port,
+ priority, weight);
+
+ r = sd_bus_message_enter_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(reply, 'r', "iiay")) > 0) {
+ _cleanup_free_ char *pretty = NULL;
+ int ifindex, family, k;
+ union in_addr_union a;;
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(reply, "i", &ifindex);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ sd_bus_error_free(&error);
+ r = bus_message_read_in_addr_auto(reply, &error, &family, &a);
+ if (r < 0 && !sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS))
+ return log_error_errno(r, "%s: systemd-resolved returned invalid result: %s", name, bus_error_message(&error, r));
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS)) {
+ log_debug_errno(r, "%s: systemd-resolved returned invalid result, ignoring: %s", name, bus_error_message(&error, r));
+ continue;
+ }
+
+ r = in_addr_ifindex_to_string(family, &a, ifindex, &pretty);
+ if (r < 0)
+ return log_error_errno(r, "Failed to print address for %s: %m", name);
+
+ k = printf("%*s%s", (int) indent, "", pretty);
+ print_ifindex_comment(k, ifindex);
+ fputc('\n', stdout);
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "s", &canonical);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!streq(hostname, canonical))
+ printf("%*s(%s)\n", (int) indent, "", canonical);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(reply, 'a', "ay");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read_array(reply, 'y', (const void**) &p, &sz)) > 0) {
+ _cleanup_free_ char *escaped = NULL;
+
+ escaped = cescape_length(p, sz);
+ if (!escaped)
+ return log_oom();
+
+ printf("%*s%s\n", (int) indent, "", escaped);
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(reply, "ssst", &canonical_name, &canonical_type, &canonical_domain, &flags);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ canonical_name = empty_to_null(canonical_name);
+ canonical_type = empty_to_null(canonical_type);
+
+ if (!streq_ptr(name, canonical_name) ||
+ !streq_ptr(type, canonical_type) ||
+ !streq_ptr(domain, canonical_domain)) {
+
+ printf("%*s(", (int) indent, "");
+
+ if (canonical_name)
+ printf("%s/", canonical_name);
+ if (canonical_type)
+ printf("%s/", canonical_type);
+
+ printf("%s)\n", canonical_domain);
+ }
+
+ print_source(flags, ts);
+
+ return 0;
+}
+
+static int verb_service(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+
+ if (argc == 2)
+ return resolve_service(bus, NULL, NULL, argv[1]);
+ else if (argc == 3)
+ return resolve_service(bus, NULL, argv[1], argv[2]);
+ else
+ return resolve_service(bus, argv[1], argv[2], argv[3]);
+}
+
+static int resolve_openpgp(sd_bus *bus, const char *address) {
+ const char *domain, *full;
+ int r;
+ _cleanup_free_ char *hashed = NULL;
+
+ assert(bus);
+ assert(address);
+
+ domain = strrchr(address, '@');
+ if (!domain)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Address does not contain '@': \"%s\"", address);
+ if (domain == address || domain[1] == '\0')
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Address starts or ends with '@': \"%s\"", address);
+ domain++;
+
+ r = string_hashsum_sha256(address, domain - 1 - address, &hashed);
+ if (r < 0)
+ return log_error_errno(r, "Hashing failed: %m");
+
+ strshorten(hashed, 56);
+
+ full = strjoina(hashed, "._openpgpkey.", domain);
+ log_debug("Looking up \"%s\".", full);
+
+ r = resolve_record(bus, full,
+ arg_class ?: DNS_CLASS_IN,
+ arg_type ?: DNS_TYPE_OPENPGPKEY, false);
+
+ if (IN_SET(r, -ENXIO, -ESRCH)) { /* NXDOMAIN or NODATA? */
+ hashed = mfree(hashed);
+ r = string_hashsum_sha224(address, domain - 1 - address, &hashed);
+ if (r < 0)
+ return log_error_errno(r, "Hashing failed: %m");
+
+ full = strjoina(hashed, "._openpgpkey.", domain);
+ log_debug("Looking up \"%s\".", full);
+
+ return resolve_record(bus, full,
+ arg_class ?: DNS_CLASS_IN,
+ arg_type ?: DNS_TYPE_OPENPGPKEY, true);
+ }
+
+ return r;
+}
+
+static int verb_openpgp(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ char **p;
+ int q, r = 0;
+
+ STRV_FOREACH(p, argv + 1) {
+ q = resolve_openpgp(bus, *p);
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int resolve_tlsa(sd_bus *bus, const char *family, const char *address) {
+ const char *port;
+ uint16_t port_num = 443;
+ _cleanup_free_ char *full = NULL;
+ int r;
+
+ assert(bus);
+ assert(address);
+
+ port = strrchr(address, ':');
+ if (port) {
+ r = parse_ip_port(port + 1, &port_num);
+ if (r < 0)
+ return log_error_errno(r, "Invalid port \"%s\".", port + 1);
+
+ address = strndupa(address, port - address);
+ }
+
+ r = asprintf(&full, "_%u._%s.%s",
+ port_num,
+ family,
+ address);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Looking up \"%s\".", full);
+
+ return resolve_record(bus, full,
+ arg_class ?: DNS_CLASS_IN,
+ arg_type ?: DNS_TYPE_TLSA, true);
+}
+
+static bool service_family_is_valid(const char *s) {
+ return STR_IN_SET(s, "tcp", "udp", "sctp");
+}
+
+static int verb_tlsa(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ char **p, **args = argv + 1;
+ const char *family = "tcp";
+ int q, r = 0;
+
+ if (service_family_is_valid(argv[1])) {
+ family = argv[1];
+ args++;
+ }
+
+ STRV_FOREACH(p, args) {
+ q = resolve_tlsa(bus, family, *p);
+ if (q < 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int show_statistics(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ sd_bus *bus = userdata;
+ uint64_t n_current_transactions, n_total_transactions,
+ cache_size, n_cache_hit, n_cache_miss,
+ n_dnssec_secure, n_dnssec_insecure, n_dnssec_bogus, n_dnssec_indeterminate;
+ int r, dnssec_supported;
+
+ assert(bus);
+
+ r = bus_get_property_trivial(bus, bus_resolve_mgr, "DNSSECSupported", &error, 'b', &dnssec_supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get DNSSEC supported state: %s", bus_error_message(&error, r));
+
+ printf("DNSSEC supported by current servers: %s%s%s\n\n",
+ ansi_highlight(),
+ yes_no(dnssec_supported),
+ ansi_normal());
+
+ r = bus_get_property(bus, bus_resolve_mgr, "TransactionStatistics", &error, &reply, "(tt)");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get transaction statistics: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "(tt)",
+ &n_current_transactions,
+ &n_total_transactions);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = bus_get_property(bus, bus_resolve_mgr, "CacheStatistics", &error, &reply, "(ttt)");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cache statistics: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "(ttt)",
+ &cache_size,
+ &n_cache_hit,
+ &n_cache_miss);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = bus_get_property(bus, bus_resolve_mgr, "DNSSECStatistics", &error, &reply, "(tttt)");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get DNSSEC statistics: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "(tttt)",
+ &n_dnssec_secure,
+ &n_dnssec_insecure,
+ &n_dnssec_bogus,
+ &n_dnssec_indeterminate);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ table = table_new("key", "value");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ r = table_add_many(table,
+ TABLE_STRING, "Transactions",
+ TABLE_SET_COLOR, ansi_highlight(),
+ TABLE_EMPTY,
+ TABLE_STRING, "Current Transactions:",
+ TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_UINT64, n_current_transactions,
+ TABLE_STRING, "Total Transactions:",
+ TABLE_UINT64, n_total_transactions,
+ TABLE_EMPTY, TABLE_EMPTY,
+ TABLE_STRING, "Cache",
+ TABLE_SET_COLOR, ansi_highlight(),
+ TABLE_SET_ALIGN_PERCENT, 0,
+ TABLE_EMPTY,
+ TABLE_STRING, "Current Cache Size:",
+ TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_UINT64, cache_size,
+ TABLE_STRING, "Cache Hits:",
+ TABLE_UINT64, n_cache_hit,
+ TABLE_STRING, "Cache Misses:",
+ TABLE_UINT64, n_cache_miss,
+ TABLE_EMPTY, TABLE_EMPTY,
+ TABLE_STRING, "DNSSEC Verdicts",
+ TABLE_SET_COLOR, ansi_highlight(),
+ TABLE_SET_ALIGN_PERCENT, 0,
+ TABLE_EMPTY,
+ TABLE_STRING, "Secure:",
+ TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_UINT64, n_dnssec_secure,
+ TABLE_STRING, "Insecure:",
+ TABLE_UINT64, n_dnssec_insecure,
+ TABLE_STRING, "Bogus:",
+ TABLE_UINT64, n_dnssec_bogus,
+ TABLE_STRING, "Indeterminate:",
+ TABLE_UINT64, n_dnssec_indeterminate);
+ if (r < 0)
+ table_log_add_error(r);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return 0;
+}
+
+static int reset_statistics(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ r = bus_call_method(bus, bus_resolve_mgr, "ResetStatistics", &error, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset statistics: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int flush_caches(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ r = bus_call_method(bus, bus_resolve_mgr, "FlushCaches", &error, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush caches: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int reset_server_features(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ r = bus_call_method(bus, bus_resolve_mgr, "ResetServerFeatures", &error, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reset server features: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int read_dns_server_one(sd_bus_message *m, bool with_ifindex, bool extended, char **ret) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *pretty = NULL;
+ int ifindex, family, r, k;
+ union in_addr_union a;
+ const char *name = NULL;
+ uint16_t port = 0;
+
+ assert(m);
+ assert(ret);
+
+ r = sd_bus_message_enter_container(m, 'r', with_ifindex ? (extended ? "iiayqs" : "iiay") : (extended ? "iayqs" : "iay"));
+ if (r <= 0)
+ return r;
+
+ if (with_ifindex) {
+ r = sd_bus_message_read(m, "i", &ifindex);
+ if (r < 0)
+ return r;
+ }
+
+ k = bus_message_read_in_addr_auto(m, &error, &family, &a);
+ if (k < 0 && !sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS))
+ return k;
+
+ if (extended) {
+ r = sd_bus_message_read(m, "q", &port);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (k < 0) {
+ log_debug("Invalid DNS server, ignoring: %s", bus_error_message(&error, k));
+ *ret = NULL;
+ return 1;
+ }
+
+ if (with_ifindex && ifindex != 0) {
+ /* only show the global ones here */
+ *ret = NULL;
+ return 1;
+ }
+
+ r = in_addr_port_ifindex_name_to_string(family, &a, port, ifindex, name, &pretty);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(pretty);
+
+ return 1;
+}
+
+static int map_link_dns_servers_internal(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata, bool extended) {
+ char ***l = userdata;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', extended ? "(iayqs)" : "(iay)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *pretty = NULL;
+
+ r = read_dns_server_one(m, false, extended, &pretty);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (isempty(pretty))
+ continue;
+
+ r = strv_consume(l, TAKE_PTR(pretty));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_link_dns_servers(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ return map_link_dns_servers_internal(bus, member, m, error, userdata, false);
+}
+
+static int map_link_dns_servers_ex(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ return map_link_dns_servers_internal(bus, member, m, error, userdata, true);
+}
+
+static int map_link_current_dns_server(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ assert(m);
+ assert(userdata);
+
+ return read_dns_server_one(m, false, false, userdata);
+}
+
+static int map_link_current_dns_server_ex(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ assert(m);
+ assert(userdata);
+
+ return read_dns_server_one(m, false, true, userdata);
+}
+
+static int read_domain_one(sd_bus_message *m, bool with_ifindex, char **ret) {
+ _cleanup_free_ char *str = NULL;
+ int ifindex, route_only, r;
+ const char *domain;
+
+ assert(m);
+ assert(ret);
+
+ if (with_ifindex)
+ r = sd_bus_message_read(m, "(isb)", &ifindex, &domain, &route_only);
+ else
+ r = sd_bus_message_read(m, "(sb)", &domain, &route_only);
+ if (r <= 0)
+ return r;
+
+ if (with_ifindex && ifindex != 0) {
+ /* only show the global ones here */
+ *ret = NULL;
+ return 1;
+ }
+
+ if (route_only)
+ str = strjoin("~", domain);
+ else
+ str = strdup(domain);
+ if (!str)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(str);
+
+ return 1;
+}
+
+static int map_link_domains(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ char ***l = userdata;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *pretty = NULL;
+
+ r = read_domain_one(m, false, &pretty);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (isempty(pretty))
+ continue;
+
+ r = strv_consume(l, TAKE_PTR(pretty));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ strv_sort(*l);
+
+ return 0;
+}
+
+static int status_print_strv_ifindex(int ifindex, const char *ifname, char **p) {
+ const unsigned indent = strlen("Global: "); /* Use the same indentation everywhere to make things nice */
+ int pos1, pos2;
+
+ if (ifname)
+ printf("%s%nLink %i (%s)%n%s:", ansi_highlight(), &pos1, ifindex, ifname, &pos2, ansi_normal());
+ else
+ printf("%s%nGlobal%n%s:", ansi_highlight(), &pos1, &pos2, ansi_normal());
+
+ size_t cols = columns(), position = pos2 - pos1 + 2;
+ char **i;
+
+ STRV_FOREACH(i, p) {
+ size_t our_len = utf8_console_width(*i); /* This returns -1 on invalid utf-8 (which shouldn't happen).
+ * If that happens, we'll just print one item per line. */
+
+ if (position <= indent || size_add(size_add(position, 1), our_len) < cols) {
+ printf(" %s", *i);
+ position = size_add(size_add(position, 1), our_len);
+ } else {
+ printf("\n%*s%s", indent, "", *i);
+ position = size_add(our_len, indent);
+ }
+ }
+
+ printf("\n");
+
+ return 0;
+}
+
+static int status_print_strv_global(char **p) {
+ return status_print_strv_ifindex(0, NULL, p);
+}
+
+typedef struct LinkInfo {
+ uint64_t scopes_mask;
+ const char *llmnr;
+ const char *mdns;
+ const char *dns_over_tls;
+ const char *dnssec;
+ char *current_dns;
+ char *current_dns_ex;
+ char **dns;
+ char **dns_ex;
+ char **domains;
+ char **ntas;
+ bool dnssec_supported;
+ bool default_route;
+} LinkInfo;
+
+typedef struct GlobalInfo {
+ char *current_dns;
+ char *current_dns_ex;
+ char **dns;
+ char **dns_ex;
+ char **fallback_dns;
+ char **fallback_dns_ex;
+ char **domains;
+ char **ntas;
+ const char *llmnr;
+ const char *mdns;
+ const char *dns_over_tls;
+ const char *dnssec;
+ const char *resolv_conf_mode;
+ bool dnssec_supported;
+} GlobalInfo;
+
+static void link_info_clear(LinkInfo *p) {
+ free(p->current_dns);
+ free(p->current_dns_ex);
+ strv_free(p->dns);
+ strv_free(p->dns_ex);
+ strv_free(p->domains);
+ strv_free(p->ntas);
+}
+
+static void global_info_clear(GlobalInfo *p) {
+ free(p->current_dns);
+ free(p->current_dns_ex);
+ strv_free(p->dns);
+ strv_free(p->dns_ex);
+ strv_free(p->fallback_dns);
+ strv_free(p->fallback_dns_ex);
+ strv_free(p->domains);
+ strv_free(p->ntas);
+}
+
+static int dump_list(Table *table, const char *prefix, char * const *l) {
+ int r;
+
+ if (strv_isempty(l))
+ return 0;
+
+ r = table_add_many(table,
+ TABLE_STRING, prefix,
+ TABLE_STRV_WRAPPED, l);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ return 0;
+}
+
+static int strv_extend_extended_bool(char ***strv, const char *name, const char *value) {
+ int r;
+
+ if (value) {
+ r = parse_boolean(value);
+ if (r >= 0)
+ return strv_extendf(strv, "%s%s", plus_minus(r), name);
+ }
+
+ return strv_extendf(strv, "%s=%s", name, value ?: "???");
+}
+
+static char** link_protocol_status(const LinkInfo *info) {
+ _cleanup_strv_free_ char **s = NULL;
+
+ if (strv_extendf(&s, "%sDefaultRoute", plus_minus(info->default_route)) < 0)
+ return NULL;
+
+ if (strv_extend_extended_bool(&s, "LLMNR", info->llmnr) < 0)
+ return NULL;
+
+ if (strv_extend_extended_bool(&s, "mDNS", info->mdns) < 0)
+ return NULL;
+
+ if (strv_extend_extended_bool(&s, "DNSOverTLS", info->dns_over_tls) < 0)
+ return NULL;
+
+ if (strv_extendf(&s, "DNSSEC=%s/%s",
+ info->dnssec ?: "???",
+ info->dnssec_supported ? "supported" : "unsupported") < 0)
+ return NULL;
+
+ return TAKE_PTR(s);
+}
+
+static char** global_protocol_status(const GlobalInfo *info) {
+ _cleanup_strv_free_ char **s = NULL;
+
+ if (strv_extend_extended_bool(&s, "LLMNR", info->llmnr) < 0)
+ return NULL;
+
+ if (strv_extend_extended_bool(&s, "mDNS", info->mdns) < 0)
+ return NULL;
+
+ if (strv_extend_extended_bool(&s, "DNSOverTLS", info->dns_over_tls) < 0)
+ return NULL;
+
+ if (strv_extendf(&s, "DNSSEC=%s/%s",
+ info->dnssec ?: "???",
+ info->dnssec_supported ? "supported" : "unsupported") < 0)
+ return NULL;
+
+ return TAKE_PTR(s);
+}
+
+static int status_ifindex(sd_bus *bus, int ifindex, const char *name, StatusMode mode, bool *empty_line) {
+ static const struct bus_properties_map property_map[] = {
+ { "ScopesMask", "t", NULL, offsetof(LinkInfo, scopes_mask) },
+ { "DNS", "a(iay)", map_link_dns_servers, offsetof(LinkInfo, dns) },
+ { "DNSEx", "a(iayqs)", map_link_dns_servers_ex, offsetof(LinkInfo, dns_ex) },
+ { "CurrentDNSServer", "(iay)", map_link_current_dns_server, offsetof(LinkInfo, current_dns) },
+ { "CurrentDNSServerEx", "(iayqs)", map_link_current_dns_server_ex, offsetof(LinkInfo, current_dns_ex) },
+ { "Domains", "a(sb)", map_link_domains, offsetof(LinkInfo, domains) },
+ { "DefaultRoute", "b", NULL, offsetof(LinkInfo, default_route) },
+ { "LLMNR", "s", NULL, offsetof(LinkInfo, llmnr) },
+ { "MulticastDNS", "s", NULL, offsetof(LinkInfo, mdns) },
+ { "DNSOverTLS", "s", NULL, offsetof(LinkInfo, dns_over_tls) },
+ { "DNSSEC", "s", NULL, offsetof(LinkInfo, dnssec) },
+ { "DNSSECNegativeTrustAnchors", "as", bus_map_strv_sort, offsetof(LinkInfo, ntas) },
+ { "DNSSECSupported", "b", NULL, offsetof(LinkInfo, dnssec_supported) },
+ {}
+ };
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(link_info_clear) LinkInfo link_info = {};
+ _cleanup_(table_unrefp) Table *table = NULL;
+ _cleanup_free_ char *p = NULL;
+ char ifi[DECIMAL_STR_MAX(int)], ifname[IF_NAMESIZE + 1] = "";
+ int r;
+
+ assert(bus);
+ assert(ifindex > 0);
+
+ if (!name) {
+ if (!format_ifname(ifindex, ifname))
+ return log_error_errno(errno, "Failed to resolve interface name for %i: %m", ifindex);
+
+ name = ifname;
+ }
+
+ xsprintf(ifi, "%i", ifindex);
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/link", ifi, &p);
+ if (r < 0)
+ return log_oom();
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.resolve1",
+ p,
+ property_map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &link_info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get link data for %i: %s", ifindex, bus_error_message(&error, r));
+
+ (void) pager_open(arg_pager_flags);
+
+ if (mode == STATUS_DNS)
+ return status_print_strv_ifindex(ifindex, name, link_info.dns_ex ?: link_info.dns);
+
+ if (mode == STATUS_DOMAIN)
+ return status_print_strv_ifindex(ifindex, name, link_info.domains);
+
+ if (mode == STATUS_NTA)
+ return status_print_strv_ifindex(ifindex, name, link_info.ntas);
+
+ if (mode == STATUS_DEFAULT_ROUTE) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ yes_no(link_info.default_route));
+
+ return 0;
+ }
+
+ if (mode == STATUS_LLMNR) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ strna(link_info.llmnr));
+
+ return 0;
+ }
+
+ if (mode == STATUS_MDNS) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ strna(link_info.mdns));
+
+ return 0;
+ }
+
+ if (mode == STATUS_PRIVATE) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ strna(link_info.dns_over_tls));
+
+ return 0;
+ }
+
+ if (mode == STATUS_DNSSEC) {
+ printf("%sLink %i (%s)%s: %s\n",
+ ansi_highlight(), ifindex, name, ansi_normal(),
+ strna(link_info.dnssec));
+
+ return 0;
+ }
+
+ if (empty_line && *empty_line)
+ fputc('\n', stdout);
+
+ printf("%sLink %i (%s)%s\n",
+ ansi_highlight(), ifindex, name, ansi_normal());
+
+ table = table_new("key", "value");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ r = table_add_many(table,
+ TABLE_STRING, "Current Scopes:",
+ TABLE_SET_ALIGN_PERCENT, 100);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (link_info.scopes_mask == 0)
+ r = table_add_cell(table, NULL, TABLE_STRING, "none");
+ else {
+ _cleanup_free_ char *buf = NULL;
+ size_t len;
+
+ if (asprintf(&buf, "%s%s%s%s%s",
+ link_info.scopes_mask & SD_RESOLVED_DNS ? "DNS " : "",
+ link_info.scopes_mask & SD_RESOLVED_LLMNR_IPV4 ? "LLMNR/IPv4 " : "",
+ link_info.scopes_mask & SD_RESOLVED_LLMNR_IPV6 ? "LLMNR/IPv6 " : "",
+ link_info.scopes_mask & SD_RESOLVED_MDNS_IPV4 ? "mDNS/IPv4 " : "",
+ link_info.scopes_mask & SD_RESOLVED_MDNS_IPV6 ? "mDNS/IPv6 " : "") < 0)
+ return log_oom();
+
+ len = strlen(buf);
+ assert(len > 0);
+ buf[len - 1] = '\0';
+
+ r = table_add_cell(table, NULL, TABLE_STRING, buf);
+ }
+ if (r < 0)
+ return table_log_add_error(r);
+
+ _cleanup_strv_free_ char **pstatus = link_protocol_status(&link_info);
+ if (!pstatus)
+ return log_oom();
+
+ r = table_add_many(table,
+ TABLE_STRING, "Protocols:",
+ TABLE_STRV_WRAPPED, pstatus);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (link_info.current_dns) {
+ r = table_add_many(table,
+ TABLE_STRING, "Current DNS Server:",
+ TABLE_STRING, link_info.current_dns_ex ?: link_info.current_dns);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = dump_list(table, "DNS Servers:", link_info.dns_ex ?: link_info.dns);
+ if (r < 0)
+ return r;
+
+ r = dump_list(table, "DNS Domain:", link_info.domains);
+ if (r < 0)
+ return r;
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ if (empty_line)
+ *empty_line = true;
+
+ return 0;
+}
+
+static int map_global_dns_servers_internal(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata, bool extended) {
+ char ***l = userdata;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', extended ? "(iiayqs)" : "(iiay)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *pretty = NULL;
+
+ r = read_dns_server_one(m, true, extended, &pretty);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (isempty(pretty))
+ continue;
+
+ r = strv_consume(l, TAKE_PTR(pretty));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_global_dns_servers(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ return map_global_dns_servers_internal(bus, member, m, error, userdata, false);
+}
+
+static int map_global_dns_servers_ex(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ return map_global_dns_servers_internal(bus, member, m, error, userdata, true);
+}
+
+static int map_global_current_dns_server(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ assert(m);
+ assert(userdata);
+
+ return read_dns_server_one(m, true, false, userdata);
+}
+
+static int map_global_current_dns_server_ex(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ assert(m);
+ assert(userdata);
+
+ return read_dns_server_one(m, true, true, userdata);
+}
+
+static int map_global_domains(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ char ***l = userdata;
+ int r;
+
+ assert(bus);
+ assert(member);
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_enter_container(m, 'a', "(isb)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *pretty = NULL;
+
+ r = read_domain_one(m, true, &pretty);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (isempty(pretty))
+ continue;
+
+ r = strv_consume(l, TAKE_PTR(pretty));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ strv_sort(*l);
+
+ return 0;
+}
+
+static int status_global(sd_bus *bus, StatusMode mode, bool *empty_line) {
+ static const struct bus_properties_map property_map[] = {
+ { "DNS", "a(iiay)", map_global_dns_servers, offsetof(GlobalInfo, dns) },
+ { "DNSEx", "a(iiayqs)", map_global_dns_servers_ex, offsetof(GlobalInfo, dns_ex) },
+ { "FallbackDNS", "a(iiay)", map_global_dns_servers, offsetof(GlobalInfo, fallback_dns) },
+ { "FallbackDNSEx", "a(iiayqs)", map_global_dns_servers_ex, offsetof(GlobalInfo, fallback_dns_ex) },
+ { "CurrentDNSServer", "(iiay)", map_global_current_dns_server, offsetof(GlobalInfo, current_dns) },
+ { "CurrentDNSServerEx", "(iiayqs)", map_global_current_dns_server_ex, offsetof(GlobalInfo, current_dns_ex) },
+ { "Domains", "a(isb)", map_global_domains, offsetof(GlobalInfo, domains) },
+ { "DNSSECNegativeTrustAnchors", "as", bus_map_strv_sort, offsetof(GlobalInfo, ntas) },
+ { "LLMNR", "s", NULL, offsetof(GlobalInfo, llmnr) },
+ { "MulticastDNS", "s", NULL, offsetof(GlobalInfo, mdns) },
+ { "DNSOverTLS", "s", NULL, offsetof(GlobalInfo, dns_over_tls) },
+ { "DNSSEC", "s", NULL, offsetof(GlobalInfo, dnssec) },
+ { "DNSSECSupported", "b", NULL, offsetof(GlobalInfo, dnssec_supported) },
+ { "ResolvConfMode", "s", NULL, offsetof(GlobalInfo, resolv_conf_mode) },
+ {}
+ };
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(global_info_clear) GlobalInfo global_info = {};
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int r;
+
+ assert(bus);
+ assert(empty_line);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ property_map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &global_info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get global data: %s", bus_error_message(&error, r));
+
+ (void) pager_open(arg_pager_flags);
+
+ if (mode == STATUS_DNS)
+ return status_print_strv_global(global_info.dns_ex ?: global_info.dns);
+
+ if (mode == STATUS_DOMAIN)
+ return status_print_strv_global(global_info.domains);
+
+ if (mode == STATUS_NTA)
+ return status_print_strv_global(global_info.ntas);
+
+ if (mode == STATUS_LLMNR) {
+ printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(),
+ strna(global_info.llmnr));
+
+ return 0;
+ }
+
+ if (mode == STATUS_MDNS) {
+ printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(),
+ strna(global_info.mdns));
+
+ return 0;
+ }
+
+ if (mode == STATUS_PRIVATE) {
+ printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(),
+ strna(global_info.dns_over_tls));
+
+ return 0;
+ }
+
+ if (mode == STATUS_DNSSEC) {
+ printf("%sGlobal%s: %s\n", ansi_highlight(), ansi_normal(),
+ strna(global_info.dnssec));
+
+ return 0;
+ }
+
+ printf("%sGlobal%s\n", ansi_highlight(), ansi_normal());
+
+ table = table_new("key", "value");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ _cleanup_strv_free_ char **pstatus = global_protocol_status(&global_info);
+ if (!pstatus)
+ return log_oom();
+
+ r = table_add_many(table,
+ TABLE_STRING, "Protocols:",
+ TABLE_SET_ALIGN_PERCENT, 100,
+ TABLE_STRV_WRAPPED, pstatus);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (global_info.resolv_conf_mode) {
+ r = table_add_many(table,
+ TABLE_STRING, "resolv.conf mode:",
+ TABLE_STRING, global_info.resolv_conf_mode);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (global_info.current_dns) {
+ r = table_add_many(table,
+ TABLE_STRING, "Current DNS Server:",
+ TABLE_STRING, global_info.current_dns_ex ?: global_info.current_dns);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = dump_list(table, "DNS Servers:", global_info.dns_ex ?: global_info.dns);
+ if (r < 0)
+ return r;
+
+ r = dump_list(table, "Fallback DNS Servers:", global_info.fallback_dns_ex ?: global_info.fallback_dns);
+ if (r < 0)
+ return r;
+
+ r = dump_list(table, "DNS Domain:", global_info.domains);
+ if (r < 0)
+ return r;
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ *empty_line = true;
+
+ return 0;
+}
+
+static int status_all(sd_bus *bus, StatusMode mode) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ bool empty_line = false;
+ int r;
+
+ assert(bus);
+
+ r = status_global(bus, mode, &empty_line);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ r = sd_rtnl_message_new_link(rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return rtnl_log_create_error(r);
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate links: %m");
+
+ _cleanup_free_ InterfaceInfo *infos = NULL;
+ size_t n_allocated = 0, n_infos = 0;
+
+ for (sd_netlink_message *i = reply; i; i = sd_netlink_message_next(i)) {
+ const char *name;
+ int ifindex;
+ uint16_t type;
+
+ r = sd_netlink_message_get_type(i, &type);
+ if (r < 0)
+ return rtnl_log_parse_error(r);
+
+ if (type != RTM_NEWLINK)
+ continue;
+
+ r = sd_rtnl_message_link_get_ifindex(i, &ifindex);
+ if (r < 0)
+ return rtnl_log_parse_error(r);
+
+ if (ifindex == LOOPBACK_IFINDEX)
+ continue;
+
+ r = sd_netlink_message_read_string(i, IFLA_IFNAME, &name);
+ if (r < 0)
+ return rtnl_log_parse_error(r);
+
+ if (!GREEDY_REALLOC(infos, n_allocated, n_infos + 1))
+ return log_oom();
+
+ infos[n_infos++] = (InterfaceInfo) { ifindex, name };
+ }
+
+ typesafe_qsort(infos, n_infos, interface_info_compare);
+
+ r = 0;
+ for (size_t i = 0; i < n_infos; i++) {
+ int q = status_ifindex(bus, infos[i].index, infos[i].name, mode, &empty_line);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int verb_status(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ int r = 0;
+
+ if (argc > 1) {
+ char **ifname;
+ bool empty_line = false;
+
+ STRV_FOREACH(ifname, argv + 1) {
+ int ifindex, q;
+
+ ifindex = resolve_interface(&rtnl, *ifname);
+ if (ifindex < 0) {
+ log_warning_errno(ifindex, "Failed to resolve interface \"%s\", ignoring: %m", *ifname);
+ continue;
+ }
+
+ q = status_ifindex(bus, ifindex, NULL, STATUS_ALL, &empty_line);
+ if (q < 0)
+ r = q;
+ }
+ } else
+ r = status_all(bus, STATUS_ALL);
+
+ return r;
+}
+
+static int call_dns(sd_bus *bus, char **dns, const BusLocator *locator, sd_bus_error *error, bool extended) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL;
+ char **p;
+ int r;
+
+ r = bus_message_new_method_call(bus, &req, locator, extended ? "SetLinkDNSEx" : "SetLinkDNS");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", arg_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(req, 'a', extended ? "(iayqs)" : "(iay)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* If only argument is the empty string, then call SetLinkDNS() with an
+ * empty list, which will clear the list of domains for an interface. */
+ if (!strv_equal(dns, STRV_MAKE("")))
+ STRV_FOREACH(p, dns) {
+ _cleanup_free_ char *name = NULL;
+ struct in_addr_data data;
+ uint16_t port;
+ int ifindex;
+
+ r = in_addr_port_ifindex_name_from_string_auto(*p, &data.family, &data.address, &port, &ifindex, &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse DNS server address: %s", *p);
+
+ if (ifindex != 0 && ifindex != arg_ifindex)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid ifindex: %i", ifindex);
+
+ r = sd_bus_message_open_container(req, 'r', extended ? "iayqs" : "iay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", data.family);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(req, 'y', &data.address, FAMILY_ADDRESS_SIZE(data.family));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (extended) {
+ r = sd_bus_message_append(req, "q", port);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "s", name);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(req);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(req);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, req, 0, error, NULL);
+ if (r < 0 && extended && sd_bus_error_has_name(error, SD_BUS_ERROR_UNKNOWN_METHOD)) {
+ sd_bus_error_free(error);
+ return call_dns(bus, dns, locator, error, false);
+ }
+ return r;
+}
+
+static int verb_dns(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_DNS);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_DNS, NULL);
+
+ r = call_dns(bus, argv + 2, bus_resolve_mgr, &error, true);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = call_dns(bus, argv + 2, bus_network_mgr, &error, true);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set DNS configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int call_domain(sd_bus *bus, char **domain, const BusLocator *locator, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL;
+ char **p;
+ int r;
+
+ r = bus_message_new_method_call(bus, &req, locator, "SetLinkDomains");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", arg_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(req, 'a', "(sb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* If only argument is the empty string, then call SetLinkDomains() with an
+ * empty list, which will clear the list of domains for an interface. */
+ if (!strv_equal(domain, STRV_MAKE("")))
+ STRV_FOREACH(p, domain) {
+ const char *n;
+
+ n = **p == '~' ? *p + 1 : *p;
+
+ r = dns_name_is_valid(n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to validate specified domain %s: %m", n);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Domain not valid: %s",
+ n);
+
+ r = sd_bus_message_append(req, "(sb)", n, **p == '~');
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(req);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return sd_bus_call(bus, req, 0, error, NULL);
+}
+
+static int verb_domain(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_DOMAIN);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_DOMAIN, NULL);
+
+ r = call_domain(bus, argv + 2, bus_resolve_mgr, &error);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = call_domain(bus, argv + 2, bus_network_mgr, &error);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set domain configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_default_route(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, b;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_DEFAULT_ROUTE);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_DEFAULT_ROUTE, NULL);
+
+ b = parse_boolean(argv[2]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse boolean argument: %s", argv[2]);
+
+ r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDefaultRoute", &error, NULL, "ib", arg_ifindex, b);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = bus_call_method(bus, bus_network_mgr, "SetLinkDefaultRoute", &error, NULL, "ib", arg_ifindex, b);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set default route configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_llmnr(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_LLMNR);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_LLMNR, NULL);
+
+ r = bus_call_method(bus, bus_resolve_mgr, "SetLinkLLMNR", &error, NULL, "is", arg_ifindex, argv[2]);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = bus_call_method(bus, bus_network_mgr, "SetLinkLLMNR", &error, NULL, "is", arg_ifindex, argv[2]);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set LLMNR configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_mdns(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_MDNS);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_MDNS, NULL);
+
+ r = bus_call_method(bus, bus_resolve_mgr, "SetLinkMulticastDNS", &error, NULL, "is", arg_ifindex, argv[2]);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = bus_call_method(
+ bus,
+ bus_network_mgr,
+ "SetLinkMulticastDNS",
+ &error,
+ NULL,
+ "is", arg_ifindex, argv[2]);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set MulticastDNS configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_dns_over_tls(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_PRIVATE);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_PRIVATE, NULL);
+
+ r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDNSOverTLS", &error, NULL, "is", arg_ifindex, argv[2]);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = bus_call_method(
+ bus,
+ bus_network_mgr,
+ "SetLinkDNSOverTLS",
+ &error,
+ NULL,
+ "is", arg_ifindex, argv[2]);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set DNSOverTLS configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_dnssec(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_DNSSEC);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_DNSSEC, NULL);
+
+ r = bus_call_method(bus, bus_resolve_mgr, "SetLinkDNSSEC", &error, NULL, "is", arg_ifindex, argv[2]);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = bus_call_method(bus, bus_network_mgr, "SetLinkDNSSEC", &error, NULL, "is", arg_ifindex, argv[2]);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set DNSSEC configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int call_nta(sd_bus *bus, char **nta, const BusLocator *locator, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL;
+ int r;
+
+ r = bus_message_new_method_call(bus, &req, locator, "SetLinkDNSSECNegativeTrustAnchors");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", arg_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(req, nta);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return sd_bus_call(bus, req, 0, error, NULL);
+}
+
+static int verb_nta(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ char **p;
+ int r;
+ bool clear;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return status_all(bus, STATUS_NTA);
+
+ if (argc < 3)
+ return status_ifindex(bus, arg_ifindex, NULL, STATUS_NTA, NULL);
+
+ /* If only argument is the empty string, then call SetLinkDNSSECNegativeTrustAnchors()
+ * with an empty list, which will clear the list of domains for an interface. */
+ clear = strv_equal(argv + 2, STRV_MAKE(""));
+
+ if (!clear)
+ STRV_FOREACH(p, argv + 2) {
+ r = dns_name_is_valid(*p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to validate specified domain %s: %m", *p);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Domain not valid: %s",
+ *p);
+ }
+
+ r = call_nta(bus, clear ? NULL : argv + 2, bus_resolve_mgr, &error);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = call_nta(bus, clear ? NULL : argv + 2, bus_network_mgr, &error);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to set DNSSEC NTA configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_revert_link(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc >= 2) {
+ r = ifname_mangle(argv[1]);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_ifindex <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Interface argument required.");
+
+ r = bus_call_method(bus, bus_resolve_mgr, "RevertLink", &error, NULL, "i", arg_ifindex);
+ if (r < 0 && sd_bus_error_has_name(&error, BUS_ERROR_LINK_BUSY)) {
+ sd_bus_error_free(&error);
+
+ r = bus_call_method(bus, bus_network_mgr, "RevertLinkDNS", &error, NULL, "i", arg_ifindex);
+ }
+ if (r < 0) {
+ if (arg_ifindex_permissive &&
+ sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_LINK))
+ return 0;
+
+ return log_error_errno(r, "Failed to revert interface configuration: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int verb_log_level(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ if (argc == 1) {
+ _cleanup_free_ char *level = NULL;
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/LogControl1",
+ "org.freedesktop.LogControl1",
+ "LogLevel",
+ &error,
+ &level);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get log level: %s", bus_error_message(&error, r));
+
+ puts(level);
+
+ } else {
+ assert(argc == 2);
+
+ r = sd_bus_set_property(
+ bus,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/LogControl1",
+ "org.freedesktop.LogControl1",
+ "LogLevel",
+ &error,
+ "s",
+ argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set log level: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static void help_protocol_types(void) {
+ if (arg_legend)
+ puts("Known protocol types:");
+ puts("dns\nllmnr\nllmnr-ipv4\nllmnr-ipv6\nmdns\nmdns-ipv4\nmdns-ipv6");
+}
+
+static void help_dns_types(void) {
+ if (arg_legend)
+ puts("Known DNS RR types:");
+
+ DUMP_STRING_TABLE(dns_type, int, _DNS_TYPE_MAX);
+}
+
+static void help_dns_classes(void) {
+ if (arg_legend)
+ puts("Known DNS RR classes:");
+
+ DUMP_STRING_TABLE(dns_class, int, _DNS_CLASS_MAX);
+}
+
+static int compat_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("resolvectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] HOSTNAME|ADDRESS...\n"
+ "%1$s [OPTIONS...] --service [[NAME] TYPE] DOMAIN\n"
+ "%1$s [OPTIONS...] --openpgp EMAIL@DOMAIN...\n"
+ "%1$s [OPTIONS...] --statistics\n"
+ "%1$s [OPTIONS...] --reset-statistics\n"
+ "\n"
+ "%2$sResolve domain names, IPv4 and IPv6 addresses, DNS records, and services.%3$s\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " -4 Resolve IPv4 addresses\n"
+ " -6 Resolve IPv6 addresses\n"
+ " -i --interface=INTERFACE Look on interface\n"
+ " -p --protocol=PROTO|help Look via protocol\n"
+ " -t --type=TYPE|help Query RR with DNS type\n"
+ " -c --class=CLASS|help Query RR with DNS class\n"
+ " --service Resolve service (SRV)\n"
+ " --service-address=BOOL Resolve address for services (default: yes)\n"
+ " --service-txt=BOOL Resolve TXT records for services (default: yes)\n"
+ " --openpgp Query OpenPGP public key\n"
+ " --tlsa Query TLS public key\n"
+ " --cname=BOOL Follow CNAME redirects (default: yes)\n"
+ " --search=BOOL Use search domains for single-label names\n"
+ " (default: yes)\n"
+ " --raw[=payload|packet] Dump the answer as binary data\n"
+ " --legend=BOOL Print headers and additional info (default: yes)\n"
+ " --statistics Show resolver statistics\n"
+ " --reset-statistics Reset resolver statistics\n"
+ " --status Show link and server status\n"
+ " --flush-caches Flush all local DNS caches\n"
+ " --reset-server-features\n"
+ " Forget learnt DNS server feature levels\n"
+ " --set-dns=SERVER Set per-interface DNS server address\n"
+ " --set-domain=DOMAIN Set per-interface search domain\n"
+ " --set-llmnr=MODE Set per-interface LLMNR mode\n"
+ " --set-mdns=MODE Set per-interface MulticastDNS mode\n"
+ " --set-dnsovertls=MODE Set per-interface DNS-over-TLS mode\n"
+ " --set-dnssec=MODE Set per-interface DNSSEC mode\n"
+ " --set-nta=DOMAIN Set per-interface DNSSEC NTA\n"
+ " --revert Revert per-interface configuration\n"
+ "\nSee the %4$s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int native_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("resolvectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n"
+ "\n"
+ "%sSend control commands to the network name resolution manager, or%s\n"
+ "%sresolve domain names, IPv4 and IPv6 addresses, DNS records, and services.%s\n"
+ "\nCommands:\n"
+ " query HOSTNAME|ADDRESS... Resolve domain names, IPv4 and IPv6 addresses\n"
+ " service [[NAME] TYPE] DOMAIN Resolve service (SRV)\n"
+ " openpgp EMAIL@DOMAIN... Query OpenPGP public key\n"
+ " tlsa DOMAIN[:PORT]... Query TLS public key\n"
+ " status [LINK...] Show link and server status\n"
+ " statistics Show resolver statistics\n"
+ " reset-statistics Reset resolver statistics\n"
+ " flush-caches Flush all local DNS caches\n"
+ " reset-server-features Forget learnt DNS server feature levels\n"
+ " dns [LINK [SERVER...]] Get/set per-interface DNS server address\n"
+ " domain [LINK [DOMAIN...]] Get/set per-interface search domain\n"
+ " default-route [LINK [BOOL]] Get/set per-interface default route flag\n"
+ " llmnr [LINK [MODE]] Get/set per-interface LLMNR mode\n"
+ " mdns [LINK [MODE]] Get/set per-interface MulticastDNS mode\n"
+ " dnsovertls [LINK [MODE]] Get/set per-interface DNS-over-TLS mode\n"
+ " dnssec [LINK [MODE]] Get/set per-interface DNSSEC mode\n"
+ " nta [LINK [DOMAIN...]] Get/set per-interface DNSSEC NTA\n"
+ " revert LINK Revert per-interface configuration\n"
+ " log-level [LEVEL] Get/set logging threshold for systemd-resolved\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " -4 Resolve IPv4 addresses\n"
+ " -6 Resolve IPv6 addresses\n"
+ " -i --interface=INTERFACE Look on interface\n"
+ " -p --protocol=PROTO|help Look via protocol\n"
+ " -t --type=TYPE|help Query RR with DNS type\n"
+ " -c --class=CLASS|help Query RR with DNS class\n"
+ " --service-address=BOOL Resolve address for services (default: yes)\n"
+ " --service-txt=BOOL Resolve TXT records for services (default: yes)\n"
+ " --cname=BOOL Follow CNAME redirects (default: yes)\n"
+ " --search=BOOL Use search domains for single-label names\n"
+ " (default: yes)\n"
+ " --raw[=payload|packet] Dump the answer as binary data\n"
+ " --legend=BOOL Print headers and additional info (default: yes)\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return native_help();
+}
+
+static int compat_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_LEGEND,
+ ARG_SERVICE,
+ ARG_CNAME,
+ ARG_SERVICE_ADDRESS,
+ ARG_SERVICE_TXT,
+ ARG_OPENPGP,
+ ARG_TLSA,
+ ARG_RAW,
+ ARG_SEARCH,
+ ARG_STATISTICS,
+ ARG_RESET_STATISTICS,
+ ARG_STATUS,
+ ARG_FLUSH_CACHES,
+ ARG_RESET_SERVER_FEATURES,
+ ARG_NO_PAGER,
+ ARG_SET_DNS,
+ ARG_SET_DOMAIN,
+ ARG_SET_LLMNR,
+ ARG_SET_MDNS,
+ ARG_SET_PRIVATE,
+ ARG_SET_DNSSEC,
+ ARG_SET_NTA,
+ ARG_REVERT_LINK,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "type", required_argument, NULL, 't' },
+ { "class", required_argument, NULL, 'c' },
+ { "legend", required_argument, NULL, ARG_LEGEND },
+ { "interface", required_argument, NULL, 'i' },
+ { "protocol", required_argument, NULL, 'p' },
+ { "cname", required_argument, NULL, ARG_CNAME },
+ { "service", no_argument, NULL, ARG_SERVICE },
+ { "service-address", required_argument, NULL, ARG_SERVICE_ADDRESS },
+ { "service-txt", required_argument, NULL, ARG_SERVICE_TXT },
+ { "openpgp", no_argument, NULL, ARG_OPENPGP },
+ { "tlsa", optional_argument, NULL, ARG_TLSA },
+ { "raw", optional_argument, NULL, ARG_RAW },
+ { "search", required_argument, NULL, ARG_SEARCH },
+ { "statistics", no_argument, NULL, ARG_STATISTICS, },
+ { "reset-statistics", no_argument, NULL, ARG_RESET_STATISTICS },
+ { "status", no_argument, NULL, ARG_STATUS },
+ { "flush-caches", no_argument, NULL, ARG_FLUSH_CACHES },
+ { "reset-server-features", no_argument, NULL, ARG_RESET_SERVER_FEATURES },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "set-dns", required_argument, NULL, ARG_SET_DNS },
+ { "set-domain", required_argument, NULL, ARG_SET_DOMAIN },
+ { "set-llmnr", required_argument, NULL, ARG_SET_LLMNR },
+ { "set-mdns", required_argument, NULL, ARG_SET_MDNS },
+ { "set-dnsovertls", required_argument, NULL, ARG_SET_PRIVATE },
+ { "set-dnssec", required_argument, NULL, ARG_SET_DNSSEC },
+ { "set-nta", required_argument, NULL, ARG_SET_NTA },
+ { "revert", no_argument, NULL, ARG_REVERT_LINK },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h46i:t:c:p:", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return compat_help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '4':
+ arg_family = AF_INET;
+ break;
+
+ case '6':
+ arg_family = AF_INET6;
+ break;
+
+ case 'i':
+ r = ifname_mangle(optarg);
+ if (r < 0)
+ return r;
+ break;
+
+ case 't':
+ if (streq(optarg, "help")) {
+ help_dns_types();
+ return 0;
+ }
+
+ r = dns_type_from_string(optarg);
+ if (r < 0) {
+ log_error("Failed to parse RR record type %s", optarg);
+ return r;
+ }
+ arg_type = (uint16_t) r;
+ assert((int) arg_type == r);
+
+ arg_mode = MODE_RESOLVE_RECORD;
+ break;
+
+ case 'c':
+ if (streq(optarg, "help")) {
+ help_dns_classes();
+ return 0;
+ }
+
+ r = dns_class_from_string(optarg);
+ if (r < 0) {
+ log_error("Failed to parse RR record class %s", optarg);
+ return r;
+ }
+ arg_class = (uint16_t) r;
+ assert((int) arg_class == r);
+
+ break;
+
+ case ARG_LEGEND:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --legend= argument");
+
+ arg_legend = r;
+ break;
+
+ case 'p':
+ if (streq(optarg, "help")) {
+ help_protocol_types();
+ return 0;
+ } else if (streq(optarg, "dns"))
+ arg_flags |= SD_RESOLVED_DNS;
+ else if (streq(optarg, "llmnr"))
+ arg_flags |= SD_RESOLVED_LLMNR;
+ else if (streq(optarg, "llmnr-ipv4"))
+ arg_flags |= SD_RESOLVED_LLMNR_IPV4;
+ else if (streq(optarg, "llmnr-ipv6"))
+ arg_flags |= SD_RESOLVED_LLMNR_IPV6;
+ else if (streq(optarg, "mdns"))
+ arg_flags |= SD_RESOLVED_MDNS;
+ else if (streq(optarg, "mdns-ipv4"))
+ arg_flags |= SD_RESOLVED_MDNS_IPV4;
+ else if (streq(optarg, "mdns-ipv6"))
+ arg_flags |= SD_RESOLVED_MDNS_IPV6;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown protocol specifier: %s", optarg);
+
+ break;
+
+ case ARG_SERVICE:
+ arg_mode = MODE_RESOLVE_SERVICE;
+ break;
+
+ case ARG_OPENPGP:
+ arg_mode = MODE_RESOLVE_OPENPGP;
+ break;
+
+ case ARG_TLSA:
+ arg_mode = MODE_RESOLVE_TLSA;
+ if (!optarg || service_family_is_valid(optarg))
+ arg_service_family = optarg;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown service family \"%s\".", optarg);
+ break;
+
+ case ARG_RAW:
+ if (on_tty())
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY),
+ "Refusing to write binary data to tty.");
+
+ if (optarg == NULL || streq(optarg, "payload"))
+ arg_raw = RAW_PAYLOAD;
+ else if (streq(optarg, "packet"))
+ arg_raw = RAW_PACKET;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown --raw specifier \"%s\".",
+ optarg);
+
+ arg_legend = false;
+ break;
+
+ case ARG_CNAME:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --cname= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_CNAME, r == 0);
+ break;
+
+ case ARG_SERVICE_ADDRESS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --service-address= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0);
+ break;
+
+ case ARG_SERVICE_TXT:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --service-txt= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0);
+ break;
+
+ case ARG_SEARCH:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --search argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_SEARCH, r == 0);
+ break;
+
+ case ARG_STATISTICS:
+ arg_mode = MODE_STATISTICS;
+ break;
+
+ case ARG_RESET_STATISTICS:
+ arg_mode = MODE_RESET_STATISTICS;
+ break;
+
+ case ARG_FLUSH_CACHES:
+ arg_mode = MODE_FLUSH_CACHES;
+ break;
+
+ case ARG_RESET_SERVER_FEATURES:
+ arg_mode = MODE_RESET_SERVER_FEATURES;
+ break;
+
+ case ARG_STATUS:
+ arg_mode = MODE_STATUS;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_SET_DNS:
+ r = strv_extend(&arg_set_dns, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_DOMAIN:
+ r = strv_extend(&arg_set_domain, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_LLMNR:
+ arg_set_llmnr = optarg;
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_MDNS:
+ arg_set_mdns = optarg;
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_PRIVATE:
+ arg_set_dns_over_tls = optarg;
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_DNSSEC:
+ arg_set_dnssec = optarg;
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_SET_NTA:
+ r = strv_extend(&arg_set_nta, optarg);
+ if (r < 0)
+ return log_oom();
+
+ arg_mode = MODE_SET_LINK;
+ break;
+
+ case ARG_REVERT_LINK:
+ arg_mode = MODE_REVERT_LINK;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_type == 0 && arg_class != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--class= may only be used in conjunction with --type=.");
+
+ if (arg_type != 0 && arg_mode == MODE_RESOLVE_SERVICE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--service and --type= may not be combined.");
+
+ if (arg_type != 0 && arg_class == 0)
+ arg_class = DNS_CLASS_IN;
+
+ if (arg_class != 0 && arg_type == 0)
+ arg_type = DNS_TYPE_A;
+
+ if (IN_SET(arg_mode, MODE_SET_LINK, MODE_REVERT_LINK)) {
+
+ if (arg_ifindex <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--set-dns=, --set-domain=, --set-llmnr=, --set-mdns=, --set-dnsovertls=, --set-dnssec=, --set-nta= and --revert require --interface=.");
+ }
+
+ return 1 /* work to do */;
+}
+
+static int native_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_LEGEND,
+ ARG_CNAME,
+ ARG_SERVICE_ADDRESS,
+ ARG_SERVICE_TXT,
+ ARG_RAW,
+ ARG_SEARCH,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "type", required_argument, NULL, 't' },
+ { "class", required_argument, NULL, 'c' },
+ { "legend", required_argument, NULL, ARG_LEGEND },
+ { "interface", required_argument, NULL, 'i' },
+ { "protocol", required_argument, NULL, 'p' },
+ { "cname", required_argument, NULL, ARG_CNAME },
+ { "service-address", required_argument, NULL, ARG_SERVICE_ADDRESS },
+ { "service-txt", required_argument, NULL, ARG_SERVICE_TXT },
+ { "raw", optional_argument, NULL, ARG_RAW },
+ { "search", required_argument, NULL, ARG_SEARCH },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h46i:t:c:p:", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return native_help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '4':
+ arg_family = AF_INET;
+ break;
+
+ case '6':
+ arg_family = AF_INET6;
+ break;
+
+ case 'i':
+ r = ifname_mangle(optarg);
+ if (r < 0)
+ return r;
+ break;
+
+ case 't':
+ if (streq(optarg, "help")) {
+ help_dns_types();
+ return 0;
+ }
+
+ r = dns_type_from_string(optarg);
+ if (r < 0) {
+ log_error("Failed to parse RR record type %s", optarg);
+ return r;
+ }
+ arg_type = (uint16_t) r;
+ assert((int) arg_type == r);
+
+ break;
+
+ case 'c':
+ if (streq(optarg, "help")) {
+ help_dns_classes();
+ return 0;
+ }
+
+ r = dns_class_from_string(optarg);
+ if (r < 0) {
+ log_error("Failed to parse RR record class %s", optarg);
+ return r;
+ }
+ arg_class = (uint16_t) r;
+ assert((int) arg_class == r);
+
+ break;
+
+ case ARG_LEGEND:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --legend= argument");
+
+ arg_legend = r;
+ break;
+
+ case 'p':
+ if (streq(optarg, "help")) {
+ help_protocol_types();
+ return 0;
+ } else if (streq(optarg, "dns"))
+ arg_flags |= SD_RESOLVED_DNS;
+ else if (streq(optarg, "llmnr"))
+ arg_flags |= SD_RESOLVED_LLMNR;
+ else if (streq(optarg, "llmnr-ipv4"))
+ arg_flags |= SD_RESOLVED_LLMNR_IPV4;
+ else if (streq(optarg, "llmnr-ipv6"))
+ arg_flags |= SD_RESOLVED_LLMNR_IPV6;
+ else if (streq(optarg, "mdns"))
+ arg_flags |= SD_RESOLVED_MDNS;
+ else if (streq(optarg, "mdns-ipv4"))
+ arg_flags |= SD_RESOLVED_MDNS_IPV4;
+ else if (streq(optarg, "mdns-ipv6"))
+ arg_flags |= SD_RESOLVED_MDNS_IPV6;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown protocol specifier: %s",
+ optarg);
+
+ break;
+
+ case ARG_RAW:
+ if (on_tty())
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTTY),
+ "Refusing to write binary data to tty.");
+
+ if (optarg == NULL || streq(optarg, "payload"))
+ arg_raw = RAW_PAYLOAD;
+ else if (streq(optarg, "packet"))
+ arg_raw = RAW_PACKET;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown --raw specifier \"%s\".",
+ optarg);
+
+ arg_legend = false;
+ break;
+
+ case ARG_CNAME:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --cname= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_CNAME, r == 0);
+ break;
+
+ case ARG_SERVICE_ADDRESS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --service-address= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_ADDRESS, r == 0);
+ break;
+
+ case ARG_SERVICE_TXT:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --service-txt= argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_TXT, r == 0);
+ break;
+
+ case ARG_SEARCH:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --search argument.");
+ SET_FLAG(arg_flags, SD_RESOLVED_NO_SEARCH, r == 0);
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_type == 0 && arg_class != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--class= may only be used in conjunction with --type=.");
+
+ if (arg_type != 0 && arg_class == 0)
+ arg_class = DNS_CLASS_IN;
+
+ if (arg_class != 0 && arg_type == 0)
+ arg_type = DNS_TYPE_A;
+
+ return 1 /* work to do */;
+}
+
+static int native_main(int argc, char *argv[], sd_bus *bus) {
+
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help },
+ { "status", VERB_ANY, VERB_ANY, VERB_DEFAULT, verb_status },
+ { "query", 2, VERB_ANY, 0, verb_query },
+ { "service", 2, 4, 0, verb_service },
+ { "openpgp", 2, VERB_ANY, 0, verb_openpgp },
+ { "tlsa", 2, VERB_ANY, 0, verb_tlsa },
+ { "statistics", VERB_ANY, 1, 0, show_statistics },
+ { "reset-statistics", VERB_ANY, 1, 0, reset_statistics },
+ { "flush-caches", VERB_ANY, 1, 0, flush_caches },
+ { "reset-server-features", VERB_ANY, 1, 0, reset_server_features },
+ { "dns", VERB_ANY, VERB_ANY, 0, verb_dns },
+ { "domain", VERB_ANY, VERB_ANY, 0, verb_domain },
+ { "default-route", VERB_ANY, 3, 0, verb_default_route },
+ { "llmnr", VERB_ANY, 3, 0, verb_llmnr },
+ { "mdns", VERB_ANY, 3, 0, verb_mdns },
+ { "dnsovertls", VERB_ANY, 3, 0, verb_dns_over_tls },
+ { "dnssec", VERB_ANY, 3, 0, verb_dnssec },
+ { "nta", VERB_ANY, VERB_ANY, 0, verb_nta },
+ { "revert", VERB_ANY, 2, 0, verb_revert_link },
+ { "log-level", VERB_ANY, 2, 0, verb_log_level },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int translate(const char *verb, const char *single_arg, size_t num_args, char **args, sd_bus *bus) {
+ char **fake, **p;
+ size_t num;
+
+ assert(verb);
+ assert(num_args == 0 || args);
+
+ num = !!single_arg + num_args + 1;
+
+ p = fake = newa0(char *, num + 1);
+ *p++ = (char *) verb;
+ if (single_arg)
+ *p++ = (char *) single_arg;
+ for (size_t i = 0; i < num_args; i++)
+ *p++ = args[i];
+
+ optind = 0;
+ return native_main((int) num, fake, bus);
+}
+
+static int compat_main(int argc, char *argv[], sd_bus *bus) {
+ int r = 0;
+
+ switch (arg_mode) {
+ case MODE_RESOLVE_HOST:
+ case MODE_RESOLVE_RECORD:
+ return translate("query", NULL, argc - optind, argv + optind, bus);
+
+ case MODE_RESOLVE_SERVICE:
+ return translate("service", NULL, argc - optind, argv + optind, bus);
+
+ case MODE_RESOLVE_OPENPGP:
+ return translate("openpgp", NULL, argc - optind, argv + optind, bus);
+
+ case MODE_RESOLVE_TLSA:
+ return translate("tlsa", arg_service_family, argc - optind, argv + optind, bus);
+
+ case MODE_STATISTICS:
+ return translate("statistics", NULL, 0, NULL, bus);
+
+ case MODE_RESET_STATISTICS:
+ return translate("reset-statistics", NULL, 0, NULL, bus);
+
+ case MODE_FLUSH_CACHES:
+ return translate("flush-caches", NULL, 0, NULL, bus);
+
+ case MODE_RESET_SERVER_FEATURES:
+ return translate("reset-server-features", NULL, 0, NULL, bus);
+
+ case MODE_STATUS:
+ return translate("status", NULL, argc - optind, argv + optind, bus);
+
+ case MODE_SET_LINK:
+ assert(arg_ifname);
+
+ if (arg_set_dns) {
+ r = translate("dns", arg_ifname, strv_length(arg_set_dns), arg_set_dns, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_domain) {
+ r = translate("domain", arg_ifname, strv_length(arg_set_domain), arg_set_domain, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_nta) {
+ r = translate("nta", arg_ifname, strv_length(arg_set_nta), arg_set_nta, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_llmnr) {
+ r = translate("llmnr", arg_ifname, 1, (char **) &arg_set_llmnr, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_mdns) {
+ r = translate("mdns", arg_ifname, 1, (char **) &arg_set_mdns, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_dns_over_tls) {
+ r = translate("dnsovertls", arg_ifname, 1, (char **) &arg_set_dns_over_tls, bus);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_set_dnssec) {
+ r = translate("dnssec", arg_ifname, 1, (char **) &arg_set_dnssec, bus);
+ if (r < 0)
+ return r;
+ }
+
+ return r;
+
+ case MODE_REVERT_LINK:
+ assert(arg_ifname);
+
+ return translate("revert", arg_ifname, 0, NULL, bus);
+
+ case _MODE_INVALID:
+ assert_not_reached("invalid mode");
+ }
+
+ return 0;
+}
+
+static int run(int argc, char **argv) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_setup_cli();
+
+ if (streq(program_invocation_short_name, "resolvconf"))
+ r = resolvconf_parse_argv(argc, argv);
+ else if (streq(program_invocation_short_name, "systemd-resolve"))
+ r = compat_parse_argv(argc, argv);
+ else
+ r = native_parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "sd_bus_open_system: %m");
+
+ if (STR_IN_SET(program_invocation_short_name, "systemd-resolve", "resolvconf"))
+ return compat_main(argc, argv, bus);
+
+ return native_main(argc, argv, bus);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/resolve/resolvectl.h b/src/resolve/resolvectl.h
new file mode 100644
index 0000000..830c81d
--- /dev/null
+++ b/src/resolve/resolvectl.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <in-addr-util.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+typedef enum ExecutionMode {
+ MODE_RESOLVE_HOST,
+ MODE_RESOLVE_RECORD,
+ MODE_RESOLVE_SERVICE,
+ MODE_RESOLVE_OPENPGP,
+ MODE_RESOLVE_TLSA,
+ MODE_STATISTICS,
+ MODE_RESET_STATISTICS,
+ MODE_FLUSH_CACHES,
+ MODE_RESET_SERVER_FEATURES,
+ MODE_STATUS,
+ MODE_SET_LINK,
+ MODE_REVERT_LINK,
+ _MODE_INVALID = -1,
+} ExecutionMode;
+
+extern ExecutionMode arg_mode;
+extern char **arg_set_dns;
+extern char **arg_set_domain;
+extern bool arg_ifindex_permissive;
+
+int ifname_mangle(const char *s);
diff --git a/src/resolve/resolved-bus.c b/src/resolve/resolved-bus.c
new file mode 100644
index 0000000..dca9b88
--- /dev/null
+++ b/src/resolve/resolved-bus.c
@@ -0,0 +1,2236 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-log-control-api.h"
+#include "bus-message-util.h"
+#include "bus-polkit.h"
+#include "dns-domain.h"
+#include "memory-util.h"
+#include "missing_capability.h"
+#include "resolved-bus.h"
+#include "resolved-def.h"
+#include "resolved-dns-synthesize.h"
+#include "resolved-dnssd-bus.h"
+#include "resolved-dnssd.h"
+#include "resolved-link-bus.h"
+#include "resolved-resolv-conf.h"
+#include "socket-netlink.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "user-util.h"
+#include "utf8.h"
+
+BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_resolve_support, resolve_support, ResolveSupport);
+
+static int query_on_bus_track(sd_bus_track *t, void *userdata) {
+ DnsQuery *q = userdata;
+
+ assert(t);
+ assert(q);
+
+ if (!DNS_TRANSACTION_IS_LIVE(q->state))
+ return 0;
+
+ log_debug("Client of active query vanished, aborting query.");
+ dns_query_complete(q, DNS_TRANSACTION_ABORTED);
+ return 0;
+}
+
+static int dns_query_bus_track(DnsQuery *q, sd_bus_message *m) {
+ int r;
+
+ assert(q);
+ assert(m);
+
+ if (!q->bus_track) {
+ r = sd_bus_track_new(sd_bus_message_get_bus(m), &q->bus_track, query_on_bus_track, q);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_track_add_sender(q->bus_track, m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int reply_query_state(DnsQuery *q) {
+
+ assert(q);
+ assert(q->bus_request);
+
+ switch (q->state) {
+
+ case DNS_TRANSACTION_NO_SERVERS:
+ return sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NO_NAME_SERVERS, "No appropriate name servers or networks for name found");
+
+ case DNS_TRANSACTION_TIMEOUT:
+ return sd_bus_reply_method_errorf(q->bus_request, SD_BUS_ERROR_TIMEOUT, "Query timed out");
+
+ case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
+ return sd_bus_reply_method_errorf(q->bus_request, SD_BUS_ERROR_TIMEOUT, "All attempts to contact name servers or networks failed");
+
+ case DNS_TRANSACTION_INVALID_REPLY:
+ return sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_INVALID_REPLY, "Received invalid reply");
+
+ case DNS_TRANSACTION_ERRNO:
+ return sd_bus_reply_method_errnof(q->bus_request, q->answer_errno, "Lookup failed due to system error: %m");
+
+ case DNS_TRANSACTION_ABORTED:
+ return sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_ABORTED, "Query aborted");
+
+ case DNS_TRANSACTION_DNSSEC_FAILED:
+ return sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_DNSSEC_FAILED, "DNSSEC validation failed: %s",
+ dnssec_result_to_string(q->answer_dnssec_result));
+
+ case DNS_TRANSACTION_NO_TRUST_ANCHOR:
+ return sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NO_TRUST_ANCHOR, "No suitable trust anchor known");
+
+ case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
+ return sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_RR_TYPE_UNSUPPORTED, "Server does not support requested resource record type");
+
+ case DNS_TRANSACTION_NETWORK_DOWN:
+ return sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NETWORK_DOWN, "Network is down");
+
+ case DNS_TRANSACTION_NOT_FOUND:
+ /* We return this as NXDOMAIN. This is only generated when a host doesn't implement LLMNR/TCP, and we
+ * thus quickly know that we cannot resolve an in-addr.arpa or ip6.arpa address. */
+ return sd_bus_reply_method_errorf(q->bus_request, _BUS_ERROR_DNS "NXDOMAIN", "'%s' not found", dns_query_string(q));
+
+ case DNS_TRANSACTION_RCODE_FAILURE: {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ if (q->answer_rcode == DNS_RCODE_NXDOMAIN)
+ sd_bus_error_setf(&error, _BUS_ERROR_DNS "NXDOMAIN", "'%s' not found", dns_query_string(q));
+ else {
+ const char *rc, *n;
+ char p[DECIMAL_STR_MAX(q->answer_rcode)];
+
+ rc = dns_rcode_to_string(q->answer_rcode);
+ if (!rc) {
+ xsprintf(p, "%i", q->answer_rcode);
+ rc = p;
+ }
+
+ n = strjoina(_BUS_ERROR_DNS, rc);
+ sd_bus_error_setf(&error, n, "Could not resolve '%s', server or network returned error %s", dns_query_string(q), rc);
+ }
+
+ return sd_bus_reply_method_error(q->bus_request, &error);
+ }
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ case DNS_TRANSACTION_SUCCESS:
+ default:
+ assert_not_reached("Impossible state");
+ }
+}
+
+static int append_address(sd_bus_message *reply, DnsResourceRecord *rr, int ifindex) {
+ int r;
+
+ assert(reply);
+ assert(rr);
+
+ r = sd_bus_message_open_container(reply, 'r', "iiay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", ifindex);
+ if (r < 0)
+ return r;
+
+ if (rr->key->type == DNS_TYPE_A) {
+ r = sd_bus_message_append(reply, "i", AF_INET);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &rr->a.in_addr, sizeof(struct in_addr));
+
+ } else if (rr->key->type == DNS_TYPE_AAAA) {
+ r = sd_bus_message_append(reply, "i", AF_INET6);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &rr->aaaa.in6_addr, sizeof(struct in6_addr));
+ } else
+ return -EAFNOSUPPORT;
+
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void bus_method_resolve_hostname_complete(DnsQuery *q) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *canonical = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *normalized = NULL;
+ DnsQuestion *question;
+ DnsResourceRecord *rr;
+ unsigned added = 0;
+ int ifindex, r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(q));
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ r = sd_bus_message_new_method_return(q->bus_request, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ goto finish;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+
+ r = dns_question_matches_rr(question, rr, DNS_SEARCH_DOMAIN_NAME(q->answer_search_domain));
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = append_address(reply, rr, ifindex);
+ if (r < 0)
+ goto finish;
+
+ if (!canonical)
+ canonical = dns_resource_record_ref(rr);
+
+ added++;
+ }
+
+ if (added <= 0) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NO_SUCH_RR, "'%s' does not have any RR of the requested type", dns_query_string(q));
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ /* The key names are not necessarily normalized, make sure that they are when we return them to our
+ * bus clients. */
+ assert(canonical);
+ r = dns_name_normalize(dns_resource_key_name(canonical->key), 0, &normalized);
+ if (r < 0)
+ goto finish;
+
+ /* Return the precise spelling and uppercasing and CNAME target reported by the server */
+ r = sd_bus_message_append(
+ reply, "st",
+ normalized,
+ SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q)));
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(q->manager->bus, reply, NULL);
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send hostname reply: %m");
+ sd_bus_reply_method_errno(q->bus_request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static int validate_and_mangle_flags(
+ const char *name,
+ uint64_t *flags,
+ uint64_t ok,
+ sd_bus_error *error) {
+
+ assert(flags);
+
+ /* Checks that the client supplied interface index and flags parameter actually are valid and make
+ * sense in our method call context. Specifically:
+ *
+ * 1. Checks that the interface index is either 0 (meaning *all* interfaces) or positive
+ *
+ * 2. Only the protocols flags and the NO_CNAME flag are set, at most. Plus additional flags specific
+ * to our method, passed in the "ok" parameter.
+ *
+ * 3. If zero protocol flags are specified it is automatically turned into *all* protocols. This way
+ * clients can simply pass 0 as flags and all will work as it should. They can also use this so
+ * that clients don't have to know all the protocols resolved implements, but can just specify 0
+ * to mean "all supported protocols".
+ */
+
+ if (*flags & ~(SD_RESOLVED_PROTOCOLS_ALL|SD_RESOLVED_NO_CNAME|ok))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid flags parameter");
+
+ if ((*flags & SD_RESOLVED_PROTOCOLS_ALL) == 0) /* If no protocol is enabled, enable all */
+ *flags |= SD_RESOLVED_PROTOCOLS_ALL;
+
+ /* Imply SD_RESOLVED_NO_SEARCH if permitted and name is dot suffixed. */
+ if (name && FLAGS_SET(ok, SD_RESOLVED_NO_SEARCH) && dns_name_dot_suffixed(name) > 0)
+ *flags |= SD_RESOLVED_NO_SEARCH;
+
+ return 0;
+}
+
+static int parse_as_address(sd_bus_message *m, int ifindex, const char *hostname, int family, uint64_t flags) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *canonical = NULL;
+ union in_addr_union parsed;
+ int r, ff, parsed_ifindex = 0;
+
+ /* Check if the hostname is actually already an IP address formatted as string. In that case just parse it,
+ * let's not attempt to look it up. */
+
+ r = in_addr_ifindex_from_string_auto(hostname, &ff, &parsed, &parsed_ifindex);
+ if (r < 0) /* not an address */
+ return 0;
+
+ if (family != AF_UNSPEC && ff != family)
+ return sd_bus_reply_method_errorf(m, BUS_ERROR_NO_SUCH_RR, "The specified address is not of the requested family.");
+ if (ifindex > 0 && parsed_ifindex > 0 && parsed_ifindex != ifindex)
+ return sd_bus_reply_method_errorf(m, BUS_ERROR_NO_SUCH_RR, "The specified address interface index does not match requested interface.");
+
+ if (parsed_ifindex > 0)
+ ifindex = parsed_ifindex;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'r', "iiay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "ii", ifindex, ff);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &parsed, FAMILY_ADDRESS_SIZE(ff));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ /* When an IP address is specified we just return it as canonical name, in order to avoid a DNS
+ * look-up. However, we reformat it to make sure it's in a truly canonical form (i.e. on IPv6 the inner
+ * omissions are always done the same way). */
+ r = in_addr_ifindex_to_string(ff, &parsed, ifindex, &canonical);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "st", canonical,
+ SD_RESOLVED_FLAGS_MAKE(dns_synthesize_protocol(flags), ff, true));
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(sd_bus_message_get_bus(m), reply, NULL);
+}
+
+static int bus_method_resolve_hostname(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question_idna = NULL, *question_utf8 = NULL;
+ Manager *m = userdata;
+ const char *hostname;
+ int family, ifindex;
+ uint64_t flags;
+ DnsQuery *q;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "isit", &ifindex, &hostname, &family, &flags);
+ if (r < 0)
+ return r;
+
+ if (ifindex < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index");
+
+ if (!IN_SET(family, AF_INET, AF_INET6, AF_UNSPEC))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ r = validate_and_mangle_flags(hostname, &flags, SD_RESOLVED_NO_SEARCH, error);
+ if (r < 0)
+ return r;
+
+ r = parse_as_address(message, ifindex, hostname, family, flags);
+ if (r != 0)
+ return r;
+
+ r = dns_name_is_valid(hostname);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid hostname '%s'", hostname);
+
+ r = dns_question_new_address(&question_utf8, family, hostname, false);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_address(&question_idna, family, hostname, true);
+ if (r < 0 && r != -EALREADY)
+ return r;
+
+ r = dns_query_new(m, &q, question_utf8, question_idna ?: question_utf8, ifindex, flags);
+ if (r < 0)
+ return r;
+
+ q->bus_request = sd_bus_message_ref(message);
+ q->request_family = family;
+ q->complete = bus_method_resolve_hostname_complete;
+
+ r = dns_query_bus_track(q, message);
+ if (r < 0)
+ goto fail;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+static void bus_method_resolve_address_complete(DnsQuery *q) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ DnsQuestion *question;
+ DnsResourceRecord *rr;
+ unsigned added = 0;
+ int ifindex, r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(q));
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ r = sd_bus_message_new_method_return(q->bus_request, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "(is)");
+ if (r < 0)
+ goto finish;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ _cleanup_free_ char *normalized = NULL;
+
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = dns_name_normalize(rr->ptr.name, 0, &normalized);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_append(reply, "(is)", ifindex, normalized);
+ if (r < 0)
+ goto finish;
+
+ added++;
+ }
+
+ if (added <= 0) {
+ _cleanup_free_ char *ip = NULL;
+
+ (void) in_addr_to_string(q->request_family, &q->request_address, &ip);
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NO_SUCH_RR,
+ "Address '%s' does not have any RR of requested type", strnull(ip));
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_append(reply, "t", SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q)));
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(q->manager->bus, reply, NULL);
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send address reply: %m");
+ sd_bus_reply_method_errno(q->bus_request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static int bus_method_resolve_address(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ Manager *m = userdata;
+ union in_addr_union a;
+ int family, ifindex;
+ uint64_t flags;
+ DnsQuery *q;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "i", &ifindex);
+ if (r < 0)
+ return r;
+
+ r = bus_message_read_in_addr_auto(message, error, &family, &a);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "t", &flags);
+ if (r < 0)
+ return r;
+
+ if (ifindex < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index");
+
+ r = validate_and_mangle_flags(NULL, &flags, 0, error);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_reverse(&question, family, &a);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(m, &q, question, question, ifindex, flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ q->bus_request = sd_bus_message_ref(message);
+ q->request_family = family;
+ q->request_address = a;
+ q->complete = bus_method_resolve_address_complete;
+
+ r = dns_query_bus_track(q, message);
+ if (r < 0)
+ goto fail;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+static int bus_message_append_rr(sd_bus_message *m, DnsResourceRecord *rr, int ifindex) {
+ int r;
+
+ assert(m);
+ assert(rr);
+
+ r = sd_bus_message_open_container(m, 'r', "iqqay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "iqq",
+ ifindex,
+ rr->key->class,
+ rr->key->type);
+ if (r < 0)
+ return r;
+
+ r = dns_resource_record_to_wire_format(rr, false);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(m, 'y', rr->wire_format, rr->wire_format_size);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(m);
+}
+
+static void bus_method_resolve_record_complete(DnsQuery *q) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ DnsResourceRecord *rr;
+ DnsQuestion *question;
+ unsigned added = 0;
+ int ifindex;
+ int r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(q));
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ r = sd_bus_message_new_method_return(q->bus_request, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iqqay)");
+ if (r < 0)
+ goto finish;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = bus_message_append_rr(reply, rr, ifindex);
+ if (r < 0)
+ goto finish;
+
+ added++;
+ }
+
+ if (added <= 0) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NO_SUCH_RR, "Name '%s' does not have any RR of the requested type", dns_query_string(q));
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_append(reply, "t", SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q)));
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(q->manager->bus, reply, NULL);
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send record reply: %m");
+ sd_bus_reply_method_errno(q->bus_request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static int bus_method_resolve_record(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ Manager *m = userdata;
+ uint16_t class, type;
+ const char *name;
+ int r, ifindex;
+ uint64_t flags;
+ DnsQuery *q;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "isqqt", &ifindex, &name, &class, &type, &flags);
+ if (r < 0)
+ return r;
+
+ if (ifindex < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index");
+
+ r = dns_name_is_valid(name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid name '%s'", name);
+
+ if (!dns_type_is_valid_query(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Specified resource record type %" PRIu16 " may not be used in a query.", type);
+ if (dns_type_is_zone_transer(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Zone transfers not permitted via this programming interface.");
+ if (dns_type_is_obsolete(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Specified DNS resource record type %" PRIu16 " is obsolete.", type);
+
+ r = validate_and_mangle_flags(name, &flags, 0, error);
+ if (r < 0)
+ return r;
+
+ question = dns_question_new(1);
+ if (!question)
+ return -ENOMEM;
+
+ key = dns_resource_key_new(class, type, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(question, key);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(m, &q, question, question, ifindex, flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ /* Let's request that the TTL is fixed up for locally cached entries, after all we return it in the wire format
+ * blob */
+ q->clamp_ttl = true;
+
+ q->bus_request = sd_bus_message_ref(message);
+ q->complete = bus_method_resolve_record_complete;
+
+ r = dns_query_bus_track(q, message);
+ if (r < 0)
+ goto fail;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+static int append_srv(DnsQuery *q, sd_bus_message *reply, DnsResourceRecord *rr) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *canonical = NULL;
+ _cleanup_free_ char *normalized = NULL;
+ DnsQuery *aux;
+ int r;
+
+ assert(q);
+ assert(reply);
+ assert(rr);
+ assert(rr->key);
+
+ if (rr->key->type != DNS_TYPE_SRV)
+ return 0;
+
+ if ((q->flags & SD_RESOLVED_NO_ADDRESS) == 0) {
+ /* First, let's see if we could find an appropriate A or AAAA
+ * record for the SRV record */
+ LIST_FOREACH(auxiliary_queries, aux, q->auxiliary_queries) {
+ DnsResourceRecord *zz;
+ DnsQuestion *question;
+
+ if (aux->state != DNS_TRANSACTION_SUCCESS)
+ continue;
+ if (aux->auxiliary_result != 0)
+ continue;
+
+ question = dns_query_question_for_protocol(aux, aux->answer_protocol);
+
+ r = dns_name_equal(dns_question_first_name(question), rr->srv.name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ DNS_ANSWER_FOREACH(zz, aux->answer) {
+
+ r = dns_question_matches_rr(question, zz, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ canonical = dns_resource_record_ref(zz);
+ break;
+ }
+
+ if (canonical)
+ break;
+ }
+
+ /* Is there are successful A/AAAA lookup for this SRV RR? If not, don't add it */
+ if (!canonical)
+ return 0;
+ }
+
+ r = sd_bus_message_open_container(reply, 'r', "qqqsa(iiay)s");
+ if (r < 0)
+ return r;
+
+ r = dns_name_normalize(rr->srv.name, 0, &normalized);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ reply,
+ "qqqs",
+ rr->srv.priority, rr->srv.weight, rr->srv.port, normalized);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "(iiay)");
+ if (r < 0)
+ return r;
+
+ if ((q->flags & SD_RESOLVED_NO_ADDRESS) == 0) {
+ LIST_FOREACH(auxiliary_queries, aux, q->auxiliary_queries) {
+ DnsResourceRecord *zz;
+ DnsQuestion *question;
+ int ifindex;
+
+ if (aux->state != DNS_TRANSACTION_SUCCESS)
+ continue;
+ if (aux->auxiliary_result != 0)
+ continue;
+
+ question = dns_query_question_for_protocol(aux, aux->answer_protocol);
+
+ r = dns_name_equal(dns_question_first_name(question), rr->srv.name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ DNS_ANSWER_FOREACH_IFINDEX(zz, ifindex, aux->answer) {
+
+ r = dns_question_matches_rr(question, zz, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = append_address(reply, zz, ifindex);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ if (canonical) {
+ normalized = mfree(normalized);
+
+ r = dns_name_normalize(dns_resource_key_name(canonical->key), 0, &normalized);
+ if (r < 0)
+ return r;
+ }
+
+ /* Note that above we appended the hostname as encoded in the
+ * SRV, and here the canonical hostname this maps to. */
+ r = sd_bus_message_append(reply, "s", normalized);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int append_txt(sd_bus_message *reply, DnsResourceRecord *rr) {
+ DnsTxtItem *i;
+ int r;
+
+ assert(reply);
+ assert(rr);
+ assert(rr->key);
+
+ if (rr->key->type != DNS_TYPE_TXT)
+ return 0;
+
+ LIST_FOREACH(items, i, rr->txt.items) {
+
+ if (i->length <= 0)
+ continue;
+
+ r = sd_bus_message_append_array(reply, 'y', i->data, i->length);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static void resolve_service_all_complete(DnsQuery *q) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *canonical = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ char *name = NULL, *type = NULL, *domain = NULL;
+ DnsQuestion *question;
+ DnsResourceRecord *rr;
+ unsigned added = 0;
+ DnsQuery *aux;
+ int r;
+
+ assert(q);
+
+ if (q->block_all_complete > 0)
+ return;
+
+ if ((q->flags & SD_RESOLVED_NO_ADDRESS) == 0) {
+ DnsQuery *bad = NULL;
+ bool have_success = false;
+
+ LIST_FOREACH(auxiliary_queries, aux, q->auxiliary_queries) {
+
+ switch (aux->state) {
+
+ case DNS_TRANSACTION_PENDING:
+ /* If an auxiliary query is still pending, let's wait */
+ return;
+
+ case DNS_TRANSACTION_SUCCESS:
+ if (aux->auxiliary_result == 0)
+ have_success = true;
+ else
+ bad = aux;
+ break;
+
+ default:
+ bad = aux;
+ break;
+ }
+ }
+
+ if (!have_success) {
+ /* We can only return one error, hence pick the last error we encountered */
+
+ assert(bad);
+
+ if (bad->state == DNS_TRANSACTION_SUCCESS) {
+ assert(bad->auxiliary_result != 0);
+
+ if (bad->auxiliary_result == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(bad));
+ goto finish;
+ }
+
+ r = bad->auxiliary_result;
+ goto finish;
+ }
+
+ r = reply_query_state(bad);
+ goto finish;
+ }
+ }
+
+ r = sd_bus_message_new_method_return(q->bus_request, &reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "(qqqsa(iiay)s)");
+ if (r < 0)
+ goto finish;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH(rr, q->answer) {
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = append_srv(q, reply, rr);
+ if (r < 0)
+ goto finish;
+ if (r == 0) /* not an SRV record */
+ continue;
+
+ if (!canonical)
+ canonical = dns_resource_record_ref(rr);
+
+ added++;
+ }
+
+ if (added <= 0) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NO_SUCH_RR, "'%s' does not have any RR of the requested type", dns_query_string(q));
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_open_container(reply, 'a', "ay");
+ if (r < 0)
+ goto finish;
+
+ DNS_ANSWER_FOREACH(rr, q->answer) {
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = append_txt(reply, rr);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ goto finish;
+
+ assert(canonical);
+ r = dns_service_split(dns_resource_key_name(canonical->key), &name, &type, &domain);
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_message_append(
+ reply,
+ "ssst",
+ name, type, domain,
+ SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q)));
+ if (r < 0)
+ goto finish;
+
+ r = sd_bus_send(q->manager->bus, reply, NULL);
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send service reply: %m");
+ sd_bus_reply_method_errno(q->bus_request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static void resolve_service_hostname_complete(DnsQuery *q) {
+ int r;
+
+ assert(q);
+ assert(q->auxiliary_for);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ resolve_service_all_complete(q->auxiliary_for);
+ return;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ /* This auxiliary lookup is finished or failed, let's see if all are finished now. */
+ q->auxiliary_result = r;
+ resolve_service_all_complete(q->auxiliary_for);
+}
+
+static int resolve_service_hostname(DnsQuery *q, DnsResourceRecord *rr, int ifindex) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ DnsQuery *aux;
+ int r;
+
+ assert(q);
+ assert(rr);
+ assert(rr->key);
+ assert(rr->key->type == DNS_TYPE_SRV);
+
+ /* OK, we found an SRV record for the service. Let's resolve
+ * the hostname included in it */
+
+ r = dns_question_new_address(&question, q->request_family, rr->srv.name, false);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(q->manager, &aux, question, question, ifindex, q->flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ aux->request_family = q->request_family;
+ aux->complete = resolve_service_hostname_complete;
+
+ r = dns_query_make_auxiliary(aux, q);
+ if (r == -EAGAIN) {
+ /* Too many auxiliary lookups? If so, don't complain,
+ * let's just not add this one, we already have more
+ * than enough */
+
+ dns_query_free(aux);
+ return 0;
+ }
+ if (r < 0)
+ goto fail;
+
+ /* Note that auxiliary queries do not track the original bus
+ * client, only the primary request does that. */
+
+ r = dns_query_go(aux);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(aux);
+ return r;
+}
+
+static void bus_method_resolve_service_complete(DnsQuery *q) {
+ bool has_root_domain = false;
+ DnsResourceRecord *rr;
+ DnsQuestion *question;
+ unsigned found = 0;
+ int ifindex, r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_CNAME_LOOP, "CNAME loop detected, or CNAME resolving disabled on '%s'", dns_query_string(q));
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ if (rr->key->type != DNS_TYPE_SRV)
+ continue;
+
+ if (dns_name_is_root(rr->srv.name)) {
+ has_root_domain = true;
+ continue;
+ }
+
+ if ((q->flags & SD_RESOLVED_NO_ADDRESS) == 0) {
+ q->block_all_complete++;
+ r = resolve_service_hostname(q, rr, ifindex);
+ q->block_all_complete--;
+
+ if (r < 0)
+ goto finish;
+ }
+
+ found++;
+ }
+
+ if (has_root_domain && found <= 0) {
+ /* If there's exactly one SRV RR and it uses
+ * the root domain as hostname, then the
+ * service is explicitly not offered on the
+ * domain. Report this as a recognizable
+ * error. See RFC 2782, Section "Usage
+ * Rules". */
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NO_SUCH_SERVICE, "'%s' does not provide the requested service", dns_query_string(q));
+ goto finish;
+ }
+
+ if (found <= 0) {
+ r = sd_bus_reply_method_errorf(q->bus_request, BUS_ERROR_NO_SUCH_RR, "'%s' does not have any RR of the requested type", dns_query_string(q));
+ goto finish;
+ }
+
+ /* Maybe we are already finished? check now... */
+ resolve_service_all_complete(q);
+ return;
+
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send service reply: %m");
+ sd_bus_reply_method_errno(q->bus_request, r, NULL);
+ }
+
+ dns_query_free(q);
+}
+
+static int bus_method_resolve_service(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question_idna = NULL, *question_utf8 = NULL;
+ const char *name, *type, *domain;
+ Manager *m = userdata;
+ int family, ifindex;
+ uint64_t flags;
+ DnsQuery *q;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "isssit", &ifindex, &name, &type, &domain, &family, &flags);
+ if (r < 0)
+ return r;
+
+ if (ifindex < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index");
+
+ if (!IN_SET(family, AF_INET, AF_INET6, AF_UNSPEC))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ if (isempty(name))
+ name = NULL;
+ else if (!dns_service_name_is_valid(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid service name '%s'", name);
+
+ if (isempty(type))
+ type = NULL;
+ else if (!dns_srv_type_is_valid(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid SRV service type '%s'", type);
+
+ r = dns_name_is_valid(domain);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid domain '%s'", domain);
+
+ if (name && !type)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Service name cannot be specified without service type.");
+
+ r = validate_and_mangle_flags(name, &flags, SD_RESOLVED_NO_TXT|SD_RESOLVED_NO_ADDRESS, error);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_service(&question_utf8, name, type, domain, !(flags & SD_RESOLVED_NO_TXT), false);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_service(&question_idna, name, type, domain, !(flags & SD_RESOLVED_NO_TXT), true);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(m, &q, question_utf8, question_idna, ifindex, flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ q->bus_request = sd_bus_message_ref(message);
+ q->request_family = family;
+ q->complete = bus_method_resolve_service_complete;
+
+ r = dns_query_bus_track(q, message);
+ if (r < 0)
+ goto fail;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+int bus_dns_server_append(sd_bus_message *reply, DnsServer *s, bool with_ifindex, bool extended) {
+ int r;
+
+ assert(reply);
+
+ if (!s) {
+ if (with_ifindex) {
+ if (extended)
+ return sd_bus_message_append(reply, "(iiayqs)", 0, AF_UNSPEC, 0, 0, NULL);
+ else
+ return sd_bus_message_append(reply, "(iiay)", 0, AF_UNSPEC, 0);
+ } else {
+ if (extended)
+ return sd_bus_message_append(reply, "(iayqs)", AF_UNSPEC, 0, 0, NULL);
+ else
+ return sd_bus_message_append(reply, "(iay)", AF_UNSPEC, 0);
+ }
+ }
+
+ r = sd_bus_message_open_container(reply, 'r', with_ifindex ? (extended ? "iiayqs" : "iiay") : (extended ? "iayqs" : "iay"));
+ if (r < 0)
+ return r;
+
+ if (with_ifindex) {
+ r = sd_bus_message_append(reply, "i", dns_server_ifindex(s));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_append(reply, "i", s->family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', &s->address, FAMILY_ADDRESS_SIZE(s->family));
+ if (r < 0)
+ return r;
+
+ if (extended) {
+ r = sd_bus_message_append(reply, "q", s->port);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", s->server_name);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int bus_property_get_dns_servers_internal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error,
+ bool extended) {
+
+ Manager *m = userdata;
+ DnsServer *s;
+ Link *l;
+ int r;
+
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'a', extended ? "(iiayqs)" : "(iiay)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(servers, s, m->dns_servers) {
+ r = bus_dns_server_append(reply, s, true, extended);
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(l, m->links)
+ LIST_FOREACH(servers, s, l->dns_servers) {
+ r = bus_dns_server_append(reply, s, true, extended);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int bus_property_get_dns_servers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return bus_property_get_dns_servers_internal(bus, path, interface, property, reply, userdata, error, false);
+}
+
+static int bus_property_get_dns_servers_ex(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return bus_property_get_dns_servers_internal(bus, path, interface, property, reply, userdata, error, true);
+}
+
+static int bus_property_get_fallback_dns_servers_internal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error,
+ bool extended) {
+
+ DnsServer *s, **f = userdata;
+ int r;
+
+ assert(reply);
+ assert(f);
+
+ r = sd_bus_message_open_container(reply, 'a', extended ? "(iiayqs)" : "(iiay)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(servers, s, *f) {
+ r = bus_dns_server_append(reply, s, true, extended);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int bus_property_get_fallback_dns_servers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return bus_property_get_fallback_dns_servers_internal(bus, path, interface, property, reply, userdata, error, false);
+}
+
+static int bus_property_get_fallback_dns_servers_ex(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return bus_property_get_fallback_dns_servers_internal(bus, path, interface, property, reply, userdata, error, true);
+}
+
+static int bus_property_get_current_dns_server_internal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error,
+ bool extended) {
+
+ DnsServer *s;
+
+ assert(reply);
+ assert(userdata);
+
+ s = *(DnsServer **) userdata;
+
+ return bus_dns_server_append(reply, s, true, extended);
+}
+
+static int bus_property_get_current_dns_server(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return bus_property_get_current_dns_server_internal(bus, path, interface, property, reply, userdata, error, false);
+}
+
+static int bus_property_get_current_dns_server_ex(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return bus_property_get_current_dns_server_internal(bus, path, interface, property, reply, userdata, error, true);
+}
+
+static int bus_property_get_domains(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ DnsSearchDomain *d;
+ Link *l;
+ int r;
+
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'a', "(isb)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(domains, d, m->search_domains) {
+ r = sd_bus_message_append(reply, "(isb)", 0, d->name, d->route_only);
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(l, m->links) {
+ LIST_FOREACH(domains, d, l->search_domains) {
+ r = sd_bus_message_append(reply, "(isb)", l->ifindex, d->name, d->route_only);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int bus_property_get_transaction_statistics(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(reply);
+ assert(m);
+
+ return sd_bus_message_append(reply, "(tt)",
+ (uint64_t) hashmap_size(m->dns_transactions),
+ (uint64_t) m->n_transactions_total);
+}
+
+static int bus_property_get_cache_statistics(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t size = 0, hit = 0, miss = 0;
+ Manager *m = userdata;
+ DnsScope *s;
+
+ assert(reply);
+ assert(m);
+
+ LIST_FOREACH(scopes, s, m->dns_scopes) {
+ size += dns_cache_size(&s->cache);
+ hit += s->cache.n_hit;
+ miss += s->cache.n_miss;
+ }
+
+ return sd_bus_message_append(reply, "(ttt)", size, hit, miss);
+}
+
+static int bus_property_get_dnssec_statistics(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+
+ assert(reply);
+ assert(m);
+
+ return sd_bus_message_append(reply, "(tttt)",
+ (uint64_t) m->n_dnssec_verdict[DNSSEC_SECURE],
+ (uint64_t) m->n_dnssec_verdict[DNSSEC_INSECURE],
+ (uint64_t) m->n_dnssec_verdict[DNSSEC_BOGUS],
+ (uint64_t) m->n_dnssec_verdict[DNSSEC_INDETERMINATE]);
+}
+
+static int bus_property_get_ntas(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ const char *domain;
+ int r;
+
+ assert(reply);
+ assert(m);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(domain, m->trust_anchor.negative_by_name) {
+ r = sd_bus_message_append(reply, "s", domain);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_dns_stub_listener_mode, dns_stub_listener_mode, DnsStubListenerMode);
+static BUS_DEFINE_PROPERTY_GET(bus_property_get_dnssec_supported, "b", Manager, manager_dnssec_supported);
+static BUS_DEFINE_PROPERTY_GET2(bus_property_get_dnssec_mode, "s", Manager, manager_get_dnssec_mode, dnssec_mode_to_string);
+static BUS_DEFINE_PROPERTY_GET2(bus_property_get_dns_over_tls_mode, "s", Manager, manager_get_dns_over_tls_mode, dns_over_tls_mode_to_string);
+
+static int bus_property_get_resolv_conf_mode(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(reply);
+
+ r = resolv_conf_mode();
+ if (r < 0) {
+ log_warning_errno(r, "Failed to test /etc/resolv.conf mode, ignoring: %m");
+ return sd_bus_message_append(reply, "s", NULL);
+ }
+
+ return sd_bus_message_append(reply, "s", resolv_conf_mode_to_string(r));
+}
+
+static int bus_method_reset_statistics(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ DnsScope *s;
+
+ assert(message);
+ assert(m);
+
+ LIST_FOREACH(scopes, s, m->dns_scopes)
+ s->cache.n_hit = s->cache.n_miss = 0;
+
+ m->n_transactions_total = 0;
+ zero(m->n_dnssec_verdict);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int get_any_link(Manager *m, int ifindex, Link **ret, sd_bus_error *error) {
+ Link *l;
+
+ assert(m);
+ assert(ret);
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!l)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_LINK, "Link %i not known", ifindex);
+
+ *ret = l;
+ return 0;
+}
+
+static int call_link_method(Manager *m, sd_bus_message *message, sd_bus_message_handler_t handler, sd_bus_error *error) {
+ int ifindex, r;
+ Link *l;
+
+ assert(m);
+ assert(message);
+ assert(handler);
+
+ r = bus_message_read_ifindex(message, error, &ifindex);
+ if (r < 0)
+ return r;
+
+ r = get_any_link(m, ifindex, &l, error);
+ if (r < 0)
+ return r;
+
+ return handler(message, l, error);
+}
+
+static int bus_method_set_link_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dns_servers, error);
+}
+
+static int bus_method_set_link_dns_servers_ex(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dns_servers_ex, error);
+}
+
+static int bus_method_set_link_domains(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_domains, error);
+}
+
+static int bus_method_set_link_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_default_route, error);
+}
+
+static int bus_method_set_link_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_llmnr, error);
+}
+
+static int bus_method_set_link_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_mdns, error);
+}
+
+static int bus_method_set_link_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dns_over_tls, error);
+}
+
+static int bus_method_set_link_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dnssec, error);
+}
+
+static int bus_method_set_link_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_set_dnssec_negative_trust_anchors, error);
+}
+
+static int bus_method_revert_link(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return call_link_method(userdata, message, bus_link_method_revert, error);
+}
+
+static int bus_method_get_link(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *p = NULL;
+ Manager *m = userdata;
+ int r, ifindex;
+ Link *l;
+
+ assert(message);
+ assert(m);
+
+ r = bus_message_read_ifindex(message, error, &ifindex);
+ if (r < 0)
+ return r;
+
+ r = get_any_link(m, ifindex, &l, error);
+ if (r < 0)
+ return r;
+
+ p = link_bus_path(l);
+ if (!p)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", p);
+}
+
+static int bus_method_flush_caches(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ manager_flush_caches(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int bus_method_reset_server_features(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ manager_reset_server_features(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int dnssd_service_on_bus_track(sd_bus_track *t, void *userdata) {
+ DnssdService *s = userdata;
+
+ assert(t);
+ assert(s);
+
+ log_debug("Client of active request vanished, destroying DNS-SD service.");
+ dnssd_service_free(s);
+
+ return 0;
+}
+
+static int bus_method_register_service(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ _cleanup_(dnssd_service_freep) DnssdService *service = NULL;
+ _cleanup_(sd_bus_track_unrefp) sd_bus_track *bus_track = NULL;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_free_ char *instance_name = NULL;
+ Manager *m = userdata;
+ DnssdService *s = NULL;
+ const char *name;
+ const char *name_template;
+ const char *type;
+ uid_t euid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ if (m->mdns_support != RESOLVE_SUPPORT_YES)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Support for MulticastDNS is disabled");
+
+ service = new0(DnssdService, 1);
+ if (!service)
+ return log_oom();
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_euid(creds, &euid);
+ if (r < 0)
+ return r;
+ service->originator = euid;
+
+ r = sd_bus_message_read(message, "sssqqq", &name, &name_template, &type,
+ &service->port, &service->priority,
+ &service->weight);
+ if (r < 0)
+ return r;
+
+ s = hashmap_get(m->dnssd_services, name);
+ if (s)
+ return sd_bus_error_setf(error, BUS_ERROR_DNSSD_SERVICE_EXISTS, "DNS-SD service '%s' exists already", name);
+
+ if (!dnssd_srv_type_is_valid(type))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "DNS-SD service type '%s' is invalid", type);
+
+ service->name = strdup(name);
+ if (!service->name)
+ return log_oom();
+
+ service->name_template = strdup(name_template);
+ if (!service->name_template)
+ return log_oom();
+
+ service->type = strdup(type);
+ if (!service->type)
+ return log_oom();
+
+ r = dnssd_render_instance_name(service, &instance_name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, SD_BUS_TYPE_ARRAY, "a{say}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(message, SD_BUS_TYPE_ARRAY, "{say}")) > 0) {
+ _cleanup_(dnssd_txtdata_freep) DnssdTxtData *txt_data = NULL;
+ DnsTxtItem *last = NULL;
+
+ txt_data = new0(DnssdTxtData, 1);
+ if (!txt_data)
+ return log_oom();
+
+ while ((r = sd_bus_message_enter_container(message, SD_BUS_TYPE_DICT_ENTRY, "say")) > 0) {
+ const char *key;
+ const void *value;
+ size_t size;
+ DnsTxtItem *i;
+
+ r = sd_bus_message_read(message, "s", &key);
+ if (r < 0)
+ return r;
+
+ if (isempty(key))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Keys in DNS-SD TXT RRs can't be empty");
+
+ if (!ascii_is_valid(key))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "TXT key '%s' contains non-ASCII symbols", key);
+
+ r = sd_bus_message_read_array(message, 'y', &value, &size);
+ if (r < 0)
+ return r;
+
+ r = dnssd_txt_item_new_from_data(key, value, size, &i);
+ if (r < 0)
+ return r;
+
+ LIST_INSERT_AFTER(items, txt_data->txt, last, i);
+ last = i;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (txt_data->txt) {
+ LIST_PREPEND(items, service->txt_data_items, txt_data);
+ txt_data = NULL;
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!service->txt_data_items) {
+ _cleanup_(dnssd_txtdata_freep) DnssdTxtData *txt_data = NULL;
+
+ txt_data = new0(DnssdTxtData, 1);
+ if (!txt_data)
+ return log_oom();
+
+ r = dns_txt_item_new_empty(&txt_data->txt);
+ if (r < 0)
+ return r;
+
+ LIST_PREPEND(items, service->txt_data_items, txt_data);
+ txt_data = NULL;
+ }
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/dnssd", service->name, &path);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_SYS_ADMIN,
+ "org.freedesktop.resolve1.register-service",
+ NULL, false, UID_INVALID,
+ &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ r = hashmap_ensure_allocated(&m->dnssd_services, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(m->dnssd_services, service->name, service);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_new(sd_bus_message_get_bus(message), &bus_track, dnssd_service_on_bus_track, service);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_add_sender(bus_track, message);
+ if (r < 0)
+ return r;
+
+ service->manager = m;
+
+ service = NULL;
+
+ manager_refresh_rrs(m);
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
+static int call_dnssd_method(Manager *m, sd_bus_message *message, sd_bus_message_handler_t handler, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ DnssdService *s = NULL;
+ const char *path;
+ int r;
+
+ assert(m);
+ assert(message);
+ assert(handler);
+
+ r = sd_bus_message_read(message, "o", &path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/resolve1/dnssd", &name);
+ if (r == 0)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DNSSD_SERVICE, "DNS-SD service with object path '%s' does not exist", path);
+ if (r < 0)
+ return r;
+
+ s = hashmap_get(m->dnssd_services, name);
+ if (!s)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DNSSD_SERVICE, "DNS-SD service '%s' not known", name);
+
+ return handler(message, s, error);
+}
+
+static int bus_method_unregister_service(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+
+ assert(message);
+ assert(m);
+
+ return call_dnssd_method(m, message, bus_dnssd_method_unregister, error);
+}
+
+static const sd_bus_vtable resolve_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("LLMNRHostname", "s", NULL, offsetof(Manager, llmnr_hostname), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("LLMNR", "s", bus_property_get_resolve_support, offsetof(Manager, llmnr_support), 0),
+ SD_BUS_PROPERTY("MulticastDNS", "s", bus_property_get_resolve_support, offsetof(Manager, mdns_support), 0),
+ SD_BUS_PROPERTY("DNSOverTLS", "s", bus_property_get_dns_over_tls_mode, 0, 0),
+ SD_BUS_PROPERTY("DNS", "a(iiay)", bus_property_get_dns_servers, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("DNSEx", "a(iiayqs)", bus_property_get_dns_servers_ex, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("FallbackDNS", "a(iiay)", bus_property_get_fallback_dns_servers, offsetof(Manager, fallback_dns_servers), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FallbackDNSEx", "a(iiayqs)", bus_property_get_fallback_dns_servers_ex, offsetof(Manager, fallback_dns_servers), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("CurrentDNSServer", "(iiay)", bus_property_get_current_dns_server, offsetof(Manager, current_dns_server), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CurrentDNSServerEx", "(iiayqs)", bus_property_get_current_dns_server_ex, offsetof(Manager, current_dns_server), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Domains", "a(isb)", bus_property_get_domains, 0, 0),
+ SD_BUS_PROPERTY("TransactionStatistics", "(tt)", bus_property_get_transaction_statistics, 0, 0),
+ SD_BUS_PROPERTY("CacheStatistics", "(ttt)", bus_property_get_cache_statistics, 0, 0),
+ SD_BUS_PROPERTY("DNSSEC", "s", bus_property_get_dnssec_mode, 0, 0),
+ SD_BUS_PROPERTY("DNSSECStatistics", "(tttt)", bus_property_get_dnssec_statistics, 0, 0),
+ SD_BUS_PROPERTY("DNSSECSupported", "b", bus_property_get_dnssec_supported, 0, 0),
+ SD_BUS_PROPERTY("DNSSECNegativeTrustAnchors", "as", bus_property_get_ntas, 0, 0),
+ SD_BUS_PROPERTY("DNSStubListener", "s", bus_property_get_dns_stub_listener_mode, offsetof(Manager, dns_stub_listener_mode), 0),
+ SD_BUS_PROPERTY("ResolvConfMode", "s", bus_property_get_resolv_conf_mode, 0, 0),
+
+ SD_BUS_METHOD_WITH_ARGS("ResolveHostname",
+ SD_BUS_ARGS("i", ifindex, "s", name, "i", family, "t", flags),
+ SD_BUS_RESULT("a(iiay)", addresses, "s", canonical, "t", flags),
+ bus_method_resolve_hostname,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("ResolveAddress",
+ SD_BUS_ARGS("i", ifindex, "i", family, "ay", address, "t", flags),
+ SD_BUS_RESULT("a(is)", names, "t", flags),
+ bus_method_resolve_address,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("ResolveRecord",
+ SD_BUS_ARGS("i", ifindex, "s", name, "q", class, "q", type, "t", flags),
+ SD_BUS_RESULT("a(iqqay)", records, "t", flags),
+ bus_method_resolve_record,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("ResolveService",
+ SD_BUS_ARGS("i", ifindex,
+ "s", name,
+ "s", type,
+ "s", domain,
+ "i", family,
+ "t", flags),
+ SD_BUS_RESULT("a(qqqsa(iiay)s)", srv_data,
+ "aay", txt_data,
+ "s", canonical_name,
+ "s", canonical_type,
+ "s", canonical_domain,
+ "t", flags),
+ bus_method_resolve_service,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("GetLink",
+ SD_BUS_ARGS("i", ifindex),
+ SD_BUS_RESULT("o", path),
+ bus_method_get_link,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkDNS",
+ SD_BUS_ARGS("i", ifindex, "a(iay)", addresses),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_dns_servers,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkDNSEx",
+ SD_BUS_ARGS("i", ifindex, "a(iayqs)", addresses),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_dns_servers_ex,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkDomains",
+ SD_BUS_ARGS("i", ifindex, "a(sb)", domains),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_domains,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkDefaultRoute",
+ SD_BUS_ARGS("i", ifindex, "b", enable),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_default_route,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkLLMNR",
+ SD_BUS_ARGS("i", ifindex, "s", mode),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_llmnr,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkMulticastDNS",
+ SD_BUS_ARGS("i", ifindex, "s", mode),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_mdns,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkDNSOverTLS",
+ SD_BUS_ARGS("i", ifindex, "s", mode),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_dns_over_tls,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkDNSSEC",
+ SD_BUS_ARGS("i", ifindex, "s", mode),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_dnssec,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLinkDNSSECNegativeTrustAnchors",
+ SD_BUS_ARGS("i", ifindex, "as", names),
+ SD_BUS_NO_RESULT,
+ bus_method_set_link_dnssec_negative_trust_anchors,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("RevertLink",
+ SD_BUS_ARGS("i", ifindex),
+ SD_BUS_NO_RESULT,
+ bus_method_revert_link,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("RegisterService",
+ SD_BUS_ARGS("s", name,
+ "s", name_template,
+ "s", type,
+ "q", service_port,
+ "q", service_priority,
+ "q", service_weight,
+ "aa{say}", txt_datas),
+ SD_BUS_RESULT("o", service_path),
+ bus_method_register_service,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("UnregisterService",
+ SD_BUS_ARGS("o", service_path),
+ SD_BUS_NO_RESULT,
+ bus_method_unregister_service,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("ResetStatistics",
+ SD_BUS_NO_ARGS,
+ SD_BUS_NO_RESULT,
+ bus_method_reset_statistics,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("FlushCaches",
+ SD_BUS_NO_ARGS,
+ SD_BUS_NO_RESULT,
+ bus_method_flush_caches,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("ResetServerFeatures",
+ SD_BUS_NO_ARGS,
+ SD_BUS_NO_RESULT,
+ bus_method_reset_server_features,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END,
+};
+
+const BusObjectImplementation manager_object = {
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ .vtables = BUS_VTABLES(resolve_vtable),
+ .children = BUS_IMPLEMENTATIONS(&link_object,
+ &dnssd_object),
+};
+
+static int match_prepare_for_sleep(sd_bus_message *message, void *userdata, sd_bus_error *ret_error) {
+ Manager *m = userdata;
+ int b, r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ if (b)
+ return 0;
+
+ log_debug("Coming back from suspend, verifying all RRs...");
+
+ manager_verify_all(m);
+ return 0;
+}
+
+int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->bus)
+ return 0;
+
+ r = bus_open_system_watch_bind_with_description(&m->bus, "bus-api-resolve");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = bus_add_implementation(m->bus, &manager_object, m);
+ if (r < 0)
+ return r;
+
+ r = bus_log_control_api_register(m->bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.resolve1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = sd_bus_match_signal_async(
+ m->bus,
+ NULL,
+ "org.freedesktop.login1",
+ "/org/freedesktop/login1",
+ "org.freedesktop.login1.Manager",
+ "PrepareForSleep",
+ match_prepare_for_sleep,
+ NULL,
+ m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to request match for PrepareForSleep, ignoring: %m");
+
+ return 0;
+}
+
+int _manager_send_changed(Manager *manager, const char *property, ...) {
+ assert(manager);
+
+ char **l = strv_from_stdarg_alloca(property);
+
+ int r = sd_bus_emit_properties_changed_strv(
+ manager->bus,
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ l);
+ if (r < 0)
+ log_notice_errno(r, "Failed to emit notification about changed property %s: %m", property);
+ return r;
+}
diff --git a/src/resolve/resolved-bus.h b/src/resolve/resolved-bus.h
new file mode 100644
index 0000000..8628d8b
--- /dev/null
+++ b/src/resolve/resolved-bus.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-object.h"
+#include "resolved-manager.h"
+
+extern const BusObjectImplementation manager_object;
+
+int manager_connect_bus(Manager *m);
+int _manager_send_changed(Manager *manager, const char *property, ...) _sentinel_;
+#define manager_send_changed(manager, ...) _manager_send_changed(manager, __VA_ARGS__, NULL)
+int bus_dns_server_append(sd_bus_message *reply, DnsServer *s, bool with_ifindex, bool extended);
+int bus_property_get_resolve_support(sd_bus *bus, const char *path, const char *interface,
+ const char *property, sd_bus_message *reply,
+ void *userdata, sd_bus_error *error);
diff --git a/src/resolve/resolved-conf.c b/src/resolve/resolved-conf.c
new file mode 100644
index 0000000..f2a3316
--- /dev/null
+++ b/src/resolve/resolved-conf.c
@@ -0,0 +1,516 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "extract-word.h"
+#include "hexdecoct.h"
+#include "parse-util.h"
+#include "resolved-conf.h"
+#include "resolved-dnssd.h"
+#include "resolved-manager.h"
+#include "resolved-dns-search-domain.h"
+#include "resolved-dns-stub.h"
+#include "dns-domain.h"
+#include "socket-netlink.h"
+#include "specifier.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_stub_listener_mode, dns_stub_listener_mode, DnsStubListenerMode, "Failed to parse DNS stub listener mode setting");
+
+static int manager_add_dns_server_by_string(Manager *m, DnsServerType type, const char *word) {
+ _cleanup_free_ char *server_name = NULL;
+ union in_addr_union address;
+ int family, r, ifindex = 0;
+ uint16_t port;
+ DnsServer *s;
+
+ assert(m);
+ assert(word);
+
+ r = in_addr_port_ifindex_name_from_string_auto(word, &family, &address, &port, &ifindex, &server_name);
+ if (r < 0)
+ return r;
+
+ /* Silently filter out 0.0.0.0 and 127.0.0.53 (our own stub DNS listener) */
+ if (!dns_server_address_valid(family, &address))
+ return 0;
+
+ /* By default, the port number is determined with the transaction feature level.
+ * See dns_transaction_port() and dns_server_port(). */
+ if (IN_SET(port, 53, 853))
+ port = 0;
+
+ /* Filter out duplicates */
+ s = dns_server_find(manager_get_first_dns_server(m, type), family, &address, port, ifindex, server_name);
+ if (s) {
+ /* Drop the marker. This is used to find the servers that ceased to exist, see
+ * manager_mark_dns_servers() and manager_flush_marked_dns_servers(). */
+ dns_server_move_back_and_unmark(s);
+ return 0;
+ }
+
+ return dns_server_new(m, NULL, type, NULL, family, &address, port, ifindex, server_name);
+}
+
+int manager_parse_dns_server_string_and_warn(Manager *m, DnsServerType type, const char *string) {
+ int r;
+
+ assert(m);
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = manager_add_dns_server_by_string(m, type, word);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add DNS server address '%s', ignoring: %m", word);
+ }
+
+ return 0;
+}
+
+static int manager_add_search_domain_by_string(Manager *m, const char *domain) {
+ DnsSearchDomain *d;
+ bool route_only;
+ int r;
+
+ assert(m);
+ assert(domain);
+
+ route_only = *domain == '~';
+ if (route_only)
+ domain++;
+
+ if (dns_name_is_root(domain) || streq(domain, "*")) {
+ route_only = true;
+ domain = ".";
+ }
+
+ r = dns_search_domain_find(m->search_domains, domain, &d);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ dns_search_domain_move_back_and_unmark(d);
+ else {
+ r = dns_search_domain_new(m, &d, DNS_SEARCH_DOMAIN_SYSTEM, NULL, domain);
+ if (r < 0)
+ return r;
+ }
+
+ d->route_only = route_only;
+ return 0;
+}
+
+int manager_parse_search_domains_and_warn(Manager *m, const char *string) {
+ int r;
+
+ assert(m);
+ assert(string);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&string, &word, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = manager_add_search_domain_by_string(m, word);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add search domain '%s', ignoring: %m", word);
+ }
+
+ return 0;
+}
+
+int config_parse_dns_servers(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(m);
+
+ if (isempty(rvalue))
+ /* Empty assignment means clear the list */
+ dns_server_unlink_all(manager_get_first_dns_server(m, ltype));
+ else {
+ /* Otherwise, add to the list */
+ r = manager_parse_dns_server_string_and_warn(m, ltype, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse DNS server string '%s', ignoring.", rvalue);
+ return 0;
+ }
+ }
+
+ /* If we have a manual setting, then we stop reading
+ * /etc/resolv.conf */
+ if (ltype == DNS_SERVER_SYSTEM)
+ m->read_resolv_conf = false;
+ if (ltype == DNS_SERVER_FALLBACK)
+ m->need_builtin_fallbacks = false;
+
+ return 0;
+}
+
+int config_parse_search_domains(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(m);
+
+ if (isempty(rvalue))
+ /* Empty assignment means clear the list */
+ dns_search_domain_unlink_all(m->search_domains);
+ else {
+ /* Otherwise, add to the list */
+ r = manager_parse_search_domains_and_warn(m, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse search domains string '%s', ignoring.", rvalue);
+ return 0;
+ }
+ }
+
+ /* If we have a manual setting, then we stop reading
+ * /etc/resolv.conf */
+ m->read_resolv_conf = false;
+
+ return 0;
+}
+
+int config_parse_dnssd_service_name(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ static const Specifier specifier_table[] = {
+ { 'a', specifier_architecture, NULL },
+ { 'b', specifier_boot_id, NULL },
+ { 'B', specifier_os_build_id, NULL },
+ { 'H', specifier_host_name, NULL }, /* We will use specifier_dnssd_host_name(). */
+ { 'm', specifier_machine_id, NULL },
+ { 'o', specifier_os_id, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ { 'w', specifier_os_version_id, NULL },
+ { 'W', specifier_os_variant_id, NULL },
+ {}
+ };
+ DnssdService *s = userdata;
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(s);
+
+ if (isempty(rvalue)) {
+ s->name_template = mfree(s->name_template);
+ return 0;
+ }
+
+ r = specifier_printf(rvalue, specifier_table, NULL, &name);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid service instance name template '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+
+ if (!dns_service_name_is_valid(name)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Service instance name template '%s' renders to invalid name '%s'. Ignoring assignment.",
+ rvalue, name);
+ return 0;
+ }
+
+ return free_and_strdup_warn(&s->name_template, rvalue);
+}
+
+int config_parse_dnssd_service_type(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ DnssdService *s = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(s);
+
+ if (isempty(rvalue)) {
+ s->type = mfree(s->type);
+ return 0;
+ }
+
+ if (!dnssd_srv_type_is_valid(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Service type is invalid. Ignoring.");
+ return 0;
+ }
+
+ r = free_and_strdup(&s->type, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_dnssd_txt(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(dnssd_txtdata_freep) DnssdTxtData *txt_data = NULL;
+ DnssdService *s = userdata;
+ DnsTxtItem *last = NULL;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(s);
+
+ if (isempty(rvalue)) {
+ /* Flush out collected items */
+ s->txt_data_items = dnssd_txtdata_free_all(s->txt_data_items);
+ return 0;
+ }
+
+ txt_data = new0(DnssdTxtData, 1);
+ if (!txt_data)
+ return log_oom();
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *key = NULL, *value = NULL;
+ _cleanup_free_ void *decoded = NULL;
+ size_t length = 0;
+ DnsTxtItem *i;
+ int r;
+
+ r = extract_first_word(&rvalue, &word, NULL,
+ EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = split_pair(word, "=", &key, &value);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r == -EINVAL)
+ key = TAKE_PTR(word);
+
+ if (!ascii_is_valid(key)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid key, ignoring: %s", key);
+ continue;
+ }
+
+ switch (ltype) {
+
+ case DNS_TXT_ITEM_DATA:
+ if (value) {
+ r = unbase64mem(value, strlen(value), &decoded, &length);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Invalid base64 encoding, ignoring: %s", value);
+ continue;
+ }
+ }
+
+ r = dnssd_txt_item_new_from_data(key, decoded, length, &i);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ case DNS_TXT_ITEM_TEXT:
+ r = dnssd_txt_item_new_from_string(key, value, &i);
+ if (r < 0)
+ return log_oom();
+ break;
+
+ default:
+ assert_not_reached("Unknown type of Txt config");
+ }
+
+ LIST_INSERT_AFTER(items, txt_data->txt, last, i);
+ last = i;
+ }
+
+ if (!LIST_IS_EMPTY(txt_data->txt)) {
+ LIST_PREPEND(items, s->txt_data_items, txt_data);
+ TAKE_PTR(txt_data);
+ }
+
+ return 0;
+}
+
+int config_parse_dns_stub_listener_extra(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ DnsStubListenerExtra *stub = NULL;
+ Manager *m = userdata;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ m->dns_extra_stub_listeners = ordered_set_free(m->dns_extra_stub_listeners);
+ return 0;
+ }
+
+ r = dns_stub_listener_extra_new(m, &stub);
+ if (r < 0)
+ return log_oom();
+
+ p = startswith(rvalue, "udp:");
+ if (p)
+ stub->mode = DNS_STUB_LISTENER_UDP;
+ else {
+ p = startswith(rvalue, "tcp:");
+ if (p)
+ stub->mode = DNS_STUB_LISTENER_TCP;
+ else {
+ stub->mode = DNS_STUB_LISTENER_YES;
+ p = rvalue;
+ }
+ }
+
+ r = in_addr_port_ifindex_name_from_string_auto(p, &stub->family, &stub->address, &stub->port, NULL, NULL);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse address in %s=%s, ignoring assignment: %m",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ r = ordered_set_ensure_put(&m->dns_extra_stub_listeners, &dns_stub_listener_extra_hash_ops, stub);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to store %s=%s, ignoring assignment: %m", lvalue, rvalue);
+ return 0;
+ }
+
+ TAKE_PTR(stub);
+
+ return 0;
+}
+
+int manager_parse_config_file(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = config_parse_many_nulstr(
+ PKGSYSCONFDIR "/resolved.conf",
+ CONF_PATHS_NULSTR("systemd/resolved.conf.d"),
+ "Resolve\0",
+ config_item_perf_lookup, resolved_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ m,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (m->need_builtin_fallbacks) {
+ r = manager_parse_dns_server_string_and_warn(m, DNS_SERVER_FALLBACK, DNS_SERVERS);
+ if (r < 0)
+ return r;
+ }
+
+#if ! HAVE_GCRYPT
+ if (m->dnssec_mode != DNSSEC_NO) {
+ log_warning("DNSSEC option cannot be enabled or set to allow-downgrade when systemd-resolved is built without gcrypt support. Turning off DNSSEC support.");
+ m->dnssec_mode = DNSSEC_NO;
+ }
+#endif
+
+#if ! ENABLE_DNS_OVER_TLS
+ if (m->dns_over_tls_mode != DNS_OVER_TLS_NO) {
+ log_warning("DNS-over-TLS option cannot be enabled or set to opportunistic when systemd-resolved is built without DNS-over-TLS support. Turning off DNS-over-TLS support.");
+ m->dns_over_tls_mode = DNS_OVER_TLS_NO;
+ }
+#endif
+ return 0;
+
+}
diff --git a/src/resolve/resolved-conf.h b/src/resolve/resolved-conf.h
new file mode 100644
index 0000000..07ce259
--- /dev/null
+++ b/src/resolve/resolved-conf.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+
+#include "resolved-dns-server.h"
+
+int manager_parse_config_file(Manager *m);
+
+int manager_parse_search_domains_and_warn(Manager *m, const char *string);
+int manager_parse_dns_server_string_and_warn(Manager *m, DnsServerType type, const char *string);
+
+const struct ConfigPerfItem* resolved_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+const struct ConfigPerfItem* resolved_dnssd_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_servers);
+CONFIG_PARSER_PROTOTYPE(config_parse_search_domains);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_stub_listener_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssd_service_name);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssd_service_type);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssd_txt);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_stub_listener_extra);
diff --git a/src/resolve/resolved-def.h b/src/resolve/resolved-def.h
new file mode 100644
index 0000000..21eb699
--- /dev/null
+++ b/src/resolve/resolved-def.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "time-util.h"
+
+/* Input + Output: The various protocols we can use */
+#define SD_RESOLVED_DNS (UINT64_C(1) << 0)
+#define SD_RESOLVED_LLMNR_IPV4 (UINT64_C(1) << 1)
+#define SD_RESOLVED_LLMNR_IPV6 (UINT64_C(1) << 2)
+#define SD_RESOLVED_MDNS_IPV4 (UINT64_C(1) << 3)
+#define SD_RESOLVED_MDNS_IPV6 (UINT64_C(1) << 4)
+
+/* Input: Don't follow CNAMEs/DNAMEs */
+#define SD_RESOLVED_NO_CNAME (UINT64_C(1) << 5)
+
+/* Input: When doing service (SRV) resolving, don't resolve associated mDNS-style TXT records */
+#define SD_RESOLVED_NO_TXT (UINT64_C(1) << 6)
+
+/* Input: When doing service (SRV) resolving, don't resolve A/AAA RR for included hostname */
+#define SD_RESOLVED_NO_ADDRESS (UINT64_C(1) << 7)
+
+/* Input: Don't apply search domain logic to request */
+#define SD_RESOLVED_NO_SEARCH (UINT64_C(1) << 8)
+
+/* Output: Result is authenticated */
+#define SD_RESOLVED_AUTHENTICATED (UINT64_C(1) << 9)
+
+#define SD_RESOLVED_LLMNR (SD_RESOLVED_LLMNR_IPV4|SD_RESOLVED_LLMNR_IPV6)
+#define SD_RESOLVED_MDNS (SD_RESOLVED_MDNS_IPV4|SD_RESOLVED_MDNS_IPV6)
+#define SD_RESOLVED_PROTOCOLS_ALL (SD_RESOLVED_MDNS|SD_RESOLVED_LLMNR|SD_RESOLVED_DNS)
+
+#define SD_RESOLVED_QUERY_TIMEOUT_USEC (120 * USEC_PER_SEC)
diff --git a/src/resolve/resolved-dns-answer.c b/src/resolve/resolved-dns-answer.c
new file mode 100644
index 0000000..5b762a8
--- /dev/null
+++ b/src/resolve/resolved-dns-answer.c
@@ -0,0 +1,771 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-dnssec.h"
+#include "string-util.h"
+
+DnsAnswer *dns_answer_new(size_t n) {
+ DnsAnswer *a;
+
+ if (n > UINT16_MAX) /* We can only place 64K RRs in an answer at max */
+ n = UINT16_MAX;
+
+ a = malloc0(offsetof(DnsAnswer, items) + sizeof(DnsAnswerItem) * n);
+ if (!a)
+ return NULL;
+
+ a->n_ref = 1;
+ a->n_allocated = n;
+
+ return a;
+}
+
+static void dns_answer_flush(DnsAnswer *a) {
+ DnsResourceRecord *rr;
+
+ if (!a)
+ return;
+
+ DNS_ANSWER_FOREACH(rr, a)
+ dns_resource_record_unref(rr);
+
+ a->n_rrs = 0;
+}
+
+static DnsAnswer *dns_answer_free(DnsAnswer *a) {
+ assert(a);
+
+ dns_answer_flush(a);
+ return mfree(a);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsAnswer, dns_answer, dns_answer_free);
+
+static int dns_answer_add_raw(DnsAnswer *a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags) {
+ assert(rr);
+
+ if (!a)
+ return -ENOSPC;
+
+ if (a->n_rrs >= a->n_allocated)
+ return -ENOSPC;
+
+ a->items[a->n_rrs++] = (DnsAnswerItem) {
+ .rr = dns_resource_record_ref(rr),
+ .ifindex = ifindex,
+ .flags = flags,
+ };
+
+ return 1;
+}
+
+static int dns_answer_add_raw_all(DnsAnswer *a, DnsAnswer *source) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int ifindex, r;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, source) {
+ r = dns_answer_add_raw(a, rr, ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_answer_add(DnsAnswer *a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags) {
+ size_t i;
+ int r;
+
+ assert(rr);
+
+ if (!a)
+ return -ENOSPC;
+ if (a->n_ref > 1)
+ return -EBUSY;
+
+ for (i = 0; i < a->n_rrs; i++) {
+ if (a->items[i].ifindex != ifindex)
+ continue;
+
+ r = dns_resource_key_equal(a->items[i].rr->key, rr->key);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* There's already an RR of the same RRset in place! Let's see if the TTLs more or less
+ * match. We don't really care if they match precisely, but we do care whether one is 0 and
+ * the other is not. See RFC 2181, Section 5.2. */
+ if ((rr->ttl == 0) != (a->items[i].rr->ttl == 0))
+ return -EINVAL;
+
+ r = dns_resource_record_payload_equal(a->items[i].rr, rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* Entry already exists, keep the entry with the higher RR. */
+ if (rr->ttl > a->items[i].rr->ttl) {
+ dns_resource_record_ref(rr);
+ dns_resource_record_unref(a->items[i].rr);
+ a->items[i].rr = rr;
+ }
+
+ a->items[i].flags |= flags;
+ return 0;
+ }
+
+ return dns_answer_add_raw(a, rr, ifindex, flags);
+}
+
+static int dns_answer_add_all(DnsAnswer *a, DnsAnswer *b) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int ifindex, r;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, b) {
+ r = dns_answer_add(a, rr, ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_answer_add_extend(DnsAnswer **a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags) {
+ int r;
+
+ assert(a);
+ assert(rr);
+
+ r = dns_answer_reserve_or_clone(a, 1);
+ if (r < 0)
+ return r;
+
+ return dns_answer_add(*a, rr, ifindex, flags);
+}
+
+int dns_answer_add_soa(DnsAnswer *a, const char *name, uint32_t ttl, int ifindex) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *soa = NULL;
+
+ soa = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_SOA, name);
+ if (!soa)
+ return -ENOMEM;
+
+ soa->ttl = ttl;
+
+ soa->soa.mname = strdup(name);
+ if (!soa->soa.mname)
+ return -ENOMEM;
+
+ soa->soa.rname = strjoin("root.", name);
+ if (!soa->soa.rname)
+ return -ENOMEM;
+
+ soa->soa.serial = 1;
+ soa->soa.refresh = 1;
+ soa->soa.retry = 1;
+ soa->soa.expire = 1;
+ soa->soa.minimum = ttl;
+
+ return dns_answer_add(a, soa, ifindex, DNS_ANSWER_AUTHENTICATED);
+}
+
+int dns_answer_match_key(DnsAnswer *a, const DnsResourceKey *key, DnsAnswerFlags *ret_flags) {
+ DnsAnswerFlags flags = 0, i_flags;
+ DnsResourceRecord *i;
+ bool found = false;
+ int r;
+
+ assert(key);
+
+ DNS_ANSWER_FOREACH_FLAGS(i, i_flags, a) {
+ r = dns_resource_key_match_rr(key, i, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (!ret_flags)
+ return 1;
+
+ if (found)
+ flags &= i_flags;
+ else {
+ flags = i_flags;
+ found = true;
+ }
+ }
+
+ if (ret_flags)
+ *ret_flags = flags;
+
+ return found;
+}
+
+int dns_answer_contains_nsec_or_nsec3(DnsAnswer *a) {
+ DnsResourceRecord *i;
+
+ DNS_ANSWER_FOREACH(i, a) {
+ if (IN_SET(i->key->type, DNS_TYPE_NSEC, DNS_TYPE_NSEC3))
+ return true;
+ }
+
+ return false;
+}
+
+int dns_answer_contains_zone_nsec3(DnsAnswer *answer, const char *zone) {
+ DnsResourceRecord *rr;
+ int r;
+
+ /* Checks whether the specified answer contains at least one NSEC3 RR in the specified zone */
+
+ DNS_ANSWER_FOREACH(rr, answer) {
+ const char *p;
+
+ if (rr->key->type != DNS_TYPE_NSEC3)
+ continue;
+
+ p = dns_resource_key_name(rr->key);
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dns_name_equal(p, zone);
+ if (r != 0)
+ return r;
+ }
+
+ return false;
+}
+
+int dns_answer_find_soa(DnsAnswer *a, const DnsResourceKey *key, DnsResourceRecord **ret, DnsAnswerFlags *flags) {
+ DnsResourceRecord *rr, *soa = NULL;
+ DnsAnswerFlags rr_flags, soa_flags = 0;
+ int r;
+
+ assert(key);
+
+ /* For a SOA record we can never find a matching SOA record */
+ if (key->type == DNS_TYPE_SOA)
+ return 0;
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, rr_flags, a) {
+ r = dns_resource_key_match_soa(key, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+
+ if (soa) {
+ r = dns_name_endswith(dns_resource_key_name(rr->key), dns_resource_key_name(soa->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+ }
+
+ soa = rr;
+ soa_flags = rr_flags;
+ }
+ }
+
+ if (!soa)
+ return 0;
+
+ if (ret)
+ *ret = soa;
+ if (flags)
+ *flags = soa_flags;
+
+ return 1;
+}
+
+int dns_answer_find_cname_or_dname(DnsAnswer *a, const DnsResourceKey *key, DnsResourceRecord **ret, DnsAnswerFlags *flags) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags rr_flags;
+ int r;
+
+ assert(key);
+
+ /* For a {C,D}NAME record we can never find a matching {C,D}NAME record */
+ if (!dns_type_may_redirect(key->type))
+ return 0;
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, rr_flags, a) {
+ r = dns_resource_key_match_cname_or_dname(key, rr->key, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (ret)
+ *ret = rr;
+ if (flags)
+ *flags = rr_flags;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int dns_answer_merge(DnsAnswer *a, DnsAnswer *b, DnsAnswer **ret) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *k = NULL;
+ int r;
+
+ assert(ret);
+
+ if (a == b) {
+ *ret = dns_answer_ref(a);
+ return 0;
+ }
+
+ if (dns_answer_size(a) <= 0) {
+ *ret = dns_answer_ref(b);
+ return 0;
+ }
+
+ if (dns_answer_size(b) <= 0) {
+ *ret = dns_answer_ref(a);
+ return 0;
+ }
+
+ k = dns_answer_new(a->n_rrs + b->n_rrs);
+ if (!k)
+ return -ENOMEM;
+
+ r = dns_answer_add_raw_all(k, a);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add_all(k, b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(k);
+
+ return 0;
+}
+
+int dns_answer_extend(DnsAnswer **a, DnsAnswer *b) {
+ DnsAnswer *merged;
+ int r;
+
+ assert(a);
+
+ r = dns_answer_merge(*a, b, &merged);
+ if (r < 0)
+ return r;
+
+ dns_answer_unref(*a);
+ *a = merged;
+
+ return 0;
+}
+
+int dns_answer_remove_by_key(DnsAnswer **a, const DnsResourceKey *key) {
+ bool found = false, other = false;
+ DnsResourceRecord *rr;
+ size_t i;
+ int r;
+
+ assert(a);
+ assert(key);
+
+ /* Remove all entries matching the specified key from *a */
+
+ DNS_ANSWER_FOREACH(rr, *a) {
+ r = dns_resource_key_equal(rr->key, key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ found = true;
+ else
+ other = true;
+
+ if (found && other)
+ break;
+ }
+
+ if (!found)
+ return 0;
+
+ if (!other) {
+ *a = dns_answer_unref(*a); /* Return NULL for the empty answer */
+ return 1;
+ }
+
+ if ((*a)->n_ref > 1) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *copy = NULL;
+ DnsAnswerFlags flags;
+ int ifindex;
+
+ copy = dns_answer_new((*a)->n_rrs);
+ if (!copy)
+ return -ENOMEM;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, *a) {
+ r = dns_resource_key_equal(rr->key, key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = dns_answer_add_raw(copy, rr, ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ dns_answer_unref(*a);
+ *a = TAKE_PTR(copy);
+
+ return 1;
+ }
+
+ /* Only a single reference, edit in-place */
+
+ i = 0;
+ for (;;) {
+ if (i >= (*a)->n_rrs)
+ break;
+
+ r = dns_resource_key_equal((*a)->items[i].rr->key, key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Kill this entry */
+
+ dns_resource_record_unref((*a)->items[i].rr);
+ memmove((*a)->items + i, (*a)->items + i + 1, sizeof(DnsAnswerItem) * ((*a)->n_rrs - i - 1));
+ (*a)->n_rrs--;
+ continue;
+
+ } else
+ /* Keep this entry */
+ i++;
+ }
+
+ return 1;
+}
+
+int dns_answer_remove_by_rr(DnsAnswer **a, DnsResourceRecord *rm) {
+ bool found = false, other = false;
+ DnsResourceRecord *rr;
+ size_t i;
+ int r;
+
+ assert(a);
+ assert(rm);
+
+ /* Remove all entries matching the specified RR from *a */
+
+ DNS_ANSWER_FOREACH(rr, *a) {
+ r = dns_resource_record_equal(rr, rm);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ found = true;
+ else
+ other = true;
+
+ if (found && other)
+ break;
+ }
+
+ if (!found)
+ return 0;
+
+ if (!other) {
+ *a = dns_answer_unref(*a); /* Return NULL for the empty answer */
+ return 1;
+ }
+
+ if ((*a)->n_ref > 1) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *copy = NULL;
+ DnsAnswerFlags flags;
+ int ifindex;
+
+ copy = dns_answer_new((*a)->n_rrs);
+ if (!copy)
+ return -ENOMEM;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, *a) {
+ r = dns_resource_record_equal(rr, rm);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = dns_answer_add_raw(copy, rr, ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ dns_answer_unref(*a);
+ *a = TAKE_PTR(copy);
+
+ return 1;
+ }
+
+ /* Only a single reference, edit in-place */
+
+ i = 0;
+ for (;;) {
+ if (i >= (*a)->n_rrs)
+ break;
+
+ r = dns_resource_record_equal((*a)->items[i].rr, rm);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Kill this entry */
+
+ dns_resource_record_unref((*a)->items[i].rr);
+ memmove((*a)->items + i, (*a)->items + i + 1, sizeof(DnsAnswerItem) * ((*a)->n_rrs - i - 1));
+ (*a)->n_rrs--;
+ continue;
+
+ } else
+ /* Keep this entry */
+ i++;
+ }
+
+ return 1;
+}
+
+int dns_answer_copy_by_key(DnsAnswer **a, DnsAnswer *source, const DnsResourceKey *key, DnsAnswerFlags or_flags) {
+ DnsResourceRecord *rr_source;
+ int ifindex_source, r;
+ DnsAnswerFlags flags_source;
+
+ assert(a);
+ assert(key);
+
+ /* Copy all RRs matching the specified key from source into *a */
+
+ DNS_ANSWER_FOREACH_FULL(rr_source, ifindex_source, flags_source, source) {
+
+ r = dns_resource_key_equal(rr_source->key, key);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* Make space for at least one entry */
+ r = dns_answer_reserve_or_clone(a, 1);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add(*a, rr_source, ifindex_source, flags_source|or_flags);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_answer_move_by_key(DnsAnswer **to, DnsAnswer **from, const DnsResourceKey *key, DnsAnswerFlags or_flags) {
+ int r;
+
+ assert(to);
+ assert(from);
+ assert(key);
+
+ r = dns_answer_copy_by_key(to, *from, key, or_flags);
+ if (r < 0)
+ return r;
+
+ return dns_answer_remove_by_key(from, key);
+}
+
+void dns_answer_order_by_scope(DnsAnswer *a, bool prefer_link_local) {
+ DnsAnswerItem *items;
+ size_t i, start, end;
+
+ if (!a)
+ return;
+
+ if (a->n_rrs <= 1)
+ return;
+
+ start = 0;
+ end = a->n_rrs-1;
+
+ /* RFC 4795, Section 2.6 suggests we should order entries
+ * depending on whether the sender is a link-local address. */
+
+ items = newa(DnsAnswerItem, a->n_rrs);
+ for (i = 0; i < a->n_rrs; i++) {
+
+ if (a->items[i].rr->key->class == DNS_CLASS_IN &&
+ ((a->items[i].rr->key->type == DNS_TYPE_A && in_addr_is_link_local(AF_INET, (union in_addr_union*) &a->items[i].rr->a.in_addr) != prefer_link_local) ||
+ (a->items[i].rr->key->type == DNS_TYPE_AAAA && in_addr_is_link_local(AF_INET6, (union in_addr_union*) &a->items[i].rr->aaaa.in6_addr) != prefer_link_local)))
+ /* Order address records that are not preferred to the end of the array */
+ items[end--] = a->items[i];
+ else
+ /* Order all other records to the beginning of the array */
+ items[start++] = a->items[i];
+ }
+
+ assert(start == end+1);
+ memcpy(a->items, items, sizeof(DnsAnswerItem) * a->n_rrs);
+}
+
+int dns_answer_reserve(DnsAnswer **a, size_t n_free) {
+ DnsAnswer *n;
+
+ assert(a);
+
+ if (n_free <= 0)
+ return 0;
+
+ if (*a) {
+ size_t ns;
+
+ if ((*a)->n_ref > 1)
+ return -EBUSY;
+
+ ns = (*a)->n_rrs + n_free;
+ if (ns > UINT16_MAX) /* Maximum number of RRs we can stick into a DNS packet section */
+ ns = UINT16_MAX;
+
+ if ((*a)->n_allocated >= ns)
+ return 0;
+
+ /* Allocate more than we need */
+ ns *= 2;
+ if (ns > UINT16_MAX)
+ ns = UINT16_MAX;
+
+ n = realloc(*a, offsetof(DnsAnswer, items) + sizeof(DnsAnswerItem) * ns);
+ if (!n)
+ return -ENOMEM;
+
+ n->n_allocated = ns;
+ } else {
+ n = dns_answer_new(n_free);
+ if (!n)
+ return -ENOMEM;
+ }
+
+ *a = n;
+ return 0;
+}
+
+int dns_answer_reserve_or_clone(DnsAnswer **a, size_t n_free) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *n = NULL;
+ int r;
+
+ assert(a);
+
+ /* Tries to extend the DnsAnswer object. And if that's not
+ * possible, since we are not the sole owner, then allocate a
+ * new, appropriately sized one. Either way, after this call
+ * the object will only have a single reference, and has room
+ * for at least the specified number of RRs. */
+
+ r = dns_answer_reserve(a, n_free);
+ if (r != -EBUSY)
+ return r;
+
+ assert(*a);
+
+ n = dns_answer_new(((*a)->n_rrs + n_free) * 2);
+ if (!n)
+ return -ENOMEM;
+
+ r = dns_answer_add_raw_all(n, *a);
+ if (r < 0)
+ return r;
+
+ dns_answer_unref(*a);
+ *a = TAKE_PTR(n);
+
+ return 0;
+}
+
+/*
+ * This function is not used in the code base, but is useful when debugging. Do not delete.
+ */
+void dns_answer_dump(DnsAnswer *answer, FILE *f) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int ifindex;
+
+ if (!f)
+ f = stdout;
+
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, answer) {
+ const char *t;
+
+ fputc('\t', f);
+
+ t = dns_resource_record_to_string(rr);
+ if (!t) {
+ log_oom();
+ continue;
+ }
+
+ fputs(t, f);
+
+ if (ifindex != 0 || flags != 0)
+ fputs("\t;", f);
+
+ if (ifindex != 0)
+ fprintf(f, " ifindex=%i", ifindex);
+ if (flags & DNS_ANSWER_AUTHENTICATED)
+ fputs(" authenticated", f);
+ if (flags & DNS_ANSWER_CACHEABLE)
+ fputs(" cacheable", f);
+ if (flags & DNS_ANSWER_SHARED_OWNER)
+ fputs(" shared-owner", f);
+ if (flags & DNS_ANSWER_CACHE_FLUSH)
+ fputs(" cache-flush", f);
+ if (flags & DNS_ANSWER_GOODBYE)
+ fputs(" goodbye", f);
+
+ fputc('\n', f);
+ }
+}
+
+int dns_answer_has_dname_for_cname(DnsAnswer *a, DnsResourceRecord *cname) {
+ DnsResourceRecord *rr;
+ int r;
+
+ assert(cname);
+
+ /* Checks whether the answer contains a DNAME record that indicates that the specified CNAME record is
+ * synthesized from it */
+
+ if (cname->key->type != DNS_TYPE_CNAME)
+ return 0;
+
+ DNS_ANSWER_FOREACH(rr, a) {
+ _cleanup_free_ char *n = NULL;
+
+ if (rr->key->type != DNS_TYPE_DNAME)
+ continue;
+ if (rr->key->class != cname->key->class)
+ continue;
+
+ r = dns_name_change_suffix(cname->cname.name, rr->dname.name, dns_resource_key_name(rr->key), &n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dns_name_equal(n, dns_resource_key_name(cname->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/src/resolve/resolved-dns-answer.h b/src/resolve/resolved-dns-answer.h
new file mode 100644
index 0000000..fd94c51
--- /dev/null
+++ b/src/resolve/resolved-dns-answer.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct DnsAnswer DnsAnswer;
+typedef struct DnsAnswerItem DnsAnswerItem;
+
+#include "macro.h"
+#include "resolved-dns-rr.h"
+
+/* A simple array of resource records. We keep track of the
+ * originating ifindex for each RR where that makes sense, so that we
+ * can qualify A and AAAA RRs referring to a local link with the
+ * right ifindex.
+ *
+ * Note that we usually encode the empty DnsAnswer object as a simple NULL. */
+
+typedef enum DnsAnswerFlags {
+ DNS_ANSWER_AUTHENTICATED = 1 << 0, /* Item has been authenticated */
+ DNS_ANSWER_CACHEABLE = 1 << 1, /* Item is subject to caching */
+ DNS_ANSWER_SHARED_OWNER = 1 << 2, /* For mDNS: RRset may be owner by multiple peers */
+ DNS_ANSWER_CACHE_FLUSH = 1 << 3, /* For mDNS: sets cache-flush bit in the rrclass of response records */
+ DNS_ANSWER_GOODBYE = 1 << 4, /* For mDNS: item is subject to disappear */
+} DnsAnswerFlags;
+
+struct DnsAnswerItem {
+ DnsResourceRecord *rr;
+ int ifindex;
+ DnsAnswerFlags flags;
+};
+
+struct DnsAnswer {
+ unsigned n_ref;
+ size_t n_rrs, n_allocated;
+ DnsAnswerItem items[0];
+};
+
+DnsAnswer *dns_answer_new(size_t n);
+DnsAnswer *dns_answer_ref(DnsAnswer *a);
+DnsAnswer *dns_answer_unref(DnsAnswer *a);
+
+int dns_answer_add(DnsAnswer *a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags);
+int dns_answer_add_extend(DnsAnswer **a, DnsResourceRecord *rr, int ifindex, DnsAnswerFlags flags);
+int dns_answer_add_soa(DnsAnswer *a, const char *name, uint32_t ttl, int ifindex);
+
+int dns_answer_match_key(DnsAnswer *a, const DnsResourceKey *key, DnsAnswerFlags *combined_flags);
+int dns_answer_contains_nsec_or_nsec3(DnsAnswer *a);
+int dns_answer_contains_zone_nsec3(DnsAnswer *answer, const char *zone);
+
+int dns_answer_find_soa(DnsAnswer *a, const DnsResourceKey *key, DnsResourceRecord **ret, DnsAnswerFlags *flags);
+int dns_answer_find_cname_or_dname(DnsAnswer *a, const DnsResourceKey *key, DnsResourceRecord **ret, DnsAnswerFlags *flags);
+
+int dns_answer_merge(DnsAnswer *a, DnsAnswer *b, DnsAnswer **ret);
+int dns_answer_extend(DnsAnswer **a, DnsAnswer *b);
+
+void dns_answer_order_by_scope(DnsAnswer *a, bool prefer_link_local);
+
+int dns_answer_reserve(DnsAnswer **a, size_t n_free);
+int dns_answer_reserve_or_clone(DnsAnswer **a, size_t n_free);
+
+int dns_answer_remove_by_key(DnsAnswer **a, const DnsResourceKey *key);
+int dns_answer_remove_by_rr(DnsAnswer **a, DnsResourceRecord *rr);
+
+int dns_answer_copy_by_key(DnsAnswer **a, DnsAnswer *source, const DnsResourceKey *key, DnsAnswerFlags or_flags);
+int dns_answer_move_by_key(DnsAnswer **to, DnsAnswer **from, const DnsResourceKey *key, DnsAnswerFlags or_flags);
+
+int dns_answer_has_dname_for_cname(DnsAnswer *a, DnsResourceRecord *cname);
+
+static inline size_t dns_answer_size(DnsAnswer *a) {
+ return a ? a->n_rrs : 0;
+}
+
+static inline bool dns_answer_isempty(DnsAnswer *a) {
+ return dns_answer_size(a) <= 0;
+}
+
+void dns_answer_dump(DnsAnswer *answer, FILE *f);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsAnswer*, dns_answer_unref);
+
+#define _DNS_ANSWER_FOREACH(q, kk, a) \
+ for (size_t UNIQ_T(i, q) = ({ \
+ (kk) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].rr : NULL; \
+ 0; \
+ }); \
+ (a) && (UNIQ_T(i, q) < (a)->n_rrs); \
+ UNIQ_T(i, q)++, (kk) = (UNIQ_T(i, q) < (a)->n_rrs ? (a)->items[UNIQ_T(i, q)].rr : NULL))
+
+#define DNS_ANSWER_FOREACH(kk, a) _DNS_ANSWER_FOREACH(UNIQ, kk, a)
+
+#define _DNS_ANSWER_FOREACH_IFINDEX(q, kk, ifi, a) \
+ for (size_t UNIQ_T(i, q) = ({ \
+ (kk) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].rr : NULL; \
+ (ifi) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].ifindex : 0; \
+ 0; \
+ }); \
+ (a) && (UNIQ_T(i, q) < (a)->n_rrs); \
+ UNIQ_T(i, q)++, \
+ (kk) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].rr : NULL), \
+ (ifi) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].ifindex : 0))
+
+#define DNS_ANSWER_FOREACH_IFINDEX(kk, ifindex, a) _DNS_ANSWER_FOREACH_IFINDEX(UNIQ, kk, ifindex, a)
+
+#define _DNS_ANSWER_FOREACH_FLAGS(q, kk, fl, a) \
+ for (size_t UNIQ_T(i, q) = ({ \
+ (kk) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].rr : NULL; \
+ (fl) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].flags : 0; \
+ 0; \
+ }); \
+ (a) && (UNIQ_T(i, q) < (a)->n_rrs); \
+ UNIQ_T(i, q)++, \
+ (kk) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].rr : NULL), \
+ (fl) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].flags : 0))
+
+#define DNS_ANSWER_FOREACH_FLAGS(kk, flags, a) _DNS_ANSWER_FOREACH_FLAGS(UNIQ, kk, flags, a)
+
+#define _DNS_ANSWER_FOREACH_FULL(q, kk, ifi, fl, a) \
+ for (size_t UNIQ_T(i, q) = ({ \
+ (kk) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].rr : NULL; \
+ (ifi) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].ifindex : 0; \
+ (fl) = ((a) && (a)->n_rrs > 0) ? (a)->items[0].flags : 0; \
+ 0; \
+ }); \
+ (a) && (UNIQ_T(i, q) < (a)->n_rrs); \
+ UNIQ_T(i, q)++, \
+ (kk) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].rr : NULL), \
+ (ifi) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].ifindex : 0), \
+ (fl) = ((UNIQ_T(i, q) < (a)->n_rrs) ? (a)->items[UNIQ_T(i, q)].flags : 0))
+
+#define DNS_ANSWER_FOREACH_FULL(kk, ifindex, flags, a) _DNS_ANSWER_FOREACH_FULL(UNIQ, kk, ifindex, flags, a)
diff --git a/src/resolve/resolved-dns-cache.c b/src/resolve/resolved-dns-cache.c
new file mode 100644
index 0000000..75f1ccb
--- /dev/null
+++ b/src/resolve/resolved-dns-cache.c
@@ -0,0 +1,1119 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "format-util.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-cache.h"
+#include "resolved-dns-packet.h"
+#include "string-util.h"
+
+/* Never cache more than 4K entries. RFC 1536, Section 5 suggests to
+ * leave DNS caches unbounded, but that's crazy. */
+#define CACHE_MAX 4096
+
+/* We never keep any item longer than 2h in our cache */
+#define CACHE_TTL_MAX_USEC (2 * USEC_PER_HOUR)
+
+/* How long to cache strange rcodes, i.e. rcodes != SUCCESS and != NXDOMAIN (specifically: that's only SERVFAIL for
+ * now) */
+#define CACHE_TTL_STRANGE_RCODE_USEC (10 * USEC_PER_SEC)
+
+typedef enum DnsCacheItemType DnsCacheItemType;
+typedef struct DnsCacheItem DnsCacheItem;
+
+enum DnsCacheItemType {
+ DNS_CACHE_POSITIVE,
+ DNS_CACHE_NODATA,
+ DNS_CACHE_NXDOMAIN,
+ DNS_CACHE_RCODE, /* "strange" RCODE (effective only SERVFAIL for now) */
+};
+
+struct DnsCacheItem {
+ DnsCacheItemType type;
+ DnsResourceKey *key;
+ DnsResourceRecord *rr;
+ int rcode;
+
+ usec_t until;
+ bool authenticated:1;
+ bool shared_owner:1;
+
+ int ifindex;
+ int owner_family;
+ union in_addr_union owner_address;
+
+ unsigned prioq_idx;
+ LIST_FIELDS(DnsCacheItem, by_key);
+};
+
+static const char *dns_cache_item_type_to_string(DnsCacheItem *item) {
+ assert(item);
+
+ switch (item->type) {
+
+ case DNS_CACHE_POSITIVE:
+ return "POSITIVE";
+
+ case DNS_CACHE_NODATA:
+ return "NODATA";
+
+ case DNS_CACHE_NXDOMAIN:
+ return "NXDOMAIN";
+
+ case DNS_CACHE_RCODE:
+ return dns_rcode_to_string(item->rcode);
+ }
+
+ return NULL;
+}
+
+static void dns_cache_item_free(DnsCacheItem *i) {
+ if (!i)
+ return;
+
+ dns_resource_record_unref(i->rr);
+ dns_resource_key_unref(i->key);
+ free(i);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsCacheItem*, dns_cache_item_free);
+
+static void dns_cache_item_unlink_and_free(DnsCache *c, DnsCacheItem *i) {
+ DnsCacheItem *first;
+
+ assert(c);
+
+ if (!i)
+ return;
+
+ first = hashmap_get(c->by_key, i->key);
+ LIST_REMOVE(by_key, first, i);
+
+ if (first)
+ assert_se(hashmap_replace(c->by_key, first->key, first) >= 0);
+ else
+ hashmap_remove(c->by_key, i->key);
+
+ prioq_remove(c->by_expiry, i, &i->prioq_idx);
+
+ dns_cache_item_free(i);
+}
+
+static bool dns_cache_remove_by_rr(DnsCache *c, DnsResourceRecord *rr) {
+ DnsCacheItem *first, *i;
+ int r;
+
+ first = hashmap_get(c->by_key, rr->key);
+ LIST_FOREACH(by_key, i, first) {
+ r = dns_resource_record_equal(i->rr, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ dns_cache_item_unlink_and_free(c, i);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool dns_cache_remove_by_key(DnsCache *c, DnsResourceKey *key) {
+ DnsCacheItem *first, *i, *n;
+
+ assert(c);
+ assert(key);
+
+ first = hashmap_remove(c->by_key, key);
+ if (!first)
+ return false;
+
+ LIST_FOREACH_SAFE(by_key, i, n, first) {
+ prioq_remove(c->by_expiry, i, &i->prioq_idx);
+ dns_cache_item_free(i);
+ }
+
+ return true;
+}
+
+void dns_cache_flush(DnsCache *c) {
+ DnsResourceKey *key;
+
+ assert(c);
+
+ while ((key = hashmap_first_key(c->by_key)))
+ dns_cache_remove_by_key(c, key);
+
+ assert(hashmap_size(c->by_key) == 0);
+ assert(prioq_size(c->by_expiry) == 0);
+
+ c->by_key = hashmap_free(c->by_key);
+ c->by_expiry = prioq_free(c->by_expiry);
+}
+
+static void dns_cache_make_space(DnsCache *c, unsigned add) {
+ assert(c);
+
+ if (add <= 0)
+ return;
+
+ /* Makes space for n new entries. Note that we actually allow
+ * the cache to grow beyond CACHE_MAX, but only when we shall
+ * add more RRs to the cache than CACHE_MAX at once. In that
+ * case the cache will be emptied completely otherwise. */
+
+ for (;;) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ DnsCacheItem *i;
+
+ if (prioq_size(c->by_expiry) <= 0)
+ break;
+
+ if (prioq_size(c->by_expiry) + add < CACHE_MAX)
+ break;
+
+ i = prioq_peek(c->by_expiry);
+ assert(i);
+
+ /* Take an extra reference to the key so that it
+ * doesn't go away in the middle of the remove call */
+ key = dns_resource_key_ref(i->key);
+ dns_cache_remove_by_key(c, key);
+ }
+}
+
+void dns_cache_prune(DnsCache *c) {
+ usec_t t = 0;
+
+ assert(c);
+
+ /* Remove all entries that are past their TTL */
+
+ for (;;) {
+ DnsCacheItem *i;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ i = prioq_peek(c->by_expiry);
+ if (!i)
+ break;
+
+ if (t <= 0)
+ t = now(clock_boottime_or_monotonic());
+
+ if (i->until > t)
+ break;
+
+ /* Depending whether this is an mDNS shared entry
+ * either remove only this one RR or the whole RRset */
+ log_debug("Removing %scache entry for %s (expired "USEC_FMT"s ago)",
+ i->shared_owner ? "shared " : "",
+ dns_resource_key_to_string(i->key, key_str, sizeof key_str),
+ (t - i->until) / USEC_PER_SEC);
+
+ if (i->shared_owner)
+ dns_cache_item_unlink_and_free(c, i);
+ else {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ /* Take an extra reference to the key so that it
+ * doesn't go away in the middle of the remove call */
+ key = dns_resource_key_ref(i->key);
+ dns_cache_remove_by_key(c, key);
+ }
+ }
+}
+
+static int dns_cache_item_prioq_compare_func(const void *a, const void *b) {
+ const DnsCacheItem *x = a, *y = b;
+
+ return CMP(x->until, y->until);
+}
+
+static int dns_cache_init(DnsCache *c) {
+ int r;
+
+ assert(c);
+
+ r = prioq_ensure_allocated(&c->by_expiry, dns_cache_item_prioq_compare_func);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&c->by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+
+static int dns_cache_link_item(DnsCache *c, DnsCacheItem *i) {
+ DnsCacheItem *first;
+ int r;
+
+ assert(c);
+ assert(i);
+
+ r = prioq_put(c->by_expiry, i, &i->prioq_idx);
+ if (r < 0)
+ return r;
+
+ first = hashmap_get(c->by_key, i->key);
+ if (first) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *k = NULL;
+
+ /* Keep a reference to the original key, while we manipulate the list. */
+ k = dns_resource_key_ref(first->key);
+
+ /* Now, try to reduce the number of keys we keep */
+ dns_resource_key_reduce(&first->key, &i->key);
+
+ if (first->rr)
+ dns_resource_key_reduce(&first->rr->key, &i->key);
+ if (i->rr)
+ dns_resource_key_reduce(&i->rr->key, &i->key);
+
+ LIST_PREPEND(by_key, first, i);
+ assert_se(hashmap_replace(c->by_key, first->key, first) >= 0);
+ } else {
+ r = hashmap_put(c->by_key, i->key, i);
+ if (r < 0) {
+ prioq_remove(c->by_expiry, i, &i->prioq_idx);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static DnsCacheItem* dns_cache_get(DnsCache *c, DnsResourceRecord *rr) {
+ DnsCacheItem *i;
+
+ assert(c);
+ assert(rr);
+
+ LIST_FOREACH(by_key, i, hashmap_get(c->by_key, rr->key))
+ if (i->rr && dns_resource_record_equal(i->rr, rr) > 0)
+ return i;
+
+ return NULL;
+}
+
+static usec_t calculate_until(DnsResourceRecord *rr, uint32_t nsec_ttl, usec_t timestamp, bool use_soa_minimum) {
+ uint32_t ttl;
+ usec_t u;
+
+ assert(rr);
+
+ ttl = MIN(rr->ttl, nsec_ttl);
+ if (rr->key->type == DNS_TYPE_SOA && use_soa_minimum) {
+ /* If this is a SOA RR, and it is requested, clamp to
+ * the SOA's minimum field. This is used when we do
+ * negative caching, to determine the TTL for the
+ * negative caching entry. See RFC 2308, Section
+ * 5. */
+
+ if (ttl > rr->soa.minimum)
+ ttl = rr->soa.minimum;
+ }
+
+ u = ttl * USEC_PER_SEC;
+ if (u > CACHE_TTL_MAX_USEC)
+ u = CACHE_TTL_MAX_USEC;
+
+ if (rr->expiry != USEC_INFINITY) {
+ usec_t left;
+
+ /* Make use of the DNSSEC RRSIG expiry info, if we
+ * have it */
+
+ left = LESS_BY(rr->expiry, now(CLOCK_REALTIME));
+ if (u > left)
+ u = left;
+ }
+
+ return timestamp + u;
+}
+
+static void dns_cache_item_update_positive(
+ DnsCache *c,
+ DnsCacheItem *i,
+ DnsResourceRecord *rr,
+ bool authenticated,
+ bool shared_owner,
+ usec_t timestamp,
+ int ifindex,
+ int owner_family,
+ const union in_addr_union *owner_address) {
+
+ assert(c);
+ assert(i);
+ assert(rr);
+ assert(owner_address);
+
+ i->type = DNS_CACHE_POSITIVE;
+
+ if (!i->by_key_prev)
+ /* We are the first item in the list, we need to
+ * update the key used in the hashmap */
+
+ assert_se(hashmap_replace(c->by_key, rr->key, i) >= 0);
+
+ dns_resource_record_ref(rr);
+ dns_resource_record_unref(i->rr);
+ i->rr = rr;
+
+ dns_resource_key_unref(i->key);
+ i->key = dns_resource_key_ref(rr->key);
+
+ i->until = calculate_until(rr, (uint32_t) -1, timestamp, false);
+ i->authenticated = authenticated;
+ i->shared_owner = shared_owner;
+
+ i->ifindex = ifindex;
+
+ i->owner_family = owner_family;
+ i->owner_address = *owner_address;
+
+ prioq_reshuffle(c->by_expiry, i, &i->prioq_idx);
+}
+
+static int dns_cache_put_positive(
+ DnsCache *c,
+ DnsResourceRecord *rr,
+ bool authenticated,
+ bool shared_owner,
+ usec_t timestamp,
+ int ifindex,
+ int owner_family,
+ const union in_addr_union *owner_address) {
+
+ _cleanup_(dns_cache_item_freep) DnsCacheItem *i = NULL;
+ DnsCacheItem *existing;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ int r, k;
+
+ assert(c);
+ assert(rr);
+ assert(owner_address);
+
+ /* Never cache pseudo RRs */
+ if (dns_class_is_pseudo(rr->key->class))
+ return 0;
+ if (dns_type_is_pseudo(rr->key->type))
+ return 0;
+
+ /* New TTL is 0? Delete this specific entry... */
+ if (rr->ttl <= 0) {
+ k = dns_cache_remove_by_rr(c, rr);
+ log_debug("%s: %s",
+ k > 0 ? "Removed zero TTL entry from cache" : "Not caching zero TTL cache entry",
+ dns_resource_key_to_string(rr->key, key_str, sizeof key_str));
+ return 0;
+ }
+
+ /* Entry exists already? Update TTL, timestamp and owner */
+ existing = dns_cache_get(c, rr);
+ if (existing) {
+ dns_cache_item_update_positive(
+ c,
+ existing,
+ rr,
+ authenticated,
+ shared_owner,
+ timestamp,
+ ifindex,
+ owner_family,
+ owner_address);
+ return 0;
+ }
+
+ /* Otherwise, add the new RR */
+ r = dns_cache_init(c);
+ if (r < 0)
+ return r;
+
+ dns_cache_make_space(c, 1);
+
+ i = new(DnsCacheItem, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (DnsCacheItem) {
+ .type = DNS_CACHE_POSITIVE,
+ .key = dns_resource_key_ref(rr->key),
+ .rr = dns_resource_record_ref(rr),
+ .until = calculate_until(rr, (uint32_t) -1, timestamp, false),
+ .authenticated = authenticated,
+ .shared_owner = shared_owner,
+ .ifindex = ifindex,
+ .owner_family = owner_family,
+ .owner_address = *owner_address,
+ .prioq_idx = PRIOQ_IDX_NULL,
+ };
+
+ r = dns_cache_link_item(c, i);
+ if (r < 0)
+ return r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t = NULL;
+ char ifname[IF_NAMESIZE + 1];
+
+ (void) in_addr_to_string(i->owner_family, &i->owner_address, &t);
+
+ log_debug("Added positive %s%s cache entry for %s "USEC_FMT"s on %s/%s/%s",
+ i->authenticated ? "authenticated" : "unauthenticated",
+ i->shared_owner ? " shared" : "",
+ dns_resource_key_to_string(i->key, key_str, sizeof key_str),
+ (i->until - timestamp) / USEC_PER_SEC,
+ i->ifindex == 0 ? "*" : strna(format_ifname(i->ifindex, ifname)),
+ af_to_name_short(i->owner_family),
+ strna(t));
+ }
+
+ i = NULL;
+ return 0;
+}
+
+static int dns_cache_put_negative(
+ DnsCache *c,
+ DnsResourceKey *key,
+ int rcode,
+ bool authenticated,
+ uint32_t nsec_ttl,
+ usec_t timestamp,
+ DnsResourceRecord *soa,
+ int owner_family,
+ const union in_addr_union *owner_address) {
+
+ _cleanup_(dns_cache_item_freep) DnsCacheItem *i = NULL;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ int r;
+
+ assert(c);
+ assert(key);
+ assert(owner_address);
+
+ /* Never cache pseudo RR keys. DNS_TYPE_ANY is particularly
+ * important to filter out as we use this as a pseudo-type for
+ * NXDOMAIN entries */
+ if (dns_class_is_pseudo(key->class))
+ return 0;
+ if (dns_type_is_pseudo(key->type))
+ return 0;
+
+ if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) {
+ if (!soa)
+ return 0;
+
+ /* For negative replies, check if we have a TTL of a SOA */
+ if (nsec_ttl <= 0 || soa->soa.minimum <= 0 || soa->ttl <= 0) {
+ log_debug("Not caching negative entry with zero SOA/NSEC/NSEC3 TTL: %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+ return 0;
+ }
+ } else if (rcode != DNS_RCODE_SERVFAIL)
+ return 0;
+
+ r = dns_cache_init(c);
+ if (r < 0)
+ return r;
+
+ dns_cache_make_space(c, 1);
+
+ i = new(DnsCacheItem, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (DnsCacheItem) {
+ .type =
+ rcode == DNS_RCODE_SUCCESS ? DNS_CACHE_NODATA :
+ rcode == DNS_RCODE_NXDOMAIN ? DNS_CACHE_NXDOMAIN : DNS_CACHE_RCODE,
+ .authenticated = authenticated,
+ .owner_family = owner_family,
+ .owner_address = *owner_address,
+ .prioq_idx = PRIOQ_IDX_NULL,
+ .rcode = rcode,
+ };
+
+ i->until =
+ i->type == DNS_CACHE_RCODE ? timestamp + CACHE_TTL_STRANGE_RCODE_USEC :
+ calculate_until(soa, nsec_ttl, timestamp, true);
+
+ if (i->type == DNS_CACHE_NXDOMAIN) {
+ /* NXDOMAIN entries should apply equally to all types, so we use ANY as
+ * a pseudo type for this purpose here. */
+ i->key = dns_resource_key_new(key->class, DNS_TYPE_ANY, dns_resource_key_name(key));
+ if (!i->key)
+ return -ENOMEM;
+
+ /* Make sure to remove any previous entry for this
+ * specific ANY key. (For non-ANY keys the cache data
+ * is already cleared by the caller.) Note that we
+ * don't bother removing positive or NODATA cache
+ * items in this case, because it would either be slow
+ * or require explicit indexing by name */
+ dns_cache_remove_by_key(c, key);
+ } else
+ i->key = dns_resource_key_ref(key);
+
+ r = dns_cache_link_item(c, i);
+ if (r < 0)
+ return r;
+
+ log_debug("Added %s cache entry for %s "USEC_FMT"s",
+ dns_cache_item_type_to_string(i),
+ dns_resource_key_to_string(i->key, key_str, sizeof key_str),
+ (i->until - timestamp) / USEC_PER_SEC);
+
+ i = NULL;
+ return 0;
+}
+
+static void dns_cache_remove_previous(
+ DnsCache *c,
+ DnsResourceKey *key,
+ DnsAnswer *answer) {
+
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+
+ assert(c);
+
+ /* First, if we were passed a key (i.e. on LLMNR/DNS, but
+ * not on mDNS), delete all matching old RRs, so that we only
+ * keep complete by_key in place. */
+ if (key)
+ dns_cache_remove_by_key(c, key);
+
+ /* Second, flush all entries matching the answer, unless this
+ * is an RR that is explicitly marked to be "shared" between
+ * peers (i.e. mDNS RRs without the flush-cache bit set). */
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+ if ((flags & DNS_ANSWER_CACHEABLE) == 0)
+ continue;
+
+ if (flags & DNS_ANSWER_SHARED_OWNER)
+ continue;
+
+ dns_cache_remove_by_key(c, rr->key);
+ }
+}
+
+static bool rr_eligible(DnsResourceRecord *rr) {
+ assert(rr);
+
+ /* When we see an NSEC/NSEC3 RR, we'll only cache it if it is from the lower zone, not the upper zone, since
+ * that's where the interesting bits are (with exception of DS RRs). Of course, this way we cannot derive DS
+ * existence from any cached NSEC/NSEC3, but that should be fine. */
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_NSEC:
+ return !bitmap_isset(rr->nsec.types, DNS_TYPE_NS) ||
+ bitmap_isset(rr->nsec.types, DNS_TYPE_SOA);
+
+ case DNS_TYPE_NSEC3:
+ return !bitmap_isset(rr->nsec3.types, DNS_TYPE_NS) ||
+ bitmap_isset(rr->nsec3.types, DNS_TYPE_SOA);
+
+ default:
+ return true;
+ }
+}
+
+int dns_cache_put(
+ DnsCache *c,
+ DnsCacheMode cache_mode,
+ DnsResourceKey *key,
+ int rcode,
+ DnsAnswer *answer,
+ bool authenticated,
+ uint32_t nsec_ttl,
+ usec_t timestamp,
+ int owner_family,
+ const union in_addr_union *owner_address) {
+
+ DnsResourceRecord *soa = NULL, *rr;
+ bool weird_rcode = false;
+ DnsAnswerFlags flags;
+ unsigned cache_keys;
+ int r, ifindex;
+
+ assert(c);
+ assert(owner_address);
+
+ dns_cache_remove_previous(c, key, answer);
+
+ /* We only care for positive replies and NXDOMAINs, on all other replies we will simply flush the respective
+ * entries, and that's it. (Well, with one further exception: since some DNS zones (akamai!) return SERVFAIL
+ * consistently for some lookups, and forwarders tend to propagate that we'll cache that too, but only for a
+ * short time.) */
+
+ if (IN_SET(rcode, DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN)) {
+ if (dns_answer_size(answer) <= 0) {
+ if (key) {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ log_debug("Not caching negative entry without a SOA record: %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+ }
+ return 0;
+ }
+
+ } else {
+ /* Only cache SERVFAIL as "weird" rcode for now. We can add more later, should that turn out to be
+ * beneficial. */
+ if (rcode != DNS_RCODE_SERVFAIL)
+ return 0;
+
+ weird_rcode = true;
+ }
+
+ cache_keys = dns_answer_size(answer);
+ if (key)
+ cache_keys++;
+
+ /* Make some space for our new entries */
+ dns_cache_make_space(c, cache_keys);
+
+ if (timestamp <= 0)
+ timestamp = now(clock_boottime_or_monotonic());
+
+ /* Second, add in positive entries for all contained RRs */
+ DNS_ANSWER_FOREACH_FULL(rr, ifindex, flags, answer) {
+ if ((flags & DNS_ANSWER_CACHEABLE) == 0 ||
+ !rr_eligible(rr))
+ continue;
+
+ r = dns_cache_put_positive(
+ c,
+ rr,
+ flags & DNS_ANSWER_AUTHENTICATED,
+ flags & DNS_ANSWER_SHARED_OWNER,
+ timestamp,
+ ifindex,
+ owner_family, owner_address);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (!key) /* mDNS doesn't know negative caching, really */
+ return 0;
+
+ /* Third, add in negative entries if the key has no RR */
+ r = dns_answer_match_key(answer, key, NULL);
+ if (r < 0)
+ goto fail;
+ if (r > 0)
+ return 0;
+
+ /* But not if it has a matching CNAME/DNAME (the negative
+ * caching will be done on the canonical name, not on the
+ * alias) */
+ r = dns_answer_find_cname_or_dname(answer, key, NULL, NULL);
+ if (r < 0)
+ goto fail;
+ if (r > 0)
+ return 0;
+
+ /* See https://tools.ietf.org/html/rfc2308, which say that a matching SOA record in the packet is used to
+ * enable negative caching. We apply one exception though: if we are about to cache a weird rcode we do so
+ * regardless of a SOA. */
+ r = dns_answer_find_soa(answer, key, &soa, &flags);
+ if (r < 0)
+ goto fail;
+ if (r == 0 && !weird_rcode)
+ return 0;
+ if (r > 0) {
+ /* Refuse using the SOA data if it is unsigned, but the key is
+ * signed */
+ if (authenticated && (flags & DNS_ANSWER_AUTHENTICATED) == 0)
+ return 0;
+ }
+
+ if (cache_mode == DNS_CACHE_MODE_NO_NEGATIVE) {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ log_debug("Not caching negative entry for: %s, cache mode set to no-negative",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+ return 0;
+ }
+
+ r = dns_cache_put_negative(
+ c,
+ key,
+ rcode,
+ authenticated,
+ nsec_ttl,
+ timestamp,
+ soa,
+ owner_family, owner_address);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ /* Adding all RRs failed. Let's clean up what we already
+ * added, just in case */
+
+ if (key)
+ dns_cache_remove_by_key(c, key);
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+ if ((flags & DNS_ANSWER_CACHEABLE) == 0)
+ continue;
+
+ dns_cache_remove_by_key(c, rr->key);
+ }
+
+ return r;
+}
+
+static DnsCacheItem *dns_cache_get_by_key_follow_cname_dname_nsec(DnsCache *c, DnsResourceKey *k) {
+ DnsCacheItem *i;
+ const char *n;
+ int r;
+
+ assert(c);
+ assert(k);
+
+ /* If we hit some OOM error, or suchlike, we don't care too
+ * much, after all this is just a cache */
+
+ i = hashmap_get(c->by_key, k);
+ if (i)
+ return i;
+
+ n = dns_resource_key_name(k);
+
+ /* Check if we have an NXDOMAIN cache item for the name, notice that we use
+ * the pseudo-type ANY for NXDOMAIN cache items. */
+ i = hashmap_get(c->by_key, &DNS_RESOURCE_KEY_CONST(k->class, DNS_TYPE_ANY, n));
+ if (i && i->type == DNS_CACHE_NXDOMAIN)
+ return i;
+
+ if (dns_type_may_redirect(k->type)) {
+ /* Check if we have a CNAME record instead */
+ i = hashmap_get(c->by_key, &DNS_RESOURCE_KEY_CONST(k->class, DNS_TYPE_CNAME, n));
+ if (i && i->type != DNS_CACHE_NODATA)
+ return i;
+
+ /* OK, let's look for cached DNAME records. */
+ for (;;) {
+ if (isempty(n))
+ return NULL;
+
+ i = hashmap_get(c->by_key, &DNS_RESOURCE_KEY_CONST(k->class, DNS_TYPE_DNAME, n));
+ if (i && i->type != DNS_CACHE_NODATA)
+ return i;
+
+ /* Jump one label ahead */
+ r = dns_name_parent(&n);
+ if (r <= 0)
+ return NULL;
+ }
+ }
+
+ if (k->type != DNS_TYPE_NSEC) {
+ /* Check if we have an NSEC record instead for the name. */
+ i = hashmap_get(c->by_key, &DNS_RESOURCE_KEY_CONST(k->class, DNS_TYPE_NSEC, n));
+ if (i)
+ return i;
+ }
+
+ return NULL;
+}
+
+int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcode, DnsAnswer **ret, bool *authenticated) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ unsigned n = 0;
+ int r;
+ bool nxdomain = false;
+ DnsCacheItem *j, *first, *nsec = NULL;
+ bool have_authenticated = false, have_non_authenticated = false;
+ usec_t current;
+ int found_rcode = -1;
+
+ assert(c);
+ assert(key);
+ assert(rcode);
+ assert(ret);
+ assert(authenticated);
+
+ if (key->type == DNS_TYPE_ANY || key->class == DNS_CLASS_ANY) {
+ /* If we have ANY lookups we don't use the cache, so
+ * that the caller refreshes via the network. */
+
+ log_debug("Ignoring cache for ANY lookup: %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ c->n_miss++;
+
+ *ret = NULL;
+ *rcode = DNS_RCODE_SUCCESS;
+ *authenticated = false;
+
+ return 0;
+ }
+
+ first = dns_cache_get_by_key_follow_cname_dname_nsec(c, key);
+ if (!first) {
+ /* If one question cannot be answered we need to refresh */
+
+ log_debug("Cache miss for %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ c->n_miss++;
+
+ *ret = NULL;
+ *rcode = DNS_RCODE_SUCCESS;
+ *authenticated = false;
+
+ return 0;
+ }
+
+ LIST_FOREACH(by_key, j, first) {
+ if (j->rr) {
+ if (j->rr->key->type == DNS_TYPE_NSEC)
+ nsec = j;
+
+ n++;
+ } else if (j->type == DNS_CACHE_NXDOMAIN)
+ nxdomain = true;
+ else if (j->type == DNS_CACHE_RCODE)
+ found_rcode = j->rcode;
+
+ if (j->authenticated)
+ have_authenticated = true;
+ else
+ have_non_authenticated = true;
+ }
+
+ if (found_rcode >= 0) {
+ log_debug("RCODE %s cache hit for %s",
+ dns_rcode_to_string(found_rcode),
+ dns_resource_key_to_string(key, key_str, sizeof(key_str)));
+
+ *ret = NULL;
+ *rcode = found_rcode;
+ *authenticated = false;
+
+ c->n_hit++;
+ return 1;
+ }
+
+ if (nsec && !IN_SET(key->type, DNS_TYPE_NSEC, DNS_TYPE_DS)) {
+ /* Note that we won't derive information for DS RRs from an NSEC, because we only cache NSEC RRs from
+ * the lower-zone of a zone cut, but the DS RRs are on the upper zone. */
+
+ log_debug("NSEC NODATA cache hit for %s",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ /* We only found an NSEC record that matches our name.
+ * If it says the type doesn't exist report
+ * NODATA. Otherwise report a cache miss. */
+
+ *ret = NULL;
+ *rcode = DNS_RCODE_SUCCESS;
+ *authenticated = nsec->authenticated;
+
+ if (!bitmap_isset(nsec->rr->nsec.types, key->type) &&
+ !bitmap_isset(nsec->rr->nsec.types, DNS_TYPE_CNAME) &&
+ !bitmap_isset(nsec->rr->nsec.types, DNS_TYPE_DNAME)) {
+ c->n_hit++;
+ return 1;
+ }
+
+ c->n_miss++;
+ return 0;
+ }
+
+ log_debug("%s cache hit for %s",
+ n > 0 ? "Positive" :
+ nxdomain ? "NXDOMAIN" : "NODATA",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ if (n <= 0) {
+ c->n_hit++;
+
+ *ret = NULL;
+ *rcode = nxdomain ? DNS_RCODE_NXDOMAIN : DNS_RCODE_SUCCESS;
+ *authenticated = have_authenticated && !have_non_authenticated;
+ return 1;
+ }
+
+ answer = dns_answer_new(n);
+ if (!answer)
+ return -ENOMEM;
+
+ if (clamp_ttl)
+ current = now(clock_boottime_or_monotonic());
+
+ LIST_FOREACH(by_key, j, first) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ if (!j->rr)
+ continue;
+
+ if (clamp_ttl) {
+ rr = dns_resource_record_ref(j->rr);
+
+ r = dns_resource_record_clamp_ttl(&rr, LESS_BY(j->until, current) / USEC_PER_SEC);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_answer_add(answer, rr ?: j->rr, j->ifindex, j->authenticated ? DNS_ANSWER_AUTHENTICATED : 0);
+ if (r < 0)
+ return r;
+ }
+
+ c->n_hit++;
+
+ *ret = answer;
+ *rcode = DNS_RCODE_SUCCESS;
+ *authenticated = have_authenticated && !have_non_authenticated;
+ answer = NULL;
+
+ return n;
+}
+
+int dns_cache_check_conflicts(DnsCache *cache, DnsResourceRecord *rr, int owner_family, const union in_addr_union *owner_address) {
+ DnsCacheItem *i, *first;
+ bool same_owner = true;
+
+ assert(cache);
+ assert(rr);
+
+ dns_cache_prune(cache);
+
+ /* See if there's a cache entry for the same key. If there
+ * isn't there's no conflict */
+ first = hashmap_get(cache->by_key, rr->key);
+ if (!first)
+ return 0;
+
+ /* See if the RR key is owned by the same owner, if so, there
+ * isn't a conflict either */
+ LIST_FOREACH(by_key, i, first) {
+ if (i->owner_family != owner_family ||
+ !in_addr_equal(owner_family, &i->owner_address, owner_address)) {
+ same_owner = false;
+ break;
+ }
+ }
+ if (same_owner)
+ return 0;
+
+ /* See if there's the exact same RR in the cache. If yes, then
+ * there's no conflict. */
+ if (dns_cache_get(cache, rr))
+ return 0;
+
+ /* There's a conflict */
+ return 1;
+}
+
+int dns_cache_export_shared_to_packet(DnsCache *cache, DnsPacket *p) {
+ unsigned ancount = 0;
+ DnsCacheItem *i;
+ int r;
+
+ assert(cache);
+ assert(p);
+
+ HASHMAP_FOREACH(i, cache->by_key) {
+ DnsCacheItem *j;
+
+ LIST_FOREACH(by_key, j, i) {
+ if (!j->rr)
+ continue;
+
+ if (!j->shared_owner)
+ continue;
+
+ r = dns_packet_append_rr(p, j->rr, 0, NULL, NULL);
+ if (r == -EMSGSIZE && p->protocol == DNS_PROTOCOL_MDNS) {
+ /* For mDNS, if we're unable to stuff all known answers into the given packet,
+ * allocate a new one, push the RR into that one and link it to the current one.
+ */
+
+ DNS_PACKET_HEADER(p)->ancount = htobe16(ancount);
+ ancount = 0;
+
+ r = dns_packet_new_query(&p->more, p->protocol, 0, true);
+ if (r < 0)
+ return r;
+
+ /* continue with new packet */
+ p = p->more;
+ r = dns_packet_append_rr(p, j->rr, 0, NULL, NULL);
+ }
+
+ if (r < 0)
+ return r;
+
+ ancount++;
+ }
+ }
+
+ DNS_PACKET_HEADER(p)->ancount = htobe16(ancount);
+
+ return 0;
+}
+
+void dns_cache_dump(DnsCache *cache, FILE *f) {
+ DnsCacheItem *i;
+
+ if (!cache)
+ return;
+
+ if (!f)
+ f = stdout;
+
+ HASHMAP_FOREACH(i, cache->by_key) {
+ DnsCacheItem *j;
+
+ LIST_FOREACH(by_key, j, i) {
+
+ fputc('\t', f);
+
+ if (j->rr) {
+ const char *t;
+ t = dns_resource_record_to_string(j->rr);
+ if (!t) {
+ log_oom();
+ continue;
+ }
+
+ fputs(t, f);
+ fputc('\n', f);
+ } else {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ fputs(dns_resource_key_to_string(j->key, key_str, sizeof key_str), f);
+ fputs(" -- ", f);
+ fputs(dns_cache_item_type_to_string(j), f);
+ fputc('\n', f);
+ }
+ }
+ }
+}
+
+bool dns_cache_is_empty(DnsCache *cache) {
+ if (!cache)
+ return true;
+
+ return hashmap_isempty(cache->by_key);
+}
+
+unsigned dns_cache_size(DnsCache *cache) {
+ if (!cache)
+ return 0;
+
+ return hashmap_size(cache->by_key);
+}
diff --git a/src/resolve/resolved-dns-cache.h b/src/resolve/resolved-dns-cache.h
new file mode 100644
index 0000000..4ab213d
--- /dev/null
+++ b/src/resolve/resolved-dns-cache.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hashmap.h"
+#include "list.h"
+#include "prioq.h"
+#include "resolve-util.h"
+#include "time-util.h"
+
+typedef struct DnsCache {
+ Hashmap *by_key;
+ Prioq *by_expiry;
+ unsigned n_hit;
+ unsigned n_miss;
+} DnsCache;
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-rr.h"
+
+void dns_cache_flush(DnsCache *c);
+void dns_cache_prune(DnsCache *c);
+
+int dns_cache_put(DnsCache *c, DnsCacheMode cache_mode, DnsResourceKey *key, int rcode, DnsAnswer *answer, bool authenticated, uint32_t nsec_ttl, usec_t timestamp, int owner_family, const union in_addr_union *owner_address);
+int dns_cache_lookup(DnsCache *c, DnsResourceKey *key, bool clamp_ttl, int *rcode, DnsAnswer **answer, bool *authenticated);
+
+int dns_cache_check_conflicts(DnsCache *cache, DnsResourceRecord *rr, int owner_family, const union in_addr_union *owner_address);
+
+void dns_cache_dump(DnsCache *cache, FILE *f);
+bool dns_cache_is_empty(DnsCache *cache);
+
+unsigned dns_cache_size(DnsCache *cache);
+
+int dns_cache_export_shared_to_packet(DnsCache *cache, DnsPacket *p);
diff --git a/src/resolve/resolved-dns-dnssec.c b/src/resolve/resolved-dns-dnssec.c
new file mode 100644
index 0000000..2f5776b
--- /dev/null
+++ b/src/resolve/resolved-dns-dnssec.c
@@ -0,0 +1,2263 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+#include "memory-util.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+#include "sort-util.h"
+#include "string-table.h"
+
+#define VERIFY_RRS_MAX 256
+#define MAX_KEY_SIZE (32*1024)
+
+/* Permit a maximum clock skew of 1h 10min. This should be enough to deal with DST confusion */
+#define SKEW_MAX (1*USEC_PER_HOUR + 10*USEC_PER_MINUTE)
+
+/* Maximum number of NSEC3 iterations we'll do. RFC5155 says 2500 shall be the maximum useful value */
+#define NSEC3_ITERATIONS_MAX 2500
+
+/*
+ * The DNSSEC Chain of trust:
+ *
+ * Normal RRs are protected via RRSIG RRs in combination with DNSKEY RRs, all in the same zone
+ * DNSKEY RRs are either protected like normal RRs, or via a DS from a zone "higher" up the tree
+ * DS RRs are protected like normal RRs
+ *
+ * Example chain:
+ * Normal RR → RRSIG/DNSKEY+ → DS → RRSIG/DNSKEY+ → DS → ... → DS → RRSIG/DNSKEY+ → DS
+ */
+
+uint16_t dnssec_keytag(DnsResourceRecord *dnskey, bool mask_revoke) {
+ const uint8_t *p;
+ uint32_t sum, f;
+ size_t i;
+
+ /* The algorithm from RFC 4034, Appendix B. */
+
+ assert(dnskey);
+ assert(dnskey->key->type == DNS_TYPE_DNSKEY);
+
+ f = (uint32_t) dnskey->dnskey.flags;
+
+ if (mask_revoke)
+ f &= ~DNSKEY_FLAG_REVOKE;
+
+ sum = f + ((((uint32_t) dnskey->dnskey.protocol) << 8) + (uint32_t) dnskey->dnskey.algorithm);
+
+ p = dnskey->dnskey.key;
+
+ for (i = 0; i < dnskey->dnskey.key_size; i++)
+ sum += (i & 1) == 0 ? (uint32_t) p[i] << 8 : (uint32_t) p[i];
+
+ sum += (sum >> 16) & UINT32_C(0xFFFF);
+
+ return sum & UINT32_C(0xFFFF);
+}
+
+#if HAVE_GCRYPT
+
+static int rr_compare(DnsResourceRecord * const *a, DnsResourceRecord * const *b) {
+ const DnsResourceRecord *x = *a, *y = *b;
+ size_t m;
+ int r;
+
+ /* Let's order the RRs according to RFC 4034, Section 6.3 */
+
+ assert(x);
+ assert(x->wire_format);
+ assert(y);
+ assert(y->wire_format);
+
+ m = MIN(DNS_RESOURCE_RECORD_RDATA_SIZE(x), DNS_RESOURCE_RECORD_RDATA_SIZE(y));
+
+ r = memcmp(DNS_RESOURCE_RECORD_RDATA(x), DNS_RESOURCE_RECORD_RDATA(y), m);
+ if (r != 0)
+ return r;
+
+ return CMP(DNS_RESOURCE_RECORD_RDATA_SIZE(x), DNS_RESOURCE_RECORD_RDATA_SIZE(y));
+}
+
+static int dnssec_rsa_verify_raw(
+ const char *hash_algorithm,
+ const void *signature, size_t signature_size,
+ const void *data, size_t data_size,
+ const void *exponent, size_t exponent_size,
+ const void *modulus, size_t modulus_size) {
+
+ gcry_sexp_t public_key_sexp = NULL, data_sexp = NULL, signature_sexp = NULL;
+ gcry_mpi_t n = NULL, e = NULL, s = NULL;
+ gcry_error_t ge;
+ int r;
+
+ assert(hash_algorithm);
+
+ ge = gcry_mpi_scan(&s, GCRYMPI_FMT_USG, signature, signature_size, NULL);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_mpi_scan(&e, GCRYMPI_FMT_USG, exponent, exponent_size, NULL);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_mpi_scan(&n, GCRYMPI_FMT_USG, modulus, modulus_size, NULL);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&signature_sexp,
+ NULL,
+ "(sig-val (rsa (s %m)))",
+ s);
+
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&data_sexp,
+ NULL,
+ "(data (flags pkcs1) (hash %s %b))",
+ hash_algorithm,
+ (int) data_size,
+ data);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&public_key_sexp,
+ NULL,
+ "(public-key (rsa (n %m) (e %m)))",
+ n,
+ e);
+ if (ge != 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_pk_verify(signature_sexp, data_sexp, public_key_sexp);
+ if (gpg_err_code(ge) == GPG_ERR_BAD_SIGNATURE)
+ r = 0;
+ else if (ge != 0) {
+ log_debug("RSA signature check failed: %s", gpg_strerror(ge));
+ r = -EIO;
+ } else
+ r = 1;
+
+finish:
+ if (e)
+ gcry_mpi_release(e);
+ if (n)
+ gcry_mpi_release(n);
+ if (s)
+ gcry_mpi_release(s);
+
+ if (public_key_sexp)
+ gcry_sexp_release(public_key_sexp);
+ if (signature_sexp)
+ gcry_sexp_release(signature_sexp);
+ if (data_sexp)
+ gcry_sexp_release(data_sexp);
+
+ return r;
+}
+
+static int dnssec_rsa_verify(
+ const char *hash_algorithm,
+ const void *hash, size_t hash_size,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey) {
+
+ size_t exponent_size, modulus_size;
+ void *exponent, *modulus;
+
+ assert(hash_algorithm);
+ assert(hash);
+ assert(hash_size > 0);
+ assert(rrsig);
+ assert(dnskey);
+
+ if (*(uint8_t*) dnskey->dnskey.key == 0) {
+ /* exponent is > 255 bytes long */
+
+ exponent = (uint8_t*) dnskey->dnskey.key + 3;
+ exponent_size =
+ ((size_t) (((uint8_t*) dnskey->dnskey.key)[1]) << 8) |
+ ((size_t) ((uint8_t*) dnskey->dnskey.key)[2]);
+
+ if (exponent_size < 256)
+ return -EINVAL;
+
+ if (3 + exponent_size >= dnskey->dnskey.key_size)
+ return -EINVAL;
+
+ modulus = (uint8_t*) dnskey->dnskey.key + 3 + exponent_size;
+ modulus_size = dnskey->dnskey.key_size - 3 - exponent_size;
+
+ } else {
+ /* exponent is <= 255 bytes long */
+
+ exponent = (uint8_t*) dnskey->dnskey.key + 1;
+ exponent_size = (size_t) ((uint8_t*) dnskey->dnskey.key)[0];
+
+ if (exponent_size <= 0)
+ return -EINVAL;
+
+ if (1 + exponent_size >= dnskey->dnskey.key_size)
+ return -EINVAL;
+
+ modulus = (uint8_t*) dnskey->dnskey.key + 1 + exponent_size;
+ modulus_size = dnskey->dnskey.key_size - 1 - exponent_size;
+ }
+
+ return dnssec_rsa_verify_raw(
+ hash_algorithm,
+ rrsig->rrsig.signature, rrsig->rrsig.signature_size,
+ hash, hash_size,
+ exponent, exponent_size,
+ modulus, modulus_size);
+}
+
+static int dnssec_ecdsa_verify_raw(
+ const char *hash_algorithm,
+ const char *curve,
+ const void *signature_r, size_t signature_r_size,
+ const void *signature_s, size_t signature_s_size,
+ const void *data, size_t data_size,
+ const void *key, size_t key_size) {
+
+ gcry_sexp_t public_key_sexp = NULL, data_sexp = NULL, signature_sexp = NULL;
+ gcry_mpi_t q = NULL, r = NULL, s = NULL;
+ gcry_error_t ge;
+ int k;
+
+ assert(hash_algorithm);
+
+ ge = gcry_mpi_scan(&r, GCRYMPI_FMT_USG, signature_r, signature_r_size, NULL);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_mpi_scan(&s, GCRYMPI_FMT_USG, signature_s, signature_s_size, NULL);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_mpi_scan(&q, GCRYMPI_FMT_USG, key, key_size, NULL);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&signature_sexp,
+ NULL,
+ "(sig-val (ecdsa (r %m) (s %m)))",
+ r,
+ s);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&data_sexp,
+ NULL,
+ "(data (flags rfc6979) (hash %s %b))",
+ hash_algorithm,
+ (int) data_size,
+ data);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&public_key_sexp,
+ NULL,
+ "(public-key (ecc (curve %s) (q %m)))",
+ curve,
+ q);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_pk_verify(signature_sexp, data_sexp, public_key_sexp);
+ if (gpg_err_code(ge) == GPG_ERR_BAD_SIGNATURE)
+ k = 0;
+ else if (ge != 0) {
+ log_debug("ECDSA signature check failed: %s", gpg_strerror(ge));
+ k = -EIO;
+ } else
+ k = 1;
+finish:
+ if (r)
+ gcry_mpi_release(r);
+ if (s)
+ gcry_mpi_release(s);
+ if (q)
+ gcry_mpi_release(q);
+
+ if (public_key_sexp)
+ gcry_sexp_release(public_key_sexp);
+ if (signature_sexp)
+ gcry_sexp_release(signature_sexp);
+ if (data_sexp)
+ gcry_sexp_release(data_sexp);
+
+ return k;
+}
+
+static int dnssec_ecdsa_verify(
+ const char *hash_algorithm,
+ int algorithm,
+ const void *hash, size_t hash_size,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey) {
+
+ const char *curve;
+ size_t key_size;
+ uint8_t *q;
+
+ assert(hash);
+ assert(hash_size);
+ assert(rrsig);
+ assert(dnskey);
+
+ if (algorithm == DNSSEC_ALGORITHM_ECDSAP256SHA256) {
+ key_size = 32;
+ curve = "NIST P-256";
+ } else if (algorithm == DNSSEC_ALGORITHM_ECDSAP384SHA384) {
+ key_size = 48;
+ curve = "NIST P-384";
+ } else
+ return -EOPNOTSUPP;
+
+ if (dnskey->dnskey.key_size != key_size * 2)
+ return -EINVAL;
+
+ if (rrsig->rrsig.signature_size != key_size * 2)
+ return -EINVAL;
+
+ q = newa(uint8_t, key_size*2 + 1);
+ q[0] = 0x04; /* Prepend 0x04 to indicate an uncompressed key */
+ memcpy(q+1, dnskey->dnskey.key, key_size*2);
+
+ return dnssec_ecdsa_verify_raw(
+ hash_algorithm,
+ curve,
+ rrsig->rrsig.signature, key_size,
+ (uint8_t*) rrsig->rrsig.signature + key_size, key_size,
+ hash, hash_size,
+ q, key_size*2+1);
+}
+
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+static int dnssec_eddsa_verify_raw(
+ const char *curve,
+ const void *signature_r, size_t signature_r_size,
+ const void *signature_s, size_t signature_s_size,
+ const void *data, size_t data_size,
+ const void *key, size_t key_size) {
+
+ gcry_sexp_t public_key_sexp = NULL, data_sexp = NULL, signature_sexp = NULL;
+ gcry_error_t ge;
+ int k;
+
+ ge = gcry_sexp_build(&signature_sexp,
+ NULL,
+ "(sig-val (eddsa (r %b) (s %b)))",
+ (int) signature_r_size,
+ signature_r,
+ (int) signature_s_size,
+ signature_s);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&data_sexp,
+ NULL,
+ "(data (flags eddsa) (hash-algo sha512) (value %b))",
+ (int) data_size,
+ data);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_sexp_build(&public_key_sexp,
+ NULL,
+ "(public-key (ecc (curve %s) (flags eddsa) (q %b)))",
+ curve,
+ (int) key_size,
+ key);
+ if (ge != 0) {
+ k = -EIO;
+ goto finish;
+ }
+
+ ge = gcry_pk_verify(signature_sexp, data_sexp, public_key_sexp);
+ if (gpg_err_code(ge) == GPG_ERR_BAD_SIGNATURE)
+ k = 0;
+ else if (ge != 0) {
+ log_debug("EdDSA signature check failed: %s", gpg_strerror(ge));
+ k = -EIO;
+ } else
+ k = 1;
+finish:
+ if (public_key_sexp)
+ gcry_sexp_release(public_key_sexp);
+ if (signature_sexp)
+ gcry_sexp_release(signature_sexp);
+ if (data_sexp)
+ gcry_sexp_release(data_sexp);
+
+ return k;
+}
+
+static int dnssec_eddsa_verify(
+ int algorithm,
+ const void *data, size_t data_size,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey) {
+ const char *curve;
+ size_t key_size;
+
+ if (algorithm == DNSSEC_ALGORITHM_ED25519) {
+ curve = "Ed25519";
+ key_size = 32;
+ } else
+ return -EOPNOTSUPP;
+
+ if (dnskey->dnskey.key_size != key_size)
+ return -EINVAL;
+
+ if (rrsig->rrsig.signature_size != key_size * 2)
+ return -EINVAL;
+
+ return dnssec_eddsa_verify_raw(
+ curve,
+ rrsig->rrsig.signature, key_size,
+ (uint8_t*) rrsig->rrsig.signature + key_size, key_size,
+ data, data_size,
+ dnskey->dnskey.key, key_size);
+}
+#endif
+
+static void md_add_uint8(gcry_md_hd_t md, uint8_t v) {
+ gcry_md_write(md, &v, sizeof(v));
+}
+
+static void md_add_uint16(gcry_md_hd_t md, uint16_t v) {
+ v = htobe16(v);
+ gcry_md_write(md, &v, sizeof(v));
+}
+
+static void fwrite_uint8(FILE *fp, uint8_t v) {
+ fwrite(&v, sizeof(v), 1, fp);
+}
+
+static void fwrite_uint16(FILE *fp, uint16_t v) {
+ v = htobe16(v);
+ fwrite(&v, sizeof(v), 1, fp);
+}
+
+static void fwrite_uint32(FILE *fp, uint32_t v) {
+ v = htobe32(v);
+ fwrite(&v, sizeof(v), 1, fp);
+}
+
+static int dnssec_rrsig_prepare(DnsResourceRecord *rrsig) {
+ int n_key_labels, n_signer_labels;
+ const char *name;
+ int r;
+
+ /* Checks whether the specified RRSIG RR is somewhat valid, and initializes the .n_skip_labels_source and
+ * .n_skip_labels_signer fields so that we can use them later on. */
+
+ assert(rrsig);
+ assert(rrsig->key->type == DNS_TYPE_RRSIG);
+
+ /* Check if this RRSIG RR is already prepared */
+ if (rrsig->n_skip_labels_source != (unsigned) -1)
+ return 0;
+
+ if (rrsig->rrsig.inception > rrsig->rrsig.expiration)
+ return -EINVAL;
+
+ name = dns_resource_key_name(rrsig->key);
+
+ n_key_labels = dns_name_count_labels(name);
+ if (n_key_labels < 0)
+ return n_key_labels;
+ if (rrsig->rrsig.labels > n_key_labels)
+ return -EINVAL;
+
+ n_signer_labels = dns_name_count_labels(rrsig->rrsig.signer);
+ if (n_signer_labels < 0)
+ return n_signer_labels;
+ if (n_signer_labels > rrsig->rrsig.labels)
+ return -EINVAL;
+
+ r = dns_name_skip(name, n_key_labels - n_signer_labels, &name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ /* Check if the signer is really a suffix of us */
+ r = dns_name_equal(name, rrsig->rrsig.signer);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ rrsig->n_skip_labels_source = n_key_labels - rrsig->rrsig.labels;
+ rrsig->n_skip_labels_signer = n_key_labels - n_signer_labels;
+
+ return 0;
+}
+
+static int dnssec_rrsig_expired(DnsResourceRecord *rrsig, usec_t realtime) {
+ usec_t expiration, inception, skew;
+
+ assert(rrsig);
+ assert(rrsig->key->type == DNS_TYPE_RRSIG);
+
+ if (realtime == USEC_INFINITY)
+ realtime = now(CLOCK_REALTIME);
+
+ expiration = rrsig->rrsig.expiration * USEC_PER_SEC;
+ inception = rrsig->rrsig.inception * USEC_PER_SEC;
+
+ /* Consider inverted validity intervals as expired */
+ if (inception > expiration)
+ return true;
+
+ /* Permit a certain amount of clock skew of 10% of the valid
+ * time range. This takes inspiration from unbound's
+ * resolver. */
+ skew = (expiration - inception) / 10;
+ if (skew > SKEW_MAX)
+ skew = SKEW_MAX;
+
+ if (inception < skew)
+ inception = 0;
+ else
+ inception -= skew;
+
+ if (expiration + skew < expiration)
+ expiration = USEC_INFINITY;
+ else
+ expiration += skew;
+
+ return realtime < inception || realtime > expiration;
+}
+
+static int algorithm_to_gcrypt_md(uint8_t algorithm) {
+
+ /* Translates a DNSSEC signature algorithm into a gcrypt
+ * digest identifier.
+ *
+ * Note that we implement all algorithms listed as "Must
+ * implement" and "Recommended to Implement" in RFC6944. We
+ * don't implement any algorithms that are listed as
+ * "Optional" or "Must Not Implement". Specifically, we do not
+ * implement RSAMD5, DSASHA1, DH, DSA-NSEC3-SHA1, and
+ * GOST-ECC. */
+
+ switch (algorithm) {
+
+ case DNSSEC_ALGORITHM_RSASHA1:
+ case DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1:
+ return GCRY_MD_SHA1;
+
+ case DNSSEC_ALGORITHM_RSASHA256:
+ case DNSSEC_ALGORITHM_ECDSAP256SHA256:
+ return GCRY_MD_SHA256;
+
+ case DNSSEC_ALGORITHM_ECDSAP384SHA384:
+ return GCRY_MD_SHA384;
+
+ case DNSSEC_ALGORITHM_RSASHA512:
+ return GCRY_MD_SHA512;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void dnssec_fix_rrset_ttl(
+ DnsResourceRecord *list[],
+ unsigned n,
+ DnsResourceRecord *rrsig,
+ usec_t realtime) {
+
+ unsigned k;
+
+ assert(list);
+ assert(n > 0);
+ assert(rrsig);
+
+ for (k = 0; k < n; k++) {
+ DnsResourceRecord *rr = list[k];
+
+ /* Pick the TTL as the minimum of the RR's TTL, the
+ * RR's original TTL according to the RRSIG and the
+ * RRSIG's own TTL, see RFC 4035, Section 5.3.3 */
+ rr->ttl = MIN3(rr->ttl, rrsig->rrsig.original_ttl, rrsig->ttl);
+ rr->expiry = rrsig->rrsig.expiration * USEC_PER_SEC;
+
+ /* Copy over information about the signer and wildcard source of synthesis */
+ rr->n_skip_labels_source = rrsig->n_skip_labels_source;
+ rr->n_skip_labels_signer = rrsig->n_skip_labels_signer;
+ }
+
+ rrsig->expiry = rrsig->rrsig.expiration * USEC_PER_SEC;
+}
+
+int dnssec_verify_rrset(
+ DnsAnswer *a,
+ const DnsResourceKey *key,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey,
+ usec_t realtime,
+ DnssecResult *result) {
+
+ uint8_t wire_format_name[DNS_WIRE_FORMAT_HOSTNAME_MAX];
+ DnsResourceRecord **list, *rr;
+ const char *source, *name;
+ _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL;
+ int r, md_algorithm;
+ size_t k, n = 0;
+ size_t sig_size = 0;
+ _cleanup_free_ char *sig_data = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t hash_size;
+ void *hash;
+ bool wildcard;
+
+ assert(key);
+ assert(rrsig);
+ assert(dnskey);
+ assert(result);
+ assert(rrsig->key->type == DNS_TYPE_RRSIG);
+ assert(dnskey->key->type == DNS_TYPE_DNSKEY);
+
+ /* Verifies that the RRSet matches the specified "key" in "a",
+ * using the signature "rrsig" and the key "dnskey". It's
+ * assumed that RRSIG and DNSKEY match. */
+
+ r = dnssec_rrsig_prepare(rrsig);
+ if (r == -EINVAL) {
+ *result = DNSSEC_INVALID;
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = dnssec_rrsig_expired(rrsig, realtime);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *result = DNSSEC_SIGNATURE_EXPIRED;
+ return 0;
+ }
+
+ name = dns_resource_key_name(key);
+
+ /* Some keys may only appear signed in the zone apex, and are invalid anywhere else. (SOA, NS...) */
+ if (dns_type_apex_only(rrsig->rrsig.type_covered)) {
+ r = dns_name_equal(rrsig->rrsig.signer, name);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *result = DNSSEC_INVALID;
+ return 0;
+ }
+ }
+
+ /* OTOH DS RRs may not appear in the zone apex, but are valid everywhere else. */
+ if (rrsig->rrsig.type_covered == DNS_TYPE_DS) {
+ r = dns_name_equal(rrsig->rrsig.signer, name);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *result = DNSSEC_INVALID;
+ return 0;
+ }
+ }
+
+ /* Determine the "Source of Synthesis" and whether this is a wildcard RRSIG */
+ r = dns_name_suffix(name, rrsig->rrsig.labels, &source);
+ if (r < 0)
+ return r;
+ if (r > 0 && !dns_type_may_wildcard(rrsig->rrsig.type_covered)) {
+ /* We refuse to validate NSEC3 or SOA RRs that are synthesized from wildcards */
+ *result = DNSSEC_INVALID;
+ return 0;
+ }
+ if (r == 1) {
+ /* If we stripped a single label, then let's see if that maybe was "*". If so, we are not really
+ * synthesized from a wildcard, we are the wildcard itself. Treat that like a normal name. */
+ r = dns_name_startswith(name, "*");
+ if (r < 0)
+ return r;
+ if (r > 0)
+ source = name;
+
+ wildcard = r == 0;
+ } else
+ wildcard = r > 0;
+
+ /* Collect all relevant RRs in a single array, so that we can look at the RRset */
+ list = newa(DnsResourceRecord *, dns_answer_size(a));
+
+ DNS_ANSWER_FOREACH(rr, a) {
+ r = dns_resource_key_equal(key, rr->key);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We need the wire format for ordering, and digest calculation */
+ r = dns_resource_record_to_wire_format(rr, true);
+ if (r < 0)
+ return r;
+
+ list[n++] = rr;
+
+ if (n > VERIFY_RRS_MAX)
+ return -E2BIG;
+ }
+
+ if (n <= 0)
+ return -ENODATA;
+
+ /* Bring the RRs into canonical order */
+ typesafe_qsort(list, n, rr_compare);
+
+ f = open_memstream_unlocked(&sig_data, &sig_size);
+ if (!f)
+ return -ENOMEM;
+
+ fwrite_uint16(f, rrsig->rrsig.type_covered);
+ fwrite_uint8(f, rrsig->rrsig.algorithm);
+ fwrite_uint8(f, rrsig->rrsig.labels);
+ fwrite_uint32(f, rrsig->rrsig.original_ttl);
+ fwrite_uint32(f, rrsig->rrsig.expiration);
+ fwrite_uint32(f, rrsig->rrsig.inception);
+ fwrite_uint16(f, rrsig->rrsig.key_tag);
+
+ r = dns_name_to_wire_format(rrsig->rrsig.signer, wire_format_name, sizeof(wire_format_name), true);
+ if (r < 0)
+ return r;
+ fwrite(wire_format_name, 1, r, f);
+
+ /* Convert the source of synthesis into wire format */
+ r = dns_name_to_wire_format(source, wire_format_name, sizeof(wire_format_name), true);
+ if (r < 0)
+ return r;
+
+ for (k = 0; k < n; k++) {
+ size_t l;
+
+ rr = list[k];
+
+ /* Hash the source of synthesis. If this is a wildcard, then prefix it with the *. label */
+ if (wildcard)
+ fwrite((uint8_t[]) { 1, '*'}, sizeof(uint8_t), 2, f);
+ fwrite(wire_format_name, 1, r, f);
+
+ fwrite_uint16(f, rr->key->type);
+ fwrite_uint16(f, rr->key->class);
+ fwrite_uint32(f, rrsig->rrsig.original_ttl);
+
+ l = DNS_RESOURCE_RECORD_RDATA_SIZE(rr);
+ assert(l <= 0xFFFF);
+
+ fwrite_uint16(f, (uint16_t) l);
+ fwrite(DNS_RESOURCE_RECORD_RDATA(rr), 1, l, f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ initialize_libgcrypt(false);
+
+ switch (rrsig->rrsig.algorithm) {
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+ case DNSSEC_ALGORITHM_ED25519:
+ break;
+#else
+ case DNSSEC_ALGORITHM_ED25519:
+#endif
+ case DNSSEC_ALGORITHM_ED448:
+ *result = DNSSEC_UNSUPPORTED_ALGORITHM;
+ return 0;
+ default:
+ /* OK, the RRs are now in canonical order. Let's calculate the digest */
+ md_algorithm = algorithm_to_gcrypt_md(rrsig->rrsig.algorithm);
+ if (md_algorithm == -EOPNOTSUPP) {
+ *result = DNSSEC_UNSUPPORTED_ALGORITHM;
+ return 0;
+ }
+ if (md_algorithm < 0)
+ return md_algorithm;
+
+ gcry_md_open(&md, md_algorithm, 0);
+ if (!md)
+ return -EIO;
+
+ hash_size = gcry_md_get_algo_dlen(md_algorithm);
+ assert(hash_size > 0);
+
+ gcry_md_write(md, sig_data, sig_size);
+
+ hash = gcry_md_read(md, 0);
+ if (!hash)
+ return -EIO;
+ }
+
+ switch (rrsig->rrsig.algorithm) {
+
+ case DNSSEC_ALGORITHM_RSASHA1:
+ case DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1:
+ case DNSSEC_ALGORITHM_RSASHA256:
+ case DNSSEC_ALGORITHM_RSASHA512:
+ r = dnssec_rsa_verify(
+ gcry_md_algo_name(md_algorithm),
+ hash, hash_size,
+ rrsig,
+ dnskey);
+ break;
+
+ case DNSSEC_ALGORITHM_ECDSAP256SHA256:
+ case DNSSEC_ALGORITHM_ECDSAP384SHA384:
+ r = dnssec_ecdsa_verify(
+ gcry_md_algo_name(md_algorithm),
+ rrsig->rrsig.algorithm,
+ hash, hash_size,
+ rrsig,
+ dnskey);
+ break;
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+ case DNSSEC_ALGORITHM_ED25519:
+ r = dnssec_eddsa_verify(
+ rrsig->rrsig.algorithm,
+ sig_data, sig_size,
+ rrsig,
+ dnskey);
+ break;
+#endif
+ }
+ if (r < 0)
+ return r;
+
+ /* Now, fix the ttl, expiry, and remember the synthesizing source and the signer */
+ if (r > 0)
+ dnssec_fix_rrset_ttl(list, n, rrsig, realtime);
+
+ if (r == 0)
+ *result = DNSSEC_INVALID;
+ else if (wildcard)
+ *result = DNSSEC_VALIDATED_WILDCARD;
+ else
+ *result = DNSSEC_VALIDATED;
+
+ return 0;
+}
+
+int dnssec_rrsig_match_dnskey(DnsResourceRecord *rrsig, DnsResourceRecord *dnskey, bool revoked_ok) {
+
+ assert(rrsig);
+ assert(dnskey);
+
+ /* Checks if the specified DNSKEY RR matches the key used for
+ * the signature in the specified RRSIG RR */
+
+ if (rrsig->key->type != DNS_TYPE_RRSIG)
+ return -EINVAL;
+
+ if (dnskey->key->type != DNS_TYPE_DNSKEY)
+ return 0;
+ if (dnskey->key->class != rrsig->key->class)
+ return 0;
+ if ((dnskey->dnskey.flags & DNSKEY_FLAG_ZONE_KEY) == 0)
+ return 0;
+ if (!revoked_ok && (dnskey->dnskey.flags & DNSKEY_FLAG_REVOKE))
+ return 0;
+ if (dnskey->dnskey.protocol != 3)
+ return 0;
+ if (dnskey->dnskey.algorithm != rrsig->rrsig.algorithm)
+ return 0;
+
+ if (dnssec_keytag(dnskey, false) != rrsig->rrsig.key_tag)
+ return 0;
+
+ return dns_name_equal(dns_resource_key_name(dnskey->key), rrsig->rrsig.signer);
+}
+
+int dnssec_key_match_rrsig(const DnsResourceKey *key, DnsResourceRecord *rrsig) {
+ assert(key);
+ assert(rrsig);
+
+ /* Checks if the specified RRSIG RR protects the RRSet of the specified RR key. */
+
+ if (rrsig->key->type != DNS_TYPE_RRSIG)
+ return 0;
+ if (rrsig->key->class != key->class)
+ return 0;
+ if (rrsig->rrsig.type_covered != key->type)
+ return 0;
+
+ return dns_name_equal(dns_resource_key_name(rrsig->key), dns_resource_key_name(key));
+}
+
+int dnssec_verify_rrset_search(
+ DnsAnswer *a,
+ const DnsResourceKey *key,
+ DnsAnswer *validated_dnskeys,
+ usec_t realtime,
+ DnssecResult *result,
+ DnsResourceRecord **ret_rrsig) {
+
+ bool found_rrsig = false, found_invalid = false, found_expired_rrsig = false, found_unsupported_algorithm = false;
+ DnsResourceRecord *rrsig;
+ int r;
+
+ assert(key);
+ assert(result);
+
+ /* Verifies all RRs from "a" that match the key "key" against DNSKEYs in "validated_dnskeys" */
+
+ if (!a || a->n_rrs <= 0)
+ return -ENODATA;
+
+ /* Iterate through each RRSIG RR. */
+ DNS_ANSWER_FOREACH(rrsig, a) {
+ DnsResourceRecord *dnskey;
+ DnsAnswerFlags flags;
+
+ /* Is this an RRSIG RR that applies to RRs matching our key? */
+ r = dnssec_key_match_rrsig(key, rrsig);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ found_rrsig = true;
+
+ /* Look for a matching key */
+ DNS_ANSWER_FOREACH_FLAGS(dnskey, flags, validated_dnskeys) {
+ DnssecResult one_result;
+
+ if ((flags & DNS_ANSWER_AUTHENTICATED) == 0)
+ continue;
+
+ /* Is this a DNSKEY RR that matches they key of our RRSIG? */
+ r = dnssec_rrsig_match_dnskey(rrsig, dnskey, false);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* Take the time here, if it isn't set yet, so
+ * that we do all validations with the same
+ * time. */
+ if (realtime == USEC_INFINITY)
+ realtime = now(CLOCK_REALTIME);
+
+ /* Yay, we found a matching RRSIG with a matching
+ * DNSKEY, awesome. Now let's verify all entries of
+ * the RRSet against the RRSIG and DNSKEY
+ * combination. */
+
+ r = dnssec_verify_rrset(a, key, rrsig, dnskey, realtime, &one_result);
+ if (r < 0)
+ return r;
+
+ switch (one_result) {
+
+ case DNSSEC_VALIDATED:
+ case DNSSEC_VALIDATED_WILDCARD:
+ /* Yay, the RR has been validated,
+ * return immediately, but fix up the expiry */
+ if (ret_rrsig)
+ *ret_rrsig = rrsig;
+
+ *result = one_result;
+ return 0;
+
+ case DNSSEC_INVALID:
+ /* If the signature is invalid, let's try another
+ key and/or signature. After all they
+ key_tags and stuff are not unique, and
+ might be shared by multiple keys. */
+ found_invalid = true;
+ continue;
+
+ case DNSSEC_UNSUPPORTED_ALGORITHM:
+ /* If the key algorithm is
+ unsupported, try another
+ RRSIG/DNSKEY pair, but remember we
+ encountered this, so that we can
+ return a proper error when we
+ encounter nothing better. */
+ found_unsupported_algorithm = true;
+ continue;
+
+ case DNSSEC_SIGNATURE_EXPIRED:
+ /* If the signature is expired, try
+ another one, but remember it, so
+ that we can return this */
+ found_expired_rrsig = true;
+ continue;
+
+ default:
+ assert_not_reached("Unexpected DNSSEC validation result");
+ }
+ }
+ }
+
+ if (found_expired_rrsig)
+ *result = DNSSEC_SIGNATURE_EXPIRED;
+ else if (found_unsupported_algorithm)
+ *result = DNSSEC_UNSUPPORTED_ALGORITHM;
+ else if (found_invalid)
+ *result = DNSSEC_INVALID;
+ else if (found_rrsig)
+ *result = DNSSEC_MISSING_KEY;
+ else
+ *result = DNSSEC_NO_SIGNATURE;
+
+ if (ret_rrsig)
+ *ret_rrsig = NULL;
+
+ return 0;
+}
+
+int dnssec_has_rrsig(DnsAnswer *a, const DnsResourceKey *key) {
+ DnsResourceRecord *rr;
+ int r;
+
+ /* Checks whether there's at least one RRSIG in 'a' that protects RRs of the specified key */
+
+ DNS_ANSWER_FOREACH(rr, a) {
+ r = dnssec_key_match_rrsig(key, rr);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int digest_to_gcrypt_md(uint8_t algorithm) {
+
+ /* Translates a DNSSEC digest algorithm into a gcrypt digest identifier */
+
+ switch (algorithm) {
+
+ case DNSSEC_DIGEST_SHA1:
+ return GCRY_MD_SHA1;
+
+ case DNSSEC_DIGEST_SHA256:
+ return GCRY_MD_SHA256;
+
+ case DNSSEC_DIGEST_SHA384:
+ return GCRY_MD_SHA384;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int dnssec_verify_dnskey_by_ds(DnsResourceRecord *dnskey, DnsResourceRecord *ds, bool mask_revoke) {
+ uint8_t wire_format[DNS_WIRE_FORMAT_HOSTNAME_MAX];
+ _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL;
+ size_t hash_size;
+ int md_algorithm, r;
+ void *result;
+
+ assert(dnskey);
+ assert(ds);
+
+ /* Implements DNSKEY verification by a DS, according to RFC 4035, section 5.2 */
+
+ if (dnskey->key->type != DNS_TYPE_DNSKEY)
+ return -EINVAL;
+ if (ds->key->type != DNS_TYPE_DS)
+ return -EINVAL;
+ if ((dnskey->dnskey.flags & DNSKEY_FLAG_ZONE_KEY) == 0)
+ return -EKEYREJECTED;
+ if (!mask_revoke && (dnskey->dnskey.flags & DNSKEY_FLAG_REVOKE))
+ return -EKEYREJECTED;
+ if (dnskey->dnskey.protocol != 3)
+ return -EKEYREJECTED;
+
+ if (dnskey->dnskey.algorithm != ds->ds.algorithm)
+ return 0;
+ if (dnssec_keytag(dnskey, mask_revoke) != ds->ds.key_tag)
+ return 0;
+
+ initialize_libgcrypt(false);
+
+ md_algorithm = digest_to_gcrypt_md(ds->ds.digest_type);
+ if (md_algorithm < 0)
+ return md_algorithm;
+
+ hash_size = gcry_md_get_algo_dlen(md_algorithm);
+ assert(hash_size > 0);
+
+ if (ds->ds.digest_size != hash_size)
+ return 0;
+
+ r = dns_name_to_wire_format(dns_resource_key_name(dnskey->key), wire_format, sizeof(wire_format), true);
+ if (r < 0)
+ return r;
+
+ gcry_md_open(&md, md_algorithm, 0);
+ if (!md)
+ return -EIO;
+
+ gcry_md_write(md, wire_format, r);
+ if (mask_revoke)
+ md_add_uint16(md, dnskey->dnskey.flags & ~DNSKEY_FLAG_REVOKE);
+ else
+ md_add_uint16(md, dnskey->dnskey.flags);
+ md_add_uint8(md, dnskey->dnskey.protocol);
+ md_add_uint8(md, dnskey->dnskey.algorithm);
+ gcry_md_write(md, dnskey->dnskey.key, dnskey->dnskey.key_size);
+
+ result = gcry_md_read(md, 0);
+ if (!result)
+ return -EIO;
+
+ return memcmp(result, ds->ds.digest, ds->ds.digest_size) == 0;
+}
+
+int dnssec_verify_dnskey_by_ds_search(DnsResourceRecord *dnskey, DnsAnswer *validated_ds) {
+ DnsResourceRecord *ds;
+ DnsAnswerFlags flags;
+ int r;
+
+ assert(dnskey);
+
+ if (dnskey->key->type != DNS_TYPE_DNSKEY)
+ return 0;
+
+ DNS_ANSWER_FOREACH_FLAGS(ds, flags, validated_ds) {
+
+ if ((flags & DNS_ANSWER_AUTHENTICATED) == 0)
+ continue;
+
+ if (ds->key->type != DNS_TYPE_DS)
+ continue;
+ if (ds->key->class != dnskey->key->class)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dnskey->key), dns_resource_key_name(ds->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dnssec_verify_dnskey_by_ds(dnskey, ds, false);
+ if (IN_SET(r, -EKEYREJECTED, -EOPNOTSUPP))
+ return 0; /* The DNSKEY is revoked or otherwise invalid, or we don't support the digest algorithm */
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nsec3_hash_to_gcrypt_md(uint8_t algorithm) {
+
+ /* Translates a DNSSEC NSEC3 hash algorithm into a gcrypt digest identifier */
+
+ switch (algorithm) {
+
+ case NSEC3_ALGORITHM_SHA1:
+ return GCRY_MD_SHA1;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int dnssec_nsec3_hash(DnsResourceRecord *nsec3, const char *name, void *ret) {
+ uint8_t wire_format[DNS_WIRE_FORMAT_HOSTNAME_MAX];
+ gcry_md_hd_t md = NULL;
+ size_t hash_size;
+ int algorithm;
+ void *result;
+ unsigned k;
+ int r;
+
+ assert(nsec3);
+ assert(name);
+ assert(ret);
+
+ if (nsec3->key->type != DNS_TYPE_NSEC3)
+ return -EINVAL;
+
+ if (nsec3->nsec3.iterations > NSEC3_ITERATIONS_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Ignoring NSEC3 RR %s with excessive number of iterations.",
+ dns_resource_record_to_string(nsec3));
+
+ algorithm = nsec3_hash_to_gcrypt_md(nsec3->nsec3.algorithm);
+ if (algorithm < 0)
+ return algorithm;
+
+ initialize_libgcrypt(false);
+
+ hash_size = gcry_md_get_algo_dlen(algorithm);
+ assert(hash_size > 0);
+
+ if (nsec3->nsec3.next_hashed_name_size != hash_size)
+ return -EINVAL;
+
+ r = dns_name_to_wire_format(name, wire_format, sizeof(wire_format), true);
+ if (r < 0)
+ return r;
+
+ gcry_md_open(&md, algorithm, 0);
+ if (!md)
+ return -EIO;
+
+ gcry_md_write(md, wire_format, r);
+ gcry_md_write(md, nsec3->nsec3.salt, nsec3->nsec3.salt_size);
+
+ result = gcry_md_read(md, 0);
+ if (!result) {
+ r = -EIO;
+ goto finish;
+ }
+
+ for (k = 0; k < nsec3->nsec3.iterations; k++) {
+ uint8_t tmp[hash_size];
+ memcpy(tmp, result, hash_size);
+
+ gcry_md_reset(md);
+ gcry_md_write(md, tmp, hash_size);
+ gcry_md_write(md, nsec3->nsec3.salt, nsec3->nsec3.salt_size);
+
+ result = gcry_md_read(md, 0);
+ if (!result) {
+ r = -EIO;
+ goto finish;
+ }
+ }
+
+ memcpy(ret, result, hash_size);
+ r = (int) hash_size;
+
+finish:
+ gcry_md_close(md);
+ return r;
+}
+
+static int nsec3_is_good(DnsResourceRecord *rr, DnsResourceRecord *nsec3) {
+ const char *a, *b;
+ int r;
+
+ assert(rr);
+
+ if (rr->key->type != DNS_TYPE_NSEC3)
+ return 0;
+
+ /* RFC 5155, Section 8.2 says we MUST ignore NSEC3 RRs with flags != 0 or 1 */
+ if (!IN_SET(rr->nsec3.flags, 0, 1))
+ return 0;
+
+ /* Ignore NSEC3 RRs whose algorithm we don't know */
+ if (nsec3_hash_to_gcrypt_md(rr->nsec3.algorithm) < 0)
+ return 0;
+ /* Ignore NSEC3 RRs with an excessive number of required iterations */
+ if (rr->nsec3.iterations > NSEC3_ITERATIONS_MAX)
+ return 0;
+
+ /* Ignore NSEC3 RRs generated from wildcards. If these NSEC3 RRs weren't correctly signed we can't make this
+ * check (since rr->n_skip_labels_source is -1), but that's OK, as we won't trust them anyway in that case. */
+ if (!IN_SET(rr->n_skip_labels_source, 0, (unsigned) -1))
+ return 0;
+ /* Ignore NSEC3 RRs that are located anywhere else than one label below the zone */
+ if (!IN_SET(rr->n_skip_labels_signer, 1, (unsigned) -1))
+ return 0;
+
+ if (!nsec3)
+ return 1;
+
+ /* If a second NSEC3 RR is specified, also check if they are from the same zone. */
+
+ if (nsec3 == rr) /* Shortcut */
+ return 1;
+
+ if (rr->key->class != nsec3->key->class)
+ return 0;
+ if (rr->nsec3.algorithm != nsec3->nsec3.algorithm)
+ return 0;
+ if (rr->nsec3.iterations != nsec3->nsec3.iterations)
+ return 0;
+ if (rr->nsec3.salt_size != nsec3->nsec3.salt_size)
+ return 0;
+ if (memcmp_safe(rr->nsec3.salt, nsec3->nsec3.salt, rr->nsec3.salt_size) != 0)
+ return 0;
+
+ a = dns_resource_key_name(rr->key);
+ r = dns_name_parent(&a); /* strip off hash */
+ if (r <= 0)
+ return r;
+
+ b = dns_resource_key_name(nsec3->key);
+ r = dns_name_parent(&b); /* strip off hash */
+ if (r <= 0)
+ return r;
+
+ /* Make sure both have the same parent */
+ return dns_name_equal(a, b);
+}
+
+static int nsec3_hashed_domain_format(const uint8_t *hashed, size_t hashed_size, const char *zone, char **ret) {
+ _cleanup_free_ char *l = NULL;
+ char *j;
+
+ assert(hashed);
+ assert(hashed_size > 0);
+ assert(zone);
+ assert(ret);
+
+ l = base32hexmem(hashed, hashed_size, false);
+ if (!l)
+ return -ENOMEM;
+
+ j = strjoin(l, ".", zone);
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return (int) hashed_size;
+}
+
+static int nsec3_hashed_domain_make(DnsResourceRecord *nsec3, const char *domain, const char *zone, char **ret) {
+ uint8_t hashed[DNSSEC_HASH_SIZE_MAX];
+ int hashed_size;
+
+ assert(nsec3);
+ assert(domain);
+ assert(zone);
+ assert(ret);
+
+ hashed_size = dnssec_nsec3_hash(nsec3, domain, hashed);
+ if (hashed_size < 0)
+ return hashed_size;
+
+ return nsec3_hashed_domain_format(hashed, (size_t) hashed_size, zone, ret);
+}
+
+/* See RFC 5155, Section 8
+ * First try to find a NSEC3 record that matches our query precisely, if that fails, find the closest
+ * enclosure. Secondly, find a proof that there is no closer enclosure and either a proof that there
+ * is no wildcard domain as a direct descendant of the closest enclosure, or find an NSEC3 record that
+ * matches the wildcard domain.
+ *
+ * Based on this we can prove either the existence of the record in @key, or NXDOMAIN or NODATA, or
+ * that there is no proof either way. The latter is the case if a proof of non-existence of a given
+ * name uses an NSEC3 record with the opt-out bit set. Lastly, if we are given insufficient NSEC3 records
+ * to conclude anything we indicate this by returning NO_RR. */
+static int dnssec_test_nsec3(DnsAnswer *answer, DnsResourceKey *key, DnssecNsecResult *result, bool *authenticated, uint32_t *ttl) {
+ _cleanup_free_ char *next_closer_domain = NULL, *wildcard_domain = NULL;
+ const char *zone, *p, *pp = NULL, *wildcard;
+ DnsResourceRecord *rr, *enclosure_rr, *zone_rr, *wildcard_rr = NULL;
+ DnsAnswerFlags flags;
+ int hashed_size, r;
+ bool a, no_closer = false, no_wildcard = false, optout = false;
+
+ assert(key);
+ assert(result);
+
+ /* First step, find the zone name and the NSEC3 parameters of the zone.
+ * it is sufficient to look for the longest common suffix we find with
+ * any NSEC3 RR in the response. Any NSEC3 record will do as all NSEC3
+ * records from a given zone in a response must use the same
+ * parameters. */
+ zone = dns_resource_key_name(key);
+ for (;;) {
+ DNS_ANSWER_FOREACH_FLAGS(zone_rr, flags, answer) {
+ r = nsec3_is_good(zone_rr, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dns_name_equal_skip(dns_resource_key_name(zone_rr->key), 1, zone);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto found_zone;
+ }
+
+ /* Strip one label from the front */
+ r = dns_name_parent(&zone);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ *result = DNSSEC_NSEC_NO_RR;
+ return 0;
+
+found_zone:
+ /* Second step, find the closest encloser NSEC3 RR in 'answer' that matches 'key' */
+ p = dns_resource_key_name(key);
+ for (;;) {
+ _cleanup_free_ char *hashed_domain = NULL;
+
+ hashed_size = nsec3_hashed_domain_make(zone_rr, p, zone, &hashed_domain);
+ if (hashed_size == -EOPNOTSUPP) {
+ *result = DNSSEC_NSEC_UNSUPPORTED_ALGORITHM;
+ return 0;
+ }
+ if (hashed_size < 0)
+ return hashed_size;
+
+ DNS_ANSWER_FOREACH_FLAGS(enclosure_rr, flags, answer) {
+
+ r = nsec3_is_good(enclosure_rr, zone_rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (enclosure_rr->nsec3.next_hashed_name_size != (size_t) hashed_size)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(enclosure_rr->key), hashed_domain);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ a = flags & DNS_ANSWER_AUTHENTICATED;
+ goto found_closest_encloser;
+ }
+ }
+
+ /* We didn't find the closest encloser with this name,
+ * but let's remember this domain name, it might be
+ * the next closer name */
+
+ pp = p;
+
+ /* Strip one label from the front */
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ *result = DNSSEC_NSEC_NO_RR;
+ return 0;
+
+found_closest_encloser:
+ /* We found a closest encloser in 'p'; next closer is 'pp' */
+
+ if (!pp) {
+ /* We have an exact match! If we area looking for a DS RR, then we must insist that we got the NSEC3 RR
+ * from the parent. Otherwise the one from the child. Do so, by checking whether SOA and NS are
+ * appropriately set. */
+
+ if (key->type == DNS_TYPE_DS) {
+ if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_SOA))
+ return -EBADMSG;
+ } else {
+ if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_NS) &&
+ !bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_SOA))
+ return -EBADMSG;
+ }
+
+ /* No next closer NSEC3 RR. That means there's a direct NSEC3 RR for our key. */
+ if (bitmap_isset(enclosure_rr->nsec3.types, key->type))
+ *result = DNSSEC_NSEC_FOUND;
+ else if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_CNAME))
+ *result = DNSSEC_NSEC_CNAME;
+ else
+ *result = DNSSEC_NSEC_NODATA;
+
+ if (authenticated)
+ *authenticated = a;
+ if (ttl)
+ *ttl = enclosure_rr->ttl;
+
+ return 0;
+ }
+
+ /* Ensure this is not a DNAME domain, see RFC5155, section 8.3. */
+ if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_DNAME))
+ return -EBADMSG;
+
+ /* Ensure that this data is from the delegated domain
+ * (i.e. originates from the "lower" DNS server), and isn't
+ * just glue records (i.e. doesn't originate from the "upper"
+ * DNS server). */
+ if (bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_NS) &&
+ !bitmap_isset(enclosure_rr->nsec3.types, DNS_TYPE_SOA))
+ return -EBADMSG;
+
+ /* Prove that there is no next closer and whether or not there is a wildcard domain. */
+
+ wildcard = strjoina("*.", p);
+ r = nsec3_hashed_domain_make(enclosure_rr, wildcard, zone, &wildcard_domain);
+ if (r < 0)
+ return r;
+ if (r != hashed_size)
+ return -EBADMSG;
+
+ r = nsec3_hashed_domain_make(enclosure_rr, pp, zone, &next_closer_domain);
+ if (r < 0)
+ return r;
+ if (r != hashed_size)
+ return -EBADMSG;
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+ _cleanup_free_ char *next_hashed_domain = NULL;
+
+ r = nsec3_is_good(rr, zone_rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = nsec3_hashed_domain_format(rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size, zone, &next_hashed_domain);
+ if (r < 0)
+ return r;
+
+ r = dns_name_between(dns_resource_key_name(rr->key), next_closer_domain, next_hashed_domain);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (rr->nsec3.flags & 1)
+ optout = true;
+
+ a = a && (flags & DNS_ANSWER_AUTHENTICATED);
+
+ no_closer = true;
+ }
+
+ r = dns_name_equal(dns_resource_key_name(rr->key), wildcard_domain);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ a = a && (flags & DNS_ANSWER_AUTHENTICATED);
+
+ wildcard_rr = rr;
+ }
+
+ r = dns_name_between(dns_resource_key_name(rr->key), wildcard_domain, next_hashed_domain);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (rr->nsec3.flags & 1)
+ /* This only makes sense if we have a wildcard delegation, which is
+ * very unlikely, see RFC 4592, Section 4.2, but we cannot rely on
+ * this not happening, so hence cannot simply conclude NXDOMAIN as
+ * we would wish */
+ optout = true;
+
+ a = a && (flags & DNS_ANSWER_AUTHENTICATED);
+
+ no_wildcard = true;
+ }
+ }
+
+ if (wildcard_rr && no_wildcard)
+ return -EBADMSG;
+
+ if (!no_closer) {
+ *result = DNSSEC_NSEC_NO_RR;
+ return 0;
+ }
+
+ if (wildcard_rr) {
+ /* A wildcard exists that matches our query. */
+ if (optout)
+ /* This is not specified in any RFC to the best of my knowledge, but
+ * if the next closer enclosure is covered by an opt-out NSEC3 RR
+ * it means that we cannot prove that the source of synthesis is
+ * correct, as there may be a closer match. */
+ *result = DNSSEC_NSEC_OPTOUT;
+ else if (bitmap_isset(wildcard_rr->nsec3.types, key->type))
+ *result = DNSSEC_NSEC_FOUND;
+ else if (bitmap_isset(wildcard_rr->nsec3.types, DNS_TYPE_CNAME))
+ *result = DNSSEC_NSEC_CNAME;
+ else
+ *result = DNSSEC_NSEC_NODATA;
+ } else {
+ if (optout)
+ /* The RFC only specifies that we have to care for optout for NODATA for
+ * DS records. However, children of an insecure opt-out delegation should
+ * also be considered opt-out, rather than verified NXDOMAIN.
+ * Note that we do not require a proof of wildcard non-existence if the
+ * next closer domain is covered by an opt-out, as that would not provide
+ * any additional information. */
+ *result = DNSSEC_NSEC_OPTOUT;
+ else if (no_wildcard)
+ *result = DNSSEC_NSEC_NXDOMAIN;
+ else {
+ *result = DNSSEC_NSEC_NO_RR;
+
+ return 0;
+ }
+ }
+
+ if (authenticated)
+ *authenticated = a;
+
+ if (ttl)
+ *ttl = enclosure_rr->ttl;
+
+ return 0;
+}
+
+static int dnssec_nsec_wildcard_equal(DnsResourceRecord *rr, const char *name) {
+ char label[DNS_LABEL_MAX];
+ const char *n;
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether the specified RR has a name beginning in "*.", and if the rest is a suffix of our name */
+
+ if (rr->n_skip_labels_source != 1)
+ return 0;
+
+ n = dns_resource_key_name(rr->key);
+ r = dns_label_unescape(&n, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1 || label[0] != '*')
+ return 0;
+
+ return dns_name_endswith(name, n);
+}
+
+static int dnssec_nsec_in_path(DnsResourceRecord *rr, const char *name) {
+ const char *nn, *common_suffix;
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether the specified nsec RR indicates that name is an empty non-terminal (ENT)
+ *
+ * A couple of examples:
+ *
+ * NSEC bar → waldo.foo.bar: indicates that foo.bar exists and is an ENT
+ * NSEC waldo.foo.bar → yyy.zzz.xoo.bar: indicates that xoo.bar and zzz.xoo.bar exist and are ENTs
+ * NSEC yyy.zzz.xoo.bar → bar: indicates pretty much nothing about ENTs
+ */
+
+ /* First, determine parent of next domain. */
+ nn = rr->nsec.next_domain_name;
+ r = dns_name_parent(&nn);
+ if (r <= 0)
+ return r;
+
+ /* If the name we just determined is not equal or child of the name we are interested in, then we can't say
+ * anything at all. */
+ r = dns_name_endswith(nn, name);
+ if (r <= 0)
+ return r;
+
+ /* If the name we are interested in is not a prefix of the common suffix of the NSEC RR's owner and next domain names, then we can't say anything either. */
+ r = dns_name_common_suffix(dns_resource_key_name(rr->key), rr->nsec.next_domain_name, &common_suffix);
+ if (r < 0)
+ return r;
+
+ return dns_name_endswith(name, common_suffix);
+}
+
+static int dnssec_nsec_from_parent_zone(DnsResourceRecord *rr, const char *name) {
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether this NSEC originates to the parent zone or the child zone. */
+
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return r;
+
+ r = dns_name_equal(name, dns_resource_key_name(rr->key));
+ if (r <= 0)
+ return r;
+
+ /* DNAME, and NS without SOA is an indication for a delegation. */
+ if (bitmap_isset(rr->nsec.types, DNS_TYPE_DNAME))
+ return 1;
+
+ if (bitmap_isset(rr->nsec.types, DNS_TYPE_NS) && !bitmap_isset(rr->nsec.types, DNS_TYPE_SOA))
+ return 1;
+
+ return 0;
+}
+
+static int dnssec_nsec_covers(DnsResourceRecord *rr, const char *name) {
+ const char *signer;
+ int r;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Checks whether the name is covered by this NSEC RR. This means, that the name is somewhere below the NSEC's
+ * signer name, and between the NSEC's two names. */
+
+ r = dns_resource_record_signer(rr, &signer);
+ if (r < 0)
+ return r;
+
+ r = dns_name_endswith(name, signer); /* this NSEC isn't suitable the name is not in the signer's domain */
+ if (r <= 0)
+ return r;
+
+ return dns_name_between(dns_resource_key_name(rr->key), name, rr->nsec.next_domain_name);
+}
+
+static int dnssec_nsec_generate_wildcard(DnsResourceRecord *rr, const char *name, char **wc) {
+ const char *common_suffix1, *common_suffix2, *signer;
+ int r, labels1, labels2;
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_NSEC);
+
+ /* Generates "Wildcard at the Closest Encloser" for the given name and NSEC RR. */
+
+ r = dns_resource_record_signer(rr, &signer);
+ if (r < 0)
+ return r;
+
+ r = dns_name_endswith(name, signer); /* this NSEC isn't suitable the name is not in the signer's domain */
+ if (r <= 0)
+ return r;
+
+ r = dns_name_common_suffix(name, dns_resource_key_name(rr->key), &common_suffix1);
+ if (r < 0)
+ return r;
+
+ r = dns_name_common_suffix(name, rr->nsec.next_domain_name, &common_suffix2);
+ if (r < 0)
+ return r;
+
+ labels1 = dns_name_count_labels(common_suffix1);
+ if (labels1 < 0)
+ return labels1;
+
+ labels2 = dns_name_count_labels(common_suffix2);
+ if (labels2 < 0)
+ return labels2;
+
+ if (labels1 > labels2)
+ r = dns_name_concat("*", common_suffix1, 0, wc);
+ else
+ r = dns_name_concat("*", common_suffix2, 0, wc);
+
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int dnssec_nsec_test(DnsAnswer *answer, DnsResourceKey *key, DnssecNsecResult *result, bool *authenticated, uint32_t *ttl) {
+ bool have_nsec3 = false, covering_rr_authenticated = false, wildcard_rr_authenticated = false;
+ DnsResourceRecord *rr, *covering_rr = NULL, *wildcard_rr = NULL;
+ DnsAnswerFlags flags;
+ const char *name;
+ int r;
+
+ assert(key);
+ assert(result);
+
+ /* Look for any NSEC/NSEC3 RRs that say something about the specified key. */
+
+ name = dns_resource_key_name(key);
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+
+ if (rr->key->class != key->class)
+ continue;
+
+ have_nsec3 = have_nsec3 || (rr->key->type == DNS_TYPE_NSEC3);
+
+ if (rr->key->type != DNS_TYPE_NSEC)
+ continue;
+
+ /* The following checks only make sense for NSEC RRs that are not expanded from a wildcard */
+ r = dns_resource_record_is_synthetic(rr);
+ if (r == -ENODATA) /* No signing RR known. */
+ continue;
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ /* Check if this is a direct match. If so, we have encountered a NODATA case */
+ r = dns_name_equal(dns_resource_key_name(rr->key), name);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* If it's not a direct match, maybe it's a wild card match? */
+ r = dnssec_nsec_wildcard_equal(rr, name);
+ if (r < 0)
+ return r;
+ }
+ if (r > 0) {
+ if (key->type == DNS_TYPE_DS) {
+ /* If we look for a DS RR and the server sent us the NSEC RR of the child zone
+ * we have a problem. For DS RRs we want the NSEC RR from the parent */
+ if (bitmap_isset(rr->nsec.types, DNS_TYPE_SOA))
+ continue;
+ } else {
+ /* For all RR types, ensure that if NS is set SOA is set too, so that we know
+ * we got the child's NSEC. */
+ if (bitmap_isset(rr->nsec.types, DNS_TYPE_NS) &&
+ !bitmap_isset(rr->nsec.types, DNS_TYPE_SOA))
+ continue;
+ }
+
+ if (bitmap_isset(rr->nsec.types, key->type))
+ *result = DNSSEC_NSEC_FOUND;
+ else if (bitmap_isset(rr->nsec.types, DNS_TYPE_CNAME))
+ *result = DNSSEC_NSEC_CNAME;
+ else
+ *result = DNSSEC_NSEC_NODATA;
+
+ if (authenticated)
+ *authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ if (ttl)
+ *ttl = rr->ttl;
+
+ return 0;
+ }
+
+ /* Check if the name we are looking for is an empty non-terminal within the owner or next name
+ * of the NSEC RR. */
+ r = dnssec_nsec_in_path(rr, name);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *result = DNSSEC_NSEC_NODATA;
+
+ if (authenticated)
+ *authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ if (ttl)
+ *ttl = rr->ttl;
+
+ return 0;
+ }
+
+ /* The following two "covering" checks, are not useful if the NSEC is from the parent */
+ r = dnssec_nsec_from_parent_zone(rr, name);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ /* Check if this NSEC RR proves the absence of an explicit RR under this name */
+ r = dnssec_nsec_covers(rr, name);
+ if (r < 0)
+ return r;
+ if (r > 0 && (!covering_rr || !covering_rr_authenticated)) {
+ covering_rr = rr;
+ covering_rr_authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ }
+ }
+
+ if (covering_rr) {
+ _cleanup_free_ char *wc = NULL;
+ r = dnssec_nsec_generate_wildcard(covering_rr, name, &wc);
+ if (r < 0)
+ return r;
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+
+ if (rr->key->class != key->class)
+ continue;
+
+ if (rr->key->type != DNS_TYPE_NSEC)
+ continue;
+
+ /* Check if this NSEC RR proves the nonexistence of the wildcard */
+ r = dnssec_nsec_covers(rr, wc);
+ if (r < 0)
+ return r;
+ if (r > 0 && (!wildcard_rr || !wildcard_rr_authenticated)) {
+ wildcard_rr = rr;
+ wildcard_rr_authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ }
+ }
+ }
+
+ if (covering_rr && wildcard_rr) {
+ /* If we could prove that neither the name itself, nor the wildcard at the closest encloser exists, we
+ * proved the NXDOMAIN case. */
+ *result = DNSSEC_NSEC_NXDOMAIN;
+
+ if (authenticated)
+ *authenticated = covering_rr_authenticated && wildcard_rr_authenticated;
+ if (ttl)
+ *ttl = MIN(covering_rr->ttl, wildcard_rr->ttl);
+
+ return 0;
+ }
+
+ /* OK, this was not sufficient. Let's see if NSEC3 can help. */
+ if (have_nsec3)
+ return dnssec_test_nsec3(answer, key, result, authenticated, ttl);
+
+ /* No appropriate NSEC RR found, report this. */
+ *result = DNSSEC_NSEC_NO_RR;
+ return 0;
+}
+
+static int dnssec_nsec_test_enclosed(DnsAnswer *answer, uint16_t type, const char *name, const char *zone, bool *authenticated) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int r;
+
+ assert(name);
+ assert(zone);
+
+ /* Checks whether there's an NSEC/NSEC3 that proves that the specified 'name' is non-existing in the specified
+ * 'zone'. The 'zone' must be a suffix of the 'name'. */
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, answer) {
+ bool found = false;
+
+ if (rr->key->type != type && type != DNS_TYPE_ANY)
+ continue;
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_NSEC:
+
+ /* We only care for NSEC RRs from the indicated zone */
+ r = dns_resource_record_is_signer(rr, zone);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dns_name_between(dns_resource_key_name(rr->key), name, rr->nsec.next_domain_name);
+ if (r < 0)
+ return r;
+
+ found = r > 0;
+ break;
+
+ case DNS_TYPE_NSEC3: {
+ _cleanup_free_ char *hashed_domain = NULL, *next_hashed_domain = NULL;
+
+ /* We only care for NSEC3 RRs from the indicated zone */
+ r = dns_resource_record_is_signer(rr, zone);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = nsec3_is_good(rr, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ /* Format the domain we are testing with the NSEC3 RR's hash function */
+ r = nsec3_hashed_domain_make(
+ rr,
+ name,
+ zone,
+ &hashed_domain);
+ if (r < 0)
+ return r;
+ if ((size_t) r != rr->nsec3.next_hashed_name_size)
+ break;
+
+ /* Format the NSEC3's next hashed name as proper domain name */
+ r = nsec3_hashed_domain_format(
+ rr->nsec3.next_hashed_name,
+ rr->nsec3.next_hashed_name_size,
+ zone,
+ &next_hashed_domain);
+ if (r < 0)
+ return r;
+
+ r = dns_name_between(dns_resource_key_name(rr->key), hashed_domain, next_hashed_domain);
+ if (r < 0)
+ return r;
+
+ found = r > 0;
+ break;
+ }
+
+ default:
+ continue;
+ }
+
+ if (found) {
+ if (authenticated)
+ *authenticated = flags & DNS_ANSWER_AUTHENTICATED;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int dnssec_test_positive_wildcard_nsec3(
+ DnsAnswer *answer,
+ const char *name,
+ const char *source,
+ const char *zone,
+ bool *authenticated) {
+
+ const char *next_closer = NULL;
+ int r;
+
+ /* Run a positive NSEC3 wildcard proof. Specifically:
+ *
+ * A proof that the "next closer" of the generating wildcard does not exist.
+ *
+ * Note a key difference between the NSEC3 and NSEC versions of the proof. NSEC RRs don't have to exist for
+ * empty non-transients. NSEC3 RRs however have to. This means it's sufficient to check if the next closer name
+ * exists for the NSEC3 RR and we are done.
+ *
+ * To prove that a.b.c.d.e.f is rightfully synthesized from a wildcard *.d.e.f all we have to check is that
+ * c.d.e.f does not exist. */
+
+ for (;;) {
+ next_closer = name;
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return r;
+
+ r = dns_name_equal(name, source);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ break;
+ }
+
+ return dnssec_nsec_test_enclosed(answer, DNS_TYPE_NSEC3, next_closer, zone, authenticated);
+}
+
+static int dnssec_test_positive_wildcard_nsec(
+ DnsAnswer *answer,
+ const char *name,
+ const char *source,
+ const char *zone,
+ bool *_authenticated) {
+
+ bool authenticated = true;
+ int r;
+
+ /* Run a positive NSEC wildcard proof. Specifically:
+ *
+ * A proof that there's neither a wildcard name nor a non-wildcard name that is a suffix of the name "name" and
+ * a prefix of the synthesizing source "source" in the zone "zone".
+ *
+ * See RFC 5155, Section 8.8 and RFC 4035, Section 5.3.4
+ *
+ * Note that if we want to prove that a.b.c.d.e.f is rightfully synthesized from a wildcard *.d.e.f, then we
+ * have to prove that none of the following exist:
+ *
+ * 1) a.b.c.d.e.f
+ * 2) *.b.c.d.e.f
+ * 3) b.c.d.e.f
+ * 4) *.c.d.e.f
+ * 5) c.d.e.f
+ */
+
+ for (;;) {
+ _cleanup_free_ char *wc = NULL;
+ bool a = false;
+
+ /* Check if there's an NSEC or NSEC3 RR that proves that the mame we determined is really non-existing,
+ * i.e between the owner name and the next name of an NSEC RR. */
+ r = dnssec_nsec_test_enclosed(answer, DNS_TYPE_NSEC, name, zone, &a);
+ if (r <= 0)
+ return r;
+
+ authenticated = authenticated && a;
+
+ /* Strip one label off */
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return r;
+
+ /* Did we reach the source of synthesis? */
+ r = dns_name_equal(name, source);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Successful exit */
+ *_authenticated = authenticated;
+ return 1;
+ }
+
+ /* Safety check, that the source of synthesis is still our suffix */
+ r = dns_name_endswith(name, source);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBADMSG;
+
+ /* Replace the label we stripped off with an asterisk */
+ wc = strjoin("*.", name);
+ if (!wc)
+ return -ENOMEM;
+
+ /* And check if the proof holds for the asterisk name, too */
+ r = dnssec_nsec_test_enclosed(answer, DNS_TYPE_NSEC, wc, zone, &a);
+ if (r <= 0)
+ return r;
+
+ authenticated = authenticated && a;
+ /* In the next iteration we'll check the non-asterisk-prefixed version */
+ }
+}
+
+int dnssec_test_positive_wildcard(
+ DnsAnswer *answer,
+ const char *name,
+ const char *source,
+ const char *zone,
+ bool *authenticated) {
+
+ int r;
+
+ assert(name);
+ assert(source);
+ assert(zone);
+ assert(authenticated);
+
+ r = dns_answer_contains_zone_nsec3(answer, zone);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return dnssec_test_positive_wildcard_nsec3(answer, name, source, zone, authenticated);
+ else
+ return dnssec_test_positive_wildcard_nsec(answer, name, source, zone, authenticated);
+}
+
+#else
+
+int dnssec_verify_rrset(
+ DnsAnswer *a,
+ const DnsResourceKey *key,
+ DnsResourceRecord *rrsig,
+ DnsResourceRecord *dnskey,
+ usec_t realtime,
+ DnssecResult *result) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_rrsig_match_dnskey(DnsResourceRecord *rrsig, DnsResourceRecord *dnskey, bool revoked_ok) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_key_match_rrsig(const DnsResourceKey *key, DnsResourceRecord *rrsig) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_verify_rrset_search(
+ DnsAnswer *a,
+ const DnsResourceKey *key,
+ DnsAnswer *validated_dnskeys,
+ usec_t realtime,
+ DnssecResult *result,
+ DnsResourceRecord **ret_rrsig) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_has_rrsig(DnsAnswer *a, const DnsResourceKey *key) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_verify_dnskey_by_ds(DnsResourceRecord *dnskey, DnsResourceRecord *ds, bool mask_revoke) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_verify_dnskey_by_ds_search(DnsResourceRecord *dnskey, DnsAnswer *validated_ds) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_nsec3_hash(DnsResourceRecord *nsec3, const char *name, void *ret) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_nsec_test(DnsAnswer *answer, DnsResourceKey *key, DnssecNsecResult *result, bool *authenticated, uint32_t *ttl) {
+
+ return -EOPNOTSUPP;
+}
+
+int dnssec_test_positive_wildcard(
+ DnsAnswer *answer,
+ const char *name,
+ const char *source,
+ const char *zone,
+ bool *authenticated) {
+
+ return -EOPNOTSUPP;
+}
+
+#endif
+
+static const char* const dnssec_result_table[_DNSSEC_RESULT_MAX] = {
+ [DNSSEC_VALIDATED] = "validated",
+ [DNSSEC_VALIDATED_WILDCARD] = "validated-wildcard",
+ [DNSSEC_INVALID] = "invalid",
+ [DNSSEC_SIGNATURE_EXPIRED] = "signature-expired",
+ [DNSSEC_UNSUPPORTED_ALGORITHM] = "unsupported-algorithm",
+ [DNSSEC_NO_SIGNATURE] = "no-signature",
+ [DNSSEC_MISSING_KEY] = "missing-key",
+ [DNSSEC_UNSIGNED] = "unsigned",
+ [DNSSEC_FAILED_AUXILIARY] = "failed-auxiliary",
+ [DNSSEC_NSEC_MISMATCH] = "nsec-mismatch",
+ [DNSSEC_INCOMPATIBLE_SERVER] = "incompatible-server",
+};
+DEFINE_STRING_TABLE_LOOKUP(dnssec_result, DnssecResult);
+
+static const char* const dnssec_verdict_table[_DNSSEC_VERDICT_MAX] = {
+ [DNSSEC_SECURE] = "secure",
+ [DNSSEC_INSECURE] = "insecure",
+ [DNSSEC_BOGUS] = "bogus",
+ [DNSSEC_INDETERMINATE] = "indeterminate",
+};
+DEFINE_STRING_TABLE_LOOKUP(dnssec_verdict, DnssecVerdict);
diff --git a/src/resolve/resolved-dns-dnssec.h b/src/resolve/resolved-dns-dnssec.h
new file mode 100644
index 0000000..9c3c0dc
--- /dev/null
+++ b/src/resolve/resolved-dns-dnssec.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef enum DnssecResult DnssecResult;
+typedef enum DnssecVerdict DnssecVerdict;
+
+#include "dns-domain.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-rr.h"
+
+enum DnssecResult {
+ /* These five are returned by dnssec_verify_rrset() */
+ DNSSEC_VALIDATED,
+ DNSSEC_VALIDATED_WILDCARD, /* Validated via a wildcard RRSIG, further NSEC/NSEC3 checks necessary */
+ DNSSEC_INVALID,
+ DNSSEC_SIGNATURE_EXPIRED,
+ DNSSEC_UNSUPPORTED_ALGORITHM,
+
+ /* These two are added by dnssec_verify_rrset_search() */
+ DNSSEC_NO_SIGNATURE,
+ DNSSEC_MISSING_KEY,
+
+ /* These two are added by the DnsTransaction logic */
+ DNSSEC_UNSIGNED,
+ DNSSEC_FAILED_AUXILIARY,
+ DNSSEC_NSEC_MISMATCH,
+ DNSSEC_INCOMPATIBLE_SERVER,
+
+ _DNSSEC_RESULT_MAX,
+ _DNSSEC_RESULT_INVALID = -1
+};
+
+enum DnssecVerdict {
+ DNSSEC_SECURE,
+ DNSSEC_INSECURE,
+ DNSSEC_BOGUS,
+ DNSSEC_INDETERMINATE,
+
+ _DNSSEC_VERDICT_MAX,
+ _DNSSEC_VERDICT_INVALID = -1
+};
+
+#define DNSSEC_CANONICAL_HOSTNAME_MAX (DNS_HOSTNAME_MAX + 2)
+
+/* The longest digest we'll ever generate, of all digest algorithms we support */
+#define DNSSEC_HASH_SIZE_MAX (MAX(20, 32))
+
+int dnssec_rrsig_match_dnskey(DnsResourceRecord *rrsig, DnsResourceRecord *dnskey, bool revoked_ok);
+int dnssec_key_match_rrsig(const DnsResourceKey *key, DnsResourceRecord *rrsig);
+
+int dnssec_verify_rrset(DnsAnswer *answer, const DnsResourceKey *key, DnsResourceRecord *rrsig, DnsResourceRecord *dnskey, usec_t realtime, DnssecResult *result);
+int dnssec_verify_rrset_search(DnsAnswer *answer, const DnsResourceKey *key, DnsAnswer *validated_dnskeys, usec_t realtime, DnssecResult *result, DnsResourceRecord **rrsig);
+
+int dnssec_verify_dnskey_by_ds(DnsResourceRecord *dnskey, DnsResourceRecord *ds, bool mask_revoke);
+int dnssec_verify_dnskey_by_ds_search(DnsResourceRecord *dnskey, DnsAnswer *validated_ds);
+
+int dnssec_has_rrsig(DnsAnswer *a, const DnsResourceKey *key);
+
+uint16_t dnssec_keytag(DnsResourceRecord *dnskey, bool mask_revoke);
+
+int dnssec_nsec3_hash(DnsResourceRecord *nsec3, const char *name, void *ret);
+
+typedef enum DnssecNsecResult {
+ DNSSEC_NSEC_NO_RR, /* No suitable NSEC/NSEC3 RR found */
+ DNSSEC_NSEC_CNAME, /* Didn't find what was asked for, but did find CNAME */
+ DNSSEC_NSEC_UNSUPPORTED_ALGORITHM,
+ DNSSEC_NSEC_NXDOMAIN,
+ DNSSEC_NSEC_NODATA,
+ DNSSEC_NSEC_FOUND,
+ DNSSEC_NSEC_OPTOUT,
+} DnssecNsecResult;
+
+int dnssec_nsec_test(DnsAnswer *answer, DnsResourceKey *key, DnssecNsecResult *result, bool *authenticated, uint32_t *ttl);
+
+int dnssec_test_positive_wildcard(DnsAnswer *a, const char *name, const char *source, const char *zone, bool *authenticated);
+
+const char* dnssec_result_to_string(DnssecResult m) _const_;
+DnssecResult dnssec_result_from_string(const char *s) _pure_;
+
+const char* dnssec_verdict_to_string(DnssecVerdict m) _const_;
+DnssecVerdict dnssec_verdict_from_string(const char *s) _pure_;
diff --git a/src/resolve/resolved-dns-packet.c b/src/resolve/resolved-dns-packet.c
new file mode 100644
index 0000000..b4eb5ef
--- /dev/null
+++ b/src/resolve/resolved-dns-packet.c
@@ -0,0 +1,2410 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+#endif
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "memory-util.h"
+#include "resolved-dns-packet.h"
+#include "set.h"
+#include "string-table.h"
+#include "strv.h"
+#include "unaligned.h"
+#include "utf8.h"
+#include "util.h"
+
+#define EDNS0_OPT_DO (1<<15)
+
+assert_cc(DNS_PACKET_SIZE_START > DNS_PACKET_HEADER_SIZE);
+
+typedef struct DnsPacketRewinder {
+ DnsPacket *packet;
+ size_t saved_rindex;
+} DnsPacketRewinder;
+
+static void rewind_dns_packet(DnsPacketRewinder *rewinder) {
+ if (rewinder->packet)
+ dns_packet_rewind(rewinder->packet, rewinder->saved_rindex);
+}
+
+#define INIT_REWINDER(rewinder, p) do { rewinder.packet = p; rewinder.saved_rindex = p->rindex; } while (0)
+#define CANCEL_REWINDER(rewinder) do { rewinder.packet = NULL; } while (0)
+
+int dns_packet_new(
+ DnsPacket **ret,
+ DnsProtocol protocol,
+ size_t min_alloc_dsize,
+ size_t max_size) {
+
+ DnsPacket *p;
+ size_t a;
+
+ assert(ret);
+ assert(max_size >= DNS_PACKET_HEADER_SIZE);
+
+ if (max_size > DNS_PACKET_SIZE_MAX)
+ max_size = DNS_PACKET_SIZE_MAX;
+
+ /* The caller may not check what is going to be truly allocated, so do not allow to
+ * allocate a DNS packet bigger than DNS_PACKET_SIZE_MAX.
+ */
+ if (min_alloc_dsize > DNS_PACKET_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EFBIG),
+ "Requested packet data size too big: %zu",
+ min_alloc_dsize);
+
+ /* When dns_packet_new() is called with min_alloc_dsize == 0, allocate more than the
+ * absolute minimum (which is the dns packet header size), to avoid
+ * resizing immediately again after appending the first data to the packet.
+ */
+ if (min_alloc_dsize < DNS_PACKET_HEADER_SIZE)
+ a = DNS_PACKET_SIZE_START;
+ else
+ a = min_alloc_dsize;
+
+ /* round up to next page size */
+ a = PAGE_ALIGN(ALIGN(sizeof(DnsPacket)) + a) - ALIGN(sizeof(DnsPacket));
+
+ /* make sure we never allocate more than useful */
+ if (a > max_size)
+ a = max_size;
+
+ p = malloc0(ALIGN(sizeof(DnsPacket)) + a);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (DnsPacket) {
+ .n_ref = 1,
+ .protocol = protocol,
+ .size = DNS_PACKET_HEADER_SIZE,
+ .rindex = DNS_PACKET_HEADER_SIZE,
+ .allocated = a,
+ .max_size = max_size,
+ .opt_start = (size_t) -1,
+ .opt_size = (size_t) -1,
+ };
+
+ *ret = p;
+
+ return 0;
+}
+
+void dns_packet_set_flags(DnsPacket *p, bool dnssec_checking_disabled, bool truncated) {
+
+ DnsPacketHeader *h;
+
+ assert(p);
+
+ h = DNS_PACKET_HEADER(p);
+
+ switch(p->protocol) {
+ case DNS_PROTOCOL_LLMNR:
+ assert(!truncated);
+
+ h->flags = htobe16(DNS_PACKET_MAKE_FLAGS(0 /* qr */,
+ 0 /* opcode */,
+ 0 /* c */,
+ 0 /* tc */,
+ 0 /* t */,
+ 0 /* ra */,
+ 0 /* ad */,
+ 0 /* cd */,
+ 0 /* rcode */));
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ h->flags = htobe16(DNS_PACKET_MAKE_FLAGS(0 /* qr */,
+ 0 /* opcode */,
+ 0 /* aa */,
+ truncated /* tc */,
+ 0 /* rd (ask for recursion) */,
+ 0 /* ra */,
+ 0 /* ad */,
+ 0 /* cd */,
+ 0 /* rcode */));
+ break;
+
+ default:
+ assert(!truncated);
+
+ h->flags = htobe16(DNS_PACKET_MAKE_FLAGS(0 /* qr */,
+ 0 /* opcode */,
+ 0 /* aa */,
+ 0 /* tc */,
+ 1 /* rd (ask for recursion) */,
+ 0 /* ra */,
+ 0 /* ad */,
+ dnssec_checking_disabled /* cd */,
+ 0 /* rcode */));
+ }
+}
+
+int dns_packet_new_query(DnsPacket **ret, DnsProtocol protocol, size_t min_alloc_dsize, bool dnssec_checking_disabled) {
+ DnsPacket *p;
+ int r;
+
+ assert(ret);
+
+ r = dns_packet_new(&p, protocol, min_alloc_dsize, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ /* Always set the TC bit to 0 initially.
+ * If there are multiple packets later, we'll update the bit shortly before sending.
+ */
+ dns_packet_set_flags(p, dnssec_checking_disabled, false);
+
+ *ret = p;
+ return 0;
+}
+
+DnsPacket *dns_packet_ref(DnsPacket *p) {
+
+ if (!p)
+ return NULL;
+
+ assert(!p->on_stack);
+
+ assert(p->n_ref > 0);
+ p->n_ref++;
+ return p;
+}
+
+static void dns_packet_free(DnsPacket *p) {
+ char *s;
+
+ assert(p);
+
+ dns_question_unref(p->question);
+ dns_answer_unref(p->answer);
+ dns_resource_record_unref(p->opt);
+
+ while ((s = hashmap_steal_first_key(p->names)))
+ free(s);
+ hashmap_free(p->names);
+
+ free(p->_data);
+
+ if (!p->on_stack)
+ free(p);
+}
+
+DnsPacket *dns_packet_unref(DnsPacket *p) {
+ if (!p)
+ return NULL;
+
+ assert(p->n_ref > 0);
+
+ dns_packet_unref(p->more);
+
+ if (p->n_ref == 1)
+ dns_packet_free(p);
+ else
+ p->n_ref--;
+
+ return NULL;
+}
+
+int dns_packet_validate(DnsPacket *p) {
+ assert(p);
+
+ if (p->size < DNS_PACKET_HEADER_SIZE)
+ return -EBADMSG;
+
+ if (p->size > DNS_PACKET_SIZE_MAX)
+ return -EBADMSG;
+
+ return 1;
+}
+
+int dns_packet_validate_reply(DnsPacket *p) {
+ int r;
+
+ assert(p);
+
+ r = dns_packet_validate(p);
+ if (r < 0)
+ return r;
+
+ if (DNS_PACKET_QR(p) != 1)
+ return 0;
+
+ if (DNS_PACKET_OPCODE(p) != 0)
+ return -EBADMSG;
+
+ switch (p->protocol) {
+
+ case DNS_PROTOCOL_LLMNR:
+ /* RFC 4795, Section 2.1.1. says to discard all replies with QDCOUNT != 1 */
+ if (DNS_PACKET_QDCOUNT(p) != 1)
+ return -EBADMSG;
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ /* RFC 6762, Section 18 */
+ if (DNS_PACKET_RCODE(p) != 0)
+ return -EBADMSG;
+
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+int dns_packet_validate_query(DnsPacket *p) {
+ int r;
+
+ assert(p);
+
+ r = dns_packet_validate(p);
+ if (r < 0)
+ return r;
+
+ if (DNS_PACKET_QR(p) != 0)
+ return 0;
+
+ if (DNS_PACKET_OPCODE(p) != 0)
+ return -EBADMSG;
+
+ if (DNS_PACKET_TC(p))
+ return -EBADMSG;
+
+ switch (p->protocol) {
+
+ case DNS_PROTOCOL_LLMNR:
+ case DNS_PROTOCOL_DNS:
+ /* RFC 4795, Section 2.1.1. says to discard all queries with QDCOUNT != 1 */
+ if (DNS_PACKET_QDCOUNT(p) != 1)
+ return -EBADMSG;
+
+ /* RFC 4795, Section 2.1.1. says to discard all queries with ANCOUNT != 0 */
+ if (DNS_PACKET_ANCOUNT(p) > 0)
+ return -EBADMSG;
+
+ /* RFC 4795, Section 2.1.1. says to discard all queries with NSCOUNT != 0 */
+ if (DNS_PACKET_NSCOUNT(p) > 0)
+ return -EBADMSG;
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ /* RFC 6762, Section 18 */
+ if (DNS_PACKET_AA(p) != 0 ||
+ DNS_PACKET_RD(p) != 0 ||
+ DNS_PACKET_RA(p) != 0 ||
+ DNS_PACKET_AD(p) != 0 ||
+ DNS_PACKET_CD(p) != 0 ||
+ DNS_PACKET_RCODE(p) != 0)
+ return -EBADMSG;
+
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int dns_packet_extend(DnsPacket *p, size_t add, void **ret, size_t *start) {
+ assert(p);
+
+ if (p->size + add > p->allocated) {
+ size_t a, ms;
+
+ a = PAGE_ALIGN((p->size + add) * 2);
+
+ ms = dns_packet_size_max(p);
+ if (a > ms)
+ a = ms;
+
+ if (p->size + add > a)
+ return -EMSGSIZE;
+
+ if (p->_data) {
+ void *d;
+
+ d = realloc(p->_data, a);
+ if (!d)
+ return -ENOMEM;
+
+ p->_data = d;
+ } else {
+ p->_data = malloc(a);
+ if (!p->_data)
+ return -ENOMEM;
+
+ memcpy(p->_data, (uint8_t*) p + ALIGN(sizeof(DnsPacket)), p->size);
+ memzero((uint8_t*) p->_data + p->size, a - p->size);
+ }
+
+ p->allocated = a;
+ }
+
+ if (start)
+ *start = p->size;
+
+ if (ret)
+ *ret = (uint8_t*) DNS_PACKET_DATA(p) + p->size;
+
+ p->size += add;
+ return 0;
+}
+
+void dns_packet_truncate(DnsPacket *p, size_t sz) {
+ char *s;
+ void *n;
+
+ assert(p);
+
+ if (p->size <= sz)
+ return;
+
+ HASHMAP_FOREACH_KEY(n, s, p->names) {
+
+ if (PTR_TO_SIZE(n) < sz)
+ continue;
+
+ hashmap_remove(p->names, s);
+ free(s);
+ }
+
+ p->size = sz;
+}
+
+int dns_packet_append_blob(DnsPacket *p, const void *d, size_t l, size_t *start) {
+ void *q;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_extend(p, l, &q, start);
+ if (r < 0)
+ return r;
+
+ memcpy_safe(q, d, l);
+ return 0;
+}
+
+int dns_packet_append_uint8(DnsPacket *p, uint8_t v, size_t *start) {
+ void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_extend(p, sizeof(uint8_t), &d, start);
+ if (r < 0)
+ return r;
+
+ ((uint8_t*) d)[0] = v;
+
+ return 0;
+}
+
+int dns_packet_append_uint16(DnsPacket *p, uint16_t v, size_t *start) {
+ void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_extend(p, sizeof(uint16_t), &d, start);
+ if (r < 0)
+ return r;
+
+ unaligned_write_be16(d, v);
+
+ return 0;
+}
+
+int dns_packet_append_uint32(DnsPacket *p, uint32_t v, size_t *start) {
+ void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_extend(p, sizeof(uint32_t), &d, start);
+ if (r < 0)
+ return r;
+
+ unaligned_write_be32(d, v);
+
+ return 0;
+}
+
+int dns_packet_append_string(DnsPacket *p, const char *s, size_t *start) {
+ assert(p);
+ assert(s);
+
+ return dns_packet_append_raw_string(p, s, strlen(s), start);
+}
+
+int dns_packet_append_raw_string(DnsPacket *p, const void *s, size_t size, size_t *start) {
+ void *d;
+ int r;
+
+ assert(p);
+ assert(s || size == 0);
+
+ if (size > 255)
+ return -E2BIG;
+
+ r = dns_packet_extend(p, 1 + size, &d, start);
+ if (r < 0)
+ return r;
+
+ ((uint8_t*) d)[0] = (uint8_t) size;
+
+ memcpy_safe(((uint8_t*) d) + 1, s, size);
+
+ return 0;
+}
+
+int dns_packet_append_label(DnsPacket *p, const char *d, size_t l, bool canonical_candidate, size_t *start) {
+ uint8_t *w;
+ int r;
+
+ /* Append a label to a packet. Optionally, does this in DNSSEC
+ * canonical form, if this label is marked as a candidate for
+ * it, and the canonical form logic is enabled for the
+ * packet */
+
+ assert(p);
+ assert(d);
+
+ if (l > DNS_LABEL_MAX)
+ return -E2BIG;
+
+ r = dns_packet_extend(p, 1 + l, (void**) &w, start);
+ if (r < 0)
+ return r;
+
+ *(w++) = (uint8_t) l;
+
+ if (p->canonical_form && canonical_candidate) {
+ size_t i;
+
+ /* Generate in canonical form, as defined by DNSSEC
+ * RFC 4034, Section 6.2, i.e. all lower-case. */
+
+ for (i = 0; i < l; i++)
+ w[i] = (uint8_t) ascii_tolower(d[i]);
+ } else
+ /* Otherwise, just copy the string unaltered. This is
+ * essential for DNS-SD, where the casing of labels
+ * matters and needs to be retained. */
+ memcpy(w, d, l);
+
+ return 0;
+}
+
+int dns_packet_append_name(
+ DnsPacket *p,
+ const char *name,
+ bool allow_compression,
+ bool canonical_candidate,
+ size_t *start) {
+
+ size_t saved_size;
+ int r;
+
+ assert(p);
+ assert(name);
+
+ if (p->refuse_compression)
+ allow_compression = false;
+
+ saved_size = p->size;
+
+ while (!dns_name_is_root(name)) {
+ const char *z = name;
+ char label[DNS_LABEL_MAX];
+ size_t n = 0;
+
+ if (allow_compression)
+ n = PTR_TO_SIZE(hashmap_get(p->names, name));
+ if (n > 0) {
+ assert(n < p->size);
+
+ if (n < 0x4000) {
+ r = dns_packet_append_uint16(p, 0xC000 | n, NULL);
+ if (r < 0)
+ goto fail;
+
+ goto done;
+ }
+ }
+
+ r = dns_label_unescape(&name, label, sizeof label, 0);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_label(p, label, r, canonical_candidate, &n);
+ if (r < 0)
+ goto fail;
+
+ if (allow_compression) {
+ _cleanup_free_ char *s = NULL;
+
+ s = strdup(z);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = hashmap_ensure_allocated(&p->names, &dns_name_hash_ops);
+ if (r < 0)
+ goto fail;
+
+ r = hashmap_put(p->names, s, SIZE_TO_PTR(n));
+ if (r < 0)
+ goto fail;
+
+ s = NULL;
+ }
+ }
+
+ r = dns_packet_append_uint8(p, 0, NULL);
+ if (r < 0)
+ return r;
+
+done:
+ if (start)
+ *start = saved_size;
+
+ return 0;
+
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+int dns_packet_append_key(DnsPacket *p, const DnsResourceKey *k, const DnsAnswerFlags flags, size_t *start) {
+ size_t saved_size;
+ uint16_t class;
+ int r;
+
+ assert(p);
+ assert(k);
+
+ saved_size = p->size;
+
+ r = dns_packet_append_name(p, dns_resource_key_name(k), true, true, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, k->type, NULL);
+ if (r < 0)
+ goto fail;
+
+ class = flags & DNS_ANSWER_CACHE_FLUSH ? k->class | MDNS_RR_CACHE_FLUSH : k->class;
+ r = dns_packet_append_uint16(p, class, NULL);
+ if (r < 0)
+ goto fail;
+
+ if (start)
+ *start = saved_size;
+
+ return 0;
+
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+static int dns_packet_append_type_window(DnsPacket *p, uint8_t window, uint8_t length, const uint8_t *types, size_t *start) {
+ size_t saved_size;
+ int r;
+
+ assert(p);
+ assert(types);
+ assert(length > 0);
+
+ saved_size = p->size;
+
+ r = dns_packet_append_uint8(p, window, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, length, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, types, length, NULL);
+ if (r < 0)
+ goto fail;
+
+ if (start)
+ *start = saved_size;
+
+ return 0;
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+static int dns_packet_append_types(DnsPacket *p, Bitmap *types, size_t *start) {
+ uint8_t window = 0;
+ uint8_t entry = 0;
+ uint8_t bitmaps[32] = {};
+ unsigned n;
+ size_t saved_size;
+ int r;
+
+ assert(p);
+
+ saved_size = p->size;
+
+ BITMAP_FOREACH(n, types) {
+ assert(n <= 0xffff);
+
+ if ((n >> 8) != window && bitmaps[entry / 8] != 0) {
+ r = dns_packet_append_type_window(p, window, entry / 8 + 1, bitmaps, NULL);
+ if (r < 0)
+ goto fail;
+
+ zero(bitmaps);
+ }
+
+ window = n >> 8;
+ entry = n & 255;
+
+ bitmaps[entry / 8] |= 1 << (7 - (entry % 8));
+ }
+
+ if (bitmaps[entry / 8] != 0) {
+ r = dns_packet_append_type_window(p, window, entry / 8 + 1, bitmaps, NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (start)
+ *start = saved_size;
+
+ return 0;
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+/* Append the OPT pseudo-RR described in RFC6891 */
+int dns_packet_append_opt(
+ DnsPacket *p,
+ uint16_t max_udp_size,
+ bool edns0_do,
+ bool include_rfc6975,
+ int rcode,
+ size_t *start) {
+
+ size_t saved_size;
+ int r;
+
+ assert(p);
+ /* we must never advertise supported packet size smaller than the legacy max */
+ assert(max_udp_size >= DNS_PACKET_UNICAST_SIZE_MAX);
+ assert(rcode >= 0);
+ assert(rcode <= _DNS_RCODE_MAX);
+
+ if (p->opt_start != (size_t) -1)
+ return -EBUSY;
+
+ assert(p->opt_size == (size_t) -1);
+
+ saved_size = p->size;
+
+ /* empty name */
+ r = dns_packet_append_uint8(p, 0, NULL);
+ if (r < 0)
+ return r;
+
+ /* type */
+ r = dns_packet_append_uint16(p, DNS_TYPE_OPT, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* class: maximum udp packet that can be received */
+ r = dns_packet_append_uint16(p, max_udp_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* extended RCODE and VERSION */
+ r = dns_packet_append_uint16(p, ((uint16_t) rcode & 0x0FF0) << 4, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* flags: DNSSEC OK (DO), see RFC3225 */
+ r = dns_packet_append_uint16(p, edns0_do ? EDNS0_OPT_DO : 0, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* RDLENGTH */
+ if (edns0_do && include_rfc6975) {
+ /* If DO is on and this is requested, also append RFC6975 Algorithm data. This is supposed to
+ * be done on queries, not on replies, hencer callers should turn this off when finishing off
+ * replies. */
+
+ static const uint8_t rfc6975[] = {
+
+ 0, 5, /* OPTION_CODE: DAU */
+#if HAVE_GCRYPT && GCRYPT_VERSION_NUMBER >= 0x010600
+ 0, 7, /* LIST_LENGTH */
+#else
+ 0, 6, /* LIST_LENGTH */
+#endif
+ DNSSEC_ALGORITHM_RSASHA1,
+ DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1,
+ DNSSEC_ALGORITHM_RSASHA256,
+ DNSSEC_ALGORITHM_RSASHA512,
+ DNSSEC_ALGORITHM_ECDSAP256SHA256,
+ DNSSEC_ALGORITHM_ECDSAP384SHA384,
+#if HAVE_GCRYPT && GCRYPT_VERSION_NUMBER >= 0x010600
+ DNSSEC_ALGORITHM_ED25519,
+#endif
+
+ 0, 6, /* OPTION_CODE: DHU */
+ 0, 3, /* LIST_LENGTH */
+ DNSSEC_DIGEST_SHA1,
+ DNSSEC_DIGEST_SHA256,
+ DNSSEC_DIGEST_SHA384,
+
+ 0, 7, /* OPTION_CODE: N3U */
+ 0, 1, /* LIST_LENGTH */
+ NSEC3_ALGORITHM_SHA1,
+ };
+
+ r = dns_packet_append_uint16(p, sizeof(rfc6975), NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rfc6975, sizeof(rfc6975), NULL);
+ } else
+ r = dns_packet_append_uint16(p, 0, NULL);
+ if (r < 0)
+ goto fail;
+
+ DNS_PACKET_HEADER(p)->arcount = htobe16(DNS_PACKET_ARCOUNT(p) + 1);
+
+ p->opt_start = saved_size;
+ p->opt_size = p->size - saved_size;
+
+ if (start)
+ *start = saved_size;
+
+ return 0;
+
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+int dns_packet_truncate_opt(DnsPacket *p) {
+ assert(p);
+
+ if (p->opt_start == (size_t) -1) {
+ assert(p->opt_size == (size_t) -1);
+ return 0;
+ }
+
+ assert(p->opt_size != (size_t) -1);
+ assert(DNS_PACKET_ARCOUNT(p) > 0);
+
+ if (p->opt_start + p->opt_size != p->size)
+ return -EBUSY;
+
+ dns_packet_truncate(p, p->opt_start);
+ DNS_PACKET_HEADER(p)->arcount = htobe16(DNS_PACKET_ARCOUNT(p) - 1);
+ p->opt_start = p->opt_size = (size_t) -1;
+
+ return 1;
+}
+
+int dns_packet_append_rr(DnsPacket *p, const DnsResourceRecord *rr, const DnsAnswerFlags flags, size_t *start, size_t *rdata_start) {
+
+ size_t saved_size, rdlength_offset, end, rdlength, rds;
+ uint32_t ttl;
+ int r;
+
+ assert(p);
+ assert(rr);
+
+ saved_size = p->size;
+
+ r = dns_packet_append_key(p, rr->key, flags, NULL);
+ if (r < 0)
+ goto fail;
+
+ ttl = flags & DNS_ANSWER_GOODBYE ? 0 : rr->ttl;
+ r = dns_packet_append_uint32(p, ttl, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* Initially we write 0 here */
+ r = dns_packet_append_uint16(p, 0, &rdlength_offset);
+ if (r < 0)
+ goto fail;
+
+ rds = p->size - saved_size;
+
+ switch (rr->unparsable ? _DNS_TYPE_INVALID : rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ r = dns_packet_append_uint16(p, rr->srv.priority, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, rr->srv.weight, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, rr->srv.port, NULL);
+ if (r < 0)
+ goto fail;
+
+ /* RFC 2782 states "Unless and until permitted by future standards
+ * action, name compression is not to be used for this field." */
+ r = dns_packet_append_name(p, rr->srv.name, false, true, NULL);
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ r = dns_packet_append_name(p, rr->ptr.name, true, true, NULL);
+ break;
+
+ case DNS_TYPE_HINFO:
+ r = dns_packet_append_string(p, rr->hinfo.cpu, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_string(p, rr->hinfo.os, NULL);
+ break;
+
+ case DNS_TYPE_SPF: /* exactly the same as TXT */
+ case DNS_TYPE_TXT:
+
+ if (!rr->txt.items) {
+ /* RFC 6763, section 6.1 suggests to generate
+ * single empty string for an empty array. */
+
+ r = dns_packet_append_raw_string(p, NULL, 0, NULL);
+ if (r < 0)
+ goto fail;
+ } else {
+ DnsTxtItem *i;
+
+ LIST_FOREACH(items, i, rr->txt.items) {
+ r = dns_packet_append_raw_string(p, i->data, i->length, NULL);
+ if (r < 0)
+ goto fail;
+ }
+ }
+
+ r = 0;
+ break;
+
+ case DNS_TYPE_A:
+ r = dns_packet_append_blob(p, &rr->a.in_addr, sizeof(struct in_addr), NULL);
+ break;
+
+ case DNS_TYPE_AAAA:
+ r = dns_packet_append_blob(p, &rr->aaaa.in6_addr, sizeof(struct in6_addr), NULL);
+ break;
+
+ case DNS_TYPE_SOA:
+ r = dns_packet_append_name(p, rr->soa.mname, true, true, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_name(p, rr->soa.rname, true, true, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.serial, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.refresh, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.retry, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.expire, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->soa.minimum, NULL);
+ break;
+
+ case DNS_TYPE_MX:
+ r = dns_packet_append_uint16(p, rr->mx.priority, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_name(p, rr->mx.exchange, true, true, NULL);
+ break;
+
+ case DNS_TYPE_LOC:
+ r = dns_packet_append_uint8(p, rr->loc.version, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->loc.size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->loc.horiz_pre, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->loc.vert_pre, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->loc.latitude, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->loc.longitude, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->loc.altitude, NULL);
+ break;
+
+ case DNS_TYPE_DS:
+ r = dns_packet_append_uint16(p, rr->ds.key_tag, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->ds.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->ds.digest_type, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->ds.digest, rr->ds.digest_size, NULL);
+ break;
+
+ case DNS_TYPE_SSHFP:
+ r = dns_packet_append_uint8(p, rr->sshfp.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->sshfp.fptype, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->sshfp.fingerprint, rr->sshfp.fingerprint_size, NULL);
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ r = dns_packet_append_uint16(p, rr->dnskey.flags, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->dnskey.protocol, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->dnskey.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->dnskey.key, rr->dnskey.key_size, NULL);
+ break;
+
+ case DNS_TYPE_RRSIG:
+ r = dns_packet_append_uint16(p, rr->rrsig.type_covered, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->rrsig.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->rrsig.labels, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->rrsig.original_ttl, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->rrsig.expiration, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint32(p, rr->rrsig.inception, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, rr->rrsig.key_tag, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_name(p, rr->rrsig.signer, false, true, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->rrsig.signature, rr->rrsig.signature_size, NULL);
+ break;
+
+ case DNS_TYPE_NSEC:
+ r = dns_packet_append_name(p, rr->nsec.next_domain_name, false, false, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_types(p, rr->nsec.types, NULL);
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case DNS_TYPE_NSEC3:
+ r = dns_packet_append_uint8(p, rr->nsec3.algorithm, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->nsec3.flags, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint16(p, rr->nsec3.iterations, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->nsec3.salt_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->nsec3.salt, rr->nsec3.salt_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->nsec3.next_hashed_name_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_types(p, rr->nsec3.types, NULL);
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case DNS_TYPE_TLSA:
+ r = dns_packet_append_uint8(p, rr->tlsa.cert_usage, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->tlsa.selector, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_uint8(p, rr->tlsa.matching_type, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->tlsa.data, rr->tlsa.data_size, NULL);
+ break;
+
+ case DNS_TYPE_CAA:
+ r = dns_packet_append_uint8(p, rr->caa.flags, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_string(p, rr->caa.tag, NULL);
+ if (r < 0)
+ goto fail;
+
+ r = dns_packet_append_blob(p, rr->caa.value, rr->caa.value_size, NULL);
+ break;
+
+ case DNS_TYPE_OPT:
+ case DNS_TYPE_OPENPGPKEY:
+ case _DNS_TYPE_INVALID: /* unparsable */
+ default:
+
+ r = dns_packet_append_blob(p, rr->generic.data, rr->generic.data_size, NULL);
+ break;
+ }
+ if (r < 0)
+ goto fail;
+
+ /* Let's calculate the actual data size and update the field */
+ rdlength = p->size - rdlength_offset - sizeof(uint16_t);
+ if (rdlength > 0xFFFF) {
+ r = -ENOSPC;
+ goto fail;
+ }
+
+ end = p->size;
+ p->size = rdlength_offset;
+ r = dns_packet_append_uint16(p, rdlength, NULL);
+ if (r < 0)
+ goto fail;
+ p->size = end;
+
+ if (start)
+ *start = saved_size;
+
+ if (rdata_start)
+ *rdata_start = rds;
+
+ return 0;
+
+fail:
+ dns_packet_truncate(p, saved_size);
+ return r;
+}
+
+int dns_packet_append_question(DnsPacket *p, DnsQuestion *q) {
+ DnsResourceKey *key;
+ int r;
+
+ assert(p);
+
+ DNS_QUESTION_FOREACH(key, q) {
+ r = dns_packet_append_key(p, key, 0, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_packet_append_answer(DnsPacket *p, DnsAnswer *a) {
+ DnsResourceRecord *rr;
+ DnsAnswerFlags flags;
+ int r;
+
+ assert(p);
+
+ DNS_ANSWER_FOREACH_FLAGS(rr, flags, a) {
+ r = dns_packet_append_rr(p, rr, flags, NULL, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_packet_read(DnsPacket *p, size_t sz, const void **ret, size_t *start) {
+ assert(p);
+
+ if (p->rindex + sz > p->size)
+ return -EMSGSIZE;
+
+ if (ret)
+ *ret = (uint8_t*) DNS_PACKET_DATA(p) + p->rindex;
+
+ if (start)
+ *start = p->rindex;
+
+ p->rindex += sz;
+ return 0;
+}
+
+void dns_packet_rewind(DnsPacket *p, size_t idx) {
+ assert(p);
+ assert(idx <= p->size);
+ assert(idx >= DNS_PACKET_HEADER_SIZE);
+
+ p->rindex = idx;
+}
+
+int dns_packet_read_blob(DnsPacket *p, void *d, size_t sz, size_t *start) {
+ const void *q;
+ int r;
+
+ assert(p);
+ assert(d);
+
+ r = dns_packet_read(p, sz, &q, start);
+ if (r < 0)
+ return r;
+
+ memcpy(d, q, sz);
+ return 0;
+}
+
+static int dns_packet_read_memdup(
+ DnsPacket *p, size_t size,
+ void **ret, size_t *ret_size,
+ size_t *ret_start) {
+
+ const void *src;
+ size_t start;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ r = dns_packet_read(p, size, &src, &start);
+ if (r < 0)
+ return r;
+
+ if (size <= 0)
+ *ret = NULL;
+ else {
+ void *copy;
+
+ copy = memdup(src, size);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ }
+
+ if (ret_size)
+ *ret_size = size;
+ if (ret_start)
+ *ret_start = start;
+
+ return 0;
+}
+
+int dns_packet_read_uint8(DnsPacket *p, uint8_t *ret, size_t *start) {
+ const void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_read(p, sizeof(uint8_t), &d, start);
+ if (r < 0)
+ return r;
+
+ *ret = ((uint8_t*) d)[0];
+ return 0;
+}
+
+int dns_packet_read_uint16(DnsPacket *p, uint16_t *ret, size_t *start) {
+ const void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_read(p, sizeof(uint16_t), &d, start);
+ if (r < 0)
+ return r;
+
+ *ret = unaligned_read_be16(d);
+
+ return 0;
+}
+
+int dns_packet_read_uint32(DnsPacket *p, uint32_t *ret, size_t *start) {
+ const void *d;
+ int r;
+
+ assert(p);
+
+ r = dns_packet_read(p, sizeof(uint32_t), &d, start);
+ if (r < 0)
+ return r;
+
+ *ret = unaligned_read_be32(d);
+
+ return 0;
+}
+
+int dns_packet_read_string(DnsPacket *p, char **ret, size_t *start) {
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ const void *d;
+ char *t;
+ uint8_t c;
+ int r;
+
+ assert(p);
+ INIT_REWINDER(rewinder, p);
+
+ r = dns_packet_read_uint8(p, &c, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read(p, c, &d, NULL);
+ if (r < 0)
+ return r;
+
+ if (memchr(d, 0, c))
+ return -EBADMSG;
+
+ t = strndup(d, c);
+ if (!t)
+ return -ENOMEM;
+
+ if (!utf8_is_valid(t)) {
+ free(t);
+ return -EBADMSG;
+ }
+
+ *ret = t;
+
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+int dns_packet_read_raw_string(DnsPacket *p, const void **ret, size_t *size, size_t *start) {
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ uint8_t c;
+ int r;
+
+ assert(p);
+ INIT_REWINDER(rewinder, p);
+
+ r = dns_packet_read_uint8(p, &c, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read(p, c, ret, NULL);
+ if (r < 0)
+ return r;
+
+ if (size)
+ *size = c;
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+int dns_packet_read_name(
+ DnsPacket *p,
+ char **_ret,
+ bool allow_compression,
+ size_t *start) {
+
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ size_t after_rindex = 0, jump_barrier;
+ _cleanup_free_ char *ret = NULL;
+ size_t n = 0, allocated = 0;
+ bool first = true;
+ int r;
+
+ assert(p);
+ assert(_ret);
+ INIT_REWINDER(rewinder, p);
+ jump_barrier = p->rindex;
+
+ if (p->refuse_compression)
+ allow_compression = false;
+
+ for (;;) {
+ uint8_t c, d;
+
+ r = dns_packet_read_uint8(p, &c, NULL);
+ if (r < 0)
+ return r;
+
+ if (c == 0)
+ /* End of name */
+ break;
+ else if (c <= 63) {
+ const char *label;
+
+ /* Literal label */
+ r = dns_packet_read(p, c, (const void**) &label, NULL);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ if (first)
+ first = false;
+ else
+ ret[n++] = '.';
+
+ r = dns_label_escape(label, c, ret + n, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ n += r;
+ continue;
+ } else if (allow_compression && FLAGS_SET(c, 0xc0)) {
+ uint16_t ptr;
+
+ /* Pointer */
+ r = dns_packet_read_uint8(p, &d, NULL);
+ if (r < 0)
+ return r;
+
+ ptr = (uint16_t) (c & ~0xc0) << 8 | (uint16_t) d;
+ if (ptr < DNS_PACKET_HEADER_SIZE || ptr >= jump_barrier)
+ return -EBADMSG;
+
+ if (after_rindex == 0)
+ after_rindex = p->rindex;
+
+ /* Jumps are limited to a "prior occurrence" (RFC-1035 4.1.4) */
+ jump_barrier = ptr;
+ p->rindex = ptr;
+ } else
+ return -EBADMSG;
+ }
+
+ if (!GREEDY_REALLOC(ret, allocated, n + 1))
+ return -ENOMEM;
+
+ ret[n] = 0;
+
+ if (after_rindex != 0)
+ p->rindex= after_rindex;
+
+ *_ret = TAKE_PTR(ret);
+
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+static int dns_packet_read_type_window(DnsPacket *p, Bitmap **types, size_t *start) {
+ uint8_t window;
+ uint8_t length;
+ const uint8_t *bitmap;
+ uint8_t bit = 0;
+ unsigned i;
+ bool found = false;
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ int r;
+
+ assert(p);
+ assert(types);
+ INIT_REWINDER(rewinder, p);
+
+ r = bitmap_ensure_allocated(types);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &window, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &length, NULL);
+ if (r < 0)
+ return r;
+
+ if (length == 0 || length > 32)
+ return -EBADMSG;
+
+ r = dns_packet_read(p, length, (const void **)&bitmap, NULL);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < length; i++) {
+ uint8_t bitmask = 1 << 7;
+
+ if (!bitmap[i]) {
+ found = false;
+ bit += 8;
+ continue;
+ }
+
+ found = true;
+
+ for (; bitmask; bit++, bitmask >>= 1)
+ if (bitmap[i] & bitmask) {
+ uint16_t n;
+
+ n = (uint16_t) window << 8 | (uint16_t) bit;
+
+ /* Ignore pseudo-types. see RFC4034 section 4.1.2 */
+ if (dns_type_is_pseudo(n))
+ continue;
+
+ r = bitmap_set(*types, n);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!found)
+ return -EBADMSG;
+
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+static int dns_packet_read_type_windows(DnsPacket *p, Bitmap **types, size_t size, size_t *start) {
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ int r;
+
+ INIT_REWINDER(rewinder, p);
+
+ while (p->rindex < rewinder.saved_rindex + size) {
+ r = dns_packet_read_type_window(p, types, NULL);
+ if (r < 0)
+ return r;
+
+ /* don't read past end of current RR */
+ if (p->rindex > rewinder.saved_rindex + size)
+ return -EBADMSG;
+ }
+
+ if (p->rindex != rewinder.saved_rindex + size)
+ return -EBADMSG;
+
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+int dns_packet_read_key(DnsPacket *p, DnsResourceKey **ret, bool *ret_cache_flush, size_t *start) {
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ _cleanup_free_ char *name = NULL;
+ bool cache_flush = false;
+ uint16_t class, type;
+ DnsResourceKey *key;
+ int r;
+
+ assert(p);
+ assert(ret);
+ INIT_REWINDER(rewinder, p);
+
+ r = dns_packet_read_name(p, &name, true, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint16(p, &type, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint16(p, &class, NULL);
+ if (r < 0)
+ return r;
+
+ if (p->protocol == DNS_PROTOCOL_MDNS) {
+ /* See RFC6762, Section 10.2 */
+
+ if (type != DNS_TYPE_OPT && (class & MDNS_RR_CACHE_FLUSH)) {
+ class &= ~MDNS_RR_CACHE_FLUSH;
+ cache_flush = true;
+ }
+ }
+
+ key = dns_resource_key_new_consume(class, type, name);
+ if (!key)
+ return -ENOMEM;
+
+ name = NULL;
+ *ret = key;
+
+ if (ret_cache_flush)
+ *ret_cache_flush = cache_flush;
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+static bool loc_size_ok(uint8_t size) {
+ uint8_t m = size >> 4, e = size & 0xF;
+
+ return m <= 9 && e <= 9 && (m > 0 || e == 0);
+}
+
+int dns_packet_read_rr(DnsPacket *p, DnsResourceRecord **ret, bool *ret_cache_flush, size_t *start) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder;
+ size_t offset;
+ uint16_t rdlength;
+ bool cache_flush;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ INIT_REWINDER(rewinder, p);
+
+ r = dns_packet_read_key(p, &key, &cache_flush, NULL);
+ if (r < 0)
+ return r;
+
+ if (!dns_class_is_valid_rr(key->class) || !dns_type_is_valid_rr(key->type))
+ return -EBADMSG;
+
+ rr = dns_resource_record_new(key);
+ if (!rr)
+ return -ENOMEM;
+
+ r = dns_packet_read_uint32(p, &rr->ttl, NULL);
+ if (r < 0)
+ return r;
+
+ /* RFC 2181, Section 8, suggests to
+ * treat a TTL with the MSB set as a zero TTL. */
+ if (rr->ttl & UINT32_C(0x80000000))
+ rr->ttl = 0;
+
+ r = dns_packet_read_uint16(p, &rdlength, NULL);
+ if (r < 0)
+ return r;
+
+ if (p->rindex + rdlength > p->size)
+ return -EBADMSG;
+
+ offset = p->rindex;
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ r = dns_packet_read_uint16(p, &rr->srv.priority, NULL);
+ if (r < 0)
+ return r;
+ r = dns_packet_read_uint16(p, &rr->srv.weight, NULL);
+ if (r < 0)
+ return r;
+ r = dns_packet_read_uint16(p, &rr->srv.port, NULL);
+ if (r < 0)
+ return r;
+ r = dns_packet_read_name(p, &rr->srv.name, true, NULL);
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ r = dns_packet_read_name(p, &rr->ptr.name, true, NULL);
+ break;
+
+ case DNS_TYPE_HINFO:
+ r = dns_packet_read_string(p, &rr->hinfo.cpu, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_string(p, &rr->hinfo.os, NULL);
+ break;
+
+ case DNS_TYPE_SPF: /* exactly the same as TXT */
+ case DNS_TYPE_TXT:
+ if (rdlength <= 0) {
+ r = dns_txt_item_new_empty(&rr->txt.items);
+ if (r < 0)
+ return r;
+ } else {
+ DnsTxtItem *last = NULL;
+
+ while (p->rindex < offset + rdlength) {
+ DnsTxtItem *i;
+ const void *data;
+ size_t sz;
+
+ r = dns_packet_read_raw_string(p, &data, &sz, NULL);
+ if (r < 0)
+ return r;
+
+ i = malloc0(offsetof(DnsTxtItem, data) + sz + 1); /* extra NUL byte at the end */
+ if (!i)
+ return -ENOMEM;
+
+ memcpy(i->data, data, sz);
+ i->length = sz;
+
+ LIST_INSERT_AFTER(items, rr->txt.items, last, i);
+ last = i;
+ }
+ }
+
+ r = 0;
+ break;
+
+ case DNS_TYPE_A:
+ r = dns_packet_read_blob(p, &rr->a.in_addr, sizeof(struct in_addr), NULL);
+ break;
+
+ case DNS_TYPE_AAAA:
+ r = dns_packet_read_blob(p, &rr->aaaa.in6_addr, sizeof(struct in6_addr), NULL);
+ break;
+
+ case DNS_TYPE_SOA:
+ r = dns_packet_read_name(p, &rr->soa.mname, true, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_name(p, &rr->soa.rname, true, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.serial, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.refresh, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.retry, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.expire, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->soa.minimum, NULL);
+ break;
+
+ case DNS_TYPE_MX:
+ r = dns_packet_read_uint16(p, &rr->mx.priority, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_name(p, &rr->mx.exchange, true, NULL);
+ break;
+
+ case DNS_TYPE_LOC: {
+ uint8_t t;
+ size_t pos;
+
+ r = dns_packet_read_uint8(p, &t, &pos);
+ if (r < 0)
+ return r;
+
+ if (t == 0) {
+ rr->loc.version = t;
+
+ r = dns_packet_read_uint8(p, &rr->loc.size, NULL);
+ if (r < 0)
+ return r;
+
+ if (!loc_size_ok(rr->loc.size))
+ return -EBADMSG;
+
+ r = dns_packet_read_uint8(p, &rr->loc.horiz_pre, NULL);
+ if (r < 0)
+ return r;
+
+ if (!loc_size_ok(rr->loc.horiz_pre))
+ return -EBADMSG;
+
+ r = dns_packet_read_uint8(p, &rr->loc.vert_pre, NULL);
+ if (r < 0)
+ return r;
+
+ if (!loc_size_ok(rr->loc.vert_pre))
+ return -EBADMSG;
+
+ r = dns_packet_read_uint32(p, &rr->loc.latitude, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->loc.longitude, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->loc.altitude, NULL);
+ if (r < 0)
+ return r;
+
+ break;
+ } else {
+ dns_packet_rewind(p, pos);
+ rr->unparsable = true;
+ goto unparsable;
+ }
+ }
+
+ case DNS_TYPE_DS:
+ r = dns_packet_read_uint16(p, &rr->ds.key_tag, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->ds.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->ds.digest_type, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength < 4)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, rdlength - 4,
+ &rr->ds.digest, &rr->ds.digest_size,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (rr->ds.digest_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_SSHFP:
+ r = dns_packet_read_uint8(p, &rr->sshfp.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->sshfp.fptype, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength < 2)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, rdlength - 2,
+ &rr->sshfp.fingerprint, &rr->sshfp.fingerprint_size,
+ NULL);
+
+ if (rr->sshfp.fingerprint_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ r = dns_packet_read_uint16(p, &rr->dnskey.flags, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->dnskey.protocol, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->dnskey.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength < 4)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, rdlength - 4,
+ &rr->dnskey.key, &rr->dnskey.key_size,
+ NULL);
+
+ if (rr->dnskey.key_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_RRSIG:
+ r = dns_packet_read_uint16(p, &rr->rrsig.type_covered, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->rrsig.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->rrsig.labels, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->rrsig.original_ttl, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->rrsig.expiration, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint32(p, &rr->rrsig.inception, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint16(p, &rr->rrsig.key_tag, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_name(p, &rr->rrsig.signer, false, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength + offset < p->rindex)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, offset + rdlength - p->rindex,
+ &rr->rrsig.signature, &rr->rrsig.signature_size,
+ NULL);
+
+ if (rr->rrsig.signature_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_NSEC: {
+
+ /*
+ * RFC6762, section 18.14 explicitly states mDNS should use name compression.
+ * This contradicts RFC3845, section 2.1.1
+ */
+
+ bool allow_compressed = p->protocol == DNS_PROTOCOL_MDNS;
+
+ r = dns_packet_read_name(p, &rr->nsec.next_domain_name, allow_compressed, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_type_windows(p, &rr->nsec.types, offset + rdlength - p->rindex, NULL);
+
+ /* We accept empty NSEC bitmaps. The bit indicating the presence of the NSEC record itself
+ * is redundant and in e.g., RFC4956 this fact is used to define a use for NSEC records
+ * without the NSEC bit set. */
+
+ break;
+ }
+ case DNS_TYPE_NSEC3: {
+ uint8_t size;
+
+ r = dns_packet_read_uint8(p, &rr->nsec3.algorithm, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->nsec3.flags, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint16(p, &rr->nsec3.iterations, NULL);
+ if (r < 0)
+ return r;
+
+ /* this may be zero */
+ r = dns_packet_read_uint8(p, &size, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_memdup(p, size, &rr->nsec3.salt, &rr->nsec3.salt_size, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &size, NULL);
+ if (r < 0)
+ return r;
+
+ if (size <= 0)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, size,
+ &rr->nsec3.next_hashed_name, &rr->nsec3.next_hashed_name_size,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_type_windows(p, &rr->nsec3.types, offset + rdlength - p->rindex, NULL);
+
+ /* empty non-terminals can have NSEC3 records, so empty bitmaps are allowed */
+
+ break;
+ }
+
+ case DNS_TYPE_TLSA:
+ r = dns_packet_read_uint8(p, &rr->tlsa.cert_usage, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->tlsa.selector, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_uint8(p, &rr->tlsa.matching_type, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength < 3)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p, rdlength - 3,
+ &rr->tlsa.data, &rr->tlsa.data_size,
+ NULL);
+
+ if (rr->tlsa.data_size <= 0)
+ /* the accepted size depends on the algorithm, but for now
+ just ensure that the value is greater than zero */
+ return -EBADMSG;
+
+ break;
+
+ case DNS_TYPE_CAA:
+ r = dns_packet_read_uint8(p, &rr->caa.flags, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_read_string(p, &rr->caa.tag, NULL);
+ if (r < 0)
+ return r;
+
+ if (rdlength + offset < p->rindex)
+ return -EBADMSG;
+
+ r = dns_packet_read_memdup(p,
+ rdlength + offset - p->rindex,
+ &rr->caa.value, &rr->caa.value_size, NULL);
+
+ break;
+
+ case DNS_TYPE_OPT: /* we only care about the header of OPT for now. */
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ unparsable:
+ r = dns_packet_read_memdup(p, rdlength, &rr->generic.data, &rr->generic.data_size, NULL);
+
+ break;
+ }
+ if (r < 0)
+ return r;
+ if (p->rindex != offset + rdlength)
+ return -EBADMSG;
+
+ *ret = TAKE_PTR(rr);
+
+ if (ret_cache_flush)
+ *ret_cache_flush = cache_flush;
+ if (start)
+ *start = rewinder.saved_rindex;
+ CANCEL_REWINDER(rewinder);
+
+ return 0;
+}
+
+static bool opt_is_good(DnsResourceRecord *rr, bool *rfc6975) {
+ const uint8_t* p;
+ bool found_dau_dhu_n3u = false;
+ size_t l;
+
+ /* Checks whether the specified OPT RR is well-formed and whether it contains RFC6975 data (which is not OK in
+ * a reply). */
+
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_OPT);
+
+ /* Check that the version is 0 */
+ if (((rr->ttl >> 16) & UINT32_C(0xFF)) != 0) {
+ *rfc6975 = false;
+ return true; /* if it's not version 0, it's OK, but we will ignore the OPT field contents */
+ }
+
+ p = rr->opt.data;
+ l = rr->opt.data_size;
+ while (l > 0) {
+ uint16_t option_code, option_length;
+
+ /* At least four bytes for OPTION-CODE and OPTION-LENGTH are required */
+ if (l < 4U)
+ return false;
+
+ option_code = unaligned_read_be16(p);
+ option_length = unaligned_read_be16(p + 2);
+
+ if (l < option_length + 4U)
+ return false;
+
+ /* RFC 6975 DAU, DHU or N3U fields found. */
+ if (IN_SET(option_code, 5, 6, 7))
+ found_dau_dhu_n3u = true;
+
+ p += option_length + 4U;
+ l -= option_length + 4U;
+ }
+
+ *rfc6975 = found_dau_dhu_n3u;
+ return true;
+}
+
+static int dns_packet_extract_question(DnsPacket *p, DnsQuestion **ret_question) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ unsigned n, i;
+ int r;
+
+ n = DNS_PACKET_QDCOUNT(p);
+ if (n > 0) {
+ question = dns_question_new(n);
+ if (!question)
+ return -ENOMEM;
+
+ _cleanup_set_free_ Set *keys = NULL; /* references to keys are kept by Question */
+
+ keys = set_new(&dns_resource_key_hash_ops);
+ if (!keys)
+ return log_oom();
+
+ r = set_reserve(keys, n * 2); /* Higher multipliers give slightly higher efficiency through
+ * hash collisions, but the gains quickly drop of after 2. */
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ bool cache_flush;
+
+ r = dns_packet_read_key(p, &key, &cache_flush, NULL);
+ if (r < 0)
+ return r;
+
+ if (cache_flush)
+ return -EBADMSG;
+
+ if (!dns_type_is_valid_query(key->type))
+ return -EBADMSG;
+
+ r = set_put(keys, key);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ /* Already in the Question, let's skip */
+ continue;
+
+ r = dns_question_add_raw(question, key);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *ret_question = TAKE_PTR(question);
+
+ return 0;
+}
+
+static int dns_packet_extract_answer(DnsPacket *p, DnsAnswer **ret_answer) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ unsigned n, i;
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *previous = NULL;
+ bool bad_opt = false;
+ int r;
+
+ n = DNS_PACKET_RRCOUNT(p);
+ if (n == 0)
+ return 0;
+
+ answer = dns_answer_new(n);
+ if (!answer)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ bool cache_flush = false;
+
+ r = dns_packet_read_rr(p, &rr, &cache_flush, NULL);
+ if (r < 0)
+ return r;
+
+ /* Try to reduce memory usage a bit */
+ if (previous)
+ dns_resource_key_reduce(&rr->key, &previous->key);
+
+ if (rr->key->type == DNS_TYPE_OPT) {
+ bool has_rfc6975;
+
+ if (p->opt || bad_opt) {
+ /* Multiple OPT RRs? if so, let's ignore all, because there's
+ * something wrong with the server, and if one is valid we wouldn't
+ * know which one. */
+ log_debug("Multiple OPT RRs detected, ignoring all.");
+ bad_opt = true;
+ continue;
+ }
+
+ if (!dns_name_is_root(dns_resource_key_name(rr->key))) {
+ /* If the OPT RR is not owned by the root domain, then it is bad,
+ * let's ignore it. */
+ log_debug("OPT RR is not owned by root domain, ignoring.");
+ bad_opt = true;
+ continue;
+ }
+
+ if (i < DNS_PACKET_ANCOUNT(p) + DNS_PACKET_NSCOUNT(p)) {
+ /* OPT RR is in the wrong section? Some Belkin routers do this. This
+ * is a hint the EDNS implementation is borked, like the Belkin one
+ * is, hence ignore it. */
+ log_debug("OPT RR in wrong section, ignoring.");
+ bad_opt = true;
+ continue;
+ }
+
+ if (!opt_is_good(rr, &has_rfc6975)) {
+ log_debug("Malformed OPT RR, ignoring.");
+ bad_opt = true;
+ continue;
+ }
+
+ if (DNS_PACKET_QR(p)) {
+ /* Additional checks for responses */
+
+ if (!DNS_RESOURCE_RECORD_OPT_VERSION_SUPPORTED(rr))
+ /* If this is a reply and we don't know the EDNS version
+ * then something is weird... */
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "EDNS version newer that our request, bad server.");
+
+ if (has_rfc6975) {
+ /* If the OPT RR contains RFC6975 algorithm data, then this
+ * is indication that the server just copied the OPT it got
+ * from us (which contained that data) back into the reply.
+ * If so, then it doesn't properly support EDNS, as RFC6975
+ * makes it very clear that the algorithm data should only
+ * be contained in questions, never in replies. Crappy
+ * Belkin routers copy the OPT data for example, hence let's
+ * detect this so that we downgrade early. */
+ log_debug("OPT RR contains RFC6975 data, ignoring.");
+ bad_opt = true;
+ continue;
+ }
+ }
+
+ p->opt = dns_resource_record_ref(rr);
+ } else {
+ /* According to RFC 4795, section 2.9. only the RRs from the Answer section
+ * shall be cached. Hence mark only those RRs as cacheable by default, but
+ * not the ones from the Additional or Authority sections. */
+ DnsAnswerFlags flags =
+ (i < DNS_PACKET_ANCOUNT(p) ? DNS_ANSWER_CACHEABLE : 0) |
+ (p->protocol == DNS_PROTOCOL_MDNS && !cache_flush ? DNS_ANSWER_SHARED_OWNER : 0);
+
+ r = dns_answer_add(answer, rr, p->ifindex, flags);
+ if (r < 0)
+ return r;
+ }
+
+ /* Remember this RR, so that we potentically can merge it's ->key object with the
+ * next RR. Note that we only do this if we actually decided to keep the RR around.
+ */
+ dns_resource_record_unref(previous);
+ previous = dns_resource_record_ref(rr);
+ }
+
+ if (bad_opt)
+ p->opt = dns_resource_record_unref(p->opt);
+
+ *ret_answer = TAKE_PTR(answer);
+
+ return 0;
+}
+
+int dns_packet_extract(DnsPacket *p) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ _cleanup_(rewind_dns_packet) DnsPacketRewinder rewinder = {};
+ int r;
+
+ if (p->extracted)
+ return 0;
+
+ INIT_REWINDER(rewinder, p);
+ dns_packet_rewind(p, DNS_PACKET_HEADER_SIZE);
+
+ r = dns_packet_extract_question(p, &question);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_extract_answer(p, &answer);
+ if (r < 0)
+ return r;
+
+ p->question = TAKE_PTR(question);
+ p->answer = TAKE_PTR(answer);
+
+ p->extracted = true;
+
+ /* no CANCEL, always rewind */
+ return 0;
+}
+
+int dns_packet_is_reply_for(DnsPacket *p, const DnsResourceKey *key) {
+ int r;
+
+ assert(p);
+ assert(key);
+
+ /* Checks if the specified packet is a reply for the specified
+ * key and the specified key is the only one in the question
+ * section. */
+
+ if (DNS_PACKET_QR(p) != 1)
+ return 0;
+
+ /* Let's unpack the packet, if that hasn't happened yet. */
+ r = dns_packet_extract(p);
+ if (r < 0)
+ return r;
+
+ if (!p->question)
+ return 0;
+
+ if (p->question->n_keys != 1)
+ return 0;
+
+ return dns_resource_key_equal(p->question->keys[0], key);
+}
+
+static void dns_packet_hash_func(const DnsPacket *s, struct siphash *state) {
+ assert(s);
+
+ siphash24_compress(&s->size, sizeof(s->size), state);
+ siphash24_compress(DNS_PACKET_DATA((DnsPacket*) s), s->size, state);
+}
+
+static int dns_packet_compare_func(const DnsPacket *x, const DnsPacket *y) {
+ int r;
+
+ r = CMP(x->size, y->size);
+ if (r != 0)
+ return r;
+
+ return memcmp(DNS_PACKET_DATA((DnsPacket*) x), DNS_PACKET_DATA((DnsPacket*) y), x->size);
+}
+
+DEFINE_HASH_OPS(dns_packet_hash_ops, DnsPacket, dns_packet_hash_func, dns_packet_compare_func);
+
+static const char* const dns_rcode_table[_DNS_RCODE_MAX_DEFINED] = {
+ [DNS_RCODE_SUCCESS] = "SUCCESS",
+ [DNS_RCODE_FORMERR] = "FORMERR",
+ [DNS_RCODE_SERVFAIL] = "SERVFAIL",
+ [DNS_RCODE_NXDOMAIN] = "NXDOMAIN",
+ [DNS_RCODE_NOTIMP] = "NOTIMP",
+ [DNS_RCODE_REFUSED] = "REFUSED",
+ [DNS_RCODE_YXDOMAIN] = "YXDOMAIN",
+ [DNS_RCODE_YXRRSET] = "YRRSET",
+ [DNS_RCODE_NXRRSET] = "NXRRSET",
+ [DNS_RCODE_NOTAUTH] = "NOTAUTH",
+ [DNS_RCODE_NOTZONE] = "NOTZONE",
+ [DNS_RCODE_BADVERS] = "BADVERS",
+ [DNS_RCODE_BADKEY] = "BADKEY",
+ [DNS_RCODE_BADTIME] = "BADTIME",
+ [DNS_RCODE_BADMODE] = "BADMODE",
+ [DNS_RCODE_BADNAME] = "BADNAME",
+ [DNS_RCODE_BADALG] = "BADALG",
+ [DNS_RCODE_BADTRUNC] = "BADTRUNC",
+ [DNS_RCODE_BADCOOKIE] = "BADCOOKIE",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_rcode, int);
+
+static const char* const dns_protocol_table[_DNS_PROTOCOL_MAX] = {
+ [DNS_PROTOCOL_DNS] = "dns",
+ [DNS_PROTOCOL_MDNS] = "mdns",
+ [DNS_PROTOCOL_LLMNR] = "llmnr",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_protocol, DnsProtocol);
diff --git a/src/resolve/resolved-dns-packet.h b/src/resolve/resolved-dns-packet.h
new file mode 100644
index 0000000..7d6ee2b
--- /dev/null
+++ b/src/resolve/resolved-dns-packet.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#include "hashmap.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "sparse-endian.h"
+
+typedef struct DnsPacketHeader DnsPacketHeader;
+typedef struct DnsPacket DnsPacket;
+
+#include "resolved-def.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-rr.h"
+
+typedef enum DnsProtocol {
+ DNS_PROTOCOL_DNS,
+ DNS_PROTOCOL_MDNS,
+ DNS_PROTOCOL_LLMNR,
+ _DNS_PROTOCOL_MAX,
+ _DNS_PROTOCOL_INVALID = -1
+} DnsProtocol;
+
+struct DnsPacketHeader {
+ uint16_t id;
+ be16_t flags;
+ be16_t qdcount;
+ be16_t ancount;
+ be16_t nscount;
+ be16_t arcount;
+};
+
+#define DNS_PACKET_HEADER_SIZE sizeof(DnsPacketHeader)
+#define UDP_PACKET_HEADER_SIZE (sizeof(struct iphdr) + sizeof(struct udphdr))
+
+/* The various DNS protocols deviate in how large a packet can grow,
+ * but the TCP transport has a 16bit size field, hence that appears to
+ * be the absolute maximum. */
+#define DNS_PACKET_SIZE_MAX 0xFFFFu
+
+/* The default size to use for allocation when we don't know how large
+ * the packet will turn out to be. */
+#define DNS_PACKET_SIZE_START 512u
+
+/* RFC 1035 say 512 is the maximum, for classic unicast DNS */
+#define DNS_PACKET_UNICAST_SIZE_MAX 512u
+
+/* With EDNS0 we can use larger packets, default to 4096, which is what is commonly used */
+#define DNS_PACKET_UNICAST_SIZE_LARGE_MAX 4096u
+
+struct DnsPacket {
+ unsigned n_ref;
+ DnsProtocol protocol;
+ size_t size, allocated, rindex, max_size;
+ void *_data; /* don't access directly, use DNS_PACKET_DATA()! */
+ Hashmap *names; /* For name compression */
+ size_t opt_start, opt_size;
+
+ /* Parsed data */
+ DnsQuestion *question;
+ DnsAnswer *answer;
+ DnsResourceRecord *opt;
+
+ /* Packet reception metadata */
+ int ifindex;
+ int family, ipproto;
+ union in_addr_union sender, destination;
+ uint16_t sender_port, destination_port;
+ uint32_t ttl;
+
+ /* For support of truncated packets */
+ DnsPacket *more;
+
+ bool on_stack:1;
+ bool extracted:1;
+ bool refuse_compression:1;
+ bool canonical_form:1;
+};
+
+static inline uint8_t* DNS_PACKET_DATA(DnsPacket *p) {
+ if (_unlikely_(!p))
+ return NULL;
+
+ if (p->_data)
+ return p->_data;
+
+ return ((uint8_t*) p) + ALIGN(sizeof(DnsPacket));
+}
+
+#define DNS_PACKET_HEADER(p) ((DnsPacketHeader*) DNS_PACKET_DATA(p))
+#define DNS_PACKET_ID(p) DNS_PACKET_HEADER(p)->id
+#define DNS_PACKET_QR(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 15) & 1)
+#define DNS_PACKET_OPCODE(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 11) & 15)
+#define DNS_PACKET_AA(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 10) & 1)
+#define DNS_PACKET_TC(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 9) & 1)
+#define DNS_PACKET_RD(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 8) & 1)
+#define DNS_PACKET_RA(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 7) & 1)
+#define DNS_PACKET_AD(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 5) & 1)
+#define DNS_PACKET_CD(p) ((be16toh(DNS_PACKET_HEADER(p)->flags) >> 4) & 1)
+
+#define DNS_PACKET_FLAG_TC (UINT16_C(1) << 9)
+
+static inline uint16_t DNS_PACKET_RCODE(DnsPacket *p) {
+ uint16_t rcode;
+
+ if (p->opt)
+ rcode = (uint16_t) (p->opt->ttl >> 24);
+ else
+ rcode = 0;
+
+ return rcode | (be16toh(DNS_PACKET_HEADER(p)->flags) & 0xF);
+}
+
+static inline uint16_t DNS_PACKET_PAYLOAD_SIZE_MAX(DnsPacket *p) {
+
+ /* Returns the advertised maximum size for replies, or the DNS default if there's nothing defined. */
+
+ if (p->ipproto == IPPROTO_TCP) /* we ignore EDNS(0) size data on TCP, like everybody else */
+ return DNS_PACKET_SIZE_MAX;
+
+ if (p->opt)
+ return MAX(DNS_PACKET_UNICAST_SIZE_MAX, p->opt->key->class);
+
+ return DNS_PACKET_UNICAST_SIZE_MAX;
+}
+
+static inline bool DNS_PACKET_DO(DnsPacket *p) {
+ if (!p->opt)
+ return false;
+
+ return !!(p->opt->ttl & (1U << 15));
+}
+
+static inline bool DNS_PACKET_VERSION_SUPPORTED(DnsPacket *p) {
+ /* Returns true if this packet is in a version we support. Which means either non-EDNS or EDNS(0), but not EDNS
+ * of any newer versions */
+
+ if (!p->opt)
+ return true;
+
+ return DNS_RESOURCE_RECORD_OPT_VERSION_SUPPORTED(p->opt);
+}
+
+/* LLMNR defines some bits differently */
+#define DNS_PACKET_LLMNR_C(p) DNS_PACKET_AA(p)
+#define DNS_PACKET_LLMNR_T(p) DNS_PACKET_RD(p)
+
+#define DNS_PACKET_QDCOUNT(p) be16toh(DNS_PACKET_HEADER(p)->qdcount)
+#define DNS_PACKET_ANCOUNT(p) be16toh(DNS_PACKET_HEADER(p)->ancount)
+#define DNS_PACKET_NSCOUNT(p) be16toh(DNS_PACKET_HEADER(p)->nscount)
+#define DNS_PACKET_ARCOUNT(p) be16toh(DNS_PACKET_HEADER(p)->arcount)
+
+#define DNS_PACKET_MAKE_FLAGS(qr, opcode, aa, tc, rd, ra, ad, cd, rcode) \
+ (((uint16_t) !!(qr) << 15) | \
+ ((uint16_t) ((opcode) & 15) << 11) | \
+ ((uint16_t) !!(aa) << 10) | /* on LLMNR: c */ \
+ ((uint16_t) !!(tc) << 9) | \
+ ((uint16_t) !!(rd) << 8) | /* on LLMNR: t */ \
+ ((uint16_t) !!(ra) << 7) | \
+ ((uint16_t) !!(ad) << 5) | \
+ ((uint16_t) !!(cd) << 4) | \
+ ((uint16_t) ((rcode) & 15)))
+
+static inline unsigned DNS_PACKET_RRCOUNT(DnsPacket *p) {
+ return
+ (unsigned) DNS_PACKET_ANCOUNT(p) +
+ (unsigned) DNS_PACKET_NSCOUNT(p) +
+ (unsigned) DNS_PACKET_ARCOUNT(p);
+}
+
+int dns_packet_new(DnsPacket **p, DnsProtocol protocol, size_t min_alloc_dsize, size_t max_size);
+int dns_packet_new_query(DnsPacket **p, DnsProtocol protocol, size_t min_alloc_dsize, bool dnssec_checking_disabled);
+
+void dns_packet_set_flags(DnsPacket *p, bool dnssec_checking_disabled, bool truncated);
+
+DnsPacket *dns_packet_ref(DnsPacket *p);
+DnsPacket *dns_packet_unref(DnsPacket *p);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsPacket*, dns_packet_unref);
+
+int dns_packet_validate(DnsPacket *p);
+int dns_packet_validate_reply(DnsPacket *p);
+int dns_packet_validate_query(DnsPacket *p);
+
+int dns_packet_is_reply_for(DnsPacket *p, const DnsResourceKey *key);
+
+int dns_packet_append_blob(DnsPacket *p, const void *d, size_t sz, size_t *start);
+int dns_packet_append_uint8(DnsPacket *p, uint8_t v, size_t *start);
+int dns_packet_append_uint16(DnsPacket *p, uint16_t v, size_t *start);
+int dns_packet_append_uint32(DnsPacket *p, uint32_t v, size_t *start);
+int dns_packet_append_string(DnsPacket *p, const char *s, size_t *start);
+int dns_packet_append_raw_string(DnsPacket *p, const void *s, size_t size, size_t *start);
+int dns_packet_append_label(DnsPacket *p, const char *s, size_t l, bool canonical_candidate, size_t *start);
+int dns_packet_append_name(DnsPacket *p, const char *name, bool allow_compression, bool canonical_candidate, size_t *start);
+int dns_packet_append_key(DnsPacket *p, const DnsResourceKey *key, const DnsAnswerFlags flags, size_t *start);
+int dns_packet_append_rr(DnsPacket *p, const DnsResourceRecord *rr, const DnsAnswerFlags flags, size_t *start, size_t *rdata_start);
+int dns_packet_append_opt(DnsPacket *p, uint16_t max_udp_size, bool edns0_do, bool include_rfc6975, int rcode, size_t *start);
+int dns_packet_append_question(DnsPacket *p, DnsQuestion *q);
+int dns_packet_append_answer(DnsPacket *p, DnsAnswer *a);
+
+void dns_packet_truncate(DnsPacket *p, size_t sz);
+int dns_packet_truncate_opt(DnsPacket *p);
+
+int dns_packet_read(DnsPacket *p, size_t sz, const void **ret, size_t *start);
+int dns_packet_read_blob(DnsPacket *p, void *d, size_t sz, size_t *start);
+int dns_packet_read_uint8(DnsPacket *p, uint8_t *ret, size_t *start);
+int dns_packet_read_uint16(DnsPacket *p, uint16_t *ret, size_t *start);
+int dns_packet_read_uint32(DnsPacket *p, uint32_t *ret, size_t *start);
+int dns_packet_read_string(DnsPacket *p, char **ret, size_t *start);
+int dns_packet_read_raw_string(DnsPacket *p, const void **ret, size_t *size, size_t *start);
+int dns_packet_read_name(DnsPacket *p, char **ret, bool allow_compression, size_t *start);
+int dns_packet_read_key(DnsPacket *p, DnsResourceKey **ret, bool *ret_cache_flush, size_t *start);
+int dns_packet_read_rr(DnsPacket *p, DnsResourceRecord **ret, bool *ret_cache_flush, size_t *start);
+
+void dns_packet_rewind(DnsPacket *p, size_t idx);
+
+int dns_packet_skip_question(DnsPacket *p);
+int dns_packet_extract(DnsPacket *p);
+
+static inline bool DNS_PACKET_SHALL_CACHE(DnsPacket *p) {
+ /* Never cache data originating from localhost, under the
+ * assumption, that it's coming from a locally DNS forwarder
+ * or server, that is caching on its own. */
+
+ return in_addr_is_localhost(p->family, &p->sender) == 0;
+}
+
+/* https://www.iana.org/assignments/dns-parameters/dns-parameters.xhtml#dns-parameters-6 */
+enum {
+ DNS_RCODE_SUCCESS = 0,
+ DNS_RCODE_FORMERR = 1,
+ DNS_RCODE_SERVFAIL = 2,
+ DNS_RCODE_NXDOMAIN = 3,
+ DNS_RCODE_NOTIMP = 4,
+ DNS_RCODE_REFUSED = 5,
+ DNS_RCODE_YXDOMAIN = 6,
+ DNS_RCODE_YXRRSET = 7,
+ DNS_RCODE_NXRRSET = 8,
+ DNS_RCODE_NOTAUTH = 9,
+ DNS_RCODE_NOTZONE = 10,
+ DNS_RCODE_BADVERS = 16,
+ DNS_RCODE_BADSIG = 16, /* duplicate value! */
+ DNS_RCODE_BADKEY = 17,
+ DNS_RCODE_BADTIME = 18,
+ DNS_RCODE_BADMODE = 19,
+ DNS_RCODE_BADNAME = 20,
+ DNS_RCODE_BADALG = 21,
+ DNS_RCODE_BADTRUNC = 22,
+ DNS_RCODE_BADCOOKIE = 23,
+ _DNS_RCODE_MAX_DEFINED,
+ _DNS_RCODE_MAX = 4095 /* 4 bit rcode in the header plus 8 bit rcode in OPT, makes 12 bit */
+};
+
+const char* dns_rcode_to_string(int i) _const_;
+int dns_rcode_from_string(const char *s) _pure_;
+
+const char* dns_protocol_to_string(DnsProtocol p) _const_;
+DnsProtocol dns_protocol_from_string(const char *s) _pure_;
+
+#define LLMNR_MULTICAST_IPV4_ADDRESS ((struct in_addr) { .s_addr = htobe32(224U << 24 | 252U) })
+#define LLMNR_MULTICAST_IPV6_ADDRESS ((struct in6_addr) { .s6_addr = { 0xFF, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x03 } })
+
+#define MDNS_MULTICAST_IPV4_ADDRESS ((struct in_addr) { .s_addr = htobe32(224U << 24 | 251U) })
+#define MDNS_MULTICAST_IPV6_ADDRESS ((struct in6_addr) { .s6_addr = { 0xFF, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb } })
+
+extern const struct hash_ops dns_packet_hash_ops;
+
+static inline uint64_t SD_RESOLVED_FLAGS_MAKE(DnsProtocol protocol, int family, bool authenticated) {
+ uint64_t f;
+
+ /* Converts a protocol + family into a flags field as used in queries and responses */
+
+ f = authenticated ? SD_RESOLVED_AUTHENTICATED : 0;
+
+ switch (protocol) {
+ case DNS_PROTOCOL_DNS:
+ return f|SD_RESOLVED_DNS;
+
+ case DNS_PROTOCOL_LLMNR:
+ return f|(family == AF_INET6 ? SD_RESOLVED_LLMNR_IPV6 : SD_RESOLVED_LLMNR_IPV4);
+
+ case DNS_PROTOCOL_MDNS:
+ return f|(family == AF_INET6 ? SD_RESOLVED_MDNS_IPV6 : SD_RESOLVED_MDNS_IPV4);
+
+ default:
+ return f;
+ }
+}
+
+static inline size_t dns_packet_size_max(DnsPacket *p) {
+ assert(p);
+
+ /* Why not insist on a fully initialized max_size during DnsPacket construction? Well, this way it's easy to
+ * allocate a transient, throw-away DnsPacket on the stack by simple zero initialization, without having to
+ * deal with explicit field initialization. */
+
+ return p->max_size != 0 ? p->max_size : DNS_PACKET_SIZE_MAX;
+}
diff --git a/src/resolve/resolved-dns-query.c b/src/resolve/resolved-dns-query.c
new file mode 100644
index 0000000..8ee4fd8
--- /dev/null
+++ b/src/resolve/resolved-dns-query.c
@@ -0,0 +1,1041 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "dns-type.h"
+#include "hostname-util.h"
+#include "local-addresses.h"
+#include "resolved-dns-query.h"
+#include "resolved-dns-synthesize.h"
+#include "resolved-etc-hosts.h"
+#include "string-util.h"
+
+#define CNAME_MAX 8
+#define QUERIES_MAX 2048
+#define AUXILIARY_QUERIES_MAX 64
+
+static int dns_query_candidate_new(DnsQueryCandidate **ret, DnsQuery *q, DnsScope *s) {
+ DnsQueryCandidate *c;
+
+ assert(ret);
+ assert(q);
+ assert(s);
+
+ c = new(DnsQueryCandidate, 1);
+ if (!c)
+ return -ENOMEM;
+
+ *c = (DnsQueryCandidate) {
+ .n_ref = 1,
+ .query = q,
+ .scope = s,
+ };
+
+ LIST_PREPEND(candidates_by_query, q->candidates, c);
+ LIST_PREPEND(candidates_by_scope, s->query_candidates, c);
+
+ *ret = c;
+ return 0;
+}
+
+static void dns_query_candidate_stop(DnsQueryCandidate *c) {
+ DnsTransaction *t;
+
+ assert(c);
+
+ while ((t = set_steal_first(c->transactions))) {
+ set_remove(t->notify_query_candidates, c);
+ set_remove(t->notify_query_candidates_done, c);
+ dns_transaction_gc(t);
+ }
+}
+
+static DnsQueryCandidate* dns_query_candidate_free(DnsQueryCandidate *c) {
+ if (!c)
+ return NULL;
+
+ dns_query_candidate_stop(c);
+
+ set_free(c->transactions);
+ dns_search_domain_unref(c->search_domain);
+
+ if (c->query)
+ LIST_REMOVE(candidates_by_query, c->query->candidates, c);
+
+ if (c->scope)
+ LIST_REMOVE(candidates_by_scope, c->scope->query_candidates, c);
+
+ return mfree(c);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(DnsQueryCandidate, dns_query_candidate, dns_query_candidate_free);
+
+static int dns_query_candidate_next_search_domain(DnsQueryCandidate *c) {
+ DnsSearchDomain *next;
+
+ assert(c);
+
+ if (c->search_domain && c->search_domain->linked)
+ next = c->search_domain->domains_next;
+ else
+ next = dns_scope_get_search_domains(c->scope);
+
+ for (;;) {
+ if (!next) /* We hit the end of the list */
+ return 0;
+
+ if (!next->route_only)
+ break;
+
+ /* Skip over route-only domains */
+ next = next->domains_next;
+ }
+
+ dns_search_domain_unref(c->search_domain);
+ c->search_domain = dns_search_domain_ref(next);
+
+ return 1;
+}
+
+static int dns_query_candidate_add_transaction(DnsQueryCandidate *c, DnsResourceKey *key) {
+ _cleanup_(dns_transaction_gcp) DnsTransaction *t = NULL;
+ int r;
+
+ assert(c);
+ assert(key);
+
+ t = dns_scope_find_transaction(c->scope, key, true);
+ if (!t) {
+ r = dns_transaction_new(&t, c->scope, key);
+ if (r < 0)
+ return r;
+ } else if (set_contains(c->transactions, t))
+ return 0;
+
+ r = set_ensure_allocated(&t->notify_query_candidates_done, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_put(&t->notify_query_candidates, NULL, c);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_put(&c->transactions, NULL, t);
+ if (r < 0) {
+ (void) set_remove(t->notify_query_candidates, c);
+ return r;
+ }
+
+ t->clamp_ttl = c->query->clamp_ttl;
+ TAKE_PTR(t);
+ return 1;
+}
+
+static int dns_query_candidate_go(DnsQueryCandidate *c) {
+ _cleanup_(dns_query_candidate_unrefp) DnsQueryCandidate *keep_c = NULL;
+ DnsTransaction *t;
+ int r;
+ unsigned n = 0;
+
+ assert(c);
+
+ /* Let's keep a reference to the query while we're operating */
+ keep_c = dns_query_candidate_ref(c);
+
+ /* Start the transactions that are not started yet */
+ SET_FOREACH(t, c->transactions) {
+ if (t->state != DNS_TRANSACTION_NULL)
+ continue;
+
+ r = dns_transaction_go(t);
+ if (r < 0)
+ return r;
+
+ n++;
+ }
+
+ /* If there was nothing to start, then let's proceed immediately */
+ if (n == 0)
+ dns_query_candidate_notify(c);
+
+ return 0;
+}
+
+static DnsTransactionState dns_query_candidate_state(DnsQueryCandidate *c) {
+ DnsTransactionState state = DNS_TRANSACTION_NO_SERVERS;
+ DnsTransaction *t;
+
+ assert(c);
+
+ if (c->error_code != 0)
+ return DNS_TRANSACTION_ERRNO;
+
+ SET_FOREACH(t, c->transactions) {
+
+ switch (t->state) {
+
+ case DNS_TRANSACTION_NULL:
+ /* If there's a NULL transaction pending, then
+ * this means not all transactions where
+ * started yet, and we were called from within
+ * the stackframe that is supposed to start
+ * remaining transactions. In this case,
+ * simply claim the candidate is pending. */
+
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ /* If there's one transaction currently in
+ * VALIDATING state, then this means there's
+ * also one in PENDING state, hence we can
+ * return PENDING immediately. */
+ return DNS_TRANSACTION_PENDING;
+
+ case DNS_TRANSACTION_SUCCESS:
+ state = t->state;
+ break;
+
+ default:
+ if (state != DNS_TRANSACTION_SUCCESS)
+ state = t->state;
+
+ break;
+ }
+ }
+
+ return state;
+}
+
+static int dns_query_candidate_setup_transactions(DnsQueryCandidate *c) {
+ DnsQuestion *question;
+ DnsResourceKey *key;
+ int n = 0, r;
+
+ assert(c);
+
+ dns_query_candidate_stop(c);
+
+ question = dns_query_question_for_protocol(c->query, c->scope->protocol);
+
+ /* Create one transaction per question key */
+ DNS_QUESTION_FOREACH(key, question) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *new_key = NULL;
+ DnsResourceKey *qkey;
+
+ if (c->search_domain) {
+ r = dns_resource_key_new_append_suffix(&new_key, key, c->search_domain->name);
+ if (r < 0)
+ goto fail;
+
+ qkey = new_key;
+ } else
+ qkey = key;
+
+ if (!dns_scope_good_key(c->scope, qkey))
+ continue;
+
+ r = dns_query_candidate_add_transaction(c, qkey);
+ if (r < 0)
+ goto fail;
+
+ n++;
+ }
+
+ return n;
+
+fail:
+ dns_query_candidate_stop(c);
+ return r;
+}
+
+void dns_query_candidate_notify(DnsQueryCandidate *c) {
+ DnsTransactionState state;
+ int r;
+
+ assert(c);
+
+ state = dns_query_candidate_state(c);
+
+ if (DNS_TRANSACTION_IS_LIVE(state))
+ return;
+
+ if (state != DNS_TRANSACTION_SUCCESS && c->search_domain) {
+
+ r = dns_query_candidate_next_search_domain(c);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ /* OK, there's another search domain to try, let's do so. */
+
+ r = dns_query_candidate_setup_transactions(c);
+ if (r < 0)
+ goto fail;
+
+ if (r > 0) {
+ /* New transactions where queued. Start them and wait */
+
+ r = dns_query_candidate_go(c);
+ if (r < 0)
+ goto fail;
+
+ return;
+ }
+ }
+
+ }
+
+ dns_query_ready(c->query);
+ return;
+
+fail:
+ c->error_code = log_warning_errno(r, "Failed to follow search domains: %m");
+ dns_query_ready(c->query);
+}
+
+static void dns_query_stop(DnsQuery *q) {
+ DnsQueryCandidate *c;
+
+ assert(q);
+
+ q->timeout_event_source = sd_event_source_unref(q->timeout_event_source);
+
+ LIST_FOREACH(candidates_by_query, c, q->candidates)
+ dns_query_candidate_stop(c);
+}
+
+static void dns_query_unref_candidates(DnsQuery *q) {
+ assert(q);
+
+ while (q->candidates)
+ dns_query_candidate_unref(q->candidates);
+}
+
+static void dns_query_reset_answer(DnsQuery *q) {
+ assert(q);
+
+ q->answer = dns_answer_unref(q->answer);
+ q->answer_rcode = 0;
+ q->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ q->answer_errno = 0;
+ q->answer_authenticated = false;
+ q->answer_protocol = _DNS_PROTOCOL_INVALID;
+ q->answer_family = AF_UNSPEC;
+ q->answer_search_domain = dns_search_domain_unref(q->answer_search_domain);
+}
+
+DnsQuery *dns_query_free(DnsQuery *q) {
+ if (!q)
+ return NULL;
+
+ while (q->auxiliary_queries)
+ dns_query_free(q->auxiliary_queries);
+
+ if (q->auxiliary_for) {
+ assert(q->auxiliary_for->n_auxiliary_queries > 0);
+ q->auxiliary_for->n_auxiliary_queries--;
+ LIST_REMOVE(auxiliary_queries, q->auxiliary_for->auxiliary_queries, q);
+ }
+
+ dns_query_unref_candidates(q);
+
+ dns_question_unref(q->question_idna);
+ dns_question_unref(q->question_utf8);
+
+ dns_query_reset_answer(q);
+
+ sd_bus_message_unref(q->bus_request);
+ sd_bus_track_unref(q->bus_track);
+
+ if (q->varlink_request) {
+ varlink_set_userdata(q->varlink_request, NULL);
+ varlink_unref(q->varlink_request);
+ }
+
+ dns_packet_unref(q->request_dns_packet);
+ dns_packet_unref(q->reply_dns_packet);
+
+ if (q->request_dns_stream) {
+ /* Detach the stream from our query, in case something else keeps a reference to it. */
+ (void) set_remove(q->request_dns_stream->queries, q);
+ q->request_dns_stream = dns_stream_unref(q->request_dns_stream);
+ }
+
+ free(q->request_address_string);
+
+ if (q->manager) {
+ LIST_REMOVE(queries, q->manager->dns_queries, q);
+ q->manager->n_dns_queries--;
+ }
+
+ return mfree(q);
+}
+
+int dns_query_new(
+ Manager *m,
+ DnsQuery **ret,
+ DnsQuestion *question_utf8,
+ DnsQuestion *question_idna,
+ int ifindex,
+ uint64_t flags) {
+
+ _cleanup_(dns_query_freep) DnsQuery *q = NULL;
+ DnsResourceKey *key;
+ bool good = false;
+ int r;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ assert(m);
+
+ if (dns_question_size(question_utf8) > 0) {
+ r = dns_question_is_valid_for_query(question_utf8);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ good = true;
+ }
+
+ /* If the IDNA and UTF8 questions are the same, merge their references */
+ r = dns_question_is_equal(question_idna, question_utf8);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ question_idna = question_utf8;
+ else {
+ if (dns_question_size(question_idna) > 0) {
+ r = dns_question_is_valid_for_query(question_idna);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ good = true;
+ }
+ }
+
+ if (!good) /* don't allow empty queries */
+ return -EINVAL;
+
+ if (m->n_dns_queries >= QUERIES_MAX)
+ return -EBUSY;
+
+ q = new(DnsQuery, 1);
+ if (!q)
+ return -ENOMEM;
+
+ *q = (DnsQuery) {
+ .question_utf8 = dns_question_ref(question_utf8),
+ .question_idna = dns_question_ref(question_idna),
+ .ifindex = ifindex,
+ .flags = flags,
+ .answer_dnssec_result = _DNSSEC_RESULT_INVALID,
+ .answer_protocol = _DNS_PROTOCOL_INVALID,
+ .answer_family = AF_UNSPEC,
+ };
+
+ /* First dump UTF8 question */
+ DNS_QUESTION_FOREACH(key, question_utf8)
+ log_debug("Looking up RR for %s.",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+
+ /* And then dump the IDNA question, but only what hasn't been dumped already through the UTF8 question. */
+ DNS_QUESTION_FOREACH(key, question_idna) {
+ r = dns_question_contains(question_utf8, key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ log_debug("Looking up IDNA RR for %s.",
+ dns_resource_key_to_string(key, key_str, sizeof key_str));
+ }
+
+ LIST_PREPEND(queries, m->dns_queries, q);
+ m->n_dns_queries++;
+ q->manager = m;
+
+ if (ret)
+ *ret = q;
+ q = NULL;
+
+ return 0;
+}
+
+int dns_query_make_auxiliary(DnsQuery *q, DnsQuery *auxiliary_for) {
+ assert(q);
+ assert(auxiliary_for);
+
+ /* Ensure that the query is not auxiliary yet, and
+ * nothing else is auxiliary to it either */
+ assert(!q->auxiliary_for);
+ assert(!q->auxiliary_queries);
+
+ /* Ensure that the unit we shall be made auxiliary for isn't
+ * auxiliary itself */
+ assert(!auxiliary_for->auxiliary_for);
+
+ if (auxiliary_for->n_auxiliary_queries >= AUXILIARY_QUERIES_MAX)
+ return -EAGAIN;
+
+ LIST_PREPEND(auxiliary_queries, auxiliary_for->auxiliary_queries, q);
+ q->auxiliary_for = auxiliary_for;
+
+ auxiliary_for->n_auxiliary_queries++;
+ return 0;
+}
+
+void dns_query_complete(DnsQuery *q, DnsTransactionState state) {
+ assert(q);
+ assert(!DNS_TRANSACTION_IS_LIVE(state));
+ assert(DNS_TRANSACTION_IS_LIVE(q->state));
+
+ /* Note that this call might invalidate the query. Callers should hence not attempt to access the
+ * query or transaction after calling this function. */
+
+ q->state = state;
+
+ dns_query_stop(q);
+ if (q->complete)
+ q->complete(q);
+}
+
+static int on_query_timeout(sd_event_source *s, usec_t usec, void *userdata) {
+ DnsQuery *q = userdata;
+
+ assert(s);
+ assert(q);
+
+ dns_query_complete(q, DNS_TRANSACTION_TIMEOUT);
+ return 0;
+}
+
+static int dns_query_add_candidate(DnsQuery *q, DnsScope *s) {
+ _cleanup_(dns_query_candidate_unrefp) DnsQueryCandidate *c = NULL;
+ int r;
+
+ assert(q);
+ assert(s);
+
+ r = dns_query_candidate_new(&c, q, s);
+ if (r < 0)
+ return r;
+
+ /* If this a single-label domain on DNS, we might append a suitable search domain first. */
+ if (!FLAGS_SET(q->flags, SD_RESOLVED_NO_SEARCH) &&
+ dns_scope_name_wants_search_domain(s, dns_question_first_name(q->question_idna))) {
+ /* OK, we want a search domain now. Let's find one for this scope */
+
+ r = dns_query_candidate_next_search_domain(c);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_query_candidate_setup_transactions(c);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(c);
+ return 0;
+}
+
+static int dns_query_synthesize_reply(DnsQuery *q, DnsTransactionState *state) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ int r;
+
+ assert(q);
+ assert(state);
+
+ /* Tries to synthesize localhost RR replies (and others) where appropriate. Note that this is done *after* the
+ * the normal lookup finished. The data from the network hence takes precedence over the data we
+ * synthesize. (But note that many scopes refuse to resolve certain domain names) */
+
+ if (!IN_SET(*state,
+ DNS_TRANSACTION_RCODE_FAILURE,
+ DNS_TRANSACTION_NO_SERVERS,
+ DNS_TRANSACTION_TIMEOUT,
+ DNS_TRANSACTION_ATTEMPTS_MAX_REACHED,
+ DNS_TRANSACTION_NETWORK_DOWN,
+ DNS_TRANSACTION_NOT_FOUND))
+ return 0;
+
+ r = dns_synthesize_answer(
+ q->manager,
+ q->question_utf8,
+ q->ifindex,
+ &answer);
+ if (r == -ENXIO) {
+ /* If we get ENXIO this tells us to generate NXDOMAIN unconditionally. */
+
+ dns_query_reset_answer(q);
+ q->answer_rcode = DNS_RCODE_NXDOMAIN;
+ q->answer_protocol = dns_synthesize_protocol(q->flags);
+ q->answer_family = dns_synthesize_family(q->flags);
+ q->answer_authenticated = true;
+ *state = DNS_TRANSACTION_RCODE_FAILURE;
+
+ return 0;
+ }
+ if (r <= 0)
+ return r;
+
+ dns_query_reset_answer(q);
+
+ q->answer = TAKE_PTR(answer);
+ q->answer_rcode = DNS_RCODE_SUCCESS;
+ q->answer_protocol = dns_synthesize_protocol(q->flags);
+ q->answer_family = dns_synthesize_family(q->flags);
+ q->answer_authenticated = true;
+
+ *state = DNS_TRANSACTION_SUCCESS;
+
+ return 1;
+}
+
+static int dns_query_try_etc_hosts(DnsQuery *q) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ int r;
+
+ assert(q);
+
+ /* Looks in /etc/hosts for matching entries. Note that this is done *before* the normal lookup is
+ * done. The data from /etc/hosts hence takes precedence over the network. */
+
+ r = manager_etc_hosts_lookup(
+ q->manager,
+ q->question_utf8,
+ &answer);
+ if (r <= 0)
+ return r;
+
+ dns_query_reset_answer(q);
+
+ q->answer = TAKE_PTR(answer);
+ q->answer_rcode = DNS_RCODE_SUCCESS;
+ q->answer_protocol = dns_synthesize_protocol(q->flags);
+ q->answer_family = dns_synthesize_family(q->flags);
+ q->answer_authenticated = true;
+
+ return 1;
+}
+
+int dns_query_go(DnsQuery *q) {
+ DnsScopeMatch found = DNS_SCOPE_NO;
+ DnsScope *s, *first = NULL;
+ DnsQueryCandidate *c;
+ int r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_NULL)
+ return 0;
+
+ r = dns_query_try_etc_hosts(q);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ dns_query_complete(q, DNS_TRANSACTION_SUCCESS);
+ return 1;
+ }
+
+ LIST_FOREACH(scopes, s, q->manager->dns_scopes) {
+ DnsScopeMatch match;
+ const char *name;
+
+ name = dns_question_first_name(dns_query_question_for_protocol(q, s->protocol));
+ if (!name)
+ continue;
+
+ match = dns_scope_good_domain(s, q->ifindex, q->flags, name);
+ if (match < 0) {
+ log_debug("Couldn't check if '%s' matches against scope, ignoring.", name);
+ continue;
+ }
+
+ if (match > found) { /* Does this match better? If so, remember how well it matched, and the first one
+ * that matches this well */
+ found = match;
+ first = s;
+ }
+ }
+
+ if (found == DNS_SCOPE_NO) {
+ DnsTransactionState state = DNS_TRANSACTION_NO_SERVERS;
+
+ r = dns_query_synthesize_reply(q, &state);
+ if (r < 0)
+ return r;
+
+ dns_query_complete(q, state);
+ return 1;
+ }
+
+ r = dns_query_add_candidate(q, first);
+ if (r < 0)
+ goto fail;
+
+ LIST_FOREACH(scopes, s, first->scopes_next) {
+ DnsScopeMatch match;
+ const char *name;
+
+ name = dns_question_first_name(dns_query_question_for_protocol(q, s->protocol));
+ if (!name)
+ continue;
+
+ match = dns_scope_good_domain(s, q->ifindex, q->flags, name);
+ if (match < 0) {
+ log_debug("Couldn't check if '%s' matches against scope, ignoring.", name);
+ continue;
+ }
+
+ if (match < found)
+ continue;
+
+ r = dns_query_add_candidate(q, s);
+ if (r < 0)
+ goto fail;
+ }
+
+ dns_query_reset_answer(q);
+
+ r = sd_event_add_time_relative(
+ q->manager->event,
+ &q->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ SD_RESOLVED_QUERY_TIMEOUT_USEC,
+ 0, on_query_timeout, q);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(q->timeout_event_source, "query-timeout");
+
+ q->state = DNS_TRANSACTION_PENDING;
+ q->block_ready++;
+
+ /* Start the transactions */
+ LIST_FOREACH(candidates_by_query, c, q->candidates) {
+ r = dns_query_candidate_go(c);
+ if (r < 0) {
+ q->block_ready--;
+ goto fail;
+ }
+ }
+
+ q->block_ready--;
+ dns_query_ready(q);
+
+ return 1;
+
+fail:
+ dns_query_stop(q);
+ return r;
+}
+
+static void dns_query_accept(DnsQuery *q, DnsQueryCandidate *c) {
+ DnsTransactionState state = DNS_TRANSACTION_NO_SERVERS;
+ bool has_authenticated = false, has_non_authenticated = false;
+ DnssecResult dnssec_result_authenticated = _DNSSEC_RESULT_INVALID, dnssec_result_non_authenticated = _DNSSEC_RESULT_INVALID;
+ DnsTransaction *t;
+ int r;
+
+ assert(q);
+
+ if (!c) {
+ r = dns_query_synthesize_reply(q, &state);
+ if (r < 0)
+ goto fail;
+
+ dns_query_complete(q, state);
+ return;
+ }
+
+ if (c->error_code != 0) {
+ /* If the candidate had an error condition of its own, start with that. */
+ state = DNS_TRANSACTION_ERRNO;
+ q->answer = dns_answer_unref(q->answer);
+ q->answer_rcode = 0;
+ q->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ q->answer_authenticated = false;
+ q->answer_errno = c->error_code;
+ }
+
+ SET_FOREACH(t, c->transactions) {
+
+ switch (t->state) {
+
+ case DNS_TRANSACTION_SUCCESS: {
+ /* We found a successfully reply, merge it into the answer */
+ r = dns_answer_extend(&q->answer, t->answer);
+ if (r < 0)
+ goto fail;
+
+ q->answer_rcode = t->answer_rcode;
+ q->answer_errno = 0;
+
+ if (t->answer_authenticated) {
+ has_authenticated = true;
+ dnssec_result_authenticated = t->answer_dnssec_result;
+ } else {
+ has_non_authenticated = true;
+ dnssec_result_non_authenticated = t->answer_dnssec_result;
+ }
+
+ state = DNS_TRANSACTION_SUCCESS;
+ break;
+ }
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ case DNS_TRANSACTION_ABORTED:
+ /* Ignore transactions that didn't complete */
+ continue;
+
+ default:
+ /* Any kind of failure? Store the data away, if there's nothing stored yet. */
+ if (state == DNS_TRANSACTION_SUCCESS)
+ continue;
+
+ /* If there's already an authenticated negative reply stored, then prefer that over any unauthenticated one */
+ if (q->answer_authenticated && !t->answer_authenticated)
+ continue;
+
+ q->answer = dns_answer_unref(q->answer);
+ q->answer_rcode = t->answer_rcode;
+ q->answer_dnssec_result = t->answer_dnssec_result;
+ q->answer_authenticated = t->answer_authenticated;
+ q->answer_errno = t->answer_errno;
+
+ state = t->state;
+ break;
+ }
+ }
+
+ if (state == DNS_TRANSACTION_SUCCESS) {
+ q->answer_authenticated = has_authenticated && !has_non_authenticated;
+ q->answer_dnssec_result = q->answer_authenticated ? dnssec_result_authenticated : dnssec_result_non_authenticated;
+ }
+
+ q->answer_protocol = c->scope->protocol;
+ q->answer_family = c->scope->family;
+
+ dns_search_domain_unref(q->answer_search_domain);
+ q->answer_search_domain = dns_search_domain_ref(c->search_domain);
+
+ r = dns_query_synthesize_reply(q, &state);
+ if (r < 0)
+ goto fail;
+
+ dns_query_complete(q, state);
+ return;
+
+fail:
+ q->answer_errno = -r;
+ dns_query_complete(q, DNS_TRANSACTION_ERRNO);
+}
+
+void dns_query_ready(DnsQuery *q) {
+
+ DnsQueryCandidate *bad = NULL, *c;
+ bool pending = false;
+
+ assert(q);
+ assert(DNS_TRANSACTION_IS_LIVE(q->state));
+
+ /* Note that this call might invalidate the query. Callers
+ * should hence not attempt to access the query or transaction
+ * after calling this function, unless the block_ready
+ * counter was explicitly bumped before doing so. */
+
+ if (q->block_ready > 0)
+ return;
+
+ LIST_FOREACH(candidates_by_query, c, q->candidates) {
+ DnsTransactionState state;
+
+ state = dns_query_candidate_state(c);
+ switch (state) {
+
+ case DNS_TRANSACTION_SUCCESS:
+ /* One of the candidates is successful,
+ * let's use it, and copy its data out */
+ dns_query_accept(q, c);
+ return;
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ /* One of the candidates is still going on,
+ * let's maybe wait for it */
+ pending = true;
+ break;
+
+ default:
+ /* Any kind of failure */
+ bad = c;
+ break;
+ }
+ }
+
+ if (pending)
+ return;
+
+ dns_query_accept(q, bad);
+}
+
+static int dns_query_cname_redirect(DnsQuery *q, const DnsResourceRecord *cname) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *nq_idna = NULL, *nq_utf8 = NULL;
+ int r, k;
+
+ assert(q);
+
+ q->n_cname_redirects++;
+ if (q->n_cname_redirects > CNAME_MAX)
+ return -ELOOP;
+
+ r = dns_question_cname_redirect(q->question_idna, cname, &nq_idna);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ log_debug("Following CNAME/DNAME %s → %s.", dns_question_first_name(q->question_idna), dns_question_first_name(nq_idna));
+
+ k = dns_question_is_equal(q->question_idna, q->question_utf8);
+ if (k < 0)
+ return r;
+ if (k > 0) {
+ /* Same question? Shortcut new question generation */
+ nq_utf8 = dns_question_ref(nq_idna);
+ k = r;
+ } else {
+ k = dns_question_cname_redirect(q->question_utf8, cname, &nq_utf8);
+ if (k < 0)
+ return k;
+ else if (k > 0)
+ log_debug("Following UTF8 CNAME/DNAME %s → %s.", dns_question_first_name(q->question_utf8), dns_question_first_name(nq_utf8));
+ }
+
+ if (r == 0 && k == 0) /* No actual cname happened? */
+ return -ELOOP;
+
+ if (q->answer_protocol == DNS_PROTOCOL_DNS)
+ /* Don't permit CNAME redirects from unicast DNS to LLMNR or MulticastDNS, so that global resources
+ * cannot invade the local namespace. The opposite way we permit: local names may redirect to global
+ * ones. */
+ q->flags &= ~(SD_RESOLVED_LLMNR|SD_RESOLVED_MDNS); /* mask away the local protocols */
+
+ /* Turn off searching for the new name */
+ q->flags |= SD_RESOLVED_NO_SEARCH;
+
+ dns_question_unref(q->question_idna);
+ q->question_idna = TAKE_PTR(nq_idna);
+
+ dns_question_unref(q->question_utf8);
+ q->question_utf8 = TAKE_PTR(nq_utf8);
+
+ dns_query_unref_candidates(q);
+ dns_query_reset_answer(q);
+
+ q->state = DNS_TRANSACTION_NULL;
+
+ return 0;
+}
+
+int dns_query_process_cname(DnsQuery *q) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *cname = NULL;
+ DnsQuestion *question;
+ DnsResourceRecord *rr;
+ int r;
+
+ assert(q);
+
+ if (!IN_SET(q->state, DNS_TRANSACTION_SUCCESS, DNS_TRANSACTION_NULL))
+ return DNS_QUERY_NOMATCH;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH(rr, q->answer) {
+ r = dns_question_matches_rr(question, rr, DNS_SEARCH_DOMAIN_NAME(q->answer_search_domain));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return DNS_QUERY_MATCH; /* The answer matches directly, no need to follow cnames */
+
+ r = dns_question_matches_cname_or_dname(question, rr, DNS_SEARCH_DOMAIN_NAME(q->answer_search_domain));
+ if (r < 0)
+ return r;
+ if (r > 0 && !cname)
+ cname = dns_resource_record_ref(rr);
+ }
+
+ if (!cname)
+ return DNS_QUERY_NOMATCH; /* No match and no cname to follow */
+
+ if (q->flags & SD_RESOLVED_NO_CNAME)
+ return -ELOOP;
+
+ if (!q->answer_authenticated)
+ q->previous_redirect_unauthenticated = true;
+
+ /* OK, let's actually follow the CNAME */
+ r = dns_query_cname_redirect(q, cname);
+ if (r < 0)
+ return r;
+
+ /* Let's see if the answer can already answer the new
+ * redirected question */
+ r = dns_query_process_cname(q);
+ if (r != DNS_QUERY_NOMATCH)
+ return r;
+
+ /* OK, it cannot, let's begin with the new query */
+ r = dns_query_go(q);
+ if (r < 0)
+ return r;
+
+ return DNS_QUERY_RESTARTED; /* We restarted the query for a new cname */
+}
+
+DnsQuestion* dns_query_question_for_protocol(DnsQuery *q, DnsProtocol protocol) {
+ assert(q);
+
+ switch (protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ return q->question_idna;
+
+ case DNS_PROTOCOL_MDNS:
+ case DNS_PROTOCOL_LLMNR:
+ return q->question_utf8;
+
+ default:
+ return NULL;
+ }
+}
+
+const char *dns_query_string(DnsQuery *q) {
+ const char *name;
+ int r;
+
+ /* Returns a somewhat useful human-readable lookup key string for this query */
+
+ if (q->request_address_string)
+ return q->request_address_string;
+
+ if (q->request_address_valid) {
+ r = in_addr_to_string(q->request_family, &q->request_address, &q->request_address_string);
+ if (r >= 0)
+ return q->request_address_string;
+ }
+
+ name = dns_question_first_name(q->question_utf8);
+ if (name)
+ return name;
+
+ return dns_question_first_name(q->question_idna);
+}
+
+bool dns_query_fully_authenticated(DnsQuery *q) {
+ assert(q);
+
+ return q->answer_authenticated && !q->previous_redirect_unauthenticated;
+}
diff --git a/src/resolve/resolved-dns-query.h b/src/resolve/resolved-dns-query.h
new file mode 100644
index 0000000..133076d
--- /dev/null
+++ b/src/resolve/resolved-dns-query.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "set.h"
+#include "varlink.h"
+
+typedef struct DnsQueryCandidate DnsQueryCandidate;
+typedef struct DnsQuery DnsQuery;
+typedef struct DnsStubListenerExtra DnsStubListenerExtra;
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-search-domain.h"
+#include "resolved-dns-transaction.h"
+
+struct DnsQueryCandidate {
+ unsigned n_ref;
+ int error_code;
+
+ DnsQuery *query;
+ DnsScope *scope;
+
+ DnsSearchDomain *search_domain;
+
+ Set *transactions;
+
+ LIST_FIELDS(DnsQueryCandidate, candidates_by_query);
+ LIST_FIELDS(DnsQueryCandidate, candidates_by_scope);
+};
+
+struct DnsQuery {
+ Manager *manager;
+
+ /* When resolving a service, we first create a TXT+SRV query, and then for the hostnames we discover
+ * auxiliary A+AAAA queries. This pointer always points from the auxiliary queries back to the
+ * TXT+SRV query. */
+ DnsQuery *auxiliary_for;
+ LIST_HEAD(DnsQuery, auxiliary_queries);
+ unsigned n_auxiliary_queries;
+ int auxiliary_result;
+
+ /* The question, formatted in IDNA for use on classic DNS, and as UTF8 for use in LLMNR or mDNS. Note
+ * that even on classic DNS some labels might use UTF8 encoding. Specifically, DNS-SD service names
+ * (in contrast to their domain suffixes) use UTF-8 encoding even on DNS. Thus, the difference
+ * between these two fields is mostly relevant only for explicit *hostname* lookups as well as the
+ * domain suffixes of service lookups. */
+ DnsQuestion *question_idna;
+ DnsQuestion *question_utf8;
+
+ uint64_t flags;
+ int ifindex;
+
+ /* If true, the RR TTLs of the answer will be clamped by their current left validity in the cache */
+ bool clamp_ttl;
+
+ DnsTransactionState state;
+ unsigned n_cname_redirects;
+
+ LIST_HEAD(DnsQueryCandidate, candidates);
+ sd_event_source *timeout_event_source;
+
+ /* Discovered data */
+ DnsAnswer *answer;
+ int answer_rcode;
+ DnssecResult answer_dnssec_result;
+ bool answer_authenticated;
+ DnsProtocol answer_protocol;
+ int answer_family;
+ DnsSearchDomain *answer_search_domain;
+ int answer_errno; /* if state is DNS_TRANSACTION_ERRNO */
+ bool previous_redirect_unauthenticated;
+
+ /* Bus + Varlink client information */
+ sd_bus_message *bus_request;
+ Varlink *varlink_request;
+ int request_family;
+ bool request_address_valid;
+ union in_addr_union request_address;
+ unsigned block_all_complete;
+ char *request_address_string;
+
+ /* DNS stub information */
+ DnsPacket *request_dns_packet;
+ DnsStream *request_dns_stream;
+ DnsPacket *reply_dns_packet;
+ DnsStubListenerExtra *stub_listener_extra;
+
+ /* Completion callback */
+ void (*complete)(DnsQuery* q);
+ unsigned block_ready;
+
+ sd_bus_track *bus_track;
+
+ LIST_FIELDS(DnsQuery, queries);
+ LIST_FIELDS(DnsQuery, auxiliary_queries);
+};
+
+enum {
+ DNS_QUERY_MATCH,
+ DNS_QUERY_NOMATCH,
+ DNS_QUERY_RESTARTED,
+};
+
+DnsQueryCandidate* dns_query_candidate_ref(DnsQueryCandidate*);
+DnsQueryCandidate* dns_query_candidate_unref(DnsQueryCandidate*);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsQueryCandidate*, dns_query_candidate_unref);
+
+void dns_query_candidate_notify(DnsQueryCandidate *c);
+
+int dns_query_new(Manager *m, DnsQuery **q, DnsQuestion *question_utf8, DnsQuestion *question_idna, int family, uint64_t flags);
+DnsQuery *dns_query_free(DnsQuery *q);
+
+int dns_query_make_auxiliary(DnsQuery *q, DnsQuery *auxiliary_for);
+
+int dns_query_go(DnsQuery *q);
+void dns_query_ready(DnsQuery *q);
+
+int dns_query_process_cname(DnsQuery *q);
+
+void dns_query_complete(DnsQuery *q, DnsTransactionState state);
+
+DnsQuestion* dns_query_question_for_protocol(DnsQuery *q, DnsProtocol protocol);
+
+const char *dns_query_string(DnsQuery *q);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsQuery*, dns_query_free);
+
+bool dns_query_fully_authenticated(DnsQuery *q);
diff --git a/src/resolve/resolved-dns-question.c b/src/resolve/resolved-dns-question.c
new file mode 100644
index 0000000..0471708
--- /dev/null
+++ b/src/resolve/resolved-dns-question.c
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "dns-type.h"
+#include "resolved-dns-question.h"
+
+DnsQuestion *dns_question_new(size_t n) {
+ DnsQuestion *q;
+
+ if (n > UINT16_MAX) /* We can only place 64K key in an question section at max */
+ n = UINT16_MAX;
+
+ q = malloc0(offsetof(DnsQuestion, keys) + sizeof(DnsResourceKey*) * n);
+ if (!q)
+ return NULL;
+
+ q->n_ref = 1;
+ q->n_allocated = n;
+
+ return q;
+}
+
+static DnsQuestion *dns_question_free(DnsQuestion *q) {
+ size_t i;
+
+ assert(q);
+
+ for (i = 0; i < q->n_keys; i++)
+ dns_resource_key_unref(q->keys[i]);
+ return mfree(q);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsQuestion, dns_question, dns_question_free);
+
+int dns_question_add_raw(DnsQuestion *q, DnsResourceKey *key) {
+ /* Insert without checking for duplicates. */
+
+ assert(key);
+ assert(q);
+
+ if (q->n_keys >= q->n_allocated)
+ return -ENOSPC;
+
+ q->keys[q->n_keys++] = dns_resource_key_ref(key);
+ return 0;
+}
+
+int dns_question_add(DnsQuestion *q, DnsResourceKey *key) {
+ int r;
+
+ assert(key);
+
+ if (!q)
+ return -ENOSPC;
+
+ for (size_t i = 0; i < q->n_keys; i++) {
+ r = dns_resource_key_equal(q->keys[i], key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+ }
+
+ return dns_question_add_raw(q, key);
+}
+
+int dns_question_matches_rr(DnsQuestion *q, DnsResourceRecord *rr, const char *search_domain) {
+ size_t i;
+ int r;
+
+ assert(rr);
+
+ if (!q)
+ return 0;
+
+ for (i = 0; i < q->n_keys; i++) {
+ r = dns_resource_key_match_rr(q->keys[i], rr, search_domain);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_question_matches_cname_or_dname(DnsQuestion *q, DnsResourceRecord *rr, const char *search_domain) {
+ size_t i;
+ int r;
+
+ assert(rr);
+
+ if (!q)
+ return 0;
+
+ if (!IN_SET(rr->key->type, DNS_TYPE_CNAME, DNS_TYPE_DNAME))
+ return 0;
+
+ for (i = 0; i < q->n_keys; i++) {
+ /* For a {C,D}NAME record we can never find a matching {C,D}NAME record */
+ if (!dns_type_may_redirect(q->keys[i]->type))
+ return 0;
+
+ r = dns_resource_key_match_cname_or_dname(q->keys[i], rr->key, search_domain);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_question_is_valid_for_query(DnsQuestion *q) {
+ const char *name;
+ size_t i;
+ int r;
+
+ if (!q)
+ return 0;
+
+ if (q->n_keys <= 0)
+ return 0;
+
+ if (q->n_keys > 65535)
+ return 0;
+
+ name = dns_resource_key_name(q->keys[0]);
+ if (!name)
+ return 0;
+
+ /* Check that all keys in this question bear the same name */
+ for (i = 0; i < q->n_keys; i++) {
+ assert(q->keys[i]);
+
+ if (i > 0) {
+ r = dns_name_equal(dns_resource_key_name(q->keys[i]), name);
+ if (r <= 0)
+ return r;
+ }
+
+ if (!dns_type_is_valid_query(q->keys[i]->type))
+ return 0;
+ }
+
+ return 1;
+}
+
+int dns_question_contains(DnsQuestion *a, const DnsResourceKey *k) {
+ size_t j;
+ int r;
+
+ assert(k);
+
+ if (!a)
+ return 0;
+
+ for (j = 0; j < a->n_keys; j++) {
+ r = dns_resource_key_equal(a->keys[j], k);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_question_is_equal(DnsQuestion *a, DnsQuestion *b) {
+ size_t j;
+ int r;
+
+ if (a == b)
+ return 1;
+
+ if (!a)
+ return !b || b->n_keys == 0;
+ if (!b)
+ return a->n_keys == 0;
+
+ /* Checks if all keys in a are also contained b, and vice versa */
+
+ for (j = 0; j < a->n_keys; j++) {
+ r = dns_question_contains(b, a->keys[j]);
+ if (r <= 0)
+ return r;
+ }
+
+ for (j = 0; j < b->n_keys; j++) {
+ r = dns_question_contains(a, b->keys[j]);
+ if (r <= 0)
+ return r;
+ }
+
+ return 1;
+}
+
+int dns_question_cname_redirect(DnsQuestion *q, const DnsResourceRecord *cname, DnsQuestion **ret) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *n = NULL;
+ DnsResourceKey *key;
+ bool same = true;
+ int r;
+
+ assert(cname);
+ assert(ret);
+ assert(IN_SET(cname->key->type, DNS_TYPE_CNAME, DNS_TYPE_DNAME));
+
+ if (dns_question_size(q) <= 0) {
+ *ret = NULL;
+ return 0;
+ }
+
+ DNS_QUESTION_FOREACH(key, q) {
+ _cleanup_free_ char *destination = NULL;
+ const char *d;
+
+ if (cname->key->type == DNS_TYPE_CNAME)
+ d = cname->cname.name;
+ else {
+ r = dns_name_change_suffix(dns_resource_key_name(key), dns_resource_key_name(cname->key), cname->dname.name, &destination);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ d = destination;
+ }
+
+ r = dns_name_equal(dns_resource_key_name(key), d);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ same = false;
+ break;
+ }
+ }
+
+ /* Fully the same, indicate we didn't do a thing */
+ if (same) {
+ *ret = NULL;
+ return 0;
+ }
+
+ n = dns_question_new(q->n_keys);
+ if (!n)
+ return -ENOMEM;
+
+ /* Create a new question, and patch in the new name */
+ DNS_QUESTION_FOREACH(key, q) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *k = NULL;
+
+ k = dns_resource_key_new_redirect(key, cname);
+ if (!k)
+ return -ENOMEM;
+
+ r = dns_question_add(n, k);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(n);
+
+ return 1;
+}
+
+const char *dns_question_first_name(DnsQuestion *q) {
+
+ if (!q)
+ return NULL;
+
+ if (q->n_keys < 1)
+ return NULL;
+
+ return dns_resource_key_name(q->keys[0]);
+}
+
+int dns_question_new_address(DnsQuestion **ret, int family, const char *name, bool convert_idna) {
+ _cleanup_(dns_question_unrefp) DnsQuestion *q = NULL;
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ assert(ret);
+ assert(name);
+
+ if (!IN_SET(family, AF_INET, AF_INET6, AF_UNSPEC))
+ return -EAFNOSUPPORT;
+
+ if (convert_idna) {
+ r = dns_name_apply_idna(name, &buf);
+ if (r < 0)
+ return r;
+ if (r > 0 && !streq(name, buf))
+ name = buf;
+ else
+ /* We did not manage to create convert the idna name, or it's
+ * the same as the original name. We assume the caller already
+ * created an unconverted question, so let's not repeat work
+ * unnecessarily. */
+ return -EALREADY;
+ }
+
+ q = dns_question_new(family == AF_UNSPEC ? 2 : 1);
+ if (!q)
+ return -ENOMEM;
+
+ if (family != AF_INET6) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_A, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+ }
+
+ if (family != AF_INET) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_AAAA, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
+
+int dns_question_new_reverse(DnsQuestion **ret, int family, const union in_addr_union *a) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_question_unrefp) DnsQuestion *q = NULL;
+ _cleanup_free_ char *reverse = NULL;
+ int r;
+
+ assert(ret);
+ assert(a);
+
+ if (!IN_SET(family, AF_INET, AF_INET6, AF_UNSPEC))
+ return -EAFNOSUPPORT;
+
+ r = dns_name_reverse(family, a, &reverse);
+ if (r < 0)
+ return r;
+
+ q = dns_question_new(1);
+ if (!q)
+ return -ENOMEM;
+
+ key = dns_resource_key_new_consume(DNS_CLASS_IN, DNS_TYPE_PTR, reverse);
+ if (!key)
+ return -ENOMEM;
+
+ reverse = NULL;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
+
+int dns_question_new_service(
+ DnsQuestion **ret,
+ const char *service,
+ const char *type,
+ const char *domain,
+ bool with_txt,
+ bool convert_idna) {
+
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_question_unrefp) DnsQuestion *q = NULL;
+ _cleanup_free_ char *buf = NULL, *joined = NULL;
+ const char *name;
+ int r;
+
+ assert(ret);
+
+ /* We support three modes of invocation:
+ *
+ * 1. Only a domain is specified, in which case we assume a properly encoded SRV RR name, including service
+ * type and possibly a service name. If specified in this way we assume it's already IDNA converted if
+ * that's necessary.
+ *
+ * 2. Both service type and a domain specified, in which case a normal SRV RR is assumed, without a DNS-SD
+ * style prefix. In this case we'll IDNA convert the domain, if that's requested.
+ *
+ * 3. All three of service name, type and domain are specified, in which case a DNS-SD service is put
+ * together. The service name is never IDNA converted, and the domain is if requested.
+ *
+ * It's not supported to specify a service name without a type, or no domain name.
+ */
+
+ if (!domain)
+ return -EINVAL;
+
+ if (type) {
+ if (convert_idna) {
+ r = dns_name_apply_idna(domain, &buf);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ domain = buf;
+ }
+
+ r = dns_service_join(service, type, domain, &joined);
+ if (r < 0)
+ return r;
+
+ name = joined;
+ } else {
+ if (service)
+ return -EINVAL;
+
+ name = domain;
+ }
+
+ q = dns_question_new(1 + with_txt);
+ if (!q)
+ return -ENOMEM;
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_SRV, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+
+ if (with_txt) {
+ dns_resource_key_unref(key);
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_TXT, name);
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_question_add(q, key);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
diff --git a/src/resolve/resolved-dns-question.h b/src/resolve/resolved-dns-question.h
new file mode 100644
index 0000000..a6444b0
--- /dev/null
+++ b/src/resolve/resolved-dns-question.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct DnsQuestion DnsQuestion;
+
+#include "macro.h"
+#include "resolved-dns-rr.h"
+
+/* A simple array of resource keys */
+
+struct DnsQuestion {
+ unsigned n_ref;
+ size_t n_keys, n_allocated;
+ DnsResourceKey* keys[0];
+};
+
+DnsQuestion *dns_question_new(size_t n);
+DnsQuestion *dns_question_ref(DnsQuestion *q);
+DnsQuestion *dns_question_unref(DnsQuestion *q);
+
+int dns_question_new_address(DnsQuestion **ret, int family, const char *name, bool convert_idna);
+int dns_question_new_reverse(DnsQuestion **ret, int family, const union in_addr_union *a);
+int dns_question_new_service(DnsQuestion **ret, const char *service, const char *type, const char *domain, bool with_txt, bool convert_idna);
+
+int dns_question_add_raw(DnsQuestion *q, DnsResourceKey *key);
+int dns_question_add(DnsQuestion *q, DnsResourceKey *key);
+
+int dns_question_matches_rr(DnsQuestion *q, DnsResourceRecord *rr, const char *search_domain);
+int dns_question_matches_cname_or_dname(DnsQuestion *q, DnsResourceRecord *rr, const char* search_domain);
+int dns_question_is_valid_for_query(DnsQuestion *q);
+int dns_question_contains(DnsQuestion *a, const DnsResourceKey *k);
+int dns_question_is_equal(DnsQuestion *a, DnsQuestion *b);
+
+int dns_question_cname_redirect(DnsQuestion *q, const DnsResourceRecord *cname, DnsQuestion **ret);
+
+const char *dns_question_first_name(DnsQuestion *q);
+
+static inline size_t dns_question_size(DnsQuestion *q) {
+ return q ? q->n_keys : 0;
+}
+
+static inline bool dns_question_isempty(DnsQuestion *q) {
+ return dns_question_size(q) <= 0;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsQuestion*, dns_question_unref);
+
+#define _DNS_QUESTION_FOREACH(u, key, q) \
+ for (size_t UNIQ_T(i, u) = ({ \
+ (key) = ((q) && (q)->n_keys > 0) ? (q)->keys[0] : NULL; \
+ 0; \
+ }); \
+ (q) && (UNIQ_T(i, u) < (q)->n_keys); \
+ UNIQ_T(i, u)++, (key) = (UNIQ_T(i, u) < (q)->n_keys ? (q)->keys[UNIQ_T(i, u)] : NULL))
+
+#define DNS_QUESTION_FOREACH(key, q) _DNS_QUESTION_FOREACH(UNIQ, key, q)
diff --git a/src/resolve/resolved-dns-rr.c b/src/resolve/resolved-dns-rr.c
new file mode 100644
index 0000000..52c76ee
--- /dev/null
+++ b/src/resolve/resolved-dns-rr.c
@@ -0,0 +1,1824 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <math.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "dns-type.h"
+#include "escape.h"
+#include "hexdecoct.h"
+#include "memory-util.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-rr.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+DnsResourceKey* dns_resource_key_new(uint16_t class, uint16_t type, const char *name) {
+ DnsResourceKey *k;
+ size_t l;
+
+ assert(name);
+
+ l = strlen(name);
+ k = malloc0(sizeof(DnsResourceKey) + l + 1);
+ if (!k)
+ return NULL;
+
+ k->n_ref = 1;
+ k->class = class;
+ k->type = type;
+
+ strcpy((char*) k + sizeof(DnsResourceKey), name);
+
+ return k;
+}
+
+DnsResourceKey* dns_resource_key_new_redirect(const DnsResourceKey *key, const DnsResourceRecord *cname) {
+ int r;
+
+ assert(key);
+ assert(cname);
+
+ assert(IN_SET(cname->key->type, DNS_TYPE_CNAME, DNS_TYPE_DNAME));
+
+ if (cname->key->type == DNS_TYPE_CNAME)
+ return dns_resource_key_new(key->class, key->type, cname->cname.name);
+ else {
+ DnsResourceKey *k;
+ char *destination = NULL;
+
+ r = dns_name_change_suffix(dns_resource_key_name(key), dns_resource_key_name(cname->key), cname->dname.name, &destination);
+ if (r < 0)
+ return NULL;
+ if (r == 0)
+ return dns_resource_key_ref((DnsResourceKey*) key);
+
+ k = dns_resource_key_new_consume(key->class, key->type, destination);
+ if (!k)
+ return mfree(destination);
+
+ return k;
+ }
+}
+
+int dns_resource_key_new_append_suffix(DnsResourceKey **ret, DnsResourceKey *key, char *name) {
+ DnsResourceKey *new_key;
+ char *joined;
+ int r;
+
+ assert(ret);
+ assert(key);
+ assert(name);
+
+ if (dns_name_is_root(name)) {
+ *ret = dns_resource_key_ref(key);
+ return 0;
+ }
+
+ r = dns_name_concat(dns_resource_key_name(key), name, 0, &joined);
+ if (r < 0)
+ return r;
+
+ new_key = dns_resource_key_new_consume(key->class, key->type, joined);
+ if (!new_key) {
+ free(joined);
+ return -ENOMEM;
+ }
+
+ *ret = new_key;
+ return 0;
+}
+
+DnsResourceKey* dns_resource_key_new_consume(uint16_t class, uint16_t type, char *name) {
+ DnsResourceKey *k;
+
+ assert(name);
+
+ k = new(DnsResourceKey, 1);
+ if (!k)
+ return NULL;
+
+ *k = (DnsResourceKey) {
+ .n_ref = 1,
+ .class = class,
+ .type = type,
+ ._name = name,
+ };
+
+ return k;
+}
+
+DnsResourceKey* dns_resource_key_ref(DnsResourceKey *k) {
+
+ if (!k)
+ return NULL;
+
+ /* Static/const keys created with DNS_RESOURCE_KEY_CONST will
+ * set this to -1, they should not be reffed/unreffed */
+ assert(k->n_ref != (unsigned) -1);
+
+ assert(k->n_ref > 0);
+ k->n_ref++;
+
+ return k;
+}
+
+DnsResourceKey* dns_resource_key_unref(DnsResourceKey *k) {
+ if (!k)
+ return NULL;
+
+ assert(k->n_ref != (unsigned) -1);
+ assert(k->n_ref > 0);
+
+ if (k->n_ref == 1) {
+ free(k->_name);
+ free(k);
+ } else
+ k->n_ref--;
+
+ return NULL;
+}
+
+const char* dns_resource_key_name(const DnsResourceKey *key) {
+ const char *name;
+
+ if (!key)
+ return NULL;
+
+ if (key->_name)
+ name = key->_name;
+ else
+ name = (char*) key + sizeof(DnsResourceKey);
+
+ if (dns_name_is_root(name))
+ return ".";
+ else
+ return name;
+}
+
+bool dns_resource_key_is_address(const DnsResourceKey *key) {
+ assert(key);
+
+ /* Check if this is an A or AAAA resource key */
+
+ return key->class == DNS_CLASS_IN && IN_SET(key->type, DNS_TYPE_A, DNS_TYPE_AAAA);
+}
+
+bool dns_resource_key_is_dnssd_ptr(const DnsResourceKey *key) {
+ assert(key);
+
+ /* Check if this is a PTR resource key used in
+ Service Instance Enumeration as described in RFC6763 p4.1. */
+
+ if (key->type != DNS_TYPE_PTR)
+ return false;
+
+ return dns_name_endswith(dns_resource_key_name(key), "_tcp.local") ||
+ dns_name_endswith(dns_resource_key_name(key), "_udp.local");
+}
+
+int dns_resource_key_equal(const DnsResourceKey *a, const DnsResourceKey *b) {
+ int r;
+
+ if (a == b)
+ return 1;
+
+ r = dns_name_equal(dns_resource_key_name(a), dns_resource_key_name(b));
+ if (r <= 0)
+ return r;
+
+ if (a->class != b->class)
+ return 0;
+
+ if (a->type != b->type)
+ return 0;
+
+ return 1;
+}
+
+int dns_resource_key_match_rr(const DnsResourceKey *key, DnsResourceRecord *rr, const char *search_domain) {
+ int r;
+
+ assert(key);
+ assert(rr);
+
+ if (key == rr->key)
+ return 1;
+
+ /* Checks if an rr matches the specified key. If a search
+ * domain is specified, it will also be checked if the key
+ * with the search domain suffixed might match the RR. */
+
+ if (rr->key->class != key->class && key->class != DNS_CLASS_ANY)
+ return 0;
+
+ if (rr->key->type != key->type && key->type != DNS_TYPE_ANY)
+ return 0;
+
+ r = dns_name_equal(dns_resource_key_name(rr->key), dns_resource_key_name(key));
+ if (r != 0)
+ return r;
+
+ if (search_domain) {
+ _cleanup_free_ char *joined = NULL;
+
+ r = dns_name_concat(dns_resource_key_name(key), search_domain, 0, &joined);
+ if (r < 0)
+ return r;
+
+ return dns_name_equal(dns_resource_key_name(rr->key), joined);
+ }
+
+ return 0;
+}
+
+int dns_resource_key_match_cname_or_dname(const DnsResourceKey *key, const DnsResourceKey *cname, const char *search_domain) {
+ int r;
+
+ assert(key);
+ assert(cname);
+
+ if (cname->class != key->class && key->class != DNS_CLASS_ANY)
+ return 0;
+
+ if (cname->type == DNS_TYPE_CNAME)
+ r = dns_name_equal(dns_resource_key_name(key), dns_resource_key_name(cname));
+ else if (cname->type == DNS_TYPE_DNAME)
+ r = dns_name_endswith(dns_resource_key_name(key), dns_resource_key_name(cname));
+ else
+ return 0;
+
+ if (r != 0)
+ return r;
+
+ if (search_domain) {
+ _cleanup_free_ char *joined = NULL;
+
+ r = dns_name_concat(dns_resource_key_name(key), search_domain, 0, &joined);
+ if (r < 0)
+ return r;
+
+ if (cname->type == DNS_TYPE_CNAME)
+ return dns_name_equal(joined, dns_resource_key_name(cname));
+ else if (cname->type == DNS_TYPE_DNAME)
+ return dns_name_endswith(joined, dns_resource_key_name(cname));
+ }
+
+ return 0;
+}
+
+int dns_resource_key_match_soa(const DnsResourceKey *key, const DnsResourceKey *soa) {
+ assert(soa);
+ assert(key);
+
+ /* Checks whether 'soa' is a SOA record for the specified key. */
+
+ if (soa->class != key->class)
+ return 0;
+
+ if (soa->type != DNS_TYPE_SOA)
+ return 0;
+
+ return dns_name_endswith(dns_resource_key_name(key), dns_resource_key_name(soa));
+}
+
+static void dns_resource_key_hash_func(const DnsResourceKey *k, struct siphash *state) {
+ assert(k);
+
+ dns_name_hash_func(dns_resource_key_name(k), state);
+ siphash24_compress(&k->class, sizeof(k->class), state);
+ siphash24_compress(&k->type, sizeof(k->type), state);
+}
+
+static int dns_resource_key_compare_func(const DnsResourceKey *x, const DnsResourceKey *y) {
+ int ret;
+
+ ret = dns_name_compare_func(dns_resource_key_name(x), dns_resource_key_name(y));
+ if (ret != 0)
+ return ret;
+
+ ret = CMP(x->type, y->type);
+ if (ret != 0)
+ return ret;
+
+ ret = CMP(x->class, y->class);
+ if (ret != 0)
+ return ret;
+
+ return 0;
+}
+
+DEFINE_HASH_OPS(dns_resource_key_hash_ops, DnsResourceKey, dns_resource_key_hash_func, dns_resource_key_compare_func);
+
+char* dns_resource_key_to_string(const DnsResourceKey *key, char *buf, size_t buf_size) {
+ const char *c, *t;
+ char *ans = buf;
+
+ /* If we cannot convert the CLASS/TYPE into a known string,
+ use the format recommended by RFC 3597, Section 5. */
+
+ c = dns_class_to_string(key->class);
+ t = dns_type_to_string(key->type);
+
+ snprintf(buf, buf_size, "%s %s%s%.0u %s%s%.0u",
+ dns_resource_key_name(key),
+ strempty(c), c ? "" : "CLASS", c ? 0 : key->class,
+ strempty(t), t ? "" : "TYPE", t ? 0 : key->type);
+
+ return ans;
+}
+
+bool dns_resource_key_reduce(DnsResourceKey **a, DnsResourceKey **b) {
+ assert(a);
+ assert(b);
+
+ /* Try to replace one RR key by another if they are identical, thus saving a bit of memory. Note that we do
+ * this only for RR keys, not for RRs themselves, as they carry a lot of additional metadata (where they come
+ * from, validity data, and suchlike), and cannot be replaced so easily by other RRs that have the same
+ * superficial data. */
+
+ if (!*a)
+ return false;
+ if (!*b)
+ return false;
+
+ /* We refuse merging const keys */
+ if ((*a)->n_ref == (unsigned) -1)
+ return false;
+ if ((*b)->n_ref == (unsigned) -1)
+ return false;
+
+ /* Already the same? */
+ if (*a == *b)
+ return true;
+
+ /* Are they really identical? */
+ if (dns_resource_key_equal(*a, *b) <= 0)
+ return false;
+
+ /* Keep the one which already has more references. */
+ if ((*a)->n_ref > (*b)->n_ref) {
+ dns_resource_key_unref(*b);
+ *b = dns_resource_key_ref(*a);
+ } else {
+ dns_resource_key_unref(*a);
+ *a = dns_resource_key_ref(*b);
+ }
+
+ return true;
+}
+
+DnsResourceRecord* dns_resource_record_new(DnsResourceKey *key) {
+ DnsResourceRecord *rr;
+
+ rr = new(DnsResourceRecord, 1);
+ if (!rr)
+ return NULL;
+
+ *rr = (DnsResourceRecord) {
+ .n_ref = 1,
+ .key = dns_resource_key_ref(key),
+ .expiry = USEC_INFINITY,
+ .n_skip_labels_signer = (unsigned) -1,
+ .n_skip_labels_source = (unsigned) -1,
+ };
+
+ return rr;
+}
+
+DnsResourceRecord* dns_resource_record_new_full(uint16_t class, uint16_t type, const char *name) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ key = dns_resource_key_new(class, type, name);
+ if (!key)
+ return NULL;
+
+ return dns_resource_record_new(key);
+}
+
+static DnsResourceRecord* dns_resource_record_free(DnsResourceRecord *rr) {
+ assert(rr);
+
+ if (rr->key) {
+ switch(rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ free(rr->srv.name);
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ free(rr->ptr.name);
+ break;
+
+ case DNS_TYPE_HINFO:
+ free(rr->hinfo.cpu);
+ free(rr->hinfo.os);
+ break;
+
+ case DNS_TYPE_TXT:
+ case DNS_TYPE_SPF:
+ dns_txt_item_free_all(rr->txt.items);
+ break;
+
+ case DNS_TYPE_SOA:
+ free(rr->soa.mname);
+ free(rr->soa.rname);
+ break;
+
+ case DNS_TYPE_MX:
+ free(rr->mx.exchange);
+ break;
+
+ case DNS_TYPE_DS:
+ free(rr->ds.digest);
+ break;
+
+ case DNS_TYPE_SSHFP:
+ free(rr->sshfp.fingerprint);
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ free(rr->dnskey.key);
+ break;
+
+ case DNS_TYPE_RRSIG:
+ free(rr->rrsig.signer);
+ free(rr->rrsig.signature);
+ break;
+
+ case DNS_TYPE_NSEC:
+ free(rr->nsec.next_domain_name);
+ bitmap_free(rr->nsec.types);
+ break;
+
+ case DNS_TYPE_NSEC3:
+ free(rr->nsec3.next_hashed_name);
+ free(rr->nsec3.salt);
+ bitmap_free(rr->nsec3.types);
+ break;
+
+ case DNS_TYPE_LOC:
+ case DNS_TYPE_A:
+ case DNS_TYPE_AAAA:
+ break;
+
+ case DNS_TYPE_TLSA:
+ free(rr->tlsa.data);
+ break;
+
+ case DNS_TYPE_CAA:
+ free(rr->caa.tag);
+ free(rr->caa.value);
+ break;
+
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ if (!rr->unparsable)
+ free(rr->generic.data);
+ }
+
+ if (rr->unparsable)
+ free(rr->generic.data);
+
+ free(rr->wire_format);
+ dns_resource_key_unref(rr->key);
+ }
+
+ free(rr->to_string);
+ return mfree(rr);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsResourceRecord, dns_resource_record, dns_resource_record_free);
+
+int dns_resource_record_new_reverse(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *hostname) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_free_ char *ptr = NULL;
+ int r;
+
+ assert(ret);
+ assert(address);
+ assert(hostname);
+
+ r = dns_name_reverse(family, address, &ptr);
+ if (r < 0)
+ return r;
+
+ key = dns_resource_key_new_consume(DNS_CLASS_IN, DNS_TYPE_PTR, ptr);
+ if (!key)
+ return -ENOMEM;
+
+ ptr = NULL;
+
+ rr = dns_resource_record_new(key);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->ptr.name = strdup(hostname);
+ if (!rr->ptr.name)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(rr);
+
+ return 0;
+}
+
+int dns_resource_record_new_address(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *name) {
+ DnsResourceRecord *rr;
+
+ assert(ret);
+ assert(address);
+ assert(family);
+
+ if (family == AF_INET) {
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_A, name);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->a.in_addr = address->in;
+
+ } else if (family == AF_INET6) {
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_AAAA, name);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->aaaa.in6_addr = address->in6;
+ } else
+ return -EAFNOSUPPORT;
+
+ *ret = rr;
+
+ return 0;
+}
+
+#define FIELD_EQUAL(a, b, field) \
+ ((a).field ## _size == (b).field ## _size && \
+ memcmp_safe((a).field, (b).field, (a).field ## _size) == 0)
+
+int dns_resource_record_payload_equal(const DnsResourceRecord *a, const DnsResourceRecord *b) {
+ int r;
+
+ /* Check if a and b are the same, but don't look at their keys */
+
+ if (a->unparsable != b->unparsable)
+ return 0;
+
+ switch (a->unparsable ? _DNS_TYPE_INVALID : a->key->type) {
+
+ case DNS_TYPE_SRV:
+ r = dns_name_equal(a->srv.name, b->srv.name);
+ if (r <= 0)
+ return r;
+
+ return a->srv.priority == b->srv.priority &&
+ a->srv.weight == b->srv.weight &&
+ a->srv.port == b->srv.port;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ return dns_name_equal(a->ptr.name, b->ptr.name);
+
+ case DNS_TYPE_HINFO:
+ return strcaseeq(a->hinfo.cpu, b->hinfo.cpu) &&
+ strcaseeq(a->hinfo.os, b->hinfo.os);
+
+ case DNS_TYPE_SPF: /* exactly the same as TXT */
+ case DNS_TYPE_TXT:
+ return dns_txt_item_equal(a->txt.items, b->txt.items);
+
+ case DNS_TYPE_A:
+ return memcmp(&a->a.in_addr, &b->a.in_addr, sizeof(struct in_addr)) == 0;
+
+ case DNS_TYPE_AAAA:
+ return memcmp(&a->aaaa.in6_addr, &b->aaaa.in6_addr, sizeof(struct in6_addr)) == 0;
+
+ case DNS_TYPE_SOA:
+ r = dns_name_equal(a->soa.mname, b->soa.mname);
+ if (r <= 0)
+ return r;
+ r = dns_name_equal(a->soa.rname, b->soa.rname);
+ if (r <= 0)
+ return r;
+
+ return a->soa.serial == b->soa.serial &&
+ a->soa.refresh == b->soa.refresh &&
+ a->soa.retry == b->soa.retry &&
+ a->soa.expire == b->soa.expire &&
+ a->soa.minimum == b->soa.minimum;
+
+ case DNS_TYPE_MX:
+ if (a->mx.priority != b->mx.priority)
+ return 0;
+
+ return dns_name_equal(a->mx.exchange, b->mx.exchange);
+
+ case DNS_TYPE_LOC:
+ assert(a->loc.version == b->loc.version);
+
+ return a->loc.size == b->loc.size &&
+ a->loc.horiz_pre == b->loc.horiz_pre &&
+ a->loc.vert_pre == b->loc.vert_pre &&
+ a->loc.latitude == b->loc.latitude &&
+ a->loc.longitude == b->loc.longitude &&
+ a->loc.altitude == b->loc.altitude;
+
+ case DNS_TYPE_DS:
+ return a->ds.key_tag == b->ds.key_tag &&
+ a->ds.algorithm == b->ds.algorithm &&
+ a->ds.digest_type == b->ds.digest_type &&
+ FIELD_EQUAL(a->ds, b->ds, digest);
+
+ case DNS_TYPE_SSHFP:
+ return a->sshfp.algorithm == b->sshfp.algorithm &&
+ a->sshfp.fptype == b->sshfp.fptype &&
+ FIELD_EQUAL(a->sshfp, b->sshfp, fingerprint);
+
+ case DNS_TYPE_DNSKEY:
+ return a->dnskey.flags == b->dnskey.flags &&
+ a->dnskey.protocol == b->dnskey.protocol &&
+ a->dnskey.algorithm == b->dnskey.algorithm &&
+ FIELD_EQUAL(a->dnskey, b->dnskey, key);
+
+ case DNS_TYPE_RRSIG:
+ /* do the fast comparisons first */
+ return a->rrsig.type_covered == b->rrsig.type_covered &&
+ a->rrsig.algorithm == b->rrsig.algorithm &&
+ a->rrsig.labels == b->rrsig.labels &&
+ a->rrsig.original_ttl == b->rrsig.original_ttl &&
+ a->rrsig.expiration == b->rrsig.expiration &&
+ a->rrsig.inception == b->rrsig.inception &&
+ a->rrsig.key_tag == b->rrsig.key_tag &&
+ FIELD_EQUAL(a->rrsig, b->rrsig, signature) &&
+ dns_name_equal(a->rrsig.signer, b->rrsig.signer);
+
+ case DNS_TYPE_NSEC:
+ return dns_name_equal(a->nsec.next_domain_name, b->nsec.next_domain_name) &&
+ bitmap_equal(a->nsec.types, b->nsec.types);
+
+ case DNS_TYPE_NSEC3:
+ return a->nsec3.algorithm == b->nsec3.algorithm &&
+ a->nsec3.flags == b->nsec3.flags &&
+ a->nsec3.iterations == b->nsec3.iterations &&
+ FIELD_EQUAL(a->nsec3, b->nsec3, salt) &&
+ FIELD_EQUAL(a->nsec3, b->nsec3, next_hashed_name) &&
+ bitmap_equal(a->nsec3.types, b->nsec3.types);
+
+ case DNS_TYPE_TLSA:
+ return a->tlsa.cert_usage == b->tlsa.cert_usage &&
+ a->tlsa.selector == b->tlsa.selector &&
+ a->tlsa.matching_type == b->tlsa.matching_type &&
+ FIELD_EQUAL(a->tlsa, b->tlsa, data);
+
+ case DNS_TYPE_CAA:
+ return a->caa.flags == b->caa.flags &&
+ streq(a->caa.tag, b->caa.tag) &&
+ FIELD_EQUAL(a->caa, b->caa, value);
+
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ return FIELD_EQUAL(a->generic, b->generic, data);
+ }
+}
+
+int dns_resource_record_equal(const DnsResourceRecord *a, const DnsResourceRecord *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ if (a == b)
+ return 1;
+
+ r = dns_resource_key_equal(a->key, b->key);
+ if (r <= 0)
+ return r;
+
+ return dns_resource_record_payload_equal(a, b);
+}
+
+static char* format_location(uint32_t latitude, uint32_t longitude, uint32_t altitude,
+ uint8_t size, uint8_t horiz_pre, uint8_t vert_pre) {
+ char *s;
+ char NS = latitude >= 1U<<31 ? 'N' : 'S';
+ char EW = longitude >= 1U<<31 ? 'E' : 'W';
+
+ int lat = latitude >= 1U<<31 ? (int) (latitude - (1U<<31)) : (int) ((1U<<31) - latitude);
+ int lon = longitude >= 1U<<31 ? (int) (longitude - (1U<<31)) : (int) ((1U<<31) - longitude);
+ double alt = altitude >= 10000000u ? altitude - 10000000u : -(double)(10000000u - altitude);
+ double siz = (size >> 4) * exp10((double) (size & 0xF));
+ double hor = (horiz_pre >> 4) * exp10((double) (horiz_pre & 0xF));
+ double ver = (vert_pre >> 4) * exp10((double) (vert_pre & 0xF));
+
+ if (asprintf(&s, "%d %d %.3f %c %d %d %.3f %c %.2fm %.2fm %.2fm %.2fm",
+ (lat / 60000 / 60),
+ (lat / 60000) % 60,
+ (lat % 60000) / 1000.,
+ NS,
+ (lon / 60000 / 60),
+ (lon / 60000) % 60,
+ (lon % 60000) / 1000.,
+ EW,
+ alt / 100.,
+ siz / 100.,
+ hor / 100.,
+ ver / 100.) < 0)
+ return NULL;
+
+ return s;
+}
+
+static int format_timestamp_dns(char *buf, size_t l, time_t sec) {
+ struct tm tm;
+
+ assert(buf);
+ assert(l > STRLEN("YYYYMMDDHHmmSS"));
+
+ if (!gmtime_r(&sec, &tm))
+ return -EINVAL;
+
+ if (strftime(buf, l, "%Y%m%d%H%M%S", &tm) <= 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static char *format_types(Bitmap *types) {
+ _cleanup_strv_free_ char **strv = NULL;
+ _cleanup_free_ char *str = NULL;
+ unsigned type;
+ int r;
+
+ BITMAP_FOREACH(type, types) {
+ if (dns_type_to_string(type)) {
+ r = strv_extend(&strv, dns_type_to_string(type));
+ if (r < 0)
+ return NULL;
+ } else {
+ char *t;
+
+ r = asprintf(&t, "TYPE%u", type);
+ if (r < 0)
+ return NULL;
+
+ r = strv_consume(&strv, t);
+ if (r < 0)
+ return NULL;
+ }
+ }
+
+ str = strv_join(strv, " ");
+ if (!str)
+ return NULL;
+
+ return strjoin("( ", str, " )");
+}
+
+static char *format_txt(DnsTxtItem *first) {
+ DnsTxtItem *i;
+ size_t c = 1;
+ char *p, *s;
+
+ LIST_FOREACH(items, i, first)
+ c += i->length * 4 + 3;
+
+ p = s = new(char, c);
+ if (!s)
+ return NULL;
+
+ LIST_FOREACH(items, i, first) {
+ size_t j;
+
+ if (i != first)
+ *(p++) = ' ';
+
+ *(p++) = '"';
+
+ for (j = 0; j < i->length; j++) {
+ if (i->data[j] < ' ' || i->data[j] == '"' || i->data[j] >= 127) {
+ *(p++) = '\\';
+ *(p++) = '0' + (i->data[j] / 100);
+ *(p++) = '0' + ((i->data[j] / 10) % 10);
+ *(p++) = '0' + (i->data[j] % 10);
+ } else
+ *(p++) = i->data[j];
+ }
+
+ *(p++) = '"';
+ }
+
+ *p = 0;
+ return s;
+}
+
+const char *dns_resource_record_to_string(DnsResourceRecord *rr) {
+ _cleanup_free_ char *t = NULL;
+ char *s, k[DNS_RESOURCE_KEY_STRING_MAX];
+ int r;
+
+ assert(rr);
+
+ if (rr->to_string)
+ return rr->to_string;
+
+ dns_resource_key_to_string(rr->key, k, sizeof(k));
+
+ switch (rr->unparsable ? _DNS_TYPE_INVALID : rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ r = asprintf(&s, "%s %u %u %u %s",
+ k,
+ rr->srv.priority,
+ rr->srv.weight,
+ rr->srv.port,
+ strna(rr->srv.name));
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ s = strjoin(k, " ", rr->ptr.name);
+ if (!s)
+ return NULL;
+
+ break;
+
+ case DNS_TYPE_HINFO:
+ s = strjoin(k, " ", rr->hinfo.cpu, " ", rr->hinfo.os);
+ if (!s)
+ return NULL;
+ break;
+
+ case DNS_TYPE_SPF: /* exactly the same as TXT */
+ case DNS_TYPE_TXT:
+ t = format_txt(rr->txt.items);
+ if (!t)
+ return NULL;
+
+ s = strjoin(k, " ", t);
+ if (!s)
+ return NULL;
+ break;
+
+ case DNS_TYPE_A: {
+ _cleanup_free_ char *x = NULL;
+
+ r = in_addr_to_string(AF_INET, (const union in_addr_union*) &rr->a.in_addr, &x);
+ if (r < 0)
+ return NULL;
+
+ s = strjoin(k, " ", x);
+ if (!s)
+ return NULL;
+ break;
+ }
+
+ case DNS_TYPE_AAAA:
+ r = in_addr_to_string(AF_INET6, (const union in_addr_union*) &rr->aaaa.in6_addr, &t);
+ if (r < 0)
+ return NULL;
+
+ s = strjoin(k, " ", t);
+ if (!s)
+ return NULL;
+ break;
+
+ case DNS_TYPE_SOA:
+ r = asprintf(&s, "%s %s %s %u %u %u %u %u",
+ k,
+ strna(rr->soa.mname),
+ strna(rr->soa.rname),
+ rr->soa.serial,
+ rr->soa.refresh,
+ rr->soa.retry,
+ rr->soa.expire,
+ rr->soa.minimum);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_MX:
+ r = asprintf(&s, "%s %u %s",
+ k,
+ rr->mx.priority,
+ rr->mx.exchange);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_LOC:
+ assert(rr->loc.version == 0);
+
+ t = format_location(rr->loc.latitude,
+ rr->loc.longitude,
+ rr->loc.altitude,
+ rr->loc.size,
+ rr->loc.horiz_pre,
+ rr->loc.vert_pre);
+ if (!t)
+ return NULL;
+
+ s = strjoin(k, " ", t);
+ if (!s)
+ return NULL;
+ break;
+
+ case DNS_TYPE_DS:
+ t = hexmem(rr->ds.digest, rr->ds.digest_size);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s, "%s %u %u %u %s",
+ k,
+ rr->ds.key_tag,
+ rr->ds.algorithm,
+ rr->ds.digest_type,
+ t);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_SSHFP:
+ t = hexmem(rr->sshfp.fingerprint, rr->sshfp.fingerprint_size);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s, "%s %u %u %s",
+ k,
+ rr->sshfp.algorithm,
+ rr->sshfp.fptype,
+ t);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_DNSKEY: {
+ _cleanup_free_ char *alg = NULL;
+ char *ss;
+ uint16_t key_tag;
+
+ key_tag = dnssec_keytag(rr, true);
+
+ r = dnssec_algorithm_to_string_alloc(rr->dnskey.algorithm, &alg);
+ if (r < 0)
+ return NULL;
+
+ r = asprintf(&s, "%s %u %u %s",
+ k,
+ rr->dnskey.flags,
+ rr->dnskey.protocol,
+ alg);
+ if (r < 0)
+ return NULL;
+
+ r = base64_append(&s, r,
+ rr->dnskey.key, rr->dnskey.key_size,
+ 8, columns());
+ if (r < 0)
+ return NULL;
+
+ r = asprintf(&ss, "%s\n"
+ " -- Flags:%s%s%s\n"
+ " -- Key tag: %u",
+ s,
+ rr->dnskey.flags & DNSKEY_FLAG_SEP ? " SEP" : "",
+ rr->dnskey.flags & DNSKEY_FLAG_REVOKE ? " REVOKE" : "",
+ rr->dnskey.flags & DNSKEY_FLAG_ZONE_KEY ? " ZONE_KEY" : "",
+ key_tag);
+ if (r < 0)
+ return NULL;
+ free(s);
+ s = ss;
+
+ break;
+ }
+
+ case DNS_TYPE_RRSIG: {
+ _cleanup_free_ char *alg = NULL;
+ char expiration[STRLEN("YYYYMMDDHHmmSS") + 1], inception[STRLEN("YYYYMMDDHHmmSS") + 1];
+ const char *type;
+
+ type = dns_type_to_string(rr->rrsig.type_covered);
+
+ r = dnssec_algorithm_to_string_alloc(rr->rrsig.algorithm, &alg);
+ if (r < 0)
+ return NULL;
+
+ r = format_timestamp_dns(expiration, sizeof(expiration), rr->rrsig.expiration);
+ if (r < 0)
+ return NULL;
+
+ r = format_timestamp_dns(inception, sizeof(inception), rr->rrsig.inception);
+ if (r < 0)
+ return NULL;
+
+ /* TYPE?? follows
+ * http://tools.ietf.org/html/rfc3597#section-5 */
+
+ r = asprintf(&s, "%s %s%.*u %s %u %u %s %s %u %s",
+ k,
+ type ?: "TYPE",
+ type ? 0 : 1, type ? 0u : (unsigned) rr->rrsig.type_covered,
+ alg,
+ rr->rrsig.labels,
+ rr->rrsig.original_ttl,
+ expiration,
+ inception,
+ rr->rrsig.key_tag,
+ rr->rrsig.signer);
+ if (r < 0)
+ return NULL;
+
+ r = base64_append(&s, r,
+ rr->rrsig.signature, rr->rrsig.signature_size,
+ 8, columns());
+ if (r < 0)
+ return NULL;
+
+ break;
+ }
+
+ case DNS_TYPE_NSEC:
+ t = format_types(rr->nsec.types);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s, "%s %s %s",
+ k,
+ rr->nsec.next_domain_name,
+ t);
+ if (r < 0)
+ return NULL;
+ break;
+
+ case DNS_TYPE_NSEC3: {
+ _cleanup_free_ char *salt = NULL, *hash = NULL;
+
+ if (rr->nsec3.salt_size > 0) {
+ salt = hexmem(rr->nsec3.salt, rr->nsec3.salt_size);
+ if (!salt)
+ return NULL;
+ }
+
+ hash = base32hexmem(rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size, false);
+ if (!hash)
+ return NULL;
+
+ t = format_types(rr->nsec3.types);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s, "%s %"PRIu8" %"PRIu8" %"PRIu16" %s %s %s",
+ k,
+ rr->nsec3.algorithm,
+ rr->nsec3.flags,
+ rr->nsec3.iterations,
+ rr->nsec3.salt_size > 0 ? salt : "-",
+ hash,
+ t);
+ if (r < 0)
+ return NULL;
+
+ break;
+ }
+
+ case DNS_TYPE_TLSA: {
+ const char *cert_usage, *selector, *matching_type;
+
+ cert_usage = tlsa_cert_usage_to_string(rr->tlsa.cert_usage);
+ selector = tlsa_selector_to_string(rr->tlsa.selector);
+ matching_type = tlsa_matching_type_to_string(rr->tlsa.matching_type);
+
+ t = hexmem(rr->sshfp.fingerprint, rr->sshfp.fingerprint_size);
+ if (!t)
+ return NULL;
+
+ r = asprintf(&s,
+ "%s %u %u %u %s\n"
+ " -- Cert. usage: %s\n"
+ " -- Selector: %s\n"
+ " -- Matching type: %s",
+ k,
+ rr->tlsa.cert_usage,
+ rr->tlsa.selector,
+ rr->tlsa.matching_type,
+ t,
+ cert_usage,
+ selector,
+ matching_type);
+ if (r < 0)
+ return NULL;
+
+ break;
+ }
+
+ case DNS_TYPE_CAA: {
+ _cleanup_free_ char *value;
+
+ value = octescape(rr->caa.value, rr->caa.value_size);
+ if (!value)
+ return NULL;
+
+ r = asprintf(&s, "%s %u %s \"%s\"%s%s%s%.0u",
+ k,
+ rr->caa.flags,
+ rr->caa.tag,
+ value,
+ rr->caa.flags ? "\n -- Flags:" : "",
+ rr->caa.flags & CAA_FLAG_CRITICAL ? " critical" : "",
+ rr->caa.flags & ~CAA_FLAG_CRITICAL ? " " : "",
+ rr->caa.flags & ~CAA_FLAG_CRITICAL);
+ if (r < 0)
+ return NULL;
+
+ break;
+ }
+
+ case DNS_TYPE_OPENPGPKEY: {
+ r = asprintf(&s, "%s", k);
+ if (r < 0)
+ return NULL;
+
+ r = base64_append(&s, r,
+ rr->generic.data, rr->generic.data_size,
+ 8, columns());
+ if (r < 0)
+ return NULL;
+ break;
+ }
+
+ default:
+ t = hexmem(rr->generic.data, rr->generic.data_size);
+ if (!t)
+ return NULL;
+
+ /* Format as documented in RFC 3597, Section 5 */
+ r = asprintf(&s, "%s \\# %zu %s", k, rr->generic.data_size, t);
+ if (r < 0)
+ return NULL;
+ break;
+ }
+
+ rr->to_string = s;
+ return s;
+}
+
+ssize_t dns_resource_record_payload(DnsResourceRecord *rr, void **out) {
+ assert(rr);
+ assert(out);
+
+ switch(rr->unparsable ? _DNS_TYPE_INVALID : rr->key->type) {
+ case DNS_TYPE_SRV:
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ case DNS_TYPE_HINFO:
+ case DNS_TYPE_SPF:
+ case DNS_TYPE_TXT:
+ case DNS_TYPE_A:
+ case DNS_TYPE_AAAA:
+ case DNS_TYPE_SOA:
+ case DNS_TYPE_MX:
+ case DNS_TYPE_LOC:
+ case DNS_TYPE_DS:
+ case DNS_TYPE_DNSKEY:
+ case DNS_TYPE_RRSIG:
+ case DNS_TYPE_NSEC:
+ case DNS_TYPE_NSEC3:
+ return -EINVAL;
+
+ case DNS_TYPE_SSHFP:
+ *out = rr->sshfp.fingerprint;
+ return rr->sshfp.fingerprint_size;
+
+ case DNS_TYPE_TLSA:
+ *out = rr->tlsa.data;
+ return rr->tlsa.data_size;
+
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ *out = rr->generic.data;
+ return rr->generic.data_size;
+ }
+}
+
+int dns_resource_record_to_wire_format(DnsResourceRecord *rr, bool canonical) {
+
+ DnsPacket packet = {
+ .n_ref = 1,
+ .protocol = DNS_PROTOCOL_DNS,
+ .on_stack = true,
+ .refuse_compression = true,
+ .canonical_form = canonical,
+ };
+
+ size_t start, rds;
+ int r;
+
+ assert(rr);
+
+ /* Generates the RR in wire-format, optionally in the
+ * canonical form as discussed in the DNSSEC RFC 4034, Section
+ * 6.2. We allocate a throw-away DnsPacket object on the stack
+ * here, because we need some book-keeping for memory
+ * management, and can reuse the DnsPacket serializer, that
+ * can generate the canonical form, too, but also knows label
+ * compression and suchlike. */
+
+ if (rr->wire_format && rr->wire_format_canonical == canonical)
+ return 0;
+
+ r = dns_packet_append_rr(&packet, rr, 0, &start, &rds);
+ if (r < 0)
+ return r;
+
+ assert(start == 0);
+ assert(packet._data);
+
+ free(rr->wire_format);
+ rr->wire_format = packet._data;
+ rr->wire_format_size = packet.size;
+ rr->wire_format_rdata_offset = rds;
+ rr->wire_format_canonical = canonical;
+
+ packet._data = NULL;
+ dns_packet_unref(&packet);
+
+ return 0;
+}
+
+int dns_resource_record_signer(DnsResourceRecord *rr, const char **ret) {
+ const char *n;
+ int r;
+
+ assert(rr);
+ assert(ret);
+
+ /* Returns the RRset's signer, if it is known. */
+
+ if (rr->n_skip_labels_signer == (unsigned) -1)
+ return -ENODATA;
+
+ n = dns_resource_key_name(rr->key);
+ r = dns_name_skip(n, rr->n_skip_labels_signer, &n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ *ret = n;
+ return 0;
+}
+
+int dns_resource_record_source(DnsResourceRecord *rr, const char **ret) {
+ const char *n;
+ int r;
+
+ assert(rr);
+ assert(ret);
+
+ /* Returns the RRset's synthesizing source, if it is known. */
+
+ if (rr->n_skip_labels_source == (unsigned) -1)
+ return -ENODATA;
+
+ n = dns_resource_key_name(rr->key);
+ r = dns_name_skip(n, rr->n_skip_labels_source, &n);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ *ret = n;
+ return 0;
+}
+
+int dns_resource_record_is_signer(DnsResourceRecord *rr, const char *zone) {
+ const char *signer;
+ int r;
+
+ assert(rr);
+
+ r = dns_resource_record_signer(rr, &signer);
+ if (r < 0)
+ return r;
+
+ return dns_name_equal(zone, signer);
+}
+
+int dns_resource_record_is_synthetic(DnsResourceRecord *rr) {
+ int r;
+
+ assert(rr);
+
+ /* Returns > 0 if the RR is generated from a wildcard, and is not the asterisk name itself */
+
+ if (rr->n_skip_labels_source == (unsigned) -1)
+ return -ENODATA;
+
+ if (rr->n_skip_labels_source == 0)
+ return 0;
+
+ if (rr->n_skip_labels_source > 1)
+ return 1;
+
+ r = dns_name_startswith(dns_resource_key_name(rr->key), "*");
+ if (r < 0)
+ return r;
+
+ return !r;
+}
+
+void dns_resource_record_hash_func(const DnsResourceRecord *rr, struct siphash *state) {
+ assert(rr);
+
+ dns_resource_key_hash_func(rr->key, state);
+
+ switch (rr->unparsable ? _DNS_TYPE_INVALID : rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ siphash24_compress(&rr->srv.priority, sizeof(rr->srv.priority), state);
+ siphash24_compress(&rr->srv.weight, sizeof(rr->srv.weight), state);
+ siphash24_compress(&rr->srv.port, sizeof(rr->srv.port), state);
+ dns_name_hash_func(rr->srv.name, state);
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ dns_name_hash_func(rr->ptr.name, state);
+ break;
+
+ case DNS_TYPE_HINFO:
+ string_hash_func(rr->hinfo.cpu, state);
+ string_hash_func(rr->hinfo.os, state);
+ break;
+
+ case DNS_TYPE_TXT:
+ case DNS_TYPE_SPF: {
+ DnsTxtItem *j;
+
+ LIST_FOREACH(items, j, rr->txt.items) {
+ siphash24_compress_safe(j->data, j->length, state);
+
+ /* Add an extra NUL byte, so that "a" followed by "b" doesn't result in the same hash as "ab"
+ * followed by "". */
+ siphash24_compress_byte(0, state);
+ }
+ break;
+ }
+
+ case DNS_TYPE_A:
+ siphash24_compress(&rr->a.in_addr, sizeof(rr->a.in_addr), state);
+ break;
+
+ case DNS_TYPE_AAAA:
+ siphash24_compress(&rr->aaaa.in6_addr, sizeof(rr->aaaa.in6_addr), state);
+ break;
+
+ case DNS_TYPE_SOA:
+ dns_name_hash_func(rr->soa.mname, state);
+ dns_name_hash_func(rr->soa.rname, state);
+ siphash24_compress(&rr->soa.serial, sizeof(rr->soa.serial), state);
+ siphash24_compress(&rr->soa.refresh, sizeof(rr->soa.refresh), state);
+ siphash24_compress(&rr->soa.retry, sizeof(rr->soa.retry), state);
+ siphash24_compress(&rr->soa.expire, sizeof(rr->soa.expire), state);
+ siphash24_compress(&rr->soa.minimum, sizeof(rr->soa.minimum), state);
+ break;
+
+ case DNS_TYPE_MX:
+ siphash24_compress(&rr->mx.priority, sizeof(rr->mx.priority), state);
+ dns_name_hash_func(rr->mx.exchange, state);
+ break;
+
+ case DNS_TYPE_LOC:
+ siphash24_compress(&rr->loc.version, sizeof(rr->loc.version), state);
+ siphash24_compress(&rr->loc.size, sizeof(rr->loc.size), state);
+ siphash24_compress(&rr->loc.horiz_pre, sizeof(rr->loc.horiz_pre), state);
+ siphash24_compress(&rr->loc.vert_pre, sizeof(rr->loc.vert_pre), state);
+ siphash24_compress(&rr->loc.latitude, sizeof(rr->loc.latitude), state);
+ siphash24_compress(&rr->loc.longitude, sizeof(rr->loc.longitude), state);
+ siphash24_compress(&rr->loc.altitude, sizeof(rr->loc.altitude), state);
+ break;
+
+ case DNS_TYPE_SSHFP:
+ siphash24_compress(&rr->sshfp.algorithm, sizeof(rr->sshfp.algorithm), state);
+ siphash24_compress(&rr->sshfp.fptype, sizeof(rr->sshfp.fptype), state);
+ siphash24_compress_safe(rr->sshfp.fingerprint, rr->sshfp.fingerprint_size, state);
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ siphash24_compress(&rr->dnskey.flags, sizeof(rr->dnskey.flags), state);
+ siphash24_compress(&rr->dnskey.protocol, sizeof(rr->dnskey.protocol), state);
+ siphash24_compress(&rr->dnskey.algorithm, sizeof(rr->dnskey.algorithm), state);
+ siphash24_compress_safe(rr->dnskey.key, rr->dnskey.key_size, state);
+ break;
+
+ case DNS_TYPE_RRSIG:
+ siphash24_compress(&rr->rrsig.type_covered, sizeof(rr->rrsig.type_covered), state);
+ siphash24_compress(&rr->rrsig.algorithm, sizeof(rr->rrsig.algorithm), state);
+ siphash24_compress(&rr->rrsig.labels, sizeof(rr->rrsig.labels), state);
+ siphash24_compress(&rr->rrsig.original_ttl, sizeof(rr->rrsig.original_ttl), state);
+ siphash24_compress(&rr->rrsig.expiration, sizeof(rr->rrsig.expiration), state);
+ siphash24_compress(&rr->rrsig.inception, sizeof(rr->rrsig.inception), state);
+ siphash24_compress(&rr->rrsig.key_tag, sizeof(rr->rrsig.key_tag), state);
+ dns_name_hash_func(rr->rrsig.signer, state);
+ siphash24_compress_safe(rr->rrsig.signature, rr->rrsig.signature_size, state);
+ break;
+
+ case DNS_TYPE_NSEC:
+ dns_name_hash_func(rr->nsec.next_domain_name, state);
+ /* FIXME: we leave out the type bitmap here. Hash
+ * would be better if we'd take it into account
+ * too. */
+ break;
+
+ case DNS_TYPE_DS:
+ siphash24_compress(&rr->ds.key_tag, sizeof(rr->ds.key_tag), state);
+ siphash24_compress(&rr->ds.algorithm, sizeof(rr->ds.algorithm), state);
+ siphash24_compress(&rr->ds.digest_type, sizeof(rr->ds.digest_type), state);
+ siphash24_compress_safe(rr->ds.digest, rr->ds.digest_size, state);
+ break;
+
+ case DNS_TYPE_NSEC3:
+ siphash24_compress(&rr->nsec3.algorithm, sizeof(rr->nsec3.algorithm), state);
+ siphash24_compress(&rr->nsec3.flags, sizeof(rr->nsec3.flags), state);
+ siphash24_compress(&rr->nsec3.iterations, sizeof(rr->nsec3.iterations), state);
+ siphash24_compress_safe(rr->nsec3.salt, rr->nsec3.salt_size, state);
+ siphash24_compress_safe(rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size, state);
+ /* FIXME: We leave the bitmaps out */
+ break;
+
+ case DNS_TYPE_TLSA:
+ siphash24_compress(&rr->tlsa.cert_usage, sizeof(rr->tlsa.cert_usage), state);
+ siphash24_compress(&rr->tlsa.selector, sizeof(rr->tlsa.selector), state);
+ siphash24_compress(&rr->tlsa.matching_type, sizeof(rr->tlsa.matching_type), state);
+ siphash24_compress_safe(rr->tlsa.data, rr->tlsa.data_size, state);
+ break;
+
+ case DNS_TYPE_CAA:
+ siphash24_compress(&rr->caa.flags, sizeof(rr->caa.flags), state);
+ string_hash_func(rr->caa.tag, state);
+ siphash24_compress_safe(rr->caa.value, rr->caa.value_size, state);
+ break;
+
+ case DNS_TYPE_OPENPGPKEY:
+ default:
+ siphash24_compress_safe(rr->generic.data, rr->generic.data_size, state);
+ break;
+ }
+}
+
+static int dns_resource_record_compare_func(const DnsResourceRecord *x, const DnsResourceRecord *y) {
+ int r;
+
+ r = dns_resource_key_compare_func(x->key, y->key);
+ if (r != 0)
+ return r;
+
+ if (dns_resource_record_equal(x, y))
+ return 0;
+
+ /* We still use CMP() here, even though don't implement proper
+ * ordering, since the hashtable doesn't need ordering anyway. */
+ return CMP(x, y);
+}
+
+DEFINE_HASH_OPS(dns_resource_record_hash_ops, DnsResourceRecord, dns_resource_record_hash_func, dns_resource_record_compare_func);
+
+DnsResourceRecord *dns_resource_record_copy(DnsResourceRecord *rr) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *copy = NULL;
+ DnsResourceRecord *t;
+
+ assert(rr);
+
+ copy = dns_resource_record_new(rr->key);
+ if (!copy)
+ return NULL;
+
+ copy->ttl = rr->ttl;
+ copy->expiry = rr->expiry;
+ copy->n_skip_labels_signer = rr->n_skip_labels_signer;
+ copy->n_skip_labels_source = rr->n_skip_labels_source;
+ copy->unparsable = rr->unparsable;
+
+ switch (rr->unparsable ? _DNS_TYPE_INVALID : rr->key->type) {
+
+ case DNS_TYPE_SRV:
+ copy->srv.priority = rr->srv.priority;
+ copy->srv.weight = rr->srv.weight;
+ copy->srv.port = rr->srv.port;
+ copy->srv.name = strdup(rr->srv.name);
+ if (!copy->srv.name)
+ return NULL;
+ break;
+
+ case DNS_TYPE_PTR:
+ case DNS_TYPE_NS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME:
+ copy->ptr.name = strdup(rr->ptr.name);
+ if (!copy->ptr.name)
+ return NULL;
+ break;
+
+ case DNS_TYPE_HINFO:
+ copy->hinfo.cpu = strdup(rr->hinfo.cpu);
+ if (!copy->hinfo.cpu)
+ return NULL;
+
+ copy->hinfo.os = strdup(rr->hinfo.os);
+ if (!copy->hinfo.os)
+ return NULL;
+ break;
+
+ case DNS_TYPE_TXT:
+ case DNS_TYPE_SPF:
+ copy->txt.items = dns_txt_item_copy(rr->txt.items);
+ if (!copy->txt.items)
+ return NULL;
+ break;
+
+ case DNS_TYPE_A:
+ copy->a = rr->a;
+ break;
+
+ case DNS_TYPE_AAAA:
+ copy->aaaa = rr->aaaa;
+ break;
+
+ case DNS_TYPE_SOA:
+ copy->soa.mname = strdup(rr->soa.mname);
+ if (!copy->soa.mname)
+ return NULL;
+ copy->soa.rname = strdup(rr->soa.rname);
+ if (!copy->soa.rname)
+ return NULL;
+ copy->soa.serial = rr->soa.serial;
+ copy->soa.refresh = rr->soa.refresh;
+ copy->soa.retry = rr->soa.retry;
+ copy->soa.expire = rr->soa.expire;
+ copy->soa.minimum = rr->soa.minimum;
+ break;
+
+ case DNS_TYPE_MX:
+ copy->mx.priority = rr->mx.priority;
+ copy->mx.exchange = strdup(rr->mx.exchange);
+ if (!copy->mx.exchange)
+ return NULL;
+ break;
+
+ case DNS_TYPE_LOC:
+ copy->loc = rr->loc;
+ break;
+
+ case DNS_TYPE_SSHFP:
+ copy->sshfp.algorithm = rr->sshfp.algorithm;
+ copy->sshfp.fptype = rr->sshfp.fptype;
+ copy->sshfp.fingerprint = memdup(rr->sshfp.fingerprint, rr->sshfp.fingerprint_size);
+ if (!copy->sshfp.fingerprint)
+ return NULL;
+ copy->sshfp.fingerprint_size = rr->sshfp.fingerprint_size;
+ break;
+
+ case DNS_TYPE_DNSKEY:
+ copy->dnskey.flags = rr->dnskey.flags;
+ copy->dnskey.protocol = rr->dnskey.protocol;
+ copy->dnskey.algorithm = rr->dnskey.algorithm;
+ copy->dnskey.key = memdup(rr->dnskey.key, rr->dnskey.key_size);
+ if (!copy->dnskey.key)
+ return NULL;
+ copy->dnskey.key_size = rr->dnskey.key_size;
+ break;
+
+ case DNS_TYPE_RRSIG:
+ copy->rrsig.type_covered = rr->rrsig.type_covered;
+ copy->rrsig.algorithm = rr->rrsig.algorithm;
+ copy->rrsig.labels = rr->rrsig.labels;
+ copy->rrsig.original_ttl = rr->rrsig.original_ttl;
+ copy->rrsig.expiration = rr->rrsig.expiration;
+ copy->rrsig.inception = rr->rrsig.inception;
+ copy->rrsig.key_tag = rr->rrsig.key_tag;
+ copy->rrsig.signer = strdup(rr->rrsig.signer);
+ if (!copy->rrsig.signer)
+ return NULL;
+ copy->rrsig.signature = memdup(rr->rrsig.signature, rr->rrsig.signature_size);
+ if (!copy->rrsig.signature)
+ return NULL;
+ copy->rrsig.signature_size = rr->rrsig.signature_size;
+ break;
+
+ case DNS_TYPE_NSEC:
+ copy->nsec.next_domain_name = strdup(rr->nsec.next_domain_name);
+ if (!copy->nsec.next_domain_name)
+ return NULL;
+ copy->nsec.types = bitmap_copy(rr->nsec.types);
+ if (!copy->nsec.types)
+ return NULL;
+ break;
+
+ case DNS_TYPE_DS:
+ copy->ds.key_tag = rr->ds.key_tag;
+ copy->ds.algorithm = rr->ds.algorithm;
+ copy->ds.digest_type = rr->ds.digest_type;
+ copy->ds.digest = memdup(rr->ds.digest, rr->ds.digest_size);
+ if (!copy->ds.digest)
+ return NULL;
+ copy->ds.digest_size = rr->ds.digest_size;
+ break;
+
+ case DNS_TYPE_NSEC3:
+ copy->nsec3.algorithm = rr->nsec3.algorithm;
+ copy->nsec3.flags = rr->nsec3.flags;
+ copy->nsec3.iterations = rr->nsec3.iterations;
+ copy->nsec3.salt = memdup(rr->nsec3.salt, rr->nsec3.salt_size);
+ if (!copy->nsec3.salt)
+ return NULL;
+ copy->nsec3.salt_size = rr->nsec3.salt_size;
+ copy->nsec3.next_hashed_name = memdup(rr->nsec3.next_hashed_name, rr->nsec3.next_hashed_name_size);
+ if (!copy->nsec3.next_hashed_name)
+ return NULL;
+ copy->nsec3.next_hashed_name_size = rr->nsec3.next_hashed_name_size;
+ copy->nsec3.types = bitmap_copy(rr->nsec3.types);
+ if (!copy->nsec3.types)
+ return NULL;
+ break;
+
+ case DNS_TYPE_TLSA:
+ copy->tlsa.cert_usage = rr->tlsa.cert_usage;
+ copy->tlsa.selector = rr->tlsa.selector;
+ copy->tlsa.matching_type = rr->tlsa.matching_type;
+ copy->tlsa.data = memdup(rr->tlsa.data, rr->tlsa.data_size);
+ if (!copy->tlsa.data)
+ return NULL;
+ copy->tlsa.data_size = rr->tlsa.data_size;
+ break;
+
+ case DNS_TYPE_CAA:
+ copy->caa.flags = rr->caa.flags;
+ copy->caa.tag = strdup(rr->caa.tag);
+ if (!copy->caa.tag)
+ return NULL;
+ copy->caa.value = memdup(rr->caa.value, rr->caa.value_size);
+ if (!copy->caa.value)
+ return NULL;
+ copy->caa.value_size = rr->caa.value_size;
+ break;
+
+ case DNS_TYPE_OPT:
+ default:
+ copy->generic.data = memdup(rr->generic.data, rr->generic.data_size);
+ if (!copy->generic.data)
+ return NULL;
+ copy->generic.data_size = rr->generic.data_size;
+ break;
+ }
+
+ t = TAKE_PTR(copy);
+
+ return t;
+}
+
+int dns_resource_record_clamp_ttl(DnsResourceRecord **rr, uint32_t max_ttl) {
+ DnsResourceRecord *old_rr, *new_rr;
+ uint32_t new_ttl;
+
+ assert(rr);
+ old_rr = *rr;
+
+ if (old_rr->key->type == DNS_TYPE_OPT)
+ return -EINVAL;
+
+ new_ttl = MIN(old_rr->ttl, max_ttl);
+ if (new_ttl == old_rr->ttl)
+ return 0;
+
+ if (old_rr->n_ref == 1) {
+ /* Patch in place */
+ old_rr->ttl = new_ttl;
+ return 1;
+ }
+
+ new_rr = dns_resource_record_copy(old_rr);
+ if (!new_rr)
+ return -ENOMEM;
+
+ new_rr->ttl = new_ttl;
+
+ dns_resource_record_unref(*rr);
+ *rr = new_rr;
+
+ return 1;
+}
+
+DnsTxtItem *dns_txt_item_free_all(DnsTxtItem *i) {
+ DnsTxtItem *n;
+
+ if (!i)
+ return NULL;
+
+ n = i->items_next;
+
+ free(i);
+ return dns_txt_item_free_all(n);
+}
+
+bool dns_txt_item_equal(DnsTxtItem *a, DnsTxtItem *b) {
+
+ if (a == b)
+ return true;
+
+ if (!a != !b)
+ return false;
+
+ if (!a)
+ return true;
+
+ if (a->length != b->length)
+ return false;
+
+ if (memcmp(a->data, b->data, a->length) != 0)
+ return false;
+
+ return dns_txt_item_equal(a->items_next, b->items_next);
+}
+
+DnsTxtItem *dns_txt_item_copy(DnsTxtItem *first) {
+ DnsTxtItem *i, *copy = NULL, *end = NULL;
+
+ LIST_FOREACH(items, i, first) {
+ DnsTxtItem *j;
+
+ j = memdup(i, offsetof(DnsTxtItem, data) + i->length + 1);
+ if (!j) {
+ dns_txt_item_free_all(copy);
+ return NULL;
+ }
+
+ LIST_INSERT_AFTER(items, copy, end, j);
+ end = j;
+ }
+
+ return copy;
+}
+
+int dns_txt_item_new_empty(DnsTxtItem **ret) {
+ DnsTxtItem *i;
+
+ /* RFC 6763, section 6.1 suggests to treat
+ * empty TXT RRs as equivalent to a TXT record
+ * with a single empty string. */
+
+ i = malloc0(offsetof(DnsTxtItem, data) + 1); /* for safety reasons we add an extra NUL byte */
+ if (!i)
+ return -ENOMEM;
+
+ *ret = i;
+
+ return 0;
+}
+
+static const char* const dnssec_algorithm_table[_DNSSEC_ALGORITHM_MAX_DEFINED] = {
+ /* Mnemonics as listed on https://www.iana.org/assignments/dns-sec-alg-numbers/dns-sec-alg-numbers.xhtml */
+ [DNSSEC_ALGORITHM_RSAMD5] = "RSAMD5",
+ [DNSSEC_ALGORITHM_DH] = "DH",
+ [DNSSEC_ALGORITHM_DSA] = "DSA",
+ [DNSSEC_ALGORITHM_ECC] = "ECC",
+ [DNSSEC_ALGORITHM_RSASHA1] = "RSASHA1",
+ [DNSSEC_ALGORITHM_DSA_NSEC3_SHA1] = "DSA-NSEC3-SHA1",
+ [DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1] = "RSASHA1-NSEC3-SHA1",
+ [DNSSEC_ALGORITHM_RSASHA256] = "RSASHA256",
+ [DNSSEC_ALGORITHM_RSASHA512] = "RSASHA512",
+ [DNSSEC_ALGORITHM_ECC_GOST] = "ECC-GOST",
+ [DNSSEC_ALGORITHM_ECDSAP256SHA256] = "ECDSAP256SHA256",
+ [DNSSEC_ALGORITHM_ECDSAP384SHA384] = "ECDSAP384SHA384",
+ [DNSSEC_ALGORITHM_ED25519] = "ED25519",
+ [DNSSEC_ALGORITHM_ED448] = "ED448",
+ [DNSSEC_ALGORITHM_INDIRECT] = "INDIRECT",
+ [DNSSEC_ALGORITHM_PRIVATEDNS] = "PRIVATEDNS",
+ [DNSSEC_ALGORITHM_PRIVATEOID] = "PRIVATEOID",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(dnssec_algorithm, int, 255);
+
+static const char* const dnssec_digest_table[_DNSSEC_DIGEST_MAX_DEFINED] = {
+ /* Names as listed on https://www.iana.org/assignments/ds-rr-types/ds-rr-types.xhtml */
+ [DNSSEC_DIGEST_SHA1] = "SHA-1",
+ [DNSSEC_DIGEST_SHA256] = "SHA-256",
+ [DNSSEC_DIGEST_GOST_R_34_11_94] = "GOST_R_34.11-94",
+ [DNSSEC_DIGEST_SHA384] = "SHA-384",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(dnssec_digest, int, 255);
diff --git a/src/resolve/resolved-dns-rr.h b/src/resolve/resolved-dns-rr.h
new file mode 100644
index 0000000..59b3a70
--- /dev/null
+++ b/src/resolve/resolved-dns-rr.h
@@ -0,0 +1,341 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+
+#include "bitmap.h"
+#include "dns-type.h"
+#include "hashmap.h"
+#include "in-addr-util.h"
+#include "list.h"
+#include "string-util.h"
+#include "time-util.h"
+
+typedef struct DnsResourceKey DnsResourceKey;
+typedef struct DnsResourceRecord DnsResourceRecord;
+typedef struct DnsTxtItem DnsTxtItem;
+
+/* DNSKEY RR flags */
+#define DNSKEY_FLAG_SEP (UINT16_C(1) << 0)
+#define DNSKEY_FLAG_REVOKE (UINT16_C(1) << 7)
+#define DNSKEY_FLAG_ZONE_KEY (UINT16_C(1) << 8)
+
+/* mDNS RR flags */
+#define MDNS_RR_CACHE_FLUSH (UINT16_C(1) << 15)
+
+/* DNSSEC algorithm identifiers, see
+ * http://tools.ietf.org/html/rfc4034#appendix-A.1 and
+ * https://www.iana.org/assignments/dns-sec-alg-numbers/dns-sec-alg-numbers.xhtml */
+enum {
+ DNSSEC_ALGORITHM_RSAMD5 = 1,
+ DNSSEC_ALGORITHM_DH,
+ DNSSEC_ALGORITHM_DSA,
+ DNSSEC_ALGORITHM_ECC,
+ DNSSEC_ALGORITHM_RSASHA1,
+ DNSSEC_ALGORITHM_DSA_NSEC3_SHA1,
+ DNSSEC_ALGORITHM_RSASHA1_NSEC3_SHA1,
+ DNSSEC_ALGORITHM_RSASHA256 = 8, /* RFC 5702 */
+ DNSSEC_ALGORITHM_RSASHA512 = 10, /* RFC 5702 */
+ DNSSEC_ALGORITHM_ECC_GOST = 12, /* RFC 5933 */
+ DNSSEC_ALGORITHM_ECDSAP256SHA256 = 13, /* RFC 6605 */
+ DNSSEC_ALGORITHM_ECDSAP384SHA384 = 14, /* RFC 6605 */
+ DNSSEC_ALGORITHM_ED25519 = 15, /* RFC 8080 */
+ DNSSEC_ALGORITHM_ED448 = 16, /* RFC 8080 */
+ DNSSEC_ALGORITHM_INDIRECT = 252,
+ DNSSEC_ALGORITHM_PRIVATEDNS,
+ DNSSEC_ALGORITHM_PRIVATEOID,
+ _DNSSEC_ALGORITHM_MAX_DEFINED
+};
+
+/* DNSSEC digest identifiers, see
+ * https://www.iana.org/assignments/ds-rr-types/ds-rr-types.xhtml */
+enum {
+ DNSSEC_DIGEST_SHA1 = 1,
+ DNSSEC_DIGEST_SHA256 = 2, /* RFC 4509 */
+ DNSSEC_DIGEST_GOST_R_34_11_94 = 3, /* RFC 5933 */
+ DNSSEC_DIGEST_SHA384 = 4, /* RFC 6605 */
+ _DNSSEC_DIGEST_MAX_DEFINED
+};
+
+/* DNSSEC NSEC3 hash algorithms, see
+ * https://www.iana.org/assignments/dnssec-nsec3-parameters/dnssec-nsec3-parameters.xhtml */
+enum {
+ NSEC3_ALGORITHM_SHA1 = 1,
+ _NSEC3_ALGORITHM_MAX_DEFINED
+};
+
+struct DnsResourceKey {
+ unsigned n_ref; /* (unsigned -1) for const keys, see below */
+ uint16_t class, type;
+ char *_name; /* don't access directly, use dns_resource_key_name()! */
+};
+
+/* Creates a temporary resource key. This is only useful to quickly
+ * look up something, without allocating a full DnsResourceKey object
+ * for it. Note that it is not OK to take references to this kind of
+ * resource key object. */
+#define DNS_RESOURCE_KEY_CONST(c, t, n) \
+ ((DnsResourceKey) { \
+ .n_ref = (unsigned) -1, \
+ .class = c, \
+ .type = t, \
+ ._name = (char*) n, \
+ })
+
+struct DnsTxtItem {
+ size_t length;
+ LIST_FIELDS(DnsTxtItem, items);
+ uint8_t data[];
+};
+
+struct DnsResourceRecord {
+ unsigned n_ref;
+ DnsResourceKey *key;
+
+ char *to_string;
+
+ uint32_t ttl;
+ usec_t expiry; /* RRSIG signature expiry */
+
+ /* How many labels to strip to determine "signer" of the RRSIG (aka, the zone). -1 if not signed. */
+ unsigned n_skip_labels_signer;
+ /* How many labels to strip to determine "synthesizing source" of this RR, i.e. the wildcard's immediate parent. -1 if not signed. */
+ unsigned n_skip_labels_source;
+
+ bool unparsable:1;
+
+ bool wire_format_canonical:1;
+ void *wire_format;
+ size_t wire_format_size;
+ size_t wire_format_rdata_offset;
+
+ union {
+ struct {
+ void *data;
+ size_t data_size;
+ } generic, opt;
+
+ struct {
+ uint16_t priority;
+ uint16_t weight;
+ uint16_t port;
+ char *name;
+ } srv;
+
+ struct {
+ char *name;
+ } ptr, ns, cname, dname;
+
+ struct {
+ char *cpu;
+ char *os;
+ } hinfo;
+
+ struct {
+ DnsTxtItem *items;
+ } txt, spf;
+
+ struct {
+ struct in_addr in_addr;
+ } a;
+
+ struct {
+ struct in6_addr in6_addr;
+ } aaaa;
+
+ struct {
+ char *mname;
+ char *rname;
+ uint32_t serial;
+ uint32_t refresh;
+ uint32_t retry;
+ uint32_t expire;
+ uint32_t minimum;
+ } soa;
+
+ struct {
+ uint16_t priority;
+ char *exchange;
+ } mx;
+
+ /* https://tools.ietf.org/html/rfc1876 */
+ struct {
+ uint8_t version;
+ uint8_t size;
+ uint8_t horiz_pre;
+ uint8_t vert_pre;
+ uint32_t latitude;
+ uint32_t longitude;
+ uint32_t altitude;
+ } loc;
+
+ /* https://tools.ietf.org/html/rfc4255#section-3.1 */
+ struct {
+ uint8_t algorithm;
+ uint8_t fptype;
+ void *fingerprint;
+ size_t fingerprint_size;
+ } sshfp;
+
+ /* http://tools.ietf.org/html/rfc4034#section-2.1 */
+ struct {
+ uint16_t flags;
+ uint8_t protocol;
+ uint8_t algorithm;
+ void* key;
+ size_t key_size;
+ } dnskey;
+
+ /* http://tools.ietf.org/html/rfc4034#section-3.1 */
+ struct {
+ uint16_t type_covered;
+ uint8_t algorithm;
+ uint8_t labels;
+ uint32_t original_ttl;
+ uint32_t expiration;
+ uint32_t inception;
+ uint16_t key_tag;
+ char *signer;
+ void *signature;
+ size_t signature_size;
+ } rrsig;
+
+ /* https://tools.ietf.org/html/rfc4034#section-4.1 */
+ struct {
+ char *next_domain_name;
+ Bitmap *types;
+ } nsec;
+
+ /* https://tools.ietf.org/html/rfc4034#section-5.1 */
+ struct {
+ uint16_t key_tag;
+ uint8_t algorithm;
+ uint8_t digest_type;
+ void *digest;
+ size_t digest_size;
+ } ds;
+
+ struct {
+ uint8_t algorithm;
+ uint8_t flags;
+ uint16_t iterations;
+ void *salt;
+ size_t salt_size;
+ void *next_hashed_name;
+ size_t next_hashed_name_size;
+ Bitmap *types;
+ } nsec3;
+
+ /* https://tools.ietf.org/html/draft-ietf-dane-protocol-23 */
+ struct {
+ uint8_t cert_usage;
+ uint8_t selector;
+ uint8_t matching_type;
+ void *data;
+ size_t data_size;
+ } tlsa;
+
+ /* https://tools.ietf.org/html/rfc6844 */
+ struct {
+ uint8_t flags;
+ char *tag;
+ void *value;
+ size_t value_size;
+ } caa;
+ };
+};
+
+static inline const void* DNS_RESOURCE_RECORD_RDATA(const DnsResourceRecord *rr) {
+ if (!rr)
+ return NULL;
+
+ if (!rr->wire_format)
+ return NULL;
+
+ assert(rr->wire_format_rdata_offset <= rr->wire_format_size);
+ return (uint8_t*) rr->wire_format + rr->wire_format_rdata_offset;
+}
+
+static inline size_t DNS_RESOURCE_RECORD_RDATA_SIZE(const DnsResourceRecord *rr) {
+ if (!rr)
+ return 0;
+ if (!rr->wire_format)
+ return 0;
+
+ assert(rr->wire_format_rdata_offset <= rr->wire_format_size);
+ return rr->wire_format_size - rr->wire_format_rdata_offset;
+}
+
+static inline uint8_t DNS_RESOURCE_RECORD_OPT_VERSION_SUPPORTED(const DnsResourceRecord *rr) {
+ assert(rr);
+ assert(rr->key->type == DNS_TYPE_OPT);
+
+ return ((rr->ttl >> 16) & 0xFF) == 0;
+}
+
+DnsResourceKey* dns_resource_key_new(uint16_t class, uint16_t type, const char *name);
+DnsResourceKey* dns_resource_key_new_redirect(const DnsResourceKey *key, const DnsResourceRecord *cname);
+int dns_resource_key_new_append_suffix(DnsResourceKey **ret, DnsResourceKey *key, char *name);
+DnsResourceKey* dns_resource_key_new_consume(uint16_t class, uint16_t type, char *name);
+DnsResourceKey* dns_resource_key_ref(DnsResourceKey *key);
+DnsResourceKey* dns_resource_key_unref(DnsResourceKey *key);
+const char* dns_resource_key_name(const DnsResourceKey *key);
+bool dns_resource_key_is_address(const DnsResourceKey *key);
+bool dns_resource_key_is_dnssd_ptr(const DnsResourceKey *key);
+int dns_resource_key_equal(const DnsResourceKey *a, const DnsResourceKey *b);
+int dns_resource_key_match_rr(const DnsResourceKey *key, DnsResourceRecord *rr, const char *search_domain);
+int dns_resource_key_match_cname_or_dname(const DnsResourceKey *key, const DnsResourceKey *cname, const char *search_domain);
+int dns_resource_key_match_soa(const DnsResourceKey *key, const DnsResourceKey *soa);
+
+/* _DNS_{CLASS,TYPE}_STRING_MAX include one byte for NUL, which we use for space instead below.
+ * DNS_HOSTNAME_MAX does not include the NUL byte, so we need to add 1. */
+#define DNS_RESOURCE_KEY_STRING_MAX (_DNS_CLASS_STRING_MAX + _DNS_TYPE_STRING_MAX + DNS_HOSTNAME_MAX + 1)
+
+char* dns_resource_key_to_string(const DnsResourceKey *key, char *buf, size_t buf_size);
+ssize_t dns_resource_record_payload(DnsResourceRecord *rr, void **out);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsResourceKey*, dns_resource_key_unref);
+
+static inline bool dns_key_is_shared(const DnsResourceKey *key) {
+ return IN_SET(key->type, DNS_TYPE_PTR);
+}
+
+bool dns_resource_key_reduce(DnsResourceKey **a, DnsResourceKey **b);
+
+DnsResourceRecord* dns_resource_record_new(DnsResourceKey *key);
+DnsResourceRecord* dns_resource_record_new_full(uint16_t class, uint16_t type, const char *name);
+DnsResourceRecord* dns_resource_record_ref(DnsResourceRecord *rr);
+DnsResourceRecord* dns_resource_record_unref(DnsResourceRecord *rr);
+int dns_resource_record_new_reverse(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *name);
+int dns_resource_record_new_address(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *name);
+int dns_resource_record_equal(const DnsResourceRecord *a, const DnsResourceRecord *b);
+int dns_resource_record_payload_equal(const DnsResourceRecord *a, const DnsResourceRecord *b);
+
+const char* dns_resource_record_to_string(DnsResourceRecord *rr);
+DnsResourceRecord *dns_resource_record_copy(DnsResourceRecord *rr);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsResourceRecord*, dns_resource_record_unref);
+
+int dns_resource_record_to_wire_format(DnsResourceRecord *rr, bool canonical);
+
+int dns_resource_record_signer(DnsResourceRecord *rr, const char **ret);
+int dns_resource_record_source(DnsResourceRecord *rr, const char **ret);
+int dns_resource_record_is_signer(DnsResourceRecord *rr, const char *zone);
+int dns_resource_record_is_synthetic(DnsResourceRecord *rr);
+
+int dns_resource_record_clamp_ttl(DnsResourceRecord **rr, uint32_t max_ttl);
+
+DnsTxtItem *dns_txt_item_free_all(DnsTxtItem *i);
+bool dns_txt_item_equal(DnsTxtItem *a, DnsTxtItem *b);
+DnsTxtItem *dns_txt_item_copy(DnsTxtItem *i);
+int dns_txt_item_new_empty(DnsTxtItem **ret);
+
+void dns_resource_record_hash_func(const DnsResourceRecord *i, struct siphash *state);
+
+extern const struct hash_ops dns_resource_key_hash_ops;
+extern const struct hash_ops dns_resource_record_hash_ops;
+
+int dnssec_algorithm_to_string_alloc(int i, char **ret);
+int dnssec_algorithm_from_string(const char *s) _pure_;
+
+int dnssec_digest_to_string_alloc(int i, char **ret);
+int dnssec_digest_from_string(const char *s) _pure_;
diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c
new file mode 100644
index 0000000..509a206
--- /dev/null
+++ b/src/resolve/resolved-dns-scope.c
@@ -0,0 +1,1436 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/tcp.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "missing_network.h"
+#include "random-util.h"
+#include "resolved-dnssd.h"
+#include "resolved-dns-scope.h"
+#include "resolved-dns-zone.h"
+#include "resolved-llmnr.h"
+#include "resolved-mdns.h"
+#include "socket-util.h"
+#include "strv.h"
+
+#define MULTICAST_RATELIMIT_INTERVAL_USEC (1*USEC_PER_SEC)
+#define MULTICAST_RATELIMIT_BURST 1000
+
+/* After how much time to repeat LLMNR requests, see RFC 4795 Section 7 */
+#define MULTICAST_RESEND_TIMEOUT_MIN_USEC (100 * USEC_PER_MSEC)
+#define MULTICAST_RESEND_TIMEOUT_MAX_USEC (1 * USEC_PER_SEC)
+
+int dns_scope_new(Manager *m, DnsScope **ret, Link *l, DnsProtocol protocol, int family) {
+ DnsScope *s;
+
+ assert(m);
+ assert(ret);
+
+ s = new(DnsScope, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (DnsScope) {
+ .manager = m,
+ .link = l,
+ .protocol = protocol,
+ .family = family,
+ .resend_timeout = MULTICAST_RESEND_TIMEOUT_MIN_USEC,
+ };
+
+ if (protocol == DNS_PROTOCOL_DNS) {
+ /* Copy DNSSEC mode from the link if it is set there,
+ * otherwise take the manager's DNSSEC mode. Note that
+ * we copy this only at scope creation time, and do
+ * not update it from the on, even if the setting
+ * changes. */
+
+ if (l) {
+ s->dnssec_mode = link_get_dnssec_mode(l);
+ s->dns_over_tls_mode = link_get_dns_over_tls_mode(l);
+ } else {
+ s->dnssec_mode = manager_get_dnssec_mode(m);
+ s->dns_over_tls_mode = manager_get_dns_over_tls_mode(m);
+ }
+
+ } else {
+ s->dnssec_mode = DNSSEC_NO;
+ s->dns_over_tls_mode = DNS_OVER_TLS_NO;
+ }
+
+ LIST_PREPEND(scopes, m->dns_scopes, s);
+
+ dns_scope_llmnr_membership(s, true);
+ dns_scope_mdns_membership(s, true);
+
+ log_debug("New scope on link %s, protocol %s, family %s", l ? l->ifname : "*", dns_protocol_to_string(protocol), family == AF_UNSPEC ? "*" : af_to_name(family));
+
+ /* Enforce ratelimiting for the multicast protocols */
+ s->ratelimit = (RateLimit) { MULTICAST_RATELIMIT_INTERVAL_USEC, MULTICAST_RATELIMIT_BURST };
+
+ *ret = s;
+ return 0;
+}
+
+static void dns_scope_abort_transactions(DnsScope *s) {
+ assert(s);
+
+ while (s->transactions) {
+ DnsTransaction *t = s->transactions;
+
+ /* Abort the transaction, but make sure it is not
+ * freed while we still look at it */
+
+ t->block_gc++;
+ if (DNS_TRANSACTION_IS_LIVE(t->state))
+ dns_transaction_complete(t, DNS_TRANSACTION_ABORTED);
+ t->block_gc--;
+
+ dns_transaction_free(t);
+ }
+}
+
+DnsScope* dns_scope_free(DnsScope *s) {
+ if (!s)
+ return NULL;
+
+ log_debug("Removing scope on link %s, protocol %s, family %s", s->link ? s->link->ifname : "*", dns_protocol_to_string(s->protocol), s->family == AF_UNSPEC ? "*" : af_to_name(s->family));
+
+ dns_scope_llmnr_membership(s, false);
+ dns_scope_mdns_membership(s, false);
+ dns_scope_abort_transactions(s);
+
+ while (s->query_candidates)
+ dns_query_candidate_unref(s->query_candidates);
+
+ hashmap_free(s->transactions_by_key);
+
+ ordered_hashmap_free_with_destructor(s->conflict_queue, dns_resource_record_unref);
+ sd_event_source_unref(s->conflict_event_source);
+
+ sd_event_source_unref(s->announce_event_source);
+
+ dns_cache_flush(&s->cache);
+ dns_zone_flush(&s->zone);
+
+ LIST_REMOVE(scopes, s->manager->dns_scopes, s);
+ return mfree(s);
+}
+
+DnsServer *dns_scope_get_dns_server(DnsScope *s) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return NULL;
+
+ if (s->link)
+ return link_get_dns_server(s->link);
+ else
+ return manager_get_dns_server(s->manager);
+}
+
+unsigned dns_scope_get_n_dns_servers(DnsScope *s) {
+ unsigned n = 0;
+ DnsServer *i;
+
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return 0;
+
+ if (s->link)
+ i = s->link->dns_servers;
+ else
+ i = s->manager->dns_servers;
+
+ for (; i; i = i->servers_next)
+ n++;
+
+ return n;
+}
+
+void dns_scope_next_dns_server(DnsScope *s) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return;
+
+ if (s->link)
+ link_next_dns_server(s->link);
+ else
+ manager_next_dns_server(s->manager);
+}
+
+void dns_scope_packet_received(DnsScope *s, usec_t rtt) {
+ assert(s);
+
+ if (rtt <= s->max_rtt)
+ return;
+
+ s->max_rtt = rtt;
+ s->resend_timeout = MIN(MAX(MULTICAST_RESEND_TIMEOUT_MIN_USEC, s->max_rtt * 2), MULTICAST_RESEND_TIMEOUT_MAX_USEC);
+}
+
+void dns_scope_packet_lost(DnsScope *s, usec_t usec) {
+ assert(s);
+
+ if (s->resend_timeout <= usec)
+ s->resend_timeout = MIN(s->resend_timeout * 2, MULTICAST_RESEND_TIMEOUT_MAX_USEC);
+}
+
+static int dns_scope_emit_one(DnsScope *s, int fd, DnsPacket *p) {
+ union in_addr_union addr;
+ int ifindex = 0, r;
+ int family;
+ uint32_t mtu;
+
+ assert(s);
+ assert(p);
+ assert(p->protocol == s->protocol);
+
+ if (s->link) {
+ mtu = s->link->mtu;
+ ifindex = s->link->ifindex;
+ } else
+ mtu = manager_find_mtu(s->manager);
+
+ switch (s->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ assert(fd >= 0);
+
+ if (DNS_PACKET_QDCOUNT(p) > 1)
+ return -EOPNOTSUPP;
+
+ if (p->size > DNS_PACKET_UNICAST_SIZE_MAX)
+ return -EMSGSIZE;
+
+ if (p->size + UDP_PACKET_HEADER_SIZE > mtu)
+ return -EMSGSIZE;
+
+ r = manager_write(s->manager, fd, p);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case DNS_PROTOCOL_LLMNR:
+ assert(fd < 0);
+
+ if (DNS_PACKET_QDCOUNT(p) > 1)
+ return -EOPNOTSUPP;
+
+ if (!ratelimit_below(&s->ratelimit))
+ return -EBUSY;
+
+ family = s->family;
+
+ if (family == AF_INET) {
+ addr.in = LLMNR_MULTICAST_IPV4_ADDRESS;
+ fd = manager_llmnr_ipv4_udp_fd(s->manager);
+ } else if (family == AF_INET6) {
+ addr.in6 = LLMNR_MULTICAST_IPV6_ADDRESS;
+ fd = manager_llmnr_ipv6_udp_fd(s->manager);
+ } else
+ return -EAFNOSUPPORT;
+ if (fd < 0)
+ return fd;
+
+ r = manager_send(s->manager, fd, ifindex, family, &addr, LLMNR_PORT, NULL, p);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ assert(fd < 0);
+
+ if (!ratelimit_below(&s->ratelimit))
+ return -EBUSY;
+
+ family = s->family;
+
+ if (family == AF_INET) {
+ addr.in = MDNS_MULTICAST_IPV4_ADDRESS;
+ fd = manager_mdns_ipv4_fd(s->manager);
+ } else if (family == AF_INET6) {
+ addr.in6 = MDNS_MULTICAST_IPV6_ADDRESS;
+ fd = manager_mdns_ipv6_fd(s->manager);
+ } else
+ return -EAFNOSUPPORT;
+ if (fd < 0)
+ return fd;
+
+ r = manager_send(s->manager, fd, ifindex, family, &addr, MDNS_PORT, NULL, p);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ return 1;
+}
+
+int dns_scope_emit_udp(DnsScope *s, int fd, DnsPacket *p) {
+ int r;
+
+ assert(s);
+ assert(p);
+ assert(p->protocol == s->protocol);
+ assert((s->protocol == DNS_PROTOCOL_DNS) == (fd >= 0));
+
+ do {
+ /* If there are multiple linked packets, set the TC bit in all but the last of them */
+ if (p->more) {
+ assert(p->protocol == DNS_PROTOCOL_MDNS);
+ dns_packet_set_flags(p, true, true);
+ }
+
+ r = dns_scope_emit_one(s, fd, p);
+ if (r < 0)
+ return r;
+
+ p = p->more;
+ } while (p);
+
+ return 0;
+}
+
+static int dns_scope_socket(
+ DnsScope *s,
+ int type,
+ int family,
+ const union in_addr_union *address,
+ DnsServer *server,
+ uint16_t port,
+ union sockaddr_union *ret_socket_address) {
+
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa;
+ socklen_t salen;
+ int r, ifindex;
+
+ assert(s);
+
+ if (server) {
+ assert(family == AF_UNSPEC);
+ assert(!address);
+
+ ifindex = dns_server_ifindex(server);
+
+ switch (server->family) {
+ case AF_INET:
+ sa = (union sockaddr_union) {
+ .in.sin_family = server->family,
+ .in.sin_port = htobe16(port),
+ .in.sin_addr = server->address.in,
+ };
+ salen = sizeof(sa.in);
+ break;
+ case AF_INET6:
+ sa = (union sockaddr_union) {
+ .in6.sin6_family = server->family,
+ .in6.sin6_port = htobe16(port),
+ .in6.sin6_addr = server->address.in6,
+ .in6.sin6_scope_id = ifindex,
+ };
+ salen = sizeof(sa.in6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ } else {
+ assert(family != AF_UNSPEC);
+ assert(address);
+
+ ifindex = s->link ? s->link->ifindex : 0;
+
+ switch (family) {
+ case AF_INET:
+ sa = (union sockaddr_union) {
+ .in.sin_family = family,
+ .in.sin_port = htobe16(port),
+ .in.sin_addr = address->in,
+ };
+ salen = sizeof(sa.in);
+ break;
+ case AF_INET6:
+ sa = (union sockaddr_union) {
+ .in6.sin6_family = family,
+ .in6.sin6_port = htobe16(port),
+ .in6.sin6_addr = address->in6,
+ .in6.sin6_scope_id = ifindex,
+ };
+ salen = sizeof(sa.in6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ }
+
+ fd = socket(sa.sa.sa_family, type|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (type == SOCK_STREAM) {
+ r = setsockopt_int(fd, IPPROTO_TCP, TCP_NODELAY, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->link) {
+ r = socket_set_unicast_if(fd, sa.sa.sa_family, ifindex);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->protocol == DNS_PROTOCOL_LLMNR) {
+ /* RFC 4795, section 2.5 requires the TTL to be set to 1 */
+ r = socket_set_ttl(fd, sa.sa.sa_family, 1);
+ if (r < 0)
+ return r;
+ }
+
+ if (type == SOCK_DGRAM) {
+ /* Set IP_RECVERR or IPV6_RECVERR to get ICMP error feedback. See discussion in #10345. */
+ r = socket_set_recverr(fd, sa.sa.sa_family, true);
+ if (r < 0)
+ return r;
+
+ r = socket_set_recvpktinfo(fd, sa.sa.sa_family, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_socket_address)
+ *ret_socket_address = sa;
+ else {
+ r = connect(fd, &sa.sa, salen);
+ if (r < 0 && errno != EINPROGRESS)
+ return -errno;
+ }
+
+ return TAKE_FD(fd);
+}
+
+int dns_scope_socket_udp(DnsScope *s, DnsServer *server) {
+ return dns_scope_socket(s, SOCK_DGRAM, AF_UNSPEC, NULL, server, dns_server_port(server), NULL);
+}
+
+int dns_scope_socket_tcp(DnsScope *s, int family, const union in_addr_union *address, DnsServer *server, uint16_t port, union sockaddr_union *ret_socket_address) {
+ /* If ret_socket_address is not NULL, the caller is responsible
+ * for calling connect() or sendmsg(). This is required by TCP
+ * Fast Open, to be able to send the initial SYN packet along
+ * with the first data packet. */
+ return dns_scope_socket(s, SOCK_STREAM, family, address, server, port, ret_socket_address);
+}
+
+static DnsScopeMatch accept_link_local_reverse_lookups(const char *domain) {
+ assert(domain);
+
+ if (dns_name_endswith(domain, "254.169.in-addr.arpa") > 0)
+ return DNS_SCOPE_YES_BASE + 4; /* 4 labels match */
+
+ if (dns_name_endswith(domain, "8.e.f.ip6.arpa") > 0 ||
+ dns_name_endswith(domain, "9.e.f.ip6.arpa") > 0 ||
+ dns_name_endswith(domain, "a.e.f.ip6.arpa") > 0 ||
+ dns_name_endswith(domain, "b.e.f.ip6.arpa") > 0)
+ return DNS_SCOPE_YES_BASE + 5; /* 5 labels match */
+
+ return _DNS_SCOPE_MATCH_INVALID;
+}
+
+DnsScopeMatch dns_scope_good_domain(
+ DnsScope *s,
+ int ifindex,
+ uint64_t flags,
+ const char *domain) {
+
+ DnsSearchDomain *d;
+
+ /* This returns the following return values:
+ *
+ * DNS_SCOPE_NO → This scope is not suitable for lookups of this domain, at all
+ * DNS_SCOPE_MAYBE → This scope is suitable, but only if nothing else wants it
+ * DNS_SCOPE_YES_BASE+n → This scope is suitable, and 'n' suffix labels match
+ *
+ * (The idea is that the caller will only use the scopes with the longest 'n' returned. If no scopes return
+ * DNS_SCOPE_YES_BASE+n, then it should use those which returned DNS_SCOPE_MAYBE. It should never use those
+ * which returned DNS_SCOPE_NO.)
+ */
+
+ assert(s);
+ assert(domain);
+
+ /* Checks if the specified domain is something to look up on this scope. Note that this accepts
+ * non-qualified hostnames, i.e. those without any search path suffixed. */
+
+ if (ifindex != 0 && (!s->link || s->link->ifindex != ifindex))
+ return DNS_SCOPE_NO;
+
+ if ((SD_RESOLVED_FLAGS_MAKE(s->protocol, s->family, 0) & flags) == 0)
+ return DNS_SCOPE_NO;
+
+ /* Never resolve any loopback hostname or IP address via DNS, LLMNR or mDNS. Instead, always rely on
+ * synthesized RRs for these. */
+ if (is_localhost(domain) ||
+ dns_name_endswith(domain, "127.in-addr.arpa") > 0 ||
+ dns_name_equal(domain, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa") > 0)
+ return DNS_SCOPE_NO;
+
+ /* Never respond to some of the domains listed in RFC6303 */
+ if (dns_name_endswith(domain, "0.in-addr.arpa") > 0 ||
+ dns_name_equal(domain, "255.255.255.255.in-addr.arpa") > 0 ||
+ dns_name_equal(domain, "0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa") > 0)
+ return DNS_SCOPE_NO;
+
+ /* Never respond to some of the domains listed in RFC6761 */
+ if (dns_name_endswith(domain, "invalid") > 0)
+ return DNS_SCOPE_NO;
+
+ /* Never go to network for the _gateway domain, it's something special, synthesized locally. Note
+ * that we don't use is_gateway_hostname() here, since that has support for the legacy "gateway"
+ * hostname (without the prefix underscore), which we don't want to filter on all protocols. i.e. we
+ * don't want to filter "gateway" on classic DNS, since there might very well be such a host inside
+ * some search domain, and we shouldn't block that. We do filter it in LLMNR however (and on mDNS by
+ * side-effect, since it's a single-label name which mDNS doesn't accept anyway). */
+ if (dns_name_equal(domain, "_gateway") > 0)
+ return DNS_SCOPE_NO;
+
+ switch (s->protocol) {
+
+ case DNS_PROTOCOL_DNS: {
+ bool has_search_domains = false;
+ int n_best = -1;
+
+ /* Never route things to scopes that lack DNS servers */
+ if (!dns_scope_get_dns_server(s))
+ return DNS_SCOPE_NO;
+
+ /* Always honour search domains for routing queries, except if this scope lacks DNS servers. Note that
+ * we return DNS_SCOPE_YES here, rather than just DNS_SCOPE_MAYBE, which means other wildcard scopes
+ * won't be considered anymore. */
+ LIST_FOREACH(domains, d, dns_scope_get_search_domains(s)) {
+
+ if (!d->route_only && !dns_name_is_root(d->name))
+ has_search_domains = true;
+
+ if (dns_name_endswith(domain, d->name) > 0) {
+ int c;
+
+ c = dns_name_count_labels(d->name);
+ if (c < 0)
+ continue;
+
+ if (c > n_best)
+ n_best = c;
+ }
+ }
+
+ /* If there's a true search domain defined for this scope, and the query is single-label,
+ * then let's resolve things here, prefereably. Note that LLMNR considers itself
+ * authoritative for single-label names too, at the same preference, see below. */
+ if (has_search_domains && dns_name_is_single_label(domain))
+ return DNS_SCOPE_YES_BASE + 1;
+
+ /* Let's return the number of labels in the best matching result */
+ if (n_best >= 0) {
+ assert(n_best <= DNS_SCOPE_YES_END - DNS_SCOPE_YES_BASE);
+ return DNS_SCOPE_YES_BASE + n_best;
+ }
+
+ /* See if this scope is suitable as default route. */
+ if (!dns_scope_is_default_route(s))
+ return DNS_SCOPE_NO;
+
+ /* Exclude link-local IP ranges */
+ if (dns_name_endswith(domain, "254.169.in-addr.arpa") == 0 &&
+ dns_name_endswith(domain, "8.e.f.ip6.arpa") == 0 &&
+ dns_name_endswith(domain, "9.e.f.ip6.arpa") == 0 &&
+ dns_name_endswith(domain, "a.e.f.ip6.arpa") == 0 &&
+ dns_name_endswith(domain, "b.e.f.ip6.arpa") == 0 &&
+ /* If networks use .local in their private setups, they are supposed to also add .local to their search
+ * domains, which we already checked above. Otherwise, we consider .local specific to mDNS and won't
+ * send such queries ordinary DNS servers. */
+ dns_name_endswith(domain, "local") == 0)
+ return DNS_SCOPE_MAYBE;
+
+ return DNS_SCOPE_NO;
+ }
+
+ case DNS_PROTOCOL_MDNS: {
+ DnsScopeMatch m;
+
+ m = accept_link_local_reverse_lookups(domain);
+ if (m >= 0)
+ return m;
+
+ if ((s->family == AF_INET && dns_name_endswith(domain, "in-addr.arpa") > 0) ||
+ (s->family == AF_INET6 && dns_name_endswith(domain, "ip6.arpa") > 0))
+ return DNS_SCOPE_MAYBE;
+
+ if ((dns_name_endswith(domain, "local") > 0 && /* only resolve names ending in .local via mDNS */
+ dns_name_equal(domain, "local") == 0 && /* but not the single-label "local" name itself */
+ manager_is_own_hostname(s->manager, domain) <= 0)) /* never resolve the local hostname via mDNS */
+ return DNS_SCOPE_YES_BASE + 1; /* Return +1, as the top-level .local domain matches, i.e. one label */
+
+ return DNS_SCOPE_NO;
+ }
+
+ case DNS_PROTOCOL_LLMNR: {
+ DnsScopeMatch m;
+
+ m = accept_link_local_reverse_lookups(domain);
+ if (m >= 0)
+ return m;
+
+ if ((s->family == AF_INET && dns_name_endswith(domain, "in-addr.arpa") > 0) ||
+ (s->family == AF_INET6 && dns_name_endswith(domain, "ip6.arpa") > 0))
+ return DNS_SCOPE_MAYBE;
+
+ if ((dns_name_is_single_label(domain) && /* only resolve single label names via LLMNR */
+ !is_gateway_hostname(domain) && /* don't resolve "_gateway" with LLMNR, let local synthesizing logic handle that */
+ dns_name_equal(domain, "local") == 0 && /* don't resolve "local" with LLMNR, it's the top-level domain of mDNS after all, see above */
+ manager_is_own_hostname(s->manager, domain) <= 0)) /* never resolve the local hostname via LLMNR */
+ return DNS_SCOPE_YES_BASE + 1; /* Return +1, as we consider ourselves authoritative
+ * for single-label names, i.e. one label. This is
+ * particularly relevant as it means a "." route on some
+ * other scope won't pull all traffic away from
+ * us. (If people actually want to pull traffic away
+ * from us they should turn off LLMNR on the
+ * link). Note that unicast DNS scopes with search
+ * domains also consider themselves authoritative for
+ * single-label domains, at the same preference (see
+ * above). */
+
+ return DNS_SCOPE_NO;
+ }
+
+ default:
+ assert_not_reached("Unknown scope protocol");
+ }
+}
+
+bool dns_scope_good_key(DnsScope *s, const DnsResourceKey *key) {
+ int key_family;
+
+ assert(s);
+ assert(key);
+
+ /* Check if it makes sense to resolve the specified key on
+ * this scope. Note that this call assumes as fully qualified
+ * name, i.e. the search suffixes already appended. */
+
+ if (key->class != DNS_CLASS_IN)
+ return false;
+
+ if (s->protocol == DNS_PROTOCOL_DNS) {
+
+ /* On classic DNS, looking up non-address RRs is always fine. (Specifically, we want to
+ * permit looking up DNSKEY and DS records on the root and top-level domains.) */
+ if (!dns_resource_key_is_address(key))
+ return true;
+
+ /* Unless explicitly overridden, we refuse to look up A and AAAA RRs on the root and
+ * single-label domains, under the assumption that those should be resolved via LLMNR or
+ * search path only, and should not be leaked onto the internet. */
+ const char* name = dns_resource_key_name(key);
+
+ if (!s->manager->resolve_unicast_single_label &&
+ dns_name_is_single_label(name))
+ return false;
+
+ return !dns_name_is_root(name);
+ }
+
+ /* On mDNS and LLMNR, send A and AAAA queries only on the
+ * respective scopes */
+
+ key_family = dns_type_to_af(key->type);
+ if (key_family < 0)
+ return true;
+
+ return key_family == s->family;
+}
+
+static int dns_scope_multicast_membership(DnsScope *s, bool b, struct in_addr in, struct in6_addr in6) {
+ int fd;
+
+ assert(s);
+ assert(s->link);
+
+ if (s->family == AF_INET) {
+ struct ip_mreqn mreqn = {
+ .imr_multiaddr = in,
+ .imr_ifindex = s->link->ifindex,
+ };
+
+ if (s->protocol == DNS_PROTOCOL_LLMNR)
+ fd = manager_llmnr_ipv4_udp_fd(s->manager);
+ else
+ fd = manager_mdns_ipv4_fd(s->manager);
+
+ if (fd < 0)
+ return fd;
+
+ /* Always first try to drop membership before we add
+ * one. This is necessary on some devices, such as
+ * veth. */
+ if (b)
+ (void) setsockopt(fd, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreqn, sizeof(mreqn));
+
+ if (setsockopt(fd, IPPROTO_IP, b ? IP_ADD_MEMBERSHIP : IP_DROP_MEMBERSHIP, &mreqn, sizeof(mreqn)) < 0)
+ return -errno;
+
+ } else if (s->family == AF_INET6) {
+ struct ipv6_mreq mreq = {
+ .ipv6mr_multiaddr = in6,
+ .ipv6mr_interface = s->link->ifindex,
+ };
+
+ if (s->protocol == DNS_PROTOCOL_LLMNR)
+ fd = manager_llmnr_ipv6_udp_fd(s->manager);
+ else
+ fd = manager_mdns_ipv6_fd(s->manager);
+
+ if (fd < 0)
+ return fd;
+
+ if (b)
+ (void) setsockopt(fd, IPPROTO_IPV6, IPV6_DROP_MEMBERSHIP, &mreq, sizeof(mreq));
+
+ if (setsockopt(fd, IPPROTO_IPV6, b ? IPV6_ADD_MEMBERSHIP : IPV6_DROP_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
+ return -errno;
+ } else
+ return -EAFNOSUPPORT;
+
+ return 0;
+}
+
+int dns_scope_llmnr_membership(DnsScope *s, bool b) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_LLMNR)
+ return 0;
+
+ return dns_scope_multicast_membership(s, b, LLMNR_MULTICAST_IPV4_ADDRESS, LLMNR_MULTICAST_IPV6_ADDRESS);
+}
+
+int dns_scope_mdns_membership(DnsScope *s, bool b) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_MDNS)
+ return 0;
+
+ return dns_scope_multicast_membership(s, b, MDNS_MULTICAST_IPV4_ADDRESS, MDNS_MULTICAST_IPV6_ADDRESS);
+}
+
+int dns_scope_make_reply_packet(
+ DnsScope *s,
+ uint16_t id,
+ int rcode,
+ DnsQuestion *q,
+ DnsAnswer *answer,
+ DnsAnswer *soa,
+ bool tentative,
+ DnsPacket **ret) {
+
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ assert(s);
+ assert(ret);
+
+ if (dns_question_isempty(q) &&
+ dns_answer_isempty(answer) &&
+ dns_answer_isempty(soa))
+ return -EINVAL;
+
+ r = dns_packet_new(&p, s->protocol, 0, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ DNS_PACKET_HEADER(p)->id = id;
+ DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
+ 1 /* qr */,
+ 0 /* opcode */,
+ 0 /* c */,
+ 0 /* tc */,
+ tentative,
+ 0 /* (ra) */,
+ 0 /* (ad) */,
+ 0 /* (cd) */,
+ rcode));
+
+ r = dns_packet_append_question(p, q);
+ if (r < 0)
+ return r;
+ DNS_PACKET_HEADER(p)->qdcount = htobe16(dns_question_size(q));
+
+ r = dns_packet_append_answer(p, answer);
+ if (r < 0)
+ return r;
+ DNS_PACKET_HEADER(p)->ancount = htobe16(dns_answer_size(answer));
+
+ r = dns_packet_append_answer(p, soa);
+ if (r < 0)
+ return r;
+ DNS_PACKET_HEADER(p)->arcount = htobe16(dns_answer_size(soa));
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static void dns_scope_verify_conflicts(DnsScope *s, DnsPacket *p) {
+ DnsResourceRecord *rr;
+ DnsResourceKey *key;
+
+ assert(s);
+ assert(p);
+
+ DNS_QUESTION_FOREACH(key, p->question)
+ dns_zone_verify_conflicts(&s->zone, key);
+
+ DNS_ANSWER_FOREACH(rr, p->answer)
+ dns_zone_verify_conflicts(&s->zone, rr->key);
+}
+
+void dns_scope_process_query(DnsScope *s, DnsStream *stream, DnsPacket *p) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL, *soa = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
+ DnsResourceKey *key = NULL;
+ bool tentative = false;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ if (p->protocol != DNS_PROTOCOL_LLMNR)
+ return;
+
+ if (p->ipproto == IPPROTO_UDP) {
+ /* Don't accept UDP queries directed to anything but
+ * the LLMNR multicast addresses. See RFC 4795,
+ * section 2.5. */
+
+ if (p->family == AF_INET && !in_addr_equal(AF_INET, &p->destination, (union in_addr_union*) &LLMNR_MULTICAST_IPV4_ADDRESS))
+ return;
+
+ if (p->family == AF_INET6 && !in_addr_equal(AF_INET6, &p->destination, (union in_addr_union*) &LLMNR_MULTICAST_IPV6_ADDRESS))
+ return;
+ }
+
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to extract resource records from incoming packet: %m");
+ return;
+ }
+
+ if (DNS_PACKET_LLMNR_C(p)) {
+ /* Somebody notified us about a possible conflict */
+ dns_scope_verify_conflicts(s, p);
+ return;
+ }
+
+ assert(dns_question_size(p->question) == 1);
+ key = p->question->keys[0];
+
+ r = dns_zone_lookup(&s->zone, key, 0, &answer, &soa, &tentative);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to look up key: %m");
+ return;
+ }
+ if (r == 0)
+ return;
+
+ if (answer)
+ dns_answer_order_by_scope(answer, in_addr_is_link_local(p->family, &p->sender) > 0);
+
+ r = dns_scope_make_reply_packet(s, DNS_PACKET_ID(p), DNS_RCODE_SUCCESS, p->question, answer, soa, tentative, &reply);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to build reply packet: %m");
+ return;
+ }
+
+ if (stream) {
+ r = dns_stream_write_packet(stream, reply);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to enqueue reply packet: %m");
+ return;
+ }
+
+ /* Let's take an extra reference on this stream, so that it stays around after returning. The reference
+ * will be dangling until the stream is disconnected, and the default completion handler of the stream
+ * will then unref the stream and destroy it */
+ if (DNS_STREAM_QUEUED(stream))
+ dns_stream_ref(stream);
+ } else {
+ int fd;
+
+ if (!ratelimit_below(&s->ratelimit))
+ return;
+
+ if (p->family == AF_INET)
+ fd = manager_llmnr_ipv4_udp_fd(s->manager);
+ else if (p->family == AF_INET6)
+ fd = manager_llmnr_ipv6_udp_fd(s->manager);
+ else {
+ log_debug("Unknown protocol");
+ return;
+ }
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to get reply socket: %m");
+ return;
+ }
+
+ /* Note that we always immediately reply to all LLMNR
+ * requests, and do not wait any time, since we
+ * verified uniqueness for all records. Also see RFC
+ * 4795, Section 2.7 */
+
+ r = manager_send(s->manager, fd, p->ifindex, p->family, &p->sender, p->sender_port, NULL, reply);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to send reply packet: %m");
+ return;
+ }
+ }
+}
+
+DnsTransaction *dns_scope_find_transaction(DnsScope *scope, DnsResourceKey *key, bool cache_ok) {
+ DnsTransaction *t;
+
+ assert(scope);
+ assert(key);
+
+ /* Try to find an ongoing transaction that is a equal to the
+ * specified question */
+ t = hashmap_get(scope->transactions_by_key, key);
+ if (!t)
+ return NULL;
+
+ /* Refuse reusing transactions that completed based on cached
+ * data instead of a real packet, if that's requested. */
+ if (!cache_ok &&
+ IN_SET(t->state, DNS_TRANSACTION_SUCCESS, DNS_TRANSACTION_RCODE_FAILURE) &&
+ t->answer_source != DNS_TRANSACTION_NETWORK)
+ return NULL;
+
+ return t;
+}
+
+static int dns_scope_make_conflict_packet(
+ DnsScope *s,
+ DnsResourceRecord *rr,
+ DnsPacket **ret) {
+
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ assert(s);
+ assert(rr);
+ assert(ret);
+
+ r = dns_packet_new(&p, s->protocol, 0, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
+ 0 /* qr */,
+ 0 /* opcode */,
+ 1 /* conflict */,
+ 0 /* tc */,
+ 0 /* t */,
+ 0 /* (ra) */,
+ 0 /* (ad) */,
+ 0 /* (cd) */,
+ 0));
+
+ /* For mDNS, the transaction ID should always be 0 */
+ if (s->protocol != DNS_PROTOCOL_MDNS)
+ random_bytes(&DNS_PACKET_HEADER(p)->id, sizeof(uint16_t));
+
+ DNS_PACKET_HEADER(p)->qdcount = htobe16(1);
+ DNS_PACKET_HEADER(p)->arcount = htobe16(1);
+
+ r = dns_packet_append_key(p, rr->key, 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_rr(p, rr, 0, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static int on_conflict_dispatch(sd_event_source *es, usec_t usec, void *userdata) {
+ DnsScope *scope = userdata;
+ int r;
+
+ assert(es);
+ assert(scope);
+
+ scope->conflict_event_source = sd_event_source_unref(scope->conflict_event_source);
+
+ for (;;) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+
+ key = ordered_hashmap_first_key(scope->conflict_queue);
+ if (!key)
+ break;
+
+ rr = ordered_hashmap_remove(scope->conflict_queue, key);
+ assert(rr);
+
+ r = dns_scope_make_conflict_packet(scope, rr, &p);
+ if (r < 0) {
+ log_error_errno(r, "Failed to make conflict packet: %m");
+ return 0;
+ }
+
+ r = dns_scope_emit_udp(scope, -1, p);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send conflict packet: %m");
+ }
+
+ return 0;
+}
+
+int dns_scope_notify_conflict(DnsScope *scope, DnsResourceRecord *rr) {
+ usec_t jitter;
+ int r;
+
+ assert(scope);
+ assert(rr);
+
+ /* We don't send these queries immediately. Instead, we queue
+ * them, and send them after some jitter delay. */
+ r = ordered_hashmap_ensure_allocated(&scope->conflict_queue, &dns_resource_key_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return r;
+ }
+
+ /* We only place one RR per key in the conflict
+ * messages, not all of them. That should be enough to
+ * indicate where there might be a conflict */
+ r = ordered_hashmap_put(scope->conflict_queue, rr->key, rr);
+ if (IN_SET(r, 0, -EEXIST))
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to queue conflicting RR: %m");
+
+ dns_resource_key_ref(rr->key);
+ dns_resource_record_ref(rr);
+
+ if (scope->conflict_event_source)
+ return 0;
+
+ random_bytes(&jitter, sizeof(jitter));
+ jitter %= LLMNR_JITTER_INTERVAL_USEC;
+
+ r = sd_event_add_time_relative(
+ scope->manager->event,
+ &scope->conflict_event_source,
+ clock_boottime_or_monotonic(),
+ jitter,
+ LLMNR_JITTER_INTERVAL_USEC,
+ on_conflict_dispatch, scope);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add conflict dispatch event: %m");
+
+ (void) sd_event_source_set_description(scope->conflict_event_source, "scope-conflict");
+
+ return 0;
+}
+
+void dns_scope_check_conflicts(DnsScope *scope, DnsPacket *p) {
+ DnsResourceRecord *rr;
+ int r;
+
+ assert(scope);
+ assert(p);
+
+ if (!IN_SET(p->protocol, DNS_PROTOCOL_LLMNR, DNS_PROTOCOL_MDNS))
+ return;
+
+ if (DNS_PACKET_RRCOUNT(p) <= 0)
+ return;
+
+ if (p->protocol == DNS_PROTOCOL_LLMNR) {
+ if (DNS_PACKET_LLMNR_C(p) != 0)
+ return;
+
+ if (DNS_PACKET_LLMNR_T(p) != 0)
+ return;
+ }
+
+ if (manager_our_packet(scope->manager, p))
+ return;
+
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to extract packet: %m");
+ return;
+ }
+
+ log_debug("Checking for conflicts...");
+
+ DNS_ANSWER_FOREACH(rr, p->answer) {
+ /* No conflict if it is DNS-SD RR used for service enumeration. */
+ if (dns_resource_key_is_dnssd_ptr(rr->key))
+ continue;
+
+ /* Check for conflicts against the local zone. If we
+ * found one, we won't check any further */
+ r = dns_zone_check_conflicts(&scope->zone, rr);
+ if (r != 0)
+ continue;
+
+ /* Check for conflicts against the local cache. If so,
+ * send out an advisory query, to inform everybody */
+ r = dns_cache_check_conflicts(&scope->cache, rr, p->family, &p->sender);
+ if (r <= 0)
+ continue;
+
+ dns_scope_notify_conflict(scope, rr);
+ }
+}
+
+void dns_scope_dump(DnsScope *s, FILE *f) {
+ assert(s);
+
+ if (!f)
+ f = stdout;
+
+ fputs("[Scope protocol=", f);
+ fputs(dns_protocol_to_string(s->protocol), f);
+
+ if (s->link) {
+ fputs(" interface=", f);
+ fputs(s->link->ifname, f);
+ }
+
+ if (s->family != AF_UNSPEC) {
+ fputs(" family=", f);
+ fputs(af_to_name(s->family), f);
+ }
+
+ fputs("]\n", f);
+
+ if (!dns_zone_is_empty(&s->zone)) {
+ fputs("ZONE:\n", f);
+ dns_zone_dump(&s->zone, f);
+ }
+
+ if (!dns_cache_is_empty(&s->cache)) {
+ fputs("CACHE:\n", f);
+ dns_cache_dump(&s->cache, f);
+ }
+}
+
+DnsSearchDomain *dns_scope_get_search_domains(DnsScope *s) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return NULL;
+
+ if (s->link)
+ return s->link->search_domains;
+
+ return s->manager->search_domains;
+}
+
+bool dns_scope_name_wants_search_domain(DnsScope *s, const char *name) {
+ assert(s);
+
+ if (s->protocol != DNS_PROTOCOL_DNS)
+ return false;
+
+ return dns_name_is_single_label(name);
+}
+
+bool dns_scope_network_good(DnsScope *s) {
+ /* Checks whether the network is in good state for lookups on this scope. For mDNS/LLMNR/Classic DNS scopes
+ * bound to links this is easy, as they don't even exist if the link isn't in a suitable state. For the global
+ * DNS scope we check whether there are any links that are up and have an address.
+ *
+ * Note that Linux routing is complex and even systems that superficially have no IPv4 address might
+ * be able to route IPv4 (and similar for IPv6), hence let's make a check here independent of address
+ * family. */
+
+ if (s->link)
+ return true;
+
+ return manager_routable(s->manager);
+}
+
+int dns_scope_ifindex(DnsScope *s) {
+ assert(s);
+
+ if (s->link)
+ return s->link->ifindex;
+
+ return 0;
+}
+
+static int on_announcement_timeout(sd_event_source *s, usec_t usec, void *userdata) {
+ DnsScope *scope = userdata;
+
+ assert(s);
+
+ scope->announce_event_source = sd_event_source_unref(scope->announce_event_source);
+
+ (void) dns_scope_announce(scope, false);
+ return 0;
+}
+
+int dns_scope_announce(DnsScope *scope, bool goodbye) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ _cleanup_set_free_ Set *types = NULL;
+ DnsTransaction *t;
+ DnsZoneItem *z, *i;
+ unsigned size = 0;
+ char *service_type;
+ int r;
+
+ if (!scope)
+ return 0;
+
+ if (scope->protocol != DNS_PROTOCOL_MDNS)
+ return 0;
+
+ /* Check if we're done with probing. */
+ LIST_FOREACH(transactions_by_scope, t, scope->transactions)
+ if (DNS_TRANSACTION_IS_LIVE(t->state))
+ return 0;
+
+ /* Check if there're services pending conflict resolution. */
+ if (manager_next_dnssd_names(scope->manager))
+ return 0; /* we reach this point only if changing hostname didn't help */
+
+ /* Calculate answer's size. */
+ HASHMAP_FOREACH(z, scope->zone.by_key) {
+ if (z->state != DNS_ZONE_ITEM_ESTABLISHED)
+ continue;
+
+ if (z->rr->key->type == DNS_TYPE_PTR &&
+ !dns_zone_contains_name(&scope->zone, z->rr->ptr.name)) {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ log_debug("Skip PTR RR <%s> since its counterparts seem to be withdrawn", dns_resource_key_to_string(z->rr->key, key_str, sizeof key_str));
+ z->state = DNS_ZONE_ITEM_WITHDRAWN;
+ continue;
+ }
+
+ /* Collect service types for _services._dns-sd._udp.local RRs in a set */
+ if (!scope->announced &&
+ dns_resource_key_is_dnssd_ptr(z->rr->key)) {
+ if (!set_contains(types, dns_resource_key_name(z->rr->key))) {
+ r = set_ensure_put(&types, &dns_name_hash_ops, dns_resource_key_name(z->rr->key));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add item to set: %m");
+ }
+ }
+
+ LIST_FOREACH(by_key, i, z)
+ size++;
+ }
+
+ answer = dns_answer_new(size + set_size(types));
+ if (!answer)
+ return log_oom();
+
+ /* Second iteration, actually add RRs to the answer. */
+ HASHMAP_FOREACH(z, scope->zone.by_key)
+ LIST_FOREACH (by_key, i, z) {
+ DnsAnswerFlags flags;
+
+ if (i->state != DNS_ZONE_ITEM_ESTABLISHED)
+ continue;
+
+ if (dns_resource_key_is_dnssd_ptr(i->rr->key))
+ flags = goodbye ? DNS_ANSWER_GOODBYE : 0;
+ else
+ flags = goodbye ? (DNS_ANSWER_GOODBYE|DNS_ANSWER_CACHE_FLUSH) : DNS_ANSWER_CACHE_FLUSH;
+
+ r = dns_answer_add(answer, i->rr, 0 , flags);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add RR to announce: %m");
+ }
+
+ /* Since all the active services are in the zone make them discoverable now. */
+ SET_FOREACH(service_type, types) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr;
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_PTR,
+ "_services._dns-sd._udp.local");
+ rr->ptr.name = strdup(service_type);
+ rr->ttl = MDNS_DEFAULT_TTL;
+
+ r = dns_zone_put(&scope->zone, scope, rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add DNS-SD PTR record to MDNS zone: %m");
+
+ r = dns_answer_add(answer, rr, 0 , 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add RR to announce: %m");
+ }
+
+ if (dns_answer_isempty(answer))
+ return 0;
+
+ r = dns_scope_make_reply_packet(scope, 0, DNS_RCODE_SUCCESS, NULL, answer, NULL, false, &p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to build reply packet: %m");
+
+ r = dns_scope_emit_udp(scope, -1, p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send reply packet: %m");
+
+ /* In section 8.3 of RFC6762: "The Multicast DNS responder MUST send at least two unsolicited
+ * responses, one second apart." */
+ if (!scope->announced) {
+ scope->announced = true;
+
+ r = sd_event_add_time_relative(
+ scope->manager->event,
+ &scope->announce_event_source,
+ clock_boottime_or_monotonic(),
+ MDNS_ANNOUNCE_DELAY,
+ MDNS_JITTER_RANGE_USEC,
+ on_announcement_timeout, scope);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to schedule second announcement: %m");
+
+ (void) sd_event_source_set_description(scope->announce_event_source, "mdns-announce");
+ }
+
+ return 0;
+}
+
+int dns_scope_add_dnssd_services(DnsScope *scope) {
+ DnssdService *service;
+ DnssdTxtData *txt_data;
+ int r;
+
+ assert(scope);
+
+ if (hashmap_size(scope->manager->dnssd_services) == 0)
+ return 0;
+
+ scope->announced = false;
+
+ HASHMAP_FOREACH(service, scope->manager->dnssd_services) {
+ service->withdrawn = false;
+
+ r = dns_zone_put(&scope->zone, scope, service->ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add PTR record to MDNS zone: %m");
+
+ r = dns_zone_put(&scope->zone, scope, service->srv_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add SRV record to MDNS zone: %m");
+
+ LIST_FOREACH(items, txt_data, service->txt_data_items) {
+ r = dns_zone_put(&scope->zone, scope, txt_data->rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add TXT record to MDNS zone: %m");
+ }
+ }
+
+ return 0;
+}
+
+int dns_scope_remove_dnssd_services(DnsScope *scope) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ DnssdService *service;
+ DnssdTxtData *txt_data;
+ int r;
+
+ assert(scope);
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_PTR,
+ "_services._dns-sd._udp.local");
+ if (!key)
+ return log_oom();
+
+ r = dns_zone_remove_rrs_by_key(&scope->zone, key);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(service, scope->manager->dnssd_services) {
+ dns_zone_remove_rr(&scope->zone, service->ptr_rr);
+ dns_zone_remove_rr(&scope->zone, service->srv_rr);
+ LIST_FOREACH(items, txt_data, service->txt_data_items)
+ dns_zone_remove_rr(&scope->zone, txt_data->rr);
+ }
+
+ return 0;
+}
+
+static bool dns_scope_has_route_only_domains(DnsScope *scope) {
+ DnsSearchDomain *domain, *first;
+ bool route_only = false;
+
+ assert(scope);
+ assert(scope->protocol == DNS_PROTOCOL_DNS);
+
+ /* Returns 'true' if this scope is suitable for queries to specific domains only. For that we check
+ * if there are any route-only domains on this interface, as a heuristic to discern VPN-style links
+ * from non-VPN-style links. Returns 'false' for all other cases, i.e. if the scope is intended to
+ * take queries to arbitrary domains, i.e. has no routing domains set. */
+
+ if (scope->link)
+ first = scope->link->search_domains;
+ else
+ first = scope->manager->search_domains;
+
+ LIST_FOREACH(domains, domain, first) {
+ /* "." means "any domain", thus the interface takes any kind of traffic. Thus, we exit early
+ * here, as it doesn't really matter whether this link has any route-only domains or not,
+ * "~." really trumps everything and clearly indicates that this interface shall receive all
+ * traffic it can get. */
+ if (dns_name_is_root(DNS_SEARCH_DOMAIN_NAME(domain)))
+ return false;
+
+ if (domain->route_only)
+ route_only = true;
+ }
+
+ return route_only;
+}
+
+bool dns_scope_is_default_route(DnsScope *scope) {
+ assert(scope);
+
+ /* Only use DNS scopes as default routes */
+ if (scope->protocol != DNS_PROTOCOL_DNS)
+ return false;
+
+ /* The global DNS scope is always suitable as default route */
+ if (!scope->link)
+ return true;
+
+ /* Honour whatever is explicitly configured. This is really the best approach, and trumps any
+ * automatic logic. */
+ if (scope->link->default_route >= 0)
+ return scope->link->default_route;
+
+ /* Otherwise check if we have any route-only domains, as a sensible heuristic: if so, let's not
+ * volunteer as default route. */
+ return !dns_scope_has_route_only_domains(scope);
+}
diff --git a/src/resolve/resolved-dns-scope.h b/src/resolve/resolved-dns-scope.h
new file mode 100644
index 0000000..de05c08
--- /dev/null
+++ b/src/resolve/resolved-dns-scope.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "list.h"
+#include "ratelimit.h"
+
+typedef struct DnsQueryCandidate DnsQueryCandidate;
+typedef struct DnsScope DnsScope;
+
+#include "resolved-dns-cache.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+
+#include "resolved-dns-search-domain.h"
+#include "resolved-dns-server.h"
+#include "resolved-dns-stream.h"
+#include "resolved-dns-zone.h"
+
+typedef enum DnsScopeMatch {
+ DNS_SCOPE_NO,
+ DNS_SCOPE_MAYBE,
+ DNS_SCOPE_YES_BASE, /* Add the number of matching labels to this */
+ DNS_SCOPE_YES_END = DNS_SCOPE_YES_BASE + DNS_N_LABELS_MAX,
+ _DNS_SCOPE_MATCH_MAX,
+ _DNS_SCOPE_MATCH_INVALID = -1
+} DnsScopeMatch;
+
+struct DnsScope {
+ Manager *manager;
+
+ DnsProtocol protocol;
+ int family;
+
+ /* Copied at scope creation time from the link/manager */
+ DnssecMode dnssec_mode;
+ DnsOverTlsMode dns_over_tls_mode;
+
+ Link *link;
+
+ DnsCache cache;
+ DnsZone zone;
+
+ OrderedHashmap *conflict_queue;
+ sd_event_source *conflict_event_source;
+
+ bool announced:1;
+ sd_event_source *announce_event_source;
+
+ RateLimit ratelimit;
+
+ usec_t resend_timeout;
+ usec_t max_rtt;
+
+ LIST_HEAD(DnsQueryCandidate, query_candidates);
+
+ /* Note that we keep track of ongoing transactions in two
+ * ways: once in a hashmap, indexed by the rr key, and once in
+ * a linked list. We use the hashmap to quickly find
+ * transactions we can reuse for a key. But note that there
+ * might be multiple transactions for the same key (because
+ * the zone probing can't reuse a transaction answered from
+ * the zone or the cache), and the hashmap only tracks the
+ * most recent entry. */
+ Hashmap *transactions_by_key;
+ LIST_HEAD(DnsTransaction, transactions);
+
+ LIST_FIELDS(DnsScope, scopes);
+};
+
+int dns_scope_new(Manager *m, DnsScope **ret, Link *l, DnsProtocol p, int family);
+DnsScope* dns_scope_free(DnsScope *s);
+
+void dns_scope_packet_received(DnsScope *s, usec_t rtt);
+void dns_scope_packet_lost(DnsScope *s, usec_t usec);
+
+int dns_scope_emit_udp(DnsScope *s, int fd, DnsPacket *p);
+int dns_scope_socket_tcp(DnsScope *s, int family, const union in_addr_union *address, DnsServer *server, uint16_t port, union sockaddr_union *ret_socket_address);
+int dns_scope_socket_udp(DnsScope *s, DnsServer *server);
+
+DnsScopeMatch dns_scope_good_domain(DnsScope *s, int ifindex, uint64_t flags, const char *domain);
+bool dns_scope_good_key(DnsScope *s, const DnsResourceKey *key);
+
+DnsServer *dns_scope_get_dns_server(DnsScope *s);
+unsigned dns_scope_get_n_dns_servers(DnsScope *s);
+void dns_scope_next_dns_server(DnsScope *s);
+
+int dns_scope_llmnr_membership(DnsScope *s, bool b);
+int dns_scope_mdns_membership(DnsScope *s, bool b);
+
+int dns_scope_make_reply_packet(DnsScope *s, uint16_t id, int rcode, DnsQuestion *q, DnsAnswer *answer, DnsAnswer *soa, bool tentative, DnsPacket **ret);
+void dns_scope_process_query(DnsScope *s, DnsStream *stream, DnsPacket *p);
+
+DnsTransaction *dns_scope_find_transaction(DnsScope *scope, DnsResourceKey *key, bool cache_ok);
+
+int dns_scope_notify_conflict(DnsScope *scope, DnsResourceRecord *rr);
+void dns_scope_check_conflicts(DnsScope *scope, DnsPacket *p);
+
+void dns_scope_dump(DnsScope *s, FILE *f);
+
+DnsSearchDomain *dns_scope_get_search_domains(DnsScope *s);
+
+bool dns_scope_name_wants_search_domain(DnsScope *s, const char *name);
+
+bool dns_scope_network_good(DnsScope *s);
+
+int dns_scope_ifindex(DnsScope *s);
+
+int dns_scope_announce(DnsScope *scope, bool goodbye);
+
+int dns_scope_add_dnssd_services(DnsScope *scope);
+int dns_scope_remove_dnssd_services(DnsScope *scope);
+
+bool dns_scope_is_default_route(DnsScope *scope);
diff --git a/src/resolve/resolved-dns-search-domain.c b/src/resolve/resolved-dns-search-domain.c
new file mode 100644
index 0000000..a01f3dc
--- /dev/null
+++ b/src/resolve/resolved-dns-search-domain.c
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "resolved-dns-search-domain.h"
+#include "resolved-link.h"
+#include "resolved-manager.h"
+
+int dns_search_domain_new(
+ Manager *m,
+ DnsSearchDomain **ret,
+ DnsSearchDomainType type,
+ Link *l,
+ const char *name) {
+
+ _cleanup_free_ char *normalized = NULL;
+ DnsSearchDomain *d;
+ int r;
+
+ assert(m);
+ assert((type == DNS_SEARCH_DOMAIN_LINK) == !!l);
+ assert(name);
+
+ r = dns_name_normalize(name, 0, &normalized);
+ if (r < 0)
+ return r;
+
+ if (l) {
+ if (l->n_search_domains >= LINK_SEARCH_DOMAINS_MAX)
+ return -E2BIG;
+ } else {
+ if (m->n_search_domains >= MANAGER_SEARCH_DOMAINS_MAX)
+ return -E2BIG;
+ }
+
+ d = new(DnsSearchDomain, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (DnsSearchDomain) {
+ .n_ref = 1,
+ .manager = m,
+ .type = type,
+ .name = TAKE_PTR(normalized),
+ };
+
+ switch (type) {
+
+ case DNS_SEARCH_DOMAIN_LINK:
+ d->link = l;
+ LIST_APPEND(domains, l->search_domains, d);
+ l->n_search_domains++;
+ break;
+
+ case DNS_SERVER_SYSTEM:
+ LIST_APPEND(domains, m->search_domains, d);
+ m->n_search_domains++;
+ break;
+
+ default:
+ assert_not_reached("Unknown search domain type");
+ }
+
+ d->linked = true;
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+}
+
+static DnsSearchDomain* dns_search_domain_free(DnsSearchDomain *d) {
+ assert(d);
+
+ free(d->name);
+ return mfree(d);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsSearchDomain, dns_search_domain, dns_search_domain_free);
+
+void dns_search_domain_unlink(DnsSearchDomain *d) {
+ assert(d);
+ assert(d->manager);
+
+ if (!d->linked)
+ return;
+
+ switch (d->type) {
+
+ case DNS_SEARCH_DOMAIN_LINK:
+ assert(d->link);
+ assert(d->link->n_search_domains > 0);
+ LIST_REMOVE(domains, d->link->search_domains, d);
+ d->link->n_search_domains--;
+ break;
+
+ case DNS_SEARCH_DOMAIN_SYSTEM:
+ assert(d->manager->n_search_domains > 0);
+ LIST_REMOVE(domains, d->manager->search_domains, d);
+ d->manager->n_search_domains--;
+ break;
+ }
+
+ d->linked = false;
+
+ dns_search_domain_unref(d);
+}
+
+void dns_search_domain_move_back_and_unmark(DnsSearchDomain *d) {
+ DnsSearchDomain *tail;
+
+ assert(d);
+
+ if (!d->marked)
+ return;
+
+ d->marked = false;
+
+ if (!d->linked || !d->domains_next)
+ return;
+
+ switch (d->type) {
+
+ case DNS_SEARCH_DOMAIN_LINK:
+ assert(d->link);
+ LIST_FIND_TAIL(domains, d, tail);
+ LIST_REMOVE(domains, d->link->search_domains, d);
+ LIST_INSERT_AFTER(domains, d->link->search_domains, tail, d);
+ break;
+
+ case DNS_SEARCH_DOMAIN_SYSTEM:
+ LIST_FIND_TAIL(domains, d, tail);
+ LIST_REMOVE(domains, d->manager->search_domains, d);
+ LIST_INSERT_AFTER(domains, d->manager->search_domains, tail, d);
+ break;
+
+ default:
+ assert_not_reached("Unknown search domain type");
+ }
+}
+
+void dns_search_domain_unlink_all(DnsSearchDomain *first) {
+ DnsSearchDomain *next;
+
+ if (!first)
+ return;
+
+ next = first->domains_next;
+ dns_search_domain_unlink(first);
+
+ dns_search_domain_unlink_all(next);
+}
+
+void dns_search_domain_unlink_marked(DnsSearchDomain *first) {
+ DnsSearchDomain *next;
+
+ if (!first)
+ return;
+
+ next = first->domains_next;
+
+ if (first->marked)
+ dns_search_domain_unlink(first);
+
+ dns_search_domain_unlink_marked(next);
+}
+
+void dns_search_domain_mark_all(DnsSearchDomain *first) {
+ if (!first)
+ return;
+
+ first->marked = true;
+ dns_search_domain_mark_all(first->domains_next);
+}
+
+int dns_search_domain_find(DnsSearchDomain *first, const char *name, DnsSearchDomain **ret) {
+ DnsSearchDomain *d;
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ LIST_FOREACH(domains, d, first) {
+
+ r = dns_name_equal(name, d->name);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *ret = d;
+ return 1;
+ }
+ }
+
+ *ret = NULL;
+ return 0;
+}
diff --git a/src/resolve/resolved-dns-search-domain.h b/src/resolve/resolved-dns-search-domain.h
new file mode 100644
index 0000000..ea91a4e
--- /dev/null
+++ b/src/resolve/resolved-dns-search-domain.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "list.h"
+#include "macro.h"
+
+typedef struct DnsSearchDomain DnsSearchDomain;
+typedef struct Link Link;
+typedef struct Manager Manager;
+
+typedef enum DnsSearchDomainType {
+ DNS_SEARCH_DOMAIN_SYSTEM,
+ DNS_SEARCH_DOMAIN_LINK,
+} DnsSearchDomainType;
+
+struct DnsSearchDomain {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ DnsSearchDomainType type;
+ Link *link;
+
+ char *name;
+
+ bool marked:1;
+ bool route_only:1;
+
+ bool linked:1;
+ LIST_FIELDS(DnsSearchDomain, domains);
+};
+
+int dns_search_domain_new(
+ Manager *m,
+ DnsSearchDomain **ret,
+ DnsSearchDomainType type,
+ Link *link,
+ const char *name);
+
+DnsSearchDomain* dns_search_domain_ref(DnsSearchDomain *d);
+DnsSearchDomain* dns_search_domain_unref(DnsSearchDomain *d);
+
+void dns_search_domain_unlink(DnsSearchDomain *d);
+void dns_search_domain_move_back_and_unmark(DnsSearchDomain *d);
+
+void dns_search_domain_unlink_all(DnsSearchDomain *first);
+void dns_search_domain_unlink_marked(DnsSearchDomain *first);
+void dns_search_domain_mark_all(DnsSearchDomain *first);
+
+int dns_search_domain_find(DnsSearchDomain *first, const char *name, DnsSearchDomain **ret);
+
+static inline const char* DNS_SEARCH_DOMAIN_NAME(DnsSearchDomain *d) {
+ return d ? d->name : NULL;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsSearchDomain*, dns_search_domain_unref);
diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c
new file mode 100644
index 0000000..8112374
--- /dev/null
+++ b/src/resolve/resolved-dns-server.c
@@ -0,0 +1,953 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "resolved-bus.h"
+#include "resolved-dns-server.h"
+#include "resolved-dns-stub.h"
+#include "resolved-manager.h"
+#include "resolved-resolv-conf.h"
+#include "siphash24.h"
+#include "string-table.h"
+#include "string-util.h"
+
+/* The amount of time to wait before retrying with a full feature set */
+#define DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC (6 * USEC_PER_HOUR)
+#define DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC (5 * USEC_PER_MINUTE)
+
+/* The number of times we will attempt a certain feature set before degrading */
+#define DNS_SERVER_FEATURE_RETRY_ATTEMPTS 3
+
+int dns_server_new(
+ Manager *m,
+ DnsServer **ret,
+ DnsServerType type,
+ Link *l,
+ int family,
+ const union in_addr_union *in_addr,
+ uint16_t port,
+ int ifindex,
+ const char *server_name) {
+
+ _cleanup_free_ char *name = NULL;
+ DnsServer *s;
+
+ assert(m);
+ assert((type == DNS_SERVER_LINK) == !!l);
+ assert(in_addr);
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return -EAFNOSUPPORT;
+
+ if (l) {
+ if (l->n_dns_servers >= LINK_DNS_SERVERS_MAX)
+ return -E2BIG;
+ } else {
+ if (m->n_dns_servers >= MANAGER_DNS_SERVERS_MAX)
+ return -E2BIG;
+ }
+
+ if (!isempty(server_name)) {
+ name = strdup(server_name);
+ if (!name)
+ return -ENOMEM;
+ }
+
+ s = new(DnsServer, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (DnsServer) {
+ .n_ref = 1,
+ .manager = m,
+ .type = type,
+ .family = family,
+ .address = *in_addr,
+ .port = port,
+ .ifindex = ifindex,
+ .server_name = TAKE_PTR(name),
+ };
+
+ dns_server_reset_features(s);
+
+ switch (type) {
+
+ case DNS_SERVER_LINK:
+ s->link = l;
+ LIST_APPEND(servers, l->dns_servers, s);
+ l->n_dns_servers++;
+ break;
+
+ case DNS_SERVER_SYSTEM:
+ LIST_APPEND(servers, m->dns_servers, s);
+ m->n_dns_servers++;
+ break;
+
+ case DNS_SERVER_FALLBACK:
+ LIST_APPEND(servers, m->fallback_dns_servers, s);
+ m->n_dns_servers++;
+ break;
+
+ default:
+ assert_not_reached("Unknown server type");
+ }
+
+ s->linked = true;
+
+ /* A new DNS server that isn't fallback is added and the one
+ * we used so far was a fallback one? Then let's try to pick
+ * the new one */
+ if (type != DNS_SERVER_FALLBACK &&
+ m->current_dns_server &&
+ m->current_dns_server->type == DNS_SERVER_FALLBACK)
+ manager_set_dns_server(m, NULL);
+
+ if (ret)
+ *ret = s;
+
+ return 0;
+}
+
+static DnsServer* dns_server_free(DnsServer *s) {
+ assert(s);
+
+ dns_server_unref_stream(s);
+
+#if ENABLE_DNS_OVER_TLS
+ dnstls_server_free(s);
+#endif
+
+ free(s->server_string);
+ free(s->server_string_full);
+ free(s->server_name);
+ return mfree(s);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsServer, dns_server, dns_server_free);
+
+void dns_server_unlink(DnsServer *s) {
+ assert(s);
+ assert(s->manager);
+
+ /* This removes the specified server from the linked list of
+ * servers, but any server might still stay around if it has
+ * refs, for example from an ongoing transaction. */
+
+ if (!s->linked)
+ return;
+
+ switch (s->type) {
+
+ case DNS_SERVER_LINK:
+ assert(s->link);
+ assert(s->link->n_dns_servers > 0);
+ LIST_REMOVE(servers, s->link->dns_servers, s);
+ s->link->n_dns_servers--;
+ break;
+
+ case DNS_SERVER_SYSTEM:
+ assert(s->manager->n_dns_servers > 0);
+ LIST_REMOVE(servers, s->manager->dns_servers, s);
+ s->manager->n_dns_servers--;
+ break;
+
+ case DNS_SERVER_FALLBACK:
+ assert(s->manager->n_dns_servers > 0);
+ LIST_REMOVE(servers, s->manager->fallback_dns_servers, s);
+ s->manager->n_dns_servers--;
+ break;
+ default:
+ assert_not_reached("Unknown server type");
+ }
+
+ s->linked = false;
+
+ if (s->link && s->link->current_dns_server == s)
+ link_set_dns_server(s->link, NULL);
+
+ if (s->manager->current_dns_server == s)
+ manager_set_dns_server(s->manager, NULL);
+
+ /* No need to keep a default stream around anymore */
+ dns_server_unref_stream(s);
+
+ dns_server_unref(s);
+}
+
+void dns_server_move_back_and_unmark(DnsServer *s) {
+ DnsServer *tail;
+
+ assert(s);
+
+ if (!s->marked)
+ return;
+
+ s->marked = false;
+
+ if (!s->linked || !s->servers_next)
+ return;
+
+ /* Move us to the end of the list, so that the order is
+ * strictly kept, if we are not at the end anyway. */
+
+ switch (s->type) {
+
+ case DNS_SERVER_LINK:
+ assert(s->link);
+ LIST_FIND_TAIL(servers, s, tail);
+ LIST_REMOVE(servers, s->link->dns_servers, s);
+ LIST_INSERT_AFTER(servers, s->link->dns_servers, tail, s);
+ break;
+
+ case DNS_SERVER_SYSTEM:
+ LIST_FIND_TAIL(servers, s, tail);
+ LIST_REMOVE(servers, s->manager->dns_servers, s);
+ LIST_INSERT_AFTER(servers, s->manager->dns_servers, tail, s);
+ break;
+
+ case DNS_SERVER_FALLBACK:
+ LIST_FIND_TAIL(servers, s, tail);
+ LIST_REMOVE(servers, s->manager->fallback_dns_servers, s);
+ LIST_INSERT_AFTER(servers, s->manager->fallback_dns_servers, tail, s);
+ break;
+
+ default:
+ assert_not_reached("Unknown server type");
+ }
+}
+
+static void dns_server_verified(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ if (s->verified_feature_level > level)
+ return;
+
+ if (s->verified_feature_level != level) {
+ log_debug("Verified we get a response at feature level %s from DNS server %s.",
+ dns_server_feature_level_to_string(level),
+ strna(dns_server_string_full(s)));
+ s->verified_feature_level = level;
+ }
+
+ assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &s->verified_usec) >= 0);
+}
+
+static void dns_server_reset_counters(DnsServer *s) {
+ assert(s);
+
+ s->n_failed_udp = 0;
+ s->n_failed_tcp = 0;
+ s->n_failed_tls = 0;
+ s->packet_truncated = false;
+ s->verified_usec = 0;
+
+ /* Note that we do not reset s->packet_bad_opt and s->packet_rrsig_missing here. We reset them only when the
+ * grace period ends, but not when lowering the possible feature level, as a lower level feature level should
+ * not make RRSIGs appear or OPT appear, but rather make them disappear. If the reappear anyway, then that's
+ * indication for a differently broken OPT/RRSIG implementation, and we really don't want to support that
+ * either.
+ *
+ * This is particularly important to deal with certain Belkin routers which break OPT for certain lookups (A),
+ * but pass traffic through for others (AAAA). If we detect the broken behaviour on one lookup we should not
+ * re-enable it for another, because we cannot validate things anyway, given that the RRSIG/OPT data will be
+ * incomplete. */
+}
+
+void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, size_t size) {
+ assert(s);
+
+ if (protocol == IPPROTO_UDP) {
+ if (s->possible_feature_level == level)
+ s->n_failed_udp = 0;
+ } else if (protocol == IPPROTO_TCP) {
+ if (DNS_SERVER_FEATURE_LEVEL_IS_TLS(level)) {
+ if (s->possible_feature_level == level)
+ s->n_failed_tls = 0;
+ } else {
+ if (s->possible_feature_level == level)
+ s->n_failed_tcp = 0;
+
+ /* Successful TCP connections are only useful to verify the TCP feature level. */
+ level = DNS_SERVER_FEATURE_LEVEL_TCP;
+ }
+ }
+
+ /* If the RRSIG data is missing, then we can only validate EDNS0 at max */
+ if (s->packet_rrsig_missing && level >= DNS_SERVER_FEATURE_LEVEL_DO)
+ level = DNS_SERVER_FEATURE_LEVEL_IS_TLS(level) ? DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN : DNS_SERVER_FEATURE_LEVEL_EDNS0;
+
+ /* If the OPT RR got lost, then we can only validate UDP at max */
+ if (s->packet_bad_opt && level >= DNS_SERVER_FEATURE_LEVEL_EDNS0)
+ level = DNS_SERVER_FEATURE_LEVEL_EDNS0 - 1;
+
+ /* Even if we successfully receive a reply to a request announcing support for large packets,
+ that does not mean we can necessarily receive large packets. */
+ if (level == DNS_SERVER_FEATURE_LEVEL_LARGE)
+ level = DNS_SERVER_FEATURE_LEVEL_LARGE - 1;
+
+ dns_server_verified(s, level);
+
+ /* Remember the size of the largest UDP packet we received from a server,
+ we know that we can always announce support for packets with at least
+ this size. */
+ if (protocol == IPPROTO_UDP && s->received_udp_packet_max < size)
+ s->received_udp_packet_max = size;
+}
+
+void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel level) {
+ assert(s);
+ assert(s->manager);
+
+ if (s->possible_feature_level == level) {
+ if (protocol == IPPROTO_UDP)
+ s->n_failed_udp++;
+ else if (protocol == IPPROTO_TCP) {
+ if (DNS_SERVER_FEATURE_LEVEL_IS_TLS(level))
+ s->n_failed_tls++;
+ else
+ s->n_failed_tcp++;
+ }
+ }
+}
+
+void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ /* Invoked whenever we get a packet with TC bit set. */
+
+ if (s->possible_feature_level != level)
+ return;
+
+ s->packet_truncated = true;
+}
+
+void dns_server_packet_rrsig_missing(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ if (level < DNS_SERVER_FEATURE_LEVEL_DO)
+ return;
+
+ /* If the RRSIG RRs are missing, we have to downgrade what we previously verified */
+ if (s->verified_feature_level >= DNS_SERVER_FEATURE_LEVEL_DO)
+ s->verified_feature_level = DNS_SERVER_FEATURE_LEVEL_IS_TLS(level) ? DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN : DNS_SERVER_FEATURE_LEVEL_EDNS0;
+
+ s->packet_rrsig_missing = true;
+}
+
+void dns_server_packet_bad_opt(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ if (level < DNS_SERVER_FEATURE_LEVEL_EDNS0)
+ return;
+
+ /* If the OPT RR got lost, we have to downgrade what we previously verified */
+ if (s->verified_feature_level >= DNS_SERVER_FEATURE_LEVEL_EDNS0)
+ s->verified_feature_level = DNS_SERVER_FEATURE_LEVEL_EDNS0-1;
+
+ s->packet_bad_opt = true;
+}
+
+void dns_server_packet_rcode_downgrade(DnsServer *s, DnsServerFeatureLevel level) {
+ assert(s);
+
+ /* Invoked whenever we got a FORMERR, SERVFAIL or NOTIMP rcode from a server and downgrading the feature level
+ * for the transaction made it go away. In this case we immediately downgrade to the feature level that made
+ * things work. */
+
+ if (s->verified_feature_level > level)
+ s->verified_feature_level = level;
+
+ if (s->possible_feature_level > level) {
+ s->possible_feature_level = level;
+ dns_server_reset_counters(s);
+ }
+
+ log_debug("Downgrading transaction feature level fixed an RCODE error, downgrading server %s too.", strna(dns_server_string_full(s)));
+}
+
+static bool dns_server_grace_period_expired(DnsServer *s) {
+ usec_t ts;
+
+ assert(s);
+ assert(s->manager);
+
+ if (s->verified_usec == 0)
+ return false;
+
+ assert_se(sd_event_now(s->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ if (s->verified_usec + s->features_grace_period_usec > ts)
+ return false;
+
+ s->features_grace_period_usec = MIN(s->features_grace_period_usec * 2, DNS_SERVER_FEATURE_GRACE_PERIOD_MAX_USEC);
+
+ return true;
+}
+
+DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s) {
+ DnsServerFeatureLevel best;
+
+ assert(s);
+
+ /* Determine the best feature level we care about. If DNSSEC mode is off there's no point in using anything
+ * better than EDNS0, hence don't even try. */
+ if (dns_server_get_dnssec_mode(s) != DNSSEC_NO)
+ best = dns_server_get_dns_over_tls_mode(s) == DNS_OVER_TLS_NO ?
+ DNS_SERVER_FEATURE_LEVEL_LARGE :
+ DNS_SERVER_FEATURE_LEVEL_TLS_DO;
+ else
+ best = dns_server_get_dns_over_tls_mode(s) == DNS_OVER_TLS_NO ?
+ DNS_SERVER_FEATURE_LEVEL_EDNS0 :
+ DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN;
+
+ /* Clamp the feature level the highest level we care about. The DNSSEC mode might have changed since the last
+ * time, hence let's downgrade if we are still at a higher level. */
+ if (s->possible_feature_level > best)
+ s->possible_feature_level = best;
+
+ if (s->possible_feature_level < best && dns_server_grace_period_expired(s)) {
+
+ s->possible_feature_level = best;
+
+ dns_server_reset_counters(s);
+
+ s->packet_bad_opt = false;
+ s->packet_rrsig_missing = false;
+
+ log_info("Grace period over, resuming full feature set (%s) for DNS server %s.",
+ dns_server_feature_level_to_string(s->possible_feature_level),
+ strna(dns_server_string_full(s)));
+
+ dns_server_flush_cache(s);
+
+ } else if (s->possible_feature_level <= s->verified_feature_level)
+ s->possible_feature_level = s->verified_feature_level;
+ else {
+ DnsServerFeatureLevel p = s->possible_feature_level;
+ int log_level = LOG_WARNING;
+
+ if (s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
+ s->possible_feature_level == DNS_SERVER_FEATURE_LEVEL_TCP) {
+
+ /* We are at the TCP (lowest) level, and we tried a couple of TCP connections, and it didn't
+ * work. Upgrade back to UDP again. */
+ log_debug("Reached maximum number of failed TCP connection attempts, trying UDP again...");
+ s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_UDP;
+ } else if (s->n_failed_tls > 0 &&
+ DNS_SERVER_FEATURE_LEVEL_IS_TLS(s->possible_feature_level) && dns_server_get_dns_over_tls_mode(s) != DNS_OVER_TLS_YES) {
+
+ /* We tried to connect using DNS-over-TLS, and it didn't work. Downgrade to plaintext UDP
+ * if we don't require DNS-over-TLS */
+
+ log_debug("Server doesn't support DNS-over-TLS, downgrading protocol...");
+ s->possible_feature_level--;
+ } else if (s->packet_bad_opt &&
+ s->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_EDNS0) {
+
+ /* A reply to one of our EDNS0 queries didn't carry a valid OPT RR, then downgrade to below
+ * EDNS0 levels. After all, some records generate different responses with and without OPT RR
+ * in the request. Example:
+ * https://open.nlnetlabs.nl/pipermail/dnssec-trigger/2014-November/000376.html */
+
+ log_debug("Server doesn't support EDNS(0) properly, downgrading feature level...");
+ s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_UDP;
+
+ /* Users often don't control the DNS server they use so let's not complain too loudly
+ * when we can't use EDNS because the DNS server doesn't support it. */
+ log_level = LOG_NOTICE;
+
+ } else if (s->packet_rrsig_missing &&
+ s->possible_feature_level >= DNS_SERVER_FEATURE_LEVEL_DO) {
+
+ /* RRSIG data was missing on a EDNS0 packet with DO bit set. This means the server doesn't
+ * augment responses with DNSSEC RRs. If so, let's better not ask the server for it anymore,
+ * after all some servers generate different replies depending if an OPT RR is in the query or
+ * not. */
+
+ log_debug("Detected server responses lack RRSIG records, downgrading feature level...");
+ s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_IS_TLS(s->possible_feature_level) ? DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN : DNS_SERVER_FEATURE_LEVEL_EDNS0;
+
+ } else if (s->n_failed_udp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
+ s->possible_feature_level >= (dns_server_get_dnssec_mode(s) == DNSSEC_YES ? DNS_SERVER_FEATURE_LEVEL_LARGE : DNS_SERVER_FEATURE_LEVEL_UDP)) {
+
+ /* We lost too many UDP packets in a row, and are on a feature level of UDP or higher. If the
+ * packets are lost, maybe the server cannot parse them, hence downgrading sounds like a good
+ * idea. We might downgrade all the way down to TCP this way.
+ *
+ * If strict DNSSEC mode is used we won't downgrade below DO level however, as packet loss
+ * might have many reasons, a broken DNSSEC implementation being only one reason. And if the
+ * user is strict on DNSSEC, then let's assume that DNSSEC is not the fault here. */
+
+ log_debug("Lost too many UDP packets, downgrading feature level...");
+ s->possible_feature_level--;
+
+ } else if (s->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS &&
+ s->packet_truncated &&
+ s->possible_feature_level > (dns_server_get_dnssec_mode(s) == DNSSEC_YES ? DNS_SERVER_FEATURE_LEVEL_LARGE : DNS_SERVER_FEATURE_LEVEL_UDP)) {
+
+ /* We got too many TCP connection failures in a row, we had at least one truncated packet, and
+ * are on a feature level above UDP. By downgrading things and getting rid of DNSSEC or EDNS0
+ * data we hope to make the packet smaller, so that it still works via UDP given that TCP
+ * appears not to be a fallback. Note that if we are already at the lowest UDP level, we don't
+ * go further down, since that's TCP, and TCP failed too often after all. */
+
+ log_debug("Got too many failed TCP connection failures and truncated UDP packets, downgrading feature level...");
+ s->possible_feature_level--;
+ }
+
+ if (p != s->possible_feature_level) {
+
+ /* We changed the feature level, reset the counting */
+ dns_server_reset_counters(s);
+
+ log_full(log_level, "Using degraded feature set %s instead of %s for DNS server %s.",
+ dns_server_feature_level_to_string(s->possible_feature_level),
+ dns_server_feature_level_to_string(p), strna(dns_server_string_full(s)));
+ }
+ }
+
+ return s->possible_feature_level;
+}
+
+int dns_server_adjust_opt(DnsServer *server, DnsPacket *packet, DnsServerFeatureLevel level) {
+ size_t packet_size;
+ bool edns_do;
+ int r;
+
+ assert(server);
+ assert(packet);
+ assert(packet->protocol == DNS_PROTOCOL_DNS);
+
+ /* Fix the OPT field in the packet to match our current feature level. */
+
+ r = dns_packet_truncate_opt(packet);
+ if (r < 0)
+ return r;
+
+ if (level < DNS_SERVER_FEATURE_LEVEL_EDNS0)
+ return 0;
+
+ edns_do = level >= DNS_SERVER_FEATURE_LEVEL_DO;
+
+ if (level >= DNS_SERVER_FEATURE_LEVEL_LARGE)
+ packet_size = DNS_PACKET_UNICAST_SIZE_LARGE_MAX;
+ else
+ packet_size = server->received_udp_packet_max;
+
+ return dns_packet_append_opt(packet, packet_size, edns_do, /* include_rfc6975 = */ true, 0, NULL);
+}
+
+int dns_server_ifindex(const DnsServer *s) {
+ assert(s);
+
+ /* The link ifindex always takes precedence */
+ if (s->link)
+ return s->link->ifindex;
+
+ if (s->ifindex > 0)
+ return s->ifindex;
+
+ return 0;
+}
+
+uint16_t dns_server_port(const DnsServer *s) {
+ assert(s);
+
+ if (s->port > 0)
+ return s->port;
+
+ return 53;
+}
+
+const char *dns_server_string(DnsServer *server) {
+ assert(server);
+
+ if (!server->server_string)
+ (void) in_addr_ifindex_to_string(server->family, &server->address, dns_server_ifindex(server), &server->server_string);
+
+ return server->server_string;
+}
+
+const char *dns_server_string_full(DnsServer *server) {
+ assert(server);
+
+ if (!server->server_string_full)
+ (void) in_addr_port_ifindex_name_to_string(
+ server->family,
+ &server->address,
+ server->port,
+ dns_server_ifindex(server),
+ server->server_name,
+ &server->server_string_full);
+
+ return server->server_string_full;
+}
+
+bool dns_server_dnssec_supported(DnsServer *server) {
+ assert(server);
+
+ /* Returns whether the server supports DNSSEC according to what we know about it */
+
+ if (server->possible_feature_level < DNS_SERVER_FEATURE_LEVEL_DO)
+ return false;
+
+ if (server->packet_bad_opt)
+ return false;
+
+ if (server->packet_rrsig_missing)
+ return false;
+
+ /* DNSSEC servers need to support TCP properly (see RFC5966), if they don't, we assume DNSSEC is borked too */
+ if (server->n_failed_tcp >= DNS_SERVER_FEATURE_RETRY_ATTEMPTS)
+ return false;
+
+ return true;
+}
+
+void dns_server_warn_downgrade(DnsServer *server) {
+ assert(server);
+
+ if (server->warned_downgrade)
+ return;
+
+ log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_DNSSEC_DOWNGRADE_STR,
+ LOG_MESSAGE("Server %s does not support DNSSEC, downgrading to non-DNSSEC mode.", strna(dns_server_string_full(server))),
+ "DNS_SERVER=%s", strna(dns_server_string_full(server)),
+ "DNS_SERVER_FEATURE_LEVEL=%s", dns_server_feature_level_to_string(server->possible_feature_level));
+
+ server->warned_downgrade = true;
+}
+
+static void dns_server_hash_func(const DnsServer *s, struct siphash *state) {
+ assert(s);
+
+ siphash24_compress(&s->family, sizeof(s->family), state);
+ siphash24_compress(&s->address, FAMILY_ADDRESS_SIZE(s->family), state);
+ siphash24_compress(&s->port, sizeof(s->port), state);
+ siphash24_compress(&s->ifindex, sizeof(s->ifindex), state);
+ siphash24_compress_string(s->server_name, state);
+}
+
+static int dns_server_compare_func(const DnsServer *x, const DnsServer *y) {
+ int r;
+
+ r = CMP(x->family, y->family);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family));
+ if (r != 0)
+ return r;
+
+ r = CMP(x->port, y->port);
+ if (r != 0)
+ return r;
+
+ r = CMP(x->ifindex, y->ifindex);
+ if (r != 0)
+ return r;
+
+ return streq_ptr(x->server_name, y->server_name);
+}
+
+DEFINE_HASH_OPS(dns_server_hash_ops, DnsServer, dns_server_hash_func, dns_server_compare_func);
+
+void dns_server_unlink_all(DnsServer *first) {
+ DnsServer *next;
+
+ if (!first)
+ return;
+
+ next = first->servers_next;
+ dns_server_unlink(first);
+
+ dns_server_unlink_all(next);
+}
+
+void dns_server_unlink_marked(DnsServer *first) {
+ DnsServer *next;
+
+ if (!first)
+ return;
+
+ next = first->servers_next;
+
+ if (first->marked)
+ dns_server_unlink(first);
+
+ dns_server_unlink_marked(next);
+}
+
+void dns_server_mark_all(DnsServer *first) {
+ if (!first)
+ return;
+
+ first->marked = true;
+ dns_server_mark_all(first->servers_next);
+}
+
+DnsServer *dns_server_find(DnsServer *first, int family, const union in_addr_union *in_addr, uint16_t port, int ifindex, const char *name) {
+ DnsServer *s;
+
+ LIST_FOREACH(servers, s, first)
+ if (s->family == family &&
+ in_addr_equal(family, &s->address, in_addr) > 0 &&
+ s->port == port &&
+ s->ifindex == ifindex &&
+ streq_ptr(s->server_name, name))
+ return s;
+
+ return NULL;
+}
+
+DnsServer *manager_get_first_dns_server(Manager *m, DnsServerType t) {
+ assert(m);
+
+ switch (t) {
+
+ case DNS_SERVER_SYSTEM:
+ return m->dns_servers;
+
+ case DNS_SERVER_FALLBACK:
+ return m->fallback_dns_servers;
+
+ default:
+ return NULL;
+ }
+}
+
+DnsServer *manager_set_dns_server(Manager *m, DnsServer *s) {
+ assert(m);
+
+ if (m->current_dns_server == s)
+ return s;
+
+ if (s)
+ log_debug("Switching to %s DNS server %s.",
+ dns_server_type_to_string(s->type),
+ strna(dns_server_string_full(s)));
+
+ dns_server_unref(m->current_dns_server);
+ m->current_dns_server = dns_server_ref(s);
+
+ if (m->unicast_scope)
+ dns_cache_flush(&m->unicast_scope->cache);
+
+ (void) manager_send_changed(m, "CurrentDNSServer");
+
+ return s;
+}
+
+DnsServer *manager_get_dns_server(Manager *m) {
+ Link *l;
+ assert(m);
+
+ /* Try to read updates resolv.conf */
+ manager_read_resolv_conf(m);
+
+ /* If no DNS server was chosen so far, pick the first one */
+ if (!m->current_dns_server)
+ manager_set_dns_server(m, m->dns_servers);
+
+ if (!m->current_dns_server) {
+ bool found = false;
+
+ /* No DNS servers configured, let's see if there are
+ * any on any links. If not, we use the fallback
+ * servers */
+
+ HASHMAP_FOREACH(l, m->links)
+ if (l->dns_servers) {
+ found = true;
+ break;
+ }
+
+ if (!found)
+ manager_set_dns_server(m, m->fallback_dns_servers);
+ }
+
+ return m->current_dns_server;
+}
+
+void manager_next_dns_server(Manager *m) {
+ assert(m);
+
+ /* If there's currently no DNS server set, then the next
+ * manager_get_dns_server() will find one */
+ if (!m->current_dns_server)
+ return;
+
+ /* Change to the next one, but make sure to follow the linked
+ * list only if the server is still linked. */
+ if (m->current_dns_server->linked && m->current_dns_server->servers_next) {
+ manager_set_dns_server(m, m->current_dns_server->servers_next);
+ return;
+ }
+
+ /* If there was no next one, then start from the beginning of
+ * the list */
+ if (m->current_dns_server->type == DNS_SERVER_FALLBACK)
+ manager_set_dns_server(m, m->fallback_dns_servers);
+ else
+ manager_set_dns_server(m, m->dns_servers);
+}
+
+DnssecMode dns_server_get_dnssec_mode(DnsServer *s) {
+ assert(s);
+
+ if (s->link)
+ return link_get_dnssec_mode(s->link);
+
+ return manager_get_dnssec_mode(s->manager);
+}
+
+DnsOverTlsMode dns_server_get_dns_over_tls_mode(DnsServer *s) {
+ assert(s);
+
+ if (s->link)
+ return link_get_dns_over_tls_mode(s->link);
+
+ return manager_get_dns_over_tls_mode(s->manager);
+}
+
+void dns_server_flush_cache(DnsServer *s) {
+ DnsServer *current;
+ DnsScope *scope;
+
+ assert(s);
+
+ /* Flush the cache of the scope this server belongs to */
+
+ current = s->link ? s->link->current_dns_server : s->manager->current_dns_server;
+ if (current != s)
+ return;
+
+ scope = s->link ? s->link->unicast_scope : s->manager->unicast_scope;
+ if (!scope)
+ return;
+
+ dns_cache_flush(&scope->cache);
+}
+
+void dns_server_reset_features(DnsServer *s) {
+ assert(s);
+
+ s->verified_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID;
+ s->possible_feature_level = DNS_SERVER_FEATURE_LEVEL_BEST;
+
+ s->received_udp_packet_max = DNS_PACKET_UNICAST_SIZE_MAX;
+
+ s->packet_bad_opt = false;
+ s->packet_rrsig_missing = false;
+
+ s->features_grace_period_usec = DNS_SERVER_FEATURE_GRACE_PERIOD_MIN_USEC;
+
+ s->warned_downgrade = false;
+
+ dns_server_reset_counters(s);
+
+ /* Let's close the default stream, so that we reprobe with the new features */
+ dns_server_unref_stream(s);
+}
+
+void dns_server_reset_features_all(DnsServer *s) {
+ DnsServer *i;
+
+ LIST_FOREACH(servers, i, s)
+ dns_server_reset_features(i);
+}
+
+void dns_server_dump(DnsServer *s, FILE *f) {
+ assert(s);
+
+ if (!f)
+ f = stdout;
+
+ fputs("[Server ", f);
+ fputs(strna(dns_server_string_full(s)), f);
+ fputs(" type=", f);
+ fputs(dns_server_type_to_string(s->type), f);
+
+ if (s->type == DNS_SERVER_LINK) {
+ assert(s->link);
+
+ fputs(" interface=", f);
+ fputs(s->link->ifname, f);
+ }
+
+ fputs("]\n", f);
+
+ fputs("\tVerified feature level: ", f);
+ fputs(strna(dns_server_feature_level_to_string(s->verified_feature_level)), f);
+ fputc('\n', f);
+
+ fputs("\tPossible feature level: ", f);
+ fputs(strna(dns_server_feature_level_to_string(s->possible_feature_level)), f);
+ fputc('\n', f);
+
+ fputs("\tDNSSEC Mode: ", f);
+ fputs(strna(dnssec_mode_to_string(dns_server_get_dnssec_mode(s))), f);
+ fputc('\n', f);
+
+ fputs("\tCan do DNSSEC: ", f);
+ fputs(yes_no(dns_server_dnssec_supported(s)), f);
+ fputc('\n', f);
+
+ fprintf(f,
+ "\tMaximum UDP packet size received: %zu\n"
+ "\tFailed UDP attempts: %u\n"
+ "\tFailed TCP attempts: %u\n"
+ "\tSeen truncated packet: %s\n"
+ "\tSeen OPT RR getting lost: %s\n"
+ "\tSeen RRSIG RR missing: %s\n",
+ s->received_udp_packet_max,
+ s->n_failed_udp,
+ s->n_failed_tcp,
+ yes_no(s->packet_truncated),
+ yes_no(s->packet_bad_opt),
+ yes_no(s->packet_rrsig_missing));
+}
+
+void dns_server_unref_stream(DnsServer *s) {
+ DnsStream *ref;
+
+ assert(s);
+
+ /* Detaches the default stream of this server. Some special care needs to be taken here, as that stream and
+ * this server reference each other. First, take the stream out of the server. It's destructor will check if it
+ * is registered with us, hence let's invalidate this separately, so that it is already unregistered. */
+ ref = TAKE_PTR(s->stream);
+
+ /* And then, unref it */
+ dns_stream_unref(ref);
+}
+
+DnsScope *dns_server_scope(DnsServer *s) {
+ assert(s);
+ assert((s->type == DNS_SERVER_LINK) == !!s->link);
+
+ if (s->link)
+ return s->link->unicast_scope;
+
+ return s->manager->unicast_scope;
+}
+
+static const char* const dns_server_type_table[_DNS_SERVER_TYPE_MAX] = {
+ [DNS_SERVER_SYSTEM] = "system",
+ [DNS_SERVER_FALLBACK] = "fallback",
+ [DNS_SERVER_LINK] = "link",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_server_type, DnsServerType);
+
+static const char* const dns_server_feature_level_table[_DNS_SERVER_FEATURE_LEVEL_MAX] = {
+ [DNS_SERVER_FEATURE_LEVEL_TCP] = "TCP",
+ [DNS_SERVER_FEATURE_LEVEL_UDP] = "UDP",
+ [DNS_SERVER_FEATURE_LEVEL_EDNS0] = "UDP+EDNS0",
+ [DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN] = "TLS+EDNS0",
+ [DNS_SERVER_FEATURE_LEVEL_DO] = "UDP+EDNS0+DO",
+ [DNS_SERVER_FEATURE_LEVEL_LARGE] = "UDP+EDNS0+DO+LARGE",
+ [DNS_SERVER_FEATURE_LEVEL_TLS_DO] = "TLS+EDNS0+D0",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_server_feature_level, DnsServerFeatureLevel);
diff --git a/src/resolve/resolved-dns-server.h b/src/resolve/resolved-dns-server.h
new file mode 100644
index 0000000..20afee7
--- /dev/null
+++ b/src/resolve/resolved-dns-server.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "in-addr-util.h"
+#include "list.h"
+#include "resolve-util.h"
+#include "time-util.h"
+
+typedef struct DnsScope DnsScope;
+typedef struct DnsServer DnsServer;
+typedef struct DnsStream DnsStream;
+typedef struct DnsPacket DnsPacket;
+typedef struct Link Link;
+typedef struct Manager Manager;
+
+#include "resolved-dnstls.h"
+
+typedef enum DnsServerType {
+ DNS_SERVER_SYSTEM,
+ DNS_SERVER_FALLBACK,
+ DNS_SERVER_LINK,
+ _DNS_SERVER_TYPE_MAX,
+ _DNS_SERVER_TYPE_INVALID = -1
+} DnsServerType;
+
+const char* dns_server_type_to_string(DnsServerType i) _const_;
+DnsServerType dns_server_type_from_string(const char *s) _pure_;
+
+typedef enum DnsServerFeatureLevel {
+ DNS_SERVER_FEATURE_LEVEL_TCP,
+ DNS_SERVER_FEATURE_LEVEL_UDP,
+ DNS_SERVER_FEATURE_LEVEL_EDNS0,
+ DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN,
+ DNS_SERVER_FEATURE_LEVEL_DO,
+ DNS_SERVER_FEATURE_LEVEL_LARGE,
+ DNS_SERVER_FEATURE_LEVEL_TLS_DO,
+ _DNS_SERVER_FEATURE_LEVEL_MAX,
+ _DNS_SERVER_FEATURE_LEVEL_INVALID = -1
+} DnsServerFeatureLevel;
+
+#define DNS_SERVER_FEATURE_LEVEL_WORST 0
+#define DNS_SERVER_FEATURE_LEVEL_BEST (_DNS_SERVER_FEATURE_LEVEL_MAX - 1)
+#define DNS_SERVER_FEATURE_LEVEL_IS_TLS(x) IN_SET(x, DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN, DNS_SERVER_FEATURE_LEVEL_TLS_DO)
+
+const char* dns_server_feature_level_to_string(int i) _const_;
+int dns_server_feature_level_from_string(const char *s) _pure_;
+
+struct DnsServer {
+ Manager *manager;
+
+ unsigned n_ref;
+
+ DnsServerType type;
+ Link *link;
+
+ int family;
+ union in_addr_union address;
+ int ifindex; /* for IPv6 link-local DNS servers */
+ uint16_t port;
+ char *server_name;
+
+ char *server_string;
+ char *server_string_full;
+
+ /* The long-lived stream towards this server. */
+ DnsStream *stream;
+
+#if ENABLE_DNS_OVER_TLS
+ DnsTlsServerData dnstls_data;
+#endif
+
+ DnsServerFeatureLevel verified_feature_level;
+ DnsServerFeatureLevel possible_feature_level;
+
+ size_t received_udp_packet_max;
+
+ unsigned n_failed_udp;
+ unsigned n_failed_tcp;
+ unsigned n_failed_tls;
+
+ bool packet_truncated:1;
+ bool packet_bad_opt:1;
+ bool packet_rrsig_missing:1;
+
+ usec_t verified_usec;
+ usec_t features_grace_period_usec;
+
+ /* Whether we already warned about downgrading to non-DNSSEC mode for this server */
+ bool warned_downgrade:1;
+
+ /* Used when GC'ing old DNS servers when configuration changes. */
+ bool marked:1;
+
+ /* If linked is set, then this server appears in the servers linked list */
+ bool linked:1;
+ LIST_FIELDS(DnsServer, servers);
+};
+
+int dns_server_new(
+ Manager *m,
+ DnsServer **ret,
+ DnsServerType type,
+ Link *link,
+ int family,
+ const union in_addr_union *address,
+ uint16_t port,
+ int ifindex,
+ const char *server_string);
+
+DnsServer* dns_server_ref(DnsServer *s);
+DnsServer* dns_server_unref(DnsServer *s);
+
+void dns_server_unlink(DnsServer *s);
+void dns_server_move_back_and_unmark(DnsServer *s);
+
+void dns_server_packet_received(DnsServer *s, int protocol, DnsServerFeatureLevel level, size_t size);
+void dns_server_packet_lost(DnsServer *s, int protocol, DnsServerFeatureLevel level);
+void dns_server_packet_truncated(DnsServer *s, DnsServerFeatureLevel level);
+void dns_server_packet_rrsig_missing(DnsServer *s, DnsServerFeatureLevel level);
+void dns_server_packet_bad_opt(DnsServer *s, DnsServerFeatureLevel level);
+void dns_server_packet_rcode_downgrade(DnsServer *s, DnsServerFeatureLevel level);
+
+DnsServerFeatureLevel dns_server_possible_feature_level(DnsServer *s);
+
+int dns_server_adjust_opt(DnsServer *server, DnsPacket *packet, DnsServerFeatureLevel level);
+
+const char *dns_server_string(DnsServer *server);
+const char *dns_server_string_full(DnsServer *server);
+int dns_server_ifindex(const DnsServer *s);
+uint16_t dns_server_port(const DnsServer *s);
+
+bool dns_server_dnssec_supported(DnsServer *server);
+
+void dns_server_warn_downgrade(DnsServer *server);
+
+DnsServer *dns_server_find(DnsServer *first, int family, const union in_addr_union *in_addr, uint16_t port, int ifindex, const char *name);
+
+void dns_server_unlink_all(DnsServer *first);
+void dns_server_unlink_marked(DnsServer *first);
+void dns_server_mark_all(DnsServer *first);
+
+DnsServer *manager_get_first_dns_server(Manager *m, DnsServerType t);
+
+DnsServer *manager_set_dns_server(Manager *m, DnsServer *s);
+DnsServer *manager_get_dns_server(Manager *m);
+void manager_next_dns_server(Manager *m);
+
+DnssecMode dns_server_get_dnssec_mode(DnsServer *s);
+DnsOverTlsMode dns_server_get_dns_over_tls_mode(DnsServer *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsServer*, dns_server_unref);
+
+extern const struct hash_ops dns_server_hash_ops;
+
+void dns_server_flush_cache(DnsServer *s);
+
+void dns_server_reset_features(DnsServer *s);
+void dns_server_reset_features_all(DnsServer *s);
+
+void dns_server_dump(DnsServer *s, FILE *f);
+
+void dns_server_unref_stream(DnsServer *s);
+
+DnsScope *dns_server_scope(DnsServer *s);
diff --git a/src/resolve/resolved-dns-stream.c b/src/resolve/resolved-dns-stream.c
new file mode 100644
index 0000000..1aab089
--- /dev/null
+++ b/src/resolve/resolved-dns-stream.c
@@ -0,0 +1,590 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/tcp.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "missing_network.h"
+#include "resolved-dns-stream.h"
+#include "resolved-manager.h"
+
+#define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC)
+#define DNS_STREAMS_MAX 128
+
+#define DNS_QUERIES_PER_STREAM 32
+
+static void dns_stream_stop(DnsStream *s) {
+ assert(s);
+
+ s->io_event_source = sd_event_source_unref(s->io_event_source);
+ s->timeout_event_source = sd_event_source_unref(s->timeout_event_source);
+ s->fd = safe_close(s->fd);
+
+ /* Disconnect us from the server object if we are now not usable anymore */
+ dns_stream_detach(s);
+}
+
+static int dns_stream_update_io(DnsStream *s) {
+ int f = 0;
+
+ assert(s);
+
+ if (s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size)
+ f |= EPOLLOUT;
+ else if (!ordered_set_isempty(s->write_queue)) {
+ dns_packet_unref(s->write_packet);
+ s->write_packet = ordered_set_steal_first(s->write_queue);
+ s->write_size = htobe16(s->write_packet->size);
+ s->n_written = 0;
+ f |= EPOLLOUT;
+ }
+
+ /* Let's read a packet if we haven't queued any yet. Except if we already hit a limit of parallel
+ * queries for this connection. */
+ if ((!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size) &&
+ set_size(s->queries) < DNS_QUERIES_PER_STREAM)
+ f |= EPOLLIN;
+
+#if ENABLE_DNS_OVER_TLS
+ /* For handshake and clean closing purposes, TLS can override requested events */
+ if (s->dnstls_events != 0)
+ f = s->dnstls_events;
+#endif
+
+ return sd_event_source_set_io_events(s->io_event_source, f);
+}
+
+static int dns_stream_complete(DnsStream *s, int error) {
+ _cleanup_(dns_stream_unrefp) _unused_ DnsStream *ref = dns_stream_ref(s); /* Protect stream while we process it */
+
+ assert(s);
+ assert(error >= 0);
+
+ /* Error is > 0 when the connection failed for some reason in the network stack. It's == 0 if we sent
+ * and received exactly one packet each (in the LLMNR client case). */
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted) {
+ int r;
+
+ r = dnstls_stream_shutdown(s, error);
+ if (r != -EAGAIN)
+ dns_stream_stop(s);
+ } else
+#endif
+ dns_stream_stop(s);
+
+ dns_stream_detach(s);
+
+ if (s->complete)
+ s->complete(s, error);
+ else /* the default action if no completion function is set is to close the stream */
+ dns_stream_unref(s);
+
+ return 0;
+}
+
+static int dns_stream_identify(DnsStream *s) {
+ CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
+ + CMSG_SPACE(int) + /* for the TTL */
+ + EXTRA_CMSG_SPACE /* kernel appears to require extra space */) control;
+ struct msghdr mh = {};
+ struct cmsghdr *cmsg;
+ socklen_t sl;
+ int r;
+
+ assert(s);
+
+ if (s->identified)
+ return 0;
+
+ /* Query the local side */
+ s->local_salen = sizeof(s->local);
+ r = getsockname(s->fd, &s->local.sa, &s->local_salen);
+ if (r < 0)
+ return -errno;
+ if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
+ s->ifindex = s->local.in6.sin6_scope_id;
+
+ /* Query the remote side */
+ s->peer_salen = sizeof(s->peer);
+ r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
+ if (r < 0)
+ return -errno;
+ if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
+ s->ifindex = s->peer.in6.sin6_scope_id;
+
+ /* Check consistency */
+ assert(s->peer.sa.sa_family == s->local.sa.sa_family);
+ assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
+
+ /* Query connection meta information */
+ sl = sizeof(control);
+ if (s->peer.sa.sa_family == AF_INET) {
+ r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
+ if (r < 0)
+ return -errno;
+ } else if (s->peer.sa.sa_family == AF_INET6) {
+
+ r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
+ if (r < 0)
+ return -errno;
+ } else
+ return -EAFNOSUPPORT;
+
+ mh.msg_control = &control;
+ mh.msg_controllen = sl;
+
+ CMSG_FOREACH(cmsg, &mh) {
+
+ if (cmsg->cmsg_level == IPPROTO_IPV6) {
+ assert(s->peer.sa.sa_family == AF_INET6);
+
+ switch (cmsg->cmsg_type) {
+
+ case IPV6_PKTINFO: {
+ struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);
+
+ if (s->ifindex <= 0)
+ s->ifindex = i->ipi6_ifindex;
+ break;
+ }
+
+ case IPV6_HOPLIMIT:
+ s->ttl = *(int *) CMSG_DATA(cmsg);
+ break;
+ }
+
+ } else if (cmsg->cmsg_level == IPPROTO_IP) {
+ assert(s->peer.sa.sa_family == AF_INET);
+
+ switch (cmsg->cmsg_type) {
+
+ case IP_PKTINFO: {
+ struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);
+
+ if (s->ifindex <= 0)
+ s->ifindex = i->ipi_ifindex;
+ break;
+ }
+
+ case IP_TTL:
+ s->ttl = *(int *) CMSG_DATA(cmsg);
+ break;
+ }
+ }
+ }
+
+ /* The Linux kernel sets the interface index to the loopback
+ * device if the connection came from the local host since it
+ * avoids the routing table in such a case. Let's unset the
+ * interface index in such a case. */
+ if (s->ifindex == LOOPBACK_IFINDEX)
+ s->ifindex = 0;
+
+ /* If we don't know the interface index still, we look for the
+ * first local interface with a matching address. Yuck! */
+ if (s->ifindex <= 0)
+ s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, s->local.sa.sa_family == AF_INET ? (union in_addr_union*) &s->local.in.sin_addr : (union in_addr_union*) &s->local.in6.sin6_addr);
+
+ if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
+ /* Make sure all packets for this connection are sent on the same interface */
+ r = socket_set_unicast_if(s->fd, s->local.sa.sa_family, s->ifindex);
+ if (r < 0)
+ log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF/IPV6_UNICAST_IF: %m");
+ }
+
+ s->identified = true;
+
+ return 0;
+}
+
+ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags) {
+ ssize_t m;
+
+ assert(s);
+ assert(iov);
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted && !(flags & DNS_STREAM_WRITE_TLS_DATA)) {
+ ssize_t ss;
+ size_t i;
+
+ m = 0;
+ for (i = 0; i < iovcnt; i++) {
+ ss = dnstls_stream_write(s, iov[i].iov_base, iov[i].iov_len);
+ if (ss < 0)
+ return ss;
+
+ m += ss;
+ if (ss != (ssize_t) iov[i].iov_len)
+ continue;
+ }
+ } else
+#endif
+ if (s->tfo_salen > 0) {
+ struct msghdr hdr = {
+ .msg_iov = (struct iovec*) iov,
+ .msg_iovlen = iovcnt,
+ .msg_name = &s->tfo_address.sa,
+ .msg_namelen = s->tfo_salen
+ };
+
+ m = sendmsg(s->fd, &hdr, MSG_FASTOPEN);
+ if (m < 0) {
+ if (errno == EOPNOTSUPP) {
+ s->tfo_salen = 0;
+ if (connect(s->fd, &s->tfo_address.sa, s->tfo_salen) < 0)
+ return -errno;
+
+ return -EAGAIN;
+ }
+ if (errno == EINPROGRESS)
+ return -EAGAIN;
+
+ return -errno;
+ } else
+ s->tfo_salen = 0; /* connection is made */
+ } else {
+ m = writev(s->fd, iov, iovcnt);
+ if (m < 0)
+ return -errno;
+ }
+
+ return m;
+}
+
+static ssize_t dns_stream_read(DnsStream *s, void *buf, size_t count) {
+ ssize_t ss;
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted)
+ ss = dnstls_stream_read(s, buf, count);
+ else
+#endif
+ {
+ ss = read(s->fd, buf, count);
+ if (ss < 0)
+ return -errno;
+ }
+
+ return ss;
+}
+
+static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
+ DnsStream *s = userdata;
+
+ assert(s);
+
+ return dns_stream_complete(s, ETIMEDOUT);
+}
+
+static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_stream_unrefp) DnsStream *s = dns_stream_ref(userdata); /* Protect stream while we process it */
+ bool progressed = false;
+ int r;
+
+ assert(s);
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted) {
+ r = dnstls_stream_on_io(s, revents);
+ if (r == DNSTLS_STREAM_CLOSED)
+ return 0;
+ if (r == -EAGAIN)
+ return dns_stream_update_io(s);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+
+ r = dns_stream_update_io(s);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ /* only identify after connecting */
+ if (s->tfo_salen == 0) {
+ r = dns_stream_identify(s);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+ }
+
+ if ((revents & EPOLLOUT) &&
+ s->write_packet &&
+ s->n_written < sizeof(s->write_size) + s->write_packet->size) {
+
+ struct iovec iov[2];
+ ssize_t ss;
+
+ iov[0] = IOVEC_MAKE(&s->write_size, sizeof(s->write_size));
+ iov[1] = IOVEC_MAKE(DNS_PACKET_DATA(s->write_packet), s->write_packet->size);
+
+ IOVEC_INCREMENT(iov, 2, s->n_written);
+
+ ss = dns_stream_writev(s, iov, 2, 0);
+ if (ss < 0) {
+ if (!IN_SET(-ss, EINTR, EAGAIN))
+ return dns_stream_complete(s, -ss);
+ } else {
+ progressed = true;
+ s->n_written += ss;
+ }
+
+ /* Are we done? If so, disable the event source for EPOLLOUT */
+ if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
+ r = dns_stream_update_io(s);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+ }
+ }
+
+ if ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
+ (!s->read_packet ||
+ s->n_read < sizeof(s->read_size) + s->read_packet->size)) {
+
+ if (s->n_read < sizeof(s->read_size)) {
+ ssize_t ss;
+
+ ss = dns_stream_read(s, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
+ if (ss < 0) {
+ if (!IN_SET(-ss, EINTR, EAGAIN))
+ return dns_stream_complete(s, -ss);
+ } else if (ss == 0)
+ return dns_stream_complete(s, ECONNRESET);
+ else {
+ progressed = true;
+ s->n_read += ss;
+ }
+ }
+
+ if (s->n_read >= sizeof(s->read_size)) {
+
+ if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
+ return dns_stream_complete(s, EBADMSG);
+
+ if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
+ ssize_t ss;
+
+ if (!s->read_packet) {
+ r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size), DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+
+ s->read_packet->size = be16toh(s->read_size);
+ s->read_packet->ipproto = IPPROTO_TCP;
+ s->read_packet->family = s->peer.sa.sa_family;
+ s->read_packet->ttl = s->ttl;
+ s->read_packet->ifindex = s->ifindex;
+
+ if (s->read_packet->family == AF_INET) {
+ s->read_packet->sender.in = s->peer.in.sin_addr;
+ s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
+ s->read_packet->destination.in = s->local.in.sin_addr;
+ s->read_packet->destination_port = be16toh(s->local.in.sin_port);
+ } else {
+ assert(s->read_packet->family == AF_INET6);
+ s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
+ s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
+ s->read_packet->destination.in6 = s->local.in6.sin6_addr;
+ s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);
+
+ if (s->read_packet->ifindex == 0)
+ s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
+ if (s->read_packet->ifindex == 0)
+ s->read_packet->ifindex = s->local.in6.sin6_scope_id;
+ }
+ }
+
+ ss = dns_stream_read(s,
+ (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
+ sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
+ if (ss < 0) {
+ if (!IN_SET(-ss, EINTR, EAGAIN))
+ return dns_stream_complete(s, -ss);
+ } else if (ss == 0)
+ return dns_stream_complete(s, ECONNRESET);
+ else
+ s->n_read += ss;
+ }
+
+ /* Are we done? If so, disable the event source for EPOLLIN */
+ if (s->n_read >= sizeof(s->read_size) + be16toh(s->read_size)) {
+ /* If there's a packet handler
+ * installed, call that. Note that
+ * this is optional... */
+ if (s->on_packet) {
+ r = s->on_packet(s);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_stream_update_io(s);
+ if (r < 0)
+ return dns_stream_complete(s, -r);
+ }
+ }
+ }
+
+ /* Call "complete" callback if finished reading and writing one packet, and there's nothing else left
+ * to write. */
+ if (s->type == DNS_STREAM_LLMNR_SEND &&
+ (s->write_packet && s->n_written >= sizeof(s->write_size) + s->write_packet->size) &&
+ ordered_set_isempty(s->write_queue) &&
+ (s->read_packet && s->n_read >= sizeof(s->read_size) + s->read_packet->size))
+ return dns_stream_complete(s, 0);
+
+ /* If we did something, let's restart the timeout event source */
+ if (progressed && s->timeout_event_source) {
+ r = sd_event_source_set_time_relative(s->timeout_event_source, DNS_STREAM_TIMEOUT_USEC);
+ if (r < 0)
+ log_warning_errno(errno, "Couldn't restart TCP connection timeout, ignoring: %m");
+ }
+
+ return 0;
+}
+
+static DnsStream *dns_stream_free(DnsStream *s) {
+ DnsPacket *p;
+
+ assert(s);
+
+ dns_stream_stop(s);
+
+ if (s->manager) {
+ LIST_REMOVE(streams, s->manager->dns_streams, s);
+ s->manager->n_dns_streams[s->type]--;
+ }
+
+#if ENABLE_DNS_OVER_TLS
+ if (s->encrypted)
+ dnstls_stream_free(s);
+#endif
+
+ ORDERED_SET_FOREACH(p, s->write_queue)
+ dns_packet_unref(ordered_set_remove(s->write_queue, p));
+
+ dns_packet_unref(s->write_packet);
+ dns_packet_unref(s->read_packet);
+ dns_server_unref(s->server);
+
+ ordered_set_free(s->write_queue);
+
+ return mfree(s);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(DnsStream, dns_stream, dns_stream_free);
+
+int dns_stream_new(
+ Manager *m,
+ DnsStream **ret,
+ DnsStreamType type,
+ DnsProtocol protocol,
+ int fd,
+ const union sockaddr_union *tfo_address) {
+
+ _cleanup_(dns_stream_unrefp) DnsStream *s = NULL;
+ int r;
+
+ assert(m);
+ assert(ret);
+ assert(type >= 0);
+ assert(type < _DNS_STREAM_TYPE_MAX);
+ assert(protocol >= 0);
+ assert(protocol < _DNS_PROTOCOL_MAX);
+ assert(fd >= 0);
+
+ if (m->n_dns_streams[type] > DNS_STREAMS_MAX)
+ return -EBUSY;
+
+ s = new(DnsStream, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (DnsStream) {
+ .n_ref = 1,
+ .fd = -1,
+ .protocol = protocol,
+ .type = type,
+ };
+
+ r = ordered_set_ensure_allocated(&s->write_queue, &dns_packet_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->io_event_source, "dns-stream-io");
+
+ r = sd_event_add_time_relative(
+ m->event,
+ &s->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ DNS_STREAM_TIMEOUT_USEC, 0,
+ on_stream_timeout, s);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s->timeout_event_source, "dns-stream-timeout");
+
+ LIST_PREPEND(streams, m->dns_streams, s);
+ m->n_dns_streams[type]++;
+ s->manager = m;
+
+ s->fd = fd;
+
+ if (tfo_address) {
+ s->tfo_address = *tfo_address;
+ s->tfo_salen = tfo_address->sa.sa_family == AF_INET6 ? sizeof(tfo_address->in6) : sizeof(tfo_address->in);
+ }
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
+
+int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
+ int r;
+
+ assert(s);
+ assert(p);
+
+ r = ordered_set_put(s->write_queue, p);
+ if (r < 0)
+ return r;
+
+ dns_packet_ref(p);
+
+ return dns_stream_update_io(s);
+}
+
+DnsPacket *dns_stream_take_read_packet(DnsStream *s) {
+ assert(s);
+
+ if (!s->read_packet)
+ return NULL;
+
+ if (s->n_read < sizeof(s->read_size))
+ return NULL;
+
+ if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size))
+ return NULL;
+
+ s->n_read = 0;
+ return TAKE_PTR(s->read_packet);
+}
+
+void dns_stream_detach(DnsStream *s) {
+ assert(s);
+
+ if (!s->server)
+ return;
+
+ if (s->server->stream != s)
+ return;
+
+ dns_server_unref_stream(s->server);
+}
diff --git a/src/resolve/resolved-dns-stream.h b/src/resolve/resolved-dns-stream.h
new file mode 100644
index 0000000..dba0644
--- /dev/null
+++ b/src/resolve/resolved-dns-stream.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "ordered-set.h"
+#include "socket-util.h"
+
+typedef struct DnsServer DnsServer;
+typedef struct DnsStream DnsStream;
+typedef struct DnsTransaction DnsTransaction;
+typedef struct Manager Manager;
+typedef struct DnsStubListenerExtra DnsStubListenerExtra;
+
+#include "resolved-dns-packet.h"
+#include "resolved-dnstls.h"
+
+typedef enum DnsStreamType {
+ DNS_STREAM_LOOKUP, /* Outgoing connection to a classic DNS server */
+ DNS_STREAM_LLMNR_SEND, /* Outgoing LLMNR TCP lookup */
+ DNS_STREAM_LLMNR_RECV, /* Incoming LLMNR TCP lookup */
+ DNS_STREAM_STUB, /* Incoming DNS stub connection */
+ _DNS_STREAM_TYPE_MAX,
+ _DNS_STREAM_TYPE_INVALID = -1,
+} DnsStreamType;
+
+#define DNS_STREAM_WRITE_TLS_DATA 1
+
+/* Streams are used by three subsystems:
+ *
+ * 1. The normal transaction logic when doing a DNS or LLMNR lookup via TCP
+ * 2. The LLMNR logic when accepting a TCP-based lookup
+ * 3. The DNS stub logic when accepting a TCP-based lookup
+ */
+
+struct DnsStream {
+ Manager *manager;
+ unsigned n_ref;
+
+ DnsStreamType type;
+ DnsProtocol protocol;
+
+ int fd;
+ union sockaddr_union peer;
+ socklen_t peer_salen;
+ union sockaddr_union local;
+ socklen_t local_salen;
+ int ifindex;
+ uint32_t ttl;
+ bool identified;
+
+ /* only when using TCP fast open */
+ union sockaddr_union tfo_address;
+ socklen_t tfo_salen;
+
+#if ENABLE_DNS_OVER_TLS
+ DnsTlsStreamData dnstls_data;
+ int dnstls_events;
+#endif
+
+ sd_event_source *io_event_source;
+ sd_event_source *timeout_event_source;
+
+ be16_t write_size, read_size;
+ DnsPacket *write_packet, *read_packet;
+ size_t n_written, n_read;
+ OrderedSet *write_queue;
+
+ int (*on_packet)(DnsStream *s);
+ int (*complete)(DnsStream *s, int error);
+
+ LIST_HEAD(DnsTransaction, transactions); /* when used by the transaction logic */
+ DnsServer *server; /* when used by the transaction logic */
+ Set *queries; /* when used by the DNS stub logic */
+
+ /* used when DNS-over-TLS is enabled */
+ bool encrypted:1;
+
+ DnsStubListenerExtra *stub_listener_extra;
+
+ LIST_FIELDS(DnsStream, streams);
+};
+
+int dns_stream_new(Manager *m, DnsStream **s, DnsStreamType type, DnsProtocol protocol, int fd, const union sockaddr_union *tfo_address);
+#if ENABLE_DNS_OVER_TLS
+int dns_stream_connect_tls(DnsStream *s, void *tls_session);
+#endif
+DnsStream *dns_stream_unref(DnsStream *s);
+DnsStream *dns_stream_ref(DnsStream *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsStream*, dns_stream_unref);
+
+int dns_stream_write_packet(DnsStream *s, DnsPacket *p);
+ssize_t dns_stream_writev(DnsStream *s, const struct iovec *iov, size_t iovcnt, int flags);
+
+static inline bool DNS_STREAM_QUEUED(DnsStream *s) {
+ assert(s);
+
+ if (s->fd < 0) /* already stopped? */
+ return false;
+
+ return !!s->write_packet;
+}
+
+DnsPacket *dns_stream_take_read_packet(DnsStream *s);
+
+void dns_stream_detach(DnsStream *s);
diff --git a/src/resolve/resolved-dns-stub.c b/src/resolve/resolved-dns-stub.c
new file mode 100644
index 0000000..6a3dc99
--- /dev/null
+++ b/src/resolve/resolved-dns-stub.c
@@ -0,0 +1,776 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if_arp.h>
+
+#include "errno-util.h"
+#include "fd-util.h"
+#include "missing_network.h"
+#include "missing_socket.h"
+#include "resolved-dns-stub.h"
+#include "socket-netlink.h"
+#include "socket-util.h"
+#include "string-table.h"
+
+/* The MTU of the loopback device is 64K on Linux, advertise that as maximum datagram size, but subtract the Ethernet,
+ * IP and UDP header sizes */
+#define ADVERTISE_DATAGRAM_SIZE_MAX (65536U-14U-20U-8U)
+
+/* On the extra stubs, use a more conservative choice */
+#define ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX DNS_PACKET_UNICAST_SIZE_LARGE_MAX
+
+static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type);
+
+static void dns_stub_listener_extra_hash_func(const DnsStubListenerExtra *a, struct siphash *state) {
+ assert(a);
+
+ siphash24_compress(&a->mode, sizeof(a->mode), state);
+ siphash24_compress(&a->family, sizeof(a->family), state);
+ siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state);
+ siphash24_compress(&a->port, sizeof(a->port), state);
+}
+
+static int dns_stub_listener_extra_compare_func(const DnsStubListenerExtra *a, const DnsStubListenerExtra *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ r = CMP(a->mode, b->mode);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->family, b->family);
+ if (r != 0)
+ return r;
+
+ r = memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
+ if (r != 0)
+ return r;
+
+ return CMP(a->port, b->port);
+}
+
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(
+ dns_stub_listener_extra_hash_ops,
+ DnsStubListenerExtra,
+ dns_stub_listener_extra_hash_func,
+ dns_stub_listener_extra_compare_func,
+ dns_stub_listener_extra_free);
+
+int dns_stub_listener_extra_new(
+ Manager *m,
+ DnsStubListenerExtra **ret) {
+
+ DnsStubListenerExtra *l;
+
+ l = new(DnsStubListenerExtra, 1);
+ if (!l)
+ return -ENOMEM;
+
+ *l = (DnsStubListenerExtra) {
+ .manager = m,
+ };
+
+ *ret = TAKE_PTR(l);
+ return 0;
+}
+
+DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p) {
+ if (!p)
+ return NULL;
+
+ p->udp_event_source = sd_event_source_unref(p->udp_event_source);
+ p->tcp_event_source = sd_event_source_unref(p->tcp_event_source);
+
+ return mfree(p);
+}
+
+static int dns_stub_make_reply_packet(
+ DnsPacket **p,
+ size_t max_size,
+ DnsQuestion *q,
+ DnsAnswer *answer,
+ bool *ret_truncated) {
+
+ bool truncated = false;
+ DnsResourceRecord *rr;
+ unsigned c = 0;
+ int r;
+
+ assert(p);
+
+ /* Note that we don't bother with any additional RRs, as this is stub is for local lookups only, and hence
+ * roundtrips aren't expensive. */
+
+ if (!*p) {
+ r = dns_packet_new(p, DNS_PROTOCOL_DNS, 0, max_size);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_question(*p, q);
+ if (r < 0)
+ return r;
+
+ DNS_PACKET_HEADER(*p)->qdcount = htobe16(dns_question_size(q));
+ }
+
+ DNS_ANSWER_FOREACH(rr, answer) {
+
+ r = dns_question_matches_rr(q, rr, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto add;
+
+ r = dns_question_matches_cname_or_dname(q, rr, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ goto add;
+
+ continue;
+ add:
+ r = dns_packet_append_rr(*p, rr, 0, NULL, NULL);
+ if (r == -EMSGSIZE) {
+ truncated = true;
+ break;
+ }
+ if (r < 0)
+ return r;
+
+ c++;
+ }
+
+ if (ret_truncated)
+ *ret_truncated = truncated;
+ else if (truncated)
+ return -EMSGSIZE;
+
+ DNS_PACKET_HEADER(*p)->ancount = htobe16(be16toh(DNS_PACKET_HEADER(*p)->ancount) + c);
+
+ return 0;
+}
+
+static int dns_stub_finish_reply_packet(
+ DnsPacket *p,
+ uint16_t id,
+ int rcode,
+ bool tc, /* set the Truncated bit? */
+ bool add_opt, /* add an OPT RR to this packet? */
+ bool edns0_do, /* set the EDNS0 DNSSEC OK bit? */
+ bool ad, /* set the DNSSEC authenticated data bit? */
+ uint16_t max_udp_size) { /* The maximum UDP datagram size to advertise to clients */
+
+ int r;
+
+ assert(p);
+
+ if (add_opt) {
+ r = dns_packet_append_opt(p, max_udp_size, edns0_do, /* include_rfc6975 = */ false, rcode, NULL);
+ if (r == -EMSGSIZE) /* Hit the size limit? then indicate truncation */
+ tc = true;
+ else if (r < 0)
+ return r;
+
+ } else {
+ /* If the client can't to EDNS0, don't do DO either */
+ edns0_do = false;
+
+ /* If the client didn't do EDNS, clamp the rcode to 4 bit */
+ if (rcode > 0xF)
+ rcode = DNS_RCODE_SERVFAIL;
+ }
+
+ /* Don't set the AD bit unless DO is on, too */
+ if (!edns0_do)
+ ad = false;
+
+ DNS_PACKET_HEADER(p)->id = id;
+
+ DNS_PACKET_HEADER(p)->flags = htobe16(DNS_PACKET_MAKE_FLAGS(
+ 1 /* qr */,
+ 0 /* opcode */,
+ 0 /* aa */,
+ tc /* tc */,
+ 1 /* rd */,
+ 1 /* ra */,
+ ad /* ad */,
+ 0 /* cd */,
+ rcode));
+
+ return 0;
+}
+
+static int dns_stub_send(
+ Manager *m,
+ DnsStubListenerExtra *l,
+ DnsStream *s,
+ DnsPacket *p,
+ DnsPacket *reply) {
+
+ int r;
+
+ assert(m);
+ assert(p);
+ assert(reply);
+
+ if (s)
+ r = dns_stream_write_packet(s, reply);
+ else
+ /* Note that it is essential here that we explicitly choose the source IP address for this packet. This
+ * is because otherwise the kernel will choose it automatically based on the routing table and will
+ * thus pick 127.0.0.1 rather than 127.0.0.53. */
+ r = manager_send(m,
+ manager_dns_stub_fd_extra(m, l, SOCK_DGRAM),
+ l ? p->ifindex : LOOPBACK_IFINDEX, /* force loopback iface if this is the main listener stub */
+ p->family, &p->sender, p->sender_port, &p->destination,
+ reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send reply packet: %m");
+
+ return 0;
+}
+
+static int dns_stub_send_failure(
+ Manager *m,
+ DnsStubListenerExtra *l,
+ DnsStream *s,
+ DnsPacket *p,
+ int rcode,
+ bool authenticated) {
+
+ _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
+ int r;
+
+ assert(m);
+ assert(p);
+
+ r = dns_stub_make_reply_packet(&reply, DNS_PACKET_PAYLOAD_SIZE_MAX(p), p->question, NULL, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to make failure packet: %m");
+
+ r = dns_stub_finish_reply_packet(
+ reply,
+ DNS_PACKET_ID(p),
+ rcode,
+ /* truncated = */ false,
+ !!p->opt,
+ DNS_PACKET_DO(p),
+ authenticated,
+ l ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to build failure packet: %m");
+
+ return dns_stub_send(m, l, s, p, reply);
+}
+
+static void dns_stub_query_complete(DnsQuery *q) {
+ int r;
+
+ assert(q);
+ assert(q->request_dns_packet);
+
+ switch (q->state) {
+
+ case DNS_TRANSACTION_SUCCESS: {
+ bool truncated;
+
+ r = dns_stub_make_reply_packet(&q->reply_dns_packet, DNS_PACKET_PAYLOAD_SIZE_MAX(q->request_dns_packet), q->question_idna, q->answer, &truncated);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to build reply packet: %m");
+ break;
+ }
+
+ if (!truncated) {
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ (void) dns_stub_send_failure(q->manager, q->stub_listener_extra, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_SERVFAIL, false);
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to process CNAME: %m");
+ break;
+ }
+ if (r == DNS_QUERY_RESTARTED)
+ return;
+ }
+
+ r = dns_stub_finish_reply_packet(
+ q->reply_dns_packet,
+ DNS_PACKET_ID(q->request_dns_packet),
+ q->answer_rcode,
+ truncated,
+ !!q->request_dns_packet->opt,
+ DNS_PACKET_DO(q->request_dns_packet),
+ dns_query_fully_authenticated(q),
+ q->stub_listener_extra ? ADVERTISE_EXTRA_DATAGRAM_SIZE_MAX : ADVERTISE_DATAGRAM_SIZE_MAX);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to finish reply packet: %m");
+ break;
+ }
+
+ (void) dns_stub_send(q->manager, q->stub_listener_extra, q->request_dns_stream, q->request_dns_packet, q->reply_dns_packet);
+ break;
+ }
+
+ case DNS_TRANSACTION_RCODE_FAILURE:
+ (void) dns_stub_send_failure(q->manager, q->stub_listener_extra, q->request_dns_stream, q->request_dns_packet, q->answer_rcode, dns_query_fully_authenticated(q));
+ break;
+
+ case DNS_TRANSACTION_NOT_FOUND:
+ (void) dns_stub_send_failure(q->manager, q->stub_listener_extra, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_NXDOMAIN, dns_query_fully_authenticated(q));
+ break;
+
+ case DNS_TRANSACTION_TIMEOUT:
+ case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
+ /* Propagate a timeout as a no packet, i.e. that the client also gets a timeout */
+ break;
+
+ case DNS_TRANSACTION_NO_SERVERS:
+ case DNS_TRANSACTION_INVALID_REPLY:
+ case DNS_TRANSACTION_ERRNO:
+ case DNS_TRANSACTION_ABORTED:
+ case DNS_TRANSACTION_DNSSEC_FAILED:
+ case DNS_TRANSACTION_NO_TRUST_ANCHOR:
+ case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
+ case DNS_TRANSACTION_NETWORK_DOWN:
+ (void) dns_stub_send_failure(q->manager, q->stub_listener_extra, q->request_dns_stream, q->request_dns_packet, DNS_RCODE_SERVFAIL, false);
+ break;
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ default:
+ assert_not_reached("Impossible state");
+ }
+
+ dns_query_free(q);
+}
+
+static int dns_stub_stream_complete(DnsStream *s, int error) {
+ assert(s);
+
+ log_debug_errno(error, "DNS TCP connection terminated, destroying queries: %m");
+
+ for (;;) {
+ DnsQuery *q;
+
+ q = set_first(s->queries);
+ if (!q)
+ break;
+
+ dns_query_free(q);
+ }
+
+ /* This drops the implicit ref we keep around since it was allocated, as incoming stub connections
+ * should be kept as long as the client wants to. */
+ dns_stream_unref(s);
+ return 0;
+}
+
+static void dns_stub_process_query(Manager *m, DnsStubListenerExtra *l, DnsStream *s, DnsPacket *p) {
+ _cleanup_(dns_query_freep) DnsQuery *q = NULL;
+ int r;
+
+ assert(m);
+ assert(p);
+ assert(p->protocol == DNS_PROTOCOL_DNS);
+
+ if (!l && /* l == NULL if this is the main stub */
+ (in_addr_is_localhost(p->family, &p->sender) <= 0 ||
+ in_addr_is_localhost(p->family, &p->destination) <= 0)) {
+ log_warning("Got packet on unexpected (i.e. non-localhost) IP range, ignoring.");
+ return;
+ }
+
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to extract resources from incoming packet, ignoring packet: %m");
+ dns_stub_send_failure(m, l, s, p, DNS_RCODE_FORMERR, false);
+ return;
+ }
+
+ if (!DNS_PACKET_VERSION_SUPPORTED(p)) {
+ log_debug("Got EDNS OPT field with unsupported version number.");
+ dns_stub_send_failure(m, l, s, p, DNS_RCODE_BADVERS, false);
+ return;
+ }
+
+ if (dns_type_is_obsolete(p->question->keys[0]->type)) {
+ log_debug("Got message with obsolete key type, refusing.");
+ dns_stub_send_failure(m, l, s, p, DNS_RCODE_NOTIMP, false);
+ return;
+ }
+
+ if (dns_type_is_zone_transer(p->question->keys[0]->type)) {
+ log_debug("Got request for zone transfer, refusing.");
+ dns_stub_send_failure(m, l, s, p, DNS_RCODE_NOTIMP, false);
+ return;
+ }
+
+ if (!DNS_PACKET_RD(p)) {
+ /* If the "rd" bit is off (i.e. recursion was not requested), then refuse operation */
+ log_debug("Got request with recursion disabled, refusing.");
+ dns_stub_send_failure(m, l, s, p, DNS_RCODE_REFUSED, false);
+ return;
+ }
+
+ if (DNS_PACKET_DO(p) && DNS_PACKET_CD(p)) {
+ log_debug("Got request with DNSSEC CD bit set, refusing.");
+ dns_stub_send_failure(m, l, s, p, DNS_RCODE_NOTIMP, false);
+ return;
+ }
+
+ r = dns_query_new(m, &q, p->question, p->question, 0, SD_RESOLVED_PROTOCOLS_ALL|SD_RESOLVED_NO_SEARCH);
+ if (r < 0) {
+ log_error_errno(r, "Failed to generate query object: %m");
+ dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
+ return;
+ }
+
+ /* Request that the TTL is corrected by the cached time for this lookup, so that we return vaguely useful TTLs */
+ q->clamp_ttl = true;
+
+ q->request_dns_packet = dns_packet_ref(p);
+ q->request_dns_stream = dns_stream_ref(s); /* make sure the stream stays around until we can send a reply through it */
+ q->stub_listener_extra = l;
+ q->complete = dns_stub_query_complete;
+
+ if (s) {
+ /* Remember which queries belong to this stream, so that we can cancel them when the stream
+ * is disconnected early */
+
+ r = set_ensure_put(&s->queries, NULL, q);
+ if (r < 0) {
+ log_oom();
+ return;
+ }
+ assert(r > 0);
+ }
+
+ r = dns_query_go(q);
+ if (r < 0) {
+ log_error_errno(r, "Failed to start query: %m");
+ dns_stub_send_failure(m, l, s, p, DNS_RCODE_SERVFAIL, false);
+ return;
+ }
+
+ log_debug("Processing query...");
+ TAKE_PTR(q);
+}
+
+static int on_dns_stub_packet_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ r = manager_recv(m, fd, DNS_PROTOCOL_DNS, &p);
+ if (r <= 0)
+ return r;
+
+ if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got DNS stub UDP query packet for id %u", DNS_PACKET_ID(p));
+
+ dns_stub_process_query(m, l, NULL, p);
+ } else
+ log_debug("Invalid DNS stub UDP packet, ignoring.");
+
+ return 0;
+}
+
+static int on_dns_stub_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ return on_dns_stub_packet_internal(s, fd, revents, userdata, NULL);
+}
+
+static int on_dns_stub_packet_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ DnsStubListenerExtra *l = userdata;
+
+ assert(l);
+
+ return on_dns_stub_packet_internal(s, fd, revents, l->manager, l);
+}
+
+static int on_dns_stub_stream_packet(DnsStream *s) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+
+ assert(s);
+
+ p = dns_stream_take_read_packet(s);
+ assert(p);
+
+ if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got DNS stub TCP query packet for id %u", DNS_PACKET_ID(p));
+
+ dns_stub_process_query(s->manager, s->stub_listener_extra, s, p);
+ } else
+ log_debug("Invalid DNS stub TCP packet, ignoring.");
+
+ return 0;
+}
+
+static int on_dns_stub_stream_internal(sd_event_source *s, int fd, uint32_t revents, Manager *m, DnsStubListenerExtra *l) {
+ DnsStream *stream;
+ int cfd, r;
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return 0;
+
+ return -errno;
+ }
+
+ r = dns_stream_new(m, &stream, DNS_STREAM_STUB, DNS_PROTOCOL_DNS, cfd, NULL);
+ if (r < 0) {
+ safe_close(cfd);
+ return r;
+ }
+
+ stream->stub_listener_extra = l;
+ stream->on_packet = on_dns_stub_stream_packet;
+ stream->complete = dns_stub_stream_complete;
+
+ /* We let the reference to the stream dangle here, it will be dropped later by the complete callback. */
+
+ return 0;
+}
+
+static int on_dns_stub_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ return on_dns_stub_stream_internal(s, fd, revents, userdata, NULL);
+}
+
+static int on_dns_stub_stream_extra(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ DnsStubListenerExtra *l = userdata;
+
+ assert(l);
+ return on_dns_stub_stream_internal(s, fd, revents, l->manager, l);
+}
+
+static int set_dns_stub_common_socket_options(int fd, int family) {
+ int r;
+
+ assert(fd >= 0);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return r;
+
+ r = socket_set_recvpktinfo(fd, family, true);
+ if (r < 0)
+ return r;
+
+ r = socket_set_recvttl(fd, family, true);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int manager_dns_stub_fd(Manager *m, int type) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_addr.s_addr = htobe32(INADDR_DNS_STUB),
+ .in.sin_port = htobe16(53),
+ };
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
+
+ sd_event_source **event_source = type == SOCK_DGRAM ? &m->dns_stub_udp_event_source : &m->dns_stub_tcp_event_source;
+ if (*event_source)
+ return sd_event_source_get_io_fd(*event_source);
+
+ fd = socket(AF_INET, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = set_dns_stub_common_socket_options(fd, AF_INET);
+ if (r < 0)
+ return r;
+
+ /* Make sure no traffic from outside the local host can leak to onto this socket */
+ r = socket_bind_to_ifindex(fd, LOOPBACK_IFINDEX);
+ if (r < 0)
+ return r;
+
+ r = setsockopt_int(fd, IPPROTO_IP, IP_TTL, 1);
+ if (r < 0)
+ return r;
+
+ if (bind(fd, &sa.sa, sizeof(sa.in)) < 0)
+ return -errno;
+
+ if (type == SOCK_STREAM &&
+ listen(fd, SOMAXCONN) < 0)
+ return -errno;
+
+ r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
+ type == SOCK_DGRAM ? on_dns_stub_packet : on_dns_stub_stream,
+ m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_io_fd_own(*event_source, true);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(*event_source,
+ type == SOCK_DGRAM ? "dns-stub-udp" : "dns-stub-tcp");
+
+ return TAKE_FD(fd);
+}
+
+static int manager_dns_stub_fd_extra(Manager *m, DnsStubListenerExtra *l, int type) {
+ _cleanup_free_ char *pretty = NULL;
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa;
+ int r;
+
+ assert(m);
+ assert(IN_SET(type, SOCK_DGRAM, SOCK_STREAM));
+
+ if (!l)
+ return manager_dns_stub_fd(m, type);
+
+ sd_event_source **event_source = type == SOCK_DGRAM ? &l->udp_event_source : &l->tcp_event_source;
+ if (*event_source)
+ return sd_event_source_get_io_fd(*event_source);
+
+ if (l->family == AF_INET)
+ sa = (union sockaddr_union) {
+ .in.sin_family = l->family,
+ .in.sin_port = htobe16(l->port != 0 ? l->port : 53U),
+ .in.sin_addr = l->address.in,
+ };
+ else
+ sa = (union sockaddr_union) {
+ .in6.sin6_family = l->family,
+ .in6.sin6_port = htobe16(l->port != 0 ? l->port : 53U),
+ .in6.sin6_addr = l->address.in6,
+ };
+
+ fd = socket(l->family, type | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+ if (fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = set_dns_stub_common_socket_options(fd, l->family);
+ if (r < 0)
+ goto fail;
+
+ /* Do not set IP_TTL for extra DNS stub listeners, as the address may not be local and in that case
+ * people may want ttl > 1. */
+
+ r = socket_set_freebind(fd, l->family, true);
+ if (r < 0)
+ goto fail;
+
+ if (bind(fd, &sa.sa, SOCKADDR_LEN(sa)) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (type == SOCK_STREAM &&
+ listen(fd, SOMAXCONN) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = sd_event_add_io(m->event, event_source, fd, EPOLLIN,
+ type == SOCK_DGRAM ? on_dns_stub_packet_extra : on_dns_stub_stream_extra,
+ l);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_io_fd_own(*event_source, true);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(*event_source,
+ type == SOCK_DGRAM ? "dns-stub-udp-extra" : "dns-stub-tcp-extra");
+
+ if (DEBUG_LOGGING) {
+ (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
+ log_debug("Listening on %s socket %s.",
+ type == SOCK_DGRAM ? "UDP" : "TCP",
+ strnull(pretty));
+ }
+
+ return TAKE_FD(fd);
+
+fail:
+ assert(r < 0);
+ (void) in_addr_port_to_string(l->family, &l->address, l->port, &pretty);
+ return log_warning_errno(r,
+ r == -EADDRINUSE ? "Another process is already listening on %s socket %s: %m" :
+ "Failed to listen on %s socket %s: %m",
+ type == SOCK_DGRAM ? "UDP" : "TCP",
+ strnull(pretty));
+}
+
+int manager_dns_stub_start(Manager *m) {
+ const char *t = "UDP";
+ int r = 0;
+
+ assert(m);
+
+ if (m->dns_stub_listener_mode == DNS_STUB_LISTENER_NO)
+ log_debug("Not creating stub listener.");
+ else
+ log_debug("Creating stub listener using %s.",
+ m->dns_stub_listener_mode == DNS_STUB_LISTENER_UDP ? "UDP" :
+ m->dns_stub_listener_mode == DNS_STUB_LISTENER_TCP ? "TCP" :
+ "UDP/TCP");
+
+ if (FLAGS_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_UDP))
+ r = manager_dns_stub_fd(m, SOCK_DGRAM);
+
+ if (r >= 0 &&
+ FLAGS_SET(m->dns_stub_listener_mode, DNS_STUB_LISTENER_TCP)) {
+ t = "TCP";
+ r = manager_dns_stub_fd(m, SOCK_STREAM);
+ }
+
+ if (IN_SET(r, -EADDRINUSE, -EPERM)) {
+ log_warning_errno(r,
+ r == -EADDRINUSE ? "Another process is already listening on %s socket 127.0.0.53:53.\n"
+ "Turning off local DNS stub support." :
+ "Failed to listen on %s socket 127.0.0.53:53: %m.\n"
+ "Turning off local DNS stub support.",
+ t);
+ manager_dns_stub_stop(m);
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to listen on %s socket 127.0.0.53:53: %m", t);
+
+ if (!ordered_set_isempty(m->dns_extra_stub_listeners)) {
+ DnsStubListenerExtra *l;
+
+ log_debug("Creating extra stub listeners.");
+
+ ORDERED_SET_FOREACH(l, m->dns_extra_stub_listeners) {
+ if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_UDP))
+ (void) manager_dns_stub_fd_extra(m, l, SOCK_DGRAM);
+ if (FLAGS_SET(l->mode, DNS_STUB_LISTENER_TCP))
+ (void) manager_dns_stub_fd_extra(m, l, SOCK_STREAM);
+ }
+ }
+
+ return 0;
+}
+
+void manager_dns_stub_stop(Manager *m) {
+ assert(m);
+
+ m->dns_stub_udp_event_source = sd_event_source_unref(m->dns_stub_udp_event_source);
+ m->dns_stub_tcp_event_source = sd_event_source_unref(m->dns_stub_tcp_event_source);
+}
+
+static const char* const dns_stub_listener_mode_table[_DNS_STUB_LISTENER_MODE_MAX] = {
+ [DNS_STUB_LISTENER_NO] = "no",
+ [DNS_STUB_LISTENER_UDP] = "udp",
+ [DNS_STUB_LISTENER_TCP] = "tcp",
+ [DNS_STUB_LISTENER_YES] = "yes",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_stub_listener_mode, DnsStubListenerMode, DNS_STUB_LISTENER_YES);
diff --git a/src/resolve/resolved-dns-stub.h b/src/resolve/resolved-dns-stub.h
new file mode 100644
index 0000000..072f213
--- /dev/null
+++ b/src/resolve/resolved-dns-stub.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hash-funcs.h"
+
+typedef struct DnsStubListenerExtra DnsStubListenerExtra;
+
+typedef enum DnsStubListenerMode {
+ DNS_STUB_LISTENER_NO,
+ DNS_STUB_LISTENER_UDP = 1 << 0,
+ DNS_STUB_LISTENER_TCP = 1 << 1,
+ DNS_STUB_LISTENER_YES = DNS_STUB_LISTENER_UDP | DNS_STUB_LISTENER_TCP,
+ _DNS_STUB_LISTENER_MODE_MAX,
+ _DNS_STUB_LISTENER_MODE_INVALID = -1
+} DnsStubListenerMode;
+
+#include "resolved-manager.h"
+
+struct DnsStubListenerExtra {
+ Manager *manager;
+
+ DnsStubListenerMode mode;
+
+ int family;
+ union in_addr_union address;
+ uint16_t port;
+
+ sd_event_source *udp_event_source;
+ sd_event_source *tcp_event_source;
+};
+
+extern const struct hash_ops dns_stub_listener_extra_hash_ops;
+
+int dns_stub_listener_extra_new(Manager *m, DnsStubListenerExtra **ret);
+DnsStubListenerExtra *dns_stub_listener_extra_free(DnsStubListenerExtra *p);
+
+void manager_dns_stub_stop(Manager *m);
+int manager_dns_stub_start(Manager *m);
+
+const char* dns_stub_listener_mode_to_string(DnsStubListenerMode p) _const_;
+DnsStubListenerMode dns_stub_listener_mode_from_string(const char *s) _pure_;
diff --git a/src/resolve/resolved-dns-synthesize.c b/src/resolve/resolved-dns-synthesize.c
new file mode 100644
index 0000000..f08d621
--- /dev/null
+++ b/src/resolve/resolved-dns-synthesize.c
@@ -0,0 +1,450 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "hostname-util.h"
+#include "local-addresses.h"
+#include "missing_network.h"
+#include "resolved-dns-synthesize.h"
+
+int dns_synthesize_ifindex(int ifindex) {
+
+ /* When the caller asked for resolving on a specific
+ * interface, we synthesize the answer for that
+ * interface. However, if nothing specific was claimed and we
+ * only return localhost RRs, we synthesize the answer for
+ * localhost. */
+
+ if (ifindex > 0)
+ return ifindex;
+
+ return LOOPBACK_IFINDEX;
+}
+
+int dns_synthesize_family(uint64_t flags) {
+
+ /* Picks an address family depending on set flags. This is
+ * purely for synthesized answers, where the family we return
+ * for the reply should match what was requested in the
+ * question, even though we are synthesizing the answer
+ * here. */
+
+ if (!(flags & SD_RESOLVED_DNS)) {
+ if (flags & (SD_RESOLVED_LLMNR_IPV4|SD_RESOLVED_MDNS_IPV4))
+ return AF_INET;
+ if (flags & (SD_RESOLVED_LLMNR_IPV6|SD_RESOLVED_MDNS_IPV6))
+ return AF_INET6;
+ }
+
+ return AF_UNSPEC;
+}
+
+DnsProtocol dns_synthesize_protocol(uint64_t flags) {
+
+ /* Similar as dns_synthesize_family() but does this for the
+ * protocol. If resolving via DNS was requested, we claim it
+ * was DNS. Similar, if nothing specific was
+ * requested. However, if only resolving via LLMNR was
+ * requested we return that. */
+
+ if (flags & SD_RESOLVED_DNS)
+ return DNS_PROTOCOL_DNS;
+ if (flags & SD_RESOLVED_LLMNR)
+ return DNS_PROTOCOL_LLMNR;
+ if (flags & SD_RESOLVED_MDNS)
+ return DNS_PROTOCOL_MDNS;
+
+ return DNS_PROTOCOL_DNS;
+}
+
+static int synthesize_localhost_rr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) {
+ int r;
+
+ assert(m);
+ assert(key);
+ assert(answer);
+
+ r = dns_answer_reserve(answer, 2);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(key->type, DNS_TYPE_A, DNS_TYPE_ANY)) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_A, dns_resource_key_name(key));
+ if (!rr)
+ return -ENOMEM;
+
+ rr->a.in_addr.s_addr = htobe32(INADDR_LOOPBACK);
+
+ r = dns_answer_add(*answer, rr, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ if (IN_SET(key->type, DNS_TYPE_AAAA, DNS_TYPE_ANY)) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_AAAA, dns_resource_key_name(key));
+ if (!rr)
+ return -ENOMEM;
+
+ rr->aaaa.in6_addr = in6addr_loopback;
+
+ r = dns_answer_add(*answer, rr, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int answer_add_ptr(DnsAnswer **answer, const char *from, const char *to, int ifindex, DnsAnswerFlags flags) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_PTR, from);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->ptr.name = strdup(to);
+ if (!rr->ptr.name)
+ return -ENOMEM;
+
+ return dns_answer_add(*answer, rr, ifindex, flags);
+}
+
+static int synthesize_localhost_ptr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) {
+ int r;
+
+ assert(m);
+ assert(key);
+ assert(answer);
+
+ if (IN_SET(key->type, DNS_TYPE_PTR, DNS_TYPE_ANY)) {
+ r = dns_answer_reserve(answer, 1);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, dns_resource_key_name(key), "localhost", dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int answer_add_addresses_rr(
+ DnsAnswer **answer,
+ const char *name,
+ struct local_address *addresses,
+ unsigned n_addresses) {
+
+ unsigned j;
+ int r;
+
+ assert(answer);
+ assert(name);
+
+ r = dns_answer_reserve(answer, n_addresses);
+ if (r < 0)
+ return r;
+
+ for (j = 0; j < n_addresses; j++) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ r = dns_resource_record_new_address(&rr, addresses[j].family, &addresses[j].address, name);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add(*answer, rr, addresses[j].ifindex, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int answer_add_addresses_ptr(
+ DnsAnswer **answer,
+ const char *name,
+ struct local_address *addresses,
+ unsigned n_addresses,
+ int af, const union in_addr_union *match) {
+
+ bool added = false;
+ unsigned j;
+ int r;
+
+ assert(answer);
+ assert(name);
+
+ for (j = 0; j < n_addresses; j++) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ if (af != AF_UNSPEC) {
+
+ if (addresses[j].family != af)
+ continue;
+
+ if (match && !in_addr_equal(af, match, &addresses[j].address))
+ continue;
+ }
+
+ r = dns_answer_reserve(answer, 1);
+ if (r < 0)
+ return r;
+
+ r = dns_resource_record_new_reverse(&rr, addresses[j].family, &addresses[j].address, name);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add(*answer, rr, addresses[j].ifindex, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ added = true;
+ }
+
+ return added;
+}
+
+static int synthesize_system_hostname_rr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ int n = 0, af;
+
+ assert(m);
+ assert(key);
+ assert(answer);
+
+ af = dns_type_to_af(key->type);
+ if (af >= 0) {
+ n = local_addresses(m->rtnl, ifindex, af, &addresses);
+ if (n < 0)
+ return n;
+
+ if (n == 0) {
+ struct local_address buffer[2];
+
+ /* If we have no local addresses then use ::1
+ * and 127.0.0.2 as local ones. */
+
+ if (IN_SET(af, AF_INET, AF_UNSPEC))
+ buffer[n++] = (struct local_address) {
+ .family = AF_INET,
+ .ifindex = dns_synthesize_ifindex(ifindex),
+ .address.in.s_addr = htobe32(0x7F000002),
+ };
+
+ if (IN_SET(af, AF_INET6, AF_UNSPEC))
+ buffer[n++] = (struct local_address) {
+ .family = AF_INET6,
+ .ifindex = dns_synthesize_ifindex(ifindex),
+ .address.in6 = in6addr_loopback,
+ };
+
+ return answer_add_addresses_rr(answer,
+ dns_resource_key_name(key),
+ buffer, n);
+ }
+ }
+
+ return answer_add_addresses_rr(answer, dns_resource_key_name(key), addresses, n);
+}
+
+static int synthesize_system_hostname_ptr(Manager *m, int af, const union in_addr_union *address, int ifindex, DnsAnswer **answer) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ bool added = false;
+ int n, r;
+
+ assert(m);
+ assert(address);
+ assert(answer);
+
+ if (af == AF_INET && address->in.s_addr == htobe32(0x7F000002)) {
+
+ /* Always map the IPv4 address 127.0.0.2 to the local hostname, in addition to "localhost": */
+
+ r = dns_answer_reserve(answer, 4);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->full_hostname, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->llmnr_hostname, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", m->mdns_hostname, dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ r = answer_add_ptr(answer, "2.0.0.127.in-addr.arpa", "localhost", dns_synthesize_ifindex(ifindex), DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ n = local_addresses(m->rtnl, ifindex, af, &addresses);
+ if (n <= 0)
+ return n;
+
+ r = answer_add_addresses_ptr(answer, m->full_hostname, addresses, n, af, address);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ added = true;
+
+ r = answer_add_addresses_ptr(answer, m->llmnr_hostname, addresses, n, af, address);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ added = true;
+
+ r = answer_add_addresses_ptr(answer, m->mdns_hostname, addresses, n, af, address);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ added = true;
+
+ return added;
+}
+
+static int synthesize_gateway_rr(Manager *m, const DnsResourceKey *key, int ifindex, DnsAnswer **answer) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ int n = 0, af, r;
+
+ assert(m);
+ assert(key);
+ assert(answer);
+
+ af = dns_type_to_af(key->type);
+ if (af >= 0) {
+ n = local_gateways(m->rtnl, ifindex, af, &addresses);
+ if (n < 0) /* < 0 means: error */
+ return n;
+
+ if (n == 0) { /* == 0 means we have no gateway */
+ /* See if there's a gateway on the other protocol */
+ if (af == AF_INET)
+ n = local_gateways(m->rtnl, ifindex, AF_INET6, NULL);
+ else {
+ assert(af == AF_INET6);
+ n = local_gateways(m->rtnl, ifindex, AF_INET, NULL);
+ }
+ if (n <= 0) /* error (if < 0) or really no gateway at all (if == 0) */
+ return n;
+
+ /* We have a gateway on the other protocol. Let's return > 0 without adding any RR to
+ * the answer, i.e. synthesize NODATA (and not NXDOMAIN!) */
+ return 1;
+ }
+ }
+
+ r = answer_add_addresses_rr(answer, dns_resource_key_name(key), addresses, n);
+ if (r < 0)
+ return r;
+
+ return 1; /* > 0 means: we have some gateway */
+}
+
+static int synthesize_gateway_ptr(Manager *m, int af, const union in_addr_union *address, int ifindex, DnsAnswer **answer) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ int n;
+
+ assert(m);
+ assert(address);
+ assert(answer);
+
+ n = local_gateways(m->rtnl, ifindex, af, &addresses);
+ if (n <= 0)
+ return n;
+
+ return answer_add_addresses_ptr(answer, "_gateway", addresses, n, af, address);
+}
+
+int dns_synthesize_answer(
+ Manager *m,
+ DnsQuestion *q,
+ int ifindex,
+ DnsAnswer **ret) {
+
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnsResourceKey *key;
+ bool found = false, nxdomain = false;
+ int r;
+
+ assert(m);
+ assert(q);
+
+ DNS_QUESTION_FOREACH(key, q) {
+ union in_addr_union address;
+ const char *name;
+ int af;
+
+ if (!IN_SET(key->class, DNS_CLASS_IN, DNS_CLASS_ANY))
+ continue;
+
+ name = dns_resource_key_name(key);
+
+ if (is_localhost(name)) {
+
+ r = synthesize_localhost_rr(m, key, ifindex, &answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize localhost RRs: %m");
+
+ } else if (manager_is_own_hostname(m, name)) {
+
+ r = synthesize_system_hostname_rr(m, key, ifindex, &answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize system hostname RRs: %m");
+
+ } else if (is_gateway_hostname(name)) {
+
+ r = synthesize_gateway_rr(m, key, ifindex, &answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize gateway RRs: %m");
+ if (r == 0) { /* if we have no gateway return NXDOMAIN */
+ nxdomain = true;
+ continue;
+ }
+
+ } else if ((dns_name_endswith(name, "127.in-addr.arpa") > 0 && dns_name_equal(name, "2.0.0.127.in-addr.arpa") == 0) ||
+ dns_name_equal(name, "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa") > 0) {
+
+ r = synthesize_localhost_ptr(m, key, ifindex, &answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to synthesize localhost PTR RRs: %m");
+
+ } else if (dns_name_address(name, &af, &address) > 0) {
+ int v, w;
+
+ v = synthesize_system_hostname_ptr(m, af, &address, ifindex, &answer);
+ if (v < 0)
+ return log_error_errno(v, "Failed to synthesize system hostname PTR RR: %m");
+
+ w = synthesize_gateway_ptr(m, af, &address, ifindex, &answer);
+ if (w < 0)
+ return log_error_errno(w, "Failed to synthesize gateway hostname PTR RR: %m");
+
+ if (v == 0 && w == 0) /* This IP address is neither a local one nor a gateway */
+ continue;
+
+ } else
+ continue;
+
+ found = true;
+ }
+
+ if (found) {
+
+ if (ret)
+ *ret = TAKE_PTR(answer);
+
+ return 1;
+ } else if (nxdomain)
+ return -ENXIO;
+
+ return 0;
+}
diff --git a/src/resolve/resolved-dns-synthesize.h b/src/resolve/resolved-dns-synthesize.h
new file mode 100644
index 0000000..fb62458
--- /dev/null
+++ b/src/resolve/resolved-dns-synthesize.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-question.h"
+#include "resolved-manager.h"
+
+int dns_synthesize_ifindex(int ifindex);
+int dns_synthesize_family(uint64_t flags);
+DnsProtocol dns_synthesize_protocol(uint64_t flags);
+
+int dns_synthesize_answer(Manager *m, DnsQuestion *q, int ifindex, DnsAnswer **ret);
diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c
new file mode 100644
index 0000000..37f0ddd
--- /dev/null
+++ b/src/resolve/resolved-dns-transaction.c
@@ -0,0 +1,3258 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-messages.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "errno-list.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "random-util.h"
+#include "resolved-dns-cache.h"
+#include "resolved-dns-transaction.h"
+#include "resolved-dnstls.h"
+#include "resolved-llmnr.h"
+#include "string-table.h"
+
+#define TRANSACTIONS_MAX 4096
+#define TRANSACTION_TCP_TIMEOUT_USEC (10U*USEC_PER_SEC)
+
+/* After how much time to repeat classic DNS requests */
+#define DNS_TIMEOUT_USEC (SD_RESOLVED_QUERY_TIMEOUT_USEC / DNS_TRANSACTION_ATTEMPTS_MAX)
+
+static void dns_transaction_reset_answer(DnsTransaction *t) {
+ assert(t);
+
+ t->received = dns_packet_unref(t->received);
+ t->answer = dns_answer_unref(t->answer);
+ t->answer_rcode = 0;
+ t->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ t->answer_source = _DNS_TRANSACTION_SOURCE_INVALID;
+ t->answer_authenticated = false;
+ t->answer_nsec_ttl = (uint32_t) -1;
+ t->answer_errno = 0;
+}
+
+static void dns_transaction_flush_dnssec_transactions(DnsTransaction *t) {
+ DnsTransaction *z;
+
+ assert(t);
+
+ while ((z = set_steal_first(t->dnssec_transactions))) {
+ set_remove(z->notify_transactions, t);
+ set_remove(z->notify_transactions_done, t);
+ dns_transaction_gc(z);
+ }
+}
+
+static void dns_transaction_close_connection(DnsTransaction *t) {
+ assert(t);
+
+ if (t->stream) {
+ /* Let's detach the stream from our transaction, in case something else keeps a reference to it. */
+ LIST_REMOVE(transactions_by_stream, t->stream->transactions, t);
+
+ /* Remove packet in case it's still in the queue */
+ dns_packet_unref(ordered_set_remove(t->stream->write_queue, t->sent));
+
+ t->stream = dns_stream_unref(t->stream);
+ }
+
+ t->dns_udp_event_source = sd_event_source_unref(t->dns_udp_event_source);
+ t->dns_udp_fd = safe_close(t->dns_udp_fd);
+}
+
+static void dns_transaction_stop_timeout(DnsTransaction *t) {
+ assert(t);
+
+ t->timeout_event_source = sd_event_source_unref(t->timeout_event_source);
+}
+
+DnsTransaction* dns_transaction_free(DnsTransaction *t) {
+ DnsQueryCandidate *c;
+ DnsZoneItem *i;
+ DnsTransaction *z;
+
+ if (!t)
+ return NULL;
+
+ log_debug("Freeing transaction %" PRIu16 ".", t->id);
+
+ dns_transaction_close_connection(t);
+ dns_transaction_stop_timeout(t);
+
+ dns_packet_unref(t->sent);
+ dns_transaction_reset_answer(t);
+
+ dns_server_unref(t->server);
+
+ if (t->scope) {
+ hashmap_remove_value(t->scope->transactions_by_key, t->key, t);
+ LIST_REMOVE(transactions_by_scope, t->scope->transactions, t);
+
+ if (t->id != 0)
+ hashmap_remove(t->scope->manager->dns_transactions, UINT_TO_PTR(t->id));
+ }
+
+ while ((c = set_steal_first(t->notify_query_candidates)))
+ set_remove(c->transactions, t);
+ set_free(t->notify_query_candidates);
+
+ while ((c = set_steal_first(t->notify_query_candidates_done)))
+ set_remove(c->transactions, t);
+ set_free(t->notify_query_candidates_done);
+
+ while ((i = set_steal_first(t->notify_zone_items)))
+ i->probe_transaction = NULL;
+ set_free(t->notify_zone_items);
+
+ while ((i = set_steal_first(t->notify_zone_items_done)))
+ i->probe_transaction = NULL;
+ set_free(t->notify_zone_items_done);
+
+ while ((z = set_steal_first(t->notify_transactions)))
+ set_remove(z->dnssec_transactions, t);
+ set_free(t->notify_transactions);
+
+ while ((z = set_steal_first(t->notify_transactions_done)))
+ set_remove(z->dnssec_transactions, t);
+ set_free(t->notify_transactions_done);
+
+ dns_transaction_flush_dnssec_transactions(t);
+ set_free(t->dnssec_transactions);
+
+ dns_answer_unref(t->validated_keys);
+ dns_resource_key_unref(t->key);
+
+ return mfree(t);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsTransaction*, dns_transaction_free);
+
+bool dns_transaction_gc(DnsTransaction *t) {
+ assert(t);
+
+ if (t->block_gc > 0)
+ return true;
+
+ if (set_isempty(t->notify_query_candidates) &&
+ set_isempty(t->notify_query_candidates_done) &&
+ set_isempty(t->notify_zone_items) &&
+ set_isempty(t->notify_zone_items_done) &&
+ set_isempty(t->notify_transactions) &&
+ set_isempty(t->notify_transactions_done)) {
+ dns_transaction_free(t);
+ return false;
+ }
+
+ return true;
+}
+
+static uint16_t pick_new_id(Manager *m) {
+ uint16_t new_id;
+
+ /* Find a fresh, unused transaction id. Note that this loop is bounded because there's a limit on the
+ * number of transactions, and it's much lower than the space of IDs. */
+
+ assert_cc(TRANSACTIONS_MAX < 0xFFFF);
+
+ do
+ random_bytes(&new_id, sizeof(new_id));
+ while (new_id == 0 ||
+ hashmap_get(m->dns_transactions, UINT_TO_PTR(new_id)));
+
+ return new_id;
+}
+
+int dns_transaction_new(DnsTransaction **ret, DnsScope *s, DnsResourceKey *key) {
+ _cleanup_(dns_transaction_freep) DnsTransaction *t = NULL;
+ int r;
+
+ assert(ret);
+ assert(s);
+ assert(key);
+
+ /* Don't allow looking up invalid or pseudo RRs */
+ if (!dns_type_is_valid_query(key->type))
+ return -EINVAL;
+ if (dns_type_is_obsolete(key->type))
+ return -EOPNOTSUPP;
+
+ /* We only support the IN class */
+ if (!IN_SET(key->class, DNS_CLASS_IN, DNS_CLASS_ANY))
+ return -EOPNOTSUPP;
+
+ if (hashmap_size(s->manager->dns_transactions) >= TRANSACTIONS_MAX)
+ return -EBUSY;
+
+ r = hashmap_ensure_allocated(&s->manager->dns_transactions, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&s->transactions_by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ t = new(DnsTransaction, 1);
+ if (!t)
+ return -ENOMEM;
+
+ *t = (DnsTransaction) {
+ .dns_udp_fd = -1,
+ .answer_source = _DNS_TRANSACTION_SOURCE_INVALID,
+ .answer_dnssec_result = _DNSSEC_RESULT_INVALID,
+ .answer_nsec_ttl = (uint32_t) -1,
+ .key = dns_resource_key_ref(key),
+ .current_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID,
+ .clamp_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID,
+ .id = pick_new_id(s->manager),
+ };
+
+ r = hashmap_put(s->manager->dns_transactions, UINT_TO_PTR(t->id), t);
+ if (r < 0) {
+ t->id = 0;
+ return r;
+ }
+
+ r = hashmap_replace(s->transactions_by_key, t->key, t);
+ if (r < 0) {
+ hashmap_remove(s->manager->dns_transactions, UINT_TO_PTR(t->id));
+ return r;
+ }
+
+ LIST_PREPEND(transactions_by_scope, s->transactions, t);
+ t->scope = s;
+
+ s->manager->n_transactions_total++;
+
+ if (ret)
+ *ret = t;
+
+ t = NULL;
+
+ return 0;
+}
+
+static void dns_transaction_shuffle_id(DnsTransaction *t) {
+ uint16_t new_id;
+ assert(t);
+
+ /* Pick a new ID for this transaction. */
+
+ new_id = pick_new_id(t->scope->manager);
+ assert_se(hashmap_remove_and_put(t->scope->manager->dns_transactions, UINT_TO_PTR(t->id), UINT_TO_PTR(new_id), t) >= 0);
+
+ log_debug("Transaction %" PRIu16 " is now %" PRIu16 ".", t->id, new_id);
+ t->id = new_id;
+
+ /* Make sure we generate a new packet with the new ID */
+ t->sent = dns_packet_unref(t->sent);
+}
+
+static void dns_transaction_tentative(DnsTransaction *t, DnsPacket *p) {
+ _cleanup_free_ char *pretty = NULL;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ DnsZoneItem *z;
+
+ assert(t);
+ assert(p);
+
+ if (manager_our_packet(t->scope->manager, p) != 0)
+ return;
+
+ (void) in_addr_to_string(p->family, &p->sender, &pretty);
+
+ log_debug("Transaction %" PRIu16 " for <%s> on scope %s on %s/%s got tentative packet from %s.",
+ t->id,
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str),
+ dns_protocol_to_string(t->scope->protocol),
+ t->scope->link ? t->scope->link->ifname : "*",
+ af_to_name_short(t->scope->family),
+ strnull(pretty));
+
+ /* RFC 4795, Section 4.1 says that the peer with the
+ * lexicographically smaller IP address loses */
+ if (memcmp(&p->sender, &p->destination, FAMILY_ADDRESS_SIZE(p->family)) >= 0) {
+ log_debug("Peer has lexicographically larger IP address and thus lost in the conflict.");
+ return;
+ }
+
+ log_debug("We have the lexicographically larger IP address and thus lost in the conflict.");
+
+ t->block_gc++;
+
+ while ((z = set_first(t->notify_zone_items))) {
+ /* First, make sure the zone item drops the reference
+ * to us */
+ dns_zone_item_probe_stop(z);
+
+ /* Secondly, report this as conflict, so that we might
+ * look for a different hostname */
+ dns_zone_item_conflict(z);
+ }
+ t->block_gc--;
+
+ dns_transaction_gc(t);
+}
+
+void dns_transaction_complete(DnsTransaction *t, DnsTransactionState state) {
+ DnsQueryCandidate *c;
+ DnsZoneItem *z;
+ DnsTransaction *d;
+ const char *st;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ assert(t);
+ assert(!DNS_TRANSACTION_IS_LIVE(state));
+
+ if (state == DNS_TRANSACTION_DNSSEC_FAILED) {
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str);
+
+ log_struct(LOG_NOTICE,
+ "MESSAGE_ID=" SD_MESSAGE_DNSSEC_FAILURE_STR,
+ LOG_MESSAGE("DNSSEC validation failed for question %s: %s", key_str, dnssec_result_to_string(t->answer_dnssec_result)),
+ "DNS_TRANSACTION=%" PRIu16, t->id,
+ "DNS_QUESTION=%s", key_str,
+ "DNSSEC_RESULT=%s", dnssec_result_to_string(t->answer_dnssec_result),
+ "DNS_SERVER=%s", strna(dns_server_string_full(t->server)),
+ "DNS_SERVER_FEATURE_LEVEL=%s", dns_server_feature_level_to_string(t->server->possible_feature_level));
+ }
+
+ /* Note that this call might invalidate the query. Callers
+ * should hence not attempt to access the query or transaction
+ * after calling this function. */
+
+ if (state == DNS_TRANSACTION_ERRNO)
+ st = errno_to_name(t->answer_errno);
+ else
+ st = dns_transaction_state_to_string(state);
+
+ log_debug("Transaction %" PRIu16 " for <%s> on scope %s on %s/%s now complete with <%s> from %s (%s).",
+ t->id,
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str),
+ dns_protocol_to_string(t->scope->protocol),
+ t->scope->link ? t->scope->link->ifname : "*",
+ af_to_name_short(t->scope->family),
+ st,
+ t->answer_source < 0 ? "none" : dns_transaction_source_to_string(t->answer_source),
+ t->answer_authenticated ? "authenticated" : "unsigned");
+
+ t->state = state;
+
+ dns_transaction_close_connection(t);
+ dns_transaction_stop_timeout(t);
+
+ /* Notify all queries that are interested, but make sure the
+ * transaction isn't freed while we are still looking at it */
+ t->block_gc++;
+
+ SET_FOREACH_MOVE(c, t->notify_query_candidates_done, t->notify_query_candidates)
+ dns_query_candidate_notify(c);
+ SWAP_TWO(t->notify_query_candidates, t->notify_query_candidates_done);
+
+ SET_FOREACH_MOVE(z, t->notify_zone_items_done, t->notify_zone_items)
+ dns_zone_item_notify(z);
+ SWAP_TWO(t->notify_zone_items, t->notify_zone_items_done);
+ if (t->probing && t->state == DNS_TRANSACTION_ATTEMPTS_MAX_REACHED)
+ (void) dns_scope_announce(t->scope, false);
+
+ SET_FOREACH_MOVE(d, t->notify_transactions_done, t->notify_transactions)
+ dns_transaction_notify(d, t);
+ SWAP_TWO(t->notify_transactions, t->notify_transactions_done);
+
+ t->block_gc--;
+ dns_transaction_gc(t);
+}
+
+static void dns_transaction_complete_errno(DnsTransaction *t, int error) {
+ assert(t);
+ assert(error != 0);
+
+ t->answer_errno = abs(error);
+ dns_transaction_complete(t, DNS_TRANSACTION_ERRNO);
+}
+
+static int dns_transaction_pick_server(DnsTransaction *t) {
+ DnsServer *server;
+
+ assert(t);
+ assert(t->scope->protocol == DNS_PROTOCOL_DNS);
+
+ /* Pick a DNS server and a feature level for it. */
+
+ server = dns_scope_get_dns_server(t->scope);
+ if (!server)
+ return -ESRCH;
+
+ /* If we changed the server invalidate the feature level clamping, as the new server might have completely
+ * different properties. */
+ if (server != t->server)
+ t->clamp_feature_level = _DNS_SERVER_FEATURE_LEVEL_INVALID;
+
+ t->current_feature_level = dns_server_possible_feature_level(server);
+
+ /* Clamp the feature level if that is requested. */
+ if (t->clamp_feature_level != _DNS_SERVER_FEATURE_LEVEL_INVALID &&
+ t->current_feature_level > t->clamp_feature_level)
+ t->current_feature_level = t->clamp_feature_level;
+
+ log_debug("Using feature level %s for transaction %u.", dns_server_feature_level_to_string(t->current_feature_level), t->id);
+
+ if (server == t->server)
+ return 0;
+
+ dns_server_unref(t->server);
+ t->server = dns_server_ref(server);
+
+ t->n_picked_servers ++;
+
+ log_debug("Using DNS server %s for transaction %u.", strna(dns_server_string_full(t->server)), t->id);
+
+ return 1;
+}
+
+static void dns_transaction_retry(DnsTransaction *t, bool next_server) {
+ int r;
+
+ assert(t);
+
+ log_debug("Retrying transaction %" PRIu16 ".", t->id);
+
+ /* Before we try again, switch to a new server. */
+ if (next_server)
+ dns_scope_next_dns_server(t->scope);
+
+ r = dns_transaction_go(t);
+ if (r < 0)
+ dns_transaction_complete_errno(t, r);
+}
+
+static int dns_transaction_maybe_restart(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ /* Returns > 0 if the transaction was restarted, 0 if not */
+
+ if (!t->server)
+ return 0;
+
+ if (t->current_feature_level <= dns_server_possible_feature_level(t->server))
+ return 0;
+
+ /* The server's current feature level is lower than when we sent the original query. We learnt something from
+ the response or possibly an auxiliary DNSSEC response that we didn't know before. We take that as reason to
+ restart the whole transaction. This is a good idea to deal with servers that respond rubbish if we include
+ OPT RR or DO bit. One of these cases is documented here, for example:
+ https://open.nlnetlabs.nl/pipermail/dnssec-trigger/2014-November/000376.html */
+
+ log_debug("Server feature level is now lower than when we began our transaction. Restarting with new ID.");
+ dns_transaction_shuffle_id(t);
+
+ r = dns_transaction_go(t);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static void on_transaction_stream_error(DnsTransaction *t, int error) {
+ assert(t);
+
+ dns_transaction_close_connection(t);
+
+ if (ERRNO_IS_DISCONNECT(error)) {
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR) {
+ /* If the LLMNR/TCP connection failed, the host doesn't support LLMNR, and we cannot answer the
+ * question on this scope. */
+ dns_transaction_complete(t, DNS_TRANSACTION_NOT_FOUND);
+ return;
+ }
+
+ dns_transaction_retry(t, true);
+ return;
+ }
+ if (error != 0)
+ dns_transaction_complete_errno(t, error);
+}
+
+static int dns_transaction_on_stream_packet(DnsTransaction *t, DnsPacket *p) {
+ assert(t);
+ assert(p);
+
+ dns_transaction_close_connection(t);
+
+ if (dns_packet_validate_reply(p) <= 0) {
+ log_debug("Invalid TCP reply packet.");
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return 0;
+ }
+
+ dns_scope_check_conflicts(t->scope, p);
+
+ t->block_gc++;
+ dns_transaction_process_reply(t, p);
+ t->block_gc--;
+
+ /* If the response wasn't useful, then complete the transition
+ * now. After all, we are the worst feature set now with TCP
+ * sockets, and there's really no point in retrying. */
+ if (t->state == DNS_TRANSACTION_PENDING)
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ else
+ dns_transaction_gc(t);
+
+ return 0;
+}
+
+static int on_stream_complete(DnsStream *s, int error) {
+ assert(s);
+
+ if (ERRNO_IS_DISCONNECT(error) && s->protocol != DNS_PROTOCOL_LLMNR) {
+ log_debug_errno(error, "Connection failure for DNS TCP stream: %m");
+
+ if (s->transactions) {
+ DnsTransaction *t;
+
+ t = s->transactions;
+ dns_server_packet_lost(t->server, IPPROTO_TCP, t->current_feature_level);
+ }
+ }
+
+ if (error != 0) {
+ DnsTransaction *t, *n;
+
+ LIST_FOREACH_SAFE(transactions_by_stream, t, n, s->transactions)
+ on_transaction_stream_error(t, error);
+ }
+
+ return 0;
+}
+
+static int on_stream_packet(DnsStream *s) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ DnsTransaction *t;
+
+ assert(s);
+
+ /* Take ownership of packet to be able to receive new packets */
+ p = dns_stream_take_read_packet(s);
+ assert(p);
+
+ t = hashmap_get(s->manager->dns_transactions, UINT_TO_PTR(DNS_PACKET_ID(p)));
+ if (t)
+ return dns_transaction_on_stream_packet(t, p);
+
+ /* Ignore incorrect transaction id as an old transaction can have been canceled. */
+ log_debug("Received unexpected TCP reply packet with id %" PRIu16 ", ignoring.", DNS_PACKET_ID(p));
+ return 0;
+}
+
+static uint16_t dns_transaction_port(DnsTransaction *t) {
+ if (t->server->port > 0)
+ return t->server->port;
+ return DNS_SERVER_FEATURE_LEVEL_IS_TLS(t->current_feature_level) ? 853 : 53;
+}
+
+static int dns_transaction_emit_tcp(DnsTransaction *t) {
+ _cleanup_(dns_stream_unrefp) DnsStream *s = NULL;
+ _cleanup_close_ int fd = -1;
+ union sockaddr_union sa;
+ DnsStreamType type;
+ int r;
+
+ assert(t);
+
+ dns_transaction_close_connection(t);
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ r = dns_transaction_pick_server(t);
+ if (r < 0)
+ return r;
+
+ if (!dns_server_dnssec_supported(t->server) && dns_type_is_dnssec(t->key->type))
+ return -EOPNOTSUPP;
+
+ r = dns_server_adjust_opt(t->server, t->sent, t->current_feature_level);
+ if (r < 0)
+ return r;
+
+ if (t->server->stream && (DNS_SERVER_FEATURE_LEVEL_IS_TLS(t->current_feature_level) == t->server->stream->encrypted))
+ s = dns_stream_ref(t->server->stream);
+ else
+ fd = dns_scope_socket_tcp(t->scope, AF_UNSPEC, NULL, t->server, dns_transaction_port(t), &sa);
+
+ type = DNS_STREAM_LOOKUP;
+ break;
+
+ case DNS_PROTOCOL_LLMNR:
+ /* When we already received a reply to this (but it was truncated), send to its sender address */
+ if (t->received)
+ fd = dns_scope_socket_tcp(t->scope, t->received->family, &t->received->sender, NULL, t->received->sender_port, &sa);
+ else {
+ union in_addr_union address;
+ int family = AF_UNSPEC;
+
+ /* Otherwise, try to talk to the owner of a
+ * the IP address, in case this is a reverse
+ * PTR lookup */
+
+ r = dns_name_address(dns_resource_key_name(t->key), &family, &address);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (family != t->scope->family)
+ return -ESRCH;
+
+ fd = dns_scope_socket_tcp(t->scope, family, &address, NULL, LLMNR_PORT, &sa);
+ }
+
+ type = DNS_STREAM_LLMNR_SEND;
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ if (!s) {
+ if (fd < 0)
+ return fd;
+
+ r = dns_stream_new(t->scope->manager, &s, type, t->scope->protocol, fd, &sa);
+ if (r < 0)
+ return r;
+
+ fd = -1;
+
+#if ENABLE_DNS_OVER_TLS
+ if (t->scope->protocol == DNS_PROTOCOL_DNS &&
+ DNS_SERVER_FEATURE_LEVEL_IS_TLS(t->current_feature_level)) {
+
+ assert(t->server);
+ r = dnstls_stream_connect_tls(s, t->server);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ if (t->server) {
+ dns_server_unref_stream(t->server);
+ s->server = dns_server_ref(t->server);
+ t->server->stream = dns_stream_ref(s);
+ }
+
+ s->complete = on_stream_complete;
+ s->on_packet = on_stream_packet;
+
+ /* The interface index is difficult to determine if we are
+ * connecting to the local host, hence fill this in right away
+ * instead of determining it from the socket */
+ s->ifindex = dns_scope_ifindex(t->scope);
+ }
+
+ t->stream = TAKE_PTR(s);
+ LIST_PREPEND(transactions_by_stream, t->stream->transactions, t);
+
+ r = dns_stream_write_packet(t->stream, t->sent);
+ if (r < 0) {
+ dns_transaction_close_connection(t);
+ return r;
+ }
+
+ dns_transaction_reset_answer(t);
+
+ t->tried_stream = true;
+
+ return 0;
+}
+
+static void dns_transaction_cache_answer(DnsTransaction *t) {
+ assert(t);
+
+ /* For mDNS we cache whenever we get the packet, rather than
+ * in each transaction. */
+ if (!IN_SET(t->scope->protocol, DNS_PROTOCOL_DNS, DNS_PROTOCOL_LLMNR))
+ return;
+
+ /* Caching disabled? */
+ if (t->scope->manager->enable_cache == DNS_CACHE_MODE_NO)
+ return;
+
+ /* We never cache if this packet is from the local host, under
+ * the assumption that a locally running DNS server would
+ * cache this anyway, and probably knows better when to flush
+ * the cache then we could. */
+ if (!DNS_PACKET_SHALL_CACHE(t->received))
+ return;
+
+ dns_cache_put(&t->scope->cache,
+ t->scope->manager->enable_cache,
+ t->key,
+ t->answer_rcode,
+ t->answer,
+ t->answer_authenticated,
+ t->answer_nsec_ttl,
+ 0,
+ t->received->family,
+ &t->received->sender);
+}
+
+static bool dns_transaction_dnssec_is_live(DnsTransaction *t) {
+ DnsTransaction *dt;
+
+ assert(t);
+
+ SET_FOREACH(dt, t->dnssec_transactions)
+ if (DNS_TRANSACTION_IS_LIVE(dt->state))
+ return true;
+
+ return false;
+}
+
+static int dns_transaction_dnssec_ready(DnsTransaction *t) {
+ DnsTransaction *dt;
+
+ assert(t);
+
+ /* Checks whether the auxiliary DNSSEC transactions of our transaction have completed, or are still
+ * ongoing. Returns 0, if we aren't ready for the DNSSEC validation, positive if we are. */
+
+ SET_FOREACH(dt, t->dnssec_transactions) {
+
+ switch (dt->state) {
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ /* Still ongoing */
+ return 0;
+
+ case DNS_TRANSACTION_RCODE_FAILURE:
+ if (!IN_SET(dt->answer_rcode, DNS_RCODE_NXDOMAIN, DNS_RCODE_SERVFAIL)) {
+ log_debug("Auxiliary DNSSEC RR query failed with rcode=%s.", dns_rcode_to_string(dt->answer_rcode));
+ goto fail;
+ }
+
+ /* Fall-through: NXDOMAIN/SERVFAIL is good enough for us. This is because some DNS servers
+ * erroneously return NXDOMAIN/SERVFAIL for empty non-terminals (Akamai...) or missing DS
+ * records (Facebook), and we need to handle that nicely, when asking for parent SOA or similar
+ * RRs to make unsigned proofs. */
+
+ case DNS_TRANSACTION_SUCCESS:
+ /* All good. */
+ break;
+
+ case DNS_TRANSACTION_DNSSEC_FAILED:
+ /* We handle DNSSEC failures different from other errors, as we care about the DNSSEC
+ * validationr result */
+
+ log_debug("Auxiliary DNSSEC RR query failed validation: %s", dnssec_result_to_string(dt->answer_dnssec_result));
+ t->answer_dnssec_result = dt->answer_dnssec_result; /* Copy error code over */
+ dns_transaction_complete(t, DNS_TRANSACTION_DNSSEC_FAILED);
+ return 0;
+
+ default:
+ log_debug("Auxiliary DNSSEC RR query failed with %s", dns_transaction_state_to_string(dt->state));
+ goto fail;
+ }
+ }
+
+ /* All is ready, we can go and validate */
+ return 1;
+
+fail:
+ t->answer_dnssec_result = DNSSEC_FAILED_AUXILIARY;
+ dns_transaction_complete(t, DNS_TRANSACTION_DNSSEC_FAILED);
+ return 0;
+}
+
+static void dns_transaction_process_dnssec(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ /* Are there ongoing DNSSEC transactions? If so, let's wait for them. */
+ r = dns_transaction_dnssec_ready(t);
+ if (r < 0)
+ goto fail;
+ if (r == 0) /* We aren't ready yet (or one of our auxiliary transactions failed, and we shouldn't validate now */
+ return;
+
+ /* See if we learnt things from the additional DNSSEC transactions, that we didn't know before, and better
+ * restart the lookup immediately. */
+ r = dns_transaction_maybe_restart(t);
+ if (r < 0)
+ goto fail;
+ if (r > 0) /* Transaction got restarted... */
+ return;
+
+ /* All our auxiliary DNSSEC transactions are complete now. Try
+ * to validate our RRset now. */
+ r = dns_transaction_validate_dnssec(t);
+ if (r == -EBADMSG) {
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+ if (r < 0)
+ goto fail;
+
+ if (t->answer_dnssec_result == DNSSEC_INCOMPATIBLE_SERVER &&
+ t->scope->dnssec_mode == DNSSEC_YES) {
+
+ /* We are not in automatic downgrade mode, and the server is bad. Let's try a different server, maybe
+ * that works. */
+
+ if (t->n_picked_servers < dns_scope_get_n_dns_servers(t->scope)) {
+ /* We tried fewer servers on this transaction than we know, let's try another one then */
+ dns_transaction_retry(t, true);
+ return;
+ }
+
+ /* OK, let's give up, apparently all servers we tried didn't work. */
+ dns_transaction_complete(t, DNS_TRANSACTION_DNSSEC_FAILED);
+ return;
+ }
+
+ if (!IN_SET(t->answer_dnssec_result,
+ _DNSSEC_RESULT_INVALID, /* No DNSSEC validation enabled */
+ DNSSEC_VALIDATED, /* Answer is signed and validated successfully */
+ DNSSEC_UNSIGNED, /* Answer is right-fully unsigned */
+ DNSSEC_INCOMPATIBLE_SERVER)) { /* Server does not do DNSSEC (Yay, we are downgrade attack vulnerable!) */
+ dns_transaction_complete(t, DNS_TRANSACTION_DNSSEC_FAILED);
+ return;
+ }
+
+ if (t->answer_dnssec_result == DNSSEC_INCOMPATIBLE_SERVER)
+ dns_server_warn_downgrade(t->server);
+
+ dns_transaction_cache_answer(t);
+
+ if (t->answer_rcode == DNS_RCODE_SUCCESS)
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ else
+ dns_transaction_complete(t, DNS_TRANSACTION_RCODE_FAILURE);
+
+ return;
+
+fail:
+ dns_transaction_complete_errno(t, r);
+}
+
+static int dns_transaction_has_positive_answer(DnsTransaction *t, DnsAnswerFlags *flags) {
+ int r;
+
+ assert(t);
+
+ /* Checks whether the answer is positive, i.e. either a direct
+ * answer to the question, or a CNAME/DNAME for it */
+
+ r = dns_answer_match_key(t->answer, t->key, flags);
+ if (r != 0)
+ return r;
+
+ r = dns_answer_find_cname_or_dname(t->answer, t->key, NULL, flags);
+ if (r != 0)
+ return r;
+
+ return false;
+}
+
+static int dns_transaction_fix_rcode(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ /* Fix up the RCODE to SUCCESS if we get at least one matching RR in a response. Note that this contradicts the
+ * DNS RFCs a bit. Specifically, RFC 6604 Section 3 clarifies that the RCODE shall say something about a
+ * CNAME/DNAME chain element coming after the last chain element contained in the message, and not the first
+ * one included. However, it also indicates that not all DNS servers implement this correctly. Moreover, when
+ * using DNSSEC we usually only can prove the first element of a CNAME/DNAME chain anyway, hence let's settle
+ * on always processing the RCODE as referring to the immediate look-up we do, i.e. the first element of a
+ * CNAME/DNAME chain. This way, we uniformly handle CNAME/DNAME chains, regardless if the DNS server
+ * incorrectly implements RCODE, whether DNSSEC is in use, or whether the DNS server only supplied us with an
+ * incomplete CNAME/DNAME chain.
+ *
+ * Or in other words: if we get at least one positive reply in a message we patch NXDOMAIN to become SUCCESS,
+ * and then rely on the CNAME chasing logic to figure out that there's actually a CNAME error with a new
+ * lookup. */
+
+ if (t->answer_rcode != DNS_RCODE_NXDOMAIN)
+ return 0;
+
+ r = dns_transaction_has_positive_answer(t, NULL);
+ if (r <= 0)
+ return r;
+
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ return 0;
+}
+
+void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) {
+ usec_t ts;
+ int r;
+
+ assert(t);
+ assert(p);
+ assert(t->scope);
+ assert(t->scope->manager);
+
+ if (t->state != DNS_TRANSACTION_PENDING)
+ return;
+
+ /* Note that this call might invalidate the query. Callers
+ * should hence not attempt to access the query or transaction
+ * after calling this function. */
+
+ log_debug("Processing incoming packet on transaction %" PRIu16" (rcode=%s).",
+ t->id, dns_rcode_to_string(DNS_PACKET_RCODE(p)));
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_LLMNR:
+ /* For LLMNR we will not accept any packets from other interfaces */
+
+ if (p->ifindex != dns_scope_ifindex(t->scope))
+ return;
+
+ if (p->family != t->scope->family)
+ return;
+
+ /* Tentative packets are not full responses but still
+ * useful for identifying uniqueness conflicts during
+ * probing. */
+ if (DNS_PACKET_LLMNR_T(p)) {
+ dns_transaction_tentative(t, p);
+ return;
+ }
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ /* For mDNS we will not accept any packets from other interfaces */
+
+ if (p->ifindex != dns_scope_ifindex(t->scope))
+ return;
+
+ if (p->family != t->scope->family)
+ return;
+
+ break;
+
+ case DNS_PROTOCOL_DNS:
+ /* Note that we do not need to verify the
+ * addresses/port numbers of incoming traffic, as we
+ * invoked connect() on our UDP socket in which case
+ * the kernel already does the needed verification for
+ * us. */
+ break;
+
+ default:
+ assert_not_reached("Invalid DNS protocol.");
+ }
+
+ if (t->received != p) {
+ dns_packet_unref(t->received);
+ t->received = dns_packet_ref(p);
+ }
+
+ t->answer_source = DNS_TRANSACTION_NETWORK;
+
+ if (p->ipproto == IPPROTO_TCP) {
+ if (DNS_PACKET_TC(p)) {
+ /* Truncated via TCP? Somebody must be fucking with us */
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+
+ if (DNS_PACKET_ID(p) != t->id) {
+ /* Not the reply to our query? Somebody must be fucking with us */
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+ }
+
+ assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ assert(t->server);
+
+ if (IN_SET(DNS_PACKET_RCODE(p), DNS_RCODE_FORMERR, DNS_RCODE_SERVFAIL, DNS_RCODE_NOTIMP)) {
+
+ /* Request failed, immediately try again with reduced features */
+
+ if (t->current_feature_level <= DNS_SERVER_FEATURE_LEVEL_UDP) {
+
+ /* This was already at UDP feature level? If so, it doesn't make sense to downgrade
+ * this transaction anymore, but let's see if it might make sense to send the request
+ * to a different DNS server instead. If not let's process the response, and accept the
+ * rcode. Note that we don't retry on TCP, since that's a suitable way to mitigate
+ * packet loss, but is not going to give us better rcodes should we actually have
+ * managed to get them already at UDP level. */
+
+ if (t->n_picked_servers < dns_scope_get_n_dns_servers(t->scope)) {
+ /* We tried fewer servers on this transaction than we know, let's try another one then */
+ dns_transaction_retry(t, true);
+ return;
+ }
+
+ /* Give up, accept the rcode */
+ log_debug("Server returned error: %s", dns_rcode_to_string(DNS_PACKET_RCODE(p)));
+ break;
+ }
+
+ /* Reduce this feature level by one and try again. */
+ switch (t->current_feature_level) {
+ case DNS_SERVER_FEATURE_LEVEL_TLS_DO:
+ t->clamp_feature_level = DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN;
+ break;
+ case DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN + 1:
+ /* Skip plain TLS when TLS is not supported */
+ t->clamp_feature_level = DNS_SERVER_FEATURE_LEVEL_TLS_PLAIN - 1;
+ break;
+ default:
+ t->clamp_feature_level = t->current_feature_level - 1;
+ }
+
+ log_debug("Server returned error %s, retrying transaction with reduced feature level %s.",
+ dns_rcode_to_string(DNS_PACKET_RCODE(p)),
+ dns_server_feature_level_to_string(t->clamp_feature_level));
+
+ dns_transaction_retry(t, false /* use the same server */);
+ return;
+ }
+
+ if (DNS_PACKET_RCODE(p) == DNS_RCODE_REFUSED) {
+ /* This server refused our request? If so, try again, use a different server */
+ log_debug("Server returned REFUSED, switching servers, and retrying.");
+ dns_transaction_retry(t, true /* pick a new server */);
+ return;
+ }
+
+ if (DNS_PACKET_TC(p))
+ dns_server_packet_truncated(t->server, t->current_feature_level);
+
+ break;
+
+ case DNS_PROTOCOL_LLMNR:
+ case DNS_PROTOCOL_MDNS:
+ dns_scope_packet_received(t->scope, ts - t->start_usec);
+ break;
+
+ default:
+ assert_not_reached("Invalid DNS protocol.");
+ }
+
+ if (DNS_PACKET_TC(p)) {
+
+ /* Truncated packets for mDNS are not allowed. Give up immediately. */
+ if (t->scope->protocol == DNS_PROTOCOL_MDNS) {
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+
+ log_debug("Reply truncated, retrying via TCP.");
+
+ /* Response was truncated, let's try again with good old TCP */
+ r = dns_transaction_emit_tcp(t);
+ if (r == -ESRCH) {
+ /* No servers found? Damn! */
+ dns_transaction_complete(t, DNS_TRANSACTION_NO_SERVERS);
+ return;
+ }
+ if (r == -EOPNOTSUPP) {
+ /* Tried to ask for DNSSEC RRs, on a server that doesn't do DNSSEC */
+ dns_transaction_complete(t, DNS_TRANSACTION_RR_TYPE_UNSUPPORTED);
+ return;
+ }
+ if (r < 0) {
+ /* On LLMNR, if we cannot connect to the host,
+ * we immediately give up */
+ if (t->scope->protocol != DNS_PROTOCOL_DNS)
+ goto fail;
+
+ /* On DNS, couldn't send? Try immediately again, with a new server */
+ dns_transaction_retry(t, true);
+ }
+
+ return;
+ }
+
+ /* After the superficial checks, actually parse the message. */
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+
+ if (t->server) {
+ /* Report that we successfully received a valid packet with a good rcode after we initially got a bad
+ * rcode and subsequently downgraded the protocol */
+
+ if (IN_SET(DNS_PACKET_RCODE(p), DNS_RCODE_SUCCESS, DNS_RCODE_NXDOMAIN) &&
+ t->clamp_feature_level != _DNS_SERVER_FEATURE_LEVEL_INVALID)
+ dns_server_packet_rcode_downgrade(t->server, t->clamp_feature_level);
+
+ /* Report that the OPT RR was missing */
+ if (!p->opt)
+ dns_server_packet_bad_opt(t->server, t->current_feature_level);
+
+ /* Report that we successfully received a packet */
+ dns_server_packet_received(t->server, p->ipproto, t->current_feature_level, p->size);
+ }
+
+ /* See if we know things we didn't know before that indicate we better restart the lookup immediately. */
+ r = dns_transaction_maybe_restart(t);
+ if (r < 0)
+ goto fail;
+ if (r > 0) /* Transaction got restarted... */
+ return;
+
+ /* When dealing with protocols other than mDNS only consider responses with equivalent query section
+ * to the request. For mDNS this check doesn't make sense, because the section 6 of RFC6762 states
+ * that "Multicast DNS responses MUST NOT contain any questions in the Question Section". */
+ if (t->scope->protocol != DNS_PROTOCOL_MDNS) {
+ r = dns_packet_is_reply_for(p, t->key);
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ dns_transaction_complete(t, DNS_TRANSACTION_INVALID_REPLY);
+ return;
+ }
+ }
+
+ /* Install the answer as answer to the transaction */
+ dns_answer_unref(t->answer);
+ t->answer = dns_answer_ref(p->answer);
+ t->answer_rcode = DNS_PACKET_RCODE(p);
+ t->answer_dnssec_result = _DNSSEC_RESULT_INVALID;
+ t->answer_authenticated = false;
+
+ r = dns_transaction_fix_rcode(t);
+ if (r < 0)
+ goto fail;
+
+ /* Block GC while starting requests for additional DNSSEC RRs */
+ t->block_gc++;
+ r = dns_transaction_request_dnssec_keys(t);
+ t->block_gc--;
+
+ /* Maybe the transaction is ready for GC'ing now? If so, free it and return. */
+ if (!dns_transaction_gc(t))
+ return;
+
+ /* Requesting additional keys might have resulted in this transaction to fail, since the auxiliary
+ * request failed for some reason. If so, we are not in pending state anymore, and we should exit
+ * quickly. */
+ if (t->state != DNS_TRANSACTION_PENDING)
+ return;
+ if (r < 0)
+ goto fail;
+ if (r > 0) {
+ /* There are DNSSEC transactions pending now. Update the state accordingly. */
+ t->state = DNS_TRANSACTION_VALIDATING;
+ dns_transaction_close_connection(t);
+ dns_transaction_stop_timeout(t);
+ return;
+ }
+
+ dns_transaction_process_dnssec(t);
+ return;
+
+fail:
+ dns_transaction_complete_errno(t, r);
+}
+
+static int on_dns_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ DnsTransaction *t = userdata;
+ int r;
+
+ assert(t);
+ assert(t->scope);
+
+ r = manager_recv(t->scope->manager, fd, DNS_PROTOCOL_DNS, &p);
+ if (ERRNO_IS_DISCONNECT(r)) {
+ usec_t usec;
+
+ /* UDP connection failures get reported via ICMP and then are possibly delivered to us on the
+ * next recvmsg(). Treat this like a lost packet. */
+
+ log_debug_errno(r, "Connection failure for DNS UDP packet: %m");
+ assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &usec) >= 0);
+ dns_server_packet_lost(t->server, IPPROTO_UDP, t->current_feature_level);
+
+ dns_transaction_retry(t, true);
+ return 0;
+ }
+ if (r < 0) {
+ dns_transaction_complete_errno(t, r);
+ return 0;
+ }
+ if (r == 0)
+ /* Spurious wakeup without any data */
+ return 0;
+
+ r = dns_packet_validate_reply(p);
+ if (r < 0) {
+ log_debug_errno(r, "Received invalid DNS packet as response, ignoring: %m");
+ return 0;
+ }
+ if (r == 0) {
+ log_debug("Received inappropriate DNS packet as response, ignoring.");
+ return 0;
+ }
+
+ if (DNS_PACKET_ID(p) != t->id) {
+ log_debug("Received packet with incorrect transaction ID, ignoring.");
+ return 0;
+ }
+
+ dns_transaction_process_reply(t, p);
+ return 0;
+}
+
+static int dns_transaction_emit_udp(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ if (t->scope->protocol == DNS_PROTOCOL_DNS) {
+
+ r = dns_transaction_pick_server(t);
+ if (r < 0)
+ return r;
+
+ if (t->current_feature_level < DNS_SERVER_FEATURE_LEVEL_UDP || DNS_SERVER_FEATURE_LEVEL_IS_TLS(t->current_feature_level))
+ return -EAGAIN; /* Sorry, can't do UDP, try TCP! */
+
+ if (!dns_server_dnssec_supported(t->server) && dns_type_is_dnssec(t->key->type))
+ return -EOPNOTSUPP;
+
+ if (r > 0 || t->dns_udp_fd < 0) { /* Server changed, or no connection yet. */
+ int fd;
+
+ dns_transaction_close_connection(t);
+
+ fd = dns_scope_socket_udp(t->scope, t->server);
+ if (fd < 0)
+ return fd;
+
+ r = sd_event_add_io(t->scope->manager->event, &t->dns_udp_event_source, fd, EPOLLIN, on_dns_packet, t);
+ if (r < 0) {
+ safe_close(fd);
+ return r;
+ }
+
+ (void) sd_event_source_set_description(t->dns_udp_event_source, "dns-transaction-udp");
+ t->dns_udp_fd = fd;
+ }
+
+ r = dns_server_adjust_opt(t->server, t->sent, t->current_feature_level);
+ if (r < 0)
+ return r;
+ } else
+ dns_transaction_close_connection(t);
+
+ r = dns_scope_emit_udp(t->scope, t->dns_udp_fd, t->sent);
+ if (r < 0)
+ return r;
+
+ dns_transaction_reset_answer(t);
+
+ return 0;
+}
+
+static int on_transaction_timeout(sd_event_source *s, usec_t usec, void *userdata) {
+ DnsTransaction *t = userdata;
+
+ assert(s);
+ assert(t);
+
+ if (!t->initial_jitter_scheduled || t->initial_jitter_elapsed) {
+ /* Timeout reached? Increase the timeout for the server used */
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+ assert(t->server);
+ dns_server_packet_lost(t->server, t->stream ? IPPROTO_TCP : IPPROTO_UDP, t->current_feature_level);
+ break;
+
+ case DNS_PROTOCOL_LLMNR:
+ case DNS_PROTOCOL_MDNS:
+ dns_scope_packet_lost(t->scope, usec - t->start_usec);
+ break;
+
+ default:
+ assert_not_reached("Invalid DNS protocol.");
+ }
+
+ if (t->initial_jitter_scheduled)
+ t->initial_jitter_elapsed = true;
+ }
+
+ log_debug("Timeout reached on transaction %" PRIu16 ".", t->id);
+
+ dns_transaction_retry(t, true);
+ return 0;
+}
+
+static usec_t transaction_get_resend_timeout(DnsTransaction *t) {
+ assert(t);
+ assert(t->scope);
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_DNS:
+
+ /* When we do TCP, grant a much longer timeout, as in this case there's no need for us to quickly
+ * resend, as the kernel does that anyway for us, and we really don't want to interrupt it in that
+ * needlessly. */
+ if (t->stream)
+ return TRANSACTION_TCP_TIMEOUT_USEC;
+
+ return DNS_TIMEOUT_USEC;
+
+ case DNS_PROTOCOL_MDNS:
+ assert(t->n_attempts > 0);
+ if (t->probing)
+ return MDNS_PROBING_INTERVAL_USEC;
+ else
+ return (1 << (t->n_attempts - 1)) * USEC_PER_SEC;
+
+ case DNS_PROTOCOL_LLMNR:
+ return t->scope->resend_timeout;
+
+ default:
+ assert_not_reached("Invalid DNS protocol.");
+ }
+}
+
+static int dns_transaction_prepare(DnsTransaction *t, usec_t ts) {
+ int r;
+
+ assert(t);
+
+ /* Returns 0 if dns_transaction_complete() has been called. In that case the transaction and query
+ * candidate objects may have been invalidated and must not be accessed. Returns 1 if the transaction
+ * has been prepared. */
+
+ dns_transaction_stop_timeout(t);
+
+ if (!dns_scope_network_good(t->scope)) {
+ dns_transaction_complete(t, DNS_TRANSACTION_NETWORK_DOWN);
+ return 0;
+ }
+
+ if (t->n_attempts >= TRANSACTION_ATTEMPTS_MAX(t->scope->protocol)) {
+ DnsTransactionState result;
+
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR)
+ /* If we didn't find anything on LLMNR, it's not an error, but a failure to resolve
+ * the name. */
+ result = DNS_TRANSACTION_NOT_FOUND;
+ else
+ result = DNS_TRANSACTION_ATTEMPTS_MAX_REACHED;
+
+ dns_transaction_complete(t, result);
+ return 0;
+ }
+
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR && t->tried_stream) {
+ /* If we already tried via a stream, then we don't
+ * retry on LLMNR. See RFC 4795, Section 2.7. */
+ dns_transaction_complete(t, DNS_TRANSACTION_ATTEMPTS_MAX_REACHED);
+ return 0;
+ }
+
+ t->n_attempts++;
+ t->start_usec = ts;
+
+ dns_transaction_reset_answer(t);
+ dns_transaction_flush_dnssec_transactions(t);
+
+ /* Check the trust anchor. Do so only on classic DNS, since DNSSEC does not apply otherwise. */
+ if (t->scope->protocol == DNS_PROTOCOL_DNS) {
+ r = dns_trust_anchor_lookup_positive(&t->scope->manager->trust_anchor, t->key, &t->answer);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_source = DNS_TRANSACTION_TRUST_ANCHOR;
+ t->answer_authenticated = true;
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ return 0;
+ }
+
+ if (dns_name_is_root(dns_resource_key_name(t->key)) &&
+ t->key->type == DNS_TYPE_DS) {
+
+ /* Hmm, this is a request for the root DS? A
+ * DS RR doesn't exist in the root zone, and
+ * if our trust anchor didn't know it either,
+ * this means we cannot do any DNSSEC logic
+ * anymore. */
+
+ if (t->scope->dnssec_mode == DNSSEC_ALLOW_DOWNGRADE) {
+ /* We are in downgrade mode. In this
+ * case, synthesize an unsigned empty
+ * response, so that the any lookup
+ * depending on this one can continue
+ * assuming there was no DS, and hence
+ * the root zone was unsigned. */
+
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_source = DNS_TRANSACTION_TRUST_ANCHOR;
+ t->answer_authenticated = false;
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ } else
+ /* If we are not in downgrade mode,
+ * then fail the lookup, because we
+ * cannot reasonably answer it. There
+ * might be DS RRs, but we don't know
+ * them, and the DNS server won't tell
+ * them to us (and even if it would,
+ * we couldn't validate and trust them. */
+ dns_transaction_complete(t, DNS_TRANSACTION_NO_TRUST_ANCHOR);
+
+ return 0;
+ }
+ }
+
+ /* Check the zone, but only if this transaction is not used
+ * for probing or verifying a zone item. */
+ if (set_isempty(t->notify_zone_items)) {
+
+ r = dns_zone_lookup(&t->scope->zone, t->key, dns_scope_ifindex(t->scope), &t->answer, NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_source = DNS_TRANSACTION_ZONE;
+ t->answer_authenticated = true;
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ return 0;
+ }
+ }
+
+ /* Check the cache, but only if this transaction is not used
+ * for probing or verifying a zone item. */
+ if (set_isempty(t->notify_zone_items)) {
+
+ /* Before trying the cache, let's make sure we figured out a
+ * server to use. Should this cause a change of server this
+ * might flush the cache. */
+ (void) dns_scope_get_dns_server(t->scope);
+
+ /* Let's then prune all outdated entries */
+ dns_cache_prune(&t->scope->cache);
+
+ r = dns_cache_lookup(&t->scope->cache, t->key, t->clamp_ttl, &t->answer_rcode, &t->answer, &t->answer_authenticated);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ t->answer_source = DNS_TRANSACTION_CACHE;
+ if (t->answer_rcode == DNS_RCODE_SUCCESS)
+ dns_transaction_complete(t, DNS_TRANSACTION_SUCCESS);
+ else
+ dns_transaction_complete(t, DNS_TRANSACTION_RCODE_FAILURE);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static int dns_transaction_make_packet_mdns(DnsTransaction *t) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ bool add_known_answers = false;
+ DnsTransaction *other;
+ DnsResourceKey *tkey;
+ _cleanup_set_free_ Set *keys = NULL;
+ unsigned qdcount;
+ unsigned nscount = 0;
+ usec_t ts;
+ int r;
+
+ assert(t);
+ assert(t->scope->protocol == DNS_PROTOCOL_MDNS);
+
+ /* Discard any previously prepared packet, so we can start over and coalesce again */
+ t->sent = dns_packet_unref(t->sent);
+
+ r = dns_packet_new_query(&p, t->scope->protocol, 0, false);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_key(p, t->key, 0, NULL);
+ if (r < 0)
+ return r;
+
+ qdcount = 1;
+
+ if (dns_key_is_shared(t->key))
+ add_known_answers = true;
+
+ if (t->key->type == DNS_TYPE_ANY) {
+ r = set_ensure_put(&keys, &dns_resource_key_hash_ops, t->key);
+ if (r < 0)
+ return r;
+ }
+
+ /*
+ * For mDNS, we want to coalesce as many open queries in pending transactions into one single
+ * query packet on the wire as possible. To achieve that, we iterate through all pending transactions
+ * in our current scope, and see whether their timing constraints allow them to be sent.
+ */
+
+ assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ LIST_FOREACH(transactions_by_scope, other, t->scope->transactions) {
+
+ /* Skip ourselves */
+ if (other == t)
+ continue;
+
+ if (other->state != DNS_TRANSACTION_PENDING)
+ continue;
+
+ if (other->next_attempt_after > ts)
+ continue;
+
+ if (qdcount >= UINT16_MAX)
+ break;
+
+ r = dns_packet_append_key(p, other->key, 0, NULL);
+
+ /*
+ * If we can't stuff more questions into the packet, just give up.
+ * One of the 'other' transactions will fire later and take care of the rest.
+ */
+ if (r == -EMSGSIZE)
+ break;
+
+ if (r < 0)
+ return r;
+
+ r = dns_transaction_prepare(other, ts);
+ if (r <= 0)
+ continue;
+
+ ts += transaction_get_resend_timeout(other);
+
+ r = sd_event_add_time(
+ other->scope->manager->event,
+ &other->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ ts, 0,
+ on_transaction_timeout, other);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(other->timeout_event_source, "dns-transaction-timeout");
+
+ other->state = DNS_TRANSACTION_PENDING;
+ other->next_attempt_after = ts;
+
+ qdcount++;
+
+ if (dns_key_is_shared(other->key))
+ add_known_answers = true;
+
+ if (other->key->type == DNS_TYPE_ANY) {
+ r = set_ensure_put(&keys, &dns_resource_key_hash_ops, other->key);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ DNS_PACKET_HEADER(p)->qdcount = htobe16(qdcount);
+
+ /* Append known answer section if we're asking for any shared record */
+ if (add_known_answers) {
+ r = dns_cache_export_shared_to_packet(&t->scope->cache, p);
+ if (r < 0)
+ return r;
+ }
+
+ SET_FOREACH(tkey, keys) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ bool tentative;
+
+ r = dns_zone_lookup(&t->scope->zone, tkey, t->scope->link->ifindex, &answer, NULL, &tentative);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_answer(p, answer);
+ if (r < 0)
+ return r;
+
+ nscount += dns_answer_size(answer);
+ }
+ DNS_PACKET_HEADER(p)->nscount = htobe16(nscount);
+
+ t->sent = TAKE_PTR(p);
+
+ return 0;
+}
+
+static int dns_transaction_make_packet(DnsTransaction *t) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ int r;
+
+ assert(t);
+
+ if (t->scope->protocol == DNS_PROTOCOL_MDNS)
+ return dns_transaction_make_packet_mdns(t);
+
+ if (t->sent)
+ return 0;
+
+ r = dns_packet_new_query(&p, t->scope->protocol, 0, t->scope->dnssec_mode != DNSSEC_NO);
+ if (r < 0)
+ return r;
+
+ r = dns_packet_append_key(p, t->key, 0, NULL);
+ if (r < 0)
+ return r;
+
+ DNS_PACKET_HEADER(p)->qdcount = htobe16(1);
+ DNS_PACKET_HEADER(p)->id = t->id;
+
+ t->sent = TAKE_PTR(p);
+
+ return 0;
+}
+
+int dns_transaction_go(DnsTransaction *t) {
+ usec_t ts;
+ int r;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ assert(t);
+
+ /* Returns > 0 if the transaction is now pending, returns 0 if could be processed immediately and has
+ * finished now. In the latter case, the transaction and query candidate objects must not be accessed.
+ */
+
+ assert_se(sd_event_now(t->scope->manager->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ r = dns_transaction_prepare(t, ts);
+ if (r <= 0)
+ return r;
+
+ log_debug("Transaction %" PRIu16 " for <%s> scope %s on %s/%s.",
+ t->id,
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str),
+ dns_protocol_to_string(t->scope->protocol),
+ t->scope->link ? t->scope->link->ifname : "*",
+ af_to_name_short(t->scope->family));
+
+ if (!t->initial_jitter_scheduled &&
+ IN_SET(t->scope->protocol, DNS_PROTOCOL_LLMNR, DNS_PROTOCOL_MDNS)) {
+ usec_t jitter, accuracy;
+
+ /* RFC 4795 Section 2.7 suggests all queries should be
+ * delayed by a random time from 0 to JITTER_INTERVAL. */
+
+ t->initial_jitter_scheduled = true;
+
+ random_bytes(&jitter, sizeof(jitter));
+
+ switch (t->scope->protocol) {
+
+ case DNS_PROTOCOL_LLMNR:
+ jitter %= LLMNR_JITTER_INTERVAL_USEC;
+ accuracy = LLMNR_JITTER_INTERVAL_USEC;
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ jitter %= MDNS_JITTER_RANGE_USEC;
+ jitter += MDNS_JITTER_MIN_USEC;
+ accuracy = MDNS_JITTER_RANGE_USEC;
+ break;
+ default:
+ assert_not_reached("bad protocol");
+ }
+
+ r = sd_event_add_time(
+ t->scope->manager->event,
+ &t->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ ts + jitter, accuracy,
+ on_transaction_timeout, t);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(t->timeout_event_source, "dns-transaction-timeout");
+
+ t->n_attempts = 0;
+ t->next_attempt_after = ts;
+ t->state = DNS_TRANSACTION_PENDING;
+
+ log_debug("Delaying %s transaction for " USEC_FMT "us.", dns_protocol_to_string(t->scope->protocol), jitter);
+ return 1;
+ }
+
+ /* Otherwise, we need to ask the network */
+ r = dns_transaction_make_packet(t);
+ if (r < 0)
+ return r;
+
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR &&
+ (dns_name_endswith(dns_resource_key_name(t->key), "in-addr.arpa") > 0 ||
+ dns_name_endswith(dns_resource_key_name(t->key), "ip6.arpa") > 0)) {
+
+ /* RFC 4795, Section 2.4. says reverse lookups shall
+ * always be made via TCP on LLMNR */
+ r = dns_transaction_emit_tcp(t);
+ } else {
+ /* Try via UDP, and if that fails due to large size or lack of
+ * support try via TCP */
+ r = dns_transaction_emit_udp(t);
+ if (r == -EMSGSIZE)
+ log_debug("Sending query via TCP since it is too large.");
+ else if (r == -EAGAIN)
+ log_debug("Sending query via TCP since UDP isn't supported or DNS-over-TLS is selected.");
+ if (IN_SET(r, -EMSGSIZE, -EAGAIN))
+ r = dns_transaction_emit_tcp(t);
+ }
+
+ if (r == -ESRCH) {
+ /* No servers to send this to? */
+ dns_transaction_complete(t, DNS_TRANSACTION_NO_SERVERS);
+ return 0;
+ }
+ if (r == -EOPNOTSUPP) {
+ /* Tried to ask for DNSSEC RRs, on a server that doesn't do DNSSEC */
+ dns_transaction_complete(t, DNS_TRANSACTION_RR_TYPE_UNSUPPORTED);
+ return 0;
+ }
+ if (t->scope->protocol == DNS_PROTOCOL_LLMNR && ERRNO_IS_DISCONNECT(r)) {
+ /* On LLMNR, if we cannot connect to a host via TCP when doing reverse lookups. This means we cannot
+ * answer this request with this protocol. */
+ dns_transaction_complete(t, DNS_TRANSACTION_NOT_FOUND);
+ return 0;
+ }
+ if (r < 0) {
+ if (t->scope->protocol != DNS_PROTOCOL_DNS)
+ return r;
+
+ /* Couldn't send? Try immediately again, with a new server */
+ dns_scope_next_dns_server(t->scope);
+
+ return dns_transaction_go(t);
+ }
+
+ ts += transaction_get_resend_timeout(t);
+
+ r = sd_event_add_time(
+ t->scope->manager->event,
+ &t->timeout_event_source,
+ clock_boottime_or_monotonic(),
+ ts, 0,
+ on_transaction_timeout, t);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(t->timeout_event_source, "dns-transaction-timeout");
+
+ t->state = DNS_TRANSACTION_PENDING;
+ t->next_attempt_after = ts;
+
+ return 1;
+}
+
+static int dns_transaction_find_cyclic(DnsTransaction *t, DnsTransaction *aux) {
+ DnsTransaction *n;
+ int r;
+
+ assert(t);
+ assert(aux);
+
+ /* Try to find cyclic dependencies between transaction objects */
+
+ if (t == aux)
+ return 1;
+
+ SET_FOREACH(n, aux->dnssec_transactions) {
+ r = dns_transaction_find_cyclic(t, n);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_transaction_add_dnssec_transaction(DnsTransaction *t, DnsResourceKey *key, DnsTransaction **ret) {
+ _cleanup_(dns_transaction_gcp) DnsTransaction *aux = NULL;
+ int r;
+
+ assert(t);
+ assert(ret);
+ assert(key);
+
+ aux = dns_scope_find_transaction(t->scope, key, true);
+ if (!aux) {
+ r = dns_transaction_new(&aux, t->scope, key);
+ if (r < 0)
+ return r;
+ } else {
+ if (set_contains(t->dnssec_transactions, aux)) {
+ *ret = aux;
+ return 0;
+ }
+
+ r = dns_transaction_find_cyclic(t, aux);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ char s[DNS_RESOURCE_KEY_STRING_MAX], saux[DNS_RESOURCE_KEY_STRING_MAX];
+
+ return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+ "Potential cyclic dependency, refusing to add transaction %" PRIu16 " (%s) as dependency for %" PRIu16 " (%s).",
+ aux->id,
+ dns_resource_key_to_string(t->key, s, sizeof s),
+ t->id,
+ dns_resource_key_to_string(aux->key, saux, sizeof saux));
+ }
+ }
+
+ r = set_ensure_allocated(&aux->notify_transactions_done, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_put(&t->dnssec_transactions, NULL, aux);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_put(&aux->notify_transactions, NULL, t);
+ if (r < 0) {
+ (void) set_remove(t->dnssec_transactions, aux);
+ return r;
+ }
+
+ *ret = TAKE_PTR(aux);
+ return 1;
+}
+
+static int dns_transaction_request_dnssec_rr(DnsTransaction *t, DnsResourceKey *key) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *a = NULL;
+ DnsTransaction *aux;
+ int r;
+
+ assert(t);
+ assert(key);
+
+ /* Try to get the data from the trust anchor */
+ r = dns_trust_anchor_lookup_positive(&t->scope->manager->trust_anchor, key, &a);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = dns_answer_extend(&t->validated_keys, a);
+ if (r < 0)
+ return r;
+
+ return 0;
+ }
+
+ /* This didn't work, ask for it via the network/cache then. */
+ r = dns_transaction_add_dnssec_transaction(t, key, &aux);
+ if (r == -ELOOP) /* This would result in a cyclic dependency */
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (aux->state == DNS_TRANSACTION_NULL) {
+ r = dns_transaction_go(aux);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static int dns_transaction_negative_trust_anchor_lookup(DnsTransaction *t, const char *name) {
+ int r;
+
+ assert(t);
+
+ /* Check whether the specified name is in the NTA
+ * database, either in the global one, or the link-local
+ * one. */
+
+ r = dns_trust_anchor_lookup_negative(&t->scope->manager->trust_anchor, name);
+ if (r != 0)
+ return r;
+
+ if (!t->scope->link)
+ return 0;
+
+ return link_negative_trust_anchor_lookup(t->scope->link, name);
+}
+
+static int dns_transaction_has_unsigned_negative_answer(DnsTransaction *t) {
+ int r;
+
+ assert(t);
+
+ /* Checks whether the answer is negative, and lacks NSEC/NSEC3
+ * RRs to prove it */
+
+ r = dns_transaction_has_positive_answer(t, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ /* Is this key explicitly listed as a negative trust anchor?
+ * If so, it's nothing we need to care about */
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(t->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ /* The answer does not contain any RRs that match to the
+ * question. If so, let's see if there are any NSEC/NSEC3 RRs
+ * included. If not, the answer is unsigned. */
+
+ r = dns_answer_contains_nsec_or_nsec3(t->answer);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ return true;
+}
+
+static int dns_transaction_is_primary_response(DnsTransaction *t, DnsResourceRecord *rr) {
+ int r;
+
+ assert(t);
+ assert(rr);
+
+ /* Check if the specified RR is the "primary" response,
+ * i.e. either matches the question precisely or is a
+ * CNAME/DNAME for it. */
+
+ r = dns_resource_key_match_rr(t->key, rr, NULL);
+ if (r != 0)
+ return r;
+
+ return dns_resource_key_match_cname_or_dname(t->key, rr->key, NULL);
+}
+
+static bool dns_transaction_dnssec_supported(DnsTransaction *t) {
+ assert(t);
+
+ /* Checks whether our transaction's DNS server is assumed to be compatible with DNSSEC. Returns false as soon
+ * as we changed our mind about a server, and now believe it is incompatible with DNSSEC. */
+
+ if (t->scope->protocol != DNS_PROTOCOL_DNS)
+ return false;
+
+ /* If we have picked no server, then we are working from the cache or some other source, and DNSSEC might well
+ * be supported, hence return true. */
+ if (!t->server)
+ return true;
+
+ /* Note that we do not check the feature level actually used for the transaction but instead the feature level
+ * the server is known to support currently, as the transaction feature level might be lower than what the
+ * server actually supports, since we might have downgraded this transaction's feature level because we got a
+ * SERVFAIL earlier and wanted to check whether downgrading fixes it. */
+
+ return dns_server_dnssec_supported(t->server);
+}
+
+static bool dns_transaction_dnssec_supported_full(DnsTransaction *t) {
+ DnsTransaction *dt;
+
+ assert(t);
+
+ /* Checks whether our transaction our any of the auxiliary transactions couldn't do DNSSEC. */
+
+ if (!dns_transaction_dnssec_supported(t))
+ return false;
+
+ SET_FOREACH(dt, t->dnssec_transactions)
+ if (!dns_transaction_dnssec_supported(dt))
+ return false;
+
+ return true;
+}
+
+int dns_transaction_request_dnssec_keys(DnsTransaction *t) {
+ DnsResourceRecord *rr;
+
+ int r;
+
+ assert(t);
+
+ /*
+ * Retrieve all auxiliary RRs for the answer we got, so that
+ * we can verify signatures or prove that RRs are rightfully
+ * unsigned. Specifically:
+ *
+ * - For RRSIG we get the matching DNSKEY
+ * - For DNSKEY we get the matching DS
+ * - For unsigned SOA/NS we get the matching DS
+ * - For unsigned CNAME/DNAME/DS we get the parent SOA RR
+ * - For other unsigned RRs we get the matching SOA RR
+ * - For SOA/NS queries with no matching response RR, and no NSEC/NSEC3, the DS RR
+ * - For DS queries with no matching response RRs, and no NSEC/NSEC3, the parent's SOA RR
+ * - For other queries with no matching response RRs, and no NSEC/NSEC3, the SOA RR
+ */
+
+ if (t->scope->dnssec_mode == DNSSEC_NO)
+ return 0;
+ if (t->answer_source != DNS_TRANSACTION_NETWORK)
+ return 0; /* We only need to validate stuff from the network */
+ if (!dns_transaction_dnssec_supported(t))
+ return 0; /* If we can't do DNSSEC anyway there's no point in getting the auxiliary RRs */
+
+ DNS_ANSWER_FOREACH(rr, t->answer) {
+
+ if (dns_type_is_pseudo(rr->key->type))
+ continue;
+
+ /* If this RR is in the negative trust anchor, we don't need to validate it. */
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_RRSIG: {
+ /* For each RRSIG we request the matching DNSKEY */
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *dnskey = NULL;
+
+ /* If this RRSIG is about a DNSKEY RR and the
+ * signer is the same as the owner, then we
+ * already have the DNSKEY, and we don't have
+ * to look for more. */
+ if (rr->rrsig.type_covered == DNS_TYPE_DNSKEY) {
+ r = dns_name_equal(rr->rrsig.signer, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+ }
+
+ /* If the signer is not a parent of our
+ * original query, then this is about an
+ * auxiliary RRset, but not anything we asked
+ * for. In this case we aren't interested,
+ * because we don't want to request additional
+ * RRs for stuff we didn't really ask for, and
+ * also to avoid request loops, where
+ * additional RRs from one transaction result
+ * in another transaction whose additional RRs
+ * point back to the original transaction, and
+ * we deadlock. */
+ r = dns_name_endswith(dns_resource_key_name(t->key), rr->rrsig.signer);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ dnskey = dns_resource_key_new(rr->key->class, DNS_TYPE_DNSKEY, rr->rrsig.signer);
+ if (!dnskey)
+ return -ENOMEM;
+
+ log_debug("Requesting DNSKEY to validate transaction %" PRIu16" (%s, RRSIG with key tag: %" PRIu16 ").",
+ t->id, dns_resource_key_name(rr->key), rr->rrsig.key_tag);
+ r = dns_transaction_request_dnssec_rr(t, dnskey);
+ if (r < 0)
+ return r;
+ break;
+ }
+
+ case DNS_TYPE_DNSKEY: {
+ /* For each DNSKEY we request the matching DS */
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *ds = NULL;
+
+ /* If the DNSKEY we are looking at is not for
+ * zone we are interested in, nor any of its
+ * parents, we aren't interested, and don't
+ * request it. After all, we don't want to end
+ * up in request loops, and want to keep
+ * additional traffic down. */
+
+ r = dns_name_endswith(dns_resource_key_name(t->key), dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ ds = dns_resource_key_new(rr->key->class, DNS_TYPE_DS, dns_resource_key_name(rr->key));
+ if (!ds)
+ return -ENOMEM;
+
+ log_debug("Requesting DS to validate transaction %" PRIu16" (%s, DNSKEY with key tag: %" PRIu16 ").",
+ t->id, dns_resource_key_name(rr->key), dnssec_keytag(rr, false));
+ r = dns_transaction_request_dnssec_rr(t, ds);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ case DNS_TYPE_SOA:
+ case DNS_TYPE_NS: {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *ds = NULL;
+
+ /* For an unsigned SOA or NS, try to acquire
+ * the matching DS RR, as we are at a zone cut
+ * then, and whether a DS exists tells us
+ * whether the zone is signed. Do so only if
+ * this RR matches our original question,
+ * however. */
+
+ r = dns_resource_key_match_rr(t->key, rr, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Hmm, so this SOA RR doesn't match our original question. In this case, maybe this is
+ * a negative reply, and we need the SOA RR's TTL in order to cache a negative entry?
+ * If so, we need to validate it, too. */
+
+ r = dns_answer_match_key(t->answer, t->key, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) /* positive reply, we won't need the SOA and hence don't need to validate
+ * it. */
+ continue;
+
+ /* Only bother with this if the SOA/NS RR we are looking at is actually a parent of
+ * what we are looking for, otherwise there's no value in it for us. */
+ r = dns_name_endswith(dns_resource_key_name(t->key), dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+ }
+
+ r = dnssec_has_rrsig(t->answer, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ ds = dns_resource_key_new(rr->key->class, DNS_TYPE_DS, dns_resource_key_name(rr->key));
+ if (!ds)
+ return -ENOMEM;
+
+ log_debug("Requesting DS to validate transaction %" PRIu16 " (%s, unsigned SOA/NS RRset).",
+ t->id, dns_resource_key_name(rr->key));
+ r = dns_transaction_request_dnssec_rr(t, ds);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ case DNS_TYPE_DS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME: {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *soa = NULL;
+ const char *name;
+
+ /* CNAMEs and DNAMEs cannot be located at a
+ * zone apex, hence ask for the parent SOA for
+ * unsigned CNAME/DNAME RRs, maybe that's the
+ * apex. But do all that only if this is
+ * actually a response to our original
+ * question.
+ *
+ * Similar for DS RRs, which are signed when
+ * the parent SOA is signed. */
+
+ r = dns_transaction_is_primary_response(t, rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dnssec_has_rrsig(t->answer, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = dns_answer_has_dname_for_cname(t->answer, rr);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ name = dns_resource_key_name(rr->key);
+ r = dns_name_parent(&name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ soa = dns_resource_key_new(rr->key->class, DNS_TYPE_SOA, name);
+ if (!soa)
+ return -ENOMEM;
+
+ log_debug("Requesting parent SOA to validate transaction %" PRIu16 " (%s, unsigned CNAME/DNAME/DS RRset).",
+ t->id, dns_resource_key_name(rr->key));
+ r = dns_transaction_request_dnssec_rr(t, soa);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ default: {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *soa = NULL;
+
+ /* For other unsigned RRsets (including
+ * NSEC/NSEC3!), look for proof the zone is
+ * unsigned, by requesting the SOA RR of the
+ * zone. However, do so only if they are
+ * directly relevant to our original
+ * question. */
+
+ r = dns_transaction_is_primary_response(t, rr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dnssec_has_rrsig(t->answer, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ soa = dns_resource_key_new(rr->key->class, DNS_TYPE_SOA, dns_resource_key_name(rr->key));
+ if (!soa)
+ return -ENOMEM;
+
+ log_debug("Requesting SOA to validate transaction %" PRIu16 " (%s, unsigned non-SOA/NS RRset <%s>).",
+ t->id, dns_resource_key_name(rr->key), dns_resource_record_to_string(rr));
+ r = dns_transaction_request_dnssec_rr(t, soa);
+ if (r < 0)
+ return r;
+ break;
+ }}
+ }
+
+ /* Above, we requested everything necessary to validate what
+ * we got. Now, let's request what we need to validate what we
+ * didn't get... */
+
+ r = dns_transaction_has_unsigned_negative_answer(t);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ const char *name;
+ uint16_t type = 0;
+
+ name = dns_resource_key_name(t->key);
+
+ /* If this was a SOA or NS request, then check if there's a DS RR for the same domain. Note that this
+ * could also be used as indication that we are not at a zone apex, but in real world setups there are
+ * too many broken DNS servers (Hello, incapdns.net!) where non-terminal zones return NXDOMAIN even
+ * though they have further children. If this was a DS request, then it's signed when the parent zone
+ * is signed, hence ask the parent SOA in that case. If this was any other RR then ask for the SOA RR,
+ * to see if that is signed. */
+
+ if (t->key->type == DNS_TYPE_DS) {
+ r = dns_name_parent(&name);
+ if (r > 0) {
+ type = DNS_TYPE_SOA;
+ log_debug("Requesting parent SOA (→ %s) to validate transaction %" PRIu16 " (%s, unsigned empty DS response).",
+ name, t->id, dns_resource_key_name(t->key));
+ } else
+ name = NULL;
+
+ } else if (IN_SET(t->key->type, DNS_TYPE_SOA, DNS_TYPE_NS)) {
+
+ type = DNS_TYPE_DS;
+ log_debug("Requesting DS (→ %s) to validate transaction %" PRIu16 " (%s, unsigned empty SOA/NS response).",
+ name, t->id, name);
+
+ } else {
+ type = DNS_TYPE_SOA;
+ log_debug("Requesting SOA (→ %s) to validate transaction %" PRIu16 " (%s, unsigned empty non-SOA/NS/DS response).",
+ name, t->id, name);
+ }
+
+ if (name) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *soa = NULL;
+
+ soa = dns_resource_key_new(t->key->class, type, name);
+ if (!soa)
+ return -ENOMEM;
+
+ r = dns_transaction_request_dnssec_rr(t, soa);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return dns_transaction_dnssec_is_live(t);
+}
+
+void dns_transaction_notify(DnsTransaction *t, DnsTransaction *source) {
+ assert(t);
+ assert(source);
+
+ /* Invoked whenever any of our auxiliary DNSSEC transactions completed its work. If the state is still PENDING,
+ we are still in the loop that adds further DNSSEC transactions, hence don't check if we are ready yet. If
+ the state is VALIDATING however, we should check if we are complete now. */
+
+ if (t->state == DNS_TRANSACTION_VALIDATING)
+ dns_transaction_process_dnssec(t);
+}
+
+static int dns_transaction_validate_dnskey_by_ds(DnsTransaction *t) {
+ DnsResourceRecord *rr;
+ int ifindex, r;
+
+ assert(t);
+
+ /* Add all DNSKEY RRs from the answer that are validated by DS
+ * RRs from the list of validated keys to the list of
+ * validated keys. */
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, t->answer) {
+
+ r = dnssec_verify_dnskey_by_ds_search(rr, t->validated_keys);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* If so, the DNSKEY is validated too. */
+ r = dns_answer_add_extend(&t->validated_keys, rr, ifindex, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_transaction_requires_rrsig(DnsTransaction *t, DnsResourceRecord *rr) {
+ int r;
+
+ assert(t);
+ assert(rr);
+
+ /* Checks if the RR we are looking for must be signed with an
+ * RRSIG. This is used for positive responses. */
+
+ if (t->scope->dnssec_mode == DNSSEC_NO)
+ return false;
+
+ if (dns_type_is_pseudo(rr->key->type))
+ return -EINVAL;
+
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ switch (rr->key->type) {
+
+ case DNS_TYPE_RRSIG:
+ /* RRSIGs are the signatures themselves, they need no signing. */
+ return false;
+
+ case DNS_TYPE_SOA:
+ case DNS_TYPE_NS: {
+ DnsTransaction *dt;
+
+ /* For SOA or NS RRs we look for a matching DS transaction */
+
+ SET_FOREACH(dt, t->dnssec_transactions) {
+
+ if (dt->key->class != rr->key->class)
+ continue;
+ if (dt->key->type != DNS_TYPE_DS)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We found a DS transactions for the SOA/NS
+ * RRs we are looking at. If it discovered signed DS
+ * RRs, then we need to be signed, too. */
+
+ if (!dt->answer_authenticated)
+ return false;
+
+ return dns_answer_match_key(dt->answer, dt->key, NULL);
+ }
+
+ /* We found nothing that proves this is safe to leave
+ * this unauthenticated, hence ask inist on
+ * authentication. */
+ return true;
+ }
+
+ case DNS_TYPE_DS:
+ case DNS_TYPE_CNAME:
+ case DNS_TYPE_DNAME: {
+ const char *parent = NULL;
+ DnsTransaction *dt;
+
+ /*
+ * CNAME/DNAME RRs cannot be located at a zone apex, hence look directly for the parent SOA.
+ *
+ * DS RRs are signed if the parent is signed, hence also look at the parent SOA
+ */
+
+ SET_FOREACH(dt, t->dnssec_transactions) {
+
+ if (dt->key->class != rr->key->class)
+ continue;
+ if (dt->key->type != DNS_TYPE_SOA)
+ continue;
+
+ if (!parent) {
+ parent = dns_resource_key_name(rr->key);
+ r = dns_name_parent(&parent);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (rr->key->type == DNS_TYPE_DS)
+ return true;
+
+ /* A CNAME/DNAME without a parent? That's sooo weird. */
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Transaction %" PRIu16 " claims CNAME/DNAME at root. Refusing.", t->id);
+ }
+ }
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), parent);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ return t->answer_authenticated;
+ }
+
+ return true;
+ }
+
+ default: {
+ DnsTransaction *dt;
+
+ /* Any other kind of RR (including DNSKEY/NSEC/NSEC3). Let's see if our SOA lookup was authenticated */
+
+ SET_FOREACH(dt, t->dnssec_transactions) {
+
+ if (dt->key->class != rr->key->class)
+ continue;
+ if (dt->key->type != DNS_TYPE_SOA)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We found the transaction that was supposed to find
+ * the SOA RR for us. It was successful, but found no
+ * RR for us. This means we are not at a zone cut. In
+ * this case, we require authentication if the SOA
+ * lookup was authenticated too. */
+ return t->answer_authenticated;
+ }
+
+ return true;
+ }}
+}
+
+static int dns_transaction_in_private_tld(DnsTransaction *t, const DnsResourceKey *key) {
+ DnsTransaction *dt;
+ const char *tld;
+ int r;
+
+ /* If DNSSEC downgrade mode is on, checks whether the
+ * specified RR is one level below a TLD we have proven not to
+ * exist. In such a case we assume that this is a private
+ * domain, and permit it.
+ *
+ * This detects cases like the Fritz!Box router networks. Each
+ * Fritz!Box router serves a private "fritz.box" zone, in the
+ * non-existing TLD "box". Requests for the "fritz.box" domain
+ * are served by the router itself, while requests for the
+ * "box" domain will result in NXDOMAIN.
+ *
+ * Note that this logic is unable to detect cases where a
+ * router serves a private DNS zone directly under
+ * non-existing TLD. In such a case we cannot detect whether
+ * the TLD is supposed to exist or not, as all requests we
+ * make for it will be answered by the router's zone, and not
+ * by the root zone. */
+
+ assert(t);
+
+ if (t->scope->dnssec_mode != DNSSEC_ALLOW_DOWNGRADE)
+ return false; /* In strict DNSSEC mode what doesn't exist, doesn't exist */
+
+ tld = dns_resource_key_name(key);
+ r = dns_name_parent(&tld);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false; /* Already the root domain */
+
+ if (!dns_name_is_single_label(tld))
+ return false;
+
+ SET_FOREACH(dt, t->dnssec_transactions) {
+
+ if (dt->key->class != key->class)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), tld);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* We found an auxiliary lookup we did for the TLD. If
+ * that returned with NXDOMAIN, we know the TLD didn't
+ * exist, and hence this might be a private zone. */
+
+ return dt->answer_rcode == DNS_RCODE_NXDOMAIN;
+ }
+
+ return false;
+}
+
+static int dns_transaction_requires_nsec(DnsTransaction *t) {
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+ DnsTransaction *dt;
+ const char *name;
+ uint16_t type = 0;
+ int r;
+
+ assert(t);
+
+ /* Checks if we need to insist on NSEC/NSEC3 RRs for proving
+ * this negative reply */
+
+ if (t->scope->dnssec_mode == DNSSEC_NO)
+ return false;
+
+ if (dns_type_is_pseudo(t->key->type))
+ return -EINVAL;
+
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(t->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ r = dns_transaction_in_private_tld(t, t->key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* The lookup is from a TLD that is proven not to
+ * exist, and we are in downgrade mode, hence ignore
+ * that fact that we didn't get any NSEC RRs. */
+
+ log_info("Detected a negative query %s in a private DNS zone, permitting unsigned response.",
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str));
+ return false;
+ }
+
+ name = dns_resource_key_name(t->key);
+
+ if (t->key->type == DNS_TYPE_DS) {
+
+ /* We got a negative reply for this DS lookup? DS RRs are signed when their parent zone is signed,
+ * hence check the parent SOA in this case. */
+
+ r = dns_name_parent(&name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return true;
+
+ type = DNS_TYPE_SOA;
+
+ } else if (IN_SET(t->key->type, DNS_TYPE_SOA, DNS_TYPE_NS))
+ /* We got a negative reply for this SOA/NS lookup? If so, check if there's a DS RR for this */
+ type = DNS_TYPE_DS;
+ else
+ /* For all other negative replies, check for the SOA lookup */
+ type = DNS_TYPE_SOA;
+
+ /* For all other RRs we check the SOA on the same level to see
+ * if it's signed. */
+
+ SET_FOREACH(dt, t->dnssec_transactions) {
+
+ if (dt->key->class != t->key->class)
+ continue;
+ if (dt->key->type != type)
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ return dt->answer_authenticated;
+ }
+
+ /* If in doubt, require NSEC/NSEC3 */
+ return true;
+}
+
+static int dns_transaction_dnskey_authenticated(DnsTransaction *t, DnsResourceRecord *rr) {
+ DnsResourceRecord *rrsig;
+ bool found = false;
+ int r;
+
+ /* Checks whether any of the DNSKEYs used for the RRSIGs for
+ * the specified RRset is authenticated (i.e. has a matching
+ * DS RR). */
+
+ r = dns_transaction_negative_trust_anchor_lookup(t, dns_resource_key_name(rr->key));
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return false;
+
+ DNS_ANSWER_FOREACH(rrsig, t->answer) {
+ DnsTransaction *dt;
+
+ r = dnssec_key_match_rrsig(rr->key, rrsig);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ SET_FOREACH(dt, t->dnssec_transactions) {
+
+ if (dt->key->class != rr->key->class)
+ continue;
+
+ if (dt->key->type == DNS_TYPE_DNSKEY) {
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), rrsig->rrsig.signer);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* OK, we found an auxiliary DNSKEY
+ * lookup. If that lookup is
+ * authenticated, report this. */
+
+ if (dt->answer_authenticated)
+ return true;
+
+ found = true;
+
+ } else if (dt->key->type == DNS_TYPE_DS) {
+
+ r = dns_name_equal(dns_resource_key_name(dt->key), rrsig->rrsig.signer);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ /* OK, we found an auxiliary DS
+ * lookup. If that lookup is
+ * authenticated and non-zero, we
+ * won! */
+
+ if (!dt->answer_authenticated)
+ return false;
+
+ return dns_answer_match_key(dt->answer, dt->key, NULL);
+ }
+ }
+ }
+
+ return found ? false : -ENXIO;
+}
+
+static int dns_transaction_known_signed(DnsTransaction *t, DnsResourceRecord *rr) {
+ assert(t);
+ assert(rr);
+
+ /* We know that the root domain is signed, hence if it appears
+ * not to be signed, there's a problem with the DNS server */
+
+ return rr->key->class == DNS_CLASS_IN &&
+ dns_name_is_root(dns_resource_key_name(rr->key));
+}
+
+static int dns_transaction_check_revoked_trust_anchors(DnsTransaction *t) {
+ DnsResourceRecord *rr;
+ int r;
+
+ assert(t);
+
+ /* Maybe warn the user that we encountered a revoked DNSKEY
+ * for a key from our trust anchor. Note that we don't care
+ * whether the DNSKEY can be authenticated or not. It's
+ * sufficient if it is self-signed. */
+
+ DNS_ANSWER_FOREACH(rr, t->answer) {
+ r = dns_trust_anchor_check_revoked(&t->scope->manager->trust_anchor, rr, t->answer);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_transaction_invalidate_revoked_keys(DnsTransaction *t) {
+ bool changed;
+ int r;
+
+ assert(t);
+
+ /* Removes all DNSKEY/DS objects from t->validated_keys that
+ * our trust anchors database considers revoked. */
+
+ do {
+ DnsResourceRecord *rr;
+
+ changed = false;
+
+ DNS_ANSWER_FOREACH(rr, t->validated_keys) {
+ r = dns_trust_anchor_is_revoked(&t->scope->manager->trust_anchor, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = dns_answer_remove_by_rr(&t->validated_keys, rr);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+ changed = true;
+ break;
+ }
+ }
+ } while (changed);
+
+ return 0;
+}
+
+static int dns_transaction_copy_validated(DnsTransaction *t) {
+ DnsTransaction *dt;
+ int r;
+
+ assert(t);
+
+ /* Copy all validated RRs from the auxiliary DNSSEC transactions into our set of validated RRs */
+
+ SET_FOREACH(dt, t->dnssec_transactions) {
+
+ if (DNS_TRANSACTION_IS_LIVE(dt->state))
+ continue;
+
+ if (!dt->answer_authenticated)
+ continue;
+
+ r = dns_answer_extend(&t->validated_keys, dt->answer);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+typedef enum {
+ DNSSEC_PHASE_DNSKEY, /* Phase #1, only validate DNSKEYs */
+ DNSSEC_PHASE_NSEC, /* Phase #2, only validate NSEC+NSEC3 */
+ DNSSEC_PHASE_ALL, /* Phase #3, validate everything else */
+} Phase;
+
+static int dnssec_validate_records(
+ DnsTransaction *t,
+ Phase phase,
+ bool *have_nsec,
+ DnsAnswer **validated) {
+
+ DnsResourceRecord *rr;
+ int r;
+
+ /* Returns negative on error, 0 if validation failed, 1 to restart validation, 2 when finished. */
+
+ DNS_ANSWER_FOREACH(rr, t->answer) {
+ DnsResourceRecord *rrsig = NULL;
+ DnssecResult result;
+
+ switch (rr->key->type) {
+ case DNS_TYPE_RRSIG:
+ continue;
+
+ case DNS_TYPE_DNSKEY:
+ /* We validate DNSKEYs only in the DNSKEY and ALL phases */
+ if (phase == DNSSEC_PHASE_NSEC)
+ continue;
+ break;
+
+ case DNS_TYPE_NSEC:
+ case DNS_TYPE_NSEC3:
+ *have_nsec = true;
+
+ /* We validate NSEC/NSEC3 only in the NSEC and ALL phases */
+ if (phase == DNSSEC_PHASE_DNSKEY)
+ continue;
+ break;
+
+ default:
+ /* We validate all other RRs only in the ALL phases */
+ if (phase != DNSSEC_PHASE_ALL)
+ continue;
+ }
+
+ r = dnssec_verify_rrset_search(t->answer, rr->key, t->validated_keys, USEC_INFINITY, &result, &rrsig);
+ if (r < 0)
+ return r;
+
+ log_debug("Looking at %s: %s", strna(dns_resource_record_to_string(rr)), dnssec_result_to_string(result));
+
+ if (result == DNSSEC_VALIDATED) {
+
+ if (rr->key->type == DNS_TYPE_DNSKEY) {
+ /* If we just validated a DNSKEY RRset, then let's add these keys to
+ * the set of validated keys for this transaction. */
+
+ r = dns_answer_copy_by_key(&t->validated_keys, t->answer, rr->key, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ /* Some of the DNSKEYs we just added might already have been revoked,
+ * remove them again in that case. */
+ r = dns_transaction_invalidate_revoked_keys(t);
+ if (r < 0)
+ return r;
+ }
+
+ /* Add the validated RRset to the new list of validated
+ * RRsets, and remove it from the unvalidated RRsets.
+ * We mark the RRset as authenticated and cacheable. */
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, DNS_ANSWER_AUTHENTICATED|DNS_ANSWER_CACHEABLE);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_SECURE, rr->key);
+
+ /* Exit the loop, we dropped something from the answer, start from the beginning */
+ return 1;
+ }
+
+ /* If we haven't read all DNSKEYs yet a negative result of the validation is irrelevant, as
+ * there might be more DNSKEYs coming. Similar, if we haven't read all NSEC/NSEC3 RRs yet,
+ * we cannot do positive wildcard proofs yet, as those require the NSEC/NSEC3 RRs. */
+ if (phase != DNSSEC_PHASE_ALL)
+ continue;
+
+ if (result == DNSSEC_VALIDATED_WILDCARD) {
+ bool authenticated = false;
+ const char *source;
+
+ /* This RRset validated, but as a wildcard. This means we need
+ * to prove via NSEC/NSEC3 that no matching non-wildcard RR exists. */
+
+ /* First step, determine the source of synthesis */
+ r = dns_resource_record_source(rrsig, &source);
+ if (r < 0)
+ return r;
+
+ r = dnssec_test_positive_wildcard(*validated,
+ dns_resource_key_name(rr->key),
+ source,
+ rrsig->rrsig.signer,
+ &authenticated);
+
+ /* Unless the NSEC proof showed that the key really doesn't exist something is off. */
+ if (r == 0)
+ result = DNSSEC_INVALID;
+ else {
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key,
+ authenticated ? (DNS_ANSWER_AUTHENTICATED|DNS_ANSWER_CACHEABLE) : 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, authenticated ? DNSSEC_SECURE : DNSSEC_INSECURE, rr->key);
+
+ /* Exit the loop, we dropped something from the answer, start from the beginning */
+ return 1;
+ }
+ }
+
+ if (result == DNSSEC_NO_SIGNATURE) {
+ r = dns_transaction_requires_rrsig(t, rr);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Data does not require signing. In that case, just copy it over,
+ * but remember that this is by no means authenticated. */
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, rr->key);
+ return 1;
+ }
+
+ r = dns_transaction_known_signed(t, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* This is an RR we know has to be signed. If it isn't this means
+ * the server is not attaching RRSIGs, hence complain. */
+
+ dns_server_packet_rrsig_missing(t->server, t->current_feature_level);
+
+ if (t->scope->dnssec_mode == DNSSEC_ALLOW_DOWNGRADE) {
+
+ /* Downgrading is OK? If so, just consider the information unsigned */
+
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, rr->key);
+ return 1;
+ }
+
+ /* Otherwise, fail */
+ t->answer_dnssec_result = DNSSEC_INCOMPATIBLE_SERVER;
+ return 0;
+ }
+
+ r = dns_transaction_in_private_tld(t, rr->key);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ char s[DNS_RESOURCE_KEY_STRING_MAX];
+
+ /* The data is from a TLD that is proven not to exist, and we are in downgrade
+ * mode, hence ignore the fact that this was not signed. */
+
+ log_info("Detected RRset %s is in a private DNS zone, permitting unsigned RRs.",
+ dns_resource_key_to_string(rr->key, s, sizeof s));
+
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, rr->key);
+ return 1;
+ }
+ }
+
+ if (IN_SET(result,
+ DNSSEC_MISSING_KEY,
+ DNSSEC_SIGNATURE_EXPIRED,
+ DNSSEC_UNSUPPORTED_ALGORITHM)) {
+
+ r = dns_transaction_dnskey_authenticated(t, rr);
+ if (r < 0 && r != -ENXIO)
+ return r;
+ if (r == 0) {
+ /* The DNSKEY transaction was not authenticated, this means there's
+ * no DS for this, which means it's OK if no keys are found for this signature. */
+
+ r = dns_answer_move_by_key(validated, &t->answer, rr->key, 0);
+ if (r < 0)
+ return r;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, rr->key);
+ return 1;
+ }
+ }
+
+ r = dns_transaction_is_primary_response(t, rr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Look for a matching DNAME for this CNAME */
+ r = dns_answer_has_dname_for_cname(t->answer, rr);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Also look among the stuff we already validated */
+ r = dns_answer_has_dname_for_cname(*validated, rr);
+ if (r < 0)
+ return r;
+ }
+
+ if (r == 0) {
+ if (IN_SET(result,
+ DNSSEC_INVALID,
+ DNSSEC_SIGNATURE_EXPIRED,
+ DNSSEC_NO_SIGNATURE))
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_BOGUS, rr->key);
+ else /* DNSSEC_MISSING_KEY or DNSSEC_UNSUPPORTED_ALGORITHM */
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INDETERMINATE, rr->key);
+
+ /* This is a primary response to our question, and it failed validation.
+ * That's fatal. */
+ t->answer_dnssec_result = result;
+ return 0;
+ }
+
+ /* This is a primary response, but we do have a DNAME RR
+ * in the RR that can replay this CNAME, hence rely on
+ * that, and we can remove the CNAME in favour of it. */
+ }
+
+ /* This is just some auxiliary data. Just remove the RRset and continue. */
+ r = dns_answer_remove_by_key(&t->answer, rr->key);
+ if (r < 0)
+ return r;
+
+ /* We dropped something from the answer, start from the beginning. */
+ return 1;
+ }
+
+ return 2; /* Finito. */
+}
+
+int dns_transaction_validate_dnssec(DnsTransaction *t) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *validated = NULL;
+ Phase phase;
+ DnsAnswerFlags flags;
+ int r;
+ char key_str[DNS_RESOURCE_KEY_STRING_MAX];
+
+ assert(t);
+
+ /* We have now collected all DS and DNSKEY RRs in
+ * t->validated_keys, let's see which RRs we can now
+ * authenticate with that. */
+
+ if (t->scope->dnssec_mode == DNSSEC_NO)
+ return 0;
+
+ /* Already validated */
+ if (t->answer_dnssec_result != _DNSSEC_RESULT_INVALID)
+ return 0;
+
+ /* Our own stuff needs no validation */
+ if (IN_SET(t->answer_source, DNS_TRANSACTION_ZONE, DNS_TRANSACTION_TRUST_ANCHOR)) {
+ t->answer_dnssec_result = DNSSEC_VALIDATED;
+ t->answer_authenticated = true;
+ return 0;
+ }
+
+ /* Cached stuff is not affected by validation. */
+ if (t->answer_source != DNS_TRANSACTION_NETWORK)
+ return 0;
+
+ if (!dns_transaction_dnssec_supported_full(t)) {
+ /* The server does not support DNSSEC, or doesn't augment responses with RRSIGs. */
+ t->answer_dnssec_result = DNSSEC_INCOMPATIBLE_SERVER;
+ log_debug("Not validating response for %" PRIu16 ", used server feature level does not support DNSSEC.", t->id);
+ return 0;
+ }
+
+ log_debug("Validating response from transaction %" PRIu16 " (%s).",
+ t->id,
+ dns_resource_key_to_string(t->key, key_str, sizeof key_str));
+
+ /* First, see if this response contains any revoked trust
+ * anchors we care about */
+ r = dns_transaction_check_revoked_trust_anchors(t);
+ if (r < 0)
+ return r;
+
+ /* Third, copy all RRs we acquired successfully from auxiliary RRs over. */
+ r = dns_transaction_copy_validated(t);
+ if (r < 0)
+ return r;
+
+ /* Second, see if there are DNSKEYs we already know a
+ * validated DS for. */
+ r = dns_transaction_validate_dnskey_by_ds(t);
+ if (r < 0)
+ return r;
+
+ /* Fourth, remove all DNSKEY and DS RRs again that our trust
+ * anchor says are revoked. After all we might have marked
+ * some keys revoked above, but they might still be lingering
+ * in our validated_keys list. */
+ r = dns_transaction_invalidate_revoked_keys(t);
+ if (r < 0)
+ return r;
+
+ phase = DNSSEC_PHASE_DNSKEY;
+ for (;;) {
+ bool have_nsec = false;
+
+ r = dnssec_validate_records(t, phase, &have_nsec, &validated);
+ if (r <= 0)
+ return r;
+
+ /* Try again as long as we managed to achieve something */
+ if (r == 1)
+ continue;
+
+ if (phase == DNSSEC_PHASE_DNSKEY && have_nsec) {
+ /* OK, we processed all DNSKEYs, and there are NSEC/NSEC3 RRs, look at those now. */
+ phase = DNSSEC_PHASE_NSEC;
+ continue;
+ }
+
+ if (phase != DNSSEC_PHASE_ALL) {
+ /* OK, we processed all DNSKEYs and NSEC/NSEC3 RRs, look at all the rest now.
+ * Note that in this third phase we start to remove RRs we couldn't validate. */
+ phase = DNSSEC_PHASE_ALL;
+ continue;
+ }
+
+ /* We're done */
+ break;
+ }
+
+ dns_answer_unref(t->answer);
+ t->answer = TAKE_PTR(validated);
+
+ /* At this point the answer only contains validated
+ * RRsets. Now, let's see if it actually answers the question
+ * we asked. If so, great! If it doesn't, then see if
+ * NSEC/NSEC3 can prove this. */
+ r = dns_transaction_has_positive_answer(t, &flags);
+ if (r > 0) {
+ /* Yes, it answers the question! */
+
+ if (flags & DNS_ANSWER_AUTHENTICATED) {
+ /* The answer is fully authenticated, yay. */
+ t->answer_dnssec_result = DNSSEC_VALIDATED;
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_authenticated = true;
+ } else {
+ /* The answer is not fully authenticated. */
+ t->answer_dnssec_result = DNSSEC_UNSIGNED;
+ t->answer_authenticated = false;
+ }
+
+ } else if (r == 0) {
+ DnssecNsecResult nr;
+ bool authenticated = false;
+
+ /* Bummer! Let's check NSEC/NSEC3 */
+ r = dnssec_nsec_test(t->answer, t->key, &nr, &authenticated, &t->answer_nsec_ttl);
+ if (r < 0)
+ return r;
+
+ switch (nr) {
+
+ case DNSSEC_NSEC_NXDOMAIN:
+ /* NSEC proves the domain doesn't exist. Very good. */
+ log_debug("Proved NXDOMAIN via NSEC/NSEC3 for transaction %u (%s)", t->id, key_str);
+ t->answer_dnssec_result = DNSSEC_VALIDATED;
+ t->answer_rcode = DNS_RCODE_NXDOMAIN;
+ t->answer_authenticated = authenticated;
+
+ manager_dnssec_verdict(t->scope->manager, authenticated ? DNSSEC_SECURE : DNSSEC_INSECURE, t->key);
+ break;
+
+ case DNSSEC_NSEC_NODATA:
+ /* NSEC proves that there's no data here, very good. */
+ log_debug("Proved NODATA via NSEC/NSEC3 for transaction %u (%s)", t->id, key_str);
+ t->answer_dnssec_result = DNSSEC_VALIDATED;
+ t->answer_rcode = DNS_RCODE_SUCCESS;
+ t->answer_authenticated = authenticated;
+
+ manager_dnssec_verdict(t->scope->manager, authenticated ? DNSSEC_SECURE : DNSSEC_INSECURE, t->key);
+ break;
+
+ case DNSSEC_NSEC_OPTOUT:
+ /* NSEC3 says the data might not be signed */
+ log_debug("Data is NSEC3 opt-out via NSEC/NSEC3 for transaction %u (%s)", t->id, key_str);
+ t->answer_dnssec_result = DNSSEC_UNSIGNED;
+ t->answer_authenticated = false;
+
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, t->key);
+ break;
+
+ case DNSSEC_NSEC_NO_RR:
+ /* No NSEC data? Bummer! */
+
+ r = dns_transaction_requires_nsec(t);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ t->answer_dnssec_result = DNSSEC_NO_SIGNATURE;
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_BOGUS, t->key);
+ } else {
+ t->answer_dnssec_result = DNSSEC_UNSIGNED;
+ t->answer_authenticated = false;
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INSECURE, t->key);
+ }
+
+ break;
+
+ case DNSSEC_NSEC_UNSUPPORTED_ALGORITHM:
+ /* We don't know the NSEC3 algorithm used? */
+ t->answer_dnssec_result = DNSSEC_UNSUPPORTED_ALGORITHM;
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_INDETERMINATE, t->key);
+ break;
+
+ case DNSSEC_NSEC_FOUND:
+ case DNSSEC_NSEC_CNAME:
+ /* NSEC says it needs to be there, but we couldn't find it? Bummer! */
+ t->answer_dnssec_result = DNSSEC_NSEC_MISMATCH;
+ manager_dnssec_verdict(t->scope->manager, DNSSEC_BOGUS, t->key);
+ break;
+
+ default:
+ assert_not_reached("Unexpected NSEC result.");
+ }
+ }
+
+ return 1;
+}
+
+static const char* const dns_transaction_state_table[_DNS_TRANSACTION_STATE_MAX] = {
+ [DNS_TRANSACTION_NULL] = "null",
+ [DNS_TRANSACTION_PENDING] = "pending",
+ [DNS_TRANSACTION_VALIDATING] = "validating",
+ [DNS_TRANSACTION_RCODE_FAILURE] = "rcode-failure",
+ [DNS_TRANSACTION_SUCCESS] = "success",
+ [DNS_TRANSACTION_NO_SERVERS] = "no-servers",
+ [DNS_TRANSACTION_TIMEOUT] = "timeout",
+ [DNS_TRANSACTION_ATTEMPTS_MAX_REACHED] = "attempts-max-reached",
+ [DNS_TRANSACTION_INVALID_REPLY] = "invalid-reply",
+ [DNS_TRANSACTION_ERRNO] = "errno",
+ [DNS_TRANSACTION_ABORTED] = "aborted",
+ [DNS_TRANSACTION_DNSSEC_FAILED] = "dnssec-failed",
+ [DNS_TRANSACTION_NO_TRUST_ANCHOR] = "no-trust-anchor",
+ [DNS_TRANSACTION_RR_TYPE_UNSUPPORTED] = "rr-type-unsupported",
+ [DNS_TRANSACTION_NETWORK_DOWN] = "network-down",
+ [DNS_TRANSACTION_NOT_FOUND] = "not-found",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_transaction_state, DnsTransactionState);
+
+static const char* const dns_transaction_source_table[_DNS_TRANSACTION_SOURCE_MAX] = {
+ [DNS_TRANSACTION_NETWORK] = "network",
+ [DNS_TRANSACTION_CACHE] = "cache",
+ [DNS_TRANSACTION_ZONE] = "zone",
+ [DNS_TRANSACTION_TRUST_ANCHOR] = "trust-anchor",
+};
+DEFINE_STRING_TABLE_LOOKUP(dns_transaction_source, DnsTransactionSource);
diff --git a/src/resolve/resolved-dns-transaction.h b/src/resolve/resolved-dns-transaction.h
new file mode 100644
index 0000000..88b0d8e
--- /dev/null
+++ b/src/resolve/resolved-dns-transaction.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+typedef struct DnsTransaction DnsTransaction;
+typedef enum DnsTransactionState DnsTransactionState;
+typedef enum DnsTransactionSource DnsTransactionSource;
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-server.h"
+
+enum DnsTransactionState {
+ DNS_TRANSACTION_NULL,
+ DNS_TRANSACTION_PENDING,
+ DNS_TRANSACTION_VALIDATING,
+ DNS_TRANSACTION_RCODE_FAILURE,
+ DNS_TRANSACTION_SUCCESS,
+ DNS_TRANSACTION_NO_SERVERS,
+ DNS_TRANSACTION_TIMEOUT,
+ DNS_TRANSACTION_ATTEMPTS_MAX_REACHED,
+ DNS_TRANSACTION_INVALID_REPLY,
+ DNS_TRANSACTION_ERRNO,
+ DNS_TRANSACTION_ABORTED,
+ DNS_TRANSACTION_DNSSEC_FAILED,
+ DNS_TRANSACTION_NO_TRUST_ANCHOR,
+ DNS_TRANSACTION_RR_TYPE_UNSUPPORTED,
+ DNS_TRANSACTION_NETWORK_DOWN,
+ DNS_TRANSACTION_NOT_FOUND, /* like NXDOMAIN, but when LLMNR/TCP connections fail */
+ _DNS_TRANSACTION_STATE_MAX,
+ _DNS_TRANSACTION_STATE_INVALID = -1
+};
+
+#define DNS_TRANSACTION_IS_LIVE(state) IN_SET((state), DNS_TRANSACTION_NULL, DNS_TRANSACTION_PENDING, DNS_TRANSACTION_VALIDATING)
+
+enum DnsTransactionSource {
+ DNS_TRANSACTION_NETWORK,
+ DNS_TRANSACTION_CACHE,
+ DNS_TRANSACTION_ZONE,
+ DNS_TRANSACTION_TRUST_ANCHOR,
+ _DNS_TRANSACTION_SOURCE_MAX,
+ _DNS_TRANSACTION_SOURCE_INVALID = -1
+};
+
+struct DnsTransaction {
+ DnsScope *scope;
+
+ DnsResourceKey *key;
+
+ DnsTransactionState state;
+
+ uint16_t id;
+
+ bool tried_stream:1;
+
+ bool initial_jitter_scheduled:1;
+ bool initial_jitter_elapsed:1;
+
+ bool clamp_ttl:1;
+
+ bool probing:1;
+
+ DnsPacket *sent, *received;
+
+ DnsAnswer *answer;
+ int answer_rcode;
+ DnssecResult answer_dnssec_result;
+ DnsTransactionSource answer_source;
+ uint32_t answer_nsec_ttl;
+ int answer_errno; /* if state is DNS_TRANSACTION_ERRNO */
+
+ /* Indicates whether the primary answer is authenticated,
+ * i.e. whether the RRs from answer which directly match the
+ * question are authenticated, or, if there are none, whether
+ * the NODATA or NXDOMAIN case is. It says nothing about
+ * additional RRs listed in the answer, however they have
+ * their own DNS_ANSWER_AUTHORIZED FLAGS. Note that this bit
+ * is defined different than the AD bit in DNS packets, as
+ * that covers more than just the actual primary answer. */
+ bool answer_authenticated;
+
+ /* Contains DNSKEY, DS, SOA RRs we already verified and need
+ * to authenticate this reply */
+ DnsAnswer *validated_keys;
+
+ usec_t start_usec;
+ usec_t next_attempt_after;
+ sd_event_source *timeout_event_source;
+ unsigned n_attempts;
+
+ unsigned n_picked_servers;
+
+ /* UDP connection logic, if we need it */
+ int dns_udp_fd;
+ sd_event_source *dns_udp_event_source;
+
+ /* TCP connection logic, if we need it */
+ DnsStream *stream;
+
+ /* The active server */
+ DnsServer *server;
+
+ /* The features of the DNS server at time of transaction start */
+ DnsServerFeatureLevel current_feature_level;
+
+ /* If we got SERVFAIL back, we retry the lookup, using a lower feature level than we used before. */
+ DnsServerFeatureLevel clamp_feature_level;
+
+ /* Query candidates this transaction is referenced by and that
+ * shall be notified about this specific transaction
+ * completing. */
+ Set *notify_query_candidates, *notify_query_candidates_done;
+
+ /* Zone items this transaction is referenced by and that shall
+ * be notified about completion. */
+ Set *notify_zone_items, *notify_zone_items_done;
+
+ /* Other transactions that this transactions is referenced by
+ * and that shall be notified about completion. This is used
+ * when transactions want to validate their RRsets, but need
+ * another DNSKEY or DS RR to do so. */
+ Set *notify_transactions, *notify_transactions_done;
+
+ /* The opposite direction: the transactions this transaction
+ * created in order to request DNSKEY or DS RRs. */
+ Set *dnssec_transactions;
+
+ unsigned block_gc;
+
+ LIST_FIELDS(DnsTransaction, transactions_by_scope);
+ LIST_FIELDS(DnsTransaction, transactions_by_stream);
+};
+
+int dns_transaction_new(DnsTransaction **ret, DnsScope *s, DnsResourceKey *key);
+DnsTransaction* dns_transaction_free(DnsTransaction *t);
+
+bool dns_transaction_gc(DnsTransaction *t);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsTransaction*, dns_transaction_gc);
+
+int dns_transaction_go(DnsTransaction *t);
+
+void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p);
+void dns_transaction_complete(DnsTransaction *t, DnsTransactionState state);
+
+void dns_transaction_notify(DnsTransaction *t, DnsTransaction *source);
+int dns_transaction_validate_dnssec(DnsTransaction *t);
+int dns_transaction_request_dnssec_keys(DnsTransaction *t);
+
+const char* dns_transaction_state_to_string(DnsTransactionState p) _const_;
+DnsTransactionState dns_transaction_state_from_string(const char *s) _pure_;
+
+const char* dns_transaction_source_to_string(DnsTransactionSource p) _const_;
+DnsTransactionSource dns_transaction_source_from_string(const char *s) _pure_;
+
+/* LLMNR Jitter interval, see RFC 4795 Section 7 */
+#define LLMNR_JITTER_INTERVAL_USEC (100 * USEC_PER_MSEC)
+
+/* mDNS Jitter interval, see RFC 6762 Section 5.2 */
+#define MDNS_JITTER_MIN_USEC (20 * USEC_PER_MSEC)
+#define MDNS_JITTER_RANGE_USEC (100 * USEC_PER_MSEC)
+
+/* mDNS probing interval, see RFC 6762 Section 8.1 */
+#define MDNS_PROBING_INTERVAL_USEC (250 * USEC_PER_MSEC)
+
+/* Maximum attempts to send DNS requests, across all DNS servers */
+#define DNS_TRANSACTION_ATTEMPTS_MAX 24
+
+/* Maximum attempts to send LLMNR requests, see RFC 4795 Section 2.7 */
+#define LLMNR_TRANSACTION_ATTEMPTS_MAX 3
+
+/* Maximum attempts to send MDNS requests, see RFC 6762 Section 8.1 */
+#define MDNS_TRANSACTION_ATTEMPTS_MAX 3
+
+#define TRANSACTION_ATTEMPTS_MAX(p) (((p) == DNS_PROTOCOL_LLMNR) ? \
+ LLMNR_TRANSACTION_ATTEMPTS_MAX : \
+ (((p) == DNS_PROTOCOL_MDNS) ? \
+ MDNS_TRANSACTION_ATTEMPTS_MAX : \
+ DNS_TRANSACTION_ATTEMPTS_MAX))
diff --git a/src/resolve/resolved-dns-trust-anchor.c b/src/resolve/resolved-dns-trust-anchor.c
new file mode 100644
index 0000000..3e5d255
--- /dev/null
+++ b/src/resolve/resolved-dns-trust-anchor.c
@@ -0,0 +1,770 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-trust-anchor.h"
+#include "set.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+static const char trust_anchor_dirs[] = CONF_PATHS_NULSTR("dnssec-trust-anchors.d");
+
+/* The second DS RR from https://data.iana.org/root-anchors/root-anchors.xml, retrieved February 2017 */
+static const uint8_t root_digest2[] =
+ { 0xE0, 0x6D, 0x44, 0xB8, 0x0B, 0x8F, 0x1D, 0x39, 0xA9, 0x5C, 0x0B, 0x0D, 0x7C, 0x65, 0xD0, 0x84,
+ 0x58, 0xE8, 0x80, 0x40, 0x9B, 0xBC, 0x68, 0x34, 0x57, 0x10, 0x42, 0x37, 0xC7, 0xF8, 0xEC, 0x8D };
+
+static bool dns_trust_anchor_knows_domain_positive(DnsTrustAnchor *d, const char *name) {
+ assert(d);
+
+ /* Returns true if there's an entry for the specified domain
+ * name in our trust anchor */
+
+ return
+ hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_DNSKEY, name)) ||
+ hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_DS, name));
+}
+
+static int add_root_ksk(
+ DnsAnswer *answer,
+ DnsResourceKey *key,
+ uint16_t key_tag,
+ uint8_t algorithm,
+ uint8_t digest_type,
+ const void *digest,
+ size_t digest_size) {
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ int r;
+
+ rr = dns_resource_record_new(key);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->ds.key_tag = key_tag;
+ rr->ds.algorithm = algorithm;
+ rr->ds.digest_type = digest_type;
+ rr->ds.digest_size = digest_size;
+ rr->ds.digest = memdup(digest, rr->ds.digest_size);
+ if (!rr->ds.digest)
+ return -ENOMEM;
+
+ r = dns_answer_add(answer, rr, 0, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int dns_trust_anchor_add_builtin_positive(DnsTrustAnchor *d) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+ int r;
+
+ assert(d);
+
+ r = hashmap_ensure_allocated(&d->positive_by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ /* Only add the built-in trust anchor if there's neither a DS nor a DNSKEY defined for the root domain. That
+ * way users have an easy way to override the root domain DS/DNSKEY data. */
+ if (dns_trust_anchor_knows_domain_positive(d, "."))
+ return 0;
+
+ key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_DS, "");
+ if (!key)
+ return -ENOMEM;
+
+ answer = dns_answer_new(2);
+ if (!answer)
+ return -ENOMEM;
+
+ /* Add the currently valid RRs from https://data.iana.org/root-anchors/root-anchors.xml */
+ r = add_root_ksk(answer, key, 20326, DNSSEC_ALGORITHM_RSASHA256, DNSSEC_DIGEST_SHA256, root_digest2, sizeof(root_digest2));
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(d->positive_by_key, key, answer);
+ if (r < 0)
+ return r;
+
+ answer = NULL;
+ return 0;
+}
+
+static int dns_trust_anchor_add_builtin_negative(DnsTrustAnchor *d) {
+
+ static const char private_domains[] =
+ /* RFC 6761 says that .test is a special domain for
+ * testing and not to be installed in the root zone */
+ "test\0"
+
+ /* RFC 6761 says that these reverse IP lookup ranges
+ * are for private addresses, and hence should not
+ * show up in the root zone */
+ "10.in-addr.arpa\0"
+ "16.172.in-addr.arpa\0"
+ "17.172.in-addr.arpa\0"
+ "18.172.in-addr.arpa\0"
+ "19.172.in-addr.arpa\0"
+ "20.172.in-addr.arpa\0"
+ "21.172.in-addr.arpa\0"
+ "22.172.in-addr.arpa\0"
+ "23.172.in-addr.arpa\0"
+ "24.172.in-addr.arpa\0"
+ "25.172.in-addr.arpa\0"
+ "26.172.in-addr.arpa\0"
+ "27.172.in-addr.arpa\0"
+ "28.172.in-addr.arpa\0"
+ "29.172.in-addr.arpa\0"
+ "30.172.in-addr.arpa\0"
+ "31.172.in-addr.arpa\0"
+ "168.192.in-addr.arpa\0"
+
+ /* The same, but for IPv6. */
+ "d.f.ip6.arpa\0"
+
+ /* RFC 6762 reserves the .local domain for Multicast
+ * DNS, it hence cannot appear in the root zone. (Note
+ * that we by default do not route .local traffic to
+ * DNS anyway, except when a configured search domain
+ * suggests so.) */
+ "local\0"
+
+ /* These two are well known, popular private zone
+ * TLDs, that are blocked from delegation, according
+ * to:
+ * http://icannwiki.com/Name_Collision#NGPC_Resolution
+ *
+ * There's also ongoing work on making this official
+ * in an RRC:
+ * https://www.ietf.org/archive/id/draft-chapin-additional-reserved-tlds-02.txt */
+ "home\0"
+ "corp\0"
+
+ /* The following four TLDs are suggested for private
+ * zones in RFC 6762, Appendix G, and are hence very
+ * unlikely to be made official TLDs any day soon */
+ "lan\0"
+ "intranet\0"
+ "internal\0"
+ "private\0";
+
+ const char *name;
+ int r;
+
+ assert(d);
+
+ /* Only add the built-in trust anchor if there's no negative
+ * trust anchor defined at all. This enables easy overriding
+ * of negative trust anchors. */
+
+ if (set_size(d->negative_by_name) > 0)
+ return 0;
+
+ r = set_ensure_allocated(&d->negative_by_name, &dns_name_hash_ops);
+ if (r < 0)
+ return r;
+
+ /* We add a couple of domains as default negative trust
+ * anchors, where it's very unlikely they will be installed in
+ * the root zone. If they exist they must be private, and thus
+ * unsigned. */
+
+ NULSTR_FOREACH(name, private_domains) {
+ if (dns_trust_anchor_knows_domain_positive(d, name))
+ continue;
+
+ r = set_put_strdup(&d->negative_by_name, name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_trust_anchor_load_positive(DnsTrustAnchor *d, const char *path, unsigned line, const char *s) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ _cleanup_free_ char *domain = NULL, *class = NULL, *type = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnsAnswer *old_answer = NULL;
+ const char *p = s;
+ int r;
+
+ assert(d);
+ assert(line);
+
+ r = extract_first_word(&p, &domain, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_warning_errno(r, "Unable to parse domain in line %s:%u: %m", path, line);
+
+ r = dns_name_is_valid(domain);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to check validity of domain name '%s', at line %s:%u, ignoring line: %m", domain, path, line);
+ if (r == 0) {
+ log_warning("Domain name %s is invalid, at line %s:%u, ignoring line.", domain, path, line);
+ return -EINVAL;
+ }
+
+ r = extract_many_words(&p, NULL, 0, &class, &type, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Unable to parse class and type in line %s:%u: %m", path, line);
+ if (r != 2) {
+ log_warning("Missing class or type in line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ if (!strcaseeq(class, "IN")) {
+ log_warning("RR class %s is not supported, ignoring line %s:%u.", class, path, line);
+ return -EINVAL;
+ }
+
+ if (strcaseeq(type, "DS")) {
+ _cleanup_free_ char *key_tag = NULL, *algorithm = NULL, *digest_type = NULL;
+ _cleanup_free_ void *dd = NULL;
+ uint16_t kt;
+ int a, dt;
+ size_t l;
+
+ r = extract_many_words(&p, NULL, 0, &key_tag, &algorithm, &digest_type, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse DS parameters on line %s:%u: %m", path, line);
+ return -EINVAL;
+ }
+ if (r != 3) {
+ log_warning("Missing DS parameters on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ r = safe_atou16(key_tag, &kt);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse DS key tag %s on line %s:%u: %m", key_tag, path, line);
+
+ a = dnssec_algorithm_from_string(algorithm);
+ if (a < 0) {
+ log_warning("Failed to parse DS algorithm %s on line %s:%u", algorithm, path, line);
+ return -EINVAL;
+ }
+
+ dt = dnssec_digest_from_string(digest_type);
+ if (dt < 0) {
+ log_warning("Failed to parse DS digest type %s on line %s:%u", digest_type, path, line);
+ return -EINVAL;
+ }
+
+ if (isempty(p)) {
+ log_warning("Missing DS digest on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ r = unhexmem(p, strlen(p), &dd, &l);
+ if (r < 0) {
+ log_warning("Failed to parse DS digest %s on line %s:%u", p, path, line);
+ return -EINVAL;
+ }
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, domain);
+ if (!rr)
+ return log_oom();
+
+ rr->ds.key_tag = kt;
+ rr->ds.algorithm = a;
+ rr->ds.digest_type = dt;
+ rr->ds.digest_size = l;
+ rr->ds.digest = TAKE_PTR(dd);
+
+ } else if (strcaseeq(type, "DNSKEY")) {
+ _cleanup_free_ char *flags = NULL, *protocol = NULL, *algorithm = NULL;
+ _cleanup_free_ void *k = NULL;
+ uint16_t f;
+ size_t l;
+ int a;
+
+ r = extract_many_words(&p, NULL, 0, &flags, &protocol, &algorithm, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse DNSKEY parameters on line %s:%u: %m", path, line);
+ if (r != 3) {
+ log_warning("Missing DNSKEY parameters on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ if (!streq(protocol, "3")) {
+ log_warning("DNSKEY Protocol is not 3 on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ r = safe_atou16(flags, &f);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse DNSKEY flags field %s on line %s:%u", flags, path, line);
+ if ((f & DNSKEY_FLAG_ZONE_KEY) == 0) {
+ log_warning("DNSKEY lacks zone key bit set on line %s:%u", path, line);
+ return -EINVAL;
+ }
+ if ((f & DNSKEY_FLAG_REVOKE)) {
+ log_warning("DNSKEY is already revoked on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ a = dnssec_algorithm_from_string(algorithm);
+ if (a < 0) {
+ log_warning("Failed to parse DNSKEY algorithm %s on line %s:%u", algorithm, path, line);
+ return -EINVAL;
+ }
+
+ if (isempty(p)) {
+ log_warning("Missing DNSKEY key on line %s:%u", path, line);
+ return -EINVAL;
+ }
+
+ r = unbase64mem(p, strlen(p), &k, &l);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse DNSKEY key data %s on line %s:%u", p, path, line);
+
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, domain);
+ if (!rr)
+ return log_oom();
+
+ rr->dnskey.flags = f;
+ rr->dnskey.protocol = 3;
+ rr->dnskey.algorithm = a;
+ rr->dnskey.key_size = l;
+ rr->dnskey.key = TAKE_PTR(k);
+
+ } else {
+ log_warning("RR type %s is not supported, ignoring line %s:%u.", type, path, line);
+ return -EINVAL;
+ }
+
+ r = hashmap_ensure_allocated(&d->positive_by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ old_answer = hashmap_get(d->positive_by_key, rr->key);
+ answer = dns_answer_ref(old_answer);
+
+ r = dns_answer_add_extend(&answer, rr, 0, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add trust anchor RR: %m");
+
+ r = hashmap_replace(d->positive_by_key, rr->key, answer);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add answer to trust anchor: %m");
+
+ old_answer = dns_answer_unref(old_answer);
+ answer = NULL;
+
+ return 0;
+}
+
+static int dns_trust_anchor_load_negative(DnsTrustAnchor *d, const char *path, unsigned line, const char *s) {
+ _cleanup_free_ char *domain = NULL;
+ const char *p = s;
+ int r;
+
+ assert(d);
+ assert(line);
+
+ r = extract_first_word(&p, &domain, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_warning_errno(r, "Unable to parse line %s:%u: %m", path, line);
+
+ r = dns_name_is_valid(domain);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to check validity of domain name '%s', at line %s:%u, ignoring line: %m", domain, path, line);
+ if (r == 0) {
+ log_warning("Domain name %s is invalid, at line %s:%u, ignoring line.", domain, path, line);
+ return -EINVAL;
+ }
+
+ if (!isempty(p)) {
+ log_warning("Trailing garbage at line %s:%u, ignoring line.", path, line);
+ return -EINVAL;
+ }
+
+ r = set_ensure_consume(&d->negative_by_name, &dns_name_hash_ops, TAKE_PTR(domain));
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int dns_trust_anchor_load_files(
+ DnsTrustAnchor *d,
+ const char *suffix,
+ int (*loader)(DnsTrustAnchor *d, const char *path, unsigned n, const char *line)) {
+
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(d);
+ assert(suffix);
+ assert(loader);
+
+ r = conf_files_list_nulstr(&files, suffix, NULL, 0, trust_anchor_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate %s trust anchor files: %m", suffix);
+
+ STRV_FOREACH(f, files) {
+ _cleanup_fclose_ FILE *g = NULL;
+ unsigned n = 0;
+
+ g = fopen(*f, "r");
+ if (!g) {
+ if (errno == ENOENT)
+ continue;
+
+ log_warning_errno(errno, "Failed to open '%s', ignoring: %m", *f);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(g, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read '%s', ignoring: %m", *f);
+ break;
+ }
+ if (r == 0)
+ break;
+
+ n++;
+
+ l = strstrip(line);
+ if (isempty(l))
+ continue;
+
+ if (*l == ';')
+ continue;
+
+ (void) loader(d, *f, n, l);
+ }
+ }
+
+ return 0;
+}
+
+static int domain_name_cmp(char * const *a, char * const *b) {
+ return dns_name_compare_func(*a, *b);
+}
+
+static int dns_trust_anchor_dump(DnsTrustAnchor *d) {
+ DnsAnswer *a;
+
+ assert(d);
+
+ if (hashmap_isempty(d->positive_by_key))
+ log_info("No positive trust anchors defined.");
+ else {
+ log_info("Positive Trust Anchors:");
+ HASHMAP_FOREACH(a, d->positive_by_key) {
+ DnsResourceRecord *rr;
+
+ DNS_ANSWER_FOREACH(rr, a)
+ log_info("%s", dns_resource_record_to_string(rr));
+ }
+ }
+
+ if (set_isempty(d->negative_by_name))
+ log_info("No negative trust anchors defined.");
+ else {
+ _cleanup_free_ char **l = NULL, *j = NULL;
+
+ l = set_get_strv(d->negative_by_name);
+ if (!l)
+ return log_oom();
+
+ typesafe_qsort(l, set_size(d->negative_by_name), domain_name_cmp);
+
+ j = strv_join(l, " ");
+ if (!j)
+ return log_oom();
+
+ log_info("Negative trust anchors: %s", j);
+ }
+
+ return 0;
+}
+
+int dns_trust_anchor_load(DnsTrustAnchor *d) {
+ int r;
+
+ assert(d);
+
+ /* If loading things from disk fails, we don't consider this fatal */
+ (void) dns_trust_anchor_load_files(d, ".positive", dns_trust_anchor_load_positive);
+ (void) dns_trust_anchor_load_files(d, ".negative", dns_trust_anchor_load_negative);
+
+ /* However, if the built-in DS fails, then we have a problem. */
+ r = dns_trust_anchor_add_builtin_positive(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add built-in positive trust anchor: %m");
+
+ r = dns_trust_anchor_add_builtin_negative(d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add built-in negative trust anchor: %m");
+
+ dns_trust_anchor_dump(d);
+
+ return 0;
+}
+
+void dns_trust_anchor_flush(DnsTrustAnchor *d) {
+ assert(d);
+
+ d->positive_by_key = hashmap_free_with_destructor(d->positive_by_key, dns_answer_unref);
+ d->revoked_by_rr = set_free_with_destructor(d->revoked_by_rr, dns_resource_record_unref);
+ d->negative_by_name = set_free_free(d->negative_by_name);
+}
+
+int dns_trust_anchor_lookup_positive(DnsTrustAnchor *d, const DnsResourceKey *key, DnsAnswer **ret) {
+ DnsAnswer *a;
+
+ assert(d);
+ assert(key);
+ assert(ret);
+
+ /* We only serve DS and DNSKEY RRs. */
+ if (!IN_SET(key->type, DNS_TYPE_DS, DNS_TYPE_DNSKEY))
+ return 0;
+
+ a = hashmap_get(d->positive_by_key, key);
+ if (!a)
+ return 0;
+
+ *ret = dns_answer_ref(a);
+ return 1;
+}
+
+int dns_trust_anchor_lookup_negative(DnsTrustAnchor *d, const char *name) {
+ int r;
+
+ assert(d);
+ assert(name);
+
+ for (;;) {
+ /* If the domain is listed as-is in the NTA database, then that counts */
+ if (set_contains(d->negative_by_name, name))
+ return true;
+
+ /* If the domain isn't listed as NTA, but is listed as positive trust anchor, then that counts. See RFC
+ * 7646, section 1.1 */
+ if (hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_DS, name)))
+ return false;
+
+ if (hashmap_contains(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_IN, DNS_TYPE_KEY, name)))
+ return false;
+
+ /* And now, let's look at the parent, and check that too */
+ r = dns_name_parent(&name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ return false;
+}
+
+static int dns_trust_anchor_revoked_put(DnsTrustAnchor *d, DnsResourceRecord *rr) {
+ int r;
+
+ assert(d);
+
+ r = set_ensure_put(&d->revoked_by_rr, &dns_resource_record_hash_ops, rr);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ dns_resource_record_ref(rr);
+
+ return r;
+}
+
+static int dns_trust_anchor_remove_revoked(DnsTrustAnchor *d, DnsResourceRecord *rr) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *new_answer = NULL;
+ DnsAnswer *old_answer;
+ int r;
+
+ /* Remember that this is a revoked trust anchor RR */
+ r = dns_trust_anchor_revoked_put(d, rr);
+ if (r < 0)
+ return r;
+
+ /* Remove this from the positive trust anchor */
+ old_answer = hashmap_get(d->positive_by_key, rr->key);
+ if (!old_answer)
+ return 0;
+
+ new_answer = dns_answer_ref(old_answer);
+
+ r = dns_answer_remove_by_rr(&new_answer, rr);
+ if (r <= 0)
+ return r;
+
+ /* We found the key! Warn the user */
+ log_struct(LOG_WARNING,
+ "MESSAGE_ID=" SD_MESSAGE_DNSSEC_TRUST_ANCHOR_REVOKED_STR,
+ LOG_MESSAGE("DNSSEC trust anchor %s has been revoked.\n"
+ "Please update the trust anchor, or upgrade your operating system.",
+ strna(dns_resource_record_to_string(rr))),
+ "TRUST_ANCHOR=%s", dns_resource_record_to_string(rr));
+
+ if (dns_answer_size(new_answer) <= 0) {
+ assert_se(hashmap_remove(d->positive_by_key, rr->key) == old_answer);
+ dns_answer_unref(old_answer);
+ return 1;
+ }
+
+ r = hashmap_replace(d->positive_by_key, new_answer->items[0].rr->key, new_answer);
+ if (r < 0)
+ return r;
+
+ new_answer = NULL;
+ dns_answer_unref(old_answer);
+ return 1;
+}
+
+static int dns_trust_anchor_check_revoked_one(DnsTrustAnchor *d, DnsResourceRecord *revoked_dnskey) {
+ DnsAnswer *a;
+ int r;
+
+ assert(d);
+ assert(revoked_dnskey);
+ assert(revoked_dnskey->key->type == DNS_TYPE_DNSKEY);
+ assert(revoked_dnskey->dnskey.flags & DNSKEY_FLAG_REVOKE);
+
+ a = hashmap_get(d->positive_by_key, revoked_dnskey->key);
+ if (a) {
+ DnsResourceRecord *anchor;
+
+ /* First, look for the precise DNSKEY in our trust anchor database */
+
+ DNS_ANSWER_FOREACH(anchor, a) {
+
+ if (anchor->dnskey.protocol != revoked_dnskey->dnskey.protocol)
+ continue;
+
+ if (anchor->dnskey.algorithm != revoked_dnskey->dnskey.algorithm)
+ continue;
+
+ if (anchor->dnskey.key_size != revoked_dnskey->dnskey.key_size)
+ continue;
+
+ /* Note that we allow the REVOKE bit to be
+ * different! It will be set in the revoked
+ * key, but unset in our version of it */
+ if (((anchor->dnskey.flags ^ revoked_dnskey->dnskey.flags) | DNSKEY_FLAG_REVOKE) != DNSKEY_FLAG_REVOKE)
+ continue;
+
+ if (memcmp(anchor->dnskey.key, revoked_dnskey->dnskey.key, anchor->dnskey.key_size) != 0)
+ continue;
+
+ dns_trust_anchor_remove_revoked(d, anchor);
+ break;
+ }
+ }
+
+ a = hashmap_get(d->positive_by_key, &DNS_RESOURCE_KEY_CONST(revoked_dnskey->key->class, DNS_TYPE_DS, dns_resource_key_name(revoked_dnskey->key)));
+ if (a) {
+ DnsResourceRecord *anchor;
+
+ /* Second, look for DS RRs matching this DNSKEY in our trust anchor database */
+
+ DNS_ANSWER_FOREACH(anchor, a) {
+
+ /* We set mask_revoke to true here, since our
+ * DS fingerprint will be the one of the
+ * unrevoked DNSKEY, but the one we got passed
+ * here has the bit set. */
+ r = dnssec_verify_dnskey_by_ds(revoked_dnskey, anchor, true);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ dns_trust_anchor_remove_revoked(d, anchor);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int dns_trust_anchor_check_revoked(DnsTrustAnchor *d, DnsResourceRecord *dnskey, DnsAnswer *rrs) {
+ DnsResourceRecord *rrsig;
+ int r;
+
+ assert(d);
+ assert(dnskey);
+
+ /* Looks if "dnskey" is a self-signed RR that has been revoked
+ * and matches one of our trust anchor entries. If so, removes
+ * it from the trust anchor and returns > 0. */
+
+ if (dnskey->key->type != DNS_TYPE_DNSKEY)
+ return 0;
+
+ /* Is this DNSKEY revoked? */
+ if ((dnskey->dnskey.flags & DNSKEY_FLAG_REVOKE) == 0)
+ return 0;
+
+ /* Could this be interesting to us at all? If not,
+ * there's no point in looking for and verifying a
+ * self-signed RRSIG. */
+ if (!dns_trust_anchor_knows_domain_positive(d, dns_resource_key_name(dnskey->key)))
+ return 0;
+
+ /* Look for a self-signed RRSIG in the other rrs belonging to this DNSKEY */
+ DNS_ANSWER_FOREACH(rrsig, rrs) {
+ DnssecResult result;
+
+ if (rrsig->key->type != DNS_TYPE_RRSIG)
+ continue;
+
+ r = dnssec_rrsig_match_dnskey(rrsig, dnskey, true);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = dnssec_verify_rrset(rrs, dnskey->key, rrsig, dnskey, USEC_INFINITY, &result);
+ if (r < 0)
+ return r;
+ if (result != DNSSEC_VALIDATED)
+ continue;
+
+ /* Bingo! This is a revoked self-signed DNSKEY. Let's
+ * see if this precise one exists in our trust anchor
+ * database, too. */
+ r = dns_trust_anchor_check_revoked_one(d, dnskey);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int dns_trust_anchor_is_revoked(DnsTrustAnchor *d, DnsResourceRecord *rr) {
+ assert(d);
+
+ if (!IN_SET(rr->key->type, DNS_TYPE_DS, DNS_TYPE_DNSKEY))
+ return 0;
+
+ return set_contains(d->revoked_by_rr, rr);
+}
diff --git a/src/resolve/resolved-dns-trust-anchor.h b/src/resolve/resolved-dns-trust-anchor.h
new file mode 100644
index 0000000..14047ec
--- /dev/null
+++ b/src/resolve/resolved-dns-trust-anchor.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct DnsTrustAnchor DnsTrustAnchor;
+
+#include "hashmap.h"
+#include "resolved-dns-answer.h"
+#include "resolved-dns-rr.h"
+
+/* This contains a fixed database mapping domain names to DS or DNSKEY records. */
+
+struct DnsTrustAnchor {
+ Hashmap *positive_by_key;
+ Set *negative_by_name;
+ Set *revoked_by_rr;
+};
+
+int dns_trust_anchor_load(DnsTrustAnchor *d);
+void dns_trust_anchor_flush(DnsTrustAnchor *d);
+
+int dns_trust_anchor_lookup_positive(DnsTrustAnchor *d, const DnsResourceKey* key, DnsAnswer **answer);
+int dns_trust_anchor_lookup_negative(DnsTrustAnchor *d, const char *name);
+
+int dns_trust_anchor_check_revoked(DnsTrustAnchor *d, DnsResourceRecord *dnskey, DnsAnswer *rrs);
+int dns_trust_anchor_is_revoked(DnsTrustAnchor *d, DnsResourceRecord *rr);
diff --git a/src/resolve/resolved-dns-zone.c b/src/resolve/resolved-dns-zone.c
new file mode 100644
index 0000000..00eb672
--- /dev/null
+++ b/src/resolve/resolved-dns-zone.c
@@ -0,0 +1,696 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "list.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-zone.h"
+#include "resolved-dnssd.h"
+#include "resolved-manager.h"
+#include "string-util.h"
+
+/* Never allow more than 1K entries */
+#define ZONE_MAX 1024
+
+void dns_zone_item_probe_stop(DnsZoneItem *i) {
+ DnsTransaction *t;
+ assert(i);
+
+ if (!i->probe_transaction)
+ return;
+
+ t = TAKE_PTR(i->probe_transaction);
+
+ set_remove(t->notify_zone_items, i);
+ set_remove(t->notify_zone_items_done, i);
+ dns_transaction_gc(t);
+}
+
+static void dns_zone_item_free(DnsZoneItem *i) {
+ if (!i)
+ return;
+
+ dns_zone_item_probe_stop(i);
+ dns_resource_record_unref(i->rr);
+
+ free(i);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnsZoneItem*, dns_zone_item_free);
+
+static void dns_zone_item_remove_and_free(DnsZone *z, DnsZoneItem *i) {
+ DnsZoneItem *first;
+
+ assert(z);
+
+ if (!i)
+ return;
+
+ first = hashmap_get(z->by_key, i->rr->key);
+ LIST_REMOVE(by_key, first, i);
+ if (first)
+ assert_se(hashmap_replace(z->by_key, first->rr->key, first) >= 0);
+ else
+ hashmap_remove(z->by_key, i->rr->key);
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(i->rr->key));
+ LIST_REMOVE(by_name, first, i);
+ if (first)
+ assert_se(hashmap_replace(z->by_name, dns_resource_key_name(first->rr->key), first) >= 0);
+ else
+ hashmap_remove(z->by_name, dns_resource_key_name(i->rr->key));
+
+ dns_zone_item_free(i);
+}
+
+void dns_zone_flush(DnsZone *z) {
+ DnsZoneItem *i;
+
+ assert(z);
+
+ while ((i = hashmap_first(z->by_key)))
+ dns_zone_item_remove_and_free(z, i);
+
+ assert(hashmap_size(z->by_key) == 0);
+ assert(hashmap_size(z->by_name) == 0);
+
+ z->by_key = hashmap_free(z->by_key);
+ z->by_name = hashmap_free(z->by_name);
+}
+
+DnsZoneItem* dns_zone_get(DnsZone *z, DnsResourceRecord *rr) {
+ DnsZoneItem *i;
+
+ assert(z);
+ assert(rr);
+
+ LIST_FOREACH(by_key, i, hashmap_get(z->by_key, rr->key))
+ if (dns_resource_record_equal(i->rr, rr) > 0)
+ return i;
+
+ return NULL;
+}
+
+void dns_zone_remove_rr(DnsZone *z, DnsResourceRecord *rr) {
+ DnsZoneItem *i;
+
+ assert(z);
+ assert(rr);
+
+ i = dns_zone_get(z, rr);
+ if (i)
+ dns_zone_item_remove_and_free(z, i);
+}
+
+int dns_zone_remove_rrs_by_key(DnsZone *z, DnsResourceKey *key) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL, *soa = NULL;
+ DnsResourceRecord *rr;
+ bool tentative;
+ int r;
+
+ r = dns_zone_lookup(z, key, 0, &answer, &soa, &tentative);
+ if (r < 0)
+ return r;
+
+ DNS_ANSWER_FOREACH(rr, answer)
+ dns_zone_remove_rr(z, rr);
+
+ return 0;
+}
+
+static int dns_zone_init(DnsZone *z) {
+ int r;
+
+ assert(z);
+
+ r = hashmap_ensure_allocated(&z->by_key, &dns_resource_key_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&z->by_name, &dns_name_hash_ops);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int dns_zone_link_item(DnsZone *z, DnsZoneItem *i) {
+ DnsZoneItem *first;
+ int r;
+
+ first = hashmap_get(z->by_key, i->rr->key);
+ if (first) {
+ LIST_PREPEND(by_key, first, i);
+ assert_se(hashmap_replace(z->by_key, first->rr->key, first) >= 0);
+ } else {
+ r = hashmap_put(z->by_key, i->rr->key, i);
+ if (r < 0)
+ return r;
+ }
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(i->rr->key));
+ if (first) {
+ LIST_PREPEND(by_name, first, i);
+ assert_se(hashmap_replace(z->by_name, dns_resource_key_name(first->rr->key), first) >= 0);
+ } else {
+ r = hashmap_put(z->by_name, dns_resource_key_name(i->rr->key), i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dns_zone_item_probe_start(DnsZoneItem *i) {
+ _cleanup_(dns_transaction_gcp) DnsTransaction *t = NULL;
+ int r;
+
+ assert(i);
+
+ if (i->probe_transaction)
+ return 0;
+
+ t = dns_scope_find_transaction(i->scope, &DNS_RESOURCE_KEY_CONST(i->rr->key->class, DNS_TYPE_ANY, dns_resource_key_name(i->rr->key)), false);
+ if (!t) {
+ _cleanup_(dns_resource_key_unrefp) DnsResourceKey *key = NULL;
+
+ key = dns_resource_key_new(i->rr->key->class, DNS_TYPE_ANY, dns_resource_key_name(i->rr->key));
+ if (!key)
+ return -ENOMEM;
+
+ r = dns_transaction_new(&t, i->scope, key);
+ if (r < 0)
+ return r;
+ }
+
+ r = set_ensure_allocated(&t->notify_zone_items_done, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_put(&t->notify_zone_items, NULL, i);
+ if (r < 0)
+ return r;
+
+ t->probing = true;
+ i->probe_transaction = TAKE_PTR(t);
+
+ if (i->probe_transaction->state == DNS_TRANSACTION_NULL) {
+ i->block_ready++;
+ r = dns_transaction_go(i->probe_transaction);
+ i->block_ready--;
+
+ if (r < 0) {
+ dns_zone_item_probe_stop(i);
+ return r;
+ }
+ }
+
+ dns_zone_item_notify(i);
+ return 0;
+}
+
+int dns_zone_put(DnsZone *z, DnsScope *s, DnsResourceRecord *rr, bool probe) {
+ _cleanup_(dns_zone_item_freep) DnsZoneItem *i = NULL;
+ DnsZoneItem *existing;
+ int r;
+
+ assert(z);
+ assert(s);
+ assert(rr);
+
+ if (dns_class_is_pseudo(rr->key->class))
+ return -EINVAL;
+ if (dns_type_is_pseudo(rr->key->type))
+ return -EINVAL;
+
+ existing = dns_zone_get(z, rr);
+ if (existing)
+ return 0;
+
+ r = dns_zone_init(z);
+ if (r < 0)
+ return r;
+
+ i = new(DnsZoneItem, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (DnsZoneItem) {
+ .scope = s,
+ .rr = dns_resource_record_ref(rr),
+ .probing_enabled = probe,
+ };
+
+ r = dns_zone_link_item(z, i);
+ if (r < 0)
+ return r;
+
+ if (probe) {
+ DnsZoneItem *first, *j;
+ bool established = false;
+
+ /* Check if there's already an RR with the same name
+ * established. If so, it has been probed already, and
+ * we don't need to probe again. */
+
+ LIST_FIND_HEAD(by_name, i, first);
+ LIST_FOREACH(by_name, j, first) {
+ if (i == j)
+ continue;
+
+ if (j->state == DNS_ZONE_ITEM_ESTABLISHED)
+ established = true;
+ }
+
+ if (established)
+ i->state = DNS_ZONE_ITEM_ESTABLISHED;
+ else {
+ i->state = DNS_ZONE_ITEM_PROBING;
+
+ r = dns_zone_item_probe_start(i);
+ if (r < 0) {
+ dns_zone_item_remove_and_free(z, i);
+ i = NULL;
+ return r;
+ }
+ }
+ } else
+ i->state = DNS_ZONE_ITEM_ESTABLISHED;
+
+ i = NULL;
+ return 0;
+}
+
+static int dns_zone_add_authenticated_answer(DnsAnswer *a, DnsZoneItem *i, int ifindex) {
+ DnsAnswerFlags flags;
+
+ /* From RFC 6762, Section 10.2
+ * "They (the rules about when to set the cache-flush bit) apply to
+ * startup announcements as described in Section 8.3, "Announcing",
+ * and to responses generated as a result of receiving query messages."
+ * So, set the cache-flush bit for mDNS answers except for DNS-SD
+ * service enumeration PTRs described in RFC 6763, Section 4.1. */
+ if (i->scope->protocol == DNS_PROTOCOL_MDNS &&
+ !dns_resource_key_is_dnssd_ptr(i->rr->key))
+ flags = DNS_ANSWER_AUTHENTICATED|DNS_ANSWER_CACHE_FLUSH;
+ else
+ flags = DNS_ANSWER_AUTHENTICATED;
+
+ return dns_answer_add(a, i->rr, ifindex, flags);
+}
+
+int dns_zone_lookup(DnsZone *z, DnsResourceKey *key, int ifindex, DnsAnswer **ret_answer, DnsAnswer **ret_soa, bool *ret_tentative) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL, *soa = NULL;
+ unsigned n_answer = 0;
+ DnsZoneItem *j, *first;
+ bool tentative = true, need_soa = false;
+ int r;
+
+ /* Note that we don't actually need the ifindex for anything. However when it is passed we'll initialize the
+ * ifindex field in the answer with it */
+
+ assert(z);
+ assert(key);
+ assert(ret_answer);
+
+ /* First iteration, count what we have */
+
+ if (key->type == DNS_TYPE_ANY || key->class == DNS_CLASS_ANY) {
+ bool found = false, added = false;
+ int k;
+
+ /* If this is a generic match, then we have to
+ * go through the list by the name and look
+ * for everything manually */
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(key));
+ LIST_FOREACH(by_name, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ found = true;
+
+ k = dns_resource_key_match_rr(key, j->rr, NULL);
+ if (k < 0)
+ return k;
+ if (k > 0) {
+ n_answer++;
+ added = true;
+ }
+
+ }
+
+ if (found && !added)
+ need_soa = true;
+
+ } else {
+ bool found = false;
+
+ /* If this is a specific match, then look for
+ * the right key immediately */
+
+ first = hashmap_get(z->by_key, key);
+ LIST_FOREACH(by_key, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ found = true;
+ n_answer++;
+ }
+
+ if (!found) {
+ first = hashmap_get(z->by_name, dns_resource_key_name(key));
+ LIST_FOREACH(by_name, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ need_soa = true;
+ break;
+ }
+ }
+ }
+
+ if (n_answer <= 0 && !need_soa)
+ goto return_empty;
+
+ if (n_answer > 0) {
+ answer = dns_answer_new(n_answer);
+ if (!answer)
+ return -ENOMEM;
+ }
+
+ if (need_soa) {
+ soa = dns_answer_new(1);
+ if (!soa)
+ return -ENOMEM;
+ }
+
+ /* Second iteration, actually add the RRs to the answers */
+ if (key->type == DNS_TYPE_ANY || key->class == DNS_CLASS_ANY) {
+ bool found = false, added = false;
+ int k;
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(key));
+ LIST_FOREACH(by_name, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ found = true;
+
+ if (j->state != DNS_ZONE_ITEM_PROBING)
+ tentative = false;
+
+ k = dns_resource_key_match_rr(key, j->rr, NULL);
+ if (k < 0)
+ return k;
+ if (k > 0) {
+ r = dns_zone_add_authenticated_answer(answer, j, ifindex);
+ if (r < 0)
+ return r;
+
+ added = true;
+ }
+ }
+
+ if (found && !added) {
+ r = dns_answer_add_soa(soa, dns_resource_key_name(key), LLMNR_DEFAULT_TTL, ifindex);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ bool found = false;
+
+ first = hashmap_get(z->by_key, key);
+ LIST_FOREACH(by_key, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ found = true;
+
+ if (j->state != DNS_ZONE_ITEM_PROBING)
+ tentative = false;
+
+ r = dns_zone_add_authenticated_answer(answer, j, ifindex);
+ if (r < 0)
+ return r;
+ }
+
+ if (!found) {
+ bool add_soa = false;
+
+ first = hashmap_get(z->by_name, dns_resource_key_name(key));
+ LIST_FOREACH(by_name, j, first) {
+ if (!IN_SET(j->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ if (j->state != DNS_ZONE_ITEM_PROBING)
+ tentative = false;
+
+ add_soa = true;
+ }
+
+ if (add_soa) {
+ r = dns_answer_add_soa(soa, dns_resource_key_name(key), LLMNR_DEFAULT_TTL, ifindex);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ /* If the caller sets ret_tentative to NULL, then use this as
+ * indication to not return tentative entries */
+
+ if (!ret_tentative && tentative)
+ goto return_empty;
+
+ *ret_answer = TAKE_PTR(answer);
+
+ if (ret_soa)
+ *ret_soa = TAKE_PTR(soa);
+
+ if (ret_tentative)
+ *ret_tentative = tentative;
+
+ return 1;
+
+return_empty:
+ *ret_answer = NULL;
+
+ if (ret_soa)
+ *ret_soa = NULL;
+
+ if (ret_tentative)
+ *ret_tentative = false;
+
+ return 0;
+}
+
+void dns_zone_item_conflict(DnsZoneItem *i) {
+ assert(i);
+
+ if (!IN_SET(i->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_VERIFYING, DNS_ZONE_ITEM_ESTABLISHED))
+ return;
+
+ log_info("Detected conflict on %s", strna(dns_resource_record_to_string(i->rr)));
+
+ dns_zone_item_probe_stop(i);
+
+ /* Withdraw the conflict item */
+ i->state = DNS_ZONE_ITEM_WITHDRAWN;
+
+ dnssd_signal_conflict(i->scope->manager, dns_resource_key_name(i->rr->key));
+
+ /* Maybe change the hostname */
+ if (manager_is_own_hostname(i->scope->manager, dns_resource_key_name(i->rr->key)) > 0)
+ manager_next_hostname(i->scope->manager);
+}
+
+void dns_zone_item_notify(DnsZoneItem *i) {
+ assert(i);
+ assert(i->probe_transaction);
+
+ if (i->block_ready > 0)
+ return;
+
+ if (IN_SET(i->probe_transaction->state, DNS_TRANSACTION_NULL, DNS_TRANSACTION_PENDING, DNS_TRANSACTION_VALIDATING))
+ return;
+
+ if (i->probe_transaction->state == DNS_TRANSACTION_SUCCESS) {
+ bool we_lost = false;
+
+ /* The probe got a successful reply. If we so far
+ * weren't established we just give up.
+ *
+ * In LLMNR case if we already
+ * were established, and the peer has the
+ * lexicographically larger IP address we continue
+ * and defend it. */
+
+ if (!IN_SET(i->state, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING)) {
+ log_debug("Got a successful probe for not yet established RR, we lost.");
+ we_lost = true;
+ } else if (i->probe_transaction->scope->protocol == DNS_PROTOCOL_LLMNR) {
+ assert(i->probe_transaction->received);
+ we_lost = memcmp(&i->probe_transaction->received->sender, &i->probe_transaction->received->destination, FAMILY_ADDRESS_SIZE(i->probe_transaction->received->family)) < 0;
+ if (we_lost)
+ log_debug("Got a successful probe reply for an established RR, and we have a lexicographically larger IP address and thus lost.");
+ }
+
+ if (we_lost) {
+ dns_zone_item_conflict(i);
+ return;
+ }
+
+ log_debug("Got a successful probe reply, but peer has lexicographically lower IP address and thus lost.");
+ }
+
+ log_debug("Record %s successfully probed.", strna(dns_resource_record_to_string(i->rr)));
+
+ dns_zone_item_probe_stop(i);
+ i->state = DNS_ZONE_ITEM_ESTABLISHED;
+}
+
+static int dns_zone_item_verify(DnsZoneItem *i) {
+ int r;
+
+ assert(i);
+
+ if (i->state != DNS_ZONE_ITEM_ESTABLISHED)
+ return 0;
+
+ log_debug("Verifying RR %s", strna(dns_resource_record_to_string(i->rr)));
+
+ i->state = DNS_ZONE_ITEM_VERIFYING;
+ r = dns_zone_item_probe_start(i);
+ if (r < 0) {
+ log_error_errno(r, "Failed to start probing for verifying RR: %m");
+ i->state = DNS_ZONE_ITEM_ESTABLISHED;
+ return r;
+ }
+
+ return 0;
+}
+
+int dns_zone_check_conflicts(DnsZone *zone, DnsResourceRecord *rr) {
+ DnsZoneItem *i, *first;
+ int c = 0;
+
+ assert(zone);
+ assert(rr);
+
+ /* This checks whether a response RR we received from somebody
+ * else is one that we actually thought was uniquely ours. If
+ * so, we'll verify our RRs. */
+
+ /* No conflict if we don't have the name at all. */
+ first = hashmap_get(zone->by_name, dns_resource_key_name(rr->key));
+ if (!first)
+ return 0;
+
+ /* No conflict if we have the exact same RR */
+ if (dns_zone_get(zone, rr))
+ return 0;
+
+ /* No conflict if it is DNS-SD RR used for service enumeration. */
+ if (dns_resource_key_is_dnssd_ptr(rr->key))
+ return 0;
+
+ /* OK, somebody else has RRs for the same name. Yuck! Let's
+ * start probing again */
+
+ LIST_FOREACH(by_name, i, first) {
+ if (dns_resource_record_equal(i->rr, rr))
+ continue;
+
+ dns_zone_item_verify(i);
+ c++;
+ }
+
+ return c;
+}
+
+int dns_zone_verify_conflicts(DnsZone *zone, DnsResourceKey *key) {
+ DnsZoneItem *i, *first;
+ int c = 0;
+
+ assert(zone);
+
+ /* Somebody else notified us about a possible conflict. Let's
+ * verify if that's true. */
+
+ first = hashmap_get(zone->by_name, dns_resource_key_name(key));
+ if (!first)
+ return 0;
+
+ LIST_FOREACH(by_name, i, first) {
+ dns_zone_item_verify(i);
+ c++;
+ }
+
+ return c;
+}
+
+void dns_zone_verify_all(DnsZone *zone) {
+ DnsZoneItem *i;
+
+ assert(zone);
+
+ HASHMAP_FOREACH(i, zone->by_key) {
+ DnsZoneItem *j;
+
+ LIST_FOREACH(by_key, j, i)
+ dns_zone_item_verify(j);
+ }
+}
+
+void dns_zone_dump(DnsZone *zone, FILE *f) {
+ DnsZoneItem *i;
+
+ if (!zone)
+ return;
+
+ if (!f)
+ f = stdout;
+
+ HASHMAP_FOREACH(i, zone->by_key) {
+ DnsZoneItem *j;
+
+ LIST_FOREACH(by_key, j, i) {
+ const char *t;
+
+ t = dns_resource_record_to_string(j->rr);
+ if (!t) {
+ log_oom();
+ continue;
+ }
+
+ fputc('\t', f);
+ fputs(t, f);
+ fputc('\n', f);
+ }
+ }
+}
+
+bool dns_zone_is_empty(DnsZone *zone) {
+ if (!zone)
+ return true;
+
+ return hashmap_isempty(zone->by_key);
+}
+
+bool dns_zone_contains_name(DnsZone *z, const char *name) {
+ DnsZoneItem *i, *first;
+
+ first = hashmap_get(z->by_name, name);
+ if (!first)
+ return false;
+
+ LIST_FOREACH(by_name, i, first) {
+ if (!IN_SET(i->state, DNS_ZONE_ITEM_PROBING, DNS_ZONE_ITEM_ESTABLISHED, DNS_ZONE_ITEM_VERIFYING))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/resolve/resolved-dns-zone.h b/src/resolve/resolved-dns-zone.h
new file mode 100644
index 0000000..1f5a6e0
--- /dev/null
+++ b/src/resolve/resolved-dns-zone.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hashmap.h"
+
+typedef struct DnsZone {
+ Hashmap *by_key;
+ Hashmap *by_name;
+} DnsZone;
+
+typedef struct DnsZoneItem DnsZoneItem;
+typedef enum DnsZoneItemState DnsZoneItemState;
+
+#include "resolved-dns-answer.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-rr.h"
+#include "resolved-dns-transaction.h"
+
+/* RFC 4795 Section 2.8. suggests a TTL of 30s by default */
+#define LLMNR_DEFAULT_TTL (30)
+
+/* RFC 6762 Section 10. suggests a TTL of 120s by default */
+#define MDNS_DEFAULT_TTL (120)
+
+enum DnsZoneItemState {
+ DNS_ZONE_ITEM_PROBING,
+ DNS_ZONE_ITEM_ESTABLISHED,
+ DNS_ZONE_ITEM_VERIFYING,
+ DNS_ZONE_ITEM_WITHDRAWN,
+};
+
+struct DnsZoneItem {
+ DnsScope *scope;
+ DnsResourceRecord *rr;
+
+ DnsZoneItemState state;
+
+ unsigned block_ready;
+
+ bool probing_enabled;
+
+ LIST_FIELDS(DnsZoneItem, by_key);
+ LIST_FIELDS(DnsZoneItem, by_name);
+
+ DnsTransaction *probe_transaction;
+};
+
+void dns_zone_flush(DnsZone *z);
+
+int dns_zone_put(DnsZone *z, DnsScope *s, DnsResourceRecord *rr, bool probe);
+DnsZoneItem* dns_zone_get(DnsZone *z, DnsResourceRecord *rr);
+void dns_zone_remove_rr(DnsZone *z, DnsResourceRecord *rr);
+int dns_zone_remove_rrs_by_key(DnsZone *z, DnsResourceKey *key);
+
+int dns_zone_lookup(DnsZone *z, DnsResourceKey *key, int ifindex, DnsAnswer **answer, DnsAnswer **soa, bool *tentative);
+
+void dns_zone_item_conflict(DnsZoneItem *i);
+void dns_zone_item_notify(DnsZoneItem *i);
+
+int dns_zone_check_conflicts(DnsZone *zone, DnsResourceRecord *rr);
+int dns_zone_verify_conflicts(DnsZone *zone, DnsResourceKey *key);
+
+void dns_zone_verify_all(DnsZone *zone);
+
+void dns_zone_item_probe_stop(DnsZoneItem *i);
+
+void dns_zone_dump(DnsZone *zone, FILE *f);
+bool dns_zone_is_empty(DnsZone *zone);
+bool dns_zone_contains_name(DnsZone *z, const char *name);
diff --git a/src/resolve/resolved-dnssd-bus.c b/src/resolve/resolved-dnssd-bus.c
new file mode 100644
index 0000000..d908cc6
--- /dev/null
+++ b/src/resolve/resolved-dnssd-bus.c
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-polkit.h"
+#include "missing_capability.h"
+#include "resolved-dnssd-bus.h"
+#include "resolved-dnssd.h"
+#include "resolved-link.h"
+#include "resolved-manager.h"
+#include "strv.h"
+#include "user-util.h"
+
+int bus_dnssd_method_unregister(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ DnssdService *s = userdata;
+ DnssdTxtData *txt_data;
+ Manager *m;
+ Link *l;
+ int r;
+
+ assert(message);
+ assert(s);
+
+ m = s->manager;
+
+ r = bus_verify_polkit_async(message, CAP_SYS_ADMIN,
+ "org.freedesktop.resolve1.unregister-service",
+ NULL, false, s->originator,
+ &m->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ HASHMAP_FOREACH(l, m->links) {
+ if (l->mdns_ipv4_scope) {
+ r = dns_scope_announce(l->mdns_ipv4_scope, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send goodbye messages in IPv4 scope: %m");
+
+ dns_zone_remove_rr(&l->mdns_ipv4_scope->zone, s->ptr_rr);
+ dns_zone_remove_rr(&l->mdns_ipv4_scope->zone, s->srv_rr);
+ LIST_FOREACH(items, txt_data, s->txt_data_items)
+ dns_zone_remove_rr(&l->mdns_ipv4_scope->zone, txt_data->rr);
+ }
+
+ if (l->mdns_ipv6_scope) {
+ r = dns_scope_announce(l->mdns_ipv6_scope, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to send goodbye messages in IPv6 scope: %m");
+
+ dns_zone_remove_rr(&l->mdns_ipv6_scope->zone, s->ptr_rr);
+ dns_zone_remove_rr(&l->mdns_ipv6_scope->zone, s->srv_rr);
+ LIST_FOREACH(items, txt_data, s->txt_data_items)
+ dns_zone_remove_rr(&l->mdns_ipv6_scope->zone, txt_data->rr);
+ }
+ }
+
+ dnssd_service_free(s);
+
+ manager_refresh_rrs(m);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int dnssd_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ Manager *m = userdata;
+ DnssdService *service;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/resolve1/dnssd", &name);
+ if (r <= 0)
+ return 0;
+
+ service = hashmap_get(m->dnssd_services, name);
+ if (!service)
+ return 0;
+
+ *found = service;
+ return 1;
+}
+
+static int dnssd_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ DnssdService *service;
+ unsigned c = 0;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(m);
+ assert(nodes);
+
+ l = new0(char*, hashmap_size(m->dnssd_services) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(service, m->dnssd_services) {
+ char *p;
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/dnssd", service->name, &p);
+ if (r < 0)
+ return r;
+
+ l[c++] = p;
+ }
+
+ l[c] = NULL;
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+static const sd_bus_vtable dnssd_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_METHOD("Unregister", NULL, NULL, bus_dnssd_method_unregister, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_SIGNAL("Conflicted", NULL, 0),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation dnssd_object = {
+ "/org/freedesktop/resolve1/dnssd",
+ "org.freedesktop.resolve1.DnssdService",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({dnssd_vtable, dnssd_object_find}),
+ .node_enumerator = dnssd_node_enumerator,
+};
diff --git a/src/resolve/resolved-dnssd-bus.h b/src/resolve/resolved-dnssd-bus.h
new file mode 100644
index 0000000..f396e23
--- /dev/null
+++ b/src/resolve/resolved-dnssd-bus.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-object.h"
+
+extern const BusObjectImplementation dnssd_object;
+
+int bus_dnssd_method_unregister(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/resolve/resolved-dnssd-gperf.gperf b/src/resolve/resolved-dnssd-gperf.gperf
new file mode 100644
index 0000000..2780b85
--- /dev/null
+++ b/src/resolve/resolved-dnssd-gperf.gperf
@@ -0,0 +1,24 @@
+%{
+#include <stddef.h>
+#include "conf-parser.h"
+#include "resolved-conf.h"
+#include "resolved-dnssd.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name resolved_dnssd_gperf_hash
+%define lookup-function-name resolved_dnssd_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Service.Name, config_parse_dnssd_service_name, 0, 0
+Service.Type, config_parse_dnssd_service_type, 0, 0
+Service.Port, config_parse_ip_port, 0, offsetof(DnssdService, port)
+Service.Priority, config_parse_uint16, 0, offsetof(DnssdService, priority)
+Service.Weight, config_parse_uint16, 0, offsetof(DnssdService, weight)
+Service.TxtText, config_parse_dnssd_txt, DNS_TXT_ITEM_TEXT, 0
+Service.TxtData, config_parse_dnssd_txt, DNS_TXT_ITEM_DATA, 0
diff --git a/src/resolve/resolved-dnssd.c b/src/resolve/resolved-dnssd.c
new file mode 100644
index 0000000..8b40639
--- /dev/null
+++ b/src/resolve/resolved-dnssd.c
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "resolved-dnssd.h"
+#include "resolved-dns-rr.h"
+#include "resolved-manager.h"
+#include "resolved-conf.h"
+#include "specifier.h"
+#include "strv.h"
+
+#define DNSSD_SERVICE_DIRS ((const char* const*) CONF_PATHS_STRV("systemd/dnssd"))
+
+DnssdTxtData *dnssd_txtdata_free(DnssdTxtData *txt_data) {
+ if (!txt_data)
+ return NULL;
+
+ dns_resource_record_unref(txt_data->rr);
+ dns_txt_item_free_all(txt_data->txt);
+
+ return mfree(txt_data);
+}
+
+DnssdTxtData *dnssd_txtdata_free_all(DnssdTxtData *txt_data) {
+ DnssdTxtData *next;
+
+ if (!txt_data)
+ return NULL;
+
+ next = txt_data->items_next;
+
+ dnssd_txtdata_free(txt_data);
+
+ return dnssd_txtdata_free_all(next);
+}
+
+DnssdService *dnssd_service_free(DnssdService *service) {
+ if (!service)
+ return NULL;
+
+ if (service->manager)
+ hashmap_remove(service->manager->dnssd_services, service->name);
+
+ dns_resource_record_unref(service->ptr_rr);
+ dns_resource_record_unref(service->srv_rr);
+
+ dnssd_txtdata_free_all(service->txt_data_items);
+
+ free(service->filename);
+ free(service->name);
+ free(service->type);
+ free(service->name_template);
+
+ return mfree(service);
+}
+
+static int dnssd_service_load(Manager *manager, const char *filename) {
+ _cleanup_(dnssd_service_freep) DnssdService *service = NULL;
+ _cleanup_(dnssd_txtdata_freep) DnssdTxtData *txt_data = NULL;
+ char *d;
+ const char *dropin_dirname;
+ int r;
+
+ assert(manager);
+ assert(filename);
+
+ service = new0(DnssdService, 1);
+ if (!service)
+ return log_oom();
+
+ service->filename = strdup(filename);
+ if (!service->filename)
+ return log_oom();
+
+ service->name = strdup(basename(filename));
+ if (!service->name)
+ return log_oom();
+
+ d = endswith(service->name, ".dnssd");
+ if (!d)
+ return -EINVAL;
+
+ assert(streq(d, ".dnssd"));
+
+ *d = '\0';
+
+ dropin_dirname = strjoina(service->name, ".dnssd.d");
+
+ r = config_parse_many(
+ filename, DNSSD_SERVICE_DIRS, dropin_dirname,
+ "Service\0",
+ config_item_perf_lookup, resolved_dnssd_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ service,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (!service->name_template)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s doesn't define service instance name",
+ service->name);
+
+ if (!service->type)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s doesn't define service type",
+ service->name);
+
+ if (LIST_IS_EMPTY(service->txt_data_items)) {
+ txt_data = new0(DnssdTxtData, 1);
+ if (!txt_data)
+ return log_oom();
+
+ r = dns_txt_item_new_empty(&txt_data->txt);
+ if (r < 0)
+ return r;
+
+ LIST_PREPEND(items, service->txt_data_items, txt_data);
+ txt_data = NULL;
+ }
+
+ r = hashmap_ensure_allocated(&manager->dnssd_services, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(manager->dnssd_services, service->name, service);
+ if (r < 0)
+ return r;
+
+ service->manager = manager;
+
+ r = dnssd_update_rrs(service);
+ if (r < 0)
+ return r;
+
+ service = NULL;
+
+ return 0;
+}
+
+static int specifier_dnssd_host_name(char specifier, const void *data, const void *userdata, char **ret) {
+ DnssdService *s = (DnssdService *) userdata;
+ char *n;
+
+ assert(s);
+ assert(s->manager);
+ assert(s->manager->llmnr_hostname);
+
+ n = strdup(s->manager->llmnr_hostname);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int dnssd_render_instance_name(DnssdService *s, char **ret_name) {
+ static const Specifier specifier_table[] = {
+ { 'a', specifier_architecture, NULL },
+ { 'b', specifier_boot_id, NULL },
+ { 'B', specifier_os_build_id, NULL },
+ { 'H', specifier_dnssd_host_name, NULL },
+ { 'm', specifier_machine_id, NULL },
+ { 'o', specifier_os_id, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ { 'w', specifier_os_version_id, NULL },
+ { 'W', specifier_os_variant_id, NULL },
+ {}
+ };
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ assert(s);
+ assert(s->name_template);
+
+ r = specifier_printf(s->name_template, specifier_table, s, &name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to replace specifiers: %m");
+
+ if (!dns_service_name_is_valid(name))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Service instance name '%s' is invalid.",
+ name);
+
+ *ret_name = TAKE_PTR(name);
+
+ return 0;
+}
+
+int dnssd_load(Manager *manager) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ assert(manager);
+
+ if (manager->mdns_support != RESOLVE_SUPPORT_YES)
+ return 0;
+
+ r = conf_files_list_strv(&files, ".dnssd", NULL, 0, DNSSD_SERVICE_DIRS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate .dnssd files: %m");
+
+ STRV_FOREACH_BACKWARDS(f, files) {
+ r = dnssd_service_load(manager, *f);
+ if (r < 0)
+ log_warning_errno(r, "Failed to load '%s': %m", *f);;
+ }
+
+ return 0;
+}
+
+int dnssd_update_rrs(DnssdService *s) {
+ _cleanup_free_ char *n = NULL;
+ _cleanup_free_ char *service_name = NULL;
+ _cleanup_free_ char *full_name = NULL;
+ DnssdTxtData *txt_data;
+ int r;
+
+ assert(s);
+ assert(s->txt_data_items);
+ assert(s->manager);
+
+ s->ptr_rr = dns_resource_record_unref(s->ptr_rr);
+ s->srv_rr = dns_resource_record_unref(s->srv_rr);
+ LIST_FOREACH(items, txt_data, s->txt_data_items)
+ txt_data->rr = dns_resource_record_unref(txt_data->rr);
+
+ r = dnssd_render_instance_name(s, &n);
+ if (r < 0)
+ return r;
+
+ r = dns_name_concat(s->type, "local", 0, &service_name);
+ if (r < 0)
+ return r;
+ r = dns_name_concat(n, service_name, 0, &full_name);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(items, txt_data, s->txt_data_items) {
+ txt_data->rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_TXT,
+ full_name);
+ if (!txt_data->rr)
+ goto oom;
+
+ txt_data->rr->ttl = MDNS_DEFAULT_TTL;
+ txt_data->rr->txt.items = dns_txt_item_copy(txt_data->txt);
+ if (!txt_data->rr->txt.items)
+ goto oom;
+ }
+
+ s->ptr_rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_PTR,
+ service_name);
+ if (!s->ptr_rr)
+ goto oom;
+
+ s->ptr_rr->ttl = MDNS_DEFAULT_TTL;
+ s->ptr_rr->ptr.name = strdup(full_name);
+ if (!s->ptr_rr->ptr.name)
+ goto oom;
+
+ s->srv_rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_SRV,
+ full_name);
+ if (!s->srv_rr)
+ goto oom;
+
+ s->srv_rr->ttl = MDNS_DEFAULT_TTL;
+ s->srv_rr->srv.priority = s->priority;
+ s->srv_rr->srv.weight = s->weight;
+ s->srv_rr->srv.port = s->port;
+ s->srv_rr->srv.name = strdup(s->manager->mdns_hostname);
+ if (!s->srv_rr->srv.name)
+ goto oom;
+
+ return 0;
+
+oom:
+ LIST_FOREACH(items, txt_data, s->txt_data_items)
+ txt_data->rr = dns_resource_record_unref(txt_data->rr);
+ s->ptr_rr = dns_resource_record_unref(s->ptr_rr);
+ s->srv_rr = dns_resource_record_unref(s->srv_rr);
+ return -ENOMEM;
+}
+
+int dnssd_txt_item_new_from_string(const char *key, const char *value, DnsTxtItem **ret_item) {
+ size_t length;
+ DnsTxtItem *i;
+
+ length = strlen(key);
+
+ if (!isempty(value))
+ length += strlen(value) + 1; /* length of value plus '=' */
+
+ i = malloc0(offsetof(DnsTxtItem, data) + length + 1); /* for safety reasons we add an extra NUL byte */
+ if (!i)
+ return -ENOMEM;
+
+ memcpy(i->data, key, strlen(key));
+ if (!isempty(value)) {
+ memcpy(i->data + strlen(key), "=", 1);
+ memcpy(i->data + strlen(key) + 1, value, strlen(value));
+ }
+ i->length = length;
+
+ *ret_item = TAKE_PTR(i);
+
+ return 0;
+}
+
+int dnssd_txt_item_new_from_data(const char *key, const void *data, const size_t size, DnsTxtItem **ret_item) {
+ size_t length;
+ DnsTxtItem *i;
+
+ length = strlen(key);
+
+ if (size > 0)
+ length += size + 1; /* size of date plus '=' */
+
+ i = malloc0(offsetof(DnsTxtItem, data) + length + 1); /* for safety reasons we add an extra NUL byte */
+ if (!i)
+ return -ENOMEM;
+
+ memcpy(i->data, key, strlen(key));
+ if (size > 0) {
+ memcpy(i->data + strlen(key), "=", 1);
+ memcpy(i->data + strlen(key) + 1, data, size);
+ }
+ i->length = length;
+
+ *ret_item = TAKE_PTR(i);
+
+ return 0;
+}
+
+void dnssd_signal_conflict(Manager *manager, const char *name) {
+ DnssdService *s;
+ int r;
+
+ HASHMAP_FOREACH(s, manager->dnssd_services) {
+ if (s->withdrawn)
+ continue;
+
+ if (dns_name_equal(dns_resource_key_name(s->srv_rr->key), name)) {
+ _cleanup_free_ char *path = NULL;
+
+ s->withdrawn = true;
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/dnssd", s->name, &path);
+ if (r < 0) {
+ log_error_errno(r, "Can't get D-BUS object path: %m");
+ return;
+ }
+
+ r = sd_bus_emit_signal(manager->bus,
+ path,
+ "org.freedesktop.resolve1.DnssdService",
+ "Conflicted",
+ NULL);
+ if (r < 0) {
+ log_error_errno(r, "Cannot emit signal: %m");
+ return;
+ }
+
+ break;
+ }
+ }
+}
diff --git a/src/resolve/resolved-dnssd.h b/src/resolve/resolved-dnssd.h
new file mode 100644
index 0000000..8fe7556
--- /dev/null
+++ b/src/resolve/resolved-dnssd.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include "list.h"
+
+typedef struct DnssdService DnssdService;
+typedef struct DnssdTxtData DnssdTxtData;
+
+typedef struct Manager Manager;
+typedef struct DnsResourceRecord DnsResourceRecord;
+typedef struct DnsTxtItem DnsTxtItem;
+
+enum {
+ DNS_TXT_ITEM_TEXT,
+ DNS_TXT_ITEM_DATA
+};
+
+struct DnssdTxtData {
+ DnsResourceRecord *rr;
+
+ LIST_HEAD(DnsTxtItem, txt);
+
+ LIST_FIELDS(DnssdTxtData, items);
+};
+
+struct DnssdService {
+ char *filename;
+ char *name;
+ char *name_template;
+ char *type;
+ uint16_t port;
+ uint16_t priority;
+ uint16_t weight;
+
+ DnsResourceRecord *ptr_rr;
+ DnsResourceRecord *srv_rr;
+
+ /* Section 6.8 of RFC 6763 allows having service
+ * instances with multiple TXT resource records. */
+ LIST_HEAD(DnssdTxtData, txt_data_items);
+
+ Manager *manager;
+
+ bool withdrawn:1;
+ uid_t originator;
+};
+
+DnssdService *dnssd_service_free(DnssdService *service);
+DnssdTxtData *dnssd_txtdata_free(DnssdTxtData *txt_data);
+DnssdTxtData *dnssd_txtdata_free_all(DnssdTxtData *txt_data);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnssdService*, dnssd_service_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DnssdTxtData*, dnssd_txtdata_free);
+
+int dnssd_render_instance_name(DnssdService *s, char **ret_name);
+int dnssd_load(Manager *manager);
+int dnssd_txt_item_new_from_string(const char *key, const char *value, DnsTxtItem **ret_item);
+int dnssd_txt_item_new_from_data(const char *key, const void *value, const size_t size, DnsTxtItem **ret_item);
+int dnssd_update_rrs(DnssdService *s);
+void dnssd_signal_conflict(Manager *manager, const char *name);
diff --git a/src/resolve/resolved-dnstls-gnutls.c b/src/resolve/resolved-dnstls-gnutls.c
new file mode 100644
index 0000000..d3edd35
--- /dev/null
+++ b/src/resolve/resolved-dnstls-gnutls.c
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if !ENABLE_DNS_OVER_TLS || !DNS_OVER_TLS_USE_GNUTLS
+#error This source file requires DNS-over-TLS to be enabled and GnuTLS to be available.
+#endif
+
+#include <gnutls/socket.h>
+
+#include "resolved-dns-stream.h"
+#include "resolved-dnstls.h"
+#include "resolved-manager.h"
+
+#define TLS_PROTOCOL_PRIORITY "NORMAL:-VERS-ALL:+VERS-TLS1.3:+VERS-TLS1.2"
+DEFINE_TRIVIAL_CLEANUP_FUNC(gnutls_session_t, gnutls_deinit);
+
+static ssize_t dnstls_stream_writev(gnutls_transport_ptr_t p, const giovec_t *iov, int iovcnt) {
+ int r;
+
+ assert(p);
+
+ r = dns_stream_writev((DnsStream*) p, (const struct iovec*) iov, iovcnt, DNS_STREAM_WRITE_TLS_DATA);
+ if (r < 0) {
+ errno = -r;
+ return -1;
+ }
+
+ return r;
+}
+
+int dnstls_stream_connect_tls(DnsStream *stream, DnsServer *server) {
+ _cleanup_(gnutls_deinitp) gnutls_session_t gs = NULL;
+ int r;
+
+ assert(stream);
+ assert(server);
+
+ r = gnutls_init(&gs, GNUTLS_CLIENT | GNUTLS_ENABLE_FALSE_START | GNUTLS_NONBLOCK);
+ if (r < 0)
+ return r;
+
+ /* As DNS-over-TLS is a recent protocol, older TLS versions can be disabled */
+ r = gnutls_priority_set_direct(gs, TLS_PROTOCOL_PRIORITY, NULL);
+ if (r < 0)
+ return r;
+
+ r = gnutls_credentials_set(gs, GNUTLS_CRD_CERTIFICATE, stream->manager->dnstls_data.cert_cred);
+ if (r < 0)
+ return r;
+
+ if (server->dnstls_data.session_data.size > 0) {
+ gnutls_session_set_data(gs, server->dnstls_data.session_data.data, server->dnstls_data.session_data.size);
+
+ // Clear old session ticket
+ gnutls_free(server->dnstls_data.session_data.data);
+ server->dnstls_data.session_data.data = NULL;
+ server->dnstls_data.session_data.size = 0;
+ }
+
+ if (server->manager->dns_over_tls_mode == DNS_OVER_TLS_YES) {
+ if (server->server_name)
+ gnutls_session_set_verify_cert(gs, server->server_name, 0);
+ else {
+ stream->dnstls_data.validation.type = GNUTLS_DT_IP_ADDRESS;
+ if (server->family == AF_INET) {
+ stream->dnstls_data.validation.data = (unsigned char*) &server->address.in.s_addr;
+ stream->dnstls_data.validation.size = 4;
+ } else {
+ stream->dnstls_data.validation.data = server->address.in6.s6_addr;
+ stream->dnstls_data.validation.size = 16;
+ }
+ gnutls_session_set_verify_cert2(gs, &stream->dnstls_data.validation, 1, 0);
+ }
+ }
+
+ if (server->server_name) {
+ r = gnutls_server_name_set(gs, GNUTLS_NAME_DNS, server->server_name, strlen(server->server_name));
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to set server name: %s", gnutls_strerror(r));
+ }
+
+ gnutls_handshake_set_timeout(gs, GNUTLS_DEFAULT_HANDSHAKE_TIMEOUT);
+
+ gnutls_transport_set_ptr2(gs, (gnutls_transport_ptr_t) (long) stream->fd, stream);
+ gnutls_transport_set_vec_push_function(gs, &dnstls_stream_writev);
+
+ stream->encrypted = true;
+ stream->dnstls_data.handshake = gnutls_handshake(gs);
+ if (stream->dnstls_data.handshake < 0 && gnutls_error_is_fatal(stream->dnstls_data.handshake))
+ return -ECONNREFUSED;
+
+ stream->dnstls_data.session = TAKE_PTR(gs);
+
+ return 0;
+}
+
+void dnstls_stream_free(DnsStream *stream) {
+ assert(stream);
+ assert(stream->encrypted);
+
+ if (stream->dnstls_data.session)
+ gnutls_deinit(stream->dnstls_data.session);
+}
+
+int dnstls_stream_on_io(DnsStream *stream, uint32_t revents) {
+ int r;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.session);
+
+ if (stream->dnstls_data.shutdown) {
+ r = gnutls_bye(stream->dnstls_data.session, GNUTLS_SHUT_RDWR);
+ if (r == GNUTLS_E_AGAIN) {
+ stream->dnstls_events = gnutls_record_get_direction(stream->dnstls_data.session) == 1 ? EPOLLOUT : EPOLLIN;
+ return -EAGAIN;
+ } else if (r < 0)
+ log_debug("Failed to invoke gnutls_bye: %s", gnutls_strerror(r));
+
+ stream->dnstls_events = 0;
+ stream->dnstls_data.shutdown = false;
+ dns_stream_unref(stream);
+ return DNSTLS_STREAM_CLOSED;
+ } else if (stream->dnstls_data.handshake < 0) {
+ stream->dnstls_data.handshake = gnutls_handshake(stream->dnstls_data.session);
+ if (stream->dnstls_data.handshake == GNUTLS_E_AGAIN) {
+ stream->dnstls_events = gnutls_record_get_direction(stream->dnstls_data.session) == 1 ? EPOLLOUT : EPOLLIN;
+ return -EAGAIN;
+ } else if (stream->dnstls_data.handshake < 0) {
+ log_debug("Failed to invoke gnutls_handshake: %s", gnutls_strerror(stream->dnstls_data.handshake));
+ if (gnutls_error_is_fatal(stream->dnstls_data.handshake))
+ return -ECONNREFUSED;
+ }
+
+ stream->dnstls_events = 0;
+ }
+
+ return 0;
+}
+
+int dnstls_stream_shutdown(DnsStream *stream, int error) {
+ int r;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.session);
+
+ /* Store TLS Ticket for faster successive TLS handshakes */
+ if (stream->server && stream->server->dnstls_data.session_data.size == 0 && stream->dnstls_data.handshake == GNUTLS_E_SUCCESS)
+ gnutls_session_get_data2(stream->dnstls_data.session, &stream->server->dnstls_data.session_data);
+
+ if (IN_SET(error, ETIMEDOUT, 0)) {
+ r = gnutls_bye(stream->dnstls_data.session, GNUTLS_SHUT_RDWR);
+ if (r == GNUTLS_E_AGAIN) {
+ if (!stream->dnstls_data.shutdown) {
+ stream->dnstls_data.shutdown = true;
+ dns_stream_ref(stream);
+ return -EAGAIN;
+ }
+ } else if (r < 0)
+ log_debug("Failed to invoke gnutls_bye: %s", gnutls_strerror(r));
+ }
+
+ return 0;
+}
+
+ssize_t dnstls_stream_write(DnsStream *stream, const char *buf, size_t count) {
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.session);
+ assert(buf);
+
+ ss = gnutls_record_send(stream->dnstls_data.session, buf, count);
+ if (ss < 0)
+ switch(ss) {
+ case GNUTLS_E_INTERRUPTED:
+ return -EINTR;
+ case GNUTLS_E_AGAIN:
+ return -EAGAIN;
+ default:
+ return log_debug_errno(SYNTHETIC_ERRNO(EPIPE),
+ "Failed to invoke gnutls_record_send: %s",
+ gnutls_strerror(ss));
+ }
+
+ return ss;
+}
+
+ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count) {
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.session);
+ assert(buf);
+
+ ss = gnutls_record_recv(stream->dnstls_data.session, buf, count);
+ if (ss < 0)
+ switch(ss) {
+ case GNUTLS_E_INTERRUPTED:
+ return -EINTR;
+ case GNUTLS_E_AGAIN:
+ return -EAGAIN;
+ default:
+ return log_debug_errno(SYNTHETIC_ERRNO(EPIPE),
+ "Failed to invoke gnutls_record_recv: %s",
+ gnutls_strerror(ss));
+ }
+
+ return ss;
+}
+
+void dnstls_server_free(DnsServer *server) {
+ assert(server);
+
+ if (server->dnstls_data.session_data.data)
+ gnutls_free(server->dnstls_data.session_data.data);
+}
+
+int dnstls_manager_init(Manager *manager) {
+ int r;
+ assert(manager);
+
+ r = gnutls_certificate_allocate_credentials(&manager->dnstls_data.cert_cred);
+ if (r < 0)
+ return -ENOMEM;
+
+ r = gnutls_certificate_set_x509_system_trust(manager->dnstls_data.cert_cred);
+ if (r < 0)
+ log_warning("Failed to load system trust store: %s", gnutls_strerror(r));
+
+ return 0;
+}
+
+void dnstls_manager_free(Manager *manager) {
+ assert(manager);
+
+ if (manager->dnstls_data.cert_cred)
+ gnutls_certificate_free_credentials(manager->dnstls_data.cert_cred);
+}
diff --git a/src/resolve/resolved-dnstls-gnutls.h b/src/resolve/resolved-dnstls-gnutls.h
new file mode 100644
index 0000000..dc1255f
--- /dev/null
+++ b/src/resolve/resolved-dnstls-gnutls.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if !ENABLE_DNS_OVER_TLS || !DNS_OVER_TLS_USE_GNUTLS
+#error This source file requires DNS-over-TLS to be enabled and GnuTLS to be available.
+#endif
+
+#include <gnutls/gnutls.h>
+#include <stdbool.h>
+
+struct DnsTlsManagerData {
+ gnutls_certificate_credentials_t cert_cred;
+};
+
+struct DnsTlsServerData {
+ gnutls_datum_t session_data;
+};
+
+struct DnsTlsStreamData {
+ gnutls_session_t session;
+ gnutls_typed_vdata_st validation;
+ int handshake;
+ bool shutdown;
+};
diff --git a/src/resolve/resolved-dnstls-openssl.c b/src/resolve/resolved-dnstls-openssl.c
new file mode 100644
index 0000000..defddb5
--- /dev/null
+++ b/src/resolve/resolved-dnstls-openssl.c
@@ -0,0 +1,411 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if !ENABLE_DNS_OVER_TLS || !DNS_OVER_TLS_USE_OPENSSL
+#error This source file requires DNS-over-TLS to be enabled and OpenSSL to be available.
+#endif
+
+#include <openssl/bio.h>
+#include <openssl/err.h>
+#include <openssl/x509v3.h>
+
+#include "io-util.h"
+#include "resolved-dns-stream.h"
+#include "resolved-dnstls.h"
+#include "resolved-manager.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SSL*, SSL_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(BIO*, BIO_free);
+
+static int dnstls_flush_write_buffer(DnsStream *stream) {
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+
+ if (stream->dnstls_data.buffer_offset < stream->dnstls_data.write_buffer->length) {
+ assert(stream->dnstls_data.write_buffer->data);
+
+ struct iovec iov[1];
+ iov[0] = IOVEC_MAKE(stream->dnstls_data.write_buffer->data + stream->dnstls_data.buffer_offset,
+ stream->dnstls_data.write_buffer->length - stream->dnstls_data.buffer_offset);
+ ss = dns_stream_writev(stream, iov, 1, DNS_STREAM_WRITE_TLS_DATA);
+ if (ss < 0) {
+ if (ss == -EAGAIN)
+ stream->dnstls_events |= EPOLLOUT;
+
+ return ss;
+ } else {
+ stream->dnstls_data.buffer_offset += ss;
+
+ if (stream->dnstls_data.buffer_offset < stream->dnstls_data.write_buffer->length) {
+ stream->dnstls_events |= EPOLLOUT;
+ return -EAGAIN;
+ } else {
+ BIO_reset(SSL_get_wbio(stream->dnstls_data.ssl));
+ stream->dnstls_data.buffer_offset = 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int dnstls_stream_connect_tls(DnsStream *stream, DnsServer *server) {
+ _cleanup_(BIO_freep) BIO *rb = NULL, *wb = NULL;
+ _cleanup_(SSL_freep) SSL *s = NULL;
+ int error, r;
+
+ assert(stream);
+ assert(stream->manager);
+ assert(server);
+
+ rb = BIO_new_socket(stream->fd, 0);
+ if (!rb)
+ return -ENOMEM;
+
+ wb = BIO_new(BIO_s_mem());
+ if (!wb)
+ return -ENOMEM;
+
+ BIO_get_mem_ptr(wb, &stream->dnstls_data.write_buffer);
+ stream->dnstls_data.buffer_offset = 0;
+
+ s = SSL_new(stream->manager->dnstls_data.ctx);
+ if (!s)
+ return -ENOMEM;
+
+ SSL_set_connect_state(s);
+ r = SSL_set_session(s, server->dnstls_data.session);
+ if (r == 0)
+ return -EIO;
+ SSL_set_bio(s, TAKE_PTR(rb), TAKE_PTR(wb));
+
+ if (server->manager->dns_over_tls_mode == DNS_OVER_TLS_YES) {
+ X509_VERIFY_PARAM *v;
+
+ SSL_set_verify(s, SSL_VERIFY_PEER, NULL);
+ v = SSL_get0_param(s);
+ if (server->server_name) {
+ X509_VERIFY_PARAM_set_hostflags(v, X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS);
+ if (X509_VERIFY_PARAM_set1_host(v, server->server_name, 0) == 0)
+ return -ECONNREFUSED;
+ } else {
+ const unsigned char *ip;
+ ip = server->family == AF_INET ? (const unsigned char*) &server->address.in.s_addr : server->address.in6.s6_addr;
+ if (X509_VERIFY_PARAM_set1_ip(v, ip, FAMILY_ADDRESS_SIZE(server->family)) == 0)
+ return -ECONNREFUSED;
+ }
+ }
+
+ if (server->server_name) {
+ r = SSL_set_tlsext_host_name(s, server->server_name);
+ if (r <= 0) {
+ char errbuf[256];
+
+ error = ERR_get_error();
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to set server name: %s", errbuf);
+ }
+ }
+
+ ERR_clear_error();
+ stream->dnstls_data.handshake = SSL_do_handshake(s);
+ if (stream->dnstls_data.handshake <= 0) {
+ error = SSL_get_error(s, stream->dnstls_data.handshake);
+ if (!IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ return log_debug_errno(SYNTHETIC_ERRNO(ECONNREFUSED),
+ "Failed to invoke SSL_do_handshake: %s", errbuf);
+ }
+ }
+
+ stream->encrypted = true;
+ stream->dnstls_data.ssl = TAKE_PTR(s);
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0 && r != -EAGAIN) {
+ SSL_free(TAKE_PTR(stream->dnstls_data.ssl));
+ return r;
+ }
+
+ return 0;
+}
+
+void dnstls_stream_free(DnsStream *stream) {
+ assert(stream);
+ assert(stream->encrypted);
+
+ if (stream->dnstls_data.ssl)
+ SSL_free(stream->dnstls_data.ssl);
+}
+
+int dnstls_stream_on_io(DnsStream *stream, uint32_t revents) {
+ int error, r;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.ssl);
+
+ /* Flush write buffer when requested by OpenSSL */
+ if ((revents & EPOLLOUT) && (stream->dnstls_events & EPOLLOUT)) {
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+ }
+
+ if (stream->dnstls_data.shutdown) {
+ ERR_clear_error();
+ r = SSL_shutdown(stream->dnstls_data.ssl);
+ if (r == 0) {
+ stream->dnstls_events = 0;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return -EAGAIN;
+ } else if (r < 0) {
+ error = SSL_get_error(stream->dnstls_data.ssl, r);
+ if (IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return -EAGAIN;
+ } else if (error == SSL_ERROR_SYSCALL) {
+ if (errno > 0)
+ log_debug_errno(errno, "Failed to invoke SSL_shutdown, ignoring: %m");
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_shutdown, ignoring: %s", errbuf);
+ }
+ }
+
+ stream->dnstls_events = 0;
+ stream->dnstls_data.shutdown = false;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ dns_stream_unref(stream);
+ return DNSTLS_STREAM_CLOSED;
+ } else if (stream->dnstls_data.handshake <= 0) {
+ ERR_clear_error();
+ stream->dnstls_data.handshake = SSL_do_handshake(stream->dnstls_data.ssl);
+ if (stream->dnstls_data.handshake <= 0) {
+ error = SSL_get_error(stream->dnstls_data.ssl, stream->dnstls_data.handshake);
+ if (IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return -EAGAIN;
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ return log_debug_errno(SYNTHETIC_ERRNO(ECONNREFUSED),
+ "Failed to invoke SSL_do_handshake: %s",
+ errbuf);
+ }
+ }
+
+ stream->dnstls_events = 0;
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int dnstls_stream_shutdown(DnsStream *stream, int error) {
+ int ssl_error, r;
+ SSL_SESSION *s;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.ssl);
+
+ if (stream->server) {
+ s = SSL_get1_session(stream->dnstls_data.ssl);
+ if (s) {
+ if (stream->server->dnstls_data.session)
+ SSL_SESSION_free(stream->server->dnstls_data.session);
+
+ stream->server->dnstls_data.session = s;
+ }
+ }
+
+ if (error == ETIMEDOUT) {
+ ERR_clear_error();
+ r = SSL_shutdown(stream->dnstls_data.ssl);
+ if (r == 0) {
+ if (!stream->dnstls_data.shutdown) {
+ stream->dnstls_data.shutdown = true;
+ dns_stream_ref(stream);
+ }
+
+ stream->dnstls_events = 0;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return -EAGAIN;
+ } else if (r < 0) {
+ ssl_error = SSL_get_error(stream->dnstls_data.ssl, r);
+ if (IN_SET(ssl_error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = ssl_error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0 && r != -EAGAIN)
+ return r;
+
+ if (!stream->dnstls_data.shutdown) {
+ stream->dnstls_data.shutdown = true;
+ dns_stream_ref(stream);
+ }
+ return -EAGAIN;
+ } else if (ssl_error == SSL_ERROR_SYSCALL) {
+ if (errno > 0)
+ log_debug_errno(errno, "Failed to invoke SSL_shutdown, ignoring: %m");
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(ssl_error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_shutdown, ignoring: %s", errbuf);
+ }
+ }
+
+ stream->dnstls_events = 0;
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+ssize_t dnstls_stream_write(DnsStream *stream, const char *buf, size_t count) {
+ int error, r;
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.ssl);
+ assert(buf);
+
+ ERR_clear_error();
+ ss = r = SSL_write(stream->dnstls_data.ssl, buf, count);
+ if (r <= 0) {
+ error = SSL_get_error(stream->dnstls_data.ssl, r);
+ if (IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+ ss = -EAGAIN;
+ } else if (error == SSL_ERROR_ZERO_RETURN) {
+ stream->dnstls_events = 0;
+ ss = 0;
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_write: %s", errbuf);
+ stream->dnstls_events = 0;
+ ss = -EPIPE;
+ }
+ } else
+ stream->dnstls_events = 0;
+
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return ss;
+}
+
+ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count) {
+ int error, r;
+ ssize_t ss;
+
+ assert(stream);
+ assert(stream->encrypted);
+ assert(stream->dnstls_data.ssl);
+ assert(buf);
+
+ ERR_clear_error();
+ ss = r = SSL_read(stream->dnstls_data.ssl, buf, count);
+ if (r <= 0) {
+ error = SSL_get_error(stream->dnstls_data.ssl, r);
+ if (IN_SET(error, SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE)) {
+ stream->dnstls_events = error == SSL_ERROR_WANT_READ ? EPOLLIN : EPOLLOUT;
+ ss = -EAGAIN;
+ } else if (error == SSL_ERROR_ZERO_RETURN) {
+ stream->dnstls_events = 0;
+ ss = 0;
+ } else {
+ char errbuf[256];
+
+ ERR_error_string_n(error, errbuf, sizeof(errbuf));
+ log_debug("Failed to invoke SSL_read: %s", errbuf);
+ stream->dnstls_events = 0;
+ ss = -EPIPE;
+ }
+ } else
+ stream->dnstls_events = 0;
+
+ /* flush write buffer in cache of renegotiation */
+ r = dnstls_flush_write_buffer(stream);
+ if (r < 0)
+ return r;
+
+ return ss;
+}
+
+void dnstls_server_free(DnsServer *server) {
+ assert(server);
+
+ if (server->dnstls_data.session)
+ SSL_SESSION_free(server->dnstls_data.session);
+}
+
+int dnstls_manager_init(Manager *manager) {
+ int r;
+
+ assert(manager);
+
+ ERR_load_crypto_strings();
+ SSL_load_error_strings();
+
+ manager->dnstls_data.ctx = SSL_CTX_new(TLS_client_method());
+ if (!manager->dnstls_data.ctx)
+ return -ENOMEM;
+
+ r = SSL_CTX_set_min_proto_version(manager->dnstls_data.ctx, TLS1_2_VERSION);
+ if (r == 0)
+ return -EIO;
+
+ (void) SSL_CTX_set_options(manager->dnstls_data.ctx, SSL_OP_NO_COMPRESSION);
+
+ r = SSL_CTX_set_default_verify_paths(manager->dnstls_data.ctx);
+ if (r == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to load system trust store: %s",
+ ERR_error_string(ERR_get_error(), NULL));
+
+ return 0;
+}
+
+void dnstls_manager_free(Manager *manager) {
+ assert(manager);
+
+ if (manager->dnstls_data.ctx)
+ SSL_CTX_free(manager->dnstls_data.ctx);
+}
diff --git a/src/resolve/resolved-dnstls-openssl.h b/src/resolve/resolved-dnstls-openssl.h
new file mode 100644
index 0000000..a73b77b
--- /dev/null
+++ b/src/resolve/resolved-dnstls-openssl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if !ENABLE_DNS_OVER_TLS || !DNS_OVER_TLS_USE_OPENSSL
+#error This source file requires DNS-over-TLS to be enabled and OpenSSL to be available.
+#endif
+
+#include <openssl/ssl.h>
+#include <stdbool.h>
+
+struct DnsTlsManagerData {
+ SSL_CTX *ctx;
+};
+
+struct DnsTlsServerData {
+ SSL_SESSION *session;
+};
+
+struct DnsTlsStreamData {
+ int handshake;
+ bool shutdown;
+ SSL *ssl;
+ BUF_MEM *write_buffer;
+ size_t buffer_offset;
+};
diff --git a/src/resolve/resolved-dnstls.h b/src/resolve/resolved-dnstls.h
new file mode 100644
index 0000000..b638d61
--- /dev/null
+++ b/src/resolve/resolved-dnstls.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if ENABLE_DNS_OVER_TLS
+
+#include <stdint.h>
+
+typedef struct DnsServer DnsServer;
+typedef struct DnsStream DnsStream;
+typedef struct DnsTlsManagerData DnsTlsManagerData;
+typedef struct DnsTlsServerData DnsTlsServerData;
+typedef struct DnsTlsStreamData DnsTlsStreamData;
+typedef struct Manager Manager;
+
+#if DNS_OVER_TLS_USE_GNUTLS
+#include "resolved-dnstls-gnutls.h"
+#elif DNS_OVER_TLS_USE_OPENSSL
+#include "resolved-dnstls-openssl.h"
+#else
+#error Unknown dependency for supporting DNS-over-TLS
+#endif
+
+#define DNSTLS_STREAM_CLOSED 1
+
+int dnstls_stream_connect_tls(DnsStream *stream, DnsServer *server);
+void dnstls_stream_free(DnsStream *stream);
+int dnstls_stream_on_io(DnsStream *stream, uint32_t revents);
+int dnstls_stream_shutdown(DnsStream *stream, int error);
+ssize_t dnstls_stream_write(DnsStream *stream, const char *buf, size_t count);
+ssize_t dnstls_stream_read(DnsStream *stream, void *buf, size_t count);
+
+void dnstls_server_free(DnsServer *server);
+
+int dnstls_manager_init(Manager *manager);
+void dnstls_manager_free(Manager *manager);
+
+#endif /* ENABLE_DNS_OVER_TLS */
diff --git a/src/resolve/resolved-etc-hosts.c b/src/resolve/resolved-etc-hosts.c
new file mode 100644
index 0000000..e784213
--- /dev/null
+++ b/src/resolve/resolved-etc-hosts.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "resolved-dns-synthesize.h"
+#include "resolved-etc-hosts.h"
+#include "socket-netlink.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+/* Recheck /etc/hosts at most once every 2s */
+#define ETC_HOSTS_RECHECK_USEC (2*USEC_PER_SEC)
+
+static void etc_hosts_item_free(EtcHostsItem *item) {
+ strv_free(item->names);
+ free(item);
+}
+
+static void etc_hosts_item_by_name_free(EtcHostsItemByName *item) {
+ free(item->name);
+ free(item->addresses);
+ free(item);
+}
+
+void etc_hosts_free(EtcHosts *hosts) {
+ hosts->by_address = hashmap_free_with_destructor(hosts->by_address, etc_hosts_item_free);
+ hosts->by_name = hashmap_free_with_destructor(hosts->by_name, etc_hosts_item_by_name_free);
+ hosts->no_address = set_free_free(hosts->no_address);
+}
+
+void manager_etc_hosts_flush(Manager *m) {
+ etc_hosts_free(&m->etc_hosts);
+ m->etc_hosts_mtime = USEC_INFINITY;
+ m->etc_hosts_ino = 0;
+ m->etc_hosts_dev = 0;
+}
+
+static int parse_line(EtcHosts *hosts, unsigned nr, const char *line) {
+ _cleanup_free_ char *address_str = NULL;
+ struct in_addr_data address = {};
+ bool found = false;
+ EtcHostsItem *item;
+ int r;
+
+ assert(hosts);
+ assert(line);
+
+ r = extract_first_word(&line, &address_str, NULL, EXTRACT_RELAX);
+ if (r < 0)
+ return log_error_errno(r, "/etc/hosts:%u: failed to extract address: %m", nr);
+ assert(r > 0); /* We already checked that the line is not empty, so it should contain *something* */
+
+ r = in_addr_ifindex_from_string_auto(address_str, &address.family, &address.address, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "/etc/hosts:%u: address '%s' is invalid, ignoring: %m", nr, address_str);
+ return 0;
+ }
+
+ r = in_addr_is_null(address.family, &address.address);
+ if (r < 0) {
+ log_warning_errno(r, "/etc/hosts:%u: address '%s' is invalid, ignoring: %m", nr, address_str);
+ return 0;
+ }
+ if (r > 0)
+ /* This is an 0.0.0.0 or :: item, which we assume means that we shall map the specified hostname to
+ * nothing. */
+ item = NULL;
+ else {
+ /* If this is a normal address, then simply add entry mapping it to the specified names */
+
+ item = hashmap_get(hosts->by_address, &address);
+ if (!item) {
+ r = hashmap_ensure_allocated(&hosts->by_address, &in_addr_data_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ item = new(EtcHostsItem, 1);
+ if (!item)
+ return log_oom();
+
+ *item = (EtcHostsItem) {
+ .address = address,
+ };
+
+ r = hashmap_put(hosts->by_address, &item->address, item);
+ if (r < 0) {
+ free(item);
+ return log_oom();
+ }
+ }
+ }
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+ EtcHostsItemByName *bn;
+
+ r = extract_first_word(&line, &name, NULL, EXTRACT_RELAX);
+ if (r < 0)
+ return log_error_errno(r, "/etc/hosts:%u: couldn't extract hostname: %m", nr);
+ if (r == 0)
+ break;
+
+ found = true;
+
+ r = dns_name_is_valid_ldh(name);
+ if (r <= 0) {
+ log_warning_errno(r, "/etc/hosts:%u: hostname \"%s\" is not valid, ignoring.", nr, name);
+ continue;
+ }
+
+ if (is_localhost(name))
+ /* Suppress the "localhost" line that is often seen */
+ continue;
+
+ if (!item) {
+ /* Optimize the case where we don't need to store any addresses, by storing
+ * only the name in a dedicated Set instead of the hashmap */
+
+ r = set_ensure_consume(&hosts->no_address, &dns_name_hash_ops, TAKE_PTR(name));
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ r = strv_extend(&item->names, name);
+ if (r < 0)
+ return log_oom();
+
+ bn = hashmap_get(hosts->by_name, name);
+ if (!bn) {
+ r = hashmap_ensure_allocated(&hosts->by_name, &dns_name_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ bn = new0(EtcHostsItemByName, 1);
+ if (!bn)
+ return log_oom();
+
+ r = hashmap_put(hosts->by_name, name, bn);
+ if (r < 0) {
+ free(bn);
+ return log_oom();
+ }
+
+ bn->name = TAKE_PTR(name);
+ }
+
+ if (!GREEDY_REALLOC(bn->addresses, bn->n_allocated, bn->n_addresses + 1))
+ return log_oom();
+
+ bn->addresses[bn->n_addresses++] = &item->address;
+ }
+
+ if (!found)
+ log_warning("/etc/hosts:%u: line is missing any hostnames", nr);
+
+ return 0;
+}
+
+int etc_hosts_parse(EtcHosts *hosts, FILE *f) {
+ _cleanup_(etc_hosts_free) EtcHosts t = {};
+ unsigned nr = 0;
+ int r;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read /etc/hosts: %m");
+ if (r == 0)
+ break;
+
+ nr++;
+
+ l = strchr(line, '#');
+ if (l)
+ *l = '\0';
+
+ l = strstrip(line);
+ if (isempty(l))
+ continue;
+
+ r = parse_line(&t, nr, l);
+ if (r < 0)
+ return r;
+ }
+
+ etc_hosts_free(hosts);
+ *hosts = t;
+ t = (EtcHosts) {}; /* prevent cleanup */
+ return 0;
+}
+
+static int manager_etc_hosts_read(Manager *m) {
+ _cleanup_fclose_ FILE *f = NULL;
+ struct stat st;
+ usec_t ts;
+ int r;
+
+ assert_se(sd_event_now(m->event, clock_boottime_or_monotonic(), &ts) >= 0);
+
+ /* See if we checked /etc/hosts recently already */
+ if (m->etc_hosts_last != USEC_INFINITY && m->etc_hosts_last + ETC_HOSTS_RECHECK_USEC > ts)
+ return 0;
+
+ m->etc_hosts_last = ts;
+
+ if (m->etc_hosts_mtime != USEC_INFINITY) {
+ if (stat("/etc/hosts", &st) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat /etc/hosts: %m");
+
+ manager_etc_hosts_flush(m);
+ return 0;
+ }
+
+ /* Did the mtime or ino/dev change? If not, there's no point in re-reading the file. */
+ if (timespec_load(&st.st_mtim) == m->etc_hosts_mtime &&
+ st.st_ino == m->etc_hosts_ino && st.st_dev == m->etc_hosts_dev)
+ return 0;
+ }
+
+ f = fopen("/etc/hosts", "re");
+ if (!f) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open /etc/hosts: %m");
+
+ manager_etc_hosts_flush(m);
+ return 0;
+ }
+
+ /* Take the timestamp at the beginning of processing, so that any changes made later are read on the next
+ * invocation */
+ r = fstat(fileno(f), &st);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to fstat() /etc/hosts: %m");
+
+ r = etc_hosts_parse(&m->etc_hosts, f);
+ if (r < 0)
+ return r;
+
+ m->etc_hosts_mtime = timespec_load(&st.st_mtim);
+ m->etc_hosts_ino = st.st_ino;
+ m->etc_hosts_dev = st.st_dev;
+ m->etc_hosts_last = ts;
+
+ return 1;
+}
+
+int manager_etc_hosts_lookup(Manager *m, DnsQuestion* q, DnsAnswer **answer) {
+ bool found_a = false, found_aaaa = false;
+ struct in_addr_data k = {};
+ EtcHostsItemByName *bn;
+ DnsResourceKey *t;
+ const char *name;
+ unsigned i;
+ int r;
+
+ assert(m);
+ assert(q);
+ assert(answer);
+
+ if (!m->read_etc_hosts)
+ return 0;
+
+ (void) manager_etc_hosts_read(m);
+
+ name = dns_question_first_name(q);
+ if (!name)
+ return 0;
+
+ r = dns_name_address(name, &k.family, &k.address);
+ if (r > 0) {
+ EtcHostsItem *item;
+ DnsResourceKey *found_ptr = NULL;
+
+ item = hashmap_get(m->etc_hosts.by_address, &k);
+ if (!item)
+ return 0;
+
+ /* We have an address in /etc/hosts that matches the queried name. Let's return successful. Actual data
+ * we'll only return if the request was for PTR. */
+
+ DNS_QUESTION_FOREACH(t, q) {
+ if (!IN_SET(t->type, DNS_TYPE_PTR, DNS_TYPE_ANY))
+ continue;
+ if (!IN_SET(t->class, DNS_CLASS_IN, DNS_CLASS_ANY))
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(t), name);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ found_ptr = t;
+ break;
+ }
+ }
+
+ if (found_ptr) {
+ char **n;
+
+ r = dns_answer_reserve(answer, strv_length(item->names));
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(n, item->names) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ rr = dns_resource_record_new(found_ptr);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->ptr.name = strdup(*n);
+ if (!rr->ptr.name)
+ return -ENOMEM;
+
+ r = dns_answer_add(*answer, rr, 0, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 1;
+ }
+
+ bn = hashmap_get(m->etc_hosts.by_name, name);
+ if (bn) {
+ r = dns_answer_reserve(answer, bn->n_addresses);
+ if (r < 0)
+ return r;
+ } else {
+ /* Check if name was listed with no address. If yes, continue to return an answer. */
+ if (!set_contains(m->etc_hosts.no_address, name))
+ return 0;
+ }
+
+ DNS_QUESTION_FOREACH(t, q) {
+ if (!IN_SET(t->type, DNS_TYPE_A, DNS_TYPE_AAAA, DNS_TYPE_ANY))
+ continue;
+ if (!IN_SET(t->class, DNS_CLASS_IN, DNS_CLASS_ANY))
+ continue;
+
+ r = dns_name_equal(dns_resource_key_name(t), name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (IN_SET(t->type, DNS_TYPE_A, DNS_TYPE_ANY))
+ found_a = true;
+ if (IN_SET(t->type, DNS_TYPE_AAAA, DNS_TYPE_ANY))
+ found_aaaa = true;
+
+ if (found_a && found_aaaa)
+ break;
+ }
+
+ for (i = 0; bn && i < bn->n_addresses; i++) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+
+ if ((!found_a && bn->addresses[i]->family == AF_INET) ||
+ (!found_aaaa && bn->addresses[i]->family == AF_INET6))
+ continue;
+
+ r = dns_resource_record_new_address(&rr, bn->addresses[i]->family, &bn->addresses[i]->address, bn->name);
+ if (r < 0)
+ return r;
+
+ r = dns_answer_add(*answer, rr, 0, DNS_ANSWER_AUTHENTICATED);
+ if (r < 0)
+ return r;
+ }
+
+ return found_a || found_aaaa;
+}
diff --git a/src/resolve/resolved-etc-hosts.h b/src/resolve/resolved-etc-hosts.h
new file mode 100644
index 0000000..459b310
--- /dev/null
+++ b/src/resolve/resolved-etc-hosts.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "resolved-manager.h"
+#include "resolved-dns-question.h"
+#include "resolved-dns-answer.h"
+
+typedef struct EtcHostsItem {
+ struct in_addr_data address;
+
+ char **names;
+} EtcHostsItem;
+
+typedef struct EtcHostsItemByName {
+ char *name;
+
+ struct in_addr_data **addresses;
+ size_t n_addresses, n_allocated;
+} EtcHostsItemByName;
+
+int etc_hosts_parse(EtcHosts *hosts, FILE *f);
+void etc_hosts_free(EtcHosts *hosts);
+
+void manager_etc_hosts_flush(Manager *m);
+int manager_etc_hosts_lookup(Manager *m, DnsQuestion* q, DnsAnswer **answer);
diff --git a/src/resolve/resolved-gperf.gperf b/src/resolve/resolved-gperf.gperf
new file mode 100644
index 0000000..b54fa1b
--- /dev/null
+++ b/src/resolve/resolved-gperf.gperf
@@ -0,0 +1,32 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "resolved-conf.h"
+#include "resolved-manager.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name resolved_gperf_hash
+%define lookup-function-name resolved_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Resolve.DNS, config_parse_dns_servers, DNS_SERVER_SYSTEM, 0
+Resolve.FallbackDNS, config_parse_dns_servers, DNS_SERVER_FALLBACK, 0
+Resolve.Domains, config_parse_search_domains, 0, 0
+Resolve.LLMNR, config_parse_resolve_support, 0, offsetof(Manager, llmnr_support)
+Resolve.MulticastDNS, config_parse_resolve_support, 0, offsetof(Manager, mdns_support)
+Resolve.DNSSEC, config_parse_dnssec_mode, 0, offsetof(Manager, dnssec_mode)
+Resolve.DNSOverTLS, config_parse_dns_over_tls_mode, 0, offsetof(Manager, dns_over_tls_mode)
+Resolve.Cache, config_parse_dns_cache_mode, DNS_CACHE_MODE_YES, offsetof(Manager, enable_cache)
+Resolve.DNSStubListener, config_parse_dns_stub_listener_mode, 0, offsetof(Manager, dns_stub_listener_mode)
+Resolve.ReadEtcHosts, config_parse_bool, 0, offsetof(Manager, read_etc_hosts)
+Resolve.ResolveUnicastSingleLabel, config_parse_bool, 0, offsetof(Manager, resolve_unicast_single_label)
+Resolve.DNSStubListenerExtra, config_parse_dns_stub_listener_extra, 0, offsetof(Manager, dns_extra_stub_listeners)
diff --git a/src/resolve/resolved-link-bus.c b/src/resolve/resolved-link-bus.c
new file mode 100644
index 0000000..6a693ff
--- /dev/null
+++ b/src/resolve/resolved-link-bus.c
@@ -0,0 +1,851 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/capability.h>
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-get-properties.h"
+#include "bus-message-util.h"
+#include "bus-polkit.h"
+#include "parse-util.h"
+#include "resolve-util.h"
+#include "resolved-bus.h"
+#include "resolved-link-bus.h"
+#include "resolved-resolv-conf.h"
+#include "socket-netlink.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static BUS_DEFINE_PROPERTY_GET(property_get_dnssec_supported, "b", Link, link_dnssec_supported);
+static BUS_DEFINE_PROPERTY_GET2(property_get_dnssec_mode, "s", Link, link_get_dnssec_mode, dnssec_mode_to_string);
+
+static int property_get_dns_over_tls_mode(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+
+ assert(reply);
+ assert(l);
+
+ return sd_bus_message_append(reply, "s", dns_over_tls_mode_to_string(link_get_dns_over_tls_mode(l)));
+}
+
+static int property_get_dns_internal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error,
+ bool extended) {
+
+ Link *l = userdata;
+ DnsServer *s;
+ int r;
+
+ assert(reply);
+ assert(l);
+
+ r = sd_bus_message_open_container(reply, 'a', extended ? "(iayqs)" : "(iay)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(servers, s, l->dns_servers) {
+ r = bus_dns_server_append(reply, s, false, extended);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_dns(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return property_get_dns_internal(bus, path, interface, property, reply, userdata, error, false);
+}
+
+static int property_get_dns_ex(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return property_get_dns_internal(bus, path, interface, property, reply, userdata, error, true);
+}
+
+static int property_get_current_dns_server_internal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error,
+ bool extended) {
+
+ DnsServer *s;
+
+ assert(reply);
+ assert(userdata);
+
+ s = *(DnsServer **) userdata;
+
+ return bus_dns_server_append(reply, s, false, extended);
+}
+
+static int property_get_current_dns_server(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return property_get_current_dns_server_internal(bus, path, interface, property, reply, userdata, error, false);
+}
+
+static int property_get_current_dns_server_ex(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+ return property_get_current_dns_server_internal(bus, path, interface, property, reply, userdata, error, true);
+}
+
+static int property_get_domains(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+ DnsSearchDomain *d;
+ int r;
+
+ assert(reply);
+ assert(l);
+
+ r = sd_bus_message_open_container(reply, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(domains, d, l->search_domains) {
+ r = sd_bus_message_append(reply, "(sb)", d->name, d->route_only);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_default_route(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+
+ assert(reply);
+ assert(l);
+
+ /* Return what is configured, if there's something configured */
+ if (l->default_route >= 0)
+ return sd_bus_message_append(reply, "b", l->default_route);
+
+ /* Otherwise report what is in effect */
+ if (l->unicast_scope)
+ return sd_bus_message_append(reply, "b", dns_scope_is_default_route(l->unicast_scope));
+
+ return sd_bus_message_append(reply, "b", false);
+}
+
+static int property_get_scopes_mask(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+ uint64_t mask;
+
+ assert(reply);
+ assert(l);
+
+ mask = (l->unicast_scope ? SD_RESOLVED_DNS : 0) |
+ (l->llmnr_ipv4_scope ? SD_RESOLVED_LLMNR_IPV4 : 0) |
+ (l->llmnr_ipv6_scope ? SD_RESOLVED_LLMNR_IPV6 : 0) |
+ (l->mdns_ipv4_scope ? SD_RESOLVED_MDNS_IPV4 : 0) |
+ (l->mdns_ipv6_scope ? SD_RESOLVED_MDNS_IPV6 : 0);
+
+ return sd_bus_message_append(reply, "t", mask);
+}
+
+static int property_get_ntas(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Link *l = userdata;
+ const char *name;
+ int r;
+
+ assert(reply);
+ assert(l);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ SET_FOREACH(name, l->dnssec_negative_trust_anchors) {
+ r = sd_bus_message_append(reply, "s", name);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int verify_unmanaged_link(Link *l, sd_bus_error *error) {
+ assert(l);
+
+ if (l->flags & IFF_LOOPBACK)
+ return sd_bus_error_setf(error, BUS_ERROR_LINK_BUSY, "Link %s is loopback device.", l->ifname);
+ if (l->is_managed)
+ return sd_bus_error_setf(error, BUS_ERROR_LINK_BUSY, "Link %s is managed.", l->ifname);
+
+ return 0;
+}
+
+static int bus_link_method_set_dns_servers_internal(sd_bus_message *message, void *userdata, sd_bus_error *error, bool extended) {
+ struct in_addr_full **dns;
+ Link *l = userdata;
+ size_t n;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = bus_message_read_dns_servers(message, error, extended, &dns, &n);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.set-dns-servers",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ goto finalize;
+ if (r == 0) {
+ r = 1; /* Polkit will call us back */
+ goto finalize;
+ }
+
+ dns_server_mark_all(l->dns_servers);
+
+ for (size_t i = 0; i < n; i++) {
+ DnsServer *s;
+
+ s = dns_server_find(l->dns_servers, dns[i]->family, &dns[i]->address, dns[i]->port, 0, dns[i]->server_name);
+ if (s)
+ dns_server_move_back_and_unmark(s);
+ else {
+ r = dns_server_new(l->manager, NULL, DNS_SERVER_LINK, l, dns[i]->family, &dns[i]->address, dns[i]->port, 0, dns[i]->server_name);
+ if (r < 0) {
+ dns_server_unlink_all(l->dns_servers);
+ goto finalize;
+ }
+ }
+
+ }
+
+ dns_server_unlink_marked(l->dns_servers);
+ link_allocate_scopes(l);
+
+ (void) link_save_user(l);
+ (void) manager_write_resolv_conf(l->manager);
+ (void) manager_send_changed(l->manager, "DNS");
+
+ r = sd_bus_reply_method_return(message, NULL);
+
+finalize:
+ for (size_t i = 0; i < n; i++)
+ in_addr_full_free(dns[i]);
+ free(dns);
+
+ return r;
+}
+
+int bus_link_method_set_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_link_method_set_dns_servers_internal(message, userdata, error, false);
+}
+
+int bus_link_method_set_dns_servers_ex(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ return bus_link_method_set_dns_servers_internal(message, userdata, error, true);
+}
+
+int bus_link_method_set_domains(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(message, 'a', "(sb)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *name;
+ int route_only;
+
+ r = sd_bus_message_read(message, "(sb)", &name, &route_only);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = dns_name_is_valid(name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid search domain %s", name);
+ if (!route_only && dns_name_is_root(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Root domain is not suitable as search domain");
+ }
+
+ r = sd_bus_message_rewind(message, false);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.set-domains",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ dns_search_domain_mark_all(l->search_domains);
+
+ for (;;) {
+ DnsSearchDomain *d;
+ const char *name;
+ int route_only;
+
+ r = sd_bus_message_read(message, "(sb)", &name, &route_only);
+ if (r < 0)
+ goto clear;
+ if (r == 0)
+ break;
+
+ r = dns_search_domain_find(l->search_domains, name, &d);
+ if (r < 0)
+ goto clear;
+
+ if (r > 0)
+ dns_search_domain_move_back_and_unmark(d);
+ else {
+ r = dns_search_domain_new(l->manager, &d, DNS_SEARCH_DOMAIN_LINK, l, name);
+ if (r < 0)
+ goto clear;
+ }
+
+ d->route_only = route_only;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ goto clear;
+
+ dns_search_domain_unlink_marked(l->search_domains);
+
+ (void) link_save_user(l);
+ (void) manager_write_resolv_conf(l->manager);
+
+ return sd_bus_reply_method_return(message, NULL);
+
+clear:
+ dns_search_domain_unlink_all(l->search_domains);
+ return r;
+}
+
+int bus_link_method_set_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r, b;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.set-default-route",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ if (l->default_route != b) {
+ l->default_route = b;
+
+ (void) link_save_user(l);
+ (void) manager_write_resolv_conf(l->manager);
+ }
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ ResolveSupport mode;
+ const char *llmnr;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &llmnr);
+ if (r < 0)
+ return r;
+
+ if (isempty(llmnr))
+ mode = RESOLVE_SUPPORT_YES;
+ else {
+ mode = resolve_support_from_string(llmnr);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid LLMNR setting: %s", llmnr);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.set-llmnr",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ l->llmnr_support = mode;
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ ResolveSupport mode;
+ const char *mdns;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &mdns);
+ if (r < 0)
+ return r;
+
+ if (isempty(mdns))
+ mode = RESOLVE_SUPPORT_NO;
+ else {
+ mode = resolve_support_from_string(mdns);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid MulticastDNS setting: %s", mdns);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.set-mdns",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ l->mdns_support = mode;
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ const char *dns_over_tls;
+ DnsOverTlsMode mode;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &dns_over_tls);
+ if (r < 0)
+ return r;
+
+ if (isempty(dns_over_tls))
+ mode = _DNS_OVER_TLS_MODE_INVALID;
+ else {
+ mode = dns_over_tls_mode_from_string(dns_over_tls);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNSOverTLS setting: %s", dns_over_tls);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.set-dns-over-tls",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ link_set_dns_over_tls_mode(l, mode);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ const char *dnssec;
+ DnssecMode mode;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "s", &dnssec);
+ if (r < 0)
+ return r;
+
+ if (isempty(dnssec))
+ mode = _DNSSEC_MODE_INVALID;
+ else {
+ mode = dnssec_mode_from_string(dnssec);
+ if (mode < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNSSEC setting: %s", dnssec);
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.set-dnssec",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ link_set_dnssec_mode(l, mode);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_set_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_set_free_free_ Set *ns = NULL;
+ _cleanup_strv_free_ char **ntas = NULL;
+ Link *l = userdata;
+ int r;
+ char **i;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ ns = set_new(&dns_name_hash_ops);
+ if (!ns)
+ return -ENOMEM;
+
+ r = sd_bus_message_read_strv(message, &ntas);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, ntas) {
+ r = dns_name_is_valid(*i);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid negative trust anchor domain: %s", *i);
+
+ r = set_put_strdup(&ns, *i);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.set-dnssec-negative-trust-anchors",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ set_free_free(l->dnssec_negative_trust_anchors);
+ l->dnssec_negative_trust_anchors = TAKE_PTR(ns);
+
+ (void) link_save_user(l);
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_link_method_revert(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Link *l = userdata;
+ int r;
+
+ assert(message);
+ assert(l);
+
+ r = verify_unmanaged_link(l, error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_polkit_async(message, CAP_NET_ADMIN,
+ "org.freedesktop.resolve1.revert",
+ NULL, true, UID_INVALID,
+ &l->manager->polkit_registry, error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* Polkit will call us back */
+
+ link_flush_settings(l);
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ (void) link_save_user(l);
+ (void) manager_write_resolv_conf(l->manager);
+ (void) manager_send_changed(l->manager, "DNS");
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+static int link_object_find(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ _cleanup_free_ char *e = NULL;
+ Manager *m = userdata;
+ Link *link;
+ int ifindex, r;
+
+ assert(bus);
+ assert(path);
+ assert(interface);
+ assert(found);
+ assert(m);
+
+ r = sd_bus_path_decode(path, "/org/freedesktop/resolve1/link", &e);
+ if (r <= 0)
+ return 0;
+
+ ifindex = parse_ifindex(e);
+ if (ifindex < 0)
+ return 0;
+
+ link = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!link)
+ return 0;
+
+ *found = link;
+ return 1;
+}
+
+char *link_bus_path(const Link *link) {
+ char *p, ifindex[DECIMAL_STR_MAX(link->ifindex)];
+ int r;
+
+ assert(link);
+
+ xsprintf(ifindex, "%i", link->ifindex);
+
+ r = sd_bus_path_encode("/org/freedesktop/resolve1/link", ifindex, &p);
+ if (r < 0)
+ return NULL;
+
+ return p;
+}
+
+static int link_node_enumerator(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **l = NULL;
+ Manager *m = userdata;
+ Link *link;
+ unsigned c = 0;
+
+ assert(bus);
+ assert(path);
+ assert(m);
+ assert(nodes);
+
+ l = new0(char*, hashmap_size(m->links) + 1);
+ if (!l)
+ return -ENOMEM;
+
+ HASHMAP_FOREACH(link, m->links) {
+ char *p;
+
+ p = link_bus_path(link);
+ if (!p)
+ return -ENOMEM;
+
+ l[c++] = p;
+ }
+
+ l[c] = NULL;
+ *nodes = TAKE_PTR(l);
+
+ return 1;
+}
+
+static const sd_bus_vtable link_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("ScopesMask", "t", property_get_scopes_mask, 0, 0),
+ SD_BUS_PROPERTY("DNS", "a(iay)", property_get_dns, 0, 0),
+ SD_BUS_PROPERTY("DNSEx", "a(iayqs)", property_get_dns_ex, 0, 0),
+ SD_BUS_PROPERTY("CurrentDNSServer", "(iay)", property_get_current_dns_server, offsetof(Link, current_dns_server), 0),
+ SD_BUS_PROPERTY("CurrentDNSServerEx", "(iayqs)", property_get_current_dns_server_ex, offsetof(Link, current_dns_server), 0),
+ SD_BUS_PROPERTY("Domains", "a(sb)", property_get_domains, 0, 0),
+ SD_BUS_PROPERTY("DefaultRoute", "b", property_get_default_route, 0, 0),
+ SD_BUS_PROPERTY("LLMNR", "s", bus_property_get_resolve_support, offsetof(Link, llmnr_support), 0),
+ SD_BUS_PROPERTY("MulticastDNS", "s", bus_property_get_resolve_support, offsetof(Link, mdns_support), 0),
+ SD_BUS_PROPERTY("DNSOverTLS", "s", property_get_dns_over_tls_mode, 0, 0),
+ SD_BUS_PROPERTY("DNSSEC", "s", property_get_dnssec_mode, 0, 0),
+ SD_BUS_PROPERTY("DNSSECNegativeTrustAnchors", "as", property_get_ntas, 0, 0),
+ SD_BUS_PROPERTY("DNSSECSupported", "b", property_get_dnssec_supported, 0, 0),
+
+ SD_BUS_METHOD_WITH_ARGS("SetDNS",
+ SD_BUS_ARGS("a(iay)", addresses),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_dns_servers,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetDNSEx",
+ SD_BUS_ARGS("a(iayqs)", addresses),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_dns_servers_ex,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetDomains",
+ SD_BUS_ARGS("a(sb)", domains),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_domains,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetDefaultRoute",
+ SD_BUS_ARGS("b", enable),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_default_route,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetLLMNR",
+ SD_BUS_ARGS("s", mode),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_llmnr,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetMulticastDNS",
+ SD_BUS_ARGS("s", mode),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_mdns,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetDNSOverTLS",
+ SD_BUS_ARGS("s", mode),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_dns_over_tls,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetDNSSEC",
+ SD_BUS_ARGS("s", mode),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_dnssec,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("SetDNSSECNegativeTrustAnchors",
+ SD_BUS_ARGS("as", names),
+ SD_BUS_NO_RESULT,
+ bus_link_method_set_dnssec_negative_trust_anchors,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_ARGS("Revert",
+ SD_BUS_NO_ARGS,
+ SD_BUS_NO_RESULT,
+ bus_link_method_revert,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END
+};
+
+const BusObjectImplementation link_object = {
+ "/org/freedesktop/resolve1/link",
+ "org.freedesktop.resolve1.Link",
+ .fallback_vtables = BUS_FALLBACK_VTABLES({link_vtable, link_object_find}),
+ .node_enumerator = link_node_enumerator,
+};
diff --git a/src/resolve/resolved-link-bus.h b/src/resolve/resolved-link-bus.h
new file mode 100644
index 0000000..b882df5
--- /dev/null
+++ b/src/resolve/resolved-link-bus.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-util.h"
+#include "resolved-link.h"
+
+extern const BusObjectImplementation link_object;
+
+char *link_bus_path(const Link *link);
+
+int bus_link_method_set_dns_servers(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dns_servers_ex(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_domains(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_default_route(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_llmnr(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_mdns(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dns_over_tls(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dnssec(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_set_dnssec_negative_trust_anchors(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_link_method_revert(sd_bus_message *message, void *userdata, sd_bus_error *error);
diff --git a/src/resolve/resolved-link.c b/src/resolve/resolved-link.c
new file mode 100644
index 0000000..4fa4451
--- /dev/null
+++ b/src/resolve/resolved-link.c
@@ -0,0 +1,1432 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/if.h>
+#include <unistd.h>
+
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log-link.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "resolved-link.h"
+#include "resolved-llmnr.h"
+#include "resolved-mdns.h"
+#include "socket-netlink.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+int link_new(Manager *m, Link **ret, int ifindex) {
+ _cleanup_(link_freep) Link *l = NULL;
+ int r;
+
+ assert(m);
+ assert(ifindex > 0);
+
+ r = hashmap_ensure_allocated(&m->links, NULL);
+ if (r < 0)
+ return r;
+
+ l = new(Link, 1);
+ if (!l)
+ return -ENOMEM;
+
+ *l = (Link) {
+ .ifindex = ifindex,
+ .default_route = -1,
+ .llmnr_support = RESOLVE_SUPPORT_YES,
+ .mdns_support = RESOLVE_SUPPORT_NO,
+ .dnssec_mode = _DNSSEC_MODE_INVALID,
+ .dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID,
+ .operstate = IF_OPER_UNKNOWN,
+ };
+
+ if (asprintf(&l->state_file, "/run/systemd/resolve/netif/%i", ifindex) < 0)
+ return -ENOMEM;
+
+ r = hashmap_put(m->links, INT_TO_PTR(ifindex), l);
+ if (r < 0)
+ return r;
+
+ l->manager = m;
+
+ if (ret)
+ *ret = l;
+ l = NULL;
+
+ return 0;
+}
+
+void link_flush_settings(Link *l) {
+ assert(l);
+
+ l->default_route = -1;
+ l->llmnr_support = RESOLVE_SUPPORT_YES;
+ l->mdns_support = RESOLVE_SUPPORT_NO;
+ l->dnssec_mode = _DNSSEC_MODE_INVALID;
+ l->dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID;
+
+ dns_server_unlink_all(l->dns_servers);
+ dns_search_domain_unlink_all(l->search_domains);
+
+ l->dnssec_negative_trust_anchors = set_free_free(l->dnssec_negative_trust_anchors);
+}
+
+Link *link_free(Link *l) {
+ if (!l)
+ return NULL;
+
+ /* Send goodbye messages. */
+ dns_scope_announce(l->mdns_ipv4_scope, true);
+ dns_scope_announce(l->mdns_ipv6_scope, true);
+
+ link_flush_settings(l);
+
+ while (l->addresses)
+ (void) link_address_free(l->addresses);
+
+ if (l->manager)
+ hashmap_remove(l->manager->links, INT_TO_PTR(l->ifindex));
+
+ dns_scope_free(l->unicast_scope);
+ dns_scope_free(l->llmnr_ipv4_scope);
+ dns_scope_free(l->llmnr_ipv6_scope);
+ dns_scope_free(l->mdns_ipv4_scope);
+ dns_scope_free(l->mdns_ipv6_scope);
+
+ free(l->state_file);
+ free(l->ifname);
+
+ return mfree(l);
+}
+
+void link_allocate_scopes(Link *l) {
+ bool unicast_relevant;
+ int r;
+
+ assert(l);
+
+ /* If a link that used to be relevant is no longer, or a link that did not use to be relevant now becomes
+ * relevant, let's reinit the learnt global DNS server information, since we might talk to different servers
+ * now, even if they have the same addresses as before. */
+
+ unicast_relevant = link_relevant(l, AF_UNSPEC, false);
+ if (unicast_relevant != l->unicast_relevant) {
+ l->unicast_relevant = unicast_relevant;
+
+ dns_server_reset_features_all(l->manager->fallback_dns_servers);
+ dns_server_reset_features_all(l->manager->dns_servers);
+
+ /* Also, flush the global unicast scope, to deal with split horizon setups, where talking through one
+ * interface reveals different DNS zones than through others. */
+ if (l->manager->unicast_scope)
+ dns_cache_flush(&l->manager->unicast_scope->cache);
+ }
+
+ /* And now, allocate all scopes that makes sense now if we didn't have them yet, and drop those which we don't
+ * need anymore */
+
+ if (unicast_relevant && l->dns_servers) {
+ if (!l->unicast_scope) {
+ dns_server_reset_features_all(l->dns_servers);
+
+ r = dns_scope_new(l->manager, &l->unicast_scope, l, DNS_PROTOCOL_DNS, AF_UNSPEC);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate DNS scope: %m");
+ }
+ } else
+ l->unicast_scope = dns_scope_free(l->unicast_scope);
+
+ if (link_relevant(l, AF_INET, true) &&
+ l->llmnr_support != RESOLVE_SUPPORT_NO &&
+ l->manager->llmnr_support != RESOLVE_SUPPORT_NO) {
+ if (!l->llmnr_ipv4_scope) {
+ r = dns_scope_new(l->manager, &l->llmnr_ipv4_scope, l, DNS_PROTOCOL_LLMNR, AF_INET);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate LLMNR IPv4 scope: %m");
+ }
+ } else
+ l->llmnr_ipv4_scope = dns_scope_free(l->llmnr_ipv4_scope);
+
+ if (link_relevant(l, AF_INET6, true) &&
+ l->llmnr_support != RESOLVE_SUPPORT_NO &&
+ l->manager->llmnr_support != RESOLVE_SUPPORT_NO &&
+ socket_ipv6_is_supported()) {
+ if (!l->llmnr_ipv6_scope) {
+ r = dns_scope_new(l->manager, &l->llmnr_ipv6_scope, l, DNS_PROTOCOL_LLMNR, AF_INET6);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate LLMNR IPv6 scope: %m");
+ }
+ } else
+ l->llmnr_ipv6_scope = dns_scope_free(l->llmnr_ipv6_scope);
+
+ if (link_relevant(l, AF_INET, true) &&
+ l->mdns_support != RESOLVE_SUPPORT_NO &&
+ l->manager->mdns_support != RESOLVE_SUPPORT_NO) {
+ if (!l->mdns_ipv4_scope) {
+ r = dns_scope_new(l->manager, &l->mdns_ipv4_scope, l, DNS_PROTOCOL_MDNS, AF_INET);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate mDNS IPv4 scope: %m");
+ }
+ } else
+ l->mdns_ipv4_scope = dns_scope_free(l->mdns_ipv4_scope);
+
+ if (link_relevant(l, AF_INET6, true) &&
+ l->mdns_support != RESOLVE_SUPPORT_NO &&
+ l->manager->mdns_support != RESOLVE_SUPPORT_NO) {
+ if (!l->mdns_ipv6_scope) {
+ r = dns_scope_new(l->manager, &l->mdns_ipv6_scope, l, DNS_PROTOCOL_MDNS, AF_INET6);
+ if (r < 0)
+ log_warning_errno(r, "Failed to allocate mDNS IPv6 scope: %m");
+ }
+ } else
+ l->mdns_ipv6_scope = dns_scope_free(l->mdns_ipv6_scope);
+}
+
+void link_add_rrs(Link *l, bool force_remove) {
+ LinkAddress *a;
+ int r;
+
+ LIST_FOREACH(addresses, a, l->addresses)
+ link_address_add_rrs(a, force_remove);
+
+ if (!force_remove &&
+ l->mdns_support == RESOLVE_SUPPORT_YES &&
+ l->manager->mdns_support == RESOLVE_SUPPORT_YES) {
+
+ if (l->mdns_ipv4_scope) {
+ r = dns_scope_add_dnssd_services(l->mdns_ipv4_scope);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv4 DNS-SD services: %m");
+ }
+
+ if (l->mdns_ipv6_scope) {
+ r = dns_scope_add_dnssd_services(l->mdns_ipv6_scope);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv6 DNS-SD services: %m");
+ }
+
+ } else {
+
+ if (l->mdns_ipv4_scope) {
+ r = dns_scope_remove_dnssd_services(l->mdns_ipv4_scope);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove IPv4 DNS-SD services: %m");
+ }
+
+ if (l->mdns_ipv6_scope) {
+ r = dns_scope_remove_dnssd_services(l->mdns_ipv6_scope);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove IPv6 DNS-SD services: %m");
+ }
+ }
+}
+
+int link_process_rtnl(Link *l, sd_netlink_message *m) {
+ const char *n = NULL;
+ int r;
+
+ assert(l);
+ assert(m);
+
+ r = sd_rtnl_message_link_get_flags(m, &l->flags);
+ if (r < 0)
+ return r;
+
+ (void) sd_netlink_message_read_u32(m, IFLA_MTU, &l->mtu);
+ (void) sd_netlink_message_read_u8(m, IFLA_OPERSTATE, &l->operstate);
+
+ if (sd_netlink_message_read_string(m, IFLA_IFNAME, &n) >= 0) {
+ r = free_and_strdup(&l->ifname, n);
+ if (r < 0)
+ return r;
+ }
+
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ return 0;
+}
+
+static int link_update_dns_server_one(Link *l, const char *str) {
+ _cleanup_free_ char *name = NULL;
+ int family, ifindex, r;
+ union in_addr_union a;
+ DnsServer *s;
+ uint16_t port;
+
+ assert(l);
+ assert(str);
+
+ r = in_addr_port_ifindex_name_from_string_auto(str, &family, &a, &port, &ifindex, &name);
+ if (r < 0)
+ return r;
+
+ if (ifindex != 0 && ifindex != l->ifindex)
+ return -EINVAL;
+
+ /* By default, the port number is determined with the transaction feature level.
+ * See dns_transaction_port() and dns_server_port(). */
+ if (IN_SET(port, 53, 853))
+ port = 0;
+
+ s = dns_server_find(l->dns_servers, family, &a, port, 0, name);
+ if (s) {
+ dns_server_move_back_and_unmark(s);
+ return 0;
+ }
+
+ return dns_server_new(l->manager, NULL, DNS_SERVER_LINK, l, family, &a, port, 0, name);
+}
+
+static int link_update_dns_servers(Link *l) {
+ _cleanup_strv_free_ char **nameservers = NULL;
+ char **nameserver;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dns(l->ifindex, &nameservers);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ dns_server_mark_all(l->dns_servers);
+
+ STRV_FOREACH(nameserver, nameservers) {
+ r = link_update_dns_server_one(l, *nameserver);
+ if (r < 0)
+ goto clear;
+ }
+
+ dns_server_unlink_marked(l->dns_servers);
+ return 0;
+
+clear:
+ dns_server_unlink_all(l->dns_servers);
+ return r;
+}
+
+static int link_update_default_route(Link *l) {
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dns_default_route(l->ifindex);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ l->default_route = r > 0;
+ return 0;
+
+clear:
+ l->default_route = -1;
+ return r;
+}
+
+static int link_update_llmnr_support(Link *l) {
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_llmnr(l->ifindex, &b);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ l->llmnr_support = resolve_support_from_string(b);
+ if (l->llmnr_support < 0) {
+ r = -EINVAL;
+ goto clear;
+ }
+
+ return 0;
+
+clear:
+ l->llmnr_support = RESOLVE_SUPPORT_YES;
+ return r;
+}
+
+static int link_update_mdns_support(Link *l) {
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_mdns(l->ifindex, &b);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ l->mdns_support = resolve_support_from_string(b);
+ if (l->mdns_support < 0) {
+ r = -EINVAL;
+ goto clear;
+ }
+
+ return 0;
+
+clear:
+ l->mdns_support = RESOLVE_SUPPORT_NO;
+ return r;
+}
+
+void link_set_dns_over_tls_mode(Link *l, DnsOverTlsMode mode) {
+
+ assert(l);
+
+#if ! ENABLE_DNS_OVER_TLS
+ if (mode != DNS_OVER_TLS_NO)
+ log_warning("DNS-over-TLS option for the link cannot be enabled or set to opportunistic when systemd-resolved is built without DNS-over-TLS support. Turning off DNS-over-TLS support.");
+ return;
+#endif
+
+ l->dns_over_tls_mode = mode;
+}
+
+static int link_update_dns_over_tls_mode(Link *l) {
+ _cleanup_free_ char *b = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dns_over_tls(l->ifindex, &b);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ l->dns_over_tls_mode = dns_over_tls_mode_from_string(b);
+ if (l->dns_over_tls_mode < 0) {
+ r = -EINVAL;
+ goto clear;
+ }
+
+ return 0;
+
+clear:
+ l->dns_over_tls_mode = _DNS_OVER_TLS_MODE_INVALID;
+ return r;
+}
+
+void link_set_dnssec_mode(Link *l, DnssecMode mode) {
+
+ assert(l);
+
+#if ! HAVE_GCRYPT
+ if (IN_SET(mode, DNSSEC_YES, DNSSEC_ALLOW_DOWNGRADE))
+ log_warning("DNSSEC option for the link cannot be enabled or set to allow-downgrade when systemd-resolved is built without gcrypt support. Turning off DNSSEC support.");
+ return;
+#endif
+
+ if (l->dnssec_mode == mode)
+ return;
+
+ if ((l->dnssec_mode == _DNSSEC_MODE_INVALID) ||
+ (l->dnssec_mode == DNSSEC_NO && mode != DNSSEC_NO) ||
+ (l->dnssec_mode == DNSSEC_ALLOW_DOWNGRADE && mode == DNSSEC_YES)) {
+
+ /* When switching from non-DNSSEC mode to DNSSEC mode, flush the cache. Also when switching from the
+ * allow-downgrade mode to full DNSSEC mode, flush it too. */
+ if (l->unicast_scope)
+ dns_cache_flush(&l->unicast_scope->cache);
+ }
+
+ l->dnssec_mode = mode;
+}
+
+static int link_update_dnssec_mode(Link *l) {
+ _cleanup_free_ char *m = NULL;
+ DnssecMode mode;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dnssec(l->ifindex, &m);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ mode = dnssec_mode_from_string(m);
+ if (mode < 0) {
+ r = -EINVAL;
+ goto clear;
+ }
+
+ link_set_dnssec_mode(l, mode);
+
+ return 0;
+
+clear:
+ l->dnssec_mode = _DNSSEC_MODE_INVALID;
+ return r;
+}
+
+static int link_update_dnssec_negative_trust_anchors(Link *l) {
+ _cleanup_strv_free_ char **ntas = NULL;
+ _cleanup_set_free_free_ Set *ns = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_dnssec_negative_trust_anchors(l->ifindex, &ntas);
+ if (r == -ENODATA) {
+ r = 0;
+ goto clear;
+ }
+ if (r < 0)
+ goto clear;
+
+ ns = set_new(&dns_name_hash_ops);
+ if (!ns)
+ return -ENOMEM;
+
+ r = set_put_strdupv(&ns, ntas);
+ if (r < 0)
+ return r;
+
+ set_free_free(l->dnssec_negative_trust_anchors);
+ l->dnssec_negative_trust_anchors = TAKE_PTR(ns);
+
+ return 0;
+
+clear:
+ l->dnssec_negative_trust_anchors = set_free_free(l->dnssec_negative_trust_anchors);
+ return r;
+}
+
+static int link_update_search_domain_one(Link *l, const char *name, bool route_only) {
+ DnsSearchDomain *d;
+ int r;
+
+ assert(l);
+ assert(name);
+
+ r = dns_search_domain_find(l->search_domains, name, &d);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ dns_search_domain_move_back_and_unmark(d);
+ else {
+ r = dns_search_domain_new(l->manager, &d, DNS_SEARCH_DOMAIN_LINK, l, name);
+ if (r < 0)
+ return r;
+ }
+
+ d->route_only = route_only;
+ return 0;
+}
+
+static int link_update_search_domains(Link *l) {
+ _cleanup_strv_free_ char **sdomains = NULL, **rdomains = NULL;
+ char **i;
+ int r, q;
+
+ assert(l);
+
+ r = sd_network_link_get_search_domains(l->ifindex, &sdomains);
+ if (r < 0 && r != -ENODATA)
+ goto clear;
+
+ q = sd_network_link_get_route_domains(l->ifindex, &rdomains);
+ if (q < 0 && q != -ENODATA) {
+ r = q;
+ goto clear;
+ }
+
+ if (r == -ENODATA && q == -ENODATA) {
+ /* networkd knows nothing about this interface, and that's fine. */
+ r = 0;
+ goto clear;
+ }
+
+ dns_search_domain_mark_all(l->search_domains);
+
+ STRV_FOREACH(i, sdomains) {
+ r = link_update_search_domain_one(l, *i, false);
+ if (r < 0)
+ goto clear;
+ }
+
+ STRV_FOREACH(i, rdomains) {
+ r = link_update_search_domain_one(l, *i, true);
+ if (r < 0)
+ goto clear;
+ }
+
+ dns_search_domain_unlink_marked(l->search_domains);
+ return 0;
+
+clear:
+ dns_search_domain_unlink_all(l->search_domains);
+ return r;
+}
+
+static int link_is_managed(Link *l) {
+ _cleanup_free_ char *state = NULL;
+ int r;
+
+ assert(l);
+
+ r = sd_network_link_get_setup_state(l->ifindex, &state);
+ if (r == -ENODATA)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return !STR_IN_SET(state, "pending", "unmanaged");
+}
+
+static void link_read_settings(Link *l) {
+ int r;
+
+ assert(l);
+
+ /* Read settings from networkd, except when networkd is not managing this interface. */
+
+ r = link_is_managed(l);
+ if (r < 0) {
+ log_link_warning_errno(l, r, "Failed to determine whether the interface is managed: %m");
+ return;
+ }
+ if (r == 0) {
+
+ /* If this link used to be managed, but is now unmanaged, flush all our settings — but only once. */
+ if (l->is_managed)
+ link_flush_settings(l);
+
+ l->is_managed = false;
+ return;
+ }
+
+ l->is_managed = true;
+
+ r = link_update_dns_servers(l);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to read DNS servers for the interface, ignoring: %m");
+
+ r = link_update_llmnr_support(l);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to read LLMNR support for the interface, ignoring: %m");
+
+ r = link_update_mdns_support(l);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to read mDNS support for the interface, ignoring: %m");
+
+ r = link_update_dns_over_tls_mode(l);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to read DNS-over-TLS mode for the interface, ignoring: %m");
+
+ r = link_update_dnssec_mode(l);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to read DNSSEC mode for the interface, ignoring: %m");
+
+ r = link_update_dnssec_negative_trust_anchors(l);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to read DNSSEC negative trust anchors for the interface, ignoring: %m");
+
+ r = link_update_search_domains(l);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to read search domains for the interface, ignoring: %m");
+
+ r = link_update_default_route(l);
+ if (r < 0)
+ log_link_warning_errno(l, r, "Failed to read default route setting for the interface, proceeding anyway: %m");
+}
+
+int link_update(Link *l) {
+ int r;
+
+ assert(l);
+
+ link_read_settings(l);
+ r = link_load_user(l);
+ if (r < 0)
+ return r;
+
+ if (l->llmnr_support != RESOLVE_SUPPORT_NO) {
+ r = manager_llmnr_start(l->manager);
+ if (r < 0)
+ return r;
+ }
+
+ if (l->mdns_support != RESOLVE_SUPPORT_NO) {
+ r = manager_mdns_start(l->manager);
+ if (r < 0)
+ return r;
+ }
+
+ link_allocate_scopes(l);
+ link_add_rrs(l, false);
+
+ return 0;
+}
+
+bool link_relevant(Link *l, int family, bool local_multicast) {
+ _cleanup_free_ char *state = NULL;
+ LinkAddress *a;
+
+ assert(l);
+
+ /* A link is relevant for local multicast traffic if it isn't a loopback device, has a link
+ * beat, can do multicast and has at least one link-local (or better) IP address.
+ *
+ * A link is relevant for non-multicast traffic if it isn't a loopback device, has a link beat, and has at
+ * least one routable address. */
+
+ if (l->flags & (IFF_LOOPBACK|IFF_DORMANT))
+ return false;
+
+ if ((l->flags & (IFF_UP|IFF_LOWER_UP)) != (IFF_UP|IFF_LOWER_UP))
+ return false;
+
+ if (local_multicast) {
+ if ((l->flags & IFF_MULTICAST) != IFF_MULTICAST)
+ return false;
+ }
+
+ /* Check kernel operstate
+ * https://www.kernel.org/doc/Documentation/networking/operstates.txt */
+ if (!IN_SET(l->operstate, IF_OPER_UNKNOWN, IF_OPER_UP))
+ return false;
+
+ (void) sd_network_link_get_operational_state(l->ifindex, &state);
+ if (state && !STR_IN_SET(state, "unknown", "degraded", "degraded-carrier", "routable"))
+ return false;
+
+ LIST_FOREACH(addresses, a, l->addresses)
+ if ((family == AF_UNSPEC || a->family == family) && link_address_relevant(a, local_multicast))
+ return true;
+
+ return false;
+}
+
+LinkAddress *link_find_address(Link *l, int family, const union in_addr_union *in_addr) {
+ LinkAddress *a;
+
+ assert(l);
+
+ LIST_FOREACH(addresses, a, l->addresses)
+ if (a->family == family && in_addr_equal(family, &a->in_addr, in_addr))
+ return a;
+
+ return NULL;
+}
+
+DnsServer* link_set_dns_server(Link *l, DnsServer *s) {
+ assert(l);
+
+ if (l->current_dns_server == s)
+ return s;
+
+ if (s)
+ log_debug("Switching to DNS server %s for interface %s.", strna(dns_server_string_full(s)), l->ifname);
+
+ dns_server_unref(l->current_dns_server);
+ l->current_dns_server = dns_server_ref(s);
+
+ if (l->unicast_scope)
+ dns_cache_flush(&l->unicast_scope->cache);
+
+ return s;
+}
+
+DnsServer *link_get_dns_server(Link *l) {
+ assert(l);
+
+ if (!l->current_dns_server)
+ link_set_dns_server(l, l->dns_servers);
+
+ return l->current_dns_server;
+}
+
+void link_next_dns_server(Link *l) {
+ assert(l);
+
+ if (!l->current_dns_server)
+ return;
+
+ /* Change to the next one, but make sure to follow the linked
+ * list only if this server is actually still linked. */
+ if (l->current_dns_server->linked && l->current_dns_server->servers_next) {
+ link_set_dns_server(l, l->current_dns_server->servers_next);
+ return;
+ }
+
+ link_set_dns_server(l, l->dns_servers);
+}
+
+DnsOverTlsMode link_get_dns_over_tls_mode(Link *l) {
+ assert(l);
+
+ if (l->dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID)
+ return l->dns_over_tls_mode;
+
+ return manager_get_dns_over_tls_mode(l->manager);
+}
+
+DnssecMode link_get_dnssec_mode(Link *l) {
+ assert(l);
+
+ if (l->dnssec_mode != _DNSSEC_MODE_INVALID)
+ return l->dnssec_mode;
+
+ return manager_get_dnssec_mode(l->manager);
+}
+
+bool link_dnssec_supported(Link *l) {
+ DnsServer *server;
+
+ assert(l);
+
+ if (link_get_dnssec_mode(l) == DNSSEC_NO)
+ return false;
+
+ server = link_get_dns_server(l);
+ if (server)
+ return dns_server_dnssec_supported(server);
+
+ return true;
+}
+
+int link_address_new(Link *l, LinkAddress **ret, int family, const union in_addr_union *in_addr) {
+ LinkAddress *a;
+
+ assert(l);
+ assert(in_addr);
+
+ a = new(LinkAddress, 1);
+ if (!a)
+ return -ENOMEM;
+
+ *a = (LinkAddress) {
+ .family = family,
+ .in_addr = *in_addr,
+ .link = l,
+ };
+
+ LIST_PREPEND(addresses, l->addresses, a);
+ l->n_addresses++;
+
+ if (ret)
+ *ret = a;
+
+ return 0;
+}
+
+LinkAddress *link_address_free(LinkAddress *a) {
+ if (!a)
+ return NULL;
+
+ if (a->link) {
+ LIST_REMOVE(addresses, a->link->addresses, a);
+
+ assert(a->link->n_addresses > 0);
+ a->link->n_addresses--;
+
+ if (a->llmnr_address_rr) {
+ if (a->family == AF_INET && a->link->llmnr_ipv4_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv4_scope->zone, a->llmnr_address_rr);
+ else if (a->family == AF_INET6 && a->link->llmnr_ipv6_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv6_scope->zone, a->llmnr_address_rr);
+ }
+
+ if (a->llmnr_ptr_rr) {
+ if (a->family == AF_INET && a->link->llmnr_ipv4_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv4_scope->zone, a->llmnr_ptr_rr);
+ else if (a->family == AF_INET6 && a->link->llmnr_ipv6_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv6_scope->zone, a->llmnr_ptr_rr);
+ }
+
+ if (a->mdns_address_rr) {
+ if (a->family == AF_INET && a->link->mdns_ipv4_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv4_scope->zone, a->mdns_address_rr);
+ else if (a->family == AF_INET6 && a->link->mdns_ipv6_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv6_scope->zone, a->mdns_address_rr);
+ }
+
+ if (a->mdns_ptr_rr) {
+ if (a->family == AF_INET && a->link->mdns_ipv4_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv4_scope->zone, a->mdns_ptr_rr);
+ else if (a->family == AF_INET6 && a->link->mdns_ipv6_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv6_scope->zone, a->mdns_ptr_rr);
+ }
+ }
+
+ dns_resource_record_unref(a->llmnr_address_rr);
+ dns_resource_record_unref(a->llmnr_ptr_rr);
+ dns_resource_record_unref(a->mdns_address_rr);
+ dns_resource_record_unref(a->mdns_ptr_rr);
+
+ return mfree(a);
+}
+
+void link_address_add_rrs(LinkAddress *a, bool force_remove) {
+ int r;
+
+ assert(a);
+
+ if (a->family == AF_INET) {
+
+ if (!force_remove &&
+ link_address_relevant(a, true) &&
+ a->link->llmnr_ipv4_scope &&
+ a->link->llmnr_support == RESOLVE_SUPPORT_YES &&
+ a->link->manager->llmnr_support == RESOLVE_SUPPORT_YES) {
+
+ if (!a->link->manager->llmnr_host_ipv4_key) {
+ a->link->manager->llmnr_host_ipv4_key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_A, a->link->manager->llmnr_hostname);
+ if (!a->link->manager->llmnr_host_ipv4_key) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!a->llmnr_address_rr) {
+ a->llmnr_address_rr = dns_resource_record_new(a->link->manager->llmnr_host_ipv4_key);
+ if (!a->llmnr_address_rr) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ a->llmnr_address_rr->a.in_addr = a->in_addr.in;
+ a->llmnr_address_rr->ttl = LLMNR_DEFAULT_TTL;
+ }
+
+ if (!a->llmnr_ptr_rr) {
+ r = dns_resource_record_new_reverse(&a->llmnr_ptr_rr, a->family, &a->in_addr, a->link->manager->llmnr_hostname);
+ if (r < 0)
+ goto fail;
+
+ a->llmnr_ptr_rr->ttl = LLMNR_DEFAULT_TTL;
+ }
+
+ r = dns_zone_put(&a->link->llmnr_ipv4_scope->zone, a->link->llmnr_ipv4_scope, a->llmnr_address_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add A record to LLMNR zone: %m");
+
+ r = dns_zone_put(&a->link->llmnr_ipv4_scope->zone, a->link->llmnr_ipv4_scope, a->llmnr_ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv4 PTR record to LLMNR zone: %m");
+ } else {
+ if (a->llmnr_address_rr) {
+ if (a->link->llmnr_ipv4_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv4_scope->zone, a->llmnr_address_rr);
+ a->llmnr_address_rr = dns_resource_record_unref(a->llmnr_address_rr);
+ }
+
+ if (a->llmnr_ptr_rr) {
+ if (a->link->llmnr_ipv4_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv4_scope->zone, a->llmnr_ptr_rr);
+ a->llmnr_ptr_rr = dns_resource_record_unref(a->llmnr_ptr_rr);
+ }
+ }
+
+ if (!force_remove &&
+ link_address_relevant(a, true) &&
+ a->link->mdns_ipv4_scope &&
+ a->link->mdns_support == RESOLVE_SUPPORT_YES &&
+ a->link->manager->mdns_support == RESOLVE_SUPPORT_YES) {
+ if (!a->link->manager->mdns_host_ipv4_key) {
+ a->link->manager->mdns_host_ipv4_key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_A, a->link->manager->mdns_hostname);
+ if (!a->link->manager->mdns_host_ipv4_key) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!a->mdns_address_rr) {
+ a->mdns_address_rr = dns_resource_record_new(a->link->manager->mdns_host_ipv4_key);
+ if (!a->mdns_address_rr) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ a->mdns_address_rr->a.in_addr = a->in_addr.in;
+ a->mdns_address_rr->ttl = MDNS_DEFAULT_TTL;
+ }
+
+ if (!a->mdns_ptr_rr) {
+ r = dns_resource_record_new_reverse(&a->mdns_ptr_rr, a->family, &a->in_addr, a->link->manager->mdns_hostname);
+ if (r < 0)
+ goto fail;
+
+ a->mdns_ptr_rr->ttl = MDNS_DEFAULT_TTL;
+ }
+
+ r = dns_zone_put(&a->link->mdns_ipv4_scope->zone, a->link->mdns_ipv4_scope, a->mdns_address_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add A record to MDNS zone: %m");
+
+ r = dns_zone_put(&a->link->mdns_ipv4_scope->zone, a->link->mdns_ipv4_scope, a->mdns_ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv4 PTR record to MDNS zone: %m");
+ } else {
+ if (a->mdns_address_rr) {
+ if (a->link->mdns_ipv4_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv4_scope->zone, a->mdns_address_rr);
+ a->mdns_address_rr = dns_resource_record_unref(a->mdns_address_rr);
+ }
+
+ if (a->mdns_ptr_rr) {
+ if (a->link->mdns_ipv4_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv4_scope->zone, a->mdns_ptr_rr);
+ a->mdns_ptr_rr = dns_resource_record_unref(a->mdns_ptr_rr);
+ }
+ }
+ }
+
+ if (a->family == AF_INET6) {
+
+ if (!force_remove &&
+ link_address_relevant(a, true) &&
+ a->link->llmnr_ipv6_scope &&
+ a->link->llmnr_support == RESOLVE_SUPPORT_YES &&
+ a->link->manager->llmnr_support == RESOLVE_SUPPORT_YES) {
+
+ if (!a->link->manager->llmnr_host_ipv6_key) {
+ a->link->manager->llmnr_host_ipv6_key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_AAAA, a->link->manager->llmnr_hostname);
+ if (!a->link->manager->llmnr_host_ipv6_key) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!a->llmnr_address_rr) {
+ a->llmnr_address_rr = dns_resource_record_new(a->link->manager->llmnr_host_ipv6_key);
+ if (!a->llmnr_address_rr) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ a->llmnr_address_rr->aaaa.in6_addr = a->in_addr.in6;
+ a->llmnr_address_rr->ttl = LLMNR_DEFAULT_TTL;
+ }
+
+ if (!a->llmnr_ptr_rr) {
+ r = dns_resource_record_new_reverse(&a->llmnr_ptr_rr, a->family, &a->in_addr, a->link->manager->llmnr_hostname);
+ if (r < 0)
+ goto fail;
+
+ a->llmnr_ptr_rr->ttl = LLMNR_DEFAULT_TTL;
+ }
+
+ r = dns_zone_put(&a->link->llmnr_ipv6_scope->zone, a->link->llmnr_ipv6_scope, a->llmnr_address_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add AAAA record to LLMNR zone: %m");
+
+ r = dns_zone_put(&a->link->llmnr_ipv6_scope->zone, a->link->llmnr_ipv6_scope, a->llmnr_ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv6 PTR record to LLMNR zone: %m");
+ } else {
+ if (a->llmnr_address_rr) {
+ if (a->link->llmnr_ipv6_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv6_scope->zone, a->llmnr_address_rr);
+ a->llmnr_address_rr = dns_resource_record_unref(a->llmnr_address_rr);
+ }
+
+ if (a->llmnr_ptr_rr) {
+ if (a->link->llmnr_ipv6_scope)
+ dns_zone_remove_rr(&a->link->llmnr_ipv6_scope->zone, a->llmnr_ptr_rr);
+ a->llmnr_ptr_rr = dns_resource_record_unref(a->llmnr_ptr_rr);
+ }
+ }
+
+ if (!force_remove &&
+ link_address_relevant(a, true) &&
+ a->link->mdns_ipv6_scope &&
+ a->link->mdns_support == RESOLVE_SUPPORT_YES &&
+ a->link->manager->mdns_support == RESOLVE_SUPPORT_YES) {
+
+ if (!a->link->manager->mdns_host_ipv6_key) {
+ a->link->manager->mdns_host_ipv6_key = dns_resource_key_new(DNS_CLASS_IN, DNS_TYPE_AAAA, a->link->manager->mdns_hostname);
+ if (!a->link->manager->mdns_host_ipv6_key) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ if (!a->mdns_address_rr) {
+ a->mdns_address_rr = dns_resource_record_new(a->link->manager->mdns_host_ipv6_key);
+ if (!a->mdns_address_rr) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ a->mdns_address_rr->aaaa.in6_addr = a->in_addr.in6;
+ a->mdns_address_rr->ttl = MDNS_DEFAULT_TTL;
+ }
+
+ if (!a->mdns_ptr_rr) {
+ r = dns_resource_record_new_reverse(&a->mdns_ptr_rr, a->family, &a->in_addr, a->link->manager->mdns_hostname);
+ if (r < 0)
+ goto fail;
+
+ a->mdns_ptr_rr->ttl = MDNS_DEFAULT_TTL;
+ }
+
+ r = dns_zone_put(&a->link->mdns_ipv6_scope->zone, a->link->mdns_ipv6_scope, a->mdns_address_rr, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add AAAA record to MDNS zone: %m");
+
+ r = dns_zone_put(&a->link->mdns_ipv6_scope->zone, a->link->mdns_ipv6_scope, a->mdns_ptr_rr, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add IPv6 PTR record to MDNS zone: %m");
+ } else {
+ if (a->mdns_address_rr) {
+ if (a->link->mdns_ipv6_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv6_scope->zone, a->mdns_address_rr);
+ a->mdns_address_rr = dns_resource_record_unref(a->mdns_address_rr);
+ }
+
+ if (a->mdns_ptr_rr) {
+ if (a->link->mdns_ipv6_scope)
+ dns_zone_remove_rr(&a->link->mdns_ipv6_scope->zone, a->mdns_ptr_rr);
+ a->mdns_ptr_rr = dns_resource_record_unref(a->mdns_ptr_rr);
+ }
+ }
+ }
+
+ return;
+
+fail:
+ log_debug_errno(r, "Failed to update address RRs: %m");
+}
+
+int link_address_update_rtnl(LinkAddress *a, sd_netlink_message *m) {
+ int r;
+ assert(a);
+ assert(m);
+
+ r = sd_rtnl_message_addr_get_flags(m, &a->flags);
+ if (r < 0)
+ return r;
+
+ sd_rtnl_message_addr_get_scope(m, &a->scope);
+
+ link_allocate_scopes(a->link);
+ link_add_rrs(a->link, false);
+
+ return 0;
+}
+
+bool link_address_relevant(LinkAddress *a, bool local_multicast) {
+ assert(a);
+
+ if (a->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))
+ return false;
+
+ if (a->scope >= (local_multicast ? RT_SCOPE_HOST : RT_SCOPE_LINK))
+ return false;
+
+ return true;
+}
+
+static bool link_needs_save(Link *l) {
+ assert(l);
+
+ /* Returns true if any of the settings where set different from the default */
+
+ if (l->is_managed)
+ return false;
+
+ if (l->llmnr_support != RESOLVE_SUPPORT_YES ||
+ l->mdns_support != RESOLVE_SUPPORT_NO ||
+ l->dnssec_mode != _DNSSEC_MODE_INVALID ||
+ l->dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID)
+ return true;
+
+ if (l->dns_servers ||
+ l->search_domains)
+ return true;
+
+ if (!set_isempty(l->dnssec_negative_trust_anchors))
+ return true;
+
+ if (l->default_route >= 0)
+ return true;
+
+ return false;
+}
+
+int link_save_user(Link *l) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *v;
+ int r;
+
+ assert(l);
+ assert(l->state_file);
+
+ if (!link_needs_save(l)) {
+ (void) unlink(l->state_file);
+ return 0;
+ }
+
+ r = mkdir_parents(l->state_file, 0700);
+ if (r < 0)
+ goto fail;
+
+ r = fopen_temporary(l->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ (void) fchmod(fileno(f), 0644);
+
+ fputs("# This is private data. Do not parse.\n", f);
+
+ v = resolve_support_to_string(l->llmnr_support);
+ if (v)
+ fprintf(f, "LLMNR=%s\n", v);
+
+ v = resolve_support_to_string(l->mdns_support);
+ if (v)
+ fprintf(f, "MDNS=%s\n", v);
+
+ v = dnssec_mode_to_string(l->dnssec_mode);
+ if (v)
+ fprintf(f, "DNSSEC=%s\n", v);
+
+ if (l->default_route >= 0)
+ fprintf(f, "DEFAULT_ROUTE=%s\n", yes_no(l->default_route));
+
+ if (l->dns_servers) {
+ DnsServer *server;
+
+ fputs("SERVERS=", f);
+ LIST_FOREACH(servers, server, l->dns_servers) {
+
+ if (server != l->dns_servers)
+ fputc(' ', f);
+
+ v = dns_server_string_full(server);
+ if (!v) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fputs(v, f);
+ }
+ fputc('\n', f);
+ }
+
+ if (l->search_domains) {
+ DnsSearchDomain *domain;
+
+ fputs("DOMAINS=", f);
+ LIST_FOREACH(domains, domain, l->search_domains) {
+
+ if (domain != l->search_domains)
+ fputc(' ', f);
+
+ if (domain->route_only)
+ fputc('~', f);
+
+ fputs(DNS_SEARCH_DOMAIN_NAME(domain), f);
+ }
+ fputc('\n', f);
+ }
+
+ if (!set_isempty(l->dnssec_negative_trust_anchors)) {
+ bool space = false;
+ char *nta;
+
+ fputs("NTAS=", f);
+ SET_FOREACH(nta, l->dnssec_negative_trust_anchors) {
+
+ if (space)
+ fputc(' ', f);
+
+ fputs(nta, f);
+ space = true;
+ }
+ fputc('\n', f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, l->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(l->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save link data %s: %m", l->state_file);
+}
+
+int link_load_user(Link *l) {
+ _cleanup_free_ char
+ *llmnr = NULL,
+ *mdns = NULL,
+ *dnssec = NULL,
+ *servers = NULL,
+ *domains = NULL,
+ *ntas = NULL,
+ *default_route = NULL;
+
+ ResolveSupport s;
+ const char *p;
+ int r;
+
+ assert(l);
+ assert(l->state_file);
+
+ /* Try to load only a single time */
+ if (l->loaded)
+ return 0;
+ l->loaded = true;
+
+ if (l->is_managed)
+ return 0; /* if the device is managed, then networkd is our configuration source, not the bus API */
+
+ r = parse_env_file(NULL, l->state_file,
+ "LLMNR", &llmnr,
+ "MDNS", &mdns,
+ "DNSSEC", &dnssec,
+ "SERVERS", &servers,
+ "DOMAINS", &domains,
+ "NTAS", &ntas,
+ "DEFAULT_ROUTE", &default_route);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ goto fail;
+
+ link_flush_settings(l);
+
+ /* If we can't recognize the LLMNR or MDNS setting we don't override the default */
+ s = resolve_support_from_string(llmnr);
+ if (s >= 0)
+ l->llmnr_support = s;
+
+ s = resolve_support_from_string(mdns);
+ if (s >= 0)
+ l->mdns_support = s;
+
+ r = parse_boolean(default_route);
+ if (r >= 0)
+ l->default_route = r;
+
+ /* If we can't recognize the DNSSEC setting, then set it to invalid, so that the daemon default is used. */
+ l->dnssec_mode = dnssec_mode_from_string(dnssec);
+
+ for (p = servers;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ goto fail;
+ if (r == 0)
+ break;
+
+ r = link_update_dns_server_one(l, word);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to load DNS server '%s', ignoring: %m", word);
+ continue;
+ }
+ }
+
+ for (p = domains;;) {
+ _cleanup_free_ char *word = NULL;
+ const char *n;
+ bool is_route;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ goto fail;
+ if (r == 0)
+ break;
+
+ is_route = word[0] == '~';
+ n = is_route ? word + 1 : word;
+
+ r = link_update_search_domain_one(l, n, is_route);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to load search domain '%s', ignoring: %m", word);
+ continue;
+ }
+ }
+
+ if (ntas) {
+ _cleanup_set_free_free_ Set *ns = NULL;
+
+ ns = set_new(&dns_name_hash_ops);
+ if (!ns) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ r = set_put_strsplit(ns, ntas, NULL, 0);
+ if (r < 0)
+ goto fail;
+
+ l->dnssec_negative_trust_anchors = TAKE_PTR(ns);
+ }
+
+ return 0;
+
+fail:
+ return log_error_errno(r, "Failed to load link data %s: %m", l->state_file);
+}
+
+void link_remove_user(Link *l) {
+ assert(l);
+ assert(l->state_file);
+
+ (void) unlink(l->state_file);
+}
+
+bool link_negative_trust_anchor_lookup(Link *l, const char *name) {
+ int r;
+
+ assert(l);
+ assert(name);
+
+ /* Checks whether the specified domain (or any of its parent domains) are listed as per-link NTA. */
+
+ for (;;) {
+ if (set_contains(l->dnssec_negative_trust_anchors, name))
+ return true;
+
+ /* And now, let's look at the parent, and check that too */
+ r = dns_name_parent(&name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ return false;
+}
diff --git a/src/resolve/resolved-link.h b/src/resolve/resolved-link.h
new file mode 100644
index 0000000..26b0d13
--- /dev/null
+++ b/src/resolve/resolved-link.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+#include "ratelimit.h"
+#include "resolve-util.h"
+
+typedef struct Link Link;
+typedef struct LinkAddress LinkAddress;
+
+#include "resolved-dns-rr.h"
+#include "resolved-dns-scope.h"
+#include "resolved-dns-search-domain.h"
+#include "resolved-dns-server.h"
+
+#define LINK_SEARCH_DOMAINS_MAX 256
+#define LINK_DNS_SERVERS_MAX 256
+
+struct LinkAddress {
+ Link *link;
+
+ int family;
+ union in_addr_union in_addr;
+
+ unsigned char flags, scope;
+
+ DnsResourceRecord *llmnr_address_rr;
+ DnsResourceRecord *llmnr_ptr_rr;
+ DnsResourceRecord *mdns_address_rr;
+ DnsResourceRecord *mdns_ptr_rr;
+
+ LIST_FIELDS(LinkAddress, addresses);
+};
+
+struct Link {
+ Manager *manager;
+
+ int ifindex;
+ unsigned flags;
+
+ LIST_HEAD(LinkAddress, addresses);
+ unsigned n_addresses;
+
+ LIST_HEAD(DnsServer, dns_servers);
+ DnsServer *current_dns_server;
+ unsigned n_dns_servers;
+
+ LIST_HEAD(DnsSearchDomain, search_domains);
+ unsigned n_search_domains;
+
+ int default_route;
+
+ ResolveSupport llmnr_support;
+ ResolveSupport mdns_support;
+ DnsOverTlsMode dns_over_tls_mode;
+ DnssecMode dnssec_mode;
+ Set *dnssec_negative_trust_anchors;
+
+ DnsScope *unicast_scope;
+ DnsScope *llmnr_ipv4_scope;
+ DnsScope *llmnr_ipv6_scope;
+ DnsScope *mdns_ipv4_scope;
+ DnsScope *mdns_ipv6_scope;
+
+ bool is_managed;
+
+ char *ifname;
+ uint32_t mtu;
+ uint8_t operstate;
+
+ bool loaded;
+ char *state_file;
+
+ bool unicast_relevant;
+};
+
+int link_new(Manager *m, Link **ret, int ifindex);
+Link *link_free(Link *l);
+int link_process_rtnl(Link *l, sd_netlink_message *m);
+int link_update(Link *l);
+bool link_relevant(Link *l, int family, bool local_multicast);
+LinkAddress* link_find_address(Link *l, int family, const union in_addr_union *in_addr);
+void link_add_rrs(Link *l, bool force_remove);
+
+void link_flush_settings(Link *l);
+void link_set_dnssec_mode(Link *l, DnssecMode mode);
+void link_set_dns_over_tls_mode(Link *l, DnsOverTlsMode mode);
+void link_allocate_scopes(Link *l);
+
+DnsServer* link_set_dns_server(Link *l, DnsServer *s);
+DnsServer* link_get_dns_server(Link *l);
+void link_next_dns_server(Link *l);
+
+DnssecMode link_get_dnssec_mode(Link *l);
+bool link_dnssec_supported(Link *l);
+
+DnsOverTlsMode link_get_dns_over_tls_mode(Link *l);
+
+int link_save_user(Link *l);
+int link_load_user(Link *l);
+void link_remove_user(Link *l);
+
+int link_address_new(Link *l, LinkAddress **ret, int family, const union in_addr_union *in_addr);
+LinkAddress *link_address_free(LinkAddress *a);
+int link_address_update_rtnl(LinkAddress *a, sd_netlink_message *m);
+bool link_address_relevant(LinkAddress *l, bool local_multicast);
+void link_address_add_rrs(LinkAddress *a, bool force_remove);
+
+bool link_negative_trust_anchor_lookup(Link *l, const char *name);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_free);
diff --git a/src/resolve/resolved-llmnr.c b/src/resolve/resolved-llmnr.c
new file mode 100644
index 0000000..2ddf088
--- /dev/null
+++ b/src/resolve/resolved-llmnr.c
@@ -0,0 +1,450 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <resolv.h>
+
+#include "errno-util.h"
+#include "fd-util.h"
+#include "resolved-llmnr.h"
+#include "resolved-manager.h"
+
+void manager_llmnr_stop(Manager *m) {
+ assert(m);
+
+ m->llmnr_ipv4_udp_event_source = sd_event_source_unref(m->llmnr_ipv4_udp_event_source);
+ m->llmnr_ipv4_udp_fd = safe_close(m->llmnr_ipv4_udp_fd);
+
+ m->llmnr_ipv6_udp_event_source = sd_event_source_unref(m->llmnr_ipv6_udp_event_source);
+ m->llmnr_ipv6_udp_fd = safe_close(m->llmnr_ipv6_udp_fd);
+
+ m->llmnr_ipv4_tcp_event_source = sd_event_source_unref(m->llmnr_ipv4_tcp_event_source);
+ m->llmnr_ipv4_tcp_fd = safe_close(m->llmnr_ipv4_tcp_fd);
+
+ m->llmnr_ipv6_tcp_event_source = sd_event_source_unref(m->llmnr_ipv6_tcp_event_source);
+ m->llmnr_ipv6_tcp_fd = safe_close(m->llmnr_ipv6_tcp_fd);
+}
+
+int manager_llmnr_start(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_support == RESOLVE_SUPPORT_NO)
+ return 0;
+
+ r = manager_llmnr_ipv4_udp_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+
+ r = manager_llmnr_ipv4_tcp_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+
+ if (socket_ipv6_is_supported()) {
+ r = manager_llmnr_ipv6_udp_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+
+ r = manager_llmnr_ipv6_tcp_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+
+eaddrinuse:
+ log_warning("Another LLMNR responder prohibits binding the socket to the same port. Turning off LLMNR support.");
+ m->llmnr_support = RESOLVE_SUPPORT_NO;
+ manager_llmnr_stop(m);
+
+ return 0;
+}
+
+static int on_llmnr_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ DnsTransaction *t = NULL;
+ Manager *m = userdata;
+ DnsScope *scope;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(m);
+
+ r = manager_recv(m, fd, DNS_PROTOCOL_LLMNR, &p);
+ if (r <= 0)
+ return r;
+
+ if (manager_our_packet(m, p))
+ return 0;
+
+ scope = manager_find_scope(m, p);
+ if (!scope) {
+ log_debug("Got LLMNR UDP packet on unknown scope. Ignoring.");
+ return 0;
+ }
+
+ if (dns_packet_validate_reply(p) > 0) {
+ log_debug("Got LLMNR UDP reply packet for id %u", DNS_PACKET_ID(p));
+
+ dns_scope_check_conflicts(scope, p);
+
+ t = hashmap_get(m->dns_transactions, UINT_TO_PTR(DNS_PACKET_ID(p)));
+ if (t)
+ dns_transaction_process_reply(t, p);
+
+ } else if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got LLMNR UDP query packet for id %u", DNS_PACKET_ID(p));
+
+ dns_scope_process_query(scope, NULL, p);
+ } else
+ log_debug("Invalid LLMNR UDP packet, ignoring.");
+
+ return 0;
+}
+
+int manager_llmnr_ipv4_udp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(LLMNR_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_ipv4_udp_fd >= 0)
+ return m->llmnr_ipv4_udp_fd;
+
+ s = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(UDP): Failed to create socket: %m");
+
+ /* RFC 4795, section 2.5 recommends setting the TTL of UDP packets to 255. */
+ r = setsockopt_int(s, IPPROTO_IP, IP_TTL, 255);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_MULTICAST_TTL, 255);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_MULTICAST_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_MULTICAST_LOOP, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_MULTICAST_LOOP: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_PKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_RECVTTL, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_RECVTTL: %m");
+
+ /* Disable Don't-Fragment bit in the IP header */
+ r = setsockopt_int(s, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set IP_MTU_DISCOVER: %m");
+
+ /* first try to bind without SO_REUSEADDR to detect another LLMNR responder */
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "LLMNR-IPv4(UDP): Failed to bind socket: %m");
+
+ log_warning("LLMNR-IPv4(UDP): There appears to be another LLMNR responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(UDP): Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple LLMNR responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->llmnr_ipv4_udp_event_source, s, EPOLLIN, on_llmnr_packet, m);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(UDP): Failed to create event source: %m");
+
+ (void) sd_event_source_set_description(m->llmnr_ipv4_udp_event_source, "llmnr-ipv4-udp");
+
+ return m->llmnr_ipv4_udp_fd = TAKE_FD(s);
+}
+
+int manager_llmnr_ipv6_udp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(LLMNR_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_ipv6_udp_fd >= 0)
+ return m->llmnr_ipv6_udp_fd;
+
+ s = socket(AF_INET6, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(UDP): Failed to create socket: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 255);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_UNICAST_HOPS: %m");
+
+ /* RFC 4795, section 2.5 recommends setting the TTL of UDP packets to 255. */
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 255);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_MULTICAST_HOPS: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_MULTICAST_LOOP: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_V6ONLY, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_V6ONLY: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_RECVPKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set IPV6_RECVHOPLIMIT: %m");
+
+ /* first try to bind without SO_REUSEADDR to detect another LLMNR responder */
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "LLMNR-IPv6(UDP): Failed to bind socket: %m");
+
+ log_warning("LLMNR-IPv6(UDP): There appears to be another LLMNR responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(UDP): Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple LLMNR responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->llmnr_ipv6_udp_event_source, s, EPOLLIN, on_llmnr_packet, m);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(UDP): Failed to create event source: %m");
+
+ (void) sd_event_source_set_description(m->llmnr_ipv6_udp_event_source, "llmnr-ipv6-udp");
+
+ return m->llmnr_ipv6_udp_fd = TAKE_FD(s);
+}
+
+static int on_llmnr_stream_packet(DnsStream *s) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ DnsScope *scope;
+
+ assert(s);
+
+ p = dns_stream_take_read_packet(s);
+ assert(p);
+
+ scope = manager_find_scope(s->manager, p);
+ if (!scope)
+ log_debug("Got LLMNR TCP packet on unknown scope. Ignoring.");
+ else if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got LLMNR TCP query packet for id %u", DNS_PACKET_ID(p));
+
+ dns_scope_process_query(scope, s, p);
+ } else
+ log_debug("Invalid LLMNR TCP packet, ignoring.");
+
+ dns_stream_unref(s);
+ return 0;
+}
+
+static int on_llmnr_stream(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ DnsStream *stream;
+ Manager *m = userdata;
+ int cfd, r;
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return 0;
+
+ return -errno;
+ }
+
+ r = dns_stream_new(m, &stream, DNS_STREAM_LLMNR_RECV, DNS_PROTOCOL_LLMNR, cfd, NULL);
+ if (r < 0) {
+ safe_close(cfd);
+ return r;
+ }
+
+ stream->on_packet = on_llmnr_stream_packet;
+ /* We don't configure a "complete" handler here, we rely on the default handler than simply drops the
+ * reference to the stream, thus freeing it */
+ return 0;
+}
+
+int manager_llmnr_ipv4_tcp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(LLMNR_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_ipv4_tcp_fd >= 0)
+ return m->llmnr_ipv4_tcp_fd;
+
+ s = socket(AF_INET, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(TCP): Failed to create socket: %m");
+
+ /* RFC 4795, section 2.5. requires setting the TTL of TCP streams to 1 */
+ r = setsockopt_int(s, IPPROTO_IP, IP_TTL, 1);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set IP_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set IP_PKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_RECVTTL, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set IP_RECVTTL: %m");
+
+ /* Disable Don't-Fragment bit in the IP header */
+ r = setsockopt_int(s, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set IP_MTU_DISCOVER: %m");
+
+ /* first try to bind without SO_REUSEADDR to detect another LLMNR responder */
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "LLMNR-IPv4(TCP): Failed to bind socket: %m");
+
+ log_warning("LLMNR-IPv4(TCP): There appears to be another LLMNR responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(TCP): Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple LLMNR responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = listen(s, SOMAXCONN);
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv4(TCP): Failed to listen the stream: %m");
+
+ r = sd_event_add_io(m->event, &m->llmnr_ipv4_tcp_event_source, s, EPOLLIN, on_llmnr_stream, m);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv4(TCP): Failed to create event source: %m");
+
+ (void) sd_event_source_set_description(m->llmnr_ipv4_tcp_event_source, "llmnr-ipv4-tcp");
+
+ return m->llmnr_ipv4_tcp_fd = TAKE_FD(s);
+}
+
+int manager_llmnr_ipv6_tcp_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(LLMNR_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->llmnr_ipv6_tcp_fd >= 0)
+ return m->llmnr_ipv6_tcp_fd;
+
+ s = socket(AF_INET6, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(TCP): Failed to create socket: %m");
+
+ /* RFC 4795, section 2.5. requires setting the TTL of TCP streams to 1 */
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set IPV6_UNICAST_HOPS: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_V6ONLY, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set IPV6_V6ONLY: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set IPV6_RECVPKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set IPV6_RECVHOPLIMIT: %m");
+
+ /* first try to bind without SO_REUSEADDR to detect another LLMNR responder */
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "LLMNR-IPv6(TCP): Failed to bind socket: %m");
+
+ log_warning("LLMNR-IPv6(TCP): There appears to be another LLMNR responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(TCP): Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple LLMNR responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = listen(s, SOMAXCONN);
+ if (r < 0)
+ return log_error_errno(errno, "LLMNR-IPv6(TCP): Failed to listen the stream: %m");
+
+ r = sd_event_add_io(m->event, &m->llmnr_ipv6_tcp_event_source, s, EPOLLIN, on_llmnr_stream, m);
+ if (r < 0)
+ return log_error_errno(r, "LLMNR-IPv6(TCP): Failed to create event source: %m");
+
+ (void) sd_event_source_set_description(m->llmnr_ipv6_tcp_event_source, "llmnr-ipv6-tcp");
+
+ return m->llmnr_ipv6_tcp_fd = TAKE_FD(s);
+}
diff --git a/src/resolve/resolved-llmnr.h b/src/resolve/resolved-llmnr.h
new file mode 100644
index 0000000..4cdd260
--- /dev/null
+++ b/src/resolve/resolved-llmnr.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "resolved-manager.h"
+
+#define LLMNR_PORT 5355
+
+int manager_llmnr_ipv4_udp_fd(Manager *m);
+int manager_llmnr_ipv6_udp_fd(Manager *m);
+int manager_llmnr_ipv4_tcp_fd(Manager *m);
+int manager_llmnr_ipv6_tcp_fd(Manager *m);
+
+void manager_llmnr_stop(Manager *m);
+int manager_llmnr_start(Manager *m);
diff --git a/src/resolve/resolved-manager.c b/src/resolve/resolved-manager.c
new file mode 100644
index 0000000..7690eac
--- /dev/null
+++ b/src/resolve/resolved-manager.c
@@ -0,0 +1,1551 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bus-polkit.h"
+#include "dirent-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "idn-util.h"
+#include "io-util.h"
+#include "missing_network.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "ordered-set.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "resolved-bus.h"
+#include "resolved-conf.h"
+#include "resolved-dns-stub.h"
+#include "resolved-dnssd.h"
+#include "resolved-etc-hosts.h"
+#include "resolved-llmnr.h"
+#include "resolved-manager.h"
+#include "resolved-mdns.h"
+#include "resolved-resolv-conf.h"
+#include "resolved-varlink.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "utf8.h"
+
+#define SEND_TIMEOUT_USEC (200 * USEC_PER_MSEC)
+
+static int manager_process_link(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
+ Manager *m = userdata;
+ uint16_t type;
+ Link *l;
+ int ifindex, r;
+
+ assert(rtnl);
+ assert(m);
+ assert(mm);
+
+ r = sd_netlink_message_get_type(mm, &type);
+ if (r < 0)
+ goto fail;
+
+ r = sd_rtnl_message_link_get_ifindex(mm, &ifindex);
+ if (r < 0)
+ goto fail;
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+
+ switch (type) {
+
+ case RTM_NEWLINK:{
+ bool is_new = !l;
+
+ if (!l) {
+ r = link_new(m, &l, ifindex);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = link_process_rtnl(l, mm);
+ if (r < 0)
+ goto fail;
+
+ r = link_update(l);
+ if (r < 0)
+ goto fail;
+
+ if (is_new)
+ log_debug("Found new link %i/%s", ifindex, l->ifname);
+
+ break;
+ }
+
+ case RTM_DELLINK:
+ if (l) {
+ log_debug("Removing link %i/%s", l->ifindex, l->ifname);
+ link_remove_user(l);
+ link_free(l);
+ }
+
+ break;
+ }
+
+ return 0;
+
+fail:
+ log_warning_errno(r, "Failed to process RTNL link message: %m");
+ return 0;
+}
+
+static int manager_process_address(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
+ Manager *m = userdata;
+ union in_addr_union address;
+ uint16_t type;
+ int r, ifindex, family;
+ LinkAddress *a;
+ Link *l;
+
+ assert(rtnl);
+ assert(mm);
+ assert(m);
+
+ r = sd_netlink_message_get_type(mm, &type);
+ if (r < 0)
+ goto fail;
+
+ r = sd_rtnl_message_addr_get_ifindex(mm, &ifindex);
+ if (r < 0)
+ goto fail;
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!l)
+ return 0;
+
+ r = sd_rtnl_message_addr_get_family(mm, &family);
+ if (r < 0)
+ goto fail;
+
+ switch (family) {
+
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(mm, IFA_LOCAL, &address.in);
+ if (r < 0) {
+ r = sd_netlink_message_read_in_addr(mm, IFA_ADDRESS, &address.in);
+ if (r < 0)
+ goto fail;
+ }
+
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(mm, IFA_LOCAL, &address.in6);
+ if (r < 0) {
+ r = sd_netlink_message_read_in6_addr(mm, IFA_ADDRESS, &address.in6);
+ if (r < 0)
+ goto fail;
+ }
+
+ break;
+
+ default:
+ return 0;
+ }
+
+ a = link_find_address(l, family, &address);
+
+ switch (type) {
+
+ case RTM_NEWADDR:
+
+ if (!a) {
+ r = link_address_new(l, &a, family, &address);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_address_update_rtnl(a, mm);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case RTM_DELADDR:
+ link_address_free(a);
+ break;
+ }
+
+ return 0;
+
+fail:
+ log_warning_errno(r, "Failed to process RTNL address message: %m");
+ return 0;
+}
+
+static int manager_rtnl_listen(Manager *m) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ sd_netlink_message *i;
+ int r;
+
+ assert(m);
+
+ /* First, subscribe to interfaces coming and going */
+ r = sd_netlink_open(&m->rtnl);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_attach_event(m->rtnl, m->event, SD_EVENT_PRIORITY_IMPORTANT);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWLINK, manager_process_link, NULL, m, "resolve-NEWLINK");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELLINK, manager_process_link, NULL, m, "resolve-DELLINK");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_NEWADDR, manager_process_address, NULL, m, "resolve-NEWADDR");
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_add_match(m->rtnl, NULL, RTM_DELADDR, manager_process_address, NULL, m, "resolve-DELADDR");
+ if (r < 0)
+ return r;
+
+ /* Then, enumerate all links */
+ r = sd_rtnl_message_new_link(m->rtnl, &req, RTM_GETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ r = manager_process_link(m->rtnl, i, m);
+ if (r < 0)
+ return r;
+ }
+
+ req = sd_netlink_message_unref(req);
+ reply = sd_netlink_message_unref(reply);
+
+ /* Finally, enumerate all addresses, too */
+ r = sd_rtnl_message_new_addr(m->rtnl, &req, RTM_GETADDR, 0, AF_UNSPEC);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(m->rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (i = reply; i; i = sd_netlink_message_next(i)) {
+ r = manager_process_address(m->rtnl, i, m);
+ if (r < 0)
+ return r;
+ }
+
+ return r;
+}
+
+static int on_network_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ Link *l;
+ int r;
+
+ assert(m);
+
+ sd_network_monitor_flush(m->network_monitor);
+
+ HASHMAP_FOREACH(l, m->links) {
+ r = link_update(l);
+ if (r < 0)
+ log_warning_errno(r, "Failed to update monitor information for %i: %m", l->ifindex);
+ }
+
+ (void) manager_write_resolv_conf(m);
+ (void) manager_send_changed(m, "DNS");
+
+ return 0;
+}
+
+static int manager_network_monitor_listen(Manager *m) {
+ int r, fd, events;
+
+ assert(m);
+
+ r = sd_network_monitor_new(&m->network_monitor, NULL);
+ if (r < 0)
+ return r;
+
+ fd = sd_network_monitor_get_fd(m->network_monitor);
+ if (fd < 0)
+ return fd;
+
+ events = sd_network_monitor_get_events(m->network_monitor);
+ if (events < 0)
+ return events;
+
+ r = sd_event_add_io(m->event, &m->network_event_source, fd, events, &on_network_event, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(m->network_event_source, SD_EVENT_PRIORITY_IMPORTANT+5);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->network_event_source, "network-monitor");
+
+ return 0;
+}
+
+static int determine_hostname(char **full_hostname, char **llmnr_hostname, char **mdns_hostname) {
+ _cleanup_free_ char *h = NULL, *n = NULL;
+#if HAVE_LIBIDN2
+ _cleanup_free_ char *utf8 = NULL;
+#elif HAVE_LIBIDN
+ int k;
+#endif
+ char label[DNS_LABEL_MAX];
+ const char *p, *decoded;
+ int r;
+
+ assert(full_hostname);
+ assert(llmnr_hostname);
+ assert(mdns_hostname);
+
+ /* Extract and normalize the first label of the locally configured hostname, and check it's not "localhost". */
+
+ r = gethostname_strict(&h);
+ if (r < 0)
+ return log_debug_errno(r, "Can't determine system hostname: %m");
+
+ p = h;
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape hostname: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Couldn't find a single label in hostname.");
+
+#if HAVE_LIBIDN || HAVE_LIBIDN2
+ r = dlopen_idn();
+ if (r < 0) {
+ log_debug_errno(r, "Failed to initialize IDN support, ignoring: %m");
+ decoded = label; /* no decoding */
+ } else
+#endif
+ {
+#if HAVE_LIBIDN2
+ r = sym_idn2_to_unicode_8z8z(label, &utf8, 0);
+ if (r != IDN2_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Failed to undo IDNA: %s", sym_idn2_strerror(r));
+ assert(utf8_is_valid(utf8));
+
+ r = strlen(utf8);
+ decoded = utf8;
+#elif HAVE_LIBIDN
+ k = dns_label_undo_idna(label, r, label, sizeof label);
+ if (k < 0)
+ return log_error_errno(k, "Failed to undo IDNA: %m");
+ if (k > 0)
+ r = k;
+
+ if (!utf8_is_valid(label))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "System hostname is not UTF-8 clean.");
+ decoded = label;
+#else
+ decoded = label; /* no decoding */
+#endif
+ }
+
+ r = dns_label_escape_new(decoded, r, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape hostname: %m");
+
+ if (is_localhost(n))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "System hostname is 'localhost', ignoring.");
+
+ r = dns_name_concat(n, "local", 0, mdns_hostname);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine mDNS hostname: %m");
+
+ *llmnr_hostname = TAKE_PTR(n);
+ *full_hostname = TAKE_PTR(h);
+
+ return 0;
+}
+
+static const char *fallback_hostname(void) {
+
+ /* Determine the fall back hostname. For exposing this system to the outside world, we cannot have it to be
+ * "localhost" even if that's the compiled in hostname. In this case, let's revert to "linux" instead. */
+
+ if (is_localhost(FALLBACK_HOSTNAME))
+ return "linux";
+
+ return FALLBACK_HOSTNAME;
+}
+
+static int make_fallback_hostnames(char **full_hostname, char **llmnr_hostname, char **mdns_hostname) {
+ _cleanup_free_ char *n = NULL, *m = NULL;
+ char label[DNS_LABEL_MAX], *h;
+ const char *p;
+ int r;
+
+ assert(full_hostname);
+ assert(llmnr_hostname);
+ assert(mdns_hostname);
+
+ p = fallback_hostname();
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape fallback hostname: %m");
+
+ assert(r > 0); /* The fallback hostname must have at least one label */
+
+ r = dns_label_escape_new(label, r, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape fallback hostname: %m");
+
+ r = dns_name_concat(n, "local", 0, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to concatenate mDNS hostname: %m");
+
+ h = strdup(fallback_hostname());
+ if (!h)
+ return log_oom();
+
+ *llmnr_hostname = TAKE_PTR(n);
+ *mdns_hostname = TAKE_PTR(m);
+
+ *full_hostname = h;
+
+ return 0;
+}
+
+static int on_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ _cleanup_free_ char *full_hostname = NULL, *llmnr_hostname = NULL, *mdns_hostname = NULL;
+ Manager *m = userdata;
+ bool llmnr_hostname_changed;
+ int r;
+
+ assert(m);
+
+ r = determine_hostname(&full_hostname, &llmnr_hostname, &mdns_hostname);
+ if (r < 0)
+ return 0; /* ignore invalid hostnames */
+
+ llmnr_hostname_changed = !streq(llmnr_hostname, m->llmnr_hostname);
+ if (streq(full_hostname, m->full_hostname) &&
+ !llmnr_hostname_changed &&
+ streq(mdns_hostname, m->mdns_hostname))
+ return 0;
+
+ log_info("System hostname changed to '%s'.", full_hostname);
+
+ free_and_replace(m->full_hostname, full_hostname);
+ free_and_replace(m->llmnr_hostname, llmnr_hostname);
+ free_and_replace(m->mdns_hostname, mdns_hostname);
+
+ manager_refresh_rrs(m);
+ (void) manager_send_changed(m, "LLMNRHostname");
+
+ return 0;
+}
+
+static int manager_watch_hostname(Manager *m) {
+ int r;
+
+ assert(m);
+
+ m->hostname_fd = open("/proc/sys/kernel/hostname",
+ O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (m->hostname_fd < 0) {
+ log_warning_errno(errno, "Failed to watch hostname: %m");
+ return 0;
+ }
+
+ r = sd_event_add_io(m->event, &m->hostname_event_source, m->hostname_fd, 0, on_hostname_change, m);
+ if (r < 0) {
+ if (r == -EPERM)
+ /* kernels prior to 3.2 don't support polling this file. Ignore the failure. */
+ m->hostname_fd = safe_close(m->hostname_fd);
+ else
+ return log_error_errno(r, "Failed to add hostname event source: %m");
+ }
+
+ (void) sd_event_source_set_description(m->hostname_event_source, "hostname");
+
+ r = determine_hostname(&m->full_hostname, &m->llmnr_hostname, &m->mdns_hostname);
+ if (r < 0) {
+ log_info("Defaulting to hostname '%s'.", fallback_hostname());
+
+ r = make_fallback_hostnames(&m->full_hostname, &m->llmnr_hostname, &m->mdns_hostname);
+ if (r < 0)
+ return r;
+ } else
+ log_info("Using system hostname '%s'.", m->full_hostname);
+
+ return 0;
+}
+
+static int manager_sigusr1(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ _cleanup_free_ char *buffer = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ Manager *m = userdata;
+ DnsServer *server;
+ size_t size = 0;
+ DnsScope *scope;
+ Link *l;
+
+ assert(s);
+ assert(si);
+ assert(m);
+
+ f = open_memstream_unlocked(&buffer, &size);
+ if (!f)
+ return log_oom();
+
+ LIST_FOREACH(scopes, scope, m->dns_scopes)
+ dns_scope_dump(scope, f);
+
+ LIST_FOREACH(servers, server, m->dns_servers)
+ dns_server_dump(server, f);
+ LIST_FOREACH(servers, server, m->fallback_dns_servers)
+ dns_server_dump(server, f);
+ HASHMAP_FOREACH(l, m->links)
+ LIST_FOREACH(servers, server, l->dns_servers)
+ dns_server_dump(server, f);
+
+ if (fflush_and_check(f) < 0)
+ return log_oom();
+
+ log_dump(LOG_INFO, buffer);
+ return 0;
+}
+
+static int manager_sigusr2(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(si);
+ assert(m);
+
+ manager_flush_caches(m);
+
+ return 0;
+}
+
+static int manager_sigrtmin1(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(si);
+ assert(m);
+
+ manager_reset_server_features(m);
+ return 0;
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .llmnr_ipv4_udp_fd = -1,
+ .llmnr_ipv6_udp_fd = -1,
+ .llmnr_ipv4_tcp_fd = -1,
+ .llmnr_ipv6_tcp_fd = -1,
+ .mdns_ipv4_fd = -1,
+ .mdns_ipv6_fd = -1,
+ .hostname_fd = -1,
+
+ .llmnr_support = DEFAULT_LLMNR_MODE,
+ .mdns_support = DEFAULT_MDNS_MODE,
+ .dnssec_mode = DEFAULT_DNSSEC_MODE,
+ .dns_over_tls_mode = DEFAULT_DNS_OVER_TLS_MODE,
+ .enable_cache = DNS_CACHE_MODE_YES,
+ .dns_stub_listener_mode = DNS_STUB_LISTENER_YES,
+ .read_resolv_conf = true,
+ .need_builtin_fallbacks = true,
+ .etc_hosts_last = USEC_INFINITY,
+ .etc_hosts_mtime = USEC_INFINITY,
+ .etc_hosts_ino = 0,
+ .etc_hosts_dev = 0,
+ .read_etc_hosts = true,
+ };
+
+ r = dns_trust_anchor_load(&m->trust_anchor);
+ if (r < 0)
+ return r;
+
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse configuration file: %m");
+
+#if ENABLE_DNS_OVER_TLS
+ r = dnstls_manager_init(m);
+ if (r < 0)
+ return r;
+#endif
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ r = manager_watch_hostname(m);
+ if (r < 0)
+ return r;
+
+ r = dnssd_load(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to load DNS-SD configuration files: %m");
+
+ r = dns_scope_new(m, &m->unicast_scope, NULL, DNS_PROTOCOL_DNS, AF_UNSPEC);
+ if (r < 0)
+ return r;
+
+ r = manager_network_monitor_listen(m);
+ if (r < 0)
+ return r;
+
+ r = manager_rtnl_listen(m);
+ if (r < 0)
+ return r;
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_add_signal(m->event, &m->sigusr1_event_source, SIGUSR1, manager_sigusr1, m);
+ (void) sd_event_add_signal(m->event, &m->sigusr2_event_source, SIGUSR2, manager_sigusr2, m);
+ (void) sd_event_add_signal(m->event, &m->sigrtmin1_event_source, SIGRTMIN+1, manager_sigrtmin1, m);
+
+ manager_cleanup_saved_user(m);
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int manager_start(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = manager_dns_stub_start(m);
+ if (r < 0)
+ return r;
+
+ r = manager_varlink_init(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+Manager *manager_free(Manager *m) {
+ Link *l;
+ DnssdService *s;
+
+ if (!m)
+ return NULL;
+
+ dns_server_unlink_all(m->dns_servers);
+ dns_server_unlink_all(m->fallback_dns_servers);
+ dns_search_domain_unlink_all(m->search_domains);
+
+ while ((l = hashmap_first(m->links)))
+ link_free(l);
+
+ while (m->dns_queries)
+ dns_query_free(m->dns_queries);
+
+ dns_scope_free(m->unicast_scope);
+
+ /* At this point only orphaned streams should remain. All others should have been freed already by their
+ * owners */
+ while (m->dns_streams)
+ dns_stream_unref(m->dns_streams);
+
+#if ENABLE_DNS_OVER_TLS
+ dnstls_manager_free(m);
+#endif
+
+ hashmap_free(m->links);
+ hashmap_free(m->dns_transactions);
+
+ sd_event_source_unref(m->network_event_source);
+ sd_network_monitor_unref(m->network_monitor);
+
+ sd_netlink_unref(m->rtnl);
+ sd_event_source_unref(m->rtnl_event_source);
+
+ manager_llmnr_stop(m);
+ manager_mdns_stop(m);
+ manager_dns_stub_stop(m);
+ manager_varlink_done(m);
+
+ ordered_set_free(m->dns_extra_stub_listeners);
+
+ bus_verify_polkit_async_registry_free(m->polkit_registry);
+
+ sd_bus_flush_close_unref(m->bus);
+
+ sd_event_source_unref(m->sigusr1_event_source);
+ sd_event_source_unref(m->sigusr2_event_source);
+ sd_event_source_unref(m->sigrtmin1_event_source);
+
+ sd_event_unref(m->event);
+
+ dns_resource_key_unref(m->llmnr_host_ipv4_key);
+ dns_resource_key_unref(m->llmnr_host_ipv6_key);
+ dns_resource_key_unref(m->mdns_host_ipv4_key);
+ dns_resource_key_unref(m->mdns_host_ipv6_key);
+
+ sd_event_source_unref(m->hostname_event_source);
+ safe_close(m->hostname_fd);
+
+ free(m->full_hostname);
+ free(m->llmnr_hostname);
+ free(m->mdns_hostname);
+
+ while ((s = hashmap_first(m->dnssd_services)))
+ dnssd_service_free(s);
+ hashmap_free(m->dnssd_services);
+
+ dns_trust_anchor_flush(&m->trust_anchor);
+ manager_etc_hosts_flush(m);
+
+ return mfree(m);
+}
+
+int manager_recv(Manager *m, int fd, DnsProtocol protocol, DnsPacket **ret) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
+ + CMSG_SPACE(int) /* ttl/hoplimit */
+ + EXTRA_CMSG_SPACE /* kernel appears to require extra buffer space */) control;
+ union sockaddr_union sa;
+ struct iovec iov;
+ struct msghdr mh = {
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t ms, l;
+ int r;
+
+ assert(m);
+ assert(fd >= 0);
+ assert(ret);
+
+ ms = next_datagram_size_fd(fd);
+ if (ms < 0)
+ return ms;
+
+ r = dns_packet_new(&p, protocol, ms, DNS_PACKET_SIZE_MAX);
+ if (r < 0)
+ return r;
+
+ iov = IOVEC_MAKE(DNS_PACKET_DATA(p), p->allocated);
+
+ l = recvmsg_safe(fd, &mh, 0);
+ if (IN_SET(l, -EAGAIN, -EINTR))
+ return 0;
+ if (l <= 0)
+ return l;
+
+ assert(!(mh.msg_flags & MSG_TRUNC));
+
+ p->size = (size_t) l;
+
+ p->family = sa.sa.sa_family;
+ p->ipproto = IPPROTO_UDP;
+ if (p->family == AF_INET) {
+ p->sender.in = sa.in.sin_addr;
+ p->sender_port = be16toh(sa.in.sin_port);
+ } else if (p->family == AF_INET6) {
+ p->sender.in6 = sa.in6.sin6_addr;
+ p->sender_port = be16toh(sa.in6.sin6_port);
+ p->ifindex = sa.in6.sin6_scope_id;
+ } else
+ return -EAFNOSUPPORT;
+
+ CMSG_FOREACH(cmsg, &mh) {
+
+ if (cmsg->cmsg_level == IPPROTO_IPV6) {
+ assert(p->family == AF_INET6);
+
+ switch (cmsg->cmsg_type) {
+
+ case IPV6_PKTINFO: {
+ struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);
+
+ if (p->ifindex <= 0)
+ p->ifindex = i->ipi6_ifindex;
+
+ p->destination.in6 = i->ipi6_addr;
+ break;
+ }
+
+ case IPV6_HOPLIMIT:
+ p->ttl = *(int *) CMSG_DATA(cmsg);
+ break;
+
+ }
+ } else if (cmsg->cmsg_level == IPPROTO_IP) {
+ assert(p->family == AF_INET);
+
+ switch (cmsg->cmsg_type) {
+
+ case IP_PKTINFO: {
+ struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);
+
+ if (p->ifindex <= 0)
+ p->ifindex = i->ipi_ifindex;
+
+ p->destination.in = i->ipi_addr;
+ break;
+ }
+
+ case IP_TTL:
+ p->ttl = *(int *) CMSG_DATA(cmsg);
+ break;
+ }
+ }
+ }
+
+ /* The Linux kernel sets the interface index to the loopback
+ * device if the packet came from the local host since it
+ * avoids the routing table in such a case. Let's unset the
+ * interface index in such a case. */
+ if (p->ifindex == LOOPBACK_IFINDEX)
+ p->ifindex = 0;
+
+ if (protocol != DNS_PROTOCOL_DNS) {
+ /* If we don't know the interface index still, we look for the
+ * first local interface with a matching address. Yuck! */
+ if (p->ifindex <= 0)
+ p->ifindex = manager_find_ifindex(m, p->family, &p->destination);
+ }
+
+ *ret = TAKE_PTR(p);
+
+ return 1;
+}
+
+static int sendmsg_loop(int fd, struct msghdr *mh, int flags) {
+ int r;
+
+ assert(fd >= 0);
+ assert(mh);
+
+ for (;;) {
+ if (sendmsg(fd, mh, flags) >= 0)
+ return 0;
+
+ if (errno == EINTR)
+ continue;
+
+ if (errno != EAGAIN)
+ return -errno;
+
+ r = fd_wait_for_event(fd, POLLOUT, SEND_TIMEOUT_USEC);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+ }
+}
+
+static int write_loop(int fd, void *message, size_t length) {
+ int r;
+
+ assert(fd >= 0);
+ assert(message);
+
+ for (;;) {
+ if (write(fd, message, length) >= 0)
+ return 0;
+
+ if (errno == EINTR)
+ continue;
+
+ if (errno != EAGAIN)
+ return -errno;
+
+ r = fd_wait_for_event(fd, POLLOUT, SEND_TIMEOUT_USEC);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+ }
+}
+
+int manager_write(Manager *m, int fd, DnsPacket *p) {
+ int r;
+
+ log_debug("Sending %s%s packet with id %" PRIu16 " of size %zu.",
+ DNS_PACKET_TC(p) ? "truncated (!) " : "",
+ DNS_PACKET_QR(p) ? "response" : "query",
+ DNS_PACKET_ID(p),
+ p->size);
+
+ r = write_loop(fd, DNS_PACKET_DATA(p), p->size);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int manager_ipv4_send(
+ Manager *m,
+ int fd,
+ int ifindex,
+ const struct in_addr *destination,
+ uint16_t port,
+ const struct in_addr *source,
+ DnsPacket *p) {
+
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct in_pktinfo))) control = {};
+ union sockaddr_union sa;
+ struct iovec iov;
+ struct msghdr mh = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa.in),
+ };
+
+ assert(m);
+ assert(fd >= 0);
+ assert(destination);
+ assert(port > 0);
+ assert(p);
+
+ iov = IOVEC_MAKE(DNS_PACKET_DATA(p), p->size);
+
+ sa = (union sockaddr_union) {
+ .in.sin_family = AF_INET,
+ .in.sin_addr = *destination,
+ .in.sin_port = htobe16(port),
+ };
+
+ if (ifindex > 0) {
+ struct cmsghdr *cmsg;
+ struct in_pktinfo *pi;
+
+ mh.msg_control = &control;
+ mh.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+ cmsg->cmsg_level = IPPROTO_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+
+ pi = (struct in_pktinfo*) CMSG_DATA(cmsg);
+ pi->ipi_ifindex = ifindex;
+
+ if (source)
+ pi->ipi_spec_dst = *source;
+ }
+
+ return sendmsg_loop(fd, &mh, 0);
+}
+
+static int manager_ipv6_send(
+ Manager *m,
+ int fd,
+ int ifindex,
+ const struct in6_addr *destination,
+ uint16_t port,
+ const struct in6_addr *source,
+ DnsPacket *p) {
+
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct in6_pktinfo))) control = {};
+ union sockaddr_union sa;
+ struct iovec iov;
+ struct msghdr mh = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa.in6),
+ };
+
+ assert(m);
+ assert(fd >= 0);
+ assert(destination);
+ assert(port > 0);
+ assert(p);
+
+ iov = IOVEC_MAKE(DNS_PACKET_DATA(p), p->size);
+
+ sa = (union sockaddr_union) {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_addr = *destination,
+ .in6.sin6_port = htobe16(port),
+ .in6.sin6_scope_id = ifindex,
+ };
+
+ if (ifindex > 0) {
+ struct cmsghdr *cmsg;
+ struct in6_pktinfo *pi;
+
+ mh.msg_control = &control;
+ mh.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+ cmsg->cmsg_level = IPPROTO_IPV6;
+ cmsg->cmsg_type = IPV6_PKTINFO;
+
+ pi = (struct in6_pktinfo*) CMSG_DATA(cmsg);
+ pi->ipi6_ifindex = ifindex;
+
+ if (source)
+ pi->ipi6_addr = *source;
+ }
+
+ return sendmsg_loop(fd, &mh, 0);
+}
+
+int manager_send(
+ Manager *m,
+ int fd,
+ int ifindex,
+ int family,
+ const union in_addr_union *destination,
+ uint16_t port,
+ const union in_addr_union *source,
+ DnsPacket *p) {
+
+ assert(m);
+ assert(fd >= 0);
+ assert(destination);
+ assert(port > 0);
+ assert(p);
+
+ log_debug("Sending %s%s packet with id %" PRIu16 " on interface %i/%s of size %zu.",
+ DNS_PACKET_TC(p) ? "truncated (!) " : "",
+ DNS_PACKET_QR(p) ? "response" : "query",
+ DNS_PACKET_ID(p),
+ ifindex, af_to_name(family),
+ p->size);
+
+ if (family == AF_INET)
+ return manager_ipv4_send(m, fd, ifindex, &destination->in, port, source ? &source->in : NULL, p);
+ if (family == AF_INET6)
+ return manager_ipv6_send(m, fd, ifindex, &destination->in6, port, source ? &source->in6 : NULL, p);
+
+ return -EAFNOSUPPORT;
+}
+
+uint32_t manager_find_mtu(Manager *m) {
+ uint32_t mtu = 0;
+ Link *l;
+
+ /* If we don't know on which link a DNS packet would be
+ * delivered, let's find the largest MTU that works on all
+ * interfaces we know of */
+
+ HASHMAP_FOREACH(l, m->links) {
+ if (l->mtu <= 0)
+ continue;
+
+ if (mtu <= 0 || l->mtu < mtu)
+ mtu = l->mtu;
+ }
+
+ return mtu;
+}
+
+int manager_find_ifindex(Manager *m, int family, const union in_addr_union *in_addr) {
+ LinkAddress *a;
+
+ assert(m);
+
+ a = manager_find_link_address(m, family, in_addr);
+ if (a)
+ return a->link->ifindex;
+
+ return 0;
+}
+
+void manager_refresh_rrs(Manager *m) {
+ Link *l;
+ DnssdService *s;
+
+ assert(m);
+
+ m->llmnr_host_ipv4_key = dns_resource_key_unref(m->llmnr_host_ipv4_key);
+ m->llmnr_host_ipv6_key = dns_resource_key_unref(m->llmnr_host_ipv6_key);
+ m->mdns_host_ipv4_key = dns_resource_key_unref(m->mdns_host_ipv4_key);
+ m->mdns_host_ipv6_key = dns_resource_key_unref(m->mdns_host_ipv6_key);
+
+ if (m->mdns_support == RESOLVE_SUPPORT_YES)
+ HASHMAP_FOREACH(s, m->dnssd_services)
+ if (dnssd_update_rrs(s) < 0)
+ log_warning("Failed to refresh DNS-SD service '%s'", s->name);
+
+ HASHMAP_FOREACH(l, m->links) {
+ link_add_rrs(l, true);
+ link_add_rrs(l, false);
+ }
+}
+
+static int manager_next_random_name(const char *old, char **ret_new) {
+ const char *p;
+ uint64_t u, a;
+ char *n;
+
+ p = strchr(old, 0);
+ assert(p);
+
+ while (p > old) {
+ if (!strchr(DIGITS, p[-1]))
+ break;
+
+ p--;
+ }
+
+ if (*p == 0 || safe_atou64(p, &u) < 0 || u <= 0)
+ u = 1;
+
+ /* Add a random number to the old value. This way we can avoid
+ * that two hosts pick the same hostname, win on IPv4 and lose
+ * on IPv6 (or vice versa), and pick the same hostname
+ * replacement hostname, ad infinitum. We still want the
+ * numbers to go up monotonically, hence we just add a random
+ * value 1..10 */
+
+ random_bytes(&a, sizeof(a));
+ u += 1 + a % 10;
+
+ if (asprintf(&n, "%.*s%" PRIu64, (int) (p - old), old, u) < 0)
+ return -ENOMEM;
+
+ *ret_new = n;
+
+ return 0;
+}
+
+int manager_next_hostname(Manager *m) {
+ _cleanup_free_ char *h = NULL, *k = NULL;
+ int r;
+
+ assert(m);
+
+ r = manager_next_random_name(m->llmnr_hostname, &h);
+ if (r < 0)
+ return r;
+
+ r = dns_name_concat(h, "local", 0, &k);
+ if (r < 0)
+ return r;
+
+ log_info("Hostname conflict, changing published hostname from '%s' to '%s'.", m->llmnr_hostname, h);
+
+ free_and_replace(m->llmnr_hostname, h);
+ free_and_replace(m->mdns_hostname, k);
+
+ manager_refresh_rrs(m);
+ (void) manager_send_changed(m, "LLMNRHostname");
+
+ return 0;
+}
+
+LinkAddress* manager_find_link_address(Manager *m, int family, const union in_addr_union *in_addr) {
+ Link *l;
+
+ assert(m);
+
+ HASHMAP_FOREACH(l, m->links) {
+ LinkAddress *a;
+
+ a = link_find_address(l, family, in_addr);
+ if (a)
+ return a;
+ }
+
+ return NULL;
+}
+
+bool manager_our_packet(Manager *m, DnsPacket *p) {
+ assert(m);
+ assert(p);
+
+ return !!manager_find_link_address(m, p->family, &p->sender);
+}
+
+DnsScope* manager_find_scope(Manager *m, DnsPacket *p) {
+ Link *l;
+
+ assert(m);
+ assert(p);
+
+ l = hashmap_get(m->links, INT_TO_PTR(p->ifindex));
+ if (!l)
+ return NULL;
+
+ switch (p->protocol) {
+ case DNS_PROTOCOL_LLMNR:
+ if (p->family == AF_INET)
+ return l->llmnr_ipv4_scope;
+ else if (p->family == AF_INET6)
+ return l->llmnr_ipv6_scope;
+
+ break;
+
+ case DNS_PROTOCOL_MDNS:
+ if (p->family == AF_INET)
+ return l->mdns_ipv4_scope;
+ else if (p->family == AF_INET6)
+ return l->mdns_ipv6_scope;
+
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+void manager_verify_all(Manager *m) {
+ DnsScope *s;
+
+ assert(m);
+
+ LIST_FOREACH(scopes, s, m->dns_scopes)
+ dns_zone_verify_all(&s->zone);
+}
+
+int manager_is_own_hostname(Manager *m, const char *name) {
+ int r;
+
+ assert(m);
+ assert(name);
+
+ if (m->llmnr_hostname) {
+ r = dns_name_equal(name, m->llmnr_hostname);
+ if (r != 0)
+ return r;
+ }
+
+ if (m->mdns_hostname) {
+ r = dns_name_equal(name, m->mdns_hostname);
+ if (r != 0)
+ return r;
+ }
+
+ if (m->full_hostname)
+ return dns_name_equal(name, m->full_hostname);
+
+ return 0;
+}
+
+int manager_compile_dns_servers(Manager *m, OrderedSet **dns) {
+ DnsServer *s;
+ Link *l;
+ int r;
+
+ assert(m);
+ assert(dns);
+
+ r = ordered_set_ensure_allocated(dns, &dns_server_hash_ops);
+ if (r < 0)
+ return r;
+
+ /* First add the system-wide servers and domains */
+ LIST_FOREACH(servers, s, m->dns_servers) {
+ r = ordered_set_put(*dns, s);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+
+ /* Then, add the per-link servers */
+ HASHMAP_FOREACH(l, m->links) {
+ LIST_FOREACH(servers, s, l->dns_servers) {
+ r = ordered_set_put(*dns, s);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* If we found nothing, add the fallback servers */
+ if (ordered_set_isempty(*dns)) {
+ LIST_FOREACH(servers, s, m->fallback_dns_servers) {
+ r = ordered_set_put(*dns, s);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/* filter_route is a tri-state:
+ * < 0: no filtering
+ * = 0 or false: return only domains which should be used for searching
+ * > 0 or true: return only domains which are for routing only
+ */
+int manager_compile_search_domains(Manager *m, OrderedSet **domains, int filter_route) {
+ DnsSearchDomain *d;
+ Link *l;
+ int r;
+
+ assert(m);
+ assert(domains);
+
+ r = ordered_set_ensure_allocated(domains, &dns_name_hash_ops);
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(domains, d, m->search_domains) {
+
+ if (filter_route >= 0 &&
+ d->route_only != !!filter_route)
+ continue;
+
+ r = ordered_set_put(*domains, d->name);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+
+ HASHMAP_FOREACH(l, m->links) {
+
+ LIST_FOREACH(domains, d, l->search_domains) {
+
+ if (filter_route >= 0 &&
+ d->route_only != !!filter_route)
+ continue;
+
+ r = ordered_set_put(*domains, d->name);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+DnssecMode manager_get_dnssec_mode(Manager *m) {
+ assert(m);
+
+ if (m->dnssec_mode != _DNSSEC_MODE_INVALID)
+ return m->dnssec_mode;
+
+ return DNSSEC_NO;
+}
+
+bool manager_dnssec_supported(Manager *m) {
+ DnsServer *server;
+ Link *l;
+
+ assert(m);
+
+ if (manager_get_dnssec_mode(m) == DNSSEC_NO)
+ return false;
+
+ server = manager_get_dns_server(m);
+ if (server && !dns_server_dnssec_supported(server))
+ return false;
+
+ HASHMAP_FOREACH(l, m->links)
+ if (!link_dnssec_supported(l))
+ return false;
+
+ return true;
+}
+
+DnsOverTlsMode manager_get_dns_over_tls_mode(Manager *m) {
+ assert(m);
+
+ if (m->dns_over_tls_mode != _DNS_OVER_TLS_MODE_INVALID)
+ return m->dns_over_tls_mode;
+
+ return DNS_OVER_TLS_NO;
+}
+
+void manager_dnssec_verdict(Manager *m, DnssecVerdict verdict, const DnsResourceKey *key) {
+
+ assert(verdict >= 0);
+ assert(verdict < _DNSSEC_VERDICT_MAX);
+
+ if (DEBUG_LOGGING) {
+ char s[DNS_RESOURCE_KEY_STRING_MAX];
+
+ log_debug("Found verdict for lookup %s: %s",
+ dns_resource_key_to_string(key, s, sizeof s),
+ dnssec_verdict_to_string(verdict));
+ }
+
+ m->n_dnssec_verdict[verdict]++;
+}
+
+bool manager_routable(Manager *m) {
+ Link *l;
+
+ assert(m);
+
+ /* Returns true if the host has at least one interface with a routable address (regardless if IPv4 or IPv6) */
+
+ HASHMAP_FOREACH(l, m->links)
+ if (link_relevant(l, AF_UNSPEC, false))
+ return true;
+
+ return false;
+}
+
+void manager_flush_caches(Manager *m) {
+ DnsScope *scope;
+
+ assert(m);
+
+ LIST_FOREACH(scopes, scope, m->dns_scopes)
+ dns_cache_flush(&scope->cache);
+
+ log_info("Flushed all caches.");
+}
+
+void manager_reset_server_features(Manager *m) {
+ Link *l;
+
+ dns_server_reset_features_all(m->dns_servers);
+ dns_server_reset_features_all(m->fallback_dns_servers);
+
+ HASHMAP_FOREACH(l, m->links)
+ dns_server_reset_features_all(l->dns_servers);
+
+ log_info("Resetting learnt feature levels on all servers.");
+}
+
+void manager_cleanup_saved_user(Manager *m) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ assert(m);
+
+ /* Clean up all saved per-link files in /run/systemd/resolve/netif/ that don't have a matching interface
+ * anymore. These files are created to persist settings pushed in by the user via the bus, so that resolved can
+ * be restarted without losing this data. */
+
+ d = opendir("/run/systemd/resolve/netif/");
+ if (!d) {
+ if (errno == ENOENT)
+ return;
+
+ log_warning_errno(errno, "Failed to open interface directory: %m");
+ return;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, log_error_errno(errno, "Failed to read interface directory: %m")) {
+ _cleanup_free_ char *p = NULL;
+ int ifindex;
+ Link *l;
+
+ if (!IN_SET(de->d_type, DT_UNKNOWN, DT_REG))
+ continue;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ ifindex = parse_ifindex(de->d_name);
+ if (ifindex < 0) /* Probably some temporary file from a previous run. Delete it */
+ goto rm;
+
+ l = hashmap_get(m->links, INT_TO_PTR(ifindex));
+ if (!l) /* link vanished */
+ goto rm;
+
+ if (l->is_managed) /* now managed by networkd, hence the bus settings are useless */
+ goto rm;
+
+ continue;
+
+ rm:
+ p = path_join("/run/systemd/resolve/netif", de->d_name);
+ if (!p) {
+ log_oom();
+ return;
+ }
+
+ (void) unlink(p);
+ }
+}
+
+bool manager_next_dnssd_names(Manager *m) {
+ DnssdService *s;
+ bool tried = false;
+ int r;
+
+ assert(m);
+
+ HASHMAP_FOREACH(s, m->dnssd_services) {
+ _cleanup_free_ char * new_name = NULL;
+
+ if (!s->withdrawn)
+ continue;
+
+ r = manager_next_random_name(s->name_template, &new_name);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to get new name for service '%s': %m", s->name);
+ continue;
+ }
+
+ free_and_replace(s->name_template, new_name);
+
+ s->withdrawn = false;
+
+ tried = true;
+ }
+
+ if (tried)
+ manager_refresh_rrs(m);
+
+ return tried;
+}
diff --git a/src/resolve/resolved-manager.h b/src/resolve/resolved-manager.h
new file mode 100644
index 0000000..20afab0
--- /dev/null
+++ b/src/resolve/resolved-manager.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/stat.h>
+
+#include "sd-event.h"
+#include "sd-netlink.h"
+#include "sd-network.h"
+
+#include "hashmap.h"
+#include "list.h"
+#include "ordered-set.h"
+#include "resolve-util.h"
+#include "varlink.h"
+
+typedef struct Manager Manager;
+
+#include "resolved-dns-query.h"
+#include "resolved-dns-search-domain.h"
+#include "resolved-dns-stream.h"
+#include "resolved-dns-stub.h"
+#include "resolved-dns-trust-anchor.h"
+#include "resolved-link.h"
+
+#define MANAGER_SEARCH_DOMAINS_MAX 256
+#define MANAGER_DNS_SERVERS_MAX 256
+
+typedef struct EtcHosts {
+ Hashmap *by_address;
+ Hashmap *by_name;
+ Set *no_address;
+} EtcHosts;
+
+struct Manager {
+ sd_event *event;
+
+ ResolveSupport llmnr_support;
+ ResolveSupport mdns_support;
+ DnssecMode dnssec_mode;
+ DnsOverTlsMode dns_over_tls_mode;
+ DnsCacheMode enable_cache;
+ DnsStubListenerMode dns_stub_listener_mode;
+
+#if ENABLE_DNS_OVER_TLS
+ DnsTlsManagerData dnstls_data;
+#endif
+
+ /* Network */
+ Hashmap *links;
+
+ sd_netlink *rtnl;
+ sd_event_source *rtnl_event_source;
+
+ sd_network_monitor *network_monitor;
+ sd_event_source *network_event_source;
+
+ /* DNS query management */
+ Hashmap *dns_transactions;
+ LIST_HEAD(DnsQuery, dns_queries);
+ unsigned n_dns_queries;
+
+ LIST_HEAD(DnsStream, dns_streams);
+ unsigned n_dns_streams[_DNS_STREAM_TYPE_MAX];
+
+ /* Unicast dns */
+ LIST_HEAD(DnsServer, dns_servers);
+ LIST_HEAD(DnsServer, fallback_dns_servers);
+ unsigned n_dns_servers; /* counts both main and fallback */
+ DnsServer *current_dns_server;
+
+ LIST_HEAD(DnsSearchDomain, search_domains);
+ unsigned n_search_domains;
+
+ bool need_builtin_fallbacks;
+ bool read_resolv_conf;
+ bool resolve_unicast_single_label;
+
+ struct stat resolv_conf_stat;
+
+ DnsTrustAnchor trust_anchor;
+
+ LIST_HEAD(DnsScope, dns_scopes);
+ DnsScope *unicast_scope;
+
+ /* LLMNR */
+ int llmnr_ipv4_udp_fd;
+ int llmnr_ipv6_udp_fd;
+ int llmnr_ipv4_tcp_fd;
+ int llmnr_ipv6_tcp_fd;
+
+ sd_event_source *llmnr_ipv4_udp_event_source;
+ sd_event_source *llmnr_ipv6_udp_event_source;
+ sd_event_source *llmnr_ipv4_tcp_event_source;
+ sd_event_source *llmnr_ipv6_tcp_event_source;
+
+ /* mDNS */
+ int mdns_ipv4_fd;
+ int mdns_ipv6_fd;
+
+ /* DNS-SD */
+ Hashmap *dnssd_services;
+
+ sd_event_source *mdns_ipv4_event_source;
+ sd_event_source *mdns_ipv6_event_source;
+
+ /* dbus */
+ sd_bus *bus;
+
+ /* The hostname we publish on LLMNR and mDNS */
+ char *full_hostname;
+ char *llmnr_hostname;
+ char *mdns_hostname;
+ DnsResourceKey *llmnr_host_ipv4_key;
+ DnsResourceKey *llmnr_host_ipv6_key;
+ DnsResourceKey *mdns_host_ipv4_key;
+ DnsResourceKey *mdns_host_ipv6_key;
+
+ /* Watch the system hostname */
+ int hostname_fd;
+ sd_event_source *hostname_event_source;
+
+ sd_event_source *sigusr1_event_source;
+ sd_event_source *sigusr2_event_source;
+ sd_event_source *sigrtmin1_event_source;
+
+ unsigned n_transactions_total;
+ unsigned n_dnssec_verdict[_DNSSEC_VERDICT_MAX];
+
+ /* Data from /etc/hosts */
+ EtcHosts etc_hosts;
+ usec_t etc_hosts_last, etc_hosts_mtime;
+ ino_t etc_hosts_ino;
+ dev_t etc_hosts_dev;
+ bool read_etc_hosts;
+
+ OrderedSet *dns_extra_stub_listeners;
+
+ /* Local DNS stub on 127.0.0.53:53 */
+ sd_event_source *dns_stub_udp_event_source;
+ sd_event_source *dns_stub_tcp_event_source;
+
+ Hashmap *polkit_registry;
+
+ VarlinkServer *varlink_server;
+};
+
+/* Manager */
+
+int manager_new(Manager **ret);
+Manager* manager_free(Manager *m);
+
+int manager_start(Manager *m);
+
+uint32_t manager_find_mtu(Manager *m);
+
+int manager_write(Manager *m, int fd, DnsPacket *p);
+int manager_send(Manager *m, int fd, int ifindex, int family, const union in_addr_union *destination, uint16_t port, const union in_addr_union *source, DnsPacket *p);
+int manager_recv(Manager *m, int fd, DnsProtocol protocol, DnsPacket **ret);
+
+int manager_find_ifindex(Manager *m, int family, const union in_addr_union *in_addr);
+LinkAddress* manager_find_link_address(Manager *m, int family, const union in_addr_union *in_addr);
+
+void manager_refresh_rrs(Manager *m);
+int manager_next_hostname(Manager *m);
+
+bool manager_our_packet(Manager *m, DnsPacket *p);
+DnsScope* manager_find_scope(Manager *m, DnsPacket *p);
+
+void manager_verify_all(Manager *m);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+/* For some reason we need some extra cmsg space on some kernels/archs. One of those days we need to figure out why */
+#define EXTRA_CMSG_SPACE 1024
+
+int manager_is_own_hostname(Manager *m, const char *name);
+
+int manager_compile_dns_servers(Manager *m, OrderedSet **servers);
+int manager_compile_search_domains(Manager *m, OrderedSet **domains, int filter_route);
+
+DnssecMode manager_get_dnssec_mode(Manager *m);
+bool manager_dnssec_supported(Manager *m);
+
+DnsOverTlsMode manager_get_dns_over_tls_mode(Manager *m);
+
+void manager_dnssec_verdict(Manager *m, DnssecVerdict verdict, const DnsResourceKey *key);
+
+bool manager_routable(Manager *m);
+
+void manager_flush_caches(Manager *m);
+void manager_reset_server_features(Manager *m);
+
+void manager_cleanup_saved_user(Manager *m);
+
+bool manager_next_dnssd_names(Manager *m);
diff --git a/src/resolve/resolved-mdns.c b/src/resolve/resolved-mdns.c
new file mode 100644
index 0000000..a093747
--- /dev/null
+++ b/src/resolve/resolved-mdns.c
@@ -0,0 +1,482 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <resolv.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "resolved-manager.h"
+#include "resolved-mdns.h"
+#include "sort-util.h"
+
+#define CLEAR_CACHE_FLUSH(x) (~MDNS_RR_CACHE_FLUSH & (x))
+
+void manager_mdns_stop(Manager *m) {
+ assert(m);
+
+ m->mdns_ipv4_event_source = sd_event_source_unref(m->mdns_ipv4_event_source);
+ m->mdns_ipv4_fd = safe_close(m->mdns_ipv4_fd);
+
+ m->mdns_ipv6_event_source = sd_event_source_unref(m->mdns_ipv6_event_source);
+ m->mdns_ipv6_fd = safe_close(m->mdns_ipv6_fd);
+}
+
+int manager_mdns_start(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->mdns_support == RESOLVE_SUPPORT_NO)
+ return 0;
+
+ r = manager_mdns_ipv4_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+
+ if (socket_ipv6_is_supported()) {
+ r = manager_mdns_ipv6_fd(m);
+ if (r == -EADDRINUSE)
+ goto eaddrinuse;
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+
+eaddrinuse:
+ log_warning("Another mDNS responder prohibits binding the socket to the same port. Turning off mDNS support.");
+ m->mdns_support = RESOLVE_SUPPORT_NO;
+ manager_mdns_stop(m);
+
+ return 0;
+}
+
+static int mdns_rr_compare(DnsResourceRecord * const *a, DnsResourceRecord * const *b) {
+ DnsResourceRecord *x = *(DnsResourceRecord **) a, *y = *(DnsResourceRecord **) b;
+ size_t m;
+ int r;
+
+ assert(x);
+ assert(y);
+
+ r = CMP(CLEAR_CACHE_FLUSH(x->key->class), CLEAR_CACHE_FLUSH(y->key->class));
+ if (r != 0)
+ return r;
+
+ r = CMP(x->key->type, y->key->type);
+ if (r != 0)
+ return r;
+
+ r = dns_resource_record_to_wire_format(x, false);
+ if (r < 0) {
+ log_warning_errno(r, "Can't wire-format RR: %m");
+ return 0;
+ }
+
+ r = dns_resource_record_to_wire_format(y, false);
+ if (r < 0) {
+ log_warning_errno(r, "Can't wire-format RR: %m");
+ return 0;
+ }
+
+ m = MIN(DNS_RESOURCE_RECORD_RDATA_SIZE(x), DNS_RESOURCE_RECORD_RDATA_SIZE(y));
+
+ r = memcmp(DNS_RESOURCE_RECORD_RDATA(x), DNS_RESOURCE_RECORD_RDATA(y), m);
+ if (r != 0)
+ return r;
+
+ return CMP(DNS_RESOURCE_RECORD_RDATA_SIZE(x), DNS_RESOURCE_RECORD_RDATA_SIZE(y));
+}
+
+static int proposed_rrs_cmp(DnsResourceRecord **x, unsigned x_size, DnsResourceRecord **y, unsigned y_size) {
+ unsigned m;
+ int r;
+
+ m = MIN(x_size, y_size);
+ for (unsigned i = 0; i < m; i++) {
+ r = mdns_rr_compare(&x[i], &y[i]);
+ if (r != 0)
+ return r;
+ }
+
+ return CMP(x_size, y_size);
+}
+
+static int mdns_packet_extract_matching_rrs(DnsPacket *p, DnsResourceKey *key, DnsResourceRecord ***ret_rrs) {
+ _cleanup_free_ DnsResourceRecord **list = NULL;
+ unsigned n = 0, size = 0;
+ int r;
+
+ assert(p);
+ assert(key);
+ assert(ret_rrs);
+ assert_return(DNS_PACKET_NSCOUNT(p) > 0, -EINVAL);
+
+ for (size_t i = DNS_PACKET_ANCOUNT(p); i < (DNS_PACKET_ANCOUNT(p) + DNS_PACKET_NSCOUNT(p)); i++) {
+ r = dns_resource_key_match_rr(key, p->answer->items[i].rr, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ size++;
+ }
+
+ if (size == 0)
+ return 0;
+
+ list = new(DnsResourceRecord *, size);
+ if (!list)
+ return -ENOMEM;
+
+ for (size_t i = DNS_PACKET_ANCOUNT(p); i < (DNS_PACKET_ANCOUNT(p) + DNS_PACKET_NSCOUNT(p)); i++) {
+ r = dns_resource_key_match_rr(key, p->answer->items[i].rr, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ list[n++] = p->answer->items[i].rr;
+ }
+ assert(n == size);
+ typesafe_qsort(list, size, mdns_rr_compare);
+
+ *ret_rrs = TAKE_PTR(list);
+
+ return size;
+}
+
+static int mdns_do_tiebreak(DnsResourceKey *key, DnsAnswer *answer, DnsPacket *p) {
+ _cleanup_free_ DnsResourceRecord **our = NULL, **remote = NULL;
+ DnsResourceRecord *rr;
+ size_t i = 0, size;
+ int r;
+
+ size = dns_answer_size(answer);
+ our = new(DnsResourceRecord *, size);
+ if (!our)
+ return -ENOMEM;
+
+ DNS_ANSWER_FOREACH(rr, answer)
+ our[i++] = rr;
+
+ typesafe_qsort(our, size, mdns_rr_compare);
+
+ r = mdns_packet_extract_matching_rrs(p, key, &remote);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+
+ if (proposed_rrs_cmp(remote, r, our, size) > 0)
+ return 1;
+
+ return 0;
+}
+
+static int mdns_scope_process_query(DnsScope *s, DnsPacket *p) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *full_answer = NULL;
+ _cleanup_(dns_packet_unrefp) DnsPacket *reply = NULL;
+ DnsResourceKey *key = NULL;
+ DnsResourceRecord *rr;
+ bool tentative = false;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ r = dns_packet_extract(p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to extract resource records from incoming packet: %m");
+
+ assert_return((dns_question_size(p->question) > 0), -EINVAL);
+
+ DNS_QUESTION_FOREACH(key, p->question) {
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL, *soa = NULL;
+
+ r = dns_zone_lookup(&s->zone, key, 0, &answer, &soa, &tentative);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to look up key: %m");
+
+ if (tentative && DNS_PACKET_NSCOUNT(p) > 0) {
+ /*
+ * A race condition detected with the probe packet from
+ * a remote host.
+ * Do simultaneous probe tiebreaking as described in
+ * RFC 6762, Section 8.2. In case we lost don't reply
+ * the question and withdraw conflicting RRs.
+ */
+ r = mdns_do_tiebreak(key, answer, p);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to do tiebreaking");
+
+ if (r > 0) { /* we lost */
+ DNS_ANSWER_FOREACH(rr, answer) {
+ DnsZoneItem *i;
+
+ i = dns_zone_get(&s->zone, rr);
+ if (i)
+ dns_zone_item_conflict(i);
+ }
+
+ continue;
+ }
+ }
+
+ r = dns_answer_extend(&full_answer, answer);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to extend answer: %m");
+ }
+
+ if (dns_answer_isempty(full_answer))
+ return 0;
+
+ r = dns_scope_make_reply_packet(s, DNS_PACKET_ID(p), DNS_RCODE_SUCCESS, NULL, full_answer, NULL, false, &reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to build reply packet: %m");
+
+ if (!ratelimit_below(&s->ratelimit))
+ return 0;
+
+ r = dns_scope_emit_udp(s, -1, reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to send reply packet: %m");
+
+ return 0;
+}
+
+static int on_mdns_packet(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+ Manager *m = userdata;
+ DnsScope *scope;
+ int r;
+
+ r = manager_recv(m, fd, DNS_PROTOCOL_MDNS, &p);
+ if (r <= 0)
+ return r;
+
+ if (manager_our_packet(m, p))
+ return 0;
+
+ scope = manager_find_scope(m, p);
+ if (!scope) {
+ log_debug("Got mDNS UDP packet on unknown scope. Ignoring.");
+ return 0;
+ }
+
+ if (dns_packet_validate_reply(p) > 0) {
+ DnsResourceRecord *rr;
+
+ log_debug("Got mDNS reply packet");
+
+ /*
+ * mDNS is different from regular DNS and LLMNR with regard to handling responses.
+ * While on other protocols, we can ignore every answer that doesn't match a question
+ * we broadcast earlier, RFC6762, section 18.1 recommends looking at and caching all
+ * incoming information, regardless of the DNS packet ID.
+ *
+ * Hence, extract the packet here, and try to find a transaction for answer the we got
+ * and complete it. Also store the new information in scope's cache.
+ */
+ r = dns_packet_extract(p);
+ if (r < 0) {
+ log_debug("mDNS packet extraction failed.");
+ return 0;
+ }
+
+ dns_scope_check_conflicts(scope, p);
+
+ DNS_ANSWER_FOREACH(rr, p->answer) {
+ const char *name = dns_resource_key_name(rr->key);
+ DnsTransaction *t;
+
+ /* If the received reply packet contains ANY record that is not .local or .in-addr.arpa,
+ * we assume someone's playing tricks on us and discard the packet completely. */
+ if (!(dns_name_endswith(name, "in-addr.arpa") > 0 ||
+ dns_name_endswith(name, "local") > 0))
+ return 0;
+
+ if (rr->ttl == 0) {
+ log_debug("Got a goodbye packet");
+ /* See the section 10.1 of RFC6762 */
+ rr->ttl = 1;
+ }
+
+ t = dns_scope_find_transaction(scope, rr->key, false);
+ if (t)
+ dns_transaction_process_reply(t, p);
+
+ /* Also look for the various types of ANY transactions */
+ t = dns_scope_find_transaction(scope, &DNS_RESOURCE_KEY_CONST(rr->key->class, DNS_TYPE_ANY, dns_resource_key_name(rr->key)), false);
+ if (t)
+ dns_transaction_process_reply(t, p);
+
+ t = dns_scope_find_transaction(scope, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_ANY, rr->key->type, dns_resource_key_name(rr->key)), false);
+ if (t)
+ dns_transaction_process_reply(t, p);
+
+ t = dns_scope_find_transaction(scope, &DNS_RESOURCE_KEY_CONST(DNS_CLASS_ANY, DNS_TYPE_ANY, dns_resource_key_name(rr->key)), false);
+ if (t)
+ dns_transaction_process_reply(t, p);
+ }
+
+ dns_cache_put(&scope->cache, scope->manager->enable_cache, NULL, DNS_PACKET_RCODE(p), p->answer, false, (uint32_t) -1, 0, p->family, &p->sender);
+
+ } else if (dns_packet_validate_query(p) > 0) {
+ log_debug("Got mDNS query packet for id %u", DNS_PACKET_ID(p));
+
+ r = mdns_scope_process_query(scope, p);
+ if (r < 0) {
+ log_debug_errno(r, "mDNS query processing failed: %m");
+ return 0;
+ }
+ } else
+ log_debug("Invalid mDNS UDP packet.");
+
+ return 0;
+}
+
+int manager_mdns_ipv4_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(MDNS_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->mdns_ipv4_fd >= 0)
+ return m->mdns_ipv4_fd;
+
+ s = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "mDNS-IPv4: Failed to create socket: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_TTL, 255);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_MULTICAST_TTL, 255);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_MULTICAST_TTL: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_MULTICAST_LOOP, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_MULTICAST_LOOP: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_PKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_PKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IP, IP_RECVTTL, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_RECVTTL: %m");
+
+ /* Disable Don't-Fragment bit in the IP header */
+ r = setsockopt_int(s, IPPROTO_IP, IP_MTU_DISCOVER, IP_PMTUDISC_DONT);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set IP_MTU_DISCOVER: %m");
+
+ /* See the section 15.1 of RFC6762 */
+ /* first try to bind without SO_REUSEADDR to detect another mDNS responder */
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "mDNS-IPv4: Failed to bind socket: %m");
+
+ log_warning("mDNS-IPv4: There appears to be another mDNS responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in));
+ if (r < 0)
+ return log_error_errno(errno, "mDNS-IPv4: Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple mDNS responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->mdns_ipv4_event_source, s, EPOLLIN, on_mdns_packet, m);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv4: Failed to create event source: %m");
+
+ return m->mdns_ipv4_fd = TAKE_FD(s);
+}
+
+int manager_mdns_ipv6_fd(Manager *m) {
+ union sockaddr_union sa = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = htobe16(MDNS_PORT),
+ };
+ _cleanup_close_ int s = -1;
+ int r;
+
+ assert(m);
+
+ if (m->mdns_ipv6_fd >= 0)
+ return m->mdns_ipv6_fd;
+
+ s = socket(AF_INET6, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s < 0)
+ return log_error_errno(errno, "mDNS-IPv6: Failed to create socket: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, 255);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_UNICAST_HOPS: %m");
+
+ /* RFC 4795, section 2.5 recommends setting the TTL of UDP packets to 255. */
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 255);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_MULTICAST_HOPS: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_MULTICAST_LOOP: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_V6ONLY, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_V6ONLY: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_RECVPKTINFO: %m");
+
+ r = setsockopt_int(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set IPV6_RECVHOPLIMIT: %m");
+
+ /* See the section 15.1 of RFC6762 */
+ /* first try to bind without SO_REUSEADDR to detect another mDNS responder */
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0) {
+ if (errno != EADDRINUSE)
+ return log_error_errno(errno, "mDNS-IPv6: Failed to bind socket: %m");
+
+ log_warning("mDNS-IPv6: There appears to be another mDNS responder running, or previously systemd-resolved crashed with some outstanding transfers.");
+
+ /* try again with SO_REUSEADDR */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set SO_REUSEADDR: %m");
+
+ r = bind(s, &sa.sa, sizeof(sa.in6));
+ if (r < 0)
+ return log_error_errno(errno, "mDNS-IPv6: Failed to bind socket: %m");
+ } else {
+ /* enable SO_REUSEADDR for the case that the user really wants multiple mDNS responders */
+ r = setsockopt_int(s, SOL_SOCKET, SO_REUSEADDR, true);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to set SO_REUSEADDR: %m");
+ }
+
+ r = sd_event_add_io(m->event, &m->mdns_ipv6_event_source, s, EPOLLIN, on_mdns_packet, m);
+ if (r < 0)
+ return log_error_errno(r, "mDNS-IPv6: Failed to create event source: %m");
+
+ return m->mdns_ipv6_fd = TAKE_FD(s);
+}
diff --git a/src/resolve/resolved-mdns.h b/src/resolve/resolved-mdns.h
new file mode 100644
index 0000000..38ef180
--- /dev/null
+++ b/src/resolve/resolved-mdns.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "resolved-manager.h"
+
+#define MDNS_PORT 5353
+#define MDNS_ANNOUNCE_DELAY (1 * USEC_PER_SEC)
+
+int manager_mdns_ipv4_fd(Manager *m);
+int manager_mdns_ipv6_fd(Manager *m);
+
+void manager_mdns_stop(Manager *m);
+int manager_mdns_start(Manager *m);
diff --git a/src/resolve/resolved-resolv-conf.c b/src/resolve/resolved-resolv-conf.c
new file mode 100644
index 0000000..fce5c9b
--- /dev/null
+++ b/src/resolve/resolved-resolv-conf.c
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <resolv.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "label.h"
+#include "ordered-set.h"
+#include "resolved-conf.h"
+#include "resolved-dns-server.h"
+#include "resolved-resolv-conf.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util-label.h"
+
+int manager_check_resolv_conf(const Manager *m) {
+ struct stat st, own;
+
+ assert(m);
+
+ /* This warns only when our stub listener is disabled and /etc/resolv.conf is a symlink to
+ * PRIVATE_STATIC_RESOLV_CONF. */
+
+ if (m->dns_stub_listener_mode != DNS_STUB_LISTENER_NO)
+ return 0;
+
+ if (stat("/etc/resolv.conf", &st) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to stat /etc/resolv.conf: %m");
+ }
+
+ /* Is it symlinked to our own uplink file? */
+ if (stat(PRIVATE_STATIC_RESOLV_CONF, &own) >= 0 &&
+ st.st_dev == own.st_dev &&
+ st.st_ino == own.st_ino)
+ return log_warning_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "DNSStubListener= is disabled, but /etc/resolv.conf is a symlink to "
+ PRIVATE_STATIC_RESOLV_CONF " which expects DNSStubListener= to be enabled.");
+
+ return 0;
+}
+
+static bool file_is_our_own(const struct stat *st) {
+ const char *path;
+
+ assert(st);
+
+ FOREACH_STRING(path,
+ PRIVATE_UPLINK_RESOLV_CONF,
+ PRIVATE_STUB_RESOLV_CONF,
+ PRIVATE_STATIC_RESOLV_CONF) {
+
+ struct stat own;
+
+ /* Is it symlinked to our own uplink file? */
+ if (stat(path, &own) >= 0 &&
+ st->st_dev == own.st_dev &&
+ st->st_ino == own.st_ino)
+ return true;
+ }
+
+ return false;
+}
+
+int manager_read_resolv_conf(Manager *m) {
+ _cleanup_fclose_ FILE *f = NULL;
+ struct stat st;
+ unsigned n = 0;
+ int r;
+
+ assert(m);
+
+ /* Reads the system /etc/resolv.conf, if it exists and is not
+ * symlinked to our own resolv.conf instance */
+
+ if (!m->read_resolv_conf)
+ return 0;
+
+ r = stat("/etc/resolv.conf", &st);
+ if (r < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ r = log_warning_errno(errno, "Failed to stat /etc/resolv.conf: %m");
+ goto clear;
+ }
+
+ /* Have we already seen the file? */
+ if (stat_inode_unmodified(&st, &m->resolv_conf_stat))
+ return 0;
+
+ if (file_is_our_own(&st))
+ return 0;
+
+ f = fopen("/etc/resolv.conf", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ r = log_warning_errno(errno, "Failed to open /etc/resolv.conf: %m");
+ goto clear;
+ }
+
+ if (fstat(fileno(f), &st) < 0) {
+ r = log_error_errno(errno, "Failed to stat open file: %m");
+ goto clear;
+ }
+
+ if (file_is_our_own(&st))
+ return 0;
+
+ dns_server_mark_all(m->dns_servers);
+ dns_search_domain_mark_all(m->search_domains);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *a;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read /etc/resolv.conf: %m");
+ goto clear;
+ }
+ if (r == 0)
+ break;
+
+ n++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, '#', ';', 0))
+ continue;
+
+ a = first_word(l, "nameserver");
+ if (a) {
+ r = manager_parse_dns_server_string_and_warn(m, DNS_SERVER_SYSTEM, a);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse DNS server address '%s', ignoring.", a);
+
+ continue;
+ }
+
+ a = first_word(l, "domain");
+ if (!a) /* We treat "domain" lines, and "search" lines as equivalent, and add both to our list. */
+ a = first_word(l, "search");
+ if (a) {
+ r = manager_parse_search_domains_and_warn(m, a);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse search domain string '%s', ignoring.", a);
+
+ continue;
+ }
+
+ log_syntax(NULL, LOG_DEBUG, "/etc/resolv.conf", n, 0, "Ignoring resolv.conf line: %s", l);
+ }
+
+ m->resolv_conf_stat = st;
+
+ /* Flush out all servers and search domains that are still
+ * marked. Those are then ones that didn't appear in the new
+ * /etc/resolv.conf */
+ dns_server_unlink_marked(m->dns_servers);
+ dns_search_domain_unlink_marked(m->search_domains);
+
+ /* Whenever /etc/resolv.conf changes, start using the first
+ * DNS server of it. This is useful to deal with broken
+ * network managing implementations (like NetworkManager),
+ * that when connecting to a VPN place both the VPN DNS
+ * servers and the local ones in /etc/resolv.conf. Without
+ * resetting the DNS server to use back to the first entry we
+ * will continue to use the local one thus being unable to
+ * resolve VPN domains. */
+ manager_set_dns_server(m, m->dns_servers);
+
+ /* Unconditionally flush the cache when /etc/resolv.conf is
+ * modified, even if the data it contained was completely
+ * identical to the previous version we used. We do this
+ * because altering /etc/resolv.conf is typically done when
+ * the network configuration changes, and that should be
+ * enough to flush the global unicast DNS cache. */
+ if (m->unicast_scope)
+ dns_cache_flush(&m->unicast_scope->cache);
+
+ /* If /etc/resolv.conf changed, make sure to forget everything we learned about the DNS servers. After all we
+ * might now talk to a very different DNS server that just happens to have the same IP address as an old one
+ * (think 192.168.1.1). */
+ dns_server_reset_features_all(m->dns_servers);
+
+ return 0;
+
+clear:
+ dns_server_unlink_all(m->dns_servers);
+ dns_search_domain_unlink_all(m->search_domains);
+ return r;
+}
+
+static void write_resolv_conf_server(DnsServer *s, FILE *f, unsigned *count) {
+ DnsScope *scope;
+
+ assert(s);
+ assert(f);
+ assert(count);
+
+ if (!dns_server_string(s)) {
+ log_warning("Out of memory, or invalid DNS address. Ignoring server.");
+ return;
+ }
+
+ /* Check if the scope this DNS server belongs to is suitable as 'default' route for lookups; resolv.conf does
+ * not have a syntax to express that, so it must not appear as a global name server to avoid routing unrelated
+ * domains to it (which is a privacy violation, will most probably fail anyway, and adds unnecessary load) */
+ scope = dns_server_scope(s);
+ if (scope && !dns_scope_is_default_route(scope)) {
+ log_debug("Scope of DNS server %s has only route-only domains, not using as global name server", dns_server_string(s));
+ return;
+ }
+
+ if (*count == MAXNS)
+ fputs("# Too many DNS servers configured, the following entries may be ignored.\n", f);
+ (*count)++;
+
+ fprintf(f, "nameserver %s\n", dns_server_string(s));
+}
+
+static void write_resolv_conf_search(
+ OrderedSet *domains,
+ FILE *f) {
+ char *domain;
+
+ assert(domains);
+ assert(f);
+
+ fputs("search", f);
+
+ ORDERED_SET_FOREACH(domain, domains) {
+ fputc(' ', f);
+ fputs(domain, f);
+ }
+
+ fputs("\n", f);
+}
+
+static int write_uplink_resolv_conf_contents(FILE *f, OrderedSet *dns, OrderedSet *domains) {
+
+ fputs("# This file is managed by man:systemd-resolved(8). Do not edit.\n"
+ "#\n"
+ "# This is a dynamic resolv.conf file for connecting local clients directly to\n"
+ "# all known uplink DNS servers. This file lists all configured search domains.\n"
+ "#\n"
+ "# Third party programs should typically not access this file directly, but only\n"
+ "# through the symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a\n"
+ "# different way, replace this symlink by a static file or a different symlink.\n"
+ "#\n"
+ "# See man:systemd-resolved.service(8) for details about the supported modes of\n"
+ "# operation for /etc/resolv.conf.\n"
+ "\n", f);
+
+ if (ordered_set_isempty(dns))
+ fputs("# No DNS servers known.\n", f);
+ else {
+ unsigned count = 0;
+ DnsServer *s;
+
+ ORDERED_SET_FOREACH(s, dns)
+ write_resolv_conf_server(s, f, &count);
+ }
+
+ if (ordered_set_isempty(domains))
+ fputs("search .\n", f); /* Make sure that if the local hostname is chosen as fqdn this does not
+ * imply a search domain */
+ else
+ write_resolv_conf_search(domains, f);
+
+ return fflush_and_check(f);
+}
+
+static int write_stub_resolv_conf_contents(FILE *f, OrderedSet *dns, OrderedSet *domains) {
+ fputs("# This file is managed by man:systemd-resolved(8). Do not edit.\n"
+ "#\n"
+ "# This is a dynamic resolv.conf file for connecting local clients to the\n"
+ "# internal DNS stub resolver of systemd-resolved. This file lists all\n"
+ "# configured search domains.\n"
+ "#\n"
+ "# Run \"resolvectl status\" to see details about the uplink DNS servers\n"
+ "# currently in use.\n"
+ "#\n"
+ "# Third party programs should typically not access this file directly, but only\n"
+ "# through the symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a\n"
+ "# different way, replace this symlink by a static file or a different symlink.\n"
+ "#\n"
+ "# See man:systemd-resolved.service(8) for details about the supported modes of\n"
+ "# operation for /etc/resolv.conf.\n"
+ "\n"
+ "nameserver 127.0.0.53\n"
+ "options edns0 trust-ad\n", f);
+
+ if (ordered_set_isempty(domains))
+ fputs("search .\n", f); /* Make sure that if the local hostname is chosen as fqdn this does not
+ * imply a search domain */
+ else
+ write_resolv_conf_search(domains, f);
+
+ return fflush_and_check(f);
+}
+
+int manager_write_resolv_conf(Manager *m) {
+ _cleanup_ordered_set_free_ OrderedSet *dns = NULL, *domains = NULL;
+ _cleanup_free_ char *temp_path_uplink = NULL, *temp_path_stub = NULL;
+ _cleanup_fclose_ FILE *f_uplink = NULL, *f_stub = NULL;
+ int r;
+
+ assert(m);
+
+ /* Read the system /etc/resolv.conf first */
+ (void) manager_read_resolv_conf(m);
+
+ /* Add the full list to a set, to filter out duplicates */
+ r = manager_compile_dns_servers(m, &dns);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to compile list of DNS servers: %m");
+
+ r = manager_compile_search_domains(m, &domains, false);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to compile list of search domains: %m");
+
+ r = fopen_temporary_label(PRIVATE_UPLINK_RESOLV_CONF, PRIVATE_UPLINK_RESOLV_CONF, &f_uplink, &temp_path_uplink);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to open new %s for writing: %m", PRIVATE_UPLINK_RESOLV_CONF);
+
+ (void) fchmod(fileno(f_uplink), 0644);
+
+ r = write_uplink_resolv_conf_contents(f_uplink, dns, domains);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write new %s: %m", PRIVATE_UPLINK_RESOLV_CONF);
+ goto fail;
+ }
+
+ if (m->dns_stub_listener_mode != DNS_STUB_LISTENER_NO) {
+ r = fopen_temporary_label(PRIVATE_STUB_RESOLV_CONF, PRIVATE_STUB_RESOLV_CONF, &f_stub, &temp_path_stub);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to open new %s for writing: %m", PRIVATE_STUB_RESOLV_CONF);
+ goto fail;
+ }
+
+ (void) fchmod(fileno(f_stub), 0644);
+
+ r = write_stub_resolv_conf_contents(f_stub, dns, domains);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write new %s: %m", PRIVATE_STUB_RESOLV_CONF);
+ goto fail;
+ }
+
+ if (rename(temp_path_stub, PRIVATE_STUB_RESOLV_CONF) < 0)
+ r = log_error_errno(errno, "Failed to move new %s into place: %m", PRIVATE_STUB_RESOLV_CONF);
+
+ } else {
+ r = symlink_atomic_label(basename(PRIVATE_UPLINK_RESOLV_CONF), PRIVATE_STUB_RESOLV_CONF);
+ if (r < 0)
+ log_error_errno(r, "Failed to symlink %s: %m", PRIVATE_STUB_RESOLV_CONF);
+ }
+
+ if (rename(temp_path_uplink, PRIVATE_UPLINK_RESOLV_CONF) < 0)
+ r = log_error_errno(errno, "Failed to move new %s into place: %m", PRIVATE_UPLINK_RESOLV_CONF);
+
+ fail:
+ if (r < 0) {
+ /* Something went wrong, perform cleanup... */
+ (void) unlink(temp_path_uplink);
+ (void) unlink(temp_path_stub);
+ }
+
+ return r;
+}
+
+int resolv_conf_mode(void) {
+ static const char * const table[_RESOLV_CONF_MODE_MAX] = {
+ [RESOLV_CONF_UPLINK] = PRIVATE_UPLINK_RESOLV_CONF,
+ [RESOLV_CONF_STUB] = PRIVATE_STUB_RESOLV_CONF,
+ [RESOLV_CONF_STATIC] = PRIVATE_STATIC_RESOLV_CONF,
+ };
+
+ struct stat system_st;
+
+ if (stat("/etc/resolv.conf", &system_st) < 0) {
+ if (errno == ENOENT)
+ return RESOLV_CONF_MISSING;
+
+ return -errno;
+ }
+
+ for (ResolvConfMode m = 0; m < _RESOLV_CONF_MODE_MAX; m++) {
+ struct stat our_st;
+
+ if (!table[m])
+ continue;
+
+ if (stat(table[m], &our_st) < 0) {
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to stat() %s, ignoring: %m", table[m]);
+
+ continue;
+ }
+
+ if (system_st.st_dev == our_st.st_dev &&
+ system_st.st_ino == our_st.st_ino)
+ return m;
+ }
+
+ return RESOLV_CONF_FOREIGN;
+}
+
+static const char* const resolv_conf_mode_table[_RESOLV_CONF_MODE_MAX] = {
+ [RESOLV_CONF_UPLINK] = "uplink",
+ [RESOLV_CONF_STUB] = "stub",
+ [RESOLV_CONF_STATIC] = "static",
+ [RESOLV_CONF_MISSING] = "missing",
+ [RESOLV_CONF_FOREIGN] = "foreign",
+};
+DEFINE_STRING_TABLE_LOOKUP(resolv_conf_mode, ResolvConfMode);
diff --git a/src/resolve/resolved-resolv-conf.h b/src/resolve/resolved-resolv-conf.h
new file mode 100644
index 0000000..3734e28
--- /dev/null
+++ b/src/resolve/resolved-resolv-conf.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "resolved-manager.h"
+
+int manager_check_resolv_conf(const Manager *m);
+int manager_read_resolv_conf(Manager *m);
+int manager_write_resolv_conf(Manager *m);
+
+typedef enum ResolvConfMode {
+ RESOLV_CONF_UPLINK,
+ RESOLV_CONF_STUB,
+ RESOLV_CONF_STATIC,
+ RESOLV_CONF_FOREIGN,
+ RESOLV_CONF_MISSING,
+ _RESOLV_CONF_MODE_MAX,
+ _RESOLV_CONF_MODE_INVALID = -1,
+} ResolvConfMode;
+
+int resolv_conf_mode(void);
+
+const char* resolv_conf_mode_to_string(ResolvConfMode m) _const_;
+ResolvConfMode resolv_conf_mode_from_string(const char *s) _pure_;
diff --git a/src/resolve/resolved-varlink.c b/src/resolve/resolved-varlink.c
new file mode 100644
index 0000000..70d6f90
--- /dev/null
+++ b/src/resolve/resolved-varlink.c
@@ -0,0 +1,544 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "in-addr-util.h"
+#include "resolved-dns-synthesize.h"
+#include "resolved-varlink.h"
+#include "socket-netlink.h"
+
+typedef struct LookupParameters {
+ int ifindex;
+ uint64_t flags;
+ int family;
+ union in_addr_union address;
+ size_t address_size;
+ char *name;
+} LookupParameters;
+
+static void lookup_parameters_destroy(LookupParameters *p) {
+ assert(p);
+ free(p->name);
+}
+
+static int reply_query_state(DnsQuery *q) {
+
+ assert(q);
+ assert(q->varlink_request);
+
+ switch (q->state) {
+
+ case DNS_TRANSACTION_NO_SERVERS:
+ return varlink_error(q->varlink_request, "io.systemd.Resolve.NoNameServers", NULL);
+
+ case DNS_TRANSACTION_TIMEOUT:
+ return varlink_error(q->varlink_request, "io.systemd.Resolve.QueryTimedOut", NULL);
+
+ case DNS_TRANSACTION_ATTEMPTS_MAX_REACHED:
+ return varlink_error(q->varlink_request, "io.systemd.Resolve.MaxAttemptsReached", NULL);
+
+ case DNS_TRANSACTION_INVALID_REPLY:
+ return varlink_error(q->varlink_request, "io.systemd.Resolve.InvalidReply", NULL);
+
+ case DNS_TRANSACTION_ERRNO:
+ return varlink_error_errno(q->varlink_request, q->answer_errno);
+
+ case DNS_TRANSACTION_ABORTED:
+ return varlink_error(q->varlink_request, "io.systemd.Resolve.QueryAborted", NULL);
+
+ case DNS_TRANSACTION_DNSSEC_FAILED:
+ return varlink_errorb(q->varlink_request, "io.systemd.Resolve.DNSSECValidationFailed",
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("result", JSON_BUILD_STRING(dnssec_result_to_string(q->answer_dnssec_result)))));
+
+ case DNS_TRANSACTION_NO_TRUST_ANCHOR:
+ return varlink_error(q->varlink_request, "io.systemd.Resolve.NoTrustAnchor", NULL);
+
+ case DNS_TRANSACTION_RR_TYPE_UNSUPPORTED:
+ return varlink_error(q->varlink_request, "io.systemd.Resolve.ResourceRecordTypeUnsupported", NULL);
+
+ case DNS_TRANSACTION_NETWORK_DOWN:
+ return varlink_error(q->varlink_request, "io.systemd.Resolve.NetworkDown", NULL);
+
+ case DNS_TRANSACTION_NOT_FOUND:
+ /* We return this as NXDOMAIN. This is only generated when a host doesn't implement LLMNR/TCP, and we
+ * thus quickly know that we cannot resolve an in-addr.arpa or ip6.arpa address. */
+ return varlink_errorb(q->varlink_request, "io.systemd.Resolve.DNSError",
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("rcode", JSON_BUILD_INTEGER(DNS_RCODE_NXDOMAIN))));
+
+ case DNS_TRANSACTION_RCODE_FAILURE:
+ return varlink_errorb(q->varlink_request, "io.systemd.Resolve.DNSError",
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("rcode", JSON_BUILD_INTEGER(q->answer_rcode))));
+
+ case DNS_TRANSACTION_NULL:
+ case DNS_TRANSACTION_PENDING:
+ case DNS_TRANSACTION_VALIDATING:
+ case DNS_TRANSACTION_SUCCESS:
+ default:
+ assert_not_reached("Impossible state");
+ }
+}
+
+static void vl_on_disconnect(VarlinkServer *s, Varlink *link, void *userdata) {
+ DnsQuery *q;
+
+ assert(s);
+ assert(link);
+
+ q = varlink_get_userdata(link);
+ if (!q)
+ return;
+
+ if (!DNS_TRANSACTION_IS_LIVE(q->state))
+ return;
+
+ log_debug("Client of active query vanished, aborting query.");
+ dns_query_complete(q, DNS_TRANSACTION_ABORTED);
+}
+
+static bool validate_and_mangle_flags(
+ const char *name,
+ uint64_t *flags,
+ uint64_t ok) {
+
+ assert(flags);
+
+ /* This checks that the specified client-provided flags parameter actually makes sense, and mangles
+ * it slightly. Specifically:
+ *
+ * 1. We check that only the protocol flags and the NO_CNAME flag are on at most, plus the
+ * method-specific flags specified in 'ok'.
+ *
+ * 2. If no protocols are enabled we automatically convert that to "all protocols are enabled".
+ *
+ * The second rule means that clients can just pass 0 as flags for the common case, and all supported
+ * protocols are enabled. Moreover it's useful so that client's do not have to be aware of all
+ * protocols implemented in resolved, but can use 0 as protocols flags set as indicator for
+ * "everything".
+ */
+
+ if (*flags & ~(SD_RESOLVED_PROTOCOLS_ALL|SD_RESOLVED_NO_CNAME|ok))
+ return false;
+
+ if ((*flags & SD_RESOLVED_PROTOCOLS_ALL) == 0) /* If no protocol is enabled, enable all */
+ *flags |= SD_RESOLVED_PROTOCOLS_ALL;
+
+ /* If the SD_RESOLVED_NO_SEARCH flag is acceptable, and the query name is dot-suffixed, turn off
+ * search domains. Note that DNS name normalization drops the dot suffix, hence we propagate this
+ * into the flags field as early as we can. */
+ if (name && FLAGS_SET(ok, SD_RESOLVED_NO_SEARCH) && dns_name_dot_suffixed(name) > 0)
+ *flags |= SD_RESOLVED_NO_SEARCH;
+
+ return true;
+}
+
+static void vl_method_resolve_hostname_complete(DnsQuery *q) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *canonical = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *array = NULL;
+ _cleanup_free_ char *normalized = NULL;
+ DnsResourceRecord *rr;
+ DnsQuestion *question;
+ int ifindex, r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = varlink_error(q->varlink_request, "io.systemd.Resolve.CNAMELoop", NULL);
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ _cleanup_(json_variant_unrefp) JsonVariant *entry = NULL;
+ int family;
+ const void *p;
+
+ r = dns_question_matches_rr(question, rr, DNS_SEARCH_DOMAIN_NAME(q->answer_search_domain));
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ if (rr->key->type == DNS_TYPE_A) {
+ family = AF_INET;
+ p = &rr->a.in_addr;
+ } else if (rr->key->type == DNS_TYPE_AAAA) {
+ family = AF_INET6;
+ p = &rr->aaaa.in6_addr;
+ } else {
+ r = -EAFNOSUPPORT;
+ goto finish;
+ }
+
+ r = json_build(&entry,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("ifindex", JSON_BUILD_INTEGER(ifindex)),
+ JSON_BUILD_PAIR("family", JSON_BUILD_INTEGER(family)),
+ JSON_BUILD_PAIR("address", JSON_BUILD_BYTE_ARRAY(p, FAMILY_ADDRESS_SIZE(family)))));
+ if (r < 0)
+ goto finish;
+
+ if (!canonical)
+ canonical = dns_resource_record_ref(rr);
+
+ r = json_variant_append_array(&array, entry);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (json_variant_is_blank_object(array)) {
+ r = varlink_error(q->varlink_request, "io.systemd.Resolve.NoSuchResourceRecord", NULL);
+ goto finish;
+ }
+
+ assert(canonical);
+ r = dns_name_normalize(dns_resource_key_name(canonical->key), 0, &normalized);
+ if (r < 0)
+ goto finish;
+
+ r = varlink_replyb(q->varlink_request,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("addresses", JSON_BUILD_VARIANT(array)),
+ JSON_BUILD_PAIR("name", JSON_BUILD_STRING(normalized)),
+ JSON_BUILD_PAIR("flags", JSON_BUILD_INTEGER(SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q))))));
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send hostname reply: %m");
+ r = varlink_error_errno(q->varlink_request, r);
+ }
+
+ dns_query_free(q);
+}
+
+static int parse_as_address(Varlink *link, LookupParameters *p) {
+ _cleanup_free_ char *canonical = NULL;
+ int r, ff, parsed_ifindex, ifindex;
+ union in_addr_union parsed;
+
+ assert(link);
+ assert(p);
+
+ /* Check if this parses as literal address. If so, just parse it and return that, do not involve networking */
+ r = in_addr_ifindex_from_string_auto(p->name, &ff, &parsed, &parsed_ifindex);
+ if (r < 0)
+ return 0; /* not a literal address */
+
+ /* Make sure the data we parsed matches what is requested */
+ if ((p->family != AF_UNSPEC && ff != p->family) ||
+ (p->ifindex > 0 && parsed_ifindex > 0 && parsed_ifindex != p->ifindex))
+ return varlink_error(link, "io.systemd.Resolve.NoSuchResourceRecord", NULL);
+
+ ifindex = parsed_ifindex > 0 ? parsed_ifindex : p->ifindex;
+
+ /* Reformat the address as string, to return as canonicalized name */
+ r = in_addr_ifindex_to_string(ff, &parsed, ifindex, &canonical);
+ if (r < 0)
+ return r;
+
+ return varlink_replyb(
+ link,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("addresses",
+ JSON_BUILD_ARRAY(
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR_CONDITION(ifindex > 0, "ifindex", JSON_BUILD_INTEGER(ifindex)),
+ JSON_BUILD_PAIR("family", JSON_BUILD_INTEGER(ff)),
+ JSON_BUILD_PAIR("address", JSON_BUILD_BYTE_ARRAY(&parsed, FAMILY_ADDRESS_SIZE(ff)))))),
+ JSON_BUILD_PAIR("name", JSON_BUILD_STRING(canonical)),
+ JSON_BUILD_PAIR("flags", JSON_BUILD_INTEGER(SD_RESOLVED_FLAGS_MAKE(dns_synthesize_protocol(p->flags), ff, true)))));
+}
+
+static int vl_method_resolve_hostname(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+ static const JsonDispatch dispatch_table[] = {
+ { "ifindex", JSON_VARIANT_UNSIGNED, json_dispatch_int, offsetof(LookupParameters, ifindex), 0 },
+ { "name", JSON_VARIANT_STRING, json_dispatch_string, offsetof(LookupParameters, name), JSON_MANDATORY },
+ { "family", JSON_VARIANT_UNSIGNED, json_dispatch_int, offsetof(LookupParameters, family), 0 },
+ { "flags", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(LookupParameters, flags), 0 },
+ {}
+ };
+
+ _cleanup_(dns_question_unrefp) DnsQuestion *question_idna = NULL, *question_utf8 = NULL;
+ _cleanup_(lookup_parameters_destroy) LookupParameters p = {
+ .family = AF_UNSPEC,
+ };
+ Manager *m = userdata;
+ DnsQuery *q;
+ int r;
+
+ assert(link);
+ assert(m);
+
+ if (FLAGS_SET(flags, VARLINK_METHOD_ONEWAY))
+ return -EINVAL;
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (p.ifindex < 0)
+ return varlink_error_invalid_parameter(link, JSON_VARIANT_STRING_CONST("ifindex"));
+
+ r = dns_name_is_valid(p.name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return varlink_error_invalid_parameter(link, JSON_VARIANT_STRING_CONST("name"));
+
+ if (!IN_SET(p.family, AF_UNSPEC, AF_INET, AF_INET6))
+ return varlink_error_invalid_parameter(link, JSON_VARIANT_STRING_CONST("family"));
+
+ if (!validate_and_mangle_flags(p.name, &p.flags, SD_RESOLVED_NO_SEARCH))
+ return varlink_error_invalid_parameter(link, JSON_VARIANT_STRING_CONST("flags"));
+
+ r = parse_as_address(link, &p);
+ if (r != 0)
+ return r;
+
+ r = dns_question_new_address(&question_utf8, p.family, p.name, false);
+ if (r < 0)
+ return r;
+
+ r = dns_question_new_address(&question_idna, p.family, p.name, true);
+ if (r < 0 && r != -EALREADY)
+ return r;
+
+ r = dns_query_new(m, &q, question_utf8, question_idna ?: question_utf8, p.ifindex, p.flags);
+ if (r < 0)
+ return r;
+
+ q->varlink_request = varlink_ref(link);
+ varlink_set_userdata(link, q);
+ q->request_family = p.family;
+ q->complete = vl_method_resolve_hostname_complete;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+static int json_dispatch_address(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ LookupParameters *p = userdata;
+ union in_addr_union buf = {};
+ JsonVariant *i;
+ size_t n, k = 0;
+
+ assert(variant);
+ assert(p);
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name));
+
+ n = json_variant_elements(variant);
+ if (!IN_SET(n, 4, 16))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is array of unexpected size.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(i, variant) {
+ intmax_t b;
+
+ if (!json_variant_is_integer(i))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Element %zu of JSON field '%s' is not an integer.", k, strna(name));
+
+ b = json_variant_integer(i);
+ if (b < 0 || b > 0xff)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Element %zu of JSON field '%s' is out of range 0…255.", k, strna(name));
+
+ buf.bytes[k++] = (uint8_t) b;
+ }
+
+ p->address = buf;
+ p->address_size = k;
+
+ return 0;
+}
+
+static void vl_method_resolve_address_complete(DnsQuery *q) {
+ _cleanup_(json_variant_unrefp) JsonVariant *array = NULL;
+ DnsQuestion *question;
+ DnsResourceRecord *rr;
+ int ifindex, r;
+
+ assert(q);
+
+ if (q->state != DNS_TRANSACTION_SUCCESS) {
+ r = reply_query_state(q);
+ goto finish;
+ }
+
+ r = dns_query_process_cname(q);
+ if (r == -ELOOP) {
+ r = varlink_error(q->varlink_request, "io.systemd.Resolve.CNAMELoop", NULL);
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == DNS_QUERY_RESTARTED) /* This was a cname, and the query was restarted. */
+ return;
+
+ question = dns_query_question_for_protocol(q, q->answer_protocol);
+
+ DNS_ANSWER_FOREACH_IFINDEX(rr, ifindex, q->answer) {
+ _cleanup_(json_variant_unrefp) JsonVariant *entry = NULL;
+ _cleanup_free_ char *normalized = NULL;
+
+ r = dns_question_matches_rr(question, rr, NULL);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ continue;
+
+ r = dns_name_normalize(rr->ptr.name, 0, &normalized);
+ if (r < 0)
+ goto finish;
+
+ r = json_build(&entry,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("ifindex", JSON_BUILD_INTEGER(ifindex)),
+ JSON_BUILD_PAIR("name", JSON_BUILD_STRING(normalized))));
+ if (r < 0)
+ goto finish;
+
+ r = json_variant_append_array(&array, entry);
+ if (r < 0)
+ goto finish;
+ }
+
+ if (json_variant_is_blank_object(array)) {
+ r = varlink_error(q->varlink_request, "io.systemd.Resolve.NoSuchResourceRecord", NULL);
+ goto finish;
+ }
+
+ r = varlink_replyb(q->varlink_request,
+ JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("names", JSON_BUILD_VARIANT(array)),
+ JSON_BUILD_PAIR("flags", JSON_BUILD_INTEGER(SD_RESOLVED_FLAGS_MAKE(q->answer_protocol, q->answer_family, dns_query_fully_authenticated(q))))));
+finish:
+ if (r < 0) {
+ log_error_errno(r, "Failed to send address reply: %m");
+ r = varlink_error_errno(q->varlink_request, r);
+ }
+
+ dns_query_free(q);
+}
+
+static int vl_method_resolve_address(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+ static const JsonDispatch dispatch_table[] = {
+ { "ifindex", JSON_VARIANT_UNSIGNED, json_dispatch_int, offsetof(LookupParameters, ifindex), 0 },
+ { "family", JSON_VARIANT_UNSIGNED, json_dispatch_int, offsetof(LookupParameters, family), JSON_MANDATORY },
+ { "address", JSON_VARIANT_ARRAY, json_dispatch_address, 0, JSON_MANDATORY },
+ { "flags", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(LookupParameters, flags), 0 },
+ {}
+ };
+
+ _cleanup_(dns_question_unrefp) DnsQuestion *question = NULL;
+ _cleanup_(lookup_parameters_destroy) LookupParameters p = {
+ .family = AF_UNSPEC,
+ };
+ Manager *m = userdata;
+ DnsQuery *q;
+ int r;
+
+ assert(link);
+ assert(m);
+
+ if (FLAGS_SET(flags, VARLINK_METHOD_ONEWAY))
+ return -EINVAL;
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (p.ifindex < 0)
+ return varlink_error_invalid_parameter(link, JSON_VARIANT_STRING_CONST("ifindex"));
+
+ if (!IN_SET(p.family, AF_UNSPEC, AF_INET, AF_INET6))
+ return varlink_error_invalid_parameter(link, JSON_VARIANT_STRING_CONST("family"));
+
+ if (FAMILY_ADDRESS_SIZE(p.family) != p.address_size)
+ return varlink_error(link, "io.systemd.UserDatabase.BadAddressSize", NULL);
+
+ if (!validate_and_mangle_flags(NULL, &p.flags, 0))
+ return varlink_error_invalid_parameter(link, JSON_VARIANT_STRING_CONST("flags"));
+
+ r = dns_question_new_reverse(&question, p.family, &p.address);
+ if (r < 0)
+ return r;
+
+ r = dns_query_new(m, &q, question, question, p.ifindex, p.flags|SD_RESOLVED_NO_SEARCH);
+ if (r < 0)
+ return r;
+
+ q->varlink_request = varlink_ref(link);
+ varlink_set_userdata(link, q);
+
+ q->request_family = p.family;
+ q->request_address = p.address;
+ q->complete = vl_method_resolve_address_complete;
+
+ r = dns_query_go(q);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ dns_query_free(q);
+ return r;
+}
+
+int manager_varlink_init(Manager *m) {
+ _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
+ int r;
+
+ assert(m);
+
+ if (m->varlink_server)
+ return 0;
+
+ r = varlink_server_new(&s, VARLINK_SERVER_ACCOUNT_UID);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate varlink server object: %m");
+
+ varlink_server_set_userdata(s, m);
+
+ r = varlink_server_bind_method_many(
+ s,
+ "io.systemd.Resolve.ResolveHostname", vl_method_resolve_hostname,
+ "io.systemd.Resolve.ResolveAddress", vl_method_resolve_address);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register varlink methods: %m");
+
+ r = varlink_server_bind_disconnect(s, vl_on_disconnect);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register varlink disconnect handler: %m");
+
+ r = varlink_server_listen_address(s, "/run/systemd/resolve/io.systemd.Resolve", 0666);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind to varlink socket: %m");
+
+ r = varlink_server_attach_event(s, m->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ m->varlink_server = TAKE_PTR(s);
+ return 0;
+}
+
+void manager_varlink_done(Manager *m) {
+ assert(m);
+
+ m->varlink_server = varlink_server_unref(m->varlink_server);
+}
diff --git a/src/resolve/resolved-varlink.h b/src/resolve/resolved-varlink.h
new file mode 100644
index 0000000..57fdfe9
--- /dev/null
+++ b/src/resolve/resolved-varlink.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "resolved-manager.h"
+
+int manager_varlink_init(Manager *m);
+void manager_varlink_done(Manager *m);
diff --git a/src/resolve/resolved.c b/src/resolve/resolved.c
new file mode 100644
index 0000000..fd9be30
--- /dev/null
+++ b/src/resolve/resolved.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "bus-log-control-api.h"
+#include "capability-util.h"
+#include "daemon-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "resolved-bus.h"
+#include "resolved-conf.h"
+#include "resolved-manager.h"
+#include "resolved-resolv-conf.h"
+#include "selinux-util.h"
+#include "service-util.h"
+#include "signal-util.h"
+#include "user-util.h"
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(notify_on_cleanup) const char *notify_stop = NULL;
+ int r;
+
+ log_setup_service();
+
+ r = service_parse_argv("systemd-resolved.service",
+ "Provide name resolution with caching using DNS, mDNS, LLMNR.",
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object),
+ argc, argv);
+ if (r <= 0)
+ return r;
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ /* Drop privileges, but only if we have been started as root. If we are not running as root we assume most
+ * privileges are already dropped and we can't create our directory. */
+ if (getuid() == 0) {
+ const char *user = "systemd-resolve";
+ uid_t uid;
+ gid_t gid;
+
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Cannot resolve user name %s: %m", user);
+
+ /* As we're root, we can create the directory where resolv.conf will live */
+ r = mkdir_safe_label("/run/systemd/resolve", 0755, uid, gid, MKDIR_WARN_MODE);
+ if (r < 0)
+ return log_error_errno(r, "Could not create runtime directory: %m");
+
+ /* Drop privileges, but keep three caps. Note that we drop those too, later on (see below) */
+ r = drop_privileges(uid, gid,
+ (UINT64_C(1) << CAP_NET_RAW)| /* needed for SO_BINDTODEVICE */
+ (UINT64_C(1) << CAP_NET_BIND_SERVICE)| /* needed to bind on port 53 */
+ (UINT64_C(1) << CAP_SETPCAP) /* needed in order to drop the caps later */);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileges: %m");
+ }
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Could not create manager: %m");
+
+ r = manager_start(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start manager: %m");
+
+ /* Write finish default resolv.conf to avoid a dangling symlink */
+ (void) manager_write_resolv_conf(m);
+
+ (void) manager_check_resolv_conf(m);
+
+ /* Let's drop the remaining caps now */
+ r = capability_bounding_set_drop(0, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop remaining caps: %m");
+
+ notify_stop = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/resolve/resolved.conf.in b/src/resolve/resolved.conf.in
new file mode 100644
index 0000000..93279b3
--- /dev/null
+++ b/src/resolve/resolved.conf.in
@@ -0,0 +1,30 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See resolved.conf(5) for details
+
+[Resolve]
+# Some examples of DNS servers which may be used for DNS= and FallbackDNS=:
+# Cloudflare: 1.1.1.1 1.0.0.1 2606:4700:4700::1111 2606:4700:4700::1001
+# Google: 8.8.8.8 8.8.4.4 2001:4860:4860::8888 2001:4860:4860::8844
+# Quad9: 9.9.9.9 2620:fe::fe
+#DNS=
+#FallbackDNS=@DNS_SERVERS@
+#Domains=
+#DNSSEC=@DEFAULT_DNSSEC_MODE@
+#DNSOverTLS=@DEFAULT_DNS_OVER_TLS_MODE@
+#MulticastDNS=@DEFAULT_MDNS_MODE@
+#LLMNR=@DEFAULT_LLMNR_MODE@
+#Cache=yes
+#DNSStubListener=yes
+#DNSStubListenerExtra=
+#ReadEtcHosts=yes
+#ResolveUnicastSingleLabel=no
diff --git a/src/resolve/test-dns-packet.c b/src/resolve/test-dns-packet.c
new file mode 100644
index 0000000..47c7d67
--- /dev/null
+++ b/src/resolve/test-dns-packet.c
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "glob-util.h"
+#include "log.h"
+#include "macro.h"
+#include "resolved-dns-packet.h"
+#include "resolved-dns-rr.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "unaligned.h"
+
+#define HASH_KEY SD_ID128_MAKE(d3,1e,48,90,4b,fa,4c,fe,af,9d,d5,a1,d7,2e,8a,b1)
+
+static void verify_rr_copy(DnsResourceRecord *rr) {
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *copy = NULL;
+ const char *a, *b;
+
+ assert_se(copy = dns_resource_record_copy(rr));
+ assert_se(dns_resource_record_equal(copy, rr) > 0);
+
+ assert_se(a = dns_resource_record_to_string(rr));
+ assert_se(b = dns_resource_record_to_string(copy));
+
+ assert_se(streq(a, b));
+}
+
+static uint64_t hash(DnsResourceRecord *rr) {
+ struct siphash state;
+
+ siphash24_init(&state, HASH_KEY.bytes);
+ dns_resource_record_hash_func(rr, &state);
+ return siphash24_finalize(&state);
+}
+
+static void test_packet_from_file(const char* filename, bool canonical) {
+ _cleanup_free_ char *data = NULL;
+ size_t data_size, packet_size, offset;
+
+ assert_se(read_full_file(filename, &data, &data_size) >= 0);
+ assert_se(data);
+ assert_se(data_size > 8);
+
+ log_info("============== %s %s==============", filename, canonical ? "canonical " : "");
+
+ for (offset = 0; offset < data_size; offset += 8 + packet_size) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL, *p2 = NULL;
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL, *rr2 = NULL;
+ const char *s, *s2;
+ uint64_t hash1, hash2;
+
+ packet_size = unaligned_read_le64(data + offset);
+ assert_se(packet_size > 0);
+ assert_se(offset + 8 + packet_size <= data_size);
+
+ assert_se(dns_packet_new(&p, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX) >= 0);
+
+ assert_se(dns_packet_append_blob(p, data + offset + 8, packet_size, NULL) >= 0);
+ assert_se(dns_packet_read_rr(p, &rr, NULL, NULL) >= 0);
+
+ verify_rr_copy(rr);
+
+ s = dns_resource_record_to_string(rr);
+ assert_se(s);
+ puts(s);
+
+ hash1 = hash(rr);
+
+ assert_se(dns_resource_record_to_wire_format(rr, canonical) >= 0);
+
+ assert_se(dns_packet_new(&p2, DNS_PROTOCOL_DNS, 0, DNS_PACKET_SIZE_MAX) >= 0);
+ assert_se(dns_packet_append_blob(p2, rr->wire_format, rr->wire_format_size, NULL) >= 0);
+ assert_se(dns_packet_read_rr(p2, &rr2, NULL, NULL) >= 0);
+
+ verify_rr_copy(rr);
+
+ s2 = dns_resource_record_to_string(rr);
+ assert_se(s2);
+ assert_se(streq(s, s2));
+
+ hash2 = hash(rr);
+ assert_se(hash1 == hash2);
+ }
+}
+
+int main(int argc, char **argv) {
+ int i, N;
+ _cleanup_globfree_ glob_t g = {};
+ char **fnames;
+
+ log_parse_environment();
+
+ if (argc >= 2) {
+ N = argc - 1;
+ fnames = argv + 1;
+ } else {
+ _cleanup_free_ char *pkts_glob = NULL;
+ assert_se(get_testdata_dir("test-resolve/*.pkts", &pkts_glob) >= 0);
+ assert_se(glob(pkts_glob, GLOB_NOSORT, NULL, &g) == 0);
+ N = g.gl_pathc;
+ fnames = g.gl_pathv;
+ }
+
+ for (i = 0; i < N; i++) {
+ test_packet_from_file(fnames[i], false);
+ puts("");
+ test_packet_from_file(fnames[i], true);
+ if (i + 1 < N)
+ puts("");
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/resolve/test-dnssec-complex.c b/src/resolve/test-dnssec-complex.c
new file mode 100644
index 0000000..e21eb86
--- /dev/null
+++ b/src/resolve/test-dnssec-complex.c
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/ip.h>
+
+#include "sd-bus.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "dns-type.h"
+#include "random-util.h"
+#include "resolved-def.h"
+#include "string-util.h"
+#include "time-util.h"
+
+static void prefix_random(const char *name, char **ret) {
+ uint64_t i, u;
+ char *m = NULL;
+
+ u = 1 + (random_u64() & 3);
+
+ for (i = 0; i < u; i++) {
+ _cleanup_free_ char *b = NULL;
+ char *x;
+
+ assert_se(asprintf(&b, "x%" PRIu64 "x", random_u64()));
+ x = strjoin(b, ".", name);
+ assert_se(x);
+
+ free(m);
+ m = x;
+ }
+
+ *ret = m;
+ }
+
+static void test_rr_lookup(sd_bus *bus, const char *name, uint16_t type, const char *result) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *m = NULL;
+ int r;
+
+ /* If the name starts with a dot, we prefix one to three random labels */
+ if (startswith(name, ".")) {
+ prefix_random(name + 1, &m);
+ name = m;
+ }
+
+ assert_se(sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveRecord") >= 0);
+
+ assert_se(sd_bus_message_append(req, "isqqt", 0, name, DNS_CLASS_IN, type, UINT64_C(0)) >= 0);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+
+ if (r < 0) {
+ assert_se(result);
+ assert_se(sd_bus_error_has_name(&error, result));
+ log_info("[OK] %s/%s resulted in <%s>.", name, dns_type_to_string(type), error.name);
+ } else {
+ assert_se(!result);
+ log_info("[OK] %s/%s succeeded.", name, dns_type_to_string(type));
+ }
+}
+
+static void test_hostname_lookup(sd_bus *bus, const char *name, int family, const char *result) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *m = NULL;
+ const char *af;
+ int r;
+
+ af = family == AF_UNSPEC ? "AF_UNSPEC" : af_to_name(family);
+
+ /* If the name starts with a dot, we prefix one to three random labels */
+ if (startswith(name, ".")) {
+ prefix_random(name + 1, &m);
+ name = m;
+ }
+
+ assert_se(sd_bus_message_new_method_call(
+ bus,
+ &req,
+ "org.freedesktop.resolve1",
+ "/org/freedesktop/resolve1",
+ "org.freedesktop.resolve1.Manager",
+ "ResolveHostname") >= 0);
+
+ assert_se(sd_bus_message_append(req, "isit", 0, name, family, UINT64_C(0)) >= 0);
+
+ r = sd_bus_call(bus, req, SD_RESOLVED_QUERY_TIMEOUT_USEC, &error, &reply);
+
+ if (r < 0) {
+ assert_se(result);
+ assert_se(sd_bus_error_has_name(&error, result));
+ log_info("[OK] %s/%s resulted in <%s>.", name, af, error.name);
+ } else {
+ assert_se(!result);
+ log_info("[OK] %s/%s succeeded.", name, af);
+ }
+
+}
+
+int main(int argc, char* argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+
+ /* Note that this is a manual test as it requires:
+ *
+ * Full network access
+ * A DNSSEC capable DNS server
+ * That zones contacted are still set up as they were when I wrote this.
+ */
+
+ assert_se(sd_bus_open_system(&bus) >= 0);
+
+ /* Normally signed */
+ test_rr_lookup(bus, "www.eurid.eu", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, "www.eurid.eu", AF_UNSPEC, NULL);
+
+ test_rr_lookup(bus, "sigok.verteiltesysteme.net", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, "sigok.verteiltesysteme.net", AF_UNSPEC, NULL);
+
+ /* Normally signed, NODATA */
+ test_rr_lookup(bus, "www.eurid.eu", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+ test_rr_lookup(bus, "sigok.verteiltesysteme.net", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* Invalid signature */
+ test_rr_lookup(bus, "sigfail.verteiltesysteme.net", DNS_TYPE_A, BUS_ERROR_DNSSEC_FAILED);
+ test_hostname_lookup(bus, "sigfail.verteiltesysteme.net", AF_INET, BUS_ERROR_DNSSEC_FAILED);
+
+ /* Invalid signature, RSA, wildcard */
+ test_rr_lookup(bus, ".wilda.rhybar.0skar.cz", DNS_TYPE_A, BUS_ERROR_DNSSEC_FAILED);
+ test_hostname_lookup(bus, ".wilda.rhybar.0skar.cz", AF_INET, BUS_ERROR_DNSSEC_FAILED);
+
+ /* Invalid signature, ECDSA, wildcard */
+ test_rr_lookup(bus, ".wilda.rhybar.ecdsa.0skar.cz", DNS_TYPE_A, BUS_ERROR_DNSSEC_FAILED);
+ test_hostname_lookup(bus, ".wilda.rhybar.ecdsa.0skar.cz", AF_INET, BUS_ERROR_DNSSEC_FAILED);
+
+ /* Missing DS for DNSKEY */
+ test_rr_lookup(bus, "www.dnssec-bogus.sg", DNS_TYPE_A, BUS_ERROR_DNSSEC_FAILED);
+ test_hostname_lookup(bus, "www.dnssec-bogus.sg", AF_INET, BUS_ERROR_DNSSEC_FAILED);
+
+ /* NXDOMAIN in NSEC domain */
+ test_rr_lookup(bus, "hhh.nasa.gov", DNS_TYPE_A, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "hhh.nasa.gov", AF_UNSPEC, _BUS_ERROR_DNS "NXDOMAIN");
+ test_rr_lookup(bus, "_pgpkey-https._tcp.hkps.pool.sks-keyservers.net", DNS_TYPE_SRV, _BUS_ERROR_DNS "NXDOMAIN");
+
+ /* wildcard, NSEC zone */
+ test_rr_lookup(bus, ".wilda.nsec.0skar.cz", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, ".wilda.nsec.0skar.cz", AF_INET, NULL);
+
+ /* wildcard, NSEC zone, NODATA */
+ test_rr_lookup(bus, ".wilda.nsec.0skar.cz", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* wildcard, NSEC3 zone */
+ test_rr_lookup(bus, ".wilda.0skar.cz", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, ".wilda.0skar.cz", AF_INET, NULL);
+
+ /* wildcard, NSEC3 zone, NODATA */
+ test_rr_lookup(bus, ".wilda.0skar.cz", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* wildcard, NSEC zone, CNAME */
+ test_rr_lookup(bus, ".wild.nsec.0skar.cz", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, ".wild.nsec.0skar.cz", AF_UNSPEC, NULL);
+ test_hostname_lookup(bus, ".wild.nsec.0skar.cz", AF_INET, NULL);
+
+ /* wildcard, NSEC zone, NODATA, CNAME */
+ test_rr_lookup(bus, ".wild.nsec.0skar.cz", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* wildcard, NSEC3 zone, CNAME */
+ test_rr_lookup(bus, ".wild.0skar.cz", DNS_TYPE_A, NULL);
+ test_hostname_lookup(bus, ".wild.0skar.cz", AF_UNSPEC, NULL);
+ test_hostname_lookup(bus, ".wild.0skar.cz", AF_INET, NULL);
+
+ /* wildcard, NSEC3 zone, NODATA, CNAME */
+ test_rr_lookup(bus, ".wild.0skar.cz", DNS_TYPE_RP, BUS_ERROR_NO_SUCH_RR);
+
+ /* NODATA due to empty non-terminal in NSEC domain */
+ test_rr_lookup(bus, "herndon.nasa.gov", DNS_TYPE_A, BUS_ERROR_NO_SUCH_RR);
+ test_hostname_lookup(bus, "herndon.nasa.gov", AF_UNSPEC, BUS_ERROR_NO_SUCH_RR);
+ test_hostname_lookup(bus, "herndon.nasa.gov", AF_INET, BUS_ERROR_NO_SUCH_RR);
+ test_hostname_lookup(bus, "herndon.nasa.gov", AF_INET6, BUS_ERROR_NO_SUCH_RR);
+
+ /* NXDOMAIN in NSEC root zone: */
+ test_rr_lookup(bus, "jasdhjas.kjkfgjhfjg", DNS_TYPE_A, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "jasdhjas.kjkfgjhfjg", AF_UNSPEC, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "jasdhjas.kjkfgjhfjg", AF_INET, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "jasdhjas.kjkfgjhfjg", AF_INET6, _BUS_ERROR_DNS "NXDOMAIN");
+
+ /* NXDOMAIN in NSEC3 .com zone: */
+ test_rr_lookup(bus, "kjkfgjhfjgsdfdsfd.com", DNS_TYPE_A, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "kjkfgjhfjgsdfdsfd.com", AF_INET, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "kjkfgjhfjgsdfdsfd.com", AF_INET6, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, "kjkfgjhfjgsdfdsfd.com", AF_UNSPEC, _BUS_ERROR_DNS "NXDOMAIN");
+
+ /* Unsigned A */
+ test_rr_lookup(bus, "poettering.de", DNS_TYPE_A, NULL);
+ test_rr_lookup(bus, "poettering.de", DNS_TYPE_AAAA, NULL);
+ test_hostname_lookup(bus, "poettering.de", AF_UNSPEC, NULL);
+ test_hostname_lookup(bus, "poettering.de", AF_INET, NULL);
+ test_hostname_lookup(bus, "poettering.de", AF_INET6, NULL);
+
+#if HAVE_LIBIDN2 || HAVE_LIBIDN
+ /* Unsigned A with IDNA conversion necessary */
+ test_hostname_lookup(bus, "pöttering.de", AF_UNSPEC, NULL);
+ test_hostname_lookup(bus, "pöttering.de", AF_INET, NULL);
+ test_hostname_lookup(bus, "pöttering.de", AF_INET6, NULL);
+#endif
+
+ /* DNAME, pointing to NXDOMAIN */
+ test_rr_lookup(bus, ".ireallyhpoethisdoesnexist.xn--kprw13d.", DNS_TYPE_A, _BUS_ERROR_DNS "NXDOMAIN");
+ test_rr_lookup(bus, ".ireallyhpoethisdoesnexist.xn--kprw13d.", DNS_TYPE_RP, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, ".ireallyhpoethisdoesntexist.xn--kprw13d.", AF_UNSPEC, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, ".ireallyhpoethisdoesntexist.xn--kprw13d.", AF_INET, _BUS_ERROR_DNS "NXDOMAIN");
+ test_hostname_lookup(bus, ".ireallyhpoethisdoesntexist.xn--kprw13d.", AF_INET6, _BUS_ERROR_DNS "NXDOMAIN");
+
+ return 0;
+}
diff --git a/src/resolve/test-dnssec.c b/src/resolve/test-dnssec.c
new file mode 100644
index 0000000..0275d0e
--- /dev/null
+++ b/src/resolve/test-dnssec.c
@@ -0,0 +1,623 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#if HAVE_GCRYPT
+#include <gcrypt.h>
+#endif
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-rr.h"
+#include "string-util.h"
+#include "hexdecoct.h"
+
+#if HAVE_GCRYPT
+
+static void test_dnssec_verify_dns_key(void) {
+
+ static const uint8_t ds1_fprint[] = {
+ 0x46, 0x8B, 0xC8, 0xDD, 0xC7, 0xE8, 0x27, 0x03, 0x40, 0xBB, 0x8A, 0x1F, 0x3B, 0x2E, 0x45, 0x9D,
+ 0x80, 0x67, 0x14, 0x01,
+ };
+ static const uint8_t ds2_fprint[] = {
+ 0x8A, 0xEE, 0x80, 0x47, 0x05, 0x5F, 0x83, 0xD1, 0x48, 0xBA, 0x8F, 0xF6, 0xDD, 0xA7, 0x60, 0xCE,
+ 0x94, 0xF7, 0xC7, 0x5E, 0x52, 0x4C, 0xF2, 0xE9, 0x50, 0xB9, 0x2E, 0xCB, 0xEF, 0x96, 0xB9, 0x98,
+ };
+ static const uint8_t dnskey_blob[] = {
+ 0x03, 0x01, 0x00, 0x01, 0xa8, 0x12, 0xda, 0x4f, 0xd2, 0x7d, 0x54, 0x14, 0x0e, 0xcc, 0x5b, 0x5e,
+ 0x45, 0x9c, 0x96, 0x98, 0xc0, 0xc0, 0x85, 0x81, 0xb1, 0x47, 0x8c, 0x7d, 0xe8, 0x39, 0x50, 0xcc,
+ 0xc5, 0xd0, 0xf2, 0x00, 0x81, 0x67, 0x79, 0xf6, 0xcc, 0x9d, 0xad, 0x6c, 0xbb, 0x7b, 0x6f, 0x48,
+ 0x97, 0x15, 0x1c, 0xfd, 0x0b, 0xfe, 0xd3, 0xd7, 0x7d, 0x9f, 0x81, 0x26, 0xd3, 0xc5, 0x65, 0x49,
+ 0xcf, 0x46, 0x62, 0xb0, 0x55, 0x6e, 0x47, 0xc7, 0x30, 0xef, 0x51, 0xfb, 0x3e, 0xc6, 0xef, 0xde,
+ 0x27, 0x3f, 0xfa, 0x57, 0x2d, 0xa7, 0x1d, 0x80, 0x46, 0x9a, 0x5f, 0x14, 0xb3, 0xb0, 0x2c, 0xbe,
+ 0x72, 0xca, 0xdf, 0xb2, 0xff, 0x36, 0x5b, 0x4f, 0xec, 0x58, 0x8e, 0x8d, 0x01, 0xe9, 0xa9, 0xdf,
+ 0xb5, 0x60, 0xad, 0x52, 0x4d, 0xfc, 0xa9, 0x3e, 0x8d, 0x35, 0x95, 0xb3, 0x4e, 0x0f, 0xca, 0x45,
+ 0x1b, 0xf7, 0xef, 0x3a, 0x88, 0x25, 0x08, 0xc7, 0x4e, 0x06, 0xc1, 0x62, 0x1a, 0xce, 0xd8, 0x77,
+ 0xbd, 0x02, 0x65, 0xf8, 0x49, 0xfb, 0xce, 0xf6, 0xa8, 0x09, 0xfc, 0xde, 0xb2, 0x09, 0x9d, 0x39,
+ 0xf8, 0x63, 0x9c, 0x32, 0x42, 0x7c, 0xa0, 0x30, 0x86, 0x72, 0x7a, 0x4a, 0xc6, 0xd4, 0xb3, 0x2d,
+ 0x24, 0xef, 0x96, 0x3f, 0xc2, 0xda, 0xd3, 0xf2, 0x15, 0x6f, 0xda, 0x65, 0x4b, 0x81, 0x28, 0x68,
+ 0xf4, 0xfe, 0x3e, 0x71, 0x4f, 0x50, 0x96, 0x72, 0x58, 0xa1, 0x89, 0xdd, 0x01, 0x61, 0x39, 0x39,
+ 0xc6, 0x76, 0xa4, 0xda, 0x02, 0x70, 0x3d, 0xc0, 0xdc, 0x8d, 0x70, 0x72, 0x04, 0x90, 0x79, 0xd4,
+ 0xec, 0x65, 0xcf, 0x49, 0x35, 0x25, 0x3a, 0x14, 0x1a, 0x45, 0x20, 0xeb, 0x31, 0xaf, 0x92, 0xba,
+ 0x20, 0xd3, 0xcd, 0xa7, 0x13, 0x44, 0xdc, 0xcf, 0xf0, 0x27, 0x34, 0xb9, 0xe7, 0x24, 0x6f, 0x73,
+ 0xe7, 0xea, 0x77, 0x03,
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *dnskey = NULL, *ds1 = NULL, *ds2 = NULL;
+
+ /* The two DS RRs in effect for nasa.gov on 2015-12-01. */
+ ds1 = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, "nasa.gov");
+ assert_se(ds1);
+
+ ds1->ds.key_tag = 47857;
+ ds1->ds.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ ds1->ds.digest_type = DNSSEC_DIGEST_SHA1;
+ ds1->ds.digest_size = sizeof(ds1_fprint);
+ ds1->ds.digest = memdup(ds1_fprint, ds1->ds.digest_size);
+ assert_se(ds1->ds.digest);
+
+ log_info("DS1: %s", strna(dns_resource_record_to_string(ds1)));
+
+ ds2 = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, "NASA.GOV");
+ assert_se(ds2);
+
+ ds2->ds.key_tag = 47857;
+ ds2->ds.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ ds2->ds.digest_type = DNSSEC_DIGEST_SHA256;
+ ds2->ds.digest_size = sizeof(ds2_fprint);
+ ds2->ds.digest = memdup(ds2_fprint, ds2->ds.digest_size);
+ assert_se(ds2->ds.digest);
+
+ log_info("DS2: %s", strna(dns_resource_record_to_string(ds2)));
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "nasa.GOV");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 257;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+ log_info("DNSKEY keytag: %u", dnssec_keytag(dnskey, false));
+
+ assert_se(dnssec_verify_dnskey_by_ds(dnskey, ds1, false) > 0);
+ assert_se(dnssec_verify_dnskey_by_ds(dnskey, ds2, false) > 0);
+}
+
+static void test_dnssec_verify_rfc8080_ed25519_example1(void) {
+ static const uint8_t dnskey_blob[] = {
+ 0x97, 0x4d, 0x96, 0xa2, 0x2d, 0x22, 0x4b, 0xc0, 0x1a, 0xdb, 0x91, 0x50, 0x91, 0x47, 0x7d,
+ 0x44, 0xcc, 0xd9, 0x1c, 0x9a, 0x41, 0xa1, 0x14, 0x30, 0x01, 0x01, 0x17, 0xd5, 0x2c, 0x59,
+ 0x24, 0xe
+ };
+ static const uint8_t ds_fprint[] = {
+ 0xdd, 0xa6, 0xb9, 0x69, 0xbd, 0xfb, 0x79, 0xf7, 0x1e, 0xe7, 0xb7, 0xfb, 0xdf, 0xb7, 0xdc,
+ 0xd7, 0xad, 0xbb, 0xd3, 0x5d, 0xdf, 0x79, 0xed, 0x3b, 0x6d, 0xd7, 0xf6, 0xe3, 0x56, 0xdd,
+ 0xd7, 0x47, 0xf7, 0x6f, 0x5f, 0x7a, 0xe1, 0xa6, 0xf9, 0xe5, 0xce, 0xfc, 0x7b, 0xbf, 0x5a,
+ 0xdf, 0x4e, 0x1b
+ };
+ static const uint8_t signature_blob[] = {
+ 0xa0, 0xbf, 0x64, 0xac, 0x9b, 0xa7, 0xef, 0x17, 0xc1, 0x38, 0x85, 0x9c, 0x18, 0x78, 0xbb,
+ 0x99, 0xa8, 0x39, 0xfe, 0x17, 0x59, 0xac, 0xa5, 0xb0, 0xd7, 0x98, 0xcf, 0x1a, 0xb1, 0xe9,
+ 0x8d, 0x07, 0x91, 0x02, 0xf4, 0xdd, 0xb3, 0x36, 0x8f, 0x0f, 0xe4, 0x0b, 0xb3, 0x77, 0xf1,
+ 0xf0, 0x0e, 0x0c, 0xdd, 0xed, 0xb7, 0x99, 0x16, 0x7d, 0x56, 0xb6, 0xe9, 0x32, 0x78, 0x30,
+ 0x72, 0xba, 0x8d, 0x02
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *dnskey = NULL, *ds = NULL, *mx = NULL,
+ *rrsig = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "example.com.");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 257;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_ED25519;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+
+ ds = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, "example.com.");
+ assert_se(ds);
+
+ ds->ds.key_tag = 3613;
+ ds->ds.algorithm = DNSSEC_ALGORITHM_ED25519;
+ ds->ds.digest_type = DNSSEC_DIGEST_SHA256;
+ ds->ds.digest_size = sizeof(ds_fprint);
+ ds->ds.digest = memdup(ds_fprint, ds->ds.digest_size);
+ assert_se(ds->ds.digest);
+
+ log_info("DS: %s", strna(dns_resource_record_to_string(ds)));
+
+ mx = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_MX, "example.com.");
+ assert_se(mx);
+
+ mx->mx.priority = 10;
+ mx->mx.exchange = strdup("mail.example.com.");
+ assert_se(mx->mx.exchange);
+
+ log_info("MX: %s", strna(dns_resource_record_to_string(mx)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "example.com.");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_MX;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_ED25519;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 3600;
+ rrsig->rrsig.expiration = 1440021600;
+ rrsig->rrsig.inception = 1438207200;
+ rrsig->rrsig.key_tag = 3613;
+ rrsig->rrsig.signer = strdup("example.com.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ assert_se(dnssec_key_match_rrsig(mx->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(1);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, mx, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ assert_se(dnssec_verify_rrset(answer, mx->key, rrsig, dnskey,
+ rrsig->rrsig.inception * USEC_PER_SEC, &result) >= 0);
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+ assert_se(result == DNSSEC_VALIDATED);
+#else
+ assert_se(result == DNSSEC_UNSUPPORTED_ALGORITHM);
+#endif
+}
+
+static void test_dnssec_verify_rfc8080_ed25519_example2(void) {
+ static const uint8_t dnskey_blob[] = {
+ 0xcc, 0xf9, 0xd9, 0xfd, 0x0c, 0x04, 0x7b, 0xb4, 0xbc, 0x0b, 0x94, 0x8f, 0xcf, 0x63, 0x9f,
+ 0x4b, 0x94, 0x51, 0xe3, 0x40, 0x13, 0x93, 0x6f, 0xeb, 0x62, 0x71, 0x3d, 0xc4, 0x72, 0x4,
+ 0x8a, 0x3b
+ };
+ static const uint8_t ds_fprint[] = {
+ 0xe3, 0x4d, 0x7b, 0xf3, 0x56, 0xfd, 0xdf, 0x87, 0xb7, 0xf7, 0x67, 0x5e, 0xe3, 0xdd, 0x9e,
+ 0x73, 0xbe, 0xda, 0x7b, 0x67, 0xb5, 0xe5, 0xde, 0xf4, 0x7f, 0xae, 0x7b, 0xe5, 0xad, 0x5c,
+ 0xd1, 0xb7, 0x39, 0xf5, 0xce, 0x76, 0xef, 0x97, 0x34, 0xe1, 0xe6, 0xde, 0xf3, 0x47, 0x3a,
+ 0xeb, 0x5e, 0x1c
+ };
+ static const uint8_t signature_blob[] = {
+ 0xcd, 0x74, 0x34, 0x6e, 0x46, 0x20, 0x41, 0x31, 0x05, 0xc9, 0xf2, 0xf2, 0x8b, 0xd4, 0x28,
+ 0x89, 0x8e, 0x83, 0xf1, 0x97, 0x58, 0xa3, 0x8c, 0x32, 0x52, 0x15, 0x62, 0xa1, 0x86, 0x57,
+ 0x15, 0xd4, 0xf8, 0xd7, 0x44, 0x0f, 0x44, 0x84, 0xd0, 0x4a, 0xa2, 0x52, 0x9f, 0x34, 0x28,
+ 0x4a, 0x6e, 0x69, 0xa0, 0x9e, 0xe0, 0x0f, 0xb0, 0x10, 0x47, 0x43, 0xbb, 0x2a, 0xe2, 0x39,
+ 0x93, 0x6a, 0x5c, 0x06
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *dnskey = NULL, *ds = NULL, *mx = NULL,
+ *rrsig = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "example.com.");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 257;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_ED25519;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+
+ ds = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DS, "example.com.");
+ assert_se(ds);
+
+ ds->ds.key_tag = 35217;
+ ds->ds.algorithm = DNSSEC_ALGORITHM_ED25519;
+ ds->ds.digest_type = DNSSEC_DIGEST_SHA256;
+ ds->ds.digest_size = sizeof(ds_fprint);
+ ds->ds.digest = memdup(ds_fprint, ds->ds.digest_size);
+ assert_se(ds->ds.digest);
+
+ log_info("DS: %s", strna(dns_resource_record_to_string(ds)));
+
+ mx = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_MX, "example.com.");
+ assert_se(mx);
+
+ mx->mx.priority = 10;
+ mx->mx.exchange = strdup("mail.example.com.");
+ assert_se(mx->mx.exchange);
+
+ log_info("MX: %s", strna(dns_resource_record_to_string(mx)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "example.com.");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_MX;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_ED25519;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 3600;
+ rrsig->rrsig.expiration = 1440021600;
+ rrsig->rrsig.inception = 1438207200;
+ rrsig->rrsig.key_tag = 35217;
+ rrsig->rrsig.signer = strdup("example.com.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ assert_se(dnssec_key_match_rrsig(mx->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(1);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, mx, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ assert_se(dnssec_verify_rrset(answer, mx->key, rrsig, dnskey,
+ rrsig->rrsig.inception * USEC_PER_SEC, &result) >= 0);
+#if GCRYPT_VERSION_NUMBER >= 0x010600
+ assert_se(result == DNSSEC_VALIDATED);
+#else
+ assert_se(result == DNSSEC_UNSUPPORTED_ALGORITHM);
+#endif
+}
+static void test_dnssec_verify_rrset(void) {
+
+ static const uint8_t signature_blob[] = {
+ 0x7f, 0x79, 0xdd, 0x5e, 0x89, 0x79, 0x18, 0xd0, 0x34, 0x86, 0x8c, 0x72, 0x77, 0x75, 0x48, 0x4d,
+ 0xc3, 0x7d, 0x38, 0x04, 0xab, 0xcd, 0x9e, 0x4c, 0x82, 0xb0, 0x92, 0xca, 0xe9, 0x66, 0xe9, 0x6e,
+ 0x47, 0xc7, 0x68, 0x8c, 0x94, 0xf6, 0x69, 0xcb, 0x75, 0x94, 0xe6, 0x30, 0xa6, 0xfb, 0x68, 0x64,
+ 0x96, 0x1a, 0x84, 0xe1, 0xdc, 0x16, 0x4c, 0x83, 0x6c, 0x44, 0xf2, 0x74, 0x4d, 0x74, 0x79, 0x8f,
+ 0xf3, 0xf4, 0x63, 0x0d, 0xef, 0x5a, 0xe7, 0xe2, 0xfd, 0xf2, 0x2b, 0x38, 0x7c, 0x28, 0x96, 0x9d,
+ 0xb6, 0xcd, 0x5c, 0x3b, 0x57, 0xe2, 0x24, 0x78, 0x65, 0xd0, 0x9e, 0x77, 0x83, 0x09, 0x6c, 0xff,
+ 0x3d, 0x52, 0x3f, 0x6e, 0xd1, 0xed, 0x2e, 0xf9, 0xee, 0x8e, 0xa6, 0xbe, 0x9a, 0xa8, 0x87, 0x76,
+ 0xd8, 0x77, 0xcc, 0x96, 0xa0, 0x98, 0xa1, 0xd1, 0x68, 0x09, 0x43, 0xcf, 0x56, 0xd9, 0xd1, 0x66,
+ };
+
+ static const uint8_t dnskey_blob[] = {
+ 0x03, 0x01, 0x00, 0x01, 0x9b, 0x49, 0x9b, 0xc1, 0xf9, 0x9a, 0xe0, 0x4e, 0xcf, 0xcb, 0x14, 0x45,
+ 0x2e, 0xc9, 0xf9, 0x74, 0xa7, 0x18, 0xb5, 0xf3, 0xde, 0x39, 0x49, 0xdf, 0x63, 0x33, 0x97, 0x52,
+ 0xe0, 0x8e, 0xac, 0x50, 0x30, 0x8e, 0x09, 0xd5, 0x24, 0x3d, 0x26, 0xa4, 0x49, 0x37, 0x2b, 0xb0,
+ 0x6b, 0x1b, 0xdf, 0xde, 0x85, 0x83, 0xcb, 0x22, 0x4e, 0x60, 0x0a, 0x91, 0x1a, 0x1f, 0xc5, 0x40,
+ 0xb1, 0xc3, 0x15, 0xc1, 0x54, 0x77, 0x86, 0x65, 0x53, 0xec, 0x10, 0x90, 0x0c, 0x91, 0x00, 0x5e,
+ 0x15, 0xdc, 0x08, 0x02, 0x4c, 0x8c, 0x0d, 0xc0, 0xac, 0x6e, 0xc4, 0x3e, 0x1b, 0x80, 0x19, 0xe4,
+ 0xf7, 0x5f, 0x77, 0x51, 0x06, 0x87, 0x61, 0xde, 0xa2, 0x18, 0x0f, 0x40, 0x8b, 0x79, 0x72, 0xfa,
+ 0x8d, 0x1a, 0x44, 0x47, 0x0d, 0x8e, 0x3a, 0x2d, 0xc7, 0x39, 0xbf, 0x56, 0x28, 0x97, 0xd9, 0x20,
+ 0x4f, 0x00, 0x51, 0x3b,
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *a = NULL, *rrsig = NULL, *dnskey = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ a = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_A, "nAsA.gov");
+ assert_se(a);
+
+ a->a.in_addr.s_addr = inet_addr("52.0.14.116");
+
+ log_info("A: %s", strna(dns_resource_record_to_string(a)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "NaSa.GOV.");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_A;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 600;
+ rrsig->rrsig.expiration = 0x5683135c;
+ rrsig->rrsig.inception = 0x565b7da8;
+ rrsig->rrsig.key_tag = 63876;
+ rrsig->rrsig.signer = strdup("Nasa.Gov.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "nASA.gOV");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 256;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+ log_info("DNSKEY keytag: %u", dnssec_keytag(dnskey, false));
+
+ assert_se(dnssec_key_match_rrsig(a->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(1);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, a, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ /* Validate the RR as it if was 2015-12-2 today */
+ assert_se(dnssec_verify_rrset(answer, a->key, rrsig, dnskey, 1449092754*USEC_PER_SEC, &result) >= 0);
+ assert_se(result == DNSSEC_VALIDATED);
+}
+
+static void test_dnssec_verify_rrset2(void) {
+
+ static const uint8_t signature_blob[] = {
+ 0x48, 0x45, 0xc8, 0x8b, 0xc0, 0x14, 0x92, 0xf5, 0x15, 0xc6, 0x84, 0x9d, 0x2f, 0xe3, 0x32, 0x11,
+ 0x7d, 0xf1, 0xe6, 0x87, 0xb9, 0x42, 0xd3, 0x8b, 0x9e, 0xaf, 0x92, 0x31, 0x0a, 0x53, 0xad, 0x8b,
+ 0xa7, 0x5c, 0x83, 0x39, 0x8c, 0x28, 0xac, 0xce, 0x6e, 0x9c, 0x18, 0xe3, 0x31, 0x16, 0x6e, 0xca,
+ 0x38, 0x31, 0xaf, 0xd9, 0x94, 0xf1, 0x84, 0xb1, 0xdf, 0x5a, 0xc2, 0x73, 0x22, 0xf6, 0xcb, 0xa2,
+ 0xe7, 0x8c, 0x77, 0x0c, 0x74, 0x2f, 0xc2, 0x13, 0xb0, 0x93, 0x51, 0xa9, 0x4f, 0xae, 0x0a, 0xda,
+ 0x45, 0xcc, 0xfd, 0x43, 0x99, 0x36, 0x9a, 0x0d, 0x21, 0xe0, 0xeb, 0x30, 0x65, 0xd4, 0xa0, 0x27,
+ 0x37, 0x3b, 0xe4, 0xc1, 0xc5, 0xa1, 0x2a, 0xd1, 0x76, 0xc4, 0x7e, 0x64, 0x0e, 0x5a, 0xa6, 0x50,
+ 0x24, 0xd5, 0x2c, 0xcc, 0x6d, 0xe5, 0x37, 0xea, 0xbd, 0x09, 0x34, 0xed, 0x24, 0x06, 0xa1, 0x22,
+ };
+
+ static const uint8_t dnskey_blob[] = {
+ 0x03, 0x01, 0x00, 0x01, 0xc3, 0x7f, 0x1d, 0xd1, 0x1c, 0x97, 0xb1, 0x13, 0x34, 0x3a, 0x9a, 0xea,
+ 0xee, 0xd9, 0x5a, 0x11, 0x1b, 0x17, 0xc7, 0xe3, 0xd4, 0xda, 0x20, 0xbc, 0x5d, 0xba, 0x74, 0xe3,
+ 0x37, 0x99, 0xec, 0x25, 0xce, 0x93, 0x7f, 0xbd, 0x22, 0x73, 0x7e, 0x14, 0x71, 0xe0, 0x60, 0x07,
+ 0xd4, 0x39, 0x8b, 0x5e, 0xe9, 0xba, 0x25, 0xe8, 0x49, 0xe9, 0x34, 0xef, 0xfe, 0x04, 0x5c, 0xa5,
+ 0x27, 0xcd, 0xa9, 0xda, 0x70, 0x05, 0x21, 0xab, 0x15, 0x82, 0x24, 0xc3, 0x94, 0xf5, 0xd7, 0xb7,
+ 0xc4, 0x66, 0xcb, 0x32, 0x6e, 0x60, 0x2b, 0x55, 0x59, 0x28, 0x89, 0x8a, 0x72, 0xde, 0x88, 0x56,
+ 0x27, 0x95, 0xd9, 0xac, 0x88, 0x4f, 0x65, 0x2b, 0x68, 0xfc, 0xe6, 0x41, 0xc1, 0x1b, 0xef, 0x4e,
+ 0xd6, 0xc2, 0x0f, 0x64, 0x88, 0x95, 0x5e, 0xdd, 0x3a, 0x02, 0x07, 0x50, 0xa9, 0xda, 0xa4, 0x49,
+ 0x74, 0x62, 0xfe, 0xd7,
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *nsec = NULL, *rrsig = NULL, *dnskey = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ nsec = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_NSEC, "nasa.gov");
+ assert_se(nsec);
+
+ nsec->nsec.next_domain_name = strdup("3D-Printing.nasa.gov");
+ assert_se(nsec->nsec.next_domain_name);
+
+ nsec->nsec.types = bitmap_new();
+ assert_se(nsec->nsec.types);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_A) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_NS) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_SOA) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_MX) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_TXT) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_RRSIG) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_NSEC) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, DNS_TYPE_DNSKEY) >= 0);
+ assert_se(bitmap_set(nsec->nsec.types, 65534) >= 0);
+
+ log_info("NSEC: %s", strna(dns_resource_record_to_string(nsec)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "NaSa.GOV.");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_NSEC;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 300;
+ rrsig->rrsig.expiration = 0x5689002f;
+ rrsig->rrsig.inception = 0x56617230;
+ rrsig->rrsig.key_tag = 30390;
+ rrsig->rrsig.signer = strdup("Nasa.Gov.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "nASA.gOV");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 256;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+ log_info("DNSKEY keytag: %u", dnssec_keytag(dnskey, false));
+
+ assert_se(dnssec_key_match_rrsig(nsec->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(1);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, nsec, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ /* Validate the RR as it if was 2015-12-11 today */
+ assert_se(dnssec_verify_rrset(answer, nsec->key, rrsig, dnskey, 1449849318*USEC_PER_SEC, &result) >= 0);
+ assert_se(result == DNSSEC_VALIDATED);
+}
+
+static void test_dnssec_verify_rrset3(void) {
+
+ static const uint8_t signature_blob[] = {
+ 0x41, 0x09, 0x08, 0x67, 0x51, 0x6d, 0x02, 0xf2, 0x17, 0x1e, 0x61, 0x03, 0xc6, 0x80, 0x7a, 0x82,
+ 0x8f, 0x6c, 0x8c, 0x4c, 0x68, 0x6f, 0x1c, 0xaa, 0x4a, 0xe0, 0x9b, 0x72, 0xdf, 0x7f, 0x15, 0xfa,
+ 0x2b, 0xc5, 0x63, 0x6f, 0x52, 0xa2, 0x60, 0x59, 0x24, 0xb6, 0xc3, 0x43, 0x3d, 0x47, 0x38, 0xd8,
+ 0x0c, 0xcc, 0x6c, 0x10, 0x49, 0x92, 0x97, 0x6c, 0x7d, 0x32, 0xc2, 0x62, 0x83, 0x34, 0x96, 0xdf,
+ 0xbd, 0xf9, 0xcc, 0xcf, 0xd9, 0x4d, 0x8b, 0x8a, 0xa9, 0x3c, 0x1f, 0x89, 0xc4, 0xad, 0xd5, 0xbb,
+ 0x74, 0xf8, 0xee, 0x60, 0x54, 0x7a, 0xec, 0x36, 0x45, 0xf2, 0xec, 0xb9, 0x73, 0x66, 0xae, 0x57,
+ 0x2d, 0xd4, 0x91, 0x02, 0x99, 0xcd, 0xba, 0xbd, 0x6e, 0xfb, 0xa6, 0xf6, 0x34, 0xce, 0x4c, 0x44,
+ 0x0b, 0xd2, 0x66, 0xdb, 0x4e, 0x5e, 0x00, 0x72, 0x1b, 0xe5, 0x2f, 0x24, 0xd2, 0xc8, 0x72, 0x37,
+ 0x97, 0x2b, 0xd0, 0xcd, 0xa9, 0x6b, 0x84, 0x32, 0x56, 0x7a, 0x89, 0x6e, 0x3d, 0x8f, 0x03, 0x9a,
+ 0x9d, 0x6d, 0xf7, 0xe5, 0x13, 0xd7, 0x4b, 0xbc, 0xe2, 0x6c, 0xd1, 0x18, 0x60, 0x0e, 0x1a, 0xe3,
+ 0xf9, 0xc0, 0x34, 0x4b, 0x1c, 0x82, 0x17, 0x5e, 0xdf, 0x81, 0x32, 0xd7, 0x5b, 0x30, 0x1d, 0xe0,
+ 0x29, 0x80, 0x6b, 0xb1, 0x69, 0xbf, 0x3f, 0x12, 0x56, 0xb0, 0x80, 0x91, 0x22, 0x1a, 0x31, 0xd5,
+ 0x5d, 0x3d, 0xdd, 0x70, 0x5e, 0xcb, 0xc7, 0x2d, 0xb8, 0x3e, 0x54, 0x34, 0xd3, 0x50, 0x89, 0x77,
+ 0x08, 0xc1, 0xf7, 0x11, 0x6e, 0x57, 0xd7, 0x09, 0x94, 0x20, 0x03, 0x38, 0xc3, 0x3a, 0xd3, 0x93,
+ 0x8f, 0xd0, 0x65, 0xc5, 0xa1, 0xe0, 0x69, 0x2c, 0xf6, 0x0a, 0xce, 0x01, 0xb6, 0x0d, 0x95, 0xa0,
+ 0x5d, 0x97, 0x94, 0xc3, 0xf1, 0xcd, 0x49, 0xea, 0x20, 0xd3, 0xa9, 0xa6, 0x67, 0x94, 0x64, 0x17
+ };
+
+ static const uint8_t dnskey_blob[] = {
+ 0x03, 0x01, 0x00, 0x01, 0xbf, 0xdd, 0x24, 0x95, 0x21, 0x70, 0xa8, 0x5b, 0x19, 0xa6, 0x76, 0xd3,
+ 0x5b, 0x37, 0xcf, 0x59, 0x0d, 0x3c, 0xdb, 0x0c, 0xcf, 0xd6, 0x19, 0x02, 0xc7, 0x8e, 0x56, 0x4d,
+ 0x14, 0xb7, 0x9d, 0x71, 0xf4, 0xdd, 0x24, 0x36, 0xc8, 0x32, 0x1c, 0x63, 0xf7, 0xc0, 0xfc, 0xe3,
+ 0x83, 0xa6, 0x22, 0x8b, 0x6a, 0x34, 0x41, 0x72, 0xaa, 0x95, 0x98, 0x06, 0xac, 0x03, 0xec, 0xc3,
+ 0xa1, 0x6d, 0x8b, 0x1b, 0xfd, 0xa4, 0x05, 0x72, 0xe6, 0xe0, 0xb9, 0x98, 0x07, 0x54, 0x7a, 0xb2,
+ 0x55, 0x30, 0x96, 0xa3, 0x22, 0x3b, 0xe0, 0x9d, 0x61, 0xf6, 0xdc, 0x31, 0x2b, 0xc9, 0x2c, 0x12,
+ 0x06, 0x7f, 0x3c, 0x5d, 0x29, 0x76, 0x01, 0x62, 0xe3, 0x41, 0x41, 0x4f, 0xa6, 0x07, 0xfa, 0x2d,
+ 0x0c, 0x64, 0x88, 0xd1, 0x56, 0x18, 0x4b, 0x2b, 0xc2, 0x19, 0x7e, 0xd0, 0x1a, 0x8c, 0x2d, 0x8d,
+ 0x06, 0xdf, 0x4d, 0xaf, 0xd9, 0xe3, 0x31, 0x59, 0xbc, 0xc3, 0x36, 0x22, 0xe7, 0x15, 0xf9, 0xb2,
+ 0x44, 0x8a, 0x33, 0xd7, 0x6c, 0xf1, 0xcc, 0x37, 0x05, 0x69, 0x32, 0x71, 0x76, 0xd8, 0x50, 0x06,
+ 0xae, 0x27, 0xed, 0x3b, 0xdb, 0x1a, 0x97, 0x9b, 0xa3, 0x3e, 0x40, 0x42, 0x29, 0xaf, 0x75, 0x1c,
+ 0xff, 0x1d, 0xaf, 0x85, 0x02, 0xb3, 0x2e, 0x99, 0x67, 0x08, 0x13, 0xd5, 0xda, 0x6d, 0x65, 0xb2,
+ 0x36, 0x6f, 0x2f, 0x64, 0xe0, 0xfa, 0xd3, 0x81, 0x86, 0x6b, 0x41, 0x3e, 0x91, 0xaa, 0x0a, 0xd3,
+ 0xb2, 0x92, 0xd9, 0x42, 0x36, 0x8a, 0x11, 0x0b, 0x5b, 0xb0, 0xea, 0xad, 0x76, 0xd5, 0xb4, 0x81,
+ 0x30, 0xca, 0x5c, 0x4f, 0xd9, 0xea, 0xe7, 0x4b, 0x10, 0x0a, 0x09, 0x4b, 0x73, 0x66, 0xed, 0x8e,
+ 0x84, 0xa2, 0x4f, 0x93, 0x7e, 0x29, 0xdc, 0x6a, 0xbd, 0x12, 0xa1, 0x3d, 0xd2, 0xd6, 0x2a, 0x67,
+ 0x99, 0x4d, 0xf3, 0x43
+ };
+
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *mx1 = NULL, *mx2 = NULL, *mx3 = NULL, *mx4 = NULL, *rrsig = NULL, *dnskey = NULL;
+ _cleanup_(dns_answer_unrefp) DnsAnswer *answer = NULL;
+ DnssecResult result;
+
+ mx1 = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_MX, "kodapan.se");
+ assert_se(mx1);
+
+ mx1->mx.priority = 1;
+ mx1->mx.exchange = strdup("ASPMX.L.GOOGLE.COM");
+ assert_se(mx1->mx.exchange);
+
+ log_info("MX: %s", strna(dns_resource_record_to_string(mx1)));
+
+ mx2 = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_MX, "kodapan.se");
+ assert_se(mx2);
+
+ mx2->mx.priority = 5;
+ mx2->mx.exchange = strdup("ALT2.ASPMX.L.GOOGLE.COM");
+ assert_se(mx2->mx.exchange);
+
+ log_info("MX: %s", strna(dns_resource_record_to_string(mx2)));
+
+ mx3 = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_MX, "kodapan.se");
+ assert_se(mx3);
+
+ mx3->mx.priority = 10;
+ mx3->mx.exchange = strdup("ASPMX2.GOOGLEMAIL.COM");
+ assert_se(mx3->mx.exchange);
+
+ log_info("MX: %s", strna(dns_resource_record_to_string(mx3)));
+
+ mx4 = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_MX, "kodapan.se");
+ assert_se(mx4);
+
+ mx4->mx.priority = 10;
+ mx4->mx.exchange = strdup("ASPMX3.GOOGLEMAIL.COM");
+ assert_se(mx4->mx.exchange);
+
+ log_info("MX: %s", strna(dns_resource_record_to_string(mx4)));
+
+ rrsig = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_RRSIG, "kodapan.se");
+ assert_se(rrsig);
+
+ rrsig->rrsig.type_covered = DNS_TYPE_MX;
+ rrsig->rrsig.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ rrsig->rrsig.labels = 2;
+ rrsig->rrsig.original_ttl = 900;
+ rrsig->rrsig.expiration = 0x5e608a84;
+ rrsig->rrsig.inception = 0x5e4e1584;
+ rrsig->rrsig.key_tag = 44028;
+ rrsig->rrsig.signer = strdup("kodapan.se.");
+ assert_se(rrsig->rrsig.signer);
+ rrsig->rrsig.signature_size = sizeof(signature_blob);
+ rrsig->rrsig.signature = memdup(signature_blob, rrsig->rrsig.signature_size);
+ assert_se(rrsig->rrsig.signature);
+
+ log_info("RRSIG: %s", strna(dns_resource_record_to_string(rrsig)));
+
+ dnskey = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_DNSKEY, "kodapan.se");
+ assert_se(dnskey);
+
+ dnskey->dnskey.flags = 256;
+ dnskey->dnskey.protocol = 3;
+ dnskey->dnskey.algorithm = DNSSEC_ALGORITHM_RSASHA256;
+ dnskey->dnskey.key_size = sizeof(dnskey_blob);
+ dnskey->dnskey.key = memdup(dnskey_blob, sizeof(dnskey_blob));
+ assert_se(dnskey->dnskey.key);
+
+ log_info("DNSKEY: %s", strna(dns_resource_record_to_string(dnskey)));
+ log_info("DNSKEY keytag: %u", dnssec_keytag(dnskey, false));
+
+ assert_se(dnssec_key_match_rrsig(mx1->key, rrsig) > 0);
+ assert_se(dnssec_key_match_rrsig(mx2->key, rrsig) > 0);
+ assert_se(dnssec_key_match_rrsig(mx3->key, rrsig) > 0);
+ assert_se(dnssec_key_match_rrsig(mx4->key, rrsig) > 0);
+ assert_se(dnssec_rrsig_match_dnskey(rrsig, dnskey, false) > 0);
+
+ answer = dns_answer_new(4);
+ assert_se(answer);
+ assert_se(dns_answer_add(answer, mx1, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+ assert_se(dns_answer_add(answer, mx2, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+ assert_se(dns_answer_add(answer, mx3, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+ assert_se(dns_answer_add(answer, mx4, 0, DNS_ANSWER_AUTHENTICATED) >= 0);
+
+ /* Validate the RR as it if was 2020-02-24 today */
+ assert_se(dnssec_verify_rrset(answer, mx1->key, rrsig, dnskey, 1582534685*USEC_PER_SEC, &result) >= 0);
+ assert_se(result == DNSSEC_VALIDATED);
+}
+
+static void test_dnssec_nsec3_hash(void) {
+ static const uint8_t salt[] = { 0xB0, 0x1D, 0xFA, 0xCE };
+ static const uint8_t next_hashed_name[] = { 0x84, 0x10, 0x26, 0x53, 0xc9, 0xfa, 0x4d, 0x85, 0x6c, 0x97, 0x82, 0xe2, 0x8f, 0xdf, 0x2d, 0x5e, 0x87, 0x69, 0xc4, 0x52 };
+ _cleanup_(dns_resource_record_unrefp) DnsResourceRecord *rr = NULL;
+ uint8_t h[DNSSEC_HASH_SIZE_MAX];
+ _cleanup_free_ char *b = NULL;
+ int k;
+
+ /* The NSEC3 RR for eurid.eu on 2015-12-14. */
+ rr = dns_resource_record_new_full(DNS_CLASS_IN, DNS_TYPE_NSEC3, "PJ8S08RR45VIQDAQGE7EN3VHKNROTBMM.eurid.eu.");
+ assert_se(rr);
+
+ rr->nsec3.algorithm = DNSSEC_DIGEST_SHA1;
+ rr->nsec3.flags = 1;
+ rr->nsec3.iterations = 1;
+ rr->nsec3.salt = memdup(salt, sizeof(salt));
+ assert_se(rr->nsec3.salt);
+ rr->nsec3.salt_size = sizeof(salt);
+ rr->nsec3.next_hashed_name = memdup(next_hashed_name, sizeof(next_hashed_name));
+ assert_se(rr->nsec3.next_hashed_name);
+ rr->nsec3.next_hashed_name_size = sizeof(next_hashed_name);
+
+ log_info("NSEC3: %s", strna(dns_resource_record_to_string(rr)));
+
+ k = dnssec_nsec3_hash(rr, "eurid.eu", &h);
+ assert_se(k >= 0);
+
+ b = base32hexmem(h, k, false);
+ assert_se(b);
+ assert_se(strcasecmp(b, "PJ8S08RR45VIQDAQGE7EN3VHKNROTBMM") == 0);
+}
+
+#endif
+
+int main(int argc, char *argv[]) {
+
+#if HAVE_GCRYPT
+ test_dnssec_verify_dns_key();
+ test_dnssec_verify_rfc8080_ed25519_example1();
+ test_dnssec_verify_rfc8080_ed25519_example2();
+ test_dnssec_verify_rrset();
+ test_dnssec_verify_rrset2();
+ test_dnssec_verify_rrset3();
+ test_dnssec_nsec3_hash();
+#endif
+
+ return 0;
+}
diff --git a/src/resolve/test-resolve-tables.c b/src/resolve/test-resolve-tables.c
new file mode 100644
index 0000000..e4f4e14
--- /dev/null
+++ b/src/resolve/test-resolve-tables.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "dns-type.h"
+#include "resolved-dns-dnssec.h"
+#include "resolved-dns-packet.h"
+#include "test-tables.h"
+
+int main(int argc, char **argv) {
+ uint16_t i;
+
+ test_table(dns_protocol, DNS_PROTOCOL);
+ test_table(dnssec_result, DNSSEC_RESULT);
+ test_table(dnssec_verdict, DNSSEC_VERDICT);
+
+ test_table_sparse(dns_rcode, DNS_RCODE);
+ test_table_sparse(dns_type, DNS_TYPE);
+
+ log_info("/* DNS_TYPE */");
+ for (i = 0; i < _DNS_TYPE_MAX; i++) {
+ const char *s;
+
+ s = dns_type_to_string(i);
+ assert_se(s == NULL || strlen(s) < _DNS_TYPE_STRING_MAX);
+
+ if (s)
+ log_info("%-*s %s%s%s%s%s%s%s%s%s",
+ (int) _DNS_TYPE_STRING_MAX - 1, s,
+ dns_type_is_pseudo(i) ? "pseudo " : "",
+ dns_type_is_valid_query(i) ? "valid_query " : "",
+ dns_type_is_valid_rr(i) ? "is_valid_rr " : "",
+ dns_type_may_redirect(i) ? "may_redirect " : "",
+ dns_type_is_dnssec(i) ? "dnssec " : "",
+ dns_type_is_obsolete(i) ? "obsolete " : "",
+ dns_type_may_wildcard(i) ? "wildcard " : "",
+ dns_type_apex_only(i) ? "apex_only " : "",
+ dns_type_needs_authentication(i) ? "needs_authentication" : "");
+ }
+
+ log_info("/* DNS_CLASS */");
+ for (i = 0; i < _DNS_CLASS_MAX; i++) {
+ const char *s;
+
+ s = dns_class_to_string(i);
+ assert_se(s == NULL || strlen(s) < _DNS_CLASS_STRING_MAX);
+
+ if (s)
+ log_info("%-*s %s%s",
+ (int) _DNS_CLASS_STRING_MAX - 1, s,
+ dns_class_is_pseudo(i) ? "is_pseudo " : "",
+ dns_class_is_valid_rr(i) ? "is_valid_rr " : "");
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/resolve/test-resolved-etc-hosts.c b/src/resolve/test-resolved-etc-hosts.c
new file mode 100644
index 0000000..045952e
--- /dev/null
+++ b/src/resolve/test-resolved-etc-hosts.c
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "resolved-etc-hosts.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static void test_parse_etc_hosts_system(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ log_info("/* %s */", __func__);
+
+ f = fopen("/etc/hosts", "re");
+ if (!f) {
+ assert_se(errno == ENOENT);
+ return;
+ }
+
+ _cleanup_(etc_hosts_free) EtcHosts hosts = {};
+ assert_se(etc_hosts_parse(&hosts, f) == 0);
+}
+
+#define address_equal_4(_addr, _address) \
+ ((_addr)->family == AF_INET && \
+ !memcmp(&(_addr)->address.in, &(struct in_addr) { .s_addr = (_address) }, 4))
+
+#define address_equal_6(_addr, ...) \
+ ((_addr)->family == AF_INET6 && \
+ !memcmp(&(_addr)->address.in6, &(struct in6_addr) { .s6_addr = __VA_ARGS__}, 16) )
+
+static void test_parse_etc_hosts(void) {
+ _cleanup_(unlink_tempfilep) char
+ t[] = "/tmp/test-resolved-etc-hosts.XXXXXX";
+
+ log_info("/* %s */", __func__);
+
+ int fd;
+ _cleanup_fclose_ FILE *f;
+ const char *s;
+
+ fd = mkostemp_safe(t);
+ assert_se(fd >= 0);
+
+ f = fdopen(fd, "r+");
+ assert_se(f);
+ fputs("1.2.3.4 some.where\n"
+ "1.2.3.5 some.where\n"
+ "1.2.3.6 dash dash-dash.where-dash\n"
+ "1.2.3.7 bad-dash- -bad-dash -bad-dash.bad-\n"
+ "1.2.3.8\n"
+ "1.2.3.9 before.comment # within.comment\n"
+ "1.2.3.10 before.comment#within.comment2\n"
+ "1.2.3.11 before.comment# within.comment3\n"
+ "1.2.3.12 before.comment#\n"
+ "1.2.3 short.address\n"
+ "1.2.3.4.5 long.address\n"
+ "1::2::3 multi.colon\n"
+
+ "::0 some.where some.other\n"
+ "0.0.0.0 deny.listed\n"
+ "::5\t\t\t \tsome.where\tsome.other foobar.foo.foo\t\t\t\n"
+ " \n", f);
+ assert_se(fflush_and_check(f) >= 0);
+ rewind(f);
+
+ _cleanup_(etc_hosts_free) EtcHosts hosts = {};
+ assert_se(etc_hosts_parse(&hosts, f) == 0);
+
+ EtcHostsItemByName *bn;
+ assert_se(bn = hashmap_get(hosts.by_name, "some.where"));
+ assert_se(bn->n_addresses == 3);
+ assert_se(bn->n_allocated >= 3);
+ assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.4")));
+ assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.5")));
+ assert_se(address_equal_6(bn->addresses[2], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
+
+ assert_se(bn = hashmap_get(hosts.by_name, "dash"));
+ assert_se(bn->n_addresses == 1);
+ assert_se(bn->n_allocated >= 1);
+ assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6")));
+
+ assert_se(bn = hashmap_get(hosts.by_name, "dash-dash.where-dash"));
+ assert_se(bn->n_addresses == 1);
+ assert_se(bn->n_allocated >= 1);
+ assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.6")));
+
+ /* See https://tools.ietf.org/html/rfc1035#section-2.3.1 */
+ FOREACH_STRING(s, "bad-dash-", "-bad-dash", "-bad-dash.bad-")
+ assert_se(!hashmap_get(hosts.by_name, s));
+
+ assert_se(bn = hashmap_get(hosts.by_name, "before.comment"));
+ assert_se(bn->n_addresses == 4);
+ assert_se(bn->n_allocated >= 4);
+ assert_se(address_equal_4(bn->addresses[0], inet_addr("1.2.3.9")));
+ assert_se(address_equal_4(bn->addresses[1], inet_addr("1.2.3.10")));
+ assert_se(address_equal_4(bn->addresses[2], inet_addr("1.2.3.11")));
+ assert_se(address_equal_4(bn->addresses[3], inet_addr("1.2.3.12")));
+
+ assert(!hashmap_get(hosts.by_name, "within.comment"));
+ assert(!hashmap_get(hosts.by_name, "within.comment2"));
+ assert(!hashmap_get(hosts.by_name, "within.comment3"));
+ assert(!hashmap_get(hosts.by_name, "#"));
+
+ assert(!hashmap_get(hosts.by_name, "short.address"));
+ assert(!hashmap_get(hosts.by_name, "long.address"));
+ assert(!hashmap_get(hosts.by_name, "multi.colon"));
+ assert_se(!set_contains(hosts.no_address, "short.address"));
+ assert_se(!set_contains(hosts.no_address, "long.address"));
+ assert_se(!set_contains(hosts.no_address, "multi.colon"));
+
+ assert_se(bn = hashmap_get(hosts.by_name, "some.other"));
+ assert_se(bn->n_addresses == 1);
+ assert_se(bn->n_allocated >= 1);
+ assert_se(address_equal_6(bn->addresses[0], {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5}));
+
+ assert_se( set_contains(hosts.no_address, "some.where"));
+ assert_se( set_contains(hosts.no_address, "some.other"));
+ assert_se( set_contains(hosts.no_address, "deny.listed"));
+ assert_se(!set_contains(hosts.no_address, "foobar.foo.foo"));
+}
+
+static void test_parse_file(const char *fname) {
+ _cleanup_(etc_hosts_free) EtcHosts hosts = {};
+ _cleanup_fclose_ FILE *f;
+
+ log_info("/* %s(\"%s\") */", __func__, fname);
+
+ assert_se(f = fopen(fname, "re"));
+ assert_se(etc_hosts_parse(&hosts, f) == 0);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc == 1) {
+ test_parse_etc_hosts_system();
+ test_parse_etc_hosts();
+ } else
+ test_parse_file(argv[1]);
+
+ return 0;
+}
diff --git a/src/resolve/test-resolved-packet.c b/src/resolve/test-resolved-packet.c
new file mode 100644
index 0000000..cd93b1c
--- /dev/null
+++ b/src/resolve/test-resolved-packet.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "log.h"
+#include "resolved-dns-packet.h"
+#include "tests.h"
+
+static void test_dns_packet_new(void) {
+ size_t i;
+ _cleanup_(dns_packet_unrefp) DnsPacket *p2 = NULL;
+
+ for (i = 0; i <= DNS_PACKET_SIZE_MAX; i++) {
+ _cleanup_(dns_packet_unrefp) DnsPacket *p = NULL;
+
+ assert_se(dns_packet_new(&p, DNS_PROTOCOL_DNS, i, DNS_PACKET_SIZE_MAX) == 0);
+
+ log_debug("dns_packet_new: %zu → %zu", i, p->allocated);
+ assert_se(p->allocated >= MIN(DNS_PACKET_SIZE_MAX, i));
+
+ if (i > DNS_PACKET_SIZE_START + 10 && i < DNS_PACKET_SIZE_MAX - 10)
+ i = MIN(i * 2, DNS_PACKET_SIZE_MAX - 10);
+ }
+
+ assert_se(dns_packet_new(&p2, DNS_PROTOCOL_DNS, DNS_PACKET_SIZE_MAX + 1, DNS_PACKET_SIZE_MAX) == -EFBIG);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_dns_packet_new();
+
+ return 0;
+}
diff --git a/src/rfkill/rfkill.c b/src/rfkill/rfkill.c
new file mode 100644
index 0000000..c387168
--- /dev/null
+++ b/src/rfkill/rfkill.c
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <linux/rfkill.h>
+#include <poll.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "list.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "reboot-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+#include "util.h"
+
+/* Note that any write is delayed until exit and the rfkill state will not be
+ * stored for rfkill indices that disappear after a change. */
+#define EXIT_USEC (5 * USEC_PER_SEC)
+
+typedef struct write_queue_item {
+ LIST_FIELDS(struct write_queue_item, queue);
+ int rfkill_idx;
+ char *file;
+ int state;
+} write_queue_item;
+
+typedef struct Context {
+ LIST_HEAD(write_queue_item, write_queue);
+ int rfkill_fd;
+} Context;
+
+static struct write_queue_item* write_queue_item_free(struct write_queue_item *item) {
+ if (!item)
+ return NULL;
+
+ free(item->file);
+ return mfree(item);
+}
+
+static const char* const rfkill_type_table[NUM_RFKILL_TYPES] = {
+ [RFKILL_TYPE_ALL] = "all",
+ [RFKILL_TYPE_WLAN] = "wlan",
+ [RFKILL_TYPE_BLUETOOTH] = "bluetooth",
+ [RFKILL_TYPE_UWB] = "uwb",
+ [RFKILL_TYPE_WIMAX] = "wimax",
+ [RFKILL_TYPE_WWAN] = "wwan",
+ [RFKILL_TYPE_GPS] = "gps",
+ [RFKILL_TYPE_FM] = "fm",
+ [RFKILL_TYPE_NFC] = "nfc",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(rfkill_type, int);
+
+static int find_device(
+ const struct rfkill_event *event,
+ sd_device **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _cleanup_free_ char *sysname = NULL;
+ const char *name;
+ int r;
+
+ assert(event);
+ assert(ret);
+
+ if (asprintf(&sysname, "rfkill%i", event->idx) < 0)
+ return log_oom();
+
+ r = sd_device_new_from_subsystem_sysname(&device, "rfkill", sysname);
+ if (r < 0)
+ return log_full_errno(IN_SET(r, -ENOENT, -ENXIO, -ENODEV) ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to open device '%s': %m", sysname);
+
+ r = sd_device_get_sysattr_value(device, "name", &name);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Device has no name, ignoring: %m");
+
+ log_device_debug(device, "Operating on rfkill device '%s'.", name);
+
+ *ret = TAKE_PTR(device);
+ return 0;
+}
+
+static int determine_state_file(
+ const struct rfkill_event *event,
+ char **ret) {
+
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL, *device = NULL;
+ const char *path_id, *type;
+ char *state_file;
+ int r;
+
+ assert(event);
+ assert(ret);
+
+ r = find_device(event, &d);
+ if (r < 0)
+ return r;
+
+ r = device_wait_for_initialization(d, "rfkill", USEC_INFINITY, &device);
+ if (r < 0)
+ return r;
+
+ assert_se(type = rfkill_type_to_string(event->type));
+
+ if (sd_device_get_property_value(device, "ID_PATH", &path_id) >= 0) {
+ _cleanup_free_ char *escaped_path_id = NULL;
+
+ escaped_path_id = cescape(path_id);
+ if (!escaped_path_id)
+ return log_oom();
+
+ state_file = strjoin("/var/lib/systemd/rfkill/", escaped_path_id, ":", type);
+ } else
+ state_file = strjoin("/var/lib/systemd/rfkill/", type);
+
+ if (!state_file)
+ return log_oom();
+
+ *ret = state_file;
+ return 0;
+}
+
+static int load_state(Context *c, const struct rfkill_event *event) {
+ _cleanup_free_ char *state_file = NULL, *value = NULL;
+ struct rfkill_event we;
+ ssize_t l;
+ int b, r;
+
+ assert(c);
+ assert(c->rfkill_fd >= 0);
+ assert(event);
+
+ if (shall_restore_state() == 0)
+ return 0;
+
+ r = determine_state_file(event, &state_file);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(state_file, &value);
+ if (IN_SET(r, -ENOENT, 0)) {
+ /* No state file or it's truncated? Then save the current state */
+
+ r = write_string_file(state_file, one_zero(event->soft), WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_MKDIR_0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write state file %s: %m", state_file);
+
+ log_debug("Saved state '%s' to %s.", one_zero(event->soft), state_file);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to read state file %s: %m", state_file);
+
+ b = parse_boolean(value);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse state file %s: %m", state_file);
+
+ we = (struct rfkill_event) {
+ .op = RFKILL_OP_CHANGE,
+ .idx = event->idx,
+ .soft = b,
+ };
+
+ l = write(c->rfkill_fd, &we, sizeof(we));
+ if (l < 0)
+ return log_error_errno(errno, "Failed to restore rfkill state for %i: %m", event->idx);
+ if (l != sizeof(we))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Couldn't write rfkill event structure, too short.");
+
+ log_debug("Loaded state '%s' from %s.", one_zero(b), state_file);
+ return 0;
+}
+
+static void save_state_queue_remove(Context *c, int idx, const char *state_file) {
+ struct write_queue_item *item, *tmp;
+
+ assert(c);
+
+ LIST_FOREACH_SAFE(queue, item, tmp, c->write_queue) {
+ if ((state_file && streq(item->file, state_file)) || idx == item->rfkill_idx) {
+ log_debug("Canceled previous save state of '%s' to %s.", one_zero(item->state), item->file);
+ LIST_REMOVE(queue, c->write_queue, item);
+ write_queue_item_free(item);
+ }
+ }
+}
+
+static int save_state_queue(Context *c, const struct rfkill_event *event) {
+ _cleanup_free_ char *state_file = NULL;
+ struct write_queue_item *item;
+ int r;
+
+ assert(c);
+ assert(c->rfkill_fd >= 0);
+ assert(event);
+
+ r = determine_state_file(event, &state_file);
+ if (r < 0)
+ return r;
+
+ save_state_queue_remove(c, event->idx, state_file);
+
+ item = new0(struct write_queue_item, 1);
+ if (!item)
+ return -ENOMEM;
+
+ item->file = TAKE_PTR(state_file);
+ item->rfkill_idx = event->idx;
+ item->state = event->soft;
+
+ LIST_APPEND(queue, c->write_queue, item);
+
+ return 0;
+}
+
+static int save_state_cancel(Context *c, const struct rfkill_event *event) {
+ _cleanup_free_ char *state_file = NULL;
+ int r;
+
+ assert(c);
+ assert(c->rfkill_fd >= 0);
+ assert(event);
+
+ r = determine_state_file(event, &state_file);
+ save_state_queue_remove(c, event->idx, state_file);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int save_state_write_one(struct write_queue_item *item) {
+ int r;
+
+ r = write_string_file(item->file, one_zero(item->state), WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_MKDIR_0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write state file %s: %m", item->file);
+
+ log_debug("Saved state '%s' to %s.", one_zero(item->state), item->file);
+ return 0;
+}
+
+static void context_save_and_clear(Context *c) {
+ struct write_queue_item *i;
+
+ assert(c);
+
+ while ((i = c->write_queue)) {
+ LIST_REMOVE(queue, c->write_queue, i);
+ (void) save_state_write_one(i);
+ write_queue_item_free(i);
+ }
+
+ safe_close(c->rfkill_fd);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_save_and_clear) Context c = { .rfkill_fd = -1 };
+ bool ready = false;
+ int r, n;
+
+ if (argc > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program requires no arguments.");
+
+ log_setup_service();
+
+ umask(0022);
+
+ n = sd_listen_fds(false);
+ if (n < 0)
+ return log_error_errno(n, "Failed to determine whether we got any file descriptors passed: %m");
+ if (n > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Got too many file descriptors.");
+
+ if (n == 0) {
+ c.rfkill_fd = open("/dev/rfkill", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (c.rfkill_fd < 0) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Missing rfkill subsystem, or no device present, exiting.");
+ return 0;
+ }
+
+ return log_error_errno(errno, "Failed to open /dev/rfkill: %m");
+ }
+ } else {
+ c.rfkill_fd = SD_LISTEN_FDS_START;
+
+ r = fd_nonblock(c.rfkill_fd, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make /dev/rfkill socket non-blocking: %m");
+ }
+
+ for (;;) {
+ struct rfkill_event event;
+ const char *type;
+ ssize_t l;
+
+ l = read(c.rfkill_fd, &event, sizeof(event));
+ if (l < 0) {
+ if (errno == EAGAIN) {
+
+ if (!ready) {
+ /* Notify manager that we are
+ * now finished with
+ * processing whatever was
+ * queued */
+ (void) sd_notify(false, "READY=1");
+ ready = true;
+ }
+
+ /* Hang around for a bit, maybe there's more coming */
+
+ r = fd_wait_for_event(c.rfkill_fd, POLLIN, EXIT_USEC);
+ if (r == -EINTR)
+ continue;
+ if (r < 0)
+ return log_error_errno(r, "Failed to poll() on device: %m");
+ if (r > 0)
+ continue;
+
+ log_debug("All events read and idle, exiting.");
+ break;
+ }
+
+ log_error_errno(errno, "Failed to read from /dev/rfkill: %m");
+ }
+
+ if (l != RFKILL_EVENT_SIZE_V1)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Read event structure of invalid size.");
+
+ type = rfkill_type_to_string(event.type);
+ if (!type) {
+ log_debug("An rfkill device of unknown type %i discovered, ignoring.", event.type);
+ continue;
+ }
+
+ switch (event.op) {
+
+ case RFKILL_OP_ADD:
+ log_debug("A new rfkill device has been added with index %i and type %s.", event.idx, type);
+ (void) load_state(&c, &event);
+ break;
+
+ case RFKILL_OP_DEL:
+ log_debug("An rfkill device has been removed with index %i and type %s", event.idx, type);
+ (void) save_state_cancel(&c, &event);
+ break;
+
+ case RFKILL_OP_CHANGE:
+ log_debug("An rfkill device has changed state with index %i and type %s", event.idx, type);
+ (void) save_state_queue(&c, &event);
+ break;
+
+ default:
+ log_debug("Unknown event %i from /dev/rfkill for index %i and type %s, ignoring.", event.op, event.idx, type);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/run-generator/run-generator.c b/src/run-generator/run-generator.c
new file mode 100644
index 0000000..11e7d9e
--- /dev/null
+++ b/src/run-generator/run-generator.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "generator.h"
+#include "mkdir.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "specifier.h"
+#include "strv.h"
+
+static const char *arg_dest = NULL;
+static char **arg_commands = NULL;
+static char *arg_success_action = NULL;
+static char *arg_failure_action = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_commands, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_success_action, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_failure_action, freep);
+
+static int parse(const char *key, const char *value, void *data) {
+ int r;
+
+ if (proc_cmdline_key_streq(key, "systemd.run")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = strv_extend(&arg_commands, value);
+ if (r < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.run_success_action")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_success_action, value) < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.run_failure_action")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (free_and_strdup(&arg_failure_action, value) < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int generate(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+ char **c;
+ int r;
+
+ if (strv_isempty(arg_commands) && !arg_success_action)
+ return 0;
+
+ p = strjoina(arg_dest, "/kernel-command-line.service");
+ f = fopen(p, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", p);
+
+ fputs("# Automatically generated by systemd-run-generator\n\n"
+ "[Unit]\n"
+ "Description=Command from Kernel Command Line\n"
+ "Documentation=man:systemd-run-generator(8)\n"
+ "SourcePath=/proc/cmdline\n", f);
+
+ if (!streq_ptr(arg_success_action, "none"))
+ fprintf(f, "SuccessAction=%s\n",
+ arg_success_action ?: "exit");
+
+ if (!streq_ptr(arg_failure_action, "none"))
+ fprintf(f, "FailureAction=%s\n",
+ arg_failure_action ?: "exit");
+
+ fputs("\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "StandardOutput=journal+console\n", f);
+
+ STRV_FOREACH(c, arg_commands) {
+ _cleanup_free_ char *a = NULL;
+
+ a = specifier_escape(*c);
+ if (!a)
+ return log_oom();
+
+ fprintf(f, "ExecStart=%s\n", a);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", p);
+
+ /* Let's create a a target we can link "default.target" to */
+ p = strjoina(arg_dest, "/kernel-command-line.target");
+ r = write_string_file(
+ p,
+ "# Automatically generated by systemd-run-generator\n\n"
+ "[Unit]\n"
+ "Description=Command from Kernel Command Line\n"
+ "Documentation=man:systemd-run-generator(8)\n"
+ "SourcePath=/proc/cmdline\n"
+ "Requires=kernel-command-line.service\n"
+ "After=kernel-command-line.service\n",
+ WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_NOFOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create unit file %s: %m", p);
+
+ /* And now redirect default.target to our new target */
+ p = strjoina(arg_dest, "/" SPECIAL_DEFAULT_TARGET);
+ if (symlink("kernel-command-line.target", p) < 0)
+ return log_error_errno(errno, "Failed to link unit file kernel-command-line.target → %s: %m", p);
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest);
+
+ r = proc_cmdline_parse(parse, NULL, PROC_CMDLINE_RD_STRICT|PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ return generate();
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/run/run.c b/src/run/run.c
new file mode 100644
index 0000000..b4cc6fe
--- /dev/null
+++ b/src/run/run.c
@@ -0,0 +1,1777 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "bus-unit-util.h"
+#include "bus-wait-for-jobs.h"
+#include "calendarspec.h"
+#include "env-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "ptyfwd.h"
+#include "signal-util.h"
+#include "spawn-polkit-agent.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+static bool arg_ask_password = true;
+static bool arg_scope = false;
+static bool arg_remain_after_exit = false;
+static bool arg_no_block = false;
+static bool arg_wait = false;
+static const char *arg_unit = NULL;
+static const char *arg_description = NULL;
+static const char *arg_slice = NULL;
+static bool arg_slice_inherit = false;
+static bool arg_send_sighup = false;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static const char *arg_host = NULL;
+static bool arg_user = false;
+static const char *arg_service_type = NULL;
+static const char *arg_exec_user = NULL;
+static const char *arg_exec_group = NULL;
+static int arg_nice = 0;
+static bool arg_nice_set = false;
+static char **arg_environment = NULL;
+static char **arg_property = NULL;
+static enum {
+ ARG_STDIO_NONE, /* The default, as it is for normal services, stdin connected to /dev/null, and stdout+stderr to the journal */
+ ARG_STDIO_PTY, /* Interactive behaviour, requested by --pty: we allocate a pty and connect it to the TTY we are invoked from */
+ ARG_STDIO_DIRECT, /* Directly pass our stdin/stdout/stderr to the activated service, useful for usage in shell pipelines, requested by --pipe */
+ ARG_STDIO_AUTO, /* If --pipe and --pty are used together we use --pty when invoked on a TTY, and --pipe otherwise */
+} arg_stdio = ARG_STDIO_NONE;
+static char **arg_path_property = NULL;
+static char **arg_socket_property = NULL;
+static char **arg_timer_property = NULL;
+static bool arg_with_timer = false;
+static bool arg_quiet = false;
+static bool arg_aggressive_gc = false;
+static char *arg_working_directory = NULL;
+static bool arg_shell = false;
+static char **arg_cmdline = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_environment, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_path_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_socket_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_timer_property, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_working_directory, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_cmdline, strv_freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-run", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND [ARGUMENTS...]\n"
+ "\n%sRun the specified command in a transient scope or service.%s\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-ask-password Do not prompt for password\n"
+ " --user Run as user unit\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --scope Run this as scope rather than service\n"
+ " -u --unit=UNIT Run under the specified unit name\n"
+ " -p --property=NAME=VALUE Set service or scope unit property\n"
+ " --description=TEXT Description for unit\n"
+ " --slice=SLICE Run in the specified slice\n"
+ " --slice-inherit Inherit the slice\n"
+ " --no-block Do not wait until operation finished\n"
+ " -r --remain-after-exit Leave service around until explicitly stopped\n"
+ " --wait Wait until service stopped again\n"
+ " --send-sighup Send SIGHUP when terminating\n"
+ " --service-type=TYPE Service type\n"
+ " --uid=USER Run as system user\n"
+ " --gid=GROUP Run as system group\n"
+ " --nice=NICE Nice level\n"
+ " --working-directory=PATH Set working directory\n"
+ " -d --same-dir Inherit working directory from caller\n"
+ " -E --setenv=NAME=VALUE Set environment\n"
+ " -t --pty Run service on pseudo TTY as STDIN/STDOUT/\n"
+ " STDERR\n"
+ " -P --pipe Pass STDIN/STDOUT/STDERR directly to service\n"
+ " -q --quiet Suppress information messages during runtime\n"
+ " -G --collect Unload unit after it ran, even when failed\n"
+ " -S --shell Invoke a $SHELL interactively\n\n"
+ "Path options:\n"
+ " --path-property=NAME=VALUE Set path unit property\n\n"
+ "Socket options:\n"
+ " --socket-property=NAME=VALUE Set socket unit property\n\n"
+ "Timer options:\n"
+ " --on-active=SECONDS Run after SECONDS delay\n"
+ " --on-boot=SECONDS Run SECONDS after machine was booted up\n"
+ " --on-startup=SECONDS Run SECONDS after systemd activation\n"
+ " --on-unit-active=SECONDS Run SECONDS after the last activation\n"
+ " --on-unit-inactive=SECONDS Run SECONDS after the last deactivation\n"
+ " --on-calendar=SPEC Realtime timer\n"
+ " --on-timezone-change Run when the timezone changes\n"
+ " --on-clock-change Run when the realtime clock jumps\n"
+ " --timer-property=NAME=VALUE Set timer unit property\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int add_timer_property(const char *name, const char *val) {
+ char *p;
+
+ assert(name);
+ assert(val);
+
+ p = strjoin(name, "=", val);
+ if (!p)
+ return log_oom();
+
+ if (strv_consume(&arg_timer_property, p) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_SCOPE,
+ ARG_DESCRIPTION,
+ ARG_SLICE,
+ ARG_SLICE_INHERIT,
+ ARG_SEND_SIGHUP,
+ ARG_SERVICE_TYPE,
+ ARG_EXEC_USER,
+ ARG_EXEC_GROUP,
+ ARG_NICE,
+ ARG_ON_ACTIVE,
+ ARG_ON_BOOT,
+ ARG_ON_STARTUP,
+ ARG_ON_UNIT_ACTIVE,
+ ARG_ON_UNIT_INACTIVE,
+ ARG_ON_CALENDAR,
+ ARG_ON_TIMEZONE_CHANGE,
+ ARG_ON_CLOCK_CHANGE,
+ ARG_TIMER_PROPERTY,
+ ARG_PATH_PROPERTY,
+ ARG_SOCKET_PROPERTY,
+ ARG_NO_BLOCK,
+ ARG_NO_ASK_PASSWORD,
+ ARG_WAIT,
+ ARG_WORKING_DIRECTORY,
+ ARG_SHELL,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "user", no_argument, NULL, ARG_USER },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "scope", no_argument, NULL, ARG_SCOPE },
+ { "unit", required_argument, NULL, 'u' },
+ { "description", required_argument, NULL, ARG_DESCRIPTION },
+ { "slice", required_argument, NULL, ARG_SLICE },
+ { "slice-inherit", no_argument, NULL, ARG_SLICE_INHERIT },
+ { "remain-after-exit", no_argument, NULL, 'r' },
+ { "send-sighup", no_argument, NULL, ARG_SEND_SIGHUP },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "service-type", required_argument, NULL, ARG_SERVICE_TYPE },
+ { "wait", no_argument, NULL, ARG_WAIT },
+ { "uid", required_argument, NULL, ARG_EXEC_USER },
+ { "gid", required_argument, NULL, ARG_EXEC_GROUP },
+ { "nice", required_argument, NULL, ARG_NICE },
+ { "setenv", required_argument, NULL, 'E' },
+ { "property", required_argument, NULL, 'p' },
+ { "tty", no_argument, NULL, 't' }, /* deprecated alias */
+ { "pty", no_argument, NULL, 't' },
+ { "pipe", no_argument, NULL, 'P' },
+ { "quiet", no_argument, NULL, 'q' },
+ { "on-active", required_argument, NULL, ARG_ON_ACTIVE },
+ { "on-boot", required_argument, NULL, ARG_ON_BOOT },
+ { "on-startup", required_argument, NULL, ARG_ON_STARTUP },
+ { "on-unit-active", required_argument, NULL, ARG_ON_UNIT_ACTIVE },
+ { "on-unit-inactive", required_argument, NULL, ARG_ON_UNIT_INACTIVE },
+ { "on-calendar", required_argument, NULL, ARG_ON_CALENDAR },
+ { "on-timezone-change",no_argument, NULL, ARG_ON_TIMEZONE_CHANGE},
+ { "on-clock-change", no_argument, NULL, ARG_ON_CLOCK_CHANGE },
+ { "timer-property", required_argument, NULL, ARG_TIMER_PROPERTY },
+ { "path-property", required_argument, NULL, ARG_PATH_PROPERTY },
+ { "socket-property", required_argument, NULL, ARG_SOCKET_PROPERTY },
+ { "no-block", no_argument, NULL, ARG_NO_BLOCK },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "collect", no_argument, NULL, 'G' },
+ { "working-directory", required_argument, NULL, ARG_WORKING_DIRECTORY },
+ { "same-dir", no_argument, NULL, 'd' },
+ { "shell", no_argument, NULL, 'S' },
+ {},
+ };
+
+ bool with_trigger = false;
+ int r, c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+hrH:M:E:p:tPqGdSu:", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case ARG_USER:
+ arg_user = true;
+ break;
+
+ case ARG_SYSTEM:
+ arg_user = false;
+ break;
+
+ case ARG_SCOPE:
+ arg_scope = true;
+ break;
+
+ case 'u':
+ arg_unit = optarg;
+ break;
+
+ case ARG_DESCRIPTION:
+ arg_description = optarg;
+ break;
+
+ case ARG_SLICE:
+ arg_slice = optarg;
+ break;
+
+ case ARG_SLICE_INHERIT:
+ arg_slice_inherit = true;
+ break;
+
+ case ARG_SEND_SIGHUP:
+ arg_send_sighup = true;
+ break;
+
+ case 'r':
+ arg_remain_after_exit = true;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_SERVICE_TYPE:
+ arg_service_type = optarg;
+ break;
+
+ case ARG_EXEC_USER:
+ arg_exec_user = optarg;
+ break;
+
+ case ARG_EXEC_GROUP:
+ arg_exec_group = optarg;
+ break;
+
+ case ARG_NICE:
+ r = parse_nice(optarg, &arg_nice);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse nice value: %s", optarg);
+
+ arg_nice_set = true;
+ break;
+
+ case 'E':
+ if (strv_extend(&arg_environment, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case 'p':
+ if (strv_extend(&arg_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case 't': /* --pty */
+ if (IN_SET(arg_stdio, ARG_STDIO_DIRECT, ARG_STDIO_AUTO)) /* if --pipe is already used, upgrade to auto mode */
+ arg_stdio = ARG_STDIO_AUTO;
+ else
+ arg_stdio = ARG_STDIO_PTY;
+ break;
+
+ case 'P': /* --pipe */
+ if (IN_SET(arg_stdio, ARG_STDIO_PTY, ARG_STDIO_AUTO)) /* If --pty is already used, upgrade to auto mode */
+ arg_stdio = ARG_STDIO_AUTO;
+ else
+ arg_stdio = ARG_STDIO_DIRECT;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case ARG_ON_ACTIVE:
+ r = add_timer_property("OnActiveSec", optarg);
+ if (r < 0)
+ return r;
+
+ arg_with_timer = true;
+ break;
+
+ case ARG_ON_BOOT:
+ r = add_timer_property("OnBootSec", optarg);
+ if (r < 0)
+ return r;
+
+ arg_with_timer = true;
+ break;
+
+ case ARG_ON_STARTUP:
+ r = add_timer_property("OnStartupSec", optarg);
+ if (r < 0)
+ return r;
+
+ arg_with_timer = true;
+ break;
+
+ case ARG_ON_UNIT_ACTIVE:
+ r = add_timer_property("OnUnitActiveSec", optarg);
+ if (r < 0)
+ return r;
+
+ arg_with_timer = true;
+ break;
+
+ case ARG_ON_UNIT_INACTIVE:
+ r = add_timer_property("OnUnitInactiveSec", optarg);
+ if (r < 0)
+ return r;
+
+ arg_with_timer = true;
+ break;
+
+ case ARG_ON_CALENDAR: {
+ _cleanup_(calendar_spec_freep) CalendarSpec *cs = NULL;
+
+ r = calendar_spec_from_string(optarg, &cs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse calendar event specification: %m");
+
+ /* Let's make sure the given calendar event is not in the past */
+ r = calendar_spec_next_usec(cs, now(CLOCK_REALTIME), NULL);
+ if (r == -ENOENT)
+ /* The calendar event is in the past — let's warn about this, but install it
+ * anyway as is. The service manager will trigger the service right away.
+ * Moreover, the server side might have a different clock or timezone than we
+ * do, hence it should decide when or whether to run something. */
+ log_warning("Specified calendar expression is in the past, proceeding anyway.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to calculate next time calendar expression elapses: %m");
+
+ r = add_timer_property("OnCalendar", optarg);
+ if (r < 0)
+ return r;
+
+ arg_with_timer = true;
+ break;
+ }
+
+ case ARG_ON_TIMEZONE_CHANGE:
+ r = add_timer_property("OnTimezoneChange", "yes");
+ if (r < 0)
+ return r;
+
+ arg_with_timer = true;
+ break;
+
+ case ARG_ON_CLOCK_CHANGE:
+ r = add_timer_property("OnClockChange", "yes");
+ if (r < 0)
+ return r;
+
+ arg_with_timer = true;
+ break;
+
+ case ARG_TIMER_PROPERTY:
+
+ if (strv_extend(&arg_timer_property, optarg) < 0)
+ return log_oom();
+
+ arg_with_timer = arg_with_timer ||
+ STARTSWITH_SET(optarg,
+ "OnActiveSec=",
+ "OnBootSec=",
+ "OnStartupSec=",
+ "OnUnitActiveSec=",
+ "OnUnitInactiveSec=",
+ "OnCalendar=");
+ break;
+
+ case ARG_PATH_PROPERTY:
+
+ if (strv_extend(&arg_path_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_SOCKET_PROPERTY:
+
+ if (strv_extend(&arg_socket_property, optarg) < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_NO_BLOCK:
+ arg_no_block = true;
+ break;
+
+ case ARG_WAIT:
+ arg_wait = true;
+ break;
+
+ case ARG_WORKING_DIRECTORY:
+ r = parse_path_argument_and_warn(optarg, true, &arg_working_directory);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case 'd': {
+ _cleanup_free_ char *p = NULL;
+
+ r = safe_getcwd(&p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get current working directory: %m");
+
+ if (empty_or_root(p))
+ arg_working_directory = mfree(arg_working_directory);
+ else
+ free_and_replace(arg_working_directory, p);
+ break;
+ }
+
+ case 'G':
+ arg_aggressive_gc = true;
+ break;
+
+ case 'S':
+ arg_shell = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ with_trigger = !!arg_path_property || !!arg_socket_property || arg_with_timer;
+
+ /* currently, only single trigger (path, socket, timer) unit can be created simultaneously */
+ if ((int) !!arg_path_property + (int) !!arg_socket_property + (int) arg_with_timer > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only single trigger (path, socket, timer) unit can be created.");
+
+ if (arg_shell) {
+ /* If --shell is imply --pty --pipe --same-dir --service-type=exec --wait --collect, unless otherwise
+ * specified. */
+
+ if (!arg_scope) {
+ if (arg_stdio == ARG_STDIO_NONE)
+ arg_stdio = ARG_STDIO_AUTO;
+
+ if (!arg_working_directory) {
+ r = safe_getcwd(&arg_working_directory);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get current working directory: %m");
+ }
+
+ if (!arg_service_type) {
+ arg_service_type = strdup("exec");
+ if (!arg_service_type)
+ return log_oom();
+ }
+
+ arg_wait = true;
+ }
+
+ arg_aggressive_gc = true;
+ }
+
+ if (arg_stdio == ARG_STDIO_AUTO)
+ /* If we both --pty and --pipe are specified we'll automatically pick --pty if we are connected fully
+ * to a TTY and pick direct fd passing otherwise. This way, we automatically adapt to usage in a shell
+ * pipeline, but we are neatly interactive with tty-level isolation otherwise. */
+ arg_stdio = isatty(STDIN_FILENO) && isatty(STDOUT_FILENO) && isatty(STDERR_FILENO) ?
+ ARG_STDIO_PTY :
+ ARG_STDIO_DIRECT;
+
+ if (argc > optind) {
+ char **l;
+
+ if (arg_shell)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "If --shell is used, no command line is expected.");
+
+ l = strv_copy(argv + optind);
+ if (!l)
+ return log_oom();
+
+ strv_free_and_replace(arg_cmdline, l);
+
+ } else if (arg_shell) {
+ _cleanup_free_ char *s = NULL;
+ char **l;
+
+ r = get_shell(&s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine shell: %m");
+
+ l = strv_new(s);
+ if (!l)
+ return log_oom();
+
+ strv_free_and_replace(arg_cmdline, l);
+
+ } else if (!arg_unit || !with_trigger)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Command line to execute required.");
+
+ if (arg_user && arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Execution in user context is not supported on non-local systems.");
+
+ if (arg_scope && arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Scope execution is not supported on non-local systems.");
+
+ if (arg_scope && (arg_remain_after_exit || arg_service_type))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--remain-after-exit and --service-type= are not supported in --scope mode.");
+
+ if (arg_stdio != ARG_STDIO_NONE && (with_trigger || arg_scope))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--pty/--pipe is not compatible in timer or --scope mode.");
+
+ if (arg_stdio != ARG_STDIO_NONE && arg_transport == BUS_TRANSPORT_REMOTE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--pty/--pipe is only supported when connecting to the local system or containers.");
+
+ if (arg_stdio != ARG_STDIO_NONE && arg_no_block)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--pty/--pipe is not compatible with --no-block.");
+
+ if (arg_scope && with_trigger)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Path, socket or timer options are not supported in --scope mode.");
+
+ if (arg_timer_property && !arg_with_timer)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--timer-property= has no effect without any other timer options.");
+
+ if (arg_wait) {
+ if (arg_no_block)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may not be combined with --no-block.");
+
+ if (with_trigger)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may not be combined with path, socket or timer operations.");
+
+ if (arg_scope)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may not be combined with --scope.");
+ }
+
+ return 1;
+}
+
+static int transient_unit_set_properties(sd_bus_message *m, UnitType t, char **properties) {
+ int r;
+
+ r = sd_bus_message_append(m, "(sv)", "Description", "s", arg_description);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (arg_aggressive_gc) {
+ r = sd_bus_message_append(m, "(sv)", "CollectMode", "s", "inactive-or-failed");
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = bus_append_unit_property_assignment_many(m, t, properties);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int transient_cgroup_set_properties(sd_bus_message *m) {
+ _cleanup_free_ char *name = NULL;
+ _cleanup_free_ char *slice = NULL;
+ int r;
+ assert(m);
+
+ if (arg_slice_inherit) {
+ char *end;
+
+ if (arg_user)
+ r = cg_pid_get_user_slice(0, &name);
+ else
+ r = cg_pid_get_slice(0, &name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get PID slice: %m");
+
+ end = endswith(name, ".slice");
+ if (!end)
+ return -ENXIO;
+ *end = 0;
+ }
+
+ if (!isempty(arg_slice)) {
+ if (name) {
+ char *j = strjoin(name, "-", arg_slice);
+ free_and_replace(name, j);
+ } else
+ name = strdup(arg_slice);
+ if (!name)
+ return log_oom();
+ }
+
+ if (!name)
+ return 0;
+
+ r = unit_name_mangle_with_suffix(name, "as slice",
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ ".slice", &slice);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle name '%s': %m", arg_slice);
+
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+static int transient_kill_set_properties(sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ if (arg_send_sighup) {
+ r = sd_bus_message_append(m, "(sv)", "SendSIGHUP", "b", arg_send_sighup);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int transient_service_set_properties(sd_bus_message *m, const char *pty_path) {
+ bool send_term = false;
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_SERVICE, arg_property);
+ if (r < 0)
+ return r;
+
+ r = transient_kill_set_properties(m);
+ if (r < 0)
+ return r;
+
+ r = transient_cgroup_set_properties(m);
+ if (r < 0)
+ return r;
+
+ if (arg_wait || arg_stdio != ARG_STDIO_NONE) {
+ r = sd_bus_message_append(m, "(sv)", "AddRef", "b", 1);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_remain_after_exit) {
+ r = sd_bus_message_append(m, "(sv)", "RemainAfterExit", "b", arg_remain_after_exit);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_service_type) {
+ r = sd_bus_message_append(m, "(sv)", "Type", "s", arg_service_type);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_exec_user) {
+ r = sd_bus_message_append(m, "(sv)", "User", "s", arg_exec_user);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_exec_group) {
+ r = sd_bus_message_append(m, "(sv)", "Group", "s", arg_exec_group);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_nice_set) {
+ r = sd_bus_message_append(m, "(sv)", "Nice", "i", arg_nice);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (arg_working_directory) {
+ r = sd_bus_message_append(m, "(sv)", "WorkingDirectory", "s", arg_working_directory);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (pty_path) {
+ r = sd_bus_message_append(m,
+ "(sv)(sv)(sv)(sv)",
+ "StandardInput", "s", "tty",
+ "StandardOutput", "s", "tty",
+ "StandardError", "s", "tty",
+ "TTYPath", "s", pty_path);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ send_term = true;
+
+ } else if (arg_stdio == ARG_STDIO_DIRECT) {
+ r = sd_bus_message_append(m,
+ "(sv)(sv)(sv)",
+ "StandardInputFileDescriptor", "h", STDIN_FILENO,
+ "StandardOutputFileDescriptor", "h", STDOUT_FILENO,
+ "StandardErrorFileDescriptor", "h", STDERR_FILENO);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ send_term = isatty(STDIN_FILENO) || isatty(STDOUT_FILENO) || isatty(STDERR_FILENO);
+ }
+
+ if (send_term) {
+ const char *e;
+
+ e = getenv("TERM");
+ if (e) {
+ char *n;
+
+ n = strjoina("TERM=", e);
+ r = sd_bus_message_append(m,
+ "(sv)",
+ "Environment", "as", 1, n);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ if (!strv_isempty(arg_environment)) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", "Environment");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "as");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_environment);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ /* Exec container */
+ if (!strv_isempty(arg_cmdline)) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", "ExecStart");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "sasb");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", arg_cmdline[0]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_cmdline);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "b", false);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ return 0;
+}
+
+static int transient_scope_set_properties(sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_SCOPE, arg_property);
+ if (r < 0)
+ return r;
+
+ r = transient_kill_set_properties(m);
+ if (r < 0)
+ return r;
+
+ r = transient_cgroup_set_properties(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, (uint32_t) getpid_cached());
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+static int transient_timer_set_properties(sd_bus_message *m) {
+ int r;
+
+ assert(m);
+
+ r = transient_unit_set_properties(m, UNIT_TIMER, arg_timer_property);
+ if (r < 0)
+ return r;
+
+ /* Automatically clean up our transient timers */
+ r = sd_bus_message_append(m, "(sv)", "RemainAfterElapse", "b", false);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 0;
+}
+
+static int make_unit_name(sd_bus *bus, UnitType t, char **ret) {
+ const char *unique, *id;
+ char *p;
+ int r;
+
+ assert(bus);
+ assert(t >= 0);
+ assert(t < _UNIT_TYPE_MAX);
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0) {
+ sd_id128_t rnd;
+
+ /* We couldn't get the unique name, which is a pretty
+ * common case if we are connected to systemd
+ * directly. In that case, just pick a random uuid as
+ * name */
+
+ r = sd_id128_randomize(&rnd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate random run unit name: %m");
+
+ if (asprintf(ret, "run-r" SD_ID128_FORMAT_STR ".%s", SD_ID128_FORMAT_VAL(rnd), unit_type_to_string(t)) < 0)
+ return log_oom();
+
+ return 0;
+ }
+
+ /* We managed to get the unique name, then let's use that to
+ * name our transient units. */
+
+ id = startswith(unique, ":1.");
+ if (!id)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unique name %s has unexpected format.",
+ unique);
+
+ p = strjoin("run-u", id, ".", unit_type_to_string(t));
+ if (!p)
+ return log_oom();
+
+ *ret = p;
+ return 0;
+}
+
+typedef struct RunContext {
+ sd_bus *bus;
+ sd_event *event;
+ PTYForward *forward;
+ sd_bus_slot *match;
+
+ /* Current state of the unit */
+ char *active_state;
+ bool has_job;
+
+ /* The exit data of the unit */
+ uint64_t inactive_exit_usec;
+ uint64_t inactive_enter_usec;
+ char *result;
+ uint64_t cpu_usage_nsec;
+ uint64_t ip_ingress_bytes;
+ uint64_t ip_egress_bytes;
+ uint64_t io_read_bytes;
+ uint64_t io_write_bytes;
+ uint32_t exit_code;
+ uint32_t exit_status;
+} RunContext;
+
+static void run_context_free(RunContext *c) {
+ assert(c);
+
+ c->forward = pty_forward_free(c->forward);
+ c->match = sd_bus_slot_unref(c->match);
+ c->bus = sd_bus_unref(c->bus);
+ c->event = sd_event_unref(c->event);
+
+ free(c->active_state);
+ free(c->result);
+}
+
+static void run_context_check_done(RunContext *c) {
+ bool done;
+
+ assert(c);
+
+ if (c->match)
+ done = STRPTR_IN_SET(c->active_state, "inactive", "failed") && !c->has_job;
+ else
+ done = true;
+
+ if (c->forward && done) /* If the service is gone, it's time to drain the output */
+ done = pty_forward_drain(c->forward);
+
+ if (done)
+ sd_event_exit(c->event, EXIT_SUCCESS);
+}
+
+static int map_job(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ bool *b = userdata;
+ const char *job;
+ uint32_t id;
+ int r;
+
+ r = sd_bus_message_read(m, "(uo)", &id, &job);
+ if (r < 0)
+ return r;
+
+ *b = id != 0 || !streq(job, "/");
+ return 0;
+}
+
+static int run_context_update(RunContext *c, const char *path) {
+
+ static const struct bus_properties_map map[] = {
+ { "ActiveState", "s", NULL, offsetof(RunContext, active_state) },
+ { "InactiveExitTimestampMonotonic", "t", NULL, offsetof(RunContext, inactive_exit_usec) },
+ { "InactiveEnterTimestampMonotonic", "t", NULL, offsetof(RunContext, inactive_enter_usec) },
+ { "Result", "s", NULL, offsetof(RunContext, result) },
+ { "ExecMainCode", "i", NULL, offsetof(RunContext, exit_code) },
+ { "ExecMainStatus", "i", NULL, offsetof(RunContext, exit_status) },
+ { "CPUUsageNSec", "t", NULL, offsetof(RunContext, cpu_usage_nsec) },
+ { "IPIngressBytes", "t", NULL, offsetof(RunContext, ip_ingress_bytes) },
+ { "IPEgressBytes", "t", NULL, offsetof(RunContext, ip_egress_bytes) },
+ { "IOReadBytes", "t", NULL, offsetof(RunContext, io_read_bytes) },
+ { "IOWriteBytes", "t", NULL, offsetof(RunContext, io_write_bytes) },
+ { "Job", "(uo)", map_job, offsetof(RunContext, has_job) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ r = bus_map_all_properties(c->bus,
+ "org.freedesktop.systemd1",
+ path,
+ map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ c);
+ if (r < 0) {
+ sd_event_exit(c->event, EXIT_FAILURE);
+ return log_error_errno(r, "Failed to query unit state: %s", bus_error_message(&error, r));
+ }
+
+ run_context_check_done(c);
+ return 0;
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ RunContext *c = userdata;
+
+ assert(m);
+ assert(c);
+
+ return run_context_update(c, sd_bus_message_get_path(m));
+}
+
+static int pty_forward_handler(PTYForward *f, int rcode, void *userdata) {
+ RunContext *c = userdata;
+
+ assert(f);
+
+ if (rcode < 0) {
+ sd_event_exit(c->event, EXIT_FAILURE);
+ return log_error_errno(rcode, "Error on PTY forwarding logic: %m");
+ }
+
+ run_context_check_done(c);
+ return 0;
+}
+
+static int start_transient_service(
+ sd_bus *bus,
+ int *retval) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *service = NULL, *pty_path = NULL;
+ _cleanup_close_ int master = -1;
+ int r;
+
+ assert(bus);
+ assert(retval);
+
+ if (arg_stdio == ARG_STDIO_PTY) {
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
+ if (master < 0)
+ return log_error_errno(errno, "Failed to acquire pseudo tty: %m");
+
+ r = ptsname_malloc(master, &pty_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine tty name: %m");
+
+ if (unlockpt(master) < 0)
+ return log_error_errno(errno, "Failed to unlock tty: %m");
+
+ } else if (arg_transport == BUS_TRANSPORT_MACHINE) {
+ _cleanup_(sd_bus_unrefp) sd_bus *system_bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *pty_reply = NULL;
+ const char *s;
+
+ r = sd_bus_default_system(&system_bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = sd_bus_call_method(system_bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "OpenMachinePTY",
+ &error,
+ &pty_reply,
+ "s", arg_host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine PTY: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(pty_reply, "hs", &master, &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ master = fcntl(master, F_DUPFD_CLOEXEC, 3);
+ if (master < 0)
+ return log_error_errno(errno, "Failed to duplicate master fd: %m");
+
+ pty_path = strdup(s);
+ if (!pty_path)
+ return log_oom();
+ } else
+ assert_not_reached("Can't allocate tty via ssh");
+ }
+
+ /* Optionally, wait for the start job to complete. If we are supposed to read the service's stdin
+ * lets skip this however, because we should start that already when the start job is running, and
+ * there's little point in waiting for the start job to complete in that case anyway, as we'll wait
+ * for EOF anyway, which is going to be much later. */
+ if (!arg_no_block && arg_stdio == ARG_STDIO_NONE) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ if (arg_unit) {
+ r = unit_name_mangle_with_suffix(arg_unit, "as unit",
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ ".service", &service);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+ } else {
+ r = make_unit_name(bus, UNIT_SERVICE, &service);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and mode */
+ r = sd_bus_message_append(m, "ss", service, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_service_set_properties(m, pty_path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient service unit: %s", bus_error_message(&error, r));
+
+ if (w) {
+ const char *object;
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+ }
+
+ if (!arg_quiet)
+ log_info("Running as unit: %s", service);
+
+ if (arg_wait || arg_stdio != ARG_STDIO_NONE) {
+ _cleanup_(run_context_free) RunContext c = {
+ .cpu_usage_nsec = NSEC_INFINITY,
+ .ip_ingress_bytes = UINT64_MAX,
+ .ip_egress_bytes = UINT64_MAX,
+ .io_read_bytes = UINT64_MAX,
+ .io_write_bytes = UINT64_MAX,
+ .inactive_exit_usec = USEC_INFINITY,
+ .inactive_enter_usec = USEC_INFINITY,
+ };
+ _cleanup_free_ char *path = NULL;
+
+ c.bus = sd_bus_ref(bus);
+
+ r = sd_event_default(&c.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ if (master >= 0) {
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(c.event, NULL, SIGINT, NULL, NULL);
+ (void) sd_event_add_signal(c.event, NULL, SIGTERM, NULL, NULL);
+
+ if (!arg_quiet)
+ log_info("Press ^] three times within 1s to disconnect TTY.");
+
+ r = pty_forward_new(c.event, master, PTY_FORWARD_IGNORE_INITIAL_VHANGUP, &c.forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PTY forwarder: %m");
+
+ pty_forward_set_handler(c.forward, pty_forward_handler, &c);
+
+ /* Make sure to process any TTY events before we process bus events */
+ (void) pty_forward_set_priority(c.forward, SD_EVENT_PRIORITY_IMPORTANT);
+ }
+
+ path = unit_dbus_path_from_name(service);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &c.match,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ on_properties_changed, NULL, &c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request properties changed signal match: %m");
+
+ r = sd_bus_attach_event(bus, c.event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = run_context_update(&c, path);
+ if (r < 0)
+ return r;
+
+ r = sd_event_loop(c.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ if (c.forward) {
+ char last_char = 0;
+
+ r = pty_forward_get_last_char(c.forward, &last_char);
+ if (r >= 0 && !arg_quiet && last_char != '\n')
+ fputc('\n', stdout);
+ }
+
+ if (arg_wait && !arg_quiet) {
+
+ /* Explicitly destroy the PTY forwarder, so that the PTY device is usable again, with its
+ * original settings (i.e. proper line breaks), so that we can show the summary in a pretty
+ * way. */
+ c.forward = pty_forward_free(c.forward);
+
+ if (!isempty(c.result))
+ log_info("Finished with result: %s", strna(c.result));
+
+ if (c.exit_code == CLD_EXITED)
+ log_info("Main processes terminated with: code=%s/status=%i",
+ sigchld_code_to_string(c.exit_code), c.exit_status);
+ else if (c.exit_code > 0)
+ log_info("Main processes terminated with: code=%s/status=%s",
+ sigchld_code_to_string(c.exit_code), signal_to_string(c.exit_status));
+
+ if (timestamp_is_set(c.inactive_enter_usec) &&
+ timestamp_is_set(c.inactive_exit_usec) &&
+ c.inactive_enter_usec > c.inactive_exit_usec) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ log_info("Service runtime: %s",
+ format_timespan(ts, sizeof ts, c.inactive_enter_usec - c.inactive_exit_usec, USEC_PER_MSEC));
+ }
+
+ if (c.cpu_usage_nsec != NSEC_INFINITY) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ log_info("CPU time consumed: %s",
+ format_timespan(ts, sizeof ts, DIV_ROUND_UP(c.cpu_usage_nsec, NSEC_PER_USEC), USEC_PER_MSEC));
+ }
+
+ if (c.ip_ingress_bytes != UINT64_MAX) {
+ char bytes[FORMAT_BYTES_MAX];
+ log_info("IP traffic received: %s", format_bytes(bytes, sizeof bytes, c.ip_ingress_bytes));
+ }
+ if (c.ip_egress_bytes != UINT64_MAX) {
+ char bytes[FORMAT_BYTES_MAX];
+ log_info("IP traffic sent: %s", format_bytes(bytes, sizeof bytes, c.ip_egress_bytes));
+ }
+ if (c.io_read_bytes != UINT64_MAX) {
+ char bytes[FORMAT_BYTES_MAX];
+ log_info("IO bytes read: %s", format_bytes(bytes, sizeof bytes, c.io_read_bytes));
+ }
+ if (c.io_write_bytes != UINT64_MAX) {
+ char bytes[FORMAT_BYTES_MAX];
+ log_info("IO bytes written: %s", format_bytes(bytes, sizeof bytes, c.io_write_bytes));
+ }
+ }
+
+ /* Try to propagate the service's return value. But if the service defines
+ * e.g. SuccessExitStatus, honour this, and return 0 to mean "success". */
+ if (streq_ptr(c.result, "success"))
+ *retval = 0;
+ else if (streq_ptr(c.result, "exit-code") && c.exit_status > 0)
+ *retval = c.exit_status;
+ else if (streq_ptr(c.result, "signal"))
+ *retval = EXIT_EXCEPTION;
+ else
+ *retval = EXIT_FAILURE;
+ }
+
+ return 0;
+}
+
+static int acquire_invocation_id(sd_bus *bus, sd_id128_t *ret) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const void *p;
+ size_t l;
+ int r;
+
+ assert(bus);
+ assert(ret);
+
+ r = sd_bus_get_property(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1/unit/self",
+ "org.freedesktop.systemd1.Unit",
+ "InvocationID",
+ &error,
+ &reply,
+ "ay");
+ if (r < 0)
+ return log_error_errno(r, "Failed to request invocation ID for scope: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read_array(reply, 'y', &p, &l);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (l != sizeof(sd_id128_t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid UUID size, %zu != %zu.", l, sizeof(sd_id128_t));
+
+ memcpy(ret, p, l);
+ return 0;
+}
+
+static int start_transient_scope(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_strv_free_ char **env = NULL, **user_env = NULL;
+ _cleanup_free_ char *scope = NULL;
+ const char *object = NULL;
+ sd_id128_t invocation_id;
+ int r;
+
+ assert(bus);
+ assert(!strv_isempty(arg_cmdline));
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_oom();
+
+ if (arg_unit) {
+ r = unit_name_mangle_with_suffix(arg_unit, "as unit",
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ ".scope", &scope);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle scope name: %m");
+ } else {
+ r = make_unit_name(bus, UNIT_SCOPE, &scope);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and Mode */
+ r = sd_bus_message_append(m, "ss", scope, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_scope_set_properties(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient scope unit: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+
+ r = acquire_invocation_id(bus, &invocation_id);
+ if (r < 0)
+ return r;
+
+ r = strv_extendf(&user_env, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(invocation_id));
+ if (r < 0)
+ return log_oom();
+
+ if (arg_nice_set) {
+ if (setpriority(PRIO_PROCESS, 0, arg_nice) < 0)
+ return log_error_errno(errno, "Failed to set nice level: %m");
+ }
+
+ if (arg_exec_group) {
+ gid_t gid;
+
+ r = get_group_creds(&arg_exec_group, &gid, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve group %s: %m", arg_exec_group);
+
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "Failed to change GID to " GID_FMT ": %m", gid);
+ }
+
+ if (arg_exec_user) {
+ const char *home, *shell;
+ uid_t uid;
+ gid_t gid;
+
+ r = get_user_creds(&arg_exec_user, &uid, &gid, &home, &shell, USER_CREDS_CLEAN|USER_CREDS_PREFER_NSS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve user %s: %m", arg_exec_user);
+
+ if (home) {
+ r = strv_extendf(&user_env, "HOME=%s", home);
+ if (r < 0)
+ return log_oom();
+ }
+
+ if (shell) {
+ r = strv_extendf(&user_env, "SHELL=%s", shell);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = strv_extendf(&user_env, "USER=%s", arg_exec_user);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&user_env, "LOGNAME=%s", arg_exec_user);
+ if (r < 0)
+ return log_oom();
+
+ if (!arg_exec_group) {
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "Failed to change GID to " GID_FMT ": %m", gid);
+ }
+
+ if (setresuid(uid, uid, uid) < 0)
+ return log_error_errno(errno, "Failed to change UID to " UID_FMT ": %m", uid);
+ }
+
+ env = strv_env_merge(3, environ, user_env, arg_environment);
+ if (!env)
+ return log_oom();
+
+ if (!arg_quiet)
+ log_info("Running scope as unit: %s", scope);
+
+ execvpe(arg_cmdline[0], arg_cmdline, env);
+
+ return log_error_errno(errno, "Failed to execute: %m");
+}
+
+static int start_transient_trigger(
+ sd_bus *bus,
+ const char *suffix) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_free_ char *trigger = NULL, *service = NULL;
+ const char *object = NULL;
+ int r;
+
+ assert(bus);
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_oom();
+
+ if (arg_unit) {
+ switch (unit_name_to_type(arg_unit)) {
+
+ case UNIT_SERVICE:
+ service = strdup(arg_unit);
+ if (!service)
+ return log_oom();
+
+ r = unit_name_change_suffix(service, suffix, &trigger);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change unit suffix: %m");
+ break;
+
+ case UNIT_TIMER:
+ trigger = strdup(arg_unit);
+ if (!trigger)
+ return log_oom();
+
+ r = unit_name_change_suffix(trigger, ".service", &service);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change unit suffix: %m");
+ break;
+
+ default:
+ r = unit_name_mangle_with_suffix(arg_unit, "as unit",
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ ".service", &service);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ r = unit_name_mangle_with_suffix(arg_unit, "as trigger",
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ suffix, &trigger);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ break;
+ }
+ } else {
+ r = make_unit_name(bus, UNIT_SERVICE, &service);
+ if (r < 0)
+ return r;
+
+ r = unit_name_change_suffix(service, suffix, &trigger);
+ if (r < 0)
+ return log_error_errno(r, "Failed to change unit suffix: %m");
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_set_allow_interactive_authorization(m, arg_ask_password);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and Mode */
+ r = sd_bus_message_append(m, "ss", trigger, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(suffix, ".path"))
+ r = transient_unit_set_properties(m, UNIT_PATH, arg_path_property);
+ else if (streq(suffix, ".socket"))
+ r = transient_unit_set_properties(m, UNIT_SOCKET, arg_socket_property);
+ else if (streq(suffix, ".timer"))
+ r = transient_timer_set_properties(m);
+ else
+ assert_not_reached("Invalid suffix");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sa(sv))");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!strv_isempty(arg_cmdline)) {
+ r = sd_bus_message_open_container(m, 'r', "sa(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", service);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = transient_service_set_properties(m, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient %s unit: %s", suffix + 1, bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, arg_quiet);
+ if (r < 0)
+ return r;
+
+ if (!arg_quiet) {
+ log_info("Running %s as unit: %s", suffix + 1, trigger);
+ if (!strv_isempty(arg_cmdline))
+ log_info("Will run service as unit: %s", service);
+ }
+
+ return 0;
+}
+
+static int run(int argc, char* argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *description = NULL;
+ int r, retval = EXIT_SUCCESS;
+
+ log_show_color(true);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (!strv_isempty(arg_cmdline) &&
+ arg_transport == BUS_TRANSPORT_LOCAL &&
+ !strv_find_startswith(arg_property, "RootDirectory=") &&
+ !strv_find_startswith(arg_property, "RootImage=")) {
+ /* Patch in an absolute path to fail early for user convenience, but only when we can do it
+ * (i.e. we will be running from the same file system). This also uses the user's $PATH,
+ * while we use a fixed search path in the manager. */
+
+ _cleanup_free_ char *command = NULL;
+ r = find_executable(arg_cmdline[0], &command);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find executable %s: %m", arg_cmdline[0]);
+
+ free_and_replace(arg_cmdline[0], command);
+ }
+
+ if (!arg_description) {
+ description = strv_join(arg_cmdline, " ");
+ if (!description)
+ return log_oom();
+
+ if (arg_unit && isempty(description)) {
+ r = free_and_strdup(&description, arg_unit);
+ if (r < 0)
+ return log_oom();
+ }
+
+ arg_description = description;
+ }
+
+ /* If --wait is used connect via the bus, unconditionally, as ref/unref is not supported via the limited direct
+ * connection */
+ if (arg_wait || arg_stdio != ARG_STDIO_NONE)
+ r = bus_connect_transport(arg_transport, arg_host, arg_user, &bus);
+ else
+ r = bus_connect_transport_systemd(arg_transport, arg_host, arg_user, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ if (arg_scope)
+ r = start_transient_scope(bus);
+ else if (arg_path_property)
+ r = start_transient_trigger(bus, ".path");
+ else if (arg_socket_property)
+ r = start_transient_trigger(bus, ".socket");
+ else if (arg_with_timer)
+ r = start_transient_trigger(bus, ".timer");
+ else
+ r = start_transient_service(bus, &retval);
+ if (r < 0)
+ return r;
+
+ return retval;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/shared/acl-util.c b/src/shared/acl-util.c
new file mode 100644
index 0000000..ef4b883
--- /dev/null
+++ b/src/shared/acl-util.c
@@ -0,0 +1,437 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *ret_entry) {
+ acl_entry_t i;
+ int r;
+
+ assert(acl);
+ assert(uid_is_valid(uid));
+ assert(ret_entry);
+
+ for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) {
+
+ acl_tag_t tag;
+ uid_t *u;
+ bool b;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag != ACL_USER)
+ continue;
+
+ u = acl_get_qualifier(i);
+ if (!u)
+ return -errno;
+
+ b = *u == uid;
+ acl_free(u);
+
+ if (b) {
+ *ret_entry = i;
+ return 1;
+ }
+ }
+ if (r < 0)
+ return -errno;
+
+ *ret_entry = NULL;
+ return 0;
+}
+
+int calc_acl_mask_if_needed(acl_t *acl_p) {
+ acl_entry_t i;
+ int r;
+ bool need = false;
+
+ assert(acl_p);
+
+ for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag == ACL_MASK)
+ return 0;
+
+ if (IN_SET(tag, ACL_USER, ACL_GROUP))
+ need = true;
+ }
+ if (r < 0)
+ return -errno;
+
+ if (need && acl_calc_mask(acl_p) < 0)
+ return -errno;
+
+ return need;
+}
+
+int add_base_acls_if_needed(acl_t *acl_p, const char *path) {
+ acl_entry_t i;
+ int r;
+ bool have_user_obj = false, have_group_obj = false, have_other = false;
+ struct stat st;
+ _cleanup_(acl_freep) acl_t basic = NULL;
+
+ assert(acl_p);
+
+ for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag == ACL_USER_OBJ)
+ have_user_obj = true;
+ else if (tag == ACL_GROUP_OBJ)
+ have_group_obj = true;
+ else if (tag == ACL_OTHER)
+ have_other = true;
+ if (have_user_obj && have_group_obj && have_other)
+ return 0;
+ }
+ if (r < 0)
+ return -errno;
+
+ r = stat(path, &st);
+ if (r < 0)
+ return -errno;
+
+ basic = acl_from_mode(st.st_mode);
+ if (!basic)
+ return -errno;
+
+ for (r = acl_get_entry(basic, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(basic, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+ acl_entry_t dst;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if ((tag == ACL_USER_OBJ && have_user_obj) ||
+ (tag == ACL_GROUP_OBJ && have_group_obj) ||
+ (tag == ACL_OTHER && have_other))
+ continue;
+
+ r = acl_create_entry(acl_p, &dst);
+ if (r < 0)
+ return -errno;
+
+ r = acl_copy_entry(dst, i);
+ if (r < 0)
+ return -errno;
+ }
+ if (r < 0)
+ return -errno;
+ return 0;
+}
+
+int acl_search_groups(const char *path, char ***ret_groups) {
+ _cleanup_strv_free_ char **g = NULL;
+ _cleanup_(acl_freep) acl_t acl = NULL;
+ bool ret = false;
+ acl_entry_t entry;
+ int r;
+
+ assert(path);
+
+ acl = acl_get_file(path, ACL_TYPE_DEFAULT);
+ if (!acl)
+ return -errno;
+
+ r = acl_get_entry(acl, ACL_FIRST_ENTRY, &entry);
+ for (;;) {
+ _cleanup_(acl_free_gid_tpp) gid_t *gid = NULL;
+ acl_tag_t tag;
+
+ if (r < 0)
+ return -errno;
+ if (r == 0)
+ break;
+
+ if (acl_get_tag_type(entry, &tag) < 0)
+ return -errno;
+
+ if (tag != ACL_GROUP)
+ goto next;
+
+ gid = acl_get_qualifier(entry);
+ if (!gid)
+ return -errno;
+
+ if (in_gid(*gid) > 0) {
+ if (!ret_groups)
+ return true;
+
+ ret = true;
+ }
+
+ if (ret_groups) {
+ char *name;
+
+ name = gid_to_name(*gid);
+ if (!name)
+ return -ENOMEM;
+
+ r = strv_consume(&g, name);
+ if (r < 0)
+ return r;
+ }
+
+ next:
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &entry);
+ }
+
+ if (ret_groups)
+ *ret_groups = TAKE_PTR(g);
+
+ return ret;
+}
+
+int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask) {
+ _cleanup_free_ char **a = NULL, **d = NULL; /* strings are not freed */
+ _cleanup_strv_free_ char **split;
+ char **entry;
+ int r = -EINVAL;
+ _cleanup_(acl_freep) acl_t a_acl = NULL, d_acl = NULL;
+
+ split = strv_split(text, ",");
+ if (!split)
+ return -ENOMEM;
+
+ STRV_FOREACH(entry, split) {
+ char *p;
+
+ p = STARTSWITH_SET(*entry, "default:", "d:");
+ if (p)
+ r = strv_push(&d, p);
+ else
+ r = strv_push(&a, *entry);
+ if (r < 0)
+ return r;
+ }
+
+ if (!strv_isempty(a)) {
+ _cleanup_free_ char *join;
+
+ join = strv_join(a, ",");
+ if (!join)
+ return -ENOMEM;
+
+ a_acl = acl_from_text(join);
+ if (!a_acl)
+ return -errno;
+
+ if (want_mask) {
+ r = calc_acl_mask_if_needed(&a_acl);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!strv_isempty(d)) {
+ _cleanup_free_ char *join;
+
+ join = strv_join(d, ",");
+ if (!join)
+ return -ENOMEM;
+
+ d_acl = acl_from_text(join);
+ if (!d_acl)
+ return -errno;
+
+ if (want_mask) {
+ r = calc_acl_mask_if_needed(&d_acl);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *acl_access = TAKE_PTR(a_acl);
+ *acl_default = TAKE_PTR(d_acl);
+
+ return 0;
+}
+
+static int acl_entry_equal(acl_entry_t a, acl_entry_t b) {
+ acl_tag_t tag_a, tag_b;
+
+ if (acl_get_tag_type(a, &tag_a) < 0)
+ return -errno;
+
+ if (acl_get_tag_type(b, &tag_b) < 0)
+ return -errno;
+
+ if (tag_a != tag_b)
+ return false;
+
+ switch (tag_a) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ /* can have only one of those */
+ return true;
+ case ACL_USER: {
+ _cleanup_(acl_free_uid_tpp) uid_t *uid_a = NULL, *uid_b = NULL;
+
+ uid_a = acl_get_qualifier(a);
+ if (!uid_a)
+ return -errno;
+
+ uid_b = acl_get_qualifier(b);
+ if (!uid_b)
+ return -errno;
+
+ return *uid_a == *uid_b;
+ }
+ case ACL_GROUP: {
+ _cleanup_(acl_free_gid_tpp) gid_t *gid_a = NULL, *gid_b = NULL;
+
+ gid_a = acl_get_qualifier(a);
+ if (!gid_a)
+ return -errno;
+
+ gid_b = acl_get_qualifier(b);
+ if (!gid_b)
+ return -errno;
+
+ return *gid_a == *gid_b;
+ }
+ default:
+ assert_not_reached("Unknown acl tag type");
+ }
+}
+
+static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *out) {
+ acl_entry_t i;
+ int r;
+
+ for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) {
+
+ r = acl_entry_equal(i, entry);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *out = i;
+ return 1;
+ }
+ }
+ if (r < 0)
+ return -errno;
+ return 0;
+}
+
+int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl) {
+ _cleanup_(acl_freep) acl_t old;
+ acl_entry_t i;
+ int r;
+
+ old = acl_get_file(path, type);
+ if (!old)
+ return -errno;
+
+ for (r = acl_get_entry(new, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(new, ACL_NEXT_ENTRY, &i)) {
+
+ acl_entry_t j;
+
+ r = find_acl_entry(old, i, &j);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ if (acl_create_entry(&old, &j) < 0)
+ return -errno;
+
+ if (acl_copy_entry(j, i) < 0)
+ return -errno;
+ }
+ if (r < 0)
+ return -errno;
+
+ *acl = TAKE_PTR(old);
+
+ return 0;
+}
+
+/* POSIX says that ACL_{READ,WRITE,EXECUTE} don't have to be bitmasks. But that is a natural thing to do and
+ * all extant implementations do it. Let's make sure that we fail verbosely in the (imho unlikely) scenario
+ * that we get a new implementation that does not satisfy this. */
+assert_cc(!(ACL_READ & ACL_WRITE));
+assert_cc(!(ACL_WRITE & ACL_EXECUTE));
+assert_cc(!(ACL_EXECUTE & ACL_READ));
+assert_cc((unsigned) ACL_READ == ACL_READ);
+assert_cc((unsigned) ACL_WRITE == ACL_WRITE);
+assert_cc((unsigned) ACL_EXECUTE == ACL_EXECUTE);
+
+int fd_add_uid_acl_permission(
+ int fd,
+ uid_t uid,
+ unsigned mask) {
+
+ _cleanup_(acl_freep) acl_t acl = NULL;
+ acl_permset_t permset;
+ acl_entry_t entry;
+ int r;
+
+ /* Adds an ACL entry for the specified file to allow the indicated access to the specified
+ * user. Operates purely incrementally. */
+
+ assert(fd >= 0);
+ assert(uid_is_valid(uid));
+
+ acl = acl_get_fd(fd);
+ if (!acl)
+ return -errno;
+
+ r = acl_find_uid(acl, uid, &entry);
+ if (r <= 0) {
+ if (acl_create_entry(&acl, &entry) < 0 ||
+ acl_set_tag_type(entry, ACL_USER) < 0 ||
+ acl_set_qualifier(entry, &uid) < 0)
+ return -errno;
+ }
+
+ if (acl_get_permset(entry, &permset) < 0)
+ return -errno;
+
+ if ((mask & ACL_READ) && acl_add_perm(permset, ACL_READ) < 0)
+ return -errno;
+ if ((mask & ACL_WRITE) && acl_add_perm(permset, ACL_WRITE) < 0)
+ return -errno;
+ if ((mask & ACL_EXECUTE) && acl_add_perm(permset, ACL_EXECUTE) < 0)
+ return -errno;
+
+ r = calc_acl_mask_if_needed(&acl);
+ if (r < 0)
+ return r;
+
+ if (acl_set_fd(fd, acl) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/acl-util.h b/src/shared/acl-util.h
new file mode 100644
index 0000000..837e869
--- /dev/null
+++ b/src/shared/acl-util.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <unistd.h>
+
+#if HAVE_ACL
+#include <acl/libacl.h>
+#include <stdbool.h>
+#include <sys/acl.h>
+
+#include "macro.h"
+
+int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry);
+int calc_acl_mask_if_needed(acl_t *acl_p);
+int add_base_acls_if_needed(acl_t *acl_p, const char *path);
+int acl_search_groups(const char* path, char ***ret_groups);
+int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask);
+int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl);
+int fd_add_uid_acl_permission(int fd, uid_t uid, unsigned mask);
+
+/* acl_free takes multiple argument types.
+ * Multiple cleanup functions are necessary. */
+DEFINE_TRIVIAL_CLEANUP_FUNC(acl_t, acl_free);
+#define acl_free_charp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, acl_free_charp);
+#define acl_free_uid_tp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(uid_t*, acl_free_uid_tp);
+#define acl_free_gid_tp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(gid_t*, acl_free_gid_tp);
+
+#else
+#define ACL_READ 0x04
+#define ACL_WRITE 0x02
+#define ACL_EXECUTE 0x01
+
+static inline int fd_add_uid_acl_permission(int fd, uid_t uid, unsigned mask) {
+ return -EOPNOTSUPP;
+}
+#endif
diff --git a/src/shared/acpi-fpdt.c b/src/shared/acpi-fpdt.c
new file mode 100644
index 0000000..1124453
--- /dev/null
+++ b/src/shared/acpi-fpdt.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "acpi-fpdt.h"
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "time-util.h"
+
+struct acpi_table_header {
+ char signature[4];
+ uint32_t length;
+ uint8_t revision;
+ uint8_t checksum;
+ char oem_id[6];
+ char oem_table_id[8];
+ uint32_t oem_revision;
+ char asl_compiler_id[4];
+ uint32_t asl_compiler_revision;
+} _packed_;
+
+enum {
+ ACPI_FPDT_TYPE_BOOT = 0,
+ ACPI_FPDT_TYPE_S3PERF = 1,
+};
+
+struct acpi_fpdt_header {
+ uint16_t type;
+ uint8_t length;
+ uint8_t revision;
+ uint8_t reserved[4];
+ uint64_t ptr;
+} _packed_;
+
+struct acpi_fpdt_boot_header {
+ char signature[4];
+ uint32_t length;
+} _packed_;
+
+enum {
+ ACPI_FPDT_S3PERF_RESUME_REC = 0,
+ ACPI_FPDT_S3PERF_SUSPEND_REC = 1,
+ ACPI_FPDT_BOOT_REC = 2,
+};
+
+struct acpi_fpdt_boot {
+ uint16_t type;
+ uint8_t length;
+ uint8_t revision;
+ uint8_t reserved[4];
+ uint64_t reset_end;
+ uint64_t load_start;
+ uint64_t startup_start;
+ uint64_t exit_services_entry;
+ uint64_t exit_services_exit;
+} _packed;
+
+int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit) {
+ _cleanup_free_ char *buf = NULL;
+ struct acpi_table_header *tbl;
+ size_t l = 0;
+ struct acpi_fpdt_header *rec;
+ int r;
+ uint64_t ptr = 0;
+ _cleanup_close_ int fd = -1;
+ struct acpi_fpdt_boot_header hbrec;
+ struct acpi_fpdt_boot brec;
+
+ r = read_full_file("/sys/firmware/acpi/tables/FPDT", &buf, &l);
+ if (r < 0)
+ return r;
+
+ if (l < sizeof(struct acpi_table_header) + sizeof(struct acpi_fpdt_header))
+ return -EINVAL;
+
+ tbl = (struct acpi_table_header *)buf;
+ if (l != tbl->length)
+ return -EINVAL;
+
+ if (memcmp(tbl->signature, "FPDT", 4) != 0)
+ return -EINVAL;
+
+ /* find Firmware Basic Boot Performance Pointer Record */
+ for (rec = (struct acpi_fpdt_header *)(buf + sizeof(struct acpi_table_header));
+ (char *)rec < buf + l;
+ rec = (struct acpi_fpdt_header *)((char *)rec + rec->length)) {
+ if (rec->length <= 0)
+ break;
+ if (rec->type != ACPI_FPDT_TYPE_BOOT)
+ continue;
+ if (rec->length != sizeof(struct acpi_fpdt_header))
+ continue;
+
+ ptr = rec->ptr;
+ break;
+ }
+
+ if (ptr == 0)
+ return -ENODATA;
+
+ /* read Firmware Basic Boot Performance Data Record */
+ fd = open("/dev/mem", O_CLOEXEC|O_RDONLY);
+ if (fd < 0)
+ return -errno;
+
+ l = pread(fd, &hbrec, sizeof(struct acpi_fpdt_boot_header), ptr);
+ if (l != sizeof(struct acpi_fpdt_boot_header))
+ return -EINVAL;
+
+ if (memcmp(hbrec.signature, "FBPT", 4) != 0)
+ return -EINVAL;
+
+ if (hbrec.length < sizeof(struct acpi_fpdt_boot_header) + sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ l = pread(fd, &brec, sizeof(struct acpi_fpdt_boot), ptr + sizeof(struct acpi_fpdt_boot_header));
+ if (l != sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ if (brec.length != sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ if (brec.type != ACPI_FPDT_BOOT_REC)
+ return -EINVAL;
+
+ if (brec.exit_services_exit == 0)
+ /* Non-UEFI compatible boot. */
+ return -ENODATA;
+
+ if (brec.startup_start == 0 || brec.exit_services_exit < brec.startup_start)
+ return -EINVAL;
+ if (brec.exit_services_exit > NSEC_PER_HOUR)
+ return -EINVAL;
+
+ if (loader_start)
+ *loader_start = brec.startup_start / 1000;
+ if (loader_exit)
+ *loader_exit = brec.exit_services_exit / 1000;
+
+ return 0;
+}
diff --git a/src/shared/acpi-fpdt.h b/src/shared/acpi-fpdt.h
new file mode 100644
index 0000000..9eef92b
--- /dev/null
+++ b/src/shared/acpi-fpdt.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <time-util.h>
+
+int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit);
diff --git a/src/shared/apparmor-util.c b/src/shared/apparmor-util.c
new file mode 100644
index 0000000..68e1c55
--- /dev/null
+++ b/src/shared/apparmor-util.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+
+bool mac_apparmor_use(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ cached_use =
+ read_one_line_file("/sys/module/apparmor/parameters/enabled", &p) >= 0 &&
+ parse_boolean(p) > 0;
+ }
+
+ return cached_use;
+}
diff --git a/src/shared/apparmor-util.h b/src/shared/apparmor-util.h
new file mode 100644
index 0000000..8007aeb
--- /dev/null
+++ b/src/shared/apparmor-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+bool mac_apparmor_use(void);
diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c
new file mode 100644
index 0000000..8d66f9f
--- /dev/null
+++ b/src/shared/ask-password-api.c
@@ -0,0 +1,1050 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "utf8.h"
+
+#define KEYRING_TIMEOUT_USEC ((5 * USEC_PER_MINUTE) / 2)
+
+static int lookup_key(const char *keyname, key_serial_t *ret) {
+ key_serial_t serial;
+
+ assert(keyname);
+ assert(ret);
+
+ serial = request_key("user", keyname, NULL, 0);
+ if (serial == -1)
+ return negative_errno();
+
+ *ret = serial;
+ return 0;
+}
+
+static int retrieve_key(key_serial_t serial, char ***ret) {
+ size_t nfinal, m = 100;
+ char **l;
+ _cleanup_(erase_and_freep) char *pfinal = NULL;
+
+ assert(ret);
+
+ for (;;) {
+ _cleanup_(erase_and_freep) char *p = NULL;
+ long n;
+
+ p = new(char, m);
+ if (!p)
+ return -ENOMEM;
+
+ n = keyctl(KEYCTL_READ, (unsigned long) serial, (unsigned long) p, (unsigned long) m, 0);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n <= m) {
+ nfinal = (size_t) n;
+ pfinal = TAKE_PTR(p);
+ break;
+ }
+
+ if (m > LONG_MAX / 2) /* overflow check */
+ return -ENOMEM;
+ m *= 2;
+ }
+
+ l = strv_parse_nulstr(pfinal, nfinal);
+ if (!l)
+ return -ENOMEM;
+
+ *ret = l;
+ return 0;
+}
+
+static int add_to_keyring(const char *keyname, AskPasswordFlags flags, char **passwords) {
+ _cleanup_strv_free_erase_ char **l = NULL;
+ _cleanup_(erase_and_freep) char *p = NULL;
+ key_serial_t serial;
+ size_t n;
+ int r;
+
+ assert(keyname);
+
+ if (!(flags & ASK_PASSWORD_PUSH_CACHE))
+ return 0;
+ if (strv_isempty(passwords))
+ return 0;
+
+ r = lookup_key(keyname, &serial);
+ if (r >= 0) {
+ r = retrieve_key(serial, &l);
+ if (r < 0)
+ return r;
+ } else if (r != -ENOKEY)
+ return r;
+
+ r = strv_extend_strv(&l, passwords, true);
+ if (r <= 0)
+ return r;
+
+ r = strv_make_nulstr(l, &p, &n);
+ if (r < 0)
+ return r;
+
+ serial = add_key("user", keyname, p, n, KEY_SPEC_USER_KEYRING);
+ if (serial == -1)
+ return -errno;
+
+ if (keyctl(KEYCTL_SET_TIMEOUT,
+ (unsigned long) serial,
+ (unsigned long) DIV_ROUND_UP(KEYRING_TIMEOUT_USEC, USEC_PER_SEC), 0, 0) < 0)
+ log_debug_errno(errno, "Failed to adjust kernel keyring key timeout: %m");
+
+ /* Tell everyone to check the keyring */
+ (void) touch("/run/systemd/ask-password");
+
+ log_debug("Added key to kernel keyring as %" PRIi32 ".", serial);
+
+ return 1;
+}
+
+static int add_to_keyring_and_log(const char *keyname, AskPasswordFlags flags, char **passwords) {
+ int r;
+
+ assert(keyname);
+
+ r = add_to_keyring(keyname, flags, passwords);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add password to kernel keyring: %m");
+
+ return 0;
+}
+
+static int ask_password_keyring(const char *keyname, AskPasswordFlags flags, char ***ret) {
+
+ key_serial_t serial;
+ int r;
+
+ assert(keyname);
+ assert(ret);
+
+ if (!(flags & ASK_PASSWORD_ACCEPT_CACHED))
+ return -EUNATCH;
+
+ r = lookup_key(keyname, &serial);
+ if (ERRNO_IS_NOT_SUPPORTED(r) || r == -EPERM) /* when retrieving the distinction between "kernel or
+ * container manager don't support or allow this" and
+ * "no matching key known" doesn't matter. Note that we
+ * propagate EACCESS here (even if EPERM not) since
+ * that is used if the keyring is available but we lack
+ * access to the key. */
+ return -ENOKEY;
+ if (r < 0)
+ return r;
+
+ return retrieve_key(serial, ret);
+}
+
+static int backspace_chars(int ttyfd, size_t p) {
+ if (ttyfd < 0)
+ return 0;
+
+ _cleanup_free_ char *buf = malloc_multiply(3, p);
+ if (!buf)
+ return log_oom();
+
+ for (size_t i = 0; i < p; i++)
+ memcpy(buf + 3 * i, "\b \b", 3);
+
+ return loop_write(ttyfd, buf, 3*p, false);
+}
+
+static int backspace_string(int ttyfd, const char *str) {
+ assert(str);
+
+ /* Backspaces through enough characters to entirely undo printing of the specified string. */
+
+ if (ttyfd < 0)
+ return 0;
+
+ size_t m = utf8_n_codepoints(str);
+ if (m == (size_t) -1)
+ m = strlen(str); /* Not a valid UTF-8 string? If so, let's backspace the number of bytes
+ * output. Most likely this happened because we are not in an UTF-8 locale,
+ * and in that case that is the correct thing to do. And even if it's not,
+ * terminals tend to stop backspacing at the leftmost column, hence
+ * backspacing too much should be mostly OK. */
+
+ return backspace_chars(ttyfd, m);
+}
+
+int ask_password_plymouth(
+ const char *message,
+ usec_t until,
+ AskPasswordFlags flags,
+ const char *flag_file,
+ char ***ret) {
+
+ static const union sockaddr_union sa = PLYMOUTH_SOCKET;
+ _cleanup_close_ int fd = -1, notify = -1;
+ _cleanup_free_ char *packet = NULL;
+ ssize_t k;
+ int r, n;
+ struct pollfd pollfd[2] = {};
+ char buffer[LINE_MAX];
+ size_t p = 0;
+ enum {
+ POLL_SOCKET,
+ POLL_INOTIFY
+ };
+
+ assert(ret);
+
+ if (!message)
+ message = "Password:";
+
+ if (flag_file) {
+ notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK);
+ if (notify < 0)
+ return -errno;
+
+ r = inotify_add_watch(notify, flag_file, IN_ATTRIB); /* for the link count */
+ if (r < 0)
+ return -errno;
+ }
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return -errno;
+
+ if (flags & ASK_PASSWORD_ACCEPT_CACHED) {
+ packet = strdup("c");
+ n = 1;
+ } else if (asprintf(&packet, "*\002%c%s%n", (int) (strlen(message) + 1), message, &n) < 0)
+ packet = NULL;
+ if (!packet)
+ return -ENOMEM;
+
+ r = loop_write(fd, packet, n + 1, true);
+ if (r < 0)
+ return r;
+
+ pollfd[POLL_SOCKET].fd = fd;
+ pollfd[POLL_SOCKET].events = POLLIN;
+ pollfd[POLL_INOTIFY].fd = notify;
+ pollfd[POLL_INOTIFY].events = POLLIN;
+
+ for (;;) {
+ int sleep_for = -1, j;
+
+ if (until > 0) {
+ usec_t y;
+
+ y = now(CLOCK_MONOTONIC);
+
+ if (y > until) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ sleep_for = (int) ((until - y) / USEC_PER_MSEC);
+ }
+
+ if (flag_file && access(flag_file, F_OK) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ j = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for);
+ if (j < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (j == 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if (pollfd[POLL_SOCKET].revents & POLLNVAL ||
+ (notify >= 0 && pollfd[POLL_INOTIFY].revents & POLLNVAL)) {
+ r = -EBADF;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0)
+ (void) flush_fd(notify);
+
+ if (pollfd[POLL_SOCKET].revents == 0)
+ continue;
+
+ k = read(fd, buffer + p, sizeof(buffer) - p);
+ if (k < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (k == 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ p += k;
+
+ if (buffer[0] == 5) {
+
+ if (flags & ASK_PASSWORD_ACCEPT_CACHED) {
+ /* Hmm, first try with cached
+ * passwords failed, so let's retry
+ * with a normal password request */
+ packet = mfree(packet);
+
+ if (asprintf(&packet, "*\002%c%s%n", (int) (strlen(message) + 1), message, &n) < 0) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = loop_write(fd, packet, n+1, true);
+ if (r < 0)
+ goto finish;
+
+ flags &= ~ASK_PASSWORD_ACCEPT_CACHED;
+ p = 0;
+ continue;
+ }
+
+ /* No password, because UI not shown */
+ r = -ENOENT;
+ goto finish;
+
+ } else if (IN_SET(buffer[0], 2, 9)) {
+ uint32_t size;
+ char **l;
+
+ /* One or more answers */
+ if (p < 5)
+ continue;
+
+ memcpy(&size, buffer+1, sizeof(size));
+ size = le32toh(size);
+ if (size + 5 > sizeof(buffer)) {
+ r = -EIO;
+ goto finish;
+ }
+
+ if (p-5 < size)
+ continue;
+
+ l = strv_parse_nulstr(buffer + 5, size);
+ if (!l) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ *ret = l;
+ break;
+
+ } else {
+ /* Unknown packet */
+ r = -EIO;
+ goto finish;
+ }
+ }
+
+ r = 0;
+
+finish:
+ explicit_bzero_safe(buffer, sizeof(buffer));
+ return r;
+}
+
+#define NO_ECHO "(no echo) "
+#define PRESS_TAB "(press TAB for no echo) "
+#define SKIPPED "(skipped)"
+
+int ask_password_tty(
+ int ttyfd,
+ const char *message,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ const char *flag_file,
+ char ***ret) {
+
+ enum {
+ POLL_TTY,
+ POLL_INOTIFY,
+ _POLL_MAX,
+ };
+
+ bool reset_tty = false, dirty = false, use_color = false, press_tab_visible = false;
+ _cleanup_close_ int cttyfd = -1, notify = -1;
+ struct termios old_termios, new_termios;
+ char passphrase[LINE_MAX + 1] = {}, *x;
+ _cleanup_strv_free_erase_ char **l = NULL;
+ struct pollfd pollfd[_POLL_MAX];
+ size_t p = 0, codepoint = 0;
+ int r;
+
+ assert(ret);
+
+ if (flags & ASK_PASSWORD_NO_TTY)
+ return -EUNATCH;
+
+ if (!message)
+ message = "Password:";
+
+ if (emoji_enabled())
+ message = strjoina(special_glyph(SPECIAL_GLYPH_LOCK_AND_KEY), " ", message);
+
+ if (flag_file || ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname)) {
+ notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK);
+ if (notify < 0)
+ return -errno;
+ }
+ if (flag_file) {
+ if (inotify_add_watch(notify, flag_file, IN_ATTRIB /* for the link count */) < 0)
+ return -errno;
+ }
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0)
+ return 0;
+ else if (r != -ENOKEY)
+ return r;
+
+ if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0)
+ return -errno;
+ }
+
+ /* If the caller didn't specify a TTY, then use the controlling tty, if we can. */
+ if (ttyfd < 0)
+ ttyfd = cttyfd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC);
+
+ if (ttyfd >= 0) {
+ if (tcgetattr(ttyfd, &old_termios) < 0)
+ return -errno;
+
+ if (flags & ASK_PASSWORD_CONSOLE_COLOR)
+ use_color = dev_console_colors_enabled();
+ else
+ use_color = colors_enabled();
+
+ if (use_color)
+ (void) loop_write(ttyfd, ANSI_HIGHLIGHT, STRLEN(ANSI_HIGHLIGHT), false);
+
+ (void) loop_write(ttyfd, message, strlen(message), false);
+ (void) loop_write(ttyfd, " ", 1, false);
+
+ if (!(flags & ASK_PASSWORD_SILENT) && !(flags & ASK_PASSWORD_ECHO)) {
+ if (use_color)
+ (void) loop_write(ttyfd, ANSI_GREY, STRLEN(ANSI_GREY), false);
+ (void) loop_write(ttyfd, PRESS_TAB, strlen(PRESS_TAB), false);
+ press_tab_visible = true;
+ }
+
+ if (use_color)
+ (void) loop_write(ttyfd, ANSI_NORMAL, STRLEN(ANSI_NORMAL), false);
+
+ new_termios = old_termios;
+ new_termios.c_lflag &= ~(ICANON|ECHO);
+ new_termios.c_cc[VMIN] = 1;
+ new_termios.c_cc[VTIME] = 0;
+
+ if (tcsetattr(ttyfd, TCSADRAIN, &new_termios) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ reset_tty = true;
+ }
+
+ pollfd[POLL_TTY] = (struct pollfd) {
+ .fd = ttyfd >= 0 ? ttyfd : STDIN_FILENO,
+ .events = POLLIN,
+ };
+ pollfd[POLL_INOTIFY] = (struct pollfd) {
+ .fd = notify,
+ .events = POLLIN,
+ };
+
+ for (;;) {
+ _cleanup_(erase_char) char c;
+ int sleep_for = -1, k;
+ ssize_t n;
+
+ if (until > 0) {
+ usec_t y;
+
+ y = now(CLOCK_MONOTONIC);
+
+ if (y > until) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ sleep_for = (int) DIV_ROUND_UP(until - y, USEC_PER_MSEC);
+ }
+
+ if (flag_file)
+ if (access(flag_file, F_OK) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ k = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (k == 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if ((pollfd[POLL_TTY].revents & POLLNVAL) ||
+ (notify >= 0 && (pollfd[POLL_INOTIFY].revents & POLLNVAL))) {
+ r = -EBADF;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0 && keyname) {
+ (void) flush_fd(notify);
+
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+ }
+
+ if (pollfd[POLL_TTY].revents == 0)
+ continue;
+
+ n = read(ttyfd >= 0 ? ttyfd : STDIN_FILENO, &c, 1);
+ if (n < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ r = -errno;
+ goto finish;
+
+ }
+
+ if (press_tab_visible) {
+ assert(ttyfd >= 0);
+ backspace_chars(ttyfd, strlen(PRESS_TAB));
+ press_tab_visible = false;
+ }
+
+ /* We treat EOF, newline and NUL byte all as valid end markers */
+ if (n == 0 || c == '\n' || c == 0)
+ break;
+
+ if (c == 4) { /* C-d also known as EOT */
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, SKIPPED, strlen(SKIPPED), false);
+
+ goto skipped;
+ }
+
+ if (c == 21) { /* C-u */
+
+ if (!(flags & ASK_PASSWORD_SILENT))
+ (void) backspace_string(ttyfd, passphrase);
+
+ explicit_bzero_safe(passphrase, sizeof(passphrase));
+ p = codepoint = 0;
+
+ } else if (IN_SET(c, '\b', 127)) {
+
+ if (p > 0) {
+ size_t q;
+
+ if (!(flags & ASK_PASSWORD_SILENT))
+ (void) backspace_chars(ttyfd, 1);
+
+ /* Remove a full UTF-8 codepoint from the end. For that, figure out where the
+ * last one begins */
+ q = 0;
+ for (;;) {
+ size_t z;
+
+ z = utf8_encoded_valid_unichar(passphrase + q, (size_t) -1);
+ if (z == 0) {
+ q = (size_t) -1; /* Invalid UTF8! */
+ break;
+ }
+
+ if (q + z >= p) /* This one brings us over the edge */
+ break;
+
+ q += z;
+ }
+
+ p = codepoint = q == (size_t) -1 ? p - 1 : q;
+ explicit_bzero_safe(passphrase + p, sizeof(passphrase) - p);
+
+ } else if (!dirty && !(flags & ASK_PASSWORD_SILENT)) {
+
+ flags |= ASK_PASSWORD_SILENT;
+
+ /* There are two ways to enter silent mode. Either by pressing backspace as
+ * first key (and only as first key), or ... */
+
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, NO_ECHO, strlen(NO_ECHO), false);
+
+ } else if (ttyfd >= 0)
+ (void) loop_write(ttyfd, "\a", 1, false);
+
+ } else if (c == '\t' && !(flags & ASK_PASSWORD_SILENT)) {
+
+ (void) backspace_string(ttyfd, passphrase);
+ flags |= ASK_PASSWORD_SILENT;
+
+ /* ... or by pressing TAB at any time. */
+
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, NO_ECHO, strlen(NO_ECHO), false);
+
+ } else if (p >= sizeof(passphrase)-1) {
+
+ /* Reached the size limit */
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, "\a", 1, false);
+
+ } else {
+ passphrase[p++] = c;
+
+ if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) {
+ /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */
+ n = utf8_encoded_valid_unichar(passphrase + codepoint, (size_t) -1);
+ if (n >= 0) {
+ if (flags & ASK_PASSWORD_ECHO)
+ (void) loop_write(ttyfd, passphrase + codepoint, n, false);
+ else
+ (void) loop_write(ttyfd, "*", 1, false);
+ codepoint = p;
+ }
+ }
+
+ dirty = true;
+ }
+ }
+
+ x = strndup(passphrase, p);
+ explicit_bzero_safe(passphrase, sizeof(passphrase));
+ if (!x) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = strv_consume(&l, x);
+ if (r < 0)
+ goto finish;
+
+skipped:
+ if (strv_isempty(l))
+ r = log_debug_errno(SYNTHETIC_ERRNO(ECANCELED), "Password query was cancelled.");
+ else {
+ if (keyname)
+ (void) add_to_keyring_and_log(keyname, flags, l);
+
+ *ret = TAKE_PTR(l);
+ r = 0;
+ }
+
+finish:
+ if (ttyfd >= 0 && reset_tty) {
+ (void) loop_write(ttyfd, "\n", 1, false);
+ (void) tcsetattr(ttyfd, TCSADRAIN, &old_termios);
+ }
+
+ return r;
+}
+
+static int create_socket(char **ret) {
+ _cleanup_free_ char *path = NULL;
+ union sockaddr_union sa;
+ socklen_t sa_len;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(ret);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (asprintf(&path, "/run/systemd/ask-password/sck.%" PRIx64, random_u64()) < 0)
+ return -ENOMEM;
+
+ r = sockaddr_un_set_path(&sa.un, path);
+ if (r < 0)
+ return r;
+ sa_len = r;
+
+ RUN_WITH_UMASK(0177)
+ if (bind(fd, &sa.sa, sa_len) < 0)
+ return -errno;
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(path);
+ return TAKE_FD(fd);
+}
+
+int ask_password_agent(
+ const char *message,
+ const char *icon,
+ const char *id,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ char ***ret) {
+
+ enum {
+ FD_SOCKET,
+ FD_SIGNAL,
+ FD_INOTIFY,
+ _FD_MAX
+ };
+
+ _cleanup_close_ int socket_fd = -1, signal_fd = -1, notify = -1, fd = -1;
+ char temp[] = "/run/systemd/ask-password/tmp.XXXXXX";
+ char final[sizeof(temp)] = "";
+ _cleanup_free_ char *socket_name = NULL;
+ _cleanup_strv_free_erase_ char **l = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ struct pollfd pollfd[_FD_MAX];
+ sigset_t mask, oldmask;
+ int r;
+
+ assert(ret);
+
+ if (flags & ASK_PASSWORD_NO_AGENT)
+ return -EUNATCH;
+
+ assert_se(sigemptyset(&mask) >= 0);
+ assert_se(sigset_add_many(&mask, SIGINT, SIGTERM, -1) >= 0);
+ assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) >= 0);
+
+ (void) mkdir_p_label("/run/systemd/ask-password", 0755);
+
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+
+ notify = inotify_init1(IN_CLOEXEC | IN_NONBLOCK);
+ if (notify < 0) {
+ r = -errno;
+ goto finish;
+ }
+ if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+
+ fd = mkostemp_safe(temp);
+ if (fd < 0) {
+ r = fd;
+ goto finish;
+ }
+
+ (void) fchmod(fd, 0644);
+
+ f = take_fdopen(&fd, "w");
+ if (!f) {
+ r = -errno;
+ goto finish;
+ }
+
+ signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (signal_fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ socket_fd = create_socket(&socket_name);
+ if (socket_fd < 0) {
+ r = socket_fd;
+ goto finish;
+ }
+
+ fprintf(f,
+ "[Ask]\n"
+ "PID="PID_FMT"\n"
+ "Socket=%s\n"
+ "AcceptCached=%i\n"
+ "Echo=%i\n"
+ "NotAfter="USEC_FMT"\n",
+ getpid_cached(),
+ socket_name,
+ (flags & ASK_PASSWORD_ACCEPT_CACHED) ? 1 : 0,
+ (flags & ASK_PASSWORD_ECHO) ? 1 : 0,
+ until);
+
+ if (message)
+ fprintf(f, "Message=%s\n", message);
+
+ if (icon)
+ fprintf(f, "Icon=%s\n", icon);
+
+ if (id)
+ fprintf(f, "Id=%s\n", id);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto finish;
+
+ memcpy(final, temp, sizeof(temp));
+
+ final[sizeof(final)-11] = 'a';
+ final[sizeof(final)-10] = 's';
+ final[sizeof(final)-9] = 'k';
+
+ if (rename(temp, final) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ zero(pollfd);
+ pollfd[FD_SOCKET].fd = socket_fd;
+ pollfd[FD_SOCKET].events = POLLIN;
+ pollfd[FD_SIGNAL].fd = signal_fd;
+ pollfd[FD_SIGNAL].events = POLLIN;
+ pollfd[FD_INOTIFY].fd = notify;
+ pollfd[FD_INOTIFY].events = POLLIN;
+
+ for (;;) {
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ char passphrase[LINE_MAX+1];
+ struct iovec iovec;
+ struct ucred *ucred;
+ ssize_t n;
+ int k;
+ usec_t t;
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (until > 0 && until <= t) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ k = poll(pollfd, notify >= 0 ? _FD_MAX : _FD_MAX - 1, until > 0 ? (int) ((until-t)/USEC_PER_MSEC) : -1);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ }
+
+ if (k <= 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if (pollfd[FD_SOCKET].revents & POLLNVAL ||
+ pollfd[FD_SIGNAL].revents & POLLNVAL ||
+ (notify >= 0 && pollfd[FD_INOTIFY].revents & POLLNVAL)) {
+ r = -EBADF;
+ goto finish;
+ }
+
+ if (pollfd[FD_SIGNAL].revents & POLLIN) {
+ r = -EINTR;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[FD_INOTIFY].revents != 0) {
+ (void) flush_fd(notify);
+
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+ }
+
+ if (pollfd[FD_SOCKET].revents == 0)
+ continue;
+
+ if (pollfd[FD_SOCKET].revents != POLLIN) {
+ r = -EIO;
+ goto finish;
+ }
+
+ iovec = IOVEC_MAKE(passphrase, sizeof(passphrase));
+
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ n = recvmsg_safe(socket_fd, &msghdr, 0);
+ if (IN_SET(n, -EAGAIN, -EINTR))
+ continue;
+ if (n == -EXFULL) {
+ log_debug("Got message with truncated control data, ignoring.");
+ continue;
+ }
+ if (n < 0) {
+ r = (int) n;
+ goto finish;
+ }
+
+ cmsg_close_all(&msghdr);
+
+ if (n <= 0) {
+ log_debug("Message too short");
+ continue;
+ }
+
+ ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
+ if (!ucred) {
+ log_debug("Received message without credentials. Ignoring.");
+ continue;
+ }
+
+ if (ucred->uid != 0) {
+ log_debug("Got request from unprivileged user. Ignoring.");
+ continue;
+ }
+
+ if (passphrase[0] == '+') {
+ /* An empty message refers to the empty password */
+ if (n == 1)
+ l = strv_new("");
+ else
+ l = strv_parse_nulstr(passphrase+1, n-1);
+ explicit_bzero_safe(passphrase, n);
+ if (!l) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (strv_isempty(l)) {
+ l = strv_free(l);
+ log_debug("Invalid packet");
+ continue;
+ }
+
+ break;
+ }
+
+ if (passphrase[0] == '-') {
+ r = -ECANCELED;
+ goto finish;
+ }
+
+ log_debug("Invalid packet");
+ }
+
+ if (keyname)
+ (void) add_to_keyring_and_log(keyname, flags, l);
+
+ *ret = TAKE_PTR(l);
+ r = 0;
+
+finish:
+ if (socket_name)
+ (void) unlink(socket_name);
+
+ (void) unlink(temp);
+
+ if (final[0])
+ (void) unlink(final);
+
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0);
+ return r;
+}
+
+int ask_password_auto(
+ const char *message,
+ const char *icon,
+ const char *id,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ char ***ret) {
+
+ int r;
+
+ assert(ret);
+
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) &&
+ keyname &&
+ ((flags & ASK_PASSWORD_NO_TTY) || !isatty(STDIN_FILENO)) &&
+ (flags & ASK_PASSWORD_NO_AGENT)) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r != -ENOKEY)
+ return r;
+ }
+
+ if (!(flags & ASK_PASSWORD_NO_TTY) && isatty(STDIN_FILENO))
+ return ask_password_tty(-1, message, keyname, until, flags, NULL, ret);
+
+ if (!(flags & ASK_PASSWORD_NO_AGENT))
+ return ask_password_agent(message, icon, id, keyname, until, flags, ret);
+
+ return -EUNATCH;
+}
diff --git a/src/shared/ask-password-api.h b/src/shared/ask-password-api.h
new file mode 100644
index 0000000..7aac5e5
--- /dev/null
+++ b/src/shared/ask-password-api.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+
+typedef enum AskPasswordFlags {
+ ASK_PASSWORD_ACCEPT_CACHED = 1 << 0,
+ ASK_PASSWORD_PUSH_CACHE = 1 << 1,
+ ASK_PASSWORD_ECHO = 1 << 2, /* show the password literally while reading, instead of "*" */
+ ASK_PASSWORD_SILENT = 1 << 3, /* do no show any password at all while reading */
+ ASK_PASSWORD_NO_TTY = 1 << 4,
+ ASK_PASSWORD_NO_AGENT = 1 << 5,
+ ASK_PASSWORD_CONSOLE_COLOR = 1 << 6, /* Use color if /dev/console points to a console that supports color */
+} AskPasswordFlags;
+
+int ask_password_tty(int tty_fd, const char *message, const char *keyname, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret);
+int ask_password_plymouth(const char *message, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret);
+int ask_password_agent(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret);
+int ask_password_auto(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret);
diff --git a/src/shared/barrier.c b/src/shared/barrier.c
new file mode 100644
index 0000000..9c93d61
--- /dev/null
+++ b/src/shared/barrier.c
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "fd-util.h"
+#include "macro.h"
+
+/**
+ * Barriers
+ * This barrier implementation provides a simple synchronization method based
+ * on file-descriptors that can safely be used between threads and processes. A
+ * barrier object contains 2 shared counters based on eventfd. Both processes
+ * can now place barriers and wait for the other end to reach a random or
+ * specific barrier.
+ * Barriers are numbered, so you can either wait for the other end to reach any
+ * barrier or the last barrier that you placed. This way, you can use barriers
+ * for one-way *and* full synchronization. Note that even-though barriers are
+ * numbered, these numbers are internal and recycled once both sides reached the
+ * same barrier (implemented as a simple signed counter). It is thus not
+ * possible to address barriers by their ID.
+ *
+ * Barrier-API: Both ends can place as many barriers via barrier_place() as
+ * they want and each pair of barriers on both sides will be implicitly linked.
+ * Each side can use the barrier_wait/sync_*() family of calls to wait for the
+ * other side to place a specific barrier. barrier_wait_next() waits until the
+ * other side calls barrier_place(). No links between the barriers are
+ * considered and this simply serves as most basic asynchronous barrier.
+ * barrier_sync_next() is like barrier_wait_next() and waits for the other side
+ * to place their next barrier via barrier_place(). However, it only waits for
+ * barriers that are linked to a barrier we already placed. If the other side
+ * already placed more barriers than we did, barrier_sync_next() returns
+ * immediately.
+ * barrier_sync() extends barrier_sync_next() and waits until the other end
+ * placed as many barriers via barrier_place() as we did. If they already placed
+ * as many as we did (or more), it returns immediately.
+ *
+ * Additionally to basic barriers, an abortion event is available.
+ * barrier_abort() places an abortion event that cannot be undone. An abortion
+ * immediately cancels all placed barriers and replaces them. Any running and
+ * following wait/sync call besides barrier_wait_abortion() will immediately
+ * return false on both sides (otherwise, they always return true).
+ * barrier_abort() can be called multiple times on both ends and will be a
+ * no-op if already called on this side.
+ * barrier_wait_abortion() can be used to wait for the other side to call
+ * barrier_abort() and is the only wait/sync call that does not return
+ * immediately if we aborted outself. It only returns once the other side
+ * called barrier_abort().
+ *
+ * Barriers can be used for in-process and inter-process synchronization.
+ * However, for in-process synchronization you could just use mutexes.
+ * Therefore, main target is IPC and we require both sides to *not* share the FD
+ * table. If that's given, barriers provide target tracking: If the remote side
+ * exit()s, an abortion event is implicitly queued on the other side. This way,
+ * a sync/wait call will be woken up if the remote side crashed or exited
+ * unexpectedly. However, note that these abortion events are only queued if the
+ * barrier-queue has been drained. Therefore, it is safe to place a barrier and
+ * exit. The other side can safely wait on the barrier even though the exit
+ * queued an abortion event. Usually, the abortion event would overwrite the
+ * barrier, however, that's not true for exit-abortion events. Those are only
+ * queued if the barrier-queue is drained (thus, the receiving side has placed
+ * more barriers than the remote side).
+ */
+
+/**
+ * barrier_create() - Initialize a barrier object
+ * @obj: barrier to initialize
+ *
+ * This initializes a barrier object. The caller is responsible of allocating
+ * the memory and keeping it valid. The memory does not have to be zeroed
+ * beforehand.
+ * Two eventfd objects are allocated for each barrier. If allocation fails, an
+ * error is returned.
+ *
+ * If this function fails, the barrier is reset to an invalid state so it is
+ * safe to call barrier_destroy() on the object regardless whether the
+ * initialization succeeded or not.
+ *
+ * The caller is responsible to destroy the object via barrier_destroy() before
+ * releasing the underlying memory.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int barrier_create(Barrier *b) {
+ _cleanup_(barrier_destroyp) Barrier *staging = b;
+ int r;
+
+ assert(b);
+
+ b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+ if (b->me < 0)
+ return -errno;
+
+ b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+ if (b->them < 0)
+ return -errno;
+
+ r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
+ if (r < 0)
+ return -errno;
+
+ staging = NULL;
+ return 0;
+}
+
+/**
+ * barrier_destroy() - Destroy a barrier object
+ * @b: barrier to destroy or NULL
+ *
+ * This destroys a barrier object that has previously been passed to
+ * barrier_create(). The object is released and reset to invalid
+ * state. Therefore, it is safe to call barrier_destroy() multiple
+ * times or even if barrier_create() failed. However, barrier must be
+ * always initialized with BARRIER_NULL.
+ *
+ * If @b is NULL, this is a no-op.
+ */
+void barrier_destroy(Barrier *b) {
+ if (!b)
+ return;
+
+ b->me = safe_close(b->me);
+ b->them = safe_close(b->them);
+ safe_close_pair(b->pipe);
+ b->barriers = 0;
+}
+
+/**
+ * barrier_set_role() - Set the local role of the barrier
+ * @b: barrier to operate on
+ * @role: role to set on the barrier
+ *
+ * This sets the roles on a barrier object. This is needed to know
+ * which side of the barrier you're on. Usually, the parent creates
+ * the barrier via barrier_create() and then calls fork() or clone().
+ * Therefore, the FDs are duplicated and the child retains the same
+ * barrier object.
+ *
+ * Both sides need to call barrier_set_role() after fork() or clone()
+ * are done. If this is not done, barriers will not work correctly.
+ *
+ * Note that barriers could be supported without fork() or clone(). However,
+ * this is currently not needed so it hasn't been implemented.
+ */
+void barrier_set_role(Barrier *b, unsigned role) {
+ assert(b);
+ assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
+ /* make sure this is only called once */
+ assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
+
+ if (role == BARRIER_PARENT)
+ b->pipe[1] = safe_close(b->pipe[1]);
+ else {
+ b->pipe[0] = safe_close(b->pipe[0]);
+
+ /* swap me/them for children */
+ SWAP_TWO(b->me, b->them);
+ }
+}
+
+/* places barrier; returns false if we aborted, otherwise true */
+static bool barrier_write(Barrier *b, uint64_t buf) {
+ ssize_t len;
+
+ /* prevent new sync-points if we already aborted */
+ if (barrier_i_aborted(b))
+ return false;
+
+ assert(b->me >= 0);
+ do {
+ len = write(b->me, &buf, sizeof(buf));
+ } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
+
+ if (len != sizeof(buf))
+ goto error;
+
+ /* lock if we aborted */
+ if (buf >= (uint64_t)BARRIER_ABORTION) {
+ if (barrier_they_aborted(b))
+ b->barriers = BARRIER_WE_ABORTED;
+ else
+ b->barriers = BARRIER_I_ABORTED;
+ } else if (!barrier_is_aborted(b))
+ b->barriers += buf;
+
+ return !barrier_i_aborted(b);
+
+error:
+ /* If there is an unexpected error, we have to make this fatal. There
+ * is no way we can recover from sync-errors. Therefore, we close the
+ * pipe-ends and treat this as abortion. The other end will notice the
+ * pipe-close and treat it as abortion, too. */
+
+ safe_close_pair(b->pipe);
+ b->barriers = BARRIER_WE_ABORTED;
+ return false;
+}
+
+/* waits for barriers; returns false if they aborted, otherwise true */
+static bool barrier_read(Barrier *b, int64_t comp) {
+ if (barrier_they_aborted(b))
+ return false;
+
+ while (b->barriers > comp) {
+ struct pollfd pfd[2] = {
+ { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
+ .events = POLLHUP },
+ { .fd = b->them,
+ .events = POLLIN }};
+ uint64_t buf;
+ int r;
+
+ r = poll(pfd, ELEMENTSOF(pfd), -1);
+ if (r < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ continue;
+ goto error;
+ }
+ if (pfd[0].revents & POLLNVAL ||
+ pfd[1].revents & POLLNVAL)
+ goto error;
+
+ if (pfd[1].revents) {
+ ssize_t len;
+
+ /* events on @them signal new data for us */
+ len = read(b->them, &buf, sizeof(buf));
+ if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
+ continue;
+
+ if (len != sizeof(buf))
+ goto error;
+ } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
+ /* POLLHUP on the pipe tells us the other side exited.
+ * We treat this as implicit abortion. But we only
+ * handle it if there's no event on the eventfd. This
+ * guarantees that exit-abortions do not overwrite real
+ * barriers. */
+ buf = BARRIER_ABORTION;
+ else
+ continue;
+
+ /* lock if they aborted */
+ if (buf >= (uint64_t)BARRIER_ABORTION) {
+ if (barrier_i_aborted(b))
+ b->barriers = BARRIER_WE_ABORTED;
+ else
+ b->barriers = BARRIER_THEY_ABORTED;
+ } else if (!barrier_is_aborted(b))
+ b->barriers -= buf;
+ }
+
+ return !barrier_they_aborted(b);
+
+error:
+ /* If there is an unexpected error, we have to make this fatal. There
+ * is no way we can recover from sync-errors. Therefore, we close the
+ * pipe-ends and treat this as abortion. The other end will notice the
+ * pipe-close and treat it as abortion, too. */
+
+ safe_close_pair(b->pipe);
+ b->barriers = BARRIER_WE_ABORTED;
+ return false;
+}
+
+/**
+ * barrier_place() - Place a new barrier
+ * @b: barrier object
+ *
+ * This places a new barrier on the barrier object. If either side already
+ * aborted, this is a no-op and returns "false". Otherwise, the barrier is
+ * placed and this returns "true".
+ *
+ * Returns: true if barrier was placed, false if either side aborted.
+ */
+bool barrier_place(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_write(b, BARRIER_SINGLE);
+ return true;
+}
+
+/**
+ * barrier_abort() - Abort the synchronization
+ * @b: barrier object to abort
+ *
+ * This aborts the barrier-synchronization. If barrier_abort() was already
+ * called on this side, this is a no-op. Otherwise, the barrier is put into the
+ * ABORT-state and will stay there. The other side is notified about the
+ * abortion. Any following attempt to place normal barriers or to wait on normal
+ * barriers will return immediately as "false".
+ *
+ * You can wait for the other side to call barrier_abort(), too. Use
+ * barrier_wait_abortion() for that.
+ *
+ * Returns: false if the other side already aborted, true otherwise.
+ */
+bool barrier_abort(Barrier *b) {
+ assert(b);
+
+ barrier_write(b, BARRIER_ABORTION);
+ return !barrier_they_aborted(b);
+}
+
+/**
+ * barrier_wait_next() - Wait for the next barrier of the other side
+ * @b: barrier to operate on
+ *
+ * This waits until the other side places its next barrier. This is independent
+ * of any barrier-links and just waits for any next barrier of the other side.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_wait_next(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, b->barriers - 1);
+ return !barrier_is_aborted(b);
+}
+
+/**
+ * barrier_wait_abortion() - Wait for the other side to abort
+ * @b: barrier to operate on
+ *
+ * This waits until the other side called barrier_abort(). This can be called
+ * regardless whether the local side already called barrier_abort() or not.
+ *
+ * If the other side has already aborted, this returns immediately.
+ *
+ * Returns: false if the local side aborted, true otherwise.
+ */
+bool barrier_wait_abortion(Barrier *b) {
+ assert(b);
+
+ barrier_read(b, BARRIER_THEY_ABORTED);
+ return !barrier_i_aborted(b);
+}
+
+/**
+ * barrier_sync_next() - Wait for the other side to place a next linked barrier
+ * @b: barrier to operate on
+ *
+ * This is like barrier_wait_next() and waits for the other side to call
+ * barrier_place(). However, this only waits for linked barriers. That means, if
+ * the other side already placed more barriers than (or as much as) we did, this
+ * returns immediately instead of waiting.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_sync_next(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, MAX((int64_t)0, b->barriers - 1));
+ return !barrier_is_aborted(b);
+}
+
+/**
+ * barrier_sync() - Wait for the other side to place as many barriers as we did
+ * @b: barrier to operate on
+ *
+ * This is like barrier_sync_next() but waits for the other side to call
+ * barrier_place() as often as we did (in total). If they already placed as much
+ * as we did (or more), this returns immediately instead of waiting.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_sync(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, 0);
+ return !barrier_is_aborted(b);
+}
diff --git a/src/shared/barrier.h b/src/shared/barrier.h
new file mode 100644
index 0000000..b11dce4
--- /dev/null
+++ b/src/shared/barrier.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+/* See source file for an API description. */
+
+typedef struct Barrier Barrier;
+
+enum {
+ BARRIER_SINGLE = 1LL,
+ BARRIER_ABORTION = INT64_MAX,
+
+ /* bias values to store state; keep @WE < @THEY < @I */
+ BARRIER_BIAS = INT64_MIN,
+ BARRIER_WE_ABORTED = BARRIER_BIAS + 1LL,
+ BARRIER_THEY_ABORTED = BARRIER_BIAS + 2LL,
+ BARRIER_I_ABORTED = BARRIER_BIAS + 3LL,
+};
+
+enum {
+ BARRIER_PARENT,
+ BARRIER_CHILD,
+};
+
+struct Barrier {
+ int me;
+ int them;
+ int pipe[2];
+ int64_t barriers;
+};
+
+#define BARRIER_NULL {-1, -1, {-1, -1}, 0}
+
+int barrier_create(Barrier *obj);
+void barrier_destroy(Barrier *b);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Barrier*, barrier_destroy);
+
+void barrier_set_role(Barrier *b, unsigned role);
+
+bool barrier_place(Barrier *b);
+bool barrier_abort(Barrier *b);
+
+bool barrier_wait_next(Barrier *b);
+bool barrier_wait_abortion(Barrier *b);
+bool barrier_sync_next(Barrier *b);
+bool barrier_sync(Barrier *b);
+
+static inline bool barrier_i_aborted(Barrier *b) {
+ return IN_SET(b->barriers, BARRIER_I_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_they_aborted(Barrier *b) {
+ return IN_SET(b->barriers, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_we_aborted(Barrier *b) {
+ return b->barriers == BARRIER_WE_ABORTED;
+}
+
+static inline bool barrier_is_aborted(Barrier *b) {
+ return IN_SET(b->barriers,
+ BARRIER_I_ABORTED, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_place_and_sync(Barrier *b) {
+ (void) barrier_place(b);
+ return barrier_sync(b);
+}
diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c
new file mode 100644
index 0000000..1d05409
--- /dev/null
+++ b/src/shared/base-filesystem.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "base-filesystem.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+
+typedef struct BaseFilesystem {
+ const char *dir;
+ mode_t mode;
+ const char *target;
+ const char *exists;
+ bool ignore_failure;
+} BaseFilesystem;
+
+static const BaseFilesystem table[] = {
+ { "bin", 0, "usr/bin\0", NULL },
+ { "lib", 0, "usr/lib\0", NULL },
+ { "root", 0755, NULL, NULL, true },
+ { "sbin", 0, "usr/sbin\0", NULL },
+ { "usr", 0755, NULL, NULL },
+ { "var", 0755, NULL, NULL },
+ { "etc", 0755, NULL, NULL },
+ { "proc", 0755, NULL, NULL, true },
+ { "sys", 0755, NULL, NULL, true },
+ { "dev", 0755, NULL, NULL, true },
+#if defined(__i386__) || defined(__x86_64__)
+ { "lib64", 0, "usr/lib/x86_64-linux-gnu\0"
+ "usr/lib64\0", "ld-linux-x86-64.so.2" },
+#endif
+};
+
+int base_filesystem_create(const char *root, uid_t uid, gid_t gid) {
+ _cleanup_close_ int fd = -1;
+ size_t i;
+ int r;
+
+ fd = open(root, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open root file system: %m");
+
+ for (i = 0; i < ELEMENTSOF(table); i ++) {
+ if (faccessat(fd, table[i].dir, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
+ continue;
+
+ if (table[i].target) {
+ const char *target = NULL, *s;
+
+ /* check if one of the targets exists */
+ NULSTR_FOREACH(s, table[i].target) {
+ if (faccessat(fd, s, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
+ continue;
+
+ /* check if a specific file exists at the target path */
+ if (table[i].exists) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(s, table[i].exists);
+ if (!p)
+ return log_oom();
+
+ if (faccessat(fd, p, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
+ continue;
+ }
+
+ target = s;
+ break;
+ }
+
+ if (!target)
+ continue;
+
+ if (symlinkat(target, fd, table[i].dir) < 0) {
+ log_full_errno(IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to create symlink at %s/%s: %m", root, table[i].dir);
+
+ if (IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure)
+ continue;
+
+ return -errno;
+ }
+
+ if (uid_is_valid(uid) || gid_is_valid(gid)) {
+ if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "Failed to chown symlink at %s/%s: %m", root, table[i].dir);
+ }
+
+ continue;
+ }
+
+ RUN_WITH_UMASK(0000)
+ r = mkdirat(fd, table[i].dir, table[i].mode);
+ if (r < 0) {
+ log_full_errno(IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to create directory at %s/%s: %m", root, table[i].dir);
+
+ if (IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure)
+ continue;
+
+ return -errno;
+ }
+
+ if (uid != UID_INVALID || gid != UID_INVALID) {
+ if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "Failed to chown directory at %s/%s: %m", root, table[i].dir);
+ }
+ }
+
+ return 0;
+}
diff --git a/src/shared/base-filesystem.h b/src/shared/base-filesystem.h
new file mode 100644
index 0000000..a33975f
--- /dev/null
+++ b/src/shared/base-filesystem.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int base_filesystem_create(const char *root, uid_t uid, gid_t gid);
diff --git a/src/shared/binfmt-util.c b/src/shared/binfmt-util.c
new file mode 100644
index 0000000..724d7f2
--- /dev/null
+++ b/src/shared/binfmt-util.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/vfs.h>
+
+#include "binfmt-util.h"
+#include "fileio.h"
+#include "missing_magic.h"
+#include "stat-util.h"
+
+int disable_binfmt(void) {
+ int r;
+
+ /* Flush out all rules. This is important during shutdown to cover for rules using "F", since those
+ * might pin a file and thus block us from unmounting stuff cleanly.
+ *
+ * We are a bit careful here, since binfmt_misc might still be an autofs which we don't want to
+ * trigger. */
+
+ r = path_is_fs_type("/proc/sys/fs/binfmt_misc", BINFMTFS_MAGIC);
+ if (r == 0 || r == -ENOENT) {
+ log_debug("binfmt_misc is not mounted, not detaching entries.");
+ return 0;
+ }
+ if (r < 0)
+ return log_warning_errno(r, "Failed to determine whether binfmt_misc is mounted: %m");
+
+ r = write_string_file("/proc/sys/fs/binfmt_misc/status", "-1", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to unregister binfmt_misc entries: %m");
+
+ log_debug("Unregistered all remaining binfmt_misc entries.");
+ return 0;
+}
diff --git a/src/shared/binfmt-util.h b/src/shared/binfmt-util.h
new file mode 100644
index 0000000..2f008d1
--- /dev/null
+++ b/src/shared/binfmt-util.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int disable_binfmt(void);
diff --git a/src/shared/bitmap.c b/src/shared/bitmap.c
new file mode 100644
index 0000000..5d450c8
--- /dev/null
+++ b/src/shared/bitmap.c
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bitmap.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "memory-util.h"
+
+/* Bitmaps are only meant to store relatively small numbers
+ * (corresponding to, say, an enum), so it is ok to limit
+ * the max entry. 64k should be plenty. */
+#define BITMAPS_MAX_ENTRY 0xffff
+
+/* This indicates that we reached the end of the bitmap */
+#define BITMAP_END ((unsigned) -1)
+
+#define BITMAP_NUM_TO_OFFSET(n) ((n) / (sizeof(uint64_t) * 8))
+#define BITMAP_NUM_TO_REM(n) ((n) % (sizeof(uint64_t) * 8))
+#define BITMAP_OFFSET_TO_NUM(offset, rem) ((offset) * sizeof(uint64_t) * 8 + (rem))
+
+Bitmap *bitmap_new(void) {
+ return new0(Bitmap, 1);
+}
+
+Bitmap *bitmap_copy(Bitmap *b) {
+ Bitmap *ret;
+
+ ret = bitmap_new();
+ if (!ret)
+ return NULL;
+
+ ret->bitmaps = newdup(uint64_t, b->bitmaps, b->n_bitmaps);
+ if (!ret->bitmaps)
+ return mfree(ret);
+
+ ret->n_bitmaps = ret->bitmaps_allocated = b->n_bitmaps;
+ return ret;
+}
+
+void bitmap_free(Bitmap *b) {
+ if (!b)
+ return;
+
+ free(b->bitmaps);
+ free(b);
+}
+
+int bitmap_ensure_allocated(Bitmap **b) {
+ Bitmap *a;
+
+ assert(b);
+
+ if (*b)
+ return 0;
+
+ a = bitmap_new();
+ if (!a)
+ return -ENOMEM;
+
+ *b = a;
+
+ return 0;
+}
+
+int bitmap_set(Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ assert(b);
+
+ /* we refuse to allocate huge bitmaps */
+ if (n > BITMAPS_MAX_ENTRY)
+ return -ERANGE;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps) {
+ if (!GREEDY_REALLOC0(b->bitmaps, b->bitmaps_allocated, offset + 1))
+ return -ENOMEM;
+
+ b->n_bitmaps = offset + 1;
+ }
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ b->bitmaps[offset] |= bitmask;
+
+ return 0;
+}
+
+void bitmap_unset(Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ if (!b)
+ return;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps)
+ return;
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ b->bitmaps[offset] &= ~bitmask;
+}
+
+bool bitmap_isset(const Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ if (!b)
+ return false;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps)
+ return false;
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ return !!(b->bitmaps[offset] & bitmask);
+}
+
+bool bitmap_isclear(const Bitmap *b) {
+ unsigned i;
+
+ if (!b)
+ return true;
+
+ for (i = 0; i < b->n_bitmaps; i++)
+ if (b->bitmaps[i] != 0)
+ return false;
+
+ return true;
+}
+
+void bitmap_clear(Bitmap *b) {
+ if (!b)
+ return;
+
+ b->bitmaps = mfree(b->bitmaps);
+ b->n_bitmaps = 0;
+ b->bitmaps_allocated = 0;
+}
+
+bool bitmap_iterate(const Bitmap *b, Iterator *i, unsigned *n) {
+ uint64_t bitmask;
+ unsigned offset, rem;
+
+ assert(i);
+ assert(n);
+
+ if (!b || i->idx == BITMAP_END)
+ return false;
+
+ offset = BITMAP_NUM_TO_OFFSET(i->idx);
+ rem = BITMAP_NUM_TO_REM(i->idx);
+ bitmask = UINT64_C(1) << rem;
+
+ for (; offset < b->n_bitmaps; offset ++) {
+ if (b->bitmaps[offset]) {
+ for (; bitmask; bitmask <<= 1, rem ++) {
+ if (b->bitmaps[offset] & bitmask) {
+ *n = BITMAP_OFFSET_TO_NUM(offset, rem);
+ i->idx = *n + 1;
+
+ return true;
+ }
+ }
+ }
+
+ rem = 0;
+ bitmask = 1;
+ }
+
+ i->idx = BITMAP_END;
+
+ return false;
+}
+
+bool bitmap_equal(const Bitmap *a, const Bitmap *b) {
+ size_t common_n_bitmaps;
+ const Bitmap *c;
+ unsigned i;
+
+ if (a == b)
+ return true;
+
+ if (!a != !b)
+ return false;
+
+ if (!a)
+ return true;
+
+ common_n_bitmaps = MIN(a->n_bitmaps, b->n_bitmaps);
+ if (memcmp_safe(a->bitmaps, b->bitmaps, sizeof(uint64_t) * common_n_bitmaps) != 0)
+ return false;
+
+ c = a->n_bitmaps > b->n_bitmaps ? a : b;
+ for (i = common_n_bitmaps; i < c->n_bitmaps; i++)
+ if (c->bitmaps[i] != 0)
+ return false;
+
+ return true;
+}
diff --git a/src/shared/bitmap.h b/src/shared/bitmap.h
new file mode 100644
index 0000000..1c305a2
--- /dev/null
+++ b/src/shared/bitmap.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+
+typedef struct Bitmap {
+ uint64_t *bitmaps;
+ size_t n_bitmaps;
+ size_t bitmaps_allocated;
+} Bitmap;
+
+Bitmap *bitmap_new(void);
+Bitmap *bitmap_copy(Bitmap *b);
+int bitmap_ensure_allocated(Bitmap **b);
+void bitmap_free(Bitmap *b);
+
+int bitmap_set(Bitmap *b, unsigned n);
+void bitmap_unset(Bitmap *b, unsigned n);
+bool bitmap_isset(const Bitmap *b, unsigned n);
+bool bitmap_isclear(const Bitmap *b);
+void bitmap_clear(Bitmap *b);
+
+bool bitmap_iterate(const Bitmap *b, Iterator *i, unsigned *n);
+
+bool bitmap_equal(const Bitmap *a, const Bitmap *b);
+
+#define _BITMAP_FOREACH(n, b, i) \
+ for (Iterator i = {}; bitmap_iterate((b), &i, (unsigned*)&(n)); )
+#define BITMAP_FOREACH(n, b) \
+ _BITMAP_FOREACH(n, b, UNIQ_T(i, UNIQ))
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Bitmap*, bitmap_free);
+
+#define _cleanup_bitmap_free_ _cleanup_(bitmap_freep)
diff --git a/src/shared/blkid-util.h b/src/shared/blkid-util.h
new file mode 100644
index 0000000..3f38e9b
--- /dev/null
+++ b/src/shared/blkid-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_BLKID
+# include <blkid.h>
+
+# include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(blkid_probe, blkid_free_probe);
+#endif
diff --git a/src/shared/bond-util.c b/src/shared/bond-util.c
new file mode 100644
index 0000000..e04b201
--- /dev/null
+++ b/src/shared/bond-util.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bond-util.h"
+#include "string-table.h"
+
+static const char* const bond_mode_table[_NETDEV_BOND_MODE_MAX] = {
+ [NETDEV_BOND_MODE_BALANCE_RR] = "balance-rr",
+ [NETDEV_BOND_MODE_ACTIVE_BACKUP] = "active-backup",
+ [NETDEV_BOND_MODE_BALANCE_XOR] = "balance-xor",
+ [NETDEV_BOND_MODE_BROADCAST] = "broadcast",
+ [NETDEV_BOND_MODE_802_3AD] = "802.3ad",
+ [NETDEV_BOND_MODE_BALANCE_TLB] = "balance-tlb",
+ [NETDEV_BOND_MODE_BALANCE_ALB] = "balance-alb",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_mode, BondMode);
+
+static const char* const bond_xmit_hash_policy_table[_NETDEV_BOND_XMIT_HASH_POLICY_MAX] = {
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER2] = "layer2",
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER34] = "layer3+4",
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER23] = "layer2+3",
+ [NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23] = "encap2+3",
+ [NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34] = "encap3+4",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_xmit_hash_policy, BondXmitHashPolicy);
+
+static const char* const bond_lacp_rate_table[_NETDEV_BOND_LACP_RATE_MAX] = {
+ [NETDEV_BOND_LACP_RATE_SLOW] = "slow",
+ [NETDEV_BOND_LACP_RATE_FAST] = "fast",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_lacp_rate, BondLacpRate);
+
+static const char* const bond_ad_select_table[_NETDEV_BOND_AD_SELECT_MAX] = {
+ [NETDEV_BOND_AD_SELECT_STABLE] = "stable",
+ [NETDEV_BOND_AD_SELECT_BANDWIDTH] = "bandwidth",
+ [NETDEV_BOND_AD_SELECT_COUNT] = "count",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_ad_select, BondAdSelect);
+
+static const char* const bond_fail_over_mac_table[_NETDEV_BOND_FAIL_OVER_MAC_MAX] = {
+ [NETDEV_BOND_FAIL_OVER_MAC_NONE] = "none",
+ [NETDEV_BOND_FAIL_OVER_MAC_ACTIVE] = "active",
+ [NETDEV_BOND_FAIL_OVER_MAC_FOLLOW] = "follow",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_fail_over_mac, BondFailOverMac);
+
+static const char *const bond_arp_validate_table[_NETDEV_BOND_ARP_VALIDATE_MAX] = {
+ [NETDEV_BOND_ARP_VALIDATE_NONE] = "none",
+ [NETDEV_BOND_ARP_VALIDATE_ACTIVE]= "active",
+ [NETDEV_BOND_ARP_VALIDATE_BACKUP]= "backup",
+ [NETDEV_BOND_ARP_VALIDATE_ALL]= "all",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_arp_validate, BondArpValidate);
+
+static const char *const bond_arp_all_targets_table[_NETDEV_BOND_ARP_ALL_TARGETS_MAX] = {
+ [NETDEV_BOND_ARP_ALL_TARGETS_ANY] = "any",
+ [NETDEV_BOND_ARP_ALL_TARGETS_ALL] = "all",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_arp_all_targets, BondArpAllTargets);
+
+static const char *const bond_primary_reselect_table[_NETDEV_BOND_PRIMARY_RESELECT_MAX] = {
+ [NETDEV_BOND_PRIMARY_RESELECT_ALWAYS] = "always",
+ [NETDEV_BOND_PRIMARY_RESELECT_BETTER]= "better",
+ [NETDEV_BOND_PRIMARY_RESELECT_FAILURE]= "failure",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_primary_reselect, BondPrimaryReselect);
diff --git a/src/shared/bond-util.h b/src/shared/bond-util.h
new file mode 100644
index 0000000..a8f9ecb
--- /dev/null
+++ b/src/shared/bond-util.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_bonding.h>
+
+#include "macro.h"
+
+/*
+ * Maximum number of targets supported by the kernel for a single
+ * bond netdev.
+ */
+#define NETDEV_BOND_ARP_TARGETS_MAX 16
+
+typedef enum BondMode {
+ NETDEV_BOND_MODE_BALANCE_RR = BOND_MODE_ROUNDROBIN,
+ NETDEV_BOND_MODE_ACTIVE_BACKUP = BOND_MODE_ACTIVEBACKUP,
+ NETDEV_BOND_MODE_BALANCE_XOR = BOND_MODE_XOR,
+ NETDEV_BOND_MODE_BROADCAST = BOND_MODE_BROADCAST,
+ NETDEV_BOND_MODE_802_3AD = BOND_MODE_8023AD,
+ NETDEV_BOND_MODE_BALANCE_TLB = BOND_MODE_TLB,
+ NETDEV_BOND_MODE_BALANCE_ALB = BOND_MODE_ALB,
+ _NETDEV_BOND_MODE_MAX,
+ _NETDEV_BOND_MODE_INVALID = -1
+} BondMode;
+
+typedef enum BondXmitHashPolicy {
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER2 = BOND_XMIT_POLICY_LAYER2,
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER34 = BOND_XMIT_POLICY_LAYER34,
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER23 = BOND_XMIT_POLICY_LAYER23,
+ NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23 = BOND_XMIT_POLICY_ENCAP23,
+ NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34 = BOND_XMIT_POLICY_ENCAP34,
+ _NETDEV_BOND_XMIT_HASH_POLICY_MAX,
+ _NETDEV_BOND_XMIT_HASH_POLICY_INVALID = -1
+} BondXmitHashPolicy;
+
+typedef enum BondLacpRate {
+ NETDEV_BOND_LACP_RATE_SLOW,
+ NETDEV_BOND_LACP_RATE_FAST,
+ _NETDEV_BOND_LACP_RATE_MAX,
+ _NETDEV_BOND_LACP_RATE_INVALID = -1,
+} BondLacpRate;
+
+typedef enum BondAdSelect {
+ NETDEV_BOND_AD_SELECT_STABLE,
+ NETDEV_BOND_AD_SELECT_BANDWIDTH,
+ NETDEV_BOND_AD_SELECT_COUNT,
+ _NETDEV_BOND_AD_SELECT_MAX,
+ _NETDEV_BOND_AD_SELECT_INVALID = -1,
+} BondAdSelect;
+
+typedef enum BondFailOverMac {
+ NETDEV_BOND_FAIL_OVER_MAC_NONE,
+ NETDEV_BOND_FAIL_OVER_MAC_ACTIVE,
+ NETDEV_BOND_FAIL_OVER_MAC_FOLLOW,
+ _NETDEV_BOND_FAIL_OVER_MAC_MAX,
+ _NETDEV_BOND_FAIL_OVER_MAC_INVALID = -1,
+} BondFailOverMac;
+
+typedef enum BondArpValidate {
+ NETDEV_BOND_ARP_VALIDATE_NONE,
+ NETDEV_BOND_ARP_VALIDATE_ACTIVE,
+ NETDEV_BOND_ARP_VALIDATE_BACKUP,
+ NETDEV_BOND_ARP_VALIDATE_ALL,
+ _NETDEV_BOND_ARP_VALIDATE_MAX,
+ _NETDEV_BOND_ARP_VALIDATE_INVALID = -1,
+} BondArpValidate;
+
+typedef enum BondArpAllTargets {
+ NETDEV_BOND_ARP_ALL_TARGETS_ANY,
+ NETDEV_BOND_ARP_ALL_TARGETS_ALL,
+ _NETDEV_BOND_ARP_ALL_TARGETS_MAX,
+ _NETDEV_BOND_ARP_ALL_TARGETS_INVALID = -1,
+} BondArpAllTargets;
+
+typedef enum BondPrimaryReselect {
+ NETDEV_BOND_PRIMARY_RESELECT_ALWAYS,
+ NETDEV_BOND_PRIMARY_RESELECT_BETTER,
+ NETDEV_BOND_PRIMARY_RESELECT_FAILURE,
+ _NETDEV_BOND_PRIMARY_RESELECT_MAX,
+ _NETDEV_BOND_PRIMARY_RESELECT_INVALID = -1,
+} BondPrimaryReselect;
+
+const char *bond_mode_to_string(BondMode d) _const_;
+BondMode bond_mode_from_string(const char *d) _pure_;
+
+const char *bond_xmit_hash_policy_to_string(BondXmitHashPolicy d) _const_;
+BondXmitHashPolicy bond_xmit_hash_policy_from_string(const char *d) _pure_;
+
+const char *bond_lacp_rate_to_string(BondLacpRate d) _const_;
+BondLacpRate bond_lacp_rate_from_string(const char *d) _pure_;
+
+const char *bond_fail_over_mac_to_string(BondFailOverMac d) _const_;
+BondFailOverMac bond_fail_over_mac_from_string(const char *d) _pure_;
+
+const char *bond_ad_select_to_string(BondAdSelect d) _const_;
+BondAdSelect bond_ad_select_from_string(const char *d) _pure_;
+
+const char *bond_arp_validate_to_string(BondArpValidate d) _const_;
+BondArpValidate bond_arp_validate_from_string(const char *d) _pure_;
+
+const char *bond_arp_all_targets_to_string(BondArpAllTargets d) _const_;
+BondArpAllTargets bond_arp_all_targets_from_string(const char *d) _pure_;
+
+const char *bond_primary_reselect_to_string(BondPrimaryReselect d) _const_;
+BondPrimaryReselect bond_primary_reselect_from_string(const char *d) _pure_;
diff --git a/src/shared/boot-timestamps.c b/src/shared/boot-timestamps.c
new file mode 100644
index 0000000..8786e89
--- /dev/null
+++ b/src/shared/boot-timestamps.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "acpi-fpdt.h"
+#include "boot-timestamps.h"
+#include "efi-loader.h"
+#include "macro.h"
+#include "time-util.h"
+
+int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader) {
+ usec_t x = 0, y = 0, a;
+ int r;
+ dual_timestamp _n;
+
+ assert(firmware);
+ assert(loader);
+
+ if (!n) {
+ dual_timestamp_get(&_n);
+ n = &_n;
+ }
+
+ r = acpi_get_boot_usec(&x, &y);
+ if (r < 0) {
+ r = efi_loader_get_boot_usec(&x, &y);
+ if (r < 0)
+ return r;
+ }
+
+ /* Let's convert this to timestamps where the firmware
+ * began/loader began working. To make this more confusing:
+ * since usec_t is unsigned and the kernel's monotonic clock
+ * begins at kernel initialization we'll actually initialize
+ * the monotonic timestamps here as negative of the actual
+ * value. */
+
+ firmware->monotonic = y;
+ loader->monotonic = y - x;
+
+ a = n->monotonic + firmware->monotonic;
+ firmware->realtime = n->realtime > a ? n->realtime - a : 0;
+
+ a = n->monotonic + loader->monotonic;
+ loader->realtime = n->realtime > a ? n->realtime - a : 0;
+
+ return 0;
+}
diff --git a/src/shared/boot-timestamps.h b/src/shared/boot-timestamps.h
new file mode 100644
index 0000000..55b7ad1
--- /dev/null
+++ b/src/shared/boot-timestamps.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <time-util.h>
+
+int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader);
diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c
new file mode 100644
index 0000000..e50408a
--- /dev/null
+++ b/src/shared/bootspec.c
@@ -0,0 +1,1432 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <linux/magic.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "bootspec.h"
+#include "conf-files.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "dirent-util.h"
+#include "efivars.h"
+#include "efi-loader.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pe-header.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unaligned.h"
+#include "util.h"
+#include "virt.h"
+
+static void boot_entry_free(BootEntry *entry) {
+ assert(entry);
+
+ free(entry->id);
+ free(entry->id_old);
+ free(entry->path);
+ free(entry->root);
+ free(entry->title);
+ free(entry->show_title);
+ free(entry->version);
+ free(entry->machine_id);
+ free(entry->architecture);
+ strv_free(entry->options);
+ free(entry->kernel);
+ free(entry->efi);
+ strv_free(entry->initrd);
+ free(entry->device_tree);
+}
+
+static int boot_entry_load(
+ const char *root,
+ const char *path,
+ BootEntry *entry) {
+
+ _cleanup_(boot_entry_free) BootEntry tmp = {
+ .type = BOOT_ENTRY_CONF,
+ };
+
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line = 1;
+ char *b, *c;
+ int r;
+
+ assert(root);
+ assert(path);
+ assert(entry);
+
+ c = endswith_no_case(path, ".conf");
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry file suffix: %s", path);
+
+ b = basename(path);
+ tmp.id = strdup(b);
+ tmp.id_old = strndup(b, c - b);
+ if (!tmp.id || !tmp.id_old)
+ return log_oom();
+
+ if (!efi_loader_entry_name_valid(tmp.id))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry name: %s", tmp.id);
+
+ tmp.path = strdup(path);
+ if (!tmp.path)
+ return log_oom();
+
+ tmp.root = strdup(root);
+ if (!tmp.root)
+ return log_oom();
+
+ f = fopen(path, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", path);
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *field = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS)
+ return log_error_errno(r, "%s:%u: Line too long", path, line);
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+
+ line++;
+
+ if (IN_SET(*strstrip(buf), '#', '\0'))
+ continue;
+
+ p = buf;
+ r = extract_first_word(&p, &field, " \t", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line);
+ continue;
+ }
+ if (r == 0) {
+ log_warning("%s:%u: Bad syntax", path, line);
+ continue;
+ }
+
+ if (streq(field, "title"))
+ r = free_and_strdup(&tmp.title, p);
+ else if (streq(field, "version"))
+ r = free_and_strdup(&tmp.version, p);
+ else if (streq(field, "machine-id"))
+ r = free_and_strdup(&tmp.machine_id, p);
+ else if (streq(field, "architecture"))
+ r = free_and_strdup(&tmp.architecture, p);
+ else if (streq(field, "options"))
+ r = strv_extend(&tmp.options, p);
+ else if (streq(field, "linux"))
+ r = free_and_strdup(&tmp.kernel, p);
+ else if (streq(field, "efi"))
+ r = free_and_strdup(&tmp.efi, p);
+ else if (streq(field, "initrd"))
+ r = strv_extend(&tmp.initrd, p);
+ else if (streq(field, "devicetree"))
+ r = free_and_strdup(&tmp.device_tree, p);
+ else {
+ log_notice("%s:%u: Unknown line \"%s\", ignoring.", path, line, field);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+ }
+
+ *entry = tmp;
+ tmp = (BootEntry) {};
+ return 0;
+}
+
+void boot_config_free(BootConfig *config) {
+ size_t i;
+
+ assert(config);
+
+ free(config->default_pattern);
+ free(config->timeout);
+ free(config->editor);
+ free(config->auto_entries);
+ free(config->auto_firmware);
+ free(config->console_mode);
+ free(config->random_seed_mode);
+
+ free(config->entry_oneshot);
+ free(config->entry_default);
+
+ for (i = 0; i < config->n_entries; i++)
+ boot_entry_free(config->entries + i);
+ free(config->entries);
+}
+
+static int boot_loader_read_conf(const char *path, BootConfig *config) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line = 1;
+ int r;
+
+ assert(path);
+ assert(config);
+
+ f = fopen(path, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open \"%s\": %m", path);
+ }
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *field = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS)
+ return log_error_errno(r, "%s:%u: Line too long", path, line);
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+
+ line++;
+
+ if (IN_SET(*strstrip(buf), '#', '\0'))
+ continue;
+
+ p = buf;
+ r = extract_first_word(&p, &field, " \t", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line);
+ continue;
+ }
+ if (r == 0) {
+ log_warning("%s:%u: Bad syntax", path, line);
+ continue;
+ }
+
+ if (streq(field, "default"))
+ r = free_and_strdup(&config->default_pattern, p);
+ else if (streq(field, "timeout"))
+ r = free_and_strdup(&config->timeout, p);
+ else if (streq(field, "editor"))
+ r = free_and_strdup(&config->editor, p);
+ else if (streq(field, "auto-entries"))
+ r = free_and_strdup(&config->auto_entries, p);
+ else if (streq(field, "auto-firmware"))
+ r = free_and_strdup(&config->auto_firmware, p);
+ else if (streq(field, "console-mode"))
+ r = free_and_strdup(&config->console_mode, p);
+ else if (streq(field, "random-seed-mode"))
+ r = free_and_strdup(&config->random_seed_mode, p);
+ else {
+ log_notice("%s:%u: Unknown line \"%s\", ignoring.", path, line, field);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+ }
+
+ return 1;
+}
+
+static int boot_entry_compare(const BootEntry *a, const BootEntry *b) {
+ return str_verscmp(a->id, b->id);
+}
+
+static int boot_entries_find(
+ const char *root,
+ const char *dir,
+ BootEntry **entries,
+ size_t *n_entries) {
+
+ _cleanup_strv_free_ char **files = NULL;
+ size_t n_allocated = *n_entries;
+ char **f;
+ int r;
+
+ assert(root);
+ assert(dir);
+ assert(entries);
+ assert(n_entries);
+
+ r = conf_files_list(&files, ".conf", NULL, 0, dir);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list files in \"%s\": %m", dir);
+
+ STRV_FOREACH(f, files) {
+ if (!GREEDY_REALLOC0(*entries, n_allocated, *n_entries + 1))
+ return log_oom();
+
+ r = boot_entry_load(root, *f, *entries + *n_entries);
+ if (r < 0)
+ continue;
+
+ (*n_entries) ++;
+ }
+
+ return 0;
+}
+
+static int boot_entry_load_unified(
+ const char *root,
+ const char *path,
+ const char *osrelease,
+ const char *cmdline,
+ BootEntry *ret) {
+
+ _cleanup_free_ char *os_pretty_name = NULL, *os_id = NULL, *version_id = NULL, *build_id = NULL;
+ _cleanup_(boot_entry_free) BootEntry tmp = {
+ .type = BOOT_ENTRY_UNIFIED,
+ };
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *k;
+ char *b;
+ int r;
+
+ assert(root);
+ assert(path);
+ assert(osrelease);
+
+ k = path_startswith(path, root);
+ if (!k)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not below root: %s", path);
+
+ f = fmemopen_unlocked((void*) osrelease, strlen(osrelease), "r");
+ if (!f)
+ return log_error_errno(errno, "Failed to open os-release buffer: %m");
+
+ r = parse_env_file(f, "os-release",
+ "PRETTY_NAME", &os_pretty_name,
+ "ID", &os_id,
+ "VERSION_ID", &version_id,
+ "BUILD_ID", &build_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse os-release data from unified kernel image %s: %m", path);
+
+ if (!os_pretty_name || !os_id || !(version_id || build_id))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Missing fields in os-release data from unified kernel image %s, refusing.", path);
+
+ b = basename(path);
+ tmp.id = strdup(b);
+ tmp.id_old = strjoin(os_id, "-", version_id ?: build_id);
+ if (!tmp.id || !tmp.id_old)
+ return log_oom();
+
+ if (!efi_loader_entry_name_valid(tmp.id))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry name: %s", tmp.id);
+
+ tmp.path = strdup(path);
+ if (!tmp.path)
+ return log_oom();
+
+ tmp.root = strdup(root);
+ if (!tmp.root)
+ return log_oom();
+
+ tmp.kernel = strdup(skip_leading_chars(k, "/"));
+ if (!tmp.kernel)
+ return log_oom();
+
+ tmp.options = strv_new(skip_leading_chars(cmdline, WHITESPACE));
+ if (!tmp.options)
+ return log_oom();
+
+ delete_trailing_chars(tmp.options[0], WHITESPACE);
+
+ tmp.title = TAKE_PTR(os_pretty_name);
+
+ *ret = tmp;
+ tmp = (BootEntry) {};
+ return 0;
+}
+
+/* Maximum PE section we are willing to load (Note that sections we are not interested in may be larger, but
+ * the ones we do care about and we are willing to load into memory have this size limit.) */
+#define PE_SECTION_SIZE_MAX (4U*1024U*1024U)
+
+static int find_sections(
+ int fd,
+ char **ret_osrelease,
+ char **ret_cmdline) {
+
+ _cleanup_free_ struct PeSectionHeader *sections = NULL;
+ _cleanup_free_ char *osrelease = NULL, *cmdline = NULL;
+ size_t i, n_sections;
+ struct DosFileHeader dos;
+ struct PeHeader pe;
+ uint64_t start;
+ ssize_t n;
+
+ n = pread(fd, &dos, sizeof(dos), 0);
+ if (n < 0)
+ return log_error_errno(errno, "Failed read DOS header: %m");
+ if (n != sizeof(dos))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading DOS header, refusing.");
+
+ if (dos.Magic[0] != 'M' || dos.Magic[1] != 'Z')
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "DOS executable magic missing, refusing.");
+
+ start = unaligned_read_le32(&dos.ExeHeader);
+ n = pread(fd, &pe, sizeof(pe), start);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read PE header: %m");
+ if (n != sizeof(pe))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading PE header, refusing.");
+
+ if (pe.Magic[0] != 'P' || pe.Magic[1] != 'E' || pe.Magic[2] != 0 || pe.Magic[3] != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "PE executable magic missing, refusing.");
+
+ n_sections = unaligned_read_le16(&pe.FileHeader.NumberOfSections);
+ if (n_sections > 96)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "PE header has too many sections, refusing.");
+
+ sections = new(struct PeSectionHeader, n_sections);
+ if (!sections)
+ return log_oom();
+
+ n = pread(fd, sections,
+ n_sections * sizeof(struct PeSectionHeader),
+ start + sizeof(pe) + unaligned_read_le16(&pe.FileHeader.SizeOfOptionalHeader));
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read section data: %m");
+ if ((size_t) n != n_sections * sizeof(struct PeSectionHeader))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading sections, refusing.");
+
+ for (i = 0; i < n_sections; i++) {
+ _cleanup_free_ char *k = NULL;
+ uint32_t offset, size;
+ char **b;
+
+ if (strneq((char*) sections[i].Name, ".osrel", sizeof(sections[i].Name)))
+ b = &osrelease;
+ else if (strneq((char*) sections[i].Name, ".cmdline", sizeof(sections[i].Name)))
+ b = &cmdline;
+ else
+ continue;
+
+ if (*b)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Duplicate section %s, refusing.", sections[i].Name);
+
+ offset = unaligned_read_le32(&sections[i].PointerToRawData);
+ size = unaligned_read_le32(&sections[i].VirtualSize);
+
+ if (size > PE_SECTION_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Section %s too large, refusing.", sections[i].Name);
+
+ k = new(char, size+1);
+ if (!k)
+ return log_oom();
+
+ n = pread(fd, k, size, offset);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read section payload: %m");
+ if ((size_t) n != size)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading section payload, refusing:");
+
+ /* Allow one trailing NUL byte, but nothing more. */
+ if (size > 0 && memchr(k, 0, size - 1))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Section contains embedded NUL byte: %m");
+
+ k[size] = 0;
+ *b = TAKE_PTR(k);
+ }
+
+ if (!osrelease)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Image lacks .osrel section, refusing.");
+
+ if (ret_osrelease)
+ *ret_osrelease = TAKE_PTR(osrelease);
+ if (ret_cmdline)
+ *ret_cmdline = TAKE_PTR(cmdline);
+
+ return 0;
+}
+
+static int boot_entries_find_unified(
+ const char *root,
+ const char *dir,
+ BootEntry **entries,
+ size_t *n_entries) {
+
+ _cleanup_(closedirp) DIR *d = NULL;
+ size_t n_allocated = *n_entries;
+ struct dirent *de;
+ int r;
+
+ assert(root);
+ assert(dir);
+ assert(entries);
+ assert(n_entries);
+
+ d = opendir(dir);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", dir);
+ }
+
+ FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read %s: %m", dir)) {
+ _cleanup_free_ char *j = NULL, *osrelease = NULL, *cmdline = NULL;
+ _cleanup_close_ int fd = -1;
+
+ dirent_ensure_type(d, de);
+ if (!dirent_is_file(de))
+ continue;
+
+ if (!endswith_no_case(de->d_name, ".efi"))
+ continue;
+
+ if (!GREEDY_REALLOC0(*entries, n_allocated, *n_entries + 1))
+ return log_oom();
+
+ fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ log_warning_errno(errno, "Failed to open %s/%s, ignoring: %m", dir, de->d_name);
+ continue;
+ }
+
+ r = fd_verify_regular(fd);
+ if (r < 0) {
+ log_warning_errno(r, "File %s/%s is not regular, ignoring: %m", dir, de->d_name);
+ continue;
+ }
+
+ r = find_sections(fd, &osrelease, &cmdline);
+ if (r < 0)
+ continue;
+
+ j = path_join(dir, de->d_name);
+ if (!j)
+ return log_oom();
+
+ r = boot_entry_load_unified(root, j, osrelease, cmdline, *entries + *n_entries);
+ if (r < 0)
+ continue;
+
+ (*n_entries) ++;
+ }
+
+ return 0;
+}
+
+static bool find_nonunique(BootEntry *entries, size_t n_entries, bool *arr) {
+ size_t i, j;
+ bool non_unique = false;
+
+ assert(entries || n_entries == 0);
+ assert(arr || n_entries == 0);
+
+ for (i = 0; i < n_entries; i++)
+ arr[i] = false;
+
+ for (i = 0; i < n_entries; i++)
+ for (j = 0; j < n_entries; j++)
+ if (i != j && streq(boot_entry_title(entries + i),
+ boot_entry_title(entries + j)))
+ non_unique = arr[i] = arr[j] = true;
+
+ return non_unique;
+}
+
+static int boot_entries_uniquify(BootEntry *entries, size_t n_entries) {
+ char *s;
+ size_t i;
+ int r;
+ bool arr[n_entries];
+
+ assert(entries || n_entries == 0);
+
+ /* Find _all_ non-unique titles */
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add version to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i] && entries[i].version) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].version);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add machine-id to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i] && entries[i].machine_id) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].machine_id);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add file name to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i]) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].id);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ return 0;
+}
+
+static int boot_entries_select_default(const BootConfig *config) {
+ int i;
+
+ assert(config);
+ assert(config->entries || config->n_entries == 0);
+
+ if (config->n_entries == 0) {
+ log_debug("Found no default boot entry :(");
+ return -1; /* -1 means "no default" */
+ }
+
+ if (config->entry_oneshot)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (streq(config->entry_oneshot, config->entries[i].id)) {
+ log_debug("Found default: id \"%s\" is matched by LoaderEntryOneShot",
+ config->entries[i].id);
+ return i;
+ }
+
+ if (config->entry_default)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (streq(config->entry_default, config->entries[i].id)) {
+ log_debug("Found default: id \"%s\" is matched by LoaderEntryDefault",
+ config->entries[i].id);
+ return i;
+ }
+
+ if (config->default_pattern)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (fnmatch(config->default_pattern, config->entries[i].id, FNM_CASEFOLD) == 0) {
+ log_debug("Found default: id \"%s\" is matched by pattern \"%s\"",
+ config->entries[i].id, config->default_pattern);
+ return i;
+ }
+
+ log_debug("Found default: last entry \"%s\"", config->entries[config->n_entries - 1].id);
+ return config->n_entries - 1;
+}
+
+int boot_entries_load_config(
+ const char *esp_path,
+ const char *xbootldr_path,
+ BootConfig *config) {
+
+ const char *p;
+ int r;
+
+ assert(config);
+
+ if (esp_path) {
+ p = strjoina(esp_path, "/loader/loader.conf");
+ r = boot_loader_read_conf(p, config);
+ if (r < 0)
+ return r;
+
+ p = strjoina(esp_path, "/loader/entries");
+ r = boot_entries_find(esp_path, p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+
+ p = strjoina(esp_path, "/EFI/Linux/");
+ r = boot_entries_find_unified(esp_path, p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+ }
+
+ if (xbootldr_path) {
+ p = strjoina(xbootldr_path, "/loader/entries");
+ r = boot_entries_find(xbootldr_path, p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+
+ p = strjoina(xbootldr_path, "/EFI/Linux/");
+ r = boot_entries_find_unified(xbootldr_path, p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+ }
+
+ typesafe_qsort(config->entries, config->n_entries, boot_entry_compare);
+
+ r = boot_entries_uniquify(config->entries, config->n_entries);
+ if (r < 0)
+ return log_error_errno(r, "Failed to uniquify boot entries: %m");
+
+ if (is_efi_boot()) {
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &config->entry_oneshot);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENODATA)) {
+ log_warning_errno(r, "Failed to read EFI variable \"LoaderEntryOneShot\": %m");
+ if (r == -ENOMEM)
+ return r;
+ }
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryDefault", &config->entry_default);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENODATA)) {
+ log_warning_errno(r, "Failed to read EFI variable \"LoaderEntryDefault\": %m");
+ if (r == -ENOMEM)
+ return r;
+ }
+ }
+
+ config->default_entry = boot_entries_select_default(config);
+ return 0;
+}
+
+int boot_entries_load_config_auto(
+ const char *override_esp_path,
+ const char *override_xbootldr_path,
+ BootConfig *config) {
+
+ _cleanup_free_ char *esp_where = NULL, *xbootldr_where = NULL;
+ int r;
+
+ assert(config);
+
+ /* This function is similar to boot_entries_load_config(), however we automatically search for the
+ * ESP and the XBOOTLDR partition unless it is explicitly specified. Also, if the user did not pass
+ * an ESP or XBOOTLDR path directly, let's see if /run/boot-loader-entries/ exists. If so, let's
+ * read data from there, as if it was an ESP (i.e. loading both entries and loader.conf data from
+ * it). This allows other boot loaders to pass boot loader entry information to our tools if they
+ * want to. */
+
+ if (!override_esp_path && !override_xbootldr_path) {
+ if (access("/run/boot-loader-entries/", F_OK) >= 0)
+ return boot_entries_load_config("/run/boot-loader-entries/", NULL, config);
+
+ if (errno != ENOENT)
+ return log_error_errno(errno,
+ "Failed to determine whether /run/boot-loader-entries/ exists: %m");
+ }
+
+ r = find_esp_and_warn(override_esp_path, false, &esp_where, NULL, NULL, NULL, NULL);
+ if (r < 0) /* we don't log about ENOKEY here, but propagate it, leaving it to the caller to log */
+ return r;
+
+ r = find_xbootldr_and_warn(override_xbootldr_path, false, &xbootldr_where, NULL);
+ if (r < 0 && r != -ENOKEY)
+ return r; /* It's fine if the XBOOTLDR partition doesn't exist, hence we ignore ENOKEY here */
+
+ return boot_entries_load_config(esp_where, xbootldr_where, config);
+}
+
+int boot_entries_augment_from_loader(
+ BootConfig *config,
+ char **found_by_loader,
+ bool only_auto) {
+
+ static const char *const title_table[] = {
+ /* Pretty names for a few well-known automatically discovered entries. */
+ "auto-osx", "macOS",
+ "auto-windows", "Windows Boot Manager",
+ "auto-efi-shell", "EFI Shell",
+ "auto-efi-default", "EFI Default Loader",
+ "auto-reboot-to-firmware-setup", "Reboot Into Firmware Interface",
+ };
+
+ size_t n_allocated;
+ char **i;
+
+ assert(config);
+
+ /* Let's add the entries discovered by the boot loader to the end of our list, unless they are
+ * already included there. */
+
+ n_allocated = config->n_entries;
+
+ STRV_FOREACH(i, found_by_loader) {
+ _cleanup_free_ char *c = NULL, *t = NULL, *p = NULL;
+ char **a, **b;
+
+ if (boot_config_has_entry(config, *i))
+ continue;
+
+ if (only_auto && !startswith(*i, "auto-"))
+ continue;
+
+ c = strdup(*i);
+ if (!c)
+ return log_oom();
+
+ STRV_FOREACH_PAIR(a, b, (char**) title_table)
+ if (streq(*a, *i)) {
+ t = strdup(*b);
+ if (!t)
+ return log_oom();
+ break;
+ }
+
+ p = efi_variable_path(EFI_VENDOR_LOADER, "LoaderEntries");
+ if (!p)
+ return log_oom();
+
+ if (!GREEDY_REALLOC0(config->entries, n_allocated, config->n_entries + 1))
+ return log_oom();
+
+ config->entries[config->n_entries++] = (BootEntry) {
+ .type = BOOT_ENTRY_LOADER,
+ .id = TAKE_PTR(c),
+ .title = TAKE_PTR(t),
+ .path = TAKE_PTR(p),
+ };
+ }
+
+ return 0;
+}
+
+/********************************************************************************/
+
+static int verify_esp_blkid(
+ dev_t devid,
+ bool searching,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ sd_id128_t uuid = SD_ID128_NULL;
+ uint64_t pstart = 0, psize = 0;
+ uint32_t part = 0;
+
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *node = NULL;
+ const char *v;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, devid, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(ENOMEM), "Failed to open file system \"%s\": %m", node);
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == -2)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" is ambiguous.", node);
+ else if (r == 1)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" does not contain a label.", node);
+ else if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe file system \"%s\": %m", node);
+
+ r = blkid_probe_lookup_value(b, "TYPE", &v, NULL);
+ if (r != 0)
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "No filesystem found on \"%s\": %m", node);
+ if (!streq(v, "vfat"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not FAT.", node);
+
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL);
+ if (r != 0)
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not located on a partitioned block device.", node);
+ if (!streq(v, "gpt"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not on a GPT partition table.", node);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition type UUID of \"%s\": %m", node);
+ if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" has wrong type for an EFI System Partition (ESP).", node);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition entry UUID of \"%s\": %m", node);
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_NUMBER", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition number of \"%s\": %m", node);
+ r = safe_atou32(v, &part);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field.");
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_OFFSET", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition offset of \"%s\": %m", node);
+ r = safe_atou64(v, &pstart);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field.");
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SIZE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition size of \"%s\": %m", node);
+ r = safe_atou64(v, &psize);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field.");
+#endif
+
+ if (ret_part)
+ *ret_part = part;
+ if (ret_pstart)
+ *ret_pstart = pstart;
+ if (ret_psize)
+ *ret_psize = psize;
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+static int verify_esp_udev(
+ dev_t devid,
+ bool searching,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *node = NULL;
+ sd_id128_t uuid = SD_ID128_NULL;
+ uint64_t pstart = 0, psize = 0;
+ uint32_t part = 0;
+ const char *v;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, devid, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+
+ r = sd_device_new_from_devnum(&d, 'b', devid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ r = sd_device_get_property_value(d, "ID_FS_TYPE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "vfat"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not FAT.", node );
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_SCHEME", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "gpt"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not on a GPT partition table.", node);
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" has wrong type for an EFI System Partition (ESP).", node);
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_UUID", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v);
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_NUMBER", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = safe_atou32(v, &part);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field.");
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_OFFSET", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = safe_atou64(v, &pstart);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field.");
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_SIZE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = safe_atou64(v, &psize);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field.");
+
+ if (ret_part)
+ *ret_part = part;
+ if (ret_pstart)
+ *ret_pstart = pstart;
+ if (ret_psize)
+ *ret_psize = psize;
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+static int verify_fsroot_dir(
+ const char *path,
+ bool searching,
+ bool unprivileged_mode,
+ dev_t *ret_dev) {
+
+ struct stat st, st2;
+ const char *t2, *trigger;
+ int r;
+
+ assert(path);
+ assert(ret_dev);
+
+ /* So, the ESP and XBOOTLDR partition are commonly located on an autofs mount. stat() on the
+ * directory won't trigger it, if it is not mounted yet. Let's hence explicitly trigger it here,
+ * before stat()ing */
+ trigger = strjoina(path, "/trigger"); /* Filename doesn't matter... */
+ (void) access(trigger, F_OK);
+
+ if (stat(path, &st) < 0)
+ return log_full_errno((searching && errno == ENOENT) ||
+ (unprivileged_mode && errno == EACCES) ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to determine block device node of \"%s\": %m", path);
+
+ if (major(st.st_dev) == 0)
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "Block device node of \"%s\" is invalid.", path);
+
+ t2 = strjoina(path, "/..");
+ if (stat(t2, &st2) < 0) {
+ if (errno != EACCES)
+ r = -errno;
+ else {
+ _cleanup_free_ char *parent = NULL;
+
+ /* If going via ".." didn't work due to EACCESS, then let's determine the parent path
+ * directly instead. It's not as good, due to symlinks and such, but we can't do
+ * anything better here. */
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return log_oom();
+
+ if (stat(parent, &st2) < 0)
+ r = -errno;
+ else
+ r = 0;
+ }
+
+ if (r < 0)
+ return log_full_errno(unprivileged_mode && r == -EACCES ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to determine block device node of parent of \"%s\": %m", path);
+ }
+
+ if (st.st_dev == st2.st_dev)
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "Directory \"%s\" is not the root of the file system.", path);
+
+ if (ret_dev)
+ *ret_dev = st.st_dev;
+
+ return 0;
+}
+
+static int verify_esp(
+ const char *p,
+ bool searching,
+ bool unprivileged_mode,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ bool relax_checks;
+ dev_t devid;
+ int r;
+
+ assert(p);
+
+ /* This logs about all errors, except:
+ *
+ * -ENOENT → if 'searching' is set, and the dir doesn't exist
+ * -EADDRNOTAVAIL → if 'searching' is set, and the dir doesn't look like an ESP
+ * -EACESS → if 'unprivileged_mode' is set, and we have trouble accessing the thing
+ */
+
+ relax_checks = getenv_bool("SYSTEMD_RELAX_ESP_CHECKS") > 0;
+
+ /* Non-root user can only check the status, so if an error occurred in the following, it does not cause any
+ * issues. Let's also, silence the error messages. */
+
+ if (!relax_checks) {
+ struct statfs sfs;
+
+ if (statfs(p, &sfs) < 0)
+ /* If we are searching for the mount point, don't generate a log message if we can't find the path */
+ return log_full_errno((searching && errno == ENOENT) ||
+ (unprivileged_mode && errno == EACCES) ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to check file system type of \"%s\": %m", p);
+
+ if (!F_TYPE_EQUAL(sfs.f_type, MSDOS_SUPER_MAGIC))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not a FAT EFI System Partition (ESP) file system.", p);
+ }
+
+ r = verify_fsroot_dir(p, searching, unprivileged_mode, &devid);
+ if (r < 0)
+ return r;
+
+ /* In a container we don't have access to block devices, skip this part of the verification, we trust
+ * the container manager set everything up correctly on its own. */
+ if (detect_container() > 0 || relax_checks)
+ goto finish;
+
+ /* If we are unprivileged we ask udev for the metadata about the partition. If we are privileged we
+ * use blkid instead. Why? Because this code is called from 'bootctl' which is pretty much an
+ * emergency recovery tool that should also work when udev isn't up (i.e. from the emergency shell),
+ * however blkid can't work if we have no privileges to access block devices directly, which is why
+ * we use udev in that case. */
+ if (unprivileged_mode)
+ return verify_esp_udev(devid, searching, ret_part, ret_pstart, ret_psize, ret_uuid);
+ else
+ return verify_esp_blkid(devid, searching, ret_part, ret_pstart, ret_psize, ret_uuid);
+
+finish:
+ if (ret_part)
+ *ret_part = 0;
+ if (ret_pstart)
+ *ret_pstart = 0;
+ if (ret_psize)
+ *ret_psize = 0;
+ if (ret_uuid)
+ *ret_uuid = SD_ID128_NULL;
+
+ return 0;
+}
+
+int find_esp_and_warn(
+ const char *path,
+ bool unprivileged_mode,
+ char **ret_path,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ int r;
+
+ /* This logs about all errors except:
+ *
+ * -ENOKEY → when we can't find the partition
+ * -EACCESS → when unprivileged_mode is true, and we can't access something
+ */
+
+ if (path) {
+ r = verify_esp(path, false, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r < 0)
+ return r;
+
+ goto found;
+ }
+
+ path = getenv("SYSTEMD_ESP_PATH");
+ if (path) {
+ if (!path_is_valid(path) || !path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "$SYSTEMD_ESP_PATH does not refer to absolute path, refusing to use it: %s",
+ path);
+
+ /* Note: when the user explicitly configured things with an env var we won't validate the mount
+ * point. After all we want this to be useful for testing. */
+ goto found;
+ }
+
+ FOREACH_STRING(path, "/efi", "/boot", "/boot/efi") {
+
+ r = verify_esp(path, true, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r >= 0)
+ goto found;
+ if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */
+ return r;
+ }
+
+ /* No logging here */
+ return -ENOKEY;
+
+found:
+ if (ret_path) {
+ char *c;
+
+ c = strdup(path);
+ if (!c)
+ return log_oom();
+
+ *ret_path = c;
+ }
+
+ return 0;
+}
+
+static int verify_xbootldr_blkid(
+ dev_t devid,
+ bool searching,
+ sd_id128_t *ret_uuid) {
+
+ sd_id128_t uuid = SD_ID128_NULL;
+
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *node = NULL;
+ const char *v;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, devid, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(ENOMEM), "Failed to open file system \"%s\": %m", node);
+
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == -2)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" is ambiguous.", node);
+ else if (r == 1)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" does not contain a label.", node);
+ else if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe file system \"%s\": %m", node);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition scheme of \"%s\": %m", node);
+ if (streq(v, "gpt")) {
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition type UUID of \"%s\": %m", node);
+ if (!streq(v, "bc13c2ff-59e6-4262-a352-b275fd6f7172"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" has wrong type for extended boot loader partition.", node);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition entry UUID of \"%s\": %m", node);
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v);
+
+ } else if (streq(v, "dos")) {
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition type UUID of \"%s\": %m", node);
+ if (!streq(v, "0xea"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" has wrong type for extended boot loader partition.", node);
+
+ } else
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" is not on a GPT or DOS partition table.", node);
+#endif
+
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+static int verify_xbootldr_udev(
+ dev_t devid,
+ bool searching,
+ sd_id128_t *ret_uuid) {
+
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *node = NULL;
+ sd_id128_t uuid = SD_ID128_NULL;
+ const char *v;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, devid, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+
+ r = sd_device_new_from_devnum(&d, 'b', devid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_SCHEME", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+
+ if (streq(v, "gpt")) {
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "bc13c2ff-59e6-4262-a352-b275fd6f7172"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" has wrong type for extended boot loader partition.", node);
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_UUID", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v);
+
+ } else if (streq(v, "dos")) {
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "0xea"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" has wrong type for extended boot loader partition.", node);
+ } else
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" is not on a GPT or DOS partition table.", node);
+
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+static int verify_xbootldr(
+ const char *p,
+ bool searching,
+ bool unprivileged_mode,
+ sd_id128_t *ret_uuid) {
+
+ bool relax_checks;
+ dev_t devid;
+ int r;
+
+ assert(p);
+
+ relax_checks = getenv_bool("SYSTEMD_RELAX_XBOOTLDR_CHECKS") > 0;
+
+ r = verify_fsroot_dir(p, searching, unprivileged_mode, &devid);
+ if (r < 0)
+ return r;
+
+ if (detect_container() > 0 || relax_checks)
+ goto finish;
+
+ if (unprivileged_mode)
+ return verify_xbootldr_udev(devid, searching, ret_uuid);
+ else
+ return verify_xbootldr_blkid(devid, searching, ret_uuid);
+
+finish:
+ if (ret_uuid)
+ *ret_uuid = SD_ID128_NULL;
+
+ return 0;
+}
+
+int find_xbootldr_and_warn(
+ const char *path,
+ bool unprivileged_mode,
+ char **ret_path,
+ sd_id128_t *ret_uuid) {
+
+ int r;
+
+ /* Similar to find_esp_and_warn(), but finds the XBOOTLDR partition. Returns the same errors. */
+
+ if (path) {
+ r = verify_xbootldr(path, false, unprivileged_mode, ret_uuid);
+ if (r < 0)
+ return r;
+
+ goto found;
+ }
+
+ path = getenv("SYSTEMD_XBOOTLDR_PATH");
+ if (path) {
+ if (!path_is_valid(path) || !path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "$SYSTEMD_XBOOTLDR_PATH does not refer to absolute path, refusing to use it: %s",
+ path);
+
+ goto found;
+ }
+
+ r = verify_xbootldr("/boot", true, unprivileged_mode, ret_uuid);
+ if (r >= 0) {
+ path = "/boot";
+ goto found;
+ }
+ if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */
+ return r;
+
+ return -ENOKEY;
+
+found:
+ if (ret_path) {
+ char *c;
+
+ c = strdup(path);
+ if (!c)
+ return log_oom();
+
+ *ret_path = c;
+ }
+
+ return 0;
+}
diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h
new file mode 100644
index 0000000..1557bd0
--- /dev/null
+++ b/src/shared/bootspec.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "string-util.h"
+
+typedef enum BootEntryType {
+ BOOT_ENTRY_CONF, /* Type #1 entries: *.conf files */
+ BOOT_ENTRY_UNIFIED, /* Type #2 entries: *.efi files */
+ BOOT_ENTRY_LOADER, /* Additional entries augmented from LoaderEntries EFI var */
+ _BOOT_ENTRY_MAX,
+ _BOOT_ENTRY_INVALID = -1,
+} BootEntryType;
+
+typedef struct BootEntry {
+ BootEntryType type;
+ char *id; /* This is the file basename without extension */
+ char *id_old; /* Old-style ID, for deduplication purposes. */
+ char *path; /* This is the full path to the drop-in file */
+ char *root; /* The root path in which the drop-in was found, i.e. to which 'kernel', 'efi' and 'initrd' are relative */
+ char *title;
+ char *show_title;
+ char *version;
+ char *machine_id;
+ char *architecture;
+ char **options;
+ char *kernel; /* linux is #defined to 1, yikes! */
+ char *efi;
+ char **initrd;
+ char *device_tree;
+} BootEntry;
+
+typedef struct BootConfig {
+ char *default_pattern;
+ char *timeout;
+ char *editor;
+ char *auto_entries;
+ char *auto_firmware;
+ char *console_mode;
+ char *random_seed_mode;
+
+ char *entry_oneshot;
+ char *entry_default;
+
+ BootEntry *entries;
+ size_t n_entries;
+ ssize_t default_entry;
+} BootConfig;
+
+static inline bool boot_config_has_entry(BootConfig *config, const char *id) {
+ size_t j;
+
+ for (j = 0; j < config->n_entries; j++) {
+ const char* entry_id_old = config->entries[j].id_old;
+ if (streq(config->entries[j].id, id) ||
+ (entry_id_old && streq(entry_id_old, id)))
+ return true;
+ }
+
+ return false;
+}
+
+static inline BootEntry* boot_config_default_entry(BootConfig *config) {
+ if (config->default_entry < 0)
+ return NULL;
+
+ return config->entries + config->default_entry;
+}
+
+void boot_config_free(BootConfig *config);
+int boot_entries_load_config(const char *esp_path, const char *xbootldr_path, BootConfig *config);
+int boot_entries_load_config_auto(const char *override_esp_path, const char *override_xbootldr_path, BootConfig *config);
+int boot_entries_augment_from_loader(BootConfig *config, char **list, bool only_auto);
+
+static inline const char* boot_entry_title(const BootEntry *entry) {
+ return entry->show_title ?: entry->title ?: entry->id;
+}
+
+int find_esp_and_warn(const char *path, bool unprivileged_mode, char **ret_path, uint32_t *ret_part, uint64_t *ret_pstart, uint64_t *ret_psize, sd_id128_t *ret_uuid);
+int find_xbootldr_and_warn(const char *path, bool unprivileged_mode, char **ret_path,sd_id128_t *ret_uuid);
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
new file mode 100644
index 0000000..1023914
--- /dev/null
+++ b/src/shared/bpf-program.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "path-util.h"
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+
+ p = new0(BPFProgram, 1);
+ if (!p)
+ return -ENOMEM;
+
+ p->n_ref = 1;
+ p->prog_type = prog_type;
+ p->kernel_fd = -1;
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static BPFProgram *bpf_program_free(BPFProgram *p) {
+ assert(p);
+
+ /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
+ * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
+ * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
+ * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
+ * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
+ * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
+ * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
+ * whenever we close the BPF fd. */
+ (void) bpf_program_cgroup_detach(p);
+
+ safe_close(p->kernel_fd);
+ free(p->instructions);
+ free(p->attached_path);
+
+ return mfree(p);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
+ return -EBUSY;
+
+ if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
+ return -ENOMEM;
+
+ memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
+ p->n_instructions += count;
+
+ return 0;
+}
+
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
+ union bpf_attr attr;
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) { /* make this idempotent */
+ memzero(log_buf, log_size);
+ return 0;
+ }
+
+ // FIXME: Clang doesn't 0-pad with structured initialization, causing
+ // the kernel to reject the bpf_attr as invalid. See:
+ // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
+ // Ideally it should behave like GCC, so that we can remove these workarounds.
+ zero(attr);
+ attr.prog_type = p->prog_type;
+ attr.insns = PTR_TO_UINT64(p->instructions);
+ attr.insn_cnt = p->n_instructions;
+ attr.license = PTR_TO_UINT64("GPL");
+ attr.log_buf = PTR_TO_UINT64(log_buf);
+ attr.log_level = !!log_buf;
+ attr.log_size = log_size;
+
+ p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (p->kernel_fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) {
+ union bpf_attr attr;
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */
+ return -EBUSY;
+
+ zero(attr);
+ attr.pathname = PTR_TO_UINT64(path);
+
+ p->kernel_fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+ if (p->kernel_fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
+ _cleanup_free_ char *copy = NULL;
+ _cleanup_close_ int fd = -1;
+ union bpf_attr attr;
+ int r;
+
+ assert(p);
+ assert(type >= 0);
+ assert(path);
+
+ if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
+ return -EINVAL;
+
+ /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
+ * refuse this early. */
+ if (p->attached_path) {
+ if (!path_equal(p->attached_path, path))
+ return -EBUSY;
+ if (p->attached_type != type)
+ return -EBUSY;
+ if (p->attached_flags != flags)
+ return -EBUSY;
+
+ /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
+ * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
+ * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
+ * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
+ * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
+ * would remain in effect. */
+ if (flags != BPF_F_ALLOW_OVERRIDE)
+ return 0;
+ }
+
+ /* Ensure we have a kernel object for this. */
+ r = bpf_program_load_kernel(p, NULL, 0);
+ if (r < 0)
+ return r;
+
+ copy = strdup(path);
+ if (!copy)
+ return -ENOMEM;
+
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ zero(attr);
+ attr.attach_type = type;
+ attr.target_fd = fd;
+ attr.attach_bpf_fd = p->kernel_fd;
+ attr.attach_flags = flags;
+
+ if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ free_and_replace(p->attached_path, copy);
+ p->attached_type = type;
+ p->attached_flags = flags;
+
+ return 0;
+}
+
+int bpf_program_cgroup_detach(BPFProgram *p) {
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+
+ if (!p->attached_path)
+ return -EUNATCH;
+
+ fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
+ * implicitly by the removal, hence don't complain */
+
+ } else {
+ union bpf_attr attr;
+
+ zero(attr);
+ attr.attach_type = p->attached_type;
+ attr.target_fd = fd;
+ attr.attach_bpf_fd = p->kernel_fd;
+
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+ }
+
+ p->attached_path = mfree(p->attached_path);
+
+ return 0;
+}
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
+ union bpf_attr attr;
+ int fd;
+
+ zero(attr);
+ attr.map_type = type;
+ attr.key_size = key_size;
+ attr.value_size = value_size;
+ attr.max_entries = max_entries;
+ attr.map_flags = flags;
+
+ fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int bpf_map_update_element(int fd, const void *key, void *value) {
+ union bpf_attr attr;
+
+ zero(attr);
+ attr.map_fd = fd;
+ attr.key = PTR_TO_UINT64(key);
+ attr.value = PTR_TO_UINT64(value);
+
+ if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_map_lookup_element(int fd, const void *key, void *value) {
+ union bpf_attr attr;
+
+ zero(attr);
+ attr.map_fd = fd;
+ attr.key = PTR_TO_UINT64(key);
+ attr.value = PTR_TO_UINT64(value);
+
+ if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
new file mode 100644
index 0000000..eef77f9
--- /dev/null
+++ b/src/shared/bpf-program.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef struct BPFProgram BPFProgram;
+
+struct BPFProgram {
+ unsigned n_ref;
+
+ int kernel_fd;
+ uint32_t prog_type;
+
+ size_t n_instructions;
+ size_t allocated;
+ struct bpf_insn *instructions;
+
+ char *attached_path;
+ int attached_type;
+ uint32_t attached_flags;
+};
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
+BPFProgram *bpf_program_unref(BPFProgram *p);
+BPFProgram *bpf_program_ref(BPFProgram *p);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path);
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
+int bpf_program_cgroup_detach(BPFProgram *p);
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
+int bpf_map_update_element(int fd, const void *key, void *value);
+int bpf_map_lookup_element(int fd, const void *key, void *value);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
diff --git a/src/shared/bridge-util.c b/src/shared/bridge-util.c
new file mode 100644
index 0000000..e1a8bcb
--- /dev/null
+++ b/src/shared/bridge-util.c
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bridge-util.h"
+#include "string-table.h"
+
+static const char* const bridge_state_table[_NETDEV_BRIDGE_STATE_MAX] = {
+ [NETDEV_BRIDGE_STATE_DISABLED] = "disabled",
+ [NETDEV_BRIDGE_STATE_LISTENING] = "listening",
+ [NETDEV_BRIDGE_STATE_LEARNING] = "learning",
+ [NETDEV_BRIDGE_STATE_FORWARDING] = "forwarding",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bridge_state, BridgeState);
diff --git a/src/shared/bridge-util.h b/src/shared/bridge-util.h
new file mode 100644
index 0000000..c9b02d8
--- /dev/null
+++ b/src/shared/bridge-util.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_bridge.h>
+
+#include "conf-parser.h"
+
+typedef enum BridgeState {
+ NETDEV_BRIDGE_STATE_DISABLED = BR_STATE_DISABLED,
+ NETDEV_BRIDGE_STATE_LISTENING = BR_STATE_LISTENING,
+ NETDEV_BRIDGE_STATE_LEARNING = BR_STATE_LEARNING,
+ NETDEV_BRIDGE_STATE_FORWARDING = BR_STATE_FORWARDING,
+ NETDEV_BRIDGE_STATE_BLOCKING = BR_STATE_BLOCKING,
+ _NETDEV_BRIDGE_STATE_MAX,
+ _NETDEV_BRIDGE_STATE_INVALID = -1,
+} BridgeState;
+
+const char *bridge_state_to_string(BridgeState d) _const_;
+BridgeState bridge_state_from_string(const char *d) _pure_;
diff --git a/src/shared/bus-get-properties.c b/src/shared/bus-get-properties.c
new file mode 100644
index 0000000..32f68d5
--- /dev/null
+++ b/src/shared/bus-get-properties.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-get-properties.h"
+#include "rlimit-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+int bus_property_get_bool(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int b = *(bool*) userdata;
+
+ return sd_bus_message_append_basic(reply, 'b', &b);
+}
+
+int bus_property_set_bool(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int b, r;
+
+ r = sd_bus_message_read(value, "b", &b);
+ if (r < 0)
+ return r;
+
+ *(bool*) userdata = b;
+ return 0;
+}
+
+int bus_property_get_id128(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ sd_id128_t *id = userdata;
+
+ if (sd_id128_is_null(*id)) /* Add an empty array if the ID is zero */
+ return sd_bus_message_append(reply, "ay", 0);
+ else
+ return sd_bus_message_append_array(reply, 'y', id->bytes, 16);
+}
+
+int bus_property_get_percent(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ char pstr[DECIMAL_STR_MAX(int) + 2];
+ int p = *(int*) userdata;
+
+ xsprintf(pstr, "%d%%", p);
+
+ return sd_bus_message_append_basic(reply, 's', pstr);
+}
+
+#if __SIZEOF_SIZE_T__ != 8
+int bus_property_get_size(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t sz = *(size_t*) userdata;
+
+ return sd_bus_message_append_basic(reply, 't', &sz);
+}
+#endif
+
+#if __SIZEOF_LONG__ != 8
+int bus_property_get_long(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int64_t l = *(long*) userdata;
+
+ return sd_bus_message_append_basic(reply, 'x', &l);
+}
+
+int bus_property_get_ulong(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t ul = *(unsigned long*) userdata;
+
+ return sd_bus_message_append_basic(reply, 't', &ul);
+}
+#endif
+
+int bus_property_get_rlimit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const char *is_soft;
+ struct rlimit *rl;
+ uint64_t u;
+ rlim_t x;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ is_soft = endswith(property, "Soft");
+
+ rl = *(struct rlimit**) userdata;
+ if (rl)
+ x = is_soft ? rl->rlim_cur : rl->rlim_max;
+ else {
+ struct rlimit buf = {};
+ const char *s, *p;
+ int z;
+
+ /* Chop off "Soft" suffix */
+ s = is_soft ? strndupa(property, is_soft - property) : property;
+
+ /* Skip over any prefix, such as "Default" */
+ assert_se(p = strstr(s, "Limit"));
+
+ z = rlimit_from_string(p + 5);
+ assert(z >= 0);
+
+ (void) getrlimit(z, &buf);
+ x = is_soft ? buf.rlim_cur : buf.rlim_max;
+ }
+
+ /* rlim_t might have different sizes, let's map RLIMIT_INFINITY to (uint64_t) -1, so that it is the same on all
+ * archs */
+ u = x == RLIM_INFINITY ? (uint64_t) -1 : (uint64_t) x;
+
+ return sd_bus_message_append(reply, "t", u);
+}
diff --git a/src/shared/bus-get-properties.h b/src/shared/bus-get-properties.h
new file mode 100644
index 0000000..9832c0d
--- /dev/null
+++ b/src/shared/bus-get-properties.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "macro.h"
+
+int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error);
+int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_get_percent(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+#define bus_property_get_usec ((sd_bus_property_get_t) NULL)
+#define bus_property_set_usec ((sd_bus_property_set_t) NULL)
+
+assert_cc(sizeof(int) == sizeof(int32_t));
+#define bus_property_get_int ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(unsigned) == sizeof(uint32_t));
+#define bus_property_get_unsigned ((sd_bus_property_get_t) NULL)
+
+/* On 64bit machines we can use the default serializer for size_t and
+ * friends, otherwise we need to cast this manually */
+#if __SIZEOF_SIZE_T__ == 8
+#define bus_property_get_size ((sd_bus_property_get_t) NULL)
+#else
+int bus_property_get_size(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+#endif
+
+#if __SIZEOF_LONG__ == 8
+#define bus_property_get_long ((sd_bus_property_get_t) NULL)
+#define bus_property_get_ulong ((sd_bus_property_get_t) NULL)
+#else
+int bus_property_get_long(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_get_ulong(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+#endif
+
+/* uid_t and friends on Linux 32 bit. This means we can just use the
+ * default serializer for 32bit unsigned, for serializing it, and map
+ * it to NULL here */
+assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+#define bus_property_get_uid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+#define bus_property_get_gid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+#define bus_property_get_pid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(mode_t) == sizeof(uint32_t));
+#define bus_property_get_mode ((sd_bus_property_get_t) NULL)
+
+int bus_property_get_rlimit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+#define BUS_DEFINE_PROPERTY_GET_GLOBAL(function, bus_type, val) \
+ int function(sd_bus *bus, \
+ const char *path, \
+ const char *interface, \
+ const char *property, \
+ sd_bus_message *reply, \
+ void *userdata, \
+ sd_bus_error *error) { \
+ \
+ assert(bus); \
+ assert(reply); \
+ \
+ return sd_bus_message_append(reply, bus_type, val); \
+ }
+
+#define BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, get2) \
+ int function(sd_bus *bus, \
+ const char *path, \
+ const char *interface, \
+ const char *property, \
+ sd_bus_message *reply, \
+ void *userdata, \
+ sd_bus_error *error) { \
+ \
+ data_type *data = userdata; \
+ \
+ assert(bus); \
+ assert(reply); \
+ assert(data); \
+ \
+ return sd_bus_message_append(reply, bus_type, \
+ get2(get1(data))); \
+ }
+
+#define ident(x) (x)
+#define BUS_DEFINE_PROPERTY_GET(function, bus_type, data_type, get1) \
+ BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, ident)
+
+#define ref(x) (*(x))
+#define BUS_DEFINE_PROPERTY_GET_REF(function, bus_type, data_type, get) \
+ BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, ref, get)
+
+#define BUS_DEFINE_PROPERTY_GET_ENUM(function, name, type) \
+ BUS_DEFINE_PROPERTY_GET_REF(function, "s", type, name##_to_string)
+
+#define BUS_PROPERTY_DUAL_TIMESTAMP(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, realtime), (flags)), \
+ SD_BUS_PROPERTY(name "Monotonic", "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, monotonic), (flags))
diff --git a/src/shared/bus-locator.c b/src/shared/bus-locator.c
new file mode 100644
index 0000000..3754d1d
--- /dev/null
+++ b/src/shared/bus-locator.c
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-locator.h"
+#include "macro.h"
+
+const BusLocator* const bus_home_mgr = &(BusLocator){
+ .destination = "org.freedesktop.home1",
+ .path = "/org/freedesktop/home1",
+ .interface = "org.freedesktop.home1.Manager",
+};
+
+const BusLocator* const bus_import_mgr = &(BusLocator){
+ .destination ="org.freedesktop.import1",
+ .path = "/org/freedesktop/import1",
+ .interface = "org.freedesktop.import1.Manager"
+};
+
+const BusLocator* const bus_locale = &(BusLocator){
+ .destination = "org.freedesktop.locale1",
+ .path = "/org/freedesktop/locale1",
+ .interface = "org.freedesktop.locale1"
+};
+
+const BusLocator* const bus_login_mgr = &(BusLocator){
+ .destination = "org.freedesktop.login1",
+ .path = "/org/freedesktop/login1",
+ .interface = "org.freedesktop.login1.Manager"
+};
+
+const BusLocator* const bus_machine_mgr = &(BusLocator){
+ .destination ="org.freedesktop.machine1",
+ .path = "/org/freedesktop/machine1",
+ .interface = "org.freedesktop.machine1.Manager"
+};
+
+const BusLocator* const bus_network_mgr = &(BusLocator){
+ .destination = "org.freedesktop.network1",
+ .path = "/org/freedesktop/network1",
+ .interface = "org.freedesktop.network1.Manager"
+};
+
+const BusLocator* const bus_portable_mgr = &(BusLocator){
+ .destination = "org.freedesktop.portable1",
+ .path = "/org/freedesktop/portable1",
+ .interface = "org.freedesktop.portable1.Manager"
+};
+
+const BusLocator* const bus_resolve_mgr = &(BusLocator){
+ .destination = "org.freedesktop.resolve1",
+ .path = "/org/freedesktop/resolve1",
+ .interface = "org.freedesktop.resolve1.Manager"
+};
+
+const BusLocator* const bus_systemd_mgr = &(BusLocator){
+ .destination = "org.freedesktop.systemd1",
+ .path = "/org/freedesktop/systemd1",
+ .interface = "org.freedesktop.systemd1.Manager"
+};
+
+const BusLocator* const bus_timedate = &(BusLocator){
+ .destination = "org.freedesktop.timedate1",
+ .path = "/org/freedesktop/timedate1",
+ .interface = "org.freedesktop.timedate1"
+};
+
+/* Shorthand flavors of the sd-bus convenience helpers with destination,path,interface strings encapsulated
+ * within a single struct. */
+int bus_call_method_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ assert(locator);
+
+ va_start(ap, types);
+ r = sd_bus_call_method_asyncv(bus, slot, locator->destination, locator->path, locator->interface, member, callback, userdata, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int bus_call_method(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ assert(locator);
+
+ va_start(ap, types);
+ r = sd_bus_call_methodv(bus, locator->destination, locator->path, locator->interface, member, error, reply, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int bus_get_property(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *type) {
+
+ assert(locator);
+
+ return sd_bus_get_property(bus, locator->destination, locator->path, locator->interface, member, error, reply, type);
+}
+
+int bus_get_property_trivial(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ char type, void *ptr) {
+
+ assert(locator);
+
+ return sd_bus_get_property_trivial(bus, locator->destination, locator->path, locator->interface, member, error, type, ptr);
+}
+
+int bus_get_property_string(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ char **ret) {
+
+ assert(locator);
+
+ return sd_bus_get_property_string(bus, locator->destination, locator->path, locator->interface, member, error, ret);
+}
+
+int bus_get_property_strv(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ char ***ret) {
+
+ assert(locator);
+
+ return sd_bus_get_property_strv(bus, locator->destination, locator->path, locator->interface, member, error, ret);
+}
+
+int bus_set_property(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ const char *type, ...) {
+
+ va_list ap;
+ int r;
+
+ assert(locator);
+
+ va_start(ap, type);
+ r = sd_bus_set_propertyv(bus, locator->destination, locator->path, locator->interface, member, error, type, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int bus_match_signal(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ assert(locator);
+
+ return sd_bus_match_signal(bus, ret, locator->destination, locator->path, locator->interface, member, callback, userdata);
+}
+
+int bus_match_signal_async(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ assert(locator);
+
+ return sd_bus_match_signal_async(bus, ret, locator->destination, locator->path, locator->interface, member, callback, install_callback, userdata);
+}
+
+int bus_message_new_method_call(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const BusLocator *locator,
+ const char *member) {
+
+ assert(locator);
+
+ return sd_bus_message_new_method_call(bus, m, locator->destination, locator->path, locator->interface, member);
+}
diff --git a/src/shared/bus-locator.h b/src/shared/bus-locator.h
new file mode 100644
index 0000000..fe3b876
--- /dev/null
+++ b/src/shared/bus-locator.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef struct BusLocator {
+ const char *destination;
+ const char *path;
+ const char *interface;
+} BusLocator;
+
+extern const BusLocator* const bus_home_mgr;
+extern const BusLocator* const bus_import_mgr;
+extern const BusLocator* const bus_locale;
+extern const BusLocator* const bus_login_mgr;
+extern const BusLocator* const bus_machine_mgr;
+extern const BusLocator* const bus_network_mgr;
+extern const BusLocator* const bus_portable_mgr;
+extern const BusLocator* const bus_resolve_mgr;
+extern const BusLocator* const bus_systemd_mgr;
+extern const BusLocator* const bus_timedate;
+
+/* Shorthand flavors of the sd-bus convenience helpers with destination,path,interface strings encapsulated
+ * within a single struct. */
+int bus_call_method_async(sd_bus *bus, sd_bus_slot **slot, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, void *userdata, const char *types, ...);
+int bus_call_method(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, sd_bus_message **reply, const char *types, ...);
+int bus_get_property(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, sd_bus_message **reply, const char *type);
+int bus_get_property_trivial(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char type, void *ptr);
+int bus_get_property_string(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char **ret);
+int bus_get_property_strv(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char ***ret);
+int bus_set_property(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, const char *type, ...);
+int bus_match_signal(sd_bus *bus, sd_bus_slot **ret, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, void *userdata);
+int bus_match_signal_async(sd_bus *bus, sd_bus_slot **ret, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, sd_bus_message_handler_t install_callback, void *userdata);
+int bus_message_new_method_call(sd_bus *bus, sd_bus_message **m, const BusLocator *locator, const char *member);
diff --git a/src/shared/bus-log-control-api.c b/src/shared/bus-log-control-api.c
new file mode 100644
index 0000000..06e6697
--- /dev/null
+++ b/src/shared/bus-log-control-api.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "bus-log-control-api.h"
+#include "bus-util.h"
+#include "log.h"
+#include "sd-bus.h"
+#include "syslog-util.h"
+
+int bus_property_get_log_level(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = log_level_to_string_alloc(log_get_max_level(), &t);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "s", t);
+}
+
+int bus_property_set_log_level(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ r = sd_bus_message_read(value, "s", &t);
+ if (r < 0)
+ return r;
+
+ r = log_level_from_string(t);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log level '%s'", t);
+
+ log_info("Setting log level to %s.", t);
+ log_set_max_level(r);
+
+ return 0;
+}
+
+BUS_DEFINE_PROPERTY_GET_GLOBAL(bus_property_get_log_target, "s", log_target_to_string(log_get_target()));
+
+int bus_property_set_log_target(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ LogTarget target;
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ r = sd_bus_message_read(value, "s", &t);
+ if (r < 0)
+ return r;
+
+ target = log_target_from_string(t);
+ if (target < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log target '%s'", t);
+
+ log_info("Setting log target to %s.", log_target_to_string(target));
+ log_set_target(target);
+ log_open();
+
+ return 0;
+}
+
+BUS_DEFINE_PROPERTY_GET_GLOBAL(bus_property_get_syslog_identifier, "s", program_invocation_short_name);
+
+static const sd_bus_vtable log_control_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_WRITABLE_PROPERTY("LogLevel", "s", bus_property_get_log_level, bus_property_set_log_level, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("LogTarget", "s", bus_property_get_log_target, bus_property_set_log_target, 0, 0),
+ SD_BUS_PROPERTY("SyslogIdentifier", "s", bus_property_get_syslog_identifier, 0, 0),
+
+ /* One of those days we might want to add a similar, second interface to cover common service
+ * operations such as Reload(), Reexecute(), Exit() … and maybe some properties exposing version
+ * number and other meta-data of the service. */
+
+ SD_BUS_VTABLE_END,
+};
+
+const BusObjectImplementation log_control_object = {
+ "/org/freedesktop/LogControl1",
+ "org.freedesktop.LogControl1",
+ .vtables = BUS_VTABLES(log_control_vtable),
+};
diff --git a/src/shared/bus-log-control-api.h b/src/shared/bus-log-control-api.h
new file mode 100644
index 0000000..85f60a7
--- /dev/null
+++ b/src/shared/bus-log-control-api.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-object.h"
+
+extern const BusObjectImplementation log_control_object;
+static inline int bus_log_control_api_register(sd_bus *bus) {
+ return bus_add_implementation(bus, &log_control_object, NULL);
+}
+
+int bus_property_get_log_level(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_set_log_level(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error);
+
+int bus_property_get_log_target(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_set_log_target(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+int bus_property_get_syslog_identifier(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
diff --git a/src/shared/bus-map-properties.c b/src/shared/bus-map-properties.c
new file mode 100644
index 0000000..8460856
--- /dev/null
+++ b/src/shared/bus-map-properties.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-map-properties.h"
+#include "alloc-util.h"
+#include "strv.h"
+#include "bus-message.h"
+
+int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ sd_id128_t *p = userdata;
+ const void *v;
+ size_t n;
+ int r;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, &v, &n);
+ if (r < 0)
+ return r;
+
+ if (n == 0)
+ *p = SD_ID128_NULL;
+ else if (n == 16)
+ memcpy((*p).bytes, v, n);
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+int bus_map_strv_sort(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***p = userdata;
+ int r;
+
+ r = bus_message_read_strv_extend(m, &l);
+ if (r < 0)
+ return r;
+
+ r = strv_extend_strv(p, l, false);
+ if (r < 0)
+ return r;
+
+ strv_sort(*p);
+ return 0;
+}
+
+static int map_basic(sd_bus *bus, const char *member, sd_bus_message *m, unsigned flags, sd_bus_error *error, void *userdata) {
+ char type;
+ int r;
+
+ r = sd_bus_message_peek_type(m, &type, NULL);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH: {
+ const char **p = userdata;
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return r;
+
+ if (isempty(s))
+ s = NULL;
+
+ if (flags & BUS_MAP_STRDUP)
+ return free_and_strdup((char **) userdata, s);
+
+ *p = s;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***p = userdata;
+
+ r = bus_message_read_strv_extend(m, &l);
+ if (r < 0)
+ return r;
+
+ return strv_extend_strv(p, l, false);
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return r;
+
+ if (flags & BUS_MAP_BOOLEAN_AS_BOOL)
+ *(bool*) userdata = b;
+ else
+ *(int*) userdata = b;
+
+ return 0;
+ }
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t u, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ *p = u;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t t, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &t);
+ if (r < 0)
+ return r;
+
+ *p = t;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return r;
+
+ *p = d;
+ return 0;
+ }}
+
+ return -EOPNOTSUPP;
+}
+
+int bus_message_map_all_properties(
+ sd_bus_message *m,
+ const struct bus_properties_map *map,
+ unsigned flags,
+ sd_bus_error *error,
+ void *userdata) {
+
+ int r;
+
+ assert(m);
+ assert(map);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+ const struct bus_properties_map *prop;
+ const char *member;
+ const char *contents;
+ void *v;
+ unsigned i;
+
+ r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &member);
+ if (r < 0)
+ return r;
+
+ for (i = 0, prop = NULL; map[i].member; i++)
+ if (streq(map[i].member, member)) {
+ prop = &map[i];
+ break;
+ }
+
+ if (prop) {
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r < 0)
+ return r;
+
+ v = (uint8_t *)userdata + prop->offset;
+ if (map[i].set)
+ r = prop->set(sd_bus_message_get_bus(m), member, m, error, v);
+ else
+ r = map_basic(sd_bus_message_get_bus(m), member, m, flags, error, v);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(m, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+int bus_map_all_properties(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const struct bus_properties_map *map,
+ unsigned flags,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ void *userdata) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+ assert(destination);
+ assert(path);
+ assert(map);
+ assert(reply || (flags & BUS_MAP_STRDUP));
+
+ r = sd_bus_call_method(
+ bus,
+ destination,
+ path,
+ "org.freedesktop.DBus.Properties",
+ "GetAll",
+ error,
+ &m,
+ "s", "");
+ if (r < 0)
+ return r;
+
+ r = bus_message_map_all_properties(m, map, flags, error, userdata);
+ if (r < 0)
+ return r;
+
+ if (reply)
+ *reply = sd_bus_message_ref(m);
+
+ return r;
+}
diff --git a/src/shared/bus-map-properties.h b/src/shared/bus-map-properties.h
new file mode 100644
index 0000000..2a766e3
--- /dev/null
+++ b/src/shared/bus-map-properties.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef int (*bus_property_set_t) (sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+
+struct bus_properties_map {
+ const char *member;
+ const char *signature;
+ bus_property_set_t set;
+ size_t offset;
+};
+
+enum {
+ BUS_MAP_STRDUP = 1 << 0, /* If set, each "s" message is duplicated. Thus, each pointer needs to be freed. */
+ BUS_MAP_BOOLEAN_AS_BOOL = 1 << 1, /* If set, each "b" message is written to a bool pointer. If not set, "b" is written to a int pointer. */
+};
+
+int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+int bus_map_strv_sort(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+
+int bus_message_map_all_properties(sd_bus_message *m, const struct bus_properties_map *map, unsigned flags, sd_bus_error *error, void *userdata);
+int bus_map_all_properties(sd_bus *bus, const char *destination, const char *path, const struct bus_properties_map *map,
+ unsigned flags, sd_bus_error *error, sd_bus_message **reply, void *userdata);
diff --git a/src/shared/bus-message-util.c b/src/shared/bus-message-util.c
new file mode 100644
index 0000000..19500a5
--- /dev/null
+++ b/src/shared/bus-message-util.c
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-message-util.h"
+
+#include "resolve-util.h"
+
+int bus_message_read_ifindex(sd_bus_message *message, sd_bus_error *error, int *ret) {
+ int ifindex, r;
+
+ assert(message);
+ assert(ret);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "i", &ifindex);
+ if (r < 0)
+ return r;
+
+ if (ifindex <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index");
+
+ *ret = ifindex;
+
+ return 0;
+}
+
+int bus_message_read_family(sd_bus_message *message, sd_bus_error *error, int *ret) {
+ int family, r;
+
+ assert(message);
+ assert(ret);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "i", &family);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ *ret = family;
+ return 0;
+}
+
+int bus_message_read_in_addr_auto(sd_bus_message *message, sd_bus_error *error, int *ret_family, union in_addr_union *ret_addr) {
+ int family, r;
+ const void *d;
+ size_t sz;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(message, 'y', &d, &sz);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ if (sz != FAMILY_ADDRESS_SIZE(family))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address size");
+
+ if (ret_family)
+ *ret_family = family;
+ if (ret_addr)
+ memcpy(ret_addr, d, sz);
+ return 0;
+}
+
+static int bus_message_read_dns_one(
+ sd_bus_message *message,
+ sd_bus_error *error,
+ bool extended,
+ int *ret_family,
+ union in_addr_union *ret_address,
+ uint16_t *ret_port,
+ const char **ret_server_name) {
+ const char *server_name = NULL;
+ union in_addr_union a;
+ uint16_t port = 0;
+ int family, r;
+
+ assert(message);
+ assert(ret_family);
+ assert(ret_address);
+ assert(ret_port);
+ assert(ret_server_name);
+
+ r = sd_bus_message_enter_container(message, 'r', extended ? "iayqs" : "iay");
+ if (r <= 0)
+ return r;
+
+ r = bus_message_read_in_addr_auto(message, error, &family, &a);
+ if (r < 0)
+ return r;
+
+ if (!dns_server_address_valid(family, &a))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNS server address");
+
+ if (extended) {
+ r = sd_bus_message_read(message, "q", &port);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(port, 53, 853))
+ port = 0;
+
+ r = sd_bus_message_read(message, "s", &server_name);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ *ret_family = family;
+ *ret_address = a;
+ *ret_port = port;
+ *ret_server_name = server_name;
+
+ return 1;
+}
+
+int bus_message_read_dns_servers(
+ sd_bus_message *message,
+ sd_bus_error *error,
+ bool extended,
+ struct in_addr_full ***ret_dns,
+ size_t *ret_n_dns) {
+
+ struct in_addr_full **dns = NULL;
+ size_t n = 0, allocated = 0;
+ int r;
+
+ assert(message);
+ assert(ret_dns);
+ assert(ret_n_dns);
+
+ r = sd_bus_message_enter_container(message, 'a', extended ? "(iayqs)" : "(iay)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *server_name;
+ union in_addr_union a;
+ uint16_t port;
+ int family;
+
+ r = bus_message_read_dns_one(message, error, extended, &family, &a, &port, &server_name);
+ if (r < 0)
+ goto clear;
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(dns, allocated, n+1)) {
+ r = -ENOMEM;
+ goto clear;
+ }
+
+ r = in_addr_full_new(family, &a, port, 0, server_name, dns + n);
+ if (r < 0)
+ goto clear;
+
+ n++;
+ }
+
+ *ret_dns = TAKE_PTR(dns);
+ *ret_n_dns = n;
+ return 0;
+
+clear:
+ for (size_t i = 0; i < n; i++)
+ in_addr_full_free(dns[i]);
+ free(dns);
+
+ return r;
+}
diff --git a/src/shared/bus-message-util.h b/src/shared/bus-message-util.h
new file mode 100644
index 0000000..b82c083
--- /dev/null
+++ b/src/shared/bus-message-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "in-addr-util.h"
+#include "socket-netlink.h"
+
+int bus_message_read_ifindex(sd_bus_message *message, sd_bus_error *error, int *ret);
+int bus_message_read_family(sd_bus_message *message, sd_bus_error *error, int *ret);
+int bus_message_read_in_addr_auto(sd_bus_message *message, sd_bus_error *error, int *ret_family, union in_addr_union *ret_addr);
+
+int bus_message_read_dns_servers(
+ sd_bus_message *message,
+ sd_bus_error *error,
+ bool extended,
+ struct in_addr_full ***ret_dns,
+ size_t *ret_n_dns);
diff --git a/src/shared/bus-object.c b/src/shared/bus-object.c
new file mode 100644
index 0000000..f2e5391
--- /dev/null
+++ b/src/shared/bus-object.c
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-introspect.h"
+#include "bus-object.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+
+int bus_add_implementation(sd_bus *bus, const BusObjectImplementation *impl, void *userdata) {
+ int r;
+
+ log_debug("Registering bus object implementation for path=%s iface=%s", impl->path, impl->interface);
+
+ for (const sd_bus_vtable **p = impl->vtables; p && *p; p++) {
+ r = sd_bus_add_object_vtable(bus, NULL,
+ impl->path,
+ impl->interface,
+ *p,
+ userdata);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register bus path %s with interface %s: %m",
+ impl->path,
+ impl->interface);
+ }
+
+ for (const BusObjectVtablePair *p = impl->fallback_vtables; p && p->vtable; p++) {
+ r = sd_bus_add_fallback_vtable(bus, NULL,
+ impl->path,
+ impl->interface,
+ p->vtable,
+ p->object_find,
+ userdata);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register bus path %s with interface %s: %m",
+ impl->path,
+ impl->interface);
+ }
+
+ if (impl->node_enumerator) {
+ r = sd_bus_add_node_enumerator(bus, NULL,
+ impl->path,
+ impl->node_enumerator,
+ userdata);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add node enumerator for %s: %m",
+ impl->path);
+ }
+
+ if (impl->manager) {
+ r = sd_bus_add_object_manager(bus, NULL, impl->path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add object manager for %s: %m", impl->path);
+ }
+
+ for (size_t i = 0; impl->children && impl->children[i]; i++) {
+ r = bus_add_implementation(bus, impl->children[i], userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static const BusObjectImplementation* find_implementation(
+ const char *pattern,
+ const BusObjectImplementation* const* bus_objects) {
+
+ for (size_t i = 0; bus_objects && bus_objects[i]; i++) {
+ const BusObjectImplementation *impl = bus_objects[i];
+
+ if (STR_IN_SET(pattern, impl->path, impl->interface))
+ return impl;
+
+ impl = find_implementation(pattern, impl->children);
+ if (impl)
+ return impl;
+ }
+
+ return NULL;
+}
+
+static int bus_introspect_implementation(
+ struct introspect *intro,
+ const BusObjectImplementation *impl) {
+ int r;
+
+ for (const sd_bus_vtable **p = impl->vtables; p && *p; p++) {
+ r = introspect_write_interface(intro, impl->interface, *p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+ }
+
+ for (const BusObjectVtablePair *p = impl->fallback_vtables; p && p->vtable; p++) {
+ r = introspect_write_interface(intro, impl->interface, p->vtable);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+ }
+
+ return 0;
+}
+
+static void list_paths(
+ FILE *out,
+ const BusObjectImplementation* const* bus_objects) {
+
+ for (size_t i = 0; bus_objects[i]; i++) {
+ fprintf(out, "%s\t%s\n", bus_objects[i]->path, bus_objects[i]->interface);
+ if (bus_objects[i]->children)
+ list_paths(out, bus_objects[i]->children);
+ }
+}
+
+int bus_introspect_implementations(
+ FILE *out,
+ const char *pattern,
+ const BusObjectImplementation* const* bus_objects) {
+
+ const BusObjectImplementation *impl, *main_impl = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ if (streq(pattern, "list")) {
+ list_paths(out, bus_objects);
+ return 0;
+ }
+
+ struct introspect intro = {};
+ bool is_interface = sd_bus_interface_name_is_valid(pattern);
+
+ impl = find_implementation(pattern, bus_objects);
+ if (!impl)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "%s %s not found",
+ is_interface ? "Interface" : "Object path",
+ pattern);
+
+ /* We use trusted=false here to get all the @org.freedesktop.systemd1.Privileged annotations. */
+ r = introspect_begin(&intro, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+
+ r = introspect_write_default_interfaces(&intro, impl->manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+
+ /* Check if there is a non-fallback path that applies to the given interface, also
+ * print it. This is useful in the case of units: o.fd.systemd1.Service is declared
+ * as a fallback vtable for o/fd/systemd1/unit, and we also want to print
+ * o.fd.systemd1.Unit, which is the non-fallback implementation. */
+ if (impl->fallback_vtables && is_interface)
+ main_impl = find_implementation(impl->path, bus_objects);
+
+ if (main_impl)
+ bus_introspect_implementation(&intro, main_impl);
+
+ if (impl != main_impl)
+ bus_introspect_implementation(&intro, impl);
+
+ _cleanup_set_free_ Set *nodes = NULL;
+
+ for (size_t i = 0; impl->children && impl->children[i]; i++) {
+ r = set_put_strdup(&nodes, impl->children[i]->path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = introspect_write_child_nodes(&intro, nodes, impl->path);
+ if (r < 0)
+ return r;
+
+ r = introspect_finish(&intro, &s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+
+ fputs(s, out);
+ return 0;
+}
diff --git a/src/shared/bus-object.h b/src/shared/bus-object.h
new file mode 100644
index 0000000..145bbd2
--- /dev/null
+++ b/src/shared/bus-object.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+typedef struct BusObjectImplementation BusObjectImplementation;
+
+typedef struct BusObjectVtablePair {
+ const sd_bus_vtable *vtable;
+ sd_bus_object_find_t object_find;
+} BusObjectVtablePair;
+
+struct BusObjectImplementation {
+ const char *path;
+ const char *interface;
+ const sd_bus_vtable **vtables;
+ const BusObjectVtablePair *fallback_vtables;
+ sd_bus_node_enumerator_t node_enumerator;
+ bool manager;
+ const BusObjectImplementation **children;
+};
+
+#define BUS_VTABLES(...) ((const sd_bus_vtable* []){ __VA_ARGS__, NULL })
+#define BUS_FALLBACK_VTABLES(...) ((const BusObjectVtablePair[]) { __VA_ARGS__, {} })
+#define BUS_IMPLEMENTATIONS(...) ((const BusObjectImplementation* []) { __VA_ARGS__, NULL })
+
+int bus_add_implementation(sd_bus *bus, const BusObjectImplementation *impl, void *userdata);
+int bus_introspect_implementations(
+ FILE *out,
+ const char *pattern,
+ const BusObjectImplementation* const* bus_objects);
diff --git a/src/shared/bus-polkit.c b/src/shared/bus-polkit.c
new file mode 100644
index 0000000..14122e0
--- /dev/null
+++ b/src/shared/bus-polkit.c
@@ -0,0 +1,415 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-polkit.h"
+#include "bus-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int check_good_user(sd_bus_message *m, uid_t good_user) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t sender_uid;
+ int r;
+
+ assert(m);
+
+ if (good_user == UID_INVALID)
+ return 0;
+
+ r = sd_bus_query_sender_creds(m, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ /* Don't trust augmented credentials for authorization */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EUID) == 0, -EPERM);
+
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0)
+ return r;
+
+ return sender_uid == good_user;
+}
+
+#if ENABLE_POLKIT
+static int bus_message_append_strv_key_value(
+ sd_bus_message *m,
+ const char **l) {
+
+ const char **k, **v;
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_open_container(m, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(k, v, l) {
+ r = sd_bus_message_append(m, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+#endif
+
+int bus_test_polkit(
+ sd_bus_message *call,
+ int capability,
+ const char *action,
+ const char **details,
+ uid_t good_user,
+ bool *_challenge,
+ sd_bus_error *ret_error) {
+
+ int r;
+
+ assert(call);
+ assert(action);
+
+ /* Tests non-interactively! */
+
+ r = check_good_user(call, good_user);
+ if (r != 0)
+ return r;
+
+ r = sd_bus_query_sender_privilege(call, capability);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return 1;
+#if ENABLE_POLKIT
+ else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *request = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int authorized = false, challenge = false;
+ const char *sender;
+
+ sender = sd_bus_message_get_sender(call);
+ if (!sender)
+ return -EBADMSG;
+
+ r = sd_bus_message_new_method_call(
+ call->bus,
+ &request,
+ "org.freedesktop.PolicyKit1",
+ "/org/freedesktop/PolicyKit1/Authority",
+ "org.freedesktop.PolicyKit1.Authority",
+ "CheckAuthorization");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ request,
+ "(sa{sv})s",
+ "system-bus-name", 1, "name", "s", sender,
+ action);
+ if (r < 0)
+ return r;
+
+ r = bus_message_append_strv_key_value(request, details);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(request, "us", 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(call->bus, request, 0, ret_error, &reply);
+ if (r < 0) {
+ /* Treat no PK available as access denied */
+ if (bus_error_is_unknown_service(ret_error)) {
+ sd_bus_error_free(ret_error);
+ return -EACCES;
+ }
+
+ return r;
+ }
+
+ r = sd_bus_message_enter_container(reply, 'r', "bba{ss}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "bb", &authorized, &challenge);
+ if (r < 0)
+ return r;
+
+ if (authorized)
+ return 1;
+
+ if (_challenge) {
+ *_challenge = challenge;
+ return 0;
+ }
+ }
+#endif
+
+ return -EACCES;
+}
+
+#if ENABLE_POLKIT
+
+typedef struct AsyncPolkitQuery {
+ char *action;
+ char **details;
+
+ sd_bus_message *request, *reply;
+ sd_bus_slot *slot;
+
+ Hashmap *registry;
+ sd_event_source *defer_event_source;
+} AsyncPolkitQuery;
+
+static void async_polkit_query_free(AsyncPolkitQuery *q) {
+ if (!q)
+ return;
+
+ sd_bus_slot_unref(q->slot);
+
+ if (q->registry && q->request)
+ hashmap_remove(q->registry, q->request);
+
+ sd_bus_message_unref(q->request);
+ sd_bus_message_unref(q->reply);
+
+ free(q->action);
+ strv_free(q->details);
+
+ sd_event_source_disable_unref(q->defer_event_source);
+ free(q);
+}
+
+static int async_polkit_defer(sd_event_source *s, void *userdata) {
+ AsyncPolkitQuery *q = userdata;
+
+ assert(s);
+
+ /* This is called as idle event source after we processed the async polkit reply, hopefully after the
+ * method call we re-enqueued has been properly processed. */
+
+ async_polkit_query_free(q);
+ return 0;
+}
+
+static int async_polkit_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ AsyncPolkitQuery *q = userdata;
+ int r;
+
+ assert(reply);
+ assert(q);
+
+ assert(q->slot);
+ q->slot = sd_bus_slot_unref(q->slot);
+
+ assert(!q->reply);
+ q->reply = sd_bus_message_ref(reply);
+
+ /* Now, let's dispatch the original message a second time be re-enqueing. This will then traverse the
+ * whole message processing again, and thus re-validating and re-retrieving the "userdata" field
+ * again.
+ *
+ * We install an idle event loop event to clean-up the PolicyKit request data when we are idle again,
+ * i.e. after the second time the message is processed is complete. */
+
+ assert(!q->defer_event_source);
+ r = sd_event_add_defer(sd_bus_get_event(sd_bus_message_get_bus(reply)), &q->defer_event_source, async_polkit_defer, q);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(q->defer_event_source, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_enabled(q->defer_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_rewind(q->request, true);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_enqueue_for_read(sd_bus_message_get_bus(q->request), q->request);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ log_debug_errno(r, "Processing asynchronous PolicyKit reply failed, ignoring: %m");
+ (void) sd_bus_reply_method_errno(q->request, r, NULL);
+ async_polkit_query_free(q);
+ return r;
+}
+
+#endif
+
+int bus_verify_polkit_async(
+ sd_bus_message *call,
+ int capability,
+ const char *action,
+ const char **details,
+ bool interactive,
+ uid_t good_user,
+ Hashmap **registry,
+ sd_bus_error *ret_error) {
+
+#if ENABLE_POLKIT
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *pk = NULL;
+ AsyncPolkitQuery *q;
+ int c;
+#endif
+ const char *sender;
+ int r;
+
+ assert(call);
+ assert(action);
+ assert(registry);
+
+ r = check_good_user(call, good_user);
+ if (r != 0)
+ return r;
+
+#if ENABLE_POLKIT
+ q = hashmap_get(*registry, call);
+ if (q) {
+ int authorized, challenge;
+
+ /* This is the second invocation of this function, and there's already a response from
+ * polkit, let's process it */
+ assert(q->reply);
+
+ /* If the operation we want to authenticate changed between the first and the second time,
+ * let's not use this authentication, it might be out of date as the object and context we
+ * operate on might have changed. */
+ if (!streq(q->action, action) ||
+ !strv_equal(q->details, (char**) details))
+ return -ESTALE;
+
+ if (sd_bus_message_is_method_error(q->reply, NULL)) {
+ const sd_bus_error *e;
+
+ e = sd_bus_message_get_error(q->reply);
+
+ /* Treat no PK available as access denied */
+ if (bus_error_is_unknown_service(e))
+ return -EACCES;
+
+ /* Copy error from polkit reply */
+ sd_bus_error_copy(ret_error, e);
+ return -sd_bus_error_get_errno(e);
+ }
+
+ r = sd_bus_message_enter_container(q->reply, 'r', "bba{ss}");
+ if (r >= 0)
+ r = sd_bus_message_read(q->reply, "bb", &authorized, &challenge);
+ if (r < 0)
+ return r;
+
+ if (authorized)
+ return 1;
+
+ if (challenge)
+ return sd_bus_error_set(ret_error, SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED, "Interactive authentication required.");
+
+ return -EACCES;
+ }
+#endif
+
+ r = sd_bus_query_sender_privilege(call, capability);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return 1;
+
+ sender = sd_bus_message_get_sender(call);
+ if (!sender)
+ return -EBADMSG;
+
+#if ENABLE_POLKIT
+ c = sd_bus_message_get_allow_interactive_authorization(call);
+ if (c < 0)
+ return c;
+ if (c > 0)
+ interactive = true;
+
+ r = hashmap_ensure_allocated(registry, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(
+ call->bus,
+ &pk,
+ "org.freedesktop.PolicyKit1",
+ "/org/freedesktop/PolicyKit1/Authority",
+ "org.freedesktop.PolicyKit1.Authority",
+ "CheckAuthorization");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ pk,
+ "(sa{sv})s",
+ "system-bus-name", 1, "name", "s", sender,
+ action);
+ if (r < 0)
+ return r;
+
+ r = bus_message_append_strv_key_value(pk, details);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(pk, "us", interactive, NULL);
+ if (r < 0)
+ return r;
+
+ q = new(AsyncPolkitQuery, 1);
+ if (!q)
+ return -ENOMEM;
+
+ *q = (AsyncPolkitQuery) {
+ .request = sd_bus_message_ref(call),
+ };
+
+ q->action = strdup(action);
+ if (!q->action) {
+ async_polkit_query_free(q);
+ return -ENOMEM;
+ }
+
+ q->details = strv_copy((char**) details);
+ if (!q->details) {
+ async_polkit_query_free(q);
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(*registry, call, q);
+ if (r < 0) {
+ async_polkit_query_free(q);
+ return r;
+ }
+
+ q->registry = *registry;
+
+ r = sd_bus_call_async(call->bus, &q->slot, pk, async_polkit_callback, q, 0);
+ if (r < 0) {
+ async_polkit_query_free(q);
+ return r;
+ }
+
+ return 0;
+#endif
+
+ return -EACCES;
+}
+
+void bus_verify_polkit_async_registry_free(Hashmap *registry) {
+#if ENABLE_POLKIT
+ hashmap_free_with_destructor(registry, async_polkit_query_free);
+#endif
+}
diff --git a/src/shared/bus-polkit.h b/src/shared/bus-polkit.h
new file mode 100644
index 0000000..91a88a2
--- /dev/null
+++ b/src/shared/bus-polkit.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+
+int bus_test_polkit(sd_bus_message *call, int capability, const char *action, const char **details, uid_t good_user, bool *_challenge, sd_bus_error *e);
+
+int bus_verify_polkit_async(sd_bus_message *call, int capability, const char *action, const char **details, bool interactive, uid_t good_user, Hashmap **registry, sd_bus_error *error);
+void bus_verify_polkit_async_registry_free(Hashmap *registry);
diff --git a/src/shared/bus-print-properties.c b/src/shared/bus-print-properties.c
new file mode 100644
index 0000000..4cea250
--- /dev/null
+++ b/src/shared/bus-print-properties.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-print-properties.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "escape.h"
+#include "mountpoint-util.h"
+#include "nsflags.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "user-util.h"
+
+int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *value) {
+ assert(name);
+
+ if (expected_value && !streq_ptr(expected_value, value))
+ return 0;
+
+ if (only_value)
+ puts(value);
+ else
+ printf("%s=%s\n", name, value);
+
+ return 0;
+}
+
+int bus_print_property_valuef(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) {
+ va_list ap;
+ int r;
+
+ assert(name);
+ assert(fmt);
+
+ if (expected_value) {
+ _cleanup_free_ char *s = NULL;
+
+ va_start(ap, fmt);
+ r = vasprintf(&s, fmt, ap);
+ va_end(ap);
+ if (r < 0)
+ return -ENOMEM;
+
+ if (streq_ptr(expected_value, s)) {
+ if (only_value)
+ puts(s);
+ else
+ printf("%s=%s\n", name, s);
+ }
+
+ return 0;
+ }
+
+ if (!only_value)
+ printf("%s=", name);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ puts("");
+
+ return 0;
+}
+
+static int bus_print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING: {
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return r;
+
+ if (all || !isempty(s)) {
+ bool good;
+
+ /* This property has a single value, so we need to take
+ * care not to print a new line, everything else is OK. */
+ good = !strchr(s, '\n');
+ bus_print_property_value(name, expected_value, value, good ? s : "[unprintable]");
+ }
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return r;
+
+ if (expected_value && parse_boolean(expected_value) != b)
+ return 1;
+
+ bus_print_property_value(name, NULL, value, yes_no(b));
+ return 1;
+ }
+
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t u;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ /* Yes, heuristics! But we can change this check
+ * should it turn out to not be sufficient */
+
+ if (endswith(name, "Timestamp") ||
+ STR_IN_SET(name, "NextElapseUSecRealtime", "LastTriggerUSec", "TimeUSec", "RTCTimeUSec")) {
+ char timestamp[FORMAT_TIMESTAMP_MAX];
+ const char *t;
+
+ t = format_timestamp(timestamp, sizeof(timestamp), u);
+ if (t || all)
+ bus_print_property_value(name, expected_value, value, strempty(t));
+
+ } else if (strstr(name, "USec")) {
+ char timespan[FORMAT_TIMESPAN_MAX];
+
+ (void) format_timespan(timespan, sizeof(timespan), u, 0);
+ bus_print_property_value(name, expected_value, value, timespan);
+
+ } else if (streq(name, "CoredumpFilter")) {
+ char buf[STRLEN("0xFFFFFFFF")];
+
+ xsprintf(buf, "0x%"PRIx64, u);
+ bus_print_property_value(name, expected_value, value, buf);
+
+ } else if (streq(name, "RestrictNamespaces")) {
+ _cleanup_free_ char *s = NULL;
+ const char *result;
+
+ if ((u & NAMESPACE_FLAGS_ALL) == 0)
+ result = "yes";
+ else if (FLAGS_SET(u, NAMESPACE_FLAGS_ALL))
+ result = "no";
+ else {
+ r = namespace_flags_to_string(u, &s);
+ if (r < 0)
+ return r;
+
+ result = strempty(s);
+ }
+
+ bus_print_property_value(name, expected_value, value, result);
+
+ } else if (streq(name, "MountFlags")) {
+ const char *result;
+
+ result = mount_propagation_flags_to_string(u);
+ if (!result)
+ return -EINVAL;
+
+ bus_print_property_value(name, expected_value, value, result);
+
+ } else if (STR_IN_SET(name, "CapabilityBoundingSet", "AmbientCapabilities")) {
+ _cleanup_free_ char *s = NULL;
+
+ r = capability_set_to_string_alloc(u, &s);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, s);
+
+ } else if ((STR_IN_SET(name, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight") && u == CGROUP_WEIGHT_INVALID) ||
+ (STR_IN_SET(name, "CPUShares", "StartupCPUShares") && u == CGROUP_CPU_SHARES_INVALID) ||
+ (STR_IN_SET(name, "BlockIOWeight", "StartupBlockIOWeight") && u == CGROUP_BLKIO_WEIGHT_INVALID) ||
+ (STR_IN_SET(name, "MemoryCurrent", "TasksCurrent") && u == (uint64_t) -1) ||
+ (endswith(name, "NSec") && u == (uint64_t) -1))
+
+ bus_print_property_value(name, expected_value, value, "[not set]");
+
+ else if ((STR_IN_SET(name, "DefaultMemoryLow", "DefaultMemoryMin", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) ||
+ (STR_IN_SET(name, "TasksMax", "DefaultTasksMax") && u == (uint64_t) -1) ||
+ (startswith(name, "Limit") && u == (uint64_t) -1) ||
+ (startswith(name, "DefaultLimit") && u == (uint64_t) -1))
+
+ bus_print_property_value(name, expected_value, value, "infinity");
+ else if (STR_IN_SET(name, "IPIngressBytes", "IPIngressPackets", "IPEgressBytes", "IPEgressPackets") && u == (uint64_t) -1)
+ bus_print_property_value(name, expected_value, value, "[no data]");
+ else
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu64, u);
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_INT64: {
+ int64_t i;
+
+ r = sd_bus_message_read_basic(m, type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_valuef(name, expected_value, value, "%"PRIi64, i);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t u;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ if (strstr(name, "UMask") || strstr(name, "Mode"))
+ bus_print_property_valuef(name, expected_value, value, "%04o", u);
+
+ else if (streq(name, "UID")) {
+ if (u == UID_INVALID)
+ bus_print_property_value(name, expected_value, value, "[not set]");
+ else
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u);
+ } else if (streq(name, "GID")) {
+ if (u == GID_INVALID)
+ bus_print_property_value(name, expected_value, value, "[not set]");
+ else
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u);
+ } else
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u);
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_INT32: {
+ int32_t i;
+
+ r = sd_bus_message_read_basic(m, type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_valuef(name, expected_value, value, "%"PRIi32, i);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return r;
+
+ bus_print_property_valuef(name, expected_value, value, "%g", d);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_ARRAY:
+ if (streq(contents, "s")) {
+ bool first = true;
+ const char *str;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, contents);
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &str)) > 0) {
+ _cleanup_free_ char *e = NULL;
+
+ e = shell_maybe_quote(str, ESCAPE_BACKSLASH_ONELINE);
+ if (!e)
+ return -ENOMEM;
+
+ if (first) {
+ if (!value)
+ printf("%s=", name);
+ first = false;
+ } else
+ fputs(" ", stdout);
+
+ fputs(e, stdout);
+ }
+ if (r < 0)
+ return r;
+
+ if (first && all && !value)
+ printf("%s=", name);
+ if (!first || all)
+ puts("");
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ } else if (streq(contents, "y")) {
+ const uint8_t *u;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, (const void**) &u, &n);
+ if (r < 0)
+ return r;
+
+ if (all || n > 0) {
+ unsigned i;
+
+ if (!value)
+ printf("%s=", name);
+
+ for (i = 0; i < n; i++)
+ printf("%02x", u[i]);
+
+ puts("");
+ }
+
+ return 1;
+
+ } else if (streq(contents, "u")) {
+ uint32_t *u;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_UINT32, (const void**) &u, &n);
+ if (r < 0)
+ return r;
+
+ if (all || n > 0) {
+ unsigned i;
+
+ if (!value)
+ printf("%s=", name);
+
+ for (i = 0; i < n; i++)
+ printf("%08x", u[i]);
+
+ puts("");
+ }
+
+ return 1;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int bus_message_print_all_properties(
+ sd_bus_message *m,
+ bus_message_print_t func,
+ char **filter,
+ bool value,
+ bool all,
+ Set **found_properties) {
+
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+ _cleanup_free_ char *name_with_equal = NULL;
+ const char *name, *contents, *expected_value = NULL;
+
+ r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &name);
+ if (r < 0)
+ return r;
+
+ if (found_properties) {
+ r = set_ensure_put(found_properties, &string_hash_ops, name);
+ if (r < 0)
+ return log_oom();
+ }
+
+ name_with_equal = strjoin(name, "=");
+ if (!name_with_equal)
+ return log_oom();
+
+ if (!filter || strv_find(filter, name) ||
+ (expected_value = strv_find_startswith(filter, name_with_equal))) {
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r < 0)
+ return r;
+
+ if (func)
+ r = func(name, expected_value, m, value, all);
+ if (!func || r == 0)
+ r = bus_print_property(name, expected_value, m, value, all);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (all && !expected_value)
+ printf("%s=[unprintable]\n", name);
+ /* skip what we didn't read */
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(m, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int bus_print_all_properties(
+ sd_bus *bus,
+ const char *dest,
+ const char *path,
+ bus_message_print_t func,
+ char **filter,
+ bool value,
+ bool all,
+ Set **found_properties) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+
+ r = sd_bus_call_method(bus,
+ dest,
+ path,
+ "org.freedesktop.DBus.Properties",
+ "GetAll",
+ &error,
+ &reply,
+ "s", "");
+ if (r < 0)
+ return r;
+
+ return bus_message_print_all_properties(reply, func, filter, value, all, found_properties);
+}
diff --git a/src/shared/bus-print-properties.h b/src/shared/bus-print-properties.h
new file mode 100644
index 0000000..a457475
--- /dev/null
+++ b/src/shared/bus-print-properties.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+#include "macro.h"
+#include "set.h"
+
+typedef int (*bus_message_print_t) (const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all);
+
+int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *value);
+int bus_print_property_valuef(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) _printf_(4,5);
+int bus_message_print_all_properties(sd_bus_message *m, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties);
+int bus_print_all_properties(sd_bus *bus, const char *dest, const char *path, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties);
diff --git a/src/shared/bus-unit-procs.c b/src/shared/bus-unit-procs.c
new file mode 100644
index 0000000..3e97be9
--- /dev/null
+++ b/src/shared/bus-unit-procs.c
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-unit-procs.h"
+#include "hashmap.h"
+#include "list.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+
+struct CGroupInfo {
+ char *cgroup_path;
+ bool is_const; /* If false, cgroup_path should be free()'d */
+
+ Hashmap *pids; /* PID → process name */
+ bool done;
+
+ struct CGroupInfo *parent;
+ LIST_FIELDS(struct CGroupInfo, siblings);
+ LIST_HEAD(struct CGroupInfo, children);
+ size_t n_children;
+};
+
+static int add_cgroup(Hashmap *cgroups, const char *path, bool is_const, struct CGroupInfo **ret) {
+ struct CGroupInfo *parent = NULL, *cg;
+ int r;
+
+ assert(cgroups);
+ assert(ret);
+
+ path = empty_to_root(path);
+
+ cg = hashmap_get(cgroups, path);
+ if (cg) {
+ *ret = cg;
+ return 0;
+ }
+
+ if (!empty_or_root(path)) {
+ const char *e, *pp;
+
+ e = strrchr(path, '/');
+ if (!e)
+ return -EINVAL;
+
+ pp = strndupa(path, e - path);
+
+ r = add_cgroup(cgroups, pp, false, &parent);
+ if (r < 0)
+ return r;
+ }
+
+ cg = new0(struct CGroupInfo, 1);
+ if (!cg)
+ return -ENOMEM;
+
+ if (is_const)
+ cg->cgroup_path = (char*) path;
+ else {
+ cg->cgroup_path = strdup(path);
+ if (!cg->cgroup_path) {
+ free(cg);
+ return -ENOMEM;
+ }
+ }
+
+ cg->is_const = is_const;
+ cg->parent = parent;
+
+ r = hashmap_put(cgroups, cg->cgroup_path, cg);
+ if (r < 0) {
+ if (!is_const)
+ free(cg->cgroup_path);
+ free(cg);
+ return r;
+ }
+
+ if (parent) {
+ LIST_PREPEND(siblings, parent->children, cg);
+ parent->n_children++;
+ }
+
+ *ret = cg;
+ return 1;
+}
+
+static int add_process(
+ Hashmap *cgroups,
+ const char *path,
+ pid_t pid,
+ const char *name) {
+
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(cgroups);
+ assert(name);
+ assert(pid > 0);
+
+ r = add_cgroup(cgroups, path, true, &cg);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&cg->pids, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ return hashmap_put(cg->pids, PID_TO_PTR(pid), (void*) name);
+}
+
+static void remove_cgroup(Hashmap *cgroups, struct CGroupInfo *cg) {
+ assert(cgroups);
+ assert(cg);
+
+ while (cg->children)
+ remove_cgroup(cgroups, cg->children);
+
+ hashmap_remove(cgroups, cg->cgroup_path);
+
+ if (!cg->is_const)
+ free(cg->cgroup_path);
+
+ hashmap_free(cg->pids);
+
+ if (cg->parent)
+ LIST_REMOVE(siblings, cg->parent->children, cg);
+
+ free(cg);
+}
+
+static int cgroup_info_compare_func(struct CGroupInfo * const *a, struct CGroupInfo * const *b) {
+ return strcmp((*a)->cgroup_path, (*b)->cgroup_path);
+}
+
+static int dump_processes(
+ Hashmap *cgroups,
+ const char *cgroup_path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(prefix);
+
+ cgroup_path = empty_to_root(cgroup_path);
+
+ cg = hashmap_get(cgroups, cgroup_path);
+ if (!cg)
+ return 0;
+
+ if (!hashmap_isempty(cg->pids)) {
+ const char *name;
+ size_t n = 0, i;
+ pid_t *pids;
+ void *pidp;
+ int width;
+
+ /* Order processes by their PID */
+ pids = newa(pid_t, hashmap_size(cg->pids));
+
+ HASHMAP_FOREACH_KEY(name, pidp, cg->pids)
+ pids[n++] = PTR_TO_PID(pidp);
+
+ assert(n == hashmap_size(cg->pids));
+ typesafe_qsort(pids, n, pid_compare_func);
+
+ width = DECIMAL_STR_WIDTH(pids[n-1]);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *e = NULL;
+ const char *special;
+ bool more;
+
+ name = hashmap_get(cg->pids, PID_TO_PTR(pids[i]));
+ assert(name);
+
+ if (n_columns != 0) {
+ unsigned k;
+
+ k = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U);
+
+ e = ellipsize(name, k, 100);
+ if (e)
+ name = e;
+ }
+
+ more = i+1 < n || cg->children;
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT);
+
+ fprintf(stdout, "%s%s%*"PID_PRI" %s\n",
+ prefix,
+ special,
+ width, pids[i],
+ name);
+ }
+ }
+
+ if (cg->children) {
+ struct CGroupInfo **children, *child;
+ size_t n = 0, i;
+
+ /* Order subcgroups by their name */
+ children = newa(struct CGroupInfo*, cg->n_children);
+ LIST_FOREACH(siblings, child, cg->children)
+ children[n++] = child;
+ assert(n == cg->n_children);
+ typesafe_qsort(children, n, cgroup_info_compare_func);
+
+ if (n_columns != 0)
+ n_columns = MAX(LESS_BY(n_columns, 2U), 20U);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *pp = NULL;
+ const char *name, *special;
+ bool more;
+
+ child = children[i];
+
+ name = strrchr(child->cgroup_path, '/');
+ if (!name)
+ return -EINVAL;
+ name++;
+
+ more = i+1 < n;
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT);
+
+ fputs(prefix, stdout);
+ fputs(special, stdout);
+ fputs(name, stdout);
+ fputc('\n', stdout);
+
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE);
+
+ pp = strjoin(prefix, special);
+ if (!pp)
+ return -ENOMEM;
+
+ r = dump_processes(cgroups, child->cgroup_path, pp, n_columns, flags);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ cg->done = true;
+ return 0;
+}
+
+static int dump_extra_processes(
+ Hashmap *cgroups,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ _cleanup_free_ pid_t *pids = NULL;
+ _cleanup_hashmap_free_ Hashmap *names = NULL;
+ struct CGroupInfo *cg;
+ size_t n_allocated = 0, n = 0, k;
+ int width, r;
+
+ /* Prints the extra processes, i.e. those that are in cgroups we haven't displayed yet. We show them as
+ * combined, sorted, linear list. */
+
+ HASHMAP_FOREACH(cg, cgroups) {
+ const char *name;
+ void *pidp;
+
+ if (cg->done)
+ continue;
+
+ if (hashmap_isempty(cg->pids))
+ continue;
+
+ r = hashmap_ensure_allocated(&names, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(pids, n_allocated, n + hashmap_size(cg->pids)))
+ return -ENOMEM;
+
+ HASHMAP_FOREACH_KEY(name, pidp, cg->pids) {
+ pids[n++] = PTR_TO_PID(pidp);
+
+ r = hashmap_put(names, pidp, (void*) name);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (n == 0)
+ return 0;
+
+ typesafe_qsort(pids, n, pid_compare_func);
+ width = DECIMAL_STR_WIDTH(pids[n-1]);
+
+ for (k = 0; k < n; k++) {
+ _cleanup_free_ char *e = NULL;
+ const char *name;
+
+ name = hashmap_get(names, PID_TO_PTR(pids[k]));
+ assert(name);
+
+ if (n_columns != 0) {
+ unsigned z;
+
+ z = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U);
+
+ e = ellipsize(name, z, 100);
+ if (e)
+ name = e;
+ }
+
+ fprintf(stdout, "%s%s %*" PID_PRI " %s\n",
+ prefix,
+ special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET),
+ width, pids[k],
+ name);
+ }
+
+ return 0;
+}
+
+int unit_show_processes(
+ sd_bus *bus,
+ const char *unit,
+ const char *cgroup_path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Hashmap *cgroups = NULL;
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(bus);
+ assert(unit);
+
+ if (flags & OUTPUT_FULL_WIDTH)
+ n_columns = 0;
+ else if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetUnitProcesses",
+ error,
+ &reply,
+ "s",
+ unit);
+ if (r < 0)
+ return r;
+
+ cgroups = hashmap_new(&path_hash_ops);
+ if (!cgroups)
+ return -ENOMEM;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(sus)");
+ if (r < 0)
+ goto finish;
+
+ for (;;) {
+ const char *path = NULL, *name = NULL;
+ uint32_t pid;
+
+ r = sd_bus_message_read(reply, "(sus)", &path, &pid, &name);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ break;
+
+ r = add_process(cgroups, path, pid, name);
+ if (r == -ENOMEM)
+ goto finish;
+ if (r < 0)
+ log_warning_errno(r, "Invalid process description in GetUnitProcesses reply: cgroup=\"%s\" pid="PID_FMT" command=\"%s\", ignoring: %m",
+ path, pid, name);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = dump_processes(cgroups, cgroup_path, prefix, n_columns, flags);
+ if (r < 0)
+ goto finish;
+
+ r = dump_extra_processes(cgroups, prefix, n_columns, flags);
+
+finish:
+ while ((cg = hashmap_first(cgroups)))
+ remove_cgroup(cgroups, cg);
+
+ hashmap_free(cgroups);
+
+ return r;
+}
diff --git a/src/shared/bus-unit-procs.h b/src/shared/bus-unit-procs.h
new file mode 100644
index 0000000..78c5569
--- /dev/null
+++ b/src/shared/bus-unit-procs.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "output-mode.h"
+
+int unit_show_processes(sd_bus *bus, const char *unit, const char *cgroup_path, const char *prefix, unsigned n_columns, OutputFlags flags, sd_bus_error *error);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
new file mode 100644
index 0000000..2bab229
--- /dev/null
+++ b/src/shared/bus-unit-util.c
@@ -0,0 +1,2432 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "coredump-util.h"
+#include "cpu-set-util.h"
+#include "dissect-image.h"
+#include "escape.h"
+#include "exec-util.h"
+#include "exit-status.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "ip-protocol-list.h"
+#include "libmount-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "missing_fs.h"
+#include "mountpoint-util.h"
+#include "nsflags.h"
+#include "numa-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "securebits-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "sort-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "user-util.h"
+#include "utf8.h"
+
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
+ assert(message);
+ assert(u);
+
+ u->machine = NULL;
+
+ return sd_bus_message_read(
+ message,
+ "(ssssssouso)",
+ &u->id,
+ &u->description,
+ &u->load_state,
+ &u->active_state,
+ &u->sub_state,
+ &u->following,
+ &u->unit_path,
+ &u->job_id,
+ &u->job_type,
+ &u->job_path);
+}
+
+#define DEFINE_BUS_APPEND_PARSE_PTR(bus_type, cast_type, type, parse_func) \
+ static int bus_append_##parse_func( \
+ sd_bus_message *m, \
+ const char *field, \
+ const char *eq) { \
+ type val; \
+ int r; \
+ \
+ r = parse_func(eq, &val); \
+ if (r < 0) \
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); \
+ \
+ r = sd_bus_message_append(m, "(sv)", field, \
+ bus_type, (cast_type) val); \
+ if (r < 0) \
+ return bus_log_create_error(r); \
+ \
+ return 1; \
+ }
+
+#define DEFINE_BUS_APPEND_PARSE(bus_type, parse_func) \
+ static int bus_append_##parse_func( \
+ sd_bus_message *m, \
+ const char *field, \
+ const char *eq) { \
+ int r; \
+ \
+ r = parse_func(eq); \
+ if (r < 0) \
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s: %s", field, eq); \
+ \
+ r = sd_bus_message_append(m, "(sv)", field, \
+ bus_type, (int32_t) r); \
+ if (r < 0) \
+ return bus_log_create_error(r); \
+ \
+ return 1; \
+ }
+
+DEFINE_BUS_APPEND_PARSE("b", parse_boolean);
+DEFINE_BUS_APPEND_PARSE("i", ioprio_class_from_string);
+DEFINE_BUS_APPEND_PARSE("i", ip_tos_from_string);
+DEFINE_BUS_APPEND_PARSE("i", log_facility_unshifted_from_string);
+DEFINE_BUS_APPEND_PARSE("i", log_level_from_string);
+#if !HAVE_SECCOMP
+static inline int seccomp_parse_errno_or_action(const char *eq) { return -EINVAL; }
+#endif
+DEFINE_BUS_APPEND_PARSE("i", seccomp_parse_errno_or_action);
+DEFINE_BUS_APPEND_PARSE("i", sched_policy_from_string);
+DEFINE_BUS_APPEND_PARSE("i", secure_bits_from_string);
+DEFINE_BUS_APPEND_PARSE("i", signal_from_string);
+DEFINE_BUS_APPEND_PARSE("i", parse_ip_protocol);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, ioprio_parse_priority);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, parse_nice);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, safe_atoi);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, nsec_t, parse_nsec);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_blkio_weight_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_cpu_shares_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_weight_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, unsigned long, mount_propagation_flags_from_string);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, safe_atou64);
+DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, mode_t, parse_mode);
+DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, unsigned, safe_atou);
+DEFINE_BUS_APPEND_PARSE_PTR("x", int64_t, int64_t, safe_atoi64);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, coredump_filter_mask_from_string);
+
+static int bus_append_string(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ r = sd_bus_message_append(m, "(sv)", field, "s", eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_strv(sd_bus_message *m, const char *field, const char *eq, ExtractFlags flags) {
+ const char *p;
+ int r;
+
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "as");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (p = eq;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, flags);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax: %s", eq);
+
+ r = sd_bus_message_append_basic(m, 's', word);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_byte_array(sd_bus_message *m, const char *field, const void *buf, size_t n) {
+ int r;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "ay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'y', buf, n);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_parse_sec_rename(sd_bus_message *m, const char *field, const char *eq) {
+ char *n;
+ usec_t t;
+ size_t l;
+ int r;
+
+ r = parse_sec(eq, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ l = strlen(field);
+ n = newa(char, l + 2);
+ /* Change suffix Sec → USec */
+ strcpy(mempcpy(n, field, l - 3), "USec");
+
+ r = sd_bus_message_append(m, "(sv)", n, "t", t);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_parse_size(sd_bus_message *m, const char *field, const char *eq, uint64_t base) {
+ uint64_t v;
+ int r;
+
+ r = parse_size(eq, base, &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", v);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_exec_command(sd_bus_message *m, const char *field, const char *eq) {
+ bool explicit_path = false, done = false;
+ _cleanup_strv_free_ char **l = NULL, **ex_opts = NULL;
+ _cleanup_free_ char *path = NULL, *upgraded_name = NULL;
+ ExecCommandFlags flags = 0;
+ bool is_ex_prop = endswith(field, "Ex");
+ int r;
+
+ do {
+ switch (*eq) {
+
+ case '-':
+ if (FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE))
+ done = true;
+ else {
+ flags |= EXEC_COMMAND_IGNORE_FAILURE;
+ eq++;
+ }
+ break;
+
+ case '@':
+ if (explicit_path)
+ done = true;
+ else {
+ explicit_path = true;
+ eq++;
+ }
+ break;
+
+ case ':':
+ if (FLAGS_SET(flags, EXEC_COMMAND_NO_ENV_EXPAND))
+ done = true;
+ else {
+ flags |= EXEC_COMMAND_NO_ENV_EXPAND;
+ eq++;
+ }
+ break;
+
+ case '+':
+ if (flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC))
+ done = true;
+ else {
+ flags |= EXEC_COMMAND_FULLY_PRIVILEGED;
+ eq++;
+ }
+ break;
+
+ case '!':
+ if (flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_AMBIENT_MAGIC))
+ done = true;
+ else if (FLAGS_SET(flags, EXEC_COMMAND_NO_SETUID)) {
+ flags &= ~EXEC_COMMAND_NO_SETUID;
+ flags |= EXEC_COMMAND_AMBIENT_MAGIC;
+ eq++;
+ } else {
+ flags |= EXEC_COMMAND_NO_SETUID;
+ eq++;
+ }
+ break;
+
+ default:
+ done = true;
+ break;
+ }
+ } while (!done);
+
+ if (!is_ex_prop && (flags & (EXEC_COMMAND_NO_ENV_EXPAND|EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC))) {
+ /* Upgrade the ExecXYZ= property to ExecXYZEx= for convenience */
+ is_ex_prop = true;
+ upgraded_name = strjoin(field, "Ex");
+ if (!upgraded_name)
+ return log_oom();
+ }
+
+ if (is_ex_prop) {
+ r = exec_command_flags_to_strv(flags, &ex_opts);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert ExecCommandFlags to strv: %m");
+ }
+
+ if (explicit_path) {
+ r = extract_first_word(&eq, &path, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse path: %m");
+ }
+
+ r = strv_split_full(&l, eq, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse command line: %m");
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, upgraded_name ?: field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', is_ex_prop ? "a(sasas)" : "a(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', is_ex_prop ? "(sasas)" : "(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!strv_isempty(l)) {
+
+ r = sd_bus_message_open_container(m, 'r', is_ex_prop ? "sasas" : "sasb");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", path ?: l[0]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, l);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = is_ex_prop ? sd_bus_message_append_strv(m, ex_opts) : sd_bus_message_append(m, "b", FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
+ int r;
+
+ assert(m);
+ assert(prefix);
+
+ r = sd_bus_message_open_container(m, 'r', "iayu");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "i", family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "u", prefixlen);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(m);
+}
+
+static int bus_append_cgroup_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "DevicePolicy",
+ "Slice",
+ "ManagedOOMSwap",
+ "ManagedOOMMemoryPressure",
+ "ManagedOOMMemoryPressureLimitPercent"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUAccounting",
+ "MemoryAccounting",
+ "IOAccounting",
+ "BlockIOAccounting",
+ "TasksAccounting",
+ "IPAccounting"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUWeight",
+ "StartupCPUWeight",
+ "IOWeight",
+ "StartupIOWeight"))
+ return bus_append_cg_weight_parse(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUShares",
+ "StartupCPUShares"))
+ return bus_append_cg_cpu_shares_parse(m, field, eq);
+
+ if (STR_IN_SET(field, "AllowedCPUs",
+ "AllowedMemoryNodes")) {
+ _cleanup_(cpu_set_reset) CPUSet cpuset = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ r = parse_cpu_set(eq, &cpuset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ r = cpu_set_to_dbus(&cpuset, &array, &allocated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to serialize CPUSet: %m");
+
+ return bus_append_byte_array(m, field, array, allocated);
+ }
+
+ if (STR_IN_SET(field, "BlockIOWeight",
+ "StartupBlockIOWeight"))
+ return bus_append_cg_blkio_weight_parse(m, field, eq);
+
+ if (streq(field, "DisableControllers"))
+ return bus_append_strv(m, "DisableControllers", eq, EXTRACT_UNQUOTE);
+
+ if (streq(field, "Delegate")) {
+ r = parse_boolean(eq);
+ if (r < 0)
+ return bus_append_strv(m, "DelegateControllers", eq, EXTRACT_UNQUOTE);
+
+ r = sd_bus_message_append(m, "(sv)", "Delegate", "b", r);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "MemoryMin",
+ "DefaultMemoryLow",
+ "DefaultMemoryMin",
+ "MemoryLow",
+ "MemoryHigh",
+ "MemoryMax",
+ "MemorySwapMax",
+ "MemoryLimit",
+ "TasksMax")) {
+
+ if (streq(eq, "infinity")) {
+ r = sd_bus_message_append(m, "(sv)", field, "t", CGROUP_LIMIT_MAX);
+ if (r < 0)
+ return bus_log_create_error(r);
+ return 1;
+ } else if (isempty(eq)) {
+ uint64_t empty_value = STR_IN_SET(field,
+ "DefaultMemoryLow",
+ "DefaultMemoryMin",
+ "MemoryLow",
+ "MemoryMin") ?
+ CGROUP_LIMIT_MIN :
+ CGROUP_LIMIT_MAX;
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", empty_value);
+ if (r < 0)
+ return bus_log_create_error(r);
+ return 1;
+ }
+
+ r = parse_permille(eq);
+ if (r >= 0) {
+ char *n;
+
+ /* When this is a percentage we'll convert this into a relative value in the range 0…UINT32_MAX
+ * and pass it in the MemoryLowScale property (and related ones). This way the physical memory
+ * size can be determined server-side. */
+
+ n = strjoina(field, "Scale");
+ r = sd_bus_message_append(m, "(sv)", n, "u", (uint32_t) (((uint64_t) r * UINT32_MAX) / 1000U));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "TasksMax"))
+ return bus_append_safe_atou64(m, field, eq);
+
+ return bus_append_parse_size(m, field, eq, 1024);
+ }
+
+ if (streq(field, "CPUQuota")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", USEC_INFINITY);
+ else {
+ r = parse_permille_unbounded(eq);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
+ "CPU quota too small.");
+ if (r < 0)
+ return log_error_errno(r, "CPU quota '%s' invalid.", eq);
+
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", (((uint64_t) r * USEC_PER_SEC) / 1000U));
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "CPUQuotaPeriodSec")) {
+ usec_t u = USEC_INFINITY;
+
+ r = parse_sec_def_infinity(eq, &u);
+ if (r < 0)
+ return log_error_errno(r, "CPU quota period '%s' invalid.", eq);
+
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPeriodUSec", "t", u);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "DeviceAllow")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 0);
+ else {
+ const char *path = eq, *rwm = NULL, *e;
+
+ e = strchr(eq, ' ');
+ if (e) {
+ path = strndupa(eq, e - eq);
+ rwm = e+1;
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 1, path, strempty(rwm));
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (cgroup_io_limit_type_from_string(field) >= 0 || STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0);
+ else {
+ const char *path, *bandwidth, *e;
+ uint64_t bytes;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ bandwidth = e+1;
+
+ if (streq(bandwidth, "infinity"))
+ bytes = CGROUP_LIMIT_MAX;
+ else {
+ r = parse_size(bandwidth, 1000, &bytes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse byte value %s: %m", bandwidth);
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, bytes);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "IODeviceWeight",
+ "BlockIODeviceWeight")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0);
+ else {
+ const char *path, *weight, *e;
+ uint64_t u;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ weight = e+1;
+
+ r = safe_atou64(weight, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, weight);
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, u);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "IODeviceLatencyTargetSec")) {
+ const char *field_usec = "IODeviceLatencyTargetUSec";
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", USEC_INFINITY);
+ else {
+ const char *path, *target, *e;
+ usec_t usec;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ target = e+1;
+
+ r = parse_sec(target, &usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, target);
+
+ r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 1, path, usec);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "IPAddressAllow",
+ "IPAddressDeny")) {
+ unsigned char prefixlen;
+ union in_addr_union prefix = {};
+ int family;
+
+ if (isempty(eq)) {
+ r = sd_bus_message_append(m, "(sv)", field, "a(iayu)", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(iayu)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(iayu)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(eq, "any")) {
+ /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if (is_localhost(eq)) {
+ /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+ prefix.in.s_addr = htobe32(0x7f000000);
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
+ if (r < 0)
+ return r;
+
+ } else if (streq(eq, "link-local")) {
+ /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+ prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xfe800000)
+ };
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if (streq(eq, "multicast")) {
+ /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+ prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xff000000)
+ };
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else {
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&eq, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %s", field, eq);
+
+ r = in_addr_prefix_from_string_auto(word, &family, &prefix, &prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse IP address prefix: %s", word);
+
+ r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "IPIngressFilterPath",
+ "IPEgressFilterPath")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "as", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", field, "as", 1, eq);
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_automount_property(sd_bus_message *m, const char *field, const char *eq) {
+ if (streq(field, "Where"))
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (streq(field, "TimeoutIdleSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_execute_property(sd_bus_message *m, const char *field, const char *eq) {
+ const char *suffix;
+ int r;
+
+ if (STR_IN_SET(field, "User",
+ "Group",
+ "UtmpIdentifier",
+ "UtmpMode",
+ "PAMName",
+ "TTYPath",
+ "WorkingDirectory",
+ "RootDirectory",
+ "SyslogIdentifier",
+ "ProtectSystem",
+ "ProtectHome",
+ "SELinuxContext",
+ "RootImage",
+ "RootVerity",
+ "RuntimeDirectoryPreserve",
+ "Personality",
+ "KeyringMode",
+ "ProtectProc",
+ "ProcSubset",
+ "NetworkNamespacePath",
+ "LogNamespace"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "IgnoreSIGPIPE",
+ "TTYVHangup",
+ "TTYReset",
+ "TTYVTDisallocate",
+ "PrivateTmp",
+ "PrivateDevices",
+ "PrivateNetwork",
+ "PrivateUsers",
+ "PrivateMounts",
+ "NoNewPrivileges",
+ "SyslogLevelPrefix",
+ "MemoryDenyWriteExecute",
+ "RestrictRealtime",
+ "DynamicUser",
+ "RemoveIPC",
+ "ProtectKernelTunables",
+ "ProtectKernelModules",
+ "ProtectKernelLogs",
+ "ProtectClock",
+ "ProtectControlGroups",
+ "MountAPIVFS",
+ "CPUSchedulingResetOnFork",
+ "LockPersonality",
+ "ProtectHostname",
+ "RestrictSUIDSGID"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "ReadWriteDirectories",
+ "ReadOnlyDirectories",
+ "InaccessibleDirectories",
+ "ReadWritePaths",
+ "ReadOnlyPaths",
+ "InaccessiblePaths",
+ "RuntimeDirectory",
+ "StateDirectory",
+ "CacheDirectory",
+ "LogsDirectory",
+ "ConfigurationDirectory",
+ "SupplementaryGroups",
+ "SystemCallArchitectures"))
+ return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE);
+
+ if (STR_IN_SET(field, "SyslogLevel",
+ "LogLevelMax"))
+ return bus_append_log_level_from_string(m, field, eq);
+
+ if (streq(field, "SyslogFacility"))
+ return bus_append_log_facility_unshifted_from_string(m, field, eq);
+
+ if (streq(field, "SecureBits"))
+ return bus_append_secure_bits_from_string(m, field, eq);
+
+ if (streq(field, "CPUSchedulingPolicy"))
+ return bus_append_sched_policy_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUSchedulingPriority",
+ "OOMScoreAdjust"))
+ return bus_append_safe_atoi(m, field, eq);
+
+ if (streq(field, "CoredumpFilter"))
+ return bus_append_coredump_filter_mask_from_string(m, field, eq);
+
+ if (streq(field, "Nice"))
+ return bus_append_parse_nice(m, field, eq);
+
+ if (streq(field, "SystemCallErrorNumber"))
+ return bus_append_seccomp_parse_errno_or_action(m, field, eq);
+
+ if (streq(field, "IOSchedulingClass"))
+ return bus_append_ioprio_class_from_string(m, field, eq);
+
+ if (streq(field, "IOSchedulingPriority"))
+ return bus_append_ioprio_parse_priority(m, field, eq);
+
+ if (STR_IN_SET(field, "RuntimeDirectoryMode",
+ "StateDirectoryMode",
+ "CacheDirectoryMode",
+ "LogsDirectoryMode",
+ "ConfigurationDirectoryMode",
+ "UMask"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (streq(field, "TimerSlackNSec"))
+ return bus_append_parse_nsec(m, field, eq);
+
+ if (streq(field, "LogRateLimitIntervalSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "LogRateLimitBurst"))
+ return bus_append_safe_atou(m, field, eq);
+
+ if (streq(field, "MountFlags"))
+ return bus_append_mount_propagation_flags_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "Environment",
+ "UnsetEnvironment",
+ "PassEnvironment"))
+ return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE);
+
+ if (streq(field, "EnvironmentFile")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 1,
+ eq[0] == '-' ? eq + 1 : eq,
+ eq[0] == '-');
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "SetCredential")) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', "SetCredential");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(say)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "a(say)", 0);
+ else {
+ _cleanup_free_ char *word = NULL, *unescaped = NULL;
+ const char *p = eq;
+ int l;
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse SetCredential= parameter: %s", eq);
+ if (r == 0 || !p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing argument to SetCredential=.");
+
+ l = cunescape(p, UNESCAPE_ACCEPT_NUL, &unescaped);
+ if (l < 0)
+ return log_error_errno(l, "Failed to unescape SetCredential= value: %s", p);
+
+ r = sd_bus_message_open_container(m, 'a', "(say)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "say");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", word);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'y', unescaped, l);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "LoadCredential")) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', "LoadCredential");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "a(ss)", 0);
+ else {
+ _cleanup_free_ char *word = NULL;
+ const char *p = eq;
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse LoadCredential= parameter: %s", eq);
+ if (r == 0 || !p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing argument to LoadCredential=.");
+
+ r = sd_bus_message_append(m, "a(ss)", 1, word, p);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "LogExtraFields")) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', "LogExtraFields");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "aay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "ay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'y', eq, strlen(eq));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "StandardInput",
+ "StandardOutput",
+ "StandardError")) {
+ const char *n, *appended;
+
+ if ((n = startswith(eq, "fd:"))) {
+ appended = strjoina(field, "FileDescriptorName");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else if ((n = startswith(eq, "file:"))) {
+ appended = strjoina(field, "File");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else if ((n = startswith(eq, "append:"))) {
+ appended = strjoina(field, "FileToAppend");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else
+ r = sd_bus_message_append(m, "(sv)", field, "s", eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "StandardInputText")) {
+ _cleanup_free_ char *unescaped = NULL;
+
+ r = cunescape(eq, 0, &unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape text '%s': %m", eq);
+
+ if (!strextend(&unescaped, "\n", NULL))
+ return log_oom();
+
+ /* Note that we don't expand specifiers here, but that should be OK, as this is a programmatic
+ * interface anyway */
+
+ return bus_append_byte_array(m, field, unescaped, strlen(unescaped));
+ }
+
+ if (streq(field, "StandardInputData")) {
+ _cleanup_free_ void *decoded = NULL;
+ size_t sz;
+
+ r = unbase64mem(eq, (size_t) -1, &decoded, &sz);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode base64 data '%s': %m", eq);
+
+ return bus_append_byte_array(m, field, decoded, sz);
+ }
+
+ if ((suffix = startswith(field, "Limit"))) {
+ int rl;
+
+ rl = rlimit_from_string(suffix);
+ if (rl >= 0) {
+ const char *sn;
+ struct rlimit l;
+
+ r = rlimit_parse(rl, eq, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse resource limit: %s", eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", l.rlim_max);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ sn = strjoina(field, "Soft");
+ r = sd_bus_message_append(m, "(sv)", sn, "t", l.rlim_cur);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+ }
+
+ if (STR_IN_SET(field, "AppArmorProfile",
+ "SmackProcessLabel")) {
+ int ignore = 0;
+ const char *s = eq;
+
+ if (eq[0] == '-') {
+ ignore = 1;
+ s = eq + 1;
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "(bs)", ignore, s);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "CapabilityBoundingSet",
+ "AmbientCapabilities")) {
+ uint64_t sum = 0;
+ bool invert = false;
+ const char *p = eq;
+
+ if (*p == '~') {
+ invert = true;
+ p++;
+ }
+
+ r = capability_set_from_string(p, &sum);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, eq);
+
+ sum = invert ? ~sum : sum;
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", sum);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "CPUAffinity")) {
+ _cleanup_(cpu_set_reset) CPUSet cpuset = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ if (eq && streq(eq, "numa")) {
+ r = sd_bus_message_append(m, "(sv)", "CPUAffinityFromNUMA", "b", true);
+ if (r < 0)
+ return bus_log_create_error(r);
+ return r;
+ }
+
+ r = parse_cpu_set(eq, &cpuset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ r = cpu_set_to_dbus(&cpuset, &array, &allocated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to serialize CPUAffinity: %m");
+
+ return bus_append_byte_array(m, field, array, allocated);
+ }
+
+ if (streq(field, "NUMAPolicy")) {
+ r = mpol_from_string(eq);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "i", (int32_t) r);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "NUMAMask")) {
+ _cleanup_(cpu_set_reset) CPUSet nodes = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ if (eq && streq(eq, "all")) {
+ r = numa_mask_add_all(&nodes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create NUMA mask representing \"all\" NUMA nodes: %m");
+ } else {
+ r = parse_cpu_set(eq, &nodes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+ }
+
+ r = cpu_set_to_dbus(&nodes, &array, &allocated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to serialize NUMAMask: %m");
+
+ return bus_append_byte_array(m, field, array, allocated);
+ }
+
+ if (STR_IN_SET(field, "RestrictAddressFamilies",
+ "SystemCallFilter",
+ "SystemCallLog")) {
+ int allow_list = 1;
+ const char *p = eq;
+
+ if (*p == '~') {
+ allow_list = 0;
+ p++;
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "(bas)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "bas");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 'b', &allow_list);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax: %s", eq);
+
+ r = sd_bus_message_append_basic(m, 's', word);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "RestrictNamespaces")) {
+ bool invert = false;
+ unsigned long flags;
+
+ r = parse_boolean(eq);
+ if (r > 0)
+ flags = 0;
+ else if (r == 0)
+ flags = NAMESPACE_FLAGS_ALL;
+ else {
+ if (eq[0] == '~') {
+ invert = true;
+ eq++;
+ }
+
+ r = namespace_flags_from_string(eq, &flags);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s.", field, eq);
+ }
+
+ if (invert)
+ flags = (~flags) & NAMESPACE_FLAGS_ALL;
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", (uint64_t) flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "BindPaths",
+ "BindReadOnlyPaths")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ssbt)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ssbt)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *source = NULL, *destination = NULL;
+ char *s = NULL, *d = NULL;
+ bool ignore_enoent = false;
+ uint64_t flags = MS_REC;
+
+ r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ break;
+
+ s = source;
+ if (s[0] == '-') {
+ ignore_enoent = true;
+ s++;
+ }
+
+ if (p && p[-1] == ':') {
+ r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing argument after ':': %s",
+ eq);
+
+ d = destination;
+
+ if (p && p[-1] == ':') {
+ _cleanup_free_ char *options = NULL;
+
+ r = extract_first_word(&p, &options, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+
+ if (isempty(options) || streq(options, "rbind"))
+ flags = MS_REC;
+ else if (streq(options, "norbind"))
+ flags = 0;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown options: %s",
+ eq);
+ }
+ } else
+ d = s;
+
+ r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "TemporaryFileSystem")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *path = NULL;
+ const char *w;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ break;
+
+ w = word;
+ r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse argument: %s",
+ p);
+
+ r = sd_bus_message_append(m, "(ss)", path, w);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "RootHash")) {
+ _cleanup_free_ void *roothash_decoded = NULL;
+ size_t roothash_decoded_size = 0;
+
+ /* We have the path to a roothash to load and decode, eg: RootHash=/foo/bar.roothash */
+ if (path_is_absolute(eq))
+ return bus_append_string(m, "RootHashPath", eq);
+
+ /* We have a roothash to decode, eg: RootHash=012345789abcdef */
+ r = unhexmem(eq, strlen(eq), &roothash_decoded, &roothash_decoded_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode RootHash= '%s': %m", eq);
+ if (roothash_decoded_size < sizeof(sd_id128_t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "RootHash= '%s' is too short: %m", eq);
+
+ return bus_append_byte_array(m, field, roothash_decoded, roothash_decoded_size);
+ }
+
+ if (streq(field, "RootHashSignature")) {
+ _cleanup_free_ void *roothash_sig_decoded = NULL;
+ char *value;
+ size_t roothash_sig_decoded_size = 0;
+
+ /* We have the path to a roothash signature to load and decode, eg: RootHash=/foo/bar.roothash.p7s */
+ if (path_is_absolute(eq))
+ return bus_append_string(m, "RootHashSignaturePath", eq);
+
+ if (!(value = startswith(eq, "base64:")))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decode RootHashSignature= '%s', not a path but doesn't start with 'base64:': %m", eq);
+
+ /* We have a roothash signature to decode, eg: RootHashSignature=base64:012345789abcdef */
+ r = unbase64mem(value, strlen(value), &roothash_sig_decoded, &roothash_sig_decoded_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode RootHashSignature= '%s': %m", eq);
+
+ return bus_append_byte_array(m, field, roothash_sig_decoded, roothash_sig_decoded_size);
+ }
+
+ if (streq(field, "RootImageOptions")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **first = NULL, **second = NULL;
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = strv_split_colon_pairs(&l, p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+
+ STRV_FOREACH_PAIR(first, second, l) {
+ /* Format is either 'root:foo' or 'foo' (root is implied) */
+ if (!isempty(*second) && partition_designator_from_string(*first) < 0)
+ return bus_log_create_error(-EINVAL);
+
+ r = sd_bus_message_append(m, "(ss)",
+ !isempty(*second) ? *first : "root",
+ !isempty(*second) ? *second : *first);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "MountImages")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ssba(ss))");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ssba(ss))");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *first = NULL, *second = NULL, *tuple = NULL;
+ const char *q = NULL, *source = NULL;
+ bool permissive = false;
+
+ r = extract_first_word(&p, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ q = tuple;
+ r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &first, &second, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ source = first;
+ if (source[0] == '-') {
+ permissive = true;
+ source++;
+ }
+
+ if (isempty(second))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing argument after ':': %s",
+ eq);
+
+ r = sd_bus_message_open_container(m, 'r', "ssba(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ssb", source, second, permissive);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *partition = NULL, *mount_options = NULL;
+
+ r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ /* Single set of options, applying to the root partition/single filesystem */
+ if (r == 1) {
+ r = sd_bus_message_append(m, "(ss)", "root", partition);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ break;
+ }
+
+ if (partition_designator_from_string(partition) < 0)
+ return bus_log_create_error(-EINVAL);
+
+ r = sd_bus_message_append(m, "(ss)", partition, mount_options);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_kill_property(sd_bus_message *m, const char *field, const char *eq) {
+ if (streq(field, "KillMode"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "SendSIGHUP",
+ "SendSIGKILL"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "KillSignal",
+ "RestartKillSignal",
+ "FinalKillSignal",
+ "WatchdogSignal"))
+ return bus_append_signal_from_string(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_mount_property(sd_bus_message *m, const char *field, const char *eq) {
+
+ if (STR_IN_SET(field, "What",
+ "Where",
+ "Options",
+ "Type"))
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "TimeoutSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field, "SloppyOptions",
+ "LazyUnmount",
+ "ForceUnmount",
+ "ReadwriteOnly"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_path_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (streq(field, "MakeDirectory"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field, "PathExists",
+ "PathExistsGlob",
+ "PathChanged",
+ "PathModified",
+ "DirectoryNotEmpty")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 1, field, eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_scope_property(sd_bus_message *m, const char *field, const char *eq) {
+ if (streq(field, "RuntimeMaxSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "TimeoutStopSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_service_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "PIDFile",
+ "Type",
+ "Restart",
+ "BusName",
+ "NotifyAccess",
+ "USBFunctionDescriptors",
+ "USBFunctionStrings",
+ "OOMPolicy",
+ "TimeoutStartFailureMode",
+ "TimeoutStopFailureMode"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "PermissionsStartOnly",
+ "RootDirectoryStartOnly",
+ "RemainAfterExit",
+ "GuessMainPID"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "RestartSec",
+ "TimeoutStartSec",
+ "TimeoutStopSec",
+ "TimeoutAbortSec",
+ "RuntimeMaxSec",
+ "WatchdogSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "TimeoutSec")) {
+ r = bus_append_parse_sec_rename(m, "TimeoutStartSec", eq);
+ if (r < 0)
+ return r;
+
+ return bus_append_parse_sec_rename(m, "TimeoutStopSec", eq);
+ }
+
+ if (streq(field, "FileDescriptorStoreMax"))
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field, "ExecCondition",
+ "ExecStartPre",
+ "ExecStart",
+ "ExecStartPost",
+ "ExecConditionEx",
+ "ExecStartPreEx",
+ "ExecStartEx",
+ "ExecStartPostEx",
+ "ExecReload",
+ "ExecStop",
+ "ExecStopPost",
+ "ExecReloadEx",
+ "ExecStopEx",
+ "ExecStopPostEx"))
+ return bus_append_exec_command(m, field, eq);
+
+ if (STR_IN_SET(field, "RestartPreventExitStatus",
+ "RestartForceExitStatus",
+ "SuccessExitStatus")) {
+ _cleanup_free_ int *status = NULL, *signal = NULL;
+ size_t n_status = 0, n_signal = 0;
+ const char *p;
+
+ for (p = eq;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax in %s: %s", field, eq);
+
+ /* We need to call exit_status_from_string() first, because we want
+ * to parse numbers as exit statuses, not signals. */
+
+ r = exit_status_from_string(word);
+ if (r >= 0) {
+ assert(r >= 0 && r < 256);
+
+ status = reallocarray(status, n_status + 1, sizeof(int));
+ if (!status)
+ return log_oom();
+
+ status[n_status++] = r;
+
+ } else if ((r = signal_from_string(word)) >= 0) {
+ signal = reallocarray(signal, n_signal + 1, sizeof(int));
+ if (!signal)
+ return log_oom();
+
+ signal[n_signal++] = r;
+
+ } else
+ /* original r from exit_status_to_string() */
+ return log_error_errno(r, "Invalid status or signal %s in %s: %m",
+ word, field);
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "(aiai)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "aiai");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'i', status, n_status * sizeof(int));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'i', signal, n_signal * sizeof(int));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_socket_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "Accept",
+ "FlushPending",
+ "Writable",
+ "KeepAlive",
+ "NoDelay",
+ "FreeBind",
+ "Transparent",
+ "Broadcast",
+ "PassCredentials",
+ "PassSecurity",
+ "PassPacketInfo",
+ "ReusePort",
+ "RemoveOnStop",
+ "SELinuxContextFromNet"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "Priority",
+ "IPTTL",
+ "Mark"))
+ return bus_append_safe_atoi(m, field, eq);
+
+ if (streq(field, "IPTOS"))
+ return bus_append_ip_tos_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "Backlog",
+ "MaxConnections",
+ "MaxConnectionsPerSource",
+ "KeepAliveProbes",
+ "TriggerLimitBurst"))
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field, "SocketMode",
+ "DirectoryMode"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field, "MessageQueueMaxMessages",
+ "MessageQueueMessageSize"))
+ return bus_append_safe_atoi64(m, field, eq);
+
+ if (STR_IN_SET(field, "TimeoutSec",
+ "KeepAliveTimeSec",
+ "KeepAliveIntervalSec",
+ "DeferAcceptSec",
+ "TriggerLimitIntervalSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (STR_IN_SET(field, "ReceiveBuffer",
+ "SendBuffer",
+ "PipeSize"))
+ return bus_append_parse_size(m, field, eq, 1024);
+
+ if (STR_IN_SET(field, "ExecStartPre",
+ "ExecStartPost",
+ "ExecReload",
+ "ExecStopPost"))
+ return bus_append_exec_command(m, field, eq);
+
+ if (STR_IN_SET(field, "SmackLabel",
+ "SmackLabelIPIn",
+ "SmackLabelIPOut",
+ "TCPCongestion",
+ "BindToDevice",
+ "BindIPv6Only",
+ "FileDescriptorName",
+ "SocketUser",
+ "SocketGroup",
+ "Timestamping"))
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "Symlinks"))
+ return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE);
+
+ if (streq(field, "SocketProtocol"))
+ return bus_append_parse_ip_protocol(m, field, eq);
+
+ if (STR_IN_SET(field, "ListenStream",
+ "ListenDatagram",
+ "ListenSequentialPacket",
+ "ListenNetlink",
+ "ListenSpecial",
+ "ListenMessageQueue",
+ "ListenFIFO",
+ "ListenUSBFunction")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 1, field + STRLEN("Listen"), eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+static int bus_append_timer_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "WakeSystem",
+ "RemainAfterElapse",
+ "Persistent",
+ "OnTimezoneChange",
+ "OnClockChange",
+ "FixedRandomDelay"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "AccuracySec",
+ "RandomizedDelaySec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (STR_IN_SET(field, "OnActiveSec",
+ "OnBootSec",
+ "OnStartupSec",
+ "OnUnitActiveSec",
+ "OnUnitInactiveSec")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 0);
+ else {
+ usec_t t;
+ r = parse_sec(eq, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 1, field, t);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "OnCalendar")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 1, field, eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_unit_property(sd_bus_message *m, const char *field, const char *eq) {
+ ConditionType t = _CONDITION_TYPE_INVALID;
+ bool is_condition = false;
+ int r;
+
+ if (STR_IN_SET(field, "Description",
+ "SourcePath",
+ "OnFailureJobMode",
+ "JobTimeoutAction",
+ "JobTimeoutRebootArgument",
+ "StartLimitAction",
+ "FailureAction",
+ "SuccessAction",
+ "RebootArgument",
+ "CollectMode"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "StopWhenUnneeded",
+ "RefuseManualStart",
+ "RefuseManualStop",
+ "AllowIsolate",
+ "IgnoreOnIsolate",
+ "DefaultDependencies"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "JobTimeoutSec",
+ "JobRunningTimeoutSec",
+ "StartLimitIntervalSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "StartLimitBurst"))
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field, "SuccessActionExitStatus",
+ "FailureActionExitStatus")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "i", -1);
+ else {
+ uint8_t u;
+
+ r = safe_atou8(eq, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "i", (int) u);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (unit_dependency_from_string(field) >= 0 ||
+ STR_IN_SET(field, "Documentation",
+ "RequiresMountsFor"))
+ return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE);
+
+ t = condition_type_from_string(field);
+ if (t >= 0)
+ is_condition = true;
+ else
+ t = assert_type_from_string(field);
+ if (t >= 0) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 0);
+ else {
+ const char *p = eq;
+ int trigger, negate;
+
+ trigger = *p == '|';
+ if (trigger)
+ p++;
+
+ negate = *p == '!';
+ if (negate)
+ p++;
+
+ r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 1,
+ field, trigger, negate, p);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment) {
+ const char *eq, *field;
+ int r;
+
+ assert(m);
+ assert(assignment);
+
+ eq = strchr(assignment, '=');
+ if (!eq)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not an assignment: %s", assignment);
+
+ field = strndupa(assignment, eq - assignment);
+ eq++;
+
+ switch (t) {
+ case UNIT_SERVICE:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_service_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SOCKET:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_socket_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_TIMER:
+ r = bus_append_timer_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_PATH:
+ r = bus_append_path_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SLICE:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SCOPE:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_scope_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_MOUNT:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_mount_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ break;
+
+ case UNIT_AUTOMOUNT:
+ r = bus_append_automount_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ break;
+
+ case UNIT_TARGET:
+ case UNIT_DEVICE:
+ case UNIT_SWAP:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not supported unit type");
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid unit type");
+ }
+
+ r = bus_append_unit_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown assignment: %s", assignment);
+}
+
+int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l) {
+ char **i;
+ int r;
+
+ assert(m);
+
+ STRV_FOREACH(i, l) {
+ r = bus_append_unit_property_assignment(m, t, *i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes) {
+ const char *type, *path, *source;
+ int r;
+
+ /* changes is dereferenced when calling unit_file_dump_changes() later,
+ * so we have to make sure this is not NULL. */
+ assert(changes);
+ assert(n_changes);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(sss)", &type, &path, &source)) > 0) {
+ /* We expect only "success" changes to be sent over the bus.
+ Hence, reject anything negative. */
+ UnitFileChangeType ch = unit_file_change_type_from_string(type);
+
+ if (ch < 0) {
+ log_notice("Manager reported unknown change type \"%s\" for path \"%s\", ignoring.", type, path);
+ continue;
+ }
+
+ r = unit_file_changes_add(changes, n_changes, ch, path, source);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ unit_file_dump_changes(0, NULL, *changes, *n_changes, quiet);
+ return 0;
+}
+
+int unit_load_state(sd_bus *bus, const char *name, char **load_state) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return log_oom();
+
+ /* This function warns on it's own, because otherwise it'd be awkward to pass
+ * the dbus error message around. */
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "LoadState",
+ &error,
+ load_state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get load state of %s: %s", name, bus_error_message(&error, r));
+
+ return 0;
+}
+
+int unit_info_compare(const UnitInfo *a, const UnitInfo *b) {
+ int r;
+
+ /* First, order by machine */
+ r = strcasecmp_ptr(a->machine, b->machine);
+ if (r != 0)
+ return r;
+
+ /* Second, order by unit type */
+ r = strcasecmp_ptr(strrchr(a->id, '.'), strrchr(b->id, '.'));
+ if (r != 0)
+ return r;
+
+ /* Third, order by name */
+ return strcasecmp(a->id, b->id);
+}
diff --git a/src/shared/bus-unit-util.h b/src/shared/bus-unit-util.h
new file mode 100644
index 0000000..999caf6
--- /dev/null
+++ b/src/shared/bus-unit-util.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "install.h"
+#include "unit-def.h"
+
+typedef struct UnitInfo {
+ const char *machine;
+ const char *id;
+ const char *description;
+ const char *load_state;
+ const char *active_state;
+ const char *sub_state;
+ const char *following;
+ const char *unit_path;
+ uint32_t job_id;
+ const char *job_type;
+ const char *job_path;
+} UnitInfo;
+
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u);
+
+int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment);
+int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l);
+
+int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes);
+
+int unit_load_state(sd_bus *bus, const char *name, char **load_state);
+
+int unit_info_compare(const UnitInfo *a, const UnitInfo *b);
diff --git a/src/shared/bus-util.c b/src/shared/bus-util.c
new file mode 100644
index 0000000..fbda218
--- /dev/null
+++ b/src/shared/bus-util.c
@@ -0,0 +1,577 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "bus-common-errors.h"
+#include "bus-internal.h"
+#include "bus-label.h"
+#include "bus-util.h"
+#include "path-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+
+static int name_owner_change_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ sd_event *e = userdata;
+
+ assert(m);
+ assert(e);
+
+ sd_bus_close(sd_bus_message_get_bus(m));
+ sd_event_exit(e, 0);
+
+ return 1;
+}
+
+int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name) {
+ const char *match;
+ const char *unique;
+ int r;
+
+ assert(e);
+ assert(bus);
+ assert(name);
+
+ /* We unregister the name here and then wait for the
+ * NameOwnerChanged signal for this event to arrive before we
+ * quit. We do this in order to make sure that any queued
+ * requests are still processed before we really exit. */
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0)
+ return r;
+
+ match = strjoina(
+ "sender='org.freedesktop.DBus',"
+ "type='signal',"
+ "interface='org.freedesktop.DBus',"
+ "member='NameOwnerChanged',"
+ "path='/org/freedesktop/DBus',"
+ "arg0='", name, "',",
+ "arg1='", unique, "',",
+ "arg2=''");
+
+ r = sd_bus_add_match_async(bus, NULL, match, name_owner_change_callback, NULL, e);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_release_name_async(bus, NULL, name, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int bus_event_loop_with_idle(
+ sd_event *e,
+ sd_bus *bus,
+ const char *name,
+ usec_t timeout,
+ check_idle_t check_idle,
+ void *userdata) {
+ bool exiting = false;
+ int r, code;
+
+ assert(e);
+ assert(bus);
+ assert(name);
+
+ for (;;) {
+ bool idle;
+
+ r = sd_event_get_state(e);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ break;
+
+ if (check_idle)
+ idle = check_idle(userdata);
+ else
+ idle = true;
+
+ r = sd_event_run(e, exiting || !idle ? (uint64_t) -1 : timeout);
+ if (r < 0)
+ return r;
+
+ if (r == 0 && !exiting && idle) {
+ /* Inform the service manager that we are going down, so that it will queue all
+ * further start requests, instead of assuming we are already running. */
+ sd_notify(false, "STOPPING=1");
+
+ r = bus_async_unregister_and_exit(e, bus, name);
+ if (r < 0)
+ return r;
+
+ exiting = true;
+ continue;
+ }
+ }
+
+ r = sd_event_get_exit_code(e, &code);
+ if (r < 0)
+ return r;
+
+ return code;
+}
+
+int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *rep = NULL;
+ int r, has_owner = 0;
+
+ assert(c);
+ assert(name);
+
+ r = sd_bus_call_method(c,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/dbus",
+ "org.freedesktop.DBus",
+ "NameHasOwner",
+ error,
+ &rep,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(rep, 'b', &has_owner);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ return has_owner;
+}
+
+bool bus_error_is_unknown_service(const sd_bus_error *error) {
+ return sd_bus_error_has_names(error,
+ SD_BUS_ERROR_SERVICE_UNKNOWN,
+ SD_BUS_ERROR_NAME_HAS_NO_OWNER,
+ BUS_ERROR_NO_SUCH_UNIT);
+}
+
+int bus_check_peercred(sd_bus *c) {
+ struct ucred ucred;
+ int fd, r;
+
+ assert(c);
+
+ fd = sd_bus_get_fd(c);
+ if (fd < 0)
+ return fd;
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ if (ucred.uid != 0 && ucred.uid != geteuid())
+ return -EPERM;
+
+ return 1;
+}
+
+int bus_connect_system_systemd(sd_bus **_bus) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(_bus);
+
+ if (geteuid() != 0)
+ return sd_bus_default_system(_bus);
+
+ /* If we are root then let's talk directly to the system
+ * instance, instead of going via the bus */
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_address(bus, "unix:path=/run/systemd/private");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return sd_bus_default_system(_bus);
+
+ r = bus_check_peercred(bus);
+ if (r < 0)
+ return r;
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_connect_user_systemd(sd_bus **_bus) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *ee = NULL;
+ const char *e;
+ int r;
+
+ assert(_bus);
+
+ e = secure_getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return sd_bus_default_user(_bus);
+
+ ee = bus_address_escape(e);
+ if (!ee)
+ return -ENOMEM;
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ bus->address = strjoin("unix:path=", ee, "/systemd/private");
+ if (!bus->address)
+ return -ENOMEM;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return sd_bus_default_user(_bus);
+
+ r = bus_check_peercred(bus);
+ if (r < 0)
+ return r;
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **ret) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(transport >= 0);
+ assert(transport < _BUS_TRANSPORT_MAX);
+ assert(ret);
+
+ assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL);
+ assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP);
+
+ switch (transport) {
+
+ case BUS_TRANSPORT_LOCAL:
+ if (user)
+ r = sd_bus_default_user(&bus);
+ else {
+ if (sd_booted() <= 0)
+ /* Print a friendly message when the local system is actually not running systemd as PID 1. */
+ return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "System has not been booted with systemd as init system (PID 1). Can't operate.");
+ r = sd_bus_default_system(&bus);
+ }
+ break;
+
+ case BUS_TRANSPORT_REMOTE:
+ r = sd_bus_open_system_remote(&bus, host);
+ break;
+
+ case BUS_TRANSPORT_MACHINE:
+ r = sd_bus_open_system_machine(&bus, host);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unknown transport type.");
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_exit_on_disconnect(bus, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus) {
+ int r;
+
+ assert(transport >= 0);
+ assert(transport < _BUS_TRANSPORT_MAX);
+ assert(bus);
+
+ assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL);
+ assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP);
+
+ switch (transport) {
+
+ case BUS_TRANSPORT_LOCAL:
+ if (user)
+ r = bus_connect_user_systemd(bus);
+ else {
+ if (sd_booted() <= 0)
+ /* Print a friendly message when the local system is actually not running systemd as PID 1. */
+ return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "System has not been booted with systemd as init system (PID 1). Can't operate.");
+ r = bus_connect_system_systemd(bus);
+ }
+ break;
+
+ case BUS_TRANSPORT_REMOTE:
+ r = sd_bus_open_system_remote(bus, host);
+ break;
+
+ case BUS_TRANSPORT_MACHINE:
+ r = sd_bus_open_system_machine(bus, host);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unknown transport type.");
+ }
+
+ return r;
+}
+
+/**
+ * bus_path_encode_unique() - encode unique object path
+ * @b: bus connection or NULL
+ * @prefix: object path prefix
+ * @sender_id: unique-name of client, or NULL
+ * @external_id: external ID to be chosen by client, or NULL
+ * @ret_path: storage for encoded object path pointer
+ *
+ * Whenever we provide a bus API that allows clients to create and manage
+ * server-side objects, we need to provide a unique name for these objects. If
+ * we let the server choose the name, we suffer from a race condition: If a
+ * client creates an object asynchronously, it cannot destroy that object until
+ * it received the method reply. It cannot know the name of the new object,
+ * thus, it cannot destroy it. Furthermore, it enforces a round-trip.
+ *
+ * Therefore, many APIs allow the client to choose the unique name for newly
+ * created objects. There're two problems to solve, though:
+ * 1) Object names are usually defined via dbus object paths, which are
+ * usually globally namespaced. Therefore, multiple clients must be able
+ * to choose unique object names without interference.
+ * 2) If multiple libraries share the same bus connection, they must be
+ * able to choose unique object names without interference.
+ * The first problem is solved easily by prefixing a name with the
+ * unique-bus-name of a connection. The server side must enforce this and
+ * reject any other name. The second problem is solved by providing unique
+ * suffixes from within sd-bus.
+ *
+ * This helper allows clients to create unique object-paths. It uses the
+ * template '/prefix/sender_id/external_id' and returns the new path in
+ * @ret_path (must be freed by the caller).
+ * If @sender_id is NULL, the unique-name of @b is used. If @external_id is
+ * NULL, this function allocates a unique suffix via @b (by requesting a new
+ * cookie). If both @sender_id and @external_id are given, @b can be passed as
+ * NULL.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path) {
+ _cleanup_free_ char *sender_label = NULL, *external_label = NULL;
+ char external_buf[DECIMAL_STR_MAX(uint64_t)], *p;
+ int r;
+
+ assert_return(b || (sender_id && external_id), -EINVAL);
+ assert_return(sd_bus_object_path_is_valid(prefix), -EINVAL);
+ assert_return(ret_path, -EINVAL);
+
+ if (!sender_id) {
+ r = sd_bus_get_unique_name(b, &sender_id);
+ if (r < 0)
+ return r;
+ }
+
+ if (!external_id) {
+ xsprintf(external_buf, "%"PRIu64, ++b->cookie);
+ external_id = external_buf;
+ }
+
+ sender_label = bus_label_escape(sender_id);
+ if (!sender_label)
+ return -ENOMEM;
+
+ external_label = bus_label_escape(external_id);
+ if (!external_label)
+ return -ENOMEM;
+
+ p = path_join(prefix, sender_label, external_label);
+ if (!p)
+ return -ENOMEM;
+
+ *ret_path = p;
+ return 0;
+}
+
+/**
+ * bus_path_decode_unique() - decode unique object path
+ * @path: object path to decode
+ * @prefix: object path prefix
+ * @ret_sender: output parameter for sender-id label
+ * @ret_external: output parameter for external-id label
+ *
+ * This does the reverse of bus_path_encode_unique() (see its description for
+ * details). Both trailing labels, sender-id and external-id, are unescaped and
+ * returned in the given output parameters (the caller must free them).
+ *
+ * Note that this function returns 0 if the path does not match the template
+ * (see bus_path_encode_unique()), 1 if it matched.
+ *
+ * Returns: Negative error code on failure, 0 if the given object path does not
+ * match the template (return parameters are set to NULL), 1 if it was
+ * parsed successfully (return parameters contain allocated labels).
+ */
+int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external) {
+ const char *p, *q;
+ char *sender, *external;
+
+ assert(sd_bus_object_path_is_valid(path));
+ assert(sd_bus_object_path_is_valid(prefix));
+ assert(ret_sender);
+ assert(ret_external);
+
+ p = object_path_startswith(path, prefix);
+ if (!p) {
+ *ret_sender = NULL;
+ *ret_external = NULL;
+ return 0;
+ }
+
+ q = strchr(p, '/');
+ if (!q) {
+ *ret_sender = NULL;
+ *ret_external = NULL;
+ return 0;
+ }
+
+ sender = bus_label_unescape_n(p, q - p);
+ external = bus_label_unescape(q + 1);
+ if (!sender || !external) {
+ free(sender);
+ free(external);
+ return -ENOMEM;
+ }
+
+ *ret_sender = sender;
+ *ret_external = external;
+ return 1;
+}
+
+int bus_track_add_name_many(sd_bus_track *t, char **l) {
+ int r = 0;
+ char **i;
+
+ assert(t);
+
+ /* Continues adding after failure, and returns the first failure. */
+
+ STRV_FOREACH(i, l) {
+ int k;
+
+ k = sd_bus_track_add_name(t, *i);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ const char *e;
+ int r;
+
+ assert(ret);
+
+ /* Match like sd_bus_open_system(), but with the "watch_bind" feature and the Connected() signal
+ * turned on. */
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(bus, description);
+ if (r < 0)
+ return r;
+ }
+
+ e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS");
+ if (!e)
+ e = DEFAULT_SYSTEM_BUS_ADDRESS;
+
+ r = sd_bus_set_address(bus, e);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_bus_client(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_negotiate_creds(bus, true, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_watch_bind(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_connected_signal(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_reply_pair_array(sd_bus_message *m, char **l) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ char **k, **v;
+ int r;
+
+ assert(m);
+
+ /* Reply to the specified message with a message containing a dictionary put together from the
+ * specified strv */
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(k, v, l) {
+ r = sd_bus_message_append(reply, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static void bus_message_unref_wrapper(void *m) {
+ sd_bus_message_unref(m);
+}
+
+const struct hash_ops bus_message_hash_ops = {
+ .hash = trivial_hash_func,
+ .compare = trivial_compare_func,
+ .free_value = bus_message_unref_wrapper,
+};
diff --git a/src/shared/bus-util.h b/src/shared/bus-util.h
new file mode 100644
index 0000000..27dd6c1
--- /dev/null
+++ b/src/shared/bus-util.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "errno-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "time-util.h"
+
+typedef enum BusTransport {
+ BUS_TRANSPORT_LOCAL,
+ BUS_TRANSPORT_REMOTE,
+ BUS_TRANSPORT_MACHINE,
+ _BUS_TRANSPORT_MAX,
+ _BUS_TRANSPORT_INVALID = -1
+} BusTransport;
+
+int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name);
+
+typedef bool (*check_idle_t)(void *userdata);
+
+int bus_event_loop_with_idle(sd_event *e, sd_bus *bus, const char *name, usec_t timeout, check_idle_t check_idle, void *userdata);
+
+int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error);
+bool bus_error_is_unknown_service(const sd_bus_error *error);
+
+int bus_check_peercred(sd_bus *c);
+
+int bus_connect_system_systemd(sd_bus **_bus);
+int bus_connect_user_systemd(sd_bus **_bus);
+
+int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **bus);
+int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus);
+
+#define bus_log_address_error(r) \
+ ({ \
+ int _k = (r); \
+ log_error_errno(_k, \
+ _k == -ENOMEDIUM ? "Failed to set bus address: $DBUS_SESSION_BUS_ADDRESS and $XDG_RUNTIME_DIR not defined (consider using --machine=<user>@.host --user to connect to bus of other user)" : \
+ "Failed to set bus address: %m"); \
+ })
+
+#define bus_log_connect_error(r) \
+ ({ \
+ int _k = (r); \
+ log_error_errno(_k, \
+ _k == -ENOMEDIUM ? "Failed to connect to bus: $DBUS_SESSION_BUS_ADDRESS and $XDG_RUNTIME_DIR not defined (consider using --machine=<user>@.host --user to connect to bus of other user)" : \
+ ERRNO_IS_PRIVILEGE(_k) ? "Failed to connect to bus: Operation not permitted (consider using --machine=<user>@.host --user to connect to bus of other user)" : \
+ "Failed to connect to bus: %m"); \
+ })
+
+#define bus_log_parse_error(r) \
+ log_error_errno(r, "Failed to parse bus message: %m")
+
+#define bus_log_create_error(r) \
+ log_error_errno(r, "Failed to create bus message: %m")
+
+int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path);
+int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external);
+
+int bus_track_add_name_many(sd_bus_track *t, char **l);
+
+int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description);
+static inline int bus_open_system_watch_bind(sd_bus **ret) {
+ return bus_open_system_watch_bind_with_description(ret, NULL);
+}
+
+int bus_reply_pair_array(sd_bus_message *m, char **l);
+
+extern const struct hash_ops bus_message_hash_ops;
diff --git a/src/shared/bus-wait-for-jobs.c b/src/shared/bus-wait-for-jobs.c
new file mode 100644
index 0000000..b2a9e03
--- /dev/null
+++ b/src/shared/bus-wait-for-jobs.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-wait-for-jobs.h"
+#include "set.h"
+#include "bus-util.h"
+#include "bus-internal.h"
+#include "unit-def.h"
+#include "escape.h"
+#include "strv.h"
+
+typedef struct BusWaitForJobs {
+ sd_bus *bus;
+
+ /* The set of jobs to wait for, as bus object paths */
+ Set *jobs;
+
+ /* The unit name and job result of the last Job message */
+ char *name;
+ char *result;
+
+ sd_bus_slot *slot_job_removed;
+ sd_bus_slot *slot_disconnected;
+} BusWaitForJobs;
+
+static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ assert(m);
+
+ log_error("Warning! D-Bus connection terminated.");
+ sd_bus_close(sd_bus_message_get_bus(m));
+
+ return 0;
+}
+
+static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char *path, *unit, *result;
+ BusWaitForJobs *d = userdata;
+ uint32_t id;
+ char *found;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ found = set_remove(d->jobs, (char*) path);
+ if (!found)
+ return 0;
+
+ free(found);
+
+ (void) free_and_strdup(&d->result, empty_to_null(result));
+
+ (void) free_and_strdup(&d->name, empty_to_null(unit));
+
+ return 0;
+}
+
+void bus_wait_for_jobs_free(BusWaitForJobs *d) {
+ if (!d)
+ return;
+
+ set_free(d->jobs);
+
+ sd_bus_slot_unref(d->slot_disconnected);
+ sd_bus_slot_unref(d->slot_job_removed);
+
+ sd_bus_unref(d->bus);
+
+ free(d->name);
+ free(d->result);
+
+ free(d);
+}
+
+int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret) {
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *d = NULL;
+ int r;
+
+ assert(bus);
+ assert(ret);
+
+ d = new(BusWaitForJobs, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (BusWaitForJobs) {
+ .bus = sd_bus_ref(bus),
+ };
+
+ /* When we are a bus client we match by sender. Direct
+ * connections OTOH have no initialized sender field, and
+ * hence we ignore the sender then */
+ r = sd_bus_match_signal_async(
+ bus,
+ &d->slot_job_removed,
+ bus->bus_client ? "org.freedesktop.systemd1" : NULL,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved",
+ match_job_removed, NULL, d);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &d->slot_disconnected,
+ "org.freedesktop.DBus.Local",
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "Disconnected",
+ match_disconnected, NULL, d);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(d);
+
+ return 0;
+}
+
+static int bus_process_wait(sd_bus *bus) {
+ int r;
+
+ for (;;) {
+ r = sd_bus_process(bus, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int bus_job_get_service_result(BusWaitForJobs *d, char **result) {
+ _cleanup_free_ char *dbus_path = NULL;
+
+ assert(d);
+ assert(d->name);
+ assert(result);
+
+ if (!endswith(d->name, ".service"))
+ return -EINVAL;
+
+ dbus_path = unit_dbus_path_from_name(d->name);
+ if (!dbus_path)
+ return -ENOMEM;
+
+ return sd_bus_get_property_string(d->bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Service",
+ "Result",
+ NULL,
+ result);
+}
+
+static void log_job_error_with_service_result(const char* service, const char *result, const char* const* extra_args) {
+ _cleanup_free_ char *service_shell_quoted = NULL;
+ const char *systemctl = "systemctl", *journalctl = "journalctl";
+
+ static const struct {
+ const char *result, *explanation;
+ } explanations[] = {
+ { "resources", "of unavailable resources or another system error" },
+ { "protocol", "the service did not take the steps required by its unit configuration" },
+ { "timeout", "a timeout was exceeded" },
+ { "exit-code", "the control process exited with error code" },
+ { "signal", "a fatal signal was delivered to the control process" },
+ { "core-dump", "a fatal signal was delivered causing the control process to dump core" },
+ { "watchdog", "the service failed to send watchdog ping" },
+ { "start-limit", "start of the service was attempted too often" }
+ };
+
+ assert(service);
+
+ service_shell_quoted = shell_maybe_quote(service, ESCAPE_BACKSLASH);
+
+ if (!strv_isempty((char**) extra_args)) {
+ _cleanup_free_ char *t;
+
+ t = strv_join((char**) extra_args, " ");
+ systemctl = strjoina("systemctl ", t ? : "<args>");
+ journalctl = strjoina("journalctl ", t ? : "<args>");
+ }
+
+ if (!isempty(result)) {
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(explanations); ++i)
+ if (streq(result, explanations[i].result))
+ break;
+
+ if (i < ELEMENTSOF(explanations)) {
+ log_error("Job for %s failed because %s.\n"
+ "See \"%s status %s\" and \"%s -xe\" for details.\n",
+ service,
+ explanations[i].explanation,
+ systemctl,
+ service_shell_quoted ?: "<service>",
+ journalctl);
+ goto finish;
+ }
+ }
+
+ log_error("Job for %s failed.\n"
+ "See \"%s status %s\" and \"%s -xe\" for details.\n",
+ service,
+ systemctl,
+ service_shell_quoted ?: "<service>",
+ journalctl);
+
+finish:
+ /* For some results maybe additional explanation is required */
+ if (streq_ptr(result, "start-limit"))
+ log_info("To force a start use \"%1$s reset-failed %2$s\"\n"
+ "followed by \"%1$s start %2$s\" again.",
+ systemctl,
+ service_shell_quoted ?: "<service>");
+}
+
+static int check_wait_response(BusWaitForJobs *d, bool quiet, const char* const* extra_args) {
+ assert(d);
+ assert(d->name);
+ assert(d->result);
+
+ if (!quiet) {
+ if (streq(d->result, "canceled"))
+ log_error("Job for %s canceled.", strna(d->name));
+ else if (streq(d->result, "timeout"))
+ log_error("Job for %s timed out.", strna(d->name));
+ else if (streq(d->result, "dependency"))
+ log_error("A dependency job for %s failed. See 'journalctl -xe' for details.", strna(d->name));
+ else if (streq(d->result, "invalid"))
+ log_error("%s is not active, cannot reload.", strna(d->name));
+ else if (streq(d->result, "assert"))
+ log_error("Assertion failed on job for %s.", strna(d->name));
+ else if (streq(d->result, "unsupported"))
+ log_error("Operation on or unit type of %s not supported on this system.", strna(d->name));
+ else if (streq(d->result, "collected"))
+ log_error("Queued job for %s was garbage collected.", strna(d->name));
+ else if (streq(d->result, "once"))
+ log_error("Unit %s was started already once and can't be started again.", strna(d->name));
+ else if (!STR_IN_SET(d->result, "done", "skipped")) {
+
+ if (d->name && endswith(d->name, ".service")) {
+ _cleanup_free_ char *result = NULL;
+ int q;
+
+ q = bus_job_get_service_result(d, &result);
+ if (q < 0)
+ log_debug_errno(q, "Failed to get Result property of unit %s: %m", d->name);
+
+ log_job_error_with_service_result(d->name, result, extra_args);
+ } else
+ log_error("Job failed. See \"journalctl -xe\" for details.");
+ }
+ }
+
+ if (STR_IN_SET(d->result, "canceled", "collected"))
+ return -ECANCELED;
+ else if (streq(d->result, "timeout"))
+ return -ETIME;
+ else if (streq(d->result, "dependency"))
+ return -EIO;
+ else if (streq(d->result, "invalid"))
+ return -ENOEXEC;
+ else if (streq(d->result, "assert"))
+ return -EPROTO;
+ else if (streq(d->result, "unsupported"))
+ return -EOPNOTSUPP;
+ else if (streq(d->result, "once"))
+ return -ESTALE;
+ else if (STR_IN_SET(d->result, "done", "skipped"))
+ return 0;
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Unexpected job result, assuming server side newer than us: %s", d->result);
+}
+
+int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args) {
+ int r = 0;
+
+ assert(d);
+
+ while (!set_isempty(d->jobs)) {
+ int q;
+
+ q = bus_process_wait(d->bus);
+ if (q < 0)
+ return log_error_errno(q, "Failed to wait for response: %m");
+
+ if (d->name && d->result) {
+ q = check_wait_response(d, quiet, extra_args);
+ /* Return the first error as it is most likely to be
+ * meaningful. */
+ if (q < 0 && r == 0)
+ r = q;
+
+ log_debug_errno(q, "Got result %s/%m for job %s", d->result, d->name);
+ }
+
+ d->name = mfree(d->name);
+ d->result = mfree(d->result);
+ }
+
+ return r;
+}
+
+int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path) {
+ assert(d);
+
+ return set_put_strdup(&d->jobs, path);
+}
+
+int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet) {
+ int r;
+
+ r = bus_wait_for_jobs_add(d, path);
+ if (r < 0)
+ return log_oom();
+
+ return bus_wait_for_jobs(d, quiet, NULL);
+}
diff --git a/src/shared/bus-wait-for-jobs.h b/src/shared/bus-wait-for-jobs.h
new file mode 100644
index 0000000..0155887
--- /dev/null
+++ b/src/shared/bus-wait-for-jobs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "macro.h"
+
+typedef struct BusWaitForJobs BusWaitForJobs;
+
+int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret);
+void bus_wait_for_jobs_free(BusWaitForJobs *d);
+int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path);
+int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args);
+int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForJobs*, bus_wait_for_jobs_free);
diff --git a/src/shared/bus-wait-for-units.c b/src/shared/bus-wait-for-units.c
new file mode 100644
index 0000000..4f1c505
--- /dev/null
+++ b/src/shared/bus-wait-for-units.c
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-map-properties.h"
+#include "bus-wait-for-units.h"
+#include "hashmap.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-def.h"
+
+typedef struct WaitForItem {
+ BusWaitForUnits *parent;
+
+ BusWaitForUnitsFlags flags;
+
+ char *bus_path;
+
+ sd_bus_slot *slot_get_all;
+ sd_bus_slot *slot_properties_changed;
+
+ bus_wait_for_units_unit_callback unit_callback;
+ void *userdata;
+
+ char *active_state;
+ uint32_t job_id;
+ char *clean_result;
+} WaitForItem;
+
+typedef struct BusWaitForUnits {
+ sd_bus *bus;
+ sd_bus_slot *slot_disconnected;
+
+ Hashmap *items;
+
+ bus_wait_for_units_ready_callback ready_callback;
+ void *userdata;
+
+ WaitForItem *current;
+
+ BusWaitForUnitsState state;
+ bool has_failed:1;
+} BusWaitForUnits;
+
+static WaitForItem *wait_for_item_free(WaitForItem *item) {
+ int r;
+
+ if (!item)
+ return NULL;
+
+ if (item->parent) {
+ if (FLAGS_SET(item->flags, BUS_WAIT_REFFED) && item->bus_path && item->parent->bus) {
+ r = sd_bus_call_method_async(
+ item->parent->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ item->bus_path,
+ "org.freedesktop.systemd1.Unit",
+ "Unref",
+ NULL,
+ NULL,
+ NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to drop reference to unit %s, ignoring: %m", item->bus_path);
+ }
+
+ assert_se(hashmap_remove(item->parent->items, item->bus_path) == item);
+
+ if (item->parent->current == item)
+ item->parent->current = NULL;
+ }
+
+ sd_bus_slot_unref(item->slot_properties_changed);
+ sd_bus_slot_unref(item->slot_get_all);
+
+ free(item->bus_path);
+ free(item->active_state);
+ free(item->clean_result);
+
+ return mfree(item);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(WaitForItem*, wait_for_item_free);
+
+static void call_unit_callback_and_wait(BusWaitForUnits *d, WaitForItem *item, bool good) {
+ d->current = item;
+
+ if (item->unit_callback)
+ item->unit_callback(d, item->bus_path, good, item->userdata);
+
+ wait_for_item_free(item);
+}
+
+static void bus_wait_for_units_clear(BusWaitForUnits *d) {
+ WaitForItem *item;
+
+ assert(d);
+
+ d->slot_disconnected = sd_bus_slot_unref(d->slot_disconnected);
+ d->bus = sd_bus_unref(d->bus);
+
+ while ((item = hashmap_first(d->items)))
+ call_unit_callback_and_wait(d, item, false);
+
+ d->items = hashmap_free(d->items);
+}
+
+static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ BusWaitForUnits *d = userdata;
+
+ assert(m);
+ assert(d);
+
+ log_error("Warning! D-Bus connection terminated.");
+
+ bus_wait_for_units_clear(d);
+
+ if (d->ready_callback)
+ d->ready_callback(d, false, d->userdata);
+ else /* If no ready callback is specified close the connection so that the event loop exits */
+ sd_bus_close(sd_bus_message_get_bus(m));
+
+ return 0;
+}
+
+int bus_wait_for_units_new(sd_bus *bus, BusWaitForUnits **ret) {
+ _cleanup_(bus_wait_for_units_freep) BusWaitForUnits *d = NULL;
+ int r;
+
+ assert(bus);
+ assert(ret);
+
+ d = new(BusWaitForUnits, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (BusWaitForUnits) {
+ .state = BUS_WAIT_SUCCESS,
+ .bus = sd_bus_ref(bus),
+ };
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &d->slot_disconnected,
+ "org.freedesktop.DBus.Local",
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "Disconnected",
+ match_disconnected, NULL, d);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(d);
+ return 0;
+}
+
+BusWaitForUnits* bus_wait_for_units_free(BusWaitForUnits *d) {
+ if (!d)
+ return NULL;
+
+ bus_wait_for_units_clear(d);
+ sd_bus_slot_unref(d->slot_disconnected);
+ sd_bus_unref(d->bus);
+
+ return mfree(d);
+}
+
+static bool bus_wait_for_units_is_ready(BusWaitForUnits *d) {
+ assert(d);
+
+ if (!d->bus) /* Disconnected? */
+ return true;
+
+ return hashmap_isempty(d->items);
+}
+
+void bus_wait_for_units_set_ready_callback(BusWaitForUnits *d, bus_wait_for_units_ready_callback callback, void *userdata) {
+ assert(d);
+
+ d->ready_callback = callback;
+ d->userdata = userdata;
+}
+
+static void bus_wait_for_units_check_ready(BusWaitForUnits *d) {
+ assert(d);
+
+ if (!bus_wait_for_units_is_ready(d))
+ return;
+
+ d->state = d->has_failed ? BUS_WAIT_FAILURE : BUS_WAIT_SUCCESS;
+
+ if (d->ready_callback)
+ d->ready_callback(d, d->state, d->userdata);
+}
+
+static void wait_for_item_check_ready(WaitForItem *item) {
+ BusWaitForUnits *d;
+
+ assert(item);
+ assert_se(d = item->parent);
+
+ if (FLAGS_SET(item->flags, BUS_WAIT_FOR_MAINTENANCE_END)) {
+
+ if (item->clean_result && !streq(item->clean_result, "success"))
+ d->has_failed = true;
+
+ if (!item->active_state || streq(item->active_state, "maintenance"))
+ return;
+ }
+
+ if (FLAGS_SET(item->flags, BUS_WAIT_NO_JOB) && item->job_id != 0)
+ return;
+
+ if (FLAGS_SET(item->flags, BUS_WAIT_FOR_INACTIVE)) {
+
+ if (streq_ptr(item->active_state, "failed"))
+ d->has_failed = true;
+ else if (!streq_ptr(item->active_state, "inactive"))
+ return;
+ }
+
+ call_unit_callback_and_wait(d, item, true);
+ bus_wait_for_units_check_ready(d);
+}
+
+static int property_map_job(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ WaitForItem *item = userdata;
+ const char *path;
+ uint32_t id;
+ int r;
+
+ assert(item);
+
+ r = sd_bus_message_read(m, "(uo)", &id, &path);
+ if (r < 0)
+ return r;
+
+ item->job_id = id;
+ return 0;
+}
+
+static int wait_for_item_parse_properties(WaitForItem *item, sd_bus_message *m) {
+
+ static const struct bus_properties_map map[] = {
+ { "ActiveState", "s", NULL, offsetof(WaitForItem, active_state) },
+ { "Job", "(uo)", property_map_job, 0 },
+ { "CleanResult", "s", NULL, offsetof(WaitForItem, clean_result) },
+ {}
+ };
+
+ int r;
+
+ assert(item);
+ assert(m);
+
+ r = bus_message_map_all_properties(m, map, BUS_MAP_STRDUP, NULL, item);
+ if (r < 0)
+ return r;
+
+ wait_for_item_check_ready(item);
+ return 0;
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ WaitForItem *item = userdata;
+ const char *interface;
+ int r;
+
+ assert(item);
+
+ r = sd_bus_message_read(m, "s", &interface);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse PropertiesChanged signal: %m");
+ return 0;
+ }
+
+ if (!streq(interface, "org.freedesktop.systemd1.Unit"))
+ return 0;
+
+ r = wait_for_item_parse_properties(item, m);
+ if (r < 0)
+ log_debug_errno(r, "Failed to process PropertiesChanged signal: %m");
+
+ return 0;
+}
+
+static int on_get_all_properties(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ WaitForItem *item = userdata;
+ int r;
+
+ assert(item);
+
+ if (sd_bus_error_is_set(error)) {
+ BusWaitForUnits *d = item->parent;
+
+ d->has_failed = true;
+
+ log_debug_errno(sd_bus_error_get_errno(error), "GetAll() failed for %s: %s",
+ item->bus_path, error->message);
+
+ call_unit_callback_and_wait(d, item, false);
+ bus_wait_for_units_check_ready(d);
+ return 0;
+ }
+
+ r = wait_for_item_parse_properties(item, m);
+ if (r < 0)
+ log_debug_errno(r, "Failed to process GetAll method reply: %m");
+
+ return 0;
+}
+
+int bus_wait_for_units_add_unit(
+ BusWaitForUnits *d,
+ const char *unit,
+ BusWaitForUnitsFlags flags,
+ bus_wait_for_units_unit_callback callback,
+ void *userdata) {
+
+ _cleanup_(wait_for_item_freep) WaitForItem *item = NULL;
+ int r;
+
+ assert(d);
+ assert(unit);
+
+ assert(flags != 0);
+
+ r = hashmap_ensure_allocated(&d->items, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ item = new(WaitForItem, 1);
+ if (!item)
+ return -ENOMEM;
+
+ *item = (WaitForItem) {
+ .flags = flags,
+ .bus_path = unit_dbus_path_from_name(unit),
+ .unit_callback = callback,
+ .userdata = userdata,
+ .job_id = UINT32_MAX,
+ };
+
+ if (!item->bus_path)
+ return -ENOMEM;
+
+ if (!FLAGS_SET(item->flags, BUS_WAIT_REFFED)) {
+ r = sd_bus_call_method_async(
+ d->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ item->bus_path,
+ "org.freedesktop.systemd1.Unit",
+ "Ref",
+ NULL,
+ NULL,
+ NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add reference to unit %s: %m", unit);
+
+ item->flags |= BUS_WAIT_REFFED;
+ }
+
+ r = sd_bus_match_signal_async(
+ d->bus,
+ &item->slot_properties_changed,
+ "org.freedesktop.systemd1",
+ item->bus_path,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ on_properties_changed,
+ NULL,
+ item);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to request match for PropertiesChanged signal: %m");
+
+ r = sd_bus_call_method_async(
+ d->bus,
+ &item->slot_get_all,
+ "org.freedesktop.systemd1",
+ item->bus_path,
+ "org.freedesktop.DBus.Properties",
+ "GetAll",
+ on_get_all_properties,
+ item,
+ "s", FLAGS_SET(item->flags, BUS_WAIT_FOR_MAINTENANCE_END) ? NULL : "org.freedesktop.systemd1.Unit");
+ if (r < 0)
+ return log_debug_errno(r, "Failed to request properties of unit %s: %m", unit);
+
+ r = hashmap_put(d->items, item->bus_path, item);
+ if (r < 0)
+ return r;
+
+ d->state = BUS_WAIT_RUNNING;
+ item->parent = d;
+ TAKE_PTR(item);
+ return 0;
+}
+
+int bus_wait_for_units_run(BusWaitForUnits *d) {
+ int r;
+
+ assert(d);
+
+ while (d->state == BUS_WAIT_RUNNING) {
+
+ r = sd_bus_process(d->bus, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = sd_bus_wait(d->bus, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+
+ return d->state;
+}
+
+BusWaitForUnitsState bus_wait_for_units_state(BusWaitForUnits *d) {
+ assert(d);
+
+ return d->state;
+}
diff --git a/src/shared/bus-wait-for-units.h b/src/shared/bus-wait-for-units.h
new file mode 100644
index 0000000..f7ab666
--- /dev/null
+++ b/src/shared/bus-wait-for-units.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+#include "sd-bus.h"
+
+typedef struct BusWaitForUnits BusWaitForUnits;
+
+typedef enum BusWaitForUnitsState {
+ BUS_WAIT_SUCCESS, /* Nothing to wait for anymore and nothing failed */
+ BUS_WAIT_FAILURE, /* dito, but something failed */
+ BUS_WAIT_RUNNING, /* Still something to wait for */
+ _BUS_WAIT_FOR_UNITS_STATE_MAX,
+ _BUS_WAIT_FOR_UNITS_STATE_INVALID = -1,
+} BusWaitForUnitsState;
+
+typedef enum BusWaitForUnitsFlags {
+ BUS_WAIT_FOR_MAINTENANCE_END = 1 << 0, /* Wait until the unit is no longer in maintenance state */
+ BUS_WAIT_FOR_INACTIVE = 1 << 1, /* Wait until the unit is back in inactive or dead state */
+ BUS_WAIT_NO_JOB = 1 << 2, /* Wait until there's no more job pending */
+ BUS_WAIT_REFFED = 1 << 3, /* The unit is already reffed with RefUnit() */
+} BusWaitForUnitsFlags;
+
+typedef void (*bus_wait_for_units_ready_callback)(BusWaitForUnits *d, BusWaitForUnitsState state, void *userdata);
+typedef void (*bus_wait_for_units_unit_callback)(BusWaitForUnits *d, const char *unit_path, bool good, void *userdata);
+
+int bus_wait_for_units_new(sd_bus *bus, BusWaitForUnits **ret);
+BusWaitForUnits* bus_wait_for_units_free(BusWaitForUnits *d);
+
+BusWaitForUnitsState bus_wait_for_units_state(BusWaitForUnits *d);
+void bus_wait_for_units_set_ready_callback(BusWaitForUnits *d, bus_wait_for_units_ready_callback callback, void *userdata);
+int bus_wait_for_units_add_unit(BusWaitForUnits *d, const char *unit, BusWaitForUnitsFlags flags, bus_wait_for_units_unit_callback callback, void *userdata);
+int bus_wait_for_units_run(BusWaitForUnits *d);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForUnits*, bus_wait_for_units_free);
diff --git a/src/shared/calendarspec.c b/src/shared/calendarspec.c
new file mode 100644
index 0000000..7162592
--- /dev/null
+++ b/src/shared/calendarspec.c
@@ -0,0 +1,1405 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "calendarspec.h"
+#include "errno-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define BITS_WEEKDAYS 127
+#define MIN_YEAR 1970
+#define MAX_YEAR 2199
+
+/* An arbitrary limit on the length of the chains of components. We don't want to
+ * build a very long linked list, which would be slow to iterate over and might cause
+ * our stack to overflow. It's unlikely that legitimate uses require more than a few
+ * linked compenents anyway. */
+#define CALENDARSPEC_COMPONENTS_MAX 240
+
+/* Let's make sure that the microsecond component is safe to be stored in an 'int' */
+assert_cc(INT_MAX >= USEC_PER_SEC);
+
+static void chain_free(CalendarComponent *c) {
+ CalendarComponent *n;
+
+ while (c) {
+ n = c->next;
+ free(c);
+ c = n;
+ }
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarComponent*, chain_free);
+
+CalendarSpec* calendar_spec_free(CalendarSpec *c) {
+
+ if (!c)
+ return NULL;
+
+ chain_free(c->year);
+ chain_free(c->month);
+ chain_free(c->day);
+ chain_free(c->hour);
+ chain_free(c->minute);
+ chain_free(c->microsecond);
+ free(c->timezone);
+
+ return mfree(c);
+}
+
+static int component_compare(CalendarComponent * const *a, CalendarComponent * const *b) {
+ int r;
+
+ r = CMP((*a)->start, (*b)->start);
+ if (r != 0)
+ return r;
+
+ r = CMP((*a)->stop, (*b)->stop);
+ if (r != 0)
+ return r;
+
+ return CMP((*a)->repeat, (*b)->repeat);
+}
+
+static void normalize_chain(CalendarComponent **c) {
+ CalendarComponent **b, *i, **j, *next;
+ size_t n = 0, k;
+
+ assert(c);
+
+ for (i = *c; i; i = i->next) {
+ n++;
+
+ /*
+ * While we're counting the chain, also normalize `stop`
+ * so the length of the range is a multiple of `repeat`
+ */
+ if (i->stop > i->start && i->repeat > 0)
+ i->stop -= (i->stop - i->start) % i->repeat;
+
+ /* If a repeat value is specified, but it cannot even be triggered once, let's suppress
+ * it.
+ *
+ * Similar, if the stop value is the same as the start value, then let's just make this a
+ * non-repeating chain element */
+ if ((i->stop > i->start && i->repeat > 0 && i->start + i->repeat > i->stop) ||
+ i->start == i->stop) {
+ i->repeat = 0;
+ i->stop = -1;
+ }
+ }
+
+ if (n <= 1)
+ return;
+
+ j = b = newa(CalendarComponent*, n);
+ for (i = *c; i; i = i->next)
+ *(j++) = i;
+
+ typesafe_qsort(b, n, component_compare);
+
+ b[n-1]->next = NULL;
+ next = b[n-1];
+
+ /* Drop non-unique entries */
+ for (k = n-1; k > 0; k--) {
+ if (component_compare(&b[k-1], &next) == 0) {
+ free(b[k-1]);
+ continue;
+ }
+
+ b[k-1]->next = next;
+ next = b[k-1];
+ }
+
+ *c = next;
+}
+
+static void fix_year(CalendarComponent *c) {
+ /* Turns 12 → 2012, 89 → 1989 */
+
+ while (c) {
+ if (c->start >= 0 && c->start < 70)
+ c->start += 2000;
+
+ if (c->stop >= 0 && c->stop < 70)
+ c->stop += 2000;
+
+ if (c->start >= 70 && c->start < 100)
+ c->start += 1900;
+
+ if (c->stop >= 70 && c->stop < 100)
+ c->stop += 1900;
+
+ c = c->next;
+ }
+}
+
+int calendar_spec_normalize(CalendarSpec *c) {
+ assert(c);
+
+ if (streq_ptr(c->timezone, "UTC")) {
+ c->utc = true;
+ c->timezone = mfree(c->timezone);
+ }
+
+ if (c->weekdays_bits <= 0 || c->weekdays_bits >= BITS_WEEKDAYS)
+ c->weekdays_bits = -1;
+
+ if (c->end_of_month && !c->day)
+ c->end_of_month = false;
+
+ fix_year(c->year);
+
+ normalize_chain(&c->year);
+ normalize_chain(&c->month);
+ normalize_chain(&c->day);
+ normalize_chain(&c->hour);
+ normalize_chain(&c->minute);
+ normalize_chain(&c->microsecond);
+
+ return 0;
+}
+
+static bool chain_valid(CalendarComponent *c, int from, int to, bool end_of_month) {
+ assert(to >= from);
+
+ if (!c)
+ return true;
+
+ /* Forbid dates more than 28 days from the end of the month */
+ if (end_of_month)
+ to -= 3;
+
+ if (c->start < from || c->start > to)
+ return false;
+
+ /* Avoid overly large values that could cause overflow */
+ if (c->repeat > to - from)
+ return false;
+
+ /*
+ * c->repeat must be short enough so at least one repetition may
+ * occur before the end of the interval. For dates scheduled
+ * relative to the end of the month, c->start and c->stop
+ * correspond to the Nth last day of the month.
+ */
+ if (c->stop >= 0) {
+ if (c->stop < from || c ->stop > to)
+ return false;
+
+ if (c->start + c->repeat > c->stop)
+ return false;
+ } else {
+ if (end_of_month && c->start - c->repeat < from)
+ return false;
+
+ if (!end_of_month && c->start + c->repeat > to)
+ return false;
+ }
+
+ if (c->next)
+ return chain_valid(c->next, from, to, end_of_month);
+
+ return true;
+}
+
+_pure_ bool calendar_spec_valid(CalendarSpec *c) {
+ assert(c);
+
+ if (c->weekdays_bits > BITS_WEEKDAYS)
+ return false;
+
+ if (!chain_valid(c->year, MIN_YEAR, MAX_YEAR, false))
+ return false;
+
+ if (!chain_valid(c->month, 1, 12, false))
+ return false;
+
+ if (!chain_valid(c->day, 1, 31, c->end_of_month))
+ return false;
+
+ if (!chain_valid(c->hour, 0, 23, false))
+ return false;
+
+ if (!chain_valid(c->minute, 0, 59, false))
+ return false;
+
+ if (!chain_valid(c->microsecond, 0, 60*USEC_PER_SEC-1, false))
+ return false;
+
+ return true;
+}
+
+static void format_weekdays(FILE *f, const CalendarSpec *c) {
+ static const char *const days[] = {
+ "Mon",
+ "Tue",
+ "Wed",
+ "Thu",
+ "Fri",
+ "Sat",
+ "Sun"
+ };
+
+ int l, x;
+ bool need_comma = false;
+
+ assert(f);
+ assert(c);
+ assert(c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS);
+
+ for (x = 0, l = -1; x < (int) ELEMENTSOF(days); x++) {
+
+ if (c->weekdays_bits & (1 << x)) {
+
+ if (l < 0) {
+ if (need_comma)
+ fputc(',', f);
+ else
+ need_comma = true;
+
+ fputs(days[x], f);
+ l = x;
+ }
+
+ } else if (l >= 0) {
+
+ if (x > l + 1) {
+ fputs(x > l + 2 ? ".." : ",", f);
+ fputs(days[x-1], f);
+ }
+
+ l = -1;
+ }
+ }
+
+ if (l >= 0 && x > l + 1) {
+ fputs(x > l + 2 ? ".." : ",", f);
+ fputs(days[x-1], f);
+ }
+}
+
+static void format_chain(FILE *f, int space, const CalendarComponent *c, bool usec) {
+ int d = usec ? (int) USEC_PER_SEC : 1;
+
+ assert(f);
+
+ if (!c) {
+ fputc('*', f);
+ return;
+ }
+
+ if (usec && c->start == 0 && c->repeat == USEC_PER_SEC && !c->next) {
+ fputc('*', f);
+ return;
+ }
+
+ assert(c->start >= 0);
+
+ fprintf(f, "%0*i", space, c->start / d);
+ if (c->start % d > 0)
+ fprintf(f, ".%06i", c->start % d);
+
+ if (c->stop > 0)
+ fprintf(f, "..%0*i", space, c->stop / d);
+ if (c->stop % d > 0)
+ fprintf(f, ".%06i", c->stop % d);
+
+ if (c->repeat > 0 && !(c->stop > 0 && c->repeat == d))
+ fprintf(f, "/%i", c->repeat / d);
+ if (c->repeat % d > 0)
+ fprintf(f, ".%06i", c->repeat % d);
+
+ if (c->next) {
+ fputc(',', f);
+ format_chain(f, space, c->next, usec);
+ }
+}
+
+int calendar_spec_to_string(const CalendarSpec *c, char **p) {
+ char *buf = NULL;
+ size_t sz = 0;
+ FILE *f;
+ int r;
+
+ assert(c);
+ assert(p);
+
+ f = open_memstream_unlocked(&buf, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ if (c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS) {
+ format_weekdays(f, c);
+ fputc(' ', f);
+ }
+
+ format_chain(f, 4, c->year, false);
+ fputc('-', f);
+ format_chain(f, 2, c->month, false);
+ fputc(c->end_of_month ? '~' : '-', f);
+ format_chain(f, 2, c->day, false);
+ fputc(' ', f);
+ format_chain(f, 2, c->hour, false);
+ fputc(':', f);
+ format_chain(f, 2, c->minute, false);
+ fputc(':', f);
+ format_chain(f, 2, c->microsecond, true);
+
+ if (c->utc)
+ fputs(" UTC", f);
+ else if (c->timezone) {
+ fputc(' ', f);
+ fputs(c->timezone, f);
+ } else if (IN_SET(c->dst, 0, 1)) {
+
+ /* If daylight saving is explicitly on or off, let's show the used timezone. */
+
+ tzset();
+
+ if (!isempty(tzname[c->dst])) {
+ fputc(' ', f);
+ fputs(tzname[c->dst], f);
+ }
+ }
+
+ r = fflush_and_check(f);
+ fclose(f);
+
+ if (r < 0) {
+ free(buf);
+ return r;
+ }
+
+ *p = buf;
+ return 0;
+}
+
+static int parse_weekdays(const char **p, CalendarSpec *c) {
+ static const struct {
+ const char *name;
+ const int nr;
+ } day_nr[] = {
+ { "Monday", 0 },
+ { "Mon", 0 },
+ { "Tuesday", 1 },
+ { "Tue", 1 },
+ { "Wednesday", 2 },
+ { "Wed", 2 },
+ { "Thursday", 3 },
+ { "Thu", 3 },
+ { "Friday", 4 },
+ { "Fri", 4 },
+ { "Saturday", 5 },
+ { "Sat", 5 },
+ { "Sunday", 6 },
+ { "Sun", 6 }
+ };
+
+ int l = -1;
+ bool first = true;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ for (;;) {
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(day_nr); i++) {
+ size_t skip;
+
+ if (!startswith_no_case(*p, day_nr[i].name))
+ continue;
+
+ skip = strlen(day_nr[i].name);
+
+ if (!IN_SET((*p)[skip], 0, '-', '.', ',', ' '))
+ return -EINVAL;
+
+ c->weekdays_bits |= 1 << day_nr[i].nr;
+
+ if (l >= 0) {
+ int j;
+
+ if (l > day_nr[i].nr)
+ return -EINVAL;
+
+ for (j = l + 1; j < day_nr[i].nr; j++)
+ c->weekdays_bits |= 1 << j;
+ }
+
+ *p += skip;
+ break;
+ }
+
+ /* Couldn't find this prefix, so let's assume the
+ weekday was not specified and let's continue with
+ the date */
+ if (i >= ELEMENTSOF(day_nr))
+ return first ? 0 : -EINVAL;
+
+ /* We reached the end of the string */
+ if (**p == 0)
+ return 0;
+
+ /* We reached the end of the weekday spec part */
+ if (**p == ' ') {
+ *p += strspn(*p, " ");
+ return 0;
+ }
+
+ if (**p == '.') {
+ if (l >= 0)
+ return -EINVAL;
+
+ if ((*p)[1] != '.')
+ return -EINVAL;
+
+ l = day_nr[i].nr;
+ *p += 2;
+
+ /* Support ranges with "-" for backwards compatibility */
+ } else if (**p == '-') {
+ if (l >= 0)
+ return -EINVAL;
+
+ l = day_nr[i].nr;
+ *p += 1;
+
+ } else if (**p == ',') {
+ l = -1;
+ *p += 1;
+ }
+
+ /* Allow a trailing comma but not an open range */
+ if (IN_SET(**p, 0, ' ')) {
+ *p += strspn(*p, " ");
+ return l < 0 ? 0 : -EINVAL;
+ }
+
+ first = false;
+ }
+}
+
+static int parse_one_number(const char *p, const char **e, unsigned long *ret) {
+ char *ee = NULL;
+ unsigned long value;
+
+ errno = 0;
+ value = strtoul(p, &ee, 10);
+ if (errno > 0)
+ return -errno;
+ if (ee == p)
+ return -EINVAL;
+
+ *ret = value;
+ *e = ee;
+ return 0;
+}
+
+static int parse_component_decimal(const char **p, bool usec, int *res) {
+ unsigned long value;
+ const char *e = NULL;
+ int r;
+
+ if (!isdigit(**p))
+ return -EINVAL;
+
+ r = parse_one_number(*p, &e, &value);
+ if (r < 0)
+ return r;
+
+ if (usec) {
+ if (value * USEC_PER_SEC / USEC_PER_SEC != value)
+ return -ERANGE;
+
+ value *= USEC_PER_SEC;
+
+ /* One "." is a decimal point, but ".." is a range separator */
+ if (e[0] == '.' && e[1] != '.') {
+ unsigned add;
+
+ e++;
+ r = parse_fractional_part_u(&e, 6, &add);
+ if (r < 0)
+ return r;
+
+ if (add + value < value)
+ return -ERANGE;
+ value += add;
+ }
+ }
+
+ if (value > INT_MAX)
+ return -ERANGE;
+
+ *p = e;
+ *res = value;
+
+ return 0;
+}
+
+static int const_chain(int value, CalendarComponent **c) {
+ CalendarComponent *cc = NULL;
+
+ assert(c);
+
+ cc = new(CalendarComponent, 1);
+ if (!cc)
+ return -ENOMEM;
+
+ *cc = (CalendarComponent) {
+ .start = value,
+ .stop = -1,
+ .repeat = 0,
+ .next = *c,
+ };
+
+ *c = cc;
+
+ return 0;
+}
+
+static int calendarspec_from_time_t(CalendarSpec *c, time_t time) {
+ _cleanup_(chain_freep) CalendarComponent
+ *year = NULL, *month = NULL, *day = NULL,
+ *hour = NULL, *minute = NULL, *us = NULL;
+ struct tm tm;
+ int r;
+
+ if (!gmtime_r(&time, &tm))
+ return -ERANGE;
+
+ if (tm.tm_year > INT_MAX - 1900)
+ return -ERANGE;
+
+ r = const_chain(tm.tm_year + 1900, &year);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_mon + 1, &month);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_mday, &day);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_hour, &hour);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_min, &minute);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_sec * USEC_PER_SEC, &us);
+ if (r < 0)
+ return r;
+
+ c->utc = true;
+ c->year = TAKE_PTR(year);
+ c->month = TAKE_PTR(month);
+ c->day = TAKE_PTR(day);
+ c->hour = TAKE_PTR(hour);
+ c->minute = TAKE_PTR(minute);
+ c->microsecond = TAKE_PTR(us);
+ return 0;
+}
+
+static int prepend_component(const char **p, bool usec, unsigned nesting, CalendarComponent **c) {
+ int r, start, stop = -1, repeat = 0;
+ CalendarComponent *cc;
+ const char *e = *p;
+
+ assert(p);
+ assert(c);
+
+ if (nesting > CALENDARSPEC_COMPONENTS_MAX)
+ return -ENOBUFS;
+
+ r = parse_component_decimal(&e, usec, &start);
+ if (r < 0)
+ return r;
+
+ if (e[0] == '.' && e[1] == '.') {
+ e += 2;
+ r = parse_component_decimal(&e, usec, &stop);
+ if (r < 0)
+ return r;
+
+ repeat = usec ? USEC_PER_SEC : 1;
+ }
+
+ if (*e == '/') {
+ e++;
+ r = parse_component_decimal(&e, usec, &repeat);
+ if (r < 0)
+ return r;
+
+ if (repeat == 0)
+ return -ERANGE;
+ } else {
+ /* If no repeat value is specified for the µs component, then let's explicitly refuse ranges
+ * below 1s because our default repeat granularity is beyond that. */
+
+ /* Overflow check */
+ if (start > INT_MAX - repeat)
+ return -ERANGE;
+
+ if (usec && stop >= 0 && start + repeat > stop)
+ return -EINVAL;
+ }
+
+ if (!IN_SET(*e, 0, ' ', ',', '-', '~', ':'))
+ return -EINVAL;
+
+ cc = new(CalendarComponent, 1);
+ if (!cc)
+ return -ENOMEM;
+
+ *cc = (CalendarComponent) {
+ .start = start,
+ .stop = stop,
+ .repeat = repeat,
+ .next = *c,
+ };
+
+ *p = e;
+ *c = cc;
+
+ if (*e ==',') {
+ *p += 1;
+ return prepend_component(p, usec, nesting + 1, c);
+ }
+
+ return 0;
+}
+
+static int parse_chain(const char **p, bool usec, CalendarComponent **c) {
+ _cleanup_(chain_freep) CalendarComponent *cc = NULL;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(c);
+
+ t = *p;
+
+ if (t[0] == '*') {
+ if (usec) {
+ r = const_chain(0, c);
+ if (r < 0)
+ return r;
+ (*c)->repeat = USEC_PER_SEC;
+ } else
+ *c = NULL;
+
+ *p = t + 1;
+ return 0;
+ }
+
+ r = prepend_component(&t, usec, 0, &cc);
+ if (r < 0)
+ return r;
+
+ *p = t;
+ *c = TAKE_PTR(cc);
+ return 0;
+}
+
+static int parse_date(const char **p, CalendarSpec *c) {
+ _cleanup_(chain_freep) CalendarComponent *first = NULL, *second = NULL, *third = NULL;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ t = *p;
+
+ if (*t == 0)
+ return 0;
+
+ /* @TIMESTAMP — UNIX time in seconds since the epoch */
+ if (*t == '@') {
+ unsigned long value;
+ time_t time;
+
+ r = parse_one_number(t + 1, &t, &value);
+ if (r < 0)
+ return r;
+
+ time = value;
+ if ((unsigned long) time != value)
+ return -ERANGE;
+
+ r = calendarspec_from_time_t(c, time);
+ if (r < 0)
+ return r;
+
+ *p = t;
+ return 1; /* finito, don't parse H:M:S after that */
+ }
+
+ r = parse_chain(&t, false, &first);
+ if (r < 0)
+ return r;
+
+ /* Already the end? A ':' as separator? In that case this was a time, not a date */
+ if (IN_SET(*t, 0, ':'))
+ return 0;
+
+ if (*t == '~')
+ c->end_of_month = true;
+ else if (*t != '-')
+ return -EINVAL;
+
+ t++;
+ r = parse_chain(&t, false, &second);
+ if (r < 0)
+ return r;
+
+ /* Got two parts, hence it's month and day */
+ if (IN_SET(*t, 0, ' ')) {
+ *p = t + strspn(t, " ");
+ c->month = TAKE_PTR(first);
+ c->day = TAKE_PTR(second);
+ return 0;
+ } else if (c->end_of_month)
+ return -EINVAL;
+
+ if (*t == '~')
+ c->end_of_month = true;
+ else if (*t != '-')
+ return -EINVAL;
+
+ t++;
+ r = parse_chain(&t, false, &third);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(*t, 0, ' '))
+ return -EINVAL;
+
+ /* Got three parts, hence it is year, month and day */
+ *p = t + strspn(t, " ");
+ c->year = TAKE_PTR(first);
+ c->month = TAKE_PTR(second);
+ c->day = TAKE_PTR(third);
+ return 0;
+}
+
+static int parse_calendar_time(const char **p, CalendarSpec *c) {
+ _cleanup_(chain_freep) CalendarComponent *h = NULL, *m = NULL, *s = NULL;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ t = *p;
+
+ /* If no time is specified at all, then this means 00:00:00 */
+ if (*t == 0)
+ goto null_hour;
+
+ r = parse_chain(&t, false, &h);
+ if (r < 0)
+ return r;
+
+ if (*t != ':')
+ return -EINVAL;
+
+ t++;
+ r = parse_chain(&t, false, &m);
+ if (r < 0)
+ return r;
+
+ /* Already at the end? Then it's hours and minutes, and seconds are 0 */
+ if (*t == 0)
+ goto null_second;
+
+ if (*t != ':')
+ return -EINVAL;
+
+ t++;
+ r = parse_chain(&t, true, &s);
+ if (r < 0)
+ return r;
+
+ /* At the end? Then it's hours, minutes and seconds */
+ if (*t == 0)
+ goto finish;
+
+ return -EINVAL;
+
+null_hour:
+ r = const_chain(0, &h);
+ if (r < 0)
+ return r;
+
+ r = const_chain(0, &m);
+ if (r < 0)
+ return r;
+
+null_second:
+ r = const_chain(0, &s);
+ if (r < 0)
+ return r;
+
+finish:
+ *p = t;
+ c->hour = TAKE_PTR(h);
+ c->minute = TAKE_PTR(m);
+ c->microsecond = TAKE_PTR(s);
+
+ return 0;
+}
+
+int calendar_spec_from_string(const char *p, CalendarSpec **spec) {
+ const char *utc;
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ _cleanup_free_ char *p_tmp = NULL;
+ int r;
+
+ assert(p);
+
+ c = new(CalendarSpec, 1);
+ if (!c)
+ return -ENOMEM;
+
+ *c = (CalendarSpec) {
+ .dst = -1,
+ .timezone = NULL,
+ };
+
+ utc = endswith_no_case(p, " UTC");
+ if (utc) {
+ c->utc = true;
+ p = p_tmp = strndup(p, utc - p);
+ if (!p)
+ return -ENOMEM;
+ } else {
+ const char *e = NULL;
+ int j;
+
+ tzset();
+
+ /* Check if the local timezone was specified? */
+ for (j = 0; j <= 1; j++) {
+ if (isempty(tzname[j]))
+ continue;
+
+ e = endswith_no_case(p, tzname[j]);
+ if (!e)
+ continue;
+ if (e == p)
+ continue;
+ if (e[-1] != ' ')
+ continue;
+
+ break;
+ }
+
+ /* Found one of the two timezones specified? */
+ if (IN_SET(j, 0, 1)) {
+ p = p_tmp = strndup(p, e - p - 1);
+ if (!p)
+ return -ENOMEM;
+
+ c->dst = j;
+ } else {
+ const char *last_space;
+
+ last_space = strrchr(p, ' ');
+ if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) {
+ c->timezone = strdup(last_space + 1);
+ if (!c->timezone)
+ return -ENOMEM;
+
+ p = p_tmp = strndup(p, last_space - p);
+ if (!p)
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (isempty(p))
+ return -EINVAL;
+
+ if (strcaseeq(p, "minutely")) {
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "hourly")) {
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "daily")) {
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "monthly")) {
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (STRCASE_IN_SET(p,
+ "annually",
+ "yearly",
+ "anually") /* backwards compatibility */ ) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "weekly")) {
+
+ c->weekdays_bits = 1;
+
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "quarterly")) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(4, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(7, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(10, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (STRCASE_IN_SET(p,
+ "biannually",
+ "bi-annually",
+ "semiannually",
+ "semi-annually")) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(7, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else {
+ r = parse_weekdays(&p, c);
+ if (r < 0)
+ return r;
+
+ r = parse_date(&p, c);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ r = parse_calendar_time(&p, c);
+ if (r < 0)
+ return r;
+ }
+
+ if (*p != 0)
+ return -EINVAL;
+ }
+
+ r = calendar_spec_normalize(c);
+ if (r < 0)
+ return r;
+
+ if (!calendar_spec_valid(c))
+ return -EINVAL;
+
+ if (spec)
+ *spec = TAKE_PTR(c);
+ return 0;
+}
+
+static int find_end_of_month(struct tm *tm, bool utc, int day) {
+ struct tm t = *tm;
+
+ t.tm_mon++;
+ t.tm_mday = 1 - day;
+
+ if (mktime_or_timegm(&t, utc) < 0 ||
+ t.tm_mon != tm->tm_mon)
+ return -1;
+
+ return t.tm_mday;
+}
+
+static int find_matching_component(const CalendarSpec *spec, const CalendarComponent *c,
+ struct tm *tm, int *val) {
+ const CalendarComponent *p = c;
+ int start, stop, d = -1;
+ bool d_set = false;
+ int r;
+
+ assert(val);
+
+ if (!c)
+ return 0;
+
+ while (c) {
+ start = c->start;
+ stop = c->stop;
+
+ if (spec->end_of_month && p == spec->day) {
+ start = find_end_of_month(tm, spec->utc, start);
+ stop = find_end_of_month(tm, spec->utc, stop);
+
+ if (stop > 0)
+ SWAP_TWO(start, stop);
+ }
+
+ if (start >= *val) {
+
+ if (!d_set || start < d) {
+ d = start;
+ d_set = true;
+ }
+
+ } else if (c->repeat > 0) {
+ int k;
+
+ k = start + c->repeat * DIV_ROUND_UP(*val - start, c->repeat);
+
+ if ((!d_set || k < d) && (stop < 0 || k <= stop)) {
+ d = k;
+ d_set = true;
+ }
+ }
+
+ c = c->next;
+ }
+
+ if (!d_set)
+ return -ENOENT;
+
+ r = *val != d;
+ *val = d;
+ return r;
+}
+
+static int tm_within_bounds(struct tm *tm, bool utc) {
+ struct tm t;
+ assert(tm);
+
+ /*
+ * Set an upper bound on the year so impossible dates like "*-02-31"
+ * don't cause find_next() to loop forever. tm_year contains years
+ * since 1900, so adjust it accordingly.
+ */
+ if (tm->tm_year + 1900 > MAX_YEAR)
+ return -ERANGE;
+
+ t = *tm;
+ if (mktime_or_timegm(&t, utc) < 0)
+ return negative_errno();
+
+ /* Did any normalization take place? If so, it was out of bounds before */
+ bool good = t.tm_year == tm->tm_year &&
+ t.tm_mon == tm->tm_mon &&
+ t.tm_mday == tm->tm_mday &&
+ t.tm_hour == tm->tm_hour &&
+ t.tm_min == tm->tm_min &&
+ t.tm_sec == tm->tm_sec;
+ if (!good)
+ *tm = t;
+ return good;
+}
+
+static bool matches_weekday(int weekdays_bits, const struct tm *tm, bool utc) {
+ struct tm t;
+ int k;
+
+ if (weekdays_bits < 0 || weekdays_bits >= BITS_WEEKDAYS)
+ return true;
+
+ t = *tm;
+ if (mktime_or_timegm(&t, utc) < 0)
+ return false;
+
+ k = t.tm_wday == 0 ? 6 : t.tm_wday - 1;
+ return (weekdays_bits & (1 << k));
+}
+
+static int find_next(const CalendarSpec *spec, struct tm *tm, usec_t *usec) {
+ struct tm c;
+ int tm_usec;
+ int r;
+
+ /* Returns -ENOENT if the expression is not going to elapse anymore */
+
+ assert(spec);
+ assert(tm);
+
+ c = *tm;
+ tm_usec = *usec;
+
+ for (;;) {
+ /* Normalize the current date */
+ (void) mktime_or_timegm(&c, spec->utc);
+ c.tm_isdst = spec->dst;
+
+ c.tm_year += 1900;
+ r = find_matching_component(spec, spec->year, &c, &c.tm_year);
+ c.tm_year -= 1900;
+
+ if (r > 0) {
+ c.tm_mon = 0;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ }
+ if (r < 0)
+ return r;
+ if (tm_within_bounds(&c, spec->utc) <= 0)
+ return -ENOENT;
+
+ c.tm_mon += 1;
+ r = find_matching_component(spec, spec->month, &c, &c.tm_mon);
+ c.tm_mon -= 1;
+
+ if (r > 0) {
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ }
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_year++;
+ c.tm_mon = 0;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ r = find_matching_component(spec, spec->day, &c, &c.tm_mday);
+ if (r > 0)
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_mon++;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ if (!matches_weekday(spec->weekdays_bits, &c, spec->utc)) {
+ c.tm_mday++;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ r = find_matching_component(spec, spec->hour, &c, &c.tm_hour);
+ if (r > 0)
+ c.tm_min = c.tm_sec = tm_usec = 0;
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_mday++;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ /* The next hour we set might be missing if there
+ * are time zone changes. Let's try again starting at
+ * normalized time. */
+ continue;
+
+ r = find_matching_component(spec, spec->minute, &c, &c.tm_min);
+ if (r > 0)
+ c.tm_sec = tm_usec = 0;
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_hour++;
+ c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ c.tm_sec = c.tm_sec * USEC_PER_SEC + tm_usec;
+ r = find_matching_component(spec, spec->microsecond, &c, &c.tm_sec);
+ tm_usec = c.tm_sec % USEC_PER_SEC;
+ c.tm_sec /= USEC_PER_SEC;
+
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_min++;
+ c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ *tm = c;
+ *usec = tm_usec;
+ return 0;
+ }
+}
+
+static int calendar_spec_next_usec_impl(const CalendarSpec *spec, usec_t usec, usec_t *ret_next) {
+ struct tm tm;
+ time_t t;
+ int r;
+ usec_t tm_usec;
+
+ assert(spec);
+
+ if (usec > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+ usec++;
+ t = (time_t) (usec / USEC_PER_SEC);
+ assert_se(localtime_or_gmtime_r(&t, &tm, spec->utc));
+ tm_usec = usec % USEC_PER_SEC;
+
+ r = find_next(spec, &tm, &tm_usec);
+ if (r < 0)
+ return r;
+
+ t = mktime_or_timegm(&tm, spec->utc);
+ if (t < 0)
+ return -EINVAL;
+
+ if (ret_next)
+ *ret_next = (usec_t) t * USEC_PER_SEC + tm_usec;
+
+ return 0;
+}
+
+typedef struct SpecNextResult {
+ usec_t next;
+ int return_value;
+} SpecNextResult;
+
+int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *ret_next) {
+ SpecNextResult *shared, tmp;
+ int r;
+
+ assert(spec);
+
+ if (isempty(spec->timezone))
+ return calendar_spec_next_usec_impl(spec, usec, ret_next);
+
+ shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if (shared == MAP_FAILED)
+ return negative_errno();
+
+ r = safe_fork("(sd-calendar)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL);
+ if (r < 0) {
+ (void) munmap(shared, sizeof *shared);
+ return r;
+ }
+ if (r == 0) {
+ char *colon_tz;
+
+ /* tzset(3) says $TZ should be prefixed with ":" if we reference timezone files */
+ colon_tz = strjoina(":", spec->timezone);
+
+ if (setenv("TZ", colon_tz, 1) != 0) {
+ shared->return_value = negative_errno();
+ _exit(EXIT_FAILURE);
+ }
+
+ tzset();
+
+ shared->return_value = calendar_spec_next_usec_impl(spec, usec, &shared->next);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ tmp = *shared;
+ if (munmap(shared, sizeof *shared) < 0)
+ return negative_errno();
+
+ if (tmp.return_value == 0 && ret_next)
+ *ret_next = tmp.next;
+
+ return tmp.return_value;
+}
diff --git a/src/shared/calendarspec.h b/src/shared/calendarspec.h
new file mode 100644
index 0000000..3bfe82d
--- /dev/null
+++ b/src/shared/calendarspec.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* A structure for specifying (possibly repetitive) points in calendar
+ * time, a la cron */
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+typedef struct CalendarComponent {
+ int start;
+ int stop;
+ int repeat;
+
+ struct CalendarComponent *next;
+} CalendarComponent;
+
+typedef struct CalendarSpec {
+ int weekdays_bits;
+ bool end_of_month:1;
+ bool utc:1;
+ signed int dst:2;
+ char *timezone;
+
+ CalendarComponent *year;
+ CalendarComponent *month;
+ CalendarComponent *day;
+
+ CalendarComponent *hour;
+ CalendarComponent *minute;
+ CalendarComponent *microsecond;
+} CalendarSpec;
+
+CalendarSpec* calendar_spec_free(CalendarSpec *c);
+
+int calendar_spec_normalize(CalendarSpec *spec);
+bool calendar_spec_valid(CalendarSpec *spec);
+
+int calendar_spec_to_string(const CalendarSpec *spec, char **p);
+int calendar_spec_from_string(const char *p, CalendarSpec **spec);
+
+int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarSpec*, calendar_spec_free);
diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c
new file mode 100644
index 0000000..f197f71
--- /dev/null
+++ b/src/shared/cgroup-setup.c
@@ -0,0 +1,841 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ftw.h>
+#include <unistd.h>
+
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "errno-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "fileio.h"
+#include "user-util.h"
+#include "fd-util.h"
+
+bool cg_is_unified_wanted(void) {
+ static thread_local int wanted = -1;
+ bool b;
+ const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
+ _cleanup_free_ char *c = NULL;
+ int r;
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
+ r = cg_unified_cached(true);
+ if (r >= 0)
+ return (wanted = r >= CGROUP_UNIFIED_ALL);
+
+ /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */
+ r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
+ if (r > 0)
+ return (wanted = b);
+
+ /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to
+ * use hybrid or legacy hierarchy. */
+ r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
+ if (r > 0 && streq_ptr(c, "all"))
+ return (wanted = true);
+
+ return (wanted = is_default);
+}
+
+bool cg_is_legacy_wanted(void) {
+ static thread_local int wanted = -1;
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* Check if we have cgroup v2 already mounted. */
+ if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
+ return (wanted = false);
+
+ /* Otherwise, assume that at least partial legacy is wanted,
+ * since cgroup v2 should already be mounted at this point. */
+ return (wanted = true);
+}
+
+bool cg_is_hybrid_wanted(void) {
+ static thread_local int wanted = -1;
+ int r;
+ bool b;
+ const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
+ /* We default to true if the default is "hybrid", obviously, but also when the default is "unified",
+ * because if we get called, it means that unified hierarchy was not mounted. */
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
+ if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
+ return (wanted = false);
+
+ /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache
+ * a non-error result. */
+ r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
+
+ /* The meaning of the kernel option is reversed wrt. to the return value of this function, hence the
+ * negation. */
+ return (wanted = r > 0 ? !b : is_default);
+}
+
+int cg_weight_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_WEIGHT_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_CPU_SHARES_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_BLKIO_WEIGHT_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
+ assert(path);
+ assert(sb);
+ assert(ftwbuf);
+
+ if (typeflag != FTW_DP)
+ return 0;
+
+ if (ftwbuf->level < 1)
+ return 0;
+
+ (void) rmdir(path);
+ return 0;
+}
+
+int cg_trim(const char *controller, const char *path, bool delete_root) {
+ _cleanup_free_ char *fs = NULL;
+ int r, q;
+
+ assert(path);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
+ if (errno == ENOENT)
+ r = 0;
+ else
+ r = errno_or_else(EIO);
+ }
+
+ if (delete_root) {
+ if (rmdir(fs) < 0 && errno != ENOENT)
+ return -errno;
+ }
+
+ q = cg_hybrid_unified();
+ if (q < 0)
+ return q;
+ if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
+ if (q < 0)
+ log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
+ }
+
+ return r;
+}
+
+/* Create a cgroup in the hierarchy of controller.
+ * Returns 0 if the group already existed, 1 on success, negative otherwise.
+ */
+int cg_create(const char *controller, const char *path) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ r = cg_get_path_and_check(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ r = mkdir_parents(fs, 0755);
+ if (r < 0)
+ return r;
+
+ r = mkdir_errno_wrapper(fs, 0755);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+
+ if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+ if (r < 0)
+ log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
+ }
+
+ return 1;
+}
+
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
+ int r, q;
+
+ assert(pid >= 0);
+
+ r = cg_create(controller, path);
+ if (r < 0)
+ return r;
+
+ q = cg_attach(controller, path, pid);
+ if (q < 0)
+ return q;
+
+ /* This does not remove the cgroup on failure */
+ return r;
+}
+
+int cg_attach(const char *controller, const char *path, pid_t pid) {
+ _cleanup_free_ char *fs = NULL;
+ char c[DECIMAL_STR_MAX(pid_t) + 2];
+ int r;
+
+ assert(path);
+ assert(pid >= 0);
+
+ r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
+ if (r < 0)
+ return r;
+
+ if (pid == 0)
+ pid = getpid_cached();
+
+ xsprintf(c, PID_FMT "\n", pid);
+
+ r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+
+ if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
+ }
+
+ return 0;
+}
+
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
+ int r;
+
+ assert(controller);
+ assert(path);
+ assert(pid >= 0);
+
+ r = cg_attach(controller, path, pid);
+ if (r < 0) {
+ char prefix[strlen(path) + 1];
+
+ /* This didn't work? Then let's try all prefixes of
+ * the destination */
+
+ PATH_FOREACH_PREFIX(prefix, path) {
+ int q;
+
+ q = cg_attach(controller, prefix, pid);
+ if (q >= 0)
+ return q;
+ }
+ }
+
+ return r;
+}
+
+int cg_set_access(
+ const char *controller,
+ const char *path,
+ uid_t uid,
+ gid_t gid) {
+
+ struct Attribute {
+ const char *name;
+ bool fatal;
+ };
+
+ /* cgroup v1, aka legacy/non-unified */
+ static const struct Attribute legacy_attributes[] = {
+ { "cgroup.procs", true },
+ { "tasks", false },
+ { "cgroup.clone_children", false },
+ {},
+ };
+
+ /* cgroup v2, aka unified */
+ static const struct Attribute unified_attributes[] = {
+ { "cgroup.procs", true },
+ { "cgroup.subtree_control", true },
+ { "cgroup.threads", false },
+ {},
+ };
+
+ static const struct Attribute* const attributes[] = {
+ [false] = legacy_attributes,
+ [true] = unified_attributes,
+ };
+
+ _cleanup_free_ char *fs = NULL;
+ const struct Attribute *i;
+ int r, unified;
+
+ assert(path);
+
+ if (uid == UID_INVALID && gid == GID_INVALID)
+ return 0;
+
+ unified = cg_unified_controller(controller);
+ if (unified < 0)
+ return unified;
+
+ /* Configure access to the cgroup itself */
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ r = chmod_and_chown(fs, 0755, uid, gid);
+ if (r < 0)
+ return r;
+
+ /* Configure access to the cgroup's attributes */
+ for (i = attributes[unified]; i->name; i++) {
+ fs = mfree(fs);
+
+ r = cg_get_path(controller, path, i->name, &fs);
+ if (r < 0)
+ return r;
+
+ r = chmod_and_chown(fs, 0644, uid, gid);
+ if (r < 0) {
+ if (i->fatal)
+ return r;
+
+ log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
+ }
+ }
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Always propagate access mode from unified to legacy controller */
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
+ }
+ }
+
+ return 0;
+}
+
+int cg_migrate(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ bool done = false;
+ _cleanup_set_free_ Set *s = NULL;
+ int r, ret = 0;
+ pid_t my_pid;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ s = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+
+ my_pid = getpid_cached();
+
+ do {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0;
+ done = true;
+
+ r = cg_enumerate_processes(cfrom, pfrom, &f);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ /* This might do weird stuff if we aren't a
+ * single-threaded program. However, we
+ * luckily know we are not */
+ if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
+ continue;
+
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
+ continue;
+
+ /* Ignore kernel threads. Since they can only
+ * exist in the root cgroup, we only check for
+ * them there. */
+ if (cfrom &&
+ empty_or_root(pfrom) &&
+ is_kernel_thread(pid) > 0)
+ continue;
+
+ r = cg_attach(cto, pto, pid);
+ if (r < 0) {
+ if (ret >= 0 && r != -ESRCH)
+ ret = r;
+ } else if (ret == 0)
+ ret = 1;
+
+ done = false;
+
+ r = set_put(s, PID_TO_PTR(pid));
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ }
+
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ } while (!done);
+
+ return ret;
+}
+
+int cg_migrate_recursive(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, ret = 0;
+ char *fn;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
+
+ r = cg_enumerate_subgroups(cfrom, pfrom, &d);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(empty_to_root(pfrom), fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
+ if (r != 0 && ret >= 0)
+ ret = r;
+ }
+
+ if (r < 0 && ret >= 0)
+ ret = r;
+
+ if (flags & CGROUP_REMOVE) {
+ r = cg_rmdir(cfrom, pfrom);
+ if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
+ return r;
+ }
+
+ return ret;
+}
+
+int cg_migrate_recursive_fallback(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ int r;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
+ if (r < 0) {
+ char prefix[strlen(pto) + 1];
+
+ /* This didn't work? Then let's try all prefixes of the destination */
+
+ PATH_FOREACH_PREFIX(prefix, pto) {
+ int q;
+
+ q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
+ if (q >= 0)
+ return q;
+ }
+ }
+
+ return r;
+}
+
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
+ CGroupController c;
+ CGroupMask done;
+ bool created;
+ int r;
+
+ /* This one will create a cgroup in our private tree, but also
+ * duplicate it in the trees specified in mask, and remove it
+ * in all others.
+ *
+ * Returns 0 if the group already existed in the systemd hierarchy,
+ * 1 on success, negative otherwise.
+ */
+
+ /* First create the cgroup in our own hierarchy. */
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
+ if (r < 0)
+ return r;
+ created = r;
+
+ /* If we are in the unified hierarchy, we are done now */
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return created;
+
+ supported &= CGROUP_MASK_V1;
+ mask = CGROUP_MASK_EXTEND_JOINED(mask);
+ done = 0;
+
+ /* Otherwise, do the same in the other hierarchies */
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ if (FLAGS_SET(mask, bit))
+ (void) cg_create(n, path);
+
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return created;
+}
+
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
+ int r;
+
+ r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
+ if (r < 0)
+ return r;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ supported &= CGROUP_MASK_V1;
+ CGroupMask done = 0;
+
+ for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *p = NULL;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ if (path_callback)
+ p = path_callback(bit, userdata);
+ if (!p)
+ p = path;
+
+ (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return 0;
+}
+
+int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
+ CGroupController c;
+ CGroupMask done;
+ int r = 0, q;
+
+ assert(to_callback);
+
+ supported &= CGROUP_MASK_V1;
+ mask = CGROUP_MASK_EXTEND_JOINED(mask);
+ done = 0;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *to = NULL;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ if (!FLAGS_SET(mask, bit))
+ continue;
+
+ to = to_callback(bit, userdata);
+
+ /* Remember first error and try continuing */
+ q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
+ r = (r < 0) ? r : q;
+ }
+
+ return r;
+}
+
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
+ int r, q;
+
+ r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
+ if (r < 0)
+ return r;
+
+ q = cg_all_unified();
+ if (q < 0)
+ return q;
+ if (q > 0)
+ return r;
+
+ return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
+}
+
+int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
+ CGroupController c;
+ CGroupMask done;
+ int r = 0, q;
+
+ supported &= CGROUP_MASK_V1;
+ mask = CGROUP_MASK_EXTEND_JOINED(mask);
+ done = 0;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ if (FLAGS_SET(mask, bit)) {
+ /* Remember first error and try continuing */
+ q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
+ r = (r < 0) ? r : q;
+ }
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return r;
+}
+
+int cg_enable_everywhere(
+ CGroupMask supported,
+ CGroupMask mask,
+ const char *p,
+ CGroupMask *ret_result_mask) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *fs = NULL;
+ CGroupController c;
+ CGroupMask ret = 0;
+ int r;
+
+ assert(p);
+
+ if (supported == 0) {
+ if (ret_result_mask)
+ *ret_result_mask = 0;
+ return 0;
+ }
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
+ * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
+ * caller tends to use the returned mask later on to compare if all controllers where properly joined,
+ * and if not requeues realization. This use is the primary purpose of the return value, hence let's
+ * minimize surprises here and reduce triggers for re-realization by always saying we fully
+ * succeeded.) */
+ if (ret_result_mask)
+ *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
+ * CGROUP_MASK_V2: The 'supported' mask
+ * might contain pure-V1 or BPF
+ * controllers, and we never want to
+ * claim that we could enable those with
+ * cgroup.subtree_control */
+ return 0;
+ }
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
+ if (r < 0)
+ return r;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(CGROUP_MASK_V2, bit))
+ continue;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ {
+ char s[1 + strlen(n) + 1];
+
+ s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
+ strcpy(s + 1, n);
+
+ if (!f) {
+ f = fopen(fs, "we");
+ if (!f)
+ return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
+ }
+
+ r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
+ FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
+ clearerr(f);
+
+ /* If we can't turn off a controller, leave it on in the reported resulting mask. This
+ * happens for example when we attempt to turn off a controller up in the tree that is
+ * used down in the tree. */
+ if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
+ * only here, and not follow the same logic
+ * for other errors such as EINVAL or
+ * EOPNOTSUPP or anything else. That's
+ * because EBUSY indicates that the
+ * controllers is currently enabled and
+ * cannot be disabled because something down
+ * the hierarchy is still using it. Any other
+ * error most likely means something like "I
+ * never heard of this controller" or
+ * similar. In the former case it's hence
+ * safe to assume the controller is still on
+ * after the failed operation, while in the
+ * latter case it's safer to assume the
+ * controller is unknown and hence certainly
+ * not enabled. */
+ ret |= bit;
+ } else {
+ /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
+ if (FLAGS_SET(mask, bit))
+ ret |= bit;
+ }
+ }
+ }
+
+ /* Let's return the precise set of controllers now enabled for the cgroup. */
+ if (ret_result_mask)
+ *ret_result_mask = ret;
+
+ return 0;
+}
diff --git a/src/shared/cgroup-setup.h b/src/shared/cgroup-setup.h
new file mode 100644
index 0000000..7eabce2
--- /dev/null
+++ b/src/shared/cgroup-setup.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "cgroup-util.h"
+
+bool cg_is_unified_wanted(void);
+bool cg_is_legacy_wanted(void);
+bool cg_is_hybrid_wanted(void);
+
+int cg_weight_parse(const char *s, uint64_t *ret);
+int cg_cpu_shares_parse(const char *s, uint64_t *ret);
+int cg_blkio_weight_parse(const char *s, uint64_t *ret);
+
+int cg_trim(const char *controller, const char *path, bool delete_root);
+
+int cg_create(const char *controller, const char *path);
+int cg_attach(const char *controller, const char *path, pid_t pid);
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid);
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
+
+int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path);
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata);
+int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata);
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root);
+int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root);
+int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask);
diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c
new file mode 100644
index 0000000..f7c24f8
--- /dev/null
+++ b/src/shared/cgroup-show.c
@@ -0,0 +1,400 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <dirent.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "output-mode.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "xattr-util.h"
+
+static void show_pid_array(
+ pid_t pids[],
+ unsigned n_pids,
+ const char *prefix,
+ size_t n_columns,
+ bool extra,
+ bool more,
+ OutputFlags flags) {
+
+ unsigned i, j, pid_width;
+
+ if (n_pids == 0)
+ return;
+
+ typesafe_qsort(pids, n_pids, pid_compare_func);
+
+ /* Filter duplicates */
+ for (j = 0, i = 1; i < n_pids; i++) {
+ if (pids[i] == pids[j])
+ continue;
+ pids[++j] = pids[i];
+ }
+ n_pids = j + 1;
+ pid_width = DECIMAL_STR_WIDTH(pids[j]);
+
+ if (flags & OUTPUT_FULL_WIDTH)
+ n_columns = SIZE_MAX;
+ else {
+ if (n_columns > pid_width + 3) /* something like "├─1114784 " */
+ n_columns -= pid_width + 3;
+ else
+ n_columns = 20;
+ }
+ for (i = 0; i < n_pids; i++) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) get_process_cmdline(pids[i], n_columns,
+ PROCESS_CMDLINE_COMM_FALLBACK | PROCESS_CMDLINE_USE_LOCALE,
+ &t);
+
+ if (extra)
+ printf("%s%s ", prefix, special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET));
+ else
+ printf("%s%s", prefix, special_glyph(((more || i < n_pids-1) ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT)));
+
+ printf("%s%*"PID_PRI" %s%s\n", ansi_grey(), pid_width, pids[i], strna(t), ansi_normal());
+ }
+}
+
+static int show_cgroup_one_by_path(
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ bool more,
+ OutputFlags flags) {
+
+ char *fn;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t n = 0, n_allocated = 0;
+ _cleanup_free_ pid_t *pids = NULL;
+ _cleanup_free_ char *p = NULL;
+ pid_t pid;
+ int r;
+
+ r = cg_mangle_path(path, &p);
+ if (r < 0)
+ return r;
+
+ fn = strjoina(p, "/cgroup.procs");
+ f = fopen(fn, "re");
+ if (!f)
+ return -errno;
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ if (!(flags & OUTPUT_KERNEL_THREADS) && is_kernel_thread(pid) > 0)
+ continue;
+
+ if (!GREEDY_REALLOC(pids, n_allocated, n + 1))
+ return -ENOMEM;
+
+ assert(n < n_allocated);
+ pids[n++] = pid;
+ }
+
+ if (r < 0)
+ return r;
+
+ show_pid_array(pids, n, prefix, n_columns, false, more, flags);
+
+ return 0;
+}
+
+static int show_cgroup_name(
+ const char *path,
+ const char *prefix,
+ const char *glyph) {
+
+ _cleanup_free_ char *b = NULL;
+ bool delegate = false;
+ int r;
+
+ r = getxattr_malloc(path, "trusted.delegate", &b, false);
+ if (r < 0) {
+ if (r != -ENODATA)
+ log_debug_errno(r, "Failed to read trusted.delegate extended attribute: %m");
+ } else {
+ r = parse_boolean(b);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse trusted.delegate extended attribute boolean value: %m");
+ else
+ delegate = r > 0;
+
+ b = mfree(b);
+ }
+
+ b = strdup(basename(path));
+ if (!b)
+ return -ENOMEM;
+
+ printf("%s%s%s%s%s %s%s%s\n",
+ prefix, glyph,
+ delegate ? ansi_underline() : "",
+ cg_unescape(b),
+ delegate ? ansi_normal() : "",
+ delegate ? ansi_highlight() : "",
+ delegate ? special_glyph(SPECIAL_GLYPH_ELLIPSIS) : "",
+ delegate ? ansi_normal() : "");
+ return 0;
+}
+
+int show_cgroup_by_path(
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ OutputFlags flags) {
+
+ _cleanup_free_ char *fn = NULL, *p1 = NULL, *last = NULL, *p2 = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ bool shown_pids = false;
+ char *gn = NULL;
+ int r;
+
+ assert(path);
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ r = cg_mangle_path(path, &fn);
+ if (r < 0)
+ return r;
+
+ d = opendir(fn);
+ if (!d)
+ return -errno;
+
+ while ((r = cg_read_subgroup(d, &gn)) > 0) {
+ _cleanup_free_ char *k = NULL;
+
+ k = path_join(fn, gn);
+ free(gn);
+ if (!k)
+ return -ENOMEM;
+
+ if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k) > 0)
+ continue;
+
+ if (!shown_pids) {
+ show_cgroup_one_by_path(path, prefix, n_columns, true, flags);
+ shown_pids = true;
+ }
+
+ if (last) {
+ r = show_cgroup_name(last, prefix, special_glyph(SPECIAL_GLYPH_TREE_BRANCH));
+ if (r < 0)
+ return r;
+
+ if (!p1) {
+ p1 = strjoin(prefix, special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
+ if (!p1)
+ return -ENOMEM;
+ }
+
+ show_cgroup_by_path(last, p1, n_columns-2, flags);
+ free(last);
+ }
+
+ last = TAKE_PTR(k);
+ }
+
+ if (r < 0)
+ return r;
+
+ if (!shown_pids)
+ show_cgroup_one_by_path(path, prefix, n_columns, !!last, flags);
+
+ if (last) {
+ r = show_cgroup_name(last, prefix, special_glyph(SPECIAL_GLYPH_TREE_RIGHT));
+ if (r < 0)
+ return r;
+
+ if (!p2) {
+ p2 = strjoin(prefix, " ");
+ if (!p2)
+ return -ENOMEM;
+ }
+
+ show_cgroup_by_path(last, p2, n_columns-2, flags);
+ }
+
+ return 0;
+}
+
+int show_cgroup(const char *controller,
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ OutputFlags flags) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(path);
+
+ r = cg_get_path(controller, path, NULL, &p);
+ if (r < 0)
+ return r;
+
+ return show_cgroup_by_path(p, prefix, n_columns, flags);
+}
+
+static int show_extra_pids(
+ const char *controller,
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ const pid_t pids[],
+ unsigned n_pids,
+ OutputFlags flags) {
+
+ _cleanup_free_ pid_t *copy = NULL;
+ unsigned i, j;
+ int r;
+
+ assert(path);
+
+ if (n_pids <= 0)
+ return 0;
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ copy = new(pid_t, n_pids);
+ if (!copy)
+ return -ENOMEM;
+
+ for (i = 0, j = 0; i < n_pids; i++) {
+ _cleanup_free_ char *k = NULL;
+
+ r = cg_pid_get_path(controller, pids[i], &k);
+ if (r < 0)
+ return r;
+
+ if (path_startswith(k, path))
+ continue;
+
+ copy[j++] = pids[i];
+ }
+
+ show_pid_array(copy, j, prefix, n_columns, true, false, flags);
+
+ return 0;
+}
+
+int show_cgroup_and_extra(
+ const char *controller,
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ const pid_t extra_pids[],
+ unsigned n_extra_pids,
+ OutputFlags flags) {
+
+ int r;
+
+ assert(path);
+
+ r = show_cgroup(controller, path, prefix, n_columns, flags);
+ if (r < 0)
+ return r;
+
+ return show_extra_pids(controller, path, prefix, n_columns, extra_pids, n_extra_pids, flags);
+}
+
+int show_cgroup_get_unit_path_and_warn(
+ sd_bus *bus,
+ const char *unit,
+ char **ret) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ unit_dbus_interface_from_name(unit),
+ "ControlGroup",
+ &error,
+ ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query unit control group path: %s",
+ bus_error_message(&error, r));
+
+ return 0;
+}
+
+int show_cgroup_get_path_and_warn(
+ const char *machine,
+ const char *prefix,
+ char **ret) {
+
+ int r;
+ _cleanup_free_ char *root = NULL;
+
+ if (machine) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *unit = NULL;
+ const char *m;
+
+ m = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, m, "SCOPE", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load machine data: %m");
+
+ r = bus_connect_transport_systemd(BUS_TRANSPORT_LOCAL, NULL, false, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ r = show_cgroup_get_unit_path_and_warn(bus, unit, &root);
+ if (r < 0)
+ return r;
+ } else {
+ r = cg_get_root_path(&root);
+ if (r == -ENOMEDIUM)
+ return log_error_errno(r, "Failed to get root control group path.\n"
+ "No cgroup filesystem mounted on /sys/fs/cgroup");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to get root control group path: %m");
+ }
+
+ if (prefix) {
+ char *t;
+
+ t = strjoin(root, prefix);
+ if (!t)
+ return log_oom();
+
+ *ret = t;
+ } else
+ *ret = TAKE_PTR(root);
+
+ return 0;
+}
diff --git a/src/shared/cgroup-show.h b/src/shared/cgroup-show.h
new file mode 100644
index 0000000..fbbf766
--- /dev/null
+++ b/src/shared/cgroup-show.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+
+#include "logs-show.h"
+#include "output-mode.h"
+
+int show_cgroup_by_path(const char *path, const char *prefix, size_t n_columns, OutputFlags flags);
+int show_cgroup(const char *controller, const char *path, const char *prefix, size_t n_columns, OutputFlags flags);
+
+int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, size_t n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags);
+
+int show_cgroup_get_unit_path_and_warn(
+ sd_bus *bus,
+ const char *unit,
+ char **ret);
+int show_cgroup_get_path_and_warn(
+ const char *machine,
+ const char *prefix,
+ char **ret);
diff --git a/src/shared/chown-recursive.c b/src/shared/chown-recursive.c
new file mode 100644
index 0000000..4563729
--- /dev/null
+++ b/src/shared/chown-recursive.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+
+#include "chown-recursive.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int chown_one(
+ int fd,
+ const struct stat *st,
+ uid_t uid,
+ gid_t gid,
+ mode_t mask) {
+
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ const char *n;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ /* We change ACLs through the /proc/self/fd/%i path, so that we have a stable reference that works
+ * with O_PATH. */
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ /* Drop any ACL if there is one */
+ FOREACH_STRING(n, "system.posix_acl_access", "system.posix_acl_default")
+ if (removexattr(procfs_path, n) < 0)
+ if (!IN_SET(errno, ENODATA, EOPNOTSUPP, ENOSYS, ENOTTY))
+ return -errno;
+
+ r = fchmod_and_chown(fd, st->st_mode & mask, uid, gid);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int chown_recursive_internal(
+ int fd,
+ const struct stat *st,
+ uid_t uid,
+ gid_t gid,
+ mode_t mask) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ bool changed = false;
+ struct dirent *de;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_close_ int path_fd = -1;
+ struct stat fst;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* Let's pin the child inode we want to fix now with an O_PATH fd, so that it cannot be swapped out
+ * while we manipulate it. */
+ path_fd = openat(dirfd(d), de->d_name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (path_fd < 0)
+ return -errno;
+
+ if (fstat(path_fd, &fst) < 0)
+ return -errno;
+
+ if (S_ISDIR(fst.st_mode)) {
+ int subdir_fd;
+
+ /* Convert it to a "real" (i.e. non-O_PATH) fd now */
+ subdir_fd = fd_reopen(path_fd, O_RDONLY|O_CLOEXEC|O_NOATIME);
+ if (subdir_fd < 0)
+ return subdir_fd;
+
+ r = chown_recursive_internal(subdir_fd, &fst, uid, gid, mask); /* takes possession of subdir_fd even on failure */
+ if (r < 0)
+ return r;
+ if (r > 0)
+ changed = true;
+ } else {
+ r = chown_one(path_fd, &fst, uid, gid, mask);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ changed = true;
+ }
+ }
+
+ r = chown_one(dirfd(d), st, uid, gid, mask);
+ if (r < 0)
+ return r;
+
+ return r > 0 || changed;
+}
+
+int path_chown_recursive(
+ const char *path,
+ uid_t uid,
+ gid_t gid,
+ mode_t mask) {
+
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ fd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (fd < 0)
+ return -errno;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid) && FLAGS_SET(mask, 07777))
+ return 0; /* nothing to do */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Let's take a shortcut: if the top-level directory is properly owned, we don't descend into the
+ * whole tree, under the assumption that all is OK anyway. */
+ if ((!uid_is_valid(uid) || st.st_uid == uid) &&
+ (!gid_is_valid(gid) || st.st_gid == gid) &&
+ ((st.st_mode & ~mask & 07777) == 0))
+ return 0;
+
+ return chown_recursive_internal(TAKE_FD(fd), &st, uid, gid, mask); /* we donate the fd to the call, regardless if it succeeded or failed */
+}
+
+int fd_chown_recursive(
+ int fd,
+ uid_t uid,
+ gid_t gid,
+ mode_t mask) {
+
+ int duplicated_fd = -1;
+ struct stat st;
+
+ /* Note that the slightly different order of fstat() and the checks here and in
+ * path_chown_recursive(). That's because when we open the directory ourselves we can specify
+ * O_DIRECTORY and we always want to ensure we are operating on a directory before deciding whether
+ * the operation is otherwise redundant. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -ENOTDIR;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid) && FLAGS_SET(mask, 07777))
+ return 0; /* nothing to do */
+
+ /* Shortcut, as above */
+ if ((!uid_is_valid(uid) || st.st_uid == uid) &&
+ (!gid_is_valid(gid) || st.st_gid == gid) &&
+ ((st.st_mode & ~mask & 07777) == 0))
+ return 0;
+
+ /* Let's duplicate the fd here, as opendir() wants to take possession of it and close it afterwards */
+ duplicated_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (duplicated_fd < 0)
+ return -errno;
+
+ return chown_recursive_internal(duplicated_fd, &st, uid, gid, mask); /* fd donated even on failure */
+}
diff --git a/src/shared/chown-recursive.h b/src/shared/chown-recursive.h
new file mode 100644
index 0000000..00038c3
--- /dev/null
+++ b/src/shared/chown-recursive.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int path_chown_recursive(const char *path, uid_t uid, gid_t gid, mode_t mask);
+
+int fd_chown_recursive(int fd, uid_t uid, gid_t gid, mode_t mask);
diff --git a/src/shared/clean-ipc.c b/src/shared/clean-ipc.c
new file mode 100644
index 0000000..77fe227
--- /dev/null
+++ b/src/shared/clean-ipc.c
@@ -0,0 +1,454 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <mqueue.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "clean-ipc.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static bool match_uid_gid(uid_t subject_uid, gid_t subject_gid, uid_t delete_uid, gid_t delete_gid) {
+
+ if (uid_is_valid(delete_uid) && subject_uid == delete_uid)
+ return true;
+
+ if (gid_is_valid(delete_gid) && subject_gid == delete_gid)
+ return true;
+
+ return false;
+}
+
+static int clean_sysvipc_shm(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/shm", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/shm: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ unsigned n_attached;
+ pid_t cpid, lpid;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ int shmid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to read /proc/sysvipc/shm: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u " PID_FMT " " PID_FMT " %u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &shmid, &cpid, &lpid, &n_attached, &uid, &gid, &cuid, &cgid) != 8)
+ continue;
+
+ if (n_attached > 0)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (shmctl(shmid, IPC_RMID, NULL) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV shared memory segment %i: %m",
+ shmid);
+ } else {
+ log_debug("Removed SysV shared memory segment %i.", shmid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_sysvipc_sem(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/sem", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/sem: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ int semid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/sysvipc/sem: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &semid, &uid, &gid, &cuid, &cgid) != 5)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (semctl(semid, 0, IPC_RMID) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV semaphores object %i: %m",
+ semid);
+ } else {
+ log_debug("Removed SysV semaphore %i.", semid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_sysvipc_msg(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/msg", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/msg: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ pid_t cpid, lpid;
+ int msgid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/sysvipc/msg: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u %*u " PID_FMT " " PID_FMT " " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &msgid, &cpid, &lpid, &uid, &gid, &cuid, &cgid) != 7)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (msgctl(msgid, IPC_RMID, NULL) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV message queue %i: %m",
+ msgid);
+ } else {
+ log_debug("Removed SysV message queue %i.", msgid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_posix_shm_internal(const char *dirname, DIR *dir, uid_t uid, gid_t gid, bool rm) {
+ struct dirent *de;
+ int ret = 0, r;
+
+ assert(dir);
+
+ FOREACH_DIRENT_ALL(de, dir, goto fail) {
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to stat() POSIX shared memory segment %s/%s: %m",
+ dirname, de->d_name);
+ continue;
+ }
+
+ if (S_ISDIR(st.st_mode)) {
+ _cleanup_closedir_ DIR *kid;
+
+ kid = xopendirat(dirfd(dir), de->d_name, O_NOFOLLOW|O_NOATIME);
+ if (!kid) {
+ if (errno != ENOENT)
+ ret = log_warning_errno(errno, "Failed to enter shared memory directory %s/%s: %m",
+ dirname, de->d_name);
+ } else {
+ r = clean_posix_shm_internal(de->d_name, kid, uid, gid, rm);
+ if (r < 0)
+ ret = r;
+ }
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (unlinkat(dirfd(dir), de->d_name, AT_REMOVEDIR) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to remove POSIX shared memory directory %s/%s: %m",
+ dirname, de->d_name);
+ } else {
+ log_debug("Removed POSIX shared memory directory %s", de->d_name);
+ if (ret == 0)
+ ret = 1;
+ }
+ } else {
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (unlinkat(dirfd(dir), de->d_name, 0) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to remove POSIX shared memory segment %s: %m", de->d_name);
+ } else {
+ log_debug("Removed POSIX shared memory segment %s", de->d_name);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+ }
+
+ return ret;
+
+fail:
+ return log_warning_errno(errno, "Failed to read /dev/shm: %m");
+}
+
+static int clean_posix_shm(uid_t uid, gid_t gid, bool rm) {
+ _cleanup_closedir_ DIR *dir = NULL;
+
+ dir = opendir("/dev/shm");
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /dev/shm: %m");
+ }
+
+ return clean_posix_shm_internal("/dev/shm", dir, uid, gid, rm);
+}
+
+static int clean_posix_mq(uid_t uid, gid_t gid, bool rm) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *de;
+ int ret = 0;
+
+ dir = opendir("/dev/mqueue");
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /dev/mqueue: %m");
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, goto fail) {
+ struct stat st;
+ char fn[1+strlen(de->d_name)+1];
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to stat() MQ segment %s: %m",
+ de->d_name);
+ continue;
+ }
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ fn[0] = '/';
+ strcpy(fn+1, de->d_name);
+
+ if (mq_unlink(fn) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to unlink POSIX message queue %s: %m",
+ fn);
+ } else {
+ log_debug("Removed POSIX message queue %s", fn);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+
+fail:
+ return log_warning_errno(errno, "Failed to read /dev/mqueue: %m");
+}
+
+int clean_ipc_internal(uid_t uid, gid_t gid, bool rm) {
+ int ret = 0, r;
+
+ /* If 'rm' is true, clean all IPC objects owned by either the specified UID or the specified GID. Return the
+ * last error encountered or == 0 if no matching IPC objects have been found or > 0 if matching IPC objects
+ * have been found and have been removed.
+ *
+ * If 'rm' is false, just search for IPC objects owned by either the specified UID or the specified GID. In
+ * this case we return < 0 on error, > 0 if we found a matching object, == 0 if we didn't.
+ *
+ * As special rule: if UID/GID is specified as root we'll silently not clean up things, and always claim that
+ * there are IPC objects for it. */
+
+ if (uid == 0) {
+ if (!rm)
+ return 1;
+
+ uid = UID_INVALID;
+ }
+ if (gid == 0) {
+ if (!rm)
+ return 1;
+
+ gid = GID_INVALID;
+ }
+
+ /* Anything to do? */
+ if (!uid_is_valid(uid) && !gid_is_valid(gid))
+ return 0;
+
+ r = clean_sysvipc_shm(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_sysvipc_sem(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_sysvipc_msg(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_posix_shm(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_posix_mq(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+int clean_ipc_by_uid(uid_t uid) {
+ return clean_ipc_internal(uid, GID_INVALID, true);
+}
+
+int clean_ipc_by_gid(gid_t gid) {
+ return clean_ipc_internal(UID_INVALID, gid, true);
+}
diff --git a/src/shared/clean-ipc.h b/src/shared/clean-ipc.h
new file mode 100644
index 0000000..ed348fb
--- /dev/null
+++ b/src/shared/clean-ipc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "user-util.h"
+
+int clean_ipc_internal(uid_t uid, gid_t gid, bool rm);
+
+/* Remove all IPC objects owned by the specified UID or GID */
+int clean_ipc_by_uid(uid_t uid);
+int clean_ipc_by_gid(gid_t gid);
+
+/* Check if any IPC object owned by the specified UID or GID exists, returns > 0 if so, == 0 if not */
+static inline int search_ipc(uid_t uid, gid_t gid) {
+ return clean_ipc_internal(uid, gid, false);
+}
diff --git a/src/shared/clock-util.c b/src/shared/clock-util.c
new file mode 100644
index 0000000..2caa70f
--- /dev/null
+++ b/src/shared/clock-util.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <time.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "alloc-util.h"
+#include "clock-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "string-util.h"
+
+int clock_get_hwclock(struct tm *tm) {
+ _cleanup_close_ int fd = -1;
+
+ assert(tm);
+
+ fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ /* This leaves the timezone fields of struct tm
+ * uninitialized! */
+ if (ioctl(fd, RTC_RD_TIME, tm) < 0)
+ return -errno;
+
+ /* We don't know daylight saving, so we reset this in order not
+ * to confuse mktime(). */
+ tm->tm_isdst = -1;
+
+ return 0;
+}
+
+int clock_set_hwclock(const struct tm *tm) {
+ _cleanup_close_ int fd = -1;
+
+ assert(tm);
+
+ fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ioctl(fd, RTC_SET_TIME, tm) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int clock_is_localtime(const char* adjtime_path) {
+ _cleanup_fclose_ FILE *f;
+ int r;
+
+ if (!adjtime_path)
+ adjtime_path = "/etc/adjtime";
+
+ /*
+ * The third line of adjtime is "UTC" or "LOCAL" or nothing.
+ * # /etc/adjtime
+ * 0.0 0 0
+ * 0
+ * UTC
+ */
+ f = fopen(adjtime_path, "re");
+ if (f) {
+ _cleanup_free_ char *line = NULL;
+ unsigned i;
+
+ for (i = 0; i < 2; i++) { /* skip the first two lines */
+ r = read_line(f, LONG_LINE_MAX, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false; /* less than three lines → default to UTC */
+ }
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false; /* less than three lines → default to UTC */
+
+ return streq(line, "LOCAL");
+
+ } else if (errno != ENOENT)
+ return -errno;
+
+ /* adjtime not present → default to UTC */
+ return false;
+}
+
+int clock_set_timezone(int *min) {
+ const struct timeval *tv_null = NULL;
+ struct timespec ts;
+ struct tm tm;
+ int minutesdelta;
+ struct timezone tz;
+
+ assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0);
+ assert_se(localtime_r(&ts.tv_sec, &tm));
+ minutesdelta = tm.tm_gmtoff / 60;
+
+ tz.tz_minuteswest = -minutesdelta;
+ tz.tz_dsttime = 0; /* DST_NONE */
+
+ /*
+ * If the RTC does not run in UTC but in local time, the very first
+ * call to settimeofday() will set the kernel's timezone and will warp the
+ * system clock, so that it runs in UTC instead of the local time we
+ * have read from the RTC.
+ */
+ if (settimeofday(tv_null, &tz) < 0)
+ return negative_errno();
+
+ if (min)
+ *min = minutesdelta;
+ return 0;
+}
+
+int clock_reset_timewarp(void) {
+ const struct timeval *tv_null = NULL;
+ struct timezone tz;
+
+ tz.tz_minuteswest = 0;
+ tz.tz_dsttime = 0; /* DST_NONE */
+
+ /*
+ * The very first call to settimeofday() does time warp magic. Do a
+ * dummy call here, so the time warping is sealed and all later calls
+ * behave as expected.
+ */
+ if (settimeofday(tv_null, &tz) < 0)
+ return -errno;
+
+ return 0;
+}
+
+#define EPOCH_FILE "/usr/lib/clock-epoch"
+
+int clock_apply_epoch(void) {
+ struct stat st;
+ struct timespec ts;
+ usec_t epoch_usec;
+
+ if (stat(EPOCH_FILE, &st) < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Cannot stat " EPOCH_FILE ": %m");
+
+ epoch_usec = (usec_t) TIME_EPOCH * USEC_PER_SEC;
+ } else
+ epoch_usec = timespec_load(&st.st_mtim);
+
+ if (now(CLOCK_REALTIME) >= epoch_usec)
+ return 0;
+
+ if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, epoch_usec)) < 0)
+ return -errno;
+
+ return 1;
+}
diff --git a/src/shared/clock-util.h b/src/shared/clock-util.h
new file mode 100644
index 0000000..3f1ae7a
--- /dev/null
+++ b/src/shared/clock-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <time.h>
+
+int clock_is_localtime(const char* adjtime_path);
+int clock_set_timezone(int *min);
+int clock_reset_timewarp(void);
+int clock_get_hwclock(struct tm *tm);
+int clock_set_hwclock(const struct tm *tm);
+int clock_apply_epoch(void);
diff --git a/src/shared/condition.c b/src/shared/condition.c
new file mode 100644
index 0000000..b2ec690
--- /dev/null
+++ b/src/shared/condition.c
@@ -0,0 +1,973 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "architecture.h"
+#include "audit-util.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "cpu-set-util.h"
+#include "efi-loader.h"
+#include "env-file.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "hostname-util.h"
+#include "ima-util.h"
+#include "limits-util.h"
+#include "list.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "tomoyo-util.h"
+#include "user-record.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate) {
+ Condition *c;
+
+ assert(type >= 0);
+ assert(type < _CONDITION_TYPE_MAX);
+ assert(parameter);
+
+ c = new(Condition, 1);
+ if (!c)
+ return NULL;
+
+ *c = (Condition) {
+ .type = type,
+ .trigger = trigger,
+ .negate = negate,
+ };
+
+ if (parameter) {
+ c->parameter = strdup(parameter);
+ if (!c->parameter)
+ return mfree(c);
+ }
+
+ return c;
+}
+
+Condition* condition_free(Condition *c) {
+ assert(c);
+
+ free(c->parameter);
+ return mfree(c);
+}
+
+Condition* condition_free_list_type(Condition *head, ConditionType type) {
+ Condition *c, *n;
+
+ LIST_FOREACH_SAFE(conditions, c, n, head)
+ if (type < 0 || c->type == type) {
+ LIST_REMOVE(conditions, head, c);
+ condition_free(c);
+ }
+
+ assert(type >= 0 || !head);
+ return head;
+}
+
+static int condition_test_kernel_command_line(Condition *c, char **env) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ bool equal;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_KERNEL_COMMAND_LINE);
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ equal = strchr(c->parameter, '=');
+
+ for (p = line;;) {
+ _cleanup_free_ char *word = NULL;
+ bool found;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (equal)
+ found = streq(word, c->parameter);
+ else {
+ const char *f;
+
+ f = startswith(word, c->parameter);
+ found = f && IN_SET(*f, 0, '=');
+ }
+
+ if (found)
+ return true;
+ }
+
+ return false;
+}
+
+typedef enum {
+ /* Listed in order of checking. Note that some comparators are prefixes of others, hence the longest
+ * should be listed first. */
+ ORDER_LOWER_OR_EQUAL,
+ ORDER_GREATER_OR_EQUAL,
+ ORDER_LOWER,
+ ORDER_GREATER,
+ ORDER_EQUAL,
+ ORDER_UNEQUAL,
+ _ORDER_MAX,
+ _ORDER_INVALID = -1
+} OrderOperator;
+
+static OrderOperator parse_order(const char **s) {
+
+ static const char *const prefix[_ORDER_MAX] = {
+ [ORDER_LOWER_OR_EQUAL] = "<=",
+ [ORDER_GREATER_OR_EQUAL] = ">=",
+ [ORDER_LOWER] = "<",
+ [ORDER_GREATER] = ">",
+ [ORDER_EQUAL] = "=",
+ [ORDER_UNEQUAL] = "!=",
+ };
+
+ OrderOperator i;
+
+ for (i = 0; i < _ORDER_MAX; i++) {
+ const char *e;
+
+ e = startswith(*s, prefix[i]);
+ if (e) {
+ *s = e;
+ return i;
+ }
+ }
+
+ return _ORDER_INVALID;
+}
+
+static bool test_order(int k, OrderOperator p) {
+
+ switch (p) {
+
+ case ORDER_LOWER:
+ return k < 0;
+
+ case ORDER_LOWER_OR_EQUAL:
+ return k <= 0;
+
+ case ORDER_EQUAL:
+ return k == 0;
+
+ case ORDER_UNEQUAL:
+ return k != 0;
+
+ case ORDER_GREATER_OR_EQUAL:
+ return k >= 0;
+
+ case ORDER_GREATER:
+ return k > 0;
+
+ default:
+ assert_not_reached("unknown order");
+
+ }
+}
+
+static int condition_test_kernel_version(Condition *c, char **env) {
+ OrderOperator order;
+ struct utsname u;
+ const char *p;
+ bool first = true;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_KERNEL_VERSION);
+
+ assert_se(uname(&u) >= 0);
+
+ p = c->parameter;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ const char *s;
+ int r;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse condition string \"%s\": %m", p);
+ if (r == 0)
+ break;
+
+ s = strstrip(word);
+ order = parse_order(&s);
+ if (order >= 0) {
+ s += strspn(s, WHITESPACE);
+ if (isempty(s)) {
+ if (first) {
+ /* For backwards compatibility, allow whitespace between the operator and
+ * value, without quoting, but only in the first expression. */
+ word = mfree(word);
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse condition string \"%s\": %m", p);
+ if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected end of expression: %s", p);
+ s = word;
+ } else
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected end of expression: %s", p);
+ }
+
+ r = test_order(str_verscmp(u.release, s), order);
+ } else
+ /* No prefix? Then treat as glob string */
+ r = fnmatch(s, u.release, 0) == 0;
+
+ if (r == 0)
+ return false;
+
+ first = false;
+ }
+
+ return true;
+}
+
+static int condition_test_memory(Condition *c, char **env) {
+ OrderOperator order;
+ uint64_t m, k;
+ const char *p;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_MEMORY);
+
+ m = physical_memory();
+
+ p = c->parameter;
+ order = parse_order(&p);
+ if (order < 0)
+ order = ORDER_GREATER_OR_EQUAL; /* default to >= check, if nothing is specified. */
+
+ r = safe_atou64(p, &k);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse size: %m");
+
+ return test_order(CMP(m, k), order);
+}
+
+static int condition_test_cpus(Condition *c, char **env) {
+ OrderOperator order;
+ const char *p;
+ unsigned k;
+ int r, n;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_CPUS);
+
+ n = cpus_in_affinity_mask();
+ if (n < 0)
+ return log_debug_errno(n, "Failed to determine CPUs in affinity mask: %m");
+
+ p = c->parameter;
+ order = parse_order(&p);
+ if (order < 0)
+ order = ORDER_GREATER_OR_EQUAL; /* default to >= check, if nothing is specified. */
+
+ r = safe_atou(p, &k);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse number of CPUs: %m");
+
+ return test_order(CMP((unsigned) n, k), order);
+}
+
+static int condition_test_user(Condition *c, char **env) {
+ uid_t id;
+ int r;
+ _cleanup_free_ char *username = NULL;
+ const char *u;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_USER);
+
+ r = parse_uid(c->parameter, &id);
+ if (r >= 0)
+ return id == getuid() || id == geteuid();
+
+ if (streq("@system", c->parameter))
+ return uid_is_system(getuid()) || uid_is_system(geteuid());
+
+ username = getusername_malloc();
+ if (!username)
+ return -ENOMEM;
+
+ if (streq(username, c->parameter))
+ return 1;
+
+ if (getpid_cached() == 1)
+ return streq(c->parameter, "root");
+
+ u = c->parameter;
+ r = get_user_creds(&u, &id, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ return 0;
+
+ return id == getuid() || id == geteuid();
+}
+
+static int condition_test_control_group_controller(Condition *c, char **env) {
+ int r;
+ CGroupMask system_mask, wanted_mask = 0;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_CONTROL_GROUP_CONTROLLER);
+
+ r = cg_mask_supported(&system_mask);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine supported controllers: %m");
+
+ r = cg_mask_from_string(c->parameter, &wanted_mask);
+ if (r < 0 || wanted_mask <= 0) {
+ /* This won't catch the case that we have an unknown controller
+ * mixed in with valid ones -- these are only assessed on the
+ * validity of the valid controllers found. */
+ log_debug("Failed to parse cgroup string: %s", c->parameter);
+ return 1;
+ }
+
+ return FLAGS_SET(system_mask, wanted_mask);
+}
+
+static int condition_test_group(Condition *c, char **env) {
+ gid_t id;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_GROUP);
+
+ r = parse_gid(c->parameter, &id);
+ if (r >= 0)
+ return in_gid(id);
+
+ /* Avoid any NSS lookups if we are PID1 */
+ if (getpid_cached() == 1)
+ return streq(c->parameter, "root");
+
+ return in_group(c->parameter) > 0;
+}
+
+static int condition_test_virtualization(Condition *c, char **env) {
+ int b, v;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_VIRTUALIZATION);
+
+ if (streq(c->parameter, "private-users"))
+ return running_in_userns();
+
+ v = detect_virtualization();
+ if (v < 0)
+ return v;
+
+ /* First, compare with yes/no */
+ b = parse_boolean(c->parameter);
+ if (b >= 0)
+ return b == !!v;
+
+ /* Then, compare categorization */
+ if (streq(c->parameter, "vm"))
+ return VIRTUALIZATION_IS_VM(v);
+
+ if (streq(c->parameter, "container"))
+ return VIRTUALIZATION_IS_CONTAINER(v);
+
+ /* Finally compare id */
+ return v != VIRTUALIZATION_NONE && streq(c->parameter, virtualization_to_string(v));
+}
+
+static int condition_test_architecture(Condition *c, char **env) {
+ int a, b;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_ARCHITECTURE);
+
+ a = uname_architecture();
+ if (a < 0)
+ return a;
+
+ if (streq(c->parameter, "native"))
+ b = native_architecture();
+ else {
+ b = architecture_from_string(c->parameter);
+ if (b < 0) /* unknown architecture? Then it's definitely not ours */
+ return false;
+ }
+
+ return a == b;
+}
+
+static int condition_test_host(Condition *c, char **env) {
+ _cleanup_free_ char *h = NULL;
+ sd_id128_t x, y;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_HOST);
+
+ if (sd_id128_from_string(c->parameter, &x) >= 0) {
+
+ r = sd_id128_get_machine(&y);
+ if (r < 0)
+ return r;
+
+ return sd_id128_equal(x, y);
+ }
+
+ h = gethostname_malloc();
+ if (!h)
+ return -ENOMEM;
+
+ return fnmatch(c->parameter, h, FNM_CASEFOLD) == 0;
+}
+
+static int condition_test_ac_power(Condition *c, char **env) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_AC_POWER);
+
+ r = parse_boolean(c->parameter);
+ if (r < 0)
+ return r;
+
+ return (on_ac_power() != 0) == !!r;
+}
+
+static int condition_test_security(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_SECURITY);
+
+ if (streq(c->parameter, "selinux"))
+ return mac_selinux_use();
+ if (streq(c->parameter, "smack"))
+ return mac_smack_use();
+ if (streq(c->parameter, "apparmor"))
+ return mac_apparmor_use();
+ if (streq(c->parameter, "audit"))
+ return use_audit();
+ if (streq(c->parameter, "ima"))
+ return use_ima();
+ if (streq(c->parameter, "tomoyo"))
+ return mac_tomoyo_use();
+ if (streq(c->parameter, "uefi-secureboot"))
+ return is_efi_secure_boot();
+
+ return false;
+}
+
+static int condition_test_capability(Condition *c, char **env) {
+ unsigned long long capabilities = (unsigned long long) -1;
+ _cleanup_fclose_ FILE *f = NULL;
+ int value, r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_CAPABILITY);
+
+ /* If it's an invalid capability, we don't have it */
+ value = capability_from_name(c->parameter);
+ if (value < 0)
+ return -EINVAL;
+
+ /* If it's a valid capability we default to assume
+ * that we have it */
+
+ f = fopen("/proc/self/status", "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = startswith(line, "CapBnd:");
+ if (p) {
+ if (sscanf(line+7, "%llx", &capabilities) != 1)
+ return -EIO;
+
+ break;
+ }
+ }
+
+ return !!(capabilities & (1ULL << value));
+}
+
+static int condition_test_needs_update(Condition *c, char **env) {
+ struct stat usr, other;
+ const char *p;
+ bool b;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_NEEDS_UPDATE);
+
+ r = proc_cmdline_get_bool("systemd.condition-needs-update", &b);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse systemd.condition-needs-update= kernel command line argument, ignoring: %m");
+ if (r > 0)
+ return b;
+
+ if (!path_is_absolute(c->parameter)) {
+ log_debug("Specified condition parameter '%s' is not absolute, assuming an update is needed.", c->parameter);
+ return true;
+ }
+
+ /* If the file system is read-only we shouldn't suggest an update */
+ r = path_is_read_only_fs(c->parameter);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine if '%s' is read-only, ignoring: %m", c->parameter);
+ if (r > 0)
+ return false;
+
+ /* Any other failure means we should allow the condition to be true, so that we rather invoke too
+ * many update tools than too few. */
+
+ p = strjoina(c->parameter, "/.updated");
+ if (lstat(p, &other) < 0) {
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to stat() '%s', assuming an update is needed: %m", p);
+ return true;
+ }
+
+ if (lstat("/usr/", &usr) < 0) {
+ log_debug_errno(errno, "Failed to stat() /usr/, assuming an update is needed: %m");
+ return true;
+ }
+
+ /*
+ * First, compare seconds as they are always accurate...
+ */
+ if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec)
+ return usr.st_mtim.tv_sec > other.st_mtim.tv_sec;
+
+ /*
+ * ...then compare nanoseconds.
+ *
+ * A false positive is only possible when /usr's nanoseconds > 0
+ * (otherwise /usr cannot be strictly newer than the target file)
+ * AND the target file's nanoseconds == 0
+ * (otherwise the filesystem supports nsec timestamps, see stat(2)).
+ */
+ if (usr.st_mtim.tv_nsec == 0 || other.st_mtim.tv_nsec > 0)
+ return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec;
+
+ _cleanup_free_ char *timestamp_str = NULL;
+ r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", &timestamp_str);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p);
+ return true;
+ } else if (r == 0) {
+ log_debug("No data in timestamp file '%s', using mtime.", p);
+ return true;
+ }
+
+ uint64_t timestamp;
+ r = safe_atou64(timestamp_str, &timestamp);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p);
+ return true;
+ }
+
+ return timespec_load_nsec(&usr.st_mtim) > timestamp;
+}
+
+static int condition_test_first_boot(Condition *c, char **env) {
+ int r, q;
+ bool b;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FIRST_BOOT);
+
+ r = proc_cmdline_get_bool("systemd.condition-first-boot", &b);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse systemd.condition-first-boot= kernel command line argument, ignoring: %m");
+ if (r > 0)
+ return b == !!r;
+
+ r = parse_boolean(c->parameter);
+ if (r < 0)
+ return r;
+
+ q = access("/run/systemd/first-boot", F_OK);
+ if (q < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to check if /run/systemd/first-boot exists, ignoring: %m");
+
+ return (q >= 0) == !!r;
+}
+
+static int condition_test_environment(Condition *c, char **env) {
+ bool equal;
+ char **i;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_ENVIRONMENT);
+
+ equal = strchr(c->parameter, '=');
+
+ STRV_FOREACH(i, env) {
+ bool found;
+
+ if (equal)
+ found = streq(c->parameter, *i);
+ else {
+ const char *f;
+
+ f = startswith(*i, c->parameter);
+ found = f && IN_SET(*f, 0, '=');
+ }
+
+ if (found)
+ return true;
+ }
+
+ return false;
+}
+
+static int condition_test_path_exists(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_EXISTS);
+
+ return access(c->parameter, F_OK) >= 0;
+}
+
+static int condition_test_path_exists_glob(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_EXISTS_GLOB);
+
+ return glob_exists(c->parameter) > 0;
+}
+
+static int condition_test_path_is_directory(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_DIRECTORY);
+
+ return is_dir(c->parameter, true) > 0;
+}
+
+static int condition_test_path_is_symbolic_link(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_SYMBOLIC_LINK);
+
+ return is_symlink(c->parameter) > 0;
+}
+
+static int condition_test_path_is_mount_point(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_MOUNT_POINT);
+
+ return path_is_mount_point(c->parameter, NULL, AT_SYMLINK_FOLLOW) > 0;
+}
+
+static int condition_test_path_is_read_write(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_READ_WRITE);
+
+ return path_is_read_only_fs(c->parameter) <= 0;
+}
+
+static int condition_test_path_is_encrypted(Condition *c, char **env) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_ENCRYPTED);
+
+ r = path_is_encrypted(c->parameter);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to determine if '%s' is encrypted: %m", c->parameter);
+
+ return r > 0;
+}
+
+static int condition_test_directory_not_empty(Condition *c, char **env) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_DIRECTORY_NOT_EMPTY);
+
+ r = dir_is_empty(c->parameter);
+ return r <= 0 && r != -ENOENT;
+}
+
+static int condition_test_file_not_empty(Condition *c, char **env) {
+ struct stat st;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FILE_NOT_EMPTY);
+
+ return (stat(c->parameter, &st) >= 0 &&
+ S_ISREG(st.st_mode) &&
+ st.st_size > 0);
+}
+
+static int condition_test_file_is_executable(Condition *c, char **env) {
+ struct stat st;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FILE_IS_EXECUTABLE);
+
+ return (stat(c->parameter, &st) >= 0 &&
+ S_ISREG(st.st_mode) &&
+ (st.st_mode & 0111));
+}
+
+int condition_test(Condition *c, char **env) {
+
+ static int (*const condition_tests[_CONDITION_TYPE_MAX])(Condition *c, char **env) = {
+ [CONDITION_PATH_EXISTS] = condition_test_path_exists,
+ [CONDITION_PATH_EXISTS_GLOB] = condition_test_path_exists_glob,
+ [CONDITION_PATH_IS_DIRECTORY] = condition_test_path_is_directory,
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = condition_test_path_is_symbolic_link,
+ [CONDITION_PATH_IS_MOUNT_POINT] = condition_test_path_is_mount_point,
+ [CONDITION_PATH_IS_READ_WRITE] = condition_test_path_is_read_write,
+ [CONDITION_PATH_IS_ENCRYPTED] = condition_test_path_is_encrypted,
+ [CONDITION_DIRECTORY_NOT_EMPTY] = condition_test_directory_not_empty,
+ [CONDITION_FILE_NOT_EMPTY] = condition_test_file_not_empty,
+ [CONDITION_FILE_IS_EXECUTABLE] = condition_test_file_is_executable,
+ [CONDITION_KERNEL_COMMAND_LINE] = condition_test_kernel_command_line,
+ [CONDITION_KERNEL_VERSION] = condition_test_kernel_version,
+ [CONDITION_VIRTUALIZATION] = condition_test_virtualization,
+ [CONDITION_SECURITY] = condition_test_security,
+ [CONDITION_CAPABILITY] = condition_test_capability,
+ [CONDITION_HOST] = condition_test_host,
+ [CONDITION_AC_POWER] = condition_test_ac_power,
+ [CONDITION_ARCHITECTURE] = condition_test_architecture,
+ [CONDITION_NEEDS_UPDATE] = condition_test_needs_update,
+ [CONDITION_FIRST_BOOT] = condition_test_first_boot,
+ [CONDITION_USER] = condition_test_user,
+ [CONDITION_GROUP] = condition_test_group,
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = condition_test_control_group_controller,
+ [CONDITION_CPUS] = condition_test_cpus,
+ [CONDITION_MEMORY] = condition_test_memory,
+ [CONDITION_ENVIRONMENT] = condition_test_environment,
+ };
+
+ int r, b;
+
+ assert(c);
+ assert(c->type >= 0);
+ assert(c->type < _CONDITION_TYPE_MAX);
+
+ r = condition_tests[c->type](c, env);
+ if (r < 0) {
+ c->result = CONDITION_ERROR;
+ return r;
+ }
+
+ b = (r > 0) == !c->negate;
+ c->result = b ? CONDITION_SUCCEEDED : CONDITION_FAILED;
+ return b;
+}
+
+bool condition_test_list(
+ Condition *first,
+ char **env,
+ condition_to_string_t to_string,
+ condition_test_logger_t logger,
+ void *userdata) {
+
+ Condition *c;
+ int triggered = -1;
+
+ assert(!!logger == !!to_string);
+
+ /* If the condition list is empty, then it is true */
+ if (!first)
+ return true;
+
+ /* Otherwise, if all of the non-trigger conditions apply and
+ * if any of the trigger conditions apply (unless there are
+ * none) we return true */
+ LIST_FOREACH(conditions, c, first) {
+ int r;
+
+ r = condition_test(c, env);
+
+ if (logger) {
+ if (r < 0)
+ logger(userdata, LOG_WARNING, r, PROJECT_FILE, __LINE__, __func__,
+ "Couldn't determine result for %s=%s%s%s, assuming failed: %m",
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter);
+ else
+ logger(userdata, LOG_DEBUG, 0, PROJECT_FILE, __LINE__, __func__,
+ "%s=%s%s%s %s.",
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter,
+ condition_result_to_string(c->result));
+ }
+
+ if (!c->trigger && r <= 0)
+ return false;
+
+ if (c->trigger && triggered <= 0)
+ triggered = r > 0;
+ }
+
+ return triggered != 0;
+}
+
+void condition_dump(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string) {
+ assert(c);
+ assert(f);
+ assert(to_string);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%s\t%s: %s%s%s %s\n",
+ prefix,
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter,
+ condition_result_to_string(c->result));
+}
+
+void condition_dump_list(Condition *first, FILE *f, const char *prefix, condition_to_string_t to_string) {
+ Condition *c;
+
+ LIST_FOREACH(conditions, c, first)
+ condition_dump(c, f, prefix, to_string);
+}
+
+static const char* const condition_type_table[_CONDITION_TYPE_MAX] = {
+ [CONDITION_ARCHITECTURE] = "ConditionArchitecture",
+ [CONDITION_VIRTUALIZATION] = "ConditionVirtualization",
+ [CONDITION_HOST] = "ConditionHost",
+ [CONDITION_KERNEL_COMMAND_LINE] = "ConditionKernelCommandLine",
+ [CONDITION_KERNEL_VERSION] = "ConditionKernelVersion",
+ [CONDITION_SECURITY] = "ConditionSecurity",
+ [CONDITION_CAPABILITY] = "ConditionCapability",
+ [CONDITION_AC_POWER] = "ConditionACPower",
+ [CONDITION_NEEDS_UPDATE] = "ConditionNeedsUpdate",
+ [CONDITION_FIRST_BOOT] = "ConditionFirstBoot",
+ [CONDITION_PATH_EXISTS] = "ConditionPathExists",
+ [CONDITION_PATH_EXISTS_GLOB] = "ConditionPathExistsGlob",
+ [CONDITION_PATH_IS_DIRECTORY] = "ConditionPathIsDirectory",
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = "ConditionPathIsSymbolicLink",
+ [CONDITION_PATH_IS_MOUNT_POINT] = "ConditionPathIsMountPoint",
+ [CONDITION_PATH_IS_READ_WRITE] = "ConditionPathIsReadWrite",
+ [CONDITION_PATH_IS_ENCRYPTED] = "ConditionPathIsEncrypted",
+ [CONDITION_DIRECTORY_NOT_EMPTY] = "ConditionDirectoryNotEmpty",
+ [CONDITION_FILE_NOT_EMPTY] = "ConditionFileNotEmpty",
+ [CONDITION_FILE_IS_EXECUTABLE] = "ConditionFileIsExecutable",
+ [CONDITION_USER] = "ConditionUser",
+ [CONDITION_GROUP] = "ConditionGroup",
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = "ConditionControlGroupController",
+ [CONDITION_CPUS] = "ConditionCPUs",
+ [CONDITION_MEMORY] = "ConditionMemory",
+ [CONDITION_ENVIRONMENT] = "ConditionEnvironment",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(condition_type, ConditionType);
+
+static const char* const assert_type_table[_CONDITION_TYPE_MAX] = {
+ [CONDITION_ARCHITECTURE] = "AssertArchitecture",
+ [CONDITION_VIRTUALIZATION] = "AssertVirtualization",
+ [CONDITION_HOST] = "AssertHost",
+ [CONDITION_KERNEL_COMMAND_LINE] = "AssertKernelCommandLine",
+ [CONDITION_KERNEL_VERSION] = "AssertKernelVersion",
+ [CONDITION_SECURITY] = "AssertSecurity",
+ [CONDITION_CAPABILITY] = "AssertCapability",
+ [CONDITION_AC_POWER] = "AssertACPower",
+ [CONDITION_NEEDS_UPDATE] = "AssertNeedsUpdate",
+ [CONDITION_FIRST_BOOT] = "AssertFirstBoot",
+ [CONDITION_PATH_EXISTS] = "AssertPathExists",
+ [CONDITION_PATH_EXISTS_GLOB] = "AssertPathExistsGlob",
+ [CONDITION_PATH_IS_DIRECTORY] = "AssertPathIsDirectory",
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = "AssertPathIsSymbolicLink",
+ [CONDITION_PATH_IS_MOUNT_POINT] = "AssertPathIsMountPoint",
+ [CONDITION_PATH_IS_READ_WRITE] = "AssertPathIsReadWrite",
+ [CONDITION_PATH_IS_ENCRYPTED] = "AssertPathIsEncrypted",
+ [CONDITION_DIRECTORY_NOT_EMPTY] = "AssertDirectoryNotEmpty",
+ [CONDITION_FILE_NOT_EMPTY] = "AssertFileNotEmpty",
+ [CONDITION_FILE_IS_EXECUTABLE] = "AssertFileIsExecutable",
+ [CONDITION_USER] = "AssertUser",
+ [CONDITION_GROUP] = "AssertGroup",
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = "AssertControlGroupController",
+ [CONDITION_CPUS] = "AssertCPUs",
+ [CONDITION_MEMORY] = "AssertMemory",
+ [CONDITION_ENVIRONMENT] = "AssertEnvironment",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(assert_type, ConditionType);
+
+static const char* const condition_result_table[_CONDITION_RESULT_MAX] = {
+ [CONDITION_UNTESTED] = "untested",
+ [CONDITION_SUCCEEDED] = "succeeded",
+ [CONDITION_FAILED] = "failed",
+ [CONDITION_ERROR] = "error",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(condition_result, ConditionResult);
diff --git a/src/shared/condition.h b/src/shared/condition.h
new file mode 100644
index 0000000..0d9754e
--- /dev/null
+++ b/src/shared/condition.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef enum ConditionType {
+ CONDITION_ARCHITECTURE,
+ CONDITION_VIRTUALIZATION,
+ CONDITION_HOST,
+ CONDITION_KERNEL_COMMAND_LINE,
+ CONDITION_KERNEL_VERSION,
+ CONDITION_SECURITY,
+ CONDITION_CAPABILITY,
+ CONDITION_AC_POWER,
+ CONDITION_MEMORY,
+ CONDITION_CPUS,
+ CONDITION_ENVIRONMENT,
+
+ CONDITION_NEEDS_UPDATE,
+ CONDITION_FIRST_BOOT,
+
+ CONDITION_PATH_EXISTS,
+ CONDITION_PATH_EXISTS_GLOB,
+ CONDITION_PATH_IS_DIRECTORY,
+ CONDITION_PATH_IS_SYMBOLIC_LINK,
+ CONDITION_PATH_IS_MOUNT_POINT,
+ CONDITION_PATH_IS_READ_WRITE,
+ CONDITION_PATH_IS_ENCRYPTED,
+ CONDITION_DIRECTORY_NOT_EMPTY,
+ CONDITION_FILE_NOT_EMPTY,
+ CONDITION_FILE_IS_EXECUTABLE,
+
+ CONDITION_USER,
+ CONDITION_GROUP,
+
+ CONDITION_CONTROL_GROUP_CONTROLLER,
+
+ _CONDITION_TYPE_MAX,
+ _CONDITION_TYPE_INVALID = -1
+} ConditionType;
+
+typedef enum ConditionResult {
+ CONDITION_UNTESTED,
+ CONDITION_SUCCEEDED,
+ CONDITION_FAILED,
+ CONDITION_ERROR,
+ _CONDITION_RESULT_MAX,
+ _CONDITION_RESULT_INVALID = -1
+} ConditionResult;
+
+typedef struct Condition {
+ ConditionType type:8;
+
+ bool trigger:1;
+ bool negate:1;
+
+ ConditionResult result:6;
+
+ char *parameter;
+
+ LIST_FIELDS(struct Condition, conditions);
+} Condition;
+
+Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate);
+Condition* condition_free(Condition *c);
+Condition* condition_free_list_type(Condition *first, ConditionType type);
+static inline Condition* condition_free_list(Condition *first) {
+ return condition_free_list_type(first, _CONDITION_TYPE_INVALID);
+}
+
+int condition_test(Condition *c, char **env);
+
+typedef int (*condition_test_logger_t)(void *userdata, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8);
+typedef const char* (*condition_to_string_t)(ConditionType t) _const_;
+bool condition_test_list(Condition *first, char **env, condition_to_string_t to_string, condition_test_logger_t logger, void *userdata);
+
+void condition_dump(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string);
+void condition_dump_list(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string);
+
+const char* condition_type_to_string(ConditionType t) _const_;
+ConditionType condition_type_from_string(const char *s) _pure_;
+
+const char* assert_type_to_string(ConditionType t) _const_;
+ConditionType assert_type_from_string(const char *s) _pure_;
+
+const char* condition_result_to_string(ConditionResult r) _const_;
+ConditionResult condition_result_from_string(const char *s) _pure_;
+
+static inline bool condition_takes_path(ConditionType t) {
+ return IN_SET(t,
+ CONDITION_PATH_EXISTS,
+ CONDITION_PATH_EXISTS_GLOB,
+ CONDITION_PATH_IS_DIRECTORY,
+ CONDITION_PATH_IS_SYMBOLIC_LINK,
+ CONDITION_PATH_IS_MOUNT_POINT,
+ CONDITION_PATH_IS_READ_WRITE,
+ CONDITION_PATH_IS_ENCRYPTED,
+ CONDITION_DIRECTORY_NOT_EMPTY,
+ CONDITION_FILE_NOT_EMPTY,
+ CONDITION_FILE_IS_EXECUTABLE,
+ CONDITION_NEEDS_UPDATE);
+}
diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c
new file mode 100644
index 0000000..35d301d
--- /dev/null
+++ b/src/shared/conf-parser.c
@@ -0,0 +1,1247 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_network.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "sd-id128.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "time-util.h"
+#include "utf8.h"
+
+int config_item_table_lookup(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata) {
+
+ const ConfigTableItem *t;
+
+ assert(table);
+ assert(lvalue);
+ assert(func);
+ assert(ltype);
+ assert(data);
+
+ for (t = table; t->lvalue; t++) {
+
+ if (!streq(lvalue, t->lvalue))
+ continue;
+
+ if (!streq_ptr(section, t->section))
+ continue;
+
+ *func = t->parse;
+ *ltype = t->ltype;
+ *data = t->data;
+ return 1;
+ }
+
+ return 0;
+}
+
+int config_item_perf_lookup(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata) {
+
+ ConfigPerfItemLookup lookup = (ConfigPerfItemLookup) table;
+ const ConfigPerfItem *p;
+
+ assert(table);
+ assert(lvalue);
+ assert(func);
+ assert(ltype);
+ assert(data);
+
+ if (section) {
+ const char *key;
+
+ key = strjoina(section, ".", lvalue);
+ p = lookup(key, strlen(key));
+ } else
+ p = lookup(lvalue, strlen(lvalue));
+ if (!p)
+ return 0;
+
+ *func = p->parse;
+ *ltype = p->ltype;
+ *data = (uint8_t*) userdata + p->offset;
+ return 1;
+}
+
+/* Run the user supplied parser for an assignment */
+static int next_assignment(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ ConfigItemLookup lookup,
+ const void *table,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ const char *rvalue,
+ ConfigParseFlags flags,
+ void *userdata) {
+
+ ConfigParserCallback func = NULL;
+ int ltype = 0;
+ void *data = NULL;
+ int r;
+
+ assert(filename);
+ assert(line > 0);
+ assert(lookup);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = lookup(table, section, lvalue, &func, &ltype, &data, userdata);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (func)
+ return func(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+
+ return 0;
+ }
+
+ /* Warn about unknown non-extension fields. */
+ if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(lvalue, "X-"))
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Unknown key name '%s' in section '%s', ignoring.", lvalue, section);
+
+ return 0;
+}
+
+/* Parse a single logical line */
+static int parse_line(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ char **section,
+ unsigned *section_line,
+ bool *section_ignored,
+ char *l,
+ void *userdata) {
+
+ char *e;
+
+ assert(filename);
+ assert(line > 0);
+ assert(lookup);
+ assert(l);
+
+ l = strstrip(l);
+ if (!*l)
+ return 0;
+
+ if (*l == '\n')
+ return 0;
+
+ if (!utf8_is_valid(l))
+ return log_syntax_invalid_utf8(unit, LOG_WARNING, filename, line, l);
+
+ if (*l == '[') {
+ size_t k;
+ char *n;
+
+ k = strlen(l);
+ assert(k > 0);
+
+ if (l[k-1] != ']')
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EBADMSG), "Invalid section header '%s'", l);
+
+ n = strndup(l+1, k-2);
+ if (!n)
+ return log_oom();
+
+ if (sections && !nulstr_contains(sections, n)) {
+ bool ignore = flags & CONFIG_PARSE_RELAXED;
+ const char *t;
+
+ ignore = ignore || startswith(n, "X-");
+
+ if (!ignore)
+ NULSTR_FOREACH(t, sections)
+ if (streq_ptr(n, startswith(t, "-"))) {
+ ignore = true;
+ break;
+ }
+
+ if (!ignore)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown section '%s'. Ignoring.", n);
+
+ free(n);
+ *section = mfree(*section);
+ *section_line = 0;
+ *section_ignored = true;
+ } else {
+ free_and_replace(*section, n);
+ *section_line = line;
+ *section_ignored = false;
+ }
+
+ return 0;
+ }
+
+ if (sections && !*section) {
+ if (!(flags & CONFIG_PARSE_RELAXED) && !*section_ignored)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Assignment outside of section. Ignoring.");
+
+ return 0;
+ }
+
+ e = strchr(l, '=');
+ if (!e)
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Missing '=', ignoring line.");
+ if (e == l)
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Missing key name before '=', ignoring line.");
+
+ *e = 0;
+ e++;
+
+ return next_assignment(unit,
+ filename,
+ line,
+ lookup,
+ table,
+ *section,
+ *section_line,
+ strstrip(l),
+ strstrip(e),
+ flags,
+ userdata);
+}
+
+/* Go through the file and parse each line */
+int config_parse(const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime) {
+
+ _cleanup_free_ char *section = NULL, *continuation = NULL;
+ _cleanup_fclose_ FILE *ours = NULL;
+ unsigned line = 0, section_line = 0;
+ bool section_ignored = false, bom_seen = false;
+ int r, fd;
+ usec_t mtime;
+
+ assert(filename);
+ assert(lookup);
+
+ if (!f) {
+ f = ours = fopen(filename, "re");
+ if (!f) {
+ /* Only log on request, except for ENOENT,
+ * since we return 0 to the caller. */
+ if ((flags & CONFIG_PARSE_WARN) || errno == ENOENT)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to open configuration file '%s': %m", filename);
+ return errno == ENOENT ? 0 : -errno;
+ }
+ }
+
+ fd = fileno(f);
+ if (fd >= 0) { /* stream might not have an fd, let's be careful hence */
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return log_full_errno(FLAGS_SET(flags, CONFIG_PARSE_WARN) ? LOG_ERR : LOG_DEBUG, errno,
+ "Failed to fstat(%s): %m", filename);
+
+ (void) stat_warn_permissions(filename, &st);
+ mtime = timespec_load(&st.st_mtim);
+ }
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ bool escaped = false;
+ char *l, *p, *e;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_error_errno(r, "%s:%u: Line too long", filename, line);
+
+ return r;
+ }
+ if (r < 0) {
+ if (FLAGS_SET(flags, CONFIG_PARSE_WARN))
+ log_error_errno(r, "%s:%u: Error while reading configuration file: %m", filename, line);
+
+ return r;
+ }
+
+ line++;
+
+ l = skip_leading_chars(buf, WHITESPACE);
+ if (*l != '\0' && strchr(COMMENTS, *l))
+ continue;
+
+ l = buf;
+ if (!bom_seen) {
+ char *q;
+
+ q = startswith(buf, UTF8_BYTE_ORDER_MARK);
+ if (q) {
+ l = q;
+ bom_seen = true;
+ }
+ }
+
+ if (continuation) {
+ if (strlen(continuation) + strlen(l) > LONG_LINE_MAX) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_error("%s:%u: Continuation line too long", filename, line);
+ return -ENOBUFS;
+ }
+
+ if (!strextend(&continuation, l, NULL)) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_oom();
+ return -ENOMEM;
+ }
+
+ p = continuation;
+ } else
+ p = l;
+
+ for (e = p; *e; e++) {
+ if (escaped)
+ escaped = false;
+ else if (*e == '\\')
+ escaped = true;
+ }
+
+ if (escaped) {
+ *(e-1) = ' ';
+
+ if (!continuation) {
+ continuation = strdup(l);
+ if (!continuation) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_oom();
+ return -ENOMEM;
+ }
+ }
+
+ continue;
+ }
+
+ r = parse_line(unit,
+ filename,
+ line,
+ sections,
+ lookup,
+ table,
+ flags,
+ &section,
+ &section_line,
+ &section_ignored,
+ p,
+ userdata);
+ if (r < 0) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line);
+ return r;
+ }
+
+ continuation = mfree(continuation);
+ }
+
+ if (continuation) {
+ r = parse_line(unit,
+ filename,
+ ++line,
+ sections,
+ lookup,
+ table,
+ flags,
+ &section,
+ &section_line,
+ &section_ignored,
+ continuation,
+ userdata);
+ if (r < 0) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line);
+ return r;
+ }
+ }
+
+ if (ret_mtime)
+ *ret_mtime = mtime;
+
+ return 0;
+}
+
+static int config_parse_many_files(
+ const char *conf_file,
+ char **files,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime) {
+
+ usec_t mtime = 0;
+ char **fn;
+ int r;
+
+ if (conf_file) {
+ r = config_parse(NULL, conf_file, NULL, sections, lookup, table, flags, userdata, &mtime);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(fn, files) {
+ usec_t t;
+
+ r = config_parse(NULL, *fn, NULL, sections, lookup, table, flags, userdata, &t);
+ if (r < 0)
+ return r;
+ if (t > mtime) /* Find the newest */
+ mtime = t;
+ }
+
+ if (ret_mtime)
+ *ret_mtime = mtime;
+
+ return 0;
+}
+
+/* Parse each config file in the directories specified as nulstr. */
+int config_parse_many_nulstr(
+ const char *conf_file,
+ const char *conf_file_dirs,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime) {
+
+ _cleanup_strv_free_ char **files = NULL;
+ int r;
+
+ r = conf_files_list_nulstr(&files, ".conf", NULL, 0, conf_file_dirs);
+ if (r < 0)
+ return r;
+
+ return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata, ret_mtime);
+}
+
+/* Parse each config file in the directories specified as strv. */
+int config_parse_many(
+ const char *conf_file,
+ const char* const* conf_file_dirs,
+ const char *dropin_dirname,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime) {
+
+ _cleanup_strv_free_ char **dropin_dirs = NULL;
+ _cleanup_strv_free_ char **files = NULL;
+ const char *suffix;
+ int r;
+
+ suffix = strjoina("/", dropin_dirname);
+ r = strv_extend_strv_concat(&dropin_dirs, (char**) conf_file_dirs, suffix);
+ if (r < 0)
+ return r;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char* const*) dropin_dirs);
+ if (r < 0)
+ return r;
+
+ return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata, ret_mtime);
+}
+
+#define DEFINE_PARSER(type, vartype, conv_func) \
+ DEFINE_CONFIG_PARSE_PTR(config_parse_##type, conv_func, vartype, "Failed to parse " #type " value")
+
+DEFINE_PARSER(int, int, safe_atoi);
+DEFINE_PARSER(long, long, safe_atoli);
+DEFINE_PARSER(uint8, uint8_t, safe_atou8);
+DEFINE_PARSER(uint16, uint16_t, safe_atou16);
+DEFINE_PARSER(uint32, uint32_t, safe_atou32);
+DEFINE_PARSER(int32, int32_t, safe_atoi32);
+DEFINE_PARSER(uint64, uint64_t, safe_atou64);
+DEFINE_PARSER(unsigned, unsigned, safe_atou);
+DEFINE_PARSER(double, double, safe_atod);
+DEFINE_PARSER(nsec, nsec_t, parse_nsec);
+DEFINE_PARSER(sec, usec_t, parse_sec);
+DEFINE_PARSER(sec_def_infinity, usec_t, parse_sec_def_infinity);
+DEFINE_PARSER(mode, mode_t, parse_mode);
+
+int config_parse_iec_size(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ size_t *sz = data;
+ uint64_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1024, &v);
+ if (r >= 0 && (uint64_t) (size_t) v != v)
+ r = -ERANGE;
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *sz = (size_t) v;
+ return 0;
+}
+
+int config_parse_si_uint64(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *sz = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1000, sz);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_iec_uint64(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *bytes = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1024, bytes);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value, ignoring: %s", rvalue);
+
+ return 0;
+}
+
+int config_parse_bool(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k;
+ bool *b = data;
+ bool fatal = ltype;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, fatal ? LOG_ERR : LOG_WARNING, filename, line, k,
+ "Failed to parse boolean value%s: %s",
+ fatal ? "" : ", ignoring", rvalue);
+ return fatal ? -ENOEXEC : 0;
+ }
+
+ *b = k;
+ return 0;
+}
+
+int config_parse_id128(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ sd_id128_t t, *result = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = sd_id128_from_string(rvalue, &t);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse 128bit ID/UUID, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (sd_id128_is_null(t)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "128bit ID/UUID is all 0, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *result = t;
+ return 0;
+}
+
+int config_parse_tristate(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k, *t = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* A tristate is pretty much a boolean, except that it can
+ * also take the special value -1, indicating "uninitialized",
+ * much like NULL is for a pointer type. */
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, k, "Failed to parse boolean value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *t = k;
+ return 0;
+}
+
+int config_parse_string(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (free_and_strdup(s, empty_to_null(rvalue)) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_path(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *n = NULL;
+ bool fatal = ltype;
+ char **s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue))
+ goto finalize;
+
+ n = strdup(rvalue);
+ if (!n)
+ return log_oom();
+
+ r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE | (fatal ? PATH_CHECK_FATAL : 0), unit, filename, line, lvalue);
+ if (r < 0)
+ return fatal ? -ENOEXEC : 0;
+
+finalize:
+ return free_and_replace(*s, n);
+}
+
+int config_parse_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***sv = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *sv = strv_free(*sv);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = strv_consume(sv, word);
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_warn_compat(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Disabled reason = ltype;
+
+ switch(reason) {
+
+ case DISABLED_CONFIGURATION:
+ log_syntax(unit, LOG_DEBUG, filename, line, 0,
+ "Support for option %s= has been disabled at compile time and it is ignored", lvalue);
+ break;
+
+ case DISABLED_LEGACY:
+ log_syntax(unit, LOG_INFO, filename, line, 0,
+ "Support for option %s= has been removed and it is ignored", lvalue);
+ break;
+
+ case DISABLED_EXPERIMENTAL:
+ log_syntax(unit, LOG_INFO, filename, line, 0,
+ "Support for option %s= has not yet been enabled and it is ignored", lvalue);
+ break;
+ }
+
+ return 0;
+}
+
+int config_parse_log_facility(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *o = data, x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ x = log_facility_unshifted_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse log facility, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *o = (x << 3) | LOG_PRI(*o);
+
+ return 0;
+}
+
+int config_parse_log_level(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *o = data, x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ x = log_level_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse log level, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (*o < 0) /* if it wasn't initialized so far, assume zero facility */
+ *o = x;
+ else
+ *o = (*o & LOG_FACMASK) | x;
+
+ return 0;
+}
+
+int config_parse_signal(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *sig = data, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(sig);
+
+ r = signal_from_string(rvalue);
+ if (r <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse signal name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *sig = r;
+ return 0;
+}
+
+int config_parse_personality(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned long *personality = data, p;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(personality);
+
+ if (isempty(rvalue))
+ p = PERSONALITY_INVALID;
+ else {
+ p = personality_from_string(rvalue);
+ if (p == PERSONALITY_INVALID) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse personality, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ *personality = p;
+ return 0;
+}
+
+int config_parse_ifname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Interface name is not valid or too long, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = free_and_strdup(s, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_ifnames(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_strv_free_ char **names = NULL;
+ char ***s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *s = strv_free(*s);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract interface name, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ if (!ifname_valid_full(word, ltype)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Interface name is not valid or too long, ignoring assignment: %s",
+ word);
+ continue;
+ }
+
+ r = strv_consume(&names, TAKE_PTR(word));
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = strv_extend_strv(s, names, true);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_ip_port(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t *s = data;
+ uint16_t port;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *s = 0;
+ return 0;
+ }
+
+ r = parse_ip_port(rvalue, &port);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse port '%s'.", rvalue);
+ return 0;
+ }
+
+ *s = port;
+
+ return 0;
+}
+
+int config_parse_mtu(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint32_t *mtu = data;
+ int r;
+
+ assert(rvalue);
+ assert(mtu);
+
+ r = parse_mtu(ltype, rvalue, mtu);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Maximum transfer unit (MTU) value out of range. Permitted range is %" PRIu32 "…%" PRIu32 ", ignoring: %s",
+ (uint32_t) (ltype == AF_INET6 ? IPV6_MIN_MTU : IPV4_MIN_MTU), (uint32_t) UINT32_MAX,
+ rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse MTU value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_rlimit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ struct rlimit **rl = data, d = {};
+ int r;
+
+ assert(rvalue);
+ assert(rl);
+
+ r = rlimit_parse(ltype, rvalue, &d);
+ if (r == -EILSEQ) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Soft resource limit chosen higher than hard limit, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse resource value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (rl[ltype])
+ *rl[ltype] = d;
+ else {
+ rl[ltype] = newdup(struct rlimit, &d, 1);
+ if (!rl[ltype])
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int config_parse_permille(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned *permille = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(permille);
+
+ r = parse_permille(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse permille value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *permille = (unsigned) r;
+
+ return 0;
+}
+
+int config_parse_vlanprotocol(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ int *vlan_protocol = data;
+ assert(filename);
+ assert(lvalue);
+
+ if (isempty(rvalue)) {
+ *vlan_protocol = -1;
+ return 0;
+ }
+
+ if (STR_IN_SET(rvalue, "802.1ad", "802.1AD"))
+ *vlan_protocol = ETH_P_8021AD;
+ else if (STR_IN_SET(rvalue, "802.1q", "802.1Q"))
+ *vlan_protocol = ETH_P_8021Q;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse VLAN protocol value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE(config_parse_percent, parse_percent, "Failed to parse percent value");
diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h
new file mode 100644
index 0000000..f115cb2
--- /dev/null
+++ b/src/shared/conf-parser.h
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+#include "time-util.h"
+
+/* An abstract parser for simple, line based, shallow configuration files consisting of variable assignments only. */
+
+typedef enum ConfigParseFlags {
+ CONFIG_PARSE_RELAXED = 1 << 0, /* Do not warn about unknown non-extension fields */
+ CONFIG_PARSE_WARN = 1 << 1, /* Emit non-debug messages */
+} ConfigParseFlags;
+
+/* Argument list for parsers of specific configuration settings. */
+#define CONFIG_PARSER_ARGUMENTS \
+ const char *unit, \
+ const char *filename, \
+ unsigned line, \
+ const char *section, \
+ unsigned section_line, \
+ const char *lvalue, \
+ int ltype, \
+ const char *rvalue, \
+ void *data, \
+ void *userdata
+
+/* Prototype for a parser for a specific configuration setting */
+typedef int (*ConfigParserCallback)(CONFIG_PARSER_ARGUMENTS);
+
+/* A macro declaring a function prototype, following the typedef above, simply because it's so cumbersomely long
+ * otherwise. (And current emacs gets irritatingly slow when editing files that contain lots of very long function
+ * prototypes on the same screen…) */
+#define CONFIG_PARSER_PROTOTYPE(name) int name(CONFIG_PARSER_ARGUMENTS)
+
+/* Wraps information for parsing a specific configuration variable, to
+ * be stored in a simple array */
+typedef struct ConfigTableItem {
+ const char *section; /* Section */
+ const char *lvalue; /* Name of the variable */
+ ConfigParserCallback parse; /* Function that is called to parse the variable's value */
+ int ltype; /* Distinguish different variables passed to the same callback */
+ void *data; /* Where to store the variable's data */
+} ConfigTableItem;
+
+/* Wraps information for parsing a specific configuration variable, to
+ * be stored in a gperf perfect hashtable */
+typedef struct ConfigPerfItem {
+ const char *section_and_lvalue; /* Section + "." + name of the variable */
+ ConfigParserCallback parse; /* Function that is called to parse the variable's value */
+ int ltype; /* Distinguish different variables passed to the same callback */
+ size_t offset; /* Offset where to store data, from the beginning of userdata */
+} ConfigPerfItem;
+
+/* Prototype for a low-level gperf lookup function */
+typedef const ConfigPerfItem* (*ConfigPerfItemLookup)(const char *section_and_lvalue, unsigned length);
+
+/* Prototype for a generic high-level lookup function */
+typedef int (*ConfigItemLookup)(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata);
+
+/* Linear table search implementation of ConfigItemLookup, based on
+ * ConfigTableItem arrays */
+int config_item_table_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
+
+/* gperf implementation of ConfigItemLookup, based on gperf
+ * ConfigPerfItem tables */
+int config_item_perf_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
+
+int config_parse(
+ const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime); /* possibly NULL */
+
+int config_parse_many_nulstr(
+ const char *conf_file, /* possibly NULL */
+ const char *conf_file_dirs, /* nulstr */
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime); /* possibly NULL */
+
+int config_parse_many(
+ const char *conf_file, /* possibly NULL */
+ const char* const* conf_file_dirs,
+ const char *dropin_dirname,
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime); /* possibly NULL */
+
+CONFIG_PARSER_PROTOTYPE(config_parse_int);
+CONFIG_PARSER_PROTOTYPE(config_parse_unsigned);
+CONFIG_PARSER_PROTOTYPE(config_parse_long);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint8);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint16);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint32);
+CONFIG_PARSER_PROTOTYPE(config_parse_int32);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_double);
+CONFIG_PARSER_PROTOTYPE(config_parse_iec_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_si_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_iec_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_bool);
+CONFIG_PARSER_PROTOTYPE(config_parse_id128);
+CONFIG_PARSER_PROTOTYPE(config_parse_tristate);
+CONFIG_PARSER_PROTOTYPE(config_parse_string);
+CONFIG_PARSER_PROTOTYPE(config_parse_path);
+CONFIG_PARSER_PROTOTYPE(config_parse_strv);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec_def_infinity);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec_def_unset);
+CONFIG_PARSER_PROTOTYPE(config_parse_nsec);
+CONFIG_PARSER_PROTOTYPE(config_parse_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_warn_compat);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_facility);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_level);
+CONFIG_PARSER_PROTOTYPE(config_parse_signal);
+CONFIG_PARSER_PROTOTYPE(config_parse_personality);
+CONFIG_PARSER_PROTOTYPE(config_parse_permille);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifname);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifnames);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_mtu);
+CONFIG_PARSER_PROTOTYPE(config_parse_rlimit);
+CONFIG_PARSER_PROTOTYPE(config_parse_vlanprotocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_percent);
+
+typedef enum Disabled {
+ DISABLED_CONFIGURATION,
+ DISABLED_LEGACY,
+ DISABLED_EXPERIMENTAL,
+} Disabled;
+
+#define DEFINE_CONFIG_PARSE(function, parser, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ int *i = data, r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ r = parser(rvalue); \
+ if (r < 0) { \
+ log_syntax(unit, LOG_WARNING, filename, line, r, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = r; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_PTR(function, parser, type, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data; \
+ int r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ r = parser(rvalue, i); \
+ if (r < 0) \
+ log_syntax(unit, LOG_WARNING, filename, line, r, \
+ msg ", ignoring: %s", rvalue); \
+ \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUM_FULL(function, from_string, type, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data, x; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ x = from_string(rvalue); \
+ if (x < 0) { \
+ log_syntax(unit, LOG_WARNING, filename, line, 0, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = x; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUM(function, name, type, msg) \
+ DEFINE_CONFIG_PARSE_ENUM_FULL(function, name##_from_string, type, msg)
+
+#define DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(function, name, type, default_value, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data, x; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ if (isempty(rvalue)) { \
+ *i = default_value; \
+ return 0; \
+ } \
+ \
+ x = name##_from_string(rvalue); \
+ if (x < 0) { \
+ log_syntax(unit, LOG_WARNING, filename, line, 0, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = x; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUMV(function, name, type, invalid, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type **enums = data; \
+ _cleanup_free_ type *xs = NULL; \
+ size_t i = 0; \
+ int r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ xs = new0(type, 1); \
+ if (!xs) \
+ return -ENOMEM; \
+ \
+ *xs = invalid; \
+ \
+ for (const char *p = rvalue;;) { \
+ _cleanup_free_ char *en = NULL; \
+ type x, *new_xs; \
+ \
+ r = extract_first_word(&p, &en, NULL, 0); \
+ if (r == -ENOMEM) \
+ return log_oom(); \
+ if (r < 0) \
+ return log_syntax(unit, LOG_ERR, filename, line, 0, \
+ msg ": %s", en); \
+ if (r == 0) \
+ break; \
+ \
+ if ((x = name##_from_string(en)) < 0) { \
+ log_syntax(unit, LOG_WARNING, filename, line, 0, \
+ msg ", ignoring: %s", en); \
+ continue; \
+ } \
+ \
+ for (type *ys = xs; x != invalid && *ys != invalid; ys++) \
+ if (*ys == x) { \
+ log_syntax(unit, LOG_NOTICE, filename, line, 0, \
+ "Duplicate entry, ignoring: %s", \
+ en); \
+ x = invalid; \
+ } \
+ \
+ if (x == invalid) \
+ continue; \
+ \
+ *(xs + i) = x; \
+ new_xs = realloc(xs, (++i + 1) * sizeof(type)); \
+ if (new_xs) \
+ xs = new_xs; \
+ else \
+ return log_oom(); \
+ \
+ *(xs + i) = invalid; \
+ } \
+ \
+ return free_and_replace(*enums, xs); \
+ }
diff --git a/src/shared/coredump-util.c b/src/shared/coredump-util.c
new file mode 100644
index 0000000..a0b648b
--- /dev/null
+++ b/src/shared/coredump-util.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "coredump-util.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "string-table.h"
+
+static const char *const coredump_filter_table[_COREDUMP_FILTER_MAX] = {
+ [COREDUMP_FILTER_PRIVATE_ANONYMOUS] = "private-anonymous",
+ [COREDUMP_FILTER_SHARED_ANONYMOUS] = "shared-anonymous",
+ [COREDUMP_FILTER_PRIVATE_FILE_BACKED] = "private-file-backed",
+ [COREDUMP_FILTER_SHARED_FILE_BACKED] = "shared-file-backed",
+ [COREDUMP_FILTER_ELF_HEADERS] = "elf-headers",
+ [COREDUMP_FILTER_PRIVATE_HUGE] = "private-huge",
+ [COREDUMP_FILTER_SHARED_HUGE] = "shared-huge",
+ [COREDUMP_FILTER_PRIVATE_DAX] = "private-dax",
+ [COREDUMP_FILTER_SHARED_DAX] = "shared-dax",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(coredump_filter, CoredumpFilter);
+
+int coredump_filter_mask_from_string(const char *s, uint64_t *ret) {
+ uint64_t m = 0;
+
+ assert(s);
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *n = NULL;
+ CoredumpFilter v;
+ int r;
+
+ r = extract_first_word(&s, &n, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (streq(n, "default")) {
+ m |= COREDUMP_FILTER_MASK_DEFAULT;
+ continue;
+ }
+
+ if (streq(n, "all")) {
+ m = UINT64_MAX;
+ continue;
+ }
+
+ v = coredump_filter_from_string(n);
+ if (v >= 0) {
+ m |= 1u << v;
+ continue;
+ }
+
+ uint64_t x;
+ r = safe_atoux64(n, &x);
+ if (r < 0)
+ return r;
+
+ m |= x;
+ }
+
+ *ret = m;
+ return 0;
+}
+
+int set_coredump_filter(uint64_t value) {
+ char t[STRLEN("0xFFFFFFFF")];
+
+ sprintf(t, "0x%"PRIx64, value);
+
+ return write_string_file("/proc/self/coredump_filter", t,
+ WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
+}
diff --git a/src/shared/coredump-util.h b/src/shared/coredump-util.h
new file mode 100644
index 0000000..a7f3c0e
--- /dev/null
+++ b/src/shared/coredump-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+typedef enum CoredumpFilter {
+ COREDUMP_FILTER_PRIVATE_ANONYMOUS = 0,
+ COREDUMP_FILTER_SHARED_ANONYMOUS,
+ COREDUMP_FILTER_PRIVATE_FILE_BACKED,
+ COREDUMP_FILTER_SHARED_FILE_BACKED,
+ COREDUMP_FILTER_ELF_HEADERS,
+ COREDUMP_FILTER_PRIVATE_HUGE,
+ COREDUMP_FILTER_SHARED_HUGE,
+ COREDUMP_FILTER_PRIVATE_DAX,
+ COREDUMP_FILTER_SHARED_DAX,
+ _COREDUMP_FILTER_MAX,
+ _COREDUMP_FILTER_INVALID = -1,
+} CoredumpFilter;
+
+#define COREDUMP_FILTER_MASK_DEFAULT (1u << COREDUMP_FILTER_PRIVATE_ANONYMOUS | \
+ 1u << COREDUMP_FILTER_SHARED_ANONYMOUS | \
+ 1u << COREDUMP_FILTER_ELF_HEADERS | \
+ 1u << COREDUMP_FILTER_PRIVATE_HUGE)
+
+const char* coredump_filter_to_string(CoredumpFilter i) _const_;
+CoredumpFilter coredump_filter_from_string(const char *s) _pure_;
+int coredump_filter_mask_from_string(const char *s, uint64_t *ret);
+
+int set_coredump_filter(uint64_t value);
diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c
new file mode 100644
index 0000000..2c3b5bb
--- /dev/null
+++ b/src/shared/cpu-set-util.c
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "cpu-set-util.h"
+#include "dirent-util.h"
+#include "errno-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "parse-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+char* cpu_set_to_string(const CPUSet *a) {
+ _cleanup_free_ char *str = NULL;
+ size_t allocated = 0, len = 0;
+ int i, r;
+
+ for (i = 0; (size_t) i < a->allocated * 8; i++) {
+ if (!CPU_ISSET_S(i, a->allocated, a->set))
+ continue;
+
+ if (!GREEDY_REALLOC(str, allocated, len + 1 + DECIMAL_STR_MAX(int)))
+ return NULL;
+
+ r = sprintf(str + len, len > 0 ? " %d" : "%d", i);
+ assert_se(r > 0);
+ len += r;
+ }
+
+ return TAKE_PTR(str) ?: strdup("");
+}
+
+char *cpu_set_to_range_string(const CPUSet *set) {
+ unsigned range_start = 0, range_end;
+ _cleanup_free_ char *str = NULL;
+ size_t allocated = 0, len = 0;
+ bool in_range = false;
+ int r;
+
+ for (unsigned i = 0; i < set->allocated * 8; i++)
+ if (CPU_ISSET_S(i, set->allocated, set->set)) {
+ if (in_range)
+ range_end++;
+ else {
+ range_start = range_end = i;
+ in_range = true;
+ }
+ } else if (in_range) {
+ in_range = false;
+
+ if (!GREEDY_REALLOC(str, allocated, len + 2 + 2 * DECIMAL_STR_MAX(unsigned)))
+ return NULL;
+
+ if (range_end > range_start)
+ r = sprintf(str + len, len > 0 ? " %d-%d" : "%d-%d", range_start, range_end);
+ else
+ r = sprintf(str + len, len > 0 ? " %d" : "%d", range_start);
+ assert_se(r > 0);
+ len += r;
+ }
+
+ if (in_range) {
+ if (!GREEDY_REALLOC(str, allocated, len + 2 + 2 * DECIMAL_STR_MAX(int)))
+ return NULL;
+
+ if (range_end > range_start)
+ r = sprintf(str + len, len > 0 ? " %d-%d" : "%d-%d", range_start, range_end);
+ else
+ r = sprintf(str + len, len > 0 ? " %d" : "%d", range_start);
+ assert_se(r > 0);
+ }
+
+ return TAKE_PTR(str) ?: strdup("");
+}
+
+int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus) {
+ size_t need;
+
+ assert(cpu_set);
+
+ need = CPU_ALLOC_SIZE(ncpus);
+ if (need > cpu_set->allocated) {
+ cpu_set_t *t;
+
+ t = realloc(cpu_set->set, need);
+ if (!t)
+ return -ENOMEM;
+
+ memzero((uint8_t*) t + cpu_set->allocated, need - cpu_set->allocated);
+
+ cpu_set->set = t;
+ cpu_set->allocated = need;
+ }
+
+ return 0;
+}
+
+int cpu_set_add(CPUSet *cpu_set, unsigned cpu) {
+ int r;
+
+ if (cpu >= 8192)
+ /* As of kernel 5.1, CONFIG_NR_CPUS can be set to 8192 on PowerPC */
+ return -ERANGE;
+
+ r = cpu_set_realloc(cpu_set, cpu + 1);
+ if (r < 0)
+ return r;
+
+ CPU_SET_S(cpu, cpu_set->allocated, cpu_set->set);
+ return 0;
+}
+
+int cpu_set_add_all(CPUSet *a, const CPUSet *b) {
+ int r;
+
+ /* Do this backwards, so if we fail, we fail before changing anything. */
+ for (unsigned cpu_p1 = b->allocated * 8; cpu_p1 > 0; cpu_p1--)
+ if (CPU_ISSET_S(cpu_p1 - 1, b->allocated, b->set)) {
+ r = cpu_set_add(a, cpu_p1 - 1);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+int parse_cpu_set_full(
+ const char *rvalue,
+ CPUSet *cpu_set,
+ bool warn,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue) {
+
+ _cleanup_(cpu_set_reset) CPUSet c = {};
+ const char *p = rvalue;
+
+ assert(p);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ unsigned cpu_lower, cpu_upper;
+ int r;
+
+ r = extract_first_word(&p, &word, WHITESPACE ",", EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return warn ? log_oom() : -ENOMEM;
+ if (r < 0)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, rvalue) : r;
+ if (r == 0)
+ break;
+
+ r = parse_range(word, &cpu_lower, &cpu_upper);
+ if (r < 0)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CPU affinity '%s'", word) : r;
+
+ if (cpu_lower > cpu_upper) {
+ if (warn)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Range '%s' is invalid, %u > %u, ignoring.",
+ word, cpu_lower, cpu_upper);
+
+ /* Make sure something is allocated, to distinguish this from the empty case */
+ r = cpu_set_realloc(&c, 1);
+ if (r < 0)
+ return r;
+ }
+
+ for (unsigned cpu_p1 = MIN(cpu_upper, UINT_MAX-1) + 1; cpu_p1 > cpu_lower; cpu_p1--) {
+ r = cpu_set_add(&c, cpu_p1 - 1);
+ if (r < 0)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, r,
+ "Cannot add CPU %u to set: %m", cpu_p1 - 1) : r;
+ }
+ }
+
+ /* On success, transfer ownership to the output variable */
+ *cpu_set = c;
+ c = (CPUSet) {};
+
+ return 0;
+}
+
+int parse_cpu_set_extend(
+ const char *rvalue,
+ CPUSet *old,
+ bool warn,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue) {
+
+ _cleanup_(cpu_set_reset) CPUSet cpuset = {};
+ int r;
+
+ r = parse_cpu_set_full(rvalue, &cpuset, true, unit, filename, line, lvalue);
+ if (r < 0)
+ return r;
+
+ if (!cpuset.set) {
+ /* An empty assignment resets the CPU list */
+ cpu_set_reset(old);
+ return 0;
+ }
+
+ if (!old->set) {
+ *old = cpuset;
+ cpuset = (CPUSet) {};
+ return 1;
+ }
+
+ return cpu_set_add_all(old, &cpuset);
+}
+
+int cpus_in_affinity_mask(void) {
+ size_t n = 16;
+ int r;
+
+ for (;;) {
+ cpu_set_t *c;
+
+ c = CPU_ALLOC(n);
+ if (!c)
+ return -ENOMEM;
+
+ if (sched_getaffinity(0, CPU_ALLOC_SIZE(n), c) >= 0) {
+ int k;
+
+ k = CPU_COUNT_S(CPU_ALLOC_SIZE(n), c);
+ CPU_FREE(c);
+
+ if (k <= 0)
+ return -EINVAL;
+
+ return k;
+ }
+
+ r = -errno;
+ CPU_FREE(c);
+
+ if (r != -EINVAL)
+ return r;
+ if (n > SIZE_MAX/2)
+ return -ENOMEM;
+ n *= 2;
+ }
+}
+
+int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated) {
+ uint8_t *out;
+
+ assert(set);
+ assert(ret);
+
+ out = new0(uint8_t, set->allocated);
+ if (!out)
+ return -ENOMEM;
+
+ for (unsigned cpu = 0; cpu < set->allocated * 8; cpu++)
+ if (CPU_ISSET_S(cpu, set->allocated, set->set))
+ out[cpu / 8] |= 1u << (cpu % 8);
+
+ *ret = out;
+ *allocated = set->allocated;
+ return 0;
+}
+
+int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) {
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+ int r;
+
+ assert(bits);
+ assert(set);
+
+ for (unsigned cpu = size * 8; cpu > 0; cpu--)
+ if (bits[(cpu - 1) / 8] & (1u << ((cpu - 1) % 8))) {
+ r = cpu_set_add(&s, cpu - 1);
+ if (r < 0)
+ return r;
+ }
+
+ *set = s;
+ s = (CPUSet) {};
+ return 0;
+}
diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h
new file mode 100644
index 0000000..3c63a58
--- /dev/null
+++ b/src/shared/cpu-set-util.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sched.h>
+
+#include "macro.h"
+#include "missing_syscall.h"
+
+/* This wraps the libc interface with a variable to keep the allocated size. */
+typedef struct CPUSet {
+ cpu_set_t *set;
+ size_t allocated; /* in bytes */
+} CPUSet;
+
+static inline void cpu_set_reset(CPUSet *a) {
+ assert((a->allocated > 0) == !!a->set);
+ if (a->set)
+ CPU_FREE(a->set);
+ *a = (CPUSet) {};
+}
+
+int cpu_set_add_all(CPUSet *a, const CPUSet *b);
+int cpu_set_add(CPUSet *a, unsigned cpu);
+
+char* cpu_set_to_string(const CPUSet *a);
+char *cpu_set_to_range_string(const CPUSet *a);
+int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus);
+
+int parse_cpu_set_full(
+ const char *rvalue,
+ CPUSet *cpu_set,
+ bool warn,
+ const char *unit,
+ const char *filename, unsigned line,
+ const char *lvalue);
+int parse_cpu_set_extend(
+ const char *rvalue,
+ CPUSet *old,
+ bool warn,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue);
+
+static inline int parse_cpu_set(const char *rvalue, CPUSet *cpu_set){
+ return parse_cpu_set_full(rvalue, cpu_set, false, NULL, NULL, 0, NULL);
+}
+
+int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated);
+int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set);
+
+int cpus_in_affinity_mask(void);
diff --git a/src/shared/cryptsetup-util.c b/src/shared/cryptsetup-util.c
new file mode 100644
index 0000000..34a078e
--- /dev/null
+++ b/src/shared/cryptsetup-util.c
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_LIBCRYPTSETUP
+#include "alloc-util.h"
+#include "cryptsetup-util.h"
+#include "dlfcn-util.h"
+#include "log.h"
+
+static void *cryptsetup_dl = NULL;
+
+int (*sym_crypt_activate_by_passphrase)(struct crypt_device *cd, const char *name, int keyslot, const char *passphrase, size_t passphrase_size, uint32_t flags);
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+int (*sym_crypt_activate_by_signed_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, const char *signature, size_t signature_size, uint32_t flags);
+#endif
+int (*sym_crypt_activate_by_volume_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, uint32_t flags);
+int (*sym_crypt_deactivate_by_name)(struct crypt_device *cd, const char *name, uint32_t flags);
+int (*sym_crypt_format)(struct crypt_device *cd, const char *type, const char *cipher, const char *cipher_mode, const char *uuid, const char *volume_key, size_t volume_key_size, void *params);
+void (*sym_crypt_free)(struct crypt_device *cd);
+const char *(*sym_crypt_get_dir)(void);
+int (*sym_crypt_get_verity_info)(struct crypt_device *cd, struct crypt_params_verity *vp);
+int (*sym_crypt_init)(struct crypt_device **cd, const char *device);
+int (*sym_crypt_init_by_name)(struct crypt_device **cd, const char *name);
+int (*sym_crypt_keyslot_add_by_volume_key)(struct crypt_device *cd, int keyslot, const char *volume_key, size_t volume_key_size, const char *passphrase, size_t passphrase_size);
+int (*sym_crypt_load)(struct crypt_device *cd, const char *requested_type, void *params);
+int (*sym_crypt_resize)(struct crypt_device *cd, const char *name, uint64_t new_size);
+int (*sym_crypt_set_data_device)(struct crypt_device *cd, const char *device);
+void (*sym_crypt_set_debug_level)(int level);
+void (*sym_crypt_set_log_callback)(struct crypt_device *cd, void (*log)(int level, const char *msg, void *usrptr), void *usrptr);
+int (*sym_crypt_volume_key_get)(struct crypt_device *cd, int keyslot, char *volume_key, size_t *volume_key_size, const char *passphrase, size_t passphrase_size);
+
+int dlopen_cryptsetup(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (cryptsetup_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libcryptsetup.so.12", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "libcryptsetup support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_crypt_activate_by_passphrase, "crypt_activate_by_passphrase",
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+ &sym_crypt_activate_by_signed_key, "crypt_activate_by_signed_key",
+#endif
+ &sym_crypt_activate_by_volume_key, "crypt_activate_by_volume_key",
+ &sym_crypt_deactivate_by_name, "crypt_deactivate_by_name",
+ &sym_crypt_format, "crypt_format",
+ &sym_crypt_free, "crypt_free",
+ &sym_crypt_get_dir, "crypt_get_dir",
+ &sym_crypt_get_verity_info, "crypt_get_verity_info",
+ &sym_crypt_init, "crypt_init",
+ &sym_crypt_init_by_name, "crypt_init_by_name",
+ &sym_crypt_keyslot_add_by_volume_key, "crypt_keyslot_add_by_volume_key",
+ &sym_crypt_load, "crypt_load",
+ &sym_crypt_resize, "crypt_resize",
+ &sym_crypt_set_data_device, "crypt_set_data_device",
+ &sym_crypt_set_debug_level, "crypt_set_debug_level",
+ &sym_crypt_set_log_callback, "crypt_set_log_callback",
+ &sym_crypt_volume_key_get, "crypt_volume_key_get",
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* Note that we never release the reference here, because there's no real reason to, after all this
+ * was traditionally a regular shared library dependency which lives forever too. */
+ cryptsetup_dl = TAKE_PTR(dl);
+ return 1;
+}
+
+static void cryptsetup_log_glue(int level, const char *msg, void *usrptr) {
+
+ switch (level) {
+ case CRYPT_LOG_NORMAL:
+ level = LOG_NOTICE;
+ break;
+ case CRYPT_LOG_ERROR:
+ level = LOG_ERR;
+ break;
+ case CRYPT_LOG_VERBOSE:
+ level = LOG_INFO;
+ break;
+ case CRYPT_LOG_DEBUG:
+ level = LOG_DEBUG;
+ break;
+ default:
+ log_error("Unknown libcryptsetup log level: %d", level);
+ level = LOG_ERR;
+ }
+
+ log_full(level, "%s", msg);
+}
+
+void cryptsetup_enable_logging(struct crypt_device *cd) {
+ if (!cd)
+ return;
+
+ if (dlopen_cryptsetup() < 0) /* If this fails, let's gracefully ignore the issue, this is just debug
+ * logging after all, and if this failed we already generated a debug
+ * log message that should help to track things down. */
+ return;
+
+ sym_crypt_set_log_callback(cd, cryptsetup_log_glue, NULL);
+ sym_crypt_set_debug_level(DEBUG_LOGGING ? CRYPT_DEBUG_ALL : CRYPT_DEBUG_NONE);
+}
+
+#endif
diff --git a/src/shared/cryptsetup-util.h b/src/shared/cryptsetup-util.h
new file mode 100644
index 0000000..e7d885d
--- /dev/null
+++ b/src/shared/cryptsetup-util.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+#if HAVE_LIBCRYPTSETUP
+#include <libcryptsetup.h>
+
+/* These next two are defined in libcryptsetup.h from cryptsetup version 2.3.4 forwards. */
+#ifndef CRYPT_ACTIVATE_NO_READ_WORKQUEUE
+#define CRYPT_ACTIVATE_NO_READ_WORKQUEUE (1 << 24)
+#endif
+#ifndef CRYPT_ACTIVATE_NO_WRITE_WORKQUEUE
+#define CRYPT_ACTIVATE_NO_WRITE_WORKQUEUE (1 << 25)
+#endif
+
+extern int (*sym_crypt_activate_by_passphrase)(struct crypt_device *cd, const char *name, int keyslot, const char *passphrase, size_t passphrase_size, uint32_t flags);
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+extern int (*sym_crypt_activate_by_signed_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, const char *signature, size_t signature_size, uint32_t flags);
+#endif
+extern int (*sym_crypt_activate_by_volume_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, uint32_t flags);
+extern int (*sym_crypt_deactivate_by_name)(struct crypt_device *cd, const char *name, uint32_t flags);
+extern int (*sym_crypt_format)(struct crypt_device *cd, const char *type, const char *cipher, const char *cipher_mode, const char *uuid, const char *volume_key, size_t volume_key_size, void *params);
+extern void (*sym_crypt_free)(struct crypt_device *cd);
+extern const char *(*sym_crypt_get_dir)(void);
+extern int (*sym_crypt_get_verity_info)(struct crypt_device *cd, struct crypt_params_verity *vp);
+extern int (*sym_crypt_init)(struct crypt_device **cd, const char *device);
+extern int (*sym_crypt_init_by_name)(struct crypt_device **cd, const char *name);
+extern int (*sym_crypt_keyslot_add_by_volume_key)(struct crypt_device *cd, int keyslot, const char *volume_key, size_t volume_key_size, const char *passphrase, size_t passphrase_size);
+extern int (*sym_crypt_load)(struct crypt_device *cd, const char *requested_type, void *params);
+extern int (*sym_crypt_resize)(struct crypt_device *cd, const char *name, uint64_t new_size);
+extern int (*sym_crypt_set_data_device)(struct crypt_device *cd, const char *device);
+extern void (*sym_crypt_set_debug_level)(int level);
+extern void (*sym_crypt_set_log_callback)(struct crypt_device *cd, void (*log)(int level, const char *msg, void *usrptr), void *usrptr);
+extern int (*sym_crypt_volume_key_get)(struct crypt_device *cd, int keyslot, char *volume_key, size_t *volume_key_size, const char *passphrase, size_t passphrase_size);
+
+int dlopen_cryptsetup(void);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, crypt_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, sym_crypt_free);
+
+void cryptsetup_enable_logging(struct crypt_device *cd);
+
+#endif
diff --git a/src/shared/daemon-util.h b/src/shared/daemon-util.h
new file mode 100644
index 0000000..585e489
--- /dev/null
+++ b/src/shared/daemon-util.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-daemon.h"
+
+#define NOTIFY_READY "READY=1\n" "STATUS=Processing requests..."
+#define NOTIFY_STOPPING "STOPPING=1\n" "STATUS=Shutting down..."
+
+static inline const char *notify_start(const char *start, const char *stop) {
+ if (start)
+ (void) sd_notify(false, start);
+
+ return stop;
+}
+
+/* This is intended to be used with _cleanup_ attribute. */
+static inline void notify_on_cleanup(const char **p) {
+ if (*p)
+ (void) sd_notify(false, *p);
+}
diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c
new file mode 100644
index 0000000..b788b06
--- /dev/null
+++ b/src/shared/dev-setup.c
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dev-setup.h"
+#include "label.h"
+#include "log.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+
+int dev_setup(const char *prefix, uid_t uid, gid_t gid) {
+ static const char symlinks[] =
+ "-/proc/kcore\0" "/dev/core\0"
+ "/proc/self/fd\0" "/dev/fd\0"
+ "/proc/self/fd/0\0" "/dev/stdin\0"
+ "/proc/self/fd/1\0" "/dev/stdout\0"
+ "/proc/self/fd/2\0" "/dev/stderr\0";
+
+ const char *j, *k;
+ int r;
+
+ NULSTR_FOREACH_PAIR(j, k, symlinks) {
+ _cleanup_free_ char *link_name = NULL;
+ const char *n;
+
+ if (j[0] == '-') {
+ j++;
+
+ if (access(j, F_OK) < 0)
+ continue;
+ }
+
+ if (prefix) {
+ link_name = path_join(prefix, k);
+ if (!link_name)
+ return -ENOMEM;
+
+ n = link_name;
+ } else
+ n = k;
+
+ r = symlink_label(j, n);
+ if (r < 0)
+ log_debug_errno(r, "Failed to symlink %s to %s: %m", j, n);
+
+ if (uid != UID_INVALID || gid != GID_INVALID)
+ if (lchown(n, uid, gid) < 0)
+ log_debug_errno(errno, "Failed to chown %s: %m", n);
+ }
+
+ return 0;
+}
+
+int make_inaccessible_nodes(
+ const char *parent_dir,
+ uid_t uid,
+ gid_t gid) {
+
+ static const struct {
+ const char *name;
+ mode_t mode;
+ } table[] = {
+ { "inaccessible", S_IFDIR | 0755 },
+ { "inaccessible/reg", S_IFREG | 0000 },
+ { "inaccessible/dir", S_IFDIR | 0000 },
+ { "inaccessible/fifo", S_IFIFO | 0000 },
+ { "inaccessible/sock", S_IFSOCK | 0000 },
+
+ /* The following two are likely to fail if we lack the privs for it (for example in an userns
+ * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibits creation of
+ * device nodes with a major/minor of 0). But that's entirely fine. Consumers of these files
+ * should implement falling back to use a different node then, for example
+ * <root>/inaccessible/sock, which is close enough in behaviour and semantics for most uses.
+ */
+ { "inaccessible/chr", S_IFCHR | 0000 },
+ { "inaccessible/blk", S_IFBLK | 0000 },
+ };
+
+ _cleanup_umask_ mode_t u;
+ int r;
+
+ if (!parent_dir)
+ parent_dir = "/run/systemd";
+
+ u = umask(0000);
+
+ /* Set up inaccessible (and empty) file nodes of all types. This are used to as mount sources for over-mounting
+ * ("masking") file nodes that shall become inaccessible and empty for specific containers or services. We try
+ * to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the
+ * underlying file, i.e. in the best case we offer the same node type as the underlying node. */
+
+ for (size_t i = 0; i < ELEMENTSOF(table); i++) {
+ _cleanup_free_ char *path = NULL;
+
+ path = path_join(parent_dir, table[i].name);
+ if (!path)
+ return log_oom();
+
+ if (S_ISDIR(table[i].mode))
+ r = mkdir_label(path, table[i].mode & 07777);
+ else
+ r = mknod_label(path, table[i].mode, makedev(0, 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to create '%s', ignoring: %m", path);
+ continue;
+ }
+
+ if (uid != UID_INVALID || gid != GID_INVALID) {
+ if (lchown(path, uid, gid) < 0)
+ log_debug_errno(errno, "Failed to chown '%s': %m", path);
+ }
+ }
+
+ return 0;
+}
diff --git a/src/shared/dev-setup.h b/src/shared/dev-setup.h
new file mode 100644
index 0000000..92ba6cf
--- /dev/null
+++ b/src/shared/dev-setup.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int dev_setup(const char *prefix, uid_t uid, gid_t gid);
+
+int make_inaccessible_nodes(const char *parent_dir, uid_t uid, gid_t gid);
diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c
new file mode 100644
index 0000000..d1f299a
--- /dev/null
+++ b/src/shared/dissect-image.c
@@ -0,0 +1,2557 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <linux/dm-ioctl.h>
+#include <linux/loop.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sysexits.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "architecture.h"
+#include "ask-password-api.h"
+#include "blkid-util.h"
+#include "blockdev-util.h"
+#include "copy.h"
+#include "cryptsetup-util.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "device-util.h"
+#include "dissect-image.h"
+#include "dm-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fsck-util.h"
+#include "gpt.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "namespace-util.h"
+#include "nulstr-util.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "udev-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+/* how many times to wait for the device nodes to appear */
+#define N_DEVICE_NODE_LIST_ATTEMPTS 10
+
+int probe_filesystem(const char *node, char **ret_fstype) {
+ /* Try to find device content type and return it in *ret_fstype. If nothing is found,
+ * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and an
+ * different error otherwise. */
+
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ const char *fstype;
+ int r;
+
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return errno_or_else(ENOMEM);
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == 1) {
+ log_debug("No type detected on partition %s", node);
+ goto not_found;
+ }
+ if (r == -2)
+ return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Results ambiguous for partition %s", node);
+ if (r != 0)
+ return errno_or_else(EIO);
+
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+ if (fstype) {
+ char *t;
+
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+
+ *ret_fstype = t;
+ return 1;
+ }
+
+not_found:
+ *ret_fstype = NULL;
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+#if HAVE_BLKID
+static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ int r;
+
+ assert(d);
+ assert(ret);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_parent(e, d);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(e);
+ return 0;
+}
+
+static int device_is_partition(sd_device *d, blkid_partition pp) {
+ blkid_loff_t bsize, bstart;
+ uint64_t size, start;
+ int partno, bpartno, r;
+ const char *ss, *v;
+
+ assert(d);
+ assert(pp);
+
+ r = sd_device_get_subsystem(d, &ss);
+ if (r < 0)
+ return r;
+ if (!streq(ss, "block"))
+ return false;
+
+ r = sd_device_get_sysattr_value(d, "partition", &v);
+ if (r == -ENOENT) /* Not a partition device */
+ return false;
+ if (r < 0)
+ return r;
+ r = safe_atoi(v, &partno);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ bpartno = blkid_partition_get_partno(pp);
+ if (bpartno < 0)
+ return errno_or_else(EIO);
+
+ if (partno != bpartno)
+ return false;
+
+ r = sd_device_get_sysattr_value(d, "start", &v);
+ if (r < 0)
+ return r;
+ r = safe_atou64(v, &start);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ bstart = blkid_partition_get_start(pp);
+ if (bstart < 0)
+ return errno_or_else(EIO);
+
+ if (start != (uint64_t) bstart)
+ return false;
+
+ r = sd_device_get_sysattr_value(d, "size", &v);
+ if (r < 0)
+ return r;
+ r = safe_atou64(v, &size);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ bsize = blkid_partition_get_size(pp);
+ if (bsize < 0)
+ return errno_or_else(EIO);
+
+ if (size != (uint64_t) bsize)
+ return false;
+
+ return true;
+}
+
+static int find_partition(
+ sd_device *parent,
+ blkid_partition pp,
+ sd_device **ret) {
+
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *q;
+ int r;
+
+ assert(parent);
+ assert(pp);
+ assert(ret);
+
+ r = enumerator_for_parent(parent, &e);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, q) {
+ r = device_is_partition(q, pp);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *ret = sd_device_ref(q);
+ return 0;
+ }
+ }
+
+ return -ENXIO;
+}
+
+struct wait_data {
+ sd_device *parent_device;
+ blkid_partition blkidp;
+ sd_device *found;
+};
+
+static inline void wait_data_done(struct wait_data *d) {
+ sd_device_unref(d->found);
+}
+
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ const char *parent1_path, *parent2_path;
+ struct wait_data *w = userdata;
+ sd_device *pp;
+ int r;
+
+ assert(w);
+
+ if (device_for_action(device, DEVICE_ACTION_REMOVE))
+ return 0;
+
+ r = sd_device_get_parent(device, &pp);
+ if (r < 0)
+ return 0; /* Doesn't have a parent? No relevant to us */
+
+ r = sd_device_get_syspath(pp, &parent1_path); /* Check parent of device of this action */
+ if (r < 0)
+ goto finish;
+
+ r = sd_device_get_syspath(w->parent_device, &parent2_path); /* Check parent of device we are looking for */
+ if (r < 0)
+ goto finish;
+
+ if (!path_equal(parent1_path, parent2_path))
+ return 0; /* Has a different parent than what we need, not interesting to us */
+
+ r = device_is_partition(device, w->blkidp);
+ if (r < 0)
+ goto finish;
+ if (r == 0) /* Not the one we need */
+ return 0;
+
+ /* It's the one we need! Yay! */
+ assert(!w->found);
+ w->found = sd_device_ref(device);
+ r = 0;
+
+finish:
+ return sd_event_exit(sd_device_monitor_get_event(monitor), r);
+}
+
+static int wait_for_partition_device(
+ sd_device *parent,
+ blkid_partition pp,
+ usec_t deadline,
+ sd_device **ret) {
+
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL;
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ int r;
+
+ assert(parent);
+ assert(pp);
+ assert(ret);
+
+ r = find_partition(parent, pp, ret);
+ if (r != -ENXIO)
+ return r;
+
+ r = sd_event_new(&event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_new(&monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "partition");
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(monitor, event);
+ if (r < 0)
+ return r;
+
+ _cleanup_(wait_data_done) struct wait_data w = {
+ .parent_device = parent,
+ .blkidp = pp,
+ };
+
+ r = sd_device_monitor_start(monitor, device_monitor_handler, &w);
+ if (r < 0)
+ return r;
+
+ /* Check again, the partition might have appeared in the meantime */
+ r = find_partition(parent, pp, ret);
+ if (r != -ENXIO)
+ return r;
+
+ if (deadline != USEC_INFINITY) {
+ r = sd_event_add_time(
+ event, &timeout_source,
+ CLOCK_MONOTONIC, deadline, 0,
+ NULL, INT_TO_PTR(-ETIMEDOUT));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return r;
+
+ assert(w.found);
+ *ret = TAKE_PTR(w.found);
+ return 0;
+}
+
+static void check_partition_flags(
+ const char *node,
+ unsigned long long pflags,
+ unsigned long long supported) {
+
+ assert(node);
+
+ /* Mask away all flags supported by this partition's type and the three flags the UEFI spec defines generically */
+ pflags &= ~(supported | GPT_FLAG_REQUIRED_PARTITION | GPT_FLAG_NO_BLOCK_IO_PROTOCOL | GPT_FLAG_LEGACY_BIOS_BOOTABLE);
+
+ if (pflags == 0)
+ return;
+
+ /* If there are other bits set, then log about it, to make things discoverable */
+ for (unsigned i = 0; i < sizeof(pflags) * 8; i++) {
+ unsigned long long bit = 1ULL << i;
+ if (!FLAGS_SET(pflags, bit))
+ continue;
+
+ log_debug("Unexpected partition flag %llu set on %s!", bit, node);
+ }
+}
+
+static int device_wait_for_initialization_harder(
+ sd_device *device,
+ const char *subsystem,
+ usec_t deadline,
+ sd_device **ret) {
+
+ _cleanup_free_ char *uevent = NULL;
+ usec_t start, left, retrigger_timeout;
+ int r;
+
+ start = now(CLOCK_MONOTONIC);
+ left = usec_sub_unsigned(deadline, start);
+
+ if (DEBUG_LOGGING) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ const char *sn = NULL;
+
+ (void) sd_device_get_sysname(device, &sn);
+ log_debug("Waiting for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), left, 0));
+ }
+
+ if (left != USEC_INFINITY)
+ retrigger_timeout = CLAMP(left / 4, 1 * USEC_PER_SEC, 5 * USEC_PER_SEC); /* A fourth of the total timeout, but let's clamp to 1s…5s range */
+ else
+ retrigger_timeout = 2 * USEC_PER_SEC;
+
+ for (;;) {
+ usec_t local_deadline, n;
+ bool last_try;
+
+ n = now(CLOCK_MONOTONIC);
+ assert(n >= start);
+
+ /* Find next deadline, when we'll retrigger */
+ local_deadline = start +
+ DIV_ROUND_UP(n - start, retrigger_timeout) * retrigger_timeout;
+
+ if (deadline != USEC_INFINITY && deadline <= local_deadline) {
+ local_deadline = deadline;
+ last_try = true;
+ } else
+ last_try = false;
+
+ r = device_wait_for_initialization(device, subsystem, local_deadline, ret);
+ if (r >= 0 && DEBUG_LOGGING) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ const char *sn = NULL;
+
+ (void) sd_device_get_sysname(device, &sn);
+ log_debug("Successfully waited for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0));
+
+ }
+ if (r != -ETIMEDOUT || last_try)
+ return r;
+
+ if (!uevent) {
+ const char *syspath;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ uevent = path_join(syspath, "uevent");
+ if (!uevent)
+ return -ENOMEM;
+ }
+
+ if (DEBUG_LOGGING) {
+ char buf[FORMAT_TIMESPAN_MAX];
+
+ log_debug("Device didn't initialize within %s, assuming lost event. Retriggering device through %s.",
+ format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0),
+ uevent);
+ }
+
+ r = write_string_file(uevent, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+ }
+}
+#endif
+
+#define DEVICE_TIMEOUT_USEC (45 * USEC_PER_SEC)
+
+int dissect_image(
+ int fd,
+ const VeritySettings *verity,
+ const MountOptions *mount_options,
+ DissectImageFlags flags,
+ DissectedImage **ret) {
+
+#if HAVE_BLKID
+#ifdef GPT_ROOT_NATIVE
+ sd_id128_t root_uuid = SD_ID128_NULL, root_verity_uuid = SD_ID128_NULL;
+#endif
+#ifdef GPT_USR_NATIVE
+ sd_id128_t usr_uuid = SD_ID128_NULL, usr_verity_uuid = SD_ID128_NULL;
+#endif
+ bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *generic_node = NULL;
+ sd_id128_t generic_uuid = SD_ID128_NULL;
+ const char *pttype = NULL;
+ blkid_partlist pl;
+ int r, generic_nr, n_partitions;
+ struct stat st;
+ usec_t deadline;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(!verity || verity->root_hash || verity->root_hash_size == 0);
+ assert(!((flags & DISSECT_IMAGE_GPT_ONLY) && (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)));
+
+ /* Probes a disk image, and returns information about what it found in *ret.
+ *
+ * Returns -ENOPKG if no suitable partition table or file system could be found.
+ * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
+
+ if (verity && verity->root_hash) {
+ sd_id128_t fsuuid, vuuid;
+
+ /* If a root hash is supplied, then we use the root partition that has a UUID that match the
+ * first 128bit of the root hash. And we use the verity partition that has a UUID that match
+ * the final 128bit. */
+
+ if (verity->root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+
+ memcpy(&fsuuid, verity->root_hash, sizeof(sd_id128_t));
+ memcpy(&vuuid, (const uint8_t*) verity->root_hash + verity->root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
+
+ if (sd_id128_is_null(fsuuid))
+ return -EINVAL;
+ if (sd_id128_is_null(vuuid))
+ return -EINVAL;
+
+ /* If the verity data declares it's for the /usr partition, then search for that, in all
+ * other cases assume it's for the root partition. */
+#ifdef GPT_USR_NATIVE
+ if (verity->designator == PARTITION_USR) {
+ usr_uuid = fsuuid;
+ usr_verity_uuid = vuuid;
+ } else {
+#endif
+#ifdef GPT_ROOT_NATIVE
+ root_uuid = fsuuid;
+ root_verity_uuid = vuuid;
+#endif
+#ifdef GPT_USR_NATIVE
+ }
+#endif
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return r;
+
+ if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+ _cleanup_(sd_device_unrefp) sd_device *initialized = NULL;
+
+ /* If udev support is enabled, then let's wait for the device to be initialized before we doing anything. */
+
+ r = device_wait_for_initialization_harder(
+ d,
+ "block",
+ usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC),
+ &initialized);
+ if (r < 0)
+ return r;
+
+ sd_device_unref(d);
+ d = TAKE_PTR(initialized);
+ }
+
+ b = blkid_new_probe();
+ if (!b)
+ return -ENOMEM;
+
+ errno = 0;
+ r = blkid_probe_set_device(b, fd, 0, 0);
+ if (r != 0)
+ return errno_or_else(ENOMEM);
+
+ if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
+ /* Look for file system superblocks, unless we only shall look for GPT partition tables */
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
+ }
+
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (IN_SET(r, -2, 1))
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table.");
+ if (r != 0)
+ return errno_or_else(EIO);
+
+ m = new0(DissectedImage, 1);
+ if (!m)
+ return -ENOMEM;
+
+ if ((!(flags & DISSECT_IMAGE_GPT_ONLY) &&
+ (flags & DISSECT_IMAGE_REQUIRE_ROOT)) ||
+ (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)) {
+ const char *usage = NULL;
+
+ /* If flags permit this, also allow using non-partitioned single-filesystem images */
+
+ (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
+ if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
+ const char *fstype = NULL, *options = NULL, *devname = NULL;
+ _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL;
+
+ /* OK, we have found a file system, that's our root partition then. */
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ r = sd_device_get_devname(d, &devname);
+ if (r < 0)
+ return r;
+
+ n = strdup(devname);
+ if (!n)
+ return -ENOMEM;
+
+ m->single_file_system = true;
+ m->verity = verity && verity->root_hash && verity->data_path && (verity->designator < 0 || verity->designator == PARTITION_ROOT);
+ m->can_verity = verity && verity->data_path;
+
+ options = mount_options_from_designator(mount_options, PARTITION_ROOT);
+ if (options) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = !m->verity,
+ .partno = -1,
+ .architecture = _ARCHITECTURE_INVALID,
+ .fstype = TAKE_PTR(t),
+ .node = TAKE_PTR(n),
+ .mount_options = TAKE_PTR(o),
+ };
+
+ m->encrypted = streq_ptr(fstype, "crypto_LUKS");
+
+ *ret = TAKE_PTR(m);
+ return 0;
+ }
+ }
+
+ (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
+ if (!pttype)
+ return -ENOPKG;
+
+ is_gpt = streq_ptr(pttype, "gpt");
+ is_mbr = streq_ptr(pttype, "dos");
+
+ if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
+ return -ENOPKG;
+
+ /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't
+ * do partition scanning. */
+ r = blockdev_partscan_enabled(fd);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EPROTONOSUPPORT;
+
+ errno = 0;
+ pl = blkid_probe_get_partitions(b);
+ if (!pl)
+ return errno_or_else(ENOMEM);
+
+ errno = 0;
+ n_partitions = blkid_partlist_numof_partitions(pl);
+ if (n_partitions < 0)
+ return errno_or_else(EIO);
+
+ deadline = usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC);
+ for (int i = 0; i < n_partitions; i++) {
+ _cleanup_(sd_device_unrefp) sd_device *q = NULL;
+ unsigned long long pflags;
+ blkid_partition pp;
+ const char *node;
+ int nr;
+
+ errno = 0;
+ pp = blkid_partlist_get_partition(pl, i);
+ if (!pp)
+ return errno_or_else(EIO);
+
+ r = wait_for_partition_device(d, pp, deadline, &q);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devname(q, &node);
+ if (r < 0)
+ return r;
+
+ pflags = blkid_partition_get_flags(pp);
+
+ errno = 0;
+ nr = blkid_partition_get_partno(pp);
+ if (nr < 0)
+ return errno_or_else(EIO);
+
+ if (is_gpt) {
+ PartitionDesignator designator = _PARTITION_DESIGNATOR_INVALID;
+ int architecture = _ARCHITECTURE_INVALID;
+ const char *stype, *sid, *fstype = NULL;
+ sd_id128_t type_id, id;
+ bool rw = true;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (!sid)
+ continue;
+ if (sd_id128_from_string(sid, &id) < 0)
+ continue;
+
+ stype = blkid_partition_get_type_string(pp);
+ if (!stype)
+ continue;
+ if (sd_id128_from_string(stype, &type_id) < 0)
+ continue;
+
+ if (sd_id128_equal(type_id, GPT_HOME)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_HOME;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_SRV)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_SRV;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_ESP)) {
+
+ /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is
+ * not defined there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as
+ * recommended by the UEFI spec (See "12.3.3 Number and Location of System
+ * Partitions"). */
+
+ if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
+ continue;
+
+ designator = PARTITION_ESP;
+ fstype = "vfat";
+
+ } else if (sd_id128_equal(type_id, GPT_XBOOTLDR)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_XBOOTLDR;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ }
+#ifdef GPT_ROOT_NATIVE
+ else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a root ID is specified, ignore everything but the root id */
+ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT;
+ architecture = native_architecture();
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless a root hash is specified */
+ if (sd_id128_is_null(root_verity_uuid) || !sd_id128_equal(root_verity_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = native_architecture();
+ rw = false;
+ }
+#endif
+#ifdef GPT_ROOT_SECONDARY
+ else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a root ID is specified, ignore everything but the root id */
+ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_SECONDARY;
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless root has is specified */
+ if (sd_id128_is_null(root_verity_uuid) || !sd_id128_equal(root_verity_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_SECONDARY_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = false;
+ }
+#endif
+#ifdef GPT_USR_NATIVE
+ else if (sd_id128_equal(type_id, GPT_USR_NATIVE)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a usr ID is specified, ignore everything but the usr id */
+ if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id))
+ continue;
+
+ designator = PARTITION_USR;
+ architecture = native_architecture();
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_USR_NATIVE_VERITY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless a usr hash is specified */
+ if (sd_id128_is_null(usr_verity_uuid) || !sd_id128_equal(usr_verity_uuid, id))
+ continue;
+
+ designator = PARTITION_USR_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = native_architecture();
+ rw = false;
+ }
+#endif
+#ifdef GPT_USR_SECONDARY
+ else if (sd_id128_equal(type_id, GPT_USR_SECONDARY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a usr ID is specified, ignore everything but the usr id */
+ if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id))
+ continue;
+
+ designator = PARTITION_USR_SECONDARY;
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_USR_SECONDARY_VERITY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless usr has is specified */
+ if (sd_id128_is_null(usr_verity_uuid) || !sd_id128_equal(usr_verity_uuid, id))
+ continue;
+
+ designator = PARTITION_USR_SECONDARY_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = false;
+ }
+#endif
+ else if (sd_id128_equal(type_id, GPT_SWAP)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_SWAP;
+ fstype = "swap";
+
+ } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
+ generic_uuid = id;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+
+ } else if (sd_id128_equal(type_id, GPT_TMP)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_TMP;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_VAR)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ if (!FLAGS_SET(flags, DISSECT_IMAGE_RELAX_VAR_CHECK)) {
+ sd_id128_t var_uuid;
+
+ /* For /var we insist that the uuid of the partition matches the
+ * HMAC-SHA256 of the /var GPT partition type uuid, keyed by machine
+ * ID. Why? Unlike the other partitions /var is inherently
+ * installation specific, hence we need to be careful not to mount it
+ * in the wrong installation. By hashing the partition UUID from
+ * /etc/machine-id we can securely bind the partition to the
+ * installation. */
+
+ r = sd_id128_get_machine_app_specific(GPT_VAR, &var_uuid);
+ if (r < 0)
+ return r;
+
+ if (!sd_id128_equal(var_uuid, id)) {
+ log_debug("Found a /var/ partition, but its UUID didn't match our expectations, ignoring.");
+ continue;
+ }
+ }
+
+ designator = PARTITION_VAR;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ }
+
+ if (designator != _PARTITION_DESIGNATOR_INVALID) {
+ _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL;
+ const char *options = NULL;
+
+ /* First one wins */
+ if (m->partitions[designator].found)
+ continue;
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ n = strdup(node);
+ if (!n)
+ return -ENOMEM;
+
+ options = mount_options_from_designator(mount_options, designator);
+ if (options) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ m->partitions[designator] = (DissectedPartition) {
+ .found = true,
+ .partno = nr,
+ .rw = rw,
+ .architecture = architecture,
+ .node = TAKE_PTR(n),
+ .fstype = TAKE_PTR(t),
+ .uuid = id,
+ .mount_options = TAKE_PTR(o),
+ };
+ }
+
+ } else if (is_mbr) {
+
+ switch (blkid_partition_get_type(pp)) {
+
+ case 0x83: /* Linux partition */
+
+ if (pflags != 0x80) /* Bootable flag */
+ continue;
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = true;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+
+ break;
+
+ case 0xEA: { /* Boot Loader Spec extended $BOOT partition */
+ _cleanup_free_ char *n = NULL, *o = NULL;
+ sd_id128_t id = SD_ID128_NULL;
+ const char *sid, *options = NULL;
+
+ /* First one wins */
+ if (m->partitions[PARTITION_XBOOTLDR].found)
+ continue;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (sid)
+ (void) sd_id128_from_string(sid, &id);
+
+ n = strdup(node);
+ if (!n)
+ return -ENOMEM;
+
+ options = mount_options_from_designator(mount_options, PARTITION_XBOOTLDR);
+ if (options) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ m->partitions[PARTITION_XBOOTLDR] = (DissectedPartition) {
+ .found = true,
+ .partno = nr,
+ .rw = true,
+ .architecture = _ARCHITECTURE_INVALID,
+ .node = TAKE_PTR(n),
+ .uuid = id,
+ .mount_options = TAKE_PTR(o),
+ };
+
+ break;
+ }}
+ }
+ }
+
+ if (m->partitions[PARTITION_ROOT].found) {
+ /* If we found the primary arch, then invalidate the secondary arch to avoid any ambiguities,
+ * since we never want to mount the secondary arch in this case. */
+ m->partitions[PARTITION_ROOT_SECONDARY].found = false;
+ m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
+ m->partitions[PARTITION_USR_SECONDARY].found = false;
+ m->partitions[PARTITION_USR_SECONDARY_VERITY].found = false;
+ } else {
+ /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
+ * either, then check if there's a single generic one, and use that. */
+
+ if (m->partitions[PARTITION_ROOT_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ /* We didn't find a primary architecture root, but we found a primary architecture /usr? Refuse that for now. */
+ if (m->partitions[PARTITION_USR].found || m->partitions[PARTITION_USR_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
+ /* Upgrade secondary arch to first */
+ m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY]);
+ m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
+
+ m->partitions[PARTITION_USR] = m->partitions[PARTITION_USR_SECONDARY];
+ zero(m->partitions[PARTITION_USR_SECONDARY]);
+ m->partitions[PARTITION_USR_VERITY] = m->partitions[PARTITION_USR_SECONDARY_VERITY];
+ zero(m->partitions[PARTITION_USR_SECONDARY_VERITY]);
+
+ } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
+ _cleanup_free_ char *o = NULL;
+ const char *options = NULL;
+
+ /* If the root hash was set, then we won't fall back to a generic node, because the
+ * root hash decides. */
+ if (verity && verity->root_hash)
+ return -EADDRNOTAVAIL;
+
+ /* If we didn't find a generic node, then we can't fix this up either */
+ if (!generic_node)
+ return -ENXIO;
+
+ /* If we didn't find a properly marked root partition, but we did find a single suitable
+ * generic Linux partition, then use this as root partition, if the caller asked for it. */
+ if (multiple_generic)
+ return -ENOTUNIQ;
+
+ options = mount_options_from_designator(mount_options, PARTITION_ROOT);
+ if (options) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = generic_rw,
+ .partno = generic_nr,
+ .architecture = _ARCHITECTURE_INVALID,
+ .node = TAKE_PTR(generic_node),
+ .uuid = generic_uuid,
+ .mount_options = TAKE_PTR(o),
+ };
+ }
+ }
+
+ /* Refuse if we found a verity partition for /usr but no matching file system partition */
+ if (!m->partitions[PARTITION_USR].found && m->partitions[PARTITION_USR_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ /* Combinations of verity /usr with verity-less root is OK, but the reverse is not */
+ if (m->partitions[PARTITION_ROOT_VERITY].found && m->partitions[PARTITION_USR].found && !m->partitions[PARTITION_USR_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ if (verity && verity->root_hash) {
+ if (verity->designator < 0 || verity->designator == PARTITION_ROOT) {
+ if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
+ return -EADDRNOTAVAIL;
+
+ /* If we found a verity setup, then the root partition is necessarily read-only. */
+ m->partitions[PARTITION_ROOT].rw = false;
+ m->verity = true;
+ }
+
+ if (verity->designator == PARTITION_USR) {
+ if (!m->partitions[PARTITION_USR_VERITY].found || !m->partitions[PARTITION_USR].found)
+ return -EADDRNOTAVAIL;
+
+ m->partitions[PARTITION_USR].rw = false;
+ m->verity = true;
+ }
+ }
+
+ blkid_free_probe(b);
+ b = NULL;
+
+ /* Fill in file system types if we don't know them yet. */
+ for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+
+ if (!p->found)
+ continue;
+
+ if (!p->fstype && p->node) {
+ r = probe_filesystem(p->node, &p->fstype);
+ if (r < 0 && r != -EUCLEAN)
+ return r;
+ }
+
+ if (streq_ptr(p->fstype, "crypto_LUKS"))
+ m->encrypted = true;
+
+ if (p->fstype && fstype_is_ro(p->fstype))
+ p->rw = false;
+ }
+
+ *ret = TAKE_PTR(m);
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+DissectedImage* dissected_image_unref(DissectedImage *m) {
+ if (!m)
+ return NULL;
+
+ for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ free(m->partitions[i].fstype);
+ free(m->partitions[i].node);
+ free(m->partitions[i].decrypted_fstype);
+ free(m->partitions[i].decrypted_node);
+ free(m->partitions[i].mount_options);
+ }
+
+ free(m->hostname);
+ strv_free(m->machine_info);
+ strv_free(m->os_release);
+
+ return mfree(m);
+}
+
+static int is_loop_device(const char *path) {
+ char s[SYS_BLOCK_PATH_MAX("/../loop/")];
+ struct stat st;
+
+ assert(path);
+
+ if (stat(path, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ xsprintf_sys_block_path(s, "/loop/", st.st_dev);
+ if (access(s, F_OK) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
+ xsprintf_sys_block_path(s, "/../loop/", st.st_dev);
+ if (access(s, F_OK) < 0)
+ return errno == ENOENT ? false : -errno;
+ }
+
+ return true;
+}
+
+static int run_fsck(const char *node, const char *fstype) {
+ int r, exit_status;
+ pid_t pid;
+
+ assert(node);
+ assert(fstype);
+
+ r = fsck_exists(fstype);
+ if (r < 0) {
+ log_debug_errno(r, "Couldn't determine whether fsck for %s exists, proceeding anyway.", fstype);
+ return 0;
+ }
+ if (r == 0) {
+ log_debug("Not checking partition %s, as fsck for %s does not exist.", node, fstype);
+ return 0;
+ }
+
+ r = safe_fork("(fsck)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_NULL_STDIO, &pid);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to fork off fsck: %m");
+ if (r == 0) {
+ /* Child */
+ execl("/sbin/fsck", "/sbin/fsck", "-aT", node, NULL);
+ log_debug_errno(errno, "Failed to execl() fsck: %m");
+ _exit(FSCK_OPERATIONAL_ERROR);
+ }
+
+ exit_status = wait_for_terminate_and_check("fsck", pid, 0);
+ if (exit_status < 0)
+ return log_debug_errno(exit_status, "Failed to fork off /sbin/fsck: %m");
+
+ if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) {
+ log_debug("fsck failed with exit status %i.", exit_status);
+
+ if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), "File system is corrupted, refusing.");
+
+ log_debug("Ignoring fsck error.");
+ }
+
+ return 0;
+}
+
+static int mount_partition(
+ DissectedPartition *m,
+ const char *where,
+ const char *directory,
+ uid_t uid_shift,
+ DissectImageFlags flags) {
+
+ _cleanup_free_ char *chased = NULL, *options = NULL;
+ const char *p, *node, *fstype;
+ bool rw;
+ int r;
+
+ assert(m);
+ assert(where);
+
+ /* Use decrypted node and matching fstype if available, otherwise use the original device */
+ node = m->decrypted_node ?: m->node;
+ fstype = m->decrypted_node ? m->decrypted_fstype: m->fstype;
+
+ if (!m->found || !node)
+ return 0;
+ if (!fstype)
+ return -EAFNOSUPPORT;
+
+ /* We are looking at an encrypted partition? This either means stacked encryption, or the caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this case. */
+ if (streq(fstype, "crypto_LUKS"))
+ return -EUNATCH;
+
+ rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
+
+ if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) {
+ r = run_fsck(node, fstype);
+ if (r < 0)
+ return r;
+ }
+
+ if (directory) {
+ if (!FLAGS_SET(flags, DISSECT_IMAGE_READ_ONLY)) {
+ /* Automatically create missing mount points, if necessary. */
+ r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755);
+ if (r < 0)
+ return r;
+ }
+
+ r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
+ if (r < 0)
+ return r;
+
+ p = chased;
+ } else
+ p = where;
+
+ /* If requested, turn on discard support. */
+ if (fstype_can_discard(fstype) &&
+ ((flags & DISSECT_IMAGE_DISCARD) ||
+ ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0))) {
+ options = strdup("discard");
+ if (!options)
+ return -ENOMEM;
+ }
+
+ if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
+ _cleanup_free_ char *uid_option = NULL;
+
+ if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
+ return -ENOMEM;
+
+ if (!strextend_with_separator(&options, ",", uid_option, NULL))
+ return -ENOMEM;
+ }
+
+ if (!isempty(m->mount_options))
+ if (!strextend_with_separator(&options, ",", m->mount_options, NULL))
+ return -ENOMEM;
+
+ if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
+ r = mkdir_p(p, 0755);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
+ int r, xbootldr_mounted;
+
+ assert(m);
+ assert(where);
+
+ /* Returns:
+ *
+ * -ENXIO → No root partition found
+ * -EMEDIUMTYPE → DISSECT_IMAGE_VALIDATE_OS set but no os-release file found
+ * -EUNATCH → Encrypted partition found for which no dm-crypt was set up yet
+ * -EUCLEAN → fsck for file system failed
+ * -EBUSY → File system already mounted/used elsewhere (kernel)
+ * -EAFNOSUPPORT → File system type not supported or not known
+ */
+
+ if (!m->partitions[PARTITION_ROOT].found)
+ return -ENXIO;
+
+ if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
+ r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
+ if (r < 0)
+ return r;
+ }
+
+ /* Mask DISSECT_IMAGE_MKDIR for all subdirs: the idea is that only the top-level mount point is
+ * created if needed, but the image itself not modified. */
+ flags &= ~DISSECT_IMAGE_MKDIR;
+
+ if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
+ /* For us mounting root always means mounting /usr as well */
+ r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ if (flags & DISSECT_IMAGE_VALIDATE_OS) {
+ r = path_is_os_tree(where);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+ }
+ }
+
+ if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
+ return 0;
+
+ r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, flags);
+ if (xbootldr_mounted < 0)
+ return xbootldr_mounted;
+
+ if (m->partitions[PARTITION_ESP].found) {
+ int esp_done = false;
+
+ /* Mount the ESP to /efi if it exists. If it doesn't exist, use /boot instead, but only if it
+ * exists and is empty, and we didn't already mount the XBOOTLDR partition into it. */
+
+ r = chase_symlinks("/efi", where, CHASE_PREFIX_ROOT, NULL, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+
+ /* /efi doesn't exist. Let's see if /boot is suitable then */
+
+ if (!xbootldr_mounted) {
+ _cleanup_free_ char *p = NULL;
+
+ r = chase_symlinks("/boot", where, CHASE_PREFIX_ROOT, &p, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+ } else if (dir_is_empty(p) > 0) {
+ /* It exists and is an empty directory. Let's mount the ESP there. */
+ r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ esp_done = true;
+ }
+ }
+ }
+
+ if (!esp_done) {
+ /* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */
+
+ r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, flags);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
+ int r;
+
+ assert(m);
+ assert(where);
+
+ r = dissected_image_mount(m, where, uid_shift, flags);
+ if (r == -ENXIO)
+ return log_error_errno(r, "Not root file system found in image.");
+ if (r == -EMEDIUMTYPE)
+ return log_error_errno(r, "No suitable os-release file in image found.");
+ if (r == -EUNATCH)
+ return log_error_errno(r, "Encrypted file system discovered, but decryption not requested.");
+ if (r == -EUCLEAN)
+ return log_error_errno(r, "File system check on image failed.");
+ if (r == -EBUSY)
+ return log_error_errno(r, "File system already mounted elsewhere.");
+ if (r == -EAFNOSUPPORT)
+ return log_error_errno(r, "File system type not supported or not known.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to mount image: %m");
+
+ return r;
+}
+
+#if HAVE_LIBCRYPTSETUP
+typedef struct DecryptedPartition {
+ struct crypt_device *device;
+ char *name;
+ bool relinquished;
+} DecryptedPartition;
+
+struct DecryptedImage {
+ DecryptedPartition *decrypted;
+ size_t n_decrypted;
+ size_t n_allocated;
+};
+#endif
+
+DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
+#if HAVE_LIBCRYPTSETUP
+ size_t i;
+ int r;
+
+ if (!d)
+ return NULL;
+
+ for (i = 0; i < d->n_decrypted; i++) {
+ DecryptedPartition *p = d->decrypted + i;
+
+ if (p->device && p->name && !p->relinquished) {
+ r = sym_crypt_deactivate_by_name(p->device, p->name, 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
+ }
+
+ if (p->device)
+ sym_crypt_free(p->device);
+ free(p->name);
+ }
+
+ free(d);
+#endif
+ return NULL;
+}
+
+#if HAVE_LIBCRYPTSETUP
+
+static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
+ _cleanup_free_ char *name = NULL, *node = NULL;
+ const char *base;
+
+ assert(original_node);
+ assert(suffix);
+ assert(ret_name);
+ assert(ret_node);
+
+ base = strrchr(original_node, '/');
+ if (!base)
+ base = original_node;
+ else
+ base++;
+ if (isempty(base))
+ return -EINVAL;
+
+ name = strjoin(base, suffix);
+ if (!name)
+ return -ENOMEM;
+ if (!filename_is_valid(name))
+ return -EINVAL;
+
+ node = path_join(sym_crypt_get_dir(), name);
+ if (!node)
+ return -ENOMEM;
+
+ *ret_name = TAKE_PTR(name);
+ *ret_node = TAKE_PTR(node);
+
+ return 0;
+}
+
+static int decrypt_partition(
+ DissectedPartition *m,
+ const char *passphrase,
+ DissectImageFlags flags,
+ DecryptedImage *d) {
+
+ _cleanup_free_ char *node = NULL, *name = NULL;
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ if (!m->found || !m->node || !m->fstype)
+ return 0;
+
+ if (!streq(m->fstype, "crypto_LUKS"))
+ return 0;
+
+ if (!passphrase)
+ return -ENOKEY;
+
+ r = dlopen_cryptsetup();
+ if (r < 0)
+ return r;
+
+ r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
+ return -ENOMEM;
+
+ r = sym_crypt_init(&cd, m->node);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
+
+ cryptsetup_enable_logging(cd);
+
+ r = sym_crypt_load(cd, CRYPT_LUKS, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load LUKS metadata: %m");
+
+ r = sym_crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
+ ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
+ ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to activate LUKS device: %m");
+ return r == -EPERM ? -EKEYREJECTED : r;
+ }
+
+ d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
+ .name = TAKE_PTR(name),
+ .device = TAKE_PTR(cd),
+ };
+
+ m->decrypted_node = TAKE_PTR(node);
+
+ return 0;
+}
+
+static int verity_can_reuse(
+ const VeritySettings *verity,
+ const char *name,
+ struct crypt_device **ret_cd) {
+
+ /* If the same volume was already open, check that the root hashes match, and reuse it if they do */
+ _cleanup_free_ char *root_hash_existing = NULL;
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ struct crypt_params_verity crypt_params = {};
+ size_t root_hash_existing_size;
+ int r;
+
+ assert(verity);
+ assert(name);
+ assert(ret_cd);
+
+ r = sym_crypt_init_by_name(&cd, name);
+ if (r < 0)
+ return log_debug_errno(r, "Error opening verity device, crypt_init_by_name failed: %m");
+
+ r = sym_crypt_get_verity_info(cd, &crypt_params);
+ if (r < 0)
+ return log_debug_errno(r, "Error opening verity device, crypt_get_verity_info failed: %m");
+
+ root_hash_existing_size = verity->root_hash_size;
+ root_hash_existing = malloc0(root_hash_existing_size);
+ if (!root_hash_existing)
+ return -ENOMEM;
+
+ r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, root_hash_existing, &root_hash_existing_size, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Error opening verity device, crypt_volume_key_get failed: %m");
+ if (verity->root_hash_size != root_hash_existing_size ||
+ memcmp(root_hash_existing, verity->root_hash, verity->root_hash_size) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but root hashes are different.");
+
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+ /* Ensure that, if signatures are supported, we only reuse the device if the previous mount used the
+ * same settings, so that a previous unsigned mount will not be reused if the user asks to use
+ * signing for the new one, and viceversa. */
+ if (!!verity->root_hash_sig != !!(crypt_params.flags & CRYPT_VERITY_ROOT_HASH_SIGNATURE))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but signature settings are not the same.");
+#endif
+
+ *ret_cd = TAKE_PTR(cd);
+ return 0;
+}
+
+static inline void dm_deferred_remove_clean(char *name) {
+ if (!name)
+ return;
+
+ (void) sym_crypt_deactivate_by_name(NULL, name, CRYPT_DEACTIVATE_DEFERRED);
+ free(name);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char *, dm_deferred_remove_clean);
+
+static int verity_partition(
+ PartitionDesignator designator,
+ DissectedPartition *m,
+ DissectedPartition *v,
+ const VeritySettings *verity,
+ DissectImageFlags flags,
+ DecryptedImage *d) {
+
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(dm_deferred_remove_cleanp) char *restore_deferred_remove = NULL;
+ _cleanup_free_ char *node = NULL, *name = NULL;
+ int r;
+
+ assert(m);
+ assert(v || (verity && verity->data_path));
+
+ if (!verity || !verity->root_hash)
+ return 0;
+ if (!((verity->designator < 0 && designator == PARTITION_ROOT) ||
+ (verity->designator == designator)))
+ return 0;
+
+ if (!m->found || !m->node || !m->fstype)
+ return 0;
+ if (!verity->data_path) {
+ if (!v->found || !v->node || !v->fstype)
+ return 0;
+
+ if (!streq(v->fstype, "DM_verity_hash"))
+ return 0;
+ }
+
+ r = dlopen_cryptsetup();
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) {
+ /* Use the roothash, which is unique per volume, as the device node name, so that it can be reused */
+ _cleanup_free_ char *root_hash_encoded = NULL;
+
+ root_hash_encoded = hexmem(verity->root_hash, verity->root_hash_size);
+ if (!root_hash_encoded)
+ return -ENOMEM;
+
+ r = make_dm_name_and_node(root_hash_encoded, "-verity", &name, &node);
+ } else
+ r = make_dm_name_and_node(m->node, "-verity", &name, &node);
+ if (r < 0)
+ return r;
+
+ r = sym_crypt_init(&cd, verity->data_path ?: v->node);
+ if (r < 0)
+ return r;
+
+ cryptsetup_enable_logging(cd);
+
+ r = sym_crypt_load(cd, CRYPT_VERITY, NULL);
+ if (r < 0)
+ return r;
+
+ r = sym_crypt_set_data_device(cd, m->node);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
+ return -ENOMEM;
+
+ /* If activating fails because the device already exists, check the metadata and reuse it if it matches.
+ * In case of ENODEV/ENOENT, which can happen if another process is activating at the exact same time,
+ * retry a few times before giving up. */
+ for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
+ if (verity->root_hash_sig) {
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+ r = sym_crypt_activate_by_signed_key(
+ cd,
+ name,
+ verity->root_hash,
+ verity->root_hash_size,
+ verity->root_hash_sig,
+ verity->root_hash_sig_size,
+ CRYPT_ACTIVATE_READONLY);
+#else
+ r = log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Activation of verity device with signature requested, but not supported by %s due to missing crypt_activate_by_signed_key().", program_invocation_short_name);
+#endif
+ } else
+ r = sym_crypt_activate_by_volume_key(
+ cd,
+ name,
+ verity->root_hash,
+ verity->root_hash_size,
+ CRYPT_ACTIVATE_READONLY);
+ /* libdevmapper can return EINVAL when the device is already in the activation stage.
+ * There's no way to distinguish this situation from a genuine error due to invalid
+ * parameters, so immediately fall back to activating the device with a unique name.
+ * Improvements in libcrypsetup can ensure this never happens:
+ * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/96 */
+ if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
+ return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
+ if (!IN_SET(r,
+ 0, /* Success */
+ -EEXIST, /* Volume is already open and ready to be used */
+ -EBUSY, /* Volume is being opened but not ready, crypt_init_by_name can fetch details */
+ -ENODEV /* Volume is being opened but not ready, crypt_init_by_name would fail, try to open again */))
+ return r;
+ if (IN_SET(r, -EEXIST, -EBUSY)) {
+ struct crypt_device *existing_cd = NULL;
+
+ if (!restore_deferred_remove){
+ /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */
+ r = dm_deferred_remove_cancel(name);
+ /* If activation returns EBUSY there might be no deferred removal to cancel, that's fine */
+ if (r < 0 && r != -ENXIO)
+ return log_debug_errno(r, "Disabling automated deferred removal for verity device %s failed: %m", node);
+ if (r == 0) {
+ restore_deferred_remove = strdup(name);
+ if (!restore_deferred_remove)
+ return -ENOMEM;
+ }
+ }
+
+ r = verity_can_reuse(verity, name, &existing_cd);
+ /* Same as above, -EINVAL can randomly happen when it actually means -EEXIST */
+ if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
+ return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
+ if (!IN_SET(r, 0, -ENODEV, -ENOENT, -EBUSY))
+ return log_debug_errno(r, "Checking whether existing verity device %s can be reused failed: %m", node);
+ if (r == 0) {
+ /* devmapper might say that the device exists, but the devlink might not yet have been
+ * created. Check and wait for the udev event in that case. */
+ r = device_wait_for_devlink(node, "block", usec_add(now(CLOCK_MONOTONIC), 100 * USEC_PER_MSEC), NULL);
+ /* Fallback to activation with a unique device if it's taking too long */
+ if (r == -ETIMEDOUT)
+ break;
+ if (r < 0)
+ return r;
+
+ if (cd)
+ sym_crypt_free(cd);
+ cd = existing_cd;
+ }
+ }
+ if (r == 0)
+ break;
+
+ /* Device is being opened by another process, but it has not finished yet, yield for 2ms */
+ (void) usleep(2 * USEC_PER_MSEC);
+ }
+
+ /* An existing verity device was reported by libcryptsetup/libdevmapper, but we can't use it at this time.
+ * Fall back to activating it with a unique device name. */
+ if (r != 0 && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
+ return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
+
+ /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */
+ restore_deferred_remove = mfree(restore_deferred_remove);
+
+ d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
+ .name = TAKE_PTR(name),
+ .device = TAKE_PTR(cd),
+ };
+
+ m->decrypted_node = TAKE_PTR(node);
+
+ return 0;
+}
+#endif
+
+int dissected_image_decrypt(
+ DissectedImage *m,
+ const char *passphrase,
+ const VeritySettings *verity,
+ DissectImageFlags flags,
+ DecryptedImage **ret) {
+
+#if HAVE_LIBCRYPTSETUP
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
+ int r;
+#endif
+
+ assert(m);
+ assert(!verity || verity->root_hash || verity->root_hash_size == 0);
+
+ /* Returns:
+ *
+ * = 0 → There was nothing to decrypt
+ * > 0 → Decrypted successfully
+ * -ENOKEY → There's something to decrypt but no key was supplied
+ * -EKEYREJECTED → Passed key was not correct
+ */
+
+ if (verity && verity->root_hash && verity->root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+
+ if (!m->encrypted && !m->verity) {
+ *ret = NULL;
+ return 0;
+ }
+
+#if HAVE_LIBCRYPTSETUP
+ d = new0(DecryptedImage, 1);
+ if (!d)
+ return -ENOMEM;
+
+ for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+ PartitionDesignator k;
+
+ if (!p->found)
+ continue;
+
+ r = decrypt_partition(p, passphrase, flags, d);
+ if (r < 0)
+ return r;
+
+ k = PARTITION_VERITY_OF(i);
+ if (k >= 0) {
+ r = verity_partition(i, p, m->partitions + k, verity, flags | DISSECT_IMAGE_VERITY_SHARE, d);
+ if (r < 0)
+ return r;
+ }
+
+ if (!p->decrypted_fstype && p->decrypted_node) {
+ r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
+ if (r < 0 && r != -EUCLEAN)
+ return r;
+ }
+ }
+
+ *ret = TAKE_PTR(d);
+
+ return 1;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+int dissected_image_decrypt_interactively(
+ DissectedImage *m,
+ const char *passphrase,
+ const VeritySettings *verity,
+ DissectImageFlags flags,
+ DecryptedImage **ret) {
+
+ _cleanup_strv_free_erase_ char **z = NULL;
+ int n = 3, r;
+
+ if (passphrase)
+ n--;
+
+ for (;;) {
+ r = dissected_image_decrypt(m, passphrase, verity, flags, ret);
+ if (r >= 0)
+ return r;
+ if (r == -EKEYREJECTED)
+ log_error_errno(r, "Incorrect passphrase, try again!");
+ else if (r != -ENOKEY)
+ return log_error_errno(r, "Failed to decrypt image: %m");
+
+ if (--n < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED),
+ "Too many retries.");
+
+ z = strv_free(z);
+
+ r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query for passphrase: %m");
+
+ passphrase = z[0];
+ }
+}
+
+int decrypted_image_relinquish(DecryptedImage *d) {
+
+#if HAVE_LIBCRYPTSETUP
+ size_t i;
+ int r;
+#endif
+
+ assert(d);
+
+ /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
+ * that we don't clean it up ourselves either anymore */
+
+#if HAVE_LIBCRYPTSETUP
+ for (i = 0; i < d->n_decrypted; i++) {
+ DecryptedPartition *p = d->decrypted + i;
+
+ if (p->relinquished)
+ continue;
+
+ r = sym_crypt_deactivate_by_name(NULL, p->name, CRYPT_DEACTIVATE_DEFERRED);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
+
+ p->relinquished = true;
+ }
+#endif
+
+ return 0;
+}
+
+static char *build_auxiliary_path(const char *image, const char *suffix) {
+ const char *e;
+ char *n;
+
+ assert(image);
+ assert(suffix);
+
+ e = endswith(image, ".raw");
+ if (!e)
+ return strjoin(e, suffix);
+
+ n = new(char, e - image + strlen(suffix) + 1);
+ if (!n)
+ return NULL;
+
+ strcpy(mempcpy(n, image, e - image), suffix);
+ return n;
+}
+
+void verity_settings_done(VeritySettings *v) {
+ assert(v);
+
+ v->root_hash = mfree(v->root_hash);
+ v->root_hash_size = 0;
+
+ v->root_hash_sig = mfree(v->root_hash_sig);
+ v->root_hash_sig_size = 0;
+
+ v->data_path = mfree(v->data_path);
+}
+
+int verity_settings_load(
+ VeritySettings *verity,
+ const char *image,
+ const char *root_hash_path,
+ const char *root_hash_sig_path) {
+
+ _cleanup_free_ void *root_hash = NULL, *root_hash_sig = NULL;
+ size_t root_hash_size = 0, root_hash_sig_size = 0;
+ _cleanup_free_ char *verity_data_path = NULL;
+ PartitionDesignator designator;
+ int r;
+
+ assert(verity);
+ assert(image);
+ assert(verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR));
+
+ /* If we are asked to load the root hash for a device node, exit early */
+ if (is_device_path(image))
+ return 0;
+
+ designator = verity->designator;
+
+ /* We only fill in what isn't already filled in */
+
+ if (!verity->root_hash) {
+ _cleanup_free_ char *text = NULL;
+
+ if (root_hash_path) {
+ /* If explicitly specified it takes precedence */
+ r = read_one_line_file(root_hash_path, &text);
+ if (r < 0)
+ return r;
+
+ if (designator < 0)
+ designator = PARTITION_ROOT;
+ } else {
+ /* Otherwise look for xattr and separate file, and first for the data for root and if
+ * that doesn't exist for /usr */
+
+ if (designator < 0 || designator == PARTITION_ROOT) {
+ r = getxattr_malloc(image, "user.verity.roothash", &text, true);
+ if (r < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r))
+ return r;
+
+ p = build_auxiliary_path(image, ".roothash");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &text);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+
+ if (text)
+ designator = PARTITION_ROOT;
+ }
+
+ if (!text && (designator < 0 || designator == PARTITION_USR)) {
+ /* So in the "roothash" xattr/file name above the "root" of course primarily
+ * refers to the root of the Verity Merkle tree. But coincidentally it also
+ * is the hash for the *root* file system, i.e. the "root" neatly refers to
+ * two distinct concepts called "root". Taking benefit of this happy
+ * coincidence we call the file with the root hash for the /usr/ file system
+ * `usrhash`, because `usrroothash` or `rootusrhash` would just be too
+ * confusing. We thus drop the reference to the root of the Merkle tree, and
+ * just indicate which file system it's about. */
+ r = getxattr_malloc(image, "user.verity.usrhash", &text, true);
+ if (r < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r))
+ return r;
+
+ p = build_auxiliary_path(image, ".usrhash");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &text);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+
+ if (text)
+ designator = PARTITION_USR;
+ }
+ }
+
+ if (text) {
+ r = unhexmem(text, strlen(text), &root_hash, &root_hash_size);
+ if (r < 0)
+ return r;
+ if (root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+ }
+ }
+
+ if ((root_hash || verity->root_hash) && !verity->root_hash_sig) {
+ if (root_hash_sig_path) {
+ r = read_full_file_full(AT_FDCWD, root_hash_sig_path, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ if (designator < 0)
+ designator = PARTITION_ROOT;
+ } else {
+ if (designator < 0 || designator == PARTITION_ROOT) {
+ _cleanup_free_ char *p = NULL;
+
+ /* Follow naming convention recommended by the relevant RFC:
+ * https://tools.ietf.org/html/rfc5751#section-3.2.1 */
+ p = build_auxiliary_path(image, ".roothash.p7s");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_full_file_full(AT_FDCWD, p, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r >= 0)
+ designator = PARTITION_ROOT;
+ }
+
+ if (!root_hash_sig && (designator < 0 || designator == PARTITION_USR)) {
+ _cleanup_free_ char *p = NULL;
+
+ p = build_auxiliary_path(image, ".usrhash.p7s");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_full_file_full(AT_FDCWD, p, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r >= 0)
+ designator = PARTITION_USR;
+ }
+ }
+
+ if (root_hash_sig && root_hash_sig_size == 0) /* refuse empty size signatures */
+ return -EINVAL;
+ }
+
+ if (!verity->data_path) {
+ _cleanup_free_ char *p = NULL;
+
+ p = build_auxiliary_path(image, ".verity");
+ if (!p)
+ return -ENOMEM;
+
+ if (access(p, F_OK) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else
+ verity_data_path = TAKE_PTR(p);
+ }
+
+ if (root_hash) {
+ verity->root_hash = TAKE_PTR(root_hash);
+ verity->root_hash_size = root_hash_size;
+ }
+
+ if (root_hash_sig) {
+ verity->root_hash_sig = TAKE_PTR(root_hash_sig);
+ verity->root_hash_sig_size = root_hash_sig_size;
+ }
+
+ if (verity_data_path)
+ verity->data_path = TAKE_PTR(verity_data_path);
+
+ if (verity->designator < 0)
+ verity->designator = designator;
+
+ return 1;
+}
+
+int dissected_image_acquire_metadata(DissectedImage *m) {
+
+ enum {
+ META_HOSTNAME,
+ META_MACHINE_ID,
+ META_MACHINE_INFO,
+ META_OS_RELEASE,
+ _META_MAX,
+ };
+
+ static const char *const paths[_META_MAX] = {
+ [META_HOSTNAME] = "/etc/hostname\0",
+ [META_MACHINE_ID] = "/etc/machine-id\0",
+ [META_MACHINE_INFO] = "/etc/machine-info\0",
+ [META_OS_RELEASE] = "/etc/os-release\0"
+ "/usr/lib/os-release\0",
+ };
+
+ _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+ _cleanup_close_pair_ int error_pipe[2] = { -1, -1 };
+ _cleanup_(rmdir_and_freep) char *t = NULL;
+ _cleanup_(sigkill_waitp) pid_t child = 0;
+ sd_id128_t machine_id = SD_ID128_NULL;
+ _cleanup_free_ char *hostname = NULL;
+ unsigned n_meta_initialized = 0, k;
+ int fds[2 * _META_MAX], r, v;
+ ssize_t n;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ assert(m);
+
+ for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
+ if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
+ if (r < 0)
+ goto finish;
+
+ if (pipe2(error_pipe, O_CLOEXEC) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ error_pipe[0] = safe_close(error_pipe[0]);
+
+ r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS);
+ if (r < 0) {
+ /* Let parent know the error */
+ (void) write(error_pipe[1], &r, sizeof(r));
+
+ log_debug_errno(r, "Failed to mount dissected image: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ for (k = 0; k < _META_MAX; k++) {
+ _cleanup_close_ int fd = -ENOENT;
+ const char *p;
+
+ fds[2*k] = safe_close(fds[2*k]);
+
+ NULSTR_FOREACH(p, paths[k]) {
+ fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd >= 0)
+ break;
+ }
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+ continue;
+ }
+
+ r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
+ if (r < 0) {
+ (void) write(error_pipe[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ error_pipe[1] = safe_close(error_pipe[1]);
+
+ for (k = 0; k < _META_MAX; k++) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+
+ f = take_fdopen(&fds[2*k], "r");
+ if (!f) {
+ r = -errno;
+ goto finish;
+ }
+
+ switch (k) {
+
+ case META_HOSTNAME:
+ r = read_etc_hostname_stream(f, &hostname);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/hostname: %m");
+
+ break;
+
+ case META_MACHINE_ID: {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/machine-id: %m");
+ else if (r == 33) {
+ r = sd_id128_from_string(line, &machine_id);
+ if (r < 0)
+ log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
+ } else if (r == 0)
+ log_debug("/etc/machine-id file is empty.");
+ else if (streq(line, "uninitialized"))
+ log_debug("/etc/machine-id file is uninitialized (likely aborted first boot).");
+ else
+ log_debug("/etc/machine-id has unexpected length %i.", r);
+
+ break;
+ }
+
+ case META_MACHINE_INFO:
+ r = load_env_file_pairs(f, "machine-info", &machine_info);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/machine-info: %m");
+
+ break;
+
+ case META_OS_RELEASE:
+ r = load_env_file_pairs(f, "os-release", &os_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read OS release file: %m");
+
+ break;
+ }
+ }
+
+ r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
+ child = 0;
+ if (r < 0)
+ return r;
+
+ n = read(error_pipe[0], &v, sizeof(v));
+ if (n < 0)
+ return -errno;
+ if (n == sizeof(v))
+ return v; /* propagate error sent to us from child */
+ if (n != 0)
+ return -EIO;
+
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+
+ free_and_replace(m->hostname, hostname);
+ m->machine_id = machine_id;
+ strv_free_and_replace(m->machine_info, machine_info);
+ strv_free_and_replace(m->os_release, os_release);
+
+finish:
+ for (k = 0; k < n_meta_initialized; k++)
+ safe_close_pair(fds + 2*k);
+
+ return r;
+}
+
+int dissect_image_and_warn(
+ int fd,
+ const char *name,
+ const VeritySettings *verity,
+ const MountOptions *mount_options,
+ DissectImageFlags flags,
+ DissectedImage **ret) {
+
+ _cleanup_free_ char *buffer = NULL;
+ int r;
+
+ if (!name) {
+ r = fd_get_path(fd, &buffer);
+ if (r < 0)
+ return r;
+
+ name = buffer;
+ }
+
+ r = dissect_image(fd, verity, mount_options, flags, ret);
+ switch (r) {
+
+ case -EOPNOTSUPP:
+ return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
+
+ case -ENOPKG:
+ return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
+
+ case -EADDRNOTAVAIL:
+ return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
+
+ case -ENOTUNIQ:
+ return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
+
+ case -ENXIO:
+ return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
+
+ case -EPROTONOSUPPORT:
+ return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
+
+ default:
+ if (r < 0)
+ return log_error_errno(r, "Failed to dissect image '%s': %m", name);
+
+ return r;
+ }
+}
+
+bool dissected_image_can_do_verity(const DissectedImage *image, PartitionDesignator partition_designator) {
+ if (image->single_file_system)
+ return partition_designator == PARTITION_ROOT && image->can_verity;
+
+ return PARTITION_VERITY_OF(partition_designator) >= 0;
+}
+
+bool dissected_image_has_verity(const DissectedImage *image, PartitionDesignator partition_designator) {
+ int k;
+
+ if (image->single_file_system)
+ return partition_designator == PARTITION_ROOT && image->verity;
+
+ k = PARTITION_VERITY_OF(partition_designator);
+ return k >= 0 && image->partitions[k].found;
+}
+
+MountOptions* mount_options_free_all(MountOptions *options) {
+ MountOptions *m;
+
+ while ((m = options)) {
+ LIST_REMOVE(mount_options, options, m);
+ free(m->options);
+ free(m);
+ }
+
+ return NULL;
+}
+
+const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator) {
+ const MountOptions *m;
+
+ LIST_FOREACH(mount_options, m, options)
+ if (designator == m->partition_designator && !isempty(m->options))
+ return m->options;
+
+ return NULL;
+}
+
+int mount_image_privately_interactively(
+ const char *image,
+ DissectImageFlags flags,
+ char **ret_directory,
+ LoopDevice **ret_loop_device,
+ DecryptedImage **ret_decrypted_image) {
+
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+ _cleanup_(rmdir_and_freep) char *created_dir = NULL;
+ _cleanup_free_ char *temp = NULL;
+ int r;
+
+ /* Mounts an OS image at a temporary place, inside a newly created mount namespace of our own. This
+ * is used by tools such as systemd-tmpfiles or systemd-firstboot to operate on some disk image
+ * easily. */
+
+ assert(image);
+ assert(ret_directory);
+ assert(ret_loop_device);
+ assert(ret_decrypted_image);
+
+ r = tempfn_random_child(NULL, program_invocation_short_name, &temp);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate temporary mount directory: %m");
+
+ r = loop_device_make_by_path(
+ image,
+ FLAGS_SET(flags, DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR,
+ FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
+ &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up loopback device: %m");
+
+ r = dissect_image_and_warn(d->fd, image, NULL, NULL, flags, &dissected_image);
+ if (r < 0)
+ return r;
+
+ r = dissected_image_decrypt_interactively(dissected_image, NULL, NULL, flags, &decrypted_image);
+ if (r < 0)
+ return r;
+
+ r = detach_mount_namespace();
+ if (r < 0)
+ return log_error_errno(r, "Failed to detach mount namespace: %m");
+
+ r = mkdir_p(temp, 0700);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create mount point: %m");
+
+ created_dir = TAKE_PTR(temp);
+
+ r = dissected_image_mount_and_warn(dissected_image, created_dir, UID_INVALID, flags);
+ if (r < 0)
+ return r;
+
+ if (decrypted_image) {
+ r = decrypted_image_relinquish(decrypted_image);
+ if (r < 0)
+ return log_error_errno(r, "Failed to relinquish DM devices: %m");
+ }
+
+ loop_device_relinquish(d);
+
+ *ret_directory = TAKE_PTR(created_dir);
+ *ret_loop_device = TAKE_PTR(d);
+ *ret_decrypted_image = TAKE_PTR(decrypted_image);
+
+ return 0;
+}
+
+static const char *const partition_designator_table[] = {
+ [PARTITION_ROOT] = "root",
+ [PARTITION_ROOT_SECONDARY] = "root-secondary",
+ [PARTITION_USR] = "usr",
+ [PARTITION_USR_SECONDARY] = "usr-secondary",
+ [PARTITION_HOME] = "home",
+ [PARTITION_SRV] = "srv",
+ [PARTITION_ESP] = "esp",
+ [PARTITION_XBOOTLDR] = "xbootldr",
+ [PARTITION_SWAP] = "swap",
+ [PARTITION_ROOT_VERITY] = "root-verity",
+ [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
+ [PARTITION_USR_VERITY] = "usr-verity",
+ [PARTITION_USR_SECONDARY_VERITY] = "usr-secondary-verity",
+ [PARTITION_TMP] = "tmp",
+ [PARTITION_VAR] = "var",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(partition_designator, PartitionDesignator);
diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h
new file mode 100644
index 0000000..3b30e08
--- /dev/null
+++ b/src/shared/dissect-image.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+#include "list.h"
+#include "loop-util.h"
+#include "macro.h"
+
+typedef struct DissectedImage DissectedImage;
+typedef struct DissectedPartition DissectedPartition;
+typedef struct DecryptedImage DecryptedImage;
+typedef struct MountOptions MountOptions;
+typedef struct VeritySettings VeritySettings;
+
+struct DissectedPartition {
+ bool found:1;
+ bool rw:1;
+ int partno; /* -1 if there was no partition and the images contains a file system directly */
+ int architecture; /* Intended architecture: either native, secondary or unset (-1). */
+ sd_id128_t uuid; /* Partition entry UUID as reported by the GPT */
+ char *fstype;
+ char *node;
+ char *decrypted_node;
+ char *decrypted_fstype;
+ char *mount_options;
+};
+
+typedef enum PartitionDesignator {
+ PARTITION_ROOT,
+ PARTITION_ROOT_SECONDARY, /* Secondary architecture */
+ PARTITION_USR,
+ PARTITION_USR_SECONDARY,
+ PARTITION_HOME,
+ PARTITION_SRV,
+ PARTITION_ESP,
+ PARTITION_XBOOTLDR,
+ PARTITION_SWAP,
+ PARTITION_ROOT_VERITY, /* verity data for the PARTITION_ROOT partition */
+ PARTITION_ROOT_SECONDARY_VERITY, /* verity data for the PARTITION_ROOT_SECONDARY partition */
+ PARTITION_USR_VERITY,
+ PARTITION_USR_SECONDARY_VERITY,
+ PARTITION_TMP,
+ PARTITION_VAR,
+ _PARTITION_DESIGNATOR_MAX,
+ _PARTITION_DESIGNATOR_INVALID = -1
+} PartitionDesignator;
+
+static inline PartitionDesignator PARTITION_VERITY_OF(PartitionDesignator p) {
+ switch (p) {
+
+ case PARTITION_ROOT:
+ return PARTITION_ROOT_VERITY;
+
+ case PARTITION_ROOT_SECONDARY:
+ return PARTITION_ROOT_SECONDARY_VERITY;
+
+ case PARTITION_USR:
+ return PARTITION_USR_VERITY;
+
+ case PARTITION_USR_SECONDARY:
+ return PARTITION_USR_SECONDARY_VERITY;
+
+ default:
+ return _PARTITION_DESIGNATOR_INVALID;
+ }
+}
+
+typedef enum DissectImageFlags {
+ DISSECT_IMAGE_READ_ONLY = 1 << 0,
+ DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */
+ DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */
+ DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */
+ DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP |
+ DISSECT_IMAGE_DISCARD |
+ DISSECT_IMAGE_DISCARD_ON_CRYPTO,
+ DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */
+ DISSECT_IMAGE_REQUIRE_ROOT = 1 << 5, /* Don't accept disks without root partition (and if no partition table or only single generic partition, assume it's root) */
+ DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root and /usr partitions */
+ DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only the non-root and non-/usr partitions */
+ DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifiable as OS images */
+ DISSECT_IMAGE_NO_UDEV = 1 << 9, /* Don't wait for udev initializing things */
+ DISSECT_IMAGE_RELAX_VAR_CHECK = 1 << 10, /* Don't insist that the UUID of /var is hashed from /etc/machine-id */
+ DISSECT_IMAGE_FSCK = 1 << 11, /* File system check the partition before mounting (no effect when combined with DISSECT_IMAGE_READ_ONLY) */
+ DISSECT_IMAGE_NO_PARTITION_TABLE = 1 << 12, /* Only recognize single file system images */
+ DISSECT_IMAGE_VERITY_SHARE = 1 << 13, /* When activating a verity device, reuse existing one if already open */
+ DISSECT_IMAGE_MKDIR = 1 << 14, /* Make directory to mount right before mounting, if missing */
+} DissectImageFlags;
+
+struct DissectedImage {
+ bool encrypted:1;
+ bool verity:1; /* verity available and usable */
+ bool can_verity:1; /* verity available, but not necessarily used */
+ bool single_file_system:1; /* MBR/GPT or single file system */
+
+ DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX];
+
+ char *hostname;
+ sd_id128_t machine_id;
+ char **machine_info;
+ char **os_release;
+};
+
+struct MountOptions {
+ PartitionDesignator partition_designator;
+ char *options;
+ LIST_FIELDS(MountOptions, mount_options);
+};
+
+struct VeritySettings {
+ /* Binary root hash for the Verity Merkle tree */
+ void *root_hash;
+ size_t root_hash_size;
+
+ /* PKCS#7 signature of the above */
+ void *root_hash_sig;
+ size_t root_hash_sig_size;
+
+ /* Path to the verity data file, if stored externally */
+ char *data_path;
+
+ /* PARTITION_ROOT or PARTITION_USR, depending on what these Verity settings are for */
+ PartitionDesignator designator;
+};
+
+#define VERITY_SETTINGS_DEFAULT { \
+ .designator = _PARTITION_DESIGNATOR_INVALID \
+ }
+
+MountOptions* mount_options_free_all(MountOptions *options);
+DEFINE_TRIVIAL_CLEANUP_FUNC(MountOptions*, mount_options_free_all);
+const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator);
+
+int probe_filesystem(const char *node, char **ret_fstype);
+int dissect_image(int fd, const VeritySettings *verity, const MountOptions *mount_options, DissectImageFlags flags, DissectedImage **ret);
+int dissect_image_and_warn(int fd, const char *name, const VeritySettings *verity, const MountOptions *mount_options, DissectImageFlags flags, DissectedImage **ret);
+
+DissectedImage* dissected_image_unref(DissectedImage *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
+
+int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret);
+int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret);
+int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags);
+int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags);
+
+int dissected_image_acquire_metadata(DissectedImage *m);
+
+DecryptedImage* decrypted_image_unref(DecryptedImage *p);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DecryptedImage*, decrypted_image_unref);
+int decrypted_image_relinquish(DecryptedImage *d);
+
+const char* partition_designator_to_string(PartitionDesignator d) _const_;
+PartitionDesignator partition_designator_from_string(const char *name) _pure_;
+
+int verity_settings_load(VeritySettings *verity, const char *image, const char *root_hash_path, const char *root_hash_sig_path);
+void verity_settings_done(VeritySettings *verity);
+
+bool dissected_image_can_do_verity(const DissectedImage *image, PartitionDesignator d);
+bool dissected_image_has_verity(const DissectedImage *image, PartitionDesignator d);
+
+int mount_image_privately_interactively(const char *path, DissectImageFlags flags, char **ret_directory, LoopDevice **ret_loop_device, DecryptedImage **ret_decrypted_image);
diff --git a/src/shared/dm-util.c b/src/shared/dm-util.c
new file mode 100644
index 0000000..b48b9b5
--- /dev/null
+++ b/src/shared/dm-util.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <linux/dm-ioctl.h>
+#include <sys/ioctl.h>
+
+#include "dm-util.h"
+#include "fd-util.h"
+#include "string-util.h"
+
+int dm_deferred_remove_cancel(const char *name) {
+ _cleanup_close_ int fd = -1;
+ struct message {
+ struct dm_ioctl dm_ioctl;
+ struct dm_target_msg dm_target_msg;
+ char msg_text[STRLEN("@cancel_deferred_remove") + 1];
+ } _packed_ message = {
+ .dm_ioctl = {
+ .version = {
+ DM_VERSION_MAJOR,
+ DM_VERSION_MINOR,
+ DM_VERSION_PATCHLEVEL
+ },
+ .data_size = sizeof(struct message),
+ .data_start = sizeof(struct dm_ioctl),
+ },
+ .msg_text = "@cancel_deferred_remove",
+ };
+
+ assert(name);
+
+ if (strlen(name) >= sizeof(message.dm_ioctl.name))
+ return -ENODEV; /* A device with a name longer than this cannot possibly exist */
+
+ strncpy_exact(message.dm_ioctl.name, name, sizeof(message.dm_ioctl.name));
+
+ fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ioctl(fd, DM_TARGET_MSG, &message))
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/dm-util.h b/src/shared/dm-util.h
new file mode 100644
index 0000000..e6e3d7d
--- /dev/null
+++ b/src/shared/dm-util.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int dm_deferred_remove_cancel(const char *name);
diff --git a/src/shared/dns-domain.c b/src/shared/dns-domain.c
new file mode 100644
index 0000000..ec42b29
--- /dev/null
+++ b/src/shared/dns-domain.c
@@ -0,0 +1,1414 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "idn-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags) {
+ const char *n;
+ char *d, last_char = 0;
+ int r = 0;
+
+ assert(name);
+ assert(*name);
+
+ n = *name;
+ d = dest;
+
+ for (;;) {
+ if (IN_SET(*n, 0, '.')) {
+ if (FLAGS_SET(flags, DNS_LABEL_LDH) && last_char == '-')
+ /* Trailing dash */
+ return -EINVAL;
+
+ if (n[0] == '.' && (n[1] != 0 || !FLAGS_SET(flags, DNS_LABEL_LEAVE_TRAILING_DOT)))
+ n++;
+
+ break;
+ }
+
+ if (r >= DNS_LABEL_MAX)
+ return -EINVAL;
+
+ if (sz <= 0)
+ return -ENOBUFS;
+
+ if (*n == '\\') {
+ /* Escaped character */
+ if (FLAGS_SET(flags, DNS_LABEL_NO_ESCAPES))
+ return -EINVAL;
+
+ n++;
+
+ if (*n == 0)
+ /* Ending NUL */
+ return -EINVAL;
+
+ else if (IN_SET(*n, '\\', '.')) {
+ /* Escaped backslash or dot */
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH))
+ return -EINVAL;
+
+ last_char = *n;
+ if (d)
+ *(d++) = *n;
+ sz--;
+ r++;
+ n++;
+
+ } else if (n[0] >= '0' && n[0] <= '9') {
+ unsigned k;
+
+ /* Escaped literal ASCII character */
+
+ if (!(n[1] >= '0' && n[1] <= '9') ||
+ !(n[2] >= '0' && n[2] <= '9'))
+ return -EINVAL;
+
+ k = ((unsigned) (n[0] - '0') * 100) +
+ ((unsigned) (n[1] - '0') * 10) +
+ ((unsigned) (n[2] - '0'));
+
+ /* Don't allow anything that doesn't
+ * fit in 8bit. Note that we do allow
+ * control characters, as some servers
+ * (e.g. cloudflare) are happy to
+ * generate labels with them
+ * inside. */
+ if (k > 255)
+ return -EINVAL;
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH) &&
+ !valid_ldh_char((char) k))
+ return -EINVAL;
+
+ last_char = (char) k;
+ if (d)
+ *(d++) = (char) k;
+ sz--;
+ r++;
+
+ n += 3;
+ } else
+ return -EINVAL;
+
+ } else if ((uint8_t) *n >= (uint8_t) ' ' && *n != 127) {
+
+ /* Normal character */
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH)) {
+ if (!valid_ldh_char(*n))
+ return -EINVAL;
+ if (r == 0 && *n == '-')
+ /* Leading dash */
+ return -EINVAL;
+ }
+
+ last_char = *n;
+ if (d)
+ *(d++) = *n;
+ sz--;
+ r++;
+ n++;
+ } else
+ return -EINVAL;
+ }
+
+ /* Empty label that is not at the end? */
+ if (r == 0 && *n)
+ return -EINVAL;
+
+ /* More than one trailing dot? */
+ if (n[0] == '.' && !FLAGS_SET(flags, DNS_LABEL_LEAVE_TRAILING_DOT))
+ return -EINVAL;
+
+ if (sz >= 1 && d)
+ *d = 0;
+
+ *name = n;
+ return r;
+}
+
+/* @label_terminal: terminal character of a label, updated to point to the terminal character of
+ * the previous label (always skipping one dot) or to NULL if there are no more
+ * labels. */
+int dns_label_unescape_suffix(const char *name, const char **label_terminal, char *dest, size_t sz) {
+ const char *terminal;
+ int r;
+
+ assert(name);
+ assert(label_terminal);
+ assert(dest);
+
+ /* no more labels */
+ if (!*label_terminal) {
+ if (sz >= 1)
+ *dest = 0;
+
+ return 0;
+ }
+
+ terminal = *label_terminal;
+ assert(IN_SET(*terminal, 0, '.'));
+
+ /* Skip current terminal character (and accept domain names ending it ".") */
+ if (*terminal == 0)
+ terminal--;
+ if (terminal >= name && *terminal == '.')
+ terminal--;
+
+ /* Point name to the last label, and terminal to the preceding terminal symbol (or make it a NULL pointer) */
+ for (;;) {
+ if (terminal < name) {
+ /* Reached the first label, so indicate that there are no more */
+ terminal = NULL;
+ break;
+ }
+
+ /* Find the start of the last label */
+ if (*terminal == '.') {
+ const char *y;
+ unsigned slashes = 0;
+
+ for (y = terminal - 1; y >= name && *y == '\\'; y--)
+ slashes++;
+
+ if (slashes % 2 == 0) {
+ /* The '.' was not escaped */
+ name = terminal + 1;
+ break;
+ } else {
+ terminal = y;
+ continue;
+ }
+ }
+
+ terminal--;
+ }
+
+ r = dns_label_unescape(&name, dest, sz, 0);
+ if (r < 0)
+ return r;
+
+ *label_terminal = terminal;
+
+ return r;
+}
+
+int dns_label_escape(const char *p, size_t l, char *dest, size_t sz) {
+ char *q;
+
+ /* DNS labels must be between 1 and 63 characters long. A
+ * zero-length label does not exist. See RFC 2182, Section
+ * 11. */
+
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+ if (sz < 1)
+ return -ENOBUFS;
+
+ assert(p);
+ assert(dest);
+
+ q = dest;
+ while (l > 0) {
+
+ if (IN_SET(*p, '.', '\\')) {
+
+ /* Dot or backslash */
+
+ if (sz < 3)
+ return -ENOBUFS;
+
+ *(q++) = '\\';
+ *(q++) = *p;
+
+ sz -= 2;
+
+ } else if (IN_SET(*p, '_', '-') ||
+ (*p >= '0' && *p <= '9') ||
+ (*p >= 'a' && *p <= 'z') ||
+ (*p >= 'A' && *p <= 'Z')) {
+
+ /* Proper character */
+
+ if (sz < 2)
+ return -ENOBUFS;
+
+ *(q++) = *p;
+ sz -= 1;
+
+ } else {
+
+ /* Everything else */
+
+ if (sz < 5)
+ return -ENOBUFS;
+
+ *(q++) = '\\';
+ *(q++) = '0' + (char) ((uint8_t) *p / 100);
+ *(q++) = '0' + (char) (((uint8_t) *p / 10) % 10);
+ *(q++) = '0' + (char) ((uint8_t) *p % 10);
+
+ sz -= 4;
+ }
+
+ p++;
+ l--;
+ }
+
+ *q = 0;
+ return (int) (q - dest);
+}
+
+int dns_label_escape_new(const char *p, size_t l, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ s = new(char, DNS_LABEL_ESCAPED_MAX);
+ if (!s)
+ return -ENOMEM;
+
+ r = dns_label_escape(p, l, s, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+
+ return r;
+}
+
+#if HAVE_LIBIDN
+int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) {
+ _cleanup_free_ uint32_t *input = NULL;
+ size_t input_size, l;
+ const char *p;
+ bool contains_8bit = false;
+ char buffer[DNS_LABEL_MAX+1];
+ int r;
+
+ assert(encoded);
+ assert(decoded);
+
+ /* Converts an U-label into an A-label */
+
+ r = dlopen_idn();
+ if (r < 0)
+ return r;
+
+ if (encoded_size <= 0)
+ return -EINVAL;
+
+ for (p = encoded; p < encoded + encoded_size; p++)
+ if ((uint8_t) *p > 127)
+ contains_8bit = true;
+
+ if (!contains_8bit) {
+ if (encoded_size > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ return 0;
+ }
+
+ input = sym_stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
+ if (!input)
+ return -ENOMEM;
+
+ if (sym_idna_to_ascii_4i(input, input_size, buffer, 0) != 0)
+ return -EINVAL;
+
+ l = strlen(buffer);
+
+ /* Verify that the result is not longer than one DNS label. */
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+ if (l > decoded_max)
+ return -ENOBUFS;
+
+ memcpy(decoded, buffer, l);
+
+ /* If there's room, append a trailing NUL byte, but only then */
+ if (decoded_max > l)
+ decoded[l] = 0;
+
+ return (int) l;
+}
+
+int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) {
+ size_t input_size, output_size;
+ _cleanup_free_ uint32_t *input = NULL;
+ _cleanup_free_ char *result = NULL;
+ uint32_t *output = NULL;
+ size_t w;
+ int r;
+
+ /* To be invoked after unescaping. Converts an A-label into an U-label. */
+
+ assert(encoded);
+ assert(decoded);
+
+ r = dlopen_idn();
+ if (r < 0)
+ return r;
+
+ if (encoded_size <= 0 || encoded_size > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ if (!memory_startswith(encoded, encoded_size, IDNA_ACE_PREFIX))
+ return 0;
+
+ input = sym_stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
+ if (!input)
+ return -ENOMEM;
+
+ output_size = input_size;
+ output = newa(uint32_t, output_size);
+
+ sym_idna_to_unicode_44i(input, input_size, output, &output_size, 0);
+
+ result = sym_stringprep_ucs4_to_utf8(output, output_size, NULL, &w);
+ if (!result)
+ return -ENOMEM;
+ if (w <= 0)
+ return -EINVAL;
+ if (w > decoded_max)
+ return -ENOBUFS;
+
+ memcpy(decoded, result, w);
+
+ /* Append trailing NUL byte if there's space, but only then. */
+ if (decoded_max > w)
+ decoded[w] = 0;
+
+ return w;
+}
+#endif
+
+int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **_ret) {
+ _cleanup_free_ char *ret = NULL;
+ size_t n = 0, allocated = 0;
+ const char *p;
+ bool first = true;
+ int r;
+
+ if (a)
+ p = a;
+ else if (b)
+ p = TAKE_PTR(b);
+ else
+ goto finish;
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&p, label, sizeof label, flags);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (*p != 0)
+ return -EINVAL;
+
+ if (b) {
+ /* Now continue with the second string, if there is one */
+ p = TAKE_PTR(b);
+ continue;
+ }
+
+ break;
+ }
+
+ if (_ret) {
+ if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ r = dns_label_escape(label, r, ret + n + !first, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ if (!first)
+ ret[n] = '.';
+ } else {
+ char escaped[DNS_LABEL_ESCAPED_MAX];
+
+ r = dns_label_escape(label, r, escaped, sizeof(escaped));
+ if (r < 0)
+ return r;
+ }
+
+ if (!first)
+ n++;
+ else
+ first = false;
+
+ n += r;
+ }
+
+finish:
+ if (n > DNS_HOSTNAME_MAX)
+ return -EINVAL;
+
+ if (_ret) {
+ if (n == 0) {
+ /* Nothing appended? If so, generate at least a single dot, to indicate the DNS root domain */
+ if (!GREEDY_REALLOC(ret, allocated, 2))
+ return -ENOMEM;
+
+ ret[n++] = '.';
+ } else {
+ if (!GREEDY_REALLOC(ret, allocated, n + 1))
+ return -ENOMEM;
+ }
+
+ ret[n] = 0;
+ *_ret = TAKE_PTR(ret);
+ }
+
+ return 0;
+}
+
+void dns_name_hash_func(const char *p, struct siphash *state) {
+ int r;
+
+ assert(p);
+
+ for (;;) {
+ char label[DNS_LABEL_MAX+1];
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ break;
+ if (r == 0)
+ break;
+
+ ascii_strlower_n(label, r);
+ siphash24_compress(label, r, state);
+ siphash24_compress_byte(0, state); /* make sure foobar and foo.bar result in different hashes */
+ }
+
+ /* enforce that all names are terminated by the empty label */
+ string_hash_func("", state);
+}
+
+int dns_name_compare_func(const char *a, const char *b) {
+ const char *x, *y;
+ int r, q;
+
+ assert(a);
+ assert(b);
+
+ x = a + strlen(a);
+ y = b + strlen(b);
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+
+ if (x == NULL && y == NULL)
+ return 0;
+
+ r = dns_label_unescape_suffix(a, &x, la, sizeof(la));
+ q = dns_label_unescape_suffix(b, &y, lb, sizeof(lb));
+ if (r < 0 || q < 0)
+ return CMP(r, q);
+
+ r = ascii_strcasecmp_nn(la, r, lb, q);
+ if (r != 0)
+ return r;
+ }
+}
+
+DEFINE_HASH_OPS(dns_name_hash_ops, char, dns_name_hash_func, dns_name_compare_func);
+
+int dns_name_equal(const char *x, const char *y) {
+ int r, q;
+
+ assert(x);
+ assert(y);
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&x, la, sizeof la, 0);
+ if (r < 0)
+ return r;
+
+ q = dns_label_unescape(&y, lb, sizeof lb, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q)
+ return false;
+ if (r == 0)
+ return true;
+
+ if (ascii_strcasecmp_n(la, lb, r) != 0)
+ return false;
+ }
+}
+
+int dns_name_endswith(const char *name, const char *suffix) {
+ const char *n, *s, *saved_n = NULL;
+ int r, q;
+
+ assert(name);
+ assert(suffix);
+
+ n = name;
+ s = suffix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (r < 0)
+ return r;
+
+ if (!saved_n)
+ saved_n = n;
+
+ q = dns_label_unescape(&s, ls, sizeof ls, 0);
+ if (q < 0)
+ return q;
+
+ if (r == 0 && q == 0)
+ return true;
+ if (r == 0 && saved_n == n)
+ return false;
+
+ if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) {
+
+ /* Not the same, let's jump back, and try with the next label again */
+ s = suffix;
+ n = TAKE_PTR(saved_n);
+ }
+ }
+}
+
+int dns_name_startswith(const char *name, const char *prefix) {
+ const char *n, *p;
+ int r, q;
+
+ assert(name);
+ assert(prefix);
+
+ n = name;
+ p = prefix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], lp[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&p, lp, sizeof lp, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return true;
+
+ q = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q)
+ return false;
+ if (ascii_strcasecmp_n(ln, lp, r) != 0)
+ return false;
+ }
+}
+
+int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret) {
+ const char *n, *s, *saved_before = NULL, *saved_after = NULL, *prefix;
+ int r, q;
+
+ assert(name);
+ assert(old_suffix);
+ assert(new_suffix);
+ assert(ret);
+
+ n = name;
+ s = old_suffix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX];
+
+ if (!saved_before)
+ saved_before = n;
+
+ r = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (r < 0)
+ return r;
+
+ if (!saved_after)
+ saved_after = n;
+
+ q = dns_label_unescape(&s, ls, sizeof ls, 0);
+ if (q < 0)
+ return q;
+
+ if (r == 0 && q == 0)
+ break;
+ if (r == 0 && saved_after == n) {
+ *ret = NULL; /* doesn't match */
+ return 0;
+ }
+
+ if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) {
+
+ /* Not the same, let's jump back, and try with the next label again */
+ s = old_suffix;
+ n = TAKE_PTR(saved_after);
+ saved_before = NULL;
+ }
+ }
+
+ /* Found it! Now generate the new name */
+ prefix = strndupa(name, saved_before - name);
+
+ r = dns_name_concat(prefix, new_suffix, 0, ret);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int dns_name_between(const char *a, const char *b, const char *c) {
+ /* Determine if b is strictly greater than a and strictly smaller than c.
+ We consider the order of names to be circular, so that if a is
+ strictly greater than c, we consider b to be between them if it is
+ either greater than a or smaller than c. This is how the canonical
+ DNS name order used in NSEC records work. */
+
+ if (dns_name_compare_func(a, c) < 0)
+ /*
+ a and c are properly ordered:
+ a<---b--->c
+ */
+ return dns_name_compare_func(a, b) < 0 &&
+ dns_name_compare_func(b, c) < 0;
+ else
+ /*
+ a and c are equal or 'reversed':
+ <--b--c a----->
+ or:
+ <-----c a--b-->
+ */
+ return dns_name_compare_func(b, c) < 0 ||
+ dns_name_compare_func(a, b) < 0;
+}
+
+int dns_name_reverse(int family, const union in_addr_union *a, char **ret) {
+ const uint8_t *p;
+ int r;
+
+ assert(a);
+ assert(ret);
+
+ p = (const uint8_t*) a;
+
+ if (family == AF_INET)
+ r = asprintf(ret, "%u.%u.%u.%u.in-addr.arpa", p[3], p[2], p[1], p[0]);
+ else if (family == AF_INET6)
+ r = asprintf(ret, "%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.ip6.arpa",
+ hexchar(p[15] & 0xF), hexchar(p[15] >> 4), hexchar(p[14] & 0xF), hexchar(p[14] >> 4),
+ hexchar(p[13] & 0xF), hexchar(p[13] >> 4), hexchar(p[12] & 0xF), hexchar(p[12] >> 4),
+ hexchar(p[11] & 0xF), hexchar(p[11] >> 4), hexchar(p[10] & 0xF), hexchar(p[10] >> 4),
+ hexchar(p[ 9] & 0xF), hexchar(p[ 9] >> 4), hexchar(p[ 8] & 0xF), hexchar(p[ 8] >> 4),
+ hexchar(p[ 7] & 0xF), hexchar(p[ 7] >> 4), hexchar(p[ 6] & 0xF), hexchar(p[ 6] >> 4),
+ hexchar(p[ 5] & 0xF), hexchar(p[ 5] >> 4), hexchar(p[ 4] & 0xF), hexchar(p[ 4] >> 4),
+ hexchar(p[ 3] & 0xF), hexchar(p[ 3] >> 4), hexchar(p[ 2] & 0xF), hexchar(p[ 2] >> 4),
+ hexchar(p[ 1] & 0xF), hexchar(p[ 1] >> 4), hexchar(p[ 0] & 0xF), hexchar(p[ 0] >> 4));
+ else
+ return -EAFNOSUPPORT;
+ if (r < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int dns_name_address(const char *p, int *family, union in_addr_union *address) {
+ int r;
+
+ assert(p);
+ assert(family);
+ assert(address);
+
+ r = dns_name_endswith(p, "in-addr.arpa");
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ uint8_t a[4];
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(a); i++) {
+ char label[DNS_LABEL_MAX+1];
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (r > 3)
+ return -EINVAL;
+
+ r = safe_atou8(label, &a[i]);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_name_equal(p, "in-addr.arpa");
+ if (r <= 0)
+ return r;
+
+ *family = AF_INET;
+ address->in.s_addr = htobe32(((uint32_t) a[3] << 24) |
+ ((uint32_t) a[2] << 16) |
+ ((uint32_t) a[1] << 8) |
+ (uint32_t) a[0]);
+
+ return 1;
+ }
+
+ r = dns_name_endswith(p, "ip6.arpa");
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ struct in6_addr a;
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(a.s6_addr); i++) {
+ char label[DNS_LABEL_MAX+1];
+ int x, y;
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1)
+ return -EINVAL;
+ x = unhexchar(label[0]);
+ if (x < 0)
+ return -EINVAL;
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1)
+ return -EINVAL;
+ y = unhexchar(label[0]);
+ if (y < 0)
+ return -EINVAL;
+
+ a.s6_addr[ELEMENTSOF(a.s6_addr) - i - 1] = (uint8_t) y << 4 | (uint8_t) x;
+ }
+
+ r = dns_name_equal(p, "ip6.arpa");
+ if (r <= 0)
+ return r;
+
+ *family = AF_INET6;
+ address->in6 = a;
+ return 1;
+ }
+
+ return 0;
+}
+
+bool dns_name_is_root(const char *name) {
+
+ assert(name);
+
+ /* There are exactly two ways to encode the root domain name:
+ * as empty string, or with a single dot. */
+
+ return STR_IN_SET(name, "", ".");
+}
+
+bool dns_name_is_single_label(const char *name) {
+ int r;
+
+ assert(name);
+
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return false;
+
+ return dns_name_is_root(name);
+}
+
+/* Encode a domain name according to RFC 1035 Section 3.1, without compression */
+int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical) {
+ uint8_t *label_length, *out;
+ int r;
+
+ assert(domain);
+ assert(buffer);
+
+ out = buffer;
+
+ do {
+ /* Reserve a byte for label length */
+ if (len <= 0)
+ return -ENOBUFS;
+ len--;
+ label_length = out;
+ out++;
+
+ /* Convert and copy a single label. Note that
+ * dns_label_unescape() returns 0 when it hits the end
+ * of the domain name, which we rely on here to encode
+ * the trailing NUL byte. */
+ r = dns_label_unescape(&domain, (char *) out, len, 0);
+ if (r < 0)
+ return r;
+
+ /* Optionally, output the name in DNSSEC canonical
+ * format, as described in RFC 4034, section 6.2. Or
+ * in other words: in lower-case. */
+ if (canonical)
+ ascii_strlower_n((char*) out, (size_t) r);
+
+ /* Fill label length, move forward */
+ *label_length = r;
+ out += r;
+ len -= r;
+
+ } while (r != 0);
+
+ /* Verify the maximum size of the encoded name. The trailing
+ * dot + NUL byte account are included this time, hence
+ * compare against DNS_HOSTNAME_MAX + 2 (which is 255) this
+ * time. */
+ if (out - buffer > DNS_HOSTNAME_MAX + 2)
+ return -EINVAL;
+
+ return out - buffer;
+}
+
+static bool srv_type_label_is_valid(const char *label, size_t n) {
+ size_t k;
+
+ assert(label);
+
+ if (n < 2) /* Label needs to be at least 2 chars long */
+ return false;
+
+ if (label[0] != '_') /* First label char needs to be underscore */
+ return false;
+
+ /* Second char must be a letter */
+ if (!(label[1] >= 'A' && label[1] <= 'Z') &&
+ !(label[1] >= 'a' && label[1] <= 'z'))
+ return false;
+
+ /* Third and further chars must be alphanumeric or a hyphen */
+ for (k = 2; k < n; k++) {
+ if (!(label[k] >= 'A' && label[k] <= 'Z') &&
+ !(label[k] >= 'a' && label[k] <= 'z') &&
+ !(label[k] >= '0' && label[k] <= '9') &&
+ label[k] != '-')
+ return false;
+ }
+
+ return true;
+}
+
+bool dns_srv_type_is_valid(const char *name) {
+ unsigned c = 0;
+ int r;
+
+ if (!name)
+ return false;
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ /* This more or less implements RFC 6335, Section 5.1 */
+
+ r = dns_label_unescape(&name, label, sizeof label, 0);
+ if (r < 0)
+ return false;
+ if (r == 0)
+ break;
+
+ if (c >= 2)
+ return false;
+
+ if (!srv_type_label_is_valid(label, r))
+ return false;
+
+ c++;
+ }
+
+ return c == 2; /* exactly two labels */
+}
+
+bool dnssd_srv_type_is_valid(const char *name) {
+ return dns_srv_type_is_valid(name) &&
+ ((dns_name_endswith(name, "_tcp") > 0) ||
+ (dns_name_endswith(name, "_udp") > 0)); /* Specific to DNS-SD. RFC 6763, Section 7 */
+}
+
+bool dns_service_name_is_valid(const char *name) {
+ size_t l;
+
+ /* This more or less implements RFC 6763, Section 4.1.1 */
+
+ if (!name)
+ return false;
+
+ if (!utf8_is_valid(name))
+ return false;
+
+ if (string_has_cc(name, NULL))
+ return false;
+
+ l = strlen(name);
+ if (l <= 0)
+ return false;
+ if (l > 63)
+ return false;
+
+ return true;
+}
+
+int dns_service_join(const char *name, const char *type, const char *domain, char **ret) {
+ char escaped[DNS_LABEL_ESCAPED_MAX];
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(type);
+ assert(domain);
+ assert(ret);
+
+ if (!dns_srv_type_is_valid(type))
+ return -EINVAL;
+
+ if (!name)
+ return dns_name_concat(type, domain, 0, ret);
+
+ if (!dns_service_name_is_valid(name))
+ return -EINVAL;
+
+ r = dns_label_escape(name, strlen(name), escaped, sizeof(escaped));
+ if (r < 0)
+ return r;
+
+ r = dns_name_concat(type, domain, 0, &n);
+ if (r < 0)
+ return r;
+
+ return dns_name_concat(escaped, n, 0, ret);
+}
+
+static bool dns_service_name_label_is_valid(const char *label, size_t n) {
+ char *s;
+
+ assert(label);
+
+ if (memchr(label, 0, n))
+ return false;
+
+ s = strndupa(label, n);
+ return dns_service_name_is_valid(s);
+}
+
+int dns_service_split(const char *joined, char **_name, char **_type, char **_domain) {
+ _cleanup_free_ char *name = NULL, *type = NULL, *domain = NULL;
+ const char *p = joined, *q = NULL, *d = NULL;
+ char a[DNS_LABEL_MAX], b[DNS_LABEL_MAX], c[DNS_LABEL_MAX];
+ int an, bn, cn, r;
+ unsigned x = 0;
+
+ assert(joined);
+
+ /* Get first label from the full name */
+ an = dns_label_unescape(&p, a, sizeof(a), 0);
+ if (an < 0)
+ return an;
+
+ if (an > 0) {
+ x++;
+
+ /* If there was a first label, try to get the second one */
+ bn = dns_label_unescape(&p, b, sizeof(b), 0);
+ if (bn < 0)
+ return bn;
+
+ if (bn > 0) {
+ x++;
+
+ /* If there was a second label, try to get the third one */
+ q = p;
+ cn = dns_label_unescape(&p, c, sizeof(c), 0);
+ if (cn < 0)
+ return cn;
+
+ if (cn > 0)
+ x++;
+ } else
+ cn = 0;
+ } else
+ an = 0;
+
+ if (x >= 2 && srv_type_label_is_valid(b, bn)) {
+
+ if (x >= 3 && srv_type_label_is_valid(c, cn)) {
+
+ if (dns_service_name_label_is_valid(a, an)) {
+ /* OK, got <name> . <type> . <type2> . <domain> */
+
+ name = strndup(a, an);
+ if (!name)
+ return -ENOMEM;
+
+ type = strjoin(b, ".", c);
+ if (!type)
+ return -ENOMEM;
+
+ d = p;
+ goto finish;
+ }
+
+ } else if (srv_type_label_is_valid(a, an)) {
+
+ /* OK, got <type> . <type2> . <domain> */
+
+ name = NULL;
+
+ type = strjoin(a, ".", b);
+ if (!type)
+ return -ENOMEM;
+
+ d = q;
+ goto finish;
+ }
+ }
+
+ name = NULL;
+ type = NULL;
+ d = joined;
+
+finish:
+ r = dns_name_normalize(d, 0, &domain);
+ if (r < 0)
+ return r;
+
+ if (_domain)
+ *_domain = TAKE_PTR(domain);
+
+ if (_type)
+ *_type = TAKE_PTR(type);
+
+ if (_name)
+ *_name = TAKE_PTR(name);
+
+ return 0;
+}
+
+static int dns_name_build_suffix_table(const char *name, const char *table[]) {
+ const char *p;
+ unsigned n = 0;
+ int r;
+
+ assert(name);
+ assert(table);
+
+ p = name;
+ for (;;) {
+ if (n > DNS_N_LABELS_MAX)
+ return -EINVAL;
+
+ table[n] = p;
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ n++;
+ }
+
+ return (int) n;
+}
+
+int dns_name_suffix(const char *name, unsigned n_labels, const char **ret) {
+ const char* labels[DNS_N_LABELS_MAX+1];
+ int n;
+
+ assert(name);
+ assert(ret);
+
+ n = dns_name_build_suffix_table(name, labels);
+ if (n < 0)
+ return n;
+
+ if ((unsigned) n < n_labels)
+ return -EINVAL;
+
+ *ret = labels[n - n_labels];
+ return (int) (n - n_labels);
+}
+
+int dns_name_skip(const char *a, unsigned n_labels, const char **ret) {
+ int r;
+
+ assert(a);
+ assert(ret);
+
+ for (; n_labels > 0; n_labels--) {
+ r = dns_name_parent(&a);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ret = "";
+ return 0;
+ }
+ }
+
+ *ret = a;
+ return 1;
+}
+
+int dns_name_count_labels(const char *name) {
+ unsigned n = 0;
+ const char *p;
+ int r;
+
+ assert(name);
+
+ p = name;
+ for (;;) {
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (n >= DNS_N_LABELS_MAX)
+ return -EINVAL;
+
+ n++;
+ }
+
+ return (int) n;
+}
+
+int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ r = dns_name_skip(a, n_labels, &a);
+ if (r <= 0)
+ return r;
+
+ return dns_name_equal(a, b);
+}
+
+int dns_name_common_suffix(const char *a, const char *b, const char **ret) {
+ const char *a_labels[DNS_N_LABELS_MAX+1], *b_labels[DNS_N_LABELS_MAX+1];
+ int n = 0, m = 0, k = 0, r, q;
+
+ assert(a);
+ assert(b);
+ assert(ret);
+
+ /* Determines the common suffix of domain names a and b */
+
+ n = dns_name_build_suffix_table(a, a_labels);
+ if (n < 0)
+ return n;
+
+ m = dns_name_build_suffix_table(b, b_labels);
+ if (m < 0)
+ return m;
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+ const char *x, *y;
+
+ if (k >= n || k >= m) {
+ *ret = a_labels[n - k];
+ return 0;
+ }
+
+ x = a_labels[n - 1 - k];
+ r = dns_label_unescape(&x, la, sizeof la, 0);
+ if (r < 0)
+ return r;
+
+ y = b_labels[m - 1 - k];
+ q = dns_label_unescape(&y, lb, sizeof lb, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q || ascii_strcasecmp_n(la, lb, r) != 0) {
+ *ret = a_labels[n - k];
+ return 0;
+ }
+
+ k++;
+ }
+}
+
+int dns_name_apply_idna(const char *name, char **ret) {
+
+ /* Return negative on error, 0 if not implemented, positive on success. */
+
+#if HAVE_LIBIDN2 || HAVE_LIBIDN2
+ int r;
+
+ r = dlopen_idn();
+ if (r == -EOPNOTSUPP) {
+ *ret = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+#endif
+
+#if HAVE_LIBIDN2
+ _cleanup_free_ char *t = NULL;
+
+ assert(name);
+ assert(ret);
+
+ /* First, try non-transitional mode (i.e. IDN2008 rules) */
+ r = sym_idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t,
+ IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL);
+ if (r == IDN2_DISALLOWED) /* If that failed, because of disallowed characters, try transitional mode.
+ * (i.e. IDN2003 rules which supports some unicode chars IDN2008 doesn't allow). */
+ r = sym_idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t,
+ IDN2_NFC_INPUT | IDN2_TRANSITIONAL);
+
+ log_debug("idn2_lookup_u8: %s → %s", name, t);
+ if (r == IDN2_OK) {
+ if (!startswith(name, "xn--")) {
+ _cleanup_free_ char *s = NULL;
+
+ r = sym_idn2_to_unicode_8z8z(t, &s, 0);
+ if (r != IDN2_OK) {
+ log_debug("idn2_to_unicode_8z8z(\"%s\") failed: %d/%s",
+ t, r, sym_idn2_strerror(r));
+ return 0;
+ }
+
+ if (!streq_ptr(name, s)) {
+ log_debug("idn2 roundtrip failed: \"%s\" → \"%s\" → \"%s\", ignoring.",
+ name, t, s);
+ return 0;
+ }
+ }
+
+ *ret = TAKE_PTR(t);
+
+ return 1; /* *ret has been written */
+ }
+
+ log_debug("idn2_lookup_u8(\"%s\") failed: %d/%s", name, r, sym_idn2_strerror(r));
+ if (r == IDN2_2HYPHEN)
+ /* The name has two hyphens — forbidden by IDNA2008 in some cases */
+ return 0;
+ if (IN_SET(r, IDN2_TOO_BIG_DOMAIN, IDN2_TOO_BIG_LABEL))
+ return -ENOSPC;
+ return -EINVAL;
+#elif HAVE_LIBIDN
+ _cleanup_free_ char *buf = NULL;
+ size_t n = 0, allocated = 0;
+ bool first = true;
+ int r, q;
+
+ assert(name);
+ assert(ret);
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&name, label, sizeof label, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ q = dns_label_apply_idna(label, r, label, sizeof label);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ r = q;
+
+ if (!GREEDY_REALLOC(buf, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ r = dns_label_escape(label, r, buf + n + !first, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ if (first)
+ first = false;
+ else
+ buf[n++] = '.';
+
+ n += r;
+ }
+
+ if (n > DNS_HOSTNAME_MAX)
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(buf, allocated, n + 1))
+ return -ENOMEM;
+
+ buf[n] = 0;
+ *ret = TAKE_PTR(buf);
+
+ return 1;
+#else
+ *ret = NULL;
+ return 0;
+#endif
+}
+
+int dns_name_is_valid_or_address(const char *name) {
+ /* Returns > 0 if the specified name is either a valid IP address formatted as string or a valid DNS name */
+
+ if (isempty(name))
+ return 0;
+
+ if (in_addr_from_string_auto(name, NULL, NULL) >= 0)
+ return 1;
+
+ return dns_name_is_valid(name);
+}
+
+int dns_name_dot_suffixed(const char *name) {
+ const char *p = name;
+ int r;
+
+ for (;;) {
+ if (streq(p, "."))
+ return true;
+
+ r = dns_label_unescape(&p, NULL, DNS_LABEL_MAX, DNS_LABEL_LEAVE_TRAILING_DOT);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false;
+ }
+}
diff --git a/src/shared/dns-domain.h b/src/shared/dns-domain.h
new file mode 100644
index 0000000..77f5962
--- /dev/null
+++ b/src/shared/dns-domain.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hashmap.h"
+#include "in-addr-util.h"
+
+/* Length of a single label, with all escaping removed, excluding any trailing dot or NUL byte */
+#define DNS_LABEL_MAX 63
+
+/* Worst case length of a single label, with all escaping applied and room for a trailing NUL byte. */
+#define DNS_LABEL_ESCAPED_MAX (DNS_LABEL_MAX*4+1)
+
+/* Maximum length of a full hostname, consisting of a series of unescaped labels, and no trailing dot or NUL byte */
+#define DNS_HOSTNAME_MAX 253
+
+/* Maximum length of a full hostname, on the wire, including the final NUL byte */
+#define DNS_WIRE_FORMAT_HOSTNAME_MAX 255
+
+/* Maximum number of labels per valid hostname */
+#define DNS_N_LABELS_MAX 127
+
+typedef enum DNSLabelFlags {
+ DNS_LABEL_LDH = 1 << 0, /* Follow the "LDH" rule — only letters, digits, and internal hyphens. */
+ DNS_LABEL_NO_ESCAPES = 1 << 1, /* Do not treat backslashes specially */
+ DNS_LABEL_LEAVE_TRAILING_DOT = 1 << 2, /* Leave trailing dot in place */
+} DNSLabelFlags;
+
+int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags);
+int dns_label_unescape_suffix(const char *name, const char **label_end, char *dest, size_t sz);
+int dns_label_escape(const char *p, size_t l, char *dest, size_t sz);
+int dns_label_escape_new(const char *p, size_t l, char **ret);
+
+static inline int dns_name_parent(const char **name) {
+ return dns_label_unescape(name, NULL, DNS_LABEL_MAX, 0);
+}
+
+#if HAVE_LIBIDN
+int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max);
+int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max);
+#endif
+
+int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **ret);
+
+static inline int dns_name_normalize(const char *s, DNSLabelFlags flags, char **ret) {
+ /* dns_name_concat() normalizes as a side-effect */
+ return dns_name_concat(s, NULL, flags, ret);
+}
+
+static inline int dns_name_is_valid(const char *s) {
+ int r;
+
+ /* dns_name_normalize() verifies as a side effect */
+ r = dns_name_normalize(s, 0, NULL);
+ if (r == -EINVAL)
+ return 0;
+ if (r < 0)
+ return r;
+ return 1;
+}
+
+static inline int dns_name_is_valid_ldh(const char *s) {
+ int r;
+
+ r = dns_name_concat(s, NULL, DNS_LABEL_LDH|DNS_LABEL_NO_ESCAPES, NULL);
+ if (r == -EINVAL)
+ return 0;
+ if (r < 0)
+ return r;
+ return 1;
+}
+
+void dns_name_hash_func(const char *s, struct siphash *state);
+int dns_name_compare_func(const char *a, const char *b);
+extern const struct hash_ops dns_name_hash_ops;
+
+int dns_name_between(const char *a, const char *b, const char *c);
+int dns_name_equal(const char *x, const char *y);
+int dns_name_endswith(const char *name, const char *suffix);
+int dns_name_startswith(const char *name, const char *prefix);
+
+int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret);
+
+int dns_name_reverse(int family, const union in_addr_union *a, char **ret);
+int dns_name_address(const char *p, int *family, union in_addr_union *a);
+
+bool dns_name_is_root(const char *name);
+bool dns_name_is_single_label(const char *name);
+
+int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical);
+
+bool dns_srv_type_is_valid(const char *name);
+bool dnssd_srv_type_is_valid(const char *name);
+bool dns_service_name_is_valid(const char *name);
+
+int dns_service_join(const char *name, const char *type, const char *domain, char **ret);
+int dns_service_split(const char *joined, char **name, char **type, char **domain);
+
+int dns_name_suffix(const char *name, unsigned n_labels, const char **ret);
+int dns_name_count_labels(const char *name);
+
+int dns_name_skip(const char *a, unsigned n_labels, const char **ret);
+int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b);
+
+int dns_name_common_suffix(const char *a, const char *b, const char **ret);
+
+int dns_name_apply_idna(const char *name, char **ret);
+
+int dns_name_is_valid_or_address(const char *name);
+
+int dns_name_dot_suffixed(const char *name);
diff --git a/src/shared/dropin.c b/src/shared/dropin.c
new file mode 100644
index 0000000..89f4b8a
--- /dev/null
+++ b/src/shared/dropin.c
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "dirent-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+int drop_in_file(const char *dir, const char *unit, unsigned level,
+ const char *name, char **ret_p, char **ret_q) {
+
+ char prefix[DECIMAL_STR_MAX(unsigned)];
+ _cleanup_free_ char *b = NULL, *p = NULL, *q = NULL;
+
+ assert(unit);
+ assert(name);
+ assert(ret_p);
+ assert(ret_q);
+
+ sprintf(prefix, "%u", level);
+
+ b = xescape(name, "/.");
+ if (!b)
+ return -ENOMEM;
+
+ if (!filename_is_valid(b))
+ return -EINVAL;
+
+ p = strjoin(dir, "/", unit, ".d");
+ q = strjoin(p, "/", prefix, "-", b, ".conf");
+ if (!p || !q)
+ return -ENOMEM;
+
+ *ret_p = TAKE_PTR(p);
+ *ret_q = TAKE_PTR(q);
+ return 0;
+}
+
+int write_drop_in(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *data) {
+
+ _cleanup_free_ char *p = NULL, *q = NULL;
+ int r;
+
+ assert(dir);
+ assert(unit);
+ assert(name);
+ assert(data);
+
+ r = drop_in_file(dir, unit, level, name, &p, &q);
+ if (r < 0)
+ return r;
+
+ (void) mkdir_p(p, 0755);
+ return write_string_file_atomic_label(q, data);
+}
+
+int write_drop_in_format(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ assert(dir);
+ assert(unit);
+ assert(name);
+ assert(format);
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return write_drop_in(dir, unit, level, name, p);
+}
+
+static int unit_file_add_dir(
+ const char *original_root,
+ const char *path,
+ char ***dirs) {
+
+ _cleanup_free_ char *chased = NULL;
+ int r;
+
+ assert(path);
+
+ /* This adds [original_root]/path to dirs, if it exists. */
+
+ r = chase_symlinks(path, original_root, 0, &chased, NULL);
+ if (r == -ENOENT) /* Ignore -ENOENT, after all most units won't have a drop-in dir. */
+ return 0;
+ if (r == -ENAMETOOLONG) {
+ /* Also, ignore -ENAMETOOLONG but log about it. After all, users are not even able to create the
+ * drop-in dir in such case. This mostly happens for device units with an overly long /sys path. */
+ log_debug_errno(r, "Path '%s' too long, couldn't canonicalize, ignoring.", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_warning_errno(r, "Failed to canonicalize path '%s': %m", path);
+
+ if (strv_consume(dirs, TAKE_PTR(chased)) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int unit_file_find_dirs(
+ const char *original_root,
+ Set *unit_path_cache,
+ const char *unit_path,
+ const char *name,
+ const char *suffix,
+ char ***dirs) {
+
+ _cleanup_free_ char *prefix = NULL, *instance = NULL, *built = NULL;
+ bool is_instance, chopped;
+ const char *dash;
+ UnitType type;
+ char *path;
+ size_t n;
+ int r;
+
+ assert(unit_path);
+ assert(name);
+ assert(suffix);
+
+ path = strjoina(unit_path, "/", name, suffix);
+ if (!unit_path_cache || set_get(unit_path_cache, path)) {
+ r = unit_file_add_dir(original_root, path, dirs);
+ if (r < 0)
+ return r;
+ }
+
+ is_instance = unit_name_is_valid(name, UNIT_NAME_INSTANCE);
+ if (is_instance) { /* Also try the template dir */
+ _cleanup_free_ char *template = NULL;
+
+ r = unit_name_template(name, &template);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate template from unit name: %m");
+
+ r = unit_file_find_dirs(original_root, unit_path_cache, unit_path, template, suffix, dirs);
+ if (r < 0)
+ return r;
+ }
+
+ /* Return early for top level drop-ins. */
+ if (unit_type_from_string(name) >= 0)
+ return 0;
+
+ /* Let's see if there's a "-" prefix for this unit name. If so, let's invoke ourselves for it. This will then
+ * recursively do the same for all our prefixes. i.e. this means given "foo-bar-waldo.service" we'll also
+ * search "foo-bar-.service" and "foo-.service".
+ *
+ * Note the order in which we do it: we traverse up adding drop-ins on each step. This means the more specific
+ * drop-ins may override the more generic drop-ins, which is the intended behaviour. */
+
+ r = unit_name_to_prefix(name, &prefix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to derive unit name prefix from unit name: %m");
+
+ chopped = false;
+ for (;;) {
+ dash = strrchr(prefix, '-');
+ if (!dash) /* No dash? if so we are done */
+ return 0;
+
+ n = (size_t) (dash - prefix);
+ if (n == 0) /* Leading dash? If so, we are done */
+ return 0;
+
+ if (prefix[n+1] != 0 || chopped) {
+ prefix[n+1] = 0;
+ break;
+ }
+
+ /* Trailing dash? If so, chop it off and try again, but not more than once. */
+ prefix[n] = 0;
+ chopped = true;
+ }
+
+ if (!unit_prefix_is_valid(prefix))
+ return 0;
+
+ type = unit_name_to_type(name);
+ if (type < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to derive unit type from unit name: %s",
+ name);
+
+ if (is_instance) {
+ r = unit_name_to_instance(name, &instance);
+ if (r < 0)
+ return log_error_errno(r, "Failed to derive unit name instance from unit name: %m");
+ }
+
+ r = unit_name_build_from_type(prefix, instance, type, &built);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build prefix unit name: %m");
+
+ return unit_file_find_dirs(original_root, unit_path_cache, unit_path, built, suffix, dirs);
+}
+
+int unit_file_find_dropin_paths(
+ const char *original_root,
+ char **lookup_path,
+ Set *unit_path_cache,
+ const char *dir_suffix,
+ const char *file_suffix,
+ const char *name,
+ const Set *aliases,
+ char ***ret) {
+
+ _cleanup_strv_free_ char **dirs = NULL;
+ const char *n;
+ char **p;
+ int r;
+
+ assert(ret);
+
+ if (name)
+ STRV_FOREACH(p, lookup_path)
+ (void) unit_file_find_dirs(original_root, unit_path_cache, *p, name, dir_suffix, &dirs);
+
+ SET_FOREACH(n, aliases)
+ STRV_FOREACH(p, lookup_path)
+ (void) unit_file_find_dirs(original_root, unit_path_cache, *p, n, dir_suffix, &dirs);
+
+ /* All the names in the unit are of the same type so just grab one. */
+ n = name ?: (const char*) set_first(aliases);
+ if (n) {
+ UnitType type = _UNIT_TYPE_INVALID;
+
+ type = unit_name_to_type(n);
+ if (type < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to derive unit type from unit name: %s", n);
+
+ /* Special top level drop in for "<unit type>.<suffix>". Add this last as it's the most generic
+ * and should be able to be overridden by more specific drop-ins. */
+ STRV_FOREACH(p, lookup_path)
+ (void) unit_file_find_dirs(original_root,
+ unit_path_cache,
+ *p,
+ unit_type_to_string(type),
+ dir_suffix,
+ &dirs);
+ }
+
+ if (strv_isempty(dirs)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ r = conf_files_list_strv(ret, file_suffix, NULL, 0, (const char**) dirs);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to create the list of configuration files: %m");
+
+ return 1;
+}
diff --git a/src/shared/dropin.h b/src/shared/dropin.h
new file mode 100644
index 0000000..54cceaf
--- /dev/null
+++ b/src/shared/dropin.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+#include "unit-name.h"
+
+int drop_in_file(const char *dir, const char *unit, unsigned level,
+ const char *name, char **_p, char **_q);
+
+int write_drop_in(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *data);
+
+int write_drop_in_format(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *format, ...) _printf_(5, 6);
+
+int unit_file_find_dropin_paths(
+ const char *original_root,
+ char **lookup_path,
+ Set *unit_path_cache,
+ const char *dir_suffix,
+ const char *file_suffix,
+ const char *name,
+ const Set *aliases,
+ char ***paths);
diff --git a/src/shared/efi-loader.c b/src/shared/efi-loader.c
new file mode 100644
index 0000000..20f70da
--- /dev/null
+++ b/src/shared/efi-loader.c
@@ -0,0 +1,806 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "efi-loader.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "utf8.h"
+#include "virt.h"
+
+#if ENABLE_EFI
+
+#define LOAD_OPTION_ACTIVE 0x00000001
+#define MEDIA_DEVICE_PATH 0x04
+#define MEDIA_HARDDRIVE_DP 0x01
+#define MEDIA_FILEPATH_DP 0x04
+#define SIGNATURE_TYPE_GUID 0x02
+#define MBR_TYPE_EFI_PARTITION_TABLE_HEADER 0x02
+#define END_DEVICE_PATH_TYPE 0x7f
+#define END_ENTIRE_DEVICE_PATH_SUBTYPE 0xff
+#define EFI_OS_INDICATIONS_BOOT_TO_FW_UI 0x0000000000000001
+
+#define boot_option__contents \
+ { \
+ uint32_t attr; \
+ uint16_t path_len; \
+ uint16_t title[]; \
+ }
+
+struct boot_option boot_option__contents;
+struct boot_option__packed boot_option__contents _packed_;
+assert_cc(offsetof(struct boot_option, title) == offsetof(struct boot_option__packed, title));
+/* sizeof(struct boot_option) != sizeof(struct boot_option__packed), so
+ * the *size* of the structure should not be used anywhere below. */
+
+struct drive_path {
+ uint32_t part_nr;
+ uint64_t part_start;
+ uint64_t part_size;
+ char signature[16];
+ uint8_t mbr_type;
+ uint8_t signature_type;
+} _packed_;
+
+#define device_path__contents \
+ { \
+ uint8_t type; \
+ uint8_t sub_type; \
+ uint16_t length; \
+ union { \
+ uint16_t path[0]; \
+ struct drive_path drive; \
+ }; \
+ }
+
+struct device_path device_path__contents;
+struct device_path__packed device_path__contents _packed_;
+assert_cc(sizeof(struct device_path) == sizeof(struct device_path__packed));
+
+int efi_reboot_to_firmware_supported(void) {
+ _cleanup_free_ void *v = NULL;
+ static int cache = -1;
+ uint64_t b;
+ size_t s;
+ int r;
+
+ if (cache > 0)
+ return 0;
+ if (cache == 0)
+ return -EOPNOTSUPP;
+
+ if (!is_efi_boot())
+ goto not_supported;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndicationsSupported", NULL, &v, &s);
+ if (r == -ENOENT)
+ goto not_supported; /* variable doesn't exist? it's not supported then */
+ if (r < 0)
+ return r;
+ if (s != sizeof(uint64_t))
+ return -EINVAL;
+
+ b = *(uint64_t*) v;
+ if (!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI))
+ goto not_supported; /* bit unset? it's not supported then */
+
+ cache = 1;
+ return 0;
+
+not_supported:
+ cache = 0;
+ return -EOPNOTSUPP;
+}
+
+static int get_os_indications(uint64_t *ret) {
+ static struct stat cache_stat = {};
+ _cleanup_free_ void *v = NULL;
+ _cleanup_free_ char *fn = NULL;
+ static uint64_t cache;
+ struct stat new_stat;
+ size_t s;
+ int r;
+
+ assert(ret);
+
+ /* Let's verify general support first */
+ r = efi_reboot_to_firmware_supported();
+ if (r < 0)
+ return r;
+
+ fn = efi_variable_path(EFI_VENDOR_GLOBAL, "OsIndications");
+ if (!fn)
+ return -ENOMEM;
+
+ /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */
+ if (stat(fn, &new_stat) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* Doesn't exist? Then we can exit early (also see below) */
+ *ret = 0;
+ return 0;
+
+ } else if (stat_inode_unmodified(&new_stat, &cache_stat)) {
+ /* inode didn't change, we can return the cached value */
+ *ret = cache;
+ return 0;
+ }
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndications", NULL, &v, &s);
+ if (r == -ENOENT) {
+ /* Some firmware implementations that do support OsIndications and report that with
+ * OsIndicationsSupported will remove the OsIndications variable when it is unset. Let's
+ * pretend it's 0 then, to hide this implementation detail. Note that this call will return
+ * -ENOENT then only if the support for OsIndications is missing entirely, as determined by
+ * efi_reboot_to_firmware_supported() above. */
+ *ret = 0;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+ if (s != sizeof(uint64_t))
+ return -EINVAL;
+
+ cache_stat = new_stat;
+ *ret = cache = *(uint64_t *)v;
+ return 0;
+}
+
+int efi_get_reboot_to_firmware(void) {
+ int r;
+ uint64_t b;
+
+ r = get_os_indications(&b);
+ if (r < 0)
+ return r;
+
+ return !!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI);
+}
+
+int efi_set_reboot_to_firmware(bool value) {
+ int r;
+ uint64_t b, b_new;
+
+ r = get_os_indications(&b);
+ if (r < 0)
+ return r;
+
+ b_new = UPDATE_FLAG(b, EFI_OS_INDICATIONS_BOOT_TO_FW_UI, value);
+
+ /* Avoid writing to efi vars store if we can due to firmware bugs. */
+ if (b != b_new)
+ return efi_set_variable(EFI_VENDOR_GLOBAL, "OsIndications", &b_new, sizeof(uint64_t));
+
+ return 0;
+}
+
+static ssize_t utf16_size(const uint16_t *s, size_t buf_len_bytes) {
+ size_t l = 0;
+
+ /* Returns the size of the string in bytes without the terminating two zero bytes */
+
+ if (buf_len_bytes % sizeof(uint16_t) != 0)
+ return -EINVAL;
+
+ while (l < buf_len_bytes / sizeof(uint16_t)) {
+ if (s[l] == 0)
+ return (l + 1) * sizeof(uint16_t);
+ l++;
+ }
+
+ return -EINVAL; /* The terminator was not found */
+}
+
+struct guid {
+ uint32_t u1;
+ uint16_t u2;
+ uint16_t u3;
+ uint8_t u4[8];
+} _packed_;
+
+static void efi_guid_to_id128(const void *guid, sd_id128_t *id128) {
+ uint32_t u1;
+ uint16_t u2, u3;
+ const struct guid *uuid = guid;
+
+ memcpy(&u1, &uuid->u1, sizeof(uint32_t));
+ id128->bytes[0] = (u1 >> 24) & 0xff;
+ id128->bytes[1] = (u1 >> 16) & 0xff;
+ id128->bytes[2] = (u1 >> 8) & 0xff;
+ id128->bytes[3] = u1 & 0xff;
+ memcpy(&u2, &uuid->u2, sizeof(uint16_t));
+ id128->bytes[4] = (u2 >> 8) & 0xff;
+ id128->bytes[5] = u2 & 0xff;
+ memcpy(&u3, &uuid->u3, sizeof(uint16_t));
+ id128->bytes[6] = (u3 >> 8) & 0xff;
+ id128->bytes[7] = u3 & 0xff;
+ memcpy(&id128->bytes[8], uuid->u4, sizeof(uuid->u4));
+}
+
+int efi_get_boot_option(
+ uint16_t id,
+ char **title,
+ sd_id128_t *part_uuid,
+ char **path,
+ bool *active) {
+
+ char boot_id[9];
+ _cleanup_free_ uint8_t *buf = NULL;
+ size_t l;
+ struct boot_option *header;
+ ssize_t title_size;
+ _cleanup_free_ char *s = NULL, *p = NULL;
+ sd_id128_t p_uuid = SD_ID128_NULL;
+ int r;
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, (void **)&buf, &l);
+ if (r < 0)
+ return r;
+ if (l < offsetof(struct boot_option, title))
+ return -ENOENT;
+
+ header = (struct boot_option *)buf;
+ title_size = utf16_size(header->title, l - offsetof(struct boot_option, title));
+ if (title_size < 0)
+ return title_size;
+
+ if (title) {
+ s = utf16_to_utf8(header->title, title_size);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ if (header->path_len > 0) {
+ uint8_t *dbuf;
+ size_t dnext, doff;
+
+ doff = offsetof(struct boot_option, title) + title_size;
+ dbuf = buf + doff;
+ if (header->path_len > l - doff)
+ return -EINVAL;
+
+ dnext = 0;
+ while (dnext < header->path_len) {
+ struct device_path *dpath;
+
+ dpath = (struct device_path *)(dbuf + dnext);
+ if (dpath->length < 4)
+ break;
+
+ /* Type 0x7F – End of Hardware Device Path, Sub-Type 0xFF – End Entire Device Path */
+ if (dpath->type == END_DEVICE_PATH_TYPE && dpath->sub_type == END_ENTIRE_DEVICE_PATH_SUBTYPE)
+ break;
+
+ dnext += dpath->length;
+
+ /* Type 0x04 – Media Device Path */
+ if (dpath->type != MEDIA_DEVICE_PATH)
+ continue;
+
+ /* Sub-Type 1 – Hard Drive */
+ if (dpath->sub_type == MEDIA_HARDDRIVE_DP) {
+ /* 0x02 – GUID Partition Table */
+ if (dpath->drive.mbr_type != MBR_TYPE_EFI_PARTITION_TABLE_HEADER)
+ continue;
+
+ /* 0x02 – GUID signature */
+ if (dpath->drive.signature_type != SIGNATURE_TYPE_GUID)
+ continue;
+
+ if (part_uuid)
+ efi_guid_to_id128(dpath->drive.signature, &p_uuid);
+ continue;
+ }
+
+ /* Sub-Type 4 – File Path */
+ if (dpath->sub_type == MEDIA_FILEPATH_DP && !p && path) {
+ p = utf16_to_utf8(dpath->path, dpath->length-4);
+ if (!p)
+ return -ENOMEM;
+
+ efi_tilt_backslashes(p);
+ continue;
+ }
+ }
+ }
+
+ if (title)
+ *title = TAKE_PTR(s);
+ if (part_uuid)
+ *part_uuid = p_uuid;
+ if (path)
+ *path = TAKE_PTR(p);
+ if (active)
+ *active = header->attr & LOAD_OPTION_ACTIVE;
+
+ return 0;
+}
+
+static void to_utf16(uint16_t *dest, const char *src) {
+ int i;
+
+ for (i = 0; src[i] != '\0'; i++)
+ dest[i] = src[i];
+ dest[i] = '\0';
+}
+
+static void id128_to_efi_guid(sd_id128_t id, void *guid) {
+ struct guid uuid = {
+ .u1 = id.bytes[0] << 24 | id.bytes[1] << 16 | id.bytes[2] << 8 | id.bytes[3],
+ .u2 = id.bytes[4] << 8 | id.bytes[5],
+ .u3 = id.bytes[6] << 8 | id.bytes[7],
+ };
+ memcpy(uuid.u4, id.bytes+8, sizeof(uuid.u4));
+ memcpy(guid, &uuid, sizeof(uuid));
+}
+
+static uint16_t *tilt_slashes(uint16_t *s) {
+ uint16_t *p;
+
+ for (p = s; *p; p++)
+ if (*p == '/')
+ *p = '\\';
+
+ return s;
+}
+
+int efi_add_boot_option(
+ uint16_t id,
+ const char *title,
+ uint32_t part,
+ uint64_t pstart,
+ uint64_t psize,
+ sd_id128_t part_uuid,
+ const char *path) {
+
+ size_t size, title_len, path_len;
+ _cleanup_free_ char *buf = NULL;
+ struct boot_option *option;
+ struct device_path *devicep;
+ char boot_id[9];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ title_len = (strlen(title)+1) * 2;
+ path_len = (strlen(path)+1) * 2;
+
+ buf = malloc0(offsetof(struct boot_option, title) + title_len +
+ sizeof(struct drive_path) +
+ sizeof(struct device_path) + path_len);
+ if (!buf)
+ return -ENOMEM;
+
+ /* header */
+ option = (struct boot_option *)buf;
+ option->attr = LOAD_OPTION_ACTIVE;
+ option->path_len = offsetof(struct device_path, drive) + sizeof(struct drive_path) +
+ offsetof(struct device_path, path) + path_len +
+ offsetof(struct device_path, path);
+ to_utf16(option->title, title);
+ size = offsetof(struct boot_option, title) + title_len;
+
+ /* partition info */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = MEDIA_DEVICE_PATH;
+ devicep->sub_type = MEDIA_HARDDRIVE_DP;
+ devicep->length = offsetof(struct device_path, drive) + sizeof(struct drive_path);
+ memcpy(&devicep->drive.part_nr, &part, sizeof(uint32_t));
+ memcpy(&devicep->drive.part_start, &pstart, sizeof(uint64_t));
+ memcpy(&devicep->drive.part_size, &psize, sizeof(uint64_t));
+ id128_to_efi_guid(part_uuid, devicep->drive.signature);
+ devicep->drive.mbr_type = MBR_TYPE_EFI_PARTITION_TABLE_HEADER;
+ devicep->drive.signature_type = SIGNATURE_TYPE_GUID;
+ size += devicep->length;
+
+ /* path to loader */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = MEDIA_DEVICE_PATH;
+ devicep->sub_type = MEDIA_FILEPATH_DP;
+ devicep->length = offsetof(struct device_path, path) + path_len;
+ to_utf16(devicep->path, path);
+ tilt_slashes(devicep->path);
+ size += devicep->length;
+
+ /* end of path */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = END_DEVICE_PATH_TYPE;
+ devicep->sub_type = END_ENTIRE_DEVICE_PATH_SUBTYPE;
+ devicep->length = offsetof(struct device_path, path);
+ size += devicep->length;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, buf, size);
+}
+
+int efi_remove_boot_option(uint16_t id) {
+ char boot_id[9];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, 0);
+}
+
+int efi_get_boot_order(uint16_t **order) {
+ _cleanup_free_ void *buf = NULL;
+ size_t l;
+ int r;
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "BootOrder", NULL, &buf, &l);
+ if (r < 0)
+ return r;
+
+ if (l <= 0)
+ return -ENOENT;
+
+ if (l % sizeof(uint16_t) > 0 ||
+ l / sizeof(uint16_t) > INT_MAX)
+ return -EINVAL;
+
+ *order = TAKE_PTR(buf);
+ return (int) (l / sizeof(uint16_t));
+}
+
+int efi_set_boot_order(uint16_t *order, size_t n) {
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ return efi_set_variable(EFI_VENDOR_GLOBAL, "BootOrder", order, n * sizeof(uint16_t));
+}
+
+static int boot_id_hex(const char s[static 4]) {
+ int id = 0, i;
+
+ assert(s);
+
+ for (i = 0; i < 4; i++)
+ if (s[i] >= '0' && s[i] <= '9')
+ id |= (s[i] - '0') << (3 - i) * 4;
+ else if (s[i] >= 'A' && s[i] <= 'F')
+ id |= (s[i] - 'A' + 10) << (3 - i) * 4;
+ else
+ return -EINVAL;
+
+ return id;
+}
+
+static int cmp_uint16(const uint16_t *a, const uint16_t *b) {
+ return CMP(*a, *b);
+}
+
+int efi_get_boot_options(uint16_t **options) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ _cleanup_free_ uint16_t *list = NULL;
+ struct dirent *de;
+ size_t alloc = 0;
+ int count = 0;
+
+ assert(options);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ dir = opendir("/sys/firmware/efi/efivars/");
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT(de, dir, return -errno) {
+ int id;
+
+ if (strncmp(de->d_name, "Boot", 4) != 0)
+ continue;
+
+ if (strlen(de->d_name) != 45)
+ continue;
+
+ if (strcmp(de->d_name + 8, "-8be4df61-93ca-11d2-aa0d-00e098032b8c") != 0)
+ continue;
+
+ id = boot_id_hex(de->d_name + 4);
+ if (id < 0)
+ continue;
+
+ if (!GREEDY_REALLOC(list, alloc, count + 1))
+ return -ENOMEM;
+
+ list[count++] = id;
+ }
+
+ typesafe_qsort(list, count, cmp_uint16);
+
+ *options = TAKE_PTR(list);
+
+ return count;
+}
+
+static int read_usec(sd_id128_t vendor, const char *name, usec_t *u) {
+ _cleanup_free_ char *j = NULL;
+ int r;
+ uint64_t x = 0;
+
+ assert(name);
+ assert(u);
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, name, &j);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(j, &x);
+ if (r < 0)
+ return r;
+
+ *u = x;
+ return 0;
+}
+
+int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) {
+ uint64_t x, y;
+ int r;
+
+ assert(firmware);
+ assert(loader);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeInitUSec", &x);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read LoaderTimeInitUSec: %m");
+
+ r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeExecUSec", &y);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read LoaderTimeExecUSec: %m");
+
+ if (y == 0 || y < x || y - x > USEC_PER_HOUR)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Bad LoaderTimeInitUSec=%"PRIu64", LoaderTimeExecUSec=%" PRIu64"; refusing.",
+ x, y);
+
+ *firmware = x;
+ *loader = y;
+
+ return 0;
+}
+
+int efi_loader_get_device_part_uuid(sd_id128_t *u) {
+ _cleanup_free_ char *p = NULL;
+ int r, parsed[16];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderDevicePartUUID", &p);
+ if (r < 0)
+ return r;
+
+ if (sscanf(p, SD_ID128_UUID_FORMAT_STR,
+ &parsed[0], &parsed[1], &parsed[2], &parsed[3],
+ &parsed[4], &parsed[5], &parsed[6], &parsed[7],
+ &parsed[8], &parsed[9], &parsed[10], &parsed[11],
+ &parsed[12], &parsed[13], &parsed[14], &parsed[15]) != 16)
+ return -EIO;
+
+ if (u) {
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(parsed); i++)
+ u->bytes[i] = parsed[i];
+ }
+
+ return 0;
+}
+
+int efi_loader_get_entries(char ***ret) {
+ _cleanup_free_ char16_t *entries = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t size, i, start;
+ int r;
+
+ assert(ret);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntries", NULL, (void**) &entries, &size);
+ if (r < 0)
+ return r;
+
+ /* The variable contains a series of individually NUL terminated UTF-16 strings. */
+
+ for (i = 0, start = 0;; i++) {
+ _cleanup_free_ char *decoded = NULL;
+ bool end;
+
+ /* Is this the end of the variable's data? */
+ end = i * sizeof(char16_t) >= size;
+
+ /* Are we in the middle of a string? (i.e. not at the end of the variable, nor at a NUL terminator?) If
+ * so, let's go to the next entry. */
+ if (!end && entries[i] != 0)
+ continue;
+
+ /* We reached the end of a string, let's decode it into UTF-8 */
+ decoded = utf16_to_utf8(entries + start, (i - start) * sizeof(char16_t));
+ if (!decoded)
+ return -ENOMEM;
+
+ if (efi_loader_entry_name_valid(decoded)) {
+ r = strv_consume(&l, TAKE_PTR(decoded));
+ if (r < 0)
+ return r;
+ } else
+ log_debug("Ignoring invalid loader entry '%s'.", decoded);
+
+ /* We reached the end of the variable */
+ if (end)
+ break;
+
+ /* Continue after the NUL byte */
+ start = i + 1;
+ }
+
+ *ret = TAKE_PTR(l);
+ return 0;
+}
+
+int efi_loader_get_features(uint64_t *ret) {
+ _cleanup_free_ void *v = NULL;
+ size_t s;
+ int r;
+
+ if (!is_efi_boot()) {
+ *ret = 0;
+ return 0;
+ }
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderFeatures", NULL, &v, &s);
+ if (r == -ENOENT) {
+ _cleanup_free_ char *info = NULL;
+
+ /* The new (v240+) LoaderFeatures variable is not supported, let's see if it's systemd-boot at all */
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderInfo", &info);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+
+ /* Variable not set, definitely means not systemd-boot */
+
+ } else if (first_word(info, "systemd-boot")) {
+
+ /* An older systemd-boot version. Let's hardcode the feature set, since it was pretty
+ * static in all its versions. */
+
+ *ret = EFI_LOADER_FEATURE_CONFIG_TIMEOUT |
+ EFI_LOADER_FEATURE_ENTRY_DEFAULT |
+ EFI_LOADER_FEATURE_ENTRY_ONESHOT;
+
+ return 0;
+ }
+
+ /* No features supported */
+ *ret = 0;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ if (s != sizeof(uint64_t))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "LoaderFeatures EFI variable doesn't have the right size.");
+
+ memcpy(ret, v, sizeof(uint64_t));
+ return 0;
+}
+
+int efi_loader_get_config_timeout_one_shot(usec_t *ret) {
+ _cleanup_free_ char *v = NULL, *fn = NULL;
+ static struct stat cache_stat = {};
+ struct stat new_stat;
+ static usec_t cache;
+ uint64_t sec;
+ int r;
+
+ assert(ret);
+
+ fn = efi_variable_path(EFI_VENDOR_LOADER, "LoaderConfigTimeoutOneShot");
+ if (!fn)
+ return -ENOMEM;
+
+ /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */
+ if (stat(fn, &new_stat) < 0)
+ return -errno;
+
+ if (stat_inode_unmodified(&new_stat, &cache_stat)) {
+ *ret = cache;
+ return 0;
+ }
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderConfigTimeoutOneShot", &v);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &sec);
+ if (r < 0)
+ return r;
+ if (sec > USEC_INFINITY / USEC_PER_SEC)
+ return -ERANGE;
+
+ cache_stat = new_stat;
+ *ret = cache = sec * USEC_PER_SEC; /* return in µs */
+ return 0;
+}
+
+int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat) {
+ _cleanup_free_ char *fn = NULL, *v = NULL;
+ struct stat new_stat;
+ int r;
+
+ assert(cache);
+ assert(cache_stat);
+
+ fn = efi_variable_path(EFI_VENDOR_LOADER, "LoaderEntryOneShot");
+ if (!fn)
+ return -ENOMEM;
+
+ /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */
+ if (stat(fn, &new_stat) < 0)
+ return -errno;
+
+ if (stat_inode_unmodified(&new_stat, cache_stat))
+ return 0;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &v);
+ if (r < 0)
+ return r;
+
+ if (!efi_loader_entry_name_valid(v))
+ return -EINVAL;
+
+ *cache_stat = new_stat;
+ free_and_replace(*cache, v);
+
+ return 0;
+}
+
+#endif
+
+bool efi_loader_entry_name_valid(const char *s) {
+ if (isempty(s))
+ return false;
+
+ if (strlen(s) > FILENAME_MAX) /* Make sure entry names fit in filenames */
+ return false;
+
+ return in_charset(s, ALPHANUMERICAL "+-_.");
+}
+
+char *efi_tilt_backslashes(char *s) {
+ char *p;
+
+ for (p = s; *p; p++)
+ if (*p == '\\')
+ *p = '/';
+
+ return s;
+}
diff --git a/src/shared/efi-loader.h b/src/shared/efi-loader.h
new file mode 100644
index 0000000..34476f4
--- /dev/null
+++ b/src/shared/efi-loader.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "efivars.h"
+
+#include <sys/stat.h>
+
+#if ENABLE_EFI
+
+int efi_reboot_to_firmware_supported(void);
+int efi_get_reboot_to_firmware(void);
+int efi_set_reboot_to_firmware(bool value);
+
+int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active);
+int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path);
+int efi_remove_boot_option(uint16_t id);
+int efi_get_boot_order(uint16_t **order);
+int efi_set_boot_order(uint16_t *order, size_t n);
+int efi_get_boot_options(uint16_t **options);
+
+int efi_loader_get_device_part_uuid(sd_id128_t *u);
+int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader);
+
+int efi_loader_get_entries(char ***ret);
+
+int efi_loader_get_features(uint64_t *ret);
+
+int efi_loader_get_config_timeout_one_shot(usec_t *ret);
+int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat);
+
+#else
+
+static inline int efi_reboot_to_firmware_supported(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_reboot_to_firmware(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_reboot_to_firmware(bool value) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_remove_boot_option(uint16_t id) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_order(uint16_t **order) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_boot_order(uint16_t *order, size_t n) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_options(uint16_t **options) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_device_part_uuid(sd_id128_t *u) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_entries(char ***ret) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_features(uint64_t *ret) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_config_timeout_one_shot(usec_t *ret) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat) {
+ return -EOPNOTSUPP;
+}
+
+#endif
+
+bool efi_loader_entry_name_valid(const char *s);
+
+char *efi_tilt_backslashes(char *s);
diff --git a/src/shared/enable-mempool.c b/src/shared/enable-mempool.c
new file mode 100644
index 0000000..1abfccb
--- /dev/null
+++ b/src/shared/enable-mempool.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "mempool.h"
+
+const bool mempool_use_allowed = true;
diff --git a/src/shared/env-file-label.c b/src/shared/env-file-label.c
new file mode 100644
index 0000000..468afce
--- /dev/null
+++ b/src/shared/env-file-label.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+
+#include "env-file-label.h"
+#include "env-file.h"
+#include "selinux-util.h"
+
+int write_env_file_label(const char *fname, char **l) {
+ int r;
+
+ r = mac_selinux_create_file_prepare(fname, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = write_env_file(fname, l);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
diff --git a/src/shared/env-file-label.h b/src/shared/env-file-label.h
new file mode 100644
index 0000000..d68058a
--- /dev/null
+++ b/src/shared/env-file-label.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to
+ * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not
+ * for all */
+
+int write_env_file_label(const char *fname, char **l);
diff --git a/src/shared/ethtool-util.c b/src/shared/ethtool-util.c
new file mode 100644
index 0000000..e6fab26
--- /dev/null
+++ b/src/shared/ethtool-util.c
@@ -0,0 +1,1149 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/sockios.h>
+
+#include "conf-parser.h"
+#include "ethtool-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "log.h"
+#include "memory-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "strxcpyx.h"
+
+static const char* const duplex_table[_DUP_MAX] = {
+ [DUP_FULL] = "full",
+ [DUP_HALF] = "half"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(duplex, Duplex);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_duplex, duplex, Duplex, "Failed to parse duplex setting");
+
+static const char* const wol_table[_WOL_MAX] = {
+ [WOL_PHY] = "phy",
+ [WOL_UCAST] = "unicast",
+ [WOL_MCAST] = "multicast",
+ [WOL_BCAST] = "broadcast",
+ [WOL_ARP] = "arp",
+ [WOL_MAGIC] = "magic",
+ [WOL_MAGICSECURE] = "secureon",
+ [WOL_OFF] = "off",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(wol, WakeOnLan);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_wol, wol, WakeOnLan, "Failed to parse WakeOnLan setting");
+
+static const char* const port_table[] = {
+ [NET_DEV_PORT_TP] = "tp",
+ [NET_DEV_PORT_AUI] = "aui",
+ [NET_DEV_PORT_MII] = "mii",
+ [NET_DEV_PORT_FIBRE] = "fibre",
+ [NET_DEV_PORT_BNC] = "bnc",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(port, NetDevPort);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_port, port, NetDevPort, "Failed to parse Port setting");
+
+static const char* const netdev_feature_table[_NET_DEV_FEAT_MAX] = {
+ [NET_DEV_FEAT_RX] = "rx-checksum",
+ [NET_DEV_FEAT_TX] = "tx-checksum-", /* The suffix "-" means any feature beginning with "tx-checksum-" */
+ [NET_DEV_FEAT_GSO] = "tx-generic-segmentation",
+ [NET_DEV_FEAT_GRO] = "rx-gro",
+ [NET_DEV_FEAT_LRO] = "rx-lro",
+ [NET_DEV_FEAT_TSO] = "tx-tcp-segmentation",
+ [NET_DEV_FEAT_TSO6] = "tx-tcp6-segmentation",
+};
+
+static const char* const ethtool_link_mode_bit_table[] = {
+ [ETHTOOL_LINK_MODE_10baseT_Half_BIT] = "10baset-half",
+ [ETHTOOL_LINK_MODE_10baseT_Full_BIT] = "10baset-full",
+ [ETHTOOL_LINK_MODE_100baseT_Half_BIT] = "100baset-half",
+ [ETHTOOL_LINK_MODE_100baseT_Full_BIT] = "100baset-full",
+ [ETHTOOL_LINK_MODE_1000baseT_Half_BIT] = "1000baset-half",
+ [ETHTOOL_LINK_MODE_1000baseT_Full_BIT] = "1000baset-full",
+ [ETHTOOL_LINK_MODE_Autoneg_BIT] = "autonegotiation",
+ [ETHTOOL_LINK_MODE_TP_BIT] = "tp",
+ [ETHTOOL_LINK_MODE_AUI_BIT] = "aui",
+ [ETHTOOL_LINK_MODE_MII_BIT] = "mii",
+ [ETHTOOL_LINK_MODE_FIBRE_BIT] = "fibre",
+ [ETHTOOL_LINK_MODE_BNC_BIT] = "bnc",
+ [ETHTOOL_LINK_MODE_10000baseT_Full_BIT] = "10000baset-full",
+ [ETHTOOL_LINK_MODE_Pause_BIT] = "pause",
+ [ETHTOOL_LINK_MODE_Asym_Pause_BIT] = "asym-pause",
+ [ETHTOOL_LINK_MODE_2500baseX_Full_BIT] = "2500basex-full",
+ [ETHTOOL_LINK_MODE_Backplane_BIT] = "backplane",
+ [ETHTOOL_LINK_MODE_1000baseKX_Full_BIT] = "1000basekx-full",
+ [ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT] = "10000basekx4-full",
+ [ETHTOOL_LINK_MODE_10000baseKR_Full_BIT] = "10000basekr-full",
+ [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = "10000baser-fec",
+ [ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT] = "20000basemld2-full",
+ [ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT] = "20000basekr2-full",
+ [ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT] = "40000basekr4-full",
+ [ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT] = "40000basecr4-full",
+ [ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT] = "40000basesr4-full",
+ [ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT] = "40000baselr4-full",
+ [ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT] = "56000basekr4-full",
+ [ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT] = "56000basecr4-full",
+ [ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT] = "56000basesr4-full",
+ [ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT] = "56000baselr4-full",
+ [ETHTOOL_LINK_MODE_25000baseCR_Full_BIT] = "25000basecr-full",
+ [ETHTOOL_LINK_MODE_25000baseKR_Full_BIT] = "25000basekr-full",
+ [ETHTOOL_LINK_MODE_25000baseSR_Full_BIT] = "25000basesr-full",
+ [ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT] = "50000basecr2-full",
+ [ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT] = "50000basekr2-full",
+ [ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT] = "100000basekr4-full",
+ [ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT] = "100000basesr4-full",
+ [ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT] = "100000basecr4-full",
+ [ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT] = "100000baselr4-er4-full",
+ [ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT] = "50000basesr2-full",
+ [ETHTOOL_LINK_MODE_1000baseX_Full_BIT] = "1000basex-full",
+ [ETHTOOL_LINK_MODE_10000baseCR_Full_BIT] = "10000basecr-full",
+ [ETHTOOL_LINK_MODE_10000baseSR_Full_BIT] = "10000basesr-full",
+ [ETHTOOL_LINK_MODE_10000baseLR_Full_BIT] = "10000baselr-full",
+ [ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT] = "10000baselrm-full",
+ [ETHTOOL_LINK_MODE_10000baseER_Full_BIT] = "10000baseer-full",
+ [ETHTOOL_LINK_MODE_2500baseT_Full_BIT] = "2500baset-full",
+ [ETHTOOL_LINK_MODE_5000baseT_Full_BIT] = "5000baset-full",
+ [ETHTOOL_LINK_MODE_FEC_NONE_BIT] = "fec-none",
+ [ETHTOOL_LINK_MODE_FEC_RS_BIT] = "fec-rs",
+ [ETHTOOL_LINK_MODE_FEC_BASER_BIT] = "fec-baser",
+ [ETHTOOL_LINK_MODE_50000baseKR_Full_BIT] = "50000basekr-full",
+ [ETHTOOL_LINK_MODE_50000baseSR_Full_BIT] = "50000basesr-full",
+ [ETHTOOL_LINK_MODE_50000baseCR_Full_BIT] = "50000basecr-full",
+ [ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT] = "50000baselr-er-fr-full",
+ [ETHTOOL_LINK_MODE_50000baseDR_Full_BIT] = "50000basedr-full",
+ [ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT] = "100000basekr2-full",
+ [ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT] = "100000basesr2-full",
+ [ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT] = "100000basecr2-full",
+ [ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT] = "100000baselr2-er2-fr2-full",
+ [ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT] = "100000basedr2-full",
+ [ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT] = "200000basekr4-full",
+ [ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT] = "200000basesr4-full",
+ [ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT] = "200000baselr4-er4-fr4-full",
+ [ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT] = "200000basedr4-full",
+ [ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT] = "200000basecr4-full",
+ [ETHTOOL_LINK_MODE_100baseT1_Full_BIT] = "100baset1-full",
+ [ETHTOOL_LINK_MODE_1000baseT1_Full_BIT] = "1000baset1-full",
+ [ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT] = "400000basekr8-full",
+ [ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT] = "400000basesr8-full",
+ [ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT] = "400000baselr8-er8-fr8-full",
+ [ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT] = "400000basedr8-full",
+ [ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT] = "400000basecr8-full",
+ [ETHTOOL_LINK_MODE_FEC_LLRS_BIT] = "fec-llrs",
+ [ETHTOOL_LINK_MODE_100000baseKR_Full_BIT] = "100000basekr-full",
+ [ETHTOOL_LINK_MODE_100000baseSR_Full_BIT] = "100000basesr-full",
+ [ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT] = "100000baselr-er-fr-full",
+ [ETHTOOL_LINK_MODE_100000baseCR_Full_BIT] = "100000basecr-full",
+ [ETHTOOL_LINK_MODE_100000baseDR_Full_BIT] = "100000basedr-full",
+ [ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT] = "200000basekr2-full",
+ [ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT] = "200000basesr2-full",
+ [ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT] = "200000baselr2-er2-fr2-full",
+ [ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT] = "200000basedr2-full",
+ [ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT] = "200000basecr2-full",
+ [ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT] = "400000basekr4-full",
+ [ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT] = "400000basesr4-full",
+ [ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT] = "400000baselr4-er4-fr4-full",
+ [ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT] = "400000basedr4-full",
+ [ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT] = "400000basecr4-full",
+};
+/* Make sure the array is large enough to fit all bits */
+assert_cc((ELEMENTSOF(ethtool_link_mode_bit_table)-1) / 32 < N_ADVERTISE);
+
+DEFINE_STRING_TABLE_LOOKUP(ethtool_link_mode_bit, enum ethtool_link_mode_bit_indices);
+
+static int ethtool_connect_or_warn(int *ret, bool warn) {
+ int fd;
+
+ assert_return(ret, -EINVAL);
+
+ fd = socket_ioctl_fd();
+ if (fd < 0)
+ return log_full_errno(warn ? LOG_WARNING: LOG_DEBUG, fd,
+ "ethtool: could not create control socket: %m");
+
+ *ret = fd;
+
+ return 0;
+}
+
+int ethtool_get_driver(int *ethtool_fd, const char *ifname, char **ret) {
+ struct ethtool_drvinfo ecmd = {
+ .cmd = ETHTOOL_GDRVINFO,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ char *d;
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+ assert(ret);
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (isempty(ecmd.driver))
+ return -ENODATA;
+
+ d = strdup(ecmd.driver);
+ if (!d)
+ return -ENOMEM;
+
+ *ret = d;
+ return 0;
+}
+
+int ethtool_get_link_info(
+ int *ethtool_fd,
+ const char *ifname,
+ int *ret_autonegotiation,
+ uint64_t *ret_speed,
+ Duplex *ret_duplex,
+ NetDevPort *ret_port) {
+
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_GSET,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, false);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ret_autonegotiation)
+ *ret_autonegotiation = ecmd.autoneg;
+
+ if (ret_speed) {
+ uint32_t speed;
+
+ speed = ethtool_cmd_speed(&ecmd);
+ *ret_speed = speed == (uint32_t) SPEED_UNKNOWN ?
+ UINT64_MAX : (uint64_t) speed * 1000 * 1000;
+ }
+
+ if (ret_duplex)
+ *ret_duplex = ecmd.duplex;
+
+ if (ret_port)
+ *ret_port = ecmd.port;
+
+ return 0;
+}
+
+int ethtool_get_permanent_macaddr(int *ethtool_fd, const char *ifname, struct ether_addr *ret) {
+ _cleanup_close_ int fd = -1;
+ struct {
+ struct ethtool_perm_addr addr;
+ uint8_t space[MAX_ADDR_LEN];
+ } epaddr = {
+ .addr.cmd = ETHTOOL_GPERMADDR,
+ .addr.size = MAX_ADDR_LEN,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (caddr_t) &epaddr,
+ };
+ int r;
+
+ assert(ifname);
+ assert(ret);
+
+ if (!ethtool_fd)
+ ethtool_fd = &fd;
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, false);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (epaddr.addr.size != 6)
+ return -EOPNOTSUPP;
+
+#pragma GCC diagnostic push
+#if HAVE_ZERO_LENGTH_BOUNDS
+# pragma GCC diagnostic ignored "-Wzero-length-bounds"
+#endif
+ for (size_t i = 0; i < epaddr.addr.size; i++)
+ ret->ether_addr_octet[i] = epaddr.addr.data[i];
+#pragma GCC diagnostic pop
+
+ return 0;
+}
+
+int ethtool_set_speed(int *ethtool_fd, const char *ifname, unsigned speed, Duplex duplex) {
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_GSET,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+
+ if (speed == 0 && duplex == _DUP_INVALID)
+ return 0;
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ethtool_cmd_speed(&ecmd) != speed) {
+ ethtool_cmd_speed_set(&ecmd, speed);
+ need_update = true;
+ }
+
+ switch (duplex) {
+ case DUP_HALF:
+ if (ecmd.duplex != DUPLEX_HALF) {
+ ecmd.duplex = DUPLEX_HALF;
+ need_update = true;
+ }
+ break;
+ case DUP_FULL:
+ if (ecmd.duplex != DUPLEX_FULL) {
+ ecmd.duplex = DUPLEX_FULL;
+ need_update = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SSET;
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int ethtool_set_wol(int *ethtool_fd, const char *ifname, WakeOnLan wol) {
+ struct ethtool_wolinfo ecmd = {
+ .cmd = ETHTOOL_GWOL,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+
+ if (wol == _WOL_INVALID)
+ return 0;
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ switch (wol) {
+ case WOL_PHY:
+ if (ecmd.wolopts != WAKE_PHY) {
+ ecmd.wolopts = WAKE_PHY;
+ need_update = true;
+ }
+ break;
+ case WOL_UCAST:
+ if (ecmd.wolopts != WAKE_UCAST) {
+ ecmd.wolopts = WAKE_UCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_MCAST:
+ if (ecmd.wolopts != WAKE_MCAST) {
+ ecmd.wolopts = WAKE_MCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_BCAST:
+ if (ecmd.wolopts != WAKE_BCAST) {
+ ecmd.wolopts = WAKE_BCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_ARP:
+ if (ecmd.wolopts != WAKE_ARP) {
+ ecmd.wolopts = WAKE_ARP;
+ need_update = true;
+ }
+ break;
+ case WOL_MAGIC:
+ if (ecmd.wolopts != WAKE_MAGIC) {
+ ecmd.wolopts = WAKE_MAGIC;
+ need_update = true;
+ }
+ break;
+ case WOL_MAGICSECURE:
+ if (ecmd.wolopts != WAKE_MAGICSECURE) {
+ ecmd.wolopts = WAKE_MAGICSECURE;
+ need_update = true;
+ }
+ break;
+ case WOL_OFF:
+ if (ecmd.wolopts != 0) {
+ ecmd.wolopts = 0;
+ need_update = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SWOL;
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int ethtool_set_nic_buffer_size(int *ethtool_fd, const char *ifname, const netdev_ring_param *ring) {
+ struct ethtool_ringparam ecmd = {
+ .cmd = ETHTOOL_GRINGPARAM,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+ assert(ring);
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ring->rx_pending_set && ecmd.rx_pending != ring->rx_pending) {
+ ecmd.rx_pending = ring->rx_pending;
+ need_update = true;
+ }
+
+ if (ring->rx_mini_pending_set && ecmd.rx_mini_pending != ring->rx_mini_pending) {
+ ecmd.rx_mini_pending = ring->rx_mini_pending;
+ need_update = true;
+ }
+
+ if (ring->rx_jumbo_pending_set && ecmd.rx_jumbo_pending != ring->rx_jumbo_pending) {
+ ecmd.rx_jumbo_pending = ring->rx_jumbo_pending;
+ need_update = true;
+ }
+
+ if (ring->tx_pending_set && ecmd.tx_pending != ring->tx_pending) {
+ ecmd.tx_pending = ring->tx_pending;
+ need_update = true;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SRINGPARAM;
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int get_stringset(int ethtool_fd, struct ifreq *ifr, int stringset_id, struct ethtool_gstrings **ret) {
+ _cleanup_free_ struct ethtool_gstrings *strings = NULL;
+ struct {
+ struct ethtool_sset_info info;
+ uint32_t space;
+ } buffer = {
+ .info = {
+ .cmd = ETHTOOL_GSSET_INFO,
+ .sset_mask = UINT64_C(1) << stringset_id,
+ },
+ };
+ unsigned len;
+ int r;
+
+ assert(ethtool_fd >= 0);
+ assert(ifr);
+ assert(ret);
+
+ ifr->ifr_data = (void *) &buffer.info;
+
+ r = ioctl(ethtool_fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (!buffer.info.sset_mask)
+ return -EINVAL;
+
+#pragma GCC diagnostic push
+#if HAVE_ZERO_LENGTH_BOUNDS
+# pragma GCC diagnostic ignored "-Wzero-length-bounds"
+#endif
+ len = buffer.info.data[0];
+#pragma GCC diagnostic pop
+
+ strings = malloc0(sizeof(struct ethtool_gstrings) + len * ETH_GSTRING_LEN);
+ if (!strings)
+ return -ENOMEM;
+
+ strings->cmd = ETHTOOL_GSTRINGS;
+ strings->string_set = stringset_id;
+ strings->len = len;
+
+ ifr->ifr_data = (void *) strings;
+
+ r = ioctl(ethtool_fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ *ret = TAKE_PTR(strings);
+
+ return 0;
+}
+
+static int set_features_bit(
+ const struct ethtool_gstrings *strings,
+ const char *feature,
+ bool flag,
+ struct ethtool_sfeatures *sfeatures) {
+ bool found = false;
+
+ assert(strings);
+ assert(feature);
+ assert(sfeatures);
+
+ for (size_t i = 0; i < strings->len; i++)
+ if (streq((char *) &strings->data[i * ETH_GSTRING_LEN], feature) ||
+ (endswith(feature, "-") && startswith((char *) &strings->data[i * ETH_GSTRING_LEN], feature))) {
+ size_t block, bit;
+
+ block = i / 32;
+ bit = i % 32;
+
+ sfeatures->features[block].valid |= 1 << bit;
+ SET_FLAG(sfeatures->features[block].requested, 1 << bit, flag);
+ found = true;
+ }
+
+ return found ? 0 : -ENODATA;
+}
+
+int ethtool_set_features(int *ethtool_fd, const char *ifname, const int *features) {
+ _cleanup_free_ struct ethtool_gstrings *strings = NULL;
+ struct ethtool_sfeatures *sfeatures;
+ struct ifreq ifr = {};
+ int i, r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+ assert(features);
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = get_stringset(*ethtool_fd, &ifr, ETH_SS_FEATURES, &strings);
+ if (r < 0)
+ return log_warning_errno(r, "ethtool: could not get ethtool features for %s", ifname);
+
+ sfeatures = alloca0(sizeof(struct ethtool_sfeatures) + DIV_ROUND_UP(strings->len, 32U) * sizeof(sfeatures->features[0]));
+ sfeatures->cmd = ETHTOOL_SFEATURES;
+ sfeatures->size = DIV_ROUND_UP(strings->len, 32U);
+
+ for (i = 0; i < _NET_DEV_FEAT_MAX; i++)
+ if (features[i] != -1) {
+ r = set_features_bit(strings, netdev_feature_table[i], features[i], sfeatures);
+ if (r < 0) {
+ log_warning_errno(r, "ethtool: could not find feature, ignoring: %s", netdev_feature_table[i]);
+ continue;
+ }
+ }
+
+ ifr.ifr_data = (void *) sfeatures;
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return log_warning_errno(r, "ethtool: could not set ethtool features for %s", ifname);
+
+ return 0;
+}
+
+static int get_glinksettings(int fd, struct ifreq *ifr, struct ethtool_link_usettings **ret) {
+ struct ecmd {
+ struct ethtool_link_settings req;
+ __u32 link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } ecmd = {
+ .req.cmd = ETHTOOL_GLINKSETTINGS,
+ };
+ struct ethtool_link_usettings *u;
+ unsigned offset;
+ int r;
+
+ assert(fd >= 0);
+ assert(ifr);
+ assert(ret);
+
+ /* The interaction user/kernel via the new API requires a small ETHTOOL_GLINKSETTINGS
+ handshake first to agree on the length of the link mode bitmaps. If kernel doesn't
+ agree with user, it returns the bitmap length it is expecting from user as a negative
+ length (and cmd field is 0). When kernel and user agree, kernel returns valid info in
+ all fields (ie. link mode length > 0 and cmd is ETHTOOL_GLINKSETTINGS). Based on
+ https://github.com/torvalds/linux/commit/3f1ac7a700d039c61d8d8b99f28d605d489a60cf
+ */
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ecmd.req.link_mode_masks_nwords >= 0 || ecmd.req.cmd != ETHTOOL_GLINKSETTINGS)
+ return -EOPNOTSUPP;
+
+ ecmd.req.link_mode_masks_nwords = -ecmd.req.link_mode_masks_nwords;
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ecmd.req.link_mode_masks_nwords <= 0 || ecmd.req.cmd != ETHTOOL_GLINKSETTINGS)
+ return -EOPNOTSUPP;
+
+ u = new(struct ethtool_link_usettings, 1);
+ if (!u)
+ return -ENOMEM;
+
+ *u = (struct ethtool_link_usettings) {
+ .base = ecmd.req,
+ };
+
+ offset = 0;
+ memcpy(u->link_modes.supported, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(u->link_modes.advertising, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(u->link_modes.lp_advertising, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ *ret = u;
+
+ return 0;
+}
+
+static int get_gset(int fd, struct ifreq *ifr, struct ethtool_link_usettings **ret) {
+ struct ethtool_link_usettings *e;
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_GSET,
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ifr);
+ assert(ret);
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ e = new(struct ethtool_link_usettings, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (struct ethtool_link_usettings) {
+ .base.cmd = ETHTOOL_GSET,
+ .base.link_mode_masks_nwords = 1,
+ .base.speed = ethtool_cmd_speed(&ecmd),
+ .base.duplex = ecmd.duplex,
+ .base.port = ecmd.port,
+ .base.phy_address = ecmd.phy_address,
+ .base.autoneg = ecmd.autoneg,
+ .base.mdio_support = ecmd.mdio_support,
+
+ .link_modes.supported[0] = ecmd.supported,
+ .link_modes.advertising[0] = ecmd.advertising,
+ .link_modes.lp_advertising[0] = ecmd.lp_advertising,
+ };
+
+ *ret = e;
+
+ return 0;
+}
+
+static int set_slinksettings(int fd, struct ifreq *ifr, const struct ethtool_link_usettings *u) {
+ struct {
+ struct ethtool_link_settings req;
+ __u32 link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } ecmd = {};
+ unsigned offset;
+ int r;
+
+ assert(fd >= 0);
+ assert(ifr);
+ assert(u);
+
+ if (u->base.cmd != ETHTOOL_GLINKSETTINGS || u->base.link_mode_masks_nwords <= 0)
+ return -EINVAL;
+
+ ecmd.req = u->base;
+ ecmd.req.cmd = ETHTOOL_SLINKSETTINGS;
+ offset = 0;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.supported, 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.advertising, 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.lp_advertising, 4 * ecmd.req.link_mode_masks_nwords);
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int set_sset(int fd, struct ifreq *ifr, const struct ethtool_link_usettings *u) {
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_SSET,
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ifr);
+ assert(u);
+
+ if (u->base.cmd != ETHTOOL_GSET || u->base.link_mode_masks_nwords <= 0)
+ return -EINVAL;
+
+ ecmd.supported = u->link_modes.supported[0];
+ ecmd.advertising = u->link_modes.advertising[0];
+ ecmd.lp_advertising = u->link_modes.lp_advertising[0];
+
+ ethtool_cmd_speed_set(&ecmd, u->base.speed);
+
+ ecmd.duplex = u->base.duplex;
+ ecmd.port = u->base.port;
+ ecmd.phy_address = u->base.phy_address;
+ ecmd.autoneg = u->base.autoneg;
+ ecmd.mdio_support = u->base.mdio_support;
+ ecmd.eth_tp_mdix = u->base.eth_tp_mdix;
+ ecmd.eth_tp_mdix_ctrl = u->base.eth_tp_mdix_ctrl;
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+/* If autonegotiation is disabled, the speed and duplex represent the fixed link
+ * mode and are writable if the driver supports multiple link modes. If it is
+ * enabled then they are read-only. If the link is up they represent the negotiated
+ * link mode; if the link is down, the speed is 0, %SPEED_UNKNOWN or the highest
+ * enabled speed and @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode.
+ */
+int ethtool_set_glinksettings(
+ int *fd,
+ const char *ifname,
+ int autonegotiation,
+ const uint32_t advertise[static N_ADVERTISE],
+ uint64_t speed,
+ Duplex duplex,
+ NetDevPort port) {
+
+ _cleanup_free_ struct ethtool_link_usettings *u = NULL;
+ struct ifreq ifr = {};
+ int r;
+
+ assert(fd);
+ assert(ifname);
+ assert(advertise);
+
+ if (autonegotiation != AUTONEG_DISABLE && memeqzero(advertise, sizeof(uint32_t) * N_ADVERTISE)) {
+ log_info("ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.");
+ return 0;
+ }
+
+ if (*fd < 0) {
+ r = ethtool_connect_or_warn(fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = get_glinksettings(*fd, &ifr, &u);
+ if (r < 0) {
+ r = get_gset(*fd, &ifr, &u);
+ if (r < 0)
+ return log_warning_errno(r, "ethtool: Cannot get device settings for %s : %m", ifname);
+ }
+
+ if (speed > 0)
+ u->base.speed = DIV_ROUND_UP(speed, 1000000);
+
+ if (duplex != _DUP_INVALID)
+ u->base.duplex = duplex;
+
+ if (port != _NET_DEV_PORT_INVALID)
+ u->base.port = port;
+
+ if (autonegotiation >= 0)
+ u->base.autoneg = autonegotiation;
+
+ if (!memeqzero(advertise, sizeof(uint32_t) * N_ADVERTISE)) {
+ u->base.autoneg = AUTONEG_ENABLE;
+ memcpy(&u->link_modes.advertising, advertise, sizeof(uint32_t) * N_ADVERTISE);
+ memzero((uint8_t*) &u->link_modes.advertising + sizeof(uint32_t) * N_ADVERTISE,
+ ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NBYTES - sizeof(uint32_t) * N_ADVERTISE);
+ }
+
+ if (u->base.cmd == ETHTOOL_GLINKSETTINGS)
+ r = set_slinksettings(*fd, &ifr, u);
+ else
+ r = set_sset(*fd, &ifr, u);
+ if (r < 0)
+ return log_warning_errno(r, "ethtool: Cannot set device settings for %s: %m", ifname);
+
+ return r;
+}
+
+int ethtool_set_channels(int *fd, const char *ifname, const netdev_channels *channels) {
+ struct ethtool_channels ecmd = {
+ .cmd = ETHTOOL_GCHANNELS,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(fd);
+ assert(ifname);
+ assert(channels);
+
+ if (*fd < 0) {
+ r = ethtool_connect_or_warn(fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (channels->rx_count_set && ecmd.rx_count != channels->rx_count) {
+ ecmd.rx_count = channels->rx_count;
+ need_update = true;
+ }
+
+ if (channels->tx_count_set && ecmd.tx_count != channels->tx_count) {
+ ecmd.tx_count = channels->tx_count;
+ need_update = true;
+ }
+
+ if (channels->other_count_set && ecmd.other_count != channels->other_count) {
+ ecmd.other_count = channels->other_count;
+ need_update = true;
+ }
+
+ if (channels->combined_count_set && ecmd.combined_count != channels->combined_count) {
+ ecmd.combined_count = channels->combined_count;
+ need_update = true;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SCHANNELS;
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int ethtool_set_flow_control(int *fd, const char *ifname, int rx, int tx, int autoneg) {
+ struct ethtool_pauseparam ecmd = {
+ .cmd = ETHTOOL_GPAUSEPARAM,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(fd);
+ assert(ifname);
+
+ if (*fd < 0) {
+ r = ethtool_connect_or_warn(fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (rx >= 0 && ecmd.rx_pause != (uint32_t) rx) {
+ ecmd.rx_pause = rx;
+ need_update = true;
+ }
+
+ if (tx >= 0 && ecmd.tx_pause != (uint32_t) tx) {
+ ecmd.tx_pause = tx;
+ need_update = true;
+ }
+
+ if (autoneg >= 0 && ecmd.autoneg != (uint32_t) autoneg) {
+ ecmd.autoneg = autoneg;
+ need_update = true;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SPAUSEPARAM;
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int config_parse_channel(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ netdev_channels *channels = data;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse channel value for %s=, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+ if (k < 1) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid %s= value, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "RxChannels")) {
+ channels->rx_count = k;
+ channels->rx_count_set = true;
+ } else if (streq(lvalue, "TxChannels")) {
+ channels->tx_count = k;
+ channels->tx_count_set = true;
+ } else if (streq(lvalue, "OtherChannels")) {
+ channels->other_count = k;
+ channels->other_count_set = true;
+ } else if (streq(lvalue, "CombinedChannels")) {
+ channels->combined_count = k;
+ channels->combined_count_set = true;
+ }
+
+ return 0;
+}
+
+int config_parse_advertise(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ uint32_t *advertise = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty string resets the value. */
+ memzero(advertise, sizeof(uint32_t) * N_ADVERTISE);
+ return 0;
+ }
+
+ for (p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+ enum ethtool_link_mode_bit_indices mode;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to split advertise modes '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ mode = ethtool_link_mode_bit_from_string(w);
+ /* We reuse the kernel provided enum which does not contain negative value. So, the cast
+ * below is mandatory. Otherwise, the check below always passes and access an invalid address. */
+ if ((int) mode < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse advertise mode, ignoring: %s", w);
+ continue;
+ }
+
+ advertise[mode / 32] |= 1UL << (mode % 32);
+ }
+}
+
+int config_parse_nic_buffer_size(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ netdev_ring_param *ring = data;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse interface buffer value, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (k < 1) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid %s= value, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "RxBufferSize")) {
+ ring->rx_pending = k;
+ ring->rx_pending_set = true;
+ } else if (streq(lvalue, "RxMiniBufferSize")) {
+ ring->rx_mini_pending = k;
+ ring->rx_mini_pending_set = true;
+ } else if (streq(lvalue, "RxJumboBufferSize")) {
+ ring->rx_jumbo_pending = k;
+ ring->rx_jumbo_pending_set = true;
+ } else if (streq(lvalue, "TxBufferSize")) {
+ ring->tx_pending = k;
+ ring->tx_pending_set = true;
+ }
+
+ return 0;
+}
diff --git a/src/shared/ethtool-util.h b/src/shared/ethtool-util.h
new file mode 100644
index 0000000..f94b3e1
--- /dev/null
+++ b/src/shared/ethtool-util.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <macro.h>
+#include <net/ethernet.h>
+#include <linux/ethtool.h>
+
+#include "conf-parser.h"
+
+#define N_ADVERTISE 3
+
+/* we can't use DUPLEX_ prefix, as it
+ * clashes with <linux/ethtool.h> */
+typedef enum Duplex {
+ DUP_HALF = DUPLEX_HALF,
+ DUP_FULL = DUPLEX_FULL,
+ _DUP_MAX,
+ _DUP_INVALID = -1
+} Duplex;
+
+typedef enum WakeOnLan {
+ WOL_PHY,
+ WOL_UCAST,
+ WOL_MCAST,
+ WOL_BCAST,
+ WOL_ARP,
+ WOL_MAGIC,
+ WOL_MAGICSECURE,
+ WOL_OFF,
+ _WOL_MAX,
+ _WOL_INVALID = -1
+} WakeOnLan;
+
+typedef enum NetDevFeature {
+ NET_DEV_FEAT_RX,
+ NET_DEV_FEAT_TX,
+ NET_DEV_FEAT_GSO,
+ NET_DEV_FEAT_GRO,
+ NET_DEV_FEAT_LRO,
+ NET_DEV_FEAT_TSO,
+ NET_DEV_FEAT_TSO6,
+ _NET_DEV_FEAT_MAX,
+ _NET_DEV_FEAT_INVALID = -1
+} NetDevFeature;
+
+typedef enum NetDevPort {
+ NET_DEV_PORT_TP = PORT_TP,
+ NET_DEV_PORT_AUI = PORT_AUI,
+ NET_DEV_PORT_MII = PORT_MII,
+ NET_DEV_PORT_FIBRE = PORT_FIBRE,
+ NET_DEV_PORT_BNC = PORT_BNC,
+ NET_DEV_PORT_DA = PORT_DA,
+ NET_DEV_PORT_NONE = PORT_NONE,
+ NET_DEV_PORT_OTHER = PORT_OTHER,
+ _NET_DEV_PORT_MAX,
+ _NET_DEV_PORT_INVALID = -1
+} NetDevPort;
+
+#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX)
+#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NBYTES (4 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32)
+
+/* layout of the struct passed from/to userland */
+struct ethtool_link_usettings {
+ struct ethtool_link_settings base;
+
+ struct {
+ uint32_t supported[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ uint32_t advertising[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ uint32_t lp_advertising[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } link_modes;
+};
+
+typedef struct netdev_channels {
+ uint32_t rx_count;
+ uint32_t tx_count;
+ uint32_t other_count;
+ uint32_t combined_count;
+
+ bool rx_count_set;
+ bool tx_count_set;
+ bool other_count_set;
+ bool combined_count_set;
+} netdev_channels;
+
+typedef struct netdev_ring_param {
+ uint32_t rx_pending;
+ uint32_t rx_mini_pending;
+ uint32_t rx_jumbo_pending;
+ uint32_t tx_pending;
+
+ bool rx_pending_set;
+ bool rx_mini_pending_set;
+ bool rx_jumbo_pending_set;
+ bool tx_pending_set;
+} netdev_ring_param;
+
+int ethtool_get_driver(int *ethtool_fd, const char *ifname, char **ret);
+int ethtool_get_link_info(int *ethtool_fd, const char *ifname,
+ int *ret_autonegotiation, uint64_t *ret_speed,
+ Duplex *ret_duplex, NetDevPort *ret_port);
+int ethtool_get_permanent_macaddr(int *ethtool_fd, const char *ifname, struct ether_addr *ret);
+int ethtool_set_speed(int *ethtool_fd, const char *ifname, unsigned speed, Duplex duplex);
+int ethtool_set_wol(int *ethtool_fd, const char *ifname, WakeOnLan wol);
+int ethtool_set_nic_buffer_size(int *ethtool_fd, const char *ifname, const netdev_ring_param *ring);
+int ethtool_set_features(int *ethtool_fd, const char *ifname, const int *features);
+int ethtool_set_glinksettings(int *ethtool_fd, const char *ifname,
+ int autonegotiation, const uint32_t advertise[static N_ADVERTISE],
+ uint64_t speed, Duplex duplex, NetDevPort port);
+int ethtool_set_channels(int *ethtool_fd, const char *ifname, const netdev_channels *channels);
+int ethtool_set_flow_control(int *fd, const char *ifname, int rx, int tx, int autoneg);
+
+const char *duplex_to_string(Duplex d) _const_;
+Duplex duplex_from_string(const char *d) _pure_;
+
+const char *wol_to_string(WakeOnLan wol) _const_;
+WakeOnLan wol_from_string(const char *wol) _pure_;
+
+const char *port_to_string(NetDevPort port) _const_;
+NetDevPort port_from_string(const char *port) _pure_;
+
+const char *ethtool_link_mode_bit_to_string(enum ethtool_link_mode_bit_indices val) _const_;
+enum ethtool_link_mode_bit_indices ethtool_link_mode_bit_from_string(const char *str) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_duplex);
+CONFIG_PARSER_PROTOTYPE(config_parse_wol);
+CONFIG_PARSER_PROTOTYPE(config_parse_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_channel);
+CONFIG_PARSER_PROTOTYPE(config_parse_advertise);
+CONFIG_PARSER_PROTOTYPE(config_parse_nic_buffer_size);
diff --git a/src/shared/exec-util.c b/src/shared/exec-util.c
new file mode 100644
index 0000000..61ee3b1
--- /dev/null
+++ b/src/shared/exec-util.c
@@ -0,0 +1,446 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <dirent.h>
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "serialize.h"
+#include "set.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+/* Put this test here for a lack of better place */
+assert_cc(EAGAIN == EWOULDBLOCK);
+
+static int do_spawn(const char *path, char *argv[], int stdout_fd, pid_t *pid) {
+
+ pid_t _pid;
+ int r;
+
+ if (null_or_empty_path(path)) {
+ log_debug("%s is empty (a mask).", path);
+ return 0;
+ }
+
+ r = safe_fork("(direxec)", FORK_DEATHSIG|FORK_LOG, &_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char *_argv[2];
+
+ if (stdout_fd >= 0) {
+ r = rearrange_stdio(STDIN_FILENO, stdout_fd, STDERR_FILENO);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ if (!argv) {
+ _argv[0] = (char*) path;
+ _argv[1] = NULL;
+ argv = _argv;
+ } else
+ argv[0] = (char*) path;
+
+ execv(path, argv);
+ log_error_errno(errno, "Failed to execute %s: %m", path);
+ _exit(EXIT_FAILURE);
+ }
+
+ *pid = _pid;
+ return 1;
+}
+
+static int do_execute(
+ char **directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ int output_fd,
+ char *argv[],
+ char *envp[],
+ ExecDirFlags flags) {
+
+ _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
+ _cleanup_strv_free_ char **paths = NULL;
+ char **path, **e;
+ int r;
+ bool parallel_execution;
+
+ /* We fork this all off from a child process so that we can somewhat cleanly make
+ * use of SIGALRM to set a time limit.
+ *
+ * We attempt to perform parallel execution if configured by the user, however
+ * if `callbacks` is nonnull, execution must be serial.
+ */
+ parallel_execution = FLAGS_SET(flags, EXEC_DIR_PARALLEL) && !callbacks;
+
+ r = conf_files_list_strv(&paths, NULL, NULL, CONF_FILES_EXECUTABLE|CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char* const*) directories);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate executables: %m");
+
+ if (parallel_execution) {
+ pids = hashmap_new(NULL);
+ if (!pids)
+ return log_oom();
+ }
+
+ /* Abort execution of this process after the timeout. We simply rely on SIGALRM as
+ * default action terminating the process, and turn on alarm(). */
+
+ if (timeout != USEC_INFINITY)
+ alarm(DIV_ROUND_UP(timeout, USEC_PER_SEC));
+
+ STRV_FOREACH(e, envp)
+ if (putenv(*e) != 0)
+ return log_error_errno(errno, "Failed to set environment variable: %m");
+
+ STRV_FOREACH(path, paths) {
+ _cleanup_free_ char *t = NULL;
+ _cleanup_close_ int fd = -1;
+ pid_t pid;
+
+ t = strdup(*path);
+ if (!t)
+ return log_oom();
+
+ if (callbacks) {
+ fd = open_serialization_fd(basename(*path));
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open serialization file: %m");
+ }
+
+ r = do_spawn(t, argv, fd, &pid);
+ if (r <= 0)
+ continue;
+
+ if (parallel_execution) {
+ r = hashmap_put(pids, PID_TO_PTR(pid), t);
+ if (r < 0)
+ return log_oom();
+ t = NULL;
+ } else {
+ r = wait_for_terminate_and_check(t, pid, WAIT_LOG);
+ if (FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS)) {
+ if (r < 0)
+ continue;
+ } else if (r > 0)
+ return r;
+
+ if (callbacks) {
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to seek on serialization fd: %m");
+
+ r = callbacks[STDOUT_GENERATE](fd, callback_args[STDOUT_GENERATE]);
+ fd = -1;
+ if (r < 0)
+ return log_error_errno(r, "Failed to process output from %s: %m", *path);
+ }
+ }
+ }
+
+ if (callbacks) {
+ r = callbacks[STDOUT_COLLECT](output_fd, callback_args[STDOUT_COLLECT]);
+ if (r < 0)
+ return log_error_errno(r, "Callback two failed: %m");
+ }
+
+ while (!hashmap_isempty(pids)) {
+ _cleanup_free_ char *t = NULL;
+ pid_t pid;
+
+ pid = PTR_TO_PID(hashmap_first_key(pids));
+ assert(pid > 0);
+
+ t = hashmap_remove(pids, PID_TO_PTR(pid));
+ assert(t);
+
+ r = wait_for_terminate_and_check(t, pid, WAIT_LOG);
+ if (!FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS) && r > 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_directories(
+ const char* const* directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ char *argv[],
+ char *envp[],
+ ExecDirFlags flags) {
+
+ char **dirs = (char**) directories;
+ _cleanup_close_ int fd = -1;
+ char *name;
+ int r;
+ pid_t executor_pid;
+
+ assert(!strv_isempty(dirs));
+
+ name = basename(dirs[0]);
+ assert(!isempty(name));
+
+ if (callbacks) {
+ assert(callback_args);
+ assert(callbacks[STDOUT_GENERATE]);
+ assert(callbacks[STDOUT_COLLECT]);
+ assert(callbacks[STDOUT_CONSUME]);
+
+ fd = open_serialization_fd(name);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open serialization file: %m");
+ }
+
+ /* Executes all binaries in the directories serially or in parallel and waits for
+ * them to finish. Optionally a timeout is applied. If a file with the same name
+ * exists in more than one directory, the earliest one wins. */
+
+ r = safe_fork("(sd-executor)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &executor_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ r = do_execute(dirs, timeout, callbacks, callback_args, fd, argv, envp, flags);
+ _exit(r < 0 ? EXIT_FAILURE : r);
+ }
+
+ r = wait_for_terminate_and_check("(sd-executor)", executor_pid, 0);
+ if (r < 0)
+ return r;
+ if (!FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS) && r > 0)
+ return r;
+
+ if (!callbacks)
+ return 0;
+
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to rewind serialization fd: %m");
+
+ r = callbacks[STDOUT_CONSUME](fd, callback_args[STDOUT_CONSUME]);
+ fd = -1;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse returned data: %m");
+ return 0;
+}
+
+static int gather_environment_generate(int fd, void *arg) {
+ char ***env = arg, **x, **y;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **new = NULL;
+ int r;
+
+ /* Read a series of VAR=value assignments from fd, use them to update the list of
+ * variables in env. Also update the exported environment.
+ *
+ * fd is always consumed, even on error.
+ */
+
+ assert(env);
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ r = load_env_file_pairs(f, NULL, &new);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(x, y, new) {
+ char *p;
+
+ if (!env_name_is_valid(*x)) {
+ log_warning("Invalid variable assignment \"%s=...\", ignoring.", *x);
+ continue;
+ }
+
+ p = strjoin(*x, "=", *y);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_env_replace(env, p);
+ if (r < 0)
+ return r;
+
+ if (setenv(*x, *y, true) < 0)
+ return -errno;
+ }
+
+ return r;
+}
+
+static int gather_environment_collect(int fd, void *arg) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char ***env = arg;
+ int r;
+
+ /* Write out a series of env=cescape(VAR=value) assignments to fd. */
+
+ assert(env);
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ r = serialize_strv(f, "env", *env);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int gather_environment_consume(int fd, void *arg) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char ***env = arg;
+ int r = 0;
+
+ /* Read a series of env=cescape(VAR=value) assignments from fd into env. */
+
+ assert(env);
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *v;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return k;
+ if (k == 0)
+ break;
+
+ v = startswith(line, "env=");
+ if (!v) {
+ log_debug("Serialization line \"%s\" unexpectedly didn't start with \"env=\".", line);
+ if (r == 0)
+ r = -EINVAL;
+
+ continue;
+ }
+
+ k = deserialize_environment(v, env);
+ if (k < 0) {
+ log_debug_errno(k, "Invalid serialization line \"%s\": %m", line);
+
+ if (r == 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+int exec_command_flags_from_strv(char **ex_opts, ExecCommandFlags *flags) {
+ ExecCommandFlags ex_flag, ret_flags = 0;
+ char **opt;
+
+ assert(flags);
+
+ STRV_FOREACH(opt, ex_opts) {
+ ex_flag = exec_command_flags_from_string(*opt);
+ if (ex_flag >= 0)
+ ret_flags |= ex_flag;
+ else
+ return -EINVAL;
+ }
+
+ *flags = ret_flags;
+
+ return 0;
+}
+
+int exec_command_flags_to_strv(ExecCommandFlags flags, char ***ex_opts) {
+ _cleanup_strv_free_ char **ret_opts = NULL;
+ ExecCommandFlags it = flags;
+ const char *str;
+ int i, r;
+
+ assert(ex_opts);
+
+ for (i = 0; it != 0; it &= ~(1 << i), i++) {
+ if (FLAGS_SET(flags, (1 << i))) {
+ str = exec_command_flags_to_string(1 << i);
+ if (!str)
+ return -EINVAL;
+
+ r = strv_extend(&ret_opts, str);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *ex_opts = TAKE_PTR(ret_opts);
+
+ return 0;
+}
+
+const gather_stdout_callback_t gather_environment[] = {
+ gather_environment_generate,
+ gather_environment_collect,
+ gather_environment_consume,
+};
+
+static const char* const exec_command_strings[] = {
+ "ignore-failure", /* EXEC_COMMAND_IGNORE_FAILURE */
+ "privileged", /* EXEC_COMMAND_FULLY_PRIVILEGED */
+ "no-setuid", /* EXEC_COMMAND_NO_SETUID */
+ "ambient", /* EXEC_COMMAND_AMBIENT_MAGIC */
+ "no-env-expand", /* EXEC_COMMAND_NO_ENV_EXPAND */
+};
+
+const char* exec_command_flags_to_string(ExecCommandFlags i) {
+ size_t idx;
+
+ for (idx = 0; idx < ELEMENTSOF(exec_command_strings); idx++)
+ if (i == (1 << idx))
+ return exec_command_strings[idx];
+
+ return NULL;
+}
+
+ExecCommandFlags exec_command_flags_from_string(const char *s) {
+ ssize_t idx;
+
+ idx = string_table_lookup(exec_command_strings, ELEMENTSOF(exec_command_strings), s);
+
+ if (idx < 0)
+ return _EXEC_COMMAND_FLAGS_INVALID;
+ else
+ return 1 << idx;
+}
diff --git a/src/shared/exec-util.h b/src/shared/exec-util.h
new file mode 100644
index 0000000..a69d57c
--- /dev/null
+++ b/src/shared/exec-util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+
+typedef int (*gather_stdout_callback_t) (int fd, void *arg);
+
+enum {
+ STDOUT_GENERATE, /* from generators to helper process */
+ STDOUT_COLLECT, /* from helper process to main process */
+ STDOUT_CONSUME, /* process data in main process */
+ _STDOUT_CONSUME_MAX,
+};
+
+typedef enum {
+ EXEC_DIR_NONE = 0, /* No execdir flags */
+ EXEC_DIR_PARALLEL = 1 << 0, /* Execute scripts in parallel, if possible */
+ EXEC_DIR_IGNORE_ERRORS = 1 << 1, /* Ignore non-zero exit status of scripts */
+} ExecDirFlags;
+
+typedef enum ExecCommandFlags {
+ EXEC_COMMAND_IGNORE_FAILURE = 1 << 0,
+ EXEC_COMMAND_FULLY_PRIVILEGED = 1 << 1,
+ EXEC_COMMAND_NO_SETUID = 1 << 2,
+ EXEC_COMMAND_AMBIENT_MAGIC = 1 << 3,
+ EXEC_COMMAND_NO_ENV_EXPAND = 1 << 4,
+ _EXEC_COMMAND_FLAGS_INVALID = -1,
+} ExecCommandFlags;
+
+int execute_directories(
+ const char* const* directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ char *argv[],
+ char *envp[],
+ ExecDirFlags flags);
+
+int exec_command_flags_from_strv(char **ex_opts, ExecCommandFlags *flags);
+int exec_command_flags_to_strv(ExecCommandFlags flags, char ***ex_opts);
+
+extern const gather_stdout_callback_t gather_environment[_STDOUT_CONSUME_MAX];
+
+const char* exec_command_flags_to_string(ExecCommandFlags i);
+ExecCommandFlags exec_command_flags_from_string(const char *s);
diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c
new file mode 100644
index 0000000..b71dd7a
--- /dev/null
+++ b/src/shared/exit-status.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <sysexits.h>
+
+#include "exit-status.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "set.h"
+#include "string-util.h"
+
+const ExitStatusMapping exit_status_mappings[256] = {
+ /* Exit status ranges:
+ *
+ * 0…1 │ ISO C, EXIT_SUCCESS + EXIT_FAILURE
+ * 2…7 │ LSB exit codes for init scripts
+ * 8…63 │ (Currently unmapped)
+ * 64…78 │ BSD defined exit codes
+ * 79…199 │ (Currently unmapped)
+ * 200…242 │ systemd's private error codes (might be extended to 254 in future development)
+ * 243…254 │ (Currently unmapped, but see above)
+ *
+ * 255 │ EXIT_EXCEPTION (We use this to propagate exit-by-signal events. It's frequently used by others apps (like bash)
+ * │ to indicate exit reason that cannot really be expressed in a single exit status value — such as a propagated
+ * │ signal or such, and we follow that logic here.)
+ */
+
+ [EXIT_SUCCESS] = { "SUCCESS", EXIT_STATUS_LIBC },
+ [EXIT_FAILURE] = { "FAILURE", EXIT_STATUS_LIBC },
+
+ [EXIT_CHDIR] = { "CHDIR", EXIT_STATUS_SYSTEMD },
+ [EXIT_NICE] = { "NICE", EXIT_STATUS_SYSTEMD },
+ [EXIT_FDS] = { "FDS", EXIT_STATUS_SYSTEMD },
+ [EXIT_EXEC] = { "EXEC", EXIT_STATUS_SYSTEMD },
+ [EXIT_MEMORY] = { "MEMORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_LIMITS] = { "LIMITS", EXIT_STATUS_SYSTEMD },
+ [EXIT_OOM_ADJUST] = { "OOM_ADJUST", EXIT_STATUS_SYSTEMD },
+ [EXIT_SIGNAL_MASK] = { "SIGNAL_MASK", EXIT_STATUS_SYSTEMD },
+ [EXIT_STDIN] = { "STDIN", EXIT_STATUS_SYSTEMD },
+ [EXIT_STDOUT] = { "STDOUT", EXIT_STATUS_SYSTEMD },
+ [EXIT_CHROOT] = { "CHROOT", EXIT_STATUS_SYSTEMD },
+ [EXIT_IOPRIO] = { "IOPRIO", EXIT_STATUS_SYSTEMD },
+ [EXIT_TIMERSLACK] = { "TIMERSLACK", EXIT_STATUS_SYSTEMD },
+ [EXIT_SECUREBITS] = { "SECUREBITS", EXIT_STATUS_SYSTEMD },
+ [EXIT_SETSCHEDULER] = { "SETSCHEDULER", EXIT_STATUS_SYSTEMD },
+ [EXIT_CPUAFFINITY] = { "CPUAFFINITY", EXIT_STATUS_SYSTEMD },
+ [EXIT_GROUP] = { "GROUP", EXIT_STATUS_SYSTEMD },
+ [EXIT_USER] = { "USER", EXIT_STATUS_SYSTEMD },
+ [EXIT_CAPABILITIES] = { "CAPABILITIES", EXIT_STATUS_SYSTEMD },
+ [EXIT_CGROUP] = { "CGROUP", EXIT_STATUS_SYSTEMD },
+ [EXIT_SETSID] = { "SETSID", EXIT_STATUS_SYSTEMD },
+ [EXIT_CONFIRM] = { "CONFIRM", EXIT_STATUS_SYSTEMD },
+ [EXIT_STDERR] = { "STDERR", EXIT_STATUS_SYSTEMD },
+ [EXIT_PAM] = { "PAM", EXIT_STATUS_SYSTEMD },
+ [EXIT_NETWORK] = { "NETWORK", EXIT_STATUS_SYSTEMD },
+ [EXIT_NAMESPACE] = { "NAMESPACE", EXIT_STATUS_SYSTEMD },
+ [EXIT_NO_NEW_PRIVILEGES] = { "NO_NEW_PRIVILEGES", EXIT_STATUS_SYSTEMD },
+ [EXIT_SECCOMP] = { "SECCOMP", EXIT_STATUS_SYSTEMD },
+ [EXIT_SELINUX_CONTEXT] = { "SELINUX_CONTEXT", EXIT_STATUS_SYSTEMD },
+ [EXIT_PERSONALITY] = { "PERSONALITY", EXIT_STATUS_SYSTEMD },
+ [EXIT_APPARMOR_PROFILE] = { "APPARMOR", EXIT_STATUS_SYSTEMD },
+ [EXIT_ADDRESS_FAMILIES] = { "ADDRESS_FAMILIES", EXIT_STATUS_SYSTEMD },
+ [EXIT_RUNTIME_DIRECTORY] = { "RUNTIME_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_CHOWN] = { "CHOWN", EXIT_STATUS_SYSTEMD },
+ [EXIT_SMACK_PROCESS_LABEL] = { "SMACK_PROCESS_LABEL", EXIT_STATUS_SYSTEMD },
+ [EXIT_KEYRING] = { "KEYRING", EXIT_STATUS_SYSTEMD },
+ [EXIT_STATE_DIRECTORY] = { "STATE_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_CACHE_DIRECTORY] = { "CACHE_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_LOGS_DIRECTORY] = { "LOGS_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_CONFIGURATION_DIRECTORY] = { "CONFIGURATION_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_NUMA_POLICY] = { "NUMA_POLICY", EXIT_STATUS_SYSTEMD },
+ [EXIT_CREDENTIALS] = { "CREDENTIALS", EXIT_STATUS_SYSTEMD },
+
+ [EXIT_EXCEPTION] = { "EXCEPTION", EXIT_STATUS_SYSTEMD },
+
+ [EXIT_INVALIDARGUMENT] = { "INVALIDARGUMENT", EXIT_STATUS_LSB },
+ [EXIT_NOTIMPLEMENTED] = { "NOTIMPLEMENTED", EXIT_STATUS_LSB },
+ [EXIT_NOPERMISSION] = { "NOPERMISSION", EXIT_STATUS_LSB },
+ [EXIT_NOTINSTALLED] = { "NOTINSTALLED", EXIT_STATUS_LSB },
+ [EXIT_NOTCONFIGURED] = { "NOTCONFIGURED", EXIT_STATUS_LSB },
+ [EXIT_NOTRUNNING] = { "NOTRUNNING", EXIT_STATUS_LSB },
+
+ [EX_USAGE] = { "USAGE", EXIT_STATUS_BSD },
+ [EX_DATAERR] = { "DATAERR", EXIT_STATUS_BSD },
+ [EX_NOINPUT] = { "NOINPUT", EXIT_STATUS_BSD },
+ [EX_NOUSER] = { "NOUSER", EXIT_STATUS_BSD },
+ [EX_NOHOST] = { "NOHOST", EXIT_STATUS_BSD },
+ [EX_UNAVAILABLE] = { "UNAVAILABLE", EXIT_STATUS_BSD },
+ [EX_SOFTWARE] = { "SOFTWARE", EXIT_STATUS_BSD },
+ [EX_OSERR] = { "OSERR", EXIT_STATUS_BSD },
+ [EX_OSFILE] = { "OSFILE", EXIT_STATUS_BSD },
+ [EX_CANTCREAT] = { "CANTCREAT", EXIT_STATUS_BSD },
+ [EX_IOERR] = { "IOERR", EXIT_STATUS_BSD },
+ [EX_TEMPFAIL] = { "TEMPFAIL", EXIT_STATUS_BSD },
+ [EX_PROTOCOL] = { "PROTOCOL", EXIT_STATUS_BSD },
+ [EX_NOPERM] = { "NOPERM", EXIT_STATUS_BSD },
+ [EX_CONFIG] = { "CONFIG", EXIT_STATUS_BSD },
+};
+
+const char* exit_status_to_string(int code, ExitStatusClass class) {
+ if (code < 0 || (size_t) code >= ELEMENTSOF(exit_status_mappings))
+ return NULL;
+ return class & exit_status_mappings[code].class ? exit_status_mappings[code].name : NULL;
+}
+
+const char* exit_status_class(int code) {
+ if (code < 0 || (size_t) code >= ELEMENTSOF(exit_status_mappings))
+ return NULL;
+
+ switch (exit_status_mappings[code].class) {
+ case EXIT_STATUS_LIBC:
+ return "libc";
+ case EXIT_STATUS_SYSTEMD:
+ return "systemd";
+ case EXIT_STATUS_LSB:
+ return "LSB";
+ case EXIT_STATUS_BSD:
+ return "BSD";
+ default: return NULL;
+ }
+}
+
+int exit_status_from_string(const char *s) {
+ uint8_t val;
+ int r;
+
+ for (size_t i = 0; i < ELEMENTSOF(exit_status_mappings); i++)
+ if (streq_ptr(s, exit_status_mappings[i].name))
+ return i;
+
+ r = safe_atou8(s, &val);
+ if (r < 0)
+ return r;
+
+ return val;
+}
+
+bool is_clean_exit(int code, int status, ExitClean clean, const ExitStatusSet *success_status) {
+ if (code == CLD_EXITED)
+ return status == 0 ||
+ (success_status &&
+ bitmap_isset(&success_status->status, status));
+
+ /* If a daemon does not implement handlers for some of the signals, we do not consider this an
+ unclean shutdown */
+ if (code == CLD_KILLED)
+ return
+ (clean == EXIT_CLEAN_DAEMON && IN_SET(status, SIGHUP, SIGINT, SIGTERM, SIGPIPE)) ||
+ (success_status &&
+ bitmap_isset(&success_status->signal, status));
+
+ return false;
+}
+
+void exit_status_set_free(ExitStatusSet *x) {
+ assert(x);
+
+ bitmap_clear(&x->status);
+ bitmap_clear(&x->signal);
+}
+
+bool exit_status_set_is_empty(const ExitStatusSet *x) {
+ if (!x)
+ return true;
+
+ return bitmap_isclear(&x->status) && bitmap_isclear(&x->signal);
+}
+
+bool exit_status_set_test(const ExitStatusSet *x, int code, int status) {
+ if (code == CLD_EXITED && bitmap_isset(&x->status, status))
+ return true;
+
+ if (IN_SET(code, CLD_KILLED, CLD_DUMPED) && bitmap_isset(&x->signal, status))
+ return true;
+
+ return false;
+}
diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h
new file mode 100644
index 0000000..05707bf
--- /dev/null
+++ b/src/shared/exit-status.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "bitmap.h"
+#include "hashmap.h"
+#include "macro.h"
+
+/* This defines pretty names for the LSB 'start' verb exit codes. Note that they shouldn't be confused with
+ * the LSB 'status' verb exit codes which are defined very differently. For details see:
+ *
+ * https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
+ */
+
+enum {
+ /* EXIT_SUCCESS defined by libc */
+ /* EXIT_FAILURE defined by libc */
+ EXIT_INVALIDARGUMENT = 2,
+ EXIT_NOTIMPLEMENTED = 3,
+ EXIT_NOPERMISSION = 4,
+ EXIT_NOTINSTALLED = 5,
+ EXIT_NOTCONFIGURED = 6,
+ EXIT_NOTRUNNING = 7,
+
+ /* BSD's sysexits.h defines a couple EX_xyz exit codes in the range 64 … 78 */
+
+ /* The LSB suggests that error codes >= 200 are "reserved". We use them here under the assumption
+ * that they hence are unused by init scripts. */
+ EXIT_CHDIR = 200,
+ EXIT_NICE,
+ EXIT_FDS,
+ EXIT_EXEC,
+ EXIT_MEMORY,
+ EXIT_LIMITS,
+ EXIT_OOM_ADJUST,
+ EXIT_SIGNAL_MASK,
+ EXIT_STDIN,
+ EXIT_STDOUT,
+ EXIT_CHROOT, /* 210 */
+ EXIT_IOPRIO,
+ EXIT_TIMERSLACK,
+ EXIT_SECUREBITS,
+ EXIT_SETSCHEDULER,
+ EXIT_CPUAFFINITY,
+ EXIT_GROUP,
+ EXIT_USER,
+ EXIT_CAPABILITIES,
+ EXIT_CGROUP,
+ EXIT_SETSID, /* 220 */
+ EXIT_CONFIRM,
+ EXIT_STDERR,
+ _EXIT_RESERVED, /* used to be tcpwrap, don't reuse! */
+ EXIT_PAM,
+ EXIT_NETWORK,
+ EXIT_NAMESPACE,
+ EXIT_NO_NEW_PRIVILEGES,
+ EXIT_SECCOMP,
+ EXIT_SELINUX_CONTEXT,
+ EXIT_PERSONALITY, /* 230 */
+ EXIT_APPARMOR_PROFILE,
+ EXIT_ADDRESS_FAMILIES,
+ EXIT_RUNTIME_DIRECTORY,
+ _EXIT_RESERVED2, /* used to be used by kdbus, don't reuse */
+ EXIT_CHOWN,
+ EXIT_SMACK_PROCESS_LABEL,
+ EXIT_KEYRING,
+ EXIT_STATE_DIRECTORY,
+ EXIT_CACHE_DIRECTORY,
+ EXIT_LOGS_DIRECTORY, /* 240 */
+ EXIT_CONFIGURATION_DIRECTORY,
+ EXIT_NUMA_POLICY,
+ EXIT_CREDENTIALS,
+
+ EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
+};
+
+typedef enum ExitStatusClass {
+ EXIT_STATUS_LIBC = 1 << 0, /* libc EXIT_STATUS/EXIT_FAILURE */
+ EXIT_STATUS_SYSTEMD = 1 << 1, /* systemd's own exit codes */
+ EXIT_STATUS_LSB = 1 << 2, /* LSB exit codes */
+ EXIT_STATUS_BSD = 1 << 3, /* BSD (EX_xyz) exit codes */
+ EXIT_STATUS_FULL = EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD | EXIT_STATUS_LSB | EXIT_STATUS_BSD,
+} ExitStatusClass;
+
+typedef struct ExitStatusSet {
+ Bitmap status;
+ Bitmap signal;
+} ExitStatusSet;
+
+const char* exit_status_to_string(int code, ExitStatusClass class) _const_;
+const char* exit_status_class(int code) _const_;
+int exit_status_from_string(const char *s) _pure_;
+
+typedef struct ExitStatusMapping {
+ const char *name;
+ ExitStatusClass class;
+} ExitStatusMapping;
+
+extern const ExitStatusMapping exit_status_mappings[256];
+
+typedef enum ExitClean {
+ EXIT_CLEAN_DAEMON,
+ EXIT_CLEAN_COMMAND,
+} ExitClean;
+
+bool is_clean_exit(int code, int status, ExitClean clean, const ExitStatusSet *success_status);
+
+void exit_status_set_free(ExitStatusSet *x);
+bool exit_status_set_is_empty(const ExitStatusSet *x);
+bool exit_status_set_test(const ExitStatusSet *x, int code, int status);
diff --git a/src/shared/fdset.c b/src/shared/fdset.c
new file mode 100644
index 0000000..679e4aa
--- /dev/null
+++ b/src/shared/fdset.c
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+
+#define MAKE_SET(s) ((Set*) s)
+#define MAKE_FDSET(s) ((FDSet*) s)
+
+FDSet *fdset_new(void) {
+ return MAKE_FDSET(set_new(NULL));
+}
+
+int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds) {
+ size_t i;
+ FDSet *s;
+ int r;
+
+ assert(ret);
+
+ s = fdset_new();
+ if (!s)
+ return -ENOMEM;
+
+ for (i = 0; i < n_fds; i++) {
+
+ r = fdset_put(s, fds[i]);
+ if (r < 0) {
+ set_free(MAKE_SET(s));
+ return r;
+ }
+ }
+
+ *ret = s;
+ return 0;
+}
+
+void fdset_close(FDSet *s) {
+ void *p;
+
+ while ((p = set_steal_first(MAKE_SET(s)))) {
+ /* Valgrind's fd might have ended up in this set here, due to fdset_new_fill(). We'll ignore
+ * all failures here, so that the EBADFD that valgrind will return us on close() doesn't
+ * influence us */
+
+ /* When reloading duplicates of the private bus connection fds and suchlike are closed here,
+ * which has no effect at all, since they are only duplicates. So don't be surprised about
+ * these log messages. */
+
+ log_debug("Closing set fd %i", PTR_TO_FD(p));
+ (void) close_nointr(PTR_TO_FD(p));
+ }
+}
+
+FDSet* fdset_free(FDSet *s) {
+ fdset_close(s);
+ set_free(MAKE_SET(s));
+ return NULL;
+}
+
+int fdset_put(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return set_put(MAKE_SET(s), FD_TO_PTR(fd));
+}
+
+int fdset_put_dup(FDSet *s, int fd) {
+ int copy, r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ r = fdset_put(s, copy);
+ if (r < 0) {
+ safe_close(copy);
+ return r;
+ }
+
+ return copy;
+}
+
+bool fdset_contains(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return !!set_get(MAKE_SET(s), FD_TO_PTR(fd));
+}
+
+int fdset_remove(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT;
+}
+
+int fdset_new_fill(FDSet **_s) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+ FDSet *s;
+
+ assert(_s);
+
+ /* Creates an fdset and fills in all currently open file
+ * descriptors. */
+
+ d = opendir("/proc/self/fd");
+ if (!d)
+ return -errno;
+
+ s = fdset_new();
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int fd = -1;
+
+ r = safe_atoi(de->d_name, &fd);
+ if (r < 0)
+ goto finish;
+
+ if (fd < 3)
+ continue;
+
+ if (fd == dirfd(d))
+ continue;
+
+ r = fdset_put(s, fd);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = 0;
+ *_s = TAKE_PTR(s);
+
+finish:
+ /* We won't close the fds here! */
+ if (s)
+ set_free(MAKE_SET(s));
+
+ return r;
+}
+
+int fdset_cloexec(FDSet *fds, bool b) {
+ void *p;
+ int r;
+
+ assert(fds);
+
+ SET_FOREACH(p, MAKE_SET(fds)) {
+ r = fd_cloexec(PTR_TO_FD(p), b);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int fdset_new_listen_fds(FDSet **_s, bool unset) {
+ int n, fd, r;
+ FDSet *s;
+
+ assert(_s);
+
+ /* Creates an fdset and fills in all passed file descriptors */
+
+ s = fdset_new();
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n = sd_listen_fds(unset);
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) {
+ r = fdset_put(s, fd);
+ if (r < 0)
+ goto fail;
+ }
+
+ *_s = s;
+ return 0;
+
+fail:
+ if (s)
+ set_free(MAKE_SET(s));
+
+ return r;
+}
+
+int fdset_close_others(FDSet *fds) {
+ void *e;
+ int *a = NULL;
+ size_t j = 0, m;
+
+ m = fdset_size(fds);
+
+ if (m > 0) {
+ a = newa(int, m);
+ SET_FOREACH(e, MAKE_SET(fds))
+ a[j++] = PTR_TO_FD(e);
+ }
+
+ assert(j == m);
+
+ return close_all_fds(a, j);
+}
+
+unsigned fdset_size(FDSet *fds) {
+ return set_size(MAKE_SET(fds));
+}
+
+bool fdset_isempty(FDSet *fds) {
+ return set_isempty(MAKE_SET(fds));
+}
+
+int fdset_iterate(FDSet *s, Iterator *i) {
+ void *p;
+
+ if (!set_iterate(MAKE_SET(s), i, &p))
+ return -ENOENT;
+
+ return PTR_TO_FD(p);
+}
+
+int fdset_steal_first(FDSet *fds) {
+ void *p;
+
+ p = set_steal_first(MAKE_SET(fds));
+ if (!p)
+ return -ENOENT;
+
+ return PTR_TO_FD(p);
+}
diff --git a/src/shared/fdset.h b/src/shared/fdset.h
new file mode 100644
index 0000000..39d15ee
--- /dev/null
+++ b/src/shared/fdset.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+
+typedef struct FDSet FDSet;
+
+FDSet* fdset_new(void);
+FDSet* fdset_free(FDSet *s);
+
+int fdset_put(FDSet *s, int fd);
+int fdset_put_dup(FDSet *s, int fd);
+
+bool fdset_contains(FDSet *s, int fd);
+int fdset_remove(FDSet *s, int fd);
+
+int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds);
+int fdset_new_fill(FDSet **ret);
+int fdset_new_listen_fds(FDSet **ret, bool unset);
+
+int fdset_cloexec(FDSet *fds, bool b);
+
+int fdset_close_others(FDSet *fds);
+
+unsigned fdset_size(FDSet *fds);
+bool fdset_isempty(FDSet *fds);
+
+int fdset_iterate(FDSet *s, Iterator *i);
+
+int fdset_steal_first(FDSet *fds);
+
+void fdset_close(FDSet *fds);
+
+#define _FDSET_FOREACH(fd, fds, i) \
+ for (Iterator i = ITERATOR_FIRST; ((fd) = fdset_iterate((fds), &i)) >= 0; )
+#define FDSET_FOREACH(fd, fds) \
+ _FDSET_FOREACH(fd, fds, UNIQ_T(i, UNIQ))
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FDSet*, fdset_free);
+#define _cleanup_fdset_free_ _cleanup_(fdset_freep)
diff --git a/src/shared/fileio-label.c b/src/shared/fileio-label.c
new file mode 100644
index 0000000..d03b054
--- /dev/null
+++ b/src/shared/fileio-label.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+
+#include "fileio-label.h"
+#include "fileio.h"
+#include "selinux-util.h"
+
+int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts) {
+ int r;
+
+ r = mac_selinux_create_file_prepare(fn, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = write_string_file_ts(fn, line, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC, ts);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
+
+int create_shutdown_run_nologin_or_warn(void) {
+ int r;
+
+ /* This is used twice: once in systemd-user-sessions.service, in order to block logins when we actually go
+ * down, and once in systemd-logind.service when shutdowns are scheduled, and logins are to be turned off a bit
+ * in advance. We use the same wording of the message in both cases. */
+
+ r = write_string_file_atomic_label("/run/nologin",
+ "System is going down. Unprivileged users are not permitted to log in anymore. "
+ "For technical details, see pam_nologin(8).");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/nologin: %m");
+
+ return 0;
+}
diff --git a/src/shared/fileio-label.h b/src/shared/fileio-label.h
new file mode 100644
index 0000000..03b4a16
--- /dev/null
+++ b/src/shared/fileio-label.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to
+ * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not
+ * for all */
+
+int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts);
+static inline int write_string_file_atomic_label(const char *fn, const char *line) {
+ return write_string_file_atomic_label_ts(fn, line, NULL);
+}
+
+int create_shutdown_run_nologin_or_warn(void);
diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c
new file mode 100644
index 0000000..007d2cb
--- /dev/null
+++ b/src/shared/firewall-util.c
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* Temporary work-around for broken glibc vs. linux kernel header definitions
+ * This is already fixed upstream, remove this when distributions have updated.
+ */
+#define _NET_IF_H 1
+
+#include <arpa/inet.h>
+#include <endian.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <net/if.h>
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+#include <linux/if.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/nf_nat.h>
+#include <linux/netfilter/xt_addrtype.h>
+#include <libiptc/libiptc.h>
+
+#include "alloc-util.h"
+#include "firewall-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "socket-util.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free);
+
+static int entry_fill_basics(
+ struct ipt_entry *entry,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+
+ assert(entry);
+
+ if (out_interface && !ifname_valid(out_interface))
+ return -EINVAL;
+ if (in_interface && !ifname_valid(in_interface))
+ return -EINVAL;
+
+ entry->ip.proto = protocol;
+
+ if (in_interface) {
+ size_t l;
+
+ l = strlen(in_interface);
+ assert(l < sizeof entry->ip.iniface);
+ assert(l < sizeof entry->ip.iniface_mask);
+
+ strcpy(entry->ip.iniface, in_interface);
+ memset(entry->ip.iniface_mask, 0xFF, l + 1);
+ }
+ if (source) {
+ entry->ip.src = source->in;
+ in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
+ }
+
+ if (out_interface) {
+ size_t l = strlen(out_interface);
+ assert(l < sizeof entry->ip.outiface);
+ assert(l < sizeof entry->ip.outiface_mask);
+
+ strcpy(entry->ip.outiface, out_interface);
+ memset(entry->ip.outiface_mask, 0xFF, l + 1);
+ }
+ if (destination) {
+ entry->ip.dst = destination->in;
+ in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
+ }
+
+ return 0;
+}
+
+int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+
+ static const xt_chainlabel chain = "POSTROUTING";
+ _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
+ struct ipt_entry *entry, *mask;
+ struct ipt_entry_target *t;
+ size_t sz;
+ struct nf_nat_ipv4_multi_range_compat *mr;
+ int r;
+
+ if (af != AF_INET)
+ return -EOPNOTSUPP;
+
+ if (!IN_SET(protocol, 0, IPPROTO_TCP, IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ h = iptc_init("nat");
+ if (!h)
+ return -errno;
+
+ sz = XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+
+ /* Put together the entry we want to add or remove */
+ entry = alloca0(sz);
+ entry->next_offset = sz;
+ entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry));
+ r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen);
+ if (r < 0)
+ return r;
+
+ /* Fill in target part */
+ t = ipt_get_target(entry);
+ t->u.target_size =
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+ strncpy(t->u.user.name, "MASQUERADE", sizeof(t->u.user.name));
+ mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
+ mr->rangesize = 1;
+
+ /* Create a search mask entry */
+ mask = alloca(sz);
+ memset(mask, 0xFF, sz);
+
+ if (add) {
+ if (iptc_check_entry(chain, entry, (unsigned char*) mask, h))
+ return 0;
+ if (errno != ENOENT) /* if other error than not existing yet, fail */
+ return -errno;
+
+ if (!iptc_insert_entry(chain, entry, 0, h))
+ return -errno;
+ } else {
+ if (!iptc_delete_entry(chain, entry, (unsigned char*) mask, h)) {
+ if (errno == ENOENT) /* if it's already gone, all is good! */
+ return 0;
+
+ return -errno;
+ }
+ }
+
+ if (!iptc_commit(h))
+ return -errno;
+
+ return 0;
+}
+
+int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote) {
+
+ static const xt_chainlabel chain_pre = "PREROUTING", chain_output = "OUTPUT";
+ _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
+ struct ipt_entry *entry, *mask;
+ struct ipt_entry_target *t;
+ struct ipt_entry_match *m;
+ struct xt_addrtype_info_v1 *at;
+ struct nf_nat_ipv4_multi_range_compat *mr;
+ size_t sz, msz;
+ int r;
+
+ assert(add || !previous_remote);
+
+ if (af != AF_INET)
+ return -EOPNOTSUPP;
+
+ if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ if (local_port <= 0)
+ return -EINVAL;
+
+ if (remote_port <= 0)
+ return -EINVAL;
+
+ h = iptc_init("nat");
+ if (!h)
+ return -errno;
+
+ sz = XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+
+ if (protocol == IPPROTO_TCP)
+ msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_tcp));
+ else
+ msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_udp));
+
+ sz += msz;
+
+ /* Fill in basic part */
+ entry = alloca0(sz);
+ entry->next_offset = sz;
+ entry->target_offset =
+ XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
+ msz;
+ r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen);
+ if (r < 0)
+ return r;
+
+ /* Fill in first match */
+ m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)));
+ m->u.match_size = msz;
+ if (protocol == IPPROTO_TCP) {
+ struct xt_tcp *tcp;
+
+ strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name));
+ tcp = (struct xt_tcp*) m->data;
+ tcp->dpts[0] = tcp->dpts[1] = local_port;
+ tcp->spts[0] = 0;
+ tcp->spts[1] = 0xFFFF;
+
+ } else {
+ struct xt_udp *udp;
+
+ strncpy(m->u.user.name, "udp", sizeof(m->u.user.name));
+ udp = (struct xt_udp*) m->data;
+ udp->dpts[0] = udp->dpts[1] = local_port;
+ udp->spts[0] = 0;
+ udp->spts[1] = 0xFFFF;
+ }
+
+ /* Fill in second match */
+ m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz);
+ m->u.match_size =
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1));
+ strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name));
+ m->u.user.revision = 1;
+ at = (struct xt_addrtype_info_v1*) m->data;
+ at->dest = XT_ADDRTYPE_LOCAL;
+
+ /* Fill in target part */
+ t = ipt_get_target(entry);
+ t->u.target_size =
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+ strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name));
+ mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
+ mr->rangesize = 1;
+ mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS;
+ mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
+ if (protocol == IPPROTO_TCP)
+ mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htobe16(remote_port);
+ else
+ mr->range[0].min.udp.port = mr->range[0].max.udp.port = htobe16(remote_port);
+
+ mask = alloca0(sz);
+ memset(mask, 0xFF, sz);
+
+ if (add) {
+ /* Add the PREROUTING rule, if it is missing so far */
+ if (!iptc_check_entry(chain_pre, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -EINVAL;
+
+ if (!iptc_insert_entry(chain_pre, entry, 0, h))
+ return -errno;
+ }
+
+ /* If a previous remote is set, remove its entry */
+ if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+ mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
+
+ if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
+ }
+
+ /* Add the OUTPUT rule, if it is missing so far */
+ if (!in_interface) {
+
+ /* Don't apply onto loopback addresses */
+ if (!destination) {
+ entry->ip.dst.s_addr = htobe32(0x7F000000);
+ entry->ip.dmsk.s_addr = htobe32(0xFF000000);
+ entry->ip.invflags = IPT_INV_DSTIP;
+ }
+
+ if (!iptc_check_entry(chain_output, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (!iptc_insert_entry(chain_output, entry, 0, h))
+ return -errno;
+ }
+
+ /* If a previous remote is set, remove its entry */
+ if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+ mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
+
+ if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+ }
+ }
+ } else {
+ if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ if (!in_interface) {
+ if (!destination) {
+ entry->ip.dst.s_addr = htobe32(0x7F000000);
+ entry->ip.dmsk.s_addr = htobe32(0xFF000000);
+ entry->ip.invflags = IPT_INV_DSTIP;
+ }
+
+ if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+ }
+ }
+
+ if (!iptc_commit(h))
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/firewall-util.h b/src/shared/firewall-util.h
new file mode 100644
index 0000000..0a51a3c
--- /dev/null
+++ b/src/shared/firewall-util.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "in-addr-util.h"
+
+#if HAVE_LIBIPTC
+
+int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen);
+
+int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote);
+
+#else
+
+static inline int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+ return -EOPNOTSUPP;
+}
+
+static inline int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote) {
+ return -EOPNOTSUPP;
+}
+
+#endif
diff --git a/src/shared/format-table.c b/src/shared/format-table.c
new file mode 100644
index 0000000..a13a198
--- /dev/null
+++ b/src/shared/format-table.c
@@ -0,0 +1,2549 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <net/if.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "gunicode.h"
+#include "id128-util.h"
+#include "in-addr-util.h"
+#include "locale-util.h"
+#include "memory-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define DEFAULT_WEIGHT 100
+
+/*
+ A few notes on implementation details:
+
+ - TableCell is a 'fake' structure, it's just used as data type to pass references to specific cell positions in the
+ table. It can be easily converted to an index number and back.
+
+ - TableData is where the actual data is stored: it encapsulates the data and formatting for a specific cell. It's
+ 'pseudo-immutable' and ref-counted. When a cell's data's formatting is to be changed, we duplicate the object if the
+ ref-counting is larger than 1. Note that TableData and its ref-counting is mostly not visible to the outside. The
+ outside only sees Table and TableCell.
+
+ - The Table object stores a simple one-dimensional array of references to TableData objects, one row after the
+ previous one.
+
+ - There's no special concept of a "row" or "column" in the table, and no special concept of the "header" row. It's all
+ derived from the cell index: we know how many cells are to be stored in a row, and can determine the rest from
+ that. The first row is always the header row. If header display is turned off we simply skip outputting the first
+ row. Also, when sorting rows we always leave the first row where it is, as the header shouldn't move.
+
+ - Note because there's no row and no column object some properties that might be appropriate as row/column properties
+ are exposed as cell properties instead. For example, the "weight" of a column (which is used to determine where to
+ add/remove space preferable when expanding/compressing tables horizontally) is actually made the "weight" of a
+ cell. Given that we usually need it per-column though we will calculate the average across every cell of the column
+ instead.
+
+ - To make things easy, when cells are added without any explicit configured formatting, then we'll copy the formatting
+ from the same cell in the previous cell. This is particularly useful for the "weight" of the cell (see above), as
+ this means setting the weight of the cells of the header row will nicely propagate to all cells in the other rows.
+*/
+
+typedef struct TableData {
+ unsigned n_ref;
+ TableDataType type;
+
+ size_t minimum_width; /* minimum width for the column */
+ size_t maximum_width; /* maximum width for the column */
+ size_t formatted_for_width; /* the width we tried to format for */
+ unsigned weight; /* the horizontal weight for this column, in case the table is expanded/compressed */
+ unsigned ellipsize_percent; /* 0 … 100, where to place the ellipsis when compression is needed */
+ unsigned align_percent; /* 0 … 100, where to pad with spaces when expanding is needed. 0: left-aligned, 100: right-aligned */
+
+ bool uppercase; /* Uppercase string on display */
+
+ const char *color; /* ANSI color string to use for this cell. When written to terminal should not move cursor. Will automatically be reset after the cell */
+ const char *rgap_color; /* The ANSI color to use for the gap right of this cell. Usually used to underline entire rows in a gapless fashion */
+ char *url; /* A URL to use for a clickable hyperlink */
+ char *formatted; /* A cached textual representation of the cell data, before ellipsation/alignment */
+
+ union {
+ uint8_t data[0]; /* data is generic array */
+ bool boolean;
+ usec_t timestamp;
+ usec_t timespan;
+ uint64_t size;
+ char string[0];
+ char **strv;
+ int int_val;
+ int8_t int8;
+ int16_t int16;
+ int32_t int32;
+ int64_t int64;
+ unsigned uint_val;
+ uint8_t uint8;
+ uint16_t uint16;
+ uint32_t uint32;
+ uint64_t uint64;
+ int percent; /* we use 'int' as datatype for percent values in order to match the result of parse_percent() */
+ int ifindex;
+ union in_addr_union address;
+ sd_id128_t id128;
+ /* … add more here as we start supporting more cell data types … */
+ };
+} TableData;
+
+static size_t TABLE_CELL_TO_INDEX(TableCell *cell) {
+ size_t i;
+
+ assert(cell);
+
+ i = PTR_TO_SIZE(cell);
+ assert(i > 0);
+
+ return i-1;
+}
+
+static TableCell* TABLE_INDEX_TO_CELL(size_t index) {
+ assert(index != (size_t) -1);
+ return SIZE_TO_PTR(index + 1);
+}
+
+struct Table {
+ size_t n_columns;
+ size_t n_cells;
+
+ bool header; /* Whether to show the header row? */
+ size_t width; /* If == 0 format this as wide as necessary. If (size_t) -1 format this to console
+ * width or less wide, but not wider. Otherwise the width to format this table in. */
+ size_t cell_height_max; /* Maximum number of lines per cell. (If there are more, ellipsis is shown. If (size_t) -1 then no limit is set, the default. == 0 is not allowed.) */
+
+ TableData **data;
+ size_t n_allocated;
+
+ size_t *display_map; /* List of columns to show (by their index). It's fine if columns are listed multiple times or not at all */
+ size_t n_display_map;
+
+ size_t *sort_map; /* The columns to order rows by, in order of preference. */
+ size_t n_sort_map;
+
+ bool *reverse_map;
+
+ char *empty_string;
+};
+
+Table *table_new_raw(size_t n_columns) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+
+ assert(n_columns > 0);
+
+ t = new(Table, 1);
+ if (!t)
+ return NULL;
+
+ *t = (struct Table) {
+ .n_columns = n_columns,
+ .header = true,
+ .width = (size_t) -1,
+ .cell_height_max = (size_t) -1,
+ };
+
+ return TAKE_PTR(t);
+}
+
+Table *table_new_internal(const char *first_header, ...) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+ size_t n_columns = 1;
+ va_list ap;
+ int r;
+
+ assert(first_header);
+
+ va_start(ap, first_header);
+ for (;;) {
+ if (!va_arg(ap, const char*))
+ break;
+
+ n_columns++;
+ }
+ va_end(ap);
+
+ t = table_new_raw(n_columns);
+ if (!t)
+ return NULL;
+
+ va_start(ap, first_header);
+ for (const char *h = first_header; h; h = va_arg(ap, const char*)) {
+ TableCell *cell;
+
+ r = table_add_cell(t, &cell, TABLE_STRING, h);
+ if (r < 0) {
+ va_end(ap);
+ return NULL;
+ }
+
+ /* Make the table header uppercase */
+ r = table_set_uppercase(t, cell, true);
+ if (r < 0) {
+ va_end(ap);
+ return NULL;
+ }
+ }
+ va_end(ap);
+
+ assert(t->n_columns == t->n_cells);
+ return TAKE_PTR(t);
+}
+
+static TableData *table_data_free(TableData *d) {
+ assert(d);
+
+ free(d->formatted);
+ free(d->url);
+
+ if (IN_SET(d->type, TABLE_STRV, TABLE_STRV_WRAPPED))
+ strv_free(d->strv);
+
+ return mfree(d);
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(TableData, table_data, table_data_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(TableData*, table_data_unref);
+
+Table *table_unref(Table *t) {
+ if (!t)
+ return NULL;
+
+ for (size_t i = 0; i < t->n_cells; i++)
+ table_data_unref(t->data[i]);
+
+ free(t->data);
+ free(t->display_map);
+ free(t->sort_map);
+ free(t->reverse_map);
+ free(t->empty_string);
+
+ return mfree(t);
+}
+
+static size_t table_data_size(TableDataType type, const void *data) {
+
+ switch (type) {
+
+ case TABLE_EMPTY:
+ return 0;
+
+ case TABLE_STRING:
+ case TABLE_PATH:
+ return strlen(data) + 1;
+
+ case TABLE_STRV:
+ case TABLE_STRV_WRAPPED:
+ return sizeof(char **);
+
+ case TABLE_BOOLEAN:
+ return sizeof(bool);
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE:
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC:
+ return sizeof(usec_t);
+
+ case TABLE_SIZE:
+ case TABLE_INT64:
+ case TABLE_UINT64:
+ case TABLE_BPS:
+ return sizeof(uint64_t);
+
+ case TABLE_INT32:
+ case TABLE_UINT32:
+ return sizeof(uint32_t);
+
+ case TABLE_INT16:
+ case TABLE_UINT16:
+ return sizeof(uint16_t);
+
+ case TABLE_INT8:
+ case TABLE_UINT8:
+ return sizeof(uint8_t);
+
+ case TABLE_INT:
+ case TABLE_UINT:
+ case TABLE_PERCENT:
+ case TABLE_IFINDEX:
+ return sizeof(int);
+
+ case TABLE_IN_ADDR:
+ return sizeof(struct in_addr);
+
+ case TABLE_IN6_ADDR:
+ return sizeof(struct in6_addr);
+
+ case TABLE_UUID:
+ case TABLE_ID128:
+ return sizeof(sd_id128_t);
+
+ default:
+ assert_not_reached("Uh? Unexpected cell type");
+ }
+}
+
+static bool table_data_matches(
+ TableData *d,
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ size_t k, l;
+ assert(d);
+
+ if (d->type != type)
+ return false;
+
+ if (d->minimum_width != minimum_width)
+ return false;
+
+ if (d->maximum_width != maximum_width)
+ return false;
+
+ if (d->weight != weight)
+ return false;
+
+ if (d->align_percent != align_percent)
+ return false;
+
+ if (d->ellipsize_percent != ellipsize_percent)
+ return false;
+
+ /* If a color/url/uppercase flag is set, refuse to merge */
+ if (d->color || d->rgap_color)
+ return false;
+ if (d->url)
+ return false;
+ if (d->uppercase)
+ return false;
+
+ k = table_data_size(type, data);
+ l = table_data_size(d->type, d->data);
+ if (k != l)
+ return false;
+
+ return memcmp_safe(data, d->data, l) == 0;
+}
+
+static TableData *table_data_new(
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ _cleanup_free_ TableData *d = NULL;
+ size_t data_size;
+
+ data_size = table_data_size(type, data);
+
+ d = malloc0(offsetof(TableData, data) + data_size);
+ if (!d)
+ return NULL;
+
+ d->n_ref = 1;
+ d->type = type;
+ d->minimum_width = minimum_width;
+ d->maximum_width = maximum_width;
+ d->weight = weight;
+ d->align_percent = align_percent;
+ d->ellipsize_percent = ellipsize_percent;
+
+ if (IN_SET(type, TABLE_STRV, TABLE_STRV_WRAPPED)) {
+ d->strv = strv_copy(data);
+ if (!d->strv)
+ return NULL;
+ } else
+ memcpy_safe(d->data, data, data_size);
+
+ return TAKE_PTR(d);
+}
+
+int table_add_cell_full(
+ Table *t,
+ TableCell **ret_cell,
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ _cleanup_(table_data_unrefp) TableData *d = NULL;
+ TableData *p;
+
+ assert(t);
+ assert(type >= 0);
+ assert(type < _TABLE_DATA_TYPE_MAX);
+
+ /* Special rule: patch NULL data fields to the empty field */
+ if (!data)
+ type = TABLE_EMPTY;
+
+ /* Determine the cell adjacent to the current one, but one row up */
+ if (t->n_cells >= t->n_columns)
+ assert_se(p = t->data[t->n_cells - t->n_columns]);
+ else
+ p = NULL;
+
+ /* If formatting parameters are left unspecified, copy from the previous row */
+ if (minimum_width == (size_t) -1)
+ minimum_width = p ? p->minimum_width : 1;
+
+ if (weight == (unsigned) -1)
+ weight = p ? p->weight : DEFAULT_WEIGHT;
+
+ if (align_percent == (unsigned) -1)
+ align_percent = p ? p->align_percent : 0;
+
+ if (ellipsize_percent == (unsigned) -1)
+ ellipsize_percent = p ? p->ellipsize_percent : 100;
+
+ assert(align_percent <= 100);
+ assert(ellipsize_percent <= 100);
+
+ /* Small optimization: Pretty often adjacent cells in two subsequent lines have the same data and
+ * formatting. Let's see if we can reuse the cell data and ref it once more. */
+
+ if (p && table_data_matches(p, type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent))
+ d = table_data_ref(p);
+ else {
+ d = table_data_new(type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent);
+ if (!d)
+ return -ENOMEM;
+ }
+
+ if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns)))
+ return -ENOMEM;
+
+ if (ret_cell)
+ *ret_cell = TABLE_INDEX_TO_CELL(t->n_cells);
+
+ t->data[t->n_cells++] = TAKE_PTR(d);
+
+ return 0;
+}
+
+int table_add_cell_stringf(Table *t, TableCell **ret_cell, const char *format, ...) {
+ _cleanup_free_ char *buffer = NULL;
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = vasprintf(&buffer, format, ap);
+ va_end(ap);
+ if (r < 0)
+ return -ENOMEM;
+
+ return table_add_cell(t, ret_cell, TABLE_STRING, buffer);
+}
+
+int table_fill_empty(Table *t, size_t until_column) {
+ int r;
+
+ assert(t);
+
+ /* Fill the rest of the current line with empty cells until we reach the specified column. Will add
+ * at least one cell. Pass 0 in order to fill a line to the end or insert an empty line. */
+
+ if (until_column >= t->n_columns)
+ return -EINVAL;
+
+ do {
+ r = table_add_cell(t, NULL, TABLE_EMPTY, NULL);
+ if (r < 0)
+ return r;
+
+ } while ((t->n_cells % t->n_columns) != until_column);
+
+ return 0;
+}
+
+int table_dup_cell(Table *t, TableCell *cell) {
+ size_t i;
+
+ assert(t);
+
+ /* Add the data of the specified cell a second time as a new cell to the end. */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns)))
+ return -ENOMEM;
+
+ t->data[t->n_cells++] = table_data_ref(t->data[i]);
+ return 0;
+}
+
+static int table_dedup_cell(Table *t, TableCell *cell) {
+ _cleanup_free_ char *curl = NULL;
+ TableData *nd, *od;
+ size_t i;
+
+ assert(t);
+
+ /* Helper call that ensures the specified cell's data object has a ref count of 1, which we can use before
+ * changing a cell's formatting without effecting every other cell's formatting that shares the same data */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ assert_se(od = t->data[i]);
+ if (od->n_ref == 1)
+ return 0;
+
+ assert(od->n_ref > 1);
+
+ if (od->url) {
+ curl = strdup(od->url);
+ if (!curl)
+ return -ENOMEM;
+ }
+
+ nd = table_data_new(
+ od->type,
+ od->data,
+ od->minimum_width,
+ od->maximum_width,
+ od->weight,
+ od->align_percent,
+ od->ellipsize_percent);
+ if (!nd)
+ return -ENOMEM;
+
+ nd->color = od->color;
+ nd->rgap_color = od->rgap_color;
+ nd->url = TAKE_PTR(curl);
+ nd->uppercase = od->uppercase;
+
+ table_data_unref(od);
+ t->data[i] = nd;
+
+ assert(nd->n_ref == 1);
+
+ return 1;
+}
+
+static TableData *table_get_data(Table *t, TableCell *cell) {
+ size_t i;
+
+ assert(t);
+ assert(cell);
+
+ /* Get the data object of the specified cell, or NULL if it doesn't exist */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return NULL;
+
+ assert(t->data[i]);
+ assert(t->data[i]->n_ref > 0);
+
+ return t->data[i];
+}
+
+int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (minimum_width == (size_t) -1)
+ minimum_width = 1;
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->minimum_width = minimum_width;
+ return 0;
+}
+
+int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->maximum_width = maximum_width;
+ return 0;
+}
+
+int table_set_weight(Table *t, TableCell *cell, unsigned weight) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (weight == (unsigned) -1)
+ weight = DEFAULT_WEIGHT;
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->weight = weight;
+ return 0;
+}
+
+int table_set_align_percent(Table *t, TableCell *cell, unsigned percent) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (percent == (unsigned) -1)
+ percent = 0;
+
+ assert(percent <= 100);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->align_percent = percent;
+ return 0;
+}
+
+int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (percent == (unsigned) -1)
+ percent = 100;
+
+ assert(percent <= 100);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->ellipsize_percent = percent;
+ return 0;
+}
+
+int table_set_color(Table *t, TableCell *cell, const char *color) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->color = empty_to_null(color);
+ return 0;
+}
+
+int table_set_rgap_color(Table *t, TableCell *cell, const char *color) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->rgap_color = empty_to_null(color);
+ return 0;
+}
+
+int table_set_url(Table *t, TableCell *cell, const char *url) {
+ _cleanup_free_ char *copy = NULL;
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (url) {
+ copy = strdup(url);
+ if (!copy)
+ return -ENOMEM;
+ }
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ return free_and_replace(table_get_data(t, cell)->url, copy);
+}
+
+int table_set_uppercase(Table *t, TableCell *cell, bool b) {
+ TableData *d;
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ assert_se(d = table_get_data(t, cell));
+
+ if (d->uppercase == b)
+ return 0;
+
+ d->formatted = mfree(d->formatted);
+ d->uppercase = b;
+ return 1;
+}
+
+int table_update(Table *t, TableCell *cell, TableDataType type, const void *data) {
+ _cleanup_free_ char *curl = NULL;
+ TableData *nd, *od;
+ size_t i;
+
+ assert(t);
+ assert(cell);
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ assert_se(od = t->data[i]);
+
+ if (od->url) {
+ curl = strdup(od->url);
+ if (!curl)
+ return -ENOMEM;
+ }
+
+ nd = table_data_new(
+ type,
+ data,
+ od->minimum_width,
+ od->maximum_width,
+ od->weight,
+ od->align_percent,
+ od->ellipsize_percent);
+ if (!nd)
+ return -ENOMEM;
+
+ nd->color = od->color;
+ nd->rgap_color = od->rgap_color;
+ nd->url = TAKE_PTR(curl);
+ nd->uppercase = od->uppercase;
+
+ table_data_unref(od);
+ t->data[i] = nd;
+
+ return 0;
+}
+
+int table_add_many_internal(Table *t, TableDataType first_type, ...) {
+ TableDataType type;
+ va_list ap;
+ TableCell *last_cell = NULL;
+ int r;
+
+ assert(t);
+ assert(first_type >= 0);
+ assert(first_type < _TABLE_DATA_TYPE_MAX);
+
+ type = first_type;
+
+ va_start(ap, first_type);
+ for (;;) {
+ const void *data;
+ union {
+ uint64_t size;
+ usec_t usec;
+ int int_val;
+ int8_t int8;
+ int16_t int16;
+ int32_t int32;
+ int64_t int64;
+ unsigned uint_val;
+ uint8_t uint8;
+ uint16_t uint16;
+ uint32_t uint32;
+ uint64_t uint64;
+ int percent;
+ int ifindex;
+ bool b;
+ union in_addr_union address;
+ sd_id128_t id128;
+ } buffer;
+
+ switch (type) {
+
+ case TABLE_EMPTY:
+ data = NULL;
+ break;
+
+ case TABLE_STRING:
+ case TABLE_PATH:
+ data = va_arg(ap, const char *);
+ break;
+
+ case TABLE_STRV:
+ case TABLE_STRV_WRAPPED:
+ data = va_arg(ap, char * const *);
+ break;
+
+ case TABLE_BOOLEAN:
+ buffer.b = va_arg(ap, int);
+ data = &buffer.b;
+ break;
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE:
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC:
+ buffer.usec = va_arg(ap, usec_t);
+ data = &buffer.usec;
+ break;
+
+ case TABLE_SIZE:
+ case TABLE_BPS:
+ buffer.size = va_arg(ap, uint64_t);
+ data = &buffer.size;
+ break;
+
+ case TABLE_INT:
+ buffer.int_val = va_arg(ap, int);
+ data = &buffer.int_val;
+ break;
+
+ case TABLE_INT8: {
+ int x = va_arg(ap, int);
+ assert(x >= INT8_MIN && x <= INT8_MAX);
+
+ buffer.int8 = x;
+ data = &buffer.int8;
+ break;
+ }
+
+ case TABLE_INT16: {
+ int x = va_arg(ap, int);
+ assert(x >= INT16_MIN && x <= INT16_MAX);
+
+ buffer.int16 = x;
+ data = &buffer.int16;
+ break;
+ }
+
+ case TABLE_INT32:
+ buffer.int32 = va_arg(ap, int32_t);
+ data = &buffer.int32;
+ break;
+
+ case TABLE_INT64:
+ buffer.int64 = va_arg(ap, int64_t);
+ data = &buffer.int64;
+ break;
+
+ case TABLE_UINT:
+ buffer.uint_val = va_arg(ap, unsigned);
+ data = &buffer.uint_val;
+ break;
+
+ case TABLE_UINT8: {
+ unsigned x = va_arg(ap, unsigned);
+ assert(x <= UINT8_MAX);
+
+ buffer.uint8 = x;
+ data = &buffer.uint8;
+ break;
+ }
+
+ case TABLE_UINT16: {
+ unsigned x = va_arg(ap, unsigned);
+ assert(x <= UINT16_MAX);
+
+ buffer.uint16 = x;
+ data = &buffer.uint16;
+ break;
+ }
+
+ case TABLE_UINT32:
+ buffer.uint32 = va_arg(ap, uint32_t);
+ data = &buffer.uint32;
+ break;
+
+ case TABLE_UINT64:
+ buffer.uint64 = va_arg(ap, uint64_t);
+ data = &buffer.uint64;
+ break;
+
+ case TABLE_PERCENT:
+ buffer.percent = va_arg(ap, int);
+ data = &buffer.percent;
+ break;
+
+ case TABLE_IFINDEX:
+ buffer.ifindex = va_arg(ap, int);
+ data = &buffer.ifindex;
+ break;
+
+ case TABLE_IN_ADDR:
+ buffer.address = *va_arg(ap, union in_addr_union *);
+ data = &buffer.address.in;
+ break;
+
+ case TABLE_IN6_ADDR:
+ buffer.address = *va_arg(ap, union in_addr_union *);
+ data = &buffer.address.in6;
+ break;
+
+ case TABLE_UUID:
+ case TABLE_ID128:
+ buffer.id128 = va_arg(ap, sd_id128_t);
+ data = &buffer.id128;
+ break;
+
+ case TABLE_SET_MINIMUM_WIDTH: {
+ size_t w = va_arg(ap, size_t);
+
+ r = table_set_minimum_width(t, last_cell, w);
+ break;
+ }
+
+ case TABLE_SET_MAXIMUM_WIDTH: {
+ size_t w = va_arg(ap, size_t);
+ r = table_set_maximum_width(t, last_cell, w);
+ break;
+ }
+
+ case TABLE_SET_WEIGHT: {
+ unsigned w = va_arg(ap, unsigned);
+ r = table_set_weight(t, last_cell, w);
+ break;
+ }
+
+ case TABLE_SET_ALIGN_PERCENT: {
+ unsigned p = va_arg(ap, unsigned);
+ r = table_set_align_percent(t, last_cell, p);
+ break;
+ }
+
+ case TABLE_SET_ELLIPSIZE_PERCENT: {
+ unsigned p = va_arg(ap, unsigned);
+ r = table_set_ellipsize_percent(t, last_cell, p);
+ break;
+ }
+
+ case TABLE_SET_COLOR: {
+ const char *c = va_arg(ap, const char*);
+ r = table_set_color(t, last_cell, c);
+ break;
+ }
+
+ case TABLE_SET_RGAP_COLOR: {
+ const char *c = va_arg(ap, const char*);
+ r = table_set_rgap_color(t, last_cell, c);
+ break;
+ }
+
+ case TABLE_SET_BOTH_COLORS: {
+ const char *c = va_arg(ap, const char*);
+
+ r = table_set_color(t, last_cell, c);
+ if (r < 0) {
+ va_end(ap);
+ return r;
+ }
+
+ r = table_set_rgap_color(t, last_cell, c);
+ break;
+ }
+
+ case TABLE_SET_URL: {
+ const char *u = va_arg(ap, const char*);
+ r = table_set_url(t, last_cell, u);
+ break;
+ }
+
+ case TABLE_SET_UPPERCASE: {
+ int u = va_arg(ap, int);
+ r = table_set_uppercase(t, last_cell, u);
+ break;
+ }
+
+ case _TABLE_DATA_TYPE_MAX:
+ /* Used as end marker */
+ va_end(ap);
+ return 0;
+
+ default:
+ assert_not_reached("Uh? Unexpected data type.");
+ }
+
+ if (type < _TABLE_DATA_TYPE_MAX)
+ r = table_add_cell(t, &last_cell, type, data);
+
+ if (r < 0) {
+ va_end(ap);
+ return r;
+ }
+
+ type = va_arg(ap, TableDataType);
+ }
+}
+
+void table_set_header(Table *t, bool b) {
+ assert(t);
+
+ t->header = b;
+}
+
+void table_set_width(Table *t, size_t width) {
+ assert(t);
+
+ t->width = width;
+}
+
+void table_set_cell_height_max(Table *t, size_t height) {
+ assert(t);
+ assert(height >= 1 || height == (size_t) -1);
+
+ t->cell_height_max = height;
+}
+
+int table_set_empty_string(Table *t, const char *empty) {
+ assert(t);
+
+ return free_and_strdup(&t->empty_string, empty);
+}
+
+int table_set_display_all(Table *t) {
+ assert(t);
+
+ size_t allocated = t->n_display_map;
+
+ if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, allocated)))
+ return -ENOMEM;
+
+ for (size_t i = 0; i < t->n_columns; i++)
+ t->display_map[i] = i;
+
+ t->n_display_map = t->n_columns;
+
+ return 0;
+}
+
+int table_set_display(Table *t, size_t first_column, ...) {
+ size_t allocated, column;
+ va_list ap;
+
+ assert(t);
+
+ allocated = t->n_display_map;
+ column = first_column;
+
+ va_start(ap, first_column);
+ for (;;) {
+ assert(column < t->n_columns);
+
+ if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, t->n_display_map+1))) {
+ va_end(ap);
+ return -ENOMEM;
+ }
+
+ t->display_map[t->n_display_map++] = column;
+
+ column = va_arg(ap, size_t);
+ if (column == (size_t) -1)
+ break;
+
+ }
+ va_end(ap);
+
+ return 0;
+}
+
+int table_set_sort(Table *t, size_t first_column, ...) {
+ size_t allocated, column;
+ va_list ap;
+
+ assert(t);
+
+ allocated = t->n_sort_map;
+ column = first_column;
+
+ va_start(ap, first_column);
+ for (;;) {
+ assert(column < t->n_columns);
+
+ if (!GREEDY_REALLOC(t->sort_map, allocated, MAX(t->n_columns, t->n_sort_map+1))) {
+ va_end(ap);
+ return -ENOMEM;
+ }
+
+ t->sort_map[t->n_sort_map++] = column;
+
+ column = va_arg(ap, size_t);
+ if (column == (size_t) -1)
+ break;
+ }
+ va_end(ap);
+
+ return 0;
+}
+
+int table_hide_column_from_display(Table *t, size_t column) {
+ int r;
+
+ assert(t);
+ assert(column < t->n_columns);
+
+ /* If the display map is empty, initialize it with all available columns */
+ if (!t->display_map) {
+ r = table_set_display_all(t);
+ if (r < 0)
+ return r;
+ }
+
+ size_t allocated = t->n_display_map, cur = 0;
+
+ for (size_t i = 0; i < allocated; i++) {
+ if (t->display_map[i] == column)
+ continue;
+
+ t->display_map[cur++] = t->display_map[i];
+ }
+
+ t->n_display_map = cur;
+
+ return 0;
+}
+
+static int cell_data_compare(TableData *a, size_t index_a, TableData *b, size_t index_b) {
+ assert(a);
+ assert(b);
+
+ if (a->type == b->type) {
+
+ /* We only define ordering for cells of the same data type. If cells with different data types are
+ * compared we follow the order the cells were originally added in */
+
+ switch (a->type) {
+
+ case TABLE_STRING:
+ return strcmp(a->string, b->string);
+
+ case TABLE_PATH:
+ return path_compare(a->string, b->string);
+
+ case TABLE_STRV:
+ case TABLE_STRV_WRAPPED:
+ return strv_compare(a->strv, b->strv);
+
+ case TABLE_BOOLEAN:
+ if (!a->boolean && b->boolean)
+ return -1;
+ if (a->boolean && !b->boolean)
+ return 1;
+ return 0;
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE:
+ return CMP(a->timestamp, b->timestamp);
+
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC:
+ return CMP(a->timespan, b->timespan);
+
+ case TABLE_SIZE:
+ case TABLE_BPS:
+ return CMP(a->size, b->size);
+
+ case TABLE_INT:
+ return CMP(a->int_val, b->int_val);
+
+ case TABLE_INT8:
+ return CMP(a->int8, b->int8);
+
+ case TABLE_INT16:
+ return CMP(a->int16, b->int16);
+
+ case TABLE_INT32:
+ return CMP(a->int32, b->int32);
+
+ case TABLE_INT64:
+ return CMP(a->int64, b->int64);
+
+ case TABLE_UINT:
+ return CMP(a->uint_val, b->uint_val);
+
+ case TABLE_UINT8:
+ return CMP(a->uint8, b->uint8);
+
+ case TABLE_UINT16:
+ return CMP(a->uint16, b->uint16);
+
+ case TABLE_UINT32:
+ return CMP(a->uint32, b->uint32);
+
+ case TABLE_UINT64:
+ return CMP(a->uint64, b->uint64);
+
+ case TABLE_PERCENT:
+ return CMP(a->percent, b->percent);
+
+ case TABLE_IFINDEX:
+ return CMP(a->ifindex, b->ifindex);
+
+ case TABLE_IN_ADDR:
+ return CMP(a->address.in.s_addr, b->address.in.s_addr);
+
+ case TABLE_IN6_ADDR:
+ return memcmp(&a->address.in6, &b->address.in6, FAMILY_ADDRESS_SIZE(AF_INET6));
+
+ case TABLE_UUID:
+ case TABLE_ID128:
+ return memcmp(&a->id128, &b->id128, sizeof(sd_id128_t));
+
+ default:
+ ;
+ }
+ }
+
+ /* Generic fallback using the original order in which the cells where added. */
+ return CMP(index_a, index_b);
+}
+
+static int table_data_compare(const size_t *a, const size_t *b, Table *t) {
+ int r;
+
+ assert(t);
+ assert(t->sort_map);
+
+ /* Make sure the header stays at the beginning */
+ if (*a < t->n_columns && *b < t->n_columns)
+ return 0;
+ if (*a < t->n_columns)
+ return -1;
+ if (*b < t->n_columns)
+ return 1;
+
+ /* Order other lines by the sorting map */
+ for (size_t i = 0; i < t->n_sort_map; i++) {
+ TableData *d, *dd;
+
+ d = t->data[*a + t->sort_map[i]];
+ dd = t->data[*b + t->sort_map[i]];
+
+ r = cell_data_compare(d, *a, dd, *b);
+ if (r != 0)
+ return t->reverse_map && t->reverse_map[t->sort_map[i]] ? -r : r;
+ }
+
+ /* Order identical lines by the order there were originally added in */
+ return CMP(*a, *b);
+}
+
+static char* format_strv_width(char **strv, size_t column_width) {
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t sz = 0;
+ _cleanup_free_ char *buf = NULL;
+
+ f = open_memstream_unlocked(&buf, &sz);
+ if (!f)
+ return NULL;
+
+ size_t position = 0;
+ char **p;
+ STRV_FOREACH(p, strv) {
+ size_t our_len = utf8_console_width(*p); /* This returns -1 on invalid utf-8 (which shouldn't happen).
+ * If that happens, we'll just print one item per line. */
+
+ if (position == 0) {
+ fputs(*p, f);
+ position = our_len;
+ } else if (size_add(size_add(position, 1), our_len) <= column_width) {
+ fprintf(f, " %s", *p);
+ position = size_add(size_add(position, 1), our_len);
+ } else {
+ fprintf(f, "\n%s", *p);
+ position = our_len;
+ }
+ }
+
+ if (fflush_and_check(f) < 0)
+ return NULL;
+
+ f = safe_fclose(f);
+ return TAKE_PTR(buf);
+}
+
+static const char *table_data_format(Table *t, TableData *d, bool avoid_uppercasing, size_t column_width, bool *have_soft) {
+ assert(d);
+
+ if (d->formatted &&
+ /* Only TABLE_STRV_WRAPPED adjust based on column_width so far… */
+ (d->type != TABLE_STRV_WRAPPED || d->formatted_for_width == column_width))
+ return d->formatted;
+
+ switch (d->type) {
+ case TABLE_EMPTY:
+ return strempty(t->empty_string);
+
+ case TABLE_STRING:
+ case TABLE_PATH:
+ if (d->uppercase && !avoid_uppercasing) {
+ d->formatted = new(char, strlen(d->string) + 1);
+ if (!d->formatted)
+ return NULL;
+
+ char *q = d->formatted;
+ for (char *p = d->string; *p; p++, q++)
+ *q = (char) toupper((unsigned char) *p);
+ *q = 0;
+
+ return d->formatted;
+ }
+
+ return d->string;
+
+ case TABLE_STRV:
+ if (strv_isempty(d->strv))
+ return strempty(t->empty_string);
+
+ d->formatted = strv_join(d->strv, "\n");
+ if (!d->formatted)
+ return NULL;
+ break;
+
+ case TABLE_STRV_WRAPPED: {
+ if (strv_isempty(d->strv))
+ return strempty(t->empty_string);
+
+ char *buf = format_strv_width(d->strv, column_width);
+ if (!buf)
+ return NULL;
+
+ free_and_replace(d->formatted, buf);
+ d->formatted_for_width = column_width;
+ if (have_soft)
+ *have_soft = true;
+
+ break;
+ }
+
+ case TABLE_BOOLEAN:
+ return yes_no(d->boolean);
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE: {
+ _cleanup_free_ char *p;
+ char *ret;
+
+ p = new(char, FORMAT_TIMESTAMP_MAX);
+ if (!p)
+ return NULL;
+
+ if (d->type == TABLE_TIMESTAMP)
+ ret = format_timestamp(p, FORMAT_TIMESTAMP_MAX, d->timestamp);
+ else if (d->type == TABLE_TIMESTAMP_UTC)
+ ret = format_timestamp_style(p, FORMAT_TIMESTAMP_MAX, d->timestamp, TIMESTAMP_UTC);
+ else
+ ret = format_timestamp_relative(p, FORMAT_TIMESTAMP_MAX, d->timestamp);
+ if (!ret)
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC: {
+ _cleanup_free_ char *p;
+
+ p = new(char, FORMAT_TIMESPAN_MAX);
+ if (!p)
+ return NULL;
+
+ if (!format_timespan(p, FORMAT_TIMESPAN_MAX, d->timespan,
+ d->type == TABLE_TIMESPAN ? 0 : USEC_PER_MSEC))
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_SIZE: {
+ _cleanup_free_ char *p;
+
+ p = new(char, FORMAT_BYTES_MAX);
+ if (!p)
+ return NULL;
+
+ if (!format_bytes(p, FORMAT_BYTES_MAX, d->size))
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_BPS: {
+ _cleanup_free_ char *p;
+ size_t n;
+
+ p = new(char, FORMAT_BYTES_MAX+2);
+ if (!p)
+ return NULL;
+
+ if (!format_bytes_full(p, FORMAT_BYTES_MAX, d->size, 0))
+ return "n/a";
+
+ n = strlen(p);
+ strscpy(p + n, FORMAT_BYTES_MAX + 2 - n, "bps");
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int_val) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%i", d->int_val);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT8: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int8) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIi8, d->int8);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT16: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int16) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIi16, d->int16);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT32: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int32) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIi32, d->int32);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT64: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int64) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIi64, d->int64);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint_val) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%u", d->uint_val);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT8: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint8) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu8, d->uint8);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT16: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint16) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu16, d->uint16);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT32: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint32) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu32, d->uint32);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT64: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint64) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu64, d->uint64);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_PERCENT: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->percent) + 2);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%i%%" , d->percent);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_IFINDEX: {
+ _cleanup_free_ char *p = NULL;
+ char name[IF_NAMESIZE + 1];
+
+ if (format_ifname(d->ifindex, name)) {
+ p = strdup(name);
+ if (!p)
+ return NULL;
+ } else {
+ if (asprintf(&p, "%i" , d->ifindex) < 0)
+ return NULL;
+ }
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_IN_ADDR:
+ case TABLE_IN6_ADDR: {
+ _cleanup_free_ char *p = NULL;
+
+ if (in_addr_to_string(d->type == TABLE_IN_ADDR ? AF_INET : AF_INET6,
+ &d->address, &p) < 0)
+ return NULL;
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_ID128: {
+ char *p;
+
+ p = new(char, SD_ID128_STRING_MAX);
+ if (!p)
+ return NULL;
+
+ d->formatted = sd_id128_to_string(d->id128, p);
+ break;
+ }
+
+ case TABLE_UUID: {
+ char *p;
+
+ p = new(char, ID128_UUID_STRING_MAX);
+ if (!p)
+ return NULL;
+
+ d->formatted = id128_to_uuid_string(d->id128, p);
+ break;
+ }
+
+ default:
+ assert_not_reached("Unexpected type?");
+ }
+
+ return d->formatted;
+}
+
+static int console_width_height(
+ const char *s,
+ size_t *ret_width,
+ size_t *ret_height) {
+
+ size_t max_width = 0, height = 0;
+ const char *p;
+
+ assert(s);
+
+ /* Determine the width and height in console character cells the specified string needs. */
+
+ do {
+ size_t k;
+
+ p = strchr(s, '\n');
+ if (p) {
+ _cleanup_free_ char *c = NULL;
+
+ c = strndup(s, p - s);
+ if (!c)
+ return -ENOMEM;
+
+ k = utf8_console_width(c);
+ s = p + 1;
+ } else {
+ k = utf8_console_width(s);
+ s = NULL;
+ }
+ if (k == (size_t) -1)
+ return -EINVAL;
+ if (k > max_width)
+ max_width = k;
+
+ height++;
+ } while (!isempty(s));
+
+ if (ret_width)
+ *ret_width = max_width;
+
+ if (ret_height)
+ *ret_height = height;
+
+ return 0;
+}
+
+static int table_data_requested_width_height(
+ Table *table,
+ TableData *d,
+ size_t available_width,
+ size_t *ret_width,
+ size_t *ret_height,
+ bool *have_soft) {
+
+ _cleanup_free_ char *truncated = NULL;
+ bool truncation_applied = false;
+ size_t width, height;
+ const char *t;
+ int r;
+ bool soft = false;
+
+ t = table_data_format(table, d, false, available_width, &soft);
+ if (!t)
+ return -ENOMEM;
+
+ if (table->cell_height_max != (size_t) -1) {
+ r = string_truncate_lines(t, table->cell_height_max, &truncated);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ truncation_applied = true;
+
+ t = truncated;
+ }
+
+ r = console_width_height(t, &width, &height);
+ if (r < 0)
+ return r;
+
+ if (d->maximum_width != (size_t) -1 && width > d->maximum_width)
+ width = d->maximum_width;
+
+ if (width < d->minimum_width)
+ width = d->minimum_width;
+
+ if (ret_width)
+ *ret_width = width;
+ if (ret_height)
+ *ret_height = height;
+ if (have_soft && soft)
+ *have_soft = true;
+
+ return truncation_applied;
+}
+
+static char *align_string_mem(const char *str, const char *url, size_t new_length, unsigned percent) {
+ size_t w = 0, space, lspace, old_length, clickable_length;
+ _cleanup_free_ char *clickable = NULL;
+ const char *p;
+ char *ret;
+ int r;
+
+ /* As with ellipsize_mem(), 'old_length' is a byte size while 'new_length' is a width in character cells */
+
+ assert(str);
+ assert(percent <= 100);
+
+ old_length = strlen(str);
+
+ if (url) {
+ r = terminal_urlify(url, str, &clickable);
+ if (r < 0)
+ return NULL;
+
+ clickable_length = strlen(clickable);
+ } else
+ clickable_length = old_length;
+
+ /* Determine current width on screen */
+ p = str;
+ while (p < str + old_length) {
+ char32_t c;
+
+ if (utf8_encoded_to_unichar(p, &c) < 0) {
+ p++, w++; /* count invalid chars as 1 */
+ continue;
+ }
+
+ p = utf8_next_char(p);
+ w += unichar_iswide(c) ? 2 : 1;
+ }
+
+ /* Already wider than the target, if so, don't do anything */
+ if (w >= new_length)
+ return clickable ? TAKE_PTR(clickable) : strdup(str);
+
+ /* How much spaces shall we add? An how much on the left side? */
+ space = new_length - w;
+ lspace = space * percent / 100U;
+
+ ret = new(char, space + clickable_length + 1);
+ if (!ret)
+ return NULL;
+
+ for (size_t i = 0; i < lspace; i++)
+ ret[i] = ' ';
+ memcpy(ret + lspace, clickable ?: str, clickable_length);
+ for (size_t i = lspace + clickable_length; i < space + clickable_length; i++)
+ ret[i] = ' ';
+
+ ret[space + clickable_length] = 0;
+ return ret;
+}
+
+static bool table_data_isempty(TableData *d) {
+ assert(d);
+
+ if (d->type == TABLE_EMPTY)
+ return true;
+
+ /* Let's also consider an empty strv as truly empty. */
+ if (IN_SET(d->type, TABLE_STRV, TABLE_STRV_WRAPPED))
+ return strv_isempty(d->strv);
+
+ /* Note that an empty string we do not consider empty here! */
+ return false;
+}
+
+static const char* table_data_color(TableData *d) {
+ assert(d);
+
+ if (d->color)
+ return d->color;
+
+ /* Let's implicitly color all "empty" cells in grey, in case an "empty_string" is set that is not empty */
+ if (table_data_isempty(d))
+ return ansi_grey();
+
+ return NULL;
+}
+
+static const char* table_data_rgap_color(TableData *d) {
+ assert(d);
+
+ if (d->rgap_color)
+ return d->rgap_color;
+
+ return NULL;
+}
+
+int table_print(Table *t, FILE *f) {
+ size_t n_rows, *minimum_width, *maximum_width, display_columns, *requested_width,
+ table_minimum_width, table_maximum_width, table_requested_width, table_effective_width,
+ *width = NULL;
+ _cleanup_free_ size_t *sorted = NULL;
+ uint64_t *column_weight, weight_sum;
+ int r;
+
+ assert(t);
+
+ if (!f)
+ f = stdout;
+
+ /* Ensure we have no incomplete rows */
+ assert(t->n_cells % t->n_columns == 0);
+
+ n_rows = t->n_cells / t->n_columns;
+ assert(n_rows > 0); /* at least the header row must be complete */
+
+ if (t->sort_map) {
+ /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */
+
+ sorted = new(size_t, n_rows);
+ if (!sorted)
+ return -ENOMEM;
+
+ for (size_t i = 0; i < n_rows; i++)
+ sorted[i] = i * t->n_columns;
+
+ typesafe_qsort_r(sorted, n_rows, table_data_compare, t);
+ }
+
+ if (t->display_map)
+ display_columns = t->n_display_map;
+ else
+ display_columns = t->n_columns;
+
+ assert(display_columns > 0);
+
+ minimum_width = newa(size_t, display_columns);
+ maximum_width = newa(size_t, display_columns);
+ requested_width = newa(size_t, display_columns);
+ column_weight = newa0(uint64_t, display_columns);
+
+ for (size_t j = 0; j < display_columns; j++) {
+ minimum_width[j] = 1;
+ maximum_width[j] = (size_t) -1;
+ }
+
+ for (unsigned pass = 0; pass < 2; pass++) {
+ /* First pass: determine column sizes */
+
+ for (size_t j = 0; j < display_columns; j++)
+ requested_width[j] = (size_t) -1;
+
+ bool any_soft = false;
+
+ for (size_t i = t->header ? 0 : 1; i < n_rows; i++) {
+ TableData **row;
+
+ /* Note that we don't care about ordering at this time, as we just want to determine column sizes,
+ * hence we don't care for sorted[] during the first pass. */
+ row = t->data + i * t->n_columns;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ TableData *d;
+ size_t req_width, req_height;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ r = table_data_requested_width_height(t, d,
+ width ? width[j] : SIZE_MAX,
+ &req_width, &req_height, &any_soft);
+ if (r < 0)
+ return r;
+ if (r > 0) { /* Truncated because too many lines? */
+ _cleanup_free_ char *last = NULL;
+ const char *field;
+
+ /* If we are going to show only the first few lines of a cell that has
+ * multiple make sure that we have enough space horizontally to show an
+ * ellipsis. Hence, let's figure out the last line, and account for its
+ * length plus ellipsis. */
+
+ field = table_data_format(t, d, false,
+ width ? width[j] : SIZE_MAX,
+ &any_soft);
+ if (!field)
+ return -ENOMEM;
+
+ assert_se(t->cell_height_max > 0);
+ r = string_extract_line(field, t->cell_height_max-1, &last);
+ if (r < 0)
+ return r;
+
+ req_width = MAX(req_width,
+ utf8_console_width(last) +
+ utf8_console_width(special_glyph(SPECIAL_GLYPH_ELLIPSIS)));
+ }
+
+ /* Determine the biggest width that any cell in this column would like to have */
+ if (requested_width[j] == (size_t) -1 ||
+ requested_width[j] < req_width)
+ requested_width[j] = req_width;
+
+ /* Determine the minimum width any cell in this column needs */
+ if (minimum_width[j] < d->minimum_width)
+ minimum_width[j] = d->minimum_width;
+
+ /* Determine the maximum width any cell in this column needs */
+ if (d->maximum_width != (size_t) -1 &&
+ (maximum_width[j] == (size_t) -1 ||
+ maximum_width[j] > d->maximum_width))
+ maximum_width[j] = d->maximum_width;
+
+ /* Determine the full columns weight */
+ column_weight[j] += d->weight;
+ }
+ }
+
+ /* One space between each column */
+ table_requested_width = table_minimum_width = table_maximum_width = display_columns - 1;
+
+ /* Calculate the total weight for all columns, plus the minimum, maximum and requested width for the table. */
+ weight_sum = 0;
+ for (size_t j = 0; j < display_columns; j++) {
+ weight_sum += column_weight[j];
+
+ table_minimum_width += minimum_width[j];
+
+ if (maximum_width[j] == (size_t) -1)
+ table_maximum_width = (size_t) -1;
+ else
+ table_maximum_width += maximum_width[j];
+
+ table_requested_width += requested_width[j];
+ }
+
+ /* Calculate effective table width */
+ if (t->width != 0 && t->width != (size_t) -1)
+ table_effective_width = t->width;
+ else if (t->width == 0 ||
+ ((pass > 0 || !any_soft) && (pager_have() || !isatty(STDOUT_FILENO))))
+ table_effective_width = table_requested_width;
+ else
+ table_effective_width = MIN(table_requested_width, columns());
+
+ if (table_maximum_width != (size_t) -1 && table_effective_width > table_maximum_width)
+ table_effective_width = table_maximum_width;
+
+ if (table_effective_width < table_minimum_width)
+ table_effective_width = table_minimum_width;
+
+ if (!width)
+ width = newa(size_t, display_columns);
+
+ if (table_effective_width >= table_requested_width) {
+ size_t extra;
+
+ /* We have extra room, let's distribute it among columns according to their weights. We first provide
+ * each column with what it asked for and the distribute the rest. */
+
+ extra = table_effective_width - table_requested_width;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ size_t delta;
+
+ if (weight_sum == 0)
+ width[j] = requested_width[j] + extra / (display_columns - j); /* Avoid division by zero */
+ else
+ width[j] = requested_width[j] + (extra * column_weight[j]) / weight_sum;
+
+ if (maximum_width[j] != (size_t) -1 && width[j] > maximum_width[j])
+ width[j] = maximum_width[j];
+
+ if (width[j] < minimum_width[j])
+ width[j] = minimum_width[j];
+
+ assert(width[j] >= requested_width[j]);
+ delta = width[j] - requested_width[j];
+
+ /* Subtract what we just added from the rest */
+ if (extra > delta)
+ extra -= delta;
+ else
+ extra = 0;
+
+ assert(weight_sum >= column_weight[j]);
+ weight_sum -= column_weight[j];
+ }
+
+ break; /* Every column should be happy, no need to repeat calculations. */
+ } else {
+ /* We need to compress the table, columns can't get what they asked for. We first provide each column
+ * with the minimum they need, and then distribute anything left. */
+ bool finalize = false;
+ size_t extra;
+
+ extra = table_effective_width - table_minimum_width;
+
+ for (size_t j = 0; j < display_columns; j++)
+ width[j] = (size_t) -1;
+
+ for (;;) {
+ bool restart = false;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ size_t delta, w;
+
+ /* Did this column already get something assigned? If so, let's skip to the next */
+ if (width[j] != (size_t) -1)
+ continue;
+
+ if (weight_sum == 0)
+ w = minimum_width[j] + extra / (display_columns - j); /* avoid division by zero */
+ else
+ w = minimum_width[j] + (extra * column_weight[j]) / weight_sum;
+
+ if (w >= requested_width[j]) {
+ /* Never give more than requested. If we hit a column like this, there's more
+ * space to allocate to other columns which means we need to restart the
+ * iteration. However, if we hit a column like this, let's assign it the space
+ * it wanted for good early.*/
+
+ w = requested_width[j];
+ restart = true;
+
+ } else if (!finalize)
+ continue;
+
+ width[j] = w;
+
+ assert(w >= minimum_width[j]);
+ delta = w - minimum_width[j];
+
+ assert(delta <= extra);
+ extra -= delta;
+
+ assert(weight_sum >= column_weight[j]);
+ weight_sum -= column_weight[j];
+
+ if (restart && !finalize)
+ break;
+ }
+
+ if (finalize)
+ break;
+
+ if (!restart)
+ finalize = true;
+ }
+
+ if (!any_soft) /* Some columns got less than requested. If some cells were "soft",
+ * let's try to reformat them with the new widths. Otherwise, let's
+ * move on. */
+ break;
+ }
+ }
+
+ /* Second pass: show output */
+ for (size_t i = t->header ? 0 : 1; i < n_rows; i++) {
+ size_t n_subline = 0;
+ bool more_sublines;
+ TableData **row;
+
+ if (sorted)
+ row = t->data + sorted[i];
+ else
+ row = t->data + i * t->n_columns;
+
+ do {
+ const char *gap_color = NULL;
+ more_sublines = false;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ _cleanup_free_ char *buffer = NULL, *extracted = NULL;
+ bool lines_truncated = false;
+ const char *field, *color = NULL;
+ TableData *d;
+ size_t l;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ field = table_data_format(t, d, false, width[j], NULL);
+ if (!field)
+ return -ENOMEM;
+
+ r = string_extract_line(field, n_subline, &extracted);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* There are more lines to come */
+ if ((t->cell_height_max == (size_t) -1 || n_subline + 1 < t->cell_height_max))
+ more_sublines = true; /* There are more lines to come */
+ else
+ lines_truncated = true;
+ }
+ if (extracted)
+ field = extracted;
+
+ l = utf8_console_width(field);
+ if (l > width[j]) {
+ /* Field is wider than allocated space. Let's ellipsize */
+
+ buffer = ellipsize(field, width[j], /* ellipsize at the end if we truncated coming lines, otherwise honour configuration */
+ lines_truncated ? 100 : d->ellipsize_percent);
+ if (!buffer)
+ return -ENOMEM;
+
+ field = buffer;
+ } else {
+ if (lines_truncated) {
+ _cleanup_free_ char *padded = NULL;
+
+ /* We truncated more lines of this cell, let's add an
+ * ellipsis. We first append it, but that might make our
+ * string grow above what we have space for, hence ellipsize
+ * right after. This will truncate the ellipsis and add a new
+ * one. */
+
+ padded = strjoin(field, special_glyph(SPECIAL_GLYPH_ELLIPSIS));
+ if (!padded)
+ return -ENOMEM;
+
+ buffer = ellipsize(padded, width[j], 100);
+ if (!buffer)
+ return -ENOMEM;
+
+ field = buffer;
+ l = utf8_console_width(field);
+ }
+
+ if (l < width[j]) {
+ _cleanup_free_ char *aligned = NULL;
+ /* Field is shorter than allocated space. Let's align with spaces */
+
+ aligned = align_string_mem(field, d->url, width[j], d->align_percent);
+ if (!aligned)
+ return -ENOMEM;
+
+ free_and_replace(buffer, aligned);
+ field = buffer;
+ }
+ }
+
+ if (l >= width[j] && d->url) {
+ _cleanup_free_ char *clickable = NULL;
+
+ r = terminal_urlify(d->url, field, &clickable);
+ if (r < 0)
+ return r;
+
+ free_and_replace(buffer, clickable);
+ field = buffer;
+ }
+
+ if (colors_enabled()) {
+ if (gap_color)
+ fputs(gap_color, f);
+ else if (row == t->data) /* underline header line fully, including the column separator */
+ fputs(ansi_underline(), f);
+ }
+
+ if (j > 0)
+ fputc(' ', f); /* column separator left of cell */
+
+ if (colors_enabled()) {
+ color = table_data_color(d);
+
+ /* Undo gap color */
+ if (gap_color || (color && row == t->data))
+ fputs(ANSI_NORMAL, f);
+
+ if (color)
+ fputs(color, f);
+ else if (gap_color && row == t->data) /* underline header line cell */
+ fputs(ansi_underline(), f);
+ }
+
+ fputs(field, f);
+
+ if (colors_enabled() && (color || row == t->data))
+ fputs(ANSI_NORMAL, f);
+
+ gap_color = table_data_rgap_color(d);
+ }
+
+ fputc('\n', f);
+ n_subline ++;
+ } while (more_sublines);
+ }
+
+ return fflush_and_check(f);
+}
+
+int table_format(Table *t, char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char *buf = NULL;
+ size_t sz = 0;
+ int r;
+
+ f = open_memstream_unlocked(&buf, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ r = table_print(t, f);
+ if (r < 0)
+ return r;
+
+ f = safe_fclose(f);
+
+ *ret = buf;
+
+ return 0;
+}
+
+size_t table_get_rows(Table *t) {
+ if (!t)
+ return 0;
+
+ assert(t->n_columns > 0);
+ return t->n_cells / t->n_columns;
+}
+
+size_t table_get_columns(Table *t) {
+ if (!t)
+ return 0;
+
+ assert(t->n_columns > 0);
+ return t->n_columns;
+}
+
+int table_set_reverse(Table *t, size_t column, bool b) {
+ assert(t);
+ assert(column < t->n_columns);
+
+ if (!t->reverse_map) {
+ if (!b)
+ return 0;
+
+ t->reverse_map = new0(bool, t->n_columns);
+ if (!t->reverse_map)
+ return -ENOMEM;
+ }
+
+ t->reverse_map[column] = b;
+ return 0;
+}
+
+TableCell *table_get_cell(Table *t, size_t row, size_t column) {
+ size_t i;
+
+ assert(t);
+
+ if (column >= t->n_columns)
+ return NULL;
+
+ i = row * t->n_columns + column;
+ if (i >= t->n_cells)
+ return NULL;
+
+ return TABLE_INDEX_TO_CELL(i);
+}
+
+const void *table_get(Table *t, TableCell *cell) {
+ TableData *d;
+
+ assert(t);
+
+ d = table_get_data(t, cell);
+ if (!d)
+ return NULL;
+
+ return d->data;
+}
+
+const void* table_get_at(Table *t, size_t row, size_t column) {
+ TableCell *cell;
+
+ cell = table_get_cell(t, row, column);
+ if (!cell)
+ return NULL;
+
+ return table_get(t, cell);
+}
+
+static int table_data_to_json(TableData *d, JsonVariant **ret) {
+
+ switch (d->type) {
+
+ case TABLE_EMPTY:
+ return json_variant_new_null(ret);
+
+ case TABLE_STRING:
+ case TABLE_PATH:
+ return json_variant_new_string(ret, d->string);
+
+ case TABLE_STRV:
+ case TABLE_STRV_WRAPPED:
+ return json_variant_new_array_strv(ret, d->strv);
+
+ case TABLE_BOOLEAN:
+ return json_variant_new_boolean(ret, d->boolean);
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE:
+ if (d->timestamp == USEC_INFINITY)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->timestamp);
+
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC:
+ if (d->timespan == USEC_INFINITY)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->timespan);
+
+ case TABLE_SIZE:
+ case TABLE_BPS:
+ if (d->size == (uint64_t) -1)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->size);
+
+ case TABLE_INT:
+ return json_variant_new_integer(ret, d->int_val);
+
+ case TABLE_INT8:
+ return json_variant_new_integer(ret, d->int8);
+
+ case TABLE_INT16:
+ return json_variant_new_integer(ret, d->int16);
+
+ case TABLE_INT32:
+ return json_variant_new_integer(ret, d->int32);
+
+ case TABLE_INT64:
+ return json_variant_new_integer(ret, d->int64);
+
+ case TABLE_UINT:
+ return json_variant_new_unsigned(ret, d->uint_val);
+
+ case TABLE_UINT8:
+ return json_variant_new_unsigned(ret, d->uint8);
+
+ case TABLE_UINT16:
+ return json_variant_new_unsigned(ret, d->uint16);
+
+ case TABLE_UINT32:
+ return json_variant_new_unsigned(ret, d->uint32);
+
+ case TABLE_UINT64:
+ return json_variant_new_unsigned(ret, d->uint64);
+
+ case TABLE_PERCENT:
+ return json_variant_new_integer(ret, d->percent);
+
+ case TABLE_IFINDEX:
+ return json_variant_new_integer(ret, d->ifindex);
+
+ case TABLE_IN_ADDR:
+ return json_variant_new_array_bytes(ret, &d->address, FAMILY_ADDRESS_SIZE(AF_INET));
+
+ case TABLE_IN6_ADDR:
+ return json_variant_new_array_bytes(ret, &d->address, FAMILY_ADDRESS_SIZE(AF_INET6));
+
+ case TABLE_ID128: {
+ char buf[SD_ID128_STRING_MAX];
+ return json_variant_new_string(ret, sd_id128_to_string(d->id128, buf));
+ }
+
+ case TABLE_UUID: {
+ char buf[ID128_UUID_STRING_MAX];
+ return json_variant_new_string(ret, id128_to_uuid_string(d->id128, buf));
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static char* string_to_json_field_name(const char *f) {
+ /* Tries to make a string more suitable as JSON field name. There are no strict rules defined what a
+ * field name can be hence this is a bit vague and black magic. Right now we only convert spaces to
+ * underscores and leave everything as is. */
+
+ char *c = strdup(f);
+ if (!c)
+ return NULL;
+
+ for (char *x = c; *x; x++)
+ if (isspace(*x))
+ *x = '_';
+
+ return c;
+}
+
+int table_to_json(Table *t, JsonVariant **ret) {
+ JsonVariant **rows = NULL, **elements = NULL;
+ _cleanup_free_ size_t *sorted = NULL;
+ size_t n_rows, display_columns;
+ int r;
+
+ assert(t);
+
+ /* Ensure we have no incomplete rows */
+ assert(t->n_cells % t->n_columns == 0);
+
+ n_rows = t->n_cells / t->n_columns;
+ assert(n_rows > 0); /* at least the header row must be complete */
+
+ if (t->sort_map) {
+ /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */
+
+ sorted = new(size_t, n_rows);
+ if (!sorted) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (size_t i = 0; i < n_rows; i++)
+ sorted[i] = i * t->n_columns;
+
+ typesafe_qsort_r(sorted, n_rows, table_data_compare, t);
+ }
+
+ if (t->display_map)
+ display_columns = t->n_display_map;
+ else
+ display_columns = t->n_columns;
+ assert(display_columns > 0);
+
+ elements = new0(JsonVariant*, display_columns * 2);
+ if (!elements) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (size_t j = 0; j < display_columns; j++) {
+ _cleanup_free_ char *mangled = NULL;
+ const char *formatted;
+ TableData *d;
+
+ assert_se(d = t->data[t->display_map ? t->display_map[j] : j]);
+
+ /* Field names must be strings, hence format whatever we got here as a string first */
+ formatted = table_data_format(t, d, true, SIZE_MAX, NULL);
+ if (!formatted) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ /* Arbitrary strings suck as field names, try to mangle them into something more suitable hence */
+ mangled = string_to_json_field_name(formatted);
+ if (!mangled) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = json_variant_new_string(elements + j*2, mangled);
+ if (r < 0)
+ goto finish;
+ }
+
+ rows = new0(JsonVariant*, n_rows-1);
+ if (!rows) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (size_t i = 1; i < n_rows; i++) {
+ TableData **row;
+
+ if (sorted)
+ row = t->data + sorted[i];
+ else
+ row = t->data + i * t->n_columns;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ TableData *d;
+ size_t k;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ k = j*2+1;
+ elements[k] = json_variant_unref(elements[k]);
+
+ r = table_data_to_json(d, elements + k);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = json_variant_new_object(rows + i - 1, elements, display_columns * 2);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = json_variant_new_array(ret, rows, n_rows - 1);
+
+finish:
+ if (rows) {
+ json_variant_unref_many(rows, n_rows-1);
+ free(rows);
+ }
+
+ if (elements) {
+ json_variant_unref_many(elements, display_columns*2);
+ free(elements);
+ }
+
+ return r;
+}
+
+int table_print_json(Table *t, FILE *f, JsonFormatFlags flags) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ int r;
+
+ assert(t);
+
+ if (!f)
+ f = stdout;
+
+ r = table_to_json(t, &v);
+ if (r < 0)
+ return r;
+
+ json_variant_dump(v, flags, f, NULL);
+
+ return fflush_and_check(f);
+}
diff --git a/src/shared/format-table.h b/src/shared/format-table.h
new file mode 100644
index 0000000..965549b
--- /dev/null
+++ b/src/shared/format-table.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "json.h"
+#include "macro.h"
+
+typedef enum TableDataType {
+ TABLE_EMPTY,
+ TABLE_STRING,
+ TABLE_STRV,
+ TABLE_STRV_WRAPPED,
+ TABLE_PATH,
+ TABLE_BOOLEAN,
+ TABLE_TIMESTAMP,
+ TABLE_TIMESTAMP_UTC,
+ TABLE_TIMESTAMP_RELATIVE,
+ TABLE_TIMESPAN,
+ TABLE_TIMESPAN_MSEC,
+ TABLE_SIZE,
+ TABLE_BPS,
+ TABLE_INT,
+ TABLE_INT8,
+ TABLE_INT16,
+ TABLE_INT32,
+ TABLE_INT64,
+ TABLE_UINT,
+ TABLE_UINT8,
+ TABLE_UINT16,
+ TABLE_UINT32,
+ TABLE_UINT64,
+ TABLE_PERCENT,
+ TABLE_IFINDEX,
+ TABLE_IN_ADDR, /* Takes a union in_addr_union (or a struct in_addr) */
+ TABLE_IN6_ADDR, /* Takes a union in_addr_union (or a struct in6_addr) */
+ TABLE_ID128,
+ TABLE_UUID,
+ _TABLE_DATA_TYPE_MAX,
+
+ /* The following are not really data types, but commands for table_add_cell_many() to make changes to
+ * a cell just added. */
+ TABLE_SET_MINIMUM_WIDTH,
+ TABLE_SET_MAXIMUM_WIDTH,
+ TABLE_SET_WEIGHT,
+ TABLE_SET_ALIGN_PERCENT,
+ TABLE_SET_ELLIPSIZE_PERCENT,
+ TABLE_SET_COLOR,
+ TABLE_SET_RGAP_COLOR,
+ TABLE_SET_BOTH_COLORS,
+ TABLE_SET_URL,
+ TABLE_SET_UPPERCASE,
+
+ _TABLE_DATA_TYPE_INVALID = -1,
+} TableDataType;
+
+/* PIDs are just 32bit signed integers on Linux */
+#define TABLE_PID TABLE_INT32
+assert_cc(sizeof(pid_t) == sizeof(int32_t));
+
+/* UIDs/GIDs are just 32bit unsigned integers on Linux */
+#define TABLE_UID TABLE_UINT32
+#define TABLE_GID TABLE_UINT32
+assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+
+typedef struct Table Table;
+typedef struct TableCell TableCell;
+
+Table *table_new_internal(const char *first_header, ...) _sentinel_;
+#define table_new(...) table_new_internal(__VA_ARGS__, NULL)
+Table *table_new_raw(size_t n_columns);
+Table *table_unref(Table *t);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Table*, table_unref);
+
+int table_add_cell_full(Table *t, TableCell **ret_cell, TableDataType type, const void *data, size_t minimum_width, size_t maximum_width, unsigned weight, unsigned align_percent, unsigned ellipsize_percent);
+static inline int table_add_cell(Table *t, TableCell **ret_cell, TableDataType type, const void *data) {
+ return table_add_cell_full(t, ret_cell, type, data, (size_t) -1, (size_t) -1, (unsigned) -1, (unsigned) -1, (unsigned) -1);
+}
+int table_add_cell_stringf(Table *t, TableCell **ret_cell, const char *format, ...) _printf_(3, 4);
+
+int table_fill_empty(Table *t, size_t until_column);
+
+int table_dup_cell(Table *t, TableCell *cell);
+
+int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width);
+int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width);
+int table_set_weight(Table *t, TableCell *cell, unsigned weight);
+int table_set_align_percent(Table *t, TableCell *cell, unsigned percent);
+int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent);
+int table_set_color(Table *t, TableCell *cell, const char *color);
+int table_set_rgap_color(Table *t, TableCell *cell, const char *color);
+int table_set_url(Table *t, TableCell *cell, const char *url);
+int table_set_uppercase(Table *t, TableCell *cell, bool b);
+
+int table_update(Table *t, TableCell *cell, TableDataType type, const void *data);
+
+int table_add_many_internal(Table *t, TableDataType first_type, ...);
+#define table_add_many(t, ...) table_add_many_internal(t, __VA_ARGS__, _TABLE_DATA_TYPE_MAX)
+
+void table_set_header(Table *table, bool b);
+void table_set_width(Table *t, size_t width);
+void table_set_cell_height_max(Table *t, size_t height);
+int table_set_empty_string(Table *t, const char *empty);
+int table_set_display_all(Table *t);
+int table_set_display(Table *t, size_t first_column, ...);
+int table_set_sort(Table *t, size_t first_column, ...);
+int table_set_reverse(Table *t, size_t column, bool b);
+int table_hide_column_from_display(Table *t, size_t column);
+
+int table_print(Table *t, FILE *f);
+int table_format(Table *t, char **ret);
+
+static inline TableCell* TABLE_HEADER_CELL(size_t i) {
+ return SIZE_TO_PTR(i + 1);
+}
+
+size_t table_get_rows(Table *t);
+size_t table_get_columns(Table *t);
+
+TableCell *table_get_cell(Table *t, size_t row, size_t column);
+
+const void *table_get(Table *t, TableCell *cell);
+const void *table_get_at(Table *t, size_t row, size_t column);
+
+int table_to_json(Table *t, JsonVariant **ret);
+int table_print_json(Table *t, FILE *f, JsonFormatFlags json_flags);
+
+#define table_log_add_error(r) \
+ log_error_errno(r, "Failed to add cell(s) to table: %m")
+
+#define table_log_print_error(r) \
+ log_error_errno(r, "Failed to print table: %m")
+
+#define table_log_sort_error(r) \
+ log_error_errno(r, "Failed to sort table: %m")
diff --git a/src/shared/fsck-util.h b/src/shared/fsck-util.h
new file mode 100644
index 0000000..855137c
--- /dev/null
+++ b/src/shared/fsck-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* exit codes as defined in fsck(8) */
+enum {
+ FSCK_SUCCESS = 0,
+ FSCK_ERROR_CORRECTED = 1 << 0,
+ FSCK_SYSTEM_SHOULD_REBOOT = 1 << 1,
+ FSCK_ERRORS_LEFT_UNCORRECTED = 1 << 2,
+ FSCK_OPERATIONAL_ERROR = 1 << 3,
+ FSCK_USAGE_OR_SYNTAX_ERROR = 1 << 4,
+ FSCK_USER_CANCELLED = 1 << 5,
+ FSCK_SHARED_LIB_ERROR = 1 << 7,
+};
diff --git a/src/shared/fstab-util.c b/src/shared/fstab-util.c
new file mode 100644
index 0000000..292b97c
--- /dev/null
+++ b/src/shared/fstab-util.c
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "device-nodes.h"
+#include "fstab-util.h"
+#include "macro.h"
+#include "mount-util.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int fstab_has_fstype(const char *fstype) {
+ _cleanup_endmntent_ FILE *f = NULL;
+ struct mntent *m;
+
+ f = setmntent(fstab_path(), "re");
+ if (!f)
+ return errno == ENOENT ? false : -errno;
+
+ for (;;) {
+ errno = 0;
+ m = getmntent(f);
+ if (!m)
+ return errno != 0 ? -errno : false;
+
+ if (streq(m->mnt_type, fstype))
+ return true;
+ }
+ return false;
+}
+
+bool fstab_is_extrinsic(const char *mount, const char *opts) {
+
+ /* Don't bother with the OS data itself */
+ if (PATH_IN_SET(mount,
+ "/",
+ "/usr",
+ "/etc"))
+ return true;
+
+ if (PATH_STARTSWITH_SET(mount,
+ "/run/initramfs", /* This should stay around from before we boot until after we shutdown */
+ "/proc", /* All of this is API VFS */
+ "/sys", /* … dito … */
+ "/dev")) /* … dito … */
+ return true;
+
+ /* If this is an initrd mount, and we are not in the initrd, then leave
+ * this around forever, too. */
+ if (opts && fstab_test_option(opts, "x-initrd.mount\0") && !in_initrd())
+ return true;
+
+ return false;
+}
+
+int fstab_is_mount_point(const char *mount) {
+ _cleanup_endmntent_ FILE *f = NULL;
+ struct mntent *m;
+
+ f = setmntent(fstab_path(), "re");
+ if (!f)
+ return errno == ENOENT ? false : -errno;
+
+ for (;;) {
+ errno = 0;
+ m = getmntent(f);
+ if (!m)
+ return errno != 0 ? -errno : false;
+
+ if (path_equal(m->mnt_dir, mount))
+ return true;
+ }
+ return false;
+}
+
+int fstab_filter_options(const char *opts, const char *names,
+ const char **ret_namefound, char **ret_value, char **ret_filtered) {
+ const char *name, *namefound = NULL, *x;
+ _cleanup_strv_free_ char **stor = NULL;
+ _cleanup_free_ char *v = NULL, **strv = NULL;
+ int r;
+
+ assert(names && *names);
+
+ if (!opts)
+ goto answer;
+
+ /* If !ret_value and !ret_filtered, this function is not allowed to fail. */
+
+ if (!ret_filtered) {
+ for (const char *word = opts;;) {
+ const char *end = word;
+
+ /* Look for an *non-escaped* comma separator. Only commas can be escaped, so "\," is
+ * the only valid escape sequence, so we can do a very simple test here. */
+ for (;;) {
+ size_t n = strcspn(end, ",");
+
+ end += n;
+ if (n > 0 && end[-1] == '\\')
+ end++;
+ else
+ break;
+ }
+
+ NULSTR_FOREACH(name, names) {
+ if (end < word + strlen(name))
+ continue;
+ if (!strneq(word, name, strlen(name)))
+ continue;
+
+ /* We know that the string is NUL terminated, so *x is valid */
+ x = word + strlen(name);
+ if (IN_SET(*x, '\0', '=', ',')) {
+ namefound = name;
+ if (ret_value) {
+ bool eq = *x == '=';
+ assert(eq || IN_SET(*x, ',', '\0'));
+
+ r = free_and_strndup(&v,
+ eq ? x + 1 : NULL,
+ eq ? end - x - 1 : 0);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+ }
+ }
+
+ if (*end)
+ word = end + 1;
+ else
+ break;
+ }
+ } else {
+ r = strv_split_full(&stor, opts, ",", EXTRACT_DONT_COALESCE_SEPARATORS | EXTRACT_UNESCAPE_SEPARATORS);
+ if (r < 0)
+ return r;
+
+ strv = memdup(stor, sizeof(char*) * (strv_length(stor) + 1));
+ if (!strv)
+ return -ENOMEM;
+
+ char **t = strv;
+ for (char **s = strv; *s; s++) {
+ NULSTR_FOREACH(name, names) {
+ x = startswith(*s, name);
+ if (x && IN_SET(*x, '\0', '='))
+ goto found;
+ }
+
+ *t = *s;
+ t++;
+ continue;
+ found:
+ /* Keep the last occurrence found */
+ namefound = name;
+ if (ret_value) {
+ assert(IN_SET(*x, '=', '\0'));
+ r = free_and_strdup(&v, *x == '=' ? x + 1 : NULL);
+ if (r < 0)
+ return r;
+ }
+ }
+ *t = NULL;
+ }
+
+answer:
+ if (ret_namefound)
+ *ret_namefound = namefound;
+ if (ret_filtered) {
+ char *f;
+
+ f = strv_join_full(strv, ",", NULL, true);
+ if (!f)
+ return -ENOMEM;
+
+ *ret_filtered = f;
+ }
+ if (ret_value)
+ *ret_value = TAKE_PTR(v);
+
+ return !!namefound;
+}
+
+int fstab_extract_values(const char *opts, const char *name, char ***values) {
+ _cleanup_strv_free_ char **optsv = NULL, **res = NULL;
+ char **s;
+
+ assert(opts);
+ assert(name);
+ assert(values);
+
+ optsv = strv_split(opts, ",");
+ if (!optsv)
+ return -ENOMEM;
+
+ STRV_FOREACH(s, optsv) {
+ char *arg;
+ int r;
+
+ arg = startswith(*s, name);
+ if (!arg || *arg != '=')
+ continue;
+ r = strv_extend(&res, arg + 1);
+ if (r < 0)
+ return r;
+ }
+
+ *values = TAKE_PTR(res);
+
+ return !!*values;
+}
+
+int fstab_find_pri(const char *options, int *ret) {
+ _cleanup_free_ char *opt = NULL;
+ int r, pri;
+
+ assert(ret);
+
+ r = fstab_filter_options(options, "pri\0", NULL, &opt, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0 || !opt)
+ return 0;
+
+ r = safe_atoi(opt, &pri);
+ if (r < 0)
+ return r;
+
+ *ret = pri;
+ return 1;
+}
+
+static char *unquote(const char *s, const char* quotes) {
+ size_t l;
+ assert(s);
+
+ /* This is rather stupid, simply removes the heading and
+ * trailing quotes if there is one. Doesn't care about
+ * escaping or anything.
+ *
+ * DON'T USE THIS FOR NEW CODE ANYMORE! */
+
+ l = strlen(s);
+ if (l < 2)
+ return strdup(s);
+
+ if (strchr(quotes, s[0]) && s[l-1] == s[0])
+ return strndup(s+1, l-2);
+
+ return strdup(s);
+}
+
+static char *tag_to_udev_node(const char *tagvalue, const char *by) {
+ _cleanup_free_ char *t = NULL, *u = NULL;
+ size_t enc_len;
+
+ u = unquote(tagvalue, QUOTES);
+ if (!u)
+ return NULL;
+
+ enc_len = strlen(u) * 4 + 1;
+ t = new(char, enc_len);
+ if (!t)
+ return NULL;
+
+ if (encode_devnode_name(u, t, enc_len) < 0)
+ return NULL;
+
+ return strjoin("/dev/disk/by-", by, "/", t);
+}
+
+char *fstab_node_to_udev_node(const char *p) {
+ assert(p);
+
+ if (startswith(p, "LABEL="))
+ return tag_to_udev_node(p+6, "label");
+
+ if (startswith(p, "UUID="))
+ return tag_to_udev_node(p+5, "uuid");
+
+ if (startswith(p, "PARTUUID="))
+ return tag_to_udev_node(p+9, "partuuid");
+
+ if (startswith(p, "PARTLABEL="))
+ return tag_to_udev_node(p+10, "partlabel");
+
+ return strdup(p);
+}
diff --git a/src/shared/fstab-util.h b/src/shared/fstab-util.h
new file mode 100644
index 0000000..1a602cb
--- /dev/null
+++ b/src/shared/fstab-util.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "macro.h"
+
+bool fstab_is_extrinsic(const char *mount, const char *opts);
+int fstab_is_mount_point(const char *mount);
+int fstab_has_fstype(const char *fstype);
+
+int fstab_filter_options(const char *opts, const char *names, const char **namefound, char **value, char **filtered);
+
+int fstab_extract_values(const char *opts, const char *name, char ***values);
+
+static inline bool fstab_test_option(const char *opts, const char *names) {
+ return !!fstab_filter_options(opts, names, NULL, NULL, NULL);
+}
+
+int fstab_find_pri(const char *options, int *ret);
+
+static inline bool fstab_test_yes_no_option(const char *opts, const char *yes_no) {
+ const char *opt;
+
+ /* If first name given is last, return 1.
+ * If second name given is last or neither is found, return 0. */
+
+ assert_se(fstab_filter_options(opts, yes_no, &opt, NULL, NULL) >= 0);
+
+ return opt == yes_no;
+}
+
+char *fstab_node_to_udev_node(const char *p);
+
+static inline const char* fstab_path(void) {
+ return secure_getenv("SYSTEMD_FSTAB") ?: "/etc/fstab";
+}
diff --git a/src/shared/generate-ip-protocol-list.sh b/src/shared/generate-ip-protocol-list.sh
new file mode 100755
index 0000000..3f91979
--- /dev/null
+++ b/src/shared/generate-ip-protocol-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include netinet/in.h - </dev/null | \
+ awk '/^#define[ \t]+IPPROTO_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \
+ sed -e 's/IPPROTO_//'
diff --git a/src/shared/generate-syscall-list.py b/src/shared/generate-syscall-list.py
new file mode 100755
index 0000000..030c3fe
--- /dev/null
+++ b/src/shared/generate-syscall-list.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+import sys
+import os
+
+s390 = 's390' in os.uname().machine
+arm = 'arm' in os.uname().machine
+
+for line in open(sys.argv[1]):
+ if line.startswith('s390_') and not s390:
+ continue
+ if line.startswith('arm_') and not arm:
+ continue
+
+ print('"{}\\0"'.format(line.strip()))
diff --git a/src/shared/generator.c b/src/shared/generator.c
new file mode 100644
index 0000000..1eccc5a
--- /dev/null
+++ b/src/shared/generator.c
@@ -0,0 +1,631 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "special.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "unit-name.h"
+#include "util.h"
+
+int generator_open_unit_file(
+ const char *dest,
+ const char *source,
+ const char *name,
+ FILE **file) {
+
+ const char *unit;
+ FILE *f;
+ int r;
+
+ unit = prefix_roota(dest, name);
+
+ r = fopen_unlocked(unit, "wxe", &f);
+ if (r < 0) {
+ if (source && r == -EEXIST)
+ return log_error_errno(r,
+ "Failed to create unit file %s, as it already exists. Duplicate entry in %s?",
+ unit, source);
+ else
+ return log_error_errno(r,
+ "Failed to create unit file %s: %m",
+ unit);
+ }
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n",
+ program_invocation_short_name);
+
+ *file = f;
+ return 0;
+}
+
+int generator_add_symlink(const char *dir, const char *dst, const char *dep_type, const char *src) {
+ /* Adds a symlink from <dst>.<dep_type>/ to <src> (if src is absolute)
+ * or ../<src> (otherwise). */
+
+ const char *from, *to;
+
+ from = path_is_absolute(src) ? src : strjoina("../", src);
+ to = strjoina(dir, "/", dst, ".", dep_type, "/", basename(src));
+
+ mkdir_parents_label(to, 0755);
+ if (symlink(from, to) < 0)
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create symlink \"%s\": %m", to);
+
+ return 0;
+}
+
+static int write_fsck_sysroot_service(const char *dir, const char *what) {
+ _cleanup_free_ char *device = NULL, *escaped = NULL, *escaped2 = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit;
+ int r;
+
+ escaped = specifier_escape(what);
+ if (!escaped)
+ return log_oom();
+
+ escaped2 = cescape(escaped);
+ if (!escaped2)
+ return log_oom();
+
+ unit = strjoina(dir, "/"SPECIAL_FSCK_ROOT_SERVICE);
+ log_debug("Creating %s", unit);
+
+ r = unit_name_from_path(what, ".device", &device);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert device \"%s\" to unit name: %m", what);
+
+ f = fopen(unit, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by %1$s\n\n"
+ "[Unit]\n"
+ "Description=File System Check on %2$s\n"
+ "Documentation=man:systemd-fsck-root.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%3$s\n"
+ "Conflicts=shutdown.target\n"
+ "After=initrd-root-device.target local-fs-pre.target %3$s\n"
+ "Before=shutdown.target\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart=" SYSTEMD_FSCK_PATH " %4$s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ escaped,
+ device,
+ escaped2);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit);
+
+ return 0;
+}
+
+int generator_write_fsck_deps(
+ FILE *f,
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *fstype) {
+
+ int r;
+
+ assert(f);
+ assert(dir);
+ assert(what);
+ assert(where);
+
+ if (!is_device_path(what)) {
+ log_warning("Checking was requested for \"%s\", but it is not a device.", what);
+ return 0;
+ }
+
+ if (!isempty(fstype) && !streq(fstype, "auto")) {
+ r = fsck_exists(fstype);
+ if (r < 0)
+ log_warning_errno(r, "Checking was requested for %s, but couldn't detect if fsck.%s may be used, proceeding: %m", what, fstype);
+ else if (r == 0) {
+ /* treat missing check as essentially OK */
+ log_debug("Checking was requested for %s, but fsck.%s does not exist.", what, fstype);
+ return 0;
+ }
+ }
+
+ if (path_equal(where, "/")) {
+ const char *lnk;
+
+ lnk = strjoina(dir, "/" SPECIAL_LOCAL_FS_TARGET ".wants/" SPECIAL_FSCK_ROOT_SERVICE);
+
+ (void) mkdir_parents(lnk, 0755);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/" SPECIAL_FSCK_ROOT_SERVICE, lnk) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", lnk);
+
+ } else {
+ _cleanup_free_ char *_fsck = NULL;
+ const char *fsck, *dep;
+
+ if (in_initrd() && path_equal(where, "/sysroot")) {
+ r = write_fsck_sysroot_service(dir, what);
+ if (r < 0)
+ return r;
+
+ fsck = SPECIAL_FSCK_ROOT_SERVICE;
+ dep = "Requires";
+ } else {
+ /* When this is /usr, then let's add a Wants= dependency, otherwise a Requires=
+ * dependency. Why? We can't possibly unmount /usr during shutdown, but if we have a
+ * Requires= from /usr onto a fsck@.service unit and that unit is shut down, then
+ * we'd have to unmount /usr too. */
+
+ dep = !in_initrd() && path_equal(where, "/usr") ? "Wants" : "Requires";
+
+ r = unit_name_from_path_instance("systemd-fsck", what, ".service", &_fsck);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create fsck service name: %m");
+
+ fsck = _fsck;
+ }
+
+ fprintf(f,
+ "%1$s=%2$s\n"
+ "After=%2$s\n",
+ dep, fsck);
+ }
+
+ return 0;
+}
+
+int generator_write_timeouts(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts,
+ char **filtered) {
+
+ /* Configure how long we wait for a device that backs a mount point or a
+ * swap partition to show up. This is useful to support endless device timeouts
+ * for devices that show up only after user input, like crypto devices. */
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *timeout = NULL;
+ usec_t u;
+ int r;
+
+ r = fstab_filter_options(opts, "comment=systemd.device-timeout\0"
+ "x-systemd.device-timeout\0",
+ NULL, &timeout, filtered);
+ if (r <= 0)
+ return r;
+
+ r = parse_sec_fix_0(timeout, &u);
+ if (r < 0) {
+ log_warning("Failed to parse timeout for %s, ignoring: %s", where, timeout);
+ return 0;
+ }
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+ if (!is_device_path(node)) {
+ log_warning("x-systemd.device-timeout ignored for %s", what);
+ return 0;
+ }
+
+ r = unit_name_from_path(node, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path: %m");
+
+ return write_drop_in_format(dir, unit, 50, "device-timeout",
+ "# Automatically generated by %s\n"
+ "# from supplied options \"%s\"\n\n"
+ "[Unit]\n"
+ "JobRunningTimeoutSec=%s",
+ program_invocation_short_name,
+ opts,
+ timeout);
+}
+
+int generator_write_device_deps(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts) {
+
+ /* fstab records that specify _netdev option should apply the network
+ * ordering on the actual device depending on network connection. If we
+ * are not mounting real device (NFS, CIFS), we rely on _netdev effect
+ * on the mount unit itself. */
+
+ _cleanup_free_ char *node = NULL, *unit = NULL;
+ int r;
+
+ if (fstab_is_extrinsic(where, opts))
+ return 0;
+
+ if (!fstab_test_option(opts, "_netdev\0"))
+ return 0;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to apply dependencies to. */
+ if (!is_device_path(node))
+ return 0;
+
+ r = unit_name_from_path(node, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ node);
+
+ /* See mount_add_default_dependencies for explanation why we create such
+ * dependencies. */
+ return write_drop_in_format(dir, unit, 50, "netdev-dependencies",
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "After=" SPECIAL_NETWORK_ONLINE_TARGET " " SPECIAL_NETWORK_TARGET "\n"
+ "Wants=" SPECIAL_NETWORK_ONLINE_TARGET "\n",
+ program_invocation_short_name);
+}
+
+int generator_write_initrd_root_device_deps(const char *dir, const char *what) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = unit_name_from_path(what, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ what);
+
+ return write_drop_in_format(dir, SPECIAL_INITRD_ROOT_DEVICE_TARGET, 50, "root-device",
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Requires=%s\n"
+ "After=%s",
+ program_invocation_short_name,
+ unit,
+ unit);
+}
+
+int generator_hook_up_mkswap(
+ const char *dir,
+ const char *what) {
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to work on. */
+ if (!is_device_path(node))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format something that is not a device node: %s",
+ node);
+
+ r = unit_name_from_path_instance("systemd-mkswap", node, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ node);
+
+ unit_file = prefix_roota(dir, unit);
+ log_debug("Creating %s", unit_file);
+
+ escaped = cescape(node);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path(what, ".swap", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ what);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Make Swap on %%f\n"
+ "Documentation=man:systemd-mkswap@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.device\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.device\n"
+ "Before=shutdown.target %s\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_MAKEFS_PATH " swap %s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ where_unit,
+ escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit_file);
+
+ return generator_add_symlink(dir, where_unit, "requires", unit);
+}
+
+int generator_hook_up_mkfs(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type) {
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to work on. */
+ if (!is_device_path(node))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format something that is not a device node: %s",
+ node);
+
+ if (!type || streq(type, "auto"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format partition %s, filesystem type is not specified",
+ node);
+
+ r = unit_name_from_path_instance("systemd-makefs", node, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ node);
+
+ unit_file = prefix_roota(dir, unit);
+ log_debug("Creating %s", unit_file);
+
+ escaped = cescape(node);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path(where, ".mount", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ where);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Make File System on %%f\n"
+ "Documentation=man:systemd-makefs@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.device\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.device\n"
+ /* fsck might or might not be used, so let's be safe and order
+ * ourselves before both systemd-fsck@.service and the mount unit. */
+ "Before=shutdown.target systemd-fsck@%%i.service %s\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_MAKEFS_PATH " %s %s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ where_unit,
+ type,
+ escaped);
+ // XXX: what about local-fs-pre.target?
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit_file);
+
+ return generator_add_symlink(dir, where_unit, "requires", unit);
+}
+
+int generator_hook_up_growfs(
+ const char *dir,
+ const char *where,
+ const char *target) {
+
+ _cleanup_free_ char *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ escaped = cescape(where);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path_instance("systemd-growfs", where, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ where);
+
+ r = unit_name_from_path(where, ".mount", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ where);
+
+ unit_file = prefix_roota(dir, unit);
+ log_debug("Creating %s", unit_file);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Grow File System on %%f\n"
+ "Documentation=man:systemd-growfs@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.mount\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.mount\n"
+ "Before=shutdown.target %s\n",
+ program_invocation_short_name,
+ target);
+
+ if (empty_or_root(where)) /* Make sure the root fs is actually writable before we resize it */
+ fprintf(f,
+ "After=systemd-remount-fs.service\n");
+
+ fprintf(f,
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_GROWFS_PATH " %s\n"
+ "TimeoutSec=0\n",
+ escaped);
+
+ return generator_add_symlink(dir, where_unit, "wants", unit);
+}
+
+int generator_enable_remount_fs_service(const char *dir) {
+ /* Pull in systemd-remount-fs.service */
+ return generator_add_symlink(dir, SPECIAL_LOCAL_FS_TARGET, "wants",
+ SYSTEM_DATA_UNIT_PATH "/" SPECIAL_REMOUNT_FS_SERVICE);
+}
+
+int generator_write_blockdev_dependency(
+ FILE *f,
+ const char *what) {
+
+ _cleanup_free_ char *escaped = NULL;
+ int r;
+
+ assert(f);
+ assert(what);
+
+ if (!path_startswith(what, "/dev/"))
+ return 0;
+
+ r = unit_name_path_escape(what, &escaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape device node path %s: %m", what);
+
+ fprintf(f,
+ "After=blockdev@%s.target\n",
+ escaped);
+
+ return 0;
+}
+
+int generator_write_cryptsetup_unit_section(
+ FILE *f,
+ const char *source) {
+
+ assert(f);
+
+ fprintf(f,
+ "[Unit]\n"
+ "Description=Cryptography Setup for %%I\n"
+ "Documentation=man:crypttab(5) man:systemd-cryptsetup-generator(8) man:systemd-cryptsetup@.service(8)\n");
+
+ if (source)
+ fprintf(f, "SourcePath=%s\n", source);
+
+ fprintf(f,
+ "DefaultDependencies=no\n"
+ "IgnoreOnIsolate=true\n"
+ "After=cryptsetup-pre.target systemd-udevd-kernel.socket\n"
+ "Before=blockdev@dev-mapper-%%i.target\n"
+ "Wants=blockdev@dev-mapper-%%i.target\n");
+
+ return 0;
+}
+
+int generator_write_cryptsetup_service_section(
+ FILE *f,
+ const char *name,
+ const char *what,
+ const char *password,
+ const char *options) {
+
+ _cleanup_free_ char *name_escaped = NULL, *what_escaped = NULL, *password_escaped = NULL, *options_escaped = NULL;
+
+ assert(f);
+ assert(name);
+ assert(what);
+
+ name_escaped = specifier_escape(name);
+ if (!name_escaped)
+ return log_oom();
+
+ what_escaped = specifier_escape(what);
+ if (!what_escaped)
+ return log_oom();
+
+ if (password) {
+ password_escaped = specifier_escape(password);
+ if (!password_escaped)
+ return log_oom();
+ }
+
+ if (options) {
+ options_escaped = specifier_escape(options);
+ if (!options_escaped)
+ return log_oom();
+ }
+
+ fprintf(f,
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "TimeoutSec=0\n" /* The binary handles timeouts on its own */
+ "KeyringMode=shared\n" /* Make sure we can share cached keys among instances */
+ "OOMScoreAdjust=500\n" /* Unlocking can allocate a lot of memory if Argon2 is used */
+ "ExecStart=" SYSTEMD_CRYPTSETUP_PATH " attach '%s' '%s' '%s' '%s'\n"
+ "ExecStop=" SYSTEMD_CRYPTSETUP_PATH " detach '%s'\n",
+ name_escaped, what_escaped, strempty(password_escaped), strempty(options_escaped),
+ name_escaped);
+
+ return 0;
+}
+
+void log_setup_generator(void) {
+ /* Disable talking to syslog/journal (i.e. the two IPC-based loggers) if we run in system context. */
+ if (cg_pid_get_owner_uid(0, NULL) == -ENXIO /* not running in a per-user slice */)
+ log_set_prohibit_ipc(true);
+
+ log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); /* This effectively means: journal for per-user generators, kmsg otherwise */
+ log_parse_environment();
+ (void) log_open();
+}
diff --git a/src/shared/generator.h b/src/shared/generator.h
new file mode 100644
index 0000000..ff6072f
--- /dev/null
+++ b/src/shared/generator.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "main-func.h"
+
+int generator_open_unit_file(
+ const char *dest,
+ const char *source,
+ const char *name,
+ FILE **file);
+
+int generator_add_symlink(const char *dir, const char *dst, const char *dep_type, const char *src);
+
+int generator_write_fsck_deps(
+ FILE *f,
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type);
+
+int generator_write_timeouts(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts,
+ char **filtered);
+
+int generator_write_blockdev_dependency(
+ FILE *f,
+ const char *what);
+
+int generator_write_cryptsetup_unit_section(
+ FILE *f,
+ const char *source);
+
+int generator_write_cryptsetup_service_section(
+ FILE *f,
+ const char *name,
+ const char *what,
+ const char *password,
+ const char *options);
+
+int generator_write_device_deps(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts);
+
+int generator_write_initrd_root_device_deps(
+ const char *dir,
+ const char *what);
+
+int generator_hook_up_mkswap(
+ const char *dir,
+ const char *what);
+int generator_hook_up_mkfs(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type);
+int generator_hook_up_growfs(
+ const char *dir,
+ const char *where,
+ const char *target);
+
+int generator_enable_remount_fs_service(const char *dir);
+
+void log_setup_generator(void);
+
+/* Similar to DEFINE_MAIN_FUNCTION, but initializes logging and assigns positional arguments. */
+#define DEFINE_MAIN_GENERATOR_FUNCTION(impl) \
+ _DEFINE_MAIN_FUNCTION( \
+ ({ \
+ log_setup_generator(); \
+ if (argc > 1 && argc != 4) \
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), \
+ "This program takes zero or three arguments."); \
+ }), \
+ impl(argc > 1 ? argv[1] : "/tmp", \
+ argc > 1 ? argv[2] : "/tmp", \
+ argc > 1 ? argv[3] : "/tmp"), \
+ r < 0 ? EXIT_FAILURE : EXIT_SUCCESS)
diff --git a/src/shared/geneve-util.c b/src/shared/geneve-util.c
new file mode 100644
index 0000000..36ef9c8
--- /dev/null
+++ b/src/shared/geneve-util.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "geneve-util.h"
+#include "string-table.h"
+
+static const char* const geneve_df_table[_NETDEV_GENEVE_DF_MAX] = {
+ [NETDEV_GENEVE_DF_UNSET] = "unset",
+ [NETDEV_GENEVE_DF_SET] = "set",
+ [NETDEV_GENEVE_DF_INHERIT] = "inherit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(geneve_df, GeneveDF);
diff --git a/src/shared/geneve-util.h b/src/shared/geneve-util.h
new file mode 100644
index 0000000..3865f80
--- /dev/null
+++ b/src/shared/geneve-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/if_link.h>
+
+#include "conf-parser.h"
+
+typedef enum GeneveDF {
+ NETDEV_GENEVE_DF_UNSET = GENEVE_DF_UNSET,
+ NETDEV_GENEVE_DF_SET = GENEVE_DF_SET,
+ NETDEV_GENEVE_DF_INHERIT = GENEVE_DF_INHERIT,
+ _NETDEV_GENEVE_DF_MAX,
+ _NETDEV_GENEVE_DF_INVALID = -1,
+} GeneveDF;
+
+const char *geneve_df_to_string(GeneveDF d) _const_;
+GeneveDF geneve_df_from_string(const char *d) _pure_;
diff --git a/src/shared/gpt.c b/src/shared/gpt.c
new file mode 100644
index 0000000..15ea2f0
--- /dev/null
+++ b/src/shared/gpt.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "gpt.h"
+#include "string-util.h"
+
+const GptPartitionType gpt_partition_type_table[] = {
+ { GPT_ROOT_X86, "root-x86" },
+ { GPT_ROOT_X86_VERITY, "root-x86-verity" },
+ { GPT_ROOT_X86_64, "root-x86-64" },
+ { GPT_ROOT_X86_64_VERITY, "root-x86-64-verity" },
+ { GPT_ROOT_ARM, "root-arm" },
+ { GPT_ROOT_ARM_VERITY, "root-arm-verity" },
+ { GPT_ROOT_ARM_64, "root-arm64" },
+ { GPT_ROOT_ARM_64_VERITY, "root-arm64-verity" },
+ { GPT_ROOT_IA64, "root-ia64" },
+ { GPT_ROOT_IA64_VERITY, "root-ia64-verity" },
+ { GPT_ROOT_RISCV32, "root-riscv32" },
+ { GPT_ROOT_RISCV32_VERITY, "root-riscv32-verity" },
+ { GPT_ROOT_RISCV64, "root-riscv64" },
+ { GPT_ROOT_RISCV64_VERITY, "root-riscv64-verity" },
+#ifdef GPT_ROOT_NATIVE
+ { GPT_ROOT_NATIVE, "root" },
+ { GPT_ROOT_NATIVE_VERITY, "root-verity" },
+#endif
+#ifdef GPT_ROOT_SECONDARY
+ { GPT_ROOT_SECONDARY, "root-secondary" },
+ { GPT_ROOT_SECONDARY_VERITY, "root-secondary-verity" },
+#endif
+ { GPT_USR_X86, "usr-x86" },
+ { GPT_USR_X86_VERITY, "usr-x86-verity" },
+ { GPT_USR_X86_64, "usr-x86-64" },
+ { GPT_USR_X86_64_VERITY, "usr-x86-64-verity" },
+ { GPT_USR_ARM, "usr-arm" },
+ { GPT_USR_ARM_VERITY, "usr-arm-verity" },
+ { GPT_USR_ARM_64, "usr-arm64" },
+ { GPT_USR_ARM_64_VERITY, "usr-arm64-verity" },
+ { GPT_USR_IA64, "usr-ia64" },
+ { GPT_USR_IA64_VERITY, "usr-ia64-verity" },
+ { GPT_USR_RISCV32, "usr-riscv32" },
+ { GPT_USR_RISCV32_VERITY, "usr-riscv32-verity" },
+ { GPT_USR_RISCV64, "usr-riscv64" },
+ { GPT_USR_RISCV64_VERITY, "usr-riscv64-verity" },
+#ifdef GPT_USR_NATIVE
+ { GPT_USR_NATIVE, "usr" },
+ { GPT_USR_NATIVE_VERITY, "usr-verity" },
+#endif
+#ifdef GPT_USR_SECONDARY
+ { GPT_USR_SECONDARY, "usr-secondary" },
+ { GPT_USR_SECONDARY_VERITY, "usr-secondary-verity" },
+#endif
+ { GPT_ESP, "esp" },
+ { GPT_XBOOTLDR, "xbootldr" },
+ { GPT_SWAP, "swap" },
+ { GPT_HOME, "home" },
+ { GPT_SRV, "srv" },
+ { GPT_VAR, "var" },
+ { GPT_TMP, "tmp" },
+ { GPT_USER_HOME, "user-home" },
+ { GPT_LINUX_GENERIC, "linux-generic" },
+ {}
+};
+
+const char *gpt_partition_type_uuid_to_string(sd_id128_t id) {
+ for (size_t i = 0; i < ELEMENTSOF(gpt_partition_type_table) - 1; i++)
+ if (sd_id128_equal(id, gpt_partition_type_table[i].uuid))
+ return gpt_partition_type_table[i].name;
+
+ return NULL;
+}
+
+const char *gpt_partition_type_uuid_to_string_harder(
+ sd_id128_t id,
+ char buffer[static ID128_UUID_STRING_MAX]) {
+
+ const char *s;
+
+ assert(buffer);
+
+ s = gpt_partition_type_uuid_to_string(id);
+ if (s)
+ return s;
+
+ return id128_to_uuid_string(id, buffer);
+}
+
+int gpt_partition_type_uuid_from_string(const char *s, sd_id128_t *ret) {
+ assert(s);
+ assert(ret);
+
+ for (size_t i = 0; i < ELEMENTSOF(gpt_partition_type_table) - 1; i++)
+ if (streq(s, gpt_partition_type_table[i].name)) {
+ *ret = gpt_partition_type_table[i].uuid;
+ return 0;
+ }
+
+ return sd_id128_from_string(s, ret);
+}
diff --git a/src/shared/gpt.h b/src/shared/gpt.h
new file mode 100644
index 0000000..241ff03
--- /dev/null
+++ b/src/shared/gpt.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <endian.h>
+
+#include "sd-id128.h"
+
+#include "id128-util.h"
+
+/* We only support root disk discovery for x86, x86-64, Itanium and ARM for now, since EFI for anything else
+ * doesn't really exist, and we only care for root partitions on the same disk as the EFI ESP. */
+
+#define GPT_ROOT_X86 SD_ID128_MAKE(44,47,95,40,f2,97,41,b2,9a,f7,d1,31,d5,f0,45,8a)
+#define GPT_ROOT_X86_64 SD_ID128_MAKE(4f,68,bc,e3,e8,cd,4d,b1,96,e7,fb,ca,f9,84,b7,09)
+#define GPT_ROOT_ARM SD_ID128_MAKE(69,da,d7,10,2c,e4,4e,3c,b1,6c,21,a1,d4,9a,be,d3)
+#define GPT_ROOT_ARM_64 SD_ID128_MAKE(b9,21,b0,45,1d,f0,41,c3,af,44,4c,6f,28,0d,3f,ae)
+#define GPT_ROOT_IA64 SD_ID128_MAKE(99,3d,8d,3d,f8,0e,42,25,85,5a,9d,af,8e,d7,ea,97)
+#define GPT_ROOT_RISCV32 SD_ID128_MAKE(60,d5,a7,fe,8e,7d,43,5c,b7,14,3d,d8,16,21,44,e1)
+#define GPT_ROOT_RISCV64 SD_ID128_MAKE(72,ec,70,a6,cf,74,40,e6,bd,49,4b,da,08,e8,f2,24)
+#define GPT_USR_X86 SD_ID128_MAKE(75,25,0d,76,8c,c6,45,8e,bd,66,bd,47,cc,81,a8,12)
+#define GPT_USR_X86_64 SD_ID128_MAKE(84,84,68,0c,95,21,48,c6,9c,11,b0,72,06,56,f6,9e)
+#define GPT_USR_ARM SD_ID128_MAKE(7d,03,59,a3,02,b3,4f,0a,86,5c,65,44,03,e7,06,25)
+#define GPT_USR_ARM_64 SD_ID128_MAKE(b0,e0,10,50,ee,5f,43,90,94,9a,91,01,b1,71,04,e9)
+#define GPT_USR_IA64 SD_ID128_MAKE(43,01,d2,a6,4e,3b,4b,2a,bb,94,9e,0b,2c,42,25,ea)
+#define GPT_USR_RISCV32 SD_ID128_MAKE(b9,33,fb,22,5c,3f,4f,91,af,90,e2,bb,0f,a5,07,02)
+#define GPT_USR_RISCV64 SD_ID128_MAKE(be,ae,c3,4b,84,42,43,9b,a4,0b,98,43,81,ed,09,7d)
+#define GPT_ESP SD_ID128_MAKE(c1,2a,73,28,f8,1f,11,d2,ba,4b,00,a0,c9,3e,c9,3b)
+#define GPT_XBOOTLDR SD_ID128_MAKE(bc,13,c2,ff,59,e6,42,62,a3,52,b2,75,fd,6f,71,72)
+#define GPT_SWAP SD_ID128_MAKE(06,57,fd,6d,a4,ab,43,c4,84,e5,09,33,c8,4b,4f,4f)
+#define GPT_HOME SD_ID128_MAKE(93,3a,c7,e1,2e,b4,4f,13,b8,44,0e,14,e2,ae,f9,15)
+#define GPT_SRV SD_ID128_MAKE(3b,8f,84,25,20,e0,4f,3b,90,7f,1a,25,a7,6f,98,e8)
+#define GPT_VAR SD_ID128_MAKE(4d,21,b0,16,b5,34,45,c2,a9,fb,5c,16,e0,91,fd,2d)
+#define GPT_TMP SD_ID128_MAKE(7e,c6,f5,57,3b,c5,4a,ca,b2,93,16,ef,5d,f6,39,d1)
+#define GPT_USER_HOME SD_ID128_MAKE(77,3f,91,ef,66,d4,49,b5,bd,83,d6,83,bf,40,ad,16)
+#define GPT_LINUX_GENERIC SD_ID128_MAKE(0f,c6,3d,af,84,83,47,72,8e,79,3d,69,d8,47,7d,e4)
+
+/* Verity partitions for the root partitions above (we only define them for the root and /usr partitions,
+ * because only they are are commonly read-only and hence suitable for verity). */
+#define GPT_ROOT_X86_VERITY SD_ID128_MAKE(d1,3c,5d,3b,b5,d1,42,2a,b2,9f,94,54,fd,c8,9d,76)
+#define GPT_ROOT_X86_64_VERITY SD_ID128_MAKE(2c,73,57,ed,eb,d2,46,d9,ae,c1,23,d4,37,ec,2b,f5)
+#define GPT_ROOT_ARM_VERITY SD_ID128_MAKE(73,86,cd,f2,20,3c,47,a9,a4,98,f2,ec,ce,45,a2,d6)
+#define GPT_ROOT_ARM_64_VERITY SD_ID128_MAKE(df,33,00,ce,d6,9f,4c,92,97,8c,9b,fb,0f,38,d8,20)
+#define GPT_ROOT_IA64_VERITY SD_ID128_MAKE(86,ed,10,d5,b6,07,45,bb,89,57,d3,50,f2,3d,05,71)
+#define GPT_ROOT_RISCV32_VERITY SD_ID128_MAKE(ae,02,53,be,11,67,40,07,ac,68,43,92,6c,14,c5,de)
+#define GPT_ROOT_RISCV64_VERITY SD_ID128_MAKE(b6,ed,55,82,44,0b,42,09,b8,da,5f,f7,c4,19,ea,3d)
+#define GPT_USR_X86_VERITY SD_ID128_MAKE(8f,46,1b,0d,14,ee,4e,81,9a,a9,04,9b,6f,b9,7a,bd)
+#define GPT_USR_X86_64_VERITY SD_ID128_MAKE(77,ff,5f,63,e7,b6,46,33,ac,f4,15,65,b8,64,c0,e6)
+#define GPT_USR_ARM_VERITY SD_ID128_MAKE(c2,15,d7,51,7b,cd,46,49,be,90,66,27,49,0a,4c,05)
+#define GPT_USR_ARM_64_VERITY SD_ID128_MAKE(6e,11,a4,e7,fb,ca,4d,ed,b9,e9,e1,a5,12,bb,66,4e)
+#define GPT_USR_IA64_VERITY SD_ID128_MAKE(6a,49,1e,03,3b,e7,45,45,8e,38,83,32,0e,0e,a8,80)
+#define GPT_USR_RISCV32_VERITY SD_ID128_MAKE(cb,1e,e4,e3,8c,d0,41,36,a0,a4,aa,61,a3,2e,87,30)
+#define GPT_USR_RISCV64_VERITY SD_ID128_MAKE(8f,10,56,be,9b,05,47,c4,81,d6,be,53,12,8e,5b,54)
+
+#if defined(__x86_64__)
+# define GPT_ROOT_NATIVE GPT_ROOT_X86_64
+# define GPT_ROOT_SECONDARY GPT_ROOT_X86
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_64_VERITY
+# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_X86_VERITY
+# define GPT_USR_NATIVE GPT_USR_X86_64
+# define GPT_USR_SECONDARY GPT_USR_X86
+# define GPT_USR_NATIVE_VERITY GPT_USR_X86_64_VERITY
+# define GPT_USR_SECONDARY_VERITY GPT_USR_X86_VERITY
+#elif defined(__i386__)
+# define GPT_ROOT_NATIVE GPT_ROOT_X86
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_VERITY
+# define GPT_USR_NATIVE GPT_USR_X86
+# define GPT_USR_NATIVE_VERITY GPT_USR_X86_VERITY
+#endif
+
+#if defined(__ia64__)
+# define GPT_ROOT_NATIVE GPT_ROOT_IA64
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_IA64_VERITY
+# define GPT_USR_NATIVE GPT_USR_IA64
+# define GPT_USR_NATIVE_VERITY GPT_USR_IA64_VERITY
+#endif
+
+#if defined(__aarch64__) && (__BYTE_ORDER != __BIG_ENDIAN)
+# define GPT_ROOT_NATIVE GPT_ROOT_ARM_64
+# define GPT_ROOT_SECONDARY GPT_ROOT_ARM
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_64_VERITY
+# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_ARM_VERITY
+# define GPT_USR_NATIVE GPT_USR_ARM_64
+# define GPT_USR_SECONDARY GPT_USR_ARM
+# define GPT_USR_NATIVE_VERITY GPT_USR_ARM_64_VERITY
+# define GPT_USR_SECONDARY_VERITY GPT_USR_ARM_VERITY
+#elif defined(__arm__) && (__BYTE_ORDER != __BIG_ENDIAN)
+# define GPT_ROOT_NATIVE GPT_ROOT_ARM
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_VERITY
+# define GPT_USR_NATIVE GPT_USR_ARM
+# define GPT_USR_NATIVE_VERITY GPT_USR_ARM_VERITY
+#endif
+
+#if defined(__riscv)
+#if (__riscv_xlen == 32)
+# define GPT_ROOT_NATIVE GPT_ROOT_RISCV32
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_RISCV32_VERITY
+# define GPT_USR_NATIVE GPT_USR_RISCV32
+# define GPT_USR_NATIVE_VERITY GPT_USR_RISCV32_VERITY
+#elif (__riscv_xlen == 64)
+# define GPT_ROOT_NATIVE GPT_ROOT_RISCV64
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_RISCV64_VERITY
+# define GPT_USR_NATIVE GPT_USR_RISCV64
+# define GPT_USR_NATIVE_VERITY GPT_USR_RISCV64_VERITY
+#endif
+#endif
+
+#define GPT_FLAG_REQUIRED_PARTITION (1ULL << 0)
+#define GPT_FLAG_NO_BLOCK_IO_PROTOCOL (1ULL << 1)
+#define GPT_FLAG_LEGACY_BIOS_BOOTABLE (1ULL << 2)
+
+/* Flags we recognize on the root, usr, xbootldr, swap, home, srv, var, tmp partitions when doing
+ * auto-discovery. These happen to be identical to what Microsoft defines for its own Basic Data Partitions,
+ * but that's just because we saw no point in defining any other values here. */
+#define GPT_FLAG_READ_ONLY (1ULL << 60)
+#define GPT_FLAG_NO_AUTO (1ULL << 63)
+
+const char *gpt_partition_type_uuid_to_string(sd_id128_t id);
+const char *gpt_partition_type_uuid_to_string_harder(
+ sd_id128_t id,
+ char buffer[static ID128_UUID_STRING_MAX]);
+int gpt_partition_type_uuid_from_string(const char *s, sd_id128_t *ret);
+
+typedef struct GptPartitionType {
+ sd_id128_t uuid;
+ const char *name;
+} GptPartitionType;
+
+extern const GptPartitionType gpt_partition_type_table[];
diff --git a/src/shared/group-record.c b/src/shared/group-record.c
new file mode 100644
index 0000000..da3ed0a
--- /dev/null
+++ b/src/shared/group-record.c
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "group-record.h"
+#include "strv.h"
+#include "user-util.h"
+
+GroupRecord* group_record_new(void) {
+ GroupRecord *h;
+
+ h = new(GroupRecord, 1);
+ if (!h)
+ return NULL;
+
+ *h = (GroupRecord) {
+ .n_ref = 1,
+ .disposition = _USER_DISPOSITION_INVALID,
+ .last_change_usec = UINT64_MAX,
+ .gid = GID_INVALID,
+ };
+
+ return h;
+}
+
+static GroupRecord *group_record_free(GroupRecord *g) {
+ if (!g)
+ return NULL;
+
+ free(g->group_name);
+ free(g->realm);
+ free(g->group_name_and_realm_auto);
+ free(g->description);
+
+ strv_free(g->members);
+ free(g->service);
+ strv_free(g->administrators);
+ strv_free_erase(g->hashed_password);
+
+ json_variant_unref(g->json);
+
+ return mfree(g);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(GroupRecord, group_record, group_record_free);
+
+static int dispatch_privileged(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch privileged_dispatch_table[] = {
+ { "hashedPassword", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(GroupRecord, hashed_password), JSON_SAFE },
+ {},
+ };
+
+ return json_dispatch(variant, privileged_dispatch_table, NULL, flags, userdata);
+}
+
+static int dispatch_binding(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch binding_dispatch_table[] = {
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 },
+ {},
+ };
+
+ char smid[SD_ID128_STRING_MAX];
+ JsonVariant *m;
+ sd_id128_t mid;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to determine machine ID: %m");
+
+ m = json_variant_by_key(variant, sd_id128_to_string(mid, smid));
+ if (!m)
+ return 0;
+
+ return json_dispatch(m, binding_dispatch_table, NULL, flags, userdata);
+}
+
+static int dispatch_per_machine(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch per_machine_dispatch_table[] = {
+ { "matchMachineId", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 },
+ { "matchHostname", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 },
+ { "members", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, members), JSON_RELAX},
+ { "administrators", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, administrators), JSON_RELAX},
+ {},
+ };
+
+ JsonVariant *e;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ bool matching = false;
+ JsonVariant *m;
+
+ if (!json_variant_is_object(e))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ m = json_variant_by_key(e, "matchMachineId");
+ if (m) {
+ r = per_machine_id_match(m, flags);
+ if (r < 0)
+ return r;
+
+ matching = r > 0;
+ }
+
+ if (!matching) {
+ m = json_variant_by_key(e, "matchHostname");
+ if (m) {
+ r = per_machine_hostname_match(m, flags);
+ if (r < 0)
+ return r;
+
+ matching = r > 0;
+ }
+ }
+
+ if (!matching)
+ continue;
+
+ r = json_dispatch(e, per_machine_dispatch_table, NULL, flags, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dispatch_status(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch status_dispatch_table[] = {
+ { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(GroupRecord, service), JSON_SAFE },
+ {},
+ };
+
+ char smid[SD_ID128_STRING_MAX];
+ JsonVariant *m;
+ sd_id128_t mid;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to determine machine ID: %m");
+
+ m = json_variant_by_key(variant, sd_id128_to_string(mid, smid));
+ if (!m)
+ return 0;
+
+ return json_dispatch(m, status_dispatch_table, NULL, flags, userdata);
+}
+
+static int group_record_augment(GroupRecord *h, JsonDispatchFlags json_flags) {
+ assert(h);
+
+ if (!FLAGS_SET(h->mask, USER_RECORD_REGULAR))
+ return 0;
+
+ assert(h->group_name);
+
+ if (!h->group_name_and_realm_auto && h->realm) {
+ h->group_name_and_realm_auto = strjoin(h->group_name, "@", h->realm);
+ if (!h->group_name_and_realm_auto)
+ return json_log_oom(h->json, json_flags);
+ }
+
+ return 0;
+}
+
+int group_record_load(
+ GroupRecord *h,
+ JsonVariant *v,
+ UserRecordLoadFlags load_flags) {
+
+ static const JsonDispatch group_dispatch_table[] = {
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_user_group_name, offsetof(GroupRecord, group_name), JSON_RELAX},
+ { "realm", JSON_VARIANT_STRING, json_dispatch_realm, offsetof(GroupRecord, realm), 0 },
+ { "description", JSON_VARIANT_STRING, json_dispatch_gecos, offsetof(GroupRecord, description), 0 },
+ { "disposition", JSON_VARIANT_STRING, json_dispatch_user_disposition, offsetof(GroupRecord, disposition), 0 },
+ { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(GroupRecord, service), JSON_SAFE },
+ { "lastChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(GroupRecord, last_change_usec), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 },
+ { "members", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, members), JSON_RELAX},
+ { "administrators", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, administrators), JSON_RELAX},
+
+ { "privileged", JSON_VARIANT_OBJECT, dispatch_privileged, 0, 0 },
+
+ /* Not defined for now, for groups, but let's at least generate sensible errors about it */
+ { "secret", JSON_VARIANT_OBJECT, json_dispatch_unsupported, 0, 0 },
+
+ /* Ignore the perMachine, binding and status stuff here, and process it later, so that it overrides whatever is set above */
+ { "perMachine", JSON_VARIANT_ARRAY, NULL, 0, 0 },
+ { "binding", JSON_VARIANT_OBJECT, NULL, 0, 0 },
+ { "status", JSON_VARIANT_OBJECT, NULL, 0, 0 },
+
+ /* Ignore 'signature', we check it with explicit accessors instead */
+ { "signature", JSON_VARIANT_ARRAY, NULL, 0, 0 },
+ {},
+ };
+
+ JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags);
+ int r;
+
+ assert(h);
+ assert(!h->json);
+
+ /* Note that this call will leave a half-initialized record around on failure! */
+
+ if ((USER_RECORD_REQUIRE_MASK(load_flags) & (USER_RECORD_SECRET|USER_RECORD_PRIVILEGED)))
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Secret and privileged section currently not available for groups, refusing.");
+
+ r = user_group_record_mangle(v, load_flags, &h->json, &h->mask);
+ if (r < 0)
+ return r;
+
+ r = json_dispatch(h->json, group_dispatch_table, NULL, json_flags, h);
+ if (r < 0)
+ return r;
+
+ /* During the parsing operation above we ignored the 'perMachine', 'binding' and 'status' fields, since we want
+ * them to override the global options. Let's process them now. */
+
+ r = dispatch_per_machine("perMachine", json_variant_by_key(h->json, "perMachine"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ r = dispatch_binding("binding", json_variant_by_key(h->json, "binding"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ r = dispatch_status("status", json_variant_by_key(h->json, "status"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(h->mask, USER_RECORD_REGULAR) && !h->group_name)
+ return json_log(h->json, json_flags, SYNTHETIC_ERRNO(EINVAL), "Group name field missing, refusing.");
+
+ r = group_record_augment(h, json_flags);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int group_record_build(GroupRecord **ret, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ va_list ap;
+ int r;
+
+ assert(ret);
+
+ va_start(ap, ret);
+ r = json_buildv(&v, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ g = group_record_new();
+ if (!g)
+ return -ENOMEM;
+
+ r = group_record_load(g, v, USER_RECORD_LOAD_FULL);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(g);
+ return 0;
+}
+
+const char *group_record_group_name_and_realm(GroupRecord *h) {
+ assert(h);
+
+ /* Return the pre-initialized joined string if it is defined */
+ if (h->group_name_and_realm_auto)
+ return h->group_name_and_realm_auto;
+
+ /* If it's not defined then we cannot have a realm */
+ assert(!h->realm);
+ return h->group_name;
+}
+
+UserDisposition group_record_disposition(GroupRecord *h) {
+ assert(h);
+
+ if (h->disposition >= 0)
+ return h->disposition;
+
+ /* If not declared, derive from GID */
+
+ if (!gid_is_valid(h->gid))
+ return _USER_DISPOSITION_INVALID;
+
+ if (h->gid == 0 || h->gid == GID_NOBODY)
+ return USER_INTRINSIC;
+
+ if (gid_is_system(h->gid))
+ return USER_SYSTEM;
+
+ if (gid_is_dynamic(h->gid))
+ return USER_DYNAMIC;
+
+ if (gid_is_container(h->gid))
+ return USER_CONTAINER;
+
+ if (h->gid > INT32_MAX)
+ return USER_RESERVED;
+
+ return USER_REGULAR;
+}
+
+int group_record_clone(GroupRecord *h, UserRecordLoadFlags flags, GroupRecord **ret) {
+ _cleanup_(group_record_unrefp) GroupRecord *c = NULL;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ c = group_record_new();
+ if (!c)
+ return -ENOMEM;
+
+ r = group_record_load(c, h->json, flags);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(c);
+ return 0;
+}
diff --git a/src/shared/group-record.h b/src/shared/group-record.h
new file mode 100644
index 0000000..f810204
--- /dev/null
+++ b/src/shared/group-record.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "json.h"
+#include "user-record.h"
+
+typedef struct GroupRecord {
+ unsigned n_ref;
+ UserRecordMask mask;
+ bool incomplete;
+
+ char *group_name;
+ char *realm;
+ char *group_name_and_realm_auto;
+
+ char *description;
+
+ UserDisposition disposition;
+ uint64_t last_change_usec;
+
+ gid_t gid;
+
+ char **members;
+
+ char *service;
+
+ /* The following exist mostly so that we can cover the full /etc/gshadow set of fields, we currently
+ * do not actually make use of these */
+ char **administrators; /* maps to 'struct sgrp' .sg_adm field */
+ char **hashed_password; /* maps to 'struct sgrp' .sg_passwd field */
+
+ JsonVariant *json;
+} GroupRecord;
+
+GroupRecord* group_record_new(void);
+GroupRecord* group_record_ref(GroupRecord *g);
+GroupRecord* group_record_unref(GroupRecord *g);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(GroupRecord*, group_record_unref);
+
+int group_record_load(GroupRecord *h, JsonVariant *v, UserRecordLoadFlags flags);
+int group_record_build(GroupRecord **ret, ...);
+int group_record_clone(GroupRecord *g, UserRecordLoadFlags flags, GroupRecord **ret);
+
+const char *group_record_group_name_and_realm(GroupRecord *h);
+UserDisposition group_record_disposition(GroupRecord *h);
diff --git a/src/shared/id128-print.c b/src/shared/id128-print.c
new file mode 100644
index 0000000..f232767
--- /dev/null
+++ b/src/shared/id128-print.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "id128-print.h"
+#include "log.h"
+#include "pretty-print.h"
+#include "terminal-util.h"
+
+int id128_pretty_print_sample(const char *name, sd_id128_t id) {
+ _cleanup_free_ char *man_link = NULL, *mod_link = NULL;
+ const char *on, *off;
+ unsigned i;
+
+ on = ansi_highlight();
+ off = ansi_normal();
+
+ if (terminal_urlify("man:systemd-id128(1)", "systemd-id128(1)", &man_link) < 0)
+ return log_oom();
+
+ if (terminal_urlify("https://docs.python.org/3/library/uuid.html", "uuid", &mod_link) < 0)
+ return log_oom();
+
+ printf("As string:\n"
+ "%s" SD_ID128_FORMAT_STR "%s\n\n"
+ "As UUID:\n"
+ "%s" SD_ID128_UUID_FORMAT_STR "%s\n\n"
+ "As %s macro:\n"
+ "%s#define %s SD_ID128_MAKE(",
+ on, SD_ID128_FORMAT_VAL(id), off,
+ on, SD_ID128_FORMAT_VAL(id), off,
+ man_link,
+ on, name);
+ for (i = 0; i < 16; i++)
+ printf("%02x%s", id.bytes[i], i != 15 ? "," : "");
+ printf(")%s\n\n", off);
+
+ printf("As Python constant:\n"
+ ">>> import %s\n"
+ ">>> %s%s = uuid.UUID('" SD_ID128_FORMAT_STR "')%s\n",
+ mod_link,
+ on, name, SD_ID128_FORMAT_VAL(id), off);
+
+ return 0;
+}
+
+
+int id128_pretty_print(sd_id128_t id, Id128PrettyPrintMode mode) {
+ assert(mode >= 0);
+ assert(mode < _ID128_PRETTY_PRINT_MODE_MAX);
+
+ if (mode == ID128_PRINT_ID128) {
+ printf(SD_ID128_FORMAT_STR "\n",
+ SD_ID128_FORMAT_VAL(id));
+ return 0;
+ } else if (mode == ID128_PRINT_UUID) {
+ printf(SD_ID128_UUID_FORMAT_STR "\n",
+ SD_ID128_FORMAT_VAL(id));
+ return 0;
+ } else
+ return id128_pretty_print_sample("XYZ", id);
+}
+
+int id128_print_new(Id128PrettyPrintMode mode) {
+ sd_id128_t id;
+ int r;
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate ID: %m");
+
+ return id128_pretty_print(id, mode);
+}
diff --git a/src/shared/id128-print.h b/src/shared/id128-print.h
new file mode 100644
index 0000000..d69cb9b
--- /dev/null
+++ b/src/shared/id128-print.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+typedef enum Id128PrettyPrintMode {
+ ID128_PRINT_ID128,
+ ID128_PRINT_UUID,
+ ID128_PRINT_PRETTY,
+ _ID128_PRETTY_PRINT_MODE_MAX,
+ _ID128_PRETTY_PRINT_MODE_INVALID = -1
+} Id128PrettyPrintMode;
+
+int id128_pretty_print_sample(const char *name, sd_id128_t id);
+int id128_pretty_print(sd_id128_t id, Id128PrettyPrintMode mode);
+int id128_print_new(Id128PrettyPrintMode mode);
diff --git a/src/shared/idn-util.c b/src/shared/idn-util.c
new file mode 100644
index 0000000..83c4b3c
--- /dev/null
+++ b/src/shared/idn-util.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_LIBIDN2
+# include <idn2.h>
+#elif HAVE_LIBIDN
+# include <idna.h>
+# include <stringprep.h>
+#endif
+
+#include "alloc-util.h"
+#include "dlfcn-util.h"
+#include "idn-util.h"
+
+#if HAVE_LIBIDN || HAVE_LIBIDN2
+static void* idn_dl = NULL;
+#endif
+
+#if HAVE_LIBIDN2
+int (*sym_idn2_lookup_u8)(const uint8_t* src, uint8_t** lookupname, int flags) = NULL;
+const char *(*sym_idn2_strerror)(int rc) = NULL;
+int (*sym_idn2_to_unicode_8z8z)(const char * input, char ** output, int flags) = NULL;
+
+int dlopen_idn(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (idn_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libidn2.so.0", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "libidn2 support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_idn2_lookup_u8, "idn2_lookup_u8",
+ &sym_idn2_strerror, "idn2_strerror",
+ &sym_idn2_to_unicode_8z8z, "idn2_to_unicode_8z8z",
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* Note that we never release the reference here, because there's no real reason to, after all this
+ * was traditionally a regular shared library dependency which lives forever too. */
+ idn_dl = TAKE_PTR(dl);
+
+ return 1;
+}
+#endif
+
+#if HAVE_LIBIDN
+int (*sym_idna_to_ascii_4i)(const uint32_t * in, size_t inlen, char *out, int flags);
+int (*sym_idna_to_unicode_44i)(const uint32_t * in, size_t inlen,uint32_t * out, size_t * outlen, int flags);
+char* (*sym_stringprep_ucs4_to_utf8)(const uint32_t * str, ssize_t len, size_t * items_read, size_t * items_written);
+uint32_t* (*sym_stringprep_utf8_to_ucs4)(const char *str, ssize_t len, size_t *items_written);
+
+int dlopen_idn(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (idn_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libidn.so.12", RTLD_LAZY);
+ if (!dl) {
+ /* libidn broke ABI in 1.34, but not in a way we care about (a new field got added to an
+ * open-coded struct we do not use), hence support both versions. */
+ dl = dlopen("libidn.so.11", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "libidn support is not installed: %s", dlerror());
+ }
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_idna_to_ascii_4i, "idna_to_ascii_4i",
+ &sym_idna_to_unicode_44i, "idna_to_unicode_44i",
+ &sym_stringprep_ucs4_to_utf8, "stringprep_ucs4_to_utf8",
+ &sym_stringprep_utf8_to_ucs4, "stringprep_utf8_to_ucs4",
+ NULL);
+ if (r < 0)
+ return r;
+
+ idn_dl = TAKE_PTR(dl);
+
+ return 1;
+}
+#endif
diff --git a/src/shared/idn-util.h b/src/shared/idn-util.h
new file mode 100644
index 0000000..4698eed
--- /dev/null
+++ b/src/shared/idn-util.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_LIBIDN2
+# include <idn2.h>
+#elif HAVE_LIBIDN
+# include <idna.h>
+# include <stringprep.h>
+#endif
+
+#include <inttypes.h>
+
+#if HAVE_LIBIDN2 || HAVE_LIBIDN
+int dlopen_idn(void);
+#else
+static inline int dlopen_idn(void) {
+ return -EOPNOTSUPP;
+}
+#endif
+
+#if HAVE_LIBIDN2
+extern int (*sym_idn2_lookup_u8)(const uint8_t* src, uint8_t** lookupname, int flags);
+extern const char *(*sym_idn2_strerror)(int rc);
+extern int (*sym_idn2_to_unicode_8z8z)(const char * input, char ** output, int flags);
+#endif
+
+#if HAVE_LIBIDN
+extern int (*sym_idna_to_ascii_4i)(const uint32_t * in, size_t inlen, char *out, int flags);
+extern int (*sym_idna_to_unicode_44i)(const uint32_t * in, size_t inlen,uint32_t * out, size_t * outlen, int flags);
+extern char* (*sym_stringprep_ucs4_to_utf8)(const uint32_t * str, ssize_t len, size_t * items_read, size_t * items_written);
+extern uint32_t* (*sym_stringprep_utf8_to_ucs4)(const char *str, ssize_t len, size_t *items_written);
+#endif
diff --git a/src/shared/ima-util.c b/src/shared/ima-util.c
new file mode 100644
index 0000000..e37c9ad
--- /dev/null
+++ b/src/shared/ima-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "ima-util.h"
+
+static int use_ima_cached = -1;
+
+bool use_ima(void) {
+
+ if (use_ima_cached < 0)
+ use_ima_cached = access("/sys/kernel/security/ima/", F_OK) >= 0;
+
+ return use_ima_cached;
+}
diff --git a/src/shared/ima-util.h b/src/shared/ima-util.h
new file mode 100644
index 0000000..922db78
--- /dev/null
+++ b/src/shared/ima-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+bool use_ima(void);
diff --git a/src/shared/import-util.c b/src/shared/import-util.c
new file mode 100644
index 0000000..298c066
--- /dev/null
+++ b/src/shared/import-util.c
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "errno-util.h"
+#include "import-util.h"
+#include "log.h"
+#include "macro.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "string-table.h"
+#include "string-util.h"
+
+int import_url_last_component(const char *url, char **ret) {
+ const char *e, *p;
+ char *s;
+
+ e = strchrnul(url, '?');
+
+ while (e > url && e[-1] == '/')
+ e--;
+
+ p = e;
+ while (p > url && p[-1] != '/')
+ p--;
+
+ if (e <= p)
+ return -EINVAL;
+
+ s = strndup(p, e - p);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int import_url_change_last_component(const char *url, const char *suffix, char **ret) {
+ const char *e;
+ char *s;
+
+ assert(url);
+ assert(ret);
+
+ e = strchrnul(url, '?');
+
+ while (e > url && e[-1] == '/')
+ e--;
+
+ while (e > url && e[-1] != '/')
+ e--;
+
+ if (e <= url)
+ return -EINVAL;
+
+ s = new(char, (e - url) + strlen(suffix) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(s, url, e - url), suffix);
+ *ret = s;
+ return 0;
+}
+
+static const char* const import_verify_table[_IMPORT_VERIFY_MAX] = {
+ [IMPORT_VERIFY_NO] = "no",
+ [IMPORT_VERIFY_CHECKSUM] = "checksum",
+ [IMPORT_VERIFY_SIGNATURE] = "signature",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(import_verify, ImportVerify);
+
+int tar_strip_suffixes(const char *name, char **ret) {
+ const char *e;
+ char *s;
+
+ e = endswith(name, ".tar");
+ if (!e)
+ e = endswith(name, ".tar.xz");
+ if (!e)
+ e = endswith(name, ".tar.gz");
+ if (!e)
+ e = endswith(name, ".tar.bz2");
+ if (!e)
+ e = endswith(name, ".tgz");
+ if (!e)
+ e = strchr(name, 0);
+
+ if (e <= name)
+ return -EINVAL;
+
+ s = strndup(name, e - name);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int raw_strip_suffixes(const char *p, char **ret) {
+
+ static const char suffixes[] =
+ ".xz\0"
+ ".gz\0"
+ ".bz2\0"
+ ".raw\0"
+ ".qcow2\0"
+ ".img\0"
+ ".bin\0";
+
+ _cleanup_free_ char *q = NULL;
+
+ q = strdup(p);
+ if (!q)
+ return -ENOMEM;
+
+ for (;;) {
+ const char *sfx;
+ bool changed = false;
+
+ NULSTR_FOREACH(sfx, suffixes) {
+ char *e;
+
+ e = endswith(q, sfx);
+ if (e) {
+ *e = 0;
+ changed = true;
+ }
+ }
+
+ if (!changed)
+ break;
+ }
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
+
+int import_assign_pool_quota_and_warn(const char *path) {
+ int r;
+
+ r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
+ if (r == -ENOTTY) {
+ log_debug_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, as directory is not on btrfs or not a subvolume. Ignoring.");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines: %m");
+ if (r > 0)
+ log_info("Set up default quota hierarchy for /var/lib/machines.");
+
+ r = btrfs_subvol_auto_qgroup(path, 0, true);
+ if (r == -ENOTTY) {
+ log_debug_errno(r, "Failed to set up quota hierarchy for %s, as directory is not on btrfs or not a subvolume. Ignoring.", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up default quota hierarchy for %s: %m", path);
+ if (r > 0)
+ log_debug("Set up default quota hierarchy for %s.", path);
+
+ return 0;
+}
+
+int import_set_nocow_and_log(int fd, const char *path) {
+ int r;
+
+ r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+ if (r < 0)
+ return log_full_errno(
+ ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set file attributes on %s: %m", path);
+
+ return 0;
+}
diff --git a/src/shared/import-util.h b/src/shared/import-util.h
new file mode 100644
index 0000000..8d017f6
--- /dev/null
+++ b/src/shared/import-util.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum ImportVerify {
+ IMPORT_VERIFY_NO,
+ IMPORT_VERIFY_CHECKSUM,
+ IMPORT_VERIFY_SIGNATURE,
+ _IMPORT_VERIFY_MAX,
+ _IMPORT_VERIFY_INVALID = -1,
+} ImportVerify;
+
+int import_url_last_component(const char *url, char **ret);
+int import_url_change_last_component(const char *url, const char *suffix, char **ret);
+
+const char* import_verify_to_string(ImportVerify v) _const_;
+ImportVerify import_verify_from_string(const char *s) _pure_;
+
+int tar_strip_suffixes(const char *name, char **ret);
+int raw_strip_suffixes(const char *name, char **ret);
+
+int import_assign_pool_quota_and_warn(const char *path);
+
+int import_set_nocow_and_log(int fd, const char *path);
diff --git a/src/shared/initreq.h b/src/shared/initreq.h
new file mode 100644
index 0000000..1bf5b8e
--- /dev/null
+++ b/src/shared/initreq.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2+ */
+/*
+ * initreq.h Interface to talk to init through /dev/initctl.
+ *
+ * Copyright (C) 1995-2004 Miquel van Smoorenburg
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * Version: @(#)initreq.h 1.28 31-Mar-2004 MvS
+ */
+
+#pragma once
+
+#include <sys/param.h>
+
+#if defined(__FreeBSD_kernel__)
+# define INIT_FIFO "/etc/.initctl"
+#else
+# define INIT_FIFO "/dev/initctl"
+#endif
+
+#define INIT_MAGIC 0x03091969
+#define INIT_CMD_START 0
+#define INIT_CMD_RUNLVL 1
+#define INIT_CMD_POWERFAIL 2
+#define INIT_CMD_POWERFAILNOW 3
+#define INIT_CMD_POWEROK 4
+#define INIT_CMD_BSD 5
+#define INIT_CMD_SETENV 6
+#define INIT_CMD_UNSETENV 7
+
+#define INIT_CMD_CHANGECONS 12345
+
+#ifdef MAXHOSTNAMELEN
+# define INITRQ_HLEN MAXHOSTNAMELEN
+#else
+# define INITRQ_HLEN 64
+#endif
+
+/*
+ * This is what BSD 4.4 uses when talking to init.
+ * Linux doesn't use this right now.
+ */
+struct init_request_bsd {
+ char gen_id[8]; /* Beats me.. telnetd uses "fe" */
+ char tty_id[16]; /* Tty name minus /dev/tty */
+ char host[INITRQ_HLEN]; /* Hostname */
+ char term_type[16]; /* Terminal type */
+ int signal; /* Signal to send */
+ int pid; /* Process to send to */
+ char exec_name[128]; /* Program to execute */
+ char reserved[128]; /* For future expansion. */
+};
+
+/*
+ * Because of legacy interfaces, "runlevel" and "sleeptime"
+ * aren't in a separate struct in the union.
+ *
+ * The weird sizes are because init expects the whole
+ * struct to be 384 bytes.
+ */
+struct init_request {
+ int magic; /* Magic number */
+ int cmd; /* What kind of request */
+ int runlevel; /* Runlevel to change to */
+ int sleeptime; /* Time between TERM and KILL */
+ union {
+ struct init_request_bsd bsd;
+ char data[368];
+ } i;
+};
diff --git a/src/shared/install-printf.c b/src/shared/install-printf.c
new file mode 100644
index 0000000..6bc3f15
--- /dev/null
+++ b/src/shared/install-printf.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "format-util.h"
+#include "install-printf.h"
+#include "install.h"
+#include "macro.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ _cleanup_free_ char *prefix = NULL;
+ int r;
+
+ assert(i);
+
+ r = unit_name_to_prefix_and_instance(i->name, &prefix);
+ if (r < 0)
+ return r;
+
+ if (endswith(prefix, "@") && i->default_instance) {
+ char *ans;
+
+ ans = strjoin(prefix, i->default_instance);
+ if (!ans)
+ return -ENOMEM;
+ *ret = ans;
+ } else
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+static int specifier_name(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ char *ans;
+
+ assert(i);
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE) && i->default_instance)
+ return unit_name_replace_instance(i->name, i->default_instance, ret);
+
+ ans = strdup(i->name);
+ if (!ans)
+ return -ENOMEM;
+ *ret = ans;
+ return 0;
+}
+
+static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+
+ assert(i);
+
+ return unit_name_to_prefix(i->name, ret);
+}
+
+static int specifier_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ char *instance;
+ int r;
+
+ assert(i);
+
+ r = unit_name_to_instance(i->name, &instance);
+ if (r < 0)
+ return r;
+
+ if (isempty(instance)) {
+ r = free_and_strdup(&instance, strempty(i->default_instance));
+ if (r < 0)
+ return r;
+ }
+
+ *ret = instance;
+ return 0;
+}
+
+static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) {
+ _cleanup_free_ char *prefix = NULL;
+ char *dash;
+ int r;
+
+ r = specifier_prefix(specifier, data, userdata, &prefix);
+ if (r < 0)
+ return r;
+
+ dash = strrchr(prefix, '-');
+ if (dash) {
+ dash = strdup(dash + 1);
+ if (!dash)
+ return -ENOMEM;
+ *ret = dash;
+ } else
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret) {
+ /* This is similar to unit_name_printf() */
+
+ const Specifier table[] = {
+ { 'i', specifier_instance, NULL },
+ { 'j', specifier_last_component, NULL },
+ { 'n', specifier_name, NULL },
+ { 'N', specifier_prefix_and_instance, NULL },
+ { 'p', specifier_prefix, NULL },
+
+ COMMON_SYSTEM_SPECIFIERS,
+
+ COMMON_CREDS_SPECIFIERS,
+ {}
+ };
+
+ assert(i);
+ assert(format);
+ assert(ret);
+
+ return specifier_printf(format, table, i, ret);
+}
diff --git a/src/shared/install-printf.h b/src/shared/install-printf.h
new file mode 100644
index 0000000..34e1294
--- /dev/null
+++ b/src/shared/install-printf.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "install.h"
+
+int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret);
diff --git a/src/shared/install.c b/src/shared/install.c
new file mode 100644
index 0000000..302497a
--- /dev/null
+++ b/src/shared/install.c
@@ -0,0 +1,3479 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "install-printf.h"
+#include "install.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-file.h"
+
+#define UNIT_FILE_FOLLOW_SYMLINK_MAX 64
+
+typedef enum SearchFlags {
+ SEARCH_LOAD = 1 << 0,
+ SEARCH_FOLLOW_CONFIG_SYMLINKS = 1 << 1,
+ SEARCH_DROPIN = 1 << 2,
+} SearchFlags;
+
+typedef struct {
+ OrderedHashmap *will_process;
+ OrderedHashmap *have_processed;
+} InstallContext;
+
+typedef enum {
+ PRESET_UNKNOWN,
+ PRESET_ENABLE,
+ PRESET_DISABLE,
+} PresetAction;
+
+struct UnitFilePresetRule {
+ char *pattern;
+ PresetAction action;
+ char **instances;
+};
+
+static bool unit_file_install_info_has_rules(const UnitFileInstallInfo *i) {
+ assert(i);
+
+ return !strv_isempty(i->aliases) ||
+ !strv_isempty(i->wanted_by) ||
+ !strv_isempty(i->required_by);
+}
+
+static bool unit_file_install_info_has_also(const UnitFileInstallInfo *i) {
+ assert(i);
+
+ return !strv_isempty(i->also);
+}
+
+void unit_file_presets_freep(UnitFilePresets *p) {
+ if (!p)
+ return;
+
+ for (size_t i = 0; i < p->n_rules; i++) {
+ free(p->rules[i].pattern);
+ strv_free(p->rules[i].instances);
+ }
+
+ free(p->rules);
+ p->n_rules = 0;
+}
+
+static const char *const unit_file_type_table[_UNIT_FILE_TYPE_MAX] = {
+ [UNIT_FILE_TYPE_REGULAR] = "regular",
+ [UNIT_FILE_TYPE_SYMLINK] = "symlink",
+ [UNIT_FILE_TYPE_MASKED] = "masked",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(unit_file_type, UnitFileType);
+
+static int in_search_path(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_strv_contains(p->search_path, parent);
+}
+
+static const char* skip_root(const LookupPaths *p, const char *path) {
+ char *e;
+
+ assert(p);
+ assert(path);
+
+ if (!p->root_dir)
+ return path;
+
+ e = path_startswith(path, p->root_dir);
+ if (!e)
+ return NULL;
+
+ /* Make sure the returned path starts with a slash */
+ if (e[0] != '/') {
+ if (e == path || e[-1] != '/')
+ return NULL;
+
+ e--;
+ }
+
+ return e;
+}
+
+static int path_is_generator(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->generator) ||
+ path_equal_ptr(parent, p->generator_early) ||
+ path_equal_ptr(parent, p->generator_late);
+}
+
+static int path_is_transient(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->transient);
+}
+
+static int path_is_control(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->persistent_control) ||
+ path_equal_ptr(parent, p->runtime_control);
+}
+
+static int path_is_config(const LookupPaths *p, const char *path, bool check_parent) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ /* Note that we do *not* have generic checks for /etc or /run in place, since with
+ * them we couldn't discern configuration from transient or generated units */
+
+ if (check_parent) {
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ path = parent;
+ }
+
+ return path_equal_ptr(path, p->persistent_config) ||
+ path_equal_ptr(path, p->runtime_config);
+}
+
+static int path_is_runtime(const LookupPaths *p, const char *path, bool check_parent) {
+ _cleanup_free_ char *parent = NULL;
+ const char *rpath;
+
+ assert(p);
+ assert(path);
+
+ /* Everything in /run is considered runtime. On top of that we also add
+ * explicit checks for the various runtime directories, as safety net. */
+
+ rpath = skip_root(p, path);
+ if (rpath && path_startswith(rpath, "/run"))
+ return true;
+
+ if (check_parent) {
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ path = parent;
+ }
+
+ return path_equal_ptr(path, p->runtime_config) ||
+ path_equal_ptr(path, p->generator) ||
+ path_equal_ptr(path, p->generator_early) ||
+ path_equal_ptr(path, p->generator_late) ||
+ path_equal_ptr(path, p->transient) ||
+ path_equal_ptr(path, p->runtime_control);
+}
+
+static int path_is_vendor_or_generator(const LookupPaths *p, const char *path) {
+ const char *rpath;
+
+ assert(p);
+ assert(path);
+
+ rpath = skip_root(p, path);
+ if (!rpath)
+ return 0;
+
+ if (path_startswith(rpath, "/usr"))
+ return true;
+
+#if HAVE_SPLIT_USR
+ if (path_startswith(rpath, "/lib"))
+ return true;
+#endif
+
+ if (path_is_generator(p, rpath))
+ return true;
+
+ return path_equal(rpath, SYSTEM_DATA_UNIT_PATH);
+}
+
+static const char* config_path_from_flags(const LookupPaths *paths, UnitFileFlags flags) {
+ assert(paths);
+
+ if (FLAGS_SET(flags, UNIT_FILE_PORTABLE))
+ return FLAGS_SET(flags, UNIT_FILE_RUNTIME) ? paths->runtime_attached : paths->persistent_attached;
+ else
+ return FLAGS_SET(flags, UNIT_FILE_RUNTIME) ? paths->runtime_config : paths->persistent_config;
+}
+
+int unit_file_changes_add(
+ UnitFileChange **changes,
+ size_t *n_changes,
+ int type,
+ const char *path,
+ const char *source) {
+
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ UnitFileChange *c;
+
+ assert(path);
+ assert(!changes == !n_changes);
+
+ if (!changes)
+ return 0;
+
+ c = reallocarray(*changes, *n_changes + 1, sizeof(UnitFileChange));
+ if (!c)
+ return -ENOMEM;
+ *changes = c;
+
+ p = strdup(path);
+ if (source)
+ s = strdup(source);
+
+ if (!p || (source && !s))
+ return -ENOMEM;
+
+ path_simplify(p, false);
+ if (s)
+ path_simplify(s, false);
+
+ c[*n_changes] = (UnitFileChange) { type, p, s };
+ p = s = NULL;
+ (*n_changes) ++;
+ return 0;
+}
+
+void unit_file_changes_free(UnitFileChange *changes, size_t n_changes) {
+ assert(changes || n_changes == 0);
+
+ for (size_t i = 0; i < n_changes; i++) {
+ free(changes[i].path);
+ free(changes[i].source);
+ }
+
+ free(changes);
+}
+
+void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet) {
+ bool logged = false;
+
+ assert(changes || n_changes == 0);
+ /* If verb is not specified, errors are not allowed! */
+ assert(verb || r >= 0);
+
+ for (size_t i = 0; i < n_changes; i++) {
+ assert(verb || changes[i].type >= 0);
+
+ switch(changes[i].type) {
+ case UNIT_FILE_SYMLINK:
+ if (!quiet)
+ log_info("Created symlink %s %s %s.",
+ changes[i].path,
+ special_glyph(SPECIAL_GLYPH_ARROW),
+ changes[i].source);
+ break;
+ case UNIT_FILE_UNLINK:
+ if (!quiet)
+ log_info("Removed %s.", changes[i].path);
+ break;
+ case UNIT_FILE_IS_MASKED:
+ if (!quiet)
+ log_info("Unit %s is masked, ignoring.", changes[i].path);
+ break;
+ case UNIT_FILE_IS_DANGLING:
+ if (!quiet)
+ log_info("Unit %s is an alias to a unit that is not present, ignoring.",
+ changes[i].path);
+ break;
+ case -EEXIST:
+ if (changes[i].source)
+ log_error_errno(changes[i].type,
+ "Failed to %s unit, file %s already exists and is a symlink to %s.",
+ verb, changes[i].path, changes[i].source);
+ else
+ log_error_errno(changes[i].type,
+ "Failed to %s unit, file %s already exists.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -ERFKILL:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s is masked.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -EADDRNOTAVAIL:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s is transient or generated.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -EUCLEAN:
+ log_error_errno(changes[i].type,
+ "Failed to %s unit, \"%s\" is not a valid unit name.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -ELOOP:
+ log_error_errno(changes[i].type, "Failed to %s unit, refusing to operate on linked unit file %s",
+ verb, changes[i].path);
+ logged = true;
+ break;
+
+ case -ENOENT:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s does not exist.", verb, changes[i].path);
+ logged = true;
+ break;
+
+ default:
+ assert(changes[i].type < 0);
+ log_error_errno(changes[i].type, "Failed to %s unit, file %s: %m.",
+ verb, changes[i].path);
+ logged = true;
+ }
+ }
+
+ if (r < 0 && !logged)
+ log_error_errno(r, "Failed to %s: %m.", verb);
+}
+
+/**
+ * Checks if two paths or symlinks from wd are the same, when root is the root of the filesystem.
+ * wc should be the full path in the host file system.
+ */
+static bool chroot_symlinks_same(const char *root, const char *wd, const char *a, const char *b) {
+ assert(path_is_absolute(wd));
+
+ /* This will give incorrect results if the paths are relative and go outside
+ * of the chroot. False negatives are possible. */
+
+ if (!root)
+ root = "/";
+
+ a = strjoina(path_is_absolute(a) ? root : wd, "/", a);
+ b = strjoina(path_is_absolute(b) ? root : wd, "/", b);
+ return path_equal_or_files_same(a, b, 0);
+}
+
+static int create_symlink(
+ const LookupPaths *paths,
+ const char *old_path,
+ const char *new_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *dest = NULL, *dirname = NULL;
+ const char *rp;
+ int r;
+
+ assert(old_path);
+ assert(new_path);
+
+ rp = skip_root(paths, old_path);
+ if (rp)
+ old_path = rp;
+
+ /* Actually create a symlink, and remember that we did. Is
+ * smart enough to check if there's already a valid symlink in
+ * place.
+ *
+ * Returns 1 if a symlink was created or already exists and points to
+ * the right place, or negative on error.
+ */
+
+ mkdir_parents_label(new_path, 0755);
+
+ if (symlink(old_path, new_path) >= 0) {
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path);
+ return 1;
+ }
+
+ if (errno != EEXIST) {
+ unit_file_changes_add(changes, n_changes, -errno, new_path, NULL);
+ return -errno;
+ }
+
+ r = readlink_malloc(new_path, &dest);
+ if (r < 0) {
+ /* translate EINVAL (non-symlink exists) to EEXIST */
+ if (r == -EINVAL)
+ r = -EEXIST;
+
+ unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+ return r;
+ }
+
+ dirname = dirname_malloc(new_path);
+ if (!dirname)
+ return -ENOMEM;
+
+ if (chroot_symlinks_same(paths->root_dir, dirname, dest, old_path)) {
+ log_debug("Symlink %s → %s already exists", new_path, dest);
+ return 1;
+ }
+
+ if (!force) {
+ unit_file_changes_add(changes, n_changes, -EEXIST, new_path, dest);
+ return -EEXIST;
+ }
+
+ r = symlink_atomic(old_path, new_path);
+ if (r < 0) {
+ unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+ return r;
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, new_path, NULL);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path);
+
+ return 1;
+}
+
+static int mark_symlink_for_removal(
+ Set **remove_symlinks_to,
+ const char *p) {
+
+ char *n;
+ int r;
+
+ assert(p);
+
+ r = set_ensure_allocated(remove_symlinks_to, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = strdup(p);
+ if (!n)
+ return -ENOMEM;
+
+ path_simplify(n, false);
+
+ r = set_consume(*remove_symlinks_to, n);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int remove_marked_symlinks_fd(
+ Set *remove_symlinks_to,
+ int fd,
+ const char *path,
+ const char *config_path,
+ const LookupPaths *lp,
+ bool dry_run,
+ bool *restart,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(remove_symlinks_to);
+ assert(fd >= 0);
+ assert(path);
+ assert(config_path);
+ assert(lp);
+ assert(restart);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ rewinddir(d);
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type == DT_DIR) {
+ _cleanup_free_ char *p = NULL;
+ int nfd, q;
+
+ nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (nfd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p) {
+ safe_close(nfd);
+ return -ENOMEM;
+ }
+
+ /* This will close nfd, regardless whether it succeeds or not */
+ q = remove_marked_symlinks_fd(remove_symlinks_to, nfd, p, config_path, lp, dry_run, restart, changes, n_changes);
+ if (q < 0 && r == 0)
+ r = q;
+
+ } else if (de->d_type == DT_LNK) {
+ _cleanup_free_ char *p = NULL, *dest = NULL;
+ const char *rp;
+ bool found;
+ int q;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p)
+ return -ENOMEM;
+ path_simplify(p, false);
+
+ q = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &dest, NULL);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ continue;
+ }
+
+ /* We remove all links pointing to a file or path that is marked, as well as all files sharing
+ * the same name as a file that is marked. */
+
+ found = set_contains(remove_symlinks_to, dest) ||
+ set_contains(remove_symlinks_to, basename(dest)) ||
+ set_contains(remove_symlinks_to, de->d_name);
+
+ if (!found)
+ continue;
+
+ if (!dry_run) {
+ if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) {
+ if (r == 0)
+ r = -errno;
+ unit_file_changes_add(changes, n_changes, -errno, p, NULL);
+ continue;
+ }
+
+ (void) rmdir_parents(p, config_path);
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, p, NULL);
+
+ /* Now, remember the full path (but with the root prefix removed) of
+ * the symlink we just removed, and remove any symlinks to it, too. */
+
+ rp = skip_root(lp, p);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: p);
+ if (q < 0)
+ return q;
+ if (q > 0 && !dry_run)
+ *restart = true;
+ }
+ }
+
+ return r;
+}
+
+static int remove_marked_symlinks(
+ Set *remove_symlinks_to,
+ const char *config_path,
+ const LookupPaths *lp,
+ bool dry_run,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_close_ int fd = -1;
+ bool restart;
+ int r = 0;
+
+ assert(config_path);
+ assert(lp);
+
+ if (set_size(remove_symlinks_to) <= 0)
+ return 0;
+
+ fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC);
+ if (fd < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ do {
+ int q, cfd;
+ restart = false;
+
+ cfd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (cfd < 0)
+ return -errno;
+
+ /* This takes possession of cfd and closes it */
+ q = remove_marked_symlinks_fd(remove_symlinks_to, cfd, config_path, config_path, lp, dry_run, &restart, changes, n_changes);
+ if (r == 0)
+ r = q;
+ } while (restart);
+
+ return r;
+}
+
+static int is_symlink_with_known_name(const UnitFileInstallInfo *i, const char *name) {
+ int r;
+
+ if (streq(name, i->name))
+ return true;
+
+ if (strv_contains(i->aliases, name))
+ return true;
+
+ /* Look for template symlink matching DefaultInstance */
+ if (i->default_instance && unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = unit_name_replace_instance(i->name, i->default_instance, &s);
+ if (r < 0) {
+ if (r != -EINVAL)
+ return r;
+
+ } else if (streq(name, s))
+ return true;
+ }
+
+ return false;
+}
+
+static int find_symlinks_fd(
+ const char *root_dir,
+ const UnitFileInstallInfo *i,
+ bool match_aliases,
+ bool ignore_same_name,
+ int fd,
+ const char *path,
+ const char *config_path,
+ bool *same_name_link) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(i);
+ assert(fd >= 0);
+ assert(path);
+ assert(config_path);
+ assert(same_name_link);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type == DT_DIR) {
+ _cleanup_free_ char *p = NULL;
+ int nfd, q;
+
+ nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (nfd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p) {
+ safe_close(nfd);
+ return -ENOMEM;
+ }
+
+ /* This will close nfd, regardless whether it succeeds or not */
+ q = find_symlinks_fd(root_dir, i, match_aliases, ignore_same_name, nfd,
+ p, config_path, same_name_link);
+ if (q > 0)
+ return 1;
+ if (r == 0)
+ r = q;
+
+ } else if (de->d_type == DT_LNK) {
+ _cleanup_free_ char *p = NULL, *dest = NULL;
+ bool found_path = false, found_dest, b = false;
+ int q;
+
+ /* Acquire symlink name */
+ p = path_make_absolute(de->d_name, path);
+ if (!p)
+ return -ENOMEM;
+
+ /* Acquire symlink destination */
+ q = readlink_malloc(p, &dest);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ continue;
+ }
+
+ /* Make absolute */
+ if (!path_is_absolute(dest)) {
+ char *x;
+
+ x = path_join(root_dir, dest);
+ if (!x)
+ return -ENOMEM;
+
+ free_and_replace(dest, x);
+ }
+
+ assert(unit_name_is_valid(i->name, UNIT_NAME_ANY));
+ if (!ignore_same_name)
+ /* Check if the symlink itself matches what we are looking for.
+ *
+ * If ignore_same_name is specified, we are in one of the directories which
+ * have lower priority than the unit file, and even if a file or symlink with
+ * this name was found, we should ignore it. */
+ found_path = streq(de->d_name, i->name);
+
+ /* Check if what the symlink points to matches what we are looking for */
+ found_dest = streq(basename(dest), i->name);
+
+ if (found_path && found_dest) {
+ _cleanup_free_ char *t = NULL;
+
+ /* Filter out same name links in the main
+ * config path */
+ t = path_make_absolute(i->name, config_path);
+ if (!t)
+ return -ENOMEM;
+
+ b = path_equal(t, p);
+ }
+
+ if (b)
+ *same_name_link = true;
+ else if (found_path || found_dest) {
+ if (!match_aliases)
+ return 1;
+
+ /* Check if symlink name is in the set of names used by [Install] */
+ q = is_symlink_with_known_name(i, de->d_name);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ return 1;
+ }
+ }
+ }
+
+ return r;
+}
+
+static int find_symlinks(
+ const char *root_dir,
+ const UnitFileInstallInfo *i,
+ bool match_name,
+ bool ignore_same_name,
+ const char *config_path,
+ bool *same_name_link) {
+
+ int fd;
+
+ assert(i);
+ assert(config_path);
+ assert(same_name_link);
+
+ fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC);
+ if (fd < 0) {
+ if (IN_SET(errno, ENOENT, ENOTDIR, EACCES))
+ return 0;
+ return -errno;
+ }
+
+ /* This takes possession of fd and closes it */
+ return find_symlinks_fd(root_dir, i, match_name, ignore_same_name, fd,
+ config_path, config_path, same_name_link);
+}
+
+static int find_symlinks_in_scope(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const UnitFileInstallInfo *i,
+ bool match_name,
+ UnitFileState *state) {
+
+ bool same_name_link_runtime = false, same_name_link_config = false;
+ bool enabled_in_runtime = false, enabled_at_all = false;
+ bool ignore_same_name = false;
+ char **p;
+ int r;
+
+ assert(paths);
+ assert(i);
+
+ /* As we iterate over the list of search paths in paths->search_path, we may encounter "same name"
+ * symlinks. The ones which are "below" (i.e. have lower priority) than the unit file itself are
+ * effectively masked, so we should ignore them. */
+
+ STRV_FOREACH(p, paths->search_path) {
+ bool same_name_link = false;
+
+ r = find_symlinks(paths->root_dir, i, match_name, ignore_same_name, *p, &same_name_link);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* We found symlinks in this dir? Yay! Let's see where precisely it is enabled. */
+
+ if (path_equal_ptr(*p, paths->persistent_config)) {
+ /* This is the best outcome, let's return it immediately. */
+ *state = UNIT_FILE_ENABLED;
+ return 1;
+ }
+
+ /* look for global enablement of user units */
+ if (scope == UNIT_FILE_USER && path_is_user_config_dir(*p)) {
+ *state = UNIT_FILE_ENABLED;
+ return 1;
+ }
+
+ r = path_is_runtime(paths, *p, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ enabled_in_runtime = true;
+ else
+ enabled_at_all = true;
+
+ } else if (same_name_link) {
+ if (path_equal_ptr(*p, paths->persistent_config))
+ same_name_link_config = true;
+ else {
+ r = path_is_runtime(paths, *p, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ same_name_link_runtime = true;
+ }
+ }
+
+ /* Check if next iteration will be "below" the unit file (either a regular file
+ * or a symlink), and hence should be ignored */
+ if (!ignore_same_name && path_startswith(i->path, *p))
+ ignore_same_name = true;
+ }
+
+ if (enabled_in_runtime) {
+ *state = UNIT_FILE_ENABLED_RUNTIME;
+ return 1;
+ }
+
+ /* Here's a special rule: if the unit we are looking for is an instance, and it symlinked in the search path
+ * outside of runtime and configuration directory, then we consider it statically enabled. Note we do that only
+ * for instance, not for regular names, as those are merely aliases, while instances explicitly instantiate
+ * something, and hence are a much stronger concept. */
+ if (enabled_at_all && unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) {
+ *state = UNIT_FILE_STATIC;
+ return 1;
+ }
+
+ /* Hmm, we didn't find it, but maybe we found the same name
+ * link? */
+ if (same_name_link_config) {
+ *state = UNIT_FILE_LINKED;
+ return 1;
+ }
+ if (same_name_link_runtime) {
+ *state = UNIT_FILE_LINKED_RUNTIME;
+ return 1;
+ }
+
+ return 0;
+}
+
+static void install_info_free(UnitFileInstallInfo *i) {
+
+ if (!i)
+ return;
+
+ free(i->name);
+ free(i->path);
+ strv_free(i->aliases);
+ strv_free(i->wanted_by);
+ strv_free(i->required_by);
+ strv_free(i->also);
+ free(i->default_instance);
+ free(i->symlink_target);
+ free(i);
+}
+
+static void install_context_done(InstallContext *c) {
+ assert(c);
+
+ c->will_process = ordered_hashmap_free_with_destructor(c->will_process, install_info_free);
+ c->have_processed = ordered_hashmap_free_with_destructor(c->have_processed, install_info_free);
+}
+
+static UnitFileInstallInfo *install_info_find(InstallContext *c, const char *name) {
+ UnitFileInstallInfo *i;
+
+ i = ordered_hashmap_get(c->have_processed, name);
+ if (i)
+ return i;
+
+ return ordered_hashmap_get(c->will_process, name);
+}
+
+static int install_info_may_process(
+ const UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+ assert(i);
+ assert(paths);
+
+ /* Checks whether the loaded unit file is one we should process, or is masked,
+ * transient or generated and thus not subject to enable/disable operations. */
+
+ if (i->type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, -ERFKILL, i->path, NULL);
+ return -ERFKILL;
+ }
+ if (path_is_generator(paths, i->path) ||
+ path_is_transient(paths, i->path)) {
+ unit_file_changes_add(changes, n_changes, -EADDRNOTAVAIL, i->path, NULL);
+ return -EADDRNOTAVAIL;
+ }
+
+ return 0;
+}
+
+/**
+ * Adds a new UnitFileInstallInfo entry under name in the InstallContext.will_process
+ * hashmap, or retrieves the existing one if already present.
+ *
+ * Returns negative on error, 0 if the unit was already known, 1 otherwise.
+ */
+static int install_info_add(
+ InstallContext *c,
+ const char *name,
+ const char *path,
+ bool auxiliary,
+ UnitFileInstallInfo **ret) {
+
+ UnitFileInstallInfo *i = NULL;
+ int r;
+
+ assert(c);
+
+ if (!name) {
+ /* 'name' and 'path' must not both be null. Check here 'path' using assert_se() to
+ * workaround a bug in gcc that generates a -Wnonnull warning when calling basename(),
+ * but this cannot be possible in any code path (See #6119). */
+ assert_se(path);
+ name = basename(path);
+ }
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ i = install_info_find(c, name);
+ if (i) {
+ i->auxiliary = i->auxiliary && auxiliary;
+
+ if (ret)
+ *ret = i;
+ return 0;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&c->will_process, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ i = new(UnitFileInstallInfo, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (UnitFileInstallInfo) {
+ .type = _UNIT_FILE_TYPE_INVALID,
+ .auxiliary = auxiliary,
+ };
+
+ i->name = strdup(name);
+ if (!i->name) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (path) {
+ i->path = strdup(path);
+ if (!i->path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ r = ordered_hashmap_put(c->will_process, i->name, i);
+ if (r < 0)
+ goto fail;
+
+ if (ret)
+ *ret = i;
+
+ return 1;
+
+fail:
+ install_info_free(i);
+ return r;
+}
+
+static int config_parse_alias(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitType type;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ type = unit_name_to_type(unit);
+ if (!unit_type_may_alias(type))
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Alias= is not allowed for %s units, ignoring.",
+ unit_type_to_string(type));
+
+ return config_parse_strv(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+}
+
+static int config_parse_also(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitFileInstallInfo *info = userdata;
+ InstallContext *c = data;
+ int r;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *printed = NULL;
+
+ r = extract_first_word(&rvalue, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = install_full_printf(info, word, &printed);
+ if (r < 0)
+ return r;
+
+ r = install_info_add(c, printed, NULL, true, NULL);
+ if (r < 0)
+ return r;
+
+ r = strv_push(&info->also, printed);
+ if (r < 0)
+ return r;
+
+ printed = NULL;
+ }
+
+ return 0;
+}
+
+static int config_parse_default_instance(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitFileInstallInfo *i = data;
+ _cleanup_free_ char *printed = NULL;
+ int r;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (unit_name_is_valid(unit, UNIT_NAME_INSTANCE))
+ /* When enabling an instance, we might be using a template unit file,
+ * but we should ignore DefaultInstance silently. */
+ return 0;
+ if (!unit_name_is_valid(unit, UNIT_NAME_TEMPLATE))
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "DefaultInstance= only makes sense for template units, ignoring.");
+
+ r = install_full_printf(i, rvalue, &printed);
+ if (r < 0)
+ return r;
+
+ if (isempty(printed)) {
+ i->default_instance = mfree(i->default_instance);
+ return 0;
+ }
+
+ if (!unit_instance_is_valid(printed))
+ return log_syntax(unit, LOG_WARNING, filename, line, SYNTHETIC_ERRNO(EINVAL),
+ "Invalid DefaultInstance= value \"%s\".", printed);
+
+ return free_and_replace(i->default_instance, printed);
+}
+
+static int unit_file_load(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const char *path,
+ const char *root_dir,
+ SearchFlags flags) {
+
+ const ConfigTableItem items[] = {
+ { "Install", "Alias", config_parse_alias, 0, &info->aliases },
+ { "Install", "WantedBy", config_parse_strv, 0, &info->wanted_by },
+ { "Install", "RequiredBy", config_parse_strv, 0, &info->required_by },
+ { "Install", "DefaultInstance", config_parse_default_instance, 0, info },
+ { "Install", "Also", config_parse_also, 0, c },
+ {}
+ };
+
+ UnitType type;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int r;
+
+ assert(info);
+ assert(path);
+
+ if (!(flags & SEARCH_DROPIN)) {
+ /* Loading or checking for the main unit file… */
+
+ type = unit_name_to_type(info->name);
+ if (type < 0)
+ return -EINVAL;
+ if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE|UNIT_NAME_INSTANCE) && !unit_type_may_template(type))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: unit type %s cannot be templated, ignoring.", path, unit_type_to_string(type));
+
+ if (!(flags & SEARCH_LOAD)) {
+ if (lstat(path, &st) < 0)
+ return -errno;
+
+ if (null_or_empty(&st))
+ info->type = UNIT_FILE_TYPE_MASKED;
+ else if (S_ISREG(st.st_mode))
+ info->type = UNIT_FILE_TYPE_REGULAR;
+ else if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ else if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ else
+ return -ENOTTY;
+
+ return 0;
+ }
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+ } else {
+ /* Operating on a drop-in file. If we aren't supposed to load the unit file drop-ins don't matter, let's hence shortcut this. */
+
+ if (!(flags & SEARCH_LOAD))
+ return 0;
+
+ fd = chase_symlinks_and_open(path, root_dir, 0, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd < 0)
+ return fd;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (null_or_empty(&st)) {
+ if ((flags & SEARCH_DROPIN) == 0)
+ info->type = UNIT_FILE_TYPE_MASKED;
+
+ return 0;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+
+ f = take_fdopen(&fd, "r");
+ if (!f)
+ return -errno;
+
+ /* c is only needed if we actually load the file (it's referenced from items[] btw, in case you wonder.) */
+ assert(c);
+
+ r = config_parse(info->name, path, f,
+ "Install\0"
+ "-Unit\0"
+ "-Automount\0"
+ "-Device\0"
+ "-Mount\0"
+ "-Path\0"
+ "-Scope\0"
+ "-Service\0"
+ "-Slice\0"
+ "-Socket\0"
+ "-Swap\0"
+ "-Target\0"
+ "-Timer\0",
+ config_item_table_lookup, items,
+ 0, info,
+ NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse %s: %m", info->name);
+
+ if ((flags & SEARCH_DROPIN) == 0)
+ info->type = UNIT_FILE_TYPE_REGULAR;
+
+ return
+ (int) strv_length(info->aliases) +
+ (int) strv_length(info->wanted_by) +
+ (int) strv_length(info->required_by);
+}
+
+static int unit_file_load_or_readlink(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const char *path,
+ const char *root_dir,
+ SearchFlags flags) {
+ _cleanup_free_ char *resolved = NULL;
+ struct stat st;
+ int r;
+
+ r = unit_file_load(c, info, path, root_dir, flags);
+ if (r != -ELOOP || (flags & SEARCH_DROPIN))
+ return r;
+
+ r = chase_symlinks(path, root_dir, CHASE_WARN | CHASE_NONEXISTENT, &resolved, NULL);
+ if (r >= 0 &&
+ root_dir &&
+ path_equal_ptr(path_startswith(resolved, root_dir), "dev/null"))
+ /* When looking under root_dir, we can't expect /dev/ to be mounted,
+ * so let's see if the path is a (possibly dangling) symlink to /dev/null. */
+ info->type = UNIT_FILE_TYPE_MASKED;
+
+ else if (r > 0 &&
+ stat(resolved, &st) >= 0 &&
+ null_or_empty(&st))
+
+ info->type = UNIT_FILE_TYPE_MASKED;
+
+ else {
+ _cleanup_free_ char *target = NULL;
+ const char *bn;
+ UnitType a, b;
+
+ /* This is a symlink, let's read it. We read the link again, because last time
+ * we followed the link until resolution, and here we need to do one step. */
+
+ r = readlink_malloc(path, &target);
+ if (r < 0)
+ return r;
+
+ bn = basename(target);
+
+ if (unit_name_is_valid(info->name, UNIT_NAME_PLAIN)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ } else if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ } else if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ /* Enforce that the symlink destination does not
+ * change the unit file type. */
+
+ a = unit_name_to_type(info->name);
+ b = unit_name_to_type(bn);
+ if (a < 0 || b < 0 || a != b)
+ return -EINVAL;
+
+ if (path_is_absolute(target))
+ /* This is an absolute path, prefix the root so that we always deal with fully qualified paths */
+ info->symlink_target = path_join(root_dir, target);
+ else
+ /* This is a relative path, take it relative to the dir the symlink is located in. */
+ info->symlink_target = file_in_same_dir(path, target);
+ if (!info->symlink_target)
+ return -ENOMEM;
+
+ info->type = UNIT_FILE_TYPE_SYMLINK;
+ }
+
+ return 0;
+}
+
+static int unit_file_search(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const LookupPaths *paths,
+ SearchFlags flags) {
+
+ const char *dropin_dir_name = NULL, *dropin_template_dir_name = NULL;
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL;
+ _cleanup_free_ char *template = NULL;
+ bool found_unit = false;
+ int r, result;
+ char **p;
+
+ assert(info);
+ assert(paths);
+
+ /* Was this unit already loaded? */
+ if (info->type != _UNIT_FILE_TYPE_INVALID)
+ return 0;
+
+ if (info->path)
+ return unit_file_load_or_readlink(c, info, info->path, paths->root_dir, flags);
+
+ assert(info->name);
+
+ if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) {
+ r = unit_name_template(info->name, &template);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(p, paths->search_path) {
+ _cleanup_free_ char *path = NULL;
+
+ path = path_join(*p, info->name);
+ if (!path)
+ return -ENOMEM;
+
+ r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags);
+ if (r >= 0) {
+ info->path = TAKE_PTR(path);
+ result = r;
+ found_unit = true;
+ break;
+ } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES))
+ return r;
+ }
+
+ if (!found_unit && template) {
+
+ /* Unit file doesn't exist, however instance
+ * enablement was requested. We will check if it is
+ * possible to load template unit file. */
+
+ STRV_FOREACH(p, paths->search_path) {
+ _cleanup_free_ char *path = NULL;
+
+ path = path_join(*p, template);
+ if (!path)
+ return -ENOMEM;
+
+ r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags);
+ if (r >= 0) {
+ info->path = TAKE_PTR(path);
+ result = r;
+ found_unit = true;
+ break;
+ } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES))
+ return r;
+ }
+ }
+
+ if (!found_unit)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Cannot find unit %s%s%s.",
+ info->name, template ? " or " : "", strempty(template));
+
+ if (info->type == UNIT_FILE_TYPE_MASKED)
+ return result;
+
+ /* Search for drop-in directories */
+
+ dropin_dir_name = strjoina(info->name, ".d");
+ STRV_FOREACH(p, paths->search_path) {
+ char *path;
+
+ path = path_join(*p, dropin_dir_name);
+ if (!path)
+ return -ENOMEM;
+
+ r = strv_consume(&dirs, path);
+ if (r < 0)
+ return r;
+ }
+
+ if (template) {
+ dropin_template_dir_name = strjoina(template, ".d");
+ STRV_FOREACH(p, paths->search_path) {
+ char *path;
+
+ path = path_join(*p, dropin_template_dir_name);
+ if (!path)
+ return -ENOMEM;
+
+ r = strv_consume(&dirs, path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Load drop-in conf files */
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) dirs);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get list of conf files: %m");
+
+ STRV_FOREACH(p, files) {
+ r = unit_file_load_or_readlink(c, info, *p, paths->root_dir, flags | SEARCH_DROPIN);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load conf file %s: %m", *p);
+ }
+
+ return result;
+}
+
+static int install_info_follow(
+ InstallContext *c,
+ UnitFileInstallInfo *i,
+ const char *root_dir,
+ SearchFlags flags,
+ bool ignore_different_name) {
+
+ assert(c);
+ assert(i);
+
+ if (i->type != UNIT_FILE_TYPE_SYMLINK)
+ return -EINVAL;
+ if (!i->symlink_target)
+ return -EINVAL;
+
+ /* If the basename doesn't match, the caller should add a
+ * complete new entry for this. */
+
+ if (!ignore_different_name && !streq(basename(i->symlink_target), i->name))
+ return -EXDEV;
+
+ free_and_replace(i->path, i->symlink_target);
+ i->type = _UNIT_FILE_TYPE_INVALID;
+
+ return unit_file_load_or_readlink(c, i, i->path, root_dir, flags);
+}
+
+/**
+ * Search for the unit file. If the unit name is a symlink, follow the symlink to the
+ * target, maybe more than once. Propagate the instance name if present.
+ */
+static int install_info_traverse(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ UnitFileInstallInfo *start,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret) {
+
+ UnitFileInstallInfo *i;
+ unsigned k = 0;
+ int r;
+
+ assert(paths);
+ assert(start);
+ assert(c);
+
+ r = unit_file_search(c, start, paths, flags);
+ if (r < 0)
+ return r;
+
+ i = start;
+ while (i->type == UNIT_FILE_TYPE_SYMLINK) {
+ /* Follow the symlink */
+
+ if (++k > UNIT_FILE_FOLLOW_SYMLINK_MAX)
+ return -ELOOP;
+
+ if (!(flags & SEARCH_FOLLOW_CONFIG_SYMLINKS)) {
+ r = path_is_config(paths, i->path, true);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -ELOOP;
+ }
+
+ r = install_info_follow(c, i, paths->root_dir, flags, false);
+ if (r == -EXDEV) {
+ _cleanup_free_ char *buffer = NULL;
+ const char *bn;
+
+ /* Target has a different name, create a new
+ * install info object for that, and continue
+ * with that. */
+
+ bn = basename(i->symlink_target);
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_INSTANCE) &&
+ unit_name_is_valid(bn, UNIT_NAME_TEMPLATE)) {
+
+ _cleanup_free_ char *instance = NULL;
+
+ r = unit_name_to_instance(i->name, &instance);
+ if (r < 0)
+ return r;
+
+ r = unit_name_replace_instance(bn, instance, &buffer);
+ if (r < 0)
+ return r;
+
+ if (streq(buffer, i->name)) {
+
+ /* We filled in the instance, and the target stayed the same? If so, then let's
+ * honour the link as it is. */
+
+ r = install_info_follow(c, i, paths->root_dir, flags, true);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ bn = buffer;
+ }
+
+ r = install_info_add(c, bn, NULL, false, &i);
+ if (r < 0)
+ return r;
+
+ /* Try again, with the new target we found. */
+ r = unit_file_search(c, i, paths, flags);
+ if (r == -ENOENT)
+ /* Translate error code to highlight this specific case */
+ return -ENOLINK;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = i;
+
+ return 0;
+}
+
+/**
+ * Call install_info_add() with name_or_path as the path (if name_or_path starts with "/")
+ * or the name (otherwise). root_dir is prepended to the path.
+ */
+static int install_info_add_auto(
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name_or_path,
+ UnitFileInstallInfo **ret) {
+
+ assert(c);
+ assert(name_or_path);
+
+ if (path_is_absolute(name_or_path)) {
+ const char *pp;
+
+ pp = prefix_roota(paths->root_dir, name_or_path);
+
+ return install_info_add(c, NULL, pp, false, ret);
+ } else
+ return install_info_add(c, name_or_path, NULL, false, ret);
+}
+
+static int install_info_discover(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(name);
+
+ r = install_info_add_auto(c, paths, name, &i);
+ if (r >= 0)
+ r = install_info_traverse(scope, c, paths, i, flags, ret);
+
+ if (r < 0)
+ unit_file_changes_add(changes, n_changes, r, name, NULL);
+ return r;
+}
+
+static int install_info_discover_and_check(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r;
+
+ r = install_info_discover(scope, c, paths, name, flags, ret, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ return install_info_may_process(ret ? *ret : NULL, paths, changes, n_changes);
+}
+
+int unit_file_verify_alias(const UnitFileInstallInfo *i, const char *dst, char **ret_dst) {
+ _cleanup_free_ char *dst_updated = NULL;
+ int r;
+
+ /* Verify that dst is a valid either a valid alias or a valid .wants/.requires symlink for the target
+ * unit *i. Return negative on error or if not compatible, zero on success.
+ *
+ * ret_dst is set in cases where "instance propagation" happens, i.e. when the instance part is
+ * inserted into dst. It is not normally set, even on success, so that the caller can easily
+ * distinguish the case where instance propagation occurred.
+ */
+
+ const char *path_alias = strrchr(dst, '/');
+ if (path_alias) {
+ /* This branch covers legacy Alias= function of creating .wants and .requires symlinks. */
+ _cleanup_free_ char *dir = NULL;
+ char *p;
+
+ path_alias ++; /* skip over slash */
+
+ dir = dirname_malloc(dst);
+ if (!dir)
+ return log_oom();
+
+ p = endswith(dir, ".wants");
+ if (!p)
+ p = endswith(dir, ".requires");
+ if (!p)
+ return log_warning_errno(SYNTHETIC_ERRNO(EXDEV),
+ "Invalid path \"%s\" in alias.", dir);
+ *p = '\0'; /* dir should now be a unit name */
+
+ r = unit_name_classify(dir);
+ if (r < 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EXDEV),
+ "Invalid unit name component \"%s\" in alias.", dir);
+
+ const bool instance_propagation = r == UNIT_NAME_TEMPLATE;
+
+ /* That's the name we want to use for verification. */
+ r = unit_symlink_name_compatible(path_alias, i->name, instance_propagation);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify alias validity: %m");
+ if (r == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EXDEV),
+ "Invalid unit %s symlink %s.",
+ i->name, dst);
+
+ } else {
+ /* If the symlink target has an instance set and the symlink source doesn't, we "propagate
+ * the instance", i.e. instantiate the symlink source with the target instance. */
+ if (unit_name_is_valid(dst, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *inst = NULL;
+
+ r = unit_name_to_instance(i->name, &inst);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract instance name from %s: %m", i->name);
+
+ if (r == UNIT_NAME_INSTANCE) {
+ r = unit_name_replace_instance(dst, inst, &dst_updated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build unit name from %s+%s: %m",
+ dst, inst);
+ }
+ }
+
+ r = unit_validate_alias_symlink_and_warn(dst_updated ?: dst, i->name);
+ if (r < 0)
+ return r;
+
+ }
+
+ *ret_dst = TAKE_PTR(dst_updated);
+ return 0;
+}
+
+static int install_info_symlink_alias(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ char **s;
+ int r = 0, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ STRV_FOREACH(s, i->aliases) {
+ _cleanup_free_ char *alias_path = NULL, *dst = NULL, *dst_updated = NULL;
+
+ q = install_full_printf(i, *s, &dst);
+ if (q < 0)
+ return q;
+
+ q = unit_file_verify_alias(i, dst, &dst_updated);
+ if (q < 0)
+ continue;
+
+ alias_path = path_make_absolute(dst_updated ?: dst, config_path);
+ if (!alias_path)
+ return -ENOMEM;
+
+ q = create_symlink(paths, i->path, alias_path, force, changes, n_changes);
+ if (r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int install_info_symlink_wants(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ char **list,
+ const char *suffix,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *buf = NULL;
+ const char *n;
+ char **s;
+ int r = 0, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ if (strv_isempty(list))
+ return 0;
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) {
+ UnitFileInstallInfo instance = {
+ .type = _UNIT_FILE_TYPE_INVALID,
+ };
+ _cleanup_free_ char *path = NULL;
+
+ /* If this is a template, and we have no instance, don't do anything */
+ if (!i->default_instance)
+ return 1;
+
+ r = unit_name_replace_instance(i->name, i->default_instance, &buf);
+ if (r < 0)
+ return r;
+
+ instance.name = buf;
+ r = unit_file_search(NULL, &instance, paths, SEARCH_FOLLOW_CONFIG_SYMLINKS);
+ if (r < 0)
+ return r;
+
+ path = TAKE_PTR(instance.path);
+
+ if (instance.type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, -ERFKILL, path, NULL);
+ return -ERFKILL;
+ }
+
+ n = buf;
+ } else
+ n = i->name;
+
+ STRV_FOREACH(s, list) {
+ _cleanup_free_ char *path = NULL, *dst = NULL;
+
+ q = install_full_printf(i, *s, &dst);
+ if (q < 0)
+ return q;
+
+ if (!unit_name_is_valid(dst, UNIT_NAME_ANY)) {
+ unit_file_changes_add(changes, n_changes, -EUCLEAN, dst, NULL);
+ r = -EUCLEAN;
+ continue;
+ }
+
+ path = strjoin(config_path, "/", dst, suffix, n);
+ if (!path)
+ return -ENOMEM;
+
+ q = create_symlink(paths, i->path, path, true, changes, n_changes);
+ if (r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int install_info_symlink_link(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+ assert(i->path);
+
+ r = in_search_path(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ path = path_join(config_path, i->name);
+ if (!path)
+ return -ENOMEM;
+
+ return create_symlink(paths, i->path, path, force, changes, n_changes);
+}
+
+static int install_info_apply(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ if (i->type != UNIT_FILE_TYPE_REGULAR)
+ return 0;
+
+ r = install_info_symlink_alias(i, paths, config_path, force, changes, n_changes);
+
+ q = install_info_symlink_wants(i, paths, config_path, i->wanted_by, ".wants/", changes, n_changes);
+ if (r == 0)
+ r = q;
+
+ q = install_info_symlink_wants(i, paths, config_path, i->required_by, ".requires/", changes, n_changes);
+ if (r == 0)
+ r = q;
+
+ q = install_info_symlink_link(i, paths, config_path, force, changes, n_changes);
+ /* Do not count links to the unit file towards the "carries_install_info" count */
+ if (r == 0 && q < 0)
+ r = q;
+
+ return r;
+}
+
+static int install_context_apply(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ SearchFlags flags,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(config_path);
+
+ if (ordered_hashmap_isempty(c->will_process))
+ return 0;
+
+ r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = 0;
+ while ((i = ordered_hashmap_first(c->will_process))) {
+ int q;
+
+ q = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name);
+ if (q < 0)
+ return q;
+
+ q = install_info_traverse(scope, c, paths, i, flags, NULL);
+ if (q < 0) {
+ unit_file_changes_add(changes, n_changes, q, i->name, NULL);
+ return q;
+ }
+
+ /* We can attempt to process a masked unit when a different unit
+ * that we were processing specifies it in Also=. */
+ if (i->type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path, NULL);
+ if (r >= 0)
+ /* Assume that something *could* have been enabled here,
+ * avoid "empty [Install] section" warning. */
+ r += 1;
+ continue;
+ }
+
+ if (i->type != UNIT_FILE_TYPE_REGULAR)
+ continue;
+
+ q = install_info_apply(i, paths, config_path, force, changes, n_changes);
+ if (r >= 0) {
+ if (q < 0)
+ r = q;
+ else
+ r += q;
+ }
+ }
+
+ return r;
+}
+
+static int install_context_mark_for_removal(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ Set **remove_symlinks_to,
+ const char *config_path,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(config_path);
+
+ /* Marks all items for removal */
+
+ if (ordered_hashmap_isempty(c->will_process))
+ return 0;
+
+ r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ while ((i = ordered_hashmap_first(c->will_process))) {
+
+ r = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name);
+ if (r < 0)
+ return r;
+
+ r = install_info_traverse(scope, c, paths, i, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, NULL);
+ if (r == -ENOLINK) {
+ log_debug_errno(r, "Name %s leads to a dangling symlink, removing name.", i->name);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_DANGLING, i->path ?: i->name, NULL);
+ } else if (r == -ENOENT) {
+
+ if (i->auxiliary) /* some unit specified in Also= or similar is missing */
+ log_debug_errno(r, "Auxiliary unit of %s not found, removing name.", i->name);
+ else {
+ log_debug_errno(r, "Unit %s not found, removing name.", i->name);
+ unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL);
+ }
+
+ } else if (r < 0) {
+ log_debug_errno(r, "Failed to find unit %s, removing name: %m", i->name);
+ unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL);
+ } else if (i->type == UNIT_FILE_TYPE_MASKED) {
+ log_debug("Unit file %s is masked, ignoring.", i->name);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path ?: i->name, NULL);
+ continue;
+ } else if (i->type != UNIT_FILE_TYPE_REGULAR) {
+ log_debug("Unit %s has type %s, ignoring.", i->name, unit_file_type_to_string(i->type) ?: "invalid");
+ continue;
+ }
+
+ r = mark_symlink_for_removal(remove_symlinks_to, i->name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int unit_file_mask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *path = NULL;
+ int q;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) {
+ if (r == 0)
+ r = -EINVAL;
+ continue;
+ }
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ q = create_symlink(&paths, "/dev/null", path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+int unit_file_unmask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ const char *config_path;
+ char **i;
+ bool dry_run;
+ int r, q;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ dry_run = !!(flags & UNIT_FILE_DRY_RUN);
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *path = NULL;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ r = null_or_empty_path(path);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo] = strdup(*i);
+ if (!todo[n_todo])
+ return -ENOMEM;
+
+ n_todo++;
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_free_ char *path = NULL;
+ const char *rp;
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ if (!dry_run && unlink(path) < 0) {
+ if (errno != ENOENT) {
+ if (r >= 0)
+ r = -errno;
+ unit_file_changes_add(changes, n_changes, -errno, path, NULL);
+ }
+
+ continue;
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, path, NULL);
+
+ rp = skip_root(&paths, path);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: path);
+ if (q < 0)
+ return q;
+ }
+
+ q = remove_marked_symlinks(remove_symlinks_to, config_path, &paths, dry_run, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ return r;
+}
+
+int unit_file_link(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ const char *config_path;
+ char **i;
+ int r, q;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *full = NULL;
+ struct stat st;
+ char *fn;
+
+ if (!path_is_absolute(*i))
+ return -EINVAL;
+
+ fn = basename(*i);
+ if (!unit_name_is_valid(fn, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ full = path_join(paths.root_dir, *i);
+ if (!full)
+ return -ENOMEM;
+
+ if (lstat(full, &st) < 0)
+ return -errno;
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+
+ q = in_search_path(&paths, *i);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ continue;
+
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo] = strdup(*i);
+ if (!todo[n_todo])
+ return -ENOMEM;
+
+ n_todo++;
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_free_ char *new_path = NULL;
+
+ new_path = path_make_absolute(basename(*i), config_path);
+ if (!new_path)
+ return -ENOMEM;
+
+ q = create_symlink(&paths, *i, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int path_shall_revert(const LookupPaths *paths, const char *path) {
+ int r;
+
+ assert(paths);
+ assert(path);
+
+ /* Checks whether the path is one where the drop-in directories shall be removed. */
+
+ r = path_is_config(paths, path, true);
+ if (r != 0)
+ return r;
+
+ r = path_is_control(paths, path);
+ if (r != 0)
+ return r;
+
+ return path_is_transient(paths, path);
+}
+
+int unit_file_revert(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ char **i;
+ int r, q;
+
+ /* Puts a unit file back into vendor state. This means:
+ *
+ * a) we remove all drop-in snippets added by the user ("config"), add to transient units ("transient"), and
+ * added via "systemctl set-property" ("control"), but not if the drop-in is generated ("generated").
+ *
+ * c) if there's a vendor unit file (i.e. one in /usr) we remove any configured overriding unit files (i.e. in
+ * "config", but not in "transient" or "control" or even "generated").
+ *
+ * We remove all that in both the runtime and the persistent directories, if that applies.
+ */
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, files) {
+ bool has_vendor = false;
+ char **p;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ STRV_FOREACH(p, paths.search_path) {
+ _cleanup_free_ char *path = NULL, *dropin = NULL;
+ struct stat st;
+
+ path = path_make_absolute(*i, *p);
+ if (!path)
+ return -ENOMEM;
+
+ r = lstat(path, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISREG(st.st_mode)) {
+ /* Check if there's a vendor version */
+ r = path_is_vendor_or_generator(&paths, path);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ has_vendor = true;
+ }
+
+ dropin = strjoin(path, ".d");
+ if (!dropin)
+ return -ENOMEM;
+
+ r = lstat(dropin, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISDIR(st.st_mode)) {
+ /* Remove the drop-ins */
+ r = path_shall_revert(&paths, dropin);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo++] = TAKE_PTR(dropin);
+ }
+ }
+ }
+
+ if (!has_vendor)
+ continue;
+
+ /* OK, there's a vendor version, hence drop all configuration versions */
+ STRV_FOREACH(p, paths.search_path) {
+ _cleanup_free_ char *path = NULL;
+ struct stat st;
+
+ path = path_make_absolute(*i, *p);
+ if (!path)
+ return -ENOMEM;
+
+ r = lstat(path, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
+ r = path_is_config(&paths, path, true);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo++] = TAKE_PTR(path);
+ }
+ }
+ }
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_strv_free_ char **fs = NULL;
+ const char *rp;
+ char **j;
+
+ (void) get_files_in_directory(*i, &fs);
+
+ q = rm_rf(*i, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (q < 0 && q != -ENOENT && r >= 0) {
+ r = q;
+ continue;
+ }
+
+ STRV_FOREACH(j, fs) {
+ _cleanup_free_ char *t = NULL;
+
+ t = path_join(*i, *j);
+ if (!t)
+ return -ENOMEM;
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, t, NULL);
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, *i, NULL);
+
+ rp = skip_root(&paths, *i);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: *i);
+ if (q < 0)
+ return q;
+ }
+
+ q = remove_marked_symlinks(remove_symlinks_to, paths.runtime_config, &paths, false, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ q = remove_marked_symlinks(remove_symlinks_to, paths.persistent_config, &paths, false, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ return r;
+}
+
+int unit_file_add_dependency(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ const char *target,
+ UnitDependency dep,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i, *target_info;
+ const char *config_path;
+ char **f;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(target);
+
+ if (!IN_SET(dep, UNIT_WANTS, UNIT_REQUIRES))
+ return -EINVAL;
+
+ if (!unit_name_is_valid(target, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = install_info_discover_and_check(scope, &c, &paths, target, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &target_info, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(target_info->type == UNIT_FILE_TYPE_REGULAR);
+
+ STRV_FOREACH(f, files) {
+ char ***l;
+
+ r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(i->type == UNIT_FILE_TYPE_REGULAR);
+
+ /* We didn't actually load anything from the unit
+ * file, but instead just add in our new symlink to
+ * create. */
+
+ if (dep == UNIT_WANTS)
+ l = &i->wanted_by;
+ else
+ l = &i->required_by;
+
+ strv_free(*l);
+ *l = strv_new(target_info->name);
+ if (!*l)
+ return -ENOMEM;
+ }
+
+ return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_FOLLOW_CONFIG_SYMLINKS, changes, n_changes);
+}
+
+int unit_file_enable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ const char *config_path;
+ UnitFileInstallInfo *i;
+ char **f;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = config_path_from_flags(&paths, flags);
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(f, files) {
+ r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(i->type == UNIT_FILE_TYPE_REGULAR);
+ }
+
+ /* This will return the number of symlink rules that were
+ supposed to be created, not the ones actually created. This
+ is useful to determine whether the passed files had any
+ installation data at all. */
+
+ return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_LOAD, changes, n_changes);
+}
+
+int unit_file_disable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = config_path_from_flags(&paths, flags);
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_add(&c, *i, NULL, false, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = install_context_mark_for_removal(scope, &c, &paths, &remove_symlinks_to, config_path, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ return remove_marked_symlinks(remove_symlinks_to, config_path, &paths, !!(flags & UNIT_FILE_DRY_RUN), changes, n_changes);
+}
+
+int unit_file_reenable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ char **n;
+ int r;
+ size_t l, i;
+
+ /* First, we invoke the disable command with only the basename... */
+ l = strv_length(files);
+ n = newa(char*, l+1);
+ for (i = 0; i < l; i++)
+ n[i] = basename(files[i]);
+ n[i] = NULL;
+
+ r = unit_file_disable(scope, flags, root_dir, n, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ /* But the enable command with the full name */
+ return unit_file_enable(scope, flags, root_dir, files, changes, n_changes);
+}
+
+int unit_file_set_default(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ const char *name,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ const char *new_path;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ if (unit_name_to_type(name) != UNIT_TARGET) /* this also validates the name */
+ return -EINVAL;
+ if (streq(name, SPECIAL_DEFAULT_TARGET))
+ return -EINVAL;
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ r = install_info_discover_and_check(scope, &c, &paths, name, 0, &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ new_path = strjoina(paths.persistent_config, "/" SPECIAL_DEFAULT_TARGET);
+ return create_symlink(&paths, i->path, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+int unit_file_get_default(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **name) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ char *n;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ r = install_info_discover(scope, &c, &paths, SPECIAL_DEFAULT_TARGET, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, NULL, NULL);
+ if (r < 0)
+ return r;
+ r = install_info_may_process(i, &paths, NULL, 0);
+ if (r < 0)
+ return r;
+
+ n = strdup(i->name);
+ if (!n)
+ return -ENOMEM;
+
+ *name = n;
+ return 0;
+}
+
+int unit_file_lookup_state(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const char *name,
+ UnitFileState *ret) {
+
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ UnitFileState state;
+ int r;
+
+ assert(paths);
+ assert(name);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_discover(scope, &c, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, NULL, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to discover unit %s: %m", name);
+
+ assert(IN_SET(i->type, UNIT_FILE_TYPE_REGULAR, UNIT_FILE_TYPE_MASKED));
+ log_debug("Found unit %s at %s (%s)", name, strna(i->path),
+ i->type == UNIT_FILE_TYPE_REGULAR ? "regular file" : "mask");
+
+ /* Shortcut things, if the caller just wants to know if this unit exists. */
+ if (!ret)
+ return 0;
+
+ switch (i->type) {
+
+ case UNIT_FILE_TYPE_MASKED:
+ r = path_is_runtime(paths, i->path, true);
+ if (r < 0)
+ return r;
+
+ state = r > 0 ? UNIT_FILE_MASKED_RUNTIME : UNIT_FILE_MASKED;
+ break;
+
+ case UNIT_FILE_TYPE_REGULAR:
+ /* Check if the name we were querying is actually an alias */
+ if (!streq(name, basename(i->path)) && !unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) {
+ state = UNIT_FILE_ALIAS;
+ break;
+ }
+
+ r = path_is_generator(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ state = UNIT_FILE_GENERATED;
+ break;
+ }
+
+ r = path_is_transient(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ state = UNIT_FILE_TRANSIENT;
+ break;
+ }
+
+ /* Check if any of the Alias= symlinks have been created.
+ * We ignore other aliases, and only check those that would
+ * be created by systemctl enable for this unit. */
+ r = find_symlinks_in_scope(scope, paths, i, true, &state);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ break;
+
+ /* Check if the file is known under other names. If it is,
+ * it might be in use. Report that as UNIT_FILE_INDIRECT. */
+ r = find_symlinks_in_scope(scope, paths, i, false, &state);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ state = UNIT_FILE_INDIRECT;
+ else {
+ if (unit_file_install_info_has_rules(i))
+ state = UNIT_FILE_DISABLED;
+ else if (unit_file_install_info_has_also(i))
+ state = UNIT_FILE_INDIRECT;
+ else
+ state = UNIT_FILE_STATIC;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected unit file type.");
+ }
+
+ *ret = state;
+ return 0;
+}
+
+int unit_file_get_state(
+ UnitFileScope scope,
+ const char *root_dir,
+ const char *name,
+ UnitFileState *ret) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ return unit_file_lookup_state(scope, &paths, name, ret);
+}
+
+int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name) {
+ _cleanup_(install_context_done) InstallContext c = {};
+ int r;
+
+ assert(paths);
+ assert(name);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_discover(scope, &c, paths, name, 0, NULL, NULL, NULL);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int split_pattern_into_name_and_instances(const char *pattern, char **out_unit_name, char ***out_instances) {
+ _cleanup_strv_free_ char **instances = NULL;
+ _cleanup_free_ char *unit_name = NULL;
+ int r;
+
+ assert(pattern);
+ assert(out_instances);
+ assert(out_unit_name);
+
+ r = extract_first_word(&pattern, &unit_name, NULL, EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+
+ /* We handle the instances logic when unit name is extracted */
+ if (pattern) {
+ /* We only create instances when a rule of templated unit
+ * is seen. A rule like enable foo@.service a b c will
+ * result in an array of (a, b, c) as instance names */
+ if (!unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ instances = strv_split(pattern, WHITESPACE);
+ if (!instances)
+ return -ENOMEM;
+
+ *out_instances = TAKE_PTR(instances);
+ }
+
+ *out_unit_name = TAKE_PTR(unit_name);
+
+ return 0;
+}
+
+static int presets_find_config(UnitFileScope scope, const char *root_dir, char ***files) {
+ static const char* const system_dirs[] = {CONF_PATHS("systemd/system-preset"), NULL};
+ static const char* const user_dirs[] = {CONF_PATHS_USR("systemd/user-preset"), NULL};
+ const char* const* dirs;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ if (scope == UNIT_FILE_SYSTEM)
+ dirs = system_dirs;
+ else if (IN_SET(scope, UNIT_FILE_GLOBAL, UNIT_FILE_USER))
+ dirs = user_dirs;
+ else
+ assert_not_reached("Invalid unit file scope");
+
+ return conf_files_list_strv(files, ".preset", root_dir, 0, dirs);
+}
+
+static int read_presets(UnitFileScope scope, const char *root_dir, UnitFilePresets *presets) {
+ _cleanup_(unit_file_presets_freep) UnitFilePresets ps = {};
+ size_t n_allocated = 0;
+ _cleanup_strv_free_ char **files = NULL;
+ char **p;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(presets);
+
+ r = presets_find_config(scope, root_dir, &files);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, files) {
+ _cleanup_fclose_ FILE *f;
+ int n = 0;
+
+ f = fopen(*p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ UnitFilePresetRule rule = {};
+ const char *parameter;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+ n++;
+
+ if (isempty(l))
+ continue;
+ if (strchr(COMMENTS, *l))
+ continue;
+
+ parameter = first_word(l, "enable");
+ if (parameter) {
+ char *unit_name;
+ char **instances = NULL;
+
+ /* Unit_name will remain the same as parameter when no instances are specified */
+ r = split_pattern_into_name_and_instances(parameter, &unit_name, &instances);
+ if (r < 0) {
+ log_syntax(NULL, LOG_WARNING, *p, n, r, "Couldn't parse line '%s'. Ignoring.", line);
+ continue;
+ }
+
+ rule = (UnitFilePresetRule) {
+ .pattern = unit_name,
+ .action = PRESET_ENABLE,
+ .instances = instances,
+ };
+ }
+
+ parameter = first_word(l, "disable");
+ if (parameter) {
+ char *pattern;
+
+ pattern = strdup(parameter);
+ if (!pattern)
+ return -ENOMEM;
+
+ rule = (UnitFilePresetRule) {
+ .pattern = pattern,
+ .action = PRESET_DISABLE,
+ };
+ }
+
+ if (rule.action) {
+ if (!GREEDY_REALLOC(ps.rules, n_allocated, ps.n_rules + 1))
+ return -ENOMEM;
+
+ ps.rules[ps.n_rules++] = rule;
+ continue;
+ }
+
+ log_syntax(NULL, LOG_WARNING, *p, n, 0, "Couldn't parse line '%s'. Ignoring.", line);
+ }
+ }
+
+ ps.initialized = true;
+ *presets = ps;
+ ps = (UnitFilePresets){};
+
+ return 0;
+}
+
+static int pattern_match_multiple_instances(
+ const UnitFilePresetRule rule,
+ const char *unit_name,
+ char ***ret) {
+
+ _cleanup_free_ char *templated_name = NULL;
+ int r;
+
+ /* If no ret is needed or the rule itself does not have instances
+ * initialized, we return not matching */
+ if (!ret || !rule.instances)
+ return 0;
+
+ r = unit_name_template(unit_name, &templated_name);
+ if (r < 0)
+ return r;
+ if (!streq(rule.pattern, templated_name))
+ return 0;
+
+ /* Compose a list of specified instances when unit name is a template */
+ if (unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_strv_free_ char **out_strv = NULL;
+
+ char **iter;
+ STRV_FOREACH(iter, rule.instances) {
+ _cleanup_free_ char *name = NULL;
+
+ r = unit_name_replace_instance(unit_name, *iter, &name);
+ if (r < 0)
+ return r;
+
+ r = strv_consume(&out_strv, TAKE_PTR(name));
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(out_strv);
+ return 1;
+ } else {
+ /* We now know the input unit name is an instance name */
+ _cleanup_free_ char *instance_name = NULL;
+
+ r = unit_name_to_instance(unit_name, &instance_name);
+ if (r < 0)
+ return r;
+
+ if (strv_find(rule.instances, instance_name))
+ return 1;
+ }
+ return 0;
+}
+
+static int query_presets(const char *name, const UnitFilePresets *presets, char ***instance_name_list) {
+ PresetAction action = PRESET_UNKNOWN;
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ for (size_t i = 0; i < presets->n_rules; i++)
+ if (pattern_match_multiple_instances(presets->rules[i], name, instance_name_list) > 0 ||
+ fnmatch(presets->rules[i].pattern, name, FNM_NOESCAPE) == 0) {
+ action = presets->rules[i].action;
+ break;
+ }
+
+ switch (action) {
+ case PRESET_UNKNOWN:
+ log_debug("Preset files don't specify rule for %s. Enabling.", name);
+ return 1;
+ case PRESET_ENABLE:
+ if (instance_name_list && *instance_name_list) {
+ char **s;
+ STRV_FOREACH(s, *instance_name_list)
+ log_debug("Preset files say enable %s.", *s);
+ } else
+ log_debug("Preset files say enable %s.", name);
+ return 1;
+ case PRESET_DISABLE:
+ log_debug("Preset files say disable %s.", name);
+ return 0;
+ default:
+ assert_not_reached("invalid preset action");
+ }
+}
+
+int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name, UnitFilePresets *cached) {
+ _cleanup_(unit_file_presets_freep) UnitFilePresets tmp = {};
+ int r;
+
+ if (!cached)
+ cached = &tmp;
+ if (!cached->initialized) {
+ r = read_presets(scope, root_dir, cached);
+ if (r < 0)
+ return r;
+ }
+
+ return query_presets(name, cached, NULL);
+}
+
+static int execute_preset(
+ UnitFileScope scope,
+ InstallContext *plus,
+ InstallContext *minus,
+ const LookupPaths *paths,
+ const char *config_path,
+ char **files,
+ UnitFilePresetMode mode,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r;
+
+ assert(plus);
+ assert(minus);
+ assert(paths);
+ assert(config_path);
+
+ if (mode != UNIT_FILE_PRESET_ENABLE_ONLY) {
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+
+ r = install_context_mark_for_removal(scope, minus, paths, &remove_symlinks_to, config_path, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ r = remove_marked_symlinks(remove_symlinks_to, config_path, paths, false, changes, n_changes);
+ } else
+ r = 0;
+
+ if (mode != UNIT_FILE_PRESET_DISABLE_ONLY) {
+ int q;
+
+ /* Returns number of symlinks that where supposed to be installed. */
+ q = install_context_apply(scope, plus, paths, config_path, force, SEARCH_LOAD, changes, n_changes);
+ if (r >= 0) {
+ if (q < 0)
+ r = q;
+ else
+ r += q;
+ }
+ }
+
+ return r;
+}
+
+static int preset_prepare_one(
+ UnitFileScope scope,
+ InstallContext *plus,
+ InstallContext *minus,
+ LookupPaths *paths,
+ const char *name,
+ const UnitFilePresets *presets,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext tmp = {};
+ _cleanup_strv_free_ char **instance_name_list = NULL;
+ UnitFileInstallInfo *i;
+ int r;
+
+ if (install_info_find(plus, name) || install_info_find(minus, name))
+ return 0;
+
+ r = install_info_discover(scope, &tmp, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ if (!streq(name, i->name)) {
+ log_debug("Skipping %s because it is an alias for %s.", name, i->name);
+ return 0;
+ }
+
+ r = query_presets(name, presets, &instance_name_list);
+ if (r < 0)
+ return r;
+
+ if (r > 0) {
+ if (instance_name_list) {
+ char **s;
+ STRV_FOREACH(s, instance_name_list) {
+ r = install_info_discover_and_check(scope, plus, paths, *s, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ r = install_info_discover_and_check(scope, plus, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+
+ } else
+ r = install_info_discover(scope, minus, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+
+ return r;
+}
+
+int unit_file_preset(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext plus = {}, minus = {};
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(unit_file_presets_freep) UnitFilePresets presets = {};
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(mode < _UNIT_FILE_PRESET_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = read_presets(scope, root_dir, &presets);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, files) {
+ r = preset_prepare_one(scope, &plus, &minus, &paths, *i, &presets, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+
+ return execute_preset(scope, &plus, &minus, &paths, config_path, files, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+int unit_file_preset_all(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext plus = {}, minus = {};
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(unit_file_presets_freep) UnitFilePresets presets = {};
+ const char *config_path = NULL;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(mode < _UNIT_FILE_PRESET_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = read_presets(scope, root_dir, &presets);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, paths.search_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*i);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ dirent_ensure_type(d, de);
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ /* we don't pass changes[] in, because we want to handle errors on our own */
+ r = preset_prepare_one(scope, &plus, &minus, &paths, de->d_name, &presets, NULL, 0);
+ if (r == -ERFKILL)
+ r = unit_file_changes_add(changes, n_changes,
+ UNIT_FILE_IS_MASKED, de->d_name, NULL);
+ else if (r == -ENOLINK)
+ r = unit_file_changes_add(changes, n_changes,
+ UNIT_FILE_IS_DANGLING, de->d_name, NULL);
+ else if (r == -EADDRNOTAVAIL) /* Ignore generated/transient units when applying preset */
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return execute_preset(scope, &plus, &minus, &paths, config_path, NULL, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+static void unit_file_list_free_one(UnitFileList *f) {
+ if (!f)
+ return;
+
+ free(f->path);
+ free(f);
+}
+
+Hashmap* unit_file_list_free(Hashmap *h) {
+ return hashmap_free_with_destructor(h, unit_file_list_free_one);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UnitFileList*, unit_file_list_free_one);
+
+int unit_file_get_list(
+ UnitFileScope scope,
+ const char *root_dir,
+ Hashmap *h,
+ char **states,
+ char **patterns) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ char **dirname;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(h);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(dirname, paths.search_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*dirname);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+ if (IN_SET(errno, ENOTDIR, EACCES)) {
+ log_debug_errno(errno, "Failed to open \"%s\": %m", *dirname);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_(unit_file_list_free_onep) UnitFileList *f = NULL;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ if (!strv_fnmatch_or_empty(patterns, de->d_name, FNM_NOESCAPE))
+ continue;
+
+ if (hashmap_get(h, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ f = new0(UnitFileList, 1);
+ if (!f)
+ return -ENOMEM;
+
+ f->path = path_make_absolute(de->d_name, *dirname);
+ if (!f->path)
+ return -ENOMEM;
+
+ r = unit_file_lookup_state(scope, &paths, de->d_name, &f->state);
+ if (r < 0)
+ f->state = UNIT_FILE_BAD;
+
+ if (!strv_isempty(states) &&
+ !strv_contains(states, unit_file_state_to_string(f->state)))
+ continue;
+
+ r = hashmap_put(h, basename(f->path), f);
+ if (r < 0)
+ return r;
+
+ f = NULL; /* prevent cleanup */
+ }
+ }
+
+ return 0;
+}
+
+static const char* const unit_file_state_table[_UNIT_FILE_STATE_MAX] = {
+ [UNIT_FILE_ENABLED] = "enabled",
+ [UNIT_FILE_ENABLED_RUNTIME] = "enabled-runtime",
+ [UNIT_FILE_LINKED] = "linked",
+ [UNIT_FILE_LINKED_RUNTIME] = "linked-runtime",
+ [UNIT_FILE_ALIAS] = "alias",
+ [UNIT_FILE_MASKED] = "masked",
+ [UNIT_FILE_MASKED_RUNTIME] = "masked-runtime",
+ [UNIT_FILE_STATIC] = "static",
+ [UNIT_FILE_DISABLED] = "disabled",
+ [UNIT_FILE_INDIRECT] = "indirect",
+ [UNIT_FILE_GENERATED] = "generated",
+ [UNIT_FILE_TRANSIENT] = "transient",
+ [UNIT_FILE_BAD] = "bad",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_state, UnitFileState);
+
+static const char* const unit_file_change_type_table[_UNIT_FILE_CHANGE_TYPE_MAX] = {
+ [UNIT_FILE_SYMLINK] = "symlink",
+ [UNIT_FILE_UNLINK] = "unlink",
+ [UNIT_FILE_IS_MASKED] = "masked",
+ [UNIT_FILE_IS_DANGLING] = "dangling",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_change_type, UnitFileChangeType);
+
+static const char* const unit_file_preset_mode_table[_UNIT_FILE_PRESET_MAX] = {
+ [UNIT_FILE_PRESET_FULL] = "full",
+ [UNIT_FILE_PRESET_ENABLE_ONLY] = "enable-only",
+ [UNIT_FILE_PRESET_DISABLE_ONLY] = "disable-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_preset_mode, UnitFilePresetMode);
diff --git a/src/shared/install.h b/src/shared/install.h
new file mode 100644
index 0000000..84bf1f5
--- /dev/null
+++ b/src/shared/install.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef enum UnitFilePresetMode UnitFilePresetMode;
+typedef enum UnitFileChangeType UnitFileChangeType;
+typedef enum UnitFileFlags UnitFileFlags;
+typedef enum UnitFileType UnitFileType;
+typedef struct UnitFileChange UnitFileChange;
+typedef struct UnitFileList UnitFileList;
+typedef struct UnitFileInstallInfo UnitFileInstallInfo;
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "path-lookup.h"
+#include "strv.h"
+#include "unit-name.h"
+
+enum UnitFilePresetMode {
+ UNIT_FILE_PRESET_FULL,
+ UNIT_FILE_PRESET_ENABLE_ONLY,
+ UNIT_FILE_PRESET_DISABLE_ONLY,
+ _UNIT_FILE_PRESET_MAX,
+ _UNIT_FILE_PRESET_INVALID = -1
+};
+
+enum UnitFileChangeType {
+ UNIT_FILE_SYMLINK,
+ UNIT_FILE_UNLINK,
+ UNIT_FILE_IS_MASKED,
+ UNIT_FILE_IS_DANGLING,
+ _UNIT_FILE_CHANGE_TYPE_MAX,
+ _UNIT_FILE_CHANGE_TYPE_INVALID = INT_MIN
+};
+
+enum UnitFileFlags {
+ UNIT_FILE_RUNTIME = 1 << 0, /* Public API via DBUS, do not change */
+ UNIT_FILE_FORCE = 1 << 1, /* Public API via DBUS, do not change */
+ UNIT_FILE_PORTABLE = 1 << 2, /* Public API via DBUS, do not change */
+ UNIT_FILE_DRY_RUN = 1 << 3,
+ _UNIT_FILE_FLAGS_MASK_PUBLIC = UNIT_FILE_RUNTIME|UNIT_FILE_PORTABLE|UNIT_FILE_FORCE,
+};
+
+/* type can either one of the UnitFileChangeTypes listed above, or a negative error.
+ * If source is specified, it should be the contents of the path symlink.
+ * In case of an error, source should be the existing symlink contents or NULL
+ */
+struct UnitFileChange {
+ int type; /* UnitFileChangeType or bust */
+ char *path;
+ char *source;
+};
+
+static inline bool unit_file_changes_have_modification(const UnitFileChange* changes, size_t n_changes) {
+ size_t i;
+ for (i = 0; i < n_changes; i++)
+ if (IN_SET(changes[i].type, UNIT_FILE_SYMLINK, UNIT_FILE_UNLINK))
+ return true;
+ return false;
+}
+
+struct UnitFileList {
+ char *path;
+ UnitFileState state;
+};
+
+enum UnitFileType {
+ UNIT_FILE_TYPE_REGULAR,
+ UNIT_FILE_TYPE_SYMLINK,
+ UNIT_FILE_TYPE_MASKED,
+ _UNIT_FILE_TYPE_MAX,
+ _UNIT_FILE_TYPE_INVALID = -1,
+};
+
+struct UnitFileInstallInfo {
+ char *name;
+ char *path;
+
+ char **aliases;
+ char **wanted_by;
+ char **required_by;
+ char **also;
+
+ char *default_instance;
+ char *symlink_target;
+
+ UnitFileType type;
+ bool auxiliary;
+};
+
+int unit_file_enable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_disable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_reenable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_preset(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_preset_all(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_mask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_unmask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_link(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_revert(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_set_default(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ const char *file,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_get_default(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **name);
+int unit_file_add_dependency(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ const char *target,
+ UnitDependency dep,
+ UnitFileChange **changes,
+ size_t *n_changes);
+
+int unit_file_lookup_state(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const char *name,
+ UnitFileState *ret);
+
+int unit_file_get_state(UnitFileScope scope, const char *root_dir, const char *filename, UnitFileState *ret);
+int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name);
+
+int unit_file_get_list(UnitFileScope scope, const char *root_dir, Hashmap *h, char **states, char **patterns);
+Hashmap* unit_file_list_free(Hashmap *h);
+
+int unit_file_changes_add(UnitFileChange **changes, size_t *n_changes, int type, const char *path, const char *source);
+void unit_file_changes_free(UnitFileChange *changes, size_t n_changes);
+void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet);
+
+int unit_file_verify_alias(const UnitFileInstallInfo *i, const char *dst, char **ret_dst);
+
+typedef struct UnitFilePresetRule UnitFilePresetRule;
+
+typedef struct {
+ UnitFilePresetRule *rules;
+ size_t n_rules;
+ bool initialized;
+} UnitFilePresets;
+
+void unit_file_presets_freep(UnitFilePresets *p);
+int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name, UnitFilePresets *cached);
+
+const char *unit_file_state_to_string(UnitFileState s) _const_;
+UnitFileState unit_file_state_from_string(const char *s) _pure_;
+/* from_string conversion is unreliable because of the overlap between -EPERM and -1 for error. */
+
+const char *unit_file_change_type_to_string(UnitFileChangeType s) _const_;
+UnitFileChangeType unit_file_change_type_from_string(const char *s) _pure_;
+
+const char *unit_file_preset_mode_to_string(UnitFilePresetMode m) _const_;
+UnitFilePresetMode unit_file_preset_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/ip-protocol-list.c b/src/shared/ip-protocol-list.c
new file mode 100644
index 0000000..0623d5e
--- /dev/null
+++ b/src/shared/ip-protocol-list.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <netinet/in.h>
+
+#include "alloc-util.h"
+#include "ip-protocol-list.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+static const struct ip_protocol_name* lookup_ip_protocol(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "ip-protocol-from-name.h"
+#include "ip-protocol-to-name.h"
+
+const char *ip_protocol_to_name(int id) {
+
+ if (id < 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(ip_protocol_names))
+ return NULL;
+
+ return ip_protocol_names[id];
+}
+
+int ip_protocol_from_name(const char *name) {
+ const struct ip_protocol_name *sc;
+
+ assert(name);
+
+ sc = lookup_ip_protocol(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+int parse_ip_protocol(const char *s) {
+ _cleanup_free_ char *str = NULL;
+ int i, r;
+
+ assert(s);
+
+ if (isempty(s))
+ return IPPROTO_IP;
+
+ /* Do not use strdupa() here, as the input string may come from *
+ * command line or config files. */
+ str = strdup(s);
+ if (!str)
+ return -ENOMEM;
+
+ i = ip_protocol_from_name(ascii_strlower(str));
+ if (i >= 0)
+ return i;
+
+ r = safe_atoi(str, &i);
+ if (r < 0)
+ return r;
+
+ if (!ip_protocol_to_name(i))
+ return -EINVAL;
+
+ return i;
+}
diff --git a/src/shared/ip-protocol-list.h b/src/shared/ip-protocol-list.h
new file mode 100644
index 0000000..abe3f5f
--- /dev/null
+++ b/src/shared/ip-protocol-list.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+const char *ip_protocol_to_name(int id);
+int ip_protocol_from_name(const char *name);
+int parse_ip_protocol(const char *s);
diff --git a/src/shared/ip-protocol-to-name.awk b/src/shared/ip-protocol-to-name.awk
new file mode 100644
index 0000000..824f811
--- /dev/null
+++ b/src/shared/ip-protocol-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const ip_protocol_names[] = { "
+}
+!/HOPOPTS/ {
+ printf " [IPPROTO_%s] = \"%s\",\n", $1, tolower($1)
+}
+END{
+ print "};"
+}
diff --git a/src/shared/ipvlan-util.c b/src/shared/ipvlan-util.c
new file mode 100644
index 0000000..1f2e2ff
--- /dev/null
+++ b/src/shared/ipvlan-util.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "ipvlan-util.h"
+#include "string-table.h"
+
+static const char* const ipvlan_mode_table[_NETDEV_IPVLAN_MODE_MAX] = {
+ [NETDEV_IPVLAN_MODE_L2] = "L2",
+ [NETDEV_IPVLAN_MODE_L3] = "L3",
+ [NETDEV_IPVLAN_MODE_L3S] = "L3S",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ipvlan_mode, IPVlanMode);
+
+static const char* const ipvlan_flags_table[_NETDEV_IPVLAN_FLAGS_MAX] = {
+ [NETDEV_IPVLAN_FLAGS_BRIGDE] = "bridge",
+ [NETDEV_IPVLAN_FLAGS_PRIVATE] = "private",
+ [NETDEV_IPVLAN_FLAGS_VEPA] = "vepa",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ipvlan_flags, IPVlanFlags);
diff --git a/src/shared/ipvlan-util.h b/src/shared/ipvlan-util.h
new file mode 100644
index 0000000..90f755b
--- /dev/null
+++ b/src/shared/ipvlan-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_link.h>
+
+#include "macro.h"
+
+typedef enum IPVlanMode {
+ NETDEV_IPVLAN_MODE_L2 = IPVLAN_MODE_L2,
+ NETDEV_IPVLAN_MODE_L3 = IPVLAN_MODE_L3,
+ NETDEV_IPVLAN_MODE_L3S = IPVLAN_MODE_L3S,
+ _NETDEV_IPVLAN_MODE_MAX,
+ _NETDEV_IPVLAN_MODE_INVALID = -1
+} IPVlanMode;
+
+typedef enum IPVlanFlags {
+ NETDEV_IPVLAN_FLAGS_BRIGDE,
+ NETDEV_IPVLAN_FLAGS_PRIVATE = IPVLAN_F_PRIVATE,
+ NETDEV_IPVLAN_FLAGS_VEPA = IPVLAN_F_VEPA,
+ _NETDEV_IPVLAN_FLAGS_MAX,
+ _NETDEV_IPVLAN_FLAGS_INVALID = -1
+} IPVlanFlags;
+
+const char *ipvlan_mode_to_string(IPVlanMode d) _const_;
+IPVlanMode ipvlan_mode_from_string(const char *d) _pure_;
+
+const char *ipvlan_flags_to_string(IPVlanFlags d) _const_;
+IPVlanFlags ipvlan_flags_from_string(const char *d) _pure_;
diff --git a/src/shared/journal-importer.c b/src/shared/journal-importer.c
new file mode 100644
index 0000000..b2785f0
--- /dev/null
+++ b/src/shared/journal-importer.c
@@ -0,0 +1,483 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-importer.h"
+#include "journal-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "unaligned.h"
+
+enum {
+ IMPORTER_STATE_LINE = 0, /* waiting to read, or reading line */
+ IMPORTER_STATE_DATA_START, /* reading binary data header */
+ IMPORTER_STATE_DATA, /* reading binary data */
+ IMPORTER_STATE_DATA_FINISH, /* expecting newline */
+ IMPORTER_STATE_EOF, /* done */
+};
+
+void journal_importer_cleanup(JournalImporter *imp) {
+ if (imp->fd >= 0 && !imp->passive_fd) {
+ log_debug("Closing %s (fd=%d)", imp->name ?: "importer", imp->fd);
+ safe_close(imp->fd);
+ }
+
+ free(imp->name);
+ free(imp->buf);
+ iovw_free_contents(&imp->iovw, false);
+}
+
+static char* realloc_buffer(JournalImporter *imp, size_t size) {
+ char *b, *old = imp->buf;
+
+ b = GREEDY_REALLOC(imp->buf, imp->size, size);
+ if (!b)
+ return NULL;
+
+ iovw_rebase(&imp->iovw, old, imp->buf);
+
+ return b;
+}
+
+static int get_line(JournalImporter *imp, char **line, size_t *size) {
+ ssize_t n;
+ char *c = NULL;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_LINE);
+ assert(imp->offset <= imp->filled);
+ assert(imp->filled <= imp->size);
+ assert(!imp->buf || imp->size > 0);
+ assert(imp->fd >= 0);
+
+ for (;;) {
+ if (imp->buf) {
+ size_t start = MAX(imp->scanned, imp->offset);
+
+ c = memchr(imp->buf + start, '\n',
+ imp->filled - start);
+ if (c)
+ break;
+ }
+
+ imp->scanned = imp->filled;
+ if (imp->scanned >= DATA_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "Entry is bigger than %u bytes.",
+ DATA_SIZE_MAX);
+
+ if (imp->passive_fd)
+ /* we have to wait for some data to come to us */
+ return -EAGAIN;
+
+ /* We know that imp->filled is at most DATA_SIZE_MAX, so if
+ we reallocate it, we'll increase the size at least a bit. */
+ assert_cc(DATA_SIZE_MAX < ENTRY_SIZE_MAX);
+ if (imp->size - imp->filled < LINE_CHUNK &&
+ !realloc_buffer(imp, MIN(imp->filled + LINE_CHUNK, ENTRY_SIZE_MAX)))
+ return log_oom();
+
+ assert(imp->buf);
+ assert(imp->size - imp->filled >= LINE_CHUNK ||
+ imp->size == ENTRY_SIZE_MAX);
+
+ n = read(imp->fd,
+ imp->buf + imp->filled,
+ imp->size - imp->filled);
+ if (n < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "read(%d, ..., %zu): %m",
+ imp->fd,
+ imp->size - imp->filled);
+ return -errno;
+ } else if (n == 0)
+ return 0;
+
+ imp->filled += n;
+ }
+
+ *line = imp->buf + imp->offset;
+ *size = c + 1 - imp->buf - imp->offset;
+ imp->offset += *size;
+
+ return 1;
+}
+
+static int fill_fixed_size(JournalImporter *imp, void **data, size_t size) {
+
+ assert(imp);
+ assert(IN_SET(imp->state, IMPORTER_STATE_DATA_START, IMPORTER_STATE_DATA, IMPORTER_STATE_DATA_FINISH));
+ assert(size <= DATA_SIZE_MAX);
+ assert(imp->offset <= imp->filled);
+ assert(imp->filled <= imp->size);
+ assert(imp->buf || imp->size == 0);
+ assert(!imp->buf || imp->size > 0);
+ assert(imp->fd >= 0);
+ assert(data);
+
+ while (imp->filled - imp->offset < size) {
+ int n;
+
+ if (imp->passive_fd)
+ /* we have to wait for some data to come to us */
+ return -EAGAIN;
+
+ if (!realloc_buffer(imp, imp->offset + size))
+ return log_oom();
+
+ n = read(imp->fd, imp->buf + imp->filled,
+ imp->size - imp->filled);
+ if (n < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "read(%d, ..., %zu): %m", imp->fd,
+ imp->size - imp->filled);
+ return -errno;
+ } else if (n == 0)
+ return 0;
+
+ imp->filled += n;
+ }
+
+ *data = imp->buf + imp->offset;
+ imp->offset += size;
+
+ return 1;
+}
+
+static int get_data_size(JournalImporter *imp) {
+ int r;
+ void *data;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_DATA_START);
+ assert(imp->data_size == 0);
+
+ r = fill_fixed_size(imp, &data, sizeof(uint64_t));
+ if (r <= 0)
+ return r;
+
+ imp->data_size = unaligned_read_le64(data);
+ if (imp->data_size > DATA_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Stream declares field with size %zu > DATA_SIZE_MAX = %u",
+ imp->data_size, DATA_SIZE_MAX);
+ if (imp->data_size == 0)
+ log_warning("Binary field with zero length");
+
+ return 1;
+}
+
+static int get_data_data(JournalImporter *imp, void **data) {
+ int r;
+
+ assert(imp);
+ assert(data);
+ assert(imp->state == IMPORTER_STATE_DATA);
+
+ r = fill_fixed_size(imp, data, imp->data_size);
+ if (r <= 0)
+ return r;
+
+ return 1;
+}
+
+static int get_data_newline(JournalImporter *imp) {
+ int r;
+ char *data;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_DATA_FINISH);
+
+ r = fill_fixed_size(imp, (void**) &data, 1);
+ if (r <= 0)
+ return r;
+
+ assert(data);
+ if (*data != '\n') {
+ char buf[4];
+ int l;
+
+ l = cescape_char(*data, buf);
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected newline, got '%.*s'", l, buf);
+ }
+
+ return 1;
+}
+
+static int process_special_field(JournalImporter *imp, char *line) {
+ const char *value;
+ char buf[CELLESCAPE_DEFAULT_LENGTH];
+ int r;
+
+ assert(line);
+
+ value = startswith(line, "__CURSOR=");
+ if (value)
+ /* ignore __CURSOR */
+ return 1;
+
+ value = startswith(line, "__REALTIME_TIMESTAMP=");
+ if (value) {
+ uint64_t x;
+
+ r = safe_atou64(value, &x);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse __REALTIME_TIMESTAMP '%s': %m",
+ cellescape(buf, sizeof buf, value));
+ else if (!VALID_REALTIME(x)) {
+ log_warning("__REALTIME_TIMESTAMP out of range, ignoring: %"PRIu64, x);
+ return -ERANGE;
+ }
+
+ imp->ts.realtime = x;
+ return 1;
+ }
+
+ value = startswith(line, "__MONOTONIC_TIMESTAMP=");
+ if (value) {
+ uint64_t x;
+
+ r = safe_atou64(value, &x);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse __MONOTONIC_TIMESTAMP '%s': %m",
+ cellescape(buf, sizeof buf, value));
+ else if (!VALID_MONOTONIC(x)) {
+ log_warning("__MONOTONIC_TIMESTAMP out of range, ignoring: %"PRIu64, x);
+ return -ERANGE;
+ }
+
+ imp->ts.monotonic = x;
+ return 1;
+ }
+
+ /* Just a single underline, but it needs special treatment too. */
+ value = startswith(line, "_BOOT_ID=");
+ if (value) {
+ r = sd_id128_from_string(value, &imp->boot_id);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse _BOOT_ID '%s': %m",
+ cellescape(buf, sizeof buf, value));
+
+ /* store the field in the usual fashion too */
+ return 0;
+ }
+
+ value = startswith(line, "__");
+ if (value) {
+ log_notice("Unknown dunder line __%s, ignoring.", cellescape(buf, sizeof buf, value));
+ return 1;
+ }
+
+ /* no dunder */
+ return 0;
+}
+
+int journal_importer_process_data(JournalImporter *imp) {
+ int r;
+
+ switch(imp->state) {
+ case IMPORTER_STATE_LINE: {
+ char *line, *sep;
+ size_t n = 0;
+
+ assert(imp->data_size == 0);
+
+ r = get_line(imp, &line, &n);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+ assert(n > 0);
+ assert(line[n-1] == '\n');
+
+ if (n == 1) {
+ log_trace("Received empty line, event is ready");
+ return 1;
+ }
+
+ /* MESSAGE=xxx\n
+ or
+ COREDUMP\n
+ LLLLLLLL0011223344...\n
+ */
+ sep = memchr(line, '=', n);
+ if (sep) {
+ /* chomp newline */
+ n--;
+
+ if (!journal_field_valid(line, sep - line, true)) {
+ char buf[64], *t;
+
+ t = strndupa(line, sep - line);
+ log_debug("Ignoring invalid field: \"%s\"",
+ cellescape(buf, sizeof buf, t));
+
+ return 0;
+ }
+
+ line[n] = '\0';
+ r = process_special_field(imp, line);
+ if (r != 0)
+ return r < 0 ? r : 0;
+
+ r = iovw_put(&imp->iovw, line, n);
+ if (r < 0)
+ return r;
+ } else {
+ if (!journal_field_valid(line, n - 1, true)) {
+ char buf[64], *t;
+
+ t = strndupa(line, n - 1);
+ log_debug("Ignoring invalid field: \"%s\"",
+ cellescape(buf, sizeof buf, t));
+
+ return 0;
+ }
+
+ /* replace \n with = */
+ line[n-1] = '=';
+
+ imp->field_len = n;
+ imp->state = IMPORTER_STATE_DATA_START;
+
+ /* we cannot put the field in iovec until we have all data */
+ }
+
+ log_trace("Received: %.*s (%s)", (int) n, line, sep ? "text" : "binary");
+
+ return 0; /* continue */
+ }
+
+ case IMPORTER_STATE_DATA_START:
+ assert(imp->data_size == 0);
+
+ r = get_data_size(imp);
+ // log_debug("get_data_size() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ imp->state = imp->data_size > 0 ?
+ IMPORTER_STATE_DATA : IMPORTER_STATE_DATA_FINISH;
+
+ return 0; /* continue */
+
+ case IMPORTER_STATE_DATA: {
+ void *data;
+ char *field;
+
+ assert(imp->data_size > 0);
+
+ r = get_data_data(imp, &data);
+ // log_debug("get_data_data() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ assert(data);
+
+ field = (char*) data - sizeof(uint64_t) - imp->field_len;
+ memmove(field + sizeof(uint64_t), field, imp->field_len);
+
+ r = iovw_put(&imp->iovw, field + sizeof(uint64_t), imp->field_len + imp->data_size);
+ if (r < 0)
+ return r;
+
+ imp->state = IMPORTER_STATE_DATA_FINISH;
+
+ return 0; /* continue */
+ }
+
+ case IMPORTER_STATE_DATA_FINISH:
+ r = get_data_newline(imp);
+ // log_debug("get_data_newline() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ imp->data_size = 0;
+ imp->state = IMPORTER_STATE_LINE;
+
+ return 0; /* continue */
+ default:
+ assert_not_reached("wtf?");
+ }
+}
+
+int journal_importer_push_data(JournalImporter *imp, const char *data, size_t size) {
+ assert(imp);
+ assert(imp->state != IMPORTER_STATE_EOF);
+
+ if (!realloc_buffer(imp, imp->filled + size))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOMEM),
+ "Failed to store received data of size %zu "
+ "(in addition to existing %zu bytes with %zu filled): %s",
+ size, imp->size, imp->filled,
+ strerror_safe(ENOMEM));
+
+ memcpy(imp->buf + imp->filled, data, size);
+ imp->filled += size;
+
+ return 0;
+}
+
+void journal_importer_drop_iovw(JournalImporter *imp) {
+ size_t remain, target;
+
+ /* This function drops processed data that along with the iovw that points at it */
+
+ iovw_free_contents(&imp->iovw, false);
+
+ /* possibly reset buffer position */
+ remain = imp->filled - imp->offset;
+
+ if (remain == 0) /* no brainer */
+ imp->offset = imp->scanned = imp->filled = 0;
+ else if (imp->offset > imp->size - imp->filled &&
+ imp->offset > remain) {
+ memcpy(imp->buf, imp->buf + imp->offset, remain);
+ imp->offset = imp->scanned = 0;
+ imp->filled = remain;
+ }
+
+ target = imp->size;
+ while (target > 16 * LINE_CHUNK && imp->filled < target / 2)
+ target /= 2;
+ if (target < imp->size) {
+ char *tmp;
+
+ tmp = realloc(imp->buf, target);
+ if (!tmp)
+ log_warning("Failed to reallocate buffer to (smaller) size %zu",
+ target);
+ else {
+ log_debug("Reallocated buffer from %zu to %zu bytes",
+ imp->size, target);
+ imp->buf = tmp;
+ imp->size = target;
+ }
+ }
+}
+
+bool journal_importer_eof(const JournalImporter *imp) {
+ return imp->state == IMPORTER_STATE_EOF;
+}
diff --git a/src/shared/journal-importer.h b/src/shared/journal-importer.h
new file mode 100644
index 0000000..e0073fc
--- /dev/null
+++ b/src/shared/journal-importer.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/uio.h>
+
+#include "sd-id128.h"
+
+#include "io-util.h"
+#include "time-util.h"
+
+/* Make sure not to make this smaller than the maximum coredump size.
+ * See JOURNAL_SIZE_MAX in coredump.c */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#define ENTRY_SIZE_MAX (1024*1024*770u)
+#define DATA_SIZE_MAX (1024*1024*768u)
+#else
+#define ENTRY_SIZE_MAX (1024*1024*13u)
+#define DATA_SIZE_MAX (1024*1024*11u)
+#endif
+#define LINE_CHUNK 8*1024u
+
+/* The maximum number of fields in an entry */
+#define ENTRY_FIELD_COUNT_MAX 1024
+
+typedef struct JournalImporter {
+ int fd;
+ bool passive_fd;
+ char *name;
+
+ char *buf;
+ size_t size; /* total size of the buffer */
+ size_t offset; /* offset to the beginning of live data in the buffer */
+ size_t scanned; /* number of bytes since the beginning of data without a newline */
+ size_t filled; /* total number of bytes in the buffer */
+
+ size_t field_len; /* used for binary fields: the field name length */
+ size_t data_size; /* and the size of the binary data chunk being processed */
+
+ struct iovec_wrapper iovw;
+
+ int state;
+ dual_timestamp ts;
+ sd_id128_t boot_id;
+} JournalImporter;
+
+#define JOURNAL_IMPORTER_INIT(_fd) { .fd = (_fd), .iovw = {} }
+#define JOURNAL_IMPORTER_MAKE(_fd) (JournalImporter) JOURNAL_IMPORTER_INIT(_fd)
+
+void journal_importer_cleanup(JournalImporter *);
+int journal_importer_process_data(JournalImporter *);
+int journal_importer_push_data(JournalImporter *, const char *data, size_t size);
+void journal_importer_drop_iovw(JournalImporter *);
+bool journal_importer_eof(const JournalImporter *);
+
+static inline size_t journal_importer_bytes_remaining(const JournalImporter *imp) {
+ return imp->filled;
+}
diff --git a/src/shared/journal-util.c b/src/shared/journal-util.c
new file mode 100644
index 0000000..9e1870e
--- /dev/null
+++ b/src/shared/journal-util.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "acl-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "log.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int access_check_var_log_journal(sd_journal *j, bool want_other_users) {
+#if HAVE_ACL
+ _cleanup_strv_free_ char **g = NULL;
+ const char* dir;
+#endif
+ int r;
+
+ assert(j);
+
+ /* If we are root, we should have access, don't warn. */
+ if (getuid() == 0)
+ return 0;
+
+ /* If we are in the 'systemd-journal' group, we should have
+ * access too. */
+ r = in_group("systemd-journal");
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if we are in the 'systemd-journal' group: %m");
+ if (r > 0)
+ return 0;
+
+#if HAVE_ACL
+ if (laccess("/run/log/journal", F_OK) >= 0)
+ dir = "/run/log/journal";
+ else
+ dir = "/var/log/journal";
+
+ /* If we are in any of the groups listed in the journal ACLs,
+ * then all is good, too. Let's enumerate all groups from the
+ * default ACL of the directory, which generally should allow
+ * access to most journal files too. */
+ r = acl_search_groups(dir, &g);
+ if (r < 0)
+ return log_error_errno(r, "Failed to search journal ACL: %m");
+ if (r > 0)
+ return 0;
+
+ /* Print a pretty list, if there were ACLs set. */
+ if (!strv_isempty(g)) {
+ _cleanup_free_ char *s = NULL;
+
+ /* There are groups in the ACL, let's list them */
+ r = strv_extend(&g, "systemd-journal");
+ if (r < 0)
+ return log_oom();
+
+ strv_sort(g);
+ strv_uniq(g);
+
+ s = strv_join(g, "', '");
+ if (!s)
+ return log_oom();
+
+ log_notice("Hint: You are currently not seeing messages from %s.\n"
+ " Users in groups '%s' can see all messages.\n"
+ " Pass -q to turn off this notice.",
+ want_other_users ? "other users and the system" : "the system",
+ s);
+ return 1;
+ }
+#endif
+
+ /* If no ACLs were found, print a short version of the message. */
+ log_notice("Hint: You are currently not seeing messages from %s.\n"
+ " Users in the 'systemd-journal' group can see all messages. Pass -q to\n"
+ " turn off this notice.",
+ want_other_users ? "other users and the system" : "the system");
+
+ return 1;
+}
+
+int journal_access_blocked(sd_journal *j) {
+ return hashmap_contains(j->errors, INT_TO_PTR(-EACCES));
+}
+
+int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users) {
+ void *code;
+ char *path;
+ int r = 0;
+
+ assert(j);
+
+ if (hashmap_isempty(j->errors)) {
+ if (ordered_hashmap_isempty(j->files) && !quiet)
+ log_notice("No journal files were found.");
+
+ return 0;
+ }
+
+ if (journal_access_blocked(j)) {
+ if (!quiet)
+ (void) access_check_var_log_journal(j, want_other_users);
+
+ if (ordered_hashmap_isempty(j->files))
+ r = log_error_errno(EACCES, "No journal files were opened due to insufficient permissions.");
+ }
+
+ HASHMAP_FOREACH_KEY(path, code, j->errors) {
+ int err;
+
+ err = abs(PTR_TO_INT(code));
+
+ switch (err) {
+ case EACCES:
+ continue;
+
+ case ENODATA:
+ log_warning_errno(err, "Journal file %s is truncated, ignoring file.", path);
+ break;
+
+ case EPROTONOSUPPORT:
+ log_warning_errno(err, "Journal file %1$s uses an unsupported feature, ignoring file.\n"
+ "Use SYSTEMD_LOG_LEVEL=debug journalctl --file=%1$s to see the details.",
+ path);
+ break;
+
+ case EBADMSG:
+ log_warning_errno(err, "Journal file %s corrupted, ignoring file.", path);
+ break;
+
+ default:
+ log_warning_errno(err, "An error was encountered while opening journal file or directory %s, ignoring file: %m", path);
+ break;
+ }
+ }
+
+ return r;
+}
diff --git a/src/shared/journal-util.h b/src/shared/journal-util.h
new file mode 100644
index 0000000..86fcba0
--- /dev/null
+++ b/src/shared/journal-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-journal.h"
+
+int journal_access_blocked(sd_journal *j);
+int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users);
diff --git a/src/shared/json-internal.h b/src/shared/json-internal.h
new file mode 100644
index 0000000..63afd22
--- /dev/null
+++ b/src/shared/json-internal.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include "json.h"
+
+/* This header should include all prototypes only the JSON parser itself and
+ * its tests need access to. Normal code consuming the JSON parser should not
+ * interface with this. */
+
+typedef union JsonValue {
+ /* Encodes a simple value. On x86-64 this structure is 16 bytes wide (as long double is 128bit). */
+ bool boolean;
+ long double real;
+ intmax_t integer;
+ uintmax_t unsig;
+} JsonValue;
+
+/* Let's protect us against accidental structure size changes on our most relevant arch */
+#ifdef __x86_64__
+assert_cc(sizeof(JsonValue) == 16U);
+#endif
+
+#define JSON_VALUE_NULL ((JsonValue) {})
+
+/* We use fake JsonVariant objects for some special values, in order to avoid memory allocations for them. Note that
+ * effectively this means that there are multiple ways to encode the same objects: via these magic values or as
+ * properly allocated JsonVariant. We convert between both on-the-fly as necessary. */
+enum
+{
+ _JSON_VARIANT_MAGIC_TRUE = 1,
+#define JSON_VARIANT_MAGIC_TRUE ((JsonVariant*) _JSON_VARIANT_MAGIC_TRUE)
+ _JSON_VARIANT_MAGIC_FALSE,
+#define JSON_VARIANT_MAGIC_FALSE ((JsonVariant*) _JSON_VARIANT_MAGIC_FALSE)
+ _JSON_VARIANT_MAGIC_NULL,
+#define JSON_VARIANT_MAGIC_NULL ((JsonVariant*) _JSON_VARIANT_MAGIC_NULL)
+ _JSON_VARIANT_MAGIC_ZERO_INTEGER,
+#define JSON_VARIANT_MAGIC_ZERO_INTEGER ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_INTEGER)
+ _JSON_VARIANT_MAGIC_ZERO_UNSIGNED,
+#define JSON_VARIANT_MAGIC_ZERO_UNSIGNED ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_UNSIGNED)
+ _JSON_VARIANT_MAGIC_ZERO_REAL,
+#define JSON_VARIANT_MAGIC_ZERO_REAL ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_REAL)
+ _JSON_VARIANT_MAGIC_EMPTY_STRING,
+#define JSON_VARIANT_MAGIC_EMPTY_STRING ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_STRING)
+ _JSON_VARIANT_MAGIC_EMPTY_ARRAY,
+#define JSON_VARIANT_MAGIC_EMPTY_ARRAY ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_ARRAY)
+ _JSON_VARIANT_MAGIC_EMPTY_OBJECT,
+#define JSON_VARIANT_MAGIC_EMPTY_OBJECT ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ __JSON_VARIANT_MAGIC_MAX
+#define _JSON_VARIANT_MAGIC_MAX ((JsonVariant*) __JSON_VARIANT_MAGIC_MAX)
+};
+
+/* This is only safe as long as we don't define more than 4K magic pointers, i.e. the page size of the simplest
+ * architectures we support. That's because we rely on the fact that malloc() will never allocate from the first memory
+ * page, as it is a faulting page for catching NULL pointer dereferences. */
+assert_cc((unsigned) __JSON_VARIANT_MAGIC_MAX < 4096U);
+
+enum { /* JSON tokens */
+ JSON_TOKEN_END,
+ JSON_TOKEN_COLON,
+ JSON_TOKEN_COMMA,
+ JSON_TOKEN_OBJECT_OPEN,
+ JSON_TOKEN_OBJECT_CLOSE,
+ JSON_TOKEN_ARRAY_OPEN,
+ JSON_TOKEN_ARRAY_CLOSE,
+ JSON_TOKEN_STRING,
+ JSON_TOKEN_REAL,
+ JSON_TOKEN_INTEGER,
+ JSON_TOKEN_UNSIGNED,
+ JSON_TOKEN_BOOLEAN,
+ JSON_TOKEN_NULL,
+ _JSON_TOKEN_MAX,
+ _JSON_TOKEN_INVALID = -1,
+};
+
+int json_tokenize(const char **p, char **ret_string, JsonValue *ret_value, unsigned *ret_line, unsigned *ret_column, void **state, unsigned *line, unsigned *column);
diff --git a/src/shared/json.c b/src/shared/json.c
new file mode 100644
index 0000000..ddf6dcb
--- /dev/null
+++ b/src/shared/json.c
@@ -0,0 +1,4410 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <locale.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "float.h"
+#include "hexdecoct.h"
+#include "json-internal.h"
+#include "json.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "utf8.h"
+
+/* Refuse putting together variants with a larger depth than 2K by default (as a protection against overflowing stacks
+ * if code processes JSON objects recursively. Note that we store the depth in an uint16_t, hence make sure this
+ * remains under 2^16.
+ *
+ * The value first was 16k, but it was discovered to be too high on llvm/x86-64. See also:
+ * https://github.com/systemd/systemd/issues/10738
+ *
+ * The value then was 4k, but it was discovered to be too high on s390x/aarch64. See also:
+ * https://github.com/systemd/systemd/issues/14396 */
+
+#define DEPTH_MAX (2U*1024U)
+assert_cc(DEPTH_MAX <= UINT16_MAX);
+
+typedef struct JsonSource {
+ /* When we parse from a file or similar, encodes the filename, to indicate the source of a json variant */
+ size_t n_ref;
+ unsigned max_line;
+ unsigned max_column;
+ char name[];
+} JsonSource;
+
+/* On x86-64 this whole structure should have a size of 6 * 64 bit = 48 bytes */
+struct JsonVariant {
+ union {
+ /* We either maintain a reference counter for this variant itself, or we are embedded into an
+ * array/object, in which case only that surrounding object is ref-counted. (If 'embedded' is false,
+ * see below.) */
+ size_t n_ref;
+
+ /* If this JsonVariant is part of an array/object, then this field points to the surrounding
+ * JSON_VARIANT_ARRAY/JSON_VARIANT_OBJECT object. (If 'embedded' is true, see below.) */
+ JsonVariant *parent;
+ };
+
+ /* If this was parsed from some file or buffer, this stores where from, as well as the source line/column */
+ JsonSource *source;
+ unsigned line, column;
+
+ JsonVariantType type:5;
+
+ /* A marker whether this variant is embedded into in array/object or not. If true, the 'parent' pointer above
+ * is valid. If false, the 'n_ref' field above is valid instead. */
+ bool is_embedded:1;
+
+ /* In some conditions (for example, if this object is part of an array of strings or objects), we don't store
+ * any data inline, but instead simply reference an external object and act as surrogate of it. In that case
+ * this bool is set, and the external object is referenced through the .reference field below. */
+ bool is_reference:1;
+
+ /* While comparing two arrays, we use this for marking what we already have seen */
+ bool is_marked:1;
+
+ /* Erase from memory when freeing */
+ bool sensitive:1;
+
+ /* If this is an object the fields are strictly ordered by name */
+ bool sorted:1;
+
+ /* If in addition to this object all objects referenced by it are also ordered strictly by name */
+ bool normalized:1;
+
+ /* The current 'depth' of the JsonVariant, i.e. how many levels of member variants this has */
+ uint16_t depth;
+
+ union {
+ /* For simple types we store the value in-line. */
+ JsonValue value;
+
+ /* For objects and arrays we store the number of elements immediately following */
+ size_t n_elements;
+
+ /* If is_reference as indicated above is set, this is where the reference object is actually stored. */
+ JsonVariant *reference;
+
+ /* Strings are placed immediately after the structure. Note that when this is a JsonVariant embedded
+ * into an array we might encode strings up to INLINE_STRING_LENGTH characters directly inside the
+ * element, while longer strings are stored as references. When this object is not embedded into an
+ * array, but stand-alone we allocate the right size for the whole structure, i.e. the array might be
+ * much larger than INLINE_STRING_LENGTH.
+ *
+ * Note that because we want to allocate arrays of the JsonVariant structure we specify [0] here,
+ * rather than the prettier []. If we wouldn't, then this char array would have undefined size, and so
+ * would the union and then the struct this is included in. And of structures with undefined size we
+ * can't allocate arrays (at least not easily). */
+ char string[0];
+ };
+};
+
+/* Inside string arrays we have a series of JasonVariant structures one after the other. In this case, strings longer
+ * than INLINE_STRING_MAX are stored as references, and all shorter ones inline. (This means — on x86-64 — strings up
+ * to 15 chars are stored within the array elements, and all others in separate allocations) */
+#define INLINE_STRING_MAX (sizeof(JsonVariant) - offsetof(JsonVariant, string) - 1U)
+
+/* Let's make sure this structure isn't increased in size accidentally. This check is only for our most relevant arch
+ * (x86-64). */
+#ifdef __x86_64__
+assert_cc(sizeof(JsonVariant) == 48U);
+assert_cc(INLINE_STRING_MAX == 15U);
+#endif
+
+static JsonSource* json_source_new(const char *name) {
+ JsonSource *s;
+
+ assert(name);
+
+ s = malloc(offsetof(JsonSource, name) + strlen(name) + 1);
+ if (!s)
+ return NULL;
+
+ *s = (JsonSource) {
+ .n_ref = 1,
+ };
+ strcpy(s->name, name);
+
+ return s;
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(JsonSource, json_source, mfree);
+
+static bool json_source_equal(JsonSource *a, JsonSource *b) {
+ if (a == b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ return streq(a->name, b->name);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(JsonSource*, json_source_unref);
+
+/* There are four kind of JsonVariant* pointers:
+ *
+ * 1. NULL
+ * 2. A 'regular' one, i.e. pointing to malloc() memory
+ * 3. A 'magic' one, i.e. one of the special JSON_VARIANT_MAGIC_XYZ values, that encode a few very basic values directly in the pointer.
+ * 4. A 'const string' one, i.e. a pointer to a const string.
+ *
+ * The four kinds of pointers can be discerned like this:
+ *
+ * Detecting #1 is easy, just compare with NULL. Detecting #3 is similarly easy: all magic pointers are below
+ * _JSON_VARIANT_MAGIC_MAX (which is pretty low, within the first memory page, which is special on Linux and other
+ * OSes, as it is a faulting page). In order to discern #2 and #4 we check the lowest bit. If it's off it's #2,
+ * otherwise #4. This makes use of the fact that malloc() will return "maximum aligned" memory, which definitely
+ * means the pointer is even. This means we can use the uneven pointers to reference static strings, as long as we
+ * make sure that all static strings used like this are aligned to 2 (or higher), and that we mask the bit on
+ * access. The JSON_VARIANT_STRING_CONST() macro encodes strings as JsonVariant* pointers, with the bit set. */
+
+static bool json_variant_is_magic(const JsonVariant *v) {
+ if (!v)
+ return false;
+
+ return v < _JSON_VARIANT_MAGIC_MAX;
+}
+
+static bool json_variant_is_const_string(const JsonVariant *v) {
+
+ if (v < _JSON_VARIANT_MAGIC_MAX)
+ return false;
+
+ /* A proper JsonVariant is aligned to whatever malloc() aligns things too, which is definitely not uneven. We
+ * hence use all uneven pointers as indicators for const strings. */
+
+ return (((uintptr_t) v) & 1) != 0;
+}
+
+static bool json_variant_is_regular(const JsonVariant *v) {
+
+ if (v < _JSON_VARIANT_MAGIC_MAX)
+ return false;
+
+ return (((uintptr_t) v) & 1) == 0;
+}
+
+static JsonVariant *json_variant_dereference(JsonVariant *v) {
+
+ /* Recursively dereference variants that are references to other variants */
+
+ if (!v)
+ return NULL;
+
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (!v->is_reference)
+ return v;
+
+ return json_variant_dereference(v->reference);
+}
+
+static uint16_t json_variant_depth(JsonVariant *v) {
+
+ v = json_variant_dereference(v);
+ if (!v)
+ return 0;
+
+ if (!json_variant_is_regular(v))
+ return 0;
+
+ return v->depth;
+}
+
+static JsonVariant *json_variant_formalize(JsonVariant *v) {
+
+ /* Converts json variant pointers to their normalized form, i.e. fully dereferenced and wherever
+ * possible converted to the "magic" version if there is one */
+
+ if (!v)
+ return NULL;
+
+ v = json_variant_dereference(v);
+
+ switch (json_variant_type(v)) {
+
+ case JSON_VARIANT_BOOLEAN:
+ return json_variant_boolean(v) ? JSON_VARIANT_MAGIC_TRUE : JSON_VARIANT_MAGIC_FALSE;
+
+ case JSON_VARIANT_NULL:
+ return JSON_VARIANT_MAGIC_NULL;
+
+ case JSON_VARIANT_INTEGER:
+ return json_variant_integer(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_INTEGER : v;
+
+ case JSON_VARIANT_UNSIGNED:
+ return json_variant_unsigned(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_UNSIGNED : v;
+
+ case JSON_VARIANT_REAL:
+ DISABLE_WARNING_FLOAT_EQUAL;
+ return json_variant_real(v) == 0.0 ? JSON_VARIANT_MAGIC_ZERO_REAL : v;
+ REENABLE_WARNING;
+
+ case JSON_VARIANT_STRING:
+ return isempty(json_variant_string(v)) ? JSON_VARIANT_MAGIC_EMPTY_STRING : v;
+
+ case JSON_VARIANT_ARRAY:
+ return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_ARRAY : v;
+
+ case JSON_VARIANT_OBJECT:
+ return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_OBJECT : v;
+
+ default:
+ return v;
+ }
+}
+
+static JsonVariant *json_variant_conservative_formalize(JsonVariant *v) {
+
+ /* Much like json_variant_formalize(), but won't simplify if the variant has a source/line location attached to
+ * it, in order not to lose context */
+
+ if (!v)
+ return NULL;
+
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (v->source || v->line > 0 || v->column > 0)
+ return v;
+
+ return json_variant_formalize(v);
+}
+
+static int json_variant_new(JsonVariant **ret, JsonVariantType type, size_t space) {
+ JsonVariant *v;
+
+ assert_return(ret, -EINVAL);
+
+ v = malloc0(MAX(sizeof(JsonVariant),
+ offsetof(JsonVariant, value) + space));
+ if (!v)
+ return -ENOMEM;
+
+ v->n_ref = 1;
+ v->type = type;
+
+ *ret = v;
+ return 0;
+}
+
+int json_variant_new_integer(JsonVariant **ret, intmax_t i) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (i == 0) {
+ *ret = JSON_VARIANT_MAGIC_ZERO_INTEGER;
+ return 0;
+ }
+
+ r = json_variant_new(&v, JSON_VARIANT_INTEGER, sizeof(i));
+ if (r < 0)
+ return r;
+
+ v->value.integer = i;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ if (u == 0) {
+ *ret = JSON_VARIANT_MAGIC_ZERO_UNSIGNED;
+ return 0;
+ }
+
+ r = json_variant_new(&v, JSON_VARIANT_UNSIGNED, sizeof(u));
+ if (r < 0)
+ return r;
+
+ v->value.unsig = u;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_real(JsonVariant **ret, long double d) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+ if (d == 0.0) {
+ *ret = JSON_VARIANT_MAGIC_ZERO_REAL;
+ return 0;
+ }
+ REENABLE_WARNING;
+
+ r = json_variant_new(&v, JSON_VARIANT_REAL, sizeof(d));
+ if (r < 0)
+ return r;
+
+ v->value.real = d;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_boolean(JsonVariant **ret, bool b) {
+ assert_return(ret, -EINVAL);
+
+ if (b)
+ *ret = JSON_VARIANT_MAGIC_TRUE;
+ else
+ *ret = JSON_VARIANT_MAGIC_FALSE;
+
+ return 0;
+}
+
+int json_variant_new_null(JsonVariant **ret) {
+ assert_return(ret, -EINVAL);
+
+ *ret = JSON_VARIANT_MAGIC_NULL;
+ return 0;
+}
+
+int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ if (!s) {
+ assert_return(IN_SET(n, 0, (size_t) -1), -EINVAL);
+ return json_variant_new_null(ret);
+ }
+ if (n == (size_t) -1) /* determine length automatically */
+ n = strlen(s);
+ else if (memchr(s, 0, n)) /* don't allow embedded NUL, as we can't express that in JSON */
+ return -EINVAL;
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_STRING;
+ return 0;
+ }
+
+ if (!utf8_is_valid_n(s, n)) /* JSON strings must be valid UTF-8 */
+ return -EUCLEAN;
+
+ r = json_variant_new(&v, JSON_VARIANT_STRING, n + 1);
+ if (r < 0)
+ return r;
+
+ memcpy(v->string, s, n);
+ v->string[n] = 0;
+
+ *ret = v;
+ return 0;
+}
+
+int json_variant_new_base64(JsonVariant **ret, const void *p, size_t n) {
+ _cleanup_free_ char *s = NULL;
+ ssize_t k;
+
+ assert_return(ret, -EINVAL);
+ assert_return(n == 0 || p, -EINVAL);
+
+ k = base64mem(p, n, &s);
+ if (k < 0)
+ return k;
+
+ return json_variant_new_stringn(ret, s, k);
+}
+
+int json_variant_new_id128(JsonVariant **ret, sd_id128_t id) {
+ char s[SD_ID128_STRING_MAX];
+
+ return json_variant_new_string(ret, sd_id128_to_string(id, s));
+}
+
+static void json_variant_set(JsonVariant *a, JsonVariant *b) {
+ assert(a);
+
+ b = json_variant_dereference(b);
+ if (!b) {
+ a->type = JSON_VARIANT_NULL;
+ return;
+ }
+
+ a->type = json_variant_type(b);
+ switch (a->type) {
+
+ case JSON_VARIANT_INTEGER:
+ a->value.integer = json_variant_integer(b);
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ a->value.unsig = json_variant_unsigned(b);
+ break;
+
+ case JSON_VARIANT_REAL:
+ a->value.real = json_variant_real(b);
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+ a->value.boolean = json_variant_boolean(b);
+ break;
+
+ case JSON_VARIANT_STRING: {
+ const char *s;
+
+ assert_se(s = json_variant_string(b));
+
+ /* Short strings we can store inline */
+ if (strnlen(s, INLINE_STRING_MAX+1) <= INLINE_STRING_MAX) {
+ strcpy(a->string, s);
+ break;
+ }
+
+ /* For longer strings, use a reference… */
+ _fallthrough_;
+ }
+
+ case JSON_VARIANT_ARRAY:
+ case JSON_VARIANT_OBJECT:
+ a->is_reference = true;
+ a->reference = json_variant_ref(json_variant_conservative_formalize(b));
+ break;
+
+ case JSON_VARIANT_NULL:
+ break;
+
+ default:
+ assert_not_reached("Unexpected variant type");
+ }
+}
+
+static void json_variant_copy_source(JsonVariant *v, JsonVariant *from) {
+ assert(v);
+ assert(from);
+
+ if (!json_variant_is_regular(from))
+ return;
+
+ v->line = from->line;
+ v->column = from->column;
+ v->source = json_source_ref(from->source);
+}
+
+int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ bool normalized = true;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+ assert_return(array, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements,
+ *c = array[v->n_elements];
+ uint16_t d;
+
+ d = json_variant_depth(c);
+ if (d >= DEPTH_MAX) /* Refuse too deep nesting */
+ return -ELNRNG;
+ if (d >= v->depth)
+ v->depth = d + 1;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ };
+
+ json_variant_set(w, c);
+ json_variant_copy_source(w, c);
+
+ if (!json_variant_is_normalized(c))
+ normalized = false;
+ }
+
+ v->normalized = normalized;
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n) {
+ JsonVariant *v;
+ size_t i;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+ assert_return(p, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ .n_elements = n,
+ .depth = 1,
+ };
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *w = v + 1 + i;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ .type = JSON_VARIANT_UNSIGNED,
+ .value.unsig = ((const uint8_t*) p)[i],
+ };
+ }
+
+ v->normalized = true;
+
+ *ret = v;
+ return 0;
+}
+
+int json_variant_new_array_strv(JsonVariant **ret, char **l) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ size_t n;
+ int r;
+
+ assert(ret);
+
+ n = strv_length(l);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ .depth = 1,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements;
+ size_t k;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ .type = JSON_VARIANT_STRING,
+ };
+
+ k = strlen(l[v->n_elements]);
+
+ if (k > INLINE_STRING_MAX) {
+ /* If string is too long, store it as reference. */
+
+ r = json_variant_new_string(&w->reference, l[v->n_elements]);
+ if (r < 0)
+ return r;
+
+ w->is_reference = true;
+ } else {
+ if (!utf8_is_valid_n(l[v->n_elements], k)) /* JSON strings must be valid UTF-8 */
+ return -EUCLEAN;
+
+ memcpy(w->string, l[v->n_elements], k+1);
+ }
+ }
+
+ v->normalized = true;
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ const char *prev = NULL;
+ bool sorted = true, normalized = true;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_OBJECT;
+ return 0;
+ }
+ assert_return(array, -EINVAL);
+ assert_return(n % 2 == 0, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_OBJECT,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements,
+ *c = array[v->n_elements];
+ uint16_t d;
+
+ if ((v->n_elements & 1) == 0) {
+ const char *k;
+
+ if (!json_variant_is_string(c))
+ return -EINVAL; /* Every second one needs to be a string, as it is the key name */
+
+ assert_se(k = json_variant_string(c));
+
+ if (prev && strcmp(k, prev) <= 0)
+ sorted = normalized = false;
+
+ prev = k;
+ } else if (!json_variant_is_normalized(c))
+ normalized = false;
+
+ d = json_variant_depth(c);
+ if (d >= DEPTH_MAX) /* Refuse too deep nesting */
+ return -ELNRNG;
+ if (d >= v->depth)
+ v->depth = d + 1;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ };
+
+ json_variant_set(w, c);
+ json_variant_copy_source(w, c);
+ }
+
+ v->normalized = normalized;
+ v->sorted = sorted;
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+static size_t json_variant_size(JsonVariant* v) {
+
+ if (!json_variant_is_regular(v))
+ return 0;
+
+ if (v->is_reference)
+ return offsetof(JsonVariant, reference) + sizeof(JsonVariant*);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_STRING:
+ return offsetof(JsonVariant, string) + strlen(v->string) + 1;
+
+ case JSON_VARIANT_REAL:
+ return offsetof(JsonVariant, value) + sizeof(long double);
+
+ case JSON_VARIANT_UNSIGNED:
+ return offsetof(JsonVariant, value) + sizeof(uintmax_t);
+
+ case JSON_VARIANT_INTEGER:
+ return offsetof(JsonVariant, value) + sizeof(intmax_t);
+
+ case JSON_VARIANT_BOOLEAN:
+ return offsetof(JsonVariant, value) + sizeof(bool);
+
+ case JSON_VARIANT_ARRAY:
+ case JSON_VARIANT_OBJECT:
+ return offsetof(JsonVariant, n_elements) + sizeof(size_t);
+
+ case JSON_VARIANT_NULL:
+ return offsetof(JsonVariant, value);
+
+ default:
+ assert_not_reached("unexpected type");
+ }
+}
+
+static void json_variant_free_inner(JsonVariant *v, bool force_sensitive) {
+ bool sensitive;
+
+ assert(v);
+
+ if (!json_variant_is_regular(v))
+ return;
+
+ json_source_unref(v->source);
+
+ sensitive = v->sensitive || force_sensitive;
+
+ if (v->is_reference) {
+ if (sensitive)
+ json_variant_sensitive(v->reference);
+
+ json_variant_unref(v->reference);
+ return;
+ }
+
+ if (IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) {
+ size_t i;
+
+ for (i = 0; i < v->n_elements; i++)
+ json_variant_free_inner(v + 1 + i, sensitive);
+ }
+
+ if (sensitive)
+ explicit_bzero_safe(v, json_variant_size(v));
+}
+
+JsonVariant *json_variant_ref(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (v->is_embedded)
+ json_variant_ref(v->parent); /* ref the compounding variant instead */
+ else {
+ assert(v->n_ref > 0);
+ v->n_ref++;
+ }
+
+ return v;
+}
+
+JsonVariant *json_variant_unref(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ return NULL;
+
+ if (v->is_embedded)
+ json_variant_unref(v->parent);
+ else {
+ assert(v->n_ref > 0);
+ v->n_ref--;
+
+ if (v->n_ref == 0) {
+ json_variant_free_inner(v, false);
+ free(v);
+ }
+ }
+
+ return NULL;
+}
+
+void json_variant_unref_many(JsonVariant **array, size_t n) {
+ size_t i;
+
+ assert(array || n == 0);
+
+ for (i = 0; i < n; i++)
+ json_variant_unref(array[i]);
+}
+
+const char *json_variant_string(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_STRING)
+ return "";
+ if (json_variant_is_magic(v))
+ goto mismatch;
+ if (json_variant_is_const_string(v)) {
+ uintptr_t p = (uintptr_t) v;
+
+ assert((p & 1) != 0);
+ return (const char*) (p ^ 1U);
+ }
+
+ if (v->is_reference)
+ return json_variant_string(v->reference);
+ if (v->type != JSON_VARIANT_STRING)
+ goto mismatch;
+
+ return v->string;
+
+mismatch:
+ log_debug("Non-string JSON variant requested as string, returning NULL.");
+ return NULL;
+}
+
+bool json_variant_boolean(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_TRUE)
+ return true;
+ if (v == JSON_VARIANT_MAGIC_FALSE)
+ return false;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->type != JSON_VARIANT_BOOLEAN)
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_boolean(v->reference);
+
+ return v->value.boolean;
+
+mismatch:
+ log_debug("Non-boolean JSON variant requested as boolean, returning false.");
+ return false;
+}
+
+intmax_t json_variant_integer(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_integer(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_INTEGER:
+ return v->value.integer;
+
+ case JSON_VARIANT_UNSIGNED:
+ if (v->value.unsig <= INTMAX_MAX)
+ return (intmax_t) v->value.unsig;
+
+ log_debug("Unsigned integer %ju requested as signed integer and out of range, returning 0.", v->value.unsig);
+ return 0;
+
+ case JSON_VARIANT_REAL: {
+ intmax_t converted;
+
+ converted = (intmax_t) v->value.real;
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+ if ((long double) converted == v->value.real)
+ return converted;
+ REENABLE_WARNING;
+
+ log_debug("Real %Lg requested as integer, and cannot be converted losslessly, returning 0.", v->value.real);
+ return 0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as integer, returning 0.");
+ return 0;
+}
+
+uintmax_t json_variant_unsigned(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_integer(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_INTEGER:
+ if (v->value.integer >= 0)
+ return (uintmax_t) v->value.integer;
+
+ log_debug("Signed integer %ju requested as unsigned integer and out of range, returning 0.", v->value.integer);
+ return 0;
+
+ case JSON_VARIANT_UNSIGNED:
+ return v->value.unsig;
+
+ case JSON_VARIANT_REAL: {
+ uintmax_t converted;
+
+ converted = (uintmax_t) v->value.real;
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+ if ((long double) converted == v->value.real)
+ return converted;
+ REENABLE_WARNING;
+
+ log_debug("Real %Lg requested as unsigned integer, and cannot be converted losslessly, returning 0.", v->value.real);
+ return 0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as unsigned, returning 0.");
+ return 0;
+}
+
+long double json_variant_real(JsonVariant *v) {
+ if (!v)
+ return 0.0;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0.0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_real(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_REAL:
+ return v->value.real;
+
+ case JSON_VARIANT_INTEGER: {
+ long double converted;
+
+ converted = (long double) v->value.integer;
+
+ if ((intmax_t) converted == v->value.integer)
+ return converted;
+
+ log_debug("Signed integer %ji requested as real, and cannot be converted losslessly, returning 0.", v->value.integer);
+ return 0.0;
+ }
+
+ case JSON_VARIANT_UNSIGNED: {
+ long double converted;
+
+ converted = (long double) v->value.unsig;
+
+ if ((uintmax_t) converted == v->value.unsig)
+ return converted;
+
+ log_debug("Unsigned integer %ju requested as real, and cannot be converted losslessly, returning 0.", v->value.unsig);
+ return 0.0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as integer, returning 0.");
+ return 0.0;
+}
+
+bool json_variant_is_negative(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return false;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_is_negative(v->reference);
+
+ /* This function is useful as checking whether numbers are negative is pretty complex since we have three types
+ * of numbers. And some JSON code (OCI for example) uses negative numbers to mark "not defined" numeric
+ * values. */
+
+ switch (v->type) {
+
+ case JSON_VARIANT_REAL:
+ return v->value.real < 0;
+
+ case JSON_VARIANT_INTEGER:
+ return v->value.integer < 0;
+
+ case JSON_VARIANT_UNSIGNED:
+ return false;
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant tested for negativity, returning false.");
+ return false;
+}
+
+bool json_variant_is_blank_object(JsonVariant *v) {
+ /* Returns true if the specified object is null or empty */
+ return !v ||
+ json_variant_is_null(v) ||
+ (json_variant_is_object(v) && json_variant_elements(v) == 0);
+}
+
+bool json_variant_is_blank_array(JsonVariant *v) {
+ return !v ||
+ json_variant_is_null(v) ||
+ (json_variant_is_array(v) && json_variant_elements(v) == 0);
+}
+
+JsonVariantType json_variant_type(JsonVariant *v) {
+
+ if (!v)
+ return _JSON_VARIANT_TYPE_INVALID;
+
+ if (json_variant_is_const_string(v))
+ return JSON_VARIANT_STRING;
+
+ if (v == JSON_VARIANT_MAGIC_TRUE || v == JSON_VARIANT_MAGIC_FALSE)
+ return JSON_VARIANT_BOOLEAN;
+
+ if (v == JSON_VARIANT_MAGIC_NULL)
+ return JSON_VARIANT_NULL;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER)
+ return JSON_VARIANT_INTEGER;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED)
+ return JSON_VARIANT_UNSIGNED;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return JSON_VARIANT_REAL;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_STRING)
+ return JSON_VARIANT_STRING;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY)
+ return JSON_VARIANT_ARRAY;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return JSON_VARIANT_OBJECT;
+
+ return v->type;
+}
+
+_function_no_sanitize_float_cast_overflow_ bool json_variant_has_type(JsonVariant *v, JsonVariantType type) {
+ JsonVariantType rt;
+
+ /* Note: we turn off ubsan float cast overflo detection for this function, since it would complain
+ * about our float casts but we do them explicitly to detect conversion errors. */
+
+ v = json_variant_dereference(v);
+ if (!v)
+ return false;
+
+ rt = json_variant_type(v);
+ if (rt == type)
+ return true;
+
+ /* If it's a const string, then it only can be a string, and if it is not, it's not */
+ if (json_variant_is_const_string(v))
+ return false;
+
+ /* All three magic zeroes qualify as integer, unsigned and as real */
+ if ((v == JSON_VARIANT_MAGIC_ZERO_INTEGER || v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || v == JSON_VARIANT_MAGIC_ZERO_REAL) &&
+ IN_SET(type, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL, JSON_VARIANT_NUMBER))
+ return true;
+
+ /* All other magic variant types are only equal to themselves */
+ if (json_variant_is_magic(v))
+ return false;
+
+ /* Handle the "number" pseudo type */
+ if (type == JSON_VARIANT_NUMBER)
+ return IN_SET(rt, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL);
+
+ /* Integer conversions are OK in many cases */
+ if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_UNSIGNED)
+ return v->value.integer >= 0;
+ if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_INTEGER)
+ return v->value.unsig <= INTMAX_MAX;
+
+ /* Any integer that can be converted lossley to a real and back may also be considered a real */
+ if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_REAL)
+ return (intmax_t) (long double) v->value.integer == v->value.integer;
+ if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_REAL)
+ return (uintmax_t) (long double) v->value.unsig == v->value.unsig;
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+
+ /* Any real that can be converted losslessly to an integer and back may also be considered an integer */
+ if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_INTEGER)
+ return (long double) (intmax_t) v->value.real == v->value.real;
+ if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_UNSIGNED)
+ return (long double) (uintmax_t) v->value.real == v->value.real;
+
+ REENABLE_WARNING;
+
+ return false;
+}
+
+size_t json_variant_elements(JsonVariant *v) {
+ if (!v)
+ return 0;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY ||
+ v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_elements(v->reference);
+
+ return v->n_elements;
+
+mismatch:
+ log_debug("Number of elements in non-array/non-object JSON variant requested, returning 0.");
+ return 0;
+}
+
+JsonVariant *json_variant_by_index(JsonVariant *v, size_t idx) {
+ if (!v)
+ return NULL;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY ||
+ v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_by_index(v->reference, idx);
+ if (idx >= v->n_elements)
+ return NULL;
+
+ return json_variant_conservative_formalize(v + 1 + idx);
+
+mismatch:
+ log_debug("Element in non-array/non-object JSON variant requested by index, returning NULL.");
+ return NULL;
+}
+
+JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key) {
+ size_t i;
+
+ if (!v)
+ goto not_found;
+ if (!key)
+ goto not_found;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ goto not_found;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->type != JSON_VARIANT_OBJECT)
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_by_key(v->reference, key);
+
+ if (v->sorted) {
+ size_t a = 0, b = v->n_elements/2;
+
+ /* If the variant is sorted we can use bisection to find the entry we need in O(log(n)) time */
+
+ while (b > a) {
+ JsonVariant *p;
+ const char *f;
+ int c;
+
+ i = (a + b) / 2;
+ p = json_variant_dereference(v + 1 + i*2);
+
+ assert_se(f = json_variant_string(p));
+
+ c = strcmp(key, f);
+ if (c == 0) {
+ if (ret_key)
+ *ret_key = json_variant_conservative_formalize(v + 1 + i*2);
+
+ return json_variant_conservative_formalize(v + 1 + i*2 + 1);
+ } else if (c < 0)
+ b = i;
+ else
+ a = i + 1;
+ }
+
+ goto not_found;
+ }
+
+ /* The variant is not sorted, hence search for the field linearly */
+ for (i = 0; i < v->n_elements; i += 2) {
+ JsonVariant *p;
+
+ p = json_variant_dereference(v + 1 + i);
+
+ if (!json_variant_has_type(p, JSON_VARIANT_STRING))
+ continue;
+
+ if (streq(json_variant_string(p), key)) {
+
+ if (ret_key)
+ *ret_key = json_variant_conservative_formalize(v + 1 + i);
+
+ return json_variant_conservative_formalize(v + 1 + i + 1);
+ }
+ }
+
+not_found:
+ if (ret_key)
+ *ret_key = NULL;
+
+ return NULL;
+
+mismatch:
+ log_debug("Element in non-object JSON variant requested by key, returning NULL.");
+ if (ret_key)
+ *ret_key = NULL;
+
+ return NULL;
+}
+
+JsonVariant *json_variant_by_key(JsonVariant *v, const char *key) {
+ return json_variant_by_key_full(v, key, NULL);
+}
+
+bool json_variant_equal(JsonVariant *a, JsonVariant *b) {
+ JsonVariantType t;
+
+ a = json_variant_formalize(a);
+ b = json_variant_formalize(b);
+
+ if (a == b)
+ return true;
+
+ t = json_variant_type(a);
+ if (!json_variant_has_type(b, t))
+ return false;
+
+ switch (t) {
+
+ case JSON_VARIANT_STRING:
+ return streq(json_variant_string(a), json_variant_string(b));
+
+ case JSON_VARIANT_INTEGER:
+ return json_variant_integer(a) == json_variant_integer(b);
+
+ case JSON_VARIANT_UNSIGNED:
+ return json_variant_unsigned(a) == json_variant_unsigned(b);
+
+ case JSON_VARIANT_REAL:
+ DISABLE_WARNING_FLOAT_EQUAL;
+ return json_variant_real(a) == json_variant_real(b);
+ REENABLE_WARNING;
+
+ case JSON_VARIANT_BOOLEAN:
+ return json_variant_boolean(a) == json_variant_boolean(b);
+
+ case JSON_VARIANT_NULL:
+ return true;
+
+ case JSON_VARIANT_ARRAY: {
+ size_t i, n;
+
+ n = json_variant_elements(a);
+ if (n != json_variant_elements(b))
+ return false;
+
+ for (i = 0; i < n; i++) {
+ if (!json_variant_equal(json_variant_by_index(a, i), json_variant_by_index(b, i)))
+ return false;
+ }
+
+ return true;
+ }
+
+ case JSON_VARIANT_OBJECT: {
+ size_t i, n;
+
+ n = json_variant_elements(a);
+ if (n != json_variant_elements(b))
+ return false;
+
+ /* Iterate through all keys in 'a' */
+ for (i = 0; i < n; i += 2) {
+ bool found = false;
+ size_t j;
+
+ /* Match them against all keys in 'b' */
+ for (j = 0; j < n; j += 2) {
+ JsonVariant *key_b;
+
+ key_b = json_variant_by_index(b, j);
+
+ /* During the first iteration unmark everything */
+ if (i == 0)
+ key_b->is_marked = false;
+ else if (key_b->is_marked) /* In later iterations if we already marked something, don't bother with it again */
+ continue;
+
+ if (found)
+ continue;
+
+ if (json_variant_equal(json_variant_by_index(a, i), key_b) &&
+ json_variant_equal(json_variant_by_index(a, i+1), json_variant_by_index(b, j+1))) {
+ /* Key and values match! */
+ key_b->is_marked = found = true;
+
+ /* In the first iteration we continue the inner loop since we want to mark
+ * everything, otherwise exit the loop quickly after we found what we were
+ * looking for. */
+ if (i != 0)
+ break;
+ }
+ }
+
+ if (!found)
+ return false;
+ }
+
+ return true;
+ }
+
+ default:
+ assert_not_reached("Unknown variant type.");
+ }
+}
+
+void json_variant_sensitive(JsonVariant *v) {
+ assert(v);
+
+ /* Marks a variant as "sensitive", so that it is erased from memory when it is destroyed. This is a
+ * one-way operation: as soon as it is marked this way it remains marked this way until it's
+ * destroyed. A magic variant is never sensitive though, even when asked, since it's too
+ * basic. Similar, const string variant are never sensitive either, after all they are included in
+ * the source code as they are, which is not suitable for inclusion of secrets.
+ *
+ * Note that this flag has a recursive effect: when we destroy an object or array we'll propagate the
+ * flag to all contained variants. And if those are then destroyed this is propagated further down,
+ * and so on. */
+
+ v = json_variant_formalize(v);
+ if (!json_variant_is_regular(v))
+ return;
+
+ v->sensitive = true;
+}
+
+bool json_variant_is_sensitive(JsonVariant *v) {
+ v = json_variant_formalize(v);
+ if (!json_variant_is_regular(v))
+ return false;
+
+ return v->sensitive;
+}
+
+static void json_variant_propagate_sensitive(JsonVariant *from, JsonVariant *to) {
+ if (json_variant_is_sensitive(from))
+ json_variant_sensitive(to);
+}
+
+int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column) {
+ assert_return(v, -EINVAL);
+
+ if (ret_source)
+ *ret_source = json_variant_is_regular(v) && v->source ? v->source->name : NULL;
+
+ if (ret_line)
+ *ret_line = json_variant_is_regular(v) ? v->line : 0;
+
+ if (ret_column)
+ *ret_column = json_variant_is_regular(v) ? v->column : 0;
+
+ return 0;
+}
+
+static int print_source(FILE *f, JsonVariant *v, JsonFormatFlags flags, bool whitespace) {
+ size_t w, k;
+
+ if (!FLAGS_SET(flags, JSON_FORMAT_SOURCE|JSON_FORMAT_PRETTY))
+ return 0;
+
+ if (!json_variant_is_regular(v))
+ return 0;
+
+ if (!v->source && v->line == 0 && v->column == 0)
+ return 0;
+
+ /* The max width we need to format the line numbers for this source file */
+ w = (v->source && v->source->max_line > 0) ?
+ DECIMAL_STR_WIDTH(v->source->max_line) :
+ DECIMAL_STR_MAX(unsigned)-1;
+ k = (v->source && v->source->max_column > 0) ?
+ DECIMAL_STR_WIDTH(v->source->max_column) :
+ DECIMAL_STR_MAX(unsigned) -1;
+
+ if (whitespace) {
+ size_t i, n;
+
+ n = 1 + (v->source ? strlen(v->source->name) : 0) +
+ ((v->source && (v->line > 0 || v->column > 0)) ? 1 : 0) +
+ (v->line > 0 ? w : 0) +
+ (((v->source || v->line > 0) && v->column > 0) ? 1 : 0) +
+ (v->column > 0 ? k : 0) +
+ 2;
+
+ for (i = 0; i < n; i++)
+ fputc(' ', f);
+ } else {
+ fputc('[', f);
+
+ if (v->source)
+ fputs(v->source->name, f);
+ if (v->source && (v->line > 0 || v->column > 0))
+ fputc(':', f);
+ if (v->line > 0)
+ fprintf(f, "%*u", (int) w, v->line);
+ if ((v->source || v->line > 0) || v->column > 0)
+ fputc(':', f);
+ if (v->column > 0)
+ fprintf(f, "%*u", (int) k, v->column);
+
+ fputc(']', f);
+ fputc(' ', f);
+ }
+
+ return 0;
+}
+
+static void json_format_string(FILE *f, const char *q, JsonFormatFlags flags) {
+ assert(q);
+
+ fputc('"', f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_GREEN, f);
+
+ for (; *q; q++)
+ switch (*q) {
+ case '"':
+ fputs("\\\"", f);
+ break;
+
+ case '\\':
+ fputs("\\\\", f);
+ break;
+
+ case '\b':
+ fputs("\\b", f);
+ break;
+
+ case '\f':
+ fputs("\\f", f);
+ break;
+
+ case '\n':
+ fputs("\\n", f);
+ break;
+
+ case '\r':
+ fputs("\\r", f);
+ break;
+
+ case '\t':
+ fputs("\\t", f);
+ break;
+
+ default:
+ if ((signed char) *q >= 0 && *q < ' ')
+ fprintf(f, "\\u%04x", *q);
+ else
+ fputc(*q, f);
+ break;
+ }
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ fputc('"', f);
+}
+
+static int json_format(FILE *f, JsonVariant *v, JsonFormatFlags flags, const char *prefix) {
+ int r;
+
+ assert(f);
+ assert(v);
+
+ switch (json_variant_type(v)) {
+
+ case JSON_VARIANT_REAL: {
+ locale_t loc;
+
+ loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0);
+ if (loc == (locale_t) 0)
+ return -errno;
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%.*Le", DECIMAL_DIG, json_variant_real(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ freelocale(loc);
+ break;
+ }
+
+ case JSON_VARIANT_INTEGER:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%" PRIdMAX, json_variant_integer(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%" PRIuMAX, json_variant_unsigned(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT, f);
+
+ if (json_variant_boolean(v))
+ fputs("true", f);
+ else
+ fputs("false", f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ break;
+
+ case JSON_VARIANT_NULL:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT, f);
+
+ fputs("null", f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_STRING:
+ json_format_string(f, json_variant_string(v), flags);
+ break;
+
+ case JSON_VARIANT_ARRAY: {
+ size_t i, n;
+
+ n = json_variant_elements(v);
+
+ if (n == 0)
+ fputs("[]", f);
+ else {
+ _cleanup_free_ char *joined = NULL;
+ const char *prefix2;
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ joined = strjoin(strempty(prefix), "\t");
+ if (!joined)
+ return -ENOMEM;
+
+ prefix2 = joined;
+ fputs("[\n", f);
+ } else {
+ prefix2 = strempty(prefix);
+ fputc('[', f);
+ }
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *e;
+
+ assert_se(e = json_variant_by_index(v, i));
+
+ if (i > 0) {
+ if (flags & JSON_FORMAT_PRETTY)
+ fputs(",\n", f);
+ else
+ fputc(',', f);
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ print_source(f, e, flags, false);
+ fputs(prefix2, f);
+ }
+
+ r = json_format(f, e, flags, prefix2);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ fputc('\n', f);
+ print_source(f, v, flags, true);
+ fputs(strempty(prefix), f);
+ }
+
+ fputc(']', f);
+ }
+ break;
+ }
+
+ case JSON_VARIANT_OBJECT: {
+ size_t i, n;
+
+ n = json_variant_elements(v);
+
+ if (n == 0)
+ fputs("{}", f);
+ else {
+ _cleanup_free_ char *joined = NULL;
+ const char *prefix2;
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ joined = strjoin(strempty(prefix), "\t");
+ if (!joined)
+ return -ENOMEM;
+
+ prefix2 = joined;
+ fputs("{\n", f);
+ } else {
+ prefix2 = strempty(prefix);
+ fputc('{', f);
+ }
+
+ for (i = 0; i < n; i += 2) {
+ JsonVariant *e;
+
+ e = json_variant_by_index(v, i);
+
+ if (i > 0) {
+ if (flags & JSON_FORMAT_PRETTY)
+ fputs(",\n", f);
+ else
+ fputc(',', f);
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ print_source(f, e, flags, false);
+ fputs(prefix2, f);
+ }
+
+ r = json_format(f, e, flags, prefix2);
+ if (r < 0)
+ return r;
+
+ fputs(flags & JSON_FORMAT_PRETTY ? " : " : ":", f);
+
+ r = json_format(f, json_variant_by_index(v, i+1), flags, prefix2);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ fputc('\n', f);
+ print_source(f, v, flags, true);
+ fputs(strempty(prefix), f);
+ }
+
+ fputc('}', f);
+ }
+ break;
+ }
+
+ default:
+ assert_not_reached("Unexpected variant type.");
+ }
+
+ return 0;
+}
+
+int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t sz = 0;
+ int r;
+
+ /* Returns the length of the generated string (without the terminating NUL),
+ * or negative on error. */
+
+ assert_return(v, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = open_memstream_unlocked(&s, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ json_variant_dump(v, flags, f, NULL);
+
+ /* Add terminating 0, so that the output buffer is a valid string. */
+ fputc('\0', f);
+
+ r = fflush_and_check(f);
+ }
+ if (r < 0)
+ return r;
+
+ assert(s);
+ *ret = TAKE_PTR(s);
+ assert(sz > 0);
+ return (int) sz - 1;
+}
+
+void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix) {
+ if (!v)
+ return;
+
+ if (!f)
+ f = stdout;
+
+ print_source(f, v, flags, false);
+
+ if (((flags & (JSON_FORMAT_COLOR_AUTO|JSON_FORMAT_COLOR)) == JSON_FORMAT_COLOR_AUTO) && colors_enabled())
+ flags |= JSON_FORMAT_COLOR;
+
+ if (((flags & (JSON_FORMAT_PRETTY_AUTO|JSON_FORMAT_PRETTY)) == JSON_FORMAT_PRETTY_AUTO))
+ flags |= on_tty() ? JSON_FORMAT_PRETTY : JSON_FORMAT_NEWLINE;
+
+ if (flags & JSON_FORMAT_SSE)
+ fputs("data: ", f);
+ if (flags & JSON_FORMAT_SEQ)
+ fputc('\x1e', f); /* ASCII Record Separator */
+
+ json_format(f, v, flags, prefix);
+
+ if (flags & (JSON_FORMAT_PRETTY|JSON_FORMAT_SEQ|JSON_FORMAT_SSE|JSON_FORMAT_NEWLINE))
+ fputc('\n', f);
+ if (flags & JSON_FORMAT_SSE)
+ fputc('\n', f); /* In case of SSE add a second newline */
+
+ if (flags & JSON_FORMAT_FLUSH)
+ fflush(f);
+}
+
+int json_variant_filter(JsonVariant **v, char **to_remove) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ _cleanup_free_ JsonVariant **array = NULL;
+ size_t i, n = 0, k = 0;
+ int r;
+
+ assert(v);
+
+ if (json_variant_is_blank_object(*v))
+ return 0;
+ if (!json_variant_is_object(*v))
+ return -EINVAL;
+
+ if (strv_isempty(to_remove))
+ return 0;
+
+ for (i = 0; i < json_variant_elements(*v); i += 2) {
+ JsonVariant *p;
+
+ p = json_variant_by_index(*v, i);
+ if (!json_variant_has_type(p, JSON_VARIANT_STRING))
+ return -EINVAL;
+
+ if (strv_contains(to_remove, json_variant_string(p))) {
+ if (!array) {
+ array = new(JsonVariant*, json_variant_elements(*v) - 2);
+ if (!array)
+ return -ENOMEM;
+
+ for (k = 0; k < i; k++)
+ array[k] = json_variant_by_index(*v, k);
+ }
+
+ n++;
+ } else if (array) {
+ array[k++] = p;
+ array[k++] = json_variant_by_index(*v, i + 1);
+ }
+ }
+
+ if (n == 0)
+ return 0;
+
+ r = json_variant_new_object(&w, array, k);
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, w);
+
+ json_variant_unref(*v);
+ *v = TAKE_PTR(w);
+
+ return (int) n;
+}
+
+int json_variant_set_field(JsonVariant **v, const char *field, JsonVariant *value) {
+ _cleanup_(json_variant_unrefp) JsonVariant *field_variant = NULL, *w = NULL;
+ _cleanup_free_ JsonVariant **array = NULL;
+ size_t i, k = 0;
+ int r;
+
+ assert(v);
+ assert(field);
+
+ if (json_variant_is_blank_object(*v)) {
+ array = new(JsonVariant*, 2);
+ if (!array)
+ return -ENOMEM;
+
+ } else {
+ if (!json_variant_is_object(*v))
+ return -EINVAL;
+
+ for (i = 0; i < json_variant_elements(*v); i += 2) {
+ JsonVariant *p;
+
+ p = json_variant_by_index(*v, i);
+ if (!json_variant_is_string(p))
+ return -EINVAL;
+
+ if (streq(json_variant_string(p), field)) {
+
+ if (!array) {
+ array = new(JsonVariant*, json_variant_elements(*v));
+ if (!array)
+ return -ENOMEM;
+
+ for (k = 0; k < i; k++)
+ array[k] = json_variant_by_index(*v, k);
+ }
+
+ } else if (array) {
+ array[k++] = p;
+ array[k++] = json_variant_by_index(*v, i + 1);
+ }
+ }
+
+ if (!array) {
+ array = new(JsonVariant*, json_variant_elements(*v) + 2);
+ if (!array)
+ return -ENOMEM;
+
+ for (k = 0; k < json_variant_elements(*v); k++)
+ array[k] = json_variant_by_index(*v, k);
+ }
+ }
+
+ r = json_variant_new_string(&field_variant, field);
+ if (r < 0)
+ return r;
+
+ array[k++] = field_variant;
+ array[k++] = value;
+
+ r = json_variant_new_object(&w, array, k);
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, w);
+
+ json_variant_unref(*v);
+ *v = TAKE_PTR(w);
+
+ return 1;
+}
+
+int json_variant_set_field_string(JsonVariant **v, const char *field, const char *value) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_string(&m, value);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_set_field_integer(JsonVariant **v, const char *field, intmax_t i) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_integer(&m, i);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_set_field_unsigned(JsonVariant **v, const char *field, uintmax_t u) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_unsigned(&m, u);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_set_field_boolean(JsonVariant **v, const char *field, bool b) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_boolean(&m, b);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_set_field_strv(JsonVariant **v, const char *field, char **l) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_array_strv(&m, l);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_merge(JsonVariant **v, JsonVariant *m) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ _cleanup_free_ JsonVariant **array = NULL;
+ size_t v_elements, m_elements, i, k;
+ bool v_blank, m_blank;
+ int r;
+
+ m = json_variant_dereference(m);
+
+ v_blank = json_variant_is_blank_object(*v);
+ m_blank = json_variant_is_blank_object(m);
+
+ if (!v_blank && !json_variant_is_object(*v))
+ return -EINVAL;
+ if (!m_blank && !json_variant_is_object(m))
+ return -EINVAL;
+
+ if (m_blank)
+ return 0; /* nothing to do */
+
+ if (v_blank) {
+ json_variant_unref(*v);
+ *v = json_variant_ref(m);
+ return 1;
+ }
+
+ v_elements = json_variant_elements(*v);
+ m_elements = json_variant_elements(m);
+ if (v_elements > SIZE_MAX - m_elements) /* overflow check */
+ return -ENOMEM;
+
+ array = new(JsonVariant*, v_elements + m_elements);
+ if (!array)
+ return -ENOMEM;
+
+ k = 0;
+ for (i = 0; i < v_elements; i += 2) {
+ JsonVariant *u;
+
+ u = json_variant_by_index(*v, i);
+ if (!json_variant_is_string(u))
+ return -EINVAL;
+
+ if (json_variant_by_key(m, json_variant_string(u)))
+ continue; /* skip if exists in second variant */
+
+ array[k++] = u;
+ array[k++] = json_variant_by_index(*v, i + 1);
+ }
+
+ for (i = 0; i < m_elements; i++)
+ array[k++] = json_variant_by_index(m, i);
+
+ r = json_variant_new_object(&w, array, k);
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, w);
+ json_variant_propagate_sensitive(m, w);
+
+ json_variant_unref(*v);
+ *v = TAKE_PTR(w);
+
+ return 1;
+}
+
+int json_variant_append_array(JsonVariant **v, JsonVariant *element) {
+ _cleanup_(json_variant_unrefp) JsonVariant *nv = NULL;
+ bool blank;
+ int r;
+
+ assert(v);
+ assert(element);
+
+
+ if (!*v || json_variant_is_null(*v))
+ blank = true;
+ else if (!json_variant_is_array(*v))
+ return -EINVAL;
+ else
+ blank = json_variant_elements(*v) == 0;
+
+ if (blank)
+ r = json_variant_new_array(&nv, (JsonVariant*[]) { element }, 1);
+ else {
+ _cleanup_free_ JsonVariant **array = NULL;
+ size_t i;
+
+ array = new(JsonVariant*, json_variant_elements(*v) + 1);
+ if (!array)
+ return -ENOMEM;
+
+ for (i = 0; i < json_variant_elements(*v); i++)
+ array[i] = json_variant_by_index(*v, i);
+
+ array[i] = element;
+
+ r = json_variant_new_array(&nv, array, i + 1);
+ }
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, nv);
+
+ json_variant_unref(*v);
+ *v = TAKE_PTR(nv);
+
+ return 0;
+}
+
+int json_variant_strv(JsonVariant *v, char ***ret) {
+ char **l = NULL;
+ size_t n, i;
+ bool sensitive;
+ int r;
+
+ assert(ret);
+
+ if (!v || json_variant_is_null(v)) {
+ l = new0(char*, 1);
+ if (!l)
+ return -ENOMEM;
+
+ *ret = l;
+ return 0;
+ }
+
+ if (!json_variant_is_array(v))
+ return -EINVAL;
+
+ sensitive = v->sensitive;
+
+ n = json_variant_elements(v);
+ l = new(char*, n+1);
+ if (!l)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *e;
+
+ assert_se(e = json_variant_by_index(v, i));
+ sensitive = sensitive || e->sensitive;
+
+ if (!json_variant_is_string(e)) {
+ l[i] = NULL;
+ r = -EINVAL;
+ goto fail;
+ }
+
+ l[i] = strdup(json_variant_string(e));
+ if (!l[i]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ l[i] = NULL;
+ *ret = TAKE_PTR(l);
+
+ return 0;
+
+fail:
+ if (sensitive)
+ strv_free_erase(l);
+ else
+ strv_free(l);
+
+ return r;
+}
+
+static int json_variant_copy(JsonVariant **nv, JsonVariant *v) {
+ JsonVariantType t;
+ JsonVariant *c;
+ JsonValue value;
+ const void *source;
+ size_t k;
+
+ assert(nv);
+ assert(v);
+
+ /* Let's copy the simple types literally, and the larger types by references */
+ t = json_variant_type(v);
+ switch (t) {
+ case JSON_VARIANT_INTEGER:
+ k = sizeof(intmax_t);
+ value.integer = json_variant_integer(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ k = sizeof(uintmax_t);
+ value.unsig = json_variant_unsigned(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_REAL:
+ k = sizeof(long double);
+ value.real = json_variant_real(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+ k = sizeof(bool);
+ value.boolean = json_variant_boolean(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_NULL:
+ k = 0;
+ source = NULL;
+ break;
+
+ case JSON_VARIANT_STRING:
+ source = json_variant_string(v);
+ k = strnlen(source, INLINE_STRING_MAX + 1);
+ if (k <= INLINE_STRING_MAX) {
+ k ++;
+ break;
+ }
+
+ _fallthrough_;
+
+ default:
+ /* Everything else copy by reference */
+
+ c = malloc0(MAX(sizeof(JsonVariant),
+ offsetof(JsonVariant, reference) + sizeof(JsonVariant*)));
+ if (!c)
+ return -ENOMEM;
+
+ c->n_ref = 1;
+ c->type = t;
+ c->is_reference = true;
+ c->reference = json_variant_ref(json_variant_formalize(v));
+
+ *nv = c;
+ return 0;
+ }
+
+ c = malloc0(MAX(sizeof(JsonVariant),
+ offsetof(JsonVariant, value) + k));
+ if (!c)
+ return -ENOMEM;
+
+ c->n_ref = 1;
+ c->type = t;
+
+ memcpy_safe(&c->value, source, k);
+
+ json_variant_propagate_sensitive(v, c);
+
+ *nv = c;
+ return 0;
+}
+
+static bool json_single_ref(JsonVariant *v) {
+
+ /* Checks whether the caller is the single owner of the object, i.e. can get away with changing it */
+
+ if (!json_variant_is_regular(v))
+ return false;
+
+ if (v->is_embedded)
+ return json_single_ref(v->parent);
+
+ assert(v->n_ref > 0);
+ return v->n_ref == 1;
+}
+
+static int json_variant_set_source(JsonVariant **v, JsonSource *source, unsigned line, unsigned column) {
+ JsonVariant *w;
+ int r;
+
+ assert(v);
+
+ /* Patch in source and line/column number. Tries to do this in-place if the caller is the sole referencer of
+ * the object. If not, allocates a new object, possibly a surrogate for the original one */
+
+ if (!*v)
+ return 0;
+
+ if (source && line > source->max_line)
+ source->max_line = line;
+ if (source && column > source->max_column)
+ source->max_column = column;
+
+ if (!json_variant_is_regular(*v)) {
+
+ if (!source && line == 0 && column == 0)
+ return 0;
+
+ } else {
+ if (json_source_equal((*v)->source, source) &&
+ (*v)->line == line &&
+ (*v)->column == column)
+ return 0;
+
+ if (json_single_ref(*v)) { /* Sole reference? */
+ json_source_unref((*v)->source);
+ (*v)->source = json_source_ref(source);
+ (*v)->line = line;
+ (*v)->column = column;
+ return 1;
+ }
+ }
+
+ r = json_variant_copy(&w, *v);
+ if (r < 0)
+ return r;
+
+ assert(json_variant_is_regular(w));
+ assert(!w->is_embedded);
+ assert(w->n_ref == 1);
+ assert(!w->source);
+
+ w->source = json_source_ref(source);
+ w->line = line;
+ w->column = column;
+
+ json_variant_unref(*v);
+ *v = w;
+
+ return 1;
+}
+
+static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, size_t n) {
+ assert(line);
+ assert(column);
+ assert(s || n == 0);
+
+ while (n > 0) {
+ if (*s == '\n') {
+ (*line)++;
+ *column = 1;
+ } else if ((signed char) *s >= 0 && *s < 127) /* Process ASCII chars quickly */
+ (*column)++;
+ else {
+ int w;
+
+ w = utf8_encoded_valid_unichar(s, n);
+ if (w < 0) /* count invalid unichars as normal characters */
+ w = 1;
+ else if ((size_t) w > n) /* never read more than the specified number of characters */
+ w = (int) n;
+
+ (*column)++;
+
+ s += w;
+ n -= w;
+ continue;
+ }
+
+ s++;
+ n--;
+ }
+}
+
+static int unhex_ucs2(const char *c, uint16_t *ret) {
+ int aa, bb, cc, dd;
+ uint16_t x;
+
+ assert(c);
+ assert(ret);
+
+ aa = unhexchar(c[0]);
+ if (aa < 0)
+ return -EINVAL;
+
+ bb = unhexchar(c[1]);
+ if (bb < 0)
+ return -EINVAL;
+
+ cc = unhexchar(c[2]);
+ if (cc < 0)
+ return -EINVAL;
+
+ dd = unhexchar(c[3]);
+ if (dd < 0)
+ return -EINVAL;
+
+ x = ((uint16_t) aa << 12) |
+ ((uint16_t) bb << 8) |
+ ((uint16_t) cc << 4) |
+ ((uint16_t) dd);
+
+ if (x <= 0)
+ return -EINVAL;
+
+ *ret = x;
+
+ return 0;
+}
+
+static int json_parse_string(const char **p, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t n = 0, allocated = 0;
+ const char *c;
+
+ assert(p);
+ assert(*p);
+ assert(ret);
+
+ c = *p;
+
+ if (*c != '"')
+ return -EINVAL;
+
+ c++;
+
+ for (;;) {
+ int len;
+
+ /* Check for EOF */
+ if (*c == 0)
+ return -EINVAL;
+
+ /* Check for control characters 0x00..0x1f */
+ if (*c > 0 && *c < ' ')
+ return -EINVAL;
+
+ /* Check for control character 0x7f */
+ if (*c == 0x7f)
+ return -EINVAL;
+
+ if (*c == '"') {
+ if (!s) {
+ s = strdup("");
+ if (!s)
+ return -ENOMEM;
+ } else
+ s[n] = 0;
+
+ *p = c + 1;
+
+ *ret = TAKE_PTR(s);
+ return JSON_TOKEN_STRING;
+ }
+
+ if (*c == '\\') {
+ char ch = 0;
+ c++;
+
+ if (*c == 0)
+ return -EINVAL;
+
+ if (IN_SET(*c, '"', '\\', '/'))
+ ch = *c;
+ else if (*c == 'b')
+ ch = '\b';
+ else if (*c == 'f')
+ ch = '\f';
+ else if (*c == 'n')
+ ch = '\n';
+ else if (*c == 'r')
+ ch = '\r';
+ else if (*c == 't')
+ ch = '\t';
+ else if (*c == 'u') {
+ char16_t x;
+ int r;
+
+ r = unhex_ucs2(c + 1, &x);
+ if (r < 0)
+ return r;
+
+ c += 5;
+
+ if (!GREEDY_REALLOC(s, allocated, n + 5))
+ return -ENOMEM;
+
+ if (!utf16_is_surrogate(x))
+ n += utf8_encode_unichar(s + n, (char32_t) x);
+ else if (utf16_is_trailing_surrogate(x))
+ return -EINVAL;
+ else {
+ char16_t y;
+
+ if (c[0] != '\\' || c[1] != 'u')
+ return -EINVAL;
+
+ r = unhex_ucs2(c + 2, &y);
+ if (r < 0)
+ return r;
+
+ c += 6;
+
+ if (!utf16_is_trailing_surrogate(y))
+ return -EINVAL;
+
+ n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y));
+ }
+
+ continue;
+ } else
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(s, allocated, n + 2))
+ return -ENOMEM;
+
+ s[n++] = ch;
+ c ++;
+ continue;
+ }
+
+ len = utf8_encoded_valid_unichar(c, (size_t) -1);
+ if (len < 0)
+ return len;
+
+ if (!GREEDY_REALLOC(s, allocated, n + len + 1))
+ return -ENOMEM;
+
+ memcpy(s + n, c, len);
+ n += len;
+ c += len;
+ }
+}
+
+static int json_parse_number(const char **p, JsonValue *ret) {
+ bool negative = false, exponent_negative = false, is_real = false;
+ long double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
+ intmax_t i = 0;
+ uintmax_t u = 0;
+ const char *c;
+
+ assert(p);
+ assert(*p);
+ assert(ret);
+
+ c = *p;
+
+ if (*c == '-') {
+ negative = true;
+ c++;
+ }
+
+ if (*c == '0')
+ c++;
+ else {
+ if (!strchr("123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ if (!is_real) {
+ if (negative) {
+
+ if (i < INTMAX_MIN / 10) /* overflow */
+ is_real = true;
+ else {
+ intmax_t t = 10 * i;
+
+ if (t < INTMAX_MIN + (*c - '0')) /* overflow */
+ is_real = true;
+ else
+ i = t - (*c - '0');
+ }
+ } else {
+ if (u > UINTMAX_MAX / 10) /* overflow */
+ is_real = true;
+ else {
+ uintmax_t t = 10 * u;
+
+ if (t > UINTMAX_MAX - (*c - '0')) /* overflow */
+ is_real = true;
+ else
+ u = t + (*c - '0');
+ }
+ }
+ }
+
+ x = 10.0 * x + (*c - '0');
+
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ if (*c == '.') {
+ is_real = true;
+ c++;
+
+ if (!strchr("0123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ y = 10.0 * y + (*c - '0');
+ shift = 10.0 * shift;
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ if (IN_SET(*c, 'e', 'E')) {
+ is_real = true;
+ c++;
+
+ if (*c == '-') {
+ exponent_negative = true;
+ c++;
+ } else if (*c == '+')
+ c++;
+
+ if (!strchr("0123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ exponent = 10.0 * exponent + (*c - '0');
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ *p = c;
+
+ if (is_real) {
+ ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10l((exponent_negative ? -1.0 : 1.0) * exponent);
+ return JSON_TOKEN_REAL;
+ } else if (negative) {
+ ret->integer = i;
+ return JSON_TOKEN_INTEGER;
+ } else {
+ ret->unsig = u;
+ return JSON_TOKEN_UNSIGNED;
+ }
+}
+
+int json_tokenize(
+ const char **p,
+ char **ret_string,
+ JsonValue *ret_value,
+ unsigned *ret_line, /* 'ret_line' returns the line at the beginning of this token */
+ unsigned *ret_column,
+ void **state,
+ unsigned *line, /* 'line' is used as a line state, it always reflect the line we are at after the token was read */
+ unsigned *column) {
+
+ unsigned start_line, start_column;
+ const char *start, *c;
+ size_t n;
+ int t, r;
+
+ enum {
+ STATE_NULL,
+ STATE_VALUE,
+ STATE_VALUE_POST,
+ };
+
+ assert(p);
+ assert(*p);
+ assert(ret_string);
+ assert(ret_value);
+ assert(ret_line);
+ assert(ret_column);
+ assert(line);
+ assert(column);
+ assert(state);
+
+ t = PTR_TO_INT(*state);
+ if (t == STATE_NULL) {
+ *line = 1;
+ *column = 1;
+ t = STATE_VALUE;
+ }
+
+ /* Skip over the whitespace */
+ n = strspn(*p, WHITESPACE);
+ inc_lines_columns(line, column, *p, n);
+ c = *p + n;
+
+ /* Remember where we started processing this token */
+ start = c;
+ start_line = *line;
+ start_column = *column;
+
+ if (*c == 0) {
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+ r = JSON_TOKEN_END;
+ goto finish;
+ }
+
+ switch (t) {
+
+ case STATE_VALUE:
+
+ if (*c == '{') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_OBJECT_OPEN;
+ goto null_return;
+
+ } else if (*c == '}') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_OBJECT_CLOSE;
+ goto null_return;
+
+ } else if (*c == '[') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_ARRAY_OPEN;
+ goto null_return;
+
+ } else if (*c == ']') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_ARRAY_CLOSE;
+ goto null_return;
+
+ } else if (*c == '"') {
+
+ r = json_parse_string(&c, ret_string);
+ if (r < 0)
+ return r;
+
+ *ret_value = JSON_VALUE_NULL;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ goto finish;
+
+ } else if (strchr("-0123456789", *c)) {
+
+ r = json_parse_number(&c, ret_value);
+ if (r < 0)
+ return r;
+
+ *ret_string = NULL;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ goto finish;
+
+ } else if (startswith(c, "true")) {
+ *ret_string = NULL;
+ ret_value->boolean = true;
+ c += 4;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_BOOLEAN;
+ goto finish;
+
+ } else if (startswith(c, "false")) {
+ *ret_string = NULL;
+ ret_value->boolean = false;
+ c += 5;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_BOOLEAN;
+ goto finish;
+
+ } else if (startswith(c, "null")) {
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+ c += 4;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_NULL;
+ goto finish;
+
+ }
+
+ return -EINVAL;
+
+ case STATE_VALUE_POST:
+
+ if (*c == ':') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_COLON;
+ goto null_return;
+
+ } else if (*c == ',') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_COMMA;
+ goto null_return;
+
+ } else if (*c == '}') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_OBJECT_CLOSE;
+ goto null_return;
+
+ } else if (*c == ']') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_ARRAY_CLOSE;
+ goto null_return;
+ }
+
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unexpected tokenizer state");
+ }
+
+null_return:
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+
+finish:
+ inc_lines_columns(line, column, start, c - start);
+ *p = c;
+
+ *ret_line = start_line;
+ *ret_column = start_column;
+
+ return r;
+}
+
+typedef enum JsonExpect {
+ /* The following values are used by json_parse() */
+ EXPECT_TOPLEVEL,
+ EXPECT_END,
+ EXPECT_OBJECT_FIRST_KEY,
+ EXPECT_OBJECT_NEXT_KEY,
+ EXPECT_OBJECT_COLON,
+ EXPECT_OBJECT_VALUE,
+ EXPECT_OBJECT_COMMA,
+ EXPECT_ARRAY_FIRST_ELEMENT,
+ EXPECT_ARRAY_NEXT_ELEMENT,
+ EXPECT_ARRAY_COMMA,
+
+ /* And these are used by json_build() */
+ EXPECT_ARRAY_ELEMENT,
+ EXPECT_OBJECT_KEY,
+} JsonExpect;
+
+typedef struct JsonStack {
+ JsonExpect expect;
+ JsonVariant **elements;
+ size_t n_elements, n_elements_allocated;
+ unsigned line_before;
+ unsigned column_before;
+ size_t n_suppress; /* When building: if > 0, suppress this many subsequent elements. If == (size_t) -1, suppress all subsequent elements */
+} JsonStack;
+
+static void json_stack_release(JsonStack *s) {
+ assert(s);
+
+ json_variant_unref_many(s->elements, s->n_elements);
+ s->elements = mfree(s->elements);
+}
+
+static int json_parse_internal(
+ const char **input,
+ JsonSource *source,
+ JsonParseFlags flags,
+ JsonVariant **ret,
+ unsigned *line,
+ unsigned *column,
+ bool continue_end) {
+
+ size_t n_stack = 1, n_stack_allocated = 0, i;
+ unsigned line_buffer = 0, column_buffer = 0;
+ void *tokenizer_state = NULL;
+ JsonStack *stack = NULL;
+ const char *p;
+ int r;
+
+ assert_return(input, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ p = *input;
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack))
+ return -ENOMEM;
+
+ stack[0] = (JsonStack) {
+ .expect = EXPECT_TOPLEVEL,
+ };
+
+ if (!line)
+ line = &line_buffer;
+ if (!column)
+ column = &column_buffer;
+
+ for (;;) {
+ _cleanup_(json_variant_unrefp) JsonVariant *add = NULL;
+ _cleanup_free_ char *string = NULL;
+ unsigned line_token, column_token;
+ JsonStack *current;
+ JsonValue value;
+ int token;
+
+ assert(n_stack > 0);
+ current = stack + n_stack - 1;
+
+ if (continue_end && current->expect == EXPECT_END)
+ goto done;
+
+ token = json_tokenize(&p, &string, &value, &line_token, &column_token, &tokenizer_state, line, column);
+ if (token < 0) {
+ r = token;
+ goto finish;
+ }
+
+ switch (token) {
+
+ case JSON_TOKEN_END:
+ if (current->expect != EXPECT_END) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(current->n_elements == 1);
+ assert(n_stack == 1);
+ goto done;
+
+ case JSON_TOKEN_COLON:
+
+ if (current->expect != EXPECT_OBJECT_COLON) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+
+ case JSON_TOKEN_COMMA:
+
+ if (current->expect == EXPECT_OBJECT_COMMA)
+ current->expect = EXPECT_OBJECT_NEXT_KEY;
+ else if (current->expect == EXPECT_ARRAY_COMMA)
+ current->expect = EXPECT_ARRAY_NEXT_ELEMENT;
+ else {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ break;
+
+ case JSON_TOKEN_OBJECT_OPEN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ /* Prepare the expect for when we return from the child */
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_OBJECT_FIRST_KEY,
+ .line_before = line_token,
+ .column_before = column_token,
+ };
+
+ current = stack + n_stack - 1;
+ break;
+
+ case JSON_TOKEN_OBJECT_CLOSE:
+ if (!IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_COMMA)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ r = json_variant_new_object(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+
+ line_token = current->line_before;
+ column_token = current->column_before;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case JSON_TOKEN_ARRAY_OPEN:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ /* Prepare the expect for when we return from the child */
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_ARRAY_FIRST_ELEMENT,
+ .line_before = line_token,
+ .column_before = column_token,
+ };
+
+ break;
+
+ case JSON_TOKEN_ARRAY_CLOSE:
+ if (!IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_COMMA)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ r = json_variant_new_array(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+
+ line_token = current->line_before;
+ column_token = current->column_before;
+
+ json_stack_release(current);
+ n_stack--, current--;
+ break;
+
+ case JSON_TOKEN_STRING:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_string(&add, string);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY))
+ current->expect = EXPECT_OBJECT_COLON;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_REAL:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_real(&add, value.real);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_INTEGER:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_integer(&add, value.integer);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_UNSIGNED:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_unsigned(&add, value.unsig);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_BOOLEAN:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_boolean(&add, value.boolean);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_NULL:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_null(&add);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected token");
+ }
+
+ if (add) {
+ /* If we are asked to make this parsed object sensitive, then let's apply this
+ * immediately after allocating each variant, so that when we abort half-way
+ * everything we already allocated that is then freed is correctly marked. */
+ if (FLAGS_SET(flags, JSON_PARSE_SENSITIVE))
+ json_variant_sensitive(add);
+
+ (void) json_variant_set_source(&add, source, line_token, column_token);
+
+ if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ current->elements[current->n_elements++] = TAKE_PTR(add);
+ }
+ }
+
+done:
+ assert(n_stack == 1);
+ assert(stack[0].n_elements == 1);
+
+ *ret = json_variant_ref(stack[0].elements[0]);
+ *input = p;
+ r = 0;
+
+finish:
+ for (i = 0; i < n_stack; i++)
+ json_stack_release(stack + i);
+
+ free(stack);
+
+ return r;
+}
+
+int json_parse(const char *input, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ return json_parse_internal(&input, NULL, flags, ret, ret_line, ret_column, false);
+}
+
+int json_parse_continue(const char **p, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ return json_parse_internal(p, NULL, flags, ret, ret_line, ret_column, true);
+}
+
+int json_parse_file_at(FILE *f, int dir_fd, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ _cleanup_(json_source_unrefp) JsonSource *source = NULL;
+ _cleanup_free_ char *text = NULL;
+ const char *p;
+ int r;
+
+ if (f)
+ r = read_full_stream(f, &text, NULL);
+ else if (path)
+ r = read_full_file_full(dir_fd, path, 0, NULL, &text, NULL);
+ else
+ return -EINVAL;
+ if (r < 0)
+ return r;
+
+ if (path) {
+ source = json_source_new(path);
+ if (!source)
+ return -ENOMEM;
+ }
+
+ p = text;
+ return json_parse_internal(&p, source, flags, ret, ret_line, ret_column, false);
+}
+
+int json_buildv(JsonVariant **ret, va_list ap) {
+ JsonStack *stack = NULL;
+ size_t n_stack = 1, n_stack_allocated = 0, i;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack))
+ return -ENOMEM;
+
+ stack[0] = (JsonStack) {
+ .expect = EXPECT_TOPLEVEL,
+ };
+
+ for (;;) {
+ _cleanup_(json_variant_unrefp) JsonVariant *add = NULL;
+ size_t n_subtract = 0; /* how much to subtract from current->n_suppress, i.e. how many elements would
+ * have been added to the current variant */
+ JsonStack *current;
+ int command;
+
+ assert(n_stack > 0);
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_END)
+ goto done;
+
+ command = va_arg(ap, int);
+
+ switch (command) {
+
+ case _JSON_BUILD_STRING: {
+ const char *p;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ p = va_arg(ap, const char *);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_string(&add, p);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_INTEGER: {
+ intmax_t j;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ j = va_arg(ap, intmax_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_integer(&add, j);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_UNSIGNED: {
+ uintmax_t j;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ j = va_arg(ap, uintmax_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_unsigned(&add, j);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_REAL: {
+ long double d;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ d = va_arg(ap, long double);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_real(&add, d);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_BOOLEAN: {
+ bool b;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ b = va_arg(ap, int);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_boolean(&add, b);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_NULL:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_null(&add);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+
+ case _JSON_BUILD_VARIANT:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ /* Note that we don't care for current->n_suppress here, after all the variant is already
+ * allocated anyway... */
+ add = va_arg(ap, JsonVariant*);
+ if (!add)
+ add = JSON_VARIANT_MAGIC_NULL;
+ else
+ json_variant_ref(add);
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+
+ case _JSON_BUILD_VARIANT_ARRAY: {
+ JsonVariant **array;
+ size_t n;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ array = va_arg(ap, JsonVariant**);
+ n = va_arg(ap, size_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array(&add, array, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_LITERAL: {
+ const char *l;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ l = va_arg(ap, const char *);
+
+ if (l) {
+ /* Note that we don't care for current->n_suppress here, we should generate parsing
+ * errors even in suppressed object properties */
+
+ r = json_parse(l, 0, &add, NULL, NULL);
+ if (r < 0)
+ goto finish;
+ } else
+ add = JSON_VARIANT_MAGIC_NULL;
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_ARRAY_BEGIN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_ARRAY_ELEMENT,
+ .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the
+ * new array, then we should
+ * also suppress all array
+ * members */
+ };
+
+ break;
+
+ case _JSON_BUILD_ARRAY_END:
+ if (current->expect != EXPECT_ARRAY_ELEMENT) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case _JSON_BUILD_STRV: {
+ char **l;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ l = va_arg(ap, char **);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array_strv(&add, l);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_BASE64: {
+ const void *p;
+ size_t n;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ p = va_arg(ap, const void *);
+ n = va_arg(ap, size_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_base64(&add, p, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_ID128: {
+ sd_id128_t id;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ id = va_arg(ap, sd_id128_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_id128(&add, id);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_BYTE_ARRAY: {
+ const void *array;
+ size_t n;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ array = va_arg(ap, const void*);
+ n = va_arg(ap, size_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array_bytes(&add, array, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_OBJECT_BEGIN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_OBJECT_KEY,
+ .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the
+ * new object, then we should
+ * also suppress all object
+ * members */
+ };
+
+ break;
+
+ case _JSON_BUILD_OBJECT_END:
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_object(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case _JSON_BUILD_PAIR: {
+ const char *n;
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ n = va_arg(ap, const char *);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_string(&add, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+ }
+
+ case _JSON_BUILD_PAIR_CONDITION: {
+ const char *n;
+ bool b;
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ b = va_arg(ap, int);
+ n = va_arg(ap, const char *);
+
+ if (b && current->n_suppress == 0) {
+ r = json_variant_new_string(&add, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1; /* we generated one item */
+
+ if (!b && current->n_suppress != (size_t) -1)
+ current->n_suppress += 2; /* Suppress this one and the next item */
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+ }}
+
+ /* If a variant was generated, add it to our current variant, but only if we are not supposed to suppress additions */
+ if (add && current->n_suppress == 0) {
+ if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ current->elements[current->n_elements++] = TAKE_PTR(add);
+ }
+
+ /* If we are supposed to suppress items, let's subtract how many items where generated from that
+ * counter. Except if the counter is (size_t) -1, i.e. we shall suppress an infinite number of elements
+ * on this stack level */
+ if (current->n_suppress != (size_t) -1) {
+ if (current->n_suppress <= n_subtract) /* Saturated */
+ current->n_suppress = 0;
+ else
+ current->n_suppress -= n_subtract;
+ }
+ }
+
+done:
+ assert(n_stack == 1);
+ assert(stack[0].n_elements == 1);
+
+ *ret = json_variant_ref(stack[0].elements[0]);
+ r = 0;
+
+finish:
+ for (i = 0; i < n_stack; i++)
+ json_stack_release(stack + i);
+
+ free(stack);
+
+ return r;
+}
+
+int json_build(JsonVariant **ret, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, ret);
+ r = json_buildv(ret, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int json_log_internal(
+ JsonVariant *variant,
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ PROTECT_ERRNO;
+
+ unsigned source_line, source_column;
+ char buffer[LINE_MAX];
+ const char *source;
+ va_list ap;
+ int r;
+
+ errno = ERRNO_VALUE(error);
+
+ va_start(ap, format);
+ (void) vsnprintf(buffer, sizeof buffer, format, ap);
+ va_end(ap);
+
+ if (variant) {
+ r = json_variant_get_source(variant, &source, &source_line, &source_column);
+ if (r < 0)
+ return r;
+ } else {
+ source = NULL;
+ source_line = 0;
+ source_column = 0;
+ }
+
+ if (source && source_line > 0 && source_column > 0)
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ "CONFIG_FILE=%s", source,
+ "CONFIG_LINE=%u", source_line,
+ "CONFIG_COLUMN=%u", source_column,
+ LOG_MESSAGE("%s:%u:%u: %s", source, source_line, source_column, buffer),
+ NULL);
+ else
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ LOG_MESSAGE("%s", buffer),
+ NULL);
+}
+
+int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata) {
+ const JsonDispatch *p;
+ size_t i, n, m;
+ int r, done = 0;
+ bool *found;
+
+ if (!json_variant_is_object(v)) {
+ json_log(v, flags, 0, "JSON variant is not an object.");
+
+ if (flags & JSON_PERMISSIVE)
+ return 0;
+
+ return -EINVAL;
+ }
+
+ for (p = table, m = 0; p->name; p++)
+ m++;
+
+ found = newa0(bool, m);
+
+ n = json_variant_elements(v);
+ for (i = 0; i < n; i += 2) {
+ JsonVariant *key, *value;
+
+ assert_se(key = json_variant_by_index(v, i));
+ assert_se(value = json_variant_by_index(v, i+1));
+
+ for (p = table; p->name; p++)
+ if (p->name == POINTER_MAX ||
+ streq_ptr(json_variant_string(key), p->name))
+ break;
+
+ if (p->name) { /* Found a matching entry! :-) */
+ JsonDispatchFlags merged_flags;
+
+ merged_flags = flags | p->flags;
+
+ if (p->type != _JSON_VARIANT_TYPE_INVALID &&
+ !json_variant_has_type(value, p->type)) {
+
+ json_log(value, merged_flags, 0,
+ "Object field '%s' has wrong type %s, expected %s.", json_variant_string(key),
+ json_variant_type_to_string(json_variant_type(value)), json_variant_type_to_string(p->type));
+
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return -EINVAL;
+ }
+
+ if (found[p-table]) {
+ json_log(value, merged_flags, 0, "Duplicate object field '%s'.", json_variant_string(key));
+
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return -ENOTUNIQ;
+ }
+
+ found[p-table] = true;
+
+ if (p->callback) {
+ r = p->callback(json_variant_string(key), value, merged_flags, (uint8_t*) userdata + p->offset);
+ if (r < 0) {
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return r;
+ }
+ }
+
+ done ++;
+
+ } else { /* Didn't find a matching entry! :-( */
+
+ if (bad) {
+ r = bad(json_variant_string(key), value, flags, userdata);
+ if (r < 0) {
+ if (flags & JSON_PERMISSIVE)
+ continue;
+
+ return r;
+ } else
+ done ++;
+
+ } else {
+ json_log(value, flags, 0, "Unexpected object field '%s'.", json_variant_string(key));
+
+ if (flags & JSON_PERMISSIVE)
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ }
+ }
+
+ for (p = table; p->name; p++) {
+ JsonDispatchFlags merged_flags = p->flags | flags;
+
+ if ((merged_flags & JSON_MANDATORY) && !found[p-table]) {
+ json_log(v, merged_flags, 0, "Missing object field '%s'.", p->name);
+
+ if ((merged_flags & JSON_PERMISSIVE))
+ continue;
+
+ return -ENXIO;
+ }
+ }
+
+ return done;
+}
+
+int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ bool *b = userdata;
+
+ assert(variant);
+ assert(b);
+
+ if (!json_variant_is_boolean(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a boolean.", strna(name));
+
+ *b = json_variant_boolean(variant);
+ return 0;
+}
+
+int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int *b = userdata;
+
+ assert(variant);
+ assert(b);
+
+ if (json_variant_is_null(variant)) {
+ *b = -1;
+ return 0;
+ }
+
+ if (!json_variant_is_boolean(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a boolean.", strna(name));
+
+ *b = json_variant_boolean(variant);
+ return 0;
+}
+
+int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ intmax_t *i = userdata;
+
+ assert(variant);
+ assert(i);
+
+ if (!json_variant_is_integer(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name));
+
+ *i = json_variant_integer(variant);
+ return 0;
+}
+
+int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uintmax_t *u = userdata;
+
+ assert(variant);
+ assert(u);
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an unsigned integer.", strna(name));
+
+ *u = json_variant_unsigned(variant);
+ return 0;
+}
+
+int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint32_t *u = userdata;
+
+ assert(variant);
+ assert(u);
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an unsigned integer.", strna(name));
+
+ if (json_variant_unsigned(variant) > UINT32_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' out of bounds.", strna(name));
+
+ *u = (uint32_t) json_variant_unsigned(variant);
+ return 0;
+}
+
+int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int32_t *i = userdata;
+
+ assert(variant);
+ assert(i);
+
+ if (!json_variant_is_integer(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name));
+
+ if (json_variant_integer(variant) < INT32_MIN || json_variant_integer(variant) > INT32_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' out of bounds.", strna(name));
+
+ *i = (int32_t) json_variant_integer(variant);
+ return 0;
+}
+
+int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ int r;
+
+ assert(variant);
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant)))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name));
+
+ r = free_and_strdup(s, json_variant_string(variant));
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+int json_dispatch_const_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ const char **s = userdata;
+
+ assert(variant);
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = NULL;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant)))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name));
+
+ *s = json_variant_string(variant);
+ return 0;
+}
+
+int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(variant);
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = strv_free(*s);
+ return 0;
+ }
+
+ /* Let's be flexible here: accept a single string in place of a single-item array */
+ if (json_variant_is_string(variant)) {
+ if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant)))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name));
+
+ l = strv_new(json_variant_string(variant));
+ if (!l)
+ return log_oom();
+
+ strv_free_and_replace(*s, l);
+ return 0;
+ }
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, SYNTHETIC_ERRNO(EINVAL), flags, "JSON field '%s' is not an array.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string.");
+
+ if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(e)))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name));
+
+ r = strv_extend(&l, json_variant_string(e));
+ if (r < 0)
+ return json_log(e, flags, r, "Failed to append array element: %m");
+ }
+
+ strv_free_and_replace(*s, l);
+ return 0;
+}
+
+int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ JsonVariant **p = userdata;
+
+ assert(variant);
+ assert(p);
+
+ json_variant_unref(*p);
+ *p = json_variant_ref(variant);
+
+ return 0;
+}
+
+int json_dispatch_uid_gid(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uid_t *uid = userdata;
+ uintmax_t k;
+
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+ assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+
+ DISABLE_WARNING_TYPE_LIMITS;
+ assert_cc(((uid_t) -1 < (uid_t) 0) == ((gid_t) -1 < (gid_t) 0));
+ REENABLE_WARNING;
+
+ if (json_variant_is_null(variant)) {
+ *uid = UID_INVALID;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k > UINT32_MAX || !uid_is_valid(k))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid UID/GID.", strna(name));
+
+ *uid = k;
+ return 0;
+}
+
+int json_dispatch_user_group_name(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!valid_user_group_name(n, FLAGS_SET(flags, JSON_RELAX) ? VALID_USER_RELAX : 0))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid user/group name.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+int json_dispatch_id128(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ sd_id128_t *uuid = userdata;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *uuid = SD_ID128_NULL;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ r = sd_id128_from_string(json_variant_string(variant), uuid);
+ if (r < 0)
+ return json_log(variant, flags, r, "JSON field '%s' is not a valid UID.", strna(name));
+
+ return 0;
+}
+
+int json_dispatch_unsupported(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not allowed in this object.", strna(name));
+}
+
+static int json_cmp_strings(const void *x, const void *y) {
+ JsonVariant *const *a = x, *const *b = y;
+
+ if (!json_variant_is_string(*a) || !json_variant_is_string(*b))
+ return CMP(*a, *b);
+
+ return strcmp(json_variant_string(*a), json_variant_string(*b));
+}
+
+int json_variant_sort(JsonVariant **v) {
+ _cleanup_free_ JsonVariant **a = NULL;
+ JsonVariant *n = NULL;
+ size_t i, m;
+ int r;
+
+ assert(v);
+
+ if (json_variant_is_sorted(*v))
+ return 0;
+
+ if (!json_variant_is_object(*v))
+ return -EMEDIUMTYPE;
+
+ /* Sorts they key/value pairs in an object variant */
+
+ m = json_variant_elements(*v);
+ a = new(JsonVariant*, m);
+ if (!a)
+ return -ENOMEM;
+
+ for (i = 0; i < m; i++)
+ a[i] = json_variant_by_index(*v, i);
+
+ qsort(a, m/2, sizeof(JsonVariant*)*2, json_cmp_strings);
+
+ r = json_variant_new_object(&n, a, m);
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, n);
+
+ if (!n->sorted) /* Check if this worked. This will fail if there are multiple identical keys used. */
+ return -ENOTUNIQ;
+
+ json_variant_unref(*v);
+ *v = n;
+
+ return 1;
+}
+
+int json_variant_normalize(JsonVariant **v) {
+ _cleanup_free_ JsonVariant **a = NULL;
+ JsonVariant *n = NULL;
+ size_t i, j, m;
+ int r;
+
+ assert(v);
+
+ if (json_variant_is_normalized(*v))
+ return 0;
+
+ if (!json_variant_is_object(*v) && !json_variant_is_array(*v))
+ return -EMEDIUMTYPE;
+
+ /* Sorts the key/value pairs in an object variant anywhere down the tree in the specified variant */
+
+ m = json_variant_elements(*v);
+ a = new(JsonVariant*, m);
+ if (!a)
+ return -ENOMEM;
+
+ for (i = 0; i < m; i++) {
+ a[i] = json_variant_ref(json_variant_by_index(*v, i));
+
+ r = json_variant_normalize(a + i);
+ if (r < 0)
+ goto finish;
+ }
+
+ qsort(a, m/2, sizeof(JsonVariant*)*2, json_cmp_strings);
+
+ if (json_variant_is_object(*v))
+ r = json_variant_new_object(&n, a, m);
+ else {
+ assert(json_variant_is_array(*v));
+ r = json_variant_new_array(&n, a, m);
+ }
+ if (r < 0)
+ goto finish;
+
+ json_variant_propagate_sensitive(*v, n);
+
+ if (!n->normalized) { /* Let's see if normalization worked. It will fail if there are multiple
+ * identical keys used in the same object anywhere, or if there are floating
+ * point numbers used (see below) */
+ r = -ENOTUNIQ;
+ goto finish;
+ }
+
+ json_variant_unref(*v);
+ *v = n;
+
+ r = 1;
+
+finish:
+ for (j = 0; j < i; j++)
+ json_variant_unref(a[j]);
+
+ return r;
+}
+
+bool json_variant_is_normalized(JsonVariant *v) {
+
+ /* For now, let's consider anything containing numbers not expressible as integers as
+ * non-normalized. That's because we cannot sensibly compare them due to accuracy issues, nor even
+ * store them if they are too large. */
+ if (json_variant_is_real(v) && !json_variant_is_integer(v) && !json_variant_is_unsigned(v))
+ return false;
+
+ /* The concept only applies to variants that include other variants, i.e. objects and arrays. All
+ * others are normalized anyway. */
+ if (!json_variant_is_object(v) && !json_variant_is_array(v))
+ return true;
+
+ /* Empty objects/arrays don't include any other variant, hence are always normalized too */
+ if (json_variant_elements(v) == 0)
+ return true;
+
+ return v->normalized; /* For everything else there's an explicit boolean we maintain */
+}
+
+bool json_variant_is_sorted(JsonVariant *v) {
+
+ /* Returns true if all key/value pairs of an object are properly sorted. Note that this only applies
+ * to objects, not arrays. */
+
+ if (!json_variant_is_object(v))
+ return true;
+ if (json_variant_elements(v) <= 1)
+ return true;
+
+ return v->sorted;
+}
+
+int json_variant_unbase64(JsonVariant *v, void **ret, size_t *ret_size) {
+
+ if (!json_variant_is_string(v))
+ return -EINVAL;
+
+ return unbase64mem(json_variant_string(v), (size_t) -1, ret, ret_size);
+}
+
+static const char* const json_variant_type_table[_JSON_VARIANT_TYPE_MAX] = {
+ [JSON_VARIANT_STRING] = "string",
+ [JSON_VARIANT_INTEGER] = "integer",
+ [JSON_VARIANT_UNSIGNED] = "unsigned",
+ [JSON_VARIANT_REAL] = "real",
+ [JSON_VARIANT_NUMBER] = "number",
+ [JSON_VARIANT_BOOLEAN] = "boolean",
+ [JSON_VARIANT_ARRAY] = "array",
+ [JSON_VARIANT_OBJECT] = "object",
+ [JSON_VARIANT_NULL] = "null",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(json_variant_type, JsonVariantType);
diff --git a/src/shared/json.h b/src/shared/json.h
new file mode 100644
index 0000000..0809f31
--- /dev/null
+++ b/src/shared/json.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "macro.h"
+#include "string-util.h"
+#include "log.h"
+
+/*
+ In case you wonder why we have our own JSON implementation, here are a couple of reasons why this implementation has
+ benefits over various other implementations:
+
+ - We need support for 64bit signed and unsigned integers, i.e. the full 64,5bit range of -9223372036854775808…18446744073709551615
+ - All our variants are immutable after creation
+ - Special values such as true, false, zero, null, empty strings, empty array, empty objects require zero dynamic memory
+ - Progressive parsing
+ - Our integer/real type implicitly converts, but only if that's safe and loss-lessly possible
+ - There's a "builder" for putting together objects easily in varargs function calls
+ - There's a "dispatcher" for mapping objects to C data structures
+ - Every variant optionally carries parsing location information, which simplifies debugging and parse log error generation
+ - Formatter has color, line, column support
+
+ Limitations:
+ - Doesn't allow embedded NUL in strings
+ - Can't store integers outside of the -9223372036854775808…18446744073709551615 range (it will use 'long double' for
+ values outside this range, which is lossy)
+ - Can't store negative zero (will be treated identical to positive zero, and not retained across serialization)
+ - Can't store non-integer numbers that can't be stored in "long double" losslessly
+ - Allows creation and parsing of objects with duplicate keys. The "dispatcher" will refuse them however. This means
+ we can parse and pass around such objects, but will carefully refuse them when we convert them into our own data.
+
+ (These limitations should be pretty much in line with those of other JSON implementations, in fact might be less
+ limiting in most cases even.)
+*/
+
+typedef struct JsonVariant JsonVariant;
+
+typedef enum JsonVariantType {
+ JSON_VARIANT_STRING,
+ JSON_VARIANT_INTEGER,
+ JSON_VARIANT_UNSIGNED,
+ JSON_VARIANT_REAL,
+ JSON_VARIANT_NUMBER, /* This a pseudo-type: we can never create variants of this type, but we use it as wildcard check for the above three types */
+ JSON_VARIANT_BOOLEAN,
+ JSON_VARIANT_ARRAY,
+ JSON_VARIANT_OBJECT,
+ JSON_VARIANT_NULL,
+ _JSON_VARIANT_TYPE_MAX,
+ _JSON_VARIANT_TYPE_INVALID = -1
+} JsonVariantType;
+
+int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n);
+int json_variant_new_base64(JsonVariant **ret, const void *p, size_t n);
+int json_variant_new_integer(JsonVariant **ret, intmax_t i);
+int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u);
+int json_variant_new_real(JsonVariant **ret, long double d);
+int json_variant_new_boolean(JsonVariant **ret, bool b);
+int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n);
+int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n);
+int json_variant_new_array_strv(JsonVariant **ret, char **l);
+int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n);
+int json_variant_new_null(JsonVariant **ret);
+int json_variant_new_id128(JsonVariant **ret, sd_id128_t id);
+
+static inline int json_variant_new_string(JsonVariant **ret, const char *s) {
+ return json_variant_new_stringn(ret, s, (size_t) -1);
+}
+
+JsonVariant *json_variant_ref(JsonVariant *v);
+JsonVariant *json_variant_unref(JsonVariant *v);
+void json_variant_unref_many(JsonVariant **array, size_t n);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(JsonVariant *, json_variant_unref);
+
+const char *json_variant_string(JsonVariant *v);
+intmax_t json_variant_integer(JsonVariant *v);
+uintmax_t json_variant_unsigned(JsonVariant *v);
+long double json_variant_real(JsonVariant *v);
+bool json_variant_boolean(JsonVariant *v);
+
+JsonVariantType json_variant_type(JsonVariant *v);
+bool json_variant_has_type(JsonVariant *v, JsonVariantType type);
+
+static inline bool json_variant_is_string(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_STRING);
+}
+
+static inline bool json_variant_is_integer(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_INTEGER);
+}
+
+static inline bool json_variant_is_unsigned(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_UNSIGNED);
+}
+
+static inline bool json_variant_is_real(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_REAL);
+}
+
+static inline bool json_variant_is_number(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_NUMBER);
+}
+
+static inline bool json_variant_is_boolean(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_BOOLEAN);
+}
+
+static inline bool json_variant_is_array(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_ARRAY);
+}
+
+static inline bool json_variant_is_object(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_OBJECT);
+}
+
+static inline bool json_variant_is_null(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_NULL);
+}
+
+bool json_variant_is_negative(JsonVariant *v);
+bool json_variant_is_blank_object(JsonVariant *v);
+bool json_variant_is_blank_array(JsonVariant *v);
+bool json_variant_is_normalized(JsonVariant *v);
+bool json_variant_is_sorted(JsonVariant *v);
+
+size_t json_variant_elements(JsonVariant *v);
+JsonVariant *json_variant_by_index(JsonVariant *v, size_t index);
+JsonVariant *json_variant_by_key(JsonVariant *v, const char *key);
+JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key);
+
+bool json_variant_equal(JsonVariant *a, JsonVariant *b);
+
+void json_variant_sensitive(JsonVariant *v);
+bool json_variant_is_sensitive(JsonVariant *v);
+
+struct json_variant_foreach_state {
+ JsonVariant *variant;
+ size_t idx;
+};
+
+#define JSON_VARIANT_ARRAY_FOREACH(i, v) \
+ for (struct json_variant_foreach_state _state = { (v), 0 }; \
+ json_variant_is_array(_state.variant) && \
+ _state.idx < json_variant_elements(_state.variant) && \
+ ({ i = json_variant_by_index(_state.variant, _state.idx); \
+ true; }); \
+ _state.idx++)
+
+#define JSON_VARIANT_OBJECT_FOREACH(k, e, v) \
+ for (struct json_variant_foreach_state _state = { (v), 0 }; \
+ json_variant_is_object(_state.variant) && \
+ _state.idx < json_variant_elements(_state.variant) && \
+ ({ k = json_variant_string(json_variant_by_index(_state.variant, _state.idx)); \
+ e = json_variant_by_index(_state.variant, _state.idx + 1); \
+ true; }); \
+ _state.idx += 2)
+
+int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column);
+
+typedef enum JsonFormatFlags {
+ JSON_FORMAT_NEWLINE = 1 << 0, /* suffix with newline */
+ JSON_FORMAT_PRETTY = 1 << 1, /* add internal whitespace to appeal to human readers */
+ JSON_FORMAT_PRETTY_AUTO = 1 << 2, /* same, but only if connected to a tty (and JSON_FORMAT_NEWLINE otherwise) */
+ JSON_FORMAT_COLOR = 1 << 3, /* insert ANSI color sequences */
+ JSON_FORMAT_COLOR_AUTO = 1 << 4, /* insert ANSI color sequences if colors_enabled() says so */
+ JSON_FORMAT_SOURCE = 1 << 5, /* prefix with source filename/line/column */
+ JSON_FORMAT_SSE = 1 << 6, /* prefix/suffix with W3C server-sent events */
+ JSON_FORMAT_SEQ = 1 << 7, /* prefix/suffix with RFC 7464 application/json-seq */
+ JSON_FORMAT_FLUSH = 1 << 8, /* call fflush() after dumping JSON */
+} JsonFormatFlags;
+
+int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret);
+void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix);
+
+int json_variant_filter(JsonVariant **v, char **to_remove);
+
+int json_variant_set_field(JsonVariant **v, const char *field, JsonVariant *value);
+int json_variant_set_field_string(JsonVariant **v, const char *field, const char *value);
+int json_variant_set_field_integer(JsonVariant **v, const char *field, intmax_t value);
+int json_variant_set_field_unsigned(JsonVariant **v, const char *field, uintmax_t value);
+int json_variant_set_field_boolean(JsonVariant **v, const char *field, bool b);
+int json_variant_set_field_strv(JsonVariant **v, const char *field, char **l);
+
+int json_variant_append_array(JsonVariant **v, JsonVariant *element);
+
+int json_variant_merge(JsonVariant **v, JsonVariant *m);
+
+int json_variant_strv(JsonVariant *v, char ***ret);
+
+int json_variant_sort(JsonVariant **v);
+int json_variant_normalize(JsonVariant **v);
+
+typedef enum JsonParseFlags {
+ JSON_PARSE_SENSITIVE = 1 << 0, /* mark variant as "sensitive", i.e. something containing secret key material or such */
+} JsonParseFlags;
+
+int json_parse(const char *string, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+int json_parse_continue(const char **p, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+int json_parse_file_at(FILE *f, int dir_fd, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+
+static inline int json_parse_file(FILE *f, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ return json_parse_file_at(f, AT_FDCWD, path, flags, ret, ret_line, ret_column);
+}
+
+enum {
+ _JSON_BUILD_STRING,
+ _JSON_BUILD_INTEGER,
+ _JSON_BUILD_UNSIGNED,
+ _JSON_BUILD_REAL,
+ _JSON_BUILD_BOOLEAN,
+ _JSON_BUILD_ARRAY_BEGIN,
+ _JSON_BUILD_ARRAY_END,
+ _JSON_BUILD_OBJECT_BEGIN,
+ _JSON_BUILD_OBJECT_END,
+ _JSON_BUILD_PAIR,
+ _JSON_BUILD_PAIR_CONDITION,
+ _JSON_BUILD_NULL,
+ _JSON_BUILD_VARIANT,
+ _JSON_BUILD_VARIANT_ARRAY,
+ _JSON_BUILD_LITERAL,
+ _JSON_BUILD_STRV,
+ _JSON_BUILD_BASE64,
+ _JSON_BUILD_ID128,
+ _JSON_BUILD_BYTE_ARRAY,
+ _JSON_BUILD_MAX,
+};
+
+#define JSON_BUILD_STRING(s) _JSON_BUILD_STRING, ({ const char *_x = s; _x; })
+#define JSON_BUILD_INTEGER(i) _JSON_BUILD_INTEGER, ({ intmax_t _x = i; _x; })
+#define JSON_BUILD_UNSIGNED(u) _JSON_BUILD_UNSIGNED, ({ uintmax_t _x = u; _x; })
+#define JSON_BUILD_REAL(d) _JSON_BUILD_REAL, ({ long double _x = d; _x; })
+#define JSON_BUILD_BOOLEAN(b) _JSON_BUILD_BOOLEAN, ({ bool _x = b; _x; })
+#define JSON_BUILD_ARRAY(...) _JSON_BUILD_ARRAY_BEGIN, __VA_ARGS__, _JSON_BUILD_ARRAY_END
+#define JSON_BUILD_EMPTY_ARRAY _JSON_BUILD_ARRAY_BEGIN, _JSON_BUILD_ARRAY_END
+#define JSON_BUILD_OBJECT(...) _JSON_BUILD_OBJECT_BEGIN, __VA_ARGS__, _JSON_BUILD_OBJECT_END
+#define JSON_BUILD_EMPTY_OBJECT _JSON_BUILD_OBJECT_BEGIN, _JSON_BUILD_OBJECT_END
+#define JSON_BUILD_PAIR(n, ...) _JSON_BUILD_PAIR, ({ const char *_x = n; _x; }), __VA_ARGS__
+#define JSON_BUILD_PAIR_CONDITION(c, n, ...) _JSON_BUILD_PAIR_CONDITION, ({ bool _x = c; _x; }), ({ const char *_x = n; _x; }), __VA_ARGS__
+#define JSON_BUILD_NULL _JSON_BUILD_NULL
+#define JSON_BUILD_VARIANT(v) _JSON_BUILD_VARIANT, ({ JsonVariant *_x = v; _x; })
+#define JSON_BUILD_VARIANT_ARRAY(v, n) _JSON_BUILD_VARIANT_ARRAY, ({ JsonVariant **_x = v; _x; }), ({ size_t _y = n; _y; })
+#define JSON_BUILD_LITERAL(l) _JSON_BUILD_LITERAL, ({ const char *_x = l; _x; })
+#define JSON_BUILD_STRV(l) _JSON_BUILD_STRV, ({ char **_x = l; _x; })
+#define JSON_BUILD_BASE64(p, n) _JSON_BUILD_BASE64, ({ const void *_x = p; _x; }), ({ size_t _y = n; _y; })
+#define JSON_BUILD_ID128(id) _JSON_BUILD_ID128, ({ sd_id128_t _x = id; _x; })
+#define JSON_BUILD_BYTE_ARRAY(v, n) _JSON_BUILD_BYTE_ARRAY, ({ const void *_x = v; _x; }), ({ size_t _y = n; _y; })
+
+int json_build(JsonVariant **ret, ...);
+int json_buildv(JsonVariant **ret, va_list ap);
+
+/* A bitmask of flags used by the dispatch logic. Note that this is a combined bit mask, that is generated from the bit
+ * mask originally passed into json_dispatch(), the individual bitmask associated with the static JsonDispatch callout
+ * entry, as well the bitmask specified for json_log() calls */
+typedef enum JsonDispatchFlags {
+ /* The following three may be set in JsonDispatch's .flags field or the json_dispatch() flags parameter */
+ JSON_PERMISSIVE = 1 << 0, /* Shall parsing errors be considered fatal for this property? */
+ JSON_MANDATORY = 1 << 1, /* Should existence of this property be mandatory? */
+ JSON_LOG = 1 << 2, /* Should the parser log about errors? */
+ JSON_SAFE = 1 << 3, /* Don't accept "unsafe" strings in json_dispatch_string() + json_dispatch_string() */
+ JSON_RELAX = 1 << 4, /* Use relaxed user name checking in json_dispatch_user_group_name */
+
+ /* The following two may be passed into log_json() in addition to the three above */
+ JSON_DEBUG = 1 << 4, /* Indicates that this log message is a debug message */
+ JSON_WARNING = 1 << 5, /* Indicates that this log message is a warning message */
+} JsonDispatchFlags;
+
+typedef int (*JsonDispatchCallback)(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+typedef struct JsonDispatch {
+ const char *name;
+ JsonVariantType type;
+ JsonDispatchCallback callback;
+ size_t offset;
+ JsonDispatchFlags flags;
+} JsonDispatch;
+
+int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata);
+
+int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_const_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_uid_gid(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_user_group_name(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_id128(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_unsupported(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+assert_cc(sizeof(uintmax_t) == sizeof(uint64_t));
+#define json_dispatch_uint64 json_dispatch_unsigned
+
+assert_cc(sizeof(intmax_t) == sizeof(int64_t));
+#define json_dispatch_int64 json_dispatch_integer
+
+assert_cc(sizeof(uint32_t) == sizeof(unsigned));
+#define json_dispatch_uint json_dispatch_uint32
+
+assert_cc(sizeof(int32_t) == sizeof(int));
+#define json_dispatch_int json_dispatch_int32
+
+static inline int json_dispatch_level(JsonDispatchFlags flags) {
+
+ /* Did the user request no logging? If so, then never log higher than LOG_DEBUG. Also, if this is marked as
+ * debug message, then also log at debug level. */
+
+ if (!(flags & JSON_LOG) ||
+ (flags & JSON_DEBUG))
+ return LOG_DEBUG;
+
+ /* Are we invoked in permissive mode, or is this explicitly marked as warning message? Then this should be
+ * printed at LOG_WARNING */
+ if (flags & (JSON_PERMISSIVE|JSON_WARNING))
+ return LOG_WARNING;
+
+ /* Otherwise it's an error. */
+ return LOG_ERR;
+}
+
+int json_log_internal(JsonVariant *variant, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8);
+
+#define json_log(variant, flags, error, ...) \
+ ({ \
+ int _level = json_dispatch_level(flags), _e = (error); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? json_log_internal(variant, _level, _e, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \
+ : -ERRNO_VALUE(_e); \
+ })
+
+#define json_log_oom(variant, flags) \
+ json_log(variant, flags, SYNTHETIC_ERRNO(ENOMEM), "Out of memory.")
+
+#define JSON_VARIANT_STRING_CONST(x) _JSON_VARIANT_STRING_CONST(UNIQ, (x))
+
+#define _JSON_VARIANT_STRING_CONST(xq, x) \
+ ({ \
+ _align_(2) static const char UNIQ_T(json_string_const, xq)[] = (x); \
+ assert((((uintptr_t) UNIQ_T(json_string_const, xq)) & 1) == 0); \
+ (JsonVariant*) ((uintptr_t) UNIQ_T(json_string_const, xq) + 1); \
+ })
+
+int json_variant_unbase64(JsonVariant *v, void **ret, size_t *ret_size);
+
+const char *json_variant_type_to_string(JsonVariantType t);
+JsonVariantType json_variant_type_from_string(const char *s);
diff --git a/src/shared/libcrypt-util.c b/src/shared/libcrypt-util.c
new file mode 100644
index 0000000..5b31541
--- /dev/null
+++ b/src/shared/libcrypt-util.c
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_CRYPT_H
+/* libxcrypt is a replacement for glibc's libcrypt, and libcrypt might be
+ * removed from glibc at some point. As part of the removal, defines for
+ * crypt(3) are dropped from unistd.h, and we must include crypt.h instead.
+ *
+ * Newer versions of glibc (v2.0+) already ship crypt.h with a definition
+ * of crypt(3) as well, so we simply include it if it is present. MariaDB,
+ * MySQL, PostgreSQL, Perl and some other wide-spread packages do it the
+ * same way since ages without any problems.
+ */
+# include <crypt.h>
+#else
+# include <unistd.h>
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "libcrypt-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_stdlib.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int make_salt(char **ret) {
+
+#if HAVE_CRYPT_GENSALT_RA
+ const char *e;
+ char *salt;
+
+ /* If we have crypt_gensalt_ra() we default to the "preferred method" (i.e. usually yescrypt).
+ * crypt_gensalt_ra() is usually provided by libxcrypt. */
+
+ e = secure_getenv("SYSTEMD_CRYPT_PREFIX");
+ if (!e)
+#if HAVE_CRYPT_PREFERRED_METHOD
+ e = crypt_preferred_method();
+#else
+ e = "$6$";
+#endif
+
+ log_debug("Generating salt for hash prefix: %s", e);
+
+ salt = crypt_gensalt_ra(e, 0, NULL, 0);
+ if (!salt)
+ return -errno;
+
+ *ret = salt;
+ return 0;
+#else
+ /* If crypt_gensalt_ra() is not available, we use SHA512 and generate the salt on our own. */
+
+ static const char table[] =
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "0123456789"
+ "./";
+
+ uint8_t raw[16];
+ char *salt, *j;
+ size_t i;
+ int r;
+
+ /* This is a bit like crypt_gensalt_ra(), but doesn't require libcrypt, and doesn't do anything but
+ * SHA512, i.e. is legacy-free and minimizes our deps. */
+
+ assert_cc(sizeof(table) == 64U + 1U);
+
+ log_debug("Generating fallback salt for hash prefix: $6$");
+
+ /* Insist on the best randomness by setting RANDOM_BLOCK, this is about keeping passwords secret after all. */
+ r = genuine_random_bytes(raw, sizeof(raw), RANDOM_BLOCK);
+ if (r < 0)
+ return r;
+
+ salt = new(char, 3+sizeof(raw)+1+1);
+ if (!salt)
+ return -ENOMEM;
+
+ /* We only bother with SHA512 hashed passwords, the rest is legacy, and we don't do legacy. */
+ j = stpcpy(salt, "$6$");
+ for (i = 0; i < sizeof(raw); i++)
+ j[i] = table[raw[i] & 63];
+ j[i++] = '$';
+ j[i] = 0;
+
+ *ret = salt;
+ return 0;
+#endif
+}
+
+#if HAVE_CRYPT_RA
+# define CRYPT_RA_NAME "crypt_ra"
+#else
+# define CRYPT_RA_NAME "crypt_r"
+
+/* Provide a poor man's fallback that uses a fixed size buffer. */
+
+static char* systemd_crypt_ra(const char *phrase, const char *setting, void **data, int *size) {
+ assert(data);
+ assert(size);
+
+ /* We allocate the buffer because crypt(3) says: struct crypt_data may be quite large (32kB in this
+ * implementation of libcrypt; over 128kB in some other implementations). This is large enough that
+ * it may be unwise to allocate it on the stack. */
+
+ if (!*data) {
+ *data = new0(struct crypt_data, 1);
+ if (!*data) {
+ errno = -ENOMEM;
+ return NULL;
+ }
+
+ *size = (int) (sizeof(struct crypt_data));
+ }
+
+ char *t = crypt_r(phrase, setting, *data);
+ if (!t)
+ return NULL;
+
+ /* crypt_r may return a pointer to an invalid hashed password on error. Our callers expect NULL on
+ * error, so let's just return that. */
+ if (t[0] == '*')
+ return NULL;
+
+ return t;
+}
+
+#define crypt_ra systemd_crypt_ra
+
+#endif
+
+int hash_password_full(const char *password, void **cd_data, int *cd_size, char **ret) {
+ _cleanup_free_ char *salt = NULL;
+ _cleanup_(erase_and_freep) void *_cd_data = NULL;
+ char *p;
+ int r, _cd_size = 0;
+
+ assert(!!cd_data == !!cd_size);
+
+ r = make_salt(&salt);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to generate salt: %m");
+
+ errno = 0;
+ p = crypt_ra(password, salt, cd_data ?: &_cd_data, cd_size ?: &_cd_size);
+ if (!p)
+ return log_debug_errno(errno_or_else(SYNTHETIC_ERRNO(EINVAL)),
+ CRYPT_RA_NAME "() failed: %m");
+
+ p = strdup(p);
+ if (!p)
+ return -ENOMEM;
+
+ *ret = p;
+ return 0;
+}
+
+bool looks_like_hashed_password(const char *s) {
+ /* Returns false if the specified string is certainly not a hashed UNIX password. crypt(5) lists
+ * various hashing methods. We only reject (return false) strings which are documented to have
+ * different meanings.
+ *
+ * In particular, we allow locked passwords, i.e. strings starting with "!", including just "!",
+ * i.e. the locked empty password. See also fc58c0c7bf7e4f525b916e3e5be0de2307fef04e.
+ */
+ if (!s)
+ return false;
+
+ s += strspn(s, "!"); /* Skip (possibly duplicated) locking prefix */
+
+ return !STR_IN_SET(s, "x", "*");
+}
+
+int test_password_one(const char *hashed_password, const char *password) {
+ _cleanup_(erase_and_freep) void *cd_data = NULL;
+ int cd_size = 0;
+ const char *k;
+
+ errno = 0;
+ k = crypt_ra(password, hashed_password, &cd_data, &cd_size);
+ if (!k) {
+ if (errno == ENOMEM)
+ return -ENOMEM;
+ /* Unknown or unavailable hashing method or string too short */
+ return 0;
+ }
+
+ return streq(k, hashed_password);
+}
+
+int test_password_many(char **hashed_password, const char *password) {
+ char **hpw;
+ int r;
+
+ STRV_FOREACH(hpw, hashed_password) {
+ r = test_password_one(*hpw, password);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/shared/libcrypt-util.h b/src/shared/libcrypt-util.h
new file mode 100644
index 0000000..5b9b945
--- /dev/null
+++ b/src/shared/libcrypt-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int make_salt(char **ret);
+int hash_password_full(const char *password, void **cd_data, int *cd_size, char **ret);
+static inline int hash_password(const char *password, char **ret) {
+ return hash_password_full(password, NULL, NULL, ret);
+}
+bool looks_like_hashed_password(const char *s);
+int test_password_one(const char *hashed_password, const char *password);
+int test_password_many(char **hashed_password, const char *password);
diff --git a/src/shared/libmount-util.h b/src/shared/libmount-util.h
new file mode 100644
index 0000000..db9728c
--- /dev/null
+++ b/src/shared/libmount-util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+/* This needs to be after sys/mount.h */
+#include <libmount.h>
+
+#include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_table*, mnt_free_table);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_iter*, mnt_free_iter);
+
+static inline int libmount_parse(
+ const char *path,
+ FILE *source,
+ struct libmnt_table **ret_table,
+ struct libmnt_iter **ret_iter) {
+
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ int r;
+
+ /* Older libmount seems to require this. */
+ assert(!source || path);
+
+ table = mnt_new_table();
+ iter = mnt_new_iter(MNT_ITER_FORWARD);
+ if (!table || !iter)
+ return -ENOMEM;
+
+ /* If source or path are specified, we use on the functions which ignore utab.
+ * Only if both are empty, we use mnt_table_parse_mtab(). */
+
+ if (source)
+ r = mnt_table_parse_stream(table, source, path);
+ else if (path)
+ r = mnt_table_parse_file(table, path);
+ else
+ r = mnt_table_parse_mtab(table, NULL);
+ if (r < 0)
+ return r;
+
+ *ret_table = TAKE_PTR(table);
+ *ret_iter = TAKE_PTR(iter);
+ return 0;
+}
diff --git a/src/shared/libshared.sym b/src/shared/libshared.sym
new file mode 100644
index 0000000..6a7495a
--- /dev/null
+++ b/src/shared/libshared.sym
@@ -0,0 +1,3 @@
+SD_SHARED {
+ global: *;
+};
diff --git a/src/shared/linux/README b/src/shared/linux/README
new file mode 100644
index 0000000..46d5547
--- /dev/null
+++ b/src/shared/linux/README
@@ -0,0 +1,8 @@
+The files in this directory are copied from kernel-5.2, and the following modifications are applied:
+- auto_dev-ioctl.h: set AUTOFS_DEV_IOCTL_VERSION_MINOR to 0
+- auto_dev-ioctl.h: define AUTOFS_IOCTL if not defined
+- bpf_insn.h: This is imported from samples/bpf/bpf_insn.h
+- bpf_insn.h: BPF_JMP_A() macro is also imported from include/linux/filter.h
+- dm-ioctl.h: set DM_VERSION_MINOR to 27
+- ethtool.h: define __KERNEL_DIV_ROUND_UP if not defined
+- ethtool.h: add casts in ethtool_cmd_speed()
diff --git a/src/shared/linux/auto_dev-ioctl.h b/src/shared/linux/auto_dev-ioctl.h
new file mode 100644
index 0000000..261546c
--- /dev/null
+++ b/src/shared/linux/auto_dev-ioctl.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright 2008 Red Hat, Inc. All rights reserved.
+ * Copyright 2008 Ian Kent <raven@themaw.net>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#ifndef _UAPI_LINUX_AUTO_DEV_IOCTL_H
+#define _UAPI_LINUX_AUTO_DEV_IOCTL_H
+
+#include <linux/auto_fs.h>
+#include <linux/string.h>
+
+#define AUTOFS_DEVICE_NAME "autofs"
+
+#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
+
+#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
+
+/*
+ * An ioctl interface for autofs mount point control.
+ */
+
+struct args_protover {
+ __u32 version;
+};
+
+struct args_protosubver {
+ __u32 sub_version;
+};
+
+struct args_openmount {
+ __u32 devid;
+};
+
+struct args_ready {
+ __u32 token;
+};
+
+struct args_fail {
+ __u32 token;
+ __s32 status;
+};
+
+struct args_setpipefd {
+ __s32 pipefd;
+};
+
+struct args_timeout {
+ __u64 timeout;
+};
+
+struct args_requester {
+ __u32 uid;
+ __u32 gid;
+};
+
+struct args_expire {
+ __u32 how;
+};
+
+struct args_askumount {
+ __u32 may_umount;
+};
+
+struct args_ismountpoint {
+ union {
+ struct args_in {
+ __u32 type;
+ } in;
+ struct args_out {
+ __u32 devid;
+ __u32 magic;
+ } out;
+ };
+};
+
+/*
+ * All the ioctls use this structure.
+ * When sending a path size must account for the total length
+ * of the chunk of memory otherwise is is the size of the
+ * structure.
+ */
+
+struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ /* Command parameters */
+
+ union {
+ struct args_protover protover;
+ struct args_protosubver protosubver;
+ struct args_openmount openmount;
+ struct args_ready ready;
+ struct args_fail fail;
+ struct args_setpipefd setpipefd;
+ struct args_timeout timeout;
+ struct args_requester requester;
+ struct args_expire expire;
+ struct args_askumount askumount;
+ struct args_ismountpoint ismountpoint;
+ };
+
+ char path[0];
+};
+
+static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
+{
+ memset(in, 0, AUTOFS_DEV_IOCTL_SIZE);
+ in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+ in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+ in->size = AUTOFS_DEV_IOCTL_SIZE;
+ in->ioctlfd = -1;
+}
+
+enum {
+ /* Get various version info */
+ AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD,
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
+
+ /* Open mount ioctl fd */
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
+
+ /* Close mount ioctl fd */
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
+
+ /* Mount/expire status returns */
+ AUTOFS_DEV_IOCTL_READY_CMD,
+ AUTOFS_DEV_IOCTL_FAIL_CMD,
+
+ /* Activate/deactivate autofs mount */
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD,
+
+ /* Expiry timeout */
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
+
+ /* Get mount last requesting uid and gid */
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD,
+
+ /* Check for eligible expire candidates */
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD,
+
+ /* Request busy status */
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
+
+ /* Check if path is a mountpoint */
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
+};
+
+#ifndef AUTOFS_IOCTL
+#define AUTOFS_IOCTL 0x93
+#endif
+
+#define AUTOFS_DEV_IOCTL_VERSION \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_OPENMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_READY \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_FAIL \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_SETPIPEFD \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CATATONIC \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_TIMEOUT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_REQUESTER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_EXPIRE \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
+
+#endif /* _UAPI_LINUX_AUTO_DEV_IOCTL_H */
diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h
new file mode 100644
index 0000000..359fc37
--- /dev/null
+++ b/src/shared/linux/bpf.h
@@ -0,0 +1,3057 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+/* jmp encodings */
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+};
+
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
+ BPF_PROG_TEST_RUN,
+ BPF_PROG_GET_NEXT_ID,
+ BPF_MAP_GET_NEXT_ID,
+ BPF_PROG_GET_FD_BY_ID,
+ BPF_MAP_GET_FD_BY_ID,
+ BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
+ BPF_RAW_TRACEPOINT_OPEN,
+ BPF_BTF_LOAD,
+ BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
+ BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ BPF_MAP_TYPE_LPM_TRIE,
+ BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
+ BPF_MAP_TYPE_XSKMAP,
+ BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
+ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
+};
+
+/* Note that tracing related programs such as
+ * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT}
+ * are not subject to a stable API since kernel internal data
+ * structures can change from release to release and may
+ * therefore break existing tracing BPF programs. Tracing BPF
+ * programs correspond to /a/ specific kernel which is to be
+ * analyzed, and not /a/ specific kernel /and/ all future ones.
+ */
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_PROG_TYPE_LWT_IN,
+ BPF_PROG_TYPE_LWT_OUT,
+ BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_RAW_TRACEPOINT,
+ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_PROG_TYPE_LWT_SEG6LOCAL,
+ BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_PROG_TYPE_FLOW_DISSECTOR,
+};
+
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_LIRC_MODE2,
+ BPF_FLOW_DISSECTOR,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags has to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first)
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When children program makes decision (like picking TCP CA or sock bind)
+ * parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
+ * verifier will allow any alignment whatsoever. On platforms
+ * with strict alignment requirements for loads ands stores (such
+ * as sparc and mips) the verifier validates that all loads and
+ * stores provably follow this requirement. This flag turns that
+ * checking and enforcement off.
+ *
+ * It is mostly used for testing when we want to validate the
+ * context and memory access aspects of the verifier, but because
+ * of an unaligned access the alignment check would trigger before
+ * the one we are interested in.
+ */
+#define BPF_F_ANY_ALIGNMENT (1U << 1)
+
+/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+#define BPF_PSEUDO_MAP_FD 1
+
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+/* flags for BPF_MAP_CREATE command */
+#define BPF_F_NO_PREALLOC (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object */
+#define BPF_F_RDONLY (1U << 3)
+#define BPF_F_WRONLY (1U << 4)
+
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED (1U << 6)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+enum bpf_stack_build_id_status {
+ /* user space need an empty entry to identify end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fallback to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
+ __u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
+ char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* not used */
+ __u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 prog_ifindex; /* ifindex of netdev to prep for */
+ /* For some prog types expected attach type must be known at
+ * load time to verify attach type specific parts of prog
+ * (context accesses, allowed helpers, etc).
+ */
+ __u32 expected_attach_type;
+ __u32 prog_btf_fd; /* fd pointing to BTF type data */
+ __u32 func_info_rec_size; /* userspace bpf_func_info size */
+ __aligned_u64 func_info; /* func info */
+ __u32 func_info_cnt; /* number of bpf_func_info records */
+ __u32 line_info_rec_size; /* userspace bpf_line_info size */
+ __aligned_u64 line_info; /* line info */
+ __u32 line_info_cnt; /* number of bpf_line_info records */
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ __u32 file_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ __u32 attach_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+ __u32 prog_fd;
+ __u32 retval;
+ __u32 data_size_in; /* input: len of data_in */
+ __u32 data_size_out; /* input/output: len of data_out
+ * returns ENOSPC if data_out
+ * is too small.
+ */
+ __aligned_u64 data_in;
+ __aligned_u64 data_out;
+ __u32 repeat;
+ __u32 duration;
+ } test;
+
+ struct { /* anonymous struct used by BPF_*_GET_*_ID */
+ union {
+ __u32 start_id;
+ __u32 prog_id;
+ __u32 map_id;
+ __u32 btf_id;
+ };
+ __u32 next_id;
+ __u32 open_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+ __u32 bpf_fd;
+ __u32 info_len;
+ __aligned_u64 info;
+ } info;
+
+ struct { /* anonymous struct used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
+
+ struct {
+ __u64 name;
+ __u32 prog_fd;
+ } raw_tracepoint;
+
+ struct { /* anonymous struct for BPF_BTF_LOAD */
+ __aligned_u64 btf;
+ __aligned_u64 btf_log_buf;
+ __u32 btf_size;
+ __u32 btf_log_size;
+ __u32 btf_log_level;
+ };
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prod_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
+} __attribute__((aligned(8)));
+
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ * $ ./scripts/bpf_helpers_doc.py \
+ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ * $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Perform a lookup in *map* for an entry associated to *key*.
+ * Return
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * Description
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Delete entry with *key* from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * Description
+ * For tracing programs, safely attempt to read *size* bytes from
+ * address *src* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * Description
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as an eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * bloc (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ * Return
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * Description
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ * Return
+ * A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * Description
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ * Return
+ * The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * Description
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * Description
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * Description
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * Description
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * Return
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * Description
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ * Return
+ * The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * Description
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * Description
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Populate tunnel metadata for packet associated to *skb.* The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * Description
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf event can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read**\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * time provides more features over the **bpf_perf_event_read**\
+ * () interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ * Return
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ * Return
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * Description
+ * Retrieve the realm or the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * identifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * Return
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* needs also be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suitable for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * Description
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * Description
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * Description
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ * Return
+ * The checksum result, or a negative error code in case of
+ * failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ * Return
+ * The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * Description
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * Description
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * Description
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * Description
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ * Return
+ * The 32-bit hash.
+ *
+ * u64 bpf_get_current_task(void)
+ * Return
+ * A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * Description
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * Description
+ * Check whether the probe is being run is the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* task belongs to the cgroup2.
+ * * 1, if the *skb* task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * Description
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**
+ * to pull in once the non-linear parts, then retesting and
+ * eventually access the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * Description
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ * Return
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * Description
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * Description
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ * Return
+ * The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read()** helper here instead
+ * to read the string would require to estimate the length at
+ * compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * Description
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a unique socket
+ * identifier per namespace.
+ * Return
+ * A 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Return
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * Description
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ * Return
+ * 0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * Description
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There is a single supported mode at this time:
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ * Return
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * For en eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *opval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock_ops* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * The supported callback values that *argval* can combine are:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program to be called again until *bytes* have
+ * been accumulated.
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IP configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * Description
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *ctx*, which is a pointer
+ * to the context on which the tracing program is executed.
+ * To store the stacktrace, the bpf program provides *buf* with
+ * a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to sufficient large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * Description
+ * This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * it provides an easy way to load *len* bytes from *offset*
+ * from the packet associated to *skb*, into the buffer pointed
+ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * a fifth argument *start_header* exists in order to select a
+ * base offset to start from. *start_header* can be one of:
+ *
+ * **BPF_HDR_START_MAC**
+ * Base offset to load data from is *skb*'s mac header.
+ * **BPF_HDR_START_NET**
+ * Base offset to load data from is *skb*'s network header.
+ *
+ * In general, "direct packet access" is the preferred method to
+ * access packet data, however, this helper is in particular useful
+ * in socket filters where *skb*\ **->data** does not always point
+ * to the start of the mac header and where "direct packet access"
+ * is not available.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (ie., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst
+ * or ipv6_dst based on family, smac is set to mac address of
+ * egress device, dmac is set to nexthop mac address, rt_metric
+ * is set to metric from route (IPv4/IPv6 only), and ifindex
+ * is set to the device index of the nexthop from the FIB lookup.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ * Return
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * packet is not forwarded or needs assist from full stack
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a sockhash *map* referencing sockets.
+ * The *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ * if the verdeict eBPF program returns **SK_PASS**), redirect it
+ * to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * Description
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * Description
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be as well
+ * positive (growing) as negative (shrinking).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * Description
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * int bpf_rc_repeat(void *ctx)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * Description
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag resp. identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the BPF program type, a local storage area
+ * can be shared between multiple instances of the BPF program,
+ * running simultaneously.
+ *
+ * A user should care about the synchronization by himself.
+ * For example, by using the **BPF_STX_XADD** instruction to alter
+ * the shared data.
+ * Return
+ * A pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Select a **SO_REUSEPORT** socket from a
+ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * It checks the selected socket is matching the incoming
+ * request in the socket buffer.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_sk_release(struct bpf_sock *sock)
+ * Description
+ * Release the reference held by *sock*. *sock* must be a
+ * non-**NULL** pointer that was returned from
+ * **bpf_sk_lookup_xxx**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
+ * Description
+ * For socket policies, insert *len* bytes into *msg* at offset
+ * *start*.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it may want to insert metadata or options into the *msg*.
+ * This can later be read and used by any of the lower layer BPF
+ * hooks.
+ *
+ * This helper may fail if under memory pressure (a malloc
+ * fails) in these cases BPF programs will get an appropriate
+ * error and BPF programs will need to handle them.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
+ * Description
+ * Will remove *pop* bytes from a *msg* starting at byte *start*.
+ * This may result in **ENOMEM** errors under certain situations if
+ * an allocation and copy are required due to a full ring buffer.
+ * However, the helper will try to avoid doing the allocation
+ * if possible. Other errors can occur if input parameters are
+ * invalid either due to *start* byte not being valid part of *msg*
+ * payload and/or *pop* value being to large.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded pointer movement.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ */
+#define __BPF_FUNC_MAPPER(FN) \
+ FN(unspec), \
+ FN(map_lookup_elem), \
+ FN(map_update_elem), \
+ FN(map_delete_elem), \
+ FN(probe_read), \
+ FN(ktime_get_ns), \
+ FN(trace_printk), \
+ FN(get_prandom_u32), \
+ FN(get_smp_processor_id), \
+ FN(skb_store_bytes), \
+ FN(l3_csum_replace), \
+ FN(l4_csum_replace), \
+ FN(tail_call), \
+ FN(clone_redirect), \
+ FN(get_current_pid_tgid), \
+ FN(get_current_uid_gid), \
+ FN(get_current_comm), \
+ FN(get_cgroup_classid), \
+ FN(skb_vlan_push), \
+ FN(skb_vlan_pop), \
+ FN(skb_get_tunnel_key), \
+ FN(skb_set_tunnel_key), \
+ FN(perf_event_read), \
+ FN(redirect), \
+ FN(get_route_realm), \
+ FN(perf_event_output), \
+ FN(skb_load_bytes), \
+ FN(get_stackid), \
+ FN(csum_diff), \
+ FN(skb_get_tunnel_opt), \
+ FN(skb_set_tunnel_opt), \
+ FN(skb_change_proto), \
+ FN(skb_change_type), \
+ FN(skb_under_cgroup), \
+ FN(get_hash_recalc), \
+ FN(get_current_task), \
+ FN(probe_write_user), \
+ FN(current_task_under_cgroup), \
+ FN(skb_change_tail), \
+ FN(skb_pull_data), \
+ FN(csum_update), \
+ FN(set_hash_invalid), \
+ FN(get_numa_node_id), \
+ FN(skb_change_head), \
+ FN(xdp_adjust_head), \
+ FN(probe_read_str), \
+ FN(get_socket_cookie), \
+ FN(get_socket_uid), \
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value), \
+ FN(getsockopt), \
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data), \
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state), \
+ FN(get_stack), \
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup), \
+ FN(sock_hash_update), \
+ FN(msg_redirect_hash), \
+ FN(sk_redirect_hash), \
+ FN(lwt_push_encap), \
+ FN(lwt_seg6_store_bytes), \
+ FN(lwt_seg6_adjust_srh), \
+ FN(lwt_seg6_action), \
+ FN(rc_repeat), \
+ FN(rc_keydown), \
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage), \
+ FN(sk_select_reuseport), \
+ FN(skb_ancestor_cgroup_id), \
+ FN(sk_lookup_tcp), \
+ FN(sk_lookup_udp), \
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem), \
+ FN(msg_push_data), \
+ FN(msg_pop_data), \
+ FN(rc_pointer_rel),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+ __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+ __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
+#define BPF_F_MARK_ENFORCE (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
+#define BPF_F_SKIP_FIELD_MASK 0xffULL
+#define BPF_F_USER_STACK (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
+#define BPF_F_FAST_STACK_CMP (1ULL << 9)
+#define BPF_F_REUSE_STACKID (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID (1ULL << 11)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
+
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
+#define BPF_F_INDEX_MASK 0xffffffffULL
+#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+
+/* Current network namespace */
+#define BPF_F_CURRENT_NETNS (-1L)
+
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET,
+};
+
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+ BPF_HDR_START_MAC,
+ BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+ BPF_LWT_ENCAP_SEG6,
+ BPF_LWT_ENCAP_SEG6_INLINE
+};
+
+#define __bpf_md_ptr(type, name) \
+union { \
+ type name; \
+ __u64 :64; \
+} __attribute__((aligned(8)))
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+ __u32 data;
+ __u32 data_end;
+ __u32 napi_id;
+
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
+ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
+ __u64 tstamp;
+ __u32 wire_len;
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext; /* Padding, future use. */
+ __u32 tunnel_label;
+};
+
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ __u16 ext; /* Padding, future use. */
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled seprately, see XDP_*.
+ */
+enum bpf_ret_code {
+ BPF_OK = 0,
+ /* 1 reserved */
+ BPF_DROP = 2,
+ /* 3-6 reserved */
+ BPF_REDIRECT = 7,
+ /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+ __u32 mark;
+ __u32 priority;
+ __u32 src_ip4; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_port; /* Allows 4-byte read.
+ * Stored in host byte order
+ */
+};
+
+struct bpf_sock_tuple {
+ union {
+ struct {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ } ipv4;
+ struct {
+ __be32 saddr[4];
+ __be32 daddr[4];
+ __be16 sport;
+ __be16 dport;
+ } ipv6;
+ };
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+ XDP_REDIRECT,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+ __u32 data_meta;
+ /* Below access go through struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
+};
+
+enum sk_action {
+ SK_DROP = 0,
+ SK_PASS,
+};
+
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ __bpf_md_ptr(void *, data);
+ __bpf_md_ptr(void *, data_end);
+
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 size; /* Total size of sk_msg */
+};
+
+struct sk_reuseport_md {
+ /*
+ * Start of directly accessible data. It begins from
+ * the tcp/udp header.
+ */
+ __bpf_md_ptr(void *, data);
+ /* End of directly accessible data */
+ __bpf_md_ptr(void *, data_end);
+ /*
+ * Total length of packet (starting from the tcp/udp header).
+ * Note that the directly accessible bytes (data_end - data)
+ * could be less than this "len". Those bytes could be
+ * indirectly read by a helper "bpf_skb_load_bytes()".
+ */
+ __u32 len;
+ /*
+ * Eth protocol in the mac header (network byte order). e.g.
+ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+ */
+ __u32 eth_protocol;
+ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+ __u32 bind_inany; /* Is sock bound to an INANY address? */
+ __u32 hash; /* A hash of the packet 4 tuples */
+};
+
+#define BPF_TAG_SIZE 8
+
+struct bpf_prog_info {
+ __u32 type;
+ __u32 id;
+ __u8 tag[BPF_TAG_SIZE];
+ __u32 jited_prog_len;
+ __u32 xlated_prog_len;
+ __aligned_u64 jited_prog_insns;
+ __aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 gpl_compatible:1;
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 nr_jited_ksyms;
+ __u32 nr_jited_func_lens;
+ __aligned_u64 jited_ksyms;
+ __aligned_u64 jited_func_lens;
+ __u32 btf_id;
+ __u32 func_info_rec_size;
+ __aligned_u64 func_info;
+ __u32 nr_func_info;
+ __u32 nr_line_info;
+ __aligned_u64 line_info;
+ __aligned_u64 jited_line_info;
+ __u32 nr_jited_line_info;
+ __u32 line_info_rec_size;
+ __u32 jited_line_info_rec_size;
+ __u32 nr_prog_tags;
+ __aligned_u64 prog_tags;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+ __u32 type;
+ __u32 id;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 :32;
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 btf_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+ __aligned_u64 btf;
+ __u32 btf_size;
+ __u32 id;
+} __attribute__((aligned(8)));
+
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach attach type).
+ */
+struct bpf_sock_addr {
+ __u32 user_family; /* Allows 4-byte read, but no write. */
+ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_port; /* Allows 4-byte read and write.
+ * Stored in network byte order
+ */
+ __u32 family; /* Allows 4-byte read, but no write */
+ __u32 type; /* Allows 4-byte read, but no write */
+ __u32 protocol; /* Allows 4-byte read, but no write */
+ __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+};
+
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of this fields are in network (bigendian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
+ };
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+};
+
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is
+ * based on the path and may be
+ * dependent on the congestion control
+ * algorithm. In general it indicates
+ * a congestion threshold. RTTs above
+ * this indicate congestion
+ */
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
+ * socket transition to LISTEN state.
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+ __u32 access_type;
+ __u32 major;
+ __u32 minor;
+};
+
+struct bpf_raw_tracepoint_args {
+ __u64 args[0];
+};
+
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+
+enum {
+ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
+ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
+ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
+ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
+ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
+ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+};
+
+struct bpf_fib_lookup {
+ /* input: network family for lookup (AF_INET, AF_INET6)
+ * output: network family of egress nexthop
+ */
+ __u8 family;
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+
+ /* input: L3 device index for lookup
+ * output: device index from FIB lookup
+ */
+ __u32 ifindex;
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowinfo; /* AF_INET6, flow_label + priority */
+
+ /* output: metric of fib result (IPv4/IPv6 only) */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ };
+
+ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+ * network header. output: bpf_fib_lookup sets to gateway address
+ * if FIB lookup returns gateway route
+ */
+ union {
+ __be32 ipv4_dst;
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
+struct bpf_flow_keys {
+ __u16 nhoff;
+ __u16 thoff;
+ __u16 addr_proto; /* ETH_P_* of valid addrs */
+ __u8 is_frag;
+ __u8 is_first_frag;
+ __u8 is_encap;
+ __u8 ip_proto;
+ __be16 n_proto;
+ __be16 sport;
+ __be16 dport;
+ union {
+ struct {
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+ };
+ struct {
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+ };
+};
+
+struct bpf_func_info {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
+#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
+
+struct bpf_line_info {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/src/shared/linux/bpf_common.h b/src/shared/linux/bpf_common.h
new file mode 100644
index 0000000..ee97668
--- /dev/null
+++ b/src/shared/linux/bpf_common.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_BPF_COMMON_H__
+#define _UAPI__LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00 /* 32-bit */
+#define BPF_H 0x08 /* 16-bit */
+#define BPF_B 0x10 /* 8-bit */
+/* eBPF BPF_DW 0x18 64-bit */
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* _UAPI__LINUX_BPF_COMMON_H__ */
diff --git a/src/shared/linux/bpf_insn.h b/src/shared/linux/bpf_insn.h
new file mode 100644
index 0000000..d8d9fb2
--- /dev/null
+++ b/src/shared/linux/bpf_insn.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* eBPF instruction mini library */
+#ifndef __BPF_INSN_H
+#define __BPF_INSN_H
+
+struct bpf_insn;
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#define BPF_JMP_A(OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_JA, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#endif
diff --git a/src/shared/linux/dm-ioctl.h b/src/shared/linux/dm-ioctl.h
new file mode 100644
index 0000000..b3aeec7
--- /dev/null
+++ b/src/shared/linux/dm-ioctl.h
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2004 - 2009 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_V4_H
+#define _LINUX_DM_IOCTL_V4_H
+
+#include <linux/types.h>
+
+#define DM_DIR "mapper" /* Slashes not supported */
+#define DM_CONTROL_NODE "control"
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables. Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' or 'inactive' table
+ * slots will be filled. The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device or set its uuid if none was previously supplied.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed. Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device. If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_. Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device. This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device. The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ *
+ * DM_TARGET_MSG:
+ * Pass a message string to the target at a specific offset of a device.
+ *
+ * DM_DEV_SET_GEOMETRY:
+ * Set the geometry of a device by passing in a string in this format:
+ *
+ * "cylinders heads sectors_per_track start_sector"
+ *
+ * Beware that CHS geometry is nearly obsolete and only provided
+ * for compatibility with dm devices that can be booted by a PC
+ * BIOS. See struct hd_geometry for range limits. Also note that
+ * the geometry is erased if the device size changes.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start. If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+ /*
+ * The version number is made up of three parts:
+ * major - no backward or forward compatibility,
+ * minor - only backwards compatible,
+ * patch - both backwards and forwards compatible.
+ *
+ * All clients of the ioctl interface should fill in the
+ * version number of the interface that they were
+ * compiled with.
+ *
+ * All recognised ioctl commands (ie. those that don't
+ * return -ENOTTY) fill out this field, even if the
+ * command failed.
+ */
+ __u32 version[3]; /* in/out */
+ __u32 data_size; /* total size of data passed in
+ * including this struct */
+
+ __u32 data_start; /* offset to start of data
+ * relative to start of this struct */
+
+ __u32 target_count; /* in/out */
+ __s32 open_count; /* out */
+ __u32 flags; /* in/out */
+
+ /*
+ * event_nr holds either the event number (input and output) or the
+ * udev cookie value (input only).
+ * The DM_DEV_WAIT ioctl takes an event number as input.
+ * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+ * use the field as a cookie to return in the DM_COOKIE
+ * variable with the uevents they issue.
+ * For output, the ioctls return the event number, not the cookie.
+ */
+ __u32 event_nr; /* in/out */
+ __u32 padding;
+
+ __u64 dev; /* in/out */
+
+ char name[DM_NAME_LEN]; /* device name */
+ char uuid[DM_UUID_LEN]; /* unique identifier for
+ * the block device */
+ char data[7]; /* padding or data */
+};
+
+/*
+ * Used to specify tables. These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+ __u64 sector_start;
+ __u64 length;
+ __s32 status; /* used when reading from kernel only */
+
+ /*
+ * Location of the next dm_target_spec.
+ * - When specifying targets on a DM_TABLE_LOAD command, this value is
+ * the number of bytes from the start of the "current" dm_target_spec
+ * to the start of the "next" dm_target_spec.
+ * - When retrieving targets on a DM_TABLE_STATUS command, this value
+ * is the number of bytes from the start of the first dm_target_spec
+ * (that follows the dm_ioctl struct) to the start of the "next"
+ * dm_target_spec.
+ */
+ __u32 next;
+
+ char target_type[DM_MAX_TYPE_NAME];
+
+ /*
+ * Parameter string starts immediately after this object.
+ * Be careful to add padding after string to ensure correct
+ * alignment of subsequent dm_target_spec.
+ */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+ __u32 count; /* Array size */
+ __u32 padding; /* unused */
+ __u64 dev[0]; /* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+ __u64 dev;
+ __u32 next; /* offset to the next record from
+ the _start_ of this */
+ char name[0];
+};
+
+/*
+ * Used to retrieve the target versions
+ */
+struct dm_target_versions {
+ __u32 next;
+ __u32 version[3];
+
+ char name[0];
+};
+
+/*
+ * Used to pass message to a target
+ */
+struct dm_target_msg {
+ __u64 sector; /* Device sector */
+
+ char message[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Top level cmds */
+ DM_VERSION_CMD = 0,
+ DM_REMOVE_ALL_CMD,
+ DM_LIST_DEVICES_CMD,
+
+ /* device level cmds */
+ DM_DEV_CREATE_CMD,
+ DM_DEV_REMOVE_CMD,
+ DM_DEV_RENAME_CMD,
+ DM_DEV_SUSPEND_CMD,
+ DM_DEV_STATUS_CMD,
+ DM_DEV_WAIT_CMD,
+
+ /* Table level cmds */
+ DM_TABLE_LOAD_CMD,
+ DM_TABLE_CLEAR_CMD,
+ DM_TABLE_DEPS_CMD,
+ DM_TABLE_STATUS_CMD,
+
+ /* Added later */
+ DM_LIST_VERSIONS_CMD,
+ DM_TARGET_MSG_CMD,
+ DM_DEV_SET_GEOMETRY_CMD,
+ DM_DEV_ARM_POLL_CMD,
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_ARM_POLL_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+
+#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
+#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR 4
+#define DM_VERSION_MINOR 27
+#define DM_VERSION_PATCHLEVEL 0
+#define DM_VERSION_EXTRA "-ioctl (2019-01-18)"
+
+/* Status bits */
+#define DM_READONLY_FLAG (1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */
+
+/*
+ * This flag is now ignored.
+ */
+#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */
+
+/*
+ * Set this to avoid attempting to freeze any filesystem when suspending.
+ */
+#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */
+
+/*
+ * Set this to suspend without flushing queued ios.
+ * Also disables flushing uncommitted changes in the thin target before
+ * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT.
+ */
+#define DM_NOFLUSH_FLAG (1 << 11) /* In */
+
+/*
+ * If set, any table information returned will relate to the inactive
+ * table instead of the live one. Always check DM_INACTIVE_PRESENT_FLAG
+ * is set before using the data returned.
+ */
+#define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */
+
+/*
+ * If set, a uevent was generated for which the caller may need to wait.
+ */
+#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */
+
+/*
+ * If set, rename changes the uuid not the name. Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG (1 << 14) /* In */
+
+/*
+ * If set, all buffers are wiped after use. Use when sending
+ * or requesting sensitive data such as an encryption key.
+ */
+#define DM_SECURE_DATA_FLAG (1 << 15) /* In */
+
+/*
+ * If set, a message generated output data.
+ */
+#define DM_DATA_OUT_FLAG (1 << 16) /* Out */
+
+/*
+ * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if
+ * the device cannot be removed immediately because it is still in use
+ * it should instead be scheduled for removal when it gets closed.
+ *
+ * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this
+ * flag indicates that the device is scheduled to be removed when it
+ * gets closed.
+ */
+#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
+
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */
+
+#endif /* _LINUX_DM_IOCTL_H */
diff --git a/src/shared/linux/ethtool.h b/src/shared/linux/ethtool.h
new file mode 100644
index 0000000..b06c630
--- /dev/null
+++ b/src/shared/linux/ethtool.h
@@ -0,0 +1,2021 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ethtool.h: Defines for Linux ethtool.
+ *
+ * Copyright (C) 1998 David S. Miller (davem@redhat.com)
+ * Copyright 2001 Jeff Garzik <jgarzik@pobox.com>
+ * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
+ * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
+ * christopher.leech@intel.com,
+ * scott.feldman@intel.com)
+ * Portions Copyright (C) Sun Microsystems 2008
+ */
+
+#ifndef _UAPI_LINUX_ETHTOOL_H
+#define _UAPI_LINUX_ETHTOOL_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#ifndef __KERNEL__
+#include <limits.h> /* for INT_MAX */
+#endif
+
+#ifndef __KERNEL_DIV_ROUND_UP
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#endif
+
+/* All structures exposed to userland should be defined such that they
+ * have the same layout for 32-bit and 64-bit userland.
+ */
+
+/**
+ * struct ethtool_cmd - DEPRECATED, link control and status
+ * This structure is DEPRECATED, please use struct ethtool_link_settings.
+ * @cmd: Command number = %ETHTOOL_GSET or %ETHTOOL_SSET
+ * @supported: Bitmask of %SUPPORTED_* flags for the link modes,
+ * physical connectors and other link features for which the
+ * interface supports autonegotiation or auto-detection.
+ * Read-only.
+ * @advertising: Bitmask of %ADVERTISED_* flags for the link modes,
+ * physical connectors and other link features that are
+ * advertised through autonegotiation or enabled for
+ * auto-detection.
+ * @speed: Low bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN
+ * @duplex: Duplex mode; one of %DUPLEX_*
+ * @port: Physical connector type; one of %PORT_*
+ * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not
+ * applicable. For clause 45 PHYs this is the PRTAD.
+ * @transceiver: Historically used to distinguish different possible
+ * PHY types, but not in a consistent way. Deprecated.
+ * @autoneg: Enable/disable autonegotiation and auto-detection;
+ * either %AUTONEG_DISABLE or %AUTONEG_ENABLE
+ * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO
+ * protocols supported by the interface; 0 if unknown.
+ * Read-only.
+ * @maxtxpkt: Historically used to report TX IRQ coalescing; now
+ * obsoleted by &struct ethtool_coalesce. Read-only; deprecated.
+ * @maxrxpkt: Historically used to report RX IRQ coalescing; now
+ * obsoleted by &struct ethtool_coalesce. Read-only; deprecated.
+ * @speed_hi: High bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN
+ * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of
+ * %ETH_TP_MDI_*. If the status is unknown or not applicable, the
+ * value will be %ETH_TP_MDI_INVALID. Read-only.
+ * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of
+ * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads
+ * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected.
+ * When written successfully, the link should be renegotiated if
+ * necessary.
+ * @lp_advertising: Bitmask of %ADVERTISED_* flags for the link modes
+ * and other link features that the link partner advertised
+ * through autonegotiation; 0 if unknown or not applicable.
+ * Read-only.
+ *
+ * The link speed in Mbps is split between @speed and @speed_hi. Use
+ * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to
+ * access it.
+ *
+ * If autonegotiation is disabled, the speed and @duplex represent the
+ * fixed link mode and are writable if the driver supports multiple
+ * link modes. If it is enabled then they are read-only; if the link
+ * is up they represent the negotiated link mode; if the link is down,
+ * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and
+ * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode.
+ *
+ * Some hardware interfaces may have multiple PHYs and/or physical
+ * connectors fitted or do not allow the driver to detect which are
+ * fitted. For these interfaces @port and/or @phy_address may be
+ * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE.
+ * Otherwise, attempts to write different values may be ignored or
+ * rejected.
+ *
+ * Users should assume that all fields not marked read-only are
+ * writable and subject to validation by the driver. They should use
+ * %ETHTOOL_GSET to get the current values before making specific
+ * changes and then applying them with %ETHTOOL_SSET.
+ *
+ * Deprecated fields should be ignored by both users and drivers.
+ */
+struct ethtool_cmd {
+ __u32 cmd;
+ __u32 supported;
+ __u32 advertising;
+ __u16 speed;
+ __u8 duplex;
+ __u8 port;
+ __u8 phy_address;
+ __u8 transceiver;
+ __u8 autoneg;
+ __u8 mdio_support;
+ __u32 maxtxpkt;
+ __u32 maxrxpkt;
+ __u16 speed_hi;
+ __u8 eth_tp_mdix;
+ __u8 eth_tp_mdix_ctrl;
+ __u32 lp_advertising;
+ __u32 reserved[2];
+};
+
+static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep,
+ __u32 speed)
+{
+ ep->speed = (__u16)(speed & 0xFFFF);
+ ep->speed_hi = (__u16)(speed >> 16);
+}
+
+static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep)
+{
+ return ((__u32) ep->speed_hi << 16) | (__u32) ep->speed;
+}
+
+/* Device supports clause 22 register access to PHY or peripherals
+ * using the interface defined in <linux/mii.h>. This should not be
+ * set if there are known to be no such peripherals present or if
+ * the driver only emulates clause 22 registers for compatibility.
+ */
+#define ETH_MDIO_SUPPORTS_C22 1
+
+/* Device supports clause 45 register access to PHY or peripherals
+ * using the interface defined in <linux/mii.h> and <linux/mdio.h>.
+ * This should not be set if there are known to be no such peripherals
+ * present.
+ */
+#define ETH_MDIO_SUPPORTS_C45 2
+
+#define ETHTOOL_FWVERS_LEN 32
+#define ETHTOOL_BUSINFO_LEN 32
+#define ETHTOOL_EROMVERS_LEN 32
+
+/**
+ * struct ethtool_drvinfo - general driver and device information
+ * @cmd: Command number = %ETHTOOL_GDRVINFO
+ * @driver: Driver short name. This should normally match the name
+ * in its bus driver structure (e.g. pci_driver::name). Must
+ * not be an empty string.
+ * @version: Driver version string; may be an empty string
+ * @fw_version: Firmware version string; may be an empty string
+ * @erom_version: Expansion ROM version string; may be an empty string
+ * @bus_info: Device bus address. This should match the dev_name()
+ * string for the underlying bus device, if there is one. May be
+ * an empty string.
+ * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
+ * %ETHTOOL_SPFLAGS commands; also the number of strings in the
+ * %ETH_SS_PRIV_FLAGS set
+ * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
+ * command; also the number of strings in the %ETH_SS_STATS set
+ * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
+ * command; also the number of strings in the %ETH_SS_TEST set
+ * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
+ * and %ETHTOOL_SEEPROM commands, in bytes
+ * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
+ * command, in bytes
+ *
+ * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
+ * strings in any string set (from Linux 2.6.34).
+ *
+ * Drivers should set at most @driver, @version, @fw_version and
+ * @bus_info in their get_drvinfo() implementation. The ethtool
+ * core fills in the other fields using other driver operations.
+ */
+struct ethtool_drvinfo {
+ __u32 cmd;
+ char driver[32];
+ char version[32];
+ char fw_version[ETHTOOL_FWVERS_LEN];
+ char bus_info[ETHTOOL_BUSINFO_LEN];
+ char erom_version[ETHTOOL_EROMVERS_LEN];
+ char reserved2[12];
+ __u32 n_priv_flags;
+ __u32 n_stats;
+ __u32 testinfo_len;
+ __u32 eedump_len;
+ __u32 regdump_len;
+};
+
+#define SOPASS_MAX 6
+
+/**
+ * struct ethtool_wolinfo - Wake-On-Lan configuration
+ * @cmd: Command number = %ETHTOOL_GWOL or %ETHTOOL_SWOL
+ * @supported: Bitmask of %WAKE_* flags for supported Wake-On-Lan modes.
+ * Read-only.
+ * @wolopts: Bitmask of %WAKE_* flags for enabled Wake-On-Lan modes.
+ * @sopass: SecureOn(tm) password; meaningful only if %WAKE_MAGICSECURE
+ * is set in @wolopts.
+ */
+struct ethtool_wolinfo {
+ __u32 cmd;
+ __u32 supported;
+ __u32 wolopts;
+ __u8 sopass[SOPASS_MAX];
+};
+
+/* for passing single values */
+struct ethtool_value {
+ __u32 cmd;
+ __u32 data;
+};
+
+#define PFC_STORM_PREVENTION_AUTO 0xffff
+#define PFC_STORM_PREVENTION_DISABLE 0
+
+enum tunable_id {
+ ETHTOOL_ID_UNSPEC,
+ ETHTOOL_RX_COPYBREAK,
+ ETHTOOL_TX_COPYBREAK,
+ ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
+ /*
+ * Add your fresh new tunable attribute above and remember to update
+ * tunable_strings[] in net/core/ethtool.c
+ */
+ __ETHTOOL_TUNABLE_COUNT,
+};
+
+enum tunable_type_id {
+ ETHTOOL_TUNABLE_UNSPEC,
+ ETHTOOL_TUNABLE_U8,
+ ETHTOOL_TUNABLE_U16,
+ ETHTOOL_TUNABLE_U32,
+ ETHTOOL_TUNABLE_U64,
+ ETHTOOL_TUNABLE_STRING,
+ ETHTOOL_TUNABLE_S8,
+ ETHTOOL_TUNABLE_S16,
+ ETHTOOL_TUNABLE_S32,
+ ETHTOOL_TUNABLE_S64,
+};
+
+struct ethtool_tunable {
+ __u32 cmd;
+ __u32 id;
+ __u32 type_id;
+ __u32 len;
+ void *data[0];
+};
+
+#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff
+#define DOWNSHIFT_DEV_DISABLE 0
+
+/* Time in msecs after which link is reported as down
+ * 0 = lowest time supported by the PHY
+ * 0xff = off, link down detection according to standard
+ */
+#define ETHTOOL_PHY_FAST_LINK_DOWN_ON 0
+#define ETHTOOL_PHY_FAST_LINK_DOWN_OFF 0xff
+
+/* Energy Detect Power Down (EDPD) is a feature supported by some PHYs, where
+ * the PHY's RX & TX blocks are put into a low-power mode when there is no
+ * link detected (typically cable is un-plugged). For RX, only a minimal
+ * link-detection is available, and for TX the PHY wakes up to send link pulses
+ * to avoid any lock-ups in case the peer PHY may also be running in EDPD mode.
+ *
+ * Some PHYs may support configuration of the wake-up interval for TX pulses,
+ * and some PHYs may support only disabling TX pulses entirely. For the latter
+ * a special value is required (ETHTOOL_PHY_EDPD_NO_TX) so that this can be
+ * configured from userspace (should the user want it).
+ *
+ * The interval units for TX wake-up are in milliseconds, since this should
+ * cover a reasonable range of intervals:
+ * - from 1 millisecond, which does not sound like much of a power-saver
+ * - to ~65 seconds which is quite a lot to wait for a link to come up when
+ * plugging a cable
+ */
+#define ETHTOOL_PHY_EDPD_DFLT_TX_MSECS 0xffff
+#define ETHTOOL_PHY_EDPD_NO_TX 0xfffe
+#define ETHTOOL_PHY_EDPD_DISABLE 0
+
+enum phy_tunable_id {
+ ETHTOOL_PHY_ID_UNSPEC,
+ ETHTOOL_PHY_DOWNSHIFT,
+ ETHTOOL_PHY_FAST_LINK_DOWN,
+ ETHTOOL_PHY_EDPD,
+ /*
+ * Add your fresh new phy tunable attribute above and remember to update
+ * phy_tunable_strings[] in net/core/ethtool.c
+ */
+ __ETHTOOL_PHY_TUNABLE_COUNT,
+};
+
+/**
+ * struct ethtool_regs - hardware register dump
+ * @cmd: Command number = %ETHTOOL_GREGS
+ * @version: Dump format version. This is driver-specific and may
+ * distinguish different chips/revisions. Drivers must use new
+ * version numbers whenever the dump format changes in an
+ * incompatible way.
+ * @len: On entry, the real length of @data. On return, the number of
+ * bytes used.
+ * @data: Buffer for the register dump
+ *
+ * Users should use %ETHTOOL_GDRVINFO to find the maximum length of
+ * a register dump for the interface. They must allocate the buffer
+ * immediately following this structure.
+ */
+struct ethtool_regs {
+ __u32 cmd;
+ __u32 version;
+ __u32 len;
+ __u8 data[0];
+};
+
+/**
+ * struct ethtool_eeprom - EEPROM dump
+ * @cmd: Command number = %ETHTOOL_GEEPROM, %ETHTOOL_GMODULEEEPROM or
+ * %ETHTOOL_SEEPROM
+ * @magic: A 'magic cookie' value to guard against accidental changes.
+ * The value passed in to %ETHTOOL_SEEPROM must match the value
+ * returned by %ETHTOOL_GEEPROM for the same device. This is
+ * unused when @cmd is %ETHTOOL_GMODULEEEPROM.
+ * @offset: Offset within the EEPROM to begin reading/writing, in bytes
+ * @len: On entry, number of bytes to read/write. On successful
+ * return, number of bytes actually read/written. In case of
+ * error, this may indicate at what point the error occurred.
+ * @data: Buffer to read/write from
+ *
+ * Users may use %ETHTOOL_GDRVINFO or %ETHTOOL_GMODULEINFO to find
+ * the length of an on-board or module EEPROM, respectively. They
+ * must allocate the buffer immediately following this structure.
+ */
+struct ethtool_eeprom {
+ __u32 cmd;
+ __u32 magic;
+ __u32 offset;
+ __u32 len;
+ __u8 data[0];
+};
+
+/**
+ * struct ethtool_eee - Energy Efficient Ethernet information
+ * @cmd: ETHTOOL_{G,S}EEE
+ * @supported: Mask of %SUPPORTED_* flags for the speed/duplex combinations
+ * for which there is EEE support.
+ * @advertised: Mask of %ADVERTISED_* flags for the speed/duplex combinations
+ * advertised as eee capable.
+ * @lp_advertised: Mask of %ADVERTISED_* flags for the speed/duplex
+ * combinations advertised by the link partner as eee capable.
+ * @eee_active: Result of the eee auto negotiation.
+ * @eee_enabled: EEE configured mode (enabled/disabled).
+ * @tx_lpi_enabled: Whether the interface should assert its tx lpi, given
+ * that eee was negotiated.
+ * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting
+ * its tx lpi (after reaching 'idle' state). Effective only when eee
+ * was negotiated and tx_lpi_enabled was set.
+ */
+struct ethtool_eee {
+ __u32 cmd;
+ __u32 supported;
+ __u32 advertised;
+ __u32 lp_advertised;
+ __u32 eee_active;
+ __u32 eee_enabled;
+ __u32 tx_lpi_enabled;
+ __u32 tx_lpi_timer;
+ __u32 reserved[2];
+};
+
+/**
+ * struct ethtool_modinfo - plugin module eeprom information
+ * @cmd: %ETHTOOL_GMODULEINFO
+ * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx
+ * @eeprom_len: Length of the eeprom
+ *
+ * This structure is used to return the information to
+ * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM.
+ * The type code indicates the eeprom data format
+ */
+struct ethtool_modinfo {
+ __u32 cmd;
+ __u32 type;
+ __u32 eeprom_len;
+ __u32 reserved[8];
+};
+
+/**
+ * struct ethtool_coalesce - coalescing parameters for IRQs and stats updates
+ * @cmd: ETHTOOL_{G,S}COALESCE
+ * @rx_coalesce_usecs: How many usecs to delay an RX interrupt after
+ * a packet arrives.
+ * @rx_max_coalesced_frames: Maximum number of packets to receive
+ * before an RX interrupt.
+ * @rx_coalesce_usecs_irq: Same as @rx_coalesce_usecs, except that
+ * this value applies while an IRQ is being serviced by the host.
+ * @rx_max_coalesced_frames_irq: Same as @rx_max_coalesced_frames,
+ * except that this value applies while an IRQ is being serviced
+ * by the host.
+ * @tx_coalesce_usecs: How many usecs to delay a TX interrupt after
+ * a packet is sent.
+ * @tx_max_coalesced_frames: Maximum number of packets to be sent
+ * before a TX interrupt.
+ * @tx_coalesce_usecs_irq: Same as @tx_coalesce_usecs, except that
+ * this value applies while an IRQ is being serviced by the host.
+ * @tx_max_coalesced_frames_irq: Same as @tx_max_coalesced_frames,
+ * except that this value applies while an IRQ is being serviced
+ * by the host.
+ * @stats_block_coalesce_usecs: How many usecs to delay in-memory
+ * statistics block updates. Some drivers do not have an
+ * in-memory statistic block, and in such cases this value is
+ * ignored. This value must not be zero.
+ * @use_adaptive_rx_coalesce: Enable adaptive RX coalescing.
+ * @use_adaptive_tx_coalesce: Enable adaptive TX coalescing.
+ * @pkt_rate_low: Threshold for low packet rate (packets per second).
+ * @rx_coalesce_usecs_low: How many usecs to delay an RX interrupt after
+ * a packet arrives, when the packet rate is below @pkt_rate_low.
+ * @rx_max_coalesced_frames_low: Maximum number of packets to be received
+ * before an RX interrupt, when the packet rate is below @pkt_rate_low.
+ * @tx_coalesce_usecs_low: How many usecs to delay a TX interrupt after
+ * a packet is sent, when the packet rate is below @pkt_rate_low.
+ * @tx_max_coalesced_frames_low: Maximum nuumber of packets to be sent before
+ * a TX interrupt, when the packet rate is below @pkt_rate_low.
+ * @pkt_rate_high: Threshold for high packet rate (packets per second).
+ * @rx_coalesce_usecs_high: How many usecs to delay an RX interrupt after
+ * a packet arrives, when the packet rate is above @pkt_rate_high.
+ * @rx_max_coalesced_frames_high: Maximum number of packets to be received
+ * before an RX interrupt, when the packet rate is above @pkt_rate_high.
+ * @tx_coalesce_usecs_high: How many usecs to delay a TX interrupt after
+ * a packet is sent, when the packet rate is above @pkt_rate_high.
+ * @tx_max_coalesced_frames_high: Maximum number of packets to be sent before
+ * a TX interrupt, when the packet rate is above @pkt_rate_high.
+ * @rate_sample_interval: How often to do adaptive coalescing packet rate
+ * sampling, measured in seconds. Must not be zero.
+ *
+ * Each pair of (usecs, max_frames) fields specifies that interrupts
+ * should be coalesced until
+ * (usecs > 0 && time_since_first_completion >= usecs) ||
+ * (max_frames > 0 && completed_frames >= max_frames)
+ *
+ * It is illegal to set both usecs and max_frames to zero as this
+ * would cause interrupts to never be generated. To disable
+ * coalescing, set usecs = 0 and max_frames = 1.
+ *
+ * Some implementations ignore the value of max_frames and use the
+ * condition time_since_first_completion >= usecs
+ *
+ * This is deprecated. Drivers for hardware that does not support
+ * counting completions should validate that max_frames == !rx_usecs.
+ *
+ * Adaptive RX/TX coalescing is an algorithm implemented by some
+ * drivers to improve latency under low packet rates and improve
+ * throughput under high packet rates. Some drivers only implement
+ * one of RX or TX adaptive coalescing. Anything not implemented by
+ * the driver causes these values to be silently ignored.
+ *
+ * When the packet rate is below @pkt_rate_high but above
+ * @pkt_rate_low (both measured in packets per second) the
+ * normal {rx,tx}_* coalescing parameters are used.
+ */
+struct ethtool_coalesce {
+ __u32 cmd;
+ __u32 rx_coalesce_usecs;
+ __u32 rx_max_coalesced_frames;
+ __u32 rx_coalesce_usecs_irq;
+ __u32 rx_max_coalesced_frames_irq;
+ __u32 tx_coalesce_usecs;
+ __u32 tx_max_coalesced_frames;
+ __u32 tx_coalesce_usecs_irq;
+ __u32 tx_max_coalesced_frames_irq;
+ __u32 stats_block_coalesce_usecs;
+ __u32 use_adaptive_rx_coalesce;
+ __u32 use_adaptive_tx_coalesce;
+ __u32 pkt_rate_low;
+ __u32 rx_coalesce_usecs_low;
+ __u32 rx_max_coalesced_frames_low;
+ __u32 tx_coalesce_usecs_low;
+ __u32 tx_max_coalesced_frames_low;
+ __u32 pkt_rate_high;
+ __u32 rx_coalesce_usecs_high;
+ __u32 rx_max_coalesced_frames_high;
+ __u32 tx_coalesce_usecs_high;
+ __u32 tx_max_coalesced_frames_high;
+ __u32 rate_sample_interval;
+};
+
+/**
+ * struct ethtool_ringparam - RX/TX ring parameters
+ * @cmd: Command number = %ETHTOOL_GRINGPARAM or %ETHTOOL_SRINGPARAM
+ * @rx_max_pending: Maximum supported number of pending entries per
+ * RX ring. Read-only.
+ * @rx_mini_max_pending: Maximum supported number of pending entries
+ * per RX mini ring. Read-only.
+ * @rx_jumbo_max_pending: Maximum supported number of pending entries
+ * per RX jumbo ring. Read-only.
+ * @tx_max_pending: Maximum supported number of pending entries per
+ * TX ring. Read-only.
+ * @rx_pending: Current maximum number of pending entries per RX ring
+ * @rx_mini_pending: Current maximum number of pending entries per RX
+ * mini ring
+ * @rx_jumbo_pending: Current maximum number of pending entries per RX
+ * jumbo ring
+ * @tx_pending: Current maximum supported number of pending entries
+ * per TX ring
+ *
+ * If the interface does not have separate RX mini and/or jumbo rings,
+ * @rx_mini_max_pending and/or @rx_jumbo_max_pending will be 0.
+ *
+ * There may also be driver-dependent minimum values for the number
+ * of entries per ring.
+ */
+struct ethtool_ringparam {
+ __u32 cmd;
+ __u32 rx_max_pending;
+ __u32 rx_mini_max_pending;
+ __u32 rx_jumbo_max_pending;
+ __u32 tx_max_pending;
+ __u32 rx_pending;
+ __u32 rx_mini_pending;
+ __u32 rx_jumbo_pending;
+ __u32 tx_pending;
+};
+
+/**
+ * struct ethtool_channels - configuring number of network channel
+ * @cmd: ETHTOOL_{G,S}CHANNELS
+ * @max_rx: Read only. Maximum number of receive channel the driver support.
+ * @max_tx: Read only. Maximum number of transmit channel the driver support.
+ * @max_other: Read only. Maximum number of other channel the driver support.
+ * @max_combined: Read only. Maximum number of combined channel the driver
+ * support. Set of queues RX, TX or other.
+ * @rx_count: Valid values are in the range 1 to the max_rx.
+ * @tx_count: Valid values are in the range 1 to the max_tx.
+ * @other_count: Valid values are in the range 1 to the max_other.
+ * @combined_count: Valid values are in the range 1 to the max_combined.
+ *
+ * This can be used to configure RX, TX and other channels.
+ */
+
+struct ethtool_channels {
+ __u32 cmd;
+ __u32 max_rx;
+ __u32 max_tx;
+ __u32 max_other;
+ __u32 max_combined;
+ __u32 rx_count;
+ __u32 tx_count;
+ __u32 other_count;
+ __u32 combined_count;
+};
+
+/**
+ * struct ethtool_pauseparam - Ethernet pause (flow control) parameters
+ * @cmd: Command number = %ETHTOOL_GPAUSEPARAM or %ETHTOOL_SPAUSEPARAM
+ * @autoneg: Flag to enable autonegotiation of pause frame use
+ * @rx_pause: Flag to enable reception of pause frames
+ * @tx_pause: Flag to enable transmission of pause frames
+ *
+ * Drivers should reject a non-zero setting of @autoneg when
+ * autoneogotiation is disabled (or not supported) for the link.
+ *
+ * If the link is autonegotiated, drivers should use
+ * mii_advertise_flowctrl() or similar code to set the advertised
+ * pause frame capabilities based on the @rx_pause and @tx_pause flags,
+ * even if @autoneg is zero. They should also allow the advertised
+ * pause frame capabilities to be controlled directly through the
+ * advertising field of &struct ethtool_cmd.
+ *
+ * If @autoneg is non-zero, the MAC is configured to send and/or
+ * receive pause frames according to the result of autonegotiation.
+ * Otherwise, it is configured directly based on the @rx_pause and
+ * @tx_pause flags.
+ */
+struct ethtool_pauseparam {
+ __u32 cmd;
+ __u32 autoneg;
+ __u32 rx_pause;
+ __u32 tx_pause;
+};
+
+/**
+ * enum ethtool_link_ext_state - link extended state
+ */
+enum ethtool_link_ext_state {
+ ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_STATE_NO_CABLE,
+ ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE,
+ ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE,
+ ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED,
+ ETHTOOL_LINK_EXT_STATE_OVERHEAT,
+};
+
+/**
+ * enum ethtool_link_ext_substate_autoneg - more information in addition to
+ * ETHTOOL_LINK_EXT_STATE_AUTONEG.
+ */
+enum ethtool_link_ext_substate_autoneg {
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD,
+};
+
+/**
+ * enum ethtool_link_ext_substate_link_training - more information in addition to
+ * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE.
+ */
+enum ethtool_link_ext_substate_link_training {
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT,
+};
+
+/**
+ * enum ethtool_link_ext_substate_logical_mismatch - more information in addition
+ * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH.
+ */
+enum ethtool_link_ext_substate_link_logical_mismatch {
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED,
+};
+
+/**
+ * enum ethtool_link_ext_substate_bad_signal_integrity - more information in
+ * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY.
+ */
+enum ethtool_link_ext_substate_bad_signal_integrity {
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE,
+};
+
+/**
+ * enum ethtool_link_ext_substate_cable_issue - more information in
+ * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE.
+ */
+enum ethtool_link_ext_substate_cable_issue {
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE,
+};
+
+#define ETH_GSTRING_LEN 32
+
+/**
+ * enum ethtool_stringset - string set ID
+ * @ETH_SS_TEST: Self-test result names, for use with %ETHTOOL_TEST
+ * @ETH_SS_STATS: Statistic names, for use with %ETHTOOL_GSTATS
+ * @ETH_SS_PRIV_FLAGS: Driver private flag names, for use with
+ * %ETHTOOL_GPFLAGS and %ETHTOOL_SPFLAGS
+ * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE;
+ * now deprecated
+ * @ETH_SS_FEATURES: Device feature names
+ * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
+ * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
+ * @ETH_SS_PHY_TUNABLES: PHY tunable names
+ * @ETH_SS_LINK_MODES: link mode names
+ * @ETH_SS_MSG_CLASSES: debug message class names
+ * @ETH_SS_WOL_MODES: wake-on-lan modes
+ * @ETH_SS_SOF_TIMESTAMPING: SOF_TIMESTAMPING_* flags
+ * @ETH_SS_TS_TX_TYPES: timestamping Tx types
+ * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters
+ * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types
+ */
+enum ethtool_stringset {
+ ETH_SS_TEST = 0,
+ ETH_SS_STATS,
+ ETH_SS_PRIV_FLAGS,
+ ETH_SS_NTUPLE_FILTERS,
+ ETH_SS_FEATURES,
+ ETH_SS_RSS_HASH_FUNCS,
+ ETH_SS_TUNABLES,
+ ETH_SS_PHY_STATS,
+ ETH_SS_PHY_TUNABLES,
+ ETH_SS_LINK_MODES,
+ ETH_SS_MSG_CLASSES,
+ ETH_SS_WOL_MODES,
+ ETH_SS_SOF_TIMESTAMPING,
+ ETH_SS_TS_TX_TYPES,
+ ETH_SS_TS_RX_FILTERS,
+ ETH_SS_UDP_TUNNEL_TYPES,
+
+ /* add new constants above here */
+ ETH_SS_COUNT
+};
+
+/**
+ * struct ethtool_gstrings - string set for data tagging
+ * @cmd: Command number = %ETHTOOL_GSTRINGS
+ * @string_set: String set ID; one of &enum ethtool_stringset
+ * @len: On return, the number of strings in the string set
+ * @data: Buffer for strings. Each string is null-padded to a size of
+ * %ETH_GSTRING_LEN.
+ *
+ * Users must use %ETHTOOL_GSSET_INFO to find the number of strings in
+ * the string set. They must allocate a buffer of the appropriate
+ * size immediately following this structure.
+ */
+struct ethtool_gstrings {
+ __u32 cmd;
+ __u32 string_set;
+ __u32 len;
+ __u8 data[0];
+};
+
+/**
+ * struct ethtool_sset_info - string set information
+ * @cmd: Command number = %ETHTOOL_GSSET_INFO
+ * @sset_mask: On entry, a bitmask of string sets to query, with bits
+ * numbered according to &enum ethtool_stringset. On return, a
+ * bitmask of those string sets queried that are supported.
+ * @data: Buffer for string set sizes. On return, this contains the
+ * size of each string set that was queried and supported, in
+ * order of ID.
+ *
+ * Example: The user passes in @sset_mask = 0x7 (sets 0, 1, 2) and on
+ * return @sset_mask == 0x6 (sets 1, 2). Then @data[0] contains the
+ * size of set 1 and @data[1] contains the size of set 2.
+ *
+ * Users must allocate a buffer of the appropriate size (4 * number of
+ * sets queried) immediately following this structure.
+ */
+struct ethtool_sset_info {
+ __u32 cmd;
+ __u32 reserved;
+ __u64 sset_mask;
+ __u32 data[0];
+};
+
+/**
+ * enum ethtool_test_flags - flags definition of ethtool_test
+ * @ETH_TEST_FL_OFFLINE: if set perform online and offline tests, otherwise
+ * only online tests.
+ * @ETH_TEST_FL_FAILED: Driver set this flag if test fails.
+ * @ETH_TEST_FL_EXTERNAL_LB: Application request to perform external loopback
+ * test.
+ * @ETH_TEST_FL_EXTERNAL_LB_DONE: Driver performed the external loopback test
+ */
+
+enum ethtool_test_flags {
+ ETH_TEST_FL_OFFLINE = (1 << 0),
+ ETH_TEST_FL_FAILED = (1 << 1),
+ ETH_TEST_FL_EXTERNAL_LB = (1 << 2),
+ ETH_TEST_FL_EXTERNAL_LB_DONE = (1 << 3),
+};
+
+/**
+ * struct ethtool_test - device self-test invocation
+ * @cmd: Command number = %ETHTOOL_TEST
+ * @flags: A bitmask of flags from &enum ethtool_test_flags. Some
+ * flags may be set by the user on entry; others may be set by
+ * the driver on return.
+ * @len: On return, the number of test results
+ * @data: Array of test results
+ *
+ * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the
+ * number of test results that will be returned. They must allocate a
+ * buffer of the appropriate size (8 * number of results) immediately
+ * following this structure.
+ */
+struct ethtool_test {
+ __u32 cmd;
+ __u32 flags;
+ __u32 reserved;
+ __u32 len;
+ __u64 data[0];
+};
+
+/**
+ * struct ethtool_stats - device-specific statistics
+ * @cmd: Command number = %ETHTOOL_GSTATS
+ * @n_stats: On return, the number of statistics
+ * @data: Array of statistics
+ *
+ * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the
+ * number of statistics that will be returned. They must allocate a
+ * buffer of the appropriate size (8 * number of statistics)
+ * immediately following this structure.
+ */
+struct ethtool_stats {
+ __u32 cmd;
+ __u32 n_stats;
+ __u64 data[0];
+};
+
+/**
+ * struct ethtool_perm_addr - permanent hardware address
+ * @cmd: Command number = %ETHTOOL_GPERMADDR
+ * @size: On entry, the size of the buffer. On return, the size of the
+ * address. The command fails if the buffer is too small.
+ * @data: Buffer for the address
+ *
+ * Users must allocate the buffer immediately following this structure.
+ * A buffer size of %MAX_ADDR_LEN should be sufficient for any address
+ * type.
+ */
+struct ethtool_perm_addr {
+ __u32 cmd;
+ __u32 size;
+ __u8 data[0];
+};
+
+/* boolean flags controlling per-interface behavior characteristics.
+ * When reading, the flag indicates whether or not a certain behavior
+ * is enabled/present. When writing, the flag indicates whether
+ * or not the driver should turn on (set) or off (clear) a behavior.
+ *
+ * Some behaviors may read-only (unconditionally absent or present).
+ * If such is the case, return EINVAL in the set-flags operation if the
+ * flag differs from the read-only value.
+ */
+enum ethtool_flags {
+ ETH_FLAG_TXVLAN = (1 << 7), /* TX VLAN offload enabled */
+ ETH_FLAG_RXVLAN = (1 << 8), /* RX VLAN offload enabled */
+ ETH_FLAG_LRO = (1 << 15), /* LRO is enabled */
+ ETH_FLAG_NTUPLE = (1 << 27), /* N-tuple filters enabled */
+ ETH_FLAG_RXHASH = (1 << 28),
+};
+
+/* The following structures are for supporting RX network flow
+ * classification and RX n-tuple configuration. Note, all multibyte
+ * fields, e.g., ip4src, ip4dst, psrc, pdst, spi, etc. are expected to
+ * be in network byte order.
+ */
+
+/**
+ * struct ethtool_tcpip4_spec - flow specification for TCP/IPv4 etc.
+ * @ip4src: Source host
+ * @ip4dst: Destination host
+ * @psrc: Source port
+ * @pdst: Destination port
+ * @tos: Type-of-service
+ *
+ * This can be used to specify a TCP/IPv4, UDP/IPv4 or SCTP/IPv4 flow.
+ */
+struct ethtool_tcpip4_spec {
+ __be32 ip4src;
+ __be32 ip4dst;
+ __be16 psrc;
+ __be16 pdst;
+ __u8 tos;
+};
+
+/**
+ * struct ethtool_ah_espip4_spec - flow specification for IPsec/IPv4
+ * @ip4src: Source host
+ * @ip4dst: Destination host
+ * @spi: Security parameters index
+ * @tos: Type-of-service
+ *
+ * This can be used to specify an IPsec transport or tunnel over IPv4.
+ */
+struct ethtool_ah_espip4_spec {
+ __be32 ip4src;
+ __be32 ip4dst;
+ __be32 spi;
+ __u8 tos;
+};
+
+#define ETH_RX_NFC_IP4 1
+
+/**
+ * struct ethtool_usrip4_spec - general flow specification for IPv4
+ * @ip4src: Source host
+ * @ip4dst: Destination host
+ * @l4_4_bytes: First 4 bytes of transport (layer 4) header
+ * @tos: Type-of-service
+ * @ip_ver: Value must be %ETH_RX_NFC_IP4; mask must be 0
+ * @proto: Transport protocol number; mask must be 0
+ */
+struct ethtool_usrip4_spec {
+ __be32 ip4src;
+ __be32 ip4dst;
+ __be32 l4_4_bytes;
+ __u8 tos;
+ __u8 ip_ver;
+ __u8 proto;
+};
+
+/**
+ * struct ethtool_tcpip6_spec - flow specification for TCP/IPv6 etc.
+ * @ip6src: Source host
+ * @ip6dst: Destination host
+ * @psrc: Source port
+ * @pdst: Destination port
+ * @tclass: Traffic Class
+ *
+ * This can be used to specify a TCP/IPv6, UDP/IPv6 or SCTP/IPv6 flow.
+ */
+struct ethtool_tcpip6_spec {
+ __be32 ip6src[4];
+ __be32 ip6dst[4];
+ __be16 psrc;
+ __be16 pdst;
+ __u8 tclass;
+};
+
+/**
+ * struct ethtool_ah_espip6_spec - flow specification for IPsec/IPv6
+ * @ip6src: Source host
+ * @ip6dst: Destination host
+ * @spi: Security parameters index
+ * @tclass: Traffic Class
+ *
+ * This can be used to specify an IPsec transport or tunnel over IPv6.
+ */
+struct ethtool_ah_espip6_spec {
+ __be32 ip6src[4];
+ __be32 ip6dst[4];
+ __be32 spi;
+ __u8 tclass;
+};
+
+/**
+ * struct ethtool_usrip6_spec - general flow specification for IPv6
+ * @ip6src: Source host
+ * @ip6dst: Destination host
+ * @l4_4_bytes: First 4 bytes of transport (layer 4) header
+ * @tclass: Traffic Class
+ * @l4_proto: Transport protocol number (nexthdr after any Extension Headers)
+ */
+struct ethtool_usrip6_spec {
+ __be32 ip6src[4];
+ __be32 ip6dst[4];
+ __be32 l4_4_bytes;
+ __u8 tclass;
+ __u8 l4_proto;
+};
+
+union ethtool_flow_union {
+ struct ethtool_tcpip4_spec tcp_ip4_spec;
+ struct ethtool_tcpip4_spec udp_ip4_spec;
+ struct ethtool_tcpip4_spec sctp_ip4_spec;
+ struct ethtool_ah_espip4_spec ah_ip4_spec;
+ struct ethtool_ah_espip4_spec esp_ip4_spec;
+ struct ethtool_usrip4_spec usr_ip4_spec;
+ struct ethtool_tcpip6_spec tcp_ip6_spec;
+ struct ethtool_tcpip6_spec udp_ip6_spec;
+ struct ethtool_tcpip6_spec sctp_ip6_spec;
+ struct ethtool_ah_espip6_spec ah_ip6_spec;
+ struct ethtool_ah_espip6_spec esp_ip6_spec;
+ struct ethtool_usrip6_spec usr_ip6_spec;
+ struct ethhdr ether_spec;
+ __u8 hdata[52];
+};
+
+/**
+ * struct ethtool_flow_ext - additional RX flow fields
+ * @h_dest: destination MAC address
+ * @vlan_etype: VLAN EtherType
+ * @vlan_tci: VLAN tag control information
+ * @data: user defined data
+ *
+ * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT
+ * is set in &struct ethtool_rx_flow_spec @flow_type.
+ * @h_dest is valid if %FLOW_MAC_EXT is set.
+ */
+struct ethtool_flow_ext {
+ __u8 padding[2];
+ unsigned char h_dest[ETH_ALEN];
+ __be16 vlan_etype;
+ __be16 vlan_tci;
+ __be32 data[2];
+};
+
+/**
+ * struct ethtool_rx_flow_spec - classification rule for RX flows
+ * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW
+ * @h_u: Flow fields to match (dependent on @flow_type)
+ * @h_ext: Additional fields to match
+ * @m_u: Masks for flow field bits to be matched
+ * @m_ext: Masks for additional field bits to be matched
+ * Note, all additional fields must be ignored unless @flow_type
+ * includes the %FLOW_EXT or %FLOW_MAC_EXT flag
+ * (see &struct ethtool_flow_ext description).
+ * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC
+ * if packets should be discarded, or %RX_CLS_FLOW_WAKE if the
+ * packets should be used for Wake-on-LAN with %WAKE_FILTER
+ * @location: Location of rule in the table. Locations must be
+ * numbered such that a flow matching multiple rules will be
+ * classified according to the first (lowest numbered) rule.
+ */
+struct ethtool_rx_flow_spec {
+ __u32 flow_type;
+ union ethtool_flow_union h_u;
+ struct ethtool_flow_ext h_ext;
+ union ethtool_flow_union m_u;
+ struct ethtool_flow_ext m_ext;
+ __u64 ring_cookie;
+ __u32 location;
+};
+
+/* How rings are laid out when accessing virtual functions or
+ * offloaded queues is device specific. To allow users to do flow
+ * steering and specify these queues the ring cookie is partitioned
+ * into a 32bit queue index with an 8 bit virtual function id.
+ * This also leaves the 3bytes for further specifiers. It is possible
+ * future devices may support more than 256 virtual functions if
+ * devices start supporting PCIe w/ARI. However at the moment I
+ * do not know of any devices that support this so I do not reserve
+ * space for this at this time. If a future patch consumes the next
+ * byte it should be aware of this possibility.
+ */
+#define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL
+#define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL
+#define ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF 32
+static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie)
+{
+ return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie;
+}
+
+static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
+{
+ return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >>
+ ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+}
+
+/**
+ * struct ethtool_rxnfc - command to get or set RX flow classification rules
+ * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH,
+ * %ETHTOOL_GRXRINGS, %ETHTOOL_GRXCLSRLCNT, %ETHTOOL_GRXCLSRULE,
+ * %ETHTOOL_GRXCLSRLALL, %ETHTOOL_SRXCLSRLDEL or %ETHTOOL_SRXCLSRLINS
+ * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW
+ * @data: Command-dependent value
+ * @fs: Flow classification rule
+ * @rss_context: RSS context to be affected
+ * @rule_cnt: Number of rules to be affected
+ * @rule_locs: Array of used rule locations
+ *
+ * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating
+ * the fields included in the flow hash, e.g. %RXH_IP_SRC. The following
+ * structure fields must not be used, except that if @flow_type includes
+ * the %FLOW_RSS flag, then @rss_context determines which RSS context to
+ * act on.
+ *
+ * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues
+ * on return.
+ *
+ * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined
+ * rules on return. If @data is non-zero on return then it is the
+ * size of the rule table, plus the flag %RX_CLS_LOC_SPECIAL if the
+ * driver supports any special location values. If that flag is not
+ * set in @data then special location values should not be used.
+ *
+ * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
+ * existing rule on entry and @fs contains the rule on return; if
+ * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is
+ * filled with the RSS context ID associated with the rule.
+ *
+ * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the
+ * user buffer for @rule_locs on entry. On return, @data is the size
+ * of the rule table, @rule_cnt is the number of defined rules, and
+ * @rule_locs contains the locations of the defined rules. Drivers
+ * must use the second parameter to get_rxnfc() instead of @rule_locs.
+ *
+ * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
+ * @fs.@location either specifies the location to use or is a special
+ * location value with %RX_CLS_LOC_SPECIAL flag set. On return,
+ * @fs.@location is the actual rule location. If @fs.@flow_type
+ * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to
+ * use for flow spreading traffic which matches this rule. The value
+ * from the rxfh indirection table will be added to @fs.@ring_cookie
+ * to choose which ring to deliver to.
+ *
+ * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
+ * existing rule on entry.
+ *
+ * A driver supporting the special location values for
+ * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused
+ * location, and may remove a rule at a later location (lower
+ * priority) that matches exactly the same set of flows. The special
+ * values are %RX_CLS_LOC_ANY, selecting any location;
+ * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum
+ * priority); and %RX_CLS_LOC_LAST, selecting the last suitable
+ * location (minimum priority). Additional special values may be
+ * defined in future and drivers must return -%EINVAL for any
+ * unrecognised value.
+ */
+struct ethtool_rxnfc {
+ __u32 cmd;
+ __u32 flow_type;
+ __u64 data;
+ struct ethtool_rx_flow_spec fs;
+ union {
+ __u32 rule_cnt;
+ __u32 rss_context;
+ };
+ __u32 rule_locs[0];
+};
+
+
+/**
+ * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection
+ * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR
+ * @size: On entry, the array size of the user buffer, which may be zero.
+ * On return from %ETHTOOL_GRXFHINDIR, the array size of the hardware
+ * indirection table.
+ * @ring_index: RX ring/queue index for each hash value
+ *
+ * For %ETHTOOL_GRXFHINDIR, a @size of zero means that only the size
+ * should be returned. For %ETHTOOL_SRXFHINDIR, a @size of zero means
+ * the table should be reset to default values. This last feature
+ * is not supported by the original implementations.
+ */
+struct ethtool_rxfh_indir {
+ __u32 cmd;
+ __u32 size;
+ __u32 ring_index[0];
+};
+
+/**
+ * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
+ * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
+ * @rss_context: RSS context identifier. Context 0 is the default for normal
+ * traffic; other contexts can be referenced as the destination for RX flow
+ * classification rules. %ETH_RXFH_CONTEXT_ALLOC is used with command
+ * %ETHTOOL_SRSSH to allocate a new RSS context; on return this field will
+ * contain the ID of the newly allocated context.
+ * @indir_size: On entry, the array size of the user buffer for the
+ * indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
+ * %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH,
+ * the array size of the hardware indirection table.
+ * @key_size: On entry, the array size of the user buffer for the hash key,
+ * which may be zero. On return from %ETHTOOL_GRSSH, the size of the
+ * hardware hash key.
+ * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
+ * Valid values are one of the %ETH_RSS_HASH_*.
+ * @rsvd: Reserved for future extensions.
+ * @rss_config: RX ring/queue index for each hash value i.e., indirection table
+ * of @indir_size __u32 elements, followed by hash key of @key_size
+ * bytes.
+ *
+ * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the
+ * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of
+ * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
+ * and a @indir_size of zero means the indir table should be reset to default
+ * values (if @rss_context == 0) or that the RSS context should be deleted.
+ * An hfunc of zero means that hash function setting is not requested.
+ */
+struct ethtool_rxfh {
+ __u32 cmd;
+ __u32 rss_context;
+ __u32 indir_size;
+ __u32 key_size;
+ __u8 hfunc;
+ __u8 rsvd8[3];
+ __u32 rsvd32;
+ __u32 rss_config[0];
+};
+#define ETH_RXFH_CONTEXT_ALLOC 0xffffffff
+#define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff
+
+/**
+ * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter
+ * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW
+ * @h_u: Flow field values to match (dependent on @flow_type)
+ * @m_u: Masks for flow field value bits to be ignored
+ * @vlan_tag: VLAN tag to match
+ * @vlan_tag_mask: Mask for VLAN tag bits to be ignored
+ * @data: Driver-dependent data to match
+ * @data_mask: Mask for driver-dependent data bits to be ignored
+ * @action: RX ring/queue index to deliver to (non-negative) or other action
+ * (negative, e.g. %ETHTOOL_RXNTUPLE_ACTION_DROP)
+ *
+ * For flow types %TCP_V4_FLOW, %UDP_V4_FLOW and %SCTP_V4_FLOW, where
+ * a field value and mask are both zero this is treated as if all mask
+ * bits are set i.e. the field is ignored.
+ */
+struct ethtool_rx_ntuple_flow_spec {
+ __u32 flow_type;
+ union {
+ struct ethtool_tcpip4_spec tcp_ip4_spec;
+ struct ethtool_tcpip4_spec udp_ip4_spec;
+ struct ethtool_tcpip4_spec sctp_ip4_spec;
+ struct ethtool_ah_espip4_spec ah_ip4_spec;
+ struct ethtool_ah_espip4_spec esp_ip4_spec;
+ struct ethtool_usrip4_spec usr_ip4_spec;
+ struct ethhdr ether_spec;
+ __u8 hdata[72];
+ } h_u, m_u;
+
+ __u16 vlan_tag;
+ __u16 vlan_tag_mask;
+ __u64 data;
+ __u64 data_mask;
+
+ __s32 action;
+#define ETHTOOL_RXNTUPLE_ACTION_DROP (-1) /* drop packet */
+#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) /* clear filter */
+};
+
+/**
+ * struct ethtool_rx_ntuple - command to set or clear RX flow filter
+ * @cmd: Command number - %ETHTOOL_SRXNTUPLE
+ * @fs: Flow filter specification
+ */
+struct ethtool_rx_ntuple {
+ __u32 cmd;
+ struct ethtool_rx_ntuple_flow_spec fs;
+};
+
+#define ETHTOOL_FLASH_MAX_FILENAME 128
+enum ethtool_flash_op_type {
+ ETHTOOL_FLASH_ALL_REGIONS = 0,
+};
+
+/* for passing firmware flashing related parameters */
+struct ethtool_flash {
+ __u32 cmd;
+ __u32 region;
+ char data[ETHTOOL_FLASH_MAX_FILENAME];
+};
+
+/**
+ * struct ethtool_dump - used for retrieving, setting device dump
+ * @cmd: Command number - %ETHTOOL_GET_DUMP_FLAG, %ETHTOOL_GET_DUMP_DATA, or
+ * %ETHTOOL_SET_DUMP
+ * @version: FW version of the dump, filled in by driver
+ * @flag: driver dependent flag for dump setting, filled in by driver during
+ * get and filled in by ethtool for set operation.
+ * flag must be initialized by macro ETH_FW_DUMP_DISABLE value when
+ * firmware dump is disabled.
+ * @len: length of dump data, used as the length of the user buffer on entry to
+ * %ETHTOOL_GET_DUMP_DATA and this is returned as dump length by driver
+ * for %ETHTOOL_GET_DUMP_FLAG command
+ * @data: data collected for get dump data operation
+ */
+struct ethtool_dump {
+ __u32 cmd;
+ __u32 version;
+ __u32 flag;
+ __u32 len;
+ __u8 data[0];
+};
+
+#define ETH_FW_DUMP_DISABLE 0
+
+/* for returning and changing feature sets */
+
+/**
+ * struct ethtool_get_features_block - block with state of 32 features
+ * @available: mask of changeable features
+ * @requested: mask of features requested to be enabled if possible
+ * @active: mask of currently enabled features
+ * @never_changed: mask of features not changeable for any device
+ */
+struct ethtool_get_features_block {
+ __u32 available;
+ __u32 requested;
+ __u32 active;
+ __u32 never_changed;
+};
+
+/**
+ * struct ethtool_gfeatures - command to get state of device's features
+ * @cmd: command number = %ETHTOOL_GFEATURES
+ * @size: On entry, the number of elements in the features[] array;
+ * on return, the number of elements in features[] needed to hold
+ * all features
+ * @features: state of features
+ */
+struct ethtool_gfeatures {
+ __u32 cmd;
+ __u32 size;
+ struct ethtool_get_features_block features[0];
+};
+
+/**
+ * struct ethtool_set_features_block - block with request for 32 features
+ * @valid: mask of features to be changed
+ * @requested: values of features to be changed
+ */
+struct ethtool_set_features_block {
+ __u32 valid;
+ __u32 requested;
+};
+
+/**
+ * struct ethtool_sfeatures - command to request change in device's features
+ * @cmd: command number = %ETHTOOL_SFEATURES
+ * @size: array size of the features[] array
+ * @features: feature change masks
+ */
+struct ethtool_sfeatures {
+ __u32 cmd;
+ __u32 size;
+ struct ethtool_set_features_block features[0];
+};
+
+/**
+ * struct ethtool_ts_info - holds a device's timestamping and PHC association
+ * @cmd: command number = %ETHTOOL_GET_TS_INFO
+ * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
+ * @phc_index: device index of the associated PHC, or -1 if there is none
+ * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
+ * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
+ *
+ * The bits in the 'tx_types' and 'rx_filters' fields correspond to
+ * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values,
+ * respectively. For example, if the device supports HWTSTAMP_TX_ON,
+ * then (1 << HWTSTAMP_TX_ON) in 'tx_types' will be set.
+ *
+ * Drivers should only report the filters they actually support without
+ * upscaling in the SIOCSHWTSTAMP ioctl. If the SIOCSHWSTAMP request for
+ * HWTSTAMP_FILTER_V1_SYNC is supported by HWTSTAMP_FILTER_V1_EVENT, then the
+ * driver should only report HWTSTAMP_FILTER_V1_EVENT in this op.
+ */
+struct ethtool_ts_info {
+ __u32 cmd;
+ __u32 so_timestamping;
+ __s32 phc_index;
+ __u32 tx_types;
+ __u32 tx_reserved[3];
+ __u32 rx_filters;
+ __u32 rx_reserved[3];
+};
+
+/*
+ * %ETHTOOL_SFEATURES changes features present in features[].valid to the
+ * values of corresponding bits in features[].requested. Bits in .requested
+ * not set in .valid or not changeable are ignored.
+ *
+ * Returns %EINVAL when .valid contains undefined or never-changeable bits
+ * or size is not equal to required number of features words (32-bit blocks).
+ * Returns >= 0 if request was completed; bits set in the value mean:
+ * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not
+ * changeable (not present in %ETHTOOL_GFEATURES' features[].available)
+ * those bits were ignored.
+ * %ETHTOOL_F_WISH - some or all changes requested were recorded but the
+ * resulting state of bits masked by .valid is not equal to .requested.
+ * Probably there are other device-specific constraints on some features
+ * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered
+ * here as though ignored bits were cleared.
+ * %ETHTOOL_F_COMPAT - some or all changes requested were made by calling
+ * compatibility functions. Requested offload state cannot be properly
+ * managed by kernel.
+ *
+ * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of
+ * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands
+ * for ETH_SS_FEATURES string set. First entry in the table corresponds to least
+ * significant bit in features[0] fields. Empty strings mark undefined features.
+ */
+enum ethtool_sfeatures_retval_bits {
+ ETHTOOL_F_UNSUPPORTED__BIT,
+ ETHTOOL_F_WISH__BIT,
+ ETHTOOL_F_COMPAT__BIT,
+};
+
+#define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT)
+#define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT)
+#define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT)
+
+#define MAX_NUM_QUEUE 4096
+
+/**
+ * struct ethtool_per_queue_op - apply sub command to the queues in mask.
+ * @cmd: ETHTOOL_PERQUEUE
+ * @sub_command: the sub command which apply to each queues
+ * @queue_mask: Bitmap of the queues which sub command apply to
+ * @data: A complete command structure following for each of the queues addressed
+ */
+struct ethtool_per_queue_op {
+ __u32 cmd;
+ __u32 sub_command;
+ __u32 queue_mask[__KERNEL_DIV_ROUND_UP(MAX_NUM_QUEUE, 32)];
+ char data[];
+};
+
+/**
+ * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
+ * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
+ * @active_fec: FEC mode which is active on porte
+ * @fec: Bitmask of supported/configured FEC modes
+ * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
+ *
+ * Drivers should reject a non-zero setting of @autoneg when
+ * autoneogotiation is disabled (or not supported) for the link.
+ *
+ */
+struct ethtool_fecparam {
+ __u32 cmd;
+ /* bitmask of FEC modes */
+ __u32 active_fec;
+ __u32 fec;
+ __u32 reserved;
+};
+
+/**
+ * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
+ * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
+ * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
+ * @ETHTOOL_FEC_OFF: No FEC Mode
+ * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
+ * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
+ */
+enum ethtool_fec_config_bits {
+ ETHTOOL_FEC_NONE_BIT,
+ ETHTOOL_FEC_AUTO_BIT,
+ ETHTOOL_FEC_OFF_BIT,
+ ETHTOOL_FEC_RS_BIT,
+ ETHTOOL_FEC_BASER_BIT,
+ ETHTOOL_FEC_LLRS_BIT,
+};
+
+#define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT)
+#define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT)
+#define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT)
+#define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT)
+#define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT)
+#define ETHTOOL_FEC_LLRS (1 << ETHTOOL_FEC_LLRS_BIT)
+
+/* CMDs currently supported */
+#define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings.
+ * Please use ETHTOOL_GLINKSETTINGS
+ */
+#define ETHTOOL_SSET 0x00000002 /* DEPRECATED, Set settings.
+ * Please use ETHTOOL_SLINKSETTINGS
+ */
+#define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */
+#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers. */
+#define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */
+#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options. */
+#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
+#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level. */
+#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation. */
+/* Get link status for host, i.e. whether the interface *and* the
+ * physical port (if there is one) are up (ethtool_value). */
+#define ETHTOOL_GLINK 0x0000000a
+#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
+#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data. */
+#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
+#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */
+#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */
+#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters. */
+#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */
+#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */
+#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable
+ * (ethtool_value) */
+#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable
+ * (ethtool_value). */
+#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test. */
+#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */
+#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */
+#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
+#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */
+#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */
+#define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */
+#define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */
+#define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */
+#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */
+#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */
+#define ETHTOOL_GFLAGS 0x00000025 /* Get flags bitmap(ethtool_value) */
+#define ETHTOOL_SFLAGS 0x00000026 /* Set flags bitmap(ethtool_value) */
+#define ETHTOOL_GPFLAGS 0x00000027 /* Get driver-private flags bitmap */
+#define ETHTOOL_SPFLAGS 0x00000028 /* Set driver-private flags bitmap */
+
+#define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */
+#define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */
+#define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */
+#define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */
+#define ETHTOOL_GRXRINGS 0x0000002d /* Get RX rings available for LB */
+#define ETHTOOL_GRXCLSRLCNT 0x0000002e /* Get RX class rule count */
+#define ETHTOOL_GRXCLSRULE 0x0000002f /* Get RX classification rule */
+#define ETHTOOL_GRXCLSRLALL 0x00000030 /* Get all RX classification rule */
+#define ETHTOOL_SRXCLSRLDEL 0x00000031 /* Delete RX classification rule */
+#define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */
+#define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */
+#define ETHTOOL_RESET 0x00000034 /* Reset hardware */
+#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */
+#define ETHTOOL_GRXNTUPLE 0x00000036 /* deprecated */
+#define ETHTOOL_GSSET_INFO 0x00000037 /* Get string set info */
+#define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */
+#define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */
+
+#define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */
+#define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */
+#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
+#define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */
+#define ETHTOOL_SET_DUMP 0x0000003e /* Set dump settings */
+#define ETHTOOL_GET_DUMP_FLAG 0x0000003f /* Get dump settings */
+#define ETHTOOL_GET_DUMP_DATA 0x00000040 /* Get dump data */
+#define ETHTOOL_GET_TS_INFO 0x00000041 /* Get time stamping and PHC info */
+#define ETHTOOL_GMODULEINFO 0x00000042 /* Get plug-in module information */
+#define ETHTOOL_GMODULEEEPROM 0x00000043 /* Get plug-in module eeprom */
+#define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */
+#define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */
+
+#define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */
+#define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */
+#define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */
+#define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */
+#define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */
+
+#define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */
+
+#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */
+#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */
+#define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */
+#define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */
+#define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */
+#define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */
+
+/* compatibility with older code */
+#define SPARC_ETH_GSET ETHTOOL_GSET
+#define SPARC_ETH_SSET ETHTOOL_SSET
+
+/* Link mode bit indices */
+enum ethtool_link_mode_bit_indices {
+ ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0,
+ ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1,
+ ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3,
+ ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5,
+ ETHTOOL_LINK_MODE_Autoneg_BIT = 6,
+ ETHTOOL_LINK_MODE_TP_BIT = 7,
+ ETHTOOL_LINK_MODE_AUI_BIT = 8,
+ ETHTOOL_LINK_MODE_MII_BIT = 9,
+ ETHTOOL_LINK_MODE_FIBRE_BIT = 10,
+ ETHTOOL_LINK_MODE_BNC_BIT = 11,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12,
+ ETHTOOL_LINK_MODE_Pause_BIT = 13,
+ ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14,
+ ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15,
+ ETHTOOL_LINK_MODE_Backplane_BIT = 16,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19,
+ ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20,
+ ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26,
+ ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27,
+ ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28,
+ ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29,
+ ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31,
+
+ /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
+ * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
+ * macro for bits > 31. The only way to use indices > 31 is to
+ * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API.
+ */
+
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40,
+ ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41,
+ ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42,
+ ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43,
+ ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44,
+ ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45,
+ ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46,
+ ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47,
+ ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48,
+
+ ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49,
+ ETHTOOL_LINK_MODE_FEC_RS_BIT = 50,
+ ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51,
+ ETHTOOL_LINK_MODE_50000baseKR_Full_BIT = 52,
+ ETHTOOL_LINK_MODE_50000baseSR_Full_BIT = 53,
+ ETHTOOL_LINK_MODE_50000baseCR_Full_BIT = 54,
+ ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT = 55,
+ ETHTOOL_LINK_MODE_50000baseDR_Full_BIT = 56,
+ ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT = 57,
+ ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT = 58,
+ ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT = 59,
+ ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT = 60,
+ ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT = 61,
+ ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT = 62,
+ ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT = 63,
+ ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64,
+ ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT = 65,
+ ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66,
+ ETHTOOL_LINK_MODE_100baseT1_Full_BIT = 67,
+ ETHTOOL_LINK_MODE_1000baseT1_Full_BIT = 68,
+ ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT = 69,
+ ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT = 70,
+ ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71,
+ ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72,
+ ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73,
+ ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74,
+ ETHTOOL_LINK_MODE_100000baseKR_Full_BIT = 75,
+ ETHTOOL_LINK_MODE_100000baseSR_Full_BIT = 76,
+ ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT = 77,
+ ETHTOOL_LINK_MODE_100000baseCR_Full_BIT = 78,
+ ETHTOOL_LINK_MODE_100000baseDR_Full_BIT = 79,
+ ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT = 80,
+ ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT = 81,
+ ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT = 82,
+ ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT = 83,
+ ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT = 84,
+ ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT = 85,
+ ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT = 86,
+ ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87,
+ ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT = 88,
+ ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89,
+ /* must be last entry */
+ __ETHTOOL_LINK_MODE_MASK_NBITS
+};
+
+#define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \
+ (1UL << (ETHTOOL_LINK_MODE_ ## base_name ## _BIT))
+
+/* DEPRECATED macros. Please migrate to
+ * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT
+ * define any new SUPPORTED_* macro for bits > 31.
+ */
+#define SUPPORTED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half)
+#define SUPPORTED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full)
+#define SUPPORTED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half)
+#define SUPPORTED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full)
+#define SUPPORTED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half)
+#define SUPPORTED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full)
+#define SUPPORTED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg)
+#define SUPPORTED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP)
+#define SUPPORTED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI)
+#define SUPPORTED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII)
+#define SUPPORTED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE)
+#define SUPPORTED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC)
+#define SUPPORTED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full)
+#define SUPPORTED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause)
+#define SUPPORTED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause)
+#define SUPPORTED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full)
+#define SUPPORTED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane)
+#define SUPPORTED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full)
+#define SUPPORTED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full)
+#define SUPPORTED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full)
+#define SUPPORTED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC)
+#define SUPPORTED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full)
+#define SUPPORTED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full)
+#define SUPPORTED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full)
+#define SUPPORTED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full)
+#define SUPPORTED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full)
+#define SUPPORTED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full)
+#define SUPPORTED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full)
+#define SUPPORTED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full)
+#define SUPPORTED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full)
+#define SUPPORTED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full)
+/* Please do not define any new SUPPORTED_* macro for bits > 31, see
+ * notice above.
+ */
+
+/*
+ * DEPRECATED macros. Please migrate to
+ * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT
+ * define any new ADERTISE_* macro for bits > 31.
+ */
+#define ADVERTISED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half)
+#define ADVERTISED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full)
+#define ADVERTISED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half)
+#define ADVERTISED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full)
+#define ADVERTISED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half)
+#define ADVERTISED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full)
+#define ADVERTISED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg)
+#define ADVERTISED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP)
+#define ADVERTISED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI)
+#define ADVERTISED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII)
+#define ADVERTISED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE)
+#define ADVERTISED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC)
+#define ADVERTISED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full)
+#define ADVERTISED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause)
+#define ADVERTISED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause)
+#define ADVERTISED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full)
+#define ADVERTISED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane)
+#define ADVERTISED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full)
+#define ADVERTISED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full)
+#define ADVERTISED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full)
+#define ADVERTISED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC)
+#define ADVERTISED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full)
+#define ADVERTISED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full)
+#define ADVERTISED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full)
+#define ADVERTISED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full)
+#define ADVERTISED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full)
+#define ADVERTISED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full)
+#define ADVERTISED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full)
+#define ADVERTISED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full)
+#define ADVERTISED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full)
+#define ADVERTISED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full)
+/* Please do not define any new ADVERTISED_* macro for bits > 31, see
+ * notice above.
+ */
+
+/* The following are all involved in forcing a particular link
+ * mode for the device for setting things. When getting the
+ * devices settings, these indicate the current mode and whether
+ * it was forced up into this mode or autonegotiated.
+ */
+
+/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal.
+ * Update drivers/net/phy/phy.c:phy_speed_to_str() and
+ * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values.
+ */
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+#define SPEED_2500 2500
+#define SPEED_5000 5000
+#define SPEED_10000 10000
+#define SPEED_14000 14000
+#define SPEED_20000 20000
+#define SPEED_25000 25000
+#define SPEED_40000 40000
+#define SPEED_50000 50000
+#define SPEED_56000 56000
+#define SPEED_100000 100000
+#define SPEED_200000 200000
+#define SPEED_400000 400000
+
+#define SPEED_UNKNOWN -1
+
+static inline int ethtool_validate_speed(__u32 speed)
+{
+ return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN;
+}
+
+/* Duplex, half or full. */
+#define DUPLEX_HALF 0x00
+#define DUPLEX_FULL 0x01
+#define DUPLEX_UNKNOWN 0xff
+
+static inline int ethtool_validate_duplex(__u8 duplex)
+{
+ switch (duplex) {
+ case DUPLEX_HALF:
+ case DUPLEX_FULL:
+ case DUPLEX_UNKNOWN:
+ return 1;
+ }
+
+ return 0;
+}
+
+#define MASTER_SLAVE_CFG_UNSUPPORTED 0
+#define MASTER_SLAVE_CFG_UNKNOWN 1
+#define MASTER_SLAVE_CFG_MASTER_PREFERRED 2
+#define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3
+#define MASTER_SLAVE_CFG_MASTER_FORCE 4
+#define MASTER_SLAVE_CFG_SLAVE_FORCE 5
+#define MASTER_SLAVE_STATE_UNSUPPORTED 0
+#define MASTER_SLAVE_STATE_UNKNOWN 1
+#define MASTER_SLAVE_STATE_MASTER 2
+#define MASTER_SLAVE_STATE_SLAVE 3
+#define MASTER_SLAVE_STATE_ERR 4
+
+/* Which connector port. */
+#define PORT_TP 0x00
+#define PORT_AUI 0x01
+#define PORT_MII 0x02
+#define PORT_FIBRE 0x03
+#define PORT_BNC 0x04
+#define PORT_DA 0x05
+#define PORT_NONE 0xef
+#define PORT_OTHER 0xff
+
+/* Which transceiver to use. */
+#define XCVR_INTERNAL 0x00 /* PHY and MAC are in the same package */
+#define XCVR_EXTERNAL 0x01 /* PHY and MAC are in different packages */
+#define XCVR_DUMMY1 0x02
+#define XCVR_DUMMY2 0x03
+#define XCVR_DUMMY3 0x04
+
+/* Enable or disable autonegotiation. */
+#define AUTONEG_DISABLE 0x00
+#define AUTONEG_ENABLE 0x01
+
+/* MDI or MDI-X status/control - if MDI/MDI_X/AUTO is set then
+ * the driver is required to renegotiate link
+ */
+#define ETH_TP_MDI_INVALID 0x00 /* status: unknown; control: unsupported */
+#define ETH_TP_MDI 0x01 /* status: MDI; control: force MDI */
+#define ETH_TP_MDI_X 0x02 /* status: MDI-X; control: force MDI-X */
+#define ETH_TP_MDI_AUTO 0x03 /* control: auto-select */
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY (1 << 0)
+#define WAKE_UCAST (1 << 1)
+#define WAKE_MCAST (1 << 2)
+#define WAKE_BCAST (1 << 3)
+#define WAKE_ARP (1 << 4)
+#define WAKE_MAGIC (1 << 5)
+#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
+#define WAKE_FILTER (1 << 7)
+
+#define WOL_MODE_COUNT 8
+
+/* L2-L4 network traffic flow types */
+#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */
+#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */
+#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */
+#define AH_ESP_V4_FLOW 0x04 /* hash only */
+#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */
+#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */
+#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */
+#define AH_ESP_V6_FLOW 0x08 /* hash only */
+#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */
+#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */
+#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */
+#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */
+#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */
+#define IP_USER_FLOW IPV4_USER_FLOW
+#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */
+#define IPV4_FLOW 0x10 /* hash only */
+#define IPV6_FLOW 0x11 /* hash only */
+#define ETHER_FLOW 0x12 /* spec only (ether_spec) */
+/* Flag to enable additional fields in struct ethtool_rx_flow_spec */
+#define FLOW_EXT 0x80000000
+#define FLOW_MAC_EXT 0x40000000
+/* Flag to enable RSS spreading of traffic matching rule (nfc only) */
+#define FLOW_RSS 0x20000000
+
+/* L3-L4 network traffic flow hash options */
+#define RXH_L2DA (1 << 1)
+#define RXH_VLAN (1 << 2)
+#define RXH_L3_PROTO (1 << 3)
+#define RXH_IP_SRC (1 << 4)
+#define RXH_IP_DST (1 << 5)
+#define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */
+#define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */
+#define RXH_DISCARD (1 << 31)
+
+#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL
+#define RX_CLS_FLOW_WAKE 0xfffffffffffffffeULL
+
+/* Special RX classification rule insert location values */
+#define RX_CLS_LOC_SPECIAL 0x80000000 /* flag */
+#define RX_CLS_LOC_ANY 0xffffffff
+#define RX_CLS_LOC_FIRST 0xfffffffe
+#define RX_CLS_LOC_LAST 0xfffffffd
+
+/* EEPROM Standards for plug in modules */
+#define ETH_MODULE_SFF_8079 0x1
+#define ETH_MODULE_SFF_8079_LEN 256
+#define ETH_MODULE_SFF_8472 0x2
+#define ETH_MODULE_SFF_8472_LEN 512
+#define ETH_MODULE_SFF_8636 0x3
+#define ETH_MODULE_SFF_8636_LEN 256
+#define ETH_MODULE_SFF_8436 0x4
+#define ETH_MODULE_SFF_8436_LEN 256
+
+#define ETH_MODULE_SFF_8636_MAX_LEN 640
+#define ETH_MODULE_SFF_8436_MAX_LEN 640
+
+/* Reset flags */
+/* The reset() operation must clear the flags for the components which
+ * were actually reset. On successful return, the flags indicate the
+ * components which were not reset, either because they do not exist
+ * in the hardware or because they cannot be reset independently. The
+ * driver must never reset any components that were not requested.
+ */
+enum ethtool_reset_flags {
+ /* These flags represent components dedicated to the interface
+ * the command is addressed to. Shift any flag left by
+ * ETH_RESET_SHARED_SHIFT to reset a shared component of the
+ * same type.
+ */
+ ETH_RESET_MGMT = 1 << 0, /* Management processor */
+ ETH_RESET_IRQ = 1 << 1, /* Interrupt requester */
+ ETH_RESET_DMA = 1 << 2, /* DMA engine */
+ ETH_RESET_FILTER = 1 << 3, /* Filtering/flow direction */
+ ETH_RESET_OFFLOAD = 1 << 4, /* Protocol offload */
+ ETH_RESET_MAC = 1 << 5, /* Media access controller */
+ ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */
+ ETH_RESET_RAM = 1 << 7, /* RAM shared between
+ * multiple components */
+ ETH_RESET_AP = 1 << 8, /* Application processor */
+
+ ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to
+ * this interface */
+ ETH_RESET_ALL = 0xffffffff, /* All components used by this
+ * interface, even if shared */
+};
+#define ETH_RESET_SHARED_SHIFT 16
+
+
+/**
+ * struct ethtool_link_settings - link control and status
+ *
+ * IMPORTANT, Backward compatibility notice: When implementing new
+ * user-space tools, please first try %ETHTOOL_GLINKSETTINGS, and
+ * if it succeeds use %ETHTOOL_SLINKSETTINGS to change link
+ * settings; do not use %ETHTOOL_SSET if %ETHTOOL_GLINKSETTINGS
+ * succeeded: stick to %ETHTOOL_GLINKSETTINGS/%SLINKSETTINGS in
+ * that case. Conversely, if %ETHTOOL_GLINKSETTINGS fails, use
+ * %ETHTOOL_GSET to query and %ETHTOOL_SSET to change link
+ * settings; do not use %ETHTOOL_SLINKSETTINGS if
+ * %ETHTOOL_GLINKSETTINGS failed: stick to
+ * %ETHTOOL_GSET/%ETHTOOL_SSET in that case.
+ *
+ * @cmd: Command number = %ETHTOOL_GLINKSETTINGS or %ETHTOOL_SLINKSETTINGS
+ * @speed: Link speed (Mbps)
+ * @duplex: Duplex mode; one of %DUPLEX_*
+ * @port: Physical connector type; one of %PORT_*
+ * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not
+ * applicable. For clause 45 PHYs this is the PRTAD.
+ * @autoneg: Enable/disable autonegotiation and auto-detection;
+ * either %AUTONEG_DISABLE or %AUTONEG_ENABLE
+ * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO
+ * protocols supported by the interface; 0 if unknown.
+ * Read-only.
+ * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of
+ * %ETH_TP_MDI_*. If the status is unknown or not applicable, the
+ * value will be %ETH_TP_MDI_INVALID. Read-only.
+ * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of
+ * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads
+ * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected.
+ * When written successfully, the link should be renegotiated if
+ * necessary.
+ * @link_mode_masks_nwords: Number of 32-bit words for each of the
+ * supported, advertising, lp_advertising link mode bitmaps. For
+ * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user
+ * (>= 0); on return, if handshake in progress, negative if
+ * request size unsupported by kernel: absolute value indicates
+ * kernel expected size and all the other fields but cmd
+ * are 0; otherwise (handshake completed), strictly positive
+ * to indicate size used by kernel and cmd field stays
+ * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For
+ * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive
+ * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise
+ * refused. For drivers: ignore this field (use kernel's
+ * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will
+ * be overwritten by kernel.
+ * @supported: Bitmap with each bit meaning given by
+ * %ethtool_link_mode_bit_indices for the link modes, physical
+ * connectors and other link features for which the interface
+ * supports autonegotiation or auto-detection. Read-only.
+ * @advertising: Bitmap with each bit meaning given by
+ * %ethtool_link_mode_bit_indices for the link modes, physical
+ * connectors and other link features that are advertised through
+ * autonegotiation or enabled for auto-detection.
+ * @lp_advertising: Bitmap with each bit meaning given by
+ * %ethtool_link_mode_bit_indices for the link modes, and other
+ * link features that the link partner advertised through
+ * autonegotiation; 0 if unknown or not applicable. Read-only.
+ * @transceiver: Used to distinguish different possible PHY types,
+ * reported consistently by PHYLIB. Read-only.
+ *
+ * If autonegotiation is disabled, the speed and @duplex represent the
+ * fixed link mode and are writable if the driver supports multiple
+ * link modes. If it is enabled then they are read-only; if the link
+ * is up they represent the negotiated link mode; if the link is down,
+ * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and
+ * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode.
+ *
+ * Some hardware interfaces may have multiple PHYs and/or physical
+ * connectors fitted or do not allow the driver to detect which are
+ * fitted. For these interfaces @port and/or @phy_address may be
+ * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE.
+ * Otherwise, attempts to write different values may be ignored or
+ * rejected.
+ *
+ * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt
+ * are not available in %ethtool_link_settings. These fields will be
+ * always set to zero in %ETHTOOL_GSET reply and %ETHTOOL_SSET will
+ * fail if any of them is set to non-zero value.
+ *
+ * Users should assume that all fields not marked read-only are
+ * writable and subject to validation by the driver. They should use
+ * %ETHTOOL_GLINKSETTINGS to get the current values before making specific
+ * changes and then applying them with %ETHTOOL_SLINKSETTINGS.
+ *
+ * Drivers that implement %get_link_ksettings and/or
+ * %set_link_ksettings should ignore the @cmd
+ * and @link_mode_masks_nwords fields (any change to them overwritten
+ * by kernel), and rely only on kernel's internal
+ * %__ETHTOOL_LINK_MODE_MASK_NBITS and
+ * %ethtool_link_mode_mask_t. Drivers that implement
+ * %set_link_ksettings() should validate all fields other than @cmd
+ * and @link_mode_masks_nwords that are not described as read-only or
+ * deprecated, and must ignore all fields described as read-only.
+ */
+struct ethtool_link_settings {
+ __u32 cmd;
+ __u32 speed;
+ __u8 duplex;
+ __u8 port;
+ __u8 phy_address;
+ __u8 autoneg;
+ __u8 mdio_support;
+ __u8 eth_tp_mdix;
+ __u8 eth_tp_mdix_ctrl;
+ __s8 link_mode_masks_nwords;
+ __u8 transceiver;
+ __u8 master_slave_cfg;
+ __u8 master_slave_state;
+ __u8 reserved1[1];
+ __u32 reserved[7];
+ __u32 link_mode_masks[0];
+ /* layout of link_mode_masks fields:
+ * __u32 map_supported[link_mode_masks_nwords];
+ * __u32 map_advertising[link_mode_masks_nwords];
+ * __u32 map_lp_advertising[link_mode_masks_nwords];
+ */
+};
+#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/src/shared/linux/nl80211.h b/src/shared/linux/nl80211.h
new file mode 100644
index 0000000..65edfff
--- /dev/null
+++ b/src/shared/linux/nl80211.h
@@ -0,0 +1,6554 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __LINUX_NL80211_H
+#define __LINUX_NL80211_H
+/*
+ * 802.11 netlink interface public header
+ *
+ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2008 Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2008 Luis Carlos Cobo <luisca@cozybit.com>
+ * Copyright 2008 Michael Buesch <m@bues.ch>
+ * Copyright 2008, 2009 Luis R. Rodriguez <lrodriguez@atheros.com>
+ * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
+ * Copyright 2008 Colin McCabe <colin@cozybit.com>
+ * Copyright 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018-2019 Intel Corporation
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+/*
+ * This header file defines the userspace API to the wireless stack. Please
+ * be careful not to break things - i.e. don't move anything around or so
+ * unless you can demonstrate that it breaks neither API nor ABI.
+ *
+ * Additions to the API should be accompanied by actual implementations in
+ * an upstream driver, so that example implementations exist in case there
+ * are ever concerns about the precise semantics of the API or changes are
+ * needed, and to ensure that code for dead (no longer implemented) API
+ * can actually be identified and removed.
+ * Nonetheless, semantics should also be documented carefully in this file.
+ */
+
+#include <linux/types.h>
+
+#define NL80211_GENL_NAME "nl80211"
+
+#define NL80211_MULTICAST_GROUP_CONFIG "config"
+#define NL80211_MULTICAST_GROUP_SCAN "scan"
+#define NL80211_MULTICAST_GROUP_REG "regulatory"
+#define NL80211_MULTICAST_GROUP_MLME "mlme"
+#define NL80211_MULTICAST_GROUP_VENDOR "vendor"
+#define NL80211_MULTICAST_GROUP_NAN "nan"
+#define NL80211_MULTICAST_GROUP_TESTMODE "testmode"
+
+#define NL80211_EDMG_BW_CONFIG_MIN 4
+#define NL80211_EDMG_BW_CONFIG_MAX 15
+#define NL80211_EDMG_CHANNELS_MIN 1
+#define NL80211_EDMG_CHANNELS_MAX 0x3c /* 0b00111100 */
+
+/**
+ * DOC: Station handling
+ *
+ * Stations are added per interface, but a special case exists with VLAN
+ * interfaces. When a station is bound to an AP interface, it may be moved
+ * into a VLAN identified by a VLAN interface index (%NL80211_ATTR_STA_VLAN).
+ * The station is still assumed to belong to the AP interface it was added
+ * to.
+ *
+ * Station handling varies per interface type and depending on the driver's
+ * capabilities.
+ *
+ * For drivers supporting TDLS with external setup (WIPHY_FLAG_SUPPORTS_TDLS
+ * and WIPHY_FLAG_TDLS_EXTERNAL_SETUP), the station lifetime is as follows:
+ * - a setup station entry is added, not yet authorized, without any rate
+ * or capability information, this just exists to avoid race conditions
+ * - when the TDLS setup is done, a single NL80211_CMD_SET_STATION is valid
+ * to add rate and capability information to the station and at the same
+ * time mark it authorized.
+ * - %NL80211_TDLS_ENABLE_LINK is then used
+ * - after this, the only valid operation is to remove it by tearing down
+ * the TDLS link (%NL80211_TDLS_DISABLE_LINK)
+ *
+ * TODO: need more info for other interface types
+ */
+
+/**
+ * DOC: Frame transmission/registration support
+ *
+ * Frame transmission and registration support exists to allow userspace
+ * management entities such as wpa_supplicant react to management frames
+ * that are not being handled by the kernel. This includes, for example,
+ * certain classes of action frames that cannot be handled in the kernel
+ * for various reasons.
+ *
+ * Frame registration is done on a per-interface basis and registrations
+ * cannot be removed other than by closing the socket. It is possible to
+ * specify a registration filter to register, for example, only for a
+ * certain type of action frame. In particular with action frames, those
+ * that userspace registers for will not be returned as unhandled by the
+ * driver, so that the registered application has to take responsibility
+ * for doing that.
+ *
+ * The type of frame that can be registered for is also dependent on the
+ * driver and interface type. The frame types are advertised in wiphy
+ * attributes so applications know what to expect.
+ *
+ * NOTE: When an interface changes type while registrations are active,
+ * these registrations are ignored until the interface type is
+ * changed again. This means that changing the interface type can
+ * lead to a situation that couldn't otherwise be produced, but
+ * any such registrations will be dormant in the sense that they
+ * will not be serviced, i.e. they will not receive any frames.
+ *
+ * Frame transmission allows userspace to send for example the required
+ * responses to action frames. It is subject to some sanity checking,
+ * but many frames can be transmitted. When a frame was transmitted, its
+ * status is indicated to the sending socket.
+ *
+ * For more technical details, see the corresponding command descriptions
+ * below.
+ */
+
+/**
+ * DOC: Virtual interface / concurrency capabilities
+ *
+ * Some devices are able to operate with virtual MACs, they can have
+ * more than one virtual interface. The capability handling for this
+ * is a bit complex though, as there may be a number of restrictions
+ * on the types of concurrency that are supported.
+ *
+ * To start with, each device supports the interface types listed in
+ * the %NL80211_ATTR_SUPPORTED_IFTYPES attribute, but by listing the
+ * types there no concurrency is implied.
+ *
+ * Once concurrency is desired, more attributes must be observed:
+ * To start with, since some interface types are purely managed in
+ * software, like the AP-VLAN type in mac80211 for example, there's
+ * an additional list of these, they can be added at any time and
+ * are only restricted by some semantic restrictions (e.g. AP-VLAN
+ * cannot be added without a corresponding AP interface). This list
+ * is exported in the %NL80211_ATTR_SOFTWARE_IFTYPES attribute.
+ *
+ * Further, the list of supported combinations is exported. This is
+ * in the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute. Basically,
+ * it exports a list of "groups", and at any point in time the
+ * interfaces that are currently active must fall into any one of
+ * the advertised groups. Within each group, there are restrictions
+ * on the number of interfaces of different types that are supported
+ * and also the number of different channels, along with potentially
+ * some other restrictions. See &enum nl80211_if_combination_attrs.
+ *
+ * All together, these attributes define the concurrency of virtual
+ * interfaces that a given device supports.
+ */
+
+/**
+ * DOC: packet coalesce support
+ *
+ * In most cases, host that receives IPv4 and IPv6 multicast/broadcast
+ * packets does not do anything with these packets. Therefore the
+ * reception of these unwanted packets causes unnecessary processing
+ * and power consumption.
+ *
+ * Packet coalesce feature helps to reduce number of received interrupts
+ * to host by buffering these packets in firmware/hardware for some
+ * predefined time. Received interrupt will be generated when one of the
+ * following events occur.
+ * a) Expiration of hardware timer whose expiration time is set to maximum
+ * coalescing delay of matching coalesce rule.
+ * b) Coalescing buffer in hardware reaches it's limit.
+ * c) Packet doesn't match any of the configured coalesce rules.
+ *
+ * User needs to configure following parameters for creating a coalesce
+ * rule.
+ * a) Maximum coalescing delay
+ * b) List of packet patterns which needs to be matched
+ * c) Condition for coalescence. pattern 'match' or 'no match'
+ * Multiple such rules can be created.
+ */
+
+/**
+ * DOC: WPA/WPA2 EAPOL handshake offload
+ *
+ * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag drivers
+ * can indicate they support offloading EAPOL handshakes for WPA/WPA2
+ * preshared key authentication. In %NL80211_CMD_CONNECT the preshared
+ * key should be specified using %NL80211_ATTR_PMK. Drivers supporting
+ * this offload may reject the %NL80211_CMD_CONNECT when no preshared
+ * key material is provided, for example when that driver does not
+ * support setting the temporal keys through %CMD_NEW_KEY.
+ *
+ * Similarly @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X flag can be
+ * set by drivers indicating offload support of the PTK/GTK EAPOL
+ * handshakes during 802.1X authentication. In order to use the offload
+ * the %NL80211_CMD_CONNECT should have %NL80211_ATTR_WANT_1X_4WAY_HS
+ * attribute flag. Drivers supporting this offload may reject the
+ * %NL80211_CMD_CONNECT when the attribute flag is not present.
+ *
+ * For 802.1X the PMK or PMK-R0 are set by providing %NL80211_ATTR_PMK
+ * using %NL80211_CMD_SET_PMK. For offloaded FT support also
+ * %NL80211_ATTR_PMKR0_NAME must be provided.
+ */
+
+/**
+ * DOC: FILS shared key authentication offload
+ *
+ * FILS shared key authentication offload can be advertized by drivers by
+ * setting @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD flag. The drivers that support
+ * FILS shared key authentication offload should be able to construct the
+ * authentication and association frames for FILS shared key authentication and
+ * eventually do a key derivation as per IEEE 802.11ai. The below additional
+ * parameters should be given to driver in %NL80211_CMD_CONNECT and/or in
+ * %NL80211_CMD_UPDATE_CONNECT_PARAMS.
+ * %NL80211_ATTR_FILS_ERP_USERNAME - used to construct keyname_nai
+ * %NL80211_ATTR_FILS_ERP_REALM - used to construct keyname_nai
+ * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used to construct erp message
+ * %NL80211_ATTR_FILS_ERP_RRK - used to generate the rIK and rMSK
+ * rIK should be used to generate an authentication tag on the ERP message and
+ * rMSK should be used to derive a PMKSA.
+ * rIK, rMSK should be generated and keyname_nai, sequence number should be used
+ * as specified in IETF RFC 6696.
+ *
+ * When FILS shared key authentication is completed, driver needs to provide the
+ * below additional parameters to userspace, which can be either after setting
+ * up a connection or after roaming.
+ * %NL80211_ATTR_FILS_KEK - used for key renewal
+ * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used in further EAP-RP exchanges
+ * %NL80211_ATTR_PMKID - used to identify the PMKSA used/generated
+ * %Nl80211_ATTR_PMK - used to update PMKSA cache in userspace
+ * The PMKSA can be maintained in userspace persistently so that it can be used
+ * later after reboots or wifi turn off/on also.
+ *
+ * %NL80211_ATTR_FILS_CACHE_ID is the cache identifier advertized by a FILS
+ * capable AP supporting PMK caching. It specifies the scope within which the
+ * PMKSAs are cached in an ESS. %NL80211_CMD_SET_PMKSA and
+ * %NL80211_CMD_DEL_PMKSA are enhanced to allow support for PMKSA caching based
+ * on FILS cache identifier. Additionally %NL80211_ATTR_PMK is used with
+ * %NL80211_SET_PMKSA to specify the PMK corresponding to a PMKSA for driver to
+ * use in a FILS shared key connection with PMKSA caching.
+ */
+
+/**
+ * DOC: SAE authentication offload
+ *
+ * By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they
+ * support offloading SAE authentication for WPA3-Personal networks. In
+ * %NL80211_CMD_CONNECT the password for SAE should be specified using
+ * %NL80211_ATTR_SAE_PASSWORD.
+ */
+
+/**
+ * enum nl80211_commands - supported nl80211 commands
+ *
+ * @NL80211_CMD_UNSPEC: unspecified command to catch errors
+ *
+ * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request
+ * to get a list of all present wiphys.
+ * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or
+ * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME,
+ * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ (and the
+ * attributes determining the channel width; this is used for setting
+ * monitor mode channel), %NL80211_ATTR_WIPHY_RETRY_SHORT,
+ * %NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+ * and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD.
+ * However, for setting the channel, see %NL80211_CMD_SET_CHANNEL
+ * instead, the support here is for backward compatibility only.
+ * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
+ * or rename notification. Has attributes %NL80211_ATTR_WIPHY and
+ * %NL80211_ATTR_WIPHY_NAME.
+ * @NL80211_CMD_DEL_WIPHY: Wiphy deleted. Has attributes
+ * %NL80211_ATTR_WIPHY and %NL80211_ATTR_WIPHY_NAME.
+ *
+ * @NL80211_CMD_GET_INTERFACE: Request an interface's configuration;
+ * either a dump request for all interfaces or a specific get with a
+ * single %NL80211_ATTR_IFINDEX is supported.
+ * @NL80211_CMD_SET_INTERFACE: Set type of a virtual interface, requires
+ * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_IFTYPE.
+ * @NL80211_CMD_NEW_INTERFACE: Newly created virtual interface or response
+ * to %NL80211_CMD_GET_INTERFACE. Has %NL80211_ATTR_IFINDEX,
+ * %NL80211_ATTR_WIPHY and %NL80211_ATTR_IFTYPE attributes. Can also
+ * be sent from userspace to request creation of a new virtual interface,
+ * then requires attributes %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFTYPE and
+ * %NL80211_ATTR_IFNAME.
+ * @NL80211_CMD_DEL_INTERFACE: Virtual interface was deleted, has attributes
+ * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_WIPHY. Can also be sent from
+ * userspace to request deletion of a virtual interface, then requires
+ * attribute %NL80211_ATTR_IFINDEX.
+ *
+ * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified
+ * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC.
+ * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT,
+ * %NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD.
+ * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA,
+ * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER,
+ * and %NL80211_ATTR_KEY_SEQ attributes.
+ * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX
+ * or %NL80211_ATTR_MAC.
+ *
+ * @NL80211_CMD_GET_BEACON: (not used)
+ * @NL80211_CMD_SET_BEACON: change the beacon on an access point interface
+ * using the %NL80211_ATTR_BEACON_HEAD and %NL80211_ATTR_BEACON_TAIL
+ * attributes. For drivers that generate the beacon and probe responses
+ * internally, the following attributes must be provided: %NL80211_ATTR_IE,
+ * %NL80211_ATTR_IE_PROBE_RESP and %NL80211_ATTR_IE_ASSOC_RESP.
+ * @NL80211_CMD_START_AP: Start AP operation on an AP interface, parameters
+ * are like for %NL80211_CMD_SET_BEACON, and additionally parameters that
+ * do not change are used, these include %NL80211_ATTR_BEACON_INTERVAL,
+ * %NL80211_ATTR_DTIM_PERIOD, %NL80211_ATTR_SSID,
+ * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHERS_PAIRWISE,
+ * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS,
+ * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY,
+ * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT,
+ * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS.
+ * The channel to use can be set on the interface or be given using the
+ * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width.
+ * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP
+ * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface
+ * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP
+ *
+ * @NL80211_CMD_GET_STATION: Get station attributes for station identified by
+ * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_SET_STATION: Set station attributes for station identified by
+ * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_NEW_STATION: Add a station with given attributes to the
+ * the interface identified by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_DEL_STATION: Remove a station identified by %NL80211_ATTR_MAC
+ * or, if no MAC address given, all stations, on the interface identified
+ * by %NL80211_ATTR_IFINDEX. %NL80211_ATTR_MGMT_SUBTYPE and
+ * %NL80211_ATTR_REASON_CODE can optionally be used to specify which type
+ * of disconnection indication should be sent to the station
+ * (Deauthentication or Disassociation frame and reason code for that
+ * frame).
+ *
+ * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to
+ * destination %NL80211_ATTR_MAC on the interface identified by
+ * %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_SET_MPATH: Set mesh path attributes for mesh path to
+ * destination %NL80211_ATTR_MAC on the interface identified by
+ * %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_NEW_MPATH: Create a new mesh path for the destination given by
+ * %NL80211_ATTR_MAC via %NL80211_ATTR_MPATH_NEXT_HOP.
+ * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by
+ * %NL80211_ATTR_MAC.
+ * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the
+ * the interface identified by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC
+ * or, if no MAC address given, all mesh paths, on the interface identified
+ * by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by
+ * %NL80211_ATTR_IFINDEX.
+ *
+ * @NL80211_CMD_GET_REG: ask the wireless core to send us its currently set
+ * regulatory domain. If %NL80211_ATTR_WIPHY is specified and the device
+ * has a private regulatory domain, it will be returned. Otherwise, the
+ * global regdomain will be returned.
+ * A device will have a private regulatory domain if it uses the
+ * regulatory_hint() API. Even when a private regdomain is used the channel
+ * information will still be mended according to further hints from
+ * the regulatory core to help with compliance. A dump version of this API
+ * is now available which will returns the global regdomain as well as
+ * all private regdomains of present wiphys (for those that have it).
+ * If a wiphy is self-managed (%NL80211_ATTR_WIPHY_SELF_MANAGED_REG), then
+ * its private regdomain is the only valid one for it. The regulatory
+ * core is not used to help with compliance in this case.
+ * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command
+ * after being queried by the kernel. CRDA replies by sending a regulatory
+ * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our
+ * current alpha2 if it found a match. It also provides
+ * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each
+ * regulatory rule is a nested set of attributes given by
+ * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and
+ * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by
+ * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and
+ * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP.
+ * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain
+ * to the specified ISO/IEC 3166-1 alpha2 country code. The core will
+ * store this as a valid request and then query userspace for it.
+ *
+ * @NL80211_CMD_GET_MESH_CONFIG: Get mesh networking properties for the
+ * interface identified by %NL80211_ATTR_IFINDEX
+ *
+ * @NL80211_CMD_SET_MESH_CONFIG: Set mesh networking properties for the
+ * interface identified by %NL80211_ATTR_IFINDEX
+ *
+ * @NL80211_CMD_SET_MGMT_EXTRA_IE: Set extra IEs for management frames. The
+ * interface is identified with %NL80211_ATTR_IFINDEX and the management
+ * frame subtype with %NL80211_ATTR_MGMT_SUBTYPE. The extra IE data to be
+ * added to the end of the specified management frame is specified with
+ * %NL80211_ATTR_IE. If the command succeeds, the requested data will be
+ * added to all specified management frames generated by
+ * kernel/firmware/driver.
+ * Note: This command has been removed and it is only reserved at this
+ * point to avoid re-using existing command number. The functionality this
+ * command was planned for has been provided with cleaner design with the
+ * option to specify additional IEs in NL80211_CMD_TRIGGER_SCAN,
+ * NL80211_CMD_AUTHENTICATE, NL80211_CMD_ASSOCIATE,
+ * NL80211_CMD_DEAUTHENTICATE, and NL80211_CMD_DISASSOCIATE.
+ *
+ * @NL80211_CMD_GET_SCAN: get scan results
+ * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters
+ * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the
+ * probe requests at CCK rate or not. %NL80211_ATTR_BSSID can be used to
+ * specify a BSSID to scan for; if not included, the wildcard BSSID will
+ * be used.
+ * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to
+ * NL80211_CMD_GET_SCAN and on the "scan" multicast group)
+ * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons,
+ * partial scan results may be available
+ *
+ * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan at certain
+ * intervals and certain number of cycles, as specified by
+ * %NL80211_ATTR_SCHED_SCAN_PLANS. If %NL80211_ATTR_SCHED_SCAN_PLANS is
+ * not specified and only %NL80211_ATTR_SCHED_SCAN_INTERVAL is specified,
+ * scheduled scan will run in an infinite loop with the specified interval.
+ * These attributes are mutually exculsive,
+ * i.e. NL80211_ATTR_SCHED_SCAN_INTERVAL must not be passed if
+ * NL80211_ATTR_SCHED_SCAN_PLANS is defined.
+ * If for some reason scheduled scan is aborted by the driver, all scan
+ * plans are canceled (including scan plans that did not start yet).
+ * Like with normal scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS)
+ * are passed, they are used in the probe requests. For
+ * broadcast, a broadcast SSID must be passed (ie. an empty
+ * string). If no SSID is passed, no probe requests are sent and
+ * a passive scan is performed. %NL80211_ATTR_SCAN_FREQUENCIES,
+ * if passed, define which channels should be scanned; if not
+ * passed, all channels allowed for the current regulatory domain
+ * are used. Extra IEs can also be passed from the userspace by
+ * using the %NL80211_ATTR_IE attribute. The first cycle of the
+ * scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY
+ * is supplied. If the device supports multiple concurrent scheduled
+ * scans, it will allow such when the caller provides the flag attribute
+ * %NL80211_ATTR_SCHED_SCAN_MULTI to indicate user-space support for it.
+ * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if
+ * scheduled scan is not running. The caller may assume that as soon
+ * as the call returns, it is safe to start a new scheduled scan again.
+ * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan
+ * results available.
+ * @NL80211_CMD_SCHED_SCAN_STOPPED: indicates that the scheduled scan has
+ * stopped. The driver may issue this event at any time during a
+ * scheduled scan. One reason for stopping the scan is if the hardware
+ * does not support starting an association or a normal scan while running
+ * a scheduled scan. This event is also sent when the
+ * %NL80211_CMD_STOP_SCHED_SCAN command is received or when the interface
+ * is brought down while a scheduled scan was running.
+ *
+ * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. channel occupation
+ * or noise level
+ * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to
+ * NL80211_CMD_GET_SURVEY and on the "scan" multicast group)
+ *
+ * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry using %NL80211_ATTR_MAC
+ * (for the BSSID), %NL80211_ATTR_PMKID, and optionally %NL80211_ATTR_PMK
+ * (PMK is used for PTKSA derivation in case of FILS shared key offload) or
+ * using %NL80211_ATTR_SSID, %NL80211_ATTR_FILS_CACHE_ID,
+ * %NL80211_ATTR_PMKID, and %NL80211_ATTR_PMK in case of FILS
+ * authentication where %NL80211_ATTR_FILS_CACHE_ID is the identifier
+ * advertized by a FILS capable AP identifying the scope of PMKSA in an
+ * ESS.
+ * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC
+ * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID,
+ * %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS
+ * authentication.
+ * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries.
+ *
+ * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain
+ * has been changed and provides details of the request information
+ * that caused the change such as who initiated the regulatory request
+ * (%NL80211_ATTR_REG_INITIATOR), the wiphy_idx
+ * (%NL80211_ATTR_REG_ALPHA2) on which the request was made from if
+ * the initiator was %NL80211_REGDOM_SET_BY_COUNTRY_IE or
+ * %NL80211_REGDOM_SET_BY_DRIVER, the type of regulatory domain
+ * set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is
+ * %NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on
+ * to (%NL80211_ATTR_REG_ALPHA2).
+ * @NL80211_CMD_REG_BEACON_HINT: indicates to userspace that an AP beacon
+ * has been found while world roaming thus enabling active scan or
+ * any mode of operation that initiates TX (beacons) on a channel
+ * where we would not have been able to do either before. As an example
+ * if you are world roaming (regulatory domain set to world or if your
+ * driver is using a custom world roaming regulatory domain) and while
+ * doing a passive scan on the 5 GHz band you find an AP there (if not
+ * on a DFS channel) you will now be able to actively scan for that AP
+ * or use AP mode on your card on that same channel. Note that this will
+ * never be used for channels 1-11 on the 2 GHz band as they are always
+ * enabled world wide. This beacon hint is only sent if your device had
+ * either disabled active scanning or beaconing on a channel. We send to
+ * userspace the wiphy on which we removed a restriction from
+ * (%NL80211_ATTR_WIPHY) and the channel on which this occurred
+ * before (%NL80211_ATTR_FREQ_BEFORE) and after (%NL80211_ATTR_FREQ_AFTER)
+ * the beacon hint was processed.
+ *
+ * @NL80211_CMD_AUTHENTICATE: authentication request and notification.
+ * This command is used both as a command (request to authenticate) and
+ * as an event on the "mlme" multicast group indicating completion of the
+ * authentication process.
+ * When used as a command, %NL80211_ATTR_IFINDEX is used to identify the
+ * interface. %NL80211_ATTR_MAC is used to specify PeerSTAAddress (and
+ * BSSID in case of station mode). %NL80211_ATTR_SSID is used to specify
+ * the SSID (mainly for association, but is included in authentication
+ * request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ is used
+ * to specify the frequence of the channel in MHz. %NL80211_ATTR_AUTH_TYPE
+ * is used to specify the authentication type. %NL80211_ATTR_IE is used to
+ * define IEs (VendorSpecificInfo, but also including RSN IE and FT IEs)
+ * to be added to the frame.
+ * When used as an event, this reports reception of an Authentication
+ * frame in station and IBSS modes when the local MLME processed the
+ * frame, i.e., it was for the local STA and was received in correct
+ * state. This is similar to MLME-AUTHENTICATE.confirm primitive in the
+ * MLME SAP interface (kernel providing MLME, userspace SME). The
+ * included %NL80211_ATTR_FRAME attribute contains the management frame
+ * (including both the header and frame body, but not FCS). This event is
+ * also used to indicate if the authentication attempt timed out. In that
+ * case the %NL80211_ATTR_FRAME attribute is replaced with a
+ * %NL80211_ATTR_TIMED_OUT flag (and %NL80211_ATTR_MAC to indicate which
+ * pending authentication timed out).
+ * @NL80211_CMD_ASSOCIATE: association request and notification; like
+ * NL80211_CMD_AUTHENTICATE but for Association and Reassociation
+ * (similar to MLME-ASSOCIATE.request, MLME-REASSOCIATE.request,
+ * MLME-ASSOCIATE.confirm or MLME-REASSOCIATE.confirm primitives). The
+ * %NL80211_ATTR_PREV_BSSID attribute is used to specify whether the
+ * request is for the initial association to an ESS (that attribute not
+ * included) or for reassociation within the ESS (that attribute is
+ * included).
+ * @NL80211_CMD_DEAUTHENTICATE: deauthentication request and notification; like
+ * NL80211_CMD_AUTHENTICATE but for Deauthentication frames (similar to
+ * MLME-DEAUTHENTICATION.request and MLME-DEAUTHENTICATE.indication
+ * primitives).
+ * @NL80211_CMD_DISASSOCIATE: disassociation request and notification; like
+ * NL80211_CMD_AUTHENTICATE but for Disassociation frames (similar to
+ * MLME-DISASSOCIATE.request and MLME-DISASSOCIATE.indication primitives).
+ *
+ * @NL80211_CMD_MICHAEL_MIC_FAILURE: notification of a locally detected Michael
+ * MIC (part of TKIP) failure; sent on the "mlme" multicast group; the
+ * event includes %NL80211_ATTR_MAC to describe the source MAC address of
+ * the frame with invalid MIC, %NL80211_ATTR_KEY_TYPE to show the key
+ * type, %NL80211_ATTR_KEY_IDX to indicate the key identifier, and
+ * %NL80211_ATTR_KEY_SEQ to indicate the TSC value of the frame; this
+ * event matches with MLME-MICHAELMICFAILURE.indication() primitive
+ *
+ * @NL80211_CMD_JOIN_IBSS: Join a new IBSS -- given at least an SSID and a
+ * FREQ attribute (for the initial frequency if no peer can be found)
+ * and optionally a MAC (as BSSID) and FREQ_FIXED attribute if those
+ * should be fixed rather than automatically determined. Can only be
+ * executed on a network interface that is UP, and fixed BSSID/FREQ
+ * may be rejected. Another optional parameter is the beacon interval,
+ * given in the %NL80211_ATTR_BEACON_INTERVAL attribute, which if not
+ * given defaults to 100 TU (102.4ms).
+ * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is
+ * determined by the network interface.
+ *
+ * @NL80211_CMD_TESTMODE: testmode command, takes a wiphy (or ifindex) attribute
+ * to identify the device, and the TESTDATA blob attribute to pass through
+ * to the driver.
+ *
+ * @NL80211_CMD_CONNECT: connection request and notification; this command
+ * requests to connect to a specified network but without separating
+ * auth and assoc steps. For this, you need to specify the SSID in a
+ * %NL80211_ATTR_SSID attribute, and can optionally specify the association
+ * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP,
+ * %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT,
+ * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT,
+ * %NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and
+ * %NL80211_ATTR_WIPHY_FREQ_HINT.
+ * If included, %NL80211_ATTR_MAC and %NL80211_ATTR_WIPHY_FREQ are
+ * restrictions on BSS selection, i.e., they effectively prevent roaming
+ * within the ESS. %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT
+ * can be included to provide a recommendation of the initial BSS while
+ * allowing the driver to roam to other BSSes within the ESS and also to
+ * ignore this recommendation if the indicated BSS is not ideal. Only one
+ * set of BSSID,frequency parameters is used (i.e., either the enforcing
+ * %NL80211_ATTR_MAC,%NL80211_ATTR_WIPHY_FREQ or the less strict
+ * %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT).
+ * %NL80211_ATTR_PREV_BSSID can be used to request a reassociation within
+ * the ESS in case the device is already associated and an association with
+ * a different BSS is desired.
+ * Background scan period can optionally be
+ * specified in %NL80211_ATTR_BG_SCAN_PERIOD,
+ * if not specified default background scan configuration
+ * in driver is used and if period value is 0, bg scan will be disabled.
+ * This attribute is ignored if driver does not support roam scan.
+ * It is also sent as an event, with the BSSID and response IEs when the
+ * connection is established or failed to be established. This can be
+ * determined by the %NL80211_ATTR_STATUS_CODE attribute (0 = success,
+ * non-zero = failure). If %NL80211_ATTR_TIMED_OUT is included in the
+ * event, the connection attempt failed due to not being able to initiate
+ * authentication/association or not receiving a response from the AP.
+ * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as
+ * well to remain backwards compatible.
+ * When establishing a security association, drivers that support 4 way
+ * handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when
+ * the 4 way handshake is completed successfully.
+ * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself.
+ * When a security association was established with the new AP (e.g. if
+ * the FT protocol was used for roaming or the driver completed the 4 way
+ * handshake), this event should be followed by an
+ * %NL80211_CMD_PORT_AUTHORIZED event.
+ * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify
+ * userspace that a connection was dropped by the AP or due to other
+ * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and
+ * %NL80211_ATTR_REASON_CODE attributes are used.
+ *
+ * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices
+ * associated with this wiphy must be down and will follow.
+ *
+ * @NL80211_CMD_REMAIN_ON_CHANNEL: Request to remain awake on the specified
+ * channel for the specified amount of time. This can be used to do
+ * off-channel operations like transmit a Public Action frame and wait for
+ * a response while being associated to an AP on another channel.
+ * %NL80211_ATTR_IFINDEX is used to specify which interface (and thus
+ * radio) is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the
+ * frequency for the operation.
+ * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds
+ * to remain on the channel. This command is also used as an event to
+ * notify when the requested duration starts (it may take a while for the
+ * driver to schedule this time due to other concurrent needs for the
+ * radio).
+ * When called, this operation returns a cookie (%NL80211_ATTR_COOKIE)
+ * that will be included with any events pertaining to this request;
+ * the cookie is also used to cancel the request.
+ * @NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL: This command can be used to cancel a
+ * pending remain-on-channel duration if the desired operation has been
+ * completed prior to expiration of the originally requested duration.
+ * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify the
+ * radio. The %NL80211_ATTR_COOKIE attribute must be given as well to
+ * uniquely identify the request.
+ * This command is also used as an event to notify when a requested
+ * remain-on-channel duration has expired.
+ *
+ * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX
+ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface
+ * and @NL80211_ATTR_TX_RATES the set of allowed rates.
+ *
+ * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames
+ * (via @NL80211_CMD_FRAME) for processing in userspace. This command
+ * requires an interface index, a frame type attribute (optional for
+ * backward compatibility reasons, if not given assumes action frames)
+ * and a match attribute containing the first few bytes of the frame
+ * that should match, e.g. a single byte for only a category match or
+ * four bytes for vendor frames including the OUI. The registration
+ * cannot be dropped, but is removed automatically when the netlink
+ * socket is closed. Multiple registrations can be made.
+ * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for
+ * backward compatibility
+ * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This
+ * command is used both as a request to transmit a management frame and
+ * as an event indicating reception of a frame that was not processed in
+ * kernel code, but is for us (i.e., which may need to be processed in a
+ * user space application). %NL80211_ATTR_FRAME is used to specify the
+ * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ is used
+ * to indicate on which channel the frame is to be transmitted or was
+ * received. If this channel is not the current channel (remain-on-channel
+ * or the operational channel) the device will switch to the given channel
+ * and transmit the frame, optionally waiting for a response for the time
+ * specified using %NL80211_ATTR_DURATION. When called, this operation
+ * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the
+ * TX status event pertaining to the TX request.
+ * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the
+ * management frames at CCK rate or not in 2GHz band.
+ * %NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA
+ * counters which will be updated to the current value. This attribute
+ * is used during CSA period.
+ * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this
+ * command may be used with the corresponding cookie to cancel the wait
+ * time if it is known that it is no longer necessary. This command is
+ * also sent as an event whenever the driver has completed the off-channel
+ * wait time.
+ * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility.
+ * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame
+ * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies
+ * the TX command and %NL80211_ATTR_FRAME includes the contents of the
+ * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged
+ * the frame.
+ * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for
+ * backward compatibility.
+ *
+ * @NL80211_CMD_SET_POWER_SAVE: Set powersave, using %NL80211_ATTR_PS_STATE
+ * @NL80211_CMD_GET_POWER_SAVE: Get powersave status in %NL80211_ATTR_PS_STATE
+ *
+ * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command
+ * is used to configure connection quality monitoring notification trigger
+ * levels.
+ * @NL80211_CMD_NOTIFY_CQM: Connection quality monitor notification. This
+ * command is used as an event to indicate the that a trigger level was
+ * reached.
+ * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ
+ * and the attributes determining channel width) the given interface
+ * (identifed by %NL80211_ATTR_IFINDEX) shall operate on.
+ * In case multiple channels are supported by the device, the mechanism
+ * with which it switches channels is implementation-defined.
+ * When a monitor interface is given, it can only switch channel while
+ * no other interfaces are operating to avoid disturbing the operation
+ * of any other interfaces, and other interfaces will again take
+ * precedence when they are used.
+ *
+ * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface.
+ *
+ * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform
+ * multicast to unicast conversion. When enabled, all multicast packets
+ * with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header)
+ * will be sent out to each station once with the destination (multicast)
+ * MAC address replaced by the station's MAC address. Note that this may
+ * break certain expectations of the receiver, e.g. the ability to drop
+ * unicast IP packets encapsulated in multicast L2 frames, or the ability
+ * to not send destination unreachable messages in such cases.
+ * This can only be toggled per BSS. Configure this on an interface of
+ * type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces
+ * (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode.
+ * If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this
+ * command, the feature is disabled.
+ *
+ * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial
+ * mesh config parameters may be given.
+ * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the
+ * network is determined by the network interface.
+ *
+ * @NL80211_CMD_UNPROT_DEAUTHENTICATE: Unprotected deauthentication frame
+ * notification. This event is used to indicate that an unprotected
+ * deauthentication frame was dropped when MFP is in use.
+ * @NL80211_CMD_UNPROT_DISASSOCIATE: Unprotected disassociation frame
+ * notification. This event is used to indicate that an unprotected
+ * disassociation frame was dropped when MFP is in use.
+ *
+ * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a
+ * beacon or probe response from a compatible mesh peer. This is only
+ * sent while no station information (sta_info) exists for the new peer
+ * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH,
+ * @NL80211_MESH_SETUP_USERSPACE_AMPE, or
+ * @NL80211_MESH_SETUP_USERSPACE_MPM is set. On reception of this
+ * notification, userspace may decide to create a new station
+ * (@NL80211_CMD_NEW_STATION). To stop this notification from
+ * reoccurring, the userspace authentication daemon may want to create the
+ * new station with the AUTHENTICATED flag unset and maybe change it later
+ * depending on the authentication result.
+ *
+ * @NL80211_CMD_GET_WOWLAN: get Wake-on-Wireless-LAN (WoWLAN) settings.
+ * @NL80211_CMD_SET_WOWLAN: set Wake-on-Wireless-LAN (WoWLAN) settings.
+ * Since wireless is more complex than wired ethernet, it supports
+ * various triggers. These triggers can be configured through this
+ * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For
+ * more background information, see
+ * http://wireless.kernel.org/en/users/Documentation/WoWLAN.
+ * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification
+ * from the driver reporting the wakeup reason. In this case, the
+ * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason
+ * for the wakeup, if it was caused by wireless. If it is not present
+ * in the wakeup notification, the wireless device didn't cause the
+ * wakeup but reports that it was woken up.
+ *
+ * @NL80211_CMD_SET_REKEY_OFFLOAD: This command is used give the driver
+ * the necessary information for supporting GTK rekey offload. This
+ * feature is typically used during WoWLAN. The configuration data
+ * is contained in %NL80211_ATTR_REKEY_DATA (which is nested and
+ * contains the data in sub-attributes). After rekeying happened,
+ * this command may also be sent by the driver as an MLME event to
+ * inform userspace of the new replay counter.
+ *
+ * @NL80211_CMD_PMKSA_CANDIDATE: This is used as an event to inform userspace
+ * of PMKSA caching dandidates.
+ *
+ * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup).
+ * In addition, this can be used as an event to request userspace to take
+ * actions on TDLS links (set up a new link or tear down an existing one).
+ * In such events, %NL80211_ATTR_TDLS_OPERATION indicates the requested
+ * operation, %NL80211_ATTR_MAC contains the peer MAC address, and
+ * %NL80211_ATTR_REASON_CODE the reason code to be used (only with
+ * %NL80211_TDLS_TEARDOWN).
+ * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. The
+ * %NL80211_ATTR_TDLS_ACTION attribute determines the type of frame to be
+ * sent. Public Action codes (802.11-2012 8.1.5.1) will be sent as
+ * 802.11 management frames, while TDLS action codes (802.11-2012
+ * 8.5.13.1) will be encapsulated and sent as data frames. The currently
+ * supported Public Action code is %WLAN_PUB_ACTION_TDLS_DISCOVER_RES
+ * and the currently supported TDLS actions codes are given in
+ * &enum ieee80211_tdls_actioncode.
+ *
+ * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP
+ * (or GO) interface (i.e. hostapd) to ask for unexpected frames to
+ * implement sending deauth to stations that send unexpected class 3
+ * frames. Also used as the event sent by the kernel when such a frame
+ * is received.
+ * For the event, the %NL80211_ATTR_MAC attribute carries the TA and
+ * other attributes like the interface index are present.
+ * If used as the command it must have an interface index and you can
+ * only unsubscribe from the event by closing the socket. Subscription
+ * is also for %NL80211_CMD_UNEXPECTED_4ADDR_FRAME events.
+ *
+ * @NL80211_CMD_UNEXPECTED_4ADDR_FRAME: Sent as an event indicating that the
+ * associated station identified by %NL80211_ATTR_MAC sent a 4addr frame
+ * and wasn't already in a 4-addr VLAN. The event will be sent similarly
+ * to the %NL80211_CMD_UNEXPECTED_FRAME event, to the same listener.
+ *
+ * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface
+ * by sending a null data frame to it and reporting when the frame is
+ * acknowleged. This is used to allow timing out inactive clients. Uses
+ * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_MAC. The command returns a
+ * direct reply with an %NL80211_ATTR_COOKIE that is later used to match
+ * up the event with the request. The event includes the same data and
+ * has %NL80211_ATTR_ACK set if the frame was ACKed.
+ *
+ * @NL80211_CMD_REGISTER_BEACONS: Register this socket to receive beacons from
+ * other BSSes when any interfaces are in AP mode. This helps implement
+ * OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME
+ * messages. Note that per PHY only one application may register.
+ *
+ * @NL80211_CMD_SET_NOACK_MAP: sets a bitmap for the individual TIDs whether
+ * No Acknowledgement Policy should be applied.
+ *
+ * @NL80211_CMD_CH_SWITCH_NOTIFY: An AP or GO may decide to switch channels
+ * independently of the userspace SME, send this event indicating
+ * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ and the
+ * attributes determining channel width. This indication may also be
+ * sent when a remotely-initiated switch (e.g., when a STA receives a CSA
+ * from the remote AP) is completed;
+ *
+ * @NL80211_CMD_CH_SWITCH_STARTED_NOTIFY: Notify that a channel switch
+ * has been started on an interface, regardless of the initiator
+ * (ie. whether it was requested from a remote device or
+ * initiated on our own). It indicates that
+ * %NL80211_ATTR_IFINDEX will be on %NL80211_ATTR_WIPHY_FREQ
+ * after %NL80211_ATTR_CH_SWITCH_COUNT TBTT's. The userspace may
+ * decide to react to this indication by requesting other
+ * interfaces to change channel as well.
+ *
+ * @NL80211_CMD_START_P2P_DEVICE: Start the given P2P Device, identified by
+ * its %NL80211_ATTR_WDEV identifier. It must have been created with
+ * %NL80211_CMD_NEW_INTERFACE previously. After it has been started, the
+ * P2P Device can be used for P2P operations, e.g. remain-on-channel and
+ * public action frame TX.
+ * @NL80211_CMD_STOP_P2P_DEVICE: Stop the given P2P Device, identified by
+ * its %NL80211_ATTR_WDEV identifier.
+ *
+ * @NL80211_CMD_CONN_FAILED: connection request to an AP failed; used to
+ * notify userspace that AP has rejected the connection request from a
+ * station, due to particular reason. %NL80211_ATTR_CONN_FAILED_REASON
+ * is used for this.
+ *
+ * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames
+ * for IBSS or MESH vif.
+ *
+ * @NL80211_CMD_SET_MAC_ACL: sets ACL for MAC address based access control.
+ * This is to be used with the drivers advertising the support of MAC
+ * address based access control. List of MAC addresses is passed in
+ * %NL80211_ATTR_MAC_ADDRS and ACL policy is passed in
+ * %NL80211_ATTR_ACL_POLICY. Driver will enable ACL with this list, if it
+ * is not already done. The new list will replace any existing list. Driver
+ * will clear its ACL when the list of MAC addresses passed is empty. This
+ * command is used in AP/P2P GO mode. Driver has to make sure to clear its
+ * ACL list during %NL80211_CMD_STOP_AP.
+ *
+ * @NL80211_CMD_RADAR_DETECT: Start a Channel availability check (CAC). Once
+ * a radar is detected or the channel availability scan (CAC) has finished
+ * or was aborted, or a radar was detected, usermode will be notified with
+ * this event. This command is also used to notify userspace about radars
+ * while operating on this channel.
+ * %NL80211_ATTR_RADAR_EVENT is used to inform about the type of the
+ * event.
+ *
+ * @NL80211_CMD_GET_PROTOCOL_FEATURES: Get global nl80211 protocol features,
+ * i.e. features for the nl80211 protocol rather than device features.
+ * Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap.
+ *
+ * @NL80211_CMD_UPDATE_FT_IES: Pass down the most up-to-date Fast Transition
+ * Information Element to the WLAN driver
+ *
+ * @NL80211_CMD_FT_EVENT: Send a Fast transition event from the WLAN driver
+ * to the supplicant. This will carry the target AP's MAC address along
+ * with the relevant Information Elements. This event is used to report
+ * received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE).
+ *
+ * @NL80211_CMD_CRIT_PROTOCOL_START: Indicates user-space will start running
+ * a critical protocol that needs more reliability in the connection to
+ * complete.
+ *
+ * @NL80211_CMD_CRIT_PROTOCOL_STOP: Indicates the connection reliability can
+ * return back to normal.
+ *
+ * @NL80211_CMD_GET_COALESCE: Get currently supported coalesce rules.
+ * @NL80211_CMD_SET_COALESCE: Configure coalesce rules or clear existing rules.
+ *
+ * @NL80211_CMD_CHANNEL_SWITCH: Perform a channel switch by announcing the
+ * the new channel information (Channel Switch Announcement - CSA)
+ * in the beacon for some time (as defined in the
+ * %NL80211_ATTR_CH_SWITCH_COUNT parameter) and then change to the
+ * new channel. Userspace provides the new channel information (using
+ * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel
+ * width). %NL80211_ATTR_CH_SWITCH_BLOCK_TX may be supplied to inform
+ * other station that transmission must be blocked until the channel
+ * switch is complete.
+ *
+ * @NL80211_CMD_VENDOR: Vendor-specified command/event. The command is specified
+ * by the %NL80211_ATTR_VENDOR_ID attribute and a sub-command in
+ * %NL80211_ATTR_VENDOR_SUBCMD. Parameter(s) can be transported in
+ * %NL80211_ATTR_VENDOR_DATA.
+ * For feature advertisement, the %NL80211_ATTR_VENDOR_DATA attribute is
+ * used in the wiphy data as a nested attribute containing descriptions
+ * (&struct nl80211_vendor_cmd_info) of the supported vendor commands.
+ * This may also be sent as an event with the same attributes.
+ *
+ * @NL80211_CMD_SET_QOS_MAP: Set Interworking QoS mapping for IP DSCP values.
+ * The QoS mapping information is included in %NL80211_ATTR_QOS_MAP. If
+ * that attribute is not included, QoS mapping is disabled. Since this
+ * QoS mapping is relevant for IP packets, it is only valid during an
+ * association. This is cleared on disassociation and AP restart.
+ *
+ * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given
+ * %NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO
+ * and %NL80211_ATTR_ADMITTED_TIME parameters.
+ * Note that the action frame handshake with the AP shall be handled by
+ * userspace via the normal management RX/TX framework, this only sets
+ * up the TX TS in the driver/device.
+ * If the admitted time attribute is not added then the request just checks
+ * if a subsequent setup could be successful, the intent is to use this to
+ * avoid setting up a session with the AP when local restrictions would
+ * make that impossible. However, the subsequent "real" setup may still
+ * fail even if the check was successful.
+ * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID
+ * and %NL80211_ATTR_MAC parameters. It isn't necessary to call this
+ * before removing a station entry entirely, or before disassociating
+ * or similar, cleanup will happen in the driver/device in this case.
+ *
+ * @NL80211_CMD_GET_MPP: Get mesh path attributes for mesh proxy path to
+ * destination %NL80211_ATTR_MAC on the interface identified by
+ * %NL80211_ATTR_IFINDEX.
+ *
+ * @NL80211_CMD_JOIN_OCB: Join the OCB network. The center frequency and
+ * bandwidth of a channel must be given.
+ * @NL80211_CMD_LEAVE_OCB: Leave the OCB network -- no special arguments, the
+ * network is determined by the network interface.
+ *
+ * @NL80211_CMD_TDLS_CHANNEL_SWITCH: Start channel-switching with a TDLS peer,
+ * identified by the %NL80211_ATTR_MAC parameter. A target channel is
+ * provided via %NL80211_ATTR_WIPHY_FREQ and other attributes determining
+ * channel width/type. The target operating class is given via
+ * %NL80211_ATTR_OPER_CLASS.
+ * The driver is responsible for continually initiating channel-switching
+ * operations and returning to the base channel for communication with the
+ * AP.
+ * @NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH: Stop channel-switching with a TDLS
+ * peer given by %NL80211_ATTR_MAC. Both peers must be on the base channel
+ * when this command completes.
+ *
+ * @NL80211_CMD_WIPHY_REG_CHANGE: Similar to %NL80211_CMD_REG_CHANGE, but used
+ * as an event to indicate changes for devices with wiphy-specific regdom
+ * management.
+ *
+ * @NL80211_CMD_ABORT_SCAN: Stop an ongoing scan. Returns -ENOENT if a scan is
+ * not running. The driver indicates the status of the scan through
+ * cfg80211_scan_done().
+ *
+ * @NL80211_CMD_START_NAN: Start NAN operation, identified by its
+ * %NL80211_ATTR_WDEV interface. This interface must have been
+ * previously created with %NL80211_CMD_NEW_INTERFACE. After it
+ * has been started, the NAN interface will create or join a
+ * cluster. This command must have a valid
+ * %NL80211_ATTR_NAN_MASTER_PREF attribute and optional
+ * %NL80211_ATTR_BANDS attributes. If %NL80211_ATTR_BANDS is
+ * omitted or set to 0, it means don't-care and the device will
+ * decide what to use. After this command NAN functions can be
+ * added.
+ * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by
+ * its %NL80211_ATTR_WDEV interface.
+ * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined
+ * with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this
+ * operation returns the strictly positive and unique instance id
+ * (%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE)
+ * of the function upon success.
+ * Since instance ID's can be re-used, this cookie is the right
+ * way to identify the function. This will avoid races when a termination
+ * event is handled by the user space after it has already added a new
+ * function that got the same instance id from the kernel as the one
+ * which just terminated.
+ * This cookie may be used in NAN events even before the command
+ * returns, so userspace shouldn't process NAN events until it processes
+ * the response to this command.
+ * Look at %NL80211_ATTR_SOCKET_OWNER as well.
+ * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie.
+ * This command is also used as a notification sent when a NAN function is
+ * terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID
+ * and %NL80211_ATTR_COOKIE attributes.
+ * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN
+ * configuration. NAN must be operational (%NL80211_CMD_START_NAN
+ * was executed). It must contain at least one of the following
+ * attributes: %NL80211_ATTR_NAN_MASTER_PREF,
+ * %NL80211_ATTR_BANDS. If %NL80211_ATTR_BANDS is omitted, the
+ * current configuration is not changed. If it is present but
+ * set to zero, the configuration is changed to don't-care
+ * (i.e. the device can decide what to do).
+ * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported.
+ * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and
+ * %NL80211_ATTR_COOKIE.
+ *
+ * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters
+ * for subsequent roaming cases if the driver or firmware uses internal
+ * BSS selection. This command can be issued only while connected and it
+ * does not result in a change for the current association. Currently,
+ * only the %NL80211_ATTR_IE data is used and updated with this command.
+ *
+ * @NL80211_CMD_SET_PMK: For offloaded 4-Way handshake, set the PMK or PMK-R0
+ * for the given authenticator address (specified with %NL80211_ATTR_MAC).
+ * When %NL80211_ATTR_PMKR0_NAME is set, %NL80211_ATTR_PMK specifies the
+ * PMK-R0, otherwise it specifies the PMK.
+ * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously
+ * configured PMK for the authenticator address identified by
+ * %NL80211_ATTR_MAC.
+ * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way
+ * handshake was completed successfully by the driver. The BSSID is
+ * specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake
+ * offload should send this event after indicating 802.11 association with
+ * %NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed
+ * %NL80211_CMD_DISCONNECT should be indicated instead.
+ *
+ * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request
+ * and RX notification. This command is used both as a request to transmit
+ * a control port frame and as a notification that a control port frame
+ * has been received. %NL80211_ATTR_FRAME is used to specify the
+ * frame contents. The frame is the raw EAPoL data, without ethernet or
+ * 802.11 headers.
+ * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added
+ * indicating the protocol type of the received frame; whether the frame
+ * was received unencrypted and the MAC address of the peer respectively.
+ *
+ * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
+ *
+ * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
+ * drivers that do not define separate commands for authentication and
+ * association, but rely on user space for the authentication to happen.
+ * This interface acts both as the event request (driver to user space)
+ * to trigger the authentication and command response (userspace to
+ * driver) to indicate the authentication status.
+ *
+ * User space uses the %NL80211_CMD_CONNECT command to the host driver to
+ * trigger a connection. The host driver selects a BSS and further uses
+ * this interface to offload only the authentication part to the user
+ * space. Authentication frames are passed between the driver and user
+ * space through the %NL80211_CMD_FRAME interface. Host driver proceeds
+ * further with the association after getting successful authentication
+ * status. User space indicates the authentication status through
+ * %NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH
+ * command interface.
+ *
+ * Host driver reports this status on an authentication failure to the
+ * user space through the connect result as the user space would have
+ * initiated the connection through the connect request.
+ *
+ * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's
+ * ht opmode or vht opmode changes using any of %NL80211_ATTR_SMPS_MODE,
+ * %NL80211_ATTR_CHANNEL_WIDTH,%NL80211_ATTR_NSS attributes with its
+ * address(specified in %NL80211_ATTR_MAC).
+ *
+ * @NL80211_CMD_GET_FTM_RESPONDER_STATS: Retrieve FTM responder statistics, in
+ * the %NL80211_ATTR_FTM_RESPONDER_STATS attribute.
+ *
+ * @NL80211_CMD_PEER_MEASUREMENT_START: start a (set of) peer measurement(s)
+ * with the given parameters, which are encapsulated in the nested
+ * %NL80211_ATTR_PEER_MEASUREMENTS attribute. Optionally, MAC address
+ * randomization may be enabled and configured by specifying the
+ * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes.
+ * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute.
+ * A u64 cookie for further %NL80211_ATTR_COOKIE use is is returned in
+ * the netlink extended ack message.
+ *
+ * To cancel a measurement, close the socket that requested it.
+ *
+ * Measurement results are reported to the socket that requested the
+ * measurement using @NL80211_CMD_PEER_MEASUREMENT_RESULT when they
+ * become available, so applications must ensure a large enough socket
+ * buffer size.
+ *
+ * Depending on driver support it may or may not be possible to start
+ * multiple concurrent measurements.
+ * @NL80211_CMD_PEER_MEASUREMENT_RESULT: This command number is used for the
+ * result notification from the driver to the requesting socket.
+ * @NL80211_CMD_PEER_MEASUREMENT_COMPLETE: Notification only, indicating that
+ * the measurement completed, using the measurement cookie
+ * (%NL80211_ATTR_COOKIE).
+ *
+ * @NL80211_CMD_NOTIFY_RADAR: Notify the kernel that a radar signal was
+ * detected and reported by a neighboring device on the channel
+ * indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes
+ * determining the width and type.
+ *
+ * @NL80211_CMD_UPDATE_OWE_INFO: This interface allows the host driver to
+ * offload OWE processing to user space. This intends to support
+ * OWE AKM by the host drivers that implement SME but rely
+ * on the user space for the cryptographic/DH IE processing in AP mode.
+ *
+ * @NL80211_CMD_PROBE_MESH_LINK: The requirement for mesh link metric
+ * refreshing, is that from one mesh point we be able to send some data
+ * frames to other mesh points which are not currently selected as a
+ * primary traffic path, but which are only 1 hop away. The absence of
+ * the primary path to the chosen node makes it necessary to apply some
+ * form of marking on a chosen packet stream so that the packets can be
+ * properly steered to the selected node for testing, and not by the
+ * regular mesh path lookup. Further, the packets must be of type data
+ * so that the rate control (often embedded in firmware) is used for
+ * rate selection.
+ *
+ * Here attribute %NL80211_ATTR_MAC is used to specify connected mesh
+ * peer MAC address and %NL80211_ATTR_FRAME is used to specify the frame
+ * content. The frame is ethernet data.
+ *
+ * @NL80211_CMD_MAX: highest used command number
+ * @__NL80211_CMD_AFTER_LAST: internal use
+ */
+enum nl80211_commands {
+/* don't change the order or add anything between, this is ABI! */
+ NL80211_CMD_UNSPEC,
+
+ NL80211_CMD_GET_WIPHY, /* can dump */
+ NL80211_CMD_SET_WIPHY,
+ NL80211_CMD_NEW_WIPHY,
+ NL80211_CMD_DEL_WIPHY,
+
+ NL80211_CMD_GET_INTERFACE, /* can dump */
+ NL80211_CMD_SET_INTERFACE,
+ NL80211_CMD_NEW_INTERFACE,
+ NL80211_CMD_DEL_INTERFACE,
+
+ NL80211_CMD_GET_KEY,
+ NL80211_CMD_SET_KEY,
+ NL80211_CMD_NEW_KEY,
+ NL80211_CMD_DEL_KEY,
+
+ NL80211_CMD_GET_BEACON,
+ NL80211_CMD_SET_BEACON,
+ NL80211_CMD_START_AP,
+ NL80211_CMD_NEW_BEACON = NL80211_CMD_START_AP,
+ NL80211_CMD_STOP_AP,
+ NL80211_CMD_DEL_BEACON = NL80211_CMD_STOP_AP,
+
+ NL80211_CMD_GET_STATION,
+ NL80211_CMD_SET_STATION,
+ NL80211_CMD_NEW_STATION,
+ NL80211_CMD_DEL_STATION,
+
+ NL80211_CMD_GET_MPATH,
+ NL80211_CMD_SET_MPATH,
+ NL80211_CMD_NEW_MPATH,
+ NL80211_CMD_DEL_MPATH,
+
+ NL80211_CMD_SET_BSS,
+
+ NL80211_CMD_SET_REG,
+ NL80211_CMD_REQ_SET_REG,
+
+ NL80211_CMD_GET_MESH_CONFIG,
+ NL80211_CMD_SET_MESH_CONFIG,
+
+ NL80211_CMD_SET_MGMT_EXTRA_IE /* reserved; not used */,
+
+ NL80211_CMD_GET_REG,
+
+ NL80211_CMD_GET_SCAN,
+ NL80211_CMD_TRIGGER_SCAN,
+ NL80211_CMD_NEW_SCAN_RESULTS,
+ NL80211_CMD_SCAN_ABORTED,
+
+ NL80211_CMD_REG_CHANGE,
+
+ NL80211_CMD_AUTHENTICATE,
+ NL80211_CMD_ASSOCIATE,
+ NL80211_CMD_DEAUTHENTICATE,
+ NL80211_CMD_DISASSOCIATE,
+
+ NL80211_CMD_MICHAEL_MIC_FAILURE,
+
+ NL80211_CMD_REG_BEACON_HINT,
+
+ NL80211_CMD_JOIN_IBSS,
+ NL80211_CMD_LEAVE_IBSS,
+
+ NL80211_CMD_TESTMODE,
+
+ NL80211_CMD_CONNECT,
+ NL80211_CMD_ROAM,
+ NL80211_CMD_DISCONNECT,
+
+ NL80211_CMD_SET_WIPHY_NETNS,
+
+ NL80211_CMD_GET_SURVEY,
+ NL80211_CMD_NEW_SURVEY_RESULTS,
+
+ NL80211_CMD_SET_PMKSA,
+ NL80211_CMD_DEL_PMKSA,
+ NL80211_CMD_FLUSH_PMKSA,
+
+ NL80211_CMD_REMAIN_ON_CHANNEL,
+ NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
+
+ NL80211_CMD_SET_TX_BITRATE_MASK,
+
+ NL80211_CMD_REGISTER_FRAME,
+ NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME,
+ NL80211_CMD_FRAME,
+ NL80211_CMD_ACTION = NL80211_CMD_FRAME,
+ NL80211_CMD_FRAME_TX_STATUS,
+ NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS,
+
+ NL80211_CMD_SET_POWER_SAVE,
+ NL80211_CMD_GET_POWER_SAVE,
+
+ NL80211_CMD_SET_CQM,
+ NL80211_CMD_NOTIFY_CQM,
+
+ NL80211_CMD_SET_CHANNEL,
+ NL80211_CMD_SET_WDS_PEER,
+
+ NL80211_CMD_FRAME_WAIT_CANCEL,
+
+ NL80211_CMD_JOIN_MESH,
+ NL80211_CMD_LEAVE_MESH,
+
+ NL80211_CMD_UNPROT_DEAUTHENTICATE,
+ NL80211_CMD_UNPROT_DISASSOCIATE,
+
+ NL80211_CMD_NEW_PEER_CANDIDATE,
+
+ NL80211_CMD_GET_WOWLAN,
+ NL80211_CMD_SET_WOWLAN,
+
+ NL80211_CMD_START_SCHED_SCAN,
+ NL80211_CMD_STOP_SCHED_SCAN,
+ NL80211_CMD_SCHED_SCAN_RESULTS,
+ NL80211_CMD_SCHED_SCAN_STOPPED,
+
+ NL80211_CMD_SET_REKEY_OFFLOAD,
+
+ NL80211_CMD_PMKSA_CANDIDATE,
+
+ NL80211_CMD_TDLS_OPER,
+ NL80211_CMD_TDLS_MGMT,
+
+ NL80211_CMD_UNEXPECTED_FRAME,
+
+ NL80211_CMD_PROBE_CLIENT,
+
+ NL80211_CMD_REGISTER_BEACONS,
+
+ NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
+
+ NL80211_CMD_SET_NOACK_MAP,
+
+ NL80211_CMD_CH_SWITCH_NOTIFY,
+
+ NL80211_CMD_START_P2P_DEVICE,
+ NL80211_CMD_STOP_P2P_DEVICE,
+
+ NL80211_CMD_CONN_FAILED,
+
+ NL80211_CMD_SET_MCAST_RATE,
+
+ NL80211_CMD_SET_MAC_ACL,
+
+ NL80211_CMD_RADAR_DETECT,
+
+ NL80211_CMD_GET_PROTOCOL_FEATURES,
+
+ NL80211_CMD_UPDATE_FT_IES,
+ NL80211_CMD_FT_EVENT,
+
+ NL80211_CMD_CRIT_PROTOCOL_START,
+ NL80211_CMD_CRIT_PROTOCOL_STOP,
+
+ NL80211_CMD_GET_COALESCE,
+ NL80211_CMD_SET_COALESCE,
+
+ NL80211_CMD_CHANNEL_SWITCH,
+
+ NL80211_CMD_VENDOR,
+
+ NL80211_CMD_SET_QOS_MAP,
+
+ NL80211_CMD_ADD_TX_TS,
+ NL80211_CMD_DEL_TX_TS,
+
+ NL80211_CMD_GET_MPP,
+
+ NL80211_CMD_JOIN_OCB,
+ NL80211_CMD_LEAVE_OCB,
+
+ NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
+
+ NL80211_CMD_TDLS_CHANNEL_SWITCH,
+ NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
+
+ NL80211_CMD_WIPHY_REG_CHANGE,
+
+ NL80211_CMD_ABORT_SCAN,
+
+ NL80211_CMD_START_NAN,
+ NL80211_CMD_STOP_NAN,
+ NL80211_CMD_ADD_NAN_FUNCTION,
+ NL80211_CMD_DEL_NAN_FUNCTION,
+ NL80211_CMD_CHANGE_NAN_CONFIG,
+ NL80211_CMD_NAN_MATCH,
+
+ NL80211_CMD_SET_MULTICAST_TO_UNICAST,
+
+ NL80211_CMD_UPDATE_CONNECT_PARAMS,
+
+ NL80211_CMD_SET_PMK,
+ NL80211_CMD_DEL_PMK,
+
+ NL80211_CMD_PORT_AUTHORIZED,
+
+ NL80211_CMD_RELOAD_REGDB,
+
+ NL80211_CMD_EXTERNAL_AUTH,
+
+ NL80211_CMD_STA_OPMODE_CHANGED,
+
+ NL80211_CMD_CONTROL_PORT_FRAME,
+
+ NL80211_CMD_GET_FTM_RESPONDER_STATS,
+
+ NL80211_CMD_PEER_MEASUREMENT_START,
+ NL80211_CMD_PEER_MEASUREMENT_RESULT,
+ NL80211_CMD_PEER_MEASUREMENT_COMPLETE,
+
+ NL80211_CMD_NOTIFY_RADAR,
+
+ NL80211_CMD_UPDATE_OWE_INFO,
+
+ NL80211_CMD_PROBE_MESH_LINK,
+
+ /* add new commands above here */
+
+ /* used to define NL80211_CMD_MAX below */
+ __NL80211_CMD_AFTER_LAST,
+ NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1
+};
+
+/*
+ * Allow user space programs to use #ifdef on new commands by defining them
+ * here
+ */
+#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS
+#define NL80211_CMD_SET_MGMT_EXTRA_IE NL80211_CMD_SET_MGMT_EXTRA_IE
+#define NL80211_CMD_REG_CHANGE NL80211_CMD_REG_CHANGE
+#define NL80211_CMD_AUTHENTICATE NL80211_CMD_AUTHENTICATE
+#define NL80211_CMD_ASSOCIATE NL80211_CMD_ASSOCIATE
+#define NL80211_CMD_DEAUTHENTICATE NL80211_CMD_DEAUTHENTICATE
+#define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE
+#define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT
+
+#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
+
+/* source-level API compatibility */
+#define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG
+#define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG
+#define NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE NL80211_MESH_SETUP_IE
+
+/**
+ * enum nl80211_attrs - nl80211 netlink attributes
+ *
+ * @NL80211_ATTR_UNSPEC: unspecified attribute to catch errors
+ *
+ * @NL80211_ATTR_WIPHY: index of wiphy to operate on, cf.
+ * /sys/class/ieee80211/<phyname>/index
+ * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming)
+ * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters
+ * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz,
+ * defines the channel together with the (deprecated)
+ * %NL80211_ATTR_WIPHY_CHANNEL_TYPE attribute or the attributes
+ * %NL80211_ATTR_CHANNEL_WIDTH and if needed %NL80211_ATTR_CENTER_FREQ1
+ * and %NL80211_ATTR_CENTER_FREQ2
+ * @NL80211_ATTR_CHANNEL_WIDTH: u32 attribute containing one of the values
+ * of &enum nl80211_chan_width, describing the channel width. See the
+ * documentation of the enum for more information.
+ * @NL80211_ATTR_CENTER_FREQ1: Center frequency of the first part of the
+ * channel, used for anything but 20 MHz bandwidth
+ * @NL80211_ATTR_CENTER_FREQ2: Center frequency of the second part of the
+ * channel, used only for 80+80 MHz bandwidth
+ * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ
+ * if HT20 or HT40 are to be used (i.e., HT disabled if not included):
+ * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including
+ * this attribute)
+ * NL80211_CHAN_HT20 = HT20 only
+ * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel
+ * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel
+ * This attribute is now deprecated.
+ * @NL80211_ATTR_WIPHY_RETRY_SHORT: TX retry limit for frames whose length is
+ * less than or equal to the RTS threshold; allowed range: 1..255;
+ * dot11ShortRetryLimit; u8
+ * @NL80211_ATTR_WIPHY_RETRY_LONG: TX retry limit for frames whose length is
+ * greater than the RTS threshold; allowed range: 1..255;
+ * dot11ShortLongLimit; u8
+ * @NL80211_ATTR_WIPHY_FRAG_THRESHOLD: fragmentation threshold, i.e., maximum
+ * length in octets for frames; allowed range: 256..8000, disable
+ * fragmentation with (u32)-1; dot11FragmentationThreshold; u32
+ * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length
+ * larger than or equal to this use RTS/CTS handshake); allowed range:
+ * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32
+ * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11
+ * section 7.3.2.9; dot11CoverageClass; u8
+ *
+ * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on
+ * @NL80211_ATTR_IFNAME: network interface name
+ * @NL80211_ATTR_IFTYPE: type of virtual interface, see &enum nl80211_iftype
+ *
+ * @NL80211_ATTR_WDEV: wireless device identifier, used for pseudo-devices
+ * that don't have a netdev (u64)
+ *
+ * @NL80211_ATTR_MAC: MAC address (various uses)
+ *
+ * @NL80211_ATTR_KEY_DATA: (temporal) key data; for TKIP this consists of
+ * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC
+ * keys
+ * @NL80211_ATTR_KEY_IDX: key ID (u8, 0-3)
+ * @NL80211_ATTR_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11
+ * section 7.3.2.25.1, e.g. 0x000FAC04)
+ * @NL80211_ATTR_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and
+ * CCMP keys, each six bytes in little endian
+ * @NL80211_ATTR_KEY_DEFAULT: Flag attribute indicating the key is default key
+ * @NL80211_ATTR_KEY_DEFAULT_MGMT: Flag attribute indicating the key is the
+ * default management key
+ * @NL80211_ATTR_CIPHER_SUITES_PAIRWISE: For crypto settings for connect or
+ * other commands, indicates which pairwise cipher suites are used
+ * @NL80211_ATTR_CIPHER_SUITE_GROUP: For crypto settings for connect or
+ * other commands, indicates which group cipher suite is used
+ *
+ * @NL80211_ATTR_BEACON_INTERVAL: beacon interval in TU
+ * @NL80211_ATTR_DTIM_PERIOD: DTIM period for beaconing
+ * @NL80211_ATTR_BEACON_HEAD: portion of the beacon before the TIM IE
+ * @NL80211_ATTR_BEACON_TAIL: portion of the beacon after the TIM IE
+ *
+ * @NL80211_ATTR_STA_AID: Association ID for the station (u16)
+ * @NL80211_ATTR_STA_FLAGS: flags, nested element with NLA_FLAG attributes of
+ * &enum nl80211_sta_flags (deprecated, use %NL80211_ATTR_STA_FLAGS2)
+ * @NL80211_ATTR_STA_LISTEN_INTERVAL: listen interval as defined by
+ * IEEE 802.11 7.3.1.6 (u16).
+ * @NL80211_ATTR_STA_SUPPORTED_RATES: supported rates, array of supported
+ * rates as defined by IEEE 802.11 7.3.2.2 but without the length
+ * restriction (at most %NL80211_MAX_SUPP_RATES).
+ * @NL80211_ATTR_STA_VLAN: interface index of VLAN interface to move station
+ * to, or the AP interface the station was originally added to.
+ * @NL80211_ATTR_STA_INFO: information about a station, part of station info
+ * given for %NL80211_CMD_GET_STATION, nested attribute containing
+ * info as possible, see &enum nl80211_sta_info.
+ *
+ * @NL80211_ATTR_WIPHY_BANDS: Information about an operating bands,
+ * consisting of a nested array.
+ *
+ * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes).
+ * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link
+ * (see &enum nl80211_plink_action).
+ * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path.
+ * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path
+ * info given for %NL80211_CMD_GET_MPATH, nested attribute described at
+ * &enum nl80211_mpath_info.
+ *
+ * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of
+ * &enum nl80211_mntr_flags.
+ *
+ * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the
+ * current regulatory domain should be set to or is already set to.
+ * For example, 'CR', for Costa Rica. This attribute is used by the kernel
+ * to query the CRDA to retrieve one regulatory domain. This attribute can
+ * also be used by userspace to query the kernel for the currently set
+ * regulatory domain. We chose an alpha2 as that is also used by the
+ * IEEE-802.11 country information element to identify a country.
+ * Users can also simply ask the wireless core to set regulatory domain
+ * to a specific alpha2.
+ * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory
+ * rules.
+ *
+ * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1)
+ * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled
+ * (u8, 0 or 1)
+ * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled
+ * (u8, 0 or 1)
+ * @NL80211_ATTR_BSS_BASIC_RATES: basic rates, array of basic
+ * rates in format defined by IEEE 802.11 7.3.2.2 but without the length
+ * restriction (at most %NL80211_MAX_SUPP_RATES).
+ *
+ * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from
+ * association request when used with NL80211_CMD_NEW_STATION)
+ *
+ * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all
+ * supported interface types, each a flag attribute with the number
+ * of the interface mode.
+ *
+ * @NL80211_ATTR_MGMT_SUBTYPE: Management frame subtype for
+ * %NL80211_CMD_SET_MGMT_EXTRA_IE.
+ *
+ * @NL80211_ATTR_IE: Information element(s) data (used, e.g., with
+ * %NL80211_CMD_SET_MGMT_EXTRA_IE).
+ *
+ * @NL80211_ATTR_MAX_NUM_SCAN_SSIDS: number of SSIDs you can scan with
+ * a single scan request, a wiphy attribute.
+ * @NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS: number of SSIDs you can
+ * scan with a single scheduled scan request, a wiphy attribute.
+ * @NL80211_ATTR_MAX_SCAN_IE_LEN: maximum length of information elements
+ * that can be added to a scan request
+ * @NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN: maximum length of information
+ * elements that can be added to a scheduled scan request
+ * @NL80211_ATTR_MAX_MATCH_SETS: maximum number of sets that can be
+ * used with @NL80211_ATTR_SCHED_SCAN_MATCH, a wiphy attribute.
+ *
+ * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz)
+ * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive
+ * scanning and include a zero-length SSID (wildcard) for wildcard scan
+ * @NL80211_ATTR_BSS: scan result BSS
+ *
+ * @NL80211_ATTR_REG_INITIATOR: indicates who requested the regulatory domain
+ * currently in effect. This could be any of the %NL80211_REGDOM_SET_BY_*
+ * @NL80211_ATTR_REG_TYPE: indicates the type of the regulatory domain currently
+ * set. This can be one of the nl80211_reg_type (%NL80211_REGDOM_TYPE_*)
+ *
+ * @NL80211_ATTR_SUPPORTED_COMMANDS: wiphy attribute that specifies
+ * an array of command numbers (i.e. a mapping index to command number)
+ * that the driver for the given wiphy supports.
+ *
+ * @NL80211_ATTR_FRAME: frame data (binary attribute), including frame header
+ * and body, but not FCS; used, e.g., with NL80211_CMD_AUTHENTICATE and
+ * NL80211_CMD_ASSOCIATE events
+ * @NL80211_ATTR_SSID: SSID (binary attribute, 0..32 octets)
+ * @NL80211_ATTR_AUTH_TYPE: AuthenticationType, see &enum nl80211_auth_type,
+ * represented as a u32
+ * @NL80211_ATTR_REASON_CODE: ReasonCode for %NL80211_CMD_DEAUTHENTICATE and
+ * %NL80211_CMD_DISASSOCIATE, u16
+ *
+ * @NL80211_ATTR_KEY_TYPE: Key Type, see &enum nl80211_key_type, represented as
+ * a u32
+ *
+ * @NL80211_ATTR_FREQ_BEFORE: A channel which has suffered a regulatory change
+ * due to considerations from a beacon hint. This attribute reflects
+ * the state of the channel _before_ the beacon hint processing. This
+ * attributes consists of a nested attribute containing
+ * NL80211_FREQUENCY_ATTR_*
+ * @NL80211_ATTR_FREQ_AFTER: A channel which has suffered a regulatory change
+ * due to considerations from a beacon hint. This attribute reflects
+ * the state of the channel _after_ the beacon hint processing. This
+ * attributes consists of a nested attribute containing
+ * NL80211_FREQUENCY_ATTR_*
+ *
+ * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported
+ * cipher suites
+ *
+ * @NL80211_ATTR_FREQ_FIXED: a flag indicating the IBSS should not try to look
+ * for other networks on different channels
+ *
+ * @NL80211_ATTR_TIMED_OUT: a flag indicating than an operation timed out; this
+ * is used, e.g., with %NL80211_CMD_AUTHENTICATE event
+ *
+ * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is
+ * used for the association (&enum nl80211_mfp, represented as a u32);
+ * this attribute can be used with %NL80211_CMD_ASSOCIATE and
+ * %NL80211_CMD_CONNECT requests. %NL80211_MFP_OPTIONAL is not allowed for
+ * %NL80211_CMD_ASSOCIATE since user space SME is expected and hence, it
+ * must have decided whether to use management frame protection or not.
+ * Setting %NL80211_MFP_OPTIONAL with a %NL80211_CMD_CONNECT request will
+ * let the driver (or the firmware) decide whether to use MFP or not.
+ *
+ * @NL80211_ATTR_STA_FLAGS2: Attribute containing a
+ * &struct nl80211_sta_flag_update.
+ *
+ * @NL80211_ATTR_CONTROL_PORT: A flag indicating whether user space controls
+ * IEEE 802.1X port, i.e., sets/clears %NL80211_STA_FLAG_AUTHORIZED, in
+ * station mode. If the flag is included in %NL80211_CMD_ASSOCIATE
+ * request, the driver will assume that the port is unauthorized until
+ * authorized by user space. Otherwise, port is marked authorized by
+ * default in station mode.
+ * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the
+ * ethertype that will be used for key negotiation. It can be
+ * specified with the associate and connect commands. If it is not
+ * specified, the value defaults to 0x888E (PAE, 802.1X). This
+ * attribute is also used as a flag in the wiphy information to
+ * indicate that protocols other than PAE are supported.
+ * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with
+ * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom
+ * ethertype frames used for key negotiation must not be encrypted.
+ * @NL80211_ATTR_CONTROL_PORT_OVER_NL80211: A flag indicating whether control
+ * port frames (e.g. of type given in %NL80211_ATTR_CONTROL_PORT_ETHERTYPE)
+ * will be sent directly to the network interface or sent via the NL80211
+ * socket. If this attribute is missing, then legacy behavior of sending
+ * control port frames directly to the network interface is used. If the
+ * flag is included, then control port frames are sent over NL80211 instead
+ * using %CMD_CONTROL_PORT_FRAME. If control port routing over NL80211 is
+ * to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER
+ * flag.
+ *
+ * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver.
+ * We recommend using nested, driver-specific attributes within this.
+ *
+ * @NL80211_ATTR_DISCONNECTED_BY_AP: A flag indicating that the DISCONNECT
+ * event was due to the AP disconnecting the station, and not due to
+ * a local disconnect request.
+ * @NL80211_ATTR_STATUS_CODE: StatusCode for the %NL80211_CMD_CONNECT
+ * event (u16)
+ * @NL80211_ATTR_PRIVACY: Flag attribute, used with connect(), indicating
+ * that protected APs should be used. This is also used with NEW_BEACON to
+ * indicate that the BSS is to use protection.
+ *
+ * @NL80211_ATTR_CIPHERS_PAIRWISE: Used with CONNECT, ASSOCIATE, and NEW_BEACON
+ * to indicate which unicast key ciphers will be used with the connection
+ * (an array of u32).
+ * @NL80211_ATTR_CIPHER_GROUP: Used with CONNECT, ASSOCIATE, and NEW_BEACON to
+ * indicate which group key cipher will be used with the connection (a
+ * u32).
+ * @NL80211_ATTR_WPA_VERSIONS: Used with CONNECT, ASSOCIATE, and NEW_BEACON to
+ * indicate which WPA version(s) the AP we want to associate with is using
+ * (a u32 with flags from &enum nl80211_wpa_versions).
+ * @NL80211_ATTR_AKM_SUITES: Used with CONNECT, ASSOCIATE, and NEW_BEACON to
+ * indicate which key management algorithm(s) to use (an array of u32).
+ * This attribute is also sent in response to @NL80211_CMD_GET_WIPHY,
+ * indicating the supported AKM suites, intended for specific drivers which
+ * implement SME and have constraints on which AKMs are supported and also
+ * the cases where an AKM support is offloaded to the driver/firmware.
+ * If there is no such notification from the driver, user space should
+ * assume the driver supports all the AKM suites.
+ *
+ * @NL80211_ATTR_REQ_IE: (Re)association request information elements as
+ * sent out by the card, for ROAM and successful CONNECT events.
+ * @NL80211_ATTR_RESP_IE: (Re)association response information elements as
+ * sent by peer, for ROAM and successful CONNECT events.
+ *
+ * @NL80211_ATTR_PREV_BSSID: previous BSSID, to be used in ASSOCIATE and CONNECT
+ * commands to specify a request to reassociate within an ESS, i.e., to use
+ * Reassociate Request frame (with the value of this attribute in the
+ * Current AP address field) instead of Association Request frame which is
+ * used for the initial association to an ESS.
+ *
+ * @NL80211_ATTR_KEY: key information in a nested attribute with
+ * %NL80211_KEY_* sub-attributes
+ * @NL80211_ATTR_KEYS: array of keys for static WEP keys for connect()
+ * and join_ibss(), key information is in a nested attribute each
+ * with %NL80211_KEY_* sub-attributes
+ *
+ * @NL80211_ATTR_PID: Process ID of a network namespace.
+ *
+ * @NL80211_ATTR_GENERATION: Used to indicate consistent snapshots for
+ * dumps. This number increases whenever the object list being
+ * dumped changes, and as such userspace can verify that it has
+ * obtained a complete and consistent snapshot by verifying that
+ * all dump messages contain the same generation number. If it
+ * changed then the list changed and the dump should be repeated
+ * completely from scratch.
+ *
+ * @NL80211_ATTR_4ADDR: Use 4-address frames on a virtual interface
+ *
+ * @NL80211_ATTR_SURVEY_INFO: survey information about a channel, part of
+ * the survey response for %NL80211_CMD_GET_SURVEY, nested attribute
+ * containing info as possible, see &enum survey_info.
+ *
+ * @NL80211_ATTR_PMKID: PMK material for PMKSA caching.
+ * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can
+ * cache, a wiphy attribute.
+ *
+ * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32.
+ * @NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION: Device attribute that
+ * specifies the maximum duration that can be requested with the
+ * remain-on-channel operation, in milliseconds, u32.
+ *
+ * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects.
+ *
+ * @NL80211_ATTR_TX_RATES: Nested set of attributes
+ * (enum nl80211_tx_rate_attributes) describing TX rates per band. The
+ * enum nl80211_band value is used as the index (nla_type() of the nested
+ * data. If a band is not included, it will be configured to allow all
+ * rates based on negotiated supported rates information. This attribute
+ * is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP,
+ * and joining mesh networks (not IBSS yet). In the later case, it must
+ * specify just a single bitrate, which is to be used for the beacon.
+ * The driver must also specify support for this with the extended
+ * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
+ * NL80211_EXT_FEATURE_BEACON_RATE_HT and
+ * NL80211_EXT_FEATURE_BEACON_RATE_VHT.
+ *
+ * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain
+ * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME.
+ * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the
+ * @NL80211_CMD_REGISTER_FRAME command.
+ * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a
+ * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing
+ * information about which frame types can be transmitted with
+ * %NL80211_CMD_FRAME.
+ * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a
+ * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing
+ * information about which frame types can be registered for RX.
+ *
+ * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was
+ * acknowledged by the recipient.
+ *
+ * @NL80211_ATTR_PS_STATE: powersave state, using &enum nl80211_ps_state values.
+ *
+ * @NL80211_ATTR_CQM: connection quality monitor configuration in a
+ * nested attribute with %NL80211_ATTR_CQM_* sub-attributes.
+ *
+ * @NL80211_ATTR_LOCAL_STATE_CHANGE: Flag attribute to indicate that a command
+ * is requesting a local authentication/association state change without
+ * invoking actual management frame exchange. This can be used with
+ * NL80211_CMD_AUTHENTICATE, NL80211_CMD_DEAUTHENTICATE,
+ * NL80211_CMD_DISASSOCIATE.
+ *
+ * @NL80211_ATTR_AP_ISOLATE: (AP mode) Do not forward traffic between stations
+ * connected to this BSS.
+ *
+ * @NL80211_ATTR_WIPHY_TX_POWER_SETTING: Transmit power setting type. See
+ * &enum nl80211_tx_power_setting for possible values.
+ * @NL80211_ATTR_WIPHY_TX_POWER_LEVEL: Transmit power level in signed mBm units.
+ * This is used in association with @NL80211_ATTR_WIPHY_TX_POWER_SETTING
+ * for non-automatic settings.
+ *
+ * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly
+ * means support for per-station GTKs.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting.
+ * This can be used to mask out antennas which are not attached or should
+ * not be used for transmitting. If an antenna is not selected in this
+ * bitmap the hardware is not allowed to transmit on this antenna.
+ *
+ * Each bit represents one antenna, starting with antenna 1 at the first
+ * bit. Depending on which antennas are selected in the bitmap, 802.11n
+ * drivers can derive which chainmasks to use (if all antennas belonging to
+ * a particular chain are disabled this chain should be disabled) and if
+ * a chain has diversity antennas wether diversity should be used or not.
+ * HT capabilities (STBC, TX Beamforming, Antenna selection) can be
+ * derived from the available chains after applying the antenna mask.
+ * Non-802.11n drivers can derive wether to use diversity or not.
+ * Drivers may reject configurations or RX/TX mask combinations they cannot
+ * support by returning -EINVAL.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving.
+ * This can be used to mask out antennas which are not attached or should
+ * not be used for receiving. If an antenna is not selected in this bitmap
+ * the hardware should not be configured to receive on this antenna.
+ * For a more detailed description see @NL80211_ATTR_WIPHY_ANTENNA_TX.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX: Bitmap of antennas which are available
+ * for configuration as TX antennas via the above parameters.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX: Bitmap of antennas which are available
+ * for configuration as RX antennas via the above parameters.
+ *
+ * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS
+ *
+ * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be
+ * transmitted on another channel when the channel given doesn't match
+ * the current channel. If the current channel doesn't match and this
+ * flag isn't set, the frame will be rejected. This is also used as an
+ * nl80211 capability flag.
+ *
+ * @NL80211_ATTR_BSS_HT_OPMODE: HT operation mode (u16)
+ *
+ * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags
+ * attributes, specifying what a key should be set as default as.
+ * See &enum nl80211_key_default_types.
+ *
+ * @NL80211_ATTR_MESH_SETUP: Optional mesh setup parameters. These cannot be
+ * changed once the mesh is active.
+ * @NL80211_ATTR_MESH_CONFIG: Mesh configuration parameters, a nested attribute
+ * containing attributes from &enum nl80211_meshconf_params.
+ * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver
+ * allows auth frames in a mesh to be passed to userspace for processing via
+ * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag.
+ * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as defined in
+ * &enum nl80211_plink_state. Used when userspace is driving the peer link
+ * management state machine. @NL80211_MESH_SETUP_USERSPACE_AMPE or
+ * @NL80211_MESH_SETUP_USERSPACE_MPM must be enabled.
+ *
+ * @NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED: indicates, as part of the wiphy
+ * capabilities, the supported WoWLAN triggers
+ * @NL80211_ATTR_WOWLAN_TRIGGERS: used by %NL80211_CMD_SET_WOWLAN to
+ * indicate which WoW triggers should be enabled. This is also
+ * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN
+ * triggers.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan
+ * cycles, in msecs.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_MATCH: Nested attribute with one or more
+ * sets of attributes to match during scheduled scans. Only BSSs
+ * that match any of the sets will be reported. These are
+ * pass-thru filter rules.
+ * For a match to succeed, the BSS must match all attributes of a
+ * set. Since not every hardware supports matching all types of
+ * attributes, there is no guarantee that the reported BSSs are
+ * fully complying with the match sets and userspace needs to be
+ * able to ignore them by itself.
+ * Thus, the implementation is somewhat hardware-dependent, but
+ * this is only an optimization and the userspace application
+ * needs to handle all the non-filtered results anyway.
+ * If the match attributes don't make sense when combined with
+ * the values passed in @NL80211_ATTR_SCAN_SSIDS (eg. if an SSID
+ * is included in the probe request, but the match attributes
+ * will never let it go through), -EINVAL may be returned.
+ * If omitted, no filtering is done.
+ *
+ * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported
+ * interface combinations. In each nested item, it contains attributes
+ * defined in &enum nl80211_if_combination_attrs.
+ * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like
+ * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that
+ * are managed in software: interfaces of these types aren't subject to
+ * any restrictions in their number or combinations.
+ *
+ * @NL80211_ATTR_REKEY_DATA: nested attribute containing the information
+ * necessary for GTK rekeying in the device, see &enum nl80211_rekey_data.
+ *
+ * @NL80211_ATTR_SCAN_SUPP_RATES: rates per to be advertised as supported in scan,
+ * nested array attribute containing an entry for each band, with the entry
+ * being a list of supported rates as defined by IEEE 802.11 7.3.2.2 but
+ * without the length restriction (at most %NL80211_MAX_SUPP_RATES).
+ *
+ * @NL80211_ATTR_HIDDEN_SSID: indicates whether SSID is to be hidden from Beacon
+ * and Probe Response (when response to wildcard Probe Request); see
+ * &enum nl80211_hidden_ssid, represented as a u32
+ *
+ * @NL80211_ATTR_IE_PROBE_RESP: Information element(s) for Probe Response frame.
+ * This is used with %NL80211_CMD_NEW_BEACON and %NL80211_CMD_SET_BEACON to
+ * provide extra IEs (e.g., WPS/P2P IE) into Probe Response frames when the
+ * driver (or firmware) replies to Probe Request frames.
+ * @NL80211_ATTR_IE_ASSOC_RESP: Information element(s) for (Re)Association
+ * Response frames. This is used with %NL80211_CMD_NEW_BEACON and
+ * %NL80211_CMD_SET_BEACON to provide extra IEs (e.g., WPS/P2P IE) into
+ * (Re)Association Response frames when the driver (or firmware) replies to
+ * (Re)Association Request frames.
+ *
+ * @NL80211_ATTR_STA_WME: Nested attribute containing the wme configuration
+ * of the station, see &enum nl80211_sta_wme_attr.
+ * @NL80211_ATTR_SUPPORT_AP_UAPSD: the device supports uapsd when working
+ * as AP.
+ *
+ * @NL80211_ATTR_ROAM_SUPPORT: Indicates whether the firmware is capable of
+ * roaming to another AP in the same ESS if the signal lever is low.
+ *
+ * @NL80211_ATTR_PMKSA_CANDIDATE: Nested attribute containing the PMKSA caching
+ * candidate information, see &enum nl80211_pmksa_candidate_attr.
+ *
+ * @NL80211_ATTR_TX_NO_CCK_RATE: Indicates whether to use CCK rate or not
+ * for management frames transmission. In order to avoid p2p probe/action
+ * frames are being transmitted at CCK rate in 2GHz band, the user space
+ * applications use this attribute.
+ * This attribute is used with %NL80211_CMD_TRIGGER_SCAN and
+ * %NL80211_CMD_FRAME commands.
+ *
+ * @NL80211_ATTR_TDLS_ACTION: Low level TDLS action code (e.g. link setup
+ * request, link setup confirm, link teardown, etc.). Values are
+ * described in the TDLS (802.11z) specification.
+ * @NL80211_ATTR_TDLS_DIALOG_TOKEN: Non-zero token for uniquely identifying a
+ * TDLS conversation between two devices.
+ * @NL80211_ATTR_TDLS_OPERATION: High level TDLS operation; see
+ * &enum nl80211_tdls_operation, represented as a u8.
+ * @NL80211_ATTR_TDLS_SUPPORT: A flag indicating the device can operate
+ * as a TDLS peer sta.
+ * @NL80211_ATTR_TDLS_EXTERNAL_SETUP: The TDLS discovery/setup and teardown
+ * procedures should be performed by sending TDLS packets via
+ * %NL80211_CMD_TDLS_MGMT. Otherwise %NL80211_CMD_TDLS_OPER should be
+ * used for asking the driver to perform a TDLS operation.
+ *
+ * @NL80211_ATTR_DEVICE_AP_SME: This u32 attribute may be listed for devices
+ * that have AP support to indicate that they have the AP SME integrated
+ * with support for the features listed in this attribute, see
+ * &enum nl80211_ap_sme_features.
+ *
+ * @NL80211_ATTR_DONT_WAIT_FOR_ACK: Used with %NL80211_CMD_FRAME, this tells
+ * the driver to not wait for an acknowledgement. Note that due to this,
+ * it will also not give a status callback nor return a cookie. This is
+ * mostly useful for probe responses to save airtime.
+ *
+ * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from
+ * &enum nl80211_feature_flags and is advertised in wiphy information.
+ * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe
+ * requests while operating in AP-mode.
+ * This attribute holds a bitmap of the supported protocols for
+ * offloading (see &enum nl80211_probe_resp_offload_support_attr).
+ *
+ * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire
+ * probe-response frame. The DA field in the 802.11 header is zero-ed out,
+ * to be filled by the FW.
+ * @NL80211_ATTR_DISABLE_HT: Force HT capable interfaces to disable
+ * this feature. Currently, only supported in mac80211 drivers.
+ * @NL80211_ATTR_HT_CAPABILITY_MASK: Specify which bits of the
+ * ATTR_HT_CAPABILITY to which attention should be paid.
+ * Currently, only mac80211 NICs support this feature.
+ * The values that may be configured are:
+ * MCS rates, MAX-AMSDU, HT-20-40 and HT_CAP_SGI_40
+ * AMPDU density and AMPDU factor.
+ * All values are treated as suggestions and may be ignored
+ * by the driver as required. The actual values may be seen in
+ * the station debugfs ht_caps file.
+ *
+ * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country
+ * abides to when initiating radiation on DFS channels. A country maps
+ * to one DFS region.
+ *
+ * @NL80211_ATTR_NOACK_MAP: This u16 bitmap contains the No Ack Policy of
+ * up to 16 TIDs.
+ *
+ * @NL80211_ATTR_INACTIVITY_TIMEOUT: timeout value in seconds, this can be
+ * used by the drivers which has MLME in firmware and does not have support
+ * to report per station tx/rx activity to free up the station entry from
+ * the list. This needs to be used when the driver advertises the
+ * capability to timeout the stations.
+ *
+ * @NL80211_ATTR_RX_SIGNAL_DBM: signal strength in dBm (as a 32-bit int);
+ * this attribute is (depending on the driver capabilities) added to
+ * received frames indicated with %NL80211_CMD_FRAME.
+ *
+ * @NL80211_ATTR_BG_SCAN_PERIOD: Background scan period in seconds
+ * or 0 to disable background scan.
+ *
+ * @NL80211_ATTR_USER_REG_HINT_TYPE: type of regulatory hint passed from
+ * userspace. If unset it is assumed the hint comes directly from
+ * a user. If set code could specify exactly what type of source
+ * was used to provide the hint. For the different types of
+ * allowed user regulatory hints see nl80211_user_reg_hint_type.
+ *
+ * @NL80211_ATTR_CONN_FAILED_REASON: The reason for which AP has rejected
+ * the connection request from a station. nl80211_connect_failed_reason
+ * enum has different reasons of connection failure.
+ *
+ * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames.
+ * This contains the authentication frame body (non-IE and IE data),
+ * excluding the Authentication algorithm number, i.e., starting at the
+ * Authentication transaction sequence number field. It is used with
+ * authentication algorithms that need special fields to be added into
+ * the frames (SAE and FILS). Currently, only the SAE cases use the
+ * initial two fields (Authentication transaction sequence number and
+ * Status code). However, those fields are included in the attribute data
+ * for all authentication algorithms to keep the attribute definition
+ * consistent.
+ *
+ * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from
+ * association request when used with NL80211_CMD_NEW_STATION)
+ *
+ * @NL80211_ATTR_SCAN_FLAGS: scan request control flags (u32)
+ *
+ * @NL80211_ATTR_P2P_CTWINDOW: P2P GO Client Traffic Window (u8), used with
+ * the START_AP and SET_BSS commands
+ * @NL80211_ATTR_P2P_OPPPS: P2P GO opportunistic PS (u8), used with the
+ * START_AP and SET_BSS commands. This can have the values 0 or 1;
+ * if not given in START_AP 0 is assumed, if not given in SET_BSS
+ * no change is made.
+ *
+ * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode
+ * defined in &enum nl80211_mesh_power_mode.
+ *
+ * @NL80211_ATTR_ACL_POLICY: ACL policy, see &enum nl80211_acl_policy,
+ * carried in a u32 attribute
+ *
+ * @NL80211_ATTR_MAC_ADDRS: Array of nested MAC addresses, used for
+ * MAC ACL.
+ *
+ * @NL80211_ATTR_MAC_ACL_MAX: u32 attribute to advertise the maximum
+ * number of MAC addresses that a device can support for MAC
+ * ACL.
+ *
+ * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace,
+ * contains a value of enum nl80211_radar_event (u32).
+ *
+ * @NL80211_ATTR_EXT_CAPA: 802.11 extended capabilities that the kernel driver
+ * has and handles. The format is the same as the IE contents. See
+ * 802.11-2012 8.4.2.29 for more information.
+ * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver
+ * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields.
+ *
+ * @NL80211_ATTR_STA_CAPABILITY: Station capabilities (u16) are advertised to
+ * the driver, e.g., to enable TDLS power save (PU-APSD).
+ *
+ * @NL80211_ATTR_STA_EXT_CAPABILITY: Station extended capabilities are
+ * advertised to the driver, e.g., to enable TDLS off channel operations
+ * and PU-APSD.
+ *
+ * @NL80211_ATTR_PROTOCOL_FEATURES: global nl80211 feature flags, see
+ * &enum nl80211_protocol_features, the attribute is a u32.
+ *
+ * @NL80211_ATTR_SPLIT_WIPHY_DUMP: flag attribute, userspace supports
+ * receiving the data for a single wiphy split across multiple
+ * messages, given with wiphy dump message
+ *
+ * @NL80211_ATTR_MDID: Mobility Domain Identifier
+ *
+ * @NL80211_ATTR_IE_RIC: Resource Information Container Information
+ * Element
+ *
+ * @NL80211_ATTR_CRIT_PROT_ID: critical protocol identifier requiring increased
+ * reliability, see &enum nl80211_crit_proto_id (u16).
+ * @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which
+ * the connection should have increased reliability (u16).
+ *
+ * @NL80211_ATTR_PEER_AID: Association ID for the peer TDLS station (u16).
+ * This is similar to @NL80211_ATTR_STA_AID but with a difference of being
+ * allowed to be used with the first @NL80211_CMD_SET_STATION command to
+ * update a TDLS peer STA entry.
+ *
+ * @NL80211_ATTR_COALESCE_RULE: Coalesce rule information.
+ *
+ * @NL80211_ATTR_CH_SWITCH_COUNT: u32 attribute specifying the number of TBTT's
+ * until the channel switch event.
+ * @NL80211_ATTR_CH_SWITCH_BLOCK_TX: flag attribute specifying that transmission
+ * must be blocked on the current channel (before the channel switch
+ * operation).
+ * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
+ * for the time while performing a channel switch.
+ * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel
+ * switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
+ * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel
+ * switch counters in the probe response (%NL80211_ATTR_PROBE_RESP).
+ *
+ * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32.
+ * As specified in the &enum nl80211_rxmgmt_flags.
+ *
+ * @NL80211_ATTR_STA_SUPPORTED_CHANNELS: array of supported channels.
+ *
+ * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported
+ * supported operating classes.
+ *
+ * @NL80211_ATTR_HANDLE_DFS: A flag indicating whether user space
+ * controls DFS operation in IBSS mode. If the flag is included in
+ * %NL80211_CMD_JOIN_IBSS request, the driver will allow use of DFS
+ * channels and reports radar events to userspace. Userspace is required
+ * to react to radar events, e.g. initiate a channel switch or leave the
+ * IBSS network.
+ *
+ * @NL80211_ATTR_SUPPORT_5_MHZ: A flag indicating that the device supports
+ * 5 MHz channel bandwidth.
+ * @NL80211_ATTR_SUPPORT_10_MHZ: A flag indicating that the device supports
+ * 10 MHz channel bandwidth.
+ *
+ * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode
+ * Notification Element based on association request when used with
+ * %NL80211_CMD_NEW_STATION or %NL80211_CMD_SET_STATION (only when
+ * %NL80211_FEATURE_FULL_AP_CLIENT_STATE is supported, or with TDLS);
+ * u8 attribute.
+ *
+ * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if
+ * %NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet)
+ * @NL80211_ATTR_VENDOR_SUBCMD: vendor sub-command
+ * @NL80211_ATTR_VENDOR_DATA: data for the vendor command, if any; this
+ * attribute is also used for vendor command feature advertisement
+ * @NL80211_ATTR_VENDOR_EVENTS: used for event list advertising in the wiphy
+ * info, containing a nested array of possible events
+ *
+ * @NL80211_ATTR_QOS_MAP: IP DSCP mapping for Interworking QoS mapping. This
+ * data is in the format defined for the payload of the QoS Map Set element
+ * in IEEE Std 802.11-2012, 8.4.2.97.
+ *
+ * @NL80211_ATTR_MAC_HINT: MAC address recommendation as initial BSS
+ * @NL80211_ATTR_WIPHY_FREQ_HINT: frequency of the recommended initial BSS
+ *
+ * @NL80211_ATTR_MAX_AP_ASSOC_STA: Device attribute that indicates how many
+ * associated stations are supported in AP mode (including P2P GO); u32.
+ * Since drivers may not have a fixed limit on the maximum number (e.g.,
+ * other concurrent operations may affect this), drivers are allowed to
+ * advertise values that cannot always be met. In such cases, an attempt
+ * to add a new station entry with @NL80211_CMD_NEW_STATION may fail.
+ *
+ * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which
+ * should be updated when the frame is transmitted.
+ * @NL80211_ATTR_MAX_CSA_COUNTERS: U8 attribute used to advertise the maximum
+ * supported number of csa counters.
+ *
+ * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32.
+ * As specified in the &enum nl80211_tdls_peer_capability.
+ *
+ * @NL80211_ATTR_SOCKET_OWNER: Flag attribute, if set during interface
+ * creation then the new interface will be owned by the netlink socket
+ * that created it and will be destroyed when the socket is closed.
+ * If set during scheduled scan start then the new scan req will be
+ * owned by the netlink socket that created it and the scheduled scan will
+ * be stopped when the socket is closed.
+ * If set during configuration of regulatory indoor operation then the
+ * regulatory indoor configuration would be owned by the netlink socket
+ * that configured the indoor setting, and the indoor operation would be
+ * cleared when the socket is closed.
+ * If set during NAN interface creation, the interface will be destroyed
+ * if the socket is closed just like any other interface. Moreover, NAN
+ * notifications will be sent in unicast to that socket. Without this
+ * attribute, the notifications will be sent to the %NL80211_MCGRP_NAN
+ * multicast group.
+ * If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the
+ * station will deauthenticate when the socket is closed.
+ * If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically
+ * torn down when the socket is closed.
+ * If set during %NL80211_CMD_JOIN_MESH the mesh setup will be
+ * automatically torn down when the socket is closed.
+ * If set during %NL80211_CMD_START_AP the AP will be automatically
+ * disabled when the socket is closed.
+ *
+ * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
+ * the TDLS link initiator.
+ *
+ * @NL80211_ATTR_USE_RRM: flag for indicating whether the current connection
+ * shall support Radio Resource Measurements (11k). This attribute can be
+ * used with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests.
+ * User space applications are expected to use this flag only if the
+ * underlying device supports these minimal RRM features:
+ * %NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES,
+ * %NL80211_FEATURE_QUIET,
+ * Or, if global RRM is supported, see:
+ * %NL80211_EXT_FEATURE_RRM
+ * If this flag is used, driver must add the Power Capabilities IE to the
+ * association request. In addition, it must also set the RRM capability
+ * flag in the association request's Capability Info field.
+ *
+ * @NL80211_ATTR_WIPHY_DYN_ACK: flag attribute used to enable ACK timeout
+ * estimation algorithm (dynack). In order to activate dynack
+ * %NL80211_FEATURE_ACKTO_ESTIMATION feature flag must be set by lower
+ * drivers to indicate dynack capability. Dynack is automatically disabled
+ * setting valid value for coverage class.
+ *
+ * @NL80211_ATTR_TSID: a TSID value (u8 attribute)
+ * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute)
+ * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds
+ * (per second) (u16 attribute)
+ *
+ * @NL80211_ATTR_SMPS_MODE: SMPS mode to use (ap mode). see
+ * &enum nl80211_smps_mode.
+ *
+ * @NL80211_ATTR_OPER_CLASS: operating class
+ *
+ * @NL80211_ATTR_MAC_MASK: MAC address mask
+ *
+ * @NL80211_ATTR_WIPHY_SELF_MANAGED_REG: flag attribute indicating this device
+ * is self-managing its regulatory information and any regulatory domain
+ * obtained from it is coming from the device's wiphy and not the global
+ * cfg80211 regdomain.
+ *
+ * @NL80211_ATTR_EXT_FEATURES: extended feature flags contained in a byte
+ * array. The feature flags are identified by their bit index (see &enum
+ * nl80211_ext_feature_index). The bit index is ordered starting at the
+ * least-significant bit of the first byte in the array, ie. bit index 0
+ * is located at bit 0 of byte 0. bit index 25 would be located at bit 1
+ * of byte 3 (u8 array).
+ *
+ * @NL80211_ATTR_SURVEY_RADIO_STATS: Request overall radio statistics to be
+ * returned along with other survey data. If set, @NL80211_CMD_GET_SURVEY
+ * may return a survey entry without a channel indicating global radio
+ * statistics (only some values are valid and make sense.)
+ * For devices that don't return such an entry even then, the information
+ * should be contained in the result as the sum of the respective counters
+ * over all channels.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before the first cycle of a
+ * scheduled scan is started. Or the delay before a WoWLAN
+ * net-detect scan is started, counting from the moment the
+ * system is suspended. This value is a u32, in seconds.
+
+ * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device
+ * is operating in an indoor environment.
+ *
+ * @NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS: maximum number of scan plans for
+ * scheduled scan supported by the device (u32), a wiphy attribute.
+ * @NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL: maximum interval (in seconds) for
+ * a scan plan (u32), a wiphy attribute.
+ * @NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS: maximum number of iterations in
+ * a scan plan (u32), a wiphy attribute.
+ * @NL80211_ATTR_SCHED_SCAN_PLANS: a list of scan plans for scheduled scan.
+ * Each scan plan defines the number of scan iterations and the interval
+ * between scans. The last scan plan will always run infinitely,
+ * thus it must not specify the number of iterations, only the interval
+ * between scans. The scan plans are executed sequentially.
+ * Each scan plan is a nested attribute of &enum nl80211_sched_scan_plan.
+ * @NL80211_ATTR_PBSS: flag attribute. If set it means operate
+ * in a PBSS. Specified in %NL80211_CMD_CONNECT to request
+ * connecting to a PCP, and in %NL80211_CMD_START_AP to start
+ * a PCP instead of AP. Relevant for DMG networks only.
+ * @NL80211_ATTR_BSS_SELECT: nested attribute for driver supporting the
+ * BSS selection feature. When used with %NL80211_CMD_GET_WIPHY it contains
+ * attributes according &enum nl80211_bss_select_attr to indicate what
+ * BSS selection behaviours are supported. When used with %NL80211_CMD_CONNECT
+ * it contains the behaviour-specific attribute containing the parameters for
+ * BSS selection to be done by driver and/or firmware.
+ *
+ * @NL80211_ATTR_STA_SUPPORT_P2P_PS: whether P2P PS mechanism supported
+ * or not. u8, one of the values of &enum nl80211_sta_p2p_ps_status
+ *
+ * @NL80211_ATTR_PAD: attribute used for padding for 64-bit alignment
+ *
+ * @NL80211_ATTR_IFTYPE_EXT_CAPA: Nested attribute of the following attributes:
+ * %NL80211_ATTR_IFTYPE, %NL80211_ATTR_EXT_CAPA,
+ * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per
+ * interface type.
+ *
+ * @NL80211_ATTR_MU_MIMO_GROUP_DATA: array of 24 bytes that defines a MU-MIMO
+ * groupID for monitor mode.
+ * The first 8 bytes are a mask that defines the membership in each
+ * group (there are 64 groups, group 0 and 63 are reserved),
+ * each bit represents a group and set to 1 for being a member in
+ * that group and 0 for not being a member.
+ * The remaining 16 bytes define the position in each group: 2 bits for
+ * each group.
+ * (smaller group numbers represented on most significant bits and bigger
+ * group numbers on least significant bits.)
+ * This attribute is used only if all interfaces are in monitor mode.
+ * Set this attribute in order to monitor packets using the given MU-MIMO
+ * groupID data.
+ * to turn off that feature set all the bits of the groupID to zero.
+ * @NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR: mac address for the sniffer to follow
+ * when using MU-MIMO air sniffer.
+ * to turn that feature off set an invalid mac address
+ * (e.g. FF:FF:FF:FF:FF:FF)
+ *
+ * @NL80211_ATTR_SCAN_START_TIME_TSF: The time at which the scan was actually
+ * started (u64). The time is the TSF of the BSS the interface that
+ * requested the scan is connected to (if available, otherwise this
+ * attribute must not be included).
+ * @NL80211_ATTR_SCAN_START_TIME_TSF_BSSID: The BSS according to which
+ * %NL80211_ATTR_SCAN_START_TIME_TSF is set.
+ * @NL80211_ATTR_MEASUREMENT_DURATION: measurement duration in TUs (u16). If
+ * %NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY is not set, this is the
+ * maximum measurement duration allowed. This attribute is used with
+ * measurement requests. It can also be used with %NL80211_CMD_TRIGGER_SCAN
+ * if the scan is used for beacon report radio measurement.
+ * @NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY: flag attribute that indicates
+ * that the duration specified with %NL80211_ATTR_MEASUREMENT_DURATION is
+ * mandatory. If this flag is not set, the duration is the maximum duration
+ * and the actual measurement duration may be shorter.
+ *
+ * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is
+ * used to pull the stored data for mesh peer in power save state.
+ *
+ * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by
+ * %NL80211_CMD_START_NAN and optionally with
+ * %NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0.
+ * Also, values 1 and 255 are reserved for certification purposes and
+ * should not be used during a normal device operation.
+ * @NL80211_ATTR_BANDS: operating bands configuration. This is a u32
+ * bitmask of BIT(NL80211_BAND_*) as described in %enum
+ * nl80211_band. For instance, for NL80211_BAND_2GHZ, bit 0
+ * would be set. This attribute is used with
+ * %NL80211_CMD_START_NAN and %NL80211_CMD_CHANGE_NAN_CONFIG, and
+ * it is optional. If no bands are set, it means don't-care and
+ * the device will decide what to use.
+ * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See
+ * &enum nl80211_nan_func_attributes for description of this nested
+ * attribute.
+ * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute.
+ * See &enum nl80211_nan_match_attributes.
+ * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame
+ * protection.
+ * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association
+ * Request/Response frame protection. This attribute contains the 16 octet
+ * STA Nonce followed by 16 octets of AP Nonce.
+ *
+ * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast
+ * packets should be send out as unicast to all stations (flag attribute).
+ *
+ * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also
+ * used in various commands/events for specifying the BSSID.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI: Relative RSSI threshold by which
+ * other BSSs has to be better or slightly worse than the current
+ * connected BSS so that they get reported to user space.
+ * This will give an opportunity to userspace to consider connecting to
+ * other matching BSSs which have better or slightly worse RSSI than
+ * the current connected BSS by using an offloaded operation to avoid
+ * unnecessary wakeups.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST: When present the RSSI level for BSSs in
+ * the specified band is to be adjusted before doing
+ * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparison to figure out
+ * better BSSs. The attribute value is a packed structure
+ * value as specified by &struct nl80211_bss_select_rssi_adjust.
+ *
+ * @NL80211_ATTR_TIMEOUT_REASON: The reason for which an operation timed out.
+ * u32 attribute with an &enum nl80211_timeout_reason value. This is used,
+ * e.g., with %NL80211_CMD_CONNECT event.
+ *
+ * @NL80211_ATTR_FILS_ERP_USERNAME: EAP Re-authentication Protocol (ERP)
+ * username part of NAI used to refer keys rRK and rIK. This is used with
+ * %NL80211_CMD_CONNECT.
+ *
+ * @NL80211_ATTR_FILS_ERP_REALM: EAP Re-authentication Protocol (ERP) realm part
+ * of NAI specifying the domain name of the ER server. This is used with
+ * %NL80211_CMD_CONNECT.
+ *
+ * @NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM: Unsigned 16-bit ERP next sequence number
+ * to use in ERP messages. This is used in generating the FILS wrapped data
+ * for FILS authentication and is used with %NL80211_CMD_CONNECT.
+ *
+ * @NL80211_ATTR_FILS_ERP_RRK: ERP re-authentication Root Key (rRK) for the
+ * NAI specified by %NL80211_ATTR_FILS_ERP_USERNAME and
+ * %NL80211_ATTR_FILS_ERP_REALM. This is used for generating rIK and rMSK
+ * from successful FILS authentication and is used with
+ * %NL80211_CMD_CONNECT.
+ *
+ * @NL80211_ATTR_FILS_CACHE_ID: A 2-octet identifier advertized by a FILS AP
+ * identifying the scope of PMKSAs. This is used with
+ * @NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA.
+ *
+ * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with
+ * %NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID.
+ * For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way
+ * handshake for WPA/WPA2-PSK networks. For 802.1X authentication it is
+ * used with %NL80211_CMD_SET_PMK. For offloaded FT support this attribute
+ * specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME is included as well.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to
+ * indicate that it supports multiple active scheduled scan requests.
+ * @NL80211_ATTR_SCHED_SCAN_MAX_REQS: indicates maximum number of scheduled
+ * scan request that may be active for the device (u32).
+ *
+ * @NL80211_ATTR_WANT_1X_4WAY_HS: flag attribute which user-space can include
+ * in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it
+ * wants to use the supported offload of the 4-way handshake.
+ * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
+ * @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
+ *
+ * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external
+ * authentication operation (u32 attribute with an
+ * &enum nl80211_external_auth_action value). This is used with the
+ * %NL80211_CMD_EXTERNAL_AUTH request event.
+ * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
+ * space supports external authentication. This attribute shall be used
+ * with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver
+ * may offload authentication processing to user space if this capability
+ * is indicated in the respective requests from the user space.
+ *
+ * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this
+ * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
+ *
+ * @NL80211_ATTR_TXQ_STATS: TXQ statistics (nested attribute, see &enum
+ * nl80211_txq_stats)
+ * @NL80211_ATTR_TXQ_LIMIT: Total packet limit for the TXQ queues for this phy.
+ * The smaller of this and the memory limit is enforced.
+ * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory memory limit (in bytes) for the
+ * TXQ queues for this phy. The smaller of this and the packet limit is
+ * enforced.
+ * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes
+ * a flow is assigned on each round of the DRR scheduler.
+ * @NL80211_ATTR_HE_CAPABILITY: HE Capability information element (from
+ * association request when used with NL80211_CMD_NEW_STATION). Can be set
+ * only if %NL80211_STA_FLAG_WME is set.
+ *
+ * @NL80211_ATTR_FTM_RESPONDER: nested attribute which user-space can include
+ * in %NL80211_CMD_START_AP or %NL80211_CMD_SET_BEACON for fine timing
+ * measurement (FTM) responder functionality and containing parameters as
+ * possible, see &enum nl80211_ftm_responder_attr
+ *
+ * @NL80211_ATTR_FTM_RESPONDER_STATS: Nested attribute with FTM responder
+ * statistics, see &enum nl80211_ftm_responder_stats.
+ *
+ * @NL80211_ATTR_TIMEOUT: Timeout for the given operation in milliseconds (u32),
+ * if the attribute is not given no timeout is requested. Note that 0 is an
+ * invalid value.
+ *
+ * @NL80211_ATTR_PEER_MEASUREMENTS: peer measurements request (and result)
+ * data, uses nested attributes specified in
+ * &enum nl80211_peer_measurement_attrs.
+ * This is also used for capability advertisement in the wiphy information,
+ * with the appropriate sub-attributes.
+ *
+ * @NL80211_ATTR_AIRTIME_WEIGHT: Station's weight when scheduled by the airtime
+ * scheduler.
+ *
+ * @NL80211_ATTR_STA_TX_POWER_SETTING: Transmit power setting type (u8) for
+ * station associated with the AP. See &enum nl80211_tx_power_setting for
+ * possible values.
+ * @NL80211_ATTR_STA_TX_POWER: Transmit power level (s16) in dBm units. This
+ * allows to set Tx power for a station. If this attribute is not included,
+ * the default per-interface tx power setting will be overriding. Driver
+ * should be picking up the lowest tx power, either tx power per-interface
+ * or per-station.
+ *
+ * @NL80211_ATTR_SAE_PASSWORD: attribute for passing SAE password material. It
+ * is used with %NL80211_CMD_CONNECT to provide password for offloading
+ * SAE authentication for WPA3-Personal networks.
+ *
+ * @NL80211_ATTR_TWT_RESPONDER: Enable target wait time responder support.
+ *
+ * @NL80211_ATTR_HE_OBSS_PD: nested attribute for OBSS Packet Detection
+ * functionality.
+ *
+ * @NL80211_ATTR_WIPHY_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz
+ * channel(s) that are allowed to be used for EDMG transmissions.
+ * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251. (u8 attribute)
+ * @NL80211_ATTR_WIPHY_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes
+ * the allowed channel bandwidth configurations. (u8 attribute)
+ * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13.
+ *
+ * @NUM_NL80211_ATTR: total number of nl80211_attrs available
+ * @NL80211_ATTR_MAX: highest attribute number currently defined
+ * @__NL80211_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_attrs {
+/* don't change the order or add anything between, this is ABI! */
+ NL80211_ATTR_UNSPEC,
+
+ NL80211_ATTR_WIPHY,
+ NL80211_ATTR_WIPHY_NAME,
+
+ NL80211_ATTR_IFINDEX,
+ NL80211_ATTR_IFNAME,
+ NL80211_ATTR_IFTYPE,
+
+ NL80211_ATTR_MAC,
+
+ NL80211_ATTR_KEY_DATA,
+ NL80211_ATTR_KEY_IDX,
+ NL80211_ATTR_KEY_CIPHER,
+ NL80211_ATTR_KEY_SEQ,
+ NL80211_ATTR_KEY_DEFAULT,
+
+ NL80211_ATTR_BEACON_INTERVAL,
+ NL80211_ATTR_DTIM_PERIOD,
+ NL80211_ATTR_BEACON_HEAD,
+ NL80211_ATTR_BEACON_TAIL,
+
+ NL80211_ATTR_STA_AID,
+ NL80211_ATTR_STA_FLAGS,
+ NL80211_ATTR_STA_LISTEN_INTERVAL,
+ NL80211_ATTR_STA_SUPPORTED_RATES,
+ NL80211_ATTR_STA_VLAN,
+ NL80211_ATTR_STA_INFO,
+
+ NL80211_ATTR_WIPHY_BANDS,
+
+ NL80211_ATTR_MNTR_FLAGS,
+
+ NL80211_ATTR_MESH_ID,
+ NL80211_ATTR_STA_PLINK_ACTION,
+ NL80211_ATTR_MPATH_NEXT_HOP,
+ NL80211_ATTR_MPATH_INFO,
+
+ NL80211_ATTR_BSS_CTS_PROT,
+ NL80211_ATTR_BSS_SHORT_PREAMBLE,
+ NL80211_ATTR_BSS_SHORT_SLOT_TIME,
+
+ NL80211_ATTR_HT_CAPABILITY,
+
+ NL80211_ATTR_SUPPORTED_IFTYPES,
+
+ NL80211_ATTR_REG_ALPHA2,
+ NL80211_ATTR_REG_RULES,
+
+ NL80211_ATTR_MESH_CONFIG,
+
+ NL80211_ATTR_BSS_BASIC_RATES,
+
+ NL80211_ATTR_WIPHY_TXQ_PARAMS,
+ NL80211_ATTR_WIPHY_FREQ,
+ NL80211_ATTR_WIPHY_CHANNEL_TYPE,
+
+ NL80211_ATTR_KEY_DEFAULT_MGMT,
+
+ NL80211_ATTR_MGMT_SUBTYPE,
+ NL80211_ATTR_IE,
+
+ NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
+
+ NL80211_ATTR_SCAN_FREQUENCIES,
+ NL80211_ATTR_SCAN_SSIDS,
+ NL80211_ATTR_GENERATION, /* replaces old SCAN_GENERATION */
+ NL80211_ATTR_BSS,
+
+ NL80211_ATTR_REG_INITIATOR,
+ NL80211_ATTR_REG_TYPE,
+
+ NL80211_ATTR_SUPPORTED_COMMANDS,
+
+ NL80211_ATTR_FRAME,
+ NL80211_ATTR_SSID,
+ NL80211_ATTR_AUTH_TYPE,
+ NL80211_ATTR_REASON_CODE,
+
+ NL80211_ATTR_KEY_TYPE,
+
+ NL80211_ATTR_MAX_SCAN_IE_LEN,
+ NL80211_ATTR_CIPHER_SUITES,
+
+ NL80211_ATTR_FREQ_BEFORE,
+ NL80211_ATTR_FREQ_AFTER,
+
+ NL80211_ATTR_FREQ_FIXED,
+
+
+ NL80211_ATTR_WIPHY_RETRY_SHORT,
+ NL80211_ATTR_WIPHY_RETRY_LONG,
+ NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+ NL80211_ATTR_WIPHY_RTS_THRESHOLD,
+
+ NL80211_ATTR_TIMED_OUT,
+
+ NL80211_ATTR_USE_MFP,
+
+ NL80211_ATTR_STA_FLAGS2,
+
+ NL80211_ATTR_CONTROL_PORT,
+
+ NL80211_ATTR_TESTDATA,
+
+ NL80211_ATTR_PRIVACY,
+
+ NL80211_ATTR_DISCONNECTED_BY_AP,
+ NL80211_ATTR_STATUS_CODE,
+
+ NL80211_ATTR_CIPHER_SUITES_PAIRWISE,
+ NL80211_ATTR_CIPHER_SUITE_GROUP,
+ NL80211_ATTR_WPA_VERSIONS,
+ NL80211_ATTR_AKM_SUITES,
+
+ NL80211_ATTR_REQ_IE,
+ NL80211_ATTR_RESP_IE,
+
+ NL80211_ATTR_PREV_BSSID,
+
+ NL80211_ATTR_KEY,
+ NL80211_ATTR_KEYS,
+
+ NL80211_ATTR_PID,
+
+ NL80211_ATTR_4ADDR,
+
+ NL80211_ATTR_SURVEY_INFO,
+
+ NL80211_ATTR_PMKID,
+ NL80211_ATTR_MAX_NUM_PMKIDS,
+
+ NL80211_ATTR_DURATION,
+
+ NL80211_ATTR_COOKIE,
+
+ NL80211_ATTR_WIPHY_COVERAGE_CLASS,
+
+ NL80211_ATTR_TX_RATES,
+
+ NL80211_ATTR_FRAME_MATCH,
+
+ NL80211_ATTR_ACK,
+
+ NL80211_ATTR_PS_STATE,
+
+ NL80211_ATTR_CQM,
+
+ NL80211_ATTR_LOCAL_STATE_CHANGE,
+
+ NL80211_ATTR_AP_ISOLATE,
+
+ NL80211_ATTR_WIPHY_TX_POWER_SETTING,
+ NL80211_ATTR_WIPHY_TX_POWER_LEVEL,
+
+ NL80211_ATTR_TX_FRAME_TYPES,
+ NL80211_ATTR_RX_FRAME_TYPES,
+ NL80211_ATTR_FRAME_TYPE,
+
+ NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT,
+
+ NL80211_ATTR_SUPPORT_IBSS_RSN,
+
+ NL80211_ATTR_WIPHY_ANTENNA_TX,
+ NL80211_ATTR_WIPHY_ANTENNA_RX,
+
+ NL80211_ATTR_MCAST_RATE,
+
+ NL80211_ATTR_OFFCHANNEL_TX_OK,
+
+ NL80211_ATTR_BSS_HT_OPMODE,
+
+ NL80211_ATTR_KEY_DEFAULT_TYPES,
+
+ NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
+
+ NL80211_ATTR_MESH_SETUP,
+
+ NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
+ NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
+
+ NL80211_ATTR_SUPPORT_MESH_AUTH,
+ NL80211_ATTR_STA_PLINK_STATE,
+
+ NL80211_ATTR_WOWLAN_TRIGGERS,
+ NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED,
+
+ NL80211_ATTR_SCHED_SCAN_INTERVAL,
+
+ NL80211_ATTR_INTERFACE_COMBINATIONS,
+ NL80211_ATTR_SOFTWARE_IFTYPES,
+
+ NL80211_ATTR_REKEY_DATA,
+
+ NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
+ NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
+
+ NL80211_ATTR_SCAN_SUPP_RATES,
+
+ NL80211_ATTR_HIDDEN_SSID,
+
+ NL80211_ATTR_IE_PROBE_RESP,
+ NL80211_ATTR_IE_ASSOC_RESP,
+
+ NL80211_ATTR_STA_WME,
+ NL80211_ATTR_SUPPORT_AP_UAPSD,
+
+ NL80211_ATTR_ROAM_SUPPORT,
+
+ NL80211_ATTR_SCHED_SCAN_MATCH,
+ NL80211_ATTR_MAX_MATCH_SETS,
+
+ NL80211_ATTR_PMKSA_CANDIDATE,
+
+ NL80211_ATTR_TX_NO_CCK_RATE,
+
+ NL80211_ATTR_TDLS_ACTION,
+ NL80211_ATTR_TDLS_DIALOG_TOKEN,
+ NL80211_ATTR_TDLS_OPERATION,
+ NL80211_ATTR_TDLS_SUPPORT,
+ NL80211_ATTR_TDLS_EXTERNAL_SETUP,
+
+ NL80211_ATTR_DEVICE_AP_SME,
+
+ NL80211_ATTR_DONT_WAIT_FOR_ACK,
+
+ NL80211_ATTR_FEATURE_FLAGS,
+
+ NL80211_ATTR_PROBE_RESP_OFFLOAD,
+
+ NL80211_ATTR_PROBE_RESP,
+
+ NL80211_ATTR_DFS_REGION,
+
+ NL80211_ATTR_DISABLE_HT,
+ NL80211_ATTR_HT_CAPABILITY_MASK,
+
+ NL80211_ATTR_NOACK_MAP,
+
+ NL80211_ATTR_INACTIVITY_TIMEOUT,
+
+ NL80211_ATTR_RX_SIGNAL_DBM,
+
+ NL80211_ATTR_BG_SCAN_PERIOD,
+
+ NL80211_ATTR_WDEV,
+
+ NL80211_ATTR_USER_REG_HINT_TYPE,
+
+ NL80211_ATTR_CONN_FAILED_REASON,
+
+ NL80211_ATTR_AUTH_DATA,
+
+ NL80211_ATTR_VHT_CAPABILITY,
+
+ NL80211_ATTR_SCAN_FLAGS,
+
+ NL80211_ATTR_CHANNEL_WIDTH,
+ NL80211_ATTR_CENTER_FREQ1,
+ NL80211_ATTR_CENTER_FREQ2,
+
+ NL80211_ATTR_P2P_CTWINDOW,
+ NL80211_ATTR_P2P_OPPPS,
+
+ NL80211_ATTR_LOCAL_MESH_POWER_MODE,
+
+ NL80211_ATTR_ACL_POLICY,
+
+ NL80211_ATTR_MAC_ADDRS,
+
+ NL80211_ATTR_MAC_ACL_MAX,
+
+ NL80211_ATTR_RADAR_EVENT,
+
+ NL80211_ATTR_EXT_CAPA,
+ NL80211_ATTR_EXT_CAPA_MASK,
+
+ NL80211_ATTR_STA_CAPABILITY,
+ NL80211_ATTR_STA_EXT_CAPABILITY,
+
+ NL80211_ATTR_PROTOCOL_FEATURES,
+ NL80211_ATTR_SPLIT_WIPHY_DUMP,
+
+ NL80211_ATTR_DISABLE_VHT,
+ NL80211_ATTR_VHT_CAPABILITY_MASK,
+
+ NL80211_ATTR_MDID,
+ NL80211_ATTR_IE_RIC,
+
+ NL80211_ATTR_CRIT_PROT_ID,
+ NL80211_ATTR_MAX_CRIT_PROT_DURATION,
+
+ NL80211_ATTR_PEER_AID,
+
+ NL80211_ATTR_COALESCE_RULE,
+
+ NL80211_ATTR_CH_SWITCH_COUNT,
+ NL80211_ATTR_CH_SWITCH_BLOCK_TX,
+ NL80211_ATTR_CSA_IES,
+ NL80211_ATTR_CSA_C_OFF_BEACON,
+ NL80211_ATTR_CSA_C_OFF_PRESP,
+
+ NL80211_ATTR_RXMGMT_FLAGS,
+
+ NL80211_ATTR_STA_SUPPORTED_CHANNELS,
+
+ NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES,
+
+ NL80211_ATTR_HANDLE_DFS,
+
+ NL80211_ATTR_SUPPORT_5_MHZ,
+ NL80211_ATTR_SUPPORT_10_MHZ,
+
+ NL80211_ATTR_OPMODE_NOTIF,
+
+ NL80211_ATTR_VENDOR_ID,
+ NL80211_ATTR_VENDOR_SUBCMD,
+ NL80211_ATTR_VENDOR_DATA,
+ NL80211_ATTR_VENDOR_EVENTS,
+
+ NL80211_ATTR_QOS_MAP,
+
+ NL80211_ATTR_MAC_HINT,
+ NL80211_ATTR_WIPHY_FREQ_HINT,
+
+ NL80211_ATTR_MAX_AP_ASSOC_STA,
+
+ NL80211_ATTR_TDLS_PEER_CAPABILITY,
+
+ NL80211_ATTR_SOCKET_OWNER,
+
+ NL80211_ATTR_CSA_C_OFFSETS_TX,
+ NL80211_ATTR_MAX_CSA_COUNTERS,
+
+ NL80211_ATTR_TDLS_INITIATOR,
+
+ NL80211_ATTR_USE_RRM,
+
+ NL80211_ATTR_WIPHY_DYN_ACK,
+
+ NL80211_ATTR_TSID,
+ NL80211_ATTR_USER_PRIO,
+ NL80211_ATTR_ADMITTED_TIME,
+
+ NL80211_ATTR_SMPS_MODE,
+
+ NL80211_ATTR_OPER_CLASS,
+
+ NL80211_ATTR_MAC_MASK,
+
+ NL80211_ATTR_WIPHY_SELF_MANAGED_REG,
+
+ NL80211_ATTR_EXT_FEATURES,
+
+ NL80211_ATTR_SURVEY_RADIO_STATS,
+
+ NL80211_ATTR_NETNS_FD,
+
+ NL80211_ATTR_SCHED_SCAN_DELAY,
+
+ NL80211_ATTR_REG_INDOOR,
+
+ NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS,
+ NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL,
+ NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS,
+ NL80211_ATTR_SCHED_SCAN_PLANS,
+
+ NL80211_ATTR_PBSS,
+
+ NL80211_ATTR_BSS_SELECT,
+
+ NL80211_ATTR_STA_SUPPORT_P2P_PS,
+
+ NL80211_ATTR_PAD,
+
+ NL80211_ATTR_IFTYPE_EXT_CAPA,
+
+ NL80211_ATTR_MU_MIMO_GROUP_DATA,
+ NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR,
+
+ NL80211_ATTR_SCAN_START_TIME_TSF,
+ NL80211_ATTR_SCAN_START_TIME_TSF_BSSID,
+ NL80211_ATTR_MEASUREMENT_DURATION,
+ NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY,
+
+ NL80211_ATTR_MESH_PEER_AID,
+
+ NL80211_ATTR_NAN_MASTER_PREF,
+ NL80211_ATTR_BANDS,
+ NL80211_ATTR_NAN_FUNC,
+ NL80211_ATTR_NAN_MATCH,
+
+ NL80211_ATTR_FILS_KEK,
+ NL80211_ATTR_FILS_NONCES,
+
+ NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED,
+
+ NL80211_ATTR_BSSID,
+
+ NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI,
+ NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST,
+
+ NL80211_ATTR_TIMEOUT_REASON,
+
+ NL80211_ATTR_FILS_ERP_USERNAME,
+ NL80211_ATTR_FILS_ERP_REALM,
+ NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM,
+ NL80211_ATTR_FILS_ERP_RRK,
+ NL80211_ATTR_FILS_CACHE_ID,
+
+ NL80211_ATTR_PMK,
+
+ NL80211_ATTR_SCHED_SCAN_MULTI,
+ NL80211_ATTR_SCHED_SCAN_MAX_REQS,
+
+ NL80211_ATTR_WANT_1X_4WAY_HS,
+ NL80211_ATTR_PMKR0_NAME,
+ NL80211_ATTR_PORT_AUTHORIZED,
+
+ NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
+
+ NL80211_ATTR_NSS,
+ NL80211_ATTR_ACK_SIGNAL,
+
+ NL80211_ATTR_CONTROL_PORT_OVER_NL80211,
+
+ NL80211_ATTR_TXQ_STATS,
+ NL80211_ATTR_TXQ_LIMIT,
+ NL80211_ATTR_TXQ_MEMORY_LIMIT,
+ NL80211_ATTR_TXQ_QUANTUM,
+
+ NL80211_ATTR_HE_CAPABILITY,
+
+ NL80211_ATTR_FTM_RESPONDER,
+
+ NL80211_ATTR_FTM_RESPONDER_STATS,
+
+ NL80211_ATTR_TIMEOUT,
+
+ NL80211_ATTR_PEER_MEASUREMENTS,
+
+ NL80211_ATTR_AIRTIME_WEIGHT,
+ NL80211_ATTR_STA_TX_POWER_SETTING,
+ NL80211_ATTR_STA_TX_POWER,
+
+ NL80211_ATTR_SAE_PASSWORD,
+
+ NL80211_ATTR_TWT_RESPONDER,
+
+ NL80211_ATTR_HE_OBSS_PD,
+
+ NL80211_ATTR_WIPHY_EDMG_CHANNELS,
+ NL80211_ATTR_WIPHY_EDMG_BW_CONFIG,
+
+ /* add attributes here, update the policy in nl80211.c */
+
+ __NL80211_ATTR_AFTER_LAST,
+ NUM_NL80211_ATTR = __NL80211_ATTR_AFTER_LAST,
+ NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1
+};
+
+/* source-level API compatibility */
+#define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION
+#define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG
+#define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER
+#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA
+
+/*
+ * Allow user space programs to use #ifdef on new attributes by defining them
+ * here
+ */
+#define NL80211_CMD_CONNECT NL80211_CMD_CONNECT
+#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY
+#define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES
+#define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS
+#define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ
+#define NL80211_ATTR_WIPHY_CHANNEL_TYPE NL80211_ATTR_WIPHY_CHANNEL_TYPE
+#define NL80211_ATTR_MGMT_SUBTYPE NL80211_ATTR_MGMT_SUBTYPE
+#define NL80211_ATTR_IE NL80211_ATTR_IE
+#define NL80211_ATTR_REG_INITIATOR NL80211_ATTR_REG_INITIATOR
+#define NL80211_ATTR_REG_TYPE NL80211_ATTR_REG_TYPE
+#define NL80211_ATTR_FRAME NL80211_ATTR_FRAME
+#define NL80211_ATTR_SSID NL80211_ATTR_SSID
+#define NL80211_ATTR_AUTH_TYPE NL80211_ATTR_AUTH_TYPE
+#define NL80211_ATTR_REASON_CODE NL80211_ATTR_REASON_CODE
+#define NL80211_ATTR_CIPHER_SUITES_PAIRWISE NL80211_ATTR_CIPHER_SUITES_PAIRWISE
+#define NL80211_ATTR_CIPHER_SUITE_GROUP NL80211_ATTR_CIPHER_SUITE_GROUP
+#define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS
+#define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES
+#define NL80211_ATTR_KEY NL80211_ATTR_KEY
+#define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
+#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
+
+#define NL80211_WIPHY_NAME_MAXLEN 64
+
+#define NL80211_MAX_SUPP_RATES 32
+#define NL80211_MAX_SUPP_HT_RATES 77
+#define NL80211_MAX_SUPP_REG_RULES 128
+#define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0
+#define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16
+#define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24
+#define NL80211_HT_CAPABILITY_LEN 26
+#define NL80211_VHT_CAPABILITY_LEN 12
+#define NL80211_HE_MIN_CAPABILITY_LEN 16
+#define NL80211_HE_MAX_CAPABILITY_LEN 54
+#define NL80211_MAX_NR_CIPHER_SUITES 5
+#define NL80211_MAX_NR_AKM_SUITES 2
+
+#define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10
+
+/* default RSSI threshold for scan results if none specified. */
+#define NL80211_SCAN_RSSI_THOLD_OFF -300
+
+#define NL80211_CQM_TXE_MAX_INTVL 1800
+
+/**
+ * enum nl80211_iftype - (virtual) interface types
+ *
+ * @NL80211_IFTYPE_UNSPECIFIED: unspecified type, driver decides
+ * @NL80211_IFTYPE_ADHOC: independent BSS member
+ * @NL80211_IFTYPE_STATION: managed BSS member
+ * @NL80211_IFTYPE_AP: access point
+ * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points; VLAN interfaces
+ * are a bit special in that they must always be tied to a pre-existing
+ * AP type interface.
+ * @NL80211_IFTYPE_WDS: wireless distribution interface
+ * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames
+ * @NL80211_IFTYPE_MESH_POINT: mesh point
+ * @NL80211_IFTYPE_P2P_CLIENT: P2P client
+ * @NL80211_IFTYPE_P2P_GO: P2P group owner
+ * @NL80211_IFTYPE_P2P_DEVICE: P2P device interface type, this is not a netdev
+ * and therefore can't be created in the normal ways, use the
+ * %NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE
+ * commands to create and destroy one
+ * @NL80211_IF_TYPE_OCB: Outside Context of a BSS
+ * This mode corresponds to the MIB variable dot11OCBActivated=true
+ * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev)
+ * @NL80211_IFTYPE_MAX: highest interface type number currently defined
+ * @NUM_NL80211_IFTYPES: number of defined interface types
+ *
+ * These values are used with the %NL80211_ATTR_IFTYPE
+ * to set the type of an interface.
+ *
+ */
+enum nl80211_iftype {
+ NL80211_IFTYPE_UNSPECIFIED,
+ NL80211_IFTYPE_ADHOC,
+ NL80211_IFTYPE_STATION,
+ NL80211_IFTYPE_AP,
+ NL80211_IFTYPE_AP_VLAN,
+ NL80211_IFTYPE_WDS,
+ NL80211_IFTYPE_MONITOR,
+ NL80211_IFTYPE_MESH_POINT,
+ NL80211_IFTYPE_P2P_CLIENT,
+ NL80211_IFTYPE_P2P_GO,
+ NL80211_IFTYPE_P2P_DEVICE,
+ NL80211_IFTYPE_OCB,
+ NL80211_IFTYPE_NAN,
+
+ /* keep last */
+ NUM_NL80211_IFTYPES,
+ NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1
+};
+
+/**
+ * enum nl80211_sta_flags - station flags
+ *
+ * Station flags. When a station is added to an AP interface, it is
+ * assumed to be already associated (and hence authenticated.)
+ *
+ * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved
+ * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X)
+ * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames
+ * with short barker preamble
+ * @NL80211_STA_FLAG_WME: station is WME/QoS capable
+ * @NL80211_STA_FLAG_MFP: station uses management frame protection
+ * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated
+ * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer -- this flag should
+ * only be used in managed mode (even in the flags mask). Note that the
+ * flag can't be changed, it is only valid while adding a station, and
+ * attempts to change it will silently be ignored (rather than rejected
+ * as errors.)
+ * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers
+ * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a
+ * previously added station into associated state
+ * @NL80211_STA_FLAG_MAX: highest station flag number currently defined
+ * @__NL80211_STA_FLAG_AFTER_LAST: internal use
+ */
+enum nl80211_sta_flags {
+ __NL80211_STA_FLAG_INVALID,
+ NL80211_STA_FLAG_AUTHORIZED,
+ NL80211_STA_FLAG_SHORT_PREAMBLE,
+ NL80211_STA_FLAG_WME,
+ NL80211_STA_FLAG_MFP,
+ NL80211_STA_FLAG_AUTHENTICATED,
+ NL80211_STA_FLAG_TDLS_PEER,
+ NL80211_STA_FLAG_ASSOCIATED,
+
+ /* keep last */
+ __NL80211_STA_FLAG_AFTER_LAST,
+ NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_sta_p2p_ps_status - station support of P2P PS
+ *
+ * @NL80211_P2P_PS_UNSUPPORTED: station doesn't support P2P PS mechanism
+ * @@NL80211_P2P_PS_SUPPORTED: station supports P2P PS mechanism
+ * @NUM_NL80211_P2P_PS_STATUS: number of values
+ */
+enum nl80211_sta_p2p_ps_status {
+ NL80211_P2P_PS_UNSUPPORTED = 0,
+ NL80211_P2P_PS_SUPPORTED,
+
+ NUM_NL80211_P2P_PS_STATUS,
+};
+
+#define NL80211_STA_FLAG_MAX_OLD_API NL80211_STA_FLAG_TDLS_PEER
+
+/**
+ * struct nl80211_sta_flag_update - station flags mask/set
+ * @mask: mask of station flags to set
+ * @set: which values to set them to
+ *
+ * Both mask and set contain bits as per &enum nl80211_sta_flags.
+ */
+struct nl80211_sta_flag_update {
+ __u32 mask;
+ __u32 set;
+} __attribute__((packed));
+
+/**
+ * enum nl80211_he_gi - HE guard interval
+ * @NL80211_RATE_INFO_HE_GI_0_8: 0.8 usec
+ * @NL80211_RATE_INFO_HE_GI_1_6: 1.6 usec
+ * @NL80211_RATE_INFO_HE_GI_3_2: 3.2 usec
+ */
+enum nl80211_he_gi {
+ NL80211_RATE_INFO_HE_GI_0_8,
+ NL80211_RATE_INFO_HE_GI_1_6,
+ NL80211_RATE_INFO_HE_GI_3_2,
+};
+
+/**
+ * enum nl80211_he_ru_alloc - HE RU allocation values
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_52: 52-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_106: 106-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_242: 242-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_484: 484-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_996: 996-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_2x996: 2x996-tone RU allocation
+ */
+enum nl80211_he_ru_alloc {
+ NL80211_RATE_INFO_HE_RU_ALLOC_26,
+ NL80211_RATE_INFO_HE_RU_ALLOC_52,
+ NL80211_RATE_INFO_HE_RU_ALLOC_106,
+ NL80211_RATE_INFO_HE_RU_ALLOC_242,
+ NL80211_RATE_INFO_HE_RU_ALLOC_484,
+ NL80211_RATE_INFO_HE_RU_ALLOC_996,
+ NL80211_RATE_INFO_HE_RU_ALLOC_2x996,
+};
+
+/**
+ * enum nl80211_rate_info - bitrate information
+ *
+ * These attribute types are used with %NL80211_STA_INFO_TXRATE
+ * when getting information about the bitrate of a station.
+ * There are 2 attributes for bitrate, a legacy one that represents
+ * a 16-bit value, and new one that represents a 32-bit value.
+ * If the rate value fits into 16 bit, both attributes are reported
+ * with the same value. If the rate is too high to fit into 16 bits
+ * (>6.5535Gbps) only 32-bit attribute is included.
+ * User space tools encouraged to use the 32-bit attribute and fall
+ * back to the 16-bit one for compatibility with older kernels.
+ *
+ * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s)
+ * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8)
+ * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 MHz dualchannel bitrate
+ * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval
+ * @NL80211_RATE_INFO_BITRATE32: total bitrate (u32, 100kbit/s)
+ * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined
+ * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8)
+ * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8)
+ * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate
+ * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the
+ * same as 160 for purposes of the bitrates
+ * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate
+ * @NL80211_RATE_INFO_10_MHZ_WIDTH: 10 MHz width - note that this is
+ * a legacy rate and will be reported as the actual bitrate, i.e.
+ * half the base (20 MHz) rate
+ * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is
+ * a legacy rate and will be reported as the actual bitrate, i.e.
+ * a quarter of the base (20 MHz) rate
+ * @NL80211_RATE_INFO_HE_MCS: HE MCS index (u8, 0-11)
+ * @NL80211_RATE_INFO_HE_NSS: HE NSS value (u8, 1-8)
+ * @NL80211_RATE_INFO_HE_GI: HE guard interval identifier
+ * (u8, see &enum nl80211_he_gi)
+ * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1)
+ * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then
+ * non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc)
+ * @__NL80211_RATE_INFO_AFTER_LAST: internal use
+ */
+enum nl80211_rate_info {
+ __NL80211_RATE_INFO_INVALID,
+ NL80211_RATE_INFO_BITRATE,
+ NL80211_RATE_INFO_MCS,
+ NL80211_RATE_INFO_40_MHZ_WIDTH,
+ NL80211_RATE_INFO_SHORT_GI,
+ NL80211_RATE_INFO_BITRATE32,
+ NL80211_RATE_INFO_VHT_MCS,
+ NL80211_RATE_INFO_VHT_NSS,
+ NL80211_RATE_INFO_80_MHZ_WIDTH,
+ NL80211_RATE_INFO_80P80_MHZ_WIDTH,
+ NL80211_RATE_INFO_160_MHZ_WIDTH,
+ NL80211_RATE_INFO_10_MHZ_WIDTH,
+ NL80211_RATE_INFO_5_MHZ_WIDTH,
+ NL80211_RATE_INFO_HE_MCS,
+ NL80211_RATE_INFO_HE_NSS,
+ NL80211_RATE_INFO_HE_GI,
+ NL80211_RATE_INFO_HE_DCM,
+ NL80211_RATE_INFO_HE_RU_ALLOC,
+
+ /* keep last */
+ __NL80211_RATE_INFO_AFTER_LAST,
+ NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_sta_bss_param - BSS information collected by STA
+ *
+ * These attribute types are used with %NL80211_STA_INFO_BSS_PARAM
+ * when getting information about the bitrate of a station.
+ *
+ * @__NL80211_STA_BSS_PARAM_INVALID: attribute number 0 is reserved
+ * @NL80211_STA_BSS_PARAM_CTS_PROT: whether CTS protection is enabled (flag)
+ * @NL80211_STA_BSS_PARAM_SHORT_PREAMBLE: whether short preamble is enabled
+ * (flag)
+ * @NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME: whether short slot time is enabled
+ * (flag)
+ * @NL80211_STA_BSS_PARAM_DTIM_PERIOD: DTIM period for beaconing (u8)
+ * @NL80211_STA_BSS_PARAM_BEACON_INTERVAL: Beacon interval (u16)
+ * @NL80211_STA_BSS_PARAM_MAX: highest sta_bss_param number currently defined
+ * @__NL80211_STA_BSS_PARAM_AFTER_LAST: internal use
+ */
+enum nl80211_sta_bss_param {
+ __NL80211_STA_BSS_PARAM_INVALID,
+ NL80211_STA_BSS_PARAM_CTS_PROT,
+ NL80211_STA_BSS_PARAM_SHORT_PREAMBLE,
+ NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME,
+ NL80211_STA_BSS_PARAM_DTIM_PERIOD,
+ NL80211_STA_BSS_PARAM_BEACON_INTERVAL,
+
+ /* keep last */
+ __NL80211_STA_BSS_PARAM_AFTER_LAST,
+ NL80211_STA_BSS_PARAM_MAX = __NL80211_STA_BSS_PARAM_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_sta_info - station information
+ *
+ * These attribute types are used with %NL80211_ATTR_STA_INFO
+ * when getting information about a station.
+ *
+ * @__NL80211_STA_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs)
+ * @NL80211_STA_INFO_RX_BYTES: total received bytes (MPDU length)
+ * (u32, from this station)
+ * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (MPDU length)
+ * (u32, to this station)
+ * @NL80211_STA_INFO_RX_BYTES64: total received bytes (MPDU length)
+ * (u64, from this station)
+ * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (MPDU length)
+ * (u64, to this station)
+ * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
+ * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
+ * containing info as possible, see &enum nl80211_rate_info
+ * @NL80211_STA_INFO_RX_PACKETS: total received packet (MSDUs and MMPDUs)
+ * (u32, from this station)
+ * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (MSDUs and MMPDUs)
+ * (u32, to this station)
+ * @NL80211_STA_INFO_TX_RETRIES: total retries (MPDUs) (u32, to this station)
+ * @NL80211_STA_INFO_TX_FAILED: total failed packets (MPDUs)
+ * (u32, to this station)
+ * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
+ * @NL80211_STA_INFO_LLID: the station's mesh LLID
+ * @NL80211_STA_INFO_PLID: the station's mesh PLID
+ * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station
+ * (see %enum nl80211_plink_state)
+ * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested
+ * attribute, like NL80211_STA_INFO_TX_BITRATE.
+ * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute
+ * containing info as possible, see &enum nl80211_sta_bss_param
+ * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected
+ * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update.
+ * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32)
+ * @NL80211_STA_INFO_T_OFFSET: timing offset with respect to this STA (s64)
+ * @NL80211_STA_INFO_LOCAL_PM: local mesh STA link-specific power mode
+ * @NL80211_STA_INFO_PEER_PM: peer mesh STA link-specific power mode
+ * @NL80211_STA_INFO_NONPEER_PM: neighbor mesh STA power save mode towards
+ * non-peer STA
+ * @NL80211_STA_INFO_CHAIN_SIGNAL: per-chain signal strength of last PPDU
+ * Contains a nested array of signal strength attributes (u8, dBm)
+ * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average
+ * Same format as NL80211_STA_INFO_CHAIN_SIGNAL.
+ * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the
+ * 802.11 header (u32, kbps)
+ * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons
+ * (u64)
+ * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64)
+ * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average
+ * for beacons only (u8, dBm)
+ * @NL80211_STA_INFO_TID_STATS: per-TID statistics (see &enum nl80211_tid_stats)
+ * This is a nested attribute where each the inner attribute number is the
+ * TID+1 and the special TID 16 (i.e. value 17) is used for non-QoS frames;
+ * each one of those is again nested with &enum nl80211_tid_stats
+ * attributes carrying the actual values.
+ * @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames
+ * received from the station (u64, usec)
+ * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm)
+ * @NL80211_STA_INFO_ACK_SIGNAL_AVG: avg signal strength of ACK frames (s8, dBm)
+ * @NL80211_STA_INFO_RX_MPDUS: total number of received packets (MPDUs)
+ * (u32, from this station)
+ * @NL80211_STA_INFO_FCS_ERROR_COUNT: total number of packets (MPDUs) received
+ * with an FCS error (u32, from this station). This count may not include
+ * some packets with an FCS error due to TA corruption. Hence this counter
+ * might not be fully accurate.
+ * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a
+ * mesh gate (u8, 0 or 1)
+ * @NL80211_STA_INFO_TX_DURATION: aggregate PPDU duration for all frames
+ * sent to the station (u64, usec)
+ * @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16)
+ * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station
+ * @NL80211_STA_INFO_ASSOC_AT_BOOTTIME: Timestamp (CLOCK_BOOTTIME, nanoseconds)
+ * of STA's association
+ * @__NL80211_STA_INFO_AFTER_LAST: internal
+ * @NL80211_STA_INFO_MAX: highest possible station info attribute
+ */
+enum nl80211_sta_info {
+ __NL80211_STA_INFO_INVALID,
+ NL80211_STA_INFO_INACTIVE_TIME,
+ NL80211_STA_INFO_RX_BYTES,
+ NL80211_STA_INFO_TX_BYTES,
+ NL80211_STA_INFO_LLID,
+ NL80211_STA_INFO_PLID,
+ NL80211_STA_INFO_PLINK_STATE,
+ NL80211_STA_INFO_SIGNAL,
+ NL80211_STA_INFO_TX_BITRATE,
+ NL80211_STA_INFO_RX_PACKETS,
+ NL80211_STA_INFO_TX_PACKETS,
+ NL80211_STA_INFO_TX_RETRIES,
+ NL80211_STA_INFO_TX_FAILED,
+ NL80211_STA_INFO_SIGNAL_AVG,
+ NL80211_STA_INFO_RX_BITRATE,
+ NL80211_STA_INFO_BSS_PARAM,
+ NL80211_STA_INFO_CONNECTED_TIME,
+ NL80211_STA_INFO_STA_FLAGS,
+ NL80211_STA_INFO_BEACON_LOSS,
+ NL80211_STA_INFO_T_OFFSET,
+ NL80211_STA_INFO_LOCAL_PM,
+ NL80211_STA_INFO_PEER_PM,
+ NL80211_STA_INFO_NONPEER_PM,
+ NL80211_STA_INFO_RX_BYTES64,
+ NL80211_STA_INFO_TX_BYTES64,
+ NL80211_STA_INFO_CHAIN_SIGNAL,
+ NL80211_STA_INFO_CHAIN_SIGNAL_AVG,
+ NL80211_STA_INFO_EXPECTED_THROUGHPUT,
+ NL80211_STA_INFO_RX_DROP_MISC,
+ NL80211_STA_INFO_BEACON_RX,
+ NL80211_STA_INFO_BEACON_SIGNAL_AVG,
+ NL80211_STA_INFO_TID_STATS,
+ NL80211_STA_INFO_RX_DURATION,
+ NL80211_STA_INFO_PAD,
+ NL80211_STA_INFO_ACK_SIGNAL,
+ NL80211_STA_INFO_ACK_SIGNAL_AVG,
+ NL80211_STA_INFO_RX_MPDUS,
+ NL80211_STA_INFO_FCS_ERROR_COUNT,
+ NL80211_STA_INFO_CONNECTED_TO_GATE,
+ NL80211_STA_INFO_TX_DURATION,
+ NL80211_STA_INFO_AIRTIME_WEIGHT,
+ NL80211_STA_INFO_AIRTIME_LINK_METRIC,
+ NL80211_STA_INFO_ASSOC_AT_BOOTTIME,
+
+ /* keep last */
+ __NL80211_STA_INFO_AFTER_LAST,
+ NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1
+};
+
+/* we renamed this - stay compatible */
+#define NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG NL80211_STA_INFO_ACK_SIGNAL_AVG
+
+
+/**
+ * enum nl80211_tid_stats - per TID statistics attributes
+ * @__NL80211_TID_STATS_INVALID: attribute number 0 is reserved
+ * @NL80211_TID_STATS_RX_MSDU: number of MSDUs received (u64)
+ * @NL80211_TID_STATS_TX_MSDU: number of MSDUs transmitted (or
+ * attempted to transmit; u64)
+ * @NL80211_TID_STATS_TX_MSDU_RETRIES: number of retries for
+ * transmitted MSDUs (not counting the first attempt; u64)
+ * @NL80211_TID_STATS_TX_MSDU_FAILED: number of failed transmitted
+ * MSDUs (u64)
+ * @NL80211_TID_STATS_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_TID_STATS_TXQ_STATS: TXQ stats (nested attribute)
+ * @NUM_NL80211_TID_STATS: number of attributes here
+ * @NL80211_TID_STATS_MAX: highest numbered attribute here
+ */
+enum nl80211_tid_stats {
+ __NL80211_TID_STATS_INVALID,
+ NL80211_TID_STATS_RX_MSDU,
+ NL80211_TID_STATS_TX_MSDU,
+ NL80211_TID_STATS_TX_MSDU_RETRIES,
+ NL80211_TID_STATS_TX_MSDU_FAILED,
+ NL80211_TID_STATS_PAD,
+ NL80211_TID_STATS_TXQ_STATS,
+
+ /* keep last */
+ NUM_NL80211_TID_STATS,
+ NL80211_TID_STATS_MAX = NUM_NL80211_TID_STATS - 1
+};
+
+/**
+ * enum nl80211_txq_stats - per TXQ statistics attributes
+ * @__NL80211_TXQ_STATS_INVALID: attribute number 0 is reserved
+ * @NUM_NL80211_TXQ_STATS: number of attributes here
+ * @NL80211_TXQ_STATS_BACKLOG_BYTES: number of bytes currently backlogged
+ * @NL80211_TXQ_STATS_BACKLOG_PACKETS: number of packets currently
+ * backlogged
+ * @NL80211_TXQ_STATS_FLOWS: total number of new flows seen
+ * @NL80211_TXQ_STATS_DROPS: total number of packet drops
+ * @NL80211_TXQ_STATS_ECN_MARKS: total number of packet ECN marks
+ * @NL80211_TXQ_STATS_OVERLIMIT: number of drops due to queue space overflow
+ * @NL80211_TXQ_STATS_OVERMEMORY: number of drops due to memory limit overflow
+ * (only for per-phy stats)
+ * @NL80211_TXQ_STATS_COLLISIONS: number of hash collisions
+ * @NL80211_TXQ_STATS_TX_BYTES: total number of bytes dequeued from TXQ
+ * @NL80211_TXQ_STATS_TX_PACKETS: total number of packets dequeued from TXQ
+ * @NL80211_TXQ_STATS_MAX_FLOWS: number of flow buckets for PHY
+ * @NL80211_TXQ_STATS_MAX: highest numbered attribute here
+ */
+enum nl80211_txq_stats {
+ __NL80211_TXQ_STATS_INVALID,
+ NL80211_TXQ_STATS_BACKLOG_BYTES,
+ NL80211_TXQ_STATS_BACKLOG_PACKETS,
+ NL80211_TXQ_STATS_FLOWS,
+ NL80211_TXQ_STATS_DROPS,
+ NL80211_TXQ_STATS_ECN_MARKS,
+ NL80211_TXQ_STATS_OVERLIMIT,
+ NL80211_TXQ_STATS_OVERMEMORY,
+ NL80211_TXQ_STATS_COLLISIONS,
+ NL80211_TXQ_STATS_TX_BYTES,
+ NL80211_TXQ_STATS_TX_PACKETS,
+ NL80211_TXQ_STATS_MAX_FLOWS,
+
+ /* keep last */
+ NUM_NL80211_TXQ_STATS,
+ NL80211_TXQ_STATS_MAX = NUM_NL80211_TXQ_STATS - 1
+};
+
+/**
+ * enum nl80211_mpath_flags - nl80211 mesh path flags
+ *
+ * @NL80211_MPATH_FLAG_ACTIVE: the mesh path is active
+ * @NL80211_MPATH_FLAG_RESOLVING: the mesh path discovery process is running
+ * @NL80211_MPATH_FLAG_SN_VALID: the mesh path contains a valid SN
+ * @NL80211_MPATH_FLAG_FIXED: the mesh path has been manually set
+ * @NL80211_MPATH_FLAG_RESOLVED: the mesh path discovery process succeeded
+ */
+enum nl80211_mpath_flags {
+ NL80211_MPATH_FLAG_ACTIVE = 1<<0,
+ NL80211_MPATH_FLAG_RESOLVING = 1<<1,
+ NL80211_MPATH_FLAG_SN_VALID = 1<<2,
+ NL80211_MPATH_FLAG_FIXED = 1<<3,
+ NL80211_MPATH_FLAG_RESOLVED = 1<<4,
+};
+
+/**
+ * enum nl80211_mpath_info - mesh path information
+ *
+ * These attribute types are used with %NL80211_ATTR_MPATH_INFO when getting
+ * information about a mesh path.
+ *
+ * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination
+ * @NL80211_MPATH_INFO_SN: destination sequence number
+ * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path
+ * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now
+ * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in
+ * &enum nl80211_mpath_flags;
+ * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec
+ * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries
+ * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination
+ * @NL80211_MPATH_INFO_PATH_CHANGE: total number of path changes to destination
+ * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number
+ * currently defined
+ * @__NL80211_MPATH_INFO_AFTER_LAST: internal use
+ */
+enum nl80211_mpath_info {
+ __NL80211_MPATH_INFO_INVALID,
+ NL80211_MPATH_INFO_FRAME_QLEN,
+ NL80211_MPATH_INFO_SN,
+ NL80211_MPATH_INFO_METRIC,
+ NL80211_MPATH_INFO_EXPTIME,
+ NL80211_MPATH_INFO_FLAGS,
+ NL80211_MPATH_INFO_DISCOVERY_TIMEOUT,
+ NL80211_MPATH_INFO_DISCOVERY_RETRIES,
+ NL80211_MPATH_INFO_HOP_COUNT,
+ NL80211_MPATH_INFO_PATH_CHANGE,
+
+ /* keep last */
+ __NL80211_MPATH_INFO_AFTER_LAST,
+ NL80211_MPATH_INFO_MAX = __NL80211_MPATH_INFO_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_band_iftype_attr - Interface type data attributes
+ *
+ * @__NL80211_BAND_IFTYPE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BAND_IFTYPE_ATTR_IFTYPES: nested attribute containing a flag attribute
+ * for each interface type that supports the band data
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC: HE MAC capabilities as in HE
+ * capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY: HE PHY capabilities as in HE
+ * capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET: HE supported NSS/MCS as in HE
+ * capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as
+ * defined in HE capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently
+ * defined
+ * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_band_iftype_attr {
+ __NL80211_BAND_IFTYPE_ATTR_INVALID,
+
+ NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+ NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+ NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+ NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+ NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+
+ /* keep last */
+ __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST,
+ NL80211_BAND_IFTYPE_ATTR_MAX = __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_band_attr - band attributes
+ * @__NL80211_BAND_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BAND_ATTR_FREQS: supported frequencies in this band,
+ * an array of nested frequency attributes
+ * @NL80211_BAND_ATTR_RATES: supported bitrates in this band,
+ * an array of nested bitrate attributes
+ * @NL80211_BAND_ATTR_HT_MCS_SET: 16-byte attribute containing the MCS set as
+ * defined in 802.11n
+ * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE
+ * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n
+ * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n
+ * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as
+ * defined in 802.11ac
+ * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE
+ * @NL80211_BAND_ATTR_IFTYPE_DATA: nested array attribute, with each entry using
+ * attributes from &enum nl80211_band_iftype_attr
+ * @NL80211_BAND_ATTR_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz
+ * channel(s) that are allowed to be used for EDMG transmissions.
+ * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251.
+ * @NL80211_BAND_ATTR_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes
+ * the allowed channel bandwidth configurations.
+ * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13.
+ * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined
+ * @__NL80211_BAND_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_band_attr {
+ __NL80211_BAND_ATTR_INVALID,
+ NL80211_BAND_ATTR_FREQS,
+ NL80211_BAND_ATTR_RATES,
+
+ NL80211_BAND_ATTR_HT_MCS_SET,
+ NL80211_BAND_ATTR_HT_CAPA,
+ NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
+ NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
+
+ NL80211_BAND_ATTR_VHT_MCS_SET,
+ NL80211_BAND_ATTR_VHT_CAPA,
+ NL80211_BAND_ATTR_IFTYPE_DATA,
+
+ NL80211_BAND_ATTR_EDMG_CHANNELS,
+ NL80211_BAND_ATTR_EDMG_BW_CONFIG,
+
+ /* keep last */
+ __NL80211_BAND_ATTR_AFTER_LAST,
+ NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1
+};
+
+#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA
+
+/**
+ * enum nl80211_wmm_rule - regulatory wmm rule
+ *
+ * @__NL80211_WMMR_INVALID: attribute number 0 is reserved
+ * @NL80211_WMMR_CW_MIN: Minimum contention window slot.
+ * @NL80211_WMMR_CW_MAX: Maximum contention window slot.
+ * @NL80211_WMMR_AIFSN: Arbitration Inter Frame Space.
+ * @NL80211_WMMR_TXOP: Maximum allowed tx operation time.
+ * @nl80211_WMMR_MAX: highest possible wmm rule.
+ * @__NL80211_WMMR_LAST: Internal use.
+ */
+enum nl80211_wmm_rule {
+ __NL80211_WMMR_INVALID,
+ NL80211_WMMR_CW_MIN,
+ NL80211_WMMR_CW_MAX,
+ NL80211_WMMR_AIFSN,
+ NL80211_WMMR_TXOP,
+
+ /* keep last */
+ __NL80211_WMMR_LAST,
+ NL80211_WMMR_MAX = __NL80211_WMMR_LAST - 1
+};
+
+/**
+ * enum nl80211_frequency_attr - frequency attributes
+ * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz
+ * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current
+ * regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_NO_IR: no mechanisms that initiate radiation
+ * are permitted on this channel, this includes sending probe
+ * requests, or modes of operation that require beaconing.
+ * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory
+ * on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm
+ * (100 * dBm).
+ * @NL80211_FREQUENCY_ATTR_DFS_STATE: current state for DFS
+ * (enum nl80211_dfs_state)
+ * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long
+ * this channel is in this DFS state.
+ * @NL80211_FREQUENCY_ATTR_NO_HT40_MINUS: HT40- isn't possible with this
+ * channel as the control channel
+ * @NL80211_FREQUENCY_ATTR_NO_HT40_PLUS: HT40+ isn't possible with this
+ * channel as the control channel
+ * @NL80211_FREQUENCY_ATTR_NO_80MHZ: any 80 MHz channel using this channel
+ * as the primary or any of the secondary channels isn't possible,
+ * this includes 80+80 channels
+ * @NL80211_FREQUENCY_ATTR_NO_160MHZ: any 160 MHz (but not 80+80) channel
+ * using this channel as the primary or any of the secondary channels
+ * isn't possible
+ * @NL80211_FREQUENCY_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds.
+ * @NL80211_FREQUENCY_ATTR_INDOOR_ONLY: Only indoor use is permitted on this
+ * channel. A channel that has the INDOOR_ONLY attribute can only be
+ * used when there is a clear assessment that the device is operating in
+ * an indoor surroundings, i.e., it is connected to AC power (and not
+ * through portable DC inverters) or is under the control of a master
+ * that is acting as an AP and is connected to AC power.
+ * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this
+ * channel if it's connected concurrently to a BSS on the same channel on
+ * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz
+ * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO or TDLS
+ * off-channel on a channel that has the IR_CONCURRENT attribute set can be
+ * done when there is a clear assessment that the device is operating under
+ * the guidance of an authorized master, i.e., setting up a GO or TDLS
+ * off-channel while the device is also connected to an AP with DFS and
+ * radar detection on the UNII band (it is up to user-space, i.e.,
+ * wpa_supplicant to perform the required verifications). Using this
+ * attribute for IR is disallowed for master interfaces (IBSS, AP).
+ * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed
+ * on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed
+ * on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_WMM: this channel has wmm limitations.
+ * This is a nested attribute that contains the wmm limitation per AC.
+ * (see &enum nl80211_wmm_rule)
+ * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
+ * currently defined
+ * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
+ *
+ * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122
+ * for more information on the FCC description of the relaxations allowed
+ * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and
+ * NL80211_FREQUENCY_ATTR_IR_CONCURRENT.
+ */
+enum nl80211_frequency_attr {
+ __NL80211_FREQUENCY_ATTR_INVALID,
+ NL80211_FREQUENCY_ATTR_FREQ,
+ NL80211_FREQUENCY_ATTR_DISABLED,
+ NL80211_FREQUENCY_ATTR_NO_IR,
+ __NL80211_FREQUENCY_ATTR_NO_IBSS,
+ NL80211_FREQUENCY_ATTR_RADAR,
+ NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
+ NL80211_FREQUENCY_ATTR_DFS_STATE,
+ NL80211_FREQUENCY_ATTR_DFS_TIME,
+ NL80211_FREQUENCY_ATTR_NO_HT40_MINUS,
+ NL80211_FREQUENCY_ATTR_NO_HT40_PLUS,
+ NL80211_FREQUENCY_ATTR_NO_80MHZ,
+ NL80211_FREQUENCY_ATTR_NO_160MHZ,
+ NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
+ NL80211_FREQUENCY_ATTR_INDOOR_ONLY,
+ NL80211_FREQUENCY_ATTR_IR_CONCURRENT,
+ NL80211_FREQUENCY_ATTR_NO_20MHZ,
+ NL80211_FREQUENCY_ATTR_NO_10MHZ,
+ NL80211_FREQUENCY_ATTR_WMM,
+
+ /* keep last */
+ __NL80211_FREQUENCY_ATTR_AFTER_LAST,
+ NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1
+};
+
+#define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER
+#define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR
+#define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR
+#define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR
+#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \
+ NL80211_FREQUENCY_ATTR_IR_CONCURRENT
+
+/**
+ * enum nl80211_bitrate_attr - bitrate attributes
+ * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps
+ * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported
+ * in 2.4 GHz band.
+ * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number
+ * currently defined
+ * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_bitrate_attr {
+ __NL80211_BITRATE_ATTR_INVALID,
+ NL80211_BITRATE_ATTR_RATE,
+ NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE,
+
+ /* keep last */
+ __NL80211_BITRATE_ATTR_AFTER_LAST,
+ NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_initiator - Indicates the initiator of a reg domain request
+ * @NL80211_REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world
+ * regulatory domain.
+ * @NL80211_REGDOM_SET_BY_USER: User asked the wireless core to set the
+ * regulatory domain.
+ * @NL80211_REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the
+ * wireless core it thinks its knows the regulatory domain we should be in.
+ * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an
+ * 802.11 country information element with regulatory information it
+ * thinks we should consider. cfg80211 only processes the country
+ * code from the IE, and relies on the regulatory domain information
+ * structure passed by userspace (CRDA) from our wireless-regdb.
+ * If a channel is enabled but the country code indicates it should
+ * be disabled we disable the channel and re-enable it upon disassociation.
+ */
+enum nl80211_reg_initiator {
+ NL80211_REGDOM_SET_BY_CORE,
+ NL80211_REGDOM_SET_BY_USER,
+ NL80211_REGDOM_SET_BY_DRIVER,
+ NL80211_REGDOM_SET_BY_COUNTRY_IE,
+};
+
+/**
+ * enum nl80211_reg_type - specifies the type of regulatory domain
+ * @NL80211_REGDOM_TYPE_COUNTRY: the regulatory domain set is one that pertains
+ * to a specific country. When this is set you can count on the
+ * ISO / IEC 3166 alpha2 country code being valid.
+ * @NL80211_REGDOM_TYPE_WORLD: the regulatory set domain is the world regulatory
+ * domain.
+ * @NL80211_REGDOM_TYPE_CUSTOM_WORLD: the regulatory domain set is a custom
+ * driver specific world regulatory domain. These do not apply system-wide
+ * and are only applicable to the individual devices which have requested
+ * them to be applied.
+ * @NL80211_REGDOM_TYPE_INTERSECTION: the regulatory domain set is the product
+ * of an intersection between two regulatory domains -- the previously
+ * set regulatory domain on the system and the last accepted regulatory
+ * domain request to be processed.
+ */
+enum nl80211_reg_type {
+ NL80211_REGDOM_TYPE_COUNTRY,
+ NL80211_REGDOM_TYPE_WORLD,
+ NL80211_REGDOM_TYPE_CUSTOM_WORLD,
+ NL80211_REGDOM_TYPE_INTERSECTION,
+};
+
+/**
+ * enum nl80211_reg_rule_attr - regulatory rule attributes
+ * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional
+ * considerations for a given frequency range. These are the
+ * &enum nl80211_reg_rule_flags.
+ * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory
+ * rule in KHz. This is not a center of frequency but an actual regulatory
+ * band edge.
+ * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule
+ * in KHz. This is not a center a frequency but an actual regulatory
+ * band edge.
+ * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this
+ * frequency range, in KHz.
+ * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain
+ * for a given frequency range. The value is in mBi (100 * dBi).
+ * If you don't have one then don't send this.
+ * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for
+ * a given frequency range. The value is in mBm (100 * dBm).
+ * @NL80211_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds.
+ * If not present or 0 default CAC time will be used.
+ * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number
+ * currently defined
+ * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_reg_rule_attr {
+ __NL80211_REG_RULE_ATTR_INVALID,
+ NL80211_ATTR_REG_RULE_FLAGS,
+
+ NL80211_ATTR_FREQ_RANGE_START,
+ NL80211_ATTR_FREQ_RANGE_END,
+ NL80211_ATTR_FREQ_RANGE_MAX_BW,
+
+ NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
+ NL80211_ATTR_POWER_RULE_MAX_EIRP,
+
+ NL80211_ATTR_DFS_CAC_TIME,
+
+ /* keep last */
+ __NL80211_REG_RULE_ATTR_AFTER_LAST,
+ NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_sched_scan_match_attr - scheduled scan match attributes
+ * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching,
+ * only report BSS with matching SSID.
+ * (This cannot be used together with BSSID.)
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a
+ * BSS in scan results. Filtering is turned off if not specified. Note that
+ * if this attribute is in a match set of its own, then it is treated as
+ * the default value for all matchsets with an SSID, rather than being a
+ * matchset of its own without an RSSI filter. This is due to problems with
+ * how this API was implemented in the past. Also, due to the same problem,
+ * the only way to create a matchset with only an RSSI filter (with this
+ * attribute) is if there's only a single matchset with the RSSI attribute.
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI: Flag indicating whether
+ * %NL80211_SCHED_SCAN_MATCH_ATTR_RSSI to be used as absolute RSSI or
+ * relative to current bss's RSSI.
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST: When present the RSSI level for
+ * BSS-es in the specified band is to be adjusted before doing
+ * RSSI-based BSS selection. The attribute value is a packed structure
+ * value as specified by &struct nl80211_bss_select_rssi_adjust.
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching
+ * (this cannot be used together with SSID).
+ * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the
+ * band specific minimum rssi thresholds for the bands defined in
+ * enum nl80211_band. The minimum rssi threshold value(s32) specific to a
+ * band shall be encapsulated in attribute with type value equals to one
+ * of the NL80211_BAND_* defined in enum nl80211_band. For example, the
+ * minimum rssi threshold value for 2.4GHZ band shall be encapsulated
+ * within an attribute of type NL80211_BAND_2GHZ. And one or more of such
+ * attributes will be nested within this attribute.
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter
+ * attribute number currently defined
+ * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_sched_scan_match_attr {
+ __NL80211_SCHED_SCAN_MATCH_ATTR_INVALID,
+
+ NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
+ NL80211_SCHED_SCAN_MATCH_ATTR_RSSI,
+ NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI,
+ NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST,
+ NL80211_SCHED_SCAN_MATCH_ATTR_BSSID,
+ NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI,
+
+ /* keep last */
+ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST,
+ NL80211_SCHED_SCAN_MATCH_ATTR_MAX =
+ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST - 1
+};
+
+/* only for backward compatibility */
+#define NL80211_ATTR_SCHED_SCAN_MATCH_SSID NL80211_SCHED_SCAN_MATCH_ATTR_SSID
+
+/**
+ * enum nl80211_reg_rule_flags - regulatory rule flags
+ *
+ * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed
+ * @NL80211_RRF_NO_CCK: CCK modulation not allowed
+ * @NL80211_RRF_NO_INDOOR: indoor operation not allowed
+ * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed
+ * @NL80211_RRF_DFS: DFS support is required to be used
+ * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links
+ * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links
+ * @NL80211_RRF_NO_IR: no mechanisms that initiate radiation are allowed,
+ * this includes probe requests or modes of operation that require
+ * beaconing.
+ * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated
+ * base on contiguous rules and wider channels will be allowed to cross
+ * multiple contiguous/overlapping frequency ranges.
+ * @NL80211_RRF_IR_CONCURRENT: See %NL80211_FREQUENCY_ATTR_IR_CONCURRENT
+ * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation
+ * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation
+ * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed
+ * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed
+ */
+enum nl80211_reg_rule_flags {
+ NL80211_RRF_NO_OFDM = 1<<0,
+ NL80211_RRF_NO_CCK = 1<<1,
+ NL80211_RRF_NO_INDOOR = 1<<2,
+ NL80211_RRF_NO_OUTDOOR = 1<<3,
+ NL80211_RRF_DFS = 1<<4,
+ NL80211_RRF_PTP_ONLY = 1<<5,
+ NL80211_RRF_PTMP_ONLY = 1<<6,
+ NL80211_RRF_NO_IR = 1<<7,
+ __NL80211_RRF_NO_IBSS = 1<<8,
+ NL80211_RRF_AUTO_BW = 1<<11,
+ NL80211_RRF_IR_CONCURRENT = 1<<12,
+ NL80211_RRF_NO_HT40MINUS = 1<<13,
+ NL80211_RRF_NO_HT40PLUS = 1<<14,
+ NL80211_RRF_NO_80MHZ = 1<<15,
+ NL80211_RRF_NO_160MHZ = 1<<16,
+};
+
+#define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR
+#define NL80211_RRF_NO_IBSS NL80211_RRF_NO_IR
+#define NL80211_RRF_NO_IR NL80211_RRF_NO_IR
+#define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\
+ NL80211_RRF_NO_HT40PLUS)
+#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT
+
+/* For backport compatibility with older userspace */
+#define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS)
+
+/**
+ * enum nl80211_dfs_regions - regulatory DFS regions
+ *
+ * @NL80211_DFS_UNSET: Country has no DFS master region specified
+ * @NL80211_DFS_FCC: Country follows DFS master rules from FCC
+ * @NL80211_DFS_ETSI: Country follows DFS master rules from ETSI
+ * @NL80211_DFS_JP: Country follows DFS master rules from JP/MKK/Telec
+ */
+enum nl80211_dfs_regions {
+ NL80211_DFS_UNSET = 0,
+ NL80211_DFS_FCC = 1,
+ NL80211_DFS_ETSI = 2,
+ NL80211_DFS_JP = 3,
+};
+
+/**
+ * enum nl80211_user_reg_hint_type - type of user regulatory hint
+ *
+ * @NL80211_USER_REG_HINT_USER: a user sent the hint. This is always
+ * assumed if the attribute is not set.
+ * @NL80211_USER_REG_HINT_CELL_BASE: the hint comes from a cellular
+ * base station. Device drivers that have been tested to work
+ * properly to support this type of hint can enable these hints
+ * by setting the NL80211_FEATURE_CELL_BASE_REG_HINTS feature
+ * capability on the struct wiphy. The wireless core will
+ * ignore all cell base station hints until at least one device
+ * present has been registered with the wireless core that
+ * has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a
+ * supported feature.
+ * @NL80211_USER_REG_HINT_INDOOR: a user sent an hint indicating that the
+ * platform is operating in an indoor environment.
+ */
+enum nl80211_user_reg_hint_type {
+ NL80211_USER_REG_HINT_USER = 0,
+ NL80211_USER_REG_HINT_CELL_BASE = 1,
+ NL80211_USER_REG_HINT_INDOOR = 2,
+};
+
+/**
+ * enum nl80211_survey_info - survey information
+ *
+ * These attribute types are used with %NL80211_ATTR_SURVEY_INFO
+ * when getting information about a survey.
+ *
+ * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel
+ * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm)
+ * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used
+ * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio
+ * was turned on (on channel or globally)
+ * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary
+ * channel was sensed busy (either due to activity or energy detect)
+ * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension
+ * channel was sensed busy
+ * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent
+ * receiving data (on channel or globally)
+ * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent
+ * transmitting data (on channel or globally)
+ * @NL80211_SURVEY_INFO_TIME_SCAN: time the radio spent for scan
+ * (on this channel or globally)
+ * @NL80211_SURVEY_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_SURVEY_INFO_TIME_BSS_RX: amount of time the radio spent
+ * receiving frames destined to the local BSS
+ * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number
+ * currently defined
+ * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use
+ */
+enum nl80211_survey_info {
+ __NL80211_SURVEY_INFO_INVALID,
+ NL80211_SURVEY_INFO_FREQUENCY,
+ NL80211_SURVEY_INFO_NOISE,
+ NL80211_SURVEY_INFO_IN_USE,
+ NL80211_SURVEY_INFO_TIME,
+ NL80211_SURVEY_INFO_TIME_BUSY,
+ NL80211_SURVEY_INFO_TIME_EXT_BUSY,
+ NL80211_SURVEY_INFO_TIME_RX,
+ NL80211_SURVEY_INFO_TIME_TX,
+ NL80211_SURVEY_INFO_TIME_SCAN,
+ NL80211_SURVEY_INFO_PAD,
+ NL80211_SURVEY_INFO_TIME_BSS_RX,
+
+ /* keep last */
+ __NL80211_SURVEY_INFO_AFTER_LAST,
+ NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1
+};
+
+/* keep old names for compatibility */
+#define NL80211_SURVEY_INFO_CHANNEL_TIME NL80211_SURVEY_INFO_TIME
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY NL80211_SURVEY_INFO_TIME_BUSY
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY NL80211_SURVEY_INFO_TIME_EXT_BUSY
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_RX NL80211_SURVEY_INFO_TIME_RX
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_TX NL80211_SURVEY_INFO_TIME_TX
+
+/**
+ * enum nl80211_mntr_flags - monitor configuration flags
+ *
+ * Monitor configuration flags.
+ *
+ * @__NL80211_MNTR_FLAG_INVALID: reserved
+ *
+ * @NL80211_MNTR_FLAG_FCSFAIL: pass frames with bad FCS
+ * @NL80211_MNTR_FLAG_PLCPFAIL: pass frames with bad PLCP
+ * @NL80211_MNTR_FLAG_CONTROL: pass control frames
+ * @NL80211_MNTR_FLAG_OTHER_BSS: disable BSSID filtering
+ * @NL80211_MNTR_FLAG_COOK_FRAMES: report frames after processing.
+ * overrides all other flags.
+ * @NL80211_MNTR_FLAG_ACTIVE: use the configured MAC address
+ * and ACK incoming unicast packets.
+ *
+ * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use
+ * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag
+ */
+enum nl80211_mntr_flags {
+ __NL80211_MNTR_FLAG_INVALID,
+ NL80211_MNTR_FLAG_FCSFAIL,
+ NL80211_MNTR_FLAG_PLCPFAIL,
+ NL80211_MNTR_FLAG_CONTROL,
+ NL80211_MNTR_FLAG_OTHER_BSS,
+ NL80211_MNTR_FLAG_COOK_FRAMES,
+ NL80211_MNTR_FLAG_ACTIVE,
+
+ /* keep last */
+ __NL80211_MNTR_FLAG_AFTER_LAST,
+ NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_mesh_power_mode - mesh power save modes
+ *
+ * @NL80211_MESH_POWER_UNKNOWN: The mesh power mode of the mesh STA is
+ * not known or has not been set yet.
+ * @NL80211_MESH_POWER_ACTIVE: Active mesh power mode. The mesh STA is
+ * in Awake state all the time.
+ * @NL80211_MESH_POWER_LIGHT_SLEEP: Light sleep mode. The mesh STA will
+ * alternate between Active and Doze states, but will wake up for
+ * neighbor's beacons.
+ * @NL80211_MESH_POWER_DEEP_SLEEP: Deep sleep mode. The mesh STA will
+ * alternate between Active and Doze states, but may not wake up
+ * for neighbor's beacons.
+ *
+ * @__NL80211_MESH_POWER_AFTER_LAST - internal use
+ * @NL80211_MESH_POWER_MAX - highest possible power save level
+ */
+
+enum nl80211_mesh_power_mode {
+ NL80211_MESH_POWER_UNKNOWN,
+ NL80211_MESH_POWER_ACTIVE,
+ NL80211_MESH_POWER_LIGHT_SLEEP,
+ NL80211_MESH_POWER_DEEP_SLEEP,
+
+ __NL80211_MESH_POWER_AFTER_LAST,
+ NL80211_MESH_POWER_MAX = __NL80211_MESH_POWER_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_meshconf_params - mesh configuration parameters
+ *
+ * Mesh configuration parameters. These can be changed while the mesh is
+ * active.
+ *
+ * @__NL80211_MESHCONF_INVALID: internal use
+ *
+ * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in
+ * millisecond units, used by the Peer Link Open message
+ *
+ * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the initial confirm timeout, in
+ * millisecond units, used by the peer link management to close a peer link
+ *
+ * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in
+ * millisecond units
+ *
+ * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed
+ * on this mesh interface
+ *
+ * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link
+ * open retries that can be sent to establish a new peer link instance in a
+ * mesh
+ *
+ * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh
+ * point.
+ *
+ * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically open
+ * peer links when we detect compatible mesh peers. Disabled if
+ * @NL80211_MESH_SETUP_USERSPACE_MPM or @NL80211_MESH_SETUP_USERSPACE_AMPE are
+ * set.
+ *
+ * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames
+ * containing a PREQ that an MP can send to a particular destination (path
+ * target)
+ *
+ * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths
+ * (in milliseconds)
+ *
+ * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait
+ * until giving up on a path discovery (in milliseconds)
+ *
+ * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh
+ * points receiving a PREQ shall consider the forwarding information from
+ * the root to be valid. (TU = time unit)
+ *
+ * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in
+ * TUs) during which an MP can send only one action frame containing a PREQ
+ * reference element
+ *
+ * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs)
+ * that it takes for an HWMP information element to propagate across the
+ * mesh
+ *
+ * @NL80211_MESHCONF_HWMP_ROOTMODE: whether root mode is enabled or not
+ *
+ * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
+ * source mesh point for path selection elements.
+ *
+ * @NL80211_MESHCONF_HWMP_RANN_INTERVAL: The interval of time (in TUs) between
+ * root announcements are transmitted.
+ *
+ * @NL80211_MESHCONF_GATE_ANNOUNCEMENTS: Advertise that this mesh station has
+ * access to a broader network beyond the MBSS. This is done via Root
+ * Announcement frames.
+ *
+ * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in
+ * TUs) during which a mesh STA can send only one Action frame containing a
+ * PERR element.
+ *
+ * @NL80211_MESHCONF_FORWARDING: set Mesh STA as forwarding or non-forwarding
+ * or forwarding entity (default is TRUE - forwarding entity)
+ *
+ * @NL80211_MESHCONF_RSSI_THRESHOLD: RSSI threshold in dBm. This specifies the
+ * threshold for average signal strength of candidate station to establish
+ * a peer link.
+ *
+ * @NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR: maximum number of neighbors
+ * to synchronize to for 11s default synchronization method
+ * (see 11C.12.2.2)
+ *
+ * @NL80211_MESHCONF_HT_OPMODE: set mesh HT protection mode.
+ *
+ * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute
+ *
+ * @NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT: The time (in TUs) for
+ * which mesh STAs receiving a proactive PREQ shall consider the forwarding
+ * information to the root mesh STA to be valid.
+ *
+ * @NL80211_MESHCONF_HWMP_ROOT_INTERVAL: The interval of time (in TUs) between
+ * proactive PREQs are transmitted.
+ *
+ * @NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL: The minimum interval of time
+ * (in TUs) during which a mesh STA can send only one Action frame
+ * containing a PREQ element for root path confirmation.
+ *
+ * @NL80211_MESHCONF_POWER_MODE: Default mesh power mode for new peer links.
+ * type &enum nl80211_mesh_power_mode (u32)
+ *
+ * @NL80211_MESHCONF_AWAKE_WINDOW: awake window duration (in TUs)
+ *
+ * @NL80211_MESHCONF_PLINK_TIMEOUT: If no tx activity is seen from a STA we've
+ * established peering with for longer than this time (in seconds), then
+ * remove it from the STA's list of peers. You may set this to 0 to disable
+ * the removal of the STA. Default is 30 minutes.
+ *
+ * @NL80211_MESHCONF_CONNECTED_TO_GATE: If set to true then this mesh STA
+ * will advertise that it is connected to a gate in the mesh formation
+ * field. If left unset then the mesh formation field will only
+ * advertise such if there is an active root mesh path.
+ *
+ * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_meshconf_params {
+ __NL80211_MESHCONF_INVALID,
+ NL80211_MESHCONF_RETRY_TIMEOUT,
+ NL80211_MESHCONF_CONFIRM_TIMEOUT,
+ NL80211_MESHCONF_HOLDING_TIMEOUT,
+ NL80211_MESHCONF_MAX_PEER_LINKS,
+ NL80211_MESHCONF_MAX_RETRIES,
+ NL80211_MESHCONF_TTL,
+ NL80211_MESHCONF_AUTO_OPEN_PLINKS,
+ NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
+ NL80211_MESHCONF_PATH_REFRESH_TIME,
+ NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
+ NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
+ NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
+ NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
+ NL80211_MESHCONF_HWMP_ROOTMODE,
+ NL80211_MESHCONF_ELEMENT_TTL,
+ NL80211_MESHCONF_HWMP_RANN_INTERVAL,
+ NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
+ NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
+ NL80211_MESHCONF_FORWARDING,
+ NL80211_MESHCONF_RSSI_THRESHOLD,
+ NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR,
+ NL80211_MESHCONF_HT_OPMODE,
+ NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
+ NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
+ NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL,
+ NL80211_MESHCONF_POWER_MODE,
+ NL80211_MESHCONF_AWAKE_WINDOW,
+ NL80211_MESHCONF_PLINK_TIMEOUT,
+ NL80211_MESHCONF_CONNECTED_TO_GATE,
+
+ /* keep last */
+ __NL80211_MESHCONF_ATTR_AFTER_LAST,
+ NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_mesh_setup_params - mesh setup parameters
+ *
+ * Mesh setup parameters. These are used to start/join a mesh and cannot be
+ * changed while the mesh is active.
+ *
+ * @__NL80211_MESH_SETUP_INVALID: Internal use
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a
+ * vendor specific path selection algorithm or disable it to use the
+ * default HWMP.
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a
+ * vendor specific path metric or disable it to use the default Airtime
+ * metric.
+ *
+ * @NL80211_MESH_SETUP_IE: Information elements for this mesh, for instance, a
+ * robust security network ie, or a vendor specific information element
+ * that vendors will use to identify the path selection methods and
+ * metrics in use.
+ *
+ * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication
+ * daemon will be authenticating mesh candidates.
+ *
+ * @NL80211_MESH_SETUP_USERSPACE_AMPE: Enable this option if an authentication
+ * daemon will be securing peer link frames. AMPE is a secured version of
+ * Mesh Peering Management (MPM) and is implemented with the assistance of
+ * a userspace daemon. When this flag is set, the kernel will send peer
+ * management frames to a userspace daemon that will implement AMPE
+ * functionality (security capabilities selection, key confirmation, and
+ * key management). When the flag is unset (default), the kernel can
+ * autonomously complete (unsecured) mesh peering without the need of a
+ * userspace daemon.
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC: Enable this option to use a
+ * vendor specific synchronization method or disable it to use the default
+ * neighbor offset synchronization
+ *
+ * @NL80211_MESH_SETUP_USERSPACE_MPM: Enable this option if userspace will
+ * implement an MPM which handles peer allocation and state.
+ *
+ * @NL80211_MESH_SETUP_AUTH_PROTOCOL: Inform the kernel of the authentication
+ * method (u8, as defined in IEEE 8.4.2.100.6, e.g. 0x1 for SAE).
+ * Default is no authentication method required.
+ *
+ * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number
+ *
+ * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use
+ */
+enum nl80211_mesh_setup_params {
+ __NL80211_MESH_SETUP_INVALID,
+ NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL,
+ NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC,
+ NL80211_MESH_SETUP_IE,
+ NL80211_MESH_SETUP_USERSPACE_AUTH,
+ NL80211_MESH_SETUP_USERSPACE_AMPE,
+ NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC,
+ NL80211_MESH_SETUP_USERSPACE_MPM,
+ NL80211_MESH_SETUP_AUTH_PROTOCOL,
+
+ /* keep last */
+ __NL80211_MESH_SETUP_ATTR_AFTER_LAST,
+ NL80211_MESH_SETUP_ATTR_MAX = __NL80211_MESH_SETUP_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_txq_attr - TX queue parameter attributes
+ * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved
+ * @NL80211_TXQ_ATTR_AC: AC identifier (NL80211_AC_*)
+ * @NL80211_TXQ_ATTR_TXOP: Maximum burst time in units of 32 usecs, 0 meaning
+ * disabled
+ * @NL80211_TXQ_ATTR_CWMIN: Minimum contention window [a value of the form
+ * 2^n-1 in the range 1..32767]
+ * @NL80211_TXQ_ATTR_CWMAX: Maximum contention window [a value of the form
+ * 2^n-1 in the range 1..32767]
+ * @NL80211_TXQ_ATTR_AIFS: Arbitration interframe space [0..255]
+ * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal
+ * @NL80211_TXQ_ATTR_MAX: Maximum TXQ attribute number
+ */
+enum nl80211_txq_attr {
+ __NL80211_TXQ_ATTR_INVALID,
+ NL80211_TXQ_ATTR_AC,
+ NL80211_TXQ_ATTR_TXOP,
+ NL80211_TXQ_ATTR_CWMIN,
+ NL80211_TXQ_ATTR_CWMAX,
+ NL80211_TXQ_ATTR_AIFS,
+
+ /* keep last */
+ __NL80211_TXQ_ATTR_AFTER_LAST,
+ NL80211_TXQ_ATTR_MAX = __NL80211_TXQ_ATTR_AFTER_LAST - 1
+};
+
+enum nl80211_ac {
+ NL80211_AC_VO,
+ NL80211_AC_VI,
+ NL80211_AC_BE,
+ NL80211_AC_BK,
+ NL80211_NUM_ACS
+};
+
+/* backward compat */
+#define NL80211_TXQ_ATTR_QUEUE NL80211_TXQ_ATTR_AC
+#define NL80211_TXQ_Q_VO NL80211_AC_VO
+#define NL80211_TXQ_Q_VI NL80211_AC_VI
+#define NL80211_TXQ_Q_BE NL80211_AC_BE
+#define NL80211_TXQ_Q_BK NL80211_AC_BK
+
+/**
+ * enum nl80211_channel_type - channel type
+ * @NL80211_CHAN_NO_HT: 20 MHz, non-HT channel
+ * @NL80211_CHAN_HT20: 20 MHz HT channel
+ * @NL80211_CHAN_HT40MINUS: HT40 channel, secondary channel
+ * below the control channel
+ * @NL80211_CHAN_HT40PLUS: HT40 channel, secondary channel
+ * above the control channel
+ */
+enum nl80211_channel_type {
+ NL80211_CHAN_NO_HT,
+ NL80211_CHAN_HT20,
+ NL80211_CHAN_HT40MINUS,
+ NL80211_CHAN_HT40PLUS
+};
+
+/**
+ * enum nl80211_key_mode - Key mode
+ *
+ * @NL80211_KEY_RX_TX: (Default)
+ * Key can be used for Rx and Tx immediately
+ *
+ * The following modes can only be selected for unicast keys and when the
+ * driver supports @NL80211_EXT_FEATURE_EXT_KEY_ID:
+ *
+ * @NL80211_KEY_NO_TX: Only allowed in combination with @NL80211_CMD_NEW_KEY:
+ * Unicast key can only be used for Rx, Tx not allowed, yet
+ * @NL80211_KEY_SET_TX: Only allowed in combination with @NL80211_CMD_SET_KEY:
+ * The unicast key identified by idx and mac is cleared for Tx and becomes
+ * the preferred Tx key for the station.
+ */
+enum nl80211_key_mode {
+ NL80211_KEY_RX_TX,
+ NL80211_KEY_NO_TX,
+ NL80211_KEY_SET_TX
+};
+
+/**
+ * enum nl80211_chan_width - channel width definitions
+ *
+ * These values are used with the %NL80211_ATTR_CHANNEL_WIDTH
+ * attribute.
+ *
+ * @NL80211_CHAN_WIDTH_20_NOHT: 20 MHz, non-HT channel
+ * @NL80211_CHAN_WIDTH_20: 20 MHz HT channel
+ * @NL80211_CHAN_WIDTH_40: 40 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ * attribute must be provided as well
+ * @NL80211_CHAN_WIDTH_80: 80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ * attribute must be provided as well
+ * @NL80211_CHAN_WIDTH_80P80: 80+80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ * and %NL80211_ATTR_CENTER_FREQ2 attributes must be provided as well
+ * @NL80211_CHAN_WIDTH_160: 160 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ * attribute must be provided as well
+ * @NL80211_CHAN_WIDTH_5: 5 MHz OFDM channel
+ * @NL80211_CHAN_WIDTH_10: 10 MHz OFDM channel
+ */
+enum nl80211_chan_width {
+ NL80211_CHAN_WIDTH_20_NOHT,
+ NL80211_CHAN_WIDTH_20,
+ NL80211_CHAN_WIDTH_40,
+ NL80211_CHAN_WIDTH_80,
+ NL80211_CHAN_WIDTH_80P80,
+ NL80211_CHAN_WIDTH_160,
+ NL80211_CHAN_WIDTH_5,
+ NL80211_CHAN_WIDTH_10,
+};
+
+/**
+ * enum nl80211_bss_scan_width - control channel width for a BSS
+ *
+ * These values are used with the %NL80211_BSS_CHAN_WIDTH attribute.
+ *
+ * @NL80211_BSS_CHAN_WIDTH_20: control channel is 20 MHz wide or compatible
+ * @NL80211_BSS_CHAN_WIDTH_10: control channel is 10 MHz wide
+ * @NL80211_BSS_CHAN_WIDTH_5: control channel is 5 MHz wide
+ */
+enum nl80211_bss_scan_width {
+ NL80211_BSS_CHAN_WIDTH_20,
+ NL80211_BSS_CHAN_WIDTH_10,
+ NL80211_BSS_CHAN_WIDTH_5,
+};
+
+/**
+ * enum nl80211_bss - netlink attributes for a BSS
+ *
+ * @__NL80211_BSS_INVALID: invalid
+ * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets)
+ * @NL80211_BSS_FREQUENCY: frequency in MHz (u32)
+ * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64)
+ * (if @NL80211_BSS_PRESP_DATA is present then this is known to be
+ * from a probe response, otherwise it may be from the same beacon
+ * that the NL80211_BSS_BEACON_TSF will be from)
+ * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16)
+ * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16)
+ * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the
+ * raw information elements from the probe response/beacon (bin);
+ * if the %NL80211_BSS_BEACON_IES attribute is present and the data is
+ * different then the IEs here are from a Probe Response frame; otherwise
+ * they are from a Beacon frame.
+ * However, if the driver does not indicate the source of the IEs, these
+ * IEs may be from either frame subtype.
+ * If present, the @NL80211_BSS_PRESP_DATA attribute indicates that the
+ * data here is known to be from a probe response, without any heuristics.
+ * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon
+ * in mBm (100 * dBm) (s32)
+ * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon
+ * in unspecified units, scaled to 0..100 (u8)
+ * @NL80211_BSS_STATUS: status, if this BSS is "used"
+ * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms
+ * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information
+ * elements from a Beacon frame (bin); not present if no Beacon frame has
+ * yet been received
+ * @NL80211_BSS_CHAN_WIDTH: channel width of the control channel
+ * (u32, enum nl80211_bss_scan_width)
+ * @NL80211_BSS_BEACON_TSF: TSF of the last received beacon (u64)
+ * (not present if no beacon frame has been received yet)
+ * @NL80211_BSS_PRESP_DATA: the data in @NL80211_BSS_INFORMATION_ELEMENTS and
+ * @NL80211_BSS_TSF is known to be from a probe response (flag attribute)
+ * @NL80211_BSS_LAST_SEEN_BOOTTIME: CLOCK_BOOTTIME timestamp when this entry
+ * was last updated by a received frame. The value is expected to be
+ * accurate to about 10ms. (u64, nanoseconds)
+ * @NL80211_BSS_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_BSS_PARENT_TSF: the time at the start of reception of the first
+ * octet of the timestamp field of the last beacon/probe received for
+ * this BSS. The time is the TSF of the BSS specified by
+ * @NL80211_BSS_PARENT_BSSID. (u64).
+ * @NL80211_BSS_PARENT_BSSID: the BSS according to which @NL80211_BSS_PARENT_TSF
+ * is set.
+ * @NL80211_BSS_CHAIN_SIGNAL: per-chain signal strength of last BSS update.
+ * Contains a nested array of signal strength attributes (u8, dBm),
+ * using the nesting index as the antenna number.
+ * @__NL80211_BSS_AFTER_LAST: internal
+ * @NL80211_BSS_MAX: highest BSS attribute
+ */
+enum nl80211_bss {
+ __NL80211_BSS_INVALID,
+ NL80211_BSS_BSSID,
+ NL80211_BSS_FREQUENCY,
+ NL80211_BSS_TSF,
+ NL80211_BSS_BEACON_INTERVAL,
+ NL80211_BSS_CAPABILITY,
+ NL80211_BSS_INFORMATION_ELEMENTS,
+ NL80211_BSS_SIGNAL_MBM,
+ NL80211_BSS_SIGNAL_UNSPEC,
+ NL80211_BSS_STATUS,
+ NL80211_BSS_SEEN_MS_AGO,
+ NL80211_BSS_BEACON_IES,
+ NL80211_BSS_CHAN_WIDTH,
+ NL80211_BSS_BEACON_TSF,
+ NL80211_BSS_PRESP_DATA,
+ NL80211_BSS_LAST_SEEN_BOOTTIME,
+ NL80211_BSS_PAD,
+ NL80211_BSS_PARENT_TSF,
+ NL80211_BSS_PARENT_BSSID,
+ NL80211_BSS_CHAIN_SIGNAL,
+
+ /* keep last */
+ __NL80211_BSS_AFTER_LAST,
+ NL80211_BSS_MAX = __NL80211_BSS_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_bss_status - BSS "status"
+ * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS.
+ * Note that this is no longer used since cfg80211 no longer
+ * keeps track of whether or not authentication was done with
+ * a given BSS.
+ * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS.
+ * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS.
+ *
+ * The BSS status is a BSS attribute in scan dumps, which
+ * indicates the status the interface has wrt. this BSS.
+ */
+enum nl80211_bss_status {
+ NL80211_BSS_STATUS_AUTHENTICATED,
+ NL80211_BSS_STATUS_ASSOCIATED,
+ NL80211_BSS_STATUS_IBSS_JOINED,
+};
+
+/**
+ * enum nl80211_auth_type - AuthenticationType
+ *
+ * @NL80211_AUTHTYPE_OPEN_SYSTEM: Open System authentication
+ * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only)
+ * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r)
+ * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP)
+ * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals
+ * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key
+ * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS
+ * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key
+ * @__NL80211_AUTHTYPE_NUM: internal
+ * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm
+ * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by
+ * trying multiple times); this is invalid in netlink -- leave out
+ * the attribute for this on CONNECT commands.
+ */
+enum nl80211_auth_type {
+ NL80211_AUTHTYPE_OPEN_SYSTEM,
+ NL80211_AUTHTYPE_SHARED_KEY,
+ NL80211_AUTHTYPE_FT,
+ NL80211_AUTHTYPE_NETWORK_EAP,
+ NL80211_AUTHTYPE_SAE,
+ NL80211_AUTHTYPE_FILS_SK,
+ NL80211_AUTHTYPE_FILS_SK_PFS,
+ NL80211_AUTHTYPE_FILS_PK,
+
+ /* keep last */
+ __NL80211_AUTHTYPE_NUM,
+ NL80211_AUTHTYPE_MAX = __NL80211_AUTHTYPE_NUM - 1,
+ NL80211_AUTHTYPE_AUTOMATIC
+};
+
+/**
+ * enum nl80211_key_type - Key Type
+ * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key
+ * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key
+ * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS)
+ * @NUM_NL80211_KEYTYPES: number of defined key types
+ */
+enum nl80211_key_type {
+ NL80211_KEYTYPE_GROUP,
+ NL80211_KEYTYPE_PAIRWISE,
+ NL80211_KEYTYPE_PEERKEY,
+
+ NUM_NL80211_KEYTYPES
+};
+
+/**
+ * enum nl80211_mfp - Management frame protection state
+ * @NL80211_MFP_NO: Management frame protection not used
+ * @NL80211_MFP_REQUIRED: Management frame protection required
+ * @NL80211_MFP_OPTIONAL: Management frame protection is optional
+ */
+enum nl80211_mfp {
+ NL80211_MFP_NO,
+ NL80211_MFP_REQUIRED,
+ NL80211_MFP_OPTIONAL,
+};
+
+enum nl80211_wpa_versions {
+ NL80211_WPA_VERSION_1 = 1 << 0,
+ NL80211_WPA_VERSION_2 = 1 << 1,
+ NL80211_WPA_VERSION_3 = 1 << 2,
+};
+
+/**
+ * enum nl80211_key_default_types - key default types
+ * @__NL80211_KEY_DEFAULT_TYPE_INVALID: invalid
+ * @NL80211_KEY_DEFAULT_TYPE_UNICAST: key should be used as default
+ * unicast key
+ * @NL80211_KEY_DEFAULT_TYPE_MULTICAST: key should be used as default
+ * multicast key
+ * @NUM_NL80211_KEY_DEFAULT_TYPES: number of default types
+ */
+enum nl80211_key_default_types {
+ __NL80211_KEY_DEFAULT_TYPE_INVALID,
+ NL80211_KEY_DEFAULT_TYPE_UNICAST,
+ NL80211_KEY_DEFAULT_TYPE_MULTICAST,
+
+ NUM_NL80211_KEY_DEFAULT_TYPES
+};
+
+/**
+ * enum nl80211_key_attributes - key attributes
+ * @__NL80211_KEY_INVALID: invalid
+ * @NL80211_KEY_DATA: (temporal) key data; for TKIP this consists of
+ * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC
+ * keys
+ * @NL80211_KEY_IDX: key ID (u8, 0-3)
+ * @NL80211_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11
+ * section 7.3.2.25.1, e.g. 0x000FAC04)
+ * @NL80211_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and
+ * CCMP keys, each six bytes in little endian
+ * @NL80211_KEY_DEFAULT: flag indicating default key
+ * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key
+ * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not
+ * specified the default depends on whether a MAC address was
+ * given with the command using the key or not (u32)
+ * @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags
+ * attributes, specifying what a key should be set as default as.
+ * See &enum nl80211_key_default_types.
+ * @NL80211_KEY_MODE: the mode from enum nl80211_key_mode.
+ * Defaults to @NL80211_KEY_RX_TX.
+ *
+ * @__NL80211_KEY_AFTER_LAST: internal
+ * @NL80211_KEY_MAX: highest key attribute
+ */
+enum nl80211_key_attributes {
+ __NL80211_KEY_INVALID,
+ NL80211_KEY_DATA,
+ NL80211_KEY_IDX,
+ NL80211_KEY_CIPHER,
+ NL80211_KEY_SEQ,
+ NL80211_KEY_DEFAULT,
+ NL80211_KEY_DEFAULT_MGMT,
+ NL80211_KEY_TYPE,
+ NL80211_KEY_DEFAULT_TYPES,
+ NL80211_KEY_MODE,
+
+ /* keep last */
+ __NL80211_KEY_AFTER_LAST,
+ NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_tx_rate_attributes - TX rate set attributes
+ * @__NL80211_TXRATE_INVALID: invalid
+ * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection
+ * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with
+ * 1 = 500 kbps) but without the IE length restriction (at most
+ * %NL80211_MAX_SUPP_RATES in a single array).
+ * @NL80211_TXRATE_HT: HT (MCS) rates allowed for TX rate selection
+ * in an array of MCS numbers.
+ * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection,
+ * see &struct nl80211_txrate_vht
+ * @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi
+ * @__NL80211_TXRATE_AFTER_LAST: internal
+ * @NL80211_TXRATE_MAX: highest TX rate attribute
+ */
+enum nl80211_tx_rate_attributes {
+ __NL80211_TXRATE_INVALID,
+ NL80211_TXRATE_LEGACY,
+ NL80211_TXRATE_HT,
+ NL80211_TXRATE_VHT,
+ NL80211_TXRATE_GI,
+
+ /* keep last */
+ __NL80211_TXRATE_AFTER_LAST,
+ NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1
+};
+
+#define NL80211_TXRATE_MCS NL80211_TXRATE_HT
+#define NL80211_VHT_NSS_MAX 8
+
+/**
+ * struct nl80211_txrate_vht - VHT MCS/NSS txrate bitmap
+ * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.)
+ */
+struct nl80211_txrate_vht {
+ __u16 mcs[NL80211_VHT_NSS_MAX];
+};
+
+enum nl80211_txrate_gi {
+ NL80211_TXRATE_DEFAULT_GI,
+ NL80211_TXRATE_FORCE_SGI,
+ NL80211_TXRATE_FORCE_LGI,
+};
+
+/**
+ * enum nl80211_band - Frequency band
+ * @NL80211_BAND_2GHZ: 2.4 GHz ISM band
+ * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz)
+ * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz)
+ * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz)
+ * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace
+ * since newer kernel versions may support more bands
+ */
+enum nl80211_band {
+ NL80211_BAND_2GHZ,
+ NL80211_BAND_5GHZ,
+ NL80211_BAND_60GHZ,
+ NL80211_BAND_6GHZ,
+
+ NUM_NL80211_BANDS,
+};
+
+/**
+ * enum nl80211_ps_state - powersave state
+ * @NL80211_PS_DISABLED: powersave is disabled
+ * @NL80211_PS_ENABLED: powersave is enabled
+ */
+enum nl80211_ps_state {
+ NL80211_PS_DISABLED,
+ NL80211_PS_ENABLED,
+};
+
+/**
+ * enum nl80211_attr_cqm - connection quality monitor attributes
+ * @__NL80211_ATTR_CQM_INVALID: invalid
+ * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm. This value specifies
+ * the threshold for the RSSI level at which an event will be sent. Zero
+ * to disable. Alternatively, if %NL80211_EXT_FEATURE_CQM_RSSI_LIST is
+ * set, multiple values can be supplied as a low-to-high sorted array of
+ * threshold values in dBm. Events will be sent when the RSSI value
+ * crosses any of the thresholds.
+ * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm. This value specifies
+ * the minimum amount the RSSI level must change after an event before a
+ * new event may be issued (to reduce effects of RSSI oscillation).
+ * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event
+ * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many
+ * consecutive packets were not acknowledged by the peer
+ * @NL80211_ATTR_CQM_TXE_RATE: TX error rate in %. Minimum % of TX failures
+ * during the given %NL80211_ATTR_CQM_TXE_INTVL before an
+ * %NL80211_CMD_NOTIFY_CQM with reported %NL80211_ATTR_CQM_TXE_RATE and
+ * %NL80211_ATTR_CQM_TXE_PKTS is generated.
+ * @NL80211_ATTR_CQM_TXE_PKTS: number of attempted packets in a given
+ * %NL80211_ATTR_CQM_TXE_INTVL before %NL80211_ATTR_CQM_TXE_RATE is
+ * checked.
+ * @NL80211_ATTR_CQM_TXE_INTVL: interval in seconds. Specifies the periodic
+ * interval in which %NL80211_ATTR_CQM_TXE_PKTS and
+ * %NL80211_ATTR_CQM_TXE_RATE must be satisfied before generating an
+ * %NL80211_CMD_NOTIFY_CQM. Set to 0 to turn off TX error reporting.
+ * @NL80211_ATTR_CQM_BEACON_LOSS_EVENT: flag attribute that's set in a beacon
+ * loss event
+ * @NL80211_ATTR_CQM_RSSI_LEVEL: the RSSI value in dBm that triggered the
+ * RSSI threshold event.
+ * @__NL80211_ATTR_CQM_AFTER_LAST: internal
+ * @NL80211_ATTR_CQM_MAX: highest key attribute
+ */
+enum nl80211_attr_cqm {
+ __NL80211_ATTR_CQM_INVALID,
+ NL80211_ATTR_CQM_RSSI_THOLD,
+ NL80211_ATTR_CQM_RSSI_HYST,
+ NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
+ NL80211_ATTR_CQM_PKT_LOSS_EVENT,
+ NL80211_ATTR_CQM_TXE_RATE,
+ NL80211_ATTR_CQM_TXE_PKTS,
+ NL80211_ATTR_CQM_TXE_INTVL,
+ NL80211_ATTR_CQM_BEACON_LOSS_EVENT,
+ NL80211_ATTR_CQM_RSSI_LEVEL,
+
+ /* keep last */
+ __NL80211_ATTR_CQM_AFTER_LAST,
+ NL80211_ATTR_CQM_MAX = __NL80211_ATTR_CQM_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event
+ * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the
+ * configured threshold
+ * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the
+ * configured threshold
+ * @NL80211_CQM_RSSI_BEACON_LOSS_EVENT: (reserved, never sent)
+ */
+enum nl80211_cqm_rssi_threshold_event {
+ NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
+ NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
+ NL80211_CQM_RSSI_BEACON_LOSS_EVENT,
+};
+
+
+/**
+ * enum nl80211_tx_power_setting - TX power adjustment
+ * @NL80211_TX_POWER_AUTOMATIC: automatically determine transmit power
+ * @NL80211_TX_POWER_LIMITED: limit TX power by the mBm parameter
+ * @NL80211_TX_POWER_FIXED: fix TX power to the mBm parameter
+ */
+enum nl80211_tx_power_setting {
+ NL80211_TX_POWER_AUTOMATIC,
+ NL80211_TX_POWER_LIMITED,
+ NL80211_TX_POWER_FIXED,
+};
+
+/**
+ * enum nl80211_packet_pattern_attr - packet pattern attribute
+ * @__NL80211_PKTPAT_INVALID: invalid number for nested attribute
+ * @NL80211_PKTPAT_PATTERN: the pattern, values where the mask has
+ * a zero bit are ignored
+ * @NL80211_PKTPAT_MASK: pattern mask, must be long enough to have
+ * a bit for each byte in the pattern. The lowest-order bit corresponds
+ * to the first byte of the pattern, but the bytes of the pattern are
+ * in a little-endian-like format, i.e. the 9th byte of the pattern
+ * corresponds to the lowest-order bit in the second byte of the mask.
+ * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where
+ * xx indicates "don't care") would be represented by a pattern of
+ * twelve zero bytes, and a mask of "0xed,0x01".
+ * Note that the pattern matching is done as though frames were not
+ * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked
+ * first (including SNAP header unpacking) and then matched.
+ * @NL80211_PKTPAT_OFFSET: packet offset, pattern is matched after
+ * these fixed number of bytes of received packet
+ * @NUM_NL80211_PKTPAT: number of attributes
+ * @MAX_NL80211_PKTPAT: max attribute number
+ */
+enum nl80211_packet_pattern_attr {
+ __NL80211_PKTPAT_INVALID,
+ NL80211_PKTPAT_MASK,
+ NL80211_PKTPAT_PATTERN,
+ NL80211_PKTPAT_OFFSET,
+
+ NUM_NL80211_PKTPAT,
+ MAX_NL80211_PKTPAT = NUM_NL80211_PKTPAT - 1,
+};
+
+/**
+ * struct nl80211_pattern_support - packet pattern support information
+ * @max_patterns: maximum number of patterns supported
+ * @min_pattern_len: minimum length of each pattern
+ * @max_pattern_len: maximum length of each pattern
+ * @max_pkt_offset: maximum Rx packet offset
+ *
+ * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when
+ * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED or in
+ * %NL80211_ATTR_COALESCE_RULE_PKT_PATTERN when that is part of
+ * %NL80211_ATTR_COALESCE_RULE in the capability information given
+ * by the kernel to userspace.
+ */
+struct nl80211_pattern_support {
+ __u32 max_patterns;
+ __u32 min_pattern_len;
+ __u32 max_pattern_len;
+ __u32 max_pkt_offset;
+} __attribute__((packed));
+
+/* only for backward compatibility */
+#define __NL80211_WOWLAN_PKTPAT_INVALID __NL80211_PKTPAT_INVALID
+#define NL80211_WOWLAN_PKTPAT_MASK NL80211_PKTPAT_MASK
+#define NL80211_WOWLAN_PKTPAT_PATTERN NL80211_PKTPAT_PATTERN
+#define NL80211_WOWLAN_PKTPAT_OFFSET NL80211_PKTPAT_OFFSET
+#define NUM_NL80211_WOWLAN_PKTPAT NUM_NL80211_PKTPAT
+#define MAX_NL80211_WOWLAN_PKTPAT MAX_NL80211_PKTPAT
+#define nl80211_wowlan_pattern_support nl80211_pattern_support
+
+/**
+ * enum nl80211_wowlan_triggers - WoWLAN trigger definitions
+ * @__NL80211_WOWLAN_TRIG_INVALID: invalid number for nested attributes
+ * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put
+ * the chip into a special state -- works best with chips that have
+ * support for low-power operation already (flag)
+ * Note that this mode is incompatible with all of the others, if
+ * any others are even supported by the device.
+ * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect
+ * is detected is implementation-specific (flag)
+ * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed
+ * by 16 repetitions of MAC addr, anywhere in payload) (flag)
+ * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns
+ * which are passed in an array of nested attributes, each nested attribute
+ * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern.
+ * Each pattern defines a wakeup packet. Packet offset is associated with
+ * each pattern which is used while matching the pattern. The matching is
+ * done on the MSDU, i.e. as though the packet was an 802.3 packet, so the
+ * pattern matching is done after the packet is converted to the MSDU.
+ *
+ * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute
+ * carrying a &struct nl80211_pattern_support.
+ *
+ * When reporting wakeup. it is a u32 attribute containing the 0-based
+ * index of the pattern that caused the wakeup, in the patterns passed
+ * to the kernel when configuring.
+ * @NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED: Not a real trigger, and cannot be
+ * used when setting, used only to indicate that GTK rekeying is supported
+ * by the device (flag)
+ * @NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE: wake up on GTK rekey failure (if
+ * done by the device) (flag)
+ * @NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST: wake up on EAP Identity Request
+ * packet (flag)
+ * @NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE: wake up on 4-way handshake (flag)
+ * @NL80211_WOWLAN_TRIG_RFKILL_RELEASE: wake up when rfkill is released
+ * (on devices that have rfkill in the device) (flag)
+ * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211: For wakeup reporting only, contains
+ * the 802.11 packet that caused the wakeup, e.g. a deauth frame. The frame
+ * may be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN
+ * attribute contains the original length.
+ * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN: Original length of the 802.11
+ * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211
+ * attribute if the packet was truncated somewhere.
+ * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023: For wakeup reporting only, contains the
+ * 802.11 packet that caused the wakeup, e.g. a magic packet. The frame may
+ * be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN attribute
+ * contains the original length.
+ * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3
+ * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023
+ * attribute if the packet was truncated somewhere.
+ * @NL80211_WOWLAN_TRIG_TCP_CONNECTION: TCP connection wake, see DOC section
+ * "TCP connection wakeup" for more details. This is a nested attribute
+ * containing the exact information for establishing and keeping alive
+ * the TCP connection.
+ * @NL80211_WOWLAN_TRIG_TCP_WAKEUP_MATCH: For wakeup reporting only, the
+ * wakeup packet was received on the TCP connection
+ * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST: For wakeup reporting only, the
+ * TCP connection was lost or failed to be established
+ * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS: For wakeup reporting only,
+ * the TCP connection ran out of tokens to use for data to send to the
+ * service
+ * @NL80211_WOWLAN_TRIG_NET_DETECT: wake up when a configured network
+ * is detected. This is a nested attribute that contains the
+ * same attributes used with @NL80211_CMD_START_SCHED_SCAN. It
+ * specifies how the scan is performed (e.g. the interval, the
+ * channels to scan and the initial delay) as well as the scan
+ * results that will trigger a wake (i.e. the matchsets). This
+ * attribute is also sent in a response to
+ * @NL80211_CMD_GET_WIPHY, indicating the number of match sets
+ * supported by the driver (u32).
+ * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute
+ * containing an array with information about what triggered the
+ * wake up. If no elements are present in the array, it means
+ * that the information is not available. If more than one
+ * element is present, it means that more than one match
+ * occurred.
+ * Each element in the array is a nested attribute that contains
+ * one optional %NL80211_ATTR_SSID attribute and one optional
+ * %NL80211_ATTR_SCAN_FREQUENCIES attribute. At least one of
+ * these attributes must be present. If
+ * %NL80211_ATTR_SCAN_FREQUENCIES contains more than one
+ * frequency, it means that the match occurred in more than one
+ * channel.
+ * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers
+ * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number
+ *
+ * These nested attributes are used to configure the wakeup triggers and
+ * to report the wakeup reason(s).
+ */
+enum nl80211_wowlan_triggers {
+ __NL80211_WOWLAN_TRIG_INVALID,
+ NL80211_WOWLAN_TRIG_ANY,
+ NL80211_WOWLAN_TRIG_DISCONNECT,
+ NL80211_WOWLAN_TRIG_MAGIC_PKT,
+ NL80211_WOWLAN_TRIG_PKT_PATTERN,
+ NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED,
+ NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE,
+ NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST,
+ NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE,
+ NL80211_WOWLAN_TRIG_RFKILL_RELEASE,
+ NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211,
+ NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN,
+ NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023,
+ NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN,
+ NL80211_WOWLAN_TRIG_TCP_CONNECTION,
+ NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH,
+ NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST,
+ NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS,
+ NL80211_WOWLAN_TRIG_NET_DETECT,
+ NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS,
+
+ /* keep last */
+ NUM_NL80211_WOWLAN_TRIG,
+ MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1
+};
+
+/**
+ * DOC: TCP connection wakeup
+ *
+ * Some devices can establish a TCP connection in order to be woken up by a
+ * packet coming in from outside their network segment, or behind NAT. If
+ * configured, the device will establish a TCP connection to the given
+ * service, and periodically send data to that service. The first data
+ * packet is usually transmitted after SYN/ACK, also ACKing the SYN/ACK.
+ * The data packets can optionally include a (little endian) sequence
+ * number (in the TCP payload!) that is generated by the device, and, also
+ * optionally, a token from a list of tokens. This serves as a keep-alive
+ * with the service, and for NATed connections, etc.
+ *
+ * During this keep-alive period, the server doesn't send any data to the
+ * client. When receiving data, it is compared against the wakeup pattern
+ * (and mask) and if it matches, the host is woken up. Similarly, if the
+ * connection breaks or cannot be established to start with, the host is
+ * also woken up.
+ *
+ * Developer's note: ARP offload is required for this, otherwise TCP
+ * response packets might not go through correctly.
+ */
+
+/**
+ * struct nl80211_wowlan_tcp_data_seq - WoWLAN TCP data sequence
+ * @start: starting value
+ * @offset: offset of sequence number in packet
+ * @len: length of the sequence value to write, 1 through 4
+ *
+ * Note: don't confuse with the TCP sequence number(s), this is for the
+ * keepalive packet payload. The actual value is written into the packet
+ * in little endian.
+ */
+struct nl80211_wowlan_tcp_data_seq {
+ __u32 start, offset, len;
+};
+
+/**
+ * struct nl80211_wowlan_tcp_data_token - WoWLAN TCP data token config
+ * @offset: offset of token in packet
+ * @len: length of each token
+ * @token_stream: stream of data to be used for the tokens, the length must
+ * be a multiple of @len for this to make sense
+ */
+struct nl80211_wowlan_tcp_data_token {
+ __u32 offset, len;
+ __u8 token_stream[];
+};
+
+/**
+ * struct nl80211_wowlan_tcp_data_token_feature - data token features
+ * @min_len: minimum token length
+ * @max_len: maximum token length
+ * @bufsize: total available token buffer size (max size of @token_stream)
+ */
+struct nl80211_wowlan_tcp_data_token_feature {
+ __u32 min_len, max_len, bufsize;
+};
+
+/**
+ * enum nl80211_wowlan_tcp_attrs - WoWLAN TCP connection parameters
+ * @__NL80211_WOWLAN_TCP_INVALID: invalid number for nested attributes
+ * @NL80211_WOWLAN_TCP_SRC_IPV4: source IPv4 address (in network byte order)
+ * @NL80211_WOWLAN_TCP_DST_IPV4: destination IPv4 address
+ * (in network byte order)
+ * @NL80211_WOWLAN_TCP_DST_MAC: destination MAC address, this is given because
+ * route lookup when configured might be invalid by the time we suspend,
+ * and doing a route lookup when suspending is no longer possible as it
+ * might require ARP querying.
+ * @NL80211_WOWLAN_TCP_SRC_PORT: source port (u16); optional, if not given a
+ * socket and port will be allocated
+ * @NL80211_WOWLAN_TCP_DST_PORT: destination port (u16)
+ * @NL80211_WOWLAN_TCP_DATA_PAYLOAD: data packet payload, at least one byte.
+ * For feature advertising, a u32 attribute holding the maximum length
+ * of the data payload.
+ * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ: data packet sequence configuration
+ * (if desired), a &struct nl80211_wowlan_tcp_data_seq. For feature
+ * advertising it is just a flag
+ * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN: data packet token configuration,
+ * see &struct nl80211_wowlan_tcp_data_token and for advertising see
+ * &struct nl80211_wowlan_tcp_data_token_feature.
+ * @NL80211_WOWLAN_TCP_DATA_INTERVAL: data interval in seconds, maximum
+ * interval in feature advertising (u32)
+ * @NL80211_WOWLAN_TCP_WAKE_PAYLOAD: wake packet payload, for advertising a
+ * u32 attribute holding the maximum length
+ * @NL80211_WOWLAN_TCP_WAKE_MASK: Wake packet payload mask, not used for
+ * feature advertising. The mask works like @NL80211_PKTPAT_MASK
+ * but on the TCP payload only.
+ * @NUM_NL80211_WOWLAN_TCP: number of TCP attributes
+ * @MAX_NL80211_WOWLAN_TCP: highest attribute number
+ */
+enum nl80211_wowlan_tcp_attrs {
+ __NL80211_WOWLAN_TCP_INVALID,
+ NL80211_WOWLAN_TCP_SRC_IPV4,
+ NL80211_WOWLAN_TCP_DST_IPV4,
+ NL80211_WOWLAN_TCP_DST_MAC,
+ NL80211_WOWLAN_TCP_SRC_PORT,
+ NL80211_WOWLAN_TCP_DST_PORT,
+ NL80211_WOWLAN_TCP_DATA_PAYLOAD,
+ NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ,
+ NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN,
+ NL80211_WOWLAN_TCP_DATA_INTERVAL,
+ NL80211_WOWLAN_TCP_WAKE_PAYLOAD,
+ NL80211_WOWLAN_TCP_WAKE_MASK,
+
+ /* keep last */
+ NUM_NL80211_WOWLAN_TCP,
+ MAX_NL80211_WOWLAN_TCP = NUM_NL80211_WOWLAN_TCP - 1
+};
+
+/**
+ * struct nl80211_coalesce_rule_support - coalesce rule support information
+ * @max_rules: maximum number of rules supported
+ * @pat: packet pattern support information
+ * @max_delay: maximum supported coalescing delay in msecs
+ *
+ * This struct is carried in %NL80211_ATTR_COALESCE_RULE in the
+ * capability information given by the kernel to userspace.
+ */
+struct nl80211_coalesce_rule_support {
+ __u32 max_rules;
+ struct nl80211_pattern_support pat;
+ __u32 max_delay;
+} __attribute__((packed));
+
+/**
+ * enum nl80211_attr_coalesce_rule - coalesce rule attribute
+ * @__NL80211_COALESCE_RULE_INVALID: invalid number for nested attribute
+ * @NL80211_ATTR_COALESCE_RULE_DELAY: delay in msecs used for packet coalescing
+ * @NL80211_ATTR_COALESCE_RULE_CONDITION: condition for packet coalescence,
+ * see &enum nl80211_coalesce_condition.
+ * @NL80211_ATTR_COALESCE_RULE_PKT_PATTERN: packet offset, pattern is matched
+ * after these fixed number of bytes of received packet
+ * @NUM_NL80211_ATTR_COALESCE_RULE: number of attributes
+ * @NL80211_ATTR_COALESCE_RULE_MAX: max attribute number
+ */
+enum nl80211_attr_coalesce_rule {
+ __NL80211_COALESCE_RULE_INVALID,
+ NL80211_ATTR_COALESCE_RULE_DELAY,
+ NL80211_ATTR_COALESCE_RULE_CONDITION,
+ NL80211_ATTR_COALESCE_RULE_PKT_PATTERN,
+
+ /* keep last */
+ NUM_NL80211_ATTR_COALESCE_RULE,
+ NL80211_ATTR_COALESCE_RULE_MAX = NUM_NL80211_ATTR_COALESCE_RULE - 1
+};
+
+/**
+ * enum nl80211_coalesce_condition - coalesce rule conditions
+ * @NL80211_COALESCE_CONDITION_MATCH: coalaesce Rx packets when patterns
+ * in a rule are matched.
+ * @NL80211_COALESCE_CONDITION_NO_MATCH: coalesce Rx packets when patterns
+ * in a rule are not matched.
+ */
+enum nl80211_coalesce_condition {
+ NL80211_COALESCE_CONDITION_MATCH,
+ NL80211_COALESCE_CONDITION_NO_MATCH
+};
+
+/**
+ * enum nl80211_iface_limit_attrs - limit attributes
+ * @NL80211_IFACE_LIMIT_UNSPEC: (reserved)
+ * @NL80211_IFACE_LIMIT_MAX: maximum number of interfaces that
+ * can be chosen from this set of interface types (u32)
+ * @NL80211_IFACE_LIMIT_TYPES: nested attribute containing a
+ * flag attribute for each interface type in this set
+ * @NUM_NL80211_IFACE_LIMIT: number of attributes
+ * @MAX_NL80211_IFACE_LIMIT: highest attribute number
+ */
+enum nl80211_iface_limit_attrs {
+ NL80211_IFACE_LIMIT_UNSPEC,
+ NL80211_IFACE_LIMIT_MAX,
+ NL80211_IFACE_LIMIT_TYPES,
+
+ /* keep last */
+ NUM_NL80211_IFACE_LIMIT,
+ MAX_NL80211_IFACE_LIMIT = NUM_NL80211_IFACE_LIMIT - 1
+};
+
+/**
+ * enum nl80211_if_combination_attrs -- interface combination attributes
+ *
+ * @NL80211_IFACE_COMB_UNSPEC: (reserved)
+ * @NL80211_IFACE_COMB_LIMITS: Nested attributes containing the limits
+ * for given interface types, see &enum nl80211_iface_limit_attrs.
+ * @NL80211_IFACE_COMB_MAXNUM: u32 attribute giving the total number of
+ * interfaces that can be created in this group. This number doesn't
+ * apply to interfaces purely managed in software, which are listed
+ * in a separate attribute %NL80211_ATTR_INTERFACES_SOFTWARE.
+ * @NL80211_IFACE_COMB_STA_AP_BI_MATCH: flag attribute specifying that
+ * beacon intervals within this group must be all the same even for
+ * infrastructure and AP/GO combinations, i.e. the GO(s) must adopt
+ * the infrastructure network's beacon interval.
+ * @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many
+ * different channels may be used within this group.
+ * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap
+ * of supported channel widths for radar detection.
+ * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap
+ * of supported regulatory regions for radar detection.
+ * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of
+ * different beacon intervals supported by all the interface combinations
+ * in this group (if not present, all beacon intervals be identical).
+ * @NUM_NL80211_IFACE_COMB: number of attributes
+ * @MAX_NL80211_IFACE_COMB: highest attribute number
+ *
+ * Examples:
+ * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2
+ * => allows an AP and a STA that must match BIs
+ *
+ * numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8,
+ * => allows 8 of AP/GO that can have BI gcd >= min gcd
+ *
+ * numbers = [ #{STA} <= 2 ], channels = 2, max = 2
+ * => allows two STAs on different channels
+ *
+ * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4
+ * => allows a STA plus three P2P interfaces
+ *
+ * The list of these four possibilities could completely be contained
+ * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate
+ * that any of these groups must match.
+ *
+ * "Combinations" of just a single interface will not be listed here,
+ * a single interface of any valid interface type is assumed to always
+ * be possible by itself. This means that implicitly, for each valid
+ * interface type, the following group always exists:
+ * numbers = [ #{<type>} <= 1 ], channels = 1, max = 1
+ */
+enum nl80211_if_combination_attrs {
+ NL80211_IFACE_COMB_UNSPEC,
+ NL80211_IFACE_COMB_LIMITS,
+ NL80211_IFACE_COMB_MAXNUM,
+ NL80211_IFACE_COMB_STA_AP_BI_MATCH,
+ NL80211_IFACE_COMB_NUM_CHANNELS,
+ NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+ NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
+ NL80211_IFACE_COMB_BI_MIN_GCD,
+
+ /* keep last */
+ NUM_NL80211_IFACE_COMB,
+ MAX_NL80211_IFACE_COMB = NUM_NL80211_IFACE_COMB - 1
+};
+
+
+/**
+ * enum nl80211_plink_state - state of a mesh peer link finite state machine
+ *
+ * @NL80211_PLINK_LISTEN: initial state, considered the implicit
+ * state of non existent mesh peer links
+ * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to
+ * this mesh peer
+ * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received
+ * from this mesh peer
+ * @NL80211_PLINK_CNF_RCVD: mesh plink confirm frame has been
+ * received from this mesh peer
+ * @NL80211_PLINK_ESTAB: mesh peer link is established
+ * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled
+ * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh
+ * plink are discarded
+ * @NUM_NL80211_PLINK_STATES: number of peer link states
+ * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states
+ */
+enum nl80211_plink_state {
+ NL80211_PLINK_LISTEN,
+ NL80211_PLINK_OPN_SNT,
+ NL80211_PLINK_OPN_RCVD,
+ NL80211_PLINK_CNF_RCVD,
+ NL80211_PLINK_ESTAB,
+ NL80211_PLINK_HOLDING,
+ NL80211_PLINK_BLOCKED,
+
+ /* keep last */
+ NUM_NL80211_PLINK_STATES,
+ MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1
+};
+
+/**
+ * enum nl80211_plink_action - actions to perform in mesh peers
+ *
+ * @NL80211_PLINK_ACTION_NO_ACTION: perform no action
+ * @NL80211_PLINK_ACTION_OPEN: start mesh peer link establishment
+ * @NL80211_PLINK_ACTION_BLOCK: block traffic from this mesh peer
+ * @NUM_NL80211_PLINK_ACTIONS: number of possible actions
+ */
+enum plink_actions {
+ NL80211_PLINK_ACTION_NO_ACTION,
+ NL80211_PLINK_ACTION_OPEN,
+ NL80211_PLINK_ACTION_BLOCK,
+
+ NUM_NL80211_PLINK_ACTIONS,
+};
+
+
+#define NL80211_KCK_LEN 16
+#define NL80211_KEK_LEN 16
+#define NL80211_REPLAY_CTR_LEN 8
+
+/**
+ * enum nl80211_rekey_data - attributes for GTK rekey offload
+ * @__NL80211_REKEY_DATA_INVALID: invalid number for nested attributes
+ * @NL80211_REKEY_DATA_KEK: key encryption key (binary)
+ * @NL80211_REKEY_DATA_KCK: key confirmation key (binary)
+ * @NL80211_REKEY_DATA_REPLAY_CTR: replay counter (binary)
+ * @NUM_NL80211_REKEY_DATA: number of rekey attributes (internal)
+ * @MAX_NL80211_REKEY_DATA: highest rekey attribute (internal)
+ */
+enum nl80211_rekey_data {
+ __NL80211_REKEY_DATA_INVALID,
+ NL80211_REKEY_DATA_KEK,
+ NL80211_REKEY_DATA_KCK,
+ NL80211_REKEY_DATA_REPLAY_CTR,
+
+ /* keep last */
+ NUM_NL80211_REKEY_DATA,
+ MAX_NL80211_REKEY_DATA = NUM_NL80211_REKEY_DATA - 1
+};
+
+/**
+ * enum nl80211_hidden_ssid - values for %NL80211_ATTR_HIDDEN_SSID
+ * @NL80211_HIDDEN_SSID_NOT_IN_USE: do not hide SSID (i.e., broadcast it in
+ * Beacon frames)
+ * @NL80211_HIDDEN_SSID_ZERO_LEN: hide SSID by using zero-length SSID element
+ * in Beacon frames
+ * @NL80211_HIDDEN_SSID_ZERO_CONTENTS: hide SSID by using correct length of SSID
+ * element in Beacon frames but zero out each byte in the SSID
+ */
+enum nl80211_hidden_ssid {
+ NL80211_HIDDEN_SSID_NOT_IN_USE,
+ NL80211_HIDDEN_SSID_ZERO_LEN,
+ NL80211_HIDDEN_SSID_ZERO_CONTENTS
+};
+
+/**
+ * enum nl80211_sta_wme_attr - station WME attributes
+ * @__NL80211_STA_WME_INVALID: invalid number for nested attribute
+ * @NL80211_STA_WME_UAPSD_QUEUES: bitmap of uapsd queues. the format
+ * is the same as the AC bitmap in the QoS info field.
+ * @NL80211_STA_WME_MAX_SP: max service period. the format is the same
+ * as the MAX_SP field in the QoS info field (but already shifted down).
+ * @__NL80211_STA_WME_AFTER_LAST: internal
+ * @NL80211_STA_WME_MAX: highest station WME attribute
+ */
+enum nl80211_sta_wme_attr {
+ __NL80211_STA_WME_INVALID,
+ NL80211_STA_WME_UAPSD_QUEUES,
+ NL80211_STA_WME_MAX_SP,
+
+ /* keep last */
+ __NL80211_STA_WME_AFTER_LAST,
+ NL80211_STA_WME_MAX = __NL80211_STA_WME_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_pmksa_candidate_attr - attributes for PMKSA caching candidates
+ * @__NL80211_PMKSA_CANDIDATE_INVALID: invalid number for nested attributes
+ * @NL80211_PMKSA_CANDIDATE_INDEX: candidate index (u32; the smaller, the higher
+ * priority)
+ * @NL80211_PMKSA_CANDIDATE_BSSID: candidate BSSID (6 octets)
+ * @NL80211_PMKSA_CANDIDATE_PREAUTH: RSN pre-authentication supported (flag)
+ * @NUM_NL80211_PMKSA_CANDIDATE: number of PMKSA caching candidate attributes
+ * (internal)
+ * @MAX_NL80211_PMKSA_CANDIDATE: highest PMKSA caching candidate attribute
+ * (internal)
+ */
+enum nl80211_pmksa_candidate_attr {
+ __NL80211_PMKSA_CANDIDATE_INVALID,
+ NL80211_PMKSA_CANDIDATE_INDEX,
+ NL80211_PMKSA_CANDIDATE_BSSID,
+ NL80211_PMKSA_CANDIDATE_PREAUTH,
+
+ /* keep last */
+ NUM_NL80211_PMKSA_CANDIDATE,
+ MAX_NL80211_PMKSA_CANDIDATE = NUM_NL80211_PMKSA_CANDIDATE - 1
+};
+
+/**
+ * enum nl80211_tdls_operation - values for %NL80211_ATTR_TDLS_OPERATION
+ * @NL80211_TDLS_DISCOVERY_REQ: Send a TDLS discovery request
+ * @NL80211_TDLS_SETUP: Setup TDLS link
+ * @NL80211_TDLS_TEARDOWN: Teardown a TDLS link which is already established
+ * @NL80211_TDLS_ENABLE_LINK: Enable TDLS link
+ * @NL80211_TDLS_DISABLE_LINK: Disable TDLS link
+ */
+enum nl80211_tdls_operation {
+ NL80211_TDLS_DISCOVERY_REQ,
+ NL80211_TDLS_SETUP,
+ NL80211_TDLS_TEARDOWN,
+ NL80211_TDLS_ENABLE_LINK,
+ NL80211_TDLS_DISABLE_LINK,
+};
+
+/*
+ * enum nl80211_ap_sme_features - device-integrated AP features
+ * Reserved for future use, no bits are defined in
+ * NL80211_ATTR_DEVICE_AP_SME yet.
+enum nl80211_ap_sme_features {
+};
+ */
+
+/**
+ * enum nl80211_feature_flags - device/driver features
+ * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back
+ * TX status to the socket error queue when requested with the
+ * socket option.
+ * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates.
+ * @NL80211_FEATURE_INACTIVITY_TIMER: This driver takes care of freeing up
+ * the connected inactive stations in AP mode.
+ * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested
+ * to work properly to suppport receiving regulatory hints from
+ * cellular base stations.
+ * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: (no longer available, only
+ * here to reserve the value for API/ABI compatibility)
+ * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of
+ * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station
+ * mode
+ * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan
+ * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported
+ * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif
+ * @NL80211_FEATURE_VIF_TXPOWER: The driver supports per-vif TX power setting
+ * @NL80211_FEATURE_NEED_OBSS_SCAN: The driver expects userspace to perform
+ * OBSS scans and generate 20/40 BSS coex reports. This flag is used only
+ * for drivers implementing the CONNECT API, for AUTH/ASSOC it is implied.
+ * @NL80211_FEATURE_P2P_GO_CTWIN: P2P GO implementation supports CT Window
+ * setting
+ * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic
+ * powersave
+ * @NL80211_FEATURE_FULL_AP_CLIENT_STATE: The driver supports full state
+ * transitions for AP clients. Without this flag (and if the driver
+ * doesn't have the AP SME in the device) the driver supports adding
+ * stations only when they're associated and adds them in associated
+ * state (to later be transitioned into authorized), with this flag
+ * they should be added before even sending the authentication reply
+ * and then transitioned into authenticated, associated and authorized
+ * states using station flags.
+ * Note that even for drivers that support this, the default is to add
+ * stations in authenticated/associated state, so to add unauthenticated
+ * stations the authenticated/associated bits have to be set in the mask.
+ * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits
+ * (HT40, VHT 80/160 MHz) if this flag is set
+ * @NL80211_FEATURE_USERSPACE_MPM: This driver supports a userspace Mesh
+ * Peering Management entity which may be implemented by registering for
+ * beacons or NL80211_CMD_NEW_PEER_CANDIDATE events. The mesh beacon is
+ * still generated by the driver.
+ * @NL80211_FEATURE_ACTIVE_MONITOR: This driver supports an active monitor
+ * interface. An active monitor interface behaves like a normal monitor
+ * interface, but gets added to the driver. It ensures that incoming
+ * unicast packets directed at the configured interface address get ACKed.
+ * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic
+ * channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the
+ * lifetime of a BSS.
+ * @NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES: This device adds a DS Parameter
+ * Set IE to probe requests.
+ * @NL80211_FEATURE_WFA_TPC_IE_IN_PROBES: This device adds a WFA TPC Report IE
+ * to probe requests.
+ * @NL80211_FEATURE_QUIET: This device, in client mode, supports Quiet Period
+ * requests sent to it by an AP.
+ * @NL80211_FEATURE_TX_POWER_INSERTION: This device is capable of inserting the
+ * current tx power value into the TPC Report IE in the spectrum
+ * management TPC Report action frame, and in the Radio Measurement Link
+ * Measurement Report action frame.
+ * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout
+ * estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used
+ * to enable dynack.
+ * @NL80211_FEATURE_STATIC_SMPS: Device supports static spatial
+ * multiplexing powersave, ie. can turn off all but one chain
+ * even on HT connections that should be using more chains.
+ * @NL80211_FEATURE_DYNAMIC_SMPS: Device supports dynamic spatial
+ * multiplexing powersave, ie. can turn off all but one chain
+ * and then wake the rest up as required after, for example,
+ * rts/cts handshake.
+ * @NL80211_FEATURE_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM
+ * TSPEC sessions (TID aka TSID 0-7) with the %NL80211_CMD_ADD_TX_TS
+ * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it
+ * needs to be able to handle Block-Ack agreements and other things.
+ * @NL80211_FEATURE_MAC_ON_CREATE: Device supports configuring
+ * the vif's MAC address upon creation.
+ * See 'macaddr' field in the vif_params (cfg80211.h).
+ * @NL80211_FEATURE_TDLS_CHANNEL_SWITCH: Driver supports channel switching when
+ * operating as a TDLS peer.
+ * @NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR: This device/driver supports using a
+ * random MAC address during scan (if the device is unassociated); the
+ * %NL80211_SCAN_FLAG_RANDOM_ADDR flag may be set for scans and the MAC
+ * address mask/value will be used.
+ * @NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR: This device/driver supports
+ * using a random MAC address for every scan iteration during scheduled
+ * scan (while not associated), the %NL80211_SCAN_FLAG_RANDOM_ADDR may
+ * be set for scheduled scan and the MAC address mask/value will be used.
+ * @NL80211_FEATURE_ND_RANDOM_MAC_ADDR: This device/driver supports using a
+ * random MAC address for every scan iteration during "net detect", i.e.
+ * scan in unassociated WoWLAN, the %NL80211_SCAN_FLAG_RANDOM_ADDR may
+ * be set for scheduled scan and the MAC address mask/value will be used.
+ */
+enum nl80211_feature_flags {
+ NL80211_FEATURE_SK_TX_STATUS = 1 << 0,
+ NL80211_FEATURE_HT_IBSS = 1 << 1,
+ NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2,
+ NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3,
+ NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4,
+ NL80211_FEATURE_SAE = 1 << 5,
+ NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6,
+ NL80211_FEATURE_SCAN_FLUSH = 1 << 7,
+ NL80211_FEATURE_AP_SCAN = 1 << 8,
+ NL80211_FEATURE_VIF_TXPOWER = 1 << 9,
+ NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10,
+ NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11,
+ NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12,
+ /* bit 13 is reserved */
+ NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14,
+ NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15,
+ NL80211_FEATURE_USERSPACE_MPM = 1 << 16,
+ NL80211_FEATURE_ACTIVE_MONITOR = 1 << 17,
+ NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE = 1 << 18,
+ NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES = 1 << 19,
+ NL80211_FEATURE_WFA_TPC_IE_IN_PROBES = 1 << 20,
+ NL80211_FEATURE_QUIET = 1 << 21,
+ NL80211_FEATURE_TX_POWER_INSERTION = 1 << 22,
+ NL80211_FEATURE_ACKTO_ESTIMATION = 1 << 23,
+ NL80211_FEATURE_STATIC_SMPS = 1 << 24,
+ NL80211_FEATURE_DYNAMIC_SMPS = 1 << 25,
+ NL80211_FEATURE_SUPPORTS_WMM_ADMISSION = 1 << 26,
+ NL80211_FEATURE_MAC_ON_CREATE = 1 << 27,
+ NL80211_FEATURE_TDLS_CHANNEL_SWITCH = 1 << 28,
+ NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR = 1 << 29,
+ NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR = 1 << 30,
+ NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1U << 31,
+};
+
+/**
+ * enum nl80211_ext_feature_index - bit index of extended features.
+ * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates.
+ * @NL80211_EXT_FEATURE_RRM: This driver supports RRM. When featured, user can
+ * can request to use RRM (see %NL80211_ATTR_USE_RRM) with
+ * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set
+ * the ASSOC_REQ_USE_RRM flag in the association request even if
+ * NL80211_FEATURE_QUIET is not advertized.
+ * @NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER: This device supports MU-MIMO air
+ * sniffer which means that it can be configured to hear packets from
+ * certain groups which can be configured by the
+ * %NL80211_ATTR_MU_MIMO_GROUP_DATA attribute,
+ * or can be configured to follow a station by configuring the
+ * %NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute.
+ * @NL80211_EXT_FEATURE_SCAN_START_TIME: This driver includes the actual
+ * time the scan started in scan results event. The time is the TSF of
+ * the BSS that the interface that requested the scan is connected to
+ * (if available).
+ * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the
+ * time the last beacon/probe was received. The time is the TSF of the
+ * BSS that the interface that requested the scan is connected to
+ * (if available).
+ * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of
+ * channel dwell time.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate
+ * configuration (AP/mesh), supporting a legacy (non HT/VHT) rate.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate
+ * configuration (AP/mesh) with HT rates.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate
+ * configuration (AP/mesh) with VHT rates.
+ * @NL80211_EXT_FEATURE_FILS_STA: This driver supports Fast Initial Link Setup
+ * with user space SME (NL80211_CMD_AUTHENTICATE) in station mode.
+ * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA: This driver supports randomized TA
+ * in @NL80211_CMD_FRAME while not associated.
+ * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED: This driver supports
+ * randomized TA in @NL80211_CMD_FRAME while associated.
+ * @NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI: The driver supports sched_scan
+ * for reporting BSSs with better RSSI than the current connected BSS
+ * (%NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI).
+ * @NL80211_EXT_FEATURE_CQM_RSSI_LIST: With this driver the
+ * %NL80211_ATTR_CQM_RSSI_THOLD attribute accepts a list of zero or more
+ * RSSI threshold values to monitor rather than exactly one threshold.
+ * @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD: Driver SME supports FILS shared key
+ * authentication with %NL80211_CMD_CONNECT.
+ * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK: Device wants to do 4-way
+ * handshake with PSK in station mode (PSK is passed as part of the connect
+ * and associate commands), doing it in the host might not be supported.
+ * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X: Device wants to do doing 4-way
+ * handshake with 802.1X in station mode (will pass EAP frames to the host
+ * and accept the set_pmk/del_pmk commands), doing it in the host might not
+ * be supported.
+ * @NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME: Driver is capable of overriding
+ * the max channel attribute in the FILS request params IE with the
+ * actual dwell time.
+ * @NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP: Driver accepts broadcast probe
+ * response
+ * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE: Driver supports sending
+ * the first probe request in each channel at rate of at least 5.5Mbps.
+ * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports
+ * probe request tx deferral and suppression
+ * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
+ * value in %NL80211_ATTR_USE_MFP.
+ * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
+ * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
+ * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
+ * @NL80211_EXT_FEATURE_DFS_OFFLOAD: HW/driver will offload DFS actions.
+ * Device or driver will do all DFS-related actions by itself,
+ * informing user-space about CAC progress, radar detection event,
+ * channel change triggered by radar detection event.
+ * No need to start CAC from user-space, no need to react to
+ * "radar detected" event.
+ * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and
+ * receiving control port frames over nl80211 instead of the netdevice.
+ * @NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT: This driver/device supports
+ * (average) ACK signal strength reporting.
+ * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate
+ * TXQs.
+ * @NL80211_EXT_FEATURE_SCAN_RANDOM_SN: Driver/device supports randomizing the
+ * SN in probe request frames if requested by %NL80211_SCAN_FLAG_RANDOM_SN.
+ * @NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT: Driver/device can omit all data
+ * except for supported rates from the probe request content if requested
+ * by the %NL80211_SCAN_FLAG_MIN_PREQ_CONTENT flag.
+ * @NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER: Driver supports enabling fine
+ * timing measurement responder role.
+ *
+ * @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0: Driver/device confirm that they are
+ * able to rekey an in-use key correctly. Userspace must not rekey PTK keys
+ * if this flag is not set. Ignoring this can leak clear text packets and/or
+ * freeze the connection.
+ * @NL80211_EXT_FEATURE_EXT_KEY_ID: Driver supports "Extended Key ID for
+ * Individually Addressed Frames" from IEEE802.11-2016.
+ *
+ * @NL80211_EXT_FEATURE_AIRTIME_FAIRNESS: Driver supports getting airtime
+ * fairness for transmitted packets and has enabled airtime fairness
+ * scheduling.
+ *
+ * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching
+ * (set/del PMKSA operations) in AP mode.
+ *
+ * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports
+ * filtering of sched scan results using band specific RSSI thresholds.
+ *
+ * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power
+ * to a station.
+ *
+ * @NL80211_EXT_FEATURE_SAE_OFFLOAD: Device wants to do SAE authentication in
+ * station mode (SAE password is passed as part of the connect command).
+ *
+ * @NUM_NL80211_EXT_FEATURES: number of extended features.
+ * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
+ */
+enum nl80211_ext_feature_index {
+ NL80211_EXT_FEATURE_VHT_IBSS,
+ NL80211_EXT_FEATURE_RRM,
+ NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER,
+ NL80211_EXT_FEATURE_SCAN_START_TIME,
+ NL80211_EXT_FEATURE_BSS_PARENT_TSF,
+ NL80211_EXT_FEATURE_SET_SCAN_DWELL,
+ NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
+ NL80211_EXT_FEATURE_BEACON_RATE_HT,
+ NL80211_EXT_FEATURE_BEACON_RATE_VHT,
+ NL80211_EXT_FEATURE_FILS_STA,
+ NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA,
+ NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED,
+ NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI,
+ NL80211_EXT_FEATURE_CQM_RSSI_LIST,
+ NL80211_EXT_FEATURE_FILS_SK_OFFLOAD,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X,
+ NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME,
+ NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
+ NL80211_EXT_FEATURE_MFP_OPTIONAL,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
+ NL80211_EXT_FEATURE_DFS_OFFLOAD,
+ NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211,
+ NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT,
+ /* we renamed this - stay compatible */
+ NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT = NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT,
+ NL80211_EXT_FEATURE_TXQS,
+ NL80211_EXT_FEATURE_SCAN_RANDOM_SN,
+ NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT,
+ NL80211_EXT_FEATURE_CAN_REPLACE_PTK0,
+ NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER,
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS,
+ NL80211_EXT_FEATURE_AP_PMKSA_CACHING,
+ NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD,
+ NL80211_EXT_FEATURE_EXT_KEY_ID,
+ NL80211_EXT_FEATURE_STA_TX_PWR,
+ NL80211_EXT_FEATURE_SAE_OFFLOAD,
+
+ /* add new features before the definition below */
+ NUM_NL80211_EXT_FEATURES,
+ MAX_NL80211_EXT_FEATURES = NUM_NL80211_EXT_FEATURES - 1
+};
+
+/**
+ * enum nl80211_probe_resp_offload_support_attr - optional supported
+ * protocols for probe-response offloading by the driver/FW.
+ * To be used with the %NL80211_ATTR_PROBE_RESP_OFFLOAD attribute.
+ * Each enum value represents a bit in the bitmap of supported
+ * protocols. Typically a subset of probe-requests belonging to a
+ * supported protocol will be excluded from offload and uploaded
+ * to the host.
+ *
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS: Support for WPS ver. 1
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2: Support for WPS ver. 2
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P: Support for P2P
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U: Support for 802.11u
+ */
+enum nl80211_probe_resp_offload_support_attr {
+ NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS = 1<<0,
+ NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 = 1<<1,
+ NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P = 1<<2,
+ NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U = 1<<3,
+};
+
+/**
+ * enum nl80211_connect_failed_reason - connection request failed reasons
+ * @NL80211_CONN_FAIL_MAX_CLIENTS: Maximum number of clients that can be
+ * handled by the AP is reached.
+ * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Connection request is rejected due to ACL.
+ */
+enum nl80211_connect_failed_reason {
+ NL80211_CONN_FAIL_MAX_CLIENTS,
+ NL80211_CONN_FAIL_BLOCKED_CLIENT,
+};
+
+/**
+ * enum nl80211_timeout_reason - timeout reasons
+ *
+ * @NL80211_TIMEOUT_UNSPECIFIED: Timeout reason unspecified.
+ * @NL80211_TIMEOUT_SCAN: Scan (AP discovery) timed out.
+ * @NL80211_TIMEOUT_AUTH: Authentication timed out.
+ * @NL80211_TIMEOUT_ASSOC: Association timed out.
+ */
+enum nl80211_timeout_reason {
+ NL80211_TIMEOUT_UNSPECIFIED,
+ NL80211_TIMEOUT_SCAN,
+ NL80211_TIMEOUT_AUTH,
+ NL80211_TIMEOUT_ASSOC,
+};
+
+/**
+ * enum nl80211_scan_flags - scan request control flags
+ *
+ * Scan request control flags are used to control the handling
+ * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN
+ * requests.
+ *
+ * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and
+ * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only
+ * one of them can be used in the request.
+ *
+ * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority
+ * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning
+ * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured
+ * as AP and the beaconing has already been configured. This attribute is
+ * dangerous because will destroy stations performance as a lot of frames
+ * will be lost while scanning off-channel, therefore it must be used only
+ * when really needed
+ * @NL80211_SCAN_FLAG_RANDOM_ADDR: use a random MAC address for this scan (or
+ * for scheduled scan: a different one for every scan iteration). When the
+ * flag is set, depending on device capabilities the @NL80211_ATTR_MAC and
+ * @NL80211_ATTR_MAC_MASK attributes may also be given in which case only
+ * the masked bits will be preserved from the MAC address and the remainder
+ * randomised. If the attributes are not given full randomisation (46 bits,
+ * locally administered 1, multicast 0) is assumed.
+ * This flag must not be requested when the feature isn't supported, check
+ * the nl80211 feature flags for the device.
+ * @NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME: fill the dwell time in the FILS
+ * request parameters IE in the probe request
+ * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses
+ * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at
+ * rate of at least 5.5M. In case non OCE AP is discovered in the channel,
+ * only the first probe req in the channel will be sent in high rate.
+ * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request
+ * tx deferral (dot11FILSProbeDelay shall be set to 15ms)
+ * and suppression (if it has received a broadcast Probe Response frame,
+ * Beacon frame or FILS Discovery frame from an AP that the STA considers
+ * a suitable candidate for (re-)association - suitable in terms of
+ * SSID and/or RSSI.
+ * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to
+ * accomplish the scan. Thus, this flag intends the driver to perform the
+ * scan request with lesser span/duration. It is specific to the driver
+ * implementations on how this is accomplished. Scan accuracy may get
+ * impacted with this flag.
+ * @NL80211_SCAN_FLAG_LOW_POWER: This flag intends the scan attempts to consume
+ * optimal possible power. Drivers can resort to their specific means to
+ * optimize the power. Scan accuracy may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here intends to the extent of scan
+ * results obtained. Thus HIGH_ACCURACY scan flag aims to get maximum
+ * possible scan results. This flag hints the driver to use the best
+ * possible scan configuration to improve the accuracy in scanning.
+ * Latency and power use may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_RANDOM_SN: randomize the sequence number in probe
+ * request frames from this scan to avoid correlation/tracking being
+ * possible.
+ * @NL80211_SCAN_FLAG_MIN_PREQ_CONTENT: minimize probe request content to
+ * only have supported rates and no additional capabilities (unless
+ * added by userspace explicitly.)
+ */
+enum nl80211_scan_flags {
+ NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0,
+ NL80211_SCAN_FLAG_FLUSH = 1<<1,
+ NL80211_SCAN_FLAG_AP = 1<<2,
+ NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3,
+ NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME = 1<<4,
+ NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5,
+ NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6,
+ NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7,
+ NL80211_SCAN_FLAG_LOW_SPAN = 1<<8,
+ NL80211_SCAN_FLAG_LOW_POWER = 1<<9,
+ NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10,
+ NL80211_SCAN_FLAG_RANDOM_SN = 1<<11,
+ NL80211_SCAN_FLAG_MIN_PREQ_CONTENT = 1<<12,
+};
+
+/**
+ * enum nl80211_acl_policy - access control policy
+ *
+ * Access control policy is applied on a MAC list set by
+ * %NL80211_CMD_START_AP and %NL80211_CMD_SET_MAC_ACL, to
+ * be used with %NL80211_ATTR_ACL_POLICY.
+ *
+ * @NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED: Deny stations which are
+ * listed in ACL, i.e. allow all the stations which are not listed
+ * in ACL to authenticate.
+ * @NL80211_ACL_POLICY_DENY_UNLESS_LISTED: Allow the stations which are listed
+ * in ACL, i.e. deny all the stations which are not listed in ACL.
+ */
+enum nl80211_acl_policy {
+ NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED,
+ NL80211_ACL_POLICY_DENY_UNLESS_LISTED,
+};
+
+/**
+ * enum nl80211_smps_mode - SMPS mode
+ *
+ * Requested SMPS mode (for AP mode)
+ *
+ * @NL80211_SMPS_OFF: SMPS off (use all antennas).
+ * @NL80211_SMPS_STATIC: static SMPS (use a single antenna)
+ * @NL80211_SMPS_DYNAMIC: dynamic smps (start with a single antenna and
+ * turn on other antennas after CTS/RTS).
+ */
+enum nl80211_smps_mode {
+ NL80211_SMPS_OFF,
+ NL80211_SMPS_STATIC,
+ NL80211_SMPS_DYNAMIC,
+
+ __NL80211_SMPS_AFTER_LAST,
+ NL80211_SMPS_MAX = __NL80211_SMPS_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_radar_event - type of radar event for DFS operation
+ *
+ * Type of event to be used with NL80211_ATTR_RADAR_EVENT to inform userspace
+ * about detected radars or success of the channel available check (CAC)
+ *
+ * @NL80211_RADAR_DETECTED: A radar pattern has been detected. The channel is
+ * now unusable.
+ * @NL80211_RADAR_CAC_FINISHED: Channel Availability Check has been finished,
+ * the channel is now available.
+ * @NL80211_RADAR_CAC_ABORTED: Channel Availability Check has been aborted, no
+ * change to the channel status.
+ * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is
+ * over, channel becomes usable.
+ * @NL80211_RADAR_PRE_CAC_EXPIRED: Channel Availability Check done on this
+ * non-operating channel is expired and no longer valid. New CAC must
+ * be done on this channel before starting the operation. This is not
+ * applicable for ETSI dfs domain where pre-CAC is valid for ever.
+ * @NL80211_RADAR_CAC_STARTED: Channel Availability Check has been started,
+ * should be generated by HW if NL80211_EXT_FEATURE_DFS_OFFLOAD is enabled.
+ */
+enum nl80211_radar_event {
+ NL80211_RADAR_DETECTED,
+ NL80211_RADAR_CAC_FINISHED,
+ NL80211_RADAR_CAC_ABORTED,
+ NL80211_RADAR_NOP_FINISHED,
+ NL80211_RADAR_PRE_CAC_EXPIRED,
+ NL80211_RADAR_CAC_STARTED,
+};
+
+/**
+ * enum nl80211_dfs_state - DFS states for channels
+ *
+ * Channel states used by the DFS code.
+ *
+ * @NL80211_DFS_USABLE: The channel can be used, but channel availability
+ * check (CAC) must be performed before using it for AP or IBSS.
+ * @NL80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it
+ * is therefore marked as not available.
+ * @NL80211_DFS_AVAILABLE: The channel has been CAC checked and is available.
+ */
+enum nl80211_dfs_state {
+ NL80211_DFS_USABLE,
+ NL80211_DFS_UNAVAILABLE,
+ NL80211_DFS_AVAILABLE,
+};
+
+/**
+ * enum enum nl80211_protocol_features - nl80211 protocol features
+ * @NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP: nl80211 supports splitting
+ * wiphy dumps (if requested by the application with the attribute
+ * %NL80211_ATTR_SPLIT_WIPHY_DUMP. Also supported is filtering the
+ * wiphy dump by %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFINDEX or
+ * %NL80211_ATTR_WDEV.
+ */
+enum nl80211_protocol_features {
+ NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP = 1 << 0,
+};
+
+/**
+ * enum nl80211_crit_proto_id - nl80211 critical protocol identifiers
+ *
+ * @NL80211_CRIT_PROTO_UNSPEC: protocol unspecified.
+ * @NL80211_CRIT_PROTO_DHCP: BOOTP or DHCPv6 protocol.
+ * @NL80211_CRIT_PROTO_EAPOL: EAPOL protocol.
+ * @NL80211_CRIT_PROTO_APIPA: APIPA protocol.
+ * @NUM_NL80211_CRIT_PROTO: must be kept last.
+ */
+enum nl80211_crit_proto_id {
+ NL80211_CRIT_PROTO_UNSPEC,
+ NL80211_CRIT_PROTO_DHCP,
+ NL80211_CRIT_PROTO_EAPOL,
+ NL80211_CRIT_PROTO_APIPA,
+ /* add other protocols before this one */
+ NUM_NL80211_CRIT_PROTO
+};
+
+/* maximum duration for critical protocol measures */
+#define NL80211_CRIT_PROTO_MAX_DURATION 5000 /* msec */
+
+/**
+ * enum nl80211_rxmgmt_flags - flags for received management frame.
+ *
+ * Used by cfg80211_rx_mgmt()
+ *
+ * @NL80211_RXMGMT_FLAG_ANSWERED: frame was answered by device/driver.
+ * @NL80211_RXMGMT_FLAG_EXTERNAL_AUTH: Host driver intends to offload
+ * the authentication. Exclusively defined for host drivers that
+ * advertises the SME functionality but would like the userspace
+ * to handle certain authentication algorithms (e.g. SAE).
+ */
+enum nl80211_rxmgmt_flags {
+ NL80211_RXMGMT_FLAG_ANSWERED = 1 << 0,
+ NL80211_RXMGMT_FLAG_EXTERNAL_AUTH = 1 << 1,
+};
+
+/*
+ * If this flag is unset, the lower 24 bits are an OUI, if set
+ * a Linux nl80211 vendor ID is used (no such IDs are allocated
+ * yet, so that's not valid so far)
+ */
+#define NL80211_VENDOR_ID_IS_LINUX 0x80000000
+
+/**
+ * struct nl80211_vendor_cmd_info - vendor command data
+ * @vendor_id: If the %NL80211_VENDOR_ID_IS_LINUX flag is clear, then the
+ * value is a 24-bit OUI; if it is set then a separately allocated ID
+ * may be used, but no such IDs are allocated yet. New IDs should be
+ * added to this file when needed.
+ * @subcmd: sub-command ID for the command
+ */
+struct nl80211_vendor_cmd_info {
+ __u32 vendor_id;
+ __u32 subcmd;
+};
+
+/**
+ * enum nl80211_tdls_peer_capability - TDLS peer flags.
+ *
+ * Used by tdls_mgmt() to determine which conditional elements need
+ * to be added to TDLS Setup frames.
+ *
+ * @NL80211_TDLS_PEER_HT: TDLS peer is HT capable.
+ * @NL80211_TDLS_PEER_VHT: TDLS peer is VHT capable.
+ * @NL80211_TDLS_PEER_WMM: TDLS peer is WMM capable.
+ */
+enum nl80211_tdls_peer_capability {
+ NL80211_TDLS_PEER_HT = 1<<0,
+ NL80211_TDLS_PEER_VHT = 1<<1,
+ NL80211_TDLS_PEER_WMM = 1<<2,
+};
+
+/**
+ * enum nl80211_sched_scan_plan - scanning plan for scheduled scan
+ * @__NL80211_SCHED_SCAN_PLAN_INVALID: attribute number 0 is reserved
+ * @NL80211_SCHED_SCAN_PLAN_INTERVAL: interval between scan iterations. In
+ * seconds (u32).
+ * @NL80211_SCHED_SCAN_PLAN_ITERATIONS: number of scan iterations in this
+ * scan plan (u32). The last scan plan must not specify this attribute
+ * because it will run infinitely. A value of zero is invalid as it will
+ * make the scan plan meaningless.
+ * @NL80211_SCHED_SCAN_PLAN_MAX: highest scheduled scan plan attribute number
+ * currently defined
+ * @__NL80211_SCHED_SCAN_PLAN_AFTER_LAST: internal use
+ */
+enum nl80211_sched_scan_plan {
+ __NL80211_SCHED_SCAN_PLAN_INVALID,
+ NL80211_SCHED_SCAN_PLAN_INTERVAL,
+ NL80211_SCHED_SCAN_PLAN_ITERATIONS,
+
+ /* keep last */
+ __NL80211_SCHED_SCAN_PLAN_AFTER_LAST,
+ NL80211_SCHED_SCAN_PLAN_MAX =
+ __NL80211_SCHED_SCAN_PLAN_AFTER_LAST - 1
+};
+
+/**
+ * struct nl80211_bss_select_rssi_adjust - RSSI adjustment parameters.
+ *
+ * @band: band of BSS that must match for RSSI value adjustment. The value
+ * of this field is according to &enum nl80211_band.
+ * @delta: value used to adjust the RSSI value of matching BSS in dB.
+ */
+struct nl80211_bss_select_rssi_adjust {
+ __u8 band;
+ __s8 delta;
+} __attribute__((packed));
+
+/**
+ * enum nl80211_bss_select_attr - attributes for bss selection.
+ *
+ * @__NL80211_BSS_SELECT_ATTR_INVALID: reserved.
+ * @NL80211_BSS_SELECT_ATTR_RSSI: Flag indicating only RSSI-based BSS selection
+ * is requested.
+ * @NL80211_BSS_SELECT_ATTR_BAND_PREF: attribute indicating BSS
+ * selection should be done such that the specified band is preferred.
+ * When there are multiple BSS-es in the preferred band, the driver
+ * shall use RSSI-based BSS selection as a second step. The value of
+ * this attribute is according to &enum nl80211_band (u32).
+ * @NL80211_BSS_SELECT_ATTR_RSSI_ADJUST: When present the RSSI level for
+ * BSS-es in the specified band is to be adjusted before doing
+ * RSSI-based BSS selection. The attribute value is a packed structure
+ * value as specified by &struct nl80211_bss_select_rssi_adjust.
+ * @NL80211_BSS_SELECT_ATTR_MAX: highest bss select attribute number.
+ * @__NL80211_BSS_SELECT_ATTR_AFTER_LAST: internal use.
+ *
+ * One and only one of these attributes are found within %NL80211_ATTR_BSS_SELECT
+ * for %NL80211_CMD_CONNECT. It specifies the required BSS selection behaviour
+ * which the driver shall use.
+ */
+enum nl80211_bss_select_attr {
+ __NL80211_BSS_SELECT_ATTR_INVALID,
+ NL80211_BSS_SELECT_ATTR_RSSI,
+ NL80211_BSS_SELECT_ATTR_BAND_PREF,
+ NL80211_BSS_SELECT_ATTR_RSSI_ADJUST,
+
+ /* keep last */
+ __NL80211_BSS_SELECT_ATTR_AFTER_LAST,
+ NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_nan_function_type - NAN function type
+ *
+ * Defines the function type of a NAN function
+ *
+ * @NL80211_NAN_FUNC_PUBLISH: function is publish
+ * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe
+ * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up
+ */
+enum nl80211_nan_function_type {
+ NL80211_NAN_FUNC_PUBLISH,
+ NL80211_NAN_FUNC_SUBSCRIBE,
+ NL80211_NAN_FUNC_FOLLOW_UP,
+
+ /* keep last */
+ __NL80211_NAN_FUNC_TYPE_AFTER_LAST,
+ NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1,
+};
+
+/**
+ * enum nl80211_nan_publish_type - NAN publish tx type
+ *
+ * Defines how to send publish Service Discovery Frames
+ *
+ * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited
+ * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited
+ */
+enum nl80211_nan_publish_type {
+ NL80211_NAN_SOLICITED_PUBLISH = 1 << 0,
+ NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1,
+};
+
+/**
+ * enum nl80211_nan_func_term_reason - NAN functions termination reason
+ *
+ * Defines termination reasons of a NAN function
+ *
+ * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user
+ * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout
+ * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored
+ */
+enum nl80211_nan_func_term_reason {
+ NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST,
+ NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED,
+ NL80211_NAN_FUNC_TERM_REASON_ERROR,
+};
+
+#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6
+#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff
+#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff
+
+/**
+ * enum nl80211_nan_func_attributes - NAN function attributes
+ * @__NL80211_NAN_FUNC_INVALID: invalid
+ * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8).
+ * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as
+ * specified in NAN spec. This is a binary attribute.
+ * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is
+ * publish. Defines the transmission type for the publish Service Discovery
+ * Frame, see &enum nl80211_nan_publish_type. Its type is u8.
+ * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited
+ * publish. Should the solicited publish Service Discovery Frame be sent to
+ * the NAN Broadcast address. This is a flag.
+ * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is
+ * subscribe. Is the subscribe active. This is a flag.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up.
+ * The instance ID for the follow up Service Discovery Frame. This is u8.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type
+ * is follow up. This is a u8.
+ * The requestor instance ID for the follow up Service Discovery Frame.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the
+ * follow up Service Discovery Frame. This is a binary attribute.
+ * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a
+ * close range. The range itself (RSSI) is defined by the device.
+ * This is a flag.
+ * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should
+ * stay active. If not present infinite TTL is assumed. This is a u32.
+ * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service
+ * specific info. This is a binary attribute.
+ * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute.
+ * See &enum nl80211_nan_srf_attributes.
+ * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested
+ * attribute. It is a list of binary values.
+ * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a
+ * nested attribute. It is a list of binary values.
+ * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function.
+ * Its type is u8 and it cannot be 0.
+ * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason.
+ * See &enum nl80211_nan_func_term_reason.
+ *
+ * @NUM_NL80211_NAN_FUNC_ATTR: internal
+ * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute
+ */
+enum nl80211_nan_func_attributes {
+ __NL80211_NAN_FUNC_INVALID,
+ NL80211_NAN_FUNC_TYPE,
+ NL80211_NAN_FUNC_SERVICE_ID,
+ NL80211_NAN_FUNC_PUBLISH_TYPE,
+ NL80211_NAN_FUNC_PUBLISH_BCAST,
+ NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE,
+ NL80211_NAN_FUNC_FOLLOW_UP_ID,
+ NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID,
+ NL80211_NAN_FUNC_FOLLOW_UP_DEST,
+ NL80211_NAN_FUNC_CLOSE_RANGE,
+ NL80211_NAN_FUNC_TTL,
+ NL80211_NAN_FUNC_SERVICE_INFO,
+ NL80211_NAN_FUNC_SRF,
+ NL80211_NAN_FUNC_RX_MATCH_FILTER,
+ NL80211_NAN_FUNC_TX_MATCH_FILTER,
+ NL80211_NAN_FUNC_INSTANCE_ID,
+ NL80211_NAN_FUNC_TERM_REASON,
+
+ /* keep last */
+ NUM_NL80211_NAN_FUNC_ATTR,
+ NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1
+};
+
+/**
+ * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes
+ * @__NL80211_NAN_SRF_INVALID: invalid
+ * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set.
+ * This is a flag.
+ * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if
+ * %NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary.
+ * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if
+ * %NL80211_NAN_SRF_BF is present. This is a u8.
+ * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if
+ * and only if %NL80211_NAN_SRF_BF isn't present. This is a nested
+ * attribute. Each nested attribute is a MAC address.
+ * @NUM_NL80211_NAN_SRF_ATTR: internal
+ * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute
+ */
+enum nl80211_nan_srf_attributes {
+ __NL80211_NAN_SRF_INVALID,
+ NL80211_NAN_SRF_INCLUDE,
+ NL80211_NAN_SRF_BF,
+ NL80211_NAN_SRF_BF_IDX,
+ NL80211_NAN_SRF_MAC_ADDRS,
+
+ /* keep last */
+ NUM_NL80211_NAN_SRF_ATTR,
+ NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1,
+};
+
+/**
+ * enum nl80211_nan_match_attributes - NAN match attributes
+ * @__NL80211_NAN_MATCH_INVALID: invalid
+ * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the
+ * match. This is a nested attribute.
+ * See &enum nl80211_nan_func_attributes.
+ * @NL80211_NAN_MATCH_FUNC_PEER: the peer function
+ * that caused the match. This is a nested attribute.
+ * See &enum nl80211_nan_func_attributes.
+ *
+ * @NUM_NL80211_NAN_MATCH_ATTR: internal
+ * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute
+ */
+enum nl80211_nan_match_attributes {
+ __NL80211_NAN_MATCH_INVALID,
+ NL80211_NAN_MATCH_FUNC_LOCAL,
+ NL80211_NAN_MATCH_FUNC_PEER,
+
+ /* keep last */
+ NUM_NL80211_NAN_MATCH_ATTR,
+ NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
+};
+
+/**
+ * nl80211_external_auth_action - Action to perform with external
+ * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION.
+ * @NL80211_EXTERNAL_AUTH_START: Start the authentication.
+ * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication.
+ */
+enum nl80211_external_auth_action {
+ NL80211_EXTERNAL_AUTH_START,
+ NL80211_EXTERNAL_AUTH_ABORT,
+};
+
+/**
+ * enum nl80211_ftm_responder_attributes - fine timing measurement
+ * responder attributes
+ * @__NL80211_FTM_RESP_ATTR_INVALID: Invalid
+ * @NL80211_FTM_RESP_ATTR_ENABLED: FTM responder is enabled
+ * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element
+ * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10),
+ * i.e. starting with the measurement token
+ * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element
+ * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13),
+ * i.e. starting with the measurement token
+ * @__NL80211_FTM_RESP_ATTR_LAST: Internal
+ * @NL80211_FTM_RESP_ATTR_MAX: highest FTM responder attribute.
+ */
+enum nl80211_ftm_responder_attributes {
+ __NL80211_FTM_RESP_ATTR_INVALID,
+
+ NL80211_FTM_RESP_ATTR_ENABLED,
+ NL80211_FTM_RESP_ATTR_LCI,
+ NL80211_FTM_RESP_ATTR_CIVICLOC,
+
+ /* keep last */
+ __NL80211_FTM_RESP_ATTR_LAST,
+ NL80211_FTM_RESP_ATTR_MAX = __NL80211_FTM_RESP_ATTR_LAST - 1,
+};
+
+/*
+ * enum nl80211_ftm_responder_stats - FTM responder statistics
+ *
+ * These attribute types are used with %NL80211_ATTR_FTM_RESPONDER_STATS
+ * when getting FTM responder statistics.
+ *
+ * @__NL80211_FTM_STATS_INVALID: attribute number 0 is reserved
+ * @NL80211_FTM_STATS_SUCCESS_NUM: number of FTM sessions in which all frames
+ * were ssfully answered (u32)
+ * @NL80211_FTM_STATS_PARTIAL_NUM: number of FTM sessions in which part of the
+ * frames were successfully answered (u32)
+ * @NL80211_FTM_STATS_FAILED_NUM: number of failed FTM sessions (u32)
+ * @NL80211_FTM_STATS_ASAP_NUM: number of ASAP sessions (u32)
+ * @NL80211_FTM_STATS_NON_ASAP_NUM: number of non-ASAP sessions (u32)
+ * @NL80211_FTM_STATS_TOTAL_DURATION_MSEC: total sessions durations - gives an
+ * indication of how much time the responder was busy (u64, msec)
+ * @NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM: number of unknown FTM triggers -
+ * triggers from initiators that didn't finish successfully the negotiation
+ * phase with the responder (u32)
+ * @NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM: number of FTM reschedule requests
+ * - initiator asks for a new scheduling although it already has scheduled
+ * FTM slot (u32)
+ * @NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM: number of FTM triggers out of
+ * scheduled window (u32)
+ * @NL80211_FTM_STATS_PAD: used for padding, ignore
+ * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal
+ * @NL80211_FTM_STATS_MAX: highest possible FTM responder stats attribute
+ */
+enum nl80211_ftm_responder_stats {
+ __NL80211_FTM_STATS_INVALID,
+ NL80211_FTM_STATS_SUCCESS_NUM,
+ NL80211_FTM_STATS_PARTIAL_NUM,
+ NL80211_FTM_STATS_FAILED_NUM,
+ NL80211_FTM_STATS_ASAP_NUM,
+ NL80211_FTM_STATS_NON_ASAP_NUM,
+ NL80211_FTM_STATS_TOTAL_DURATION_MSEC,
+ NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM,
+ NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM,
+ NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM,
+ NL80211_FTM_STATS_PAD,
+
+ /* keep last */
+ __NL80211_FTM_STATS_AFTER_LAST,
+ NL80211_FTM_STATS_MAX = __NL80211_FTM_STATS_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_preamble - frame preamble types
+ * @NL80211_PREAMBLE_LEGACY: legacy (HR/DSSS, OFDM, ERP PHY) preamble
+ * @NL80211_PREAMBLE_HT: HT preamble
+ * @NL80211_PREAMBLE_VHT: VHT preamble
+ * @NL80211_PREAMBLE_DMG: DMG preamble
+ */
+enum nl80211_preamble {
+ NL80211_PREAMBLE_LEGACY,
+ NL80211_PREAMBLE_HT,
+ NL80211_PREAMBLE_VHT,
+ NL80211_PREAMBLE_DMG,
+};
+
+/**
+ * enum nl80211_peer_measurement_type - peer measurement types
+ * @NL80211_PMSR_TYPE_INVALID: invalid/unused, needed as we use
+ * these numbers also for attributes
+ *
+ * @NL80211_PMSR_TYPE_FTM: flight time measurement
+ *
+ * @NUM_NL80211_PMSR_TYPES: internal
+ * @NL80211_PMSR_TYPE_MAX: highest type number
+ */
+enum nl80211_peer_measurement_type {
+ NL80211_PMSR_TYPE_INVALID,
+
+ NL80211_PMSR_TYPE_FTM,
+
+ NUM_NL80211_PMSR_TYPES,
+ NL80211_PMSR_TYPE_MAX = NUM_NL80211_PMSR_TYPES - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_status - peer measurement status
+ * @NL80211_PMSR_STATUS_SUCCESS: measurement completed successfully
+ * @NL80211_PMSR_STATUS_REFUSED: measurement was locally refused
+ * @NL80211_PMSR_STATUS_TIMEOUT: measurement timed out
+ * @NL80211_PMSR_STATUS_FAILURE: measurement failed, a type-dependent
+ * reason may be available in the response data
+ */
+enum nl80211_peer_measurement_status {
+ NL80211_PMSR_STATUS_SUCCESS,
+ NL80211_PMSR_STATUS_REFUSED,
+ NL80211_PMSR_STATUS_TIMEOUT,
+ NL80211_PMSR_STATUS_FAILURE,
+};
+
+/**
+ * enum nl80211_peer_measurement_req - peer measurement request attributes
+ * @__NL80211_PMSR_REQ_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_REQ_ATTR_DATA: This is a nested attribute with measurement
+ * type-specific request data inside. The attributes used are from the
+ * enums named nl80211_peer_measurement_<type>_req.
+ * @NL80211_PMSR_REQ_ATTR_GET_AP_TSF: include AP TSF timestamp, if supported
+ * (flag attribute)
+ *
+ * @NUM_NL80211_PMSR_REQ_ATTRS: internal
+ * @NL80211_PMSR_REQ_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_req {
+ __NL80211_PMSR_REQ_ATTR_INVALID,
+
+ NL80211_PMSR_REQ_ATTR_DATA,
+ NL80211_PMSR_REQ_ATTR_GET_AP_TSF,
+
+ /* keep last */
+ NUM_NL80211_PMSR_REQ_ATTRS,
+ NL80211_PMSR_REQ_ATTR_MAX = NUM_NL80211_PMSR_REQ_ATTRS - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_resp - peer measurement response attributes
+ * @__NL80211_PMSR_RESP_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_RESP_ATTR_DATA: This is a nested attribute with measurement
+ * type-specific results inside. The attributes used are from the enums
+ * named nl80211_peer_measurement_<type>_resp.
+ * @NL80211_PMSR_RESP_ATTR_STATUS: u32 value with the measurement status
+ * (using values from &enum nl80211_peer_measurement_status.)
+ * @NL80211_PMSR_RESP_ATTR_HOST_TIME: host time (%CLOCK_BOOTTIME) when the
+ * result was measured; this value is not expected to be accurate to
+ * more than 20ms. (u64, nanoseconds)
+ * @NL80211_PMSR_RESP_ATTR_AP_TSF: TSF of the AP that the interface
+ * doing the measurement is connected to when the result was measured.
+ * This shall be accurately reported if supported and requested
+ * (u64, usec)
+ * @NL80211_PMSR_RESP_ATTR_FINAL: If results are sent to the host partially
+ * (*e.g. with FTM per-burst data) this flag will be cleared on all but
+ * the last result; if all results are combined it's set on the single
+ * result.
+ * @NL80211_PMSR_RESP_ATTR_PAD: padding for 64-bit attributes, ignore
+ *
+ * @NUM_NL80211_PMSR_RESP_ATTRS: internal
+ * @NL80211_PMSR_RESP_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_resp {
+ __NL80211_PMSR_RESP_ATTR_INVALID,
+
+ NL80211_PMSR_RESP_ATTR_DATA,
+ NL80211_PMSR_RESP_ATTR_STATUS,
+ NL80211_PMSR_RESP_ATTR_HOST_TIME,
+ NL80211_PMSR_RESP_ATTR_AP_TSF,
+ NL80211_PMSR_RESP_ATTR_FINAL,
+ NL80211_PMSR_RESP_ATTR_PAD,
+
+ /* keep last */
+ NUM_NL80211_PMSR_RESP_ATTRS,
+ NL80211_PMSR_RESP_ATTR_MAX = NUM_NL80211_PMSR_RESP_ATTRS - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_peer_attrs - peer attributes for measurement
+ * @__NL80211_PMSR_PEER_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_PEER_ATTR_ADDR: peer's MAC address
+ * @NL80211_PMSR_PEER_ATTR_CHAN: channel definition, nested, using top-level
+ * attributes like %NL80211_ATTR_WIPHY_FREQ etc.
+ * @NL80211_PMSR_PEER_ATTR_REQ: This is a nested attribute indexed by
+ * measurement type, with attributes from the
+ * &enum nl80211_peer_measurement_req inside.
+ * @NL80211_PMSR_PEER_ATTR_RESP: This is a nested attribute indexed by
+ * measurement type, with attributes from the
+ * &enum nl80211_peer_measurement_resp inside.
+ *
+ * @NUM_NL80211_PMSR_PEER_ATTRS: internal
+ * @NL80211_PMSR_PEER_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_peer_attrs {
+ __NL80211_PMSR_PEER_ATTR_INVALID,
+
+ NL80211_PMSR_PEER_ATTR_ADDR,
+ NL80211_PMSR_PEER_ATTR_CHAN,
+ NL80211_PMSR_PEER_ATTR_REQ,
+ NL80211_PMSR_PEER_ATTR_RESP,
+
+ /* keep last */
+ NUM_NL80211_PMSR_PEER_ATTRS,
+ NL80211_PMSR_PEER_ATTR_MAX = NUM_NL80211_PMSR_PEER_ATTRS - 1,
+};
+
+/**
+ * enum nl80211_peer_measurement_attrs - peer measurement attributes
+ * @__NL80211_PMSR_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_ATTR_MAX_PEERS: u32 attribute used for capability
+ * advertisement only, indicates the maximum number of peers
+ * measurements can be done with in a single request
+ * @NL80211_PMSR_ATTR_REPORT_AP_TSF: flag attribute in capability
+ * indicating that the connected AP's TSF can be reported in
+ * measurement results
+ * @NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR: flag attribute in capability
+ * indicating that MAC address randomization is supported.
+ * @NL80211_PMSR_ATTR_TYPE_CAPA: capabilities reported by the device,
+ * this contains a nesting indexed by measurement type, and
+ * type-specific capabilities inside, which are from the enums
+ * named nl80211_peer_measurement_<type>_capa.
+ * @NL80211_PMSR_ATTR_PEERS: nested attribute, the nesting index is
+ * meaningless, just a list of peers to measure with, with the
+ * sub-attributes taken from
+ * &enum nl80211_peer_measurement_peer_attrs.
+ *
+ * @NUM_NL80211_PMSR_ATTR: internal
+ * @NL80211_PMSR_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_attrs {
+ __NL80211_PMSR_ATTR_INVALID,
+
+ NL80211_PMSR_ATTR_MAX_PEERS,
+ NL80211_PMSR_ATTR_REPORT_AP_TSF,
+ NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR,
+ NL80211_PMSR_ATTR_TYPE_CAPA,
+ NL80211_PMSR_ATTR_PEERS,
+
+ /* keep last */
+ NUM_NL80211_PMSR_ATTR,
+ NL80211_PMSR_ATTR_MAX = NUM_NL80211_PMSR_ATTR - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_ftm_capa - FTM capabilities
+ * @__NL80211_PMSR_FTM_CAPA_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_FTM_CAPA_ATTR_ASAP: flag attribute indicating ASAP mode
+ * is supported
+ * @NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP: flag attribute indicating non-ASAP
+ * mode is supported
+ * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI: flag attribute indicating if LCI
+ * data can be requested during the measurement
+ * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC: flag attribute indicating if civic
+ * location data can be requested during the measurement
+ * @NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES: u32 bitmap attribute of bits
+ * from &enum nl80211_preamble.
+ * @NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS: bitmap of values from
+ * &enum nl80211_chan_width indicating the supported channel
+ * bandwidths for FTM. Note that a higher channel bandwidth may be
+ * configured to allow for other measurements types with different
+ * bandwidth requirement in the same measurement.
+ * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT: u32 attribute indicating
+ * the maximum bursts exponent that can be used (if not present anything
+ * is valid)
+ * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST: u32 attribute indicating
+ * the maximum FTMs per burst (if not present anything is valid)
+ *
+ * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal
+ * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_ftm_capa {
+ __NL80211_PMSR_FTM_CAPA_ATTR_INVALID,
+
+ NL80211_PMSR_FTM_CAPA_ATTR_ASAP,
+ NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP,
+ NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI,
+ NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC,
+ NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES,
+ NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS,
+ NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT,
+ NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST,
+
+ /* keep last */
+ NUM_NL80211_PMSR_FTM_CAPA_ATTR,
+ NL80211_PMSR_FTM_CAPA_ATTR_MAX = NUM_NL80211_PMSR_FTM_CAPA_ATTR - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_ftm_req - FTM request attributes
+ * @__NL80211_PMSR_FTM_REQ_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_FTM_REQ_ATTR_ASAP: ASAP mode requested (flag)
+ * @NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE: preamble type (see
+ * &enum nl80211_preamble), optional for DMG (u32)
+ * @NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP: number of bursts exponent as in
+ * 802.11-2016 9.4.2.168 "Fine Timing Measurement Parameters element"
+ * (u8, 0-15, optional with default 15 i.e. "no preference")
+ * @NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD: interval between bursts in units
+ * of 100ms (u16, optional with default 0)
+ * @NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION: burst duration, as in 802.11-2016
+ * Table 9-257 "Burst Duration field encoding" (u8, 0-15, optional with
+ * default 15 i.e. "no preference")
+ * @NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST: number of successful FTM frames
+ * requested per burst
+ * (u8, 0-31, optional with default 0 i.e. "no preference")
+ * @NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES: number of FTMR frame retries
+ * (u8, default 3)
+ * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI: request LCI data (flag)
+ * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC: request civic location data
+ * (flag)
+ *
+ * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal
+ * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_ftm_req {
+ __NL80211_PMSR_FTM_REQ_ATTR_INVALID,
+
+ NL80211_PMSR_FTM_REQ_ATTR_ASAP,
+ NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE,
+ NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP,
+ NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD,
+ NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION,
+ NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST,
+ NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES,
+ NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI,
+ NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC,
+
+ /* keep last */
+ NUM_NL80211_PMSR_FTM_REQ_ATTR,
+ NL80211_PMSR_FTM_REQ_ATTR_MAX = NUM_NL80211_PMSR_FTM_REQ_ATTR - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_ftm_failure_reasons - FTM failure reasons
+ * @NL80211_PMSR_FTM_FAILURE_UNSPECIFIED: unspecified failure, not used
+ * @NL80211_PMSR_FTM_FAILURE_NO_RESPONSE: no response from the FTM responder
+ * @NL80211_PMSR_FTM_FAILURE_REJECTED: FTM responder rejected measurement
+ * @NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL: we already know the peer is
+ * on a different channel, so can't measure (if we didn't know, we'd
+ * try and get no response)
+ * @NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE: peer can't actually do FTM
+ * @NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP: invalid T1/T4 timestamps
+ * received
+ * @NL80211_PMSR_FTM_FAILURE_PEER_BUSY: peer reports busy, you may retry
+ * later (see %NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME)
+ * @NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS: parameters were changed
+ * by the peer and are no longer supported
+ */
+enum nl80211_peer_measurement_ftm_failure_reasons {
+ NL80211_PMSR_FTM_FAILURE_UNSPECIFIED,
+ NL80211_PMSR_FTM_FAILURE_NO_RESPONSE,
+ NL80211_PMSR_FTM_FAILURE_REJECTED,
+ NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL,
+ NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE,
+ NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP,
+ NL80211_PMSR_FTM_FAILURE_PEER_BUSY,
+ NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS,
+};
+
+/**
+ * enum nl80211_peer_measurement_ftm_resp - FTM response attributes
+ * @__NL80211_PMSR_FTM_RESP_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON: FTM-specific failure reason
+ * (u32, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX: optional, if bursts are reported
+ * as separate results then it will be the burst index 0...(N-1) and
+ * the top level will indicate partial results (u32)
+ * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS: number of FTM Request frames
+ * transmitted (u32, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES: number of FTM Request frames
+ * that were acknowleged (u32, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME: retry time received from the
+ * busy peer (u32, seconds)
+ * @NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP: actual number of bursts exponent
+ * used by the responder (similar to request, u8)
+ * @NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION: actual burst duration used by
+ * the responder (similar to request, u8)
+ * @NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST: actual FTMs per burst used
+ * by the responder (similar to request, u8)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG: average RSSI across all FTM action
+ * frames (optional, s32, 1/2 dBm)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD: RSSI spread across all FTM action
+ * frames (optional, s32, 1/2 dBm)
+ * @NL80211_PMSR_FTM_RESP_ATTR_TX_RATE: bitrate we used for the response to the
+ * FTM action frame (optional, nested, using &enum nl80211_rate_info
+ * attributes)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RX_RATE: bitrate the responder used for the FTM
+ * action frame (optional, nested, using &enum nl80211_rate_info attrs)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG: average RTT (s64, picoseconds, optional
+ * but one of RTT/DIST must be present)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE: RTT variance (u64, ps^2, note that
+ * standard deviation is the square root of variance, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD: RTT spread (u64, picoseconds,
+ * optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG: average distance (s64, mm, optional
+ * but one of RTT/DIST must be present)
+ * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note
+ * that standard deviation is the square root of variance, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional);
+ * this is the contents of the Measurement Report Element (802.11-2016
+ * 9.4.2.22.1) starting with the Measurement Token, with Measurement
+ * Type 8.
+ * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer
+ * (binary, optional);
+ * this is the contents of the Measurement Report Element (802.11-2016
+ * 9.4.2.22.1) starting with the Measurement Token, with Measurement
+ * Type 11.
+ * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only
+ *
+ * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal
+ * @NL80211_PMSR_FTM_RESP_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_ftm_resp {
+ __NL80211_PMSR_FTM_RESP_ATTR_INVALID,
+
+ NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON,
+ NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX,
+ NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS,
+ NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES,
+ NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME,
+ NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP,
+ NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION,
+ NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST,
+ NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG,
+ NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD,
+ NL80211_PMSR_FTM_RESP_ATTR_TX_RATE,
+ NL80211_PMSR_FTM_RESP_ATTR_RX_RATE,
+ NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG,
+ NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE,
+ NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD,
+ NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG,
+ NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE,
+ NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD,
+ NL80211_PMSR_FTM_RESP_ATTR_LCI,
+ NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC,
+ NL80211_PMSR_FTM_RESP_ATTR_PAD,
+
+ /* keep last */
+ NUM_NL80211_PMSR_FTM_RESP_ATTR,
+ NL80211_PMSR_FTM_RESP_ATTR_MAX = NUM_NL80211_PMSR_FTM_RESP_ATTR - 1
+};
+
+/**
+ * enum nl80211_obss_pd_attributes - OBSS packet detection attributes
+ * @__NL80211_HE_OBSS_PD_ATTR_INVALID: Invalid
+ *
+ * @NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET: the OBSS PD minimum tx power offset.
+ * @NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET: the OBSS PD maximum tx power offset.
+ *
+ * @__NL80211_HE_OBSS_PD_ATTR_LAST: Internal
+ * @NL80211_HE_OBSS_PD_ATTR_MAX: highest OBSS PD attribute.
+ */
+enum nl80211_obss_pd_attributes {
+ __NL80211_HE_OBSS_PD_ATTR_INVALID,
+
+ NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET,
+ NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET,
+
+ /* keep last */
+ __NL80211_HE_OBSS_PD_ATTR_LAST,
+ NL80211_HE_OBSS_PD_ATTR_MAX = __NL80211_HE_OBSS_PD_ATTR_LAST - 1,
+};
+
+
+#endif /* __LINUX_NL80211_H */
diff --git a/src/shared/local-addresses.c b/src/shared/local-addresses.c
new file mode 100644
index 0000000..2c860f7
--- /dev/null
+++ b/src/shared/local-addresses.c
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "local-addresses.h"
+#include "macro.h"
+#include "netlink-util.h"
+#include "sort-util.h"
+
+static int address_compare(const struct local_address *a, const struct local_address *b) {
+ int r;
+
+ /* Order lowest scope first, IPv4 before IPv6, lowest interface index first */
+
+ if (a->family == AF_INET && b->family == AF_INET6)
+ return -1;
+ if (a->family == AF_INET6 && b->family == AF_INET)
+ return 1;
+
+ r = CMP(a->scope, b->scope);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->metric, b->metric);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->ifindex, b->ifindex);
+ if (r != 0)
+ return r;
+
+ return memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
+}
+
+int local_addresses(sd_netlink *context, int ifindex, int af, struct local_address **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_free_ struct local_address *list = NULL;
+ size_t n_list = 0, n_allocated = 0;
+ sd_netlink_message *m;
+ int r;
+
+ if (context)
+ rtnl = sd_netlink_ref(context);
+ else {
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, af);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ struct local_address *a;
+ unsigned char flags;
+ uint16_t type;
+ int ifi, family;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+ if (type != RTM_NEWADDR)
+ continue;
+
+ r = sd_rtnl_message_addr_get_ifindex(m, &ifi);
+ if (r < 0)
+ return r;
+ if (ifindex > 0 && ifi != ifindex)
+ continue;
+
+ r = sd_rtnl_message_addr_get_family(m, &family);
+ if (r < 0)
+ return r;
+ if (af != AF_UNSPEC && af != family)
+ continue;
+
+ r = sd_rtnl_message_addr_get_flags(m, &flags);
+ if (r < 0)
+ return r;
+ if (flags & IFA_F_DEPRECATED)
+ continue;
+
+ if (!GREEDY_REALLOC0(list, n_allocated, n_list+1))
+ return -ENOMEM;
+
+ a = list + n_list;
+
+ r = sd_rtnl_message_addr_get_scope(m, &a->scope);
+ if (r < 0)
+ return r;
+
+ if (ifindex == 0 && IN_SET(a->scope, RT_SCOPE_HOST, RT_SCOPE_NOWHERE))
+ continue;
+
+ switch (family) {
+
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(m, IFA_LOCAL, &a->address.in);
+ if (r < 0) {
+ r = sd_netlink_message_read_in_addr(m, IFA_ADDRESS, &a->address.in);
+ if (r < 0)
+ continue;
+ }
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(m, IFA_LOCAL, &a->address.in6);
+ if (r < 0) {
+ r = sd_netlink_message_read_in6_addr(m, IFA_ADDRESS, &a->address.in6);
+ if (r < 0)
+ continue;
+ }
+ break;
+
+ default:
+ continue;
+ }
+
+ a->ifindex = ifi;
+ a->family = family;
+
+ n_list++;
+ };
+
+ if (ret) {
+ typesafe_qsort(list, n_list, address_compare);
+ *ret = TAKE_PTR(list);
+ }
+
+ return (int) n_list;
+}
+
+static int add_local_gateway(
+ struct local_address **list,
+ size_t *n_list,
+ size_t *n_allocated,
+ int af,
+ int ifindex,
+ uint32_t metric,
+ const RouteVia *via) {
+
+ assert(list);
+ assert(n_list);
+ assert(n_allocated);
+ assert(via);
+
+ if (af != AF_UNSPEC && af != via->family)
+ return 0;
+
+ if (!GREEDY_REALLOC(*list, *n_allocated, *n_list + 1))
+ return -ENOMEM;
+
+ (*list)[(*n_list)++] = (struct local_address) {
+ .ifindex = ifindex,
+ .metric = metric,
+ .family = via->family,
+ .address = via->address,
+ };
+
+ return 0;
+}
+
+int local_gateways(sd_netlink *context, int ifindex, int af, struct local_address **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_free_ struct local_address *list = NULL;
+ size_t n_list = 0, n_allocated = 0;
+ int r;
+
+ if (context)
+ rtnl = sd_netlink_ref(context);
+ else {
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_route(rtnl, &req, RTM_GETROUTE, af, RTPROT_UNSPEC);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) {
+ _cleanup_ordered_set_free_free_ OrderedSet *multipath_routes = NULL;
+ _cleanup_free_ void *rta_multipath = NULL;
+ union in_addr_union gateway;
+ uint16_t type;
+ unsigned char dst_len, src_len, table;
+ uint32_t ifi, metric = 0;
+ size_t rta_len;
+ int family;
+ RouteVia via;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+ if (type != RTM_NEWROUTE)
+ continue;
+
+ /* We only care for default routes */
+ r = sd_rtnl_message_route_get_dst_prefixlen(m, &dst_len);
+ if (r < 0)
+ return r;
+ if (dst_len != 0)
+ continue;
+
+ r = sd_rtnl_message_route_get_src_prefixlen(m, &src_len);
+ if (r < 0)
+ return r;
+ if (src_len != 0)
+ continue;
+
+ r = sd_rtnl_message_route_get_table(m, &table);
+ if (r < 0)
+ return r;
+ if (table != RT_TABLE_MAIN)
+ continue;
+
+ r = sd_netlink_message_read_u32(m, RTA_PRIORITY, &metric);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ r = sd_rtnl_message_route_get_family(m, &family);
+ if (r < 0)
+ return r;
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ continue;
+
+ r = sd_netlink_message_read_u32(m, RTA_OIF, &ifi);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ if (r >= 0) {
+ if (ifi <= 0)
+ return -EINVAL;
+ if (ifindex > 0 && (int) ifi != ifindex)
+ continue;
+
+ r = netlink_message_read_in_addr_union(m, RTA_GATEWAY, family, &gateway);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ if (r >= 0) {
+ via.family = family;
+ via.address = gateway;
+ r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &via);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ if (family != AF_INET)
+ continue;
+
+ r = sd_netlink_message_read(m, RTA_VIA, sizeof(via), &via);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ if (r >= 0) {
+ r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &via);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+ }
+
+ r = sd_netlink_message_read_data(m, RTA_MULTIPATH, &rta_len, &rta_multipath);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ if (r >= 0) {
+ MultipathRoute *mr;
+
+ r = rtattr_read_nexthop(rta_multipath, rta_len, family, &multipath_routes);
+ if (r < 0)
+ return r;
+
+ ORDERED_SET_FOREACH(mr, multipath_routes) {
+ if (ifindex > 0 && mr->ifindex != ifindex)
+ continue;
+
+ r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &mr->gateway);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ if (ret) {
+ typesafe_qsort(list, n_list, address_compare);
+ *ret = TAKE_PTR(list);
+ }
+
+ return (int) n_list;
+}
diff --git a/src/shared/local-addresses.h b/src/shared/local-addresses.h
new file mode 100644
index 0000000..c633995
--- /dev/null
+++ b/src/shared/local-addresses.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+
+struct local_address {
+ int family, ifindex;
+ unsigned char scope;
+ uint32_t metric;
+ union in_addr_union address;
+};
+
+int local_addresses(sd_netlink *rtnl, int ifindex, int af, struct local_address **ret);
+
+int local_gateways(sd_netlink *rtnl, int ifindex, int af, struct local_address **ret);
diff --git a/src/shared/lockfile-util.c b/src/shared/lockfile-util.c
new file mode 100644
index 0000000..6f059ab
--- /dev/null
+++ b/src/shared/lockfile-util.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "lockfile-util.h"
+#include "macro.h"
+#include "missing_fcntl.h"
+#include "path-util.h"
+
+int make_lock_file(const char *p, int operation, LockFile *ret) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ /*
+ * We use UNPOSIX locks if they are available. They have nice
+ * semantics, and are mostly compatible with NFS. However,
+ * they are only available on new kernels. When we detect we
+ * are running on an older kernel, then we fall back to good
+ * old BSD locks. They also have nice semantics, but are
+ * slightly problematic on NFS, where they are upgraded to
+ * POSIX locks, even though locally they are orthogonal to
+ * POSIX locks.
+ */
+
+ t = strdup(p);
+ if (!t)
+ return -ENOMEM;
+
+ for (;;) {
+ struct flock fl = {
+ .l_type = (operation & ~LOCK_NB) == LOCK_EX ? F_WRLCK : F_RDLCK,
+ .l_whence = SEEK_SET,
+ };
+ struct stat st;
+
+ fd = open(p, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+ if (fd < 0)
+ return -errno;
+
+ r = fcntl(fd, (operation & LOCK_NB) ? F_OFD_SETLK : F_OFD_SETLKW, &fl);
+ if (r < 0) {
+
+ /* If the kernel is too old, use good old BSD locks */
+ if (errno == EINVAL)
+ r = flock(fd, operation);
+
+ if (r < 0)
+ return errno == EAGAIN ? -EBUSY : -errno;
+ }
+
+ /* If we acquired the lock, let's check if the file
+ * still exists in the file system. If not, then the
+ * previous exclusive owner removed it and then closed
+ * it. In such a case our acquired lock is worthless,
+ * hence try again. */
+
+ r = fstat(fd, &st);
+ if (r < 0)
+ return -errno;
+ if (st.st_nlink > 0)
+ break;
+
+ fd = safe_close(fd);
+ }
+
+ ret->path = t;
+ ret->fd = fd;
+ ret->operation = operation;
+
+ fd = -1;
+ t = NULL;
+
+ return r;
+}
+
+int make_lock_file_for(const char *p, int operation, LockFile *ret) {
+ const char *fn;
+ char *t;
+
+ assert(p);
+ assert(ret);
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ t = newa(char, strlen(p) + 2 + 4 + 1);
+ stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), fn), ".lck");
+
+ return make_lock_file(t, operation, ret);
+}
+
+void release_lock_file(LockFile *f) {
+ int r;
+
+ if (!f)
+ return;
+
+ if (f->path) {
+
+ /* If we are the exclusive owner we can safely delete
+ * the lock file itself. If we are not the exclusive
+ * owner, we can try becoming it. */
+
+ if (f->fd >= 0 &&
+ (f->operation & ~LOCK_NB) == LOCK_SH) {
+ static const struct flock fl = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ };
+
+ r = fcntl(f->fd, F_OFD_SETLK, &fl);
+ if (r < 0 && errno == EINVAL)
+ r = flock(f->fd, LOCK_EX|LOCK_NB);
+
+ if (r >= 0)
+ f->operation = LOCK_EX|LOCK_NB;
+ }
+
+ if ((f->operation & ~LOCK_NB) == LOCK_EX)
+ unlink_noerrno(f->path);
+
+ f->path = mfree(f->path);
+ }
+
+ f->fd = safe_close(f->fd);
+ f->operation = 0;
+}
diff --git a/src/shared/lockfile-util.h b/src/shared/lockfile-util.h
new file mode 100644
index 0000000..3606327
--- /dev/null
+++ b/src/shared/lockfile-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct LockFile {
+ char *path;
+ int fd;
+ int operation;
+} LockFile;
+
+int make_lock_file(const char *p, int operation, LockFile *ret);
+int make_lock_file_for(const char *p, int operation, LockFile *ret);
+void release_lock_file(LockFile *f);
+
+#define LOCK_FILE_INIT { .fd = -1, .path = NULL }
diff --git a/src/shared/log-link.h b/src/shared/log-link.h
new file mode 100644
index 0000000..bb692e0
--- /dev/null
+++ b/src/shared/log-link.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "log.h"
+
+/*
+ * The following macros append INTERFACE= to the message.
+ * The macros require a struct named 'Link' which contains 'char *ifname':
+ *
+ * typedef struct Link {
+ * char *ifname;
+ * } Link;
+ *
+ * See, network/networkd-link.h for example.
+ */
+
+#define log_link_full_errno(link, level, error, ...) \
+ ({ \
+ const Link *_l = (link); \
+ (_l && _l->ifname) ? log_object_internal(level, error, PROJECT_FILE, __LINE__, __func__, "INTERFACE=", _l->ifname, NULL, NULL, ##__VA_ARGS__) : \
+ log_internal(level, error, PROJECT_FILE, __LINE__, __func__, ##__VA_ARGS__); \
+ }) \
+
+#define log_link_full(link, level, ...) (void) log_link_full_errno(link, level, 0, __VA_ARGS__)
+
+#define log_link_debug(link, ...) log_link_full_errno(link, LOG_DEBUG, 0, __VA_ARGS__)
+#define log_link_info(link, ...) log_link_full(link, LOG_INFO, __VA_ARGS__)
+#define log_link_notice(link, ...) log_link_full(link, LOG_NOTICE, __VA_ARGS__)
+#define log_link_warning(link, ...) log_link_full(link, LOG_WARNING, __VA_ARGS__)
+#define log_link_error(link, ...) log_link_full(link, LOG_ERR, __VA_ARGS__)
+
+#define log_link_debug_errno(link, error, ...) log_link_full_errno(link, LOG_DEBUG, error, __VA_ARGS__)
+#define log_link_info_errno(link, error, ...) log_link_full_errno(link, LOG_INFO, error, __VA_ARGS__)
+#define log_link_notice_errno(link, error, ...) log_link_full_errno(link, LOG_NOTICE, error, __VA_ARGS__)
+#define log_link_warning_errno(link, error, ...) log_link_full_errno(link, LOG_WARNING, error, __VA_ARGS__)
+#define log_link_error_errno(link, error, ...) log_link_full_errno(link, LOG_ERR, error, __VA_ARGS__)
+
+#define LOG_LINK_MESSAGE(link, fmt, ...) "MESSAGE=%s: " fmt, (link)->ifname, ##__VA_ARGS__
+#define LOG_LINK_INTERFACE(link) "INTERFACE=%s", (link)->ifname
diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c
new file mode 100644
index 0000000..840f221
--- /dev/null
+++ b/src/shared/logs-show.c
@@ -0,0 +1,1672 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "json.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "namespace-util.h"
+#include "output-mode.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "web-util.h"
+
+/* up to three lines (each up to 100 characters) or 300 characters, whichever is less */
+#define PRINT_LINE_THRESHOLD 3
+#define PRINT_CHAR_THRESHOLD 300
+
+#define JSON_THRESHOLD 4096U
+
+static int print_catalog(FILE *f, sd_journal *j) {
+ _cleanup_free_ char *t = NULL, *z = NULL;
+ const char *newline, *prefix;
+ int r;
+
+ assert(j);
+
+ r = sd_journal_get_catalog(j, &t);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to find catalog entry: %m");
+
+ if (is_locale_utf8())
+ prefix = strjoina(special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), special_glyph(SPECIAL_GLYPH_LIGHT_SHADE));
+ else
+ prefix = "--";
+
+ if (colors_enabled())
+ newline = strjoina(ANSI_NORMAL "\n" ANSI_GREY, prefix, ANSI_NORMAL " " ANSI_GREEN);
+ else
+ newline = strjoina("\n", prefix, " ");
+
+ z = strreplace(strstrip(t), "\n", newline);
+ if (!z)
+ return log_oom();
+
+ if (colors_enabled())
+ fprintf(f, ANSI_GREY "%s" ANSI_NORMAL " " ANSI_GREEN, prefix);
+ else
+ fprintf(f, "%s ", prefix);
+
+ fputs(z, f);
+
+ if (colors_enabled())
+ fputs(ANSI_NORMAL "\n", f);
+ else
+ fputc('\n', f);
+
+ return 1;
+}
+
+static int url_from_catalog(sd_journal *j, char **ret) {
+ _cleanup_free_ char *t = NULL, *url = NULL;
+ const char *weblink;
+ int r;
+
+ assert(j);
+ assert(ret);
+
+ r = sd_journal_get_catalog(j, &t);
+ if (r == -ENOENT)
+ goto notfound;
+ if (r < 0)
+ return log_error_errno(r, "Failed to find catalog entry: %m");
+
+ weblink = startswith(t, "Documentation:");
+ if (!weblink) {
+ weblink = strstr(t + 1, "\nDocumentation:");
+ if (!weblink)
+ goto notfound;
+
+ weblink += 15;
+ }
+
+ /* Skip whitespace to value */
+ weblink += strspn(weblink, " \t");
+
+ /* Cut out till next whitespace/newline */
+ url = strndup(weblink, strcspn(weblink, WHITESPACE));
+ if (!url)
+ return log_oom();
+
+ if (!documentation_url_is_valid(url))
+ goto notfound;
+
+ *ret = TAKE_PTR(url);
+ return 1;
+
+notfound:
+ *ret = NULL;
+ return 0;
+}
+
+static int parse_field(const void *data, size_t length, const char *field, size_t field_len, char **target, size_t *target_len) {
+ size_t nl;
+ char *buf;
+
+ assert(data);
+ assert(field);
+ assert(target);
+
+ if (length < field_len)
+ return 0;
+
+ if (memcmp(data, field, field_len))
+ return 0;
+
+ nl = length - field_len;
+
+ buf = newdup_suffix0(char, (const char*) data + field_len, nl);
+ if (!buf)
+ return log_oom();
+
+ free(*target);
+ *target = buf;
+
+ if (target_len)
+ *target_len = nl;
+
+ return 1;
+}
+
+typedef struct ParseFieldVec {
+ const char *field;
+ size_t field_len;
+ char **target;
+ size_t *target_len;
+} ParseFieldVec;
+
+#define PARSE_FIELD_VEC_ENTRY(_field, _target, _target_len) { \
+ .field = _field, \
+ .field_len = strlen(_field), \
+ .target = _target, \
+ .target_len = _target_len \
+ }
+
+static int parse_fieldv(const void *data, size_t length, const ParseFieldVec *fields, unsigned n_fields) {
+ unsigned i;
+
+ for (i = 0; i < n_fields; i++) {
+ const ParseFieldVec *f = &fields[i];
+ int r;
+
+ r = parse_field(data, length, f->field, f->field_len, f->target, f->target_len);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int field_set_test(const Set *fields, const char *name, size_t n) {
+ char *s;
+
+ if (!fields)
+ return 1;
+
+ s = strndupa(name, n);
+ return set_contains(fields, s);
+}
+
+static bool shall_print(const char *p, size_t l, OutputFlags flags) {
+ assert(p);
+
+ if (flags & OUTPUT_SHOW_ALL)
+ return true;
+
+ if (l >= PRINT_CHAR_THRESHOLD)
+ return false;
+
+ if (!utf8_is_printable(p, l))
+ return false;
+
+ return true;
+}
+
+static bool print_multiline(
+ FILE *f,
+ unsigned prefix,
+ unsigned n_columns,
+ OutputFlags flags,
+ int priority,
+ bool audit,
+ const char* message,
+ size_t message_len,
+ size_t highlight[2]) {
+
+ const char *color_on = "", *color_off = "", *highlight_on = "";
+ const char *pos, *end;
+ bool ellipsized = false;
+ int line = 0;
+
+ if (flags & OUTPUT_COLOR) {
+ get_log_colors(priority, &color_on, &color_off, &highlight_on);
+
+ if (audit && strempty(color_on)) {
+ color_on = ANSI_BLUE;
+ color_off = ANSI_NORMAL;
+ }
+ }
+
+ /* A special case: make sure that we print a newline when
+ the message is empty. */
+ if (message_len == 0)
+ fputs("\n", f);
+
+ for (pos = message;
+ pos < message + message_len;
+ pos = end + 1, line++) {
+ bool continuation = line > 0;
+ bool tail_line;
+ int len;
+ for (end = pos; end < message + message_len && *end != '\n'; end++)
+ ;
+ len = end - pos;
+ assert(len >= 0);
+
+ /* We need to figure out when we are showing not-last line, *and*
+ * will skip subsequent lines. In that case, we will put the dots
+ * at the end of the line, instead of putting dots in the middle
+ * or not at all.
+ */
+ tail_line =
+ line + 1 == PRINT_LINE_THRESHOLD ||
+ end + 1 >= message + PRINT_CHAR_THRESHOLD;
+
+ if (flags & (OUTPUT_FULL_WIDTH | OUTPUT_SHOW_ALL) ||
+ (prefix + len + 1 < n_columns && !tail_line)) {
+ if (highlight &&
+ (size_t) (pos - message) <= highlight[0] &&
+ highlight[0] < (size_t) len) {
+
+ fprintf(f, "%*s%s%.*s",
+ continuation * prefix, "",
+ color_on, (int) highlight[0], pos);
+ fprintf(f, "%s%.*s",
+ highlight_on,
+ (int) (MIN((size_t) len, highlight[1]) - highlight[0]),
+ pos + highlight[0]);
+ if ((size_t) len > highlight[1])
+ fprintf(f, "%s%.*s",
+ color_on,
+ (int) (len - highlight[1]),
+ pos + highlight[1]);
+ fprintf(f, "%s\n", color_off);
+
+ } else
+ fprintf(f, "%*s%s%.*s%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ continue;
+ }
+
+ /* Beyond this point, ellipsization will happen. */
+ ellipsized = true;
+
+ if (prefix < n_columns && n_columns - prefix >= 3) {
+ if (n_columns - prefix > (unsigned) len + 3)
+ fprintf(f, "%*s%s%.*s...%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ else {
+ _cleanup_free_ char *e;
+
+ e = ellipsize_mem(pos, len, n_columns - prefix,
+ tail_line ? 100 : 90);
+ if (!e)
+ fprintf(f, "%*s%s%.*s%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ else
+ fprintf(f, "%*s%s%s%s\n",
+ continuation * prefix, "",
+ color_on, e, color_off);
+ }
+ } else
+ fputs("...\n", f);
+
+ if (tail_line)
+ break;
+ }
+
+ return ellipsized;
+}
+
+static int output_timestamp_monotonic(FILE *f, sd_journal *j, const char *monotonic) {
+ sd_id128_t boot_id;
+ uint64_t t;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ r = -ENXIO;
+ if (monotonic)
+ r = safe_atou64(monotonic, &t);
+ if (r < 0)
+ r = sd_journal_get_monotonic_usec(j, &t, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ fprintf(f, "[%5"PRI_USEC".%06"PRI_USEC"]", t / USEC_PER_SEC, t % USEC_PER_SEC);
+ return 1 + 5 + 1 + 6 + 1;
+}
+
+static int output_timestamp_realtime(FILE *f, sd_journal *j, OutputMode mode, OutputFlags flags, const char *realtime) {
+ char buf[MAX(FORMAT_TIMESTAMP_MAX, 64U)];
+ uint64_t x;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ if (realtime)
+ r = safe_atou64(realtime, &x);
+ if (!realtime || r < 0 || !VALID_REALTIME(x))
+ r = sd_journal_get_realtime_usec(j, &x);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ if (IN_SET(mode, OUTPUT_SHORT_FULL, OUTPUT_WITH_UNIT)) {
+ const char *k;
+
+ if (flags & OUTPUT_UTC)
+ k = format_timestamp_style(buf, sizeof(buf), x, TIMESTAMP_UTC);
+ else
+ k = format_timestamp(buf, sizeof(buf), x);
+ if (!k)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format timestamp: %" PRIu64, x);
+
+ } else {
+ struct tm tm;
+ time_t t;
+
+ t = (time_t) (x / USEC_PER_SEC);
+
+ switch (mode) {
+
+ case OUTPUT_SHORT_UNIX:
+ xsprintf(buf, "%10"PRI_TIME".%06"PRIu64, t, x % USEC_PER_SEC);
+ break;
+
+ case OUTPUT_SHORT_ISO:
+ if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S%z",
+ localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format ISO time");
+ break;
+
+ case OUTPUT_SHORT_ISO_PRECISE: {
+ char usec[7];
+
+ /* No usec in strftime, so we leave space and copy over */
+ if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S.xxxxxx%z",
+ localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format ISO-precise time");
+ xsprintf(usec, "%06"PRI_USEC, x % USEC_PER_SEC);
+ memcpy(buf + 20, usec, 6);
+ break;
+ }
+ case OUTPUT_SHORT:
+ case OUTPUT_SHORT_PRECISE:
+
+ if (strftime(buf, sizeof(buf), "%b %d %H:%M:%S",
+ localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format syslog time");
+
+ if (mode == OUTPUT_SHORT_PRECISE) {
+ size_t k;
+
+ assert(sizeof(buf) > strlen(buf));
+ k = sizeof(buf) - strlen(buf);
+
+ r = snprintf(buf + strlen(buf), k, ".%06"PRIu64, x % USEC_PER_SEC);
+ if (r <= 0 || (size_t) r >= k) /* too long? */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format precise time");
+ }
+ break;
+
+ default:
+ assert_not_reached("Unknown time format");
+ }
+ }
+
+ fputs(buf, f);
+ return (int) strlen(buf);
+}
+
+static int output_short(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ int r;
+ const void *data;
+ size_t length, n = 0;
+ _cleanup_free_ char *hostname = NULL, *identifier = NULL, *comm = NULL, *pid = NULL, *fake_pid = NULL,
+ *message = NULL, *realtime = NULL, *monotonic = NULL, *priority = NULL, *transport = NULL,
+ *config_file = NULL, *unit = NULL, *user_unit = NULL, *documentation_url = NULL;
+ size_t hostname_len = 0, identifier_len = 0, comm_len = 0, pid_len = 0, fake_pid_len = 0, message_len = 0,
+ realtime_len = 0, monotonic_len = 0, priority_len = 0, transport_len = 0, config_file_len = 0,
+ unit_len = 0, user_unit_len = 0, documentation_url_len = 0;
+ int p = LOG_INFO;
+ bool ellipsized = false, audit;
+ const ParseFieldVec fields[] = {
+ PARSE_FIELD_VEC_ENTRY("_PID=", &pid, &pid_len),
+ PARSE_FIELD_VEC_ENTRY("_COMM=", &comm, &comm_len),
+ PARSE_FIELD_VEC_ENTRY("MESSAGE=", &message, &message_len),
+ PARSE_FIELD_VEC_ENTRY("PRIORITY=", &priority, &priority_len),
+ PARSE_FIELD_VEC_ENTRY("_TRANSPORT=", &transport, &transport_len),
+ PARSE_FIELD_VEC_ENTRY("_HOSTNAME=", &hostname, &hostname_len),
+ PARSE_FIELD_VEC_ENTRY("SYSLOG_PID=", &fake_pid, &fake_pid_len),
+ PARSE_FIELD_VEC_ENTRY("SYSLOG_IDENTIFIER=", &identifier, &identifier_len),
+ PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, &realtime_len),
+ PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, &monotonic_len),
+ PARSE_FIELD_VEC_ENTRY("CONFIG_FILE=", &config_file, &config_file_len),
+ PARSE_FIELD_VEC_ENTRY("_SYSTEMD_UNIT=", &unit, &unit_len),
+ PARSE_FIELD_VEC_ENTRY("_SYSTEMD_USER_UNIT=", &user_unit, &user_unit_len),
+ PARSE_FIELD_VEC_ENTRY("DOCUMENTATION=", &documentation_url, &documentation_url_len),
+ };
+ size_t highlight_shifted[] = {highlight ? highlight[0] : 0, highlight ? highlight[1] : 0};
+
+ assert(f);
+ assert(j);
+
+ /* Set the threshold to one bigger than the actual print
+ * threshold, so that if the line is actually longer than what
+ * we're willing to print, ellipsization will occur. This way
+ * we won't output a misleading line without any indication of
+ * truncation.
+ */
+ sd_journal_set_data_threshold(j, flags & (OUTPUT_SHOW_ALL|OUTPUT_FULL_WIDTH) ? 0 : PRINT_CHAR_THRESHOLD + 1);
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ r = parse_fieldv(data, length, fields, ELEMENTSOF(fields));
+ if (r < 0)
+ return r;
+ }
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to get journal fields: %m");
+
+ if (!message) {
+ log_debug("Skipping message without MESSAGE= field.");
+ return 0;
+ }
+
+ if (!(flags & OUTPUT_SHOW_ALL))
+ strip_tab_ansi(&message, &message_len, highlight_shifted);
+
+ if (priority_len == 1 && *priority >= '0' && *priority <= '7')
+ p = *priority - '0';
+
+ audit = streq_ptr(transport, "audit");
+
+ if (mode == OUTPUT_SHORT_MONOTONIC)
+ r = output_timestamp_monotonic(f, j, monotonic);
+ else
+ r = output_timestamp_realtime(f, j, mode, flags, realtime);
+ if (r < 0)
+ return r;
+ n += r;
+
+ if (flags & OUTPUT_NO_HOSTNAME) {
+ /* Suppress display of the hostname if this is requested. */
+ hostname = mfree(hostname);
+ hostname_len = 0;
+ }
+
+ if (hostname && shall_print(hostname, hostname_len, flags)) {
+ fprintf(f, " %.*s", (int) hostname_len, hostname);
+ n += hostname_len + 1;
+ }
+
+ if (mode == OUTPUT_WITH_UNIT && ((unit && shall_print(unit, unit_len, flags)) ||
+ (user_unit && shall_print(user_unit, user_unit_len, flags)))) {
+ if (unit) {
+ fprintf(f, " %.*s", (int) unit_len, unit);
+ n += unit_len + 1;
+ }
+ if (user_unit) {
+ if (unit)
+ fprintf(f, "/%.*s", (int) user_unit_len, user_unit);
+ else
+ fprintf(f, " %.*s", (int) user_unit_len, user_unit);
+ n += unit_len + 1;
+ }
+ } else if (identifier && shall_print(identifier, identifier_len, flags)) {
+ fprintf(f, " %.*s", (int) identifier_len, identifier);
+ n += identifier_len + 1;
+ } else if (comm && shall_print(comm, comm_len, flags)) {
+ fprintf(f, " %.*s", (int) comm_len, comm);
+ n += comm_len + 1;
+ } else
+ fputs(" unknown", f);
+
+ if (pid && shall_print(pid, pid_len, flags)) {
+ fprintf(f, "[%.*s]", (int) pid_len, pid);
+ n += pid_len + 2;
+ } else if (fake_pid && shall_print(fake_pid, fake_pid_len, flags)) {
+ fprintf(f, "[%.*s]", (int) fake_pid_len, fake_pid);
+ n += fake_pid_len + 2;
+ }
+
+ fputs(": ", f);
+
+ if (urlify_enabled()) {
+ _cleanup_free_ char *c = NULL;
+
+ /* Insert a hyperlink to a documentation URL before the message. Note that we don't make the
+ * whole message a hyperlink, since otherwise the whole screen might end up being just
+ * hyperlinks. Moreover, we want to be able to highlight parts of the message (such as the
+ * config file, see below) hence let's keep the documentation URL link separate. */
+
+ if (documentation_url && shall_print(documentation_url, documentation_url_len, flags)) {
+ c = strndup(documentation_url, documentation_url_len);
+ if (!c)
+ return log_oom();
+
+ if (!documentation_url_is_valid(c)) /* Eat up invalid links */
+ c = mfree(c);
+ }
+
+ if (!c)
+ (void) url_from_catalog(j, &c); /* Acquire from catalog if not embedded in log message itself */
+
+ if (c) {
+ _cleanup_free_ char *urlified = NULL;
+
+ if (terminal_urlify(c, special_glyph(SPECIAL_GLYPH_EXTERNAL_LINK), &urlified) >= 0) {
+ fputs(urlified, f);
+ fputc(' ', f);
+ }
+ }
+ }
+
+ if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(message, message_len)) {
+ char bytes[FORMAT_BYTES_MAX];
+ fprintf(f, "[%s blob data]\n", format_bytes(bytes, sizeof(bytes), message_len));
+ } else {
+
+ /* URLify config_file string in message, if the message starts with it.
+ * Skip URLification if the highlighted pattern overlaps. */
+ if (config_file &&
+ message_len >= config_file_len &&
+ memcmp(message, config_file, config_file_len) == 0 &&
+ (message_len == config_file_len || IN_SET(message[config_file_len], ':', ' ')) &&
+ (!highlight || highlight_shifted[0] == 0 || highlight_shifted[0] > config_file_len)) {
+
+ _cleanup_free_ char *t = NULL, *urlified = NULL;
+
+ t = strndup(config_file, config_file_len);
+ if (t && terminal_urlify_path(t, NULL, &urlified) >= 0) {
+ size_t urlified_len = strlen(urlified);
+ size_t shift = urlified_len - config_file_len;
+ char *joined;
+
+ joined = realloc(urlified, message_len + shift);
+ if (joined) {
+ memcpy(joined + urlified_len, message + config_file_len, message_len - config_file_len);
+ free_and_replace(message, joined);
+ TAKE_PTR(urlified);
+ message_len += shift;
+ if (highlight) {
+ highlight_shifted[0] += shift;
+ highlight_shifted[1] += shift;
+ }
+ }
+ }
+ }
+
+ ellipsized |=
+ print_multiline(f, n + 2, n_columns, flags, p, audit,
+ message, message_len,
+ highlight_shifted);
+ }
+
+ if (flags & OUTPUT_CATALOG)
+ (void) print_catalog(f, j);
+
+ return ellipsized;
+}
+
+static int output_verbose(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ const void *data;
+ size_t length;
+ _cleanup_free_ char *cursor = NULL;
+ uint64_t realtime = 0;
+ char ts[FORMAT_TIMESTAMP_MAX + 7];
+ const char *timestamp;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_data(j, "_SOURCE_REALTIME_TIMESTAMP", &data, &length);
+ if (r == -ENOENT)
+ log_debug("Source realtime timestamp not found");
+ else if (r < 0)
+ return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get source realtime timestamp: %m");
+ else {
+ _cleanup_free_ char *value = NULL;
+
+ r = parse_field(data, length, "_SOURCE_REALTIME_TIMESTAMP=",
+ STRLEN("_SOURCE_REALTIME_TIMESTAMP="), &value,
+ NULL);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ r = safe_atou64(value, &realtime);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse realtime timestamp: %m");
+ }
+
+ if (r < 0) {
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get realtime timestamp: %m");
+ }
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ timestamp = format_timestamp_style(ts, sizeof ts, realtime,
+ flags & OUTPUT_UTC ? TIMESTAMP_US_UTC : TIMESTAMP_US);
+ fprintf(f, "%s [%s]\n",
+ timestamp ?: "(no timestamp)",
+ cursor);
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ const char *c, *p;
+ int fieldlen;
+ const char *on = "", *off = "";
+ _cleanup_free_ char *urlified = NULL;
+ size_t valuelen;
+
+ c = memchr(data, '=', length);
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ fieldlen = c - (const char*) data;
+ if (!journal_field_valid(data, fieldlen, true))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ r = field_set_test(output_fields, data, fieldlen);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ valuelen = length - 1 - fieldlen;
+
+ if ((flags & OUTPUT_COLOR) && (p = startswith(data, "MESSAGE="))) {
+ on = ANSI_HIGHLIGHT;
+ off = ANSI_NORMAL;
+ } else if ((p = startswith(data, "CONFIG_FILE="))) {
+ if (terminal_urlify_path(p, NULL, &urlified) >= 0) {
+ p = urlified;
+ valuelen = strlen(urlified);
+ }
+ } else
+ p = c + 1;
+
+ if ((flags & OUTPUT_SHOW_ALL) ||
+ (((length < PRINT_CHAR_THRESHOLD) || flags & OUTPUT_FULL_WIDTH)
+ && utf8_is_printable(data, length))) {
+ fprintf(f, " %s%.*s=", on, fieldlen, (const char*)data);
+ print_multiline(f, 4 + fieldlen + 1, 0, OUTPUT_FULL_WIDTH, 0, false,
+ p, valuelen,
+ NULL);
+ fputs(off, f);
+ } else {
+ char bytes[FORMAT_BYTES_MAX];
+
+ fprintf(f, " %s%.*s=[%s blob data]%s\n",
+ on,
+ (int) (c - (const char*) data),
+ (const char*) data,
+ format_bytes(bytes, sizeof(bytes), length - (c - (const char *) data) - 1),
+ off);
+ }
+ }
+
+ if (r < 0)
+ return r;
+
+ if (flags & OUTPUT_CATALOG)
+ (void) print_catalog(f, j);
+
+ return 0;
+}
+
+static int output_export(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ sd_id128_t boot_id;
+ char sid[SD_ID128_STRING_MAX];
+ int r;
+ usec_t realtime, monotonic;
+ _cleanup_free_ char *cursor = NULL;
+ const void *data;
+ size_t length;
+
+ assert(j);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ fprintf(f,
+ "__CURSOR=%s\n"
+ "__REALTIME_TIMESTAMP="USEC_FMT"\n"
+ "__MONOTONIC_TIMESTAMP="USEC_FMT"\n"
+ "_BOOT_ID=%s\n",
+ cursor,
+ realtime,
+ monotonic,
+ sd_id128_to_string(boot_id, sid));
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ size_t fieldlen;
+ const char *c;
+
+ /* We already printed the boot id from the data in the header, hence let's suppress it here */
+ if (memory_startswith(data, length, "_BOOT_ID="))
+ continue;
+
+ c = memchr(data, '=', length);
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ fieldlen = c - (const char*) data;
+ if (!journal_field_valid(data, fieldlen, true))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ r = field_set_test(output_fields, data, fieldlen);
+ if (r < 0)
+ return r;
+ if (!r)
+ continue;
+
+ if (utf8_is_printable_newline(data, length, false))
+ fwrite(data, length, 1, f);
+ else {
+ uint64_t le64;
+
+ fwrite(data, fieldlen, 1, f);
+ fputc('\n', f);
+ le64 = htole64(length - fieldlen - 1);
+ fwrite(&le64, sizeof(le64), 1, f);
+ fwrite(c + 1, length - fieldlen - 1, 1, f);
+ }
+
+ fputc('\n', f);
+ }
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+
+ if (r < 0)
+ return r;
+
+ fputc('\n', f);
+
+ return 0;
+}
+
+void json_escape(
+ FILE *f,
+ const char* p,
+ size_t l,
+ OutputFlags flags) {
+
+ assert(f);
+ assert(p);
+
+ if (!(flags & OUTPUT_SHOW_ALL) && l >= JSON_THRESHOLD)
+ fputs("null", f);
+
+ else if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(p, l)) {
+ bool not_first = false;
+
+ fputs("[ ", f);
+
+ while (l > 0) {
+ if (not_first)
+ fprintf(f, ", %u", (uint8_t) *p);
+ else {
+ not_first = true;
+ fprintf(f, "%u", (uint8_t) *p);
+ }
+
+ p++;
+ l--;
+ }
+
+ fputs(" ]", f);
+ } else {
+ fputc('"', f);
+
+ while (l > 0) {
+ if (IN_SET(*p, '"', '\\')) {
+ fputc('\\', f);
+ fputc(*p, f);
+ } else if (*p == '\n')
+ fputs("\\n", f);
+ else if ((uint8_t) *p < ' ')
+ fprintf(f, "\\u%04x", (uint8_t) *p);
+ else
+ fputc(*p, f);
+
+ p++;
+ l--;
+ }
+
+ fputc('"', f);
+ }
+}
+
+struct json_data {
+ JsonVariant* name;
+ size_t n_values;
+ JsonVariant* values[];
+};
+
+static int update_json_data(
+ Hashmap *h,
+ OutputFlags flags,
+ const char *name,
+ const void *value,
+ size_t size) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ struct json_data *d;
+ int r;
+
+ if (!(flags & OUTPUT_SHOW_ALL) && strlen(name) + 1 + size >= JSON_THRESHOLD)
+ r = json_variant_new_null(&v);
+ else if (utf8_is_printable(value, size))
+ r = json_variant_new_stringn(&v, value, size);
+ else
+ r = json_variant_new_array_bytes(&v, value, size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate JSON data: %m");
+
+ d = hashmap_get(h, name);
+ if (d) {
+ struct json_data *w;
+
+ w = realloc(d, offsetof(struct json_data, values) + sizeof(JsonVariant*) * (d->n_values + 1));
+ if (!w)
+ return log_oom();
+
+ d = w;
+ assert_se(hashmap_update(h, json_variant_string(d->name), d) >= 0);
+ } else {
+ _cleanup_(json_variant_unrefp) JsonVariant *n = NULL;
+
+ r = json_variant_new_string(&n, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate JSON name variant: %m");
+
+ d = malloc0(offsetof(struct json_data, values) + sizeof(JsonVariant*));
+ if (!d)
+ return log_oom();
+
+ r = hashmap_put(h, json_variant_string(n), d);
+ if (r < 0) {
+ free(d);
+ return log_error_errno(r, "Failed to insert JSON name into hashmap: %m");
+ }
+
+ d->name = TAKE_PTR(n);
+ }
+
+ d->values[d->n_values++] = TAKE_PTR(v);
+ return 0;
+}
+
+static int update_json_data_split(
+ Hashmap *h,
+ OutputFlags flags,
+ const Set *output_fields,
+ const void *data,
+ size_t size) {
+
+ size_t fieldlen;
+ const char *eq;
+ char *name;
+
+ assert(h);
+ assert(data || size == 0);
+
+ if (memory_startswith(data, size, "_BOOT_ID="))
+ return 0;
+
+ eq = memchr(data, '=', MIN(size, JSON_THRESHOLD));
+ if (!eq)
+ return 0;
+
+ fieldlen = eq - (const char*) data;
+ if (!journal_field_valid(data, fieldlen, true))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ name = strndupa(data, fieldlen);
+ if (output_fields && !set_contains(output_fields, name))
+ return 0;
+
+ return update_json_data(h, flags, name, eq + 1, size - fieldlen - 1);
+}
+
+static int output_json(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ char sid[SD_ID128_STRING_MAX], usecbuf[DECIMAL_STR_MAX(usec_t)];
+ _cleanup_(json_variant_unrefp) JsonVariant *object = NULL;
+ _cleanup_free_ char *cursor = NULL;
+ uint64_t realtime, monotonic;
+ JsonVariant **array = NULL;
+ struct json_data *d;
+ sd_id128_t boot_id;
+ Hashmap *h = NULL;
+ size_t n = 0;
+ int r;
+
+ assert(j);
+
+ (void) sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : JSON_THRESHOLD);
+
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return log_oom();
+
+ r = update_json_data(h, flags, "__CURSOR", cursor, strlen(cursor));
+ if (r < 0)
+ goto finish;
+
+ xsprintf(usecbuf, USEC_FMT, realtime);
+ r = update_json_data(h, flags, "__REALTIME_TIMESTAMP", usecbuf, strlen(usecbuf));
+ if (r < 0)
+ goto finish;
+
+ xsprintf(usecbuf, USEC_FMT, monotonic);
+ r = update_json_data(h, flags, "__MONOTONIC_TIMESTAMP", usecbuf, strlen(usecbuf));
+ if (r < 0)
+ goto finish;
+
+ sd_id128_to_string(boot_id, sid);
+ r = update_json_data(h, flags, "_BOOT_ID", sid, strlen(sid));
+ if (r < 0)
+ goto finish;
+
+ for (;;) {
+ const void *data;
+ size_t size;
+
+ r = sd_journal_enumerate_data(j, &data, &size);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ r = 0;
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to read journal: %m");
+ goto finish;
+ }
+ if (r == 0)
+ break;
+
+ r = update_json_data_split(h, flags, output_fields, data, size);
+ if (r < 0)
+ goto finish;
+ }
+
+ array = new(JsonVariant*, hashmap_size(h)*2);
+ if (!array) {
+ r = log_oom();
+ goto finish;
+ }
+
+ HASHMAP_FOREACH(d, h) {
+ assert(d->n_values > 0);
+
+ array[n++] = json_variant_ref(d->name);
+
+ if (d->n_values == 1)
+ array[n++] = json_variant_ref(d->values[0]);
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *q = NULL;
+
+ r = json_variant_new_array(&q, d->values, d->n_values);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create JSON array: %m");
+ goto finish;
+ }
+
+ array[n++] = TAKE_PTR(q);
+ }
+ }
+
+ r = json_variant_new_object(&object, array, n);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate JSON object: %m");
+ goto finish;
+ }
+
+ json_variant_dump(object,
+ output_mode_to_json_format_flags(mode) |
+ (FLAGS_SET(flags, OUTPUT_COLOR) ? JSON_FORMAT_COLOR : 0),
+ f, NULL);
+
+ r = 0;
+
+finish:
+ while ((d = hashmap_steal_first(h))) {
+ size_t k;
+
+ json_variant_unref(d->name);
+ for (k = 0; k < d->n_values; k++)
+ json_variant_unref(d->values[k]);
+
+ free(d);
+ }
+
+ hashmap_free(h);
+
+ json_variant_unref_many(array, n);
+ free(array);
+
+ return r;
+}
+
+static int output_cat_field(
+ FILE *f,
+ sd_journal *j,
+ OutputFlags flags,
+ int prio,
+ const char *field,
+ const size_t highlight[2]) {
+
+ const char *color_on = "", *color_off = "", *highlight_on = "";
+ const void *data;
+ size_t l, fl;
+ int r;
+
+ if (FLAGS_SET(flags, OUTPUT_COLOR))
+ get_log_colors(prio, &color_on, &color_off, &highlight_on);
+
+ r = sd_journal_get_data(j, field, &data, &l);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+ if (r == -ENOENT) /* An entry without the requested field */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get data: %m");
+
+ fl = strlen(field);
+ assert(l >= fl + 1);
+ assert(((char*) data)[fl] == '=');
+
+ data = (const uint8_t*) data + fl + 1;
+ l -= fl + 1;
+
+ if (FLAGS_SET(flags, OUTPUT_COLOR)) {
+ if (highlight) {
+ assert(highlight[0] <= highlight[1]);
+ assert(highlight[1] <= l);
+
+ fputs(color_on, f);
+ fwrite((const char*) data, 1, highlight[0], f);
+ fputs(highlight_on, f);
+ fwrite((const char*) data + highlight[0], 1, highlight[1] - highlight[0], f);
+ fputs(color_on, f);
+ fwrite((const char*) data + highlight[1], 1, l - highlight[1], f);
+ fputs(color_off, f);
+ } else {
+ fputs(color_on, f);
+ fwrite((const char*) data, 1, l, f);
+ fputs(color_off, f);
+ }
+ } else
+ fwrite((const char*) data, 1, l, f);
+
+ fputc('\n', f);
+ return 0;
+}
+
+static int output_cat(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ int r, prio = LOG_INFO;
+ const char *field;
+
+ assert(j);
+ assert(f);
+
+ (void) sd_journal_set_data_threshold(j, 0);
+
+ if (FLAGS_SET(flags, OUTPUT_COLOR)) {
+ const void *data;
+ size_t l;
+
+ /* Determine priority of this entry, so that we can color it nicely */
+
+ r = sd_journal_get_data(j, "PRIORITY", &data, &l);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to get data: %m");
+
+ /* An entry without PRIORITY */
+ } else if (l == 10 && memcmp(data, "PRIORITY=", 9) == 0) {
+ char c = ((char*) data)[9];
+
+ if (c >= '0' && c <= '7')
+ prio = c - '0';
+ }
+ }
+
+ if (set_isempty(output_fields))
+ return output_cat_field(f, j, flags, prio, "MESSAGE", highlight);
+
+ SET_FOREACH(field, output_fields) {
+ r = output_cat_field(f, j, flags, prio, field, streq(field, "MESSAGE") ? highlight : NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int (*output_funcs[_OUTPUT_MODE_MAX])(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) = {
+
+ [OUTPUT_SHORT] = output_short,
+ [OUTPUT_SHORT_ISO] = output_short,
+ [OUTPUT_SHORT_ISO_PRECISE] = output_short,
+ [OUTPUT_SHORT_PRECISE] = output_short,
+ [OUTPUT_SHORT_MONOTONIC] = output_short,
+ [OUTPUT_SHORT_UNIX] = output_short,
+ [OUTPUT_SHORT_FULL] = output_short,
+ [OUTPUT_VERBOSE] = output_verbose,
+ [OUTPUT_EXPORT] = output_export,
+ [OUTPUT_JSON] = output_json,
+ [OUTPUT_JSON_PRETTY] = output_json,
+ [OUTPUT_JSON_SSE] = output_json,
+ [OUTPUT_JSON_SEQ] = output_json,
+ [OUTPUT_CAT] = output_cat,
+ [OUTPUT_WITH_UNIT] = output_short,
+};
+
+int show_journal_entry(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ char **output_fields,
+ const size_t highlight[2],
+ bool *ellipsized) {
+
+ _cleanup_set_free_ Set *fields = NULL;
+ int r;
+
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ r = set_put_strdupv(&fields, output_fields);
+ if (r < 0)
+ return r;
+
+ r = output_funcs[mode](f, j, mode, n_columns, flags, fields, highlight);
+
+ if (ellipsized && r > 0)
+ *ellipsized = true;
+
+ return r;
+}
+
+static int maybe_print_begin_newline(FILE *f, OutputFlags *flags) {
+ assert(f);
+ assert(flags);
+
+ if (!(*flags & OUTPUT_BEGIN_NEWLINE))
+ return 0;
+
+ /* Print a beginning new line if that's request, but only once
+ * on the first line we print. */
+
+ fputc('\n', f);
+ *flags &= ~OUTPUT_BEGIN_NEWLINE;
+ return 0;
+}
+
+int show_journal(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ OutputFlags flags,
+ bool *ellipsized) {
+
+ int r;
+ unsigned line = 0;
+ bool need_seek = false;
+ int warn_cutoff = flags & OUTPUT_WARN_CUTOFF;
+
+ assert(j);
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+
+ if (how_many == (unsigned) -1)
+ need_seek = true;
+ else {
+ /* Seek to end */
+ r = sd_journal_seek_tail(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to seek to tail: %m");
+
+ r = sd_journal_previous_skip(j, how_many);
+ if (r < 0)
+ return log_error_errno(r, "Failed to skip previous: %m");
+ }
+
+ for (;;) {
+ usec_t usec;
+
+ if (need_seek) {
+ r = sd_journal_next(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to iterate through journal: %m");
+ }
+
+ if (r == 0)
+ break;
+
+ need_seek = true;
+
+ if (not_before > 0) {
+ r = sd_journal_get_monotonic_usec(j, &usec, NULL);
+
+ /* -ESTALE is returned if the timestamp is not from this boot */
+ if (r == -ESTALE)
+ continue;
+ else if (r < 0)
+ return log_error_errno(r, "Failed to get journal time: %m");
+
+ if (usec < not_before)
+ continue;
+ }
+
+ line++;
+ maybe_print_begin_newline(f, &flags);
+
+ r = show_journal_entry(f, j, mode, n_columns, flags, NULL, NULL, ellipsized);
+ if (r < 0)
+ return r;
+ }
+
+ if (warn_cutoff && line < how_many && not_before > 0) {
+ sd_id128_t boot_id;
+ usec_t cutoff = 0;
+
+ /* Check whether the cutoff line is too early */
+
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id: %m");
+
+ r = sd_journal_get_cutoff_monotonic_usec(j, boot_id, &cutoff, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get journal cutoff time: %m");
+
+ if (r > 0 && not_before < cutoff) {
+ maybe_print_begin_newline(f, &flags);
+
+ /* If we logged *something* and no permission error happened, than we can reliably
+ * emit the warning about rotation. If we didn't log anything and access errors
+ * happened, emit hint about permissions. Otherwise, give a generic message, since we
+ * can't diagnose the issue. */
+
+ bool noaccess = journal_access_blocked(j);
+
+ if (line == 0 && noaccess)
+ fprintf(f, "Warning: some journal files were not opened due to insufficient permissions.");
+ else if (!noaccess)
+ fprintf(f, "Warning: journal has been rotated since unit was started, output may be incomplete.\n");
+ else
+ fprintf(f, "Warning: journal has been rotated since unit was started and some journal "
+ "files were not opened due to insufficient permissions, output may be incomplete.\n");
+ }
+
+ warn_cutoff = false;
+ }
+
+ return 0;
+}
+
+int add_matches_for_unit(sd_journal *j, const char *unit) {
+ const char *m1, *m2, *m3, *m4;
+ int r;
+
+ assert(j);
+ assert(unit);
+
+ m1 = strjoina("_SYSTEMD_UNIT=", unit);
+ m2 = strjoina("COREDUMP_UNIT=", unit);
+ m3 = strjoina("UNIT=", unit);
+ m4 = strjoina("OBJECT_SYSTEMD_UNIT=", unit);
+
+ (void)(
+ /* Look for messages from the service itself */
+ (r = sd_journal_add_match(j, m1, 0)) ||
+
+ /* Look for coredumps of the service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1", 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+
+ /* Look for messages from PID 1 about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "_PID=1", 0)) ||
+ (r = sd_journal_add_match(j, m3, 0)) ||
+
+ /* Look for messages from authorized daemons about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+ (r = sd_journal_add_match(j, m4, 0))
+ );
+
+ if (r == 0 && endswith(unit, ".slice")) {
+ const char *m5;
+
+ m5 = strjoina("_SYSTEMD_SLICE=", unit);
+
+ /* Show all messages belonging to a slice */
+ (void)(
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m5, 0))
+ );
+ }
+
+ return r;
+}
+
+int add_matches_for_user_unit(sd_journal *j, const char *unit, uid_t uid) {
+ int r;
+ char *m1, *m2, *m3, *m4;
+ char muid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)];
+
+ assert(j);
+ assert(unit);
+
+ m1 = strjoina("_SYSTEMD_USER_UNIT=", unit);
+ m2 = strjoina("USER_UNIT=", unit);
+ m3 = strjoina("COREDUMP_USER_UNIT=", unit);
+ m4 = strjoina("OBJECT_SYSTEMD_USER_UNIT=", unit);
+ sprintf(muid, "_UID="UID_FMT, uid);
+
+ (void) (
+ /* Look for messages from the user service itself */
+ (r = sd_journal_add_match(j, m1, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+
+ /* Look for messages from systemd about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+
+ /* Look for coredumps of the service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m3, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+
+ /* Look for messages from authorized daemons about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m4, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0))
+ );
+
+ if (r == 0 && endswith(unit, ".slice")) {
+ const char *m5;
+
+ m5 = strjoina("_SYSTEMD_SLICE=", unit);
+
+ /* Show all messages belonging to a slice */
+ (void)(
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m5, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0))
+ );
+ }
+
+ return r;
+}
+
+static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1;
+ char buf[ID128_UUID_STRING_MAX];
+ pid_t pid, child;
+ ssize_t k;
+ int r;
+
+ assert(machine);
+ assert(boot_id);
+
+ if (!machine_name_is_valid(machine))
+ return -EINVAL;
+
+ r = container_get_leader(machine, &pid);
+ if (r < 0)
+ return r;
+
+ r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, NULL, &rootfd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-bootidns)", "(sd-bootid)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, -1, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ int fd;
+
+ pair[0] = safe_close(pair[0]);
+
+ fd = open("/proc/sys/kernel/random/boot_id", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ _exit(EXIT_FAILURE);
+
+ r = loop_read_exact(fd, buf, 36, false);
+ safe_close(fd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ k = send(pair[1], buf, 36, MSG_NOSIGNAL);
+ if (k != 36)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-bootidns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ k = recv(pair[0], buf, 36, 0);
+ if (k != 36)
+ return -EIO;
+
+ buf[36] = 0;
+ r = sd_id128_from_string(buf, boot_id);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int add_match_this_boot(sd_journal *j, const char *machine) {
+ char match[9+32+1] = "_BOOT_ID=";
+ sd_id128_t boot_id;
+ int r;
+
+ assert(j);
+
+ if (machine) {
+ r = get_boot_id_for_machine(machine, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id of container %s: %m", machine);
+ } else {
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id: %m");
+ }
+
+ sd_id128_to_string(boot_id, match + 9);
+ r = sd_journal_add_match(j, match, strlen(match));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+int show_journal_by_unit(
+ FILE *f,
+ const char *unit,
+ const char *log_namespace,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ uid_t uid,
+ OutputFlags flags,
+ int journal_open_flags,
+ bool system_unit,
+ bool *ellipsized) {
+
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+ assert(unit);
+
+ if (how_many <= 0)
+ return 0;
+
+ r = sd_journal_open_namespace(&j, log_namespace, journal_open_flags | SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journal: %m");
+
+ r = add_match_this_boot(j, NULL);
+ if (r < 0)
+ return r;
+
+ if (system_unit)
+ r = add_matches_for_unit(j, unit);
+ else
+ r = add_matches_for_user_unit(j, unit, uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add unit matches: %m");
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *filter;
+
+ filter = journal_make_match_string(j);
+ if (!filter)
+ return log_oom();
+
+ log_debug("Journal filter: %s", filter);
+ }
+
+ return show_journal(f, j, mode, n_columns, not_before, how_many, flags, ellipsized);
+}
diff --git a/src/shared/logs-show.h b/src/shared/logs-show.h
new file mode 100644
index 0000000..71ebe13
--- /dev/null
+++ b/src/shared/logs-show.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "sd-journal.h"
+
+#include "macro.h"
+#include "output-mode.h"
+#include "time-util.h"
+#include "util.h"
+
+int show_journal_entry(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ char **output_fields,
+ const size_t highlight[2],
+ bool *ellipsized);
+int show_journal(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ OutputFlags flags,
+ bool *ellipsized);
+
+int add_match_this_boot(sd_journal *j, const char *machine);
+
+int add_matches_for_unit(
+ sd_journal *j,
+ const char *unit);
+
+int add_matches_for_user_unit(
+ sd_journal *j,
+ const char *unit,
+ uid_t uid);
+
+int show_journal_by_unit(
+ FILE *f,
+ const char *unit,
+ const char *namespace,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ uid_t uid,
+ OutputFlags flags,
+ int journal_open_flags,
+ bool system_unit,
+ bool *ellipsized);
+
+void json_escape(
+ FILE *f,
+ const char* p,
+ size_t l,
+ OutputFlags flags);
diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c
new file mode 100644
index 0000000..84f415a
--- /dev/null
+++ b/src/shared/loop-util.c
@@ -0,0 +1,722 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/blkpg.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "device-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "loop-util.h"
+#include "missing_loop.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+
+static void cleanup_clear_loop_close(int *fd) {
+ if (*fd < 0)
+ return;
+
+ (void) ioctl(*fd, LOOP_CLR_FD);
+ (void) safe_close(*fd);
+}
+
+static int loop_is_bound(int fd) {
+ struct loop_info64 info;
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) {
+ if (errno == ENXIO)
+ return false; /* not bound! */
+
+ return -errno;
+ }
+
+ return true; /* bound! */
+}
+
+static int device_has_block_children(sd_device *d) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ const char *main_sn, *main_ss;
+ sd_device *q;
+ int r;
+
+ assert(d);
+
+ /* Checks if the specified device currently has block device children (i.e. partition block
+ * devices). */
+
+ r = sd_device_get_sysname(d, &main_sn);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(d, &main_ss);
+ if (r < 0)
+ return r;
+
+ if (!streq(main_ss, "block"))
+ return -EINVAL;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_parent(e, d);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, q) {
+ const char *ss, *sn;
+
+ r = sd_device_get_subsystem(q, &ss);
+ if (r < 0)
+ continue;
+
+ if (!streq(ss, "block"))
+ continue;
+
+ r = sd_device_get_sysname(q, &sn);
+ if (r < 0)
+ continue;
+
+ if (streq(sn, main_sn))
+ continue;
+
+ return 1; /* we have block device children */
+ }
+
+ return 0;
+}
+
+static int loop_configure(
+ int fd,
+ int nr,
+ const struct loop_config *c,
+ bool *try_loop_configure) {
+
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *sysname = NULL;
+ _cleanup_close_ int lock_fd = -1;
+ int r;
+
+ assert(fd >= 0);
+ assert(nr >= 0);
+ assert(c);
+ assert(try_loop_configure);
+
+ if (asprintf(&sysname, "loop%i", nr) < 0)
+ return -ENOMEM;
+
+ r = sd_device_new_from_subsystem_sysname(&d, "block", sysname);
+ if (r < 0)
+ return r;
+
+ /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
+ * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
+ * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
+ * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
+ * long time udev would possibly never run on it again, even though the fd is unlocked, simply
+ * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
+ * automatically release the lock, after we are done. */
+ lock_fd = fd_reopen(fd, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (lock_fd < 0)
+ return lock_fd;
+ if (flock(lock_fd, LOCK_EX) < 0)
+ return -errno;
+
+ /* Let's see if the device is really detached, i.e. currently has no associated partition block
+ * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
+ * superficially is detached but still has partition block devices associated for it. They only go
+ * away when the device is reattached. (Yes, LOOP_CLR_FD doesn't work then, because officially
+ * nothing is attached and LOOP_CTL_REMOVE doesn't either, since it doesn't care about partition
+ * block devices. */
+ r = device_has_block_children(d);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = loop_is_bound(fd);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -EBUSY;
+
+ return -EUCLEAN; /* Bound but children? Tell caller to reattach something so that the
+ * partition block devices are gone too. */
+ }
+
+ if (*try_loop_configure) {
+ if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
+ /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other
+ * errors. Note that the kernel is weird: non-existing ioctls currently return EINVAL
+ * rather than ENOTTY on loopback block devices. They should fix that in the kernel,
+ * but in the meantime we accept both here. */
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
+ return -errno;
+
+ *try_loop_configure = false;
+ } else {
+ bool good = true;
+
+ if (c->info.lo_sizelimit != 0) {
+ /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the
+ * block device. If it's used, let's immediately check if it had the desired
+ * effect hence. And if not use classic LOOP_SET_STATUS64. */
+ uint64_t z;
+
+ if (ioctl(fd, BLKGETSIZE64, &z) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (z != c->info.lo_sizelimit) {
+ log_debug("LOOP_CONFIGURE is broken, doesn't honour .lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
+ good = false;
+ }
+ }
+
+ if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
+ /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag
+ * into the block device. Let's hence verify if things work correctly here
+ * before returning. */
+
+ r = blockdev_partscan_enabled(fd);
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
+ good = false;
+ }
+ }
+
+ if (!good) {
+ /* LOOP_CONFIGURE doesn't work. Remember that. */
+ *try_loop_configure = false;
+
+ /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
+ * because LOOP_CLR_FD is async: if the operation cannot be executed right
+ * away it just sets the autoclear flag on the device. This means there's a
+ * good chance we cannot actually reuse the loopback device right-away. Hence
+ * let's assume it's busy, avoid the trouble and let the calling loop call us
+ * again with a new, likely unused device. */
+ r = -EBUSY;
+ goto fail;
+ }
+
+ return 0;
+ }
+ }
+
+ /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
+ * ioctl can return EAGAIN in case we change the lo_offset field, if someone else is accessing the
+ * block device while we try to reconfigure it. This is a pretty common case, since udev might
+ * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
+ * first, let's take the BSD lock to ensure that udev will not step in between the point in
+ * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
+ * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
+ * needlessly if we are just racing against udev. The latter is protection against all other cases,
+ * i.e. peers that do not take the BSD lock. */
+
+ if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
+ return -errno;
+
+ for (unsigned n_attempts = 0;;) {
+ if (ioctl(fd, LOOP_SET_STATUS64, &c->info) >= 0)
+ break;
+ if (errno != EAGAIN || ++n_attempts >= 64) {
+ r = log_debug_errno(errno, "Failed to configure loopback device: %m");
+ goto fail;
+ }
+
+ /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
+ * failed attempts we see */
+ (void) usleep(UINT64_C(10) * USEC_PER_MSEC +
+ random_u64() % (UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
+ }
+
+ return 0;
+
+fail:
+ (void) ioctl(fd, LOOP_CLR_FD);
+ return r;
+}
+
+static int attach_empty_file(int loop, int nr) {
+ _cleanup_close_ int fd = -1;
+
+ /* So here's the thing: on various kernels (5.8 at least) loop block devices might enter a state
+ * where they are detached but nonetheless have partitions, when used heavily. Accessing these
+ * partitions results in immediatey IO errors. There's no pretty way to get rid of them
+ * again. Neither LOOP_CLR_FD nor LOOP_CTL_REMOVE suffice (see above). What does work is to
+ * reassociate them with a new fd however. This is what we do here hence: we associate the devices
+ * with an empty file (i.e. an image that definitely has no partitions). We then immediately clear it
+ * again. This suffices to make the partitions go away. Ugly but appears to work. */
+
+ log_debug("Found unattached loopback block device /dev/loop%i with partitions. Attaching empty file to remove them.", nr);
+
+ fd = open_tmpfile_unlinkable(NULL, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ if (flock(loop, LOCK_EX) < 0)
+ return -errno;
+
+ if (ioctl(loop, LOOP_SET_FD, fd) < 0)
+ return -errno;
+
+ if (ioctl(loop, LOOP_SET_STATUS64, &(struct loop_info64) {
+ .lo_flags = LO_FLAGS_READ_ONLY|
+ LO_FLAGS_AUTOCLEAR|
+ LO_FLAGS_PARTSCAN, /* enable partscan, so that the partitions really go away */
+ }) < 0)
+ return -errno;
+
+ if (ioctl(loop, LOOP_CLR_FD) < 0)
+ return -errno;
+
+ /* The caller is expected to immediately close the loopback device after this, so that the BSD lock
+ * is released, and udev sees the changes. */
+ return 0;
+}
+
+int loop_device_make(
+ int fd,
+ int open_flags,
+ uint64_t offset,
+ uint64_t size,
+ uint32_t loop_flags,
+ LoopDevice **ret) {
+
+ _cleanup_free_ char *loopdev = NULL;
+ bool try_loop_configure = true;
+ struct loop_config config;
+ LoopDevice *d = NULL;
+ struct stat st;
+ int nr = -1, r;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (S_ISBLK(st.st_mode)) {
+ if (ioctl(fd, LOOP_GET_STATUS64, &config.info) >= 0) {
+ /* Oh! This is a loopback device? That's interesting! */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+ /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
+ VALGRIND_MAKE_MEM_DEFINED(&config.info, sizeof(config.info));
+#endif
+ nr = config.info.lo_number;
+
+ if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
+ return -ENOMEM;
+ }
+
+ if (offset == 0 && IN_SET(size, 0, UINT64_MAX)) {
+ _cleanup_close_ int copy = -1;
+
+ /* If this is already a block device, store a copy of the fd as it is */
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ d = new(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+ *d = (LoopDevice) {
+ .fd = TAKE_FD(copy),
+ .nr = nr,
+ .node = TAKE_PTR(loopdev),
+ .relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */
+ };
+
+ *ret = d;
+ return d->fd;
+ }
+ } else {
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+ }
+
+ _cleanup_close_ int control = -1;
+ _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -1;
+
+ control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (control < 0)
+ return -errno;
+
+ config = (struct loop_config) {
+ .fd = fd,
+ .info = {
+ /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
+ .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
+ .lo_offset = offset,
+ .lo_sizelimit = size == UINT64_MAX ? 0 : size,
+ },
+ };
+
+ /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
+ * be gone already, taken by somebody else racing against us. */
+ for (unsigned n_attempts = 0;;) {
+ _cleanup_close_ int loop = -1;
+
+ nr = ioctl(control, LOOP_CTL_GET_FREE);
+ if (nr < 0)
+ return -errno;
+
+ if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
+ return -ENOMEM;
+
+ loop = open(loopdev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+ if (loop < 0) {
+ /* Somebody might've gotten the same number from the kernel, used the device,
+ * and called LOOP_CTL_REMOVE on it. Let's retry with a new number. */
+ if (!IN_SET(errno, ENOENT, ENXIO))
+ return -errno;
+ } else {
+ r = loop_configure(loop, nr, &config, &try_loop_configure);
+ if (r >= 0) {
+ loop_with_fd = TAKE_FD(loop);
+ break;
+ }
+ if (r == -EUCLEAN) {
+ /* Make left-over partition disappear hack (see above) */
+ r = attach_empty_file(loop, nr);
+ if (r < 0 && r != -EBUSY)
+ return r;
+ } else if (r != -EBUSY)
+ return r;
+ }
+
+ if (++n_attempts >= 64) /* Give up eventually */
+ return -EBUSY;
+
+ loopdev = mfree(loopdev);
+
+ /* Wait some random time, to make collision less likely. Let's pick a random time in the
+ * range 0ms…250ms, linearly scaled by the number of failed attempts. */
+ (void) usleep(random_u64() % (UINT64_C(10) * USEC_PER_MSEC +
+ UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
+ }
+
+ d = new(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+ *d = (LoopDevice) {
+ .fd = TAKE_FD(loop_with_fd),
+ .node = TAKE_PTR(loopdev),
+ .nr = nr,
+ };
+
+ *ret = d;
+ return 0;
+}
+
+int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(path);
+ assert(ret);
+ assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+ /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
+ * read-only if we cannot. */
+
+ fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|(open_flags >= 0 ? open_flags : O_RDWR));
+ if (fd < 0) {
+ r = -errno;
+
+ /* Retry read-only? */
+ if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
+ return r;
+
+ fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
+ if (fd < 0)
+ return r; /* Propagate original error */
+
+ open_flags = O_RDONLY;
+ } else if (open_flags < 0)
+ open_flags = O_RDWR;
+
+ return loop_device_make(fd, open_flags, 0, 0, loop_flags, ret);
+}
+
+LoopDevice* loop_device_unref(LoopDevice *d) {
+ if (!d)
+ return NULL;
+
+ if (d->fd >= 0) {
+ /* Implicitly sync the device, since otherwise in-flight blocks might not get written */
+ if (fsync(d->fd) < 0)
+ log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");
+
+ if (d->nr >= 0 && !d->relinquished) {
+ if (ioctl(d->fd, LOOP_CLR_FD) < 0)
+ log_debug_errno(errno, "Failed to clear loop device: %m");
+
+ }
+
+ safe_close(d->fd);
+ }
+
+ if (d->nr >= 0 && !d->relinquished) {
+ _cleanup_close_ int control = -1;
+
+ control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (control < 0)
+ log_warning_errno(errno,
+ "Failed to open loop control device, cannot remove loop device %s: %m",
+ strna(d->node));
+ else
+ for (unsigned n_attempts = 0;;) {
+ if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
+ break;
+ if (errno != EBUSY || ++n_attempts >= 64) {
+ log_warning_errno(errno, "Failed to remove device %s: %m", strna(d->node));
+ break;
+ }
+ (void) usleep(50 * USEC_PER_MSEC);
+ }
+ }
+
+ free(d->node);
+ return mfree(d);
+}
+
+void loop_device_relinquish(LoopDevice *d) {
+ assert(d);
+
+ /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel
+ * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */
+
+ d->relinquished = true;
+}
+
+int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret) {
+ _cleanup_close_ int loop_fd = -1;
+ _cleanup_free_ char *p = NULL;
+ struct loop_info64 info;
+ struct stat st;
+ LoopDevice *d;
+ int nr;
+
+ assert(loop_path);
+ assert(ret);
+
+ loop_fd = open(loop_path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+ if (loop_fd < 0)
+ return -errno;
+
+ if (fstat(loop_fd, &st) < 0)
+ return -errno;
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ if (ioctl(loop_fd, LOOP_GET_STATUS64, &info) >= 0) {
+#if HAVE_VALGRIND_MEMCHECK_H
+ /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
+ VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
+#endif
+ nr = info.lo_number;
+ } else
+ nr = -1;
+
+ p = strdup(loop_path);
+ if (!p)
+ return -ENOMEM;
+
+ d = new(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (LoopDevice) {
+ .fd = TAKE_FD(loop_fd),
+ .nr = nr,
+ .node = TAKE_PTR(p),
+ .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
+ };
+
+ *ret = d;
+ return d->fd;
+}
+
+static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
+ char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
+ _cleanup_free_ char *whole = NULL, *buffer = NULL;
+ uint64_t current_offset, current_size, partno;
+ _cleanup_close_ int whole_fd = -1;
+ struct stat st;
+ dev_t devno;
+ int r;
+
+ assert(partition_fd >= 0);
+
+ /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
+ * loopback device), and changes the offset, if needed. This is a fancy wrapper around
+ * BLKPG_RESIZE_PARTITION. */
+
+ if (fstat(partition_fd, &st) < 0)
+ return -errno;
+
+ assert(S_ISBLK(st.st_mode));
+
+ xsprintf(sysfs, "/sys/dev/block/%u:%u/partition", major(st.st_rdev), minor(st.st_rdev));
+ r = read_one_line_file(sysfs, &buffer);
+ if (r == -ENOENT) /* not a partition, cannot resize */
+ return -ENOTTY;
+ if (r < 0)
+ return r;
+ r = safe_atou64(buffer, &partno);
+ if (r < 0)
+ return r;
+
+ xsprintf(sysfs, "/sys/dev/block/%u:%u/start", major(st.st_rdev), minor(st.st_rdev));
+
+ buffer = mfree(buffer);
+ r = read_one_line_file(sysfs, &buffer);
+ if (r < 0)
+ return r;
+ r = safe_atou64(buffer, &current_offset);
+ if (r < 0)
+ return r;
+ if (current_offset > UINT64_MAX/512U)
+ return -EINVAL;
+ current_offset *= 512U;
+
+ if (ioctl(partition_fd, BLKGETSIZE64, &current_size) < 0)
+ return -EINVAL;
+
+ if (size == UINT64_MAX && offset == UINT64_MAX)
+ return 0;
+ if (current_size == size && current_offset == offset)
+ return 0;
+
+ xsprintf(sysfs, "/sys/dev/block/%u:%u/../dev", major(st.st_rdev), minor(st.st_rdev));
+
+ buffer = mfree(buffer);
+ r = read_one_line_file(sysfs, &buffer);
+ if (r < 0)
+ return r;
+ r = parse_dev(buffer, &devno);
+ if (r < 0)
+ return r;
+
+ r = device_path_make_major_minor(S_IFBLK, devno, &whole);
+ if (r < 0)
+ return r;
+
+ whole_fd = open(whole, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (whole_fd < 0)
+ return -errno;
+
+ struct blkpg_partition bp = {
+ .pno = partno,
+ .start = offset == UINT64_MAX ? current_offset : offset,
+ .length = size == UINT64_MAX ? current_size : size,
+ };
+
+ struct blkpg_ioctl_arg ba = {
+ .op = BLKPG_RESIZE_PARTITION,
+ .data = &bp,
+ .datalen = sizeof(bp),
+ };
+
+ if (ioctl(whole_fd, BLKPG, &ba) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
+ struct loop_info64 info;
+ assert(d);
+
+ /* Changes the offset/start of the loop device relative to the beginning of the underlying file or
+ * block device. If this loop device actually refers to a partition and not a loopback device, we'll
+ * try to adjust the partition offsets instead.
+ *
+ * If either offset or size is UINT64_MAX we won't change that parameter. */
+
+ if (d->fd < 0)
+ return -EBADF;
+
+ if (d->nr < 0) /* not a loopback device */
+ return resize_partition(d->fd, offset, size);
+
+ if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
+ return -errno;
+
+#if HAVE_VALGRIND_MEMCHECK_H
+ /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
+ VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
+#endif
+
+ if (size == UINT64_MAX && offset == UINT64_MAX)
+ return 0;
+ if (info.lo_sizelimit == size && info.lo_offset == offset)
+ return 0;
+
+ if (size != UINT64_MAX)
+ info.lo_sizelimit = size;
+ if (offset != UINT64_MAX)
+ info.lo_offset = offset;
+
+ if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int loop_device_flock(LoopDevice *d, int operation) {
+ assert(d);
+
+ if (d->fd < 0)
+ return -EBADF;
+
+ if (flock(d->fd, operation) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int loop_device_sync(LoopDevice *d) {
+ assert(d);
+
+ /* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that
+ * we can check the return value though. */
+
+ if (d->fd < 0)
+ return -EBADF;
+
+ if (fsync(d->fd) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h
new file mode 100644
index 0000000..9538dae
--- /dev/null
+++ b/src/shared/loop-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+typedef struct LoopDevice LoopDevice;
+
+/* Some helpers for setting up loopback block devices */
+
+struct LoopDevice {
+ int fd;
+ int nr;
+ char *node;
+ bool relinquished;
+};
+
+int loop_device_make(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t loop_flags, LoopDevice **ret);
+int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret);
+int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret);
+
+LoopDevice* loop_device_unref(LoopDevice *d);
+DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref);
+
+void loop_device_relinquish(LoopDevice *d);
+
+int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size);
+
+int loop_device_flock(LoopDevice *d, int operation);
+int loop_device_sync(LoopDevice *d);
diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c
new file mode 100644
index 0000000..671a56b
--- /dev/null
+++ b/src/shared/machine-image.c
@@ -0,0 +1,1274 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "lockfile-util.h"
+#include "log.h"
+#include "loop-util.h"
+#include "machine-image.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "xattr-util.h"
+
+static const char* const image_search_path[_IMAGE_CLASS_MAX] = {
+ [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */
+ "/run/machines\0" /* and here too */
+ "/var/lib/machines\0" /* the main place for images */
+ "/var/lib/container\0" /* legacy */
+ "/usr/local/lib/machines\0"
+ "/usr/lib/machines\0",
+
+ [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */
+ "/run/portables\0" /* and here too */
+ "/var/lib/portables\0" /* the main place for images */
+ "/usr/local/lib/portables\0"
+ "/usr/lib/portables\0",
+};
+
+static Image *image_free(Image *i) {
+ assert(i);
+
+ free(i->name);
+ free(i->path);
+
+ free(i->hostname);
+ strv_free(i->machine_info);
+ strv_free(i->os_release);
+
+ return mfree(i);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
+ Image, image_unref);
+
+static char **image_settings_path(Image *image) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *fn, *s;
+ unsigned i = 0;
+
+ assert(image);
+
+ l = new0(char*, 4);
+ if (!l)
+ return NULL;
+
+ fn = strjoina(image->name, ".nspawn");
+
+ FOREACH_STRING(s, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
+ l[i] = path_join(s, fn);
+ if (!l[i])
+ return NULL;
+
+ i++;
+ }
+
+ l[i] = file_in_same_dir(image->path, fn);
+ if (!l[i])
+ return NULL;
+
+ return TAKE_PTR(l);
+}
+
+static char *image_roothash_path(Image *image) {
+ const char *fn;
+
+ assert(image);
+
+ fn = strjoina(image->name, ".roothash");
+
+ return file_in_same_dir(image->path, fn);
+}
+
+static int image_new(
+ ImageType t,
+ const char *pretty,
+ const char *path,
+ const char *filename,
+ bool read_only,
+ usec_t crtime,
+ usec_t mtime,
+ Image **ret) {
+
+ _cleanup_(image_unrefp) Image *i = NULL;
+
+ assert(t >= 0);
+ assert(t < _IMAGE_TYPE_MAX);
+ assert(pretty);
+ assert(filename);
+ assert(ret);
+
+ i = new0(Image, 1);
+ if (!i)
+ return -ENOMEM;
+
+ i->n_ref = 1;
+ i->type = t;
+ i->read_only = read_only;
+ i->crtime = crtime;
+ i->mtime = mtime;
+ i->usage = i->usage_exclusive = (uint64_t) -1;
+ i->limit = i->limit_exclusive = (uint64_t) -1;
+
+ i->name = strdup(pretty);
+ if (!i->name)
+ return -ENOMEM;
+
+ i->path = path_join(path, filename);
+ if (!i->path)
+ return -ENOMEM;
+
+ path_simplify(i->path, false);
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static int extract_pretty(const char *path, const char *suffix, char **ret) {
+ _cleanup_free_ char *name = NULL;
+ const char *p;
+ size_t n;
+
+ assert(path);
+ assert(ret);
+
+ p = last_path_component(path);
+ n = strcspn(p, "/");
+
+ name = strndup(p, n);
+ if (!name)
+ return -ENOMEM;
+
+ if (suffix) {
+ char *e;
+
+ e = endswith(name, suffix);
+ if (!e)
+ return -EINVAL;
+
+ *e = 0;
+ }
+
+ if (!image_name_is_valid(name))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(name);
+ return 0;
+}
+
+static int image_make(
+ const char *pretty,
+ int dfd,
+ const char *path,
+ const char *filename,
+ const struct stat *st,
+ Image **ret) {
+
+ _cleanup_free_ char *pretty_buffer = NULL, *parent = NULL;
+ struct stat stbuf;
+ bool read_only;
+ int r;
+
+ assert(dfd >= 0 || dfd == AT_FDCWD);
+ assert(path || dfd == AT_FDCWD);
+ assert(filename);
+
+ /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
+ * devices into /var/lib/machines/, and treat them normally.
+ *
+ * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we
+ * recognize. */
+
+ if (!st) {
+ if (fstatat(dfd, filename, &stbuf, 0) < 0)
+ return -errno;
+
+ st = &stbuf;
+ }
+
+ if (!path) {
+ if (dfd == AT_FDCWD)
+ (void) safe_getcwd(&parent);
+ else
+ (void) fd_get_path(dfd, &parent);
+ }
+
+ read_only =
+ (path && path_startswith(path, "/usr")) ||
+ (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
+
+ if (S_ISDIR(st->st_mode)) {
+ _cleanup_close_ int fd = -1;
+ unsigned file_attr = 0;
+
+ if (!ret)
+ return 0;
+
+ if (!pretty) {
+ r = extract_pretty(filename, NULL, &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ /* btrfs subvolumes have inode 256 */
+ if (st->st_ino == 256) {
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (r) {
+ BtrfsSubvolInfo info;
+
+ /* It's a btrfs subvolume */
+
+ r = btrfs_subvol_get_info_fd(fd, 0, &info);
+ if (r < 0)
+ return r;
+
+ r = image_new(IMAGE_SUBVOLUME,
+ pretty,
+ path,
+ filename,
+ info.read_only || read_only,
+ info.otime,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ if (btrfs_quota_scan_ongoing(fd) == 0) {
+ BtrfsQuotaInfo quota;
+
+ r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
+ if (r >= 0) {
+ (*ret)->usage = quota.referenced;
+ (*ret)->usage_exclusive = quota.exclusive;
+
+ (*ret)->limit = quota.referenced_max;
+ (*ret)->limit_exclusive = quota.exclusive_max;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ /* If the IMMUTABLE bit is set, we consider the
+ * directory read-only. Since the ioctl is not
+ * supported everywhere we ignore failures. */
+ (void) read_attr_fd(fd, &file_attr);
+
+ /* It's just a normal directory. */
+ r = image_new(IMAGE_DIRECTORY,
+ pretty,
+ path,
+ filename,
+ read_only || (file_attr & FS_IMMUTABLE_FL),
+ 0,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ return 0;
+
+ } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
+ usec_t crtime = 0;
+
+ /* It's a RAW disk image */
+
+ if (!ret)
+ return 0;
+
+ (void) fd_getcrtime_at(dfd, filename, &crtime, 0);
+
+ if (!pretty) {
+ r = extract_pretty(filename, ".raw", &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ r = image_new(IMAGE_RAW,
+ pretty,
+ path,
+ filename,
+ !(st->st_mode & 0222) || read_only,
+ crtime,
+ timespec_load(&st->st_mtim),
+ ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
+ (*ret)->limit = (*ret)->limit_exclusive = st->st_size;
+
+ return 0;
+
+ } else if (S_ISBLK(st->st_mode)) {
+ _cleanup_close_ int block_fd = -1;
+ uint64_t size = UINT64_MAX;
+
+ /* A block device */
+
+ if (!ret)
+ return 0;
+
+ if (!pretty) {
+ r = extract_pretty(filename, NULL, &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
+ if (block_fd < 0)
+ log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
+ else {
+ /* Refresh stat data after opening the node */
+ if (fstat(block_fd, &stbuf) < 0)
+ return -errno;
+ st = &stbuf;
+
+ if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */
+ return -ENOTTY;
+
+ if (!read_only) {
+ int state = 0;
+
+ if (ioctl(block_fd, BLKROGET, &state) < 0)
+ log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
+ else if (state)
+ read_only = true;
+ }
+
+ if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
+ log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
+
+ block_fd = safe_close(block_fd);
+ }
+
+ r = image_new(IMAGE_BLOCK,
+ pretty,
+ path,
+ filename,
+ !(st->st_mode & 0222) || read_only,
+ 0,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(size, 0, UINT64_MAX))
+ (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
+
+ return 0;
+ }
+
+ return -EMEDIUMTYPE;
+}
+
+int image_find(ImageClass class, const char *name, Image **ret) {
+ const char *path;
+ int r;
+
+ assert(class >= 0);
+ assert(class < _IMAGE_CLASS_MAX);
+ assert(name);
+
+ /* There are no images with invalid names */
+ if (!image_name_is_valid(name))
+ return -ENOENT;
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct stat st;
+
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to
+ * symlink block devices into the search path */
+ if (fstatat(dirfd(d), name, &st, 0) < 0) {
+ _cleanup_free_ char *raw = NULL;
+
+ if (errno != ENOENT)
+ return -errno;
+
+ raw = strjoin(name, ".raw");
+ if (!raw)
+ return -ENOMEM;
+
+ if (fstatat(dirfd(d), raw, &st, 0) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ r = image_make(name, dirfd(d), path, raw, &st, ret);
+
+ } else {
+ if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode))
+ continue;
+
+ r = image_make(name, dirfd(d), path, name, &st, ret);
+ }
+ if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
+ continue;
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->discoverable = true;
+
+ return 1;
+ }
+
+ if (class == IMAGE_MACHINE && streq(name, ".host")) {
+ r = image_make(".host", AT_FDCWD, NULL, "/", NULL, ret);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->discoverable = true;
+
+ return r;
+ }
+
+ return -ENOENT;
+};
+
+int image_from_path(const char *path, Image **ret) {
+
+ /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether
+ * the image is in the image search path. And if it is we don't know if the path we used is actually not
+ * overridden by another, different image earlier in the search path */
+
+ if (path_equal(path, "/"))
+ return image_make(".host", AT_FDCWD, NULL, "/", NULL, ret);
+
+ return image_make(NULL, AT_FDCWD, NULL, path, NULL, ret);
+}
+
+int image_find_harder(ImageClass class, const char *name_or_path, Image **ret) {
+ if (image_name_is_valid(name_or_path))
+ return image_find(class, name_or_path, ret);
+
+ return image_from_path(name_or_path, ret);
+}
+
+int image_discover(ImageClass class, Hashmap *h) {
+ const char *path;
+ int r;
+
+ assert(class >= 0);
+ assert(class < _IMAGE_CLASS_MAX);
+ assert(h);
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_(image_unrefp) Image *image = NULL;
+ _cleanup_free_ char *truncated = NULL;
+ const char *pretty;
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people
+ * to symlink block devices into the search path */
+ if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ if (S_ISREG(st.st_mode)) {
+ const char *e;
+
+ e = endswith(de->d_name, ".raw");
+ if (!e)
+ continue;
+
+ truncated = strndup(de->d_name, e - de->d_name);
+ if (!truncated)
+ return -ENOMEM;
+
+ pretty = truncated;
+ } else if (S_ISDIR(st.st_mode) || S_ISBLK(st.st_mode))
+ pretty = de->d_name;
+ else
+ continue;
+
+ if (!image_name_is_valid(pretty))
+ continue;
+
+ if (hashmap_contains(h, pretty))
+ continue;
+
+ r = image_make(pretty, dirfd(d), path, de->d_name, &st, &image);
+ if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
+ continue;
+ if (r < 0)
+ return r;
+
+ image->discoverable = true;
+
+ r = hashmap_put(h, image->name, image);
+ if (r < 0)
+ return r;
+
+ image = NULL;
+ }
+ }
+
+ if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) {
+ _cleanup_(image_unrefp) Image *image = NULL;
+
+ r = image_make(".host", AT_FDCWD, NULL, "/", NULL, &image);
+ if (r < 0)
+ return r;
+
+ image->discoverable = true;
+
+ r = hashmap_put(h, image->name, image);
+ if (r < 0)
+ return r;
+
+ image = NULL;
+ }
+
+ return 0;
+}
+
+int image_remove(Image *i) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ _cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+
+ /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
+ * big guns */
+ if (unlink(i->path) < 0) {
+ r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+ case IMAGE_DIRECTORY:
+ /* Allow deletion of read-only directories */
+ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
+ r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_BLOCK:
+
+ /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
+ * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
+ * the thing (it's most likely a symlink after all). */
+
+ if (path_startswith(i->path, "/dev"))
+ break;
+
+ _fallthrough_;
+ case IMAGE_RAW:
+ if (unlink(i->path) < 0)
+ return -errno;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ STRV_FOREACH(j, settings) {
+ if (unlink(*j) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
+ }
+
+ if (unlink(roothash) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
+ _cleanup_free_ char *rs = NULL;
+ const char *fn;
+
+ fn = strjoina(new_name, suffix);
+
+ rs = file_in_same_dir(path, fn);
+ if (!rs)
+ return -ENOMEM;
+
+ return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
+}
+
+int image_rename(Image *i, const char *new_name) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
+ _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
+ _cleanup_strv_free_ char **settings = NULL;
+ unsigned file_attr = 0;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (!image_name_is_valid(new_name))
+ return -EINVAL;
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ /* Make sure nobody takes the new name, between the time we
+ * checked it is currently unused in all search paths, and the
+ * time we take possession of it */
+ r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, new_name, NULL);
+ if (r >= 0)
+ return -EEXIST;
+ if (r != -ENOENT)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_DIRECTORY:
+ /* Turn of the immutable bit while we rename the image, so that we can rename it */
+ (void) read_attr_path(i->path, &file_attr);
+
+ if (file_attr & FS_IMMUTABLE_FL)
+ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
+
+ _fallthrough_;
+ case IMAGE_SUBVOLUME:
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
+ case IMAGE_BLOCK:
+
+ /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
+ if (path_startswith(i->path, "/dev"))
+ return -EROFS;
+
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
+ case IMAGE_RAW: {
+ const char *fn;
+
+ fn = strjoina(new_name, ".raw");
+ new_path = file_in_same_dir(i->path, fn);
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!new_path)
+ return -ENOMEM;
+
+ nn = strdup(new_name);
+ if (!nn)
+ return -ENOMEM;
+
+ r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
+ if (r < 0)
+ return r;
+
+ /* Restore the immutable bit, if it was set before */
+ if (file_attr & FS_IMMUTABLE_FL)
+ (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
+
+ free_and_replace(i->path, new_path);
+ free_and_replace(i->name, nn);
+
+ STRV_FOREACH(j, settings) {
+ r = rename_auxiliary_file(*j, new_name, ".nspawn");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
+ }
+
+ r = rename_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
+ _cleanup_free_ char *rs = NULL;
+ const char *fn;
+
+ fn = strjoina(new_name, suffix);
+
+ rs = file_in_same_dir(path, fn);
+ if (!rs)
+ return -ENOMEM;
+
+ return copy_file_atomic(path, rs, 0664, 0, 0, COPY_REFLINK);
+}
+
+int image_clone(Image *i, const char *new_name, bool read_only) {
+ _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
+ _cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
+ const char *new_path;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (!image_name_is_valid(new_name))
+ return -EINVAL;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure nobody takes the new name, between the time we
+ * checked it is currently unused in all search paths, and the
+ * time we take possession of it */
+ r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, new_name, NULL);
+ if (r >= 0)
+ return -EEXIST;
+ if (r != -ENOENT)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+ case IMAGE_DIRECTORY:
+ /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
+ * directory. */
+
+ new_path = strjoina("/var/lib/machines/", new_name);
+
+ r = btrfs_subvol_snapshot(i->path, new_path,
+ (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA);
+ if (r >= 0)
+ /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
+ (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
+
+ break;
+
+ case IMAGE_RAW:
+ new_path = strjoina("/var/lib/machines/", new_name, ".raw");
+
+ r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME);
+ break;
+
+ case IMAGE_BLOCK:
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(j, settings) {
+ r = clone_auxiliary_file(*j, new_name, ".nspawn");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
+ }
+
+ r = clone_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+int image_read_only(Image *i, bool b) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ int r;
+
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+
+ /* Note that we set the flag only on the top-level
+ * subvolume of the image. */
+
+ r = btrfs_subvol_set_read_only(i->path, b);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_DIRECTORY:
+ /* For simple directory trees we cannot use the access
+ mode of the top-level directory, since it has an
+ effect on the container itself. However, we can
+ use the "immutable" flag, to at least make the
+ top-level directory read-only. It's not as good as
+ a read-only subvolume, but at least something, and
+ we can read the value back. */
+
+ r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_RAW: {
+ struct stat st;
+
+ if (stat(i->path, &st) < 0)
+ return -errno;
+
+ if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
+ return -errno;
+
+ /* If the images is now read-only, it's a good time to
+ * defrag it, given that no write patterns will
+ * fragment it again. */
+ if (b)
+ (void) btrfs_defrag(i->path);
+ break;
+ }
+
+ case IMAGE_BLOCK: {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int state = b;
+
+ fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTTY;
+
+ if (ioctl(fd, BLKROSET, &state) < 0)
+ return -errno;
+
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
+ _cleanup_free_ char *p = NULL;
+ LockFile t = LOCK_FILE_INIT;
+ struct stat st;
+ bool exclusive;
+ int r;
+
+ assert(path);
+ assert(global);
+ assert(local);
+
+ /* Locks an image path. This actually creates two locks: one "local" one, next to the image path
+ * itself, which might be shared via NFS. And another "global" one, in /run, that uses the
+ * device/inode number. This has the benefit that we can even lock a tree that is a mount point,
+ * correctly. */
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ switch (operation & (LOCK_SH|LOCK_EX)) {
+ case LOCK_SH:
+ exclusive = false;
+ break;
+ case LOCK_EX:
+ exclusive = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *local = *global = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are
+ * running off it after all, and we don't want any images to manipulate the host image. We make an
+ * exception for shared locks however: we allow those (and make them NOPs since there's no point in
+ * taking them if there can't be exclusive locks). Strictly speaking these are questionable as well,
+ * since it means changes made to the host might propagate to the container as they happen (and a
+ * shared lock kinda suggests that no changes happen at all while it is in place), but it's too
+ * useful not to allow read-only containers off the host root, hence let's support this, and trust
+ * the user to do the right thing with this. */
+ if (path_equal(path, "/")) {
+ if (exclusive)
+ return -EBUSY;
+
+ *local = *global = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ if (stat(path, &st) >= 0) {
+ if (S_ISBLK(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
+ else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
+ else
+ return -ENOTTY;
+
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ /* For block devices we don't need the "local" lock, as the major/minor lock above should be
+ * sufficient, since block devices are host local anyway. */
+ if (!path_startswith(path, "/dev/")) {
+ r = make_lock_file_for(path, operation, &t);
+ if (r < 0) {
+ if (!exclusive && r == -EROFS)
+ log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
+ else
+ return r;
+ }
+ }
+
+ if (p) {
+ (void) mkdir_p("/run/systemd/nspawn/locks", 0700);
+
+ r = make_lock_file(p, operation, global);
+ if (r < 0) {
+ release_lock_file(&t);
+ return r;
+ }
+ } else
+ *global = (LockFile) LOCK_FILE_INIT;
+
+ *local = t;
+ return 0;
+}
+
+int image_set_limit(Image *i, uint64_t referenced_max) {
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ if (i->type != IMAGE_SUBVOLUME)
+ return -EOPNOTSUPP;
+
+ /* We set the quota both for the subvolume as well as for the
+ * subtree. The latter is mostly for historical reasons, since
+ * we didn't use to have a concept of subtree quota, and hence
+ * only modified the subvolume quota. */
+
+ (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
+ (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
+ return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
+}
+
+int image_read_metadata(Image *i) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ int r;
+
+ assert(i);
+
+ r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+ case IMAGE_DIRECTORY: {
+ _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+ sd_id128_t machine_id = SD_ID128_NULL;
+ _cleanup_free_ char *hostname = NULL;
+ _cleanup_free_ char *path = NULL;
+
+ r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
+ else if (r >= 0) {
+ r = read_etc_hostname(path, &hostname);
+ if (r < 0)
+ log_debug_errno(errno, "Failed to read /etc/hostname of image %s: %m", i->name);
+ }
+
+ path = mfree(path);
+
+ r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name);
+ else if (r >= 0) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ log_debug_errno(errno, "Failed to open %s: %m", path);
+ else {
+ r = id128_read_fd(fd, ID128_PLAIN, &machine_id);
+ if (r < 0)
+ log_debug_errno(r, "Image %s contains invalid machine ID.", i->name);
+ }
+ }
+
+ path = mfree(path);
+
+ r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
+ else if (r >= 0) {
+ r = load_env_file_pairs(NULL, path, &machine_info);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
+ }
+
+ r = load_os_release_pairs(i->path, &os_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");
+
+ free_and_replace(i->hostname, hostname);
+ i->machine_id = machine_id;
+ strv_free_and_replace(i->machine_info, machine_info);
+ strv_free_and_replace(i->os_release, os_release);
+
+ break;
+ }
+
+ case IMAGE_RAW:
+ case IMAGE_BLOCK: {
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+
+ r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
+ if (r < 0)
+ return r;
+
+ r = dissect_image(d->fd, NULL, NULL, DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_RELAX_VAR_CHECK, &m);
+ if (r < 0)
+ return r;
+
+ r = dissected_image_acquire_metadata(m);
+ if (r < 0)
+ return r;
+
+ free_and_replace(i->hostname, m->hostname);
+ i->machine_id = m->machine_id;
+ strv_free_and_replace(i->machine_info, m->machine_info);
+ strv_free_and_replace(i->os_release, m->os_release);
+
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ i->metadata_valid = true;
+
+ return 0;
+}
+
+int image_name_lock(const char *name, int operation, LockFile *ret) {
+ assert(name);
+ assert(ret);
+
+ /* Locks an image name, regardless of the precise path used. */
+
+ if (!image_name_is_valid(name))
+ return -EINVAL;
+
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *ret = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ if (streq(name, ".host"))
+ return -EBUSY;
+
+ const char *p = strjoina("/run/systemd/nspawn/locks/name-", name);
+ (void) mkdir_p("/run/systemd/nspawn/locks", 0700);
+ return make_lock_file(p, operation, ret);
+}
+
+bool image_name_is_valid(const char *s) {
+ if (!filename_is_valid(s))
+ return false;
+
+ if (string_has_cc(s, NULL))
+ return false;
+
+ if (!utf8_is_valid(s))
+ return false;
+
+ /* Temporary files for atomically creating new files */
+ if (startswith(s, ".#"))
+ return false;
+
+ return true;
+}
+
+bool image_in_search_path(ImageClass class, const char *image) {
+ const char *path;
+
+ assert(image);
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ const char *p;
+ size_t k;
+
+ p = path_startswith(image, path);
+ if (!p)
+ continue;
+
+ /* Make sure there's a filename following */
+ k = strcspn(p, "/");
+ if (k == 0)
+ continue;
+
+ p += k;
+
+ /* Accept trailing slashes */
+ if (p[strspn(p, "/")] == 0)
+ return true;
+
+ }
+
+ return false;
+}
+
+static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
+ [IMAGE_DIRECTORY] = "directory",
+ [IMAGE_SUBVOLUME] = "subvolume",
+ [IMAGE_RAW] = "raw",
+ [IMAGE_BLOCK] = "block",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);
diff --git a/src/shared/machine-image.h b/src/shared/machine-image.h
new file mode 100644
index 0000000..95a8f5c
--- /dev/null
+++ b/src/shared/machine-image.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "lockfile-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+typedef enum ImageClass {
+ IMAGE_MACHINE,
+ IMAGE_PORTABLE,
+ _IMAGE_CLASS_MAX,
+ _IMAGE_CLASS_INVALID = -1
+} ImageClass;
+
+typedef enum ImageType {
+ IMAGE_DIRECTORY,
+ IMAGE_SUBVOLUME,
+ IMAGE_RAW,
+ IMAGE_BLOCK,
+ _IMAGE_TYPE_MAX,
+ _IMAGE_TYPE_INVALID = -1
+} ImageType;
+
+typedef struct Image {
+ unsigned n_ref;
+
+ ImageType type;
+ char *name;
+ char *path;
+ bool read_only;
+
+ usec_t crtime;
+ usec_t mtime;
+
+ uint64_t usage;
+ uint64_t usage_exclusive;
+ uint64_t limit;
+ uint64_t limit_exclusive;
+
+ char *hostname;
+ sd_id128_t machine_id;
+ char **machine_info;
+ char **os_release;
+
+ bool metadata_valid:1;
+ bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */
+
+ void *userdata;
+} Image;
+
+Image *image_unref(Image *i);
+Image *image_ref(Image *i);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Image*, image_unref);
+
+int image_find(ImageClass class, const char *name, Image **ret);
+int image_from_path(const char *path, Image **ret);
+int image_find_harder(ImageClass class, const char *name_or_path, Image **ret);
+int image_discover(ImageClass class, Hashmap *map);
+
+int image_remove(Image *i);
+int image_rename(Image *i, const char *new_name);
+int image_clone(Image *i, const char *new_name, bool read_only);
+int image_read_only(Image *i, bool b);
+
+const char* image_type_to_string(ImageType t) _const_;
+ImageType image_type_from_string(const char *s) _pure_;
+
+bool image_name_is_valid(const char *s) _pure_;
+
+int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local);
+int image_name_lock(const char *name, int operation, LockFile *ret);
+
+int image_set_limit(Image *i, uint64_t referenced_max);
+
+int image_read_metadata(Image *i);
+
+bool image_in_search_path(ImageClass class, const char *image);
+
+static inline bool IMAGE_IS_HIDDEN(const struct Image *i) {
+ assert(i);
+
+ return i->name && i->name[0] == '.';
+}
+
+static inline bool IMAGE_IS_VENDOR(const struct Image *i) {
+ assert(i);
+
+ return i->path && path_startswith(i->path, "/usr");
+}
+
+static inline bool IMAGE_IS_HOST(const struct Image *i) {
+ assert(i);
+
+ if (i->name && streq(i->name, ".host"))
+ return true;
+
+ if (i->path && path_equal(i->path, "/"))
+ return true;
+
+ return false;
+}
+
+extern const struct hash_ops image_hash_ops;
diff --git a/src/shared/machine-pool.c b/src/shared/machine-pool.c
new file mode 100644
index 0000000..1f0b0b4
--- /dev/null
+++ b/src/shared/machine-pool.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "btrfs-util.h"
+#include "label.h"
+#include "machine-pool.h"
+#include "missing_magic.h"
+#include "stat-util.h"
+
+static int check_btrfs(void) {
+ struct statfs sfs;
+
+ if (statfs("/var/lib/machines", &sfs) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (statfs("/var/lib", &sfs) < 0)
+ return -errno;
+ }
+
+ return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
+}
+
+int setup_machine_directory(sd_bus_error *error) {
+ int r;
+
+ r = check_btrfs();
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m");
+ if (r == 0)
+ return 0;
+
+ (void) btrfs_subvol_make_label("/var/lib/machines");
+
+ r = btrfs_quota_enable("/var/lib/machines", true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m");
+
+ r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m");
+
+ return 1;
+}
diff --git a/src/shared/machine-pool.h b/src/shared/machine-pool.h
new file mode 100644
index 0000000..3f528ab
--- /dev/null
+++ b/src/shared/machine-pool.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdint.h>
+
+#include "sd-bus.h"
+
+int setup_machine_directory(sd_bus_error *error);
diff --git a/src/shared/macvlan-util.c b/src/shared/macvlan-util.c
new file mode 100644
index 0000000..11dffe9
--- /dev/null
+++ b/src/shared/macvlan-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "macvlan-util.h"
+#include "string-table.h"
+
+static const char* const macvlan_mode_table[_NETDEV_MACVLAN_MODE_MAX] = {
+ [NETDEV_MACVLAN_MODE_PRIVATE] = "private",
+ [NETDEV_MACVLAN_MODE_VEPA] = "vepa",
+ [NETDEV_MACVLAN_MODE_BRIDGE] = "bridge",
+ [NETDEV_MACVLAN_MODE_PASSTHRU] = "passthru",
+ [NETDEV_MACVLAN_MODE_SOURCE] = "source",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(macvlan_mode, MacVlanMode);
diff --git a/src/shared/macvlan-util.h b/src/shared/macvlan-util.h
new file mode 100644
index 0000000..0d3a5f4
--- /dev/null
+++ b/src/shared/macvlan-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/if_link.h>
+
+typedef enum MacVlanMode {
+ NETDEV_MACVLAN_MODE_PRIVATE = MACVLAN_MODE_PRIVATE,
+ NETDEV_MACVLAN_MODE_VEPA = MACVLAN_MODE_VEPA,
+ NETDEV_MACVLAN_MODE_BRIDGE = MACVLAN_MODE_BRIDGE,
+ NETDEV_MACVLAN_MODE_PASSTHRU = MACVLAN_MODE_PASSTHRU,
+ NETDEV_MACVLAN_MODE_SOURCE = MACVLAN_MODE_SOURCE,
+ _NETDEV_MACVLAN_MODE_MAX,
+ _NETDEV_MACVLAN_MODE_INVALID = -1
+} MacVlanMode;
+
+const char *macvlan_mode_to_string(MacVlanMode d) _const_;
+MacVlanMode macvlan_mode_from_string(const char *d) _pure_;
diff --git a/src/shared/main-func.h b/src/shared/main-func.h
new file mode 100644
index 0000000..05cdffe
--- /dev/null
+++ b/src/shared/main-func.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdlib.h>
+
+#include "sd-daemon.h"
+
+#include "pager.h"
+#include "selinux-util.h"
+#include "spawn-ask-password-agent.h"
+#include "spawn-polkit-agent.h"
+#include "static-destruct.h"
+#include "util.h"
+
+#define _DEFINE_MAIN_FUNCTION(intro, impl, ret) \
+ int main(int argc, char *argv[]) { \
+ int r; \
+ save_argc_argv(argc, argv); \
+ intro; \
+ r = impl; \
+ if (r < 0) \
+ (void) sd_notifyf(0, "ERRNO=%i", -r); \
+ ask_password_agent_close(); \
+ polkit_agent_close(); \
+ pager_close(); \
+ mac_selinux_finish(); \
+ static_destruct(); \
+ return ret; \
+ }
+
+/* Negative return values from impl are mapped to EXIT_FAILURE, and
+ * everything else means success! */
+#define DEFINE_MAIN_FUNCTION(impl) \
+ _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : EXIT_SUCCESS)
+
+/* Zero is mapped to EXIT_SUCCESS, negative values are mapped to EXIT_FAILURE,
+ * and positive values are propagated.
+ * Note: "true" means failure! */
+#define DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(impl) \
+ _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : r)
diff --git a/src/shared/meson.build b/src/shared/meson.build
new file mode 100644
index 0000000..f30fe44
--- /dev/null
+++ b/src/shared/meson.build
@@ -0,0 +1,399 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+shared_sources = files('''
+ acl-util.h
+ acpi-fpdt.c
+ acpi-fpdt.h
+ apparmor-util.c
+ apparmor-util.h
+ ask-password-api.c
+ ask-password-api.h
+ barrier.c
+ barrier.h
+ base-filesystem.c
+ base-filesystem.h
+ binfmt-util.c
+ binfmt-util.h
+ bitmap.c
+ bitmap.h
+ blkid-util.h
+ bond-util.c
+ bond-util.h
+ boot-timestamps.c
+ boot-timestamps.h
+ bootspec.c
+ bootspec.h
+ bpf-program.c
+ bpf-program.h
+ bridge-util.c
+ bridge-util.h
+ bus-get-properties.c
+ bus-get-properties.h
+ bus-locator.c
+ bus-locator.h
+ bus-log-control-api.c
+ bus-log-control-api.h
+ bus-map-properties.c
+ bus-map-properties.h
+ bus-message-util.c
+ bus-message-util.h
+ bus-object.c
+ bus-object.h
+ bus-polkit.c
+ bus-polkit.h
+ bus-print-properties.c
+ bus-print-properties.h
+ bus-unit-procs.c
+ bus-unit-procs.h
+ bus-unit-util.c
+ bus-unit-util.h
+ bus-util.c
+ bus-util.h
+ bus-wait-for-jobs.c
+ bus-wait-for-jobs.h
+ bus-wait-for-units.c
+ bus-wait-for-units.h
+ calendarspec.c
+ calendarspec.h
+ cgroup-setup.c
+ cgroup-setup.h
+ cgroup-show.c
+ cgroup-show.h
+ chown-recursive.c
+ chown-recursive.h
+ clean-ipc.c
+ clean-ipc.h
+ clock-util.c
+ clock-util.h
+ condition.c
+ condition.h
+ conf-parser.c
+ conf-parser.h
+ coredump-util.c
+ coredump-util.h
+ cpu-set-util.c
+ cpu-set-util.h
+ cryptsetup-util.c
+ cryptsetup-util.h
+ daemon-util.h
+ dev-setup.c
+ dev-setup.h
+ dissect-image.c
+ dissect-image.h
+ dm-util.c
+ dm-util.h
+ dns-domain.c
+ dns-domain.h
+ dropin.c
+ dropin.h
+ efi-loader.c
+ efi-loader.h
+ enable-mempool.c
+ env-file-label.c
+ env-file-label.h
+ ethtool-util.c
+ ethtool-util.h
+ exec-util.c
+ exec-util.h
+ exit-status.c
+ exit-status.h
+ fdset.c
+ fdset.h
+ fileio-label.c
+ fileio-label.h
+ firewall-util.h
+ format-table.c
+ format-table.h
+ fsck-util.h
+ fstab-util.c
+ fstab-util.h
+ generator.c
+ generator.h
+ geneve-util.c
+ geneve-util.h
+ gpt.c
+ gpt.h
+ group-record.c
+ group-record.h
+ id128-print.c
+ id128-print.h
+ idn-util.c
+ idn-util.h
+ ima-util.c
+ ima-util.h
+ import-util.c
+ import-util.h
+ initreq.h
+ install-printf.c
+ install-printf.h
+ install.c
+ install.h
+ ipvlan-util.c
+ ipvlan-util.h
+ ip-protocol-list.c
+ ip-protocol-list.h
+ journal-importer.c
+ journal-importer.h
+ journal-util.c
+ journal-util.h
+ json-internal.h
+ json.c
+ json.h
+ libcrypt-util.c
+ libcrypt-util.h
+ libmount-util.h
+ linux/auto_dev-ioctl.h
+ linux/bpf.h
+ linux/bpf_common.h
+ linux/bpf_insn.h
+ linux/dm-ioctl.h
+ linux/ethtool.h
+ local-addresses.c
+ local-addresses.h
+ lockfile-util.c
+ lockfile-util.h
+ log-link.h
+ logs-show.c
+ logs-show.h
+ loop-util.c
+ loop-util.h
+ machine-image.c
+ machine-image.h
+ machine-pool.c
+ machine-pool.h
+ macvlan-util.c
+ macvlan-util.h
+ main-func.h
+ mkfs-util.c
+ mkfs-util.h
+ module-util.h
+ mount-util.c
+ mount-util.h
+ netif-naming-scheme.c
+ netif-naming-scheme.h
+ nscd-flush.c
+ nscd-flush.h
+ nsflags.c
+ nsflags.h
+ numa-util.c
+ numa-util.h
+ openssl-util.h
+ os-util.c
+ os-util.h
+ output-mode.c
+ output-mode.h
+ pager.c
+ pager.h
+ pe-header.h
+ pkcs11-util.c
+ pkcs11-util.h
+ pretty-print.c
+ pretty-print.h
+ psi-util.c
+ psi-util.h
+ ptyfwd.c
+ ptyfwd.h
+ pwquality-util.c
+ pwquality-util.h
+ qrcode-util.c
+ qrcode-util.h
+ reboot-util.c
+ reboot-util.h
+ resize-fs.c
+ resize-fs.h
+ resolve-util.c
+ resolve-util.h
+ seccomp-util.h
+ securebits-util.c
+ securebits-util.h
+ serialize.c
+ serialize.h
+ service-util.c
+ service-util.h
+ sleep-config.c
+ sleep-config.h
+ socket-netlink.c
+ socket-netlink.h
+ spawn-ask-password-agent.c
+ spawn-ask-password-agent.h
+ spawn-polkit-agent.c
+ spawn-polkit-agent.h
+ specifier.c
+ specifier.h
+ switch-root.c
+ switch-root.h
+ sysctl-util.c
+ sysctl-util.h
+ tmpfile-util-label.c
+ tmpfile-util-label.h
+ tomoyo-util.c
+ tomoyo-util.h
+ udev-util.c
+ udev-util.h
+ uid-range.c
+ uid-range.h
+ unit-file.c
+ unit-file.h
+ user-record-nss.c
+ user-record-nss.h
+ user-record-show.c
+ user-record-show.h
+ user-record.c
+ user-record.h
+ userdb.c
+ userdb.h
+ utmp-wtmp.h
+ varlink.c
+ varlink.h
+ verbs.c
+ verbs.h
+ vlan-util.c
+ vlan-util.h
+ volatile-util.c
+ volatile-util.h
+ watchdog.c
+ watchdog.h
+ web-util.c
+ web-util.h
+ wifi-util.c
+ wifi-util.h
+ xml.c
+ xml.h
+'''.split())
+
+if get_option('tests') != 'false'
+ shared_sources += files('tests.c', 'tests.h')
+endif
+
+test_tables_h = files('test-tables.h')
+shared_sources += test_tables_h
+
+generate_syscall_list = find_program('generate-syscall-list.py')
+fname = 'syscall-list.h'
+syscall_list_h = custom_target(
+ fname,
+ input : 'syscall-names.text',
+ output : fname,
+ command : [generate_syscall_list,
+ '@INPUT@'],
+ capture : true)
+
+if conf.get('HAVE_ACL') == 1
+ shared_sources += files('acl-util.c')
+endif
+
+if conf.get('ENABLE_UTMP') == 1
+ shared_sources += files('utmp-wtmp.c')
+endif
+
+if conf.get('HAVE_SECCOMP') == 1
+ shared_sources += files('seccomp-util.c')
+ shared_sources += syscall_list_h
+endif
+
+if conf.get('HAVE_LIBIPTC') == 1
+ shared_sources += files('firewall-util.c')
+endif
+
+if conf.get('HAVE_KMOD') == 1
+ shared_sources += files('module-util.c')
+endif
+
+if conf.get('HAVE_PAM') == 1
+ shared_sources += files('''
+ pam-util.c
+ pam-util.h
+'''.split())
+endif
+
+generate_ip_protocol_list = find_program('generate-ip-protocol-list.sh')
+ip_protocol_list_txt = custom_target(
+ 'ip-protocol-list.txt',
+ output : 'ip-protocol-list.txt',
+ command : [generate_ip_protocol_list, cpp],
+ capture : true)
+
+fname = 'ip-protocol-from-name.gperf'
+gperf_file = custom_target(
+ fname,
+ input : ip_protocol_list_txt,
+ output : fname,
+ command : [generate_gperfs, 'ip_protocol', 'IPPROTO_', '@INPUT@'],
+ capture : true)
+
+fname = 'ip-protocol-from-name.h'
+target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_ip_protocol',
+ '-H', 'hash_ip_protocol_name',
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+fname = 'ip-protocol-to-name.h'
+awkscript = 'ip-protocol-to-name.awk'
+target2 = custom_target(
+ fname,
+ input : [awkscript, ip_protocol_list_txt],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+shared_generated_gperf_headers = [target1, target2]
+shared_sources += shared_generated_gperf_headers
+
+libshared_name = 'systemd-shared-@0@'.format(meson.project_version())
+
+libshared_deps = [threads,
+ libacl,
+ libblkid,
+ libcap,
+ libcrypt,
+ libgcrypt,
+ libiptc,
+ libkmod,
+ liblz4,
+ libmount,
+ libopenssl,
+ libp11kit,
+ libpam,
+ librt,
+ libseccomp,
+ libselinux,
+ libzstd,
+ libxz]
+
+libshared_sym_path = '@0@/libshared.sym'.format(meson.current_source_dir())
+
+libshared_static = static_library(
+ libshared_name,
+ shared_sources,
+ include_directories : includes,
+ dependencies : libshared_deps,
+ c_args : ['-fvisibility=default'])
+
+libshared = shared_library(
+ libshared_name,
+ libudev_sources,
+ include_directories : includes,
+ link_args : ['-shared',
+ '-Wl,--version-script=' + libshared_sym_path],
+ link_whole : [libshared_static,
+ libbasic,
+ libbasic_gcrypt,
+ libsystemd_static,
+ libjournal_client],
+ c_args : ['-fvisibility=default'],
+ dependencies : libshared_deps,
+ install : true,
+ install_dir : rootlibexecdir)
+
+############################################################
+
+run_target(
+ 'syscall-names-update',
+ command : [syscall_names_update_sh, meson.current_source_dir()])
diff --git a/src/shared/mkfs-util.c b/src/shared/mkfs-util.c
new file mode 100644
index 0000000..ce10e60
--- /dev/null
+++ b/src/shared/mkfs-util.c
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "id128-util.h"
+#include "mkfs-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+int mkfs_exists(const char *fstype) {
+ const char *mkfs;
+ int r;
+
+ assert(fstype);
+
+ if (STR_IN_SET(fstype, "auto", "swap")) /* these aren't real file system types, refuse early */
+ return -EINVAL;
+
+ mkfs = strjoina("mkfs.", fstype);
+ if (!filename_is_valid(mkfs)) /* refuse file system types with slashes and similar */
+ return -EINVAL;
+
+ r = find_executable(mkfs, NULL);
+ if (r == -ENOENT)
+ return false;
+ if (r < 0)
+ return r;
+
+ return true;
+}
+
+int make_filesystem(
+ const char *node,
+ const char *fstype,
+ const char *label,
+ sd_id128_t uuid,
+ bool discard) {
+
+ _cleanup_free_ char *mkfs = NULL;
+ int r;
+
+ assert(node);
+ assert(fstype);
+ assert(label);
+
+ if (streq(fstype, "swap")) {
+ r = find_executable("mkswap", &mkfs);
+ if (r == -ENOENT)
+ return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkswap binary not available.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether mkswap binary exists: %m");
+ } else {
+ r = mkfs_exists(fstype);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether mkfs binary for %s exists: %m", fstype);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkfs binary for %s is not available.", fstype);
+
+ mkfs = strjoin("mkfs.", fstype);
+ if (!mkfs)
+ return log_oom();
+ }
+
+ r = safe_fork("(mkfs)", FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_STDOUT_TO_STDERR, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char suuid[ID128_UUID_STRING_MAX];
+
+ /* Child */
+ id128_to_uuid_string(uuid, suuid);
+
+ if (streq(fstype, "ext4"))
+ (void) execlp(mkfs, mkfs,
+ "-L", label,
+ "-U", suuid,
+ "-I", "256",
+ "-O", "has_journal",
+ "-m", "0",
+ "-E", discard ? "lazy_itable_init=1,discard" : "lazy_itable_init=1,nodiscard",
+ node, NULL);
+
+ else if (streq(fstype, "btrfs")) {
+ if (discard)
+ (void) execlp(mkfs, mkfs, "-L", label, "-U", suuid, node, NULL);
+ else
+ (void) execlp(mkfs, mkfs, "-L", label, "-U", suuid, "--nodiscard", node, NULL);
+
+ } else if (streq(fstype, "xfs")) {
+ const char *j;
+
+ j = strjoina("uuid=", suuid);
+ if (discard)
+ (void) execlp(mkfs, mkfs, "-L", label, "-m", j, "-m", "reflink=1", node, NULL);
+ else
+ (void) execlp(mkfs, mkfs, "-L", label, "-m", j, "-m", "reflink=1", "-K", node, NULL);
+
+ } else if (streq(fstype, "vfat")) {
+ char mangled_label[8 + 3 + 1], vol_id[8 + 1];
+
+ /* Classic FAT only allows 11 character uppercase labels */
+ strncpy(mangled_label, label, sizeof(mangled_label)-1);
+ mangled_label[sizeof(mangled_label)-1] = 0;
+ ascii_strupper(mangled_label);
+
+ xsprintf(vol_id, "%08" PRIx32,
+ ((uint32_t) uuid.bytes[0] << 24) |
+ ((uint32_t) uuid.bytes[1] << 16) |
+ ((uint32_t) uuid.bytes[2] << 8) |
+ ((uint32_t) uuid.bytes[3])); /* Take first 32 byte of UUID */
+
+ (void) execlp(mkfs, mkfs,
+ "-i", vol_id,
+ "-n", mangled_label,
+ "-F", "32", /* yes, we force FAT32 here */
+ node, NULL);
+
+ } else if (streq(fstype, "swap")) {
+
+ (void) execlp(mkfs, mkfs,
+ "-L", label,
+ "-U", suuid,
+ node, NULL);
+
+ } else
+ /* Generic fallback for all other file systems */
+ (void) execlp(mkfs, mkfs, node, NULL);
+
+ log_error_errno(errno, "Failed to execute %s: %m", mkfs);
+
+ _exit(EXIT_FAILURE);
+ }
+
+ return 0;
+}
diff --git a/src/shared/mkfs-util.h b/src/shared/mkfs-util.h
new file mode 100644
index 0000000..7647afb
--- /dev/null
+++ b/src/shared/mkfs-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+int mkfs_exists(const char *fstype);
+
+int make_filesystem(const char *node, const char *fstype, const char *label, sd_id128_t uuid, bool discard);
diff --git a/src/shared/module-util.c b/src/shared/module-util.c
new file mode 100644
index 0000000..587e636
--- /dev/null
+++ b/src/shared/module-util.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "module-util.h"
+
+int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose) {
+ const int probe_flags = KMOD_PROBE_APPLY_BLACKLIST;
+ struct kmod_list *itr;
+ _cleanup_(kmod_module_unref_listp) struct kmod_list *modlist = NULL;
+ int r;
+
+ /* verbose==true means we should log at non-debug level if we
+ * fail to find or load the module. */
+
+ log_debug("Loading module: %s", module);
+
+ r = kmod_module_new_from_lookup(ctx, module, &modlist);
+ if (r < 0)
+ return log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r,
+ "Failed to look up module alias '%s': %m", module);
+
+ if (!modlist) {
+ log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r,
+ "Failed to find module '%s'", module);
+ return -ENOENT;
+ }
+
+ kmod_list_foreach(itr, modlist) {
+ _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL;
+ int state, err;
+
+ mod = kmod_module_get_module(itr);
+ state = kmod_module_get_initstate(mod);
+
+ switch (state) {
+ case KMOD_MODULE_BUILTIN:
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Module '%s' is built in", kmod_module_get_name(mod));
+ break;
+
+ case KMOD_MODULE_LIVE:
+ log_debug("Module '%s' is already loaded", kmod_module_get_name(mod));
+ break;
+
+ default:
+ err = kmod_module_probe_insert_module(mod, probe_flags,
+ NULL, NULL, NULL, NULL);
+ if (err == 0)
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Inserted module '%s'", kmod_module_get_name(mod));
+ else if (err == KMOD_PROBE_APPLY_BLACKLIST)
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Module '%s' is deny-listed", kmod_module_get_name(mod));
+ else {
+ assert(err < 0);
+
+ log_full_errno(!verbose ? LOG_DEBUG :
+ err == -ENODEV ? LOG_NOTICE :
+ err == -ENOENT ? LOG_WARNING :
+ LOG_ERR,
+ err,
+ "Failed to insert module '%s': %m",
+ kmod_module_get_name(mod));
+ if (!IN_SET(err, -ENODEV, -ENOENT))
+ r = err;
+ }
+ }
+ }
+
+ return r;
+}
diff --git a/src/shared/module-util.h b/src/shared/module-util.h
new file mode 100644
index 0000000..4db8c5f
--- /dev/null
+++ b/src/shared/module-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <libkmod.h>
+
+#include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_ctx*, kmod_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_module*, kmod_module_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_list*, kmod_module_unref_list);
+
+int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose);
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
new file mode 100644
index 0000000..b19b384
--- /dev/null
+++ b/src/shared/mount-util.c
@@ -0,0 +1,744 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "libmount-util.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int mount_fd(const char *source,
+ int target_fd,
+ const char *filesystemtype,
+ unsigned long mountflags,
+ const void *data) {
+
+ char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+
+ xsprintf(path, "/proc/self/fd/%i", target_fd);
+ if (mount(source, path, filesystemtype, mountflags, data) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't
+ * mounted. Check for the latter to generate better error messages. */
+ if (proc_mounted() == 0)
+ return -ENOSYS;
+
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int mount_nofollow(
+ const char *source,
+ const char *target,
+ const char *filesystemtype,
+ unsigned long mountflags,
+ const void *data) {
+
+ _cleanup_close_ int fd = -1;
+
+ /* In almost all cases we want to manipulate the mount table without following symlinks, hence
+ * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is
+ * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early
+ * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the
+ * fs to mount) we can only use traditional mount() directly.
+ *
+ * Note that this disables following only for the final component of the target, i.e symlinks within
+ * the path of the target are honoured, as are symlinks in the source path everywhere. */
+
+ fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return mount_fd(source, fd, filesystemtype, mountflags, data);
+}
+
+int umount_recursive(const char *prefix, int flags) {
+ int n = 0, r;
+ bool again;
+
+ /* Try to umount everything recursively below a
+ * directory. Also, take care of stacked mounts, and keep
+ * unmounting them until they are gone. */
+
+ do {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+
+ again = false;
+
+ r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *path;
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
+
+ path = mnt_fs_get_target(fs);
+ if (!path)
+ continue;
+
+ if (!path_startswith(path, prefix))
+ continue;
+
+ if (umount2(path, flags | UMOUNT_NOFOLLOW) < 0) {
+ log_debug_errno(errno, "Failed to umount %s, ignoring: %m", path);
+ continue;
+ }
+
+ log_debug("Successfully unmounted %s", path);
+
+ again = true;
+ n++;
+
+ break;
+ }
+ } while (again);
+
+ return n;
+}
+
+static int get_mount_flags(
+ struct libmnt_table *table,
+ const char *path,
+ unsigned long *ret) {
+
+ _cleanup_close_ int fd = -1;
+ struct libmnt_fs *fs;
+ struct statvfs buf;
+ const char *opts;
+ int r;
+
+ /* Get the mount flags for the mountpoint at "path" from "table". We have a fallback using statvfs()
+ * in place (which provides us with mostly the same info), but it's just a fallback, since using it
+ * means triggering autofs or NFS mounts, which we'd rather avoid needlessly.
+ *
+ * This generally doesn't follow symlinks. */
+
+ fs = mnt_table_find_target(table, path, MNT_ITER_FORWARD);
+ if (!fs) {
+ log_debug("Could not find '%s' in mount table, ignoring.", path);
+ goto fallback;
+ }
+
+ opts = mnt_fs_get_vfs_options(fs);
+ if (!opts) {
+ *ret = 0;
+ return 0;
+ }
+
+ r = mnt_optstr_get_flags(opts, ret, mnt_get_builtin_optmap(MNT_LINUX_MAP));
+ if (r != 0) {
+ log_debug_errno(r, "Could not get flags for '%s', ignoring: %m", path);
+ goto fallback;
+ }
+
+ /* MS_RELATIME is default and trying to set it in an unprivileged container causes EPERM */
+ *ret &= ~MS_RELATIME;
+ return 0;
+
+fallback:
+ fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ if (fstatvfs(fd, &buf) < 0)
+ return -errno;
+
+ /* The statvfs() flags and the mount flags mostly have the same values, but for some cases do
+ * not. Hence map the flags manually. (Strictly speaking, ST_RELATIME/MS_RELATIME is the most
+ * prominent one that doesn't match, but that's the one we mask away anyway, see above.) */
+
+ *ret =
+ FLAGS_SET(buf.f_flag, ST_RDONLY) * MS_RDONLY |
+ FLAGS_SET(buf.f_flag, ST_NODEV) * MS_NODEV |
+ FLAGS_SET(buf.f_flag, ST_NOEXEC) * MS_NOEXEC |
+ FLAGS_SET(buf.f_flag, ST_NOSUID) * MS_NOSUID |
+ FLAGS_SET(buf.f_flag, ST_NOATIME) * MS_NOATIME |
+ FLAGS_SET(buf.f_flag, ST_NODIRATIME) * MS_NODIRATIME;
+
+ return 0;
+}
+
+/* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
+ * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
+int bind_remount_recursive_with_mountinfo(
+ const char *prefix,
+ unsigned long new_flags,
+ unsigned long flags_mask,
+ char **deny_list,
+ FILE *proc_self_mountinfo) {
+
+ _cleanup_set_free_free_ Set *done = NULL;
+ _cleanup_free_ char *simplified = NULL;
+ int r;
+
+ assert(prefix);
+ assert(proc_self_mountinfo);
+
+ /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
+ * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+ * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
+ * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
+ * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
+ * do not have any effect on future submounts that might get propagated, they might be writable. This includes
+ * future submounts that have been triggered via autofs.
+ *
+ * If the "deny_list" parameter is specified it may contain a list of subtrees to exclude from the
+ * remount operation. Note that we'll ignore the deny list for the top-level path. */
+
+ simplified = strdup(prefix);
+ if (!simplified)
+ return -ENOMEM;
+
+ path_simplify(simplified, false);
+
+ done = set_new(&path_hash_ops);
+ if (!done)
+ return -ENOMEM;
+
+ for (;;) {
+ _cleanup_set_free_free_ Set *todo = NULL;
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ bool top_autofs = false;
+ char *x;
+ unsigned long orig_flags;
+
+ todo = set_new(&path_hash_ops);
+ if (!todo)
+ return -ENOMEM;
+
+ rewind(proc_self_mountinfo);
+
+ r = libmount_parse("/proc/self/mountinfo", proc_self_mountinfo, &table, &iter);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *path, *type;
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
+
+ path = mnt_fs_get_target(fs);
+ type = mnt_fs_get_fstype(fs);
+ if (!path || !type)
+ continue;
+
+ if (!path_startswith(path, simplified))
+ continue;
+
+ /* Ignore this mount if it is deny-listed, but only if it isn't the top-level mount
+ * we shall operate on. */
+ if (!path_equal(path, simplified)) {
+ bool deny_listed = false;
+ char **i;
+
+ STRV_FOREACH(i, deny_list) {
+ if (path_equal(*i, simplified))
+ continue;
+
+ if (!path_startswith(*i, simplified))
+ continue;
+
+ if (path_startswith(path, *i)) {
+ deny_listed = true;
+ log_debug("Not remounting %s deny-listed by %s, called for %s",
+ path, *i, simplified);
+ break;
+ }
+ }
+ if (deny_listed)
+ continue;
+ }
+
+ /* Let's ignore autofs mounts. If they aren't
+ * triggered yet, we want to avoid triggering
+ * them, as we don't make any guarantees for
+ * future submounts anyway. If they are
+ * already triggered, then we will find
+ * another entry for this. */
+ if (streq(type, "autofs")) {
+ top_autofs = top_autofs || path_equal(path, simplified);
+ continue;
+ }
+
+ if (!set_contains(done, path)) {
+ r = set_put_strdup(&todo, path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* If we have no submounts to process anymore and if
+ * the root is either already done, or an autofs, we
+ * are done */
+ if (set_isempty(todo) &&
+ (top_autofs || set_contains(done, simplified)))
+ return 0;
+
+ if (!set_contains(done, simplified) &&
+ !set_contains(todo, simplified)) {
+ /* The prefix directory itself is not yet a mount, make it one. */
+ r = mount_nofollow(simplified, simplified, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ orig_flags = 0;
+ (void) get_mount_flags(table, simplified, &orig_flags);
+
+ r = mount_nofollow(NULL, simplified, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL);
+ if (r < 0)
+ return r;
+
+ log_debug("Made top-level directory %s a mount point.", prefix);
+
+ r = set_put_strdup(&done, simplified);
+ if (r < 0)
+ return r;
+ }
+
+ while ((x = set_steal_first(todo))) {
+
+ r = set_consume(done, x);
+ if (IN_SET(r, 0, -EEXIST))
+ continue;
+ if (r < 0)
+ return r;
+
+ /* Deal with mount points that are obstructed by a later mount */
+ r = path_is_mount_point(x, NULL, 0);
+ if (IN_SET(r, 0, -ENOENT))
+ continue;
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+
+ /* Even if root user invoke this, submounts under private FUSE or NFS mount points
+ * may not be acceessed. E.g.,
+ *
+ * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
+ * $ bindfs --no-allow-other ~/mnt ~/mnt
+ *
+ * Then, root user cannot access the mount point ~/mnt/mnt.
+ * In such cases, the submounts are ignored, as we have no way to manage them. */
+ log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
+ continue;
+ }
+
+ /* Try to reuse the original flag set */
+ orig_flags = 0;
+ (void) get_mount_flags(table, x, &orig_flags);
+
+ r = mount_nofollow(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL);
+ if (r < 0)
+ return r;
+
+ log_debug("Remounted %s read-only.", x);
+ }
+ }
+}
+
+int bind_remount_recursive(
+ const char *prefix,
+ unsigned long new_flags,
+ unsigned long flags_mask,
+ char **deny_list) {
+
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ int r;
+
+ r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
+ if (r < 0)
+ return r;
+
+ return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, proc_self_mountinfo);
+}
+
+int bind_remount_one_with_mountinfo(
+ const char *path,
+ unsigned long new_flags,
+ unsigned long flags_mask,
+ FILE *proc_self_mountinfo) {
+
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ unsigned long orig_flags = 0;
+ int r;
+
+ assert(path);
+ assert(proc_self_mountinfo);
+
+ rewind(proc_self_mountinfo);
+
+ table = mnt_new_table();
+ if (!table)
+ return -ENOMEM;
+
+ r = mnt_table_parse_stream(table, proc_self_mountinfo, "/proc/self/mountinfo");
+ if (r < 0)
+ return r;
+
+ /* Try to reuse the original flag set */
+ (void) get_mount_flags(table, path, &orig_flags);
+
+ r = mount_nofollow(NULL, path, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int mount_move_root(const char *path) {
+ assert(path);
+
+ if (chdir(path) < 0)
+ return -errno;
+
+ if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
+ return -errno;
+
+ if (chroot(".") < 0)
+ return -errno;
+
+ if (chdir("/") < 0)
+ return -errno;
+
+ return 0;
+}
+
+int repeat_unmount(const char *path, int flags) {
+ bool done = false;
+
+ assert(path);
+
+ /* If there are multiple mounts on a mount point, this
+ * removes them all */
+
+ for (;;) {
+ if (umount2(path, flags) < 0) {
+
+ if (errno == EINVAL)
+ return done;
+
+ return -errno;
+ }
+
+ done = true;
+ }
+}
+
+int mode_to_inaccessible_node(
+ const char *runtime_dir,
+ mode_t mode,
+ char **ret) {
+
+ /* This function maps a node type to a corresponding inaccessible file node. These nodes are created
+ * during early boot by PID 1. In some cases we lacked the privs to create the character and block
+ * devices (maybe because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a
+ * devices policy that excludes device nodes with major and minor of 0), but that's fine, in that
+ * case we use an AF_UNIX file node instead, which is not the same, but close enough for most
+ * uses. And most importantly, the kernel allows bind mounts from socket nodes to any non-directory
+ * file nodes, and that's the most important thing that matters.
+ *
+ * Note that the runtime directory argument shall be the top-level runtime directory, i.e. /run/ if
+ * we operate in system context and $XDG_RUNTIME_DIR if we operate in user context. */
+
+ _cleanup_free_ char *d = NULL;
+ const char *node = NULL;
+
+ assert(ret);
+
+ if (!runtime_dir)
+ runtime_dir = "/run";
+
+ switch(mode & S_IFMT) {
+ case S_IFREG:
+ node = "/systemd/inaccessible/reg";
+ break;
+
+ case S_IFDIR:
+ node = "/systemd/inaccessible/dir";
+ break;
+
+ case S_IFCHR:
+ node = "/systemd/inaccessible/chr";
+ break;
+
+ case S_IFBLK:
+ node = "/systemd/inaccessible/blk";
+ break;
+
+ case S_IFIFO:
+ node = "/systemd/inaccessible/fifo";
+ break;
+
+ case S_IFSOCK:
+ node = "/systemd/inaccessible/sock";
+ break;
+ }
+ if (!node)
+ return -EINVAL;
+
+ d = path_join(runtime_dir, node);
+ if (!d)
+ return -ENOMEM;
+
+ /* On new kernels unprivileged users are permitted to create 0:0 char device nodes (because they also
+ * act as whiteout inode for overlayfs), but no other char or block device nodes. On old kernels no
+ * device node whatsoever may be created by unprivileged processes. Hence, if the caller asks for the
+ * inaccessible block device node let's see if the block device node actually exists, and if not,
+ * fall back to the character device node. From there fall back to the socket device node. This means
+ * in the best case we'll get the right device node type — but if not we'll hopefully at least get a
+ * device node at all. */
+
+ if (S_ISBLK(mode) &&
+ access(d, F_OK) < 0 && errno == ENOENT) {
+ free(d);
+ d = path_join(runtime_dir, "/systemd/inaccessible/chr");
+ if (!d)
+ return -ENOMEM;
+ }
+
+ if (IN_SET(mode & S_IFMT, S_IFBLK, S_IFCHR) &&
+ access(d, F_OK) < 0 && errno == ENOENT) {
+ free(d);
+ d = path_join(runtime_dir, "/systemd/inaccessible/sock");
+ if (!d)
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(d);
+ return 0;
+}
+
+#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
+static char* mount_flags_to_string(long unsigned flags) {
+ char *x;
+ _cleanup_free_ char *y = NULL;
+ long unsigned overflow;
+
+ overflow = flags & ~(MS_RDONLY |
+ MS_NOSUID |
+ MS_NODEV |
+ MS_NOEXEC |
+ MS_SYNCHRONOUS |
+ MS_REMOUNT |
+ MS_MANDLOCK |
+ MS_DIRSYNC |
+ MS_NOATIME |
+ MS_NODIRATIME |
+ MS_BIND |
+ MS_MOVE |
+ MS_REC |
+ MS_SILENT |
+ MS_POSIXACL |
+ MS_UNBINDABLE |
+ MS_PRIVATE |
+ MS_SLAVE |
+ MS_SHARED |
+ MS_RELATIME |
+ MS_KERNMOUNT |
+ MS_I_VERSION |
+ MS_STRICTATIME |
+ MS_LAZYTIME);
+
+ if (flags == 0 || overflow != 0)
+ if (asprintf(&y, "%lx", overflow) < 0)
+ return NULL;
+
+ x = strjoin(FLAG(MS_RDONLY),
+ FLAG(MS_NOSUID),
+ FLAG(MS_NODEV),
+ FLAG(MS_NOEXEC),
+ FLAG(MS_SYNCHRONOUS),
+ FLAG(MS_REMOUNT),
+ FLAG(MS_MANDLOCK),
+ FLAG(MS_DIRSYNC),
+ FLAG(MS_NOATIME),
+ FLAG(MS_NODIRATIME),
+ FLAG(MS_BIND),
+ FLAG(MS_MOVE),
+ FLAG(MS_REC),
+ FLAG(MS_SILENT),
+ FLAG(MS_POSIXACL),
+ FLAG(MS_UNBINDABLE),
+ FLAG(MS_PRIVATE),
+ FLAG(MS_SLAVE),
+ FLAG(MS_SHARED),
+ FLAG(MS_RELATIME),
+ FLAG(MS_KERNMOUNT),
+ FLAG(MS_I_VERSION),
+ FLAG(MS_STRICTATIME),
+ FLAG(MS_LAZYTIME),
+ y);
+ if (!x)
+ return NULL;
+ if (!y)
+ x[strlen(x) - 1] = '\0'; /* truncate the last | */
+ return x;
+}
+
+int mount_verbose_full(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options,
+ bool follow_symlink) {
+
+ _cleanup_free_ char *fl = NULL, *o = NULL;
+ unsigned long f;
+ int r;
+
+ r = mount_option_mangle(options, flags, &f, &o);
+ if (r < 0)
+ return log_full_errno(error_log_level, r,
+ "Failed to mangle mount options %s: %m",
+ strempty(options));
+
+ fl = mount_flags_to_string(f);
+
+ if ((f & MS_REMOUNT) && !what && !type)
+ log_debug("Remounting %s (%s \"%s\")...",
+ where, strnull(fl), strempty(o));
+ else if (!what && !type)
+ log_debug("Mounting %s (%s \"%s\")...",
+ where, strnull(fl), strempty(o));
+ else if ((f & MS_BIND) && !type)
+ log_debug("Bind-mounting %s on %s (%s \"%s\")...",
+ what, where, strnull(fl), strempty(o));
+ else if (f & MS_MOVE)
+ log_debug("Moving mount %s → %s (%s \"%s\")...",
+ what, where, strnull(fl), strempty(o));
+ else
+ log_debug("Mounting %s (%s) on %s (%s \"%s\")...",
+ strna(what), strna(type), where, strnull(fl), strempty(o));
+
+ if (follow_symlink)
+ r = mount(what, where, type, f, o) < 0 ? -errno : 0;
+ else
+ r = mount_nofollow(what, where, type, f, o);
+ if (r < 0)
+ return log_full_errno(error_log_level, r,
+ "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
+ strna(what), strna(type), where, strnull(fl), strempty(o));
+ return 0;
+}
+
+int umount_verbose(
+ int error_log_level,
+ const char *what,
+ int flags) {
+
+ assert(what);
+
+ log_debug("Umounting %s...", what);
+
+ if (umount2(what, flags) < 0)
+ return log_full_errno(error_log_level, errno,
+ "Failed to unmount %s: %m", what);
+
+ return 0;
+}
+
+int mount_option_mangle(
+ const char *options,
+ unsigned long mount_flags,
+ unsigned long *ret_mount_flags,
+ char **ret_remaining_options) {
+
+ const struct libmnt_optmap *map;
+ _cleanup_free_ char *ret = NULL;
+ const char *p;
+ int r;
+
+ /* This extracts mount flags from the mount options, and store
+ * non-mount-flag options to '*ret_remaining_options'.
+ * E.g.,
+ * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
+ * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
+ * "size=1630748k,mode=700,uid=1000,gid=1000".
+ * See more examples in test-mount-utils.c.
+ *
+ * Note that if 'options' does not contain any non-mount-flag options,
+ * then '*ret_remaining_options' is set to NULL instead of empty string.
+ * Note that this does not check validity of options stored in
+ * '*ret_remaining_options'.
+ * Note that if 'options' is NULL, then this just copies 'mount_flags'
+ * to '*ret_mount_flags'. */
+
+ assert(ret_mount_flags);
+ assert(ret_remaining_options);
+
+ map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
+ if (!map)
+ return -EINVAL;
+
+ p = options;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ const struct libmnt_optmap *ent;
+
+ r = extract_first_word(&p, &word, ",", EXTRACT_UNQUOTE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (ent = map; ent->name; ent++) {
+ /* All entries in MNT_LINUX_MAP do not take any argument.
+ * Thus, ent->name does not contain "=" or "[=]". */
+ if (!streq(word, ent->name))
+ continue;
+
+ if (!(ent->mask & MNT_INVERT))
+ mount_flags |= ent->id;
+ else if (mount_flags & ent->id)
+ mount_flags ^= ent->id;
+
+ break;
+ }
+
+ /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
+ if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
+ return -ENOMEM;
+ }
+
+ *ret_mount_flags = mount_flags;
+ *ret_remaining_options = TAKE_PTR(ret);
+
+ return 0;
+}
diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h
new file mode 100644
index 0000000..6202008
--- /dev/null
+++ b/src/shared/mount-util.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <mntent.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "errno-util.h"
+#include "macro.h"
+
+/* 4MB for contents of regular files, 64k inodes for directories, symbolic links and device specials, using
+ * large storage array systems as a baseline */
+#define TMPFS_LIMITS_DEV ",size=4m,nr_inodes=64k"
+
+/* Very little, if any use expected */
+#define TMPFS_LIMITS_EMPTY_OR_ALMOST ",size=4m,nr_inodes=1k"
+#define TMPFS_LIMITS_SYS TMPFS_LIMITS_EMPTY_OR_ALMOST
+#define TMPFS_LIMITS_SYS_FS_CGROUP TMPFS_LIMITS_EMPTY_OR_ALMOST
+
+/* On an extremely small device with only 256MB of RAM, 20% of RAM should be enough for the re-execution of
+ * PID1 because 16MB of free space is required. */
+#define TMPFS_LIMITS_RUN ",size=20%,nr_inodes=800k"
+
+/* The limit used for various nested tmpfs mounts, in paricular for guests started by systemd-nspawn.
+ * 10% of RAM (using 16GB of RAM as a baseline) translates to 400k inodes (assuming 4k each) and 25%
+ * translates to 1M inodes.
+ * (On the host, /tmp is configured through a .mount unit file.) */
+#define NESTED_TMPFS_LIMITS ",size=10%,nr_inodes=400k"
+
+/* More space for volatile root and /var */
+#define TMPFS_LIMITS_VAR ",size=25%,nr_inodes=1m"
+#define TMPFS_LIMITS_ROOTFS TMPFS_LIMITS_VAR
+#define TMPFS_LIMITS_VOLATILE_STATE TMPFS_LIMITS_VAR
+
+int mount_fd(const char *source, int target_fd, const char *filesystemtype, unsigned long mountflags, const void *data);
+int mount_nofollow(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data);
+
+int repeat_unmount(const char *path, int flags);
+int umount_recursive(const char *target, int flags);
+int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list);
+int bind_remount_recursive_with_mountinfo(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list, FILE *proc_self_mountinfo);
+int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo);
+
+int mount_move_root(const char *path);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, endmntent);
+#define _cleanup_endmntent_ _cleanup_(endmntentp)
+
+int mount_verbose_full(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options,
+ bool follow_symlink);
+
+static inline int mount_follow_verbose(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options) {
+ return mount_verbose_full(error_log_level, what, where, type, flags, options, true);
+}
+
+static inline int mount_nofollow_verbose(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options) {
+ return mount_verbose_full(error_log_level, what, where, type, flags, options, false);
+}
+
+int umount_verbose(
+ int error_log_level,
+ const char *where,
+ int flags);
+
+int mount_option_mangle(
+ const char *options,
+ unsigned long mount_flags,
+ unsigned long *ret_mount_flags,
+ char **ret_remaining_options);
+
+int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest);
+
+/* Useful for usage with _cleanup_(), unmounts, removes a directory and frees the pointer */
+static inline char* umount_and_rmdir_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) umount_recursive(p, 0);
+ (void) rmdir(p);
+ free(p);
+ return NULL;
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_rmdir_and_free);
diff --git a/src/shared/netif-naming-scheme.c b/src/shared/netif-naming-scheme.c
new file mode 100644
index 0000000..df520ab
--- /dev/null
+++ b/src/shared/netif-naming-scheme.c
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "netif-naming-scheme.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+
+static const NamingScheme naming_schemes[] = {
+ { "v238", NAMING_V238 },
+ { "v239", NAMING_V239 },
+ { "v240", NAMING_V240 },
+ { "v241", NAMING_V241 },
+ { "v243", NAMING_V243 },
+ { "v245", NAMING_V245 },
+ { "v247", NAMING_V247 },
+ /* … add more schemes here, as the logic to name devices is updated … */
+};
+
+static const NamingScheme* naming_scheme_from_name(const char *name) {
+ size_t i;
+
+ if (streq(name, "latest"))
+ return naming_schemes + ELEMENTSOF(naming_schemes) - 1;
+
+ for (i = 0; i < ELEMENTSOF(naming_schemes); i++)
+ if (streq(naming_schemes[i].name, name))
+ return naming_schemes + i;
+
+ return NULL;
+}
+
+const NamingScheme* naming_scheme(void) {
+ static const NamingScheme *cache = NULL;
+ _cleanup_free_ char *buffer = NULL;
+ const char *e, *k;
+
+ if (cache)
+ return cache;
+
+ /* Acquire setting from the kernel command line */
+ (void) proc_cmdline_get_key("net.naming-scheme", 0, &buffer);
+
+ /* Also acquire it from an env var */
+ e = getenv("NET_NAMING_SCHEME");
+ if (e) {
+ if (*e == ':') {
+ /* If prefixed with ':' the kernel cmdline takes precedence */
+ k = buffer ?: e + 1;
+ } else
+ k = e; /* Otherwise the env var takes precedence */
+ } else
+ k = buffer;
+
+ if (k) {
+ cache = naming_scheme_from_name(k);
+ if (cache) {
+ log_info("Using interface naming scheme '%s'.", cache->name);
+ return cache;
+ }
+
+ log_warning("Unknown interface naming scheme '%s' requested, ignoring.", k);
+ }
+
+ cache = naming_scheme_from_name(DEFAULT_NET_NAMING_SCHEME);
+ assert(cache);
+ log_info("Using default interface naming scheme '%s'.", cache->name);
+
+ return cache;
+}
diff --git a/src/shared/netif-naming-scheme.h b/src/shared/netif-naming-scheme.h
new file mode 100644
index 0000000..503a74e
--- /dev/null
+++ b/src/shared/netif-naming-scheme.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+/* So here's the deal: net_id is supposed to be an exercise in providing stable names for network devices. However, we
+ * also want to keep updating the naming scheme used in future versions of net_id. These two goals of course are
+ * contradictory: on one hand we want things to not change and on the other hand we want them to improve. Our way out
+ * of this dilemma is to introduce the "naming scheme" concept: each time we improve the naming logic we define a new
+ * flag for it. Then, we keep a list of schemes, each identified by a name associated with the flags it implements. Via
+ * a kernel command line and environment variable we then allow the user to pick the scheme they want us to follow:
+ * installers could "freeze" the used scheme at the moment of installation this way.
+ *
+ * Developers: each time you tweak the naming logic here, define a new flag below, and condition the tweak with
+ * it. Each time we do a release we'll then add a new scheme entry and include all newly defined flags.
+ *
+ * Note that this is only half a solution to the problem though: not only udev/net_id gets updated all the time, the
+ * kernel gets too. And thus a kernel that previously didn't expose some sysfs attribute we look for might eventually
+ * do, and thus affect our naming scheme too. Thus, enforcing a naming scheme will make interfacing more stable across
+ * OS versions, but not fully stabilize them. */
+typedef enum NamingSchemeFlags {
+ /* First, the individual features */
+ NAMING_SR_IOV_V = 1 << 0, /* Use "v" suffix for SR-IOV, see 609948c7043a */
+ NAMING_NPAR_ARI = 1 << 1, /* Use NPAR "ARI", see 6bc04997b6ea */
+ NAMING_INFINIBAND = 1 << 2, /* Use "ib" prefix for infiniband, see 938d30aa98df */
+ NAMING_ZERO_ACPI_INDEX = 1 << 3, /* Use zero acpi_index field, see d81186ef4f6a */
+ NAMING_ALLOW_RERENAMES = 1 << 4, /* Allow re-renaming of devices, see #9006 */
+ NAMING_STABLE_VIRTUAL_MACS = 1 << 5, /* Use device name to generate MAC, see 6d3646406560 */
+ NAMING_NETDEVSIM = 1 << 6, /* Generate names for netdevsim devices, see eaa9d507d855 */
+ NAMING_LABEL_NOPREFIX = 1 << 7, /* Don't prepend ID_NET_LABEL_ONBOARD with interface type prefix */
+ NAMING_NSPAWN_LONG_HASH = 1 << 8, /* Shorten nspawn interfaces by including 24bit hash, instead of simple truncation */
+ NAMING_BRIDGE_NO_SLOT = 1 << 9, /* Don't use PCI hotplug slot information if the corresponding device is a PCI bridge */
+
+ /* And now the masks that combine the features above */
+ NAMING_V238 = 0,
+ NAMING_V239 = NAMING_V238 | NAMING_SR_IOV_V | NAMING_NPAR_ARI,
+ NAMING_V240 = NAMING_V239 | NAMING_INFINIBAND | NAMING_ZERO_ACPI_INDEX | NAMING_ALLOW_RERENAMES,
+ NAMING_V241 = NAMING_V240 | NAMING_STABLE_VIRTUAL_MACS,
+ NAMING_V243 = NAMING_V241 | NAMING_NETDEVSIM | NAMING_LABEL_NOPREFIX,
+ NAMING_V245 = NAMING_V243 | NAMING_NSPAWN_LONG_HASH,
+ NAMING_V247 = NAMING_V245 | NAMING_BRIDGE_NO_SLOT,
+
+ _NAMING_SCHEME_FLAGS_INVALID = -1,
+} NamingSchemeFlags;
+
+typedef struct NamingScheme {
+ const char *name;
+ NamingSchemeFlags flags;
+} NamingScheme;
+
+const NamingScheme* naming_scheme(void);
+
+static inline bool naming_scheme_has(NamingSchemeFlags flags) {
+ return FLAGS_SET(naming_scheme()->flags, flags);
+}
diff --git a/src/shared/nscd-flush.c b/src/shared/nscd-flush.c
new file mode 100644
index 0000000..dfc47c4
--- /dev/null
+++ b/src/shared/nscd-flush.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include <sys/poll.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "nscd-flush.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define NSCD_FLUSH_CACHE_TIMEOUT_USEC (5*USEC_PER_SEC)
+
+struct nscdInvalidateRequest {
+ int32_t version;
+ int32_t type; /* in glibc this is an enum. We don't replicate this here 1:1. Also, wtf, how unportable is that
+ * even? */
+ int32_t key_len;
+ char dbname[];
+};
+
+static const union sockaddr_union nscd_sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/nscd/socket",
+};
+
+static int nscd_flush_cache_one(const char *database, usec_t end) {
+ size_t req_size, has_written = 0, has_read = 0, l;
+ struct nscdInvalidateRequest *req;
+ _cleanup_close_ int fd = -1;
+ int32_t resp;
+ int events;
+
+ assert(database);
+
+ l = strlen(database);
+ req_size = offsetof(struct nscdInvalidateRequest, dbname) + l + 1;
+
+ req = alloca(req_size);
+ *req = (struct nscdInvalidateRequest) {
+ .version = 2,
+ .type = 10,
+ .key_len = l + 1,
+ };
+
+ strcpy(req->dbname, database);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to allocate nscd socket: %m");
+
+ /* Note: connect() returns EINPROGRESS if O_NONBLOCK is set and establishing a connection takes time. The
+ * kernel lets us know this way that the connection is now being established, and we should watch with poll()
+ * to learn when it is fully established. That said, AF_UNIX on Linux never triggers this IRL (connect() is
+ * always instant on AF_UNIX), hence handling this is mostly just an exercise in defensive, protocol-agnostic
+ * programming.
+ *
+ * connect() returns EAGAIN if the socket's backlog limit has been reached. When we see this we give up right
+ * away, after all this entire function here is written in a defensive style so that a non-responding nscd
+ * doesn't stall us for good. (Even if we wanted to handle this better: the Linux kernel doesn't really have a
+ * nice way to connect() to a server synchronously with a time limit that would also cover dealing with the
+ * backlog limit. After all SO_RCVTIMEO and SR_SNDTIMEO don't apply to connect(), and alarm() is frickin' ugly
+ * and not really reasonably usable from threads-aware code.) */
+ if (connect(fd, &nscd_sa.sa, SOCKADDR_UN_LEN(nscd_sa.un)) < 0) {
+ if (errno == EAGAIN)
+ return log_debug_errno(errno, "nscd is overloaded (backlog limit reached) and refuses to take further connections: %m");
+ if (errno != EINPROGRESS)
+ return log_debug_errno(errno, "Failed to connect to nscd socket: %m");
+
+ /* Continue in case of EINPROGRESS, but don't bother with send() or recv() until being notified that
+ * establishing the connection is complete. */
+ events = 0;
+ } else
+ events = POLLIN|POLLOUT; /* Let's assume initially that we can write and read to the fd, to suppress
+ * one poll() invocation */
+ for (;;) {
+ usec_t p;
+
+ if (events & POLLOUT) {
+ ssize_t m;
+
+ assert(has_written < req_size);
+
+ m = send(fd, (uint8_t*) req + has_written, req_size - has_written, MSG_NOSIGNAL);
+ if (m < 0) {
+ if (errno != EAGAIN) /* Note that EAGAIN is returned by the kernel whenever it can't
+ * take the data right now, and that includes if the connect() is
+ * asynchronous and we saw EINPROGRESS on it, and it hasn't
+ * completed yet. */
+ return log_debug_errno(errno, "Failed to write to nscd socket: %m");
+ } else
+ has_written += m;
+ }
+
+ if (events & (POLLIN|POLLERR|POLLHUP)) {
+ ssize_t m;
+
+ if (has_read >= sizeof(resp))
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Response from nscd longer than expected: %m");
+
+ m = recv(fd, (uint8_t*) &resp + has_read, sizeof(resp) - has_read, 0);
+ if (m < 0) {
+ if (errno != EAGAIN)
+ return log_debug_errno(errno, "Failed to read from nscd socket: %m");
+ } else if (m == 0) { /* EOF */
+ if (has_read == 0 && has_written >= req_size) /* Older nscd immediately terminated the
+ * connection, accept that as OK */
+ return 1;
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "nscd prematurely ended connection.");
+ } else
+ has_read += m;
+ }
+
+ if (has_written >= req_size && has_read >= sizeof(resp)) { /* done? */
+ if (resp < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "nscd sent us a negative error number: %i", resp);
+ if (resp > 0)
+ return log_debug_errno(resp, "nscd return failure code on invalidating '%s'.", database);
+ return 1;
+ }
+
+ p = now(CLOCK_MONOTONIC);
+ if (p >= end)
+ return -ETIMEDOUT;
+
+ events = fd_wait_for_event(fd, POLLIN | (has_written < req_size ? POLLOUT : 0), end - p);
+ if (events < 0)
+ return events;
+ }
+}
+
+int nscd_flush_cache(char **databases) {
+ usec_t end;
+ int r = 0;
+ char **i;
+
+ /* Tries to invalidate the specified database in nscd. We do this carefully, with a 5s timeout, so that we
+ * don't block indefinitely on another service. */
+
+ end = usec_add(now(CLOCK_MONOTONIC), NSCD_FLUSH_CACHE_TIMEOUT_USEC);
+
+ STRV_FOREACH(i, databases) {
+ int k;
+
+ k = nscd_flush_cache_one(*i, end);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/shared/nscd-flush.h b/src/shared/nscd-flush.h
new file mode 100644
index 0000000..5aafa9a
--- /dev/null
+++ b/src/shared/nscd-flush.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int nscd_flush_cache(char **databases);
diff --git a/src/shared/nsflags.c b/src/shared/nsflags.c
new file mode 100644
index 0000000..2845041
--- /dev/null
+++ b/src/shared/nsflags.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "nsflags.h"
+#include "string-util.h"
+
+const struct namespace_flag_map namespace_flag_map[] = {
+ { CLONE_NEWCGROUP, "cgroup" },
+ { CLONE_NEWIPC, "ipc" },
+ { CLONE_NEWNET, "net" },
+ /* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more
+ * explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
+ { CLONE_NEWNS, "mnt" },
+ { CLONE_NEWPID, "pid" },
+ { CLONE_NEWUSER, "user" },
+ { CLONE_NEWUTS, "uts" },
+ {}
+};
+
+int namespace_flags_from_string(const char *name, unsigned long *ret) {
+ unsigned long flags = 0;
+ int r;
+
+ assert_se(ret);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ unsigned long f = 0;
+ unsigned i;
+
+ r = extract_first_word(&name, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (i = 0; namespace_flag_map[i].name; i++)
+ if (streq(word, namespace_flag_map[i].name)) {
+ f = namespace_flag_map[i].flag;
+ break;
+ }
+
+ if (f == 0)
+ return -EINVAL;
+
+ flags |= f;
+ }
+
+ *ret = flags;
+ return 0;
+}
+
+int namespace_flags_to_string(unsigned long flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ unsigned i;
+
+ for (i = 0; namespace_flag_map[i].name; i++) {
+ if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag)
+ continue;
+
+ if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name, NULL))
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
diff --git a/src/shared/nsflags.h b/src/shared/nsflags.h
new file mode 100644
index 0000000..3d774c7
--- /dev/null
+++ b/src/shared/nsflags.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "missing_sched.h"
+
+/* The combination of all namespace flags defined by the kernel. The right type for this isn't clear. setns() and
+ * unshare() expect these flags to be passed as (signed) "int", while clone() wants them as "unsigned long". The latter
+ * is definitely more appropriate for a flags parameter, and also the larger type of the two, hence let's stick to that
+ * here. */
+#define NAMESPACE_FLAGS_ALL \
+ ((unsigned long) (CLONE_NEWCGROUP| \
+ CLONE_NEWIPC| \
+ CLONE_NEWNET| \
+ CLONE_NEWNS| \
+ CLONE_NEWPID| \
+ CLONE_NEWUSER| \
+ CLONE_NEWUTS))
+
+#define NAMESPACE_FLAGS_INITIAL ((unsigned long) -1)
+
+int namespace_flags_from_string(const char *name, unsigned long *ret);
+int namespace_flags_to_string(unsigned long flags, char **ret);
+
+struct namespace_flag_map {
+ unsigned long flag;
+ const char *name;
+};
+
+extern const struct namespace_flag_map namespace_flag_map[];
diff --git a/src/shared/numa-util.c b/src/shared/numa-util.c
new file mode 100644
index 0000000..7e41d68
--- /dev/null
+++ b/src/shared/numa-util.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sched.h>
+
+#include "alloc-util.h"
+#include "cpu-set-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "numa-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+
+bool numa_policy_is_valid(const NUMAPolicy *policy) {
+ assert(policy);
+
+ if (!mpol_is_valid(numa_policy_get_type(policy)))
+ return false;
+
+ if (!policy->nodes.set &&
+ !IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL, MPOL_PREFERRED))
+ return false;
+
+ if (policy->nodes.set &&
+ numa_policy_get_type(policy) == MPOL_PREFERRED &&
+ CPU_COUNT_S(policy->nodes.allocated, policy->nodes.set) != 1)
+ return false;
+
+ return true;
+}
+
+static int numa_policy_to_mempolicy(const NUMAPolicy *policy, unsigned long *ret_maxnode, unsigned long **ret_nodes) {
+ unsigned node, bits = 0, ulong_bits;
+ _cleanup_free_ unsigned long *out = NULL;
+
+ assert(policy);
+ assert(ret_maxnode);
+ assert(ret_nodes);
+
+ if (IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL) ||
+ (numa_policy_get_type(policy) == MPOL_PREFERRED && !policy->nodes.set)) {
+ *ret_nodes = NULL;
+ *ret_maxnode = 0;
+ return 0;
+ }
+
+ bits = policy->nodes.allocated * 8;
+ ulong_bits = sizeof(unsigned long) * 8;
+
+ out = new0(unsigned long, DIV_ROUND_UP(policy->nodes.allocated, sizeof(unsigned long)));
+ if (!out)
+ return -ENOMEM;
+
+ /* We don't make any assumptions about internal type libc is using to store NUMA node mask.
+ Hence we need to convert the node mask to the representation expected by set_mempolicy() */
+ for (node = 0; node < bits; node++)
+ if (CPU_ISSET_S(node, policy->nodes.allocated, policy->nodes.set))
+ out[node / ulong_bits] |= 1ul << (node % ulong_bits);
+
+ *ret_nodes = TAKE_PTR(out);
+ *ret_maxnode = bits + 1;
+ return 0;
+}
+
+int apply_numa_policy(const NUMAPolicy *policy) {
+ int r;
+ _cleanup_free_ unsigned long *nodes = NULL;
+ unsigned long maxnode;
+
+ assert(policy);
+
+ if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
+ return -EOPNOTSUPP;
+
+ if (!numa_policy_is_valid(policy))
+ return -EINVAL;
+
+ r = numa_policy_to_mempolicy(policy, &maxnode, &nodes);
+ if (r < 0)
+ return r;
+
+ r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *ret) {
+ int r;
+ size_t i;
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+
+ assert(policy);
+ assert(ret);
+
+ for (i = 0; i < policy->nodes.allocated * 8; i++) {
+ _cleanup_free_ char *l = NULL;
+ char p[STRLEN("/sys/devices/system/node/node//cpulist") + DECIMAL_STR_MAX(size_t) + 1];
+ _cleanup_(cpu_set_reset) CPUSet part = {};
+
+ if (!CPU_ISSET_S(i, policy->nodes.allocated, policy->nodes.set))
+ continue;
+
+ xsprintf(p, "/sys/devices/system/node/node%zu/cpulist", i);
+
+ r = read_one_line_file(p, &l);
+ if (r < 0)
+ return r;
+
+ r = parse_cpu_set(l, &part);
+ if (r < 0)
+ return r;
+
+ r = cpu_set_add_all(&s, &part);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = s;
+ s = (CPUSet) {};
+
+ return 0;
+}
+
+static int numa_max_node(void) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r, max_node = 0;
+
+ d = opendir("/sys/devices/system/node");
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT(de, d, break) {
+ int node;
+ const char *n;
+
+ (void) dirent_ensure_type(d, de);
+
+ if (de->d_type != DT_DIR)
+ continue;
+
+ n = startswith(de->d_name, "node");
+ if (!n)
+ continue;
+
+ r = safe_atoi(n, &node);
+ if (r < 0)
+ continue;
+
+ if (node > max_node)
+ max_node = node;
+ }
+
+ return max_node;
+}
+
+int numa_mask_add_all(CPUSet *mask) {
+ int m;
+
+ assert(mask);
+
+ m = numa_max_node();
+ if (m < 0) {
+ log_debug_errno(m, "Failed to determine maximum NUMA node index, assuming 1023: %m");
+ m = 1023; /* CONFIG_NODES_SHIFT is set to 10 on x86_64, i.e. 1024 NUMA nodes in total */
+ }
+
+ for (int i = 0; i <= m; i++) {
+ int r;
+
+ r = cpu_set_add(mask, i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static const char* const mpol_table[] = {
+ [MPOL_DEFAULT] = "default",
+ [MPOL_PREFERRED] = "preferred",
+ [MPOL_BIND] = "bind",
+ [MPOL_INTERLEAVE] = "interleave",
+ [MPOL_LOCAL] = "local",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mpol, int);
diff --git a/src/shared/numa-util.h b/src/shared/numa-util.h
new file mode 100644
index 0000000..2f736c9
--- /dev/null
+++ b/src/shared/numa-util.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "cpu-set-util.h"
+#include "missing_syscall.h"
+
+static inline bool mpol_is_valid(int t) {
+ return t >= MPOL_DEFAULT && t <= MPOL_LOCAL;
+}
+
+typedef struct NUMAPolicy {
+ /* Always use numa_policy_get_type() to read the value */
+ int type;
+ CPUSet nodes;
+} NUMAPolicy;
+
+bool numa_policy_is_valid(const NUMAPolicy *p);
+
+static inline int numa_policy_get_type(const NUMAPolicy *p) {
+ return p->type < 0 ? (p->nodes.set ? MPOL_PREFERRED : -1) : p->type;
+}
+
+static inline void numa_policy_reset(NUMAPolicy *p) {
+ assert(p);
+ cpu_set_reset(&p->nodes);
+ p->type = -1;
+}
+
+int apply_numa_policy(const NUMAPolicy *policy);
+int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *set);
+
+int numa_mask_add_all(CPUSet *mask);
+
+const char* mpol_to_string(int i) _const_;
+int mpol_from_string(const char *s) _pure_;
diff --git a/src/shared/offline-passwd.c b/src/shared/offline-passwd.c
new file mode 100644
index 0000000..b607aac
--- /dev/null
+++ b/src/shared/offline-passwd.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "offline-passwd.h"
+#include "path-util.h"
+#include "user-util.h"
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(uid_gid_hash_ops, char, string_hash_func, string_compare_func, free);
+
+static int open_passwd_file(const char *root, const char *fname, FILE **ret_file) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+
+ fd = chase_symlinks_and_open(fname, root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC, &p);
+ if (fd < 0)
+ return fd;
+
+ FILE *f = fdopen(fd, "r");
+ if (!f)
+ return -errno;
+
+ TAKE_FD(fd);
+
+ log_debug("Reading %s entries from %s...", basename(fname), p);
+
+ *ret_file = f;
+ return 0;
+}
+
+static int populate_uid_cache(const char *root, Hashmap **ret) {
+ _cleanup_(hashmap_freep) Hashmap *cache = NULL;
+ int r;
+
+ cache = hashmap_new(&uid_gid_hash_ops);
+ if (!cache)
+ return -ENOMEM;
+
+ /* The directory list is hardcoded here: /etc is the standard, and rpm-ostree uses /usr/lib. This
+ * could be made configurable, but I don't see the point right now. */
+
+ const char *fname;
+ FOREACH_STRING(fname, "/etc/passwd", "/usr/lib/passwd") {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ r = open_passwd_file(root, fname, &f);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+
+ struct passwd *pw;
+ while ((r = fgetpwent_sane(f, &pw)) > 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = strdup(pw->pw_name);
+ if (!n)
+ return -ENOMEM;
+
+ r = hashmap_put(cache, n, UID_TO_PTR(pw->pw_uid));
+ if (IN_SET(r, 0 -EEXIST))
+ continue;
+ if (r < 0)
+ return r;
+ TAKE_PTR(n);
+ }
+ }
+
+ *ret = TAKE_PTR(cache);
+ return 0;
+}
+
+static int populate_gid_cache(const char *root, Hashmap **ret) {
+ _cleanup_(hashmap_freep) Hashmap *cache = NULL;
+ int r;
+
+ cache = hashmap_new(&uid_gid_hash_ops);
+ if (!cache)
+ return -ENOMEM;
+
+ const char *fname;
+ FOREACH_STRING(fname, "/etc/group", "/usr/lib/group") {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ r = open_passwd_file(root, fname, &f);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+
+ struct group *gr;
+ while ((r = fgetgrent_sane(f, &gr)) > 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = strdup(gr->gr_name);
+ if (!n)
+ return -ENOMEM;
+
+ r = hashmap_put(cache, n, GID_TO_PTR(gr->gr_gid));
+ if (IN_SET(r, 0, -EEXIST))
+ continue;
+ if (r < 0)
+ return r;
+ TAKE_PTR(n);
+ }
+ }
+
+ *ret = TAKE_PTR(cache);
+ return 0;
+}
+
+int name_to_uid_offline(
+ const char *root,
+ const char *user,
+ uid_t *ret_uid,
+ Hashmap **cache) {
+
+ void *found;
+ int r;
+
+ assert(user);
+ assert(ret_uid);
+ assert(cache);
+
+ if (!*cache) {
+ r = populate_uid_cache(root, cache);
+ if (r < 0)
+ return r;
+ }
+
+ found = hashmap_get(*cache, user);
+ if (!found)
+ return -ESRCH;
+
+ *ret_uid = PTR_TO_UID(found);
+ return 0;
+}
+
+int name_to_gid_offline(
+ const char *root,
+ const char *group,
+ gid_t *ret_gid,
+ Hashmap **cache) {
+
+ void *found;
+ int r;
+
+ assert(group);
+ assert(ret_gid);
+ assert(cache);
+
+ if (!*cache) {
+ r = populate_gid_cache(root, cache);
+ if (r < 0)
+ return r;
+ }
+
+ found = hashmap_get(*cache, group);
+ if (!found)
+ return -ESRCH;
+
+ *ret_gid = PTR_TO_GID(found);
+ return 0;
+}
diff --git a/src/shared/offline-passwd.h b/src/shared/offline-passwd.h
new file mode 100644
index 0000000..587af7b
--- /dev/null
+++ b/src/shared/offline-passwd.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "hashmap.h"
+
+int name_to_uid_offline(const char *root, const char *user, uid_t *ret_uid, Hashmap **cache);
+int name_to_gid_offline(const char *root, const char *group, gid_t *ret_gid, Hashmap **cache);
diff --git a/src/shared/openssl-util.h b/src/shared/openssl-util.h
new file mode 100644
index 0000000..1b49834
--- /dev/null
+++ b/src/shared/openssl-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_OPENSSL
+# include <openssl/pem.h>
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(X509*, X509_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(X509_NAME*, X509_NAME_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_PKEY_CTX*, EVP_PKEY_CTX_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_CIPHER_CTX*, EVP_CIPHER_CTX_free);
+
+#endif
diff --git a/src/shared/os-util.c b/src/shared/os-util.c
new file mode 100644
index 0000000..3b7e495
--- /dev/null
+++ b/src/shared/os-util.c
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "os-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int path_is_os_tree(const char *path) {
+ int r;
+
+ assert(path);
+
+ /* Does the path exist at all? If not, generate an error immediately. This is useful so that a missing root dir
+ * always results in -ENOENT, and we can properly distinguish the case where the whole root doesn't exist from
+ * the case where just the os-release file is missing. */
+ if (laccess(path, F_OK) < 0)
+ return -errno;
+
+ /* We use {/etc|/usr/lib}/os-release as flag file if something is an OS */
+ r = open_os_release(path, NULL, NULL);
+ if (r == -ENOENT) /* We got nothing */
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int open_os_release(const char *root, char **ret_path, int *ret_fd) {
+ _cleanup_free_ char *q = NULL;
+ const char *p;
+ int r, fd;
+
+ FOREACH_STRING(p, "/etc/os-release", "/usr/lib/os-release") {
+ r = chase_symlinks(p, root, CHASE_PREFIX_ROOT,
+ ret_path ? &q : NULL,
+ ret_fd ? &fd : NULL);
+ if (r != -ENOENT)
+ break;
+ }
+ if (r < 0)
+ return r;
+
+ if (ret_fd) {
+ int real_fd;
+
+ /* Convert the O_PATH fd into a proper, readable one */
+ real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ safe_close(fd);
+ if (real_fd < 0)
+ return real_fd;
+
+ *ret_fd = real_fd;
+ }
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(q);
+
+ return 0;
+}
+
+int fopen_os_release(const char *root, char **ret_path, FILE **ret_file) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ FILE *f;
+ int r;
+
+ if (!ret_file)
+ return open_os_release(root, ret_path, NULL);
+
+ r = open_os_release(root, ret_path ? &p : NULL, &fd);
+ if (r < 0)
+ return r;
+
+ f = take_fdopen(&fd, "r");
+ if (!f)
+ return -errno;
+
+ *ret_file = f;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ return 0;
+}
+
+int parse_os_release(const char *root, ...) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ r = fopen_os_release(root, &p, &f);
+ if (r < 0)
+ return r;
+
+ va_start(ap, root);
+ r = parse_env_filev(f, p, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int load_os_release_pairs(const char *root, char ***ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = fopen_os_release(root, &p, &f);
+ if (r < 0)
+ return r;
+
+ return load_env_file_pairs(f, p, ret);
+}
+
+int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret) {
+ _cleanup_strv_free_ char **os_release_pairs = NULL, **os_release_pairs_prefixed = NULL;
+ char **p, **q;
+ int r;
+
+ r = load_os_release_pairs(root, &os_release_pairs);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(p, q, os_release_pairs) {
+ char *line;
+
+ /* We strictly return only the four main ID fields and ignore the rest */
+ if (!STR_IN_SET(*p, "ID", "VERSION_ID", "BUILD_ID", "VARIANT_ID"))
+ continue;
+
+ ascii_strlower(*p);
+ line = strjoin(prefix, *p, "=", *q);
+ if (!line)
+ return -ENOMEM;
+ r = strv_consume(&os_release_pairs_prefixed, line);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(os_release_pairs_prefixed);
+
+ return 0;
+}
diff --git a/src/shared/os-util.h b/src/shared/os-util.h
new file mode 100644
index 0000000..1d9b0b1
--- /dev/null
+++ b/src/shared/os-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+int path_is_os_tree(const char *path);
+
+int open_os_release(const char *root, char **ret_path, int *ret_fd);
+int fopen_os_release(const char *root, char **ret_path, FILE **ret_file);
+
+int parse_os_release(const char *root, ...) _sentinel_;
+int load_os_release_pairs(const char *root, char ***ret);
+int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret);
diff --git a/src/shared/output-mode.c b/src/shared/output-mode.c
new file mode 100644
index 0000000..1645b75
--- /dev/null
+++ b/src/shared/output-mode.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "output-mode.h"
+#include "string-table.h"
+
+JsonFormatFlags output_mode_to_json_format_flags(OutputMode m) {
+
+ switch (m) {
+
+ case OUTPUT_JSON_SSE:
+ return JSON_FORMAT_SSE;
+
+ case OUTPUT_JSON_SEQ:
+ return JSON_FORMAT_SEQ;
+
+ case OUTPUT_JSON_PRETTY:
+ return JSON_FORMAT_PRETTY;
+
+ default:
+ return JSON_FORMAT_NEWLINE;
+ }
+}
+
+static const char *const output_mode_table[_OUTPUT_MODE_MAX] = {
+ [OUTPUT_SHORT] = "short",
+ [OUTPUT_SHORT_FULL] = "short-full",
+ [OUTPUT_SHORT_ISO] = "short-iso",
+ [OUTPUT_SHORT_ISO_PRECISE] = "short-iso-precise",
+ [OUTPUT_SHORT_PRECISE] = "short-precise",
+ [OUTPUT_SHORT_MONOTONIC] = "short-monotonic",
+ [OUTPUT_SHORT_UNIX] = "short-unix",
+ [OUTPUT_VERBOSE] = "verbose",
+ [OUTPUT_EXPORT] = "export",
+ [OUTPUT_JSON] = "json",
+ [OUTPUT_JSON_PRETTY] = "json-pretty",
+ [OUTPUT_JSON_SSE] = "json-sse",
+ [OUTPUT_JSON_SEQ] = "json-seq",
+ [OUTPUT_CAT] = "cat",
+ [OUTPUT_WITH_UNIT] = "with-unit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(output_mode, OutputMode);
diff --git a/src/shared/output-mode.h b/src/shared/output-mode.h
new file mode 100644
index 0000000..a879054
--- /dev/null
+++ b/src/shared/output-mode.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "json.h"
+#include "macro.h"
+
+typedef enum OutputMode {
+ OUTPUT_SHORT,
+ OUTPUT_SHORT_FULL,
+ OUTPUT_SHORT_ISO,
+ OUTPUT_SHORT_ISO_PRECISE,
+ OUTPUT_SHORT_PRECISE,
+ OUTPUT_SHORT_MONOTONIC,
+ OUTPUT_SHORT_UNIX,
+ OUTPUT_VERBOSE,
+ OUTPUT_EXPORT,
+ OUTPUT_JSON,
+ OUTPUT_JSON_PRETTY,
+ OUTPUT_JSON_SSE,
+ OUTPUT_JSON_SEQ,
+ OUTPUT_CAT,
+ OUTPUT_WITH_UNIT,
+ _OUTPUT_MODE_MAX,
+ _OUTPUT_MODE_INVALID = -1
+} OutputMode;
+
+static inline bool OUTPUT_MODE_IS_JSON(OutputMode m) {
+ return IN_SET(m, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ);
+}
+
+/* The output flags definitions are shared by the logs and process tree output. Some apply to both, some only to the
+ * logs output, others only to the process tree output. */
+
+typedef enum OutputFlags {
+ OUTPUT_SHOW_ALL = 1 << 0,
+ OUTPUT_WARN_CUTOFF = 1 << 1,
+ OUTPUT_FULL_WIDTH = 1 << 2,
+ OUTPUT_COLOR = 1 << 3,
+ OUTPUT_CATALOG = 1 << 4,
+ OUTPUT_BEGIN_NEWLINE = 1 << 5,
+ OUTPUT_UTC = 1 << 6,
+ OUTPUT_KERNEL_THREADS = 1 << 7,
+ OUTPUT_NO_HOSTNAME = 1 << 8,
+} OutputFlags;
+
+JsonFormatFlags output_mode_to_json_format_flags(OutputMode m);
+
+const char* output_mode_to_string(OutputMode m) _const_;
+OutputMode output_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/pager.c b/src/shared/pager.c
new file mode 100644
index 0000000..f689d9f
--- /dev/null
+++ b/src/shared/pager.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "sd-login.h"
+
+#include "copy.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "pager.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static pid_t pager_pid = 0;
+
+static int stored_stdout = -1;
+static int stored_stderr = -1;
+static bool stdout_redirected = false;
+static bool stderr_redirected = false;
+
+_noreturn_ static void pager_fallback(void) {
+ int r;
+
+ r = copy_bytes(STDIN_FILENO, STDOUT_FILENO, (uint64_t) -1, 0);
+ if (r < 0) {
+ log_error_errno(r, "Internal pager failed: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+}
+
+static int no_quit_on_interrupt(int exe_name_fd, const char *less_opts) {
+ _cleanup_fclose_ FILE *file = NULL;
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ assert(exe_name_fd >= 0);
+ assert(less_opts);
+
+ /* This takes ownership of exe_name_fd */
+ file = fdopen(exe_name_fd, "r");
+ if (!file) {
+ safe_close(exe_name_fd);
+ return log_error_errno(errno, "Failed to create FILE object: %m");
+ }
+
+ /* Find the last line */
+ for (;;) {
+ _cleanup_free_ char *t = NULL;
+
+ r = read_line(file, LONG_LINE_MAX, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from socket: %m");
+ if (r == 0)
+ break;
+
+ free_and_replace(line, t);
+ }
+
+ /* We only treat "less" specially.
+ * Return true whenever option K is *not* set. */
+ r = streq_ptr(line, "less") && !strchr(less_opts, 'K');
+
+ log_debug("Pager executable is \"%s\", options \"%s\", quit_on_interrupt: %s",
+ strnull(line), less_opts, yes_no(!r));
+ return r;
+}
+
+int pager_open(PagerFlags flags) {
+ _cleanup_close_pair_ int fd[2] = { -1, -1 }, exe_name_pipe[2] = { -1, -1 };
+ _cleanup_strv_free_ char **pager_args = NULL;
+ const char *pager, *less_opts;
+ int r;
+
+ if (flags & PAGER_DISABLE)
+ return 0;
+
+ if (pager_pid > 0)
+ return 1;
+
+ if (terminal_is_dumb())
+ return 0;
+
+ if (!is_main_thread())
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Pager invoked from wrong thread.");
+
+ pager = getenv("SYSTEMD_PAGER");
+ if (!pager)
+ pager = getenv("PAGER");
+
+ if (pager) {
+ pager_args = strv_split(pager, WHITESPACE);
+ if (!pager_args)
+ return log_oom();
+
+ /* If the pager is explicitly turned off, honour it */
+ if (strv_isempty(pager_args) || strv_equal(pager_args, STRV_MAKE("cat")))
+ return 0;
+ }
+
+ /* Determine and cache number of columns/lines before we spawn the pager so that we get the value from the
+ * actual tty */
+ (void) columns();
+ (void) lines();
+
+ if (pipe2(fd, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create pager pipe: %m");
+
+ /* This is a pipe to feed the name of the executed pager binary into the parent */
+ if (pipe2(exe_name_pipe, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create exe_name pipe: %m");
+
+ /* Initialize a good set of less options */
+ less_opts = getenv("SYSTEMD_LESS");
+ if (!less_opts)
+ less_opts = "FRSXMK";
+ if (flags & PAGER_JUMP_TO_END)
+ less_opts = strjoina(less_opts, " +G");
+
+ /* We set SIGINT as PR_DEATHSIG signal here, to match the "K" parameter we set in $LESS, which enables SIGINT behaviour. */
+ r = safe_fork("(pager)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGINT|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pager_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *less_charset, *exe;
+
+ /* In the child start the pager */
+
+ if (dup2(fd[0], STDIN_FILENO) < 0) {
+ log_error_errno(errno, "Failed to duplicate file descriptor to STDIN: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close_pair(fd);
+
+ if (setenv("LESS", less_opts, 1) < 0) {
+ log_error_errno(errno, "Failed to set environment variable LESS: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Initialize a good charset for less. This is particularly important if we output UTF-8
+ * characters. */
+ less_charset = getenv("SYSTEMD_LESSCHARSET");
+ if (!less_charset && is_locale_utf8())
+ less_charset = "utf-8";
+ if (less_charset &&
+ setenv("LESSCHARSET", less_charset, 1) < 0) {
+ log_error_errno(errno, "Failed to set environment variable LESSCHARSET: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* People might invoke us from sudo, don't needlessly allow less to be a way to shell out
+ * privileged stuff. If the user set $SYSTEMD_PAGERSECURE, trust their configuration of the
+ * pager. If they didn't, use secure mode when under euid is changed. If $SYSTEMD_PAGERSECURE
+ * wasn't explicitly set, and we autodetect the need for secure mode, only use the pager we
+ * know to be good. */
+ int use_secure_mode = getenv_bool_secure("SYSTEMD_PAGERSECURE");
+ bool trust_pager = use_secure_mode >= 0;
+ if (use_secure_mode == -ENXIO) {
+ uid_t uid;
+
+ r = sd_pid_get_owner_uid(0, &uid);
+ if (r < 0)
+ log_debug_errno(r, "sd_pid_get_owner_uid() failed, enabling pager secure mode: %m");
+
+ use_secure_mode = r < 0 || uid != geteuid();
+
+ } else if (use_secure_mode < 0) {
+ log_warning_errno(use_secure_mode, "Unable to parse $SYSTEMD_PAGERSECURE, assuming true: %m");
+ use_secure_mode = true;
+ }
+
+ /* We generally always set variables used by less, even if we end up using a different pager.
+ * They shouldn't hurt in any case, and ideally other pagers would look at them too. */
+ r = set_unset_env("LESSSECURE", use_secure_mode ? "1" : NULL, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to adjust environment variable LESSSECURE: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (trust_pager && pager_args) { /* The pager config might be set globally, and we cannot
+ * know if the user adjusted it to be appropriate for the
+ * secure mode. Thus, start the pager specified through
+ * envvars only when $SYSTEMD_PAGERSECURE was explicitly set
+ * as well. */
+ r = loop_write(exe_name_pipe[1], pager_args[0], strlen(pager_args[0]) + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ execvp(pager_args[0], pager_args);
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to execute '%s', using fallback pagers: %m", pager_args[0]);
+ }
+
+ /* Debian's alternatives command for pagers is called 'pager'. Note that we do not call
+ * sensible-pagers here, since that is just a shell script that implements a logic that is
+ * similar to this one anyway, but is Debian-specific. */
+ FOREACH_STRING(exe, "pager", "less", "more") {
+ /* Only less implements secure mode right now. */
+ if (use_secure_mode && !streq(exe, "less"))
+ continue;
+
+ r = loop_write(exe_name_pipe[1], exe, strlen(exe) + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+ execlp(exe, exe, NULL);
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to execute '%s', using next fallback pager: %m", exe);
+ }
+
+ /* Our builtin is also very secure. */
+ r = loop_write(exe_name_pipe[1], "(built-in)", strlen("(built-in)") + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+ /* Close pipe to signal the parent to start sending data */
+ safe_close_pair(exe_name_pipe);
+ pager_fallback();
+ /* not reached */
+ }
+
+ /* Return in the parent */
+ stored_stdout = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 3);
+ if (dup2(fd[1], STDOUT_FILENO) < 0) {
+ stored_stdout = safe_close(stored_stdout);
+ return log_error_errno(errno, "Failed to duplicate pager pipe: %m");
+ }
+ stdout_redirected = true;
+
+ stored_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3);
+ if (dup2(fd[1], STDERR_FILENO) < 0) {
+ stored_stderr = safe_close(stored_stderr);
+ return log_error_errno(errno, "Failed to duplicate pager pipe: %m");
+ }
+ stderr_redirected = true;
+
+ exe_name_pipe[1] = safe_close(exe_name_pipe[1]);
+
+ r = no_quit_on_interrupt(TAKE_FD(exe_name_pipe[0]), less_opts);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ (void) ignore_signals(SIGINT, -1);
+
+ return 1;
+}
+
+void pager_close(void) {
+
+ if (pager_pid <= 0)
+ return;
+
+ /* Inform pager that we are done */
+ (void) fflush(stdout);
+ if (stdout_redirected)
+ if (stored_stdout < 0 || dup2(stored_stdout, STDOUT_FILENO) < 0)
+ (void) close(STDOUT_FILENO);
+ stored_stdout = safe_close(stored_stdout);
+ (void) fflush(stderr);
+ if (stderr_redirected)
+ if (stored_stderr < 0 || dup2(stored_stderr, STDERR_FILENO) < 0)
+ (void) close(STDERR_FILENO);
+ stored_stderr = safe_close(stored_stderr);
+ stdout_redirected = stderr_redirected = false;
+
+ (void) kill(pager_pid, SIGCONT);
+ (void) wait_for_terminate(pager_pid, NULL);
+ pager_pid = 0;
+}
+
+bool pager_have(void) {
+ return pager_pid > 0;
+}
+
+int show_man_page(const char *desc, bool null_stdio) {
+ const char *args[4] = { "man", NULL, NULL, NULL };
+ char *e = NULL;
+ pid_t pid;
+ size_t k;
+ int r;
+
+ k = strlen(desc);
+
+ if (desc[k-1] == ')')
+ e = strrchr(desc, '(');
+
+ if (e) {
+ char *page = NULL, *section = NULL;
+
+ page = strndupa(desc, e - desc);
+ section = strndupa(e + 1, desc + k - e - 2);
+
+ args[1] = section;
+ args[2] = page;
+ } else
+ args[1] = desc;
+
+ r = safe_fork("(man)", FORK_RESET_SIGNALS|FORK_DEATHSIG|(null_stdio ? FORK_NULL_STDIO : 0)|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execvp(args[0], (char**) args);
+ log_error_errno(errno, "Failed to execute man: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(NULL, pid, 0);
+}
diff --git a/src/shared/pager.h b/src/shared/pager.h
new file mode 100644
index 0000000..b3b1b4f
--- /dev/null
+++ b/src/shared/pager.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum PagerFlags {
+ PAGER_DISABLE = 1 << 0,
+ PAGER_JUMP_TO_END = 1 << 1,
+} PagerFlags;
+
+int pager_open(PagerFlags flags);
+void pager_close(void);
+bool pager_have(void) _pure_;
+
+int show_man_page(const char *page, bool null_stdio);
diff --git a/src/shared/pam-util.c b/src/shared/pam-util.c
new file mode 100644
index 0000000..621e7fe
--- /dev/null
+++ b/src/shared/pam-util.c
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <security/pam_ext.h>
+#include <syslog.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "macro.h"
+#include "pam-util.h"
+
+int pam_log_oom(pam_handle_t *handle) {
+ /* This is like log_oom(), but uses PAM logging */
+ pam_syslog(handle, LOG_ERR, "Out of memory.");
+ return PAM_BUF_ERR;
+}
+
+int pam_bus_log_create_error(pam_handle_t *handle, int r) {
+ /* This is like bus_log_create_error(), but uses PAM logging */
+ pam_syslog(handle, LOG_ERR, "Failed to create bus message: %s", strerror_safe(r));
+ return PAM_BUF_ERR;
+}
+
+int pam_bus_log_parse_error(pam_handle_t *handle, int r) {
+ /* This is like bus_log_parse_error(), but uses PAM logging */
+ pam_syslog(handle, LOG_ERR, "Failed to parse bus message: %s", strerror_safe(r));
+ return PAM_BUF_ERR;
+}
+
+static void cleanup_system_bus(pam_handle_t *handle, void *data, int error_status) {
+ sd_bus_flush_close_unref(data);
+}
+
+int pam_acquire_bus_connection(pam_handle_t *handle, sd_bus **ret) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(handle);
+ assert(ret);
+
+ /* We cache the bus connection so that we can share it between the session and the authentication hooks */
+ r = pam_get_data(handle, "systemd-system-bus", (const void**) &bus);
+ if (r == PAM_SUCCESS && bus) {
+ *ret = sd_bus_ref(TAKE_PTR(bus)); /* Increase the reference counter, so that the PAM data stays valid */
+ return PAM_SUCCESS;
+ }
+ if (!IN_SET(r, PAM_SUCCESS, PAM_NO_MODULE_DATA)) {
+ pam_syslog(handle, LOG_ERR, "Failed to get bus connection: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to connect to system bus: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ r = pam_set_data(handle, "systemd-system-bus", bus, cleanup_system_bus);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM bus data: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ sd_bus_ref(bus);
+ *ret = TAKE_PTR(bus);
+
+ return PAM_SUCCESS;
+}
+
+int pam_release_bus_connection(pam_handle_t *handle) {
+ int r;
+
+ r = pam_set_data(handle, "systemd-system-bus", NULL, NULL);
+ if (r != PAM_SUCCESS)
+ pam_syslog(handle, LOG_ERR, "Failed to release PAM user record data: %s", pam_strerror(handle, r));
+
+ return r;
+}
+
+void pam_cleanup_free(pam_handle_t *handle, void *data, int error_status) {
+ /* A generic destructor for pam_set_data() that just frees the specified data */
+ free(data);
+}
diff --git a/src/shared/pam-util.h b/src/shared/pam-util.h
new file mode 100644
index 0000000..41f1835
--- /dev/null
+++ b/src/shared/pam-util.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <security/pam_modules.h>
+
+#include "sd-bus.h"
+
+int pam_log_oom(pam_handle_t *handle);
+int pam_bus_log_create_error(pam_handle_t *handle, int r);
+int pam_bus_log_parse_error(pam_handle_t *handle, int r);
+
+int pam_acquire_bus_connection(pam_handle_t *handle, sd_bus **ret);
+int pam_release_bus_connection(pam_handle_t *handle);
+
+void pam_cleanup_free(pam_handle_t *handle, void *data, int error_status);
diff --git a/src/shared/pe-header.h b/src/shared/pe-header.h
new file mode 100644
index 0000000..54433c7
--- /dev/null
+++ b/src/shared/pe-header.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <inttypes.h>
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+struct DosFileHeader {
+ uint8_t Magic[2];
+ le16_t LastSize;
+ le16_t nBlocks;
+ le16_t nReloc;
+ le16_t HdrSize;
+ le16_t MinAlloc;
+ le16_t MaxAlloc;
+ le16_t ss;
+ le16_t sp;
+ le16_t Checksum;
+ le16_t ip;
+ le16_t cs;
+ le16_t RelocPos;
+ le16_t nOverlay;
+ le16_t reserved[4];
+ le16_t OEMId;
+ le16_t OEMInfo;
+ le16_t reserved2[10];
+ le32_t ExeHeader;
+} _packed_;
+
+#define PE_HEADER_MACHINE_I386 0x014cU
+#define PE_HEADER_MACHINE_X64 0x8664U
+
+struct PeFileHeader {
+ le16_t Machine;
+ le16_t NumberOfSections;
+ le32_t TimeDateStamp;
+ le32_t PointerToSymbolTable;
+ le32_t NumberOfSymbols;
+ le16_t SizeOfOptionalHeader;
+ le16_t Characteristics;
+} _packed_;
+
+struct PeHeader {
+ uint8_t Magic[4];
+ struct PeFileHeader FileHeader;
+} _packed_;
+
+struct PeSectionHeader {
+ uint8_t Name[8];
+ le32_t VirtualSize;
+ le32_t VirtualAddress;
+ le32_t SizeOfRawData;
+ le32_t PointerToRawData;
+ le32_t PointerToRelocations;
+ le32_t PointerToLinenumbers;
+ le16_t NumberOfRelocations;
+ le16_t NumberOfLinenumbers;
+ le32_t Characteristics;
+ } _packed_;
diff --git a/src/shared/pkcs11-util.c b/src/shared/pkcs11-util.c
new file mode 100644
index 0000000..e74f0be
--- /dev/null
+++ b/src/shared/pkcs11-util.c
@@ -0,0 +1,932 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+
+#include "ask-password-api.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "memory-util.h"
+#if HAVE_OPENSSL
+#include "openssl-util.h"
+#endif
+#include "pkcs11-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+bool pkcs11_uri_valid(const char *uri) {
+ const char *p;
+
+ /* A very superficial checker for RFC7512 PKCS#11 URI syntax */
+
+ if (isempty(uri))
+ return false;
+
+ p = startswith(uri, "pkcs11:");
+ if (!p)
+ return false;
+
+ if (isempty(p))
+ return false;
+
+ if (!in_charset(p, ALPHANUMERICAL "-_?;&%="))
+ return false;
+
+ return true;
+}
+
+#if HAVE_P11KIT
+
+int uri_from_string(const char *p, P11KitUri **ret) {
+ _cleanup_(p11_kit_uri_freep) P11KitUri *uri = NULL;
+
+ assert(p);
+ assert(ret);
+
+ uri = p11_kit_uri_new();
+ if (!uri)
+ return -ENOMEM;
+
+ if (p11_kit_uri_parse(p, P11_KIT_URI_FOR_ANY, uri) != P11_KIT_URI_OK)
+ return -EINVAL;
+
+ *ret = TAKE_PTR(uri);
+ return 0;
+}
+
+P11KitUri *uri_from_module_info(const CK_INFO *info) {
+ P11KitUri *uri;
+
+ assert(info);
+
+ uri = p11_kit_uri_new();
+ if (!uri)
+ return NULL;
+
+ *p11_kit_uri_get_module_info(uri) = *info;
+ return uri;
+}
+
+P11KitUri *uri_from_slot_info(const CK_SLOT_INFO *slot_info) {
+ P11KitUri *uri;
+
+ assert(slot_info);
+
+ uri = p11_kit_uri_new();
+ if (!uri)
+ return NULL;
+
+ *p11_kit_uri_get_slot_info(uri) = *slot_info;
+ return uri;
+}
+
+P11KitUri *uri_from_token_info(const CK_TOKEN_INFO *token_info) {
+ P11KitUri *uri;
+
+ assert(token_info);
+
+ uri = p11_kit_uri_new();
+ if (!uri)
+ return NULL;
+
+ *p11_kit_uri_get_token_info(uri) = *token_info;
+ return uri;
+}
+
+CK_RV pkcs11_get_slot_list_malloc(
+ CK_FUNCTION_LIST *m,
+ CK_SLOT_ID **ret_slotids,
+ CK_ULONG *ret_n_slotids) {
+
+ CK_RV rv;
+
+ assert(m);
+ assert(ret_slotids);
+ assert(ret_n_slotids);
+
+ for (unsigned tries = 0; tries < 16; tries++) {
+ _cleanup_free_ CK_SLOT_ID *slotids = NULL;
+ CK_ULONG n_slotids = 0;
+
+ rv = m->C_GetSlotList(0, NULL, &n_slotids);
+ if (rv != CKR_OK)
+ return rv;
+ if (n_slotids == 0) {
+ *ret_slotids = NULL;
+ *ret_n_slotids = 0;
+ return CKR_OK;
+ }
+
+ slotids = new(CK_SLOT_ID, n_slotids);
+ if (!slotids)
+ return CKR_HOST_MEMORY;
+
+ rv = m->C_GetSlotList(0, slotids, &n_slotids);
+ if (rv == CKR_OK) {
+ *ret_slotids = TAKE_PTR(slotids);
+ *ret_n_slotids = n_slotids;
+ return CKR_OK;
+ }
+
+ if (rv != CKR_BUFFER_TOO_SMALL)
+ return rv;
+
+ /* Hu? Maybe somebody plugged something in and things changed? Let's try again */
+ }
+
+ return CKR_BUFFER_TOO_SMALL;
+}
+
+char *pkcs11_token_label(const CK_TOKEN_INFO *token_info) {
+ char *t;
+
+ /* The label is not NUL terminated and likely padded with spaces, let's make a copy here, so that we
+ * can strip that. */
+ t = strndup((char*) token_info->label, sizeof(token_info->label));
+ if (!t)
+ return NULL;
+
+ strstrip(t);
+ return t;
+}
+
+char *pkcs11_token_manufacturer_id(const CK_TOKEN_INFO *token_info) {
+ char *t;
+
+ t = strndup((char*) token_info->manufacturerID, sizeof(token_info->manufacturerID));
+ if (!t)
+ return NULL;
+
+ strstrip(t);
+ return t;
+}
+
+char *pkcs11_token_model(const CK_TOKEN_INFO *token_info) {
+ char *t;
+
+ t = strndup((char*) token_info->model, sizeof(token_info->model));
+ if (!t)
+ return NULL;
+
+ strstrip(t);
+ return t;
+}
+
+int pkcs11_token_login(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_SLOT_ID slotid,
+ const CK_TOKEN_INFO *token_info,
+ const char *friendly_name,
+ const char *icon_name,
+ const char *keyname,
+ usec_t until,
+ char **ret_used_pin) {
+
+ _cleanup_free_ char *token_uri_string = NULL, *token_uri_escaped = NULL, *id = NULL, *token_label = NULL;
+ _cleanup_(p11_kit_uri_freep) P11KitUri *token_uri = NULL;
+ CK_TOKEN_INFO updated_token_info;
+ int uri_result, r;
+ CK_RV rv;
+
+ assert(m);
+ assert(token_info);
+
+ token_label = pkcs11_token_label(token_info);
+ if (!token_label)
+ return log_oom();
+
+ token_uri = uri_from_token_info(token_info);
+ if (!token_uri)
+ return log_oom();
+
+ uri_result = p11_kit_uri_format(token_uri, P11_KIT_URI_FOR_ANY, &token_uri_string);
+ if (uri_result != P11_KIT_URI_OK)
+ return log_warning_errno(SYNTHETIC_ERRNO(EAGAIN), "Failed to format slot URI: %s", p11_kit_uri_message(uri_result));
+
+ if (FLAGS_SET(token_info->flags, CKF_PROTECTED_AUTHENTICATION_PATH)) {
+ rv = m->C_Login(session, CKU_USER, NULL, 0);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to log into security token '%s': %s", token_label, p11_kit_strerror(rv));
+
+ log_info("Successfully logged into security token '%s' via protected authentication path.", token_label);
+ if (ret_used_pin)
+ *ret_used_pin = NULL;
+ return 0;
+ }
+
+ if (!FLAGS_SET(token_info->flags, CKF_LOGIN_REQUIRED)) {
+ log_info("No login into security token '%s' required.", token_label);
+ if (ret_used_pin)
+ *ret_used_pin = NULL;
+ return 0;
+ }
+
+ token_uri_escaped = cescape(token_uri_string);
+ if (!token_uri_escaped)
+ return log_oom();
+
+ id = strjoin("pkcs11:", token_uri_escaped);
+ if (!id)
+ return log_oom();
+
+ for (unsigned tries = 0; tries < 3; tries++) {
+ _cleanup_strv_free_erase_ char **passwords = NULL;
+ char **i, *e;
+
+ e = getenv("PIN");
+ if (e) {
+ passwords = strv_new(e);
+ if (!passwords)
+ return log_oom();
+
+ string_erase(e);
+ if (unsetenv("PIN") < 0)
+ return log_error_errno(errno, "Failed to unset $PIN: %m");
+ } else {
+ _cleanup_free_ char *text = NULL;
+
+ if (FLAGS_SET(token_info->flags, CKF_USER_PIN_FINAL_TRY))
+ r = asprintf(&text,
+ "Please enter correct PIN for security token '%s' in order to unlock %s (final try):",
+ token_label, friendly_name);
+ else if (FLAGS_SET(token_info->flags, CKF_USER_PIN_COUNT_LOW))
+ r = asprintf(&text,
+ "PIN has been entered incorrectly previously, please enter correct PIN for security token '%s' in order to unlock %s:",
+ token_label, friendly_name);
+ else if (tries == 0)
+ r = asprintf(&text,
+ "Please enter PIN for security token '%s' in order to unlock %s:",
+ token_label, friendly_name);
+ else
+ r = asprintf(&text,
+ "Please enter PIN for security token '%s' in order to unlock %s (try #%u):",
+ token_label, friendly_name, tries+1);
+ if (r < 0)
+ return log_oom();
+
+ /* We never cache PINs, simply because it's fatal if we use wrong PINs, since usually there are only 3 tries */
+ r = ask_password_auto(text, icon_name, id, keyname, until, 0, &passwords);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query PIN for security token '%s': %m", token_label);
+ }
+
+ STRV_FOREACH(i, passwords) {
+ rv = m->C_Login(session, CKU_USER, (CK_UTF8CHAR*) *i, strlen(*i));
+ if (rv == CKR_OK) {
+
+ if (ret_used_pin) {
+ char *c;
+
+ c = strdup(*i);
+ if (!c)
+ return log_oom();
+
+ *ret_used_pin = c;
+ }
+
+ log_info("Successfully logged into security token '%s'.", token_label);
+ return 0;
+ }
+ if (rv == CKR_PIN_LOCKED)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "PIN has been locked, please reset PIN of security token '%s'.", token_label);
+ if (!IN_SET(rv, CKR_PIN_INCORRECT, CKR_PIN_LEN_RANGE))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to log into security token '%s': %s", token_label, p11_kit_strerror(rv));
+
+ /* Referesh the token info, so that we can prompt knowing the new flags if they changed. */
+ rv = m->C_GetTokenInfo(slotid, &updated_token_info);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to acquire updated security token information for slot %lu: %s",
+ slotid, p11_kit_strerror(rv));
+
+ token_info = &updated_token_info;
+ log_notice("PIN for token '%s' is incorrect, please try again.", token_label);
+ }
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Too many attempts to log into token '%s'.", token_label);
+}
+
+int pkcs11_token_find_x509_certificate(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ P11KitUri *search_uri,
+ CK_OBJECT_HANDLE *ret_object) {
+
+ bool found_class = false, found_certificate_type = false;
+ _cleanup_free_ CK_ATTRIBUTE *attributes_buffer = NULL;
+ CK_ULONG n_attributes, a, n_objects;
+ CK_ATTRIBUTE *attributes = NULL;
+ CK_OBJECT_HANDLE objects[2];
+ CK_RV rv, rv2;
+
+ assert(m);
+ assert(search_uri);
+ assert(ret_object);
+
+ attributes = p11_kit_uri_get_attributes(search_uri, &n_attributes);
+ for (a = 0; a < n_attributes; a++) {
+
+ /* We use the URI's included match attributes, but make them more strict. This allows users
+ * to specify a token URL instead of an object URL and the right thing should happen if
+ * there's only one suitable key on the token. */
+
+ switch (attributes[a].type) {
+
+ case CKA_CLASS: {
+ CK_OBJECT_CLASS c;
+
+ if (attributes[a].ulValueLen != sizeof(c))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CLASS attribute size.");
+
+ memcpy(&c, attributes[a].pValue, sizeof(c));
+ if (c != CKO_CERTIFICATE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an X.509 certificate, refusing.");
+
+ found_class = true;
+ break;
+ }
+
+ case CKA_CERTIFICATE_TYPE: {
+ CK_CERTIFICATE_TYPE t;
+
+ if (attributes[a].ulValueLen != sizeof(t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CERTIFICATE_TYPE attribute size.");
+
+ memcpy(&t, attributes[a].pValue, sizeof(t));
+ if (t != CKC_X_509)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an X.509 certificate, refusing.");
+
+ found_certificate_type = true;
+ break;
+ }}
+ }
+
+ if (!found_class || !found_certificate_type) {
+ /* Hmm, let's slightly extend the attribute list we search for */
+
+ attributes_buffer = new(CK_ATTRIBUTE, n_attributes + !found_class + !found_certificate_type);
+ if (!attributes_buffer)
+ return log_oom();
+
+ memcpy(attributes_buffer, attributes, sizeof(CK_ATTRIBUTE) * n_attributes);
+
+ if (!found_class) {
+ static const CK_OBJECT_CLASS class = CKO_CERTIFICATE;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_CLASS,
+ .pValue = (CK_OBJECT_CLASS*) &class,
+ .ulValueLen = sizeof(class),
+ };
+ }
+
+ if (!found_certificate_type) {
+ static const CK_CERTIFICATE_TYPE type = CKC_X_509;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_CERTIFICATE_TYPE,
+ .pValue = (CK_CERTIFICATE_TYPE*) &type,
+ .ulValueLen = sizeof(type),
+ };
+ }
+
+ attributes = attributes_buffer;
+ }
+
+ rv = m->C_FindObjectsInit(session, attributes, n_attributes);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to initialize object find call: %s", p11_kit_strerror(rv));
+
+ rv = m->C_FindObjects(session, objects, ELEMENTSOF(objects), &n_objects);
+ rv2 = m->C_FindObjectsFinal(session);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to find objects: %s", p11_kit_strerror(rv));
+ if (rv2 != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to finalize object find call: %s", p11_kit_strerror(rv));
+ if (n_objects == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Failed to find selected X509 certificate on token.");
+ if (n_objects > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
+ "Configured URI matches multiple certificates, refusing.");
+
+ *ret_object = objects[0];
+ return 0;
+}
+
+#if HAVE_OPENSSL
+int pkcs11_token_read_x509_certificate(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_OBJECT_HANDLE object,
+ X509 **ret_cert) {
+
+ _cleanup_free_ void *buffer = NULL;
+ _cleanup_free_ char *t = NULL;
+ CK_ATTRIBUTE attribute = {
+ .type = CKA_VALUE
+ };
+ CK_RV rv;
+ _cleanup_(X509_freep) X509 *x509 = NULL;
+ X509_NAME *name = NULL;
+ const unsigned char *p;
+
+ rv = m->C_GetAttributeValue(session, object, &attribute, 1);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to read X.509 certificate size off token: %s", p11_kit_strerror(rv));
+
+ buffer = malloc(attribute.ulValueLen);
+ if (!buffer)
+ return log_oom();
+
+ attribute.pValue = buffer;
+
+ rv = m->C_GetAttributeValue(session, object, &attribute, 1);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to read X.509 certificate data off token: %s", p11_kit_strerror(rv));
+
+ p = attribute.pValue;
+ x509 = d2i_X509(NULL, &p, attribute.ulValueLen);
+ if (!x509)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed parse X.509 certificate.");
+
+ name = X509_get_subject_name(x509);
+ if (!name)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to acquire X.509 subject name.");
+
+ t = X509_NAME_oneline(name, NULL, 0);
+ if (!t)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to format X.509 subject name as string.");
+
+ log_debug("Using X.509 certificate issued for '%s'.", t);
+
+ *ret_cert = TAKE_PTR(x509);
+ return 0;
+}
+#endif
+
+int pkcs11_token_find_private_key(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ P11KitUri *search_uri,
+ CK_OBJECT_HANDLE *ret_object) {
+
+ bool found_decrypt = false, found_class = false, found_key_type = false;
+ _cleanup_free_ CK_ATTRIBUTE *attributes_buffer = NULL;
+ CK_ULONG n_attributes, a, n_objects;
+ CK_ATTRIBUTE *attributes = NULL;
+ CK_OBJECT_HANDLE objects[2];
+ CK_RV rv, rv2;
+
+ assert(m);
+ assert(search_uri);
+ assert(ret_object);
+
+ attributes = p11_kit_uri_get_attributes(search_uri, &n_attributes);
+ for (a = 0; a < n_attributes; a++) {
+
+ /* We use the URI's included match attributes, but make them more strict. This allows users
+ * to specify a token URL instead of an object URL and the right thing should happen if
+ * there's only one suitable key on the token. */
+
+ switch (attributes[a].type) {
+
+ case CKA_CLASS: {
+ CK_OBJECT_CLASS c;
+
+ if (attributes[a].ulValueLen != sizeof(c))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CLASS attribute size.");
+
+ memcpy(&c, attributes[a].pValue, sizeof(c));
+ if (c != CKO_PRIVATE_KEY)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Selected PKCS#11 object is not a private key, refusing.");
+
+ found_class = true;
+ break;
+ }
+
+ case CKA_DECRYPT: {
+ CK_BBOOL b;
+
+ if (attributes[a].ulValueLen != sizeof(b))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_DECRYPT attribute size.");
+
+ memcpy(&b, attributes[a].pValue, sizeof(b));
+ if (!b)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Selected PKCS#11 object is not suitable for decryption, refusing.");
+
+ found_decrypt = true;
+ break;
+ }
+
+ case CKA_KEY_TYPE: {
+ CK_KEY_TYPE t;
+
+ if (attributes[a].ulValueLen != sizeof(t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_KEY_TYPE attribute size.");
+
+ memcpy(&t, attributes[a].pValue, sizeof(t));
+ if (t != CKK_RSA)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an RSA key, refusing.");
+
+ found_key_type = true;
+ break;
+ }}
+ }
+
+ if (!found_decrypt || !found_class || !found_key_type) {
+ /* Hmm, let's slightly extend the attribute list we search for */
+
+ attributes_buffer = new(CK_ATTRIBUTE, n_attributes + !found_decrypt + !found_class + !found_key_type);
+ if (!attributes_buffer)
+ return log_oom();
+
+ memcpy(attributes_buffer, attributes, sizeof(CK_ATTRIBUTE) * n_attributes);
+
+ if (!found_decrypt) {
+ static const CK_BBOOL yes = true;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_DECRYPT,
+ .pValue = (CK_BBOOL*) &yes,
+ .ulValueLen = sizeof(yes),
+ };
+ }
+
+ if (!found_class) {
+ static const CK_OBJECT_CLASS class = CKO_PRIVATE_KEY;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_CLASS,
+ .pValue = (CK_OBJECT_CLASS*) &class,
+ .ulValueLen = sizeof(class),
+ };
+ }
+
+ if (!found_key_type) {
+ static const CK_KEY_TYPE type = CKK_RSA;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_KEY_TYPE,
+ .pValue = (CK_KEY_TYPE*) &type,
+ .ulValueLen = sizeof(type),
+ };
+ }
+
+ attributes = attributes_buffer;
+ }
+
+ rv = m->C_FindObjectsInit(session, attributes, n_attributes);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to initialize object find call: %s", p11_kit_strerror(rv));
+
+ rv = m->C_FindObjects(session, objects, ELEMENTSOF(objects), &n_objects);
+ rv2 = m->C_FindObjectsFinal(session);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to find objects: %s", p11_kit_strerror(rv));
+ if (rv2 != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to finalize object find call: %s", p11_kit_strerror(rv));
+ if (n_objects == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Failed to find selected private key suitable for decryption on token.");
+ if (n_objects > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
+ "Configured private key URI matches multiple keys, refusing.");
+
+ *ret_object = objects[0];
+ return 0;
+}
+
+int pkcs11_token_decrypt_data(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_OBJECT_HANDLE object,
+ const void *encrypted_data,
+ size_t encrypted_data_size,
+ void **ret_decrypted_data,
+ size_t *ret_decrypted_data_size) {
+
+ static const CK_MECHANISM mechanism = {
+ .mechanism = CKM_RSA_PKCS
+ };
+ _cleanup_(erase_and_freep) CK_BYTE *dbuffer = NULL;
+ CK_ULONG dbuffer_size = 0;
+ CK_RV rv;
+
+ assert(m);
+ assert(encrypted_data);
+ assert(encrypted_data_size > 0);
+ assert(ret_decrypted_data);
+ assert(ret_decrypted_data_size);
+
+ rv = m->C_DecryptInit(session, (CK_MECHANISM*) &mechanism, object);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to initialize decryption on security token: %s", p11_kit_strerror(rv));
+
+ dbuffer_size = encrypted_data_size; /* Start with something reasonable */
+ dbuffer = malloc(dbuffer_size);
+ if (!dbuffer)
+ return log_oom();
+
+ rv = m->C_Decrypt(session, (CK_BYTE*) encrypted_data, encrypted_data_size, dbuffer, &dbuffer_size);
+ if (rv == CKR_BUFFER_TOO_SMALL) {
+ erase_and_free(dbuffer);
+
+ dbuffer = malloc(dbuffer_size);
+ if (!dbuffer)
+ return log_oom();
+
+ rv = m->C_Decrypt(session, (CK_BYTE*) encrypted_data, encrypted_data_size, dbuffer, &dbuffer_size);
+ }
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to decrypt key on security token: %s", p11_kit_strerror(rv));
+
+ log_info("Successfully decrypted key with security token.");
+
+ *ret_decrypted_data = TAKE_PTR(dbuffer);
+ *ret_decrypted_data_size = dbuffer_size;
+ return 0;
+}
+
+int pkcs11_token_acquire_rng(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session) {
+
+ _cleanup_free_ void *buffer = NULL;
+ _cleanup_close_ int fd = -1;
+ size_t rps;
+ CK_RV rv;
+ int r;
+
+ assert(m);
+
+ /* While we are at it, let's read some RNG data from the PKCS#11 token and pass it to the kernel
+ * random pool. This should be cheap if we are talking to the device already. Note that we don't
+ * credit any entropy, since we don't know about the quality of the pkcs#11 token's RNG. Why bother
+ * at all? There are two sides to the argument whether to generate private keys on tokens or on the
+ * host. By crediting some data from the token RNG to the host's pool we at least can say that any
+ * key generated from it is at least as good as both sources individually. */
+
+ rps = random_pool_size();
+
+ buffer = malloc(rps);
+ if (!buffer)
+ return log_oom();
+
+ rv = m->C_GenerateRandom(session, buffer, rps);
+ if (rv != CKR_OK)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Failed to generate RNG data on security token: %s", p11_kit_strerror(rv));
+
+ fd = open("/dev/urandom", O_WRONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to open /dev/urandom for writing: %m");
+
+ r = loop_write(fd, buffer, rps, false);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to write PKCS#11 acquired random data to /dev/urandom: %m");
+
+ log_debug("Successfully written %zu bytes random data acquired via PKCS#11 to kernel random pool.", rps);
+
+ return 0;
+}
+
+static int token_process(
+ CK_FUNCTION_LIST *m,
+ CK_SLOT_ID slotid,
+ const CK_SLOT_INFO *slot_info,
+ const CK_TOKEN_INFO *token_info,
+ P11KitUri *search_uri,
+ pkcs11_find_token_callback_t callback,
+ void *userdata) {
+
+ _cleanup_free_ char *token_label = NULL;
+ CK_SESSION_HANDLE session;
+ CK_RV rv;
+ int r;
+
+ assert(m);
+ assert(slot_info);
+ assert(token_info);
+
+ token_label = pkcs11_token_label(token_info);
+ if (!token_label)
+ return log_oom();
+
+ rv = m->C_OpenSession(slotid, CKF_SERIAL_SESSION, NULL, NULL, &session);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to create session for security token '%s': %s", token_label, p11_kit_strerror(rv));
+
+ if (callback)
+ r = callback(m, session, slotid, slot_info, token_info, search_uri, userdata);
+ else
+ r = 1; /* if not callback was specified, just say we found what we were looking for */
+
+ rv = m->C_CloseSession(session);
+ if (rv != CKR_OK)
+ log_warning("Failed to close session on PKCS#11 token, ignoring: %s", p11_kit_strerror(rv));
+
+ return r;
+}
+
+static int slot_process(
+ CK_FUNCTION_LIST *m,
+ CK_SLOT_ID slotid,
+ P11KitUri *search_uri,
+ pkcs11_find_token_callback_t callback,
+ void *userdata) {
+
+ _cleanup_(p11_kit_uri_freep) P11KitUri* slot_uri = NULL, *token_uri = NULL;
+ _cleanup_free_ char *token_uri_string = NULL;
+ CK_TOKEN_INFO token_info;
+ CK_SLOT_INFO slot_info;
+ int uri_result;
+ CK_RV rv;
+
+ assert(m);
+
+ /* We return -EAGAIN for all failures we can attribute to a specific slot in some way, so that the
+ * caller might try other slots before giving up. */
+
+ rv = m->C_GetSlotInfo(slotid, &slot_info);
+ if (rv != CKR_OK) {
+ log_warning("Failed to acquire slot info for slot %lu, ignoring slot: %s", slotid, p11_kit_strerror(rv));
+ return -EAGAIN;
+ }
+
+ slot_uri = uri_from_slot_info(&slot_info);
+ if (!slot_uri)
+ return log_oom();
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *slot_uri_string = NULL;
+
+ uri_result = p11_kit_uri_format(slot_uri, P11_KIT_URI_FOR_ANY, &slot_uri_string);
+ if (uri_result != P11_KIT_URI_OK) {
+ log_warning("Failed to format slot URI, ignoring slot: %s", p11_kit_uri_message(uri_result));
+ return -EAGAIN;
+ }
+
+ log_debug("Found slot with URI %s", slot_uri_string);
+ }
+
+ rv = m->C_GetTokenInfo(slotid, &token_info);
+ if (rv == CKR_TOKEN_NOT_PRESENT) {
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "Token not present in slot, ignoring.");
+ } else if (rv != CKR_OK) {
+ log_warning("Failed to acquire token info for slot %lu, ignoring slot: %s", slotid, p11_kit_strerror(rv));
+ return -EAGAIN;
+ }
+
+ token_uri = uri_from_token_info(&token_info);
+ if (!token_uri)
+ return log_oom();
+
+ uri_result = p11_kit_uri_format(token_uri, P11_KIT_URI_FOR_ANY, &token_uri_string);
+ if (uri_result != P11_KIT_URI_OK) {
+ log_warning("Failed to format slot URI: %s", p11_kit_uri_message(uri_result));
+ return -EAGAIN;
+ }
+
+ if (search_uri && !p11_kit_uri_match_token_info(search_uri, &token_info))
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "Found non-matching token with URI %s.",
+ token_uri_string);
+
+ log_debug("Found matching token with URI %s.", token_uri_string);
+
+ return token_process(
+ m,
+ slotid,
+ &slot_info,
+ &token_info,
+ search_uri,
+ callback,
+ userdata);
+}
+
+static int module_process(
+ CK_FUNCTION_LIST *m,
+ P11KitUri *search_uri,
+ pkcs11_find_token_callback_t callback,
+ void *userdata) {
+
+ _cleanup_free_ char *name = NULL, *module_uri_string = NULL;
+ _cleanup_(p11_kit_uri_freep) P11KitUri* module_uri = NULL;
+ _cleanup_free_ CK_SLOT_ID *slotids = NULL;
+ CK_ULONG n_slotids = 0;
+ int uri_result;
+ CK_INFO info;
+ size_t k;
+ CK_RV rv;
+ int r;
+
+ assert(m);
+
+ /* We ignore most errors from modules here, in order to skip over faulty modules: one faulty module
+ * should not have the effect that we don't try the others anymore. We indicate such per-module
+ * failures with -EAGAIN, which let's the caller try the next module. */
+
+ name = p11_kit_module_get_name(m);
+ if (!name)
+ return log_oom();
+
+ log_debug("Trying PKCS#11 module %s.", name);
+
+ rv = m->C_GetInfo(&info);
+ if (rv != CKR_OK) {
+ log_warning("Failed to get info on PKCS#11 module, ignoring module: %s", p11_kit_strerror(rv));
+ return -EAGAIN;
+ }
+
+ module_uri = uri_from_module_info(&info);
+ if (!module_uri)
+ return log_oom();
+
+ uri_result = p11_kit_uri_format(module_uri, P11_KIT_URI_FOR_ANY, &module_uri_string);
+ if (uri_result != P11_KIT_URI_OK) {
+ log_warning("Failed to format module URI, ignoring module: %s", p11_kit_uri_message(uri_result));
+ return -EAGAIN;
+ }
+
+ log_debug("Found module with URI %s", module_uri_string);
+
+ rv = pkcs11_get_slot_list_malloc(m, &slotids, &n_slotids);
+ if (rv != CKR_OK) {
+ log_warning("Failed to get slot list, ignoring module: %s", p11_kit_strerror(rv));
+ return -EAGAIN;
+ }
+ if (n_slotids == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "This module has no slots? Ignoring module.");
+
+ for (k = 0; k < n_slotids; k++) {
+ r = slot_process(
+ m,
+ slotids[k],
+ search_uri,
+ callback,
+ userdata);
+ if (r != -EAGAIN)
+ return r;
+ }
+
+ return -EAGAIN;
+}
+
+int pkcs11_find_token(
+ const char *pkcs11_uri,
+ pkcs11_find_token_callback_t callback,
+ void *userdata) {
+
+ _cleanup_(p11_kit_modules_finalize_and_releasep) CK_FUNCTION_LIST **modules = NULL;
+ _cleanup_(p11_kit_uri_freep) P11KitUri *search_uri = NULL;
+ int r;
+
+ /* Execute the specified callback for each matching token found. If nothing is found returns
+ * -EAGAIN. Logs about all errors, except for EAGAIN, which the caller has to log about. */
+
+ if (pkcs11_uri) {
+ r = uri_from_string(pkcs11_uri, &search_uri);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PKCS#11 URI '%s': %m", pkcs11_uri);
+ }
+
+ modules = p11_kit_modules_load_and_initialize(0);
+ if (!modules)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize pkcs11 modules");
+
+ for (CK_FUNCTION_LIST **i = modules; *i; i++) {
+ r = module_process(
+ *i,
+ search_uri,
+ callback,
+ userdata);
+ if (r != -EAGAIN)
+ return r;
+ }
+
+ return -EAGAIN;
+}
+
+#endif
diff --git a/src/shared/pkcs11-util.h b/src/shared/pkcs11-util.h
new file mode 100644
index 0000000..f14607d
--- /dev/null
+++ b/src/shared/pkcs11-util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#if HAVE_P11KIT
+# include <p11-kit/p11-kit.h>
+# include <p11-kit/uri.h>
+#endif
+
+#include "macro.h"
+#include "openssl-util.h"
+#include "time-util.h"
+
+bool pkcs11_uri_valid(const char *uri);
+
+#if HAVE_P11KIT
+int uri_from_string(const char *p, P11KitUri **ret);
+
+P11KitUri *uri_from_module_info(const CK_INFO *info);
+P11KitUri *uri_from_slot_info(const CK_SLOT_INFO *slot_info);
+P11KitUri *uri_from_token_info(const CK_TOKEN_INFO *token_info);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(P11KitUri*, p11_kit_uri_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CK_FUNCTION_LIST**, p11_kit_modules_finalize_and_release);
+
+CK_RV pkcs11_get_slot_list_malloc(CK_FUNCTION_LIST *m, CK_SLOT_ID **ret_slotids, CK_ULONG *ret_n_slotids);
+
+char *pkcs11_token_label(const CK_TOKEN_INFO *token_info);
+char *pkcs11_token_manufacturer_id(const CK_TOKEN_INFO *token_info);
+char *pkcs11_token_model(const CK_TOKEN_INFO *token_info);
+
+int pkcs11_token_login(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_SLOT_ID slotid, const CK_TOKEN_INFO *token_info, const char *friendly_name, const char *icon_name, const char *keyname, usec_t until, char **ret_used_pin);
+
+int pkcs11_token_find_x509_certificate(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, P11KitUri *search_uri, CK_OBJECT_HANDLE *ret_object);
+#if HAVE_OPENSSL
+int pkcs11_token_read_x509_certificate(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_OBJECT_HANDLE object, X509 **ret_cert);
+#endif
+
+int pkcs11_token_find_private_key(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, P11KitUri *search_uri, CK_OBJECT_HANDLE *ret_object);
+int pkcs11_token_decrypt_data(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_OBJECT_HANDLE object, const void *encrypted_data, size_t encrypted_data_size, void **ret_decrypted_data, size_t *ret_decrypted_data_size);
+
+int pkcs11_token_acquire_rng(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session);
+
+typedef int (*pkcs11_find_token_callback_t)(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_SLOT_ID slotid, const CK_SLOT_INFO *slot_info, const CK_TOKEN_INFO *token_info, P11KitUri *uri, void *userdata);
+int pkcs11_find_token(const char *pkcs11_uri, pkcs11_find_token_callback_t callback, void *userdata);
+#endif
diff --git a/src/shared/pretty-print.c b/src/shared/pretty-print.c
new file mode 100644
index 0000000..ca5b25a
--- /dev/null
+++ b/src/shared/pretty-print.c
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/utsname.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+bool urlify_enabled(void) {
+ static int cached_urlify_enabled = -1;
+
+ /* Unfortunately 'less' doesn't support links like this yet 😭, hence let's disable this as long as there's a
+ * pager in effect. Let's drop this check as soon as less got fixed a and enough time passed so that it's safe
+ * to assume that a link-enabled 'less' version has hit most installations. */
+
+ if (cached_urlify_enabled < 0) {
+ int val;
+
+ val = getenv_bool("SYSTEMD_URLIFY");
+ if (val >= 0)
+ cached_urlify_enabled = val;
+ else
+ cached_urlify_enabled = colors_enabled() && !pager_have();
+ }
+
+ return cached_urlify_enabled;
+}
+
+int terminal_urlify(const char *url, const char *text, char **ret) {
+ char *n;
+
+ assert(url);
+
+ /* Takes an URL and a pretty string and formats it as clickable link for the terminal. See
+ * https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda for details. */
+
+ if (isempty(text))
+ text = url;
+
+ if (urlify_enabled())
+ n = strjoin("\x1B]8;;", url, "\a", text, "\x1B]8;;\a");
+ else
+ n = strdup(text);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int file_url_from_path(const char *path, char **ret) {
+ _cleanup_free_ char *absolute = NULL;
+ struct utsname u;
+ char *url = NULL;
+ int r;
+
+ if (uname(&u) < 0)
+ return -errno;
+
+ if (!path_is_absolute(path)) {
+ r = path_make_absolute_cwd(path, &absolute);
+ if (r < 0)
+ return r;
+
+ path = absolute;
+ }
+
+ /* As suggested by https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda, let's include the local
+ * hostname here. Note that we don't use gethostname_malloc() or gethostname_strict() since we are interested
+ * in the raw string the kernel has set, whatever it may be, under the assumption that terminals are not overly
+ * careful with validating the strings either. */
+
+ url = strjoin("file://", u.nodename, path);
+ if (!url)
+ return -ENOMEM;
+
+ *ret = url;
+ return 0;
+}
+
+int terminal_urlify_path(const char *path, const char *text, char **ret) {
+ _cleanup_free_ char *url = NULL;
+ int r;
+
+ assert(path);
+
+ /* Much like terminal_urlify() above, but takes a file system path as input
+ * and turns it into a proper file:// URL first. */
+
+ if (isempty(path))
+ return -EINVAL;
+
+ if (isempty(text))
+ text = path;
+
+ if (!urlify_enabled()) {
+ char *n;
+
+ n = strdup(text);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+ }
+
+ r = file_url_from_path(path, &url);
+ if (r < 0)
+ return r;
+
+ return terminal_urlify(url, text, ret);
+}
+
+int terminal_urlify_man(const char *page, const char *section, char **ret) {
+ const char *url, *text;
+
+ url = strjoina("man:", page, "(", section, ")");
+ text = strjoina(page, "(", section, ") man page");
+
+ return terminal_urlify(url, text, ret);
+}
+
+static int cat_file(const char *filename, bool newline) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *urlified = NULL;
+ int r;
+
+ f = fopen(filename, "re");
+ if (!f)
+ return -errno;
+
+ r = terminal_urlify_path(filename, NULL, &urlified);
+ if (r < 0)
+ return r;
+
+ printf("%s%s# %s%s\n",
+ newline ? "\n" : "",
+ ansi_highlight_blue(),
+ urlified,
+ ansi_normal());
+ fflush(stdout);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read \"%s\": %m", filename);
+ if (r == 0)
+ break;
+
+ puts(line);
+ }
+
+ return 0;
+}
+
+int cat_files(const char *file, char **dropins, CatFlags flags) {
+ char **path;
+ int r;
+
+ if (file) {
+ r = cat_file(file, false);
+ if (r == -ENOENT && (flags & CAT_FLAGS_MAIN_FILE_OPTIONAL))
+ printf("%s# Configuration file %s not found%s\n",
+ ansi_highlight_magenta(),
+ file,
+ ansi_normal());
+ else if (r < 0)
+ return log_warning_errno(r, "Failed to cat %s: %m", file);
+ }
+
+ STRV_FOREACH(path, dropins) {
+ r = cat_file(*path, file || path != dropins);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to cat %s: %m", *path);
+ }
+
+ return 0;
+}
+
+void print_separator(void) {
+
+ /* Outputs a separator line that resolves to whitespace when copied from the terminal. We do that by outputting
+ * one line filled with spaces with ANSI underline set, followed by a second (empty) line. */
+
+ if (underline_enabled()) {
+ size_t i, c;
+
+ c = columns();
+
+ flockfile(stdout);
+ fputs_unlocked(ANSI_UNDERLINE, stdout);
+
+ for (i = 0; i < c; i++)
+ fputc_unlocked(' ', stdout);
+
+ fputs_unlocked(ANSI_NORMAL "\n\n", stdout);
+ funlockfile(stdout);
+ } else
+ fputs("\n\n", stdout);
+}
+
+static int guess_type(const char **name, char ***prefixes, bool *is_collection, const char **extension) {
+ /* Try to figure out if name is like tmpfiles.d/ or systemd/system-presets/,
+ * i.e. a collection of directories without a main config file. */
+
+ _cleanup_free_ char *n = NULL;
+ bool usr = false, run = false, coll = false;
+ const char *ext = ".conf";
+ /* This is static so that the array doesn't get deallocated when we exit the function */
+ static const char* const std_prefixes[] = { CONF_PATHS(""), NULL };
+ static const char* const usr_prefixes[] = { CONF_PATHS_USR(""), NULL };
+ static const char* const run_prefixes[] = { "/run/", NULL };
+
+ if (path_equal(*name, "environment.d"))
+ /* Special case: we need to include /etc/environment in the search path, even
+ * though the whole concept is called environment.d. */
+ *name = "environment";
+
+ n = strdup(*name);
+ if (!n)
+ return log_oom();
+
+ /* All systemd-style config files should support the /usr-/etc-/run split and
+ * dropins. Let's add a blanket rule that allows us to support them without keeping
+ * an explicit list. */
+ if (path_startswith(n, "systemd") && endswith(n, ".conf"))
+ usr = true;
+
+ delete_trailing_chars(n, "/");
+
+ if (endswith(n, ".d"))
+ coll = true;
+
+ if (path_equal(n, "environment"))
+ usr = true;
+
+ if (path_equal(n, "udev/hwdb.d"))
+ ext = ".hwdb";
+
+ if (path_equal(n, "udev/rules.d"))
+ ext = ".rules";
+
+ if (path_equal(n, "kernel/install.d"))
+ ext = ".install";
+
+ if (path_equal(n, "systemd/ntp-units.d")) {
+ coll = true;
+ ext = ".list";
+ }
+
+ if (path_equal(n, "systemd/relabel-extra.d")) {
+ coll = run = true;
+ ext = ".relabel";
+ }
+
+ if (PATH_IN_SET(n, "systemd/system-preset", "systemd/user-preset")) {
+ coll = true;
+ ext = ".preset";
+ }
+
+ if (path_equal(n, "systemd/user-preset"))
+ usr = true;
+
+ *prefixes = (char**) (usr ? usr_prefixes : run ? run_prefixes : std_prefixes);
+ *is_collection = coll;
+ *extension = ext;
+ return 0;
+}
+
+int conf_files_cat(const char *root, const char *name) {
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL;
+ _cleanup_free_ char *path = NULL;
+ char **prefixes, **prefix;
+ bool is_collection;
+ const char *extension;
+ char **t;
+ int r;
+
+ r = guess_type(&name, &prefixes, &is_collection, &extension);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(prefix, prefixes) {
+ assert(endswith(*prefix, "/"));
+ r = strv_extendf(&dirs, "%s%s%s", *prefix, name,
+ is_collection ? "" : ".d");
+ if (r < 0)
+ return log_error_errno(r, "Failed to build directory list: %m");
+ }
+
+ r = conf_files_list_strv(&files, extension, root, 0, (const char* const*) dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query file list: %m");
+
+ if (!is_collection) {
+ path = path_join(root, "/etc", name);
+ if (!path)
+ return log_oom();
+ }
+
+ if (DEBUG_LOGGING) {
+ log_debug("Looking for configuration in:");
+ if (path)
+ log_debug(" %s", path);
+ STRV_FOREACH(t, dirs)
+ log_debug(" %s/*%s", *t, extension);
+ }
+
+ /* show */
+ return cat_files(path, files, CAT_FLAGS_MAIN_FILE_OPTIONAL);
+}
diff --git a/src/shared/pretty-print.h b/src/shared/pretty-print.h
new file mode 100644
index 0000000..4619f4e
--- /dev/null
+++ b/src/shared/pretty-print.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+void print_separator(void);
+
+int file_url_from_path(const char *path, char **ret);
+
+bool urlify_enabled(void);
+
+int terminal_urlify(const char *url, const char *text, char **ret);
+int terminal_urlify_path(const char *path, const char *text, char **ret);
+int terminal_urlify_man(const char *page, const char *section, char **ret);
+
+typedef enum CatFlags {
+ CAT_FLAGS_MAIN_FILE_OPTIONAL = 1 << 0,
+} CatFlags;
+
+int cat_files(const char *file, char **dropins, CatFlags flags);
+int conf_files_cat(const char *root, const char *name);
diff --git a/src/shared/psi-util.c b/src/shared/psi-util.c
new file mode 100644
index 0000000..7a184d5
--- /dev/null
+++ b/src/shared/psi-util.c
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "psi-util.h"
+#include "string-util.h"
+#include "stat-util.h"
+#include "strv.h"
+
+int read_resource_pressure(const char *path, PressureType type, ResourcePressure *ret) {
+ _cleanup_free_ char *line = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned field_filled = 0;
+ ResourcePressure rp = {};
+ const char *t, *cline;
+ char *word;
+ int r;
+
+ assert(path);
+ assert(IN_SET(type, PRESSURE_TYPE_SOME, PRESSURE_TYPE_FULL));
+ assert(ret);
+
+ if (type == PRESSURE_TYPE_SOME)
+ t = "some";
+ else if (type == PRESSURE_TYPE_FULL)
+ t = "full";
+ else
+ return -EINVAL;
+
+ r = fopen_unlocked(path, "re", &f);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *l = NULL;
+ char *w;
+
+ r = read_line(f, LONG_LINE_MAX, &l);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ w = first_word(l, t);
+ if (w) {
+ line = TAKE_PTR(l);
+ cline = w;
+ break;
+ }
+ }
+
+ if (!line)
+ return -ENODATA;
+
+ /* extracts either avgX=Y.Z or total=X */
+ while ((r = extract_first_word(&cline, &word, NULL, 0)) > 0) {
+ _cleanup_free_ char *w = word;
+ const char *v;
+
+ if ((v = startswith(w, "avg10="))) {
+ if (field_filled & (1U << 0))
+ return -EINVAL;
+
+ field_filled |= 1U << 0;
+ r = parse_loadavg_fixed_point(v, &rp.avg10);
+ } else if ((v = startswith(w, "avg60="))) {
+ if (field_filled & (1U << 1))
+ return -EINVAL;
+
+ field_filled |= 1U << 1;
+ r = parse_loadavg_fixed_point(v, &rp.avg60);
+ } else if ((v = startswith(w, "avg300="))) {
+ if (field_filled & (1U << 2))
+ return -EINVAL;
+
+ field_filled |= 1U << 2;
+ r = parse_loadavg_fixed_point(v, &rp.avg300);
+ } else if ((v = startswith(w, "total="))) {
+ if (field_filled & (1U << 3))
+ return -EINVAL;
+
+ field_filled |= 1U << 3;
+ r = safe_atou64(v, &rp.total);
+ } else
+ continue;
+
+ if (r < 0)
+ return r;
+ }
+
+ if (r < 0)
+ return r;
+
+ if (field_filled != 15U)
+ return -EINVAL;
+
+ *ret = rp;
+ return 0;
+}
+
+int is_pressure_supported(void) {
+ const char *p;
+
+ FOREACH_STRING(p, "/proc/pressure/cpu", "/proc/pressure/io", "/proc/pressure/memory")
+ if (access(p, F_OK) < 0) {
+ if (errno == ENOENT)
+ return 0;
+ return -errno;
+ }
+
+ return 1;
+}
diff --git a/src/shared/psi-util.h b/src/shared/psi-util.h
new file mode 100644
index 0000000..415fbbd
--- /dev/null
+++ b/src/shared/psi-util.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "parse-util.h"
+#include "time-util.h"
+
+typedef enum PressureType {
+ PRESSURE_TYPE_SOME,
+ PRESSURE_TYPE_FULL,
+} PressureType;
+
+/* Averages are stored in fixed-point with 11 bit fractions */
+typedef struct ResourcePressure {
+ loadavg_t avg10;
+ loadavg_t avg60;
+ loadavg_t avg300;
+ usec_t total;
+} ResourcePressure;
+
+/** Upstream 4.20+ format
+ *
+ * some avg10=0.22 avg60=0.17 avg300=1.11 total=58761459
+ * full avg10=0.23 avg60=0.16 avg300=1.08 total=58464525
+ */
+int read_resource_pressure(const char *path, PressureType type, ResourcePressure *ret);
+
+/* Was the kernel compiled with CONFIG_PSI=y? 1 if yes, 0 if not, negative on error. */
+int is_pressure_supported(void);
diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c
new file mode 100644
index 0000000..754b4f5
--- /dev/null
+++ b/src/shared/ptyfwd.c
@@ -0,0 +1,681 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "ptyfwd.h"
+#include "terminal-util.h"
+#include "time-util.h"
+
+struct PTYForward {
+ sd_event *event;
+
+ int input_fd;
+ int output_fd;
+ int master;
+
+ PTYForwardFlags flags;
+
+ sd_event_source *stdin_event_source;
+ sd_event_source *stdout_event_source;
+ sd_event_source *master_event_source;
+
+ sd_event_source *sigwinch_event_source;
+
+ struct termios saved_stdin_attr;
+ struct termios saved_stdout_attr;
+
+ bool close_input_fd:1;
+ bool close_output_fd:1;
+
+ bool saved_stdin:1;
+ bool saved_stdout:1;
+
+ bool stdin_readable:1;
+ bool stdin_hangup:1;
+ bool stdout_writable:1;
+ bool stdout_hangup:1;
+ bool master_readable:1;
+ bool master_writable:1;
+ bool master_hangup:1;
+
+ bool read_from_master:1;
+
+ bool done:1;
+ bool drain:1;
+
+ bool last_char_set:1;
+ char last_char;
+
+ char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
+ size_t in_buffer_full, out_buffer_full;
+
+ usec_t escape_timestamp;
+ unsigned escape_counter;
+
+ PTYForwardHandler handler;
+ void *userdata;
+};
+
+#define ESCAPE_USEC (1*USEC_PER_SEC)
+
+static void pty_forward_disconnect(PTYForward *f) {
+
+ if (!f)
+ return;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ f->stdout_event_source = sd_event_source_unref(f->stdout_event_source);
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source);
+ f->event = sd_event_unref(f->event);
+
+ if (f->output_fd >= 0) {
+ if (f->saved_stdout)
+ (void) tcsetattr(f->output_fd, TCSANOW, &f->saved_stdout_attr);
+
+ /* STDIN/STDOUT should not be non-blocking normally, so let's reset it */
+ (void) fd_nonblock(f->output_fd, false);
+ if (f->close_output_fd)
+ f->output_fd = safe_close(f->output_fd);
+ }
+
+ if (f->input_fd >= 0) {
+ if (f->saved_stdin)
+ (void) tcsetattr(f->input_fd, TCSANOW, &f->saved_stdin_attr);
+
+ (void) fd_nonblock(f->input_fd, false);
+ if (f->close_input_fd)
+ f->input_fd = safe_close(f->input_fd);
+ }
+
+ f->saved_stdout = f->saved_stdin = false;
+}
+
+static int pty_forward_done(PTYForward *f, int rcode) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ assert(f);
+
+ if (f->done)
+ return 0;
+
+ e = sd_event_ref(f->event);
+
+ f->done = true;
+ pty_forward_disconnect(f);
+
+ if (f->handler)
+ return f->handler(f, rcode, f->userdata);
+ else
+ return sd_event_exit(e, rcode < 0 ? EXIT_FAILURE : rcode);
+}
+
+static bool look_for_escape(PTYForward *f, const char *buffer, size_t n) {
+ const char *p;
+
+ assert(f);
+ assert(buffer);
+ assert(n > 0);
+
+ for (p = buffer; p < buffer + n; p++) {
+
+ /* Check for ^] */
+ if (*p == 0x1D) {
+ usec_t nw = now(CLOCK_MONOTONIC);
+
+ if (f->escape_counter == 0 || nw > f->escape_timestamp + ESCAPE_USEC) {
+ f->escape_timestamp = nw;
+ f->escape_counter = 1;
+ } else {
+ (f->escape_counter)++;
+
+ if (f->escape_counter >= 3)
+ return true;
+ }
+ } else {
+ f->escape_timestamp = 0;
+ f->escape_counter = 0;
+ }
+ }
+
+ return false;
+}
+
+static bool ignore_vhangup(PTYForward *f) {
+ assert(f);
+
+ if (f->flags & PTY_FORWARD_IGNORE_VHANGUP)
+ return true;
+
+ if ((f->flags & PTY_FORWARD_IGNORE_INITIAL_VHANGUP) && !f->read_from_master)
+ return true;
+
+ return false;
+}
+
+static bool drained(PTYForward *f) {
+ int q = 0;
+
+ assert(f);
+
+ if (f->out_buffer_full > 0)
+ return false;
+
+ if (f->master_readable)
+ return false;
+
+ if (ioctl(f->master, TIOCINQ, &q) < 0)
+ log_debug_errno(errno, "TIOCINQ failed on master: %m");
+ else if (q > 0)
+ return false;
+
+ if (ioctl(f->master, TIOCOUTQ, &q) < 0)
+ log_debug_errno(errno, "TIOCOUTQ failed on master: %m");
+ else if (q > 0)
+ return false;
+
+ return true;
+}
+
+static int shovel(PTYForward *f) {
+ ssize_t k;
+
+ assert(f);
+
+ while ((f->stdin_readable && f->in_buffer_full <= 0) ||
+ (f->master_writable && f->in_buffer_full > 0) ||
+ (f->master_readable && f->out_buffer_full <= 0) ||
+ (f->stdout_writable && f->out_buffer_full > 0)) {
+
+ if (f->stdin_readable && f->in_buffer_full < LINE_MAX) {
+
+ k = read(f->input_fd, f->in_buffer + f->in_buffer_full, LINE_MAX - f->in_buffer_full);
+ if (k < 0) {
+
+ if (errno == EAGAIN)
+ f->stdin_readable = false;
+ else if (errno == EIO || ERRNO_IS_DISCONNECT(errno)) {
+ f->stdin_readable = false;
+ f->stdin_hangup = true;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ } else {
+ log_error_errno(errno, "read(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else if (k == 0) {
+ /* EOF on stdin */
+ f->stdin_readable = false;
+ f->stdin_hangup = true;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ } else {
+ /* Check if ^] has been pressed three times within one second. If we get this we quite
+ * immediately. */
+ if (look_for_escape(f, f->in_buffer + f->in_buffer_full, k))
+ return pty_forward_done(f, -ECANCELED);
+
+ f->in_buffer_full += (size_t) k;
+ }
+ }
+
+ if (f->master_writable && f->in_buffer_full > 0) {
+
+ k = write(f->master, f->in_buffer, f->in_buffer_full);
+ if (k < 0) {
+
+ if (IN_SET(errno, EAGAIN, EIO))
+ f->master_writable = false;
+ else if (IN_SET(errno, EPIPE, ECONNRESET)) {
+ f->master_writable = f->master_readable = false;
+ f->master_hangup = true;
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ } else {
+ log_error_errno(errno, "write(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else {
+ assert(f->in_buffer_full >= (size_t) k);
+ memmove(f->in_buffer, f->in_buffer + k, f->in_buffer_full - k);
+ f->in_buffer_full -= k;
+ }
+ }
+
+ if (f->master_readable && f->out_buffer_full < LINE_MAX) {
+
+ k = read(f->master, f->out_buffer + f->out_buffer_full, LINE_MAX - f->out_buffer_full);
+ if (k < 0) {
+
+ /* Note that EIO on the master device
+ * might be caused by vhangup() or
+ * temporary closing of everything on
+ * the other side, we treat it like
+ * EAGAIN here and try again, unless
+ * ignore_vhangup is off. */
+
+ if (errno == EAGAIN || (errno == EIO && ignore_vhangup(f)))
+ f->master_readable = false;
+ else if (IN_SET(errno, EPIPE, ECONNRESET, EIO)) {
+ f->master_readable = f->master_writable = false;
+ f->master_hangup = true;
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ } else {
+ log_error_errno(errno, "read(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else {
+ f->read_from_master = true;
+ f->out_buffer_full += (size_t) k;
+ }
+ }
+
+ if (f->stdout_writable && f->out_buffer_full > 0) {
+
+ k = write(f->output_fd, f->out_buffer, f->out_buffer_full);
+ if (k < 0) {
+
+ if (errno == EAGAIN)
+ f->stdout_writable = false;
+ else if (errno == EIO || ERRNO_IS_DISCONNECT(errno)) {
+ f->stdout_writable = false;
+ f->stdout_hangup = true;
+ f->stdout_event_source = sd_event_source_unref(f->stdout_event_source);
+ } else {
+ log_error_errno(errno, "write(): %m");
+ return pty_forward_done(f, -errno);
+ }
+
+ } else {
+
+ if (k > 0) {
+ f->last_char = f->out_buffer[k-1];
+ f->last_char_set = true;
+ }
+
+ assert(f->out_buffer_full >= (size_t) k);
+ memmove(f->out_buffer, f->out_buffer + k, f->out_buffer_full - k);
+ f->out_buffer_full -= k;
+ }
+ }
+ }
+
+ if (f->stdin_hangup || f->stdout_hangup || f->master_hangup) {
+ /* Exit the loop if any side hung up and if there's
+ * nothing more to write or nothing we could write. */
+
+ if ((f->out_buffer_full <= 0 || f->stdout_hangup) &&
+ (f->in_buffer_full <= 0 || f->master_hangup))
+ return pty_forward_done(f, 0);
+ }
+
+ /* If we were asked to drain, and there's nothing more to handle from the master, then call the callback
+ * too. */
+ if (f->drain && drained(f))
+ return pty_forward_done(f, 0);
+
+ return 0;
+}
+
+static int on_master_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->master_event_source);
+ assert(fd >= 0);
+ assert(fd == f->master);
+
+ if (revents & (EPOLLIN|EPOLLHUP))
+ f->master_readable = true;
+
+ if (revents & (EPOLLOUT|EPOLLHUP))
+ f->master_writable = true;
+
+ return shovel(f);
+}
+
+static int on_stdin_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->stdin_event_source);
+ assert(fd >= 0);
+ assert(fd == f->input_fd);
+
+ if (revents & (EPOLLIN|EPOLLHUP))
+ f->stdin_readable = true;
+
+ return shovel(f);
+}
+
+static int on_stdout_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->stdout_event_source);
+ assert(fd >= 0);
+ assert(fd == f->output_fd);
+
+ if (revents & (EPOLLOUT|EPOLLHUP))
+ f->stdout_writable = true;
+
+ return shovel(f);
+}
+
+static int on_sigwinch_event(sd_event_source *e, const struct signalfd_siginfo *si, void *userdata) {
+ PTYForward *f = userdata;
+ struct winsize ws;
+
+ assert(f);
+ assert(e);
+ assert(e == f->sigwinch_event_source);
+
+ /* The window size changed, let's forward that. */
+ if (ioctl(f->output_fd, TIOCGWINSZ, &ws) >= 0)
+ (void) ioctl(f->master, TIOCSWINSZ, &ws);
+
+ return 0;
+}
+
+int pty_forward_new(
+ sd_event *event,
+ int master,
+ PTYForwardFlags flags,
+ PTYForward **ret) {
+
+ _cleanup_(pty_forward_freep) PTYForward *f = NULL;
+ struct winsize ws;
+ int r;
+
+ f = new(PTYForward, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (struct PTYForward) {
+ .flags = flags,
+ .master = -1,
+ .input_fd = -1,
+ .output_fd = -1,
+ };
+
+ if (event)
+ f->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&f->event);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(flags, PTY_FORWARD_READ_ONLY))
+ f->output_fd = STDOUT_FILENO;
+ else {
+ /* If we shall be invoked in interactive mode, let's switch on non-blocking mode, so that we
+ * never end up staving one direction while we block on the other. However, let's be careful
+ * here and not turn on O_NONBLOCK for stdin/stdout directly, but of re-opened copies of
+ * them. This has two advantages: when we are killed abruptly the stdin/stdout fds won't be
+ * left in O_NONBLOCK state for the next process using them. In addition, if some process
+ * running in the background wants to continue writing to our stdout it can do so without
+ * being confused by O_NONBLOCK. */
+
+ f->input_fd = fd_reopen(STDIN_FILENO, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (f->input_fd < 0) {
+ /* Handle failures gracefully, after all certain fd types cannot be reopened
+ * (sockets, …) */
+ log_debug_errno(f->input_fd, "Failed to reopen stdin, using original fd: %m");
+
+ r = fd_nonblock(STDIN_FILENO, true);
+ if (r < 0)
+ return r;
+
+ f->input_fd = STDIN_FILENO;
+ } else
+ f->close_input_fd = true;
+
+ f->output_fd = fd_reopen(STDOUT_FILENO, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (f->output_fd < 0) {
+ log_debug_errno(f->output_fd, "Failed to reopen stdout, using original fd: %m");
+
+ r = fd_nonblock(STDOUT_FILENO, true);
+ if (r < 0)
+ return r;
+
+ f->output_fd = STDOUT_FILENO;
+ } else
+ f->close_output_fd = true;
+ }
+
+ r = fd_nonblock(master, true);
+ if (r < 0)
+ return r;
+
+ f->master = master;
+
+ if (ioctl(f->output_fd, TIOCGWINSZ, &ws) < 0)
+ /* If we can't get the resolution from the output fd, then use our internal, regular width/height,
+ * i.e. something derived from $COLUMNS and $LINES if set. */
+ ws = (struct winsize) {
+ .ws_row = lines(),
+ .ws_col = columns(),
+ };
+
+ (void) ioctl(master, TIOCSWINSZ, &ws);
+
+ if (!(flags & PTY_FORWARD_READ_ONLY)) {
+ assert(f->input_fd >= 0);
+
+ if (tcgetattr(f->input_fd, &f->saved_stdin_attr) >= 0) {
+ struct termios raw_stdin_attr;
+
+ f->saved_stdin = true;
+
+ raw_stdin_attr = f->saved_stdin_attr;
+ cfmakeraw(&raw_stdin_attr);
+ raw_stdin_attr.c_oflag = f->saved_stdin_attr.c_oflag;
+ tcsetattr(f->input_fd, TCSANOW, &raw_stdin_attr);
+ }
+
+ if (tcgetattr(f->output_fd, &f->saved_stdout_attr) >= 0) {
+ struct termios raw_stdout_attr;
+
+ f->saved_stdout = true;
+
+ raw_stdout_attr = f->saved_stdout_attr;
+ cfmakeraw(&raw_stdout_attr);
+ raw_stdout_attr.c_iflag = f->saved_stdout_attr.c_iflag;
+ raw_stdout_attr.c_lflag = f->saved_stdout_attr.c_lflag;
+ tcsetattr(f->output_fd, TCSANOW, &raw_stdout_attr);
+ }
+
+ r = sd_event_add_io(f->event, &f->stdin_event_source, f->input_fd, EPOLLIN|EPOLLET, on_stdin_event, f);
+ if (r < 0 && r != -EPERM)
+ return r;
+
+ if (r >= 0)
+ (void) sd_event_source_set_description(f->stdin_event_source, "ptyfwd-stdin");
+ }
+
+ r = sd_event_add_io(f->event, &f->stdout_event_source, f->output_fd, EPOLLOUT|EPOLLET, on_stdout_event, f);
+ if (r == -EPERM)
+ /* stdout without epoll support. Likely redirected to regular file. */
+ f->stdout_writable = true;
+ else if (r < 0)
+ return r;
+ else
+ (void) sd_event_source_set_description(f->stdout_event_source, "ptyfwd-stdout");
+
+ r = sd_event_add_io(f->event, &f->master_event_source, master, EPOLLIN|EPOLLOUT|EPOLLET, on_master_event, f);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(f->master_event_source, "ptyfwd-master");
+
+ r = sd_event_add_signal(f->event, &f->sigwinch_event_source, SIGWINCH, on_sigwinch_event, f);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(f->sigwinch_event_source, "ptyfwd-sigwinch");
+
+ *ret = TAKE_PTR(f);
+
+ return 0;
+}
+
+PTYForward *pty_forward_free(PTYForward *f) {
+ pty_forward_disconnect(f);
+ return mfree(f);
+}
+
+int pty_forward_get_last_char(PTYForward *f, char *ch) {
+ assert(f);
+ assert(ch);
+
+ if (!f->last_char_set)
+ return -ENXIO;
+
+ *ch = f->last_char;
+ return 0;
+}
+
+int pty_forward_set_ignore_vhangup(PTYForward *f, bool b) {
+ int r;
+
+ assert(f);
+
+ if (!!(f->flags & PTY_FORWARD_IGNORE_VHANGUP) == b)
+ return 0;
+
+ SET_FLAG(f->flags, PTY_FORWARD_IGNORE_VHANGUP, b);
+
+ if (!ignore_vhangup(f)) {
+
+ /* We shall now react to vhangup()s? Let's check
+ * immediately if we might be in one */
+
+ f->master_readable = true;
+ r = shovel(f);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+bool pty_forward_get_ignore_vhangup(PTYForward *f) {
+ assert(f);
+
+ return !!(f->flags & PTY_FORWARD_IGNORE_VHANGUP);
+}
+
+bool pty_forward_is_done(PTYForward *f) {
+ assert(f);
+
+ return f->done;
+}
+
+void pty_forward_set_handler(PTYForward *f, PTYForwardHandler cb, void *userdata) {
+ assert(f);
+
+ f->handler = cb;
+ f->userdata = userdata;
+}
+
+bool pty_forward_drain(PTYForward *f) {
+ assert(f);
+
+ /* Starts draining the forwarder. Specifically:
+ *
+ * - Returns true if there are no unprocessed bytes from the pty, false otherwise
+ *
+ * - Makes sure the handler function is called the next time the number of unprocessed bytes hits zero
+ */
+
+ f->drain = true;
+ return drained(f);
+}
+
+int pty_forward_set_priority(PTYForward *f, int64_t priority) {
+ int r;
+ assert(f);
+
+ if (f->stdin_event_source) {
+ r = sd_event_source_set_priority(f->stdin_event_source, priority);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_priority(f->stdout_event_source, priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(f->master_event_source, priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(f->sigwinch_event_source, priority);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height) {
+ struct winsize ws;
+
+ assert(f);
+
+ if (width == (unsigned) -1 && height == (unsigned) -1)
+ return 0; /* noop */
+
+ if (width != (unsigned) -1 &&
+ (width == 0 || width > USHRT_MAX))
+ return -ERANGE;
+
+ if (height != (unsigned) -1 &&
+ (height == 0 || height > USHRT_MAX))
+ return -ERANGE;
+
+ if (width == (unsigned) -1 || height == (unsigned) -1) {
+ if (ioctl(f->master, TIOCGWINSZ, &ws) < 0)
+ return -errno;
+
+ if (width != (unsigned) -1)
+ ws.ws_col = width;
+ if (height != (unsigned) -1)
+ ws.ws_row = height;
+ } else
+ ws = (struct winsize) {
+ .ws_row = height,
+ .ws_col = width,
+ };
+
+ if (ioctl(f->master, TIOCSWINSZ, &ws) < 0)
+ return -errno;
+
+ /* Make sure we ignore SIGWINCH window size events from now on */
+ f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source);
+
+ return 0;
+}
diff --git a/src/shared/ptyfwd.h b/src/shared/ptyfwd.h
new file mode 100644
index 0000000..f0ae6e9
--- /dev/null
+++ b/src/shared/ptyfwd.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+#include "macro.h"
+
+typedef struct PTYForward PTYForward;
+
+typedef enum PTYForwardFlags {
+ PTY_FORWARD_READ_ONLY = 1,
+
+ /* Continue reading after hangup? */
+ PTY_FORWARD_IGNORE_VHANGUP = 2,
+
+ /* Continue reading after hangup but only if we never read anything else? */
+ PTY_FORWARD_IGNORE_INITIAL_VHANGUP = 4,
+} PTYForwardFlags;
+
+typedef int (*PTYForwardHandler)(PTYForward *f, int rcode, void *userdata);
+
+int pty_forward_new(sd_event *event, int master, PTYForwardFlags flags, PTYForward **f);
+PTYForward *pty_forward_free(PTYForward *f);
+
+int pty_forward_get_last_char(PTYForward *f, char *ch);
+
+int pty_forward_set_ignore_vhangup(PTYForward *f, bool ignore_vhangup);
+bool pty_forward_get_ignore_vhangup(PTYForward *f);
+
+bool pty_forward_is_done(PTYForward *f);
+
+void pty_forward_set_handler(PTYForward *f, PTYForwardHandler handler, void *userdata);
+
+bool pty_forward_drain(PTYForward *f);
+
+int pty_forward_set_priority(PTYForward *f, int64_t priority);
+
+int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(PTYForward*, pty_forward_free);
diff --git a/src/shared/pwquality-util.c b/src/shared/pwquality-util.c
new file mode 100644
index 0000000..4000bef
--- /dev/null
+++ b/src/shared/pwquality-util.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "dlfcn-util.h"
+#include "errno-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "pwquality-util.h"
+#include "strv.h"
+
+#if HAVE_PWQUALITY
+
+static void *pwquality_dl = NULL;
+
+int (*sym_pwquality_check)(pwquality_settings_t *pwq, const char *password, const char *oldpassword, const char *user, void **auxerror);
+pwquality_settings_t *(*sym_pwquality_default_settings)(void);
+void (*sym_pwquality_free_settings)(pwquality_settings_t *pwq);
+int (*sym_pwquality_generate)(pwquality_settings_t *pwq, int entropy_bits, char **password);
+int (*sym_pwquality_get_str_value)(pwquality_settings_t *pwq, int setting, const char **value);
+int (*sym_pwquality_read_config)(pwquality_settings_t *pwq, const char *cfgfile, void **auxerror);
+int (*sym_pwquality_set_int_value)(pwquality_settings_t *pwq, int setting, int value);
+const char* (*sym_pwquality_strerror)(char *buf, size_t len, int errcode, void *auxerror);
+
+int dlopen_pwquality(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (pwquality_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libpwquality.so.1", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "libpwquality support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_pwquality_check, "pwquality_check",
+ &sym_pwquality_default_settings, "pwquality_default_settings",
+ &sym_pwquality_free_settings, "pwquality_free_settings",
+ &sym_pwquality_generate, "pwquality_generate",
+ &sym_pwquality_get_str_value, "pwquality_get_str_value",
+ &sym_pwquality_read_config, "pwquality_read_config",
+ &sym_pwquality_set_int_value, "pwquality_set_int_value",
+ &sym_pwquality_strerror, "pwquality_strerror",
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* Note that we never release the reference here, because there's no real reason to, after all this
+ * was traditionally a regular shared library dependency which lives forever too. */
+ pwquality_dl = TAKE_PTR(dl);
+ return 1;
+}
+
+void pwq_maybe_disable_dictionary(pwquality_settings_t *pwq) {
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN];
+ const char *path;
+ int r;
+
+ assert(pwq);
+
+ r = sym_pwquality_get_str_value(pwq, PWQ_SETTING_DICT_PATH, &path);
+ if (r < 0) {
+ log_debug("Failed to read libpwquality dictionary path, ignoring: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, NULL));
+ return;
+ }
+
+ // REMOVE THIS AS SOON AS https://github.com/libpwquality/libpwquality/pull/21 IS MERGED AND RELEASED
+ if (isempty(path))
+ path = "/usr/share/cracklib/pw_dict.pwd.gz";
+
+ if (isempty(path)) {
+ log_debug("Weird, no dictionary file configured, ignoring.");
+ return;
+ }
+
+ if (access(path, F_OK) >= 0)
+ return;
+
+ if (errno != ENOENT) {
+ log_debug_errno(errno, "Failed to check if dictionary file %s exists, ignoring: %m", path);
+ return;
+ }
+
+ r = sym_pwquality_set_int_value(pwq, PWQ_SETTING_DICT_CHECK, 0);
+ if (r < 0)
+ log_debug("Failed to disable libpwquality dictionary check, ignoring: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, NULL));
+}
+
+int pwq_allocate_context(pwquality_settings_t **ret) {
+ _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL;
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN];
+ void *auxerror;
+ int r;
+
+ assert(ret);
+
+ r = dlopen_pwquality();
+ if (r < 0)
+ return r;
+
+ pwq = sym_pwquality_default_settings();
+ if (!pwq)
+ return -ENOMEM;
+
+ r = sym_pwquality_read_config(pwq, NULL, &auxerror);
+ if (r < 0)
+ log_debug("Failed to read libpwquality configuration, ignoring: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, auxerror));
+
+ pwq_maybe_disable_dictionary(pwq);
+
+ *ret = TAKE_PTR(pwq);
+ return 0;
+}
+
+#define N_SUGGESTIONS 6
+
+int suggest_passwords(void) {
+ _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL;
+ _cleanup_strv_free_erase_ char **suggestions = NULL;
+ _cleanup_(erase_and_freep) char *joined = NULL;
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN];
+ size_t i;
+ int r;
+
+ r = pwq_allocate_context(&pwq);
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate libpwquality context: %m");
+
+ suggestions = new0(char*, N_SUGGESTIONS+1);
+ if (!suggestions)
+ return log_oom();
+
+ for (i = 0; i < N_SUGGESTIONS; i++) {
+ r = sym_pwquality_generate(pwq, 64, suggestions + i);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to generate password, ignoring: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, NULL));
+ }
+
+ joined = strv_join(suggestions, " ");
+ if (!joined)
+ return log_oom();
+
+ log_info("Password suggestions: %s", joined);
+ return 1;
+}
+
+int quality_check_password(const char *password, const char *username, char **ret_error) {
+ _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL;
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN];
+ void *auxerror;
+ int r;
+
+ assert(password);
+
+ r = pwq_allocate_context(&pwq);
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to allocate libpwquality context: %m");
+
+ r = sym_pwquality_check(pwq, password, NULL, username, &auxerror);
+ if (r < 0) {
+
+ if (ret_error) {
+ _cleanup_free_ char *e = NULL;
+
+ e = strdup(sym_pwquality_strerror(buf, sizeof(buf), r, auxerror));
+ if (!e)
+ return -ENOMEM;
+
+ *ret_error = TAKE_PTR(e);
+ }
+
+ return 0; /* all bad */
+ }
+
+ return 1; /* all good */
+}
+
+#endif
diff --git a/src/shared/pwquality-util.h b/src/shared/pwquality-util.h
new file mode 100644
index 0000000..de288bb
--- /dev/null
+++ b/src/shared/pwquality-util.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+#if HAVE_PWQUALITY
+/* pwquality.h uses size_t but doesn't include sys/types.h on its own */
+#include <sys/types.h>
+#include <pwquality.h>
+
+extern int (*sym_pwquality_check)(pwquality_settings_t *pwq, const char *password, const char *oldpassword, const char *user, void **auxerror);
+extern pwquality_settings_t *(*sym_pwquality_default_settings)(void);
+extern void (*sym_pwquality_free_settings)(pwquality_settings_t *pwq);
+extern int (*sym_pwquality_generate)(pwquality_settings_t *pwq, int entropy_bits, char **password);
+extern int (*sym_pwquality_get_str_value)(pwquality_settings_t *pwq, int setting, const char **value);
+extern int (*sym_pwquality_read_config)(pwquality_settings_t *pwq, const char *cfgfile, void **auxerror);
+extern int (*sym_pwquality_set_int_value)(pwquality_settings_t *pwq, int setting, int value);
+extern const char* (*sym_pwquality_strerror)(char *buf, size_t len, int errcode, void *auxerror);
+
+int dlopen_pwquality(void);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(pwquality_settings_t*, sym_pwquality_free_settings);
+
+void pwq_maybe_disable_dictionary(pwquality_settings_t *pwq);
+int pwq_allocate_context(pwquality_settings_t **ret);
+int suggest_passwords(void);
+int quality_check_password(const char *password, const char *username, char **ret_error);
+
+#else
+
+static inline int suggest_passwords(void) {
+ return 0;
+}
+
+static inline int quality_check_password(const char *password, const char *username, char **ret_error) {
+ if (ret_error)
+ *ret_error = NULL;
+ return 1; /* all good */
+}
+
+#endif
diff --git a/src/shared/qrcode-util.c b/src/shared/qrcode-util.c
new file mode 100644
index 0000000..7050e18
--- /dev/null
+++ b/src/shared/qrcode-util.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "qrcode-util.h"
+
+#if HAVE_QRENCODE
+#include <qrencode.h>
+
+#include "dlfcn-util.h"
+#include "locale-util.h"
+#include "terminal-util.h"
+
+#define ANSI_WHITE_ON_BLACK "\033[40;37;1m"
+
+static void print_border(FILE *output, unsigned width) {
+ /* Four rows of border */
+ for (unsigned y = 0; y < 4; y += 2) {
+ fputs(ANSI_WHITE_ON_BLACK, output);
+
+ for (unsigned x = 0; x < 4 + width + 4; x++)
+ fputs("\342\226\210", output);
+
+ fputs(ANSI_NORMAL "\n", output);
+ }
+}
+
+static void write_qrcode(FILE *output, QRcode *qr) {
+ assert(qr);
+
+ if (!output)
+ output = stdout;
+
+ print_border(output, qr->width);
+
+ for (unsigned y = 0; y < (unsigned) qr->width; y += 2) {
+ const uint8_t *row1 = qr->data + qr->width * y;
+ const uint8_t *row2 = row1 + qr->width;
+
+ fputs(ANSI_WHITE_ON_BLACK, output);
+ for (unsigned x = 0; x < 4; x++)
+ fputs("\342\226\210", output);
+
+ for (unsigned x = 0; x < (unsigned) qr->width; x++) {
+ bool a, b;
+
+ a = row1[x] & 1;
+ b = (y+1) < (unsigned) qr->width ? (row2[x] & 1) : false;
+
+ if (a && b)
+ fputc(' ', output);
+ else if (a)
+ fputs("\342\226\204", output);
+ else if (b)
+ fputs("\342\226\200", output);
+ else
+ fputs("\342\226\210", output);
+ }
+
+ for (unsigned x = 0; x < 4; x++)
+ fputs("\342\226\210", output);
+ fputs(ANSI_NORMAL "\n", output);
+ }
+
+ print_border(output, qr->width);
+ fflush(output);
+}
+
+int print_qrcode(FILE *out, const char *header, const char *string) {
+ QRcode* (*sym_QRcode_encodeString)(const char *string, int version, QRecLevel level, QRencodeMode hint, int casesensitive);
+ void (*sym_QRcode_free)(QRcode *qrcode);
+ _cleanup_(dlclosep) void *dl = NULL;
+ QRcode* qr;
+ int r;
+
+ /* If this is not an UTF-8 system or ANSI colors aren't supported/disabled don't print any QR
+ * codes */
+ if (!is_locale_utf8() || !colors_enabled())
+ return -EOPNOTSUPP;
+
+ dl = dlopen("libqrencode.so.4", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "QRCODE support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_QRcode_encodeString, "QRcode_encodeString",
+ &sym_QRcode_free, "QRcode_free",
+ NULL);
+ if (r < 0)
+ return r;
+
+ qr = sym_QRcode_encodeString(string, 0, QR_ECLEVEL_L, QR_MODE_8, 0);
+ if (!qr)
+ return -ENOMEM;
+
+ if (header)
+ fprintf(out, "\n%s:\n\n", header);
+
+ write_qrcode(out, qr);
+
+ fputc('\n', out);
+
+ sym_QRcode_free(qr);
+ return 0;
+}
+#endif
diff --git a/src/shared/qrcode-util.h b/src/shared/qrcode-util.h
new file mode 100644
index 0000000..6fc45c9
--- /dev/null
+++ b/src/shared/qrcode-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+#include <stdio.h>
+#include <errno.h>
+
+#if HAVE_QRENCODE
+int print_qrcode(FILE *out, const char *header, const char *string);
+#else
+static inline int print_qrcode(FILE *out, const char *header, const char *string) {
+ return -EOPNOTSUPP;
+}
+#endif
diff --git a/src/shared/reboot-util.c b/src/shared/reboot-util.c
new file mode 100644
index 0000000..756f9d3
--- /dev/null
+++ b/src/shared/reboot-util.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "proc-cmdline.h"
+#include "raw-reboot.h"
+#include "reboot-util.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "virt.h"
+
+int update_reboot_parameter_and_warn(const char *parameter, bool keep) {
+ int r;
+
+ if (isempty(parameter)) {
+ if (keep)
+ return 0;
+
+ if (unlink("/run/systemd/reboot-param") < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
+ }
+
+ return 0;
+ }
+
+ RUN_WITH_UMASK(0022) {
+ r = write_string_file("/run/systemd/reboot-param", parameter,
+ WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to write reboot parameter file: %m");
+ }
+
+ return 0;
+}
+
+int read_reboot_parameter(char **parameter) {
+ int r;
+
+ assert(parameter);
+
+ r = read_one_line_file("/run/systemd/reboot-param", parameter);
+ if (r < 0 && r != -ENOENT)
+ return log_debug_errno(r, "Failed to read /run/systemd/reboot-param: %m");
+
+ return 0;
+}
+
+int reboot_with_parameter(RebootFlags flags) {
+ int r;
+
+ /* Reboots the system with a parameter that is read from /run/systemd/reboot-param. Returns 0 if
+ * REBOOT_DRY_RUN was set and the actual reboot operation was hence skipped. If REBOOT_FALLBACK is
+ * set and the reboot with parameter doesn't work out a fallback to classic reboot() is attempted. If
+ * REBOOT_FALLBACK is not set, 0 is returned instead, which should be considered indication for the
+ * caller to fall back to reboot() on its own, or somehow else deal with this. If REBOOT_LOG is
+ * specified will log about what it is going to do, as well as all errors. */
+
+ if (detect_container() == 0) {
+ _cleanup_free_ char *parameter = NULL;
+
+ r = read_one_line_file("/run/systemd/reboot-param", &parameter);
+ if (r < 0 && r != -ENOENT)
+ log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, r,
+ "Failed to read reboot parameter file, ignoring: %m");
+
+ if (!isempty(parameter)) {
+ log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG,
+ "Rebooting with argument '%s'.", parameter);
+
+ if (flags & REBOOT_DRY_RUN)
+ return 0;
+
+ (void) raw_reboot(LINUX_REBOOT_CMD_RESTART2, parameter);
+
+ log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, errno,
+ "Failed to reboot with parameter, retrying without: %m");
+ }
+ }
+
+ if (!(flags & REBOOT_FALLBACK))
+ return 0;
+
+ log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG, "Rebooting.");
+
+ if (flags & REBOOT_DRY_RUN)
+ return 0;
+
+ (void) reboot(RB_AUTOBOOT);
+
+ return log_full_errno(flags & REBOOT_LOG ? LOG_ERR : LOG_DEBUG, errno, "Failed to reboot: %m");
+}
+
+int shall_restore_state(void) {
+ bool ret;
+ int r;
+
+ r = proc_cmdline_get_bool("systemd.restore_state", &ret);
+ if (r < 0)
+ return r;
+
+ return r > 0 ? ret : true;
+}
diff --git a/src/shared/reboot-util.h b/src/shared/reboot-util.h
new file mode 100644
index 0000000..bbca8b8
--- /dev/null
+++ b/src/shared/reboot-util.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int update_reboot_parameter_and_warn(const char *parameter, bool keep);
+
+typedef enum RebootFlags {
+ REBOOT_LOG = 1 << 0, /* log about what we are going to do and all errors */
+ REBOOT_DRY_RUN = 1 << 1, /* return 0 right before actually doing the reboot */
+ REBOOT_FALLBACK = 1 << 2, /* fall back to plain reboot() if argument-based reboot doesn't work, isn't configured or doesn't apply otherwise */
+} RebootFlags;
+
+int read_reboot_parameter(char **parameter);
+int reboot_with_parameter(RebootFlags flags);
+
+int shall_restore_state(void);
diff --git a/src/shared/resize-fs.c b/src/shared/resize-fs.c
new file mode 100644
index 0000000..33cb78b
--- /dev/null
+++ b/src/shared/resize-fs.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/btrfs.h>
+#include <linux/magic.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+
+#include "blockdev-util.h"
+#include "fs-util.h"
+#include "missing_fs.h"
+#include "missing_magic.h"
+#include "missing_xfs.h"
+#include "resize-fs.h"
+#include "stat-util.h"
+
+int resize_fs(int fd, uint64_t sz, uint64_t *ret_size) {
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ /* Rounds down to next block size */
+
+ if (sz <= 0 || sz == UINT64_MAX)
+ return -ERANGE;
+
+ if (fstatfs(fd, &sfs) < 0)
+ return -errno;
+
+ if (is_fs_type(&sfs, EXT4_SUPER_MAGIC)) {
+ uint64_t u;
+
+ if (sz < EXT4_MINIMAL_SIZE)
+ return -ERANGE;
+
+ u = sz / sfs.f_bsize;
+
+ if (ioctl(fd, EXT4_IOC_RESIZE_FS, &u) < 0)
+ return -errno;
+
+ if (ret_size)
+ *ret_size = u * sfs.f_bsize;
+
+ } else if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) {
+ struct btrfs_ioctl_vol_args args = {};
+
+ /* 256M is the minimize size enforced by the btrfs kernel code when resizing (which is
+ * strange btw, as mkfs.btrfs is fine creating file systems > 109M). It will return EINVAL in
+ * that case, let's catch this error beforehand though, and report a more explanatory
+ * error. */
+
+ if (sz < BTRFS_MINIMAL_SIZE)
+ return -ERANGE;
+
+ sz -= sz % sfs.f_bsize;
+
+ xsprintf(args.name, "%" PRIu64, sz);
+
+ if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
+ return -errno;
+
+ if (ret_size)
+ *ret_size = sz;
+
+ } else if (is_fs_type(&sfs, XFS_SB_MAGIC)) {
+ xfs_fsop_geom_t geo;
+ xfs_growfs_data_t d;
+
+ if (sz < XFS_MINIMAL_SIZE)
+ return -ERANGE;
+
+ if (ioctl(fd, XFS_IOC_FSGEOMETRY, &geo) < 0)
+ return -errno;
+
+ d = (xfs_growfs_data_t) {
+ .imaxpct = geo.imaxpct,
+ .newblocks = sz / geo.blocksize,
+ };
+
+ if (ioctl(fd, XFS_IOC_FSGROWFSDATA, &d) < 0)
+ return -errno;
+
+ if (ret_size)
+ *ret_size = d.newblocks * geo.blocksize;
+
+ } else
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+uint64_t minimal_size_by_fs_magic(statfs_f_type_t magic) {
+
+ switch (magic) {
+
+ case (statfs_f_type_t) EXT4_SUPER_MAGIC:
+ return EXT4_MINIMAL_SIZE;
+
+ case (statfs_f_type_t) XFS_SB_MAGIC:
+ return XFS_MINIMAL_SIZE;
+
+ case (statfs_f_type_t) BTRFS_SUPER_MAGIC:
+ return BTRFS_MINIMAL_SIZE;
+
+ default:
+ return UINT64_MAX;
+ }
+}
+
+uint64_t minimal_size_by_fs_name(const char *name) {
+
+ if (streq_ptr(name, "ext4"))
+ return EXT4_MINIMAL_SIZE;
+
+ if (streq_ptr(name, "xfs"))
+ return XFS_MINIMAL_SIZE;
+
+ if (streq_ptr(name, "btrfs"))
+ return BTRFS_MINIMAL_SIZE;
+
+ return UINT64_MAX;
+}
diff --git a/src/shared/resize-fs.h b/src/shared/resize-fs.h
new file mode 100644
index 0000000..8831fd8
--- /dev/null
+++ b/src/shared/resize-fs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "stat-util.h"
+
+int resize_fs(int fd, uint64_t sz, uint64_t *ret_size);
+
+#define BTRFS_MINIMAL_SIZE (256U*1024U*1024U)
+#define XFS_MINIMAL_SIZE (14U*1024U*1024U)
+#define EXT4_MINIMAL_SIZE (1024U*1024U)
+
+uint64_t minimal_size_by_fs_magic(statfs_f_type_t magic);
+uint64_t minimal_size_by_fs_name(const char *str);
diff --git a/src/shared/resolve-util.c b/src/shared/resolve-util.c
new file mode 100644
index 0000000..1023b62
--- /dev/null
+++ b/src/shared/resolve-util.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "resolve-util.h"
+#include "string-table.h"
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_resolve_support, resolve_support, ResolveSupport, "Failed to parse resolve support setting");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dnssec_mode, dnssec_mode, DnssecMode, "Failed to parse DNSSEC mode setting");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_over_tls_mode, dns_over_tls_mode, DnsOverTlsMode, "Failed to parse DNS-over-TLS mode setting");
+
+static const char* const resolve_support_table[_RESOLVE_SUPPORT_MAX] = {
+ [RESOLVE_SUPPORT_NO] = "no",
+ [RESOLVE_SUPPORT_YES] = "yes",
+ [RESOLVE_SUPPORT_RESOLVE] = "resolve",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(resolve_support, ResolveSupport, RESOLVE_SUPPORT_YES);
+
+static const char* const dnssec_mode_table[_DNSSEC_MODE_MAX] = {
+ [DNSSEC_NO] = "no",
+ [DNSSEC_ALLOW_DOWNGRADE] = "allow-downgrade",
+ [DNSSEC_YES] = "yes",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dnssec_mode, DnssecMode, DNSSEC_YES);
+
+static const char* const dns_over_tls_mode_table[_DNS_OVER_TLS_MODE_MAX] = {
+ [DNS_OVER_TLS_NO] = "no",
+ [DNS_OVER_TLS_OPPORTUNISTIC] = "opportunistic",
+ [DNS_OVER_TLS_YES] = "yes",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_over_tls_mode, DnsOverTlsMode, DNS_OVER_TLS_YES);
+
+bool dns_server_address_valid(int family, const union in_addr_union *sa) {
+
+ /* Refuses the 0 IP addresses as well as 127.0.0.53 (which is our own DNS stub) */
+
+ if (in_addr_is_null(family, sa))
+ return false;
+
+ if (family == AF_INET && sa->in.s_addr == htobe32(INADDR_DNS_STUB))
+ return false;
+
+ return true;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_cache_mode, dns_cache_mode, DnsCacheMode, "Failed to parse DNS cache mode setting")
+
+static const char* const dns_cache_mode_table[_DNS_CACHE_MODE_MAX] = {
+ [DNS_CACHE_MODE_YES] = "yes",
+ [DNS_CACHE_MODE_NO] = "no",
+ [DNS_CACHE_MODE_NO_NEGATIVE] = "no-negative",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_cache_mode, DnsCacheMode, DNS_CACHE_MODE_YES);
diff --git a/src/shared/resolve-util.h b/src/shared/resolve-util.h
new file mode 100644
index 0000000..4ea24a6
--- /dev/null
+++ b/src/shared/resolve-util.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "macro.h"
+
+/* 127.0.0.53 in native endian */
+#define INADDR_DNS_STUB ((in_addr_t) 0x7f000035U)
+
+typedef enum DnsCacheMode DnsCacheMode;
+
+enum DnsCacheMode {
+ DNS_CACHE_MODE_NO,
+ DNS_CACHE_MODE_YES,
+ DNS_CACHE_MODE_NO_NEGATIVE,
+ _DNS_CACHE_MODE_MAX,
+ _DNS_CACHE_MODE_INVALID = 1
+};
+
+typedef enum ResolveSupport ResolveSupport;
+typedef enum DnssecMode DnssecMode;
+typedef enum DnsOverTlsMode DnsOverTlsMode;
+
+enum ResolveSupport {
+ RESOLVE_SUPPORT_NO,
+ RESOLVE_SUPPORT_YES,
+ RESOLVE_SUPPORT_RESOLVE,
+ _RESOLVE_SUPPORT_MAX,
+ _RESOLVE_SUPPORT_INVALID = -1
+};
+
+enum DnssecMode {
+ /* No DNSSEC validation is done */
+ DNSSEC_NO,
+
+ /* Validate locally, if the server knows DO, but if not,
+ * don't. Don't trust the AD bit. If the server doesn't do
+ * DNSSEC properly, downgrade to non-DNSSEC operation. Of
+ * course, we then are vulnerable to a downgrade attack, but
+ * that's life and what is configured. */
+ DNSSEC_ALLOW_DOWNGRADE,
+
+ /* Insist on DNSSEC server support, and rather fail than downgrading. */
+ DNSSEC_YES,
+
+ _DNSSEC_MODE_MAX,
+ _DNSSEC_MODE_INVALID = -1
+};
+
+enum DnsOverTlsMode {
+ /* No connection is made for DNS-over-TLS */
+ DNS_OVER_TLS_NO,
+
+ /* Try to connect using DNS-over-TLS, but if connection fails,
+ * fall back to using an unencrypted connection */
+ DNS_OVER_TLS_OPPORTUNISTIC,
+
+ /* Enforce DNS-over-TLS and require valid server certificates */
+ DNS_OVER_TLS_YES,
+
+ _DNS_OVER_TLS_MODE_MAX,
+ _DNS_OVER_TLS_MODE_INVALID = -1
+};
+
+CONFIG_PARSER_PROTOTYPE(config_parse_resolve_support);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssec_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_over_tls_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_cache_mode);
+
+const char* resolve_support_to_string(ResolveSupport p) _const_;
+ResolveSupport resolve_support_from_string(const char *s) _pure_;
+
+const char* dnssec_mode_to_string(DnssecMode p) _const_;
+DnssecMode dnssec_mode_from_string(const char *s) _pure_;
+
+const char* dns_over_tls_mode_to_string(DnsOverTlsMode p) _const_;
+DnsOverTlsMode dns_over_tls_mode_from_string(const char *s) _pure_;
+
+bool dns_server_address_valid(int family, const union in_addr_union *sa);
+
+const char* dns_cache_mode_to_string(DnsCacheMode p) _const_;
+DnsCacheMode dns_cache_mode_from_string(const char *s) _pure_;
+
+/* A resolv.conf file containing the DNS server and domain data we learnt from uplink, i.e. the full uplink data */
+#define PRIVATE_UPLINK_RESOLV_CONF "/run/systemd/resolve/resolv.conf"
+
+/* A resolv.conf file containing the domain data we learnt from uplink, but our own DNS server address. */
+#define PRIVATE_STUB_RESOLV_CONF "/run/systemd/resolve/stub-resolv.conf"
+
+/* A static resolv.conf file containing no domains, but only our own DNS server address */
+#define PRIVATE_STATIC_RESOLV_CONF ROOTLIBEXECDIR "/resolv.conf"
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
new file mode 100644
index 0000000..ccae9d4
--- /dev/null
+++ b/src/shared/seccomp-util.c
@@ -0,0 +1,2140 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/seccomp.h>
+#include <seccomp.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "macro.h"
+#include "nsflags.h"
+#include "nulstr-util.h"
+#include "process-util.h"
+#include "seccomp-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+
+const uint32_t seccomp_local_archs[] = {
+
+ /* Note: always list the native arch we are compiled as last, so that users can deny-list seccomp(), but our own calls to it still succeed */
+
+#if defined(__x86_64__) && defined(__ILP32__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X86_64,
+ SCMP_ARCH_X32, /* native */
+#elif defined(__x86_64__) && !defined(__ILP32__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X32,
+ SCMP_ARCH_X86_64, /* native */
+#elif defined(__i386__)
+ SCMP_ARCH_X86,
+#elif defined(__aarch64__)
+ SCMP_ARCH_ARM,
+ SCMP_ARCH_AARCH64, /* native */
+#elif defined(__arm__)
+ SCMP_ARCH_ARM,
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64LE,
+ SCMP_ARCH_PPC64, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64,
+ SCMP_ARCH_PPC64LE, /* native */
+#elif defined(__powerpc__)
+ SCMP_ARCH_PPC,
+#elif defined(__riscv) && __riscv_xlen == 64 && defined(SCMP_ARCH_RISCV64)
+ SCMP_ARCH_RISCV64,
+#elif defined(__s390x__)
+ SCMP_ARCH_S390,
+ SCMP_ARCH_S390X, /* native */
+#elif defined(__s390__)
+ SCMP_ARCH_S390,
+#endif
+ (uint32_t) -1
+ };
+
+const char* seccomp_arch_to_string(uint32_t c) {
+ /* Maintain order used in <seccomp.h>.
+ *
+ * Names used here should be the same as those used for ConditionArchitecture=,
+ * except for "subarchitectures" like x32. */
+
+ switch(c) {
+ case SCMP_ARCH_NATIVE:
+ return "native";
+ case SCMP_ARCH_X86:
+ return "x86";
+ case SCMP_ARCH_X86_64:
+ return "x86-64";
+ case SCMP_ARCH_X32:
+ return "x32";
+ case SCMP_ARCH_ARM:
+ return "arm";
+ case SCMP_ARCH_AARCH64:
+ return "arm64";
+ case SCMP_ARCH_MIPS:
+ return "mips";
+ case SCMP_ARCH_MIPS64:
+ return "mips64";
+ case SCMP_ARCH_MIPS64N32:
+ return "mips64-n32";
+ case SCMP_ARCH_MIPSEL:
+ return "mips-le";
+ case SCMP_ARCH_MIPSEL64:
+ return "mips64-le";
+ case SCMP_ARCH_MIPSEL64N32:
+ return "mips64-le-n32";
+ case SCMP_ARCH_PPC:
+ return "ppc";
+ case SCMP_ARCH_PPC64:
+ return "ppc64";
+ case SCMP_ARCH_PPC64LE:
+ return "ppc64-le";
+#ifdef SCMP_ARCH_RISCV64
+ case SCMP_ARCH_RISCV64:
+ return "riscv64";
+#endif
+ case SCMP_ARCH_S390:
+ return "s390";
+ case SCMP_ARCH_S390X:
+ return "s390x";
+ default:
+ return NULL;
+ }
+}
+
+int seccomp_arch_from_string(const char *n, uint32_t *ret) {
+ if (!n)
+ return -EINVAL;
+
+ assert(ret);
+
+ if (streq(n, "native"))
+ *ret = SCMP_ARCH_NATIVE;
+ else if (streq(n, "x86"))
+ *ret = SCMP_ARCH_X86;
+ else if (streq(n, "x86-64"))
+ *ret = SCMP_ARCH_X86_64;
+ else if (streq(n, "x32"))
+ *ret = SCMP_ARCH_X32;
+ else if (streq(n, "arm"))
+ *ret = SCMP_ARCH_ARM;
+ else if (streq(n, "arm64"))
+ *ret = SCMP_ARCH_AARCH64;
+ else if (streq(n, "mips"))
+ *ret = SCMP_ARCH_MIPS;
+ else if (streq(n, "mips64"))
+ *ret = SCMP_ARCH_MIPS64;
+ else if (streq(n, "mips64-n32"))
+ *ret = SCMP_ARCH_MIPS64N32;
+ else if (streq(n, "mips-le"))
+ *ret = SCMP_ARCH_MIPSEL;
+ else if (streq(n, "mips64-le"))
+ *ret = SCMP_ARCH_MIPSEL64;
+ else if (streq(n, "mips64-le-n32"))
+ *ret = SCMP_ARCH_MIPSEL64N32;
+ else if (streq(n, "ppc"))
+ *ret = SCMP_ARCH_PPC;
+ else if (streq(n, "ppc64"))
+ *ret = SCMP_ARCH_PPC64;
+ else if (streq(n, "ppc64-le"))
+ *ret = SCMP_ARCH_PPC64LE;
+#ifdef SCMP_ARCH_RISCV64
+ else if (streq(n, "riscv64"))
+ *ret = SCMP_ARCH_RISCV64;
+#endif
+ else if (streq(n, "s390"))
+ *ret = SCMP_ARCH_S390;
+ else if (streq(n, "s390x"))
+ *ret = SCMP_ARCH_S390X;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int r;
+
+ /* Much like seccomp_init(), but initializes the filter for one specific architecture only, without affecting
+ * any others. Also, turns off the NNP fiddling. */
+
+ seccomp = seccomp_init(default_action);
+ if (!seccomp)
+ return -ENOMEM;
+
+ if (arch != SCMP_ARCH_NATIVE &&
+ arch != seccomp_arch_native()) {
+
+ r = seccomp_arch_remove(seccomp, seccomp_arch_native());
+ if (r < 0)
+ return r;
+
+ r = seccomp_arch_add(seccomp, arch);
+ if (r < 0)
+ return r;
+
+ assert(seccomp_arch_exist(seccomp, arch) >= 0);
+ assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) == -EEXIST);
+ assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) == -EEXIST);
+ } else {
+ assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) >= 0);
+ assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) >= 0);
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_ACT_BADARCH, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ return r;
+
+#if SCMP_VER_MAJOR >= 3 || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 4)
+ if (getenv_bool("SYSTEMD_LOG_SECCOMP") > 0) {
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_LOG, 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to enable seccomp event logging: %m");
+ }
+#endif
+
+ *ret = TAKE_PTR(seccomp);
+ return 0;
+}
+
+static bool is_basic_seccomp_available(void) {
+ return prctl(PR_GET_SECCOMP, 0, 0, 0, 0) >= 0;
+}
+
+static bool is_seccomp_filter_available(void) {
+ return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) < 0 &&
+ errno == EFAULT;
+}
+
+bool is_seccomp_available(void) {
+ static int cached_enabled = -1;
+
+ if (cached_enabled < 0) {
+ int b;
+
+ b = getenv_bool_secure("SYSTEMD_SECCOMP");
+ if (b != 0) {
+ if (b < 0 && b != -ENXIO) /* ENXIO: env var unset */
+ log_debug_errno(b, "Failed to parse $SYSTEMD_SECCOMP value, ignoring.");
+
+ cached_enabled =
+ is_basic_seccomp_available() &&
+ is_seccomp_filter_available();
+ } else
+ cached_enabled = false;
+ }
+
+ return cached_enabled;
+}
+
+const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+ [SYSCALL_FILTER_SET_DEFAULT] = {
+ .name = "@default",
+ .help = "System calls that are always permitted",
+ .value =
+ "brk\0"
+ "cacheflush\0"
+ "clock_getres\0"
+ "clock_getres_time64\0"
+ "clock_gettime\0"
+ "clock_gettime64\0"
+ "clock_nanosleep\0"
+ "clock_nanosleep_time64\0"
+ "execve\0"
+ "exit\0"
+ "exit_group\0"
+ "futex\0"
+ "futex_time64\0"
+ "get_robust_list\0"
+ "get_thread_area\0"
+ "getegid\0"
+ "getegid32\0"
+ "geteuid\0"
+ "geteuid32\0"
+ "getgid\0"
+ "getgid32\0"
+ "getgroups\0"
+ "getgroups32\0"
+ "getpgid\0"
+ "getpgrp\0"
+ "getpid\0"
+ "getppid\0"
+ "getresgid\0"
+ "getresgid32\0"
+ "getresuid\0"
+ "getresuid32\0"
+ "getrlimit\0" /* make sure processes can query stack size and such */
+ "getsid\0"
+ "gettid\0"
+ "gettimeofday\0"
+ "getuid\0"
+ "getuid32\0"
+ "membarrier\0"
+ "mmap\0"
+ "mmap2\0"
+ "munmap\0"
+ "nanosleep\0"
+ "pause\0"
+ "prlimit64\0"
+ "restart_syscall\0"
+ "rseq\0"
+ "rt_sigreturn\0"
+ "sched_yield\0"
+ "set_robust_list\0"
+ "set_thread_area\0"
+ "set_tid_address\0"
+ "set_tls\0"
+ "sigreturn\0"
+ "time\0"
+ "ugetrlimit\0"
+ },
+ [SYSCALL_FILTER_SET_AIO] = {
+ .name = "@aio",
+ .help = "Asynchronous IO",
+ .value =
+ "io_cancel\0"
+ "io_destroy\0"
+ "io_getevents\0"
+ "io_pgetevents\0"
+ "io_pgetevents_time64\0"
+ "io_setup\0"
+ "io_submit\0"
+ "io_uring_enter\0"
+ "io_uring_register\0"
+ "io_uring_setup\0"
+ },
+ [SYSCALL_FILTER_SET_BASIC_IO] = {
+ .name = "@basic-io",
+ .help = "Basic IO",
+ .value =
+ "_llseek\0"
+ "close\0"
+ "close_range\0"
+ "dup\0"
+ "dup2\0"
+ "dup3\0"
+ "lseek\0"
+ "pread64\0"
+ "preadv\0"
+ "preadv2\0"
+ "pwrite64\0"
+ "pwritev\0"
+ "pwritev2\0"
+ "read\0"
+ "readv\0"
+ "write\0"
+ "writev\0"
+ },
+ [SYSCALL_FILTER_SET_CHOWN] = {
+ .name = "@chown",
+ .help = "Change ownership of files and directories",
+ .value =
+ "chown\0"
+ "chown32\0"
+ "fchown\0"
+ "fchown32\0"
+ "fchownat\0"
+ "lchown\0"
+ "lchown32\0"
+ },
+ [SYSCALL_FILTER_SET_CLOCK] = {
+ .name = "@clock",
+ .help = "Change the system time",
+ .value =
+ "adjtimex\0"
+ "clock_adjtime\0"
+ "clock_adjtime64\0"
+ "clock_settime\0"
+ "clock_settime64\0"
+ "settimeofday\0"
+ },
+ [SYSCALL_FILTER_SET_CPU_EMULATION] = {
+ .name = "@cpu-emulation",
+ .help = "System calls for CPU emulation functionality",
+ .value =
+ "modify_ldt\0"
+ "subpage_prot\0"
+ "switch_endian\0"
+ "vm86\0"
+ "vm86old\0"
+ },
+ [SYSCALL_FILTER_SET_DEBUG] = {
+ .name = "@debug",
+ .help = "Debugging, performance monitoring and tracing functionality",
+ .value =
+ "lookup_dcookie\0"
+ "perf_event_open\0"
+ "pidfd_getfd\0"
+ "ptrace\0"
+ "rtas\0"
+#if defined __s390__ || defined __s390x__
+ "s390_runtime_instr\0"
+#endif
+ "sys_debug_setcontext\0"
+ },
+ [SYSCALL_FILTER_SET_FILE_SYSTEM] = {
+ .name = "@file-system",
+ .help = "File system operations",
+ .value =
+ "access\0"
+ "chdir\0"
+ "chmod\0"
+ "close\0"
+ "creat\0"
+ "faccessat\0"
+ "faccessat2\0"
+ "fallocate\0"
+ "fchdir\0"
+ "fchmod\0"
+ "fchmodat\0"
+ "fcntl\0"
+ "fcntl64\0"
+ "fgetxattr\0"
+ "flistxattr\0"
+ "fremovexattr\0"
+ "fsetxattr\0"
+ "fstat\0"
+ "fstat64\0"
+ "fstatat64\0"
+ "fstatfs\0"
+ "fstatfs64\0"
+ "ftruncate\0"
+ "ftruncate64\0"
+ "futimesat\0"
+ "getcwd\0"
+ "getdents\0"
+ "getdents64\0"
+ "getxattr\0"
+ "inotify_add_watch\0"
+ "inotify_init\0"
+ "inotify_init1\0"
+ "inotify_rm_watch\0"
+ "lgetxattr\0"
+ "link\0"
+ "linkat\0"
+ "listxattr\0"
+ "llistxattr\0"
+ "lremovexattr\0"
+ "lsetxattr\0"
+ "lstat\0"
+ "lstat64\0"
+ "mkdir\0"
+ "mkdirat\0"
+ "mknod\0"
+ "mknodat\0"
+ "newfstatat\0"
+ "oldfstat\0"
+ "oldlstat\0"
+ "oldstat\0"
+ "open\0"
+ "openat\0"
+ "openat2\0"
+ "readlink\0"
+ "readlinkat\0"
+ "removexattr\0"
+ "rename\0"
+ "renameat\0"
+ "renameat2\0"
+ "rmdir\0"
+ "setxattr\0"
+ "stat\0"
+ "stat64\0"
+ "statfs\0"
+ "statfs64\0"
+ "statx\0"
+ "symlink\0"
+ "symlinkat\0"
+ "truncate\0"
+ "truncate64\0"
+ "unlink\0"
+ "unlinkat\0"
+ "utime\0"
+ "utimensat\0"
+ "utimensat_time64\0"
+ "utimes\0"
+ },
+ [SYSCALL_FILTER_SET_IO_EVENT] = {
+ .name = "@io-event",
+ .help = "Event loop system calls",
+ .value =
+ "_newselect\0"
+ "epoll_create\0"
+ "epoll_create1\0"
+ "epoll_ctl\0"
+ "epoll_ctl_old\0"
+ "epoll_pwait\0"
+ "epoll_wait\0"
+ "epoll_wait_old\0"
+ "eventfd\0"
+ "eventfd2\0"
+ "poll\0"
+ "ppoll\0"
+ "ppoll_time64\0"
+ "pselect6\0"
+ "pselect6_time64\0"
+ "select\0"
+ },
+ [SYSCALL_FILTER_SET_IPC] = {
+ .name = "@ipc",
+ .help = "SysV IPC, POSIX Message Queues or other IPC",
+ .value =
+ "ipc\0"
+ "memfd_create\0"
+ "mq_getsetattr\0"
+ "mq_notify\0"
+ "mq_open\0"
+ "mq_timedreceive\0"
+ "mq_timedreceive_time64\0"
+ "mq_timedsend\0"
+ "mq_timedsend_time64\0"
+ "mq_unlink\0"
+ "msgctl\0"
+ "msgget\0"
+ "msgrcv\0"
+ "msgsnd\0"
+ "pipe\0"
+ "pipe2\0"
+ "process_vm_readv\0"
+ "process_vm_writev\0"
+ "semctl\0"
+ "semget\0"
+ "semop\0"
+ "semtimedop\0"
+ "semtimedop_time64\0"
+ "shmat\0"
+ "shmctl\0"
+ "shmdt\0"
+ "shmget\0"
+ },
+ [SYSCALL_FILTER_SET_KEYRING] = {
+ .name = "@keyring",
+ .help = "Kernel keyring access",
+ .value =
+ "add_key\0"
+ "keyctl\0"
+ "request_key\0"
+ },
+ [SYSCALL_FILTER_SET_MEMLOCK] = {
+ .name = "@memlock",
+ .help = "Memory locking control",
+ .value =
+ "mlock\0"
+ "mlock2\0"
+ "mlockall\0"
+ "munlock\0"
+ "munlockall\0"
+ },
+ [SYSCALL_FILTER_SET_MODULE] = {
+ .name = "@module",
+ .help = "Loading and unloading of kernel modules",
+ .value =
+ "delete_module\0"
+ "finit_module\0"
+ "init_module\0"
+ },
+ [SYSCALL_FILTER_SET_MOUNT] = {
+ .name = "@mount",
+ .help = "Mounting and unmounting of file systems",
+ .value =
+ "chroot\0"
+ "fsconfig\0"
+ "fsmount\0"
+ "fsopen\0"
+ "fspick\0"
+ "mount\0"
+ "move_mount\0"
+ "open_tree\0"
+ "pivot_root\0"
+ "umount\0"
+ "umount2\0"
+ },
+ [SYSCALL_FILTER_SET_NETWORK_IO] = {
+ .name = "@network-io",
+ .help = "Network or Unix socket IO, should not be needed if not network facing",
+ .value =
+ "accept\0"
+ "accept4\0"
+ "bind\0"
+ "connect\0"
+ "getpeername\0"
+ "getsockname\0"
+ "getsockopt\0"
+ "listen\0"
+ "recv\0"
+ "recvfrom\0"
+ "recvmmsg\0"
+ "recvmmsg_time64\0"
+ "recvmsg\0"
+ "send\0"
+ "sendmmsg\0"
+ "sendmsg\0"
+ "sendto\0"
+ "setsockopt\0"
+ "shutdown\0"
+ "socket\0"
+ "socketcall\0"
+ "socketpair\0"
+ },
+ [SYSCALL_FILTER_SET_OBSOLETE] = {
+ /* some unknown even to libseccomp */
+ .name = "@obsolete",
+ .help = "Unusual, obsolete or unimplemented system calls",
+ .value =
+ "_sysctl\0"
+ "afs_syscall\0"
+ "bdflush\0"
+ "break\0"
+ "create_module\0"
+ "ftime\0"
+ "get_kernel_syms\0"
+ "getpmsg\0"
+ "gtty\0"
+ "idle\0"
+ "lock\0"
+ "mpx\0"
+ "prof\0"
+ "profil\0"
+ "putpmsg\0"
+ "query_module\0"
+ "security\0"
+ "sgetmask\0"
+ "ssetmask\0"
+ "stime\0"
+ "stty\0"
+ "sysfs\0"
+ "tuxcall\0"
+ "ulimit\0"
+ "uselib\0"
+ "ustat\0"
+ "vserver\0"
+ },
+ [SYSCALL_FILTER_SET_PKEY] = {
+ .name = "@pkey",
+ .help = "System calls used for memory protection keys",
+ .value =
+ "pkey_alloc\0"
+ "pkey_free\0"
+ "pkey_mprotect\0"
+ },
+ [SYSCALL_FILTER_SET_PRIVILEGED] = {
+ .name = "@privileged",
+ .help = "All system calls which need super-user capabilities",
+ .value =
+ "@chown\0"
+ "@clock\0"
+ "@module\0"
+ "@raw-io\0"
+ "@reboot\0"
+ "@swap\0"
+ "_sysctl\0"
+ "acct\0"
+ "bpf\0"
+ "capset\0"
+ "chroot\0"
+ "fanotify_init\0"
+ "fanotify_mark\0"
+ "nfsservctl\0"
+ "open_by_handle_at\0"
+ "pivot_root\0"
+ "quotactl\0"
+ "setdomainname\0"
+ "setfsuid\0"
+ "setfsuid32\0"
+ "setgroups\0"
+ "setgroups32\0"
+ "sethostname\0"
+ "setresuid\0"
+ "setresuid32\0"
+ "setreuid\0"
+ "setreuid32\0"
+ "setuid\0" /* We list the explicit system calls here, as @setuid also includes setgid() which is not necessarily privileged */
+ "setuid32\0"
+ "vhangup\0"
+ },
+ [SYSCALL_FILTER_SET_PROCESS] = {
+ .name = "@process",
+ .help = "Process control, execution, namespacing operations",
+ .value =
+ "arch_prctl\0"
+ "capget\0" /* Able to query arbitrary processes */
+ "clone\0"
+ "clone3\0"
+ "execveat\0"
+ "fork\0"
+ "getrusage\0"
+ "kill\0"
+ "pidfd_open\0"
+ "pidfd_send_signal\0"
+ "prctl\0"
+ "rt_sigqueueinfo\0"
+ "rt_tgsigqueueinfo\0"
+ "setns\0"
+ "swapcontext\0" /* Some archs e.g. powerpc32 are using it to do userspace context switches */
+ "tgkill\0"
+ "times\0"
+ "tkill\0"
+ "unshare\0"
+ "vfork\0"
+ "wait4\0"
+ "waitid\0"
+ "waitpid\0"
+ },
+ [SYSCALL_FILTER_SET_RAW_IO] = {
+ .name = "@raw-io",
+ .help = "Raw I/O port access",
+ .value =
+ "ioperm\0"
+ "iopl\0"
+ "pciconfig_iobase\0"
+ "pciconfig_read\0"
+ "pciconfig_write\0"
+#if defined __s390__ || defined __s390x__
+ "s390_pci_mmio_read\0"
+ "s390_pci_mmio_write\0"
+#endif
+ },
+ [SYSCALL_FILTER_SET_REBOOT] = {
+ .name = "@reboot",
+ .help = "Reboot and reboot preparation/kexec",
+ .value =
+ "kexec_file_load\0"
+ "kexec_load\0"
+ "reboot\0"
+ },
+ [SYSCALL_FILTER_SET_RESOURCES] = {
+ .name = "@resources",
+ .help = "Alter resource settings",
+ .value =
+ "ioprio_set\0"
+ "mbind\0"
+ "migrate_pages\0"
+ "move_pages\0"
+ "nice\0"
+ "sched_setaffinity\0"
+ "sched_setattr\0"
+ "sched_setparam\0"
+ "sched_setscheduler\0"
+ "set_mempolicy\0"
+ "setpriority\0"
+ "setrlimit\0"
+ },
+ [SYSCALL_FILTER_SET_SETUID] = {
+ .name = "@setuid",
+ .help = "Operations for changing user/group credentials",
+ .value =
+ "setgid\0"
+ "setgid32\0"
+ "setgroups\0"
+ "setgroups32\0"
+ "setregid\0"
+ "setregid32\0"
+ "setresgid\0"
+ "setresgid32\0"
+ "setresuid\0"
+ "setresuid32\0"
+ "setreuid\0"
+ "setreuid32\0"
+ "setuid\0"
+ "setuid32\0"
+ },
+ [SYSCALL_FILTER_SET_SIGNAL] = {
+ .name = "@signal",
+ .help = "Process signal handling",
+ .value =
+ "rt_sigaction\0"
+ "rt_sigpending\0"
+ "rt_sigprocmask\0"
+ "rt_sigsuspend\0"
+ "rt_sigtimedwait\0"
+ "rt_sigtimedwait_time64\0"
+ "sigaction\0"
+ "sigaltstack\0"
+ "signal\0"
+ "signalfd\0"
+ "signalfd4\0"
+ "sigpending\0"
+ "sigprocmask\0"
+ "sigsuspend\0"
+ },
+ [SYSCALL_FILTER_SET_SWAP] = {
+ .name = "@swap",
+ .help = "Enable/disable swap devices",
+ .value =
+ "swapoff\0"
+ "swapon\0"
+ },
+ [SYSCALL_FILTER_SET_SYNC] = {
+ .name = "@sync",
+ .help = "Synchronize files and memory to storage",
+ .value =
+ "fdatasync\0"
+ "fsync\0"
+ "msync\0"
+ "sync\0"
+ "sync_file_range\0"
+ "sync_file_range2\0"
+ "syncfs\0"
+ },
+ [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = {
+ .name = "@system-service",
+ .help = "General system service operations",
+ .value =
+ "@aio\0"
+ "@basic-io\0"
+ "@chown\0"
+ "@default\0"
+ "@file-system\0"
+ "@io-event\0"
+ "@ipc\0"
+ "@keyring\0"
+ "@memlock\0"
+ "@network-io\0"
+ "@process\0"
+ "@resources\0"
+ "@setuid\0"
+ "@signal\0"
+ "@sync\0"
+ "@timer\0"
+ "capget\0"
+ "capset\0"
+ "copy_file_range\0"
+ "fadvise64\0"
+ "fadvise64_64\0"
+ "flock\0"
+ "get_mempolicy\0"
+ "getcpu\0"
+ "getpriority\0"
+ "getrandom\0"
+ "ioctl\0"
+ "ioprio_get\0"
+ "kcmp\0"
+ "madvise\0"
+ "mprotect\0"
+ "mremap\0"
+ "name_to_handle_at\0"
+ "oldolduname\0"
+ "olduname\0"
+ "personality\0"
+ "readahead\0"
+ "readdir\0"
+ "remap_file_pages\0"
+ "sched_get_priority_max\0"
+ "sched_get_priority_min\0"
+ "sched_getaffinity\0"
+ "sched_getattr\0"
+ "sched_getparam\0"
+ "sched_getscheduler\0"
+ "sched_rr_get_interval\0"
+ "sched_rr_get_interval_time64\0"
+ "sched_yield\0"
+ "sendfile\0"
+ "sendfile64\0"
+ "setfsgid\0"
+ "setfsgid32\0"
+ "setfsuid\0"
+ "setfsuid32\0"
+ "setpgid\0"
+ "setsid\0"
+ "splice\0"
+ "sysinfo\0"
+ "tee\0"
+ "umask\0"
+ "uname\0"
+ "userfaultfd\0"
+ "vmsplice\0"
+ },
+ [SYSCALL_FILTER_SET_TIMER] = {
+ .name = "@timer",
+ .help = "Schedule operations by time",
+ .value =
+ "alarm\0"
+ "getitimer\0"
+ "setitimer\0"
+ "timer_create\0"
+ "timer_delete\0"
+ "timer_getoverrun\0"
+ "timer_gettime\0"
+ "timer_gettime64\0"
+ "timer_settime\0"
+ "timer_settime64\0"
+ "timerfd_create\0"
+ "timerfd_gettime\0"
+ "timerfd_gettime64\0"
+ "timerfd_settime\0"
+ "timerfd_settime64\0"
+ "times\0"
+ },
+ [SYSCALL_FILTER_SET_KNOWN] = {
+ .name = "@known",
+ .help = "All known syscalls declared in the kernel",
+ .value =
+#include "syscall-list.h"
+ },
+};
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name) {
+ if (isempty(name) || name[0] != '@')
+ return NULL;
+
+ for (unsigned i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
+ if (streq(syscall_filter_sets[i].name, name))
+ return syscall_filter_sets + i;
+
+ return NULL;
+}
+
+static int add_syscall_filter_set(
+ scmp_filter_ctx seccomp,
+ const SyscallFilterSet *set,
+ uint32_t action,
+ char **exclude,
+ bool log_missing,
+ char ***added);
+
+int seccomp_add_syscall_filter_item(
+ scmp_filter_ctx *seccomp,
+ const char *name,
+ uint32_t action,
+ char **exclude,
+ bool log_missing,
+ char ***added) {
+
+ assert(seccomp);
+ assert(name);
+
+ if (strv_contains(exclude, name))
+ return 0;
+
+ /* Any syscalls that are handled are added to the *added strv. The pointer
+ * must be either NULL or point to a valid pre-initialized possibly-empty strv. */
+
+ if (name[0] == '@') {
+ const SyscallFilterSet *other;
+
+ other = syscall_filter_set_find(name);
+ if (!other)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Filter set %s is not known!",
+ name);
+
+ return add_syscall_filter_set(seccomp, other, action, exclude, log_missing, added);
+
+ } else {
+ int id, r;
+
+ id = seccomp_syscall_resolve_name(name);
+ if (id == __NR_SCMP_ERROR) {
+ if (log_missing)
+ log_debug("System call %s is not known, ignoring.", name);
+ return 0;
+ }
+
+ r = seccomp_rule_add_exact(seccomp, action, id, 0);
+ if (r < 0) {
+ /* If the system call is not known on this architecture, then that's fine, let's ignore it */
+ bool ignore = r == -EDOM;
+
+ if (!ignore || log_missing)
+ log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
+ name, id, ignore ? ", ignoring" : "");
+ if (!ignore)
+ return r;
+ }
+
+ if (added) {
+ r = strv_extend(added, name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+ }
+}
+
+static int add_syscall_filter_set(
+ scmp_filter_ctx seccomp,
+ const SyscallFilterSet *set,
+ uint32_t action,
+ char **exclude,
+ bool log_missing,
+ char ***added) {
+
+ const char *sys;
+ int r;
+
+ /* Any syscalls that are handled are added to the *added strv. It needs to be initialized. */
+
+ assert(seccomp);
+ assert(set);
+
+ NULSTR_FOREACH(sys, set->value) {
+ r = seccomp_add_syscall_filter_item(seccomp, sys, action, exclude, log_missing, added);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing) {
+ uint32_t arch;
+ int r;
+
+ assert(set);
+
+ /* The one-stop solution: allocate a seccomp object, add the specified filter to it, and apply it. Once for
+ * each local arch. */
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, default_action);
+ if (r < 0)
+ return r;
+
+ r = add_syscall_filter_set(seccomp, set, action, NULL, log_missing, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add filter set: %m");
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing) {
+ uint32_t arch;
+ int r;
+
+ /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Set* of syscalls, instead of a
+ * SyscallFilterSet* table. */
+
+ if (hashmap_isempty(set) && default_action == SCMP_ACT_ALLOW)
+ return 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ void *syscall_id, *val;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, default_action);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(val, syscall_id, set) {
+ uint32_t a = action;
+ int id = PTR_TO_INT(syscall_id) - 1;
+ int error = PTR_TO_INT(val);
+
+ if (error == SECCOMP_ERROR_NUMBER_KILL)
+ a = scmp_act_kill_process();
+#ifdef SCMP_ACT_LOG
+ else if (action == SCMP_ACT_LOG)
+ a = SCMP_ACT_LOG;
+#endif
+ else if (action != SCMP_ACT_ALLOW && error >= 0)
+ a = SCMP_ACT_ERRNO(error);
+
+ r = seccomp_rule_add_exact(seccomp, a, id, 0);
+ if (r < 0) {
+ /* If the system call is not known on this architecture, then that's fine, let's ignore it */
+ _cleanup_free_ char *n = NULL;
+ bool ignore;
+
+ n = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, id);
+ ignore = r == -EDOM;
+ if (!ignore || log_missing)
+ log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
+ strna(n), id, ignore ? ", ignoring" : "");
+ if (!ignore)
+ return r;
+ }
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_parse_syscall_filter(
+ const char *name,
+ int errno_num,
+ Hashmap *filter,
+ SeccompParseFlags flags,
+ const char *unit,
+ const char *filename,
+ unsigned line) {
+
+ int r;
+
+ assert(name);
+ assert(filter);
+
+ if (name[0] == '@') {
+ const SyscallFilterSet *set;
+ const char *i;
+
+ set = syscall_filter_set_find(name);
+ if (!set) {
+ if (!(flags & SECCOMP_PARSE_PERMISSIVE))
+ return -EINVAL;
+
+ log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "Unknown system call group, ignoring: %s", name);
+ return 0;
+ }
+
+ NULSTR_FOREACH(i, set->value) {
+ /* Call ourselves again, for the group to parse. Note that we downgrade logging here (i.e. take
+ * away the SECCOMP_PARSE_LOG flag) since any issues in the group table are our own problem,
+ * not a problem in user configuration data and we shouldn't pretend otherwise by complaining
+ * about them. */
+ r = seccomp_parse_syscall_filter(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(name);
+ if (id == __NR_SCMP_ERROR) {
+ if (!(flags & SECCOMP_PARSE_PERMISSIVE))
+ return -EINVAL;
+
+ log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "Failed to parse system call, ignoring: %s", name);
+ return 0;
+ }
+
+ /* If we previously wanted to forbid a syscall and now
+ * we want to allow it, then remove it from the list. */
+ if (!(flags & SECCOMP_PARSE_INVERT) == !!(flags & SECCOMP_PARSE_ALLOW_LIST)) {
+ r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num));
+ if (r < 0)
+ switch (r) {
+ case -ENOMEM:
+ return flags & SECCOMP_PARSE_LOG ? log_oom() : -ENOMEM;
+ case -EEXIST:
+ assert_se(hashmap_update(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)) == 0);
+ break;
+ default:
+ return r;
+ }
+ } else
+ (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_namespaces(unsigned long retain) {
+ uint32_t arch;
+ int r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *s = NULL;
+
+ (void) namespace_flags_to_string(retain, &s);
+ log_debug("Restricting namespace to: %s.", strna(s));
+ }
+
+ /* NOOP? */
+ if (FLAGS_SET(retain, NAMESPACE_FLAGS_ALL))
+ return 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ if ((retain & NAMESPACE_FLAGS_ALL) == 0)
+ /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
+ * altogether. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 0);
+ else
+ /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the
+ * special invocation with a zero flags argument, right here. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ for (unsigned i = 0; namespace_flag_map[i].name; i++) {
+ unsigned long f;
+
+ f = namespace_flag_map[i].flag;
+ if (FLAGS_SET(retain, f)) {
+ log_debug("Permitting %s.", namespace_flag_map[i].name);
+ continue;
+ }
+
+ log_debug("Blocking %s.", namespace_flag_map[i].name);
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(unshare),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add unshare() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+
+ /* On s390/s390x the first two parameters to clone are switched */
+ if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X))
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ else
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+
+ if ((retain & NAMESPACE_FLAGS_ALL) != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+ }
+ }
+ if (r < 0)
+ continue;
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_protect_sysctl(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ if (IN_SET(arch,
+ SCMP_ARCH_AARCH64,
+#ifdef SCMP_ARCH_RISCV64
+ SCMP_ARCH_RISCV64,
+#endif
+ SCMP_ARCH_X32
+ ))
+ /* No _sysctl syscall */
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(_sysctl),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add _sysctl() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_protect_syslog(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(syslog),
+ 0);
+
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add syslog() rule for architecture %s, skipping %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install syslog protection rules for architecture %s, skipping %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_address_families(Set *address_families, bool allow_list) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ bool supported;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ switch (arch) {
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_ARM:
+ case SCMP_ARCH_AARCH64:
+ case SCMP_ARCH_MIPSEL64N32:
+ case SCMP_ARCH_MIPS64N32:
+ case SCMP_ARCH_MIPSEL64:
+ case SCMP_ARCH_MIPS64:
+#ifdef SCMP_ARCH_RISCV64
+ case SCMP_ARCH_RISCV64:
+#endif
+ /* These we know we support (i.e. are the ones that do not use socketcall()) */
+ supported = true;
+ break;
+
+ case SCMP_ARCH_S390:
+ case SCMP_ARCH_S390X:
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_MIPSEL:
+ case SCMP_ARCH_MIPS:
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ default:
+ /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
+ * don't know */
+ supported = false;
+ break;
+ }
+
+ if (!supported)
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ if (allow_list) {
+ int first = 0, last = 0;
+ void *afp;
+
+ /* If this is an allow list, we first block the address families that are out of
+ * range and then everything that is not in the set. First, we find the lowest and
+ * highest address family in the set. */
+
+ SET_FOREACH(afp, address_families) {
+ int af = PTR_TO_INT(afp);
+
+ if (af <= 0 || af >= af_max())
+ continue;
+
+ if (first == 0 || af < first)
+ first = af;
+
+ if (last == 0 || af > last)
+ last = af;
+ }
+
+ assert((first == 0) == (last == 0));
+
+ if (first == 0) {
+
+ /* No entries in the valid range, block everything */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ } else {
+
+ /* Block everything below the first entry */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_LT, first));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* Block everything above the last entry */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_GT, last));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* Block everything between the first and last entry */
+ for (int af = 1; af < af_max(); af++) {
+
+ if (set_contains(address_families, INT_TO_PTR(af)))
+ continue;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, af));
+ if (r < 0)
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ } else {
+ void *af;
+
+ /* If this is a deny list, then generate one rule for each address family that are
+ * then combined in OR checks. */
+
+ SET_FOREACH(af, address_families) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
+ if (r < 0)
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_realtime(void) {
+ static const int permitted_policies[] = {
+ SCHED_OTHER,
+ SCHED_BATCH,
+ SCHED_IDLE,
+ };
+
+ int r, max_policy = 0;
+ uint32_t arch;
+ unsigned i;
+
+ /* Determine the highest policy constant we want to allow */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] > max_policy)
+ max_policy = permitted_policies[i];
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int p;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ /* Go through all policies with lower values than that, and block them -- unless they appear in the
+ * allow list. */
+ for (p = 0; p < max_policy; p++) {
+ bool good = false;
+
+ /* Check if this is in the allow list. */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] == p) {
+ good = true;
+ break;
+ }
+
+ if (good)
+ continue;
+
+ /* Deny this policy */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, p));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ /* Deny-list all other policies, i.e. the ones with higher values. Note that all comparisons
+ * are unsigned here, hence no need no check for < 0 values. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_GT, max_policy));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp,
+ uint32_t arch,
+ int nr,
+ unsigned arg_cnt,
+ const struct scmp_arg_cmp arg) {
+ int r;
+
+ r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg);
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, nr);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ }
+
+ return r;
+}
+
+/* For known architectures, check that syscalls are indeed defined or not. */
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64)
+assert_cc(SCMP_SYS(shmget) > 0);
+assert_cc(SCMP_SYS(shmat) > 0);
+assert_cc(SCMP_SYS(shmdt) > 0);
+#endif
+
+int seccomp_memory_deny_write_execute(void) {
+ uint32_t arch;
+ unsigned loaded = 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0, r;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ switch (arch) {
+
+ /* Note that on some architectures shmat() isn't available, and the call is multiplexed through ipc().
+ * We ignore that here, which means there's still a way to get writable/executable
+ * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
+
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_S390:
+ filter_syscall = SCMP_SYS(mmap2);
+ block_syscall = SCMP_SYS(mmap);
+ /* shmat multiplexed, see above */
+ break;
+
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ case SCMP_ARCH_S390X:
+ filter_syscall = SCMP_SYS(mmap);
+ /* shmat multiplexed, see above */
+ break;
+
+ case SCMP_ARCH_ARM:
+ filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_AARCH64:
+#ifdef SCMP_ARCH_RISCV64
+ case SCMP_ARCH_RISCV64:
+#endif
+ filter_syscall = SCMP_SYS(mmap); /* amd64, x32, arm64 and riscv64 have only mmap */
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__) && !defined(__s390__) && !defined(__s390x__) && !(defined(__riscv) && __riscv_xlen == 64)
+#warning "Consider adding the right mmap() syscall definitions here!"
+#endif
+ }
+
+ /* Can't filter mmap() on this arch, then skip it */
+ if (filter_syscall == 0)
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
+ if (r < 0)
+ continue;
+
+ if (block_syscall != 0) {
+ r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} );
+ if (r < 0)
+ continue;
+ }
+
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ continue;
+
+#ifdef __NR_pkey_mprotect
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(pkey_mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ continue;
+#endif
+
+ if (shmat_syscall > 0) {
+ r = add_seccomp_syscall_filter(seccomp, arch, shmat_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
+ if (r < 0)
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m",
+ seccomp_arch_to_string(arch));
+ loaded++;
+ }
+
+ if (loaded == 0)
+ log_debug("Failed to install any seccomp rules for MemoryDenyWriteExecute=.");
+
+ return loaded;
+}
+
+int seccomp_restrict_archs(Set *archs) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ void *id;
+ int r;
+
+ /* This installs a filter with no rules, but that restricts the system call architectures to the specified
+ * list.
+ *
+ * There are some qualifications. However the most important use is to stop processes from bypassing
+ * system call restrictions, in case they used a broader (multiplexing) syscall which is only available
+ * in a non-native architecture. There are no holes in this use case, at least so far. */
+
+ /* Note libseccomp includes our "native" (current) architecture in the filter by default.
+ * We do not remove it. For example, our callers expect to be able to call execve() afterwards
+ * to run a program with the restrictions applied. */
+ seccomp = seccomp_init(SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
+
+ SET_FOREACH(id, archs) {
+ r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ /* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32
+ * x32 syscalls should basically match x86-64 for everything except the pointer type.
+ * The important thing is that you can block the old 32-bit x86 syscalls.
+ * https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */
+
+ if (seccomp_arch_native() == SCMP_ARCH_X32 ||
+ set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1))) {
+
+ r = seccomp_arch_add(seccomp, SCMP_ARCH_X86_64);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ return r;
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to restrict system call architectures, skipping: %m");
+
+ return 0;
+}
+
+int parse_syscall_archs(char **l, Set **ret_archs) {
+ _cleanup_set_free_ Set *archs = NULL;
+ char **s;
+ int r;
+
+ assert(l);
+ assert(ret_archs);
+
+ STRV_FOREACH(s, l) {
+ uint32_t a;
+
+ r = seccomp_arch_from_string(*s, &a);
+ if (r < 0)
+ return -EINVAL;
+
+ r = set_ensure_put(&archs, NULL, UINT32_TO_PTR(a + 1));
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ *ret_archs = TAKE_PTR(archs);
+ return 0;
+}
+
+int seccomp_filter_set_add(Hashmap *filter, bool add, const SyscallFilterSet *set) {
+ const char *i;
+ int r;
+
+ assert(set);
+
+ NULSTR_FOREACH(i, set->value) {
+
+ if (i[0] == '@') {
+ const SyscallFilterSet *more;
+
+ more = syscall_filter_set_find(i);
+ if (!more)
+ return -ENXIO;
+
+ r = seccomp_filter_set_add(filter, add, more);
+ if (r < 0)
+ return r;
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(i);
+ if (id == __NR_SCMP_ERROR) {
+ log_debug("Couldn't resolve system call, ignoring: %s", i);
+ continue;
+ }
+
+ if (add) {
+ r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(-1));
+ if (r < 0)
+ return r;
+ } else
+ (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
+ }
+ }
+
+ return 0;
+}
+
+int seccomp_lock_personality(unsigned long personality) {
+ uint32_t arch;
+ int r;
+
+ if (personality >= PERSONALITY_INVALID)
+ return -EINVAL;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(personality),
+ 1,
+ SCMP_A0(SCMP_CMP_NE, personality));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_protect_hostname(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sethostname),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add sethostname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setdomainname),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setdomainname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to apply hostname restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+static int seccomp_restrict_sxid(scmp_filter_ctx seccomp, mode_t m) {
+ /* Checks the mode_t parameter of the following system calls:
+ *
+ * → chmod() + fchmod() + fchmodat()
+ * → open() + creat() + openat()
+ * → mkdir() + mkdirat()
+ * → mknod() + mknodat()
+ *
+ * Returns error if *everything* failed, and 0 otherwise.
+ */
+ int r;
+ bool any = false;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(chmod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for chmod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(fchmod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for fchmod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(fchmodat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for fchmodat: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mkdir),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mkdir: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mkdirat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mkdirat: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mknod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mknod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mknodat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mknodat: %m");
+ else
+ any = true;
+
+#if SCMP_SYS(open) > 0
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(open),
+ 2,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for open: %m");
+ else
+ any = true;
+#endif
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(openat),
+ 2,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
+ SCMP_A3(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for openat: %m");
+ else
+ any = true;
+
+#if defined(__SNR_openat2)
+ /* The new openat2() system call can't be filtered sensibly, since it moves the flags parameter into
+ * an indirect structure. Let's block it entirely for now. That should be a reasonably OK thing to do
+ * for now, since openat2() is very new and code generally needs fallback logic anyway to be
+ * compatible with kernels that are not absolutely recent. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(openat2),
+ 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for openat2: %m");
+ else
+ any = true;
+#endif
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(creat),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for creat: %m");
+ else
+ any = true;
+
+ return any ? 0 : r;
+}
+
+int seccomp_restrict_suid_sgid(void) {
+ uint32_t arch;
+ int r, k;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_restrict_sxid(seccomp, S_ISUID);
+ if (r < 0)
+ log_debug_errno(r, "Failed to add suid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
+
+ k = seccomp_restrict_sxid(seccomp, S_ISGID);
+ if (k < 0)
+ log_debug_errno(r, "Failed to add sgid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
+
+ if (r < 0 && k < 0)
+ continue;
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to apply suid/sgid restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+uint32_t scmp_act_kill_process(void) {
+
+ /* Returns SCMP_ACT_KILL_PROCESS if it's supported, and SCMP_ACT_KILL_THREAD otherwise. We never
+ * actually want to use SCMP_ACT_KILL_THREAD as its semantics are nuts (killing arbitrary threads of
+ * a program is just a bad idea), but on old kernels/old libseccomp it is all we have, and at least
+ * for single-threaded apps does the right thing. */
+
+#ifdef SCMP_ACT_KILL_PROCESS
+ if (seccomp_api_get() >= 3)
+ return SCMP_ACT_KILL_PROCESS;
+#endif
+
+ return SCMP_ACT_KILL; /* same as SCMP_ACT_KILL_THREAD */
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
new file mode 100644
index 0000000..6105971
--- /dev/null
+++ b/src/shared/seccomp-util.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <seccomp.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "errno-list.h"
+#include "parse-util.h"
+#include "set.h"
+#include "string-util.h"
+
+const char* seccomp_arch_to_string(uint32_t c);
+int seccomp_arch_from_string(const char *n, uint32_t *ret);
+
+int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action);
+
+bool is_seccomp_available(void);
+
+typedef struct SyscallFilterSet {
+ const char *name;
+ const char *help;
+ const char *value;
+} SyscallFilterSet;
+
+enum {
+ /* Please leave DEFAULT first and KNOWN last, but sort the rest alphabetically */
+ SYSCALL_FILTER_SET_DEFAULT,
+ SYSCALL_FILTER_SET_AIO,
+ SYSCALL_FILTER_SET_BASIC_IO,
+ SYSCALL_FILTER_SET_CHOWN,
+ SYSCALL_FILTER_SET_CLOCK,
+ SYSCALL_FILTER_SET_CPU_EMULATION,
+ SYSCALL_FILTER_SET_DEBUG,
+ SYSCALL_FILTER_SET_FILE_SYSTEM,
+ SYSCALL_FILTER_SET_IO_EVENT,
+ SYSCALL_FILTER_SET_IPC,
+ SYSCALL_FILTER_SET_KEYRING,
+ SYSCALL_FILTER_SET_MEMLOCK,
+ SYSCALL_FILTER_SET_MODULE,
+ SYSCALL_FILTER_SET_MOUNT,
+ SYSCALL_FILTER_SET_NETWORK_IO,
+ SYSCALL_FILTER_SET_OBSOLETE,
+ SYSCALL_FILTER_SET_PKEY,
+ SYSCALL_FILTER_SET_PRIVILEGED,
+ SYSCALL_FILTER_SET_PROCESS,
+ SYSCALL_FILTER_SET_RAW_IO,
+ SYSCALL_FILTER_SET_REBOOT,
+ SYSCALL_FILTER_SET_RESOURCES,
+ SYSCALL_FILTER_SET_SETUID,
+ SYSCALL_FILTER_SET_SIGNAL,
+ SYSCALL_FILTER_SET_SWAP,
+ SYSCALL_FILTER_SET_SYNC,
+ SYSCALL_FILTER_SET_SYSTEM_SERVICE,
+ SYSCALL_FILTER_SET_TIMER,
+ SYSCALL_FILTER_SET_KNOWN,
+ _SYSCALL_FILTER_SET_MAX
+};
+
+extern const SyscallFilterSet syscall_filter_sets[];
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name);
+
+int seccomp_filter_set_add(Hashmap *s, bool b, const SyscallFilterSet *set);
+
+int seccomp_add_syscall_filter_item(
+ scmp_filter_ctx *ctx,
+ const char *name,
+ uint32_t action,
+ char **exclude,
+ bool log_missing,
+ char ***added);
+
+int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing);
+int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing);
+
+typedef enum SeccompParseFlags {
+ SECCOMP_PARSE_INVERT = 1 << 0,
+ SECCOMP_PARSE_ALLOW_LIST = 1 << 1,
+ SECCOMP_PARSE_LOG = 1 << 2,
+ SECCOMP_PARSE_PERMISSIVE = 1 << 3,
+} SeccompParseFlags;
+
+int seccomp_parse_syscall_filter(
+ const char *name,
+ int errno_num,
+ Hashmap *filter,
+ SeccompParseFlags flags,
+ const char *unit,
+ const char *filename, unsigned line);
+
+int seccomp_restrict_archs(Set *archs);
+int seccomp_restrict_namespaces(unsigned long retain);
+int seccomp_protect_sysctl(void);
+int seccomp_protect_syslog(void);
+int seccomp_restrict_address_families(Set *address_families, bool allow_list);
+int seccomp_restrict_realtime(void);
+int seccomp_memory_deny_write_execute(void);
+int seccomp_lock_personality(unsigned long personality);
+int seccomp_protect_hostname(void);
+int seccomp_restrict_suid_sgid(void);
+
+extern const uint32_t seccomp_local_archs[];
+
+#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
+ for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; }); \
+ seccomp_local_archs[_i] != (uint32_t) -1; \
+ (arch) = seccomp_local_archs[++_i])
+
+/* EACCES: does not have the CAP_SYS_ADMIN or no_new_privs == 1
+ * ENOMEM: out of memory, failed to allocate space for a libseccomp structure, or would exceed a defined constant
+ * EFAULT: addresses passed as args (by libseccomp) are invalid */
+#define ERRNO_IS_SECCOMP_FATAL(r) \
+ IN_SET(abs(r), EPERM, EACCES, ENOMEM, EFAULT)
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release);
+
+int parse_syscall_archs(char **l, Set **ret_archs);
+
+uint32_t scmp_act_kill_process(void);
+
+/* This is a special value to be used where syscall filters otherwise expect errno numbers, will be
+ replaced with real seccomp action. */
+enum {
+ SECCOMP_ERROR_NUMBER_KILL = INT_MAX - 1,
+};
+
+static inline bool seccomp_errno_or_action_is_valid(int n) {
+ return n == SECCOMP_ERROR_NUMBER_KILL || errno_is_valid(n);
+}
+
+static inline int seccomp_parse_errno_or_action(const char *p) {
+ if (streq_ptr(p, "kill"))
+ return SECCOMP_ERROR_NUMBER_KILL;
+ return parse_errno(p);
+}
+
+static inline const char *seccomp_errno_or_action_to_string(int num) {
+ if (num == SECCOMP_ERROR_NUMBER_KILL)
+ return "kill";
+ return errno_to_name(num);
+}
diff --git a/src/shared/securebits-util.c b/src/shared/securebits-util.c
new file mode 100644
index 0000000..c867807
--- /dev/null
+++ b/src/shared/securebits-util.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "securebits-util.h"
+#include "string-util.h"
+
+int secure_bits_to_string_alloc(int i, char **s) {
+ _cleanup_free_ char *str = NULL;
+ size_t len;
+ int r;
+
+ assert(s);
+
+ r = asprintf(&str, "%s%s%s%s%s%s",
+ (i & (1 << SECURE_KEEP_CAPS)) ? "keep-caps " : "",
+ (i & (1 << SECURE_KEEP_CAPS_LOCKED)) ? "keep-caps-locked " : "",
+ (i & (1 << SECURE_NO_SETUID_FIXUP)) ? "no-setuid-fixup " : "",
+ (i & (1 << SECURE_NO_SETUID_FIXUP_LOCKED)) ? "no-setuid-fixup-locked " : "",
+ (i & (1 << SECURE_NOROOT)) ? "noroot " : "",
+ (i & (1 << SECURE_NOROOT_LOCKED)) ? "noroot-locked " : "");
+ if (r < 0)
+ return -ENOMEM;
+
+ len = strlen(str);
+ if (len != 0)
+ str[len - 1] = '\0';
+
+ *s = TAKE_PTR(str);
+
+ return 0;
+}
+
+int secure_bits_from_string(const char *s) {
+ int secure_bits = 0;
+ const char *p;
+ int r;
+
+ for (p = s;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return r;
+ if (r <= 0)
+ break;
+
+ if (streq(word, "keep-caps"))
+ secure_bits |= 1 << SECURE_KEEP_CAPS;
+ else if (streq(word, "keep-caps-locked"))
+ secure_bits |= 1 << SECURE_KEEP_CAPS_LOCKED;
+ else if (streq(word, "no-setuid-fixup"))
+ secure_bits |= 1 << SECURE_NO_SETUID_FIXUP;
+ else if (streq(word, "no-setuid-fixup-locked"))
+ secure_bits |= 1 << SECURE_NO_SETUID_FIXUP_LOCKED;
+ else if (streq(word, "noroot"))
+ secure_bits |= 1 << SECURE_NOROOT;
+ else if (streq(word, "noroot-locked"))
+ secure_bits |= 1 << SECURE_NOROOT_LOCKED;
+ }
+
+ return secure_bits;
+}
diff --git a/src/shared/securebits-util.h b/src/shared/securebits-util.h
new file mode 100644
index 0000000..f2e65cf
--- /dev/null
+++ b/src/shared/securebits-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "missing_securebits.h"
+
+int secure_bits_to_string_alloc(int i, char **s);
+int secure_bits_from_string(const char *s);
+
+static inline bool secure_bits_is_valid(int i) {
+ return ((SECURE_ALL_BITS | SECURE_ALL_LOCKS) & i) == i;
+}
+
+static inline int secure_bits_to_string_alloc_with_check(int n, char **s) {
+ if (!secure_bits_is_valid(n))
+ return -EINVAL;
+
+ return secure_bits_to_string_alloc(n, s);
+}
diff --git a/src/shared/serialize.c b/src/shared/serialize.c
new file mode 100644
index 0000000..45f57d6
--- /dev/null
+++ b/src/shared/serialize.c
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fileio.h"
+#include "missing_mman.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+int serialize_item(FILE *f, const char *key, const char *value) {
+ assert(f);
+ assert(key);
+
+ if (!value)
+ return 0;
+
+ /* Make sure that anything we serialize we can also read back again with read_line() with a maximum line size
+ * of LONG_LINE_MAX. This is a safety net only. All code calling us should filter this out earlier anyway. */
+ if (strlen(key) + 1 + strlen(value) + 1 > LONG_LINE_MAX) {
+ log_warning("Attempted to serialize overly long item '%s', refusing.", key);
+ return -EINVAL;
+ }
+
+ fputs(key, f);
+ fputc('=', f);
+ fputs(value, f);
+ fputc('\n', f);
+
+ return 1;
+}
+
+int serialize_item_escaped(FILE *f, const char *key, const char *value) {
+ _cleanup_free_ char *c = NULL;
+
+ assert(f);
+ assert(key);
+
+ if (!value)
+ return 0;
+
+ c = cescape(value);
+ if (!c)
+ return log_oom();
+
+ return serialize_item(f, key, c);
+}
+
+int serialize_item_format(FILE *f, const char *key, const char *format, ...) {
+ char buf[LONG_LINE_MAX];
+ va_list ap;
+ int k;
+
+ assert(f);
+ assert(key);
+ assert(format);
+
+ va_start(ap, format);
+ k = vsnprintf(buf, sizeof(buf), format, ap);
+ va_end(ap);
+
+ if (k < 0 || (size_t) k >= sizeof(buf) || strlen(key) + 1 + k + 1 > LONG_LINE_MAX) {
+ log_warning("Attempted to serialize overly long item '%s', refusing.", key);
+ return -EINVAL;
+ }
+
+ fputs(key, f);
+ fputc('=', f);
+ fputs(buf, f);
+ fputc('\n', f);
+
+ return 1;
+}
+
+int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd) {
+ int copy;
+
+ assert(f);
+ assert(key);
+
+ if (fd < 0)
+ return 0;
+
+ copy = fdset_put_dup(fds, fd);
+ if (copy < 0)
+ return log_error_errno(copy, "Failed to add file descriptor to serialization set: %m");
+
+ return serialize_item_format(f, key, "%i", copy);
+}
+
+int serialize_usec(FILE *f, const char *key, usec_t usec) {
+ assert(f);
+ assert(key);
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ return serialize_item_format(f, key, USEC_FMT, usec);
+}
+
+int serialize_dual_timestamp(FILE *f, const char *name, const dual_timestamp *t) {
+ assert(f);
+ assert(name);
+ assert(t);
+
+ if (!dual_timestamp_is_set(t))
+ return 0;
+
+ return serialize_item_format(f, name, USEC_FMT " " USEC_FMT, t->realtime, t->monotonic);
+}
+
+int serialize_strv(FILE *f, const char *key, char **l) {
+ int ret = 0, r;
+ char **i;
+
+ /* Returns the first error, or positive if anything was serialized, 0 otherwise. */
+
+ STRV_FOREACH(i, l) {
+ r = serialize_item_escaped(f, key, *i);
+ if ((ret >= 0 && r < 0) ||
+ (ret == 0 && r > 0))
+ ret = r;
+ }
+
+ return ret;
+}
+
+int deserialize_usec(const char *value, usec_t *ret) {
+ int r;
+
+ assert(value);
+
+ r = safe_atou64(value, ret);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse usec value \"%s\": %m", value);
+
+ return 0;
+}
+
+int deserialize_dual_timestamp(const char *value, dual_timestamp *t) {
+ uint64_t a, b;
+ int r, pos;
+
+ assert(value);
+ assert(t);
+
+ pos = strspn(value, WHITESPACE);
+ if (value[pos] == '-')
+ return -EINVAL;
+ pos += strspn(value + pos, DIGITS);
+ pos += strspn(value + pos, WHITESPACE);
+ if (value[pos] == '-')
+ return -EINVAL;
+
+ r = sscanf(value, "%" PRIu64 "%" PRIu64 "%n", &a, &b, &pos);
+ if (r != 2)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse dual timestamp value \"%s\".",
+ value);
+
+ if (value[pos] != '\0')
+ /* trailing garbage */
+ return -EINVAL;
+
+ t->realtime = a;
+ t->monotonic = b;
+
+ return 0;
+}
+
+int deserialize_environment(const char *value, char ***list) {
+ _cleanup_free_ char *unescaped = NULL;
+ int r;
+
+ assert(value);
+ assert(list);
+
+ /* Changes the *environment strv inline. */
+
+ r = cunescape(value, 0, &unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape: %m");
+
+ r = strv_env_replace(list, unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append environment variable: %m");
+
+ unescaped = NULL; /* now part of 'list' */
+ return 0;
+}
+
+int open_serialization_fd(const char *ident) {
+ int fd;
+
+ fd = memfd_create(ident, MFD_CLOEXEC);
+ if (fd < 0) {
+ const char *path;
+
+ path = getpid_cached() == 1 ? "/run/systemd" : "/tmp";
+ fd = open_tmpfile_unlinkable(path, O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ log_debug("Serializing %s to %s.", ident, path);
+ } else
+ log_debug("Serializing %s to memfd.", ident);
+
+ return fd;
+}
diff --git a/src/shared/serialize.h b/src/shared/serialize.h
new file mode 100644
index 0000000..6d4f1ef
--- /dev/null
+++ b/src/shared/serialize.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "fdset.h"
+#include "macro.h"
+#include "string-util.h"
+#include "time-util.h"
+
+int serialize_item(FILE *f, const char *key, const char *value);
+int serialize_item_escaped(FILE *f, const char *key, const char *value);
+int serialize_item_format(FILE *f, const char *key, const char *value, ...) _printf_(3,4);
+int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd);
+int serialize_usec(FILE *f, const char *key, usec_t usec);
+int serialize_dual_timestamp(FILE *f, const char *key, const dual_timestamp *t);
+int serialize_strv(FILE *f, const char *key, char **l);
+
+static inline int serialize_bool(FILE *f, const char *key, bool b) {
+ return serialize_item(f, key, yes_no(b));
+}
+
+int deserialize_usec(const char *value, usec_t *timestamp);
+int deserialize_dual_timestamp(const char *value, dual_timestamp *t);
+int deserialize_environment(const char *value, char ***environment);
+
+int open_serialization_fd(const char *ident);
diff --git a/src/shared/service-util.c b/src/shared/service-util.c
new file mode 100644
index 0000000..092be6e
--- /dev/null
+++ b/src/shared/service-util.c
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "pretty-print.h"
+#include "service-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static int help(const char *program_path, const char *service, const char *description, bool bus_introspect) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man(service, "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "%s%s%s\n\n"
+ "This program takes no positional arguments.\n\n"
+ "%sOptions%s:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --bus-introspect=PATH Write D-Bus XML introspection data\n"
+ "\nSee the %s for details.\n"
+ , program_path
+ , ansi_highlight(), description, ansi_normal()
+ , ansi_underline(), ansi_normal()
+ , link
+ );
+
+ return 0; /* No further action */
+}
+
+int service_parse_argv(
+ const char *service,
+ const char *description,
+ const BusObjectImplementation* const* bus_objects,
+ int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_BUS_INTROSPECT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return help(argv[0], service, description, bus_objects);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_BUS_INTROSPECT:
+ return bus_introspect_implementations(
+ stdout,
+ optarg,
+ bus_objects);
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option code.");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes no arguments.");
+
+ return 1; /* Further action */
+}
diff --git a/src/shared/service-util.h b/src/shared/service-util.h
new file mode 100644
index 0000000..360341f
--- /dev/null
+++ b/src/shared/service-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-object.h"
+
+int service_parse_argv(
+ const char *service,
+ const char *description,
+ const BusObjectImplementation* const* bus_objects,
+ int argc, char *argv[]);
diff --git a/src/shared/sleep-config.c b/src/shared/sleep-config.c
new file mode 100644
index 0000000..cea5148
--- /dev/null
+++ b/src/shared/sleep-config.c
@@ -0,0 +1,703 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2018 Dell Inc.
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "env-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "sleep-config.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+int parse_sleep_config(SleepConfig **ret_sleep_config) {
+ _cleanup_(free_sleep_configp) SleepConfig *sc;
+ int allow_suspend = -1, allow_hibernate = -1,
+ allow_s2h = -1, allow_hybrid_sleep = -1;
+
+ sc = new0(SleepConfig, 1);
+ if (!sc)
+ return log_oom();
+
+ const ConfigTableItem items[] = {
+ { "Sleep", "AllowSuspend", config_parse_tristate, 0, &allow_suspend },
+ { "Sleep", "AllowHibernation", config_parse_tristate, 0, &allow_hibernate },
+ { "Sleep", "AllowSuspendThenHibernate", config_parse_tristate, 0, &allow_s2h },
+ { "Sleep", "AllowHybridSleep", config_parse_tristate, 0, &allow_hybrid_sleep },
+
+ { "Sleep", "SuspendMode", config_parse_strv, 0, &sc->suspend_modes },
+ { "Sleep", "SuspendState", config_parse_strv, 0, &sc->suspend_states },
+ { "Sleep", "HibernateMode", config_parse_strv, 0, &sc->hibernate_modes },
+ { "Sleep", "HibernateState", config_parse_strv, 0, &sc->hibernate_states },
+ { "Sleep", "HybridSleepMode", config_parse_strv, 0, &sc->hybrid_modes },
+ { "Sleep", "HybridSleepState", config_parse_strv, 0, &sc->hybrid_states },
+
+ { "Sleep", "HibernateDelaySec", config_parse_sec, 0, &sc->hibernate_delay_sec},
+ {}
+ };
+
+ (void) config_parse_many_nulstr(
+ PKGSYSCONFDIR "/sleep.conf",
+ CONF_PATHS_NULSTR("systemd/sleep.conf.d"),
+ "Sleep\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+
+ /* use default values unless set */
+ sc->allow_suspend = allow_suspend != 0;
+ sc->allow_hibernate = allow_hibernate != 0;
+ sc->allow_hybrid_sleep = allow_hybrid_sleep >= 0 ? allow_hybrid_sleep
+ : (allow_suspend != 0 && allow_hibernate != 0);
+ sc->allow_s2h = allow_s2h >= 0 ? allow_s2h
+ : (allow_suspend != 0 && allow_hibernate != 0);
+
+ if (!sc->suspend_states)
+ sc->suspend_states = strv_new("mem", "standby", "freeze");
+ if (!sc->hibernate_modes)
+ sc->hibernate_modes = strv_new("platform", "shutdown");
+ if (!sc->hibernate_states)
+ sc->hibernate_states = strv_new("disk");
+ if (!sc->hybrid_modes)
+ sc->hybrid_modes = strv_new("suspend", "platform", "shutdown");
+ if (!sc->hybrid_states)
+ sc->hybrid_states = strv_new("disk");
+ if (sc->hibernate_delay_sec == 0)
+ sc->hibernate_delay_sec = 2 * USEC_PER_HOUR;
+
+ /* ensure values set for all required fields */
+ if (!sc->suspend_states || !sc->hibernate_modes
+ || !sc->hibernate_states || !sc->hybrid_modes || !sc->hybrid_states)
+ return log_oom();
+
+ *ret_sleep_config = TAKE_PTR(sc);
+
+ return 0;
+}
+
+int can_sleep_state(char **types) {
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ if (strv_isempty(types))
+ return true;
+
+ /* If /sys is read-only we cannot sleep */
+ if (access("/sys/power/state", W_OK) < 0) {
+ log_debug_errno(errno, "/sys/power/state is not writable, cannot sleep: %m");
+ return false;
+ }
+
+ r = read_one_line_file("/sys/power/state", &text);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read /sys/power/state, cannot sleep: %m");
+ return false;
+ }
+
+ const char *found;
+ r = string_contains_word_strv(text, NULL, types, &found);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /sys/power/state: %m");
+ if (r > 0)
+ log_debug("Sleep mode \"%s\" is supported by the kernel.", found);
+ else if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t = strv_join(types, "/");
+ log_debug("Sleep mode %s not supported by the kernel, sorry.", strnull(t));
+ }
+ return r;
+}
+
+int can_sleep_disk(char **types) {
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ if (strv_isempty(types))
+ return true;
+
+ /* If /sys is read-only we cannot sleep */
+ if (access("/sys/power/disk", W_OK) < 0) {
+ log_debug_errno(errno, "/sys/power/disk is not writable: %m");
+ return false;
+ }
+
+ r = read_one_line_file("/sys/power/disk", &text);
+ if (r < 0) {
+ log_debug_errno(r, "Couldn't read /sys/power/disk: %m");
+ return false;
+ }
+
+ for (const char *p = text;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /sys/power/disk: %m");
+ if (r == 0)
+ break;
+
+ char *s = word;
+ size_t l = strlen(s);
+ if (s[0] == '[' && s[l-1] == ']') {
+ s[l-1] = '\0';
+ s++;
+ }
+
+ if (strv_contains(types, s)) {
+ log_debug("Disk sleep mode \"%s\" is supported by the kernel.", s);
+ return true;
+ }
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t = strv_join(types, "/");
+ log_debug("Disk sleep mode %s not supported by the kernel, sorry.", strnull(t));
+ }
+ return false;
+}
+
+#define HIBERNATION_SWAP_THRESHOLD 0.98
+
+SwapEntry* swap_entry_free(SwapEntry *se) {
+ if (!se)
+ return NULL;
+
+ free(se->device);
+ free(se->type);
+
+ return mfree(se);
+}
+
+HibernateLocation* hibernate_location_free(HibernateLocation *hl) {
+ if (!hl)
+ return NULL;
+
+ swap_entry_free(hl->swap);
+
+ return mfree(hl);
+}
+
+static int swap_device_to_device_id(const SwapEntry *swap, dev_t *ret_dev) {
+ struct stat sb;
+ int r;
+
+ assert(swap);
+ assert(swap->device);
+ assert(swap->type);
+
+ r = stat(swap->device, &sb);
+ if (r < 0)
+ return -errno;
+
+ if (streq(swap->type, "partition")) {
+ if (!S_ISBLK(sb.st_mode))
+ return -ENOTBLK;
+
+ *ret_dev = sb.st_rdev;
+ return 0;
+ }
+
+ return get_block_device(swap->device, ret_dev);
+}
+
+/*
+ * Attempt to calculate the swap file offset on supported filesystems. On unsupported
+ * filesystems, a debug message is logged and ret_offset is set to UINT64_MAX.
+ */
+static int calculate_swap_file_offset(const SwapEntry *swap, uint64_t *ret_offset) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ struct fiemap *fiemap = NULL;
+ struct stat sb;
+ int r, btrfs;
+
+ assert(swap);
+ assert(swap->device);
+ assert(streq(swap->type, "file"));
+
+ fd = open(swap->device, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to open swap file %s to determine on-disk offset: %m", swap->device);
+
+ if (fstat(fd, &sb) < 0)
+ return log_debug_errno(errno, "Failed to stat %s: %m", swap->device);
+
+ btrfs = btrfs_is_filesystem(fd);
+ if (btrfs < 0)
+ return log_debug_errno(btrfs, "Error checking %s for Btrfs filesystem: %m", swap->device);
+ if (btrfs > 0) {
+ log_debug("%s: detection of swap file offset on Btrfs is not supported", swap->device);
+ *ret_offset = UINT64_MAX;
+ return 0;
+ }
+
+ r = read_fiemap(fd, &fiemap);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to read extent map for '%s': %m", swap->device);
+
+ *ret_offset = fiemap->fm_extents[0].fe_physical / page_size();
+ return 0;
+}
+
+static int read_resume_files(dev_t *ret_resume, uint64_t *ret_resume_offset) {
+ _cleanup_free_ char *resume_str = NULL, *resume_offset_str = NULL;
+ uint64_t resume_offset = 0;
+ dev_t resume;
+ int r;
+
+ r = read_one_line_file("/sys/power/resume", &resume_str);
+ if (r < 0)
+ return log_debug_errno(r, "Error reading /sys/power/resume: %m");
+
+ r = parse_dev(resume_str, &resume);
+ if (r < 0)
+ return log_debug_errno(r, "Error parsing /sys/power/resume device: %s: %m", resume_str);
+
+ r = read_one_line_file("/sys/power/resume_offset", &resume_offset_str);
+ if (r == -ENOENT)
+ log_debug_errno(r, "Kernel does not support resume_offset; swap file offset detection will be skipped.");
+ else if (r < 0)
+ return log_debug_errno(r, "Error reading /sys/power/resume_offset: %m");
+ else {
+ r = safe_atou64(resume_offset_str, &resume_offset);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse value in /sys/power/resume_offset \"%s\": %m", resume_offset_str);
+ }
+
+ if (resume_offset > 0 && resume == 0)
+ log_debug("Warning: found /sys/power/resume_offset==%" PRIu64 ", but /sys/power/resume unset. Misconfiguration?",
+ resume_offset);
+
+ *ret_resume = resume;
+ *ret_resume_offset = resume_offset;
+
+ return 0;
+}
+
+/*
+ * Determine if the HibernateLocation matches the resume= (device) and resume_offset= (file).
+ */
+static bool location_is_resume_device(const HibernateLocation *location, dev_t sys_resume, uint64_t sys_offset) {
+ if (!location)
+ return false;
+
+ return sys_resume > 0 &&
+ sys_resume == location->devno &&
+ (sys_offset == location->offset || (sys_offset > 0 && location->offset == UINT64_MAX));
+}
+
+/*
+ * Attempt to find the hibernation location by parsing /proc/swaps, /sys/power/resume, and
+ * /sys/power/resume_offset.
+ *
+ * Returns:
+ * 1 - Values are set in /sys/power/resume and /sys/power/resume_offset.
+ * ret_hibernate_location will represent matching /proc/swap entry if identified or NULL if not.
+ *
+ * 0 - No values are set in /sys/power/resume and /sys/power/resume_offset.
+ ret_hibernate_location will represent the highest priority swap with most remaining space discovered in /proc/swaps.
+ *
+ * Negative value in the case of error.
+ */
+int find_hibernate_location(HibernateLocation **ret_hibernate_location) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(hibernate_location_freep) HibernateLocation *hibernate_location = NULL;
+ dev_t sys_resume;
+ uint64_t sys_offset = 0;
+ bool resume_match = false;
+ int r;
+
+ /* read the /sys/power/resume & /sys/power/resume_offset values */
+ r = read_resume_files(&sys_resume, &sys_offset);
+ if (r < 0)
+ return r;
+
+ f = fopen("/proc/swaps", "re");
+ if (!f) {
+ log_debug_errno(errno, "Failed to open /proc/swaps: %m");
+ return errno == ENOENT ? -EOPNOTSUPP : -errno; /* Convert swap not supported to a recognizable error */
+ }
+
+ (void) fscanf(f, "%*s %*s %*s %*s %*s\n");
+ for (unsigned i = 1;; i++) {
+ _cleanup_(swap_entry_freep) SwapEntry *swap = NULL;
+ uint64_t swap_offset = 0;
+ int k;
+
+ swap = new0(SwapEntry, 1);
+ if (!swap)
+ return -ENOMEM;
+
+ k = fscanf(f,
+ "%ms " /* device/file */
+ "%ms " /* type of swap */
+ "%" PRIu64 /* swap size */
+ "%" PRIu64 /* used */
+ "%i\n", /* priority */
+ &swap->device, &swap->type, &swap->size, &swap->used, &swap->priority);
+ if (k == EOF)
+ break;
+ if (k != 5) {
+ log_debug("Failed to parse /proc/swaps:%u, ignoring", i);
+ continue;
+ }
+
+ if (streq(swap->type, "file")) {
+ if (endswith(swap->device, "\\040(deleted)")) {
+ log_debug("Ignoring deleted swap file '%s'.", swap->device);
+ continue;
+ }
+
+ r = calculate_swap_file_offset(swap, &swap_offset);
+ if (r < 0)
+ return r;
+
+ } else if (streq(swap->type, "partition")) {
+ const char *fn;
+
+ fn = path_startswith(swap->device, "/dev/");
+ if (fn && startswith(fn, "zram")) {
+ log_debug("%s: ignoring zram swap", swap->device);
+ continue;
+ }
+
+ } else {
+ log_debug("%s: swap type %s is unsupported for hibernation, ignoring", swap->device, swap->type);
+ continue;
+ }
+
+ /* prefer resume device or highest priority swap with most remaining space */
+ if (hibernate_location && swap->priority < hibernate_location->swap->priority) {
+ log_debug("%s: ignoring device with lower priority", swap->device);
+ continue;
+ }
+ if (hibernate_location &&
+ (swap->priority == hibernate_location->swap->priority
+ && swap->size - swap->used < hibernate_location->swap->size - hibernate_location->swap->used)) {
+ log_debug("%s: ignoring device with lower usable space", swap->device);
+ continue;
+ }
+
+ dev_t swap_device;
+ r = swap_device_to_device_id(swap, &swap_device);
+ if (r < 0)
+ return log_debug_errno(r, "%s: failed to query device number: %m", swap->device);
+ if (swap_device == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), "%s: not backed by block device.", swap->device);
+
+ hibernate_location = hibernate_location_free(hibernate_location);
+ hibernate_location = new(HibernateLocation, 1);
+ if (!hibernate_location)
+ return -ENOMEM;
+
+ *hibernate_location = (HibernateLocation) {
+ .devno = swap_device,
+ .offset = swap_offset,
+ .swap = TAKE_PTR(swap),
+ };
+
+ /* if the swap is the resume device, stop the loop */
+ if (location_is_resume_device(hibernate_location, sys_resume, sys_offset)) {
+ log_debug("%s: device matches configured resume settings.", hibernate_location->swap->device);
+ resume_match = true;
+ break;
+ }
+
+ log_debug("%s: is a candidate device.", hibernate_location->swap->device);
+ }
+
+ /* We found nothing at all */
+ if (!hibernate_location)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "No possible swap partitions or files suitable for hibernation were found in /proc/swaps.");
+
+ /* resume= is set but a matching /proc/swaps entry was not found */
+ if (sys_resume != 0 && !resume_match)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "No swap partitions or files matching resume config were found in /proc/swaps.");
+
+ if (hibernate_location->offset == UINT64_MAX) {
+ if (sys_offset == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS), "Offset detection failed and /sys/power/resume_offset is not set.");
+
+ hibernate_location->offset = sys_offset;
+ }
+
+ if (resume_match)
+ log_debug("Hibernation will attempt to use swap entry with path: %s, device: %u:%u, offset: %" PRIu64 ", priority: %i",
+ hibernate_location->swap->device, major(hibernate_location->devno), minor(hibernate_location->devno),
+ hibernate_location->offset, hibernate_location->swap->priority);
+ else
+ log_debug("/sys/power/resume is not configured; attempting to hibernate with path: %s, device: %u:%u, offset: %" PRIu64 ", priority: %i",
+ hibernate_location->swap->device, major(hibernate_location->devno), minor(hibernate_location->devno),
+ hibernate_location->offset, hibernate_location->swap->priority);
+
+ *ret_hibernate_location = TAKE_PTR(hibernate_location);
+
+ if (resume_match)
+ return 1;
+
+ return 0;
+}
+
+static bool enough_swap_for_hibernation(void) {
+ _cleanup_free_ char *active = NULL;
+ _cleanup_(hibernate_location_freep) HibernateLocation *hibernate_location = NULL;
+ unsigned long long act = 0;
+ int r;
+
+ if (getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0)
+ return true;
+
+ r = find_hibernate_location(&hibernate_location);
+ if (r < 0)
+ return false;
+
+ /* If /sys/power/{resume,resume_offset} is configured but a matching entry
+ * could not be identified in /proc/swaps, user is likely using Btrfs with a swapfile;
+ * return true and let the system attempt hibernation.
+ */
+ if (r > 0 && !hibernate_location) {
+ log_debug("Unable to determine remaining swap space; hibernation may fail");
+ return true;
+ }
+
+ if (!hibernate_location)
+ return false;
+
+ r = get_proc_field("/proc/meminfo", "Active(anon)", WHITESPACE, &active);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to retrieve Active(anon) from /proc/meminfo: %m");
+ return false;
+ }
+
+ r = safe_atollu(active, &act);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse Active(anon) from /proc/meminfo: %s: %m", active);
+ return false;
+ }
+
+ r = act <= (hibernate_location->swap->size - hibernate_location->swap->used) * HIBERNATION_SWAP_THRESHOLD;
+ log_debug("%s swap for hibernation, Active(anon)=%llu kB, size=%" PRIu64 " kB, used=%" PRIu64 " kB, threshold=%.2g%%",
+ r ? "Enough" : "Not enough", act, hibernate_location->swap->size, hibernate_location->swap->used, 100*HIBERNATION_SWAP_THRESHOLD);
+
+ return r;
+}
+
+int read_fiemap(int fd, struct fiemap **ret) {
+ _cleanup_free_ struct fiemap *fiemap = NULL, *result_fiemap = NULL;
+ struct stat statinfo;
+ uint32_t result_extents = 0;
+ uint64_t fiemap_start = 0, fiemap_length;
+ const size_t n_extra = DIV_ROUND_UP(sizeof(struct fiemap), sizeof(struct fiemap_extent));
+ size_t fiemap_allocated = n_extra, result_fiemap_allocated = n_extra;
+
+ if (fstat(fd, &statinfo) < 0)
+ return log_debug_errno(errno, "Cannot determine file size: %m");
+ if (!S_ISREG(statinfo.st_mode))
+ return -ENOTTY;
+ fiemap_length = statinfo.st_size;
+
+ /* Zero this out in case we run on a file with no extents */
+ fiemap = calloc(n_extra, sizeof(struct fiemap_extent));
+ if (!fiemap)
+ return -ENOMEM;
+
+ result_fiemap = malloc_multiply(n_extra, sizeof(struct fiemap_extent));
+ if (!result_fiemap)
+ return -ENOMEM;
+
+ /* XFS filesystem has incorrect implementation of fiemap ioctl and
+ * returns extents for only one block-group at a time, so we need
+ * to handle it manually, starting the next fiemap call from the end
+ * of the last extent
+ */
+ while (fiemap_start < fiemap_length) {
+ *fiemap = (struct fiemap) {
+ .fm_start = fiemap_start,
+ .fm_length = fiemap_length,
+ .fm_flags = FIEMAP_FLAG_SYNC,
+ };
+
+ /* Find out how many extents there are */
+ if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
+ return log_debug_errno(errno, "Failed to read extents: %m");
+
+ /* Nothing to process */
+ if (fiemap->fm_mapped_extents == 0)
+ break;
+
+ /* Resize fiemap to allow us to read in the extents, result fiemap has to hold all
+ * the extents for the whole file. Add space for the initial struct fiemap. */
+ if (!greedy_realloc0((void**) &fiemap, &fiemap_allocated,
+ n_extra + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
+ return -ENOMEM;
+
+ fiemap->fm_extent_count = fiemap->fm_mapped_extents;
+ fiemap->fm_mapped_extents = 0;
+
+ if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
+ return log_debug_errno(errno, "Failed to read extents: %m");
+
+ /* Resize result_fiemap to allow us to copy in the extents */
+ if (!greedy_realloc((void**) &result_fiemap, &result_fiemap_allocated,
+ n_extra + result_extents + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
+ return -ENOMEM;
+
+ memcpy(result_fiemap->fm_extents + result_extents,
+ fiemap->fm_extents,
+ sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents);
+
+ result_extents += fiemap->fm_mapped_extents;
+
+ /* Highly unlikely that it is zero */
+ if (_likely_(fiemap->fm_mapped_extents > 0)) {
+ uint32_t i = fiemap->fm_mapped_extents - 1;
+
+ fiemap_start = fiemap->fm_extents[i].fe_logical +
+ fiemap->fm_extents[i].fe_length;
+
+ if (fiemap->fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST)
+ break;
+ }
+ }
+
+ memcpy(result_fiemap, fiemap, sizeof(struct fiemap));
+ result_fiemap->fm_mapped_extents = result_extents;
+ *ret = TAKE_PTR(result_fiemap);
+ return 0;
+}
+
+static int can_sleep_internal(const char *verb, bool check_allowed, const SleepConfig *sleep_config);
+
+static bool can_s2h(const SleepConfig *sleep_config) {
+ const char *p;
+ int r;
+
+ if (!clock_supported(CLOCK_BOOTTIME_ALARM)) {
+ log_debug("CLOCK_BOOTTIME_ALARM is not supported.");
+ return false;
+ }
+
+ FOREACH_STRING(p, "suspend", "hibernate") {
+ r = can_sleep_internal(p, false, sleep_config);
+ if (IN_SET(r, 0, -ENOSPC, -EADV)) {
+ log_debug("Unable to %s system.", p);
+ return false;
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to check if %s is possible: %m", p);
+ }
+
+ return true;
+}
+
+static int can_sleep_internal(const char *verb, bool check_allowed, const SleepConfig *sleep_config) {
+ bool allow;
+ char **modes = NULL, **states = NULL;
+ int r;
+
+ assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate"));
+
+ r = sleep_settings(verb, sleep_config, &allow, &modes, &states);
+ if (r < 0)
+ return false;
+
+ if (check_allowed && !allow) {
+ log_debug("Sleep mode \"%s\" is disabled by configuration.", verb);
+ return false;
+ }
+
+ if (streq(verb, "suspend-then-hibernate"))
+ return can_s2h(sleep_config);
+
+ if (!can_sleep_state(states) || !can_sleep_disk(modes))
+ return false;
+
+ if (streq(verb, "suspend"))
+ return true;
+
+ if (!enough_swap_for_hibernation())
+ return -ENOSPC;
+
+ return true;
+}
+
+int can_sleep(const char *verb) {
+ _cleanup_(free_sleep_configp) SleepConfig *sleep_config = NULL;
+ int r;
+
+ r = parse_sleep_config(&sleep_config);
+ if (r < 0)
+ return r;
+
+ return can_sleep_internal(verb, true, sleep_config);
+}
+
+int sleep_settings(const char *verb, const SleepConfig *sleep_config, bool *ret_allow, char ***ret_modes, char ***ret_states) {
+
+ assert(verb);
+ assert(sleep_config);
+ assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate"));
+
+ if (streq(verb, "suspend")) {
+ *ret_allow = sleep_config->allow_suspend;
+ *ret_modes = sleep_config->suspend_modes;
+ *ret_states = sleep_config->suspend_states;
+ } else if (streq(verb, "hibernate")) {
+ *ret_allow = sleep_config->allow_hibernate;
+ *ret_modes = sleep_config->hibernate_modes;
+ *ret_states = sleep_config->hibernate_states;
+ } else if (streq(verb, "hybrid-sleep")) {
+ *ret_allow = sleep_config->allow_hybrid_sleep;
+ *ret_modes = sleep_config->hybrid_modes;
+ *ret_states = sleep_config->hybrid_states;
+ } else if (streq(verb, "suspend-then-hibernate")) {
+ *ret_allow = sleep_config->allow_s2h;
+ *ret_modes = *ret_states = NULL;
+ }
+
+ /* suspend modes empty by default */
+ if ((!ret_modes && !streq(verb, "suspend")) || !ret_states)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No modes or states set for %s; Check sleep.conf", verb);
+
+ return 0;
+}
+
+SleepConfig* free_sleep_config(SleepConfig *sc) {
+ if (!sc)
+ return NULL;
+
+ strv_free(sc->suspend_modes);
+ strv_free(sc->suspend_states);
+
+ strv_free(sc->hibernate_modes);
+ strv_free(sc->hibernate_states);
+
+ strv_free(sc->hybrid_modes);
+ strv_free(sc->hybrid_states);
+
+ return mfree(sc);
+}
diff --git a/src/shared/sleep-config.h b/src/shared/sleep-config.h
new file mode 100644
index 0000000..4b30e6d
--- /dev/null
+++ b/src/shared/sleep-config.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/fiemap.h>
+#include "time-util.h"
+
+typedef struct SleepConfig {
+ bool allow_suspend; /* AllowSuspend */
+ bool allow_hibernate; /* AllowHibernation */
+ bool allow_s2h; /* AllowSuspendThenHibernate */
+ bool allow_hybrid_sleep; /* AllowHybridSleep */
+
+ char **suspend_modes; /* SuspendMode */
+ char **suspend_states; /* SuspendState */
+ char **hibernate_modes; /* HibernateMode */
+ char **hibernate_states; /* HibernateState */
+ char **hybrid_modes; /* HybridSleepMode */
+ char **hybrid_states; /* HybridSleepState */
+
+ usec_t hibernate_delay_sec; /* HibernateDelaySec */
+} SleepConfig;
+
+SleepConfig* free_sleep_config(SleepConfig *sc);
+DEFINE_TRIVIAL_CLEANUP_FUNC(SleepConfig*, free_sleep_config);
+
+/* entry in /proc/swaps */
+typedef struct SwapEntry {
+ char *device;
+ char *type;
+ uint64_t size;
+ uint64_t used;
+ int priority;
+} SwapEntry;
+
+SwapEntry* swap_entry_free(SwapEntry *se);
+DEFINE_TRIVIAL_CLEANUP_FUNC(SwapEntry*, swap_entry_free);
+
+/*
+ * represents values for /sys/power/resume & /sys/power/resume_offset
+ * and the matching /proc/swap entry.
+ */
+typedef struct HibernateLocation {
+ dev_t devno;
+ uint64_t offset;
+ SwapEntry *swap;
+} HibernateLocation;
+
+HibernateLocation* hibernate_location_free(HibernateLocation *hl);
+DEFINE_TRIVIAL_CLEANUP_FUNC(HibernateLocation*, hibernate_location_free);
+
+int sleep_settings(const char *verb, const SleepConfig *sleep_config, bool *ret_allow, char ***ret_modes, char ***ret_states);
+
+int read_fiemap(int fd, struct fiemap **ret);
+int parse_sleep_config(SleepConfig **sleep_config);
+int find_hibernate_location(HibernateLocation **ret_hibernate_location);
+
+int can_sleep(const char *verb);
+int can_sleep_disk(char **types);
+int can_sleep_state(char **types);
diff --git a/src/shared/socket-netlink.c b/src/shared/socket-netlink.c
new file mode 100644
index 0000000..4a7007d
--- /dev/null
+++ b/src/shared/socket-netlink.c
@@ -0,0 +1,493 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <net/if.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "extract-word.h"
+#include "log.h"
+#include "memory-util.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "socket-netlink.h"
+#include "socket-util.h"
+#include "string-util.h"
+
+int resolve_ifname(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ /* Like if_nametoindex, but resolves "alternative names" too. */
+
+ assert(name);
+
+ r = if_nametoindex(name);
+ if (r > 0)
+ return r;
+
+ return rtnl_resolve_link_alternative_name(rtnl, name);
+}
+
+int resolve_interface(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ /* Like resolve_ifname, but resolves interface numbers too. */
+
+ assert(name);
+
+ r = parse_ifindex(name);
+ if (r > 0)
+ return r;
+ assert(r < 0);
+
+ return resolve_ifname(rtnl, name);
+}
+
+int resolve_interface_or_warn(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ r = resolve_interface(rtnl, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve interface \"%s\": %m", name);
+ return r;
+}
+
+int socket_address_parse(SocketAddress *a, const char *s) {
+ _cleanup_free_ char *n = NULL;
+ char *e;
+ int r;
+
+ assert(a);
+ assert(s);
+
+ if (IN_SET(*s, '/', '@')) {
+ /* AF_UNIX socket */
+ struct sockaddr_un un;
+
+ r = sockaddr_un_set_path(&un, s);
+ if (r < 0)
+ return r;
+
+ *a = (SocketAddress) {
+ .sockaddr.un = un,
+ .size = r,
+ };
+
+ } else if (startswith(s, "vsock:")) {
+ /* AF_VSOCK socket in vsock:cid:port notation */
+ const char *cid_start = s + STRLEN("vsock:");
+ unsigned port, cid;
+
+ e = strchr(cid_start, ':');
+ if (!e)
+ return -EINVAL;
+
+ r = safe_atou(e+1, &port);
+ if (r < 0)
+ return r;
+
+ n = strndup(cid_start, e - cid_start);
+ if (!n)
+ return -ENOMEM;
+
+ if (isempty(n))
+ cid = VMADDR_CID_ANY;
+ else {
+ r = safe_atou(n, &cid);
+ if (r < 0)
+ return r;
+ }
+
+ *a = (SocketAddress) {
+ .sockaddr.vm = {
+ .svm_cid = cid,
+ .svm_family = AF_VSOCK,
+ .svm_port = port,
+ },
+ .size = sizeof(struct sockaddr_vm),
+ };
+
+ } else {
+ uint16_t port;
+
+ r = parse_ip_port(s, &port);
+ if (r == -ERANGE)
+ return r; /* Valid port syntax, but the numerical value is wrong for a port. */
+ if (r >= 0) {
+ /* Just a port */
+ if (socket_ipv6_is_supported())
+ *a = (SocketAddress) {
+ .sockaddr.in6 = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htobe16(port),
+ .sin6_addr = in6addr_any,
+ },
+ .size = sizeof(struct sockaddr_in6),
+ };
+ else
+ *a = (SocketAddress) {
+ .sockaddr.in = {
+ .sin_family = AF_INET,
+ .sin_port = htobe16(port),
+ .sin_addr.s_addr = INADDR_ANY,
+ },
+ .size = sizeof(struct sockaddr_in),
+ };
+
+ } else {
+ union in_addr_union address;
+ int family, ifindex;
+
+ r = in_addr_port_ifindex_name_from_string_auto(s, &family, &address, &port, &ifindex, NULL);
+ if (r < 0)
+ return r;
+
+ if (port == 0) /* No port, no go. */
+ return -EINVAL;
+
+ if (family == AF_INET)
+ *a = (SocketAddress) {
+ .sockaddr.in = {
+ .sin_family = AF_INET,
+ .sin_addr = address.in,
+ .sin_port = htobe16(port),
+ },
+ .size = sizeof(struct sockaddr_in),
+ };
+ else if (family == AF_INET6)
+ *a = (SocketAddress) {
+ .sockaddr.in6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = address.in6,
+ .sin6_port = htobe16(port),
+ .sin6_scope_id = ifindex,
+ },
+ .size = sizeof(struct sockaddr_in6),
+ };
+ else
+ assert_not_reached("Family quarrel");
+ }
+ }
+
+ return 0;
+}
+
+int socket_address_parse_and_warn(SocketAddress *a, const char *s) {
+ SocketAddress b;
+ int r;
+
+ /* Similar to socket_address_parse() but warns for IPv6 sockets when we don't support them. */
+
+ r = socket_address_parse(&b, s);
+ if (r < 0)
+ return r;
+
+ if (!socket_ipv6_is_supported() && b.sockaddr.sa.sa_family == AF_INET6) {
+ log_warning("Binding to IPv6 address not available since kernel does not support IPv6.");
+ return -EAFNOSUPPORT;
+ }
+
+ *a = b;
+ return 0;
+}
+
+int socket_address_parse_netlink(SocketAddress *a, const char *s) {
+ _cleanup_free_ char *word = NULL;
+ unsigned group = 0;
+ int family, r;
+
+ assert(a);
+ assert(s);
+
+ r = extract_first_word(&s, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ family = netlink_family_from_string(word);
+ if (family < 0)
+ return -EINVAL;
+
+ if (!isempty(s)) {
+ r = safe_atou(s, &group);
+ if (r < 0)
+ return r;
+ }
+
+ *a = (SocketAddress) {
+ .type = SOCK_RAW,
+ .sockaddr.nl.nl_family = AF_NETLINK,
+ .sockaddr.nl.nl_groups = group,
+ .protocol = family,
+ .size = sizeof(struct sockaddr_nl),
+ };
+
+ return 0;
+}
+
+bool socket_address_is(const SocketAddress *a, const char *s, int type) {
+ struct SocketAddress b;
+
+ assert(a);
+ assert(s);
+
+ if (socket_address_parse(&b, s) < 0)
+ return false;
+
+ b.type = type;
+
+ return socket_address_equal(a, &b);
+}
+
+bool socket_address_is_netlink(const SocketAddress *a, const char *s) {
+ struct SocketAddress b;
+
+ assert(a);
+ assert(s);
+
+ if (socket_address_parse_netlink(&b, s) < 0)
+ return false;
+
+ return socket_address_equal(a, &b);
+}
+
+int make_socket_fd(int log_level, const char* address, int type, int flags) {
+ SocketAddress a;
+ int fd, r;
+
+ r = socket_address_parse(&a, address);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse socket address \"%s\": %m", address);
+
+ a.type = type;
+
+ fd = socket_address_listen(&a, type | flags, SOMAXCONN, SOCKET_ADDRESS_DEFAULT,
+ NULL, false, false, false, 0755, 0644, NULL);
+ if (fd < 0 || log_get_max_level() >= log_level) {
+ _cleanup_free_ char *p = NULL;
+
+ r = socket_address_print(&a, &p);
+ if (r < 0)
+ return log_error_errno(r, "socket_address_print(): %m");
+
+ if (fd < 0)
+ log_error_errno(fd, "Failed to listen on %s: %m", p);
+ else
+ log_full(log_level, "Listening on %s", p);
+ }
+
+ return fd;
+}
+
+int in_addr_port_ifindex_name_from_string_auto(
+ const char *s,
+ int *ret_family,
+ union in_addr_union *ret_address,
+ uint16_t *ret_port,
+ int *ret_ifindex,
+ char **ret_server_name) {
+
+ _cleanup_free_ char *buf1 = NULL, *buf2 = NULL, *name = NULL;
+ int family, ifindex = 0, r;
+ union in_addr_union a;
+ uint16_t port = 0;
+ const char *m;
+
+ assert(s);
+
+ /* This accepts the following:
+ * 192.168.0.1:53#example.com
+ * [2001:4860:4860::8888]:53%eth0#example.com
+ *
+ * If ret_port is NULL, then the port cannot be specified.
+ * If ret_ifindex is NULL, then the interface index cannot be specified.
+ * If ret_server_name is NULL, then server_name cannot be specified.
+ *
+ * ret_family is always AF_INET or AF_INET6.
+ */
+
+ m = strchr(s, '#');
+ if (m) {
+ if (!ret_server_name)
+ return -EINVAL;
+
+ if (isempty(m + 1))
+ return -EINVAL;
+
+ name = strdup(m + 1);
+ if (!name)
+ return -ENOMEM;
+
+ s = buf1 = strndup(s, m - s);
+ if (!buf1)
+ return -ENOMEM;
+ }
+
+ m = strchr(s, '%');
+ if (m) {
+ if (!ret_ifindex)
+ return -EINVAL;
+
+ if (isempty(m + 1))
+ return -EINVAL;
+
+ if (!ifname_valid_full(m + 1, IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC))
+ return -EINVAL; /* We want to return -EINVAL for syntactically invalid names,
+ * and -ENODEV for valid but nonexistent interfaces. */
+
+ ifindex = resolve_interface(NULL, m + 1);
+ if (ifindex < 0)
+ return ifindex;
+
+ s = buf2 = strndup(s, m - s);
+ if (!buf2)
+ return -ENOMEM;
+ }
+
+ m = strrchr(s, ':');
+ if (m) {
+ if (*s == '[') {
+ _cleanup_free_ char *ip_str = NULL;
+
+ if (!ret_port)
+ return -EINVAL;
+
+ if (*(m - 1) != ']')
+ return -EINVAL;
+
+ family = AF_INET6;
+
+ r = parse_ip_port(m + 1, &port);
+ if (r < 0)
+ return r;
+
+ ip_str = strndup(s + 1, m - s - 2);
+ if (!ip_str)
+ return -ENOMEM;
+
+ r = in_addr_from_string(family, ip_str, &a);
+ if (r < 0)
+ return r;
+ } else {
+ /* First try to parse the string as IPv6 address without port number */
+ r = in_addr_from_string(AF_INET6, s, &a);
+ if (r < 0) {
+ /* Then the input should be IPv4 address with port number */
+ _cleanup_free_ char *ip_str = NULL;
+
+ if (!ret_port)
+ return -EINVAL;
+
+ family = AF_INET;
+
+ ip_str = strndup(s, m - s);
+ if (!ip_str)
+ return -ENOMEM;
+
+ r = in_addr_from_string(family, ip_str, &a);
+ if (r < 0)
+ return r;
+
+ r = parse_ip_port(m + 1, &port);
+ if (r < 0)
+ return r;
+ } else
+ family = AF_INET6;
+ }
+ } else {
+ family = AF_INET;
+ r = in_addr_from_string(family, s, &a);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_family)
+ *ret_family = family;
+ if (ret_address)
+ *ret_address = a;
+ if (ret_port)
+ *ret_port = port;
+ if (ret_ifindex)
+ *ret_ifindex = ifindex;
+ if (ret_server_name)
+ *ret_server_name = TAKE_PTR(name);
+
+ return r;
+}
+
+struct in_addr_full *in_addr_full_free(struct in_addr_full *a) {
+ if (!a)
+ return NULL;
+
+ free(a->server_name);
+ free(a->cached_server_string);
+ return mfree(a);
+}
+
+int in_addr_full_new(
+ int family,
+ const union in_addr_union *a,
+ uint16_t port,
+ int ifindex,
+ const char *server_name,
+ struct in_addr_full **ret) {
+
+ _cleanup_free_ char *name = NULL;
+ struct in_addr_full *x;
+
+ assert(ret);
+
+ if (!isempty(server_name)) {
+ name = strdup(server_name);
+ if (!name)
+ return -ENOMEM;
+ }
+
+ x = new(struct in_addr_full, 1);
+ if (!x)
+ return -ENOMEM;
+
+ *x = (struct in_addr_full) {
+ .family = family,
+ .address = *a,
+ .port = port,
+ .ifindex = ifindex,
+ .server_name = TAKE_PTR(name),
+ };
+
+ *ret = x;
+ return 0;
+}
+
+int in_addr_full_new_from_string(const char *s, struct in_addr_full **ret) {
+ _cleanup_free_ char *server_name = NULL;
+ int family, ifindex, r;
+ union in_addr_union a;
+ uint16_t port;
+
+ assert(s);
+
+ r = in_addr_port_ifindex_name_from_string_auto(s, &family, &a, &port, &ifindex, &server_name);
+ if (r < 0)
+ return r;
+
+ return in_addr_full_new(family, &a, port, ifindex, server_name, ret);
+}
+
+const char *in_addr_full_to_string(struct in_addr_full *a) {
+ assert(a);
+
+ if (!a->cached_server_string)
+ (void) in_addr_port_ifindex_name_to_string(
+ a->family,
+ &a->address,
+ a->port,
+ a->ifindex,
+ a->server_name,
+ &a->cached_server_string);
+
+ return a->cached_server_string;
+}
diff --git a/src/shared/socket-netlink.h b/src/shared/socket-netlink.h
new file mode 100644
index 0000000..eac5991
--- /dev/null
+++ b/src/shared/socket-netlink.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+#include "macro.h"
+#include "socket-util.h"
+
+int resolve_ifname(sd_netlink **rtnl, const char *name);
+int resolve_interface(sd_netlink **rtnl, const char *name);
+int resolve_interface_or_warn(sd_netlink **rtnl, const char *name);
+
+int make_socket_fd(int log_level, const char* address, int type, int flags);
+
+int socket_address_parse(SocketAddress *a, const char *s);
+int socket_address_parse_and_warn(SocketAddress *a, const char *s);
+int socket_address_parse_netlink(SocketAddress *a, const char *s);
+
+bool socket_address_is(const SocketAddress *a, const char *s, int type);
+bool socket_address_is_netlink(const SocketAddress *a, const char *s);
+
+int in_addr_port_ifindex_name_from_string_auto(
+ const char *s,
+ int *ret_family,
+ union in_addr_union *ret_address,
+ uint16_t *ret_port,
+ int *ret_ifindex,
+ char **ret_server_name);
+static inline int in_addr_ifindex_name_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex, char **server_name) {
+ return in_addr_port_ifindex_name_from_string_auto(s, family, ret, NULL, ifindex, server_name);
+}
+static inline int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex) {
+ return in_addr_ifindex_name_from_string_auto(s, family, ret, ifindex, NULL);
+}
+
+struct in_addr_full {
+ int family;
+ union in_addr_union address;
+ uint16_t port;
+ int ifindex;
+ char *server_name;
+ char *cached_server_string; /* Should not be handled directly, but through in_addr_full_to_string(). */
+};
+
+struct in_addr_full *in_addr_full_free(struct in_addr_full *a);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct in_addr_full*, in_addr_full_free);
+int in_addr_full_new(int family, const union in_addr_union *a, uint16_t port, int ifindex, const char *server_name, struct in_addr_full **ret);
+int in_addr_full_new_from_string(const char *s, struct in_addr_full **ret);
+const char *in_addr_full_to_string(struct in_addr_full *a);
diff --git a/src/shared/spawn-ask-password-agent.c b/src/shared/spawn-ask-password-agent.c
new file mode 100644
index 0000000..1f07b19
--- /dev/null
+++ b/src/shared/spawn-ask-password-agent.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "process-util.h"
+#include "spawn-ask-password-agent.h"
+#include "util.h"
+
+static pid_t agent_pid = 0;
+
+int ask_password_agent_open(void) {
+ int r;
+
+ if (agent_pid > 0)
+ return 0;
+
+ /* We check STDIN here, not STDOUT, since this is about input,
+ * not output */
+ if (!isatty(STDIN_FILENO))
+ return 0;
+
+ if (!is_main_thread())
+ return -EPERM;
+
+ r = fork_agent("(sd-askpwagent)",
+ NULL, 0,
+ &agent_pid,
+ SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH,
+ SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, "--watch", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fork TTY ask password agent: %m");
+
+ return 1;
+}
+
+void ask_password_agent_close(void) {
+
+ if (agent_pid <= 0)
+ return;
+
+ /* Inform agent that we are done */
+ (void) kill_and_sigcont(agent_pid, SIGTERM);
+ (void) wait_for_terminate(agent_pid, NULL);
+ agent_pid = 0;
+}
+
+int ask_password_agent_open_if_enabled(BusTransport transport, bool ask_password) {
+
+ /* Open the ask password agent as a child process if necessary */
+
+ if (transport != BUS_TRANSPORT_LOCAL)
+ return 0;
+
+ if (!ask_password)
+ return 0;
+
+ return ask_password_agent_open();
+}
diff --git a/src/shared/spawn-ask-password-agent.h b/src/shared/spawn-ask-password-agent.h
new file mode 100644
index 0000000..a76cdb1
--- /dev/null
+++ b/src/shared/spawn-ask-password-agent.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "bus-util.h"
+
+int ask_password_agent_open(void);
+void ask_password_agent_close(void);
+
+int ask_password_agent_open_if_enabled(BusTransport transport, bool ask_password);
diff --git a/src/shared/spawn-polkit-agent.c b/src/shared/spawn-polkit-agent.c
new file mode 100644
index 0000000..a0024eb
--- /dev/null
+++ b/src/shared/spawn-polkit-agent.c
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "spawn-polkit-agent.h"
+#include "stdio-util.h"
+#include "time-util.h"
+#include "util.h"
+
+#if ENABLE_POLKIT
+static pid_t agent_pid = 0;
+
+int polkit_agent_open(void) {
+ char notify_fd[DECIMAL_STR_MAX(int) + 1];
+ int pipe_fd[2], r;
+
+ if (agent_pid > 0)
+ return 0;
+
+ /* Clients that run as root don't need to activate/query polkit */
+ if (geteuid() == 0)
+ return 0;
+
+ /* We check STDIN here, not STDOUT, since this is about input, not output */
+ if (!isatty(STDIN_FILENO))
+ return 0;
+
+ if (!is_main_thread())
+ return -EPERM;
+
+ if (pipe2(pipe_fd, 0) < 0)
+ return -errno;
+
+ xsprintf(notify_fd, "%i", pipe_fd[1]);
+
+ r = fork_agent("(polkit-agent)",
+ &pipe_fd[1], 1,
+ &agent_pid,
+ POLKIT_AGENT_BINARY_PATH,
+ POLKIT_AGENT_BINARY_PATH, "--notify-fd", notify_fd, "--fallback", NULL);
+
+ /* Close the writing side, because that's the one for the agent */
+ safe_close(pipe_fd[1]);
+
+ if (r < 0)
+ log_error_errno(r, "Failed to fork TTY ask password agent: %m");
+ else
+ /* Wait until the agent closes the fd */
+ fd_wait_for_event(pipe_fd[0], POLLHUP, USEC_INFINITY);
+
+ safe_close(pipe_fd[0]);
+
+ return r;
+}
+
+void polkit_agent_close(void) {
+
+ if (agent_pid <= 0)
+ return;
+
+ /* Inform agent that we are done */
+ (void) kill_and_sigcont(agent_pid, SIGTERM);
+ (void) wait_for_terminate(agent_pid, NULL);
+ agent_pid = 0;
+}
+
+#else
+
+int polkit_agent_open(void) {
+ return 0;
+}
+
+void polkit_agent_close(void) {
+}
+
+#endif
+
+int polkit_agent_open_if_enabled(BusTransport transport, bool ask_password) {
+
+ /* Open the polkit agent as a child process if necessary */
+
+ if (transport != BUS_TRANSPORT_LOCAL)
+ return 0;
+
+ if (!ask_password)
+ return 0;
+
+ return polkit_agent_open();
+}
diff --git a/src/shared/spawn-polkit-agent.h b/src/shared/spawn-polkit-agent.h
new file mode 100644
index 0000000..325dfdd
--- /dev/null
+++ b/src/shared/spawn-polkit-agent.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "bus-util.h"
+
+int polkit_agent_open(void);
+void polkit_agent_close(void);
+
+int polkit_agent_open_if_enabled(BusTransport transport, bool ask_password);
diff --git a/src/shared/specifier.c b/src/shared/specifier.c
new file mode 100644
index 0000000..86731f8
--- /dev/null
+++ b/src/shared/specifier.c
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/utsname.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "os-util.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+/*
+ * Generic infrastructure for replacing %x style specifiers in
+ * strings. Will call a callback for each replacement.
+ */
+
+/* Any ASCII character or digit: our pool of potential specifiers,
+ * and "%" used for escaping. */
+#define POSSIBLE_SPECIFIERS ALPHANUMERICAL "%"
+
+int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **_ret) {
+ size_t l, allocated = 0;
+ _cleanup_free_ char *ret = NULL;
+ char *t;
+ const char *f;
+ bool percent = false;
+ int r;
+
+ assert(text);
+ assert(table);
+
+ l = strlen(text);
+ if (!GREEDY_REALLOC(ret, allocated, l + 1))
+ return -ENOMEM;
+ t = ret;
+
+ for (f = text; *f; f++, l--)
+ if (percent) {
+ if (*f == '%')
+ *(t++) = '%';
+ else {
+ const Specifier *i;
+
+ for (i = table; i->specifier; i++)
+ if (i->specifier == *f)
+ break;
+
+ if (i->lookup) {
+ _cleanup_free_ char *w = NULL;
+ size_t k, j;
+
+ r = i->lookup(i->specifier, i->data, userdata, &w);
+ if (r < 0)
+ return r;
+
+ j = t - ret;
+ k = strlen(w);
+
+ if (!GREEDY_REALLOC(ret, allocated, j + k + l + 1))
+ return -ENOMEM;
+ memcpy(ret + j, w, k);
+ t = ret + j + k;
+ } else if (strchr(POSSIBLE_SPECIFIERS, *f))
+ /* Oops, an unknown specifier. */
+ return -EBADSLT;
+ else {
+ *(t++) = '%';
+ *(t++) = *f;
+ }
+ }
+
+ percent = false;
+ } else if (*f == '%')
+ percent = true;
+ else
+ *(t++) = *f;
+
+ /* If string ended with a stray %, also end with % */
+ if (percent)
+ *(t++) = '%';
+ *(t++) = 0;
+
+ /* Try to deallocate unused bytes, but don't sweat it too much */
+ if ((size_t)(t - ret) < allocated) {
+ t = realloc(ret, t - ret);
+ if (t)
+ ret = t;
+ }
+
+ *_ret = TAKE_PTR(ret);
+ return 0;
+}
+
+/* Generic handler for simple string replacements */
+
+int specifier_string(char specifier, const void *data, const void *userdata, char **ret) {
+ char *n;
+
+ n = strdup(strempty(data));
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret) {
+ sd_id128_t id;
+ char *n;
+ int r;
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ n = new(char, 33);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = sd_id128_to_string(id, n);
+ return 0;
+}
+
+int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret) {
+ sd_id128_t id;
+ char *n;
+ int r;
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ n = new(char, 33);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = sd_id128_to_string(id, n);
+ return 0;
+}
+
+int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *n;
+
+ n = gethostname_malloc();
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_short_host_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *n;
+
+ n = gethostname_short_malloc();
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret) {
+ struct utsname uts;
+ char *n;
+ int r;
+
+ r = uname(&uts);
+ if (r < 0)
+ return -errno;
+
+ n = strdup(uts.release);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_architecture(char specifier, const void *data, const void *userdata, char **ret) {
+ char *t;
+
+ t = strdup(architecture_to_string(uname_architecture()));
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+static int specifier_os_release_common(const char *field, char **ret) {
+ char *t = NULL;
+ int r;
+
+ r = parse_os_release(NULL, field, &t, NULL);
+ if (r < 0)
+ return r;
+ if (!t) {
+ /* fields in /etc/os-release might quite possibly be missing, even if everything is entirely
+ * valid otherwise. Let's hence return "" in that case. */
+ t = strdup("");
+ if (!t)
+ return -ENOMEM;
+ }
+
+ *ret = t;
+ return 0;
+}
+
+int specifier_os_id(char specifier, const void *data, const void *userdata, char **ret) {
+ return specifier_os_release_common("ID", ret);
+}
+
+int specifier_os_version_id(char specifier, const void *data, const void *userdata, char **ret) {
+ return specifier_os_release_common("VERSION_ID", ret);
+}
+
+int specifier_os_build_id(char specifier, const void *data, const void *userdata, char **ret) {
+ return specifier_os_release_common("BUILD_ID", ret);
+}
+
+int specifier_os_variant_id(char specifier, const void *data, const void *userdata, char **ret) {
+ return specifier_os_release_common("VARIANT_ID", ret);
+}
+
+int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *t;
+
+ t = gid_to_name(getgid());
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret) {
+ if (asprintf(ret, UID_FMT, getgid()) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *t;
+
+ /* If we are UID 0 (root), this will not result in NSS, otherwise it might. This is good, as we want to be able
+ * to run this in PID 1, where our user ID is 0, but where NSS lookups are not allowed.
+
+ * We don't use getusername_malloc() here, because we don't want to look at $USER, to remain consistent with
+ * specifer_user_id() below.
+ */
+
+ t = uid_to_name(getuid());
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret) {
+
+ if (asprintf(ret, UID_FMT, getuid()) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret) {
+
+ /* On PID 1 (which runs as root) this will not result in NSS,
+ * which is good. See above */
+
+ return get_home_dir(ret);
+}
+
+int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret) {
+
+ /* On PID 1 (which runs as root) this will not result in NSS,
+ * which is good. See above */
+
+ return get_shell(ret);
+}
+
+int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) {
+ const char *p;
+ char *copy;
+ int r;
+
+ r = tmp_dir(&p);
+ if (r < 0)
+ return r;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+}
+
+int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) {
+ const char *p;
+ char *copy;
+ int r;
+
+ r = var_tmp_dir(&p);
+ if (r < 0)
+ return r;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+}
+
+int specifier_escape_strv(char **l, char ***ret) {
+ char **z, **p, **q;
+
+ assert(ret);
+
+ if (strv_isempty(l)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ z = new(char*, strv_length(l)+1);
+ if (!z)
+ return -ENOMEM;
+
+ for (p = l, q = z; *p; p++, q++) {
+
+ *q = specifier_escape(*p);
+ if (!*q) {
+ strv_free(z);
+ return -ENOMEM;
+ }
+ }
+
+ *q = NULL;
+ *ret = z;
+
+ return 0;
+}
diff --git a/src/shared/specifier.h b/src/shared/specifier.h
new file mode 100644
index 0000000..1323b41
--- /dev/null
+++ b/src/shared/specifier.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "string-util.h"
+
+typedef int (*SpecifierCallback)(char specifier, const void *data, const void *userdata, char **ret);
+
+typedef struct Specifier {
+ const char specifier;
+ const SpecifierCallback lookup;
+ const void *data;
+} Specifier;
+
+int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **ret);
+
+int specifier_string(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_short_host_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_architecture(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_os_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_os_version_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_os_build_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_os_variant_id(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret);
+
+/* Typically, in places where one of the above specifier is to be resolved the other similar ones are to be
+ * resolved, too. Hence let's define common macros for the relevant array entries.
+ *
+ * COMMON_SYSTEM_SPECIFIERS:
+ * %a: the native userspace architecture
+ * %b: the boot ID of the running system
+ * %B: the OS build ID, according to /etc/os-release
+ * %H: the hostname of the running system
+ * %l: the short hostname of the running system
+ * %m: the machine ID of the running system
+ * %o: the OS ID according to /etc/os-release
+ * %v: the kernel version
+ * %w: the OS version ID, according to /etc/os-release
+ * %W: the OS variant ID, according to /etc/os-release
+ *
+ * COMMON_CREDS_SPECIFIERS:
+ * %g: the groupname of the running user
+ * %G: the GID of the running user
+ * %u: the username of the running user
+ * %U: the UID of the running user
+ *
+ * COMMON_TMP_SPECIFIERS:
+ * %T: the temporary directory (e.g. /tmp, or $TMPDIR, $TEMP, $TMP)
+ * %V: the temporary directory for large, persistent stuff (e.g. /var/tmp, or $TMPDIR, $TEMP, $TMP)
+ */
+
+#define COMMON_SYSTEM_SPECIFIERS \
+ { 'a', specifier_architecture, NULL }, \
+ { 'b', specifier_boot_id, NULL }, \
+ { 'B', specifier_os_build_id, NULL }, \
+ { 'H', specifier_host_name, NULL }, \
+ { 'l', specifier_short_host_name, NULL }, \
+ { 'm', specifier_machine_id, NULL }, \
+ { 'o', specifier_os_id, NULL }, \
+ { 'v', specifier_kernel_release, NULL }, \
+ { 'w', specifier_os_version_id, NULL }, \
+ { 'W', specifier_os_variant_id, NULL }
+
+#define COMMON_CREDS_SPECIFIERS \
+ { 'g', specifier_group_name, NULL }, \
+ { 'G', specifier_group_id, NULL }, \
+ { 'u', specifier_user_name, NULL }, \
+ { 'U', specifier_user_id, NULL }
+
+#define COMMON_TMP_SPECIFIERS \
+ { 'T', specifier_tmp_dir, NULL }, \
+ { 'V', specifier_var_tmp_dir, NULL }
+
+static inline char* specifier_escape(const char *string) {
+ return strreplace(string, "%", "%%");
+}
+
+int specifier_escape_strv(char **l, char ***ret);
diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c
new file mode 100644
index 0000000..e0dd17a
--- /dev/null
+++ b/src/shared/switch-root.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "base-filesystem.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "switch-root.h"
+#include "user-util.h"
+#include "util.h"
+
+int switch_root(const char *new_root,
+ const char *old_root_after, /* path below the new root, where to place the old root after the transition */
+ bool unmount_old_root,
+ unsigned long mount_flags) { /* MS_MOVE or MS_BIND */
+
+ _cleanup_free_ char *resolved_old_root_after = NULL;
+ _cleanup_close_ int old_root_fd = -1;
+ bool old_root_remove;
+ const char *i;
+ int r;
+
+ assert(new_root);
+ assert(old_root_after);
+
+ if (path_equal(new_root, "/"))
+ return 0;
+
+ /* Check if we shall remove the contents of the old root */
+ old_root_remove = in_initrd();
+ if (old_root_remove) {
+ old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
+ if (old_root_fd < 0)
+ return log_error_errno(errno, "Failed to open root directory: %m");
+ }
+
+ /* Determine where we shall place the old root after the transition */
+ r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after);
+ if (r == 0) /* Doesn't exist yet. Let's create it */
+ (void) mkdir_p_label(resolved_old_root_after, 0755);
+
+ /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence
+ * remount them MS_PRIVATE here as a work-around.
+ *
+ * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */
+ if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
+ return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m");
+
+ FOREACH_STRING(i, "/sys", "/dev", "/run", "/proc") {
+ _cleanup_free_ char *chased = NULL;
+
+ r = chase_symlinks(i, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, i);
+ if (r > 0) {
+ /* Already exists. Let's see if it is a mount point already. */
+ r = path_is_mount_point(chased, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
+ if (r > 0) /* If it is already mounted, then do nothing */
+ continue;
+ } else
+ /* Doesn't exist yet? */
+ (void) mkdir_p_label(chased, 0755);
+
+ if (mount(i, chased, NULL, mount_flags, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount %s to %s: %m", i, chased);
+ }
+
+ /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
+ * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
+ * and switch_root() nevertheless. */
+ (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID);
+
+ if (chdir(new_root) < 0)
+ return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);
+
+ /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /),
+ * that's not possible however, and hence we simply overmount root */
+ if (pivot_root(new_root, resolved_old_root_after) >= 0) {
+
+ /* Immediately get rid of the old root, if detach_oldroot is set.
+ * Since we are running off it we need to do this lazily. */
+ if (unmount_old_root) {
+ r = umount_recursive(old_root_after, MNT_DETACH);
+ if (r < 0)
+ log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m");
+ }
+
+ } else if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0)
+ return log_error_errno(errno, "Failed to move %s to /: %m", new_root);
+
+ if (chroot(".") < 0)
+ return log_error_errno(errno, "Failed to change root: %m");
+
+ if (chdir("/") < 0)
+ return log_error_errno(errno, "Failed to change directory: %m");
+
+ if (old_root_fd >= 0) {
+ struct stat rb;
+
+ if (fstat(old_root_fd, &rb) < 0)
+ log_warning_errno(errno, "Failed to stat old root directory, leaving: %m");
+ else
+ (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */
+ }
+
+ return 0;
+}
diff --git a/src/shared/switch-root.h b/src/shared/switch-root.h
new file mode 100644
index 0000000..4e04283
--- /dev/null
+++ b/src/shared/switch-root.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags);
diff --git a/src/shared/syscall-names.text b/src/shared/syscall-names.text
new file mode 100644
index 0000000..f1b7e29
--- /dev/null
+++ b/src/shared/syscall-names.text
@@ -0,0 +1,598 @@
+_llseek
+_newselect
+_sysctl
+accept
+accept4
+access
+acct
+add_key
+adjtimex
+alarm
+arc_gettls
+arc_settls
+arc_usr_cmpxchg
+arch_prctl
+arm_fadvise64_64
+arm_sync_file_range
+atomic_barrier
+atomic_cmpxchg_32
+bdflush
+bfin_spinlock
+bind
+bpf
+brk
+cache_sync
+cachectl
+cacheflush
+capget
+capset
+chdir
+chmod
+chown
+chown32
+chroot
+clock_adjtime
+clock_adjtime64
+clock_getres
+clock_getres_time64
+clock_gettime
+clock_gettime64
+clock_nanosleep
+clock_nanosleep_time64
+clock_settime
+clock_settime64
+clone
+clone2
+clone3
+close
+close_range
+connect
+copy_file_range
+creat
+create_module
+delete_module
+dipc
+dup
+dup2
+dup3
+epoll_create
+epoll_create1
+epoll_ctl
+epoll_ctl_old
+epoll_pwait
+epoll_wait
+epoll_wait_old
+eventfd
+eventfd2
+exec_with_loader
+execv
+execve
+execveat
+exit
+exit_group
+faccessat
+faccessat2
+fadvise64
+fadvise64_64
+fallocate
+fanotify_init
+fanotify_mark
+fchdir
+fchmod
+fchmodat
+fchown
+fchown32
+fchownat
+fcntl
+fcntl64
+fdatasync
+fgetxattr
+finit_module
+flistxattr
+flock
+fork
+fp_udfiex_crtl
+fremovexattr
+fsconfig
+fsetxattr
+fsmount
+fsopen
+fspick
+fstat
+fstat64
+fstatat64
+fstatfs
+fstatfs64
+fsync
+ftruncate
+ftruncate64
+futex
+futex_time64
+futimesat
+get_kernel_syms
+get_mempolicy
+get_robust_list
+get_thread_area
+getcpu
+getcwd
+getdents
+getdents64
+getdomainname
+getdtablesize
+getegid
+getegid32
+geteuid
+geteuid32
+getgid
+getgid32
+getgroups
+getgroups32
+gethostname
+getitimer
+getpagesize
+getpeername
+getpgid
+getpgrp
+getpid
+getpmsg
+getppid
+getpriority
+getrandom
+getresgid
+getresgid32
+getresuid
+getresuid32
+getrlimit
+getrusage
+getsid
+getsockname
+getsockopt
+gettid
+gettimeofday
+getuid
+getuid32
+getunwind
+getxattr
+getxgid
+getxpid
+getxuid
+idle
+init_module
+inotify_add_watch
+inotify_init
+inotify_init1
+inotify_rm_watch
+io_cancel
+io_destroy
+io_getevents
+io_pgetevents
+io_pgetevents_time64
+io_setup
+io_submit
+io_uring_enter
+io_uring_register
+io_uring_setup
+ioctl
+ioperm
+iopl
+ioprio_get
+ioprio_set
+ipc
+kcmp
+kern_features
+kexec_file_load
+kexec_load
+keyctl
+kill
+lchown
+lchown32
+lgetxattr
+link
+linkat
+listen
+listxattr
+llistxattr
+lookup_dcookie
+lremovexattr
+lseek
+lsetxattr
+lstat
+lstat64
+madvise
+mbind
+membarrier
+memfd_create
+memory_ordering
+migrate_pages
+mincore
+mkdir
+mkdirat
+mknod
+mknodat
+mlock
+mlock2
+mlockall
+mmap
+mmap2
+modify_ldt
+mount
+move_mount
+move_pages
+mprotect
+mq_getsetattr
+mq_notify
+mq_open
+mq_timedreceive
+mq_timedreceive_time64
+mq_timedsend
+mq_timedsend_time64
+mq_unlink
+mremap
+msgctl
+msgget
+msgrcv
+msgsnd
+msync
+multiplexer
+munlock
+munlockall
+munmap
+name_to_handle_at
+nanosleep
+newfstatat
+nfsservctl
+ni_syscall
+nice
+old_adjtimex
+old_getpagesize
+oldfstat
+oldlstat
+oldolduname
+oldstat
+oldumount
+olduname
+open
+open_by_handle_at
+open_tree
+openat
+openat2
+or1k_atomic
+osf_adjtime
+osf_afs_syscall
+osf_alt_plock
+osf_alt_setsid
+osf_alt_sigpending
+osf_asynch_daemon
+osf_audcntl
+osf_audgen
+osf_chflags
+osf_execve
+osf_exportfs
+osf_fchflags
+osf_fdatasync
+osf_fpathconf
+osf_fstat
+osf_fstatfs
+osf_fstatfs64
+osf_fuser
+osf_getaddressconf
+osf_getdirentries
+osf_getdomainname
+osf_getfh
+osf_getfsstat
+osf_gethostid
+osf_getitimer
+osf_getlogin
+osf_getmnt
+osf_getrusage
+osf_getsysinfo
+osf_gettimeofday
+osf_kloadcall
+osf_kmodcall
+osf_lstat
+osf_memcntl
+osf_mincore
+osf_mount
+osf_mremap
+osf_msfs_syscall
+osf_msleep
+osf_mvalid
+osf_mwakeup
+osf_naccept
+osf_nfssvc
+osf_ngetpeername
+osf_ngetsockname
+osf_nrecvfrom
+osf_nrecvmsg
+osf_nsendmsg
+osf_ntp_adjtime
+osf_ntp_gettime
+osf_old_creat
+osf_old_fstat
+osf_old_getpgrp
+osf_old_killpg
+osf_old_lstat
+osf_old_open
+osf_old_sigaction
+osf_old_sigblock
+osf_old_sigreturn
+osf_old_sigsetmask
+osf_old_sigvec
+osf_old_stat
+osf_old_vadvise
+osf_old_vtrace
+osf_old_wait
+osf_oldquota
+osf_pathconf
+osf_pid_block
+osf_pid_unblock
+osf_plock
+osf_priocntlset
+osf_profil
+osf_proplist_syscall
+osf_reboot
+osf_revoke
+osf_sbrk
+osf_security
+osf_select
+osf_set_program_attributes
+osf_set_speculative
+osf_sethostid
+osf_setitimer
+osf_setlogin
+osf_setsysinfo
+osf_settimeofday
+osf_shmat
+osf_signal
+osf_sigprocmask
+osf_sigsendset
+osf_sigstack
+osf_sigwaitprim
+osf_sstk
+osf_stat
+osf_statfs
+osf_statfs64
+osf_subsys_info
+osf_swapctl
+osf_swapon
+osf_syscall
+osf_sysinfo
+osf_table
+osf_uadmin
+osf_usleep_thread
+osf_uswitch
+osf_utc_adjtime
+osf_utc_gettime
+osf_utimes
+osf_utsname
+osf_wait4
+osf_waitid
+pause
+pciconfig_iobase
+pciconfig_read
+pciconfig_write
+perf_event_open
+perfctr
+perfmonctl
+personality
+pidfd_getfd
+pidfd_open
+pidfd_send_signal
+pipe
+pipe2
+pivot_root
+pkey_alloc
+pkey_free
+pkey_mprotect
+poll
+ppoll
+ppoll_time64
+prctl
+pread64
+preadv
+preadv2
+prlimit64
+process_madvise
+process_vm_readv
+process_vm_writev
+pselect6
+pselect6_time64
+ptrace
+pwrite64
+pwritev
+pwritev2
+query_module
+quotactl
+read
+readahead
+readdir
+readlink
+readlinkat
+readv
+reboot
+recv
+recvfrom
+recvmmsg
+recvmmsg_time64
+recvmsg
+remap_file_pages
+removexattr
+rename
+renameat
+renameat2
+request_key
+restart_syscall
+riscv_flush_icache
+rmdir
+rseq
+rt_sigaction
+rt_sigpending
+rt_sigprocmask
+rt_sigqueueinfo
+rt_sigreturn
+rt_sigsuspend
+rt_sigtimedwait
+rt_sigtimedwait_time64
+rt_tgsigqueueinfo
+rtas
+s390_guarded_storage
+s390_pci_mmio_read
+s390_pci_mmio_write
+s390_runtime_instr
+s390_sthyi
+sched_get_affinity
+sched_get_priority_max
+sched_get_priority_min
+sched_getaffinity
+sched_getattr
+sched_getparam
+sched_getscheduler
+sched_rr_get_interval
+sched_rr_get_interval_time64
+sched_set_affinity
+sched_setaffinity
+sched_setattr
+sched_setparam
+sched_setscheduler
+sched_yield
+seccomp
+select
+semctl
+semget
+semop
+semtimedop
+semtimedop_time64
+send
+sendfile
+sendfile64
+sendmmsg
+sendmsg
+sendto
+set_mempolicy
+set_robust_list
+set_thread_area
+set_tid_address
+setdomainname
+setfsgid
+setfsgid32
+setfsuid
+setfsuid32
+setgid
+setgid32
+setgroups
+setgroups32
+sethae
+sethostname
+setitimer
+setns
+setpgid
+setpgrp
+setpriority
+setregid
+setregid32
+setresgid
+setresgid32
+setresuid
+setresuid32
+setreuid
+setreuid32
+setrlimit
+setsid
+setsockopt
+settimeofday
+setuid
+setuid32
+setxattr
+sgetmask
+shmat
+shmctl
+shmdt
+shmget
+shutdown
+sigaction
+sigaltstack
+signal
+signalfd
+signalfd4
+sigpending
+sigprocmask
+sigreturn
+sigsuspend
+socket
+socketcall
+socketpair
+splice
+spu_create
+spu_run
+ssetmask
+stat
+stat64
+statfs
+statfs64
+statx
+stime
+subpage_prot
+swapcontext
+swapoff
+swapon
+switch_endian
+symlink
+symlinkat
+sync
+sync_file_range
+sync_file_range2
+syncfs
+sys_debug_setcontext
+syscall
+sysfs
+sysinfo
+syslog
+sysmips
+tee
+tgkill
+time
+timer_create
+timer_delete
+timer_getoverrun
+timer_gettime
+timer_gettime64
+timer_settime
+timer_settime64
+timerfd
+timerfd_create
+timerfd_gettime
+timerfd_gettime64
+timerfd_settime
+timerfd_settime64
+times
+tkill
+truncate
+truncate64
+udftrap
+ugetrlimit
+umask
+umount
+umount2
+uname
+unlink
+unlinkat
+unshare
+uselib
+userfaultfd
+ustat
+utime
+utimensat
+utimensat_time64
+utimes
+utimesat
+utrap_install
+vfork
+vhangup
+vm86
+vm86old
+vmsplice
+wait4
+waitid
+waitpid
+write
+writev
diff --git a/src/shared/sysctl-util.c b/src/shared/sysctl-util.c
new file mode 100644
index 0000000..670c331
--- /dev/null
+++ b/src/shared/sysctl-util.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "sysctl-util.h"
+
+char *sysctl_normalize(char *s) {
+ char *n;
+
+ n = strpbrk(s, "/.");
+
+ /* If the first separator is a slash, the path is
+ * assumed to be normalized and slashes remain slashes
+ * and dots remains dots. */
+
+ if (n && *n == '.')
+ /* Dots become slashes and slashes become dots. Fun. */
+ do {
+ if (*n == '.')
+ *n = '/';
+ else
+ *n = '.';
+
+ n = strpbrk(n + 1, "/.");
+ } while (n);
+
+ path_simplify(s, true);
+
+ /* Kill the leading slash, but keep the first character of the string in the same place. */
+ if (*s == '/' && *(s+1))
+ memmove(s, s+1, strlen(s));
+
+ return s;
+}
+
+int sysctl_write(const char *property, const char *value) {
+ char *p;
+ _cleanup_close_ int fd = -1;
+
+ assert(property);
+ assert(value);
+
+ log_debug("Setting '%s' to '%.*s'.", property, (int) strcspn(value, NEWLINE), value);
+
+ p = strjoina("/proc/sys/", property);
+ fd = open(p, O_WRONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!endswith(value, "\n"))
+ value = strjoina(value, "\n");
+
+ if (write(fd, value, strlen(value)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int sysctl_writef(const char *property, const char *format, ...) {
+ _cleanup_free_ char *v = NULL;
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = vasprintf(&v, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return sysctl_write(property, v);
+}
+
+int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value) {
+ const char *p;
+
+ assert(IN_SET(af, AF_INET, AF_INET6));
+ assert(property);
+ assert(value);
+
+ p = strjoina("/proc/sys/net/ipv", af == AF_INET ? "4" : "6",
+ ifname ? "/conf/" : "", strempty(ifname),
+ property[0] == '/' ? "" : "/", property);
+
+ log_debug("Setting '%s' to '%s'", p, value);
+
+ return write_string_file(p, value, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+int sysctl_read(const char *property, char **content) {
+ char *p;
+
+ assert(property);
+ assert(content);
+
+ p = strjoina("/proc/sys/", property);
+ return read_full_file(p, content, NULL);
+}
+
+int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret) {
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r;
+
+ assert(IN_SET(af, AF_INET, AF_INET6));
+ assert(property);
+
+ p = strjoina("/proc/sys/net/ipv", af == AF_INET ? "4" : "6",
+ ifname ? "/conf/" : "", strempty(ifname),
+ property[0] == '/' ? "" : "/", property);
+
+ r = read_one_line_file(p, &value);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = TAKE_PTR(value);
+
+ return r;
+}
diff --git a/src/shared/sysctl-util.h b/src/shared/sysctl-util.h
new file mode 100644
index 0000000..3236419
--- /dev/null
+++ b/src/shared/sysctl-util.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "macro.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+char *sysctl_normalize(char *s);
+int sysctl_read(const char *property, char **value);
+int sysctl_write(const char *property, const char *value);
+int sysctl_writef(const char *property, const char *format, ...) _printf_(2, 3);
+
+int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret);
+int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value);
+static inline int sysctl_write_ip_property_boolean(int af, const char *ifname, const char *property, bool value) {
+ return sysctl_write_ip_property(af, ifname, property, one_zero(value));
+}
+
+#define DEFINE_SYSCTL_WRITE_IP_PROPERTY(name, type, format) \
+ static inline int sysctl_write_ip_property_##name(int af, const char *ifname, const char *property, type value) { \
+ char buf[DECIMAL_STR_MAX(type)]; \
+ xsprintf(buf, format, value); \
+ return sysctl_write_ip_property(af, ifname, property, buf); \
+ }
+
+DEFINE_SYSCTL_WRITE_IP_PROPERTY(int, int, "%i");
+DEFINE_SYSCTL_WRITE_IP_PROPERTY(uint32, uint32_t, "%" PRIu32);
diff --git a/src/shared/test-tables.h b/src/shared/test-tables.h
new file mode 100644
index 0000000..bb8177b
--- /dev/null
+++ b/src/shared/test-tables.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef const char* (*lookup_t)(int);
+typedef int (*reverse_t)(const char*);
+
+static inline void _test_table(const char *name,
+ lookup_t lookup,
+ reverse_t reverse,
+ int size,
+ bool sparse) {
+ int i, boring = 0;
+
+ for (i = -1; i < size + 1; i++) {
+ const char* val = lookup(i);
+ int rev;
+
+ if (val) {
+ rev = reverse(val);
+ boring = 0;
+ } else {
+ rev = reverse("--no-such--value----");
+ boring += i >= 0;
+ }
+
+ if (boring < 1 || i == size)
+ printf("%s: %d → %s → %d\n", name, i, val, rev);
+ else if (boring == 1)
+ printf("%*s ...\n", (int) strlen(name), "");
+
+ assert_se(!(i >= 0 && i < size ?
+ sparse ? rev != i && rev != -1 : val == NULL || rev != i :
+ val != NULL || rev != -1));
+ }
+}
+
+#define test_table(lower, upper) \
+ _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, false)
+
+#define test_table_sparse(lower, upper) \
+ _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, true)
diff --git a/src/shared/tests.c b/src/shared/tests.c
new file mode 100644
index 0000000..ab7d799
--- /dev/null
+++ b/src/shared/tests.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <util.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-util.h"
+#include "bus-wait-for-jobs.h"
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "namespace-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "strv.h"
+#include "tests.h"
+
+char* setup_fake_runtime_dir(void) {
+ char t[] = "/tmp/fake-xdg-runtime-XXXXXX", *p;
+
+ assert_se(mkdtemp(t));
+ assert_se(setenv("XDG_RUNTIME_DIR", t, 1) >= 0);
+ assert_se(p = strdup(t));
+
+ return p;
+}
+
+static void load_testdata_env(void) {
+ static bool called = false;
+ _cleanup_free_ char *s = NULL;
+ _cleanup_free_ char *envpath = NULL;
+ _cleanup_strv_free_ char **pairs = NULL;
+ char **k, **v;
+
+ if (called)
+ return;
+ called = true;
+
+ assert_se(readlink_and_make_absolute("/proc/self/exe", &s) >= 0);
+ dirname(s);
+
+ envpath = path_join(s, "systemd-runtest.env");
+ if (load_env_file_pairs(NULL, envpath, &pairs) < 0)
+ return;
+
+ STRV_FOREACH_PAIR(k, v, pairs)
+ setenv(*k, *v, 0);
+}
+
+int get_testdata_dir(const char *suffix, char **ret) {
+ const char *dir;
+ char *p;
+
+ load_testdata_env();
+
+ /* if the env var is set, use that */
+ dir = getenv("SYSTEMD_TEST_DATA");
+ if (!dir)
+ dir = SYSTEMD_TEST_DATA;
+ if (access(dir, F_OK) < 0)
+ return log_error_errno(errno, "ERROR: $SYSTEMD_TEST_DATA directory [%s] not accessible: %m", dir);
+
+ p = path_join(dir, suffix);
+ if (!p)
+ return log_oom();
+
+ *ret = p;
+ return 0;
+}
+
+const char* get_catalog_dir(void) {
+ const char *env;
+
+ load_testdata_env();
+
+ /* if the env var is set, use that */
+ env = getenv("SYSTEMD_CATALOG_DIR");
+ if (!env)
+ env = SYSTEMD_CATALOG_DIR;
+ if (access(env, F_OK) < 0) {
+ fprintf(stderr, "ERROR: $SYSTEMD_CATALOG_DIR directory [%s] does not exist\n", env);
+ exit(EXIT_FAILURE);
+ }
+ return env;
+}
+
+bool slow_tests_enabled(void) {
+ int r;
+
+ r = getenv_bool("SYSTEMD_SLOW_TESTS");
+ if (r >= 0)
+ return r;
+
+ if (r != -ENXIO)
+ log_warning_errno(r, "Cannot parse $SYSTEMD_SLOW_TESTS, ignoring.");
+ return SYSTEMD_SLOW_TESTS_DEFAULT;
+}
+
+void test_setup_logging(int level) {
+ log_set_max_level(level);
+ log_parse_environment();
+ log_open();
+}
+
+int log_tests_skipped(const char *message) {
+ log_notice("%s: %s, skipping tests.",
+ program_invocation_short_name, message);
+ return EXIT_TEST_SKIP;
+}
+
+int log_tests_skipped_errno(int r, const char *message) {
+ log_notice_errno(r, "%s: %s, skipping tests: %m",
+ program_invocation_short_name, message);
+ return EXIT_TEST_SKIP;
+}
+
+bool have_namespaces(void) {
+ siginfo_t si = {};
+ pid_t pid;
+
+ /* Checks whether namespaces are available. In some cases they aren't. We do this by calling unshare(), and we
+ * do so in a child process in order not to affect our own process. */
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ /* child */
+ if (detach_mount_namespace() < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(waitid(P_PID, pid, &si, WEXITED) >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+
+ if (si.si_status == EXIT_SUCCESS)
+ return true;
+
+ if (si.si_status == EXIT_FAILURE)
+ return false;
+
+ assert_not_reached("unexpected exit code");
+}
+
+bool can_memlock(void) {
+ /* Let's see if we can mlock() a larger blob of memory. BPF programs are charged against
+ * RLIMIT_MEMLOCK, hence let's first make sure we can lock memory at all, and skip the test if we
+ * cannot. Why not check RLIMIT_MEMLOCK explicitly? Because in container environments the
+ * RLIMIT_MEMLOCK value we see might not match the RLIMIT_MEMLOCK value actually in effect. */
+
+ void *p = mmap(NULL, CAN_MEMLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
+ if (p == MAP_FAILED)
+ return false;
+
+ bool b = mlock(p, CAN_MEMLOCK_SIZE) >= 0;
+ if (b)
+ assert_se(munlock(p, CAN_MEMLOCK_SIZE) >= 0);
+
+ assert_se(munmap(p, CAN_MEMLOCK_SIZE) >= 0);
+ return b;
+}
+
+static int allocate_scope(void) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *scope = NULL;
+ const char *object;
+ int r;
+
+ /* Let's try to run this test in a scope of its own, with delegation turned on, so that PID 1 doesn't
+ * interfere with our cgroup management. */
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_oom();
+
+ if (asprintf(&scope, "%s-%" PRIx64 ".scope", program_invocation_short_name, random_u64()) < 0)
+ return log_oom();
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and Mode */
+ r = sd_bus_message_append(m, "ss", scope, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, (uint32_t) getpid_cached());
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "Delegate", "b", 1);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "CollectMode", "s", "inactive-or-failed");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient scope unit: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, false);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int enter_cgroup(char **ret_cgroup, bool enter_subroot) {
+ _cleanup_free_ char *cgroup_root = NULL, *cgroup_subroot = NULL;
+ CGroupMask supported;
+ int r;
+
+ r = allocate_scope();
+ if (r < 0)
+ log_warning_errno(r, "Couldn't allocate a scope unit for this test, proceeding without.");
+
+ r = cg_pid_get_path(NULL, 0, &cgroup_root);
+ if (r == -ENOMEDIUM)
+ return log_warning_errno(r, "cg_pid_get_path(NULL, 0, ...) failed: %m");
+ assert(r >= 0);
+
+ if (enter_subroot)
+ assert_se(asprintf(&cgroup_subroot, "%s/%" PRIx64, cgroup_root, random_u64()) >= 0);
+ else {
+ cgroup_subroot = strdup(cgroup_root);
+ assert_se(cgroup_subroot != NULL);
+ }
+
+ assert_se(cg_mask_supported(&supported) >= 0);
+
+ /* If this fails, then we don't mind as the later cgroup operations will fail too, and it's fine if
+ * we handle any errors at that point. */
+
+ r = cg_create_everywhere(supported, _CGROUP_MASK_ALL, cgroup_subroot);
+ if (r < 0)
+ return r;
+
+ r = cg_attach_everywhere(supported, cgroup_subroot, 0, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ if (ret_cgroup)
+ *ret_cgroup = TAKE_PTR(cgroup_subroot);
+
+ return 0;
+}
+
+int enter_cgroup_subroot(char **ret_cgroup) {
+ return enter_cgroup(ret_cgroup, true);
+}
+
+int enter_cgroup_root(char **ret_cgroup) {
+ return enter_cgroup(ret_cgroup, false);
+}
+
+const char *ci_environment(void) {
+ /* We return a string because we might want to provide multiple bits of information later on: not
+ * just the general CI environment type, but also whether we're sanitizing or not, etc. The caller is
+ * expected to use strstr on the returned value. */
+ static const char *ans = POINTER_MAX;
+ const char *p;
+ int r;
+
+ if (ans != POINTER_MAX)
+ return ans;
+
+ /* We allow specifying the environment with $CITYPE. Nobody uses this so far, but we are ready. */
+ p = getenv("CITYPE");
+ if (!isempty(p))
+ return (ans = p);
+
+ if (getenv_bool("TRAVIS") > 0)
+ return (ans = "travis");
+ if (getenv_bool("SEMAPHORE") > 0)
+ return (ans = "semaphore");
+ if (getenv_bool("GITHUB_ACTIONS") > 0)
+ return (ans = "github-actions");
+ if (getenv("AUTOPKGTEST_ARTIFACTS") || getenv("AUTOPKGTEST_TMP"))
+ return (ans = "autopkgtest");
+
+ FOREACH_STRING(p, "CI", "CONTINOUS_INTEGRATION") {
+ /* Those vars are booleans according to Semaphore and Travis docs:
+ * https://docs.travis-ci.com/user/environment-variables/#default-environment-variables
+ * https://docs.semaphoreci.com/ci-cd-environment/environment-variables/#ci
+ */
+ r = getenv_bool(p);
+ if (r > 0)
+ return (ans = "unknown"); /* Some other unknown thing */
+ if (r == 0)
+ return (ans = NULL);
+ }
+
+ return (ans = NULL);
+}
diff --git a/src/shared/tests.h b/src/shared/tests.h
new file mode 100644
index 0000000..c135076
--- /dev/null
+++ b/src/shared/tests.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-daemon.h"
+
+#include "macro.h"
+
+static inline bool manager_errno_skip_test(int r) {
+ return IN_SET(abs(r),
+ EPERM,
+ EACCES,
+ EADDRINUSE,
+ EHOSTDOWN,
+ ENOENT,
+ ENOMEDIUM /* cannot determine cgroup */
+ );
+}
+
+char* setup_fake_runtime_dir(void);
+int enter_cgroup_subroot(char **ret_cgroup);
+int enter_cgroup_root(char **ret_cgroup);
+int get_testdata_dir(const char *suffix, char **ret);
+const char* get_catalog_dir(void);
+bool slow_tests_enabled(void);
+void test_setup_logging(int level);
+int log_tests_skipped(const char *message);
+int log_tests_skipped_errno(int r, const char *message);
+
+bool have_namespaces(void);
+
+/* We use the small but non-trivial limit here */
+#define CAN_MEMLOCK_SIZE (512 * 1024U)
+bool can_memlock(void);
+
+#define TEST_REQ_RUNNING_SYSTEMD(x) \
+ if (sd_booted() > 0) { \
+ x; \
+ } else { \
+ printf("systemd not booted skipping '%s'\n", #x); \
+ }
+
+/* Provide a convenient way to check if we're running in CI. */
+const char *ci_environment(void);
diff --git a/src/shared/tmpfile-util-label.c b/src/shared/tmpfile-util-label.c
new file mode 100644
index 0000000..d37c0b0
--- /dev/null
+++ b/src/shared/tmpfile-util-label.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+
+#include "selinux-util.h"
+#include "tmpfile-util-label.h"
+#include "tmpfile-util.h"
+
+int fopen_temporary_label(
+ const char *target,
+ const char *path,
+ FILE **f,
+ char **temp_path) {
+
+ int r;
+
+ r = mac_selinux_create_file_prepare(target, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = fopen_temporary(path, f, temp_path);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
diff --git a/src/shared/tmpfile-util-label.h b/src/shared/tmpfile-util-label.h
new file mode 100644
index 0000000..01afc06
--- /dev/null
+++ b/src/shared/tmpfile-util-label.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+/* These functions are split out of tmpfile-util.h (and not for example just flags to the functions they wrap) in order
+ * to optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but
+ * not for all */
+
+int fopen_temporary_label(const char *target, const char *path, FILE **f, char **temp_path);
diff --git a/src/shared/tomoyo-util.c b/src/shared/tomoyo-util.c
new file mode 100644
index 0000000..2347179
--- /dev/null
+++ b/src/shared/tomoyo-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "tomoyo-util.h"
+
+bool mac_tomoyo_use(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0)
+ cached_use = (access("/sys/kernel/security/tomoyo/version",
+ F_OK) == 0);
+
+ return cached_use;
+}
diff --git a/src/shared/tomoyo-util.h b/src/shared/tomoyo-util.h
new file mode 100644
index 0000000..a6ee7d4
--- /dev/null
+++ b/src/shared/tomoyo-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+bool mac_tomoyo_use(void);
diff --git a/src/shared/udev-util.c b/src/shared/udev-util.c
new file mode 100644
index 0000000..030922e
--- /dev/null
+++ b/src/shared/udev-util.c
@@ -0,0 +1,371 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "signal-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+#include "utf8.h"
+
+static const char* const resolve_name_timing_table[_RESOLVE_NAME_TIMING_MAX] = {
+ [RESOLVE_NAME_NEVER] = "never",
+ [RESOLVE_NAME_LATE] = "late",
+ [RESOLVE_NAME_EARLY] = "early",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(resolve_name_timing, ResolveNameTiming);
+
+int udev_parse_config_full(
+ unsigned *ret_children_max,
+ usec_t *ret_exec_delay_usec,
+ usec_t *ret_event_timeout_usec,
+ ResolveNameTiming *ret_resolve_name_timing,
+ int *ret_timeout_signal) {
+
+ _cleanup_free_ char *log_val = NULL, *children_max = NULL, *exec_delay = NULL, *event_timeout = NULL, *resolve_names = NULL, *timeout_signal = NULL;
+ int r;
+
+ r = parse_env_file(NULL, "/etc/udev/udev.conf",
+ "udev_log", &log_val,
+ "children_max", &children_max,
+ "exec_delay", &exec_delay,
+ "event_timeout", &event_timeout,
+ "resolve_names", &resolve_names,
+ "timeout_signal", &timeout_signal);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (log_val) {
+ const char *log;
+ size_t n;
+
+ /* unquote */
+ n = strlen(log_val);
+ if (n >= 2 &&
+ ((log_val[0] == '"' && log_val[n-1] == '"') ||
+ (log_val[0] == '\'' && log_val[n-1] == '\''))) {
+ log_val[n - 1] = '\0';
+ log = log_val + 1;
+ } else
+ log = log_val;
+
+ /* we set the udev log level here explicitly, this is supposed
+ * to regulate the code in libudev/ and udev/. */
+ r = log_set_max_level_from_string_realm(LOG_REALM_UDEV, log);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to set udev log level '%s', ignoring: %m", log);
+ }
+
+ if (ret_children_max && children_max) {
+ r = safe_atou(children_max, ret_children_max);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse children_max=%s, ignoring: %m", children_max);
+ }
+
+ if (ret_exec_delay_usec && exec_delay) {
+ r = parse_sec(exec_delay, ret_exec_delay_usec);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse exec_delay=%s, ignoring: %m", exec_delay);
+ }
+
+ if (ret_event_timeout_usec && event_timeout) {
+ r = parse_sec(event_timeout, ret_event_timeout_usec);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse event_timeout=%s, ignoring: %m", event_timeout);
+ }
+
+ if (ret_resolve_name_timing && resolve_names) {
+ ResolveNameTiming t;
+
+ t = resolve_name_timing_from_string(resolve_names);
+ if (t < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse resolve_names=%s, ignoring.", resolve_names);
+ else
+ *ret_resolve_name_timing = t;
+ }
+
+ if (ret_timeout_signal && timeout_signal) {
+ r = signal_from_string(timeout_signal);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse timeout_signal=%s, ignoring: %m", timeout_signal);
+ else
+ *ret_timeout_signal = r;
+ }
+
+ return 0;
+}
+
+/* Note that if -ENOENT is returned, it will be logged at debug level rather than error,
+ * because it's an expected, common occurrence that the caller will handle with a fallback */
+static int device_new_from_dev_path(const char *devlink, sd_device **ret_device) {
+ struct stat st;
+ int r;
+
+ assert(devlink);
+
+ if (stat(devlink, &st) < 0)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to stat() %s: %m", devlink);
+
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK),
+ "%s does not point to a block device: %m", devlink);
+
+ r = sd_device_new_from_devnum(ret_device, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize device from %s: %m", devlink);
+
+ return 0;
+}
+
+struct DeviceMonitorData {
+ const char *sysname;
+ const char *devlink;
+ sd_device *device;
+};
+
+static void device_monitor_data_free(struct DeviceMonitorData *d) {
+ assert(d);
+
+ sd_device_unref(d->device);
+}
+
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ struct DeviceMonitorData *data = userdata;
+ const char *sysname;
+
+ assert(device);
+ assert(data);
+ assert(data->sysname || data->devlink);
+ assert(!data->device);
+
+ /* Ignore REMOVE events here. We are waiting for initialization after all, not de-initialization. We
+ * might see a REMOVE event from an earlier use of the device (devices by the same name are recycled
+ * by the kernel after all), which we should not get confused by. After all we cannot distinguish use
+ * cycles of the devices, as the udev queue is entirely asynchronous.
+ *
+ * If we see a REMOVE event here for the use cycle we actually care about then we won't notice of
+ * course, but that should be OK, given the timeout logic used on the wait loop: this will be noticed
+ * by means of -ETIMEDOUT. Thus we won't notice immediately, but eventually, and that should be
+ * sufficient for an error path that should regularly not happen.
+ *
+ * (And yes, we only need to special case REMOVE. It's the only "negative" event type, where a device
+ * ceases to exist. All other event types are "positive": the device exists and is registered in the
+ * udev database, thus whenever we see the event, we can consider it initialized.) */
+ if (device_for_action(device, DEVICE_ACTION_REMOVE))
+ return 0;
+
+ if (data->sysname && sd_device_get_sysname(device, &sysname) >= 0 && streq(sysname, data->sysname))
+ goto found;
+
+ if (data->devlink) {
+ const char *devlink;
+
+ FOREACH_DEVICE_DEVLINK(device, devlink)
+ if (path_equal(devlink, data->devlink))
+ goto found;
+
+ if (sd_device_get_devname(device, &devlink) >= 0 && path_equal(devlink, data->devlink))
+ goto found;
+ }
+
+ return 0;
+
+found:
+ data->device = sd_device_ref(device);
+ return sd_event_exit(sd_device_monitor_get_event(monitor), 0);
+}
+
+static int device_wait_for_initialization_internal(
+ sd_device *_device,
+ const char *devlink,
+ const char *subsystem,
+ usec_t deadline,
+ sd_device **ret) {
+
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ /* Ensure that if !_device && devlink, device gets unrefd on errors since it will be new */
+ _cleanup_(sd_device_unrefp) sd_device *device = sd_device_ref(_device);
+ _cleanup_(device_monitor_data_free) struct DeviceMonitorData data = {
+ .devlink = devlink,
+ };
+ int r;
+
+ assert(device || (subsystem && devlink));
+
+ /* Devlink might already exist, if it does get the device to use the sysname filtering */
+ if (!device && devlink) {
+ r = device_new_from_dev_path(devlink, &device);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+
+ if (device) {
+ if (sd_device_get_is_initialized(device) > 0) {
+ if (ret)
+ *ret = sd_device_ref(device);
+ return 0;
+ }
+ /* We need either the sysname or the devlink for filtering */
+ assert_se(sd_device_get_sysname(device, &data.sysname) >= 0 || devlink);
+ }
+
+ /* Wait until the device is initialized, so that we can get access to the ID_PATH property */
+
+ r = sd_event_new(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default event: %m");
+
+ r = sd_device_monitor_new(&monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire monitor: %m");
+
+ if (device && !subsystem) {
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0 && r != -ENOENT)
+ return log_device_error_errno(device, r, "Failed to get subsystem: %m");
+ }
+
+ if (subsystem) {
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, subsystem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add %s subsystem match to monitor: %m", subsystem);
+ }
+
+ r = sd_device_monitor_attach_event(monitor, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(monitor, device_monitor_handler, &data);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ if (deadline != USEC_INFINITY) {
+ r = sd_event_add_time(
+ event, &timeout_source,
+ CLOCK_MONOTONIC, deadline, 0,
+ NULL, INT_TO_PTR(-ETIMEDOUT));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add timeout event source: %m");
+ }
+
+ /* Check again, maybe things changed. Udev will re-read the db if the device wasn't initialized
+ * yet. */
+ if (!device && devlink) {
+ r = device_new_from_dev_path(devlink, &device);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+ if (device && sd_device_get_is_initialized(device) > 0) {
+ if (ret)
+ *ret = sd_device_ref(device);
+ return 0;
+ }
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for device to be initialized: %m");
+
+ if (ret)
+ *ret = TAKE_PTR(data.device);
+ return 0;
+}
+
+int device_wait_for_initialization(sd_device *device, const char *subsystem, usec_t deadline, sd_device **ret) {
+ return device_wait_for_initialization_internal(device, NULL, subsystem, deadline, ret);
+}
+
+int device_wait_for_devlink(const char *devlink, const char *subsystem, usec_t deadline, sd_device **ret) {
+ return device_wait_for_initialization_internal(NULL, devlink, subsystem, deadline, ret);
+}
+
+int device_is_renaming(sd_device *dev) {
+ int r;
+
+ assert(dev);
+
+ r = sd_device_get_property_value(dev, "ID_RENAMING", NULL);
+ if (r == -ENOENT)
+ return false;
+ if (r < 0)
+ return r;
+
+ return true;
+}
+
+bool device_for_action(sd_device *dev, DeviceAction action) {
+ DeviceAction a;
+
+ assert(dev);
+
+ if (device_get_action(dev, &a) < 0)
+ return false;
+
+ return a == action;
+}
+
+int udev_rule_parse_value(char *str, char **ret_value, char **ret_endpos) {
+ char *i, *j;
+ int r;
+ bool is_escaped;
+
+ /* value must be double quotated */
+ is_escaped = str[0] == 'e';
+ str += is_escaped;
+ if (str[0] != '"')
+ return -EINVAL;
+ str++;
+
+ if (!is_escaped) {
+ /* unescape double quotation '\"'->'"' */
+ for (i = j = str; *i != '"'; i++, j++) {
+ if (*i == '\0')
+ return -EINVAL;
+ if (i[0] == '\\' && i[1] == '"')
+ i++;
+ *j = *i;
+ }
+ j[0] = '\0';
+ } else {
+ _cleanup_free_ char *unescaped = NULL;
+
+ /* find the end position of value */
+ for (i = str; *i != '"'; i++) {
+ if (i[0] == '\\')
+ i++;
+ if (*i == '\0')
+ return -EINVAL;
+ }
+ i[0] = '\0';
+
+ r = cunescape_length(str, i - str, 0, &unescaped);
+ if (r < 0)
+ return r;
+ assert(r <= i - str);
+ memcpy(str, unescaped, r + 1);
+ }
+
+ *ret_value = str;
+ *ret_endpos = i + 1;
+ return 0;
+}
diff --git a/src/shared/udev-util.h b/src/shared/udev-util.h
new file mode 100644
index 0000000..270861e
--- /dev/null
+++ b/src/shared/udev-util.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+#include "device-private.h"
+#include "time-util.h"
+
+typedef enum ResolveNameTiming {
+ RESOLVE_NAME_NEVER,
+ RESOLVE_NAME_LATE,
+ RESOLVE_NAME_EARLY,
+ _RESOLVE_NAME_TIMING_MAX,
+ _RESOLVE_NAME_TIMING_INVALID = -1,
+} ResolveNameTiming;
+
+ResolveNameTiming resolve_name_timing_from_string(const char *s) _pure_;
+const char *resolve_name_timing_to_string(ResolveNameTiming i) _const_;
+
+int udev_parse_config_full(
+ unsigned *ret_children_max,
+ usec_t *ret_exec_delay_usec,
+ usec_t *ret_event_timeout_usec,
+ ResolveNameTiming *ret_resolve_name_timing,
+ int *ret_timeout_signal);
+
+static inline int udev_parse_config(void) {
+ return udev_parse_config_full(NULL, NULL, NULL, NULL, NULL);
+}
+
+int device_wait_for_initialization(sd_device *device, const char *subsystem, usec_t deadline, sd_device **ret);
+int device_wait_for_devlink(const char *path, const char *subsystem, usec_t deadline, sd_device **ret);
+int device_is_renaming(sd_device *dev);
+bool device_for_action(sd_device *dev, DeviceAction action);
+
+int udev_rule_parse_value(char *str, char **ret_value, char **ret_endpos);
diff --git a/src/shared/uid-range.c b/src/shared/uid-range.c
new file mode 100644
index 0000000..5d5bf7f
--- /dev/null
+++ b/src/shared/uid-range.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "sort-util.h"
+#include "uid-range.h"
+#include "user-util.h"
+
+static bool uid_range_intersect(UidRange *range, uid_t start, uid_t nr) {
+ assert(range);
+
+ return range->start <= start + nr &&
+ range->start + range->nr >= start;
+}
+
+static void uid_range_coalesce(UidRange **p, unsigned *n) {
+ assert(p);
+ assert(n);
+
+ for (unsigned i = 0; i < *n; i++) {
+ for (unsigned j = i + 1; j < *n; j++) {
+ UidRange *x = (*p)+i, *y = (*p)+j;
+
+ if (uid_range_intersect(x, y->start, y->nr)) {
+ uid_t begin, end;
+
+ begin = MIN(x->start, y->start);
+ end = MAX(x->start + x->nr, y->start + y->nr);
+
+ x->start = begin;
+ x->nr = end - begin;
+
+ if (*n > j+1)
+ memmove(y, y+1, sizeof(UidRange) * (*n - j -1));
+
+ (*n)--;
+ j--;
+ }
+ }
+ }
+}
+
+static int uid_range_compare(const UidRange *a, const UidRange *b) {
+ int r;
+
+ r = CMP(a->start, b->start);
+ if (r != 0)
+ return r;
+
+ return CMP(a->nr, b->nr);
+}
+
+int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr) {
+ bool found = false;
+ UidRange *x;
+
+ assert(p);
+ assert(n);
+
+ if (nr <= 0)
+ return 0;
+
+ for (unsigned i = 0; i < *n; i++) {
+ x = (*p) + i;
+ if (uid_range_intersect(x, start, nr)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ uid_t begin, end;
+
+ begin = MIN(x->start, start);
+ end = MAX(x->start + x->nr, start + nr);
+
+ x->start = begin;
+ x->nr = end - begin;
+ } else {
+ UidRange *t;
+
+ t = reallocarray(*p, *n + 1, sizeof(UidRange));
+ if (!t)
+ return -ENOMEM;
+
+ *p = t;
+ x = t + ((*n) ++);
+
+ x->start = start;
+ x->nr = nr;
+ }
+
+ typesafe_qsort(*p, *n, uid_range_compare);
+ uid_range_coalesce(p, n);
+
+ return *n;
+}
+
+int uid_range_add_str(UidRange **p, unsigned *n, const char *s) {
+ uid_t start, nr;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(n);
+ assert(s);
+
+ t = strchr(s, '-');
+ if (t) {
+ char *b;
+ uid_t end;
+
+ b = strndupa(s, t - s);
+ r = parse_uid(b, &start);
+ if (r < 0)
+ return r;
+
+ r = parse_uid(t+1, &end);
+ if (r < 0)
+ return r;
+
+ if (end < start)
+ return -EINVAL;
+
+ nr = end - start + 1;
+ } else {
+ r = parse_uid(s, &start);
+ if (r < 0)
+ return r;
+
+ nr = 1;
+ }
+
+ return uid_range_add(p, n, start, nr);
+}
+
+int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid) {
+ uid_t closest = UID_INVALID, candidate;
+
+ assert(p);
+ assert(uid);
+
+ candidate = *uid - 1;
+
+ for (unsigned i = 0; i < n; i++) {
+ uid_t begin, end;
+
+ begin = p[i].start;
+ end = p[i].start + p[i].nr - 1;
+
+ if (candidate >= begin && candidate <= end) {
+ *uid = candidate;
+ return 1;
+ }
+
+ if (end < candidate)
+ closest = end;
+ }
+
+ if (closest == UID_INVALID)
+ return -EBUSY;
+
+ *uid = closest;
+ return 1;
+}
+
+bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid) {
+ assert(p);
+ assert(uid);
+
+ for (unsigned i = 0; i < n; i++)
+ if (uid >= p[i].start && uid < p[i].start + p[i].nr)
+ return true;
+
+ return false;
+}
diff --git a/src/shared/uid-range.h b/src/shared/uid-range.h
new file mode 100644
index 0000000..ef168cd
--- /dev/null
+++ b/src/shared/uid-range.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+typedef struct UidRange {
+ uid_t start, nr;
+} UidRange;
+
+int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr);
+int uid_range_add_str(UidRange **p, unsigned *n, const char *s);
+
+int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid);
+bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid);
diff --git a/src/shared/unit-file.c b/src/shared/unit-file.c
new file mode 100644
index 0000000..4c30719
--- /dev/null
+++ b/src/shared/unit-file.c
@@ -0,0 +1,601 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-id128.h"
+
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "path-lookup.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-file.h"
+
+bool unit_type_may_alias(UnitType type) {
+ return IN_SET(type,
+ UNIT_SERVICE,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_DEVICE,
+ UNIT_TIMER,
+ UNIT_PATH);
+}
+
+bool unit_type_may_template(UnitType type) {
+ return IN_SET(type,
+ UNIT_SERVICE,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_TIMER,
+ UNIT_PATH);
+}
+
+int unit_symlink_name_compatible(const char *symlink, const char *target, bool instance_propagation) {
+ _cleanup_free_ char *template = NULL;
+ int r, un_type1, un_type2;
+
+ un_type1 = unit_name_classify(symlink);
+
+ /* The straightforward case: the symlink name matches the target and we have a valid unit */
+ if (streq(symlink, target) &&
+ (un_type1 & (UNIT_NAME_PLAIN | UNIT_NAME_INSTANCE)))
+ return 1;
+
+ r = unit_name_template(symlink, &template);
+ if (r == -EINVAL)
+ return 0; /* Not a template */
+ if (r < 0)
+ return r;
+
+ un_type2 = unit_name_classify(target);
+
+ /* An instance name points to a target that is just the template name */
+ if (un_type1 == UNIT_NAME_INSTANCE &&
+ un_type2 == UNIT_NAME_TEMPLATE &&
+ streq(template, target))
+ return 1;
+
+ /* foo@.target.requires/bar@.service: instance will be propagated */
+ if (instance_propagation &&
+ un_type1 == UNIT_NAME_TEMPLATE &&
+ un_type2 == UNIT_NAME_TEMPLATE &&
+ streq(template, target))
+ return 1;
+
+ return 0;
+}
+
+int unit_validate_alias_symlink_and_warn(const char *filename, const char *target) {
+ const char *src, *dst;
+ _cleanup_free_ char *src_instance = NULL, *dst_instance = NULL;
+ UnitType src_unit_type, dst_unit_type;
+ int src_name_type, dst_name_type;
+
+ /* Check if the *alias* symlink is valid. This applies to symlinks like
+ * /etc/systemd/system/dbus.service → dbus-broker.service, but not to .wants or .requires symlinks
+ * and such. Neither does this apply to symlinks which *link* units, i.e. symlinks to outside of the
+ * unit lookup path.
+ *
+ * -EINVAL is returned if the something is wrong with the source filename or the source unit type is
+ * not allowed to symlink,
+ * -EXDEV if the target filename is not a valid unit name or doesn't match the source.
+ */
+
+ src = basename(filename);
+ dst = basename(target);
+
+ /* src checks */
+
+ src_name_type = unit_name_to_instance(src, &src_instance);
+ if (src_name_type < 0)
+ return log_notice_errno(src_name_type,
+ "%s: not a valid unit name \"%s\": %m", filename, src);
+
+ src_unit_type = unit_name_to_type(src);
+ assert(src_unit_type >= 0); /* unit_name_to_instance() checked the suffix already */
+
+ if (!unit_type_may_alias(src_unit_type))
+ return log_notice_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: symlinks are not allowed for units of this type, rejecting.",
+ filename);
+
+ if (src_name_type != UNIT_NAME_PLAIN &&
+ !unit_type_may_template(src_unit_type))
+ return log_notice_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: templates not allowed for %s units, rejecting.",
+ filename, unit_type_to_string(src_unit_type));
+
+ /* dst checks */
+
+ dst_name_type = unit_name_to_instance(dst, &dst_instance);
+ if (dst_name_type < 0)
+ return log_notice_errno(dst_name_type == -EINVAL ? SYNTHETIC_ERRNO(EXDEV) : dst_name_type,
+ "%s points to \"%s\" which is not a valid unit name: %m",
+ filename, dst);
+
+ if (!(dst_name_type == src_name_type ||
+ (src_name_type == UNIT_NAME_INSTANCE && dst_name_type == UNIT_NAME_TEMPLATE)))
+ return log_notice_errno(SYNTHETIC_ERRNO(EXDEV),
+ "%s: symlink target name type \"%s\" does not match source, rejecting.",
+ filename, dst);
+
+ if (dst_name_type == UNIT_NAME_INSTANCE) {
+ assert(src_instance);
+ assert(dst_instance);
+ if (!streq(src_instance, dst_instance))
+ return log_notice_errno(SYNTHETIC_ERRNO(EXDEV),
+ "%s: unit symlink target \"%s\" instance name doesn't match, rejecting.",
+ filename, dst);
+ }
+
+ dst_unit_type = unit_name_to_type(dst);
+ if (dst_unit_type != src_unit_type)
+ return log_notice_errno(SYNTHETIC_ERRNO(EXDEV),
+ "%s: symlink target \"%s\" has incompatible suffix, rejecting.",
+ filename, dst);
+
+ return 0;
+}
+
+#define FOLLOW_MAX 8
+
+static int unit_ids_map_get(
+ Hashmap *unit_ids_map,
+ const char *unit_name,
+ const char **ret_fragment_path) {
+
+ /* Resolve recursively until we hit an absolute path, i.e. a non-aliased unit.
+ *
+ * We distinguish the case where unit_name was not found in the hashmap at all, and the case where
+ * some symlink was broken.
+ *
+ * If a symlink target points to an instance name, then we also check for the template. */
+
+ const char *id = NULL;
+ int r;
+
+ for (unsigned n = 0; n < FOLLOW_MAX; n++) {
+ const char *t = hashmap_get(unit_ids_map, id ?: unit_name);
+ if (!t) {
+ _cleanup_free_ char *template = NULL;
+
+ if (!id)
+ return -ENOENT;
+
+ r = unit_name_template(id, &template);
+ if (r == -EINVAL)
+ return -ENXIO; /* we failed to find the symlink target */
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine template name for %s: %m", id);
+
+ t = hashmap_get(unit_ids_map, template);
+ if (!t)
+ return -ENXIO;
+
+ /* We successfully switched from instanced name to a template, let's continue */
+ }
+
+ if (path_is_absolute(t)) {
+ if (ret_fragment_path)
+ *ret_fragment_path = t;
+ return 0;
+ }
+
+ id = t;
+ }
+
+ return -ELOOP;
+}
+
+static bool lookup_paths_mtime_exclude(const LookupPaths *lp, const char *path) {
+ /* Paths that are under our exclusive control. Users shall not alter those directly. */
+
+ return streq_ptr(path, lp->generator) ||
+ streq_ptr(path, lp->generator_early) ||
+ streq_ptr(path, lp->generator_late) ||
+ streq_ptr(path, lp->transient) ||
+ streq_ptr(path, lp->persistent_control) ||
+ streq_ptr(path, lp->runtime_control);
+}
+
+#define HASH_KEY SD_ID128_MAKE(4e,86,1b,e3,39,b3,40,46,98,5d,b8,11,34,8f,c3,c1)
+
+bool lookup_paths_timestamp_hash_same(const LookupPaths *lp, uint64_t timestamp_hash, uint64_t *ret_new) {
+ struct siphash state;
+
+ siphash24_init(&state, HASH_KEY.bytes);
+
+ char **dir;
+ STRV_FOREACH(dir, (char**) lp->search_path) {
+ struct stat st;
+
+ if (lookup_paths_mtime_exclude(lp, *dir))
+ continue;
+
+ /* Determine the latest lookup path modification time */
+ if (stat(*dir, &st) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ log_debug_errno(errno, "Failed to stat %s, ignoring: %m", *dir);
+ continue;
+ }
+
+ siphash24_compress_usec_t(timespec_load(&st.st_mtim), &state);
+ }
+
+ uint64_t updated = siphash24_finalize(&state);
+ if (ret_new)
+ *ret_new = updated;
+ if (updated != timestamp_hash)
+ log_debug("Modification times have changed, need to update cache.");
+ return updated == timestamp_hash;
+}
+
+int unit_file_build_name_map(
+ const LookupPaths *lp,
+ uint64_t *cache_timestamp_hash,
+ Hashmap **unit_ids_map,
+ Hashmap **unit_names_map,
+ Set **path_cache) {
+
+ /* Build two mappings: any name → main unit (i.e. the end result of symlink resolution), unit name →
+ * all aliases (i.e. the entry for a given key is a a list of all names which point to this key). The
+ * key is included in the value iff we saw a file or symlink with that name. In other words, if we
+ * have a key, but it is not present in the value for itself, there was an alias pointing to it, but
+ * the unit itself is not loadable.
+ *
+ * At the same, build a cache of paths where to find units. The non-const parameters are for input
+ * and output. Existing contents will be freed before the new contents are stored.
+ */
+
+ _cleanup_hashmap_free_ Hashmap *ids = NULL, *names = NULL;
+ _cleanup_set_free_free_ Set *paths = NULL;
+ uint64_t timestamp_hash;
+ char **dir;
+ int r;
+
+ /* Before doing anything, check if the timestamp hash that was passed is still valid.
+ * If yes, do nothing. */
+ if (cache_timestamp_hash &&
+ lookup_paths_timestamp_hash_same(lp, *cache_timestamp_hash, &timestamp_hash))
+ return 0;
+
+ /* The timestamp hash is now set based on the mtimes from before when we start reading files.
+ * If anything is modified concurrently, we'll consider the cache outdated. */
+
+ if (path_cache) {
+ paths = set_new(&path_hash_ops_free);
+ if (!paths)
+ return log_oom();
+ }
+
+ STRV_FOREACH(dir, (char**) lp->search_path) {
+ struct dirent *de;
+ _cleanup_closedir_ DIR *d = NULL;
+
+ d = opendir(*dir);
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open \"%s\", ignoring: %m", *dir);
+ continue;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, log_warning_errno(errno, "Failed to read \"%s\", ignoring: %m", *dir)) {
+ char *filename;
+ _cleanup_free_ char *_filename_free = NULL, *simplified = NULL;
+ const char *suffix, *dst = NULL;
+ bool valid_unit_name;
+
+ valid_unit_name = unit_name_is_valid(de->d_name, UNIT_NAME_ANY);
+
+ /* We only care about valid units and dirs with certain suffixes, let's ignore the
+ * rest. */
+ if (!valid_unit_name &&
+ !ENDSWITH_SET(de->d_name, ".wants", ".requires", ".d"))
+ continue;
+
+ filename = path_join(*dir, de->d_name);
+ if (!filename)
+ return log_oom();
+
+ if (paths) {
+ r = set_consume(paths, filename);
+ if (r < 0)
+ return log_oom();
+ /* We will still use filename below. This is safe because we know the set
+ * holds a reference. */
+ } else
+ _filename_free = filename; /* Make sure we free the filename. */
+
+ if (!valid_unit_name)
+ continue;
+ assert_se(suffix = strrchr(de->d_name, '.'));
+
+ /* search_path is ordered by priority (highest first). If the name is already mapped
+ * to something (incl. itself), it means that we have already seen it, and we should
+ * ignore it here. */
+ if (hashmap_contains(ids, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+ if (de->d_type == DT_LNK) {
+ /* We don't explicitly check for alias loops here. unit_ids_map_get() which
+ * limits the number of hops should be used to access the map. */
+
+ _cleanup_free_ char *target = NULL;
+
+ r = readlinkat_malloc(dirfd(d), de->d_name, &target);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read symlink %s/%s, ignoring: %m",
+ *dir, de->d_name);
+ continue;
+ }
+
+ const bool is_abs = path_is_absolute(target);
+ if (lp->root_dir || !is_abs) {
+ char *target_abs = path_join(is_abs ? lp->root_dir : *dir, target);
+ if (!target_abs)
+ return log_oom();
+
+ free_and_replace(target, target_abs);
+ }
+
+ /* Get rid of "." and ".." components in target path */
+ r = chase_symlinks(target, lp->root_dir, CHASE_NOFOLLOW | CHASE_NONEXISTENT, &simplified, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to resolve symlink %s pointing to %s, ignoring: %m",
+ filename, target);
+ continue;
+ }
+
+ /* Check if the symlink goes outside of our search path.
+ * If yes, it's a linked unit file or mask, and we don't care about the target name.
+ * Let's just store the link destination directly.
+ * If not, let's verify that it's a good symlink. */
+ char *tail = path_startswith_strv(simplified, lp->search_path);
+ if (tail) {
+ bool self_alias;
+
+ dst = basename(simplified);
+ self_alias = streq(dst, de->d_name);
+
+ if (is_path(tail))
+ log_full(self_alias ? LOG_DEBUG : LOG_WARNING,
+ "Suspicious symlink %s→%s, treating as alias.",
+ filename, simplified);
+
+ r = unit_validate_alias_symlink_and_warn(filename, simplified);
+ if (r < 0)
+ continue;
+
+ if (self_alias) {
+ /* A self-alias that has no effect */
+ log_debug("%s: self-alias: %s/%s → %s, ignoring.",
+ __func__, *dir, de->d_name, dst);
+ continue;
+ }
+
+ log_debug("%s: alias: %s/%s → %s", __func__, *dir, de->d_name, dst);
+ } else {
+ dst = simplified;
+
+ log_debug("%s: linked unit file: %s/%s → %s", __func__, *dir, de->d_name, dst);
+ }
+
+ } else {
+ dst = filename;
+ log_debug("%s: normal unit file: %s", __func__, dst);
+ }
+
+ r = hashmap_put_strdup(&ids, de->d_name, dst);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to add entry to hashmap (%s→%s): %m",
+ de->d_name, dst);
+ }
+ }
+
+ /* Let's also put the names in the reverse db. */
+ const char *dummy, *src;
+ HASHMAP_FOREACH_KEY(dummy, src, ids) {
+ const char *dst;
+
+ r = unit_ids_map_get(ids, src, &dst);
+ if (r < 0)
+ continue;
+
+ if (null_or_empty_path(dst) != 0)
+ continue;
+
+ /* Do not treat instance symlinks that point to the template as aliases */
+ if (unit_name_is_valid(basename(dst), UNIT_NAME_TEMPLATE) &&
+ unit_name_is_valid(src, UNIT_NAME_INSTANCE))
+ continue;
+
+ r = string_strv_hashmap_put(&names, basename(dst), src);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to add entry to hashmap (%s→%s): %m",
+ basename(dst), src);
+ }
+
+ if (cache_timestamp_hash)
+ *cache_timestamp_hash = timestamp_hash;
+
+ hashmap_free_and_replace(*unit_ids_map, ids);
+ hashmap_free_and_replace(*unit_names_map, names);
+ if (path_cache)
+ set_free_and_replace(*path_cache, paths);
+
+ return 1;
+}
+
+int unit_file_find_fragment(
+ Hashmap *unit_ids_map,
+ Hashmap *unit_name_map,
+ const char *unit_name,
+ const char **ret_fragment_path,
+ Set **ret_names) {
+
+ const char *fragment = NULL;
+ _cleanup_free_ char *template = NULL, *instance = NULL;
+ _cleanup_set_free_free_ Set *names = NULL;
+ char **t, **nnn;
+ int r, name_type;
+
+ /* Finds a fragment path, and returns the set of names:
+ * if we have …/foo.service and …/foo-alias.service→foo.service,
+ * and …/foo@.service and …/foo-alias@.service→foo@.service,
+ * and …/foo@inst.service,
+ * this should return:
+ * foo.service → …/foo.service, {foo.service, foo-alias.service},
+ * foo-alias.service → …/foo.service, {foo.service, foo-alias.service},
+ * foo@.service → …/foo@.service, {foo@.service, foo-alias@.service},
+ * foo-alias@.service → …/foo@.service, {foo@.service, foo-alias@.service},
+ * foo@bar.service → …/foo@.service, {foo@bar.service, foo-alias@bar.service},
+ * foo-alias@bar.service → …/foo@.service, {foo@bar.service, foo-alias@bar.service},
+ * foo-alias@inst.service → …/foo@inst.service, {foo@inst.service, foo-alias@inst.service}.
+ */
+
+ name_type = unit_name_to_instance(unit_name, &instance);
+ if (name_type < 0)
+ return name_type;
+
+ names = set_new(&string_hash_ops);
+ if (!names)
+ return -ENOMEM;
+
+ /* The unit always has its own name if it's not a template. */
+ if (IN_SET(name_type, UNIT_NAME_PLAIN, UNIT_NAME_INSTANCE)) {
+ r = set_put_strdup(&names, unit_name);
+ if (r < 0)
+ return r;
+ }
+
+ /* First try to load fragment under the original name */
+ r = unit_ids_map_get(unit_ids_map, unit_name, &fragment);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+ return log_debug_errno(r, "Cannot load unit %s: %m", unit_name);
+
+ if (fragment) {
+ /* Add any aliases of the original name to the set of names */
+ nnn = hashmap_get(unit_name_map, basename(fragment));
+ STRV_FOREACH(t, nnn) {
+ if (name_type == UNIT_NAME_INSTANCE && unit_name_is_valid(*t, UNIT_NAME_TEMPLATE)) {
+ char *inst;
+
+ r = unit_name_replace_instance(*t, instance, &inst);
+ if (r < 0)
+ return log_debug_errno(r, "Cannot build instance name %s+%s: %m", *t, instance);
+
+ if (!streq(unit_name, inst))
+ log_debug("%s: %s has alias %s", __func__, unit_name, inst);
+
+ log_info("%s: %s+%s → %s", __func__, *t, instance, inst);
+ r = set_consume(names, inst);
+ } else {
+ if (!streq(unit_name, *t))
+ log_debug("%s: %s has alias %s", __func__, unit_name, *t);
+
+ r = set_put_strdup(&names, *t);
+ }
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!fragment && name_type == UNIT_NAME_INSTANCE) {
+ /* Look for a fragment under the template name */
+
+ r = unit_name_template(unit_name, &template);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine template name: %m");
+
+ r = unit_ids_map_get(unit_ids_map, template, &fragment);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+ return log_debug_errno(r, "Cannot load template %s: %m", template);
+
+ if (fragment) {
+ /* Add any aliases of the original name to the set of names */
+ nnn = hashmap_get(unit_name_map, basename(fragment));
+ STRV_FOREACH(t, nnn) {
+ _cleanup_free_ char *inst = NULL;
+ const char *inst_fragment = NULL;
+
+ r = unit_name_replace_instance(*t, instance, &inst);
+ if (r < 0)
+ return log_debug_errno(r, "Cannot build instance name %s+%s: %m", template, instance);
+
+ /* Exclude any aliases that point in some other direction. */
+ r = unit_ids_map_get(unit_ids_map, inst, &inst_fragment);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+ return log_debug_errno(r, "Cannot find instance fragment %s: %m", inst);
+
+ if (inst_fragment &&
+ !streq(basename(inst_fragment), basename(fragment))) {
+ log_debug("Instance %s has fragment %s and is not an alias of %s.",
+ inst, inst_fragment, unit_name);
+ continue;
+ }
+
+ if (!streq(unit_name, inst))
+ log_debug("%s: %s has alias %s", __func__, unit_name, inst);
+ r = set_consume(names, TAKE_PTR(inst));
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ *ret_fragment_path = fragment;
+ *ret_names = TAKE_PTR(names);
+
+ // FIXME: if instance, consider any unit names with different template name
+ return 0;
+}
+
+static const char * const rlmap[] = {
+ "emergency", SPECIAL_EMERGENCY_TARGET,
+ "-b", SPECIAL_EMERGENCY_TARGET,
+ "rescue", SPECIAL_RESCUE_TARGET,
+ "single", SPECIAL_RESCUE_TARGET,
+ "-s", SPECIAL_RESCUE_TARGET,
+ "s", SPECIAL_RESCUE_TARGET,
+ "S", SPECIAL_RESCUE_TARGET,
+ "1", SPECIAL_RESCUE_TARGET,
+ "2", SPECIAL_MULTI_USER_TARGET,
+ "3", SPECIAL_MULTI_USER_TARGET,
+ "4", SPECIAL_MULTI_USER_TARGET,
+ "5", SPECIAL_GRAPHICAL_TARGET,
+ NULL
+};
+
+static const char * const rlmap_initrd[] = {
+ "emergency", SPECIAL_EMERGENCY_TARGET,
+ "rescue", SPECIAL_RESCUE_TARGET,
+ NULL
+};
+
+const char* runlevel_to_target(const char *word) {
+ const char * const *rlmap_ptr;
+ size_t i;
+
+ if (!word)
+ return NULL;
+
+ if (in_initrd()) {
+ word = startswith(word, "rd.");
+ if (!word)
+ return NULL;
+ }
+
+ rlmap_ptr = in_initrd() ? rlmap_initrd : rlmap;
+
+ for (i = 0; rlmap_ptr[i]; i += 2)
+ if (streq(word, rlmap_ptr[i]))
+ return rlmap_ptr[i+1];
+
+ return NULL;
+}
diff --git a/src/shared/unit-file.h b/src/shared/unit-file.h
new file mode 100644
index 0000000..5463b0a
--- /dev/null
+++ b/src/shared/unit-file.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "time-util.h"
+#include "unit-name.h"
+
+typedef enum UnitFileState UnitFileState;
+typedef enum UnitFileScope UnitFileScope;
+typedef struct LookupPaths LookupPaths;
+
+enum UnitFileState {
+ UNIT_FILE_ENABLED,
+ UNIT_FILE_ENABLED_RUNTIME,
+ UNIT_FILE_LINKED,
+ UNIT_FILE_LINKED_RUNTIME,
+ UNIT_FILE_ALIAS,
+ UNIT_FILE_MASKED,
+ UNIT_FILE_MASKED_RUNTIME,
+ UNIT_FILE_STATIC,
+ UNIT_FILE_DISABLED,
+ UNIT_FILE_INDIRECT,
+ UNIT_FILE_GENERATED,
+ UNIT_FILE_TRANSIENT,
+ UNIT_FILE_BAD,
+ _UNIT_FILE_STATE_MAX,
+ _UNIT_FILE_STATE_INVALID = -1
+};
+
+enum UnitFileScope {
+ UNIT_FILE_SYSTEM,
+ UNIT_FILE_GLOBAL,
+ UNIT_FILE_USER,
+ _UNIT_FILE_SCOPE_MAX,
+ _UNIT_FILE_SCOPE_INVALID = -1
+};
+
+bool unit_type_may_alias(UnitType type) _const_;
+bool unit_type_may_template(UnitType type) _const_;
+
+int unit_symlink_name_compatible(const char *symlink, const char *target, bool instance_propagation);
+int unit_validate_alias_symlink_and_warn(const char *filename, const char *target);
+
+bool lookup_paths_timestamp_hash_same(const LookupPaths *lp, uint64_t timestamp_hash, uint64_t *ret_new);
+int unit_file_build_name_map(
+ const LookupPaths *lp,
+ uint64_t *cache_timestamp_hash,
+ Hashmap **unit_ids_map,
+ Hashmap **unit_names_map,
+ Set **path_cache);
+
+int unit_file_find_fragment(
+ Hashmap *unit_ids_map,
+ Hashmap *unit_name_map,
+ const char *unit_name,
+ const char **ret_fragment_path,
+ Set **ret_names);
+
+const char* runlevel_to_target(const char *rl);
diff --git a/src/shared/user-record-nss.c b/src/shared/user-record-nss.c
new file mode 100644
index 0000000..88b8fc2
--- /dev/null
+++ b/src/shared/user-record-nss.c
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "errno-util.h"
+#include "format-util.h"
+#include "libcrypt-util.h"
+#include "strv.h"
+#include "user-record-nss.h"
+#include "user-util.h"
+#include "utf8.h"
+
+#define SET_IF(field, condition, value, fallback) \
+ field = (condition) ? (value) : (fallback)
+
+static inline const char* utf8_only(const char *s) {
+ return s && utf8_is_valid(s) ? s : NULL;
+}
+
+static inline int strv_extend_strv_utf8_only(char ***dst, char **src, bool filter_duplicates) {
+ _cleanup_free_ char **t = NULL;
+ size_t l, j = 0;
+
+ /* First, do a shallow copy of s, filtering for only valid utf-8 strings */
+ l = strv_length(src);
+ t = new(char*, l + 1);
+ if (!t)
+ return -ENOMEM;
+
+ for (size_t i = 0; i < l; i++)
+ if (utf8_is_valid(src[i]))
+ t[j++] = src[i];
+ if (j == 0)
+ return 0;
+
+ t[j] = NULL;
+ return strv_extend_strv(dst, t, filter_duplicates);
+}
+
+int nss_passwd_to_user_record(
+ const struct passwd *pwd,
+ const struct spwd *spwd,
+ UserRecord **ret) {
+
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ int r;
+
+ assert(pwd);
+ assert(ret);
+
+ if (isempty(pwd->pw_name))
+ return -EINVAL;
+
+ if (spwd && !streq_ptr(spwd->sp_namp, pwd->pw_name))
+ return -EINVAL;
+
+ hr = user_record_new();
+ if (!hr)
+ return -ENOMEM;
+
+ r = free_and_strdup(&hr->user_name, pwd->pw_name);
+ if (r < 0)
+ return r;
+
+ /* Some bad NSS modules synthesize GECOS fields with embedded ":" or "\n" characters, which are not
+ * something we can output in /etc/passwd compatible format, since these are record separators
+ * there. We normally refuse that, but we need to maintain compatibility with arbitrary NSS modules,
+ * hence let's do what glibc does: mangle the data to fit the format. */
+ if (isempty(pwd->pw_gecos) || streq_ptr(pwd->pw_gecos, hr->user_name))
+ hr->real_name = mfree(hr->real_name);
+ else if (valid_gecos(pwd->pw_gecos)) {
+ r = free_and_strdup(&hr->real_name, pwd->pw_gecos);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *mangled = NULL;
+
+ mangled = mangle_gecos(pwd->pw_gecos);
+ if (!mangled)
+ return -ENOMEM;
+
+ free_and_replace(hr->real_name, mangled);
+ }
+
+ r = free_and_strdup(&hr->home_directory, utf8_only(empty_to_null(pwd->pw_dir)));
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&hr->shell, utf8_only(empty_to_null(pwd->pw_shell)));
+ if (r < 0)
+ return r;
+
+ hr->uid = pwd->pw_uid;
+ hr->gid = pwd->pw_gid;
+
+ if (spwd &&
+ looks_like_hashed_password(utf8_only(spwd->sp_pwdp))) { /* Ignore locked, disabled, and mojibake passwords */
+ strv_free_erase(hr->hashed_password);
+ hr->hashed_password = strv_new(spwd->sp_pwdp);
+ if (!hr->hashed_password)
+ return -ENOMEM;
+ } else
+ hr->hashed_password = strv_free_erase(hr->hashed_password);
+
+ /* shadow-utils suggests using "chage -E 0" (or -E 1, depending on which man page you check)
+ * for locking a whole account, hence check for that. Note that it also defines a way to lock
+ * just a password instead of the whole account, but that's mostly pointless in times of
+ * password-less authorization, hence let's not bother. */
+
+ SET_IF(hr->locked,
+ spwd && spwd->sp_expire >= 0,
+ spwd->sp_expire <= 1, -1);
+
+ SET_IF(hr->not_after_usec,
+ spwd && spwd->sp_expire > 1 && (uint64_t) spwd->sp_expire < (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_expire * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_now,
+ spwd && spwd->sp_lstchg >= 0,
+ spwd->sp_lstchg == 0, -1);
+
+ SET_IF(hr->last_password_change_usec,
+ spwd && spwd->sp_lstchg > 0 && (uint64_t) spwd->sp_lstchg <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_lstchg * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_min_usec,
+ spwd && spwd->sp_min > 0 && (uint64_t) spwd->sp_min <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_min * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_max_usec,
+ spwd && spwd->sp_max > 0 && (uint64_t) spwd->sp_max <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_max * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_warn_usec,
+ spwd && spwd->sp_warn > 0 && (uint64_t) spwd->sp_warn <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_warn * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_inactive_usec,
+ spwd && spwd->sp_inact > 0 && (uint64_t) spwd->sp_inact <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_inact * USEC_PER_DAY, UINT64_MAX);
+
+ hr->json = json_variant_unref(hr->json);
+ r = json_build(&hr->json, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(hr->user_name)),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(hr->uid)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(hr->gid)),
+ JSON_BUILD_PAIR_CONDITION(hr->real_name, "realName", JSON_BUILD_STRING(hr->real_name)),
+ JSON_BUILD_PAIR_CONDITION(hr->home_directory, "homeDirectory", JSON_BUILD_STRING(hr->home_directory)),
+ JSON_BUILD_PAIR_CONDITION(hr->shell, "shell", JSON_BUILD_STRING(hr->shell)),
+ JSON_BUILD_PAIR_CONDITION(!strv_isempty(hr->hashed_password), "privileged", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRV(hr->hashed_password)))),
+ JSON_BUILD_PAIR_CONDITION(hr->locked >= 0, "locked", JSON_BUILD_BOOLEAN(hr->locked)),
+ JSON_BUILD_PAIR_CONDITION(hr->not_after_usec != UINT64_MAX, "notAfterUSec", JSON_BUILD_UNSIGNED(hr->not_after_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_now >= 0, "passwordChangeNow", JSON_BUILD_BOOLEAN(hr->password_change_now)),
+ JSON_BUILD_PAIR_CONDITION(hr->last_password_change_usec != UINT64_MAX, "lastPasswordChangeUSec", JSON_BUILD_UNSIGNED(hr->last_password_change_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_min_usec != UINT64_MAX, "passwordChangeMinUSec", JSON_BUILD_UNSIGNED(hr->password_change_min_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_max_usec != UINT64_MAX, "passwordChangeMaxUSec", JSON_BUILD_UNSIGNED(hr->password_change_max_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_warn_usec != UINT64_MAX, "passwordChangeWarnUSec", JSON_BUILD_UNSIGNED(hr->password_change_warn_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_inactive_usec != UINT64_MAX, "passwordChangeInactiveUSec", JSON_BUILD_UNSIGNED(hr->password_change_inactive_usec))));
+
+ if (r < 0)
+ return r;
+
+ hr->mask = USER_RECORD_REGULAR |
+ (!strv_isempty(hr->hashed_password) ? USER_RECORD_PRIVILEGED : 0);
+
+ *ret = TAKE_PTR(hr);
+ return 0;
+}
+
+int nss_spwd_for_passwd(const struct passwd *pwd, struct spwd *ret_spwd, char **ret_buffer) {
+ size_t buflen = 4096;
+ int r;
+
+ assert(pwd);
+ assert(ret_spwd);
+ assert(ret_buffer);
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ struct spwd spwd, *result;
+
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getspnam_r(pwd->pw_name, &spwd, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ *ret_spwd = *result;
+ *ret_buffer = TAKE_PTR(buf);
+ return 0;
+ }
+ if (r < 0)
+ return -EIO; /* Weird, this should not return negative! */
+ if (r != ERANGE)
+ return -r;
+
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+}
+
+int nss_user_record_by_name(
+ const char *name,
+ bool with_shadow,
+ UserRecord **ret) {
+
+ _cleanup_free_ char *buf = NULL, *sbuf = NULL;
+ struct passwd pwd, *result;
+ bool incomplete = false;
+ size_t buflen = 4096;
+ struct spwd spwd, *sresult = NULL;
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ for (;;) {
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getpwnam_r(name, &pwd, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ break;
+ }
+
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getpwnam_r() returned a negative value");
+ if (r != ERANGE)
+ return -r;
+
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+
+ if (with_shadow) {
+ r = nss_spwd_for_passwd(result, &spwd, &sbuf);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do shadow lookup for user %s, ignoring: %m", name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ } else
+ sresult = &spwd;
+ } else
+ incomplete = true;
+
+ r = nss_passwd_to_user_record(result, sresult, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->incomplete = incomplete;
+ return 0;
+}
+
+int nss_user_record_by_uid(
+ uid_t uid,
+ bool with_shadow,
+ UserRecord **ret) {
+
+ _cleanup_free_ char *buf = NULL, *sbuf = NULL;
+ struct passwd pwd, *result;
+ bool incomplete = false;
+ size_t buflen = 4096;
+ struct spwd spwd, *sresult = NULL;
+ int r;
+
+ assert(ret);
+
+ for (;;) {
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getpwuid_r(uid, &pwd, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ break;
+ }
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getpwuid_r() returned a negative value");
+ if (r != ERANGE)
+ return -r;
+
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+
+ if (with_shadow) {
+ r = nss_spwd_for_passwd(result, &spwd, &sbuf);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do shadow lookup for UID " UID_FMT ", ignoring: %m", uid);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ } else
+ sresult = &spwd;
+ } else
+ incomplete = true;
+
+ r = nss_passwd_to_user_record(result, sresult, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->incomplete = incomplete;
+ return 0;
+}
+
+int nss_group_to_group_record(
+ const struct group *grp,
+ const struct sgrp *sgrp,
+ GroupRecord **ret) {
+
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ int r;
+
+ assert(grp);
+ assert(ret);
+
+ if (isempty(grp->gr_name))
+ return -EINVAL;
+
+ if (sgrp && !streq_ptr(sgrp->sg_namp, grp->gr_name))
+ return -EINVAL;
+
+ g = group_record_new();
+ if (!g)
+ return -ENOMEM;
+
+ g->group_name = strdup(grp->gr_name);
+ if (!g->group_name)
+ return -ENOMEM;
+
+ r = strv_extend_strv_utf8_only(&g->members, grp->gr_mem, false);
+ if (r < 0)
+ return r;
+
+ g->gid = grp->gr_gid;
+
+ if (sgrp) {
+ if (looks_like_hashed_password(utf8_only(sgrp->sg_passwd))) {
+ g->hashed_password = strv_new(sgrp->sg_passwd);
+ if (!g->hashed_password)
+ return -ENOMEM;
+ }
+
+ r = strv_extend_strv_utf8_only(&g->members, sgrp->sg_mem, true);
+ if (r < 0)
+ return r;
+
+ r = strv_extend_strv_utf8_only(&g->administrators, sgrp->sg_adm, false);
+ if (r < 0)
+ return r;
+ }
+
+ r = json_build(&g->json, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(g->group_name)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(g->gid)),
+ JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->members), "members", JSON_BUILD_STRV(g->members)),
+ JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->hashed_password), "privileged", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRV(g->hashed_password)))),
+ JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->administrators), "administrators", JSON_BUILD_STRV(g->administrators))));
+ if (r < 0)
+ return r;
+
+ g->mask = USER_RECORD_REGULAR |
+ (!strv_isempty(g->hashed_password) ? USER_RECORD_PRIVILEGED : 0);
+
+ *ret = TAKE_PTR(g);
+ return 0;
+}
+
+int nss_sgrp_for_group(const struct group *grp, struct sgrp *ret_sgrp, char **ret_buffer) {
+ size_t buflen = 4096;
+ int r;
+
+ assert(grp);
+ assert(ret_sgrp);
+ assert(ret_buffer);
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ struct sgrp sgrp, *result;
+
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getsgnam_r(grp->gr_name, &sgrp, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ *ret_sgrp = *result;
+ *ret_buffer = TAKE_PTR(buf);
+ return 0;
+ }
+ if (r < 0)
+ return -EIO; /* Weird, this should not return negative! */
+ if (r != ERANGE)
+ return -r;
+
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+}
+
+int nss_group_record_by_name(
+ const char *name,
+ bool with_shadow,
+ GroupRecord **ret) {
+
+ _cleanup_free_ char *buf = NULL, *sbuf = NULL;
+ struct group grp, *result;
+ bool incomplete = false;
+ size_t buflen = 4096;
+ struct sgrp sgrp, *sresult = NULL;
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ for (;;) {
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getgrnam_r(name, &grp, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ break;
+ }
+
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getgrnam_r() returned a negative value");
+ if (r != ERANGE)
+ return -r;
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+
+ if (with_shadow) {
+ r = nss_sgrp_for_group(result, &sgrp, &sbuf);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do shadow lookup for group %s, ignoring: %m", result->gr_name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ } else
+ sresult = &sgrp;
+ } else
+ incomplete = true;
+
+ r = nss_group_to_group_record(result, sresult, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->incomplete = incomplete;
+ return 0;
+}
+
+int nss_group_record_by_gid(
+ gid_t gid,
+ bool with_shadow,
+ GroupRecord **ret) {
+
+ _cleanup_free_ char *buf = NULL, *sbuf = NULL;
+ struct group grp, *result;
+ bool incomplete = false;
+ size_t buflen = 4096;
+ struct sgrp sgrp, *sresult = NULL;
+ int r;
+
+ assert(ret);
+
+ for (;;) {
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getgrgid_r(gid, &grp, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+ break;
+ }
+
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getgrgid_r() returned a negative value");
+ if (r != ERANGE)
+ return -r;
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+
+ if (with_shadow) {
+ r = nss_sgrp_for_group(result, &sgrp, &sbuf);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do shadow lookup for group %s, ignoring: %m", result->gr_name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ } else
+ sresult = &sgrp;
+ } else
+ incomplete = true;
+
+ r = nss_group_to_group_record(result, sresult, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->incomplete = incomplete;
+ return 0;
+}
diff --git a/src/shared/user-record-nss.h b/src/shared/user-record-nss.h
new file mode 100644
index 0000000..22ab04d
--- /dev/null
+++ b/src/shared/user-record-nss.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <grp.h>
+#include <gshadow.h>
+#include <pwd.h>
+#include <shadow.h>
+
+#include "group-record.h"
+#include "user-record.h"
+
+/* Synthesize UserRecord and GroupRecord objects from NSS data */
+
+int nss_passwd_to_user_record(const struct passwd *pwd, const struct spwd *spwd, UserRecord **ret);
+int nss_spwd_for_passwd(const struct passwd *pwd, struct spwd *ret_spwd, char **ret_buffer);
+
+int nss_user_record_by_name(const char *name, bool with_shadow, UserRecord **ret);
+int nss_user_record_by_uid(uid_t uid, bool with_shadow, UserRecord **ret);
+
+int nss_group_to_group_record(const struct group *grp, const struct sgrp *sgrp, GroupRecord **ret);
+int nss_sgrp_for_group(const struct group *grp, struct sgrp *ret_sgrp, char **ret_buffer);
+
+int nss_group_record_by_name(const char *name, bool with_shadow, GroupRecord **ret);
+int nss_group_record_by_gid(gid_t gid, bool with_shadow, GroupRecord **ret);
diff --git a/src/shared/user-record-show.c b/src/shared/user-record-show.c
new file mode 100644
index 0000000..2979028
--- /dev/null
+++ b/src/shared/user-record-show.c
@@ -0,0 +1,584 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "format-util.h"
+#include "fs-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-record-show.h"
+#include "user-util.h"
+#include "userdb.h"
+
+const char *user_record_state_color(const char *state) {
+ if (STR_IN_SET(state, "unfixated", "absent"))
+ return ansi_grey();
+ else if (streq(state, "active"))
+ return ansi_highlight_green();
+ else if (STR_IN_SET(state, "locked", "dirty"))
+ return ansi_highlight_yellow();
+
+ return NULL;
+}
+
+void user_record_show(UserRecord *hr, bool show_full_group_info) {
+ const char *hd, *ip, *shell;
+ UserStorage storage;
+ usec_t t;
+ size_t k;
+ int r, b;
+
+ printf(" User name: %s\n",
+ user_record_user_name_and_realm(hr));
+
+ if (hr->state) {
+ const char *color;
+
+ color = user_record_state_color(hr->state);
+
+ printf(" State: %s%s%s\n",
+ strempty(color), hr->state, color ? ansi_normal() : "");
+ }
+
+ printf(" Disposition: %s\n", user_disposition_to_string(user_record_disposition(hr)));
+
+ if (hr->last_change_usec != USEC_INFINITY) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Change: %s\n", format_timestamp(buf, sizeof(buf), hr->last_change_usec));
+
+ if (hr->last_change_usec > now(CLOCK_REALTIME))
+ printf(" %sModification time lies in the future, system clock wrong?%s\n",
+ ansi_highlight_yellow(), ansi_normal());
+ }
+
+ if (hr->last_password_change_usec != USEC_INFINITY &&
+ hr->last_password_change_usec != hr->last_change_usec) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Passw.: %s\n", format_timestamp(buf, sizeof(buf), hr->last_password_change_usec));
+ }
+
+ r = user_record_test_blocked(hr);
+ switch (r) {
+
+ case -ENOLCK:
+ printf(" Login OK: %sno%s (record is locked)\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -EL2HLT:
+ printf(" Login OK: %sno%s (record not valid yet))\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -EL3HLT:
+ printf(" Login OK: %sno%s (record not valid anymore))\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -ESTALE:
+ default: {
+ usec_t y;
+
+ if (r < 0 && r != -ESTALE) {
+ errno = -r;
+ printf(" Login OK: %sno%s (%m)\n", ansi_highlight_red(), ansi_normal());
+ break;
+ }
+
+ if (is_nologin_shell(user_record_shell(hr))) {
+ printf(" Login OK: %sno%s (nologin shell)\n", ansi_highlight_red(), ansi_normal());
+ break;
+ }
+
+ y = user_record_ratelimit_next_try(hr);
+ if (y != USEC_INFINITY && y > now(CLOCK_REALTIME)) {
+ printf(" Login OK: %sno%s (ratelimit)\n", ansi_highlight_red(), ansi_normal());
+ break;
+ }
+
+ printf(" Login OK: %syes%s\n", ansi_highlight_green(), ansi_normal());
+ break;
+ }}
+
+ r = user_record_test_password_change_required(hr);
+ switch (r) {
+
+ case -EKEYREVOKED:
+ printf(" Password OK: %schange now%s\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ case -EOWNERDEAD:
+ printf(" Password OK: %sexpired%s (change now!)\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ case -EKEYREJECTED:
+ printf(" Password OK: %sexpired%s (for good)\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -EKEYEXPIRED:
+ printf(" Password OK: %sexpires soon%s\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ case -ENETDOWN:
+ printf(" Password OK: %sno timestamp%s\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -EROFS:
+ printf(" Password OK: %schange not permitted%s\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ case -ESTALE:
+ printf(" Password OK: %slast password change in future%s\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ default:
+ if (r < 0) {
+ errno = -r;
+ printf(" Password OK: %sno%s (%m)\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+ }
+
+ printf(" Password OK: %syes%s\n", ansi_highlight_green(), ansi_normal());
+ break;
+ }
+
+ if (uid_is_valid(hr->uid))
+ printf(" UID: " UID_FMT "\n", hr->uid);
+ if (gid_is_valid(hr->gid)) {
+ if (show_full_group_info) {
+ _cleanup_(group_record_unrefp) GroupRecord *gr = NULL;
+
+ r = groupdb_by_gid(hr->gid, 0, &gr);
+ if (r < 0) {
+ errno = -r;
+ printf(" GID: " GID_FMT " (unresolvable: %m)\n", hr->gid);
+ } else
+ printf(" GID: " GID_FMT " (%s)\n", hr->gid, gr->group_name);
+ } else
+ printf(" GID: " GID_FMT "\n", hr->gid);
+ } else if (uid_is_valid(hr->uid)) /* Show UID as GID if not separately configured */
+ printf(" GID: " GID_FMT "\n", (gid_t) hr->uid);
+
+ if (show_full_group_info) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ r = membershipdb_by_user(hr->user_name, 0, &iterator);
+ if (r < 0) {
+ errno = -r;
+ printf(" Aux. Groups: (can't acquire: %m)\n");
+ } else {
+ const char *prefix = " Aux. Groups:";
+
+ for (;;) {
+ _cleanup_free_ char *group = NULL;
+
+ r = membershipdb_iterator_get(iterator, NULL, &group);
+ if (r == -ESRCH)
+ break;
+ if (r < 0) {
+ errno = -r;
+ printf("%s (can't iterate: %m)\n", prefix);
+ break;
+ }
+
+ printf("%s %s\n", prefix, group);
+ prefix = " ";
+ }
+ }
+ }
+
+ if (hr->real_name && !streq(hr->real_name, hr->user_name))
+ printf(" Real Name: %s\n", hr->real_name);
+
+ hd = user_record_home_directory(hr);
+ if (hd)
+ printf(" Directory: %s\n", hd);
+
+ storage = user_record_storage(hr);
+ if (storage >= 0) /* Let's be political, and clarify which storage we like, and which we don't. About CIFS we don't complain. */
+ printf(" Storage: %s%s\n", user_storage_to_string(storage),
+ storage == USER_LUKS ? " (strong encryption)" :
+ storage == USER_FSCRYPT ? " (weak encryption)" :
+ IN_SET(storage, USER_DIRECTORY, USER_SUBVOLUME) ? " (no encryption)" : "");
+
+ ip = user_record_image_path(hr);
+ if (ip && !streq_ptr(ip, hd))
+ printf(" Image Path: %s\n", ip);
+
+ b = user_record_removable(hr);
+ if (b >= 0)
+ printf(" Removable: %s\n", yes_no(b));
+
+ shell = user_record_shell(hr);
+ if (shell)
+ printf(" Shell: %s\n", shell);
+
+ if (hr->email_address)
+ printf(" Email: %s\n", hr->email_address);
+ if (hr->location)
+ printf(" Location: %s\n", hr->location);
+ if (hr->password_hint)
+ printf(" Passw. Hint: %s\n", hr->password_hint);
+ if (hr->icon_name)
+ printf(" Icon Name: %s\n", hr->icon_name);
+
+ if (hr->time_zone)
+ printf(" Time Zone: %s\n", hr->time_zone);
+
+ if (hr->preferred_language)
+ printf(" Language: %s\n", hr->preferred_language);
+
+ if (!strv_isempty(hr->environment)) {
+ char **i;
+
+ STRV_FOREACH(i, hr->environment) {
+ printf(i == hr->environment ?
+ " Environment: %s\n" :
+ " %s\n", *i);
+ }
+ }
+
+ if (hr->locked >= 0)
+ printf(" Locked: %s\n", yes_no(hr->locked));
+
+ if (hr->not_before_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Not Before: %s\n", format_timestamp(buf, sizeof(buf), hr->not_before_usec));
+ }
+
+ if (hr->not_after_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Not After: %s\n", format_timestamp(buf, sizeof(buf), hr->not_after_usec));
+ }
+
+ if (hr->umask != MODE_INVALID)
+ printf(" UMask: 0%03o\n", hr->umask);
+
+ if (nice_is_valid(hr->nice_level))
+ printf(" Nice: %i\n", hr->nice_level);
+
+ for (int j = 0; j < _RLIMIT_MAX; j++) {
+ if (hr->rlimits[j])
+ printf(" Limit: RLIMIT_%s=%" PRIu64 ":%" PRIu64 "\n",
+ rlimit_to_string(j), (uint64_t) hr->rlimits[j]->rlim_cur, (uint64_t) hr->rlimits[j]->rlim_max);
+ }
+
+ if (hr->tasks_max != UINT64_MAX)
+ printf(" Tasks Max: %" PRIu64 "\n", hr->tasks_max);
+
+ if (hr->memory_high != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" Memory High: %s\n", format_bytes(buf, sizeof(buf), hr->memory_high));
+ }
+
+ if (hr->memory_max != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" Memory Max: %s\n", format_bytes(buf, sizeof(buf), hr->memory_max));
+ }
+
+ if (hr->cpu_weight != UINT64_MAX)
+ printf(" CPU Weight: %" PRIu64 "\n", hr->cpu_weight);
+
+ if (hr->io_weight != UINT64_MAX)
+ printf(" IO Weight: %" PRIu64 "\n", hr->io_weight);
+
+ if (hr->access_mode != MODE_INVALID)
+ printf(" Access Mode: 0%03oo\n", user_record_access_mode(hr));
+
+ if (storage == USER_LUKS) {
+ printf("LUKS Discard: online=%s offline=%s\n", yes_no(user_record_luks_discard(hr)), yes_no(user_record_luks_offline_discard(hr)));
+
+ if (!sd_id128_is_null(hr->luks_uuid))
+ printf(" LUKS UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->luks_uuid));
+ if (!sd_id128_is_null(hr->partition_uuid))
+ printf(" Part UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->partition_uuid));
+ if (!sd_id128_is_null(hr->file_system_uuid))
+ printf(" FS UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->file_system_uuid));
+
+ if (hr->file_system_type)
+ printf(" File System: %s\n", user_record_file_system_type(hr));
+
+ if (hr->luks_cipher)
+ printf(" LUKS Cipher: %s\n", hr->luks_cipher);
+ if (hr->luks_cipher_mode)
+ printf(" Cipher Mode: %s\n", hr->luks_cipher_mode);
+ if (hr->luks_volume_key_size != UINT64_MAX)
+ printf(" Volume Key: %" PRIu64 "bit\n", hr->luks_volume_key_size * 8);
+
+ if (hr->luks_pbkdf_type)
+ printf(" PBKDF Type: %s\n", hr->luks_pbkdf_type);
+ if (hr->luks_pbkdf_hash_algorithm)
+ printf(" PBKDF Hash: %s\n", hr->luks_pbkdf_hash_algorithm);
+ if (hr->luks_pbkdf_time_cost_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" PBKDF Time: %s\n", format_timespan(buf, sizeof(buf), hr->luks_pbkdf_time_cost_usec, 0));
+ }
+ if (hr->luks_pbkdf_memory_cost != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" PBKDF Bytes: %s\n", format_bytes(buf, sizeof(buf), hr->luks_pbkdf_memory_cost));
+ }
+ if (hr->luks_pbkdf_parallel_threads != UINT64_MAX)
+ printf("PBKDF Thread: %" PRIu64 "\n", hr->luks_pbkdf_parallel_threads);
+
+ } else if (storage == USER_CIFS) {
+
+ if (hr->cifs_service)
+ printf("CIFS Service: %s\n", hr->cifs_service);
+ }
+
+ if (hr->cifs_user_name)
+ printf(" CIFS User: %s\n", user_record_cifs_user_name(hr));
+ if (hr->cifs_domain)
+ printf(" CIFS Domain: %s\n", hr->cifs_domain);
+
+ if (storage != USER_CLASSIC)
+ printf(" Mount Flags: %s %s %s\n",
+ hr->nosuid ? "nosuid" : "suid",
+ hr->nodev ? "nodev" : "dev",
+ hr->noexec ? "noexec" : "exec");
+
+ if (hr->skeleton_directory)
+ printf(" Skel. Dir.: %s\n", user_record_skeleton_directory(hr));
+
+ if (hr->disk_size != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" Disk Size: %s\n", format_bytes(buf, sizeof(buf), hr->disk_size));
+ }
+
+ if (hr->disk_usage != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+
+ if (hr->disk_size != UINT64_MAX) {
+ unsigned permille;
+
+ permille = (unsigned) DIV_ROUND_UP(hr->disk_usage * 1000U, hr->disk_size); /* Round up! */
+ printf(" Disk Usage: %s (= %u.%01u%%)\n",
+ format_bytes(buf, sizeof(buf), hr->disk_usage),
+ permille / 10, permille % 10);
+ } else
+ printf(" Disk Usage: %s\n", format_bytes(buf, sizeof(buf), hr->disk_usage));
+ }
+
+ if (hr->disk_free != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+
+ if (hr->disk_size != UINT64_MAX) {
+ const char *color_on, *color_off;
+ unsigned permille;
+
+ permille = (unsigned) ((hr->disk_free * 1000U) / hr->disk_size); /* Round down! */
+
+ /* Color the output red or yellow if we are below 10% resp. 25% free. Because 10% and
+ * 25% can be a lot of space still, let's additionally make some absolute
+ * restrictions: 1G and 2G */
+ if (permille <= 100U &&
+ hr->disk_free < 1024U*1024U*1024U /* 1G */) {
+ color_on = ansi_highlight_red();
+ color_off = ansi_normal();
+ } else if (permille <= 250U &&
+ hr->disk_free < 2U*1024U*1024U*1024U /* 2G */) {
+ color_on = ansi_highlight_yellow();
+ color_off = ansi_normal();
+ } else
+ color_on = color_off = "";
+
+ printf(" Disk Free: %s%s (= %u.%01u%%)%s\n",
+ color_on,
+ format_bytes(buf, sizeof(buf), hr->disk_free),
+ permille / 10, permille % 10,
+ color_off);
+ } else
+ printf(" Disk Free: %s\n", format_bytes(buf, sizeof(buf), hr->disk_free));
+ }
+
+ if (hr->disk_floor != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" Disk Floor: %s\n", format_bytes(buf, sizeof(buf), hr->disk_floor));
+ }
+
+ if (hr->disk_ceiling != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf("Disk Ceiling: %s\n", format_bytes(buf, sizeof(buf), hr->disk_ceiling));
+ }
+
+ if (hr->good_authentication_counter != UINT64_MAX)
+ printf(" Good Auth.: %" PRIu64 "\n", hr->good_authentication_counter);
+
+ if (hr->last_good_authentication_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Good: %s\n", format_timestamp(buf, sizeof(buf), hr->last_good_authentication_usec));
+ }
+
+ if (hr->bad_authentication_counter != UINT64_MAX)
+ printf(" Bad Auth.: %" PRIu64 "\n", hr->bad_authentication_counter);
+
+ if (hr->last_bad_authentication_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Bad: %s\n", format_timestamp(buf, sizeof(buf), hr->last_bad_authentication_usec));
+ }
+
+ t = user_record_ratelimit_next_try(hr);
+ if (t != USEC_INFINITY) {
+ usec_t n = now(CLOCK_REALTIME);
+
+ if (t <= n)
+ printf(" Next Try: anytime\n");
+ else {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" Next Try: %sin %s%s\n",
+ ansi_highlight_red(),
+ format_timespan(buf, sizeof(buf), t - n, USEC_PER_SEC),
+ ansi_normal());
+ }
+ }
+
+ if (storage != USER_CLASSIC) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" Auth. Limit: %" PRIu64 " attempts per %s\n", user_record_ratelimit_burst(hr),
+ format_timespan(buf, sizeof(buf), user_record_ratelimit_interval_usec(hr), 0));
+ }
+
+ if (hr->enforce_password_policy >= 0)
+ printf(" Passwd Pol.: %s\n", yes_no(hr->enforce_password_policy));
+
+ if (hr->password_change_min_usec != UINT64_MAX ||
+ hr->password_change_max_usec != UINT64_MAX ||
+ hr->password_change_warn_usec != UINT64_MAX ||
+ hr->password_change_inactive_usec != UINT64_MAX) {
+
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" Passwd Chg.:");
+
+ if (hr->password_change_min_usec != UINT64_MAX) {
+ printf(" min %s", format_timespan(buf, sizeof(buf), hr->password_change_min_usec, 0));
+
+ if (hr->password_change_max_usec != UINT64_MAX)
+ printf(" …");
+ }
+
+ if (hr->password_change_max_usec != UINT64_MAX)
+ printf(" max %s", format_timespan(buf, sizeof(buf), hr->password_change_max_usec, 0));
+
+ if (hr->password_change_warn_usec != UINT64_MAX)
+ printf("/warn %s", format_timespan(buf, sizeof(buf), hr->password_change_warn_usec, 0));
+
+ if (hr->password_change_inactive_usec != UINT64_MAX)
+ printf("/inactive %s", format_timespan(buf, sizeof(buf), hr->password_change_inactive_usec, 0));
+
+ printf("\n");
+ }
+
+ if (hr->password_change_now >= 0)
+ printf("Pas. Ch. Now: %s\n", yes_no(hr->password_change_now));
+
+ if (!strv_isempty(hr->ssh_authorized_keys))
+ printf("SSH Pub. Key: %zu\n", strv_length(hr->ssh_authorized_keys));
+
+ if (!strv_isempty(hr->pkcs11_token_uri)) {
+ char **i;
+
+ STRV_FOREACH(i, hr->pkcs11_token_uri)
+ printf(i == hr->pkcs11_token_uri ?
+ "PKCS11 Token: %s\n" :
+ " %s\n", *i);
+ }
+
+ if (hr->n_fido2_hmac_credential > 0)
+ printf(" FIDO2 Token: %zu\n", hr->n_fido2_hmac_credential);
+
+ if (!strv_isempty(hr->recovery_key_type))
+ printf("Recovery Key: %zu\n", strv_length(hr->recovery_key_type));
+
+ k = strv_length(hr->hashed_password);
+ if (k == 0)
+ printf(" Passwords: %snone%s\n",
+ user_record_disposition(hr) == USER_REGULAR ? ansi_highlight_yellow() : ansi_normal(), ansi_normal());
+ else
+ printf(" Passwords: %zu\n", k);
+
+ if (hr->signed_locally >= 0)
+ printf(" Local Sig.: %s\n", yes_no(hr->signed_locally));
+
+ if (hr->stop_delay_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" Stop Delay: %s\n", format_timespan(buf, sizeof(buf), hr->stop_delay_usec, 0));
+ }
+
+ if (hr->auto_login >= 0)
+ printf("Autom. Login: %s\n", yes_no(hr->auto_login));
+
+ if (hr->kill_processes >= 0)
+ printf(" Kill Proc.: %s\n", yes_no(hr->kill_processes));
+
+ if (hr->service)
+ printf(" Service: %s\n", hr->service);
+}
+
+void group_record_show(GroupRecord *gr, bool show_full_user_info) {
+ int r;
+
+ printf(" Group name: %s\n",
+ group_record_group_name_and_realm(gr));
+
+ printf(" Disposition: %s\n", user_disposition_to_string(group_record_disposition(gr)));
+
+ if (gr->last_change_usec != USEC_INFINITY) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Change: %s\n", format_timestamp(buf, sizeof(buf), gr->last_change_usec));
+ }
+
+ if (gid_is_valid(gr->gid))
+ printf(" GID: " GID_FMT "\n", gr->gid);
+
+ if (show_full_user_info) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ r = membershipdb_by_group(gr->group_name, 0, &iterator);
+ if (r < 0) {
+ errno = -r;
+ printf(" Members: (can't acquire: %m)");
+ } else {
+ const char *prefix = " Members:";
+
+ for (;;) {
+ _cleanup_free_ char *user = NULL;
+
+ r = membershipdb_iterator_get(iterator, &user, NULL);
+ if (r == -ESRCH)
+ break;
+ if (r < 0) {
+ errno = -r;
+ printf("%s (can't iterate: %m\n", prefix);
+ break;
+ }
+
+ printf("%s %s\n", prefix, user);
+ prefix = " ";
+ }
+ }
+ } else {
+ const char *prefix = " Members:";
+ char **i;
+
+ STRV_FOREACH(i, gr->members) {
+ printf("%s %s\n", prefix, *i);
+ prefix = " ";
+ }
+ }
+
+ if (!strv_isempty(gr->administrators)) {
+ const char *prefix = " Admins:";
+ char **i;
+
+ STRV_FOREACH(i, gr->administrators) {
+ printf("%s %s\n", prefix, *i);
+ prefix = " ";
+ }
+ }
+
+ if (gr->description && !streq(gr->description, gr->group_name))
+ printf(" Description: %s\n", gr->description);
+
+ if (!strv_isempty(gr->hashed_password))
+ printf(" Passwords: %zu\n", strv_length(gr->hashed_password));
+
+ if (gr->service)
+ printf(" Service: %s\n", gr->service);
+}
diff --git a/src/shared/user-record-show.h b/src/shared/user-record-show.h
new file mode 100644
index 0000000..dcef065
--- /dev/null
+++ b/src/shared/user-record-show.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "user-record.h"
+#include "group-record.h"
+
+const char *user_record_state_color(const char *state);
+
+void user_record_show(UserRecord *hr, bool show_full_group_info);
+void group_record_show(GroupRecord *gr, bool show_full_user_info);
diff --git a/src/shared/user-record.c b/src/shared/user-record.c
new file mode 100644
index 0000000..6c48c56
--- /dev/null
+++ b/src/shared/user-record.c
@@ -0,0 +1,2272 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "cgroup-util.h"
+#include "dns-domain.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "pkcs11-util.h"
+#include "rlimit-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "user-record.h"
+#include "user-util.h"
+
+#define DEFAULT_RATELIMIT_BURST 30
+#define DEFAULT_RATELIMIT_INTERVAL_USEC (1*USEC_PER_MINUTE)
+
+#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES
+static int parse_alloc_uid(const char *path, const char *name, const char *t, uid_t *ret_uid) {
+ uid_t uid;
+ int r;
+
+ r = parse_uid(t, &uid);
+ if (r < 0)
+ return log_debug_errno(r, "%s: failed to parse %s %s, ignoring: %m", path, name, t);
+ if (uid == 0)
+ uid = 1;
+
+ *ret_uid = uid;
+ return 0;
+}
+#endif
+
+int read_login_defs(UGIDAllocationRange *ret_defs, const char *path, const char *root) {
+ UGIDAllocationRange defs = {
+ .system_alloc_uid_min = SYSTEM_ALLOC_UID_MIN,
+ .system_uid_max = SYSTEM_UID_MAX,
+ .system_alloc_gid_min = SYSTEM_ALLOC_GID_MIN,
+ .system_gid_max = SYSTEM_GID_MAX,
+ };
+
+#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (!path)
+ path = "/etc/login.defs";
+
+ r = chase_symlinks_and_fopen_unlocked(path, root, CHASE_PREFIX_ROOT, "re", &f, NULL);
+ if (r == -ENOENT)
+ goto assign;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open %s: %m", path);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *t;
+
+ r = read_line(f, LINE_MAX, &line);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read %s: %m", path);
+ if (r == 0)
+ break;
+
+ if ((t = first_word(line, "SYS_UID_MIN")))
+ (void) parse_alloc_uid(path, "SYS_UID_MIN", t, &defs.system_alloc_uid_min);
+ else if ((t = first_word(line, "SYS_UID_MAX")))
+ (void) parse_alloc_uid(path, "SYS_UID_MAX", t, &defs.system_uid_max);
+ else if ((t = first_word(line, "SYS_GID_MIN")))
+ (void) parse_alloc_uid(path, "SYS_GID_MIN", t, &defs.system_alloc_gid_min);
+ else if ((t = first_word(line, "SYS_GID_MAX")))
+ (void) parse_alloc_uid(path, "SYS_GID_MAX", t, &defs.system_gid_max);
+ }
+
+ assign:
+ if (defs.system_alloc_uid_min > defs.system_uid_max) {
+ log_debug("%s: SYS_UID_MIN > SYS_UID_MAX, resetting.", path);
+ defs.system_alloc_uid_min = MIN(defs.system_uid_max - 1, (uid_t) SYSTEM_ALLOC_UID_MIN);
+ /* Look at sys_uid_max to make sure sys_uid_min..sys_uid_max remains a valid range. */
+ }
+ if (defs.system_alloc_gid_min > defs.system_gid_max) {
+ log_debug("%s: SYS_GID_MIN > SYS_GID_MAX, resetting.", path);
+ defs.system_alloc_gid_min = MIN(defs.system_gid_max - 1, (gid_t) SYSTEM_ALLOC_GID_MIN);
+ /* Look at sys_gid_max to make sure sys_gid_min..sys_gid_max remains a valid range. */
+ }
+#endif
+
+ *ret_defs = defs;
+ return 0;
+}
+
+const UGIDAllocationRange *acquire_ugid_allocation_range(void) {
+#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES
+ static thread_local UGIDAllocationRange defs = {
+#else
+ static const UGIDAllocationRange defs = {
+#endif
+ .system_alloc_uid_min = SYSTEM_ALLOC_UID_MIN,
+ .system_uid_max = SYSTEM_UID_MAX,
+ .system_alloc_gid_min = SYSTEM_ALLOC_GID_MIN,
+ .system_gid_max = SYSTEM_GID_MAX,
+ };
+
+#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES
+ /* This function will ignore failure to read the file, so it should only be called from places where
+ * we don't crucially depend on the answer. In other words, it's appropriate for journald, but
+ * probably not for sysusers. */
+
+ static thread_local bool initialized = false;
+
+ if (!initialized) {
+ (void) read_login_defs(&defs, NULL, NULL);
+ initialized = true;
+ }
+#endif
+
+ return &defs;
+}
+
+bool uid_is_system(uid_t uid) {
+ const UGIDAllocationRange *defs;
+ assert_se(defs = acquire_ugid_allocation_range());
+
+ return uid <= defs->system_uid_max;
+}
+
+bool gid_is_system(gid_t gid) {
+ const UGIDAllocationRange *defs;
+ assert_se(defs = acquire_ugid_allocation_range());
+
+ return gid <= defs->system_gid_max;
+}
+
+UserRecord* user_record_new(void) {
+ UserRecord *h;
+
+ h = new(UserRecord, 1);
+ if (!h)
+ return NULL;
+
+ *h = (UserRecord) {
+ .n_ref = 1,
+ .disposition = _USER_DISPOSITION_INVALID,
+ .last_change_usec = UINT64_MAX,
+ .last_password_change_usec = UINT64_MAX,
+ .umask = MODE_INVALID,
+ .nice_level = INT_MAX,
+ .not_before_usec = UINT64_MAX,
+ .not_after_usec = UINT64_MAX,
+ .locked = -1,
+ .storage = _USER_STORAGE_INVALID,
+ .access_mode = MODE_INVALID,
+ .disk_size = UINT64_MAX,
+ .disk_size_relative = UINT64_MAX,
+ .tasks_max = UINT64_MAX,
+ .memory_high = UINT64_MAX,
+ .memory_max = UINT64_MAX,
+ .cpu_weight = UINT64_MAX,
+ .io_weight = UINT64_MAX,
+ .uid = UID_INVALID,
+ .gid = GID_INVALID,
+ .nodev = true,
+ .nosuid = true,
+ .luks_discard = -1,
+ .luks_offline_discard = -1,
+ .luks_volume_key_size = UINT64_MAX,
+ .luks_pbkdf_time_cost_usec = UINT64_MAX,
+ .luks_pbkdf_memory_cost = UINT64_MAX,
+ .luks_pbkdf_parallel_threads = UINT64_MAX,
+ .disk_usage = UINT64_MAX,
+ .disk_free = UINT64_MAX,
+ .disk_ceiling = UINT64_MAX,
+ .disk_floor = UINT64_MAX,
+ .signed_locally = -1,
+ .good_authentication_counter = UINT64_MAX,
+ .bad_authentication_counter = UINT64_MAX,
+ .last_good_authentication_usec = UINT64_MAX,
+ .last_bad_authentication_usec = UINT64_MAX,
+ .ratelimit_begin_usec = UINT64_MAX,
+ .ratelimit_count = UINT64_MAX,
+ .ratelimit_interval_usec = UINT64_MAX,
+ .ratelimit_burst = UINT64_MAX,
+ .removable = -1,
+ .enforce_password_policy = -1,
+ .auto_login = -1,
+ .stop_delay_usec = UINT64_MAX,
+ .kill_processes = -1,
+ .password_change_min_usec = UINT64_MAX,
+ .password_change_max_usec = UINT64_MAX,
+ .password_change_warn_usec = UINT64_MAX,
+ .password_change_inactive_usec = UINT64_MAX,
+ .password_change_now = -1,
+ .pkcs11_protected_authentication_path_permitted = -1,
+ .fido2_user_presence_permitted = -1,
+ };
+
+ return h;
+}
+
+static void pkcs11_encrypted_key_done(Pkcs11EncryptedKey *k) {
+ if (!k)
+ return;
+
+ free(k->uri);
+ erase_and_free(k->data);
+ erase_and_free(k->hashed_password);
+}
+
+static void fido2_hmac_credential_done(Fido2HmacCredential *c) {
+ if (!c)
+ return;
+
+ free(c->id);
+}
+
+static void fido2_hmac_salt_done(Fido2HmacSalt *s) {
+ if (!s)
+ return;
+
+ fido2_hmac_credential_done(&s->credential);
+ erase_and_free(s->salt);
+ erase_and_free(s->hashed_password);
+}
+
+static void recovery_key_done(RecoveryKey *k) {
+ if (!k)
+ return;
+
+ free(k->type);
+ erase_and_free(k->hashed_password);
+}
+
+static UserRecord* user_record_free(UserRecord *h) {
+ if (!h)
+ return NULL;
+
+ free(h->user_name);
+ free(h->realm);
+ free(h->user_name_and_realm_auto);
+ free(h->real_name);
+ free(h->email_address);
+ erase_and_free(h->password_hint);
+ free(h->location);
+ free(h->icon_name);
+
+ free(h->shell);
+
+ strv_free(h->environment);
+ free(h->time_zone);
+ free(h->preferred_language);
+ rlimit_free_all(h->rlimits);
+
+ free(h->skeleton_directory);
+
+ strv_free_erase(h->hashed_password);
+ strv_free_erase(h->ssh_authorized_keys);
+ strv_free_erase(h->password);
+ strv_free_erase(h->token_pin);
+
+ free(h->cifs_service);
+ free(h->cifs_user_name);
+ free(h->cifs_domain);
+
+ free(h->image_path);
+ free(h->image_path_auto);
+ free(h->home_directory);
+ free(h->home_directory_auto);
+
+ strv_free(h->member_of);
+
+ free(h->file_system_type);
+ free(h->luks_cipher);
+ free(h->luks_cipher_mode);
+ free(h->luks_pbkdf_hash_algorithm);
+ free(h->luks_pbkdf_type);
+
+ free(h->state);
+ free(h->service);
+
+ strv_free(h->pkcs11_token_uri);
+ for (size_t i = 0; i < h->n_pkcs11_encrypted_key; i++)
+ pkcs11_encrypted_key_done(h->pkcs11_encrypted_key + i);
+ free(h->pkcs11_encrypted_key);
+
+ for (size_t i = 0; i < h->n_fido2_hmac_credential; i++)
+ fido2_hmac_credential_done(h->fido2_hmac_credential + i);
+ for (size_t i = 0; i < h->n_fido2_hmac_salt; i++)
+ fido2_hmac_salt_done(h->fido2_hmac_salt + i);
+
+ strv_free(h->recovery_key_type);
+ for (size_t i = 0; i < h->n_recovery_key; i++)
+ recovery_key_done(h->recovery_key + i);
+
+ json_variant_unref(h->json);
+
+ return mfree(h);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(UserRecord, user_record, user_record_free);
+
+int json_dispatch_realm(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ r = dns_name_is_valid(n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to check if JSON field '%s' is a valid DNS domain.", strna(name));
+ if (r == 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid DNS domain.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+int json_dispatch_gecos(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (valid_gecos(n)) {
+ if (free_and_strdup(s, n) < 0)
+ return json_log_oom(variant, flags);
+ } else {
+ _cleanup_free_ char *m = NULL;
+
+ json_log(variant, flags|JSON_DEBUG, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid GECOS compatible string, mangling.", strna(name));
+
+ m = mangle_gecos(n);
+ if (!m)
+ return json_log_oom(variant, flags);
+
+ free_and_replace(*s, m);
+ }
+
+ return 0;
+}
+
+static int json_dispatch_nice(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int *nl = userdata;
+ intmax_t m;
+
+ if (json_variant_is_null(variant)) {
+ *nl = INT_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_integer(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ m = json_variant_integer(variant);
+ if (m < PRIO_MIN || m >= PRIO_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not a valid nice level.", strna(name));
+
+ *nl = m;
+ return 0;
+}
+
+static int json_dispatch_rlimit_value(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ rlim_t *ret = userdata;
+
+ if (json_variant_is_null(variant))
+ *ret = RLIM_INFINITY;
+ else if (json_variant_is_unsigned(variant)) {
+ uintmax_t w;
+
+ w = json_variant_unsigned(variant);
+ if (w == RLIM_INFINITY || (uintmax_t) w != json_variant_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "Resource limit value '%s' is out of range.", name);
+
+ *ret = (rlim_t) w;
+ } else
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit value '%s' is not an unsigned integer.", name);
+
+ return 0;
+}
+
+static int json_dispatch_rlimits(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ struct rlimit** limits = userdata;
+ JsonVariant *value;
+ const char *key;
+ int r;
+
+ assert_se(limits);
+
+ if (json_variant_is_null(variant)) {
+ rlimit_free_all(limits);
+ return 0;
+ }
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ JSON_VARIANT_OBJECT_FOREACH(key, value, variant) {
+ JsonVariant *jcur, *jmax;
+ struct rlimit rl;
+ const char *p;
+ int l;
+
+ p = startswith(key, "RLIMIT_");
+ if (!p)
+ l = -1;
+ else
+ l = rlimit_from_string(p);
+ if (l < 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' not known.", key);
+
+ if (!json_variant_is_object(value))
+ return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' has invalid value.", key);
+
+ if (json_variant_elements(value) != 4)
+ return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' value is does not have two fields as expected.", key);
+
+ jcur = json_variant_by_key(value, "cur");
+ if (!jcur)
+ return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' lacks 'cur' field.", key);
+ r = json_dispatch_rlimit_value("cur", jcur, flags, &rl.rlim_cur);
+ if (r < 0)
+ return r;
+
+ jmax = json_variant_by_key(value, "max");
+ if (!jmax)
+ return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' lacks 'max' field.", key);
+ r = json_dispatch_rlimit_value("max", jmax, flags, &rl.rlim_max);
+ if (r < 0)
+ return r;
+
+ if (limits[l])
+ *(limits[l]) = rl;
+ else {
+ limits[l] = newdup(struct rlimit, &rl, 1);
+ if (!limits[l])
+ return log_oom();
+ }
+ }
+
+ return 0;
+}
+
+static int json_dispatch_filename_or_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!filename_is_valid(n) && !path_is_normalized(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid file name or normalized path.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int json_dispatch_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!path_is_normalized(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a normalized file system path.", strna(name));
+ if (!path_is_absolute(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an absolute file system path.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int json_dispatch_home_directory(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!valid_home(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid home directory path.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int json_dispatch_image_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (empty_or_root(n) || !path_is_valid(n) || !path_is_absolute(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid image path.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int json_dispatch_umask(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ mode_t *m = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *m = (mode_t) -1;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a number.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k > 0777)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' outside of valid range 0…0777.", strna(name));
+
+ *m = (mode_t) k;
+ return 0;
+}
+
+static int json_dispatch_access_mode(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ mode_t *m = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *m = (mode_t) -1;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a number.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k > 07777)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' outside of valid range 0…07777.", strna(name));
+
+ *m = (mode_t) k;
+ return 0;
+}
+
+static int json_dispatch_environment(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **n = NULL;
+ char ***l = userdata;
+ size_t i;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name));
+
+ for (i = 0; i < json_variant_elements(variant); i++) {
+ _cleanup_free_ char *c = NULL;
+ JsonVariant *e;
+ const char *a;
+
+ e = json_variant_by_index(variant, i);
+ if (!json_variant_is_string(e))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name));
+
+ assert_se(a = json_variant_string(e));
+
+ if (!env_assignment_is_valid(a))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of environment variables.", strna(name));
+
+ c = strdup(a);
+ if (!c)
+ return json_log_oom(variant, flags);
+
+ r = strv_env_replace(&n, c);
+ if (r < 0)
+ return json_log_oom(variant, flags);
+
+ c = NULL;
+ }
+
+ strv_free_and_replace(*l, n);
+ return 0;
+}
+
+int json_dispatch_user_disposition(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserDisposition *disposition = userdata, k;
+
+ if (json_variant_is_null(variant)) {
+ *disposition = _USER_DISPOSITION_INVALID;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ k = user_disposition_from_string(json_variant_string(variant));
+ if (k < 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Disposition type '%s' not known.", json_variant_string(variant));
+
+ *disposition = k;
+ return 0;
+}
+
+static int json_dispatch_storage(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserStorage *storage = userdata, k;
+
+ if (json_variant_is_null(variant)) {
+ *storage = _USER_STORAGE_INVALID;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ k = user_storage_from_string(json_variant_string(variant));
+ if (k < 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Storage type '%s' not known.", json_variant_string(variant));
+
+ *storage = k;
+ return 0;
+}
+
+static int json_dispatch_disk_size(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *size = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *size = UINT64_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k < USER_DISK_SIZE_MIN || k > USER_DISK_SIZE_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), USER_DISK_SIZE_MIN, USER_DISK_SIZE_MAX);
+
+ *size = k;
+ return 0;
+}
+
+static int json_dispatch_tasks_or_memory_max(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *limit = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *limit = UINT64_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k <= 0 || k >= UINT64_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), (uint64_t) 1, UINT64_MAX-1);
+
+ *limit = k;
+ return 0;
+}
+
+static int json_dispatch_weight(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *weight = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *weight = UINT64_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k <= CGROUP_WEIGHT_MIN || k >= CGROUP_WEIGHT_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), (uint64_t) CGROUP_WEIGHT_MIN, (uint64_t) CGROUP_WEIGHT_MAX);
+
+ *weight = k;
+ return 0;
+}
+
+int json_dispatch_user_group_list(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***list = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string.");
+
+ if (!valid_user_group_name(json_variant_string(e), FLAGS_SET(flags, JSON_RELAX) ? VALID_USER_RELAX : 0))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a valid user/group name: %s", json_variant_string(e));
+
+ r = strv_extend(&l, json_variant_string(e));
+ if (r < 0)
+ return json_log(e, flags, r, "Failed to append array element: %m");
+ }
+
+ r = strv_extend_strv(list, l, true);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to merge user/group arrays: %m");
+
+ return 0;
+}
+
+static int dispatch_secret(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch secret_dispatch_table[] = {
+ { "password", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, password), 0 },
+ { "tokenPin", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, token_pin), 0 },
+ { "pkcs11Pin", /* legacy alias */ _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, token_pin), 0 },
+ { "pkcs11ProtectedAuthenticationPathPermitted", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, pkcs11_protected_authentication_path_permitted), 0 },
+ { "fido2UserPresencePermitted", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, fido2_user_presence_permitted), 0 },
+ {},
+ };
+
+ return json_dispatch(variant, secret_dispatch_table, NULL, flags, userdata);
+}
+
+static int dispatch_pkcs11_uri(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!pkcs11_uri_valid(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid RFC7512 PKCS#11 URI.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int dispatch_pkcs11_uri_array(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **z = NULL;
+ char ***l = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ if (json_variant_is_string(variant)) {
+ const char *n;
+
+ n = json_variant_string(variant);
+ if (!pkcs11_uri_valid(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid RFC7512 PKCS#11 URI.", strna(name));
+
+ z = strv_new(n);
+ if (!z)
+ return log_oom();
+
+ } else {
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string or array of strings.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ const char *n;
+
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string.");
+
+ n = json_variant_string(e);
+ if (!pkcs11_uri_valid(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element in '%s' is not a valid RFC7512 PKCS#11 URI: %s", strna(name), n);
+
+ r = strv_extend(&z, n);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ strv_free_and_replace(*l, z);
+ return 0;
+}
+
+static int dispatch_pkcs11_key_data(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ Pkcs11EncryptedKey *k = userdata;
+ size_t l;
+ void *b;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ k->data = erase_and_free(k->data);
+ k->size = 0;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to decode encrypted PKCS#11 key: %m");
+
+ erase_and_free(k->data);
+ k->data = b;
+ k->size = l;
+
+ return 0;
+}
+
+static int dispatch_pkcs11_key(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserRecord *h = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ Pkcs11EncryptedKey *array, *k;
+
+ static const JsonDispatch pkcs11_key_dispatch_table[] = {
+ { "uri", JSON_VARIANT_STRING, dispatch_pkcs11_uri, offsetof(Pkcs11EncryptedKey, uri), JSON_MANDATORY },
+ { "data", JSON_VARIANT_STRING, dispatch_pkcs11_key_data, 0, JSON_MANDATORY },
+ { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(Pkcs11EncryptedKey, hashed_password), JSON_MANDATORY },
+ {},
+ };
+
+ if (!json_variant_is_object(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object.");
+
+ array = reallocarray(h->pkcs11_encrypted_key, h->n_pkcs11_encrypted_key + 1, sizeof(Pkcs11EncryptedKey));
+ if (!array)
+ return log_oom();
+
+ h->pkcs11_encrypted_key = array;
+ k = h->pkcs11_encrypted_key + h->n_pkcs11_encrypted_key;
+ *k = (Pkcs11EncryptedKey) {};
+
+ r = json_dispatch(e, pkcs11_key_dispatch_table, NULL, flags, k);
+ if (r < 0) {
+ pkcs11_encrypted_key_done(k);
+ return r;
+ }
+
+ h->n_pkcs11_encrypted_key++;
+ }
+
+ return 0;
+}
+
+static int dispatch_fido2_hmac_credential(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ Fido2HmacCredential *k = userdata;
+ size_t l;
+ void *b;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ k->id = mfree(k->id);
+ k->size = 0;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to decode FIDO2 credential ID: %m");
+
+ free_and_replace(k->id, b);
+ k->size = l;
+
+ return 0;
+}
+
+static int dispatch_fido2_hmac_credential_array(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserRecord *h = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ Fido2HmacCredential *array;
+ size_t l;
+ void *b;
+
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string.");
+
+ array = reallocarray(h->fido2_hmac_credential, h->n_fido2_hmac_credential + 1, sizeof(Fido2HmacCredential));
+ if (!array)
+ return log_oom();
+
+ r = unbase64mem(json_variant_string(e), (size_t) -1, &b, &l);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to decode FIDO2 credential ID: %m");
+
+ h->fido2_hmac_credential = array;
+
+ h->fido2_hmac_credential[h->n_fido2_hmac_credential++] = (Fido2HmacCredential) {
+ .id = b,
+ .size = l,
+ };
+ }
+
+ return 0;
+}
+
+static int dispatch_fido2_hmac_salt_value(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ Fido2HmacSalt *k = userdata;
+ size_t l;
+ void *b;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ k->salt = erase_and_free(k->salt);
+ k->salt_size = 0;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to decode FIDO2 salt: %m");
+
+ erase_and_free(k->salt);
+ k->salt = b;
+ k->salt_size = l;
+
+ return 0;
+}
+
+static int dispatch_fido2_hmac_salt(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserRecord *h = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ Fido2HmacSalt *array, *k;
+
+ static const JsonDispatch fido2_hmac_salt_dispatch_table[] = {
+ { "credential", JSON_VARIANT_STRING, dispatch_fido2_hmac_credential, offsetof(Fido2HmacSalt, credential), JSON_MANDATORY },
+ { "salt", JSON_VARIANT_STRING, dispatch_fido2_hmac_salt_value, 0, JSON_MANDATORY },
+ { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(Fido2HmacSalt, hashed_password), JSON_MANDATORY },
+ {},
+ };
+
+ if (!json_variant_is_object(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object.");
+
+ array = reallocarray(h->fido2_hmac_salt, h->n_fido2_hmac_salt + 1, sizeof(Fido2HmacSalt));
+ if (!array)
+ return log_oom();
+
+ h->fido2_hmac_salt = array;
+ k = h->fido2_hmac_salt + h->n_fido2_hmac_salt;
+ *k = (Fido2HmacSalt) {};
+
+ r = json_dispatch(e, fido2_hmac_salt_dispatch_table, NULL, flags, k);
+ if (r < 0) {
+ fido2_hmac_salt_done(k);
+ return r;
+ }
+
+ h->n_fido2_hmac_salt++;
+ }
+
+ return 0;
+}
+
+static int dispatch_recovery_key(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserRecord *h = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ RecoveryKey *array, *k;
+
+ static const JsonDispatch recovery_key_dispatch_table[] = {
+ { "type", JSON_VARIANT_STRING, json_dispatch_string, 0, JSON_MANDATORY },
+ { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(RecoveryKey, hashed_password), JSON_MANDATORY },
+ {},
+ };
+
+ if (!json_variant_is_object(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object.");
+
+ array = reallocarray(h->recovery_key, h->n_recovery_key + 1, sizeof(RecoveryKey));
+ if (!array)
+ return log_oom();
+
+ h->recovery_key = array;
+ k = h->recovery_key + h->n_recovery_key;
+ *k = (RecoveryKey) {};
+
+ r = json_dispatch(e, recovery_key_dispatch_table, NULL, flags, k);
+ if (r < 0) {
+ recovery_key_done(k);
+ return r;
+ }
+
+ h->n_recovery_key++;
+ }
+
+ return 0;
+}
+
+static int dispatch_privileged(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch privileged_dispatch_table[] = {
+ { "passwordHint", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, password_hint), 0 },
+ { "hashedPassword", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, hashed_password), JSON_SAFE },
+ { "sshAuthorizedKeys", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, ssh_authorized_keys), 0 },
+ { "pkcs11EncryptedKey", JSON_VARIANT_ARRAY, dispatch_pkcs11_key, 0, 0 },
+ { "fido2HmacSalt", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_salt, 0, 0 },
+ { "recoveryKey", JSON_VARIANT_ARRAY, dispatch_recovery_key, 0, 0 },
+ {},
+ };
+
+ return json_dispatch(variant, privileged_dispatch_table, NULL, flags, userdata);
+}
+
+static int dispatch_binding(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch binding_dispatch_table[] = {
+ { "imagePath", JSON_VARIANT_STRING, json_dispatch_image_path, offsetof(UserRecord, image_path), 0 },
+ { "homeDirectory", JSON_VARIANT_STRING, json_dispatch_home_directory, offsetof(UserRecord, home_directory), 0 },
+ { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 },
+ { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 },
+ { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 },
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 },
+ { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 },
+ { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE },
+ { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE },
+ { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE },
+ { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 },
+ {},
+ };
+
+ char smid[SD_ID128_STRING_MAX];
+ JsonVariant *m;
+ sd_id128_t mid;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to determine machine ID: %m");
+
+ m = json_variant_by_key(variant, sd_id128_to_string(mid, smid));
+ if (!m)
+ return 0;
+
+ return json_dispatch(m, binding_dispatch_table, NULL, flags, userdata);
+}
+
+int per_machine_id_match(JsonVariant *ids, JsonDispatchFlags flags) {
+ sd_id128_t mid;
+ int r;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(ids, flags, r, "Failed to acquire machine ID: %m");
+
+ if (json_variant_is_string(ids)) {
+ sd_id128_t k;
+
+ r = sd_id128_from_string(json_variant_string(ids), &k);
+ if (r < 0) {
+ json_log(ids, flags, r, "%s is not a valid machine ID, ignoring: %m", json_variant_string(ids));
+ return 0;
+ }
+
+ return sd_id128_equal(mid, k);
+ }
+
+ if (json_variant_is_array(ids)) {
+ JsonVariant *e;
+
+ JSON_VARIANT_ARRAY_FOREACH(e, ids) {
+ sd_id128_t k;
+
+ if (!json_variant_is_string(e)) {
+ json_log(e, flags, 0, "Machine ID is not a string, ignoring: %m");
+ continue;
+ }
+
+ r = sd_id128_from_string(json_variant_string(e), &k);
+ if (r < 0) {
+ json_log(e, flags, r, "%s is not a valid machine ID, ignoring: %m", json_variant_string(e));
+ continue;
+ }
+
+ if (sd_id128_equal(mid, k))
+ return true;
+ }
+
+ return false;
+ }
+
+ json_log(ids, flags, 0, "Machine ID is not a string or array of strings, ignoring: %m");
+ return false;
+}
+
+int per_machine_hostname_match(JsonVariant *hns, JsonDispatchFlags flags) {
+ _cleanup_free_ char *hn = NULL;
+ int r;
+
+ r = gethostname_strict(&hn);
+ if (r == -ENXIO) {
+ json_log(hns, flags, r, "No hostname set, not matching perMachine hostname record: %m");
+ return false;
+ }
+ if (r < 0)
+ return json_log(hns, flags, r, "Failed to acquire hostname: %m");
+
+ if (json_variant_is_string(hns))
+ return streq(json_variant_string(hns), hn);
+
+ if (json_variant_is_array(hns)) {
+ JsonVariant *e;
+
+ JSON_VARIANT_ARRAY_FOREACH(e, hns) {
+
+ if (!json_variant_is_string(e)) {
+ json_log(e, flags, 0, "Hostname is not a string, ignoring: %m");
+ continue;
+ }
+
+ if (streq(json_variant_string(hns), hn))
+ return true;
+ }
+
+ return false;
+ }
+
+ json_log(hns, flags, 0, "Hostname is not a string or array of strings, ignoring: %m");
+ return false;
+}
+
+static int dispatch_per_machine(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch per_machine_dispatch_table[] = {
+ { "matchMachineId", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 },
+ { "matchHostname", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 },
+ { "iconName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, icon_name), JSON_SAFE },
+ { "location", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, location), 0 },
+ { "shell", JSON_VARIANT_STRING, json_dispatch_filename_or_path, offsetof(UserRecord, shell), 0 },
+ { "umask", JSON_VARIANT_UNSIGNED, json_dispatch_umask, offsetof(UserRecord, umask), 0 },
+ { "environment", JSON_VARIANT_ARRAY, json_dispatch_environment, offsetof(UserRecord, environment), 0 },
+ { "timeZone", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, time_zone), JSON_SAFE },
+ { "preferredLanguage", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, preferred_language), JSON_SAFE },
+ { "niceLevel", _JSON_VARIANT_TYPE_INVALID, json_dispatch_nice, offsetof(UserRecord, nice_level), 0 },
+ { "resourceLimits", _JSON_VARIANT_TYPE_INVALID, json_dispatch_rlimits, offsetof(UserRecord, rlimits), 0 },
+ { "locked", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, locked), 0 },
+ { "notBeforeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_before_usec), 0 },
+ { "notAfterUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_after_usec), 0 },
+ { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 },
+ { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_disk_size, offsetof(UserRecord, disk_size), 0 },
+ { "diskSizeRelative", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size_relative), 0 },
+ { "skeletonDirectory", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, skeleton_directory), 0 },
+ { "accessMode", JSON_VARIANT_UNSIGNED, json_dispatch_access_mode, offsetof(UserRecord, access_mode), 0 },
+ { "tasksMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, tasks_max), 0 },
+ { "memoryHigh", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_high), 0 },
+ { "memoryMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_max), 0 },
+ { "cpuWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, cpu_weight), 0 },
+ { "ioWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, io_weight), 0 },
+ { "mountNoDevices", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nodev), 0 },
+ { "mountNoSuid", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nosuid), 0 },
+ { "mountNoExecute", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, noexec), 0 },
+ { "cifsDomain", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_domain), JSON_SAFE },
+ { "cifsUserName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_user_name), JSON_SAFE },
+ { "cifsService", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_service), JSON_SAFE },
+ { "imagePath", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, image_path), 0 },
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 },
+ { "memberOf", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(UserRecord, member_of), JSON_RELAX},
+ { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE },
+ { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 },
+ { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 },
+ { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 },
+ { "luksDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_discard), 0, },
+ { "luksOfflineDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_offline_discard), 0, },
+ { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE },
+ { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE },
+ { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 },
+ { "luksPbkdfHashAlgorithm", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_hash_algorithm), JSON_SAFE },
+ { "luksPbkdfType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_type), JSON_SAFE },
+ { "luksPbkdfTimeCostUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_time_cost_usec), 0 },
+ { "luksPbkdfMemoryCost", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_memory_cost), 0 },
+ { "luksPbkdfParallelThreads", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_parallel_threads), 0 },
+ { "rateLimitIntervalUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_interval_usec), 0 },
+ { "rateLimitBurst", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_burst), 0 },
+ { "enforcePasswordPolicy", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, enforce_password_policy), 0 },
+ { "autoLogin", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, auto_login), 0 },
+ { "stopDelayUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, stop_delay_usec), 0 },
+ { "killProcesses", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, kill_processes), 0 },
+ { "passwordChangeMinUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_min_usec), 0 },
+ { "passwordChangeMaxUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_max_usec), 0 },
+ { "passwordChangeWarnUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_warn_usec), 0 },
+ { "passwordChangeInactiveUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_inactive_usec), 0 },
+ { "passwordChangeNow", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, password_change_now), 0 },
+ { "pkcs11TokenUri", JSON_VARIANT_ARRAY, dispatch_pkcs11_uri_array, offsetof(UserRecord, pkcs11_token_uri), 0 },
+ { "fido2HmacCredential", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_credential_array, 0, 0 },
+ {},
+ };
+
+ JsonVariant *e;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ bool matching = false;
+ JsonVariant *m;
+
+ if (!json_variant_is_object(e))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ m = json_variant_by_key(e, "matchMachineId");
+ if (m) {
+ r = per_machine_id_match(m, flags);
+ if (r < 0)
+ return r;
+
+ matching = r > 0;
+ }
+
+ if (!matching) {
+ m = json_variant_by_key(e, "matchHostname");
+ if (m) {
+ r = per_machine_hostname_match(m, flags);
+ if (r < 0)
+ return r;
+
+ matching = r > 0;
+ }
+ }
+
+ if (!matching)
+ continue;
+
+ r = json_dispatch(e, per_machine_dispatch_table, NULL, flags, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dispatch_status(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch status_dispatch_table[] = {
+ { "diskUsage", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_usage), 0 },
+ { "diskFree", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_free), 0 },
+ { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size), 0 },
+ { "diskCeiling", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_ceiling), 0 },
+ { "diskFloor", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_floor), 0 },
+ { "state", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, state), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, service), JSON_SAFE },
+ { "signedLocally", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, signed_locally), 0 },
+ { "goodAuthenticationCounter", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, good_authentication_counter), 0 },
+ { "badAuthenticationCounter", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, bad_authentication_counter), 0 },
+ { "lastGoodAuthenticationUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_good_authentication_usec), 0 },
+ { "lastBadAuthenticationUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_bad_authentication_usec), 0 },
+ { "rateLimitBeginUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_begin_usec), 0 },
+ { "rateLimitCount", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_count), 0 },
+ { "removable", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, removable), 0 },
+ {},
+ };
+
+ char smid[SD_ID128_STRING_MAX];
+ JsonVariant *m;
+ sd_id128_t mid;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to determine machine ID: %m");
+
+ m = json_variant_by_key(variant, sd_id128_to_string(mid, smid));
+ if (!m)
+ return 0;
+
+ return json_dispatch(m, status_dispatch_table, NULL, flags, userdata);
+}
+
+int user_record_build_image_path(UserStorage storage, const char *user_name_and_realm, char **ret) {
+ const char *suffix;
+ char *z;
+
+ assert(storage >= 0);
+ assert(user_name_and_realm);
+ assert(ret);
+
+ if (storage == USER_LUKS)
+ suffix = ".home";
+ else if (IN_SET(storage, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT))
+ suffix = ".homedir";
+ else {
+ *ret = NULL;
+ return 0;
+ }
+
+ z = strjoin("/home/", user_name_and_realm, suffix);
+ if (!z)
+ return -ENOMEM;
+
+ *ret = z;
+ return 1;
+}
+
+static int user_record_augment(UserRecord *h, JsonDispatchFlags json_flags) {
+ int r;
+
+ assert(h);
+
+ if (!FLAGS_SET(h->mask, USER_RECORD_REGULAR))
+ return 0;
+
+ assert(h->user_name);
+
+ if (!h->user_name_and_realm_auto && h->realm) {
+ h->user_name_and_realm_auto = strjoin(h->user_name, "@", h->realm);
+ if (!h->user_name_and_realm_auto)
+ return json_log_oom(h->json, json_flags);
+ }
+
+ /* Let's add in the following automatisms only for regular users, they don't make sense for any others */
+ if (user_record_disposition(h) != USER_REGULAR)
+ return 0;
+
+ if (!h->home_directory && !h->home_directory_auto) {
+ h->home_directory_auto = path_join("/home/", h->user_name);
+ if (!h->home_directory_auto)
+ return json_log_oom(h->json, json_flags);
+ }
+
+ if (!h->image_path && !h->image_path_auto) {
+ r = user_record_build_image_path(user_record_storage(h), user_record_user_name_and_realm(h), &h->image_path_auto);
+ if (r < 0)
+ return json_log(h->json, json_flags, r, "Failed to determine default image path: %m");
+ }
+
+ return 0;
+}
+
+int user_group_record_mangle(
+ JsonVariant *v,
+ UserRecordLoadFlags load_flags,
+ JsonVariant **ret_variant,
+ UserRecordMask *ret_mask) {
+
+ static const struct {
+ UserRecordMask mask;
+ const char *name;
+ } mask_field[] = {
+ { USER_RECORD_PRIVILEGED, "privileged" },
+ { USER_RECORD_SECRET, "secret" },
+ { USER_RECORD_BINDING, "binding" },
+ { USER_RECORD_PER_MACHINE, "perMachine" },
+ { USER_RECORD_STATUS, "status" },
+ { USER_RECORD_SIGNATURE, "signature" },
+ };
+
+ JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags);
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ JsonVariant *array[ELEMENTSOF(mask_field) * 2];
+ size_t n_retain = 0, i;
+ UserRecordMask m = 0;
+ int r;
+
+ assert((load_flags & _USER_RECORD_MASK_MAX) == 0); /* detect mistakes when accidentally passing
+ * UserRecordMask bit masks as UserRecordLoadFlags
+ * value */
+
+ assert(v);
+ assert(ret_variant);
+ assert(ret_mask);
+
+ /* Note that this function is shared with the group record parser, hence we try to be generic in our
+ * log message wording here, to cover both cases. */
+
+ if (!json_variant_is_object(v))
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record is not a JSON object, refusing.");
+
+ if (USER_RECORD_ALLOW_MASK(load_flags) == 0) /* allow nothing? */
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Nothing allowed in record, refusing.");
+
+ if (USER_RECORD_STRIP_MASK(load_flags) == _USER_RECORD_MASK_MAX) /* strip everything? */
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Stripping everything from record, refusing.");
+
+ /* Check if we have the special sections and if they match our flags set */
+ for (i = 0; i < ELEMENTSOF(mask_field); i++) {
+ JsonVariant *e, *k;
+
+ if (FLAGS_SET(USER_RECORD_STRIP_MASK(load_flags), mask_field[i].mask)) {
+ if (!w)
+ w = json_variant_ref(v);
+
+ r = json_variant_filter(&w, STRV_MAKE(mask_field[i].name));
+ if (r < 0)
+ return json_log(w, json_flags, r, "Failed to remove field from variant: %m");
+
+ continue;
+ }
+
+ e = json_variant_by_key_full(v, mask_field[i].name, &k);
+ if (e) {
+ if (!FLAGS_SET(USER_RECORD_ALLOW_MASK(load_flags), mask_field[i].mask))
+ return json_log(e, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record contains '%s' field, which is not allowed.", mask_field[i].name);
+
+ if (FLAGS_SET(load_flags, USER_RECORD_STRIP_REGULAR)) {
+ array[n_retain++] = k;
+ array[n_retain++] = e;
+ }
+
+ m |= mask_field[i].mask;
+ } else {
+ if (FLAGS_SET(USER_RECORD_REQUIRE_MASK(load_flags), mask_field[i].mask))
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record lacks '%s' field, which is required.", mask_field[i].name);
+ }
+ }
+
+ if (FLAGS_SET(load_flags, USER_RECORD_STRIP_REGULAR)) {
+ /* If we are supposed to strip regular items, then let's instead just allocate a new object
+ * with just the stuff we need. */
+
+ w = json_variant_unref(w);
+ r = json_variant_new_object(&w, array, n_retain);
+ if (r < 0)
+ return json_log(v, json_flags, r, "Failed to allocate new object: %m");
+ } else {
+ /* And now check if there's anything else in the record */
+ for (i = 0; i < json_variant_elements(v); i += 2) {
+ const char *f;
+ bool special = false;
+ size_t j;
+
+ assert_se(f = json_variant_string(json_variant_by_index(v, i)));
+
+ for (j = 0; j < ELEMENTSOF(mask_field); j++)
+ if (streq(f, mask_field[j].name)) { /* already covered in the loop above */
+ special = true;
+ continue;
+ }
+
+ if (!special) {
+ if ((load_flags & (USER_RECORD_ALLOW_REGULAR|USER_RECORD_REQUIRE_REGULAR)) == 0)
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record contains '%s' field, which is not allowed.", f);
+
+ m |= USER_RECORD_REGULAR;
+ break;
+ }
+ }
+ }
+
+ if (FLAGS_SET(load_flags, USER_RECORD_REQUIRE_REGULAR) && !FLAGS_SET(m, USER_RECORD_REGULAR))
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record lacks basic identity fields, which are required.");
+
+ if (m == 0)
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record is empty.");
+
+ if (w)
+ *ret_variant = TAKE_PTR(w);
+ else
+ *ret_variant = json_variant_ref(v);
+
+ *ret_mask = m;
+ return 0;
+}
+
+int user_record_load(UserRecord *h, JsonVariant *v, UserRecordLoadFlags load_flags) {
+
+ static const JsonDispatch user_dispatch_table[] = {
+ { "userName", JSON_VARIANT_STRING, json_dispatch_user_group_name, offsetof(UserRecord, user_name), JSON_RELAX},
+ { "realm", JSON_VARIANT_STRING, json_dispatch_realm, offsetof(UserRecord, realm), 0 },
+ { "realName", JSON_VARIANT_STRING, json_dispatch_gecos, offsetof(UserRecord, real_name), 0 },
+ { "emailAddress", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, email_address), JSON_SAFE },
+ { "iconName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, icon_name), JSON_SAFE },
+ { "location", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, location), 0 },
+ { "disposition", JSON_VARIANT_STRING, json_dispatch_user_disposition, offsetof(UserRecord, disposition), 0 },
+ { "lastChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_change_usec), 0 },
+ { "lastPasswordChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_password_change_usec), 0 },
+ { "shell", JSON_VARIANT_STRING, json_dispatch_filename_or_path, offsetof(UserRecord, shell), 0 },
+ { "umask", JSON_VARIANT_UNSIGNED, json_dispatch_umask, offsetof(UserRecord, umask), 0 },
+ { "environment", JSON_VARIANT_ARRAY, json_dispatch_environment, offsetof(UserRecord, environment), 0 },
+ { "timeZone", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, time_zone), JSON_SAFE },
+ { "preferredLanguage", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, preferred_language), JSON_SAFE },
+ { "niceLevel", _JSON_VARIANT_TYPE_INVALID, json_dispatch_nice, offsetof(UserRecord, nice_level), 0 },
+ { "resourceLimits", _JSON_VARIANT_TYPE_INVALID, json_dispatch_rlimits, offsetof(UserRecord, rlimits), 0 },
+ { "locked", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, locked), 0 },
+ { "notBeforeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_before_usec), 0 },
+ { "notAfterUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_after_usec), 0 },
+ { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 },
+ { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_disk_size, offsetof(UserRecord, disk_size), 0 },
+ { "diskSizeRelative", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size_relative), 0 },
+ { "skeletonDirectory", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, skeleton_directory), 0 },
+ { "accessMode", JSON_VARIANT_UNSIGNED, json_dispatch_access_mode, offsetof(UserRecord, access_mode), 0 },
+ { "tasksMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, tasks_max), 0 },
+ { "memoryHigh", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_high), 0 },
+ { "memoryMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_max), 0 },
+ { "cpuWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, cpu_weight), 0 },
+ { "ioWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, io_weight), 0 },
+ { "mountNoDevices", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nodev), 0 },
+ { "mountNoSuid", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nosuid), 0 },
+ { "mountNoExecute", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, noexec), 0 },
+ { "cifsDomain", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_domain), JSON_SAFE },
+ { "cifsUserName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_user_name), JSON_SAFE },
+ { "cifsService", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_service), JSON_SAFE },
+ { "imagePath", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, image_path), 0 },
+ { "homeDirectory", JSON_VARIANT_STRING, json_dispatch_home_directory, offsetof(UserRecord, home_directory), 0 },
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 },
+ { "memberOf", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(UserRecord, member_of), JSON_RELAX},
+ { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE },
+ { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 },
+ { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 },
+ { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 },
+ { "luksDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_discard), 0 },
+ { "luksOfflineDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_offline_discard), 0 },
+ { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE },
+ { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE },
+ { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 },
+ { "luksPbkdfHashAlgorithm", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_hash_algorithm), JSON_SAFE },
+ { "luksPbkdfType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_type), JSON_SAFE },
+ { "luksPbkdfTimeCostUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_time_cost_usec), 0 },
+ { "luksPbkdfMemoryCost", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_memory_cost), 0 },
+ { "luksPbkdfParallelThreads", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_parallel_threads), 0 },
+ { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, service), JSON_SAFE },
+ { "rateLimitIntervalUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_interval_usec), 0 },
+ { "rateLimitBurst", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_burst), 0 },
+ { "enforcePasswordPolicy", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, enforce_password_policy), 0 },
+ { "autoLogin", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, auto_login), 0 },
+ { "stopDelayUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, stop_delay_usec), 0 },
+ { "killProcesses", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, kill_processes), 0 },
+ { "passwordChangeMinUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_min_usec), 0 },
+ { "passwordChangeMaxUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_max_usec), 0 },
+ { "passwordChangeWarnUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_warn_usec), 0 },
+ { "passwordChangeInactiveUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_inactive_usec), 0 },
+ { "passwordChangeNow", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, password_change_now), 0 },
+ { "pkcs11TokenUri", JSON_VARIANT_ARRAY, dispatch_pkcs11_uri_array, offsetof(UserRecord, pkcs11_token_uri), 0 },
+ { "fido2HmacCredential", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_credential_array, 0, 0 },
+ { "recoveryKeyType", JSON_VARIANT_ARRAY, json_dispatch_strv, offsetof(UserRecord, recovery_key_type), 0 },
+
+ { "secret", JSON_VARIANT_OBJECT, dispatch_secret, 0, 0 },
+ { "privileged", JSON_VARIANT_OBJECT, dispatch_privileged, 0, 0 },
+
+ /* Ignore the perMachine, binding, status stuff here, and process it later, so that it overrides whatever is set above */
+ { "perMachine", JSON_VARIANT_ARRAY, NULL, 0, 0 },
+ { "binding", JSON_VARIANT_OBJECT, NULL, 0, 0 },
+ { "status", JSON_VARIANT_OBJECT, NULL, 0, 0 },
+
+ /* Ignore 'signature', we check it with explicit accessors instead */
+ { "signature", JSON_VARIANT_ARRAY, NULL, 0, 0 },
+ {},
+ };
+
+ JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags);
+ int r;
+
+ assert(h);
+ assert(!h->json);
+
+ /* Note that this call will leave a half-initialized record around on failure! */
+
+ r = user_group_record_mangle(v, load_flags, &h->json, &h->mask);
+ if (r < 0)
+ return r;
+
+ r = json_dispatch(h->json, user_dispatch_table, NULL, json_flags, h);
+ if (r < 0)
+ return r;
+
+ /* During the parsing operation above we ignored the 'perMachine', 'binding' and 'status' fields,
+ * since we want them to override the global options. Let's process them now. */
+
+ r = dispatch_per_machine("perMachine", json_variant_by_key(h->json, "perMachine"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ r = dispatch_binding("binding", json_variant_by_key(h->json, "binding"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ r = dispatch_status("status", json_variant_by_key(h->json, "status"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(h->mask, USER_RECORD_REGULAR) && !h->user_name)
+ return json_log(h->json, json_flags, SYNTHETIC_ERRNO(EINVAL), "User name field missing, refusing.");
+
+ r = user_record_augment(h, json_flags);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int user_record_build(UserRecord **ret, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *u = NULL;
+ va_list ap;
+ int r;
+
+ assert(ret);
+
+ va_start(ap, ret);
+ r = json_buildv(&v, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ u = user_record_new();
+ if (!u)
+ return -ENOMEM;
+
+ r = user_record_load(u, v, USER_RECORD_LOAD_FULL);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(u);
+ return 0;
+}
+
+const char *user_record_user_name_and_realm(UserRecord *h) {
+ assert(h);
+
+ /* Return the pre-initialized joined string if it is defined */
+ if (h->user_name_and_realm_auto)
+ return h->user_name_and_realm_auto;
+
+ /* If it's not defined then we cannot have a realm */
+ assert(!h->realm);
+ return h->user_name;
+}
+
+UserStorage user_record_storage(UserRecord *h) {
+ assert(h);
+
+ if (h->storage >= 0)
+ return h->storage;
+
+ return USER_CLASSIC;
+}
+
+const char *user_record_file_system_type(UserRecord *h) {
+ assert(h);
+
+ return h->file_system_type ?: "btrfs";
+}
+
+const char *user_record_skeleton_directory(UserRecord *h) {
+ assert(h);
+
+ return h->skeleton_directory ?: "/etc/skel";
+}
+
+mode_t user_record_access_mode(UserRecord *h) {
+ assert(h);
+
+ return h->access_mode != (mode_t) -1 ? h->access_mode : 0700;
+}
+
+const char* user_record_home_directory(UserRecord *h) {
+ assert(h);
+
+ if (h->home_directory)
+ return h->home_directory;
+ if (h->home_directory_auto)
+ return h->home_directory_auto;
+
+ /* The root user is special, hence be special about it */
+ if (streq_ptr(h->user_name, "root"))
+ return "/root";
+
+ return "/";
+}
+
+const char *user_record_image_path(UserRecord *h) {
+ assert(h);
+
+ if (h->image_path)
+ return h->image_path;
+ if (h->image_path_auto)
+ return h->image_path_auto;
+
+ return IN_SET(user_record_storage(h), USER_CLASSIC, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT) ? user_record_home_directory(h) : NULL;
+}
+
+const char *user_record_cifs_user_name(UserRecord *h) {
+ assert(h);
+
+ return h->cifs_user_name ?: h->user_name;
+}
+
+unsigned long user_record_mount_flags(UserRecord *h) {
+ assert(h);
+
+ return (h->nosuid ? MS_NOSUID : 0) |
+ (h->noexec ? MS_NOEXEC : 0) |
+ (h->nodev ? MS_NODEV : 0);
+}
+
+const char *user_record_shell(UserRecord *h) {
+ assert(h);
+
+ if (h->shell)
+ return h->shell;
+
+ if (streq_ptr(h->user_name, "root"))
+ return "/bin/sh";
+
+ if (user_record_disposition(h) == USER_REGULAR)
+ return "/bin/bash";
+
+ return NOLOGIN;
+}
+
+const char *user_record_real_name(UserRecord *h) {
+ assert(h);
+
+ return h->real_name ?: h->user_name;
+}
+
+bool user_record_luks_discard(UserRecord *h) {
+ const char *ip;
+
+ assert(h);
+
+ if (h->luks_discard >= 0)
+ return h->luks_discard;
+
+ ip = user_record_image_path(h);
+ if (!ip)
+ return false;
+
+ /* Use discard by default if we are referring to a real block device, but not when operating on a
+ * loopback device. We want to optimize for SSD and flash storage after all, but we should be careful
+ * when storing stuff on top of regular file systems in loopback files as doing discard then would
+ * mean thin provisioning and we should not do that willy-nilly since it means we'll risk EIO later
+ * on should the disk space to back our file systems not be available. */
+
+ return path_startswith(ip, "/dev/");
+}
+
+bool user_record_luks_offline_discard(UserRecord *h) {
+ const char *ip;
+
+ assert(h);
+
+ if (h->luks_offline_discard >= 0)
+ return h->luks_offline_discard;
+
+ /* Discard while we are logged out should generally be a good idea, except when operating directly on
+ * physical media, where we should just bind it to the online discard mode. */
+
+ ip = user_record_image_path(h);
+ if (!ip)
+ return false;
+
+ if (path_startswith(ip, "/dev/"))
+ return user_record_luks_discard(h);
+
+ return true;
+}
+
+const char *user_record_luks_cipher(UserRecord *h) {
+ assert(h);
+
+ return h->luks_cipher ?: "aes";
+}
+
+const char *user_record_luks_cipher_mode(UserRecord *h) {
+ assert(h);
+
+ return h->luks_cipher_mode ?: "xts-plain64";
+}
+
+uint64_t user_record_luks_volume_key_size(UserRecord *h) {
+ assert(h);
+
+ /* We return a value here that can be cast without loss into size_t which is what libcrypsetup expects */
+
+ if (h->luks_volume_key_size == UINT64_MAX)
+ return 256 / 8;
+
+ return MIN(h->luks_volume_key_size, SIZE_MAX);
+}
+
+const char* user_record_luks_pbkdf_type(UserRecord *h) {
+ assert(h);
+
+ return h->luks_pbkdf_type ?: "argon2i";
+}
+
+uint64_t user_record_luks_pbkdf_time_cost_usec(UserRecord *h) {
+ assert(h);
+
+ /* Returns a value with ms granularity, since that's what libcryptsetup expects */
+
+ if (h->luks_pbkdf_time_cost_usec == UINT64_MAX)
+ return 500 * USEC_PER_MSEC; /* We default to 500ms, in contrast to libcryptsetup's 2s, which is just awfully slow on every login */
+
+ return MIN(DIV_ROUND_UP(h->luks_pbkdf_time_cost_usec, USEC_PER_MSEC), UINT32_MAX) * USEC_PER_MSEC;
+}
+
+uint64_t user_record_luks_pbkdf_memory_cost(UserRecord *h) {
+ assert(h);
+
+ /* Returns a value with kb granularity, since that's what libcryptsetup expects */
+
+ if (h->luks_pbkdf_memory_cost == UINT64_MAX)
+ return 64*1024*1024; /* We default to 64M, since this should work on smaller systems too */
+
+ return MIN(DIV_ROUND_UP(h->luks_pbkdf_memory_cost, 1024), UINT32_MAX) * 1024;
+}
+
+uint64_t user_record_luks_pbkdf_parallel_threads(UserRecord *h) {
+ assert(h);
+
+ if (h->luks_pbkdf_memory_cost == UINT64_MAX)
+ return 1; /* We default to 1, since this should work on smaller systems too */
+
+ return MIN(h->luks_pbkdf_parallel_threads, UINT32_MAX);
+}
+
+const char *user_record_luks_pbkdf_hash_algorithm(UserRecord *h) {
+ assert(h);
+
+ return h->luks_pbkdf_hash_algorithm ?: "sha512";
+}
+
+gid_t user_record_gid(UserRecord *h) {
+ assert(h);
+
+ if (gid_is_valid(h->gid))
+ return h->gid;
+
+ return (gid_t) h->uid;
+}
+
+UserDisposition user_record_disposition(UserRecord *h) {
+ assert(h);
+
+ if (h->disposition >= 0)
+ return h->disposition;
+
+ /* If not declared, derive from UID */
+
+ if (!uid_is_valid(h->uid))
+ return _USER_DISPOSITION_INVALID;
+
+ if (h->uid == 0 || h->uid == UID_NOBODY)
+ return USER_INTRINSIC;
+
+ if (uid_is_system(h->uid))
+ return USER_SYSTEM;
+
+ if (uid_is_dynamic(h->uid))
+ return USER_DYNAMIC;
+
+ if (uid_is_container(h->uid))
+ return USER_CONTAINER;
+
+ if (h->uid > INT32_MAX)
+ return USER_RESERVED;
+
+ return USER_REGULAR;
+}
+
+int user_record_removable(UserRecord *h) {
+ UserStorage storage;
+ assert(h);
+
+ if (h->removable >= 0)
+ return h->removable;
+
+ /* Refuse to decide for classic records */
+ storage = user_record_storage(h);
+ if (h->storage < 0 || h->storage == USER_CLASSIC)
+ return -1;
+
+ /* For now consider only LUKS home directories with a reference by path as removable */
+ return storage == USER_LUKS && path_startswith(user_record_image_path(h), "/dev/");
+}
+
+uint64_t user_record_ratelimit_interval_usec(UserRecord *h) {
+ assert(h);
+
+ if (h->ratelimit_interval_usec == UINT64_MAX)
+ return DEFAULT_RATELIMIT_INTERVAL_USEC;
+
+ return h->ratelimit_interval_usec;
+}
+
+uint64_t user_record_ratelimit_burst(UserRecord *h) {
+ assert(h);
+
+ if (h->ratelimit_burst == UINT64_MAX)
+ return DEFAULT_RATELIMIT_BURST;
+
+ return h->ratelimit_burst;
+}
+
+bool user_record_can_authenticate(UserRecord *h) {
+ assert(h);
+
+ /* Returns true if there's some form of property configured that the user can authenticate against */
+
+ if (h->n_pkcs11_encrypted_key > 0)
+ return true;
+
+ if (h->n_fido2_hmac_salt > 0)
+ return true;
+
+ return !strv_isempty(h->hashed_password);
+}
+
+uint64_t user_record_ratelimit_next_try(UserRecord *h) {
+ assert(h);
+
+ /* Calculates when the it's possible to login next. Returns:
+ *
+ * UINT64_MAX → Nothing known
+ * 0 → Right away
+ * Any other → Next time in CLOCK_REALTIME in usec (which could be in the past)
+ */
+
+ if (h->ratelimit_begin_usec == UINT64_MAX ||
+ h->ratelimit_count == UINT64_MAX)
+ return UINT64_MAX;
+
+ if (h->ratelimit_begin_usec > now(CLOCK_REALTIME)) /* If the ratelimit time is in the future, then
+ * the local clock is probably incorrect. Let's
+ * not refuse login then. */
+ return UINT64_MAX;
+
+ if (h->ratelimit_count < user_record_ratelimit_burst(h))
+ return 0;
+
+ return usec_add(h->ratelimit_begin_usec, user_record_ratelimit_interval_usec(h));
+}
+
+bool user_record_equal(UserRecord *a, UserRecord *b) {
+ assert(a);
+ assert(b);
+
+ /* We assume that when a record is modified its JSON data is updated at the same time, hence it's
+ * sufficient to compare the JSON data. */
+
+ return json_variant_equal(a->json, b->json);
+}
+
+bool user_record_compatible(UserRecord *a, UserRecord *b) {
+ assert(a);
+ assert(b);
+
+ /* If either lacks the regular section, we can't really decide, let's hence say they are
+ * incompatible. */
+ if (!(a->mask & b->mask & USER_RECORD_REGULAR))
+ return false;
+
+ return streq_ptr(a->user_name, b->user_name) &&
+ streq_ptr(a->realm, b->realm);
+}
+
+int user_record_compare_last_change(UserRecord *a, UserRecord *b) {
+ assert(a);
+ assert(b);
+
+ if (a->last_change_usec == b->last_change_usec)
+ return 0;
+
+ /* Always consider a record with a timestamp newer than one without */
+ if (a->last_change_usec == UINT64_MAX)
+ return -1;
+ if (b->last_change_usec == UINT64_MAX)
+ return 1;
+
+ return CMP(a->last_change_usec, b->last_change_usec);
+}
+
+int user_record_clone(UserRecord *h, UserRecordLoadFlags flags, UserRecord **ret) {
+ _cleanup_(user_record_unrefp) UserRecord *c = NULL;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ c = user_record_new();
+ if (!c)
+ return -ENOMEM;
+
+ r = user_record_load(c, h->json, flags);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(c);
+ return 0;
+}
+
+int user_record_masked_equal(UserRecord *a, UserRecord *b, UserRecordMask mask) {
+ _cleanup_(user_record_unrefp) UserRecord *x = NULL, *y = NULL;
+ int r;
+
+ assert(a);
+ assert(b);
+
+ /* Compares the two records, but ignores anything not listed in the specified mask */
+
+ if ((a->mask & ~mask) != 0) {
+ r = user_record_clone(a, USER_RECORD_ALLOW(mask) | USER_RECORD_STRIP(~mask & _USER_RECORD_MASK_MAX), &x);
+ if (r < 0)
+ return r;
+
+ a = x;
+ }
+
+ if ((b->mask & ~mask) != 0) {
+ r = user_record_clone(b, USER_RECORD_ALLOW(mask) | USER_RECORD_STRIP(~mask & _USER_RECORD_MASK_MAX), &y);
+ if (r < 0)
+ return r;
+
+ b = y;
+ }
+
+ return user_record_equal(a, b);
+}
+
+int user_record_test_blocked(UserRecord *h) {
+ usec_t n;
+
+ /* Checks whether access to the specified user shall be allowed at the moment. Returns:
+ *
+ * -ESTALE: Record is from the future
+ * -ENOLCK: Record is blocked
+ * -EL2HLT: Record is not valid yet
+ * -EL3HLT: Record is not valid anymore
+ *
+ */
+
+ assert(h);
+
+ if (h->locked > 0)
+ return -ENOLCK;
+
+ n = now(CLOCK_REALTIME);
+
+ if (h->not_before_usec != UINT64_MAX && n < h->not_before_usec)
+ return -EL2HLT;
+ if (h->not_after_usec != UINT64_MAX && n > h->not_after_usec)
+ return -EL3HLT;
+
+ if (h->last_change_usec != UINT64_MAX &&
+ h->last_change_usec > n) /* Complain during log-ins when the record is from the future */
+ return -ESTALE;
+
+ return 0;
+}
+
+int user_record_test_password_change_required(UserRecord *h) {
+ bool change_permitted;
+ usec_t n;
+
+ assert(h);
+
+ /* Checks whether the user must change the password when logging in
+
+ -EKEYREVOKED: Change password now because admin said so
+ -EOWNERDEAD: Change password now because it expired
+ -EKEYREJECTED: Password is expired, no changing is allowed
+ -EKEYEXPIRED: Password is about to expire, warn user
+ -ENETDOWN: Record has expiration info but no password change timestamp
+ -EROFS: No password change required nor permitted
+ -ESTALE: RTC likely incorrect, last password change is in the future
+ 0: No password change required, but permitted
+ */
+
+ /* If a password change request has been set explicitly, it overrides everything */
+ if (h->password_change_now > 0)
+ return -EKEYREVOKED;
+
+ n = now(CLOCK_REALTIME);
+
+ /* Password change in the future? Then our RTC is likely incorrect */
+ if (h->last_password_change_usec != UINT64_MAX &&
+ h->last_password_change_usec > n &&
+ (h->password_change_min_usec != UINT64_MAX ||
+ h->password_change_max_usec != UINT64_MAX ||
+ h->password_change_inactive_usec != UINT64_MAX))
+ return -ESTALE;
+
+ /* Then, let's check if password changing is currently allowed at all */
+ if (h->password_change_min_usec != UINT64_MAX) {
+
+ /* Expiry configured but no password change timestamp known? */
+ if (h->last_password_change_usec == UINT64_MAX)
+ return -ENETDOWN;
+
+ if (h->password_change_min_usec >= UINT64_MAX - h->last_password_change_usec)
+ change_permitted = false;
+ else
+ change_permitted = n >= h->last_password_change_usec + h->password_change_min_usec;
+
+ } else
+ change_permitted = true;
+
+ /* Let's check whether the password has expired. */
+ if (!(h->password_change_max_usec == UINT64_MAX ||
+ h->password_change_max_usec >= UINT64_MAX - h->last_password_change_usec)) {
+
+ uint64_t change_before;
+
+ /* Expiry configured but no password change timestamp known? */
+ if (h->last_password_change_usec == UINT64_MAX)
+ return -ENETDOWN;
+
+ /* Password is in inactive phase? */
+ if (h->password_change_inactive_usec != UINT64_MAX &&
+ h->password_change_inactive_usec < UINT64_MAX - h->password_change_max_usec) {
+ usec_t added;
+
+ added = h->password_change_inactive_usec + h->password_change_max_usec;
+ if (added < UINT64_MAX - h->last_password_change_usec &&
+ n >= h->last_password_change_usec + added)
+ return -EKEYREJECTED;
+ }
+
+ /* Password needs to be changed now? */
+ change_before = h->last_password_change_usec + h->password_change_max_usec;
+ if (n >= change_before)
+ return change_permitted ? -EOWNERDEAD : -EKEYREJECTED;
+
+ /* Warn user? */
+ if (h->password_change_warn_usec != UINT64_MAX &&
+ (change_before < h->password_change_warn_usec ||
+ n >= change_before - h->password_change_warn_usec))
+ return change_permitted ? -EKEYEXPIRED : -EROFS;
+ }
+
+ /* No password changing necessary */
+ return change_permitted ? 0 : -EROFS;
+}
+
+static const char* const user_storage_table[_USER_STORAGE_MAX] = {
+ [USER_CLASSIC] = "classic",
+ [USER_LUKS] = "luks",
+ [USER_DIRECTORY] = "directory",
+ [USER_SUBVOLUME] = "subvolume",
+ [USER_FSCRYPT] = "fscrypt",
+ [USER_CIFS] = "cifs",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(user_storage, UserStorage);
+
+static const char* const user_disposition_table[_USER_DISPOSITION_MAX] = {
+ [USER_INTRINSIC] = "intrinsic",
+ [USER_SYSTEM] = "system",
+ [USER_DYNAMIC] = "dynamic",
+ [USER_REGULAR] = "regular",
+ [USER_CONTAINER] = "container",
+ [USER_RESERVED] = "reserved",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(user_disposition, UserDisposition);
diff --git a/src/shared/user-record.h b/src/shared/user-record.h
new file mode 100644
index 0000000..542a0dc
--- /dev/null
+++ b/src/shared/user-record.h
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "json.h"
+#include "missing_resource.h"
+#include "time-util.h"
+
+/* But some limits on disk sizes: not less than 5M, not more than 5T */
+#define USER_DISK_SIZE_MIN (UINT64_C(5)*1024*1024)
+#define USER_DISK_SIZE_MAX (UINT64_C(5)*1024*1024*1024*1024)
+
+/* The default disk size to use when nothing else is specified, relative to free disk space */
+#define USER_DISK_SIZE_DEFAULT_PERCENT 85
+
+bool uid_is_system(uid_t uid);
+bool gid_is_system(gid_t gid);
+
+static inline bool uid_is_dynamic(uid_t uid) {
+ return DYNAMIC_UID_MIN <= uid && uid <= DYNAMIC_UID_MAX;
+}
+
+static inline bool gid_is_dynamic(gid_t gid) {
+ return uid_is_dynamic((uid_t) gid);
+}
+
+static inline bool uid_is_container(uid_t uid) {
+ return CONTAINER_UID_BASE_MIN <= uid && uid <= CONTAINER_UID_BASE_MAX;
+}
+
+static inline bool gid_is_container(gid_t gid) {
+ return uid_is_container((uid_t) gid);
+}
+
+typedef struct UGIDAllocationRange {
+ uid_t system_alloc_uid_min;
+ uid_t system_uid_max;
+ gid_t system_alloc_gid_min;
+ gid_t system_gid_max;
+} UGIDAllocationRange;
+
+int read_login_defs(UGIDAllocationRange *ret_defs, const char *path, const char *root);
+const UGIDAllocationRange *acquire_ugid_allocation_range(void);
+
+typedef enum UserDisposition {
+ USER_INTRINSIC, /* root and nobody */
+ USER_SYSTEM, /* statically allocated users for system services */
+ USER_DYNAMIC, /* dynamically allocated users for system services */
+ USER_REGULAR, /* regular (typically human users) */
+ USER_CONTAINER, /* UID ranges allocated for container uses */
+ USER_RESERVED, /* Range above 2^31 */
+ _USER_DISPOSITION_MAX,
+ _USER_DISPOSITION_INVALID = -1,
+} UserDisposition;
+
+typedef enum UserHomeStorage {
+ USER_CLASSIC,
+ USER_LUKS,
+ USER_DIRECTORY, /* A directory, and a .identity file in it, which USER_CLASSIC lacks */
+ USER_SUBVOLUME,
+ USER_FSCRYPT,
+ USER_CIFS,
+ _USER_STORAGE_MAX,
+ _USER_STORAGE_INVALID = -1
+} UserStorage;
+
+typedef enum UserRecordMask {
+ /* The various sections an identity record may have, as bit mask */
+ USER_RECORD_REGULAR = 1U << 0,
+ USER_RECORD_SECRET = 1U << 1,
+ USER_RECORD_PRIVILEGED = 1U << 2,
+ USER_RECORD_PER_MACHINE = 1U << 3,
+ USER_RECORD_BINDING = 1U << 4,
+ USER_RECORD_STATUS = 1U << 5,
+ USER_RECORD_SIGNATURE = 1U << 6,
+ _USER_RECORD_MASK_MAX = (1U << 7)-1
+} UserRecordMask;
+
+typedef enum UserRecordLoadFlags {
+ /* A set of flags used while loading a user record from JSON data. We leave the lower 6 bits free,
+ * just as a safety precaution so that we can detect borked conversions between UserRecordMask and
+ * UserRecordLoadFlags. */
+
+ /* What to require */
+ USER_RECORD_REQUIRE_REGULAR = USER_RECORD_REGULAR << 7,
+ USER_RECORD_REQUIRE_SECRET = USER_RECORD_SECRET << 7,
+ USER_RECORD_REQUIRE_PRIVILEGED = USER_RECORD_PRIVILEGED << 7,
+ USER_RECORD_REQUIRE_PER_MACHINE = USER_RECORD_PER_MACHINE << 7,
+ USER_RECORD_REQUIRE_BINDING = USER_RECORD_BINDING << 7,
+ USER_RECORD_REQUIRE_STATUS = USER_RECORD_STATUS << 7,
+ USER_RECORD_REQUIRE_SIGNATURE = USER_RECORD_SIGNATURE << 7,
+
+ /* What to allow */
+ USER_RECORD_ALLOW_REGULAR = USER_RECORD_REGULAR << 14,
+ USER_RECORD_ALLOW_SECRET = USER_RECORD_SECRET << 14,
+ USER_RECORD_ALLOW_PRIVILEGED = USER_RECORD_PRIVILEGED << 14,
+ USER_RECORD_ALLOW_PER_MACHINE = USER_RECORD_PER_MACHINE << 14,
+ USER_RECORD_ALLOW_BINDING = USER_RECORD_BINDING << 14,
+ USER_RECORD_ALLOW_STATUS = USER_RECORD_STATUS << 14,
+ USER_RECORD_ALLOW_SIGNATURE = USER_RECORD_SIGNATURE << 14,
+
+ /* What to strip */
+ USER_RECORD_STRIP_REGULAR = USER_RECORD_REGULAR << 21,
+ USER_RECORD_STRIP_SECRET = USER_RECORD_SECRET << 21,
+ USER_RECORD_STRIP_PRIVILEGED = USER_RECORD_PRIVILEGED << 21,
+ USER_RECORD_STRIP_PER_MACHINE = USER_RECORD_PER_MACHINE << 21,
+ USER_RECORD_STRIP_BINDING = USER_RECORD_BINDING << 21,
+ USER_RECORD_STRIP_STATUS = USER_RECORD_STATUS << 21,
+ USER_RECORD_STRIP_SIGNATURE = USER_RECORD_SIGNATURE << 21,
+
+ /* Some special combinations that deserve explicit names */
+ USER_RECORD_LOAD_FULL = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_SECRET |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE |
+ USER_RECORD_ALLOW_BINDING |
+ USER_RECORD_ALLOW_STATUS |
+ USER_RECORD_ALLOW_SIGNATURE,
+
+ USER_RECORD_LOAD_REFUSE_SECRET = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE |
+ USER_RECORD_ALLOW_BINDING |
+ USER_RECORD_ALLOW_STATUS |
+ USER_RECORD_ALLOW_SIGNATURE,
+
+ USER_RECORD_LOAD_MASK_SECRET = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE |
+ USER_RECORD_ALLOW_BINDING |
+ USER_RECORD_ALLOW_STATUS |
+ USER_RECORD_ALLOW_SIGNATURE |
+ USER_RECORD_STRIP_SECRET,
+
+ USER_RECORD_EXTRACT_SECRET = USER_RECORD_REQUIRE_SECRET |
+ USER_RECORD_STRIP_REGULAR |
+ USER_RECORD_STRIP_PRIVILEGED |
+ USER_RECORD_STRIP_PER_MACHINE |
+ USER_RECORD_STRIP_BINDING |
+ USER_RECORD_STRIP_STATUS |
+ USER_RECORD_STRIP_SIGNATURE,
+
+ USER_RECORD_LOAD_SIGNABLE = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE,
+
+ USER_RECORD_EXTRACT_SIGNABLE = USER_RECORD_LOAD_SIGNABLE |
+ USER_RECORD_STRIP_SECRET |
+ USER_RECORD_STRIP_BINDING |
+ USER_RECORD_STRIP_STATUS |
+ USER_RECORD_STRIP_SIGNATURE,
+
+ USER_RECORD_LOAD_EMBEDDED = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE |
+ USER_RECORD_ALLOW_SIGNATURE,
+
+ USER_RECORD_EXTRACT_EMBEDDED = USER_RECORD_LOAD_EMBEDDED |
+ USER_RECORD_STRIP_SECRET |
+ USER_RECORD_STRIP_BINDING |
+ USER_RECORD_STRIP_STATUS,
+
+ /* Whether to log about loader errors beyond LOG_DEBUG */
+ USER_RECORD_LOG = 1U << 28,
+
+ /* Whether to ignore errors and load what we can */
+ USER_RECORD_PERMISSIVE = 1U << 29,
+} UserRecordLoadFlags;
+
+static inline UserRecordLoadFlags USER_RECORD_REQUIRE(UserRecordMask m) {
+ assert((m & ~_USER_RECORD_MASK_MAX) == 0);
+ return m << 7;
+}
+
+static inline UserRecordLoadFlags USER_RECORD_ALLOW(UserRecordMask m) {
+ assert((m & ~_USER_RECORD_MASK_MAX) == 0);
+ return m << 14;
+}
+
+static inline UserRecordLoadFlags USER_RECORD_STRIP(UserRecordMask m) {
+ assert((m & ~_USER_RECORD_MASK_MAX) == 0);
+ return m << 21;
+}
+
+static inline UserRecordMask USER_RECORD_REQUIRE_MASK(UserRecordLoadFlags f) {
+ return (f >> 7) & _USER_RECORD_MASK_MAX;
+}
+
+static inline UserRecordMask USER_RECORD_ALLOW_MASK(UserRecordLoadFlags f) {
+ return ((f >> 14) & _USER_RECORD_MASK_MAX) | USER_RECORD_REQUIRE_MASK(f);
+}
+
+static inline UserRecordMask USER_RECORD_STRIP_MASK(UserRecordLoadFlags f) {
+ return (f >> 21) & _USER_RECORD_MASK_MAX;
+}
+
+static inline JsonDispatchFlags USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(UserRecordLoadFlags flags) {
+ return (FLAGS_SET(flags, USER_RECORD_LOG) ? JSON_LOG : 0) |
+ (FLAGS_SET(flags, USER_RECORD_PERMISSIVE) ? JSON_PERMISSIVE : 0);
+}
+
+typedef struct Pkcs11EncryptedKey {
+ /* The encrypted passphrase, which can be decrypted with the private key indicated below */
+ void *data;
+ size_t size;
+
+ /* Where to find the private key to decrypt the encrypted passphrase above */
+ char *uri;
+
+ /* What to test the decrypted passphrase against to allow access (classic UNIX password hash). Note
+ * that the decrypted passphrase is also used for unlocking LUKS and fscrypt, and if the account is
+ * backed by LUKS or fscrypt the hashed password is only an additional layer of authentication, not
+ * the only. */
+ char *hashed_password;
+} Pkcs11EncryptedKey;
+
+typedef struct Fido2HmacCredential {
+ void *id;
+ size_t size;
+} Fido2HmacCredential;
+
+typedef struct Fido2HmacSalt {
+ /* The FIDO2 Cridential ID to use */
+ Fido2HmacCredential credential;
+
+ /* The FIDO2 salt value */
+ void *salt;
+ size_t salt_size;
+
+ /* What to test the hashed salt value against, usually UNIX password hash here. */
+ char *hashed_password;
+} Fido2HmacSalt;
+
+typedef struct RecoveryKey {
+ /* The type of recovery key, must be "modhex64" right now */
+ char *type;
+
+ /* A UNIX password hash of the normalized form of modhex64 */
+ char *hashed_password;
+} RecoveryKey;
+
+typedef struct UserRecord {
+ /* The following three fields are not part of the JSON record */
+ unsigned n_ref;
+ UserRecordMask mask;
+ bool incomplete; /* incomplete due to security restrictions. */
+
+ char *user_name;
+ char *realm;
+ char *user_name_and_realm_auto; /* the user_name field concatenated with '@' and the realm, if the latter is defined */
+ char *real_name;
+ char *email_address;
+ char *password_hint;
+ char *icon_name;
+ char *location;
+
+ UserDisposition disposition;
+ uint64_t last_change_usec;
+ uint64_t last_password_change_usec;
+
+ char *shell;
+ mode_t umask;
+ char **environment;
+ char *time_zone;
+ char *preferred_language;
+ int nice_level;
+ struct rlimit *rlimits[_RLIMIT_MAX];
+
+ int locked; /* prohibit activation in general */
+ uint64_t not_before_usec; /* prohibit activation before this unix time */
+ uint64_t not_after_usec; /* prohibit activation after this unix time */
+
+ UserStorage storage;
+ uint64_t disk_size;
+ uint64_t disk_size_relative; /* Disk size, relative to the free bytes of the medium, normalized to UINT32_MAX = 100% */
+ char *skeleton_directory;
+ mode_t access_mode;
+
+ uint64_t tasks_max;
+ uint64_t memory_high;
+ uint64_t memory_max;
+ uint64_t cpu_weight;
+ uint64_t io_weight;
+
+ bool nosuid;
+ bool nodev;
+ bool noexec;
+
+ char **hashed_password;
+ char **ssh_authorized_keys;
+ char **password;
+ char **token_pin;
+
+ char *cifs_domain;
+ char *cifs_user_name;
+ char *cifs_service;
+
+ char *image_path;
+ char *image_path_auto; /* when none is configured explicitly, this is where we place the implicit image */
+ char *home_directory;
+ char *home_directory_auto; /* when none is set explicitly, this is where we place the implicit home directory */
+
+ uid_t uid;
+ gid_t gid;
+
+ char **member_of;
+
+ char *file_system_type;
+ sd_id128_t partition_uuid;
+ sd_id128_t luks_uuid;
+ sd_id128_t file_system_uuid;
+
+ int luks_discard;
+ int luks_offline_discard;
+ char *luks_cipher;
+ char *luks_cipher_mode;
+ uint64_t luks_volume_key_size;
+ char *luks_pbkdf_hash_algorithm;
+ char *luks_pbkdf_type;
+ uint64_t luks_pbkdf_time_cost_usec;
+ uint64_t luks_pbkdf_memory_cost;
+ uint64_t luks_pbkdf_parallel_threads;
+
+ uint64_t disk_usage;
+ uint64_t disk_free;
+ uint64_t disk_ceiling;
+ uint64_t disk_floor;
+
+ char *state;
+ char *service;
+ int signed_locally;
+
+ uint64_t good_authentication_counter;
+ uint64_t bad_authentication_counter;
+ uint64_t last_good_authentication_usec;
+ uint64_t last_bad_authentication_usec;
+
+ uint64_t ratelimit_begin_usec;
+ uint64_t ratelimit_count;
+ uint64_t ratelimit_interval_usec;
+ uint64_t ratelimit_burst;
+
+ int removable;
+ int enforce_password_policy;
+ int auto_login;
+
+ uint64_t stop_delay_usec; /* How long to leave systemd --user around on log-out */
+ int kill_processes; /* Whether to kill user processes forcibly on log-out */
+
+ /* The following exist mostly so that we can cover the full /etc/shadow set of fields */
+ uint64_t password_change_min_usec; /* maps to .sp_min */
+ uint64_t password_change_max_usec; /* maps to .sp_max */
+ uint64_t password_change_warn_usec; /* maps to .sp_warn */
+ uint64_t password_change_inactive_usec; /* maps to .sp_inact */
+ int password_change_now; /* Require a password change immediately on next login (.sp_lstchg = 0) */
+
+ char **pkcs11_token_uri;
+ Pkcs11EncryptedKey *pkcs11_encrypted_key;
+ size_t n_pkcs11_encrypted_key;
+ int pkcs11_protected_authentication_path_permitted;
+
+ Fido2HmacCredential *fido2_hmac_credential;
+ size_t n_fido2_hmac_credential;
+ Fido2HmacSalt *fido2_hmac_salt;
+ size_t n_fido2_hmac_salt;
+ int fido2_user_presence_permitted;
+
+ char **recovery_key_type;
+ RecoveryKey *recovery_key;
+ size_t n_recovery_key;
+
+ JsonVariant *json;
+} UserRecord;
+
+UserRecord* user_record_new(void);
+UserRecord* user_record_ref(UserRecord *h);
+UserRecord* user_record_unref(UserRecord *h);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UserRecord*, user_record_unref);
+
+int user_record_load(UserRecord *h, JsonVariant *v, UserRecordLoadFlags flags);
+int user_record_build(UserRecord **ret, ...);
+
+const char *user_record_user_name_and_realm(UserRecord *h);
+UserStorage user_record_storage(UserRecord *h);
+const char *user_record_file_system_type(UserRecord *h);
+const char *user_record_skeleton_directory(UserRecord *h);
+mode_t user_record_access_mode(UserRecord *h);
+const char *user_record_home_directory(UserRecord *h);
+const char *user_record_image_path(UserRecord *h);
+unsigned long user_record_mount_flags(UserRecord *h);
+const char *user_record_cifs_user_name(UserRecord *h);
+const char *user_record_shell(UserRecord *h);
+const char *user_record_real_name(UserRecord *h);
+bool user_record_luks_discard(UserRecord *h);
+bool user_record_luks_offline_discard(UserRecord *h);
+const char *user_record_luks_cipher(UserRecord *h);
+const char *user_record_luks_cipher_mode(UserRecord *h);
+uint64_t user_record_luks_volume_key_size(UserRecord *h);
+const char* user_record_luks_pbkdf_type(UserRecord *h);
+usec_t user_record_luks_pbkdf_time_cost_usec(UserRecord *h);
+uint64_t user_record_luks_pbkdf_memory_cost(UserRecord *h);
+uint64_t user_record_luks_pbkdf_parallel_threads(UserRecord *h);
+const char *user_record_luks_pbkdf_hash_algorithm(UserRecord *h);
+gid_t user_record_gid(UserRecord *h);
+UserDisposition user_record_disposition(UserRecord *h);
+int user_record_removable(UserRecord *h);
+usec_t user_record_ratelimit_interval_usec(UserRecord *h);
+uint64_t user_record_ratelimit_burst(UserRecord *h);
+bool user_record_can_authenticate(UserRecord *h);
+
+int user_record_build_image_path(UserStorage storage, const char *user_name_and_realm, char **ret);
+
+bool user_record_equal(UserRecord *a, UserRecord *b);
+bool user_record_compatible(UserRecord *a, UserRecord *b);
+int user_record_compare_last_change(UserRecord *a, UserRecord *b);
+
+usec_t user_record_ratelimit_next_try(UserRecord *h);
+
+int user_record_clone(UserRecord *h, UserRecordLoadFlags flags, UserRecord **ret);
+int user_record_masked_equal(UserRecord *a, UserRecord *b, UserRecordMask mask);
+
+int user_record_test_blocked(UserRecord *h);
+int user_record_test_password_change_required(UserRecord *h);
+
+/* The following six are user by group-record.c, that's why we export them here */
+int json_dispatch_realm(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_gecos(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_user_group_list(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_user_disposition(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+int per_machine_id_match(JsonVariant *ids, JsonDispatchFlags flags);
+int per_machine_hostname_match(JsonVariant *hns, JsonDispatchFlags flags);
+int user_group_record_mangle(JsonVariant *v, UserRecordLoadFlags load_flags, JsonVariant **ret_variant, UserRecordMask *ret_mask);
+
+const char* user_storage_to_string(UserStorage t) _const_;
+UserStorage user_storage_from_string(const char *s) _pure_;
+
+const char* user_disposition_to_string(UserDisposition t) _const_;
+UserDisposition user_disposition_from_string(const char *s) _pure_;
diff --git a/src/shared/userdb.c b/src/shared/userdb.c
new file mode 100644
index 0000000..2d48028
--- /dev/null
+++ b/src/shared/userdb.c
@@ -0,0 +1,1249 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/auxv.h>
+
+#include "dirent-util.h"
+#include "dlfcn-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "user-record-nss.h"
+#include "user-util.h"
+#include "userdb.h"
+#include "varlink.h"
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(link_hash_ops, void, trivial_hash_func, trivial_compare_func, Varlink, varlink_unref);
+
+typedef enum LookupWhat {
+ LOOKUP_USER,
+ LOOKUP_GROUP,
+ LOOKUP_MEMBERSHIP,
+ _LOOKUP_WHAT_MAX,
+} LookupWhat;
+
+struct UserDBIterator {
+ LookupWhat what;
+ Set *links;
+ bool nss_covered:1;
+ bool nss_iterating:1;
+ bool synthesize_root:1;
+ bool synthesize_nobody:1;
+ bool nss_systemd_blocked:1;
+ int error;
+ unsigned n_found;
+ sd_event *event;
+ UserRecord *found_user; /* when .what == LOOKUP_USER */
+ GroupRecord *found_group; /* when .what == LOOKUP_GROUP */
+
+ char *found_user_name, *found_group_name; /* when .what == LOOKUP_MEMBERSHIP */
+ char **members_of_group;
+ size_t index_members_of_group;
+ char *filter_user_name;
+};
+
+UserDBIterator* userdb_iterator_free(UserDBIterator *iterator) {
+ if (!iterator)
+ return NULL;
+
+ set_free(iterator->links);
+
+ switch (iterator->what) {
+
+ case LOOKUP_USER:
+ user_record_unref(iterator->found_user);
+
+ if (iterator->nss_iterating)
+ endpwent();
+
+ break;
+
+ case LOOKUP_GROUP:
+ group_record_unref(iterator->found_group);
+
+ if (iterator->nss_iterating)
+ endgrent();
+
+ break;
+
+ case LOOKUP_MEMBERSHIP:
+ free(iterator->found_user_name);
+ free(iterator->found_group_name);
+ strv_free(iterator->members_of_group);
+ free(iterator->filter_user_name);
+
+ if (iterator->nss_iterating)
+ endgrent();
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected state?");
+ }
+
+ sd_event_unref(iterator->event);
+
+ if (iterator->nss_systemd_blocked)
+ assert_se(userdb_block_nss_systemd(false) >= 0);
+
+ return mfree(iterator);
+}
+
+static UserDBIterator* userdb_iterator_new(LookupWhat what) {
+ UserDBIterator *i;
+
+ assert(what >= 0);
+ assert(what < _LOOKUP_WHAT_MAX);
+
+ i = new(UserDBIterator, 1);
+ if (!i)
+ return NULL;
+
+ *i = (UserDBIterator) {
+ .what = what,
+ };
+
+ return i;
+}
+
+static int userdb_iterator_block_nss_systemd(UserDBIterator *iterator) {
+ int r;
+
+ assert(iterator);
+
+ if (iterator->nss_systemd_blocked)
+ return 0;
+
+ r = userdb_block_nss_systemd(true);
+ if (r < 0)
+ return r;
+
+ iterator->nss_systemd_blocked = true;
+ return 1;
+}
+
+struct user_group_data {
+ JsonVariant *record;
+ bool incomplete;
+};
+
+static void user_group_data_release(struct user_group_data *d) {
+ json_variant_unref(d->record);
+}
+
+static int userdb_on_query_reply(
+ Varlink *link,
+ JsonVariant *parameters,
+ const char *error_id,
+ VarlinkReplyFlags flags,
+ void *userdata) {
+
+ UserDBIterator *iterator = userdata;
+ int r;
+
+ assert(iterator);
+
+ if (error_id) {
+ log_debug("Got lookup error: %s", error_id);
+
+ if (STR_IN_SET(error_id,
+ "io.systemd.UserDatabase.NoRecordFound",
+ "io.systemd.UserDatabase.ConflictingRecordFound"))
+ r = -ESRCH;
+ else if (streq(error_id, "io.systemd.UserDatabase.ServiceNotAvailable"))
+ r = -EHOSTDOWN;
+ else if (streq(error_id, "io.systemd.UserDatabase.EnumerationNotSupported"))
+ r = -EOPNOTSUPP;
+ else if (streq(error_id, VARLINK_ERROR_TIMEOUT))
+ r = -ETIMEDOUT;
+ else
+ r = -EIO;
+
+ goto finish;
+ }
+
+ switch (iterator->what) {
+
+ case LOOKUP_USER: {
+ _cleanup_(user_group_data_release) struct user_group_data user_data = {};
+
+ static const JsonDispatch dispatch_table[] = {
+ { "record", _JSON_VARIANT_TYPE_INVALID, json_dispatch_variant, offsetof(struct user_group_data, record), 0 },
+ { "incomplete", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(struct user_group_data, incomplete), 0 },
+ {}
+ };
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+
+ assert_se(!iterator->found_user);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &user_data);
+ if (r < 0)
+ goto finish;
+
+ if (!user_data.record) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(EIO), "Reply is missing record key");
+ goto finish;
+ }
+
+ hr = user_record_new();
+ if (!hr) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = user_record_load(hr, user_data.record, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_PERMISSIVE);
+ if (r < 0)
+ goto finish;
+
+ if (!hr->service) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "User record does not carry service information, refusing.");
+ goto finish;
+ }
+
+ hr->incomplete = user_data.incomplete;
+
+ /* We match the root user by the name since the name is our primary key. We match the nobody
+ * use by UID though, since the name might differ on OSes */
+ if (streq_ptr(hr->user_name, "root"))
+ iterator->synthesize_root = false;
+ if (hr->uid == UID_NOBODY)
+ iterator->synthesize_nobody = false;
+
+ iterator->found_user = TAKE_PTR(hr);
+ iterator->n_found++;
+
+ /* More stuff coming? then let's just exit cleanly here */
+ if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ return 0;
+
+ /* Otherwise, let's remove this link and exit cleanly then */
+ r = 0;
+ goto finish;
+ }
+
+ case LOOKUP_GROUP: {
+ _cleanup_(user_group_data_release) struct user_group_data group_data = {};
+
+ static const JsonDispatch dispatch_table[] = {
+ { "record", _JSON_VARIANT_TYPE_INVALID, json_dispatch_variant, offsetof(struct user_group_data, record), 0 },
+ { "incomplete", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(struct user_group_data, incomplete), 0 },
+ {}
+ };
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+
+ assert_se(!iterator->found_group);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &group_data);
+ if (r < 0)
+ goto finish;
+
+ if (!group_data.record) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(EIO), "Reply is missing record key");
+ goto finish;
+ }
+
+ g = group_record_new();
+ if (!g) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = group_record_load(g, group_data.record, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_PERMISSIVE);
+ if (r < 0)
+ goto finish;
+
+ if (!g->service) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Group record does not carry service information, refusing.");
+ goto finish;
+ }
+
+ g->incomplete = group_data.incomplete;
+
+ if (streq_ptr(g->group_name, "root"))
+ iterator->synthesize_root = false;
+ if (g->gid == GID_NOBODY)
+ iterator->synthesize_nobody = false;
+
+ iterator->found_group = TAKE_PTR(g);
+ iterator->n_found++;
+
+ if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ return 0;
+
+ r = 0;
+ goto finish;
+ }
+
+ case LOOKUP_MEMBERSHIP: {
+ struct membership_data {
+ const char *user_name;
+ const char *group_name;
+ } membership_data = {};
+
+ static const JsonDispatch dispatch_table[] = {
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(struct membership_data, user_name), JSON_SAFE },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(struct membership_data, group_name), JSON_SAFE },
+ {}
+ };
+
+ assert(!iterator->found_user_name);
+ assert(!iterator->found_group_name);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &membership_data);
+ if (r < 0)
+ goto finish;
+
+ iterator->found_user_name = mfree(iterator->found_user_name);
+ iterator->found_group_name = mfree(iterator->found_group_name);
+
+ iterator->found_user_name = strdup(membership_data.user_name);
+ if (!iterator->found_user_name) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ iterator->found_group_name = strdup(membership_data.group_name);
+ if (!iterator->found_group_name) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ iterator->n_found++;
+
+ if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ return 0;
+
+ r = 0;
+ goto finish;
+ }
+
+ default:
+ assert_not_reached("unexpected lookup");
+ }
+
+finish:
+ /* If we got one ESRCH, let that win. This way when we do a wild dump we won't be tripped up by bad
+ * errors if at least one connection ended cleanly */
+ if (r == -ESRCH || iterator->error == 0)
+ iterator->error = -r;
+
+ assert_se(set_remove(iterator->links, link) == link);
+ link = varlink_unref(link);
+ return 0;
+}
+
+static int userdb_connect(
+ UserDBIterator *iterator,
+ const char *path,
+ const char *method,
+ bool more,
+ JsonVariant *query) {
+
+ _cleanup_(varlink_unrefp) Varlink *vl = NULL;
+ int r;
+
+ assert(iterator);
+ assert(path);
+ assert(method);
+
+ r = varlink_connect_address(&vl, path);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to connect to %s: %m", path);
+
+ varlink_set_userdata(vl, iterator);
+
+ if (!iterator->event) {
+ r = sd_event_new(&iterator->event);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to allocate event loop: %m");
+ }
+
+ r = varlink_attach_event(vl, iterator->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ (void) varlink_set_description(vl, path);
+
+ r = varlink_bind_reply(vl, userdb_on_query_reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to bind reply callback: %m");
+
+ if (more)
+ r = varlink_observe(vl, method, query);
+ else
+ r = varlink_invoke(vl, method, query);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to invoke varlink method: %m");
+
+ r = set_ensure_consume(&iterator->links, &link_hash_ops, TAKE_PTR(vl));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add varlink connection to set: %m");
+ return r;
+}
+
+static int userdb_start_query(
+ UserDBIterator *iterator,
+ const char *method,
+ bool more,
+ JsonVariant *query,
+ UserDBFlags flags) {
+
+ _cleanup_(strv_freep) char **except = NULL, **only = NULL;
+ _cleanup_(closedirp) DIR *d = NULL;
+ struct dirent *de;
+ const char *e;
+ int r, ret = 0;
+
+ assert(iterator);
+ assert(method);
+
+ e = getenv("SYSTEMD_BYPASS_USERDB");
+ if (e) {
+ r = parse_boolean(e);
+ if (r > 0)
+ return -ENOLINK;
+ if (r < 0) {
+ except = strv_split(e, ":");
+ if (!except)
+ return -ENOMEM;
+ }
+ }
+
+ e = getenv("SYSTEMD_ONLY_USERDB");
+ if (e) {
+ only = strv_split(e, ":");
+ if (!only)
+ return -ENOMEM;
+ }
+
+ /* First, let's talk to the multiplexer, if we can */
+ if ((flags & (USERDB_AVOID_MULTIPLEXER|USERDB_AVOID_DYNAMIC_USER|USERDB_AVOID_NSS|USERDB_DONT_SYNTHESIZE)) == 0 &&
+ !strv_contains(except, "io.systemd.Multiplexer") &&
+ (!only || strv_contains(only, "io.systemd.Multiplexer"))) {
+ _cleanup_(json_variant_unrefp) JsonVariant *patched_query = json_variant_ref(query);
+
+ r = json_variant_set_field_string(&patched_query, "service", "io.systemd.Multiplexer");
+ if (r < 0)
+ return log_debug_errno(r, "Unable to set service JSON field: %m");
+
+ r = userdb_connect(iterator, "/run/systemd/userdb/io.systemd.Multiplexer", method, more, patched_query);
+ if (r >= 0) {
+ iterator->nss_covered = true; /* The multiplexer does NSS */
+ return 0;
+ }
+ }
+
+ d = opendir("/run/systemd/userdb/");
+ if (!d) {
+ if (errno == ENOENT)
+ return -ESRCH;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_(json_variant_unrefp) JsonVariant *patched_query = NULL;
+ _cleanup_free_ char *p = NULL;
+ bool is_nss;
+
+ if (streq(de->d_name, "io.systemd.Multiplexer")) /* We already tried this above, don't try this again */
+ continue;
+
+ if (FLAGS_SET(flags, USERDB_AVOID_DYNAMIC_USER) &&
+ streq(de->d_name, "io.systemd.DynamicUser"))
+ continue;
+
+ /* Avoid NSS is this is requested. Note that we also skip NSS when we were asked to skip the
+ * multiplexer, since in that case it's safer to do NSS in the client side emulation below
+ * (and when we run as part of systemd-userdbd.service we don't want to talk to ourselves
+ * anyway). */
+ is_nss = streq(de->d_name, "io.systemd.NameServiceSwitch");
+ if ((flags & (USERDB_AVOID_NSS|USERDB_AVOID_MULTIPLEXER)) && is_nss)
+ continue;
+
+ if (strv_contains(except, de->d_name))
+ continue;
+
+ if (only && !strv_contains(only, de->d_name))
+ continue;
+
+ p = path_join("/run/systemd/userdb/", de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ patched_query = json_variant_ref(query);
+ r = json_variant_set_field_string(&patched_query, "service", de->d_name);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to set service JSON field: %m");
+
+ r = userdb_connect(iterator, p, method, more, patched_query);
+ if (is_nss && r >= 0) /* Turn off fallback NSS if we found the NSS service and could connect
+ * to it */
+ iterator->nss_covered = true;
+
+ if (ret == 0 && r < 0)
+ ret = r;
+ }
+
+ if (set_isempty(iterator->links))
+ return ret; /* propagate last error we saw if we couldn't connect to anything. */
+
+ /* We connected to some services, in this case, ignore the ones we failed on */
+ return 0;
+}
+
+static int userdb_process(
+ UserDBIterator *iterator,
+ UserRecord **ret_user_record,
+ GroupRecord **ret_group_record,
+ char **ret_user_name,
+ char **ret_group_name) {
+
+ int r;
+
+ assert(iterator);
+
+ for (;;) {
+ if (iterator->what == LOOKUP_USER && iterator->found_user) {
+ if (ret_user_record)
+ *ret_user_record = TAKE_PTR(iterator->found_user);
+ else
+ iterator->found_user = user_record_unref(iterator->found_user);
+
+ if (ret_group_record)
+ *ret_group_record = NULL;
+ if (ret_user_name)
+ *ret_user_name = NULL;
+ if (ret_group_name)
+ *ret_group_name = NULL;
+
+ return 0;
+ }
+
+ if (iterator->what == LOOKUP_GROUP && iterator->found_group) {
+ if (ret_group_record)
+ *ret_group_record = TAKE_PTR(iterator->found_group);
+ else
+ iterator->found_group = group_record_unref(iterator->found_group);
+
+ if (ret_user_record)
+ *ret_user_record = NULL;
+ if (ret_user_name)
+ *ret_user_name = NULL;
+ if (ret_group_name)
+ *ret_group_name = NULL;
+
+ return 0;
+ }
+
+ if (iterator->what == LOOKUP_MEMBERSHIP && iterator->found_user_name && iterator->found_group_name) {
+ if (ret_user_name)
+ *ret_user_name = TAKE_PTR(iterator->found_user_name);
+ else
+ iterator->found_user_name = mfree(iterator->found_user_name);
+
+ if (ret_group_name)
+ *ret_group_name = TAKE_PTR(iterator->found_group_name);
+ else
+ iterator->found_group_name = mfree(iterator->found_group_name);
+
+ if (ret_user_record)
+ *ret_user_record = NULL;
+ if (ret_group_record)
+ *ret_group_record = NULL;
+
+ return 0;
+ }
+
+ if (set_isempty(iterator->links)) {
+ if (iterator->error == 0)
+ return -ESRCH;
+
+ return -abs(iterator->error);
+ }
+
+ if (!iterator->event)
+ return -ESRCH;
+
+ r = sd_event_run(iterator->event, UINT64_MAX);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int synthetic_root_user_build(UserRecord **ret) {
+ return user_record_build(
+ ret,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING("root")),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(0)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(0)),
+ JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING("/root")),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic"))));
+}
+
+static int synthetic_nobody_user_build(UserRecord **ret) {
+ return user_record_build(
+ ret,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(NOBODY_USER_NAME)),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(UID_NOBODY)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(GID_NOBODY)),
+ JSON_BUILD_PAIR("shell", JSON_BUILD_STRING(NOLOGIN)),
+ JSON_BUILD_PAIR("locked", JSON_BUILD_BOOLEAN(true)),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic"))));
+}
+
+int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_USER);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", false, query, flags);
+ if (r >= 0) {
+ r = userdb_process(iterator, ret, NULL, NULL, NULL);
+ if (r >= 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !iterator->nss_covered) {
+ /* Make sure the NSS lookup doesn't recurse back to us. */
+
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r >= 0) {
+ /* Client-side NSS fallback */
+ r = nss_user_record_by_name(name, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret);
+ if (r >= 0)
+ return r;
+ }
+ }
+
+ if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) {
+ if (streq(name, "root"))
+ return synthetic_root_user_build(ret);
+
+ if (streq(name, NOBODY_USER_NAME) && synthesize_nobody())
+ return synthetic_nobody_user_build(ret);
+ }
+
+ return r;
+}
+
+int userdb_by_uid(uid_t uid, UserDBFlags flags, UserRecord **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ if (!uid_is_valid(uid))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(uid))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_USER);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", false, query, flags);
+ if (r >= 0) {
+ r = userdb_process(iterator, ret, NULL, NULL, NULL);
+ if (r >= 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !iterator->nss_covered) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r >= 0) {
+ /* Client-side NSS fallback */
+ r = nss_user_record_by_uid(uid, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret);
+ if (r >= 0)
+ return r;
+ }
+ }
+
+ if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) {
+ if (uid == 0)
+ return synthetic_root_user_build(ret);
+
+ if (uid == UID_NOBODY && synthesize_nobody())
+ return synthetic_nobody_user_build(ret);
+ }
+
+ return r;
+}
+
+int userdb_all(UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ int r;
+
+ assert(ret);
+
+ iterator = userdb_iterator_new(LOOKUP_USER);
+ if (!iterator)
+ return -ENOMEM;
+
+ iterator->synthesize_root = iterator->synthesize_nobody = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE);
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", true, NULL, flags);
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && (r < 0 || !iterator->nss_covered)) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ setpwent();
+ iterator->nss_iterating = true;
+ } else if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(iterator);
+ return 0;
+}
+
+int userdb_iterator_get(UserDBIterator *iterator, UserRecord **ret) {
+ int r;
+
+ assert(iterator);
+ assert(iterator->what == LOOKUP_USER);
+
+ if (iterator->nss_iterating) {
+ struct passwd *pw;
+
+ /* If NSS isn't covered elsewhere, let's iterate through it first, since it probably contains
+ * the more traditional sources, which are probably good to show first. */
+
+ pw = getpwent();
+ if (pw) {
+ _cleanup_free_ char *buffer = NULL;
+ bool incomplete = false;
+ struct spwd spwd;
+
+ if (streq_ptr(pw->pw_name, "root"))
+ iterator->synthesize_root = false;
+ if (pw->pw_uid == UID_NOBODY)
+ iterator->synthesize_nobody = false;
+
+ r = nss_spwd_for_passwd(pw, &spwd, &buffer);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to acquire shadow entry for user %s, ignoring: %m", pw->pw_name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ }
+
+ r = nss_passwd_to_user_record(pw, r >= 0 ? &spwd : NULL, ret);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->incomplete = incomplete;
+ return r;
+ }
+
+ if (errno != 0)
+ log_debug_errno(errno, "Failure to iterate NSS user database, ignoring: %m");
+
+ iterator->nss_iterating = false;
+ endpwent();
+ }
+
+ r = userdb_process(iterator, ret, NULL, NULL, NULL);
+
+ if (r < 0) {
+ if (iterator->synthesize_root) {
+ iterator->synthesize_root = false;
+ iterator->n_found++;
+ return synthetic_root_user_build(ret);
+ }
+
+ if (iterator->synthesize_nobody) {
+ iterator->synthesize_nobody = false;
+ iterator->n_found++;
+ return synthetic_nobody_user_build(ret);
+ }
+ }
+
+ /* if we found at least one entry, then ignore errors and indicate that we reached the end */
+ if (r < 0 && iterator->n_found > 0)
+ return -ESRCH;
+
+ return r;
+}
+
+static int synthetic_root_group_build(GroupRecord **ret) {
+ return group_record_build(
+ ret,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING("root")),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(0)),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic"))));
+}
+
+static int synthetic_nobody_group_build(GroupRecord **ret) {
+ return group_record_build(
+ ret,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(NOBODY_GROUP_NAME)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(GID_NOBODY)),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic"))));
+}
+
+int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_GROUP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", false, query, flags);
+ if (r >= 0) {
+ r = userdb_process(iterator, NULL, ret, NULL, NULL);
+ if (r >= 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !(iterator && iterator->nss_covered)) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r >= 0) {
+ r = nss_group_record_by_name(name, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret);
+ if (r >= 0)
+ return r;
+ }
+ }
+
+ if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) {
+ if (streq(name, "root"))
+ return synthetic_root_group_build(ret);
+
+ if (streq(name, NOBODY_GROUP_NAME) && synthesize_nobody())
+ return synthetic_nobody_group_build(ret);
+ }
+
+ return r;
+}
+
+int groupdb_by_gid(gid_t gid, UserDBFlags flags, GroupRecord **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ if (!gid_is_valid(gid))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(gid))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_GROUP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", false, query, flags);
+ if (r >= 0) {
+ r = userdb_process(iterator, NULL, ret, NULL, NULL);
+ if (r >= 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !(iterator && iterator->nss_covered)) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r >= 0) {
+ r = nss_group_record_by_gid(gid, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret);
+ if (r >= 0)
+ return r;
+ }
+ }
+
+ if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) {
+ if (gid == 0)
+ return synthetic_root_group_build(ret);
+
+ if (gid == GID_NOBODY && synthesize_nobody())
+ return synthetic_nobody_group_build(ret);
+ }
+
+ return r;
+}
+
+int groupdb_all(UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ int r;
+
+ assert(ret);
+
+ iterator = userdb_iterator_new(LOOKUP_GROUP);
+ if (!iterator)
+ return -ENOMEM;
+
+ iterator->synthesize_root = iterator->synthesize_nobody = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE);
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", true, NULL, flags);
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && (r < 0 || !iterator->nss_covered)) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ setgrent();
+ iterator->nss_iterating = true;
+ } if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(iterator);
+ return 0;
+}
+
+int groupdb_iterator_get(UserDBIterator *iterator, GroupRecord **ret) {
+ int r;
+
+ assert(iterator);
+ assert(iterator->what == LOOKUP_GROUP);
+
+ if (iterator->nss_iterating) {
+ struct group *gr;
+
+ errno = 0;
+ gr = getgrent();
+ if (gr) {
+ _cleanup_free_ char *buffer = NULL;
+ bool incomplete = false;
+ struct sgrp sgrp;
+
+ if (streq_ptr(gr->gr_name, "root"))
+ iterator->synthesize_root = false;
+ if (gr->gr_gid == GID_NOBODY)
+ iterator->synthesize_nobody = false;
+
+ r = nss_sgrp_for_group(gr, &sgrp, &buffer);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to acquire shadow entry for group %s, ignoring: %m", gr->gr_name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ }
+
+ r = nss_group_to_group_record(gr, r >= 0 ? &sgrp : NULL, ret);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->incomplete = incomplete;
+ return r;
+ }
+
+ if (errno != 0)
+ log_debug_errno(errno, "Failure to iterate NSS group database, ignoring: %m");
+
+ iterator->nss_iterating = false;
+ endgrent();
+ }
+
+ r = userdb_process(iterator, NULL, ret, NULL, NULL);
+ if (r < 0) {
+ if (iterator->synthesize_root) {
+ iterator->synthesize_root = false;
+ iterator->n_found++;
+ return synthetic_root_group_build(ret);
+ }
+
+ if (iterator->synthesize_nobody) {
+ iterator->synthesize_nobody = false;
+ iterator->n_found++;
+ return synthetic_nobody_group_build(ret);
+ }
+ }
+
+ /* if we found at least one entry, then ignore errors and indicate that we reached the end */
+ if (r < 0 && iterator->n_found > 0)
+ return -ESRCH;
+
+ return r;
+}
+
+int membershipdb_by_user(const char *name, UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ assert(ret);
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, query, flags);
+ if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS))
+ goto finish;
+
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ iterator->filter_user_name = strdup(name);
+ if (!iterator->filter_user_name)
+ return -ENOMEM;
+
+ setgrent();
+ iterator->nss_iterating = true;
+
+ r = 0;
+
+finish:
+ if (r >= 0)
+ *ret = TAKE_PTR(iterator);
+ return r;
+}
+
+int membershipdb_by_group(const char *name, UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ _cleanup_(group_record_unrefp) GroupRecord *gr = NULL;
+ int r;
+
+ assert(ret);
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, query, flags);
+ if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS))
+ goto finish;
+
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ /* We ignore all errors here, since the group might be defined by a userdb native service, and we queried them already above. */
+ (void) nss_group_record_by_name(name, false, &gr);
+ if (gr) {
+ iterator->members_of_group = strv_copy(gr->members);
+ if (!iterator->members_of_group)
+ return -ENOMEM;
+
+ iterator->index_members_of_group = 0;
+
+ iterator->found_group_name = strdup(name);
+ if (!iterator->found_group_name)
+ return -ENOMEM;
+ }
+
+ r = 0;
+
+finish:
+ if (r >= 0)
+ *ret = TAKE_PTR(iterator);
+
+ return r;
+}
+
+int membershipdb_all(UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ int r;
+
+ assert(ret);
+
+ iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, NULL, flags);
+ if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS))
+ goto finish;
+
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ setgrent();
+ iterator->nss_iterating = true;
+
+ r = 0;
+
+finish:
+ if (r >= 0)
+ *ret = TAKE_PTR(iterator);
+
+ return r;
+}
+
+int membershipdb_iterator_get(
+ UserDBIterator *iterator,
+ char **ret_user,
+ char **ret_group) {
+
+ int r;
+
+ assert(iterator);
+
+ for (;;) {
+ /* If we are iteratring through NSS acquire a new group entry if we haven't acquired one yet. */
+ if (!iterator->members_of_group) {
+ struct group *g;
+
+ if (!iterator->nss_iterating)
+ break;
+
+ assert(!iterator->found_user_name);
+ do {
+ errno = 0;
+ g = getgrent();
+ if (!g) {
+ if (errno != 0)
+ log_debug_errno(errno, "Failure during NSS group iteration, ignoring: %m");
+ break;
+ }
+
+ } while (iterator->filter_user_name ? !strv_contains(g->gr_mem, iterator->filter_user_name) :
+ strv_isempty(g->gr_mem));
+
+ if (g) {
+ r = free_and_strdup(&iterator->found_group_name, g->gr_name);
+ if (r < 0)
+ return r;
+
+ if (iterator->filter_user_name)
+ iterator->members_of_group = strv_new(iterator->filter_user_name);
+ else
+ iterator->members_of_group = strv_copy(g->gr_mem);
+ if (!iterator->members_of_group)
+ return -ENOMEM;
+
+ iterator->index_members_of_group = 0;
+ } else {
+ iterator->nss_iterating = false;
+ endgrent();
+ break;
+ }
+ }
+
+ assert(iterator->found_group_name);
+ assert(iterator->members_of_group);
+ assert(!iterator->found_user_name);
+
+ if (iterator->members_of_group[iterator->index_members_of_group]) {
+ _cleanup_free_ char *cu = NULL, *cg = NULL;
+
+ if (ret_user) {
+ cu = strdup(iterator->members_of_group[iterator->index_members_of_group]);
+ if (!cu)
+ return -ENOMEM;
+ }
+
+ if (ret_group) {
+ cg = strdup(iterator->found_group_name);
+ if (!cg)
+ return -ENOMEM;
+ }
+
+ if (ret_user)
+ *ret_user = TAKE_PTR(cu);
+
+ if (ret_group)
+ *ret_group = TAKE_PTR(cg);
+
+ iterator->index_members_of_group++;
+ return 0;
+ }
+
+ iterator->members_of_group = strv_free(iterator->members_of_group);
+ iterator->found_group_name = mfree(iterator->found_group_name);
+ }
+
+ r = userdb_process(iterator, NULL, NULL, ret_user, ret_group);
+ if (r < 0 && iterator->n_found > 0)
+ return -ESRCH;
+
+ return r;
+}
+
+int membershipdb_by_group_strv(const char *name, UserDBFlags flags, char ***ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_strv_free_ char **members = NULL;
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ r = membershipdb_by_group(name, flags, &iterator);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *user_name = NULL;
+
+ r = membershipdb_iterator_get(iterator, &user_name, NULL);
+ if (r == -ESRCH)
+ break;
+ if (r < 0)
+ return r;
+
+ r = strv_consume(&members, TAKE_PTR(user_name));
+ if (r < 0)
+ return r;
+ }
+
+ strv_sort(members);
+ strv_uniq(members);
+
+ *ret = TAKE_PTR(members);
+ return 0;
+}
+
+int userdb_block_nss_systemd(int b) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int (*call)(bool b);
+
+ /* Note that we might be called from libnss_systemd.so.2 itself, but that should be fine, really. */
+
+ dl = dlopen(ROOTLIBDIR "/libnss_systemd.so.2", RTLD_LAZY|RTLD_NODELETE);
+ if (!dl) {
+ /* If the file isn't installed, don't complain loudly */
+ log_debug("Failed to dlopen(libnss_systemd.so.2), ignoring: %s", dlerror());
+ return 0;
+ }
+
+ call = (int (*)(bool b)) dlsym(dl, "_nss_systemd_block");
+ if (!call)
+ /* If the file is is installed but lacks the symbol we expect, things are weird, let's complain */
+ return log_debug_errno(SYNTHETIC_ERRNO(ELIBBAD),
+ "Unable to find symbol _nss_systemd_block in libnss_systemd.so.2: %s", dlerror());
+
+ return call(b);
+}
diff --git a/src/shared/userdb.h b/src/shared/userdb.h
new file mode 100644
index 0000000..ee207b5
--- /dev/null
+++ b/src/shared/userdb.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "group-record.h"
+#include "user-record.h"
+
+/* Inquire local services for user/group records */
+
+typedef struct UserDBIterator UserDBIterator;
+
+UserDBIterator *userdb_iterator_free(UserDBIterator *iterator);
+DEFINE_TRIVIAL_CLEANUP_FUNC(UserDBIterator*, userdb_iterator_free);
+
+typedef enum UserDBFlags {
+ USERDB_AVOID_NSS = 1 << 0, /* don't do client-side nor server-side NSS */
+ USERDB_AVOID_SHADOW = 1 << 1, /* don't do client-side shadow calls (server side might happen though) */
+ USERDB_AVOID_DYNAMIC_USER = 1 << 2, /* exclude looking up in io.systemd.DynamicUser */
+ USERDB_AVOID_MULTIPLEXER = 1 << 3, /* exclude looking up via io.systemd.Multiplexer */
+ USERDB_DONT_SYNTHESIZE = 1 << 4, /* don't synthesize root/nobody */
+} UserDBFlags;
+
+int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret);
+int userdb_by_uid(uid_t uid, UserDBFlags flags, UserRecord **ret);
+int userdb_all(UserDBFlags flags, UserDBIterator **ret);
+int userdb_iterator_get(UserDBIterator *iterator, UserRecord **ret);
+
+int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret);
+int groupdb_by_gid(gid_t gid, UserDBFlags flags, GroupRecord **ret);
+int groupdb_all(UserDBFlags flags, UserDBIterator **ret);
+int groupdb_iterator_get(UserDBIterator *iterator, GroupRecord **ret);
+
+int membershipdb_by_user(const char *name, UserDBFlags flags, UserDBIterator **ret);
+int membershipdb_by_group(const char *name, UserDBFlags flags, UserDBIterator **ret);
+int membershipdb_all(UserDBFlags flags, UserDBIterator **ret);
+int membershipdb_iterator_get(UserDBIterator *iterator, char **user, char **group);
+int membershipdb_by_group_strv(const char *name, UserDBFlags flags, char ***ret);
+
+int userdb_block_nss_systemd(int b);
diff --git a/src/shared/utmp-wtmp.c b/src/shared/utmp-wtmp.c
new file mode 100644
index 0000000..b36bc20
--- /dev/null
+++ b/src/shared/utmp-wtmp.c
@@ -0,0 +1,409 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <utmpx.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "user-util.h"
+#include "utmp-wtmp.h"
+
+int utmp_get_runlevel(int *runlevel, int *previous) {
+ _cleanup_(utxent_cleanup) bool utmpx = false;
+ struct utmpx *found, lookup = { .ut_type = RUN_LVL };
+ const char *e;
+
+ assert(runlevel);
+
+ /* If these values are set in the environment this takes
+ * precedence. Presumably, sysvinit does this to work around a
+ * race condition that would otherwise exist where we'd always
+ * go to disk and hence might read runlevel data that might be
+ * very new and not apply to the current script being executed. */
+
+ e = getenv("RUNLEVEL");
+ if (e && e[0] > 0) {
+ *runlevel = e[0];
+
+ if (previous) {
+ /* $PREVLEVEL seems to be an Upstart thing */
+
+ e = getenv("PREVLEVEL");
+ if (e && e[0] > 0)
+ *previous = e[0];
+ else
+ *previous = 0;
+ }
+
+ return 0;
+ }
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return -errno;
+
+ utmpx = utxent_start();
+
+ found = getutxid(&lookup);
+ if (!found)
+ return -errno;
+
+ *runlevel = found->ut_pid & 0xFF;
+ if (previous)
+ *previous = (found->ut_pid >> 8) & 0xFF;
+
+ return 0;
+}
+
+static void init_timestamp(struct utmpx *store, usec_t t) {
+ assert(store);
+
+ if (t <= 0)
+ t = now(CLOCK_REALTIME);
+
+ store->ut_tv.tv_sec = t / USEC_PER_SEC;
+ store->ut_tv.tv_usec = t % USEC_PER_SEC;
+}
+
+static void init_entry(struct utmpx *store, usec_t t) {
+ struct utsname uts = {};
+
+ assert(store);
+
+ init_timestamp(store, t);
+
+ if (uname(&uts) >= 0)
+ strncpy(store->ut_host, uts.release, sizeof(store->ut_host));
+
+ strncpy(store->ut_line, "~", sizeof(store->ut_line)); /* or ~~ ? */
+ strncpy(store->ut_id, "~~", sizeof(store->ut_id));
+}
+
+static int write_entry_utmp(const struct utmpx *store) {
+ _cleanup_(utxent_cleanup) bool utmpx = false;
+
+ assert(store);
+
+ /* utmp is similar to wtmp, but there is only one entry for
+ * each entry type resp. user; i.e. basically a key/value
+ * table. */
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return -errno;
+
+ utmpx = utxent_start();
+
+ if (pututxline(store))
+ return 0;
+ if (errno == ENOENT) {
+ /* If utmp/wtmp have been disabled, that's a good thing, hence ignore the error. */
+ log_debug_errno(errno, "Not writing utmp: %m");
+ return 0;
+ }
+ return -errno;
+}
+
+static int write_entry_wtmp(const struct utmpx *store) {
+ assert(store);
+
+ /* wtmp is a simple append-only file where each entry is
+ * simply appended to the end; i.e. basically a log. */
+
+ errno = 0;
+ updwtmpx(_PATH_WTMPX, store);
+ if (errno == ENOENT) {
+ /* If utmp/wtmp have been disabled, that's a good thing, hence ignore the error. */
+ log_debug_errno(errno, "Not writing wtmp: %m");
+ return 0;
+ }
+ if (errno == EROFS) {
+ log_warning_errno(errno, "Failed to write wtmp record, ignoring: %m");
+ return 0;
+ }
+ return -errno;
+}
+
+static int write_utmp_wtmp(const struct utmpx *store_utmp, const struct utmpx *store_wtmp) {
+ int r, s;
+
+ r = write_entry_utmp(store_utmp);
+ s = write_entry_wtmp(store_wtmp);
+ return r < 0 ? r : s;
+}
+
+static int write_entry_both(const struct utmpx *store) {
+ return write_utmp_wtmp(store, store);
+}
+
+int utmp_put_shutdown(void) {
+ struct utmpx store = {};
+
+ init_entry(&store, 0);
+
+ store.ut_type = RUN_LVL;
+ strncpy(store.ut_user, "shutdown", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+int utmp_put_reboot(usec_t t) {
+ struct utmpx store = {};
+
+ init_entry(&store, t);
+
+ store.ut_type = BOOT_TIME;
+ strncpy(store.ut_user, "reboot", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+static void copy_suffix(char *buf, size_t buf_size, const char *src) {
+ size_t l;
+
+ l = strlen(src);
+ if (l < buf_size)
+ strncpy(buf, src, buf_size);
+ else
+ memcpy(buf, src + l - buf_size, buf_size);
+}
+
+int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) {
+ struct utmpx store = {
+ .ut_type = INIT_PROCESS,
+ .ut_pid = pid,
+ .ut_session = sid,
+ };
+ int r;
+
+ assert(id);
+
+ init_timestamp(&store, 0);
+
+ /* Copy the whole string if it fits, or just the suffix without the terminating NUL. */
+ copy_suffix(store.ut_id, sizeof(store.ut_id), id);
+
+ if (line)
+ strncpy_exact(store.ut_line, line, sizeof(store.ut_line));
+
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(ut_type, LOGIN_PROCESS, USER_PROCESS)) {
+ store.ut_type = LOGIN_PROCESS;
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+ }
+
+ if (ut_type == USER_PROCESS) {
+ store.ut_type = USER_PROCESS;
+ strncpy(store.ut_user, user, sizeof(store.ut_user)-1);
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) {
+ struct utmpx lookup = {
+ .ut_type = INIT_PROCESS /* looks for DEAD_PROCESS, LOGIN_PROCESS, USER_PROCESS, too */
+ }, store, store_wtmp, *found;
+
+ assert(id);
+
+ setutxent();
+
+ /* Copy the whole string if it fits, or just the suffix without the terminating NUL. */
+ copy_suffix(store.ut_id, sizeof(store.ut_id), id);
+
+ found = getutxid(&lookup);
+ if (!found)
+ return 0;
+
+ if (found->ut_pid != pid)
+ return 0;
+
+ memcpy(&store, found, sizeof(store));
+ store.ut_type = DEAD_PROCESS;
+ store.ut_exit.e_termination = code;
+ store.ut_exit.e_exit = status;
+
+ zero(store.ut_user);
+ zero(store.ut_host);
+ zero(store.ut_tv);
+
+ memcpy(&store_wtmp, &store, sizeof(store_wtmp));
+ /* wtmp wants the current time */
+ init_timestamp(&store_wtmp, 0);
+
+ return write_utmp_wtmp(&store, &store_wtmp);
+}
+
+int utmp_put_runlevel(int runlevel, int previous) {
+ struct utmpx store = {};
+ int r;
+
+ assert(runlevel > 0);
+
+ if (previous <= 0) {
+ /* Find the old runlevel automatically */
+
+ r = utmp_get_runlevel(&previous, NULL);
+ if (r < 0) {
+ if (r != -ESRCH)
+ return r;
+
+ previous = 0;
+ }
+ }
+
+ if (previous == runlevel)
+ return 0;
+
+ init_entry(&store, 0);
+
+ store.ut_type = RUN_LVL;
+ store.ut_pid = (runlevel & 0xFF) | ((previous & 0xFF) << 8);
+ strncpy(store.ut_user, "runlevel", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+#define TIMEOUT_USEC (50 * USEC_PER_MSEC)
+
+static int write_to_terminal(const char *tty, const char *message) {
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ size_t left;
+ usec_t end;
+
+ assert(tty);
+ assert(message);
+
+ fd = open(tty, O_WRONLY|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0 || !isatty(fd))
+ return -errno;
+
+ p = message;
+ left = strlen(message);
+
+ end = now(CLOCK_MONOTONIC) + TIMEOUT_USEC;
+
+ while (left > 0) {
+ ssize_t n;
+ usec_t t;
+ int k;
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (t >= end)
+ return -ETIME;
+
+ k = fd_wait_for_event(fd, POLLOUT, end - t);
+ if (k < 0)
+ return k;
+ if (k == 0)
+ return -ETIME;
+
+ n = write(fd, p, left);
+ if (n < 0) {
+ if (errno == EAGAIN)
+ continue;
+
+ return -errno;
+ }
+
+ assert((size_t) n <= left);
+
+ p += n;
+ left -= n;
+ }
+
+ return 0;
+}
+
+int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata) {
+
+ _cleanup_free_ char *text = NULL, *hn = NULL, *un = NULL, *stdin_tty = NULL;
+ char date[FORMAT_TIMESTAMP_MAX];
+ struct utmpx *u;
+ int r;
+
+ hn = gethostname_malloc();
+ if (!hn)
+ return -ENOMEM;
+ if (!username) {
+ un = getlogname_malloc();
+ if (!un)
+ return -ENOMEM;
+ }
+
+ if (!origin_tty) {
+ getttyname_harder(STDIN_FILENO, &stdin_tty);
+ origin_tty = stdin_tty;
+ }
+
+ if (asprintf(&text,
+ "\a\r\n"
+ "Broadcast message from %s@%s%s%s (%s):\r\n\r\n"
+ "%s\r\n\r\n",
+ un ?: username, hn,
+ origin_tty ? " on " : "", strempty(origin_tty),
+ format_timestamp(date, sizeof(date), now(CLOCK_REALTIME)),
+ message) < 0)
+ return -ENOMEM;
+
+ setutxent();
+
+ r = 0;
+
+ while ((u = getutxent())) {
+ _cleanup_free_ char *buf = NULL;
+ const char *path;
+ int q;
+
+ if (u->ut_type != USER_PROCESS || u->ut_user[0] == 0)
+ continue;
+
+ /* this access is fine, because STRLEN("/dev/") << 32 (UT_LINESIZE) */
+ if (path_startswith(u->ut_line, "/dev/"))
+ path = u->ut_line;
+ else {
+ if (asprintf(&buf, "/dev/%.*s", (int) sizeof(u->ut_line), u->ut_line) < 0)
+ return -ENOMEM;
+
+ path = buf;
+ }
+
+ if (!match_tty || match_tty(path, userdata)) {
+ q = write_to_terminal(path, text);
+ if (q < 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
diff --git a/src/shared/utmp-wtmp.h b/src/shared/utmp-wtmp.h
new file mode 100644
index 0000000..3e71f76
--- /dev/null
+++ b/src/shared/utmp-wtmp.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "time-util.h"
+#include "util.h"
+
+#if ENABLE_UTMP
+#include <utmpx.h>
+
+int utmp_get_runlevel(int *runlevel, int *previous);
+
+int utmp_put_shutdown(void);
+int utmp_put_reboot(usec_t timestamp);
+int utmp_put_runlevel(int runlevel, int previous);
+
+int utmp_put_dead_process(const char *id, pid_t pid, int code, int status);
+int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user);
+
+int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata);
+
+static inline bool utxent_start(void) {
+ setutxent();
+ return true;
+}
+static inline void utxent_cleanup(bool *initialized) {
+ if (initialized)
+ endutxent();
+}
+
+#else /* ENABLE_UTMP */
+
+static inline int utmp_get_runlevel(int *runlevel, int *previous) {
+ return -ESRCH;
+}
+static inline int utmp_put_shutdown(void) {
+ return 0;
+}
+static inline int utmp_put_reboot(usec_t timestamp) {
+ return 0;
+}
+static inline int utmp_put_runlevel(int runlevel, int previous) {
+ return 0;
+}
+static inline int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) {
+ return 0;
+}
+static inline int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) {
+ return 0;
+}
+static inline int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata) {
+ return 0;
+}
+
+#endif /* ENABLE_UTMP */
diff --git a/src/shared/varlink.c b/src/shared/varlink.c
new file mode 100644
index 0000000..e7be33c
--- /dev/null
+++ b/src/shared/varlink.c
@@ -0,0 +1,2502 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/poll.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "list.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "varlink.h"
+
+#define VARLINK_DEFAULT_CONNECTIONS_MAX 4096U
+#define VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX 1024U
+
+#define VARLINK_DEFAULT_TIMEOUT_USEC (45U*USEC_PER_SEC)
+#define VARLINK_BUFFER_MAX (16U*1024U*1024U)
+#define VARLINK_READ_SIZE (64U*1024U)
+
+typedef enum VarlinkState {
+ /* Client side states */
+ VARLINK_IDLE_CLIENT,
+ VARLINK_AWAITING_REPLY,
+ VARLINK_AWAITING_REPLY_MORE,
+ VARLINK_CALLING,
+ VARLINK_CALLED,
+ VARLINK_PROCESSING_REPLY,
+
+ /* Server side states */
+ VARLINK_IDLE_SERVER,
+ VARLINK_PROCESSING_METHOD,
+ VARLINK_PROCESSING_METHOD_MORE,
+ VARLINK_PROCESSING_METHOD_ONEWAY,
+ VARLINK_PROCESSED_METHOD,
+ VARLINK_PENDING_METHOD,
+ VARLINK_PENDING_METHOD_MORE,
+
+ /* Common states (only during shutdown) */
+ VARLINK_PENDING_DISCONNECT,
+ VARLINK_PENDING_TIMEOUT,
+ VARLINK_PROCESSING_DISCONNECT,
+ VARLINK_PROCESSING_TIMEOUT,
+ VARLINK_PROCESSING_FAILURE,
+ VARLINK_DISCONNECTED,
+
+ _VARLINK_STATE_MAX,
+ _VARLINK_STATE_INVALID = -1
+} VarlinkState;
+
+/* Tests whether we are not yet disconnected. Note that this is true during all states where the connection
+ * is still good for something, and false only when it's dead for good. This means: when we are
+ * asynchronously connecting to a peer and the connect() is still pending, then this will return 'true', as
+ * the connection is still good, and we are likely to be able to properly operate on it soon. */
+#define VARLINK_STATE_IS_ALIVE(state) \
+ IN_SET(state, \
+ VARLINK_IDLE_CLIENT, \
+ VARLINK_AWAITING_REPLY, \
+ VARLINK_AWAITING_REPLY_MORE, \
+ VARLINK_CALLING, \
+ VARLINK_CALLED, \
+ VARLINK_PROCESSING_REPLY, \
+ VARLINK_IDLE_SERVER, \
+ VARLINK_PROCESSING_METHOD, \
+ VARLINK_PROCESSING_METHOD_MORE, \
+ VARLINK_PROCESSING_METHOD_ONEWAY, \
+ VARLINK_PROCESSED_METHOD, \
+ VARLINK_PENDING_METHOD, \
+ VARLINK_PENDING_METHOD_MORE)
+
+struct Varlink {
+ unsigned n_ref;
+
+ VarlinkServer *server;
+
+ VarlinkState state;
+ bool connecting; /* This boolean indicates whether the socket fd we are operating on is currently
+ * processing an asynchronous connect(). In that state we watch the socket for
+ * EPOLLOUT, but we refrain from calling read() or write() on the socket as that
+ * will trigger ENOTCONN. Note that this boolean is kept separate from the
+ * VarlinkState above on purpose: while the connect() is still not complete we
+ * already want to allow queuing of messages and similar. Thus it's nice to keep
+ * these two state concepts separate: the VarlinkState encodes what our own view of
+ * the connection is, i.e. whether we think it's a server, a client, and has
+ * something queued already, while 'connecting' tells us a detail about the
+ * transport used below, that should have no effect on how we otherwise accept and
+ * process operations from the user.
+ *
+ * Or to say this differently: VARLINK_STATE_IS_ALIVE(state) tells you whether the
+ * connection is good to use, even if it might not be fully connected
+ * yet. connecting=true then informs you that actually we are still connecting, and
+ * the connection is actually not established yet and thus any requests you enqueue
+ * now will still work fine but will be queued only, not sent yet, but that
+ * shouldn't stop you from using the connection, since eventually whatever you queue
+ * *will* be sent.
+ *
+ * Or to say this even differently: 'state' is a high-level ("application layer"
+ * high, if you so will) state, while 'conecting' is a low-level ("transport layer"
+ * low, if you so will) state, and while they are not entirely unrelated and
+ * sometimes propagate effects to each other they are only asynchronously connected
+ * at most. */
+ unsigned n_pending;
+
+ int fd;
+
+ char *input_buffer; /* valid data starts at input_buffer_index, ends at input_buffer_index+input_buffer_size */
+ size_t input_buffer_allocated;
+ size_t input_buffer_index;
+ size_t input_buffer_size;
+ size_t input_buffer_unscanned;
+
+ char *output_buffer; /* valid data starts at output_buffer_index, ends at output_buffer_index+output_buffer_size */
+ size_t output_buffer_allocated;
+ size_t output_buffer_index;
+ size_t output_buffer_size;
+
+ VarlinkReply reply_callback;
+
+ JsonVariant *current;
+ JsonVariant *reply;
+
+ struct ucred ucred;
+ bool ucred_acquired:1;
+
+ bool write_disconnected:1;
+ bool read_disconnected:1;
+ bool prefer_read_write:1;
+ bool got_pollhup:1;
+
+ usec_t timestamp;
+ usec_t timeout;
+
+ void *userdata;
+ char *description;
+
+ sd_event *event;
+ sd_event_source *io_event_source;
+ sd_event_source *time_event_source;
+ sd_event_source *quit_event_source;
+ sd_event_source *defer_event_source;
+};
+
+typedef struct VarlinkServerSocket VarlinkServerSocket;
+
+struct VarlinkServerSocket {
+ VarlinkServer *server;
+
+ int fd;
+ char *address;
+
+ sd_event_source *event_source;
+
+ LIST_FIELDS(VarlinkServerSocket, sockets);
+};
+
+struct VarlinkServer {
+ unsigned n_ref;
+ VarlinkServerFlags flags;
+
+ LIST_HEAD(VarlinkServerSocket, sockets);
+
+ Hashmap *methods;
+ VarlinkConnect connect_callback;
+ VarlinkDisconnect disconnect_callback;
+
+ sd_event *event;
+ int64_t event_priority;
+
+ unsigned n_connections;
+ Hashmap *by_uid;
+
+ void *userdata;
+ char *description;
+
+ unsigned connections_max;
+ unsigned connections_per_uid_max;
+};
+
+static const char* const varlink_state_table[_VARLINK_STATE_MAX] = {
+ [VARLINK_IDLE_CLIENT] = "idle-client",
+ [VARLINK_AWAITING_REPLY] = "awaiting-reply",
+ [VARLINK_AWAITING_REPLY_MORE] = "awaiting-reply-more",
+ [VARLINK_CALLING] = "calling",
+ [VARLINK_CALLED] = "called",
+ [VARLINK_PROCESSING_REPLY] = "processing-reply",
+ [VARLINK_IDLE_SERVER] = "idle-server",
+ [VARLINK_PROCESSING_METHOD] = "processing-method",
+ [VARLINK_PROCESSING_METHOD_MORE] = "processing-method-more",
+ [VARLINK_PROCESSING_METHOD_ONEWAY] = "processing-method-oneway",
+ [VARLINK_PROCESSED_METHOD] = "processed-method",
+ [VARLINK_PENDING_METHOD] = "pending-method",
+ [VARLINK_PENDING_METHOD_MORE] = "pending-method-more",
+ [VARLINK_PENDING_DISCONNECT] = "pending-disconnect",
+ [VARLINK_PENDING_TIMEOUT] = "pending-timeout",
+ [VARLINK_PROCESSING_DISCONNECT] = "processing-disconnect",
+ [VARLINK_PROCESSING_TIMEOUT] = "processing-timeout",
+ [VARLINK_PROCESSING_FAILURE] = "processing-failure",
+ [VARLINK_DISCONNECTED] = "disconnected",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(varlink_state, VarlinkState);
+
+#define varlink_log_errno(v, error, fmt, ...) \
+ log_debug_errno(error, "%s: " fmt, varlink_description(v), ##__VA_ARGS__)
+
+#define varlink_log(v, fmt, ...) \
+ log_debug("%s: " fmt, varlink_description(v), ##__VA_ARGS__)
+
+#define varlink_server_log_errno(s, error, fmt, ...) \
+ log_debug_errno(error, "%s: " fmt, varlink_server_description(s), ##__VA_ARGS__)
+
+#define varlink_server_log(s, fmt, ...) \
+ log_debug("%s: " fmt, varlink_server_description(s), ##__VA_ARGS__)
+
+static inline const char *varlink_description(Varlink *v) {
+ return strna(v ? v->description : NULL);
+}
+
+static inline const char *varlink_server_description(VarlinkServer *s) {
+ return strna(s ? s->description : NULL);
+}
+
+static void varlink_set_state(Varlink *v, VarlinkState state) {
+ assert(v);
+ assert(state >= 0 && state < _VARLINK_STATE_MAX);
+
+ if (v->state < 0)
+ varlink_log(v, "varlink: setting state %s",
+ varlink_state_to_string(state));
+ else
+ varlink_log(v, "varlink: changing state %s → %s",
+ varlink_state_to_string(v->state),
+ varlink_state_to_string(state));
+
+ v->state = state;
+}
+
+static int varlink_new(Varlink **ret) {
+ Varlink *v;
+
+ assert(ret);
+
+ v = new(Varlink, 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (Varlink) {
+ .n_ref = 1,
+ .fd = -1,
+
+ .state = _VARLINK_STATE_INVALID,
+
+ .ucred.uid = UID_INVALID,
+ .ucred.gid = GID_INVALID,
+
+ .timestamp = USEC_INFINITY,
+ .timeout = VARLINK_DEFAULT_TIMEOUT_USEC
+ };
+
+ *ret = v;
+ return 0;
+}
+
+int varlink_connect_address(Varlink **ret, const char *address) {
+ _cleanup_(varlink_unrefp) Varlink *v = NULL;
+ union sockaddr_union sockaddr;
+ socklen_t sockaddr_len;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ r = sockaddr_un_set_path(&sockaddr.un, address);
+ if (r < 0)
+ return r;
+ sockaddr_len = r;
+
+ r = varlink_new(&v);
+ if (r < 0)
+ return r;
+
+ v->fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (v->fd < 0)
+ return -errno;
+
+ v->fd = fd_move_above_stdio(v->fd);
+
+ if (connect(v->fd, &sockaddr.sa, sockaddr_len) < 0) {
+ if (!IN_SET(errno, EAGAIN, EINPROGRESS))
+ return -errno;
+
+ v->connecting = true; /* We are asynchronously connecting, i.e. the connect() is being
+ * processed in the background. As long as that's the case the socket
+ * is in a special state: it's there, we can poll it for EPOLLOUT, but
+ * if we attempt to write() to it before we see EPOLLOUT we'll get
+ * ENOTCONN (and not EAGAIN, like we would for a normal connected
+ * socket that isn't writable at the moment). Since ENOTCONN on write()
+ * hence can mean two different things (i.e. connection not complete
+ * yet vs. already disconnected again), we store as a boolean whether
+ * we are still in connect(). */
+ }
+
+ varlink_set_state(v, VARLINK_IDLE_CLIENT);
+
+ *ret = TAKE_PTR(v);
+ return r;
+}
+
+int varlink_connect_fd(Varlink **ret, int fd) {
+ Varlink *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = varlink_new(&v);
+ if (r < 0)
+ return r;
+
+ v->fd = fd;
+ varlink_set_state(v, VARLINK_IDLE_CLIENT);
+
+ /* Note that if this function is called we assume the passed socket (if it is one) is already
+ * properly connected, i.e. any asynchronous connect() done on it already completed. Because of that
+ * we'll not set the 'connecting' boolean here, i.e. we don't need to avoid write()ing to the socket
+ * until the connection is fully set up. Behaviour here is hence a bit different from
+ * varlink_connect_address() above, as there we do handle asynchronous connections ourselves and
+ * avoid doing write() on it before we saw EPOLLOUT for the first time. */
+
+ *ret = v;
+ return 0;
+}
+
+static void varlink_detach_event_sources(Varlink *v) {
+ assert(v);
+
+ v->io_event_source = sd_event_source_disable_unref(v->io_event_source);
+ v->time_event_source = sd_event_source_disable_unref(v->time_event_source);
+ v->quit_event_source = sd_event_source_disable_unref(v->quit_event_source);
+ v->defer_event_source = sd_event_source_disable_unref(v->defer_event_source);
+}
+
+static void varlink_clear(Varlink *v) {
+ assert(v);
+
+ varlink_detach_event_sources(v);
+
+ v->fd = safe_close(v->fd);
+
+ v->input_buffer = mfree(v->input_buffer);
+ v->output_buffer = mfree(v->output_buffer);
+
+ v->current = json_variant_unref(v->current);
+ v->reply = json_variant_unref(v->reply);
+
+ v->event = sd_event_unref(v->event);
+}
+
+static Varlink* varlink_destroy(Varlink *v) {
+ if (!v)
+ return NULL;
+
+ /* If this is called the server object must already been unreffed here. Why that? because when we
+ * linked up the varlink connection with the server object we took one ref in each direction */
+ assert(!v->server);
+
+ varlink_clear(v);
+
+ free(v->description);
+ return mfree(v);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Varlink, varlink, varlink_destroy);
+
+static int varlink_test_disconnect(Varlink *v) {
+ assert(v);
+
+ /* Tests whether we the connection has been terminated. We are careful to not stop processing it
+ * prematurely, since we want to handle half-open connections as well as possible and want to flush
+ * out and read data before we close down if we can. */
+
+ /* Already disconnected? */
+ if (!VARLINK_STATE_IS_ALIVE(v->state))
+ return 0;
+
+ /* Wait until connection setup is complete, i.e. until asynchronous connect() completes */
+ if (v->connecting)
+ return 0;
+
+ /* Still something to write and we can write? Stay around */
+ if (v->output_buffer_size > 0 && !v->write_disconnected)
+ return 0;
+
+ /* Both sides gone already? Then there's no need to stick around */
+ if (v->read_disconnected && v->write_disconnected)
+ goto disconnect;
+
+ /* If we are waiting for incoming data but the read side is shut down, disconnect. */
+ if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER) && v->read_disconnected)
+ goto disconnect;
+
+ /* Similar, if are a client that hasn't written anything yet but the write side is dead, also
+ * disconnect. We also explicitly check for POLLHUP here since we likely won't notice the write side
+ * being down if we never wrote anything. */
+ if (IN_SET(v->state, VARLINK_IDLE_CLIENT) && (v->write_disconnected || v->got_pollhup))
+ goto disconnect;
+
+ /* The server is still expecting to write more, but its write end is disconnected and it got a POLLHUP
+ * (i.e. from a disconnected client), so disconnect. */
+ if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE) && v->write_disconnected && v->got_pollhup)
+ goto disconnect;
+
+ return 0;
+
+disconnect:
+ varlink_set_state(v, VARLINK_PENDING_DISCONNECT);
+ return 1;
+}
+
+static int varlink_write(Varlink *v) {
+ ssize_t n;
+
+ assert(v);
+
+ if (!VARLINK_STATE_IS_ALIVE(v->state))
+ return 0;
+ if (v->connecting) /* Writing while we are still wait for a non-blocking connect() to complete will
+ * result in ENOTCONN, hence exit early here */
+ return 0;
+ if (v->output_buffer_size == 0)
+ return 0;
+ if (v->write_disconnected)
+ return 0;
+
+ assert(v->fd >= 0);
+
+ /* We generally prefer recv()/send() (mostly because of MSG_NOSIGNAL) but also want to be compatible
+ * with non-socket IO, hence fall back automatically */
+ if (!v->prefer_read_write) {
+ n = send(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (n < 0 && errno == ENOTSOCK)
+ v->prefer_read_write = true;
+ }
+ if (v->prefer_read_write)
+ n = write(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size);
+ if (n < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ if (ERRNO_IS_DISCONNECT(errno)) {
+ /* If we get informed about a disconnect on write, then let's remember that, but not
+ * act on it just yet. Let's wait for read() to report the issue first. */
+ v->write_disconnected = true;
+ return 1;
+ }
+
+ return -errno;
+ }
+
+ v->output_buffer_size -= n;
+
+ if (v->output_buffer_size == 0)
+ v->output_buffer_index = 0;
+ else
+ v->output_buffer_index += n;
+
+ v->timestamp = now(CLOCK_MONOTONIC);
+ return 1;
+}
+
+static int varlink_read(Varlink *v) {
+ size_t rs;
+ ssize_t n;
+
+ assert(v);
+
+ if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER))
+ return 0;
+ if (v->connecting) /* read() on a socket while we are in connect() will fail with EINVAL, hence exit early here */
+ return 0;
+ if (v->current)
+ return 0;
+ if (v->input_buffer_unscanned > 0)
+ return 0;
+ if (v->read_disconnected)
+ return 0;
+
+ if (v->input_buffer_size >= VARLINK_BUFFER_MAX)
+ return -ENOBUFS;
+
+ assert(v->fd >= 0);
+
+ if (v->input_buffer_allocated <= v->input_buffer_index + v->input_buffer_size) {
+ size_t add;
+
+ add = MIN(VARLINK_BUFFER_MAX - v->input_buffer_size, VARLINK_READ_SIZE);
+
+ if (v->input_buffer_index == 0) {
+
+ if (!GREEDY_REALLOC(v->input_buffer, v->input_buffer_allocated, v->input_buffer_size + add))
+ return -ENOMEM;
+
+ } else {
+ char *b;
+
+ b = new(char, v->input_buffer_size + add);
+ if (!b)
+ return -ENOMEM;
+
+ memcpy(b, v->input_buffer + v->input_buffer_index, v->input_buffer_size);
+
+ free_and_replace(v->input_buffer, b);
+
+ v->input_buffer_allocated = v->input_buffer_size + add;
+ v->input_buffer_index = 0;
+ }
+ }
+
+ rs = v->input_buffer_allocated - (v->input_buffer_index + v->input_buffer_size);
+
+ if (!v->prefer_read_write) {
+ n = recv(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs, MSG_DONTWAIT);
+ if (n < 0 && errno == ENOTSOCK)
+ v->prefer_read_write = true;
+ }
+ if (v->prefer_read_write)
+ n = read(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs);
+ if (n < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ if (ERRNO_IS_DISCONNECT(errno)) {
+ v->read_disconnected = true;
+ return 1;
+ }
+
+ return -errno;
+ }
+ if (n == 0) { /* EOF */
+ v->read_disconnected = true;
+ return 1;
+ }
+
+ v->input_buffer_size += n;
+ v->input_buffer_unscanned += n;
+
+ return 1;
+}
+
+static int varlink_parse_message(Varlink *v) {
+ const char *e, *begin;
+ size_t sz;
+ int r;
+
+ assert(v);
+
+ if (v->current)
+ return 0;
+ if (v->input_buffer_unscanned <= 0)
+ return 0;
+
+ assert(v->input_buffer_unscanned <= v->input_buffer_size);
+ assert(v->input_buffer_index + v->input_buffer_size <= v->input_buffer_allocated);
+
+ begin = v->input_buffer + v->input_buffer_index;
+
+ e = memchr(begin + v->input_buffer_size - v->input_buffer_unscanned, 0, v->input_buffer_unscanned);
+ if (!e) {
+ v->input_buffer_unscanned = 0;
+ return 0;
+ }
+
+ sz = e - begin + 1;
+
+ varlink_log(v, "New incoming message: %s", begin); /* FIXME: should we output the whole message here before validation?
+ * This may produce a non-printable journal entry if the message
+ * is invalid. We may also expose privileged information. */
+
+ r = json_parse(begin, 0, &v->current, NULL, NULL);
+ if (r < 0) {
+ /* If we encounter a parse failure flush all data. We cannot possibly recover from this,
+ * hence drop all buffered data now. */
+ v->input_buffer_index = v->input_buffer_size = v->input_buffer_unscanned = 0;
+ return varlink_log_errno(v, r, "Failed to parse JSON: %m");
+ }
+
+ v->input_buffer_size -= sz;
+
+ if (v->input_buffer_size == 0)
+ v->input_buffer_index = 0;
+ else
+ v->input_buffer_index += sz;
+
+ v->input_buffer_unscanned = v->input_buffer_size;
+ return 1;
+}
+
+static int varlink_test_timeout(Varlink *v) {
+ assert(v);
+
+ if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING))
+ return 0;
+ if (v->timeout == USEC_INFINITY)
+ return 0;
+
+ if (now(CLOCK_MONOTONIC) < usec_add(v->timestamp, v->timeout))
+ return 0;
+
+ varlink_set_state(v, VARLINK_PENDING_TIMEOUT);
+
+ return 1;
+}
+
+static int varlink_dispatch_local_error(Varlink *v, const char *error) {
+ int r;
+
+ assert(v);
+ assert(error);
+
+ if (!v->reply_callback)
+ return 0;
+
+ r = v->reply_callback(v, NULL, error, VARLINK_REPLY_ERROR|VARLINK_REPLY_LOCAL, v->userdata);
+ if (r < 0)
+ log_debug_errno(r, "Reply callback returned error, ignoring: %m");
+
+ return 1;
+}
+
+static int varlink_dispatch_timeout(Varlink *v) {
+ assert(v);
+
+ if (v->state != VARLINK_PENDING_TIMEOUT)
+ return 0;
+
+ varlink_set_state(v, VARLINK_PROCESSING_TIMEOUT);
+ varlink_dispatch_local_error(v, VARLINK_ERROR_TIMEOUT);
+ varlink_close(v);
+
+ return 1;
+}
+
+static int varlink_dispatch_disconnect(Varlink *v) {
+ assert(v);
+
+ if (v->state != VARLINK_PENDING_DISCONNECT)
+ return 0;
+
+ varlink_set_state(v, VARLINK_PROCESSING_DISCONNECT);
+ varlink_dispatch_local_error(v, VARLINK_ERROR_DISCONNECTED);
+ varlink_close(v);
+
+ return 1;
+}
+
+static int varlink_sanitize_parameters(JsonVariant **v) {
+ assert(v);
+
+ /* Varlink always wants a parameters list, hence make one if the caller doesn't want any */
+ if (!*v)
+ return json_variant_new_object(v, NULL, 0);
+ else if (!json_variant_is_object(*v))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int varlink_dispatch_reply(Varlink *v) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ VarlinkReplyFlags flags = 0;
+ const char *error = NULL;
+ JsonVariant *e;
+ const char *k;
+ int r;
+
+ assert(v);
+
+ if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING))
+ return 0;
+ if (!v->current)
+ return 0;
+
+ assert(v->n_pending > 0);
+
+ if (!json_variant_is_object(v->current))
+ goto invalid;
+
+ JSON_VARIANT_OBJECT_FOREACH(k, e, v->current) {
+
+ if (streq(k, "error")) {
+ if (error)
+ goto invalid;
+ if (!json_variant_is_string(e))
+ goto invalid;
+
+ error = json_variant_string(e);
+ flags |= VARLINK_REPLY_ERROR;
+
+ } else if (streq(k, "parameters")) {
+ if (parameters)
+ goto invalid;
+ if (!json_variant_is_object(e))
+ goto invalid;
+
+ parameters = json_variant_ref(e);
+
+ } else if (streq(k, "continues")) {
+ if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ goto invalid;
+
+ if (!json_variant_is_boolean(e))
+ goto invalid;
+
+ if (json_variant_boolean(e))
+ flags |= VARLINK_REPLY_CONTINUES;
+ } else
+ goto invalid;
+ }
+
+ /* Replies with 'continue' set are only OK if we set 'more' when the method call was initiated */
+ if (v->state != VARLINK_AWAITING_REPLY_MORE && FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ goto invalid;
+
+ /* An error is final */
+ if (error && FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ goto invalid;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ goto invalid;
+
+ if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE)) {
+ varlink_set_state(v, VARLINK_PROCESSING_REPLY);
+
+ if (v->reply_callback) {
+ r = v->reply_callback(v, parameters, error, flags, v->userdata);
+ if (r < 0)
+ log_debug_errno(r, "Reply callback returned error, ignoring: %m");
+ }
+
+ v->current = json_variant_unref(v->current);
+
+ if (v->state == VARLINK_PROCESSING_REPLY) {
+
+ assert(v->n_pending > 0);
+
+ if (!FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ v->n_pending--;
+
+ varlink_set_state(v,
+ FLAGS_SET(flags, VARLINK_REPLY_CONTINUES) ? VARLINK_AWAITING_REPLY_MORE :
+ v->n_pending == 0 ? VARLINK_IDLE_CLIENT : VARLINK_AWAITING_REPLY);
+ }
+ } else {
+ assert(v->state == VARLINK_CALLING);
+ varlink_set_state(v, VARLINK_CALLED);
+ }
+
+ return 1;
+
+invalid:
+ varlink_set_state(v, VARLINK_PROCESSING_FAILURE);
+ varlink_dispatch_local_error(v, VARLINK_ERROR_PROTOCOL);
+ varlink_close(v);
+
+ return 1;
+}
+
+static int varlink_dispatch_method(Varlink *v) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ VarlinkMethodFlags flags = 0;
+ const char *method = NULL, *error;
+ JsonVariant *e;
+ VarlinkMethod callback;
+ const char *k;
+ int r;
+
+ assert(v);
+
+ if (v->state != VARLINK_IDLE_SERVER)
+ return 0;
+ if (!v->current)
+ return 0;
+
+ if (!json_variant_is_object(v->current))
+ goto invalid;
+
+ JSON_VARIANT_OBJECT_FOREACH(k, e, v->current) {
+
+ if (streq(k, "method")) {
+ if (method)
+ goto invalid;
+ if (!json_variant_is_string(e))
+ goto invalid;
+
+ method = json_variant_string(e);
+
+ } else if (streq(k, "parameters")) {
+ if (parameters)
+ goto invalid;
+ if (!json_variant_is_object(e))
+ goto invalid;
+
+ parameters = json_variant_ref(e);
+
+ } else if (streq(k, "oneway")) {
+
+ if ((flags & (VARLINK_METHOD_ONEWAY|VARLINK_METHOD_MORE)) != 0)
+ goto invalid;
+
+ if (!json_variant_is_boolean(e))
+ goto invalid;
+
+ if (json_variant_boolean(e))
+ flags |= VARLINK_METHOD_ONEWAY;
+
+ } else if (streq(k, "more")) {
+
+ if ((flags & (VARLINK_METHOD_ONEWAY|VARLINK_METHOD_MORE)) != 0)
+ goto invalid;
+
+ if (!json_variant_is_boolean(e))
+ goto invalid;
+
+ if (json_variant_boolean(e))
+ flags |= VARLINK_METHOD_MORE;
+
+ } else
+ goto invalid;
+ }
+
+ if (!method)
+ goto invalid;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ goto fail;
+
+ varlink_set_state(v, (flags & VARLINK_METHOD_MORE) ? VARLINK_PROCESSING_METHOD_MORE :
+ (flags & VARLINK_METHOD_ONEWAY) ? VARLINK_PROCESSING_METHOD_ONEWAY :
+ VARLINK_PROCESSING_METHOD);
+
+ assert(v->server);
+
+ if (STR_IN_SET(method, "org.varlink.service.GetInfo", "org.varlink.service.GetInterface")) {
+ /* For now, we don't implement a single of varlink's own methods */
+ callback = NULL;
+ error = VARLINK_ERROR_METHOD_NOT_IMPLEMENTED;
+ } else if (startswith(method, "org.varlink.service.")) {
+ callback = NULL;
+ error = VARLINK_ERROR_METHOD_NOT_FOUND;
+ } else {
+ callback = hashmap_get(v->server->methods, method);
+ error = VARLINK_ERROR_METHOD_NOT_FOUND;
+ }
+
+ if (callback) {
+ r = callback(v, parameters, flags, v->userdata);
+ if (r < 0) {
+ log_debug_errno(r, "Callback for %s returned error: %m", method);
+
+ /* We got an error back from the callback. Propagate it to the client if the method call remains unanswered. */
+ if (!FLAGS_SET(flags, VARLINK_METHOD_ONEWAY)) {
+ r = varlink_error_errno(v, r);
+ if (r < 0)
+ return r;
+ }
+ }
+ } else if (!FLAGS_SET(flags, VARLINK_METHOD_ONEWAY)) {
+ assert(error);
+
+ r = varlink_errorb(v, error, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method))));
+ if (r < 0)
+ return r;
+ }
+
+ switch (v->state) {
+
+ case VARLINK_PROCESSED_METHOD: /* Method call is fully processed */
+ case VARLINK_PROCESSING_METHOD_ONEWAY: /* ditto */
+ v->current = json_variant_unref(v->current);
+ varlink_set_state(v, VARLINK_IDLE_SERVER);
+ break;
+
+ case VARLINK_PROCESSING_METHOD: /* Method call wasn't replied to, will be replied to later */
+ varlink_set_state(v, VARLINK_PENDING_METHOD);
+ break;
+
+ case VARLINK_PROCESSING_METHOD_MORE: /* No reply for a "more" message was sent, more to come */
+ varlink_set_state(v, VARLINK_PENDING_METHOD_MORE);
+ break;
+
+ default:
+ assert_not_reached("Unexpected state");
+
+ }
+
+ return r;
+
+invalid:
+ r = -EINVAL;
+
+fail:
+ varlink_set_state(v, VARLINK_PROCESSING_FAILURE);
+ varlink_dispatch_local_error(v, VARLINK_ERROR_PROTOCOL);
+ varlink_close(v);
+
+ return r;
+}
+
+int varlink_process(Varlink *v) {
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ varlink_ref(v);
+
+ r = varlink_write(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_dispatch_reply(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_dispatch_method(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_parse_message(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_read(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_test_disconnect(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_dispatch_disconnect(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_test_timeout(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_dispatch_timeout(v);
+ if (r != 0)
+ goto finish;
+
+finish:
+ if (r >= 0 && v->defer_event_source) {
+ int q;
+
+ /* If we did some processing, make sure we are called again soon */
+ q = sd_event_source_set_enabled(v->defer_event_source, r > 0 ? SD_EVENT_ON : SD_EVENT_OFF);
+ if (q < 0)
+ r = q;
+ }
+
+ if (r < 0) {
+ if (VARLINK_STATE_IS_ALIVE(v->state))
+ /* Initiate disconnection */
+ varlink_set_state(v, VARLINK_PENDING_DISCONNECT);
+ else
+ /* We failed while disconnecting, in that case close right away */
+ varlink_close(v);
+ }
+
+ varlink_unref(v);
+ return r;
+}
+
+static void handle_revents(Varlink *v, int revents) {
+ assert(v);
+
+ if (v->connecting) {
+ /* If we have seen POLLOUT or POLLHUP on a socket we are asynchronously waiting a connect()
+ * to complete on, we know we are ready. We don't read the connection error here though,
+ * we'll get the error on the next read() or write(). */
+ if ((revents & (POLLOUT|POLLHUP)) == 0)
+ return;
+
+ varlink_log(v, "Anynchronous connection completed.");
+ v->connecting = false;
+ } else {
+ /* Note that we don't care much about POLLIN/POLLOUT here, we'll just try reading and writing
+ * what we can. However, we do care about POLLHUP to detect connection termination even if we
+ * momentarily don't want to read nor write anything. */
+
+ if (!FLAGS_SET(revents, POLLHUP))
+ return;
+
+ varlink_log(v, "Got POLLHUP from socket.");
+ v->got_pollhup = true;
+ }
+}
+
+int varlink_wait(Varlink *v, usec_t timeout) {
+ int r, fd, events;
+ usec_t t;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ r = varlink_get_timeout(v, &t);
+ if (r < 0)
+ return r;
+ if (t != USEC_INFINITY) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ if (t < n)
+ t = 0;
+ else
+ t = usec_sub_unsigned(t, n);
+ }
+
+ if (timeout != USEC_INFINITY &&
+ (t == USEC_INFINITY || timeout < t))
+ t = timeout;
+
+ fd = varlink_get_fd(v);
+ if (fd < 0)
+ return fd;
+
+ events = varlink_get_events(v);
+ if (events < 0)
+ return events;
+
+ r = fd_wait_for_event(fd, events, t);
+ if (r <= 0)
+ return r;
+
+ handle_revents(v, r);
+ return 1;
+}
+
+int varlink_get_fd(Varlink *v) {
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (v->fd < 0)
+ return -EBADF;
+
+ return v->fd;
+}
+
+int varlink_get_events(Varlink *v) {
+ int ret = 0;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ if (v->connecting) /* When processing an asynchronous connect(), we only wait for EPOLLOUT, which
+ * tells us that the connection is now complete. Before that we should neither
+ * write() or read() from the fd. */
+ return EPOLLOUT;
+
+ if (!v->read_disconnected &&
+ IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER) &&
+ !v->current &&
+ v->input_buffer_unscanned <= 0)
+ ret |= EPOLLIN;
+
+ if (!v->write_disconnected &&
+ v->output_buffer_size > 0)
+ ret |= EPOLLOUT;
+
+ return ret;
+}
+
+int varlink_get_timeout(Varlink *v, usec_t *ret) {
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING) &&
+ v->timeout != USEC_INFINITY) {
+ if (ret)
+ *ret = usec_add(v->timestamp, v->timeout);
+ return 1;
+ } else {
+ if (ret)
+ *ret = USEC_INFINITY;
+ return 0;
+ }
+}
+
+int varlink_flush(Varlink *v) {
+ int ret = 0, r;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ for (;;) {
+ if (v->output_buffer_size == 0)
+ break;
+ if (v->write_disconnected)
+ return -ECONNRESET;
+
+ r = varlink_write(v);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ ret = 1;
+ continue;
+ }
+
+ r = fd_wait_for_event(v->fd, POLLOUT, USEC_INFINITY);
+ if (r < 0)
+ return r;
+
+ assert(r != 0);
+
+ handle_revents(v, r);
+ }
+
+ return ret;
+}
+
+static void varlink_detach_server(Varlink *v) {
+ VarlinkServer *saved_server;
+ assert(v);
+
+ if (!v->server)
+ return;
+
+ if (v->server->by_uid &&
+ v->ucred_acquired &&
+ uid_is_valid(v->ucred.uid)) {
+ unsigned c;
+
+ c = PTR_TO_UINT(hashmap_get(v->server->by_uid, UID_TO_PTR(v->ucred.uid)));
+ assert(c > 0);
+
+ if (c == 1)
+ (void) hashmap_remove(v->server->by_uid, UID_TO_PTR(v->ucred.uid));
+ else
+ (void) hashmap_replace(v->server->by_uid, UID_TO_PTR(v->ucred.uid), UINT_TO_PTR(c - 1));
+ }
+
+ assert(v->server->n_connections > 0);
+ v->server->n_connections--;
+
+ /* If this is a connection associated to a server, then let's disconnect the server and the
+ * connection from each other. This drops the dangling reference that connect_callback() set up. But
+ * before we release the references, let's call the disconnection callback if it is defined. */
+
+ saved_server = TAKE_PTR(v->server);
+
+ if (saved_server->disconnect_callback)
+ saved_server->disconnect_callback(saved_server, v, saved_server->userdata);
+
+ varlink_server_unref(saved_server);
+ varlink_unref(v);
+}
+
+int varlink_close(Varlink *v) {
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return 0;
+
+ varlink_set_state(v, VARLINK_DISCONNECTED);
+
+ /* Let's take a reference first, since varlink_detach_server() might drop the final (dangling) ref
+ * which would destroy us before we can call varlink_clear() */
+ varlink_ref(v);
+ varlink_detach_server(v);
+ varlink_clear(v);
+ varlink_unref(v);
+
+ return 1;
+}
+
+Varlink* varlink_close_unref(Varlink *v) {
+
+ if (!v)
+ return NULL;
+
+ (void) varlink_close(v);
+ return varlink_unref(v);
+}
+
+Varlink* varlink_flush_close_unref(Varlink *v) {
+
+ if (!v)
+ return NULL;
+
+ (void) varlink_flush(v);
+ (void) varlink_close(v);
+ return varlink_unref(v);
+}
+
+static int varlink_enqueue_json(Varlink *v, JsonVariant *m) {
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ assert(v);
+ assert(m);
+
+ r = json_variant_format(m, 0, &text);
+ if (r < 0)
+ return r;
+ assert(text[r] == '\0');
+
+ if (v->output_buffer_size + r + 1 > VARLINK_BUFFER_MAX)
+ return -ENOBUFS;
+
+ varlink_log(v, "Sending message: %s", text);
+
+ if (v->output_buffer_size == 0) {
+
+ free_and_replace(v->output_buffer, text);
+
+ v->output_buffer_size = v->output_buffer_allocated = r + 1;
+ v->output_buffer_index = 0;
+
+ } else if (v->output_buffer_index == 0) {
+
+ if (!GREEDY_REALLOC(v->output_buffer, v->output_buffer_allocated, v->output_buffer_size + r + 1))
+ return -ENOMEM;
+
+ memcpy(v->output_buffer + v->output_buffer_size, text, r + 1);
+ v->output_buffer_size += r + 1;
+
+ } else {
+ char *n;
+ const size_t new_size = v->output_buffer_size + r + 1;
+
+ n = new(char, new_size);
+ if (!n)
+ return -ENOMEM;
+
+ memcpy(mempcpy(n, v->output_buffer + v->output_buffer_index, v->output_buffer_size), text, r + 1);
+
+ free_and_replace(v->output_buffer, n);
+ v->output_buffer_allocated = v->output_buffer_size = new_size;
+ v->output_buffer_index = 0;
+ }
+
+ return 0;
+}
+
+int varlink_send(Varlink *v, const char *method, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(method, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ /* We allow enqueuing multiple method calls at once! */
+ if (!IN_SET(v->state, VARLINK_IDLE_CLIENT, VARLINK_AWAITING_REPLY))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)),
+ JSON_BUILD_PAIR("oneway", JSON_BUILD_BOOLEAN(true))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ /* No state change here, this is one-way only after all */
+ v->timestamp = now(CLOCK_MONOTONIC);
+ return 0;
+}
+
+int varlink_sendb(Varlink *v, const char *method, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, method);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_send(v, method, parameters);
+}
+
+int varlink_invoke(Varlink *v, const char *method, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(method, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ /* We allow enqueuing multiple method calls at once! */
+ if (!IN_SET(v->state, VARLINK_IDLE_CLIENT, VARLINK_AWAITING_REPLY))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ varlink_set_state(v, VARLINK_AWAITING_REPLY);
+ v->n_pending++;
+ v->timestamp = now(CLOCK_MONOTONIC);
+
+ return 0;
+}
+
+int varlink_invokeb(Varlink *v, const char *method, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, method);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_invoke(v, method, parameters);
+}
+
+int varlink_observe(Varlink *v, const char *method, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(method, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ /* Note that we don't allow enqueuing multiple method calls when we are in more/continues mode! We
+ * thus insist on an idle client here. */
+ if (v->state != VARLINK_IDLE_CLIENT)
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)),
+ JSON_BUILD_PAIR("more", JSON_BUILD_BOOLEAN(true))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+
+ varlink_set_state(v, VARLINK_AWAITING_REPLY_MORE);
+ v->n_pending++;
+ v->timestamp = now(CLOCK_MONOTONIC);
+
+ return 0;
+}
+
+int varlink_observeb(Varlink *v, const char *method, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, method);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_observe(v, method, parameters);
+}
+
+int varlink_call(
+ Varlink *v,
+ const char *method,
+ JsonVariant *parameters,
+ JsonVariant **ret_parameters,
+ const char **ret_error_id,
+ VarlinkReplyFlags *ret_flags) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(method, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (!IN_SET(v->state, VARLINK_IDLE_CLIENT))
+ return -EBUSY;
+
+ assert(v->n_pending == 0); /* n_pending can't be > 0 if we are in VARLINK_IDLE_CLIENT state */
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ varlink_set_state(v, VARLINK_CALLING);
+ v->n_pending++;
+ v->timestamp = now(CLOCK_MONOTONIC);
+
+ while (v->state == VARLINK_CALLING) {
+
+ r = varlink_process(v);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = varlink_wait(v, USEC_INFINITY);
+ if (r < 0)
+ return r;
+ }
+
+ switch (v->state) {
+
+ case VARLINK_CALLED:
+ assert(v->current);
+
+ json_variant_unref(v->reply);
+ v->reply = TAKE_PTR(v->current);
+
+ varlink_set_state(v, VARLINK_IDLE_CLIENT);
+ assert(v->n_pending == 1);
+ v->n_pending--;
+
+ if (ret_parameters)
+ *ret_parameters = json_variant_by_key(v->reply, "parameters");
+ if (ret_error_id)
+ *ret_error_id = json_variant_string(json_variant_by_key(v->reply, "error"));
+ if (ret_flags)
+ *ret_flags = 0;
+
+ return 1;
+
+ case VARLINK_PENDING_DISCONNECT:
+ case VARLINK_DISCONNECTED:
+ return -ECONNRESET;
+
+ case VARLINK_PENDING_TIMEOUT:
+ return -ETIME;
+
+ default:
+ assert_not_reached("Unexpected state after method call.");
+ }
+}
+
+int varlink_callb(
+ Varlink *v,
+ const char *method,
+ JsonVariant **ret_parameters,
+ const char **ret_error_id,
+ VarlinkReplyFlags *ret_flags, ...) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, ret_flags);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_call(v, method, parameters, ret_parameters, ret_error_id, ret_flags);
+}
+
+int varlink_reply(Varlink *v, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (!IN_SET(v->state,
+ VARLINK_PROCESSING_METHOD, VARLINK_PROCESSING_METHOD_MORE,
+ VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) {
+ /* We just replied to a method call that was let hanging for a while (i.e. we were outside of
+ * the varlink_dispatch_method() stack frame), which means with this reply we are ready to
+ * process further messages. */
+ v->current = json_variant_unref(v->current);
+ varlink_set_state(v, VARLINK_IDLE_SERVER);
+ } else
+ /* We replied to a method call from within the varlink_dispatch_method() stack frame), which
+ * means we should it handle the rest of the state engine. */
+ varlink_set_state(v, VARLINK_PROCESSED_METHOD);
+
+ return 1;
+}
+
+int varlink_replyb(Varlink *v, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, v);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_reply(v, parameters);
+}
+
+int varlink_error(Varlink *v, const char *error_id, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(error_id, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (!IN_SET(v->state,
+ VARLINK_PROCESSING_METHOD, VARLINK_PROCESSING_METHOD_MORE,
+ VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("error", JSON_BUILD_STRING(error_id)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) {
+ v->current = json_variant_unref(v->current);
+ varlink_set_state(v, VARLINK_IDLE_SERVER);
+ } else
+ varlink_set_state(v, VARLINK_PROCESSED_METHOD);
+
+ return 1;
+}
+
+int varlink_errorb(Varlink *v, const char *error_id, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(error_id, -EINVAL);
+
+ va_start(ap, error_id);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_error(v, error_id, parameters);
+}
+
+int varlink_error_invalid_parameter(Varlink *v, JsonVariant *parameters) {
+
+ assert_return(v, -EINVAL);
+ assert_return(parameters, -EINVAL);
+
+ /* We expect to be called in one of two ways: the 'parameters' argument is a string variant in which
+ * case it is the parameter key name that is invalid. Or the 'parameters' argument is an object
+ * variant in which case we'll pull out the first key. The latter mode is useful in functions that
+ * don't expect any arguments. */
+
+ if (json_variant_is_string(parameters))
+ return varlink_error(v, VARLINK_ERROR_INVALID_PARAMETER, parameters);
+
+ if (json_variant_is_object(parameters) &&
+ json_variant_elements(parameters) > 0)
+ return varlink_error(v, VARLINK_ERROR_INVALID_PARAMETER,
+ json_variant_by_index(parameters, 0));
+
+ return -EINVAL;
+}
+
+int varlink_error_errno(Varlink *v, int error) {
+ return varlink_errorb(
+ v,
+ VARLINK_ERROR_SYSTEM,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("errno", JSON_BUILD_INTEGER(abs(error)))));
+}
+
+int varlink_notify(Varlink *v, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (!IN_SET(v->state, VARLINK_PROCESSING_METHOD_MORE, VARLINK_PENDING_METHOD_MORE))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)),
+ JSON_BUILD_PAIR("continues", JSON_BUILD_BOOLEAN(true))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ /* No state change, as more is coming */
+ return 1;
+}
+
+int varlink_notifyb(Varlink *v, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, v);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_notify(v, parameters);
+}
+
+int varlink_bind_reply(Varlink *v, VarlinkReply callback) {
+ assert_return(v, -EINVAL);
+
+ if (callback && v->reply_callback && callback != v->reply_callback)
+ return -EBUSY;
+
+ v->reply_callback = callback;
+
+ return 0;
+}
+
+void* varlink_set_userdata(Varlink *v, void *userdata) {
+ void *old;
+
+ assert_return(v, NULL);
+
+ old = v->userdata;
+ v->userdata = userdata;
+
+ return old;
+}
+
+void* varlink_get_userdata(Varlink *v) {
+ assert_return(v, NULL);
+
+ return v->userdata;
+}
+
+static int varlink_acquire_ucred(Varlink *v) {
+ int r;
+
+ assert(v);
+
+ if (v->ucred_acquired)
+ return 0;
+
+ r = getpeercred(v->fd, &v->ucred);
+ if (r < 0)
+ return r;
+
+ v->ucred_acquired = true;
+ return 0;
+}
+
+int varlink_get_peer_uid(Varlink *v, uid_t *ret) {
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = varlink_acquire_ucred(v);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(v->ucred.uid))
+ return -ENODATA;
+
+ *ret = v->ucred.uid;
+ return 0;
+}
+
+int varlink_get_peer_pid(Varlink *v, pid_t *ret) {
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = varlink_acquire_ucred(v);
+ if (r < 0)
+ return r;
+
+ if (!pid_is_valid(v->ucred.pid))
+ return -ENODATA;
+
+ *ret = v->ucred.pid;
+ return 0;
+}
+
+int varlink_set_relative_timeout(Varlink *v, usec_t timeout) {
+ assert_return(v, -EINVAL);
+ assert_return(timeout > 0, -EINVAL);
+
+ v->timeout = timeout;
+ return 0;
+}
+
+VarlinkServer *varlink_get_server(Varlink *v) {
+ assert_return(v, NULL);
+
+ return v->server;
+}
+
+int varlink_set_description(Varlink *v, const char *description) {
+ assert_return(v, -EINVAL);
+
+ return free_and_strdup(&v->description, description);
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Varlink *v = userdata;
+
+ assert(s);
+ assert(v);
+
+ handle_revents(v, revents);
+ (void) varlink_process(v);
+
+ return 1;
+}
+
+static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ Varlink *v = userdata;
+
+ assert(s);
+ assert(v);
+
+ (void) varlink_process(v);
+ return 1;
+}
+
+static int defer_callback(sd_event_source *s, void *userdata) {
+ Varlink *v = userdata;
+
+ assert(s);
+ assert(v);
+
+ (void) varlink_process(v);
+ return 1;
+}
+
+static int prepare_callback(sd_event_source *s, void *userdata) {
+ Varlink *v = userdata;
+ int r, e;
+ usec_t until;
+ bool have_timeout;
+
+ assert(s);
+ assert(v);
+
+ e = varlink_get_events(v);
+ if (e < 0)
+ return e;
+
+ r = sd_event_source_set_io_events(v->io_event_source, e);
+ if (r < 0)
+ return r;
+
+ r = varlink_get_timeout(v, &until);
+ if (r < 0)
+ return r;
+ have_timeout = r > 0;
+
+ if (have_timeout) {
+ r = sd_event_source_set_time(v->time_event_source, until);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_enabled(v->time_event_source, have_timeout ? SD_EVENT_ON : SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int quit_callback(sd_event_source *event, void *userdata) {
+ Varlink *v = userdata;
+
+ assert(event);
+ assert(v);
+
+ varlink_flush(v);
+ varlink_close(v);
+
+ return 1;
+}
+
+int varlink_attach_event(Varlink *v, sd_event *e, int64_t priority) {
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(!v->event, -EBUSY);
+
+ if (e)
+ v->event = sd_event_ref(e);
+ else {
+ r = sd_event_default(&v->event);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_time(v->event, &v->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, v);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(v->time_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(v->time_event_source, "varlink-time");
+
+ r = sd_event_add_exit(v->event, &v->quit_event_source, quit_callback, v);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(v->quit_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(v->quit_event_source, "varlink-quit");
+
+ r = sd_event_add_io(v->event, &v->io_event_source, v->fd, 0, io_callback, v);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_prepare(v->io_event_source, prepare_callback);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(v->io_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(v->io_event_source, "varlink-io");
+
+ r = sd_event_add_defer(v->event, &v->defer_event_source, defer_callback, v);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(v->defer_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(v->defer_event_source, "varlink-defer");
+
+ return 0;
+
+fail:
+ varlink_detach_event(v);
+ return r;
+}
+
+void varlink_detach_event(Varlink *v) {
+ if (!v)
+ return;
+
+ varlink_detach_event_sources(v);
+
+ v->event = sd_event_unref(v->event);
+}
+
+sd_event *varlink_get_event(Varlink *v) {
+ assert_return(v, NULL);
+
+ return v->event;
+}
+
+int varlink_server_new(VarlinkServer **ret, VarlinkServerFlags flags) {
+ VarlinkServer *s;
+
+ assert_return(ret, -EINVAL);
+ assert_return((flags & ~_VARLINK_SERVER_FLAGS_ALL) == 0, -EINVAL);
+
+ s = new(VarlinkServer, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (VarlinkServer) {
+ .n_ref = 1,
+ .flags = flags,
+ .connections_max = varlink_server_connections_max(NULL),
+ .connections_per_uid_max = varlink_server_connections_per_uid_max(NULL),
+ };
+
+ *ret = s;
+ return 0;
+}
+
+static VarlinkServer* varlink_server_destroy(VarlinkServer *s) {
+ char *m;
+
+ if (!s)
+ return NULL;
+
+ varlink_server_shutdown(s);
+
+ while ((m = hashmap_steal_first_key(s->methods)))
+ free(m);
+
+ hashmap_free(s->methods);
+ hashmap_free(s->by_uid);
+
+ sd_event_unref(s->event);
+
+ free(s->description);
+
+ return mfree(s);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(VarlinkServer, varlink_server, varlink_server_destroy);
+
+static int validate_connection(VarlinkServer *server, const struct ucred *ucred) {
+ int allowed = -1;
+
+ assert(server);
+ assert(ucred);
+
+ if (FLAGS_SET(server->flags, VARLINK_SERVER_ROOT_ONLY))
+ allowed = ucred->uid == 0;
+
+ if (FLAGS_SET(server->flags, VARLINK_SERVER_MYSELF_ONLY))
+ allowed = allowed > 0 || ucred->uid == getuid();
+
+ if (allowed == 0) { /* Allow access when it is explicitly allowed or when neither
+ * VARLINK_SERVER_ROOT_ONLY nor VARLINK_SERVER_MYSELF_ONLY are specified. */
+ varlink_server_log(server, "Unprivileged client attempted connection, refusing.");
+ return 0;
+ }
+
+ if (server->n_connections >= server->connections_max) {
+ varlink_server_log(server, "Connection limit of %u reached, refusing.", server->connections_max);
+ return 0;
+ }
+
+ if (FLAGS_SET(server->flags, VARLINK_SERVER_ACCOUNT_UID)) {
+ unsigned c;
+
+ if (!uid_is_valid(ucred->uid)) {
+ varlink_server_log(server, "Client with invalid UID attempted connection, refusing.");
+ return 0;
+ }
+
+ c = PTR_TO_UINT(hashmap_get(server->by_uid, UID_TO_PTR(ucred->uid)));
+ if (c >= server->connections_per_uid_max) {
+ varlink_server_log(server, "Per-UID connection limit of %u reached, refusing.",
+ server->connections_per_uid_max);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static int count_connection(VarlinkServer *server, struct ucred *ucred) {
+ unsigned c;
+ int r;
+
+ assert(server);
+ assert(ucred);
+
+ server->n_connections++;
+
+ if (FLAGS_SET(server->flags, VARLINK_SERVER_ACCOUNT_UID)) {
+ r = hashmap_ensure_allocated(&server->by_uid, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to allocate UID hash table: %m");
+
+ c = PTR_TO_UINT(hashmap_get(server->by_uid, UID_TO_PTR(ucred->uid)));
+
+ varlink_server_log(server, "Connections of user " UID_FMT ": %u (of %u max)",
+ ucred->uid, c, server->connections_per_uid_max);
+
+ r = hashmap_replace(server->by_uid, UID_TO_PTR(ucred->uid), UINT_TO_PTR(c + 1));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to increment counter in UID hash table: %m");
+ }
+
+ return 0;
+}
+
+int varlink_server_add_connection(VarlinkServer *server, int fd, Varlink **ret) {
+ _cleanup_(varlink_unrefp) Varlink *v = NULL;
+ bool ucred_acquired;
+ struct ucred ucred;
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ if ((server->flags & (VARLINK_SERVER_ROOT_ONLY|VARLINK_SERVER_ACCOUNT_UID)) != 0) {
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return varlink_server_log_errno(server, r, "Failed to acquire peer credentials of incoming socket, refusing: %m");
+
+ ucred_acquired = true;
+
+ r = validate_connection(server, &ucred);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EPERM;
+ } else
+ ucred_acquired = false;
+
+ r = varlink_new(&v);
+ if (r < 0)
+ return varlink_server_log_errno(server, r, "Failed to allocate connection object: %m");
+
+ r = count_connection(server, &ucred);
+ if (r < 0)
+ return r;
+
+ v->fd = fd;
+ v->userdata = server->userdata;
+ if (ucred_acquired) {
+ v->ucred = ucred;
+ v->ucred_acquired = true;
+ }
+
+ (void) asprintf(&v->description, "%s-%i", server->description ?: "varlink", v->fd);
+
+ /* Link up the server and the connection, and take reference in both directions. Note that the
+ * reference on the connection is left dangling. It will be dropped when the connection is closed,
+ * which happens in varlink_close(), including in the event loop quit callback. */
+ v->server = varlink_server_ref(server);
+ varlink_ref(v);
+
+ varlink_set_state(v, VARLINK_IDLE_SERVER);
+
+ if (server->event) {
+ r = varlink_attach_event(v, server->event, server->event_priority);
+ if (r < 0) {
+ varlink_log_errno(v, r, "Failed to attach new connection: %m");
+ v->fd = -1; /* take the fd out of the connection again */
+ varlink_close(v);
+ return r;
+ }
+ }
+
+ if (ret)
+ *ret = v;
+
+ return 0;
+}
+
+static int connect_callback(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ VarlinkServerSocket *ss = userdata;
+ _cleanup_close_ int cfd = -1;
+ Varlink *v = NULL;
+ int r;
+
+ assert(source);
+ assert(ss);
+
+ varlink_server_log(ss->server, "New incoming connection.");
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return 0;
+
+ return varlink_server_log_errno(ss->server, errno, "Failed to accept incoming socket: %m");
+ }
+
+ r = varlink_server_add_connection(ss->server, cfd, &v);
+ if (r < 0)
+ return 0;
+
+ TAKE_FD(cfd);
+
+ if (ss->server->connect_callback) {
+ r = ss->server->connect_callback(ss->server, v, ss->server->userdata);
+ if (r < 0) {
+ varlink_log_errno(v, r, "Connection callback returned error, disconnecting client: %m");
+ varlink_close(v);
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+int varlink_server_listen_fd(VarlinkServer *s, int fd) {
+ _cleanup_free_ VarlinkServerSocket *ss = NULL;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ ss = new(VarlinkServerSocket, 1);
+ if (!ss)
+ return -ENOMEM;
+
+ *ss = (VarlinkServerSocket) {
+ .server = s,
+ .fd = fd,
+ };
+
+ if (s->event) {
+ r = sd_event_add_io(s->event, &ss->event_source, fd, EPOLLIN, connect_callback, ss);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(ss->event_source, s->event_priority);
+ if (r < 0)
+ return r;
+ }
+
+ LIST_PREPEND(sockets, s->sockets, TAKE_PTR(ss));
+ return 0;
+}
+
+int varlink_server_listen_address(VarlinkServer *s, const char *address, mode_t m) {
+ union sockaddr_union sockaddr;
+ socklen_t sockaddr_len;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(address, -EINVAL);
+ assert_return((m & ~0777) == 0, -EINVAL);
+
+ r = sockaddr_un_set_path(&sockaddr.un, address);
+ if (r < 0)
+ return r;
+ sockaddr_len = r;
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+
+ (void) sockaddr_un_unlink(&sockaddr.un);
+
+ RUN_WITH_UMASK(~m & 0777) {
+ r = mac_selinux_bind(fd, &sockaddr.sa, sockaddr_len);
+ if (r < 0)
+ return r;
+ }
+
+ if (listen(fd, SOMAXCONN) < 0)
+ return -errno;
+
+ r = varlink_server_listen_fd(s, fd);
+ if (r < 0)
+ return r;
+
+ TAKE_FD(fd);
+ return 0;
+}
+
+void* varlink_server_set_userdata(VarlinkServer *s, void *userdata) {
+ void *ret;
+
+ assert_return(s, NULL);
+
+ ret = s->userdata;
+ s->userdata = userdata;
+
+ return ret;
+}
+
+void* varlink_server_get_userdata(VarlinkServer *s) {
+ assert_return(s, NULL);
+
+ return s->userdata;
+}
+
+static VarlinkServerSocket* varlink_server_socket_destroy(VarlinkServerSocket *ss) {
+ if (!ss)
+ return NULL;
+
+ if (ss->server)
+ LIST_REMOVE(sockets, ss->server->sockets, ss);
+
+ sd_event_source_disable_unref(ss->event_source);
+
+ free(ss->address);
+ safe_close(ss->fd);
+
+ return mfree(ss);
+}
+
+int varlink_server_shutdown(VarlinkServer *s) {
+ assert_return(s, -EINVAL);
+
+ while (s->sockets)
+ varlink_server_socket_destroy(s->sockets);
+
+ return 0;
+}
+
+int varlink_server_attach_event(VarlinkServer *s, sd_event *e, int64_t priority) {
+ VarlinkServerSocket *ss;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(!s->event, -EBUSY);
+
+ if (e)
+ s->event = sd_event_ref(e);
+ else {
+ r = sd_event_default(&s->event);
+ if (r < 0)
+ return r;
+ }
+
+ LIST_FOREACH(sockets, ss, s->sockets) {
+ assert(!ss->event_source);
+
+ r = sd_event_add_io(s->event, &ss->event_source, ss->fd, EPOLLIN, connect_callback, ss);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(ss->event_source, priority);
+ if (r < 0)
+ goto fail;
+ }
+
+ s->event_priority = priority;
+ return 0;
+
+fail:
+ varlink_server_detach_event(s);
+ return r;
+}
+
+int varlink_server_detach_event(VarlinkServer *s) {
+ VarlinkServerSocket *ss;
+
+ assert_return(s, -EINVAL);
+
+ LIST_FOREACH(sockets, ss, s->sockets) {
+
+ if (!ss->event_source)
+ continue;
+
+ (void) sd_event_source_set_enabled(ss->event_source, SD_EVENT_OFF);
+ ss->event_source = sd_event_source_unref(ss->event_source);
+ }
+
+ sd_event_unref(s->event);
+ return 0;
+}
+
+sd_event *varlink_server_get_event(VarlinkServer *s) {
+ assert_return(s, NULL);
+
+ return s->event;
+}
+
+int varlink_server_bind_method(VarlinkServer *s, const char *method, VarlinkMethod callback) {
+ char *m;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(method, -EINVAL);
+ assert_return(callback, -EINVAL);
+
+ if (startswith(method, "org.varlink.service."))
+ return -EEXIST;
+
+ r = hashmap_ensure_allocated(&s->methods, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ m = strdup(method);
+ if (!m)
+ return -ENOMEM;
+
+ r = hashmap_put(s->methods, m, callback);
+ if (r < 0) {
+ free(m);
+ return r;
+ }
+
+ return 0;
+}
+
+int varlink_server_bind_method_many_internal(VarlinkServer *s, ...) {
+ va_list ap;
+ int r = 0;
+
+ assert_return(s, -EINVAL);
+
+ va_start(ap, s);
+ for (;;) {
+ VarlinkMethod callback;
+ const char *method;
+
+ method = va_arg(ap, const char *);
+ if (!method)
+ break;
+
+ callback = va_arg(ap, VarlinkMethod);
+
+ r = varlink_server_bind_method(s, method, callback);
+ if (r < 0)
+ break;
+ }
+ va_end(ap);
+
+ return r;
+}
+
+int varlink_server_bind_connect(VarlinkServer *s, VarlinkConnect callback) {
+ assert_return(s, -EINVAL);
+
+ if (callback && s->connect_callback && callback != s->connect_callback)
+ return -EBUSY;
+
+ s->connect_callback = callback;
+ return 0;
+}
+
+int varlink_server_bind_disconnect(VarlinkServer *s, VarlinkDisconnect callback) {
+ assert_return(s, -EINVAL);
+
+ if (callback && s->disconnect_callback && callback != s->disconnect_callback)
+ return -EBUSY;
+
+ s->disconnect_callback = callback;
+ return 0;
+}
+
+unsigned varlink_server_connections_max(VarlinkServer *s) {
+ int dts;
+
+ /* If a server is specified, return the setting for that server, otherwise the default value */
+ if (s)
+ return s->connections_max;
+
+ dts = getdtablesize();
+ assert_se(dts > 0);
+
+ /* Make sure we never use up more than ¾th of RLIMIT_NOFILE for IPC */
+ if (VARLINK_DEFAULT_CONNECTIONS_MAX > (unsigned) dts / 4 * 3)
+ return dts / 4 * 3;
+
+ return VARLINK_DEFAULT_CONNECTIONS_MAX;
+}
+
+unsigned varlink_server_connections_per_uid_max(VarlinkServer *s) {
+ unsigned m;
+
+ if (s)
+ return s->connections_per_uid_max;
+
+ /* Make sure to never use up more than ¾th of available connections for a single user */
+ m = varlink_server_connections_max(NULL);
+ if (VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX > m)
+ return m / 4 * 3;
+
+ return VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX;
+}
+
+int varlink_server_set_connections_per_uid_max(VarlinkServer *s, unsigned m) {
+ assert_return(s, -EINVAL);
+ assert_return(m > 0, -EINVAL);
+
+ s->connections_per_uid_max = m;
+ return 0;
+}
+
+int varlink_server_set_connections_max(VarlinkServer *s, unsigned m) {
+ assert_return(s, -EINVAL);
+ assert_return(m > 0, -EINVAL);
+
+ s->connections_max = m;
+ return 0;
+}
+
+unsigned varlink_server_current_connections(VarlinkServer *s) {
+ assert_return(s, UINT_MAX);
+
+ return s->n_connections;
+}
+
+int varlink_server_set_description(VarlinkServer *s, const char *description) {
+ assert_return(s, -EINVAL);
+
+ return free_and_strdup(&s->description, description);
+}
diff --git a/src/shared/varlink.h b/src/shared/varlink.h
new file mode 100644
index 0000000..7ea1f91
--- /dev/null
+++ b/src/shared/varlink.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "json.h"
+#include "time-util.h"
+
+/* A minimal Varlink implementation. We only implement the minimal, obvious bits here though. No validation,
+ * no introspection, no name service, just the stuff actually needed.
+ *
+ * You might wonder why we aren't using libvarlink here? Varlink is a very simple protocol, which allows us
+ * to write our own implementation relatively easily. However, the main reasons are these:
+ *
+ * • We want to use our own JSON subsystem, with all the benefits that brings (i.e. accurate unsigned+signed
+ * 64bit integers, full fuzzing, logging during parsing and so on). If we'd want to use that with
+ * libvarlink we'd have to serialize and deserialize all the time from its own representation which is
+ * inefficient and nasty.
+ *
+ * • We want integration into sd-event, but also synchronous event-loop-less operation
+ *
+ * • We need proper per-UID accounting and access control, since we want to allow communication between
+ * unprivileged clients and privileged servers.
+ *
+ * • And of course, we don't want the name service and introspection stuff for now (though that might
+ * change).
+ */
+
+typedef struct Varlink Varlink;
+typedef struct VarlinkServer VarlinkServer;
+
+typedef enum VarlinkReplyFlags {
+ VARLINK_REPLY_ERROR = 1 << 0,
+ VARLINK_REPLY_CONTINUES = 1 << 1,
+ VARLINK_REPLY_LOCAL = 1 << 2,
+} VarlinkReplyFlags;
+
+typedef enum VarlinkMethodFlags {
+ VARLINK_METHOD_ONEWAY = 1 << 0,
+ VARLINK_METHOD_MORE = 2 << 1,
+} VarlinkMethodFlags;
+
+typedef enum VarlinkServerFlags {
+ VARLINK_SERVER_ROOT_ONLY = 1 << 0, /* Only accessible by root */
+ VARLINK_SERVER_MYSELF_ONLY = 1 << 1, /* Only accessible by our own UID */
+ VARLINK_SERVER_ACCOUNT_UID = 1 << 2, /* Do per user accounting */
+
+ _VARLINK_SERVER_FLAGS_ALL = (1 << 3) - 1,
+} VarlinkServerFlags;
+
+typedef int (*VarlinkMethod)(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata);
+typedef int (*VarlinkReply)(Varlink *link, JsonVariant *parameters, const char *error_id, VarlinkReplyFlags flags, void *userdata);
+typedef int (*VarlinkConnect)(VarlinkServer *server, Varlink *link, void *userdata);
+typedef void (*VarlinkDisconnect)(VarlinkServer *server, Varlink *link, void *userdata);
+
+int varlink_connect_address(Varlink **ret, const char *address);
+int varlink_connect_fd(Varlink **ret, int fd);
+
+Varlink* varlink_ref(Varlink *link);
+Varlink* varlink_unref(Varlink *v);
+
+int varlink_get_fd(Varlink *v);
+int varlink_get_events(Varlink *v);
+int varlink_get_timeout(Varlink *v, usec_t *ret);
+
+int varlink_attach_event(Varlink *v, sd_event *e, int64_t priority);
+void varlink_detach_event(Varlink *v);
+sd_event *varlink_get_event(Varlink *v);
+
+int varlink_process(Varlink *v);
+int varlink_wait(Varlink *v, usec_t timeout);
+
+int varlink_flush(Varlink *v);
+int varlink_close(Varlink *v);
+
+Varlink* varlink_flush_close_unref(Varlink *v);
+Varlink* varlink_close_unref(Varlink *v);
+
+/* Enqueue method call, not expecting a reply */
+int varlink_send(Varlink *v, const char *method, JsonVariant *parameters);
+int varlink_sendb(Varlink *v, const char *method, ...);
+
+/* Send method call and wait for reply */
+int varlink_call(Varlink *v, const char *method, JsonVariant *parameters, JsonVariant **ret_parameters, const char **ret_error_id, VarlinkReplyFlags *ret_flags);
+int varlink_callb(Varlink *v, const char *method, JsonVariant **ret_parameters, const char **ret_error_id, VarlinkReplyFlags *ret_flags, ...);
+
+/* Enqueue method call, expect a reply, which is eventually delivered to the reply callback */
+int varlink_invoke(Varlink *v, const char *method, JsonVariant *parameters);
+int varlink_invokeb(Varlink *v, const char *method, ...);
+
+/* Enqueue method call, expect a reply now, and possibly more later, which are all delivered to the reply callback */
+int varlink_observe(Varlink *v, const char *method, JsonVariant *parameters);
+int varlink_observeb(Varlink *v, const char *method, ...);
+
+/* Enqueue a final reply */
+int varlink_reply(Varlink *v, JsonVariant *parameters);
+int varlink_replyb(Varlink *v, ...);
+
+/* Enqueue a (final) error */
+int varlink_error(Varlink *v, const char *error_id, JsonVariant *parameters);
+int varlink_errorb(Varlink *v, const char *error_id, ...);
+int varlink_error_invalid_parameter(Varlink *v, JsonVariant *parameters);
+int varlink_error_errno(Varlink *v, int error);
+
+/* Enqueue a "more" reply */
+int varlink_notify(Varlink *v, JsonVariant *parameters);
+int varlink_notifyb(Varlink *v, ...);
+
+/* Bind a disconnect, reply or timeout callback */
+int varlink_bind_reply(Varlink *v, VarlinkReply reply);
+
+void* varlink_set_userdata(Varlink *v, void *userdata);
+void* varlink_get_userdata(Varlink *v);
+
+int varlink_get_peer_uid(Varlink *v, uid_t *ret);
+int varlink_get_peer_pid(Varlink *v, pid_t *ret);
+
+int varlink_set_relative_timeout(Varlink *v, usec_t usec);
+
+VarlinkServer* varlink_get_server(Varlink *v);
+
+int varlink_set_description(Varlink *v, const char *d);
+
+/* Create a varlink server */
+int varlink_server_new(VarlinkServer **ret, VarlinkServerFlags flags);
+VarlinkServer *varlink_server_ref(VarlinkServer *s);
+VarlinkServer *varlink_server_unref(VarlinkServer *s);
+
+/* Add addresses or fds to listen on */
+int varlink_server_listen_address(VarlinkServer *s, const char *address, mode_t mode);
+int varlink_server_listen_fd(VarlinkServer *s, int fd);
+int varlink_server_add_connection(VarlinkServer *s, int fd, Varlink **ret);
+
+/* Bind callbacks */
+int varlink_server_bind_method(VarlinkServer *s, const char *method, VarlinkMethod callback);
+int varlink_server_bind_method_many_internal(VarlinkServer *s, ...);
+#define varlink_server_bind_method_many(s, ...) varlink_server_bind_method_many_internal(s, __VA_ARGS__, NULL)
+int varlink_server_bind_connect(VarlinkServer *s, VarlinkConnect connect);
+int varlink_server_bind_disconnect(VarlinkServer *s, VarlinkDisconnect disconnect);
+
+void* varlink_server_set_userdata(VarlinkServer *s, void *userdata);
+void* varlink_server_get_userdata(VarlinkServer *s);
+
+int varlink_server_attach_event(VarlinkServer *v, sd_event *e, int64_t priority);
+int varlink_server_detach_event(VarlinkServer *v);
+sd_event *varlink_server_get_event(VarlinkServer *v);
+
+int varlink_server_shutdown(VarlinkServer *server);
+
+unsigned varlink_server_connections_max(VarlinkServer *s);
+unsigned varlink_server_connections_per_uid_max(VarlinkServer *s);
+
+int varlink_server_set_connections_per_uid_max(VarlinkServer *s, unsigned m);
+int varlink_server_set_connections_max(VarlinkServer *s, unsigned m);
+
+unsigned varlink_server_current_connections(VarlinkServer *s);
+
+int varlink_server_set_description(VarlinkServer *s, const char *description);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_close_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_flush_close_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(VarlinkServer *, varlink_server_unref);
+
+#define VARLINK_ERROR_DISCONNECTED "io.systemd.Disconnected"
+#define VARLINK_ERROR_TIMEOUT "io.systemd.TimedOut"
+#define VARLINK_ERROR_PROTOCOL "io.systemd.Protocol"
+#define VARLINK_ERROR_SYSTEM "io.systemd.System"
+
+#define VARLINK_ERROR_INTERFACE_NOT_FOUND "org.varlink.service.InterfaceNotFound"
+#define VARLINK_ERROR_METHOD_NOT_FOUND "org.varlink.service.MethodNotFound"
+#define VARLINK_ERROR_METHOD_NOT_IMPLEMENTED "org.varlink.service.MethodNotImplemented"
+#define VARLINK_ERROR_INVALID_PARAMETER "org.varlink.service.InvalidParameter"
+#define VARLINK_ERROR_SUBSCRIPTION_TAKEN "org.varlink.service.SubscriptionTaken"
diff --git a/src/shared/verbs.c b/src/shared/verbs.c
new file mode 100644
index 0000000..2d19172
--- /dev/null
+++ b/src/shared/verbs.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "env-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "verbs.h"
+#include "virt.h"
+
+/* Wraps running_in_chroot() which is used in various places, but also adds an environment variable check so external
+ * processes can reliably force this on.
+ */
+bool running_in_chroot_or_offline(void) {
+ int r;
+
+ /* Added to support use cases like rpm-ostree, where from %post scripts we only want to execute "preset", but
+ * not "start"/"restart" for example.
+ *
+ * See docs/ENVIRONMENT.md for docs.
+ */
+ r = getenv_bool("SYSTEMD_OFFLINE");
+ if (r < 0 && r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_OFFLINE: %m");
+ else if (r >= 0)
+ return r > 0;
+
+ /* We've had this condition check for a long time which basically checks for legacy chroot case like Fedora's
+ * "mock", which is used for package builds. We don't want to try to start systemd services there, since
+ * without --new-chroot we don't even have systemd running, and even if we did, adding a concept of background
+ * daemons to builds would be an enormous change, requiring considering things like how the journal output is
+ * handled, etc. And there's really not a use case today for a build talking to a service.
+ *
+ * Note this call itself also looks for a different variable SYSTEMD_IGNORE_CHROOT=1.
+ */
+ r = running_in_chroot();
+ if (r < 0)
+ log_debug_errno(r, "running_in_chroot(): %m");
+
+ return r > 0;
+}
+
+const Verb* verbs_find_verb(const char *name, const Verb verbs[]) {
+ for (size_t i = 0; verbs[i].dispatch; i++)
+ if (streq_ptr(name, verbs[i].verb) ||
+ (!name && FLAGS_SET(verbs[i].flags, VERB_DEFAULT)))
+ return &verbs[i];
+
+ /* At the end of the list? */
+ return NULL;
+}
+
+int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata) {
+ const Verb *verb;
+ const char *name;
+ int left;
+
+ assert(verbs);
+ assert(verbs[0].dispatch);
+ assert(argc >= 0);
+ assert(argv);
+ assert(argc >= optind);
+
+ left = argc - optind;
+ argv += optind;
+ optind = 0;
+ name = argv[0];
+
+ verb = verbs_find_verb(name, verbs);
+ if (!verb) {
+ if (name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown command verb %s.", name);
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Command verb required.");
+ }
+
+ if (!name)
+ left = 1;
+
+ if (verb->min_args != VERB_ANY &&
+ (unsigned) left < verb->min_args)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too few arguments.");
+
+ if (verb->max_args != VERB_ANY &&
+ (unsigned) left > verb->max_args)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ if ((verb->flags & VERB_ONLINE_ONLY) && running_in_chroot_or_offline()) {
+ log_info("Running in chroot, ignoring command '%s'", name ?: verb->verb);
+ return 0;
+ }
+
+ if (name)
+ return verb->dispatch(left, argv, userdata);
+ else {
+ char* fake[2] = {
+ (char*) verb->verb,
+ NULL
+ };
+
+ return verb->dispatch(1, fake, userdata);
+ }
+}
diff --git a/src/shared/verbs.h b/src/shared/verbs.h
new file mode 100644
index 0000000..245bb37
--- /dev/null
+++ b/src/shared/verbs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#define VERB_ANY ((unsigned) -1)
+
+typedef enum VerbFlags {
+ VERB_DEFAULT = 1 << 0, /* The verb to run if no verb is specified */
+ VERB_ONLINE_ONLY = 1 << 1, /* Just do nothing when running in chroot or offline */
+} VerbFlags;
+
+typedef struct {
+ const char *verb;
+ unsigned min_args, max_args;
+ VerbFlags flags;
+ int (* const dispatch)(int argc, char *argv[], void *userdata);
+} Verb;
+
+bool running_in_chroot_or_offline(void);
+
+const Verb* verbs_find_verb(const char *name, const Verb verbs[]);
+int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata);
diff --git a/src/shared/vlan-util.c b/src/shared/vlan-util.c
new file mode 100644
index 0000000..cb43d50
--- /dev/null
+++ b/src/shared/vlan-util.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "vlan-util.h"
+
+int parse_vlanid(const char *p, uint16_t *ret) {
+ uint16_t id;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ r = safe_atou16(p, &id);
+ if (r < 0)
+ return r;
+ if (!vlanid_is_valid(id))
+ return -ERANGE;
+
+ *ret = id;
+ return 0;
+}
+
+int parse_vid_range(const char *p, uint16_t *vid, uint16_t *vid_end) {
+ unsigned lower, upper;
+ int r;
+
+ r = parse_range(p, &lower, &upper);
+ if (r < 0)
+ return r;
+
+ if (lower > VLANID_MAX || upper > VLANID_MAX || lower > upper)
+ return -EINVAL;
+
+ *vid = lower;
+ *vid_end = upper;
+ return 0;
+}
+
+int config_parse_default_port_vlanid(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ uint16_t *id = data;
+
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "none")) {
+ *id = 0;
+ return 0;
+ }
+
+ return config_parse_vlanid(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+}
+
+int config_parse_vlanid(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t *id = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_vlanid(rvalue, id);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "VLAN identifier outside of valid range 0…4094, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse VLAN identifier value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/src/shared/vlan-util.h b/src/shared/vlan-util.h
new file mode 100644
index 0000000..0336908
--- /dev/null
+++ b/src/shared/vlan-util.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include "conf-parser.h"
+
+#define VLANID_MAX 4094
+#define VLANID_INVALID UINT16_MAX
+
+/* Note that we permit VLAN Id 0 here, as that is apparently OK by the Linux kernel */
+static inline bool vlanid_is_valid(uint16_t id) {
+ return id <= VLANID_MAX;
+}
+
+int parse_vlanid(const char *p, uint16_t *ret);
+int parse_vid_range(const char *p, uint16_t *vid, uint16_t *vid_end);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_default_port_vlanid);
+CONFIG_PARSER_PROTOTYPE(config_parse_vlanid);
diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c
new file mode 100644
index 0000000..3323897
--- /dev/null
+++ b/src/shared/volatile-util.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "volatile-util.h"
+
+int query_volatile_mode(VolatileMode *ret) {
+ _cleanup_free_ char *mode = NULL;
+ int r;
+
+ r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ret = VOLATILE_NO;
+ return 0;
+ }
+
+ if (mode) {
+ VolatileMode m;
+
+ m = volatile_mode_from_string(mode);
+ if (m < 0)
+ return -EINVAL;
+
+ *ret = m;
+ } else
+ *ret = VOLATILE_YES;
+
+ return 1;
+}
+
+static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = {
+ [VOLATILE_NO] = "no",
+ [VOLATILE_YES] = "yes",
+ [VOLATILE_STATE] = "state",
+ [VOLATILE_OVERLAY] = "overlay",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES);
diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h
new file mode 100644
index 0000000..9a1bb38
--- /dev/null
+++ b/src/shared/volatile-util.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef enum VolatileMode {
+ VOLATILE_NO,
+ VOLATILE_YES,
+ VOLATILE_STATE,
+ VOLATILE_OVERLAY,
+ _VOLATILE_MODE_MAX,
+ _VOLATILE_MODE_INVALID = -1
+} VolatileMode;
+
+VolatileMode volatile_mode_from_string(const char *s);
+const char* volatile_mode_to_string(VolatileMode m);
+
+int query_volatile_mode(VolatileMode *ret);
diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c
new file mode 100644
index 0000000..d33acaf
--- /dev/null
+++ b/src/shared/watchdog.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <linux/watchdog.h>
+
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "watchdog.h"
+
+static int watchdog_fd = -1;
+static char *watchdog_device = NULL;
+static usec_t watchdog_timeout = USEC_INFINITY;
+static usec_t watchdog_last_ping = USEC_INFINITY;
+
+static int update_timeout(void) {
+ if (watchdog_fd < 0)
+ return 0;
+ if (watchdog_timeout == USEC_INFINITY)
+ return 0;
+
+ if (watchdog_timeout == 0) {
+ int flags;
+
+ flags = WDIOS_DISABLECARD;
+ if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
+ return log_warning_errno(errno, "Failed to disable hardware watchdog: %m");
+ } else {
+ char buf[FORMAT_TIMESPAN_MAX];
+ int sec, flags;
+ usec_t t;
+
+ t = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
+ sec = (int) t >= INT_MAX ? INT_MAX : t; /* Saturate */
+ if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec) < 0)
+ return log_warning_errno(errno, "Failed to set timeout to %is: %m", sec);
+
+ watchdog_timeout = (usec_t) sec * USEC_PER_SEC;
+ log_info("Set hardware watchdog to %s.", format_timespan(buf, sizeof(buf), watchdog_timeout, 0));
+
+ flags = WDIOS_ENABLECARD;
+ if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0) {
+ /* ENOTTY means the watchdog is always enabled so we're fine */
+ log_full(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING,
+ "Failed to enable hardware watchdog: %m");
+ if (!ERRNO_IS_NOT_SUPPORTED(errno))
+ return -errno;
+ }
+
+ if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
+ return log_warning_errno(errno, "Failed to ping hardware watchdog: %m");
+
+ watchdog_last_ping = now(clock_boottime_or_monotonic());
+ }
+
+ return 0;
+}
+
+static int open_watchdog(void) {
+ struct watchdog_info ident;
+ const char *fn;
+
+ if (watchdog_fd >= 0)
+ return 0;
+
+ fn = watchdog_device ?: "/dev/watchdog";
+ watchdog_fd = open(fn, O_WRONLY|O_CLOEXEC);
+ if (watchdog_fd < 0)
+ return log_debug_errno(errno, "Failed to open watchdog device %s: %m", fn);
+
+ if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident) < 0)
+ log_debug_errno(errno, "Hardware watchdog %s does not support WDIOC_GETSUPPORT ioctl: %m", fn);
+ else
+ log_info("Using hardware watchdog '%s', version %x, device %s",
+ ident.identity,
+ ident.firmware_version,
+ fn);
+
+ return update_timeout();
+}
+
+int watchdog_set_device(char *path) {
+ int r;
+
+ r = free_and_strdup(&watchdog_device, path);
+ if (r < 0)
+ return r;
+
+ if (r > 0) /* watchdog_device changed */
+ watchdog_fd = safe_close(watchdog_fd);
+
+ return r;
+}
+
+int watchdog_set_timeout(usec_t *usec) {
+ int r;
+
+ watchdog_timeout = *usec;
+
+ /* If we didn't open the watchdog yet and didn't get any explicit timeout value set, don't do
+ * anything */
+ if (watchdog_fd < 0 && watchdog_timeout == USEC_INFINITY)
+ return 0;
+
+ if (watchdog_fd < 0)
+ r = open_watchdog();
+ else
+ r = update_timeout();
+
+ *usec = watchdog_timeout;
+ return r;
+}
+
+usec_t watchdog_runtime_wait(void) {
+ usec_t rtwait, ntime;
+
+ if (!timestamp_is_set(watchdog_timeout))
+ return USEC_INFINITY;
+
+ /* Sleep half the watchdog timeout since the last successful ping at most */
+ if (timestamp_is_set(watchdog_last_ping)) {
+ ntime = now(clock_boottime_or_monotonic());
+ assert(ntime >= watchdog_last_ping);
+ rtwait = usec_sub_unsigned(watchdog_last_ping + (watchdog_timeout / 2), ntime);
+ } else
+ rtwait = watchdog_timeout / 2;
+
+ return rtwait;
+}
+
+int watchdog_ping(void) {
+ usec_t ntime;
+ int r;
+
+ ntime = now(clock_boottime_or_monotonic());
+
+ /* Never ping earlier than watchdog_timeout/4 and try to ping
+ * by watchdog_timeout/2 plus scheduling latencies the latest */
+ if (timestamp_is_set(watchdog_last_ping)) {
+ assert(ntime >= watchdog_last_ping);
+ if ((ntime - watchdog_last_ping) < (watchdog_timeout / 4))
+ return 0;
+ }
+
+ if (watchdog_fd < 0) {
+ r = open_watchdog();
+ if (r < 0)
+ return r;
+ }
+
+ if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
+ return log_warning_errno(errno, "Failed to ping hardware watchdog: %m");
+
+ watchdog_last_ping = ntime;
+ return 0;
+}
+
+void watchdog_close(bool disarm) {
+ if (watchdog_fd < 0)
+ return;
+
+ if (disarm) {
+ int flags;
+
+ /* Explicitly disarm it */
+ flags = WDIOS_DISABLECARD;
+ if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
+ log_warning_errno(errno, "Failed to disable hardware watchdog: %m");
+
+ /* To be sure, use magic close logic, too */
+ for (;;) {
+ static const char v = 'V';
+
+ if (write(watchdog_fd, &v, 1) > 0)
+ break;
+
+ if (errno != EINTR) {
+ log_error_errno(errno, "Failed to disarm watchdog timer: %m");
+ break;
+ }
+ }
+ }
+
+ watchdog_fd = safe_close(watchdog_fd);
+}
diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h
new file mode 100644
index 0000000..b7587db
--- /dev/null
+++ b/src/shared/watchdog.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+int watchdog_set_device(char *path);
+int watchdog_set_timeout(usec_t *usec);
+int watchdog_ping(void);
+void watchdog_close(bool disarm);
+usec_t watchdog_runtime_wait(void);
+
+static inline void watchdog_free_device(void) {
+ (void) watchdog_set_device(NULL);
+}
diff --git a/src/shared/web-util.c b/src/shared/web-util.c
new file mode 100644
index 0000000..82cd5fb
--- /dev/null
+++ b/src/shared/web-util.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "web-util.h"
+
+bool http_etag_is_valid(const char *etag) {
+ if (isempty(etag))
+ return false;
+
+ if (!endswith(etag, "\""))
+ return false;
+
+ if (!STARTSWITH_SET(etag, "\"", "W/\""))
+ return false;
+
+ return true;
+}
+
+bool http_url_is_valid(const char *url) {
+ const char *p;
+
+ if (isempty(url))
+ return false;
+
+ p = STARTSWITH_SET(url, "http://", "https://");
+ if (!p)
+ return false;
+
+ if (isempty(p))
+ return false;
+
+ return ascii_is_valid(p);
+}
+
+bool documentation_url_is_valid(const char *url) {
+ const char *p;
+
+ if (isempty(url))
+ return false;
+
+ if (http_url_is_valid(url))
+ return true;
+
+ p = STARTSWITH_SET(url, "file:/", "info:", "man:");
+ if (isempty(p))
+ return false;
+
+ return ascii_is_valid(p);
+}
diff --git a/src/shared/web-util.h b/src/shared/web-util.h
new file mode 100644
index 0000000..ec54669
--- /dev/null
+++ b/src/shared/web-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+bool http_url_is_valid(const char *url) _pure_;
+
+bool documentation_url_is_valid(const char *url) _pure_;
+
+bool http_etag_is_valid(const char *etag);
diff --git a/src/shared/wifi-util.c b/src/shared/wifi-util.c
new file mode 100644
index 0000000..2ac8846
--- /dev/null
+++ b/src/shared/wifi-util.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "log.h"
+#include "wifi-util.h"
+
+int wifi_get_interface(sd_netlink *genl, int ifindex, enum nl80211_iftype *iftype, char **ssid) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ sd_genl_family family;
+ int r;
+
+ assert(genl);
+ assert(ifindex > 0);
+
+ r = sd_genl_message_new(genl, SD_GENL_NL80211, NL80211_CMD_GET_INTERFACE, &m);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create generic netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, NL80211_ATTR_IFINDEX, ifindex);
+ if (r < 0)
+ return log_debug_errno(r, "Could not append NL80211_ATTR_IFINDEX attribute: %m");
+
+ r = sd_netlink_call(genl, m, 0, &reply);
+ if (r == -ENODEV) {
+ /* For obsolete WEXT driver. */
+ log_debug_errno(r, "Failed to request information about wifi interface %d. "
+ "The device doesn't seem to have nl80211 interface. Ignoring.",
+ ifindex);
+ goto nodata;
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to request information about wifi interface %d: %m", ifindex);
+ if (!reply) {
+ log_debug_errno(r, "No reply received to request for information about wifi interface %d, ignoring.", ifindex);
+ goto nodata;
+ }
+
+ r = sd_netlink_message_get_errno(reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get information about wifi interface %d: %m", ifindex);
+
+ r = sd_genl_message_get_family(genl, reply, &family);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine genl family: %m");
+ if (family != SD_GENL_NL80211) {
+ log_debug("Received message of unexpected genl family %u, ignoring.", family);
+ goto nodata;
+ }
+
+ if (iftype) {
+ uint32_t t;
+
+ r = sd_netlink_message_read_u32(reply, NL80211_ATTR_IFTYPE, &t);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get NL80211_ATTR_IFTYPE attribute: %m");
+ *iftype = t;
+ }
+
+ if (ssid) {
+ r = sd_netlink_message_read_string_strdup(reply, NL80211_ATTR_SSID, ssid);
+ if (r == -ENODATA)
+ *ssid = NULL;
+ else if (r < 0)
+ return log_debug_errno(r, "Failed to get NL80211_ATTR_SSID attribute: %m");
+ }
+
+ return 1;
+
+nodata:
+ if (iftype)
+ *iftype = 0;
+ if (ssid)
+ *ssid = NULL;
+ return 0;
+}
+
+int wifi_get_station(sd_netlink *genl, int ifindex, struct ether_addr *bssid) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ sd_genl_family family;
+ int r;
+
+ assert(genl);
+ assert(ifindex > 0);
+ assert(bssid);
+
+ r = sd_genl_message_new(genl, SD_GENL_NL80211, NL80211_CMD_GET_STATION, &m);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create generic netlink message: %m");
+
+ r = sd_netlink_message_set_flags(m, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to set dump flag: %m");
+
+ r = sd_netlink_message_append_u32(m, NL80211_ATTR_IFINDEX, ifindex);
+ if (r < 0)
+ return log_debug_errno(r, "Could not append NL80211_ATTR_IFINDEX attribute: %m");
+
+ r = sd_netlink_call(genl, m, 0, &reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to request information about wifi station: %m");
+ if (!reply) {
+ log_debug_errno(r, "No reply received to request for information about wifi station, ignoring.");
+ goto nodata;
+ }
+
+ r = sd_netlink_message_get_errno(reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get information about wifi station: %m");
+
+ r = sd_genl_message_get_family(genl, reply, &family);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine genl family: %m");
+ if (family != SD_GENL_NL80211) {
+ log_debug("Received message of unexpected genl family %u, ignoring.", family);
+ goto nodata;
+ }
+
+ r = sd_netlink_message_read_ether_addr(reply, NL80211_ATTR_MAC, bssid);
+ if (r == -ENODATA)
+ goto nodata;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get NL80211_ATTR_MAC attribute: %m");
+
+ return 1;
+
+nodata:
+ *bssid = (struct ether_addr) {};
+ return 0;
+}
diff --git a/src/shared/wifi-util.h b/src/shared/wifi-util.h
new file mode 100644
index 0000000..0ce4137
--- /dev/null
+++ b/src/shared/wifi-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <linux/nl80211.h>
+#include <net/ethernet.h>
+
+#include "sd-netlink.h"
+
+int wifi_get_interface(sd_netlink *genl, int ifindex, enum nl80211_iftype *iftype, char **ssid);
+int wifi_get_station(sd_netlink *genl, int ifindex, struct ether_addr *bssid);
diff --git a/src/shared/xml.c b/src/shared/xml.c
new file mode 100644
index 0000000..8ff3fea
--- /dev/null
+++ b/src/shared/xml.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "xml.h"
+
+enum {
+ STATE_NULL,
+ STATE_TEXT,
+ STATE_TAG,
+ STATE_ATTRIBUTE,
+};
+
+static void inc_lines(unsigned *line, const char *s, size_t n) {
+ const char *p = s;
+
+ if (!line)
+ return;
+
+ for (;;) {
+ const char *f;
+
+ f = memchr(p, '\n', n);
+ if (!f)
+ return;
+
+ n -= (f - p) + 1;
+ p = f + 1;
+ (*line)++;
+ }
+}
+
+/* We don't actually do real XML here. We only read a simplistic
+ * subset, that is a bit less strict that XML and lacks all the more
+ * complex features, like entities, or namespaces. However, we do
+ * support some HTML5-like simplifications */
+
+int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
+ const char *c, *e, *b;
+ char *ret;
+ int t;
+
+ assert(p);
+ assert(*p);
+ assert(name);
+ assert(state);
+
+ t = PTR_TO_INT(*state);
+ c = *p;
+
+ if (t == STATE_NULL) {
+ if (line)
+ *line = 1;
+ t = STATE_TEXT;
+ }
+
+ for (;;) {
+ if (*c == 0)
+ return XML_END;
+
+ switch (t) {
+
+ case STATE_TEXT: {
+ int x;
+
+ e = strchrnul(c, '<');
+ if (e > c) {
+ /* More text... */
+ ret = strndup(c, e - c);
+ if (!ret)
+ return -ENOMEM;
+
+ inc_lines(line, c, e - c);
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_TEXT);
+
+ return XML_TEXT;
+ }
+
+ assert(*e == '<');
+ b = c + 1;
+
+ if (startswith(b, "!--")) {
+ /* A comment */
+ e = strstr(b + 3, "-->");
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 3 - b);
+
+ c = e + 3;
+ continue;
+ }
+
+ if (*b == '?') {
+ /* Processing instruction */
+
+ e = strstr(b + 1, "?>");
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 2 - b);
+
+ c = e + 2;
+ continue;
+ }
+
+ if (*b == '!') {
+ /* DTD */
+
+ e = strchr(b + 1, '>');
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 1 - b);
+
+ c = e + 1;
+ continue;
+ }
+
+ if (*b == '/') {
+ /* A closing tag */
+ x = XML_TAG_CLOSE;
+ b++;
+ } else
+ x = XML_TAG_OPEN;
+
+ e = strpbrk(b, WHITESPACE "/>");
+ if (!e)
+ return -EINVAL;
+
+ ret = strndup(b, e - b);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_TAG);
+
+ return x;
+ }
+
+ case STATE_TAG:
+
+ b = c + strspn(c, WHITESPACE);
+ if (*b == 0)
+ return -EINVAL;
+
+ inc_lines(line, c, b - c);
+
+ e = b + strcspn(b, WHITESPACE "=/>");
+ if (e > b) {
+ /* An attribute */
+
+ ret = strndup(b, e - b);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_ATTRIBUTE);
+
+ return XML_ATTRIBUTE_NAME;
+ }
+
+ if (startswith(b, "/>")) {
+ /* An empty tag */
+
+ *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
+ *p = b + 2;
+ *state = INT_TO_PTR(STATE_TEXT);
+
+ return XML_TAG_CLOSE_EMPTY;
+ }
+
+ if (*b != '>')
+ return -EINVAL;
+
+ c = b + 1;
+ t = STATE_TEXT;
+ continue;
+
+ case STATE_ATTRIBUTE:
+
+ if (*c == '=') {
+ c++;
+
+ if (IN_SET(*c, '\'', '"')) {
+ /* Tag with a quoted value */
+
+ e = strchr(c+1, *c);
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, c, e - c);
+
+ ret = strndup(c+1, e - c - 1);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e + 1;
+ *state = INT_TO_PTR(STATE_TAG);
+
+ return XML_ATTRIBUTE_VALUE;
+
+ }
+
+ /* Tag with a value without quotes */
+
+ b = strpbrk(c, WHITESPACE ">");
+ if (!b)
+ b = c;
+
+ ret = strndup(c, b - c);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = b;
+ *state = INT_TO_PTR(STATE_TAG);
+ return XML_ATTRIBUTE_VALUE;
+ }
+
+ t = STATE_TAG;
+ continue;
+ }
+
+ }
+
+ assert_not_reached("Bad state");
+}
diff --git a/src/shared/xml.h b/src/shared/xml.h
new file mode 100644
index 0000000..217b3b0
--- /dev/null
+++ b/src/shared/xml.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+enum {
+ XML_END,
+ XML_TEXT,
+ XML_TAG_OPEN,
+ XML_TAG_CLOSE,
+ XML_TAG_CLOSE_EMPTY,
+ XML_ATTRIBUTE_NAME,
+ XML_ATTRIBUTE_VALUE,
+};
+
+int xml_tokenize(const char **p, char **name, void **state, unsigned *line);
diff --git a/src/shutdown/meson.build b/src/shutdown/meson.build
new file mode 100644
index 0000000..ebf0bed
--- /dev/null
+++ b/src/shutdown/meson.build
@@ -0,0 +1,5 @@
+systemd_shutdown_sources = files('''
+ shutdown.c
+ umount.c
+ umount.h
+'''.split())
diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c
new file mode 100644
index 0000000..0d07865
--- /dev/null
+++ b/src/shutdown/shutdown.c
@@ -0,0 +1,625 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2010 ProFUSION embedded systems
+***/
+
+#include <errno.h>
+#include <getopt.h>
+#include <linux/reboot.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "async.h"
+#include "binfmt-util.h"
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "killall.h"
+#include "log.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "reboot-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "switch-root.h"
+#include "sysctl-util.h"
+#include "terminal-util.h"
+#include "umount.h"
+#include "util.h"
+#include "virt.h"
+#include "watchdog.h"
+
+#define SYNC_PROGRESS_ATTEMPTS 3
+#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC)
+
+static char* arg_verb;
+static uint8_t arg_exit_code;
+static usec_t arg_timeout = DEFAULT_TIMEOUT_USEC;
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_LOG_LEVEL = 0x100,
+ ARG_LOG_TARGET,
+ ARG_LOG_COLOR,
+ ARG_LOG_LOCATION,
+ ARG_LOG_TIME,
+ ARG_EXIT_CODE,
+ ARG_TIMEOUT,
+ };
+
+ static const struct option options[] = {
+ { "log-level", required_argument, NULL, ARG_LOG_LEVEL },
+ { "log-target", required_argument, NULL, ARG_LOG_TARGET },
+ { "log-color", optional_argument, NULL, ARG_LOG_COLOR },
+ { "log-location", optional_argument, NULL, ARG_LOG_LOCATION },
+ { "log-time", optional_argument, NULL, ARG_LOG_TIME },
+ { "exit-code", required_argument, NULL, ARG_EXIT_CODE },
+ { "timeout", required_argument, NULL, ARG_TIMEOUT },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 1);
+ assert(argv);
+
+ /* "-" prevents getopt from permuting argv[] and moving the verb away
+ * from argv[1]. Our interface to initrd promises it'll be there. */
+ while ((c = getopt_long(argc, argv, "-", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_LOG_LEVEL:
+ r = log_set_max_level_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log level %s, ignoring: %m", optarg);
+
+ break;
+
+ case ARG_LOG_TARGET:
+ r = log_set_target_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log target %s, ignoring: %m", optarg);
+
+ break;
+
+ case ARG_LOG_COLOR:
+
+ if (optarg) {
+ r = log_show_color_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log color setting %s, ignoring: %m", optarg);
+ } else
+ log_show_color(true);
+
+ break;
+
+ case ARG_LOG_LOCATION:
+ if (optarg) {
+ r = log_show_location_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log location setting %s, ignoring: %m", optarg);
+ } else
+ log_show_location(true);
+
+ break;
+
+ case ARG_LOG_TIME:
+
+ if (optarg) {
+ r = log_show_time_from_string(optarg);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse log time setting %s, ignoring: %m", optarg);
+ } else
+ log_show_time(true);
+
+ break;
+
+ case ARG_EXIT_CODE:
+ r = safe_atou8(optarg, &arg_exit_code);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse exit code %s, ignoring: %m", optarg);
+
+ break;
+
+ case ARG_TIMEOUT:
+ r = parse_sec(optarg, &arg_timeout);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse shutdown timeout %s, ignoring: %m", optarg);
+
+ break;
+
+ case '\001':
+ if (!arg_verb)
+ arg_verb = optarg;
+ else
+ log_error("Excess arguments, ignoring");
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ if (!arg_verb)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Verb argument missing.");
+
+ return 0;
+}
+
+static int switch_root_initramfs(void) {
+ if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount bind /run/initramfs on /run/initramfs: %m");
+
+ if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
+ return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");
+
+ /* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
+ * /run/initramfs/shutdown will take care of these.
+ * Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
+ */
+ return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
+}
+
+/* Read the following fields from /proc/meminfo:
+ *
+ * NFS_Unstable
+ * Writeback
+ * Dirty
+ *
+ * Return true if the sum of these fields is greater than the previous
+ * value input. For all other issues, report the failure and indicate that
+ * the sync is not making progress.
+ */
+static int sync_making_progress(unsigned long long *prev_dirty) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned long long val = 0;
+ int ret;
+
+ f = fopen("/proc/meminfo", "re");
+ if (!f)
+ return log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ unsigned long long ull = 0;
+ int q;
+
+ q = read_line(f, LONG_LINE_MAX, &line);
+ if (q < 0)
+ return log_warning_errno(q, "Failed to parse /proc/meminfo: %m");
+ if (q == 0)
+ break;
+
+ if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
+ continue;
+
+ errno = 0;
+ if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
+ if (errno != 0)
+ log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
+ else
+ log_warning("Failed to parse /proc/meminfo");
+
+ return false;
+ }
+
+ val += ull;
+ }
+
+ ret = *prev_dirty > val;
+ *prev_dirty = val;
+ return ret;
+}
+
+static void sync_with_progress(void) {
+ unsigned long long dirty = ULLONG_MAX;
+ unsigned checks;
+ pid_t pid;
+ int r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ /* Due to the possibility of the sync operation hanging, we fork a child process and monitor
+ * the progress. If the timeout lapses, the assumption is that the particular sync stalled. */
+
+ r = asynchronous_sync(&pid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to fork sync(): %m");
+ return;
+ }
+
+ log_info("Syncing filesystems and block devices.");
+
+ /* Start monitoring the sync operation. If more than
+ * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
+ * we assume that the sync is stalled */
+ for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
+ r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
+ if (r == 0)
+ /* Sync finished without error.
+ * (The sync itself does not return an error code) */
+ return;
+ else if (r == -ETIMEDOUT) {
+ /* Reset the check counter if the "Dirty" value is
+ * decreasing */
+ if (sync_making_progress(&dirty) > 0)
+ checks = 0;
+ } else {
+ log_error_errno(r, "Failed to sync filesystems and block devices: %m");
+ return;
+ }
+ }
+
+ /* Only reached in the event of a timeout. We should issue a kill
+ * to the stray process. */
+ log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
+ (void) kill(pid, SIGKILL);
+}
+
+static int read_current_sysctl_printk_log_level(void) {
+ _cleanup_free_ char *sysctl_printk_vals = NULL, *sysctl_printk_curr = NULL;
+ int current_lvl;
+ const char *p;
+ int r;
+
+ r = sysctl_read("kernel/printk", &sysctl_printk_vals);
+ if (r < 0)
+ return log_debug_errno(r, "Cannot read sysctl kernel.printk: %m");
+
+ p = sysctl_printk_vals;
+ r = extract_first_word(&p, &sysctl_printk_curr, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to split out kernel printk priority: %m");
+ if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Short read while reading kernel.printk sysctl");
+
+ r = safe_atoi(sysctl_printk_curr, &current_lvl);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse kernel.printk sysctl: %s", sysctl_printk_vals);
+
+ return current_lvl;
+}
+
+static void bump_sysctl_printk_log_level(int min_level) {
+ int current_lvl, r;
+
+ /* Set the logging level to be able to see messages with log level smaller or equal to min_level */
+
+ current_lvl = read_current_sysctl_printk_log_level();
+ if (current_lvl < 0 || current_lvl >= min_level + 1)
+ return;
+
+ r = sysctl_writef("kernel/printk", "%i", min_level + 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to bump kernel.printk to %i: %m", min_level + 1);
+}
+
+int main(int argc, char *argv[]) {
+ bool need_umount, need_swapoff, need_loop_detach, need_dm_detach, need_md_detach, in_container, use_watchdog = false, can_initrd;
+ _cleanup_free_ char *cgroup = NULL;
+ char *arguments[3], *watchdog_device;
+ int cmd, r, umount_log_level = LOG_INFO;
+ static const char* const dirs[] = {SYSTEM_SHUTDOWN_PATH, NULL};
+
+ /* The log target defaults to console, but the original systemd process will pass its log target in through a
+ * command line argument, which will override this default. Also, ensure we'll never log to the journal or
+ * syslog, as these logging daemons are either already dead or will die very soon. */
+
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_set_prohibit_ipc(true);
+ log_parse_environment();
+
+ r = parse_argv(argc, argv);
+ if (r < 0)
+ goto error;
+
+ log_open();
+
+ umask(0022);
+
+ if (getpid_cached() != 1) {
+ log_error("Not executed by init (PID 1).");
+ r = -EPERM;
+ goto error;
+ }
+
+ if (streq(arg_verb, "reboot"))
+ cmd = RB_AUTOBOOT;
+ else if (streq(arg_verb, "poweroff"))
+ cmd = RB_POWER_OFF;
+ else if (streq(arg_verb, "halt"))
+ cmd = RB_HALT_SYSTEM;
+ else if (streq(arg_verb, "kexec"))
+ cmd = LINUX_REBOOT_CMD_KEXEC;
+ else if (streq(arg_verb, "exit"))
+ cmd = 0; /* ignored, just checking that arg_verb is valid */
+ else {
+ log_error("Unknown action '%s'.", arg_verb);
+ r = -EINVAL;
+ goto error;
+ }
+
+ (void) cg_get_root_path(&cgroup);
+ in_container = detect_container() > 0;
+
+ /* If the logging messages are going to KMSG, and if we are not running from a container, then try to
+ * update the sysctl kernel.printk current value in order to see "info" messages; This current log
+ * level is not updated if already big enough.
+ */
+ if (!in_container &&
+ IN_SET(log_get_target(),
+ LOG_TARGET_AUTO,
+ LOG_TARGET_JOURNAL_OR_KMSG,
+ LOG_TARGET_SYSLOG_OR_KMSG,
+ LOG_TARGET_KMSG))
+ bump_sysctl_printk_log_level(LOG_WARNING);
+
+ use_watchdog = getenv("WATCHDOG_USEC");
+ watchdog_device = getenv("WATCHDOG_DEVICE");
+ if (watchdog_device) {
+ r = watchdog_set_device(watchdog_device);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m",
+ watchdog_device);
+ }
+
+ /* Lock us into memory */
+ (void) mlockall(MCL_CURRENT|MCL_FUTURE);
+
+ /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
+ * slow IO is processed here already and the final process killing spree is not impacted by processes
+ * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
+ * result. */
+ if (!in_container)
+ sync_with_progress();
+
+ disable_coredumps();
+ disable_binfmt();
+
+ log_info("Sending SIGTERM to remaining processes...");
+ broadcast_signal(SIGTERM, true, true, arg_timeout);
+
+ log_info("Sending SIGKILL to remaining processes...");
+ broadcast_signal(SIGKILL, true, false, arg_timeout);
+
+ need_umount = !in_container;
+ need_swapoff = !in_container;
+ need_loop_detach = !in_container;
+ need_dm_detach = !in_container;
+ need_md_detach = !in_container;
+ can_initrd = !in_container && !in_initrd() && access("/run/initramfs/shutdown", X_OK) == 0;
+
+ /* Unmount all mountpoints, swaps, and loopback devices */
+ for (;;) {
+ bool changed = false;
+
+ if (use_watchdog)
+ (void) watchdog_ping();
+
+ /* Let's trim the cgroup tree on each iteration so
+ that we leave an empty cgroup tree around, so that
+ container managers get a nice notify event when we
+ are down */
+ if (cgroup)
+ (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);
+
+ if (need_umount) {
+ log_info("Unmounting file systems.");
+ r = umount_all(&changed, umount_log_level);
+ if (r == 0) {
+ need_umount = false;
+ log_info("All filesystems unmounted.");
+ } else if (r > 0)
+ log_info("Not all file systems unmounted, %d left.", r);
+ else
+ log_error_errno(r, "Failed to unmount file systems: %m");
+ }
+
+ if (need_swapoff) {
+ log_info("Deactivating swaps.");
+ r = swapoff_all(&changed);
+ if (r == 0) {
+ need_swapoff = false;
+ log_info("All swaps deactivated.");
+ } else if (r > 0)
+ log_info("Not all swaps deactivated, %d left.", r);
+ else
+ log_error_errno(r, "Failed to deactivate swaps: %m");
+ }
+
+ if (need_loop_detach) {
+ log_info("Detaching loop devices.");
+ r = loopback_detach_all(&changed, umount_log_level);
+ if (r == 0) {
+ need_loop_detach = false;
+ log_info("All loop devices detached.");
+ } else if (r > 0)
+ log_info("Not all loop devices detached, %d left.", r);
+ else
+ log_error_errno(r, "Failed to detach loop devices: %m");
+ }
+
+ if (need_md_detach) {
+ log_info("Stopping MD devices.");
+ r = md_detach_all(&changed, umount_log_level);
+ if (r == 0) {
+ need_md_detach = false;
+ log_info("All MD devices stopped.");
+ } else if (r > 0)
+ log_info("Not all MD devices stopped, %d left.", r);
+ else
+ log_error_errno(r, "Failed to stop MD devices: %m");
+ }
+
+ if (need_dm_detach) {
+ log_info("Detaching DM devices.");
+ r = dm_detach_all(&changed, umount_log_level);
+ if (r == 0) {
+ need_dm_detach = false;
+ log_info("All DM devices detached.");
+ } else if (r > 0)
+ log_info("Not all DM devices detached, %d left.", r);
+ else
+ log_error_errno(r, "Failed to detach DM devices: %m");
+ }
+
+ if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach
+ && !need_md_detach) {
+ log_info("All filesystems, swaps, loop devices, MD devices and DM devices detached.");
+ /* Yay, done */
+ break;
+ }
+
+ if (!changed && umount_log_level == LOG_INFO && !can_initrd) {
+ /* There are things we cannot get rid of. Loop one more time
+ * with LOG_ERR to inform the user. Note that we don't need
+ * to do this if there is a initrd to switch to, because that
+ * one is likely to get rid of the remounting mounts. If not,
+ * it will log about them. */
+ umount_log_level = LOG_ERR;
+ continue;
+ }
+
+ /* If in this iteration we didn't manage to
+ * unmount/deactivate anything, we simply give up */
+ if (!changed) {
+ log_info("Cannot finalize remaining%s%s%s%s%s continuing.",
+ need_umount ? " file systems," : "",
+ need_swapoff ? " swap devices," : "",
+ need_loop_detach ? " loop devices," : "",
+ need_dm_detach ? " DM devices," : "",
+ need_md_detach ? " MD devices," : "");
+ break;
+ }
+
+ log_debug("Couldn't finalize remaining %s%s%s%s%s trying again.",
+ need_umount ? " file systems," : "",
+ need_swapoff ? " swap devices," : "",
+ need_loop_detach ? " loop devices," : "",
+ need_dm_detach ? " DM devices," : "",
+ need_md_detach ? " MD devices," : "");
+ }
+
+ /* We're done with the watchdog. */
+ watchdog_free_device();
+
+ arguments[0] = NULL;
+ arguments[1] = arg_verb;
+ arguments[2] = NULL;
+ (void) execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+
+ (void) rlimit_nofile_safe();
+
+ if (can_initrd) {
+ r = switch_root_initramfs();
+ if (r >= 0) {
+ argv[0] = (char*) "/shutdown";
+
+ (void) setsid();
+ (void) make_console_stdio();
+
+ log_info("Successfully changed into root pivot.\n"
+ "Returning to initrd...");
+
+ execv("/shutdown", argv);
+ log_error_errno(errno, "Failed to execute shutdown binary: %m");
+ } else
+ log_error_errno(r, "Failed to switch root to \"/run/initramfs\": %m");
+ }
+
+ if (need_umount || need_swapoff || need_loop_detach || need_dm_detach || need_md_detach)
+ log_error("Failed to finalize%s%s%s%s%s ignoring.",
+ need_umount ? " file systems," : "",
+ need_swapoff ? " swap devices," : "",
+ need_loop_detach ? " loop devices," : "",
+ need_dm_detach ? " DM devices," : "",
+ need_md_detach ? " MD devices," : "");
+
+ /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
+ * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
+ * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
+ * let's do it once more. Do not remove this sync, data corruption will result. */
+ if (!in_container)
+ sync_with_progress();
+
+ if (streq(arg_verb, "exit")) {
+ if (in_container)
+ return arg_exit_code;
+
+ cmd = RB_POWER_OFF; /* We cannot exit() on the host, fallback on another method. */
+ }
+
+ switch (cmd) {
+
+ case LINUX_REBOOT_CMD_KEXEC:
+
+ if (!in_container) {
+ /* We cheat and exec kexec to avoid doing all its work */
+ log_info("Rebooting with kexec.");
+
+ r = safe_fork("(sd-kexec)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_WAIT, NULL);
+ if (r == 0) {
+ const char * const args[] = {
+ KEXEC, "-e", NULL
+ };
+
+ /* Child */
+
+ execv(args[0], (char * const *) args);
+
+ /* execv failed (kexec binary missing?), so try simply reboot(RB_KEXEC) */
+ (void) reboot(cmd);
+ _exit(EXIT_FAILURE);
+ }
+
+ /* If we are still running, then the kexec can't have worked, let's fall through */
+ }
+
+ cmd = RB_AUTOBOOT;
+ _fallthrough_;
+
+ case RB_AUTOBOOT:
+ (void) reboot_with_parameter(REBOOT_LOG);
+ log_info("Rebooting.");
+ break;
+
+ case RB_POWER_OFF:
+ log_info("Powering off.");
+ break;
+
+ case RB_HALT_SYSTEM:
+ log_info("Halting system.");
+ break;
+
+ default:
+ assert_not_reached("Unknown magic");
+ }
+
+ (void) reboot(cmd);
+ if (errno == EPERM && in_container) {
+ /* If we are in a container, and we lacked
+ * CAP_SYS_BOOT just exit, this will kill our
+ * container for good. */
+ log_info("Exiting container.");
+ return EXIT_SUCCESS;
+ }
+
+ r = log_error_errno(errno, "Failed to invoke reboot(): %m");
+
+ error:
+ log_emergency_errno(r, "Critical error while doing system shutdown: %m");
+ freeze();
+}
diff --git a/src/shutdown/umount.c b/src/shutdown/umount.c
new file mode 100644
index 0000000..3a72a13
--- /dev/null
+++ b/src/shutdown/umount.c
@@ -0,0 +1,843 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2010 ProFUSION embedded systems
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/dm-ioctl.h>
+#include <linux/major.h>
+#include <linux/raid/md_u.h>
+#include <linux/loop.h>
+#include <sys/mount.h>
+#include <sys/swap.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "def.h"
+#include "device-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fstab-util.h"
+#include "libmount-util.h"
+#include "mount-setup.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "umount.h"
+#include "util.h"
+#include "virt.h"
+
+static void mount_point_free(MountPoint **head, MountPoint *m) {
+ assert(head);
+ assert(m);
+
+ LIST_REMOVE(mount_point, *head, m);
+
+ free(m->path);
+ free(m->remount_options);
+ free(m);
+}
+
+void mount_points_list_free(MountPoint **head) {
+ assert(head);
+
+ while (*head)
+ mount_point_free(head, *head);
+}
+
+int mount_points_list_get(const char *mountinfo, MountPoint **head) {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ int r;
+
+ assert(head);
+
+ r = libmount_parse(mountinfo, NULL, &table, &iter);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %m", mountinfo ?: "/proc/self/mountinfo");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *path, *fstype;
+ _cleanup_free_ char *options = NULL;
+ unsigned long remount_flags = 0u;
+ _cleanup_free_ char *remount_options = NULL;
+ bool try_remount_ro;
+ _cleanup_free_ MountPoint *m = NULL;
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next entry from %s: %m", mountinfo ?: "/proc/self/mountinfo");
+
+ path = mnt_fs_get_target(fs);
+ if (!path)
+ continue;
+
+ fstype = mnt_fs_get_fstype(fs);
+
+ /* Combine the generic VFS options with the FS-specific
+ * options. Duplicates are not a problem here, because the only
+ * options that should come up twice are typically ro/rw, which
+ * are turned into MS_RDONLY or the inversion of it.
+ *
+ * Even if there are duplicates later in mount_option_mangle()
+ * they shouldn't hurt anyways as they override each other.
+ */
+ if (!strextend_with_separator(&options, ",",
+ mnt_fs_get_vfs_options(fs),
+ NULL))
+ return log_oom();
+ if (!strextend_with_separator(&options, ",",
+ mnt_fs_get_fs_options(fs),
+ NULL))
+ return log_oom();
+
+ /* Ignore mount points we can't unmount because they
+ * are API or because we are keeping them open (like
+ * /dev/console). Also, ignore all mounts below API
+ * file systems, since they are likely virtual too,
+ * and hence not worth spending time on. Also, in
+ * unprivileged containers we might lack the rights to
+ * unmount these things, hence don't bother. */
+ if (mount_point_is_api(path) ||
+ mount_point_ignore(path) ||
+ PATH_STARTSWITH_SET(path, "/dev", "/sys", "/proc"))
+ continue;
+
+ /* If we are in a container, don't attempt to
+ * read-only mount anything as that brings no real
+ * benefits, but might confuse the host, as we remount
+ * the superblock here, not the bind mount.
+ *
+ * If the filesystem is a network fs, also skip the
+ * remount. It brings no value (we cannot leave
+ * a "dirty fs") and could hang if the network is down.
+ * Note that umount2() is more careful and will not
+ * hang because of the network being down. */
+ try_remount_ro = detect_container() <= 0 &&
+ !fstype_is_network(fstype) &&
+ !fstype_is_api_vfs(fstype) &&
+ !fstype_is_ro(fstype) &&
+ !fstab_test_yes_no_option(options, "ro\0rw\0");
+
+ if (try_remount_ro) {
+ /* mount(2) states that mount flags and options need to be exactly the same
+ * as they were when the filesystem was mounted, except for the desired
+ * changes. So we reconstruct both here and adjust them for the later
+ * remount call too. */
+
+ r = mnt_fs_get_propagation(fs, &remount_flags);
+ if (r < 0) {
+ log_warning_errno(r, "mnt_fs_get_propagation() failed for %s, ignoring: %m", path);
+ continue;
+ }
+
+ r = mount_option_mangle(options, remount_flags, &remount_flags, &remount_options);
+ if (r < 0) {
+ log_warning_errno(r, "mount_option_mangle failed for %s, ignoring: %m", path);
+ continue;
+ }
+
+ /* MS_BIND is special. If it is provided it will only make the mount-point
+ * read-only. If left out, the super block itself is remounted, which we want. */
+ remount_flags = (remount_flags|MS_REMOUNT|MS_RDONLY) & ~MS_BIND;
+ }
+
+ m = new0(MountPoint, 1);
+ if (!m)
+ return log_oom();
+
+ m->path = strdup(path);
+ if (!m->path)
+ return log_oom();
+
+ m->remount_options = TAKE_PTR(remount_options);
+ m->remount_flags = remount_flags;
+ m->try_remount_ro = try_remount_ro;
+
+ LIST_PREPEND(mount_point, *head, TAKE_PTR(m));
+ }
+
+ return 0;
+}
+
+int swap_list_get(const char *swaps, MountPoint **head) {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *t = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *i = NULL;
+ int r;
+
+ assert(head);
+
+ t = mnt_new_table();
+ i = mnt_new_iter(MNT_ITER_FORWARD);
+ if (!t || !i)
+ return log_oom();
+
+ r = mnt_table_parse_swaps(t, swaps);
+ if (r == -ENOENT) /* no /proc/swaps is fine */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %m", swaps ?: "/proc/swaps");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ _cleanup_free_ MountPoint *swap = NULL;
+ const char *source;
+
+ r = mnt_table_next_fs(t, i, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next entry from %s: %m", swaps ?: "/proc/swaps");
+
+ source = mnt_fs_get_source(fs);
+ if (!source)
+ continue;
+
+ swap = new0(MountPoint, 1);
+ if (!swap)
+ return log_oom();
+
+ swap->path = strdup(source);
+ if (!swap->path)
+ return log_oom();
+
+ LIST_PREPEND(mount_point, *head, TAKE_PTR(swap));
+ }
+
+ return 0;
+}
+
+static int loopback_list_get(MountPoint **head) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(head);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_sysname(e, "loop*");
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_sysattr(e, "loop/backing_file", NULL, true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *p = NULL;
+ const char *dn;
+ MountPoint *lb;
+ dev_t devnum;
+
+ if (sd_device_get_devnum(d, &devnum) < 0 ||
+ sd_device_get_devname(d, &dn) < 0)
+ continue;
+
+ p = strdup(dn);
+ if (!p)
+ return -ENOMEM;
+
+ lb = new(MountPoint, 1);
+ if (!lb)
+ return -ENOMEM;
+
+ *lb = (MountPoint) {
+ .path = TAKE_PTR(p),
+ .devnum = devnum,
+ };
+
+ LIST_PREPEND(mount_point, *head, lb);
+ }
+
+ return 0;
+}
+
+static int dm_list_get(MountPoint **head) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(head);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_sysname(e, "dm-*");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *p = NULL;
+ const char *dn;
+ MountPoint *m;
+ dev_t devnum;
+
+ if (sd_device_get_devnum(d, &devnum) < 0 ||
+ sd_device_get_devname(d, &dn) < 0)
+ continue;
+
+ p = strdup(dn);
+ if (!p)
+ return -ENOMEM;
+
+ m = new(MountPoint, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (MountPoint) {
+ .path = TAKE_PTR(p),
+ .devnum = devnum,
+ };
+
+ LIST_PREPEND(mount_point, *head, m);
+ }
+
+ return 0;
+}
+
+static int md_list_get(MountPoint **head) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ assert(head);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_sysname(e, "md*");
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ _cleanup_free_ char *p = NULL;
+ const char *dn;
+ MountPoint *m;
+ dev_t devnum;
+
+ if (sd_device_get_devnum(d, &devnum) < 0 ||
+ sd_device_get_devname(d, &dn) < 0)
+ continue;
+
+ p = strdup(dn);
+ if (!p)
+ return -ENOMEM;
+
+ m = new(MountPoint, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (MountPoint) {
+ .path = TAKE_PTR(p),
+ .devnum = devnum,
+ };
+
+ LIST_PREPEND(mount_point, *head, m);
+ }
+
+ return 0;
+}
+
+static int delete_loopback(const char *device) {
+ _cleanup_close_ int fd = -1;
+ struct loop_info64 info;
+
+ assert(device);
+
+ fd = open(device, O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ log_debug_errno(errno, "Failed to open loopback device %s: %m", device);
+ return errno == ENOENT ? 0 : -errno;
+ }
+
+ /* Loopback block devices don't sync in-flight blocks when we clear the fd, hence sync explicitly
+ * first */
+ if (fsync(fd) < 0)
+ log_debug_errno(errno, "Failed to sync loop block device %s, ignoring: %m", device);
+
+ if (ioctl(fd, LOOP_CLR_FD, 0) < 0) {
+ if (errno == ENXIO) /* Nothing bound, didn't do anything */
+ return 0;
+
+ if (errno != EBUSY)
+ return log_debug_errno(errno, "Failed to clear loopback device %s: %m", device);
+
+ if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) {
+ if (errno == ENXIO) /* What? Suddenly detached after all? That's fine by us then. */
+ return 1;
+
+ log_debug_errno(errno, "Failed to invoke LOOP_GET_STATUS64 on loopback device %s, ignoring: %m", device);
+ return -EBUSY; /* propagate original error */
+ }
+
+ if (FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR)) /* someone else already set LO_FLAGS_AUTOCLEAR for us? fine by us */
+ return -EBUSY; /* propagate original error */
+
+ info.lo_flags |= LO_FLAGS_AUTOCLEAR;
+ if (ioctl(fd, LOOP_SET_STATUS64, &info) < 0) {
+ if (errno == ENXIO) /* Suddenly detached after all? Fine by us */
+ return 1;
+
+ log_debug_errno(errno, "Failed to set LO_FLAGS_AUTOCLEAR flag for loop device %s, ignoring: %m", device);
+ } else
+ log_debug("Successfully set LO_FLAGS_AUTOCLEAR flag for loop device %s.", device);
+
+ return -EBUSY;
+ }
+
+ if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) {
+ /* If the LOOP_CLR_FD above succeeded we'll see ENXIO here. */
+ if (errno == ENXIO)
+ log_debug("Successfully detached loopback device %s.", device);
+ else
+ log_debug_errno(errno, "Failed to invoke LOOP_GET_STATUS64 on loopback device %s, ignoring: %m", device); /* the LOOP_CLR_FD at least worked, let's hope for the best */
+
+ return 1;
+ }
+
+ /* Linux makes LOOP_CLR_FD succeed whenever LO_FLAGS_AUTOCLEAR is set without actually doing
+ * anything. Very confusing. Let's hence not claim we did anything in this case. */
+ if (FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR))
+ log_debug("Successfully called LOOP_CLR_FD on a loopback device %s with autoclear set, which is a NOP.", device);
+ else
+ log_debug("Weird, LOOP_CLR_FD succeeded but the device is still attached on %s.", device);
+
+ return -EBUSY; /* Nothing changed, the device is still attached, hence it apparently is still busy */
+}
+
+static int delete_dm(MountPoint *m) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(m);
+ assert(major(m->devnum) != 0);
+ assert(m->path);
+
+ fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ r = fsync_path_at(AT_FDCWD, m->path);
+ if (r < 0)
+ log_debug_errno(r, "Failed to sync DM block device %s, ignoring: %m", m->path);
+
+ if (ioctl(fd, DM_DEV_REMOVE, &(struct dm_ioctl) {
+ .version = {
+ DM_VERSION_MAJOR,
+ DM_VERSION_MINOR,
+ DM_VERSION_PATCHLEVEL
+ },
+ .data_size = sizeof(struct dm_ioctl),
+ .dev = m->devnum,
+ }) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int delete_md(MountPoint *m) {
+ _cleanup_close_ int fd = -1;
+
+ assert(m);
+ assert(major(m->devnum) != 0);
+ assert(m->path);
+
+ fd = open(m->path, O_RDONLY|O_CLOEXEC|O_EXCL);
+ if (fd < 0)
+ return -errno;
+
+ if (fsync(fd) < 0)
+ log_debug_errno(errno, "Failed to sync MD block device %s, ignoring: %m", m->path);
+
+ if (ioctl(fd, STOP_ARRAY, NULL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static bool nonunmountable_path(const char *path) {
+ return path_equal(path, "/")
+#if ! HAVE_SPLIT_USR
+ || path_equal(path, "/usr")
+#endif
+ || path_startswith(path, "/run/initramfs");
+}
+
+static int remount_with_timeout(MountPoint *m, int umount_log_level) {
+ pid_t pid;
+ int r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ assert(m);
+
+ /* Due to the possibility of a remount operation hanging, we fork a child process and set a
+ * timeout. If the timeout lapses, the assumption is that the particular remount failed. */
+ r = safe_fork("(sd-remount)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_REOPEN_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_info("Remounting '%s' read-only in with options '%s'.", m->path, m->remount_options);
+
+ /* Start the mount operation here in the child */
+ r = mount(NULL, m->path, NULL, m->remount_flags, m->remount_options);
+ if (r < 0)
+ log_full_errno(umount_log_level, errno, "Failed to remount '%s' read-only: %m", m->path);
+
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
+ if (r == -ETIMEDOUT) {
+ log_error_errno(r, "Remounting '%s' timed out, issuing SIGKILL to PID " PID_FMT ".", m->path, pid);
+ (void) kill(pid, SIGKILL);
+ } else if (r == -EPROTO)
+ log_debug_errno(r, "Remounting '%s' failed abnormally, child process " PID_FMT " aborted or exited non-zero.", m->path, pid);
+ else if (r < 0)
+ log_error_errno(r, "Remounting '%s' failed unexpectedly, couldn't wait for child process " PID_FMT ": %m", m->path, pid);
+
+ return r;
+}
+
+static int umount_with_timeout(MountPoint *m, int umount_log_level) {
+ pid_t pid;
+ int r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ assert(m);
+
+ /* Due to the possibility of a umount operation hanging, we fork a child process and set a
+ * timeout. If the timeout lapses, the assumption is that the particular umount failed. */
+ r = safe_fork("(sd-umount)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_LOG|FORK_REOPEN_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ log_info("Unmounting '%s'.", m->path);
+
+ /* Start the mount operation here in the child Using MNT_FORCE
+ * causes some filesystems (e.g. FUSE and NFS and other network
+ * filesystems) to abort any pending requests and return -EIO
+ * rather than blocking indefinitely. If the filesysten is
+ * "busy", this may allow processes to die, thus making the
+ * filesystem less busy so the unmount might succeed (rather
+ * then return EBUSY).*/
+ r = umount2(m->path, MNT_FORCE);
+ if (r < 0)
+ log_full_errno(umount_log_level, errno, "Failed to unmount %s: %m", m->path);
+
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
+ if (r == -ETIMEDOUT) {
+ log_error_errno(r, "Unmounting '%s' timed out, issuing SIGKILL to PID " PID_FMT ".", m->path, pid);
+ (void) kill(pid, SIGKILL);
+ } else if (r == -EPROTO)
+ log_debug_errno(r, "Unmounting '%s' failed abnormally, child process " PID_FMT " aborted or exited non-zero.", m->path, pid);
+ else if (r < 0)
+ log_error_errno(r, "Unmounting '%s' failed unexpectedly, couldn't wait for child process " PID_FMT ": %m", m->path, pid);
+
+ return r;
+}
+
+/* This includes remounting readonly, which changes the kernel mount options. Therefore the list passed to
+ * this function is invalidated, and should not be reused. */
+static int mount_points_list_umount(MountPoint **head, bool *changed, int umount_log_level) {
+ MountPoint *m;
+ int n_failed = 0;
+
+ assert(head);
+ assert(changed);
+
+ LIST_FOREACH(mount_point, m, *head) {
+ if (m->try_remount_ro) {
+ /* We always try to remount directories read-only first, before we go on and umount
+ * them.
+ *
+ * Mount points can be stacked. If a mount point is stacked below / or /usr, we
+ * cannot umount or remount it directly, since there is no way to refer to the
+ * underlying mount. There's nothing we can do about it for the general case, but we
+ * can do something about it if it is aliased somewhere else via a bind mount. If we
+ * explicitly remount the super block of that alias read-only we hence should be
+ * relatively safe regarding keeping a dirty fs we cannot otherwise see.
+ *
+ * Since the remount can hang in the instance of remote filesystems, we remount
+ * asynchronously and skip the subsequent umount if it fails. */
+ if (remount_with_timeout(m, umount_log_level) < 0) {
+ /* Remount failed, but try unmounting anyway,
+ * unless this is a mount point we want to skip. */
+ if (nonunmountable_path(m->path)) {
+ n_failed++;
+ continue;
+ }
+ }
+ }
+
+ /* Skip / and /usr since we cannot unmount that anyway, since we are running from it. They
+ * have already been remounted ro. */
+ if (nonunmountable_path(m->path))
+ continue;
+
+ /* Trying to umount */
+ if (umount_with_timeout(m, umount_log_level) < 0)
+ n_failed++;
+ else
+ *changed = true;
+ }
+
+ return n_failed;
+}
+
+static int swap_points_list_off(MountPoint **head, bool *changed) {
+ MountPoint *m, *n;
+ int n_failed = 0;
+
+ assert(head);
+ assert(changed);
+
+ LIST_FOREACH_SAFE(mount_point, m, n, *head) {
+ log_info("Deactivating swap %s.", m->path);
+ if (swapoff(m->path) < 0) {
+ log_warning_errno(errno, "Could not deactivate swap %s: %m", m->path);
+ n_failed++;
+ continue;
+ }
+
+ *changed = true;
+ mount_point_free(head, m);
+ }
+
+ return n_failed;
+}
+
+static int loopback_points_list_detach(MountPoint **head, bool *changed, int umount_log_level) {
+ MountPoint *m, *n;
+ int n_failed = 0, r;
+ dev_t rootdev = 0;
+
+ assert(head);
+ assert(changed);
+
+ (void) get_block_device("/", &rootdev);
+
+ LIST_FOREACH_SAFE(mount_point, m, n, *head) {
+ if (major(rootdev) != 0 && rootdev == m->devnum) {
+ n_failed++;
+ continue;
+ }
+
+ log_info("Detaching loopback %s.", m->path);
+ r = delete_loopback(m->path);
+ if (r < 0) {
+ log_full_errno(umount_log_level, r, "Could not detach loopback %s: %m", m->path);
+ n_failed++;
+ continue;
+ }
+ if (r > 0)
+ *changed = true;
+
+ mount_point_free(head, m);
+ }
+
+ return n_failed;
+}
+
+static int dm_points_list_detach(MountPoint **head, bool *changed, int umount_log_level) {
+ MountPoint *m, *n;
+ int n_failed = 0, r;
+ dev_t rootdev = 0;
+
+ assert(head);
+ assert(changed);
+
+ (void) get_block_device("/", &rootdev);
+
+ LIST_FOREACH_SAFE(mount_point, m, n, *head) {
+ if (major(rootdev) != 0 && rootdev == m->devnum) {
+ n_failed ++;
+ continue;
+ }
+
+ log_info("Detaching DM %s (%u:%u).", m->path, major(m->devnum), minor(m->devnum));
+ r = delete_dm(m);
+ if (r < 0) {
+ log_full_errno(umount_log_level, r, "Could not detach DM %s: %m", m->path);
+ n_failed++;
+ continue;
+ }
+
+ *changed = true;
+ mount_point_free(head, m);
+ }
+
+ return n_failed;
+}
+
+static int md_points_list_detach(MountPoint **head, bool *changed, int umount_log_level) {
+ MountPoint *m, *n;
+ int n_failed = 0, r;
+ dev_t rootdev = 0;
+
+ assert(head);
+ assert(changed);
+
+ (void) get_block_device("/", &rootdev);
+
+ LIST_FOREACH_SAFE(mount_point, m, n, *head) {
+ if (major(rootdev) != 0 && rootdev == m->devnum) {
+ n_failed ++;
+ continue;
+ }
+
+ log_info("Stopping MD %s (%u:%u).", m->path, major(m->devnum), minor(m->devnum));
+ r = delete_md(m);
+ if (r < 0) {
+ log_full_errno(umount_log_level, r, "Could not stop MD %s: %m", m->path);
+ n_failed++;
+ continue;
+ }
+
+ *changed = true;
+ mount_point_free(head, m);
+ }
+
+ return n_failed;
+}
+
+static int umount_all_once(bool *changed, int umount_log_level) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, mp_list_head);
+ int r;
+
+ assert(changed);
+
+ LIST_HEAD_INIT(mp_list_head);
+ r = mount_points_list_get(NULL, &mp_list_head);
+ if (r < 0)
+ return r;
+
+ return mount_points_list_umount(&mp_list_head, changed, umount_log_level);
+}
+
+int umount_all(bool *changed, int umount_log_level) {
+ bool umount_changed;
+ int r;
+
+ assert(changed);
+
+ /* Retry umount, until nothing can be umounted anymore. Mounts are
+ * processed in order, newest first. The retries are needed when
+ * an old mount has been moved, to a path inside a newer mount. */
+ do {
+ umount_changed = false;
+
+ r = umount_all_once(&umount_changed, umount_log_level);
+ if (umount_changed)
+ *changed = true;
+ } while (umount_changed);
+
+ return r;
+}
+
+int swapoff_all(bool *changed) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, swap_list_head);
+ int r;
+
+ assert(changed);
+
+ LIST_HEAD_INIT(swap_list_head);
+
+ r = swap_list_get(NULL, &swap_list_head);
+ if (r < 0)
+ return r;
+
+ return swap_points_list_off(&swap_list_head, changed);
+}
+
+int loopback_detach_all(bool *changed, int umount_log_level) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, loopback_list_head);
+ int r;
+
+ assert(changed);
+
+ LIST_HEAD_INIT(loopback_list_head);
+
+ r = loopback_list_get(&loopback_list_head);
+ if (r < 0)
+ return r;
+
+ return loopback_points_list_detach(&loopback_list_head, changed, umount_log_level);
+}
+
+int dm_detach_all(bool *changed, int umount_log_level) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, dm_list_head);
+ int r;
+
+ assert(changed);
+
+ LIST_HEAD_INIT(dm_list_head);
+
+ r = dm_list_get(&dm_list_head);
+ if (r < 0)
+ return r;
+
+ return dm_points_list_detach(&dm_list_head, changed, umount_log_level);
+}
+
+int md_detach_all(bool *changed, int umount_log_level) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, md_list_head);
+ int r;
+
+ assert(changed);
+
+ LIST_HEAD_INIT(md_list_head);
+
+ r = md_list_get(&md_list_head);
+ if (r < 0)
+ return r;
+
+ return md_points_list_detach(&md_list_head, changed, umount_log_level);
+}
diff --git a/src/shutdown/umount.h b/src/shutdown/umount.h
new file mode 100644
index 0000000..fac1a12
--- /dev/null
+++ b/src/shutdown/umount.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/***
+ Copyright © 2010 ProFUSION embedded systems
+***/
+
+#include "list.h"
+
+int umount_all(bool *changed, int umount_log_level);
+
+int swapoff_all(bool *changed);
+
+int loopback_detach_all(bool *changed, int umount_log_level);
+
+int dm_detach_all(bool *changed, int umount_log_level);
+
+int md_detach_all(bool *changed, int umount_log_level);
+
+/* This is exported just for testing */
+typedef struct MountPoint {
+ char *path;
+ char *remount_options;
+ unsigned long remount_flags;
+ bool try_remount_ro;
+ dev_t devnum;
+ LIST_FIELDS(struct MountPoint, mount_point);
+} MountPoint;
+
+int mount_points_list_get(const char *mountinfo, MountPoint **head);
+void mount_points_list_free(MountPoint **head);
+int swap_list_get(const char *swaps, MountPoint **head);
diff --git a/src/sleep/sleep.c b/src/sleep/sleep.c
new file mode 100644
index 0000000..39ab554
--- /dev/null
+++ b/src/sleep/sleep.c
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2010-2017 Canonical
+ Copyright © 2018 Dell Inc.
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/fiemap.h>
+#include <poll.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/timerfd.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "btrfs-util.h"
+#include "bus-error.h"
+#include "def.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "sleep-config.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "util.h"
+
+static char* arg_verb = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_verb, freep);
+
+static int write_hibernate_location_info(const HibernateLocation *hibernate_location) {
+ char offset_str[DECIMAL_STR_MAX(uint64_t)];
+ char resume_str[DECIMAL_STR_MAX(unsigned) * 2 + STRLEN(":")];
+ int r;
+
+ assert(hibernate_location);
+ assert(hibernate_location->swap);
+
+ xsprintf(resume_str, "%u:%u", major(hibernate_location->devno), minor(hibernate_location->devno));
+ r = write_string_file("/sys/power/resume", resume_str, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to write partition device to /sys/power/resume for '%s': '%s': %m",
+ hibernate_location->swap->device, resume_str);
+
+ log_debug("Wrote resume= value for %s to /sys/power/resume: %s", hibernate_location->swap->device, resume_str);
+
+ /* if it's a swap partition, we're done */
+ if (streq(hibernate_location->swap->type, "partition"))
+ return r;
+
+ if (!streq(hibernate_location->swap->type, "file"))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid hibernate type: %s", hibernate_location->swap->type);
+
+ /* Only available in 4.17+ */
+ if (hibernate_location->offset > 0 && access("/sys/power/resume_offset", W_OK) < 0) {
+ if (errno == ENOENT) {
+ log_debug("Kernel too old, can't configure resume_offset for %s, ignoring: %" PRIu64,
+ hibernate_location->swap->device, hibernate_location->offset);
+ return 0;
+ }
+
+ return log_debug_errno(errno, "/sys/power/resume_offset not writable: %m");
+ }
+
+ xsprintf(offset_str, "%" PRIu64, hibernate_location->offset);
+ r = write_string_file("/sys/power/resume_offset", offset_str, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to write swap file offset to /sys/power/resume_offset for '%s': '%s': %m",
+ hibernate_location->swap->device, offset_str);
+
+ log_debug("Wrote resume_offset= value for %s to /sys/power/resume_offset: %s", hibernate_location->swap->device, offset_str);
+
+ return 0;
+}
+
+static int write_mode(char **modes) {
+ int r = 0;
+ char **mode;
+
+ STRV_FOREACH(mode, modes) {
+ int k;
+
+ k = write_string_file("/sys/power/disk", *mode, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (k >= 0)
+ return 0;
+
+ log_debug_errno(k, "Failed to write '%s' to /sys/power/disk: %m", *mode);
+ if (r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int write_state(FILE **f, char **states) {
+ char **state;
+ int r = 0;
+
+ assert(f);
+ assert(*f);
+
+ STRV_FOREACH(state, states) {
+ int k;
+
+ k = write_string_stream(*f, *state, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (k >= 0)
+ return 0;
+ log_debug_errno(k, "Failed to write '%s' to /sys/power/state: %m", *state);
+ if (r >= 0)
+ r = k;
+
+ fclose(*f);
+ *f = fopen("/sys/power/state", "we");
+ if (!*f)
+ return -errno;
+ }
+
+ return r;
+}
+
+static int lock_all_homes(void) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ /* Let's synchronously lock all home directories managed by homed that have been marked for it. This
+ * way the key material required to access these volumes is hopefully removed from memory. */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to connect to system bus, ignoring: %m");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.home1",
+ "/org/freedesktop/home1",
+ "org.freedesktop.home1.Manager",
+ "LockAllHomes");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* If homed is not running it can't have any home directories active either. */
+ r = sd_bus_message_set_auto_start(m, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable auto-start of LockAllHomes() message: %m");
+
+ r = sd_bus_call(bus, m, DEFAULT_TIMEOUT_USEC, &error, NULL);
+ if (r < 0) {
+ if (!bus_error_is_unknown_service(&error))
+ return log_error_errno(r, "Failed to lock home directories: %s", bus_error_message(&error, r));
+
+ return log_debug("systemd-homed is not running, locking of home directories skipped.");
+ }
+
+ return log_debug("Successfully requested locking of all home directories.");
+}
+
+static int execute(char **modes, char **states) {
+ char *arguments[] = {
+ NULL,
+ (char*) "pre",
+ arg_verb,
+ NULL
+ };
+ static const char* const dirs[] = {
+ SYSTEM_SLEEP_PATH,
+ NULL
+ };
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(hibernate_location_freep) HibernateLocation *hibernate_location = NULL;
+ int r;
+
+ /* This file is opened first, so that if we hit an error,
+ * we can abort before modifying any state. */
+ f = fopen("/sys/power/state", "we");
+ if (!f)
+ return log_error_errno(errno, "Failed to open /sys/power/state: %m");
+
+ setvbuf(f, NULL, _IONBF, 0);
+
+ /* Configure hibernation settings if we are supposed to hibernate */
+ if (!strv_isempty(modes)) {
+ r = find_hibernate_location(&hibernate_location);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find location to hibernate to: %m");
+ if (r == 0) { /* 0 means: no hibernation location was configured in the kernel so far, let's
+ * do it ourselves then. > 0 means: kernel already had a configured hibernation
+ * location which we shouldn't touch. */
+ r = write_hibernate_location_info(hibernate_location);
+ if (r < 0)
+ return log_error_errno(r, "Failed to prepare for hibernation: %m");
+ }
+
+ r = write_mode(modes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write mode to /sys/power/disk: %m");;
+ }
+
+ (void) execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+ (void) lock_all_homes();
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SLEEP_START_STR,
+ LOG_MESSAGE("Suspending system..."),
+ "SLEEP=%s", arg_verb);
+
+ r = write_state(&f, states);
+ if (r < 0)
+ log_struct_errno(LOG_ERR, r,
+ "MESSAGE_ID=" SD_MESSAGE_SLEEP_STOP_STR,
+ LOG_MESSAGE("Failed to suspend system. System resumed again: %m"),
+ "SLEEP=%s", arg_verb);
+ else
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_SLEEP_STOP_STR,
+ LOG_MESSAGE("System resumed."),
+ "SLEEP=%s", arg_verb);
+
+ arguments[1] = (char*) "post";
+ (void) execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, arguments, NULL, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+
+ return r;
+}
+
+static int execute_s2h(const SleepConfig *sleep_config) {
+ _cleanup_close_ int tfd = -1;
+ char buf[FORMAT_TIMESPAN_MAX];
+ struct itimerspec ts = {};
+ int r;
+
+ assert(sleep_config);
+
+ tfd = timerfd_create(CLOCK_BOOTTIME_ALARM, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (tfd < 0)
+ return log_error_errno(errno, "Error creating timerfd: %m");
+
+ log_debug("Set timerfd wake alarm for %s",
+ format_timespan(buf, sizeof(buf), sleep_config->hibernate_delay_sec, USEC_PER_SEC));
+
+ timespec_store(&ts.it_value, sleep_config->hibernate_delay_sec);
+
+ r = timerfd_settime(tfd, 0, &ts, NULL);
+ if (r < 0)
+ return log_error_errno(errno, "Error setting hibernate timer: %m");
+
+ r = execute(sleep_config->suspend_modes, sleep_config->suspend_states);
+ if (r < 0)
+ return r;
+
+ r = fd_wait_for_event(tfd, POLLIN, 0);
+ if (r < 0)
+ return log_error_errno(r, "Error polling timerfd: %m");
+ if (!FLAGS_SET(r, POLLIN)) /* We woke up before the alarm time, we are done. */
+ return 0;
+
+ tfd = safe_close(tfd);
+
+ /* If woken up after alarm time, hibernate */
+ log_debug("Attempting to hibernate after waking from %s timer",
+ format_timespan(buf, sizeof(buf), sleep_config->hibernate_delay_sec, USEC_PER_SEC));
+
+ r = execute(sleep_config->hibernate_modes, sleep_config->hibernate_states);
+ if (r < 0) {
+ log_notice_errno(r, "Couldn't hibernate, will try to suspend again: %m");
+
+ r = execute(sleep_config->suspend_modes, sleep_config->suspend_states);
+ if (r < 0)
+ return log_error_errno(r, "Could neither hibernate nor suspend, giving up: %m");
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-suspend.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s COMMAND\n\n"
+ "Suspend the system, hibernate the system, or both.\n\n"
+ " -h --help Show this help and exit\n"
+ " --version Print version string and exit\n"
+ "\nCommands:\n"
+ " suspend Suspend the system\n"
+ " hibernate Hibernate the system\n"
+ " hybrid-sleep Both hibernate and suspend the system\n"
+ " suspend-then-hibernate Initially suspend and then hibernate\n"
+ " the system after a fixed period of time\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_VERSION = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch(c) {
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc - optind != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Usage: %s COMMAND",
+ program_invocation_short_name);
+
+ arg_verb = strdup(argv[optind]);
+ if (!arg_verb)
+ return log_oom();
+
+ if (!STR_IN_SET(arg_verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown command '%s'.", arg_verb);
+
+ return 1 /* work to do */;
+}
+
+static int run(int argc, char *argv[]) {
+ bool allow;
+ char **modes = NULL, **states = NULL;
+ _cleanup_(free_sleep_configp) SleepConfig *sleep_config = NULL;
+ int r;
+
+ log_setup_service();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = parse_sleep_config(&sleep_config);
+ if (r < 0)
+ return r;
+
+ r = sleep_settings(arg_verb, sleep_config, &allow, &modes, &states);
+ if (r < 0)
+ return r;
+
+ if (!allow)
+ return log_error_errno(SYNTHETIC_ERRNO(EACCES),
+ "Sleep mode \"%s\" is disabled by configuration, refusing.",
+ arg_verb);
+
+ if (streq(arg_verb, "suspend-then-hibernate"))
+ return execute_s2h(sleep_config);
+ else
+ return execute(modes, states);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/sleep/sleep.conf b/src/sleep/sleep.conf
new file mode 100644
index 0000000..dc2ed37
--- /dev/null
+++ b/src/sleep/sleep.conf
@@ -0,0 +1,25 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See systemd-sleep.conf(5) for details
+
+[Sleep]
+#AllowSuspend=yes
+#AllowHibernation=yes
+#AllowSuspendThenHibernate=yes
+#AllowHybridSleep=yes
+#SuspendMode=
+#SuspendState=mem standby freeze
+#HibernateMode=platform shutdown
+#HibernateState=disk
+#HybridSleepMode=suspend platform shutdown
+#HybridSleepState=disk
+#HibernateDelaySec=180min
diff --git a/src/socket-proxy/socket-proxyd.c b/src/socket-proxy/socket-proxyd.c
new file mode 100644
index 0000000..4391d9f
--- /dev/null
+++ b/src/socket-proxy/socket-proxyd.c
@@ -0,0 +1,722 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "resolve-private.h"
+#include "set.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "util.h"
+
+#define BUFFER_SIZE (256 * 1024)
+
+static unsigned arg_connections_max = 256;
+static const char *arg_remote_host = NULL;
+static usec_t arg_exit_idle_time = USEC_INFINITY;
+
+typedef struct Context {
+ sd_event *event;
+ sd_resolve *resolve;
+ sd_event_source *idle_time;
+
+ Set *listen;
+ Set *connections;
+} Context;
+
+typedef struct Connection {
+ Context *context;
+
+ int server_fd, client_fd;
+ int server_to_client_buffer[2]; /* a pipe */
+ int client_to_server_buffer[2]; /* a pipe */
+
+ size_t server_to_client_buffer_full, client_to_server_buffer_full;
+ size_t server_to_client_buffer_size, client_to_server_buffer_size;
+
+ sd_event_source *server_event_source, *client_event_source;
+
+ sd_resolve_query *resolve_query;
+} Connection;
+
+static void connection_free(Connection *c) {
+ assert(c);
+
+ if (c->context)
+ set_remove(c->context->connections, c);
+
+ sd_event_source_unref(c->server_event_source);
+ sd_event_source_unref(c->client_event_source);
+
+ safe_close(c->server_fd);
+ safe_close(c->client_fd);
+
+ safe_close_pair(c->server_to_client_buffer);
+ safe_close_pair(c->client_to_server_buffer);
+
+ sd_resolve_query_unref(c->resolve_query);
+
+ free(c);
+}
+
+static int idle_time_cb(sd_event_source *s, uint64_t usec, void *userdata) {
+ Context *c = userdata;
+ int r;
+
+ if (!set_isempty(c->connections)) {
+ log_warning("Idle timer fired even though there are connections, ignoring");
+ return 0;
+ }
+
+ r = sd_event_exit(c->event, 0);
+ if (r < 0) {
+ log_warning_errno(r, "Error while stopping event loop, ignoring: %m");
+ return 0;
+ }
+ return 0;
+}
+
+static int connection_release(Connection *c) {
+ Context *context = c->context;
+ int r;
+
+ connection_free(c);
+
+ if (arg_exit_idle_time < USEC_INFINITY && set_isempty(context->connections)) {
+ if (context->idle_time) {
+ r = sd_event_source_set_time_relative(context->idle_time, arg_exit_idle_time);
+ if (r < 0)
+ return log_error_errno(r, "Error while setting idle time: %m");
+
+ r = sd_event_source_set_enabled(context->idle_time, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Error while enabling idle time: %m");
+ } else {
+ r = sd_event_add_time_relative(
+ context->event, &context->idle_time, CLOCK_MONOTONIC,
+ arg_exit_idle_time, 0, idle_time_cb, context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create idle timer: %m");
+ }
+ }
+
+ return 0;
+}
+
+static void context_clear(Context *context) {
+ assert(context);
+
+ set_free_with_destructor(context->listen, sd_event_source_unref);
+ set_free_with_destructor(context->connections, connection_free);
+
+ sd_event_unref(context->event);
+ sd_resolve_unref(context->resolve);
+ sd_event_source_unref(context->idle_time);
+}
+
+static int connection_create_pipes(Connection *c, int buffer[static 2], size_t *sz) {
+ int r;
+
+ assert(c);
+ assert(buffer);
+ assert(sz);
+
+ if (buffer[0] >= 0)
+ return 0;
+
+ r = pipe2(buffer, O_CLOEXEC|O_NONBLOCK);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to allocate pipe buffer: %m");
+
+ (void) fcntl(buffer[0], F_SETPIPE_SZ, BUFFER_SIZE);
+
+ r = fcntl(buffer[0], F_GETPIPE_SZ);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to get pipe buffer size: %m");
+
+ assert(r > 0);
+ *sz = r;
+
+ return 0;
+}
+
+static int connection_shovel(
+ Connection *c,
+ int *from, int buffer[2], int *to,
+ size_t *full, size_t *sz,
+ sd_event_source **from_source, sd_event_source **to_source) {
+
+ bool shoveled;
+
+ assert(c);
+ assert(from);
+ assert(buffer);
+ assert(buffer[0] >= 0);
+ assert(buffer[1] >= 0);
+ assert(to);
+ assert(full);
+ assert(sz);
+ assert(from_source);
+ assert(to_source);
+
+ do {
+ ssize_t z;
+
+ shoveled = false;
+
+ if (*full < *sz && *from >= 0 && *to >= 0) {
+ z = splice(*from, NULL, buffer[1], NULL, *sz - *full, SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
+ if (z > 0) {
+ *full += z;
+ shoveled = true;
+ } else if (z == 0 || ERRNO_IS_DISCONNECT(errno)) {
+ *from_source = sd_event_source_unref(*from_source);
+ *from = safe_close(*from);
+ } else if (!IN_SET(errno, EAGAIN, EINTR))
+ return log_error_errno(errno, "Failed to splice: %m");
+ }
+
+ if (*full > 0 && *to >= 0) {
+ z = splice(buffer[0], NULL, *to, NULL, *full, SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
+ if (z > 0) {
+ *full -= z;
+ shoveled = true;
+ } else if (z == 0 || ERRNO_IS_DISCONNECT(errno)) {
+ *to_source = sd_event_source_unref(*to_source);
+ *to = safe_close(*to);
+ } else if (!IN_SET(errno, EAGAIN, EINTR))
+ return log_error_errno(errno, "Failed to splice: %m");
+ }
+ } while (shoveled);
+
+ return 0;
+}
+
+static int connection_enable_event_sources(Connection *c);
+
+static int traffic_cb(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Connection *c = userdata;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(c);
+
+ r = connection_shovel(c,
+ &c->server_fd, c->server_to_client_buffer, &c->client_fd,
+ &c->server_to_client_buffer_full, &c->server_to_client_buffer_size,
+ &c->server_event_source, &c->client_event_source);
+ if (r < 0)
+ goto quit;
+
+ r = connection_shovel(c,
+ &c->client_fd, c->client_to_server_buffer, &c->server_fd,
+ &c->client_to_server_buffer_full, &c->client_to_server_buffer_size,
+ &c->client_event_source, &c->server_event_source);
+ if (r < 0)
+ goto quit;
+
+ /* EOF on both sides? */
+ if (c->server_fd == -1 && c->client_fd == -1)
+ goto quit;
+
+ /* Server closed, and all data written to client? */
+ if (c->server_fd == -1 && c->server_to_client_buffer_full <= 0)
+ goto quit;
+
+ /* Client closed, and all data written to server? */
+ if (c->client_fd == -1 && c->client_to_server_buffer_full <= 0)
+ goto quit;
+
+ r = connection_enable_event_sources(c);
+ if (r < 0)
+ goto quit;
+
+ return 1;
+
+quit:
+ connection_release(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int connection_enable_event_sources(Connection *c) {
+ uint32_t a = 0, b = 0;
+ int r;
+
+ assert(c);
+
+ if (c->server_to_client_buffer_full > 0)
+ b |= EPOLLOUT;
+ if (c->server_to_client_buffer_full < c->server_to_client_buffer_size)
+ a |= EPOLLIN;
+
+ if (c->client_to_server_buffer_full > 0)
+ a |= EPOLLOUT;
+ if (c->client_to_server_buffer_full < c->client_to_server_buffer_size)
+ b |= EPOLLIN;
+
+ if (c->server_event_source)
+ r = sd_event_source_set_io_events(c->server_event_source, a);
+ else if (c->server_fd >= 0)
+ r = sd_event_add_io(c->context->event, &c->server_event_source, c->server_fd, a, traffic_cb, c);
+ else
+ r = 0;
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up server event source: %m");
+
+ if (c->client_event_source)
+ r = sd_event_source_set_io_events(c->client_event_source, b);
+ else if (c->client_fd >= 0)
+ r = sd_event_add_io(c->context->event, &c->client_event_source, c->client_fd, b, traffic_cb, c);
+ else
+ r = 0;
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up client event source: %m");
+
+ return 0;
+}
+
+static int connection_complete(Connection *c) {
+ int r;
+
+ assert(c);
+
+ r = connection_create_pipes(c, c->server_to_client_buffer, &c->server_to_client_buffer_size);
+ if (r < 0)
+ goto fail;
+
+ r = connection_create_pipes(c, c->client_to_server_buffer, &c->client_to_server_buffer_size);
+ if (r < 0)
+ goto fail;
+
+ r = connection_enable_event_sources(c);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ connection_release(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int connect_cb(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Connection *c = userdata;
+ socklen_t solen;
+ int error, r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(c);
+
+ solen = sizeof(error);
+ r = getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &solen);
+ if (r < 0) {
+ log_error_errno(errno, "Failed to issue SO_ERROR: %m");
+ goto fail;
+ }
+
+ if (error != 0) {
+ log_error_errno(error, "Failed to connect to remote host: %m");
+ goto fail;
+ }
+
+ c->client_event_source = sd_event_source_unref(c->client_event_source);
+
+ return connection_complete(c);
+
+fail:
+ connection_release(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int connection_start(Connection *c, struct sockaddr *sa, socklen_t salen) {
+ int r;
+
+ assert(c);
+ assert(sa);
+ assert(salen);
+
+ c->client_fd = socket(sa->sa_family, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
+ if (c->client_fd < 0) {
+ log_error_errno(errno, "Failed to get remote socket: %m");
+ goto fail;
+ }
+
+ r = connect(c->client_fd, sa, salen);
+ if (r < 0) {
+ if (errno == EINPROGRESS) {
+ r = sd_event_add_io(c->context->event, &c->client_event_source, c->client_fd, EPOLLOUT, connect_cb, c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add connection socket: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_enabled(c->client_event_source, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ log_error_errno(r, "Failed to enable oneshot event source: %m");
+ goto fail;
+ }
+ } else {
+ log_error_errno(errno, "Failed to connect to remote host: %m");
+ goto fail;
+ }
+ } else {
+ r = connection_complete(c);
+ if (r < 0)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ connection_release(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int resolve_handler(sd_resolve_query *q, int ret, const struct addrinfo *ai, Connection *c) {
+ assert(q);
+ assert(c);
+
+ if (ret != 0) {
+ log_error("Failed to resolve host: %s", gai_strerror(ret));
+ goto fail;
+ }
+
+ c->resolve_query = sd_resolve_query_unref(c->resolve_query);
+
+ return connection_start(c, ai->ai_addr, ai->ai_addrlen);
+
+fail:
+ connection_release(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int resolve_remote(Connection *c) {
+
+ static const struct addrinfo hints = {
+ .ai_family = AF_UNSPEC,
+ .ai_socktype = SOCK_STREAM,
+ };
+
+ const char *node, *service;
+ int r;
+
+ if (IN_SET(arg_remote_host[0], '/', '@')) {
+ union sockaddr_union sa;
+ int sa_len;
+
+ r = sockaddr_un_set_path(&sa.un, arg_remote_host);
+ if (r < 0) {
+ log_error_errno(r, "Specified address doesn't fit in an AF_UNIX address, refusing: %m");
+ goto fail;
+ }
+ sa_len = r;
+
+ return connection_start(c, &sa.sa, sa_len);
+ }
+
+ service = strrchr(arg_remote_host, ':');
+ if (service) {
+ node = strndupa(arg_remote_host, service - arg_remote_host);
+ service++;
+ } else {
+ node = arg_remote_host;
+ service = "80";
+ }
+
+ log_debug("Looking up address info for %s:%s", node, service);
+ r = resolve_getaddrinfo(c->context->resolve, &c->resolve_query, node, service, &hints, resolve_handler, NULL, c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to resolve remote host: %m");
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ connection_release(c);
+ return 0; /* ignore errors, continue serving */
+}
+
+static int add_connection_socket(Context *context, int fd) {
+ Connection *c;
+ int r;
+
+ assert(context);
+ assert(fd >= 0);
+
+ if (set_size(context->connections) > arg_connections_max) {
+ log_warning("Hit connection limit, refusing connection.");
+ safe_close(fd);
+ return 0;
+ }
+
+ if (context->idle_time) {
+ r = sd_event_source_set_enabled(context->idle_time, SD_EVENT_OFF);
+ if (r < 0)
+ log_warning_errno(r, "Unable to disable idle timer, continuing: %m");
+ }
+
+ c = new(Connection, 1);
+ if (!c) {
+ log_oom();
+ return 0;
+ }
+
+ *c = (Connection) {
+ .context = context,
+ .server_fd = fd,
+ .client_fd = -1,
+ .server_to_client_buffer = {-1, -1},
+ .client_to_server_buffer = {-1, -1},
+ };
+
+ r = set_ensure_put(&context->connections, NULL, c);
+ if (r < 0) {
+ free(c);
+ log_oom();
+ return 0;
+ }
+
+ return resolve_remote(c);
+}
+
+static int accept_cb(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_free_ char *peer = NULL;
+ Context *context = userdata;
+ int nfd = -1, r;
+
+ assert(s);
+ assert(fd >= 0);
+ assert(revents & EPOLLIN);
+ assert(context);
+
+ nfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (nfd < 0) {
+ if (!ERRNO_IS_ACCEPT_AGAIN(errno))
+ log_warning_errno(errno, "Failed to accept() socket: %m");
+ } else {
+ (void) getpeername_pretty(nfd, true, &peer);
+ log_debug("New connection from %s", strna(peer));
+
+ r = add_connection_socket(context, nfd);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to accept connection, ignoring: %m");
+ safe_close(nfd);
+ }
+ }
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Error while re-enabling listener with ONESHOT: %m");
+
+ return 1;
+}
+
+static int add_listen_socket(Context *context, int fd) {
+ sd_event_source *source;
+ int r;
+
+ assert(context);
+ assert(fd >= 0);
+
+ r = sd_is_socket(fd, 0, SOCK_STREAM, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine socket type: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Passed in socket is not a stream socket.");
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mark file descriptor non-blocking: %m");
+
+ r = sd_event_add_io(context->event, &source, fd, EPOLLIN, accept_cb, context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add event source: %m");
+
+ r = set_ensure_put(&context->listen, NULL, source);
+ if (r < 0) {
+ sd_event_source_unref(source);
+ return log_error_errno(r, "Failed to add source to set: %m");
+ }
+
+ r = sd_event_source_set_exit_on_failure(source, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable exit-on-failure logic: %m");
+
+ /* Set the watcher to oneshot in case other processes are also
+ * watching to accept(). */
+ r = sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable oneshot mode: %m");
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ _cleanup_free_ char *time_link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-socket-proxyd", "8", &link);
+ if (r < 0)
+ return log_oom();
+ r = terminal_urlify_man("systemd.time", "7", &time_link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [HOST:PORT]\n"
+ "%1$s [SOCKET]\n\n"
+ "Bidirectionally proxy local sockets to another (possibly remote) socket.\n\n"
+ " -c --connections-max= Set the maximum number of connections to be accepted\n"
+ " --exit-idle-time= Exit when without a connection for this duration. See\n"
+ " the %3$s for time span format\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ , time_link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_EXIT_IDLE,
+ ARG_IGNORE_ENV
+ };
+
+ static const struct option options[] = {
+ { "connections-max", required_argument, NULL, 'c' },
+ { "exit-idle-time", required_argument, NULL, ARG_EXIT_IDLE },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "c:h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'c':
+ r = safe_atou(optarg, &arg_connections_max);
+ if (r < 0) {
+ log_error("Failed to parse --connections-max= argument: %s", optarg);
+ return r;
+ }
+
+ if (arg_connections_max < 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Connection limit is too low.");
+
+ break;
+
+ case ARG_EXIT_IDLE:
+ r = parse_sec(optarg, &arg_exit_idle_time);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --exit-idle-time= argument: %s", optarg);
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not enough parameters.");
+
+ if (argc != optind+1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many parameters.");
+
+ arg_remote_host = argv[optind];
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context context = {};
+ int r, n, fd;
+
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_event_default(&context.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_resolve_default(&context.resolve);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate resolver: %m");
+
+ r = sd_resolve_attach_event(context.resolve, context.event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach resolver: %m");
+
+ sd_event_set_watchdog(context.event, true);
+
+ r = sd_listen_fds(1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to receive sockets from parent.");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Didn't get any sockets passed in.");
+
+ n = r;
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+ r = add_listen_socket(&context, fd);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_loop(context.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/stdio-bridge/stdio-bridge.c b/src/stdio-bridge/stdio-bridge.c
new file mode 100644
index 0000000..81d5071
--- /dev/null
+++ b/src/stdio-bridge/stdio-bridge.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <poll.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "build.h"
+#include "bus-internal.h"
+#include "bus-util.h"
+#include "errno-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "util.h"
+
+#define DEFAULT_BUS_PATH "unix:path=/run/dbus/system_bus_socket"
+
+static const char *arg_bus_path = DEFAULT_BUS_PATH;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+
+static int help(void) {
+
+ printf("%s [OPTIONS...]\n\n"
+ "STDIO or socket-activatable proxy to a given DBus endpoint.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " -p --bus-path=PATH Path to the kernel bus (default: %s)\n"
+ " -M --machine=MACHINE Name of machine to connect to\n",
+ program_invocation_short_name, DEFAULT_BUS_PATH);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_MACHINE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "bus-path", required_argument, NULL, 'p' },
+ { "machine", required_argument, NULL, 'M' },
+ {},
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hp:M:", options, NULL)) >= 0) {
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'p':
+ arg_bus_path = optarg;
+ break;
+
+ case 'M':
+ arg_bus_path = optarg;
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown option code %c", c);
+ }
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *a = NULL, *b = NULL;
+ sd_id128_t server_id;
+ bool is_unix;
+ int r, in_fd, out_fd;
+
+ log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = sd_listen_fds(0);
+ if (r == 0) {
+ in_fd = STDIN_FILENO;
+ out_fd = STDOUT_FILENO;
+ } else if (r == 1) {
+ in_fd = SD_LISTEN_FDS_START;
+ out_fd = SD_LISTEN_FDS_START;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Illegal number of file descriptors passed.");
+
+ is_unix =
+ sd_is_socket(in_fd, AF_UNIX, 0, 0) > 0 &&
+ sd_is_socket(out_fd, AF_UNIX, 0, 0) > 0;
+
+ r = sd_bus_new(&a);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate bus: %m");
+
+ if (arg_transport == BUS_TRANSPORT_MACHINE)
+ r = bus_set_address_system_machine(a, arg_bus_path);
+ else
+ r = sd_bus_set_address(a, arg_bus_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set address to connect to: %m");
+
+ r = sd_bus_negotiate_fds(a, is_unix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set FD negotiation: %m");
+
+ r = sd_bus_start(a);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start bus client: %m");
+
+ r = sd_bus_get_bus_id(a, &server_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get server ID: %m");
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate bus: %m");
+
+ r = sd_bus_set_fd(b, in_fd, out_fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set fds: %m");
+
+ r = sd_bus_set_server(b, 1, server_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set server mode: %m");
+
+ r = sd_bus_negotiate_fds(b, is_unix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set FD negotiation: %m");
+
+ r = sd_bus_set_anonymous(b, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set anonymous authentication: %m");
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start bus client: %m");
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int events_a, events_b, fd;
+ uint64_t timeout_a, timeout_b, t;
+ struct timespec _ts, *ts;
+
+ r = sd_bus_process(a, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to process bus a: %m");
+
+ if (m) {
+ r = sd_bus_send(b, m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send message: %m");
+ }
+
+ if (r > 0)
+ continue;
+
+ r = sd_bus_process(b, &m);
+ if (r < 0) {
+ /* treat 'connection reset by peer' as clean exit condition */
+ if (ERRNO_IS_DISCONNECT(r))
+ return 0;
+
+ return log_error_errno(r, "Failed to process bus: %m");
+ }
+
+ if (m) {
+ r = sd_bus_send(a, m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send message: %m");
+ }
+
+ if (r > 0)
+ continue;
+
+ fd = sd_bus_get_fd(a);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to get fd: %m");
+
+ events_a = sd_bus_get_events(a);
+ if (events_a < 0)
+ return log_error_errno(events_a, "Failed to get events mask: %m");
+
+ r = sd_bus_get_timeout(a, &timeout_a);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get timeout: %m");
+
+ events_b = sd_bus_get_events(b);
+ if (events_b < 0)
+ return log_error_errno(events_b, "Failed to get events mask: %m");
+
+ r = sd_bus_get_timeout(b, &timeout_b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get timeout: %m");
+
+ t = timeout_a;
+ if (t == (uint64_t) -1 || (timeout_b != (uint64_t) -1 && timeout_b < timeout_a))
+ t = timeout_b;
+
+ if (t == (uint64_t) -1)
+ ts = NULL;
+ else {
+ usec_t nw;
+
+ nw = now(CLOCK_MONOTONIC);
+ if (t > nw)
+ t -= nw;
+ else
+ t = 0;
+
+ ts = timespec_store(&_ts, t);
+ }
+
+ struct pollfd p[3] = {
+ { .fd = fd, .events = events_a },
+ { .fd = STDIN_FILENO, .events = events_b & POLLIN },
+ { .fd = STDOUT_FILENO, .events = events_b & POLLOUT },
+ };
+
+ r = ppoll(p, ELEMENTSOF(p), ts, NULL);
+ if (r < 0)
+ return log_error_errno(errno, "ppoll() failed: %m");
+ if (p[0].revents & POLLNVAL ||
+ p[1].revents & POLLNVAL ||
+ p[2].revents & POLLNVAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADF), "Invalid file descriptor to poll on?");
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/sulogin-shell/sulogin-shell.c b/src/sulogin-shell/sulogin-shell.c
new file mode 100644
index 0000000..b0d71ff
--- /dev/null
+++ b/src/sulogin-shell/sulogin-shell.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2017 Felipe Sateler
+***/
+
+#include <errno.h>
+#include <sys/prctl.h>
+
+#include "sd-bus.h"
+
+#include "bus-util.h"
+#include "bus-error.h"
+#include "def.h"
+#include "env-util.h"
+#include "log.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "special.h"
+
+static int reload_manager(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ log_info("Reloading system manager configuration");
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Reload");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Note we use an extra-long timeout here. This is because a reload or reexec means generators are rerun which
+ * are timed out after DEFAULT_TIMEOUT_USEC. Let's use twice that time here, so that the generators can have
+ * their timeout, and for everything else there's the same time budget in place. */
+
+ r = sd_bus_call(bus, m, DEFAULT_TIMEOUT_USEC * 2, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to reload daemon: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int start_default_target(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ log_info("Starting default target");
+
+ /* Start these units only if we can replace base.target with it */
+ r = sd_bus_call_method(bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "StartUnit",
+ &error,
+ NULL,
+ "ss", SPECIAL_DEFAULT_TARGET, "isolate");
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to start default target: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int fork_wait(const char* const cmdline[]) {
+ pid_t pid;
+ int r;
+
+ r = safe_fork("(sulogin)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execv(cmdline[0], (char**) cmdline);
+ log_error_errno(errno, "Failed to execute %s: %m", cmdline[0]);
+ _exit(EXIT_FAILURE); /* Operational error */
+ }
+
+ return wait_for_terminate_and_check(cmdline[0], pid, WAIT_LOG_ABNORMAL);
+}
+
+static void print_mode(const char* mode) {
+ printf("You are in %s mode. After logging in, type \"journalctl -xb\" to view\n"
+ "system logs, \"systemctl reboot\" to reboot, \"systemctl default\" or \"exit\"\n"
+ "to boot into default mode.\n", mode);
+ fflush(stdout);
+}
+
+int main(int argc, char *argv[]) {
+ const char* sulogin_cmdline[] = {
+ SULOGIN,
+ NULL, /* --force */
+ NULL
+ };
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_setup_service();
+
+ print_mode(argc > 1 ? argv[1] : "");
+
+ if (getenv_bool("SYSTEMD_SULOGIN_FORCE") > 0)
+ /* allows passwordless logins if root account is locked. */
+ sulogin_cmdline[1] = "--force";
+
+ (void) fork_wait(sulogin_cmdline);
+
+ r = bus_connect_system_systemd(&bus);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to get D-Bus connection: %m");
+ r = 0;
+ } else {
+ (void) reload_manager(bus);
+
+ r = start_default_target(bus);
+ }
+
+ return r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c
new file mode 100644
index 0000000..e263d45
--- /dev/null
+++ b/src/sysctl/sysctl.c
@@ -0,0 +1,432 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "conf-files.h"
+#include "def.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "glob-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "main-func.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "sysctl-util.h"
+
+static char **arg_prefixes = NULL;
+static bool arg_cat_config = false;
+static PagerFlags arg_pager_flags = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_prefixes, strv_freep);
+
+typedef struct Option {
+ char *key;
+ char *value;
+ bool ignore_failure;
+} Option;
+
+static Option *option_free(Option *o) {
+ if (!o)
+ return NULL;
+
+ free(o->key);
+ free(o->value);
+
+ return mfree(o);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Option*, option_free);
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(option_hash_ops, char, string_hash_func, string_compare_func, Option, option_free);
+
+static bool test_prefix(const char *p) {
+ char **i;
+
+ if (strv_isempty(arg_prefixes))
+ return true;
+
+ STRV_FOREACH(i, arg_prefixes) {
+ const char *t;
+
+ t = path_startswith(*i, "/proc/sys/");
+ if (!t)
+ t = *i;
+
+ if (path_startswith(p, t))
+ return true;
+ }
+
+ return false;
+}
+
+static Option *option_new(
+ const char *key,
+ const char *value,
+ bool ignore_failure) {
+
+ _cleanup_(option_freep) Option *o = NULL;
+
+ assert(key);
+
+ o = new(Option, 1);
+ if (!o)
+ return NULL;
+
+ *o = (Option) {
+ .key = strdup(key),
+ .value = value ? strdup(value) : NULL,
+ .ignore_failure = ignore_failure,
+ };
+
+ if (!o->key)
+ return NULL;
+ if (value && !o->value)
+ return NULL;
+
+ return TAKE_PTR(o);
+}
+
+static int sysctl_write_or_warn(const char *key, const char *value, bool ignore_failure) {
+ int r;
+
+ r = sysctl_write(key, value);
+ if (r < 0) {
+ /* If the sysctl is not available in the kernel or we are running with reduced privileges and
+ * cannot write it, then log about the issue, and proceed without failing. (EROFS is treated
+ * as a permission problem here, since that's how container managers usually protected their
+ * sysctls.) In all other cases log an error and make the tool fail. */
+ if (ignore_failure || r == -EROFS || ERRNO_IS_PRIVILEGE(r))
+ log_debug_errno(r, "Couldn't write '%s' to '%s', ignoring: %m", value, key);
+ else if (r == -ENOENT)
+ log_info_errno(r, "Couldn't write '%s' to '%s', ignoring: %m", value, key);
+ else
+ return log_error_errno(r, "Couldn't write '%s' to '%s': %m", value, key);
+ }
+
+ return 0;
+}
+
+static int apply_all(OrderedHashmap *sysctl_options) {
+ Option *option;
+ int r = 0;
+
+ ORDERED_HASHMAP_FOREACH(option, sysctl_options) {
+ int k;
+
+ /* Ignore "negative match" options, they are there only to exclude stuff from globs. */
+ if (!option->value)
+ continue;
+
+ if (string_is_glob(option->key)) {
+ _cleanup_strv_free_ char **paths = NULL;
+ _cleanup_free_ char *pattern = NULL;
+ char **s;
+
+ pattern = path_join("/proc/sys", option->key);
+ if (!pattern)
+ return log_oom();
+
+ k = glob_extend(&paths, pattern, GLOB_NOCHECK);
+ if (k < 0) {
+ if (option->ignore_failure || ERRNO_IS_PRIVILEGE(k))
+ log_debug_errno(k, "Failed to resolve glob '%s', ignoring: %m",
+ option->key);
+ else {
+ log_error_errno(k, "Couldn't resolve glob '%s': %m",
+ option->key);
+ if (r == 0)
+ r = k;
+ }
+
+ } else if (strv_isempty(paths))
+ log_debug("No match for glob: %s", option->key);
+
+ STRV_FOREACH(s, paths) {
+ const char *key;
+
+ assert_se(key = path_startswith(*s, "/proc/sys"));
+
+ if (!test_prefix(key))
+ continue;
+
+ if (ordered_hashmap_contains(sysctl_options, key)) {
+ log_info("Not setting %s (explicit setting exists).", key);
+ continue;
+ }
+
+ k = sysctl_write_or_warn(key, option->value, option->ignore_failure);
+ if (r == 0)
+ r = k;
+ }
+
+ } else {
+ k = sysctl_write_or_warn(option->key, option->value, option->ignore_failure);
+ if (r == 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+static int parse_file(OrderedHashmap **sysctl_options, const char *path, bool ignore_enoent) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned c = 0;
+ int r;
+
+ assert(path);
+
+ r = search_and_fopen(path, "re", NULL, (const char**) CONF_PATHS_STRV("sysctl.d"), &f);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to open file '%s', ignoring: %m", path);
+ }
+
+ log_debug("Parsing %s", path);
+ for (;;) {
+ _cleanup_(option_freep) Option *new_option = NULL;
+ _cleanup_free_ char *l = NULL;
+ bool ignore_failure = false;
+ Option *existing;
+ char *p, *value;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &l);
+ if (k == 0)
+ break;
+ if (k < 0)
+ return log_error_errno(k, "Failed to read file '%s', ignoring: %m", path);
+
+ c++;
+
+ p = strstrip(l);
+
+ if (isempty(p))
+ continue;
+ if (strchr(COMMENTS "\n", *p))
+ continue;
+
+ value = strchr(p, '=');
+ if (value) {
+ if (p[0] == '-') {
+ ignore_failure = true;
+ p++;
+ }
+
+ *value = 0;
+ value++;
+ value = strstrip(value);
+
+ } else {
+ if (p[0] == '-')
+ /* We have a "negative match" option. Let's continue with value==NULL. */
+ p++;
+ else {
+ log_syntax(NULL, LOG_WARNING, path, c, 0,
+ "Line is not an assignment, ignoring: %s", p);
+ if (r == 0)
+ r = -EINVAL;
+ continue;
+ }
+ }
+
+ p = strstrip(p);
+ p = sysctl_normalize(p);
+
+ /* We can't filter out globs at this point, we'll need to do that later. */
+ if (!string_is_glob(p) &&
+ !test_prefix(p))
+ continue;
+
+ if (ordered_hashmap_ensure_allocated(sysctl_options, &option_hash_ops) < 0)
+ return log_oom();
+
+ existing = ordered_hashmap_get(*sysctl_options, p);
+ if (existing) {
+ if (streq_ptr(value, existing->value)) {
+ existing->ignore_failure = existing->ignore_failure || ignore_failure;
+ continue;
+ }
+
+ log_debug("Overwriting earlier assignment of %s at '%s:%u'.", p, path, c);
+ option_free(ordered_hashmap_remove(*sysctl_options, p));
+ }
+
+ new_option = option_new(p, value, ignore_failure);
+ if (!new_option)
+ return log_oom();
+
+ k = ordered_hashmap_put(*sysctl_options, new_option->key, new_option);
+ if (k < 0)
+ return log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", p);
+
+ TAKE_PTR(new_option);
+ }
+
+ return r;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-sysctl.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Applies kernel sysctl settings.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --cat-config Show configuration files\n"
+ " --prefix=PATH Only apply rules with the specified prefix\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_CAT_CONFIG,
+ ARG_PREFIX,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "cat-config", no_argument, NULL, ARG_CAT_CONFIG },
+ { "prefix", required_argument, NULL, ARG_PREFIX },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CAT_CONFIG:
+ arg_cat_config = true;
+ break;
+
+ case ARG_PREFIX: {
+ char *p;
+
+ /* We used to require people to specify absolute paths
+ * in /proc/sys in the past. This is kinda useless, but
+ * we need to keep compatibility. We now support any
+ * sysctl name available. */
+ sysctl_normalize(optarg);
+
+ if (path_startswith(optarg, "/proc/sys"))
+ p = strdup(optarg);
+ else
+ p = path_join("/proc/sys", optarg);
+ if (!p)
+ return log_oom();
+
+ if (strv_consume(&arg_prefixes, p) < 0)
+ return log_oom();
+
+ break;
+ }
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_cat_config && argc > optind)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Positional arguments are not allowed with --cat-config");
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(ordered_hashmap_freep) OrderedHashmap *sysctl_options = NULL;
+ int r, k;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc > optind) {
+ int i;
+
+ r = 0;
+
+ for (i = optind; i < argc; i++) {
+ k = parse_file(&sysctl_options, argv[i], false);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ } else {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) CONF_PATHS_STRV("sysctl.d"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate sysctl.d files: %m");
+
+ if (arg_cat_config) {
+ (void) pager_open(arg_pager_flags);
+
+ return cat_files(NULL, files, 0);
+ }
+
+ STRV_FOREACH(f, files) {
+ k = parse_file(&sysctl_options, *f, true);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ }
+
+ k = apply_all(sysctl_options);
+ if (k < 0 && r == 0)
+ r = k;
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/system-update-generator/system-update-generator.c b/src/system-update-generator/system-update-generator.c
new file mode 100644
index 0000000..26874cf
--- /dev/null
+++ b/src/system-update-generator/system-update-generator.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "fs-util.h"
+#include "generator.h"
+#include "log.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "string-util.h"
+#include "unit-file.h"
+#include "util.h"
+
+/*
+ * Implements the logic described in systemd.offline-updates(7).
+ */
+
+static const char *arg_dest = NULL;
+
+static int generate_symlink(void) {
+ const char *p = NULL;
+
+ if (laccess("/system-update", F_OK) < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ log_error_errno(errno, "Failed to check for system update: %m");
+ return -EINVAL;
+ }
+
+ p = strjoina(arg_dest, "/" SPECIAL_DEFAULT_TARGET);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/system-update.target", p) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", p);
+
+ return 1;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ assert(key);
+
+ /* Check if a run level is specified on the kernel command line. The
+ * command line has higher priority than any on-disk configuration, so
+ * it'll make any symlink we create moot.
+ */
+
+ if (streq(key, "systemd.unit") && !proc_cmdline_value_missing(key, value))
+ log_warning("Offline system update overridden by kernel command line systemd.unit= setting");
+ else if (!value && runlevel_to_target(key))
+ log_warning("Offline system update overridden by runlevel \"%s\" on the kernel command line", key);
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest_early);
+
+ r = generate_symlink();
+ if (r <= 0)
+ return r;
+
+ /* We parse the command line only to emit warnings. */
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/systemctl/systemctl-add-dependency.c b/src/systemctl/systemctl-add-dependency.c
new file mode 100644
index 0000000..2a0290d
--- /dev/null
+++ b/src/systemctl/systemctl-add-dependency.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-add-dependency.h"
+#include "systemctl-daemon-reload.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int add_dependency(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ _cleanup_free_ char *target = NULL;
+ const char *verb = argv[0];
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitDependency dep;
+ int r;
+
+ if (!argv[1])
+ return 0;
+
+ r = unit_name_mangle_with_suffix(argv[1], "as target",
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ ".target", &target);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ r = mangle_names("as dependency", strv_skip(argv, 2), &names);
+ if (r < 0)
+ return r;
+
+ if (streq(verb, "add-wants"))
+ dep = UNIT_WANTS;
+ else if (streq(verb, "add-requires"))
+ dep = UNIT_REQUIRES;
+ else
+ assert_not_reached("Unknown verb");
+
+ if (install_client_side()) {
+ r = unit_file_add_dependency(arg_scope, unit_file_flags_from_args(), arg_root, names, target, dep, &changes, &n_changes);
+ unit_file_dump_changes(r, "add dependency on", changes, n_changes, arg_quiet);
+
+ if (r > 0)
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "AddDependencyUnitFiles");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, names);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ssbb", target, unit_dependency_to_string(dep), arg_runtime, arg_force);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add dependency: %s", bus_error_message(&error, r));
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ if (arg_no_reload) {
+ r = 0;
+ goto finish;
+ }
+
+ r = daemon_reload(argc, argv, userdata);
+ }
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-add-dependency.h b/src/systemctl/systemctl-add-dependency.h
new file mode 100644
index 0000000..deb0da4
--- /dev/null
+++ b/src/systemctl/systemctl-add-dependency.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int add_dependency(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-cancel-job.c b/src/systemctl/systemctl-cancel-job.c
new file mode 100644
index 0000000..4c5203c
--- /dev/null
+++ b/src/systemctl/systemctl-cancel-job.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "parse-util.h"
+#include "systemctl-cancel-job.h"
+#include "systemctl-trivial-method.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int cancel_job(int argc, char *argv[], void *userdata) {
+ sd_bus *bus;
+ char **name;
+ int r;
+
+ if (argc <= 1) /* Shortcut to trivial_method() if no argument is given */
+ return trivial_method(argc, argv, userdata);
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ STRV_FOREACH(name, strv_skip(argv, 1)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ uint32_t id;
+ int q;
+
+ q = safe_atou32(*name, &id);
+ if (q < 0)
+ return log_error_errno(q, "Failed to parse job id \"%s\": %m", *name);
+
+ q = bus_call_method(bus, bus_systemd_mgr, "CancelJob", &error, NULL, "u", id);
+ if (q < 0) {
+ log_error_errno(q, "Failed to cancel job %"PRIu32": %s", id, bus_error_message(&error, q));
+ if (r == 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-cancel-job.h b/src/systemctl/systemctl-cancel-job.h
new file mode 100644
index 0000000..75151d6
--- /dev/null
+++ b/src/systemctl/systemctl-cancel-job.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int cancel_job(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-clean-or-freeze.c b/src/systemctl/systemctl-clean-or-freeze.c
new file mode 100644
index 0000000..eca3a6d
--- /dev/null
+++ b/src/systemctl/systemctl-clean-or-freeze.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-wait-for-units.h"
+#include "systemctl-clean-or-freeze.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int clean_or_freeze_unit(int argc, char *argv[], void *userdata) {
+ _cleanup_(bus_wait_for_units_freep) BusWaitForUnits *w = NULL;
+ _cleanup_strv_free_ char **names = NULL;
+ int r, ret = EXIT_SUCCESS;
+ char **name;
+ const char *method;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ if (!arg_clean_what) {
+ arg_clean_what = strv_new("cache", "runtime");
+ if (!arg_clean_what)
+ return log_oom();
+ }
+
+ r = expand_unit_names(bus, strv_skip(argv, 1), NULL, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ if (!arg_no_block) {
+ r = bus_wait_for_units_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate unit waiter: %m");
+ }
+
+ if (streq(argv[0], "clean"))
+ method = "CleanUnit";
+ else if (streq(argv[0], "freeze"))
+ method = "FreezeUnit";
+ else if (streq(argv[0], "thaw"))
+ method = "ThawUnit";
+ else
+ assert_not_reached("Unhandled method");
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ if (w) {
+ /* If we shall wait for the cleaning to complete, let's add a ref on the unit first */
+ r = bus_call_method(bus, bus_systemd_mgr, "RefUnit", &error, NULL, "s", *name);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add reference to unit %s: %s", *name, bus_error_message(&error, r));
+ if (ret == EXIT_SUCCESS)
+ ret = r;
+ continue;
+ }
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", *name);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(method, "CleanUnit")) {
+ r = sd_bus_message_append_strv(m, arg_clean_what);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to %s unit %s: %s", argv[0], *name, bus_error_message(&error, r));
+ if (ret == EXIT_SUCCESS) {
+ ret = r;
+ continue;
+ }
+ }
+
+ if (w) {
+ r = bus_wait_for_units_add_unit(w, *name, BUS_WAIT_REFFED|BUS_WAIT_FOR_MAINTENANCE_END, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch unit %s: %m", *name);
+ }
+ }
+
+ r = bus_wait_for_units_run(w);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for units: %m");
+ if (r == BUS_WAIT_FAILURE)
+ ret = EXIT_FAILURE;
+
+ return ret;
+}
diff --git a/src/systemctl/systemctl-clean-or-freeze.h b/src/systemctl/systemctl-clean-or-freeze.h
new file mode 100644
index 0000000..8e73f4e
--- /dev/null
+++ b/src/systemctl/systemctl-clean-or-freeze.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int clean_or_freeze_unit(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-compat-halt.c b/src/systemctl/systemctl-compat-halt.c
new file mode 100644
index 0000000..8e41bd6
--- /dev/null
+++ b/src/systemctl/systemctl-compat-halt.c
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "reboot-util.h"
+#include "systemctl-compat-halt.h"
+#include "systemctl-compat-telinit.h"
+#include "systemctl-logind.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+#include "utmp-wtmp.h"
+
+static int halt_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("halt", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]%s\n"
+ "\n%s%s the system.%s\n"
+ "\nOptions:\n"
+ " --help Show this help\n"
+ " --halt Halt the machine\n"
+ " -p --poweroff Switch off the machine\n"
+ " --reboot Reboot the machine\n"
+ " -f --force Force immediate halt/power-off/reboot\n"
+ " -w --wtmp-only Don't halt/power-off/reboot, just write wtmp record\n"
+ " -d --no-wtmp Don't write wtmp record\n"
+ " --no-wall Don't send wall message before halt/power-off/reboot\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , arg_action == ACTION_REBOOT ? " [ARG]" : ""
+ , ansi_highlight()
+ , arg_action == ACTION_REBOOT ? "Reboot" :
+ arg_action == ACTION_POWEROFF ? "Power off" :
+ "Halt"
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+int halt_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_HELP = 0x100,
+ ARG_HALT,
+ ARG_REBOOT,
+ ARG_NO_WALL
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, ARG_HELP },
+ { "halt", no_argument, NULL, ARG_HALT },
+ { "poweroff", no_argument, NULL, 'p' },
+ { "reboot", no_argument, NULL, ARG_REBOOT },
+ { "force", no_argument, NULL, 'f' },
+ { "wtmp-only", no_argument, NULL, 'w' },
+ { "no-wtmp", no_argument, NULL, 'd' },
+ { "no-sync", no_argument, NULL, 'n' },
+ { "no-wall", no_argument, NULL, ARG_NO_WALL },
+ {}
+ };
+
+ int c, r, runlevel;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ if (utmp_get_runlevel(&runlevel, NULL) >= 0)
+ if (IN_SET(runlevel, '0', '6'))
+ arg_force = 2;
+
+ while ((c = getopt_long(argc, argv, "pfwdnih", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_HELP:
+ return halt_help();
+
+ case ARG_HALT:
+ arg_action = ACTION_HALT;
+ break;
+
+ case 'p':
+ if (arg_action != ACTION_REBOOT)
+ arg_action = ACTION_POWEROFF;
+ break;
+
+ case ARG_REBOOT:
+ arg_action = ACTION_REBOOT;
+ break;
+
+ case 'f':
+ arg_force = 2;
+ break;
+
+ case 'w':
+ arg_dry_run = true;
+ break;
+
+ case 'd':
+ arg_no_wtmp = true;
+ break;
+
+ case 'n':
+ arg_no_sync = true;
+ break;
+
+ case ARG_NO_WALL:
+ arg_no_wall = true;
+ break;
+
+ case 'i':
+ case 'h':
+ /* Compatibility nops */
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_action == ACTION_REBOOT && (argc == optind || argc == optind + 1)) {
+ r = update_reboot_parameter_and_warn(argc == optind + 1 ? argv[optind] : NULL, false);
+ if (r < 0)
+ return r;
+ } else if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ return 1;
+}
+
+int halt_main(void) {
+ int r;
+
+ r = logind_check_inhibitors(arg_action);
+ if (r < 0)
+ return r;
+
+ /* Delayed shutdown requested, and was successful */
+ if (arg_when > 0 && logind_schedule_shutdown() == 0)
+ return 0;
+
+ /* No delay, or logind failed or is not at all available */
+ if (geteuid() != 0) {
+ if (arg_dry_run || arg_force > 0) {
+ (void) must_be_root();
+ return -EPERM;
+ }
+
+ /* Try logind if we are a normal user and no special mode applies. Maybe polkit allows us to
+ * shutdown the machine. */
+ if (IN_SET(arg_action, ACTION_POWEROFF, ACTION_REBOOT, ACTION_HALT)) {
+ r = logind_reboot(arg_action);
+ if (r >= 0)
+ return r;
+ if (IN_SET(r, -EOPNOTSUPP, -EINPROGRESS))
+ /* Requested operation is not supported on the local system or already in
+ * progress */
+ return r;
+
+ /* on all other errors, try low-level operation */
+ }
+ }
+
+ /* In order to minimize the difference between operation with and without logind, we explicitly
+ * enable non-blocking mode for this, as logind's shutdown operations are always non-blocking. */
+ arg_no_block = true;
+
+ if (!arg_dry_run && !arg_force)
+ return start_with_fallback();
+
+ assert(geteuid() == 0);
+
+ if (!arg_no_wtmp) {
+ if (sd_booted() > 0)
+ log_debug("Not writing utmp record, assuming that systemd-update-utmp is used.");
+ else {
+ r = utmp_put_shutdown();
+ if (r < 0)
+ log_warning_errno(r, "Failed to write utmp record: %m");
+ }
+ }
+
+ if (arg_dry_run)
+ return 0;
+
+ r = halt_now(arg_action);
+ return log_error_errno(r, "Failed to reboot: %m");
+}
diff --git a/src/systemctl/systemctl-compat-halt.h b/src/systemctl/systemctl-compat-halt.h
new file mode 100644
index 0000000..85b9dda
--- /dev/null
+++ b/src/systemctl/systemctl-compat-halt.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int halt_parse_argv(int argc, char *argv[]);
+
+int halt_main(void);
diff --git a/src/systemctl/systemctl-compat-runlevel.c b/src/systemctl/systemctl-compat-runlevel.c
new file mode 100644
index 0000000..e05b1b4
--- /dev/null
+++ b/src/systemctl/systemctl-compat-runlevel.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "alloc-util.h"
+#include "pretty-print.h"
+#include "systemctl-compat-runlevel.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+#include "utmp-wtmp.h"
+
+static int runlevel_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("runlevel", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n"
+ "\n%sPrints the previous and current runlevel of the init system.%s\n"
+ "\nOptions:\n"
+ " --help Show this help\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+int runlevel_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_HELP = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, ARG_HELP },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_HELP:
+ return runlevel_help();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ return 1;
+}
+
+int runlevel_main(void) {
+ int r, runlevel, previous;
+
+ r = utmp_get_runlevel(&runlevel, &previous);
+ if (r < 0) {
+ puts("unknown");
+ return r;
+ }
+
+ printf("%c %c\n",
+ previous <= 0 ? 'N' : previous,
+ runlevel <= 0 ? 'N' : runlevel);
+
+ return 0;
+}
diff --git a/src/systemctl/systemctl-compat-runlevel.h b/src/systemctl/systemctl-compat-runlevel.h
new file mode 100644
index 0000000..658524b
--- /dev/null
+++ b/src/systemctl/systemctl-compat-runlevel.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int runlevel_parse_argv(int argc, char *argv[]);
+
+int runlevel_main(void);
diff --git a/src/systemctl/systemctl-compat-shutdown.c b/src/systemctl/systemctl-compat-shutdown.c
new file mode 100644
index 0000000..0c7d18b
--- /dev/null
+++ b/src/systemctl/systemctl-compat-shutdown.c
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "alloc-util.h"
+#include "pretty-print.h"
+#include "systemctl-compat-shutdown.h"
+#include "systemctl-sysv-compat.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+
+static int shutdown_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("shutdown", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [TIME] [WALL...]\n"
+ "\n%sShut down the system.%s\n"
+ "\nOptions:\n"
+ " --help Show this help\n"
+ " -H --halt Halt the machine\n"
+ " -P --poweroff Power-off the machine\n"
+ " -r --reboot Reboot the machine\n"
+ " -h Equivalent to --poweroff, overridden by --halt\n"
+ " -k Don't halt/power-off/reboot, just send warnings\n"
+ " --no-wall Don't send wall message before halt/power-off/reboot\n"
+ " -c Cancel a pending shutdown\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+int shutdown_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_HELP = 0x100,
+ ARG_NO_WALL
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, ARG_HELP },
+ { "halt", no_argument, NULL, 'H' },
+ { "poweroff", no_argument, NULL, 'P' },
+ { "reboot", no_argument, NULL, 'r' },
+ { "kexec", no_argument, NULL, 'K' }, /* not documented extension */
+ { "no-wall", no_argument, NULL, ARG_NO_WALL },
+ {}
+ };
+
+ char **wall = NULL;
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "HPrhkKat:fFc", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_HELP:
+ return shutdown_help();
+
+ case 'H':
+ arg_action = ACTION_HALT;
+ break;
+
+ case 'P':
+ arg_action = ACTION_POWEROFF;
+ break;
+
+ case 'r':
+ if (kexec_loaded())
+ arg_action = ACTION_KEXEC;
+ else
+ arg_action = ACTION_REBOOT;
+ break;
+
+ case 'K':
+ arg_action = ACTION_KEXEC;
+ break;
+
+ case 'h':
+ if (arg_action != ACTION_HALT)
+ arg_action = ACTION_POWEROFF;
+ break;
+
+ case 'k':
+ arg_dry_run = true;
+ break;
+
+ case ARG_NO_WALL:
+ arg_no_wall = true;
+ break;
+
+ case 'a':
+ case 't': /* Note that we also ignore any passed argument to -t, not just the -t itself */
+ case 'f':
+ case 'F':
+ /* Compatibility nops */
+ break;
+
+ case 'c':
+ arg_action = ACTION_CANCEL_SHUTDOWN;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (argc > optind && arg_action != ACTION_CANCEL_SHUTDOWN) {
+ r = parse_shutdown_time_spec(argv[optind], &arg_when);
+ if (r < 0) {
+ log_error("Failed to parse time specification: %s", argv[optind]);
+ return r;
+ }
+ } else
+ arg_when = now(CLOCK_REALTIME) + USEC_PER_MINUTE;
+
+ if (argc > optind && arg_action == ACTION_CANCEL_SHUTDOWN)
+ /* No time argument for shutdown cancel */
+ wall = argv + optind;
+ else if (argc > optind + 1)
+ /* We skip the time argument */
+ wall = argv + optind + 1;
+
+ if (wall) {
+ arg_wall = strv_copy(wall);
+ if (!arg_wall)
+ return log_oom();
+ }
+
+ optind = argc;
+
+ return 1;
+}
diff --git a/src/systemctl/systemctl-compat-shutdown.h b/src/systemctl/systemctl-compat-shutdown.h
new file mode 100644
index 0000000..7acf941
--- /dev/null
+++ b/src/systemctl/systemctl-compat-shutdown.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int shutdown_parse_argv(int argc, char *argv[]);
diff --git a/src/systemctl/systemctl-compat-telinit.c b/src/systemctl/systemctl-compat-telinit.c
new file mode 100644
index 0000000..148574d
--- /dev/null
+++ b/src/systemctl/systemctl-compat-telinit.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "alloc-util.h"
+#include "pretty-print.h"
+#include "systemctl-compat-telinit.h"
+#include "systemctl-daemon-reload.h"
+#include "systemctl-start-unit.h"
+#include "systemctl-sysv-compat.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+
+static int telinit_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("telinit", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND\n\n"
+ "%sSend control commands to the init daemon.%s\n"
+ "\nCommands:\n"
+ " 0 Power-off the machine\n"
+ " 6 Reboot the machine\n"
+ " 2, 3, 4, 5 Start runlevelX.target unit\n"
+ " 1, s, S Enter rescue mode\n"
+ " q, Q Reload init daemon configuration\n"
+ " u, U Reexecute init daemon\n"
+ "\nOptions:\n"
+ " --help Show this help\n"
+ " --no-wall Don't send wall message before halt/power-off/reboot\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+int telinit_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_HELP = 0x100,
+ ARG_NO_WALL
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, ARG_HELP },
+ { "no-wall", no_argument, NULL, ARG_NO_WALL },
+ {}
+ };
+
+ static const struct {
+ char from;
+ enum action to;
+ } table[] = {
+ { '0', ACTION_POWEROFF },
+ { '6', ACTION_REBOOT },
+ { '1', ACTION_RESCUE },
+ { '2', ACTION_RUNLEVEL2 },
+ { '3', ACTION_RUNLEVEL3 },
+ { '4', ACTION_RUNLEVEL4 },
+ { '5', ACTION_RUNLEVEL5 },
+ { 's', ACTION_RESCUE },
+ { 'S', ACTION_RESCUE },
+ { 'q', ACTION_RELOAD },
+ { 'Q', ACTION_RELOAD },
+ { 'u', ACTION_REEXEC },
+ { 'U', ACTION_REEXEC }
+ };
+
+ unsigned i;
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "", options, NULL)) >= 0)
+ switch (c) {
+
+ case ARG_HELP:
+ return telinit_help();
+
+ case ARG_NO_WALL:
+ arg_no_wall = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: required argument missing.",
+ program_invocation_short_name);
+
+ if (optind + 1 < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ if (strlen(argv[optind]) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected single character argument.");
+
+ for (i = 0; i < ELEMENTSOF(table); i++)
+ if (table[i].from == argv[optind][0])
+ break;
+
+ if (i >= ELEMENTSOF(table))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown command '%s'.", argv[optind]);
+
+ arg_action = table[i].to;
+
+ optind++;
+
+ return 1;
+}
+
+int start_with_fallback(void) {
+ /* First, try systemd via D-Bus. */
+ if (start_unit(0, NULL, NULL) == 0)
+ return 0;
+
+#if HAVE_SYSV_COMPAT
+ /* Nothing else worked, so let's try /dev/initctl */
+ if (talk_initctl(action_to_runlevel()) > 0)
+ return 0;
+#endif
+
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to talk to init daemon.");
+}
+
+int reload_with_fallback(void) {
+ /* First, try systemd via D-Bus. */
+ if (daemon_reload(0, NULL, NULL) >= 0)
+ return 0;
+
+ /* Nothing else worked, so let's try signals */
+ assert(IN_SET(arg_action, ACTION_RELOAD, ACTION_REEXEC));
+
+ if (kill(1, arg_action == ACTION_RELOAD ? SIGHUP : SIGTERM) < 0)
+ return log_error_errno(errno, "kill() failed: %m");
+
+ return 0;
+}
diff --git a/src/systemctl/systemctl-compat-telinit.h b/src/systemctl/systemctl-compat-telinit.h
new file mode 100644
index 0000000..1a2bcd4
--- /dev/null
+++ b/src/systemctl/systemctl-compat-telinit.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int telinit_parse_argv(int argc, char *argv[]);
+int start_with_fallback(void);
+int reload_with_fallback(void);
diff --git a/src/systemctl/systemctl-daemon-reload.c b/src/systemctl/systemctl-daemon-reload.c
new file mode 100644
index 0000000..03ba908
--- /dev/null
+++ b/src/systemctl/systemctl-daemon-reload.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-daemon-reload.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int daemon_reload(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ const char *method;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ switch (arg_action) {
+
+ case ACTION_RELOAD:
+ method = "Reload";
+ break;
+
+ case ACTION_REEXEC:
+ method = "Reexecute";
+ break;
+
+ case ACTION_SYSTEMCTL:
+ method = streq(argv[0], "daemon-reexec") ? "Reexecute" :
+ /* "daemon-reload" */ "Reload";
+ break;
+
+ default:
+ assert_not_reached("Unexpected action");
+ }
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Note we use an extra-long timeout here. This is because a reload or reexec means generators are
+ * rerun which are timed out after DEFAULT_TIMEOUT_USEC. Let's use twice that time here, so that the
+ * generators can have their timeout, and for everything else there's the same time budget in
+ * place. */
+
+ r = sd_bus_call(bus, m, DEFAULT_TIMEOUT_USEC * 2, &error, NULL);
+
+ /* On reexecution, we expect a disconnect, not a reply */
+ if (IN_SET(r, -ETIMEDOUT, -ECONNRESET) && streq(method, "Reexecute"))
+ r = 0;
+
+ if (r < 0 && arg_action == ACTION_SYSTEMCTL)
+ return log_error_errno(r, "Failed to reload daemon: %s", bus_error_message(&error, r));
+
+ /* Note that for the legacy commands (i.e. those with action != ACTION_SYSTEMCTL) we support
+ * fallbacks to the old ways of doing things, hence don't log any error in that case here. */
+
+ return r < 0 ? r : 0;
+}
diff --git a/src/systemctl/systemctl-daemon-reload.h b/src/systemctl/systemctl-daemon-reload.h
new file mode 100644
index 0000000..a9fc007
--- /dev/null
+++ b/src/systemctl/systemctl-daemon-reload.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int daemon_reload(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-edit.c b/src/systemctl/systemctl-edit.c
new file mode 100644
index 0000000..4186ec3
--- /dev/null
+++ b/src/systemctl/systemctl-edit.c
@@ -0,0 +1,588 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "copy.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "stat-util.h"
+#include "systemctl-daemon-reload.h"
+#include "systemctl-edit.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+
+#define EDIT_MARKER_START "### Anything between here and the comment below will become the new contents of the file"
+#define EDIT_MARKER_END "### Lines below this comment will be discarded"
+
+int cat(int argc, char *argv[], void *userdata) {
+ _cleanup_(hashmap_freep) Hashmap *cached_name_map = NULL, *cached_id_map = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ _cleanup_strv_free_ char **names = NULL;
+ char **name;
+ sd_bus *bus;
+ bool first = true;
+ int r, rc = 0;
+
+ /* Include all units by default — i.e. continue as if the --all option was used */
+ if (strv_isempty(arg_states))
+ arg_all = true;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot remotely cat units.");
+
+ r = lookup_paths_init(&lp, arg_scope, 0, arg_root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine unit paths: %m");
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = expand_unit_names(bus, strv_skip(argv, 1), NULL, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ r = maybe_extend_with_unit_dependencies(bus, &names);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ STRV_FOREACH(name, names) {
+ _cleanup_free_ char *fragment_path = NULL;
+ _cleanup_strv_free_ char **dropin_paths = NULL;
+
+ r = unit_find_paths(bus, *name, &lp, false, &cached_name_map, &cached_id_map, &fragment_path, &dropin_paths);
+ if (r == -ERFKILL) {
+ printf("%s# Unit %s is masked%s.\n",
+ ansi_highlight_magenta(),
+ *name,
+ ansi_normal());
+ continue;
+ }
+ if (r == -EKEYREJECTED) {
+ printf("%s# Unit %s could not be loaded.%s\n",
+ ansi_highlight_magenta(),
+ *name,
+ ansi_normal());
+ continue;
+ }
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Skip units which have no on-disk counterpart, but propagate the error to the
+ * user */
+ rc = -ENOENT;
+ continue;
+ }
+
+ if (first)
+ first = false;
+ else
+ puts("");
+
+ if (need_daemon_reload(bus, *name) > 0) /* ignore errors (<0), this is informational output */
+ fprintf(stderr,
+ "%s# Warning: %s changed on disk, the version systemd has loaded is outdated.\n"
+ "%s# This output shows the current version of the unit's original fragment and drop-in files.\n"
+ "%s# If fragments or drop-ins were added or removed, they are not properly reflected in this output.\n"
+ "%s# Run 'systemctl%s daemon-reload' to reload units.%s\n",
+ ansi_highlight_red(),
+ *name,
+ ansi_highlight_red(),
+ ansi_highlight_red(),
+ ansi_highlight_red(),
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user",
+ ansi_normal());
+
+ r = cat_files(fragment_path, dropin_paths, 0);
+ if (r < 0)
+ return r;
+ }
+
+ return rc;
+}
+
+static int create_edit_temp_file(const char *new_path, const char *original_path, char ** const original_unit_paths, char **ret_tmp_fn) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(new_path);
+ assert(ret_tmp_fn);
+
+ r = tempfn_random(new_path, NULL, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine temporary filename for \"%s\": %m", new_path);
+
+ r = mkdir_parents_label(new_path, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create directories for \"%s\": %m", new_path);
+
+ if (original_path) {
+ r = mac_selinux_create_file_prepare(new_path, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = copy_file(original_path, t, 0, 0644, 0, 0, COPY_REFLINK);
+ if (r == -ENOENT) {
+ r = touch(t);
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return log_error_errno(r, "Failed to create temporary file \"%s\": %m", t);
+ } else {
+ mac_selinux_create_file_clear();
+ if (r < 0)
+ return log_error_errno(r, "Failed to create temporary file for \"%s\": %m", new_path);
+ }
+ } else if (original_unit_paths) {
+ _cleanup_free_ char *new_contents = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ char **path;
+ size_t size;
+
+ r = mac_selinux_create_file_prepare(new_path, S_IFREG);
+ if (r < 0)
+ return r;
+
+ f = fopen(t, "we");
+ mac_selinux_create_file_clear();
+ if (!f)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", t);
+
+ r = fchmod(fileno(f), 0644);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to change mode of \"%s\": %m", t);
+
+ r = read_full_file(new_path, &new_contents, &size);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to read \"%s\": %m", new_path);
+
+ fprintf(f,
+ "### Editing %s\n"
+ EDIT_MARKER_START
+ "\n\n%s%s\n"
+ EDIT_MARKER_END,
+ new_path,
+ strempty(new_contents),
+ new_contents && endswith(new_contents, "\n") ? "" : "\n");
+
+ /* Add a comment with the contents of the original unit files */
+ STRV_FOREACH(path, original_unit_paths) {
+ _cleanup_free_ char *contents = NULL;
+
+ /* Skip the file that's being edited */
+ if (path_equal(*path, new_path))
+ continue;
+
+ r = read_full_file(*path, &contents, &size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read \"%s\": %m", *path);
+
+ fprintf(f, "\n\n### %s", *path);
+ if (!isempty(contents)) {
+ contents = strreplace(strstrip(contents), "\n", "\n# ");
+ if (!contents)
+ return log_oom();
+ fprintf(f, "\n# %s", contents);
+ }
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create temporary file \"%s\": %m", t);
+ }
+
+ *ret_tmp_fn = TAKE_PTR(t);
+
+ return 0;
+}
+
+static int get_file_to_edit(
+ const LookupPaths *paths,
+ const char *name,
+ char **ret_path) {
+
+ _cleanup_free_ char *path = NULL, *run = NULL;
+
+ assert(name);
+ assert(ret_path);
+
+ path = path_join(paths->persistent_config, name);
+ if (!path)
+ return log_oom();
+
+ if (arg_runtime) {
+ run = path_join(paths->runtime_config, name);
+ if (!run)
+ return log_oom();
+ }
+
+ if (arg_runtime) {
+ if (access(path, F_OK) >= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "Refusing to create \"%s\" because it would be overridden by \"%s\" anyway.",
+ run, path);
+
+ *ret_path = TAKE_PTR(run);
+ } else
+ *ret_path = TAKE_PTR(path);
+
+ return 0;
+}
+
+static int unit_file_create_new(
+ const LookupPaths *paths,
+ const char *unit_name,
+ const char *suffix,
+ char ** const original_unit_paths,
+ char **ret_new_path,
+ char **ret_tmp_path) {
+
+ _cleanup_free_ char *new_path = NULL, *tmp_path = NULL;
+ const char *ending;
+ int r;
+
+ assert(unit_name);
+ assert(ret_new_path);
+ assert(ret_tmp_path);
+
+ ending = strjoina(unit_name, suffix);
+ r = get_file_to_edit(paths, ending, &new_path);
+ if (r < 0)
+ return r;
+
+ r = create_edit_temp_file(new_path, NULL, original_unit_paths, &tmp_path);
+ if (r < 0)
+ return r;
+
+ *ret_new_path = TAKE_PTR(new_path);
+ *ret_tmp_path = TAKE_PTR(tmp_path);
+
+ return 0;
+}
+
+static int unit_file_create_copy(
+ const LookupPaths *paths,
+ const char *unit_name,
+ const char *fragment_path,
+ char **ret_new_path,
+ char **ret_tmp_path) {
+
+ _cleanup_free_ char *new_path = NULL, *tmp_path = NULL;
+ int r;
+
+ assert(fragment_path);
+ assert(unit_name);
+ assert(ret_new_path);
+ assert(ret_tmp_path);
+
+ r = get_file_to_edit(paths, unit_name, &new_path);
+ if (r < 0)
+ return r;
+
+ if (!path_equal(fragment_path, new_path) && access(new_path, F_OK) >= 0) {
+ char response;
+
+ r = ask_char(&response, "yn", "\"%s\" already exists. Overwrite with \"%s\"? [(y)es, (n)o] ", new_path, fragment_path);
+ if (r < 0)
+ return r;
+ if (response != 'y')
+ return log_warning_errno(SYNTHETIC_ERRNO(EKEYREJECTED), "%s skipped.", unit_name);
+ }
+
+ r = create_edit_temp_file(new_path, fragment_path, NULL, &tmp_path);
+ if (r < 0)
+ return r;
+
+ *ret_new_path = TAKE_PTR(new_path);
+ *ret_tmp_path = TAKE_PTR(tmp_path);
+
+ return 0;
+}
+
+static int run_editor(char **paths) {
+ int r;
+
+ assert(paths);
+
+ r = safe_fork("(editor)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG|FORK_WAIT, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char **editor_args = NULL, **tmp_path, **original_path;
+ size_t n_editor_args = 0, i = 1, argc;
+ const char **args, *editor, *p;
+
+ argc = strv_length(paths)/2 + 1;
+
+ /* SYSTEMD_EDITOR takes precedence over EDITOR which takes precedence over VISUAL. If
+ * neither SYSTEMD_EDITOR nor EDITOR nor VISUAL are present, we try to execute well known
+ * editors. */
+ editor = getenv("SYSTEMD_EDITOR");
+ if (!editor)
+ editor = getenv("EDITOR");
+ if (!editor)
+ editor = getenv("VISUAL");
+
+ if (!isempty(editor)) {
+ editor_args = strv_split(editor, WHITESPACE);
+ if (!editor_args) {
+ (void) log_oom();
+ _exit(EXIT_FAILURE);
+ }
+ n_editor_args = strv_length(editor_args);
+ argc += n_editor_args - 1;
+ }
+
+ args = newa(const char*, argc + 1);
+
+ if (n_editor_args > 0) {
+ args[0] = editor_args[0];
+ for (; i < n_editor_args; i++)
+ args[i] = editor_args[i];
+ }
+
+ STRV_FOREACH_PAIR(original_path, tmp_path, paths)
+ args[i++] = *tmp_path;
+ args[i] = NULL;
+
+ if (n_editor_args > 0)
+ execvp(args[0], (char* const*) args);
+
+ FOREACH_STRING(p, "editor", "nano", "vim", "vi") {
+ args[0] = p;
+ execvp(p, (char* const*) args);
+ /* We do not fail if the editor doesn't exist because we want to try each one of them
+ * before failing. */
+ if (errno != ENOENT) {
+ log_error_errno(errno, "Failed to execute %s: %m", editor);
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ log_error("Cannot edit unit(s), no editor available. Please set either $SYSTEMD_EDITOR, $EDITOR or $VISUAL.");
+ _exit(EXIT_FAILURE);
+ }
+
+ return 0;
+}
+
+static int find_paths_to_edit(sd_bus *bus, char **names, char ***paths) {
+ _cleanup_(hashmap_freep) Hashmap *cached_name_map = NULL, *cached_id_map = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ char **name;
+ int r;
+
+ assert(names);
+ assert(paths);
+
+ r = lookup_paths_init(&lp, arg_scope, 0, arg_root);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(name, names) {
+ _cleanup_free_ char *path = NULL, *new_path = NULL, *tmp_path = NULL, *tmp_name = NULL;
+ _cleanup_strv_free_ char **unit_paths = NULL;
+ const char *unit_name;
+
+ r = unit_find_paths(bus, *name, &lp, false, &cached_name_map, &cached_id_map, &path, &unit_paths);
+ if (r == -EKEYREJECTED) {
+ /* If loading of the unit failed server side complete, then the server won't tell us
+ * the unit file path. In that case, find the file client side. */
+ log_debug_errno(r, "Unit '%s' was not loaded correctly, retrying client-side.", *name);
+ r = unit_find_paths(bus, *name, &lp, true, &cached_name_map, &cached_id_map, &path, &unit_paths);
+ }
+ if (r == -ERFKILL)
+ return log_error_errno(r, "Unit '%s' masked, cannot edit.", *name);
+ if (r < 0)
+ return r;
+
+ if (!path) {
+ if (!arg_force) {
+ log_info("Run 'systemctl edit%s --force --full %s' to create a new unit.",
+ arg_scope == UNIT_FILE_GLOBAL ? " --global" :
+ arg_scope == UNIT_FILE_USER ? " --user" : "",
+ *name);
+ return -ENOENT;
+ }
+
+ /* Create a new unit from scratch */
+ unit_name = *name;
+ r = unit_file_create_new(&lp, unit_name,
+ arg_full ? NULL : ".d/override.conf",
+ NULL, &new_path, &tmp_path);
+ } else {
+ unit_name = basename(path);
+ /* We follow unit aliases, but we need to propagate the instance */
+ if (unit_name_is_valid(*name, UNIT_NAME_INSTANCE) &&
+ unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *instance = NULL;
+
+ r = unit_name_to_instance(*name, &instance);
+ if (r < 0)
+ return r;
+
+ r = unit_name_replace_instance(unit_name, instance, &tmp_name);
+ if (r < 0)
+ return r;
+
+ unit_name = tmp_name;
+ }
+
+ if (arg_full)
+ r = unit_file_create_copy(&lp, unit_name, path, &new_path, &tmp_path);
+ else {
+ r = strv_prepend(&unit_paths, path);
+ if (r < 0)
+ return log_oom();
+
+ r = unit_file_create_new(&lp, unit_name, ".d/override.conf", unit_paths, &new_path, &tmp_path);
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = strv_push_pair(paths, new_path, tmp_path);
+ if (r < 0)
+ return log_oom();
+
+ new_path = tmp_path = NULL;
+ }
+
+ return 0;
+}
+
+static int trim_edit_markers(const char *path) {
+ _cleanup_free_ char *contents = NULL;
+ char *contents_start = NULL;
+ const char *contents_end = NULL;
+ size_t size;
+ int r;
+
+ /* Trim out the lines between the two markers */
+ r = read_full_file(path, &contents, &size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read temporary file \"%s\": %m", path);
+
+ contents_start = strstr(contents, EDIT_MARKER_START);
+ if (contents_start)
+ contents_start += strlen(EDIT_MARKER_START);
+ else
+ contents_start = contents;
+
+ contents_end = strstr(contents_start, EDIT_MARKER_END);
+ if (contents_end)
+ strshorten(contents_start, contents_end - contents_start);
+
+ contents_start = strstrip(contents_start);
+
+ /* Write new contents if the trimming actually changed anything */
+ if (strlen(contents) != size) {
+ r = write_string_file(path, contents_start, WRITE_STRING_FILE_CREATE | WRITE_STRING_FILE_TRUNCATE | WRITE_STRING_FILE_AVOID_NEWLINE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to modify temporary file \"%s\": %m", path);
+ }
+
+ return 0;
+}
+
+int edit(int argc, char *argv[], void *userdata) {
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ _cleanup_strv_free_ char **names = NULL;
+ _cleanup_strv_free_ char **paths = NULL;
+ char **original, **tmp;
+ sd_bus *bus;
+ int r;
+
+ if (!on_tty())
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot edit units if not on a tty.");
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot edit units remotely.");
+
+ r = lookup_paths_init(&lp, arg_scope, 0, arg_root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine unit paths: %m");
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = expand_unit_names(bus, strv_skip(argv, 1), NULL, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(tmp, names) {
+ r = unit_is_masked(bus, &lp, *tmp);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot edit %s: unit is masked.", *tmp);
+ }
+
+ r = find_paths_to_edit(bus, names, &paths);
+ if (r < 0)
+ return r;
+
+ if (strv_isempty(paths))
+ return -ENOENT;
+
+ r = run_editor(paths);
+ if (r < 0)
+ goto end;
+
+ STRV_FOREACH_PAIR(original, tmp, paths) {
+ /* If the temporary file is empty we ignore it. This allows the user to cancel the
+ * modification. */
+ r = trim_edit_markers(*tmp);
+ if (r < 0)
+ continue;
+
+ if (null_or_empty_path(*tmp)) {
+ log_warning("Editing \"%s\" canceled: temporary file is empty.", *original);
+ continue;
+ }
+
+ r = rename(*tmp, *original);
+ if (r < 0) {
+ r = log_error_errno(errno, "Failed to rename \"%s\" to \"%s\": %m", *tmp, *original);
+ goto end;
+ }
+ }
+
+ r = 0;
+
+ if (!arg_no_reload && !install_client_side())
+ r = daemon_reload(argc, argv, userdata);
+
+end:
+ STRV_FOREACH_PAIR(original, tmp, paths) {
+ (void) unlink(*tmp);
+
+ /* Removing empty dropin dirs */
+ if (!arg_full) {
+ _cleanup_free_ char *dir;
+
+ dir = dirname_malloc(*original);
+ if (!dir)
+ return log_oom();
+
+ /* No need to check if the dir is empty, rmdir does nothing if it is not the case. */
+ (void) rmdir(dir);
+ }
+ }
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-edit.h b/src/systemctl/systemctl-edit.h
new file mode 100644
index 0000000..fe7e4dc
--- /dev/null
+++ b/src/systemctl/systemctl-edit.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int cat(int argc, char *argv[], void *userdata);
+int edit(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-enable.c b/src/systemctl/systemctl-enable.c
new file mode 100644
index 0000000..8f053ca
--- /dev/null
+++ b/src/systemctl/systemctl-enable.c
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "locale-util.h"
+#include "path-util.h"
+#include "systemctl-daemon-reload.h"
+#include "systemctl-enable.h"
+#include "systemctl-start-unit.h"
+#include "systemctl-sysv-compat.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+static int normalize_filenames(char **names) {
+ char **u;
+ int r;
+
+ STRV_FOREACH(u, names)
+ if (!path_is_absolute(*u)) {
+ char* normalized_path;
+
+ if (!isempty(arg_root))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Non-absolute paths are not allowed when --root is used: %s",
+ *u);
+
+ if (!strchr(*u,'/'))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Link argument does contain at least one directory separator: %s",
+ *u);
+
+ r = path_make_absolute_cwd(*u, &normalized_path);
+ if (r < 0)
+ return r;
+
+ free_and_replace(*u, normalized_path);
+ }
+
+ return 0;
+}
+
+static int normalize_names(char **names, bool warn_if_path) {
+ char **u;
+ bool was_path = false;
+
+ STRV_FOREACH(u, names) {
+ int r;
+
+ if (!is_path(*u))
+ continue;
+
+ r = free_and_strdup(u, basename(*u));
+ if (r < 0)
+ return log_error_errno(r, "Failed to normalize unit file path: %m");
+
+ was_path = true;
+ }
+
+ if (warn_if_path && was_path)
+ log_warning("Warning: Can't execute disable on the unit file path. Proceeding with the unit name.");
+
+ return 0;
+}
+
+int enable_unit(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ const char *verb = argv[0];
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int carries_install_info = -1;
+ bool ignore_carries_install_info = arg_quiet;
+ int r;
+
+ if (!argv[1])
+ return 0;
+
+ r = mangle_names("to enable", strv_skip(argv, 1), &names);
+ if (r < 0)
+ return r;
+
+ r = enable_sysv_units(verb, names);
+ if (r < 0)
+ return r;
+
+ /* If the operation was fully executed by the SysV compat, let's finish early */
+ if (strv_isempty(names)) {
+ if (arg_no_reload || install_client_side())
+ return 0;
+ return daemon_reload(argc, argv, userdata);
+ }
+
+ if (streq(verb, "disable")) {
+ r = normalize_names(names, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (streq(verb, "link")) {
+ r = normalize_filenames(names);
+ if (r < 0)
+ return r;
+ }
+
+ if (install_client_side()) {
+ UnitFileFlags flags;
+
+ flags = unit_file_flags_from_args();
+ if (streq(verb, "enable")) {
+ r = unit_file_enable(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ carries_install_info = r;
+ } else if (streq(verb, "disable"))
+ r = unit_file_disable(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ else if (streq(verb, "reenable")) {
+ r = unit_file_reenable(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ carries_install_info = r;
+ } else if (streq(verb, "link"))
+ r = unit_file_link(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ else if (streq(verb, "preset")) {
+ r = unit_file_preset(arg_scope, flags, arg_root, names, arg_preset_mode, &changes, &n_changes);
+ } else if (streq(verb, "mask"))
+ r = unit_file_mask(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ else if (streq(verb, "unmask"))
+ r = unit_file_unmask(arg_scope, flags, arg_root, names, &changes, &n_changes);
+ else if (streq(verb, "revert"))
+ r = unit_file_revert(arg_scope, arg_root, names, &changes, &n_changes);
+ else
+ assert_not_reached("Unknown verb");
+
+ unit_file_dump_changes(r, verb, changes, n_changes, arg_quiet);
+ if (r < 0)
+ goto finish;
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool expect_carries_install_info = false;
+ bool send_runtime = true, send_force = true, send_preset_mode = false;
+ const char *method;
+ sd_bus *bus;
+
+ if (STR_IN_SET(verb, "mask", "unmask")) {
+ char **name;
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+
+ r = lookup_paths_init(&lp, arg_scope, 0, arg_root);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(name, names) {
+ r = unit_exists(&lp, *name);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ log_notice("Unit %s does not exist, proceeding anyway.", *name);
+ }
+ }
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ if (streq(verb, "enable")) {
+ method = "EnableUnitFiles";
+ expect_carries_install_info = true;
+ } else if (streq(verb, "disable")) {
+ method = "DisableUnitFiles";
+ send_force = false;
+ } else if (streq(verb, "reenable")) {
+ method = "ReenableUnitFiles";
+ expect_carries_install_info = true;
+ } else if (streq(verb, "link"))
+ method = "LinkUnitFiles";
+ else if (streq(verb, "preset")) {
+
+ if (arg_preset_mode != UNIT_FILE_PRESET_FULL) {
+ method = "PresetUnitFilesWithMode";
+ send_preset_mode = true;
+ } else
+ method = "PresetUnitFiles";
+
+ expect_carries_install_info = true;
+ ignore_carries_install_info = true;
+ } else if (streq(verb, "mask"))
+ method = "MaskUnitFiles";
+ else if (streq(verb, "unmask")) {
+ method = "UnmaskUnitFiles";
+ send_force = false;
+ } else if (streq(verb, "revert")) {
+ method = "RevertUnitFiles";
+ send_runtime = send_force = false;
+ } else
+ assert_not_reached("Unknown verb");
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, names);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (send_preset_mode) {
+ r = sd_bus_message_append(m, "s", unit_file_preset_mode_to_string(arg_preset_mode));
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (send_runtime) {
+ r = sd_bus_message_append(m, "b", arg_runtime);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ if (send_force) {
+ r = sd_bus_message_append(m, "b", arg_force);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to %s unit: %s", verb, bus_error_message(&error, r));
+
+ if (expect_carries_install_info) {
+ r = sd_bus_message_read(reply, "b", &carries_install_info);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ /* Try to reload if enabled */
+ if (!arg_no_reload)
+ r = daemon_reload(argc, argv, userdata);
+ else
+ r = 0;
+ }
+
+ if (carries_install_info == 0 && !ignore_carries_install_info)
+ log_notice("The unit files have no installation config (WantedBy=, RequiredBy=, Also=,\n"
+ "Alias= settings in the [Install] section, and DefaultInstance= for template\n"
+ "units). This means they are not meant to be enabled using systemctl.\n"
+ " \n" /* trick: the space is needed so that the line does not get stripped from output */
+ "Possible reasons for having this kind of units are:\n"
+ "%1$s A unit may be statically enabled by being symlinked from another unit's\n"
+ " .wants/ or .requires/ directory.\n"
+ "%1$s A unit's purpose may be to act as a helper for some other unit which has\n"
+ " a requirement dependency on it.\n"
+ "%1$s A unit may be started when needed via activation (socket, path, timer,\n"
+ " D-Bus, udev, scripted systemctl call, ...).\n"
+ "%1$s In case of template units, the unit is meant to be enabled with some\n"
+ " instance name specified.",
+ special_glyph(SPECIAL_GLYPH_BULLET));
+
+ if (arg_now && STR_IN_SET(argv[0], "enable", "disable", "mask")) {
+ sd_bus *bus;
+ size_t len, i;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ goto finish;
+
+ len = strv_length(names);
+ {
+ char *new_args[len + 2];
+
+ new_args[0] = (char*) (streq(argv[0], "enable") ? "start" : "stop");
+ for (i = 0; i < len; i++)
+ new_args[i + 1] = basename(names[i]);
+ new_args[i + 1] = NULL;
+
+ r = start_unit(len + 1, new_args, userdata);
+ }
+ }
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-enable.h b/src/systemctl/systemctl-enable.h
new file mode 100644
index 0000000..43f60e7
--- /dev/null
+++ b/src/systemctl/systemctl-enable.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int enable_unit(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-is-active.c b/src/systemctl/systemctl-is-active.c
new file mode 100644
index 0000000..3d99b0d
--- /dev/null
+++ b/src/systemctl/systemctl-is-active.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "pretty-print.h"
+#include "syslog-util.h"
+#include "systemctl-is-active.h"
+#include "systemctl-sysv-compat.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+static int check_unit_generic(int code, const UnitActiveState good_states[], int nb_states, char **args) {
+ _cleanup_strv_free_ char **names = NULL;
+ UnitActiveState active_state;
+ sd_bus *bus;
+ char **name;
+ int r, i;
+ bool found = false;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = expand_unit_names(bus, args, NULL, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(name, names) {
+ r = get_state_one_unit(bus, *name, &active_state);
+ if (r < 0)
+ return r;
+
+ if (!arg_quiet)
+ puts(unit_active_state_to_string(active_state));
+
+ for (i = 0; i < nb_states; ++i)
+ if (good_states[i] == active_state)
+ found = true;
+ }
+
+ /* use the given return code for the case that we won't find
+ * any unit which matches the list */
+ return found ? 0 : code;
+}
+
+int check_unit_active(int argc, char *argv[], void *userdata) {
+ static const UnitActiveState states[] = {
+ UNIT_ACTIVE,
+ UNIT_RELOADING,
+ };
+
+ /* According to LSB: 3, "program is not running" */
+ return check_unit_generic(EXIT_PROGRAM_NOT_RUNNING, states, ELEMENTSOF(states), strv_skip(argv, 1));
+}
+
+int check_unit_failed(int argc, char *argv[], void *userdata) {
+ static const UnitActiveState states[] = {
+ UNIT_FAILED,
+ };
+
+ return check_unit_generic(EXIT_PROGRAM_DEAD_AND_PID_EXISTS, states, ELEMENTSOF(states), strv_skip(argv, 1));
+}
diff --git a/src/systemctl/systemctl-is-active.h b/src/systemctl/systemctl-is-active.h
new file mode 100644
index 0000000..9a5238e
--- /dev/null
+++ b/src/systemctl/systemctl-is-active.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int check_unit_active(int argc, char *argv[], void *userdata);
+int check_unit_failed(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-is-enabled.c b/src/systemctl/systemctl-is-enabled.c
new file mode 100644
index 0000000..babd590
--- /dev/null
+++ b/src/systemctl/systemctl-is-enabled.c
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-is-enabled.h"
+#include "systemctl-sysv-compat.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+static int show_installation_targets_client_side(const char *name) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0, i;
+ UnitFileFlags flags;
+ char **p;
+ int r;
+
+ p = STRV_MAKE(name);
+ flags = UNIT_FILE_DRY_RUN |
+ (arg_runtime ? UNIT_FILE_RUNTIME : 0);
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+ for (i = 0; i < n_changes; i++)
+ if (changes[i].type == UNIT_FILE_UNLINK)
+ printf(" %s\n", changes[i].path);
+
+ return 0;
+}
+
+static int show_installation_targets(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *link;
+ int r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "GetUnitFileLinks", &error, &reply, "sb", name, arg_runtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit file links for %s: %s", name, bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "s", &link)) > 0)
+ printf(" %s\n", link);
+
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+int unit_is_enabled(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ bool enabled;
+ char **name;
+ int r;
+
+ r = mangle_names("to check", strv_skip(argv, 1), &names);
+ if (r < 0)
+ return r;
+
+ r = enable_sysv_units(argv[0], names);
+ if (r < 0)
+ return r;
+
+ enabled = r > 0;
+
+ if (install_client_side()) {
+ STRV_FOREACH(name, names) {
+ UnitFileState state;
+
+ r = unit_file_get_state(arg_scope, arg_root, *name, &state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit file state for %s: %m", *name);
+
+ if (IN_SET(state,
+ UNIT_FILE_ENABLED,
+ UNIT_FILE_ENABLED_RUNTIME,
+ UNIT_FILE_STATIC,
+ UNIT_FILE_ALIAS,
+ UNIT_FILE_INDIRECT,
+ UNIT_FILE_GENERATED))
+ enabled = true;
+
+ if (!arg_quiet) {
+ puts(unit_file_state_to_string(state));
+ if (arg_full) {
+ r = show_installation_targets_client_side(*name);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *s;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "GetUnitFileState", &error, &reply, "s", *name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit file state for %s: %s", *name, bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (STR_IN_SET(s, "enabled", "enabled-runtime", "static", "alias", "indirect", "generated"))
+ enabled = true;
+
+ if (!arg_quiet) {
+ puts(s);
+ if (arg_full) {
+ r = show_installation_targets(bus, *name);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ return enabled ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/systemctl/systemctl-is-enabled.h b/src/systemctl/systemctl-is-enabled.h
new file mode 100644
index 0000000..5cb9e5c
--- /dev/null
+++ b/src/systemctl/systemctl-is-enabled.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int unit_is_enabled(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-is-system-running.c b/src/systemctl/systemctl-is-system-running.c
new file mode 100644
index 0000000..ecebf0d
--- /dev/null
+++ b/src/systemctl/systemctl-is-system-running.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-event.h"
+#include "sd-daemon.h"
+
+#include "systemctl-util.h"
+#include "systemctl-is-system-running.h"
+#include "virt.h"
+#include "systemctl.h"
+#include "bus-util.h"
+#include "bus-locator.h"
+#include "bus-error.h"
+
+static int match_startup_finished(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ char **state = userdata;
+ int r;
+
+ assert(state);
+
+ r = bus_get_property_string(sd_bus_message_get_bus(m), bus_systemd_mgr, "SystemState", NULL, state);
+
+ sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), r);
+ return 0;
+}
+
+int is_system_running(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot_startup_finished = NULL;
+ _cleanup_(sd_event_unrefp) sd_event* event = NULL;
+ _cleanup_free_ char *state = NULL;
+ sd_bus *bus;
+ int r;
+
+ if (running_in_chroot() > 0 || (arg_transport == BUS_TRANSPORT_LOCAL && !sd_booted())) {
+ if (!arg_quiet)
+ puts("offline");
+ return EXIT_FAILURE;
+ }
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ if (arg_wait) {
+ r = sd_event_default(&event);
+ if (r >= 0)
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r >= 0)
+ r = bus_match_signal_async(
+ bus,
+ &slot_startup_finished,
+ bus_systemd_mgr,
+ "StartupFinished",
+ match_startup_finished, NULL, &state);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to request match for StartupFinished: %m");
+ arg_wait = false;
+ }
+ }
+
+ r = bus_get_property_string(bus, bus_systemd_mgr, "SystemState", &error, &state);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to query system state: %s", bus_error_message(&error, r));
+
+ if (!arg_quiet)
+ puts("unknown");
+ return EXIT_FAILURE;
+ }
+
+ if (arg_wait && STR_IN_SET(state, "initializing", "starting")) {
+ r = sd_event_loop(event);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to get property from event loop: %m");
+ if (!arg_quiet)
+ puts("unknown");
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (!arg_quiet)
+ puts(state);
+
+ return streq(state, "running") ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/systemctl/systemctl-is-system-running.h b/src/systemctl/systemctl-is-system-running.h
new file mode 100644
index 0000000..3d7e9fb
--- /dev/null
+++ b/src/systemctl/systemctl-is-system-running.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int is_system_running(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-kill.c b/src/systemctl/systemctl-kill.c
new file mode 100644
index 0000000..810aad1
--- /dev/null
+++ b/src/systemctl/systemctl-kill.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-kill.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int kill_unit(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ char *kill_who = NULL, **name;
+ sd_bus *bus;
+ int r, q;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ if (!arg_kill_who)
+ arg_kill_who = "all";
+
+ /* --fail was specified */
+ if (streq(arg_job_mode, "fail"))
+ kill_who = strjoina(arg_kill_who, "-fail");
+
+ r = expand_unit_names(bus, strv_skip(argv, 1), NULL, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ q = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ "KillUnit",
+ &error,
+ NULL,
+ "ssi", *name, kill_who ? kill_who : arg_kill_who, arg_signal);
+ if (q < 0) {
+ log_error_errno(q, "Failed to kill unit %s: %s", *name, bus_error_message(&error, q));
+ if (r == 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-kill.h b/src/systemctl/systemctl-kill.h
new file mode 100644
index 0000000..a42d4f1
--- /dev/null
+++ b/src/systemctl/systemctl-kill.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int kill_unit(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-list-dependencies.c b/src/systemctl/systemctl-list-dependencies.c
new file mode 100644
index 0000000..821998e
--- /dev/null
+++ b/src/systemctl/systemctl-list-dependencies.c
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "locale-util.h"
+#include "sort-util.h"
+#include "special.h"
+#include "systemctl-list-dependencies.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+
+static int list_dependencies_print(const char *name, int level, unsigned branches, bool last) {
+ _cleanup_free_ char *n = NULL;
+ size_t max_len = MAX(columns(),20u);
+ size_t len = 0;
+ int i;
+
+ if (!arg_plain) {
+
+ for (i = level - 1; i >= 0; i--) {
+ len += 2;
+ if (len > max_len - 3 && !arg_full) {
+ printf("%s...\n",max_len % 2 ? "" : " ");
+ return 0;
+ }
+ printf("%s", special_glyph(branches & (1 << i) ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE));
+ }
+ len += 2;
+
+ if (len > max_len - 3 && !arg_full) {
+ printf("%s...\n",max_len % 2 ? "" : " ");
+ return 0;
+ }
+
+ printf("%s", special_glyph(last ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH));
+ }
+
+ if (arg_full) {
+ printf("%s\n", name);
+ return 0;
+ }
+
+ n = ellipsize(name, max_len-len, 100);
+ if (!n)
+ return log_oom();
+
+ printf("%s\n", n);
+ return 0;
+}
+
+static int list_dependencies_compare(char * const *a, char * const *b) {
+ if (unit_name_to_type(*a) == UNIT_TARGET && unit_name_to_type(*b) != UNIT_TARGET)
+ return 1;
+ if (unit_name_to_type(*a) != UNIT_TARGET && unit_name_to_type(*b) == UNIT_TARGET)
+ return -1;
+
+ return strcasecmp(*a, *b);
+}
+
+static int list_dependencies_one(
+ sd_bus *bus,
+ const char *name,
+ int level,
+ char ***units,
+ unsigned branches) {
+
+ _cleanup_strv_free_ char **deps = NULL;
+ char **c;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(units);
+
+ r = strv_extend(units, name);
+ if (r < 0)
+ return log_oom();
+
+ r = unit_get_dependencies(bus, name, &deps);
+ if (r < 0)
+ return r;
+
+ typesafe_qsort(deps, strv_length(deps), list_dependencies_compare);
+
+ STRV_FOREACH(c, deps) {
+ if (strv_contains(*units, *c)) {
+ if (!arg_plain) {
+ printf(" ");
+ r = list_dependencies_print("...", level + 1, (branches << 1) | (c[1] == NULL ? 0 : 1), 1);
+ if (r < 0)
+ return r;
+ }
+ continue;
+ }
+
+ if (arg_plain)
+ printf(" ");
+ else {
+ UnitActiveState active_state = _UNIT_ACTIVE_STATE_INVALID;
+ const char *on;
+
+ (void) get_state_one_unit(bus, *c, &active_state);
+
+ switch (active_state) {
+ case UNIT_ACTIVE:
+ case UNIT_RELOADING:
+ case UNIT_ACTIVATING:
+ on = ansi_highlight_green();
+ break;
+
+ case UNIT_INACTIVE:
+ case UNIT_DEACTIVATING:
+ on = ansi_normal();
+ break;
+
+ default:
+ on = ansi_highlight_red();
+ break;
+ }
+
+ printf("%s%s%s ", on, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), ansi_normal());
+ }
+
+ r = list_dependencies_print(*c, level, branches, c[1] == NULL);
+ if (r < 0)
+ return r;
+
+ if (arg_all || unit_name_to_type(*c) == UNIT_TARGET) {
+ r = list_dependencies_one(bus, *c, level + 1, units, (branches << 1) | (c[1] == NULL ? 0 : 1));
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!arg_plain)
+ strv_remove(*units, name);
+
+ return 0;
+}
+
+int list_dependencies(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **units = NULL, **done = NULL;
+ char **u, **patterns;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ patterns = strv_skip(argv, 1);
+ if (strv_isempty(patterns)) {
+ units = strv_new(SPECIAL_DEFAULT_TARGET);
+ if (!units)
+ return log_oom();
+ } else {
+ r = expand_unit_names(bus, patterns, NULL, &units, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ STRV_FOREACH(u, units) {
+ if (u != units)
+ puts("");
+
+ puts(*u);
+ r = list_dependencies_one(bus, *u, 0, &done, 0);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/systemctl/systemctl-list-dependencies.h b/src/systemctl/systemctl-list-dependencies.h
new file mode 100644
index 0000000..7246570
--- /dev/null
+++ b/src/systemctl/systemctl-list-dependencies.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int list_dependencies(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-list-jobs.c b/src/systemctl/systemctl-list-jobs.c
new file mode 100644
index 0000000..8b028c0
--- /dev/null
+++ b/src/systemctl/systemctl-list-jobs.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "locale-util.h"
+#include "systemctl-list-jobs.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+
+static int output_waiting_jobs(sd_bus *bus, Table *table, uint32_t id, const char *method, const char *prefix) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *name, *type;
+ uint32_t other_id;
+ int r;
+
+ assert(bus);
+
+ r = bus_call_method(bus, bus_systemd_mgr, method, &error, &reply, "u", id);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get waiting jobs for job %" PRIu32, id);
+
+ r = sd_bus_message_enter_container(reply, 'a', "(usssoo)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(usssoo)", &other_id, &name, &type, NULL, NULL, NULL)) > 0) {
+ _cleanup_free_ char *row = NULL;
+ int rc;
+
+ if (asprintf(&row, "%s %u (%s/%s)", prefix, other_id, name, type) < 0)
+ return log_oom();
+
+ rc = table_add_many(table,
+ TABLE_STRING, special_glyph(SPECIAL_GLYPH_TREE_RIGHT),
+ TABLE_STRING, row,
+ TABLE_EMPTY,
+ TABLE_EMPTY);
+ if (rc < 0)
+ return table_log_add_error(r);
+ }
+
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+struct job_info {
+ uint32_t id;
+ const char *name, *type, *state;
+};
+
+static int output_jobs_list(sd_bus *bus, const struct job_info* jobs, unsigned n, bool skipped) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ const struct job_info *j;
+ const char *on, *off;
+ int r;
+
+ assert(n == 0 || jobs);
+
+ if (n == 0) {
+ if (!arg_no_legend) {
+ on = ansi_highlight_green();
+ off = ansi_normal();
+
+ printf("%sNo jobs %s.%s\n", on, skipped ? "listed" : "running", off);
+ }
+ return 0;
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ table = table_new("job", "unit", "type", "state");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, !arg_no_legend);
+ if (arg_full)
+ table_set_width(table, 0);
+
+ (void) table_set_empty_string(table, "-");
+
+ for (j = jobs; j < jobs + n; j++) {
+ if (streq(j->state, "running"))
+ on = ansi_highlight();
+ else
+ on = "";
+
+ r = table_add_many(table,
+ TABLE_UINT, j->id,
+ TABLE_STRING, j->name,
+ TABLE_SET_COLOR, on,
+ TABLE_STRING, j->type,
+ TABLE_STRING, j->state,
+ TABLE_SET_COLOR, on);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (arg_jobs_after)
+ output_waiting_jobs(bus, table, j->id, "GetJobAfter", "\twaiting for job");
+ if (arg_jobs_before)
+ output_waiting_jobs(bus, table, j->id, "GetJobBefore", "\tblocking job");
+ }
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to print the table: %m");
+
+ if (!arg_no_legend) {
+ on = ansi_highlight();
+ off = ansi_normal();
+
+ printf("\n%s%u jobs listed%s.\n", on, n, off);
+ }
+
+ return 0;
+}
+
+static bool output_show_job(struct job_info *job, char **patterns) {
+ return strv_fnmatch_or_empty(patterns, job->name, FNM_NOESCAPE);
+}
+
+int list_jobs(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ struct job_info *jobs = NULL;
+ const char *name, *type, *state;
+ bool skipped = false;
+ size_t size = 0;
+ unsigned c = 0;
+ sd_bus *bus;
+ uint32_t id;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "ListJobs", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list jobs: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, 'a', "(usssoo)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(usssoo)", &id, &name, &type, &state, NULL, NULL)) > 0) {
+ struct job_info job = { id, name, type, state };
+
+ if (!output_show_job(&job, strv_skip(argv, 1))) {
+ skipped = true;
+ continue;
+ }
+
+ if (!GREEDY_REALLOC(jobs, size, c + 1))
+ return log_oom();
+
+ jobs[c++] = job;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ (void) pager_open(arg_pager_flags);
+
+ return output_jobs_list(bus, jobs, c, skipped);
+}
diff --git a/src/systemctl/systemctl-list-jobs.h b/src/systemctl/systemctl-list-jobs.h
new file mode 100644
index 0000000..aa49696
--- /dev/null
+++ b/src/systemctl/systemctl-list-jobs.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int list_jobs(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-list-machines.c b/src/systemctl/systemctl-list-machines.c
new file mode 100644
index 0000000..48d0e8b
--- /dev/null
+++ b/src/systemctl/systemctl-list-machines.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "sd-login.h"
+
+#include "bus-map-properties.h"
+#include "hostname-util.h"
+#include "locale-util.h"
+#include "memory-util.h"
+#include "sort-util.h"
+#include "systemctl-list-machines.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+
+const struct bus_properties_map machine_info_property_map[] = {
+ { "SystemState", "s", NULL, offsetof(struct machine_info, state) },
+ { "NJobs", "u", NULL, offsetof(struct machine_info, n_jobs) },
+ { "NFailedUnits", "u", NULL, offsetof(struct machine_info, n_failed_units) },
+ { "ControlGroup", "s", NULL, offsetof(struct machine_info, control_group) },
+ { "UserspaceTimestamp", "t", NULL, offsetof(struct machine_info, timestamp) },
+ {}
+};
+
+void machine_info_clear(struct machine_info *info) {
+ assert(info);
+
+ free(info->name);
+ free(info->state);
+ free(info->control_group);
+ zero(*info);
+}
+
+static void free_machines_list(struct machine_info *machine_infos, int n) {
+ int i;
+
+ if (!machine_infos)
+ return;
+
+ for (i = 0; i < n; i++)
+ machine_info_clear(&machine_infos[i]);
+
+ free(machine_infos);
+}
+
+static int compare_machine_info(const struct machine_info *a, const struct machine_info *b) {
+ int r;
+
+ r = CMP(b->is_host, a->is_host);
+ if (r != 0)
+ return r;
+
+ return strcasecmp(a->name, b->name);
+}
+
+static int get_machine_properties(sd_bus *bus, struct machine_info *mi) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *container = NULL;
+ int r;
+
+ assert(mi);
+
+ if (!bus) {
+ r = sd_bus_open_system_machine(&container, mi->name);
+ if (r < 0)
+ return r;
+
+ bus = container;
+ }
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ machine_info_property_map,
+ BUS_MAP_STRDUP,
+ NULL,
+ NULL,
+ mi);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static bool output_show_machine(const char *name, char **patterns) {
+ return strv_fnmatch_or_empty(patterns, name, FNM_NOESCAPE);
+}
+
+static int get_machine_list(
+ sd_bus *bus,
+ struct machine_info **_machine_infos,
+ char **patterns) {
+
+ struct machine_info *machine_infos = NULL;
+ _cleanup_strv_free_ char **m = NULL;
+ _cleanup_free_ char *hn = NULL;
+ size_t sz = 0;
+ char **i;
+ int c = 0, r;
+
+ hn = gethostname_malloc();
+ if (!hn)
+ return log_oom();
+
+ if (output_show_machine(hn, patterns)) {
+ if (!GREEDY_REALLOC0(machine_infos, sz, c+1))
+ return log_oom();
+
+ machine_infos[c].is_host = true;
+ machine_infos[c].name = TAKE_PTR(hn);
+
+ (void) get_machine_properties(bus, &machine_infos[c]);
+ c++;
+ }
+
+ r = sd_get_machine_names(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine list: %m");
+
+ STRV_FOREACH(i, m) {
+ _cleanup_free_ char *class = NULL;
+
+ if (!output_show_machine(*i, patterns))
+ continue;
+
+ sd_machine_get_class(*i, &class);
+ if (!streq_ptr(class, "container"))
+ continue;
+
+ if (!GREEDY_REALLOC0(machine_infos, sz, c+1)) {
+ free_machines_list(machine_infos, c);
+ return log_oom();
+ }
+
+ machine_infos[c].is_host = false;
+ machine_infos[c].name = strdup(*i);
+ if (!machine_infos[c].name) {
+ free_machines_list(machine_infos, c);
+ return log_oom();
+ }
+
+ (void) get_machine_properties(NULL, &machine_infos[c]);
+ c++;
+ }
+
+ *_machine_infos = machine_infos;
+ return c;
+}
+
+static int output_machines_list(struct machine_info *machine_infos, unsigned n) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ struct machine_info *m;
+ bool state_missing = false;
+ int r;
+
+ assert(machine_infos || n == 0);
+
+ table = table_new("", "name", "state", "failed", "jobs");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, !arg_no_legend);
+ if (arg_plain) {
+ /* Hide the 'glyph' column when --plain is requested */
+ r = table_hide_column_from_display(table, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to hide column: %m");
+ }
+ if (arg_full)
+ table_set_width(table, 0);
+
+ (void) table_set_empty_string(table, "-");
+
+ for (m = machine_infos; m < machine_infos + n; m++) {
+ _cleanup_free_ char *mname = NULL;
+ const char *on_state = "", *on_failed = "";
+ bool circle = false;
+
+ if (streq_ptr(m->state, "degraded")) {
+ on_state = ansi_highlight_red();
+ circle = true;
+ } else if (!streq_ptr(m->state, "running")) {
+ on_state = ansi_highlight_yellow();
+ circle = true;
+ }
+
+ if (m->n_failed_units > 0)
+ on_failed = ansi_highlight_red();
+ else
+ on_failed = "";
+
+ if (!m->state)
+ state_missing = true;
+
+ if (m->is_host)
+ mname = strjoin(strna(m->name), " (host)");
+
+ r = table_add_many(table,
+ TABLE_STRING, circle ? special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE) : " ",
+ TABLE_SET_COLOR, on_state,
+ TABLE_STRING, m->is_host ? mname : strna(m->name),
+ TABLE_STRING, strna(m->state),
+ TABLE_SET_COLOR, on_state,
+ TABLE_UINT32, m->n_failed_units,
+ TABLE_SET_COLOR, on_failed,
+ TABLE_UINT32, m->n_jobs);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = output_table(table);
+ if (r < 0)
+ return r;
+
+ if (!arg_no_legend) {
+ printf("\n");
+ if (state_missing && geteuid() != 0)
+ printf("Notice: some information only available to privileged users was not shown.\n");
+ printf("%u machines listed.\n", n);
+ }
+
+ return 0;
+}
+
+int list_machines(int argc, char *argv[], void *userdata) {
+ struct machine_info *machine_infos = NULL;
+ sd_bus *bus;
+ int r, rc;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = get_machine_list(bus, &machine_infos, strv_skip(argv, 1));
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ typesafe_qsort(machine_infos, r, compare_machine_info);
+ rc = output_machines_list(machine_infos, r);
+ free_machines_list(machine_infos, r);
+
+ return rc;
+}
diff --git a/src/systemctl/systemctl-list-machines.h b/src/systemctl/systemctl-list-machines.h
new file mode 100644
index 0000000..4a33e2b
--- /dev/null
+++ b/src/systemctl/systemctl-list-machines.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "bus-map-properties.h"
+#include "time-util.h"
+
+int list_machines(int argc, char *argv[], void *userdata);
+
+struct machine_info {
+ bool is_host;
+ char *name;
+ char *state;
+ char *control_group;
+ uint32_t n_failed_units;
+ uint32_t n_jobs;
+ usec_t timestamp;
+};
+
+void machine_info_clear(struct machine_info *info);
+
+extern const struct bus_properties_map machine_info_property_map[];
diff --git a/src/systemctl/systemctl-list-unit-files.c b/src/systemctl/systemctl-list-unit-files.c
new file mode 100644
index 0000000..e1bf876
--- /dev/null
+++ b/src/systemctl/systemctl-list-unit-files.c
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "sort-util.h"
+#include "systemctl-list-unit-files.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+
+static int compare_unit_file_list(const UnitFileList *a, const UnitFileList *b) {
+ const char *d1, *d2;
+
+ d1 = strrchr(a->path, '.');
+ d2 = strrchr(b->path, '.');
+
+ if (d1 && d2) {
+ int r;
+
+ r = strcasecmp(d1, d2);
+ if (r != 0)
+ return r;
+ }
+
+ return strcasecmp(basename(a->path), basename(b->path));
+}
+
+static bool output_show_unit_file(const UnitFileList *u, char **states, char **patterns) {
+ assert(u);
+
+ if (!strv_fnmatch_or_empty(patterns, basename(u->path), FNM_NOESCAPE))
+ return false;
+
+ if (!strv_isempty(arg_types)) {
+ const char *dot;
+
+ dot = strrchr(u->path, '.');
+ if (!dot)
+ return false;
+
+ if (!strv_find(arg_types, dot+1))
+ return false;
+ }
+
+ if (!strv_isempty(states) &&
+ !strv_find(states, unit_file_state_to_string(u->state)))
+ return false;
+
+ return true;
+}
+
+static int output_unit_file_list(const UnitFileList *units, unsigned c) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ _cleanup_(unit_file_presets_freep) UnitFilePresets presets = {};
+ int r;
+
+ table = table_new("unit file", "state", "vendor preset");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, !arg_no_legend);
+ if (arg_full)
+ table_set_width(table, 0);
+
+ (void) table_set_empty_string(table, "-");
+
+ for (const UnitFileList *u = units; u < units + c; u++) {
+ const char *on_underline = NULL, *on_unit_color = NULL, *id;
+ bool underline;
+
+ underline = u + 1 < units + c &&
+ !streq(unit_type_suffix(u->path), unit_type_suffix((u + 1)->path));
+
+ if (underline)
+ on_underline = ansi_underline();
+
+ if (IN_SET(u->state,
+ UNIT_FILE_MASKED,
+ UNIT_FILE_MASKED_RUNTIME,
+ UNIT_FILE_DISABLED,
+ UNIT_FILE_BAD))
+ on_unit_color = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ else if (IN_SET(u->state,
+ UNIT_FILE_ENABLED,
+ UNIT_FILE_ALIAS))
+ on_unit_color = underline ? ansi_highlight_green_underline() : ansi_highlight_green();
+ else
+ on_unit_color = on_underline;
+
+ id = basename(u->path);
+
+ r = table_add_many(table,
+ TABLE_STRING, id,
+ TABLE_SET_BOTH_COLORS, strempty(on_underline),
+ TABLE_STRING, unit_file_state_to_string(u->state),
+ TABLE_SET_BOTH_COLORS, strempty(on_unit_color));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (show_preset_for_state(u->state)) {
+ const char *unit_preset_str, *on_preset_color;
+
+ r = unit_file_query_preset(arg_scope, arg_root, id, &presets);
+ if (r < 0) {
+ unit_preset_str = "n/a";
+ on_preset_color = underline ? on_underline : ansi_normal();
+ } else if (r == 0) {
+ unit_preset_str = "disabled";
+ on_preset_color = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ } else {
+ unit_preset_str = "enabled";
+ on_preset_color = underline ? ansi_highlight_green_underline() : ansi_highlight_green();
+ }
+
+ r = table_add_many(table,
+ TABLE_STRING, unit_preset_str,
+ TABLE_SET_BOTH_COLORS, strempty(on_preset_color));
+ } else
+ r = table_add_many(table,
+ TABLE_EMPTY,
+ TABLE_SET_BOTH_COLORS, underline ? ansi_grey_underline() : ansi_grey());
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = output_table(table);
+ if (r < 0)
+ return r;
+
+ if (!arg_no_legend)
+ printf("\n%u unit files listed.\n", c);
+
+ return 0;
+}
+
+int list_unit_files(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ UnitFileList *units = NULL;
+ UnitFileList *unit;
+ size_t size = 0;
+ unsigned c = 0;
+ const char *state;
+ char *path;
+ int r;
+ bool fallback = false;
+
+ if (install_client_side()) {
+ Hashmap *h;
+ UnitFileList *u;
+ unsigned n_units;
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return log_oom();
+
+ r = unit_file_get_list(arg_scope, arg_root, h, arg_states, strv_skip(argv, 1));
+ if (r < 0) {
+ unit_file_list_free(h);
+ return log_error_errno(r, "Failed to get unit file list: %m");
+ }
+
+ n_units = hashmap_size(h);
+
+ units = new(UnitFileList, n_units ?: 1); /* avoid malloc(0) */
+ if (!units) {
+ unit_file_list_free(h);
+ return log_oom();
+ }
+
+ HASHMAP_FOREACH(u, h) {
+ if (!output_show_unit_file(u, NULL, NULL))
+ continue;
+
+ units[c++] = *u;
+ free(u);
+ }
+
+ assert(c <= n_units);
+ hashmap_free(h);
+
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "ListUnitFilesByPatterns");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_states);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (arg_with_dependencies) {
+ _cleanup_strv_free_ char **names_with_deps = NULL;
+
+ r = append_unit_dependencies(bus, strv_skip(argv, 1), &names_with_deps);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append unit dependencies: %m");
+
+ r = sd_bus_message_append_strv(m, names_with_deps);
+ if (r < 0)
+ return bus_log_create_error(r);
+ } else {
+ r = sd_bus_message_append_strv(m, strv_skip(argv, 1));
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0 && sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD)) {
+ /* Fallback to legacy ListUnitFiles method */
+ fallback = true;
+ log_debug_errno(r, "Failed to list unit files: %s Falling back to ListUnitsFiles method.", bus_error_message(&error, r));
+ m = sd_bus_message_unref(m);
+ sd_bus_error_free(&error);
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "ListUnitFiles");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to list unit files: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(ss)", &path, &state)) > 0) {
+
+ if (!GREEDY_REALLOC(units, size, c + 1))
+ return log_oom();
+
+ units[c] = (struct UnitFileList) {
+ path,
+ unit_file_state_from_string(state)
+ };
+
+ if (output_show_unit_file(&units[c],
+ fallback ? arg_states : NULL,
+ fallback ? strv_skip(argv, 1) : NULL))
+ c++;
+
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ (void) pager_open(arg_pager_flags);
+
+ typesafe_qsort(units, c, compare_unit_file_list);
+ r = output_unit_file_list(units, c);
+ if (r < 0)
+ return r;
+
+ if (install_client_side())
+ for (unit = units; unit < units + c; unit++)
+ free(unit->path);
+
+ if (c == 0)
+ return -ENOENT;
+
+ return 0;
+}
diff --git a/src/systemctl/systemctl-list-unit-files.h b/src/systemctl/systemctl-list-unit-files.h
new file mode 100644
index 0000000..387233e
--- /dev/null
+++ b/src/systemctl/systemctl-list-unit-files.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int list_unit_files(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-list-units.c b/src/systemctl/systemctl-list-units.c
new file mode 100644
index 0000000..c7a91ba
--- /dev/null
+++ b/src/systemctl/systemctl-list-units.c
@@ -0,0 +1,771 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-login.h"
+
+#include "bus-error.h"
+#include "format-table.h"
+#include "locale-util.h"
+#include "set.h"
+#include "sort-util.h"
+#include "systemctl-list-units.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+
+static void message_set_freep(Set **set) {
+ set_free_with_destructor(*set, sd_bus_message_unref);
+}
+
+static int get_unit_list_recursive(
+ sd_bus *bus,
+ char **patterns,
+ UnitInfo **ret_unit_infos,
+ Set **ret_replies,
+ char ***ret_machines) {
+
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ _cleanup_(message_set_freep) Set *replies;
+ sd_bus_message *reply;
+ int c, r;
+
+ assert(bus);
+ assert(ret_replies);
+ assert(ret_unit_infos);
+ assert(ret_machines);
+
+ replies = set_new(NULL);
+ if (!replies)
+ return log_oom();
+
+ c = get_unit_list(bus, NULL, patterns, &unit_infos, 0, &reply);
+ if (c < 0)
+ return c;
+
+ r = set_put(replies, reply);
+ if (r < 0) {
+ sd_bus_message_unref(reply);
+ return log_oom();
+ }
+
+ if (arg_recursive) {
+ _cleanup_strv_free_ char **machines = NULL;
+ char **i;
+
+ r = sd_get_machine_names(&machines);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine names: %m");
+
+ STRV_FOREACH(i, machines) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *container = NULL;
+ int k;
+
+ r = sd_bus_open_system_machine(&container, *i);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to connect to container %s, ignoring: %m", *i);
+ continue;
+ }
+
+ k = get_unit_list(container, *i, patterns, &unit_infos, c, &reply);
+ if (k < 0)
+ return k;
+
+ c = k;
+
+ r = set_put(replies, reply);
+ if (r < 0) {
+ sd_bus_message_unref(reply);
+ return log_oom();
+ }
+ }
+
+ *ret_machines = TAKE_PTR(machines);
+ } else
+ *ret_machines = NULL;
+
+ *ret_unit_infos = TAKE_PTR(unit_infos);
+ *ret_replies = TAKE_PTR(replies);
+
+ return c;
+}
+
+static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ unsigned job_count = 0;
+ int r;
+
+ table = table_new("", "unit", "load", "active", "sub", "job", "description");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, !arg_no_legend);
+ if (arg_plain) {
+ /* Hide the 'glyph' column when --plain is requested */
+ r = table_hide_column_from_display(table, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to hide column: %m");
+ }
+ if (arg_full)
+ table_set_width(table, 0);
+
+ (void) table_set_empty_string(table, "-");
+
+ for (const UnitInfo *u = unit_infos; unit_infos && u - unit_infos < c; u++) {
+ _cleanup_free_ char *j = NULL;
+ const char *on_underline = "", *on_loaded = "", *on_active = "";
+ const char *on_circle = "", *id;
+ bool circle = false, underline = false;
+
+ if (u + 1 < unit_infos + c &&
+ !streq(unit_type_suffix(u->id), unit_type_suffix((u + 1)->id))) {
+ on_underline = ansi_underline();
+ underline = true;
+ }
+
+ if (STR_IN_SET(u->load_state, "error", "not-found", "bad-setting", "masked") && !arg_plain) {
+ on_circle = underline ? ansi_highlight_yellow_underline() : ansi_highlight_yellow();
+ circle = true;
+ on_loaded = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ } else if (streq(u->active_state, "failed") && !arg_plain) {
+ on_circle = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ circle = true;
+ on_active = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ } else {
+ on_circle = on_underline;
+ on_active = on_underline;
+ on_loaded = on_underline;
+ }
+
+ if (u->machine) {
+ j = strjoin(u->machine, ":", u->id);
+ if (!j)
+ return log_oom();
+
+ id = j;
+ } else
+ id = u->id;
+
+ r = table_add_many(table,
+ TABLE_STRING, circle ? special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE) : " ",
+ TABLE_SET_BOTH_COLORS, on_circle,
+ TABLE_STRING, id,
+ TABLE_SET_BOTH_COLORS, on_active,
+ TABLE_STRING, u->load_state,
+ TABLE_SET_BOTH_COLORS, on_loaded,
+ TABLE_STRING, u->active_state,
+ TABLE_SET_BOTH_COLORS, on_active,
+ TABLE_STRING, u->sub_state,
+ TABLE_SET_BOTH_COLORS, on_active,
+ TABLE_STRING, u->job_id ? u->job_type: "",
+ TABLE_SET_BOTH_COLORS, on_underline,
+ TABLE_STRING, u->description,
+ TABLE_SET_BOTH_COLORS, on_underline);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (u->job_id != 0)
+ job_count++;
+ }
+
+ if (job_count == 0) {
+ /* There's no data in the JOB column, so let's hide it */
+ r = table_hide_column_from_display(table, 5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to hide column: %m");
+ }
+
+ r = output_table(table);
+ if (r < 0)
+ return r;
+
+ if (!arg_no_legend) {
+ const char *on, *off;
+ size_t records = table_get_rows(table) - 1;
+
+ if (records > 0) {
+ puts("\n"
+ "LOAD = Reflects whether the unit definition was properly loaded.\n"
+ "ACTIVE = The high-level unit activation state, i.e. generalization of SUB.\n"
+ "SUB = The low-level unit activation state, values depend on unit type.");
+ if (job_count > 0)
+ puts("JOB = Pending job for the unit.\n");
+ on = ansi_highlight();
+ off = ansi_normal();
+ } else {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ }
+
+ if (arg_all || strv_contains(arg_states, "inactive"))
+ printf("%s%zu loaded units listed.%s\n"
+ "To show all installed unit files use 'systemctl list-unit-files'.\n",
+ on, records, off);
+ else if (!arg_states)
+ printf("%s%zu loaded units listed.%s Pass --all to see loaded but inactive units, too.\n"
+ "To show all installed unit files use 'systemctl list-unit-files'.\n",
+ on, records, off);
+ else
+ printf("%zu loaded units listed.\n", records);
+ }
+
+ return 0;
+}
+
+int list_units(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ _cleanup_(message_set_freep) Set *replies = NULL;
+ _cleanup_strv_free_ char **machines = NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ if (arg_with_dependencies) {
+ _cleanup_strv_free_ char **names = NULL;
+
+ r = append_unit_dependencies(bus, strv_skip(argv, 1), &names);
+ if (r < 0)
+ return r;
+
+ r = get_unit_list_recursive(bus, names, &unit_infos, &replies, &machines);
+ if (r < 0)
+ return r;
+ } else {
+ r = get_unit_list_recursive(bus, strv_skip(argv, 1), &unit_infos, &replies, &machines);
+ if (r < 0)
+ return r;
+ }
+
+ typesafe_qsort(unit_infos, r, unit_info_compare);
+ return output_units_list(unit_infos, r);
+}
+
+static int get_triggered_units(
+ sd_bus *bus,
+ const char* path,
+ char*** ret) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(ret);
+
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "Triggers",
+ &error,
+ ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine triggers: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int get_listening(
+ sd_bus *bus,
+ const char* unit_path,
+ char*** listening) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *type, *path;
+ int r, n = 0;
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.systemd1",
+ unit_path,
+ "org.freedesktop.systemd1.Socket",
+ "Listen",
+ &error,
+ &reply,
+ "a(ss)");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get list of listening sockets: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(ss)", &type, &path)) > 0) {
+
+ r = strv_extend(listening, type);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(listening, path);
+ if (r < 0)
+ return log_oom();
+
+ n++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return n;
+}
+
+struct socket_info {
+ const char *machine;
+ const char* id;
+
+ char* type;
+ char* path;
+
+ /* Note: triggered is a list here, although it almost certainly will always be one
+ * unit. Nevertheless, dbus API allows for multiple values, so let's follow that. */
+ char** triggered;
+
+ /* The strv above is shared. free is set only in the first one. */
+ bool own_triggered;
+};
+
+static int socket_info_compare(const struct socket_info *a, const struct socket_info *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ r = strcasecmp_ptr(a->machine, b->machine);
+ if (r != 0)
+ return r;
+
+ r = strcmp(a->path, b->path);
+ if (r != 0)
+ return r;
+
+ return strcmp(a->type, b->type);
+}
+
+static int output_sockets_list(struct socket_info *socket_infos, unsigned cs) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ struct socket_info *s;
+ const char *on, *off;
+ int r;
+
+ table = table_new("listen", "type", "unit", "activates");
+ if (!table)
+ return log_oom();
+
+ if (!arg_show_types) {
+ /* Hide the second (TYPE) column */
+ r = table_set_display(table, (size_t) 0, (size_t) 2, (size_t) 3, (size_t) -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set columns to display: %m");
+ }
+
+ table_set_header(table, !arg_no_legend);
+ if (arg_full)
+ table_set_width(table, 0);
+
+ (void) table_set_empty_string(table, "-");
+
+ if (cs) {
+ for (s = socket_infos; s < socket_infos + cs; s++) {
+ _cleanup_free_ char *j = NULL;
+ const char *path;
+
+ if (s->machine) {
+ j = strjoin(s->machine, ":", s->path);
+ if (!j)
+ return log_oom();
+ path = j;
+ } else
+ path = s->path;
+
+ r = table_add_many(table,
+ TABLE_STRING, path,
+ TABLE_STRING, s->type,
+ TABLE_STRING, s->id);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (strv_isempty(s->triggered))
+ r = table_add_cell(table, NULL, TABLE_EMPTY, NULL);
+ else if (strv_length(s->triggered) == 1)
+ r = table_add_cell(table, NULL, TABLE_STRING, s->triggered[0]);
+ else
+ /* This should never happen, currently our socket units can only trigger a
+ * single unit. But let's handle this anyway, who knows what the future
+ * brings? */
+ r = table_add_cell(table, NULL, TABLE_STRV, s->triggered);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ }
+
+ on = ansi_highlight();
+ off = ansi_normal();
+ } else {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ }
+
+ r = output_table(table);
+ if (r < 0)
+ return r;
+
+ if (!arg_no_legend) {
+ printf("\n%s%u sockets listed.%s\n", on, cs, off);
+ if (!arg_all)
+ printf("Pass --all to see loaded but inactive sockets, too.\n");
+ }
+
+ return 0;
+}
+
+int list_sockets(int argc, char *argv[], void *userdata) {
+ _cleanup_(message_set_freep) Set *replies = NULL;
+ _cleanup_strv_free_ char **machines = NULL;
+ _cleanup_strv_free_ char **sockets_with_suffix = NULL;
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ _cleanup_free_ struct socket_info *socket_infos = NULL;
+ const UnitInfo *u;
+ struct socket_info *s;
+ unsigned cs = 0;
+ size_t size = 0;
+ int r, n;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = expand_unit_names(bus, strv_skip(argv, 1), ".socket", &sockets_with_suffix, NULL);
+ if (r < 0)
+ return r;
+
+ if (argc == 1 || sockets_with_suffix) {
+ n = get_unit_list_recursive(bus, sockets_with_suffix, &unit_infos, &replies, &machines);
+ if (n < 0)
+ return n;
+
+ for (u = unit_infos; u < unit_infos + n; u++) {
+ _cleanup_strv_free_ char **listening = NULL, **triggered = NULL;
+ int i, c;
+
+ if (!endswith(u->id, ".socket"))
+ continue;
+
+ r = get_triggered_units(bus, u->unit_path, &triggered);
+ if (r < 0)
+ goto cleanup;
+
+ c = get_listening(bus, u->unit_path, &listening);
+ if (c < 0) {
+ r = c;
+ goto cleanup;
+ }
+
+ if (!GREEDY_REALLOC(socket_infos, size, cs + c)) {
+ r = log_oom();
+ goto cleanup;
+ }
+
+ for (i = 0; i < c; i++)
+ socket_infos[cs + i] = (struct socket_info) {
+ .machine = u->machine,
+ .id = u->id,
+ .type = listening[i*2],
+ .path = listening[i*2 + 1],
+ .triggered = triggered,
+ .own_triggered = i==0,
+ };
+
+ /* from this point on we will cleanup those socket_infos */
+ cs += c;
+ free(listening);
+ listening = triggered = NULL; /* avoid cleanup */
+ }
+
+ typesafe_qsort(socket_infos, cs, socket_info_compare);
+ }
+
+ output_sockets_list(socket_infos, cs);
+
+ cleanup:
+ assert(cs == 0 || socket_infos);
+ for (s = socket_infos; s < socket_infos + cs; s++) {
+ free(s->type);
+ free(s->path);
+ if (s->own_triggered)
+ strv_free(s->triggered);
+ }
+
+ return r;
+}
+
+static int get_next_elapse(
+ sd_bus *bus,
+ const char *path,
+ dual_timestamp *next) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ dual_timestamp t;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(next);
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Timer",
+ "NextElapseUSecMonotonic",
+ &error,
+ 't',
+ &t.monotonic);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next elapse time: %s", bus_error_message(&error, r));
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Timer",
+ "NextElapseUSecRealtime",
+ &error,
+ 't',
+ &t.realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get next elapse time: %s", bus_error_message(&error, r));
+
+ *next = t;
+ return 0;
+}
+
+static int get_last_trigger(
+ sd_bus *bus,
+ const char *path,
+ usec_t *last) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(last);
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Timer",
+ "LastTriggerUSec",
+ &error,
+ 't',
+ last);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get last trigger time: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+struct timer_info {
+ const char* machine;
+ const char* id;
+ usec_t next_elapse;
+ usec_t last_trigger;
+ char** triggered;
+};
+
+static int timer_info_compare(const struct timer_info *a, const struct timer_info *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ r = strcasecmp_ptr(a->machine, b->machine);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->next_elapse, b->next_elapse);
+ if (r != 0)
+ return r;
+
+ return strcmp(a->id, b->id);
+}
+
+static int output_timers_list(struct timer_info *timer_infos, unsigned n) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ struct timer_info *t;
+ const char *on, *off;
+ int r;
+
+ assert(timer_infos || n == 0);
+
+ table = table_new("next", "left", "last", "passed", "unit", "activates");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, !arg_no_legend);
+ if (arg_full)
+ table_set_width(table, 0);
+
+ (void) table_set_empty_string(table, "-");
+
+ if (n > 0) {
+ for (t = timer_infos; t < timer_infos + n; t++) {
+ _cleanup_free_ char *j = NULL, *activates = NULL;
+ const char *unit;
+
+ if (t->machine) {
+ j = strjoin(t->machine, ":", t->id);
+ if (!j)
+ return log_oom();
+ unit = j;
+ } else
+ unit = t->id;
+
+ activates = strv_join(t->triggered, ", ");
+ if (!activates)
+ return log_oom();
+
+ r = table_add_many(table,
+ TABLE_TIMESTAMP, t->next_elapse,
+ TABLE_TIMESTAMP_RELATIVE, t->next_elapse,
+ TABLE_TIMESTAMP, t->last_trigger,
+ TABLE_TIMESTAMP_RELATIVE, t->last_trigger,
+ TABLE_STRING, unit,
+ TABLE_STRING, activates);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ on = ansi_highlight();
+ off = ansi_normal();
+ } else {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ }
+
+ r = output_table(table);
+ if (r < 0)
+ return r;
+
+ if (!arg_no_legend) {
+ printf("\n%s%u timers listed.%s\n", on, n, off);
+ if (!arg_all)
+ printf("Pass --all to see loaded but inactive timers, too.\n");
+ }
+
+ return 0;
+}
+
+usec_t calc_next_elapse(dual_timestamp *nw, dual_timestamp *next) {
+ usec_t next_elapse;
+
+ assert(nw);
+ assert(next);
+
+ if (timestamp_is_set(next->monotonic)) {
+ usec_t converted;
+
+ if (next->monotonic > nw->monotonic)
+ converted = nw->realtime + (next->monotonic - nw->monotonic);
+ else
+ converted = nw->realtime - (nw->monotonic - next->monotonic);
+
+ if (timestamp_is_set(next->realtime))
+ next_elapse = MIN(converted, next->realtime);
+ else
+ next_elapse = converted;
+
+ } else
+ next_elapse = next->realtime;
+
+ return next_elapse;
+}
+
+int list_timers(int argc, char *argv[], void *userdata) {
+ _cleanup_(message_set_freep) Set *replies = NULL;
+ _cleanup_strv_free_ char **machines = NULL;
+ _cleanup_strv_free_ char **timers_with_suffix = NULL;
+ _cleanup_free_ struct timer_info *timer_infos = NULL;
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ struct timer_info *t;
+ const UnitInfo *u;
+ size_t size = 0;
+ int n, c = 0;
+ dual_timestamp nw;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = expand_unit_names(bus, strv_skip(argv, 1), ".timer", &timers_with_suffix, NULL);
+ if (r < 0)
+ return r;
+
+ if (argc == 1 || timers_with_suffix) {
+ n = get_unit_list_recursive(bus, timers_with_suffix, &unit_infos, &replies, &machines);
+ if (n < 0)
+ return n;
+
+ dual_timestamp_get(&nw);
+
+ for (u = unit_infos; u < unit_infos + n; u++) {
+ _cleanup_strv_free_ char **triggered = NULL;
+ dual_timestamp next = DUAL_TIMESTAMP_NULL;
+ usec_t m, last = 0;
+
+ if (!endswith(u->id, ".timer"))
+ continue;
+
+ r = get_triggered_units(bus, u->unit_path, &triggered);
+ if (r < 0)
+ goto cleanup;
+
+ r = get_next_elapse(bus, u->unit_path, &next);
+ if (r < 0)
+ goto cleanup;
+
+ get_last_trigger(bus, u->unit_path, &last);
+
+ if (!GREEDY_REALLOC(timer_infos, size, c+1)) {
+ r = log_oom();
+ goto cleanup;
+ }
+
+ m = calc_next_elapse(&nw, &next);
+
+ timer_infos[c++] = (struct timer_info) {
+ .machine = u->machine,
+ .id = u->id,
+ .next_elapse = m,
+ .last_trigger = last,
+ .triggered = TAKE_PTR(triggered),
+ };
+ }
+
+ typesafe_qsort(timer_infos, c, timer_info_compare);
+ }
+
+ output_timers_list(timer_infos, c);
+
+ cleanup:
+ for (t = timer_infos; t < timer_infos + c; t++)
+ strv_free(t->triggered);
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-list-units.h b/src/systemctl/systemctl-list-units.h
new file mode 100644
index 0000000..ef27627
--- /dev/null
+++ b/src/systemctl/systemctl-list-units.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int list_units(int argc, char *argv[], void *userdata);
+int list_sockets(int argc, char *argv[], void *userdata);
+int list_timers(int argc, char *argv[], void *userdata);
+
+usec_t calc_next_elapse(dual_timestamp *nw, dual_timestamp *next);
diff --git a/src/systemctl/systemctl-log-setting.c b/src/systemctl/systemctl-log-setting.c
new file mode 100644
index 0000000..64984e4
--- /dev/null
+++ b/src/systemctl/systemctl-log-setting.c
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "pretty-print.h"
+#include "syslog-util.h"
+#include "systemctl-log-setting.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+static void give_log_control1_hint(const char *name) {
+ _cleanup_free_ char *link = NULL;
+
+ if (arg_quiet)
+ return;
+
+ (void) terminal_urlify_man("org.freedesktop.LogControl1", "5", &link);
+
+ log_notice("Hint: the service must declare BusName= and implement the appropriate D-Bus interface.\n"
+ " See the %s for details.", link ?: "org.freedesktop.LogControl1(5) man page");
+}
+
+static int log_setting_internal(sd_bus *bus, const BusLocator* bloc, const char *verb, const char *value) {
+ assert(bus);
+ assert(STR_IN_SET(verb, "log-level", "log-target", "service-log-level", "service-log-target"));
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool level = endswith(verb, "log-level");
+ int r;
+
+ if (value) {
+ if (level) {
+ if (log_level_from_string(value) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "\"%s\" is not a valid log level.", value);
+ }
+
+ r = bus_set_property(bus, bloc,
+ level ? "LogLevel" : "LogTarget",
+ &error, "s", value);
+ if (r >= 0)
+ return 0;
+
+ log_error_errno(r, "Failed to set log %s of %s to %s: %s",
+ level ? "level" : "target",
+ bloc->destination, value, bus_error_message(&error, r));
+ } else {
+ _cleanup_free_ char *t = NULL;
+
+ r = bus_get_property_string(bus, bloc,
+ level ? "LogLevel" : "LogTarget",
+ &error, &t);
+ if (r >= 0) {
+ puts(t);
+ return 0;
+ }
+
+ log_error_errno(r, "Failed to get log %s of %s: %s",
+ level ? "level" : "target",
+ bloc->destination, bus_error_message(&error, r));
+ }
+
+ if (sd_bus_error_has_names(&error, SD_BUS_ERROR_UNKNOWN_METHOD,
+ SD_BUS_ERROR_UNKNOWN_OBJECT,
+ SD_BUS_ERROR_UNKNOWN_INTERFACE,
+ SD_BUS_ERROR_UNKNOWN_PROPERTY))
+ give_log_control1_hint(bloc->destination);
+ return r;
+}
+
+int log_setting(int argc, char *argv[], void *userdata) {
+ sd_bus *bus;
+ int r;
+
+ assert(argc >= 1 && argc <= 2);
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ return log_setting_internal(bus, bus_systemd_mgr, argv[0], argv[1]);
+}
+
+static int service_name_to_dbus(sd_bus *bus, const char *name, char **ret_dbus_name) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *bus_name = NULL;
+ int r;
+
+ /* First, look for the BusName= property */
+ _cleanup_free_ char *dbus_path = unit_dbus_path_from_name(name);
+ if (!dbus_path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Service",
+ "BusName",
+ &error,
+ &bus_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to obtain BusName= property of %s: %s",
+ name, bus_error_message(&error, r));
+
+ if (isempty(bus_name)) {
+ log_error("Unit %s doesn't declare BusName=.", name);
+ give_log_control1_hint(name);
+ return -ENOLINK;
+ }
+
+ *ret_dbus_name = TAKE_PTR(bus_name);
+ return 0;
+}
+
+int service_log_setting(int argc, char *argv[], void *userdata) {
+ sd_bus *bus;
+ _cleanup_free_ char *unit = NULL, *dbus_name = NULL;
+ int r;
+
+ assert(argc >= 2 && argc <= 3);
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ r = unit_name_mangle_with_suffix(argv[1], argv[0],
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ r = service_name_to_dbus(bus, unit, &dbus_name);
+ if (r < 0)
+ return r;
+
+ const BusLocator bloc = {
+ .destination = dbus_name,
+ .path = "/org/freedesktop/LogControl1",
+ .interface = "org.freedesktop.LogControl1",
+ };
+
+ return log_setting_internal(bus, &bloc, argv[0], argv[2]);
+}
diff --git a/src/systemctl/systemctl-log-setting.h b/src/systemctl/systemctl-log-setting.h
new file mode 100644
index 0000000..9a2e793
--- /dev/null
+++ b/src/systemctl/systemctl-log-setting.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int log_setting(int argc, char *argv[], void *userdata);
+int service_log_setting(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-logind.c b/src/systemctl/systemctl-logind.c
new file mode 100644
index 0000000..405f12a
--- /dev/null
+++ b/src/systemctl/systemctl-logind.c
@@ -0,0 +1,380 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "sd-login.h"
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "process-util.h"
+#include "systemctl-logind.h"
+#include "systemctl-start-unit.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+#include "user-util.h"
+
+int logind_set_wall_message(void) {
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ _cleanup_free_ char *m = NULL;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ m = strv_join(arg_wall, " ");
+ if (!m)
+ return log_oom();
+
+ log_debug("%s wall message \"%s\".", arg_dry_run ? "Would set" : "Setting", m);
+ if (arg_dry_run)
+ return 0;
+
+ r = bus_call_method(bus, bus_login_mgr, "SetWallMessage", &error, NULL, "sb", m, !arg_no_wall);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to set wall message, ignoring: %s", bus_error_message(&error, r));
+#endif
+ return 0;
+}
+
+/* Ask systemd-logind, which might grant access to unprivileged users through polkit */
+int logind_reboot(enum action a) {
+#if ENABLE_LOGIND
+ static const struct {
+ const char *method;
+ const char *description;
+ } actions[_ACTION_MAX] = {
+ [ACTION_POWEROFF] = { "PowerOff", "power off system" },
+ [ACTION_REBOOT] = { "Reboot", "reboot system" },
+ [ACTION_HALT] = { "Halt", "halt system" },
+ [ACTION_SUSPEND] = { "Suspend", "suspend system" },
+ [ACTION_HIBERNATE] = { "Hibernate", "hibernate system" },
+ [ACTION_HYBRID_SLEEP] = { "HybridSleep", "put system into hybrid sleep" },
+ [ACTION_SUSPEND_THEN_HIBERNATE] = { "SuspendThenHibernate", "suspend system, hibernate later" },
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ if (a < 0 || a >= _ACTION_MAX || !actions[a].method)
+ return -EINVAL;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+ (void) logind_set_wall_message();
+
+ log_debug("%s org.freedesktop.login1.Manager %s dbus call.", arg_dry_run ? "Would execute" : "Executing", actions[a].method);
+
+ if (arg_dry_run)
+ return 0;
+
+ r = bus_call_method(bus, bus_login_mgr, actions[a].method, &error, NULL, "b", arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to %s via logind: %s", actions[a].description, bus_error_message(&error, r));
+
+ return 0;
+#else
+ return -ENOSYS;
+#endif
+}
+
+int logind_check_inhibitors(enum action a) {
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **sessions = NULL;
+ const char *what, *who, *why, *mode;
+ uint32_t uid, pid;
+ sd_bus *bus;
+ unsigned c = 0;
+ char **s;
+ int r;
+
+ if (arg_ignore_inhibitors || arg_force > 0)
+ return 0;
+
+ if (arg_when > 0)
+ return 0;
+
+ if (geteuid() == 0)
+ return 0;
+
+ if (!on_tty())
+ return 0;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return 0;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_login_mgr, "ListInhibitors", NULL, &reply, NULL);
+ if (r < 0)
+ /* If logind is not around, then there are no inhibitors... */
+ return 0;
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssuu)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "(ssssuu)", &what, &who, &why, &mode, &uid, &pid)) > 0) {
+ _cleanup_free_ char *comm = NULL, *user = NULL;
+ _cleanup_strv_free_ char **sv = NULL;
+
+ if (!streq(mode, "block"))
+ continue;
+
+ sv = strv_split(what, ":");
+ if (!sv)
+ return log_oom();
+
+ if (!pid_is_valid((pid_t) pid))
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Invalid PID "PID_FMT".", (pid_t) pid);
+
+ if (!strv_contains(sv,
+ IN_SET(a,
+ ACTION_HALT,
+ ACTION_POWEROFF,
+ ACTION_REBOOT,
+ ACTION_KEXEC) ? "shutdown" : "sleep"))
+ continue;
+
+ (void) get_process_comm(pid, &comm);
+ user = uid_to_name(uid);
+
+ log_warning("Operation inhibited by \"%s\" (PID "PID_FMT" \"%s\", user %s), reason is \"%s\".",
+ who, (pid_t) pid, strna(comm), strna(user), why);
+
+ c++;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ /* Check for current sessions */
+ sd_get_sessions(&sessions);
+ STRV_FOREACH(s, sessions) {
+ _cleanup_free_ char *type = NULL, *tty = NULL, *seat = NULL, *user = NULL, *service = NULL, *class = NULL;
+
+ if (sd_session_get_uid(*s, &uid) < 0 || uid == getuid())
+ continue;
+
+ if (sd_session_get_class(*s, &class) < 0 || !streq(class, "user"))
+ continue;
+
+ if (sd_session_get_type(*s, &type) < 0 || !STR_IN_SET(type, "x11", "wayland", "tty", "mir"))
+ continue;
+
+ sd_session_get_tty(*s, &tty);
+ sd_session_get_seat(*s, &seat);
+ sd_session_get_service(*s, &service);
+ user = uid_to_name(uid);
+
+ log_warning("User %s is logged in on %s.", strna(user), isempty(tty) ? (isempty(seat) ? strna(service) : seat) : tty);
+ c++;
+ }
+
+ if (c <= 0)
+ return 0;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Please retry operation after closing inhibitors and logging out other users.\n"
+ "Alternatively, ignore inhibitors and users with 'systemctl %s -i'.",
+ action_table[a].verb);
+#else
+ return 0;
+#endif
+}
+
+int prepare_firmware_setup(void) {
+
+ if (!arg_firmware_setup)
+ return 0;
+
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_login_mgr, "SetRebootToFirmwareSetup", &error, NULL, "b", true);
+ if (r < 0)
+ return log_error_errno(r, "Cannot indicate to EFI to boot into setup mode: %s", bus_error_message(&error, r));
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "Booting into firmware setup not supported.");
+#endif
+}
+
+int prepare_boot_loader_menu(void) {
+
+ if (arg_boot_loader_menu == USEC_INFINITY)
+ return 0;
+
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_login_mgr, "SetRebootToBootLoaderMenu", &error, NULL, "t", arg_boot_loader_menu);
+ if (r < 0)
+ return log_error_errno(r, "Cannot indicate to boot loader to enter boot loader entry menu: %s", bus_error_message(&error, r));
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "Booting into boot loader menu not supported.");
+#endif
+}
+
+int prepare_boot_loader_entry(void) {
+
+ if (!arg_boot_loader_entry)
+ return 0;
+
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_login_mgr, "SetRebootToBootLoaderEntry", &error, NULL, "s", arg_boot_loader_entry);
+ if (r < 0)
+ return log_error_errno(r, "Cannot set boot into loader entry '%s': %s", arg_boot_loader_entry, bus_error_message(&error, r));
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "Booting into boot loader entry not supported.");
+#endif
+}
+
+int logind_schedule_shutdown(void) {
+
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ char date[FORMAT_TIMESTAMP_MAX];
+ const char *action;
+ const char *log_action;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ switch (arg_action) {
+ case ACTION_HALT:
+ action = "halt";
+ log_action = "Shutdown";
+ break;
+ case ACTION_POWEROFF:
+ action = "poweroff";
+ log_action = "Shutdown";
+ break;
+ case ACTION_KEXEC:
+ action = "kexec";
+ log_action = "Reboot via kexec";
+ break;
+ case ACTION_EXIT:
+ action = "exit";
+ log_action = "Shutdown";
+ break;
+ case ACTION_REBOOT:
+ default:
+ action = "reboot";
+ log_action = "Reboot";
+ break;
+ }
+
+ if (arg_dry_run)
+ action = strjoina("dry-", action);
+
+ (void) logind_set_wall_message();
+
+ r = bus_call_method(bus, bus_login_mgr, "ScheduleShutdown", &error, NULL, "st", action, arg_when);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to call ScheduleShutdown in logind, proceeding with immediate shutdown: %s", bus_error_message(&error, r));
+
+ if (!arg_quiet)
+ log_info("%s scheduled for %s, use 'shutdown -c' to cancel.", log_action, format_timestamp_style(date, sizeof(date), arg_when, arg_timestamp_style));
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "Cannot schedule shutdown without logind support, proceeding with immediate shutdown.");
+#endif
+}
+
+int logind_cancel_shutdown(void) {
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ (void) logind_set_wall_message();
+
+ r = bus_call_method(bus, bus_login_mgr, "CancelScheduledShutdown", &error, NULL, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to talk to logind, shutdown hasn't been cancelled: %s", bus_error_message(&error, r));
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "Not compiled with logind support, cannot cancel scheduled shutdowns.");
+#endif
+}
+
+int help_boot_loader_entry(void) {
+#if ENABLE_LOGIND
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ sd_bus *bus;
+ char **i;
+ int r;
+
+ r = acquire_bus(BUS_FULL, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_get_property_strv(bus, bus_login_mgr, "BootLoaderEntries", &error, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate boot loader entries: %s", bus_error_message(&error, r));
+
+ if (strv_isempty(l))
+ return log_error_errno(SYNTHETIC_ERRNO(ENODATA), "No boot loader entries discovered.");
+
+ STRV_FOREACH(i, l)
+ puts(*i);
+
+ return 0;
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "Not compiled with logind support, cannot display boot loader entries.");
+#endif
+}
diff --git a/src/systemctl/systemctl-logind.h b/src/systemctl/systemctl-logind.h
new file mode 100644
index 0000000..144056b
--- /dev/null
+++ b/src/systemctl/systemctl-logind.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "systemctl.h"
+
+int logind_set_wall_message(void);
+
+int logind_reboot(enum action a);
+int logind_check_inhibitors(enum action a);
+
+int prepare_firmware_setup(void);
+int prepare_boot_loader_menu(void);
+int prepare_boot_loader_entry(void);
+
+int logind_schedule_shutdown(void);
+int logind_cancel_shutdown(void);
+
+int help_boot_loader_entry(void);
diff --git a/src/systemctl/systemctl-preset-all.c b/src/systemctl/systemctl-preset-all.c
new file mode 100644
index 0000000..b5eb199
--- /dev/null
+++ b/src/systemctl/systemctl-preset-all.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-daemon-reload.h"
+#include "systemctl-preset-all.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int preset_all(int argc, char *argv[], void *userdata) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int r;
+
+ if (install_client_side()) {
+ r = unit_file_preset_all(arg_scope, unit_file_flags_from_args(), arg_root, arg_preset_mode, &changes, &n_changes);
+ unit_file_dump_changes(r, "preset", changes, n_changes, arg_quiet);
+
+ if (r > 0)
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_bus *bus;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ "PresetAllUnitFiles",
+ &error,
+ &reply,
+ "sbb",
+ unit_file_preset_mode_to_string(arg_preset_mode),
+ arg_runtime,
+ arg_force);
+ if (r < 0)
+ return log_error_errno(r, "Failed to preset all units: %s", bus_error_message(&error, r));
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ if (arg_no_reload) {
+ r = 0;
+ goto finish;
+ }
+
+ r = daemon_reload(argc, argv, userdata);
+ }
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-preset-all.h b/src/systemctl/systemctl-preset-all.h
new file mode 100644
index 0000000..f4f6790
--- /dev/null
+++ b/src/systemctl/systemctl-preset-all.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int preset_all(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-reset-failed.c b/src/systemctl/systemctl-reset-failed.c
new file mode 100644
index 0000000..eee7586
--- /dev/null
+++ b/src/systemctl/systemctl-reset-failed.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-reset-failed.h"
+#include "systemctl-trivial-method.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int reset_failed(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **names = NULL;
+ sd_bus *bus;
+ char **name;
+ int r, q;
+
+ if (argc <= 1) /* Shortcut to trivial_method() if no argument is given */
+ return trivial_method(argc, argv, userdata);
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = expand_unit_names(bus, strv_skip(argv, 1), NULL, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ q = bus_call_method(bus, bus_systemd_mgr, "ResetFailedUnit", &error, NULL, "s", *name);
+ if (q < 0) {
+ log_error_errno(q, "Failed to reset failed state of unit %s: %s", *name, bus_error_message(&error, q));
+ if (r == 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-reset-failed.h b/src/systemctl/systemctl-reset-failed.h
new file mode 100644
index 0000000..956bb46
--- /dev/null
+++ b/src/systemctl/systemctl-reset-failed.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int reset_failed(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-service-watchdogs.c b/src/systemctl/systemctl-service-watchdogs.c
new file mode 100644
index 0000000..e579851
--- /dev/null
+++ b/src/systemctl/systemctl-service-watchdogs.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "parse-util.h"
+#include "systemctl-service-watchdogs.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int service_watchdogs(int argc, char *argv[], void *userdata) {
+ sd_bus *bus;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int b, r;
+
+ assert(argv);
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ if (argc == 1) {
+ /* get ServiceWatchdogs */
+ r = bus_get_property_trivial(bus, bus_systemd_mgr, "ServiceWatchdogs", &error, 'b', &b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get service-watchdog state: %s", bus_error_message(&error, r));
+
+ printf("%s\n", yes_no(!!b));
+
+ } else {
+ /* set ServiceWatchdogs */
+ assert(argc == 2);
+
+ b = parse_boolean(argv[1]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse service-watchdogs argument: %m");
+
+ r = bus_set_property(bus, bus_systemd_mgr, "ServiceWatchdogs", &error, "b", b);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set service-watchdog state: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
diff --git a/src/systemctl/systemctl-service-watchdogs.h b/src/systemctl/systemctl-service-watchdogs.h
new file mode 100644
index 0000000..11a53db
--- /dev/null
+++ b/src/systemctl/systemctl-service-watchdogs.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int service_watchdogs(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-set-default.c b/src/systemctl/systemctl-set-default.c
new file mode 100644
index 0000000..05c1894
--- /dev/null
+++ b/src/systemctl/systemctl-set-default.c
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "proc-cmdline.h"
+#include "systemctl-daemon-reload.h"
+#include "systemctl-set-default.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ char **ret = data;
+
+ if (streq(key, "systemd.unit")) {
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+ if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) {
+ log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
+ return 0;
+ }
+
+ return free_and_strdup_warn(ret, key);
+
+ } else if (!value) {
+ if (runlevel_to_target(key))
+ return free_and_strdup_warn(ret, key);
+ }
+
+ return 0;
+}
+
+static void emit_cmdline_warning(void) {
+ if (arg_quiet || arg_root)
+ /* don't bother checking the commandline if we're operating on a container */
+ return;
+
+ _cleanup_free_ char *override = NULL;
+ int r;
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, &override, 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse kernel command line, ignoring: %m");
+ if (override)
+ log_notice("Note: found \"%s\" on the kernel commandline, which overrides the default unit.",
+ override);
+}
+
+static int determine_default(char **ret_name) {
+ int r;
+
+ if (install_client_side()) {
+ r = unit_file_get_default(arg_scope, arg_root, ret_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default target: %m");
+ return 0;
+
+ } else {
+ sd_bus *bus;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *name;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "GetDefaultTarget", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default target: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "s", &name);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return free_and_strdup_warn(ret_name, name);
+ }
+}
+
+int get_default(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *name = NULL;
+ int r;
+
+ r = determine_default(&name);
+ if (r < 0)
+ return r;
+
+ printf("%s\n", name);
+
+ emit_cmdline_warning();
+
+ return 0;
+}
+
+int set_default(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *unit = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ int r;
+
+ assert(argc >= 2);
+ assert(argv);
+
+ r = unit_name_mangle_with_suffix(argv[1], "set-default",
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ ".target", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ if (install_client_side()) {
+ r = unit_file_set_default(arg_scope, UNIT_FILE_FORCE, arg_root, unit, &changes, &n_changes);
+ unit_file_dump_changes(r, "set default", changes, n_changes, arg_quiet);
+
+ if (r > 0)
+ r = 0;
+ } else {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_bus *bus;
+
+ polkit_agent_open_maybe();
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "SetDefaultTarget", &error, &reply, "sb", unit, 1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set default target: %s", bus_error_message(&error, r));
+
+ r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes);
+ if (r < 0)
+ goto finish;
+
+ /* Try to reload if enabled */
+ if (!arg_no_reload)
+ r = daemon_reload(argc, argv, userdata);
+ else
+ r = 0;
+ }
+
+ emit_cmdline_warning();
+
+ if (!arg_quiet) {
+ _cleanup_free_ char *final = NULL;
+
+ r = determine_default(&final);
+ if (r < 0)
+ return r;
+
+ if (!streq(final, unit))
+ log_notice("Note: \"%s\" is the default unit (possibly a runtime override).", final);
+ }
+
+finish:
+ unit_file_changes_free(changes, n_changes);
+
+ return r;
+}
diff --git a/src/systemctl/systemctl-set-default.h b/src/systemctl/systemctl-set-default.h
new file mode 100644
index 0000000..839b2c9
--- /dev/null
+++ b/src/systemctl/systemctl-set-default.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int get_default(int argc, char *argv[], void *userdata);
+int set_default(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-set-environment.c b/src/systemctl/systemctl-set-environment.c
new file mode 100644
index 0000000..b37bd6f
--- /dev/null
+++ b/src/systemctl/systemctl-set-environment.c
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "env-util.h"
+#include "escape.h"
+#include "systemctl-set-environment.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+static int print_variable(const char *s) {
+ const char *sep;
+ _cleanup_free_ char *esc = NULL;
+
+ sep = strchr(s, '=');
+ if (!sep)
+ return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Invalid environment block");
+
+ esc = shell_maybe_quote(sep + 1, ESCAPE_POSIX);
+ if (!esc)
+ return log_oom();
+
+ printf("%.*s=%s\n", (int)(sep-s), s, esc);
+ return 0;
+}
+
+int show_environment(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *text;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = bus_get_property(bus, bus_systemd_mgr, "Environment", &error, &reply, "as");
+ if (r < 0)
+ return log_error_errno(r, "Failed to get environment: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read_basic(reply, SD_BUS_TYPE_STRING, &text)) > 0) {
+ r = print_variable(text);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static void invalid_callback(const char *p, void *userdata) {
+ _cleanup_free_ char *t = cescape(p);
+
+ log_debug("Ignoring invalid environment assignment \"%s\".", strnull(t));
+}
+
+int set_environment(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ const char *method;
+ sd_bus *bus;
+ int r;
+
+ assert(argc > 1);
+ assert(argv);
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ method = streq(argv[0], "set-environment")
+ ? "SetEnvironment"
+ : "UnsetEnvironment";
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, method);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, strv_skip(argv, 1));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set environment: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+int import_environment(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "SetEnvironment");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (argc < 2) {
+ _cleanup_strv_free_ char **copy = NULL;
+
+ copy = strv_copy(environ);
+ if (!copy)
+ return log_oom();
+
+ strv_env_clean_with_callback(copy, invalid_callback, NULL);
+
+ char **e;
+ STRV_FOREACH(e, copy)
+ if (string_has_cc(*e, NULL))
+ log_notice("Environment variable $%.*s contains control characters, importing anyway.",
+ (int) strcspn(*e, "="), *e);
+
+ r = sd_bus_message_append_strv(m, copy);
+
+ } else {
+ char **a, **b;
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ STRV_FOREACH(a, strv_skip(argv, 1)) {
+
+ if (!env_name_is_valid(*a))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not a valid environment variable name: %s", *a);
+
+ bool found = false;
+ STRV_FOREACH(b, environ) {
+ const char *eq;
+
+ eq = startswith(*b, *a);
+ if (eq && *eq == '=') {
+ if (string_has_cc(eq + 1, NULL))
+ log_notice("Environment variable $%.*s contains control characters, importing anyway.",
+ (int) (eq - *b), *b);
+
+ r = sd_bus_message_append(m, "s", *b);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ log_notice("Environment variable $%s not set, ignoring.", *a);
+ }
+
+ r = sd_bus_message_close_container(m);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import environment: %s", bus_error_message(&error, r));
+
+ return 0;
+}
diff --git a/src/systemctl/systemctl-set-environment.h b/src/systemctl/systemctl-set-environment.h
new file mode 100644
index 0000000..bd05e31
--- /dev/null
+++ b/src/systemctl/systemctl-set-environment.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int show_environment(int argc, char *argv[], void *userdata);
+int set_environment(int argc, char *argv[], void *userdata);
+int import_environment(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-set-property.c b/src/systemctl/systemctl-set-property.c
new file mode 100644
index 0000000..183a7b6
--- /dev/null
+++ b/src/systemctl/systemctl-set-property.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-set-property.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int set_property(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *n = NULL;
+ UnitType t;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "SetUnitProperties");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = unit_name_mangle(argv[1], arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ t = unit_name_to_type(n);
+ if (t < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid unit type: %s", n);
+
+ r = sd_bus_message_append(m, "sb", n, arg_runtime);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_append_unit_property_assignment_many(m, t, strv_skip(argv, 2));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set unit properties on %s: %s", n, bus_error_message(&error, r));
+
+ return 0;
+}
diff --git a/src/systemctl/systemctl-set-property.h b/src/systemctl/systemctl-set-property.h
new file mode 100644
index 0000000..74990e7
--- /dev/null
+++ b/src/systemctl/systemctl-set-property.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int set_property(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-show.c b/src/systemctl/systemctl-show.c
new file mode 100644
index 0000000..d5efecb
--- /dev/null
+++ b/src/systemctl/systemctl-show.c
@@ -0,0 +1,2135 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "bus-print-properties.h"
+#include "bus-unit-procs.h"
+#include "cgroup-show.h"
+#include "cpu-set-util.h"
+#include "errno-util.h"
+#include "exec-util.h"
+#include "exit-status.h"
+#include "format-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "journal-file.h"
+#include "list.h"
+#include "locale-util.h"
+#include "memory-util.h"
+#include "numa-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "sort-util.h"
+#include "string-table.h"
+#include "systemctl-list-machines.h"
+#include "systemctl-list-units.h"
+#include "systemctl-show.h"
+#include "systemctl-sysv-compat.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+#include "utf8.h"
+
+static OutputFlags get_output_flags(void) {
+ return
+ arg_all * OUTPUT_SHOW_ALL |
+ (arg_full || !on_tty() || pager_have()) * OUTPUT_FULL_WIDTH |
+ colors_enabled() * OUTPUT_COLOR |
+ !arg_quiet * OUTPUT_WARN_CUTOFF;
+}
+
+typedef struct ExecStatusInfo {
+ char *name;
+
+ char *path;
+ char **argv;
+
+ bool ignore;
+
+ usec_t start_timestamp;
+ usec_t exit_timestamp;
+ pid_t pid;
+ int code;
+ int status;
+
+ ExecCommandFlags flags;
+
+ LIST_FIELDS(struct ExecStatusInfo, exec);
+} ExecStatusInfo;
+
+static void exec_status_info_free(ExecStatusInfo *i) {
+ assert(i);
+
+ free(i->name);
+ free(i->path);
+ strv_free(i->argv);
+ free(i);
+}
+
+static int exec_status_info_deserialize(sd_bus_message *m, ExecStatusInfo *i, bool is_ex_prop) {
+ _cleanup_strv_free_ char **ex_opts = NULL;
+ uint64_t start_timestamp, exit_timestamp, start_timestamp_monotonic, exit_timestamp_monotonic;
+ const char *path;
+ uint32_t pid;
+ int32_t code, status;
+ int ignore, r;
+
+ assert(m);
+ assert(i);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_STRUCT, is_ex_prop ? "sasasttttuii" : "sasbttttuii");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ else if (r == 0)
+ return 0;
+
+ r = sd_bus_message_read(m, "s", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ i->path = strdup(path);
+ if (!i->path)
+ return log_oom();
+
+ r = sd_bus_message_read_strv(m, &i->argv);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = is_ex_prop ? sd_bus_message_read_strv(m, &ex_opts) : sd_bus_message_read(m, "b", &ignore);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(m,
+ "ttttuii",
+ &start_timestamp, &start_timestamp_monotonic,
+ &exit_timestamp, &exit_timestamp_monotonic,
+ &pid,
+ &code, &status);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (is_ex_prop) {
+ r = exec_command_flags_from_strv(ex_opts, &i->flags);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert strv to ExecCommandFlags: %m");
+
+ i->ignore = FLAGS_SET(i->flags, EXEC_COMMAND_IGNORE_FAILURE);
+ } else
+ i->ignore = ignore;
+
+ i->start_timestamp = (usec_t) start_timestamp;
+ i->exit_timestamp = (usec_t) exit_timestamp;
+ i->pid = (pid_t) pid;
+ i->code = code;
+ i->status = status;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+}
+
+typedef struct UnitCondition {
+ char *name;
+ char *param;
+ bool trigger;
+ bool negate;
+ int tristate;
+
+ LIST_FIELDS(struct UnitCondition, conditions);
+} UnitCondition;
+
+static void unit_condition_free(UnitCondition *c) {
+ if (!c)
+ return;
+
+ free(c->name);
+ free(c->param);
+ free(c);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UnitCondition*, unit_condition_free);
+
+typedef struct UnitStatusInfo {
+ const char *id;
+ const char *load_state;
+ const char *active_state;
+ const char *freezer_state;
+ const char *sub_state;
+ const char *unit_file_state;
+ const char *unit_file_preset;
+
+ const char *description;
+ const char *following;
+
+ char **documentation;
+
+ const char *fragment_path;
+ const char *source_path;
+ const char *control_group;
+
+ char **dropin_paths;
+
+ char **triggered_by;
+ char **triggers;
+
+ const char *load_error;
+ const char *result;
+
+ usec_t inactive_exit_timestamp;
+ usec_t inactive_exit_timestamp_monotonic;
+ usec_t active_enter_timestamp;
+ usec_t active_exit_timestamp;
+ usec_t inactive_enter_timestamp;
+
+ bool need_daemon_reload;
+ bool transient;
+
+ /* Service */
+ pid_t main_pid;
+ pid_t control_pid;
+ const char *status_text;
+ const char *pid_file;
+ bool running:1;
+ int status_errno;
+
+ usec_t start_timestamp;
+ usec_t exit_timestamp;
+
+ int exit_code, exit_status;
+
+ const char *log_namespace;
+
+ usec_t condition_timestamp;
+ bool condition_result;
+ LIST_HEAD(UnitCondition, conditions);
+
+ usec_t assert_timestamp;
+ bool assert_result;
+ bool failed_assert_trigger;
+ bool failed_assert_negate;
+ const char *failed_assert;
+ const char *failed_assert_parameter;
+ usec_t next_elapse_real;
+ usec_t next_elapse_monotonic;
+
+ /* Socket */
+ unsigned n_accepted;
+ unsigned n_connections;
+ unsigned n_refused;
+ bool accept;
+
+ /* Pairs of type, path */
+ char **listen;
+
+ /* Device */
+ const char *sysfs_path;
+
+ /* Mount, Automount */
+ const char *where;
+
+ /* Swap */
+ const char *what;
+
+ /* CGroup */
+ uint64_t memory_current;
+ uint64_t memory_min;
+ uint64_t memory_low;
+ uint64_t memory_high;
+ uint64_t memory_max;
+ uint64_t memory_swap_max;
+ uint64_t memory_limit;
+ uint64_t cpu_usage_nsec;
+ uint64_t tasks_current;
+ uint64_t tasks_max;
+ uint64_t ip_ingress_bytes;
+ uint64_t ip_egress_bytes;
+ uint64_t io_read_bytes;
+ uint64_t io_write_bytes;
+
+ uint64_t default_memory_min;
+ uint64_t default_memory_low;
+
+ LIST_HEAD(ExecStatusInfo, exec);
+} UnitStatusInfo;
+
+static void unit_status_info_free(UnitStatusInfo *info) {
+ ExecStatusInfo *p;
+ UnitCondition *c;
+
+ strv_free(info->documentation);
+ strv_free(info->dropin_paths);
+ strv_free(info->triggered_by);
+ strv_free(info->triggers);
+ strv_free(info->listen);
+
+ while ((c = info->conditions)) {
+ LIST_REMOVE(conditions, info->conditions, c);
+ unit_condition_free(c);
+ }
+
+ while ((p = info->exec)) {
+ LIST_REMOVE(exec, info->exec, p);
+ exec_status_info_free(p);
+ }
+}
+
+static void format_active_state(const char *active_state, const char **active_on, const char **active_off) {
+ if (streq_ptr(active_state, "failed")) {
+ *active_on = ansi_highlight_red();
+ *active_off = ansi_normal();
+ } else if (STRPTR_IN_SET(active_state, "active", "reloading")) {
+ *active_on = ansi_highlight_green();
+ *active_off = ansi_normal();
+ } else
+ *active_on = *active_off = "";
+}
+
+static void print_status_info(
+ sd_bus *bus,
+ UnitStatusInfo *i,
+ bool *ellipsized) {
+
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX], since2[FORMAT_TIMESTAMP_MAX];
+ const char *s1, *s2, *active_on, *active_off, *on, *off, *ss, *fs;
+ _cleanup_free_ char *formatted_path = NULL;
+ ExecStatusInfo *p;
+ usec_t timestamp;
+ const char *path;
+ char **t, **t2;
+ int r;
+
+ assert(i);
+
+ /* This shows pretty information about a unit. See print_property() for a low-level property
+ * printer */
+
+ format_active_state(i->active_state, &active_on, &active_off);
+
+ printf("%s%s%s %s", active_on, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), active_off, strna(i->id));
+
+ if (i->description && !streq_ptr(i->id, i->description))
+ printf(" - %s", i->description);
+
+ printf("\n");
+
+ if (i->following)
+ printf(" Follow: unit currently follows state of %s\n", i->following);
+
+ if (STRPTR_IN_SET(i->load_state, "error", "not-found", "bad-setting")) {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ } else
+ on = off = "";
+
+ path = i->source_path ?: i->fragment_path;
+ if (path && terminal_urlify_path(path, NULL, &formatted_path) >= 0)
+ path = formatted_path;
+
+ if (!isempty(i->load_error))
+ printf(" Loaded: %s%s%s (Reason: %s)\n",
+ on, strna(i->load_state), off, i->load_error);
+ else if (path && !isempty(i->unit_file_state)) {
+ bool show_preset = !isempty(i->unit_file_preset) &&
+ show_preset_for_state(unit_file_state_from_string(i->unit_file_state));
+
+ printf(" Loaded: %s%s%s (%s; %s%s%s)\n",
+ on, strna(i->load_state), off,
+ path,
+ i->unit_file_state,
+ show_preset ? "; vendor preset: " : "",
+ show_preset ? i->unit_file_preset : "");
+
+ } else if (path)
+ printf(" Loaded: %s%s%s (%s)\n",
+ on, strna(i->load_state), off, path);
+ else
+ printf(" Loaded: %s%s%s\n",
+ on, strna(i->load_state), off);
+
+ if (i->transient)
+ printf(" Transient: yes\n");
+
+ if (!strv_isempty(i->dropin_paths)) {
+ _cleanup_free_ char *dir = NULL;
+ bool last = false;
+ char ** dropin;
+
+ STRV_FOREACH(dropin, i->dropin_paths) {
+ _cleanup_free_ char *dropin_formatted = NULL;
+ const char *df;
+
+ if (!dir || last) {
+ printf(dir ? " " :
+ " Drop-In: ");
+
+ dir = mfree(dir);
+
+ dir = dirname_malloc(*dropin);
+ if (!dir) {
+ log_oom();
+ return;
+ }
+
+ printf("%s\n"
+ " %s", dir,
+ special_glyph(SPECIAL_GLYPH_TREE_RIGHT));
+ }
+
+ last = ! (*(dropin + 1) && startswith(*(dropin + 1), dir));
+
+ if (terminal_urlify_path(*dropin, basename(*dropin), &dropin_formatted) >= 0)
+ df = dropin_formatted;
+ else
+ df = *dropin;
+
+ printf("%s%s", df, last ? "\n" : ", ");
+ }
+ }
+
+ ss = streq_ptr(i->active_state, i->sub_state) ? NULL : i->sub_state;
+ if (ss)
+ printf(" Active: %s%s (%s)%s",
+ active_on, strna(i->active_state), ss, active_off);
+ else
+ printf(" Active: %s%s%s",
+ active_on, strna(i->active_state), active_off);
+
+ fs = !isempty(i->freezer_state) && !streq(i->freezer_state, "running") ? i->freezer_state : NULL;
+ if (fs)
+ printf(" %s(%s)%s", ansi_highlight_yellow(), fs, ansi_normal());
+
+ if (!isempty(i->result) && !streq(i->result, "success"))
+ printf(" (Result: %s)", i->result);
+
+ timestamp = STRPTR_IN_SET(i->active_state, "active", "reloading") ? i->active_enter_timestamp :
+ STRPTR_IN_SET(i->active_state, "inactive", "failed") ? i->inactive_enter_timestamp :
+ STRPTR_IN_SET(i->active_state, "activating") ? i->inactive_exit_timestamp :
+ i->active_exit_timestamp;
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), timestamp);
+ s2 = format_timestamp_style(since2, sizeof(since2), timestamp, arg_timestamp_style);
+
+ if (s1)
+ printf(" since %s; %s\n", s2, s1);
+ else if (s2)
+ printf(" since %s\n", s2);
+ else
+ printf("\n");
+
+ STRV_FOREACH(t, i->triggered_by) {
+ UnitActiveState state = _UNIT_ACTIVE_STATE_INVALID;
+
+ (void) get_state_one_unit(bus, *t, &state);
+ format_active_state(unit_active_state_to_string(state), &on, &off);
+
+ printf("%s %s%s%s %s\n",
+ t == i->triggered_by ? "TriggeredBy:" : " ",
+ on, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), off,
+ *t);
+ }
+
+ if (endswith(i->id, ".timer")) {
+ char tstamp1[FORMAT_TIMESTAMP_RELATIVE_MAX],
+ tstamp2[FORMAT_TIMESTAMP_MAX];
+ const char *next_rel_time, *next_time;
+ dual_timestamp nw, next = {i->next_elapse_real,
+ i->next_elapse_monotonic};
+ usec_t next_elapse;
+
+ printf(" Trigger: ");
+
+ dual_timestamp_get(&nw);
+ next_elapse = calc_next_elapse(&nw, &next);
+ next_rel_time = format_timestamp_relative(tstamp1, sizeof tstamp1, next_elapse);
+ next_time = format_timestamp_style(tstamp2, sizeof tstamp2, next_elapse, arg_timestamp_style);
+
+ if (next_time && next_rel_time)
+ printf("%s; %s\n", next_time, next_rel_time);
+ else
+ printf("n/a\n");
+ }
+
+ STRV_FOREACH(t, i->triggers) {
+ UnitActiveState state = _UNIT_ACTIVE_STATE_INVALID;
+
+ (void) get_state_one_unit(bus, *t, &state);
+ format_active_state(unit_active_state_to_string(state), &on, &off);
+
+ printf("%s %s%s%s %s\n",
+ t == i->triggers ? " Triggers:" : " ",
+ on, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), off,
+ *t);
+ }
+
+ if (!i->condition_result && i->condition_timestamp > 0) {
+ UnitCondition *c;
+ int n = 0;
+
+ s1 = format_timestamp_relative(since1, sizeof(since1), i->condition_timestamp);
+ s2 = format_timestamp_style(since2, sizeof(since2), i->condition_timestamp, arg_timestamp_style);
+
+ printf(" Condition: start %scondition failed%s at %s%s%s\n",
+ ansi_highlight_yellow(), ansi_normal(),
+ s2, s1 ? "; " : "", strempty(s1));
+
+ LIST_FOREACH(conditions, c, i->conditions)
+ if (c->tristate < 0)
+ n++;
+
+ LIST_FOREACH(conditions, c, i->conditions)
+ if (c->tristate < 0)
+ printf(" %s %s=%s%s%s was not met\n",
+ --n ? special_glyph(SPECIAL_GLYPH_TREE_BRANCH) : special_glyph(SPECIAL_GLYPH_TREE_RIGHT),
+ c->name,
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->param);
+ }
+
+ if (!i->assert_result && i->assert_timestamp > 0) {
+ s1 = format_timestamp_relative(since1, sizeof(since1), i->assert_timestamp);
+ s2 = format_timestamp_style(since2, sizeof(since2), i->assert_timestamp, arg_timestamp_style);
+
+ printf(" Assert: start %sassertion failed%s at %s%s%s\n",
+ ansi_highlight_red(), ansi_normal(),
+ s2, s1 ? "; " : "", strempty(s1));
+ if (i->failed_assert_trigger)
+ printf(" none of the trigger assertions were met\n");
+ else if (i->failed_assert)
+ printf(" %s=%s%s was not met\n",
+ i->failed_assert,
+ i->failed_assert_negate ? "!" : "",
+ i->failed_assert_parameter);
+ }
+
+ if (i->sysfs_path)
+ printf(" Device: %s\n", i->sysfs_path);
+ if (i->where)
+ printf(" Where: %s\n", i->where);
+ if (i->what)
+ printf(" What: %s\n", i->what);
+
+ STRV_FOREACH(t, i->documentation) {
+ _cleanup_free_ char *formatted = NULL;
+ const char *q;
+
+ if (terminal_urlify(*t, NULL, &formatted) >= 0)
+ q = formatted;
+ else
+ q = *t;
+
+ printf(" %*s %s\n", 9, t == i->documentation ? "Docs:" : "", q);
+ }
+
+ STRV_FOREACH_PAIR(t, t2, i->listen)
+ printf(" %*s %s (%s)\n", 9, t == i->listen ? "Listen:" : "", *t2, *t);
+
+ if (i->accept) {
+ printf(" Accepted: %u; Connected: %u;", i->n_accepted, i->n_connections);
+ if (i->n_refused)
+ printf(" Refused: %u", i->n_refused);
+ printf("\n");
+ }
+
+ LIST_FOREACH(exec, p, i->exec) {
+ _cleanup_free_ char *argv = NULL;
+ bool good;
+
+ /* Only show exited processes here */
+ if (p->code == 0)
+ continue;
+
+ /* Don't print ExecXYZEx= properties here since it will appear as a
+ * duplicate of the non-Ex= variant. */
+ if (endswith(p->name, "Ex"))
+ continue;
+
+ argv = strv_join(p->argv, " ");
+ printf(" Process: "PID_FMT" %s=%s ", p->pid, p->name, strna(argv));
+
+ good = is_clean_exit(p->code, p->status, EXIT_CLEAN_DAEMON, NULL);
+ if (!good) {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ } else
+ on = off = "";
+
+ printf("%s(code=%s, ", on, sigchld_code_to_string(p->code));
+
+ if (p->code == CLD_EXITED) {
+ const char *c;
+
+ printf("status=%i", p->status);
+
+ c = exit_status_to_string(p->status, EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
+ if (c)
+ printf("/%s", c);
+
+ } else
+ printf("signal=%s", signal_to_string(p->status));
+
+ printf(")%s\n", off);
+
+ if (i->main_pid == p->pid &&
+ i->start_timestamp == p->start_timestamp &&
+ i->exit_timestamp == p->start_timestamp)
+ /* Let's not show this twice */
+ i->main_pid = 0;
+
+ if (p->pid == i->control_pid)
+ i->control_pid = 0;
+ }
+
+ if (i->main_pid > 0 || i->control_pid > 0) {
+ if (i->main_pid > 0) {
+ printf(" Main PID: "PID_FMT, i->main_pid);
+
+ if (i->running) {
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(i->main_pid, &comm);
+ if (comm)
+ printf(" (%s)", comm);
+ }
+
+ } else if (i->exit_code > 0) {
+ printf(" (code=%s, ", sigchld_code_to_string(i->exit_code));
+
+ if (i->exit_code == CLD_EXITED) {
+ const char *c;
+
+ printf("status=%i", i->exit_status);
+
+ c = exit_status_to_string(i->exit_status,
+ EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
+ if (c)
+ printf("/%s", c);
+
+ } else
+ printf("signal=%s", signal_to_string(i->exit_status));
+ printf(")");
+ }
+ }
+
+ if (i->control_pid > 0) {
+ _cleanup_free_ char *c = NULL;
+
+ if (i->main_pid > 0)
+ fputs("; Control PID: ", stdout);
+ else
+ fputs("Cntrl PID: ", stdout); /* if first in column, abbreviated so it fits alignment */
+
+ printf(PID_FMT, i->control_pid);
+
+ if (arg_transport == BUS_TRANSPORT_LOCAL) {
+ (void) get_process_comm(i->control_pid, &c);
+ if (c)
+ printf(" (%s)", c);
+ }
+ }
+
+ printf("\n");
+ }
+
+ if (i->status_text)
+ printf(" Status: \"%s\"\n", i->status_text);
+ if (i->status_errno > 0)
+ printf(" Error: %i (%s)\n", i->status_errno, strerror_safe(i->status_errno));
+
+ if (i->ip_ingress_bytes != (uint64_t) -1 && i->ip_egress_bytes != (uint64_t) -1) {
+ char buf_in[FORMAT_BYTES_MAX], buf_out[FORMAT_BYTES_MAX];
+
+ printf(" IP: %s in, %s out\n",
+ format_bytes(buf_in, sizeof(buf_in), i->ip_ingress_bytes),
+ format_bytes(buf_out, sizeof(buf_out), i->ip_egress_bytes));
+ }
+
+ if (i->io_read_bytes != UINT64_MAX && i->io_write_bytes != UINT64_MAX) {
+ char buf_in[FORMAT_BYTES_MAX], buf_out[FORMAT_BYTES_MAX];
+
+ printf(" IO: %s read, %s written\n",
+ format_bytes(buf_in, sizeof(buf_in), i->io_read_bytes),
+ format_bytes(buf_out, sizeof(buf_out), i->io_write_bytes));
+ }
+
+ if (i->tasks_current != (uint64_t) -1) {
+ printf(" Tasks: %" PRIu64, i->tasks_current);
+
+ if (i->tasks_max != (uint64_t) -1)
+ printf(" (limit: %" PRIu64 ")\n", i->tasks_max);
+ else
+ printf("\n");
+ }
+
+ if (i->memory_current != (uint64_t) -1) {
+ char buf[FORMAT_BYTES_MAX];
+
+ printf(" Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current));
+
+ if (i->memory_min > 0 || i->memory_low > 0 ||
+ i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX ||
+ i->memory_swap_max != CGROUP_LIMIT_MAX ||
+ i->memory_limit != CGROUP_LIMIT_MAX) {
+ const char *prefix = "";
+
+ printf(" (");
+ if (i->memory_min > 0) {
+ printf("%smin: %s", prefix, format_bytes_cgroup_protection(buf, sizeof(buf), i->memory_min));
+ prefix = " ";
+ }
+ if (i->memory_low > 0) {
+ printf("%slow: %s", prefix, format_bytes_cgroup_protection(buf, sizeof(buf), i->memory_low));
+ prefix = " ";
+ }
+ if (i->memory_high != CGROUP_LIMIT_MAX) {
+ printf("%shigh: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_high));
+ prefix = " ";
+ }
+ if (i->memory_max != CGROUP_LIMIT_MAX) {
+ printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max));
+ prefix = " ";
+ }
+ if (i->memory_swap_max != CGROUP_LIMIT_MAX) {
+ printf("%sswap max: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_swap_max));
+ prefix = " ";
+ }
+ if (i->memory_limit != CGROUP_LIMIT_MAX) {
+ printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit));
+ prefix = " ";
+ }
+ printf(")");
+ }
+ printf("\n");
+ }
+
+ if (i->cpu_usage_nsec != (uint64_t) -1) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" CPU: %s\n", format_timespan(buf, sizeof(buf), i->cpu_usage_nsec / NSEC_PER_USEC, USEC_PER_MSEC));
+ }
+
+ if (i->control_group) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ static const char prefix[] = " ";
+ unsigned c;
+
+ printf(" CGroup: %s\n", i->control_group);
+
+ c = columns();
+ if (c > sizeof(prefix) - 1)
+ c -= sizeof(prefix) - 1;
+ else
+ c = 0;
+
+ r = unit_show_processes(bus, i->id, i->control_group, prefix, c, get_output_flags(), &error);
+ if (r == -EBADR) {
+ unsigned k = 0;
+ pid_t extra[2];
+
+ /* Fallback for older systemd versions where the GetUnitProcesses() call is not yet available */
+
+ if (i->main_pid > 0)
+ extra[k++] = i->main_pid;
+
+ if (i->control_pid > 0)
+ extra[k++] = i->control_pid;
+
+ show_cgroup_and_extra(SYSTEMD_CGROUP_CONTROLLER, i->control_group, prefix, c, extra, k, get_output_flags());
+ } else if (r < 0)
+ log_warning_errno(r, "Failed to dump process list for '%s', ignoring: %s",
+ i->id, bus_error_message(&error, r));
+ }
+
+ if (i->id && arg_transport == BUS_TRANSPORT_LOCAL)
+ show_journal_by_unit(
+ stdout,
+ i->id,
+ i->log_namespace,
+ arg_output,
+ 0,
+ i->inactive_exit_timestamp_monotonic,
+ arg_lines,
+ getuid(),
+ get_output_flags() | OUTPUT_BEGIN_NEWLINE,
+ SD_JOURNAL_LOCAL_ONLY,
+ arg_scope == UNIT_FILE_SYSTEM,
+ ellipsized);
+
+ if (i->need_daemon_reload)
+ warn_unit_file_changed(i->id);
+}
+
+static void show_unit_help(UnitStatusInfo *i) {
+ char **p;
+
+ assert(i);
+
+ if (!i->documentation) {
+ log_info("Documentation for %s not known.", i->id);
+ return;
+ }
+
+ STRV_FOREACH(p, i->documentation)
+ if (startswith(*p, "man:"))
+ show_man_page(*p + 4, false);
+ else
+ log_info("Can't show: %s", *p);
+}
+
+static int map_main_pid(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ UnitStatusInfo *i = userdata;
+ uint32_t u;
+ int r;
+
+ r = sd_bus_message_read(m, "u", &u);
+ if (r < 0)
+ return r;
+
+ i->main_pid = (pid_t) u;
+ i->running = u > 0;
+
+ return 0;
+}
+
+static int map_load_error(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ const char *message, **p = userdata;
+ int r;
+
+ r = sd_bus_message_read(m, "(ss)", NULL, &message);
+ if (r < 0)
+ return r;
+
+ if (!isempty(message))
+ *p = message;
+
+ return 0;
+}
+
+static int map_listen(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ const char *type, *path;
+ char ***p = userdata;
+ int r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(ss)", &type, &path)) > 0) {
+
+ r = strv_extend(p, type);
+ if (r < 0)
+ return r;
+
+ r = strv_extend(p, path);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_conditions(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ UnitStatusInfo *i = userdata;
+ const char *cond, *param;
+ int trigger, negate;
+ int32_t state;
+ int r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sbbsi)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(sbbsi)", &cond, &trigger, &negate, &param, &state)) > 0) {
+ _cleanup_(unit_condition_freep) UnitCondition *c = NULL;
+
+ c = new(UnitCondition, 1);
+ if (!c)
+ return -ENOMEM;
+
+ *c = (UnitCondition) {
+ .name = strdup(cond),
+ .param = strdup(param),
+ .trigger = trigger,
+ .negate = negate,
+ .tristate = state,
+ };
+
+ if (!c->name || !c->param)
+ return -ENOMEM;
+
+ LIST_PREPEND(conditions, i->conditions, TAKE_PTR(c));
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_asserts(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ UnitStatusInfo *i = userdata;
+ const char *cond, *param;
+ int trigger, negate;
+ int32_t state;
+ int r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sbbsi)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(sbbsi)", &cond, &trigger, &negate, &param, &state)) > 0) {
+ if (state < 0 && (!trigger || !i->failed_assert)) {
+ i->failed_assert = cond;
+ i->failed_assert_trigger = trigger;
+ i->failed_assert_negate = negate;
+ i->failed_assert_parameter = param;
+ }
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_exec(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ _cleanup_free_ ExecStatusInfo *info = NULL;
+ ExecStatusInfo *last;
+ UnitStatusInfo *i = userdata;
+ bool is_ex_prop = endswith(member, "Ex");
+ int r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, is_ex_prop ? "(sasasttttuii)" : "(sasbttttuii)");
+ if (r < 0)
+ return r;
+
+ info = new0(ExecStatusInfo, 1);
+ if (!info)
+ return -ENOMEM;
+
+ LIST_FIND_TAIL(exec, i->exec, last);
+
+ while ((r = exec_status_info_deserialize(m, info, is_ex_prop)) > 0) {
+
+ info->name = strdup(member);
+ if (!info->name)
+ return -ENOMEM;
+
+ LIST_INSERT_AFTER(exec, i->exec, last, info);
+ last = info;
+
+ info = new0(ExecStatusInfo, 1);
+ if (!info)
+ return -ENOMEM;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char bus_type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ /* This is a low-level property printer, see print_status_info() for the nicer output */
+
+ r = sd_bus_message_peek_type(m, &bus_type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (bus_type) {
+
+ case SD_BUS_TYPE_INT32:
+ if (endswith(name, "ActionExitStatus")) {
+ int32_t i;
+
+ r = sd_bus_message_read_basic(m, bus_type, &i);
+ if (r < 0)
+ return r;
+
+ if (i >= 0 && i <= 255)
+ bus_print_property_valuef(name, expected_value, value, "%"PRIi32, i);
+ else if (all)
+ bus_print_property_value(name, expected_value, value, "[not set]");
+
+ return 1;
+ } else if (streq(name, "NUMAPolicy")) {
+ int32_t i;
+
+ r = sd_bus_message_read_basic(m, bus_type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_valuef(name, expected_value, value, "%s", strna(mpol_to_string(i)));
+
+ return 1;
+ }
+ break;
+
+ case SD_BUS_TYPE_STRUCT:
+
+ if (contents[0] == SD_BUS_TYPE_UINT32 && streq(name, "Job")) {
+ uint32_t u;
+
+ r = sd_bus_message_read(m, "(uo)", &u, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (u > 0)
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u);
+ else if (all)
+ bus_print_property_value(name, expected_value, value, "");
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRING && streq(name, "Unit")) {
+ const char *s;
+
+ r = sd_bus_message_read(m, "(so)", &s, NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(s))
+ bus_print_property_value(name, expected_value, value, s);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRING && streq(name, "LoadError")) {
+ const char *a = NULL, *b = NULL;
+
+ r = sd_bus_message_read(m, "(ss)", &a, &b);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!isempty(a) || !isempty(b))
+ bus_print_property_valuef(name, expected_value, value, "%s \"%s\"", strempty(a), strempty(b));
+ else if (all)
+ bus_print_property_value(name, expected_value, value, "");
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "SystemCallFilter", "SystemCallLog", "RestrictAddressFamilies")) {
+ _cleanup_strv_free_ char **l = NULL;
+ int allow_list;
+
+ r = sd_bus_message_enter_container(m, 'r', "bas");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(m, "b", &allow_list);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_strv(m, &l);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || allow_list || !strv_isempty(l)) {
+ bool first = true;
+ char **i;
+
+ if (!value) {
+ fputs(name, stdout);
+ fputc('=', stdout);
+ }
+
+ if (!allow_list)
+ fputc('~', stdout);
+
+ STRV_FOREACH(i, l) {
+ if (first)
+ first = false;
+ else
+ fputc(' ', stdout);
+
+ fputs(*i, stdout);
+ }
+ fputc('\n', stdout);
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "SELinuxContext", "AppArmorProfile", "SmackProcessLabel")) {
+ int ignore;
+ const char *s;
+
+ r = sd_bus_message_read(m, "(bs)", &ignore, &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!isempty(s))
+ bus_print_property_valuef(name, expected_value, value, "%s%s", ignore ? "-" : "", s);
+ else if (all)
+ bus_print_property_value(name, expected_value, value, "");
+
+ return 1;
+
+ } else if (endswith(name, "ExitStatus") && streq(contents, "aiai")) {
+ const int32_t *status, *signal;
+ size_t n_status, n_signal, i;
+
+ r = sd_bus_message_enter_container(m, 'r', "aiai");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(m, 'i', (const void **) &status, &n_status);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(m, 'i', (const void **) &signal, &n_signal);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ n_status /= sizeof(int32_t);
+ n_signal /= sizeof(int32_t);
+
+ if (all || n_status > 0 || n_signal > 0) {
+ bool first = true;
+
+ if (!value) {
+ fputs(name, stdout);
+ fputc('=', stdout);
+ }
+
+ for (i = 0; i < n_status; i++) {
+ if (first)
+ first = false;
+ else
+ fputc(' ', stdout);
+
+ printf("%"PRIi32, status[i]);
+ }
+
+ for (i = 0; i < n_signal; i++) {
+ const char *str;
+
+ str = signal_to_string((int) signal[i]);
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', stdout);
+
+ if (str)
+ fputs(str, stdout);
+ else
+ printf("%"PRIi32, status[i]);
+ }
+
+ fputc('\n', stdout);
+ }
+ return 1;
+ }
+
+ break;
+
+ case SD_BUS_TYPE_ARRAY:
+
+ if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "EnvironmentFiles")) {
+ const char *path;
+ int ignore;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sb)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(sb)", &path, &ignore)) > 0)
+ bus_print_property_valuef(name, expected_value, value, "%s (ignore_errors=%s)", path, yes_no(ignore));
+
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "Paths")) {
+ const char *type, *path;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(ss)", &type, &path)) > 0)
+ bus_print_property_valuef(name, expected_value, value, "%s (%s)", path, type);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "Listen")) {
+ const char *type, *path;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(ss)", &type, &path)) > 0)
+ bus_print_property_valuef(name, expected_value, value, "%s (%s)", path, type);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "TimersMonotonic")) {
+ const char *base;
+ uint64_t v, next_elapse;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(stt)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(stt)", &base, &v, &next_elapse)) > 0) {
+ char timespan1[FORMAT_TIMESPAN_MAX] = "n/a", timespan2[FORMAT_TIMESPAN_MAX] = "n/a";
+
+ (void) format_timespan(timespan1, sizeof timespan1, v, 0);
+ (void) format_timespan(timespan2, sizeof timespan2, next_elapse, 0);
+
+ bus_print_property_valuef(name, expected_value, value,
+ "{ %s=%s ; next_elapse=%s }", base, timespan1, timespan2);
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "TimersCalendar")) {
+ const char *base, *spec;
+ uint64_t next_elapse;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sst)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(sst)", &base, &spec, &next_elapse)) > 0) {
+ char timestamp[FORMAT_TIMESTAMP_MAX] = "n/a";
+
+ (void) format_timestamp_style(timestamp, sizeof(timestamp), next_elapse, arg_timestamp_style);
+ bus_print_property_valuef(name, expected_value, value,
+ "{ %s=%s ; next_elapse=%s }", base, spec, timestamp);
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && startswith(name, "Exec")) {
+ ExecStatusInfo info = {};
+ bool is_ex_prop = endswith(name, "Ex");
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, is_ex_prop ? "(sasasttttuii)" : "(sasbttttuii)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = exec_status_info_deserialize(m, &info, is_ex_prop)) > 0) {
+ char timestamp1[FORMAT_TIMESTAMP_MAX], timestamp2[FORMAT_TIMESTAMP_MAX];
+ _cleanup_strv_free_ char **optv = NULL;
+ _cleanup_free_ char *tt, *o = NULL;
+
+ tt = strv_join(info.argv, " ");
+
+ if (is_ex_prop) {
+ r = exec_command_flags_to_strv(info.flags, &optv);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert ExecCommandFlags to strv: %m");
+
+ o = strv_join(optv, " ");
+
+ bus_print_property_valuef(name, expected_value, value,
+ "{ path=%s ; argv[]=%s ; flags=%s ; start_time=[%s] ; stop_time=[%s] ; pid="PID_FMT" ; code=%s ; status=%i%s%s }",
+ strna(info.path),
+ strna(tt),
+ strna(o),
+ strna(format_timestamp_style(timestamp1, sizeof(timestamp1), info.start_timestamp, arg_timestamp_style)),
+ strna(format_timestamp_style(timestamp2, sizeof(timestamp2), info.exit_timestamp, arg_timestamp_style)),
+ info.pid,
+ sigchld_code_to_string(info.code),
+ info.status,
+ info.code == CLD_EXITED ? "" : "/",
+ strempty(info.code == CLD_EXITED ? NULL : signal_to_string(info.status)));
+ } else
+ bus_print_property_valuef(name, expected_value, value,
+ "{ path=%s ; argv[]=%s ; ignore_errors=%s ; start_time=[%s] ; stop_time=[%s] ; pid="PID_FMT" ; code=%s ; status=%i%s%s }",
+ strna(info.path),
+ strna(tt),
+ yes_no(info.ignore),
+ strna(format_timestamp_style(timestamp1, sizeof(timestamp1), info.start_timestamp, arg_timestamp_style)),
+ strna(format_timestamp_style(timestamp2, sizeof(timestamp2), info.exit_timestamp, arg_timestamp_style)),
+ info.pid,
+ sigchld_code_to_string(info.code),
+ info.status,
+ info.code == CLD_EXITED ? "" : "/",
+ strempty(info.code == CLD_EXITED ? NULL : signal_to_string(info.status)));
+
+ free(info.path);
+ strv_free(info.argv);
+ zero(info);
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "DeviceAllow")) {
+ const char *path, *rwm;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(ss)", &path, &rwm)) > 0)
+ bus_print_property_valuef(name, expected_value, value, "%s %s", strna(path), strna(rwm));
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN &&
+ STR_IN_SET(name, "IODeviceWeight", "BlockIODeviceWeight")) {
+ const char *path;
+ uint64_t weight;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(st)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(st)", &path, &weight)) > 0)
+ bus_print_property_valuef(name, expected_value, value, "%s %"PRIu64, strna(path), weight);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN &&
+ (cgroup_io_limit_type_from_string(name) >= 0 ||
+ STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth"))) {
+ const char *path;
+ uint64_t bandwidth;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(st)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(st)", &path, &bandwidth)) > 0)
+ bus_print_property_valuef(name, expected_value, value, "%s %"PRIu64, strna(path), bandwidth);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN &&
+ streq(name, "IODeviceLatencyTargetUSec")) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ const char *path;
+ uint64_t target;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(st)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(st)", &path, &target)) > 0)
+ bus_print_property_valuef(name, expected_value, value, "%s %s", strna(path),
+ format_timespan(ts, sizeof(ts), target, 1));
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 1;
+
+ } else if (contents[0] == SD_BUS_TYPE_BYTE && STR_IN_SET(name, "StandardInputData", "RootHashSignature")) {
+ _cleanup_free_ char *h = NULL;
+ const void *p;
+ size_t sz;
+ ssize_t n;
+
+ r = sd_bus_message_read_array(m, 'y', &p, &sz);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ n = base64mem(p, sz, &h);
+ if (n < 0)
+ return log_oom();
+
+ bus_print_property_value(name, expected_value, value, h);
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "IPAddressAllow", "IPAddressDeny")) {
+ _cleanup_free_ char *addresses = NULL;
+
+ r = sd_bus_message_enter_container(m, 'a', "(iayu)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *str = NULL;
+ uint32_t prefixlen;
+ int32_t family;
+ const void *ap;
+ size_t an;
+
+ r = sd_bus_message_enter_container(m, 'r', "iayu");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(m, "i", &family);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read_array(m, 'y', &ap, &an);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(m, "u", &prefixlen);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ continue;
+
+ if (an != FAMILY_ADDRESS_SIZE(family))
+ continue;
+
+ if (prefixlen > FAMILY_ADDRESS_SIZE(family) * 8)
+ continue;
+
+ if (in_addr_prefix_to_string(family, (union in_addr_union *) ap, prefixlen, &str) < 0)
+ continue;
+
+ if (!strextend_with_separator(&addresses, " ", str, NULL))
+ return log_oom();
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(addresses))
+ bus_print_property_value(name, expected_value, value, strempty(addresses));
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "BindPaths", "BindReadOnlyPaths")) {
+ _cleanup_free_ char *paths = NULL;
+ const char *source, *dest;
+ int ignore_enoent;
+ uint64_t rbind;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ssbt)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(ssbt)", &source, &dest, &ignore_enoent, &rbind)) > 0) {
+ _cleanup_free_ char *str = NULL;
+
+ if (isempty(source))
+ continue;
+
+ if (asprintf(&str, "%s%s%s%s%s",
+ ignore_enoent ? "-" : "",
+ source,
+ isempty(dest) ? "" : ":",
+ strempty(dest),
+ rbind == MS_REC ? ":rbind" : "") < 0)
+ return log_oom();
+
+ if (!strextend_with_separator(&paths, " ", str, NULL))
+ return log_oom();
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(paths))
+ bus_print_property_value(name, expected_value, value, strempty(paths));
+
+ return 1;
+
+ } else if (streq(name, "TemporaryFileSystem")) {
+ _cleanup_free_ char *paths = NULL;
+ const char *target, *option;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(ss)", &target, &option)) > 0) {
+ _cleanup_free_ char *str = NULL;
+
+ if (isempty(target))
+ continue;
+
+ if (asprintf(&str, "%s%s%s", target, isempty(option) ? "" : ":", strempty(option)) < 0)
+ return log_oom();
+
+ if (!strextend_with_separator(&paths, " ", str, NULL))
+ return log_oom();
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(paths))
+ bus_print_property_value(name, expected_value, value, strempty(paths));
+
+ return 1;
+
+ } else if (streq(name, "LogExtraFields")) {
+ _cleanup_free_ char *fields = NULL;
+ const void *p;
+ size_t sz;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "ay");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read_array(m, 'y', &p, &sz)) > 0) {
+ _cleanup_free_ char *str = NULL;
+ const char *eq;
+
+ if (memchr(p, 0, sz))
+ continue;
+
+ eq = memchr(p, '=', sz);
+ if (!eq)
+ continue;
+
+ if (!journal_field_valid(p, eq - (const char*) p, false))
+ continue;
+
+ str = malloc(sz + 1);
+ if (!str)
+ return log_oom();
+
+ memcpy(str, p, sz);
+ str[sz] = '\0';
+
+ if (!utf8_is_valid(str))
+ continue;
+
+ if (!strextend_with_separator(&fields, " ", str, NULL))
+ return log_oom();
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(fields))
+ bus_print_property_value(name, expected_value, value, strempty(fields));
+
+ return 1;
+ } else if (contents[0] == SD_BUS_TYPE_BYTE && STR_IN_SET(name, "CPUAffinity", "NUMAMask", "AllowedCPUs", "AllowedMemoryNodes", "EffectiveCPUs", "EffectiveMemoryNodes")) {
+ _cleanup_free_ char *affinity = NULL;
+ _cleanup_(cpu_set_reset) CPUSet set = {};
+ const void *a;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, 'y', &a, &n);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = cpu_set_from_dbus(a, n, &set);
+ if (r < 0)
+ return log_error_errno(r, "Failed to deserialize %s: %m", name);
+
+ affinity = cpu_set_to_range_string(&set);
+ if (!affinity)
+ return log_oom();
+
+ bus_print_property_value(name, expected_value, value, affinity);
+
+ return 1;
+ } else if (streq(name, "MountImages")) {
+ _cleanup_free_ char *paths = NULL;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(ssba(ss))");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *str = NULL;
+ const char *source, *destination, *partition, *mount_options;
+ int ignore_enoent;
+
+ r = sd_bus_message_enter_container(m, 'r', "ssba(ss)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "ssb", &source, &destination, &ignore_enoent);
+ if (r <= 0)
+ break;
+
+ str = strjoin(ignore_enoent ? "-" : "",
+ source,
+ ":",
+ destination);
+ if (!str)
+ return log_oom();
+
+ r = sd_bus_message_enter_container(m, 'a', "(ss)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(m, "(ss)", &partition, &mount_options)) > 0) {
+ _cleanup_free_ char *previous = NULL;
+
+ previous = TAKE_PTR(str);
+ str = strjoin(strempty(previous), previous ? ":" : "", partition, ":", mount_options);
+ if (!str)
+ return log_oom();
+ }
+ if (r < 0)
+ return r;
+
+ if (!strextend_with_separator(&paths, " ", str, NULL))
+ return log_oom();
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (all || !isempty(paths))
+ bus_print_property_value(name, expected_value, value, strempty(paths));
+
+ return 1;
+
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+typedef enum SystemctlShowMode{
+ SYSTEMCTL_SHOW_PROPERTIES,
+ SYSTEMCTL_SHOW_STATUS,
+ SYSTEMCTL_SHOW_HELP,
+ _SYSTEMCTL_SHOW_MODE_MAX,
+ _SYSTEMCTL_SHOW_MODE_INVALID = -1,
+} SystemctlShowMode;
+
+static const char* const systemctl_show_mode_table[_SYSTEMCTL_SHOW_MODE_MAX] = {
+ [SYSTEMCTL_SHOW_PROPERTIES] = "show",
+ [SYSTEMCTL_SHOW_STATUS] = "status",
+ [SYSTEMCTL_SHOW_HELP] = "help",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(systemctl_show_mode, SystemctlShowMode);
+
+static int show_one(
+ sd_bus *bus,
+ const char *path,
+ const char *unit,
+ SystemctlShowMode show_mode,
+ bool *new_line,
+ bool *ellipsized) {
+
+ static const struct bus_properties_map property_map[] = {
+ { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) },
+ { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state) },
+ { "FreezerState", "s", NULL, offsetof(UnitStatusInfo, freezer_state) },
+ { "Documentation", "as", NULL, offsetof(UnitStatusInfo, documentation) },
+ {}
+ }, status_map[] = {
+ { "Id", "s", NULL, offsetof(UnitStatusInfo, id) },
+ { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) },
+ { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state) },
+ { "FreezerState", "s", NULL, offsetof(UnitStatusInfo, freezer_state) },
+ { "SubState", "s", NULL, offsetof(UnitStatusInfo, sub_state) },
+ { "UnitFileState", "s", NULL, offsetof(UnitStatusInfo, unit_file_state) },
+ { "UnitFilePreset", "s", NULL, offsetof(UnitStatusInfo, unit_file_preset) },
+ { "Description", "s", NULL, offsetof(UnitStatusInfo, description) },
+ { "Following", "s", NULL, offsetof(UnitStatusInfo, following) },
+ { "Documentation", "as", NULL, offsetof(UnitStatusInfo, documentation) },
+ { "FragmentPath", "s", NULL, offsetof(UnitStatusInfo, fragment_path) },
+ { "SourcePath", "s", NULL, offsetof(UnitStatusInfo, source_path) },
+ { "ControlGroup", "s", NULL, offsetof(UnitStatusInfo, control_group) },
+ { "DropInPaths", "as", NULL, offsetof(UnitStatusInfo, dropin_paths) },
+ { "LoadError", "(ss)", map_load_error, offsetof(UnitStatusInfo, load_error) },
+ { "Result", "s", NULL, offsetof(UnitStatusInfo, result) },
+ { "TriggeredBy", "as", NULL, offsetof(UnitStatusInfo, triggered_by) },
+ { "Triggers", "as", NULL, offsetof(UnitStatusInfo, triggers) },
+ { "InactiveExitTimestamp", "t", NULL, offsetof(UnitStatusInfo, inactive_exit_timestamp) },
+ { "InactiveExitTimestampMonotonic", "t", NULL, offsetof(UnitStatusInfo, inactive_exit_timestamp_monotonic) },
+ { "ActiveEnterTimestamp", "t", NULL, offsetof(UnitStatusInfo, active_enter_timestamp) },
+ { "ActiveExitTimestamp", "t", NULL, offsetof(UnitStatusInfo, active_exit_timestamp) },
+ { "InactiveEnterTimestamp", "t", NULL, offsetof(UnitStatusInfo, inactive_enter_timestamp) },
+ { "NeedDaemonReload", "b", NULL, offsetof(UnitStatusInfo, need_daemon_reload) },
+ { "Transient", "b", NULL, offsetof(UnitStatusInfo, transient) },
+ { "ExecMainPID", "u", NULL, offsetof(UnitStatusInfo, main_pid) },
+ { "MainPID", "u", map_main_pid, 0 },
+ { "ControlPID", "u", NULL, offsetof(UnitStatusInfo, control_pid) },
+ { "StatusText", "s", NULL, offsetof(UnitStatusInfo, status_text) },
+ { "PIDFile", "s", NULL, offsetof(UnitStatusInfo, pid_file) },
+ { "StatusErrno", "i", NULL, offsetof(UnitStatusInfo, status_errno) },
+ { "ExecMainStartTimestamp", "t", NULL, offsetof(UnitStatusInfo, start_timestamp) },
+ { "ExecMainExitTimestamp", "t", NULL, offsetof(UnitStatusInfo, exit_timestamp) },
+ { "ExecMainCode", "i", NULL, offsetof(UnitStatusInfo, exit_code) },
+ { "ExecMainStatus", "i", NULL, offsetof(UnitStatusInfo, exit_status) },
+ { "LogNamespace", "s", NULL, offsetof(UnitStatusInfo, log_namespace) },
+ { "ConditionTimestamp", "t", NULL, offsetof(UnitStatusInfo, condition_timestamp) },
+ { "ConditionResult", "b", NULL, offsetof(UnitStatusInfo, condition_result) },
+ { "Conditions", "a(sbbsi)", map_conditions, 0 },
+ { "AssertTimestamp", "t", NULL, offsetof(UnitStatusInfo, assert_timestamp) },
+ { "AssertResult", "b", NULL, offsetof(UnitStatusInfo, assert_result) },
+ { "Asserts", "a(sbbsi)", map_asserts, 0 },
+ { "NextElapseUSecRealtime", "t", NULL, offsetof(UnitStatusInfo, next_elapse_real) },
+ { "NextElapseUSecMonotonic", "t", NULL, offsetof(UnitStatusInfo, next_elapse_monotonic) },
+ { "NAccepted", "u", NULL, offsetof(UnitStatusInfo, n_accepted) },
+ { "NConnections", "u", NULL, offsetof(UnitStatusInfo, n_connections) },
+ { "NRefused", "u", NULL, offsetof(UnitStatusInfo, n_refused) },
+ { "Accept", "b", NULL, offsetof(UnitStatusInfo, accept) },
+ { "Listen", "a(ss)", map_listen, offsetof(UnitStatusInfo, listen) },
+ { "SysFSPath", "s", NULL, offsetof(UnitStatusInfo, sysfs_path) },
+ { "Where", "s", NULL, offsetof(UnitStatusInfo, where) },
+ { "What", "s", NULL, offsetof(UnitStatusInfo, what) },
+ { "MemoryCurrent", "t", NULL, offsetof(UnitStatusInfo, memory_current) },
+ { "DefaultMemoryMin", "t", NULL, offsetof(UnitStatusInfo, default_memory_min) },
+ { "DefaultMemoryLow", "t", NULL, offsetof(UnitStatusInfo, default_memory_low) },
+ { "MemoryMin", "t", NULL, offsetof(UnitStatusInfo, memory_min) },
+ { "MemoryLow", "t", NULL, offsetof(UnitStatusInfo, memory_low) },
+ { "MemoryHigh", "t", NULL, offsetof(UnitStatusInfo, memory_high) },
+ { "MemoryMax", "t", NULL, offsetof(UnitStatusInfo, memory_max) },
+ { "MemorySwapMax", "t", NULL, offsetof(UnitStatusInfo, memory_swap_max) },
+ { "MemoryLimit", "t", NULL, offsetof(UnitStatusInfo, memory_limit) },
+ { "CPUUsageNSec", "t", NULL, offsetof(UnitStatusInfo, cpu_usage_nsec) },
+ { "TasksCurrent", "t", NULL, offsetof(UnitStatusInfo, tasks_current) },
+ { "TasksMax", "t", NULL, offsetof(UnitStatusInfo, tasks_max) },
+ { "IPIngressBytes", "t", NULL, offsetof(UnitStatusInfo, ip_ingress_bytes) },
+ { "IPEgressBytes", "t", NULL, offsetof(UnitStatusInfo, ip_egress_bytes) },
+ { "IOReadBytes", "t", NULL, offsetof(UnitStatusInfo, io_read_bytes) },
+ { "IOWriteBytes", "t", NULL, offsetof(UnitStatusInfo, io_write_bytes) },
+ { "ExecCondition", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecConditionEx", "a(sasasttttuii)", map_exec, 0 },
+ { "ExecStartPre", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStartPreEx", "a(sasasttttuii)", map_exec, 0 },
+ { "ExecStart", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStartEx", "a(sasasttttuii)", map_exec, 0 },
+ { "ExecStartPost", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStartPostEx", "a(sasasttttuii)", map_exec, 0 },
+ { "ExecReload", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecReloadEx", "a(sasasttttuii)", map_exec, 0 },
+ { "ExecStopPre", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStop", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStopEx", "a(sasasttttuii)", map_exec, 0 },
+ { "ExecStopPost", "a(sasbttttuii)", map_exec, 0 },
+ { "ExecStopPostEx", "a(sasasttttuii)", map_exec, 0 },
+ {}
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_set_free_ Set *found_properties = NULL;
+ _cleanup_(unit_status_info_free) UnitStatusInfo info = {
+ .memory_current = (uint64_t) -1,
+ .memory_high = CGROUP_LIMIT_MAX,
+ .memory_max = CGROUP_LIMIT_MAX,
+ .memory_swap_max = CGROUP_LIMIT_MAX,
+ .memory_limit = (uint64_t) -1,
+ .cpu_usage_nsec = (uint64_t) -1,
+ .tasks_current = (uint64_t) -1,
+ .tasks_max = (uint64_t) -1,
+ .ip_ingress_bytes = (uint64_t) -1,
+ .ip_egress_bytes = (uint64_t) -1,
+ .io_read_bytes = UINT64_MAX,
+ .io_write_bytes = UINT64_MAX,
+ };
+ char **pp;
+ int r;
+
+ assert(path);
+ assert(new_line);
+
+ log_debug("Showing one %s", path);
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ show_mode == SYSTEMCTL_SHOW_STATUS ? status_map : property_map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &reply,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get properties: %s", bus_error_message(&error, r));
+
+ if (unit && streq_ptr(info.load_state, "not-found") && streq_ptr(info.active_state, "inactive")) {
+ log_full(show_mode == SYSTEMCTL_SHOW_STATUS ? LOG_ERR : LOG_DEBUG,
+ "Unit %s could not be found.", unit);
+
+ if (show_mode == SYSTEMCTL_SHOW_STATUS)
+ return EXIT_PROGRAM_OR_SERVICES_STATUS_UNKNOWN;
+ else if (show_mode == SYSTEMCTL_SHOW_HELP)
+ return -ENOENT;
+ }
+
+ if (*new_line)
+ printf("\n");
+
+ *new_line = true;
+
+ if (show_mode == SYSTEMCTL_SHOW_STATUS) {
+ print_status_info(bus, &info, ellipsized);
+
+ if (info.active_state && !STR_IN_SET(info.active_state, "active", "reloading"))
+ return EXIT_PROGRAM_NOT_RUNNING;
+
+ return EXIT_PROGRAM_RUNNING_OR_SERVICE_OK;
+
+ } else if (show_mode == SYSTEMCTL_SHOW_HELP) {
+ show_unit_help(&info);
+ return 0;
+ }
+
+ r = sd_bus_message_rewind(reply, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rewind: %s", bus_error_message(&error, r));
+
+ r = bus_message_print_all_properties(reply, print_property, arg_properties, arg_value, arg_all, &found_properties);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ STRV_FOREACH(pp, arg_properties)
+ if (!set_contains(found_properties, *pp))
+ log_debug("Property %s does not exist.", *pp);
+
+ return 0;
+}
+
+static int get_unit_dbus_path_by_pid(
+ sd_bus *bus,
+ uint32_t pid,
+ char **unit) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ char *u;
+ int r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "GetUnitByPID", &error, &reply, "u", pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit for PID %"PRIu32": %s", pid, bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &u);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ u = strdup(u);
+ if (!u)
+ return log_oom();
+
+ *unit = u;
+ return 0;
+}
+
+static int show_all(
+ sd_bus *bus,
+ bool *new_line,
+ bool *ellipsized) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ const UnitInfo *u;
+ unsigned c;
+ int r, ret = 0;
+
+ r = get_unit_list(bus, NULL, NULL, &unit_infos, 0, &reply);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ c = (unsigned) r;
+
+ typesafe_qsort(unit_infos, c, unit_info_compare);
+
+ for (u = unit_infos; u < unit_infos + c; u++) {
+ _cleanup_free_ char *p = NULL;
+
+ p = unit_dbus_path_from_name(u->id);
+ if (!p)
+ return log_oom();
+
+ r = show_one(bus, p, u->id, SYSTEMCTL_SHOW_STATUS, new_line, ellipsized);
+ if (r < 0)
+ return r;
+ else if (r > 0 && ret == 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+static int show_system_status(sd_bus *bus) {
+ char since1[FORMAT_TIMESTAMP_RELATIVE_MAX], since2[FORMAT_TIMESTAMP_MAX];
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(machine_info_clear) struct machine_info mi = {};
+ _cleanup_free_ char *hn = NULL;
+ const char *on, *off;
+ int r;
+
+ hn = gethostname_malloc();
+ if (!hn)
+ return log_oom();
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ machine_info_property_map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ &mi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read server status: %s", bus_error_message(&error, r));
+
+ if (streq_ptr(mi.state, "degraded")) {
+ on = ansi_highlight_red();
+ off = ansi_normal();
+ } else if (streq_ptr(mi.state, "running")) {
+ on = ansi_highlight_green();
+ off = ansi_normal();
+ } else {
+ on = ansi_highlight_yellow();
+ off = ansi_normal();
+ }
+
+ printf("%s%s%s %s\n", on, special_glyph(SPECIAL_GLYPH_BLACK_CIRCLE), off, arg_host ? arg_host : hn);
+
+ printf(" State: %s%s%s\n",
+ on, strna(mi.state), off);
+
+ printf(" Jobs: %" PRIu32 " queued\n", mi.n_jobs);
+ printf(" Failed: %" PRIu32 " units\n", mi.n_failed_units);
+
+ printf(" Since: %s; %s\n",
+ format_timestamp_style(since2, sizeof(since2), mi.timestamp, arg_timestamp_style),
+ format_timestamp_relative(since1, sizeof(since1), mi.timestamp));
+
+ printf(" CGroup: %s\n", mi.control_group ?: "/");
+ if (IN_SET(arg_transport,
+ BUS_TRANSPORT_LOCAL,
+ BUS_TRANSPORT_MACHINE)) {
+ static const char prefix[] = " ";
+ unsigned c;
+
+ c = columns();
+ if (c > sizeof(prefix) - 1)
+ c -= sizeof(prefix) - 1;
+ else
+ c = 0;
+
+ show_cgroup(SYSTEMD_CGROUP_CONTROLLER, strempty(mi.control_group), prefix, c, get_output_flags());
+ }
+
+ return 0;
+}
+
+int show(int argc, char *argv[], void *userdata) {
+ bool new_line = false, ellipsized = false;
+ SystemctlShowMode show_mode;
+ int r, ret = 0;
+ sd_bus *bus;
+
+ assert(argv);
+
+ show_mode = systemctl_show_mode_from_string(argv[0]);
+ if (show_mode < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid argument.");
+
+ if (show_mode == SYSTEMCTL_SHOW_HELP && argc <= 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "'help' command expects one or more unit names.\n"
+ "(Alternatively, help for systemctl itself may be shown with --help)");
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ /* If no argument is specified inspect the manager itself */
+ if (show_mode == SYSTEMCTL_SHOW_PROPERTIES && argc <= 1)
+ return show_one(bus, "/org/freedesktop/systemd1", NULL, show_mode, &new_line, &ellipsized);
+
+ if (show_mode == SYSTEMCTL_SHOW_STATUS && argc <= 1) {
+
+ show_system_status(bus);
+ new_line = true;
+
+ if (arg_all)
+ ret = show_all(bus, &new_line, &ellipsized);
+ } else {
+ _cleanup_free_ char **patterns = NULL;
+ char **name;
+
+ STRV_FOREACH(name, strv_skip(argv, 1)) {
+ _cleanup_free_ char *path = NULL, *unit = NULL;
+ uint32_t id;
+
+ if (safe_atou32(*name, &id) < 0) {
+ if (strv_push(&patterns, *name) < 0)
+ return log_oom();
+
+ continue;
+ } else if (show_mode == SYSTEMCTL_SHOW_PROPERTIES) {
+ /* Interpret as job id */
+ if (asprintf(&path, "/org/freedesktop/systemd1/job/%u", id) < 0)
+ return log_oom();
+
+ } else {
+ /* Interpret as PID */
+ r = get_unit_dbus_path_by_pid(bus, id, &path);
+ if (r < 0) {
+ ret = r;
+ continue;
+ }
+
+ r = unit_name_from_dbus_path(path, &unit);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = show_one(bus, path, unit, show_mode, &new_line, &ellipsized);
+ if (r < 0)
+ return r;
+ else if (r > 0 && ret == 0)
+ ret = r;
+ }
+
+ if (!strv_isempty(patterns)) {
+ _cleanup_strv_free_ char **names = NULL;
+
+ r = expand_unit_names(bus, patterns, NULL, &names, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ r = maybe_extend_with_unit_dependencies(bus, &names);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(name, names) {
+ _cleanup_free_ char *path;
+
+ path = unit_dbus_path_from_name(*name);
+ if (!path)
+ return log_oom();
+
+ r = show_one(bus, path, *name, show_mode, &new_line, &ellipsized);
+ if (r < 0)
+ return r;
+ if (r > 0 && ret == 0)
+ ret = r;
+ }
+ }
+ }
+
+ if (ellipsized && !arg_quiet)
+ printf("Hint: Some lines were ellipsized, use -l to show in full.\n");
+
+ return ret;
+}
diff --git a/src/systemctl/systemctl-show.h b/src/systemctl/systemctl-show.h
new file mode 100644
index 0000000..d778beb
--- /dev/null
+++ b/src/systemctl/systemctl-show.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int show(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-start-special.c b/src/systemctl/systemctl-start-special.c
new file mode 100644
index 0000000..15d2ea7
--- /dev/null
+++ b/src/systemctl/systemctl-start-special.c
@@ -0,0 +1,248 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bootspec.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "efivars.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "reboot-util.h"
+#include "systemctl-logind.h"
+#include "systemctl-start-special.h"
+#include "systemctl-start-unit.h"
+#include "systemctl-trivial-method.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+static int load_kexec_kernel(void) {
+ _cleanup_(boot_config_free) BootConfig config = {};
+ _cleanup_free_ char *kernel = NULL, *initrd = NULL, *options = NULL;
+ const BootEntry *e;
+ pid_t pid;
+ int r;
+
+ if (kexec_loaded()) {
+ log_debug("Kexec kernel already loaded.");
+ return 0;
+ }
+
+ if (access(KEXEC, X_OK) < 0)
+ return log_error_errno(errno, KEXEC" is not available: %m");
+
+ r = boot_entries_load_config_auto(NULL, NULL, &config);
+ if (r == -ENOKEY)
+ /* The call doesn't log about ENOKEY, let's do so here. */
+ return log_error_errno(r,
+ "No kexec kernel loaded and autodetection failed.\n%s",
+ is_efi_boot()
+ ? "Cannot automatically load kernel: ESP partition mount point not found."
+ : "Automatic loading works only on systems booted with EFI.");
+ if (r < 0)
+ return r;
+
+ e = boot_config_default_entry(&config);
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "No boot loader entry suitable as default, refusing to guess.");
+
+ log_debug("Found default boot loader entry in file \"%s\"", e->path);
+
+ if (!e->kernel)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Boot entry does not refer to Linux kernel, which is not supported currently.");
+ if (strv_length(e->initrd) > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Boot entry specifies multiple initrds, which is not supported currently.");
+
+ kernel = path_join(e->root, e->kernel);
+ if (!kernel)
+ return log_oom();
+
+ if (!strv_isempty(e->initrd)) {
+ initrd = path_join(e->root, e->initrd[0]);
+ if (!initrd)
+ return log_oom();
+ }
+
+ options = strv_join(e->options, " ");
+ if (!options)
+ return log_oom();
+
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO,
+ "%s "KEXEC" --load \"%s\" --append \"%s\"%s%s%s",
+ arg_dry_run ? "Would run" : "Running",
+ kernel,
+ options,
+ initrd ? " --initrd \"" : NULL, strempty(initrd), initrd ? "\"" : "");
+ if (arg_dry_run)
+ return 0;
+
+ r = safe_fork("(kexec)", FORK_WAIT|FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char* const args[] = {
+ KEXEC,
+ "--load", kernel,
+ "--append", options,
+ initrd ? "--initrd" : NULL, initrd,
+ NULL
+ };
+
+ /* Child */
+ execv(args[0], (char * const *) args);
+ _exit(EXIT_FAILURE);
+ }
+
+ return 0;
+}
+
+static int set_exit_code(uint8_t code) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus;
+ int r;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "SetExitCode", &error, NULL, "y", code);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set exit code: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+int start_special(int argc, char *argv[], void *userdata) {
+ bool termination_action; /* An action that terminates the manager, can be performed also by
+ * signal. */
+ enum action a;
+ int r;
+
+ assert(argv);
+
+ a = verb_to_action(argv[0]);
+
+ r = logind_check_inhibitors(a);
+ if (r < 0)
+ return r;
+
+ if (arg_force >= 2) {
+ r = must_be_root();
+ if (r < 0)
+ return r;
+ }
+
+ r = prepare_firmware_setup();
+ if (r < 0)
+ return r;
+
+ r = prepare_boot_loader_menu();
+ if (r < 0)
+ return r;
+
+ r = prepare_boot_loader_entry();
+ if (r < 0)
+ return r;
+
+ if (a == ACTION_REBOOT) {
+ const char *arg = NULL;
+
+ if (argc > 1) {
+ if (arg_reboot_argument)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Both --reboot-argument= and positional argument passed to reboot command, refusing.");
+
+ log_notice("Positional argument to reboot command is deprecated, please use --reboot-argument= instead. Accepting anyway.");
+ arg = argv[1];
+ } else
+ arg = arg_reboot_argument;
+
+ if (arg) {
+ r = update_reboot_parameter_and_warn(arg, false);
+ if (r < 0)
+ return r;
+ }
+
+ } else if (a == ACTION_KEXEC) {
+ r = load_kexec_kernel();
+ if (r < 0 && arg_force >= 1)
+ log_notice("Failed to load kexec kernel, continuing without.");
+ else if (r < 0)
+ return r;
+
+ } else if (a == ACTION_EXIT && argc > 1) {
+ uint8_t code;
+
+ /* If the exit code is not given on the command line, don't reset it to zero: just keep it as
+ * it might have been set previously. */
+
+ r = safe_atou8(argv[1], &code);
+ if (r < 0)
+ return log_error_errno(r, "Invalid exit code.");
+
+ r = set_exit_code(code);
+ if (r < 0)
+ return r;
+ }
+
+ termination_action = IN_SET(a,
+ ACTION_HALT,
+ ACTION_POWEROFF,
+ ACTION_REBOOT);
+ if (termination_action && arg_force >= 2)
+ return halt_now(a);
+
+ if (arg_force >= 1 &&
+ (termination_action || IN_SET(a, ACTION_KEXEC, ACTION_EXIT)))
+ r = trivial_method(argc, argv, userdata);
+ else {
+ /* First try logind, to allow authentication with polkit */
+ if (IN_SET(a,
+ ACTION_POWEROFF,
+ ACTION_REBOOT,
+ ACTION_HALT,
+ ACTION_SUSPEND,
+ ACTION_HIBERNATE,
+ ACTION_HYBRID_SLEEP,
+ ACTION_SUSPEND_THEN_HIBERNATE)) {
+
+ r = logind_reboot(a);
+ if (r >= 0)
+ return r;
+ if (IN_SET(r, -EOPNOTSUPP, -EINPROGRESS))
+ /* Requested operation is not supported or already in progress */
+ return r;
+
+ /* On all other errors, try low-level operation. In order to minimize the difference
+ * between operation with and without logind, we explicitly enable non-blocking mode
+ * for this, as logind's shutdown operations are always non-blocking. */
+
+ arg_no_block = true;
+
+ } else if (IN_SET(a, ACTION_EXIT, ACTION_KEXEC))
+ /* Since exit/kexec are so close in behaviour to power-off/reboot, let's also make
+ * them asynchronous, in order to not confuse the user needlessly with unexpected
+ * behaviour. */
+ arg_no_block = true;
+
+ r = start_unit(argc, argv, userdata);
+ }
+
+ if (termination_action && arg_force < 2 &&
+ IN_SET(r, -ENOENT, -ETIMEDOUT))
+ log_notice("It is possible to perform action directly, see discussion of --force --force in man:systemctl(1).");
+
+ return r;
+}
+
+int start_system_special(int argc, char *argv[], void *userdata) {
+ /* Like start_special above, but raises an error when running in user mode */
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Bad action for %s mode.",
+ arg_scope == UNIT_FILE_GLOBAL ? "--global" : "--user");
+
+ return start_special(argc, argv, userdata);
+}
diff --git a/src/systemctl/systemctl-start-special.h b/src/systemctl/systemctl-start-special.h
new file mode 100644
index 0000000..06875e9
--- /dev/null
+++ b/src/systemctl/systemctl-start-special.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int start_special(int argc, char *argv[], void *userdata);
+int start_system_special(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-start-unit.c b/src/systemctl/systemctl-start-unit.c
new file mode 100644
index 0000000..b398e77
--- /dev/null
+++ b/src/systemctl/systemctl-start-unit.c
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-util.h"
+#include "bus-wait-for-jobs.h"
+#include "bus-wait-for-units.h"
+#include "macro.h"
+#include "special.h"
+#include "string-util.h"
+#include "systemctl-start-unit.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+
+static const struct {
+ const char *verb; /* systemctl verb */
+ const char *method; /* Name of the specific D-Bus method */
+ const char *job_type; /* Job type when passing to the generic EnqueueUnitJob() method */
+} unit_actions[] = {
+ { "start", "StartUnit", "start" },
+ { "stop", "StopUnit", "stop" },
+ { "condstop", "StopUnit", "stop" }, /* legacy alias */
+ { "reload", "ReloadUnit", "reload" },
+ { "restart", "RestartUnit", "restart" },
+ { "try-restart", "TryRestartUnit", "try-restart" },
+ { "condrestart", "TryRestartUnit", "try-restart" }, /* legacy alias */
+ { "reload-or-restart", "ReloadOrRestartUnit", "reload-or-restart" },
+ { "try-reload-or-restart", "ReloadOrTryRestartUnit", "reload-or-try-restart" },
+ { "reload-or-try-restart", "ReloadOrTryRestartUnit", "reload-or-try-restart" }, /* legacy alias */
+ { "condreload", "ReloadOrTryRestartUnit", "reload-or-try-restart" }, /* legacy alias */
+ { "force-reload", "ReloadOrTryRestartUnit", "reload-or-try-restart" }, /* legacy alias */
+};
+
+static const char *verb_to_method(const char *verb) {
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(unit_actions); i++)
+ if (streq_ptr(unit_actions[i].verb, verb))
+ return unit_actions[i].method;
+
+ return "StartUnit";
+}
+
+static const char *verb_to_job_type(const char *verb) {
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(unit_actions); i++)
+ if (streq_ptr(unit_actions[i].verb, verb))
+ return unit_actions[i].job_type;
+
+ return "start";
+}
+
+static int start_unit_one(
+ sd_bus *bus,
+ const char *method, /* When using classic per-job bus methods */
+ const char *job_type, /* When using new-style EnqueueUnitJob() */
+ const char *name,
+ const char *mode,
+ sd_bus_error *error,
+ BusWaitForJobs *w,
+ BusWaitForUnits *wu) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *path;
+ bool done = false;
+ int r;
+
+ assert(method);
+ assert(name);
+ assert(mode);
+ assert(error);
+
+ log_debug("%s dbus call org.freedesktop.systemd1.Manager %s(%s, %s)",
+ arg_dry_run ? "Would execute" : "Executing",
+ method, name, mode);
+
+ if (arg_dry_run)
+ return 0;
+
+ if (arg_show_transaction) {
+ _cleanup_(sd_bus_error_free) sd_bus_error enqueue_error = SD_BUS_ERROR_NULL;
+
+ /* Use the new, fancy EnqueueUnitJob() API if the user wants us to print the transaction */
+ r = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ "EnqueueUnitJob",
+ &enqueue_error,
+ &reply,
+ "sss",
+ name, job_type, mode);
+ if (r < 0) {
+ if (!sd_bus_error_has_name(&enqueue_error, SD_BUS_ERROR_UNKNOWN_METHOD)) {
+ (void) sd_bus_error_move(error, &enqueue_error);
+ goto fail;
+ }
+
+ /* Hmm, the API is not yet available. Let's use the classic API instead (see below). */
+ log_notice("--show-transaction not supported by this service manager, proceeding without.");
+ } else {
+ const char *u, *jt;
+ uint32_t id;
+
+ r = sd_bus_message_read(reply, "uosos", &id, &path, &u, NULL, &jt);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ log_info("Enqueued anchor job %" PRIu32 " %s/%s.", id, u, jt);
+
+ r = sd_bus_message_enter_container(reply, 'a', "(uosos)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ for (;;) {
+ r = sd_bus_message_read(reply, "(uosos)", &id, NULL, &u, NULL, &jt);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ log_info("Enqueued auxiliary job %" PRIu32 " %s/%s.", id, u, jt);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ done = true;
+ }
+ }
+
+ if (!done) {
+ r = bus_call_method(bus, bus_systemd_mgr, method, error, &reply, "ss", name, mode);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+
+ if (need_daemon_reload(bus, name) > 0)
+ warn_unit_file_changed(name);
+
+ if (w) {
+ log_debug("Adding %s to the set", path);
+ r = bus_wait_for_jobs_add(w, path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch job for %s: %m", name);
+ }
+
+ if (wu) {
+ r = bus_wait_for_units_add_unit(wu, name, BUS_WAIT_FOR_INACTIVE|BUS_WAIT_NO_JOB, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch unit %s: %m", name);
+ }
+
+ return 0;
+
+fail:
+ /* There's always a fallback possible for legacy actions. */
+ if (arg_action != ACTION_SYSTEMCTL)
+ return r;
+
+ log_error_errno(r, "Failed to %s %s: %s", job_type, name, bus_error_message(error, r));
+
+ if (!sd_bus_error_has_names(error, BUS_ERROR_NO_SUCH_UNIT,
+ BUS_ERROR_UNIT_MASKED,
+ BUS_ERROR_JOB_TYPE_NOT_APPLICABLE))
+ log_error("See %s logs and 'systemctl%s status%s %s' for details.",
+ arg_scope == UNIT_FILE_SYSTEM ? "system" : "user",
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user",
+ name[0] == '-' ? " --" : "",
+ name);
+
+ return r;
+}
+
+const struct action_metadata action_table[_ACTION_MAX] = {
+ [ACTION_HALT] = { SPECIAL_HALT_TARGET, "halt", "replace-irreversibly" },
+ [ACTION_POWEROFF] = { SPECIAL_POWEROFF_TARGET, "poweroff", "replace-irreversibly" },
+ [ACTION_REBOOT] = { SPECIAL_REBOOT_TARGET, "reboot", "replace-irreversibly" },
+ [ACTION_KEXEC] = { SPECIAL_KEXEC_TARGET, "kexec", "replace-irreversibly" },
+ [ACTION_RUNLEVEL2] = { SPECIAL_MULTI_USER_TARGET, NULL, "isolate" },
+ [ACTION_RUNLEVEL3] = { SPECIAL_MULTI_USER_TARGET, NULL, "isolate" },
+ [ACTION_RUNLEVEL4] = { SPECIAL_MULTI_USER_TARGET, NULL, "isolate" },
+ [ACTION_RUNLEVEL5] = { SPECIAL_GRAPHICAL_TARGET, NULL, "isolate" },
+ [ACTION_RESCUE] = { SPECIAL_RESCUE_TARGET, "rescue", "isolate" },
+ [ACTION_EMERGENCY] = { SPECIAL_EMERGENCY_TARGET, "emergency", "isolate" },
+ [ACTION_DEFAULT] = { SPECIAL_DEFAULT_TARGET, "default", "isolate" },
+ [ACTION_EXIT] = { SPECIAL_EXIT_TARGET, "exit", "replace-irreversibly" },
+ [ACTION_SUSPEND] = { SPECIAL_SUSPEND_TARGET, "suspend", "replace-irreversibly" },
+ [ACTION_HIBERNATE] = { SPECIAL_HIBERNATE_TARGET, "hibernate", "replace-irreversibly" },
+ [ACTION_HYBRID_SLEEP] = { SPECIAL_HYBRID_SLEEP_TARGET, "hybrid-sleep", "replace-irreversibly" },
+ [ACTION_SUSPEND_THEN_HIBERNATE] = { SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET, "suspend-then-hibernate", "replace-irreversibly" },
+};
+
+enum action verb_to_action(const char *verb) {
+ enum action i;
+
+ for (i = 0; i < _ACTION_MAX; i++)
+ if (streq_ptr(action_table[i].verb, verb))
+ return i;
+
+ return _ACTION_INVALID;
+}
+
+static const char** make_extra_args(const char *extra_args[static 4]) {
+ size_t n = 0;
+
+ assert(extra_args);
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ extra_args[n++] = "--user";
+
+ if (arg_transport == BUS_TRANSPORT_REMOTE) {
+ extra_args[n++] = "-H";
+ extra_args[n++] = arg_host;
+ } else if (arg_transport == BUS_TRANSPORT_MACHINE) {
+ extra_args[n++] = "-M";
+ extra_args[n++] = arg_host;
+ } else
+ assert(arg_transport == BUS_TRANSPORT_LOCAL);
+
+ extra_args[n] = NULL;
+ return extra_args;
+}
+
+int start_unit(int argc, char *argv[], void *userdata) {
+ _cleanup_(bus_wait_for_units_freep) BusWaitForUnits *wu = NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ const char *method, *job_type, *mode, *one_name, *suffix = NULL;
+ _cleanup_free_ char **stopped_units = NULL; /* Do not use _cleanup_strv_free_ */
+ _cleanup_strv_free_ char **names = NULL;
+ int r, ret = EXIT_SUCCESS;
+ sd_bus *bus;
+ char **name;
+
+ if (arg_wait && !STR_IN_SET(argv[0], "start", "restart"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may only be used with the 'start' or 'restart' commands.");
+
+ /* We cannot do sender tracking on the private bus, so we need the full one for RefUnit to implement
+ * --wait */
+ r = acquire_bus(arg_wait ? BUS_FULL : BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ ask_password_agent_open_maybe();
+ polkit_agent_open_maybe();
+
+ if (arg_action == ACTION_SYSTEMCTL) {
+ enum action action;
+
+ action = verb_to_action(argv[0]);
+
+ if (action != _ACTION_INVALID) {
+ /* A command in style "systemctl reboot", "systemctl poweroff", … */
+ method = "StartUnit";
+ job_type = "start";
+ mode = action_table[action].mode;
+ one_name = action_table[action].target;
+ } else {
+ if (streq(argv[0], "isolate")) {
+ /* A "systemctl isolate <unit1> <unit2> …" command */
+ method = "StartUnit";
+ job_type = "start";
+ mode = "isolate";
+ suffix = ".target";
+ } else {
+ /* A command in style of "systemctl start <unit1> <unit2> …", "sysemctl stop <unit1> <unit2> …" and so on */
+ method = verb_to_method(argv[0]);
+ job_type = verb_to_job_type(argv[0]);
+ mode = arg_job_mode;
+ }
+ one_name = NULL;
+ }
+ } else {
+ /* A SysV legacy command such as "halt", "reboot", "poweroff", … */
+ assert(arg_action >= 0 && arg_action < _ACTION_MAX);
+ assert(action_table[arg_action].target);
+ assert(action_table[arg_action].mode);
+
+ method = "StartUnit";
+ job_type = "start";
+ mode = action_table[arg_action].mode;
+ one_name = action_table[arg_action].target;
+ }
+
+ if (one_name) {
+ names = strv_new(one_name);
+ if (!names)
+ return log_oom();
+ } else {
+ bool expanded;
+
+ r = expand_unit_names(bus, strv_skip(argv, 1), suffix, &names, &expanded);
+ if (r < 0)
+ return log_error_errno(r, "Failed to expand names: %m");
+
+ if (!arg_all && expanded && streq(job_type, "start") && !arg_quiet) {
+ log_warning("Warning: %ssystemctl start called with a glob pattern.%s",
+ ansi_highlight_red(),
+ ansi_normal());
+ log_notice("Hint: unit globs expand to loaded units, so start will usually have no effect.\n"
+ " Passing --all will also load units which are pulled in by other units.\n"
+ " See systemctl(1) for more details.");
+ }
+ }
+
+ if (!arg_no_block) {
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_error_errno(r, "Could not watch jobs: %m");
+ }
+
+ if (arg_wait) {
+ r = bus_call_method_async(bus, NULL, bus_systemd_mgr, "Subscribe", NULL, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable subscription: %m");
+
+ r = bus_wait_for_units_new(bus, &wu);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate unit watch context: %m");
+ }
+
+ STRV_FOREACH(name, names) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ r = start_unit_one(bus, method, job_type, *name, mode, &error, w, wu);
+ if (ret == EXIT_SUCCESS && r < 0)
+ ret = translate_bus_error_to_exit_status(r, &error);
+
+ if (r >= 0 && streq(method, "StopUnit")) {
+ r = strv_push(&stopped_units, *name);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ if (!arg_no_block) {
+ const char* extra_args[4];
+
+ r = bus_wait_for_jobs(w, arg_quiet, make_extra_args(extra_args));
+ if (r < 0)
+ return r;
+
+ /* When stopping units, warn if they can still be triggered by
+ * another active unit (socket, path, timer) */
+ if (!arg_quiet)
+ STRV_FOREACH(name, stopped_units)
+ (void) check_triggering_units(bus, *name);
+ }
+
+ if (arg_wait) {
+ r = bus_wait_for_units_run(wu);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for units: %m");
+ if (r == BUS_WAIT_FAILURE && ret == EXIT_SUCCESS)
+ ret = EXIT_FAILURE;
+ }
+
+ return ret;
+}
diff --git a/src/systemctl/systemctl-start-unit.h b/src/systemctl/systemctl-start-unit.h
new file mode 100644
index 0000000..915c6fa
--- /dev/null
+++ b/src/systemctl/systemctl-start-unit.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "systemctl.h"
+
+int start_unit(int argc, char *argv[], void *userdata);
+
+struct action_metadata {
+ const char *target;
+ const char *verb;
+ const char *mode;
+};
+
+extern const struct action_metadata action_table[_ACTION_MAX];
+
+enum action verb_to_action(const char *verb);
diff --git a/src/systemctl/systemctl-switch-root.c b/src/systemctl/systemctl-switch-root.c
new file mode 100644
index 0000000..9ed40e6
--- /dev/null
+++ b/src/systemctl/systemctl-switch-root.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "systemctl-switch-root.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+int switch_root(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *cmdline_init = NULL;
+ const char *root, *init;
+ sd_bus *bus;
+ int r;
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot switch root remotely.");
+
+ if (argc < 2 || argc > 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Wrong number of arguments.");
+
+ root = argv[1];
+
+ if (argc >= 3)
+ init = argv[2];
+ else {
+ r = proc_cmdline_get_key("init", 0, &cmdline_init);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse /proc/cmdline: %m");
+
+ init = cmdline_init;
+ }
+
+ init = empty_to_null(init);
+ if (init) {
+ const char *root_systemd_path = NULL, *root_init_path = NULL;
+
+ root_systemd_path = prefix_roota(root, "/" SYSTEMD_BINARY_PATH);
+ root_init_path = prefix_roota(root, init);
+
+ /* If the passed init is actually the same as the systemd binary, then let's suppress it. */
+ if (files_same(root_init_path, root_systemd_path, 0) > 0)
+ init = NULL;
+ }
+
+ /* Instruct PID1 to exclude us from its killing spree applied during the transition. Otherwise we
+ * would exit with a failure status even though the switch to the new root has succeed. */
+ assert(saved_argv);
+ assert(saved_argv[0]);
+ saved_argv[0][0] = '@';
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ /* If we are slow to exit after the root switch, the new systemd instance will send us a signal to
+ * terminate. Just ignore it and exit normally. This way the unit does not end up as failed. */
+ r = ignore_signals(SIGTERM, -1);
+ if (r < 0)
+ log_warning_errno(r, "Failed to change disposition of SIGTERM to ignore: %m");
+
+ log_debug("Switching root - root: %s; init: %s", root, strna(init));
+
+ r = bus_call_method(bus, bus_systemd_mgr, "SwitchRoot", &error, NULL, "ss", root, init);
+ if (r < 0) {
+ (void) default_signals(SIGTERM, -1);
+
+ return log_error_errno(r, "Failed to switch root: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
diff --git a/src/systemctl/systemctl-switch-root.h b/src/systemctl/systemctl-switch-root.h
new file mode 100644
index 0000000..6e13961
--- /dev/null
+++ b/src/systemctl/systemctl-switch-root.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int switch_root(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-sysv-compat.c b/src/systemctl/systemctl-sysv-compat.c
new file mode 100644
index 0000000..a78fa1e
--- /dev/null
+++ b/src/systemctl/systemctl-sysv-compat.c
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "env-util.h"
+#include "fd-util.h"
+#include "initreq.h"
+#include "install.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "strv.h"
+#include "systemctl-sysv-compat.h"
+#include "systemctl.h"
+
+int talk_initctl(char rl) {
+#if HAVE_SYSV_COMPAT
+ struct init_request request;
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ int r;
+
+ /* Try to switch to the specified SysV runlevel. Returns == 0 if the operation does not apply on this
+ * system, and > 0 on success. */
+
+ if (rl == 0)
+ return 0;
+
+ FOREACH_STRING(p, "/run/initctl", "/dev/initctl") {
+ fd = open(p, O_WRONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
+ if (fd >= 0 || errno != ENOENT)
+ break;
+ }
+ if (fd < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open initctl fifo: %m");
+ }
+
+ request = (struct init_request) {
+ .magic = INIT_MAGIC,
+ .sleeptime = 0,
+ .cmd = INIT_CMD_RUNLVL,
+ .runlevel = rl,
+ };
+
+ r = loop_write(fd, &request, sizeof(request), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write to %s: %m", p);
+
+ return 1;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+int parse_shutdown_time_spec(const char *t, usec_t *ret) {
+ assert(t);
+ assert(ret);
+
+ if (streq(t, "now"))
+ *ret = 0;
+ else if (!strchr(t, ':')) {
+ uint64_t u;
+
+ if (safe_atou64(t, &u) < 0)
+ return -EINVAL;
+
+ *ret = now(CLOCK_REALTIME) + USEC_PER_MINUTE * u;
+ } else {
+ char *e = NULL;
+ long hour, minute;
+ struct tm tm = {};
+ time_t s;
+ usec_t n;
+
+ errno = 0;
+ hour = strtol(t, &e, 10);
+ if (errno > 0 || *e != ':' || hour < 0 || hour > 23)
+ return -EINVAL;
+
+ minute = strtol(e+1, &e, 10);
+ if (errno > 0 || *e != 0 || minute < 0 || minute > 59)
+ return -EINVAL;
+
+ n = now(CLOCK_REALTIME);
+ s = (time_t) (n / USEC_PER_SEC);
+
+ assert_se(localtime_r(&s, &tm));
+
+ tm.tm_hour = (int) hour;
+ tm.tm_min = (int) minute;
+ tm.tm_sec = 0;
+
+ s = mktime(&tm);
+ assert(s >= 0);
+
+ *ret = (usec_t) s * USEC_PER_SEC;
+
+ while (*ret <= n)
+ *ret += USEC_PER_DAY;
+ }
+
+ return 0;
+}
+
+int enable_sysv_units(const char *verb, char **args) {
+ int r = 0;
+
+#if HAVE_SYSV_COMPAT
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ unsigned f = 0;
+
+ /* Processes all SysV units, and reshuffles the array so that afterwards only the native units remain */
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ return 0;
+
+ if (getenv_bool("SYSTEMCTL_SKIP_SYSV") > 0)
+ return 0;
+
+ if (!STR_IN_SET(verb,
+ "enable",
+ "disable",
+ "is-enabled"))
+ return 0;
+
+ r = lookup_paths_init(&paths, arg_scope, LOOKUP_PATHS_EXCLUDE_GENERATED, arg_root);
+ if (r < 0)
+ return r;
+
+ r = 0;
+ while (args[f]) {
+
+ const char *argv[] = {
+ ROOTLIBEXECDIR "/systemd-sysv-install",
+ NULL, /* --root= */
+ NULL, /* verb */
+ NULL, /* service */
+ NULL,
+ };
+
+ _cleanup_free_ char *p = NULL, *q = NULL, *l = NULL, *v = NULL;
+ bool found_native = false, found_sysv;
+ const char *name;
+ unsigned c = 1;
+ pid_t pid;
+ int j;
+
+ name = args[f++];
+
+ if (!endswith(name, ".service"))
+ continue;
+
+ if (path_is_absolute(name))
+ continue;
+
+ j = unit_file_exists(arg_scope, &paths, name);
+ if (j < 0 && !IN_SET(j, -ELOOP, -ERFKILL, -EADDRNOTAVAIL))
+ return log_error_errno(j, "Failed to look up unit file state: %m");
+ found_native = j != 0;
+
+ /* If we have both a native unit and a SysV script, enable/disable them both (below); for
+ * is-enabled, prefer the native unit */
+ if (found_native && streq(verb, "is-enabled"))
+ continue;
+
+ p = path_join(arg_root, SYSTEM_SYSVINIT_PATH, name);
+ if (!p)
+ return log_oom();
+
+ p[strlen(p) - STRLEN(".service")] = 0;
+ found_sysv = access(p, F_OK) >= 0;
+ if (!found_sysv)
+ continue;
+
+ if (!arg_quiet) {
+ if (found_native)
+ log_info("Synchronizing state of %s with SysV service script with %s.", name, argv[0]);
+ else
+ log_info("%s is not a native service, redirecting to systemd-sysv-install.", name);
+ }
+
+ if (!isempty(arg_root)) {
+ q = strjoin("--root=", arg_root);
+ if (!q)
+ return log_oom();
+
+ argv[c++] = q;
+ }
+
+ /* Let's copy the verb, since it's still pointing directly into the original argv[] array we
+ * got passed, but safe_fork() is likely going to rewrite that for the new child */
+ v = strdup(verb);
+ if (!v)
+ return log_oom();
+
+ argv[c++] = v;
+ argv[c++] = basename(p);
+ argv[c] = NULL;
+
+ l = strv_join((char**)argv, " ");
+ if (!l)
+ return log_oom();
+
+ if (!arg_quiet)
+ log_info("Executing: %s", l);
+
+ j = safe_fork("(sysv-install)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (j < 0)
+ return j;
+ if (j == 0) {
+ /* Child */
+ execv(argv[0], (char**) argv);
+ log_error_errno(errno, "Failed to execute %s: %m", argv[0]);
+ _exit(EXIT_FAILURE);
+ }
+
+ j = wait_for_terminate_and_check("sysv-install", pid, WAIT_LOG_ABNORMAL);
+ if (j < 0)
+ return j;
+ if (streq(verb, "is-enabled")) {
+ if (j == EXIT_SUCCESS) {
+ if (!arg_quiet)
+ puts("enabled");
+ r = 1;
+ } else {
+ if (!arg_quiet)
+ puts("disabled");
+ }
+
+ } else if (j != EXIT_SUCCESS)
+ return -EBADE; /* We don't warn here, under the assumption the script already showed an explanation */
+
+ if (found_native)
+ continue;
+
+ /* Remove this entry, so that we don't try enabling it as native unit */
+ assert(f > 0);
+ f--;
+ assert(args[f] == name);
+ strv_remove(args + f, name);
+ }
+
+#endif
+ return r;
+}
+
+int action_to_runlevel(void) {
+#if HAVE_SYSV_COMPAT
+ static const char table[_ACTION_MAX] = {
+ [ACTION_HALT] = '0',
+ [ACTION_POWEROFF] = '0',
+ [ACTION_REBOOT] = '6',
+ [ACTION_RUNLEVEL2] = '2',
+ [ACTION_RUNLEVEL3] = '3',
+ [ACTION_RUNLEVEL4] = '4',
+ [ACTION_RUNLEVEL5] = '5',
+ [ACTION_RESCUE] = '1'
+ };
+
+ assert(arg_action >= 0 && arg_action < _ACTION_MAX);
+ return table[arg_action];
+#else
+ return -EOPNOTSUPP;
+#endif
+}
diff --git a/src/systemctl/systemctl-sysv-compat.h b/src/systemctl/systemctl-sysv-compat.h
new file mode 100644
index 0000000..86fd3ec
--- /dev/null
+++ b/src/systemctl/systemctl-sysv-compat.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "time-util.h"
+
+int talk_initctl(char runlevel);
+
+int parse_shutdown_time_spec(const char *t, usec_t *ret);
+
+/* The init script exit codes for the LSB 'status' verb. (This is different from the 'start' verb, whose exit
+ codes are defined in exit-status.h.)
+
+ 0 program is running or service is OK
+ 1 program is dead and /var/run pid file exists
+ 2 program is dead and /var/lock lock file exists
+ 3 program is not running
+ 4 program or service status is unknown
+ 5-99 reserved for future LSB use
+ 100-149 reserved for distribution use
+ 150-199 reserved for application use
+ 200-254 reserved
+
+ https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
+*/
+enum {
+ EXIT_PROGRAM_RUNNING_OR_SERVICE_OK = 0,
+ EXIT_PROGRAM_DEAD_AND_PID_EXISTS = 1,
+ EXIT_PROGRAM_DEAD_AND_LOCK_FILE_EXISTS = 2,
+ EXIT_PROGRAM_NOT_RUNNING = 3,
+ EXIT_PROGRAM_OR_SERVICES_STATUS_UNKNOWN = 4,
+};
+
+int enable_sysv_units(const char *verb, char **args);
+
+int action_to_runlevel(void) _pure_;
diff --git a/src/systemctl/systemctl-trivial-method.c b/src/systemctl/systemctl-trivial-method.c
new file mode 100644
index 0000000..c0b4d48
--- /dev/null
+++ b/src/systemctl/systemctl-trivial-method.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "systemctl-trivial-method.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+
+/* A generic implementation for cases we just need to invoke a simple method call on the Manager object. */
+
+int trivial_method(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *method;
+ sd_bus *bus;
+ int r;
+
+ if (arg_dry_run)
+ return 0;
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ polkit_agent_open_maybe();
+
+ method =
+ streq(argv[0], "clear-jobs") ||
+ streq(argv[0], "cancel") ? "ClearJobs" :
+ streq(argv[0], "reset-failed") ? "ResetFailed" :
+ streq(argv[0], "halt") ? "Halt" :
+ streq(argv[0], "reboot") ? "Reboot" :
+ streq(argv[0], "kexec") ? "KExec" :
+ streq(argv[0], "exit") ? "Exit" :
+ /* poweroff */ "PowerOff";
+
+ r = bus_call_method(bus, bus_systemd_mgr, method, &error, NULL, NULL);
+ if (r < 0 && arg_action == ACTION_SYSTEMCTL)
+ return log_error_errno(r, "Failed to execute operation: %s", bus_error_message(&error, r));
+
+ /* Note that for the legacy commands (i.e. those with action != ACTION_SYSTEMCTL) we support
+ * fallbacks to the old ways of doing things, hence don't log any error in that case here. */
+
+ return r < 0 ? r : 0;
+}
diff --git a/src/systemctl/systemctl-trivial-method.h b/src/systemctl/systemctl-trivial-method.h
new file mode 100644
index 0000000..6dcd152
--- /dev/null
+++ b/src/systemctl/systemctl-trivial-method.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int trivial_method(int argc, char *argv[], void *userdata);
diff --git a/src/systemctl/systemctl-util.c b/src/systemctl/systemctl-util.c
new file mode 100644
index 0000000..9713fce
--- /dev/null
+++ b/src/systemctl/systemctl-util.c
@@ -0,0 +1,936 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/reboot.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "bus-common-errors.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "bus-unit-util.h"
+#include "dropin.h"
+#include "env-util.h"
+#include "exit-status.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "reboot-util.h"
+#include "set.h"
+#include "spawn-ask-password-agent.h"
+#include "spawn-polkit-agent.h"
+#include "stat-util.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+#include "verbs.h"
+
+static sd_bus *buses[_BUS_FOCUS_MAX] = {};
+
+int acquire_bus(BusFocus focus, sd_bus **ret) {
+ int r;
+
+ assert(focus < _BUS_FOCUS_MAX);
+ assert(ret);
+
+ /* We only go directly to the manager, if we are using a local transport */
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ focus = BUS_FULL;
+
+ if (getenv_bool("SYSTEMCTL_FORCE_BUS") > 0)
+ focus = BUS_FULL;
+
+ if (!buses[focus]) {
+ bool user;
+
+ user = arg_scope != UNIT_FILE_SYSTEM;
+
+ if (focus == BUS_MANAGER)
+ r = bus_connect_transport_systemd(arg_transport, arg_host, user, &buses[focus]);
+ else
+ r = bus_connect_transport(arg_transport, arg_host, user, &buses[focus]);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ (void) sd_bus_set_allow_interactive_authorization(buses[focus], arg_ask_password);
+ }
+
+ *ret = buses[focus];
+ return 0;
+}
+
+void release_busses(void) {
+ BusFocus w;
+
+ for (w = 0; w < _BUS_FOCUS_MAX; w++)
+ buses[w] = sd_bus_flush_close_unref(buses[w]);
+}
+
+void ask_password_agent_open_maybe(void) {
+ /* Open the password agent as a child process if necessary */
+
+ if (arg_dry_run)
+ return;
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ return;
+
+ ask_password_agent_open_if_enabled(arg_transport, arg_ask_password);
+}
+
+void polkit_agent_open_maybe(void) {
+ /* Open the polkit agent as a child process if necessary */
+
+ if (arg_scope != UNIT_FILE_SYSTEM)
+ return;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+}
+
+int translate_bus_error_to_exit_status(int r, const sd_bus_error *error) {
+ assert(error);
+
+ if (!sd_bus_error_is_set(error))
+ return r;
+
+ if (sd_bus_error_has_names(error, SD_BUS_ERROR_ACCESS_DENIED,
+ BUS_ERROR_ONLY_BY_DEPENDENCY,
+ BUS_ERROR_NO_ISOLATION,
+ BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE))
+ return EXIT_NOPERMISSION;
+
+ if (sd_bus_error_has_name(error, BUS_ERROR_NO_SUCH_UNIT))
+ return EXIT_NOTINSTALLED;
+
+ if (sd_bus_error_has_names(error, BUS_ERROR_JOB_TYPE_NOT_APPLICABLE,
+ SD_BUS_ERROR_NOT_SUPPORTED))
+ return EXIT_NOTIMPLEMENTED;
+
+ if (sd_bus_error_has_name(error, BUS_ERROR_LOAD_FAILED))
+ return EXIT_NOTCONFIGURED;
+
+ if (r != 0)
+ return r;
+
+ return EXIT_FAILURE;
+}
+
+int get_state_one_unit(sd_bus *bus, const char *unit, UnitActiveState *ret_active_state) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *buf = NULL, *dbus_path = NULL;
+ UnitActiveState state;
+ int r;
+
+ assert(unit);
+ assert(ret_active_state);
+
+ dbus_path = unit_dbus_path_from_name(unit);
+ if (!dbus_path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ &error,
+ &buf);
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve unit state: %s", bus_error_message(&error, r));
+
+ state = unit_active_state_from_string(buf);
+ if (state < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid unit state '%s' for: %s", buf, unit);
+
+ *ret_active_state = state;
+ return 0;
+}
+
+int get_unit_list(
+ sd_bus *bus,
+ const char *machine,
+ char **patterns,
+ UnitInfo **unit_infos,
+ int c,
+ sd_bus_message **ret_reply) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ size_t size = c;
+ int r;
+ bool fallback = false;
+
+ assert(bus);
+ assert(unit_infos);
+ assert(ret_reply);
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "ListUnitsByPatterns");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_states);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, patterns);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0 && (sd_bus_error_has_names(&error, SD_BUS_ERROR_UNKNOWN_METHOD,
+ SD_BUS_ERROR_ACCESS_DENIED))) {
+ /* Fallback to legacy ListUnitsFiltered method */
+ fallback = true;
+ log_debug_errno(r, "Failed to list units: %s Falling back to ListUnitsFiltered method.", bus_error_message(&error, r));
+ m = sd_bus_message_unref(m);
+ sd_bus_error_free(&error);
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "ListUnitsFiltered");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, arg_states);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ for (;;) {
+ UnitInfo u;
+
+ r = bus_parse_unit_info(reply, &u);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ if (r == 0)
+ break;
+
+ u.machine = machine;
+
+ if (!output_show_unit(&u, fallback ? patterns : NULL))
+ continue;
+
+ if (!GREEDY_REALLOC(*unit_infos, size, c+1))
+ return log_oom();
+
+ (*unit_infos)[c++] = u;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ *ret_reply = TAKE_PTR(reply);
+ return c;
+}
+
+int expand_unit_names(sd_bus *bus, char **names, const char* suffix, char ***ret, bool *ret_expanded) {
+ _cleanup_strv_free_ char **mangled = NULL, **globs = NULL;
+ char **name;
+ int r, i;
+
+ assert(bus);
+ assert(ret);
+
+ STRV_FOREACH(name, names) {
+ UnitNameMangle options = UNIT_NAME_MANGLE_GLOB | (arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN);
+ char *t;
+
+ r = unit_name_mangle_with_suffix(*name, NULL, options, suffix ?: ".service", &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle name: %m");
+
+ if (string_is_glob(t))
+ r = strv_consume(&globs, t);
+ else
+ r = strv_consume(&mangled, t);
+ if (r < 0)
+ return log_oom();
+ }
+
+ /* Query the manager only if any of the names are a glob, since this is fairly expensive */
+ bool expanded = !strv_isempty(globs);
+ if (expanded) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_free_ UnitInfo *unit_infos = NULL;
+ size_t allocated, n;
+
+ r = get_unit_list(bus, NULL, globs, &unit_infos, 0, &reply);
+ if (r < 0)
+ return r;
+
+ n = strv_length(mangled);
+ allocated = n + 1;
+
+ for (i = 0; i < r; i++) {
+ if (!GREEDY_REALLOC(mangled, allocated, n+2))
+ return log_oom();
+
+ mangled[n] = strdup(unit_infos[i].id);
+ if (!mangled[n])
+ return log_oom();
+
+ mangled[++n] = NULL;
+ }
+ }
+
+ if (ret_expanded)
+ *ret_expanded = expanded;
+
+ *ret = TAKE_PTR(mangled);
+ return 0;
+}
+
+int check_triggering_units(sd_bus *bus, const char *unit) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *n = NULL, *dbus_path = NULL, *load_state = NULL;
+ _cleanup_strv_free_ char **triggered_by = NULL;
+ bool print_warning_label = true;
+ UnitActiveState active_state;
+ char **i;
+ int r;
+
+ r = unit_name_mangle(unit, 0, &n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+
+ r = unit_load_state(bus, n, &load_state);
+ if (r < 0)
+ return r;
+
+ if (streq(load_state, "masked"))
+ return 0;
+
+ dbus_path = unit_dbus_path_from_name(n);
+ if (!dbus_path)
+ return log_oom();
+
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Unit",
+ "TriggeredBy",
+ &error,
+ &triggered_by);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get triggered by array of %s: %s", n, bus_error_message(&error, r));
+
+ STRV_FOREACH(i, triggered_by) {
+ r = get_state_one_unit(bus, *i, &active_state);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(active_state, UNIT_ACTIVE, UNIT_RELOADING))
+ continue;
+
+ if (print_warning_label) {
+ log_warning("Warning: Stopping %s, but it can still be activated by:", n);
+ print_warning_label = false;
+ }
+
+ log_warning(" %s", *i);
+ }
+
+ return 0;
+}
+
+int need_daemon_reload(sd_bus *bus, const char *unit) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *path;
+ int b, r;
+
+ /* We ignore all errors here, since this is used to show a
+ * warning only */
+
+ /* We don't use unit_dbus_path_from_name() directly since we
+ * don't want to load the unit if it isn't loaded. */
+
+ r = bus_call_method(bus, bus_systemd_mgr, "GetUnit", NULL, &reply, "s", unit);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_get_property_trivial(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "NeedDaemonReload",
+ NULL,
+ 'b', &b);
+ if (r < 0)
+ return r;
+
+ return b;
+}
+
+void warn_unit_file_changed(const char *unit) {
+ assert(unit);
+
+ log_warning("%sWarning:%s The unit file, source configuration file or drop-ins of %s changed on disk. Run 'systemctl%s daemon-reload' to reload units.",
+ ansi_highlight_red(),
+ ansi_normal(),
+ unit,
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user");
+}
+
+int unit_file_find_path(LookupPaths *lp, const char *unit_name, char **ret_unit_path) {
+ char **p;
+
+ assert(lp);
+ assert(unit_name);
+
+ STRV_FOREACH(p, lp->search_path) {
+ _cleanup_free_ char *path = NULL, *lpath = NULL;
+ int r;
+
+ path = path_join(*p, unit_name);
+ if (!path)
+ return log_oom();
+
+ r = chase_symlinks(path, arg_root, 0, &lpath, NULL);
+ if (r == -ENOENT)
+ continue;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to access path \"%s\": %m", path);
+
+ if (ret_unit_path)
+ *ret_unit_path = TAKE_PTR(lpath);
+
+ return 1;
+ }
+
+ if (ret_unit_path)
+ *ret_unit_path = NULL;
+
+ return 0;
+}
+
+int unit_find_paths(
+ sd_bus *bus,
+ const char *unit_name,
+ LookupPaths *lp,
+ bool force_client_side,
+ Hashmap **cached_name_map,
+ Hashmap **cached_id_map,
+ char **ret_fragment_path,
+ char ***ret_dropin_paths) {
+
+ _cleanup_strv_free_ char **dropins = NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ /**
+ * Finds where the unit is defined on disk. Returns 0 if the unit is not found. Returns 1 if it is
+ * found, and sets:
+ * - the path to the unit in *ret_frament_path, if it exists on disk,
+ * - and a strv of existing drop-ins in *ret_dropin_paths, if the arg is not NULL and any dropins
+ * were found.
+ *
+ * Returns -ERFKILL if the unit is masked, and -EKEYREJECTED if the unit file could not be loaded for
+ * some reason (the latter only applies if we are going through the service manager).
+ */
+
+ assert(unit_name);
+ assert(ret_fragment_path);
+ assert(lp);
+
+ /* Go via the bus to acquire the path, unless we are explicitly told not to, or when the unit name is a template */
+ if (!force_client_side &&
+ !install_client_side() &&
+ !unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *load_state = NULL, *dbus_path = NULL;
+
+ dbus_path = unit_dbus_path_from_name(unit_name);
+ if (!dbus_path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Unit",
+ "LoadState",
+ &error,
+ &load_state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get LoadState: %s", bus_error_message(&error, r));
+
+ if (streq(load_state, "masked"))
+ return -ERFKILL;
+ if (streq(load_state, "not-found")) {
+ r = 0;
+ goto not_found;
+ }
+ if (!STR_IN_SET(load_state, "loaded", "bad-setting"))
+ return -EKEYREJECTED;
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Unit",
+ "FragmentPath",
+ &error,
+ &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get FragmentPath: %s", bus_error_message(&error, r));
+
+ if (ret_dropin_paths) {
+ r = sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Unit",
+ "DropInPaths",
+ &error,
+ &dropins);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get DropInPaths: %s", bus_error_message(&error, r));
+ }
+ } else {
+ const char *_path;
+ _cleanup_set_free_free_ Set *names = NULL;
+
+ if (!*cached_name_map) {
+ r = unit_file_build_name_map(lp, NULL, cached_id_map, cached_name_map, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = unit_file_find_fragment(*cached_id_map, *cached_name_map, unit_name, &_path, &names);
+ if (r < 0)
+ return r;
+
+ if (_path) {
+ path = strdup(_path);
+ if (!path)
+ return log_oom();
+ }
+
+ if (ret_dropin_paths) {
+ r = unit_file_find_dropin_paths(arg_root, lp->search_path, NULL,
+ ".d", ".conf",
+ NULL, names, &dropins);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (isempty(path)) {
+ *ret_fragment_path = NULL;
+ r = 0;
+ } else {
+ *ret_fragment_path = TAKE_PTR(path);
+ r = 1;
+ }
+
+ if (ret_dropin_paths) {
+ if (!strv_isempty(dropins)) {
+ *ret_dropin_paths = TAKE_PTR(dropins);
+ r = 1;
+ } else
+ *ret_dropin_paths = NULL;
+ }
+
+ not_found:
+ if (r == 0 && !arg_force)
+ log_error("No files found for %s.", unit_name);
+
+ return r;
+}
+
+static int unit_find_template_path(
+ const char *unit_name,
+ LookupPaths *lp,
+ char **ret_fragment_path,
+ char **ret_template) {
+
+ _cleanup_free_ char *t = NULL, *f = NULL;
+ int r;
+
+ /* Returns 1 if a fragment was found, 0 if not found, negative on error. */
+
+ r = unit_file_find_path(lp, unit_name, &f);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (ret_fragment_path)
+ *ret_fragment_path = TAKE_PTR(f);
+ if (ret_template)
+ *ret_template = NULL;
+ return r; /* found a real unit */
+ }
+
+ r = unit_name_template(unit_name, &t);
+ if (r == -EINVAL) {
+ if (ret_fragment_path)
+ *ret_fragment_path = NULL;
+ if (ret_template)
+ *ret_template = NULL;
+
+ return 0; /* not a template, does not exist */
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine template name: %m");
+
+ r = unit_file_find_path(lp, t, ret_fragment_path);
+ if (r < 0)
+ return r;
+
+ if (ret_template)
+ *ret_template = r > 0 ? TAKE_PTR(t) : NULL;
+
+ return r;
+}
+
+int unit_is_masked(sd_bus *bus, LookupPaths *lp, const char *name) {
+ _cleanup_free_ char *load_state = NULL;
+ int r;
+
+ if (unit_name_is_valid(name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *path = NULL;
+
+ /* A template cannot be loaded, but it can be still masked, so
+ * we need to use a different method. */
+
+ r = unit_file_find_path(lp, name, &path);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false;
+ return null_or_empty_path(path);
+ }
+
+ r = unit_load_state(bus, name, &load_state);
+ if (r < 0)
+ return r;
+
+ return streq(load_state, "masked");
+}
+
+int unit_exists(LookupPaths *lp, const char *unit) {
+ typedef struct UnitStateInfo {
+ const char *load_state;
+ const char *active_state;
+ } UnitStateInfo;
+
+ static const struct bus_properties_map property_map[] = {
+ { "LoadState", "s", NULL, offsetof(UnitStateInfo, load_state) },
+ { "ActiveState", "s", NULL, offsetof(UnitStateInfo, active_state) },
+ {},
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ char *path = NULL;
+ UnitStateInfo info = {};
+ sd_bus *bus;
+ int r;
+
+ if (unit_name_is_valid(unit, UNIT_NAME_TEMPLATE))
+ return unit_find_template_path(unit, lp, NULL, NULL);
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return log_oom();
+
+ r = acquire_bus(BUS_MANAGER, &bus);
+ if (r < 0)
+ return r;
+
+ r = bus_map_all_properties(bus, "org.freedesktop.systemd1", path, property_map, 0, &error, &m, &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get properties: %s", bus_error_message(&error, r));
+
+ return !streq_ptr(info.load_state, "not-found") || !streq_ptr(info.active_state, "inactive");
+}
+
+
+int append_unit_dependencies(sd_bus *bus, char **names, char ***ret) {
+ _cleanup_strv_free_ char **with_deps = NULL;
+ char **name;
+
+ assert(bus);
+ assert(ret);
+
+ STRV_FOREACH(name, names) {
+ _cleanup_strv_free_ char **deps = NULL;
+
+ if (strv_extend(&with_deps, *name) < 0)
+ return log_oom();
+
+ (void) unit_get_dependencies(bus, *name, &deps);
+
+ if (strv_extend_strv(&with_deps, deps, true) < 0)
+ return log_oom();
+ }
+
+ *ret = TAKE_PTR(with_deps);
+
+ return 0;
+}
+
+int maybe_extend_with_unit_dependencies(sd_bus *bus, char ***list) {
+ _cleanup_strv_free_ char **list_with_deps = NULL;
+ int r;
+
+ assert(bus);
+ assert(list);
+
+ if (!arg_with_dependencies)
+ return 0;
+
+ r = append_unit_dependencies(bus, *list, &list_with_deps);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append unit dependencies: %m");
+
+ strv_free(*list);
+ *list = TAKE_PTR(list_with_deps);
+ return 0;
+}
+
+int unit_get_dependencies(sd_bus *bus, const char *name, char ***ret) {
+ _cleanup_strv_free_ char **deps = NULL;
+
+ static const struct bus_properties_map map[_DEPENDENCY_MAX][6] = {
+ [DEPENDENCY_FORWARD] = {
+ { "Requires", "as", NULL, 0 },
+ { "Requisite", "as", NULL, 0 },
+ { "Wants", "as", NULL, 0 },
+ { "ConsistsOf", "as", NULL, 0 },
+ { "BindsTo", "as", NULL, 0 },
+ {}
+ },
+ [DEPENDENCY_REVERSE] = {
+ { "RequiredBy", "as", NULL, 0 },
+ { "RequisiteOf", "as", NULL, 0 },
+ { "WantedBy", "as", NULL, 0 },
+ { "PartOf", "as", NULL, 0 },
+ { "BoundBy", "as", NULL, 0 },
+ {}
+ },
+ [DEPENDENCY_AFTER] = {
+ { "After", "as", NULL, 0 },
+ {}
+ },
+ [DEPENDENCY_BEFORE] = {
+ { "Before", "as", NULL, 0 },
+ {}
+ },
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *dbus_path = NULL;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(ret);
+
+ dbus_path = unit_dbus_path_from_name(name);
+ if (!dbus_path)
+ return log_oom();
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ map[arg_dependency],
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ &deps);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get properties of %s: %s", name, bus_error_message(&error, r));
+
+ strv_uniq(deps); /* Sometimes a unit might have multiple deps on the other unit,
+ * but we still want to show it just once. */
+ *ret = TAKE_PTR(deps);
+
+ return 0;
+}
+
+const char* unit_type_suffix(const char *unit) {
+ const char *dot;
+
+ dot = strrchr(unit, '.');
+ if (!dot)
+ return "";
+
+ return dot + 1;
+}
+
+bool output_show_unit(const UnitInfo *u, char **patterns) {
+ assert(u);
+
+ if (!strv_fnmatch_or_empty(patterns, u->id, FNM_NOESCAPE))
+ return false;
+
+ if (arg_types && !strv_find(arg_types, unit_type_suffix(u->id)))
+ return false;
+
+ if (arg_all)
+ return true;
+
+ /* Note that '--all' is not purely a state filter, but also a filter that hides units that "follow"
+ * other units (which is used for device units that appear under different names). */
+ if (!isempty(u->following))
+ return false;
+
+ if (!strv_isempty(arg_states))
+ return true;
+
+ /* By default show all units except the ones in inactive state and with no pending job */
+ if (u->job_id > 0)
+ return true;
+
+ if (streq(u->active_state, "inactive"))
+ return false;
+
+ return true;
+}
+
+bool install_client_side(void) {
+ /* Decides when to execute enable/disable/... operations client-side rather than server-side. */
+
+ if (running_in_chroot_or_offline())
+ return true;
+
+ if (sd_booted() <= 0)
+ return true;
+
+ if (!isempty(arg_root))
+ return true;
+
+ if (arg_scope == UNIT_FILE_GLOBAL)
+ return true;
+
+ /* Unsupported environment variable, mostly for debugging purposes */
+ if (getenv_bool("SYSTEMCTL_INSTALL_CLIENT_SIDE") > 0)
+ return true;
+
+ return false;
+}
+
+int output_table(Table *table) {
+ int r;
+
+ assert(table);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output))
+ r = table_print_json(table, NULL, output_mode_to_json_format_flags(arg_output) | JSON_FORMAT_COLOR_AUTO);
+ else
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return 0;
+}
+
+bool show_preset_for_state(UnitFileState state) {
+ /* Don't show preset state in those unit file states, it'll only confuse users. */
+ return !IN_SET(state,
+ UNIT_FILE_ALIAS,
+ UNIT_FILE_STATIC,
+ UNIT_FILE_GENERATED,
+ UNIT_FILE_TRANSIENT);
+}
+
+UnitFileFlags unit_file_flags_from_args(void) {
+ return (arg_runtime ? UNIT_FILE_RUNTIME : 0) |
+ (arg_force ? UNIT_FILE_FORCE : 0);
+}
+
+int mangle_names(const char *operation, char **original_names, char ***ret_mangled_names) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **i, **name;
+ int r;
+
+ assert(ret_mangled_names);
+
+ l = i = new(char*, strv_length(original_names) + 1);
+ if (!l)
+ return log_oom();
+
+ STRV_FOREACH(name, original_names) {
+
+ /* When enabling units qualified path names are OK, too, hence allow them explicitly. */
+
+ if (is_path(*name)) {
+ *i = strdup(*name);
+ if (!*i)
+ return log_oom();
+ } else {
+ r = unit_name_mangle_with_suffix(*name, operation,
+ arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN,
+ ".service", i);
+ if (r < 0) {
+ *i = NULL;
+ return log_error_errno(r, "Failed to mangle unit name: %m");
+ }
+ }
+
+ i++;
+ }
+
+ *i = NULL;
+ *ret_mangled_names = TAKE_PTR(l);
+
+ return 0;
+}
+
+int halt_now(enum action a) {
+ /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need
+ * to be synced explicitly in advance. */
+ if (!arg_no_sync && !arg_dry_run)
+ (void) sync();
+
+ /* Make sure C-A-D is handled by the kernel from this point on... */
+ if (!arg_dry_run)
+ (void) reboot(RB_ENABLE_CAD);
+
+ switch (a) {
+
+ case ACTION_HALT:
+ if (!arg_quiet)
+ log_info("Halting.");
+ if (arg_dry_run)
+ return 0;
+ (void) reboot(RB_HALT_SYSTEM);
+ return -errno;
+
+ case ACTION_POWEROFF:
+ if (!arg_quiet)
+ log_info("Powering off.");
+ if (arg_dry_run)
+ return 0;
+ (void) reboot(RB_POWER_OFF);
+ return -errno;
+
+ case ACTION_KEXEC:
+ case ACTION_REBOOT:
+ return reboot_with_parameter(REBOOT_FALLBACK |
+ (arg_quiet ? 0 : REBOOT_LOG) |
+ (arg_dry_run ? REBOOT_DRY_RUN : 0));
+
+ default:
+ assert_not_reached("Unknown action.");
+ }
+}
diff --git a/src/systemctl/systemctl-util.h b/src/systemctl/systemctl-util.h
new file mode 100644
index 0000000..6445bb4
--- /dev/null
+++ b/src/systemctl/systemctl-util.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-unit-util.h"
+#include "format-table.h"
+#include "systemctl.h"
+
+typedef enum BusFocus {
+ BUS_FULL, /* The full bus indicated via --system or --user */
+ BUS_MANAGER, /* The manager itself, possibly directly, possibly via the bus */
+ _BUS_FOCUS_MAX
+} BusFocus;
+
+int acquire_bus(BusFocus focus, sd_bus **ret);
+void release_busses(void);
+
+void ask_password_agent_open_maybe(void);
+void polkit_agent_open_maybe(void);
+
+int translate_bus_error_to_exit_status(int r, const sd_bus_error *error);
+
+int get_state_one_unit(sd_bus *bus, const char *name, UnitActiveState *ret_active_state);
+int get_unit_list(sd_bus *bus, const char *machine, char **patterns, UnitInfo **unit_infos, int c, sd_bus_message **ret_reply);
+int expand_unit_names(sd_bus *bus, char **names, const char* suffix, char ***ret, bool *ret_expanded);
+
+int check_triggering_units(sd_bus *bus, const char *unit);
+
+int need_daemon_reload(sd_bus *bus, const char *unit);
+
+void warn_unit_file_changed(const char *unit);
+
+int append_unit_dependencies(sd_bus *bus, char **names, char ***ret);
+int maybe_extend_with_unit_dependencies(sd_bus *bus, char ***list);
+
+int unit_file_find_path(LookupPaths *lp, const char *unit_name, char **ret_unit_path);
+int unit_find_paths(sd_bus *bus, const char *unit_name, LookupPaths *lp, bool force_client_side, Hashmap **cached_id_map, Hashmap **cached_name_map, char **ret_fragment_path, char ***ret_dropin_paths);
+
+int unit_is_masked(sd_bus *bus, LookupPaths *lp, const char *name);
+int unit_exists(LookupPaths *lp, const char *unit);
+
+int unit_get_dependencies(sd_bus *bus, const char *name, char ***ret);
+
+const char* unit_type_suffix(const char *unit);
+bool output_show_unit(const UnitInfo *u, char **patterns);
+
+bool install_client_side(void);
+
+int output_table(Table *table);
+
+bool show_preset_for_state(UnitFileState state);
+
+int mangle_names(const char *operation, char **original_names, char ***ret_mangled_names);
+
+UnitFileFlags unit_file_flags_from_args(void);
+
+int halt_now(enum action a);
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
new file mode 100644
index 0000000..d002d93
--- /dev/null
+++ b/src/systemctl/systemctl.c
@@ -0,0 +1,1128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <locale.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "bus-util.h"
+#include "install.h"
+#include "main-func.h"
+#include "output-mode.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "rlimit-util.h"
+#include "sigbus.h"
+#include "signal-util.h"
+#include "string-table.h"
+#include "systemctl-add-dependency.h"
+#include "systemctl-cancel-job.h"
+#include "systemctl-clean-or-freeze.h"
+#include "systemctl-compat-halt.h"
+#include "systemctl-compat-runlevel.h"
+#include "systemctl-compat-shutdown.h"
+#include "systemctl-compat-telinit.h"
+#include "systemctl-daemon-reload.h"
+#include "systemctl-edit.h"
+#include "systemctl-enable.h"
+#include "systemctl-is-active.h"
+#include "systemctl-is-enabled.h"
+#include "systemctl-is-system-running.h"
+#include "systemctl-kill.h"
+#include "systemctl-list-dependencies.h"
+#include "systemctl-list-jobs.h"
+#include "systemctl-list-machines.h"
+#include "systemctl-list-unit-files.h"
+#include "systemctl-list-units.h"
+#include "systemctl-log-setting.h"
+#include "systemctl-logind.h"
+#include "systemctl-preset-all.h"
+#include "systemctl-reset-failed.h"
+#include "systemctl-service-watchdogs.h"
+#include "systemctl-set-default.h"
+#include "systemctl-set-environment.h"
+#include "systemctl-set-property.h"
+#include "systemctl-show.h"
+#include "systemctl-start-special.h"
+#include "systemctl-start-unit.h"
+#include "systemctl-switch-root.h"
+#include "systemctl-sysv-compat.h"
+#include "systemctl-trivial-method.h"
+#include "systemctl-util.h"
+#include "systemctl.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "verbs.h"
+#include "virt.h"
+
+char **arg_types = NULL;
+char **arg_states = NULL;
+char **arg_properties = NULL;
+bool arg_all = false;
+enum dependency arg_dependency = DEPENDENCY_FORWARD;
+const char *arg_job_mode = "replace";
+UnitFileScope arg_scope = UNIT_FILE_SYSTEM;
+bool arg_wait = false;
+bool arg_no_block = false;
+bool arg_no_legend = false;
+PagerFlags arg_pager_flags = 0;
+bool arg_no_wtmp = false;
+bool arg_no_sync = false;
+bool arg_no_wall = false;
+bool arg_no_reload = false;
+bool arg_value = false;
+bool arg_show_types = false;
+bool arg_ignore_inhibitors = false;
+bool arg_dry_run = false;
+bool arg_quiet = false;
+bool arg_full = false;
+bool arg_recursive = false;
+bool arg_with_dependencies = false;
+bool arg_show_transaction = false;
+int arg_force = 0;
+bool arg_ask_password = false;
+bool arg_runtime = false;
+UnitFilePresetMode arg_preset_mode = UNIT_FILE_PRESET_FULL;
+char **arg_wall = NULL;
+const char *arg_kill_who = NULL;
+int arg_signal = SIGTERM;
+char *arg_root = NULL;
+usec_t arg_when = 0;
+const char *arg_reboot_argument = NULL;
+enum action arg_action = ACTION_SYSTEMCTL;
+BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+const char *arg_host = NULL;
+unsigned arg_lines = 10;
+OutputMode arg_output = OUTPUT_SHORT;
+bool arg_plain = false;
+bool arg_firmware_setup = false;
+usec_t arg_boot_loader_menu = USEC_INFINITY;
+const char *arg_boot_loader_entry = NULL;
+bool arg_now = false;
+bool arg_jobs_before = false;
+bool arg_jobs_after = false;
+char **arg_clean_what = NULL;
+TimestampStyle arg_timestamp_style = TIMESTAMP_PRETTY;
+
+STATIC_DESTRUCTOR_REGISTER(arg_wall, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_types, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_states, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_properties, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_clean_what, strv_freep);
+
+static int systemctl_help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("systemctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%1$s [OPTIONS...] COMMAND ...\n\n"
+ "%5$sQuery or send control commands to the system manager.%6$s\n"
+ "\n%3$sUnit Commands:%4$s\n"
+ " list-units [PATTERN...] List units currently in memory\n"
+ " list-sockets [PATTERN...] List socket units currently in memory,\n"
+ " ordered by address\n"
+ " list-timers [PATTERN...] List timer units currently in memory,\n"
+ " ordered by next elapse\n"
+ " is-active PATTERN... Check whether units are active\n"
+ " is-failed PATTERN... Check whether units are failed\n"
+ " status [PATTERN...|PID...] Show runtime status of one or more units\n"
+ " show [PATTERN...|JOB...] Show properties of one or more\n"
+ " units/jobs or the manager\n"
+ " cat PATTERN... Show files and drop-ins of specified units\n"
+ " help PATTERN...|PID... Show manual for one or more units\n"
+ " list-dependencies [UNIT...] Recursively show units which are required\n"
+ " or wanted by the units or by which those\n"
+ " units are required or wanted\n"
+ " start UNIT... Start (activate) one or more units\n"
+ " stop UNIT... Stop (deactivate) one or more units\n"
+ " reload UNIT... Reload one or more units\n"
+ " restart UNIT... Start or restart one or more units\n"
+ " try-restart UNIT... Restart one or more units if active\n"
+ " reload-or-restart UNIT... Reload one or more units if possible,\n"
+ " otherwise start or restart\n"
+ " try-reload-or-restart UNIT... If active, reload one or more units,\n"
+ " if supported, otherwise restart\n"
+ " isolate UNIT Start one unit and stop all others\n"
+ " kill UNIT... Send signal to processes of a unit\n"
+ " clean UNIT... Clean runtime, cache, state, logs or\n"
+ " configuration of unit\n"
+ " freeze PATTERN... Freeze execution of unit processes\n"
+ " thaw PATTERN... Resume execution of a frozen unit\n"
+ " set-property UNIT PROPERTY=VALUE... Sets one or more properties of a unit\n"
+ " service-log-level SERVICE [LEVEL] Get/set logging threshold for service\n"
+ " service-log-target SERVICE [TARGET] Get/set logging target for service\n"
+ " reset-failed [PATTERN...] Reset failed state for all, one, or more\n"
+ " units"
+ "\n%3$sUnit File Commands:%4$s\n"
+ " list-unit-files [PATTERN...] List installed unit files\n"
+ " enable [UNIT...|PATH...] Enable one or more unit files\n"
+ " disable UNIT... Disable one or more unit files\n"
+ " reenable UNIT... Reenable one or more unit files\n"
+ " preset UNIT... Enable/disable one or more unit files\n"
+ " based on preset configuration\n"
+ " preset-all Enable/disable all unit files based on\n"
+ " preset configuration\n"
+ " is-enabled UNIT... Check whether unit files are enabled\n"
+ " mask UNIT... Mask one or more units\n"
+ " unmask UNIT... Unmask one or more units\n"
+ " link PATH... Link one or more units files into\n"
+ " the search path\n"
+ " revert UNIT... Revert one or more unit files to vendor\n"
+ " version\n"
+ " add-wants TARGET UNIT... Add 'Wants' dependency for the target\n"
+ " on specified one or more units\n"
+ " add-requires TARGET UNIT... Add 'Requires' dependency for the target\n"
+ " on specified one or more units\n"
+ " edit UNIT... Edit one or more unit files\n"
+ " get-default Get the name of the default target\n"
+ " set-default TARGET Set the default target\n"
+ "\n%3$sMachine Commands:%4$s\n"
+ " list-machines [PATTERN...] List local containers and host\n"
+ "\n%3$sJob Commands:%4$s\n"
+ " list-jobs [PATTERN...] List jobs\n"
+ " cancel [JOB...] Cancel all, one, or more jobs\n"
+ "\n%3$sEnvironment Commands:%4$s\n"
+ " show-environment Dump environment\n"
+ " set-environment VARIABLE=VALUE... Set one or more environment variables\n"
+ " unset-environment VARIABLE... Unset one or more environment variables\n"
+ " import-environment [VARIABLE...] Import all or some environment variables\n"
+ "\n%3$sManager State Commands:%4$s\n"
+ " daemon-reload Reload systemd manager configuration\n"
+ " daemon-reexec Reexecute systemd manager\n"
+ " log-level [LEVEL] Get/set logging threshold for manager\n"
+ " log-target [TARGET] Get/set logging target for manager\n"
+ " service-watchdogs [BOOL] Get/set service watchdog state\n"
+ "\n%3$sSystem Commands:%4$s\n"
+ " is-system-running Check whether system is fully running\n"
+ " default Enter system default mode\n"
+ " rescue Enter system rescue mode\n"
+ " emergency Enter system emergency mode\n"
+ " halt Shut down and halt the system\n"
+ " poweroff Shut down and power-off the system\n"
+ " reboot Shut down and reboot the system\n"
+ " kexec Shut down and reboot the system with kexec\n"
+ " exit [EXIT_CODE] Request user instance or container exit\n"
+ " switch-root ROOT [INIT] Change to a different root file system\n"
+ " suspend Suspend the system\n"
+ " hibernate Hibernate the system\n"
+ " hybrid-sleep Hibernate and suspend the system\n"
+ " suspend-then-hibernate Suspend the system, wake after a period of\n"
+ " time, and hibernate"
+ "\n%3$sOptions:%4$s\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --system Connect to system manager\n"
+ " --user Connect to user service manager\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on a local container\n"
+ " -t --type=TYPE List units of a particular type\n"
+ " --state=STATE List units with particular LOAD or SUB or ACTIVE state\n"
+ " --failed Shortcut for --state=failed\n"
+ " -p --property=NAME Show only properties by this name\n"
+ " -P NAME Equivalent to --value --property=NAME\n"
+ " -a --all Show all properties/all units currently in memory,\n"
+ " including dead/empty ones. To list all units installed\n"
+ " on the system, use 'list-unit-files' instead.\n"
+ " -l --full Don't ellipsize unit names on output\n"
+ " -r --recursive Show unit list of host and local containers\n"
+ " --reverse Show reverse dependencies with 'list-dependencies'\n"
+ " --with-dependencies Show unit dependencies with 'status', 'cat',\n"
+ " 'list-units', and 'list-unit-files'.\n"
+ " --job-mode=MODE Specify how to deal with already queued jobs, when\n"
+ " queueing a new job\n"
+ " -T --show-transaction When enqueuing a unit job, show full transaction\n"
+ " --show-types When showing sockets, explicitly show their type\n"
+ " --value When showing properties, only print the value\n"
+ " -i --ignore-inhibitors When shutting down or sleeping, ignore inhibitors\n"
+ " --kill-who=WHO Whom to send signal to\n"
+ " -s --signal=SIGNAL Which signal to send\n"
+ " --what=RESOURCES Which types of resources to remove\n"
+ " --now Start or stop unit after enabling or disabling it\n"
+ " --dry-run Only print what would be done\n"
+ " Currently supported by verbs: halt, poweroff, reboot,\n"
+ " kexec, suspend, hibernate, suspend-then-hibernate,\n"
+ " hybrid-sleep, default, rescue, emergency, and exit.\n"
+ " -q --quiet Suppress output\n"
+ " --wait For (re)start, wait until service stopped again\n"
+ " For is-system-running, wait until startup is completed\n"
+ " --no-block Do not wait until operation finished\n"
+ " --no-wall Don't send wall message before halt/power-off/reboot\n"
+ " --no-reload Don't reload daemon after en-/dis-abling unit files\n"
+ " --no-legend Do not print a legend (column headers and hints)\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-ask-password Do not ask for system passwords\n"
+ " --global Enable/disable/mask unit files globally\n"
+ " --runtime Enable/disable/mask unit files temporarily until next\n"
+ " reboot\n"
+ " -f --force When enabling unit files, override existing symlinks\n"
+ " When shutting down, execute action immediately\n"
+ " --preset-mode= Apply only enable, only disable, or all presets\n"
+ " --root=PATH Enable/disable/mask unit files in the specified root\n"
+ " directory\n"
+ " -n --lines=INTEGER Number of journal entries to show\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-iso-precise, short-full,\n"
+ " short-monotonic, short-unix,\n"
+ " verbose, export, json, json-pretty, json-sse, cat)\n"
+ " --firmware-setup Tell the firmware to show the setup menu on next boot\n"
+ " --boot-loader-menu=TIME\n"
+ " Boot into boot loader menu on next boot\n"
+ " --boot-loader-entry=NAME\n"
+ " Boot into a specific boot loader entry on next boot\n"
+ " --plain Print unit dependencies as a list instead of a tree\n"
+ " --timestamp=FORMAT Change format of printed timestamps.\n"
+ " 'pretty' (default): 'Day YYYY-MM-DD HH:MM:SS TZ\n"
+ " 'us': 'Day YYYY-MM-DD HH:MM:SS.UUUUUU TZ\n"
+ " 'utc': 'Day YYYY-MM-DD HH:MM:SS UTC\n"
+ " 'us+utc': 'Day YYYY-MM-DD HH:MM:SS.UUUUUU UTC\n"
+ "\nSee the %2$s for details.\n"
+ , program_invocation_short_name
+ , link
+ , ansi_underline(), ansi_normal()
+ , ansi_highlight(), ansi_normal()
+ );
+
+ return 0;
+}
+
+static void help_types(void) {
+ if (!arg_no_legend)
+ puts("Available unit types:");
+
+ DUMP_STRING_TABLE(unit_type, UnitType, _UNIT_TYPE_MAX);
+}
+
+static void help_states(void) {
+ if (!arg_no_legend)
+ puts("Available unit load states:");
+ DUMP_STRING_TABLE(unit_load_state, UnitLoadState, _UNIT_LOAD_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable unit active states:");
+ DUMP_STRING_TABLE(unit_active_state, UnitActiveState, _UNIT_ACTIVE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable unit file states:");
+ DUMP_STRING_TABLE(unit_file_state, UnitFileState, _UNIT_FILE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable automount unit substates:");
+ DUMP_STRING_TABLE(automount_state, AutomountState, _AUTOMOUNT_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable device unit substates:");
+ DUMP_STRING_TABLE(device_state, DeviceState, _DEVICE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable mount unit substates:");
+ DUMP_STRING_TABLE(mount_state, MountState, _MOUNT_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable path unit substates:");
+ DUMP_STRING_TABLE(path_state, PathState, _PATH_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable scope unit substates:");
+ DUMP_STRING_TABLE(scope_state, ScopeState, _SCOPE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable service unit substates:");
+ DUMP_STRING_TABLE(service_state, ServiceState, _SERVICE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable slice unit substates:");
+ DUMP_STRING_TABLE(slice_state, SliceState, _SLICE_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable socket unit substates:");
+ DUMP_STRING_TABLE(socket_state, SocketState, _SOCKET_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable swap unit substates:");
+ DUMP_STRING_TABLE(swap_state, SwapState, _SWAP_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable target unit substates:");
+ DUMP_STRING_TABLE(target_state, TargetState, _TARGET_STATE_MAX);
+
+ if (!arg_no_legend)
+ puts("\nAvailable timer unit substates:");
+ DUMP_STRING_TABLE(timer_state, TimerState, _TIMER_STATE_MAX);
+}
+
+static int systemctl_parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_FAIL = 0x100,
+ ARG_REVERSE,
+ ARG_AFTER,
+ ARG_BEFORE,
+ ARG_DRY_RUN,
+ ARG_SHOW_TYPES,
+ ARG_IRREVERSIBLE,
+ ARG_IGNORE_DEPENDENCIES,
+ ARG_VALUE,
+ ARG_VERSION,
+ ARG_USER,
+ ARG_SYSTEM,
+ ARG_GLOBAL,
+ ARG_NO_BLOCK,
+ ARG_NO_LEGEND,
+ ARG_NO_PAGER,
+ ARG_NO_WALL,
+ ARG_ROOT,
+ ARG_NO_RELOAD,
+ ARG_KILL_WHO,
+ ARG_NO_ASK_PASSWORD,
+ ARG_FAILED,
+ ARG_RUNTIME,
+ ARG_PLAIN,
+ ARG_STATE,
+ ARG_JOB_MODE,
+ ARG_PRESET_MODE,
+ ARG_FIRMWARE_SETUP,
+ ARG_BOOT_LOADER_MENU,
+ ARG_BOOT_LOADER_ENTRY,
+ ARG_NOW,
+ ARG_MESSAGE,
+ ARG_WITH_DEPENDENCIES,
+ ARG_WAIT,
+ ARG_WHAT,
+ ARG_REBOOT_ARG,
+ ARG_TIMESTAMP_STYLE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "type", required_argument, NULL, 't' },
+ { "property", required_argument, NULL, 'p' },
+ { "all", no_argument, NULL, 'a' },
+ { "reverse", no_argument, NULL, ARG_REVERSE },
+ { "after", no_argument, NULL, ARG_AFTER },
+ { "before", no_argument, NULL, ARG_BEFORE },
+ { "show-types", no_argument, NULL, ARG_SHOW_TYPES },
+ { "failed", no_argument, NULL, ARG_FAILED }, /* compatibility only */
+ { "full", no_argument, NULL, 'l' },
+ { "job-mode", required_argument, NULL, ARG_JOB_MODE },
+ { "fail", no_argument, NULL, ARG_FAIL }, /* compatibility only */
+ { "irreversible", no_argument, NULL, ARG_IRREVERSIBLE }, /* compatibility only */
+ { "ignore-dependencies", no_argument, NULL, ARG_IGNORE_DEPENDENCIES }, /* compatibility only */
+ { "ignore-inhibitors", no_argument, NULL, 'i' },
+ { "value", no_argument, NULL, ARG_VALUE },
+ { "user", no_argument, NULL, ARG_USER },
+ { "system", no_argument, NULL, ARG_SYSTEM },
+ { "global", no_argument, NULL, ARG_GLOBAL },
+ { "wait", no_argument, NULL, ARG_WAIT },
+ { "no-block", no_argument, NULL, ARG_NO_BLOCK },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-wall", no_argument, NULL, ARG_NO_WALL },
+ { "dry-run", no_argument, NULL, ARG_DRY_RUN },
+ { "quiet", no_argument, NULL, 'q' },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "force", no_argument, NULL, 'f' },
+ { "no-reload", no_argument, NULL, ARG_NO_RELOAD },
+ { "kill-who", required_argument, NULL, ARG_KILL_WHO },
+ { "signal", required_argument, NULL, 's' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "runtime", no_argument, NULL, ARG_RUNTIME },
+ { "lines", required_argument, NULL, 'n' },
+ { "output", required_argument, NULL, 'o' },
+ { "plain", no_argument, NULL, ARG_PLAIN },
+ { "state", required_argument, NULL, ARG_STATE },
+ { "recursive", no_argument, NULL, 'r' },
+ { "with-dependencies", no_argument, NULL, ARG_WITH_DEPENDENCIES },
+ { "preset-mode", required_argument, NULL, ARG_PRESET_MODE },
+ { "firmware-setup", no_argument, NULL, ARG_FIRMWARE_SETUP },
+ { "boot-loader-menu", required_argument, NULL, ARG_BOOT_LOADER_MENU },
+ { "boot-loader-entry", required_argument, NULL, ARG_BOOT_LOADER_ENTRY },
+ { "now", no_argument, NULL, ARG_NOW },
+ { "message", required_argument, NULL, ARG_MESSAGE },
+ { "show-transaction", no_argument, NULL, 'T' },
+ { "what", required_argument, NULL, ARG_WHAT },
+ { "reboot-argument", required_argument, NULL, ARG_REBOOT_ARG },
+ { "timestamp", required_argument, NULL, ARG_TIMESTAMP_STYLE },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ /* We default to allowing interactive authorization only in systemctl (not in the legacy commands) */
+ arg_ask_password = true;
+
+ while ((c = getopt_long(argc, argv, "ht:p:P:alqfs:H:M:n:o:iTr.::", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return systemctl_help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 't': {
+ const char *p;
+
+ if (isempty(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--type= requires arguments.");
+
+ for (p = optarg;;) {
+ _cleanup_free_ char *type = NULL;
+
+ r = extract_first_word(&p, &type, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse type: %s", optarg);
+ if (r == 0)
+ break;
+
+ if (streq(type, "help")) {
+ help_types();
+ return 0;
+ }
+
+ if (unit_type_from_string(type) >= 0) {
+ if (strv_consume(&arg_types, TAKE_PTR(type)) < 0)
+ return log_oom();
+ continue;
+ }
+
+ /* It's much nicer to use --state= for load states, but let's support this in
+ * --types= too for compatibility with old versions */
+ if (unit_load_state_from_string(type) >= 0) {
+ if (strv_consume(&arg_states, TAKE_PTR(type)) < 0)
+ return log_oom();
+ continue;
+ }
+
+ log_error("Unknown unit type or load state '%s'.", type);
+ return log_info_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Use -t help to see a list of allowed values.");
+ }
+
+ break;
+ }
+
+ case 'P':
+ arg_value = true;
+ _fallthrough_;
+
+ case 'p':
+ /* Make sure that if the empty property list was specified, we won't show any
+ properties. */
+ if (isempty(optarg) && !arg_properties) {
+ arg_properties = new0(char*, 1);
+ if (!arg_properties)
+ return log_oom();
+ } else {
+ const char *p;
+
+ for (p = optarg;;) {
+ _cleanup_free_ char *prop = NULL;
+
+ r = extract_first_word(&p, &prop, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse property: %s", optarg);
+ if (r == 0)
+ break;
+
+ if (strv_consume(&arg_properties, TAKE_PTR(prop)) < 0)
+ return log_oom();
+ }
+ }
+
+ /* If the user asked for a particular property, show it, even if it is empty. */
+ arg_all = true;
+
+ break;
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case ARG_REVERSE:
+ arg_dependency = DEPENDENCY_REVERSE;
+ break;
+
+ case ARG_AFTER:
+ arg_dependency = DEPENDENCY_AFTER;
+ arg_jobs_after = true;
+ break;
+
+ case ARG_BEFORE:
+ arg_dependency = DEPENDENCY_BEFORE;
+ arg_jobs_before = true;
+ break;
+
+ case ARG_SHOW_TYPES:
+ arg_show_types = true;
+ break;
+
+ case ARG_VALUE:
+ arg_value = true;
+ break;
+
+ case ARG_JOB_MODE:
+ arg_job_mode = optarg;
+ break;
+
+ case ARG_FAIL:
+ arg_job_mode = "fail";
+ break;
+
+ case ARG_IRREVERSIBLE:
+ arg_job_mode = "replace-irreversibly";
+ break;
+
+ case ARG_IGNORE_DEPENDENCIES:
+ arg_job_mode = "ignore-dependencies";
+ break;
+
+ case ARG_USER:
+ arg_scope = UNIT_FILE_USER;
+ break;
+
+ case ARG_SYSTEM:
+ arg_scope = UNIT_FILE_SYSTEM;
+ break;
+
+ case ARG_GLOBAL:
+ arg_scope = UNIT_FILE_GLOBAL;
+ break;
+
+ case ARG_WAIT:
+ arg_wait = true;
+ break;
+
+ case ARG_NO_BLOCK:
+ arg_no_block = true;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_no_legend = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_WALL:
+ arg_no_wall = true;
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, false, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'l':
+ arg_full = true;
+ break;
+
+ case ARG_FAILED:
+ if (strv_extend(&arg_states, "failed") < 0)
+ return log_oom();
+
+ break;
+
+ case ARG_DRY_RUN:
+ arg_dry_run = true;
+ break;
+
+ case 'q':
+ arg_quiet = true;
+ break;
+
+ case 'f':
+ arg_force++;
+ break;
+
+ case ARG_NO_RELOAD:
+ arg_no_reload = true;
+ break;
+
+ case ARG_KILL_WHO:
+ arg_kill_who = optarg;
+ break;
+
+ case 's':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(signal, int, _NSIG);
+ return 0;
+ }
+
+ arg_signal = signal_from_string(optarg);
+ if (arg_signal < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse signal string %s.",
+ optarg);
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_RUNTIME:
+ arg_runtime = true;
+ break;
+
+ case 'n':
+ if (safe_atou(optarg, &arg_lines) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse lines '%s'",
+ optarg);
+ break;
+
+ case 'o':
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX);
+ return 0;
+ }
+
+ arg_output = output_mode_from_string(optarg);
+ if (arg_output < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown output '%s'.",
+ optarg);
+
+ if (OUTPUT_MODE_IS_JSON(arg_output)) {
+ arg_no_legend = true;
+ arg_plain = true;
+ }
+ break;
+
+ case 'i':
+ arg_ignore_inhibitors = true;
+ break;
+
+ case ARG_PLAIN:
+ arg_plain = true;
+ break;
+
+ case ARG_FIRMWARE_SETUP:
+ arg_firmware_setup = true;
+ break;
+
+ case ARG_BOOT_LOADER_MENU:
+
+ r = parse_sec(optarg, &arg_boot_loader_menu);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --boot-loader-menu= argument '%s': %m", optarg);
+
+ break;
+
+ case ARG_BOOT_LOADER_ENTRY:
+
+ if (streq(optarg, "help")) { /* Yes, this means, "help" is not a valid boot loader entry name we can deal with */
+ r = help_boot_loader_entry();
+ if (r < 0)
+ return r;
+
+ return 0;
+ }
+
+ arg_boot_loader_entry = empty_to_null(optarg);
+ break;
+
+ case ARG_STATE: {
+ const char *p;
+
+ if (isempty(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--state= requires arguments.");
+
+ for (p = optarg;;) {
+ _cleanup_free_ char *s = NULL;
+
+ r = extract_first_word(&p, &s, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse state: %s", optarg);
+ if (r == 0)
+ break;
+
+ if (streq(s, "help")) {
+ help_states();
+ return 0;
+ }
+
+ if (strv_consume(&arg_states, TAKE_PTR(s)) < 0)
+ return log_oom();
+ }
+ break;
+ }
+
+ case 'r':
+ if (geteuid() != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "--recursive requires root privileges.");
+
+ arg_recursive = true;
+ break;
+
+ case ARG_PRESET_MODE:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(unit_file_preset_mode, UnitFilePresetMode, _UNIT_FILE_PRESET_MAX);
+ return 0;
+ }
+
+ arg_preset_mode = unit_file_preset_mode_from_string(optarg);
+ if (arg_preset_mode < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse preset mode: %s.", optarg);
+
+ break;
+
+ case ARG_NOW:
+ arg_now = true;
+ break;
+
+ case ARG_MESSAGE:
+ if (strv_extend(&arg_wall, optarg) < 0)
+ return log_oom();
+ break;
+
+ case 'T':
+ arg_show_transaction = true;
+ break;
+
+ case ARG_WITH_DEPENDENCIES:
+ arg_with_dependencies = true;
+ break;
+
+ case ARG_WHAT: {
+ const char *p;
+
+ if (isempty(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--what= requires arguments.");
+
+ for (p = optarg;;) {
+ _cleanup_free_ char *k = NULL;
+
+ r = extract_first_word(&p, &k, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse directory type: %s", optarg);
+ if (r == 0)
+ break;
+
+ if (streq(k, "help")) {
+ puts("runtime\n"
+ "state\n"
+ "cache\n"
+ "logs\n"
+ "configuration");
+ return 0;
+ }
+
+ r = strv_consume(&arg_clean_what, TAKE_PTR(k));
+ if (r < 0)
+ return log_oom();
+ }
+
+ break;
+ }
+
+ case ARG_REBOOT_ARG:
+ arg_reboot_argument = optarg;
+ break;
+
+ case ARG_TIMESTAMP_STYLE:
+ if (streq(optarg, "help")) {
+ DUMP_STRING_TABLE(timestamp_style, TimestampStyle, _TIMESTAMP_STYLE_MAX);
+ return 0;
+ }
+
+ arg_timestamp_style = timestamp_style_from_string(optarg);
+ if (arg_timestamp_style < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid value: %s.", optarg);
+
+ break;
+
+ case '.':
+ /* Output an error mimicking getopt, and print a hint afterwards */
+ log_error("%s: invalid option -- '.'", program_invocation_name);
+ log_notice("Hint: to specify units starting with a dash, use \"--\":\n"
+ " %s [OPTIONS...] COMMAND -- -.%s ...",
+ program_invocation_name, optarg ?: "mount");
+ _fallthrough_;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_transport != BUS_TRANSPORT_LOCAL && arg_scope != UNIT_FILE_SYSTEM)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot access user instance remotely.");
+
+ if (arg_wait && arg_no_block)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--wait may not be combined with --no-block.");
+
+ return 1;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ assert(argc >= 0);
+ assert(argv);
+
+ if (program_invocation_short_name) {
+
+ if (strstr(program_invocation_short_name, "halt")) {
+ arg_action = ACTION_HALT;
+ return halt_parse_argv(argc, argv);
+
+ } else if (strstr(program_invocation_short_name, "poweroff")) {
+ arg_action = ACTION_POWEROFF;
+ return halt_parse_argv(argc, argv);
+
+ } else if (strstr(program_invocation_short_name, "reboot")) {
+ if (kexec_loaded())
+ arg_action = ACTION_KEXEC;
+ else
+ arg_action = ACTION_REBOOT;
+ return halt_parse_argv(argc, argv);
+
+ } else if (strstr(program_invocation_short_name, "shutdown")) {
+ arg_action = ACTION_POWEROFF;
+ return shutdown_parse_argv(argc, argv);
+
+ } else if (strstr(program_invocation_short_name, "init")) {
+
+ /* Matches invocations as "init" as well as "telinit", which are synonymous when run
+ * as PID != 1 on SysV.
+ *
+ * On SysV "telinit" was the official command to communicate with PID 1, but "init" would
+ * redirect itself to "telinit" if called with PID != 1. We follow the same logic here still,
+ * though we add one level of indirection, as we implement "telinit" in "systemctl". Hence, for
+ * us if you invoke "init" you get "systemd", but it will execve() "systemctl" immediately with
+ * argv[] unmodified if PID is != 1. If you invoke "telinit" you directly get "systemctl". In
+ * both cases we shall do the same thing, which is why we do strstr(p_i_s_n, "init") here, as a
+ * quick way to match both.
+ *
+ * Also see redirect_telinit() in src/core/main.c. */
+
+ if (sd_booted() > 0) {
+ arg_action = _ACTION_INVALID;
+ return telinit_parse_argv(argc, argv);
+ } else {
+ /* Hmm, so some other init system is running, we need to forward this request to
+ * it. For now we simply guess that it is Upstart. */
+
+ (void) rlimit_nofile_safe();
+ execv(TELINIT, argv);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Couldn't find an alternative telinit implementation to spawn.");
+ }
+
+ } else if (strstr(program_invocation_short_name, "runlevel")) {
+ arg_action = ACTION_RUNLEVEL;
+ return runlevel_parse_argv(argc, argv);
+ }
+ }
+
+ arg_action = ACTION_SYSTEMCTL;
+ return systemctl_parse_argv(argc, argv);
+}
+
+static int systemctl_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "list-units", VERB_ANY, VERB_ANY, VERB_DEFAULT|VERB_ONLINE_ONLY, list_units },
+ { "list-unit-files", VERB_ANY, VERB_ANY, 0, list_unit_files },
+ { "list-sockets", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, list_sockets },
+ { "list-timers", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, list_timers },
+ { "list-jobs", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, list_jobs },
+ { "list-machines", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, list_machines },
+ { "clear-jobs", VERB_ANY, 1, VERB_ONLINE_ONLY, trivial_method },
+ { "cancel", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, cancel_job },
+ { "start", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "stop", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "condstop", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with ALTLinux */
+ { "reload", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "try-restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "reload-or-restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "reload-or-try-restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with old systemctl <= 228 */
+ { "try-reload-or-restart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit },
+ { "force-reload", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with SysV */
+ { "condreload", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with ALTLinux */
+ { "condrestart", 2, VERB_ANY, VERB_ONLINE_ONLY, start_unit }, /* For compatibility with RH */
+ { "isolate", 2, 2, VERB_ONLINE_ONLY, start_unit },
+ { "kill", 2, VERB_ANY, VERB_ONLINE_ONLY, kill_unit },
+ { "clean", 2, VERB_ANY, VERB_ONLINE_ONLY, clean_or_freeze_unit },
+ { "freeze", 2, VERB_ANY, VERB_ONLINE_ONLY, clean_or_freeze_unit },
+ { "thaw", 2, VERB_ANY, VERB_ONLINE_ONLY, clean_or_freeze_unit },
+ { "is-active", 2, VERB_ANY, VERB_ONLINE_ONLY, check_unit_active },
+ { "check", 2, VERB_ANY, VERB_ONLINE_ONLY, check_unit_active }, /* deprecated alias of is-active */
+ { "is-failed", 2, VERB_ANY, VERB_ONLINE_ONLY, check_unit_failed },
+ { "show", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, show },
+ { "cat", 2, VERB_ANY, VERB_ONLINE_ONLY, cat },
+ { "status", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, show },
+ { "help", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, show },
+ { "daemon-reload", VERB_ANY, 1, VERB_ONLINE_ONLY, daemon_reload },
+ { "daemon-reexec", VERB_ANY, 1, VERB_ONLINE_ONLY, daemon_reload },
+ { "log-level", VERB_ANY, 2, VERB_ONLINE_ONLY, log_setting },
+ { "log-target", VERB_ANY, 2, VERB_ONLINE_ONLY, log_setting },
+ { "service-log-level", 2, 3, VERB_ONLINE_ONLY, service_log_setting },
+ { "service-log-target", 2, 3, VERB_ONLINE_ONLY, service_log_setting },
+ { "service-watchdogs", VERB_ANY, 2, VERB_ONLINE_ONLY, service_watchdogs },
+ { "show-environment", VERB_ANY, 1, VERB_ONLINE_ONLY, show_environment },
+ { "set-environment", 2, VERB_ANY, VERB_ONLINE_ONLY, set_environment },
+ { "unset-environment", 2, VERB_ANY, VERB_ONLINE_ONLY, set_environment },
+ { "import-environment", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, import_environment },
+ { "halt", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "poweroff", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "reboot", VERB_ANY, 2, VERB_ONLINE_ONLY, start_system_special },
+ { "kexec", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "suspend", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "hibernate", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "hybrid-sleep", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "suspend-then-hibernate",VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "default", VERB_ANY, 1, VERB_ONLINE_ONLY, start_special },
+ { "rescue", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "emergency", VERB_ANY, 1, VERB_ONLINE_ONLY, start_system_special },
+ { "exit", VERB_ANY, 2, VERB_ONLINE_ONLY, start_special },
+ { "reset-failed", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, reset_failed },
+ { "enable", 2, VERB_ANY, 0, enable_unit },
+ { "disable", 2, VERB_ANY, 0, enable_unit },
+ { "is-enabled", 2, VERB_ANY, 0, unit_is_enabled },
+ { "reenable", 2, VERB_ANY, 0, enable_unit },
+ { "preset", 2, VERB_ANY, 0, enable_unit },
+ { "preset-all", VERB_ANY, 1, 0, preset_all },
+ { "mask", 2, VERB_ANY, 0, enable_unit },
+ { "unmask", 2, VERB_ANY, 0, enable_unit },
+ { "link", 2, VERB_ANY, 0, enable_unit },
+ { "revert", 2, VERB_ANY, 0, enable_unit },
+ { "switch-root", 2, VERB_ANY, VERB_ONLINE_ONLY, switch_root },
+ { "list-dependencies", VERB_ANY, VERB_ANY, VERB_ONLINE_ONLY, list_dependencies },
+ { "set-default", 2, 2, 0, set_default },
+ { "get-default", VERB_ANY, 1, 0, get_default },
+ { "set-property", 3, VERB_ANY, VERB_ONLINE_ONLY, set_property },
+ { "is-system-running", VERB_ANY, 1, 0, is_system_running },
+ { "add-wants", 3, VERB_ANY, 0, add_dependency },
+ { "add-requires", 3, VERB_ANY, 0, add_dependency },
+ { "edit", 2, VERB_ANY, VERB_ONLINE_ONLY, edit },
+ {}
+ };
+
+ const Verb *verb = verbs_find_verb(argv[optind], verbs);
+ if (verb && (verb->flags & VERB_ONLINE_ONLY) && arg_root)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Verb '%s' cannot be used with --root=.",
+ argv[optind] ?: verb->verb);
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_parse_environment_cli();
+ log_open();
+
+ /* The journal merging logic potentially needs a lot of fds. */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ sigbus_install();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finish;
+
+ if (arg_action != ACTION_SYSTEMCTL && running_in_chroot() > 0) {
+ if (!arg_quiet)
+ log_info("Running in chroot, ignoring request.");
+ r = 0;
+ goto finish;
+ }
+
+ /* systemctl_main() will print an error message for the bus connection, but only if it needs to */
+
+ switch (arg_action) {
+
+ case ACTION_SYSTEMCTL:
+ r = systemctl_main(argc, argv);
+ break;
+
+ /* Legacy command aliases set arg_action. They provide some fallbacks, e.g. to tell sysvinit to
+ * reboot after you have installed systemd binaries. */
+
+ case ACTION_HALT:
+ case ACTION_POWEROFF:
+ case ACTION_REBOOT:
+ case ACTION_KEXEC:
+ r = halt_main();
+ break;
+
+ case ACTION_RUNLEVEL2:
+ case ACTION_RUNLEVEL3:
+ case ACTION_RUNLEVEL4:
+ case ACTION_RUNLEVEL5:
+ case ACTION_RESCUE:
+ r = start_with_fallback();
+ break;
+
+ case ACTION_RELOAD:
+ case ACTION_REEXEC:
+ r = reload_with_fallback();
+ break;
+
+ case ACTION_CANCEL_SHUTDOWN:
+ r = logind_cancel_shutdown();
+ break;
+
+ case ACTION_RUNLEVEL:
+ r = runlevel_main();
+ break;
+
+ case ACTION_EXIT:
+ case ACTION_SUSPEND:
+ case ACTION_HIBERNATE:
+ case ACTION_HYBRID_SLEEP:
+ case ACTION_SUSPEND_THEN_HIBERNATE:
+ case ACTION_EMERGENCY:
+ case ACTION_DEFAULT:
+ /* systemctl verbs with no equivalent in the legacy commands. These cannot appear in
+ * arg_action. Fall through. */
+
+ case _ACTION_INVALID:
+ default:
+ assert_not_reached("Unknown action");
+ }
+
+finish:
+ release_busses();
+
+ /* Note that we return r here, not 0, so that we can implement the LSB-like return codes */
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/systemctl/systemctl.h b/src/systemctl/systemctl.h
new file mode 100644
index 0000000..f8cefc9
--- /dev/null
+++ b/src/systemctl/systemctl.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "bus-util.h"
+#include "install.h"
+#include "output-mode.h"
+#include "pager.h"
+
+enum action {
+ ACTION_SYSTEMCTL,
+ ACTION_HALT,
+ ACTION_POWEROFF,
+ ACTION_REBOOT,
+ ACTION_KEXEC,
+ ACTION_EXIT,
+ ACTION_SUSPEND,
+ ACTION_HIBERNATE,
+ ACTION_HYBRID_SLEEP,
+ ACTION_SUSPEND_THEN_HIBERNATE,
+ ACTION_RUNLEVEL2,
+ ACTION_RUNLEVEL3,
+ ACTION_RUNLEVEL4,
+ ACTION_RUNLEVEL5,
+ ACTION_RESCUE,
+ ACTION_EMERGENCY,
+ ACTION_DEFAULT,
+ ACTION_RELOAD,
+ ACTION_REEXEC,
+ ACTION_RUNLEVEL,
+ ACTION_CANCEL_SHUTDOWN,
+ _ACTION_MAX,
+ _ACTION_INVALID = -1
+};
+
+enum dependency {
+ DEPENDENCY_FORWARD,
+ DEPENDENCY_REVERSE,
+ DEPENDENCY_AFTER,
+ DEPENDENCY_BEFORE,
+ _DEPENDENCY_MAX
+};
+
+extern char **arg_types;
+extern char **arg_states;
+extern char **arg_properties;
+extern bool arg_all;
+extern enum dependency arg_dependency;
+extern const char *arg_job_mode;
+extern UnitFileScope arg_scope;
+extern bool arg_wait;
+extern bool arg_no_block;
+extern bool arg_no_legend;
+extern PagerFlags arg_pager_flags;
+extern bool arg_no_wtmp;
+extern bool arg_no_sync;
+extern bool arg_no_wall;
+extern bool arg_no_reload;
+extern bool arg_value;
+extern bool arg_show_types;
+extern bool arg_ignore_inhibitors;
+extern bool arg_dry_run;
+extern bool arg_quiet;
+extern bool arg_full;
+extern bool arg_recursive;
+extern bool arg_with_dependencies;
+extern bool arg_show_transaction;
+extern int arg_force;
+extern bool arg_ask_password;
+extern bool arg_runtime;
+extern UnitFilePresetMode arg_preset_mode;
+extern char **arg_wall;
+extern const char *arg_kill_who;
+extern int arg_signal;
+extern char *arg_root;
+extern usec_t arg_when;
+extern const char *arg_reboot_argument;
+extern enum action arg_action;
+extern BusTransport arg_transport;
+extern const char *arg_host;
+extern unsigned arg_lines;
+extern OutputMode arg_output;
+extern bool arg_plain;
+extern bool arg_firmware_setup;
+extern usec_t arg_boot_loader_menu;
+extern const char *arg_boot_loader_entry;
+extern bool arg_now;
+extern bool arg_jobs_before;
+extern bool arg_jobs_after;
+extern char **arg_clean_what;
+extern TimestampStyle arg_timestamp_style;
diff --git a/src/systemctl/systemd-sysv-install.SKELETON b/src/systemctl/systemd-sysv-install.SKELETON
new file mode 100755
index 0000000..8c16cf9
--- /dev/null
+++ b/src/systemctl/systemd-sysv-install.SKELETON
@@ -0,0 +1,49 @@
+#!/bin/sh
+# This script is called by "systemctl enable/disable" when the given unit is a
+# SysV init.d script. It needs to call the distribution's mechanism for
+# enabling/disabling those, such as chkconfig, update-rc.d, or similar. This
+# can optionally take a --root argument for enabling a SysV init script
+# in a chroot or similar.
+set -e
+
+usage() {
+ echo "Usage: $0 [--root=path] enable|disable|is-enabled <sysv script name>" >&2
+ exit 1
+}
+
+unset ROOT
+
+# parse options
+eval set -- "$(getopt -o r: --long root: -- "$@")"
+while true; do
+ case "$1" in
+ -r|--root)
+ ROOT="$2"
+ shift 2 ;;
+ --) shift ; break ;;
+ *) usage ;;
+ esac
+done
+
+NAME="$2"
+[ -n "$NAME" ] || usage
+
+case "$1" in
+ enable)
+ # call the command to enable SysV init script $NAME here
+ # (consider optional $ROOT)
+ echo "IMPLEMENT ME: enabling SysV init.d script $NAME"
+ ;;
+ disable)
+ # call the command to disable SysV init script $NAME here
+ # (consider optional $ROOT)
+ echo "IMPLEMENT ME: disabling SysV init.d script $NAME"
+ ;;
+ is-enabled)
+ # exit with 0 if $NAME is enabled, non-zero if it is disabled
+ # (consider optional $ROOT)
+ echo "IMPLEMENT ME: checking SysV init.d script $NAME"
+ ;;
+ *)
+ usage ;;
+esac
diff --git a/src/systemd/_sd-common.h b/src/systemd/_sd-common.h
new file mode 100644
index 0000000..e3de2ae
--- /dev/null
+++ b/src/systemd/_sd-common.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdcommonhfoo
+#define foosdcommonhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/* This is a private header; never even think of including this directly! */
+
+#if defined(__INCLUDE_LEVEL__) && __INCLUDE_LEVEL__ <= 1 && !defined(__COVERITY__)
+# error "Do not include _sd-common.h directly; it is a private header."
+#endif
+
+typedef void (*_sd_destroy_t)(void *userdata);
+
+#ifndef _sd_printf_
+# if __GNUC__ >= 4
+# define _sd_printf_(a,b) __attribute__((__format__(printf, a, b)))
+# else
+# define _sd_printf_(a,b)
+# endif
+#endif
+
+#ifndef _sd_sentinel_
+# define _sd_sentinel_ __attribute__((__sentinel__))
+#endif
+
+#ifndef _sd_packed_
+# define _sd_packed_ __attribute__((__packed__))
+#endif
+
+#ifndef _sd_pure_
+# define _sd_pure_ __attribute__((__pure__))
+#endif
+
+/* Note that strictly speaking __deprecated__ has been available before GCC 6. However, starting with GCC 6
+ * it also works on enum values, which we are interested in. Since this is a developer-facing feature anyway
+ * (as opposed to build engineer-facing), let's hence conditionalize this to gcc 6, given that the developers
+ * are probably going to use something newer anyway. */
+#ifndef _sd_deprecated_
+# if __GNUC__ >= 6
+# define _sd_deprecated_ __attribute__((__deprecated__))
+# else
+# define _sd_deprecated_
+# endif
+#endif
+
+#ifndef _SD_STRINGIFY
+# define _SD_XSTRINGIFY(x) #x
+# define _SD_STRINGIFY(x) _SD_XSTRINGIFY(x)
+#endif
+
+#ifndef _SD_BEGIN_DECLARATIONS
+# ifdef __cplusplus
+# define _SD_BEGIN_DECLARATIONS \
+ extern "C" { \
+ struct _sd_useless_struct_to_allow_trailing_semicolon_
+# else
+# define _SD_BEGIN_DECLARATIONS \
+ struct _sd_useless_struct_to_allow_trailing_semicolon_
+# endif
+#endif
+
+#ifndef _SD_END_DECLARATIONS
+# ifdef __cplusplus
+# define _SD_END_DECLARATIONS \
+ } \
+ struct _sd_useless_cpp_struct_to_allow_trailing_semicolon_
+# else
+# define _SD_END_DECLARATIONS \
+ struct _sd_useless_struct_to_allow_trailing_semicolon_
+# endif
+#endif
+
+#ifndef _SD_ARRAY_STATIC
+# if __STDC_VERSION__ >= 199901L
+# define _SD_ARRAY_STATIC static
+# else
+# define _SD_ARRAY_STATIC
+# endif
+#endif
+
+#define _SD_DEFINE_POINTER_CLEANUP_FUNC(type, func) \
+ static __inline__ void func##p(type **p) { \
+ if (*p) \
+ func(*p); \
+ } \
+ struct _sd_useless_struct_to_allow_trailing_semicolon_
+
+#endif
diff --git a/src/systemd/meson.build b/src/systemd/meson.build
new file mode 100644
index 0000000..3d328e5
--- /dev/null
+++ b/src/systemd/meson.build
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+_systemd_headers = '''
+ sd-bus.h
+ sd-bus-protocol.h
+ sd-bus-vtable.h
+ sd-daemon.h
+ sd-device.h
+ sd-event.h
+ sd-hwdb.h
+ sd-id128.h
+ sd-journal.h
+ sd-login.h
+ sd-messages.h
+ sd-path.h
+'''.split()
+
+# https://github.com/mesonbuild/meson/issues/1633
+systemd_headers = files(_systemd_headers)
+
+_not_installed_headers = '''
+ sd-dhcp6-client.h
+ sd-dhcp6-lease.h
+ sd-dhcp-client.h
+ sd-dhcp-lease.h
+ sd-dhcp-option.h
+ sd-dhcp6-option.h
+ sd-dhcp-server.h
+ sd-ipv4acd.h
+ sd-ipv4ll.h
+ sd-lldp.h
+ sd-ndisc.h
+ sd-netlink.h
+ sd-network.h
+ sd-radv.h
+ sd-resolve.h
+ sd-utf8.h
+'''.split()
+
+install_headers(
+ systemd_headers,
+ '_sd-common.h',
+ subdir : 'systemd')
+
+
+############################################################
+
+opts = [['c'],
+ ['c', '-ansi'],
+ ['c', '-std=iso9899:1990'],
+ ['c', '-std=iso9899:2011']]
+
+if cc.has_argument('-std=iso9899:2017')
+ opts += [['c', '-std=iso9899:2017']]
+endif
+
+if cxx_cmd != ''
+ opts += [['c++'],
+ ['c++', '-std=c++98'],
+ ['c++', '-std=c++11']]
+ if cxx.has_argument('-std=c++14')
+ opts += [['c++', '-std=c++14']]
+ endif
+ if cxx.has_argument('-std=c++17')
+ opts += [['c++', '-std=c++17']]
+ endif
+ if cxx.has_argument('-std=c++20')
+ opts += [['c++', '-std=c++20']]
+ endif
+endif
+
+foreach header : _systemd_headers + _not_installed_headers + [libudev_h_path]
+ foreach opt : opts
+ std_name = opt.length() == 2 ? '_'.join(opt[1].split(':')) : ''
+ name = ''.join(['cc-', header.split('/')[-1], '_', opt[0], std_name])
+ if want_tests != 'false'
+ test(name,
+ check_compilation_sh,
+ args : cc.cmd_array() + ['-c', '-x'] + opt +
+ ['-Werror', '-include',
+ join_paths(meson.current_source_dir(), header)])
+ endif
+ endforeach
+endforeach
diff --git a/src/systemd/sd-bus-protocol.h b/src/systemd/sd-bus-protocol.h
new file mode 100644
index 0000000..ed96cdb
--- /dev/null
+++ b/src/systemd/sd-bus-protocol.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdbusprotocolhfoo
+#define foosdbusprotocolhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Types of message */
+
+enum {
+ _SD_BUS_MESSAGE_TYPE_INVALID = 0,
+ SD_BUS_MESSAGE_METHOD_CALL,
+ SD_BUS_MESSAGE_METHOD_RETURN,
+ SD_BUS_MESSAGE_METHOD_ERROR,
+ SD_BUS_MESSAGE_SIGNAL,
+ _SD_BUS_MESSAGE_TYPE_MAX
+};
+
+/* Primitive types */
+
+enum {
+ _SD_BUS_TYPE_INVALID = 0,
+ SD_BUS_TYPE_BYTE = 'y',
+ SD_BUS_TYPE_BOOLEAN = 'b',
+ SD_BUS_TYPE_INT16 = 'n',
+ SD_BUS_TYPE_UINT16 = 'q',
+ SD_BUS_TYPE_INT32 = 'i',
+ SD_BUS_TYPE_UINT32 = 'u',
+ SD_BUS_TYPE_INT64 = 'x',
+ SD_BUS_TYPE_UINT64 = 't',
+ SD_BUS_TYPE_DOUBLE = 'd',
+ SD_BUS_TYPE_STRING = 's',
+ SD_BUS_TYPE_OBJECT_PATH = 'o',
+ SD_BUS_TYPE_SIGNATURE = 'g',
+ SD_BUS_TYPE_UNIX_FD = 'h',
+ SD_BUS_TYPE_ARRAY = 'a',
+ SD_BUS_TYPE_VARIANT = 'v',
+ SD_BUS_TYPE_STRUCT = 'r', /* not actually used in signatures */
+ SD_BUS_TYPE_STRUCT_BEGIN = '(',
+ SD_BUS_TYPE_STRUCT_END = ')',
+ SD_BUS_TYPE_DICT_ENTRY = 'e', /* not actually used in signatures */
+ SD_BUS_TYPE_DICT_ENTRY_BEGIN = '{',
+ SD_BUS_TYPE_DICT_ENTRY_END = '}'
+};
+
+/* Well-known errors. Note that this is only a sanitized subset of the
+ * errors that the reference implementation generates. */
+
+#define SD_BUS_ERROR_FAILED "org.freedesktop.DBus.Error.Failed"
+#define SD_BUS_ERROR_NO_MEMORY "org.freedesktop.DBus.Error.NoMemory"
+#define SD_BUS_ERROR_SERVICE_UNKNOWN "org.freedesktop.DBus.Error.ServiceUnknown"
+#define SD_BUS_ERROR_NAME_HAS_NO_OWNER "org.freedesktop.DBus.Error.NameHasNoOwner"
+#define SD_BUS_ERROR_NO_REPLY "org.freedesktop.DBus.Error.NoReply"
+#define SD_BUS_ERROR_IO_ERROR "org.freedesktop.DBus.Error.IOError"
+#define SD_BUS_ERROR_BAD_ADDRESS "org.freedesktop.DBus.Error.BadAddress"
+#define SD_BUS_ERROR_NOT_SUPPORTED "org.freedesktop.DBus.Error.NotSupported"
+#define SD_BUS_ERROR_LIMITS_EXCEEDED "org.freedesktop.DBus.Error.LimitsExceeded"
+#define SD_BUS_ERROR_ACCESS_DENIED "org.freedesktop.DBus.Error.AccessDenied"
+#define SD_BUS_ERROR_AUTH_FAILED "org.freedesktop.DBus.Error.AuthFailed"
+#define SD_BUS_ERROR_NO_SERVER "org.freedesktop.DBus.Error.NoServer"
+#define SD_BUS_ERROR_TIMEOUT "org.freedesktop.DBus.Error.Timeout"
+#define SD_BUS_ERROR_NO_NETWORK "org.freedesktop.DBus.Error.NoNetwork"
+#define SD_BUS_ERROR_ADDRESS_IN_USE "org.freedesktop.DBus.Error.AddressInUse"
+#define SD_BUS_ERROR_DISCONNECTED "org.freedesktop.DBus.Error.Disconnected"
+#define SD_BUS_ERROR_INVALID_ARGS "org.freedesktop.DBus.Error.InvalidArgs"
+#define SD_BUS_ERROR_FILE_NOT_FOUND "org.freedesktop.DBus.Error.FileNotFound"
+#define SD_BUS_ERROR_FILE_EXISTS "org.freedesktop.DBus.Error.FileExists"
+#define SD_BUS_ERROR_UNKNOWN_METHOD "org.freedesktop.DBus.Error.UnknownMethod"
+#define SD_BUS_ERROR_UNKNOWN_OBJECT "org.freedesktop.DBus.Error.UnknownObject"
+#define SD_BUS_ERROR_UNKNOWN_INTERFACE "org.freedesktop.DBus.Error.UnknownInterface"
+#define SD_BUS_ERROR_UNKNOWN_PROPERTY "org.freedesktop.DBus.Error.UnknownProperty"
+#define SD_BUS_ERROR_PROPERTY_READ_ONLY "org.freedesktop.DBus.Error.PropertyReadOnly"
+#define SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN "org.freedesktop.DBus.Error.UnixProcessIdUnknown"
+#define SD_BUS_ERROR_INVALID_SIGNATURE "org.freedesktop.DBus.Error.InvalidSignature"
+#define SD_BUS_ERROR_INCONSISTENT_MESSAGE "org.freedesktop.DBus.Error.InconsistentMessage"
+#define SD_BUS_ERROR_MATCH_RULE_NOT_FOUND "org.freedesktop.DBus.Error.MatchRuleNotFound"
+#define SD_BUS_ERROR_MATCH_RULE_INVALID "org.freedesktop.DBus.Error.MatchRuleInvalid"
+#define SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED \
+ "org.freedesktop.DBus.Error.InteractiveAuthorizationRequired"
+
+/* https://dbus.freedesktop.org/doc/dbus-specification.html#message-protocol-marshaling-signature */
+#define SD_BUS_MAXIMUM_SIGNATURE_LENGTH 255
+
+/* https://dbus.freedesktop.org/doc/dbus-specification.html#message-protocol-names */
+#define SD_BUS_MAXIMUM_NAME_LENGTH 255
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-bus-vtable.h b/src/systemd/sd-bus-vtable.h
new file mode 100644
index 0000000..ef57efd
--- /dev/null
+++ b/src/systemd/sd-bus-vtable.h
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdbusvtablehfoo
+#define foosdbusvtablehfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_bus_vtable sd_bus_vtable;
+
+#include "sd-bus.h"
+
+enum {
+ _SD_BUS_VTABLE_START = '<',
+ _SD_BUS_VTABLE_END = '>',
+ _SD_BUS_VTABLE_METHOD = 'M',
+ _SD_BUS_VTABLE_SIGNAL = 'S',
+ _SD_BUS_VTABLE_PROPERTY = 'P',
+ _SD_BUS_VTABLE_WRITABLE_PROPERTY = 'W'
+};
+
+enum {
+ SD_BUS_VTABLE_DEPRECATED = 1ULL << 0,
+ SD_BUS_VTABLE_HIDDEN = 1ULL << 1,
+ SD_BUS_VTABLE_UNPRIVILEGED = 1ULL << 2,
+ SD_BUS_VTABLE_METHOD_NO_REPLY = 1ULL << 3,
+ SD_BUS_VTABLE_PROPERTY_CONST = 1ULL << 4,
+ SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE = 1ULL << 5,
+ SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION = 1ULL << 6,
+ SD_BUS_VTABLE_PROPERTY_EXPLICIT = 1ULL << 7,
+ SD_BUS_VTABLE_SENSITIVE = 1ULL << 8, /* covers both directions: method call + reply */
+ SD_BUS_VTABLE_ABSOLUTE_OFFSET = 1ULL << 9,
+ _SD_BUS_VTABLE_CAPABILITY_MASK = 0xFFFFULL << 40
+};
+
+#define SD_BUS_VTABLE_CAPABILITY(x) ((uint64_t) (((x)+1) & 0xFFFF) << 40)
+
+enum {
+ _SD_BUS_VTABLE_PARAM_NAMES = 1 << 0,
+};
+
+extern const unsigned sd_bus_object_vtable_format;
+
+/* Note: unused areas in the sd_bus_vtable[] array must be initialized to 0. The structure contains an embedded
+ * union, and the compiler is NOT required to initialize the unused areas of the union when the rest of the
+ * structure is initialized. Normally the array is defined as read-only data, in which case the linker places
+ * it in the BSS section, which is always fully initialized, so this is not a concern. But if the array is
+ * created on the stack or on the heap, care must be taken to initialize the unused areas, for examply by
+ * first memsetting the whole region to zero before filling the data in. */
+
+struct sd_bus_vtable {
+ /* Please do not initialize this structure directly, use the
+ * macros below instead */
+
+ uint8_t type:8;
+ uint64_t flags:56;
+ union {
+ struct {
+ size_t element_size;
+ uint64_t features;
+ const unsigned *vtable_format_reference;
+ } start;
+ struct {
+ const char *member;
+ const char *signature;
+ const char *result;
+ sd_bus_message_handler_t handler;
+ size_t offset;
+ const char *names;
+ } method;
+ struct {
+ const char *member;
+ const char *signature;
+ const char *names;
+ } signal;
+ struct {
+ const char *member;
+ const char *signature;
+ sd_bus_property_get_t get;
+ sd_bus_property_set_t set;
+ size_t offset;
+ } property;
+ } x;
+};
+
+#define SD_BUS_VTABLE_START(_flags) \
+ { \
+ .type = _SD_BUS_VTABLE_START, \
+ .flags = _flags, \
+ .x = { \
+ .start = { \
+ .element_size = sizeof(sd_bus_vtable), \
+ .features = _SD_BUS_VTABLE_PARAM_NAMES, \
+ .vtable_format_reference = &sd_bus_object_vtable_format, \
+ }, \
+ }, \
+ }
+
+/* helper macro to format method and signal parameters, one at a time */
+#define SD_BUS_PARAM(x) #x "\0"
+
+#define SD_BUS_METHOD_WITH_NAMES_OFFSET(_member, _signature, _in_names, _result, _out_names, _handler, _offset, _flags) \
+ { \
+ .type = _SD_BUS_VTABLE_METHOD, \
+ .flags = _flags, \
+ .x = { \
+ .method = { \
+ .member = _member, \
+ .signature = _signature, \
+ .result = _result, \
+ .handler = _handler, \
+ .offset = _offset, \
+ .names = _in_names _out_names, \
+ }, \
+ }, \
+ }
+#define SD_BUS_METHOD_WITH_OFFSET(_member, _signature, _result, _handler, _offset, _flags) \
+ SD_BUS_METHOD_WITH_NAMES_OFFSET(_member, _signature, "", _result, "", _handler, _offset, _flags)
+#define SD_BUS_METHOD_WITH_NAMES(_member, _signature, _in_names, _result, _out_names, _handler, _flags) \
+ SD_BUS_METHOD_WITH_NAMES_OFFSET(_member, _signature, _in_names, _result, _out_names, _handler, 0, _flags)
+#define SD_BUS_METHOD(_member, _signature, _result, _handler, _flags) \
+ SD_BUS_METHOD_WITH_NAMES_OFFSET(_member, _signature, "", _result, "", _handler, 0, _flags)
+
+#define SD_BUS_SIGNAL_WITH_NAMES(_member, _signature, _out_names, _flags) \
+ { \
+ .type = _SD_BUS_VTABLE_SIGNAL, \
+ .flags = _flags, \
+ .x = { \
+ .signal = { \
+ .member = _member, \
+ .signature = _signature, \
+ .names = _out_names, \
+ }, \
+ }, \
+ }
+#define SD_BUS_SIGNAL(_member, _signature, _flags) \
+ SD_BUS_SIGNAL_WITH_NAMES(_member, _signature, "", _flags)
+
+#define SD_BUS_PROPERTY(_member, _signature, _get, _offset, _flags) \
+ { \
+ .type = _SD_BUS_VTABLE_PROPERTY, \
+ .flags = _flags, \
+ .x = { \
+ .property = { \
+ .member = _member, \
+ .signature = _signature, \
+ .get = _get, \
+ .set = NULL, \
+ .offset = _offset, \
+ }, \
+ }, \
+ }
+
+#define SD_BUS_WRITABLE_PROPERTY(_member, _signature, _get, _set, _offset, _flags) \
+ { \
+ .type = _SD_BUS_VTABLE_WRITABLE_PROPERTY, \
+ .flags = _flags, \
+ .x = { \
+ .property = { \
+ .member = _member, \
+ .signature = _signature, \
+ .get = _get, \
+ .set = _set, \
+ .offset = _offset, \
+ }, \
+ }, \
+ }
+
+#define SD_BUS_VTABLE_END \
+ { \
+ .type = _SD_BUS_VTABLE_END, \
+ .flags = 0, \
+ .x = { { 0 } }, \
+ }
+
+#define _SD_ECHO(X) X
+#define _SD_CONCAT(X) #X "\0"
+
+#define _SD_VARARGS_FOREACH_EVEN_01(FN, X, ...)
+#define _SD_VARARGS_FOREACH_EVEN_02(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_01(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_03(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_02(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_04(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_03(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_05(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_04(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_06(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_05(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_07(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_06(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_08(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_07(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_09(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_08(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_10(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_09(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_11(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_10(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_12(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_11(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_13(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_12(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_14(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_13(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_15(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_14(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_16(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_15(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_17(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_16(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_18(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_17(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_19(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_18(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_20(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_19(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_21(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_20(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_22(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_21(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_23(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_22(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_24(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_23(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_25(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_24(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_26(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_25(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_27(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_26(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_28(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_27(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_29(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_28(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_30(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_29(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_31(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_30(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_32(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_31(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_33(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_32(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_34(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_33(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_35(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_34(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_36(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_35(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_37(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_36(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_38(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_37(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_39(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_38(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_40(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_39(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_41(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_40(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_42(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_41(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_43(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_42(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_44(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_43(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_45(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_44(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_46(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_45(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_47(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_46(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_48(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_47(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_49(FN, X, ...) _SD_VARARGS_FOREACH_EVEN_48(FN, __VA_ARGS__)
+#define _SD_VARARGS_FOREACH_EVEN_50(FN, X, ...) FN(X) _SD_VARARGS_FOREACH_EVEN_49(FN, __VA_ARGS__)
+
+#define _SD_VARARGS_FOREACH_EVEN_SEQ(_01, _02, _03, _04, _05, _06, _07, _08, _09, _10, \
+ _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, \
+ _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, \
+ _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, \
+ _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, \
+ NAME, ...) NAME
+
+#define _SD_VARARGS_FOREACH_EVEN(FN, ...) \
+ _SD_VARARGS_FOREACH_EVEN_SEQ(__VA_ARGS__, \
+ _SD_VARARGS_FOREACH_EVEN_50, _SD_VARARGS_FOREACH_EVEN_49, \
+ _SD_VARARGS_FOREACH_EVEN_48, _SD_VARARGS_FOREACH_EVEN_47, \
+ _SD_VARARGS_FOREACH_EVEN_46, _SD_VARARGS_FOREACH_EVEN_45, \
+ _SD_VARARGS_FOREACH_EVEN_44, _SD_VARARGS_FOREACH_EVEN_43, \
+ _SD_VARARGS_FOREACH_EVEN_42, _SD_VARARGS_FOREACH_EVEN_41, \
+ _SD_VARARGS_FOREACH_EVEN_40, _SD_VARARGS_FOREACH_EVEN_39, \
+ _SD_VARARGS_FOREACH_EVEN_38, _SD_VARARGS_FOREACH_EVEN_37, \
+ _SD_VARARGS_FOREACH_EVEN_36, _SD_VARARGS_FOREACH_EVEN_35, \
+ _SD_VARARGS_FOREACH_EVEN_34, _SD_VARARGS_FOREACH_EVEN_33, \
+ _SD_VARARGS_FOREACH_EVEN_32, _SD_VARARGS_FOREACH_EVEN_31, \
+ _SD_VARARGS_FOREACH_EVEN_30, _SD_VARARGS_FOREACH_EVEN_29, \
+ _SD_VARARGS_FOREACH_EVEN_28, _SD_VARARGS_FOREACH_EVEN_27, \
+ _SD_VARARGS_FOREACH_EVEN_26, _SD_VARARGS_FOREACH_EVEN_25, \
+ _SD_VARARGS_FOREACH_EVEN_24, _SD_VARARGS_FOREACH_EVEN_23, \
+ _SD_VARARGS_FOREACH_EVEN_22, _SD_VARARGS_FOREACH_EVEN_21, \
+ _SD_VARARGS_FOREACH_EVEN_20, _SD_VARARGS_FOREACH_EVEN_19, \
+ _SD_VARARGS_FOREACH_EVEN_18, _SD_VARARGS_FOREACH_EVEN_17, \
+ _SD_VARARGS_FOREACH_EVEN_16, _SD_VARARGS_FOREACH_EVEN_15, \
+ _SD_VARARGS_FOREACH_EVEN_14, _SD_VARARGS_FOREACH_EVEN_13, \
+ _SD_VARARGS_FOREACH_EVEN_12, _SD_VARARGS_FOREACH_EVEN_11, \
+ _SD_VARARGS_FOREACH_EVEN_10, _SD_VARARGS_FOREACH_EVEN_09, \
+ _SD_VARARGS_FOREACH_EVEN_08, _SD_VARARGS_FOREACH_EVEN_07, \
+ _SD_VARARGS_FOREACH_EVEN_06, _SD_VARARGS_FOREACH_EVEN_05, \
+ _SD_VARARGS_FOREACH_EVEN_04, _SD_VARARGS_FOREACH_EVEN_03, \
+ _SD_VARARGS_FOREACH_EVEN_02, _SD_VARARGS_FOREACH_EVEN_01) \
+ (FN, __VA_ARGS__)
+
+#define SD_BUS_ARGS(...) __VA_ARGS__
+#define SD_BUS_RESULT(...) __VA_ARGS__
+
+#define SD_BUS_NO_ARGS SD_BUS_ARGS(NULL,)
+#define SD_BUS_NO_RESULT SD_BUS_RESULT(NULL,)
+
+#define SD_BUS_METHOD_WITH_ARGS(_member, _args, _result, _handler, _flags) \
+ SD_BUS_METHOD_WITH_NAMES(_member, \
+ _SD_VARARGS_FOREACH_EVEN(_SD_ECHO, _args), \
+ _SD_VARARGS_FOREACH_EVEN(_SD_CONCAT, _args, ""), \
+ _SD_VARARGS_FOREACH_EVEN(_SD_ECHO, _result), \
+ _SD_VARARGS_FOREACH_EVEN(_SD_CONCAT, _result, ""), \
+ _handler, _flags)
+
+#define SD_BUS_METHOD_WITH_ARGS_OFFSET(_member, _args, _result, _handler, _offset, _flags) \
+ SD_BUS_METHOD_WITH_NAMES_OFFSET(_member, \
+ _SD_VARARGS_FOREACH_EVEN(_SD_ECHO, _args), \
+ _SD_VARARGS_FOREACH_EVEN(_SD_CONCAT, _args, ""), \
+ _SD_VARARGS_FOREACH_EVEN(_SD_ECHO, _result), \
+ _SD_VARARGS_FOREACH_EVEN(_SD_CONCAT, _result, ""), \
+ _handler, _offset, _flags)
+
+#define SD_BUS_SIGNAL_WITH_ARGS(_member, _args, _flags) \
+ SD_BUS_SIGNAL_WITH_NAMES(_member, \
+ _SD_VARARGS_FOREACH_EVEN(_SD_ECHO, _args), \
+ _SD_VARARGS_FOREACH_EVEN(_SD_CONCAT, _args, ""), \
+ _flags)
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-bus.h b/src/systemd/sd-bus.h
new file mode 100644
index 0000000..87fbcf3
--- /dev/null
+++ b/src/systemd/sd-bus.h
@@ -0,0 +1,532 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdbushfoo
+#define foosdbushfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+#define SD_BUS_DEFAULT ((sd_bus *) 1)
+#define SD_BUS_DEFAULT_USER ((sd_bus *) 2)
+#define SD_BUS_DEFAULT_SYSTEM ((sd_bus *) 3)
+
+/* Types */
+
+typedef struct sd_bus sd_bus;
+typedef struct sd_bus_message sd_bus_message;
+typedef struct sd_bus_slot sd_bus_slot;
+typedef struct sd_bus_creds sd_bus_creds;
+typedef struct sd_bus_track sd_bus_track;
+
+typedef struct {
+ const char *name;
+ const char *message;
+ int _need_free;
+} sd_bus_error;
+
+typedef struct {
+ const char *name;
+ int code;
+} sd_bus_error_map;
+
+/* Flags */
+
+enum {
+ SD_BUS_CREDS_PID = 1ULL << 0,
+ SD_BUS_CREDS_TID = 1ULL << 1,
+ SD_BUS_CREDS_PPID = 1ULL << 2,
+ SD_BUS_CREDS_UID = 1ULL << 3,
+ SD_BUS_CREDS_EUID = 1ULL << 4,
+ SD_BUS_CREDS_SUID = 1ULL << 5,
+ SD_BUS_CREDS_FSUID = 1ULL << 6,
+ SD_BUS_CREDS_GID = 1ULL << 7,
+ SD_BUS_CREDS_EGID = 1ULL << 8,
+ SD_BUS_CREDS_SGID = 1ULL << 9,
+ SD_BUS_CREDS_FSGID = 1ULL << 10,
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS = 1ULL << 11,
+ SD_BUS_CREDS_COMM = 1ULL << 12,
+ SD_BUS_CREDS_TID_COMM = 1ULL << 13,
+ SD_BUS_CREDS_EXE = 1ULL << 14,
+ SD_BUS_CREDS_CMDLINE = 1ULL << 15,
+ SD_BUS_CREDS_CGROUP = 1ULL << 16,
+ SD_BUS_CREDS_UNIT = 1ULL << 17,
+ SD_BUS_CREDS_SLICE = 1ULL << 18,
+ SD_BUS_CREDS_USER_UNIT = 1ULL << 19,
+ SD_BUS_CREDS_USER_SLICE = 1ULL << 20,
+ SD_BUS_CREDS_SESSION = 1ULL << 21,
+ SD_BUS_CREDS_OWNER_UID = 1ULL << 22,
+ SD_BUS_CREDS_EFFECTIVE_CAPS = 1ULL << 23,
+ SD_BUS_CREDS_PERMITTED_CAPS = 1ULL << 24,
+ SD_BUS_CREDS_INHERITABLE_CAPS = 1ULL << 25,
+ SD_BUS_CREDS_BOUNDING_CAPS = 1ULL << 26,
+ SD_BUS_CREDS_SELINUX_CONTEXT = 1ULL << 27,
+ SD_BUS_CREDS_AUDIT_SESSION_ID = 1ULL << 28,
+ SD_BUS_CREDS_AUDIT_LOGIN_UID = 1ULL << 29,
+ SD_BUS_CREDS_TTY = 1ULL << 30,
+ SD_BUS_CREDS_UNIQUE_NAME = 1ULL << 31,
+ SD_BUS_CREDS_WELL_KNOWN_NAMES = 1ULL << 32,
+ SD_BUS_CREDS_DESCRIPTION = 1ULL << 33,
+ SD_BUS_CREDS_AUGMENT = 1ULL << 63, /* special flag, if on sd-bus will augment creds struct, in a potentially race-full way. */
+ _SD_BUS_CREDS_ALL = (1ULL << 34) -1
+};
+
+enum {
+ SD_BUS_NAME_REPLACE_EXISTING = 1ULL << 0,
+ SD_BUS_NAME_ALLOW_REPLACEMENT = 1ULL << 1,
+ SD_BUS_NAME_QUEUE = 1ULL << 2
+};
+
+enum {
+ SD_BUS_MESSAGE_DUMP_WITH_HEADER = 1ULL << 0,
+ SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY = 1ULL << 1,
+};
+
+/* Callbacks */
+
+typedef int (*sd_bus_message_handler_t)(sd_bus_message *m, void *userdata, sd_bus_error *ret_error);
+typedef int (*sd_bus_property_get_t) (sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+typedef int (*sd_bus_property_set_t) (sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *ret_error);
+typedef int (*sd_bus_object_find_t) (sd_bus *bus, const char *path, const char *interface, void *userdata, void **ret_found, sd_bus_error *ret_error);
+typedef int (*sd_bus_node_enumerator_t) (sd_bus *bus, const char *prefix, void *userdata, char ***ret_nodes, sd_bus_error *ret_error);
+typedef int (*sd_bus_track_handler_t) (sd_bus_track *track, void *userdata);
+typedef _sd_destroy_t sd_bus_destroy_t;
+
+#include "sd-bus-protocol.h"
+#include "sd-bus-vtable.h"
+
+/* Naming */
+
+int sd_bus_interface_name_is_valid(const char *p);
+int sd_bus_service_name_is_valid(const char *p);
+int sd_bus_member_name_is_valid(const char *p);
+int sd_bus_object_path_is_valid(const char *p);
+
+/* Connections */
+
+int sd_bus_default(sd_bus **ret);
+int sd_bus_default_user(sd_bus **ret);
+int sd_bus_default_system(sd_bus **ret);
+
+int sd_bus_open(sd_bus **ret);
+int sd_bus_open_with_description(sd_bus **ret, const char *description);
+int sd_bus_open_user(sd_bus **ret);
+int sd_bus_open_user_with_description(sd_bus **ret, const char *description);
+int sd_bus_open_system(sd_bus **ret);
+int sd_bus_open_system_with_description(sd_bus **ret, const char *description);
+int sd_bus_open_system_remote(sd_bus **ret, const char *host);
+int sd_bus_open_system_machine(sd_bus **ret, const char *machine);
+
+int sd_bus_new(sd_bus **ret);
+
+int sd_bus_set_address(sd_bus *bus, const char *address);
+int sd_bus_set_fd(sd_bus *bus, int input_fd, int output_fd);
+int sd_bus_set_exec(sd_bus *bus, const char *path, char *const *argv);
+int sd_bus_get_address(sd_bus *bus, const char **address);
+int sd_bus_set_bus_client(sd_bus *bus, int b);
+int sd_bus_is_bus_client(sd_bus *bus);
+int sd_bus_set_server(sd_bus *bus, int b, sd_id128_t bus_id);
+int sd_bus_is_server(sd_bus *bus);
+int sd_bus_set_anonymous(sd_bus *bus, int b);
+int sd_bus_is_anonymous(sd_bus *bus);
+int sd_bus_set_trusted(sd_bus *bus, int b);
+int sd_bus_is_trusted(sd_bus *bus);
+int sd_bus_set_monitor(sd_bus *bus, int b);
+int sd_bus_is_monitor(sd_bus *bus);
+int sd_bus_set_description(sd_bus *bus, const char *description);
+int sd_bus_get_description(sd_bus *bus, const char **description);
+int sd_bus_negotiate_creds(sd_bus *bus, int b, uint64_t creds_mask);
+int sd_bus_negotiate_timestamp(sd_bus *bus, int b);
+int sd_bus_negotiate_fds(sd_bus *bus, int b);
+int sd_bus_can_send(sd_bus *bus, char type);
+int sd_bus_get_creds_mask(sd_bus *bus, uint64_t *creds_mask);
+int sd_bus_set_allow_interactive_authorization(sd_bus *bus, int b);
+int sd_bus_get_allow_interactive_authorization(sd_bus *bus);
+int sd_bus_set_exit_on_disconnect(sd_bus *bus, int b);
+int sd_bus_get_exit_on_disconnect(sd_bus *bus);
+int sd_bus_set_close_on_exit(sd_bus *bus, int b);
+int sd_bus_get_close_on_exit(sd_bus *bus);
+int sd_bus_set_watch_bind(sd_bus *bus, int b);
+int sd_bus_get_watch_bind(sd_bus *bus);
+int sd_bus_set_connected_signal(sd_bus *bus, int b);
+int sd_bus_get_connected_signal(sd_bus *bus);
+int sd_bus_set_sender(sd_bus *bus, const char *sender);
+int sd_bus_get_sender(sd_bus *bus, const char **ret);
+
+int sd_bus_start(sd_bus *bus);
+
+int sd_bus_try_close(sd_bus *bus) _sd_deprecated_;
+void sd_bus_close(sd_bus *bus);
+
+sd_bus* sd_bus_ref(sd_bus *bus);
+sd_bus* sd_bus_unref(sd_bus *bus);
+sd_bus* sd_bus_close_unref(sd_bus *bus);
+sd_bus* sd_bus_flush_close_unref(sd_bus *bus);
+
+void sd_bus_default_flush_close(void);
+
+int sd_bus_is_open(sd_bus *bus);
+int sd_bus_is_ready(sd_bus *bus);
+
+int sd_bus_get_bus_id(sd_bus *bus, sd_id128_t *id);
+int sd_bus_get_scope(sd_bus *bus, const char **scope);
+int sd_bus_get_tid(sd_bus *bus, pid_t *tid);
+int sd_bus_get_owner_creds(sd_bus *bus, uint64_t creds_mask, sd_bus_creds **ret);
+
+int sd_bus_send(sd_bus *bus, sd_bus_message *m, uint64_t *cookie);
+int sd_bus_send_to(sd_bus *bus, sd_bus_message *m, const char *destination, uint64_t *cookie);
+int sd_bus_call(sd_bus *bus, sd_bus_message *m, uint64_t usec, sd_bus_error *ret_error, sd_bus_message **reply);
+int sd_bus_call_async(sd_bus *bus, sd_bus_slot **slot, sd_bus_message *m, sd_bus_message_handler_t callback, void *userdata, uint64_t usec);
+
+int sd_bus_get_fd(sd_bus *bus);
+int sd_bus_get_events(sd_bus *bus);
+int sd_bus_get_timeout(sd_bus *bus, uint64_t *timeout_usec);
+int sd_bus_process(sd_bus *bus, sd_bus_message **r);
+int sd_bus_process_priority(sd_bus *bus, int64_t max_priority, sd_bus_message **r) _sd_deprecated_;
+int sd_bus_wait(sd_bus *bus, uint64_t timeout_usec);
+int sd_bus_flush(sd_bus *bus);
+int sd_bus_enqueue_for_read(sd_bus *bus, sd_bus_message *m);
+
+sd_bus_slot* sd_bus_get_current_slot(sd_bus *bus);
+sd_bus_message* sd_bus_get_current_message(sd_bus *bus);
+sd_bus_message_handler_t sd_bus_get_current_handler(sd_bus *bus);
+void* sd_bus_get_current_userdata(sd_bus *bus);
+
+int sd_bus_attach_event(sd_bus *bus, sd_event *e, int priority);
+int sd_bus_detach_event(sd_bus *bus);
+sd_event* sd_bus_get_event(sd_bus *bus);
+
+int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret);
+int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret);
+
+int sd_bus_set_method_call_timeout(sd_bus *bus, uint64_t usec);
+int sd_bus_get_method_call_timeout(sd_bus *bus, uint64_t *ret);
+
+int sd_bus_add_filter(sd_bus *bus, sd_bus_slot **slot, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_add_match(sd_bus *bus, sd_bus_slot **slot, const char *match, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_add_match_async(sd_bus *bus, sd_bus_slot **slot, const char *match, sd_bus_message_handler_t callback, sd_bus_message_handler_t install_callback, void *userdata);
+int sd_bus_add_object(sd_bus *bus, sd_bus_slot **slot, const char *path, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_add_fallback(sd_bus *bus, sd_bus_slot **slot, const char *prefix, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_add_object_vtable(sd_bus *bus, sd_bus_slot **slot, const char *path, const char *interface, const sd_bus_vtable *vtable, void *userdata);
+int sd_bus_add_fallback_vtable(sd_bus *bus, sd_bus_slot **slot, const char *prefix, const char *interface, const sd_bus_vtable *vtable, sd_bus_object_find_t find, void *userdata);
+int sd_bus_add_node_enumerator(sd_bus *bus, sd_bus_slot **slot, const char *path, sd_bus_node_enumerator_t callback, void *userdata);
+int sd_bus_add_object_manager(sd_bus *bus, sd_bus_slot **slot, const char *path);
+
+/* Slot object */
+
+sd_bus_slot* sd_bus_slot_ref(sd_bus_slot *slot);
+sd_bus_slot* sd_bus_slot_unref(sd_bus_slot *slot);
+
+sd_bus* sd_bus_slot_get_bus(sd_bus_slot *slot);
+void* sd_bus_slot_get_userdata(sd_bus_slot *slot);
+void* sd_bus_slot_set_userdata(sd_bus_slot *slot, void *userdata);
+int sd_bus_slot_set_description(sd_bus_slot *slot, const char *description);
+int sd_bus_slot_get_description(sd_bus_slot *slot, const char **description);
+int sd_bus_slot_get_floating(sd_bus_slot *slot);
+int sd_bus_slot_set_floating(sd_bus_slot *slot, int b);
+int sd_bus_slot_set_destroy_callback(sd_bus_slot *s, sd_bus_destroy_t callback);
+int sd_bus_slot_get_destroy_callback(sd_bus_slot *s, sd_bus_destroy_t *callback);
+
+sd_bus_message* sd_bus_slot_get_current_message(sd_bus_slot *slot);
+sd_bus_message_handler_t sd_bus_slot_get_current_handler(sd_bus_slot *slot);
+void* sd_bus_slot_get_current_userdata(sd_bus_slot *slot);
+
+/* Message object */
+
+int sd_bus_message_new(sd_bus *bus, sd_bus_message **m, uint8_t type);
+int sd_bus_message_new_signal(sd_bus *bus, sd_bus_message **m, const char *path, const char *interface, const char *member);
+int sd_bus_message_new_method_call(sd_bus *bus, sd_bus_message **m, const char *destination, const char *path, const char *interface, const char *member);
+int sd_bus_message_new_method_return(sd_bus_message *call, sd_bus_message **m);
+int sd_bus_message_new_method_error(sd_bus_message *call, sd_bus_message **m, const sd_bus_error *e);
+int sd_bus_message_new_method_errorf(sd_bus_message *call, sd_bus_message **m, const char *name, const char *format, ...) _sd_printf_(4, 5);
+int sd_bus_message_new_method_errno(sd_bus_message *call, sd_bus_message **m, int error, const sd_bus_error *e);
+int sd_bus_message_new_method_errnof(sd_bus_message *call, sd_bus_message **m, int error, const char *format, ...) _sd_printf_(4, 5);
+
+sd_bus_message* sd_bus_message_ref(sd_bus_message *m);
+sd_bus_message* sd_bus_message_unref(sd_bus_message *m);
+
+int sd_bus_message_seal(sd_bus_message *m, uint64_t cookie, uint64_t timeout_usec);
+
+int sd_bus_message_get_type(sd_bus_message *m, uint8_t *type);
+int sd_bus_message_get_cookie(sd_bus_message *m, uint64_t *cookie);
+int sd_bus_message_get_reply_cookie(sd_bus_message *m, uint64_t *cookie);
+int sd_bus_message_get_priority(sd_bus_message *m, int64_t *priority) _sd_deprecated_;
+
+int sd_bus_message_get_expect_reply(sd_bus_message *m);
+int sd_bus_message_get_auto_start(sd_bus_message *m);
+int sd_bus_message_get_allow_interactive_authorization(sd_bus_message *m);
+
+const char* sd_bus_message_get_signature(sd_bus_message *m, int complete);
+const char* sd_bus_message_get_path(sd_bus_message *m);
+const char* sd_bus_message_get_interface(sd_bus_message *m);
+const char* sd_bus_message_get_member(sd_bus_message *m);
+const char* sd_bus_message_get_destination(sd_bus_message *m);
+const char* sd_bus_message_get_sender(sd_bus_message *m);
+const sd_bus_error* sd_bus_message_get_error(sd_bus_message *m);
+int sd_bus_message_get_errno(sd_bus_message *m);
+
+int sd_bus_message_get_monotonic_usec(sd_bus_message *m, uint64_t *usec);
+int sd_bus_message_get_realtime_usec(sd_bus_message *m, uint64_t *usec);
+int sd_bus_message_get_seqnum(sd_bus_message *m, uint64_t *seqnum);
+
+sd_bus* sd_bus_message_get_bus(sd_bus_message *m);
+sd_bus_creds* sd_bus_message_get_creds(sd_bus_message *m); /* do not unref the result */
+
+int sd_bus_message_is_signal(sd_bus_message *m, const char *interface, const char *member);
+int sd_bus_message_is_method_call(sd_bus_message *m, const char *interface, const char *member);
+int sd_bus_message_is_method_error(sd_bus_message *m, const char *name);
+int sd_bus_message_is_empty(sd_bus_message *m);
+int sd_bus_message_has_signature(sd_bus_message *m, const char *signature);
+
+int sd_bus_message_set_expect_reply(sd_bus_message *m, int b);
+int sd_bus_message_set_auto_start(sd_bus_message *m, int b);
+int sd_bus_message_set_allow_interactive_authorization(sd_bus_message *m, int b);
+
+int sd_bus_message_set_destination(sd_bus_message *m, const char *destination);
+int sd_bus_message_set_sender(sd_bus_message *m, const char *sender);
+int sd_bus_message_set_priority(sd_bus_message *m, int64_t priority) _sd_deprecated_;
+
+int sd_bus_message_append(sd_bus_message *m, const char *types, ...);
+int sd_bus_message_appendv(sd_bus_message *m, const char *types, va_list ap);
+int sd_bus_message_append_basic(sd_bus_message *m, char type, const void *p);
+int sd_bus_message_append_array(sd_bus_message *m, char type, const void *ptr, size_t size);
+int sd_bus_message_append_array_space(sd_bus_message *m, char type, size_t size, void **ptr);
+int sd_bus_message_append_array_iovec(sd_bus_message *m, char type, const struct iovec *iov, unsigned n);
+int sd_bus_message_append_array_memfd(sd_bus_message *m, char type, int memfd, uint64_t offset, uint64_t size);
+int sd_bus_message_append_string_space(sd_bus_message *m, size_t size, char **s);
+int sd_bus_message_append_string_iovec(sd_bus_message *m, const struct iovec *iov, unsigned n);
+int sd_bus_message_append_string_memfd(sd_bus_message *m, int memfd, uint64_t offset, uint64_t size);
+int sd_bus_message_append_strv(sd_bus_message *m, char **l);
+int sd_bus_message_open_container(sd_bus_message *m, char type, const char *contents);
+int sd_bus_message_close_container(sd_bus_message *m);
+int sd_bus_message_copy(sd_bus_message *m, sd_bus_message *source, int all);
+
+int sd_bus_message_read(sd_bus_message *m, const char *types, ...);
+int sd_bus_message_readv(sd_bus_message *m, const char *types, va_list ap);
+int sd_bus_message_read_basic(sd_bus_message *m, char type, void *p);
+int sd_bus_message_read_array(sd_bus_message *m, char type, const void **ptr, size_t *size);
+int sd_bus_message_read_strv(sd_bus_message *m, char ***l); /* free the result! */
+int sd_bus_message_skip(sd_bus_message *m, const char *types);
+int sd_bus_message_enter_container(sd_bus_message *m, char type, const char *contents);
+int sd_bus_message_exit_container(sd_bus_message *m);
+int sd_bus_message_peek_type(sd_bus_message *m, char *type, const char **contents);
+int sd_bus_message_verify_type(sd_bus_message *m, char type, const char *contents);
+int sd_bus_message_at_end(sd_bus_message *m, int complete);
+int sd_bus_message_rewind(sd_bus_message *m, int complete);
+int sd_bus_message_sensitive(sd_bus_message *m);
+
+int sd_bus_message_dump(sd_bus_message *m, FILE *f, uint64_t flags);
+
+/* Bus management */
+
+int sd_bus_get_unique_name(sd_bus *bus, const char **unique);
+int sd_bus_request_name(sd_bus *bus, const char *name, uint64_t flags);
+int sd_bus_request_name_async(sd_bus *bus, sd_bus_slot **ret_slot, const char *name, uint64_t flags, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_release_name(sd_bus *bus, const char *name);
+int sd_bus_release_name_async(sd_bus *bus, sd_bus_slot **ret_slot, const char *name, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_list_names(sd_bus *bus, char ***acquired, char ***activatable); /* free the results */
+int sd_bus_get_name_creds(sd_bus *bus, const char *name, uint64_t mask, sd_bus_creds **creds); /* unref the result! */
+int sd_bus_get_name_machine_id(sd_bus *bus, const char *name, sd_id128_t *machine);
+
+/* Convenience calls */
+
+int sd_bus_call_methodv(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, sd_bus_message **reply, const char *types, va_list ap);
+int sd_bus_call_method(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, sd_bus_message **reply, const char *types, ...);
+int sd_bus_call_method_asyncv(sd_bus *bus, sd_bus_slot **slot, const char *destination, const char *path, const char *interface, const char *member, sd_bus_message_handler_t callback, void *userdata, const char *types, va_list ap);
+int sd_bus_call_method_async(sd_bus *bus, sd_bus_slot **slot, const char *destination, const char *path, const char *interface, const char *member, sd_bus_message_handler_t callback, void *userdata, const char *types, ...);
+int sd_bus_get_property(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, sd_bus_message **reply, const char *type);
+int sd_bus_get_property_trivial(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, char type, void *ret_ptr);
+int sd_bus_get_property_string(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, char **ret); /* free the result! */
+int sd_bus_get_property_strv(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, char ***ret); /* free the result! */
+int sd_bus_set_propertyv(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, const char *type, va_list ap);
+int sd_bus_set_property(sd_bus *bus, const char *destination, const char *path, const char *interface, const char *member, sd_bus_error *ret_error, const char *type, ...);
+
+int sd_bus_reply_method_returnv(sd_bus_message *call, const char *types, va_list ap);
+int sd_bus_reply_method_return(sd_bus_message *call, const char *types, ...);
+int sd_bus_reply_method_error(sd_bus_message *call, const sd_bus_error *e);
+int sd_bus_reply_method_errorfv(sd_bus_message *call, const char *name, const char *format, va_list ap) _sd_printf_(3, 0);
+int sd_bus_reply_method_errorf(sd_bus_message *call, const char *name, const char *format, ...) _sd_printf_(3, 4);
+int sd_bus_reply_method_errno(sd_bus_message *call, int error, const sd_bus_error *e);
+int sd_bus_reply_method_errnofv(sd_bus_message *call, int error, const char *format, va_list ap) _sd_printf_(3, 0);
+int sd_bus_reply_method_errnof(sd_bus_message *call, int error, const char *format, ...) _sd_printf_(3, 4);
+
+int sd_bus_emit_signalv(sd_bus *bus, const char *path, const char *interface, const char *member, const char *types, va_list ap);
+int sd_bus_emit_signal(sd_bus *bus, const char *path, const char *interface, const char *member, const char *types, ...);
+
+int sd_bus_emit_properties_changed_strv(sd_bus *bus, const char *path, const char *interface, char **names);
+int sd_bus_emit_properties_changed(sd_bus *bus, const char *path, const char *interface, const char *name, ...) _sd_sentinel_;
+
+int sd_bus_emit_object_added(sd_bus *bus, const char *path);
+int sd_bus_emit_object_removed(sd_bus *bus, const char *path);
+int sd_bus_emit_interfaces_added_strv(sd_bus *bus, const char *path, char **interfaces);
+int sd_bus_emit_interfaces_added(sd_bus *bus, const char *path, const char *interface, ...) _sd_sentinel_;
+int sd_bus_emit_interfaces_removed_strv(sd_bus *bus, const char *path, char **interfaces);
+int sd_bus_emit_interfaces_removed(sd_bus *bus, const char *path, const char *interface, ...) _sd_sentinel_;
+
+int sd_bus_query_sender_creds(sd_bus_message *m, uint64_t mask, sd_bus_creds **creds);
+int sd_bus_query_sender_privilege(sd_bus_message *m, int capability);
+
+int sd_bus_match_signal(sd_bus *bus, sd_bus_slot **ret, const char *sender, const char *path, const char *interface, const char *member, sd_bus_message_handler_t callback, void *userdata);
+int sd_bus_match_signal_async(sd_bus *bus, sd_bus_slot **ret, const char *sender, const char *path, const char *interface, const char *member, sd_bus_message_handler_t match_callback, sd_bus_message_handler_t add_callback, void *userdata);
+
+/* Credential handling */
+
+int sd_bus_creds_new_from_pid(sd_bus_creds **ret, pid_t pid, uint64_t creds_mask);
+sd_bus_creds* sd_bus_creds_ref(sd_bus_creds *c);
+sd_bus_creds* sd_bus_creds_unref(sd_bus_creds *c);
+uint64_t sd_bus_creds_get_mask(const sd_bus_creds *c);
+uint64_t sd_bus_creds_get_augmented_mask(const sd_bus_creds *c);
+
+int sd_bus_creds_get_pid(sd_bus_creds *c, pid_t *pid);
+int sd_bus_creds_get_ppid(sd_bus_creds *c, pid_t *ppid);
+int sd_bus_creds_get_tid(sd_bus_creds *c, pid_t *tid);
+int sd_bus_creds_get_uid(sd_bus_creds *c, uid_t *uid);
+int sd_bus_creds_get_euid(sd_bus_creds *c, uid_t *euid);
+int sd_bus_creds_get_suid(sd_bus_creds *c, uid_t *suid);
+int sd_bus_creds_get_fsuid(sd_bus_creds *c, uid_t *fsuid);
+int sd_bus_creds_get_gid(sd_bus_creds *c, gid_t *gid);
+int sd_bus_creds_get_egid(sd_bus_creds *c, gid_t *egid);
+int sd_bus_creds_get_sgid(sd_bus_creds *c, gid_t *sgid);
+int sd_bus_creds_get_fsgid(sd_bus_creds *c, gid_t *fsgid);
+int sd_bus_creds_get_supplementary_gids(sd_bus_creds *c, const gid_t **gids);
+int sd_bus_creds_get_comm(sd_bus_creds *c, const char **comm);
+int sd_bus_creds_get_tid_comm(sd_bus_creds *c, const char **comm);
+int sd_bus_creds_get_exe(sd_bus_creds *c, const char **exe);
+int sd_bus_creds_get_cmdline(sd_bus_creds *c, char ***cmdline);
+int sd_bus_creds_get_cgroup(sd_bus_creds *c, const char **cgroup);
+int sd_bus_creds_get_unit(sd_bus_creds *c, const char **unit);
+int sd_bus_creds_get_slice(sd_bus_creds *c, const char **slice);
+int sd_bus_creds_get_user_unit(sd_bus_creds *c, const char **unit);
+int sd_bus_creds_get_user_slice(sd_bus_creds *c, const char **slice);
+int sd_bus_creds_get_session(sd_bus_creds *c, const char **session);
+int sd_bus_creds_get_owner_uid(sd_bus_creds *c, uid_t *uid);
+int sd_bus_creds_has_effective_cap(sd_bus_creds *c, int capability);
+int sd_bus_creds_has_permitted_cap(sd_bus_creds *c, int capability);
+int sd_bus_creds_has_inheritable_cap(sd_bus_creds *c, int capability);
+int sd_bus_creds_has_bounding_cap(sd_bus_creds *c, int capability);
+int sd_bus_creds_get_selinux_context(sd_bus_creds *c, const char **context);
+int sd_bus_creds_get_audit_session_id(sd_bus_creds *c, uint32_t *sessionid);
+int sd_bus_creds_get_audit_login_uid(sd_bus_creds *c, uid_t *loginuid);
+int sd_bus_creds_get_tty(sd_bus_creds *c, const char **tty);
+int sd_bus_creds_get_unique_name(sd_bus_creds *c, const char **name);
+int sd_bus_creds_get_well_known_names(sd_bus_creds *c, char ***names);
+int sd_bus_creds_get_description(sd_bus_creds *c, const char **name);
+
+/* Error structures */
+
+#define SD_BUS_ERROR_MAKE_CONST(name, message) ((const sd_bus_error) {(name), (message), 0})
+#define SD_BUS_ERROR_NULL SD_BUS_ERROR_MAKE_CONST(NULL, NULL)
+
+void sd_bus_error_free(sd_bus_error *e);
+int sd_bus_error_set(sd_bus_error *e, const char *name, const char *message);
+int sd_bus_error_setf(sd_bus_error *e, const char *name, const char *format, ...) _sd_printf_(3, 4);
+int sd_bus_error_set_const(sd_bus_error *e, const char *name, const char *message);
+int sd_bus_error_set_errno(sd_bus_error *e, int error);
+int sd_bus_error_set_errnof(sd_bus_error *e, int error, const char *format, ...) _sd_printf_(3, 4);
+int sd_bus_error_set_errnofv(sd_bus_error *e, int error, const char *format, va_list ap) _sd_printf_(3,0);
+int sd_bus_error_get_errno(const sd_bus_error *e);
+int sd_bus_error_copy(sd_bus_error *dest, const sd_bus_error *e);
+int sd_bus_error_move(sd_bus_error *dest, sd_bus_error *e);
+int sd_bus_error_is_set(const sd_bus_error *e);
+int sd_bus_error_has_name(const sd_bus_error *e, const char *name);
+int sd_bus_error_has_names_sentinel(const sd_bus_error *e, ...) _sd_sentinel_;
+#define sd_bus_error_has_names(e, ...) sd_bus_error_has_names_sentinel(e, __VA_ARGS__, NULL)
+
+#define SD_BUS_ERROR_MAP(_name, _code) \
+ { \
+ .name = _name, \
+ .code = _code, \
+ }
+#define SD_BUS_ERROR_MAP_END \
+ { \
+ .name = NULL, \
+ .code = - 'x', \
+ }
+
+int sd_bus_error_add_map(const sd_bus_error_map *map);
+
+/* Auxiliary macros */
+
+#define SD_BUS_MESSAGE_APPEND_ID128(x) 16, \
+ (x).bytes[0], (x).bytes[1], (x).bytes[2], (x).bytes[3], \
+ (x).bytes[4], (x).bytes[5], (x).bytes[6], (x).bytes[7], \
+ (x).bytes[8], (x).bytes[9], (x).bytes[10], (x).bytes[11], \
+ (x).bytes[12], (x).bytes[13], (x).bytes[14], (x).bytes[15]
+
+#define SD_BUS_MESSAGE_READ_ID128(x) 16, \
+ &(x).bytes[0], &(x).bytes[1], &(x).bytes[2], &(x).bytes[3], \
+ &(x).bytes[4], &(x).bytes[5], &(x).bytes[6], &(x).bytes[7], \
+ &(x).bytes[8], &(x).bytes[9], &(x).bytes[10], &(x).bytes[11], \
+ &(x).bytes[12], &(x).bytes[13], &(x).bytes[14], &(x).bytes[15]
+
+/* Label escaping */
+
+int sd_bus_path_encode(const char *prefix, const char *external_id, char **ret_path);
+int sd_bus_path_encode_many(char **out, const char *path_template, ...);
+int sd_bus_path_decode(const char *path, const char *prefix, char **ret_external_id);
+int sd_bus_path_decode_many(const char *path, const char *path_template, ...);
+
+/* Tracking peers */
+
+int sd_bus_track_new(sd_bus *bus, sd_bus_track **track, sd_bus_track_handler_t handler, void *userdata);
+sd_bus_track* sd_bus_track_ref(sd_bus_track *track);
+sd_bus_track* sd_bus_track_unref(sd_bus_track *track);
+
+sd_bus* sd_bus_track_get_bus(sd_bus_track *track);
+void* sd_bus_track_get_userdata(sd_bus_track *track);
+void* sd_bus_track_set_userdata(sd_bus_track *track, void *userdata);
+
+int sd_bus_track_add_sender(sd_bus_track *track, sd_bus_message *m);
+int sd_bus_track_remove_sender(sd_bus_track *track, sd_bus_message *m);
+int sd_bus_track_add_name(sd_bus_track *track, const char *name);
+int sd_bus_track_remove_name(sd_bus_track *track, const char *name);
+
+int sd_bus_track_set_recursive(sd_bus_track *track, int b);
+int sd_bus_track_get_recursive(sd_bus_track *track);
+
+unsigned sd_bus_track_count(sd_bus_track *track);
+int sd_bus_track_count_sender(sd_bus_track *track, sd_bus_message *m);
+int sd_bus_track_count_name(sd_bus_track *track, const char *name);
+
+const char* sd_bus_track_contains(sd_bus_track *track, const char *name);
+const char* sd_bus_track_first(sd_bus_track *track);
+const char* sd_bus_track_next(sd_bus_track *track);
+
+int sd_bus_track_set_destroy_callback(sd_bus_track *s, sd_bus_destroy_t callback);
+int sd_bus_track_get_destroy_callback(sd_bus_track *s, sd_bus_destroy_t *ret);
+
+/* Define helpers so that __attribute__((cleanup(sd_bus_unrefp))) and similar may be used. */
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus, sd_bus_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus, sd_bus_close_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus, sd_bus_flush_close_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus_slot, sd_bus_slot_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus_message, sd_bus_message_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus_creds, sd_bus_creds_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_bus_track, sd_bus_track_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-daemon.h b/src/systemd/sd-daemon.h
new file mode 100644
index 0000000..f42a5d8
--- /dev/null
+++ b/src/systemd/sd-daemon.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddaemonhfoo
+#define foosddaemonhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/*
+ The following functionality is provided:
+
+ - Support for logging with log levels on stderr
+ - File descriptor passing for socket-based activation
+ - Daemon startup and status notification
+ - Detection of systemd boots
+
+ See sd-daemon(3) for more information.
+*/
+
+/*
+ Log levels for usage on stderr:
+
+ fprintf(stderr, SD_NOTICE "Hello World!\n");
+
+ This is similar to printk() usage in the kernel.
+*/
+#define SD_EMERG "<0>" /* system is unusable */
+#define SD_ALERT "<1>" /* action must be taken immediately */
+#define SD_CRIT "<2>" /* critical conditions */
+#define SD_ERR "<3>" /* error conditions */
+#define SD_WARNING "<4>" /* warning conditions */
+#define SD_NOTICE "<5>" /* normal but significant condition */
+#define SD_INFO "<6>" /* informational */
+#define SD_DEBUG "<7>" /* debug-level messages */
+
+/* The first passed file descriptor is fd 3 */
+#define SD_LISTEN_FDS_START 3
+
+/*
+ Returns how many file descriptors have been passed, or a negative
+ errno code on failure. Optionally, removes the $LISTEN_FDS and
+ $LISTEN_PID file descriptors from the environment (recommended, but
+ problematic in threaded environments). If r is the return value of
+ this function you'll find the file descriptors passed as fds
+ SD_LISTEN_FDS_START to SD_LISTEN_FDS_START+r-1. Returns a negative
+ errno style error code on failure. This function call ensures that
+ the FD_CLOEXEC flag is set for the passed file descriptors, to make
+ sure they are not passed on to child processes. If FD_CLOEXEC shall
+ not be set, the caller needs to unset it after this call for all file
+ descriptors that are used.
+
+ See sd_listen_fds(3) for more information.
+*/
+int sd_listen_fds(int unset_environment);
+
+int sd_listen_fds_with_names(int unset_environment, char ***names);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is a FIFO in the file system stored under the
+ specified path, 0 otherwise. If path is NULL a path name check will
+ not be done and the call only verifies if the file descriptor
+ refers to a FIFO. Returns a negative errno style error code on
+ failure.
+
+ See sd_is_fifo(3) for more information.
+*/
+int sd_is_fifo(int fd, const char *path);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is a special character device on the file
+ system stored under the specified path, 0 otherwise.
+ If path is NULL a path name check will not be done and the call
+ only verifies if the file descriptor refers to a special character.
+ Returns a negative errno style error code on failure.
+
+ See sd_is_special(3) for more information.
+*/
+int sd_is_special(int fd, const char *path);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is a socket of the specified family (AF_INET,
+ ...) and type (SOCK_DGRAM, SOCK_STREAM, ...), 0 otherwise. If
+ family is 0 a socket family check will not be done. If type is 0 a
+ socket type check will not be done and the call only verifies if
+ the file descriptor refers to a socket. If listening is > 0 it is
+ verified that the socket is in listening mode. (i.e. listen() has
+ been called) If listening is == 0 it is verified that the socket is
+ not in listening mode. If listening is < 0 no listening mode check
+ is done. Returns a negative errno style error code on failure.
+
+ See sd_is_socket(3) for more information.
+*/
+int sd_is_socket(int fd, int family, int type, int listening);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is an Internet socket, of the specified family
+ (either AF_INET or AF_INET6) and the specified type (SOCK_DGRAM,
+ SOCK_STREAM, ...), 0 otherwise. If version is 0 a protocol version
+ check is not done. If type is 0 a socket type check will not be
+ done. If port is 0 a socket port check will not be done. The
+ listening flag is used the same way as in sd_is_socket(). Returns a
+ negative errno style error code on failure.
+
+ See sd_is_socket_inet(3) for more information.
+*/
+int sd_is_socket_inet(int fd, int family, int type, int listening, uint16_t port);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if the
+ file descriptor is an Internet socket of the specified type
+ (SOCK_DGRAM, SOCK_STREAM, ...), and if the address of the socket is
+ the same as the address specified by addr. The listening flag is used
+ the same way as in sd_is_socket(). Returns a negative errno style
+ error code on failure.
+
+ See sd_is_socket_sockaddr(3) for more information.
+*/
+int sd_is_socket_sockaddr(int fd, int type, const struct sockaddr* addr, unsigned addr_len, int listening);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is an AF_UNIX socket of the specified type
+ (SOCK_DGRAM, SOCK_STREAM, ...) and path, 0 otherwise. If type is 0
+ a socket type check will not be done. If path is NULL a socket path
+ check will not be done. For normal AF_UNIX sockets set length to
+ 0. For abstract namespace sockets set length to the length of the
+ socket name (including the initial 0 byte), and pass the full
+ socket path in path (including the initial 0 byte). The listening
+ flag is used the same way as in sd_is_socket(). Returns a negative
+ errno style error code on failure.
+
+ See sd_is_socket_unix(3) for more information.
+*/
+int sd_is_socket_unix(int fd, int type, int listening, const char *path, size_t length);
+
+/*
+ Helper call for identifying a passed file descriptor. Returns 1 if
+ the file descriptor is a POSIX Message Queue of the specified name,
+ 0 otherwise. If path is NULL a message queue name check is not
+ done. Returns a negative errno style error code on failure.
+
+ See sd_is_mq(3) for more information.
+*/
+int sd_is_mq(int fd, const char *path);
+
+/*
+ Informs systemd about changed daemon state. This takes a number of
+ newline separated environment-style variable assignments in a
+ string. The following variables are known:
+
+ MAINPID=... The main PID of a daemon, in case systemd did not
+ fork off the process itself. Example: "MAINPID=4711"
+
+ READY=1 Tells systemd that daemon startup or daemon reload
+ is finished (only relevant for services of Type=notify).
+ The passed argument is a boolean "1" or "0". Since there
+ is little value in signaling non-readiness the only
+ value daemons should send is "READY=1".
+
+ RELOADING=1 Tell systemd that the daemon began reloading its
+ configuration. When the configuration has been
+ reloaded completely, READY=1 should be sent to inform
+ systemd about this.
+
+ STOPPING=1 Tells systemd that the daemon is about to go down.
+
+ STATUS=... Passes a single-line status string back to systemd
+ that describes the daemon state. This is free-form
+ and can be used for various purposes: general state
+ feedback, fsck-like programs could pass completion
+ percentages and failing programs could pass a human
+ readable error message. Example: "STATUS=Completed
+ 66% of file system check..."
+
+ ERRNO=... If a daemon fails, the errno-style error code,
+ formatted as string. Example: "ERRNO=2" for ENOENT.
+
+ BUSERROR=... If a daemon fails, the D-Bus error-style error
+ code. Example: "BUSERROR=org.freedesktop.DBus.Error.TimedOut"
+
+ WATCHDOG=1 Tells systemd to update the watchdog timestamp.
+ Services using this feature should do this in
+ regular intervals. A watchdog framework can use the
+ timestamps to detect failed services. Also see
+ sd_watchdog_enabled() below.
+
+ WATCHDOG_USEC=...
+ Reset watchdog_usec value during runtime.
+ To reset watchdog_usec value, start the service again.
+ Example: "WATCHDOG_USEC=20000000"
+
+ FDSTORE=1 Store the file descriptors passed along with the
+ message in the per-service file descriptor store,
+ and pass them to the main process again on next
+ invocation. This variable is only supported with
+ sd_pid_notify_with_fds().
+
+ FDSTOREREMOVE=1
+ Remove one or more file descriptors from the file
+ descriptor store, identified by the name specified
+ in FDNAME=, see below.
+
+ FDNAME= A name to assign to new file descriptors stored in the
+ file descriptor store, or the name of the file descriptors
+ to remove in case of FDSTOREREMOVE=1.
+
+ Daemons can choose to send additional variables. However, it is
+ recommended to prefix variable names not listed above with X_.
+
+ Returns a negative errno-style error code on failure. Returns > 0
+ if systemd could be notified, 0 if it couldn't possibly because
+ systemd is not running.
+
+ Example: When a daemon finished starting up, it could issue this
+ call to notify systemd about it:
+
+ sd_notify(0, "READY=1");
+
+ See sd_notifyf() for more complete examples.
+
+ See sd_notify(3) for more information.
+*/
+int sd_notify(int unset_environment, const char *state);
+
+/*
+ Similar to sd_notify() but takes a format string.
+
+ Example 1: A daemon could send the following after initialization:
+
+ sd_notifyf(0, "READY=1\n"
+ "STATUS=Processing requests...\n"
+ "MAINPID=%lu",
+ (unsigned long) getpid());
+
+ Example 2: A daemon could send the following shortly before
+ exiting, on failure:
+
+ sd_notifyf(0, "STATUS=Failed to start up: %s\n"
+ "ERRNO=%i",
+ strerror(errno),
+ errno);
+
+ See sd_notifyf(3) for more information.
+*/
+int sd_notifyf(int unset_environment, const char *format, ...) _sd_printf_(2,3);
+
+/*
+ Similar to sd_notify(), but send the message on behalf of another
+ process, if the appropriate permissions are available.
+*/
+int sd_pid_notify(pid_t pid, int unset_environment, const char *state);
+
+/*
+ Similar to sd_notifyf(), but send the message on behalf of another
+ process, if the appropriate permissions are available.
+*/
+int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) _sd_printf_(3,4);
+
+/*
+ Similar to sd_pid_notify(), but also passes the specified fd array
+ to the service manager for storage. This is particularly useful for
+ FDSTORE=1 messages.
+*/
+int sd_pid_notify_with_fds(pid_t pid, int unset_environment, const char *state, const int *fds, unsigned n_fds);
+
+/*
+ Returns > 0 if synchronization with systemd succeeded. Returns < 0
+ on error. Returns 0 if $NOTIFY_SOCKET was not set. Note that the
+ timeout parameter of this function call takes the timeout in µs, and
+ will be passed to ppoll(2), hence the behaviour will be similar to
+ ppoll(2). This function can be called after sending a status message
+ to systemd, if one needs to synchronize against reception of the
+ status messages sent before this call is made. Therefore, this
+ cannot be used to know if the status message was processed
+ successfully, but to only synchronize against its consumption.
+*/
+int sd_notify_barrier(int unset_environment, uint64_t timeout);
+
+/*
+ Returns > 0 if the system was booted with systemd. Returns < 0 on
+ error. Returns 0 if the system was not booted with systemd. Note
+ that all of the functions above handle non-systemd boots just
+ fine. You should NOT protect them with a call to this function. Also
+ note that this function checks whether the system, not the user
+ session is controlled by systemd. However the functions above work
+ for both user and system services.
+
+ See sd_booted(3) for more information.
+*/
+int sd_booted(void);
+
+/*
+ Returns > 0 if the service manager expects watchdog keep-alive
+ events to be sent regularly via sd_notify(0, "WATCHDOG=1"). Returns
+ 0 if it does not expect this. If the usec argument is non-NULL
+ returns the watchdog timeout in µs after which the service manager
+ will act on a process that has not sent a watchdog keep alive
+ message. This function is useful to implement services that
+ recognize automatically if they are being run under supervision of
+ systemd with WatchdogSec= set. It is recommended for clients to
+ generate keep-alive pings via sd_notify(0, "WATCHDOG=1") every half
+ of the returned time.
+
+ See sd_watchdog_enabled(3) for more information.
+*/
+int sd_watchdog_enabled(int unset_environment, uint64_t *usec);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-device.h b/src/systemd/sd-device.h
new file mode 100644
index 0000000..78fe584
--- /dev/null
+++ b/src/systemd/sd-device.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddevicehfoo
+#define foosddevicehfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_device sd_device;
+typedef struct sd_device_enumerator sd_device_enumerator;
+typedef struct sd_device_monitor sd_device_monitor;
+
+/* callback */
+
+typedef int (*sd_device_monitor_handler_t)(sd_device_monitor *m, sd_device *device, void *userdata);
+
+/* device */
+
+sd_device *sd_device_ref(sd_device *device);
+sd_device *sd_device_unref(sd_device *device);
+
+int sd_device_new_from_syspath(sd_device **ret, const char *syspath);
+int sd_device_new_from_devnum(sd_device **ret, char type, dev_t devnum);
+int sd_device_new_from_subsystem_sysname(sd_device **ret, const char *subsystem, const char *sysname);
+int sd_device_new_from_device_id(sd_device **ret, const char *id);
+
+int sd_device_get_parent(sd_device *child, sd_device **ret);
+int sd_device_get_parent_with_subsystem_devtype(sd_device *child, const char *subsystem, const char *devtype, sd_device **ret);
+
+int sd_device_get_syspath(sd_device *device, const char **ret);
+int sd_device_get_subsystem(sd_device *device, const char **ret);
+int sd_device_get_devtype(sd_device *device, const char **ret);
+int sd_device_get_devnum(sd_device *device, dev_t *devnum);
+int sd_device_get_ifindex(sd_device *device, int *ifindex);
+int sd_device_get_driver(sd_device *device, const char **ret);
+int sd_device_get_devpath(sd_device *device, const char **ret);
+int sd_device_get_devname(sd_device *device, const char **ret);
+int sd_device_get_sysname(sd_device *device, const char **ret);
+int sd_device_get_sysnum(sd_device *device, const char **ret);
+
+int sd_device_get_is_initialized(sd_device *device);
+int sd_device_get_usec_since_initialized(sd_device *device, uint64_t *usec);
+
+const char *sd_device_get_tag_first(sd_device *device);
+const char *sd_device_get_tag_next(sd_device *device);
+const char *sd_device_get_current_tag_first(sd_device *device);
+const char *sd_device_get_current_tag_next(sd_device *device);
+const char *sd_device_get_devlink_first(sd_device *device);
+const char *sd_device_get_devlink_next(sd_device *device);
+const char *sd_device_get_property_first(sd_device *device, const char **value);
+const char *sd_device_get_property_next(sd_device *device, const char **value);
+const char *sd_device_get_sysattr_first(sd_device *device);
+const char *sd_device_get_sysattr_next(sd_device *device);
+
+int sd_device_has_tag(sd_device *device, const char *tag);
+int sd_device_has_current_tag(sd_device *device, const char *tag);
+int sd_device_get_property_value(sd_device *device, const char *key, const char **value);
+int sd_device_get_sysattr_value(sd_device *device, const char *sysattr, const char **_value);
+
+int sd_device_set_sysattr_value(sd_device *device, const char *sysattr, const char *value);
+int sd_device_set_sysattr_valuef(sd_device *device, const char *sysattr, const char *format, ...) _sd_printf_(3, 4);
+
+/* device enumerator */
+
+int sd_device_enumerator_new(sd_device_enumerator **ret);
+sd_device_enumerator *sd_device_enumerator_ref(sd_device_enumerator *enumerator);
+sd_device_enumerator *sd_device_enumerator_unref(sd_device_enumerator *enumerator);
+
+sd_device *sd_device_enumerator_get_device_first(sd_device_enumerator *enumerator);
+sd_device *sd_device_enumerator_get_device_next(sd_device_enumerator *enumerator);
+sd_device *sd_device_enumerator_get_subsystem_first(sd_device_enumerator *enumerator);
+sd_device *sd_device_enumerator_get_subsystem_next(sd_device_enumerator *enumerator);
+
+int sd_device_enumerator_add_match_subsystem(sd_device_enumerator *enumerator, const char *subsystem, int match);
+int sd_device_enumerator_add_match_sysattr(sd_device_enumerator *enumerator, const char *sysattr, const char *value, int match);
+int sd_device_enumerator_add_match_property(sd_device_enumerator *enumerator, const char *property, const char *value);
+int sd_device_enumerator_add_match_sysname(sd_device_enumerator *enumerator, const char *sysname);
+int sd_device_enumerator_add_match_tag(sd_device_enumerator *enumerator, const char *tag);
+int sd_device_enumerator_add_match_parent(sd_device_enumerator *enumerator, sd_device *parent);
+int sd_device_enumerator_allow_uninitialized(sd_device_enumerator *enumerator);
+
+/* device monitor */
+
+int sd_device_monitor_new(sd_device_monitor **ret);
+sd_device_monitor *sd_device_monitor_ref(sd_device_monitor *m);
+sd_device_monitor *sd_device_monitor_unref(sd_device_monitor *m);
+
+int sd_device_monitor_set_receive_buffer_size(sd_device_monitor *m, size_t size);
+int sd_device_monitor_attach_event(sd_device_monitor *m, sd_event *event);
+int sd_device_monitor_detach_event(sd_device_monitor *m);
+sd_event *sd_device_monitor_get_event(sd_device_monitor *m);
+sd_event_source *sd_device_monitor_get_event_source(sd_device_monitor *m);
+int sd_device_monitor_start(sd_device_monitor *m, sd_device_monitor_handler_t callback, void *userdata);
+int sd_device_monitor_stop(sd_device_monitor *m);
+
+int sd_device_monitor_filter_add_match_subsystem_devtype(sd_device_monitor *m, const char *subsystem, const char *devtype);
+int sd_device_monitor_filter_add_match_tag(sd_device_monitor *m, const char *tag);
+int sd_device_monitor_filter_update(sd_device_monitor *m);
+int sd_device_monitor_filter_remove(sd_device_monitor *m);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_device, sd_device_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_device_enumerator, sd_device_enumerator_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_device_monitor, sd_device_monitor_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp-client.h b/src/systemd/sd-dhcp-client.h
new file mode 100644
index 0000000..d4ee16e
--- /dev/null
+++ b/src/systemd/sd-dhcp-client.h
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddhcpclienthfoo
+#define foosddhcpclienthfoo
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <stdbool.h>
+
+#include "sd-dhcp-lease.h"
+#include "sd-dhcp-option.h"
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ SD_DHCP_CLIENT_EVENT_STOP = 0,
+ SD_DHCP_CLIENT_EVENT_IP_ACQUIRE = 1,
+ SD_DHCP_CLIENT_EVENT_IP_CHANGE = 2,
+ SD_DHCP_CLIENT_EVENT_EXPIRED = 3,
+ SD_DHCP_CLIENT_EVENT_RENEW = 4,
+ SD_DHCP_CLIENT_EVENT_SELECTING = 5,
+};
+
+enum {
+ SD_DHCP_OPTION_PAD = 0,
+ SD_DHCP_OPTION_SUBNET_MASK = 1,
+ SD_DHCP_OPTION_TIME_OFFSET = 2,
+ SD_DHCP_OPTION_ROUTER = 3,
+ SD_DHCP_OPTION_DOMAIN_NAME_SERVER = 6,
+ SD_DHCP_OPTION_LPR_SERVER = 9,
+ SD_DHCP_OPTION_HOST_NAME = 12,
+ SD_DHCP_OPTION_BOOT_FILE_SIZE = 13,
+ SD_DHCP_OPTION_DOMAIN_NAME = 15,
+ SD_DHCP_OPTION_ROOT_PATH = 17,
+ SD_DHCP_OPTION_ENABLE_IP_FORWARDING = 19,
+ SD_DHCP_OPTION_ENABLE_IP_FORWARDING_NL = 20,
+ SD_DHCP_OPTION_POLICY_FILTER = 21,
+ SD_DHCP_OPTION_INTERFACE_MDR = 22,
+ SD_DHCP_OPTION_INTERFACE_TTL = 23,
+ SD_DHCP_OPTION_INTERFACE_MTU_AGING_TIMEOUT = 24,
+ SD_DHCP_OPTION_INTERFACE_MTU = 26,
+ SD_DHCP_OPTION_BROADCAST = 28,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_ROUTER_DISCOVER = 31,
+ SD_DHCP_OPTION_STATIC_ROUTE = 33,
+ SD_DHCP_OPTION_NTP_SERVER = 42,
+ SD_DHCP_OPTION_VENDOR_SPECIFIC = 43,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_NETBIOS_NAMESERVER = 44,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_NETBIOS_NODETYPE = 46,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_NETBIOS_SCOPE = 47,
+ SD_DHCP_OPTION_REQUESTED_IP_ADDRESS = 50,
+ SD_DHCP_OPTION_IP_ADDRESS_LEASE_TIME = 51,
+ SD_DHCP_OPTION_OVERLOAD = 52,
+ SD_DHCP_OPTION_MESSAGE_TYPE = 53,
+ SD_DHCP_OPTION_SERVER_IDENTIFIER = 54,
+ SD_DHCP_OPTION_PARAMETER_REQUEST_LIST = 55,
+ SD_DHCP_OPTION_ERROR_MESSAGE = 56,
+ SD_DHCP_OPTION_MAXIMUM_MESSAGE_SIZE = 57,
+ SD_DHCP_OPTION_RENEWAL_T1_TIME = 58,
+ SD_DHCP_OPTION_REBINDING_T2_TIME = 59,
+ SD_DHCP_OPTION_VENDOR_CLASS_IDENTIFIER = 60,
+ SD_DHCP_OPTION_CLIENT_IDENTIFIER = 61,
+ SD_DHCP_OPTION_SMTP_SERVER = 69,
+ SD_DHCP_OPTION_POP3_SERVER = 70,
+ SD_DHCP_OPTION_USER_CLASS = 77,
+ SD_DHCP_OPTION_FQDN = 81,
+ SD_DHCP_OPTION_NEW_POSIX_TIMEZONE = 100,
+ SD_DHCP_OPTION_NEW_TZDB_TIMEZONE = 101,
+ SD_DHCP_OPTION_DOMAIN_SEARCH_LIST = 119,
+ SD_DHCP_OPTION_SIP_SERVER = 120,
+ SD_DHCP_OPTION_CLASSLESS_STATIC_ROUTE = 121,
+ SD_DHCP_OPTION_MUD_URL = 161,
+ SD_DHCP_OPTION_PRIVATE_BASE = 224,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_PRIVATE_CLASSLESS_STATIC_ROUTE = 249,
+ /* Windows 10 option to send when Anonymize=true */
+ SD_DHCP_OPTION_PRIVATE_PROXY_AUTODISCOVERY = 252,
+ SD_DHCP_OPTION_PRIVATE_LAST = 254,
+ SD_DHCP_OPTION_END = 255,
+};
+
+typedef struct sd_dhcp_client sd_dhcp_client;
+
+typedef int (*sd_dhcp_client_callback_t)(sd_dhcp_client *client, int event, void *userdata);
+int sd_dhcp_client_set_callback(
+ sd_dhcp_client *client,
+ sd_dhcp_client_callback_t cb,
+ void *userdata);
+
+int sd_dhcp_client_set_request_option(
+ sd_dhcp_client *client,
+ uint8_t option);
+int sd_dhcp_client_set_request_address(
+ sd_dhcp_client *client,
+ const struct in_addr *last_address);
+int sd_dhcp_client_set_request_broadcast(
+ sd_dhcp_client *client,
+ int broadcast);
+int sd_dhcp_client_set_ifindex(
+ sd_dhcp_client *client,
+ int interface_index);
+int sd_dhcp_client_set_mac(
+ sd_dhcp_client *client,
+ const uint8_t *addr,
+ const uint8_t *bcast_addr,
+ size_t addr_len,
+ uint16_t arp_type);
+int sd_dhcp_client_set_client_id(
+ sd_dhcp_client *client,
+ uint8_t type,
+ const uint8_t *data,
+ size_t data_len);
+int sd_dhcp_client_set_iaid_duid(
+ sd_dhcp_client *client,
+ bool iaid_set,
+ uint32_t iaid,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len);
+int sd_dhcp_client_set_iaid_duid_llt(
+ sd_dhcp_client *client,
+ bool iaid_set,
+ uint32_t iaid,
+ uint64_t llt_time);
+int sd_dhcp_client_set_duid(
+ sd_dhcp_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len);
+int sd_dhcp_client_set_duid_llt(
+ sd_dhcp_client *client,
+ uint64_t llt_time);
+int sd_dhcp_client_get_client_id(
+ sd_dhcp_client *client,
+ uint8_t *type,
+ const uint8_t **data,
+ size_t *data_len);
+int sd_dhcp_client_set_mtu(
+ sd_dhcp_client *client,
+ uint32_t mtu);
+int sd_dhcp_client_set_max_attempts(
+ sd_dhcp_client *client,
+ uint64_t attempt);
+int sd_dhcp_client_set_client_port(
+ sd_dhcp_client *client,
+ uint16_t port);
+int sd_dhcp_client_set_hostname(
+ sd_dhcp_client *client,
+ const char *hostname);
+int sd_dhcp_client_set_vendor_class_identifier(
+ sd_dhcp_client *client,
+ const char *vci);
+int sd_dhcp_client_set_mud_url(
+ sd_dhcp_client *client,
+ const char *mudurl);
+int sd_dhcp_client_set_user_class(
+ sd_dhcp_client *client,
+ char * const *user_class);
+int sd_dhcp_client_get_lease(
+ sd_dhcp_client *client,
+ sd_dhcp_lease **ret);
+int sd_dhcp_client_set_service_type(
+ sd_dhcp_client *client,
+ int type);
+int sd_dhcp_client_set_fallback_lease_lifetime(
+ sd_dhcp_client *client,
+ uint32_t fallback_lease_lifetime);
+
+int sd_dhcp_client_add_option(sd_dhcp_client *client, sd_dhcp_option *v);
+int sd_dhcp_client_add_vendor_option(sd_dhcp_client *client, sd_dhcp_option *v);
+
+int sd_dhcp_client_stop(sd_dhcp_client *client);
+int sd_dhcp_client_start(sd_dhcp_client *client);
+int sd_dhcp_client_send_release(sd_dhcp_client *client);
+int sd_dhcp_client_send_decline(sd_dhcp_client *client);
+int sd_dhcp_client_send_renew(sd_dhcp_client *client);
+
+sd_dhcp_client *sd_dhcp_client_ref(sd_dhcp_client *client);
+sd_dhcp_client *sd_dhcp_client_unref(sd_dhcp_client *client);
+
+/* NOTE: anonymize parameter is used to initialize PRL memory with different
+ * options when using RFC7844 Anonymity Profiles */
+int sd_dhcp_client_new(sd_dhcp_client **ret, int anonymize);
+
+int sd_dhcp_client_id_to_string(const void *data, size_t len, char **ret);
+
+int sd_dhcp_client_attach_event(
+ sd_dhcp_client *client,
+ sd_event *event,
+ int64_t priority);
+int sd_dhcp_client_detach_event(sd_dhcp_client *client);
+sd_event *sd_dhcp_client_get_event(sd_dhcp_client *client);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp_client, sd_dhcp_client_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp-lease.h b/src/systemd/sd-dhcp-lease.h
new file mode 100644
index 0000000..c255a1f
--- /dev/null
+++ b/src/systemd/sd-dhcp-lease.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddhcpleasehfoo
+#define foosddhcpleasehfoo
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_dhcp_lease sd_dhcp_lease;
+typedef struct sd_dhcp_route sd_dhcp_route;
+
+sd_dhcp_lease *sd_dhcp_lease_ref(sd_dhcp_lease *lease);
+sd_dhcp_lease *sd_dhcp_lease_unref(sd_dhcp_lease *lease);
+
+typedef enum sd_dhcp_lease_server_type {
+ SD_DHCP_LEASE_DNS,
+ SD_DHCP_LEASE_NTP,
+ SD_DHCP_LEASE_SIP,
+ SD_DHCP_LEASE_POP3,
+ SD_DHCP_LEASE_SMTP,
+ SD_DHCP_LEASE_LPR,
+ _SD_DHCP_LEASE_SERVER_TYPE_MAX,
+ _SD_DHCP_LEASE_SERVER_TYPE_INVALID = -1,
+} sd_dhcp_lease_server_type;
+
+int sd_dhcp_lease_get_address(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_lifetime(sd_dhcp_lease *lease, uint32_t *lifetime);
+int sd_dhcp_lease_get_t1(sd_dhcp_lease *lease, uint32_t *t1);
+int sd_dhcp_lease_get_t2(sd_dhcp_lease *lease, uint32_t *t2);
+int sd_dhcp_lease_get_broadcast(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_netmask(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_router(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_next_server(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_server_identifier(sd_dhcp_lease *lease, struct in_addr *addr);
+int sd_dhcp_lease_get_servers(sd_dhcp_lease *lease, sd_dhcp_lease_server_type what, const struct in_addr **addr);
+int sd_dhcp_lease_get_dns(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_ntp(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_sip(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_pop3(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_smtp(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_lpr(sd_dhcp_lease *lease, const struct in_addr **addr);
+int sd_dhcp_lease_get_mtu(sd_dhcp_lease *lease, uint16_t *mtu);
+int sd_dhcp_lease_get_domainname(sd_dhcp_lease *lease, const char **domainname);
+int sd_dhcp_lease_get_search_domains(sd_dhcp_lease *lease, char ***domains);
+int sd_dhcp_lease_get_hostname(sd_dhcp_lease *lease, const char **hostname);
+int sd_dhcp_lease_get_root_path(sd_dhcp_lease *lease, const char **root_path);
+int sd_dhcp_lease_get_routes(sd_dhcp_lease *lease, sd_dhcp_route ***routes);
+int sd_dhcp_lease_get_vendor_specific(sd_dhcp_lease *lease, const void **data, size_t *data_len);
+int sd_dhcp_lease_get_client_id(sd_dhcp_lease *lease, const void **client_id, size_t *client_id_len);
+int sd_dhcp_lease_get_timezone(sd_dhcp_lease *lease, const char **timezone);
+
+int sd_dhcp_route_get_destination(sd_dhcp_route *route, struct in_addr *destination);
+int sd_dhcp_route_get_destination_prefix_length(sd_dhcp_route *route, uint8_t *length);
+int sd_dhcp_route_get_gateway(sd_dhcp_route *route, struct in_addr *gateway);
+int sd_dhcp_route_get_option(sd_dhcp_route *route);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp_lease, sd_dhcp_lease_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp-option.h b/src/systemd/sd-dhcp-option.h
new file mode 100644
index 0000000..71aa479
--- /dev/null
+++ b/src/systemd/sd-dhcp-option.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddhcpoptionhfoo
+#define foosddhcpoptionhfoo
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_dhcp_option sd_dhcp_option;
+
+int sd_dhcp_option_new(uint8_t option, const void *data, size_t length, sd_dhcp_option **ret);
+sd_dhcp_option *sd_dhcp_option_ref(sd_dhcp_option *ra);
+sd_dhcp_option *sd_dhcp_option_unref(sd_dhcp_option *ra);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp_option, sd_dhcp_option_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp-server.h b/src/systemd/sd-dhcp-server.h
new file mode 100644
index 0000000..f42c3db
--- /dev/null
+++ b/src/systemd/sd-dhcp-server.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddhcpserverhfoo
+#define foosddhcpserverhfoo
+
+/***
+ Copyright © 2013 Intel Corporation. All rights reserved.
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <netinet/in.h>
+
+#include "sd-dhcp-lease.h"
+#include "sd-dhcp-option.h"
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_dhcp_server sd_dhcp_server;
+
+enum {
+ SD_DHCP_SERVER_EVENT_LEASE_CHANGED = 1 << 0,
+};
+
+int sd_dhcp_server_new(sd_dhcp_server **ret, int ifindex);
+
+sd_dhcp_server *sd_dhcp_server_ref(sd_dhcp_server *server);
+sd_dhcp_server *sd_dhcp_server_unref(sd_dhcp_server *server);
+
+int sd_dhcp_server_attach_event(sd_dhcp_server *client, sd_event *event, int64_t priority);
+int sd_dhcp_server_detach_event(sd_dhcp_server *client);
+sd_event *sd_dhcp_server_get_event(sd_dhcp_server *client);
+
+typedef void (*sd_dhcp_server_callback_t)(sd_dhcp_server *server, uint64_t event, void *userdata);
+
+int sd_dhcp_server_set_callback(sd_dhcp_server *server, sd_dhcp_server_callback_t cb, void *userdata);
+
+int sd_dhcp_server_is_running(sd_dhcp_server *server);
+
+int sd_dhcp_server_start(sd_dhcp_server *server);
+int sd_dhcp_server_stop(sd_dhcp_server *server);
+
+int sd_dhcp_server_configure_pool(sd_dhcp_server *server, const struct in_addr *address, unsigned char prefixlen, uint32_t offset, uint32_t size);
+
+int sd_dhcp_server_set_timezone(sd_dhcp_server *server, const char *timezone);
+int sd_dhcp_server_set_emit_router(sd_dhcp_server *server, int enabled);
+
+int sd_dhcp_server_set_servers(
+ sd_dhcp_server *server,
+ sd_dhcp_lease_server_type what,
+ const struct in_addr addresses[],
+ size_t n_addresses);
+
+int sd_dhcp_server_set_lpr(sd_dhcp_server *server, const struct in_addr lpr[], size_t n);
+int sd_dhcp_server_set_dns(sd_dhcp_server *server, const struct in_addr dns[], size_t n);
+int sd_dhcp_server_set_ntp(sd_dhcp_server *server, const struct in_addr ntp[], size_t n);
+int sd_dhcp_server_set_sip(sd_dhcp_server *server, const struct in_addr sip[], size_t n);
+int sd_dhcp_server_set_pop3(sd_dhcp_server *server, const struct in_addr pop3[], size_t n);
+int sd_dhcp_server_set_smtp(sd_dhcp_server *server, const struct in_addr smtp[], size_t n);
+
+int sd_dhcp_server_add_option(sd_dhcp_server *server, sd_dhcp_option *v);
+int sd_dhcp_server_add_vendor_option(sd_dhcp_server *server, sd_dhcp_option *v);
+
+int sd_dhcp_server_set_max_lease_time(sd_dhcp_server *server, uint32_t t);
+int sd_dhcp_server_set_default_lease_time(sd_dhcp_server *server, uint32_t t);
+
+int sd_dhcp_server_forcerenew(sd_dhcp_server *server);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp_server, sd_dhcp_server_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp6-client.h b/src/systemd/sd-dhcp6-client.h
new file mode 100644
index 0000000..84e3170
--- /dev/null
+++ b/src/systemd/sd-dhcp6-client.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddhcp6clienthfoo
+#define foosddhcp6clienthfoo
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <sys/types.h>
+
+#include "sd-dhcp6-lease.h"
+#include "sd-dhcp6-option.h"
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ SD_DHCP6_CLIENT_EVENT_STOP = 0,
+ SD_DHCP6_CLIENT_EVENT_RESEND_EXPIRE = 10,
+ SD_DHCP6_CLIENT_EVENT_RETRANS_MAX = 11,
+ SD_DHCP6_CLIENT_EVENT_IP_ACQUIRE = 12,
+ SD_DHCP6_CLIENT_EVENT_INFORMATION_REQUEST = 13,
+};
+
+enum {
+ SD_DHCP6_OPTION_CLIENTID = 1,
+ SD_DHCP6_OPTION_SERVERID = 2,
+ SD_DHCP6_OPTION_IA_NA = 3,
+ SD_DHCP6_OPTION_IA_TA = 4,
+ SD_DHCP6_OPTION_IAADDR = 5,
+ SD_DHCP6_OPTION_ORO = 6,
+ SD_DHCP6_OPTION_PREFERENCE = 7,
+ SD_DHCP6_OPTION_ELAPSED_TIME = 8,
+ SD_DHCP6_OPTION_RELAY_MSG = 9,
+ /* option code 10 is unassigned */
+ SD_DHCP6_OPTION_AUTH = 11,
+ SD_DHCP6_OPTION_UNICAST = 12,
+ SD_DHCP6_OPTION_STATUS_CODE = 13,
+ SD_DHCP6_OPTION_RAPID_COMMIT = 14,
+ SD_DHCP6_OPTION_USER_CLASS = 15,
+ SD_DHCP6_OPTION_VENDOR_CLASS = 16,
+ SD_DHCP6_OPTION_VENDOR_OPTS = 17,
+ SD_DHCP6_OPTION_INTERFACE_ID = 18,
+ SD_DHCP6_OPTION_RECONF_MSG = 19,
+ SD_DHCP6_OPTION_RECONF_ACCEPT = 20,
+
+ SD_DHCP6_OPTION_DNS_SERVERS = 23, /* RFC 3646 */
+ SD_DHCP6_OPTION_DOMAIN_LIST = 24, /* RFC 3646 */
+ SD_DHCP6_OPTION_IA_PD = 25, /* RFC 3633, prefix delegation */
+ SD_DHCP6_OPTION_IA_PD_PREFIX = 26, /* RFC 3633, prefix delegation */
+
+ SD_DHCP6_OPTION_SNTP_SERVERS = 31, /* RFC 4075, deprecated */
+ SD_DHCP6_OPTION_INFORMATION_REFRESH_TIME = 32, /* RFC 8415, sec. 21.23 */
+
+ /* option code 35 is unassigned */
+
+ SD_DHCP6_OPTION_FQDN = 39, /* RFC 4704 */
+
+ SD_DHCP6_OPTION_NTP_SERVER = 56, /* RFC 5908 */
+ SD_DHCP6_OPTION_MUD_URL = 112, /* RFC 8250 */
+
+ /* option codes 89-142 are unassigned */
+ /* option codes 144-65535 are unassigned */
+};
+
+typedef struct sd_dhcp6_client sd_dhcp6_client;
+
+typedef void (*sd_dhcp6_client_callback_t)(sd_dhcp6_client *client, int event, void *userdata);
+int sd_dhcp6_client_set_callback(
+ sd_dhcp6_client *client,
+ sd_dhcp6_client_callback_t cb,
+ void *userdata);
+
+int sd_dhcp6_client_set_ifindex(
+ sd_dhcp6_client *client,
+ int interface_index);
+int sd_dhcp6_client_set_local_address(
+ sd_dhcp6_client *client,
+ const struct in6_addr *local_address);
+int sd_dhcp6_client_set_mac(
+ sd_dhcp6_client *client,
+ const uint8_t *addr,
+ size_t addr_len,
+ uint16_t arp_type);
+int sd_dhcp6_client_set_duid(
+ sd_dhcp6_client *client,
+ uint16_t duid_type,
+ const void *duid,
+ size_t duid_len);
+int sd_dhcp6_client_set_duid_llt(
+ sd_dhcp6_client *client,
+ uint64_t llt_time);
+int sd_dhcp6_client_set_iaid(
+ sd_dhcp6_client *client,
+ uint32_t iaid);
+int sd_dhcp6_client_get_iaid(
+ sd_dhcp6_client *client,
+ uint32_t *iaid);
+int sd_dhcp6_client_duid_as_string(
+ sd_dhcp6_client *client,
+ char **duid);
+int sd_dhcp6_client_set_fqdn(
+ sd_dhcp6_client *client,
+ const char *fqdn);
+int sd_dhcp6_client_set_information_request(
+ sd_dhcp6_client *client,
+ int enabled);
+int sd_dhcp6_client_get_information_request(
+ sd_dhcp6_client *client,
+ int *enabled);
+int sd_dhcp6_client_set_request_option(
+ sd_dhcp6_client *client,
+ uint16_t option);
+int sd_dhcp6_client_set_request_mud_url(
+ sd_dhcp6_client *client,
+ const char *mudurl);
+int sd_dhcp6_client_set_request_user_class(
+ sd_dhcp6_client *client,
+ char * const *user_class);
+int sd_dhcp6_client_set_request_vendor_class(
+ sd_dhcp6_client *client,
+ char * const *vendor_class);
+int sd_dhcp6_client_set_prefix_delegation_hint(
+ sd_dhcp6_client *client,
+ uint8_t prefixlen,
+ const struct in6_addr *pd_address);
+int sd_dhcp6_client_get_prefix_delegation(sd_dhcp6_client *client,
+ int *delegation);
+int sd_dhcp6_client_set_prefix_delegation(sd_dhcp6_client *client,
+ int delegation);
+int sd_dhcp6_client_get_address_request(sd_dhcp6_client *client,
+ int *request);
+int sd_dhcp6_client_set_address_request(sd_dhcp6_client *client,
+ int request);
+int sd_dhcp6_client_set_transaction_id(sd_dhcp6_client *client,
+ uint32_t transaction_id);
+int sd_dhcp6_client_add_vendor_option(sd_dhcp6_client *client,
+ sd_dhcp6_option *v);
+
+int sd_dhcp6_client_get_lease(
+ sd_dhcp6_client *client,
+ sd_dhcp6_lease **ret);
+
+int sd_dhcp6_client_add_option(sd_dhcp6_client *client, sd_dhcp6_option *v);
+
+int sd_dhcp6_client_stop(sd_dhcp6_client *client);
+int sd_dhcp6_client_start(sd_dhcp6_client *client);
+int sd_dhcp6_client_is_running(sd_dhcp6_client *client);
+int sd_dhcp6_client_attach_event(
+ sd_dhcp6_client *client,
+ sd_event *event,
+ int64_t priority);
+int sd_dhcp6_client_detach_event(sd_dhcp6_client *client);
+sd_event *sd_dhcp6_client_get_event(sd_dhcp6_client *client);
+sd_dhcp6_client *sd_dhcp6_client_ref(sd_dhcp6_client *client);
+sd_dhcp6_client *sd_dhcp6_client_unref(sd_dhcp6_client *client);
+int sd_dhcp6_client_new(sd_dhcp6_client **ret);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp6_client, sd_dhcp6_client_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp6-lease.h b/src/systemd/sd-dhcp6-lease.h
new file mode 100644
index 0000000..f77b31a
--- /dev/null
+++ b/src/systemd/sd-dhcp6-lease.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddhcp6leasehfoo
+#define foosddhcp6leasehfoo
+
+/***
+ Copyright © 2014-2015 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <netinet/in.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_dhcp6_lease sd_dhcp6_lease;
+
+void sd_dhcp6_lease_reset_address_iter(sd_dhcp6_lease *lease);
+int sd_dhcp6_lease_get_address(sd_dhcp6_lease *lease,
+ struct in6_addr *addr,
+ uint32_t *lifetime_preferred,
+ uint32_t *lifetime_valid);
+void sd_dhcp6_lease_reset_pd_prefix_iter(sd_dhcp6_lease *lease);
+int sd_dhcp6_lease_get_pd(sd_dhcp6_lease *lease, struct in6_addr *prefix,
+ uint8_t *prefix_len,
+ uint32_t *lifetime_preferred,
+ uint32_t *lifetime_valid);
+
+int sd_dhcp6_lease_get_dns(sd_dhcp6_lease *lease, const struct in6_addr **addrs);
+int sd_dhcp6_lease_get_domains(sd_dhcp6_lease *lease, char ***domains);
+int sd_dhcp6_lease_get_ntp_addrs(sd_dhcp6_lease *lease, const struct in6_addr **addrs);
+int sd_dhcp6_lease_get_ntp_fqdn(sd_dhcp6_lease *lease, char ***ntp_fqdn);
+int sd_dhcp6_lease_get_fqdn(sd_dhcp6_lease *lease, const char **fqdn);
+
+sd_dhcp6_lease *sd_dhcp6_lease_ref(sd_dhcp6_lease *lease);
+sd_dhcp6_lease *sd_dhcp6_lease_unref(sd_dhcp6_lease *lease);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp6_lease, sd_dhcp6_lease_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-dhcp6-option.h b/src/systemd/sd-dhcp6-option.h
new file mode 100644
index 0000000..ddb2c7c
--- /dev/null
+++ b/src/systemd/sd-dhcp6-option.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosddhcp6optionhfoo
+#define foosddhcp6optionhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_dhcp6_option sd_dhcp6_option;
+
+int sd_dhcp6_option_new(uint16_t option, const void *data, size_t length, uint32_t enterprise_identifier, sd_dhcp6_option **ret);
+sd_dhcp6_option *sd_dhcp6_option_ref(sd_dhcp6_option *ra);
+sd_dhcp6_option *sd_dhcp6_option_unref(sd_dhcp6_option *ra);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_dhcp6_option, sd_dhcp6_option_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h
new file mode 100644
index 0000000..937c9bd
--- /dev/null
+++ b/src/systemd/sd-event.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdeventhfoo
+#define foosdeventhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <sys/signalfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+
+#include "_sd-common.h"
+
+/*
+ Why is this better than pure epoll?
+
+ - Supports event source prioritization
+ - Scales better with a large number of time events because it does not require one timerfd each
+ - Automatically tries to coalesce timer events system-wide
+ - Handles signals, child PIDs, inotify events
+ - Supports systemd-style automatic watchdog event generation
+*/
+
+_SD_BEGIN_DECLARATIONS;
+
+#define SD_EVENT_DEFAULT ((sd_event *) 1)
+
+typedef struct sd_event sd_event;
+typedef struct sd_event_source sd_event_source;
+
+enum {
+ SD_EVENT_OFF = 0,
+ SD_EVENT_ON = 1,
+ SD_EVENT_ONESHOT = -1
+};
+
+enum {
+ SD_EVENT_INITIAL,
+ SD_EVENT_ARMED,
+ SD_EVENT_PENDING,
+ SD_EVENT_RUNNING,
+ SD_EVENT_EXITING,
+ SD_EVENT_FINISHED,
+ SD_EVENT_PREPARING
+};
+
+enum {
+ /* And everything in-between and outside is good too */
+ SD_EVENT_PRIORITY_IMPORTANT = -100,
+ SD_EVENT_PRIORITY_NORMAL = 0,
+ SD_EVENT_PRIORITY_IDLE = 100
+};
+
+typedef int (*sd_event_handler_t)(sd_event_source *s, void *userdata);
+typedef int (*sd_event_io_handler_t)(sd_event_source *s, int fd, uint32_t revents, void *userdata);
+typedef int (*sd_event_time_handler_t)(sd_event_source *s, uint64_t usec, void *userdata);
+typedef int (*sd_event_signal_handler_t)(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata);
+#if defined _GNU_SOURCE || (defined _POSIX_C_SOURCE && _POSIX_C_SOURCE >= 199309L)
+typedef int (*sd_event_child_handler_t)(sd_event_source *s, const siginfo_t *si, void *userdata);
+#else
+typedef void* sd_event_child_handler_t;
+#endif
+typedef int (*sd_event_inotify_handler_t)(sd_event_source *s, const struct inotify_event *event, void *userdata);
+typedef _sd_destroy_t sd_event_destroy_t;
+
+int sd_event_default(sd_event **e);
+
+int sd_event_new(sd_event **e);
+sd_event* sd_event_ref(sd_event *e);
+sd_event* sd_event_unref(sd_event *e);
+
+int sd_event_add_io(sd_event *e, sd_event_source **s, int fd, uint32_t events, sd_event_io_handler_t callback, void *userdata);
+int sd_event_add_time(sd_event *e, sd_event_source **s, clockid_t clock, uint64_t usec, uint64_t accuracy, sd_event_time_handler_t callback, void *userdata);
+int sd_event_add_time_relative(sd_event *e, sd_event_source **s, clockid_t clock, uint64_t usec, uint64_t accuracy, sd_event_time_handler_t callback, void *userdata);
+int sd_event_add_signal(sd_event *e, sd_event_source **s, int sig, sd_event_signal_handler_t callback, void *userdata);
+int sd_event_add_child(sd_event *e, sd_event_source **s, pid_t pid, int options, sd_event_child_handler_t callback, void *userdata);
+int sd_event_add_child_pidfd(sd_event *e, sd_event_source **s, int pidfd, int options, sd_event_child_handler_t callback, void *userdata);
+int sd_event_add_inotify(sd_event *e, sd_event_source **s, const char *path, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata);
+int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
+int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
+int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);
+
+int sd_event_prepare(sd_event *e);
+int sd_event_wait(sd_event *e, uint64_t usec);
+int sd_event_dispatch(sd_event *e);
+int sd_event_run(sd_event *e, uint64_t usec);
+int sd_event_loop(sd_event *e);
+int sd_event_exit(sd_event *e, int code);
+
+int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec);
+
+int sd_event_get_fd(sd_event *e);
+int sd_event_get_state(sd_event *e);
+int sd_event_get_tid(sd_event *e, pid_t *tid);
+int sd_event_get_exit_code(sd_event *e, int *code);
+int sd_event_set_watchdog(sd_event *e, int b);
+int sd_event_get_watchdog(sd_event *e);
+int sd_event_get_iteration(sd_event *e, uint64_t *ret);
+
+sd_event_source* sd_event_source_ref(sd_event_source *s);
+sd_event_source* sd_event_source_unref(sd_event_source *s);
+sd_event_source* sd_event_source_disable_unref(sd_event_source *s);
+
+sd_event *sd_event_source_get_event(sd_event_source *s);
+void* sd_event_source_get_userdata(sd_event_source *s);
+void* sd_event_source_set_userdata(sd_event_source *s, void *userdata);
+
+int sd_event_source_set_description(sd_event_source *s, const char *description);
+int sd_event_source_get_description(sd_event_source *s, const char **description);
+int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback);
+int sd_event_source_get_pending(sd_event_source *s);
+int sd_event_source_get_priority(sd_event_source *s, int64_t *priority);
+int sd_event_source_set_priority(sd_event_source *s, int64_t priority);
+int sd_event_source_get_enabled(sd_event_source *s, int *enabled);
+int sd_event_source_set_enabled(sd_event_source *s, int enabled);
+int sd_event_source_get_io_fd(sd_event_source *s);
+int sd_event_source_set_io_fd(sd_event_source *s, int fd);
+int sd_event_source_get_io_fd_own(sd_event_source *s);
+int sd_event_source_set_io_fd_own(sd_event_source *s, int own);
+int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events);
+int sd_event_source_set_io_events(sd_event_source *s, uint32_t events);
+int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents);
+int sd_event_source_get_time(sd_event_source *s, uint64_t *usec);
+int sd_event_source_set_time(sd_event_source *s, uint64_t usec);
+int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec);
+int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec);
+int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec);
+int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock);
+int sd_event_source_get_signal(sd_event_source *s);
+int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid);
+int sd_event_source_get_child_pidfd(sd_event_source *s);
+int sd_event_source_get_child_pidfd_own(sd_event_source *s);
+int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own);
+int sd_event_source_get_child_process_own(sd_event_source *s);
+int sd_event_source_set_child_process_own(sd_event_source *s, int own);
+#if defined _GNU_SOURCE || (defined _POSIX_C_SOURCE && _POSIX_C_SOURCE >= 199309L)
+int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags);
+#else
+int sd_event_source_send_child_signal(sd_event_source *s, int sig, const void *si, unsigned flags);
+#endif
+int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret);
+int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback);
+int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret);
+int sd_event_source_get_floating(sd_event_source *s);
+int sd_event_source_set_floating(sd_event_source *s, int b);
+int sd_event_source_get_exit_on_failure(sd_event_source *s);
+int sd_event_source_set_exit_on_failure(sd_event_source *s, int b);
+
+/* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. */
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event_source, sd_event_source_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event_source, sd_event_source_disable_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-hwdb.h b/src/systemd/sd-hwdb.h
new file mode 100644
index 0000000..9380759
--- /dev/null
+++ b/src/systemd/sd-hwdb.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdhwdbhfoo
+#define foosdhwdbhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_hwdb sd_hwdb;
+
+sd_hwdb *sd_hwdb_ref(sd_hwdb *hwdb);
+sd_hwdb *sd_hwdb_unref(sd_hwdb *hwdb);
+
+int sd_hwdb_new(sd_hwdb **ret);
+
+int sd_hwdb_get(sd_hwdb *hwdb, const char *modalias, const char *key, const char **value);
+
+int sd_hwdb_seek(sd_hwdb *hwdb, const char *modalias);
+int sd_hwdb_enumerate(sd_hwdb *hwdb, const char **key, const char **value);
+
+/* the inverse condition avoids ambiguity of dangling 'else' after the macro */
+#define SD_HWDB_FOREACH_PROPERTY(hwdb, modalias, key, value) \
+ if (sd_hwdb_seek(hwdb, modalias) < 0) { } \
+ else while (sd_hwdb_enumerate(hwdb, &(key), &(value)) > 0)
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_hwdb, sd_hwdb_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-id128.h b/src/systemd/sd-id128.h
new file mode 100644
index 0000000..02aa318
--- /dev/null
+++ b/src/systemd/sd-id128.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdid128hfoo
+#define foosdid128hfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* 128-bit ID APIs. See sd-id128(3) for more information. */
+
+typedef union sd_id128 sd_id128_t;
+
+union sd_id128 {
+ uint8_t bytes[16];
+ uint64_t qwords[2];
+};
+
+#define SD_ID128_STRING_MAX 33
+
+char *sd_id128_to_string(sd_id128_t id, char s[_SD_ARRAY_STATIC SD_ID128_STRING_MAX]);
+int sd_id128_from_string(const char *s, sd_id128_t *ret);
+
+int sd_id128_randomize(sd_id128_t *ret);
+
+int sd_id128_get_machine(sd_id128_t *ret);
+int sd_id128_get_boot(sd_id128_t *ret);
+int sd_id128_get_invocation(sd_id128_t *ret);
+
+int sd_id128_get_machine_app_specific(sd_id128_t app_id, sd_id128_t *ret);
+int sd_id128_get_boot_app_specific(sd_id128_t app_id, sd_id128_t *ret);
+
+#define SD_ID128_ARRAY(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) \
+ { .bytes = { 0x##v0, 0x##v1, 0x##v2, 0x##v3, 0x##v4, 0x##v5, 0x##v6, 0x##v7, \
+ 0x##v8, 0x##v9, 0x##v10, 0x##v11, 0x##v12, 0x##v13, 0x##v14, 0x##v15 }}
+
+#define SD_ID128_MAKE(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) \
+ ((const sd_id128_t) SD_ID128_ARRAY(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
+
+/* Note that SD_ID128_FORMAT_VAL will evaluate the passed argument 16
+ * times. It is hence not a good idea to call this macro with an
+ * expensive function as parameter or an expression with side
+ * effects */
+
+#define SD_ID128_FORMAT_STR "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x"
+#define SD_ID128_FORMAT_VAL(x) (x).bytes[0], (x).bytes[1], (x).bytes[2], (x).bytes[3], (x).bytes[4], (x).bytes[5], (x).bytes[6], (x).bytes[7], (x).bytes[8], (x).bytes[9], (x).bytes[10], (x).bytes[11], (x).bytes[12], (x).bytes[13], (x).bytes[14], (x).bytes[15]
+
+/* Like SD_ID128_FORMAT_STR, but formats as UUID, not in plain format */
+#define SD_ID128_UUID_FORMAT_STR "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x"
+
+#define SD_ID128_CONST_STR(x) \
+ ((const char[SD_ID128_STRING_MAX]) { \
+ ((x).bytes[0] >> 4) >= 10 ? 'a' + ((x).bytes[0] >> 4) - 10 : '0' + ((x).bytes[0] >> 4), \
+ ((x).bytes[0] & 15) >= 10 ? 'a' + ((x).bytes[0] & 15) - 10 : '0' + ((x).bytes[0] & 15), \
+ ((x).bytes[1] >> 4) >= 10 ? 'a' + ((x).bytes[1] >> 4) - 10 : '0' + ((x).bytes[1] >> 4), \
+ ((x).bytes[1] & 15) >= 10 ? 'a' + ((x).bytes[1] & 15) - 10 : '0' + ((x).bytes[1] & 15), \
+ ((x).bytes[2] >> 4) >= 10 ? 'a' + ((x).bytes[2] >> 4) - 10 : '0' + ((x).bytes[2] >> 4), \
+ ((x).bytes[2] & 15) >= 10 ? 'a' + ((x).bytes[2] & 15) - 10 : '0' + ((x).bytes[2] & 15), \
+ ((x).bytes[3] >> 4) >= 10 ? 'a' + ((x).bytes[3] >> 4) - 10 : '0' + ((x).bytes[3] >> 4), \
+ ((x).bytes[3] & 15) >= 10 ? 'a' + ((x).bytes[3] & 15) - 10 : '0' + ((x).bytes[3] & 15), \
+ ((x).bytes[4] >> 4) >= 10 ? 'a' + ((x).bytes[4] >> 4) - 10 : '0' + ((x).bytes[4] >> 4), \
+ ((x).bytes[4] & 15) >= 10 ? 'a' + ((x).bytes[4] & 15) - 10 : '0' + ((x).bytes[4] & 15), \
+ ((x).bytes[5] >> 4) >= 10 ? 'a' + ((x).bytes[5] >> 4) - 10 : '0' + ((x).bytes[5] >> 4), \
+ ((x).bytes[5] & 15) >= 10 ? 'a' + ((x).bytes[5] & 15) - 10 : '0' + ((x).bytes[5] & 15), \
+ ((x).bytes[6] >> 4) >= 10 ? 'a' + ((x).bytes[6] >> 4) - 10 : '0' + ((x).bytes[6] >> 4), \
+ ((x).bytes[6] & 15) >= 10 ? 'a' + ((x).bytes[6] & 15) - 10 : '0' + ((x).bytes[6] & 15), \
+ ((x).bytes[7] >> 4) >= 10 ? 'a' + ((x).bytes[7] >> 4) - 10 : '0' + ((x).bytes[7] >> 4), \
+ ((x).bytes[7] & 15) >= 10 ? 'a' + ((x).bytes[7] & 15) - 10 : '0' + ((x).bytes[7] & 15), \
+ ((x).bytes[8] >> 4) >= 10 ? 'a' + ((x).bytes[8] >> 4) - 10 : '0' + ((x).bytes[8] >> 4), \
+ ((x).bytes[8] & 15) >= 10 ? 'a' + ((x).bytes[8] & 15) - 10 : '0' + ((x).bytes[8] & 15), \
+ ((x).bytes[9] >> 4) >= 10 ? 'a' + ((x).bytes[9] >> 4) - 10 : '0' + ((x).bytes[9] >> 4), \
+ ((x).bytes[9] & 15) >= 10 ? 'a' + ((x).bytes[9] & 15) - 10 : '0' + ((x).bytes[9] & 15), \
+ ((x).bytes[10] >> 4) >= 10 ? 'a' + ((x).bytes[10] >> 4) - 10 : '0' + ((x).bytes[10] >> 4), \
+ ((x).bytes[10] & 15) >= 10 ? 'a' + ((x).bytes[10] & 15) - 10 : '0' + ((x).bytes[10] & 15), \
+ ((x).bytes[11] >> 4) >= 10 ? 'a' + ((x).bytes[11] >> 4) - 10 : '0' + ((x).bytes[11] >> 4), \
+ ((x).bytes[11] & 15) >= 10 ? 'a' + ((x).bytes[11] & 15) - 10 : '0' + ((x).bytes[11] & 15), \
+ ((x).bytes[12] >> 4) >= 10 ? 'a' + ((x).bytes[12] >> 4) - 10 : '0' + ((x).bytes[12] >> 4), \
+ ((x).bytes[12] & 15) >= 10 ? 'a' + ((x).bytes[12] & 15) - 10 : '0' + ((x).bytes[12] & 15), \
+ ((x).bytes[13] >> 4) >= 10 ? 'a' + ((x).bytes[13] >> 4) - 10 : '0' + ((x).bytes[13] >> 4), \
+ ((x).bytes[13] & 15) >= 10 ? 'a' + ((x).bytes[13] & 15) - 10 : '0' + ((x).bytes[13] & 15), \
+ ((x).bytes[14] >> 4) >= 10 ? 'a' + ((x).bytes[14] >> 4) - 10 : '0' + ((x).bytes[14] >> 4), \
+ ((x).bytes[14] & 15) >= 10 ? 'a' + ((x).bytes[14] & 15) - 10 : '0' + ((x).bytes[14] & 15), \
+ ((x).bytes[15] >> 4) >= 10 ? 'a' + ((x).bytes[15] >> 4) - 10 : '0' + ((x).bytes[15] >> 4), \
+ ((x).bytes[15] & 15) >= 10 ? 'a' + ((x).bytes[15] & 15) - 10 : '0' + ((x).bytes[15] & 15), \
+ 0 })
+
+#define SD_ID128_MAKE_STR(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
+ #a #b #c #d #e #f #g #h #i #j #k #l #m #n #o #p
+
+_sd_pure_ static __inline__ int sd_id128_equal(sd_id128_t a, sd_id128_t b) {
+ return memcmp(&a, &b, 16) == 0;
+}
+
+_sd_pure_ static __inline__ int sd_id128_is_null(sd_id128_t a) {
+ return a.qwords[0] == 0 && a.qwords[1] == 0;
+}
+
+_sd_pure_ static __inline__ int sd_id128_is_allf(sd_id128_t a) {
+ return a.qwords[0] == UINT64_C(0xFFFFFFFFFFFFFFFF) && a.qwords[1] == UINT64_C(0xFFFFFFFFFFFFFFFF);
+}
+
+#define SD_ID128_NULL ((const sd_id128_t) { .qwords = { 0, 0 }})
+#define SD_ID128_ALLF ((const sd_id128_t) { .qwords = { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF) }})
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-ipv4acd.h b/src/systemd/sd-ipv4acd.h
new file mode 100644
index 0000000..d900018
--- /dev/null
+++ b/src/systemd/sd-ipv4acd.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdipv4acdfoo
+#define foosdipv4acdfoo
+
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ SD_IPV4ACD_EVENT_STOP = 0,
+ SD_IPV4ACD_EVENT_BIND = 1,
+ SD_IPV4ACD_EVENT_CONFLICT = 2,
+};
+
+typedef struct sd_ipv4acd sd_ipv4acd;
+typedef void (*sd_ipv4acd_callback_t)(sd_ipv4acd *acd, int event, void *userdata);
+
+int sd_ipv4acd_detach_event(sd_ipv4acd *acd);
+int sd_ipv4acd_attach_event(sd_ipv4acd *acd, sd_event *event, int64_t priority);
+int sd_ipv4acd_get_address(sd_ipv4acd *acd, struct in_addr *address);
+int sd_ipv4acd_set_callback(sd_ipv4acd *acd, sd_ipv4acd_callback_t cb, void *userdata);
+int sd_ipv4acd_set_mac(sd_ipv4acd *acd, const struct ether_addr *addr);
+int sd_ipv4acd_set_ifindex(sd_ipv4acd *acd, int interface_index);
+int sd_ipv4acd_set_address(sd_ipv4acd *acd, const struct in_addr *address);
+int sd_ipv4acd_is_running(sd_ipv4acd *acd);
+int sd_ipv4acd_start(sd_ipv4acd *acd, bool reset_conflicts);
+int sd_ipv4acd_stop(sd_ipv4acd *acd);
+sd_ipv4acd *sd_ipv4acd_ref(sd_ipv4acd *acd);
+sd_ipv4acd *sd_ipv4acd_unref(sd_ipv4acd *acd);
+int sd_ipv4acd_new(sd_ipv4acd **ret);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ipv4acd, sd_ipv4acd_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-ipv4ll.h b/src/systemd/sd-ipv4ll.h
new file mode 100644
index 0000000..a068223
--- /dev/null
+++ b/src/systemd/sd-ipv4ll.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdipv4llfoo
+#define foosdipv4llfoo
+
+/***
+ Copyright © 2014 Axis Communications AB. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <net/ethernet.h>
+#include <netinet/in.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ SD_IPV4LL_EVENT_STOP = 0,
+ SD_IPV4LL_EVENT_BIND = 1,
+ SD_IPV4LL_EVENT_CONFLICT = 2,
+};
+
+typedef struct sd_ipv4ll sd_ipv4ll;
+typedef void (*sd_ipv4ll_callback_t)(sd_ipv4ll *ll, int event, void *userdata);
+
+int sd_ipv4ll_detach_event(sd_ipv4ll *ll);
+int sd_ipv4ll_attach_event(sd_ipv4ll *ll, sd_event *event, int64_t priority);
+int sd_ipv4ll_get_address(sd_ipv4ll *ll, struct in_addr *address);
+int sd_ipv4ll_set_callback(sd_ipv4ll *ll, sd_ipv4ll_callback_t cb, void *userdata);
+int sd_ipv4ll_set_mac(sd_ipv4ll *ll, const struct ether_addr *addr);
+int sd_ipv4ll_set_ifindex(sd_ipv4ll *ll, int interface_index);
+int sd_ipv4ll_set_address(sd_ipv4ll *ll, const struct in_addr *address);
+int sd_ipv4ll_set_address_seed(sd_ipv4ll *ll, uint64_t seed);
+int sd_ipv4ll_is_running(sd_ipv4ll *ll);
+int sd_ipv4ll_restart(sd_ipv4ll *ll);
+int sd_ipv4ll_start(sd_ipv4ll *ll);
+int sd_ipv4ll_stop(sd_ipv4ll *ll);
+sd_ipv4ll *sd_ipv4ll_ref(sd_ipv4ll *ll);
+sd_ipv4ll *sd_ipv4ll_unref(sd_ipv4ll *ll);
+int sd_ipv4ll_new(sd_ipv4ll **ret);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ipv4ll, sd_ipv4ll_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-journal.h b/src/systemd/sd-journal.h
new file mode 100644
index 0000000..03696eb
--- /dev/null
+++ b/src/systemd/sd-journal.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdjournalhfoo
+#define foosdjournalhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <syslog.h>
+
+#include "sd-id128.h"
+
+#include "_sd-common.h"
+
+/* Journal APIs. See sd-journal(3) for more information. */
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Write to daemon */
+int sd_journal_print(int priority, const char *format, ...) _sd_printf_(2, 3);
+int sd_journal_printv(int priority, const char *format, va_list ap) _sd_printf_(2, 0);
+int sd_journal_send(const char *format, ...) _sd_printf_(1, 0) _sd_sentinel_;
+int sd_journal_sendv(const struct iovec *iov, int n);
+int sd_journal_perror(const char *message);
+
+/* Used by the macros below. You probably don't want to call this directly. */
+int sd_journal_print_with_location(int priority, const char *file, const char *line, const char *func, const char *format, ...) _sd_printf_(5, 6);
+int sd_journal_printv_with_location(int priority, const char *file, const char *line, const char *func, const char *format, va_list ap) _sd_printf_(5, 0);
+int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) _sd_printf_(4, 0) _sd_sentinel_;
+int sd_journal_sendv_with_location(const char *file, const char *line, const char *func, const struct iovec *iov, int n);
+int sd_journal_perror_with_location(const char *file, const char *line, const char *func, const char *message);
+
+/* implicitly add code location to messages sent, if this is enabled */
+#ifndef SD_JOURNAL_SUPPRESS_LOCATION
+
+#define sd_journal_print(priority, ...) sd_journal_print_with_location(priority, "CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, __VA_ARGS__)
+#define sd_journal_printv(priority, format, ap) sd_journal_printv_with_location(priority, "CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, format, ap)
+#define sd_journal_send(...) sd_journal_send_with_location("CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, __VA_ARGS__)
+#define sd_journal_sendv(iovec, n) sd_journal_sendv_with_location("CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, iovec, n)
+#define sd_journal_perror(message) sd_journal_perror_with_location("CODE_FILE=" __FILE__, "CODE_LINE=" _SD_STRINGIFY(__LINE__), __func__, message)
+
+#endif
+
+int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix);
+
+/* Browse journal stream */
+
+typedef struct sd_journal sd_journal;
+
+/* Open flags */
+enum {
+ SD_JOURNAL_LOCAL_ONLY = 1 << 0,
+ SD_JOURNAL_RUNTIME_ONLY = 1 << 1,
+ SD_JOURNAL_SYSTEM = 1 << 2,
+ SD_JOURNAL_CURRENT_USER = 1 << 3,
+ SD_JOURNAL_OS_ROOT = 1 << 4,
+ SD_JOURNAL_ALL_NAMESPACES = 1 << 5, /* Show all namespaces, not just the default or specified one */
+ SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE = 1 << 6, /* Show default namespace in addition to specified one */
+
+ SD_JOURNAL_SYSTEM_ONLY _sd_deprecated_ = SD_JOURNAL_SYSTEM /* old name */
+};
+
+/* Wakeup event types */
+enum {
+ SD_JOURNAL_NOP,
+ SD_JOURNAL_APPEND,
+ SD_JOURNAL_INVALIDATE
+};
+
+int sd_journal_open(sd_journal **ret, int flags);
+int sd_journal_open_namespace(sd_journal **ret, const char *name_space, int flags);
+int sd_journal_open_directory(sd_journal **ret, const char *path, int flags);
+int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags);
+int sd_journal_open_files(sd_journal **ret, const char **paths, int flags);
+int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags);
+int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) _sd_deprecated_;
+void sd_journal_close(sd_journal *j);
+
+int sd_journal_previous(sd_journal *j);
+int sd_journal_next(sd_journal *j);
+
+int sd_journal_previous_skip(sd_journal *j, uint64_t skip);
+int sd_journal_next_skip(sd_journal *j, uint64_t skip);
+
+int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret);
+int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id);
+
+int sd_journal_set_data_threshold(sd_journal *j, size_t sz);
+int sd_journal_get_data_threshold(sd_journal *j, size_t *sz);
+
+int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *l);
+int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *l);
+int sd_journal_enumerate_available_data(sd_journal *j, const void **data, size_t *l);
+void sd_journal_restart_data(sd_journal *j);
+
+int sd_journal_add_match(sd_journal *j, const void *data, size_t size);
+int sd_journal_add_disjunction(sd_journal *j);
+int sd_journal_add_conjunction(sd_journal *j);
+void sd_journal_flush_matches(sd_journal *j);
+
+int sd_journal_seek_head(sd_journal *j);
+int sd_journal_seek_tail(sd_journal *j);
+int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec);
+int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec);
+int sd_journal_seek_cursor(sd_journal *j, const char *cursor);
+
+int sd_journal_get_cursor(sd_journal *j, char **cursor);
+int sd_journal_test_cursor(sd_journal *j, const char *cursor);
+
+int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to);
+int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, const sd_id128_t boot_id, uint64_t *from, uint64_t *to);
+
+int sd_journal_get_usage(sd_journal *j, uint64_t *bytes);
+
+int sd_journal_query_unique(sd_journal *j, const char *field);
+int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l);
+int sd_journal_enumerate_available_unique(sd_journal *j, const void **data, size_t *l);
+void sd_journal_restart_unique(sd_journal *j);
+
+int sd_journal_enumerate_fields(sd_journal *j, const char **field);
+void sd_journal_restart_fields(sd_journal *j);
+
+int sd_journal_get_fd(sd_journal *j);
+int sd_journal_get_events(sd_journal *j);
+int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec);
+int sd_journal_process(sd_journal *j);
+int sd_journal_wait(sd_journal *j, uint64_t timeout_usec);
+int sd_journal_reliable_fd(sd_journal *j);
+
+int sd_journal_get_catalog(sd_journal *j, char **text);
+int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **text);
+
+int sd_journal_has_runtime_files(sd_journal *j);
+int sd_journal_has_persistent_files(sd_journal *j);
+
+/* The inverse condition avoids ambiguity of dangling 'else' after the macro */
+#define SD_JOURNAL_FOREACH(j) \
+ if (sd_journal_seek_head(j) < 0) { } \
+ else while (sd_journal_next(j) > 0)
+
+/* The inverse condition avoids ambiguity of dangling 'else' after the macro */
+#define SD_JOURNAL_FOREACH_BACKWARDS(j) \
+ if (sd_journal_seek_tail(j) < 0) { } \
+ else while (sd_journal_previous(j) > 0)
+
+/* Iterate through all available data fields of the current journal entry */
+#define SD_JOURNAL_FOREACH_DATA(j, data, l) \
+ for (sd_journal_restart_data(j); sd_journal_enumerate_available_data((j), &(data), &(l)) > 0; )
+
+/* Iterate through all available values of a specific field */
+#define SD_JOURNAL_FOREACH_UNIQUE(j, data, l) \
+ for (sd_journal_restart_unique(j); sd_journal_enumerate_available_unique((j), &(data), &(l)) > 0; )
+
+/* Iterate through all known field names */
+#define SD_JOURNAL_FOREACH_FIELD(j, field) \
+ for (sd_journal_restart_fields(j); sd_journal_enumerate_fields((j), &(field)) > 0; )
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_journal, sd_journal_close);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-lldp.h b/src/systemd/sd-lldp.h
new file mode 100644
index 0000000..f551f6b
--- /dev/null
+++ b/src/systemd/sd-lldp.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdlldphfoo
+#define foosdlldphfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* IEEE 802.1AB-2009 Clause 8: TLV Types */
+enum {
+ SD_LLDP_TYPE_END = 0,
+ SD_LLDP_TYPE_CHASSIS_ID = 1,
+ SD_LLDP_TYPE_PORT_ID = 2,
+ SD_LLDP_TYPE_TTL = 3,
+ SD_LLDP_TYPE_PORT_DESCRIPTION = 4,
+ SD_LLDP_TYPE_SYSTEM_NAME = 5,
+ SD_LLDP_TYPE_SYSTEM_DESCRIPTION = 6,
+ SD_LLDP_TYPE_SYSTEM_CAPABILITIES = 7,
+ SD_LLDP_TYPE_MGMT_ADDRESS = 8,
+ SD_LLDP_TYPE_PRIVATE = 127,
+};
+
+/* IEEE 802.1AB-2009 Clause 8.5.2: Chassis subtypes */
+enum {
+ SD_LLDP_CHASSIS_SUBTYPE_RESERVED = 0,
+ SD_LLDP_CHASSIS_SUBTYPE_CHASSIS_COMPONENT = 1,
+ SD_LLDP_CHASSIS_SUBTYPE_INTERFACE_ALIAS = 2,
+ SD_LLDP_CHASSIS_SUBTYPE_PORT_COMPONENT = 3,
+ SD_LLDP_CHASSIS_SUBTYPE_MAC_ADDRESS = 4,
+ SD_LLDP_CHASSIS_SUBTYPE_NETWORK_ADDRESS = 5,
+ SD_LLDP_CHASSIS_SUBTYPE_INTERFACE_NAME = 6,
+ SD_LLDP_CHASSIS_SUBTYPE_LOCALLY_ASSIGNED = 7,
+};
+
+/* IEEE 802.1AB-2009 Clause 8.5.3: Port subtype */
+enum {
+ SD_LLDP_PORT_SUBTYPE_RESERVED = 0,
+ SD_LLDP_PORT_SUBTYPE_INTERFACE_ALIAS = 1,
+ SD_LLDP_PORT_SUBTYPE_PORT_COMPONENT = 2,
+ SD_LLDP_PORT_SUBTYPE_MAC_ADDRESS = 3,
+ SD_LLDP_PORT_SUBTYPE_NETWORK_ADDRESS = 4,
+ SD_LLDP_PORT_SUBTYPE_INTERFACE_NAME = 5,
+ SD_LLDP_PORT_SUBTYPE_AGENT_CIRCUIT_ID = 6,
+ SD_LLDP_PORT_SUBTYPE_LOCALLY_ASSIGNED = 7,
+};
+
+/* IEEE 802.1AB-2009 Clause 8.5.8: System capabilities */
+enum {
+ SD_LLDP_SYSTEM_CAPABILITIES_OTHER = 1 << 0,
+ SD_LLDP_SYSTEM_CAPABILITIES_REPEATER = 1 << 1,
+ SD_LLDP_SYSTEM_CAPABILITIES_BRIDGE = 1 << 2,
+ SD_LLDP_SYSTEM_CAPABILITIES_WLAN_AP = 1 << 3,
+ SD_LLDP_SYSTEM_CAPABILITIES_ROUTER = 1 << 4,
+ SD_LLDP_SYSTEM_CAPABILITIES_PHONE = 1 << 5,
+ SD_LLDP_SYSTEM_CAPABILITIES_DOCSIS = 1 << 6,
+ SD_LLDP_SYSTEM_CAPABILITIES_STATION = 1 << 7,
+ SD_LLDP_SYSTEM_CAPABILITIES_CVLAN = 1 << 8,
+ SD_LLDP_SYSTEM_CAPABILITIES_SVLAN = 1 << 9,
+ SD_LLDP_SYSTEM_CAPABILITIES_TPMR = 1 << 10,
+};
+
+#define SD_LLDP_SYSTEM_CAPABILITIES_ALL ((uint16_t) -1)
+
+#define SD_LLDP_SYSTEM_CAPABILITIES_ALL_ROUTERS \
+ ((uint16_t) \
+ (SD_LLDP_SYSTEM_CAPABILITIES_REPEATER| \
+ SD_LLDP_SYSTEM_CAPABILITIES_BRIDGE| \
+ SD_LLDP_SYSTEM_CAPABILITIES_WLAN_AP| \
+ SD_LLDP_SYSTEM_CAPABILITIES_ROUTER| \
+ SD_LLDP_SYSTEM_CAPABILITIES_DOCSIS| \
+ SD_LLDP_SYSTEM_CAPABILITIES_CVLAN| \
+ SD_LLDP_SYSTEM_CAPABILITIES_SVLAN| \
+ SD_LLDP_SYSTEM_CAPABILITIES_TPMR))
+
+#define SD_LLDP_OUI_802_1 (uint8_t[]) { 0x00, 0x80, 0xc2 }
+#define SD_LLDP_OUI_802_3 (uint8_t[]) { 0x00, 0x12, 0x0f }
+
+#define SD_LLDP_OUI_MUD (uint8_t[]) { 0x00, 0x00, 0x5E }
+#define SD_LLDP_OUI_SUBTYPE_MUD_USAGE_DESCRIPTION 0x01
+
+/* IEEE 802.1AB-2009 Annex E */
+enum {
+ SD_LLDP_OUI_802_1_SUBTYPE_PORT_VLAN_ID = 1,
+ SD_LLDP_OUI_802_1_SUBTYPE_PORT_PROTOCOL_VLAN_ID = 2,
+ SD_LLDP_OUI_802_1_SUBTYPE_VLAN_NAME = 3,
+ SD_LLDP_OUI_802_1_SUBTYPE_PROTOCOL_IDENTITY = 4,
+ SD_LLDP_OUI_802_1_SUBTYPE_VID_USAGE_DIGEST = 5,
+ SD_LLDP_OUI_802_1_SUBTYPE_MANAGEMENT_VID = 6,
+ SD_LLDP_OUI_802_1_SUBTYPE_LINK_AGGREGATION = 7,
+};
+
+/* IEEE 802.1AB-2009 Annex F */
+enum {
+ SD_LLDP_OUI_802_3_SUBTYPE_MAC_PHY_CONFIG_STATUS = 1,
+ SD_LLDP_OUI_802_3_SUBTYPE_POWER_VIA_MDI = 2,
+ SD_LLDP_OUI_802_3_SUBTYPE_LINK_AGGREGATION = 3,
+ SD_LLDP_OUI_802_3_SUBTYPE_MAXIMUM_FRAME_SIZE = 4,
+};
+
+typedef struct sd_lldp sd_lldp;
+typedef struct sd_lldp_neighbor sd_lldp_neighbor;
+
+typedef enum sd_lldp_event {
+ SD_LLDP_EVENT_ADDED,
+ SD_LLDP_EVENT_REMOVED,
+ SD_LLDP_EVENT_UPDATED,
+ SD_LLDP_EVENT_REFRESHED,
+ _SD_LLDP_EVENT_MAX,
+ _SD_LLDP_EVENT_INVALID = -1,
+} sd_lldp_event;
+
+typedef void (*sd_lldp_callback_t)(sd_lldp *lldp, sd_lldp_event event, sd_lldp_neighbor *n, void *userdata);
+
+int sd_lldp_new(sd_lldp **ret);
+sd_lldp* sd_lldp_ref(sd_lldp *lldp);
+sd_lldp* sd_lldp_unref(sd_lldp *lldp);
+
+int sd_lldp_start(sd_lldp *lldp);
+int sd_lldp_stop(sd_lldp *lldp);
+
+int sd_lldp_attach_event(sd_lldp *lldp, sd_event *event, int64_t priority);
+int sd_lldp_detach_event(sd_lldp *lldp);
+sd_event *sd_lldp_get_event(sd_lldp *lldp);
+
+int sd_lldp_set_callback(sd_lldp *lldp, sd_lldp_callback_t cb, void *userdata);
+int sd_lldp_set_ifindex(sd_lldp *lldp, int ifindex);
+
+/* Controls how much and what to store in the neighbors database */
+int sd_lldp_set_neighbors_max(sd_lldp *lldp, uint64_t n);
+int sd_lldp_match_capabilities(sd_lldp *lldp, uint16_t mask);
+int sd_lldp_set_filter_address(sd_lldp *lldp, const struct ether_addr *address);
+
+int sd_lldp_get_neighbors(sd_lldp *lldp, sd_lldp_neighbor ***neighbors);
+
+int sd_lldp_neighbor_from_raw(sd_lldp_neighbor **ret, const void *raw, size_t raw_size);
+sd_lldp_neighbor *sd_lldp_neighbor_ref(sd_lldp_neighbor *n);
+sd_lldp_neighbor *sd_lldp_neighbor_unref(sd_lldp_neighbor *n);
+
+/* Access to LLDP frame metadata */
+int sd_lldp_neighbor_get_source_address(sd_lldp_neighbor *n, struct ether_addr* address);
+int sd_lldp_neighbor_get_destination_address(sd_lldp_neighbor *n, struct ether_addr* address);
+int sd_lldp_neighbor_get_timestamp(sd_lldp_neighbor *n, clockid_t clock, uint64_t *ret);
+int sd_lldp_neighbor_get_raw(sd_lldp_neighbor *n, const void **ret, size_t *size);
+
+/* High-level, direct, parsed out field access. These fields exist at most once, hence may be queried directly. */
+int sd_lldp_neighbor_get_chassis_id(sd_lldp_neighbor *n, uint8_t *type, const void **ret, size_t *size);
+int sd_lldp_neighbor_get_chassis_id_as_string(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_port_id(sd_lldp_neighbor *n, uint8_t *type, const void **ret, size_t *size);
+int sd_lldp_neighbor_get_port_id_as_string(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_ttl(sd_lldp_neighbor *n, uint16_t *ret_sec);
+int sd_lldp_neighbor_get_system_name(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_system_description(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_port_description(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_mud_url(sd_lldp_neighbor *n, const char **ret);
+int sd_lldp_neighbor_get_system_capabilities(sd_lldp_neighbor *n, uint16_t *ret);
+int sd_lldp_neighbor_get_enabled_capabilities(sd_lldp_neighbor *n, uint16_t *ret);
+
+/* Low-level, iterative TLV access. This is for everything else, it iteratively goes through all available TLVs
+ * (including the ones covered with the calls above), and allows multiple TLVs for the same fields. */
+int sd_lldp_neighbor_tlv_rewind(sd_lldp_neighbor *n);
+int sd_lldp_neighbor_tlv_next(sd_lldp_neighbor *n);
+int sd_lldp_neighbor_tlv_get_type(sd_lldp_neighbor *n, uint8_t *type);
+int sd_lldp_neighbor_tlv_is_type(sd_lldp_neighbor *n, uint8_t type);
+int sd_lldp_neighbor_tlv_get_oui(sd_lldp_neighbor *n, uint8_t oui[_SD_ARRAY_STATIC 3], uint8_t *subtype);
+int sd_lldp_neighbor_tlv_is_oui(sd_lldp_neighbor *n, const uint8_t oui[_SD_ARRAY_STATIC 3], uint8_t subtype);
+int sd_lldp_neighbor_tlv_get_raw(sd_lldp_neighbor *n, const void **ret, size_t *size);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_lldp, sd_lldp_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_lldp_neighbor, sd_lldp_neighbor_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-login.h b/src/systemd/sd-login.h
new file mode 100644
index 0000000..6958d99
--- /dev/null
+++ b/src/systemd/sd-login.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdloginhfoo
+#define foosdloginhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+
+/*
+ * A few points:
+ *
+ * Instead of returning an empty string array or empty uid array, we
+ * may return NULL.
+ *
+ * Free the data the library returns with libc free(). String arrays
+ * are NULL terminated, and you need to free the array itself, in
+ * addition to the strings contained.
+ *
+ * We return error codes as negative errno, kernel-style. On success, we
+ * return 0 or positive.
+ *
+ * These functions access data in /proc, /sys/fs/cgroup, and /run. All
+ * of these are virtual file systems; therefore, accesses are
+ * relatively cheap.
+ *
+ * See sd-login(3) for more information.
+ */
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Get session from PID. Note that 'shared' processes of a user are
+ * not attached to a session, but only attached to a user. This will
+ * return an error for system processes and 'shared' processes of a
+ * user. */
+int sd_pid_get_session(pid_t pid, char **session);
+
+/* Get UID of the owner of the session of the PID (or in case the
+ * process is a 'shared' user process, the UID of that user is
+ * returned). This will not return the UID of the process, but rather
+ * the UID of the owner of the cgroup that the process is in. This will
+ * return an error for system processes. */
+int sd_pid_get_owner_uid(pid_t pid, uid_t *uid);
+
+/* Get systemd non-slice unit (i.e. service) name from PID, for system
+ * services. This will return an error for non-service processes. */
+int sd_pid_get_unit(pid_t pid, char **unit);
+
+/* Get systemd non-slice unit (i.e. service) name from PID, for user
+ * services. This will return an error for non-user-service
+ * processes. */
+int sd_pid_get_user_unit(pid_t pid, char **unit);
+
+/* Get slice name from PID. */
+int sd_pid_get_slice(pid_t pid, char **slice);
+
+/* Get user slice name from PID. */
+int sd_pid_get_user_slice(pid_t pid, char **slice);
+
+/* Get machine name from PID, for processes assigned to a VM or
+ * container. This will return an error for non-machine processes. */
+int sd_pid_get_machine_name(pid_t pid, char **machine);
+
+/* Get the control group from a PID, relative to the root of the
+ * hierarchy. */
+int sd_pid_get_cgroup(pid_t pid, char **cgroup);
+
+/* Similar to sd_pid_get_session(), but retrieves data about the peer
+ * of a connected AF_UNIX socket */
+int sd_peer_get_session(int fd, char **session);
+
+/* Similar to sd_pid_get_owner_uid(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_owner_uid(int fd, uid_t *uid);
+
+/* Similar to sd_pid_get_unit(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_unit(int fd, char **unit);
+
+/* Similar to sd_pid_get_user_unit(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_user_unit(int fd, char **unit);
+
+/* Similar to sd_pid_get_slice(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_slice(int fd, char **slice);
+
+/* Similar to sd_pid_get_user_slice(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
+int sd_peer_get_user_slice(int fd, char **slice);
+
+/* Similar to sd_pid_get_machine_name(), but retrieves data about the
+ * peer of a connected AF_UNIX socket */
+int sd_peer_get_machine_name(int fd, char **machine);
+
+/* Similar to sd_pid_get_cgroup(), but retrieves data about the peer
+ * of a connected AF_UNIX socket. */
+int sd_peer_get_cgroup(pid_t pid, char **cgroup);
+
+/* Get state from UID. Possible states: offline, lingering, online, active, closing */
+int sd_uid_get_state(uid_t uid, char **state);
+
+/* Return primary session of user, if there is any */
+int sd_uid_get_display(uid_t uid, char **session);
+
+/* Return 1 if UID has session on seat. If require_active is true, this will
+ * look for active sessions only. */
+int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat);
+
+/* Return sessions of user. If require_active is true, this will look for
+ * active sessions only. Returns the number of sessions.
+ * If sessions is NULL, this will just return the number of sessions. */
+int sd_uid_get_sessions(uid_t uid, int require_active, char ***sessions);
+
+/* Return seats of user is on. If require_active is true, this will look for
+ * active seats only. Returns the number of seats.
+ * If seats is NULL, this will just return the number of seats. */
+int sd_uid_get_seats(uid_t uid, int require_active, char ***seats);
+
+/* Return 1 if the session is active. */
+int sd_session_is_active(const char *session);
+
+/* Return 1 if the session is remote. */
+int sd_session_is_remote(const char *session);
+
+/* Get state from session. Possible states: online, active, closing.
+ * This function is a more generic version of sd_session_is_active(). */
+int sd_session_get_state(const char *session, char **state);
+
+/* Determine user ID of session */
+int sd_session_get_uid(const char *session, uid_t *uid);
+
+/* Determine seat of session */
+int sd_session_get_seat(const char *session, char **seat);
+
+/* Determine the (PAM) service name this session was registered by. */
+int sd_session_get_service(const char *session, char **service);
+
+/* Determine the type of this session, i.e. one of "tty", "x11", "wayland", "mir" or "unspecified". */
+int sd_session_get_type(const char *session, char **type);
+
+/* Determine the class of this session, i.e. one of "user", "greeter" or "lock-screen". */
+int sd_session_get_class(const char *session, char **clazz);
+
+/* Determine the desktop brand of this session, i.e. something like "GNOME", "KDE" or "systemd-console". */
+int sd_session_get_desktop(const char *session, char **desktop);
+
+/* Determine the X11 display of this session. */
+int sd_session_get_display(const char *session, char **display);
+
+/* Determine the remote host of this session. */
+int sd_session_get_remote_host(const char *session, char **remote_host);
+
+/* Determine the remote user of this session (if provided by PAM). */
+int sd_session_get_remote_user(const char *session, char **remote_user);
+
+/* Determine the TTY of this session. */
+int sd_session_get_tty(const char *session, char **display);
+
+/* Determine the VT number of this session. */
+int sd_session_get_vt(const char *session, unsigned *vtnr);
+
+/* Return active session and user of seat */
+int sd_seat_get_active(const char *seat, char **session, uid_t *uid);
+
+/* Return sessions and users on seat. Returns number of sessions.
+ * If sessions is NULL, this returns only the number of sessions. */
+int sd_seat_get_sessions(
+ const char *seat,
+ char ***ret_sessions,
+ uid_t **ret_uids,
+ unsigned *ret_n_uids);
+
+/* Return whether the seat is multi-session capable */
+int sd_seat_can_multi_session(const char *seat) _sd_deprecated_;
+
+/* Return whether the seat is TTY capable, i.e. suitable for showing console UIs */
+int sd_seat_can_tty(const char *seat);
+
+/* Return whether the seat is graphics capable, i.e. suitable for showing graphical UIs */
+int sd_seat_can_graphical(const char *seat);
+
+/* Return the class of machine */
+int sd_machine_get_class(const char *machine, char **clazz);
+
+/* Return the list if host-side network interface indices of a machine */
+int sd_machine_get_ifindices(const char *machine, int **ret_ifindices);
+
+/* Get all seats, store in *seats. Returns the number of seats. If
+ * seats is NULL, this only returns the number of seats. */
+int sd_get_seats(char ***seats);
+
+/* Get all sessions, store in *sessions. Returns the number of
+ * sessions. If sessions is NULL, this only returns the number of sessions. */
+int sd_get_sessions(char ***sessions);
+
+/* Get all logged in users, store in *users. Returns the number of
+ * users. If users is NULL, this only returns the number of users. */
+int sd_get_uids(uid_t **users);
+
+/* Get all running virtual machines/containers */
+int sd_get_machine_names(char ***machines);
+
+/* Monitor object */
+typedef struct sd_login_monitor sd_login_monitor;
+
+/* Create a new monitor. Category must be NULL, "seat", "session",
+ * "uid", or "machine" to get monitor events for the specific category
+ * (or all). */
+int sd_login_monitor_new(const char *category, sd_login_monitor** ret);
+
+/* Destroys the passed monitor. Returns NULL. */
+sd_login_monitor* sd_login_monitor_unref(sd_login_monitor *m);
+
+/* Flushes the monitor */
+int sd_login_monitor_flush(sd_login_monitor *m);
+
+/* Get FD from monitor */
+int sd_login_monitor_get_fd(sd_login_monitor *m);
+
+/* Get poll() mask to monitor */
+int sd_login_monitor_get_events(sd_login_monitor *m);
+
+/* Get timeout for poll(), as usec value relative to CLOCK_MONOTONIC's epoch */
+int sd_login_monitor_get_timeout(sd_login_monitor *m, uint64_t *timeout_usec);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_login_monitor, sd_login_monitor_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h
new file mode 100644
index 0000000..97ba02f
--- /dev/null
+++ b/src/systemd/sd-messages.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdmessageshfoo
+#define foosdmessageshfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "sd-id128.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Hey! If you add a new message here, you *must* also update the message catalog with an appropriate explanation */
+
+/* And if you add a new ID here, make sure to generate a random one with "systemd-id128 new". Do not use any other IDs,
+ * and do not count them up manually. */
+
+#define SD_MESSAGE_JOURNAL_START SD_ID128_MAKE(f7,73,79,a8,49,0b,40,8b,be,5f,69,40,50,5a,77,7b)
+#define SD_MESSAGE_JOURNAL_START_STR SD_ID128_MAKE_STR(f7,73,79,a8,49,0b,40,8b,be,5f,69,40,50,5a,77,7b)
+#define SD_MESSAGE_JOURNAL_STOP SD_ID128_MAKE(d9,3f,b3,c9,c2,4d,45,1a,97,ce,a6,15,ce,59,c0,0b)
+#define SD_MESSAGE_JOURNAL_STOP_STR SD_ID128_MAKE_STR(d9,3f,b3,c9,c2,4d,45,1a,97,ce,a6,15,ce,59,c0,0b)
+#define SD_MESSAGE_JOURNAL_DROPPED SD_ID128_MAKE(a5,96,d6,fe,7b,fa,49,94,82,8e,72,30,9e,95,d6,1e)
+#define SD_MESSAGE_JOURNAL_DROPPED_STR SD_ID128_MAKE_STR(a5,96,d6,fe,7b,fa,49,94,82,8e,72,30,9e,95,d6,1e)
+#define SD_MESSAGE_JOURNAL_MISSED SD_ID128_MAKE(e9,bf,28,e6,e8,34,48,1b,b6,f4,8f,54,8a,d1,36,06)
+#define SD_MESSAGE_JOURNAL_MISSED_STR SD_ID128_MAKE_STR(e9,bf,28,e6,e8,34,48,1b,b6,f4,8f,54,8a,d1,36,06)
+#define SD_MESSAGE_JOURNAL_USAGE SD_ID128_MAKE(ec,38,7f,57,7b,84,4b,8f,a9,48,f3,3c,ad,9a,75,e6)
+#define SD_MESSAGE_JOURNAL_USAGE_STR SD_ID128_MAKE_STR(ec,38,7f,57,7b,84,4b,8f,a9,48,f3,3c,ad,9a,75,e6)
+
+#define SD_MESSAGE_COREDUMP SD_ID128_MAKE(fc,2e,22,bc,6e,e6,47,b6,b9,07,29,ab,34,a2,50,b1)
+#define SD_MESSAGE_COREDUMP_STR SD_ID128_MAKE_STR(fc,2e,22,bc,6e,e6,47,b6,b9,07,29,ab,34,a2,50,b1)
+#define SD_MESSAGE_TRUNCATED_CORE SD_ID128_MAKE(5a,ad,d8,e9,54,dc,4b,1a,8c,95,4d,63,fd,9e,11,37)
+#define SD_MESSAGE_TRUNCATED_CORE_STR SD_ID128_MAKE_STR(5a,ad,d8,e9,54,dc,4b,1a,8c,95,4d,63,fd,9e,11,37)
+#define SD_MESSAGE_BACKTRACE SD_ID128_MAKE(1f,4e,0a,44,a8,86,49,93,9a,ae,a3,4f,c6,da,8c,95)
+#define SD_MESSAGE_BACKTRACE_STR SD_ID128_MAKE_STR(1f,4e,0a,44,a8,86,49,93,9a,ae,a3,4f,c6,da,8c,95)
+
+#define SD_MESSAGE_SESSION_START SD_ID128_MAKE(8d,45,62,0c,1a,43,48,db,b1,74,10,da,57,c6,0c,66)
+#define SD_MESSAGE_SESSION_START_STR SD_ID128_MAKE_STR(8d,45,62,0c,1a,43,48,db,b1,74,10,da,57,c6,0c,66)
+#define SD_MESSAGE_SESSION_STOP SD_ID128_MAKE(33,54,93,94,24,b4,45,6d,98,02,ca,83,33,ed,42,4a)
+#define SD_MESSAGE_SESSION_STOP_STR SD_ID128_MAKE_STR(33,54,93,94,24,b4,45,6d,98,02,ca,83,33,ed,42,4a)
+#define SD_MESSAGE_SEAT_START SD_ID128_MAKE(fc,be,fc,5d,a2,3d,42,80,93,f9,7c,82,a9,29,0f,7b)
+#define SD_MESSAGE_SEAT_START_STR SD_ID128_MAKE_STR(fc,be,fc,5d,a2,3d,42,80,93,f9,7c,82,a9,29,0f,7b)
+#define SD_MESSAGE_SEAT_STOP SD_ID128_MAKE(e7,85,2b,fe,46,78,4e,d0,ac,cd,e0,4b,c8,64,c2,d5)
+#define SD_MESSAGE_SEAT_STOP_STR SD_ID128_MAKE_STR(e7,85,2b,fe,46,78,4e,d0,ac,cd,e0,4b,c8,64,c2,d5)
+#define SD_MESSAGE_MACHINE_START SD_ID128_MAKE(24,d8,d4,45,25,73,40,24,96,06,83,81,a6,31,2d,f2)
+#define SD_MESSAGE_MACHINE_START_STR SD_ID128_MAKE_STR(24,d8,d4,45,25,73,40,24,96,06,83,81,a6,31,2d,f2)
+#define SD_MESSAGE_MACHINE_STOP SD_ID128_MAKE(58,43,2b,d3,ba,ce,47,7c,b5,14,b5,63,81,b8,a7,58)
+#define SD_MESSAGE_MACHINE_STOP_STR SD_ID128_MAKE_STR(58,43,2b,d3,ba,ce,47,7c,b5,14,b5,63,81,b8,a7,58)
+
+#define SD_MESSAGE_TIME_CHANGE SD_ID128_MAKE(c7,a7,87,07,9b,35,4e,aa,a9,e7,7b,37,18,93,cd,27)
+#define SD_MESSAGE_TIME_CHANGE_STR SD_ID128_MAKE_STR(c7,a7,87,07,9b,35,4e,aa,a9,e7,7b,37,18,93,cd,27)
+#define SD_MESSAGE_TIMEZONE_CHANGE SD_ID128_MAKE(45,f8,2f,4a,ef,7a,4b,bf,94,2c,e8,61,d1,f2,09,90)
+#define SD_MESSAGE_TIMEZONE_CHANGE_STR SD_ID128_MAKE_STR(45,f8,2f,4a,ef,7a,4b,bf,94,2c,e8,61,d1,f2,09,90)
+
+#define SD_MESSAGE_TAINTED SD_ID128_MAKE(50,87,6a,9d,b0,0f,4c,40,bd,e1,a2,ad,38,1c,3a,1b)
+#define SD_MESSAGE_TAINTED_STR SD_ID128_MAKE_STR(50,87,6a,9d,b0,0f,4c,40,bd,e1,a2,ad,38,1c,3a,1b)
+#define SD_MESSAGE_STARTUP_FINISHED SD_ID128_MAKE(b0,7a,24,9c,d0,24,41,4a,82,dd,00,cd,18,13,78,ff)
+#define SD_MESSAGE_STARTUP_FINISHED_STR SD_ID128_MAKE_STR(b0,7a,24,9c,d0,24,41,4a,82,dd,00,cd,18,13,78,ff)
+#define SD_MESSAGE_USER_STARTUP_FINISHED \
+ SD_ID128_MAKE(ee,d0,0a,68,ff,d8,4e,31,88,21,05,fd,97,3a,bd,d1)
+#define SD_MESSAGE_USER_STARTUP_FINISHED_STR \
+ SD_ID128_MAKE_STR(ee,d0,0a,68,ff,d8,4e,31,88,21,05,fd,97,3a,bd,d1)
+
+#define SD_MESSAGE_SLEEP_START SD_ID128_MAKE(6b,bd,95,ee,97,79,41,e4,97,c4,8b,e2,7c,25,41,28)
+#define SD_MESSAGE_SLEEP_START_STR SD_ID128_MAKE_STR(6b,bd,95,ee,97,79,41,e4,97,c4,8b,e2,7c,25,41,28)
+#define SD_MESSAGE_SLEEP_STOP SD_ID128_MAKE(88,11,e6,df,2a,8e,40,f5,8a,94,ce,a2,6f,8e,bf,14)
+#define SD_MESSAGE_SLEEP_STOP_STR SD_ID128_MAKE_STR(88,11,e6,df,2a,8e,40,f5,8a,94,ce,a2,6f,8e,bf,14)
+
+#define SD_MESSAGE_SHUTDOWN SD_ID128_MAKE(98,26,88,66,d1,d5,4a,49,9c,4e,98,92,1d,93,bc,40)
+#define SD_MESSAGE_SHUTDOWN_STR SD_ID128_MAKE_STR(98,26,88,66,d1,d5,4a,49,9c,4e,98,92,1d,93,bc,40)
+
+/* The messages below are actually about jobs, not really about units, the macros are misleadingly named. Moreover
+ * SD_MESSAGE_UNIT_FAILED is not actually about a failing unit but about a failed start job. A job either finishes with
+ * SD_MESSAGE_UNIT_STARTED or with SD_MESSAGE_UNIT_FAILED hence. */
+#define SD_MESSAGE_UNIT_STARTING SD_ID128_MAKE(7d,49,58,e8,42,da,4a,75,8f,6c,1c,dc,7b,36,dc,c5)
+#define SD_MESSAGE_UNIT_STARTING_STR SD_ID128_MAKE_STR(7d,49,58,e8,42,da,4a,75,8f,6c,1c,dc,7b,36,dc,c5)
+#define SD_MESSAGE_UNIT_STARTED SD_ID128_MAKE(39,f5,34,79,d3,a0,45,ac,8e,11,78,62,48,23,1f,bf)
+#define SD_MESSAGE_UNIT_STARTED_STR SD_ID128_MAKE_STR(39,f5,34,79,d3,a0,45,ac,8e,11,78,62,48,23,1f,bf)
+#define SD_MESSAGE_UNIT_FAILED SD_ID128_MAKE(be,02,cf,68,55,d2,42,8b,a4,0d,f7,e9,d0,22,f0,3d)
+#define SD_MESSAGE_UNIT_FAILED_STR SD_ID128_MAKE_STR(be,02,cf,68,55,d2,42,8b,a4,0d,f7,e9,d0,22,f0,3d)
+#define SD_MESSAGE_UNIT_STOPPING SD_ID128_MAKE(de,5b,42,6a,63,be,47,a7,b6,ac,3e,aa,c8,2e,2f,6f)
+#define SD_MESSAGE_UNIT_STOPPING_STR SD_ID128_MAKE_STR(de,5b,42,6a,63,be,47,a7,b6,ac,3e,aa,c8,2e,2f,6f)
+#define SD_MESSAGE_UNIT_STOPPED SD_ID128_MAKE(9d,1a,aa,27,d6,01,40,bd,96,36,54,38,aa,d2,02,86)
+#define SD_MESSAGE_UNIT_STOPPED_STR SD_ID128_MAKE_STR(9d,1a,aa,27,d6,01,40,bd,96,36,54,38,aa,d2,02,86)
+#define SD_MESSAGE_UNIT_RELOADING SD_ID128_MAKE(d3,4d,03,7f,ff,18,47,e6,ae,66,9a,37,0e,69,47,25)
+#define SD_MESSAGE_UNIT_RELOADING_STR SD_ID128_MAKE_STR(d3,4d,03,7f,ff,18,47,e6,ae,66,9a,37,0e,69,47,25)
+#define SD_MESSAGE_UNIT_RELOADED SD_ID128_MAKE(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54)
+#define SD_MESSAGE_UNIT_RELOADED_STR SD_ID128_MAKE_STR(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54)
+
+#define SD_MESSAGE_UNIT_RESTART_SCHEDULED SD_ID128_MAKE(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
+#define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR \
+ SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
+
+#define SD_MESSAGE_UNIT_RESOURCES SD_ID128_MAKE(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+#define SD_MESSAGE_UNIT_RESOURCES_STR SD_ID128_MAKE_STR(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+
+#define SD_MESSAGE_UNIT_SUCCESS SD_ID128_MAKE(7a,d2,d1,89,f7,e9,4e,70,a3,8c,78,13,54,91,24,48)
+#define SD_MESSAGE_UNIT_SUCCESS_STR SD_ID128_MAKE_STR(7a,d2,d1,89,f7,e9,4e,70,a3,8c,78,13,54,91,24,48)
+#define SD_MESSAGE_UNIT_SKIPPED SD_ID128_MAKE(0e,42,84,a0,ca,ca,4b,fc,81,c0,bb,67,86,97,26,73)
+#define SD_MESSAGE_UNIT_SKIPPED_STR SD_ID128_MAKE_STR(0e,42,84,a0,ca,ca,4b,fc,81,c0,bb,67,86,97,26,73)
+#define SD_MESSAGE_UNIT_FAILURE_RESULT SD_ID128_MAKE(d9,b3,73,ed,55,a6,4f,eb,82,42,e0,2d,be,79,a4,9c)
+#define SD_MESSAGE_UNIT_FAILURE_RESULT_STR \
+ SD_ID128_MAKE_STR(d9,b3,73,ed,55,a6,4f,eb,82,42,e0,2d,be,79,a4,9c)
+
+#define SD_MESSAGE_SPAWN_FAILED SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
+#define SD_MESSAGE_SPAWN_FAILED_STR SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
+
+#define SD_MESSAGE_UNIT_PROCESS_EXIT SD_ID128_MAKE(98,e3,22,20,3f,7a,4e,d2,90,d0,9f,e0,3c,09,fe,15)
+#define SD_MESSAGE_UNIT_PROCESS_EXIT_STR SD_ID128_MAKE_STR(98,e3,22,20,3f,7a,4e,d2,90,d0,9f,e0,3c,09,fe,15)
+
+#define SD_MESSAGE_FORWARD_SYSLOG_MISSED SD_ID128_MAKE(00,27,22,9c,a0,64,41,81,a7,6c,4e,92,45,8a,fa,2e)
+#define SD_MESSAGE_FORWARD_SYSLOG_MISSED_STR \
+ SD_ID128_MAKE_STR(00,27,22,9c,a0,64,41,81,a7,6c,4e,92,45,8a,fa,2e)
+
+#define SD_MESSAGE_OVERMOUNTING SD_ID128_MAKE(1d,ee,03,69,c7,fc,47,36,b7,09,9b,38,ec,b4,6e,e7)
+#define SD_MESSAGE_OVERMOUNTING_STR SD_ID128_MAKE_STR(1d,ee,03,69,c7,fc,47,36,b7,09,9b,38,ec,b4,6e,e7)
+
+#define SD_MESSAGE_UNIT_OOMD_KILL SD_ID128_MAKE(d9,89,61,1b,15,e4,4c,9d,bf,31,e3,c8,12,56,e4,ed)
+#define SD_MESSAGE_UNIT_OOMD_KILL_STR SD_ID128_MAKE_STR(d9,89,61,1b,15,e4,4c,9d,bf,31,e3,c8,12,56,e4,ed)
+
+#define SD_MESSAGE_UNIT_OUT_OF_MEMORY SD_ID128_MAKE(fe,6f,aa,94,e7,77,46,63,a0,da,52,71,78,91,d8,ef)
+#define SD_MESSAGE_UNIT_OUT_OF_MEMORY_STR SD_ID128_MAKE_STR(fe,6f,aa,94,e7,77,46,63,a0,da,52,71,78,91,d8,ef)
+
+#define SD_MESSAGE_LID_OPENED SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,6f)
+#define SD_MESSAGE_LID_OPENED_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,6f)
+#define SD_MESSAGE_LID_CLOSED SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,70)
+#define SD_MESSAGE_LID_CLOSED_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,70)
+#define SD_MESSAGE_SYSTEM_DOCKED SD_ID128_MAKE(f5,f4,16,b8,62,07,4b,28,92,7a,48,c3,ba,7d,51,ff)
+#define SD_MESSAGE_SYSTEM_DOCKED_STR SD_ID128_MAKE_STR(f5,f4,16,b8,62,07,4b,28,92,7a,48,c3,ba,7d,51,ff)
+#define SD_MESSAGE_SYSTEM_UNDOCKED SD_ID128_MAKE(51,e1,71,bd,58,52,48,56,81,10,14,4c,51,7c,ca,53)
+#define SD_MESSAGE_SYSTEM_UNDOCKED_STR SD_ID128_MAKE_STR(51,e1,71,bd,58,52,48,56,81,10,14,4c,51,7c,ca,53)
+#define SD_MESSAGE_POWER_KEY SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,71)
+#define SD_MESSAGE_POWER_KEY_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,71)
+#define SD_MESSAGE_REBOOT_KEY SD_ID128_MAKE(9f,a9,d2,c0,12,13,4e,c3,85,45,1f,fe,31,6f,97,d0)
+#define SD_MESSAGE_REBOOT_KEY_STR SD_ID128_MAKE_STR(9f,a9,d2,c0,12,13,4e,c3,85,45,1f,fe,31,6f,97,d0)
+#define SD_MESSAGE_SUSPEND_KEY SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,72)
+#define SD_MESSAGE_SUSPEND_KEY_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,72)
+#define SD_MESSAGE_HIBERNATE_KEY SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,73)
+#define SD_MESSAGE_HIBERNATE_KEY_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,73)
+
+#define SD_MESSAGE_INVALID_CONFIGURATION SD_ID128_MAKE(c7,72,d2,4e,9a,88,4c,be,b9,ea,12,62,5c,30,6c,01)
+#define SD_MESSAGE_INVALID_CONFIGURATION_STR \
+ SD_ID128_MAKE_STR(c7,72,d2,4e,9a,88,4c,be,b9,ea,12,62,5c,30,6c,01)
+
+#define SD_MESSAGE_DNSSEC_FAILURE SD_ID128_MAKE(16,75,d7,f1,72,17,40,98,b1,10,8b,f8,c7,dc,8f,5d)
+#define SD_MESSAGE_DNSSEC_FAILURE_STR SD_ID128_MAKE_STR(16,75,d7,f1,72,17,40,98,b1,10,8b,f8,c7,dc,8f,5d)
+#define SD_MESSAGE_DNSSEC_TRUST_ANCHOR_REVOKED \
+ SD_ID128_MAKE(4d,44,08,cf,d0,d1,44,85,91,84,d1,e6,5d,7c,8a,65)
+#define SD_MESSAGE_DNSSEC_TRUST_ANCHOR_REVOKED_STR \
+ SD_ID128_MAKE_STR(4d,44,08,cf,d0,d1,44,85,91,84,d1,e6,5d,7c,8a,65)
+#define SD_MESSAGE_DNSSEC_DOWNGRADE SD_ID128_MAKE(36,db,2d,fa,5a,90,45,e1,bd,4a,f5,f9,3e,1c,f0,57)
+#define SD_MESSAGE_DNSSEC_DOWNGRADE_STR SD_ID128_MAKE_STR(36,db,2d,fa,5a,90,45,e1,bd,4a,f5,f9,3e,1c,f0,57)
+
+#define SD_MESSAGE_UNSAFE_USER_NAME SD_ID128_MAKE(b6,1f,da,c6,12,e9,4b,91,82,28,5b,99,88,43,06,1f)
+#define SD_MESSAGE_UNSAFE_USER_NAME_STR SD_ID128_MAKE_STR(b6,1f,da,c6,12,e9,4b,91,82,28,5b,99,88,43,06,1f)
+
+#define SD_MESSAGE_MOUNT_POINT_PATH_NOT_SUITABLE \
+ SD_ID128_MAKE(1b,3b,b9,40,37,f0,4b,bf,81,02,8e,13,5a,12,d2,93)
+#define SD_MESSAGE_MOUNT_POINT_PATH_NOT_SUITABLE_STR \
+ SD_ID128_MAKE_STR(1b,3b,b9,40,37,f0,4b,bf,81,02,8e,13,5a,12,d2,93)
+
+#define SD_MESSAGE_NOBODY_USER_UNSUITABLE SD_ID128_MAKE(b4,80,32,5f,9c,39,4a,7b,80,2c,23,1e,51,a2,75,2c)
+#define SD_MESSAGE_NOBODY_USER_UNSUITABLE_STR \
+ SD_ID128_MAKE_STR(b4,80,32,5f,9c,39,4a,7b,80,2c,23,1e,51,a2,75,2c)
+
+#define SD_MESSAGE_SYSTEMD_UDEV_SETTLE_DEPRECATED \
+ SD_ID128_MAKE(1c,04,54,c1,bd,22,41,e0,ac,6f,ef,b4,bc,63,14,33)
+#define SD_MESSAGE_SYSTEMD_UDEV_SETTLE_DEPRECATED_STR \
+ SD_ID128_MAKE_STR(1c,04,54,c1,bd,22,41,e0,ac,6f,ef,b4,bc,63,14,33)
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-ndisc.h b/src/systemd/sd-ndisc.h
new file mode 100644
index 0000000..c0e3789
--- /dev/null
+++ b/src/systemd/sd-ndisc.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdndiscfoo
+#define foosdndiscfoo
+
+/***
+ Copyright © 2014 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Neighbor Discovery Options, RFC 4861, Section 4.6 and
+ * https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml#icmpv6-parameters-5 */
+enum {
+ SD_NDISC_OPTION_SOURCE_LL_ADDRESS = 1,
+ SD_NDISC_OPTION_TARGET_LL_ADDRESS = 2,
+ SD_NDISC_OPTION_PREFIX_INFORMATION = 3,
+ SD_NDISC_OPTION_MTU = 5,
+ SD_NDISC_OPTION_ROUTE_INFORMATION = 24,
+ SD_NDISC_OPTION_RDNSS = 25,
+ SD_NDISC_OPTION_FLAGS_EXTENSION = 26,
+ SD_NDISC_OPTION_DNSSL = 31,
+ SD_NDISC_OPTION_CAPTIVE_PORTAL = 37,
+};
+
+/* Route preference, RFC 4191, Section 2.1 */
+enum {
+ SD_NDISC_PREFERENCE_LOW = 3U,
+ SD_NDISC_PREFERENCE_MEDIUM = 0U,
+ SD_NDISC_PREFERENCE_HIGH = 1U,
+};
+
+typedef struct sd_ndisc sd_ndisc;
+typedef struct sd_ndisc_router sd_ndisc_router;
+
+typedef enum sd_ndisc_event {
+ SD_NDISC_EVENT_TIMEOUT,
+ SD_NDISC_EVENT_ROUTER,
+ _SD_NDISC_EVENT_MAX,
+ _SD_NDISC_EVENT_INVALID = -1,
+} sd_ndisc_event;
+
+typedef void (*sd_ndisc_callback_t)(sd_ndisc *nd, sd_ndisc_event event, sd_ndisc_router *rt, void *userdata);
+
+int sd_ndisc_new(sd_ndisc **ret);
+sd_ndisc *sd_ndisc_ref(sd_ndisc *nd);
+sd_ndisc *sd_ndisc_unref(sd_ndisc *nd);
+
+int sd_ndisc_start(sd_ndisc *nd);
+int sd_ndisc_stop(sd_ndisc *nd);
+
+int sd_ndisc_attach_event(sd_ndisc *nd, sd_event *event, int64_t priority);
+int sd_ndisc_detach_event(sd_ndisc *nd);
+sd_event *sd_ndisc_get_event(sd_ndisc *nd);
+
+int sd_ndisc_set_callback(sd_ndisc *nd, sd_ndisc_callback_t cb, void *userdata);
+int sd_ndisc_set_ifindex(sd_ndisc *nd, int interface_index);
+int sd_ndisc_set_mac(sd_ndisc *nd, const struct ether_addr *mac_addr);
+
+int sd_ndisc_get_mtu(sd_ndisc *nd, uint32_t *ret);
+int sd_ndisc_get_hop_limit(sd_ndisc *nd, uint8_t *ret);
+
+int sd_ndisc_router_from_raw(sd_ndisc_router **ret, const void *raw, size_t raw_size);
+sd_ndisc_router *sd_ndisc_router_ref(sd_ndisc_router *rt);
+sd_ndisc_router *sd_ndisc_router_unref(sd_ndisc_router *rt);
+
+int sd_ndisc_router_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr);
+int sd_ndisc_router_get_timestamp(sd_ndisc_router *rt, clockid_t clock, uint64_t *ret);
+int sd_ndisc_router_get_raw(sd_ndisc_router *rt, const void **ret, size_t *size);
+
+int sd_ndisc_router_get_hop_limit(sd_ndisc_router *rt, uint8_t *ret);
+int sd_ndisc_router_get_flags(sd_ndisc_router *rt, uint64_t *ret_flags);
+int sd_ndisc_router_get_preference(sd_ndisc_router *rt, unsigned *ret);
+int sd_ndisc_router_get_lifetime(sd_ndisc_router *rt, uint16_t *ret_lifetime);
+int sd_ndisc_router_get_mtu(sd_ndisc_router *rt, uint32_t *ret);
+
+/* Generic option access */
+int sd_ndisc_router_option_rewind(sd_ndisc_router *rt);
+int sd_ndisc_router_option_next(sd_ndisc_router *rt);
+int sd_ndisc_router_option_get_type(sd_ndisc_router *rt, uint8_t *ret);
+int sd_ndisc_router_option_is_type(sd_ndisc_router *rt, uint8_t type);
+int sd_ndisc_router_option_get_raw(sd_ndisc_router *rt, const void **ret, size_t *size);
+
+/* Specific option access: SD_NDISC_OPTION_PREFIX_INFORMATION */
+int sd_ndisc_router_prefix_get_valid_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+int sd_ndisc_router_prefix_get_preferred_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+int sd_ndisc_router_prefix_get_flags(sd_ndisc_router *rt, uint8_t *ret);
+int sd_ndisc_router_prefix_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr);
+int sd_ndisc_router_prefix_get_prefixlen(sd_ndisc_router *rt, unsigned *prefixlen);
+
+/* Specific option access: SD_NDISC_OPTION_ROUTE_INFORMATION */
+int sd_ndisc_router_route_get_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+int sd_ndisc_router_route_get_address(sd_ndisc_router *rt, struct in6_addr *ret_addr);
+int sd_ndisc_router_route_get_prefixlen(sd_ndisc_router *rt, unsigned *prefixlen);
+int sd_ndisc_router_route_get_preference(sd_ndisc_router *rt, unsigned *ret);
+
+/* Specific option access: SD_NDISC_OPTION_RDNSS */
+int sd_ndisc_router_rdnss_get_addresses(sd_ndisc_router *rt, const struct in6_addr **ret);
+int sd_ndisc_router_rdnss_get_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+
+/* Specific option access: SD_NDISC_OPTION_DNSSL */
+int sd_ndisc_router_dnssl_get_domains(sd_ndisc_router *rt, char ***ret);
+int sd_ndisc_router_dnssl_get_lifetime(sd_ndisc_router *rt, uint32_t *ret);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ndisc, sd_ndisc_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ndisc_router, sd_ndisc_router_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-netlink.h b/src/systemd/sd-netlink.h
new file mode 100644
index 0000000..bf6d1e4
--- /dev/null
+++ b/src/systemd/sd-netlink.h
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdnetlinkhfoo
+#define foosdnetlinkhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <linux/neighbour.h>
+#include <linux/rtnetlink.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+typedef struct sd_netlink sd_netlink;
+typedef struct sd_genl_socket sd_genl_socket;
+typedef struct sd_netlink_message sd_netlink_message;
+typedef struct sd_netlink_slot sd_netlink_slot;
+
+typedef enum sd_gen_family {
+ SD_GENL_ERROR,
+ SD_GENL_DONE,
+ SD_GENL_ID_CTRL,
+ SD_GENL_WIREGUARD,
+ SD_GENL_FOU,
+ SD_GENL_L2TP,
+ SD_GENL_MACSEC,
+ SD_GENL_NL80211,
+} sd_genl_family;
+
+/* callback */
+
+typedef int (*sd_netlink_message_handler_t)(sd_netlink *nl, sd_netlink_message *m, void *userdata);
+typedef _sd_destroy_t sd_netlink_destroy_t;
+
+/* bus */
+int sd_netlink_new_from_netlink(sd_netlink **nl, int fd);
+int sd_netlink_open(sd_netlink **nl);
+int sd_netlink_open_fd(sd_netlink **nl, int fd);
+int sd_netlink_inc_rcvbuf(sd_netlink *nl, const size_t size);
+
+sd_netlink *sd_netlink_ref(sd_netlink *nl);
+sd_netlink *sd_netlink_unref(sd_netlink *nl);
+
+int sd_netlink_send(sd_netlink *nl, sd_netlink_message *message, uint32_t *serial);
+int sd_netlink_call_async(sd_netlink *nl, sd_netlink_slot **ret_slot, sd_netlink_message *message,
+ sd_netlink_message_handler_t callback, sd_netlink_destroy_t destoy_callback,
+ void *userdata, uint64_t usec, const char *description);
+int sd_netlink_call(sd_netlink *nl, sd_netlink_message *message, uint64_t timeout,
+ sd_netlink_message **reply);
+
+int sd_netlink_get_events(const sd_netlink *nl);
+int sd_netlink_get_timeout(const sd_netlink *nl, uint64_t *timeout);
+int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret);
+int sd_netlink_wait(sd_netlink *nl, uint64_t timeout);
+
+int sd_netlink_add_match(sd_netlink *nl, sd_netlink_slot **ret_slot, uint16_t match,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata, const char *description);
+
+int sd_netlink_attach_event(sd_netlink *nl, sd_event *e, int64_t priority);
+int sd_netlink_detach_event(sd_netlink *nl);
+
+int sd_netlink_message_append_string(sd_netlink_message *m, unsigned short type, const char *data);
+int sd_netlink_message_append_strv(sd_netlink_message *m, unsigned short type, char * const *data);
+int sd_netlink_message_append_flag(sd_netlink_message *m, unsigned short type);
+int sd_netlink_message_append_u8(sd_netlink_message *m, unsigned short type, uint8_t data);
+int sd_netlink_message_append_u16(sd_netlink_message *m, unsigned short type, uint16_t data);
+int sd_netlink_message_append_u32(sd_netlink_message *m, unsigned short type, uint32_t data);
+int sd_netlink_message_append_u64(sd_netlink_message *m, unsigned short type, uint64_t data);
+int sd_netlink_message_append_s8(sd_netlink_message *m, unsigned short type, int8_t data);
+int sd_netlink_message_append_s16(sd_netlink_message *m, unsigned short type, int16_t data);
+int sd_netlink_message_append_s32(sd_netlink_message *m, unsigned short type, int32_t data);
+int sd_netlink_message_append_s64(sd_netlink_message *m, unsigned short type, int64_t data);
+int sd_netlink_message_append_data(sd_netlink_message *m, unsigned short type, const void *data, size_t len);
+int sd_netlink_message_append_in_addr(sd_netlink_message *m, unsigned short type, const struct in_addr *data);
+int sd_netlink_message_append_in6_addr(sd_netlink_message *m, unsigned short type, const struct in6_addr *data);
+int sd_netlink_message_append_sockaddr_in(sd_netlink_message *m, unsigned short type, const struct sockaddr_in *data);
+int sd_netlink_message_append_sockaddr_in6(sd_netlink_message *m, unsigned short type, const struct sockaddr_in6 *data);
+int sd_netlink_message_append_ether_addr(sd_netlink_message *m, unsigned short type, const struct ether_addr *data);
+int sd_netlink_message_append_cache_info(sd_netlink_message *m, unsigned short type, const struct ifa_cacheinfo *info);
+
+int sd_netlink_message_open_container(sd_netlink_message *m, unsigned short type);
+int sd_netlink_message_open_container_union(sd_netlink_message *m, unsigned short type, const char *key);
+int sd_netlink_message_close_container(sd_netlink_message *m);
+
+int sd_netlink_message_read(sd_netlink_message *m, unsigned short type, size_t size, void *data);
+int sd_netlink_message_read_data(sd_netlink_message *m, unsigned short type, size_t *ret_size, void **ret_data);
+int sd_netlink_message_read_string_strdup(sd_netlink_message *m, unsigned short type, char **data);
+int sd_netlink_message_read_string(sd_netlink_message *m, unsigned short type, const char **data);
+int sd_netlink_message_read_strv(sd_netlink_message *m, unsigned short container_type, unsigned short type_id, char ***ret);
+int sd_netlink_message_read_u8(sd_netlink_message *m, unsigned short type, uint8_t *data);
+int sd_netlink_message_read_u16(sd_netlink_message *m, unsigned short type, uint16_t *data);
+int sd_netlink_message_read_u32(sd_netlink_message *m, unsigned short type, uint32_t *data);
+int sd_netlink_message_read_ether_addr(sd_netlink_message *m, unsigned short type, struct ether_addr *data);
+int sd_netlink_message_read_cache_info(sd_netlink_message *m, unsigned short type, struct ifa_cacheinfo *info);
+int sd_netlink_message_read_in_addr(sd_netlink_message *m, unsigned short type, struct in_addr *data);
+int sd_netlink_message_read_in6_addr(sd_netlink_message *m, unsigned short type, struct in6_addr *data);
+int sd_netlink_message_enter_container(sd_netlink_message *m, unsigned short type);
+int sd_netlink_message_enter_array(sd_netlink_message *m, unsigned short type);
+int sd_netlink_message_exit_container(sd_netlink_message *m);
+
+int sd_netlink_message_open_array(sd_netlink_message *m, uint16_t type);
+int sd_netlink_message_cancel_array(sd_netlink_message *m);
+
+int sd_netlink_message_rewind(sd_netlink_message *m, sd_netlink *genl);
+
+sd_netlink_message *sd_netlink_message_next(sd_netlink_message *m);
+
+sd_netlink_message *sd_netlink_message_ref(sd_netlink_message *m);
+sd_netlink_message *sd_netlink_message_unref(sd_netlink_message *m);
+
+int sd_netlink_message_request_dump(sd_netlink_message *m, int dump);
+int sd_netlink_message_is_error(const sd_netlink_message *m);
+int sd_netlink_message_get_errno(const sd_netlink_message *m);
+int sd_netlink_message_get_type(const sd_netlink_message *m, uint16_t *type);
+int sd_netlink_message_set_flags(sd_netlink_message *m, uint16_t flags);
+int sd_netlink_message_is_broadcast(const sd_netlink_message *m);
+
+/* rtnl */
+
+int sd_rtnl_message_new_link(sd_netlink *nl, sd_netlink_message **ret, uint16_t msg_type, int index);
+int sd_rtnl_message_new_addr_update(sd_netlink *nl, sd_netlink_message **ret, int index, int family);
+int sd_rtnl_message_new_addr(sd_netlink *nl, sd_netlink_message **ret, uint16_t msg_type, int index, int family);
+int sd_rtnl_message_new_route(sd_netlink *nl, sd_netlink_message **ret, uint16_t nlmsg_type, int rtm_family, unsigned char rtm_protocol);
+int sd_rtnl_message_new_neigh(sd_netlink *nl, sd_netlink_message **ret, uint16_t msg_type, int index, int nda_family);
+
+int sd_rtnl_message_get_family(const sd_netlink_message *m, int *family);
+
+int sd_rtnl_message_addr_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen);
+int sd_rtnl_message_addr_set_scope(sd_netlink_message *m, unsigned char scope);
+int sd_rtnl_message_addr_set_flags(sd_netlink_message *m, unsigned char flags);
+int sd_rtnl_message_addr_get_family(const sd_netlink_message *m, int *family);
+int sd_rtnl_message_addr_get_prefixlen(const sd_netlink_message *m, unsigned char *prefixlen);
+int sd_rtnl_message_addr_get_scope(const sd_netlink_message *m, unsigned char *scope);
+int sd_rtnl_message_addr_get_flags(const sd_netlink_message *m, unsigned char *flags);
+int sd_rtnl_message_addr_get_ifindex(const sd_netlink_message *m, int *ifindex);
+
+int sd_rtnl_message_link_set_flags(sd_netlink_message *m, unsigned flags, unsigned change);
+int sd_rtnl_message_link_set_type(sd_netlink_message *m, unsigned type);
+int sd_rtnl_message_link_set_family(sd_netlink_message *m, unsigned family);
+int sd_rtnl_message_link_get_ifindex(const sd_netlink_message *m, int *ifindex);
+int sd_rtnl_message_link_get_flags(const sd_netlink_message *m, unsigned *flags);
+int sd_rtnl_message_link_get_type(const sd_netlink_message *m, unsigned short *type);
+
+int sd_rtnl_message_route_set_dst_prefixlen(sd_netlink_message *m, unsigned char prefixlen);
+int sd_rtnl_message_route_set_src_prefixlen(sd_netlink_message *m, unsigned char prefixlen);
+int sd_rtnl_message_route_set_scope(sd_netlink_message *m, unsigned char scope);
+int sd_rtnl_message_route_set_flags(sd_netlink_message *m, unsigned flags);
+int sd_rtnl_message_route_set_table(sd_netlink_message *m, unsigned char table);
+int sd_rtnl_message_route_set_type(sd_netlink_message *m, unsigned char type);
+int sd_rtnl_message_route_get_flags(const sd_netlink_message *m, unsigned *flags);
+int sd_rtnl_message_route_get_family(const sd_netlink_message *m, int *family);
+int sd_rtnl_message_route_set_family(sd_netlink_message *m, int family);
+int sd_rtnl_message_route_get_protocol(const sd_netlink_message *m, unsigned char *protocol);
+int sd_rtnl_message_route_get_scope(const sd_netlink_message *m, unsigned char *scope);
+int sd_rtnl_message_route_get_tos(const sd_netlink_message *m, unsigned char *tos);
+int sd_rtnl_message_route_get_table(const sd_netlink_message *m, unsigned char *table);
+int sd_rtnl_message_route_get_dst_prefixlen(const sd_netlink_message *m, unsigned char *dst_len);
+int sd_rtnl_message_route_get_src_prefixlen(const sd_netlink_message *m, unsigned char *src_len);
+int sd_rtnl_message_route_get_type(const sd_netlink_message *m, unsigned char *type);
+
+int sd_rtnl_message_new_nexthop(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nhmsg_type, int nh_family, unsigned char nh_protocol);
+
+int sd_rtnl_message_nexthop_set_flags(sd_netlink_message *m, uint8_t flags);
+int sd_rtnl_message_nexthop_set_family(sd_netlink_message *m, uint8_t family);
+int sd_rtnl_message_nexthop_get_family(const sd_netlink_message *m, uint8_t *family);
+
+int sd_rtnl_message_neigh_set_flags(sd_netlink_message *m, uint8_t flags);
+int sd_rtnl_message_neigh_set_state(sd_netlink_message *m, uint16_t state);
+int sd_rtnl_message_neigh_get_family(const sd_netlink_message *m, int *family);
+int sd_rtnl_message_neigh_get_ifindex(const sd_netlink_message *m, int *family);
+int sd_rtnl_message_neigh_get_state(const sd_netlink_message *m, uint16_t *state);
+int sd_rtnl_message_neigh_get_flags(const sd_netlink_message *m, uint8_t *flags);
+
+int sd_rtnl_message_new_addrlabel(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int ifindex, int ifal_family);
+int sd_rtnl_message_addrlabel_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen);
+int sd_rtnl_message_addrlabel_get_prefixlen(const sd_netlink_message *m, unsigned char *prefixlen);
+
+int sd_rtnl_message_new_routing_policy_rule(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int ifal_family);
+int sd_rtnl_message_routing_policy_rule_set_tos(sd_netlink_message *m, unsigned char tos);
+int sd_rtnl_message_routing_policy_rule_get_tos(const sd_netlink_message *m, unsigned char *tos);
+int sd_rtnl_message_routing_policy_rule_set_table(sd_netlink_message *m, unsigned char table);
+int sd_rtnl_message_routing_policy_rule_get_table(const sd_netlink_message *m, unsigned char *table);
+int sd_rtnl_message_routing_policy_rule_set_rtm_src_prefixlen(sd_netlink_message *m, unsigned char len);
+int sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(const sd_netlink_message *m, unsigned char *len);
+int sd_rtnl_message_routing_policy_rule_set_rtm_dst_prefixlen(sd_netlink_message *m, unsigned char len);
+int sd_rtnl_message_routing_policy_rule_get_rtm_dst_prefixlen(const sd_netlink_message *m, unsigned char *len);
+int sd_rtnl_message_routing_policy_rule_set_rtm_type(sd_netlink_message *m, unsigned char type);
+int sd_rtnl_message_routing_policy_rule_get_rtm_type(const sd_netlink_message *m, unsigned char *type);
+int sd_rtnl_message_routing_policy_rule_set_flags(sd_netlink_message *m, unsigned flags);
+int sd_rtnl_message_routing_policy_rule_get_flags(const sd_netlink_message *m, unsigned *flags);
+
+int sd_rtnl_message_new_qdisc(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int tcm_family, int tcm_ifindex);
+int sd_rtnl_message_set_qdisc_parent(sd_netlink_message *m, uint32_t parent);
+int sd_rtnl_message_set_qdisc_handle(sd_netlink_message *m, uint32_t handle);
+
+int sd_rtnl_message_new_tclass(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int tcm_family, int tcm_ifindex);
+int sd_rtnl_message_set_tclass_parent(sd_netlink_message *m, uint32_t parent);
+int sd_rtnl_message_set_tclass_handle(sd_netlink_message *m, uint32_t handle);
+
+int sd_rtnl_message_new_mdb(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int mdb_ifindex);
+
+/* genl */
+int sd_genl_socket_open(sd_netlink **nl);
+int sd_genl_message_new(sd_netlink *nl, sd_genl_family family, uint8_t cmd, sd_netlink_message **m);
+int sd_genl_message_get_family(const sd_netlink *nl, const sd_netlink_message *m, sd_genl_family *family);
+
+/* slot */
+sd_netlink_slot *sd_netlink_slot_ref(sd_netlink_slot *nl);
+sd_netlink_slot *sd_netlink_slot_unref(sd_netlink_slot *nl);
+
+sd_netlink *sd_netlink_slot_get_netlink(sd_netlink_slot *slot);
+void *sd_netlink_slot_get_userdata(sd_netlink_slot *slot);
+void *sd_netlink_slot_set_userdata(sd_netlink_slot *slot, void *userdata);
+int sd_netlink_slot_get_destroy_callback(const sd_netlink_slot *slot, sd_netlink_destroy_t *callback);
+int sd_netlink_slot_set_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t callback);
+int sd_netlink_slot_get_floating(const sd_netlink_slot *slot);
+int sd_netlink_slot_set_floating(sd_netlink_slot *slot, int b);
+int sd_netlink_slot_get_description(const sd_netlink_slot *slot, const char **description);
+int sd_netlink_slot_set_description(sd_netlink_slot *slot, const char *description);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_netlink, sd_netlink_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_netlink_message, sd_netlink_message_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_netlink_slot, sd_netlink_slot_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-network.h b/src/systemd/sd-network.h
new file mode 100644
index 0000000..7e06251
--- /dev/null
+++ b/src/systemd/sd-network.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdnetworkhfoo
+#define foosdnetworkhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+
+/*
+ * A few points:
+ *
+ * Instead of returning an empty string array or empty integer array, we
+ * may return NULL.
+ *
+ * Free the data the library returns with libc free(). String arrays
+ * are NULL terminated, and you need to free the array itself in
+ * addition to the strings contained.
+ *
+ * We return error codes as negative errno, kernel-style. On success, we
+ * return 0 or positive.
+ *
+ * These functions access data in /run. This is a virtual file system;
+ * therefore, accesses are relatively cheap.
+ *
+ * See sd-network(3) for more information.
+ */
+
+_SD_BEGIN_DECLARATIONS;
+
+/* Get overall operational state
+ * Possible states: down, up, dormant, carrier, degraded, routable
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of any links
+ */
+int sd_network_get_operational_state(char **state);
+int sd_network_get_carrier_state(char **state);
+int sd_network_get_address_state(char **state);
+
+/* Get DNS entries for all links. These are string representations of
+ * IP addresses */
+int sd_network_get_dns(char ***dns);
+
+/* Get NTP entries for all links. These are domain names or string
+ * representations of IP addresses */
+int sd_network_get_ntp(char ***ntp);
+
+/* Get the search domains for all links. */
+int sd_network_get_search_domains(char ***domains);
+
+/* Get the search domains for all links. */
+int sd_network_get_route_domains(char ***domains);
+
+/* Get setup state from ifindex.
+ * Possible states:
+ * pending: udev is still processing the link, we don't yet know if we will manage it
+ * failed: networkd failed to manage the link
+ * configuring: in the process of retrieving configuration or configuring the link
+ * configured: link configured successfully
+ * unmanaged: networkd is not handling the link
+ * linger: the link is gone, but has not yet been dropped by networkd
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_setup_state(int ifindex, char **state);
+
+/* Get operational state from ifindex.
+ * Possible states:
+ * off: the device is powered down
+ * no-carrier: the device is powered up, but it does not yet have a carrier
+ * dormant: the device has a carrier, but is not yet ready for normal traffic
+ * carrier: the link has a carrier
+ * degraded: the link has carrier and addresses valid on the local link configured
+ * routable: the link has carrier and routable address configured
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_operational_state(int ifindex, char **state);
+int sd_network_link_get_required_operstate_for_online(int ifindex, char **state);
+int sd_network_link_get_carrier_state(int ifindex, char **state);
+int sd_network_link_get_address_state(int ifindex, char **state);
+
+/* Indicates whether the network is relevant to being online.
+ * Possible return codes:
+ * 0: the connection is not required
+ * 1: the connection is required to consider the system online
+ * <0: networkd is not aware of the link
+ */
+int sd_network_link_get_required_for_online(int ifindex);
+
+/* Get path to .network file applied to link */
+int sd_network_link_get_network_file(int ifindex, char **filename);
+
+/* Get DNS entries for a given link. These are string representations of
+ * IP addresses */
+int sd_network_link_get_dns(int ifindex, char ***ret);
+
+/* Get NTP entries for a given link. These are domain names or string
+ * representations of IP addresses */
+int sd_network_link_get_ntp(int ifindex, char ***ret);
+
+/* Get SIP entries for a given link. These are string
+ * representations of IP addresses */
+int sd_network_link_get_sip(int ifindex, char ***ret);
+
+/* Indicates whether or not LLMNR should be enabled for the link
+ * Possible levels of support: yes, no, resolve
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_llmnr(int ifindex, char **llmnr);
+
+/* Indicates whether or not MulticastDNS should be enabled for the
+ * link.
+ * Possible levels of support: yes, no, resolve
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_mdns(int ifindex, char **mdns);
+
+/* Indicates whether or not DNS-over-TLS should be enabled for the
+ * link.
+ * Possible levels of support: yes, no, opportunistic
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_dns_over_tls(int ifindex, char **dns_over_tls);
+
+/* Indicates whether or not DNSSEC should be enabled for the link
+ * Possible levels of support: yes, no, allow-downgrade
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link
+ */
+int sd_network_link_get_dnssec(int ifindex, char **dnssec);
+
+/* Returns the list of per-interface DNSSEC negative trust anchors
+ * Possible return codes:
+ * -ENODATA: networkd is not aware of the link, or has no such data
+ */
+int sd_network_link_get_dnssec_negative_trust_anchors(int ifindex, char ***nta);
+
+/* Get the search DNS domain names for a given link. */
+int sd_network_link_get_search_domains(int ifindex, char ***domains);
+
+/* Get the route DNS domain names for a given link. */
+int sd_network_link_get_route_domains(int ifindex, char ***domains);
+
+/* Get whether this link shall be used as 'default route' for DNS queries */
+int sd_network_link_get_dns_default_route(int ifindex);
+
+/* Get the carrier interface indexes to which current link is bound to. */
+int sd_network_link_get_carrier_bound_to(int ifindex, int **ifindexes);
+
+/* Get the CARRIERS that are bound to current link. */
+int sd_network_link_get_carrier_bound_by(int ifindex, int **ifindexes);
+
+/* Get DHCPv6 client IAID for a given link. */
+int sd_network_link_get_dhcp6_client_iaid_string(int ifindex, char **iaid);
+
+/* Get DHCPv6 client DUID for a given link. */
+int sd_network_link_get_dhcp6_client_duid_string(int ifindex, char **duid);
+
+/* Monitor object */
+typedef struct sd_network_monitor sd_network_monitor;
+
+/* Create a new monitor. Category must be NULL, "links" or "leases". */
+int sd_network_monitor_new(sd_network_monitor **ret, const char *category);
+
+/* Destroys the passed monitor. Returns NULL. */
+sd_network_monitor* sd_network_monitor_unref(sd_network_monitor *m);
+
+/* Flushes the monitor */
+int sd_network_monitor_flush(sd_network_monitor *m);
+
+/* Get FD from monitor */
+int sd_network_monitor_get_fd(sd_network_monitor *m);
+
+/* Get poll() mask to monitor */
+int sd_network_monitor_get_events(sd_network_monitor *m);
+
+/* Get timeout for poll(), as usec value relative to CLOCK_MONOTONIC's epoch */
+int sd_network_monitor_get_timeout(sd_network_monitor *m, uint64_t *timeout_usec);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_network_monitor, sd_network_monitor_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-path.h b/src/systemd/sd-path.h
new file mode 100644
index 0000000..5f2f03c
--- /dev/null
+++ b/src/systemd/sd-path.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdpathhfoo
+#define foosdpathhfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+enum {
+ /* Temporary files */
+ SD_PATH_TEMPORARY,
+ SD_PATH_TEMPORARY_LARGE,
+
+ /* Vendor supplied data */
+ SD_PATH_SYSTEM_BINARIES,
+ SD_PATH_SYSTEM_INCLUDE,
+ SD_PATH_SYSTEM_LIBRARY_PRIVATE,
+ SD_PATH_SYSTEM_LIBRARY_ARCH,
+ SD_PATH_SYSTEM_SHARED,
+ SD_PATH_SYSTEM_CONFIGURATION_FACTORY,
+ SD_PATH_SYSTEM_STATE_FACTORY,
+
+ /* System configuration, runtime, state, ... */
+ SD_PATH_SYSTEM_CONFIGURATION,
+ SD_PATH_SYSTEM_RUNTIME,
+ SD_PATH_SYSTEM_RUNTIME_LOGS,
+ SD_PATH_SYSTEM_STATE_PRIVATE,
+ SD_PATH_SYSTEM_STATE_LOGS,
+ SD_PATH_SYSTEM_STATE_CACHE,
+ SD_PATH_SYSTEM_STATE_SPOOL,
+
+ /* Vendor supplied data */
+ SD_PATH_USER_BINARIES,
+ SD_PATH_USER_LIBRARY_PRIVATE,
+ SD_PATH_USER_LIBRARY_ARCH,
+ SD_PATH_USER_SHARED,
+
+ /* User configuration, state, runtime ... */
+ SD_PATH_USER_CONFIGURATION, /* takes both actual configuration (like /etc) and state (like /var/lib) */
+ SD_PATH_USER_RUNTIME,
+ SD_PATH_USER_STATE_CACHE,
+
+ /* User resources */
+ SD_PATH_USER, /* $HOME itself */
+ SD_PATH_USER_DOCUMENTS,
+ SD_PATH_USER_MUSIC,
+ SD_PATH_USER_PICTURES,
+ SD_PATH_USER_VIDEOS,
+ SD_PATH_USER_DOWNLOAD,
+ SD_PATH_USER_PUBLIC,
+ SD_PATH_USER_TEMPLATES,
+ SD_PATH_USER_DESKTOP,
+
+ /* Search paths */
+ SD_PATH_SEARCH_BINARIES,
+ SD_PATH_SEARCH_BINARIES_DEFAULT,
+ SD_PATH_SEARCH_LIBRARY_PRIVATE,
+ SD_PATH_SEARCH_LIBRARY_ARCH,
+ SD_PATH_SEARCH_SHARED,
+ SD_PATH_SEARCH_CONFIGURATION_FACTORY,
+ SD_PATH_SEARCH_STATE_FACTORY,
+ SD_PATH_SEARCH_CONFIGURATION,
+
+ /* Various systemd paths, generally mirroring systemd.pc — Except we drop the "dir" suffix (and
+ * replaces "path" by "search"), since this API is about dirs/paths anyway, and contains "path"
+ * already in the prefix */
+ SD_PATH_SYSTEMD_UTIL,
+ SD_PATH_SYSTEMD_SYSTEM_UNIT,
+ SD_PATH_SYSTEMD_SYSTEM_PRESET,
+ SD_PATH_SYSTEMD_SYSTEM_CONF,
+ SD_PATH_SYSTEMD_USER_UNIT,
+ SD_PATH_SYSTEMD_USER_PRESET,
+ SD_PATH_SYSTEMD_USER_CONF,
+
+ SD_PATH_SYSTEMD_SEARCH_SYSTEM_UNIT,
+ SD_PATH_SYSTEMD_SEARCH_USER_UNIT,
+
+ SD_PATH_SYSTEMD_SYSTEM_GENERATOR,
+ SD_PATH_SYSTEMD_USER_GENERATOR,
+ SD_PATH_SYSTEMD_SEARCH_SYSTEM_GENERATOR,
+ SD_PATH_SYSTEMD_SEARCH_USER_GENERATOR,
+
+ SD_PATH_SYSTEMD_SLEEP,
+ SD_PATH_SYSTEMD_SHUTDOWN,
+
+ SD_PATH_TMPFILES,
+ SD_PATH_SYSUSERS,
+ SD_PATH_SYSCTL,
+ SD_PATH_BINFMT,
+ SD_PATH_MODULES_LOAD,
+ SD_PATH_CATALOG,
+
+ /* systemd-networkd search paths */
+ SD_PATH_SYSTEMD_SEARCH_NETWORK,
+
+ _SD_PATH_MAX,
+};
+
+int sd_path_lookup(uint64_t type, const char *suffix, char **path);
+int sd_path_lookup_strv(uint64_t type, const char *suffix, char ***paths);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-radv.h b/src/systemd/sd-radv.h
new file mode 100644
index 0000000..3f6c149
--- /dev/null
+++ b/src/systemd/sd-radv.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdradvfoo
+#define foosdradvfoo
+
+/***
+ Copyright © 2017 Intel Corporation. All rights reserved.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+
+#include "_sd-common.h"
+#include "sd-event.h"
+#include "sd-ndisc.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+#define SD_RADV_DEFAULT_MIN_TIMEOUT_USEC (200*USEC_PER_SEC)
+#define SD_RADV_DEFAULT_MAX_TIMEOUT_USEC (600*USEC_PER_SEC)
+
+#define SD_RADV_DEFAULT_DNS_LIFETIME_USEC (3*SD_RADV_DEFAULT_MAX_TIMEOUT_USEC)
+
+typedef struct sd_radv sd_radv;
+typedef struct sd_radv_prefix sd_radv_prefix;
+typedef struct sd_radv_route_prefix sd_radv_route_prefix;
+
+/* Router Advertisement */
+int sd_radv_new(sd_radv **ret);
+sd_radv *sd_radv_ref(sd_radv *ra);
+sd_radv *sd_radv_unref(sd_radv *ra);
+
+int sd_radv_attach_event(sd_radv *ra, sd_event *event, int64_t priority);
+int sd_radv_detach_event(sd_radv *nd);
+sd_event *sd_radv_get_event(sd_radv *ra);
+
+int sd_radv_start(sd_radv *ra);
+int sd_radv_stop(sd_radv *ra);
+int sd_radv_is_running(sd_radv *ra);
+
+int sd_radv_set_ifindex(sd_radv *ra, int interface_index);
+int sd_radv_set_mac(sd_radv *ra, const struct ether_addr *mac_addr);
+int sd_radv_set_mtu(sd_radv *ra, uint32_t mtu);
+int sd_radv_set_hop_limit(sd_radv *ra, uint8_t hop_limit);
+int sd_radv_set_router_lifetime(sd_radv *ra, uint16_t router_lifetime);
+int sd_radv_set_managed_information(sd_radv *ra, int managed);
+int sd_radv_set_other_information(sd_radv *ra, int other);
+int sd_radv_set_preference(sd_radv *ra, unsigned preference);
+int sd_radv_add_prefix(sd_radv *ra, sd_radv_prefix *p, int dynamic);
+int sd_radv_add_route_prefix(sd_radv *ra, sd_radv_route_prefix *p, int dynamic);
+sd_radv_prefix *sd_radv_remove_prefix(sd_radv *ra, const struct in6_addr *prefix,
+ unsigned char prefixlen);
+int sd_radv_set_rdnss(sd_radv *ra, uint32_t lifetime,
+ const struct in6_addr *dns, size_t n_dns);
+int sd_radv_set_dnssl(sd_radv *ra, uint32_t lifetime, char **search_list);
+
+/* Advertised prefixes */
+int sd_radv_prefix_new(sd_radv_prefix **ret);
+sd_radv_prefix *sd_radv_prefix_ref(sd_radv_prefix *ra);
+sd_radv_prefix *sd_radv_prefix_unref(sd_radv_prefix *ra);
+
+int sd_radv_prefix_set_prefix(sd_radv_prefix *p, const struct in6_addr *in6_addr,
+ unsigned char prefixlen);
+int sd_radv_prefix_get_prefix(sd_radv_prefix *p, struct in6_addr *ret_in6_addr,
+ unsigned char *ret_prefixlen);
+int sd_radv_prefix_set_onlink(sd_radv_prefix *p, int onlink);
+int sd_radv_prefix_set_address_autoconfiguration(sd_radv_prefix *p,
+ int address_autoconfiguration);
+int sd_radv_prefix_set_valid_lifetime(sd_radv_prefix *p,
+ uint32_t valid_lifetime);
+int sd_radv_prefix_set_preferred_lifetime(sd_radv_prefix *p,
+ uint32_t preferred_lifetime);
+
+int sd_radv_route_prefix_new(sd_radv_route_prefix **ret);
+sd_radv_route_prefix *sd_radv_route_prefix_ref(sd_radv_route_prefix *ra);
+sd_radv_route_prefix *sd_radv_route_prefix_unref(sd_radv_route_prefix *ra);
+
+int sd_radv_prefix_set_route_prefix(sd_radv_route_prefix *p, const struct in6_addr *in6_addr, unsigned char prefixlen);
+int sd_radv_route_prefix_set_lifetime(sd_radv_route_prefix *p, uint32_t valid_lifetime);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_radv, sd_radv_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_radv_prefix, sd_radv_prefix_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_radv_route_prefix, sd_radv_route_prefix_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-resolve.h b/src/systemd/sd-resolve.h
new file mode 100644
index 0000000..ee58d18
--- /dev/null
+++ b/src/systemd/sd-resolve.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdresolvehfoo
+#define foosdresolvehfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/* 'struct addrinfo' needs _GNU_SOURCE */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
+#include <inttypes.h>
+#include <netdb.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "sd-event.h"
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+/* An opaque sd-resolve session structure */
+typedef struct sd_resolve sd_resolve;
+
+/* An opaque sd-resolve query structure */
+typedef struct sd_resolve_query sd_resolve_query;
+
+/* A callback on completion */
+typedef int (*sd_resolve_getaddrinfo_handler_t)(sd_resolve_query *q, int ret, const struct addrinfo *ai, void *userdata);
+typedef int (*sd_resolve_getnameinfo_handler_t)(sd_resolve_query *q, int ret, const char *host, const char *serv, void *userdata);
+typedef _sd_destroy_t sd_resolve_destroy_t;
+
+enum {
+ SD_RESOLVE_GET_HOST = 1 << 0,
+ SD_RESOLVE_GET_SERVICE = 1 << 1,
+ SD_RESOLVE_GET_BOTH = SD_RESOLVE_GET_HOST | SD_RESOLVE_GET_SERVICE,
+};
+
+int sd_resolve_default(sd_resolve **ret);
+
+/* Allocate a new sd-resolve session. */
+int sd_resolve_new(sd_resolve **ret);
+
+/* Free a sd-resolve session. This destroys all attached
+ * sd_resolve_query objects automatically. */
+sd_resolve* sd_resolve_unref(sd_resolve *resolve);
+sd_resolve* sd_resolve_ref(sd_resolve *resolve);
+
+/* Return the UNIX file descriptor to poll() for events on. Use this
+ * function to integrate sd-resolve with your custom main loop. */
+int sd_resolve_get_fd(sd_resolve *resolve);
+
+/* Return the poll() events (a combination of flags like POLLIN,
+ * POLLOUT, ...) to check for. */
+int sd_resolve_get_events(sd_resolve *resolve);
+
+/* Return the poll() timeout to pass. Returns (uint64_t) -1 as
+ * timeout if no timeout is needed. */
+int sd_resolve_get_timeout(sd_resolve *resolve, uint64_t *timeout_usec);
+
+/* Process pending responses. After this function is called, you can
+ * get the next completed query object(s) using
+ * sd_resolve_get_next(). */
+int sd_resolve_process(sd_resolve *resolve);
+
+/* Wait for a resolve event to complete. */
+int sd_resolve_wait(sd_resolve *resolve, uint64_t timeout_usec);
+
+int sd_resolve_get_tid(sd_resolve *resolve, pid_t *tid);
+
+int sd_resolve_attach_event(sd_resolve *resolve, sd_event *e, int64_t priority);
+int sd_resolve_detach_event(sd_resolve *resolve);
+sd_event *sd_resolve_get_event(sd_resolve *resolve);
+
+/* Issue a name-to-address query on the specified session. The
+ * arguments are compatible with those of libc's
+ * getaddrinfo(3). The function returns a new query object. When the
+ * query is completed, you may retrieve the results using
+ * sd_resolve_getaddrinfo_done(). */
+int sd_resolve_getaddrinfo(sd_resolve *resolve, sd_resolve_query **q, const char *node, const char *service, const struct addrinfo *hints, sd_resolve_getaddrinfo_handler_t callback, void *userdata);
+
+/* Issue an address-to-name query on the specified session. The
+ * arguments are compatible with those of libc's
+ * getnameinfo(3). The function returns a new query object. When the
+ * query is completed, you may retrieve the results using
+ * sd_resolve_getnameinfo_done(). Set gethost (resp. getserv) to non-zero
+ * if you want to query the hostname (resp. the service name). */
+int sd_resolve_getnameinfo(sd_resolve *resolve, sd_resolve_query **q, const struct sockaddr *sa, socklen_t salen, int flags, uint64_t get, sd_resolve_getnameinfo_handler_t callback, void *userdata);
+
+sd_resolve_query *sd_resolve_query_ref(sd_resolve_query *q);
+sd_resolve_query *sd_resolve_query_unref(sd_resolve_query *q);
+
+/* Returns non-zero when the query operation specified by q has been completed. */
+int sd_resolve_query_is_done(sd_resolve_query *q);
+
+void *sd_resolve_query_get_userdata(sd_resolve_query *q);
+void *sd_resolve_query_set_userdata(sd_resolve_query *q, void *userdata);
+int sd_resolve_query_get_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t *destroy_callback);
+int sd_resolve_query_set_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t destroy_callback);
+int sd_resolve_query_get_floating(sd_resolve_query *q);
+int sd_resolve_query_set_floating(sd_resolve_query *q, int b);
+
+sd_resolve *sd_resolve_query_get_resolve(sd_resolve_query *q);
+
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_resolve, sd_resolve_unref);
+_SD_DEFINE_POINTER_CLEANUP_FUNC(sd_resolve_query, sd_resolve_query_unref);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/systemd/sd-utf8.h b/src/systemd/sd-utf8.h
new file mode 100644
index 0000000..57013d0
--- /dev/null
+++ b/src/systemd/sd-utf8.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#ifndef foosdutf8hfoo
+#define foosdutf8hfoo
+
+/***
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "_sd-common.h"
+
+_SD_BEGIN_DECLARATIONS;
+
+_sd_pure_ const char *sd_utf8_is_valid(const char *s);
+_sd_pure_ const char *sd_ascii_is_valid(const char *s);
+
+_SD_END_DECLARATIONS;
+
+#endif
diff --git a/src/sysusers/sysusers.c b/src/sysusers/sysusers.c
new file mode 100644
index 0000000..71bfb3b
--- /dev/null
+++ b/src/sysusers/sysusers.c
@@ -0,0 +1,2038 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <utmp.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "copy.h"
+#include "def.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "main-func.h"
+#include "mount-util.h"
+#include "nscd-flush.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "selinux-util.h"
+#include "set.h"
+#include "smack-util.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util-label.h"
+#include "uid-range.h"
+#include "user-record.h"
+#include "user-util.h"
+#include "utf8.h"
+#include "util.h"
+
+typedef enum ItemType {
+ ADD_USER = 'u',
+ ADD_GROUP = 'g',
+ ADD_MEMBER = 'm',
+ ADD_RANGE = 'r',
+} ItemType;
+
+typedef struct Item {
+ ItemType type;
+
+ char *name;
+ char *group_name;
+ char *uid_path;
+ char *gid_path;
+ char *description;
+ char *home;
+ char *shell;
+
+ gid_t gid;
+ uid_t uid;
+
+ bool gid_set:1;
+
+ /* When set the group with the specified gid must exist
+ * and the check if a uid clashes with the gid is skipped.
+ */
+ bool id_set_strict:1;
+
+ bool uid_set:1;
+
+ bool todo_user:1;
+ bool todo_group:1;
+} Item;
+
+static char *arg_root = NULL;
+static char *arg_image = NULL;
+static bool arg_cat_config = false;
+static const char *arg_replace = NULL;
+static bool arg_inline = false;
+static PagerFlags arg_pager_flags = 0;
+
+static OrderedHashmap *users = NULL, *groups = NULL;
+static OrderedHashmap *todo_uids = NULL, *todo_gids = NULL;
+static OrderedHashmap *members = NULL;
+
+static Hashmap *database_by_uid = NULL, *database_by_username = NULL;
+static Hashmap *database_by_gid = NULL, *database_by_groupname = NULL;
+static Set *database_users = NULL, *database_groups = NULL;
+
+static uid_t search_uid = UID_INVALID;
+static UidRange *uid_range = NULL;
+static unsigned n_uid_range = 0;
+
+static UGIDAllocationRange login_defs = {};
+static bool login_defs_need_warning = false;
+
+STATIC_DESTRUCTOR_REGISTER(groups, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(users, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(members, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(todo_uids, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(todo_gids, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_by_uid, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_by_username, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_users, set_free_freep);
+STATIC_DESTRUCTOR_REGISTER(database_by_gid, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_by_groupname, hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(database_groups, set_free_freep);
+STATIC_DESTRUCTOR_REGISTER(uid_range, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
+
+static int errno_is_not_exists(int code) {
+ /* See getpwnam(3) and getgrnam(3): those codes and others can be returned if the user or group are
+ * not found. */
+ return IN_SET(code, 0, ENOENT, ESRCH, EBADF, EPERM);
+}
+
+static void maybe_emit_login_defs_warning(void) {
+ if (!login_defs_need_warning)
+ return;
+
+ if (login_defs.system_alloc_uid_min != SYSTEM_ALLOC_UID_MIN ||
+ login_defs.system_uid_max != SYSTEM_UID_MAX)
+ log_warning("login.defs specifies UID allocation range "UID_FMT"–"UID_FMT
+ " that is different than the built-in defaults ("UID_FMT"–"UID_FMT")",
+ login_defs.system_alloc_uid_min, login_defs.system_uid_max,
+ SYSTEM_ALLOC_UID_MIN, SYSTEM_UID_MAX);
+ if (login_defs.system_alloc_gid_min != SYSTEM_ALLOC_GID_MIN ||
+ login_defs.system_gid_max != SYSTEM_GID_MAX)
+ log_warning("login.defs specifies GID allocation range "GID_FMT"–"GID_FMT
+ " that is different than the built-in defaults ("GID_FMT"–"GID_FMT")",
+ login_defs.system_alloc_gid_min, login_defs.system_gid_max,
+ SYSTEM_ALLOC_GID_MIN, SYSTEM_GID_MAX);
+
+ login_defs_need_warning = false;
+}
+
+static int load_user_database(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *passwd_path;
+ struct passwd *pw;
+ int r;
+
+ passwd_path = prefix_roota(arg_root, "/etc/passwd");
+ f = fopen(passwd_path, "re");
+ if (!f)
+ return errno == ENOENT ? 0 : -errno;
+
+ r = hashmap_ensure_allocated(&database_by_username, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&database_by_uid, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_allocated(&database_users, NULL);
+ if (r < 0)
+ return r;
+
+ while ((r = fgetpwent_sane(f, &pw)) > 0) {
+ char *n;
+ int k, q;
+
+ n = strdup(pw->pw_name);
+ if (!n)
+ return -ENOMEM;
+
+ k = set_put(database_users, n);
+ if (k < 0) {
+ free(n);
+ return k;
+ }
+
+ k = hashmap_put(database_by_username, n, UID_TO_PTR(pw->pw_uid));
+ if (k < 0 && k != -EEXIST)
+ return k;
+
+ q = hashmap_put(database_by_uid, UID_TO_PTR(pw->pw_uid), n);
+ if (q < 0 && q != -EEXIST)
+ return q;
+ }
+ return r;
+}
+
+static int load_group_database(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *group_path;
+ struct group *gr;
+ int r;
+
+ group_path = prefix_roota(arg_root, "/etc/group");
+ f = fopen(group_path, "re");
+ if (!f)
+ return errno == ENOENT ? 0 : -errno;
+
+ r = hashmap_ensure_allocated(&database_by_groupname, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&database_by_gid, NULL);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_allocated(&database_groups, NULL);
+ if (r < 0)
+ return r;
+
+ while ((r = fgetgrent_sane(f, &gr)) > 0) {
+ char *n;
+ int k, q;
+
+ n = strdup(gr->gr_name);
+ if (!n)
+ return -ENOMEM;
+
+ k = set_put(database_groups, n);
+ if (k < 0) {
+ free(n);
+ return k;
+ }
+
+ k = hashmap_put(database_by_groupname, n, GID_TO_PTR(gr->gr_gid));
+ if (k < 0 && k != -EEXIST)
+ return k;
+
+ q = hashmap_put(database_by_gid, GID_TO_PTR(gr->gr_gid), n);
+ if (q < 0 && q != -EEXIST)
+ return q;
+ }
+ return r;
+}
+
+static int make_backup(const char *target, const char *x) {
+ _cleanup_(unlink_and_freep) char *dst_tmp = NULL;
+ _cleanup_fclose_ FILE *dst = NULL;
+ _cleanup_close_ int src = -1;
+ const char *backup;
+ struct stat st;
+ int r;
+
+ assert(target);
+ assert(x);
+
+ src = open(x, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (src < 0) {
+ if (errno == ENOENT) /* No backup necessary... */
+ return 0;
+
+ return -errno;
+ }
+
+ if (fstat(src, &st) < 0)
+ return -errno;
+
+ r = fopen_temporary_label(
+ target, /* The path for which to the lookup the label */
+ x, /* Where we want the file actually to end up */
+ &dst,
+ &dst_tmp /* The temporary file we write to */);
+ if (r < 0)
+ return r;
+
+ r = copy_bytes(src, fileno(dst), (uint64_t) -1, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ backup = strjoina(x, "-");
+
+ /* Copy over the access mask. Don't fail on chmod() or chown(). If it stays owned by us and/or
+ * unreadable by others, then it isn't too bad... */
+ r = fchmod_and_chown(fileno(dst), st.st_mode & 07777, st.st_uid, st.st_gid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to change access mode or ownership of %s: %m", backup);
+
+ if (futimens(fileno(dst), (const struct timespec[2]) { st.st_atim, st.st_mtim }) < 0)
+ log_warning_errno(errno, "Failed to fix access and modification time of %s: %m", backup);
+
+ r = fsync_full(fileno(dst));
+ if (r < 0)
+ return r;
+
+ if (rename(dst_tmp, backup) < 0)
+ return errno;
+
+ dst_tmp = mfree(dst_tmp); /* disable the unlink_and_freep() hook now that the file has been renamed*/
+ return 0;
+}
+
+static int putgrent_with_members(const struct group *gr, FILE *group) {
+ char **a;
+
+ assert(gr);
+ assert(group);
+
+ a = ordered_hashmap_get(members, gr->gr_name);
+ if (a) {
+ _cleanup_strv_free_ char **l = NULL;
+ bool added = false;
+ char **i;
+
+ l = strv_copy(gr->gr_mem);
+ if (!l)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, a) {
+ if (strv_find(l, *i))
+ continue;
+
+ if (strv_extend(&l, *i) < 0)
+ return -ENOMEM;
+
+ added = true;
+ }
+
+ if (added) {
+ struct group t;
+ int r;
+
+ strv_uniq(l);
+ strv_sort(l);
+
+ t = *gr;
+ t.gr_mem = l;
+
+ r = putgrent_sane(&t, group);
+ return r < 0 ? r : 1;
+ }
+ }
+
+ return putgrent_sane(gr, group);
+}
+
+#if ENABLE_GSHADOW
+static int putsgent_with_members(const struct sgrp *sg, FILE *gshadow) {
+ char **a;
+
+ assert(sg);
+ assert(gshadow);
+
+ a = ordered_hashmap_get(members, sg->sg_namp);
+ if (a) {
+ _cleanup_strv_free_ char **l = NULL;
+ bool added = false;
+ char **i;
+
+ l = strv_copy(sg->sg_mem);
+ if (!l)
+ return -ENOMEM;
+
+ STRV_FOREACH(i, a) {
+ if (strv_find(l, *i))
+ continue;
+
+ if (strv_extend(&l, *i) < 0)
+ return -ENOMEM;
+
+ added = true;
+ }
+
+ if (added) {
+ struct sgrp t;
+ int r;
+
+ strv_uniq(l);
+ strv_sort(l);
+
+ t = *sg;
+ t.sg_mem = l;
+
+ r = putsgent_sane(&t, gshadow);
+ return r < 0 ? r : 1;
+ }
+ }
+
+ return putsgent_sane(sg, gshadow);
+}
+#endif
+
+static const char* default_shell(uid_t uid) {
+ return uid == 0 ? "/bin/sh" : NOLOGIN;
+}
+
+static int write_temporary_passwd(const char *passwd_path, FILE **tmpfile, char **tmpfile_path) {
+ _cleanup_fclose_ FILE *original = NULL, *passwd = NULL;
+ _cleanup_(unlink_and_freep) char *passwd_tmp = NULL;
+ struct passwd *pw = NULL;
+ Item *i;
+ int r;
+
+ if (ordered_hashmap_size(todo_uids) == 0)
+ return 0;
+
+ r = fopen_temporary_label("/etc/passwd", passwd_path, &passwd, &passwd_tmp);
+ if (r < 0)
+ return r;
+
+ original = fopen(passwd_path, "re");
+ if (original) {
+
+ r = sync_rights(fileno(original), fileno(passwd));
+ if (r < 0)
+ return r;
+
+ while ((r = fgetpwent_sane(original, &pw)) > 0) {
+
+ i = ordered_hashmap_get(users, pw->pw_name);
+ if (i && i->todo_user)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: User \"%s\" already exists.",
+ passwd_path, pw->pw_name);
+
+ if (ordered_hashmap_contains(todo_uids, UID_TO_PTR(pw->pw_uid)))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: Detected collision for UID " UID_FMT ".",
+ passwd_path, pw->pw_uid);
+
+ /* Make sure we keep the NIS entries (if any) at the end. */
+ if (IN_SET(pw->pw_name[0], '+', '-'))
+ break;
+
+ r = putpwent_sane(pw, passwd);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ if (errno != ENOENT)
+ return -errno;
+ if (fchmod(fileno(passwd), 0644) < 0)
+ return -errno;
+ }
+
+ ORDERED_HASHMAP_FOREACH(i, todo_uids) {
+ struct passwd n = {
+ .pw_name = i->name,
+ .pw_uid = i->uid,
+ .pw_gid = i->gid,
+ .pw_gecos = i->description,
+
+ /* "x" means the password is stored in the shadow file */
+ .pw_passwd = (char*) "x",
+
+ /* We default to the root directory as home */
+ .pw_dir = i->home ?: (char*) "/",
+
+ /* Initialize the shell to nologin, with one exception:
+ * for root we patch in something special */
+ .pw_shell = i->shell ?: (char*) default_shell(i->uid),
+ };
+
+ r = putpwent_sane(&n, passwd);
+ if (r < 0)
+ return r;
+ }
+
+ /* Append the remaining NIS entries if any */
+ while (pw) {
+ r = putpwent_sane(pw, passwd);
+ if (r < 0)
+ return r;
+
+ r = fgetpwent_sane(original, &pw);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = fflush_and_check(passwd);
+ if (r < 0)
+ return r;
+
+ *tmpfile = TAKE_PTR(passwd);
+ *tmpfile_path = TAKE_PTR(passwd_tmp);
+
+ return 0;
+}
+
+static int write_temporary_shadow(const char *shadow_path, FILE **tmpfile, char **tmpfile_path) {
+ _cleanup_fclose_ FILE *original = NULL, *shadow = NULL;
+ _cleanup_(unlink_and_freep) char *shadow_tmp = NULL;
+ struct spwd *sp = NULL;
+ long lstchg;
+ Item *i;
+ int r;
+
+ if (ordered_hashmap_size(todo_uids) == 0)
+ return 0;
+
+ r = fopen_temporary_label("/etc/shadow", shadow_path, &shadow, &shadow_tmp);
+ if (r < 0)
+ return r;
+
+ lstchg = (long) (now(CLOCK_REALTIME) / USEC_PER_DAY);
+
+ original = fopen(shadow_path, "re");
+ if (original) {
+
+ r = sync_rights(fileno(original), fileno(shadow));
+ if (r < 0)
+ return r;
+
+ while ((r = fgetspent_sane(original, &sp)) > 0) {
+
+ i = ordered_hashmap_get(users, sp->sp_namp);
+ if (i && i->todo_user) {
+ /* we will update the existing entry */
+ sp->sp_lstchg = lstchg;
+
+ /* only the /etc/shadow stage is left, so we can
+ * safely remove the item from the todo set */
+ i->todo_user = false;
+ ordered_hashmap_remove(todo_uids, UID_TO_PTR(i->uid));
+ }
+
+ /* Make sure we keep the NIS entries (if any) at the end. */
+ if (IN_SET(sp->sp_namp[0], '+', '-'))
+ break;
+
+ r = putspent_sane(sp, shadow);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ if (errno != ENOENT)
+ return -errno;
+ if (fchmod(fileno(shadow), 0000) < 0)
+ return -errno;
+ }
+
+ ORDERED_HASHMAP_FOREACH(i, todo_uids) {
+ struct spwd n = {
+ .sp_namp = i->name,
+ .sp_pwdp = (char*) "!*", /* lock this password, and make it invalid */
+ .sp_lstchg = lstchg,
+ .sp_min = -1,
+ .sp_max = -1,
+ .sp_warn = -1,
+ .sp_inact = -1,
+ .sp_expire = -1,
+ .sp_flag = (unsigned long) -1, /* this appears to be what everybody does ... */
+ };
+
+ r = putspent_sane(&n, shadow);
+ if (r < 0)
+ return r;
+ }
+
+ /* Append the remaining NIS entries if any */
+ while (sp) {
+ r = putspent_sane(sp, shadow);
+ if (r < 0)
+ return r;
+
+ r = fgetspent_sane(original, &sp);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+ if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+
+ r = fflush_sync_and_check(shadow);
+ if (r < 0)
+ return r;
+
+ *tmpfile = TAKE_PTR(shadow);
+ *tmpfile_path = TAKE_PTR(shadow_tmp);
+
+ return 0;
+}
+
+static int write_temporary_group(const char *group_path, FILE **tmpfile, char **tmpfile_path) {
+ _cleanup_fclose_ FILE *original = NULL, *group = NULL;
+ _cleanup_(unlink_and_freep) char *group_tmp = NULL;
+ bool group_changed = false;
+ struct group *gr = NULL;
+ Item *i;
+ int r;
+
+ if (ordered_hashmap_size(todo_gids) == 0 && ordered_hashmap_size(members) == 0)
+ return 0;
+
+ r = fopen_temporary_label("/etc/group", group_path, &group, &group_tmp);
+ if (r < 0)
+ return r;
+
+ original = fopen(group_path, "re");
+ if (original) {
+
+ r = sync_rights(fileno(original), fileno(group));
+ if (r < 0)
+ return r;
+
+ while ((r = fgetgrent_sane(original, &gr)) > 0) {
+ /* Safety checks against name and GID collisions. Normally,
+ * this should be unnecessary, but given that we look at the
+ * entries anyway here, let's make an extra verification
+ * step that we don't generate duplicate entries. */
+
+ i = ordered_hashmap_get(groups, gr->gr_name);
+ if (i && i->todo_group)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: Group \"%s\" already exists.",
+ group_path, gr->gr_name);
+
+ if (ordered_hashmap_contains(todo_gids, GID_TO_PTR(gr->gr_gid)))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: Detected collision for GID " GID_FMT ".",
+ group_path, gr->gr_gid);
+
+ /* Make sure we keep the NIS entries (if any) at the end. */
+ if (IN_SET(gr->gr_name[0], '+', '-'))
+ break;
+
+ r = putgrent_with_members(gr, group);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ group_changed = true;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ if (errno != ENOENT)
+ return -errno;
+ if (fchmod(fileno(group), 0644) < 0)
+ return -errno;
+ }
+
+ ORDERED_HASHMAP_FOREACH(i, todo_gids) {
+ struct group n = {
+ .gr_name = i->name,
+ .gr_gid = i->gid,
+ .gr_passwd = (char*) "x",
+ };
+
+ r = putgrent_with_members(&n, group);
+ if (r < 0)
+ return r;
+
+ group_changed = true;
+ }
+
+ /* Append the remaining NIS entries if any */
+ while (gr) {
+ r = putgrent_sane(gr, group);
+ if (r < 0)
+ return r;
+
+ r = fgetgrent_sane(original, &gr);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = fflush_sync_and_check(group);
+ if (r < 0)
+ return r;
+
+ if (group_changed) {
+ *tmpfile = TAKE_PTR(group);
+ *tmpfile_path = TAKE_PTR(group_tmp);
+ }
+ return 0;
+}
+
+static int write_temporary_gshadow(const char * gshadow_path, FILE **tmpfile, char **tmpfile_path) {
+#if ENABLE_GSHADOW
+ _cleanup_fclose_ FILE *original = NULL, *gshadow = NULL;
+ _cleanup_(unlink_and_freep) char *gshadow_tmp = NULL;
+ bool group_changed = false;
+ Item *i;
+ int r;
+
+ if (ordered_hashmap_size(todo_gids) == 0 && ordered_hashmap_size(members) == 0)
+ return 0;
+
+ r = fopen_temporary_label("/etc/gshadow", gshadow_path, &gshadow, &gshadow_tmp);
+ if (r < 0)
+ return r;
+
+ original = fopen(gshadow_path, "re");
+ if (original) {
+ struct sgrp *sg;
+
+ r = sync_rights(fileno(original), fileno(gshadow));
+ if (r < 0)
+ return r;
+
+ while ((r = fgetsgent_sane(original, &sg)) > 0) {
+
+ i = ordered_hashmap_get(groups, sg->sg_namp);
+ if (i && i->todo_group)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s: Group \"%s\" already exists.",
+ gshadow_path, sg->sg_namp);
+
+ r = putsgent_with_members(sg, gshadow);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ group_changed = true;
+ }
+ if (r < 0)
+ return r;
+
+ } else {
+ if (errno != ENOENT)
+ return -errno;
+ if (fchmod(fileno(gshadow), 0000) < 0)
+ return -errno;
+ }
+
+ ORDERED_HASHMAP_FOREACH(i, todo_gids) {
+ struct sgrp n = {
+ .sg_namp = i->name,
+ .sg_passwd = (char*) "!*",
+ };
+
+ r = putsgent_with_members(&n, gshadow);
+ if (r < 0)
+ return r;
+
+ group_changed = true;
+ }
+
+ r = fflush_sync_and_check(gshadow);
+ if (r < 0)
+ return r;
+
+ if (group_changed) {
+ *tmpfile = TAKE_PTR(gshadow);
+ *tmpfile_path = TAKE_PTR(gshadow_tmp);
+ }
+ return 0;
+#else
+ return 0;
+#endif
+}
+
+static int write_files(void) {
+ _cleanup_fclose_ FILE *passwd = NULL, *group = NULL, *shadow = NULL, *gshadow = NULL;
+ _cleanup_(unlink_and_freep) char *passwd_tmp = NULL, *group_tmp = NULL, *shadow_tmp = NULL, *gshadow_tmp = NULL;
+ const char *passwd_path = NULL, *group_path = NULL, *shadow_path = NULL, *gshadow_path = NULL;
+ int r;
+
+ passwd_path = prefix_roota(arg_root, "/etc/passwd");
+ shadow_path = prefix_roota(arg_root, "/etc/shadow");
+ group_path = prefix_roota(arg_root, "/etc/group");
+ gshadow_path = prefix_roota(arg_root, "/etc/gshadow");
+
+ r = write_temporary_group(group_path, &group, &group_tmp);
+ if (r < 0)
+ return r;
+
+ r = write_temporary_gshadow(gshadow_path, &gshadow, &gshadow_tmp);
+ if (r < 0)
+ return r;
+
+ r = write_temporary_passwd(passwd_path, &passwd, &passwd_tmp);
+ if (r < 0)
+ return r;
+
+ r = write_temporary_shadow(shadow_path, &shadow, &shadow_tmp);
+ if (r < 0)
+ return r;
+
+ /* Make a backup of the old files */
+ if (group) {
+ r = make_backup("/etc/group", group_path);
+ if (r < 0)
+ return r;
+ }
+ if (gshadow) {
+ r = make_backup("/etc/gshadow", gshadow_path);
+ if (r < 0)
+ return r;
+ }
+
+ if (passwd) {
+ r = make_backup("/etc/passwd", passwd_path);
+ if (r < 0)
+ return r;
+ }
+ if (shadow) {
+ r = make_backup("/etc/shadow", shadow_path);
+ if (r < 0)
+ return r;
+ }
+
+ /* And make the new files count */
+ if (group) {
+ r = rename_and_apply_smack_floor_label(group_tmp, group_path);
+ if (r < 0)
+ return r;
+
+ group_tmp = mfree(group_tmp);
+
+ if (!arg_root && !arg_image)
+ (void) nscd_flush_cache(STRV_MAKE("group"));
+ }
+ if (gshadow) {
+ r = rename_and_apply_smack_floor_label(gshadow_tmp, gshadow_path);
+ if (r < 0)
+ return r;
+
+ gshadow_tmp = mfree(gshadow_tmp);
+ }
+
+ if (passwd) {
+ r = rename_and_apply_smack_floor_label(passwd_tmp, passwd_path);
+ if (r < 0)
+ return r;
+
+ passwd_tmp = mfree(passwd_tmp);
+
+ if (!arg_root && !arg_image)
+ (void) nscd_flush_cache(STRV_MAKE("passwd"));
+ }
+ if (shadow) {
+ r = rename_and_apply_smack_floor_label(shadow_tmp, shadow_path);
+ if (r < 0)
+ return r;
+
+ shadow_tmp = mfree(shadow_tmp);
+ }
+
+ return 0;
+}
+
+static int uid_is_ok(uid_t uid, const char *name, bool check_with_gid) {
+ struct passwd *p;
+ struct group *g;
+ const char *n;
+ Item *i;
+
+ /* Let's see if we already have assigned the UID a second time */
+ if (ordered_hashmap_get(todo_uids, UID_TO_PTR(uid)))
+ return 0;
+
+ /* Try to avoid using uids that are already used by a group
+ * that doesn't have the same name as our new user. */
+ if (check_with_gid) {
+ i = ordered_hashmap_get(todo_gids, GID_TO_PTR(uid));
+ if (i && !streq(i->name, name))
+ return 0;
+ }
+
+ /* Let's check the files directly */
+ if (hashmap_contains(database_by_uid, UID_TO_PTR(uid)))
+ return 0;
+
+ if (check_with_gid) {
+ n = hashmap_get(database_by_gid, GID_TO_PTR(uid));
+ if (n && !streq(n, name))
+ return 0;
+ }
+
+ /* Let's also check via NSS, to avoid UID clashes over LDAP and such, just in case */
+ if (!arg_root) {
+ errno = 0;
+ p = getpwuid(uid);
+ if (p)
+ return 0;
+ if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+
+ if (check_with_gid) {
+ errno = 0;
+ g = getgrgid((gid_t) uid);
+ if (g) {
+ if (!streq(g->gr_name, name))
+ return 0;
+ } else if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+ }
+ }
+
+ return 1;
+}
+
+static int root_stat(const char *p, struct stat *st) {
+ const char *fix;
+
+ fix = prefix_roota(arg_root, p);
+ if (stat(fix, st) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int read_id_from_file(Item *i, uid_t *_uid, gid_t *_gid) {
+ struct stat st;
+ bool found_uid = false, found_gid = false;
+ uid_t uid = 0;
+ gid_t gid = 0;
+
+ assert(i);
+
+ /* First, try to get the gid directly */
+ if (_gid && i->gid_path && root_stat(i->gid_path, &st) >= 0) {
+ gid = st.st_gid;
+ found_gid = true;
+ }
+
+ /* Then, try to get the uid directly */
+ if ((_uid || (_gid && !found_gid))
+ && i->uid_path
+ && root_stat(i->uid_path, &st) >= 0) {
+
+ uid = st.st_uid;
+ found_uid = true;
+
+ /* If we need the gid, but had no success yet, also derive it from the uid path */
+ if (_gid && !found_gid) {
+ gid = st.st_gid;
+ found_gid = true;
+ }
+ }
+
+ /* If that didn't work yet, then let's reuse the gid as uid */
+ if (_uid && !found_uid && i->gid_path) {
+
+ if (found_gid) {
+ uid = (uid_t) gid;
+ found_uid = true;
+ } else if (root_stat(i->gid_path, &st) >= 0) {
+ uid = (uid_t) st.st_gid;
+ found_uid = true;
+ }
+ }
+
+ if (_uid) {
+ if (!found_uid)
+ return 0;
+
+ *_uid = uid;
+ }
+
+ if (_gid) {
+ if (!found_gid)
+ return 0;
+
+ *_gid = gid;
+ }
+
+ return 1;
+}
+
+static int add_user(Item *i) {
+ void *z;
+ int r;
+
+ assert(i);
+
+ /* Check the database directly */
+ z = hashmap_get(database_by_username, i->name);
+ if (z) {
+ log_debug("User %s already exists.", i->name);
+ i->uid = PTR_TO_UID(z);
+ i->uid_set = true;
+ return 0;
+ }
+
+ if (!arg_root) {
+ struct passwd *p;
+
+ /* Also check NSS */
+ errno = 0;
+ p = getpwnam(i->name);
+ if (p) {
+ log_debug("User %s already exists.", i->name);
+ i->uid = p->pw_uid;
+ i->uid_set = true;
+
+ r = free_and_strdup(&i->description, p->pw_gecos);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+ }
+ if (!errno_is_not_exists(errno))
+ return log_error_errno(errno, "Failed to check if user %s already exists: %m", i->name);
+ }
+
+ /* Try to use the suggested numeric uid */
+ if (i->uid_set) {
+ r = uid_is_ok(i->uid, i->name, !i->id_set_strict);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify uid " UID_FMT ": %m", i->uid);
+ if (r == 0) {
+ log_debug("Suggested user ID " UID_FMT " for %s already used.", i->uid, i->name);
+ i->uid_set = false;
+ }
+ }
+
+ /* If that didn't work, try to read it from the specified path */
+ if (!i->uid_set) {
+ uid_t c;
+
+ if (read_id_from_file(i, &c, NULL) > 0) {
+
+ if (c <= 0 || !uid_range_contains(uid_range, n_uid_range, c))
+ log_debug("User ID " UID_FMT " of file not suitable for %s.", c, i->name);
+ else {
+ r = uid_is_ok(c, i->name, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify uid " UID_FMT ": %m", i->uid);
+ else if (r > 0) {
+ i->uid = c;
+ i->uid_set = true;
+ } else
+ log_debug("User ID " UID_FMT " of file for %s is already used.", c, i->name);
+ }
+ }
+ }
+
+ /* Otherwise, try to reuse the group ID */
+ if (!i->uid_set && i->gid_set) {
+ r = uid_is_ok((uid_t) i->gid, i->name, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify uid " UID_FMT ": %m", i->uid);
+ if (r > 0) {
+ i->uid = (uid_t) i->gid;
+ i->uid_set = true;
+ }
+ }
+
+ /* And if that didn't work either, let's try to find a free one */
+ if (!i->uid_set) {
+ maybe_emit_login_defs_warning();
+
+ for (;;) {
+ r = uid_range_next_lower(uid_range, n_uid_range, &search_uid);
+ if (r < 0)
+ return log_error_errno(r, "No free user ID available for %s.", i->name);
+
+ r = uid_is_ok(search_uid, i->name, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify uid " UID_FMT ": %m", i->uid);
+ else if (r > 0)
+ break;
+ }
+
+ i->uid_set = true;
+ i->uid = search_uid;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&todo_uids, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_hashmap_put(todo_uids, UID_TO_PTR(i->uid), i);
+ if (r < 0)
+ return log_oom();
+
+ i->todo_user = true;
+ log_info("Creating user %s (%s) with uid " UID_FMT " and gid " GID_FMT ".", i->name, strna(i->description), i->uid, i->gid);
+
+ return 0;
+}
+
+static int gid_is_ok(gid_t gid) {
+ struct group *g;
+ struct passwd *p;
+
+ if (ordered_hashmap_get(todo_gids, GID_TO_PTR(gid)))
+ return 0;
+
+ /* Avoid reusing gids that are already used by a different user */
+ if (ordered_hashmap_get(todo_uids, UID_TO_PTR(gid)))
+ return 0;
+
+ if (hashmap_contains(database_by_gid, GID_TO_PTR(gid)))
+ return 0;
+
+ if (hashmap_contains(database_by_uid, UID_TO_PTR(gid)))
+ return 0;
+
+ if (!arg_root) {
+ errno = 0;
+ g = getgrgid(gid);
+ if (g)
+ return 0;
+ if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+
+ errno = 0;
+ p = getpwuid((uid_t) gid);
+ if (p)
+ return 0;
+ if (!IN_SET(errno, 0, ENOENT))
+ return -errno;
+ }
+
+ return 1;
+}
+
+static int get_gid_by_name(const char *name, gid_t *gid) {
+ void *z;
+
+ assert(gid);
+
+ /* Check the database directly */
+ z = hashmap_get(database_by_groupname, name);
+ if (z) {
+ *gid = PTR_TO_GID(z);
+ return 0;
+ }
+
+ /* Also check NSS */
+ if (!arg_root) {
+ struct group *g;
+
+ errno = 0;
+ g = getgrnam(name);
+ if (g) {
+ *gid = g->gr_gid;
+ return 0;
+ }
+ if (!errno_is_not_exists(errno))
+ return log_error_errno(errno, "Failed to check if group %s already exists: %m", name);
+ }
+
+ return -ENOENT;
+}
+
+static int add_group(Item *i) {
+ int r;
+
+ assert(i);
+
+ r = get_gid_by_name(i->name, &i->gid);
+ if (r != -ENOENT) {
+ if (r < 0)
+ return r;
+ log_debug("Group %s already exists.", i->name);
+ i->gid_set = true;
+ return 0;
+ }
+
+ /* Try to use the suggested numeric gid */
+ if (i->gid_set) {
+ r = gid_is_ok(i->gid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify gid " GID_FMT ": %m", i->gid);
+ if (i->id_set_strict) {
+ /* If we require the gid to already exist we can return here:
+ * r > 0: means the gid does not exist -> fail
+ * r == 0: means the gid exists -> nothing more to do.
+ */
+ if (r > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to create %s: please create GID %d",
+ i->name, i->gid);
+ if (r == 0)
+ return 0;
+ }
+ if (r == 0) {
+ log_debug("Suggested group ID " GID_FMT " for %s already used.", i->gid, i->name);
+ i->gid_set = false;
+ }
+ }
+
+ /* Try to reuse the numeric uid, if there's one */
+ if (!i->gid_set && i->uid_set) {
+ r = gid_is_ok((gid_t) i->uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify gid " GID_FMT ": %m", i->gid);
+ if (r > 0) {
+ i->gid = (gid_t) i->uid;
+ i->gid_set = true;
+ }
+ }
+
+ /* If that didn't work, try to read it from the specified path */
+ if (!i->gid_set) {
+ gid_t c;
+
+ if (read_id_from_file(i, NULL, &c) > 0) {
+
+ if (c <= 0 || !uid_range_contains(uid_range, n_uid_range, c))
+ log_debug("Group ID " GID_FMT " of file not suitable for %s.", c, i->name);
+ else {
+ r = gid_is_ok(c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify gid " GID_FMT ": %m", i->gid);
+ else if (r > 0) {
+ i->gid = c;
+ i->gid_set = true;
+ } else
+ log_debug("Group ID " GID_FMT " of file for %s already used.", c, i->name);
+ }
+ }
+ }
+
+ /* And if that didn't work either, let's try to find a free one */
+ if (!i->gid_set) {
+ maybe_emit_login_defs_warning();
+
+ for (;;) {
+ /* We look for new GIDs in the UID pool! */
+ r = uid_range_next_lower(uid_range, n_uid_range, &search_uid);
+ if (r < 0)
+ return log_error_errno(r, "No free group ID available for %s.", i->name);
+
+ r = gid_is_ok(search_uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify gid " GID_FMT ": %m", i->gid);
+ else if (r > 0)
+ break;
+ }
+
+ i->gid_set = true;
+ i->gid = search_uid;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&todo_gids, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_hashmap_put(todo_gids, GID_TO_PTR(i->gid), i);
+ if (r < 0)
+ return log_oom();
+
+ i->todo_group = true;
+ log_info("Creating group %s with gid " GID_FMT ".", i->name, i->gid);
+
+ return 0;
+}
+
+static int process_item(Item *i) {
+ int r;
+
+ assert(i);
+
+ switch (i->type) {
+
+ case ADD_USER: {
+ Item *j;
+
+ j = ordered_hashmap_get(groups, i->group_name ?: i->name);
+ if (j && j->todo_group) {
+ /* When a group with the target name is already in queue,
+ * use the information about the group and do not create
+ * duplicated group entry. */
+ i->gid_set = j->gid_set;
+ i->gid = j->gid;
+ i->id_set_strict = true;
+ } else if (i->group_name) {
+ /* When a group name was given instead of a GID and it's
+ * not in queue, then it must already exist. */
+ r = get_gid_by_name(i->group_name, &i->gid);
+ if (r < 0)
+ return log_error_errno(r, "Group %s not found.", i->group_name);
+ i->gid_set = true;
+ i->id_set_strict = true;
+ } else {
+ r = add_group(i);
+ if (r < 0)
+ return r;
+ }
+
+ return add_user(i);
+ }
+
+ case ADD_GROUP:
+ return add_group(i);
+
+ default:
+ assert_not_reached("Unknown item type");
+ }
+}
+
+static Item* item_free(Item *i) {
+ if (!i)
+ return NULL;
+
+ free(i->name);
+ free(i->group_name);
+ free(i->uid_path);
+ free(i->gid_path);
+ free(i->description);
+ free(i->home);
+ free(i->shell);
+ return mfree(i);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Item*, item_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(item_hash_ops, char, string_hash_func, string_compare_func, Item, item_free);
+
+static int add_implicit(void) {
+ char *g, **l;
+ int r;
+
+ /* Implicitly create additional users and groups, if they were listed in "m" lines */
+ ORDERED_HASHMAP_FOREACH_KEY(l, g, members) {
+ char **m;
+
+ STRV_FOREACH(m, l)
+ if (!ordered_hashmap_get(users, *m)) {
+ _cleanup_(item_freep) Item *j = NULL;
+
+ r = ordered_hashmap_ensure_allocated(&users, &item_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ j = new0(Item, 1);
+ if (!j)
+ return log_oom();
+
+ j->type = ADD_USER;
+ j->name = strdup(*m);
+ if (!j->name)
+ return log_oom();
+
+ r = ordered_hashmap_put(users, j->name, j);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Adding implicit user '%s' due to m line", j->name);
+ j = NULL;
+ }
+
+ if (!(ordered_hashmap_get(users, g) ||
+ ordered_hashmap_get(groups, g))) {
+ _cleanup_(item_freep) Item *j = NULL;
+
+ r = ordered_hashmap_ensure_allocated(&groups, &item_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ j = new0(Item, 1);
+ if (!j)
+ return log_oom();
+
+ j->type = ADD_GROUP;
+ j->name = strdup(g);
+ if (!j->name)
+ return log_oom();
+
+ r = ordered_hashmap_put(groups, j->name, j);
+ if (r < 0)
+ return log_oom();
+
+ log_debug("Adding implicit group '%s' due to m line", j->name);
+ j = NULL;
+ }
+ }
+
+ return 0;
+}
+
+static bool item_equal(Item *a, Item *b) {
+ assert(a);
+ assert(b);
+
+ if (a->type != b->type)
+ return false;
+
+ if (!streq_ptr(a->name, b->name))
+ return false;
+
+ if (!streq_ptr(a->uid_path, b->uid_path))
+ return false;
+
+ if (!streq_ptr(a->gid_path, b->gid_path))
+ return false;
+
+ if (!streq_ptr(a->description, b->description))
+ return false;
+
+ if (a->uid_set != b->uid_set)
+ return false;
+
+ if (a->uid_set && a->uid != b->uid)
+ return false;
+
+ if (a->gid_set != b->gid_set)
+ return false;
+
+ if (a->gid_set && a->gid != b->gid)
+ return false;
+
+ if (!streq_ptr(a->home, b->home))
+ return false;
+
+ if (!streq_ptr(a->shell, b->shell))
+ return false;
+
+ return true;
+}
+
+static int parse_line(const char *fname, unsigned line, const char *buffer) {
+
+ static const Specifier specifier_table[] = {
+ COMMON_SYSTEM_SPECIFIERS,
+ COMMON_TMP_SPECIFIERS,
+ {}
+ };
+
+ _cleanup_free_ char *action = NULL,
+ *name = NULL, *resolved_name = NULL,
+ *id = NULL, *resolved_id = NULL,
+ *description = NULL, *resolved_description = NULL,
+ *home = NULL, *resolved_home = NULL,
+ *shell = NULL, *resolved_shell = NULL;
+ _cleanup_(item_freep) Item *i = NULL;
+ Item *existing;
+ OrderedHashmap *h;
+ int r;
+ const char *p;
+
+ assert(fname);
+ assert(line >= 1);
+ assert(buffer);
+
+ /* Parse columns */
+ p = buffer;
+ r = extract_many_words(&p, NULL, EXTRACT_UNQUOTE,
+ &action, &name, &id, &description, &home, &shell, NULL);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Syntax error.", fname, line);
+ if (r < 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Missing action and name columns.", fname, line);
+ if (!isempty(p))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Trailing garbage.", fname, line);
+
+ /* Verify action */
+ if (strlen(action) != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Unknown modifier '%s'", fname, line, action);
+
+ if (!IN_SET(action[0], ADD_USER, ADD_GROUP, ADD_MEMBER, ADD_RANGE))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "[%s:%u] Unknown command type '%c'.", fname, line, action[0]);
+
+ /* Verify name */
+ if (empty_or_dash(name))
+ name = mfree(name);
+
+ if (name) {
+ r = specifier_printf(name, specifier_table, NULL, &resolved_name);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to replace specifiers in '%s': %m", fname, line, name);
+
+ if (!valid_user_group_name(resolved_name, 0))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] '%s' is not a valid user or group name.",
+ fname, line, resolved_name);
+ }
+
+ /* Verify id */
+ if (empty_or_dash(id))
+ id = mfree(id);
+
+ if (id) {
+ r = specifier_printf(id, specifier_table, NULL, &resolved_id);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to replace specifiers in '%s': %m",
+ fname, line, name);
+ }
+
+ /* Verify description */
+ if (empty_or_dash(description))
+ description = mfree(description);
+
+ if (description) {
+ r = specifier_printf(description, specifier_table, NULL, &resolved_description);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to replace specifiers in '%s': %m",
+ fname, line, description);
+
+ if (!valid_gecos(resolved_description))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] '%s' is not a valid GECOS field.",
+ fname, line, resolved_description);
+ }
+
+ /* Verify home */
+ if (empty_or_dash(home))
+ home = mfree(home);
+
+ if (home) {
+ r = specifier_printf(home, specifier_table, NULL, &resolved_home);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to replace specifiers in '%s': %m",
+ fname, line, home);
+
+ if (!valid_home(resolved_home))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] '%s' is not a valid home directory field.",
+ fname, line, resolved_home);
+ }
+
+ /* Verify shell */
+ if (empty_or_dash(shell))
+ shell = mfree(shell);
+
+ if (shell) {
+ r = specifier_printf(shell, specifier_table, NULL, &resolved_shell);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to replace specifiers in '%s': %m",
+ fname, line, shell);
+
+ if (!valid_shell(resolved_shell))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] '%s' is not a valid login shell field.",
+ fname, line, resolved_shell);
+ }
+
+ switch (action[0]) {
+
+ case ADD_RANGE:
+ if (resolved_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type 'r' don't take a name field.",
+ fname, line);
+
+ if (!resolved_id)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type 'r' require a ID range in the third field.",
+ fname, line);
+
+ if (description || home || shell)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type '%c' don't take a %s field.",
+ fname, line, action[0],
+ description ? "GECOS" : home ? "home directory" : "login shell");
+
+ r = uid_range_add_str(&uid_range, &n_uid_range, resolved_id);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Invalid UID range %s.", fname, line, resolved_id);
+
+ return 0;
+
+ case ADD_MEMBER: {
+ /* Try to extend an existing member or group item */
+ if (!name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type 'm' require a user name in the second field.",
+ fname, line);
+
+ if (!resolved_id)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type 'm' require a group name in the third field.",
+ fname, line);
+
+ if (!valid_user_group_name(resolved_id, 0))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] '%s' is not a valid user or group name.",
+ fname, line, resolved_id);
+
+ if (description || home || shell)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type '%c' don't take a %s field.",
+ fname, line, action[0],
+ description ? "GECOS" : home ? "home directory" : "login shell");
+
+ r = string_strv_ordered_hashmap_put(&members, resolved_id, resolved_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to store mapping for %s: %m", resolved_id);
+
+ return 0;
+ }
+
+ case ADD_USER:
+ if (!name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type 'u' require a user name in the second field.",
+ fname, line);
+
+ r = ordered_hashmap_ensure_allocated(&users, &item_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ i = new0(Item, 1);
+ if (!i)
+ return log_oom();
+
+ if (resolved_id) {
+ if (path_is_absolute(resolved_id)) {
+ i->uid_path = TAKE_PTR(resolved_id);
+ path_simplify(i->uid_path, false);
+ } else {
+ _cleanup_free_ char *uid = NULL, *gid = NULL;
+ if (split_pair(resolved_id, ":", &uid, &gid) == 0) {
+ r = parse_gid(gid, &i->gid);
+ if (r < 0) {
+ if (valid_user_group_name(gid, 0))
+ i->group_name = TAKE_PTR(gid);
+ else
+ return log_error_errno(r, "Failed to parse GID: '%s': %m", id);
+ } else {
+ i->gid_set = true;
+ i->id_set_strict = true;
+ }
+ free_and_replace(resolved_id, uid);
+ }
+ if (!streq(resolved_id, "-")) {
+ r = parse_uid(resolved_id, &i->uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse UID: '%s': %m", id);
+ i->uid_set = true;
+ }
+ }
+ }
+
+ i->description = TAKE_PTR(resolved_description);
+ i->home = TAKE_PTR(resolved_home);
+ i->shell = TAKE_PTR(resolved_shell);
+
+ h = users;
+ break;
+
+ case ADD_GROUP:
+ if (!name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type 'g' require a user name in the second field.",
+ fname, line);
+
+ if (description || home || shell)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Lines of type '%c' don't take a %s field.",
+ fname, line, action[0],
+ description ? "GECOS" : home ? "home directory" : "login shell");
+
+ r = ordered_hashmap_ensure_allocated(&groups, &item_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ i = new0(Item, 1);
+ if (!i)
+ return log_oom();
+
+ if (resolved_id) {
+ if (path_is_absolute(resolved_id)) {
+ i->gid_path = TAKE_PTR(resolved_id);
+ path_simplify(i->gid_path, false);
+ } else {
+ r = parse_gid(resolved_id, &i->gid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse GID: '%s': %m", id);
+
+ i->gid_set = true;
+ }
+ }
+
+ h = groups;
+ break;
+
+ default:
+ return -EBADMSG;
+ }
+
+ i->type = action[0];
+ i->name = TAKE_PTR(resolved_name);
+
+ existing = ordered_hashmap_get(h, i->name);
+ if (existing) {
+ /* Two identical items are fine */
+ if (!item_equal(existing, i))
+ log_warning("Two or more conflicting lines for %s configured, ignoring.", i->name);
+
+ return 0;
+ }
+
+ r = ordered_hashmap_put(h, i->name, i);
+ if (r < 0)
+ return log_oom();
+
+ i = NULL;
+ return 0;
+}
+
+static int read_config_file(const char *fn, bool ignore_enoent) {
+ _cleanup_fclose_ FILE *rf = NULL;
+ FILE *f = NULL;
+ unsigned v = 0;
+ int r = 0;
+
+ assert(fn);
+
+ if (streq(fn, "-"))
+ f = stdin;
+ else {
+ r = search_and_fopen(fn, "re", arg_root, (const char**) CONF_PATHS_STRV("sysusers.d"), &rf);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT)
+ return 0;
+
+ return log_error_errno(r, "Failed to open '%s', ignoring: %m", fn);
+ }
+
+ f = rf;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *l;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read '%s': %m", fn);
+ if (k == 0)
+ break;
+
+ v++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, 0, '#'))
+ continue;
+
+ k = parse_line(fn, v, l);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ if (ferror(f)) {
+ log_error_errno(errno, "Failed to read from file %s: %m", fn);
+ if (r == 0)
+ r = -EIO;
+ }
+
+ return r;
+}
+
+static int cat_config(void) {
+ _cleanup_strv_free_ char **files = NULL;
+ int r;
+
+ r = conf_files_list_with_replacement(arg_root, CONF_PATHS_STRV("sysusers.d"), arg_replace, &files, NULL);
+ if (r < 0)
+ return r;
+
+ (void) pager_open(arg_pager_flags);
+
+ return cat_files(NULL, files, 0);
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-sysusers.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Creates system user accounts.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --cat-config Show configuration files\n"
+ " --root=PATH Operate on an alternate filesystem root\n"
+ " --image=PATH Operate on disk image as filesystem root\n"
+ " --replace=PATH Treat arguments as replacement for PATH\n"
+ " --inline Treat arguments as configuration lines\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_CAT_CONFIG,
+ ARG_ROOT,
+ ARG_IMAGE,
+ ARG_REPLACE,
+ ARG_INLINE,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "cat-config", no_argument, NULL, ARG_CAT_CONFIG },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "image", required_argument, NULL, ARG_IMAGE },
+ { "replace", required_argument, NULL, ARG_REPLACE },
+ { "inline", no_argument, NULL, ARG_INLINE },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CAT_CONFIG:
+ arg_cat_config = true;
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, /* suppress_root= */ false, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_IMAGE:
+#ifdef STANDALONE
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "This systemd-sysusers version is compiled without support for --image=.");
+#else
+ r = parse_path_argument_and_warn(optarg, /* suppress_root= */ false, &arg_image);
+ if (r < 0)
+ return r;
+ break;
+#endif
+
+ case ARG_REPLACE:
+ if (!path_is_absolute(optarg) ||
+ !endswith(optarg, ".conf"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "The argument to --replace= must an absolute path to a config file");
+
+ arg_replace = optarg;
+ break;
+
+ case ARG_INLINE:
+ arg_inline = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_replace && arg_cat_config)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --replace= is not supported with --cat-config");
+
+ if (arg_replace && optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "When --replace= is given, some configuration items must be specified");
+
+ if (arg_image && arg_root)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Please specify either --root= or --image=, the combination of both is not supported.");
+
+ return 1;
+}
+
+static int parse_arguments(char **args) {
+ char **arg;
+ unsigned pos = 1;
+ int r;
+
+ STRV_FOREACH(arg, args) {
+ if (arg_inline)
+ /* Use (argument):n, where n==1 for the first positional arg */
+ r = parse_line("(argument)", pos, *arg);
+ else
+ r = read_config_file(*arg, false);
+ if (r < 0)
+ return r;
+
+ pos++;
+ }
+
+ return 0;
+}
+
+static int read_config_files(char **args) {
+ _cleanup_strv_free_ char **files = NULL;
+ _cleanup_free_ char *p = NULL;
+ char **f;
+ int r;
+
+ r = conf_files_list_with_replacement(arg_root, CONF_PATHS_STRV("sysusers.d"), arg_replace, &files, &p);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(f, files)
+ if (p && path_equal(*f, p)) {
+ log_debug("Parsing arguments at position \"%s\"…", *f);
+
+ r = parse_arguments(args);
+ if (r < 0)
+ return r;
+ } else {
+ log_debug("Reading config file \"%s\"…", *f);
+
+ /* Just warn, ignore result otherwise */
+ (void) read_config_file(*f, true);
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+#ifndef STANDALONE
+ _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(umount_and_rmdir_and_freep) char *unlink_dir = NULL;
+#endif
+ _cleanup_close_ int lock = -1;
+ Item *i;
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ if (arg_cat_config)
+ return cat_config();
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+#ifndef STANDALONE
+ if (arg_image) {
+ assert(!arg_root);
+
+ r = mount_image_privately_interactively(
+ arg_image,
+ DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
+ &unlink_dir,
+ &loop_device,
+ &decrypted_image);
+ if (r < 0)
+ return r;
+
+ arg_root = strdup(unlink_dir);
+ if (!arg_root)
+ return log_oom();
+ }
+#else
+ assert(!arg_image);
+#endif
+
+ /* If command line arguments are specified along with --replace, read all
+ * configuration files and insert the positional arguments at the specified
+ * place. Otherwise, if command line arguments are specified, execute just
+ * them, and finally, without --replace= or any positional arguments, just
+ * read configuration and execute it.
+ */
+ if (arg_replace || optind >= argc)
+ r = read_config_files(argv + optind);
+ else
+ r = parse_arguments(argv + optind);
+ if (r < 0)
+ return r;
+
+ /* Let's tell nss-systemd not to synthesize the "root" and "nobody" entries for it, so that our detection
+ * whether the names or UID/GID area already used otherwise doesn't get confused. After all, even though
+ * nss-systemd synthesizes these users/groups, they should still appear in /etc/passwd and /etc/group, as the
+ * synthesizing logic is merely supposed to be fallback for cases where we run with a completely unpopulated
+ * /etc. */
+ if (setenv("SYSTEMD_NSS_BYPASS_SYNTHETIC", "1", 1) < 0)
+ return log_error_errno(errno, "Failed to set SYSTEMD_NSS_BYPASS_SYNTHETIC environment variable: %m");
+
+ if (!uid_range) {
+ /* Default to default range of SYSTEMD_UID_MIN..SYSTEM_UID_MAX. */
+ r = read_login_defs(&login_defs, NULL, arg_root);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read %s%s: %m",
+ strempty(arg_root), "/etc/login.defs");
+
+ login_defs_need_warning = true;
+
+ /* We pick a range that very conservative: we look at compiled-in maximum and the value in
+ * /etc/login.defs. That way the uids/gids which we allocate will be interpreted correctly,
+ * even if /etc/login.defs is removed later. (The bottom bound doesn't matter much, since
+ * it's only used during allocation, so we use the configured value directly). */
+ uid_t begin = login_defs.system_alloc_uid_min,
+ end = MIN3((uid_t) SYSTEM_UID_MAX, login_defs.system_uid_max, login_defs.system_gid_max);
+ if (begin < end) {
+ r = uid_range_add(&uid_range, &n_uid_range, begin, end - begin + 1);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ r = add_implicit();
+ if (r < 0)
+ return r;
+
+ lock = take_etc_passwd_lock(arg_root);
+ if (lock < 0)
+ return log_error_errno(lock, "Failed to take /etc/passwd lock: %m");
+
+ r = load_user_database();
+ if (r < 0)
+ return log_error_errno(r, "Failed to load user database: %m");
+
+ r = load_group_database();
+ if (r < 0)
+ return log_error_errno(r, "Failed to read group database: %m");
+
+ ORDERED_HASHMAP_FOREACH(i, groups)
+ (void) process_item(i);
+
+ ORDERED_HASHMAP_FOREACH(i, users)
+ (void) process_item(i);
+
+ r = write_files();
+ if (r < 0)
+ return log_error_errno(r, "Failed to write files: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/sysv-generator/sysv-generator.c b/src/sysv-generator/sysv-generator.c
new file mode 100644
index 0000000..008a825
--- /dev/null
+++ b/src/sysv-generator/sysv-generator.c
@@ -0,0 +1,950 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "generator.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "install.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "set.h"
+#include "special.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+
+static const struct {
+ const char *path;
+ const char *target;
+} rcnd_table[] = {
+ /* Standard SysV runlevels for start-up */
+ { "rc1.d", SPECIAL_RESCUE_TARGET },
+ { "rc2.d", SPECIAL_MULTI_USER_TARGET },
+ { "rc3.d", SPECIAL_MULTI_USER_TARGET },
+ { "rc4.d", SPECIAL_MULTI_USER_TARGET },
+ { "rc5.d", SPECIAL_GRAPHICAL_TARGET },
+
+ /* We ignore the SysV runlevels for shutdown here, as SysV services get default dependencies anyway, and that
+ * means they are shut down anyway at system power off if running. */
+};
+
+static const char *arg_dest = NULL;
+
+typedef struct SysvStub {
+ char *name;
+ char *path;
+ char *description;
+ int sysv_start_priority;
+ char *pid_file;
+ char **before;
+ char **after;
+ char **wants;
+ char **wanted_by;
+ bool has_lsb;
+ bool reload;
+ bool loaded;
+} SysvStub;
+
+static void free_sysvstub(SysvStub *s) {
+ if (!s)
+ return;
+
+ free(s->name);
+ free(s->path);
+ free(s->description);
+ free(s->pid_file);
+ strv_free(s->before);
+ strv_free(s->after);
+ strv_free(s->wants);
+ strv_free(s->wanted_by);
+ free(s);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SysvStub*, free_sysvstub);
+
+static void free_sysvstub_hashmapp(Hashmap **h) {
+ hashmap_free_with_destructor(*h, free_sysvstub);
+}
+
+static int add_alias(const char *service, const char *alias) {
+ const char *link;
+ int r;
+
+ assert(service);
+ assert(alias);
+
+ link = prefix_roota(arg_dest, alias);
+
+ r = symlink(service, link);
+ if (r < 0) {
+ if (errno == EEXIST)
+ return 0;
+
+ return -errno;
+ }
+
+ return 1;
+}
+
+static int generate_unit_file(SysvStub *s) {
+ _cleanup_free_ char *path_escaped = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit;
+ char **p;
+ int r;
+
+ assert(s);
+
+ if (!s->loaded)
+ return 0;
+
+ path_escaped = specifier_escape(s->path);
+ if (!path_escaped)
+ return log_oom();
+
+ unit = prefix_roota(arg_dest, s->name);
+
+ /* We might already have a symlink with the same name from a Provides:,
+ * or from backup files like /etc/init.d/foo.bak. Real scripts always win,
+ * so remove an existing link */
+ if (is_symlink(unit) > 0) {
+ log_warning("Overwriting existing symlink %s with real service.", unit);
+ (void) unlink(unit);
+ }
+
+ f = fopen(unit, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-sysv-generator\n\n"
+ "[Unit]\n"
+ "Documentation=man:systemd-sysv-generator(8)\n"
+ "SourcePath=%s\n",
+ path_escaped);
+
+ if (s->description) {
+ _cleanup_free_ char *t;
+
+ t = specifier_escape(s->description);
+ if (!t)
+ return log_oom();
+
+ fprintf(f, "Description=%s\n", t);
+ }
+
+ STRV_FOREACH(p, s->before)
+ fprintf(f, "Before=%s\n", *p);
+ STRV_FOREACH(p, s->after)
+ fprintf(f, "After=%s\n", *p);
+ STRV_FOREACH(p, s->wants)
+ fprintf(f, "Wants=%s\n", *p);
+
+ fprintf(f,
+ "\n[Service]\n"
+ "Type=forking\n"
+ "Restart=no\n"
+ "TimeoutSec=5min\n"
+ "IgnoreSIGPIPE=no\n"
+ "KillMode=process\n"
+ "GuessMainPID=no\n"
+ "RemainAfterExit=%s\n",
+ yes_no(!s->pid_file));
+
+ if (s->pid_file) {
+ _cleanup_free_ char *t;
+
+ t = specifier_escape(s->pid_file);
+ if (!t)
+ return log_oom();
+
+ fprintf(f, "PIDFile=%s\n", t);
+ }
+
+ /* Consider two special LSB exit codes a clean exit */
+ if (s->has_lsb)
+ fprintf(f,
+ "SuccessExitStatus=%i %i\n",
+ EXIT_NOTINSTALLED,
+ EXIT_NOTCONFIGURED);
+
+ fprintf(f,
+ "ExecStart=%s start\n"
+ "ExecStop=%s stop\n",
+ path_escaped, path_escaped);
+
+ if (s->reload)
+ fprintf(f, "ExecReload=%s reload\n", path_escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit %s: %m", unit);
+
+ STRV_FOREACH(p, s->wanted_by)
+ (void) generator_add_symlink(arg_dest, *p, "wants", s->name);
+
+ return 1;
+}
+
+static bool usage_contains_reload(const char *line) {
+ return (strcasestr(line, "{reload|") ||
+ strcasestr(line, "{reload}") ||
+ strcasestr(line, "{reload\"") ||
+ strcasestr(line, "|reload|") ||
+ strcasestr(line, "|reload}") ||
+ strcasestr(line, "|reload\""));
+}
+
+static char *sysv_translate_name(const char *name) {
+ _cleanup_free_ char *c = NULL;
+ char *res;
+
+ c = strdup(name);
+ if (!c)
+ return NULL;
+
+ res = endswith(c, ".sh");
+ if (res)
+ *res = 0;
+
+ if (unit_name_mangle(c, 0, &res) < 0)
+ return NULL;
+
+ return res;
+}
+
+static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name, char **ret) {
+
+ /* We silently ignore the $ prefix here. According to the LSB
+ * spec it simply indicates whether something is a
+ * standardized name or a distribution-specific one. Since we
+ * just follow what already exists and do not introduce new
+ * uses or names we don't care who introduced a new name. */
+
+ static const char * const table[] = {
+ /* LSB defined facilities */
+ "local_fs", NULL,
+ "network", SPECIAL_NETWORK_ONLINE_TARGET,
+ "named", SPECIAL_NSS_LOOKUP_TARGET,
+ "portmap", SPECIAL_RPCBIND_TARGET,
+ "remote_fs", SPECIAL_REMOTE_FS_TARGET,
+ "syslog", NULL,
+ "time", SPECIAL_TIME_SYNC_TARGET,
+ };
+
+ const char *filename;
+ char *filename_no_sh, *e, *m;
+ const char *n;
+ unsigned i;
+ int r;
+
+ assert(name);
+ assert(s);
+ assert(ret);
+
+ filename = basename(s->path);
+
+ n = *name == '$' ? name + 1 : name;
+
+ for (i = 0; i < ELEMENTSOF(table); i += 2) {
+ if (!streq(table[i], n))
+ continue;
+
+ if (!table[i+1]) {
+ *ret = NULL;
+ return 0;
+ }
+
+ m = strdup(table[i+1]);
+ if (!m)
+ return log_oom();
+
+ *ret = m;
+ return 1;
+ }
+
+ /* If we don't know this name, fallback heuristics to figure
+ * out whether something is a target or a service alias. */
+
+ /* Facilities starting with $ are most likely targets */
+ if (*name == '$') {
+ r = unit_name_build(n, NULL, ".target", ret);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Could not build name for facility %s: %m", s->path, line, name);
+
+ return 1;
+ }
+
+ /* Strip ".sh" suffix from file name for comparison */
+ filename_no_sh = strdupa(filename);
+ e = endswith(filename_no_sh, ".sh");
+ if (e) {
+ *e = '\0';
+ filename = filename_no_sh;
+ }
+
+ /* Names equaling the file name of the services are redundant */
+ if (streq_ptr(n, filename)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ /* Everything else we assume to be normal service names */
+ m = sysv_translate_name(n);
+ if (!m)
+ return log_oom();
+
+ *ret = m;
+ return 1;
+}
+
+static int handle_provides(SysvStub *s, unsigned line, const char *full_text, const char *text) {
+ int r;
+
+ assert(s);
+ assert(full_text);
+ assert(text);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *m = NULL;
+
+ r = extract_first_word(&text, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to parse word from provides string: %m", s->path, line);
+ if (r == 0)
+ break;
+
+ r = sysv_translate_facility(s, line, word, &m);
+ if (r <= 0) /* continue on error */
+ continue;
+
+ switch (unit_name_to_type(m)) {
+
+ case UNIT_SERVICE:
+ log_debug("Adding Provides: alias '%s' for '%s'", m, s->name);
+ r = add_alias(s->name, m);
+ if (r < 0)
+ log_warning_errno(r, "[%s:%u] Failed to add LSB Provides name %s, ignoring: %m", s->path, line, m);
+ break;
+
+ case UNIT_TARGET:
+
+ /* NB: SysV targets which are provided by a
+ * service are pulled in by the services, as
+ * an indication that the generic service is
+ * now available. This is strictly one-way.
+ * The targets do NOT pull in SysV services! */
+
+ r = strv_extend(&s->before, m);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(&s->wants, m);
+ if (r < 0)
+ return log_oom();
+
+ if (streq(m, SPECIAL_NETWORK_ONLINE_TARGET)) {
+ r = strv_extend(&s->before, SPECIAL_NETWORK_TARGET);
+ if (r < 0)
+ return log_oom();
+ r = strv_extend(&s->wants, SPECIAL_NETWORK_TARGET);
+ if (r < 0)
+ return log_oom();
+ }
+
+ break;
+
+ case _UNIT_TYPE_INVALID:
+ log_warning("Unit name '%s' is invalid", m);
+ break;
+
+ default:
+ log_warning("Unknown unit type for unit '%s'", m);
+ }
+ }
+
+ return 0;
+}
+
+static int handle_dependencies(SysvStub *s, unsigned line, const char *full_text, const char *text) {
+ int r;
+
+ assert(s);
+ assert(full_text);
+ assert(text);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *m = NULL;
+ bool is_before;
+
+ r = extract_first_word(&text, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX);
+ if (r < 0)
+ return log_error_errno(r, "[%s:%u] Failed to parse word from provides string: %m", s->path, line);
+ if (r == 0)
+ break;
+
+ r = sysv_translate_facility(s, line, word, &m);
+ if (r <= 0) /* continue on error */
+ continue;
+
+ is_before = startswith_no_case(full_text, "X-Start-Before:");
+
+ if (streq(m, SPECIAL_NETWORK_ONLINE_TARGET) && !is_before) {
+ /* the network-online target is special, as it needs to be actively pulled in */
+ r = strv_extend(&s->after, m);
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extend(&s->wants, m);
+ } else
+ r = strv_extend(is_before ? &s->before : &s->after, m);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int load_sysv(SysvStub *s) {
+ _cleanup_fclose_ FILE *f;
+ unsigned line = 0;
+ int r;
+ enum {
+ NORMAL,
+ DESCRIPTION,
+ LSB,
+ LSB_DESCRIPTION,
+ USAGE_CONTINUATION
+ } state = NORMAL;
+ _cleanup_free_ char *short_description = NULL, *long_description = NULL, *chkconfig_description = NULL;
+ char *description;
+ bool supports_reload = false;
+
+ assert(s);
+
+ f = fopen(s->path, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", s->path);
+ }
+
+ log_debug("Loading SysV script %s", s->path);
+
+ for (;;) {
+ _cleanup_free_ char *l = NULL;
+ char *t;
+
+ r = read_line(f, LONG_LINE_MAX, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read configuration file '%s': %m", s->path);
+ if (r == 0)
+ break;
+
+ line++;
+
+ t = strstrip(l);
+ if (*t != '#') {
+ /* Try to figure out whether this init script supports
+ * the reload operation. This heuristic looks for
+ * "Usage" lines which include the reload option. */
+ if (state == USAGE_CONTINUATION ||
+ (state == NORMAL && strcasestr(t, "usage"))) {
+ if (usage_contains_reload(t)) {
+ supports_reload = true;
+ state = NORMAL;
+ } else if (t[strlen(t)-1] == '\\')
+ state = USAGE_CONTINUATION;
+ else
+ state = NORMAL;
+ }
+
+ continue;
+ }
+
+ if (state == NORMAL && streq(t, "### BEGIN INIT INFO")) {
+ state = LSB;
+ s->has_lsb = true;
+ continue;
+ }
+
+ if (IN_SET(state, LSB_DESCRIPTION, LSB) && streq(t, "### END INIT INFO")) {
+ state = NORMAL;
+ continue;
+ }
+
+ t++;
+ t += strspn(t, WHITESPACE);
+
+ if (state == NORMAL) {
+
+ /* Try to parse Red Hat style description */
+
+ if (startswith_no_case(t, "description:")) {
+
+ size_t k;
+ const char *j;
+
+ k = strlen(t);
+ if (k > 0 && t[k-1] == '\\') {
+ state = DESCRIPTION;
+ t[k-1] = 0;
+ }
+
+ j = empty_to_null(strstrip(t+12));
+
+ r = free_and_strdup(&chkconfig_description, j);
+ if (r < 0)
+ return log_oom();
+
+ } else if (startswith_no_case(t, "pidfile:")) {
+ const char *fn;
+
+ state = NORMAL;
+
+ fn = strstrip(t+8);
+ if (!path_is_absolute(fn)) {
+ log_error("[%s:%u] PID file not absolute. Ignoring.", s->path, line);
+ continue;
+ }
+
+ r = free_and_strdup(&s->pid_file, fn);
+ if (r < 0)
+ return log_oom();
+ }
+
+ } else if (state == DESCRIPTION) {
+
+ /* Try to parse Red Hat style description
+ * continuation */
+
+ size_t k;
+ char *j;
+
+ k = strlen(t);
+ if (k > 0 && t[k-1] == '\\')
+ t[k-1] = 0;
+ else
+ state = NORMAL;
+
+ j = strstrip(t);
+ if (!isempty(j)) {
+ char *d = NULL;
+
+ if (chkconfig_description)
+ d = strjoin(chkconfig_description, " ", j);
+ else
+ d = strdup(j);
+ if (!d)
+ return log_oom();
+
+ free(chkconfig_description);
+ chkconfig_description = d;
+ }
+
+ } else if (IN_SET(state, LSB, LSB_DESCRIPTION)) {
+
+ if (startswith_no_case(t, "Provides:")) {
+ state = LSB;
+
+ r = handle_provides(s, line, t, t + 9);
+ if (r < 0)
+ return r;
+
+ } else if (startswith_no_case(t, "Required-Start:") ||
+ startswith_no_case(t, "Should-Start:") ||
+ startswith_no_case(t, "X-Start-Before:") ||
+ startswith_no_case(t, "X-Start-After:")) {
+
+ state = LSB;
+
+ r = handle_dependencies(s, line, t, strchr(t, ':') + 1);
+ if (r < 0)
+ return r;
+
+ } else if (startswith_no_case(t, "Description:")) {
+ const char *j;
+
+ state = LSB_DESCRIPTION;
+
+ j = empty_to_null(strstrip(t+12));
+
+ r = free_and_strdup(&long_description, j);
+ if (r < 0)
+ return log_oom();
+
+ } else if (startswith_no_case(t, "Short-Description:")) {
+ const char *j;
+
+ state = LSB;
+
+ j = empty_to_null(strstrip(t+18));
+
+ r = free_and_strdup(&short_description, j);
+ if (r < 0)
+ return log_oom();
+
+ } else if (state == LSB_DESCRIPTION) {
+
+ if (startswith(l, "#\t") || startswith(l, "# ")) {
+ const char *j;
+
+ j = strstrip(t);
+ if (!isempty(j)) {
+ char *d = NULL;
+
+ if (long_description)
+ d = strjoin(long_description, " ", t);
+ else
+ d = strdup(j);
+ if (!d)
+ return log_oom();
+
+ free(long_description);
+ long_description = d;
+ }
+
+ } else
+ state = LSB;
+ }
+ }
+ }
+
+ s->reload = supports_reload;
+
+ /* We use the long description only if
+ * no short description is set. */
+
+ if (short_description)
+ description = short_description;
+ else if (chkconfig_description)
+ description = chkconfig_description;
+ else if (long_description)
+ description = long_description;
+ else
+ description = NULL;
+
+ if (description) {
+ char *d;
+
+ d = strjoin(s->has_lsb ? "LSB: " : "SYSV: ", description);
+ if (!d)
+ return log_oom();
+
+ s->description = d;
+ }
+
+ s->loaded = true;
+ return 0;
+}
+
+static int fix_order(SysvStub *s, Hashmap *all_services) {
+ SysvStub *other;
+ int r;
+
+ assert(s);
+
+ if (!s->loaded)
+ return 0;
+
+ if (s->sysv_start_priority < 0)
+ return 0;
+
+ HASHMAP_FOREACH(other, all_services) {
+ if (s == other)
+ continue;
+
+ if (!other->loaded)
+ continue;
+
+ if (other->sysv_start_priority < 0)
+ continue;
+
+ /* If both units have modern headers we don't care
+ * about the priorities */
+ if (s->has_lsb && other->has_lsb)
+ continue;
+
+ if (other->sysv_start_priority < s->sysv_start_priority) {
+ r = strv_extend(&s->after, other->name);
+ if (r < 0)
+ return log_oom();
+
+ } else if (other->sysv_start_priority > s->sysv_start_priority) {
+ r = strv_extend(&s->before, other->name);
+ if (r < 0)
+ return log_oom();
+ } else
+ continue;
+
+ /* FIXME: Maybe we should compare the name here lexicographically? */
+ }
+
+ return 0;
+}
+
+static int acquire_search_path(const char *def, const char *envvar, char ***ret) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *e;
+ int r;
+
+ assert(def);
+ assert(envvar);
+
+ e = getenv(envvar);
+ if (e) {
+ r = path_split_and_make_absolute(e, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make $%s search path absolute: %m", envvar);
+ }
+
+ if (strv_isempty(l)) {
+ strv_free(l);
+
+ l = strv_new(def);
+ if (!l)
+ return log_oom();
+ }
+
+ if (!path_strv_resolve_uniq(l, NULL))
+ return log_oom();
+
+ *ret = TAKE_PTR(l);
+
+ return 0;
+}
+
+static int enumerate_sysv(const LookupPaths *lp, Hashmap *all_services) {
+ _cleanup_strv_free_ char **sysvinit_path = NULL;
+ char **path;
+ int r;
+
+ assert(lp);
+
+ r = acquire_search_path(SYSTEM_SYSVINIT_PATH, "SYSTEMD_SYSVINIT_PATH", &sysvinit_path);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(path, sysvinit_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*path);
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Opening %s failed, ignoring: %m", *path);
+ continue;
+ }
+
+ FOREACH_DIRENT(de, d, log_error_errno(errno, "Failed to enumerate directory %s, ignoring: %m", *path)) {
+ _cleanup_free_ char *fpath = NULL, *name = NULL;
+ _cleanup_(free_sysvstubp) SysvStub *service = NULL;
+ struct stat st;
+
+ if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) {
+ log_warning_errno(errno, "stat() failed on %s/%s, ignoring: %m", *path, de->d_name);
+ continue;
+ }
+
+ if (!(st.st_mode & S_IXUSR))
+ continue;
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ name = sysv_translate_name(de->d_name);
+ if (!name)
+ return log_oom();
+
+ if (hashmap_contains(all_services, name))
+ continue;
+
+ r = unit_file_exists(UNIT_FILE_SYSTEM, lp, name);
+ if (r < 0 && !IN_SET(r, -ELOOP, -ERFKILL, -EADDRNOTAVAIL)) {
+ log_debug_errno(r, "Failed to detect whether %s exists, skipping: %m", name);
+ continue;
+ } else if (r != 0) {
+ log_debug("Native unit for %s already exists, skipping.", name);
+ continue;
+ }
+
+ fpath = path_join(*path, de->d_name);
+ if (!fpath)
+ return log_oom();
+
+ log_warning("SysV service '%s' lacks a native systemd unit file. "
+ "Automatically generating a unit file for compatibility. "
+ "Please update package to include a native systemd unit file, in order to make it more safe and robust.", fpath);
+
+ service = new(SysvStub, 1);
+ if (!service)
+ return log_oom();
+
+ *service = (SysvStub) {
+ .sysv_start_priority = -1,
+ .name = TAKE_PTR(name),
+ .path = TAKE_PTR(fpath),
+ };
+
+ r = hashmap_put(all_services, service->name, service);
+ if (r < 0)
+ return log_oom();
+
+ TAKE_PTR(service);
+ }
+ }
+
+ return 0;
+}
+
+static int set_dependencies_from_rcnd(const LookupPaths *lp, Hashmap *all_services) {
+ Set *runlevel_services[ELEMENTSOF(rcnd_table)] = {};
+ _cleanup_strv_free_ char **sysvrcnd_path = NULL;
+ SysvStub *service;
+ char **p;
+ int r;
+
+ assert(lp);
+
+ r = acquire_search_path(SYSTEM_SYSVRCND_PATH, "SYSTEMD_SYSVRCND_PATH", &sysvrcnd_path);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, sysvrcnd_path)
+ for (unsigned i = 0; i < ELEMENTSOF(rcnd_table); i ++) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_free_ char *path = NULL;
+ struct dirent *de;
+
+ path = path_join(*p, rcnd_table[i].path);
+ if (!path) {
+ r = log_oom();
+ goto finish;
+ }
+
+ d = opendir(path);
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Opening %s failed, ignoring: %m", path);
+
+ continue;
+ }
+
+ FOREACH_DIRENT(de, d, log_warning_errno(errno, "Failed to enumerate directory %s, ignoring: %m", path)) {
+ _cleanup_free_ char *name = NULL, *fpath = NULL;
+ int a, b;
+
+ if (de->d_name[0] != 'S')
+ continue;
+
+ if (strlen(de->d_name) < 4)
+ continue;
+
+ a = undecchar(de->d_name[1]);
+ b = undecchar(de->d_name[2]);
+
+ if (a < 0 || b < 0)
+ continue;
+
+ fpath = path_join(*p, de->d_name);
+ if (!fpath) {
+ r = log_oom();
+ goto finish;
+ }
+
+ name = sysv_translate_name(de->d_name + 3);
+ if (!name) {
+ r = log_oom();
+ goto finish;
+ }
+
+ service = hashmap_get(all_services, name);
+ if (!service) {
+ log_debug("Ignoring %s symlink in %s, not generating %s.", de->d_name, rcnd_table[i].path, name);
+ continue;
+ }
+
+ service->sysv_start_priority = MAX(a*10 + b, service->sysv_start_priority);
+
+ r = set_ensure_put(&runlevel_services[i], NULL, service);
+ if (r < 0) {
+ log_oom();
+ goto finish;
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < ELEMENTSOF(rcnd_table); i++)
+ SET_FOREACH(service, runlevel_services[i]) {
+ r = strv_extend(&service->before, rcnd_table[i].target);
+ if (r < 0) {
+ log_oom();
+ goto finish;
+ }
+ r = strv_extend(&service->wanted_by, rcnd_table[i].target);
+ if (r < 0) {
+ log_oom();
+ goto finish;
+ }
+ }
+
+ r = 0;
+
+finish:
+ for (unsigned i = 0; i < ELEMENTSOF(rcnd_table); i++)
+ set_free(runlevel_services[i]);
+
+ return r;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ _cleanup_(free_sysvstub_hashmapp) Hashmap *all_services = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ SysvStub *service;
+ int r;
+
+ assert_se(arg_dest = dest_late);
+
+ r = lookup_paths_init(&lp, UNIT_FILE_SYSTEM, LOOKUP_PATHS_EXCLUDE_GENERATED, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to find lookup paths: %m");
+
+ all_services = hashmap_new(&string_hash_ops);
+ if (!all_services)
+ return log_oom();
+
+ r = enumerate_sysv(&lp, all_services);
+ if (r < 0)
+ return r;
+
+ r = set_dependencies_from_rcnd(&lp, all_services);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(service, all_services)
+ (void) load_sysv(service);
+
+ HASHMAP_FOREACH(service, all_services) {
+ (void) fix_order(service, all_services);
+ (void) generate_unit_file(service);
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/test/generate-sym-test.py b/src/test/generate-sym-test.py
new file mode 100755
index 0000000..fdb9e3e
--- /dev/null
+++ b/src/test/generate-sym-test.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+import sys, re
+
+print('#include <stdio.h>')
+for header in sys.argv[2:]:
+ print('#include "{}"'.format(header.split('/')[-1]))
+
+print('''
+/* We want to check deprecated symbols too, without complaining */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+const void* symbols[] = {''')
+
+for line in open(sys.argv[1]):
+ match = re.search('^ +([a-zA-Z0-9_]+);', line)
+ if match:
+ s = match.group(1)
+ if s == 'sd_bus_object_vtable_format':
+ print(' &{},'.format(s))
+ else:
+ print(' {},'.format(s))
+
+print('''};
+
+int main(void) {
+ unsigned i;
+ for (i = 0; i < sizeof(symbols)/sizeof(void*); i++)
+ printf("%p\\n", symbols[i]);
+ return 0;
+}''')
diff --git a/src/test/meson.build b/src/test/meson.build
new file mode 100644
index 0000000..6234294
--- /dev/null
+++ b/src/test/meson.build
@@ -0,0 +1,1194 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+awkscript = 'test-hashmap-ordered.awk'
+test_hashmap_ordered_c = custom_target(
+ 'test-hashmap-ordered.c',
+ input : [awkscript, 'test-hashmap-plain.c'],
+ output : 'test-hashmap-ordered.c',
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true,
+ build_by_default : want_tests != 'false')
+
+test_include_dir = include_directories('.')
+
+path = run_command('sh', ['-c', 'echo "$PATH"']).stdout().strip()
+test_env = environment()
+test_env.set('SYSTEMD_KBD_MODEL_MAP', kbd_model_map)
+test_env.set('SYSTEMD_LANGUAGE_FALLBACK_MAP', language_fallback_map)
+test_env.set('PATH', '@0@:@1@'.format(meson.build_root(), path))
+
+############################################################
+
+generate_sym_test_py = find_program('generate-sym-test.py')
+
+test_libsystemd_sym_c = custom_target(
+ 'test-libsystemd-sym.c',
+ input : [libsystemd_sym_path] + systemd_headers,
+ output : 'test-libsystemd-sym.c',
+ command : [generate_sym_test_py, libsystemd_sym_path] + systemd_headers,
+ capture : true,
+ build_by_default : want_tests != 'false')
+
+test_libudev_sym_c = custom_target(
+ 'test-libudev-sym.c',
+ input : [libudev_sym_path, libudev_h_path],
+ output : 'test-libudev-sym.c',
+ command : [generate_sym_test_py, '@INPUT0@', '@INPUT1@'],
+ capture : true,
+ build_by_default : want_tests != 'false')
+
+test_dlopen_c = files('test-dlopen.c')
+
+############################################################
+
+test_systemd_tmpfiles_py = find_program('test-systemd-tmpfiles.py')
+
+############################################################
+
+tests += [
+ [['src/test/test-device-nodes.c'],
+ [],
+ []],
+
+ [['src/test/test-engine.c'],
+ [libcore,
+ libudev,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-emergency-action.c'],
+ [libcore,
+ libshared],
+ []],
+
+ [['src/test/test-chown-rec.c'],
+ [libcore,
+ libshared],
+ []],
+
+ [['src/test/test-job-type.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-ns.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'manual'],
+
+ [['src/test/test-nscd-flush.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'manual'],
+
+ [['src/test/test-loopback.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-hostname.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'unsafe'],
+
+ [['src/test/test-dns-domain.c'],
+ [libcore,
+ libshared,
+ libsystemd_network],
+ []],
+
+ [['src/test/test-boot-timestamps.c'],
+ [],
+ [],
+ 'ENABLE_EFI'],
+
+ [['src/test/test-unit-file.c'],
+ [],
+ []],
+
+ [['src/test/test-unit-name.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-load-fragment.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-serialize.c'],
+ [],
+ []],
+
+ [['src/test/test-utf8.c'],
+ [],
+ []],
+
+ [['src/test/test-dev-setup.c'],
+ [],
+ []],
+
+ [['src/test/test-capability.c'],
+ [],
+ [libcap]],
+
+ [['src/test/test-async.c'],
+ [],
+ [],
+ '', 'timeout=120'],
+
+ [['src/test/test-locale-util.c'],
+ [],
+ []],
+
+ [['src/test/test-copy.c'],
+ [],
+ []],
+
+ [['src/test/test-static-destruct.c'],
+ [],
+ []],
+
+ [['src/test/test-sigbus.c'],
+ [],
+ []],
+
+ [['src/test/test-condition.c'],
+ [],
+ []],
+
+ [['src/test/test-fdset.c'],
+ [],
+ []],
+
+ [['src/test/test-fstab-util.c'],
+ [],
+ []],
+
+ [['src/test/test-random-util.c'],
+ [],
+ []],
+
+ [['src/test/test-format-table.c'],
+ [],
+ []],
+
+ [['src/test/test-format-util.c'],
+ [],
+ []],
+
+ [['src/test/test-ratelimit.c'],
+ [],
+ []],
+
+ [['src/test/test-util.c'],
+ [],
+ []],
+
+ [['src/test/test-json.c'],
+ [],
+ []],
+
+ [['src/test/test-libmount.c'],
+ [],
+ [threads,
+ libmount]],
+
+ [['src/test/test-mount-util.c'],
+ [],
+ []],
+
+ [['src/test/test-mountpoint-util.c'],
+ [],
+ []],
+
+ [['src/test/test-exec-util.c'],
+ [],
+ []],
+
+ [['src/test/test-hexdecoct.c'],
+ [],
+ []],
+
+ [['src/test/test-alloc-util.c'],
+ [],
+ []],
+
+ [['src/test/test-xattr-util.c'],
+ [],
+ []],
+
+ [['src/test/test-io-util.c'],
+ [],
+ []],
+
+ [['src/test/test-glob-util.c'],
+ [],
+ []],
+
+ [['src/test/test-fs-util.c'],
+ [],
+ []],
+
+ [['src/test/test-umask-util.c'],
+ [],
+ []],
+
+ [['src/test/test-proc-cmdline.c'],
+ [],
+ []],
+
+ [['src/test/test-fd-util.c'],
+ [],
+ []],
+
+ [['src/test/test-web-util.c'],
+ [],
+ []],
+
+ [['src/test/test-cpu-set-util.c'],
+ [],
+ []],
+
+ [['src/test/test-stat-util.c'],
+ [],
+ []],
+
+ [['src/test/test-os-util.c'],
+ [],
+ []],
+
+ [['src/test/test-libcrypt-util.c'],
+ [],
+ [],
+ '', 'timeout=120'],
+
+ [['src/test/test-offline-passwd.c',
+ 'src/shared/offline-passwd.c',
+ 'src/shared/offline-passwd.h'],
+ [],
+ []],
+
+ [['src/test/test-escape.c'],
+ [],
+ []],
+
+ [['src/test/test-exit-status.c'],
+ [],
+ []],
+
+ [['src/test/test-specifier.c'],
+ [],
+ []],
+
+ [['src/test/test-string-util.c'],
+ [],
+ []],
+
+ [['src/test/test-extract-word.c'],
+ [],
+ []],
+
+ [['src/test/test-parse-util.c'],
+ [],
+ [libseccomp]],
+
+ [['src/test/test-sysctl-util.c'],
+ [],
+ []],
+
+ [['src/test/test-user-record.c'],
+ [],
+ []],
+
+ [['src/test/test-user-util.c'],
+ [],
+ []],
+
+ [['src/test/test-hostname-util.c'],
+ [],
+ []],
+
+ [['src/test/test-process-util.c'],
+ [],
+ []],
+
+ [['src/test/test-terminal-util.c'],
+ [],
+ []],
+
+ [['src/test/test-path-lookup.c'],
+ [],
+ []],
+
+ [['src/test/test-pretty-print.c'],
+ [],
+ []],
+
+ [['src/test/test-uid-range.c'],
+ [],
+ []],
+
+ [['src/test/test-cap-list.c',
+ generated_gperf_headers],
+ [],
+ [libcap]],
+
+ [['src/test/test-socket-util.c'],
+ [],
+ []],
+
+ [['src/test/test-socket-netlink.c'],
+ [],
+ []],
+
+ [['src/test/test-in-addr-util.c'],
+ [],
+ []],
+
+ [['src/test/test-barrier.c'],
+ [],
+ []],
+
+ [['src/test/test-tmpfiles.c'],
+ [],
+ []],
+
+ [['src/test/test-namespace.c'],
+ [libcore,
+ libshared],
+ [threads,
+ libblkid]],
+
+ [['src/test/test-verbs.c'],
+ [],
+ []],
+
+ [['src/test/test-install-root.c'],
+ [],
+ []],
+
+ [['src/test/test-acl-util.c'],
+ [],
+ [],
+ 'HAVE_ACL'],
+
+ [['src/test/test-seccomp.c'],
+ [],
+ [libseccomp],
+ 'HAVE_SECCOMP'],
+
+ [['src/test/test-rlimit-util.c'],
+ [],
+ []],
+
+ [['src/test/test-ask-password-api.c'],
+ [],
+ [],
+ '', 'manual'],
+
+ [['src/test/test-signal-util.c'],
+ [],
+ []],
+
+ [['src/test/test-loop-block.c'],
+ [libcore,
+ libshared],
+ [threads,
+ libblkid],
+ '',
+ '',
+ [],
+ includes,
+ false],
+
+ [['src/test/test-selinux.c'],
+ [],
+ []],
+
+ [['src/test/test-sizeof.c'],
+ [libbasic],
+ []],
+
+ [['src/test/test-bpf-devices.c'],
+ [libcore,
+ libshared],
+ [libmount,
+ threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libblkid]],
+
+ [['src/test/test-bpf-firewall.c'],
+ [libcore,
+ libshared],
+ [libmount,
+ threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libblkid]],
+
+ [['src/test/test-watch-pid.c'],
+ [libcore,
+ libshared],
+ [libmount,
+ threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libblkid]],
+
+ [['src/test/test-hashmap.c',
+ 'src/test/test-hashmap-plain.c',
+ test_hashmap_ordered_c],
+ [],
+ [],
+ '', 'timeout=90'],
+
+ [['src/test/test-set.c'],
+ [libbasic],
+ []],
+
+ [['src/test/test-ordered-set.c'],
+ [],
+ []],
+
+ [['src/test/test-set-disable-mempool.c'],
+ [],
+ [threads]],
+
+ [['src/test/test-bitmap.c'],
+ [],
+ []],
+
+ [['src/test/test-xml.c'],
+ [],
+ []],
+
+ [['src/test/test-list.c'],
+ [],
+ []],
+
+ [['src/test/test-procfs-util.c'],
+ [],
+ []],
+
+ [['src/test/test-unaligned.c'],
+ [],
+ []],
+
+ [['src/test/test-tables.c',
+ 'src/shared/test-tables.h',
+ 'src/journal/journald-server.c',
+ 'src/journal/journald-server.h'],
+ [libcore,
+ libjournal_core,
+ libudev_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads,
+ libseccomp,
+ libmount,
+ libxz,
+ liblz4,
+ libblkid],
+ '', '', [], libudev_core_includes],
+
+ [['src/test/test-prioq.c'],
+ [],
+ []],
+
+ [['src/test/test-fileio.c'],
+ [],
+ []],
+
+ [['src/test/test-time-util.c'],
+ [],
+ []],
+
+ [['src/test/test-clock.c'],
+ [],
+ []],
+
+ [['src/test/test-architecture.c'],
+ [],
+ []],
+
+ [['src/test/test-log.c'],
+ [],
+ []],
+
+ [['src/test/test-ipcrm.c'],
+ [],
+ [],
+ '', 'unsafe'],
+
+ [['src/test/test-btrfs.c'],
+ [],
+ [],
+ '', 'manual'],
+
+
+ [['src/test/test-firewall-util.c'],
+ [libshared],
+ [],
+ 'HAVE_LIBIPTC'],
+
+ [['src/test/test-netlink-manual.c'],
+ [],
+ [libkmod],
+ 'HAVE_KMOD', 'manual'],
+
+ [['src/test/test-ellipsize.c'],
+ [],
+ []],
+
+ [['src/test/test-date.c'],
+ [],
+ []],
+
+ [['src/test/test-sleep.c'],
+ [],
+ []],
+
+ [['src/test/test-replace-var.c'],
+ [],
+ []],
+
+ [['src/test/test-calendarspec.c'],
+ [],
+ []],
+
+ [['src/test/test-strip-tab-ansi.c'],
+ [],
+ []],
+
+ [['src/test/test-coredump-util.c'],
+ [],
+ []],
+
+ [['src/test/test-daemon.c'],
+ [],
+ []],
+
+ [['src/test/test-cgroup.c'],
+ [],
+ []],
+
+ [['src/test/test-cgroup-cpu.c'],
+ [libcore,
+ libshared],
+ []],
+
+ [['src/test/test-cgroup-unit-default.c'],
+ [libcore,
+ libshared],
+ []],
+
+ [['src/test/test-cgroup-mask.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-varlink.c'],
+ [],
+ [threads]],
+
+ [['src/test/test-cgroup-util.c'],
+ [],
+ []],
+
+ [['src/test/test-cgroup-setup.c'],
+ [],
+ []],
+
+ [['src/test/test-env-file.c'],
+ [],
+ []],
+
+ [['src/test/test-env-util.c'],
+ [],
+ []],
+
+ [['src/test/test-strbuf.c'],
+ [],
+ []],
+
+ [['src/test/test-strv.c'],
+ [],
+ []],
+
+ [['src/test/test-path-util.c'],
+ [],
+ []],
+
+ [['src/test/test-rm-rf.c'],
+ [],
+ []],
+
+ [['src/test/test-chase-symlinks.c'],
+ [],
+ [],
+ '', 'manual'],
+
+ [['src/test/test-path.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'timeout=120'],
+
+ [['src/test/test-execute.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid],
+ '', 'timeout=360'],
+
+ [['src/test/test-siphash24.c'],
+ [],
+ []],
+
+ [['src/test/test-strxcpyx.c'],
+ [],
+ []],
+
+ [['src/test/test-install.c'],
+ [libcore,
+ libshared],
+ [],
+ '', 'manual'],
+
+ [['src/test/test-watchdog.c'],
+ [],
+ []],
+
+ [['src/test/test-sched-prio.c'],
+ [libcore,
+ libshared],
+ [threads,
+ librt,
+ libseccomp,
+ libselinux,
+ libmount,
+ libblkid]],
+
+ [['src/test/test-conf-files.c'],
+ [],
+ []],
+
+ [['src/test/test-conf-parser.c'],
+ [],
+ []],
+
+ [['src/test/test-af-list.c',
+ generated_gperf_headers],
+ [],
+ []],
+
+ [['src/test/test-arphrd-list.c',
+ generated_gperf_headers],
+ [],
+ []],
+
+ [['src/test/test-ip-protocol-list.c',
+ shared_generated_gperf_headers],
+ [],
+ []],
+
+ [['src/test/test-journal-importer.c'],
+ [],
+ []],
+
+ [['src/test/test-libudev.c'],
+ [libshared],
+ []],
+
+ [['src/test/test-udev.c'],
+ [libudev_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads,
+ librt,
+ libblkid,
+ libkmod,
+ libacl,
+ libselinux],
+ '', 'manual', '-DLOG_REALM=LOG_REALM_UDEV'],
+
+ [['src/test/test-udev-util.c'],
+ [],
+ []],
+
+ [['src/test/test-id128.c'],
+ [],
+ []],
+
+ [['src/test/test-hash.c'],
+ [],
+ []],
+
+ [['src/test/test-gcrypt-util.c'],
+ [],
+ [],
+ 'HAVE_GCRYPT'],
+
+ [['src/test/test-nss.c'],
+ [],
+ [libdl],
+ 'ENABLE_NSS', 'manual'],
+
+ [['src/test/test-umount.c',
+ 'src/shutdown/umount.c',
+ 'src/shutdown/umount.h'],
+ [libcore_shared,
+ libshared],
+ [libmount]],
+
+ [['src/test/test-bus-util.c'],
+ [],
+ []],
+
+ [['src/test/test-sd-hwdb.c'],
+ [],
+ []],
+
+ [['src/test/test-sd-path.c'],
+ [],
+ []],
+
+ [['src/test/test-local-addresses.c'],
+ [],
+ []],
+
+ [['src/test/test-psi-util.c'],
+ [],
+ []],
+
+ [['src/test/test-qrcode-util.c'],
+ [libshared],
+ [libdl]],
+]
+
+############################################################
+
+# define some tests here, because the link_with deps were not defined earlier
+
+tests += [
+ [['src/journal/test-journal.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-send.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-syslog.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4,
+ libselinux]],
+
+ [['src/journal/test-journal-match.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-enum.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4],
+ '', 'timeout=360'],
+
+ [['src/journal/test-journal-stream.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-flush.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-init.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-config.c'],
+ [libjournal_core,
+ libshared],
+ [libxz,
+ liblz4,
+ libselinux]],
+
+ [['src/journal/test-journal-verify.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-journal-interleaving.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-mmap-cache.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-catalog.c'],
+ [libjournal_core,
+ libshared],
+ [threads,
+ libxz,
+ liblz4]],
+
+ [['src/journal/test-compress.c'],
+ [libjournal_core,
+ libshared],
+ [liblz4,
+ libzstd,
+ libxz]],
+
+ [['src/journal/test-compress-benchmark.c'],
+ [libjournal_core,
+ libshared],
+ [liblz4,
+ libzstd,
+ libxz],
+ '', 'timeout=90'],
+
+ [['src/journal/test-audit-type.c'],
+ [libjournal_core,
+ libshared],
+ [liblz4,
+ libxz]],
+]
+
+############################################################
+
+tests += [
+ [['src/libsystemd/sd-bus/test-bus-address.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-marshal.c'],
+ [],
+ [threads,
+ libglib,
+ libgobject,
+ libgio,
+ libdbus]],
+
+ [['src/libsystemd/sd-bus/test-bus-signature.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-watch-bind.c'],
+ [],
+ [threads], '', 'timeout=120'],
+
+ [['src/libsystemd/sd-bus/test-bus-chat.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-cleanup.c'],
+ [],
+ [threads,
+ libseccomp]],
+
+ [['src/libsystemd/sd-bus/test-bus-error.c'],
+ [libshared_static,
+ libsystemd_static],
+ []],
+
+ [['src/libsystemd/sd-bus/test-bus-track.c'],
+ [],
+ [libseccomp]],
+
+ [['src/libsystemd/sd-bus/test-bus-server.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-objects.c'],
+ [],
+ [threads]],
+
+ [['src/libsystemd/sd-bus/test-bus-vtable.c',
+ 'src/libsystemd/sd-bus/test-vtable-data.h'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-bus/test-bus-gvariant.c'],
+ [],
+ [libglib,
+ libgobject,
+ libgio]],
+
+ [['src/libsystemd/sd-bus/test-bus-creds.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-bus/test-bus-match.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-bus/test-bus-benchmark.c'],
+ [],
+ [threads],
+ '', 'manual'],
+
+ [['src/libsystemd/sd-bus/test-bus-introspect.c',
+ 'src/libsystemd/sd-bus/test-vtable-data.h'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-event/test-event.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-netlink/test-netlink.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-resolve/test-resolve.c'],
+ [],
+ [threads],
+ '', 'timeout=120'],
+
+ [['src/libsystemd/sd-login/test-login.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-device/test-sd-device.c'],
+ [],
+ []],
+
+ [['src/libsystemd/sd-device/test-sd-device-thread.c'],
+ [libbasic,
+ libshared_static,
+ libsystemd],
+ [threads]],
+
+ [['src/libsystemd/sd-device/test-udev-device-thread.c'],
+ [libbasic,
+ libshared_static,
+ libudev],
+ [threads]],
+
+ [['src/libsystemd/sd-device/test-sd-device-monitor.c'],
+ [],
+ []],
+
+]
+
+if cxx_cmd != ''
+ tests += [
+ [['src/libsystemd/sd-bus/test-bus-vtable-cc.cc'],
+ [],
+ []]
+ ]
+endif
+
+############################################################
+
+tests += [
+ [['src/libsystemd-network/test-dhcp-option.c',
+ 'src/libsystemd-network/dhcp-protocol.h',
+ 'src/libsystemd-network/dhcp-internal.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-sd-dhcp-lease.c',
+ 'src/libsystemd-network/dhcp-lease-internal.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-dhcp-client.c',
+ 'src/libsystemd-network/dhcp-protocol.h',
+ 'src/libsystemd-network/dhcp-internal.h',
+ 'src/systemd/sd-dhcp-client.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-dhcp-server.c'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-ipv4ll.c',
+ 'src/libsystemd-network/arp-util.h',
+ 'src/systemd/sd-ipv4ll.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-ipv4ll-manual.c',
+ 'src/systemd/sd-ipv4ll.h'],
+ [libshared,
+ libsystemd_network],
+ [],
+ '', 'manual'],
+
+ [['src/libsystemd-network/test-acd.c',
+ 'src/systemd/sd-ipv4acd.h'],
+ [libshared,
+ libsystemd_network],
+ [],
+ '', 'manual'],
+
+ [['src/libsystemd-network/test-ndisc-rs.c',
+ 'src/libsystemd-network/dhcp-identifier.h',
+ 'src/libsystemd-network/dhcp-identifier.c',
+ 'src/libsystemd-network/icmp6-util.h',
+ 'src/systemd/sd-dhcp6-client.h',
+ 'src/systemd/sd-ndisc.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-ndisc-ra.c',
+ 'src/libsystemd-network/icmp6-util.h',
+ 'src/systemd/sd-ndisc.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-dhcp6-client.c',
+ 'src/libsystemd-network/dhcp-identifier.h',
+ 'src/libsystemd-network/dhcp-identifier.c',
+ 'src/libsystemd-network/dhcp6-internal.h',
+ 'src/systemd/sd-dhcp6-client.h'],
+ [libshared,
+ libsystemd_network],
+ []],
+
+ [['src/libsystemd-network/test-lldp.c'],
+ [libshared,
+ libsystemd_network],
+ []],
+]
+
+############################################################
+
+tests += [
+ [['src/login/test-login-shared.c'],
+ [],
+ []],
+
+ [['src/analyze/test-verify.c', 'src/analyze/analyze-verify.c', 'src/analyze/analyze-verify.h'],
+ [libcore, libshared],
+ []],
+
+ [['src/login/test-inhibit.c'],
+ [],
+ [],
+ '', 'manual'],
+
+ [['src/login/test-login-tables.c'],
+ [liblogind_core,
+ libshared],
+ [threads]],
+]
+
+############################################################
+
+tests += [
+ [['src/test/test-xdg-autostart.c',
+ 'src/xdg-autostart-generator/xdg-autostart-service.c',
+ 'src/xdg-autostart-generator/xdg-autostart-service.h',],
+ [],
+ []],
+]
diff --git a/src/test/test-acl-util.c b/src/test/test-acl-util.c
new file mode 100644
index 0000000..37c8265
--- /dev/null
+++ b/src/test/test-acl-util.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "acl-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+static int test_add_acls_for_user(void) {
+ char fn[] = "/tmp/test-empty.XXXXXX";
+ _cleanup_close_ int fd = -1;
+ char *cmd;
+ uid_t uid;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ /* Use the mode that user journal files use */
+ assert_se(fchmod(fd, 0640) == 0);
+
+ cmd = strjoina("ls -l ", fn);
+ assert_se(system(cmd) == 0);
+
+ cmd = strjoina("getfacl -p ", fn);
+ assert_se(system(cmd) == 0);
+
+ if (getuid() == 0) {
+ const char *nobody = NOBODY_USER_NAME;
+ r = get_user_creds(&nobody, &uid, NULL, NULL, NULL, 0);
+ if (r < 0)
+ uid = 0;
+ } else
+ uid = getuid();
+
+ r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ return log_tests_skipped("no ACL support on /tmp");
+
+ log_info_errno(r, "fd_add_uid_acl_permission(%i, "UID_FMT", ACL_READ): %m", fd, uid);
+ assert_se(r >= 0);
+
+ cmd = strjoina("ls -l ", fn);
+ assert_se(system(cmd) == 0);
+
+ cmd = strjoina("getfacl -p ", fn);
+ assert_se(system(cmd) == 0);
+
+ /* set the acls again */
+
+ r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
+ assert_se(r >= 0);
+
+ cmd = strjoina("ls -l ", fn);
+ assert_se(system(cmd) == 0);
+
+ cmd = strjoina("getfacl -p ", fn);
+ assert_se(system(cmd) == 0);
+
+ (void) unlink(fn);
+ return 0;
+}
+
+int main(int argc, char **argv) {
+ return test_add_acls_for_user();
+}
diff --git a/src/test/test-af-list.c b/src/test/test-af-list.c
new file mode 100644
index 0000000..672dc68
--- /dev/null
+++ b/src/test/test-af-list.c
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/socket.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+_unused_
+static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "af-from-name.h"
+#include "af-list.h"
+#include "af-to-name.h"
+
+int main(int argc, const char *argv[]) {
+
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(af_names); i++) {
+ if (af_names[i]) {
+ assert_se(streq(af_to_name(i), af_names[i]));
+ assert_se(af_from_name(af_names[i]) == (int) i);
+ }
+ }
+
+ assert_se(af_to_name(af_max()) == NULL);
+ assert_se(af_to_name(-1) == NULL);
+ assert_se(af_from_name("huddlduddl") == -EINVAL);
+ assert_se(af_from_name("") == -EINVAL);
+
+ return 0;
+}
diff --git a/src/test/test-alloc-util.c b/src/test/test-alloc-util.c
new file mode 100644
index 0000000..b4319f9
--- /dev/null
+++ b/src/test/test-alloc-util.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <malloc.h>
+#include <stdint.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "random-util.h"
+#include "tests.h"
+
+static void test_alloca(void) {
+ static const uint8_t zero[997] = { };
+ char *t;
+
+ t = alloca_align(17, 512);
+ assert_se(!((uintptr_t)t & 0xff));
+ memzero(t, 17);
+
+ t = alloca0_align(997, 1024);
+ assert_se(!((uintptr_t)t & 0x1ff));
+ assert_se(!memcmp(t, zero, 997));
+}
+
+static void test_GREEDY_REALLOC(void) {
+ _cleanup_free_ int *a = NULL, *b = NULL;
+ size_t n_allocated = 0, i, j;
+
+ /* Give valgrind a chance to verify our realloc() operations */
+
+ for (i = 0; i < 20480; i++) {
+ assert_se(GREEDY_REALLOC(a, n_allocated, i + 1));
+ assert_se(n_allocated >= i + 1);
+ assert_se(malloc_usable_size(a) >= (i + 1) * sizeof(int));
+ a[i] = (int) i;
+ assert_se(GREEDY_REALLOC(a, n_allocated, i / 2));
+ assert_se(n_allocated >= i / 2);
+ assert_se(malloc_usable_size(a) >= (i / 2) * sizeof(int));
+ }
+
+ for (j = 0; j < i / 2; j++)
+ assert_se(a[j] == (int) j);
+
+ for (i = 30, n_allocated = 0; i < 20480; i += 7) {
+ assert_se(GREEDY_REALLOC(b, n_allocated, i + 1));
+ assert_se(n_allocated >= i + 1);
+ assert_se(malloc_usable_size(b) >= (i + 1) * sizeof(int));
+ b[i] = (int) i;
+ assert_se(GREEDY_REALLOC(b, n_allocated, i / 2));
+ assert_se(n_allocated >= i / 2);
+ assert_se(malloc_usable_size(b) >= (i / 2) * sizeof(int));
+ }
+
+ for (j = 30; j < i / 2; j += 7)
+ assert_se(b[j] == (int) j);
+}
+
+static void test_memdup_multiply_and_greedy_realloc(void) {
+ static const int org[] = { 1, 2, 3 };
+ _cleanup_free_ int *dup;
+ int *p;
+ size_t i, allocated = 3;
+
+ dup = memdup_suffix0_multiply(org, sizeof(int), 3);
+ assert_se(dup);
+ assert_se(dup[0] == 1);
+ assert_se(dup[1] == 2);
+ assert_se(dup[2] == 3);
+ assert_se(((uint8_t*) dup)[sizeof(int) * 3] == 0);
+ free(dup);
+
+ dup = memdup_multiply(org, sizeof(int), 3);
+ assert_se(dup);
+ assert_se(dup[0] == 1);
+ assert_se(dup[1] == 2);
+ assert_se(dup[2] == 3);
+
+ p = dup;
+ assert_se(greedy_realloc0((void**) &dup, &allocated, 2, sizeof(int)) == p);
+
+ p = (int *) greedy_realloc0((void**) &dup, &allocated, 10, sizeof(int));
+ assert_se(p == dup);
+ assert_se(allocated >= 10);
+ assert_se(p[0] == 1);
+ assert_se(p[1] == 2);
+ assert_se(p[2] == 3);
+ for (i = 3; i < allocated; i++)
+ assert_se(p[i] == 0);
+}
+
+static void test_bool_assign(void) {
+ bool b, c, *cp = &c, d, e, f, g, h;
+
+ b = 123;
+ *cp = -11;
+ d = 0xF & 0xFF;
+ e = b & d;
+ f = 0x0;
+ g = cp; /* cast from pointer */
+ h = NULL; /* cast from pointer */
+
+ assert_se(b);
+ assert_se(c);
+ assert_se(d);
+ assert_se(e);
+ assert_se(!f);
+ assert_se(g);
+ assert_se(!h);
+}
+
+static int cleanup_counter = 0;
+
+static void cleanup1(void *a) {
+ log_info("%s(%p)", __func__, a);
+ assert_se(++cleanup_counter == *(int*) a);
+}
+static void cleanup2(void *a) {
+ log_info("%s(%p)", __func__, a);
+ assert_se(++cleanup_counter == *(int*) a);
+}
+static void cleanup3(void *a) {
+ log_info("%s(%p)", __func__, a);
+ assert_se(++cleanup_counter == *(int*) a);
+}
+
+static void test_cleanup_order(void) {
+ _cleanup_(cleanup1) int x1 = 4, x2 = 3;
+ _cleanup_(cleanup3) int z = 2;
+ _cleanup_(cleanup2) int y = 1;
+ log_debug("x1: %p", &x1);
+ log_debug("x2: %p", &x2);
+ log_debug("y: %p", &y);
+ log_debug("z: %p", &z);
+}
+
+static void test_auto_erase_memory(void) {
+ _cleanup_(erase_and_freep) uint8_t *p1, *p2;
+
+ /* print address of p2, else e.g. clang-11 will optimize it out */
+ log_debug("p1: %p p2: %p", &p1, &p2);
+
+ assert_se(p1 = new(uint8_t, 1024));
+ assert_se(p2 = new(uint8_t, 1024));
+
+ assert_se(genuine_random_bytes(p1, 1024, RANDOM_BLOCK) == 0);
+
+ /* before we exit the scope, do something with this data, so that the compiler won't optimize this away */
+ memcpy(p2, p1, 1024);
+ for (size_t i = 0; i < 1024; i++)
+ assert_se(p1[i] == p2[i]);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_alloca();
+ test_GREEDY_REALLOC();
+ test_memdup_multiply_and_greedy_realloc();
+ test_bool_assign();
+ test_cleanup_order();
+ test_auto_erase_memory();
+
+ return 0;
+}
diff --git a/src/test/test-architecture.c b/src/test/test-architecture.c
new file mode 100644
index 0000000..798c95c
--- /dev/null
+++ b/src/test/test-architecture.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "architecture.h"
+#include "log.h"
+#include "tests.h"
+#include "util.h"
+#include "virt.h"
+
+int main(int argc, char *argv[]) {
+ int a, v;
+ const char *p;
+
+ test_setup_logging(LOG_INFO);
+
+ assert_se(architecture_from_string("") < 0);
+ assert_se(architecture_from_string(NULL) < 0);
+ assert_se(architecture_from_string("hoge") < 0);
+ assert_se(architecture_to_string(-1) == NULL);
+ assert_se(architecture_from_string(architecture_to_string(0)) == 0);
+ assert_se(architecture_from_string(architecture_to_string(1)) == 1);
+
+ v = detect_virtualization();
+ if (IN_SET(v, -EPERM, -EACCES))
+ return log_tests_skipped("Cannot detect virtualization");
+
+ assert_se(v >= 0);
+
+ log_info("virtualization=%s id=%s",
+ VIRTUALIZATION_IS_CONTAINER(v) ? "container" :
+ VIRTUALIZATION_IS_VM(v) ? "vm" : "n/a",
+ virtualization_to_string(v));
+
+ a = uname_architecture();
+ assert_se(a >= 0);
+
+ p = architecture_to_string(a);
+ assert_se(p);
+ log_info("uname architecture=%s", p);
+ assert_se(architecture_from_string(p) == a);
+
+ a = native_architecture();
+ assert_se(a >= 0);
+
+ p = architecture_to_string(a);
+ assert_se(p);
+ log_info("native architecture=%s", p);
+ assert_se(architecture_from_string(p) == a);
+
+ log_info("primary library architecture=" LIB_ARCH_TUPLE);
+
+ return 0;
+}
diff --git a/src/test/test-arphrd-list.c b/src/test/test-arphrd-list.c
new file mode 100644
index 0000000..9e21f32
--- /dev/null
+++ b/src/test/test-arphrd-list.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/if_arp.h>
+
+#include "string-util.h"
+#include "tests.h"
+
+#include "arphrd-list.h"
+
+int main(int argc, const char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ for (int i = 0; i <= ARPHRD_VOID + 1; i++) {
+ const char *name;
+
+ name = arphrd_to_name(i);
+ if (name) {
+ log_info("%i: %s", i, name);
+
+ assert_se(arphrd_from_name(name) == i);
+ }
+ }
+
+ assert_se(arphrd_to_name(ARPHRD_VOID + 1) == NULL);
+ assert_se(arphrd_from_name("huddlduddl") == -EINVAL);
+ assert_se(arphrd_from_name("") == -EINVAL);
+
+ return 0;
+}
diff --git a/src/test/test-ask-password-api.c b/src/test/test-ask-password-api.c
new file mode 100644
index 0000000..06158ac
--- /dev/null
+++ b/src/test/test-ask-password-api.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "ask-password-api.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_ask_password(void) {
+ int r;
+ _cleanup_strv_free_ char **ret = NULL;
+
+ r = ask_password_tty(-1, "hello?", "da key", 0, ASK_PASSWORD_CONSOLE_COLOR, NULL, &ret);
+ if (r == -ECANCELED)
+ assert_se(ret == NULL);
+ else {
+ assert_se(r >= 0);
+ assert_se(strv_length(ret) == 1);
+ log_info("Got \"%s\"", *ret);
+ }
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_ask_password();
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-async.c b/src/test/test-async.c
new file mode 100644
index 0000000..8eefad5
--- /dev/null
+++ b/src/test/test-async.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "async.h"
+#include "macro.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static bool test_async = false;
+
+static void *async_func(void *arg) {
+ test_async = true;
+
+ return NULL;
+}
+
+int main(int argc, char *argv[]) {
+ int fd;
+ char name[] = "/tmp/test-asynchronous_close.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ asynchronous_close(fd);
+
+ assert_se(asynchronous_job(async_func, NULL) >= 0);
+
+ assert_se(asynchronous_sync(NULL) >= 0);
+
+ sleep(1);
+
+ assert_se(fcntl(fd, F_GETFD) == -1);
+ assert_se(test_async);
+
+ (void) unlink(name);
+
+ return 0;
+}
diff --git a/src/test/test-barrier.c b/src/test/test-barrier.c
new file mode 100644
index 0000000..6ef2998
--- /dev/null
+++ b/src/test/test-barrier.c
@@ -0,0 +1,464 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/*
+ * IPC barrier tests
+ * These tests verify the correct behavior of the IPC Barrier implementation.
+ * Note that the tests use alarm-timers to verify dead-locks and timeouts. These
+ * might not work on slow machines where 20ms are too short to perform specific
+ * operations (though, very unlikely). In case that turns out true, we have to
+ * increase it at the slightly cost of lengthen test-duration on other machines.
+ */
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "util.h"
+#include "tests.h"
+#include "virt.h"
+#include "time-util.h"
+
+/* 20ms to test deadlocks; All timings use multiples of this constant as
+ * alarm/sleep timers. If this timeout is too small for slow machines to perform
+ * the requested operations, we have to increase it. On an i7 this works fine
+ * with 1ms base-time, so 20ms should be just fine for everyone. */
+#define BASE_TIME (20 * USEC_PER_MSEC)
+
+static void set_alarm(usec_t usecs) {
+ struct itimerval v = { };
+
+ timeval_store(&v.it_value, usecs);
+ assert_se(setitimer(ITIMER_REAL, &v, NULL) >= 0);
+}
+
+static void sleep_for(usec_t usecs) {
+ /* stupid usleep() might fail if >1000000 */
+ assert_se(usecs < USEC_PER_SEC);
+ usleep(usecs);
+}
+
+#define TEST_BARRIER(_FUNCTION, _CHILD_CODE, _WAIT_CHILD, _PARENT_CODE, _WAIT_PARENT) \
+ static void _FUNCTION(void) { \
+ Barrier b = BARRIER_NULL; \
+ pid_t pid1, pid2; \
+ \
+ assert_se(barrier_create(&b) >= 0); \
+ assert_se(b.me > 0); \
+ assert_se(b.them > 0); \
+ assert_se(b.pipe[0] > 0); \
+ assert_se(b.pipe[1] > 0); \
+ \
+ pid1 = fork(); \
+ assert_se(pid1 >= 0); \
+ if (pid1 == 0) { \
+ barrier_set_role(&b, BARRIER_CHILD); \
+ { _CHILD_CODE; } \
+ exit(42); \
+ } \
+ \
+ pid2 = fork(); \
+ assert_se(pid2 >= 0); \
+ if (pid2 == 0) { \
+ barrier_set_role(&b, BARRIER_PARENT); \
+ { _PARENT_CODE; } \
+ exit(42); \
+ } \
+ \
+ barrier_destroy(&b); \
+ set_alarm(999999); \
+ { _WAIT_CHILD; } \
+ { _WAIT_PARENT; } \
+ set_alarm(0); \
+ }
+
+#define TEST_BARRIER_WAIT_SUCCESS(_pid) \
+ ({ \
+ int pidr, status; \
+ pidr = waitpid(_pid, &status, 0); \
+ assert_se(pidr == _pid); \
+ assert_se(WIFEXITED(status)); \
+ assert_se(WEXITSTATUS(status) == 42); \
+ })
+
+#define TEST_BARRIER_WAIT_ALARM(_pid) \
+ ({ \
+ int pidr, status; \
+ pidr = waitpid(_pid, &status, 0); \
+ assert_se(pidr == _pid); \
+ assert_se(WIFSIGNALED(status)); \
+ assert_se(WTERMSIG(status) == SIGALRM); \
+ })
+
+/*
+ * Test basic sync points
+ * This places a barrier in both processes and waits synchronously for them.
+ * The timeout makes sure the sync works as expected. The sleep_for() on one side
+ * makes sure the exit of the parent does not overwrite previous barriers. Due
+ * to the sleep_for(), we know that the parent already exited, thus there's a
+ * pending HUP on the pipe. However, the barrier_sync() prefers reads on the
+ * eventfd, thus we can safely wait on the barrier.
+ */
+TEST_BARRIER(test_barrier_sync,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ sleep_for(BASE_TIME * 2);
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next()
+ * This places a barrier in the parent and syncs on it. The child sleeps while
+ * the parent places the barrier and then waits for a barrier. The wait will
+ * succeed as the child hasn't read the parent's barrier, yet. The following
+ * barrier and sync synchronize the exit.
+ */
+TEST_BARRIER(test_barrier_wait_next,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 4);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() multiple times
+ * This places two barriers in the parent and waits for the child to exit. The
+ * child sleeps 20ms so both barriers _should_ be in place. It then waits for
+ * the parent to place the next barrier twice. The first call will fetch both
+ * barriers and return. However, the second call will stall as the parent does
+ * not place a 3rd barrier (the sleep caught two barriers). wait_next() is does
+ * not look at barrier-links so this stall is expected. Thus this test times
+ * out.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_wait_next(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ sleep_for(BASE_TIME * 4);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with local barriers
+ * This is the same as test_barrier_wait_next_twice, but places local barriers
+ * between both waits. This does not have any effect on the wait so it times out
+ * like the other test.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_local,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_wait_next(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ sleep_for(BASE_TIME * 4);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with sync_next()
+ * This is again the same as test_barrier_wait_next_twice but uses a
+ * synced wait as the second wait. This works just fine because the local state
+ * has no barriers placed, therefore, the remote is always in sync.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_sync,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_sync_next(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test wait_next() with sync_next() and local barriers
+ * This is again the same as test_barrier_wait_next_twice_local but uses a
+ * synced wait as the second wait. This works just fine because the local state
+ * is in sync with the remote.
+ */
+TEST_BARRIER(test_barrier_wait_next_twice_local_sync,
+ ({
+ sleep_for(BASE_TIME);
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync_next(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync()
+ * This tests sync_*() synchronizations and makes sure they work fine if the
+ * local state is behind the remote state.
+ */
+TEST_BARRIER(test_barrier_sync_next,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_sync(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ sleep_for(BASE_TIME);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync() with local barriers
+ * This tests timeouts if sync_*() is used if local barriers are placed but the
+ * remote didn't place any.
+ */
+TEST_BARRIER(test_barrier_sync_next_local,
+ ({
+ set_alarm(BASE_TIME);
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ sleep_for(BASE_TIME * 2);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test sync_next() and sync() with local barriers and abortion
+ * This is the same as test_barrier_sync_next_local but aborts the sync in the
+ * parent. Therefore, the sync_next() succeeds just fine due to the abortion.
+ */
+TEST_BARRIER(test_barrier_sync_next_local_abort,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(!barrier_sync_next(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ assert_se(barrier_abort(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test matched wait_abortion()
+ * This runs wait_abortion() with remote abortion.
+ */
+TEST_BARRIER(test_barrier_wait_abortion,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_wait_abortion(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ assert_se(barrier_abort(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test unmatched wait_abortion()
+ * This runs wait_abortion() without any remote abortion going on. It thus must
+ * timeout.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_unmatched,
+ ({
+ set_alarm(BASE_TIME);
+ assert_se(barrier_wait_abortion(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ sleep_for(BASE_TIME * 2);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test matched wait_abortion() with local abortion
+ * This runs wait_abortion() with local and remote abortion.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_local,
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_abort(&b));
+ assert_se(!barrier_wait_abortion(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ assert_se(barrier_abort(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test unmatched wait_abortion() with local abortion
+ * This runs wait_abortion() with only local abortion. This must time out.
+ */
+TEST_BARRIER(test_barrier_wait_abortion_local_unmatched,
+ ({
+ set_alarm(BASE_TIME);
+ assert_se(barrier_abort(&b));
+ assert_se(!barrier_wait_abortion(&b));
+ assert_se(0);
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid1),
+ ({
+ sleep_for(BASE_TIME * 2);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test child exit
+ * Place barrier and sync with the child. The child only exits()s, which should
+ * cause an implicit abortion and wake the parent.
+ */
+TEST_BARRIER(test_barrier_exit,
+ ({
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME * 10);
+ assert_se(barrier_place(&b));
+ assert_se(!barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+/*
+ * Test child exit with sleep
+ * Same as test_barrier_exit but verifies the test really works due to the
+ * child-exit. We add a usleep() which triggers the alarm in the parent and
+ * causes the test to time out.
+ */
+TEST_BARRIER(test_barrier_no_exit,
+ ({
+ sleep_for(BASE_TIME * 2);
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ set_alarm(BASE_TIME);
+ assert_se(barrier_place(&b));
+ assert_se(!barrier_sync(&b));
+ }),
+ TEST_BARRIER_WAIT_ALARM(pid2));
+
+/*
+ * Test pending exit against sync
+ * The parent places a barrier *and* exits. The 20ms wait in the child
+ * guarantees both are pending. However, our logic prefers pending barriers over
+ * pending exit-abortions (unlike normal abortions), thus the wait_next() must
+ * succeed, same for the sync_next() as our local barrier-count is smaller than
+ * the remote. Once we place a barrier our count is equal, so the sync still
+ * succeeds. Only if we place one more barrier, we're ahead of the remote, thus
+ * we will fail due to HUP on the pipe.
+ */
+TEST_BARRIER(test_barrier_pending_exit,
+ ({
+ set_alarm(BASE_TIME * 4);
+ sleep_for(BASE_TIME * 2);
+ assert_se(barrier_wait_next(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(barrier_sync_next(&b));
+ assert_se(barrier_place(&b));
+ assert_se(!barrier_sync_next(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid1),
+ ({
+ assert_se(barrier_place(&b));
+ }),
+ TEST_BARRIER_WAIT_SUCCESS(pid2));
+
+int main(int argc, char *argv[]) {
+ int v;
+ test_setup_logging(LOG_INFO);
+
+ if (!slow_tests_enabled())
+ return log_tests_skipped("slow tests are disabled");
+
+ /*
+ * This test uses real-time alarms and sleeps to test for CPU races
+ * explicitly. This is highly fragile if your system is under load. We
+ * already increased the BASE_TIME value to make the tests more robust,
+ * but that just makes the test take significantly longer. Given the recent
+ * issues when running the test in a virtualized environments, limit it
+ * to bare metal machines only, to minimize false-positives in CIs.
+ */
+ v = detect_virtualization();
+ if (IN_SET(v, -EPERM, -EACCES))
+ return log_tests_skipped("Cannot detect virtualization");
+
+ if (v != VIRTUALIZATION_NONE)
+ return log_tests_skipped("This test requires a baremetal machine");
+
+ test_barrier_sync();
+ test_barrier_wait_next();
+ test_barrier_wait_next_twice();
+ test_barrier_wait_next_twice_sync();
+ test_barrier_wait_next_twice_local();
+ test_barrier_wait_next_twice_local_sync();
+ test_barrier_sync_next();
+ test_barrier_sync_next_local();
+ test_barrier_sync_next_local_abort();
+ test_barrier_wait_abortion();
+ test_barrier_wait_abortion_unmatched();
+ test_barrier_wait_abortion_local();
+ test_barrier_wait_abortion_local_unmatched();
+ test_barrier_exit();
+ test_barrier_no_exit();
+ test_barrier_pending_exit();
+
+ return 0;
+}
diff --git a/src/test/test-bitmap.c b/src/test/test-bitmap.c
new file mode 100644
index 0000000..9c5d551
--- /dev/null
+++ b/src/test/test-bitmap.c
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bitmap.h"
+
+int main(int argc, const char *argv[]) {
+ _cleanup_bitmap_free_ Bitmap *b = NULL, *b2 = NULL;
+ unsigned n = (unsigned) -1, i = 0;
+
+ b = bitmap_new();
+ assert_se(b);
+
+ assert_se(bitmap_ensure_allocated(&b) == 0);
+ bitmap_free(b);
+ b = NULL;
+ assert_se(bitmap_ensure_allocated(&b) == 0);
+
+ assert_se(bitmap_isset(b, 0) == false);
+ assert_se(bitmap_isset(b, 1) == false);
+ assert_se(bitmap_isset(b, 256) == false);
+ assert_se(bitmap_isclear(b) == true);
+
+ assert_se(bitmap_set(b, 0) == 0);
+ assert_se(bitmap_isset(b, 0) == true);
+ assert_se(bitmap_isclear(b) == false);
+ bitmap_unset(b, 0);
+ assert_se(bitmap_isset(b, 0) == false);
+ assert_se(bitmap_isclear(b) == true);
+
+ assert_se(bitmap_set(b, 1) == 0);
+ assert_se(bitmap_isset(b, 1) == true);
+ assert_se(bitmap_isclear(b) == false);
+ bitmap_unset(b, 1);
+ assert_se(bitmap_isset(b, 1) == false);
+ assert_se(bitmap_isclear(b) == true);
+
+ assert_se(bitmap_set(b, 256) == 0);
+ assert_se(bitmap_isset(b, 256) == true);
+ assert_se(bitmap_isclear(b) == false);
+ bitmap_unset(b, 256);
+ assert_se(bitmap_isset(b, 256) == false);
+ assert_se(bitmap_isclear(b) == true);
+
+ assert_se(bitmap_set(b, 32) == 0);
+ bitmap_unset(b, 0);
+ assert_se(bitmap_isset(b, 32) == true);
+ bitmap_unset(b, 32);
+
+ BITMAP_FOREACH(n, NULL)
+ assert_not_reached("NULL bitmap");
+
+ assert_se(bitmap_set(b, 0) == 0);
+ assert_se(bitmap_set(b, 1) == 0);
+ assert_se(bitmap_set(b, 256) == 0);
+
+ BITMAP_FOREACH(n, b) {
+ assert_se(n == i);
+ if (i == 0)
+ i = 1;
+ else if (i == 1)
+ i = 256;
+ else if (i == 256)
+ i = (unsigned) -1;
+ }
+
+ assert_se(i == (unsigned) -1);
+
+ i = 0;
+
+ BITMAP_FOREACH(n, b) {
+ assert_se(n == i);
+ if (i == 0)
+ i = 1;
+ else if (i == 1)
+ i = 256;
+ else if (i == 256)
+ i = (unsigned) -1;
+ }
+
+ assert_se(i == (unsigned) -1);
+
+ b2 = bitmap_copy(b);
+ assert_se(b2);
+ assert_se(bitmap_equal(b, b2) == true);
+ assert_se(bitmap_equal(b, b) == true);
+ assert_se(bitmap_equal(b, NULL) == false);
+ assert_se(bitmap_equal(NULL, b) == false);
+ assert_se(bitmap_equal(NULL, NULL) == true);
+
+ bitmap_clear(b);
+ assert_se(bitmap_isclear(b) == true);
+ assert_se(bitmap_equal(b, b2) == false);
+ bitmap_free(b2);
+ b2 = NULL;
+
+ assert_se(bitmap_set(b, (unsigned) -1) == -ERANGE);
+
+ bitmap_free(b);
+ b = NULL;
+ assert_se(bitmap_ensure_allocated(&b) == 0);
+ assert_se(bitmap_ensure_allocated(&b2) == 0);
+
+ assert_se(bitmap_equal(b, b2));
+ assert_se(bitmap_set(b, 0) == 0);
+ bitmap_unset(b, 0);
+ assert_se(bitmap_equal(b, b2));
+
+ assert_se(bitmap_set(b, 1) == 0);
+ bitmap_clear(b);
+ assert_se(bitmap_equal(b, b2));
+
+ assert_se(bitmap_set(b, 0) == 0);
+ assert_se(bitmap_set(b2, 0) == 0);
+ assert_se(bitmap_equal(b, b2));
+
+ return 0;
+}
diff --git a/src/test/test-boot-timestamps.c b/src/test/test-boot-timestamps.c
new file mode 100644
index 0000000..ae5b582
--- /dev/null
+++ b/src/test/test-boot-timestamps.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "acpi-fpdt.h"
+#include "boot-timestamps.h"
+#include "efi-loader.h"
+#include "log.h"
+#include "tests.h"
+#include "util.h"
+
+static int test_acpi_fpdt(void) {
+ char ts_start[FORMAT_TIMESPAN_MAX], ts_exit[FORMAT_TIMESPAN_MAX], ts_span[FORMAT_TIMESPAN_MAX];
+ usec_t loader_start, loader_exit;
+ int r;
+
+ r = acpi_get_boot_usec(&loader_start, &loader_exit);
+ if (r < 0) {
+ bool ok = r == -ENOENT || r == -EACCES || r == -ENODATA;
+
+ log_full_errno(ok ? LOG_DEBUG : LOG_ERR, r, "Failed to read ACPI FPDT: %m");
+ return ok ? 0 : r;
+ }
+
+ log_info("ACPI FPDT: loader start=%s exit=%s duration=%s",
+ format_timespan(ts_start, sizeof(ts_start), loader_start, USEC_PER_MSEC),
+ format_timespan(ts_exit, sizeof(ts_exit), loader_exit, USEC_PER_MSEC),
+ format_timespan(ts_span, sizeof(ts_span), loader_exit - loader_start, USEC_PER_MSEC));
+ return 1;
+}
+
+static int test_efi_loader(void) {
+ char ts_start[FORMAT_TIMESPAN_MAX], ts_exit[FORMAT_TIMESPAN_MAX], ts_span[FORMAT_TIMESPAN_MAX];
+ usec_t loader_start, loader_exit;
+ int r;
+
+ r = efi_loader_get_boot_usec(&loader_start, &loader_exit);
+ if (r < 0) {
+ bool ok = r == -ENOENT || r == -EACCES || r == -EOPNOTSUPP;
+
+ log_full_errno(ok ? LOG_DEBUG : LOG_ERR, r, "Failed to read EFI loader data: %m");
+ return ok ? 0 : r;
+ }
+
+ log_info("EFI Loader: start=%s exit=%s duration=%s",
+ format_timespan(ts_start, sizeof(ts_start), loader_start, USEC_PER_MSEC),
+ format_timespan(ts_exit, sizeof(ts_exit), loader_exit, USEC_PER_MSEC),
+ format_timespan(ts_span, sizeof(ts_span), loader_exit - loader_start, USEC_PER_MSEC));
+ return 1;
+}
+
+static int test_boot_timestamps(void) {
+ char s[MAX(FORMAT_TIMESPAN_MAX, FORMAT_TIMESTAMP_MAX)];
+ dual_timestamp fw, l, k;
+ int r;
+
+ dual_timestamp_from_monotonic(&k, 0);
+
+ r = boot_timestamps(NULL, &fw, &l);
+ if (r < 0) {
+ bool ok = r == -ENOENT || r == -EACCES || r == -EOPNOTSUPP;
+
+ log_full_errno(ok ? LOG_DEBUG : LOG_ERR, r, "Failed to read variables: %m");
+ return ok ? 0 : r;
+ }
+
+ log_info("Firmware began %s before kernel.", format_timespan(s, sizeof(s), fw.monotonic, 0));
+ log_info("Loader began %s before kernel.", format_timespan(s, sizeof(s), l.monotonic, 0));
+ log_info("Firmware began %s.", format_timestamp(s, sizeof(s), fw.realtime));
+ log_info("Loader began %s.", format_timestamp(s, sizeof(s), l.realtime));
+ log_info("Kernel began %s.", format_timestamp(s, sizeof(s), k.realtime));
+ return 1;
+}
+
+int main(int argc, char* argv[]) {
+ int p, q, r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ p = test_acpi_fpdt();
+ assert(p >= 0);
+ q = test_efi_loader();
+ assert(q >= 0);
+ r = test_boot_timestamps();
+ assert(r >= 0);
+
+ if (p == 0 && q == 0 && r == 0)
+ return log_tests_skipped("access to firmware variables not possible");
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-bpf-devices.c b/src/test/test-bpf-devices.c
new file mode 100644
index 0000000..2c5eb73
--- /dev/null
+++ b/src/test/test-bpf-devices.c
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-devices.h"
+#include "bpf-program.h"
+#include "cgroup-setup.h"
+#include "errno-list.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "path-util.h"
+#include "tests.h"
+
+static void test_policy_closed(const char *cgroup_path, BPFProgram **installed_prog) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ unsigned wrong = 0;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_CLOSED, true);
+ assert_se(r >= 0);
+
+ r = bpf_devices_allow_list_static(prog, cgroup_path);
+ assert_se(r >= 0);
+
+ r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_CLOSED, true, cgroup_path, installed_prog);
+ assert_se(r >= 0);
+
+ const char *s;
+ FOREACH_STRING(s, "/dev/null",
+ "/dev/zero",
+ "/dev/full",
+ "/dev/random",
+ "/dev/urandom",
+ "/dev/tty",
+ "/dev/ptmx") {
+ _cleanup_close_ int fd, fd2;
+
+ fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd < 0 && errno == EPERM;
+ /* We ignore errors other than EPERM, e.g. ENOENT or ENXIO */
+
+ fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
+ log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
+ wrong += fd2 < 0 && errno == EPERM;
+ }
+ assert_se(wrong == 0);
+}
+
+static void test_policy_strict(const char *cgroup_path, BPFProgram **installed_prog) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ unsigned wrong = 0;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
+ assert_se(r >= 0);
+
+ r = bpf_devices_allow_list_device(prog, cgroup_path, "/dev/null", "rw");
+ assert_se(r >= 0);
+
+ r = bpf_devices_allow_list_device(prog, cgroup_path, "/dev/random", "r");
+ assert_se(r >= 0);
+
+ r = bpf_devices_allow_list_device(prog, cgroup_path, "/dev/zero", "w");
+ assert_se(r >= 0);
+
+ r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
+ assert_se(r >= 0);
+
+ {
+ _cleanup_close_ int fd, fd2;
+ const char *s = "/dev/null";
+
+ fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd < 0;
+
+ fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
+ log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
+ wrong += fd2 < 0;
+ }
+
+ {
+ _cleanup_close_ int fd, fd2;
+ const char *s = "/dev/random";
+
+ fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd < 0;
+
+ fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
+ log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
+ wrong += fd2 >= 0;
+ }
+
+ {
+ _cleanup_close_ int fd, fd2;
+ const char *s = "/dev/zero";
+
+ fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd >= 0;
+
+ fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
+ log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
+ wrong += fd2 < 0;
+ }
+
+ {
+ _cleanup_close_ int fd, fd2;
+ const char *s = "/dev/full";
+
+ fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd >= 0;
+
+ fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
+ log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
+ wrong += fd2 >= 0;
+ }
+
+ assert_se(wrong == 0);
+}
+
+static void test_policy_allow_list_major(const char *pattern, const char *cgroup_path, BPFProgram **installed_prog) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ unsigned wrong = 0;
+ int r;
+
+ log_info("/* %s(%s) */", __func__, pattern);
+
+ r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
+ assert_se(r >= 0);
+
+ r = bpf_devices_allow_list_major(prog, cgroup_path, pattern, 'c', "rw");
+ assert_se(r >= 0);
+
+ r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
+ assert_se(r >= 0);
+
+ /* /dev/null, /dev/full have major==1, /dev/tty has major==5 */
+ {
+ _cleanup_close_ int fd, fd2;
+ const char *s = "/dev/null";
+
+ fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd < 0;
+
+ fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
+ log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
+ wrong += fd2 < 0;
+ }
+
+ {
+ _cleanup_close_ int fd, fd2;
+ const char *s = "/dev/full";
+
+ fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd < 0;
+
+ fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
+ log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
+ wrong += fd2 < 0;
+ }
+
+ {
+ _cleanup_close_ int fd, fd2;
+ const char *s = "/dev/tty";
+
+ fd = open(s, O_CLOEXEC|O_RDONLY|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd >= 0;
+
+ fd2 = open(s, O_CLOEXEC|O_WRONLY|O_NOCTTY);
+ log_debug("open(%s, \"w\") = %d/%s", s, fd2, fd2 < 0 ? errno_to_name(errno) : "-");
+ wrong += fd2 >= 0;
+ }
+
+ assert_se(wrong == 0);
+}
+
+static void test_policy_allow_list_major_star(char type, const char *cgroup_path, BPFProgram **installed_prog) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ unsigned wrong = 0;
+ int r;
+
+ log_info("/* %s(type=%c) */", __func__, type);
+
+ r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, true);
+ assert_se(r >= 0);
+
+ r = bpf_devices_allow_list_major(prog, cgroup_path, "*", type, "rw");
+ assert_se(r >= 0);
+
+ r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, true, cgroup_path, installed_prog);
+ assert_se(r >= 0);
+
+ {
+ _cleanup_close_ int fd;
+ const char *s = "/dev/null";
+
+ fd = open(s, O_CLOEXEC|O_RDWR|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ if (type == 'c')
+ wrong += fd < 0;
+ else
+ wrong += fd >= 0;
+ }
+
+ assert_se(wrong == 0);
+}
+
+static void test_policy_empty(bool add_mismatched, const char *cgroup_path, BPFProgram **installed_prog) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+ unsigned wrong = 0;
+ int r;
+
+ log_info("/* %s(add_mismatched=%s) */", __func__, yes_no(add_mismatched));
+
+ r = bpf_devices_cgroup_init(&prog, CGROUP_DEVICE_POLICY_STRICT, add_mismatched);
+ assert_se(r >= 0);
+
+ if (add_mismatched) {
+ r = bpf_devices_allow_list_major(prog, cgroup_path, "foobarxxx", 'c', "rw");
+ assert_se(r < 0);
+ }
+
+ r = bpf_devices_apply_policy(prog, CGROUP_DEVICE_POLICY_STRICT, false, cgroup_path, installed_prog);
+ assert_se(r >= 0);
+
+ {
+ _cleanup_close_ int fd;
+ const char *s = "/dev/null";
+
+ fd = open(s, O_CLOEXEC|O_RDWR|O_NOCTTY);
+ log_debug("open(%s, \"r\") = %d/%s", s, fd, fd < 0 ? errno_to_name(errno) : "-");
+ wrong += fd >= 0;
+ }
+
+ assert_se(wrong == 0);
+}
+
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ char *cgroup = NULL, *parent = NULL;
+ _cleanup_(rmdir_and_freep) char *controller_path = NULL;
+ CGroupMask supported;
+ struct rlimit rl;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
+ rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
+ (void) setrlimit(RLIMIT_MEMLOCK, &rl);
+
+ r = cg_all_unified();
+ if (r <= 0)
+ return log_tests_skipped("We don't seem to be running with unified cgroup hierarchy");
+
+ if (!can_memlock())
+ return log_tests_skipped("Can't use mlock()");
+
+ r = enter_cgroup_subroot(&cgroup);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ r = bpf_devices_supported();
+ if (!r)
+ return log_tests_skipped("BPF device filter not supported");
+ assert_se(r == 1);
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup, NULL, &controller_path);
+ assert_se(r >= 0);
+
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+
+ test_policy_closed(cgroup, &prog);
+ test_policy_strict(cgroup, &prog);
+
+ test_policy_allow_list_major("mem", cgroup, &prog);
+ test_policy_allow_list_major("1", cgroup, &prog);
+
+ test_policy_allow_list_major_star('c', cgroup, &prog);
+ test_policy_allow_list_major_star('b', cgroup, &prog);
+
+ test_policy_empty(false, cgroup, &prog);
+ test_policy_empty(true, cgroup, &prog);
+
+ assert_se(parent = dirname_malloc(cgroup));
+
+ assert_se(cg_mask_supported(&supported) >= 0);
+ r = cg_attach_everywhere(supported, parent, 0, NULL, NULL);
+ assert_se(r >= 0);
+
+ return 0;
+}
diff --git a/src/test/test-bpf-firewall.c b/src/test/test-bpf-firewall.c
new file mode 100644
index 0000000..b6fd229
--- /dev/null
+++ b/src/test/test-bpf-firewall.c
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/bpf_insn.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "load-fragment.h"
+#include "manager.h"
+#include "memory-util.h"
+#include "rm-rf.h"
+#include "service.h"
+#include "tests.h"
+#include "unit.h"
+#include "virt.h"
+
+int main(int argc, char *argv[]) {
+ const struct bpf_insn exit_insn[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* drop */
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ CGroupContext *cc = NULL;
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *u;
+ char log_buf[65535];
+ struct rlimit rl;
+ int r;
+ union bpf_attr attr;
+ bool test_custom_filter = false;
+ const char *test_prog = "/sys/fs/bpf/test-dropper";
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (detect_container() > 0)
+ return log_tests_skipped("test-bpf-firewall fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666");
+
+ assert_se(getrlimit(RLIMIT_MEMLOCK, &rl) >= 0);
+ rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
+ (void) setrlimit(RLIMIT_MEMLOCK, &rl);
+
+ if (!can_memlock())
+ return log_tests_skipped("Can't use mlock()");
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ _cleanup_free_ char *unit_dir = NULL;
+ assert_se(get_testdata_dir("units", &unit_dir) >= 0);
+ assert_se(set_unit_path(unit_dir) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+ assert(r == 0);
+
+ r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
+ assert(r == 0);
+
+ if (getuid() != 0)
+ return log_tests_skipped("not running as root");
+
+ r = bpf_firewall_supported();
+ if (r == BPF_FIREWALL_UNSUPPORTED)
+ return log_tests_skipped("BPF firewalling not supported");
+ assert_se(r > 0);
+
+ if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
+ log_notice("BPF firewalling with BPF_F_ALLOW_MULTI supported. Yay!");
+ test_custom_filter = true;
+ } else
+ log_notice("BPF firewalling (though without BPF_F_ALLOW_MULTI) supported. Good.");
+
+ r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
+ assert(r >= 0);
+
+ if (test_custom_filter) {
+ zero(attr);
+ attr.pathname = PTR_TO_UINT64(test_prog);
+ attr.bpf_fd = p->kernel_fd;
+ attr.file_flags = 0;
+
+ (void) unlink(test_prog);
+
+ r = bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+ if (r < 0) {
+ log_warning_errno(errno, "BPF object pinning failed, will not run custom filter test: %m");
+ test_custom_filter = false;
+ }
+ }
+
+ p = bpf_program_unref(p);
+
+ /* The simple tests succeeded. Now let's try full unit-based use-case. */
+
+ assert_se(manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m) >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ assert_se(u = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(u, "foo.service") == 0);
+ assert_se(cc = unit_get_cgroup_context(u));
+ u->perpetual = true;
+
+ cc->ip_accounting = true;
+
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "10.0.1.0/24", &cc->ip_address_allow, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "127.0.0.2", &cc->ip_address_allow, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.3", &cc->ip_address_deny, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "10.0.3.2/24", &cc->ip_address_deny, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.1/25", &cc->ip_address_deny, NULL) == 0);
+ assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.4", &cc->ip_address_deny, NULL) == 0);
+
+ assert(cc->ip_address_allow);
+ assert(cc->ip_address_allow->items_next);
+ assert(!cc->ip_address_allow->items_next->items_next);
+
+ /* The deny list is defined redundantly, let's ensure it got properly reduced */
+ assert(cc->ip_address_deny);
+ assert(cc->ip_address_deny->items_next);
+ assert(!cc->ip_address_deny->items_next->items_next);
+
+ assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/bin/ping -c 1 127.0.0.2 -W 5", SERVICE(u)->exec_command, u) == 0);
+ assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/bin/ping -c 1 127.0.0.3 -W 5", SERVICE(u)->exec_command, u) == 0);
+
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]);
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next);
+ assert_se(!SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->command_next);
+
+ SERVICE(u)->type = SERVICE_ONESHOT;
+ u->load_state = UNIT_LOADED;
+
+ unit_dump(u, stdout, NULL);
+
+ r = bpf_firewall_compile(u);
+ if (IN_SET(r, -ENOTTY, -ENOSYS, -EPERM))
+ return log_tests_skipped("Kernel doesn't support the necessary bpf bits (masked out via seccomp?)");
+ assert_se(r >= 0);
+
+ assert(u->ip_bpf_ingress);
+ assert(u->ip_bpf_egress);
+
+ r = bpf_program_load_kernel(u->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));
+
+ log_notice("log:");
+ log_notice("-------");
+ log_notice("%s", log_buf);
+ log_notice("-------");
+
+ assert(r >= 0);
+
+ r = bpf_program_load_kernel(u->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));
+
+ log_notice("log:");
+ log_notice("-------");
+ log_notice("%s", log_buf);
+ log_notice("-------");
+
+ assert(r >= 0);
+
+ assert_se(unit_start(u) >= 0);
+
+ while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+ assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code == CLD_EXITED &&
+ SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status == EXIT_SUCCESS);
+
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
+ SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);
+
+ if (test_custom_filter) {
+ assert_se(u = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(u, "custom-filter.service") == 0);
+ assert_se(cc = unit_get_cgroup_context(u));
+ u->perpetual = true;
+
+ cc->ip_accounting = true;
+
+ assert_se(config_parse_ip_filter_bpf_progs(u->id, "filename", 1, "Service", 1, "IPIngressFilterPath", 0, test_prog, &cc->ip_filters_ingress, u) == 0);
+ assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "-/bin/ping -c 1 127.0.0.1 -W 5", SERVICE(u)->exec_command, u) == 0);
+
+ SERVICE(u)->type = SERVICE_ONESHOT;
+ u->load_state = UNIT_LOADED;
+
+ assert_se(unit_start(u) >= 0);
+
+ while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+ assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+
+ assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code != CLD_EXITED ||
+ SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status != EXIT_SUCCESS);
+
+ (void) unlink(test_prog);
+ assert_se(SERVICE(u)->state == SERVICE_DEAD);
+ }
+
+ return 0;
+}
diff --git a/src/test/test-btrfs.c b/src/test/test-btrfs.c
new file mode 100644
index 0000000..e840536
--- /dev/null
+++ b/src/test/test-btrfs.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+
+#include "btrfs-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ BtrfsQuotaInfo quota;
+ int r, fd;
+
+ fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ log_error_errno(errno, "Failed to open root directory: %m");
+ else {
+ char ts[FORMAT_TIMESTAMP_MAX], bs[FORMAT_BYTES_MAX];
+ BtrfsSubvolInfo info;
+
+ r = btrfs_subvol_get_info_fd(fd, 0, &info);
+ if (r < 0)
+ log_error_errno(r, "Failed to get subvolume info: %m");
+ else {
+ log_info("otime: %s", format_timestamp(ts, sizeof(ts), info.otime));
+ log_info("read-only (search): %s", yes_no(info.read_only));
+ }
+
+ r = btrfs_qgroup_get_quota_fd(fd, 0, &quota);
+ if (r < 0)
+ log_error_errno(r, "Failed to get quota info: %m");
+ else {
+ log_info("referenced: %s", strna(format_bytes(bs, sizeof(bs), quota.referenced)));
+ log_info("exclusive: %s", strna(format_bytes(bs, sizeof(bs), quota.exclusive)));
+ log_info("referenced_max: %s", strna(format_bytes(bs, sizeof(bs), quota.referenced_max)));
+ log_info("exclusive_max: %s", strna(format_bytes(bs, sizeof(bs), quota.exclusive_max)));
+ }
+
+ r = btrfs_subvol_get_read_only_fd(fd);
+ if (r < 0)
+ log_error_errno(r, "Failed to get read only flag: %m");
+ else
+ log_info("read-only (ioctl): %s", yes_no(r));
+
+ safe_close(fd);
+ }
+
+ r = btrfs_subvol_make("/xxxtest");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = write_string_file("/xxxtest/afile", "ljsadhfljasdkfhlkjdsfha", WRITE_STRING_FILE_CREATE);
+ if (r < 0)
+ log_error_errno(r, "Failed to write file: %m");
+
+ r = btrfs_subvol_snapshot("/xxxtest", "/xxxtest2", 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to make snapshot: %m");
+
+ r = btrfs_subvol_snapshot("/xxxtest", "/xxxtest3", BTRFS_SNAPSHOT_READ_ONLY);
+ if (r < 0)
+ log_error_errno(r, "Failed to make snapshot: %m");
+
+ r = btrfs_subvol_remove("/xxxtest", BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to remove subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxtest2", BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to remove subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxtest3", BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to remove subvolume: %m");
+
+ r = btrfs_subvol_snapshot("/etc", "/etc2", BTRFS_SNAPSHOT_READ_ONLY|BTRFS_SNAPSHOT_FALLBACK_COPY);
+ if (r < 0)
+ log_error_errno(r, "Failed to make snapshot: %m");
+
+ r = btrfs_subvol_remove("/etc2", BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to remove subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxrectest");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/xxxrectest2");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/xxxrectest3");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/xxxrectest3/sub");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ if (mkdir("/xxxrectest/dir", 0755) < 0)
+ log_error_errno(errno, "Failed to make directory: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/dir/xxxrectest4");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ if (mkdir("/xxxrectest/dir/xxxrectest4/dir", 0755) < 0)
+ log_error_errno(errno, "Failed to make directory: %m");
+
+ r = btrfs_subvol_make("/xxxrectest/dir/xxxrectest4/dir/xxxrectest5");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ if (mkdir("/xxxrectest/mnt", 0755) < 0)
+ log_error_errno(errno, "Failed to make directory: %m");
+
+ r = btrfs_subvol_snapshot("/xxxrectest", "/xxxrectest2", BTRFS_SNAPSHOT_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed to snapshot subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxrectest", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed to recursively remove subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxrectest2", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed to recursively remove subvolume: %m");
+
+ r = btrfs_subvol_make("/xxxquotatest");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_auto_qgroup("/xxxquotatest", 0, true);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up auto qgroup: %m");
+
+ r = btrfs_subvol_make("/xxxquotatest/beneath");
+ if (r < 0)
+ log_error_errno(r, "Failed to make subvolume: %m");
+
+ r = btrfs_subvol_auto_qgroup("/xxxquotatest/beneath", 0, false);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up auto qgroup: %m");
+
+ r = btrfs_qgroup_set_limit("/xxxquotatest/beneath", 0, 4ULL * 1024 * 1024 * 1024);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up quota limit: %m");
+
+ r = btrfs_subvol_set_subtree_quota_limit("/xxxquotatest", 0, 5ULL * 1024 * 1024 * 1024);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up quota limit: %m");
+
+ r = btrfs_subvol_snapshot("/xxxquotatest", "/xxxquotatest2", BTRFS_SNAPSHOT_RECURSIVE|BTRFS_SNAPSHOT_QUOTA);
+ if (r < 0)
+ log_error_errno(r, "Failed to set up snapshot: %m");
+
+ r = btrfs_qgroup_get_quota("/xxxquotatest2/beneath", 0, &quota);
+ if (r < 0)
+ log_error_errno(r, "Failed to query quota: %m");
+
+ assert_se(quota.referenced_max == 4ULL * 1024 * 1024 * 1024);
+
+ r = btrfs_subvol_get_subtree_quota("/xxxquotatest2", 0, &quota);
+ if (r < 0)
+ log_error_errno(r, "Failed to query quota: %m");
+
+ assert_se(quota.referenced_max == 5ULL * 1024 * 1024 * 1024);
+
+ r = btrfs_subvol_remove("/xxxquotatest", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed remove subvolume: %m");
+
+ r = btrfs_subvol_remove("/xxxquotatest2", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE);
+ if (r < 0)
+ log_error_errno(r, "Failed remove subvolume: %m");
+
+ return 0;
+}
diff --git a/src/test/test-bus-util.c b/src/test/test-bus-util.c
new file mode 100644
index 0000000..0381ba1
--- /dev/null
+++ b/src/test/test-bus-util.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-util.h"
+#include "log.h"
+#include "tests.h"
+
+static int callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ return 1;
+}
+
+static void destroy_callback(void *userdata) {
+ int *n_called = userdata;
+
+ (*n_called) ++;
+}
+
+static void test_destroy_callback(void) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ sd_bus_slot *slot = NULL;
+ sd_bus_destroy_t t;
+
+ int r, n_called = 0;
+
+ log_info("/* %s */", __func__);
+
+ r = bus_open_system_watch_bind_with_description(&bus, "test-bus");
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to bus: %m");
+ return;
+ }
+
+ r = sd_bus_request_name_async(bus, &slot, "org.freedesktop.systemd.test-bus-util", 0, callback, &n_called);
+ assert(r == 1);
+
+ assert_se(sd_bus_slot_get_destroy_callback(slot, NULL) == 0);
+ assert_se(sd_bus_slot_get_destroy_callback(slot, &t) == 0);
+
+ assert_se(sd_bus_slot_set_destroy_callback(slot, destroy_callback) == 0);
+ assert_se(sd_bus_slot_get_destroy_callback(slot, NULL) == 1);
+ assert_se(sd_bus_slot_get_destroy_callback(slot, &t) == 1);
+ assert_se(t == destroy_callback);
+
+ /* Force cleanup so we can look at n_called */
+ assert(n_called == 0);
+ sd_bus_slot_unref(slot);
+ assert(n_called == 1);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_destroy_callback();
+
+ return 0;
+}
diff --git a/src/test/test-calendarspec.c b/src/test/test-calendarspec.c
new file mode 100644
index 0000000..e0b7f22
--- /dev/null
+++ b/src/test/test-calendarspec.c
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "calendarspec.h"
+#include "errno-util.h"
+#include "string-util.h"
+#include "util.h"
+
+static void test_one(const char *input, const char *output) {
+ CalendarSpec *c;
+ _cleanup_free_ char *p = NULL, *q = NULL;
+ usec_t u;
+ char buf[FORMAT_TIMESTAMP_MAX];
+ int r;
+
+ assert_se(calendar_spec_from_string(input, &c) >= 0);
+
+ assert_se(calendar_spec_to_string(c, &p) >= 0);
+ printf("\"%s\" → \"%s\"\n", input, p);
+
+ assert_se(streq(p, output));
+
+ u = now(CLOCK_REALTIME);
+ r = calendar_spec_next_usec(c, u, &u);
+ printf("Next: %s\n", r < 0 ? strerror_safe(r) : format_timestamp(buf, sizeof(buf), u));
+ calendar_spec_free(c);
+
+ assert_se(calendar_spec_from_string(p, &c) >= 0);
+ assert_se(calendar_spec_to_string(c, &q) >= 0);
+ calendar_spec_free(c);
+
+ assert_se(streq(q, p));
+}
+
+static void test_next(const char *input, const char *new_tz, usec_t after, usec_t expect) {
+ CalendarSpec *c;
+ usec_t u;
+ char *old_tz;
+ char buf[FORMAT_TIMESTAMP_MAX];
+ int r;
+
+ old_tz = getenv("TZ");
+ if (old_tz)
+ old_tz = strdupa(old_tz);
+
+ if (new_tz) {
+ char *colon_tz;
+
+ colon_tz = strjoina(":", new_tz);
+ assert_se(setenv("TZ", colon_tz, 1) >= 0);
+ } else
+ assert_se(unsetenv("TZ") >= 0);
+ tzset();
+
+ assert_se(calendar_spec_from_string(input, &c) >= 0);
+
+ printf("\"%s\"\n", input);
+
+ u = after;
+ r = calendar_spec_next_usec(c, after, &u);
+ printf("At: %s\n", r < 0 ? strerror_safe(r) : format_timestamp_style(buf, sizeof buf, u, TIMESTAMP_US));
+ if (expect != (usec_t)-1)
+ assert_se(r >= 0 && u == expect);
+ else
+ assert(r == -ENOENT);
+
+ calendar_spec_free(c);
+
+ if (old_tz)
+ assert_se(setenv("TZ", old_tz, 1) >= 0);
+ else
+ assert_se(unsetenv("TZ") >= 0);
+ tzset();
+}
+
+static void test_timestamp(void) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ _cleanup_free_ char *t = NULL;
+ CalendarSpec *c;
+ usec_t x, y;
+
+ /* Ensure that a timestamp is also a valid calendar specification. Convert forth and back */
+
+ x = now(CLOCK_REALTIME);
+
+ assert_se(format_timestamp_style(buf, sizeof(buf), x, TIMESTAMP_US));
+ printf("%s\n", buf);
+ assert_se(calendar_spec_from_string(buf, &c) >= 0);
+ assert_se(calendar_spec_to_string(c, &t) >= 0);
+ calendar_spec_free(c);
+ printf("%s\n", t);
+
+ assert_se(parse_timestamp(t, &y) >= 0);
+ assert_se(y == x);
+}
+
+static void test_hourly_bug_4031(void) {
+ CalendarSpec *c;
+ usec_t n, u, w;
+ char buf[FORMAT_TIMESTAMP_MAX], zaf[FORMAT_TIMESTAMP_MAX];
+ int r;
+
+ assert_se(calendar_spec_from_string("hourly", &c) >= 0);
+ n = now(CLOCK_REALTIME);
+ assert_se((r = calendar_spec_next_usec(c, n, &u)) >= 0);
+
+ printf("Now: %s (%"PRIu64")\n", format_timestamp_style(buf, sizeof buf, n, TIMESTAMP_US), n);
+ printf("Next hourly: %s (%"PRIu64")\n", r < 0 ? strerror_safe(r) : format_timestamp_style(buf, sizeof buf, u, TIMESTAMP_US), u);
+
+ assert_se((r = calendar_spec_next_usec(c, u, &w)) >= 0);
+ printf("Next hourly: %s (%"PRIu64")\n", r < 0 ? strerror_safe(r) : format_timestamp_style(zaf, sizeof zaf, w, TIMESTAMP_US), w);
+
+ assert_se(n < u);
+ assert_se(u <= n + USEC_PER_HOUR);
+ assert_se(u < w);
+ assert_se(w <= u + USEC_PER_HOUR);
+
+ calendar_spec_free(c);
+}
+
+int main(int argc, char* argv[]) {
+ CalendarSpec *c;
+
+ test_one("Sat,Thu,Mon-Wed,Sat-Sun", "Mon..Thu,Sat,Sun *-*-* 00:00:00");
+ test_one("Sat,Thu,Mon..Wed,Sat..Sun", "Mon..Thu,Sat,Sun *-*-* 00:00:00");
+ test_one("Mon,Sun 12-*-* 2,1:23", "Mon,Sun 2012-*-* 01,02:23:00");
+ test_one("Wed *-1", "Wed *-*-01 00:00:00");
+ test_one("Wed-Wed,Wed *-1", "Wed *-*-01 00:00:00");
+ test_one("Wed..Wed,Wed *-1", "Wed *-*-01 00:00:00");
+ test_one("Wed, 17:48", "Wed *-*-* 17:48:00");
+ test_one("Wednesday,", "Wed *-*-* 00:00:00");
+ test_one("Wed-Sat,Tue 12-10-15 1:2:3", "Tue..Sat 2012-10-15 01:02:03");
+ test_one("Wed..Sat,Tue 12-10-15 1:2:3", "Tue..Sat 2012-10-15 01:02:03");
+ test_one("*-*-7 0:0:0", "*-*-07 00:00:00");
+ test_one("10-15", "*-10-15 00:00:00");
+ test_one("monday *-12-* 17:00", "Mon *-12-* 17:00:00");
+ test_one("Mon,Fri *-*-3,1,2 *:30:45", "Mon,Fri *-*-01,02,03 *:30:45");
+ test_one("12,14,13,12:20,10,30", "*-*-* 12,13,14:10,20,30:00");
+ test_one("mon,fri *-1/2-1,3 *:30:45", "Mon,Fri *-01/2-01,03 *:30:45");
+ test_one("03-05 08:05:40", "*-03-05 08:05:40");
+ test_one("08:05:40", "*-*-* 08:05:40");
+ test_one("05:40", "*-*-* 05:40:00");
+ test_one("Sat,Sun 12-05 08:05:40", "Sat,Sun *-12-05 08:05:40");
+ test_one("Sat,Sun 08:05:40", "Sat,Sun *-*-* 08:05:40");
+ test_one("2003-03-05 05:40", "2003-03-05 05:40:00");
+ test_one("2003-03-05", "2003-03-05 00:00:00");
+ test_one("03-05", "*-03-05 00:00:00");
+ test_one("hourly", "*-*-* *:00:00");
+ test_one("daily", "*-*-* 00:00:00");
+ test_one("monthly", "*-*-01 00:00:00");
+ test_one("weekly", "Mon *-*-* 00:00:00");
+ test_one("minutely", "*-*-* *:*:00");
+ test_one("quarterly", "*-01,04,07,10-01 00:00:00");
+ test_one("semi-annually", "*-01,07-01 00:00:00");
+ test_one("annually", "*-01-01 00:00:00");
+ test_one("*:2/3", "*-*-* *:02/3:00");
+ test_one("2015-10-25 01:00:00 uTc", "2015-10-25 01:00:00 UTC");
+ test_one("2015-10-25 01:00:00 Asia/Vladivostok", "2015-10-25 01:00:00 Asia/Vladivostok");
+ test_one("weekly Pacific/Auckland", "Mon *-*-* 00:00:00 Pacific/Auckland");
+ test_one("2016-03-27 03:17:00.4200005", "2016-03-27 03:17:00.420001");
+ test_one("2016-03-27 03:17:00/0.42", "2016-03-27 03:17:00/0.420000");
+ test_one("9..11,13:00,30", "*-*-* 09..11,13:00,30:00");
+ test_one("1..3-1..3 1..3:1..3", "*-01..03-01..03 01..03:01..03:00");
+ test_one("00:00:1.125..2.125", "*-*-* 00:00:01.125000..02.125000");
+ test_one("00:00:1.0..3.8", "*-*-* 00:00:01..03");
+ test_one("00:00:01..03", "*-*-* 00:00:01..03");
+ test_one("00:00:01/2,02..03", "*-*-* 00:00:01/2,02..03");
+ test_one("*-*~1 Utc", "*-*~01 00:00:00 UTC");
+ test_one("*-*~05,3 ", "*-*~03,05 00:00:00");
+ test_one("*-*~* 00:00:00", "*-*-* 00:00:00");
+ test_one("Monday", "Mon *-*-* 00:00:00");
+ test_one("Monday *-*-*", "Mon *-*-* 00:00:00");
+ test_one("*-*-*", "*-*-* 00:00:00");
+ test_one("*:*:*", "*-*-* *:*:*");
+ test_one("*:*", "*-*-* *:*:00");
+ test_one("12:*", "*-*-* 12:*:00");
+ test_one("*:30", "*-*-* *:30:00");
+ test_one("93..00-*-*", "1993..2000-*-* 00:00:00");
+ test_one("00..07-*-*", "2000..2007-*-* 00:00:00");
+ test_one("*:20..39/5", "*-*-* *:20..35/5:00");
+ test_one("00:00:20..40/1", "*-*-* 00:00:20..40");
+ test_one("*~03/1,03..05", "*-*~03/1,03..05 00:00:00");
+ /* UNIX timestamps are always UTC */
+ test_one("@1493187147", "2017-04-26 06:12:27 UTC");
+ test_one("@1493187147 UTC", "2017-04-26 06:12:27 UTC");
+ test_one("@0", "1970-01-01 00:00:00 UTC");
+ test_one("@0 UTC", "1970-01-01 00:00:00 UTC");
+ test_one("*:05..05", "*-*-* *:05:00");
+ test_one("*:05..10/6", "*-*-* *:05:00");
+
+ test_next("2016-03-27 03:17:00", "", 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00", "CET", 12345, 1459041420000000);
+ test_next("2016-03-27 03:17:00", "EET", 12345, -1);
+ test_next("2016-03-27 03:17:00 UTC", NULL, 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00 UTC", "", 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00 UTC", "CET", 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00 UTC", "EET", 12345, 1459048620000000);
+ test_next("2016-03-27 03:17:00.420000001 UTC", "EET", 12345, 1459048620420000);
+ test_next("2016-03-27 03:17:00.4200005 UTC", "EET", 12345, 1459048620420001);
+ test_next("2015-11-13 09:11:23.42", "EET", 12345, 1447398683420000);
+ test_next("2015-11-13 09:11:23.42/1.77", "EET", 1447398683420000, 1447398685190000);
+ test_next("2015-11-13 09:11:23.42/1.77", "EET", 1447398683419999, 1447398683420000);
+ test_next("Sun 16:00:00", "CET", 1456041600123456, 1456066800000000);
+ test_next("*-04-31", "", 12345, -1);
+ test_next("2016-02~01 UTC", "", 12345, 1456704000000000);
+ test_next("Mon 2017-05~01..07 UTC", "", 12345, 1496016000000000);
+ test_next("Mon 2017-05~07/1 UTC", "", 12345, 1496016000000000);
+ test_next("2017-08-06 9,11,13,15,17:00 UTC", "", 1502029800000000, 1502031600000000);
+ test_next("2017-08-06 9..17/2:00 UTC", "", 1502029800000000, 1502031600000000);
+ test_next("2016-12-* 3..21/6:00 UTC", "", 1482613200000001, 1482634800000000);
+ test_next("2017-09-24 03:30:00 Pacific/Auckland", "", 12345, 1506177000000000);
+ // Due to daylight saving time - 2017-09-24 02:30:00 does not exist
+ test_next("2017-09-24 02:30:00 Pacific/Auckland", "", 12345, -1);
+ test_next("2017-04-02 02:30:00 Pacific/Auckland", "", 12345, 1491053400000000);
+ // Confirm that even though it's a time change here (backward) 02:30 happens only once
+ test_next("2017-04-02 02:30:00 Pacific/Auckland", "", 1491053400000000, -1);
+ test_next("2017-04-02 03:30:00 Pacific/Auckland", "", 12345, 1491060600000000);
+ // Confirm that timezones in the Spec work regardless of current timezone
+ test_next("2017-09-09 20:42:00 Pacific/Auckland", "", 12345, 1504946520000000);
+ test_next("2017-09-09 20:42:00 Pacific/Auckland", "EET", 12345, 1504946520000000);
+
+ assert_se(calendar_spec_from_string("test", &c) < 0);
+ assert_se(calendar_spec_from_string(" utc", &c) < 0);
+ assert_se(calendar_spec_from_string(" ", &c) < 0);
+ assert_se(calendar_spec_from_string("", &c) < 0);
+ assert_se(calendar_spec_from_string("7", &c) < 0);
+ assert_se(calendar_spec_from_string("121212:1:2", &c) < 0);
+ assert_se(calendar_spec_from_string("2000-03-05.23 00:00:00", &c) < 0);
+ assert_se(calendar_spec_from_string("2000-03-05 00:00.1:00", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00:00/0.00000001", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00:00.0..00.9", &c) < 0);
+ assert_se(calendar_spec_from_string("2016~11-22", &c) < 0);
+ assert_se(calendar_spec_from_string("*-*~5/5", &c) < 0);
+ assert_se(calendar_spec_from_string("Monday.. 12:00", &c) < 0);
+ assert_se(calendar_spec_from_string("Monday..", &c) < 0);
+ assert_se(calendar_spec_from_string("-00:+00/-5", &c) < 0);
+ assert_se(calendar_spec_from_string("00:+00/-5", &c) < 0);
+ assert_se(calendar_spec_from_string("2016- 11- 24 12: 30: 00", &c) < 0);
+ assert_se(calendar_spec_from_string("*~29", &c) < 0);
+ assert_se(calendar_spec_from_string("*~16..31", &c) < 0);
+ assert_se(calendar_spec_from_string("12..1/2-*", &c) < 0);
+ assert_se(calendar_spec_from_string("20/4:00", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00/60", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00:2300", &c) < 0);
+ assert_se(calendar_spec_from_string("00:00:18446744073709551615", &c) < 0);
+ assert_se(calendar_spec_from_string("@88588582097858858", &c) == -ERANGE);
+
+ test_timestamp();
+ test_hourly_bug_4031();
+
+ return 0;
+}
diff --git a/src/test/test-cap-list.c b/src/test/test-cap-list.c
new file mode 100644
index 0000000..c4b40f3
--- /dev/null
+++ b/src/test/test-cap-list.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <sys/prctl.h>
+
+#include "alloc-util.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "util.h"
+
+/* verify the capability parser */
+static void test_cap_list(void) {
+ assert_se(!capability_to_name(-1));
+ assert_se(!capability_to_name(capability_list_length()));
+
+ for (int i = 0; i < capability_list_length(); i++) {
+ const char *n;
+
+ assert_se(n = capability_to_name(i));
+ assert_se(capability_from_name(n) == i);
+ printf("%s = %i\n", n, i);
+ }
+
+ assert_se(capability_from_name("asdfbsd") == -EINVAL);
+ assert_se(capability_from_name("CAP_AUDIT_READ") == CAP_AUDIT_READ);
+ assert_se(capability_from_name("cap_audit_read") == CAP_AUDIT_READ);
+ assert_se(capability_from_name("cAp_aUdIt_rEAd") == CAP_AUDIT_READ);
+ assert_se(capability_from_name("0") == 0);
+ assert_se(capability_from_name("15") == 15);
+ assert_se(capability_from_name("63") == 63);
+ assert_se(capability_from_name("64") == -EINVAL);
+ assert_se(capability_from_name("-1") == -EINVAL);
+
+ for (int i = 0; i < capability_list_length(); i++) {
+ _cleanup_cap_free_charp_ char *a = NULL;
+ const char *b;
+ unsigned u;
+
+ assert_se(a = cap_to_name(i));
+
+ /* quit the loop as soon as libcap starts returning
+ * numeric ids, formatted as strings */
+ if (safe_atou(a, &u) >= 0)
+ break;
+
+ assert_se(b = capability_to_name(i));
+
+ printf("%s vs. %s\n", a, b);
+
+ assert_se(strcasecmp(a, b) == 0);
+ }
+}
+
+static void test_capability_set_one(uint64_t c, const char *t) {
+ _cleanup_free_ char *t1 = NULL;
+ uint64_t c1, c_masked = c & all_capabilities();
+
+ assert_se(capability_set_to_string_alloc(c, &t1) == 0);
+ assert_se(streq(t1, t));
+
+ assert_se(capability_set_from_string(t1, &c1) == 0);
+ assert_se(c1 == c_masked);
+
+ free(t1);
+ assert_se(t1 = strjoin("'cap_chown cap_dac_override' \"cap_setgid cap_setuid\"", t,
+ " hogehoge foobar 18446744073709551616 3.14 -3 ", t));
+ assert_se(capability_set_from_string(t1, &c1) == 0);
+ assert_se(c1 == c_masked);
+}
+
+static void test_capability_set_from_string(void) {
+ uint64_t c;
+
+ assert_se(capability_set_from_string(NULL, &c) == 0);
+ assert_se(c == 0);
+
+ assert_se(capability_set_from_string("", &c) == 0);
+ assert_se(c == 0);
+
+ assert_se(capability_set_from_string("0", &c) == 0);
+ assert_se(c == UINT64_C(1));
+
+ assert_se(capability_set_from_string("1", &c) == 0);
+ assert_se(c == UINT64_C(1) << 1);
+
+ assert_se(capability_set_from_string("0 1 2 3", &c) == 0);
+ assert_se(c == (UINT64_C(1) << 4) - 1);
+}
+
+static void test_capability_set_to_string(uint64_t invalid_cap_set) {
+ uint64_t c;
+
+ test_capability_set_one(invalid_cap_set, "");
+
+ c = (UINT64_C(1) << CAP_DAC_OVERRIDE | invalid_cap_set);
+ test_capability_set_one(c, "cap_dac_override");
+
+ c = (UINT64_C(1) << CAP_CHOWN |
+ UINT64_C(1) << CAP_DAC_OVERRIDE |
+ UINT64_C(1) << CAP_DAC_READ_SEARCH |
+ UINT64_C(1) << CAP_FOWNER |
+ UINT64_C(1) << CAP_SETGID |
+ UINT64_C(1) << CAP_SETUID |
+ UINT64_C(1) << CAP_SYS_PTRACE |
+ UINT64_C(1) << CAP_SYS_ADMIN |
+ UINT64_C(1) << CAP_AUDIT_CONTROL |
+ UINT64_C(1) << CAP_MAC_OVERRIDE |
+ UINT64_C(1) << CAP_SYSLOG |
+ invalid_cap_set);
+ test_capability_set_one(c, ("cap_chown cap_dac_override cap_dac_read_search cap_fowner "
+ "cap_setgid cap_setuid cap_sys_ptrace cap_sys_admin "
+ "cap_audit_control cap_mac_override cap_syslog"));
+}
+
+int main(int argc, char *argv[]) {
+ test_cap_list();
+ test_capability_set_from_string();
+ test_capability_set_to_string(0);
+
+ /* once the kernel supports 63 caps, there are no 'invalid' numbers
+ * for us to test with */
+ if (cap_last_cap() < 63)
+ test_capability_set_to_string(all_capabilities() + 1);
+
+ return 0;
+}
diff --git a/src/test/test-capability.c b/src/test/test-capability.c
new file mode 100644
index 0000000..0ff5607
--- /dev/null
+++ b/src/test/test-capability.c
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <pwd.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define TEST_CAPABILITY_C
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "missing_prctl.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+static uid_t test_uid = -1;
+static gid_t test_gid = -1;
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+/* Keep CAP_SYS_PTRACE when running under Address Sanitizer */
+static const uint64_t test_flags = UINT64_C(1) << CAP_SYS_PTRACE;
+#else
+/* We keep CAP_DAC_OVERRIDE to avoid errors with gcov when doing test coverage */
+static const uint64_t test_flags = UINT64_C(1) << CAP_DAC_OVERRIDE;
+#endif
+
+/* verify cap_last_cap() against /proc/sys/kernel/cap_last_cap */
+static void test_last_cap_file(void) {
+ _cleanup_free_ char *content = NULL;
+ unsigned long val = 0;
+ int r;
+
+ r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
+ if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r)) /* kernel pre 3.2 or no access */
+ return;
+ assert_se(r >= 0);
+
+ r = safe_atolu(content, &val);
+ assert_se(r >= 0);
+ assert_se(val != 0);
+ assert_se(val == cap_last_cap());
+}
+
+/* verify cap_last_cap() against syscall probing */
+static void test_last_cap_probe(void) {
+ unsigned long p = (unsigned long)CAP_LAST_CAP;
+
+ if (prctl(PR_CAPBSET_READ, p) < 0) {
+ for (p--; p > 0; p --)
+ if (prctl(PR_CAPBSET_READ, p) >= 0)
+ break;
+ } else {
+ for (;; p++)
+ if (prctl(PR_CAPBSET_READ, p+1) < 0)
+ break;
+ }
+
+ assert_se(p != 0);
+ assert_se(p == cap_last_cap());
+}
+
+static void fork_test(void (*test_func)(void)) {
+ pid_t pid = 0;
+
+ pid = fork();
+ assert_se(pid >= 0);
+ if (pid == 0) {
+ test_func();
+ exit(EXIT_SUCCESS);
+ } else if (pid > 0) {
+ int status;
+
+ assert_se(waitpid(pid, &status, 0) > 0);
+ assert_se(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+ }
+}
+
+static void show_capabilities(void) {
+ cap_t caps;
+ char *text;
+
+ caps = cap_get_proc();
+ assert_se(caps);
+
+ text = cap_to_text(caps, NULL);
+ assert_se(text);
+
+ log_info("Capabilities:%s", text);
+ cap_free(caps);
+ cap_free(text);
+}
+
+static int setup_tests(bool *run_ambient) {
+ struct passwd *nobody;
+ int r;
+
+ nobody = getpwnam(NOBODY_USER_NAME);
+ if (!nobody)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Could not find nobody user: %m");
+
+ test_uid = nobody->pw_uid;
+ test_gid = nobody->pw_gid;
+
+ *run_ambient = false;
+
+ r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0);
+
+ /* There's support for PR_CAP_AMBIENT if the prctl() call
+ * succeeded or error code was something else than EINVAL. The
+ * EINVAL check should be good enough to rule out false
+ * positives. */
+
+ if (r >= 0 || errno != EINVAL)
+ *run_ambient = true;
+
+ return 0;
+}
+
+static void test_drop_privileges_keep_net_raw(void) {
+ int sock;
+
+ sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ assert_se(sock >= 0);
+ safe_close(sock);
+
+ assert_se(drop_privileges(test_uid, test_gid, test_flags | (1ULL << CAP_NET_RAW)) >= 0);
+ assert_se(getuid() == test_uid);
+ assert_se(getgid() == test_gid);
+ show_capabilities();
+
+ sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ assert_se(sock >= 0);
+ safe_close(sock);
+}
+
+static void test_drop_privileges_dontkeep_net_raw(void) {
+ int sock;
+
+ sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ assert_se(sock >= 0);
+ safe_close(sock);
+
+ assert_se(drop_privileges(test_uid, test_gid, test_flags) >= 0);
+ assert_se(getuid() == test_uid);
+ assert_se(getgid() == test_gid);
+ show_capabilities();
+
+ sock = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
+ assert_se(sock < 0);
+}
+
+static void test_drop_privileges_fail(void) {
+ assert_se(drop_privileges(test_uid, test_gid, test_flags) >= 0);
+ assert_se(getuid() == test_uid);
+ assert_se(getgid() == test_gid);
+
+ assert_se(drop_privileges(test_uid, test_gid, test_flags) < 0);
+ assert_se(drop_privileges(0, 0, test_flags) < 0);
+}
+
+static void test_drop_privileges(void) {
+ fork_test(test_drop_privileges_keep_net_raw);
+ fork_test(test_drop_privileges_dontkeep_net_raw);
+ fork_test(test_drop_privileges_fail);
+}
+
+static void test_have_effective_cap(void) {
+ assert_se(have_effective_cap(CAP_KILL));
+ assert_se(have_effective_cap(CAP_CHOWN));
+
+ assert_se(drop_privileges(test_uid, test_gid, test_flags | (1ULL << CAP_KILL)) >= 0);
+ assert_se(getuid() == test_uid);
+ assert_se(getgid() == test_gid);
+
+ assert_se(have_effective_cap(CAP_KILL));
+ assert_se(!have_effective_cap(CAP_CHOWN));
+}
+
+static void test_update_inherited_set(void) {
+ cap_t caps;
+ uint64_t set = 0;
+ cap_flag_value_t fv;
+
+ caps = cap_get_proc();
+ assert_se(caps);
+
+ set = (UINT64_C(1) << CAP_CHOWN);
+
+ assert_se(!capability_update_inherited_set(caps, set));
+ assert_se(!cap_get_flag(caps, CAP_CHOWN, CAP_INHERITABLE, &fv));
+ assert(fv == CAP_SET);
+
+ cap_free(caps);
+}
+
+static void test_apply_ambient_caps(void) {
+ cap_t caps;
+ uint64_t set = 0;
+ cap_flag_value_t fv;
+
+ assert_se(prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) == 0);
+
+ set = (UINT64_C(1) << CAP_CHOWN);
+
+ assert_se(!capability_ambient_set_apply(set, true));
+
+ caps = cap_get_proc();
+ assert_se(caps);
+ assert_se(!cap_get_flag(caps, CAP_CHOWN, CAP_INHERITABLE, &fv));
+ assert_se(fv == CAP_SET);
+ cap_free(caps);
+
+ assert_se(prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) == 1);
+
+ assert_se(!capability_ambient_set_apply(0, true));
+ caps = cap_get_proc();
+ assert_se(caps);
+ assert_se(!cap_get_flag(caps, CAP_CHOWN, CAP_INHERITABLE, &fv));
+ assert_se(fv == CAP_CLEAR);
+ cap_free(caps);
+
+ assert_se(prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) == 0);
+}
+
+static void test_ensure_cap_64bit(void) {
+ _cleanup_free_ char *content = NULL;
+ unsigned long p = 0;
+ int r;
+
+ r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
+ if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r)) /* kernel pre 3.2 or no access */
+ return;
+ assert_se(r >= 0);
+
+ assert_se(safe_atolu(content, &p) >= 0);
+
+ /* If caps don't fit into 64bit anymore, we have a problem, fail the test. */
+ assert_se(p <= 63);
+
+ /* Also check for the header definition */
+ assert_cc(CAP_LAST_CAP <= 63);
+}
+
+int main(int argc, char *argv[]) {
+ bool run_ambient;
+
+ test_setup_logging(LOG_INFO);
+
+ test_ensure_cap_64bit();
+
+ test_last_cap_file();
+ test_last_cap_probe();
+
+ log_info("have ambient caps: %s", yes_no(ambient_capabilities_supported()));
+
+ if (getuid() != 0)
+ return log_tests_skipped("not running as root");
+
+ if (setup_tests(&run_ambient) < 0)
+ return log_tests_skipped("setup failed");
+
+ show_capabilities();
+
+ test_drop_privileges();
+ test_update_inherited_set();
+
+ fork_test(test_have_effective_cap);
+
+ if (run_ambient)
+ fork_test(test_apply_ambient_caps);
+
+ return 0;
+}
diff --git a/src/test/test-cgroup-cpu.c b/src/test/test-cgroup-cpu.c
new file mode 100644
index 0000000..be73be8
--- /dev/null
+++ b/src/test/test-cgroup-cpu.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "cgroup.h"
+#include "log.h"
+
+static void test_cgroup_cpu_adjust_period(void) {
+ log_info("/* %s */", __func__);
+
+ /* Period 1ms, quota 40% -> Period 2.5ms */
+ assert_se(2500 == cgroup_cpu_adjust_period(USEC_PER_MSEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10ms, quota 10% -> keep. */
+ assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 100 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 1ms, quota 1000% -> keep. */
+ assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(USEC_PER_MSEC, 10000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 100ms, quota 30% -> keep. */
+ assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(100 * USEC_PER_MSEC, 300 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 5s, quota 40% -> adjust to 1s. */
+ assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(5 * USEC_PER_SEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 2s, quota 250% -> adjust to 1s. */
+ assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(2 * USEC_PER_SEC, 2500 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10us, quota 5,000,000% -> adjust to 1ms. */
+ assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(10, 50000000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10ms, quota 50,000% -> keep. */
+ assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 500000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10ms, quota 1% -> adjust to 100ms. */
+ assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10ms, quota .001% -> adjust to 1s. */
+ assert_se(1 * USEC_PER_SEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 0ms, quota 200% -> adjust to 1ms. */
+ assert_se(1 * USEC_PER_MSEC == cgroup_cpu_adjust_period(0, 2 * USEC_PER_SEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 0ms, quota 40% -> adjust to 2.5ms. */
+ assert_se(2500 == cgroup_cpu_adjust_period(0, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+}
+
+int main(int argc, char *argv[]) {
+ test_cgroup_cpu_adjust_period();
+ return 0;
+}
diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c
new file mode 100644
index 0000000..b53e327
--- /dev/null
+++ b/src/test/test-cgroup-mask.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "cgroup.h"
+#include "cgroup-util.h"
+#include "macro.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+#include "unit.h"
+
+#define ASSERT_CGROUP_MASK(got, expected) \
+ log_cgroup_mask(got, expected); \
+ assert_se(got == expected)
+
+#define ASSERT_CGROUP_MASK_JOINED(got, expected) ASSERT_CGROUP_MASK(got, CGROUP_MASK_EXTEND_JOINED(expected))
+
+static void log_cgroup_mask(CGroupMask got, CGroupMask expected) {
+ _cleanup_free_ char *e_store = NULL, *g_store = NULL;
+
+ assert_se(cg_mask_to_string(expected, &e_store) >= 0);
+ log_info("Expected mask: %s\n", e_store);
+ assert_se(cg_mask_to_string(got, &g_store) >= 0);
+ log_info("Got mask: %s\n", g_store);
+}
+
+static int test_cgroup_mask(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *son, *daughter, *parent, *root, *grandchild, *parent_deep, *nomem_parent, *nomem_leaf;
+ int r;
+ CGroupMask cpu_accounting_mask = get_cpu_accounting_mask();
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ /* Prepare the manager. */
+ _cleanup_free_ char *unit_dir = NULL;
+ assert_se(get_testdata_dir("units", &unit_dir) >= 0);
+ assert_se(set_unit_path(unit_dir) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
+ if (IN_SET(r, -EPERM, -EACCES)) {
+ log_error_errno(r, "manager_new: %m");
+ return log_tests_skipped("cannot create manager");
+ }
+
+ assert_se(r >= 0);
+
+ /* Turn off all kinds of default accouning, so that we can
+ * verify the masks resulting of our configuration and nothing
+ * else. */
+ m->default_cpu_accounting =
+ m->default_memory_accounting =
+ m->default_blockio_accounting =
+ m->default_io_accounting =
+ m->default_tasks_accounting = false;
+ m->default_tasks_max = TASKS_MAX_UNSET;
+
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ /* Load units and verify hierarchy. */
+ assert_se(manager_load_startable_unit_or_warn(m, "parent.slice", NULL, &parent) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "son.service", NULL, &son) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "daughter.service", NULL, &daughter) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "grandchild.service", NULL, &grandchild) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "parent-deep.slice", NULL, &parent_deep) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "nomem.slice", NULL, &nomem_parent) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "nomemleaf.service", NULL, &nomem_leaf) >= 0);
+ assert_se(UNIT_DEREF(son->slice) == parent);
+ assert_se(UNIT_DEREF(daughter->slice) == parent);
+ assert_se(UNIT_DEREF(parent_deep->slice) == parent);
+ assert_se(UNIT_DEREF(grandchild->slice) == parent_deep);
+ assert_se(UNIT_DEREF(nomem_leaf->slice) == nomem_parent);
+ root = UNIT_DEREF(parent->slice);
+ assert_se(UNIT_DEREF(nomem_parent->slice) == root);
+
+ /* Verify per-unit cgroups settings. */
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(son), CGROUP_MASK_CPU);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(daughter), cpu_accounting_mask);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent_deep), CGROUP_MASK_MEMORY);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_parent), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(root), 0);
+
+ /* Verify aggregation of member masks */
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(son), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(daughter), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent_deep), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_leaf), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+
+ /* Verify aggregation of sibling masks. */
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(son), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(daughter), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent_deep), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_parent), (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+
+ /* Verify aggregation of target masks. */
+ ASSERT_CGROUP_MASK(unit_get_target_mask(son), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(daughter), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK(unit_get_target_mask(parent_deep), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(nomem_parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(nomem_leaf), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+
+ /* Verify aggregation of enable masks. */
+ ASSERT_CGROUP_MASK(unit_get_enable_mask(son), 0);
+ ASSERT_CGROUP_MASK(unit_get_enable_mask(daughter), 0);
+ ASSERT_CGROUP_MASK(unit_get_enable_mask(grandchild), 0);
+ ASSERT_CGROUP_MASK(unit_get_enable_mask(parent_deep), 0);
+ ASSERT_CGROUP_MASK(unit_get_enable_mask(parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_enable_mask(nomem_parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_enable_mask(nomem_leaf), 0);
+ ASSERT_CGROUP_MASK(unit_get_enable_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+
+ return 0;
+}
+
+static void test_cg_mask_to_string_one(CGroupMask mask, const char *t) {
+ _cleanup_free_ char *b = NULL;
+
+ assert_se(cg_mask_to_string(mask, &b) >= 0);
+ assert_se(streq_ptr(b, t));
+}
+
+static void test_cg_mask_to_string(void) {
+ test_cg_mask_to_string_one(0, NULL);
+ test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct cpuset io blkio memory devices pids bpf-firewall bpf-devices");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPUSET, "cpuset");
+ test_cg_mask_to_string_one(CGROUP_MASK_IO, "io");
+ test_cg_mask_to_string_one(CGROUP_MASK_BLKIO, "blkio");
+ test_cg_mask_to_string_one(CGROUP_MASK_MEMORY, "memory");
+ test_cg_mask_to_string_one(CGROUP_MASK_DEVICES, "devices");
+ test_cg_mask_to_string_one(CGROUP_MASK_PIDS, "pids");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT, "cpu cpuacct");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPU|CGROUP_MASK_PIDS, "cpu pids");
+ test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT|CGROUP_MASK_PIDS, "cpuacct pids");
+ test_cg_mask_to_string_one(CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS, "devices pids");
+ test_cg_mask_to_string_one(CGROUP_MASK_IO|CGROUP_MASK_BLKIO, "io blkio");
+}
+
+int main(int argc, char* argv[]) {
+ int rc = EXIT_SUCCESS;
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_cg_mask_to_string();
+ TEST_REQ_RUNNING_SYSTEMD(rc = test_cgroup_mask());
+
+ return rc;
+}
diff --git a/src/test/test-cgroup-setup.c b/src/test/test-cgroup-setup.c
new file mode 100644
index 0000000..72726ca
--- /dev/null
+++ b/src/test/test-cgroup-setup.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "build.h"
+#include "cgroup-setup.h"
+#include "errno-util.h"
+#include "log.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_is_wanted_print(bool header) {
+ _cleanup_free_ char *cmdline = NULL;
+
+ log_info("-- %s --", __func__);
+ assert_se(proc_cmdline(&cmdline) >= 0);
+ log_info("cmdline: %s", cmdline);
+ if (header) {
+ log_info(_CGROUP_HIERARCHY_);
+ (void) system("findmnt -n /sys/fs/cgroup");
+ }
+
+ log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted()));
+ log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted()));
+ log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted()));
+ log_info(" ");
+}
+
+static void test_is_wanted(void) {
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0 "
+ "systemd.legacy_systemd_cgroup_controller", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0 "
+ "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0);
+ test_is_wanted_print(false);
+
+ /* cgroup_no_v1=all implies unified cgroup hierarchy, unless otherwise
+ * explicitly specified. */
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "cgroup_no_v1=all", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "cgroup_no_v1=all "
+ "systemd.unified_cgroup_hierarchy=0", 1) >= 0);
+ test_is_wanted_print(false);
+}
+
+int main(void) {
+ test_setup_logging(LOG_DEBUG);
+
+ if (access("/proc/cmdline", R_OK) < 0 && ERRNO_IS_PRIVILEGE(errno))
+ return log_tests_skipped("can't read /proc/cmdline");
+
+ test_is_wanted_print(true);
+ test_is_wanted_print(false); /* run twice to test caching */
+ test_is_wanted();
+
+ return 0;
+}
diff --git a/src/test/test-cgroup-unit-default.c b/src/test/test-cgroup-unit-default.c
new file mode 100644
index 0000000..b03f6ff
--- /dev/null
+++ b/src/test/test-cgroup-unit-default.c
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "cgroup.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "tests.h"
+#include "unit.h"
+
+static int test_default_memory_low(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *root, *dml,
+ *dml_passthrough, *dml_passthrough_empty, *dml_passthrough_set_dml, *dml_passthrough_set_ml,
+ *dml_override, *dml_override_empty,
+ *dml_discard, *dml_discard_empty, *dml_discard_set_ml;
+ uint64_t dml_tree_default;
+ int r;
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ _cleanup_free_ char *unit_dir = NULL;
+ assert_se(get_testdata_dir("units", &unit_dir) >= 0);
+ assert_se(set_unit_path(unit_dir) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
+ if (IN_SET(r, -EPERM, -EACCES)) {
+ log_error_errno(r, "manager_new: %m");
+ return log_tests_skipped("cannot create manager");
+ }
+
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ /* dml.slice has DefaultMemoryLow=50. Beyond that, individual subhierarchies look like this:
+ *
+ * 1. dml-passthrough.slice sets MemoryLow=100. This should not affect its children, as only
+ * DefaultMemoryLow is propagated, not MemoryLow. As such, all leaf services should end up with
+ * memory.low as 50, inherited from dml.slice, *except* for dml-passthrough-set-ml.service, which
+ * should have the value of 0, as it has MemoryLow explicitly set.
+ *
+ * ┌───────────┐
+ * │ dml.slice │
+ * └─────┬─────┘
+ * MemoryLow=100
+ * ┌───────────┴───────────┐
+ * │ dml-passthrough.slice │
+ * └───────────┬───────────┘
+ * ┌───────────────────────────────────┼───────────────────────────────────┐
+ * no new settings DefaultMemoryLow=15 MemoryLow=0
+ * ┌───────────────┴───────────────┐ ┌────────────────┴────────────────┐ ┌───────────────┴────────────────┐
+ * │ dml-passthrough-empty.service │ │ dml-passthrough-set-dml.service │ │ dml-passthrough-set-ml.service │
+ * └───────────────────────────────┘ └─────────────────────────────────┘ └────────────────────────────────┘
+ *
+ * 2. dml-override.slice sets DefaultMemoryLow=10. As such, dml-override-empty.service should also
+ * end up with a memory.low of 10. dml-override.slice should still have a memory.low of 50.
+ *
+ * ┌───────────┐
+ * │ dml.slice │
+ * └─────┬─────┘
+ * DefaultMemoryLow=10
+ * ┌─────────┴──────────┐
+ * │ dml-override.slice │
+ * └─────────┬──────────┘
+ * no new settings
+ * ┌─────────────┴──────────────┐
+ * │ dml-override-empty.service │
+ * └────────────────────────────┘
+ *
+ * 3. dml-discard.slice sets DefaultMemoryLow= with no rvalue. As such,
+ * dml-discard-empty.service should end up with a value of 0.
+ * dml-discard-set-ml.service sets MemoryLow=15, and as such should have that override the
+ * reset DefaultMemoryLow value. dml-discard.slice should still have an eventual memory.low of 50.
+ *
+ * ┌───────────┐
+ * │ dml.slice │
+ * └─────┬─────┘
+ * DefaultMemoryLow=
+ * ┌─────────┴─────────┐
+ * │ dml-discard.slice │
+ * └─────────┬─────────┘
+ * ┌──────────────┴───────────────┐
+ * no new settings MemoryLow=15
+ * ┌─────────────┴─────────────┐ ┌─────────────┴──────────────┐
+ * │ dml-discard-empty.service │ │ dml-discard-set-ml.service │
+ * └───────────────────────────┘ └────────────────────────────┘
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "dml.slice", NULL, &dml) >= 0);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-passthrough.slice", NULL, &dml_passthrough) >= 0);
+ assert_se(UNIT_DEREF(dml_passthrough->slice) == dml);
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-passthrough-empty.service", NULL, &dml_passthrough_empty) >= 0);
+ assert_se(UNIT_DEREF(dml_passthrough_empty->slice) == dml_passthrough);
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-passthrough-set-dml.service", NULL, &dml_passthrough_set_dml) >= 0);
+ assert_se(UNIT_DEREF(dml_passthrough_set_dml->slice) == dml_passthrough);
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-passthrough-set-ml.service", NULL, &dml_passthrough_set_ml) >= 0);
+ assert_se(UNIT_DEREF(dml_passthrough_set_ml->slice) == dml_passthrough);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-override.slice", NULL, &dml_override) >= 0);
+ assert_se(UNIT_DEREF(dml_override->slice) == dml);
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-override-empty.service", NULL, &dml_override_empty) >= 0);
+ assert_se(UNIT_DEREF(dml_override_empty->slice) == dml_override);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-discard.slice", NULL, &dml_discard) >= 0);
+ assert_se(UNIT_DEREF(dml_discard->slice) == dml);
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-discard-empty.service", NULL, &dml_discard_empty) >= 0);
+ assert_se(UNIT_DEREF(dml_discard_empty->slice) == dml_discard);
+ assert_se(manager_load_startable_unit_or_warn(m, "dml-discard-set-ml.service", NULL, &dml_discard_set_ml) >= 0);
+ assert_se(UNIT_DEREF(dml_discard_set_ml->slice) == dml_discard);
+
+ root = UNIT_DEREF(dml->slice);
+ assert_se(!UNIT_ISSET(root->slice));
+
+ assert_se(unit_get_ancestor_memory_low(root) == CGROUP_LIMIT_MIN);
+
+ assert_se(unit_get_ancestor_memory_low(dml) == CGROUP_LIMIT_MIN);
+ dml_tree_default = unit_get_cgroup_context(dml)->default_memory_low;
+ assert_se(dml_tree_default == 50);
+
+ assert_se(unit_get_ancestor_memory_low(dml_passthrough) == 100);
+ assert_se(unit_get_ancestor_memory_low(dml_passthrough_empty) == dml_tree_default);
+ assert_se(unit_get_ancestor_memory_low(dml_passthrough_set_dml) == 50);
+ assert_se(unit_get_ancestor_memory_low(dml_passthrough_set_ml) == 0);
+
+ assert_se(unit_get_ancestor_memory_low(dml_override) == dml_tree_default);
+ assert_se(unit_get_ancestor_memory_low(dml_override_empty) == 10);
+
+ assert_se(unit_get_ancestor_memory_low(dml_discard) == dml_tree_default);
+ assert_se(unit_get_ancestor_memory_low(dml_discard_empty) == CGROUP_LIMIT_MIN);
+ assert_se(unit_get_ancestor_memory_low(dml_discard_set_ml) == 15);
+
+ return 0;
+}
+
+int main(int argc, char* argv[]) {
+ int rc = EXIT_SUCCESS;
+
+ test_setup_logging(LOG_DEBUG);
+
+ TEST_REQ_RUNNING_SYSTEMD(rc = test_default_memory_low());
+
+ return rc;
+}
diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c
new file mode 100644
index 0000000..41b1df1
--- /dev/null
+++ b/src/test/test-cgroup-util.c
@@ -0,0 +1,455 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "build.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "user-util.h"
+#include "util.h"
+
+static void check_p_d_u(const char *path, int code, const char *result) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = cg_path_decode_unit(path, &unit);
+ printf("%s: %s → %s %d expected %s %d\n", __func__, path, unit, r, strnull(result), code);
+ assert_se(r == code);
+ assert_se(streq_ptr(unit, result));
+}
+
+static void test_path_decode_unit(void) {
+ check_p_d_u("getty@tty2.service", 0, "getty@tty2.service");
+ check_p_d_u("getty@tty2.service/", 0, "getty@tty2.service");
+ check_p_d_u("getty@tty2.service/xxx", 0, "getty@tty2.service");
+ check_p_d_u("getty@.service/", -ENXIO, NULL);
+ check_p_d_u("getty@.service", -ENXIO, NULL);
+ check_p_d_u("getty.service", 0, "getty.service");
+ check_p_d_u("getty", -ENXIO, NULL);
+ check_p_d_u("getty/waldo", -ENXIO, NULL);
+ check_p_d_u("_cpu.service", 0, "cpu.service");
+}
+
+static void check_p_g_u(const char *path, int code, const char *result) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = cg_path_get_unit(path, &unit);
+ printf("%s: %s → %s %d expected %s %d\n", __func__, path, unit, r, strnull(result), code);
+ assert_se(r == code);
+ assert_se(streq_ptr(unit, result));
+}
+
+static void test_path_get_unit(void) {
+ check_p_g_u("/system.slice/foobar.service/sdfdsaf", 0, "foobar.service");
+ check_p_g_u("/system.slice/getty@tty5.service", 0, "getty@tty5.service");
+ check_p_g_u("/system.slice/getty@tty5.service/aaa/bbb", 0, "getty@tty5.service");
+ check_p_g_u("/system.slice/getty@tty5.service/", 0, "getty@tty5.service");
+ check_p_g_u("/system.slice/getty@tty6.service/tty5", 0, "getty@tty6.service");
+ check_p_g_u("sadfdsafsda", -ENXIO, NULL);
+ check_p_g_u("/system.slice/getty####@tty6.service/xxx", -ENXIO, NULL);
+ check_p_g_u("/system.slice/system-waldo.slice/foobar.service/sdfdsaf", 0, "foobar.service");
+ check_p_g_u("/system.slice/system-waldo.slice/_cpu.service/sdfdsaf", 0, "cpu.service");
+ check_p_g_u("/user.slice/user-1000.slice/user@1000.service/server.service", 0, "user@1000.service");
+ check_p_g_u("/user.slice/user-1000.slice/user@.service/server.service", -ENXIO, NULL);
+}
+
+static void check_p_g_u_u(const char *path, int code, const char *result) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = cg_path_get_user_unit(path, &unit);
+ printf("%s: %s → %s %d expected %s %d\n", __func__, path, unit, r, strnull(result), code);
+ assert_se(r == code);
+ assert_se(streq_ptr(unit, result));
+}
+
+static void test_path_get_user_unit(void) {
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/foobar.service", 0, "foobar.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/waldo.slice/foobar.service", 0, "foobar.service");
+ check_p_g_u_u("/user.slice/user-1002.slice/session-2.scope/foobar.service/waldo", 0, "foobar.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/foobar.service/waldo/uuuux", 0, "foobar.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/waldo/waldo/uuuux", -ENXIO, NULL);
+ check_p_g_u_u("/user.slice/user-1000.slice/session-2.scope/foobar@pie.service/pa/po", 0, "foobar@pie.service");
+ check_p_g_u_u("/session-2.scope/foobar@pie.service/pa/po", 0, "foobar@pie.service");
+ check_p_g_u_u("/xyz.slice/xyz-waldo.slice/session-77.scope/foobar@pie.service/pa/po", 0, "foobar@pie.service");
+ check_p_g_u_u("/meh.service", -ENXIO, NULL);
+ check_p_g_u_u("/session-3.scope/_cpu.service", 0, "cpu.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/user@1000.service/server.service", 0, "server.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/user@1000.service/foobar.slice/foobar@pie.service", 0, "foobar@pie.service");
+ check_p_g_u_u("/user.slice/user-1000.slice/user@.service/server.service", -ENXIO, NULL);
+}
+
+static void check_p_g_s(const char *path, int code, const char *result) {
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(cg_path_get_session(path, &s) == code);
+ assert_se(streq_ptr(s, result));
+}
+
+static void test_path_get_session(void) {
+ check_p_g_s("/user.slice/user-1000.slice/session-2.scope/foobar.service", 0, "2");
+ check_p_g_s("/session-3.scope", 0, "3");
+ check_p_g_s("/session-.scope", -ENXIO, NULL);
+ check_p_g_s("", -ENXIO, NULL);
+}
+
+static void check_p_g_o_u(const char *path, int code, uid_t result) {
+ uid_t uid = 0;
+
+ assert_se(cg_path_get_owner_uid(path, &uid) == code);
+ assert_se(uid == result);
+}
+
+static void test_path_get_owner_uid(void) {
+ check_p_g_o_u("/user.slice/user-1000.slice/session-2.scope/foobar.service", 0, 1000);
+ check_p_g_o_u("/user.slice/user-1006.slice", 0, 1006);
+ check_p_g_o_u("", -ENXIO, 0);
+}
+
+static void check_p_g_slice(const char *path, int code, const char *result) {
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(cg_path_get_slice(path, &s) == code);
+ assert_se(streq_ptr(s, result));
+}
+
+static void test_path_get_slice(void) {
+ check_p_g_slice("/user.slice", 0, "user.slice");
+ check_p_g_slice("/foobar", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_slice("/user.slice/user-waldo.slice", 0, "user-waldo.slice");
+ check_p_g_slice("", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_slice("foobar", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_slice("foobar.slice", 0, "foobar.slice");
+ check_p_g_slice("foo.slice/foo-bar.slice/waldo.service", 0, "foo-bar.slice");
+}
+
+static void check_p_g_u_slice(const char *path, int code, const char *result) {
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(cg_path_get_user_slice(path, &s) == code);
+ assert_se(streq_ptr(s, result));
+}
+
+static void test_path_get_user_slice(void) {
+ check_p_g_u_slice("/user.slice", -ENXIO, NULL);
+ check_p_g_u_slice("/foobar", -ENXIO, NULL);
+ check_p_g_u_slice("/user.slice/user-waldo.slice", -ENXIO, NULL);
+ check_p_g_u_slice("", -ENXIO, NULL);
+ check_p_g_u_slice("foobar", -ENXIO, NULL);
+ check_p_g_u_slice("foobar.slice", -ENXIO, NULL);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/waldo.service", -ENXIO, NULL);
+
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service/", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service///", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service/waldo.service", 0, SPECIAL_ROOT_SLICE);
+ check_p_g_u_slice("foo.slice/foo-bar.slice/user@1000.service/piep.slice/foo.service", 0, "piep.slice");
+ check_p_g_u_slice("/foo.slice//foo-bar.slice/user@1000.service/piep.slice//piep-pap.slice//foo.service", 0, "piep-pap.slice");
+}
+
+static void test_get_paths(void) {
+ _cleanup_free_ char *a = NULL;
+
+ assert_se(cg_get_root_path(&a) >= 0);
+ log_info("Root = %s", a);
+}
+
+static void test_proc(void) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r;
+
+ d = opendir("/proc");
+ assert_se(d);
+
+ FOREACH_DIRENT(de, d, break) {
+ _cleanup_free_ char *path = NULL, *path_shifted = NULL, *session = NULL, *unit = NULL, *user_unit = NULL, *machine = NULL, *slice = NULL;
+ pid_t pid;
+ uid_t uid = UID_INVALID;
+
+ if (!IN_SET(de->d_type, DT_DIR, DT_UNKNOWN))
+ continue;
+
+ r = parse_pid(de->d_name, &pid);
+ if (r < 0)
+ continue;
+
+ if (is_kernel_thread(pid))
+ continue;
+
+ cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &path);
+ cg_pid_get_path_shifted(pid, NULL, &path_shifted);
+ cg_pid_get_owner_uid(pid, &uid);
+ cg_pid_get_session(pid, &session);
+ cg_pid_get_unit(pid, &unit);
+ cg_pid_get_user_unit(pid, &user_unit);
+ cg_pid_get_machine_name(pid, &machine);
+ cg_pid_get_slice(pid, &slice);
+
+ printf(PID_FMT"\t%s\t%s\t"UID_FMT"\t%s\t%s\t%s\t%s\t%s\n",
+ pid,
+ path,
+ path_shifted,
+ uid,
+ session,
+ unit,
+ user_unit,
+ machine,
+ slice);
+ }
+}
+
+static void test_escape_one(const char *s, const char *r) {
+ _cleanup_free_ char *b;
+
+ b = cg_escape(s);
+ assert_se(b);
+ assert_se(streq(b, r));
+
+ assert_se(streq(cg_unescape(b), s));
+}
+
+static void test_escape(void) {
+ test_escape_one("foobar", "foobar");
+ test_escape_one(".foobar", "_.foobar");
+ test_escape_one("foobar.service", "foobar.service");
+ test_escape_one("cgroup.service", "_cgroup.service");
+ test_escape_one("tasks", "_tasks");
+ if (access("/sys/fs/cgroup/cpu", F_OK) == 0)
+ test_escape_one("cpu.service", "_cpu.service");
+ test_escape_one("_foobar", "__foobar");
+ test_escape_one("", "_");
+ test_escape_one("_", "__");
+ test_escape_one(".", "_.");
+}
+
+static void test_controller_is_valid(void) {
+ assert_se(cg_controller_is_valid("foobar"));
+ assert_se(cg_controller_is_valid("foo_bar"));
+ assert_se(cg_controller_is_valid("name=foo"));
+ assert_se(!cg_controller_is_valid(""));
+ assert_se(!cg_controller_is_valid("name="));
+ assert_se(!cg_controller_is_valid("="));
+ assert_se(!cg_controller_is_valid("cpu,cpuacct"));
+ assert_se(!cg_controller_is_valid("_"));
+ assert_se(!cg_controller_is_valid("_foobar"));
+ assert_se(!cg_controller_is_valid("tatü"));
+}
+
+static void test_slice_to_path_one(const char *unit, const char *path, int error) {
+ _cleanup_free_ char *ret = NULL;
+ int r;
+
+ log_info("unit: %s", unit);
+
+ r = cg_slice_to_path(unit, &ret);
+ log_info("actual: %s / %d", strnull(ret), r);
+ log_info("expect: %s / %d", strnull(path), error);
+ assert_se(r == error);
+ assert_se(streq_ptr(ret, path));
+}
+
+static void test_slice_to_path(void) {
+ test_slice_to_path_one("foobar.slice", "foobar.slice", 0);
+ test_slice_to_path_one("foobar-waldo.slice", "foobar.slice/foobar-waldo.slice", 0);
+ test_slice_to_path_one("foobar-waldo.service", NULL, -EINVAL);
+ test_slice_to_path_one(SPECIAL_ROOT_SLICE, "", 0);
+ test_slice_to_path_one("--.slice", NULL, -EINVAL);
+ test_slice_to_path_one("-", NULL, -EINVAL);
+ test_slice_to_path_one("-foo-.slice", NULL, -EINVAL);
+ test_slice_to_path_one("-foo.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo-.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo--bar.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo.slice/foo--bar.slice", NULL, -EINVAL);
+ test_slice_to_path_one("a-b.slice", "a.slice/a-b.slice", 0);
+ test_slice_to_path_one("a-b-c-d-e.slice", "a.slice/a-b.slice/a-b-c.slice/a-b-c-d.slice/a-b-c-d-e.slice", 0);
+
+ test_slice_to_path_one("foobar@.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foobar@waldo.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foobar@waldo.service", NULL, -EINVAL);
+ test_slice_to_path_one("-foo@-.slice", NULL, -EINVAL);
+ test_slice_to_path_one("-foo@.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo@-.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo@@bar.slice", NULL, -EINVAL);
+ test_slice_to_path_one("foo.slice/foo@@bar.slice", NULL, -EINVAL);
+}
+
+static void test_shift_path_one(const char *raw, const char *root, const char *shifted) {
+ const char *s = NULL;
+
+ assert_se(cg_shift_path(raw, root, &s) >= 0);
+ assert_se(streq(s, shifted));
+}
+
+static void test_shift_path(void) {
+
+ test_shift_path_one("/foobar/waldo", "/", "/foobar/waldo");
+ test_shift_path_one("/foobar/waldo", "", "/foobar/waldo");
+ test_shift_path_one("/foobar/waldo", "/foobar", "/waldo");
+ test_shift_path_one("/foobar/waldo", "/hogehoge", "/foobar/waldo");
+}
+
+static void test_mask_supported(void) {
+
+ CGroupMask m;
+ CGroupController c;
+
+ assert_se(cg_mask_supported(&m) >= 0);
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
+ printf("'%s' is supported: %s\n", cgroup_controller_to_string(c), yes_no(m & CGROUP_CONTROLLER_TO_MASK(c)));
+}
+
+static void test_is_cgroup_fs(void) {
+ struct statfs sfs;
+ assert_se(statfs("/sys/fs/cgroup", &sfs) == 0);
+ if (is_temporary_fs(&sfs))
+ assert_se(statfs("/sys/fs/cgroup/systemd", &sfs) == 0);
+ assert_se(is_cgroup_fs(&sfs));
+}
+
+static void test_fd_is_cgroup_fs(void) {
+ int fd;
+
+ fd = open("/sys/fs/cgroup", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ assert_se(fd >= 0);
+ if (fd_is_temporary_fs(fd)) {
+ fd = safe_close(fd);
+ fd = open("/sys/fs/cgroup/systemd", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ assert_se(fd >= 0);
+ }
+ assert_se(fd_is_cgroup_fs(fd));
+ fd = safe_close(fd);
+}
+
+static void test_cg_tests(void) {
+ int all, hybrid, systemd, r;
+
+ r = cg_unified();
+ if (r == -ENOMEDIUM) {
+ log_notice_errno(r, "Skipping cg hierarchy tests: %m");
+ return;
+ }
+ assert_se(r >= 0);
+
+ all = cg_all_unified();
+ assert_se(IN_SET(all, 0, 1));
+
+ hybrid = cg_hybrid_unified();
+ assert_se(IN_SET(hybrid, 0, 1));
+
+ systemd = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ assert_se(IN_SET(systemd, 0, 1));
+
+ if (all) {
+ assert_se(systemd);
+ assert_se(!hybrid);
+
+ } else if (hybrid) {
+ assert_se(systemd);
+ assert_se(!all);
+
+ } else
+ assert_se(!systemd);
+}
+
+static void test_cg_get_keyed_attribute(void) {
+ _cleanup_free_ char *val = NULL;
+ char *vals3[3] = {}, *vals3a[3] = {};
+ int i, r;
+
+ r = cg_get_keyed_attribute("cpu", "/init.scope", "no_such_file", STRV_MAKE("no_such_attr"), &val);
+ if (r == -ENOMEDIUM || ERRNO_IS_PRIVILEGE(r)) {
+ log_info_errno(r, "Skipping most of %s, /sys/fs/cgroup not accessible: %m", __func__);
+ return;
+ }
+
+ assert_se(r == -ENOENT);
+ assert_se(val == NULL);
+
+ if (access("/sys/fs/cgroup/init.scope/cpu.stat", R_OK) < 0) {
+ log_info_errno(errno, "Skipping most of %s, /init.scope/cpu.stat not accessible: %m", __func__);
+ return;
+ }
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("no_such_attr"), &val) == -ENXIO);
+ assert_se(cg_get_keyed_attribute_graceful("cpu", "/init.scope", "cpu.stat", STRV_MAKE("no_such_attr"), &val) == 0);
+ assert_se(val == NULL);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec"), &val) == 0);
+ val = mfree(val);
+
+ assert_se(cg_get_keyed_attribute_graceful("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec"), &val) == 1);
+ log_info("cpu /init.scope cpu.stat [usage_usec] → \"%s\"", val);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "no_such_attr"), vals3) == -ENXIO);
+ assert_se(cg_get_keyed_attribute_graceful("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "no_such_attr"), vals3) == 1);
+ assert(vals3[0] && !vals3[1]);
+ free(vals3[0]);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "usage_usec"), vals3) == -ENXIO);
+ assert_se(cg_get_keyed_attribute_graceful("cpu", "/init.scope", "cpu.stat", STRV_MAKE("usage_usec", "usage_usec"), vals3) == 1);
+ assert(vals3[0] && !vals3[1]);
+ free(vals3[0]);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat",
+ STRV_MAKE("usage_usec", "user_usec", "system_usec"), vals3) == 0);
+ for (i = 0; i < 3; i++)
+ free(vals3[i]);
+
+ assert_se(cg_get_keyed_attribute_graceful("cpu", "/init.scope", "cpu.stat",
+ STRV_MAKE("usage_usec", "user_usec", "system_usec"), vals3) == 3);
+ log_info("cpu /init.scope cpu.stat [usage_usec user_usec system_usec] → \"%s\", \"%s\", \"%s\"",
+ vals3[0], vals3[1], vals3[2]);
+
+ assert_se(cg_get_keyed_attribute("cpu", "/init.scope", "cpu.stat",
+ STRV_MAKE("system_usec", "user_usec", "usage_usec"), vals3a) == 0);
+ for (i = 0; i < 3; i++)
+ free(vals3a[i]);
+
+ assert_se(cg_get_keyed_attribute_graceful("cpu", "/init.scope", "cpu.stat",
+ STRV_MAKE("system_usec", "user_usec", "usage_usec"), vals3a) == 3);
+ log_info("cpu /init.scope cpu.stat [system_usec user_usec usage_usec] → \"%s\", \"%s\", \"%s\"",
+ vals3a[0], vals3a[1], vals3a[2]);
+
+ for (i = 0; i < 3; i++) {
+ free(vals3[i]);
+ free(vals3a[i]);
+ }
+}
+
+int main(void) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_path_decode_unit();
+ test_path_get_unit();
+ test_path_get_user_unit();
+ test_path_get_session();
+ test_path_get_owner_uid();
+ test_path_get_slice();
+ test_path_get_user_slice();
+ TEST_REQ_RUNNING_SYSTEMD(test_get_paths());
+ test_proc();
+ TEST_REQ_RUNNING_SYSTEMD(test_escape());
+ test_controller_is_valid();
+ test_slice_to_path();
+ test_shift_path();
+ TEST_REQ_RUNNING_SYSTEMD(test_mask_supported());
+ TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs());
+ TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs());
+ test_cg_tests();
+ test_cg_get_keyed_attribute();
+
+ return 0;
+}
diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c
new file mode 100644
index 0000000..722e11a
--- /dev/null
+++ b/src/test/test-cgroup.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "errno-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_cg_split_spec(void) {
+ char *c, *p;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(cg_split_spec("foobar:/", &c, &p) == 0);
+ assert_se(streq(c, "foobar"));
+ assert_se(streq(p, "/"));
+ c = mfree(c);
+ p = mfree(p);
+
+ assert_se(cg_split_spec("foobar:", &c, &p) == 0);
+ c = mfree(c);
+ p = mfree(p);
+
+ assert_se(cg_split_spec("foobar:asdfd", &c, &p) < 0);
+ assert_se(cg_split_spec(":///", &c, &p) < 0);
+ assert_se(cg_split_spec(":", &c, &p) < 0);
+ assert_se(cg_split_spec("", &c, &p) < 0);
+ assert_se(cg_split_spec("fo/obar:/", &c, &p) < 0);
+
+ assert_se(cg_split_spec("/", &c, &p) >= 0);
+ assert_se(c == NULL);
+ assert_se(streq(p, "/"));
+ p = mfree(p);
+
+ assert_se(cg_split_spec("foo", &c, &p) >= 0);
+ assert_se(streq(c, "foo"));
+ assert_se(p == NULL);
+ c = mfree(c);
+}
+
+static void test_cg_create(void) {
+ log_info("/* %s */", __func__);
+ int r;
+
+ r = cg_unified_cached(false);
+ if (r < 0) {
+ log_info_errno(r, "Skipping %s: %m", __func__);
+ return;
+ }
+
+ _cleanup_free_ char *here = NULL;
+ assert_se(cg_pid_get_path_shifted(0, NULL, &here) >= 0);
+
+ const char *test_a = prefix_roota(here, "/test-a"),
+ *test_b = prefix_roota(here, "/test-b"),
+ *test_c = prefix_roota(here, "/test-b/test-c"),
+ *test_d = prefix_roota(here, "/test-b/test-d");
+ char *path;
+
+ log_info("Paths for test:\n%s\n%s", test_a, test_b);
+
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, test_a);
+ if (IN_SET(r, -EPERM, -EACCES, -EROFS)) {
+ log_info_errno(r, "Skipping %s: %m", __func__);
+ return;
+ }
+
+ assert_se(r == 1);
+ assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, test_a) == 0);
+ assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, test_b) == 1);
+ assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, test_c) == 1);
+ assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, test_b, 0) == 0);
+
+ assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid_cached(), &path) == 0);
+ assert_se(streq(path, test_b));
+ free(path);
+
+ assert_se(cg_attach(SYSTEMD_CGROUP_CONTROLLER, test_a, 0) == 0);
+
+ assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid_cached(), &path) == 0);
+ assert_se(path_equal(path, test_a));
+ free(path);
+
+ assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, test_d, 0) == 1);
+
+ assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid_cached(), &path) == 0);
+ assert_se(path_equal(path, test_d));
+ free(path);
+
+ assert_se(cg_get_path(SYSTEMD_CGROUP_CONTROLLER, test_d, NULL, &path) == 0);
+ log_debug("test_d: %s", path);
+ const char *full_d;
+ if (cg_all_unified())
+ full_d = strjoina("/sys/fs/cgroup", test_d);
+ else if (cg_hybrid_unified())
+ full_d = strjoina("/sys/fs/cgroup/unified", test_d);
+ else
+ full_d = strjoina("/sys/fs/cgroup/systemd", test_d);
+ assert_se(path_equal(path, full_d));
+ free(path);
+
+ assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, test_a) > 0);
+ assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, test_b) > 0);
+ assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, test_a) > 0);
+ assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, test_b) == 0);
+
+ assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, test_a, 0, 0, NULL, NULL, NULL) == 0);
+ assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, test_b, 0, 0, NULL, NULL, NULL) > 0);
+
+ assert_se(cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, test_b, SYSTEMD_CGROUP_CONTROLLER, test_a, 0) > 0);
+
+ assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, test_a) == 0);
+ assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, test_b) > 0);
+
+ assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, test_a, 0, 0, NULL, NULL, NULL) > 0);
+ assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, test_b, 0, 0, NULL, NULL, NULL) == 0);
+
+ cg_trim(SYSTEMD_CGROUP_CONTROLLER, test_b, false);
+
+ assert_se(cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, test_b) == 0);
+ assert_se(cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, test_a) < 0);
+ assert_se(cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, test_a, SYSTEMD_CGROUP_CONTROLLER, here, 0) > 0);
+ assert_se(cg_rmdir(SYSTEMD_CGROUP_CONTROLLER, test_a) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_cg_split_spec();
+ test_cg_create();
+
+ return 0;
+}
diff --git a/src/test/test-chase-symlinks.c b/src/test/test-chase-symlinks.c
new file mode 100644
index 0000000..d9b9b62
--- /dev/null
+++ b/src/test/test-chase-symlinks.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include <getopt.h>
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "main-func.h"
+
+static char *arg_root = NULL;
+static int arg_flags = 0;
+static bool arg_open = false;
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_ROOT = 0x1000,
+ ARG_OPEN,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "open", no_argument, NULL, ARG_OPEN },
+
+ { "prefix-root", no_argument, NULL, CHASE_PREFIX_ROOT },
+ { "nonexistent", no_argument, NULL, CHASE_NONEXISTENT },
+ { "no_autofs", no_argument, NULL, CHASE_NO_AUTOFS },
+ { "safe", no_argument, NULL, CHASE_SAFE },
+ { "trail-slash", no_argument, NULL, CHASE_TRAIL_SLASH },
+ { "step", no_argument, NULL, CHASE_STEP },
+ { "nofollow", no_argument, NULL, CHASE_NOFOLLOW },
+ { "warn", no_argument, NULL, CHASE_WARN },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ printf("Syntax:\n"
+ " %s [OPTION...] path...\n"
+ "Options:\n"
+ , argv[0]);
+ for (size_t i = 0; i < ELEMENTSOF(options) - 1; i++)
+ printf(" --%s\n", options[i].name);
+ return 0;
+
+ case ARG_ROOT:
+ arg_root = optarg;
+ break;
+
+ case ARG_OPEN:
+ arg_open = true;
+ break;
+
+ case CHASE_PREFIX_ROOT:
+ case CHASE_NONEXISTENT:
+ case CHASE_NO_AUTOFS:
+ case CHASE_SAFE:
+ case CHASE_TRAIL_SLASH:
+ case CHASE_STEP:
+ case CHASE_NOFOLLOW:
+ case CHASE_WARN:
+ arg_flags |= c;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind == argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "At least one argument is required.");
+
+ return 1;
+}
+
+static int run(int argc, char **argv) {
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ for (int i = optind; i < argc; i++) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+
+ printf("%s ", argv[i]);
+ fflush(stdout);
+
+ r = chase_symlinks(argv[i], arg_root, arg_flags, &p, arg_open ? &fd : NULL);
+ if (r < 0)
+ log_error_errno(r, "failed: %m");
+ else {
+ log_info("→ %s", p);
+ if (arg_open)
+ assert(fd >= 0);
+ else
+ assert(fd == -1);
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/test/test-chown-rec.c b/src/test/test-chown-rec.c
new file mode 100644
index 0000000..66c6fd9
--- /dev/null
+++ b/src/test/test-chown-rec.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "chown-recursive.h"
+#include "log.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static const uint8_t acl[] = {
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x02, 0x00, 0x07, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x10, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x20, 0x00, 0x05, 0x00,
+ 0xff, 0xff, 0xff, 0xff,
+};
+
+static const uint8_t default_acl[] = {
+ 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x04, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x08, 0x00, 0x07, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x07, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0x20, 0x00, 0x05, 0x00,
+ 0xff, 0xff, 0xff, 0xff,
+};
+
+static bool has_xattr(const char *p) {
+ char buffer[sizeof(acl) * 4];
+
+ if (lgetxattr(p, "system.posix_acl_access", buffer, sizeof(buffer)) < 0) {
+ if (IN_SET(errno, EOPNOTSUPP, ENOTTY, ENODATA, ENOSYS))
+ return false;
+ }
+
+ return true;
+}
+
+static void test_chown_recursive(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *t = NULL;
+ struct stat st;
+ const char *p;
+ const uid_t uid = getuid();
+ const gid_t gid = getgid();
+
+ umask(022);
+ assert_se(mkdtemp_malloc(NULL, &t) >= 0);
+
+ p = strjoina(t, "/dir");
+ assert_se(mkdir(p, 0777) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == uid);
+ assert_se(st.st_gid == gid);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/symlink");
+ assert_se(symlink("../../", p) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0777);
+ assert_se(st.st_uid == uid);
+ assert_se(st.st_gid == gid);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/reg");
+ assert_se(mknod(p, S_IFREG|0777, 0) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == uid);
+ assert_se(st.st_gid == gid);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/sock");
+ assert_se(mknod(p, S_IFSOCK|0777, 0) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISSOCK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == uid);
+ assert_se(st.st_gid == gid);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/fifo");
+ assert_se(mknod(p, S_IFIFO|0777, 0) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISFIFO(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == uid);
+ assert_se(st.st_gid == gid);
+ assert_se(!has_xattr(p));
+
+ /* We now apply an xattr to the dir, and check it again */
+ p = strjoina(t, "/dir");
+ assert_se(setxattr(p, "system.posix_acl_access", acl, sizeof(acl), 0) >= 0);
+ assert_se(setxattr(p, "system.posix_acl_default", default_acl, sizeof(default_acl), 0) >= 0);
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0775); /* acl change changed the mode too */
+ assert_se(st.st_uid == uid);
+ assert_se(st.st_gid == gid);
+ assert_se(has_xattr(p));
+
+ assert_se(path_chown_recursive(t, 1, 2, 07777) >= 0);
+
+ p = strjoina(t, "/dir");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0775);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/symlink");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0777);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/reg");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/sock");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISSOCK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+
+ p = strjoina(t, "/dir/fifo");
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISFIFO(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0755);
+ assert_se(st.st_uid == 1);
+ assert_se(st.st_gid == 2);
+ assert_se(!has_xattr(p));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ if (geteuid() != 0)
+ return log_tests_skipped("not running as root");
+
+ test_chown_recursive();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-clock.c b/src/test/test-clock.c
new file mode 100644
index 0000000..714935c
--- /dev/null
+++ b/src/test/test-clock.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2016 Canonical Ltd.
+***/
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "clock-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "tmpfile-util.h"
+
+static void test_clock_is_localtime(void) {
+ _cleanup_(unlink_tempfilep) char adjtime[] = "/tmp/test-adjtime.XXXXXX";
+ _cleanup_fclose_ FILE* f = NULL;
+
+ static const struct scenario {
+ const char* contents;
+ int expected_result;
+ } scenarios[] = {
+ /* adjtime configures UTC */
+ {"0.0 0 0\n0\nUTC\n", 0},
+ /* adjtime configures local time */
+ {"0.0 0 0\n0\nLOCAL\n", 1},
+ /* no final EOL */
+ {"0.0 0 0\n0\nUTC", 0},
+ {"0.0 0 0\n0\nLOCAL", 1},
+ /* empty value -> defaults to UTC */
+ {"0.0 0 0\n0\n", 0},
+ /* unknown value -> defaults to UTC */
+ {"0.0 0 0\n0\nFOO\n", 0},
+ /* no third line */
+ {"0.0 0 0", 0},
+ {"0.0 0 0\n", 0},
+ {"0.0 0 0\n0", 0},
+ };
+
+ /* without an adjtime file we default to UTC */
+ assert_se(clock_is_localtime("/nonexisting/adjtime") == 0);
+
+ assert_se(fmkostemp_safe(adjtime, "w", &f) == 0);
+ log_info("adjtime test file: %s", adjtime);
+
+ for (size_t i = 0; i < ELEMENTSOF(scenarios); ++i) {
+ log_info("scenario #%zu:, expected result %i", i, scenarios[i].expected_result);
+ log_info("%s", scenarios[i].contents);
+ rewind(f);
+ assert_se(ftruncate(fileno(f), 0) == 0);
+ assert_se(write_string_stream(f, scenarios[i].contents, WRITE_STRING_FILE_AVOID_NEWLINE) == 0);
+ assert_se(clock_is_localtime(adjtime) == scenarios[i].expected_result);
+ }
+}
+
+/* Test with the real /etc/adjtime */
+static void test_clock_is_localtime_system(void) {
+ int r;
+ r = clock_is_localtime(NULL);
+
+ if (access("/etc/adjtime", R_OK) == 0) {
+ log_info("/etc/adjtime is readable, clock_is_localtime() == %i", r);
+ /* if /etc/adjtime exists we expect some answer, no error or
+ * crash */
+ assert_se(IN_SET(r, 0, 1));
+ } else
+ /* default is UTC if there is no /etc/adjtime */
+ assert_se(r == 0 || ERRNO_IS_PRIVILEGE(r));
+}
+
+int main(int argc, char *argv[]) {
+ test_clock_is_localtime();
+ test_clock_is_localtime_system();
+
+ return 0;
+}
diff --git a/src/test/test-condition.c b/src/test/test-condition.c
new file mode 100644
index 0000000..15099d8
--- /dev/null
+++ b/src/test/test-condition.c
@@ -0,0 +1,869 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "architecture.h"
+#include "audit-util.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "cpu-set-util.h"
+#include "efi-loader.h"
+#include "errno-util.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "ima-util.h"
+#include "limits-util.h"
+#include "log.h"
+#include "macro.h"
+#include "nulstr-util.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "set.h"
+#include "smack-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tomoyo-util.h"
+#include "user-record.h"
+#include "user-util.h"
+#include "virt.h"
+
+static void test_condition_test_path(void) {
+ Condition *condition;
+
+ condition = condition_new(CONDITION_PATH_EXISTS, "/bin/sh", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS, "/bin/s?", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS_GLOB, "/bin/s?", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS_GLOB, "/bin/s?", false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS, "/thiscertainlywontexist", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_EXISTS, "/thiscertainlywontexist", false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_DIRECTORY, "/bin", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_DIRECTORY_NOT_EMPTY, "/bin", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_FILE_NOT_EMPTY, "/bin/sh", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_FILE_IS_EXECUTABLE, "/bin/sh", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_FILE_IS_EXECUTABLE, "/etc/passwd", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_MOUNT_POINT, "/proc", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_MOUNT_POINT, "/", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_MOUNT_POINT, "/bin", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_READ_WRITE, "/tmp", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_ENCRYPTED, "/sys", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_PATH_IS_SYMBOLIC_LINK, "/dev/stdout", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_control_group_controller(void) {
+ Condition *condition;
+ CGroupMask system_mask;
+ CGroupController controller;
+ _cleanup_free_ char *controller_name = NULL;
+ int r;
+
+ r = cg_unified();
+ if (r < 0) {
+ log_notice_errno(r, "Skipping ConditionControlGroupController tests: %m");
+ return;
+ }
+
+ /* Invalid controllers are ignored */
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, "thisisnotarealcontroller", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, "thisisnotarealcontroller", false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ assert_se(cg_mask_supported(&system_mask) >= 0);
+
+ /* Individual valid controllers one by one */
+ for (controller = 0; controller < _CGROUP_CONTROLLER_MAX; controller++) {
+ const char *local_controller_name = cgroup_controller_to_string(controller);
+ log_info("chosen controller is '%s'", local_controller_name);
+ if (system_mask & CGROUP_CONTROLLER_TO_MASK(controller)) {
+ log_info("this controller is available");
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, local_controller_name, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, local_controller_name, false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+ } else {
+ log_info("this controller is unavailable");
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, local_controller_name, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, local_controller_name, false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+ }
+ }
+
+ /* Multiple valid controllers at the same time */
+ assert_se(cg_mask_to_string(system_mask, &controller_name) >= 0);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, strempty(controller_name), false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_CONTROL_GROUP_CONTROLLER, strempty(controller_name), false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_ac_power(void) {
+ Condition *condition;
+
+ condition = condition_new(CONDITION_AC_POWER, "true", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == on_ac_power());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_AC_POWER, "false", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) != on_ac_power());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_AC_POWER, "false", false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == on_ac_power());
+ condition_free(condition);
+}
+
+static void test_condition_test_host(void) {
+ _cleanup_free_ char *hostname = NULL;
+ char sid[SD_ID128_STRING_MAX];
+ Condition *condition;
+ sd_id128_t id;
+ int r;
+
+ r = sd_id128_get_machine(&id);
+ assert_se(r >= 0);
+ assert_se(sd_id128_to_string(id, sid));
+
+ condition = condition_new(CONDITION_HOST, sid, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_HOST, "garbage value jjjjjjjjjjjjjj", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_HOST, sid, false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ hostname = gethostname_malloc();
+ assert_se(hostname);
+
+ /* if hostname looks like an id128 then skip testing it */
+ if (id128_is_valid(hostname))
+ log_notice("hostname is an id128, skipping test");
+ else {
+ condition = condition_new(CONDITION_HOST, hostname, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+ }
+}
+
+static void test_condition_test_architecture(void) {
+ Condition *condition;
+ const char *sa;
+ int a;
+
+ a = uname_architecture();
+ assert_se(a >= 0);
+
+ sa = architecture_to_string(a);
+ assert_se(sa);
+
+ condition = condition_new(CONDITION_ARCHITECTURE, sa, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_ARCHITECTURE, "garbage value", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_ARCHITECTURE, sa, false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_kernel_command_line(void) {
+ Condition *condition;
+ int r;
+
+ condition = condition_new(CONDITION_KERNEL_COMMAND_LINE, "thisreallyshouldntbeonthekernelcommandline", false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ if (ERRNO_IS_PRIVILEGE(r))
+ return;
+ assert_se(r == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_COMMAND_LINE, "andthis=neither", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_kernel_version(void) {
+ Condition *condition;
+ struct utsname u;
+ const char *v;
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "*thisreallyshouldntbeinthekernelversion*", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "*", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ /* An artificially empty condition. It evaluates to true, but normally
+ * such condition cannot be created, because the condition list is reset instead. */
+ condition = condition_new(CONDITION_KERNEL_VERSION, "", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ assert_se(uname(&u) >= 0);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, u.release, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ strshorten(u.release, 4);
+ strcpy(strchr(u.release, 0), "*");
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, u.release, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ /* 0.1.2 would be a very very very old kernel */
+ condition = condition_new(CONDITION_KERNEL_VERSION, "> 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, ">0.1.2", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "'>0.1.2' '<9.0.0'", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "> 0.1.2 < 9.0.0", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == -EINVAL);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, ">", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == -EINVAL);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, ">= 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "< 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "<= 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "= 0.1.2", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ /* 4711.8.15 is a very very very future kernel */
+ condition = condition_new(CONDITION_KERNEL_VERSION, "< 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "<= 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "= 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, "> 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_KERNEL_VERSION, ">= 4711.8.15", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ assert_se(uname(&u) >= 0);
+
+ v = strjoina(">=", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ v = strjoina("= ", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ v = strjoina("<=", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) > 0);
+ condition_free(condition);
+
+ v = strjoina("> ", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ v = strjoina("< ", u.release);
+ condition = condition_new(CONDITION_KERNEL_VERSION, v, false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_security(void) {
+ Condition *condition;
+
+ condition = condition_new(CONDITION_SECURITY, "garbage oifdsjfoidsjoj", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "selinux", false, true);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) != mac_selinux_use());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "apparmor", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == mac_apparmor_use());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "tomoyo", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == mac_tomoyo_use());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "ima", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == use_ima());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "smack", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == mac_smack_use());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "audit", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == use_audit());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_SECURITY, "uefi-secureboot", false, false);
+ assert_se(condition);
+ assert_se(condition_test(condition, environ) == is_efi_secure_boot());
+ condition_free(condition);
+}
+
+static void print_securities(void) {
+ log_info("------ enabled security technologies ------");
+ log_info("SELinux: %s", yes_no(mac_selinux_use()));
+ log_info("AppArmor: %s", yes_no(mac_apparmor_use()));
+ log_info("Tomoyo: %s", yes_no(mac_tomoyo_use()));
+ log_info("IMA: %s", yes_no(use_ima()));
+ log_info("SMACK: %s", yes_no(mac_smack_use()));
+ log_info("Audit: %s", yes_no(use_audit()));
+ log_info("UEFI secure boot: %s", yes_no(is_efi_secure_boot()));
+ log_info("-------------------------------------------");
+}
+
+static void test_condition_test_virtualization(void) {
+ Condition *condition;
+ const char *virt;
+ int r;
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, "garbage oifdsjfoidsjoj", false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ if (ERRNO_IS_PRIVILEGE(r))
+ return;
+ log_info("ConditionVirtualization=garbage → %i", r);
+ assert_se(r == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, "container", false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionVirtualization=container → %i", r);
+ assert_se(r == !!detect_container());
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, "vm", false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionVirtualization=vm → %i", r);
+ assert_se(r == (detect_vm() && !detect_container()));
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, "private-users", false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionVirtualization=private-users → %i", r);
+ assert_se(r == !!running_in_userns());
+ condition_free(condition);
+
+ NULSTR_FOREACH(virt,
+ "kvm\0"
+ "qemu\0"
+ "bochs\0"
+ "xen\0"
+ "uml\0"
+ "vmware\0"
+ "oracle\0"
+ "microsoft\0"
+ "zvm\0"
+ "parallels\0"
+ "bhyve\0"
+ "vm_other\0") {
+
+ condition = condition_new(CONDITION_VIRTUALIZATION, virt, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionVirtualization=%s → %i", virt, r);
+ assert_se(r >= 0);
+ condition_free(condition);
+ }
+}
+
+static void test_condition_test_user(void) {
+ Condition *condition;
+ char* uid;
+ char* username;
+ int r;
+
+ condition = condition_new(CONDITION_USER, "garbage oifdsjfoidsjoj", false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionUser=garbage → %i", r);
+ assert_se(r == 0);
+ condition_free(condition);
+
+ assert_se(asprintf(&uid, "%"PRIu32, UINT32_C(0xFFFF)) > 0);
+ condition = condition_new(CONDITION_USER, uid, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionUser=%s → %i", uid, r);
+ assert_se(r == 0);
+ condition_free(condition);
+ free(uid);
+
+ assert_se(asprintf(&uid, "%u", (unsigned)getuid()) > 0);
+ condition = condition_new(CONDITION_USER, uid, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionUser=%s → %i", uid, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(uid);
+
+ assert_se(asprintf(&uid, "%u", (unsigned)getuid()+1) > 0);
+ condition = condition_new(CONDITION_USER, uid, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionUser=%s → %i", uid, r);
+ assert_se(r == 0);
+ condition_free(condition);
+ free(uid);
+
+ username = getusername_malloc();
+ assert_se(username);
+ condition = condition_new(CONDITION_USER, username, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionUser=%s → %i", username, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(username);
+
+ username = (char*)(geteuid() == 0 ? NOBODY_USER_NAME : "root");
+ condition = condition_new(CONDITION_USER, username, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionUser=%s → %i", username, r);
+ assert_se(r == 0);
+ condition_free(condition);
+
+ condition = condition_new(CONDITION_USER, "@system", false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionUser=@system → %i", r);
+ if (uid_is_system(getuid()) || uid_is_system(geteuid()))
+ assert_se(r > 0);
+ else
+ assert_se(r == 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_group(void) {
+ Condition *condition;
+ char* gid;
+ char* groupname;
+ gid_t *gids, max_gid;
+ int ngroups_max, ngroups, r, i;
+
+ assert_se(0 < asprintf(&gid, "%u", UINT32_C(0xFFFF)));
+ condition = condition_new(CONDITION_GROUP, gid, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionGroup=%s → %i", gid, r);
+ assert_se(r == 0);
+ condition_free(condition);
+ free(gid);
+
+ assert_se(0 < asprintf(&gid, "%u", getgid()));
+ condition = condition_new(CONDITION_GROUP, gid, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionGroup=%s → %i", gid, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(gid);
+
+ ngroups_max = sysconf(_SC_NGROUPS_MAX);
+ assert(ngroups_max > 0);
+
+ gids = newa(gid_t, ngroups_max);
+
+ ngroups = getgroups(ngroups_max, gids);
+ assert(ngroups >= 0);
+
+ max_gid = getgid();
+ for (i = 0; i < ngroups; i++) {
+ assert_se(0 < asprintf(&gid, "%u", gids[i]));
+ condition = condition_new(CONDITION_GROUP, gid, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionGroup=%s → %i", gid, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(gid);
+ max_gid = gids[i] > max_gid ? gids[i] : max_gid;
+
+ groupname = gid_to_name(gids[i]);
+ assert_se(groupname);
+ condition = condition_new(CONDITION_GROUP, groupname, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionGroup=%s → %i", groupname, r);
+ assert_se(r > 0);
+ condition_free(condition);
+ free(groupname);
+ max_gid = gids[i] > max_gid ? gids[i] : max_gid;
+ }
+
+ assert_se(0 < asprintf(&gid, "%u", max_gid+1));
+ condition = condition_new(CONDITION_GROUP, gid, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionGroup=%s → %i", gid, r);
+ assert_se(r == 0);
+ condition_free(condition);
+ free(gid);
+
+ groupname = (char*)(getegid() == 0 ? NOBODY_GROUP_NAME : "root");
+ condition = condition_new(CONDITION_GROUP, groupname, false, false);
+ assert_se(condition);
+ r = condition_test(condition, environ);
+ log_info("ConditionGroup=%s → %i", groupname, r);
+ assert_se(r == 0);
+ condition_free(condition);
+}
+
+static void test_condition_test_cpus_one(const char *s, bool result) {
+ Condition *condition;
+ int r;
+
+ log_debug("%s=%s", condition_type_to_string(CONDITION_CPUS), s);
+
+ condition = condition_new(CONDITION_CPUS, s, false, false);
+ assert_se(condition);
+
+ r = condition_test(condition, environ);
+ assert_se(r >= 0);
+ assert_se(r == result);
+ condition_free(condition);
+}
+
+static void test_condition_test_cpus(void) {
+ _cleanup_free_ char *t = NULL;
+ int cpus;
+
+ cpus = cpus_in_affinity_mask();
+ assert_se(cpus >= 0);
+
+ test_condition_test_cpus_one("> 0", true);
+ test_condition_test_cpus_one(">= 0", true);
+ test_condition_test_cpus_one("!= 0", true);
+ test_condition_test_cpus_one("<= 0", false);
+ test_condition_test_cpus_one("< 0", false);
+ test_condition_test_cpus_one("= 0", false);
+
+ test_condition_test_cpus_one("> 100000", false);
+ test_condition_test_cpus_one("= 100000", false);
+ test_condition_test_cpus_one(">= 100000", false);
+ test_condition_test_cpus_one("< 100000", true);
+ test_condition_test_cpus_one("!= 100000", true);
+ test_condition_test_cpus_one("<= 100000", true);
+
+ assert_se(asprintf(&t, "= %i", cpus) >= 0);
+ test_condition_test_cpus_one(t, true);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, "<= %i", cpus) >= 0);
+ test_condition_test_cpus_one(t, true);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, ">= %i", cpus) >= 0);
+ test_condition_test_cpus_one(t, true);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, "!= %i", cpus) >= 0);
+ test_condition_test_cpus_one(t, false);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, "< %i", cpus) >= 0);
+ test_condition_test_cpus_one(t, false);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, "> %i", cpus) >= 0);
+ test_condition_test_cpus_one(t, false);
+ t = mfree(t);
+}
+
+static void test_condition_test_memory_one(const char *s, bool result) {
+ Condition *condition;
+ int r;
+
+ log_debug("%s=%s", condition_type_to_string(CONDITION_MEMORY), s);
+
+ condition = condition_new(CONDITION_MEMORY, s, false, false);
+ assert_se(condition);
+
+ r = condition_test(condition, environ);
+ assert_se(r >= 0);
+ assert_se(r == result);
+ condition_free(condition);
+}
+
+static void test_condition_test_memory(void) {
+ _cleanup_free_ char *t = NULL;
+ uint64_t memory;
+
+ memory = physical_memory();
+
+ test_condition_test_memory_one("> 0", true);
+ test_condition_test_memory_one(">= 0", true);
+ test_condition_test_memory_one("!= 0", true);
+ test_condition_test_memory_one("<= 0", false);
+ test_condition_test_memory_one("< 0", false);
+ test_condition_test_memory_one("= 0", false);
+
+ test_condition_test_memory_one("> 18446744073709547520", false);
+ test_condition_test_memory_one("= 18446744073709547520", false);
+ test_condition_test_memory_one(">= 18446744073709547520", false);
+ test_condition_test_memory_one("< 18446744073709547520", true);
+ test_condition_test_memory_one("!= 18446744073709547520", true);
+ test_condition_test_memory_one("<= 18446744073709547520", true);
+
+ assert_se(asprintf(&t, "= %" PRIu64, memory) >= 0);
+ test_condition_test_memory_one(t, true);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, "<= %" PRIu64, memory) >= 0);
+ test_condition_test_memory_one(t, true);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, ">= %" PRIu64, memory) >= 0);
+ test_condition_test_memory_one(t, true);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, "!= %" PRIu64, memory) >= 0);
+ test_condition_test_memory_one(t, false);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, "< %" PRIu64, memory) >= 0);
+ test_condition_test_memory_one(t, false);
+ t = mfree(t);
+
+ assert_se(asprintf(&t, "> %" PRIu64, memory) >= 0);
+ test_condition_test_memory_one(t, false);
+ t = mfree(t);
+}
+
+static void test_condition_test_environment_one(const char *s, bool result) {
+ Condition *condition;
+ int r;
+
+ log_debug("%s=%s", condition_type_to_string(CONDITION_ENVIRONMENT), s);
+
+ condition = condition_new(CONDITION_ENVIRONMENT, s, false, false);
+ assert_se(condition);
+
+ r = condition_test(condition, environ);
+ assert_se(r >= 0);
+ assert_se(r == result);
+ condition_free(condition);
+}
+
+static void test_condition_test_environment(void) {
+ assert_se(setenv("EXISTINGENVVAR", "foo", false) >= 0);
+
+ test_condition_test_environment_one("MISSINGENVVAR", false);
+ test_condition_test_environment_one("MISSINGENVVAR=foo", false);
+ test_condition_test_environment_one("MISSINGENVVAR=", false);
+
+ test_condition_test_environment_one("EXISTINGENVVAR", true);
+ test_condition_test_environment_one("EXISTINGENVVAR=foo", true);
+ test_condition_test_environment_one("EXISTINGENVVAR=bar", false);
+ test_condition_test_environment_one("EXISTINGENVVAR=", false);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_condition_test_path();
+ test_condition_test_ac_power();
+ test_condition_test_host();
+ test_condition_test_architecture();
+ test_condition_test_kernel_command_line();
+ test_condition_test_kernel_version();
+ test_condition_test_security();
+ print_securities();
+ test_condition_test_virtualization();
+ test_condition_test_user();
+ test_condition_test_group();
+ test_condition_test_control_group_controller();
+ test_condition_test_cpus();
+ test_condition_test_memory();
+ test_condition_test_environment();
+
+ return 0;
+}
diff --git a/src/test/test-conf-files.c b/src/test/test-conf-files.c
new file mode 100644
index 0000000..ee7bbd1
--- /dev/null
+++ b/src/test/test-conf-files.c
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2014 Michael Marineau
+***/
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "user-util.h"
+#include "util.h"
+
+static void setup_test_dir(char *tmp_dir, const char *files, ...) {
+ va_list ap;
+
+ assert_se(mkdtemp(tmp_dir));
+
+ va_start(ap, files);
+ while (files) {
+ _cleanup_free_ char *path;
+
+ assert_se(path = path_join(tmp_dir, files));
+ (void) mkdir_parents(path, 0755);
+ assert_se(write_string_file(path, "foobar", WRITE_STRING_FILE_CREATE) >= 0);
+
+ files = va_arg(ap, const char *);
+ }
+ va_end(ap);
+}
+
+static void test_conf_files_list(bool use_root) {
+ char tmp_dir[] = "/tmp/test-conf-files-XXXXXX";
+ _cleanup_strv_free_ char **found_files = NULL, **found_files2 = NULL;
+ const char *root_dir, *search, *expect_a, *expect_b, *expect_c, *mask;
+
+ log_info("/* %s(%s) */", __func__, yes_no(use_root));
+
+ setup_test_dir(tmp_dir,
+ "/dir/a.conf",
+ "/dir/b.conf",
+ "/dir/c.foo",
+ NULL);
+
+ mask = strjoina(tmp_dir, "/dir/d.conf");
+ assert_se(symlink("/dev/null", mask) >= 0);
+
+ if (use_root) {
+ root_dir = tmp_dir;
+ search = "/dir";
+ } else {
+ root_dir = NULL;
+ search = strjoina(tmp_dir, "/dir");
+ }
+
+ expect_a = strjoina(tmp_dir, "/dir/a.conf");
+ expect_b = strjoina(tmp_dir, "/dir/b.conf");
+ expect_c = strjoina(tmp_dir, "/dir/c.foo");
+
+ log_debug("/* Check when filtered by suffix */");
+
+ assert_se(conf_files_list(&found_files, ".conf", root_dir, CONF_FILES_FILTER_MASKED, search) == 0);
+ strv_print(found_files);
+
+ assert_se(found_files);
+ assert_se(streq_ptr(found_files[0], expect_a));
+ assert_se(streq_ptr(found_files[1], expect_b));
+ assert_se(!found_files[2]);
+
+ log_debug("/* Check when unfiltered */");
+ assert_se(conf_files_list(&found_files2, NULL, root_dir, CONF_FILES_FILTER_MASKED, search) == 0);
+ strv_print(found_files2);
+
+ assert_se(found_files2);
+ assert_se(streq_ptr(found_files2[0], expect_a));
+ assert_se(streq_ptr(found_files2[1], expect_b));
+ assert_se(streq_ptr(found_files2[2], expect_c));
+ assert_se(!found_files2[3]);
+
+ assert_se(rm_rf(tmp_dir, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+}
+
+static void test_conf_files_insert(const char *root) {
+ _cleanup_strv_free_ char **s = NULL;
+
+ log_info("/* %s root=%s */", __func__, strempty(root));
+
+ char **dirs = STRV_MAKE("/dir1", "/dir2", "/dir3");
+
+ _cleanup_free_ const char
+ *foo1 = path_join(root, "/dir1/foo.conf"),
+ *foo2 = path_join(root, "/dir2/foo.conf"),
+ *bar2 = path_join(root, "/dir2/bar.conf"),
+ *zzz3 = path_join(root, "/dir3/zzz.conf"),
+ *whatever = path_join(root, "/whatever.conf");
+
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(foo2)));
+
+ /* The same file again, https://github.com/systemd/systemd/issues/11124 */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(foo2)));
+
+ /* Lower priority → new entry is ignored */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir3/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(foo2)));
+
+ /* Higher priority → new entry replaces */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir1/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(foo1)));
+
+ /* Earlier basename */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/bar.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1)));
+
+ /* Later basename */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir3/zzz.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1, zzz3)));
+
+ /* All lower priority → all ignored */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir3/zzz.conf") == 0);
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/bar.conf") == 0);
+ assert_se(conf_files_insert(&s, root, dirs, "/dir3/bar.conf") == 0);
+ assert_se(conf_files_insert(&s, root, dirs, "/dir2/foo.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1, zzz3)));
+
+ /* Two entries that don't match any of the directories, but match basename */
+ assert_se(conf_files_insert(&s, root, dirs, "/dir4/zzz.conf") == 0);
+ assert_se(conf_files_insert(&s, root, dirs, "/zzz.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1, zzz3)));
+
+ /* An entry that doesn't match any of the directories, no match at all */
+ assert_se(conf_files_insert(&s, root, dirs, "/whatever.conf") == 0);
+ assert_se(strv_equal(s, STRV_MAKE(bar2, foo1, whatever, zzz3)));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_conf_files_list(false);
+ test_conf_files_list(true);
+ test_conf_files_insert(NULL);
+ test_conf_files_insert("/root");
+ test_conf_files_insert("/root/");
+
+ return 0;
+}
diff --git a/src/test/test-conf-parser.c b/src/test/test-conf-parser.c
new file mode 100644
index 0000000..04b610c
--- /dev/null
+++ b/src/test/test-conf-parser.c
@@ -0,0 +1,411 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_config_parse_path_one(const char *rvalue, const char *expected) {
+ _cleanup_free_ char *path = NULL;
+
+ assert_se(config_parse_path("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &path, NULL) >= 0);
+ assert_se(streq_ptr(expected, path));
+}
+
+static void test_config_parse_log_level_one(const char *rvalue, int expected) {
+ int log_level = 0;
+
+ assert_se(config_parse_log_level("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &log_level, NULL) >= 0);
+ assert_se(expected == log_level);
+}
+
+static void test_config_parse_log_facility_one(const char *rvalue, int expected) {
+ int log_facility = 0;
+
+ assert_se(config_parse_log_facility("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &log_facility, NULL) >= 0);
+ assert_se(expected == log_facility);
+}
+
+static void test_config_parse_iec_size_one(const char *rvalue, size_t expected) {
+ size_t iec_size = 0;
+
+ assert_se(config_parse_iec_size("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &iec_size, NULL) >= 0);
+ assert_se(expected == iec_size);
+}
+
+static void test_config_parse_si_uint64_one(const char *rvalue, uint64_t expected) {
+ uint64_t si_uint64 = 0;
+
+ assert_se(config_parse_si_uint64("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &si_uint64, NULL) >= 0);
+ assert_se(expected == si_uint64);
+}
+
+static void test_config_parse_int_one(const char *rvalue, int expected) {
+ int v = -1;
+
+ assert_se(config_parse_int("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_unsigned_one(const char *rvalue, unsigned expected) {
+ unsigned v = 0;
+
+ assert_se(config_parse_unsigned("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_strv_one(const char *rvalue, char **expected) {
+ _cleanup_strv_free_ char **strv = NULL;
+
+ assert_se(config_parse_strv("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &strv, NULL) >= 0);
+ assert_se(strv_equal(expected, strv));
+}
+
+static void test_config_parse_mode_one(const char *rvalue, mode_t expected) {
+ mode_t v = 0;
+
+ assert_se(config_parse_mode("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_sec_one(const char *rvalue, usec_t expected) {
+ usec_t v = 0;
+
+ assert_se(config_parse_sec("unit", "filename", 1, "section", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_nsec_one(const char *rvalue, nsec_t expected) {
+ nsec_t v = 0;
+
+ assert_se(config_parse_nsec("unit", "filename", 1, "nsection", 1, "lvalue", 0, rvalue, &v, NULL) >= 0);
+ assert_se(expected == v);
+}
+
+static void test_config_parse_path(void) {
+ test_config_parse_path_one("/path", "/path");
+ test_config_parse_path_one("/path//////////", "/path");
+ test_config_parse_path_one("///path/foo///bar////bar//", "/path/foo/bar/bar");
+ test_config_parse_path_one("/path//./////hogehoge///.", "/path/hogehoge");
+ test_config_parse_path_one("/path/\xc3\x80", "/path/\xc3\x80");
+
+ test_config_parse_path_one("not_absolute/path", NULL);
+ test_config_parse_path_one("/path/\xc3\x7f", NULL);
+}
+
+static void test_config_parse_log_level(void) {
+ test_config_parse_log_level_one("debug", LOG_DEBUG);
+ test_config_parse_log_level_one("info", LOG_INFO);
+
+ test_config_parse_log_level_one("garbage", 0);
+}
+
+static void test_config_parse_log_facility(void) {
+ test_config_parse_log_facility_one("mail", LOG_MAIL);
+ test_config_parse_log_facility_one("user", LOG_USER);
+
+ test_config_parse_log_facility_one("garbage", 0);
+}
+
+static void test_config_parse_iec_size(void) {
+ test_config_parse_iec_size_one("1024", 1024);
+ test_config_parse_iec_size_one("2K", 2048);
+ test_config_parse_iec_size_one("10M", 10 * 1024 * 1024);
+ test_config_parse_iec_size_one("1G", 1 * 1024 * 1024 * 1024);
+ test_config_parse_iec_size_one("0G", 0);
+ test_config_parse_iec_size_one("0", 0);
+
+ test_config_parse_iec_size_one("-982", 0);
+ test_config_parse_iec_size_one("49874444198739873000000G", 0);
+ test_config_parse_iec_size_one("garbage", 0);
+}
+
+static void test_config_parse_si_uint64(void) {
+ test_config_parse_si_uint64_one("1024", 1024);
+ test_config_parse_si_uint64_one("2K", 2000);
+ test_config_parse_si_uint64_one("10M", 10 * 1000 * 1000);
+ test_config_parse_si_uint64_one("1G", 1 * 1000 * 1000 * 1000);
+ test_config_parse_si_uint64_one("0G", 0);
+ test_config_parse_si_uint64_one("0", 0);
+
+ test_config_parse_si_uint64_one("-982", 0);
+ test_config_parse_si_uint64_one("49874444198739873000000G", 0);
+ test_config_parse_si_uint64_one("garbage", 0);
+}
+
+static void test_config_parse_int(void) {
+ test_config_parse_int_one("1024", 1024);
+ test_config_parse_int_one("-1024", -1024);
+ test_config_parse_int_one("0", 0);
+
+ test_config_parse_int_one("99999999999999999999999999999999999999999999999999999999", -1);
+ test_config_parse_int_one("-99999999999999999999999999999999999999999999999999999999", -1);
+ test_config_parse_int_one("1G", -1);
+ test_config_parse_int_one("garbage", -1);
+}
+
+static void test_config_parse_unsigned(void) {
+ test_config_parse_unsigned_one("10241024", 10241024);
+ test_config_parse_unsigned_one("1024", 1024);
+ test_config_parse_unsigned_one("0", 0);
+
+ test_config_parse_unsigned_one("99999999999999999999999999999999999999999999999999999999", 0);
+ test_config_parse_unsigned_one("1G", 0);
+ test_config_parse_unsigned_one("garbage", 0);
+ test_config_parse_unsigned_one("1000garbage", 0);
+}
+
+static void test_config_parse_strv(void) {
+ test_config_parse_strv_one("", STRV_MAKE_EMPTY);
+ test_config_parse_strv_one("foo", STRV_MAKE("foo"));
+ test_config_parse_strv_one("foo bar foo", STRV_MAKE("foo", "bar", "foo"));
+ test_config_parse_strv_one("\"foo bar\" foo", STRV_MAKE("foo bar", "foo"));
+ test_config_parse_strv_one("\xc3\x80", STRV_MAKE("\xc3\x80"));
+ test_config_parse_strv_one("\xc3\x7f", STRV_MAKE("\xc3\x7f"));
+}
+
+static void test_config_parse_mode(void) {
+ test_config_parse_mode_one("777", 0777);
+ test_config_parse_mode_one("644", 0644);
+
+ test_config_parse_mode_one("-777", 0);
+ test_config_parse_mode_one("999", 0);
+ test_config_parse_mode_one("garbage", 0);
+ test_config_parse_mode_one("777garbage", 0);
+ test_config_parse_mode_one("777 garbage", 0);
+}
+
+static void test_config_parse_sec(void) {
+ test_config_parse_sec_one("1", 1 * USEC_PER_SEC);
+ test_config_parse_sec_one("1s", 1 * USEC_PER_SEC);
+ test_config_parse_sec_one("100ms", 100 * USEC_PER_MSEC);
+ test_config_parse_sec_one("5min 20s", 5 * 60 * USEC_PER_SEC + 20 * USEC_PER_SEC);
+
+ test_config_parse_sec_one("-1", 0);
+ test_config_parse_sec_one("10foo", 0);
+ test_config_parse_sec_one("garbage", 0);
+}
+
+static void test_config_parse_nsec(void) {
+ test_config_parse_nsec_one("1", 1);
+ test_config_parse_nsec_one("1s", 1 * NSEC_PER_SEC);
+ test_config_parse_nsec_one("100ms", 100 * NSEC_PER_MSEC);
+ test_config_parse_nsec_one("5min 20s", 5 * 60 * NSEC_PER_SEC + 20 * NSEC_PER_SEC);
+
+ test_config_parse_nsec_one("-1", 0);
+ test_config_parse_nsec_one("10foo", 0);
+ test_config_parse_nsec_one("garbage", 0);
+}
+
+static void test_config_parse_iec_uint64(void) {
+ uint64_t offset = 0;
+ assert_se(config_parse_iec_uint64(NULL, "/this/file", 11, "Section", 22, "Size", 0, "4M", &offset, NULL) == 0);
+ assert_se(offset == 4 * 1024 * 1024);
+
+ assert_se(config_parse_iec_uint64(NULL, "/this/file", 11, "Section", 22, "Size", 0, "4.5M", &offset, NULL) == 0);
+}
+
+#define x10(x) x x x x x x x x x x
+#define x100(x) x10(x10(x))
+#define x1000(x) x10(x100(x))
+
+static const char* const config_file[] = {
+ "[Section]\n"
+ "setting1=1\n",
+
+ "[Section]\n"
+ "setting1=1", /* no terminating newline */
+
+ "\n\n\n\n[Section]\n\n\n"
+ "setting1=1", /* some whitespace, no terminating newline */
+
+ "[Section]\n"
+ "[Section]\n"
+ "setting1=1\n"
+ "setting1= 2 \t\n"
+ "setting1= 1\n", /* repeated settings */
+
+ "[Section]\n"
+ "[Section]\n"
+ "setting1=1\n"
+ "setting1=2\\\n"
+ " \n" /* empty line breaks continuation */
+ "setting1=1\n", /* repeated settings */
+
+ "[Section]\n"
+ "setting1=1\\\n" /* normal continuation */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ "#hogehoge\\\n" /* continuation is ignored in comment */
+ "setting1=1\\\n" /* normal continuation */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ "setting1=1\\\n" /* normal continuation */
+ "#hogehoge\\\n" /* commented out line in continuation is ignored */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ " #hogehoge\\\n" /* whitespaces before comments */
+ " setting1=1\\\n" /* whitespaces before key */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ " setting1=1\\\n" /* whitespaces before key */
+ " #hogehoge\\\n" /* commented out line prefixed with whitespaces in continuation */
+ "2\\\n"
+ "3\n",
+
+ "[Section]\n"
+ "setting1=1\\\n" /* continuation with extra trailing backslash at the end */
+ "2\\\n"
+ "3\\\n",
+
+ "[Section]\n"
+ "setting1=1\\\\\\\n" /* continuation with trailing escape symbols */
+ "\\\\2\n", /* note that C requires one level of escaping, so the
+ * parser gets "…1 BS BS BS NL BS BS 2 NL", which
+ * it translates into "…1 BS BS SP BS BS 2" */
+
+ "\n[Section]\n\n"
+ "setting1=" /* a line above LINE_MAX length */
+ x1000("ABCD")
+ "\n",
+
+ "[Section]\n"
+ "setting1=" /* a line above LINE_MAX length, with continuation */
+ x1000("ABCD") "\\\n"
+ "foobar",
+
+ "[Section]\n"
+ "setting1=" /* a line above LINE_MAX length, with continuation */
+ x1000("ABCD") "\\\n" /* and an extra trailing backslash */
+ "foobar\\\n",
+
+ "[Section]\n"
+ "setting1=" /* a line above the allowed limit: 9 + 1050000 + 1 */
+ x1000(x1000("x") x10("abcde")) "\n",
+
+ "[Section]\n"
+ "setting1=" /* many continuation lines, together above the limit */
+ x1000(x1000("x") x10("abcde") "\\\n") "xxx",
+
+ "[Section]\n"
+ "setting1=2\n"
+ "[NoWarnSection]\n"
+ "setting1=3\n"
+ "[WarnSection]\n"
+ "setting1=3\n"
+ "[X-Section]\n"
+ "setting1=3\n",
+};
+
+static void test_config_parse(unsigned i, const char *s) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-conf-parser.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *setting1 = NULL;
+ int r;
+
+ const ConfigTableItem items[] = {
+ { "Section", "setting1", config_parse_string, 0, &setting1},
+ {}
+ };
+
+ log_info("== %s[%i] ==", __func__, i);
+
+ assert_se(fmkostemp_safe(name, "r+", &f) == 0);
+ assert_se(fwrite(s, strlen(s), 1, f) == 1);
+ rewind(f);
+
+ /*
+ int config_parse(const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime)
+ */
+
+ r = config_parse(NULL, name, f,
+ "Section\0"
+ "-NoWarnSection\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+
+ switch (i) {
+ case 0 ... 4:
+ assert_se(r == 0);
+ assert_se(streq(setting1, "1"));
+ break;
+
+ case 5 ... 10:
+ assert_se(r == 0);
+ assert_se(streq(setting1, "1 2 3"));
+ break;
+
+ case 11:
+ assert_se(r == 0);
+ assert_se(streq(setting1, "1\\\\ \\\\2"));
+ break;
+
+ case 12:
+ assert_se(r == 0);
+ assert_se(streq(setting1, x1000("ABCD")));
+ break;
+
+ case 13 ... 14:
+ assert_se(r == 0);
+ assert_se(streq(setting1, x1000("ABCD") " foobar"));
+ break;
+
+ case 15 ... 16:
+ assert_se(r == -ENOBUFS);
+ assert_se(setting1 == NULL);
+ break;
+
+ case 17:
+ assert_se(r == 0);
+ assert_se(streq(setting1, "2"));
+ break;
+ }
+}
+
+int main(int argc, char **argv) {
+ unsigned i;
+
+ log_parse_environment();
+ log_open();
+
+ test_config_parse_path();
+ test_config_parse_log_level();
+ test_config_parse_log_facility();
+ test_config_parse_iec_size();
+ test_config_parse_si_uint64();
+ test_config_parse_int();
+ test_config_parse_unsigned();
+ test_config_parse_strv();
+ test_config_parse_mode();
+ test_config_parse_sec();
+ test_config_parse_nsec();
+ test_config_parse_iec_uint64();
+
+ for (i = 0; i < ELEMENTSOF(config_file); i++)
+ test_config_parse(i, config_file[i]);
+
+ return 0;
+}
diff --git a/src/test/test-copy.c b/src/test/test-copy.c
new file mode 100644
index 0000000..ffa9297
--- /dev/null
+++ b/src/test/test-copy.c
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "xattr-util.h"
+
+static void test_copy_file(void) {
+ _cleanup_free_ char *buf = NULL;
+ char fn[] = "/tmp/test-copy_file.XXXXXX";
+ char fn_copy[] = "/tmp/test-copy_file.XXXXXX";
+ size_t sz = 0;
+ int fd;
+
+ log_info("%s", __func__);
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+ close(fd);
+
+ fd = mkostemp_safe(fn_copy);
+ assert_se(fd >= 0);
+ close(fd);
+
+ assert_se(write_string_file(fn, "foo bar bar bar foo", WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(copy_file(fn, fn_copy, 0, 0644, 0, 0, COPY_REFLINK) == 0);
+
+ assert_se(read_full_file(fn_copy, &buf, &sz) == 0);
+ assert_se(streq(buf, "foo bar bar bar foo\n"));
+ assert_se(sz == 20);
+
+ unlink(fn);
+ unlink(fn_copy);
+}
+
+static void test_copy_file_fd(void) {
+ char in_fn[] = "/tmp/test-copy-file-fd-XXXXXX";
+ char out_fn[] = "/tmp/test-copy-file-fd-XXXXXX";
+ _cleanup_close_ int in_fd = -1, out_fd = -1;
+ const char *text = "boohoo\nfoo\n\tbar\n";
+ char buf[64] = {};
+
+ log_info("%s", __func__);
+
+ in_fd = mkostemp_safe(in_fn);
+ assert_se(in_fd >= 0);
+ out_fd = mkostemp_safe(out_fn);
+ assert_se(out_fd >= 0);
+
+ assert_se(write_string_file(in_fn, text, WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(copy_file_fd("/a/file/which/does/not/exist/i/guess", out_fd, COPY_REFLINK) < 0);
+ assert_se(copy_file_fd(in_fn, out_fd, COPY_REFLINK) >= 0);
+ assert_se(lseek(out_fd, SEEK_SET, 0) == 0);
+
+ assert_se(read(out_fd, buf, sizeof buf) == (ssize_t) strlen(text));
+ assert_se(streq(buf, text));
+
+ unlink(in_fn);
+ unlink(out_fn);
+}
+
+static void test_copy_tree(void) {
+ char original_dir[] = "/tmp/test-copy_tree/";
+ char copy_dir[] = "/tmp/test-copy_tree-copy/";
+ char **files = STRV_MAKE("file", "dir1/file", "dir1/dir2/file", "dir1/dir2/dir3/dir4/dir5/file");
+ char **symlinks = STRV_MAKE("link", "file",
+ "link2", "dir1/file");
+ char **hardlinks = STRV_MAKE("hlink", "file",
+ "hlink2", "dir1/file");
+ const char *unixsockp;
+ char **p, **ll;
+ struct stat st;
+ int xattr_worked = -1; /* xattr support is optional in temporary directories, hence use it if we can,
+ * but don't fail if we can't */
+
+ log_info("%s", __func__);
+
+ (void) rm_rf(copy_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf(original_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ STRV_FOREACH(p, files) {
+ _cleanup_free_ char *f, *c;
+ int k;
+
+ assert_se(f = path_join(original_dir, *p));
+
+ assert_se(mkdir_parents(f, 0755) >= 0);
+ assert_se(write_string_file(f, "file", WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(base64mem(*p, strlen(*p), &c) >= 0);
+
+ k = setxattr(f, "user.testxattr", c, strlen(c), 0);
+ assert_se(xattr_worked < 0 || ((k >= 0) == !!xattr_worked));
+ xattr_worked = k >= 0;
+ }
+
+ STRV_FOREACH_PAIR(ll, p, symlinks) {
+ _cleanup_free_ char *f, *l;
+
+ assert_se(f = path_join(original_dir, *p));
+ assert_se(l = path_join(original_dir, *ll));
+
+ assert_se(mkdir_parents(l, 0755) >= 0);
+ assert_se(symlink(f, l) == 0);
+ }
+
+ STRV_FOREACH_PAIR(ll, p, hardlinks) {
+ _cleanup_free_ char *f, *l;
+
+ assert_se(f = path_join(original_dir, *p));
+ assert_se(l = path_join(original_dir, *ll));
+
+ assert_se(mkdir_parents(l, 0755) >= 0);
+ assert_se(link(f, l) == 0);
+ }
+
+ unixsockp = strjoina(original_dir, "unixsock");
+ assert_se(mknod(unixsockp, S_IFSOCK|0644, 0) >= 0);
+
+ assert_se(copy_tree(original_dir, copy_dir, UID_INVALID, GID_INVALID, COPY_REFLINK|COPY_MERGE|COPY_HARDLINKS) == 0);
+
+ STRV_FOREACH(p, files) {
+ _cleanup_free_ char *buf, *f, *c = NULL;
+ size_t sz;
+ int k;
+
+ assert_se(f = path_join(copy_dir, *p));
+
+ assert_se(access(f, F_OK) == 0);
+ assert_se(read_full_file(f, &buf, &sz) == 0);
+ assert_se(streq(buf, "file\n"));
+
+ k = getxattr_malloc(f, "user.testxattr", &c, false);
+ assert_se(xattr_worked < 0 || ((k >= 0) == !!xattr_worked));
+
+ if (k >= 0) {
+ _cleanup_free_ char *d = NULL;
+
+ assert_se(base64mem(*p, strlen(*p), &d) >= 0);
+ assert_se(streq(d, c));
+ }
+ }
+
+ STRV_FOREACH_PAIR(ll, p, symlinks) {
+ _cleanup_free_ char *target, *f, *l;
+
+ assert_se(f = strjoin(original_dir, *p));
+ assert_se(l = strjoin(copy_dir, *ll));
+
+ assert_se(chase_symlinks(l, NULL, 0, &target, NULL) == 1);
+ assert_se(path_equal(f, target));
+ }
+
+ STRV_FOREACH_PAIR(ll, p, hardlinks) {
+ _cleanup_free_ char *f, *l;
+ struct stat a, b;
+
+ assert_se(f = strjoin(copy_dir, *p));
+ assert_se(l = strjoin(copy_dir, *ll));
+
+ assert_se(lstat(f, &a) >= 0);
+ assert_se(lstat(l, &b) >= 0);
+
+ assert_se(a.st_ino == b.st_ino);
+ assert_se(a.st_dev == b.st_dev);
+ }
+
+ unixsockp = strjoina(copy_dir, "unixsock");
+ assert_se(stat(unixsockp, &st) >= 0);
+ assert_se(S_ISSOCK(st.st_mode));
+
+ assert_se(copy_tree(original_dir, copy_dir, UID_INVALID, GID_INVALID, COPY_REFLINK) < 0);
+ assert_se(copy_tree("/tmp/inexistent/foo/bar/fsdoi", copy_dir, UID_INVALID, GID_INVALID, COPY_REFLINK) < 0);
+
+ (void) rm_rf(copy_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf(original_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_copy_bytes(void) {
+ _cleanup_close_pair_ int pipefd[2] = {-1, -1};
+ _cleanup_close_ int infd = -1;
+ int r, r2;
+ char buf[1024], buf2[1024];
+
+ infd = open("/usr/lib/os-release", O_RDONLY|O_CLOEXEC);
+ if (infd < 0)
+ infd = open("/etc/os-release", O_RDONLY|O_CLOEXEC);
+ assert_se(infd >= 0);
+
+ assert_se(pipe2(pipefd, O_CLOEXEC) == 0);
+
+ r = copy_bytes(infd, pipefd[1], (uint64_t) -1, 0);
+ assert_se(r == 0);
+
+ r = read(pipefd[0], buf, sizeof(buf));
+ assert_se(r >= 0);
+
+ assert_se(lseek(infd, 0, SEEK_SET) == 0);
+ r2 = read(infd, buf2, sizeof(buf2));
+ assert_se(r == r2);
+
+ assert_se(strneq(buf, buf2, r));
+
+ /* test copy_bytes with invalid descriptors */
+ r = copy_bytes(pipefd[0], pipefd[0], 1, 0);
+ assert_se(r == -EBADF);
+
+ r = copy_bytes(pipefd[1], pipefd[1], 1, 0);
+ assert_se(r == -EBADF);
+
+ r = copy_bytes(pipefd[1], infd, 1, 0);
+ assert_se(r == -EBADF);
+}
+
+static void test_copy_bytes_regular_file(const char *src, bool try_reflink, uint64_t max_bytes) {
+ char fn2[] = "/tmp/test-copy-file-XXXXXX";
+ char fn3[] = "/tmp/test-copy-file-XXXXXX";
+ _cleanup_close_ int fd = -1, fd2 = -1, fd3 = -1;
+ int r;
+ struct stat buf, buf2, buf3;
+
+ log_info("%s try_reflink=%s max_bytes=%" PRIu64, __func__, yes_no(try_reflink), max_bytes);
+
+ fd = open(src, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+ assert_se(fd >= 0);
+
+ fd2 = mkostemp_safe(fn2);
+ assert_se(fd2 >= 0);
+
+ fd3 = mkostemp_safe(fn3);
+ assert_se(fd3 >= 0);
+
+ r = copy_bytes(fd, fd2, max_bytes, try_reflink ? COPY_REFLINK : 0);
+ if (max_bytes == (uint64_t) -1)
+ assert_se(r == 0);
+ else
+ assert_se(IN_SET(r, 0, 1));
+
+ assert_se(fstat(fd, &buf) == 0);
+ assert_se(fstat(fd2, &buf2) == 0);
+ assert_se((uint64_t) buf2.st_size == MIN((uint64_t) buf.st_size, max_bytes));
+
+ if (max_bytes < (uint64_t) -1)
+ /* Make sure the file is now higher than max_bytes */
+ assert_se(ftruncate(fd2, max_bytes + 1) == 0);
+
+ assert_se(lseek(fd2, 0, SEEK_SET) == 0);
+
+ r = copy_bytes(fd2, fd3, max_bytes, try_reflink ? COPY_REFLINK : 0);
+ if (max_bytes == (uint64_t) -1)
+ assert_se(r == 0);
+ else
+ /* We cannot distinguish between the input being exactly max_bytes
+ * or longer than max_bytes (without trying to read one more byte,
+ * or calling stat, or FION_READ, etc, and we don't want to do any
+ * of that). So we expect "truncation" since we know that file we
+ * are copying is exactly max_bytes bytes. */
+ assert_se(r == 1);
+
+ assert_se(fstat(fd3, &buf3) == 0);
+
+ if (max_bytes == (uint64_t) -1)
+ assert_se(buf3.st_size == buf2.st_size);
+ else
+ assert_se((uint64_t) buf3.st_size == max_bytes);
+
+ unlink(fn2);
+ unlink(fn3);
+}
+
+static void test_copy_atomic(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ const char *q;
+ int r;
+
+ assert_se(mkdtemp_malloc(NULL, &p) >= 0);
+
+ q = strjoina(p, "/fstab");
+
+ r = copy_file_atomic("/etc/fstab", q, 0644, 0, 0, COPY_REFLINK);
+ if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r))
+ return;
+
+ assert_se(copy_file_atomic("/etc/fstab", q, 0644, 0, 0, COPY_REFLINK) == -EEXIST);
+
+ assert_se(copy_file_atomic("/etc/fstab", q, 0644, 0, 0, COPY_REPLACE) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_copy_file();
+ test_copy_file_fd();
+ test_copy_tree();
+ test_copy_bytes();
+ test_copy_bytes_regular_file(argv[0], false, (uint64_t) -1);
+ test_copy_bytes_regular_file(argv[0], true, (uint64_t) -1);
+ test_copy_bytes_regular_file(argv[0], false, 1000); /* smaller than copy buffer size */
+ test_copy_bytes_regular_file(argv[0], true, 1000);
+ test_copy_bytes_regular_file(argv[0], false, 32000); /* larger than copy buffer size */
+ test_copy_bytes_regular_file(argv[0], true, 32000);
+ test_copy_atomic();
+
+ return 0;
+}
diff --git a/src/test/test-coredump-util.c b/src/test/test-coredump-util.c
new file mode 100644
index 0000000..f9a44b2
--- /dev/null
+++ b/src/test/test-coredump-util.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "coredump-util.h"
+#include "macro.h"
+#include "tests.h"
+
+static void test_coredump_filter_to_from_string(void) {
+ log_info("/* %s */", __func__);
+
+ for (CoredumpFilter i = 0; i < _COREDUMP_FILTER_MAX; i++) {
+ const char *n;
+
+ assert_se(n = coredump_filter_to_string(i));
+ log_info("0x%x\t%s", 1<<i, n);
+ assert_se(coredump_filter_from_string(n) == i);
+
+ uint64_t f;
+ assert_se(coredump_filter_mask_from_string(n, &f) == 0);
+ assert_se(f == 1u << i);
+ }
+}
+
+static void test_coredump_filter_mask_from_string(void) {
+ log_info("/* %s */", __func__);
+
+ uint64_t f;
+ assert_se(coredump_filter_mask_from_string("default", &f) == 0);
+ assert_se(f == COREDUMP_FILTER_MASK_DEFAULT);
+
+ assert_se(coredump_filter_mask_from_string(" default\tdefault\tdefault ", &f) == 0);
+ assert_se(f == COREDUMP_FILTER_MASK_DEFAULT);
+
+ assert_se(coredump_filter_mask_from_string("defaulta", &f) < 0);
+ assert_se(coredump_filter_mask_from_string("default defaulta default", &f) < 0);
+ assert_se(coredump_filter_mask_from_string("default default defaulta", &f) < 0);
+
+ assert_se(coredump_filter_mask_from_string("private-anonymous default", &f) == 0);
+ assert_se(f == COREDUMP_FILTER_MASK_DEFAULT);
+
+ assert_se(coredump_filter_mask_from_string("shared-file-backed shared-dax", &f) == 0);
+ assert_se(f == (1 << COREDUMP_FILTER_SHARED_FILE_BACKED |
+ 1 << COREDUMP_FILTER_SHARED_DAX));
+
+ assert_se(coredump_filter_mask_from_string("private-file-backed private-dax 0xF", &f) == 0);
+ assert_se(f == (1 << COREDUMP_FILTER_PRIVATE_FILE_BACKED |
+ 1 << COREDUMP_FILTER_PRIVATE_DAX |
+ 0xF));
+
+ assert_se(coredump_filter_mask_from_string("11", &f) == 0);
+ assert_se(f == 0x11);
+
+ assert_se(coredump_filter_mask_from_string("0x1101", &f) == 0);
+ assert_se(f == 0x1101);
+
+ assert_se(coredump_filter_mask_from_string("0", &f) == 0);
+ assert_se(f == 0);
+
+ assert_se(coredump_filter_mask_from_string("all", &f) == 0);
+ assert_se(FLAGS_SET(f, (1 << COREDUMP_FILTER_PRIVATE_ANONYMOUS |
+ 1 << COREDUMP_FILTER_SHARED_ANONYMOUS |
+ 1 << COREDUMP_FILTER_PRIVATE_FILE_BACKED |
+ 1 << COREDUMP_FILTER_SHARED_FILE_BACKED |
+ 1 << COREDUMP_FILTER_ELF_HEADERS |
+ 1 << COREDUMP_FILTER_PRIVATE_HUGE |
+ 1 << COREDUMP_FILTER_SHARED_HUGE |
+ 1 << COREDUMP_FILTER_PRIVATE_DAX |
+ 1 << COREDUMP_FILTER_SHARED_DAX)));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_INFO);
+
+ test_coredump_filter_to_from_string();
+ test_coredump_filter_mask_from_string();
+
+ return 0;
+}
diff --git a/src/test/test-cpu-set-util.c b/src/test/test-cpu-set-util.c
new file mode 100644
index 0000000..0cfc883
--- /dev/null
+++ b/src/test/test-cpu-set-util.c
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "cpu-set-util.h"
+#include "string-util.h"
+#include "macro.h"
+
+static void test_parse_cpu_set(void) {
+ CPUSet c = {};
+ _cleanup_free_ char *str = NULL;
+ int cpu;
+
+ log_info("/* %s */", __func__);
+
+ /* Single value */
+ assert_se(parse_cpu_set_full("0", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.set);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_ISSET_S(0, c.allocated, c.set));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 1);
+
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "0"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Simple range (from CPUAffinity example) */
+ assert_se(parse_cpu_set_full("1 2 4", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.set);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_ISSET_S(1, c.allocated, c.set));
+ assert_se(CPU_ISSET_S(2, c.allocated, c.set));
+ assert_se(CPU_ISSET_S(4, c.allocated, c.set));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 3);
+
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "1-2 4"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* A more interesting range */
+ assert_se(parse_cpu_set_full("0 1 2 3 8 9 10 11", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 8);
+ for (cpu = 0; cpu < 4; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "0-3 8-11"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Quoted strings */
+ assert_se(parse_cpu_set_full("8 '9' 10 \"11\"", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 4);
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "8-11"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Use commas as separators */
+ assert_se(parse_cpu_set_full("0,1,2,3 8,9,10,11", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 8);
+ for (cpu = 0; cpu < 4; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Commas with spaces (and trailing comma, space) */
+ assert_se(parse_cpu_set_full("0, 1, 2, 3, 4, 5, 6, 7, 63, ", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 9);
+ for (cpu = 0; cpu < 8; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+
+ assert_se(CPU_ISSET_S(63, c.allocated, c.set));
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "0-7 63"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Ranges */
+ assert_se(parse_cpu_set_full("0-3,8-11", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 8);
+ for (cpu = 0; cpu < 4; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Ranges with trailing comma, space */
+ assert_se(parse_cpu_set_full("0-3 8-11, ", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 8);
+ for (cpu = 0; cpu < 4; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ for (cpu = 8; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "0-3 8-11"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Negative range (returns empty cpu_set) */
+ assert_se(parse_cpu_set_full("3-0", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 0);
+ cpu_set_reset(&c);
+
+ /* Overlapping ranges */
+ assert_se(parse_cpu_set_full("0-7 4-11", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 12);
+ for (cpu = 0; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "0-11"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Mix ranges and individual CPUs */
+ assert_se(parse_cpu_set_full("0,2 4-11", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+ assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 10);
+ assert_se(CPU_ISSET_S(0, c.allocated, c.set));
+ assert_se(CPU_ISSET_S(2, c.allocated, c.set));
+ for (cpu = 4; cpu < 12; cpu++)
+ assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "0 2 4-11"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+
+ /* Garbage */
+ assert_se(parse_cpu_set_full("0 1 2 3 garbage", &c, true, NULL, "fake", 1, "CPUAffinity") == -EINVAL);
+ assert_se(!c.set);
+ assert_se(c.allocated == 0);
+
+ /* Range with garbage */
+ assert_se(parse_cpu_set_full("0-3 8-garbage", &c, true, NULL, "fake", 1, "CPUAffinity") == -EINVAL);
+ assert_se(!c.set);
+ assert_se(c.allocated == 0);
+
+ /* Empty string */
+ assert_se(parse_cpu_set_full("", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+ assert_se(!c.set); /* empty string returns NULL */
+ assert_se(c.allocated == 0);
+
+ /* Runaway quoted string */
+ assert_se(parse_cpu_set_full("0 1 2 3 \"4 5 6 7 ", &c, true, NULL, "fake", 1, "CPUAffinity") == -EINVAL);
+ assert_se(!c.set);
+ assert_se(c.allocated == 0);
+
+ /* Maximum allocation */
+ assert_se(parse_cpu_set_full("8000-8191", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 192);
+ assert_se(str = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", str);
+ str = mfree(str);
+ assert_se(str = cpu_set_to_range_string(&c));
+ log_info("cpu_set_to_range_string: %s", str);
+ assert_se(streq(str, "8000-8191"));
+ str = mfree(str);
+ cpu_set_reset(&c);
+}
+
+static void test_parse_cpu_set_extend(void) {
+ CPUSet c = {};
+ _cleanup_free_ char *s1 = NULL, *s2 = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_cpu_set_extend("1 3", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 2);
+ assert_se(s1 = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", s1);
+
+ assert_se(parse_cpu_set_extend("4", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 3);
+ assert_se(s2 = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", s2);
+
+ assert_se(parse_cpu_set_extend("", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
+ assert_se(!c.set);
+ assert_se(c.allocated == 0);
+ log_info("cpu_set_to_string: (null)");
+}
+
+static void test_cpu_set_to_from_dbus(void) {
+ _cleanup_(cpu_set_reset) CPUSet c = {}, c2 = {};
+ _cleanup_free_ char *s = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_cpu_set_extend("1 3 8 100-200", &c, true, NULL, "fake", 1, "CPUAffinity") == 1);
+ assert_se(s = cpu_set_to_string(&c));
+ log_info("cpu_set_to_string: %s", s);
+ assert_se(CPU_COUNT_S(c.allocated, c.set) == 104);
+
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+ static const char expected[32] =
+ "\x0A\x01\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\xF0\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+ "\xFF\xFF\xFF\xFF\xFF\x01";
+
+ assert_se(cpu_set_to_dbus(&c, &array, &allocated) == 0);
+ assert_se(array);
+ assert_se(allocated == c.allocated);
+
+ assert_se(allocated <= sizeof expected);
+ assert_se(allocated >= DIV_ROUND_UP(201u, 8u)); /* We need at least 201 bits for our mask */
+ assert(memcmp(array, expected, allocated) == 0);
+
+ assert_se(cpu_set_from_dbus(array, allocated, &c2) == 0);
+ assert_se(c2.set);
+ assert_se(c2.allocated == c.allocated);
+ assert_se(memcmp(c.set, c2.set, c.allocated) == 0);
+}
+
+static void test_cpus_in_affinity_mask(void) {
+ int r;
+
+ r = cpus_in_affinity_mask();
+ assert(r > 0);
+ log_info("cpus_in_affinity_mask: %d", r);
+}
+
+int main(int argc, char *argv[]) {
+ log_info("CPU_ALLOC_SIZE(1) = %zu", CPU_ALLOC_SIZE(1));
+ log_info("CPU_ALLOC_SIZE(9) = %zu", CPU_ALLOC_SIZE(9));
+ log_info("CPU_ALLOC_SIZE(64) = %zu", CPU_ALLOC_SIZE(64));
+ log_info("CPU_ALLOC_SIZE(65) = %zu", CPU_ALLOC_SIZE(65));
+ log_info("CPU_ALLOC_SIZE(1024) = %zu", CPU_ALLOC_SIZE(1024));
+ log_info("CPU_ALLOC_SIZE(1025) = %zu", CPU_ALLOC_SIZE(1025));
+ log_info("CPU_ALLOC_SIZE(8191) = %zu", CPU_ALLOC_SIZE(8191));
+
+ test_parse_cpu_set();
+ test_parse_cpu_set_extend();
+ test_cpus_in_affinity_mask();
+ test_cpu_set_to_from_dbus();
+
+ return 0;
+}
diff --git a/src/test/test-daemon.c b/src/test/test-daemon.c
new file mode 100644
index 0000000..e6dd29a
--- /dev/null
+++ b/src/test/test-daemon.c
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "parse-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_strv_free_ char **l = NULL;
+ int n, i;
+ usec_t duration = USEC_PER_SEC / 10;
+
+ if (argc >= 2) {
+ unsigned x;
+
+ assert_se(safe_atou(argv[1], &x) >= 0);
+ duration = x * USEC_PER_SEC;
+ }
+
+ n = sd_listen_fds_with_names(false, &l);
+ if (n < 0) {
+ log_error_errno(n, "Failed to get listening fds: %m");
+ return EXIT_FAILURE;
+ }
+
+ for (i = 0; i < n; i++)
+ log_info("fd=%i name=%s\n", SD_LISTEN_FDS_START + i, l[i]);
+
+ sd_notify(0,
+ "STATUS=Starting up");
+ usleep(duration);
+
+ sd_notify(0,
+ "STATUS=Running\n"
+ "READY=1");
+ usleep(duration);
+
+ sd_notify(0,
+ "STATUS=Reloading\n"
+ "RELOADING=1");
+ usleep(duration);
+
+ sd_notify(0,
+ "STATUS=Running\n"
+ "READY=1");
+ usleep(duration);
+
+ sd_notify(0,
+ "STATUS=Quitting\n"
+ "STOPPING=1");
+ usleep(duration);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-date.c b/src/test/test-date.c
new file mode 100644
index 0000000..47b7096
--- /dev/null
+++ b/src/test/test-date.c
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "time-util.h"
+
+static void test_should_pass(const char *p) {
+ usec_t t, q;
+ char buf[FORMAT_TIMESTAMP_MAX], buf_relative[FORMAT_TIMESTAMP_RELATIVE_MAX];
+
+ log_info("Test: %s", p);
+ assert_se(parse_timestamp(p, &t) >= 0);
+ assert_se(format_timestamp_style(buf, sizeof(buf), t, TIMESTAMP_US));
+ log_info("\"%s\" → \"%s\"", p, buf);
+
+ assert_se(parse_timestamp(buf, &q) >= 0);
+ if (q != t) {
+ char tmp[FORMAT_TIMESTAMP_MAX];
+
+ log_error("round-trip failed: \"%s\" → \"%s\"",
+ buf, format_timestamp_style(tmp, sizeof(tmp), q, TIMESTAMP_US));
+ }
+ assert_se(q == t);
+
+ assert_se(format_timestamp_relative(buf_relative, sizeof(buf_relative), t));
+ log_info("%s", strna(buf_relative));
+}
+
+static void test_should_parse(const char *p) {
+ usec_t t;
+
+ log_info("Test: %s", p);
+ assert_se(parse_timestamp(p, &t) >= 0);
+ log_info("\"%s\" → \"@%" PRI_USEC "\"", p, t);
+}
+
+static void test_should_fail(const char *p) {
+ usec_t t;
+ int r;
+
+ log_info("Test: %s", p);
+ r = parse_timestamp(p, &t);
+ if (r >= 0)
+ log_info("\"%s\" → \"@%" PRI_USEC "\" (unexpected)", p, t);
+ else
+ log_info("parse_timestamp() returns %d (expected)", r);
+ assert_se(r < 0);
+}
+
+static void test_one(const char *p) {
+ _cleanup_free_ char *with_utc;
+
+ with_utc = strjoin(p, " UTC");
+ test_should_pass(p);
+ test_should_pass(with_utc);
+}
+
+static void test_one_noutc(const char *p) {
+ _cleanup_free_ char *with_utc;
+
+ with_utc = strjoin(p, " UTC");
+ test_should_pass(p);
+ test_should_fail(with_utc);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_one("17:41");
+ test_one("18:42:44");
+ test_one("18:42:44.0");
+ test_one("18:42:44.999999999999");
+ test_one("12-10-02 12:13:14");
+ test_one("12-10-2 12:13:14");
+ test_one("12-10-03 12:13");
+ test_one("2012-12-30 18:42");
+ test_one("2012-10-02");
+ test_one("Tue 2012-10-02");
+ test_one("yesterday");
+ test_one("today");
+ test_one("tomorrow");
+ test_one_noutc("16:20 UTC");
+ test_one_noutc("16:20 Asia/Seoul");
+ test_one_noutc("tomorrow Asia/Seoul");
+ test_one_noutc("2012-12-30 18:42 Asia/Seoul");
+ test_one_noutc("now");
+ test_one_noutc("+2d");
+ test_one_noutc("+2y 4d");
+ test_one_noutc("5months ago");
+ test_one_noutc("@1395716396");
+ test_should_parse("1970-1-1 UTC");
+ test_should_pass("1970-1-1 00:00:01 UTC");
+ test_should_fail("1969-12-31 UTC");
+ test_should_fail("-100y");
+ test_should_fail("today UTC UTC");
+ test_should_fail("now Asia/Seoul");
+ test_should_fail("+2d Asia/Seoul");
+ test_should_fail("@1395716396 Asia/Seoul");
+#if SIZEOF_TIME_T == 8
+ test_should_pass("9999-12-30 23:59:59 UTC");
+ test_should_fail("9999-12-31 00:00:00 UTC");
+ test_should_fail("10000-01-01 00:00:00 UTC");
+#elif SIZEOF_TIME_T == 4
+ test_should_pass("2038-01-19 03:14:07 UTC");
+ test_should_fail("2038-01-19 03:14:08 UTC");
+#endif
+
+ return 0;
+}
diff --git a/src/test/test-dev-setup.c b/src/test/test-dev-setup.c
new file mode 100644
index 0000000..ea9df56
--- /dev/null
+++ b/src/test/test-dev-setup.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "capability-util.h"
+#include "dev-setup.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "tmpfile-util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ const char *f;
+ struct stat st;
+
+ if (have_effective_cap(CAP_DAC_OVERRIDE) <= 0)
+ return EXIT_TEST_SKIP;
+
+ assert_se(mkdtemp_malloc("/tmp/test-dev-setupXXXXXX", &p) >= 0);
+
+ f = prefix_roota(p, "/run/systemd");
+ assert_se(mkdir_p(f, 0755) >= 0);
+
+ assert_se(make_inaccessible_nodes(f, 1, 1) >= 0);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/reg");
+ assert_se(stat(f, &st) >= 0);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/dir");
+ assert_se(stat(f, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/fifo");
+ assert_se(stat(f, &st) >= 0);
+ assert_se(S_ISFIFO(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/sock");
+ assert_se(stat(f, &st) >= 0);
+ assert_se(S_ISSOCK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/chr");
+ if (stat(f, &st) < 0)
+ assert_se(errno == ENOENT);
+ else {
+ assert_se(S_ISCHR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+ }
+
+ f = prefix_roota(p, "/run/systemd/inaccessible/blk");
+ if (stat(f, &st) < 0)
+ assert_se(errno == ENOENT);
+ else {
+ assert_se(S_ISBLK(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0000);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-device-nodes.c b/src/test/test-device-nodes.c
new file mode 100644
index 0000000..9efb3fe
--- /dev/null
+++ b/src/test/test-device-nodes.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "device-nodes.h"
+#include "string-util.h"
+#include "util.h"
+
+/* helpers for test_encode_devnode_name */
+static char *do_encode_string(const char *in) {
+ size_t out_len = strlen(in) * 4 + 1;
+ char *out = malloc(out_len);
+
+ assert_se(out);
+ assert_se(encode_devnode_name(in, out, out_len) >= 0);
+ puts(out);
+
+ return out;
+}
+
+static bool expect_encoded_as(const char *in, const char *expected) {
+ _cleanup_free_ char *encoded = do_encode_string(in);
+ return streq(encoded, expected);
+}
+
+static void test_encode_devnode_name(void) {
+ assert_se(expect_encoded_as("systemd sucks", "systemd\\x20sucks"));
+ assert_se(expect_encoded_as("pinkiepie", "pinkiepie"));
+ assert_se(expect_encoded_as("valíd\\ųtf8", "valíd\\x5cųtf8"));
+ assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));
+ assert_se(expect_encoded_as("/", "\\x2f"));
+ assert_se(expect_encoded_as("!", "\\x21"));
+}
+
+int main(int argc, char *argv[]) {
+ test_encode_devnode_name();
+
+ return 0;
+}
diff --git a/src/test/test-dlopen.c b/src/test/test-dlopen.c
new file mode 100644
index 0000000..35981eb
--- /dev/null
+++ b/src/test/test-dlopen.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <dlfcn.h>
+#include <stdlib.h>
+
+#include "macro.h"
+
+int main(int argc, char **argv) {
+ void *handle;
+
+ assert_se(handle = dlopen(argv[1], RTLD_NOW));
+ assert_se(dlclose(handle) == 0);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-dns-domain.c b/src/test/test-dns-domain.c
new file mode 100644
index 0000000..2df2380
--- /dev/null
+++ b/src/test/test-dns-domain.c
@@ -0,0 +1,830 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_dns_label_unescape_one(const char *what, const char *expect, size_t buffer_sz, int ret, int ret_ldh) {
+ char buffer[buffer_sz];
+ int r;
+ const char *w = what;
+
+ log_info("%s, %s, %zu, →%d/%d", what, expect, buffer_sz, ret, ret_ldh);
+
+ r = dns_label_unescape(&w, buffer, buffer_sz, 0);
+ assert_se(r == ret);
+ if (r >= 0)
+ assert_se(streq(buffer, expect));
+
+ w = what;
+ r = dns_label_unescape(&w, buffer, buffer_sz, DNS_LABEL_LDH);
+ assert_se(r == ret_ldh);
+ if (r >= 0)
+ assert_se(streq(buffer, expect));
+
+ w = what;
+ r = dns_label_unescape(&w, buffer, buffer_sz, DNS_LABEL_NO_ESCAPES);
+ const int ret_noe = strchr(what, '\\') ? -EINVAL : ret;
+ assert_se(r == ret_noe);
+ if (r >= 0)
+ assert_se(streq(buffer, expect));
+}
+
+static void test_dns_label_unescape(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_label_unescape_one("hallo", "hallo", 6, 5, 5);
+ test_dns_label_unescape_one("hallo", "hallo", 4, -ENOBUFS, -ENOBUFS);
+ test_dns_label_unescape_one("", "", 10, 0, 0);
+ test_dns_label_unescape_one("hallo\\.foobar", "hallo.foobar", 20, 12, -EINVAL);
+ test_dns_label_unescape_one("hallo.foobar", "hallo", 10, 5, 5);
+ test_dns_label_unescape_one("hallo\n.foobar", "hallo", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one("hallo\\", "hallo", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one("hallo\\032 ", "hallo ", 20, 7, -EINVAL);
+ test_dns_label_unescape_one(".", "", 20, 0, 0);
+ test_dns_label_unescape_one("..", "", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one(".foobar", "", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one("foobar.", "foobar", 20, 6, 6);
+ test_dns_label_unescape_one("foobar..", "foobar", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_one("foo-bar", "foo-bar", 20, 7, 7);
+ test_dns_label_unescape_one("foo-", "foo-", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("-foo", "-foo", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("-foo-", "-foo-", 20, 5, -EINVAL);
+ test_dns_label_unescape_one("foo-.", "foo-", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("foo.-", "foo", 20, 3, 3);
+ test_dns_label_unescape_one("foo\\032", "foo ", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("foo\\045", "foo-", 20, 4, -EINVAL);
+ test_dns_label_unescape_one("głąb", "głąb", 20, 6, -EINVAL);
+}
+
+static void test_dns_name_to_wire_format_one(const char *what, const char *expect, size_t buffer_sz, int ret) {
+ uint8_t buffer[buffer_sz];
+ int r;
+
+ log_info("%s, %s, %zu, →%d", what, expect, buffer_sz, ret);
+
+ r = dns_name_to_wire_format(what, buffer, buffer_sz, false);
+ assert_se(r == ret);
+
+ if (r < 0)
+ return;
+
+ assert_se(!memcmp(buffer, expect, r));
+}
+
+static void test_dns_name_to_wire_format(void) {
+ static const char out0[] = { 0 };
+ static const char out1[] = { 3, 'f', 'o', 'o', 0 };
+ static const char out2[] = { 5, 'h', 'a', 'l', 'l', 'o', 3, 'f', 'o', 'o', 3, 'b', 'a', 'r', 0 };
+ static const char out3[] = { 4, ' ', 'f', 'o', 'o', 3, 'b', 'a', 'r', 0 };
+ static const char out4[] = { 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 9, 'a', '1', '2', '3', '4', '5', '6', '7', '8',
+ 3, 'a', '1', '2', 0 };
+
+ log_info("/* %s */", __func__);
+
+ test_dns_name_to_wire_format_one("", out0, sizeof(out0), sizeof(out0));
+
+ test_dns_name_to_wire_format_one("foo", out1, sizeof(out1), sizeof(out1));
+ test_dns_name_to_wire_format_one("foo", out1, sizeof(out1) + 1, sizeof(out1));
+ test_dns_name_to_wire_format_one("foo", out1, sizeof(out1) - 1, -ENOBUFS);
+
+ test_dns_name_to_wire_format_one("hallo.foo.bar", out2, sizeof(out2), sizeof(out2));
+ test_dns_name_to_wire_format_one("hallo.foo..bar", NULL, 32, -EINVAL);
+
+ test_dns_name_to_wire_format_one("\\032foo.bar", out3, sizeof(out3), sizeof(out3));
+
+ test_dns_name_to_wire_format_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a123", NULL, 500, -EINVAL);
+ test_dns_name_to_wire_format_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12", out4, sizeof(out4), sizeof(out4));
+}
+
+static void test_dns_label_unescape_suffix_one(const char *what, const char *expect1, const char *expect2, size_t buffer_sz, int ret1, int ret2) {
+ char buffer[buffer_sz];
+ const char *label;
+ int r;
+
+ log_info("%s, %s, %s, %zu, %d, %d", what, expect1, expect2, buffer_sz, ret1, ret2);
+
+ label = what + strlen(what);
+
+ r = dns_label_unescape_suffix(what, &label, buffer, buffer_sz);
+ assert_se(r == ret1);
+ if (r >= 0)
+ assert_se(streq(buffer, expect1));
+
+ r = dns_label_unescape_suffix(what, &label, buffer, buffer_sz);
+ assert_se(r == ret2);
+ if (r >= 0)
+ assert_se(streq(buffer, expect2));
+}
+
+static void test_dns_label_unescape_suffix(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_label_unescape_suffix_one("hallo", "hallo", "", 6, 5, 0);
+ test_dns_label_unescape_suffix_one("hallo", "hallo", "", 4, -ENOBUFS, -ENOBUFS);
+ test_dns_label_unescape_suffix_one("", "", "", 10, 0, 0);
+ test_dns_label_unescape_suffix_one("hallo\\.foobar", "hallo.foobar", "", 20, 12, 0);
+ test_dns_label_unescape_suffix_one("hallo.foobar", "foobar", "hallo", 10, 6, 5);
+ test_dns_label_unescape_suffix_one("hallo.foobar\n", "foobar", "foobar", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_suffix_one("hallo\\", "hallo", "hallo", 20, -EINVAL, -EINVAL);
+ test_dns_label_unescape_suffix_one("hallo\\032 ", "hallo ", "", 20, 7, 0);
+ test_dns_label_unescape_suffix_one(".", "", "", 20, 0, 0);
+ test_dns_label_unescape_suffix_one("..", "", "", 20, 0, -EINVAL);
+ test_dns_label_unescape_suffix_one(".foobar", "foobar", "", 20, 6, -EINVAL);
+ test_dns_label_unescape_suffix_one("foobar.", "foobar", "", 20, 6, 0);
+ test_dns_label_unescape_suffix_one("foo\\\\bar", "foo\\bar", "", 20, 7, 0);
+ test_dns_label_unescape_suffix_one("foo.bar", "bar", "foo", 20, 3, 3);
+ test_dns_label_unescape_suffix_one("foo..bar", "bar", "", 20, 3, -EINVAL);
+ test_dns_label_unescape_suffix_one("foo...bar", "bar", "", 20, 3, -EINVAL);
+ test_dns_label_unescape_suffix_one("foo\\.bar", "foo.bar", "", 20, 7, 0);
+ test_dns_label_unescape_suffix_one("foo\\\\.bar", "bar", "foo\\", 20, 3, 4);
+ test_dns_label_unescape_suffix_one("foo\\\\\\.bar", "foo\\.bar", "", 20, 8, 0);
+}
+
+static void test_dns_label_escape_one(const char *what, size_t l, const char *expect, int ret) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ log_info("%s, %zu, %s, →%d", what, l, expect, ret);
+
+ r = dns_label_escape_new(what, l, &t);
+ assert_se(r == ret);
+
+ if (r < 0)
+ return;
+
+ assert_se(streq_ptr(expect, t));
+}
+
+static void test_dns_label_escape(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_label_escape_one("", 0, NULL, -EINVAL);
+ test_dns_label_escape_one("hallo", 5, "hallo", 5);
+ test_dns_label_escape_one("hallo", 6, "hallo\\000", 9);
+ test_dns_label_escape_one("hallo hallo.foobar,waldi", 24, "hallo\\032hallo\\.foobar\\044waldi", 31);
+}
+
+static void test_dns_name_normalize_one(const char *what, const char *expect, int ret) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ r = dns_name_normalize(what, 0, &t);
+ assert_se(r == ret);
+
+ if (r < 0)
+ return;
+
+ assert_se(streq_ptr(expect, t));
+}
+
+static void test_dns_name_normalize(void) {
+ test_dns_name_normalize_one("", ".", 0);
+ test_dns_name_normalize_one("f", "f", 0);
+ test_dns_name_normalize_one("f.waldi", "f.waldi", 0);
+ test_dns_name_normalize_one("f \\032.waldi", "f\\032\\032.waldi", 0);
+ test_dns_name_normalize_one("\\000", "\\000", 0);
+ test_dns_name_normalize_one("..", NULL, -EINVAL);
+ test_dns_name_normalize_one(".foobar", NULL, -EINVAL);
+ test_dns_name_normalize_one("foobar.", "foobar", 0);
+ test_dns_name_normalize_one(".", ".", 0);
+}
+
+static void test_dns_name_equal_one(const char *a, const char *b, int ret) {
+ int r;
+
+ r = dns_name_equal(a, b);
+ assert_se(r == ret);
+
+ r = dns_name_equal(b, a);
+ assert_se(r == ret);
+}
+
+static void test_dns_name_equal(void) {
+ test_dns_name_equal_one("", "", true);
+ test_dns_name_equal_one("x", "x", true);
+ test_dns_name_equal_one("x", "x.", true);
+ test_dns_name_equal_one("abc.def", "abc.def", true);
+ test_dns_name_equal_one("abc.def", "ABC.def", true);
+ test_dns_name_equal_one("abc.def", "CBA.def", false);
+ test_dns_name_equal_one("", "xxx", false);
+ test_dns_name_equal_one("ab", "a", false);
+ test_dns_name_equal_one("\\000", "\\000", true);
+ test_dns_name_equal_one(".", "", true);
+ test_dns_name_equal_one(".", ".", true);
+ test_dns_name_equal_one("..", "..", -EINVAL);
+}
+
+static void test_dns_name_between_one(const char *a, const char *b, const char *c, int ret) {
+ int r;
+
+ r = dns_name_between(a, b, c);
+ assert_se(r == ret);
+
+ r = dns_name_between(c, b, a);
+ if (ret >= 0)
+ assert_se(r == 0 || dns_name_equal(a, c) > 0);
+ else
+ assert_se(r == ret);
+}
+
+static void test_dns_name_between(void) {
+ /* see https://tools.ietf.org/html/rfc4034#section-6.1
+ Note that we use "\033.z.example" in stead of "\001.z.example" as we
+ consider the latter invalid */
+ test_dns_name_between_one("example", "a.example", "yljkjljk.a.example", true);
+ test_dns_name_between_one("a.example", "yljkjljk.a.example", "Z.a.example", true);
+ test_dns_name_between_one("yljkjljk.a.example", "Z.a.example", "zABC.a.EXAMPLE", true);
+ test_dns_name_between_one("Z.a.example", "zABC.a.EXAMPLE", "z.example", true);
+ test_dns_name_between_one("zABC.a.EXAMPLE", "z.example", "\\033.z.example", true);
+ test_dns_name_between_one("z.example", "\\033.z.example", "*.z.example", true);
+ test_dns_name_between_one("\\033.z.example", "*.z.example", "\\200.z.example", true);
+ test_dns_name_between_one("*.z.example", "\\200.z.example", "example", true);
+ test_dns_name_between_one("\\200.z.example", "example", "a.example", true);
+
+ test_dns_name_between_one("example", "a.example", "example", true);
+ test_dns_name_between_one("example", "example", "example", false);
+ test_dns_name_between_one("example", "example", "yljkjljk.a.example", false);
+ test_dns_name_between_one("example", "yljkjljk.a.example", "yljkjljk.a.example", false);
+ test_dns_name_between_one("hkps.pool.sks-keyservers.net", "_pgpkey-https._tcp.hkps.pool.sks-keyservers.net", "ipv4.pool.sks-keyservers.net", true);
+}
+
+static void test_dns_name_endswith_one(const char *a, const char *b, int ret) {
+ assert_se(dns_name_endswith(a, b) == ret);
+}
+
+static void test_dns_name_endswith(void) {
+ test_dns_name_endswith_one("", "", true);
+ test_dns_name_endswith_one("", "xxx", false);
+ test_dns_name_endswith_one("xxx", "", true);
+ test_dns_name_endswith_one("x", "x", true);
+ test_dns_name_endswith_one("x", "y", false);
+ test_dns_name_endswith_one("x.y", "y", true);
+ test_dns_name_endswith_one("x.y", "Y", true);
+ test_dns_name_endswith_one("x.y", "x", false);
+ test_dns_name_endswith_one("x.y.z", "Z", true);
+ test_dns_name_endswith_one("x.y.z", "y.Z", true);
+ test_dns_name_endswith_one("x.y.z", "x.y.Z", true);
+ test_dns_name_endswith_one("x.y.z", "waldo", false);
+ test_dns_name_endswith_one("x.y.z.u.v.w", "y.z", false);
+ test_dns_name_endswith_one("x.y.z.u.v.w", "u.v.w", true);
+ test_dns_name_endswith_one("x.y\001.z", "waldo", -EINVAL);
+}
+
+static void test_dns_name_startswith_one(const char *a, const char *b, int ret) {
+ assert_se(dns_name_startswith(a, b) == ret);
+}
+
+static void test_dns_name_startswith(void) {
+ test_dns_name_startswith_one("", "", true);
+ test_dns_name_startswith_one("", "xxx", false);
+ test_dns_name_startswith_one("xxx", "", true);
+ test_dns_name_startswith_one("x", "x", true);
+ test_dns_name_startswith_one("x", "y", false);
+ test_dns_name_startswith_one("x.y", "x.y", true);
+ test_dns_name_startswith_one("x.y", "y.x", false);
+ test_dns_name_startswith_one("x.y", "x", true);
+ test_dns_name_startswith_one("x.y", "X", true);
+ test_dns_name_startswith_one("x.y", "y", false);
+ test_dns_name_startswith_one("x.y", "", true);
+ test_dns_name_startswith_one("x.y", "X", true);
+}
+
+static void test_dns_name_is_root(void) {
+ assert_se(dns_name_is_root(""));
+ assert_se(dns_name_is_root("."));
+ assert_se(!dns_name_is_root("xxx"));
+ assert_se(!dns_name_is_root("xxx."));
+ assert_se(!dns_name_is_root(".."));
+}
+
+static void test_dns_name_is_single_label(void) {
+ assert_se(!dns_name_is_single_label(""));
+ assert_se(!dns_name_is_single_label("."));
+ assert_se(!dns_name_is_single_label(".."));
+ assert_se(dns_name_is_single_label("x"));
+ assert_se(dns_name_is_single_label("x."));
+ assert_se(!dns_name_is_single_label("xx.yy"));
+}
+
+static void test_dns_name_reverse_one(const char *address, const char *name) {
+ _cleanup_free_ char *p = NULL;
+ union in_addr_union a, b = {};
+ int familya, familyb;
+
+ assert_se(in_addr_from_string_auto(address, &familya, &a) >= 0);
+ assert_se(dns_name_reverse(familya, &a, &p) >= 0);
+ assert_se(streq(p, name));
+ assert_se(dns_name_address(p, &familyb, &b) > 0);
+ assert_se(familya == familyb);
+ assert_se(in_addr_equal(familya, &a, &b));
+}
+
+static void test_dns_name_reverse(void) {
+ test_dns_name_reverse_one("47.11.8.15", "15.8.11.47.in-addr.arpa");
+ test_dns_name_reverse_one("fe80::47", "7.4.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.8.e.f.ip6.arpa");
+ test_dns_name_reverse_one("127.0.0.1", "1.0.0.127.in-addr.arpa");
+ test_dns_name_reverse_one("::1", "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa");
+}
+
+static void test_dns_name_concat_one(const char *a, const char *b, int r, const char *result) {
+ _cleanup_free_ char *p = NULL;
+
+ assert_se(dns_name_concat(a, b, 0, &p) == r);
+ assert_se(streq_ptr(p, result));
+}
+
+static void test_dns_name_concat(void) {
+ test_dns_name_concat_one("", "", 0, ".");
+ test_dns_name_concat_one(".", "", 0, ".");
+ test_dns_name_concat_one("", ".", 0, ".");
+ test_dns_name_concat_one(".", ".", 0, ".");
+ test_dns_name_concat_one("foo", "bar", 0, "foo.bar");
+ test_dns_name_concat_one("foo.foo", "bar.bar", 0, "foo.foo.bar.bar");
+ test_dns_name_concat_one("foo", NULL, 0, "foo");
+ test_dns_name_concat_one("foo", ".", 0, "foo");
+ test_dns_name_concat_one("foo.", "bar.", 0, "foo.bar");
+ test_dns_name_concat_one(NULL, NULL, 0, ".");
+ test_dns_name_concat_one(NULL, ".", 0, ".");
+ test_dns_name_concat_one(NULL, "foo", 0, "foo");
+}
+
+static void test_dns_name_is_valid_one(const char *s, int ret, int ret_ldh) {
+ log_info("%s, →%d", s, ret);
+
+ assert_se(dns_name_is_valid(s) == ret);
+ assert_se(dns_name_is_valid_ldh(s) == ret_ldh);
+}
+
+static void test_dns_name_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_is_valid_one("foo", 1, 1);
+ test_dns_name_is_valid_one("foo.", 1, 1);
+ test_dns_name_is_valid_one("foo..", 0, 0);
+ test_dns_name_is_valid_one("Foo", 1, 1);
+ test_dns_name_is_valid_one("foo.bar", 1, 1);
+ test_dns_name_is_valid_one("foo.bar.baz", 1, 1);
+ test_dns_name_is_valid_one("", 1, 1);
+ test_dns_name_is_valid_one("foo..bar", 0, 0);
+ test_dns_name_is_valid_one(".foo.bar", 0, 0);
+ test_dns_name_is_valid_one("foo.bar.", 1, 1);
+ test_dns_name_is_valid_one("foo.bar..", 0, 0);
+ test_dns_name_is_valid_one("\\zbar", 0, 0);
+ test_dns_name_is_valid_one("ä", 1, 0);
+ test_dns_name_is_valid_one("\n", 0, 0);
+
+ test_dns_name_is_valid_one("dash-", 1, 0);
+ test_dns_name_is_valid_one("-dash", 1, 0);
+ test_dns_name_is_valid_one("dash-dash", 1, 1);
+ test_dns_name_is_valid_one("foo.dash-", 1, 0);
+ test_dns_name_is_valid_one("foo.-dash", 1, 0);
+ test_dns_name_is_valid_one("foo.dash-dash", 1, 1);
+ test_dns_name_is_valid_one("foo.dash-.bar", 1, 0);
+ test_dns_name_is_valid_one("foo.-dash.bar", 1, 0);
+ test_dns_name_is_valid_one("foo.dash-dash.bar", 1, 1);
+ test_dns_name_is_valid_one("dash-.bar", 1, 0);
+ test_dns_name_is_valid_one("-dash.bar", 1, 0);
+ test_dns_name_is_valid_one("dash-dash.bar", 1, 1);
+ test_dns_name_is_valid_one("-.bar", 1, 0);
+ test_dns_name_is_valid_one("foo.-", 1, 0);
+
+ /* 256 characters */
+ test_dns_name_is_valid_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345", 0, 0);
+
+ /* 255 characters */
+ test_dns_name_is_valid_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a1234", 0, 0);
+
+ /* 254 characters */
+ test_dns_name_is_valid_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a123", 0, 0);
+
+ /* 253 characters */
+ test_dns_name_is_valid_one("a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12345678.a12", 1, 1);
+
+ /* label of 64 chars length */
+ test_dns_name_is_valid_one("a123456789a123456789a123456789a123456789a123456789a123456789a123", 0, 0);
+
+ /* label of 63 chars length */
+ test_dns_name_is_valid_one("a123456789a123456789a123456789a123456789a123456789a123456789a12", 1, 1);
+}
+
+static void test_dns_service_name_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_service_name_is_valid("Lennart's Compüter"));
+ assert_se(dns_service_name_is_valid("piff.paff"));
+
+ assert_se(!dns_service_name_is_valid(NULL));
+ assert_se(!dns_service_name_is_valid(""));
+ assert_se(!dns_service_name_is_valid("foo\nbar"));
+ assert_se(!dns_service_name_is_valid("foo\201bar"));
+ assert_se(!dns_service_name_is_valid("this is an overly long string that is certainly longer than 63 characters"));
+}
+
+static void test_dns_srv_type_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_srv_type_is_valid("_http._tcp"));
+ assert_se(dns_srv_type_is_valid("_foo-bar._tcp"));
+ assert_se(dns_srv_type_is_valid("_w._udp"));
+ assert_se(dns_srv_type_is_valid("_a800._tcp"));
+ assert_se(dns_srv_type_is_valid("_a-800._tcp"));
+
+ assert_se(!dns_srv_type_is_valid(NULL));
+ assert_se(!dns_srv_type_is_valid(""));
+ assert_se(!dns_srv_type_is_valid("x"));
+ assert_se(!dns_srv_type_is_valid("_foo"));
+ assert_se(!dns_srv_type_is_valid("_tcp"));
+ assert_se(!dns_srv_type_is_valid("_"));
+ assert_se(!dns_srv_type_is_valid("_foo."));
+ assert_se(!dns_srv_type_is_valid("_föo._tcp"));
+ assert_se(!dns_srv_type_is_valid("_f\no._tcp"));
+ assert_se(!dns_srv_type_is_valid("_800._tcp"));
+ assert_se(!dns_srv_type_is_valid("_-800._tcp"));
+ assert_se(!dns_srv_type_is_valid("_-foo._tcp"));
+ assert_se(!dns_srv_type_is_valid("_piep._foo._udp"));
+}
+
+static void test_dnssd_srv_type_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dnssd_srv_type_is_valid("_http._tcp"));
+ assert_se(dnssd_srv_type_is_valid("_foo-bar._tcp"));
+ assert_se(dnssd_srv_type_is_valid("_w._udp"));
+ assert_se(dnssd_srv_type_is_valid("_a800._tcp"));
+ assert_se(dnssd_srv_type_is_valid("_a-800._tcp"));
+
+ assert_se(!dnssd_srv_type_is_valid(NULL));
+ assert_se(!dnssd_srv_type_is_valid(""));
+ assert_se(!dnssd_srv_type_is_valid("x"));
+ assert_se(!dnssd_srv_type_is_valid("_foo"));
+ assert_se(!dnssd_srv_type_is_valid("_tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_"));
+ assert_se(!dnssd_srv_type_is_valid("_foo."));
+ assert_se(!dnssd_srv_type_is_valid("_föo._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_f\no._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_800._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_-800._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_-foo._tcp"));
+ assert_se(!dnssd_srv_type_is_valid("_piep._foo._udp"));
+ assert_se(!dnssd_srv_type_is_valid("_foo._unknown"));
+}
+
+static void test_dns_service_join_one(const char *a, const char *b, const char *c, int r, const char *d) {
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL, *t = NULL;
+
+ log_info("%s, %s, %s, →%d, %s", a, b, c, r, d);
+
+ assert_se(dns_service_join(a, b, c, &t) == r);
+ assert_se(streq_ptr(t, d));
+
+ if (r < 0)
+ return;
+
+ assert_se(dns_service_split(t, &x, &y, &z) >= 0);
+ assert_se(streq_ptr(a, x));
+ assert_se(streq_ptr(b, y));
+ assert_se(dns_name_equal(c, z) > 0);
+}
+
+static void test_dns_service_join(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_service_join_one("", "", "", -EINVAL, NULL);
+ test_dns_service_join_one("", "_http._tcp", "", -EINVAL, NULL);
+ test_dns_service_join_one("", "_http._tcp", "foo", -EINVAL, NULL);
+ test_dns_service_join_one("foo", "", "foo", -EINVAL, NULL);
+ test_dns_service_join_one("foo", "foo", "foo", -EINVAL, NULL);
+
+ test_dns_service_join_one("foo", "_http._tcp", "", 0, "foo._http._tcp");
+ test_dns_service_join_one(NULL, "_http._tcp", "", 0, "_http._tcp");
+ test_dns_service_join_one("foo", "_http._tcp", "foo", 0, "foo._http._tcp.foo");
+ test_dns_service_join_one(NULL, "_http._tcp", "foo", 0, "_http._tcp.foo");
+ test_dns_service_join_one("Lennart's PC", "_pc._tcp", "foo.bar.com", 0, "Lennart\\039s\\032PC._pc._tcp.foo.bar.com");
+ test_dns_service_join_one(NULL, "_pc._tcp", "foo.bar.com", 0, "_pc._tcp.foo.bar.com");
+}
+
+static void test_dns_service_split_one(const char *joined, const char *a, const char *b, const char *c, int r) {
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL, *t = NULL;
+
+ log_info("%s, %s, %s, %s, →%d", joined, a, b, c, r);
+
+ assert_se(dns_service_split(joined, &x, &y, &z) == r);
+ assert_se(streq_ptr(x, a));
+ assert_se(streq_ptr(y, b));
+ assert_se(streq_ptr(z, c));
+
+ if (r < 0)
+ return;
+
+ if (y) {
+ assert_se(dns_service_join(x, y, z, &t) == 0);
+ assert_se(dns_name_equal(joined, t) > 0);
+ } else
+ assert_se(!x && dns_name_equal(z, joined) > 0);
+}
+
+static void test_dns_service_split(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_service_split_one("", NULL, NULL, ".", 0);
+ test_dns_service_split_one("foo", NULL, NULL, "foo", 0);
+ test_dns_service_split_one("foo.bar", NULL, NULL, "foo.bar", 0);
+ test_dns_service_split_one("_foo.bar", NULL, NULL, "_foo.bar", 0);
+ test_dns_service_split_one("_foo._bar", NULL, "_foo._bar", ".", 0);
+ test_dns_service_split_one("_meh._foo._bar", "_meh", "_foo._bar", ".", 0);
+ test_dns_service_split_one("Wuff\\032Wuff._foo._bar.waldo.com", "Wuff Wuff", "_foo._bar", "waldo.com", 0);
+}
+
+static void test_dns_name_change_suffix_one(const char *name, const char *old_suffix, const char *new_suffix, int r, const char *result) {
+ _cleanup_free_ char *s = NULL;
+
+ log_info("%s, %s, %s, →%s", name, old_suffix, new_suffix, result);
+
+ assert_se(dns_name_change_suffix(name, old_suffix, new_suffix, &s) == r);
+ assert_se(streq_ptr(s, result));
+}
+
+static void test_dns_name_change_suffix(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_change_suffix_one("foo.bar", "bar", "waldo", 1, "foo.waldo");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "foo.bar.waldi.quux", "piff.paff", 1, "piff.paff");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "bar.waldi.quux", "piff.paff", 1, "foo.piff.paff");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "waldi.quux", "piff.paff", 1, "foo.bar.piff.paff");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "quux", "piff.paff", 1, "foo.bar.waldi.piff.paff");
+ test_dns_name_change_suffix_one("foo.bar.waldi.quux", "", "piff.paff", 1, "foo.bar.waldi.quux.piff.paff");
+ test_dns_name_change_suffix_one("", "", "piff.paff", 1, "piff.paff");
+ test_dns_name_change_suffix_one("", "", "", 1, ".");
+ test_dns_name_change_suffix_one("a", "b", "c", 0, NULL);
+}
+
+static void test_dns_name_suffix_one(const char *name, unsigned n_labels, const char *result, int ret) {
+ const char *p = NULL;
+
+ log_info("%s, %d, →%s, %d", name, n_labels, result, ret);
+
+ assert_se(ret == dns_name_suffix(name, n_labels, &p));
+ assert_se(streq_ptr(p, result));
+}
+
+static void test_dns_name_suffix(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_suffix_one("foo.bar", 2, "foo.bar", 0);
+ test_dns_name_suffix_one("foo.bar", 1, "bar", 1);
+ test_dns_name_suffix_one("foo.bar", 0, "", 2);
+ test_dns_name_suffix_one("foo.bar", 3, NULL, -EINVAL);
+ test_dns_name_suffix_one("foo.bar", 4, NULL, -EINVAL);
+
+ test_dns_name_suffix_one("bar", 1, "bar", 0);
+ test_dns_name_suffix_one("bar", 0, "", 1);
+ test_dns_name_suffix_one("bar", 2, NULL, -EINVAL);
+ test_dns_name_suffix_one("bar", 3, NULL, -EINVAL);
+
+ test_dns_name_suffix_one("", 0, "", 0);
+ test_dns_name_suffix_one("", 1, NULL, -EINVAL);
+ test_dns_name_suffix_one("", 2, NULL, -EINVAL);
+}
+
+static void test_dns_name_count_labels_one(const char *name, int n) {
+ log_info("%s, →%d", name, n);
+
+ assert_se(dns_name_count_labels(name) == n);
+}
+
+static void test_dns_name_count_labels(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_count_labels_one("foo.bar.quux.", 3);
+ test_dns_name_count_labels_one("foo.bar.quux", 3);
+ test_dns_name_count_labels_one("foo.bar.", 2);
+ test_dns_name_count_labels_one("foo.bar", 2);
+ test_dns_name_count_labels_one("foo.", 1);
+ test_dns_name_count_labels_one("foo", 1);
+ test_dns_name_count_labels_one("", 0);
+ test_dns_name_count_labels_one(".", 0);
+ test_dns_name_count_labels_one("..", -EINVAL);
+}
+
+static void test_dns_name_equal_skip_one(const char *a, unsigned n_labels, const char *b, int ret) {
+ log_info("%s, %u, %s, →%d", a, n_labels, b, ret);
+
+ assert_se(dns_name_equal_skip(a, n_labels, b) == ret);
+}
+
+static void test_dns_name_equal_skip(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_equal_skip_one("foo", 0, "bar", 0);
+ test_dns_name_equal_skip_one("foo", 0, "foo", 1);
+ test_dns_name_equal_skip_one("foo", 1, "foo", 0);
+ test_dns_name_equal_skip_one("foo", 2, "foo", 0);
+
+ test_dns_name_equal_skip_one("foo.bar", 0, "foo.bar", 1);
+ test_dns_name_equal_skip_one("foo.bar", 1, "foo.bar", 0);
+ test_dns_name_equal_skip_one("foo.bar", 2, "foo.bar", 0);
+ test_dns_name_equal_skip_one("foo.bar", 3, "foo.bar", 0);
+
+ test_dns_name_equal_skip_one("foo.bar", 0, "bar", 0);
+ test_dns_name_equal_skip_one("foo.bar", 1, "bar", 1);
+ test_dns_name_equal_skip_one("foo.bar", 2, "bar", 0);
+ test_dns_name_equal_skip_one("foo.bar", 3, "bar", 0);
+
+ test_dns_name_equal_skip_one("foo.bar", 0, "", 0);
+ test_dns_name_equal_skip_one("foo.bar", 1, "", 0);
+ test_dns_name_equal_skip_one("foo.bar", 2, "", 1);
+ test_dns_name_equal_skip_one("foo.bar", 3, "", 0);
+
+ test_dns_name_equal_skip_one("", 0, "", 1);
+ test_dns_name_equal_skip_one("", 1, "", 0);
+ test_dns_name_equal_skip_one("", 1, "foo", 0);
+ test_dns_name_equal_skip_one("", 2, "foo", 0);
+}
+
+static void test_dns_name_compare_func(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_name_compare_func("", "") == 0);
+ assert_se(dns_name_compare_func("", ".") == 0);
+ assert_se(dns_name_compare_func(".", "") == 0);
+ assert_se(dns_name_compare_func("foo", "foo.") == 0);
+ assert_se(dns_name_compare_func("foo.", "foo") == 0);
+ assert_se(dns_name_compare_func("foo", "foo") == 0);
+ assert_se(dns_name_compare_func("foo.", "foo.") == 0);
+ assert_se(dns_name_compare_func("heise.de", "HEISE.DE.") == 0);
+
+ assert_se(dns_name_compare_func("de.", "heise.de") != 0);
+}
+
+static void test_dns_name_common_suffix_one(const char *a, const char *b, const char *result) {
+ const char *c;
+
+ log_info("%s, %s, →%s", a, b, result);
+
+ assert_se(dns_name_common_suffix(a, b, &c) >= 0);
+ assert_se(streq(c, result));
+}
+
+static void test_dns_name_common_suffix(void) {
+ log_info("/* %s */", __func__);
+
+ test_dns_name_common_suffix_one("", "", "");
+ test_dns_name_common_suffix_one("foo", "", "");
+ test_dns_name_common_suffix_one("", "foo", "");
+ test_dns_name_common_suffix_one("foo", "bar", "");
+ test_dns_name_common_suffix_one("bar", "foo", "");
+ test_dns_name_common_suffix_one("foo", "foo", "foo");
+ test_dns_name_common_suffix_one("quux.foo", "foo", "foo");
+ test_dns_name_common_suffix_one("foo", "quux.foo", "foo");
+ test_dns_name_common_suffix_one("this.is.a.short.sentence", "this.is.another.short.sentence", "short.sentence");
+ test_dns_name_common_suffix_one("FOO.BAR", "tEST.bAR", "BAR");
+}
+
+static void test_dns_name_apply_idna_one(const char *s, int expected, const char *result) {
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ r = dns_name_apply_idna(s, &buf);
+ log_debug("dns_name_apply_idna: \"%s\" → %d/\"%s\" (expected %d/\"%s\")",
+ s, r, strnull(buf), expected, strnull(result));
+
+ /* Different libidn2 versions are more and less accepting
+ * of underscore-prefixed names. So let's list the lowest
+ * expected return value. */
+ assert_se(r >= expected);
+ if (expected == 1)
+ assert_se(dns_name_equal(buf, result) == 1);
+}
+
+static void test_dns_name_apply_idna(void) {
+#if HAVE_LIBIDN2 || HAVE_LIBIDN
+ const int ret = 1;
+#else
+ const int ret = 0;
+#endif
+ log_info("/* %s */", __func__);
+
+ /* IDNA2008 forbids names with hyphens in third and fourth positions
+ * (https://tools.ietf.org/html/rfc5891#section-4.2.3.1).
+ * IDNA2003 does not have this restriction
+ * (https://tools.ietf.org/html/rfc3490#section-5).
+ * This means that when using libidn we will transform and test more
+ * labels. If registrars follow IDNA2008 we'll just be performing a
+ * useless lookup.
+ */
+#if HAVE_LIBIDN
+ const int ret2 = 1;
+#else
+ const int ret2 = 0;
+#endif
+
+ test_dns_name_apply_idna_one("", ret, "");
+ test_dns_name_apply_idna_one("foo", ret, "foo");
+ test_dns_name_apply_idna_one("foo.", ret, "foo");
+ test_dns_name_apply_idna_one("foo.bar", ret, "foo.bar");
+ test_dns_name_apply_idna_one("foo.bar.", ret, "foo.bar");
+ test_dns_name_apply_idna_one("föö", ret, "xn--f-1gaa");
+ test_dns_name_apply_idna_one("föö.", ret, "xn--f-1gaa");
+ test_dns_name_apply_idna_one("föö.bär", ret, "xn--f-1gaa.xn--br-via");
+ test_dns_name_apply_idna_one("föö.bär.", ret, "xn--f-1gaa.xn--br-via");
+ test_dns_name_apply_idna_one("xn--f-1gaa.xn--br-via", ret, "xn--f-1gaa.xn--br-via");
+
+ test_dns_name_apply_idna_one("_443._tcp.fedoraproject.org", ret2,
+ "_443._tcp.fedoraproject.org");
+ test_dns_name_apply_idna_one("_443", ret2, "_443");
+ test_dns_name_apply_idna_one("gateway", ret, "gateway");
+ test_dns_name_apply_idna_one("_gateway", ret2, "_gateway");
+
+ test_dns_name_apply_idna_one("r3---sn-ab5l6ne7.googlevideo.com", ret2,
+ ret2 ? "r3---sn-ab5l6ne7.googlevideo.com" : "");
+}
+
+static void test_dns_name_is_valid_or_address(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_name_is_valid_or_address(NULL) == 0);
+ assert_se(dns_name_is_valid_or_address("") == 0);
+ assert_se(dns_name_is_valid_or_address("foobar") > 0);
+ assert_se(dns_name_is_valid_or_address("foobar.com") > 0);
+ assert_se(dns_name_is_valid_or_address("foobar..com") == 0);
+ assert_se(dns_name_is_valid_or_address("foobar.com.") > 0);
+ assert_se(dns_name_is_valid_or_address("127.0.0.1") > 0);
+ assert_se(dns_name_is_valid_or_address("::") > 0);
+ assert_se(dns_name_is_valid_or_address("::1") > 0);
+}
+
+static void test_dns_name_dot_suffixed(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(dns_name_dot_suffixed("") == 0);
+ assert_se(dns_name_dot_suffixed(".") > 0);
+ assert_se(dns_name_dot_suffixed("foo") == 0);
+ assert_se(dns_name_dot_suffixed("foo.") > 0);
+ assert_se(dns_name_dot_suffixed("foo\\..") > 0);
+ assert_se(dns_name_dot_suffixed("foo\\.") == 0);
+ assert_se(dns_name_dot_suffixed("foo.bar.") > 0);
+ assert_se(dns_name_dot_suffixed("foo.bar\\.\\.\\..") > 0);
+ assert_se(dns_name_dot_suffixed("foo.bar\\.\\.\\.\\.") == 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_dns_label_unescape();
+ test_dns_label_unescape_suffix();
+ test_dns_label_escape();
+ test_dns_name_normalize();
+ test_dns_name_equal();
+ test_dns_name_endswith();
+ test_dns_name_startswith();
+ test_dns_name_between();
+ test_dns_name_is_root();
+ test_dns_name_is_single_label();
+ test_dns_name_reverse();
+ test_dns_name_concat();
+ test_dns_name_is_valid();
+ test_dns_name_to_wire_format();
+ test_dns_service_name_is_valid();
+ test_dns_srv_type_is_valid();
+ test_dnssd_srv_type_is_valid();
+ test_dns_service_join();
+ test_dns_service_split();
+ test_dns_name_change_suffix();
+ test_dns_name_suffix();
+ test_dns_name_count_labels();
+ test_dns_name_equal_skip();
+ test_dns_name_compare_func();
+ test_dns_name_common_suffix();
+ test_dns_name_apply_idna();
+ test_dns_name_is_valid_or_address();
+ test_dns_name_dot_suffixed();
+
+ return 0;
+}
diff --git a/src/test/test-ellipsize.c b/src/test/test-ellipsize.c
new file mode 100644
index 0000000..a25c0b5
--- /dev/null
+++ b/src/test/test-ellipsize.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "utf8.h"
+
+static void test_ellipsize_mem_one(const char *s, size_t old_length, size_t new_length) {
+ _cleanup_free_ char *n = NULL;
+ _cleanup_free_ char *t1 = NULL, *t2 = NULL, *t3 = NULL;
+ char buf[LINE_MAX];
+ bool has_wide_chars;
+ size_t max_width;
+
+ n = memdup_suffix0(s, old_length);
+
+ if (!utf8_is_valid(n))
+ /* We don't support invalid sequences… */
+ return;
+
+ /* Report out inputs. We duplicate the data so that cellescape
+ * can properly report truncated multibyte sequences. */
+ log_info("%s \"%s\" old_length=%zu/%zu new_length=%zu", __func__,
+ cellescape(buf, sizeof buf, n),
+ old_length, utf8_console_width(n),
+ new_length);
+
+ /* To keep this test simple, any case with wide chars starts with this glyph */
+ has_wide_chars = startswith(s, "你");
+ max_width = MIN(utf8_console_width(n), new_length);
+
+ t1 = ellipsize_mem(n, old_length, new_length, 30);
+ log_info("30%% → %s utf8_console_width=%zu", t1, utf8_console_width(t1));
+ if (!has_wide_chars)
+ assert_se(utf8_console_width(t1) == max_width);
+ else
+ assert_se(utf8_console_width(t1) <= max_width);
+
+ t2 = ellipsize_mem(n, old_length, new_length, 90);
+ log_info("90%% → %s utf8_console_width=%zu", t2, utf8_console_width(t2));
+ if (!has_wide_chars)
+ assert_se(utf8_console_width(t2) == max_width);
+ else
+ assert_se(utf8_console_width(t2) <= max_width);
+
+ t3 = ellipsize_mem(n, old_length, new_length, 100);
+ log_info("100%% → %s utf8_console_width=%zu", t3, utf8_console_width(t3));
+ if (!has_wide_chars)
+ assert_se(utf8_console_width(t3) == max_width);
+ else
+ assert_se(utf8_console_width(t3) <= max_width);
+
+ if (new_length >= old_length) {
+ assert_se(streq(t1, n));
+ assert_se(streq(t2, n));
+ assert_se(streq(t3, n));
+ }
+}
+
+static void test_ellipsize_mem(void) {
+ const char *s;
+ ssize_t l, k;
+
+ FOREACH_STRING(s,
+ "_XXXXXXXXXXX_", /* ASCII */
+ "_aąęółśćńżźć_", /* two-byte utf-8 */
+ "გამარჯობა", /* multi-byte utf-8 */
+ "你好世界", /* wide characters */
+ "你გą世óoó界") /* a mix */
+
+ for (l = strlen(s); l >= 0; l--)
+ for (k = strlen(s) + 1; k >= 0; k--)
+ test_ellipsize_mem_one(s, l, k);
+}
+
+static void test_ellipsize_one(const char *p) {
+ _cleanup_free_ char *t;
+ t = ellipsize(p, columns(), 70);
+ puts(t);
+ free(t);
+ t = ellipsize(p, columns(), 0);
+ puts(t);
+ free(t);
+ t = ellipsize(p, columns(), 100);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 0, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 1, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 2, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 3, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 4, 50);
+ puts(t);
+ free(t);
+ t = ellipsize(p, 5, 50);
+ puts(t);
+}
+
+static void test_ellipsize(void) {
+ test_ellipsize_one(DIGITS LETTERS DIGITS LETTERS);
+ test_ellipsize_one("한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어");
+ test_ellipsize_one("-日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国");
+ test_ellipsize_one("中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国-中国中国中国中国中国中国中国中国中国中国中国中国中国");
+ test_ellipsize_one("sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd");
+ test_ellipsize_one("🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮");
+ test_ellipsize_one("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
+ test_ellipsize_one("shórt");
+}
+
+int main(int argc, char *argv[]) {
+ test_ellipsize_mem();
+ test_ellipsize();
+
+ return 0;
+}
diff --git a/src/test/test-emergency-action.c b/src/test/test-emergency-action.c
new file mode 100644
index 0000000..88214aa
--- /dev/null
+++ b/src/test/test-emergency-action.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "emergency-action.h"
+#include "tests.h"
+
+static void test_parse_emergency_action(void) {
+ EmergencyAction x;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_emergency_action("none", false, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_NONE);
+ assert_se(parse_emergency_action("reboot", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("reboot-force", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("reboot-immediate", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("poweroff", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("poweroff-force", false, &x) == -EOPNOTSUPP);
+ assert_se(parse_emergency_action("poweroff-immediate", false, &x) == -EOPNOTSUPP);
+ assert_se(x == EMERGENCY_ACTION_NONE);
+ assert_se(parse_emergency_action("exit", false, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_EXIT);
+ assert_se(parse_emergency_action("exit-force", false, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_EXIT_FORCE);
+ assert_se(parse_emergency_action("exit-forcee", false, &x) == -EINVAL);
+
+ assert_se(parse_emergency_action("none", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_NONE);
+ assert_se(parse_emergency_action("reboot", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_REBOOT);
+ assert_se(parse_emergency_action("reboot-force", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_REBOOT_FORCE);
+ assert_se(parse_emergency_action("reboot-immediate", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_REBOOT_IMMEDIATE);
+ assert_se(parse_emergency_action("poweroff", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_POWEROFF);
+ assert_se(parse_emergency_action("poweroff-force", true, &x) == 0);
+ assert_se(x == EMERGENCY_ACTION_POWEROFF_FORCE);
+ assert_se(parse_emergency_action("poweroff-immediate", true, &x) == 0);
+ assert_se(parse_emergency_action("exit", true, &x) == 0);
+ assert_se(parse_emergency_action("exit-force", true, &x) == 0);
+ assert_se(parse_emergency_action("exit-forcee", true, &x) == -EINVAL);
+ assert_se(x == EMERGENCY_ACTION_EXIT_FORCE);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_INFO);
+
+ test_parse_emergency_action();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-engine.c b/src/test/test-engine.c
new file mode 100644
index 0000000..cd7cfd9
--- /dev/null
+++ b/src/test/test-engine.c
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "bus-util.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "strv.h"
+#include "tests.h"
+#include "service.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error err = SD_BUS_ERROR_NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *g = NULL,
+ *h = NULL, *i = NULL, *a_conj = NULL, *unit_with_multiple_dashes = NULL;
+ Job *j;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ /* prepare the test */
+ _cleanup_free_ char *unit_dir = NULL;
+ assert_se(get_testdata_dir("units", &unit_dir) >= 0);
+ assert_se(set_unit_path(unit_dir) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
+ if (manager_errno_skip_test(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ printf("Load1:\n");
+ assert_se(manager_load_startable_unit_or_warn(m, "a.service", NULL, &a) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "b.service", NULL, &b) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "c.service", NULL, &c) >= 0);
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test1: (Trivial)\n");
+ r = manager_add_job(m, JOB_START, c, JOB_REPLACE, NULL, &err, &j);
+ if (sd_bus_error_is_set(&err))
+ log_error("error: %s: %s", err.name, err.message);
+ assert_se(r == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Load2:\n");
+ manager_clear_jobs(m);
+ assert_se(manager_load_startable_unit_or_warn(m, "d.service", NULL, &d) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "e.service", NULL, &e) >= 0);
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test2: (Cyclic Order, Unfixable)\n");
+ assert_se(manager_add_job(m, JOB_START, d, JOB_REPLACE, NULL, NULL, &j) == -EDEADLK);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Test3: (Cyclic Order, Fixable, Garbage Collector)\n");
+ assert_se(manager_add_job(m, JOB_START, e, JOB_REPLACE, NULL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Test4: (Identical transaction)\n");
+ assert_se(manager_add_job(m, JOB_START, e, JOB_FAIL, NULL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Load3:\n");
+ assert_se(manager_load_startable_unit_or_warn(m, "g.service", NULL, &g) >= 0);
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test5: (Colliding transaction, fail)\n");
+ assert_se(manager_add_job(m, JOB_START, g, JOB_FAIL, NULL, NULL, &j) == -EDEADLK);
+
+ printf("Test6: (Colliding transaction, replace)\n");
+ assert_se(manager_add_job(m, JOB_START, g, JOB_REPLACE, NULL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Test7: (Unmergeable job type, fail)\n");
+ assert_se(manager_add_job(m, JOB_STOP, g, JOB_FAIL, NULL, NULL, &j) == -EDEADLK);
+
+ printf("Test8: (Mergeable job type, fail)\n");
+ assert_se(manager_add_job(m, JOB_RESTART, g, JOB_FAIL, NULL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Test9: (Unmergeable job type, replace)\n");
+ assert_se(manager_add_job(m, JOB_STOP, g, JOB_REPLACE, NULL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Load4:\n");
+ assert_se(manager_load_startable_unit_or_warn(m, "h.service", NULL, &h) >= 0);
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test10: (Unmergeable job type of auxiliary job, fail)\n");
+ assert_se(manager_add_job(m, JOB_START, h, JOB_FAIL, NULL, NULL, &j) == 0);
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Load5:\n");
+ manager_clear_jobs(m);
+ assert_se(manager_load_startable_unit_or_warn(m, "i.service", NULL, &i) >= 0);
+ SERVICE(a)->state = SERVICE_RUNNING;
+ SERVICE(d)->state = SERVICE_RUNNING;
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test11: (Start/stop job ordering, execution cycle)\n");
+ assert_se(manager_add_job(m, JOB_START, i, JOB_FAIL, NULL, NULL, &j) == 0);
+ assert_se(unit_has_job_type(a, JOB_STOP));
+ assert_se(unit_has_job_type(d, JOB_STOP));
+ assert_se(unit_has_job_type(b, JOB_START));
+ manager_dump_jobs(m, stdout, "\t");
+
+ printf("Load6:\n");
+ manager_clear_jobs(m);
+ assert_se(manager_load_startable_unit_or_warn(m, "a-conj.service", NULL, &a_conj) >= 0);
+ SERVICE(a)->state = SERVICE_DEAD;
+ manager_dump_units(m, stdout, "\t");
+
+ printf("Test12: (Trivial cycle, Unfixable)\n");
+ assert_se(manager_add_job(m, JOB_START, a_conj, JOB_REPLACE, NULL, NULL, &j) == -EDEADLK);
+ manager_dump_jobs(m, stdout, "\t");
+
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b));
+ assert_se(!hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c));
+ assert_se(!hashmap_get(c->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+
+ assert_se(unit_add_dependency(a, UNIT_PROPAGATES_RELOAD_TO, b, true, UNIT_DEPENDENCY_UDEV) == 0);
+ assert_se(unit_add_dependency(a, UNIT_PROPAGATES_RELOAD_TO, c, true, UNIT_DEPENDENCY_PROC_SWAP) == 0);
+
+ assert_se(hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b));
+ assert_se(hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+ assert_se(hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c));
+ assert_se(hashmap_get(c->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+
+ unit_remove_dependencies(a, UNIT_DEPENDENCY_UDEV);
+
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b));
+ assert_se(!hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+ assert_se(hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c));
+ assert_se(hashmap_get(c->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+
+ unit_remove_dependencies(a, UNIT_DEPENDENCY_PROC_SWAP);
+
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], b));
+ assert_se(!hashmap_get(b->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+ assert_se(!hashmap_get(a->dependencies[UNIT_PROPAGATES_RELOAD_TO], c));
+ assert_se(!hashmap_get(c->dependencies[UNIT_RELOAD_PROPAGATED_FROM], a));
+
+ assert_se(manager_load_unit(m, "unit-with-multiple-dashes.service", NULL, NULL, &unit_with_multiple_dashes) >= 0);
+
+ assert_se(strv_equal(unit_with_multiple_dashes->documentation, STRV_MAKE("man:test", "man:override2", "man:override3")));
+ assert_se(streq_ptr(unit_with_multiple_dashes->description, "override4"));
+
+ return 0;
+}
diff --git a/src/test/test-env-file.c b/src/test/test-env-file.c
new file mode 100644
index 0000000..a3acde1
--- /dev/null
+++ b/src/test/test-env-file.c
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+#define env_file_1 \
+ "a=a\n" \
+ "b=b\\\n" \
+ "c\n" \
+ "d=d\\\n" \
+ "e\\\n" \
+ "f\n" \
+ "g=g\\ \n" \
+ "h=h\n" \
+ "i=i\\"
+
+#define env_file_2 \
+ "a=a\\\n"
+
+#define env_file_3 \
+ "#SPAMD_ARGS=\"-d --socketpath=/var/lib/bulwark/spamd \\\n" \
+ "#--nouser-config \\\n" \
+ "normal=line"
+
+#define env_file_4 \
+ "# Generated\n" \
+ "\n" \
+ "HWMON_MODULES=\"coretemp f71882fg\"\n" \
+ "\n" \
+ "# For compatibility reasons\n" \
+ "\n" \
+ "MODULE_0=coretemp\n" \
+ "MODULE_1=f71882fg"
+
+#define env_file_5 \
+ "a=\n" \
+ "b="
+
+static void test_load_env_file_1(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ int r;
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_1, strlen(env_file_1)) == strlen(env_file_1));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(streq(data[0], "a=a"));
+ assert_se(streq(data[1], "b=bc"));
+ assert_se(streq(data[2], "d=def"));
+ assert_se(streq(data[3], "g=g "));
+ assert_se(streq(data[4], "h=h"));
+ assert_se(streq(data[5], "i=i"));
+ assert_se(data[6] == NULL);
+}
+
+static void test_load_env_file_2(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ int r;
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_2, strlen(env_file_2)) == strlen(env_file_2));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(streq(data[0], "a=a"));
+ assert_se(data[1] == NULL);
+}
+
+static void test_load_env_file_3(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ int r;
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_3, strlen(env_file_3)) == strlen(env_file_3));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(data == NULL);
+}
+
+static void test_load_env_file_4(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+ int r;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_4, strlen(env_file_4)) == strlen(env_file_4));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(streq(data[0], "HWMON_MODULES=coretemp f71882fg"));
+ assert_se(streq(data[1], "MODULE_0=coretemp"));
+ assert_se(streq(data[2], "MODULE_1=f71882fg"));
+ assert_se(data[3] == NULL);
+}
+
+static void test_load_env_file_5(void) {
+ _cleanup_strv_free_ char **data = NULL;
+ int r;
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-load-env-file.XXXXXX";
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, env_file_5, strlen(env_file_5)) == strlen(env_file_5));
+
+ r = load_env_file(NULL, name, &data);
+ assert_se(r == 0);
+ assert_se(streq(data[0], "a="));
+ assert_se(streq(data[1], "b="));
+ assert_se(data[2] == NULL);
+}
+
+static void test_write_and_load_env_file(void) {
+ const char *v;
+
+ /* Make sure that our writer, parser and the shell agree on what our env var files mean */
+
+ FOREACH_STRING(v,
+ "obbardc-laptop",
+ "obbardc\\-laptop",
+ "obbardc-lap\\top",
+ "obbardc-lap\\top",
+ "obbardc-lap\\\\top",
+ "double\"quote",
+ "single\'quote",
+ "dollar$dollar",
+ "newline\nnewline") {
+ _cleanup_(unlink_and_freep) char *p = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ _cleanup_free_ char *j = NULL, *w = NULL, *cmd = NULL, *from_shell = NULL;
+ _cleanup_pclose_ FILE *f = NULL;
+ size_t sz;
+
+ assert_se(tempfn_random_child(NULL, NULL, &p) >= 0);
+
+ assert_se(j = strjoin("TEST=", v));
+ assert_se(write_env_file(p, STRV_MAKE(j)) >= 0);
+
+ assert_se(cmd = strjoin(". ", p, " && /bin/echo -n \"$TEST\""));
+ assert_se(f = popen(cmd, "re"));
+ assert_se(read_full_stream(f, &from_shell, &sz) >= 0);
+ assert_se(sz == strlen(v));
+ assert_se(streq(from_shell, v));
+
+ assert_se(load_env_file(NULL, p, &l) >= 0);
+ assert_se(strv_equal(l, STRV_MAKE(j)));
+
+ assert_se(parse_env_file(NULL, p, "TEST", &w) >= 0);
+ assert_se(streq_ptr(w, v));
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_load_env_file_1();
+ test_load_env_file_2();
+ test_load_env_file_3();
+ test_load_env_file_4();
+ test_load_env_file_5();
+
+ test_write_and_load_env_file();
+
+ return 0;
+}
diff --git a/src/test/test-env-util.c b/src/test/test-env-util.c
new file mode 100644
index 0000000..f77b1cd
--- /dev/null
+++ b/src/test/test-env-util.c
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "serialize.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static void test_strv_env_delete(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **c = NULL, **d = NULL;
+
+ a = strv_new("FOO=BAR", "WALDO=WALDO", "WALDO=", "PIEP", "SCHLUMPF=SMURF");
+ assert_se(a);
+
+ b = strv_new("PIEP", "FOO");
+ assert_se(b);
+
+ c = strv_new("SCHLUMPF");
+ assert_se(c);
+
+ d = strv_env_delete(a, 2, b, c);
+ assert_se(d);
+
+ assert_se(streq(d[0], "WALDO=WALDO"));
+ assert_se(streq(d[1], "WALDO="));
+ assert_se(strv_length(d) == 2);
+}
+
+static void test_strv_env_get(void) {
+ log_info("/* %s */", __func__);
+
+ char **l = STRV_MAKE("ONE_OR_TWO=1", "THREE=3", "ONE_OR_TWO=2", "FOUR=4");
+
+ assert_se(streq(strv_env_get(l, "ONE_OR_TWO"), "2"));
+ assert_se(streq(strv_env_get(l, "THREE"), "3"));
+ assert_se(streq(strv_env_get(l, "FOUR"), "4"));
+}
+
+static void test_strv_env_unset(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_strv_free_ char **l = NULL;
+
+ l = strv_new("PIEP", "SCHLUMPF=SMURFF", "NANANANA=YES");
+ assert_se(l);
+
+ assert_se(strv_env_unset(l, "SCHLUMPF") == l);
+
+ assert_se(streq(l[0], "PIEP"));
+ assert_se(streq(l[1], "NANANANA=YES"));
+ assert_se(strv_length(l) == 2);
+}
+
+static void test_strv_env_set(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_strv_free_ char **l = NULL, **r = NULL;
+
+ l = strv_new("PIEP", "SCHLUMPF=SMURFF", "NANANANA=YES");
+ assert_se(l);
+
+ r = strv_env_set(l, "WALDO=WALDO");
+ assert_se(r);
+
+ assert_se(streq(r[0], "PIEP"));
+ assert_se(streq(r[1], "SCHLUMPF=SMURFF"));
+ assert_se(streq(r[2], "NANANANA=YES"));
+ assert_se(streq(r[3], "WALDO=WALDO"));
+ assert_se(strv_length(r) == 4);
+}
+
+static void test_strv_env_merge(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **r = NULL;
+
+ a = strv_new("FOO=BAR", "WALDO=WALDO", "WALDO=", "PIEP", "SCHLUMPF=SMURF");
+ assert_se(a);
+
+ b = strv_new("FOO=KKK", "FOO=", "PIEP=", "SCHLUMPF=SMURFF", "NANANANA=YES");
+ assert_se(b);
+
+ r = strv_env_merge(2, a, b);
+ assert_se(r);
+ assert_se(streq(r[0], "FOO="));
+ assert_se(streq(r[1], "WALDO="));
+ assert_se(streq(r[2], "PIEP"));
+ assert_se(streq(r[3], "SCHLUMPF=SMURFF"));
+ assert_se(streq(r[4], "PIEP="));
+ assert_se(streq(r[5], "NANANANA=YES"));
+ assert_se(strv_length(r) == 6);
+
+ assert_se(strv_env_clean(r) == r);
+ assert_se(streq(r[0], "FOO="));
+ assert_se(streq(r[1], "WALDO="));
+ assert_se(streq(r[2], "SCHLUMPF=SMURFF"));
+ assert_se(streq(r[3], "PIEP="));
+ assert_se(streq(r[4], "NANANANA=YES"));
+ assert_se(strv_length(r) == 5);
+}
+
+static void test_env_strv_get_n(void) {
+ log_info("/* %s */", __func__);
+
+ const char *_env[] = {
+ "FOO=NO NO NO",
+ "FOO=BAR BAR",
+ "BAR=waldo",
+ "PATH=unset",
+ NULL
+ };
+ char **env = (char**) _env;
+
+ assert_se(streq(strv_env_get_n(env, "FOO__", 3, 0), "BAR BAR"));
+ assert_se(streq(strv_env_get_n(env, "FOO__", 3, REPLACE_ENV_USE_ENVIRONMENT), "BAR BAR"));
+ assert_se(streq(strv_env_get_n(env, "FOO", 3, 0), "BAR BAR"));
+ assert_se(streq(strv_env_get_n(env, "FOO", 3, REPLACE_ENV_USE_ENVIRONMENT), "BAR BAR"));
+
+ assert_se(streq(strv_env_get_n(env, "PATH__", 4, 0), "unset"));
+ assert_se(streq(strv_env_get_n(env, "PATH", 4, 0), "unset"));
+ assert_se(streq(strv_env_get_n(env, "PATH__", 4, REPLACE_ENV_USE_ENVIRONMENT), "unset"));
+ assert_se(streq(strv_env_get_n(env, "PATH", 4, REPLACE_ENV_USE_ENVIRONMENT), "unset"));
+
+ env[3] = NULL; /* kill our $PATH */
+
+ assert_se(!strv_env_get_n(env, "PATH__", 4, 0));
+ assert_se(!strv_env_get_n(env, "PATH", 4, 0));
+ assert_se(streq_ptr(strv_env_get_n(env, "PATH__", 4, REPLACE_ENV_USE_ENVIRONMENT),
+ getenv("PATH")));
+ assert_se(streq_ptr(strv_env_get_n(env, "PATH", 4, REPLACE_ENV_USE_ENVIRONMENT),
+ getenv("PATH")));
+}
+
+static void test_replace_env(bool braceless) {
+ log_info("/* %s(braceless=%s) */", __func__, yes_no(braceless));
+
+ const char *env[] = {
+ "FOO=BAR BAR",
+ "BAR=waldo",
+ NULL
+ };
+ _cleanup_free_ char *t = NULL, *s = NULL, *q = NULL, *r = NULL, *p = NULL;
+ unsigned flags = REPLACE_ENV_ALLOW_BRACELESS*braceless;
+
+ t = replace_env("FOO=$FOO=${FOO}", (char**) env, flags);
+ assert_se(streq(t, braceless ? "FOO=BAR BAR=BAR BAR" : "FOO=$FOO=BAR BAR"));
+
+ s = replace_env("BAR=$BAR=${BAR}", (char**) env, flags);
+ assert_se(streq(s, braceless ? "BAR=waldo=waldo" : "BAR=$BAR=waldo"));
+
+ q = replace_env("BARBAR=$BARBAR=${BARBAR}", (char**) env, flags);
+ assert_se(streq(q, braceless ? "BARBAR==" : "BARBAR=$BARBAR="));
+
+ r = replace_env("BAR=$BAR$BAR${BAR}${BAR}", (char**) env, flags);
+ assert_se(streq(r, braceless ? "BAR=waldowaldowaldowaldo" : "BAR=$BAR$BARwaldowaldo"));
+
+ p = replace_env("${BAR}$BAR$BAR", (char**) env, flags);
+ assert_se(streq(p, braceless ? "waldowaldowaldo" : "waldo$BAR$BAR"));
+}
+
+static void test_replace_env2(bool extended) {
+ log_info("/* %s(extended=%s) */", __func__, yes_no(extended));
+
+ const char *env[] = {
+ "FOO=foo",
+ "BAR=bar",
+ NULL
+ };
+ _cleanup_free_ char *t = NULL, *s = NULL, *q = NULL, *r = NULL, *p = NULL, *x = NULL;
+ unsigned flags = REPLACE_ENV_ALLOW_EXTENDED*extended;
+
+ t = replace_env("FOO=${FOO:-${BAR}}", (char**) env, flags);
+ assert_se(streq(t, extended ? "FOO=foo" : "FOO=${FOO:-bar}"));
+
+ s = replace_env("BAR=${XXX:-${BAR}}", (char**) env, flags);
+ assert_se(streq(s, extended ? "BAR=bar" : "BAR=${XXX:-bar}"));
+
+ q = replace_env("XXX=${XXX:+${BAR}}", (char**) env, flags);
+ assert_se(streq(q, extended ? "XXX=" : "XXX=${XXX:+bar}"));
+
+ r = replace_env("FOO=${FOO:+${BAR}}", (char**) env, flags);
+ assert_se(streq(r, extended ? "FOO=bar" : "FOO=${FOO:+bar}"));
+
+ p = replace_env("FOO=${FOO:-${BAR}post}", (char**) env, flags);
+ assert_se(streq(p, extended ? "FOO=foo" : "FOO=${FOO:-barpost}"));
+
+ x = replace_env("XXX=${XXX:+${BAR}post}", (char**) env, flags);
+ assert_se(streq(x, extended ? "XXX=" : "XXX=${XXX:+barpost}"));
+}
+
+static void test_replace_env_argv(void) {
+ log_info("/* %s */", __func__);
+
+ const char *env[] = {
+ "FOO=BAR BAR",
+ "BAR=waldo",
+ NULL
+ };
+ const char *line[] = {
+ "FOO$FOO",
+ "FOO$FOOFOO",
+ "FOO${FOO}$FOO",
+ "FOO${FOO}",
+ "${FOO}",
+ "$FOO",
+ "$FOO$FOO",
+ "${FOO}${BAR}",
+ "${FOO",
+ "FOO$$${FOO}",
+ "$$FOO${FOO}",
+ "${FOO:-${BAR}}",
+ "${QUUX:-${FOO}}",
+ "${FOO:+${BAR}}",
+ "${QUUX:+${BAR}}",
+ "${FOO:+|${BAR}|}}",
+ "${FOO:+|${BAR}{|}",
+ NULL
+ };
+ _cleanup_strv_free_ char **r = NULL;
+
+ r = replace_env_argv((char**) line, (char**) env);
+ assert_se(r);
+ assert_se(streq(r[0], "FOO$FOO"));
+ assert_se(streq(r[1], "FOO$FOOFOO"));
+ assert_se(streq(r[2], "FOOBAR BAR$FOO"));
+ assert_se(streq(r[3], "FOOBAR BAR"));
+ assert_se(streq(r[4], "BAR BAR"));
+ assert_se(streq(r[5], "BAR"));
+ assert_se(streq(r[6], "BAR"));
+ assert_se(streq(r[7], "BAR BARwaldo"));
+ assert_se(streq(r[8], "${FOO"));
+ assert_se(streq(r[9], "FOO$BAR BAR"));
+ assert_se(streq(r[10], "$FOOBAR BAR"));
+ assert_se(streq(r[11], "${FOO:-waldo}"));
+ assert_se(streq(r[12], "${QUUX:-BAR BAR}"));
+ assert_se(streq(r[13], "${FOO:+waldo}"));
+ assert_se(streq(r[14], "${QUUX:+waldo}"));
+ assert_se(streq(r[15], "${FOO:+|waldo|}}"));
+ assert_se(streq(r[16], "${FOO:+|waldo{|}"));
+ assert_se(strv_length(r) == 17);
+}
+
+static void test_env_clean(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_strv_free_ char **e = strv_new("FOOBAR=WALDO",
+ "FOOBAR=WALDO",
+ "FOOBAR",
+ "F",
+ "X=",
+ "F=F",
+ "=",
+ "=F",
+ "",
+ "0000=000",
+ "äöüß=abcd",
+ "abcd=äöüß",
+ "xyz\n=xyz",
+ "xyz=xyz\n",
+ "another=one",
+ "another=final one",
+ "CRLF=\r\n",
+ "LESS_TERMCAP_mb=\x1b[01;31m",
+ "BASH_FUNC_foo%%=() { echo foo\n}");
+ assert_se(e);
+ assert_se(!strv_env_is_valid(e));
+ assert_se(strv_env_clean(e) == e);
+ assert_se(strv_env_is_valid(e));
+
+ assert_se(streq(e[0], "FOOBAR=WALDO"));
+ assert_se(streq(e[1], "X="));
+ assert_se(streq(e[2], "F=F"));
+ assert_se(streq(e[3], "abcd=äöüß"));
+ assert_se(streq(e[4], "xyz=xyz\n"));
+ assert_se(streq(e[5], "another=final one"));
+ assert_se(streq(e[6], "CRLF=\r\n"));
+ assert_se(streq(e[7], "LESS_TERMCAP_mb=\x1b[01;31m"));
+ assert_se(e[8] == NULL);
+}
+
+static void test_env_name_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(env_name_is_valid("test"));
+
+ assert_se(!env_name_is_valid(NULL));
+ assert_se(!env_name_is_valid(""));
+ assert_se(!env_name_is_valid("xxx\a"));
+ assert_se(!env_name_is_valid("xxx\007b"));
+ assert_se(!env_name_is_valid("\007\009"));
+ assert_se(!env_name_is_valid("5_starting_with_a_number_is_wrong"));
+ assert_se(!env_name_is_valid("#¤%&?_only_numbers_letters_and_underscore_allowed"));
+}
+
+static void test_env_value_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(env_value_is_valid(""));
+ assert_se(env_value_is_valid("głąb kapuściany"));
+ assert_se(env_value_is_valid("printf \"\\x1b]0;<mock-chroot>\\x07<mock-chroot>\""));
+ assert_se(env_value_is_valid("tab\tcharacter"));
+ assert_se(env_value_is_valid("new\nline"));
+ assert_se(env_value_is_valid("Show this?\rNope. Show that!"));
+ assert_se(env_value_is_valid("new DOS\r\nline"));
+
+ assert_se(!env_value_is_valid("\xc5")); /* A truncated utf-8-encoded "ł".
+ * We currently disallow that. */
+}
+
+static void test_env_assignment_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(env_assignment_is_valid("a="));
+ assert_se(env_assignment_is_valid("b=głąb kapuściany"));
+ assert_se(env_assignment_is_valid("c=\\007\\009\\011"));
+ assert_se(env_assignment_is_valid("e=printf \"\\x1b]0;<mock-chroot>\\x07<mock-chroot>\""));
+ assert_se(env_assignment_is_valid("f=tab\tcharacter"));
+ assert_se(env_assignment_is_valid("g=new\nline"));
+
+ assert_se(!env_assignment_is_valid("="));
+ assert_se(!env_assignment_is_valid("a b="));
+ assert_se(!env_assignment_is_valid("a ="));
+ assert_se(!env_assignment_is_valid(" b="));
+ /* no dots or dashes: http://tldp.org/LDP/abs/html/gotchas.html */
+ assert_se(!env_assignment_is_valid("a.b="));
+ assert_se(!env_assignment_is_valid("a-b="));
+ assert_se(!env_assignment_is_valid("\007=głąb kapuściany"));
+ assert_se(!env_assignment_is_valid("c\009=\007\009\011"));
+ assert_se(!env_assignment_is_valid("głąb=printf \"\x1b]0;<mock-chroot>\x07<mock-chroot>\""));
+}
+
+int main(int argc, char *argv[]) {
+ test_strv_env_delete();
+ test_strv_env_get();
+ test_strv_env_unset();
+ test_strv_env_set();
+ test_strv_env_merge();
+ test_env_strv_get_n();
+ test_replace_env(false);
+ test_replace_env(true);
+ test_replace_env2(false);
+ test_replace_env2(true);
+ test_replace_env_argv();
+ test_env_clean();
+ test_env_name_is_valid();
+ test_env_value_is_valid();
+ test_env_assignment_is_valid();
+
+ return 0;
+}
diff --git a/src/test/test-escape.c b/src/test/test-escape.c
new file mode 100644
index 0000000..3e410ca
--- /dev/null
+++ b/src/test/test-escape.c
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "macro.h"
+#include "tests.h"
+
+static void test_cescape(void) {
+ _cleanup_free_ char *t;
+
+ assert_se(t = cescape("abc\\\"\b\f\n\r\t\v\a\003\177\234\313"));
+ assert_se(streq(t, "abc\\\\\\\"\\b\\f\\n\\r\\t\\v\\a\\003\\177\\234\\313"));
+}
+
+static void test_xescape(void) {
+ _cleanup_free_ char *t;
+
+ assert_se(t = xescape("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", ""));
+ assert_se(streq(t, "abc\\x5c\"\\x08\\x0c\\x0a\\x0d\\x09\\x0b\\x07\\x03\\x7f\\x9c\\xcb"));
+}
+
+static void test_xescape_full(bool eight_bits) {
+ const char* escaped = !eight_bits ?
+ "a\\x62c\\x5c\"\\x08\\x0c\\x0a\\x0d\\x09\\x0b\\x07\\x03\\x7f\\x9c\\xcb" :
+ "a\\x62c\\x5c\"\\x08\\x0c\\x0a\\x0d\\x09\\x0b\\x07\\x03\177\234\313";
+ const unsigned full_fit = !eight_bits ? 55 : 46;
+
+ for (unsigned i = 0; i < 60; i++) {
+ _cleanup_free_ char *t;
+
+ assert_se(t = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i, eight_bits));
+
+ log_info("%02d: %s", i, t);
+
+ if (i >= full_fit)
+ assert_se(streq(t, escaped));
+ else if (i >= 3) {
+ /* We need up to four columns, so up to three three columns may be wasted */
+ assert_se(strlen(t) == i || strlen(t) == i - 1 || strlen(t) == i - 2 || strlen(t) == i - 3);
+ assert_se(strneq(t, escaped, i - 3) || strneq(t, escaped, i - 4) ||
+ strneq(t, escaped, i - 5) || strneq(t, escaped, i - 6));
+ assert_se(endswith(t, "..."));
+ } else {
+ assert_se(strlen(t) == i);
+ assert_se(strneq(t, "...", i));
+ }
+ }
+}
+
+static void test_cunescape(void) {
+ _cleanup_free_ char *unescaped;
+
+ assert_se(cunescape("abc\\\\\\\"\\b\\f\\a\\n\\r\\t\\v\\003\\177\\234\\313\\000\\x00", 0, &unescaped) < 0);
+ assert_se(cunescape("abc\\\\\\\"\\b\\f\\a\\n\\r\\t\\v\\003\\177\\234\\313\\000\\x00", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "abc\\\"\b\f\a\n\r\t\v\003\177\234\313\\000\\x00"));
+ unescaped = mfree(unescaped);
+
+ /* incomplete sequences */
+ assert_se(cunescape("\\x0", 0, &unescaped) < 0);
+ assert_se(cunescape("\\x0", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\x0"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\x", 0, &unescaped) < 0);
+ assert_se(cunescape("\\x", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\x"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\", 0, &unescaped) < 0);
+ assert_se(cunescape("\\", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\11", 0, &unescaped) < 0);
+ assert_se(cunescape("\\11", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\11"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\1", 0, &unescaped) < 0);
+ assert_se(cunescape("\\1", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "\\1"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\u0000", 0, &unescaped) < 0);
+ assert_se(cunescape("\\u00DF\\U000000df\\u03a0\\U00000041", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "ßßΠA"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\073", 0, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, ";"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("A=A\\\\x0aB", 0, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "A=A\\x0aB"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("A=A\\\\x0aB", UNESCAPE_RELAX, &unescaped) >= 0);
+ assert_se(streq_ptr(unescaped, "A=A\\x0aB"));
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\x00\\x00\\x00", UNESCAPE_ACCEPT_NUL, &unescaped) == 3);
+ assert_se(memcmp(unescaped, "\0\0\0", 3) == 0);
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\u0000\\u0000\\u0000", UNESCAPE_ACCEPT_NUL, &unescaped) == 3);
+ assert_se(memcmp(unescaped, "\0\0\0", 3) == 0);
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\U00000000\\U00000000\\U00000000", UNESCAPE_ACCEPT_NUL, &unescaped) == 3);
+ assert_se(memcmp(unescaped, "\0\0\0", 3) == 0);
+ unescaped = mfree(unescaped);
+
+ assert_se(cunescape("\\000\\000\\000", UNESCAPE_ACCEPT_NUL, &unescaped) == 3);
+ assert_se(memcmp(unescaped, "\0\0\0", 3) == 0);
+}
+
+static void test_shell_escape_one(const char *s, const char *bad, const char *expected) {
+ _cleanup_free_ char *r;
+
+ assert_se(r = shell_escape(s, bad));
+ assert_se(streq_ptr(r, expected));
+}
+
+static void test_shell_escape(void) {
+ test_shell_escape_one("", "", "");
+ test_shell_escape_one("\\", "", "\\\\");
+ test_shell_escape_one("foobar", "", "foobar");
+ test_shell_escape_one("foobar", "o", "f\\o\\obar");
+ test_shell_escape_one("foo:bar,baz", ",:", "foo\\:bar\\,baz");
+}
+
+static void test_shell_maybe_quote_one(const char *s,
+ EscapeStyle style,
+ const char *expected) {
+ _cleanup_free_ char *ret = NULL;
+
+ assert_se(ret = shell_maybe_quote(s, style));
+ log_debug("[%s] → [%s] (%s)", s, ret, expected);
+ assert_se(streq(ret, expected));
+}
+
+static void test_shell_maybe_quote(void) {
+
+ test_shell_maybe_quote_one("", ESCAPE_BACKSLASH, "");
+ test_shell_maybe_quote_one("", ESCAPE_BACKSLASH_ONELINE, "");
+ test_shell_maybe_quote_one("", ESCAPE_POSIX, "");
+ test_shell_maybe_quote_one("\\", ESCAPE_BACKSLASH, "\"\\\\\"");
+ test_shell_maybe_quote_one("\\", ESCAPE_BACKSLASH_ONELINE, "\"\\\\\"");
+ test_shell_maybe_quote_one("\\", ESCAPE_POSIX, "$'\\\\'");
+ test_shell_maybe_quote_one("\"", ESCAPE_BACKSLASH, "\"\\\"\"");
+ test_shell_maybe_quote_one("\"", ESCAPE_BACKSLASH_ONELINE, "\"\\\"\"");
+ test_shell_maybe_quote_one("\"", ESCAPE_POSIX, "$'\"'");
+ test_shell_maybe_quote_one("foobar", ESCAPE_BACKSLASH, "foobar");
+ test_shell_maybe_quote_one("foobar", ESCAPE_BACKSLASH_ONELINE, "foobar");
+ test_shell_maybe_quote_one("foobar", ESCAPE_POSIX, "foobar");
+ test_shell_maybe_quote_one("foo bar", ESCAPE_BACKSLASH, "\"foo bar\"");
+ test_shell_maybe_quote_one("foo bar", ESCAPE_BACKSLASH_ONELINE, "\"foo bar\"");
+ test_shell_maybe_quote_one("foo bar", ESCAPE_POSIX, "$'foo bar'");
+ test_shell_maybe_quote_one("foo\tbar", ESCAPE_BACKSLASH, "\"foo\tbar\"");
+ test_shell_maybe_quote_one("foo\tbar", ESCAPE_BACKSLASH_ONELINE, "\"foo\\tbar\"");
+ test_shell_maybe_quote_one("foo\tbar", ESCAPE_POSIX, "$'foo\\tbar'");
+ test_shell_maybe_quote_one("foo\nbar", ESCAPE_BACKSLASH, "\"foo\nbar\"");
+ test_shell_maybe_quote_one("foo\nbar", ESCAPE_BACKSLASH_ONELINE, "\"foo\\nbar\"");
+ test_shell_maybe_quote_one("foo\nbar", ESCAPE_POSIX, "$'foo\\nbar'");
+ test_shell_maybe_quote_one("foo \"bar\" waldo", ESCAPE_BACKSLASH, "\"foo \\\"bar\\\" waldo\"");
+ test_shell_maybe_quote_one("foo \"bar\" waldo", ESCAPE_BACKSLASH_ONELINE, "\"foo \\\"bar\\\" waldo\"");
+ test_shell_maybe_quote_one("foo \"bar\" waldo", ESCAPE_POSIX, "$'foo \"bar\" waldo'");
+ test_shell_maybe_quote_one("foo$bar", ESCAPE_BACKSLASH, "\"foo\\$bar\"");
+ test_shell_maybe_quote_one("foo$bar", ESCAPE_BACKSLASH_ONELINE, "\"foo\\$bar\"");
+ test_shell_maybe_quote_one("foo$bar", ESCAPE_POSIX, "$'foo$bar'");
+
+ /* Note that current users disallow control characters, so this "test"
+ * is here merely to establish current behaviour. If control characters
+ * were allowed, they should be quoted, i.e. \001 should become \\001. */
+ test_shell_maybe_quote_one("a\nb\001", ESCAPE_BACKSLASH, "\"a\nb\001\"");
+ test_shell_maybe_quote_one("a\nb\001", ESCAPE_BACKSLASH_ONELINE, "\"a\\nb\001\"");
+ test_shell_maybe_quote_one("a\nb\001", ESCAPE_POSIX, "$'a\\nb\001'");
+
+ test_shell_maybe_quote_one("foo!bar", ESCAPE_BACKSLASH, "\"foo!bar\"");
+ test_shell_maybe_quote_one("foo!bar", ESCAPE_BACKSLASH_ONELINE, "\"foo!bar\"");
+ test_shell_maybe_quote_one("foo!bar", ESCAPE_POSIX, "$'foo!bar'");
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_cescape();
+ test_xescape();
+ test_xescape_full(false);
+ test_xescape_full(true);
+ test_cunescape();
+ test_shell_escape();
+ test_shell_maybe_quote();
+
+ return 0;
+}
diff --git a/src/test/test-exec-util.c b/src/test/test-exec-util.c
new file mode 100644
index 0000000..e9e8e21
--- /dev/null
+++ b/src/test/test-exec-util.c
@@ -0,0 +1,473 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "copy.h"
+#include "def.h"
+#include "env-util.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static int here = 0, here2 = 0, here3 = 0;
+void *ignore_stdout_args[] = {&here, &here2, &here3};
+
+/* noop handlers, just check that arguments are passed correctly */
+static int ignore_stdout_func(int fd, void *arg) {
+ assert(fd >= 0);
+ assert(arg == &here);
+ safe_close(fd);
+
+ return 0;
+}
+static int ignore_stdout_func2(int fd, void *arg) {
+ assert(fd >= 0);
+ assert(arg == &here2);
+ safe_close(fd);
+
+ return 0;
+}
+static int ignore_stdout_func3(int fd, void *arg) {
+ assert(fd >= 0);
+ assert(arg == &here3);
+ safe_close(fd);
+
+ return 0;
+}
+
+static const gather_stdout_callback_t ignore_stdout[] = {
+ ignore_stdout_func,
+ ignore_stdout_func2,
+ ignore_stdout_func3,
+};
+
+static void test_execute_directory(bool gather_stdout) {
+ char template_lo[] = "/tmp/test-exec-util.lo.XXXXXXX";
+ char template_hi[] = "/tmp/test-exec-util.hi.XXXXXXX";
+ const char * dirs[] = {template_hi, template_lo, NULL};
+ const char *name, *name2, *name3,
+ *overridden, *override,
+ *masked, *mask,
+ *masked2, *mask2, /* the mask is non-executable */
+ *masked2e, *mask2e; /* the mask is executable */
+
+ log_info("/* %s (%s) */", __func__, gather_stdout ? "gathering stdout" : "asynchronous");
+
+ assert_se(mkdtemp(template_lo));
+ assert_se(mkdtemp(template_hi));
+
+ name = strjoina(template_lo, "/script");
+ name2 = strjoina(template_hi, "/script2");
+ name3 = strjoina(template_lo, "/useless");
+ overridden = strjoina(template_lo, "/overridden");
+ override = strjoina(template_hi, "/overridden");
+ masked = strjoina(template_lo, "/masked");
+ mask = strjoina(template_hi, "/masked");
+ masked2 = strjoina(template_lo, "/masked2");
+ mask2 = strjoina(template_hi, "/masked2");
+ masked2e = strjoina(template_lo, "/masked2e");
+ mask2e = strjoina(template_hi, "/masked2e");
+
+ assert_se(write_string_file(name,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/it_works",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name2,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/it_works2",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(overridden,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/failed",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(override,
+ "#!/bin/sh\necho 'Executing '$0",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(masked,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/failed",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(masked2,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/failed",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(masked2e,
+ "#!/bin/sh\necho 'Executing '$0\ntouch $(dirname $0)/failed",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(symlink("/dev/null", mask) == 0);
+ assert_se(touch(mask2) == 0);
+ assert_se(touch(mask2e) == 0);
+ assert_se(touch(name3) >= 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(overridden, 0755) == 0);
+ assert_se(chmod(override, 0755) == 0);
+ assert_se(chmod(masked, 0755) == 0);
+ assert_se(chmod(masked2, 0755) == 0);
+ assert_se(chmod(masked2e, 0755) == 0);
+ assert_se(chmod(mask2e, 0755) == 0);
+
+ if (access(name, X_OK) < 0 && ERRNO_IS_PRIVILEGE(errno))
+ return;
+
+ if (gather_stdout)
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, ignore_stdout, ignore_stdout_args, NULL, NULL, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+ else
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, NULL, NULL, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+
+ assert_se(chdir(template_lo) == 0);
+ assert_se(access("it_works", F_OK) >= 0);
+ assert_se(access("failed", F_OK) < 0);
+
+ assert_se(chdir(template_hi) == 0);
+ assert_se(access("it_works2", F_OK) >= 0);
+ assert_se(access("failed", F_OK) < 0);
+
+ (void) rm_rf(template_lo, REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf(template_hi, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_execution_order(void) {
+ char template_lo[] = "/tmp/test-exec-util-lo.XXXXXXX";
+ char template_hi[] = "/tmp/test-exec-util-hi.XXXXXXX";
+ const char *dirs[] = {template_hi, template_lo, NULL};
+ const char *name, *name2, *name3, *overridden, *override, *masked, *mask;
+ const char *output, *t;
+ _cleanup_free_ char *contents = NULL;
+
+ assert_se(mkdtemp(template_lo));
+ assert_se(mkdtemp(template_hi));
+
+ output = strjoina(template_hi, "/output");
+
+ log_info("/* %s >>%s */", __func__, output);
+
+ /* write files in "random" order */
+ name2 = strjoina(template_lo, "/90-bar");
+ name = strjoina(template_hi, "/80-foo");
+ name3 = strjoina(template_lo, "/last");
+ overridden = strjoina(template_lo, "/30-override");
+ override = strjoina(template_hi, "/30-override");
+ masked = strjoina(template_lo, "/10-masked");
+ mask = strjoina(template_hi, "/10-masked");
+
+ t = strjoina("#!/bin/sh\necho $(basename $0) >>", output);
+ assert_se(write_string_file(name, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho $(basename $0) >>", output);
+ assert_se(write_string_file(name2, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho $(basename $0) >>", output);
+ assert_se(write_string_file(name3, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho OVERRIDDEN >>", output);
+ assert_se(write_string_file(overridden, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho $(basename $0) >>", output);
+ assert_se(write_string_file(override, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ t = strjoina("#!/bin/sh\necho MASKED >>", output);
+ assert_se(write_string_file(masked, t, WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(symlink("/dev/null", mask) == 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(name3, 0755) == 0);
+ assert_se(chmod(overridden, 0755) == 0);
+ assert_se(chmod(override, 0755) == 0);
+ assert_se(chmod(masked, 0755) == 0);
+
+ if (access(name, X_OK) < 0 && ERRNO_IS_PRIVILEGE(errno))
+ return;
+
+ execute_directories(dirs, DEFAULT_TIMEOUT_USEC, ignore_stdout, ignore_stdout_args, NULL, NULL, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+
+ assert_se(read_full_file(output, &contents, NULL) >= 0);
+ assert_se(streq(contents, "30-override\n80-foo\n90-bar\nlast\n"));
+
+ (void) rm_rf(template_lo, REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf(template_hi, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static int gather_stdout_one(int fd, void *arg) {
+ char ***s = arg, *t;
+ char buf[128] = {};
+
+ assert_se(s);
+ assert_se(read(fd, buf, sizeof buf) >= 0);
+ safe_close(fd);
+
+ assert_se(t = strndup(buf, sizeof buf));
+ assert_se(strv_push(s, t) >= 0);
+
+ return 0;
+}
+static int gather_stdout_two(int fd, void *arg) {
+ char ***s = arg, **t;
+
+ STRV_FOREACH(t, *s)
+ assert_se(write(fd, *t, strlen(*t)) == (ssize_t) strlen(*t));
+ safe_close(fd);
+
+ return 0;
+}
+static int gather_stdout_three(int fd, void *arg) {
+ char **s = arg;
+ char buf[128] = {};
+
+ assert_se(read(fd, buf, sizeof buf - 1) > 0);
+ safe_close(fd);
+ assert_se(*s = strndup(buf, sizeof buf));
+
+ return 0;
+}
+
+const gather_stdout_callback_t gather_stdout[] = {
+ gather_stdout_one,
+ gather_stdout_two,
+ gather_stdout_three,
+};
+
+static void test_stdout_gathering(void) {
+ char template[] = "/tmp/test-exec-util.XXXXXXX";
+ const char *dirs[] = {template, NULL};
+ const char *name, *name2, *name3;
+ int r;
+
+ char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
+ _cleanup_free_ char *output = NULL;
+
+ void* args[] = {&tmp, &tmp, &output};
+
+ assert_se(mkdtemp(template));
+
+ log_info("/* %s */", __func__);
+
+ /* write files */
+ name = strjoina(template, "/10-foo");
+ name2 = strjoina(template, "/20-bar");
+ name3 = strjoina(template, "/30-last");
+
+ assert_se(write_string_file(name,
+ "#!/bin/sh\necho a\necho b\necho c\n",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name2,
+ "#!/bin/sh\necho d\n",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name3,
+ "#!/bin/sh\nsleep 1",
+ WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(name3, 0755) == 0);
+
+ if (access(name, X_OK) < 0 && ERRNO_IS_PRIVILEGE(errno))
+ return;
+
+ r = execute_directories(dirs, DEFAULT_TIMEOUT_USEC, gather_stdout, args, NULL, NULL, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+ assert_se(r >= 0);
+
+ log_info("got: %s", output);
+
+ assert_se(streq(output, "a\nb\nc\nd\n"));
+}
+
+static void test_environment_gathering(void) {
+ char template[] = "/tmp/test-exec-util.XXXXXXX", **p;
+ const char *dirs[] = {template, NULL};
+ const char *name, *name2, *name3, *old;
+ int r;
+
+ char **tmp = NULL; /* this is only used in the forked process, no cleanup here */
+ _cleanup_strv_free_ char **env = NULL;
+
+ void* const args[] = { &tmp, &tmp, &env };
+
+ assert_se(mkdtemp(template));
+
+ log_info("/* %s */", __func__);
+
+ /* write files */
+ name = strjoina(template, "/10-foo");
+ name2 = strjoina(template, "/20-bar");
+ name3 = strjoina(template, "/30-last");
+
+ assert_se(write_string_file(name,
+ "#!/bin/sh\n"
+ "echo A=23\n",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name2,
+ "#!/bin/sh\n"
+ "echo A=22:$A\n\n\n", /* substitution from previous generator */
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name3,
+ "#!/bin/sh\n"
+ "echo A=$A:24\n"
+ "echo B=12\n"
+ "echo C=000\n"
+ "echo C=001\n" /* variable overwriting */
+ /* various invalid entries */
+ "echo unset A\n"
+ "echo unset A=\n"
+ "echo unset A=B\n"
+ "echo unset \n"
+ "echo A B=C\n"
+ "echo A\n"
+ /* test variable assignment without newline */
+ "echo PATH=$PATH:/no/such/file", /* no newline */
+ WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(name3, 0755) == 0);
+
+ /* When booting in containers or without initramfs there might not be
+ * any PATH in the environment and if there is no PATH /bin/sh built-in
+ * PATH may leak and override systemd's DEFAULT_PATH which is not
+ * good. Force our own PATH in environment, to prevent expansion of sh
+ * built-in $PATH */
+ old = getenv("PATH");
+ r = setenv("PATH", "no-sh-built-in-path", 1);
+ assert_se(r >= 0);
+
+ if (access(name, X_OK) < 0 && ERRNO_IS_PRIVILEGE(errno))
+ return;
+
+ r = execute_directories(dirs, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL, NULL, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(p, env)
+ log_info("got env: \"%s\"", *p);
+
+ assert_se(streq(strv_env_get(env, "A"), "22:23:24"));
+ assert_se(streq(strv_env_get(env, "B"), "12"));
+ assert_se(streq(strv_env_get(env, "C"), "001"));
+ assert_se(streq(strv_env_get(env, "PATH"), "no-sh-built-in-path:/no/such/file"));
+
+ /* now retest with "default" path passed in, as created by
+ * manager_default_environment */
+ env = strv_free(env);
+ env = strv_new("PATH=" DEFAULT_PATH);
+ assert_se(env);
+
+ r = execute_directories(dirs, DEFAULT_TIMEOUT_USEC, gather_environment, args, NULL, env, EXEC_DIR_PARALLEL | EXEC_DIR_IGNORE_ERRORS);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(p, env)
+ log_info("got env: \"%s\"", *p);
+
+ assert_se(streq(strv_env_get(env, "A"), "22:23:24"));
+ assert_se(streq(strv_env_get(env, "B"), "12"));
+ assert_se(streq(strv_env_get(env, "C"), "001"));
+ assert_se(streq(strv_env_get(env, "PATH"), DEFAULT_PATH ":/no/such/file"));
+
+ /* reset environ PATH */
+ assert_se(set_unset_env("PATH", old, true) == 0);
+}
+
+static void test_error_catching(void) {
+ char template[] = "/tmp/test-exec-util.XXXXXXX";
+ const char *dirs[] = {template, NULL};
+ const char *name, *name2, *name3;
+ int r;
+
+ assert_se(mkdtemp(template));
+
+ log_info("/* %s */", __func__);
+
+ /* write files */
+ name = strjoina(template, "/10-foo");
+ name2 = strjoina(template, "/20-bar");
+ name3 = strjoina(template, "/30-last");
+
+ assert_se(write_string_file(name,
+ "#!/bin/sh\necho a\necho b\necho c\n",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name2,
+ "#!/bin/sh\nexit 42\n",
+ WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(write_string_file(name3,
+ "#!/bin/sh\nexit 12",
+ WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(chmod(name, 0755) == 0);
+ assert_se(chmod(name2, 0755) == 0);
+ assert_se(chmod(name3, 0755) == 0);
+
+ if (access(name, X_OK) < 0 && ERRNO_IS_PRIVILEGE(errno))
+ return;
+
+ r = execute_directories(dirs, DEFAULT_TIMEOUT_USEC, NULL, NULL, NULL, NULL, EXEC_DIR_NONE);
+
+ /* we should exit with the error code of the first script that failed */
+ assert_se(r == 42);
+}
+
+static void test_exec_command_flags_from_strv(void) {
+ ExecCommandFlags flags = 0;
+ char **valid_strv = STRV_MAKE("no-env-expand", "no-setuid", "ignore-failure");
+ char **invalid_strv = STRV_MAKE("no-env-expand", "no-setuid", "nonexistent-option", "ignore-failure");
+ int r;
+
+ r = exec_command_flags_from_strv(valid_strv, &flags);
+
+ assert_se(r == 0);
+ assert_se(FLAGS_SET(flags, EXEC_COMMAND_NO_ENV_EXPAND));
+ assert_se(FLAGS_SET(flags, EXEC_COMMAND_NO_SETUID));
+ assert_se(FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE));
+ assert_se(!FLAGS_SET(flags, EXEC_COMMAND_AMBIENT_MAGIC));
+ assert_se(!FLAGS_SET(flags, EXEC_COMMAND_FULLY_PRIVILEGED));
+
+ r = exec_command_flags_from_strv(invalid_strv, &flags);
+
+ assert_se(r == -EINVAL);
+}
+
+static void test_exec_command_flags_to_strv(void) {
+ _cleanup_strv_free_ char **opts = NULL, **empty_opts = NULL, **invalid_opts = NULL;
+ ExecCommandFlags flags = 0;
+ int r;
+
+ flags |= (EXEC_COMMAND_AMBIENT_MAGIC|EXEC_COMMAND_NO_ENV_EXPAND|EXEC_COMMAND_IGNORE_FAILURE);
+
+ r = exec_command_flags_to_strv(flags, &opts);
+
+ assert_se(r == 0);
+ assert_se(strv_equal(opts, STRV_MAKE("ignore-failure", "ambient", "no-env-expand")));
+
+ r = exec_command_flags_to_strv(0, &empty_opts);
+
+ assert_se(r == 0);
+ assert_se(strv_equal(empty_opts, STRV_MAKE_EMPTY));
+
+ flags = _EXEC_COMMAND_FLAGS_INVALID;
+
+ r = exec_command_flags_to_strv(flags, &invalid_opts);
+
+ assert_se(r == -EINVAL);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_execute_directory(true);
+ test_execute_directory(false);
+ test_execution_order();
+ test_stdout_gathering();
+ test_environment_gathering();
+ test_error_catching();
+ test_exec_command_flags_from_strv();
+ test_exec_command_flags_to_strv();
+
+ return 0;
+}
diff --git a/src/test/test-execute.c b/src/test/test-execute.c
new file mode 100644
index 0000000..3b6a4be
--- /dev/null
+++ b/src/test/test-execute.c
@@ -0,0 +1,966 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+
+#include "capability-util.h"
+#include "cpu-set-util.h"
+#include "errno-list.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "manager.h"
+#include "missing_prctl.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "service.h"
+#include "stat-util.h"
+#include "tests.h"
+#include "unit.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+static bool can_unshare;
+
+typedef void (*test_function_t)(Manager *m);
+
+static int cld_dumped_to_killed(int code) {
+ /* Depending on the system, seccomp version, … some signals might result in dumping, others in plain
+ * killing. Let's ignore the difference here, and map both cases to CLD_KILLED */
+ return code == CLD_DUMPED ? CLD_KILLED : code;
+}
+
+static void wait_for_service_finish(Manager *m, Unit *unit) {
+ Service *service = NULL;
+ usec_t ts;
+ usec_t timeout = 2 * USEC_PER_MINUTE;
+
+ assert_se(m);
+ assert_se(unit);
+
+ service = SERVICE(unit);
+ printf("%s\n", unit->id);
+ exec_context_dump(&service->exec_context, stdout, "\t");
+ ts = now(CLOCK_MONOTONIC);
+ while (!IN_SET(service->state, SERVICE_DEAD, SERVICE_FAILED)) {
+ int r;
+ usec_t n;
+
+ r = sd_event_run(m->event, 100 * USEC_PER_MSEC);
+ assert_se(r >= 0);
+
+ n = now(CLOCK_MONOTONIC);
+ if (ts + timeout < n) {
+ log_error("Test timeout when testing %s", unit->id);
+ r = unit_kill(unit, KILL_ALL, SIGKILL, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to kill %s: %m", unit->id);
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+static void check_main_result(const char *file, unsigned line, const char *func,
+ Manager *m, Unit *unit, int status_expected, int code_expected) {
+ Service *service = NULL;
+
+ assert_se(m);
+ assert_se(unit);
+
+ wait_for_service_finish(m, unit);
+
+ service = SERVICE(unit);
+ exec_status_dump(&service->main_exec_status, stdout, "\t");
+
+ if (cld_dumped_to_killed(service->main_exec_status.code) != cld_dumped_to_killed(code_expected)) {
+ log_error("%s:%u:%s %s: exit code %d, expected %d",
+ file, line, func,
+ unit->id,
+ service->main_exec_status.code, code_expected);
+ abort();
+ }
+
+ if (service->main_exec_status.status != status_expected) {
+ log_error("%s:%u:%s: %s: exit status %d, expected %d",
+ file, line, func, unit->id,
+ service->main_exec_status.status, status_expected);
+ abort();
+ }
+}
+
+static void check_service_result(const char *file, unsigned line, const char *func,
+ Manager *m, Unit *unit, ServiceResult result_expected) {
+ Service *service = NULL;
+
+ assert_se(m);
+ assert_se(unit);
+
+ wait_for_service_finish(m, unit);
+
+ service = SERVICE(unit);
+
+ if (service->result != result_expected) {
+ log_error("%s:%u:%s: %s: service end result %s, expected %s",
+ file, line, func,
+ unit->id,
+ service_result_to_string(service->result),
+ service_result_to_string(result_expected));
+ abort();
+ }
+}
+
+static bool check_nobody_user_and_group(void) {
+ static int cache = -1;
+ struct passwd *p;
+ struct group *g;
+
+ if (cache >= 0)
+ return !!cache;
+
+ if (!synthesize_nobody())
+ goto invalid;
+
+ p = getpwnam(NOBODY_USER_NAME);
+ if (!p ||
+ !streq(p->pw_name, NOBODY_USER_NAME) ||
+ p->pw_uid != UID_NOBODY ||
+ p->pw_gid != GID_NOBODY)
+ goto invalid;
+
+ p = getpwuid(UID_NOBODY);
+ if (!p ||
+ !streq(p->pw_name, NOBODY_USER_NAME) ||
+ p->pw_uid != UID_NOBODY ||
+ p->pw_gid != GID_NOBODY)
+ goto invalid;
+
+ g = getgrnam(NOBODY_GROUP_NAME);
+ if (!g ||
+ !streq(g->gr_name, NOBODY_GROUP_NAME) ||
+ g->gr_gid != GID_NOBODY)
+ goto invalid;
+
+ g = getgrgid(GID_NOBODY);
+ if (!g ||
+ !streq(g->gr_name, NOBODY_GROUP_NAME) ||
+ g->gr_gid != GID_NOBODY)
+ goto invalid;
+
+ cache = 1;
+ return true;
+
+invalid:
+ cache = 0;
+ return false;
+}
+
+static bool check_user_has_group_with_same_name(const char *name) {
+ struct passwd *p;
+ struct group *g;
+
+ assert(name);
+
+ p = getpwnam(name);
+ if (!p ||
+ !streq(p->pw_name, name))
+ return false;
+
+ g = getgrgid(p->pw_gid);
+ if (!g ||
+ !streq(g->gr_name, name))
+ return false;
+
+ return true;
+}
+
+static bool is_inaccessible_available(void) {
+ const char *p;
+
+ FOREACH_STRING(p,
+ "/run/systemd/inaccessible/reg",
+ "/run/systemd/inaccessible/dir",
+ "/run/systemd/inaccessible/chr",
+ "/run/systemd/inaccessible/blk",
+ "/run/systemd/inaccessible/fifo",
+ "/run/systemd/inaccessible/sock"
+ ) {
+ if (access(p, F_OK) < 0)
+ return false;
+ }
+
+ return true;
+}
+
+static void _test(const char *file, unsigned line, const char *func,
+ Manager *m, const char *unit_name, int status_expected, int code_expected) {
+ Unit *unit;
+
+ assert_se(unit_name);
+
+ assert_se(manager_load_startable_unit_or_warn(m, unit_name, NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+ check_main_result(file, line, func, m, unit, status_expected, code_expected);
+}
+#define test(m, unit_name, status_expected, code_expected) \
+ _test(PROJECT_FILE, __LINE__, __func__, m, unit_name, status_expected, code_expected)
+
+static void _test_service(const char *file, unsigned line, const char *func,
+ Manager *m, const char *unit_name, ServiceResult result_expected) {
+ Unit *unit;
+
+ assert_se(unit_name);
+
+ assert_se(manager_load_startable_unit_or_warn(m, unit_name, NULL, &unit) >= 0);
+ assert_se(unit_start(unit) >= 0);
+ check_service_result(file, line, func, m, unit, result_expected);
+}
+#define test_service(m, unit_name, result_expected) \
+ _test_service(PROJECT_FILE, __LINE__, __func__, m, unit_name, result_expected)
+
+static void test_exec_bindpaths(Manager *m) {
+ assert_se(mkdir_p("/tmp/test-exec-bindpaths", 0755) >= 0);
+ assert_se(mkdir_p("/tmp/test-exec-bindreadonlypaths", 0755) >= 0);
+
+ test(m, "exec-bindpaths.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+
+ (void) rm_rf("/tmp/test-exec-bindpaths", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/tmp/test-exec-bindreadonlypaths", REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_exec_cpuaffinity(Manager *m) {
+ _cleanup_(cpu_set_reset) CPUSet c = {};
+
+ assert_se(cpu_set_realloc(&c, 8192) >= 0); /* just allocate the maximum possible size */
+ assert_se(sched_getaffinity(0, c.allocated, c.set) >= 0);
+
+ if (!CPU_ISSET_S(0, c.allocated, c.set)) {
+ log_notice("Cannot use CPU 0, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-cpuaffinity1.service", 0, CLD_EXITED);
+ test(m, "exec-cpuaffinity2.service", 0, CLD_EXITED);
+
+ if (!CPU_ISSET_S(1, c.allocated, c.set) ||
+ !CPU_ISSET_S(2, c.allocated, c.set)) {
+ log_notice("Cannot use CPU 1 or 2, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ test(m, "exec-cpuaffinity3.service", 0, CLD_EXITED);
+}
+
+static void test_exec_workingdirectory(Manager *m) {
+ assert_se(mkdir_p("/tmp/test-exec_workingdirectory", 0755) >= 0);
+
+ test(m, "exec-workingdirectory.service", 0, CLD_EXITED);
+ test(m, "exec-workingdirectory-trailing-dot.service", 0, CLD_EXITED);
+
+ (void) rm_rf("/tmp/test-exec_workingdirectory", REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_exec_personality(Manager *m) {
+#if defined(__x86_64__)
+ test(m, "exec-personality-x86-64.service", 0, CLD_EXITED);
+
+#elif defined(__s390__)
+ test(m, "exec-personality-s390.service", 0, CLD_EXITED);
+
+#elif defined(__powerpc64__)
+# if __BYTE_ORDER == __BIG_ENDIAN
+ test(m, "exec-personality-ppc64.service", 0, CLD_EXITED);
+# else
+ test(m, "exec-personality-ppc64le.service", 0, CLD_EXITED);
+# endif
+
+#elif defined(__aarch64__)
+ test(m, "exec-personality-aarch64.service", 0, CLD_EXITED);
+
+#elif defined(__i386__)
+ test(m, "exec-personality-x86.service", 0, CLD_EXITED);
+#else
+ log_notice("Unknown personality, skipping %s", __func__);
+#endif
+}
+
+static void test_exec_ignoresigpipe(Manager *m) {
+ test(m, "exec-ignoresigpipe-yes.service", 0, CLD_EXITED);
+ test(m, "exec-ignoresigpipe-no.service", SIGPIPE, CLD_KILLED);
+}
+
+static void test_exec_privatetmp(Manager *m) {
+ assert_se(touch("/tmp/test-exec_privatetmp") >= 0);
+
+ test(m, "exec-privatetmp-yes.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatetmp-no.service", 0, CLD_EXITED);
+ test(m, "exec-privatetmp-disabled-by-prefix.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+
+ unlink("/tmp/test-exec_privatetmp");
+}
+
+static void test_exec_privatedevices(Manager *m) {
+ int r;
+
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+ if (!is_inaccessible_available()) {
+ log_notice("Testing without inaccessible, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-privatedevices-yes.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatedevices-no.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-disabled-by-prefix.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-privatedevices-yes-with-group.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+
+ /* We use capsh to test if the capabilities are
+ * properly set, so be sure that it exists */
+ r = find_executable("capsh", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Could not find capsh binary, skipping remaining tests in %s: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-privatedevices-yes-capability-mknod.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-no-capability-mknod.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-yes-capability-sys-rawio.service", 0, CLD_EXITED);
+ test(m, "exec-privatedevices-no-capability-sys-rawio.service", 0, CLD_EXITED);
+}
+
+static void test_exec_protecthome(Manager *m) {
+ if (!can_unshare) {
+ log_notice("Cannot reliably unshare, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-protecthome-tmpfs-vs-protectsystem-strict.service", 0, CLD_EXITED);
+}
+
+static void test_exec_protectkernelmodules(Manager *m) {
+ int r;
+
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+ if (!is_inaccessible_available()) {
+ log_notice("Testing without inaccessible, skipping %s", __func__);
+ return;
+ }
+
+ r = find_executable("capsh", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping %s, could not find capsh binary: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-protectkernelmodules-no-capabilities.service", 0, CLD_EXITED);
+ test(m, "exec-protectkernelmodules-yes-capabilities.service", 0, CLD_EXITED);
+ test(m, "exec-protectkernelmodules-yes-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+}
+
+static void test_exec_readonlypaths(Manager *m) {
+
+ test(m, "exec-readonlypaths-simple.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+
+ if (path_is_read_only_fs("/var") > 0) {
+ log_notice("Directory /var is readonly, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ test(m, "exec-readonlypaths.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-readonlypaths-with-bindpaths.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-readonlypaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+}
+
+static void test_exec_readwritepaths(Manager *m) {
+
+ if (path_is_read_only_fs("/") > 0) {
+ log_notice("Root directory is readonly, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-readwritepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+}
+
+static void test_exec_inaccessiblepaths(Manager *m) {
+
+ if (!is_inaccessible_available()) {
+ log_notice("Testing without inaccessible, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-inaccessiblepaths-sys.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+
+ if (path_is_read_only_fs("/") > 0) {
+ log_notice("Root directory is readonly, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ test(m, "exec-inaccessiblepaths-mount-propagation.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+}
+
+static void test_exec_temporaryfilesystem(Manager *m) {
+
+ test(m, "exec-temporaryfilesystem-options.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-temporaryfilesystem-ro.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-temporaryfilesystem-rw.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-temporaryfilesystem-usr.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+}
+
+static void test_exec_systemcallfilter(Manager *m) {
+#if HAVE_SECCOMP
+ int r;
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallfilter-not-failing.service", 0, CLD_EXITED);
+ test(m, "exec-systemcallfilter-not-failing2.service", 0, CLD_EXITED);
+ test(m, "exec-systemcallfilter-failing.service", SIGSYS, CLD_KILLED);
+ test(m, "exec-systemcallfilter-failing2.service", SIGSYS, CLD_KILLED);
+
+ r = find_executable("python3", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping remaining tests in %s, could not find python3 binary: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallfilter-with-errno-name.service", errno_from_name("EILSEQ"), CLD_EXITED);
+ test(m, "exec-systemcallfilter-with-errno-number.service", 255, CLD_EXITED);
+ test(m, "exec-systemcallfilter-with-errno-multi.service", errno_from_name("EILSEQ"), CLD_EXITED);
+ test(m, "exec-systemcallfilter-override-error-action.service", SIGSYS, CLD_KILLED);
+ test(m, "exec-systemcallfilter-override-error-action2.service", errno_from_name("EILSEQ"), CLD_EXITED);
+#endif
+}
+
+static void test_exec_systemcallerrornumber(Manager *m) {
+#if HAVE_SECCOMP
+ int r;
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+
+ r = find_executable("python3", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping %s, could not find python3 binary: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallerrornumber-name.service", errno_from_name("EACCES"), CLD_EXITED);
+ test(m, "exec-systemcallerrornumber-number.service", 255, CLD_EXITED);
+#endif
+}
+
+static void test_exec_restrictnamespaces(Manager *m) {
+#if HAVE_SECCOMP
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-restrictnamespaces-no.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-yes.service", 1, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-mnt.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-mnt-deny-list.service", 1, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-merge-and.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-merge-or.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+ test(m, "exec-restrictnamespaces-merge-all.service", can_unshare ? 0 : EXIT_FAILURE, CLD_EXITED);
+#endif
+}
+
+static void test_exec_systemcallfilter_system(Manager *m) {
+/* Skip this particular test case when running under ASan, as
+ * LSan intermittently segfaults when accessing memory right
+ * after the test finishes. Generally, ASan & LSan don't like
+ * the seccomp stuff.
+ */
+#if HAVE_SECCOMP && !HAS_FEATURE_ADDRESS_SANITIZER
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallfilter-system-user.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_USER_NAME, "nobody", "nfsnobody")) {
+ log_notice("Unsupported nobody user name '%s', skipping remaining tests in %s", NOBODY_USER_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-systemcallfilter-system-user-" NOBODY_USER_NAME ".service", 0, CLD_EXITED);
+#endif
+}
+
+static void test_exec_user(Manager *m) {
+ test(m, "exec-user.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_USER_NAME, "nobody", "nfsnobody")) {
+ log_notice("Unsupported nobody user name '%s', skipping remaining tests in %s", NOBODY_USER_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-user-" NOBODY_USER_NAME ".service", 0, CLD_EXITED);
+}
+
+static void test_exec_group(Manager *m) {
+ test(m, "exec-group.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_GROUP_NAME, "nobody", "nfsnobody", "nogroup")) {
+ log_notice("Unsupported nobody group name '%s', skipping remaining tests in %s", NOBODY_GROUP_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-group-" NOBODY_GROUP_NAME ".service", 0, CLD_EXITED);
+}
+
+static void test_exec_supplementarygroups(Manager *m) {
+ test(m, "exec-supplementarygroups.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-single-group.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-single-group-user.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-multiple-groups-default-group-user.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-multiple-groups-withgid.service", 0, CLD_EXITED);
+ test(m, "exec-supplementarygroups-multiple-groups-withuid.service", 0, CLD_EXITED);
+}
+
+static char* private_directory_bad(Manager *m) {
+ /* This mirrors setup_exec_directory(). */
+
+ for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
+ _cleanup_free_ char *p = NULL;
+ struct stat st;
+
+ assert_se(p = path_join(m->prefix[dt], "private"));
+
+ if (stat(p, &st) >= 0 &&
+ (st.st_mode & (S_IRWXG|S_IRWXO)))
+ return TAKE_PTR(p);
+ }
+
+ return NULL;
+}
+
+static void test_exec_dynamicuser(Manager *m) {
+ _cleanup_free_ char *bad = private_directory_bad(m);
+ if (bad) {
+ log_warning("%s: %s has bad permissions, skipping test.", __func__, bad);
+ return;
+ }
+
+ test(m, "exec-dynamicuser-fixeduser.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ if (check_user_has_group_with_same_name("adm"))
+ test(m, "exec-dynamicuser-fixeduser-adm.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ if (check_user_has_group_with_same_name("games"))
+ test(m, "exec-dynamicuser-fixeduser-games.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-fixeduser-one-supplementarygroup.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-supplementarygroups.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-statedir.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+
+ (void) rm_rf("/var/lib/quux", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/test-dynamicuser-migrate", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/test-dynamicuser-migrate2", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/waldo", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/quux", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/test-dynamicuser-migrate", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/test-dynamicuser-migrate2", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/waldo", REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ test(m, "exec-dynamicuser-statedir-migrate-step1.service", 0, CLD_EXITED);
+ test(m, "exec-dynamicuser-statedir-migrate-step2.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-statedir-migrate-step1.service", 0, CLD_EXITED);
+
+ (void) rm_rf("/var/lib/test-dynamicuser-migrate", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/test-dynamicuser-migrate2", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/test-dynamicuser-migrate", REMOVE_ROOT|REMOVE_PHYSICAL);
+ (void) rm_rf("/var/lib/private/test-dynamicuser-migrate2", REMOVE_ROOT|REMOVE_PHYSICAL);
+
+ test(m, "exec-dynamicuser-runtimedirectory1.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-runtimedirectory2.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+ test(m, "exec-dynamicuser-runtimedirectory3.service", can_unshare ? 0 : EXIT_NAMESPACE, CLD_EXITED);
+}
+
+static void test_exec_environment(Manager *m) {
+ test(m, "exec-environment-no-substitute.service", 0, CLD_EXITED);
+ test(m, "exec-environment.service", 0, CLD_EXITED);
+ test(m, "exec-environment-multiple.service", 0, CLD_EXITED);
+ test(m, "exec-environment-empty.service", 0, CLD_EXITED);
+}
+
+static void test_exec_environmentfile(Manager *m) {
+ static const char e[] =
+ "VAR1='word1 word2'\n"
+ "VAR2=word3 \n"
+ "# comment1\n"
+ "\n"
+ "; comment2\n"
+ " ; # comment3\n"
+ "line without an equal\n"
+ "VAR3='$word 5 6'\n"
+ "VAR4='new\nline'\n"
+ "VAR5=password\\with\\backslashes";
+ int r;
+
+ r = write_string_file("/tmp/test-exec_environmentfile.conf", e, WRITE_STRING_FILE_CREATE);
+ assert_se(r == 0);
+
+ test(m, "exec-environmentfile.service", 0, CLD_EXITED);
+
+ (void) unlink("/tmp/test-exec_environmentfile.conf");
+}
+
+static void test_exec_passenvironment(Manager *m) {
+ /* test-execute runs under MANAGER_USER which, by default, forwards all
+ * variables present in the environment, but only those that are
+ * present _at the time it is created_!
+ *
+ * So these PassEnvironment checks are still expected to work, since we
+ * are ensuring the variables are not present at manager creation (they
+ * are unset explicitly in main) and are only set here.
+ *
+ * This is still a good approximation of how a test for MANAGER_SYSTEM
+ * would work.
+ */
+ assert_se(setenv("VAR1", "word1 word2", 1) == 0);
+ assert_se(setenv("VAR2", "word3", 1) == 0);
+ assert_se(setenv("VAR3", "$word 5 6", 1) == 0);
+ assert_se(setenv("VAR4", "new\nline", 1) == 0);
+ assert_se(setenv("VAR5", "passwordwithbackslashes", 1) == 0);
+ test(m, "exec-passenvironment.service", 0, CLD_EXITED);
+ test(m, "exec-passenvironment-repeated.service", 0, CLD_EXITED);
+ test(m, "exec-passenvironment-empty.service", 0, CLD_EXITED);
+ assert_se(unsetenv("VAR1") == 0);
+ assert_se(unsetenv("VAR2") == 0);
+ assert_se(unsetenv("VAR3") == 0);
+ assert_se(unsetenv("VAR4") == 0);
+ assert_se(unsetenv("VAR5") == 0);
+ test(m, "exec-passenvironment-absent.service", 0, CLD_EXITED);
+}
+
+static void test_exec_umask(Manager *m) {
+ test(m, "exec-umask-default.service", 0, CLD_EXITED);
+ test(m, "exec-umask-0177.service", 0, CLD_EXITED);
+}
+
+static void test_exec_runtimedirectory(Manager *m) {
+ test(m, "exec-runtimedirectory.service", 0, CLD_EXITED);
+ test(m, "exec-runtimedirectory-mode.service", 0, CLD_EXITED);
+ test(m, "exec-runtimedirectory-owner.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_GROUP_NAME, "nobody", "nfsnobody", "nogroup")) {
+ log_notice("Unsupported nobody group name '%s', skipping remaining tests in %s", NOBODY_GROUP_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-runtimedirectory-owner-" NOBODY_GROUP_NAME ".service", 0, CLD_EXITED);
+}
+
+static void test_exec_capabilityboundingset(Manager *m) {
+ int r;
+
+ r = find_executable("capsh", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping %s, could not find capsh binary: %m", __func__);
+ return;
+ }
+
+ if (have_effective_cap(CAP_CHOWN) <= 0 ||
+ have_effective_cap(CAP_FOWNER) <= 0 ||
+ have_effective_cap(CAP_KILL) <= 0) {
+ log_notice("Skipping %s, this process does not have enough capabilities", __func__);
+ return;
+ }
+
+ test(m, "exec-capabilityboundingset-simple.service", 0, CLD_EXITED);
+ test(m, "exec-capabilityboundingset-reset.service", 0, CLD_EXITED);
+ test(m, "exec-capabilityboundingset-merge.service", 0, CLD_EXITED);
+ test(m, "exec-capabilityboundingset-invert.service", 0, CLD_EXITED);
+}
+
+static void test_exec_basic(Manager *m) {
+ test(m, "exec-basic.service", 0, CLD_EXITED);
+}
+
+static void test_exec_ambientcapabilities(Manager *m) {
+ int r;
+
+ /* Check if the kernel has support for ambient capabilities. Run
+ * the tests only if that's the case. Clearing all ambient
+ * capabilities is fine, since we are expecting them to be unset
+ * in the first place for the tests. */
+ r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0);
+ if (r < 0 && IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS)) {
+ log_notice("Skipping %s, the kernel does not support ambient capabilities", __func__);
+ return;
+ }
+
+ if (have_effective_cap(CAP_CHOWN) <= 0 ||
+ have_effective_cap(CAP_NET_RAW) <= 0) {
+ log_notice("Skipping %s, this process does not have enough capabilities", __func__);
+ return;
+ }
+
+ test(m, "exec-ambientcapabilities.service", 0, CLD_EXITED);
+ test(m, "exec-ambientcapabilities-merge.service", 0, CLD_EXITED);
+
+ if (!check_nobody_user_and_group()) {
+ log_notice("nobody user/group is not synthesized or may conflict to other entries, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ if (!STR_IN_SET(NOBODY_USER_NAME, "nobody", "nfsnobody")) {
+ log_notice("Unsupported nobody user name '%s', skipping remaining tests in %s", NOBODY_USER_NAME, __func__);
+ return;
+ }
+
+ test(m, "exec-ambientcapabilities-" NOBODY_USER_NAME ".service", 0, CLD_EXITED);
+ test(m, "exec-ambientcapabilities-merge-" NOBODY_USER_NAME ".service", 0, CLD_EXITED);
+}
+
+static void test_exec_privatenetwork(Manager *m) {
+ int r;
+
+ r = find_executable("ip", NULL);
+ if (r < 0) {
+ log_notice_errno(r, "Skipping %s, could not find ip binary: %m", __func__);
+ return;
+ }
+
+ test(m, "exec-privatenetwork-yes.service", can_unshare ? 0 : EXIT_NETWORK, CLD_EXITED);
+}
+
+static void test_exec_oomscoreadjust(Manager *m) {
+ test(m, "exec-oomscoreadjust-positive.service", 0, CLD_EXITED);
+
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping remaining tests in %s", __func__);
+ return;
+ }
+ test(m, "exec-oomscoreadjust-negative.service", 0, CLD_EXITED);
+}
+
+static void test_exec_ioschedulingclass(Manager *m) {
+ test(m, "exec-ioschedulingclass-none.service", 0, CLD_EXITED);
+ test(m, "exec-ioschedulingclass-idle.service", 0, CLD_EXITED);
+ test(m, "exec-ioschedulingclass-best-effort.service", 0, CLD_EXITED);
+
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping remaining tests in %s", __func__);
+ return;
+ }
+ test(m, "exec-ioschedulingclass-realtime.service", 0, CLD_EXITED);
+}
+
+static void test_exec_unsetenvironment(Manager *m) {
+ test(m, "exec-unsetenvironment.service", 0, CLD_EXITED);
+}
+
+static void test_exec_specifier(Manager *m) {
+ test(m, "exec-specifier.service", 0, CLD_EXITED);
+ test(m, "exec-specifier@foo-bar.service", 0, CLD_EXITED);
+ test(m, "exec-specifier-interpolation.service", 0, CLD_EXITED);
+}
+
+static void test_exec_standardinput(Manager *m) {
+ test(m, "exec-standardinput-data.service", 0, CLD_EXITED);
+ test(m, "exec-standardinput-file.service", 0, CLD_EXITED);
+ test(m, "exec-standardinput-file-cat.service", 0, CLD_EXITED);
+}
+
+static void test_exec_standardoutput(Manager *m) {
+ test(m, "exec-standardoutput-file.service", 0, CLD_EXITED);
+}
+
+static void test_exec_standardoutput_append(Manager *m) {
+ test(m, "exec-standardoutput-append.service", 0, CLD_EXITED);
+}
+
+static void test_exec_condition(Manager *m) {
+ test_service(m, "exec-condition-failed.service", SERVICE_FAILURE_EXIT_CODE);
+ test_service(m, "exec-condition-skip.service", SERVICE_SKIP_CONDITION);
+}
+
+typedef struct test_entry {
+ test_function_t f;
+ const char *name;
+} test_entry;
+
+#define entry(x) {x, #x}
+
+static int run_tests(UnitFileScope scope, const test_entry tests[], char **patterns) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert_se(tests);
+
+ r = manager_new(scope, MANAGER_TEST_RUN_BASIC, &m);
+ m->default_std_output = EXEC_OUTPUT_NULL; /* don't rely on host journald */
+ if (manager_errno_skip_test(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ for (const test_entry *test = tests; test->f; test++)
+ if (strv_fnmatch_or_empty(patterns, test->name, FNM_NOESCAPE))
+ test->f(m);
+ else
+ log_info("Skipping %s because it does not match any pattern.", test->name);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+
+ static const test_entry user_tests[] = {
+ entry(test_exec_basic),
+ entry(test_exec_ambientcapabilities),
+ entry(test_exec_bindpaths),
+ entry(test_exec_capabilityboundingset),
+ entry(test_exec_condition),
+ entry(test_exec_cpuaffinity),
+ entry(test_exec_environment),
+ entry(test_exec_environmentfile),
+ entry(test_exec_group),
+ entry(test_exec_ignoresigpipe),
+ entry(test_exec_inaccessiblepaths),
+ entry(test_exec_ioschedulingclass),
+ entry(test_exec_oomscoreadjust),
+ entry(test_exec_passenvironment),
+ entry(test_exec_personality),
+ entry(test_exec_privatedevices),
+ entry(test_exec_privatenetwork),
+ entry(test_exec_privatetmp),
+ entry(test_exec_protecthome),
+ entry(test_exec_protectkernelmodules),
+ entry(test_exec_readonlypaths),
+ entry(test_exec_readwritepaths),
+ entry(test_exec_restrictnamespaces),
+ entry(test_exec_runtimedirectory),
+ entry(test_exec_standardinput),
+ entry(test_exec_standardoutput),
+ entry(test_exec_standardoutput_append),
+ entry(test_exec_supplementarygroups),
+ entry(test_exec_systemcallerrornumber),
+ entry(test_exec_systemcallfilter),
+ entry(test_exec_temporaryfilesystem),
+ entry(test_exec_umask),
+ entry(test_exec_unsetenvironment),
+ entry(test_exec_user),
+ entry(test_exec_workingdirectory),
+ {},
+ };
+ static const test_entry system_tests[] = {
+ entry(test_exec_dynamicuser),
+ entry(test_exec_specifier),
+ entry(test_exec_systemcallfilter_system),
+ {},
+ };
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ if (strstr_ptr(ci_environment(), "travis")) {
+ log_notice("Running on TravisCI under ASan, skipping, see https://github.com/systemd/systemd/issues/10696");
+ return EXIT_TEST_SKIP;
+ }
+#endif
+
+ assert_se(unsetenv("USER") == 0);
+ assert_se(unsetenv("LOGNAME") == 0);
+ assert_se(unsetenv("SHELL") == 0);
+ assert_se(unsetenv("HOME") == 0);
+ assert_se(unsetenv("TMPDIR") == 0);
+
+ can_unshare = have_namespaces();
+
+ /* It is needed otherwise cgroup creation fails */
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ _cleanup_free_ char *unit_dir = NULL;
+ assert_se(get_testdata_dir("test-execute/", &unit_dir) >= 0);
+ assert_se(set_unit_path(unit_dir) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ /* Unset VAR1, VAR2 and VAR3 which are used in the PassEnvironment test
+ * cases, otherwise (and if they are present in the environment),
+ * `manager_default_environment` will copy them into the default
+ * environment which is passed to each created job, which will make the
+ * tests that expect those not to be present to fail.
+ */
+ assert_se(unsetenv("VAR1") == 0);
+ assert_se(unsetenv("VAR2") == 0);
+ assert_se(unsetenv("VAR3") == 0);
+
+ r = run_tests(UNIT_FILE_USER, user_tests, argv + 1);
+ if (r != 0)
+ return r;
+
+ r = run_tests(UNIT_FILE_SYSTEM, system_tests, argv + 1);
+ if (r != 0)
+ return r;
+
+#if HAVE_SECCOMP
+ /* The following tests are for 1beab8b0d0ff2d7d1436b52d4a0c3d56dc908962. */
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping unshare() filtered tests.");
+ return 0;
+ }
+
+ _cleanup_hashmap_free_ Hashmap *s = NULL;
+ assert_se(s = hashmap_new(NULL));
+ r = seccomp_syscall_resolve_name("unshare");
+ assert_se(r != __NR_SCMP_ERROR);
+ assert_se(hashmap_put(s, UINT32_TO_PTR(r + 1), INT_TO_PTR(-1)) >= 0);
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EOPNOTSUPP), true) >= 0);
+ assert_se(unshare(CLONE_NEWNS) < 0);
+ assert_se(errno == EOPNOTSUPP);
+
+ can_unshare = false;
+
+ r = run_tests(UNIT_FILE_USER, user_tests, argv + 1);
+ if (r != 0)
+ return r;
+
+ return run_tests(UNIT_FILE_SYSTEM, system_tests, argv + 1);
+#else
+ return 0;
+#endif
+}
diff --git a/src/test/test-exit-status.c b/src/test/test-exit-status.c
new file mode 100644
index 0000000..4dc1973
--- /dev/null
+++ b/src/test/test-exit-status.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "exit-status.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_exit_status_to_string(void) {
+ log_info("/* %s */", __func__);
+
+ for (int i = -1; i <= 256; i++) {
+ const char *s, *class;
+
+ s = exit_status_to_string(i, EXIT_STATUS_FULL);
+ class = exit_status_class(i);
+ log_info("%d: %s%s%s%s",
+ i, s ?: "-",
+ class ? " (" : "", strempty(class), class ? ")" : "");
+
+ if (s)
+ assert_se(exit_status_from_string(s) == i);
+ }
+}
+
+static void test_exit_status_from_string(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(exit_status_from_string("11") == 11);
+ assert_se(exit_status_from_string("-1") == -ERANGE);
+ assert_se(exit_status_from_string("256") == -ERANGE);
+ assert_se(exit_status_from_string("foo") == -EINVAL);
+ assert_se(exit_status_from_string("SUCCESS") == 0);
+ assert_se(exit_status_from_string("FAILURE") == 1);
+}
+
+static void test_exit_status_NUMA_POLICY(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(streq(exit_status_to_string(EXIT_NUMA_POLICY, EXIT_STATUS_FULL), "NUMA_POLICY"));
+ assert_se(streq(exit_status_to_string(EXIT_NUMA_POLICY, EXIT_STATUS_SYSTEMD), "NUMA_POLICY"));
+ assert_se(!exit_status_to_string(EXIT_NUMA_POLICY, EXIT_STATUS_BSD));
+ assert_se(!exit_status_to_string(EXIT_NUMA_POLICY, EXIT_STATUS_LSB));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_exit_status_to_string();
+ test_exit_status_from_string();
+ test_exit_status_NUMA_POLICY();
+
+ return 0;
+}
diff --git a/src/test/test-extract-word.c b/src/test/test-extract-word.c
new file mode 100644
index 0000000..56b516f
--- /dev/null
+++ b/src/test/test-extract-word.c
@@ -0,0 +1,626 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "extract-word.h"
+#include "log.h"
+#include "string-util.h"
+
+static void test_extract_first_word(void) {
+ const char *p, *original;
+ char *t;
+
+ log_info("/* %s */", __func__);
+
+ p = original = "foobar waldo";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(p == original + 7);
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"foobar\" \'waldo\'";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "\"foobar\""));
+ free(t);
+ assert_se(p == original + 9);
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "\'waldo\'"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"foobar\" \'waldo\'";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(p == original + 9);
+
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) > 0);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"";
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 1);
+ assert_se(streq(t, "\""));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\'";
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 1);
+ assert_se(streq(t, "\'"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\'";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word(&p, &t, NULL, 0) == 1);
+ assert_se(streq(t, "\'fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "KEY=val \"KEY2=val with space\" \"KEY3=val with \\\"quotation\\\"\"";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) == 1);
+ assert_se(streq(t, "KEY=val"));
+ free(t);
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) == 1);
+ assert_se(streq(t, "KEY2=val with space"));
+ free(t);
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) == 1);
+ assert_se(streq(t, "KEY3=val with \"quotation\""));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "KEY=val \"KEY2=val space\" \"KEY3=val with \\\"quotation\\\"\"";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RETAIN_ESCAPE) == 1);
+ assert_se(streq(t, "KEY=val"));
+ free(t);
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RETAIN_ESCAPE) == 1);
+ assert_se(streq(t, "\"KEY2=val"));
+ free(t);
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RETAIN_ESCAPE) == 1);
+ assert_se(streq(t, "space\""));
+ free(t);
+ assert_se(startswith(p, "\"KEY3="));
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"fooo";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "yay\'foo\'bar";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "yay\'foo\'bar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "yay\'foo\'bar";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) > 0);
+ assert_se(streq(t, "yayfoobar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foobar ";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foo\\ba\\x6ar ";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE) > 0);
+ assert_se(streq(t, "foo\ba\x6ar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foo\\ba\\x6ar ";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "foobax6ar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " f\\u00f6o \"pi\\U0001F4A9le\" ";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE) > 0);
+ assert_se(streq(t, "föo"));
+ free(t);
+ assert_se(p == original + 13);
+
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE) > 0);
+ assert_se(streq(t, "pi\360\237\222\251le"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE_RELAX|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, 0) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "foo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "foo::bar";
+ assert_se(extract_first_word(&p, &t, ":", 0) == 1);
+ assert_se(streq(t, "foo"));
+ free(t);
+ assert_se(p == original + 5);
+
+ assert_se(extract_first_word(&p, &t, ":", 0) == 1);
+ assert_se(streq(t, "bar"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, ":", 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "foo\\:bar::waldo";
+ assert_se(extract_first_word(&p, &t, ":", 0) == 1);
+ assert_se(streq(t, "foo:bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ assert_se(extract_first_word(&p, &t, ":", 0) == 1);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word(&p, &t, ":", 0) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE_RELAX) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE_RELAX|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "foo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "foo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE_RELAX|EXTRACT_RELAX) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "fooo\\ bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "\\w+@\\K[\\d.]+";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\\w+@\\K[\\d.]+";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "\\w+@\\K[\\d.]+"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\\w+\\b";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_CUNESCAPE|EXTRACT_CUNESCAPE_RELAX) > 0);
+ assert_se(streq(t, "\\w+\b"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "-N ''";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) > 0);
+ assert_se(streq(t, "-N"));
+ free(t);
+ assert_se(p == original + 3);
+
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_UNQUOTE) > 0);
+ assert_se(streq(t, ""));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = ":foo\\:bar::waldo:";
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(t);
+ assert_se(streq(t, ""));
+ free(t);
+ assert_se(p == original + 1);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(streq(t, "foo:bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(t);
+ assert_se(streq(t, ""));
+ free(t);
+ assert_se(p == original + 11);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(p == original + 17);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 1);
+ assert_se(streq(t, ""));
+ free(t);
+ assert_se(p == NULL);
+
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_DONT_COALESCE_SEPARATORS) == 0);
+ assert_se(!t);
+ assert_se(!p);
+
+ p = "foo\\xbar";
+ assert_se(extract_first_word(&p, &t, NULL, 0) > 0);
+ assert_se(streq(t, "fooxbar"));
+ free(t);
+ assert_se(p == NULL);
+
+ p = "foo\\xbar";
+ assert_se(extract_first_word(&p, &t, NULL, EXTRACT_RETAIN_ESCAPE) > 0);
+ assert_se(streq(t, "foo\\xbar"));
+ free(t);
+ assert_se(p == NULL);
+
+ p = "\\:";
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS) == 1);
+ assert_se(streq(t, ":"));
+ free(t);
+ assert_se(p == NULL);
+
+ p = "a\\:b";
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS) == 1);
+ assert_se(streq(t, "a:b"));
+ free(t);
+ assert_se(p == NULL);
+
+ p = "a\\ b:c";
+ assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS) == 1);
+ assert_se(streq(t, "a b"));
+ free(t);
+ assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS) == 1);
+ assert_se(streq(t, "c"));
+ free(t);
+ assert_se(p == NULL);
+
+ p = "\\:";
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE) == -EINVAL);
+
+ p = "a\\:b";
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE) == -EINVAL);
+ assert_se(extract_first_word(&p, &t, ":", EXTRACT_CUNESCAPE) == 1);
+ assert_se(streq(t, "b"));
+ free(t);
+
+ p = "a\\ b:c";
+ assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE) == -EINVAL);
+ assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE) == 1);
+ assert_se(streq(t, "b"));
+ free(t);
+ assert_se(extract_first_word(&p, &t, WHITESPACE ":", EXTRACT_CUNESCAPE) == 1);
+ assert_se(streq(t, "c"));
+ free(t);
+ assert_se(p == NULL);
+}
+
+static void test_extract_first_word_and_warn(void) {
+ const char *p, *original;
+ char *t;
+
+ log_info("/* %s */", __func__);
+
+ p = original = "foobar waldo";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(p == original + 7);
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"foobar\" \'waldo\'";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foobar"));
+ free(t);
+ assert_se(p == original + 9);
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "waldo"));
+ free(t);
+ assert_se(isempty(p));
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) == 0);
+ assert_se(!t);
+ assert_se(isempty(p));
+
+ p = original = "\"";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\'";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 1);
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\'fooo";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foo\\ba\\x6ar ";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foo\ba\x6ar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " foo\\ba\\x6ar ";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foobax6ar"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = " f\\u00f6o \"pi\\U0001F4A9le\" ";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "föo"));
+ free(t);
+ assert_se(p == original + 13);
+
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "pi\360\237\222\251le"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo\\"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE, NULL, "fake", 1, original) == -EINVAL);
+ assert_se(p == original + 5);
+
+ p = original = "\"foo\\";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE|EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "foo"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_RELAX, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, 0, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "fooo\\ bar quux";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "fooo\\ bar"));
+ free(t);
+ assert_se(p == original + 10);
+
+ p = original = "\\w+@\\K[\\d.]+";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "\\w+@\\K[\\d.]+"));
+ free(t);
+ assert_se(isempty(p));
+
+ p = original = "\\w+\\b";
+ assert_se(extract_first_word_and_warn(&p, &t, NULL, EXTRACT_CUNESCAPE, NULL, "fake", 1, original) > 0);
+ assert_se(streq(t, "\\w+\b"));
+ free(t);
+ assert_se(isempty(p));
+}
+
+static void test_extract_many_words(void) {
+ const char *p, *original;
+ char *a, *b, *c, *d, *e, *f;
+
+ log_info("/* %s */", __func__);
+
+ p = original = "foobar waldi piep";
+ assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 3);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, "foobar"));
+ assert_se(streq_ptr(b, "waldi"));
+ assert_se(streq_ptr(c, "piep"));
+ free(a);
+ free(b);
+ free(c);
+
+ p = original = "foobar:waldi:piep ba1:ba2";
+ assert_se(extract_many_words(&p, ":" WHITESPACE, 0, &a, &b, &c, NULL) == 3);
+ assert_se(!isempty(p));
+ assert_se(streq_ptr(a, "foobar"));
+ assert_se(streq_ptr(b, "waldi"));
+ assert_se(streq_ptr(c, "piep"));
+ assert_se(extract_many_words(&p, ":" WHITESPACE, 0, &d, &e, &f, NULL) == 2);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(d, "ba1"));
+ assert_se(streq_ptr(e, "ba2"));
+ assert_se(isempty(f));
+ free(a);
+ free(b);
+ free(c);
+ free(d);
+ free(e);
+ free(f);
+
+ p = original = "'foobar' wa\"ld\"i ";
+ assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 2);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, "'foobar'"));
+ assert_se(streq_ptr(b, "wa\"ld\"i"));
+ assert_se(streq_ptr(c, NULL));
+ free(a);
+ free(b);
+
+ p = original = "'foobar' wa\"ld\"i ";
+ assert_se(extract_many_words(&p, NULL, EXTRACT_UNQUOTE, &a, &b, &c, NULL) == 2);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, "foobar"));
+ assert_se(streq_ptr(b, "waldi"));
+ assert_se(streq_ptr(c, NULL));
+ free(a);
+ free(b);
+
+ p = original = "";
+ assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 0);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, NULL));
+ assert_se(streq_ptr(b, NULL));
+ assert_se(streq_ptr(c, NULL));
+
+ p = original = " ";
+ assert_se(extract_many_words(&p, NULL, 0, &a, &b, &c, NULL) == 0);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, NULL));
+ assert_se(streq_ptr(b, NULL));
+ assert_se(streq_ptr(c, NULL));
+
+ p = original = "foobar";
+ assert_se(extract_many_words(&p, NULL, 0, NULL) == 0);
+ assert_se(p == original);
+
+ p = original = "foobar waldi";
+ assert_se(extract_many_words(&p, NULL, 0, &a, NULL) == 1);
+ assert_se(p == original+7);
+ assert_se(streq_ptr(a, "foobar"));
+ free(a);
+
+ p = original = " foobar ";
+ assert_se(extract_many_words(&p, NULL, 0, &a, NULL) == 1);
+ assert_se(isempty(p));
+ assert_se(streq_ptr(a, "foobar"));
+ free(a);
+}
+
+int main(int argc, char *argv[]) {
+ log_parse_environment();
+ log_open();
+
+ test_extract_first_word();
+ test_extract_first_word_and_warn();
+ test_extract_many_words();
+
+ return 0;
+}
diff --git a/src/test/test-fd-util.c b/src/test/test-fd-util.c
new file mode 100644
index 0000000..bece89a
--- /dev/null
+++ b/src/test/test-fd-util.c
@@ -0,0 +1,431 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "rlimit-util.h"
+#include "serialize.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static void test_close_many(void) {
+ int fds[3];
+ char name0[] = "/tmp/test-close-many.XXXXXX";
+ char name1[] = "/tmp/test-close-many.XXXXXX";
+ char name2[] = "/tmp/test-close-many.XXXXXX";
+
+ fds[0] = mkostemp_safe(name0);
+ fds[1] = mkostemp_safe(name1);
+ fds[2] = mkostemp_safe(name2);
+
+ close_many(fds, 2);
+
+ assert_se(fcntl(fds[0], F_GETFD) == -1);
+ assert_se(fcntl(fds[1], F_GETFD) == -1);
+ assert_se(fcntl(fds[2], F_GETFD) >= 0);
+
+ safe_close(fds[2]);
+
+ unlink(name0);
+ unlink(name1);
+ unlink(name2);
+}
+
+static void test_close_nointr(void) {
+ char name[] = "/tmp/test-test-close_nointr.XXXXXX";
+ int fd;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(close_nointr(fd) >= 0);
+ assert_se(close_nointr(fd) < 0);
+
+ unlink(name);
+}
+
+static void test_same_fd(void) {
+ _cleanup_close_pair_ int p[2] = { -1, -1 };
+ _cleanup_close_ int a = -1, b = -1, c = -1;
+
+ assert_se(pipe2(p, O_CLOEXEC) >= 0);
+ assert_se((a = fcntl(p[0], F_DUPFD, 3)) >= 0);
+ assert_se((b = open("/dev/null", O_RDONLY|O_CLOEXEC)) >= 0);
+ assert_se((c = fcntl(a, F_DUPFD, 3)) >= 0);
+
+ assert_se(same_fd(p[0], p[0]) > 0);
+ assert_se(same_fd(p[1], p[1]) > 0);
+ assert_se(same_fd(a, a) > 0);
+ assert_se(same_fd(b, b) > 0);
+
+ assert_se(same_fd(a, p[0]) > 0);
+ assert_se(same_fd(p[0], a) > 0);
+ assert_se(same_fd(c, p[0]) > 0);
+ assert_se(same_fd(p[0], c) > 0);
+ assert_se(same_fd(a, c) > 0);
+ assert_se(same_fd(c, a) > 0);
+
+ assert_se(same_fd(p[0], p[1]) == 0);
+ assert_se(same_fd(p[1], p[0]) == 0);
+ assert_se(same_fd(p[0], b) == 0);
+ assert_se(same_fd(b, p[0]) == 0);
+ assert_se(same_fd(p[1], a) == 0);
+ assert_se(same_fd(a, p[1]) == 0);
+ assert_se(same_fd(p[1], b) == 0);
+ assert_se(same_fd(b, p[1]) == 0);
+
+ assert_se(same_fd(a, b) == 0);
+ assert_se(same_fd(b, a) == 0);
+}
+
+static void test_open_serialization_fd(void) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open_serialization_fd("test");
+ assert_se(fd >= 0);
+
+ assert_se(write(fd, "test\n", 5) == 5);
+}
+
+static void test_acquire_data_fd_one(unsigned flags) {
+ char wbuffer[196*1024 - 7];
+ char rbuffer[sizeof(wbuffer)];
+ int fd;
+
+ fd = acquire_data_fd("foo", 3, flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 3);
+ assert_se(streq(rbuffer, "foo"));
+
+ fd = safe_close(fd);
+
+ fd = acquire_data_fd("", 0, flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 0);
+ assert_se(streq(rbuffer, ""));
+
+ fd = safe_close(fd);
+
+ random_bytes(wbuffer, sizeof(wbuffer));
+
+ fd = acquire_data_fd(wbuffer, sizeof(wbuffer), flags);
+ assert_se(fd >= 0);
+
+ zero(rbuffer);
+ assert_se(read(fd, rbuffer, sizeof(rbuffer)) == sizeof(rbuffer));
+ assert_se(memcmp(rbuffer, wbuffer, sizeof(rbuffer)) == 0);
+
+ fd = safe_close(fd);
+}
+
+static void test_acquire_data_fd(void) {
+
+ test_acquire_data_fd_one(0);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL);
+ test_acquire_data_fd_one(ACQUIRE_NO_MEMFD);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD);
+ test_acquire_data_fd_one(ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
+ test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE|ACQUIRE_NO_TMPFILE);
+}
+
+static void test_fd_move_above_stdio(void) {
+ int original_stdin, new_fd;
+
+ original_stdin = fcntl(0, F_DUPFD, 3);
+ assert_se(original_stdin >= 3);
+ assert_se(close_nointr(0) != EBADF);
+
+ new_fd = open("/dev/null", O_RDONLY);
+ assert_se(new_fd == 0);
+
+ new_fd = fd_move_above_stdio(new_fd);
+ assert_se(new_fd >= 3);
+
+ assert_se(dup(original_stdin) == 0);
+ assert_se(close_nointr(original_stdin) != EBADF);
+ assert_se(close_nointr(new_fd) != EBADF);
+}
+
+static void test_rearrange_stdio(void) {
+ pid_t pid;
+ int r;
+
+ r = safe_fork("rearrange", FORK_WAIT|FORK_LOG, &pid);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ _cleanup_free_ char *path = NULL;
+ char buffer[10];
+
+ /* Child */
+
+ safe_close(STDERR_FILENO); /* Let's close an fd < 2, to make it more interesting */
+
+ assert_se(rearrange_stdio(-1, -1, -1) >= 0);
+
+ assert_se(fd_get_path(STDIN_FILENO, &path) >= 0);
+ assert_se(path_equal(path, "/dev/null"));
+ path = mfree(path);
+
+ assert_se(fd_get_path(STDOUT_FILENO, &path) >= 0);
+ assert_se(path_equal(path, "/dev/null"));
+ path = mfree(path);
+
+ assert_se(fd_get_path(STDOUT_FILENO, &path) >= 0);
+ assert_se(path_equal(path, "/dev/null"));
+ path = mfree(path);
+
+ safe_close(STDIN_FILENO);
+ safe_close(STDOUT_FILENO);
+ safe_close(STDERR_FILENO);
+
+ {
+ int pair[2];
+ assert_se(pipe(pair) >= 0);
+ assert_se(pair[0] == 0);
+ assert_se(pair[1] == 1);
+ assert_se(fd_move_above_stdio(0) == 3);
+ }
+ assert_se(open("/dev/full", O_WRONLY|O_CLOEXEC) == 0);
+ assert_se(acquire_data_fd("foobar", 6, 0) == 2);
+
+ assert_se(rearrange_stdio(2, 0, 1) >= 0);
+
+ assert_se(write(1, "x", 1) < 0 && errno == ENOSPC);
+ assert_se(write(2, "z", 1) == 1);
+ assert_se(read(3, buffer, sizeof(buffer)) == 1);
+ assert_se(buffer[0] == 'z');
+ assert_se(read(0, buffer, sizeof(buffer)) == 6);
+ assert_se(memcmp(buffer, "foobar", 6) == 0);
+
+ assert_se(rearrange_stdio(-1, 1, 2) >= 0);
+ assert_se(write(1, "a", 1) < 0 && errno == ENOSPC);
+ assert_se(write(2, "y", 1) == 1);
+ assert_se(read(3, buffer, sizeof(buffer)) == 1);
+ assert_se(buffer[0] == 'y');
+
+ assert_se(fd_get_path(0, &path) >= 0);
+ assert_se(path_equal(path, "/dev/null"));
+ path = mfree(path);
+
+ _exit(EXIT_SUCCESS);
+ }
+}
+
+static void assert_equal_fd(int fd1, int fd2) {
+
+ for (;;) {
+ uint8_t a[4096], b[4096];
+ ssize_t x, y;
+
+ x = read(fd1, a, sizeof(a));
+ assert_se(x >= 0);
+
+ y = read(fd2, b, sizeof(b));
+ assert_se(y >= 0);
+
+ assert_se(x == y);
+
+ if (x == 0)
+ break;
+
+ assert_se(memcmp(a, b, x) == 0);
+ }
+}
+
+static void test_fd_duplicate_data_fd(void) {
+ _cleanup_close_ int fd1 = -1, fd2 = -1;
+ _cleanup_(close_pairp) int sfd[2] = { -1, -1 };
+ _cleanup_(sigkill_waitp) pid_t pid = -1;
+ uint64_t i, j;
+ int r;
+
+ fd1 = open("/etc/fstab", O_RDONLY|O_CLOEXEC);
+ if (fd1 >= 0) {
+
+ fd2 = fd_duplicate_data_fd(fd1);
+ assert_se(fd2 >= 0);
+
+ assert_se(lseek(fd1, 0, SEEK_SET) == 0);
+ assert_equal_fd(fd1, fd2);
+ }
+
+ fd1 = safe_close(fd1);
+ fd2 = safe_close(fd2);
+
+ fd1 = acquire_data_fd("hallo", 6, 0);
+ assert_se(fd1 >= 0);
+
+ fd2 = fd_duplicate_data_fd(fd1);
+ assert_se(fd2 >= 0);
+
+ safe_close(fd1);
+ fd1 = acquire_data_fd("hallo", 6, 0);
+ assert_se(fd1 >= 0);
+
+ assert_equal_fd(fd1, fd2);
+
+ fd1 = safe_close(fd1);
+ fd2 = safe_close(fd2);
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, sfd) >= 0);
+
+ r = safe_fork("(sd-pipe)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* child */
+
+ sfd[0] = safe_close(sfd[0]);
+
+ for (i = 0; i < 1536*1024 / sizeof(uint64_t); i++)
+ assert_se(write(sfd[1], &i, sizeof(i)) == sizeof(i));
+
+ sfd[1] = safe_close(sfd[1]);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ sfd[1] = safe_close(sfd[1]);
+
+ fd2 = fd_duplicate_data_fd(sfd[0]);
+ assert_se(fd2 >= 0);
+
+ for (i = 0; i < 1536*1024 / sizeof(uint64_t); i++) {
+ assert_se(read(fd2, &j, sizeof(j)) == sizeof(j));
+ assert_se(i == j);
+ }
+
+ assert_se(read(fd2, &j, sizeof(j)) == 0);
+}
+
+static void test_read_nr_open(void) {
+ log_info("nr-open: %i", read_nr_open());
+}
+
+static size_t validate_fds(
+ bool opened,
+ const int *fds,
+ size_t n_fds) {
+
+ size_t c = 0;
+
+ /* Validates that fds in the specified array are one of the following three:
+ *
+ * 1. < 0 (test is skipped) or
+ * 2. opened (if 'opened' param is true) or
+ * 3. closed (if 'opened' param is false)
+ */
+
+ for (size_t i = 0; i < n_fds; i++) {
+ if (fds[i] < 0)
+ continue;
+
+ if (opened)
+ assert_se(fcntl(fds[i], F_GETFD) >= 0);
+ else
+ assert_se(fcntl(fds[i], F_GETFD) < 0 && errno == EBADF);
+
+ c++;
+ }
+
+ return c; /* Return number of fds >= 0 in the array */
+}
+
+static void test_close_all_fds(void) {
+ _cleanup_free_ int *fds = NULL, *keep = NULL;
+ struct rlimit rl;
+ size_t n_fds, n_keep;
+
+ log_info("/* %s */", __func__);
+
+ rlimit_nofile_bump(-1);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
+ assert_se(rl.rlim_cur > 10);
+
+ /* Try to use 5000 fds, but when we can't bump the rlimit to make that happen use the whole limit minus 10 */
+ n_fds = MIN((rl.rlim_cur & ~1U) - 10U, 5000U);
+ assert_se((n_fds & 1U) == 0U); /* make sure even number of fds */
+
+ /* Allocate the determined number of fds, always two at a time */
+ assert_se(fds = new(int, n_fds));
+ for (size_t i = 0; i < n_fds; i += 2)
+ assert_se(pipe2(fds + i, O_CLOEXEC) >= 0);
+
+ /* Validate this worked */
+ assert_se(validate_fds(true, fds, n_fds) == n_fds);
+
+ /* Randomized number of fds to keep, but at most every second */
+ n_keep = (random_u64() % (n_fds / 2));
+
+ /* Now randomly select a number of fds from the array above to keep */
+ assert_se(keep = new(int, n_keep));
+ for (size_t k = 0; k < n_keep; k++) {
+ for (;;) {
+ size_t p;
+
+ p = random_u64() % n_fds;
+ if (fds[p] >= 0) {
+ keep[k] = TAKE_FD(fds[p]);
+ break;
+ }
+ }
+ }
+
+ /* Check that all fds from both arrays are still open, and test how many in each are >= 0 */
+ assert_se(validate_fds(true, fds, n_fds) == n_fds - n_keep);
+ assert_se(validate_fds(true, keep, n_keep) == n_keep);
+
+ /* Close logging fd first, so that we don't confuse it by closing its fd */
+ log_close();
+ log_set_open_when_needed(true);
+
+ /* Close all but the ones to keep */
+ assert_se(close_all_fds(keep, n_keep) >= 0);
+
+ assert_se(validate_fds(false, fds, n_fds) == n_fds - n_keep);
+ assert_se(validate_fds(true, keep, n_keep) == n_keep);
+
+ /* Close everything else too! */
+ assert_se(close_all_fds(NULL, 0) >= 0);
+
+ assert_se(validate_fds(false, fds, n_fds) == n_fds - n_keep);
+ assert_se(validate_fds(false, keep, n_keep) == n_keep);
+
+ log_set_open_when_needed(false);
+ log_open();
+}
+
+int main(int argc, char *argv[]) {
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_close_many();
+ test_close_nointr();
+ test_same_fd();
+ test_open_serialization_fd();
+ test_acquire_data_fd();
+ test_fd_move_above_stdio();
+ test_rearrange_stdio();
+ test_fd_duplicate_data_fd();
+ test_read_nr_open();
+ test_close_all_fds();
+
+ return 0;
+}
diff --git a/src/test/test-fdset.c b/src/test/test-fdset.c
new file mode 100644
index 0000000..c77d7ff
--- /dev/null
+++ b/src/test/test-fdset.c
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fdset.h"
+#include "macro.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_fdset_new_fill(void) {
+ int fd = -1;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_new_fill.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(fdset_new_fill(&fdset) >= 0);
+ assert_se(fdset_contains(fdset, fd));
+
+ unlink(name);
+}
+
+static void test_fdset_put_dup(void) {
+ _cleanup_close_ int fd = -1;
+ int copyfd = -1;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_put_dup.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ copyfd = fdset_put_dup(fdset, fd);
+ assert_se(copyfd >= 0 && copyfd != fd);
+ assert_se(fdset_contains(fdset, copyfd));
+ assert_se(!fdset_contains(fdset, fd));
+
+ unlink(name);
+}
+
+static void test_fdset_cloexec(void) {
+ int fd = -1;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ int flags = -1;
+ char name[] = "/tmp/test-fdset_cloexec.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ assert_se(fdset_put(fdset, fd));
+
+ assert_se(fdset_cloexec(fdset, false) >= 0);
+ flags = fcntl(fd, F_GETFD);
+ assert_se(flags >= 0);
+ assert_se(!(flags & FD_CLOEXEC));
+
+ assert_se(fdset_cloexec(fdset, true) >= 0);
+ flags = fcntl(fd, F_GETFD);
+ assert_se(flags >= 0);
+ assert_se(flags & FD_CLOEXEC);
+
+ unlink(name);
+}
+
+static void test_fdset_close_others(void) {
+ int fd = -1;
+ int copyfd = -1;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ int flags = -1;
+ char name[] = "/tmp/test-fdset_close_others.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ copyfd = fdset_put_dup(fdset, fd);
+ assert_se(copyfd >= 0);
+
+ assert_se(fdset_close_others(fdset) >= 0);
+ flags = fcntl(fd, F_GETFD);
+ assert_se(flags < 0);
+ flags = fcntl(copyfd, F_GETFD);
+ assert_se(flags >= 0);
+
+ unlink(name);
+}
+
+static void test_fdset_remove(void) {
+ _cleanup_close_ int fd = -1;
+ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_remove.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(fdset_remove(fdset, fd) >= 0);
+ assert_se(!fdset_contains(fdset, fd));
+ fdset_free(fdset);
+
+ assert_se(fcntl(fd, F_GETFD) >= 0);
+
+ unlink(name);
+}
+
+static void test_fdset_iterate(void) {
+ int fd = -1;
+ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_iterate.XXXXXX";
+ int c = 0;
+ int a;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(fdset_put(fdset, fd) >= 0);
+
+ FDSET_FOREACH(a, fdset) {
+ c++;
+ assert_se(a == fd);
+ }
+ assert_se(c == 1);
+
+ fdset_free(fdset);
+
+ unlink(name);
+}
+
+static void test_fdset_isempty(void) {
+ int fd;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_isempty.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+
+ assert_se(fdset_isempty(fdset));
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(!fdset_isempty(fdset));
+
+ unlink(name);
+}
+
+static void test_fdset_steal_first(void) {
+ int fd;
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+ char name[] = "/tmp/test-fdset_steal_first.XXXXXX";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+
+ fdset = fdset_new();
+ assert_se(fdset);
+
+ assert_se(fdset_steal_first(fdset) < 0);
+ assert_se(fdset_put(fdset, fd) >= 0);
+ assert_se(fdset_steal_first(fdset) == fd);
+ assert_se(fdset_steal_first(fdset) < 0);
+ assert_se(fdset_put(fdset, fd) >= 0);
+
+ unlink(name);
+}
+
+static void test_fdset_new_array(void) {
+ int fds[] = {10, 11, 12, 13};
+ _cleanup_fdset_free_ FDSet *fdset = NULL;
+
+ assert_se(fdset_new_array(&fdset, fds, 4) >= 0);
+ assert_se(fdset_size(fdset) == 4);
+ assert_se(fdset_contains(fdset, 10));
+ assert_se(fdset_contains(fdset, 11));
+ assert_se(fdset_contains(fdset, 12));
+ assert_se(fdset_contains(fdset, 13));
+}
+
+int main(int argc, char *argv[]) {
+ test_fdset_new_fill();
+ test_fdset_put_dup();
+ test_fdset_cloexec();
+ test_fdset_close_others();
+ test_fdset_remove();
+ test_fdset_iterate();
+ test_fdset_isempty();
+ test_fdset_steal_first();
+ test_fdset_new_array();
+
+ return 0;
+}
diff --git a/src/test/test-fileio.c b/src/test/test-fileio.c
new file mode 100644
index 0000000..431aea0
--- /dev/null
+++ b/src/test/test-fileio.c
@@ -0,0 +1,951 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "ctype.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_parse_env_file(void) {
+ _cleanup_(unlink_tempfilep) char
+ t[] = "/tmp/test-fileio-in-XXXXXX",
+ p[] = "/tmp/test-fileio-out-XXXXXX";
+ FILE *f;
+ _cleanup_free_ char *one = NULL, *two = NULL, *three = NULL, *four = NULL, *five = NULL,
+ *six = NULL, *seven = NULL, *eight = NULL, *nine = NULL, *ten = NULL,
+ *eleven = NULL, *twelve = NULL, *thirteen = NULL;
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+ char **i;
+ unsigned k;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ fputs("one=BAR \n"
+ "# comment\n"
+ " # comment \n"
+ " ; comment \n"
+ " two = bar \n"
+ "invalid line\n"
+ "invalid line #comment\n"
+ "three = \"333\n"
+ "xxxx\"\n"
+ "four = \'44\\\"44\'\n"
+ "five = \"55\\\"55\" \"FIVE\" cinco \n"
+ "six = seis sechs\\\n"
+ " sis\n"
+ "seven=\"sevenval\" #nocomment\n"
+ "eight=eightval #nocomment\n"
+ "export nine=nineval\n"
+ "ten=ignored\n"
+ "ten=ignored\n"
+ "ten=\n"
+ "eleven=\\value\n"
+ "twelve=\"\\value\"\n"
+ "thirteen='\\value'", f);
+
+ fflush(f);
+ fclose(f);
+
+ r = load_env_file(NULL, t, &a);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(i, a)
+ log_info("Got: <%s>", *i);
+
+ assert_se(streq_ptr(a[0], "one=BAR"));
+ assert_se(streq_ptr(a[1], "two=bar"));
+ assert_se(streq_ptr(a[2], "three=333\nxxxx"));
+ assert_se(streq_ptr(a[3], "four=44\\\"44"));
+ assert_se(streq_ptr(a[4], "five=55\"55FIVEcinco"));
+ assert_se(streq_ptr(a[5], "six=seis sechs sis"));
+ assert_se(streq_ptr(a[6], "seven=sevenval#nocomment"));
+ assert_se(streq_ptr(a[7], "eight=eightval #nocomment"));
+ assert_se(streq_ptr(a[8], "export nine=nineval"));
+ assert_se(streq_ptr(a[9], "ten="));
+ assert_se(streq_ptr(a[10], "eleven=value"));
+ assert_se(streq_ptr(a[11], "twelve=\\value"));
+ assert_se(streq_ptr(a[12], "thirteen=\\value"));
+ assert_se(a[13] == NULL);
+
+ strv_env_clean(a);
+
+ k = 0;
+ STRV_FOREACH(i, b) {
+ log_info("Got2: <%s>", *i);
+ assert_se(streq(*i, a[k++]));
+ }
+
+ r = parse_env_file(
+ NULL, t,
+ "one", &one,
+ "two", &two,
+ "three", &three,
+ "four", &four,
+ "five", &five,
+ "six", &six,
+ "seven", &seven,
+ "eight", &eight,
+ "export nine", &nine,
+ "ten", &ten,
+ "eleven", &eleven,
+ "twelve", &twelve,
+ "thirteen", &thirteen);
+
+ assert_se(r >= 0);
+
+ log_info("one=[%s]", strna(one));
+ log_info("two=[%s]", strna(two));
+ log_info("three=[%s]", strna(three));
+ log_info("four=[%s]", strna(four));
+ log_info("five=[%s]", strna(five));
+ log_info("six=[%s]", strna(six));
+ log_info("seven=[%s]", strna(seven));
+ log_info("eight=[%s]", strna(eight));
+ log_info("export nine=[%s]", strna(nine));
+ log_info("ten=[%s]", strna(nine));
+ log_info("eleven=[%s]", strna(eleven));
+ log_info("twelve=[%s]", strna(twelve));
+ log_info("thirteen=[%s]", strna(thirteen));
+
+ assert_se(streq(one, "BAR"));
+ assert_se(streq(two, "bar"));
+ assert_se(streq(three, "333\nxxxx"));
+ assert_se(streq(four, "44\\\"44"));
+ assert_se(streq(five, "55\"55FIVEcinco"));
+ assert_se(streq(six, "seis sechs sis"));
+ assert_se(streq(seven, "sevenval#nocomment"));
+ assert_se(streq(eight, "eightval #nocomment"));
+ assert_se(streq(nine, "nineval"));
+ assert_se(ten == NULL);
+ assert_se(streq(eleven, "value"));
+ assert_se(streq(twelve, "\\value"));
+ assert_se(streq(thirteen, "\\value"));
+
+ {
+ /* prepare a temporary file to write the environment to */
+ _cleanup_close_ int fd = mkostemp_safe(p);
+ assert_se(fd >= 0);
+ }
+
+ r = write_env_file(p, a);
+ assert_se(r >= 0);
+
+ r = load_env_file(NULL, p, &b);
+ assert_se(r >= 0);
+}
+
+static void test_one_shell_var(const char *file, const char *variable, const char *value) {
+ _cleanup_free_ char *cmd = NULL, *from_shell = NULL;
+ _cleanup_pclose_ FILE *f = NULL;
+ size_t sz;
+
+ assert_se(cmd = strjoin(". ", file, " && /bin/echo -n \"$", variable, "\""));
+ assert_se(f = popen(cmd, "re"));
+ assert_se(read_full_stream(f, &from_shell, &sz) >= 0);
+ assert_se(sz == strlen(value));
+ assert_se(streq(from_shell, value));
+}
+
+static void test_parse_multiline_env_file(void) {
+ _cleanup_(unlink_tempfilep) char
+ t[] = "/tmp/test-fileio-in-XXXXXX",
+ p[] = "/tmp/test-fileio-out-XXXXXX";
+ FILE *f;
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+ char **i;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ fputs("one=BAR\\\n"
+ "\\ \\ \\ \\ VAR\\\n"
+ "\\\tGAR\n"
+ "#comment\n"
+ "two=\"bar\\\n"
+ " var\\\n"
+ "\tgar\"\n"
+ "#comment\n"
+ "tri=\"bar \\\n"
+ " var \\\n"
+ "\tgar \"\n", f);
+
+ assert_se(fflush_and_check(f) >= 0);
+ fclose(f);
+
+ test_one_shell_var(t, "one", "BAR VAR\tGAR");
+ test_one_shell_var(t, "two", "bar var\tgar");
+ test_one_shell_var(t, "tri", "bar var \tgar ");
+
+ r = load_env_file(NULL, t, &a);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(i, a)
+ log_info("Got: <%s>", *i);
+
+ assert_se(streq_ptr(a[0], "one=BAR VAR\tGAR"));
+ assert_se(streq_ptr(a[1], "two=bar var\tgar"));
+ assert_se(streq_ptr(a[2], "tri=bar var \tgar "));
+ assert_se(a[3] == NULL);
+
+ {
+ _cleanup_close_ int fd = mkostemp_safe(p);
+ assert_se(fd >= 0);
+ }
+
+ r = write_env_file(p, a);
+ assert_se(r >= 0);
+
+ r = load_env_file(NULL, p, &b);
+ assert_se(r >= 0);
+}
+
+static void test_merge_env_file(void) {
+ _cleanup_(unlink_tempfilep) char t[] = "/tmp/test-fileio-XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **a = NULL;
+ char **i;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ log_info("/* %s (%s) */", __func__, t);
+
+ r = write_string_stream(f,
+ "one=1 \n"
+ "twelve=${one}2\n"
+ "twentyone=2${one}\n"
+ "one=2\n"
+ "twentytwo=2${one}\n"
+ "xxx_minus_three=$xxx - 3\n"
+ "xxx=0x$one$one$one\n"
+ "yyy=${one:-fallback}\n"
+ "zzz=${one:+replacement}\n"
+ "zzzz=${foobar:-${nothing}}\n"
+ "zzzzz=${nothing:+${nothing}}\n"
+ , WRITE_STRING_FILE_AVOID_NEWLINE);
+ assert(r >= 0);
+
+ r = merge_env_file(&a, NULL, t);
+ assert_se(r >= 0);
+ strv_sort(a);
+
+ STRV_FOREACH(i, a)
+ log_info("Got: <%s>", *i);
+
+ assert_se(streq(a[0], "one=2"));
+ assert_se(streq(a[1], "twelve=12"));
+ assert_se(streq(a[2], "twentyone=21"));
+ assert_se(streq(a[3], "twentytwo=22"));
+ assert_se(streq(a[4], "xxx=0x222"));
+ assert_se(streq(a[5], "xxx_minus_three= - 3"));
+ assert_se(streq(a[6], "yyy=2"));
+ assert_se(streq(a[7], "zzz=replacement"));
+ assert_se(streq(a[8], "zzzz="));
+ assert_se(streq(a[9], "zzzzz="));
+ assert_se(a[10] == NULL);
+
+ r = merge_env_file(&a, NULL, t);
+ assert_se(r >= 0);
+ strv_sort(a);
+
+ STRV_FOREACH(i, a)
+ log_info("Got2: <%s>", *i);
+
+ assert_se(streq(a[0], "one=2"));
+ assert_se(streq(a[1], "twelve=12"));
+ assert_se(streq(a[2], "twentyone=21"));
+ assert_se(streq(a[3], "twentytwo=22"));
+ assert_se(streq(a[4], "xxx=0x222"));
+ assert_se(streq(a[5], "xxx_minus_three=0x222 - 3"));
+ assert_se(streq(a[6], "yyy=2"));
+ assert_se(streq(a[7], "zzz=replacement"));
+ assert_se(streq(a[8], "zzzz="));
+ assert_se(streq(a[9], "zzzzz="));
+ assert_se(a[10] == NULL);
+}
+
+static void test_merge_env_file_invalid(void) {
+ _cleanup_(unlink_tempfilep) char t[] = "/tmp/test-fileio-XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **a = NULL;
+ char **i;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ log_info("/* %s (%s) */", __func__, t);
+
+ r = write_string_stream(f,
+ "unset one \n"
+ "unset one= \n"
+ "unset one=1 \n"
+ "one \n"
+ "one = \n"
+ "one two =\n"
+ "\x20two=\n"
+ "#comment=comment\n"
+ ";comment2=comment2\n"
+ "#\n"
+ "\n\n" /* empty line */
+ , WRITE_STRING_FILE_AVOID_NEWLINE);
+ assert(r >= 0);
+
+ r = merge_env_file(&a, NULL, t);
+ assert_se(r >= 0);
+
+ STRV_FOREACH(i, a)
+ log_info("Got: <%s>", *i);
+
+ assert_se(strv_isempty(a));
+}
+
+static void test_executable_is_script(void) {
+ _cleanup_(unlink_tempfilep) char t[] = "/tmp/test-fileio-XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ char *command;
+ int r;
+
+ assert_se(fmkostemp_safe(t, "w", &f) == 0);
+ fputs("#! /bin/script -a -b \ngoo goo", f);
+ fflush(f);
+
+ r = executable_is_script(t, &command);
+ assert_se(r > 0);
+ assert_se(streq(command, "/bin/script"));
+ free(command);
+
+ r = executable_is_script("/bin/sh", &command);
+ assert_se(r == 0);
+
+ r = executable_is_script("/usr/bin/yum", &command);
+ assert_se(r > 0 || r == -ENOENT);
+ if (r > 0) {
+ assert_se(startswith(command, "/"));
+ free(command);
+ }
+}
+
+static void test_status_field(void) {
+ _cleanup_free_ char *t = NULL, *p = NULL, *s = NULL, *z = NULL;
+ unsigned long long total = 0, buffers = 0;
+ int r;
+
+ assert_se(get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t) == 0);
+ puts(t);
+ assert_se(streq(t, "1"));
+
+ r = get_proc_field("/proc/meminfo", "MemTotal", WHITESPACE, &p);
+ if (r != -ENOENT) {
+ assert_se(r == 0);
+ puts(p);
+ assert_se(safe_atollu(p, &total) == 0);
+ }
+
+ r = get_proc_field("/proc/meminfo", "Buffers", WHITESPACE, &s);
+ if (r != -ENOENT) {
+ assert_se(r == 0);
+ puts(s);
+ assert_se(safe_atollu(s, &buffers) == 0);
+ }
+
+ if (p)
+ assert_se(buffers < total);
+
+ /* Seccomp should be a good test for field full of zeros. */
+ r = get_proc_field("/proc/meminfo", "Seccomp", WHITESPACE, &z);
+ if (r != -ENOENT) {
+ assert_se(r == 0);
+ puts(z);
+ assert_se(safe_atollu(z, &buffers) == 0);
+ }
+}
+
+static void test_capeff(void) {
+ int pid, p;
+
+ for (pid = 0; pid < 2; pid++) {
+ _cleanup_free_ char *capeff = NULL;
+ int r;
+
+ r = get_process_capeff(0, &capeff);
+ log_info("capeff: '%s' (r=%d)", capeff, r);
+
+ if (IN_SET(r, -ENOENT, -EPERM))
+ return;
+
+ assert_se(r == 0);
+ assert_se(*capeff);
+ p = capeff[strspn(capeff, HEXDIGITS)];
+ assert_se(!p || isspace(p));
+ }
+}
+
+static void test_write_string_stream(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-write_string_stream-XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ int fd;
+ char buf[64];
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ f = fdopen(fd, "r");
+ assert_se(f);
+ assert_se(write_string_stream(f, "boohoo", 0) < 0);
+ f = safe_fclose(f);
+
+ f = fopen(fn, "r+");
+ assert_se(f);
+
+ assert_se(write_string_stream(f, "boohoo", 0) == 0);
+ rewind(f);
+
+ assert_se(fgets(buf, sizeof(buf), f));
+ assert_se(streq(buf, "boohoo\n"));
+ f = safe_fclose(f);
+
+ f = fopen(fn, "w+");
+ assert_se(f);
+
+ assert_se(write_string_stream(f, "boohoo", WRITE_STRING_FILE_AVOID_NEWLINE) == 0);
+ rewind(f);
+
+ assert_se(fgets(buf, sizeof(buf), f));
+ printf(">%s<", buf);
+ assert_se(streq(buf, "boohoo"));
+}
+
+static void test_write_string_file(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-write_string_file-XXXXXX";
+ char buf[64] = {};
+ _cleanup_close_ int fd;
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ assert_se(write_string_file(fn, "boohoo", WRITE_STRING_FILE_CREATE) == 0);
+
+ assert_se(read(fd, buf, sizeof(buf)) == 7);
+ assert_se(streq(buf, "boohoo\n"));
+}
+
+static void test_write_string_file_no_create(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-write_string_file_no_create-XXXXXX";
+ _cleanup_close_ int fd;
+ char buf[64] = {};
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ assert_se(write_string_file("/a/file/which/does/not/exists/i/guess", "boohoo", 0) < 0);
+ assert_se(write_string_file(fn, "boohoo", 0) == 0);
+
+ assert_se(read(fd, buf, sizeof buf) == (ssize_t) strlen("boohoo\n"));
+ assert_se(streq(buf, "boohoo\n"));
+}
+
+static void test_write_string_file_verify(void) {
+ _cleanup_free_ char *buf = NULL, *buf2 = NULL;
+ int r;
+
+ r = read_one_line_file("/proc/version", &buf);
+ if (ERRNO_IS_PRIVILEGE(r))
+ return;
+ assert_se(r >= 0);
+ assert_se(buf2 = strjoin(buf, "\n"));
+
+ r = write_string_file("/proc/version", buf, 0);
+ assert_se(IN_SET(r, -EACCES, -EIO));
+ r = write_string_file("/proc/version", buf2, 0);
+ assert_se(IN_SET(r, -EACCES, -EIO));
+
+ assert_se(write_string_file("/proc/version", buf, WRITE_STRING_FILE_VERIFY_ON_FAILURE) == 0);
+ assert_se(write_string_file("/proc/version", buf2, WRITE_STRING_FILE_VERIFY_ON_FAILURE) == 0);
+
+ r = write_string_file("/proc/version", buf, WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_AVOID_NEWLINE);
+ assert_se(IN_SET(r, -EACCES, -EIO));
+ assert_se(write_string_file("/proc/version", buf2, WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_AVOID_NEWLINE) == 0);
+}
+
+static void test_load_env_file_pairs(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-load_env_file_pairs-XXXXXX";
+ int fd, r;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ char **k, **v;
+
+ fd = mkostemp_safe(fn);
+ assert_se(fd >= 0);
+
+ r = write_string_file(fn,
+ "NAME=\"Arch Linux\"\n"
+ "ID=arch\n"
+ "PRETTY_NAME=\"Arch Linux\"\n"
+ "ANSI_COLOR=\"0;36\"\n"
+ "HOME_URL=\"https://www.archlinux.org/\"\n"
+ "SUPPORT_URL=\"https://bbs.archlinux.org/\"\n"
+ "BUG_REPORT_URL=\"https://bugs.archlinux.org/\"\n",
+ WRITE_STRING_FILE_CREATE);
+ assert_se(r == 0);
+
+ f = fdopen(fd, "r");
+ assert_se(f);
+
+ r = load_env_file_pairs(f, fn, &l);
+ assert_se(r >= 0);
+
+ assert_se(strv_length(l) == 14);
+ STRV_FOREACH_PAIR(k, v, l) {
+ assert_se(STR_IN_SET(*k, "NAME", "ID", "PRETTY_NAME", "ANSI_COLOR", "HOME_URL", "SUPPORT_URL", "BUG_REPORT_URL"));
+ printf("%s=%s\n", *k, *v);
+ if (streq(*k, "NAME")) assert_se(streq(*v, "Arch Linux"));
+ if (streq(*k, "ID")) assert_se(streq(*v, "arch"));
+ if (streq(*k, "PRETTY_NAME")) assert_se(streq(*v, "Arch Linux"));
+ if (streq(*k, "ANSI_COLOR")) assert_se(streq(*v, "0;36"));
+ if (streq(*k, "HOME_URL")) assert_se(streq(*v, "https://www.archlinux.org/"));
+ if (streq(*k, "SUPPORT_URL")) assert_se(streq(*v, "https://bbs.archlinux.org/"));
+ if (streq(*k, "BUG_REPORT_URL")) assert_se(streq(*v, "https://bugs.archlinux.org/"));
+ }
+}
+
+static void test_search_and_fopen(void) {
+ const char *dirs[] = {"/tmp/foo/bar", "/tmp", NULL};
+
+ char name[] = "/tmp/test-search_and_fopen.XXXXXX";
+ int fd, r;
+ FILE *f;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ close(fd);
+
+ r = search_and_fopen(basename(name), "r", NULL, dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen(name, "r", NULL, dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen(basename(name), "r", "/", dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen("/a/file/which/does/not/exist/i/guess", "r", NULL, dirs, &f);
+ assert_se(r < 0);
+ r = search_and_fopen("afilewhichdoesnotexistiguess", "r", NULL, dirs, &f);
+ assert_se(r < 0);
+
+ r = unlink(name);
+ assert_se(r == 0);
+
+ r = search_and_fopen(basename(name), "r", NULL, dirs, &f);
+ assert_se(r < 0);
+}
+
+static void test_search_and_fopen_nulstr(void) {
+ const char dirs[] = "/tmp/foo/bar\0/tmp\0";
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-search_and_fopen.XXXXXX";
+ int fd, r;
+ FILE *f;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ close(fd);
+
+ r = search_and_fopen_nulstr(basename(name), "r", NULL, dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen_nulstr(name, "r", NULL, dirs, &f);
+ assert_se(r >= 0);
+ fclose(f);
+
+ r = search_and_fopen_nulstr("/a/file/which/does/not/exist/i/guess", "r", NULL, dirs, &f);
+ assert_se(r < 0);
+ r = search_and_fopen_nulstr("afilewhichdoesnotexistiguess", "r", NULL, dirs, &f);
+ assert_se(r < 0);
+
+ r = unlink(name);
+ assert_se(r == 0);
+
+ r = search_and_fopen_nulstr(basename(name), "r", NULL, dirs, &f);
+ assert_se(r < 0);
+}
+
+static void test_writing_tmpfile(void) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-systemd_writing_tmpfile.XXXXXX";
+ _cleanup_free_ char *contents = NULL;
+ size_t size;
+ _cleanup_close_ int fd = -1;
+ struct iovec iov[3];
+ int r;
+
+ iov[0] = IOVEC_MAKE_STRING("abc\n");
+ iov[1] = IOVEC_MAKE_STRING(ALPHANUMERICAL "\n");
+ iov[2] = IOVEC_MAKE_STRING("");
+
+ fd = mkostemp_safe(name);
+ printf("tmpfile: %s", name);
+
+ r = writev(fd, iov, 3);
+ assert_se(r >= 0);
+
+ r = read_full_file(name, &contents, &size);
+ assert_se(r == 0);
+ printf("contents: %s", contents);
+ assert_se(streq(contents, "abc\n" ALPHANUMERICAL "\n"));
+}
+
+static void test_tempfn(void) {
+ char *ret = NULL, *p;
+
+ assert_se(tempfn_xxxxxx("/foo/bar/waldo", NULL, &ret) >= 0);
+ assert_se(streq_ptr(ret, "/foo/bar/.#waldoXXXXXX"));
+ free(ret);
+
+ assert_se(tempfn_xxxxxx("/foo/bar/waldo", "[miau]", &ret) >= 0);
+ assert_se(streq_ptr(ret, "/foo/bar/.#[miau]waldoXXXXXX"));
+ free(ret);
+
+ assert_se(tempfn_random("/foo/bar/waldo", NULL, &ret) >= 0);
+ assert_se(p = startswith(ret, "/foo/bar/.#waldo"));
+ assert_se(strlen(p) == 16);
+ assert_se(in_charset(p, "0123456789abcdef"));
+ free(ret);
+
+ assert_se(tempfn_random("/foo/bar/waldo", "[wuff]", &ret) >= 0);
+ assert_se(p = startswith(ret, "/foo/bar/.#[wuff]waldo"));
+ assert_se(strlen(p) == 16);
+ assert_se(in_charset(p, "0123456789abcdef"));
+ free(ret);
+
+ assert_se(tempfn_random_child("/foo/bar/waldo", NULL, &ret) >= 0);
+ assert_se(p = startswith(ret, "/foo/bar/waldo/.#"));
+ assert_se(strlen(p) == 16);
+ assert_se(in_charset(p, "0123456789abcdef"));
+ free(ret);
+
+ assert_se(tempfn_random_child("/foo/bar/waldo", "[kikiriki]", &ret) >= 0);
+ assert_se(p = startswith(ret, "/foo/bar/waldo/.#[kikiriki]"));
+ assert_se(strlen(p) == 16);
+ assert_se(in_charset(p, "0123456789abcdef"));
+ free(ret);
+}
+
+static const char chars[] =
+ "Aąę„”\n루\377";
+
+DISABLE_WARNING_TYPE_LIMITS;
+
+static void test_fgetc(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char c;
+
+ f = fmemopen_unlocked((void*) chars, sizeof(chars), "re");
+ assert_se(f);
+
+ for (size_t i = 0; i < sizeof(chars); i++) {
+ assert_se(safe_fgetc(f, &c) == 1);
+ assert_se(c == chars[i]);
+
+ if (ungetc(c, f) == EOF) {
+ /* EOF is -1, and hence we can't push value 255 in this way – if char is signed */
+ assert_se(c == (char) EOF);
+ assert_se(CHAR_MIN == -128); /* verify that char is signed on this platform */
+ } else {
+ assert_se(safe_fgetc(f, &c) == 1);
+ assert_se(c == chars[i]);
+ }
+
+ /* But it works when we push it properly cast */
+ assert_se(ungetc((unsigned char) c, f) != EOF);
+ assert_se(safe_fgetc(f, &c) == 1);
+ assert_se(c == chars[i]);
+ }
+
+ assert_se(safe_fgetc(f, &c) == 0);
+}
+
+REENABLE_WARNING;
+
+static const char buffer[] =
+ "Some test data\n"
+ "루Non-ascii chars: ąę„”\n"
+ "terminators\r\n"
+ "and even more\n\r"
+ "now the same with a NUL\n\0"
+ "and more\r\0"
+ "and even more\r\n\0"
+ "and yet even more\n\r\0"
+ "With newlines, and a NUL byte\0"
+ "\n"
+ "an empty line\n"
+ "an ignored line\n"
+ "and a very long line that is supposed to be truncated, because it is so long\n";
+
+static void test_read_line_one_file(FILE *f) {
+ _cleanup_free_ char *line = NULL;
+
+ assert_se(read_line(f, (size_t) -1, &line) == 15 && streq(line, "Some test data"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) > 0 && streq(line, "루Non-ascii chars: ąę„”"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 13 && streq(line, "terminators"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 15 && streq(line, "and even more"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 25 && streq(line, "now the same with a NUL"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 10 && streq(line, "and more"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 16 && streq(line, "and even more"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, &line) == 20 && streq(line, "and yet even more"));
+ line = mfree(line);
+
+ assert_se(read_line(f, 1024, &line) == 30 && streq(line, "With newlines, and a NUL byte"));
+ line = mfree(line);
+
+ assert_se(read_line(f, 1024, &line) == 1 && streq(line, ""));
+ line = mfree(line);
+
+ assert_se(read_line(f, 1024, &line) == 14 && streq(line, "an empty line"));
+ line = mfree(line);
+
+ assert_se(read_line(f, (size_t) -1, NULL) == 16);
+
+ assert_se(read_line(f, 16, &line) == -ENOBUFS);
+ line = mfree(line);
+
+ /* read_line() stopped when it hit the limit, that means when we continue reading we'll read at the first
+ * character after the previous limit. Let's make use of that to continue our test. */
+ assert_se(read_line(f, 1024, &line) == 62 && streq(line, "line that is supposed to be truncated, because it is so long"));
+ line = mfree(line);
+
+ assert_se(read_line(f, 1024, &line) == 0 && streq(line, ""));
+}
+
+static void test_read_line(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = fmemopen_unlocked((void*) buffer, sizeof(buffer), "re");
+ assert_se(f);
+
+ test_read_line_one_file(f);
+}
+
+static void test_read_line2(void) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-fileio.XXXXXX";
+ int fd;
+ _cleanup_fclose_ FILE *f = NULL;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se((size_t) write(fd, buffer, sizeof(buffer)) == sizeof(buffer));
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(f = fdopen(fd, "r"));
+
+ test_read_line_one_file(f);
+}
+
+static void test_read_line3(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ f = fopen("/proc/uptime", "re");
+ if (!f && IN_SET(errno, ENOENT, EPERM))
+ return;
+ assert_se(f);
+
+ r = read_line(f, LINE_MAX, &line);
+ assert_se(r >= 0);
+ if (r == 0)
+ assert_se(line && isempty(line));
+ else
+ assert_se((size_t) r == strlen(line) + 1);
+ assert_se(read_line(f, LINE_MAX, NULL) == 0);
+}
+
+static void test_read_line4(void) {
+ static const struct {
+ size_t length;
+ const char *string;
+ } eof_endings[] = {
+ /* Each of these will be followed by EOF and should generate the one same single string */
+ { 3, "foo" },
+ { 4, "foo\n" },
+ { 4, "foo\r" },
+ { 4, "foo\0" },
+ { 5, "foo\n\0" },
+ { 5, "foo\r\0" },
+ { 5, "foo\r\n" },
+ { 5, "foo\n\r" },
+ { 6, "foo\r\n\0" },
+ { 6, "foo\n\r\0" },
+ };
+
+ size_t i;
+ int r;
+
+ for (i = 0; i < ELEMENTSOF(eof_endings); i++) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(f = fmemopen_unlocked((void*) eof_endings[i].string, eof_endings[i].length, "r"));
+
+ r = read_line(f, (size_t) -1, &s);
+ assert_se((size_t) r == eof_endings[i].length);
+ assert_se(streq_ptr(s, "foo"));
+
+ assert_se(read_line(f, (size_t) -1, NULL) == 0); /* Ensure we hit EOF */
+ }
+}
+
+static void test_read_nul_string(void) {
+ static const char test[] = "string nr. 1\0"
+ "string nr. 2\n\0"
+ "\377empty string follows\0"
+ "\0"
+ "final string\n is empty\0"
+ "\0";
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(f = fmemopen_unlocked((void*) test, sizeof(test)-1, "r"));
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 13 && streq_ptr(s, "string nr. 1"));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 14 && streq_ptr(s, "string nr. 2\n"));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 22 && streq_ptr(s, "\377empty string follows"));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 1 && streq_ptr(s, ""));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 23 && streq_ptr(s, "final string\n is empty"));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 1 && streq_ptr(s, ""));
+ s = mfree(s);
+
+ assert_se(read_nul_string(f, LONG_LINE_MAX, &s) == 0 && streq_ptr(s, ""));
+}
+
+static void test_read_full_file_socket(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *z = NULL;
+ _cleanup_close_ int listener = -1;
+ _cleanup_free_ char *data = NULL, *clientname = NULL;
+ union sockaddr_union sa;
+ const char *j;
+ size_t size;
+ pid_t pid;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ listener = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ assert_se(listener >= 0);
+
+ assert_se(mkdtemp_malloc(NULL, &z) >= 0);
+ j = strjoina(z, "/socket");
+
+ assert_se(sockaddr_un_set_path(&sa.un, j) >= 0);
+
+ assert_se(bind(listener, &sa.sa, SOCKADDR_UN_LEN(sa.un)) >= 0);
+ assert_se(listen(listener, 1) >= 0);
+
+ /* Bind the *client* socket to some randomized name, to verify that this works correctly. */
+ assert_se(asprintf(&clientname, "@%" PRIx64 "/test-bindname", random_u64()) >= 0);
+
+ r = safe_fork("(server)", FORK_DEATHSIG|FORK_LOG, &pid);
+ assert_se(r >= 0);
+ if (r == 0) {
+ union sockaddr_union peer = {};
+ socklen_t peerlen = sizeof(peer);
+ _cleanup_close_ int rfd = -1;
+ /* child */
+
+ rfd = accept4(listener, NULL, 0, SOCK_CLOEXEC);
+ assert_se(rfd >= 0);
+
+ assert_se(getpeername(rfd, &peer.sa, &peerlen) >= 0);
+
+ assert_se(peer.un.sun_family == AF_UNIX);
+ assert_se(peerlen > offsetof(struct sockaddr_un, sun_path));
+ assert_se(peer.un.sun_path[0] == 0);
+ assert_se(streq(peer.un.sun_path + 1, clientname + 1));
+
+#define TEST_STR "This is a test\nreally."
+
+ assert_se(write(rfd, TEST_STR, strlen(TEST_STR)) == strlen(TEST_STR));
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(read_full_file_full(AT_FDCWD, j, 0, NULL, &data, &size) == -ENXIO);
+ assert_se(read_full_file_full(AT_FDCWD, j, READ_FULL_FILE_CONNECT_SOCKET, clientname, &data, &size) >= 0);
+ assert_se(size == strlen(TEST_STR));
+ assert_se(streq(data, TEST_STR));
+
+ assert_se(wait_for_terminate_and_check("(server)", pid, WAIT_LOG) >= 0);
+#undef TEST_STR
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_parse_env_file();
+ test_parse_multiline_env_file();
+ test_merge_env_file();
+ test_merge_env_file_invalid();
+ test_executable_is_script();
+ test_status_field();
+ test_capeff();
+ test_write_string_stream();
+ test_write_string_file();
+ test_write_string_file_no_create();
+ test_write_string_file_verify();
+ test_load_env_file_pairs();
+ test_search_and_fopen();
+ test_search_and_fopen_nulstr();
+ test_writing_tmpfile();
+ test_tempfn();
+ test_fgetc();
+ test_read_line();
+ test_read_line2();
+ test_read_line3();
+ test_read_line4();
+ test_read_nul_string();
+ test_read_full_file_socket();
+
+ return 0;
+}
diff --git a/src/test/test-firewall-util.c b/src/test/test-firewall-util.c
new file mode 100644
index 0000000..64616e4
--- /dev/null
+++ b/src/test/test-firewall-util.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "firewall-util.h"
+#include "log.h"
+#include "tests.h"
+
+#define MAKE_IN_ADDR_UNION(a,b,c,d) (union in_addr_union) { .in.s_addr = htobe32((uint32_t) (a) << 24 | (uint32_t) (b) << 16 | (uint32_t) (c) << 8 | (uint32_t) (d))}
+
+int main(int argc, char *argv[]) {
+ int r;
+ test_setup_logging(LOG_DEBUG);
+
+ r = fw_add_masquerade(true, AF_INET, 0, NULL, 0, "foobar", NULL, 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_masquerade(true, AF_INET, 0, NULL, 0, "foobar", NULL, 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_masquerade(false, AF_INET, 0, NULL, 0, "foobar", NULL, 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, NULL, NULL, 0, NULL, 0, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, NULL, NULL, 0, NULL, 0, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 4), 815, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_local_dnat(true, AF_INET, IPPROTO_TCP, NULL, NULL, 0, NULL, 0, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, &MAKE_IN_ADDR_UNION(1, 2, 3, 4));
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ r = fw_add_local_dnat(false, AF_INET, IPPROTO_TCP, NULL, NULL, 0, NULL, 0, 4711, &MAKE_IN_ADDR_UNION(1, 2, 3, 5), 815, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to modify firewall: %m");
+
+ return 0;
+}
diff --git a/src/test/test-format-table.c b/src/test/test-format-table.c
new file mode 100644
index 0000000..24ee1df
--- /dev/null
+++ b/src/test/test-format-table.c
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "format-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+static void test_issue_9549(void) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ _cleanup_free_ char *formatted = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(table = table_new("name", "type", "ro", "usage", "created", "modified"));
+ assert_se(table_set_align_percent(table, TABLE_HEADER_CELL(3), 100) >= 0);
+ assert_se(table_add_many(table,
+ TABLE_STRING, "foooo",
+ TABLE_STRING, "raw",
+ TABLE_BOOLEAN, false,
+ TABLE_SIZE, (uint64_t) (673.7*1024*1024),
+ TABLE_STRING, "Wed 2018-07-11 00:10:33 JST",
+ TABLE_STRING, "Wed 2018-07-11 00:16:00 JST") >= 0);
+
+ table_set_width(table, 75);
+ assert_se(table_format(table, &formatted) >= 0);
+
+ printf("%s\n", formatted);
+ assert_se(streq(formatted,
+ "NAME TYPE RO USAGE CREATED MODIFIED \n"
+ "foooo raw no 673.6M Wed 2018-07-11 00:10:33 J… Wed 2018-07-11 00:16:00 JST\n"
+ ));
+}
+
+static void test_multiline(void) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ _cleanup_free_ char *formatted = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(table = table_new("foo", "bar"));
+
+ assert_se(table_set_align_percent(table, TABLE_HEADER_CELL(1), 100) >= 0);
+
+ assert_se(table_add_many(table,
+ TABLE_STRING, "three\ndifferent\nlines",
+ TABLE_STRING, "two\nlines\n") >= 0);
+
+ table_set_cell_height_max(table, 1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three… two…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 2);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different… lines\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 3);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different lines\n"
+ "lines \n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, (size_t) -1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different lines\n"
+ "lines \n"));
+ formatted = mfree(formatted);
+
+ assert_se(table_add_many(table,
+ TABLE_STRING, "short",
+ TABLE_STRING, "a\npair") >= 0);
+
+ assert_se(table_add_many(table,
+ TABLE_STRING, "short2\n",
+ TABLE_STRING, "a\nfour\nline\ncell") >= 0);
+
+ table_set_cell_height_max(table, 1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three… two…\n"
+ "short a…\n"
+ "short2 a…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 2);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different… lines\n"
+ "short a\n"
+ " pair\n"
+ "short2 a\n"
+ " four…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 3);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different lines\n"
+ "lines \n"
+ "short a\n"
+ " pair\n"
+ "short2 a\n"
+ " four\n"
+ " line…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, (size_t) -1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different lines\n"
+ "lines \n"
+ "short a\n"
+ " pair\n"
+ "short2 a\n"
+ " four\n"
+ " line\n"
+ " cell\n"));
+ formatted = mfree(formatted);
+}
+
+static void test_strv(void) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ _cleanup_free_ char *formatted = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(table = table_new("foo", "bar"));
+
+ assert_se(table_set_align_percent(table, TABLE_HEADER_CELL(1), 100) >= 0);
+
+ assert_se(table_add_many(table,
+ TABLE_STRV, STRV_MAKE("three", "different", "lines"),
+ TABLE_STRV, STRV_MAKE("two", "lines")) >= 0);
+
+ table_set_cell_height_max(table, 1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three… two…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 2);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different… lines\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 3);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different lines\n"
+ "lines \n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, (size_t) -1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different lines\n"
+ "lines \n"));
+ formatted = mfree(formatted);
+
+ assert_se(table_add_many(table,
+ TABLE_STRING, "short",
+ TABLE_STRV, STRV_MAKE("a", "pair")) >= 0);
+
+ assert_se(table_add_many(table,
+ TABLE_STRV, STRV_MAKE("short2"),
+ TABLE_STRV, STRV_MAKE("a", "four", "line", "cell")) >= 0);
+
+ table_set_cell_height_max(table, 1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three… two…\n"
+ "short a…\n"
+ "short2 a…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 2);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different… lines\n"
+ "short a\n"
+ " pair\n"
+ "short2 a\n"
+ " four…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 3);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different lines\n"
+ "lines \n"
+ "short a\n"
+ " pair\n"
+ "short2 a\n"
+ " four\n"
+ " line…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, (size_t) -1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three two\n"
+ "different lines\n"
+ "lines \n"
+ "short a\n"
+ " pair\n"
+ "short2 a\n"
+ " four\n"
+ " line\n"
+ " cell\n"));
+ formatted = mfree(formatted);
+}
+
+static void test_strv_wrapped(void) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ _cleanup_free_ char *formatted = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(table = table_new("foo", "bar"));
+
+ assert_se(table_set_align_percent(table, TABLE_HEADER_CELL(1), 100) >= 0);
+
+ assert_se(table_add_many(table,
+ TABLE_STRV_WRAPPED, STRV_MAKE("three", "different", "lines"),
+ TABLE_STRV_WRAPPED, STRV_MAKE("two", "lines")) >= 0);
+
+ table_set_cell_height_max(table, 1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three different lines two lines\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 2);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three different lines two lines\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 3);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three different lines two lines\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, (size_t) -1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three different lines two lines\n"));
+ formatted = mfree(formatted);
+
+ assert_se(table_add_many(table,
+ TABLE_STRING, "short",
+ TABLE_STRV_WRAPPED, STRV_MAKE("a", "pair")) >= 0);
+
+ assert_se(table_add_many(table,
+ TABLE_STRV_WRAPPED, STRV_MAKE("short2"),
+ TABLE_STRV_WRAPPED, STRV_MAKE("a", "eight", "line", "ćęłł",
+ "___5___", "___6___", "___7___", "___8___")) >= 0);
+
+ table_set_cell_height_max(table, 1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three different… two lines\n"
+ "short a pair\n"
+ "short2 a eight line ćęłł…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 2);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three different two lines\n"
+ "lines \n"
+ "short a pair\n"
+ "short2 a eight line ćęłł\n"
+ " ___5___ ___6___…\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, 3);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three different two lines\n"
+ "lines \n"
+ "short a pair\n"
+ "short2 a eight line ćęłł\n"
+ " ___5___ ___6___\n"
+ " ___7___ ___8___\n"));
+ formatted = mfree(formatted);
+
+ table_set_cell_height_max(table, (size_t) -1);
+ assert_se(table_format(table, &formatted) >= 0);
+ fputs(formatted, stdout);
+ assert_se(streq(formatted,
+ "FOO BAR\n"
+ "three different two lines\n"
+ "lines \n"
+ "short a pair\n"
+ "short2 a eight line ćęłł\n"
+ " ___5___ ___6___\n"
+ " ___7___ ___8___\n"));
+ formatted = mfree(formatted);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+ _cleanup_free_ char *formatted = NULL;
+
+ assert_se(setenv("SYSTEMD_COLORS", "0", 1) >= 0);
+ assert_se(setenv("COLUMNS", "40", 1) >= 0);
+
+ assert_se(t = table_new("one", "two", "three"));
+
+ assert_se(table_set_align_percent(t, TABLE_HEADER_CELL(2), 100) >= 0);
+
+ assert_se(table_add_many(t,
+ TABLE_STRING, "xxx",
+ TABLE_STRING, "yyy",
+ TABLE_BOOLEAN, true) >= 0);
+
+ assert_se(table_add_many(t,
+ TABLE_STRING, "a long field",
+ TABLE_STRING, "yyy",
+ TABLE_SET_UPPERCASE, 1,
+ TABLE_BOOLEAN, false) >= 0);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "ONE TWO THREE\n"
+ "xxx yyy yes\n"
+ "a long field YYY no\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, 40);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "ONE TWO THREE\n"
+ "xxx yyy yes\n"
+ "a long field YYY no\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, 12);
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "ONE TWO THR…\n"
+ "xxx yyy yes\n"
+ "a … YYY no\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, 5);
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "… … …\n"
+ "… … …\n"
+ "… … …\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, 3);
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "… … …\n"
+ "… … …\n"
+ "… … …\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_width(t, (size_t) -1);
+ assert_se(table_set_sort(t, (size_t) 0, (size_t) 2, (size_t) -1) >= 0);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "ONE TWO THREE\n"
+ "a long field YYY no\n"
+ "xxx yyy yes\n"));
+
+ formatted = mfree(formatted);
+
+ table_set_header(t, false);
+
+ assert_se(table_add_many(t,
+ TABLE_STRING, "fäää",
+ TABLE_STRING, "uuu",
+ TABLE_BOOLEAN, true) >= 0);
+
+ assert_se(table_add_many(t,
+ TABLE_STRING, "fäää",
+ TABLE_STRING, "zzz",
+ TABLE_BOOLEAN, false) >= 0);
+
+ assert_se(table_add_many(t,
+ TABLE_EMPTY,
+ TABLE_SIZE, (uint64_t) 4711,
+ TABLE_TIMESPAN, (usec_t) 5*USEC_PER_MINUTE) >= 0);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ assert_se(streq(formatted,
+ "a long field YYY no\n"
+ "fäää zzz no\n"
+ "fäää uuu yes\n"
+ "xxx yyy yes\n"
+ " 4.6K 5min\n"));
+
+ formatted = mfree(formatted);
+
+ assert_se(table_set_display(t, (size_t) 2, (size_t) 0, (size_t) 2, (size_t) 0, (size_t) 0, (size_t) -1) >= 0);
+
+ assert_se(table_format(t, &formatted) >= 0);
+ printf("%s\n", formatted);
+
+ if (isatty(STDOUT_FILENO))
+ assert_se(streq(formatted,
+ " no a long f… no a long f… a long fi…\n"
+ " no fäää no fäää fäää \n"
+ " yes fäää yes fäää fäää \n"
+ " yes xxx yes xxx xxx \n"
+ "5min 5min \n"));
+ else
+ assert_se(streq(formatted,
+ " no a long field no a long field a long field\n"
+ " no fäää no fäää fäää \n"
+ " yes fäää yes fäää fäää \n"
+ " yes xxx yes xxx xxx \n"
+ "5min 5min \n"));
+
+ test_issue_9549();
+ test_multiline();
+ test_strv();
+ test_strv_wrapped();
+
+ return 0;
+}
diff --git a/src/test/test-format-util.c b/src/test/test-format-util.c
new file mode 100644
index 0000000..5562ac8
--- /dev/null
+++ b/src/test/test-format-util.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "format-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+static void test_format_bytes_one(uint64_t val, bool trailing_B, const char *iec_with_p, const char *iec_without_p,
+ const char *si_with_p, const char *si_without_p) {
+ char buf[FORMAT_BYTES_MAX];
+
+ assert_se(streq_ptr(format_bytes_full(buf, sizeof buf, val, FORMAT_BYTES_USE_IEC | FORMAT_BYTES_BELOW_POINT | (trailing_B ? FORMAT_BYTES_TRAILING_B : 0)), iec_with_p));
+ assert_se(streq_ptr(format_bytes_full(buf, sizeof buf, val, FORMAT_BYTES_USE_IEC | (trailing_B ? FORMAT_BYTES_TRAILING_B : 0)), iec_without_p));
+ assert_se(streq_ptr(format_bytes_full(buf, sizeof buf, val, FORMAT_BYTES_BELOW_POINT | (trailing_B ? FORMAT_BYTES_TRAILING_B : 0)), si_with_p));
+ assert_se(streq_ptr(format_bytes_full(buf, sizeof buf, val, trailing_B ? FORMAT_BYTES_TRAILING_B : 0), si_without_p));
+}
+
+static void test_format_bytes(void) {
+ test_format_bytes_one(900, true, "900B", "900B", "900B", "900B");
+ test_format_bytes_one(900, false, "900", "900", "900", "900");
+ test_format_bytes_one(1023, true, "1023B", "1023B", "1.0K", "1K");
+ test_format_bytes_one(1023, false, "1023", "1023", "1.0K", "1K");
+ test_format_bytes_one(1024, true, "1.0K", "1K", "1.0K", "1K");
+ test_format_bytes_one(1024, false, "1.0K", "1K", "1.0K", "1K");
+ test_format_bytes_one(1100, true, "1.0K", "1K", "1.1K", "1K");
+ test_format_bytes_one(1500, true, "1.4K", "1K", "1.5K", "1K");
+ test_format_bytes_one(UINT64_C(3)*1024*1024, true, "3.0M", "3M", "3.1M", "3M");
+ test_format_bytes_one(UINT64_C(3)*1024*1024*1024, true, "3.0G", "3G", "3.2G", "3G");
+ test_format_bytes_one(UINT64_C(3)*1024*1024*1024*1024, true, "3.0T", "3T", "3.2T", "3T");
+ test_format_bytes_one(UINT64_C(3)*1024*1024*1024*1024*1024, true, "3.0P", "3P", "3.3P", "3P");
+ test_format_bytes_one(UINT64_C(3)*1024*1024*1024*1024*1024*1024, true, "3.0E", "3E", "3.4E", "3E");
+ test_format_bytes_one(UINT64_MAX, true, NULL, NULL, NULL, NULL);
+ test_format_bytes_one(UINT64_MAX, false, NULL, NULL, NULL, NULL);
+}
+
+int main(void) {
+ test_format_bytes();
+
+ return 0;
+}
diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c
new file mode 100644
index 0000000..d1f9252
--- /dev/null
+++ b/src/test/test-fs-util.c
@@ -0,0 +1,857 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "id128-util.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+static const char *arg_test_dir = NULL;
+
+static void test_chase_symlinks(void) {
+ _cleanup_free_ char *result = NULL;
+ char *temp;
+ const char *top, *p, *pslash, *q, *qslash;
+ struct stat st;
+ int r, pfd;
+
+ log_info("/* %s */", __func__);
+
+ temp = strjoina(arg_test_dir ?: "/tmp", "/test-chase.XXXXXX");
+ assert_se(mkdtemp(temp));
+
+ top = strjoina(temp, "/top");
+ assert_se(mkdir(top, 0700) >= 0);
+
+ p = strjoina(top, "/dot");
+ if (symlink(".", p) < 0) {
+ assert_se(IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM));
+ log_tests_skipped_errno(errno, "symlink() not possible");
+ goto cleanup;
+ };
+
+ p = strjoina(top, "/dotdot");
+ assert_se(symlink("..", p) >= 0);
+
+ p = strjoina(top, "/dotdota");
+ assert_se(symlink("../a", p) >= 0);
+
+ p = strjoina(temp, "/a");
+ assert_se(symlink("b", p) >= 0);
+
+ p = strjoina(temp, "/b");
+ assert_se(symlink("/usr", p) >= 0);
+
+ p = strjoina(temp, "/start");
+ assert_se(symlink("top/dot/dotdota", p) >= 0);
+
+ /* Paths that use symlinks underneath the "root" */
+
+ r = chase_symlinks(p, NULL, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/usr"));
+ result = mfree(result);
+
+ pslash = strjoina(p, "/");
+ r = chase_symlinks(pslash, NULL, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/usr/"));
+ result = mfree(result);
+
+ r = chase_symlinks(p, temp, 0, &result, NULL);
+ assert_se(r == -ENOENT);
+
+ r = chase_symlinks(pslash, temp, 0, &result, NULL);
+ assert_se(r == -ENOENT);
+
+ q = strjoina(temp, "/usr");
+
+ r = chase_symlinks(p, temp, CHASE_NONEXISTENT, &result, NULL);
+ assert_se(r == 0);
+ assert_se(path_equal(result, q));
+ result = mfree(result);
+
+ qslash = strjoina(q, "/");
+
+ r = chase_symlinks(pslash, temp, CHASE_NONEXISTENT, &result, NULL);
+ assert_se(r == 0);
+ assert_se(path_equal(result, qslash));
+ result = mfree(result);
+
+ assert_se(mkdir(q, 0700) >= 0);
+
+ r = chase_symlinks(p, temp, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, q));
+ result = mfree(result);
+
+ r = chase_symlinks(pslash, temp, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, qslash));
+ result = mfree(result);
+
+ p = strjoina(temp, "/slash");
+ assert_se(symlink("/", p) >= 0);
+
+ r = chase_symlinks(p, NULL, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/"));
+ result = mfree(result);
+
+ r = chase_symlinks(p, temp, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, temp));
+ result = mfree(result);
+
+ /* Paths that would "escape" outside of the "root" */
+
+ p = strjoina(temp, "/6dots");
+ assert_se(symlink("../../..", p) >= 0);
+
+ r = chase_symlinks(p, temp, 0, &result, NULL);
+ assert_se(r > 0 && path_equal(result, temp));
+ result = mfree(result);
+
+ p = strjoina(temp, "/6dotsusr");
+ assert_se(symlink("../../../usr", p) >= 0);
+
+ r = chase_symlinks(p, temp, 0, &result, NULL);
+ assert_se(r > 0 && path_equal(result, q));
+ result = mfree(result);
+
+ p = strjoina(temp, "/top/8dotsusr");
+ assert_se(symlink("../../../../usr", p) >= 0);
+
+ r = chase_symlinks(p, temp, 0, &result, NULL);
+ assert_se(r > 0 && path_equal(result, q));
+ result = mfree(result);
+
+ /* Paths that contain repeated slashes */
+
+ p = strjoina(temp, "/slashslash");
+ assert_se(symlink("///usr///", p) >= 0);
+
+ r = chase_symlinks(p, NULL, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/usr"));
+ assert_se(streq(result, "/usr")); /* we guarantee that we drop redundant slashes */
+ result = mfree(result);
+
+ r = chase_symlinks(p, temp, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, q));
+ result = mfree(result);
+
+ /* Paths underneath the "root" with different UIDs while using CHASE_SAFE */
+
+ if (geteuid() == 0) {
+ p = strjoina(temp, "/user");
+ assert_se(mkdir(p, 0755) >= 0);
+ assert_se(chown(p, UID_NOBODY, GID_NOBODY) >= 0);
+
+ q = strjoina(temp, "/user/root");
+ assert_se(mkdir(q, 0755) >= 0);
+
+ p = strjoina(q, "/link");
+ assert_se(symlink("/", p) >= 0);
+
+ /* Fail when user-owned directories contain root-owned subdirectories. */
+ r = chase_symlinks(p, temp, CHASE_SAFE, &result, NULL);
+ assert_se(r == -ENOLINK);
+ result = mfree(result);
+
+ /* Allow this when the user-owned directories are all in the "root". */
+ r = chase_symlinks(p, q, CHASE_SAFE, &result, NULL);
+ assert_se(r > 0);
+ result = mfree(result);
+ }
+
+ /* Paths using . */
+
+ r = chase_symlinks("/etc/./.././", NULL, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(result, "/"));
+ result = mfree(result);
+
+ r = chase_symlinks("/etc/./.././", "/etc", 0, &result, NULL);
+ assert_se(r > 0 && path_equal(result, "/etc"));
+ result = mfree(result);
+
+ r = chase_symlinks("/../.././//../../etc", NULL, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(streq(result, "/etc"));
+ result = mfree(result);
+
+ r = chase_symlinks("/../.././//../../test-chase.fsldajfl", NULL, CHASE_NONEXISTENT, &result, NULL);
+ assert_se(r == 0);
+ assert_se(streq(result, "/test-chase.fsldajfl"));
+ result = mfree(result);
+
+ r = chase_symlinks("/../.././//../../etc", "/", CHASE_PREFIX_ROOT, &result, NULL);
+ assert_se(r > 0);
+ assert_se(streq(result, "/etc"));
+ result = mfree(result);
+
+ r = chase_symlinks("/../.././//../../test-chase.fsldajfl", "/", CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &result, NULL);
+ assert_se(r == 0);
+ assert_se(streq(result, "/test-chase.fsldajfl"));
+ result = mfree(result);
+
+ r = chase_symlinks("/etc/machine-id/foo", NULL, 0, &result, NULL);
+ assert_se(r == -ENOTDIR);
+ result = mfree(result);
+
+ /* Path that loops back to self */
+
+ p = strjoina(temp, "/recursive-symlink");
+ assert_se(symlink("recursive-symlink", p) >= 0);
+ r = chase_symlinks(p, NULL, 0, &result, NULL);
+ assert_se(r == -ELOOP);
+
+ /* Path which doesn't exist */
+
+ p = strjoina(temp, "/idontexist");
+ r = chase_symlinks(p, NULL, 0, &result, NULL);
+ assert_se(r == -ENOENT);
+
+ r = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &result, NULL);
+ assert_se(r == 0);
+ assert_se(path_equal(result, p));
+ result = mfree(result);
+
+ p = strjoina(temp, "/idontexist/meneither");
+ r = chase_symlinks(p, NULL, 0, &result, NULL);
+ assert_se(r == -ENOENT);
+
+ r = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &result, NULL);
+ assert_se(r == 0);
+ assert_se(path_equal(result, p));
+ result = mfree(result);
+
+ /* Path which doesn't exist, but contains weird stuff */
+
+ p = strjoina(temp, "/idontexist/..");
+ r = chase_symlinks(p, NULL, 0, &result, NULL);
+ assert_se(r == -ENOENT);
+
+ r = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &result, NULL);
+ assert_se(r == -ENOENT);
+
+ p = strjoina(temp, "/target");
+ q = strjoina(temp, "/top");
+ assert_se(symlink(q, p) >= 0);
+ p = strjoina(temp, "/target/idontexist");
+ r = chase_symlinks(p, NULL, 0, &result, NULL);
+ assert_se(r == -ENOENT);
+
+ if (geteuid() == 0) {
+ p = strjoina(temp, "/priv1");
+ assert_se(mkdir(p, 0755) >= 0);
+
+ q = strjoina(p, "/priv2");
+ assert_se(mkdir(q, 0755) >= 0);
+
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL, NULL) >= 0);
+
+ assert_se(chown(q, UID_NOBODY, GID_NOBODY) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL, NULL) >= 0);
+
+ assert_se(chown(p, UID_NOBODY, GID_NOBODY) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL, NULL) >= 0);
+
+ assert_se(chown(q, 0, 0) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL, NULL) == -ENOLINK);
+
+ assert_se(rmdir(q) >= 0);
+ assert_se(symlink("/etc/passwd", q) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL, NULL) == -ENOLINK);
+
+ assert_se(chown(p, 0, 0) >= 0);
+ assert_se(chase_symlinks(q, NULL, CHASE_SAFE, NULL, NULL) >= 0);
+ }
+
+ p = strjoina(temp, "/machine-id-test");
+ assert_se(symlink("/usr/../etc/./machine-id", p) >= 0);
+
+ r = chase_symlinks(p, NULL, 0, NULL, &pfd);
+ if (r != -ENOENT) {
+ _cleanup_close_ int fd = -1;
+ sd_id128_t a, b;
+
+ assert_se(pfd >= 0);
+
+ fd = fd_reopen(pfd, O_RDONLY|O_CLOEXEC);
+ assert_se(fd >= 0);
+ safe_close(pfd);
+
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &a) >= 0);
+ assert_se(sd_id128_get_machine(&b) >= 0);
+ assert_se(sd_id128_equal(a, b));
+ }
+
+ /* Test CHASE_NOFOLLOW */
+
+ p = strjoina(temp, "/target");
+ q = strjoina(temp, "/symlink");
+ assert_se(symlink(p, q) >= 0);
+ r = chase_symlinks(q, NULL, CHASE_NOFOLLOW, &result, &pfd);
+ assert_se(r >= 0);
+ assert_se(pfd >= 0);
+ assert_se(path_equal(result, q));
+ assert_se(fstat(pfd, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+ result = mfree(result);
+
+ /* s1 -> s2 -> nonexistent */
+ q = strjoina(temp, "/s1");
+ assert_se(symlink("s2", q) >= 0);
+ p = strjoina(temp, "/s2");
+ assert_se(symlink("nonexistent", p) >= 0);
+ r = chase_symlinks(q, NULL, CHASE_NOFOLLOW, &result, &pfd);
+ assert_se(r >= 0);
+ assert_se(pfd >= 0);
+ assert_se(path_equal(result, q));
+ assert_se(fstat(pfd, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+ result = mfree(result);
+
+ /* Test CHASE_ONE */
+
+ p = strjoina(temp, "/start");
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result, NULL);
+ assert_se(r == 0);
+ p = strjoina(temp, "/top/dot/dotdota");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result, NULL);
+ assert_se(r == 0);
+ p = strjoina(temp, "/top/./dotdota");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result, NULL);
+ assert_se(r == 0);
+ p = strjoina(temp, "/top/../a");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result, NULL);
+ assert_se(r == 0);
+ p = strjoina(temp, "/a");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result, NULL);
+ assert_se(r == 0);
+ p = strjoina(temp, "/b");
+ assert_se(streq(p, result));
+ result = mfree(result);
+
+ r = chase_symlinks(p, NULL, CHASE_STEP, &result, NULL);
+ assert_se(r == 0);
+ assert_se(streq("/usr", result));
+ result = mfree(result);
+
+ r = chase_symlinks("/usr", NULL, CHASE_STEP, &result, NULL);
+ assert_se(r > 0);
+ assert_se(streq("/usr", result));
+ result = mfree(result);
+
+ /* Make sure that symlinks in the "root" path are not resolved, but those below are */
+ p = strjoina("/etc/..", temp, "/self");
+ assert_se(symlink(".", p) >= 0);
+ q = strjoina(p, "/top/dot/dotdota");
+ r = chase_symlinks(q, p, 0, &result, NULL);
+ assert_se(r > 0);
+ assert_se(path_equal(path_startswith(result, p), "usr"));
+ result = mfree(result);
+
+ cleanup:
+ assert_se(rm_rf(temp, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+static void test_unlink_noerrno(void) {
+ char *name;
+ int fd;
+
+ log_info("/* %s */", __func__);
+
+ name = strjoina(arg_test_dir ?: "/tmp", "/test-close_nointr.XXXXXX");
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(close_nointr(fd) >= 0);
+
+ {
+ PROTECT_ERRNO;
+ errno = 42;
+ assert_se(unlink_noerrno(name) >= 0);
+ assert_se(errno == 42);
+ assert_se(unlink_noerrno(name) < 0);
+ assert_se(errno == 42);
+ }
+}
+
+static void test_readlink_and_make_absolute(void) {
+ const char *tempdir, *name, *name2, *name_alias;
+ _cleanup_free_ char *r1 = NULL, *r2 = NULL, *pwd = NULL;
+
+ log_info("/* %s */", __func__);
+
+ tempdir = strjoina(arg_test_dir ?: "/tmp", "/test-readlink_and_make_absolute");
+ name = strjoina(tempdir, "/original");
+ name2 = "test-readlink_and_make_absolute/original";
+ name_alias = strjoina(arg_test_dir ?: "/tmp", "/test-readlink_and_make_absolute-alias");
+
+ assert_se(mkdir_safe(tempdir, 0755, getuid(), getgid(), MKDIR_WARN_MODE) >= 0);
+ assert_se(touch(name) >= 0);
+
+ if (symlink(name, name_alias) < 0) {
+ assert_se(IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM));
+ log_tests_skipped_errno(errno, "symlink() not possible");
+ } else {
+ assert_se(readlink_and_make_absolute(name_alias, &r1) >= 0);
+ assert_se(streq(r1, name));
+ assert_se(unlink(name_alias) >= 0);
+
+ assert_se(safe_getcwd(&pwd) >= 0);
+
+ assert_se(chdir(tempdir) >= 0);
+ assert_se(symlink(name2, name_alias) >= 0);
+ assert_se(readlink_and_make_absolute(name_alias, &r2) >= 0);
+ assert_se(streq(r2, name));
+ assert_se(unlink(name_alias) >= 0);
+
+ assert_se(chdir(pwd) >= 0);
+ }
+
+ assert_se(rm_rf(tempdir, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+static void test_get_files_in_directory(void) {
+ _cleanup_strv_free_ char **l = NULL, **t = NULL;
+
+ assert_se(get_files_in_directory(arg_test_dir ?: "/tmp", &l) >= 0);
+ assert_se(get_files_in_directory(".", &t) >= 0);
+ assert_se(get_files_in_directory(".", NULL) >= 0);
+}
+
+static void test_var_tmp(void) {
+ _cleanup_free_ char *tmpdir_backup = NULL, *temp_backup = NULL, *tmp_backup = NULL;
+ const char *tmp_dir = NULL, *t;
+
+ log_info("/* %s */", __func__);
+
+ t = getenv("TMPDIR");
+ if (t) {
+ tmpdir_backup = strdup(t);
+ assert_se(tmpdir_backup);
+ }
+
+ t = getenv("TEMP");
+ if (t) {
+ temp_backup = strdup(t);
+ assert_se(temp_backup);
+ }
+
+ t = getenv("TMP");
+ if (t) {
+ tmp_backup = strdup(t);
+ assert_se(tmp_backup);
+ }
+
+ assert_se(unsetenv("TMPDIR") >= 0);
+ assert_se(unsetenv("TEMP") >= 0);
+ assert_se(unsetenv("TMP") >= 0);
+
+ assert_se(var_tmp_dir(&tmp_dir) >= 0);
+ assert_se(streq(tmp_dir, "/var/tmp"));
+
+ assert_se(setenv("TMPDIR", "/tmp", true) >= 0);
+ assert_se(streq(getenv("TMPDIR"), "/tmp"));
+
+ assert_se(var_tmp_dir(&tmp_dir) >= 0);
+ assert_se(streq(tmp_dir, "/tmp"));
+
+ assert_se(setenv("TMPDIR", "/88_does_not_exist_88", true) >= 0);
+ assert_se(streq(getenv("TMPDIR"), "/88_does_not_exist_88"));
+
+ assert_se(var_tmp_dir(&tmp_dir) >= 0);
+ assert_se(streq(tmp_dir, "/var/tmp"));
+
+ if (tmpdir_backup) {
+ assert_se(setenv("TMPDIR", tmpdir_backup, true) >= 0);
+ assert_se(streq(getenv("TMPDIR"), tmpdir_backup));
+ }
+
+ if (temp_backup) {
+ assert_se(setenv("TEMP", temp_backup, true) >= 0);
+ assert_se(streq(getenv("TEMP"), temp_backup));
+ }
+
+ if (tmp_backup) {
+ assert_se(setenv("TMP", tmp_backup, true) >= 0);
+ assert_se(streq(getenv("TMP"), tmp_backup));
+ }
+}
+
+static void test_dot_or_dot_dot(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!dot_or_dot_dot(NULL));
+ assert_se(!dot_or_dot_dot(""));
+ assert_se(!dot_or_dot_dot("xxx"));
+ assert_se(dot_or_dot_dot("."));
+ assert_se(dot_or_dot_dot(".."));
+ assert_se(!dot_or_dot_dot(".foo"));
+ assert_se(!dot_or_dot_dot("..foo"));
+}
+
+static void test_access_fd(void) {
+ _cleanup_(rmdir_and_freep) char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *a;
+
+ log_info("/* %s */", __func__);
+
+ a = strjoina(arg_test_dir ?: "/tmp", "/access-fd.XXXXXX");
+ assert_se(mkdtemp_malloc(a, &p) >= 0);
+
+ fd = open(p, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ assert_se(access_fd(fd, R_OK) >= 0);
+ assert_se(access_fd(fd, F_OK) >= 0);
+ assert_se(access_fd(fd, W_OK) >= 0);
+
+ assert_se(fchmod(fd, 0000) >= 0);
+
+ assert_se(access_fd(fd, F_OK) >= 0);
+
+ if (geteuid() == 0) {
+ assert_se(access_fd(fd, R_OK) >= 0);
+ assert_se(access_fd(fd, W_OK) >= 0);
+ } else {
+ assert_se(access_fd(fd, R_OK) == -EACCES);
+ assert_se(access_fd(fd, W_OK) == -EACCES);
+ }
+}
+
+static void test_touch_file(void) {
+ uid_t test_uid, test_gid;
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ struct stat st;
+ const char *a;
+ usec_t test_mtime;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ test_uid = geteuid() == 0 ? 65534 : getuid();
+ test_gid = geteuid() == 0 ? 65534 : getgid();
+
+ test_mtime = usec_sub_unsigned(now(CLOCK_REALTIME), USEC_PER_WEEK);
+
+ a = strjoina(arg_test_dir ?: "/dev/shm", "/touch-file-XXXXXX");
+ assert_se(mkdtemp_malloc(a, &p) >= 0);
+
+ a = strjoina(p, "/regular");
+ r = touch_file(a, false, test_mtime, test_uid, test_gid, 0640);
+ if (r < 0) {
+ assert_se(IN_SET(r, -EINVAL, -ENOSYS, -ENOTTY, -EPERM));
+ log_tests_skipped_errno(errno, "touch_file() not possible");
+ return;
+ }
+
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ a = strjoina(p, "/dir");
+ assert_se(mkdir(a, 0775) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ a = strjoina(p, "/fifo");
+ assert_se(mkfifo(a, 0775) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISFIFO(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ a = strjoina(p, "/sock");
+ assert_se(mknod(a, 0775 | S_IFSOCK, 0) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISSOCK(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ if (geteuid() == 0) {
+ a = strjoina(p, "/bdev");
+ r = mknod(a, 0775 | S_IFBLK, makedev(0, 0));
+ if (r < 0 && errno == EPERM && detect_container() > 0) {
+ log_notice("Running in unprivileged container? Skipping remaining tests in %s", __func__);
+ return;
+ }
+ assert_se(r >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISBLK(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+
+ a = strjoina(p, "/cdev");
+ assert_se(mknod(a, 0775 | S_IFCHR, makedev(0, 0)) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISCHR(st.st_mode));
+ assert_se((st.st_mode & 0777) == 0640);
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+ }
+
+ a = strjoina(p, "/lnk");
+ assert_se(symlink("target", a) >= 0);
+ assert_se(touch_file(a, false, test_mtime, test_uid, test_gid, 0640) >= 0);
+ assert_se(lstat(a, &st) >= 0);
+ assert_se(st.st_uid == test_uid);
+ assert_se(st.st_gid == test_gid);
+ assert_se(S_ISLNK(st.st_mode));
+ assert_se(timespec_load(&st.st_mtim) == test_mtime);
+}
+
+static void test_unlinkat_deallocate(void) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(tempfn_random_child(arg_test_dir, "unlink-deallocation", &p) >= 0);
+
+ fd = open(p, O_WRONLY|O_CLOEXEC|O_CREAT|O_EXCL, 0600);
+ assert_se(fd >= 0);
+
+ assert_se(write(fd, "hallo\n", 6) == 6);
+
+ assert_se(fstat(fd, &st) >= 0);
+ assert_se(st.st_size == 6);
+ assert_se(st.st_blocks > 0);
+ assert_se(st.st_nlink == 1);
+
+ assert_se(unlinkat_deallocate(AT_FDCWD, p, UNLINK_ERASE) >= 0);
+
+ assert_se(fstat(fd, &st) >= 0);
+ assert_se(IN_SET(st.st_size, 0, 6)); /* depending on whether hole punching worked the size will be 6
+ (it worked) or 0 (we had to resort to truncation) */
+ assert_se(st.st_blocks == 0);
+ assert_se(st.st_nlink == 0);
+}
+
+static void test_fsync_directory_of_file(void) {
+ _cleanup_close_ int fd = -1;
+
+ log_info("/* %s */", __func__);
+
+ fd = open_tmpfile_unlinkable(arg_test_dir, O_RDWR);
+ assert_se(fd >= 0);
+
+ assert_se(fsync_directory_of_file(fd) >= 0);
+}
+
+static void test_rename_noreplace(void) {
+ static const char* const table[] = {
+ "/reg",
+ "/dir",
+ "/fifo",
+ "/socket",
+ "/symlink",
+ NULL
+ };
+
+ _cleanup_(rm_rf_physical_and_freep) char *z = NULL;
+ const char *j = NULL;
+ char **a, **b;
+
+ log_info("/* %s */", __func__);
+
+ if (arg_test_dir)
+ j = strjoina(arg_test_dir, "/testXXXXXX");
+ assert_se(mkdtemp_malloc(j, &z) >= 0);
+
+ j = strjoina(z, table[0]);
+ assert_se(touch(j) >= 0);
+
+ j = strjoina(z, table[1]);
+ assert_se(mkdir(j, 0777) >= 0);
+
+ j = strjoina(z, table[2]);
+ (void) mkfifo(j, 0777);
+
+ j = strjoina(z, table[3]);
+ (void) mknod(j, S_IFSOCK | 0777, 0);
+
+ j = strjoina(z, table[4]);
+ (void) symlink("foobar", j);
+
+ STRV_FOREACH(a, (char**) table) {
+ _cleanup_free_ char *x = NULL, *y = NULL;
+
+ x = strjoin(z, *a);
+ assert_se(x);
+
+ if (access(x, F_OK) < 0) {
+ assert_se(errno == ENOENT);
+ continue;
+ }
+
+ STRV_FOREACH(b, (char**) table) {
+ _cleanup_free_ char *w = NULL;
+
+ w = strjoin(z, *b);
+ assert_se(w);
+
+ if (access(w, F_OK) < 0) {
+ assert_se(errno == ENOENT);
+ continue;
+ }
+
+ assert_se(rename_noreplace(AT_FDCWD, x, AT_FDCWD, w) == -EEXIST);
+ }
+
+ y = strjoin(z, "/somethingelse");
+ assert_se(y);
+
+ assert_se(rename_noreplace(AT_FDCWD, x, AT_FDCWD, y) >= 0);
+ assert_se(rename_noreplace(AT_FDCWD, y, AT_FDCWD, x) >= 0);
+ }
+}
+
+static void test_chmod_and_chown(void) {
+ _cleanup_(rm_rf_physical_and_freep) char *d = NULL;
+ _unused_ _cleanup_umask_ mode_t u = umask(0000);
+ struct stat st;
+ const char *p;
+
+ if (geteuid() != 0)
+ return;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(mkdtemp_malloc(NULL, &d) >= 0);
+
+ p = strjoina(d, "/reg");
+ assert_se(mknod(p, S_IFREG | 0123, 0) >= 0);
+
+ assert_se(chmod_and_chown(p, S_IFREG | 0321, 1, 2) >= 0);
+ assert_se(chmod_and_chown(p, S_IFDIR | 0555, 3, 4) == -EINVAL);
+
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISREG(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0321);
+
+ p = strjoina(d, "/dir");
+ assert_se(mkdir(p, 0123) >= 0);
+
+ assert_se(chmod_and_chown(p, S_IFDIR | 0321, 1, 2) >= 0);
+ assert_se(chmod_and_chown(p, S_IFREG | 0555, 3, 4) == -EINVAL);
+
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISDIR(st.st_mode));
+ assert_se((st.st_mode & 07777) == 0321);
+
+ p = strjoina(d, "/lnk");
+ assert_se(symlink("idontexist", p) >= 0);
+
+ assert_se(chmod_and_chown(p, S_IFLNK | 0321, 1, 2) >= 0);
+ assert_se(chmod_and_chown(p, S_IFREG | 0555, 3, 4) == -EINVAL);
+ assert_se(chmod_and_chown(p, S_IFDIR | 0555, 3, 4) == -EINVAL);
+
+ assert_se(lstat(p, &st) >= 0);
+ assert_se(S_ISLNK(st.st_mode));
+}
+
+static void test_path_is_encrypted_one(const char *p, int expect) {
+ int r;
+
+ r = path_is_encrypted(p);
+ if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r)) /* This might fail, if btrfs is used and we run in a
+ * container. In that case we cannot resolve the device node paths that
+ * BTRFS_IOC_DEV_INFO returns, because the device nodes are unlikely to exist in
+ * the container. But if we can't stat() them we cannot determine the dev_t of
+ * them, and thus cannot figure out if they are enrypted. Hence let's just ignore
+ * ENOENT here. Also skip the test if we lack privileges. */
+ return;
+ assert_se(r >= 0);
+
+ log_info("%s encrypted: %s", p, yes_no(r));
+
+ assert_se(expect < 0 || ((r > 0) == (expect > 0)));
+}
+
+static void test_path_is_encrypted(void) {
+ int booted = sd_booted(); /* If this is run in build environments such as koji, /dev might be a
+ * reguar fs. Don't assume too much if not running under systemd. */
+
+ log_info("/* %s (sd_booted=%d)*/", __func__, booted);
+
+ test_path_is_encrypted_one("/home", -1);
+ test_path_is_encrypted_one("/var", -1);
+ test_path_is_encrypted_one("/", -1);
+ test_path_is_encrypted_one("/proc", false);
+ test_path_is_encrypted_one("/sys", false);
+ test_path_is_encrypted_one("/dev", booted > 0 ? false : -1);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ arg_test_dir = argv[1];
+
+ test_chase_symlinks();
+ test_unlink_noerrno();
+ test_readlink_and_make_absolute();
+ test_get_files_in_directory();
+ test_var_tmp();
+ test_dot_or_dot_dot();
+ test_access_fd();
+ test_touch_file();
+ test_unlinkat_deallocate();
+ test_fsync_directory_of_file();
+ test_rename_noreplace();
+ test_chmod_and_chown();
+ test_path_is_encrypted();
+
+ return 0;
+}
diff --git a/src/test/test-fstab-util.c b/src/test/test-fstab-util.c
new file mode 100644
index 0000000..222ffbb
--- /dev/null
+++ b/src/test/test-fstab-util.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "fstab-util.h"
+#include "log.h"
+#include "string-util.h"
+
+/*
+int fstab_filter_options(const char *opts, const char *names,
+ const char **namefound, char **value, char **filtered);
+*/
+
+static void do_fstab_filter_options(const char *opts,
+ const char *remove,
+ int r_expected,
+ const char *name_expected,
+ const char *value_expected,
+ const char *filtered_expected) {
+ int r;
+ const char *name;
+ _cleanup_free_ char *value = NULL, *filtered = NULL;
+
+ r = fstab_filter_options(opts, remove, &name, &value, &filtered);
+ log_info("\"%s\" → %d, \"%s\", \"%s\", \"%s\", expected %d, \"%s\", \"%s\", \"%s\"",
+ opts, r, name, value, filtered,
+ r_expected, name_expected, value_expected, filtered_expected ?: opts);
+ assert_se(r == r_expected);
+ assert_se(streq_ptr(name, name_expected));
+ assert_se(streq_ptr(value, value_expected));
+ assert_se(streq_ptr(filtered, filtered_expected ?: opts));
+
+ /* also test the malloc-less mode */
+ r = fstab_filter_options(opts, remove, &name, NULL, NULL);
+ log_info("\"%s\" → %d, \"%s\", expected %d, \"%s\"\n-",
+ opts, r, name,
+ r_expected, name_expected);
+ assert_se(r == r_expected);
+ assert_se(streq_ptr(name, name_expected));
+}
+
+static void test_fstab_filter_options(void) {
+ do_fstab_filter_options("opt=0", "opt\0x-opt\0", 1, "opt", "0", "");
+ do_fstab_filter_options("opt=0", "x-opt\0opt\0", 1, "opt", "0", "");
+ do_fstab_filter_options("opt", "opt\0x-opt\0", 1, "opt", NULL, "");
+ do_fstab_filter_options("opt", "x-opt\0opt\0", 1, "opt", NULL, "");
+ do_fstab_filter_options("x-opt", "x-opt\0opt\0", 1, "x-opt", NULL, "");
+
+ do_fstab_filter_options("opt=0,other", "opt\0x-opt\0", 1, "opt", "0", "other");
+ do_fstab_filter_options("opt=0,other", "x-opt\0opt\0", 1, "opt", "0", "other");
+ do_fstab_filter_options("opt,other", "opt\0x-opt\0", 1, "opt", NULL, "other");
+ do_fstab_filter_options("opt,other", "x-opt\0opt\0", 1, "opt", NULL, "other");
+ do_fstab_filter_options("x-opt,other", "opt\0x-opt\0", 1, "x-opt", NULL, "other");
+
+ do_fstab_filter_options("opt=0\\,1,other", "opt\0x-opt\0", 1, "opt", "0,1", "other");
+ do_fstab_filter_options("opt=0,other,x-opt\\,foobar", "x-opt\0opt\0", 1, "opt", "0", "other,x-opt\\,foobar");
+ do_fstab_filter_options("opt,other,x-opt\\,part", "opt\0x-opt\0", 1, "opt", NULL, "other,x-opt\\,part");
+ do_fstab_filter_options("opt,other,part\\,x-opt", "x-opt\0opt\0", 1, "opt", NULL, "other,part\\,x-opt");
+ do_fstab_filter_options("opt,other\\,\\,\\,opt,x-part", "opt\0x-opt\0", 1, "opt", NULL, "other\\,\\,\\,opt,x-part");
+
+ do_fstab_filter_options("opto=0,other", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+ do_fstab_filter_options("opto,other", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+ do_fstab_filter_options("x-opto,other", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+
+ do_fstab_filter_options("first,opt=0", "opt\0x-opt\0", 1, "opt", "0", "first");
+ do_fstab_filter_options("first=1,opt=0", "opt\0x-opt\0", 1, "opt", "0", "first=1");
+ do_fstab_filter_options("first,opt=", "opt\0x-opt\0", 1, "opt", "", "first");
+ do_fstab_filter_options("first=1,opt", "opt\0x-opt\0", 1, "opt", NULL, "first=1");
+ do_fstab_filter_options("first=1,x-opt", "opt\0x-opt\0", 1, "x-opt", NULL, "first=1");
+
+ do_fstab_filter_options("first,opt=0,last=1", "opt\0x-opt\0", 1, "opt", "0", "first,last=1");
+ do_fstab_filter_options("first=1,opt=0,last=2", "x-opt\0opt\0", 1, "opt", "0", "first=1,last=2");
+ do_fstab_filter_options("first,opt,last", "opt\0", 1, "opt", NULL, "first,last");
+ do_fstab_filter_options("first=1,opt,last", "x-opt\0opt\0", 1, "opt", NULL, "first=1,last");
+ do_fstab_filter_options("first=,opt,last", "opt\0noopt\0", 1, "opt", NULL, "first=,last");
+
+ /* check repeated options */
+ do_fstab_filter_options("first,opt=0,noopt=1,last=1", "opt\0noopt\0", 1, "noopt", "1", "first,last=1");
+ do_fstab_filter_options("first=1,opt=0,last=2,opt=1", "opt\0", 1, "opt", "1", "first=1,last=2");
+ do_fstab_filter_options("x-opt=0,x-opt=1", "opt\0x-opt\0", 1, "x-opt", "1", "");
+ do_fstab_filter_options("opt=0,x-opt=1", "opt\0x-opt\0", 1, "x-opt", "1", "");
+
+ /* check that semicolons are not misinterpreted */
+ do_fstab_filter_options("opt=0;", "opt\0", 1, "opt", "0;", "");
+ do_fstab_filter_options("opt;=0", "x-opt\0opt\0noopt\0x-noopt\0", 0, NULL, NULL, NULL);
+ do_fstab_filter_options("opt;", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+
+ /* check that spaces are not misinterpreted */
+ do_fstab_filter_options("opt=0 ", "opt\0", 1, "opt", "0 ", "");
+ do_fstab_filter_options("opt =0", "x-opt\0opt\0noopt\0x-noopt\0", 0, NULL, NULL, NULL);
+ do_fstab_filter_options(" opt ", "opt\0x-opt\0", 0, NULL, NULL, NULL);
+
+ /* check function will NULL args */
+ do_fstab_filter_options(NULL, "opt\0", 0, NULL, NULL, "");
+ do_fstab_filter_options("", "opt\0", 0, NULL, NULL, "");
+}
+
+static void test_fstab_find_pri(void) {
+ int pri = -1;
+
+ assert_se(fstab_find_pri("pri", &pri) == 0);
+ assert_se(pri == -1);
+
+ assert_se(fstab_find_pri("pri=11", &pri) == 1);
+ assert_se(pri == 11);
+
+ assert_se(fstab_find_pri("pri=-2", &pri) == 1);
+ assert_se(pri == -2);
+
+ assert_se(fstab_find_pri("opt,pri=12,opt", &pri) == 1);
+ assert_se(pri == 12);
+
+ assert_se(fstab_find_pri("opt,opt,pri=12,pri=13", &pri) == 1);
+ assert_se(pri == 13);
+}
+
+static void test_fstab_yes_no_option(void) {
+ assert_se(fstab_test_yes_no_option("nofail,fail,nofail", "nofail\0fail\0") == true);
+ assert_se(fstab_test_yes_no_option("nofail,nofail,fail", "nofail\0fail\0") == false);
+ assert_se(fstab_test_yes_no_option("abc,cde,afail", "nofail\0fail\0") == false);
+ assert_se(fstab_test_yes_no_option("nofail,fail=0,nofail=0", "nofail\0fail\0") == true);
+ assert_se(fstab_test_yes_no_option("nofail,nofail=0,fail=0", "nofail\0fail\0") == false);
+}
+
+static void test_fstab_node_to_udev_node(void) {
+ char *n;
+
+ n = fstab_node_to_udev_node("LABEL=applé/jack");
+ puts(n);
+ assert_se(streq(n, "/dev/disk/by-label/applé\\x2fjack"));
+ free(n);
+
+ n = fstab_node_to_udev_node("PARTLABEL=pinkié pie");
+ puts(n);
+ assert_se(streq(n, "/dev/disk/by-partlabel/pinkié\\x20pie"));
+ free(n);
+
+ n = fstab_node_to_udev_node("UUID=037b9d94-148e-4ee4-8d38-67bfe15bb535");
+ puts(n);
+ assert_se(streq(n, "/dev/disk/by-uuid/037b9d94-148e-4ee4-8d38-67bfe15bb535"));
+ free(n);
+
+ n = fstab_node_to_udev_node("PARTUUID=037b9d94-148e-4ee4-8d38-67bfe15bb535");
+ puts(n);
+ assert_se(streq(n, "/dev/disk/by-partuuid/037b9d94-148e-4ee4-8d38-67bfe15bb535"));
+ free(n);
+
+ n = fstab_node_to_udev_node("PONIES=awesome");
+ puts(n);
+ assert_se(streq(n, "PONIES=awesome"));
+ free(n);
+
+ n = fstab_node_to_udev_node("/dev/xda1");
+ puts(n);
+ assert_se(streq(n, "/dev/xda1"));
+ free(n);
+}
+
+int main(void) {
+ test_fstab_filter_options();
+ test_fstab_find_pri();
+ test_fstab_yes_no_option();
+ test_fstab_node_to_udev_node();
+
+ return 0;
+}
diff --git a/src/test/test-gcrypt-util.c b/src/test/test-gcrypt-util.c
new file mode 100644
index 0000000..2af040b
--- /dev/null
+++ b/src/test/test-gcrypt-util.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "gcrypt-util.h"
+#include "macro.h"
+#include "string-util.h"
+
+static void test_string_hashsum(void) {
+ _cleanup_free_ char *out1 = NULL, *out2 = NULL, *out3 = NULL, *out4 = NULL;
+
+ assert_se(string_hashsum("asdf", 4, GCRY_MD_SHA224, &out1) == 0);
+ /* echo -n 'asdf' | sha224sum - */
+ assert_se(streq(out1, "7872a74bcbf298a1e77d507cd95d4f8d96131cbbd4cdfc571e776c8a"));
+
+ assert_se(string_hashsum("asdf", 4, GCRY_MD_SHA256, &out2) == 0);
+ /* echo -n 'asdf' | sha256sum - */
+ assert_se(streq(out2, "f0e4c2f76c58916ec258f246851bea091d14d4247a2fc3e18694461b1816e13b"));
+
+ assert_se(string_hashsum("", 0, GCRY_MD_SHA224, &out3) == 0);
+ /* echo -n '' | sha224sum - */
+ assert_se(streq(out3, "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f"));
+
+ assert_se(string_hashsum("", 0, GCRY_MD_SHA256, &out4) == 0);
+ /* echo -n '' | sha256sum - */
+ assert_se(streq(out4, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"));
+}
+
+int main(int argc, char **argv) {
+ test_string_hashsum();
+
+ return 0;
+}
diff --git a/src/test/test-glob-util.c b/src/test/test-glob-util.c
new file mode 100644
index 0000000..df6444c
--- /dev/null
+++ b/src/test/test-glob-util.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "macro.h"
+#include "rm-rf.h"
+#include "tmpfile-util.h"
+
+static void test_glob_exists(void) {
+ char name[] = "/tmp/test-glob_exists.XXXXXX";
+ int fd = -1;
+ int r;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ close(fd);
+
+ r = glob_exists("/tmp/test-glob_exists*");
+ assert_se(r == 1);
+
+ r = unlink(name);
+ assert_se(r == 0);
+ r = glob_exists("/tmp/test-glob_exists*");
+ assert_se(r == 0);
+}
+
+static void closedir_wrapper(void* v) {
+ (void) closedir(v);
+}
+
+static void test_glob_no_dot(void) {
+ char template[] = "/tmp/test-glob-util.XXXXXXX";
+ const char *fn;
+
+ _cleanup_globfree_ glob_t g = {
+ .gl_closedir = closedir_wrapper,
+ .gl_readdir = (struct dirent *(*)(void *)) readdir_no_dot,
+ .gl_opendir = (void *(*)(const char *)) opendir,
+ .gl_lstat = lstat,
+ .gl_stat = stat,
+ };
+
+ int r;
+
+ assert_se(mkdtemp(template));
+
+ fn = strjoina(template, "/*");
+ r = glob(fn, GLOB_NOSORT|GLOB_BRACE|GLOB_ALTDIRFUNC, NULL, &g);
+ assert_se(r == GLOB_NOMATCH);
+
+ fn = strjoina(template, "/.*");
+ r = glob(fn, GLOB_NOSORT|GLOB_BRACE|GLOB_ALTDIRFUNC, NULL, &g);
+ assert_se(r == GLOB_NOMATCH);
+
+ (void) rm_rf(template, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+static void test_safe_glob(void) {
+ char template[] = "/tmp/test-glob-util.XXXXXXX";
+ const char *fn, *fn2, *fname;
+
+ _cleanup_globfree_ glob_t g = {};
+ int r;
+
+ assert_se(mkdtemp(template));
+
+ fn = strjoina(template, "/*");
+ r = safe_glob(fn, 0, &g);
+ assert_se(r == -ENOENT);
+
+ fn2 = strjoina(template, "/.*");
+ r = safe_glob(fn2, GLOB_NOSORT|GLOB_BRACE, &g);
+ assert_se(r == -ENOENT);
+
+ fname = strjoina(template, "/.foobar");
+ assert_se(touch(fname) == 0);
+
+ r = safe_glob(fn, 0, &g);
+ assert_se(r == -ENOENT);
+
+ r = safe_glob(fn2, GLOB_NOSORT|GLOB_BRACE, &g);
+ assert_se(r == 0);
+ assert_se(g.gl_pathc == 1);
+ assert_se(streq(g.gl_pathv[0], fname));
+ assert_se(g.gl_pathv[1] == NULL);
+
+ (void) rm_rf(template, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+int main(void) {
+ test_glob_exists();
+ test_glob_no_dot();
+ test_safe_glob();
+
+ return 0;
+}
diff --git a/src/test/test-hash.c b/src/test/test-hash.c
new file mode 100644
index 0000000..270fcd0
--- /dev/null
+++ b/src/test/test-hash.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "khash.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(khash_unrefp) khash *h = NULL, *copy = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(khash_new(&h, NULL) == -EINVAL);
+ assert_se(khash_new(&h, "") == -EINVAL);
+
+ r = khash_supported();
+ assert_se(r >= 0);
+ if (r == 0)
+ return log_tests_skipped("khash not supported on this kernel");
+
+ assert_se(khash_new(&h, "foobar") == -EOPNOTSUPP); /* undefined hash function */
+
+ assert_se(khash_new(&h, "sha256") >= 0);
+ assert_se(khash_get_size(h) == 32);
+ assert_se(streq(khash_get_algorithm(h), "sha256"));
+
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"));
+ s = mfree(s);
+
+ assert_se(khash_put(h, "foobar", 6) >= 0);
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2"));
+ s = mfree(s);
+
+ assert_se(khash_put(h, "piep", 4) >= 0);
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "f114d872b5ea075d3be9040d0b7a429514b3f9324a8e8e3dc3fb24c34ee56bea"));
+ s = mfree(s);
+
+ assert_se(khash_put(h, "foo", 3) >= 0);
+ assert_se(khash_dup(h, &copy) >= 0);
+
+ assert_se(khash_put(h, "bar", 3) >= 0);
+ assert_se(khash_put(copy, "bar", 3) >= 0);
+
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2"));
+ s = mfree(s);
+
+ assert_se(khash_digest_string(copy, &s) >= 0);
+ assert_se(streq(s, "c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2"));
+ s = mfree(s);
+
+ h = khash_unref(h);
+
+ assert_se(khash_new_with_key(&h, "hmac(sha256)", "quux", 4) >= 0);
+ assert_se(khash_get_size(h) == 32);
+ assert_se(streq(khash_get_algorithm(h), "hmac(sha256)"));
+
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "abed9f8218ab473f77218a6a7d39abf1d21fa46d0700c4898e330ba88309d5ae"));
+ s = mfree(s);
+
+ assert_se(khash_put(h, "foobar", 6) >= 0);
+ assert_se(khash_digest_string(h, &s) >= 0);
+ assert_se(streq(s, "33f6c70a60db66007d5325d5d1dea37c371354e5b83347a59ad339ce9f4ba3dc"));
+
+ return 0;
+}
diff --git a/src/test/test-hashmap-ordered.awk b/src/test/test-hashmap-ordered.awk
new file mode 100644
index 0000000..10f4386
--- /dev/null
+++ b/src/test/test-hashmap-ordered.awk
@@ -0,0 +1,11 @@
+BEGIN {
+ print "/* GENERATED FILE */";
+ print "#define ORDERED"
+}
+{
+ if (!match($0, "^#include"))
+ gsub(/hashmap/, "ordered_hashmap");
+ gsub(/HASHMAP/, "ORDERED_HASHMAP");
+ gsub(/Hashmap/, "OrderedHashmap");
+ print
+}
diff --git a/src/test/test-hashmap-plain.c b/src/test/test-hashmap-plain.c
new file mode 100644
index 0000000..9ed6bee
--- /dev/null
+++ b/src/test/test-hashmap-plain.c
@@ -0,0 +1,1098 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "nulstr-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "tests.h"
+
+void test_hashmap_funcs(void);
+
+static void test_hashmap_replace(void) {
+ Hashmap *m;
+ char *val1, *val2, *val3, *val4, *val5, *r;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+ val5 = strdup("val5");
+ assert_se(val5);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ hashmap_replace(m, "key 3", val1);
+ r = hashmap_get(m, "key 3");
+ assert_se(streq(r, "val1"));
+
+ hashmap_replace(m, "key 5", val5);
+ r = hashmap_get(m, "key 5");
+ assert_se(streq(r, "val5"));
+
+ free(val1);
+ free(val2);
+ free(val3);
+ free(val4);
+ free(val5);
+ hashmap_free(m);
+}
+
+static void test_hashmap_copy(void) {
+ Hashmap *m, *copy;
+ char *val1, *val2, *val3, *val4, *r;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ copy = hashmap_copy(m);
+
+ r = hashmap_get(copy, "key 1");
+ assert_se(streq(r, "val1"));
+ r = hashmap_get(copy, "key 2");
+ assert_se(streq(r, "val2"));
+ r = hashmap_get(copy, "key 3");
+ assert_se(streq(r, "val3"));
+ r = hashmap_get(copy, "key 4");
+ assert_se(streq(r, "val4"));
+
+ hashmap_free_free(copy);
+ hashmap_free(m);
+}
+
+static void test_hashmap_get_strv(void) {
+ Hashmap *m;
+ char **strv;
+ char *val1, *val2, *val3, *val4;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ strv = hashmap_get_strv(m);
+
+#ifndef ORDERED
+ strv = strv_sort(strv);
+#endif
+
+ assert_se(streq(strv[0], "val1"));
+ assert_se(streq(strv[1], "val2"));
+ assert_se(streq(strv[2], "val3"));
+ assert_se(streq(strv[3], "val4"));
+
+ strv_free(strv);
+
+ hashmap_free(m);
+}
+
+static void test_hashmap_move_one(void) {
+ Hashmap *m, *n;
+ char *val1, *val2, *val3, *val4, *r;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+ n = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ assert_se(hashmap_move_one(n, NULL, "key 3") == -ENOENT);
+ assert_se(hashmap_move_one(n, m, "key 5") == -ENOENT);
+ assert_se(hashmap_move_one(n, m, "key 3") == 0);
+ assert_se(hashmap_move_one(n, m, "key 4") == 0);
+
+ r = hashmap_get(n, "key 3");
+ assert_se(r && streq(r, "val3"));
+ r = hashmap_get(n, "key 4");
+ assert_se(r && streq(r, "val4"));
+ r = hashmap_get(m, "key 3");
+ assert_se(!r);
+
+ assert_se(hashmap_move_one(n, m, "key 3") == -EEXIST);
+
+ hashmap_free_free(m);
+ hashmap_free_free(n);
+}
+
+static void test_hashmap_move(void) {
+ Hashmap *m, *n;
+ char *val1, *val2, *val3, *val4, *r;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("val1");
+ assert_se(val1);
+ val2 = strdup("val2");
+ assert_se(val2);
+ val3 = strdup("val3");
+ assert_se(val3);
+ val4 = strdup("val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+ n = hashmap_new(&string_hash_ops);
+
+ hashmap_put(n, "key 1", strdup(val1));
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+ hashmap_put(m, "key 3", val3);
+ hashmap_put(m, "key 4", val4);
+
+ assert_se(hashmap_move(n, NULL) == 0);
+ assert_se(hashmap_move(n, m) == 0);
+
+ assert_se(hashmap_size(m) == 1);
+ r = hashmap_get(m, "key 1");
+ assert_se(r && streq(r, "val1"));
+
+ r = hashmap_get(n, "key 1");
+ assert_se(r && streq(r, "val1"));
+ r = hashmap_get(n, "key 2");
+ assert_se(r && streq(r, "val2"));
+ r = hashmap_get(n, "key 3");
+ assert_se(r && streq(r, "val3"));
+ r = hashmap_get(n, "key 4");
+ assert_se(r && streq(r, "val4"));
+
+ hashmap_free_free(m);
+ hashmap_free_free(n);
+}
+
+static void test_hashmap_update(void) {
+ Hashmap *m;
+ char *val1, *val2, *r;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ val1 = strdup("old_value");
+ assert_se(val1);
+ val2 = strdup("new_value");
+ assert_se(val2);
+
+ hashmap_put(m, "key 1", val1);
+ r = hashmap_get(m, "key 1");
+ assert_se(streq(r, "old_value"));
+
+ assert_se(hashmap_update(m, "key 2", val2) == -ENOENT);
+ r = hashmap_get(m, "key 1");
+ assert_se(streq(r, "old_value"));
+
+ assert_se(hashmap_update(m, "key 1", val2) == 0);
+ r = hashmap_get(m, "key 1");
+ assert_se(streq(r, "new_value"));
+
+ free(val1);
+ free(val2);
+ hashmap_free(m);
+}
+
+static void test_hashmap_put(void) {
+ Hashmap *m = NULL;
+ int valid_hashmap_put;
+ void *val1 = (void*) "val 1";
+ void *val2 = (void*) "val 2";
+ _cleanup_free_ char* key1 = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(hashmap_ensure_allocated(&m, &string_hash_ops) == 1);
+ assert_se(m);
+
+ valid_hashmap_put = hashmap_put(m, "key 1", val1);
+ assert_se(valid_hashmap_put == 1);
+ assert_se(hashmap_put(m, "key 1", val1) == 0);
+ assert_se(hashmap_put(m, "key 1", val2) == -EEXIST);
+ key1 = strdup("key 1");
+ assert_se(hashmap_put(m, key1, val1) == 0);
+ assert_se(hashmap_put(m, key1, val2) == -EEXIST);
+
+ hashmap_free(m);
+}
+
+static void test_hashmap_remove(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ char *r;
+
+ log_info("/* %s */", __func__);
+
+ r = hashmap_remove(NULL, "key 1");
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ r = hashmap_remove(m, "no such key");
+ assert_se(r == NULL);
+
+ hashmap_put(m, "key 1", (void*) "val 1");
+ hashmap_put(m, "key 2", (void*) "val 2");
+
+ r = hashmap_remove(m, "key 1");
+ assert_se(streq(r, "val 1"));
+
+ r = hashmap_get(m, "key 2");
+ assert_se(streq(r, "val 2"));
+ assert_se(!hashmap_get(m, "key 1"));
+}
+
+static void test_hashmap_remove2(void) {
+ _cleanup_hashmap_free_free_free_ Hashmap *m = NULL;
+ char key1[] = "key 1";
+ char key2[] = "key 2";
+ char val1[] = "val 1";
+ char val2[] = "val 2";
+ void *r, *r2;
+
+ log_info("/* %s */", __func__);
+
+ r = hashmap_remove2(NULL, "key 1", &r2);
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ r = hashmap_remove2(m, "no such key", &r2);
+ assert_se(r == NULL);
+
+ hashmap_put(m, strdup(key1), strdup(val1));
+ hashmap_put(m, strdup(key2), strdup(val2));
+
+ r = hashmap_remove2(m, key1, &r2);
+ assert_se(streq(r, val1));
+ assert_se(streq(r2, key1));
+ free(r);
+ free(r2);
+
+ r = hashmap_get(m, key2);
+ assert_se(streq(r, val2));
+ assert_se(!hashmap_get(m, key1));
+}
+
+static void test_hashmap_remove_value(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ char *r;
+
+ char val1[] = "val 1";
+ char val2[] = "val 2";
+
+ log_info("/* %s */", __func__);
+
+ r = hashmap_remove_value(NULL, "key 1", val1);
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ r = hashmap_remove_value(m, "key 1", val1);
+ assert_se(r == NULL);
+
+ hashmap_put(m, "key 1", val1);
+ hashmap_put(m, "key 2", val2);
+
+ r = hashmap_remove_value(m, "key 1", val1);
+ assert_se(streq(r, "val 1"));
+
+ r = hashmap_get(m, "key 2");
+ assert_se(streq(r, "val 2"));
+ assert_se(!hashmap_get(m, "key 1"));
+
+ r = hashmap_remove_value(m, "key 2", val1);
+ assert_se(r == NULL);
+
+ r = hashmap_get(m, "key 2");
+ assert_se(streq(r, "val 2"));
+ assert_se(!hashmap_get(m, "key 1"));
+}
+
+static void test_hashmap_remove_and_put(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ int valid;
+ char *r;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ valid = hashmap_remove_and_put(m, "invalid key", "new key", NULL);
+ assert_se(valid == -ENOENT);
+
+ valid = hashmap_put(m, "key 1", (void*) (const char *) "val 1");
+ assert_se(valid == 1);
+
+ valid = hashmap_remove_and_put(NULL, "key 1", "key 2", (void*) (const char *) "val 2");
+ assert_se(valid == -ENOENT);
+
+ valid = hashmap_remove_and_put(m, "key 1", "key 2", (void*) (const char *) "val 2");
+ assert_se(valid == 0);
+
+ r = hashmap_get(m, "key 2");
+ assert_se(streq(r, "val 2"));
+ assert_se(!hashmap_get(m, "key 1"));
+
+ valid = hashmap_put(m, "key 3", (void*) (const char *) "val 3");
+ assert_se(valid == 1);
+ valid = hashmap_remove_and_put(m, "key 3", "key 2", (void*) (const char *) "val 2");
+ assert_se(valid == -EEXIST);
+}
+
+static void test_hashmap_remove_and_replace(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ int valid;
+ void *key1 = UINT_TO_PTR(1);
+ void *key2 = UINT_TO_PTR(2);
+ void *key3 = UINT_TO_PTR(3);
+ void *r;
+ int i, j;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&trivial_hash_ops);
+ assert_se(m);
+
+ valid = hashmap_remove_and_replace(m, key1, key2, NULL);
+ assert_se(valid == -ENOENT);
+
+ valid = hashmap_put(m, key1, key1);
+ assert_se(valid == 1);
+
+ valid = hashmap_remove_and_replace(NULL, key1, key2, key2);
+ assert_se(valid == -ENOENT);
+
+ valid = hashmap_remove_and_replace(m, key1, key2, key2);
+ assert_se(valid == 0);
+
+ r = hashmap_get(m, key2);
+ assert_se(r == key2);
+ assert_se(!hashmap_get(m, key1));
+
+ valid = hashmap_put(m, key3, key3);
+ assert_se(valid == 1);
+ valid = hashmap_remove_and_replace(m, key3, key2, key2);
+ assert_se(valid == 0);
+ r = hashmap_get(m, key2);
+ assert_se(r == key2);
+ assert_se(!hashmap_get(m, key3));
+
+ /* Repeat this test several times to increase the chance of hitting
+ * the less likely case in hashmap_remove_and_replace where it
+ * compensates for the backward shift. */
+ for (i = 0; i < 20; i++) {
+ hashmap_clear(m);
+
+ for (j = 1; j < 7; j++)
+ hashmap_put(m, UINT_TO_PTR(10*i + j), UINT_TO_PTR(10*i + j));
+ valid = hashmap_remove_and_replace(m, UINT_TO_PTR(10*i + 1),
+ UINT_TO_PTR(10*i + 2),
+ UINT_TO_PTR(10*i + 2));
+ assert_se(valid == 0);
+ assert_se(!hashmap_get(m, UINT_TO_PTR(10*i + 1)));
+ for (j = 2; j < 7; j++) {
+ r = hashmap_get(m, UINT_TO_PTR(10*i + j));
+ assert_se(r == UINT_TO_PTR(10*i + j));
+ }
+ }
+}
+
+static void test_hashmap_ensure_allocated(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = hashmap_ensure_allocated(&m, &string_hash_ops);
+ assert_se(r == 1);
+
+ r = hashmap_ensure_allocated(&m, &string_hash_ops);
+ assert_se(r == 0);
+
+ /* different hash ops shouldn't matter at this point */
+ r = hashmap_ensure_allocated(&m, &trivial_hash_ops);
+ assert_se(r == 0);
+}
+
+static void test_hashmap_foreach_key(void) {
+ Hashmap *m;
+ bool key_found[] = { false, false, false, false };
+ const char *s;
+ const char *key;
+ static const char key_table[] =
+ "key 1\0"
+ "key 2\0"
+ "key 3\0"
+ "key 4\0";
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+
+ NULSTR_FOREACH(key, key_table)
+ hashmap_put(m, key, (void*) (const char*) "my dummy val");
+
+ HASHMAP_FOREACH_KEY(s, key, m) {
+ assert(s);
+ if (!key_found[0] && streq(key, "key 1"))
+ key_found[0] = true;
+ else if (!key_found[1] && streq(key, "key 2"))
+ key_found[1] = true;
+ else if (!key_found[2] && streq(key, "key 3"))
+ key_found[2] = true;
+ else if (!key_found[3] && streq(key, "fail"))
+ key_found[3] = true;
+ }
+
+ assert_se(m);
+ assert_se(key_found[0] && key_found[1] && key_found[2] && !key_found[3]);
+
+ hashmap_free(m);
+}
+
+static void test_hashmap_foreach(void) {
+ Hashmap *m;
+ bool value_found[] = { false, false, false, false };
+ char *val1, *val2, *val3, *val4, *s;
+ unsigned count;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val1");
+ assert_se(val1);
+ val2 = strdup("my val2");
+ assert_se(val2);
+ val3 = strdup("my val3");
+ assert_se(val3);
+ val4 = strdup("my val4");
+ assert_se(val4);
+
+ m = NULL;
+
+ count = 0;
+ HASHMAP_FOREACH(s, m)
+ count++;
+ assert_se(count == 0);
+
+ m = hashmap_new(&string_hash_ops);
+
+ count = 0;
+ HASHMAP_FOREACH(s, m)
+ count++;
+ assert_se(count == 0);
+
+ hashmap_put(m, "Key 1", val1);
+ hashmap_put(m, "Key 2", val2);
+ hashmap_put(m, "Key 3", val3);
+ hashmap_put(m, "Key 4", val4);
+
+ HASHMAP_FOREACH(s, m) {
+ if (!value_found[0] && streq(s, val1))
+ value_found[0] = true;
+ else if (!value_found[1] && streq(s, val2))
+ value_found[1] = true;
+ else if (!value_found[2] && streq(s, val3))
+ value_found[2] = true;
+ else if (!value_found[3] && streq(s, val4))
+ value_found[3] = true;
+ }
+
+ assert_se(m);
+ assert_se(value_found[0] && value_found[1] && value_found[2] && value_found[3]);
+
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_merge(void) {
+ Hashmap *m, *n;
+ char *val1, *val2, *val3, *val4, *r;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val1");
+ assert_se(val1);
+ val2 = strdup("my val2");
+ assert_se(val2);
+ val3 = strdup("my val3");
+ assert_se(val3);
+ val4 = strdup("my val4");
+ assert_se(val4);
+
+ m = hashmap_new(&string_hash_ops);
+ n = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "Key 1", val1);
+ hashmap_put(m, "Key 2", val2);
+ hashmap_put(n, "Key 3", val3);
+ hashmap_put(n, "Key 4", val4);
+
+ assert_se(hashmap_merge(m, n) == 0);
+ r = hashmap_get(m, "Key 3");
+ assert_se(r && streq(r, "my val3"));
+ r = hashmap_get(m, "Key 4");
+ assert_se(r && streq(r, "my val4"));
+
+ assert_se(m);
+ assert_se(n);
+ hashmap_free(n);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_contains(void) {
+ Hashmap *m;
+ char *val1;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val");
+ assert_se(val1);
+
+ m = hashmap_new(&string_hash_ops);
+
+ assert_se(!hashmap_contains(m, "Key 1"));
+ hashmap_put(m, "Key 1", val1);
+ assert_se(hashmap_contains(m, "Key 1"));
+ assert_se(!hashmap_contains(m, "Key 2"));
+
+ assert_se(!hashmap_contains(NULL, "Key 1"));
+
+ assert_se(m);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_isempty(void) {
+ Hashmap *m;
+ char *val1;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val");
+ assert_se(val1);
+
+ m = hashmap_new(&string_hash_ops);
+
+ assert_se(hashmap_isempty(m));
+ hashmap_put(m, "Key 1", val1);
+ assert_se(!hashmap_isempty(m));
+
+ assert_se(m);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_size(void) {
+ Hashmap *m;
+ char *val1, *val2, *val3, *val4;
+
+ log_info("/* %s */", __func__);
+
+ val1 = strdup("my val");
+ assert_se(val1);
+ val2 = strdup("my val");
+ assert_se(val2);
+ val3 = strdup("my val");
+ assert_se(val3);
+ val4 = strdup("my val");
+ assert_se(val4);
+
+ assert_se(hashmap_size(NULL) == 0);
+ assert_se(hashmap_buckets(NULL) == 0);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "Key 1", val1);
+ hashmap_put(m, "Key 2", val2);
+ hashmap_put(m, "Key 3", val3);
+ hashmap_put(m, "Key 4", val4);
+
+ assert_se(m);
+ assert_se(hashmap_size(m) == 4);
+ assert_se(hashmap_buckets(m) >= 4);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_get(void) {
+ Hashmap *m;
+ char *r;
+ char *val;
+
+ log_info("/* %s */", __func__);
+
+ val = strdup("my val");
+ assert_se(val);
+
+ r = hashmap_get(NULL, "Key 1");
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, "Key 1", val);
+
+ r = hashmap_get(m, "Key 1");
+ assert_se(streq(r, val));
+
+ r = hashmap_get(m, "no such key");
+ assert_se(r == NULL);
+
+ assert_se(m);
+ hashmap_free_free(m);
+}
+
+static void test_hashmap_get2(void) {
+ Hashmap *m;
+ char *r;
+ char *val;
+ char key_orig[] = "Key 1";
+ void *key_copy;
+
+ log_info("/* %s */", __func__);
+
+ val = strdup("my val");
+ assert_se(val);
+
+ key_copy = strdup(key_orig);
+ assert_se(key_copy);
+
+ r = hashmap_get2(NULL, key_orig, &key_copy);
+ assert_se(r == NULL);
+
+ m = hashmap_new(&string_hash_ops);
+
+ hashmap_put(m, key_copy, val);
+ key_copy = NULL;
+
+ r = hashmap_get2(m, key_orig, &key_copy);
+ assert_se(streq(r, val));
+ assert_se(key_orig != key_copy);
+ assert_se(streq(key_orig, key_copy));
+
+ r = hashmap_get2(m, "no such key", NULL);
+ assert_se(r == NULL);
+
+ assert_se(m);
+ hashmap_free_free_free(m);
+}
+
+static void crippled_hashmap_func(const void *p, struct siphash *state) {
+ return trivial_hash_func(INT_TO_PTR(PTR_TO_INT(p) & 0xff), state);
+}
+
+static const struct hash_ops crippled_hashmap_ops = {
+ .hash = crippled_hashmap_func,
+ .compare = trivial_compare_func,
+};
+
+static void test_hashmap_many(void) {
+ Hashmap *h;
+ unsigned i, j;
+ void *v, *k;
+ bool slow = slow_tests_enabled();
+ const struct {
+ const char *title;
+ const struct hash_ops *ops;
+ unsigned n_entries;
+ } tests[] = {
+ { "trivial_hashmap_ops", NULL, slow ? 1 << 20 : 240 },
+ { "crippled_hashmap_ops", &crippled_hashmap_ops, slow ? 1 << 14 : 140 },
+ };
+
+ log_info("/* %s (%s) */", __func__, slow ? "slow" : "fast");
+
+ for (j = 0; j < ELEMENTSOF(tests); j++) {
+ usec_t ts = now(CLOCK_MONOTONIC), n;
+ char b[FORMAT_TIMESPAN_MAX];
+
+ assert_se(h = hashmap_new(tests[j].ops));
+
+ for (i = 1; i < tests[j].n_entries*3; i+=3) {
+ assert_se(hashmap_put(h, UINT_TO_PTR(i), UINT_TO_PTR(i)) >= 0);
+ assert_se(PTR_TO_UINT(hashmap_get(h, UINT_TO_PTR(i))) == i);
+ }
+
+ for (i = 1; i < tests[j].n_entries*3; i++)
+ assert_se(hashmap_contains(h, UINT_TO_PTR(i)) == (i % 3 == 1));
+
+ log_info("%s %u <= %u * 0.8 = %g",
+ tests[j].title, hashmap_size(h), hashmap_buckets(h), hashmap_buckets(h) * 0.8);
+
+ assert_se(hashmap_size(h) <= hashmap_buckets(h) * 0.8);
+ assert_se(hashmap_size(h) == tests[j].n_entries);
+
+ while (!hashmap_isempty(h)) {
+ k = hashmap_first_key(h);
+ v = hashmap_remove(h, k);
+ assert_se(v == k);
+ }
+
+ hashmap_free(h);
+
+ n = now(CLOCK_MONOTONIC);
+ log_info("test took %s", format_timespan(b, sizeof b, n - ts, 0));
+ }
+}
+
+extern unsigned custom_counter;
+extern const struct hash_ops boring_hash_ops, custom_hash_ops;
+
+static void test_hashmap_free(void) {
+ Hashmap *h;
+ bool slow = slow_tests_enabled();
+ usec_t ts, n;
+ char b[FORMAT_TIMESPAN_MAX];
+ unsigned n_entries = slow ? 1 << 20 : 240;
+
+ const struct {
+ const char *title;
+ const struct hash_ops *ops;
+ unsigned expect_counter;
+ } tests[] = {
+ { "string_hash_ops", &boring_hash_ops, 2 * n_entries},
+ { "custom_free_hash_ops", &custom_hash_ops, 0 },
+ };
+
+ log_info("/* %s (%s, %u entries) */", __func__, slow ? "slow" : "fast", n_entries);
+
+ for (unsigned j = 0; j < ELEMENTSOF(tests); j++) {
+ ts = now(CLOCK_MONOTONIC);
+ assert_se(h = hashmap_new(tests[j].ops));
+
+ custom_counter = 0;
+ for (unsigned i = 0; i < n_entries; i++) {
+ char s[DECIMAL_STR_MAX(unsigned)];
+ char *k, *v;
+
+ xsprintf(s, "%u", i);
+ assert_se(k = strdup(s));
+ assert_se(v = strdup(s));
+ custom_counter += 2;
+
+ assert_se(hashmap_put(h, k, v) >= 0);
+ }
+
+ hashmap_free(h);
+
+ n = now(CLOCK_MONOTONIC);
+ log_info("%s test took %s", tests[j].title, format_timespan(b, sizeof b, n - ts, 0));
+
+ assert_se(custom_counter == tests[j].expect_counter);
+ }
+}
+
+typedef struct Item {
+ int seen;
+} Item;
+static void item_seen(Item *item) {
+ item->seen++;
+}
+
+static void test_hashmap_free_with_destructor(void) {
+ Hashmap *m;
+ struct Item items[4] = {};
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(m = hashmap_new(NULL));
+ for (i = 0; i < ELEMENTSOF(items) - 1; i++)
+ assert_se(hashmap_put(m, INT_TO_PTR(i), items + i) == 1);
+
+ m = hashmap_free_with_destructor(m, item_seen);
+ assert_se(items[0].seen == 1);
+ assert_se(items[1].seen == 1);
+ assert_se(items[2].seen == 1);
+ assert_se(items[3].seen == 0);
+}
+
+static void test_hashmap_first(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(!hashmap_first(m));
+ assert_se(hashmap_put(m, "key 1", (void*) "val 1") == 1);
+ assert_se(streq(hashmap_first(m), "val 1"));
+ assert_se(hashmap_put(m, "key 2", (void*) "val 2") == 1);
+#ifdef ORDERED
+ assert_se(streq(hashmap_first(m), "val 1"));
+ assert_se(hashmap_remove(m, "key 1"));
+ assert_se(streq(hashmap_first(m), "val 2"));
+#endif
+}
+
+static void test_hashmap_first_key(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(!hashmap_first_key(m));
+ assert_se(hashmap_put(m, "key 1", NULL) == 1);
+ assert_se(streq(hashmap_first_key(m), "key 1"));
+ assert_se(hashmap_put(m, "key 2", NULL) == 1);
+#ifdef ORDERED
+ assert_se(streq(hashmap_first_key(m), "key 1"));
+ assert_se(hashmap_remove(m, "key 1") == NULL);
+ assert_se(streq(hashmap_first_key(m), "key 2"));
+#endif
+}
+
+static void test_hashmap_steal_first_key(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(!hashmap_steal_first_key(m));
+ assert_se(hashmap_put(m, "key 1", NULL) == 1);
+ assert_se(streq(hashmap_steal_first_key(m), "key 1"));
+
+ assert_se(hashmap_isempty(m));
+}
+
+static void test_hashmap_steal_first(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ int seen[3] = {};
+ char *val;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(hashmap_put(m, "key 1", (void*) "1") == 1);
+ assert_se(hashmap_put(m, "key 2", (void*) "22") == 1);
+ assert_se(hashmap_put(m, "key 3", (void*) "333") == 1);
+
+ while ((val = hashmap_steal_first(m)))
+ seen[strlen(val) - 1]++;
+
+ assert_se(seen[0] == 1 && seen[1] == 1 && seen[2] == 1);
+
+ assert_se(hashmap_isempty(m));
+}
+
+static void test_hashmap_clear_free_free(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(hashmap_put(m, strdup("key 1"), NULL) == 1);
+ assert_se(hashmap_put(m, strdup("key 2"), NULL) == 1);
+ assert_se(hashmap_put(m, strdup("key 3"), NULL) == 1);
+
+ hashmap_clear_free_free(m);
+ assert_se(hashmap_isempty(m));
+
+ assert_se(hashmap_put(m, strdup("key 1"), strdup("value 1")) == 1);
+ assert_se(hashmap_put(m, strdup("key 2"), strdup("value 2")) == 1);
+ assert_se(hashmap_put(m, strdup("key 3"), strdup("value 3")) == 1);
+
+ hashmap_clear_free_free(m);
+ assert_se(hashmap_isempty(m));
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(test_hash_ops_key, char, string_hash_func, string_compare_func, free);
+DEFINE_PRIVATE_HASH_OPS_FULL(test_hash_ops_full, char, string_hash_func, string_compare_func, free, char, free);
+
+static void test_hashmap_clear_free_with_destructor(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&test_hash_ops_key);
+ assert_se(m);
+
+ assert_se(hashmap_put(m, strdup("key 1"), NULL) == 1);
+ assert_se(hashmap_put(m, strdup("key 2"), NULL) == 1);
+ assert_se(hashmap_put(m, strdup("key 3"), NULL) == 1);
+
+ hashmap_clear_free(m);
+ assert_se(hashmap_isempty(m));
+ m = hashmap_free(m);
+
+ m = hashmap_new(&test_hash_ops_full);
+ assert_se(m);
+
+ assert_se(hashmap_put(m, strdup("key 1"), strdup("value 1")) == 1);
+ assert_se(hashmap_put(m, strdup("key 2"), strdup("value 2")) == 1);
+ assert_se(hashmap_put(m, strdup("key 3"), strdup("value 3")) == 1);
+
+ hashmap_clear_free(m);
+ assert_se(hashmap_isempty(m));
+}
+
+static void test_hashmap_reserve(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = hashmap_new(&string_hash_ops);
+
+ assert_se(hashmap_reserve(m, 1) == 0);
+ assert_se(hashmap_buckets(m) < 1000);
+ assert_se(hashmap_reserve(m, 1000) == 0);
+ assert_se(hashmap_buckets(m) >= 1000);
+ assert_se(hashmap_isempty(m));
+
+ assert_se(hashmap_put(m, "key 1", (void*) "val 1") == 1);
+
+ assert_se(hashmap_reserve(m, UINT_MAX) == -ENOMEM);
+ assert_se(hashmap_reserve(m, UINT_MAX - 1) == -ENOMEM);
+}
+
+static void test_path_hashmap(void) {
+ _cleanup_hashmap_free_ Hashmap *h = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(h = hashmap_new(&path_hash_ops));
+
+ assert_se(hashmap_put(h, "foo", INT_TO_PTR(1)) >= 0);
+ assert_se(hashmap_put(h, "/foo", INT_TO_PTR(2)) >= 0);
+ assert_se(hashmap_put(h, "//foo", INT_TO_PTR(3)) == -EEXIST);
+ assert_se(hashmap_put(h, "//foox/", INT_TO_PTR(4)) >= 0);
+ assert_se(hashmap_put(h, "/foox////", INT_TO_PTR(5)) == -EEXIST);
+ assert_se(hashmap_put(h, "foo//////bar/quux//", INT_TO_PTR(6)) >= 0);
+ assert_se(hashmap_put(h, "foo/bar//quux/", INT_TO_PTR(8)) == -EEXIST);
+
+ assert_se(hashmap_get(h, "foo") == INT_TO_PTR(1));
+ assert_se(hashmap_get(h, "foo/") == INT_TO_PTR(1));
+ assert_se(hashmap_get(h, "foo////") == INT_TO_PTR(1));
+ assert_se(hashmap_get(h, "/foo") == INT_TO_PTR(2));
+ assert_se(hashmap_get(h, "//foo") == INT_TO_PTR(2));
+ assert_se(hashmap_get(h, "/////foo////") == INT_TO_PTR(2));
+ assert_se(hashmap_get(h, "/////foox////") == INT_TO_PTR(4));
+ assert_se(hashmap_get(h, "/foox/") == INT_TO_PTR(4));
+ assert_se(hashmap_get(h, "/foox") == INT_TO_PTR(4));
+ assert_se(!hashmap_get(h, "foox"));
+ assert_se(hashmap_get(h, "foo/bar/quux") == INT_TO_PTR(6));
+ assert_se(hashmap_get(h, "foo////bar////quux/////") == INT_TO_PTR(6));
+ assert_se(!hashmap_get(h, "/foo////bar////quux/////"));
+}
+
+static void test_string_strv_hashmap(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ char **s;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(string_strv_hashmap_put(&m, "foo", "bar") == 1);
+ assert_se(string_strv_hashmap_put(&m, "foo", "bar") == 0);
+ assert_se(string_strv_hashmap_put(&m, "foo", "BAR") == 1);
+ assert_se(string_strv_hashmap_put(&m, "foo", "BAR") == 0);
+ assert_se(string_strv_hashmap_put(&m, "foo", "bar") == 0);
+ assert_se(hashmap_contains(m, "foo"));
+
+ s = hashmap_get(m, "foo");
+ assert_se(strv_equal(s, STRV_MAKE("bar", "BAR")));
+
+ assert_se(string_strv_hashmap_put(&m, "xxx", "bar") == 1);
+ assert_se(string_strv_hashmap_put(&m, "xxx", "bar") == 0);
+ assert_se(string_strv_hashmap_put(&m, "xxx", "BAR") == 1);
+ assert_se(string_strv_hashmap_put(&m, "xxx", "BAR") == 0);
+ assert_se(string_strv_hashmap_put(&m, "xxx", "bar") == 0);
+ assert_se(hashmap_contains(m, "xxx"));
+
+ s = hashmap_get(m, "xxx");
+ assert_se(strv_equal(s, STRV_MAKE("bar", "BAR")));
+}
+
+void test_hashmap_funcs(void) {
+ log_info("/************ %s ************/", __func__);
+
+ test_hashmap_copy();
+ test_hashmap_get_strv();
+ test_hashmap_move_one();
+ test_hashmap_move();
+ test_hashmap_replace();
+ test_hashmap_update();
+ test_hashmap_put();
+ test_hashmap_remove();
+ test_hashmap_remove2();
+ test_hashmap_remove_value();
+ test_hashmap_remove_and_put();
+ test_hashmap_remove_and_replace();
+ test_hashmap_ensure_allocated();
+ test_hashmap_foreach();
+ test_hashmap_foreach_key();
+ test_hashmap_contains();
+ test_hashmap_merge();
+ test_hashmap_isempty();
+ test_hashmap_get();
+ test_hashmap_get2();
+ test_hashmap_size();
+ test_hashmap_many();
+ test_hashmap_free();
+ test_hashmap_free_with_destructor();
+ test_hashmap_first();
+ test_hashmap_first_key();
+ test_hashmap_steal_first_key();
+ test_hashmap_steal_first();
+ test_hashmap_clear_free_free();
+ test_hashmap_clear_free_with_destructor();
+ test_hashmap_reserve();
+ test_path_hashmap();
+ test_string_strv_hashmap();
+}
diff --git a/src/test/test-hashmap.c b/src/test/test-hashmap.c
new file mode 100644
index 0000000..20bc97c
--- /dev/null
+++ b/src/test/test-hashmap.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "hashmap.h"
+#include "string-util.h"
+#include "util.h"
+
+unsigned custom_counter = 0;
+static void custom_destruct(void* p) {
+ custom_counter--;
+ free(p);
+}
+
+DEFINE_HASH_OPS_FULL(boring_hash_ops, char, string_hash_func, string_compare_func, free, char, free);
+DEFINE_HASH_OPS_FULL(custom_hash_ops, char, string_hash_func, string_compare_func, custom_destruct, char, custom_destruct);
+
+void test_hashmap_funcs(void);
+void test_ordered_hashmap_funcs(void);
+
+static void test_ordered_hashmap_next(void) {
+ _cleanup_ordered_hashmap_free_ OrderedHashmap *m = NULL;
+ int i;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(m = ordered_hashmap_new(NULL));
+ for (i = -2; i <= 2; i++)
+ assert_se(ordered_hashmap_put(m, INT_TO_PTR(i), INT_TO_PTR(i+10)) == 1);
+ for (i = -2; i <= 1; i++)
+ assert_se(ordered_hashmap_next(m, INT_TO_PTR(i)) == INT_TO_PTR(i+11));
+ assert_se(!ordered_hashmap_next(m, INT_TO_PTR(2)));
+ assert_se(!ordered_hashmap_next(NULL, INT_TO_PTR(1)));
+ assert_se(!ordered_hashmap_next(m, INT_TO_PTR(3)));
+}
+
+static void test_uint64_compare_func(void) {
+ const uint64_t a = 0x100, b = 0x101;
+
+ assert_se(uint64_compare_func(&a, &a) == 0);
+ assert_se(uint64_compare_func(&a, &b) == -1);
+ assert_se(uint64_compare_func(&b, &a) == 1);
+}
+
+static void test_trivial_compare_func(void) {
+ assert_se(trivial_compare_func(INT_TO_PTR('a'), INT_TO_PTR('a')) == 0);
+ assert_se(trivial_compare_func(INT_TO_PTR('a'), INT_TO_PTR('b')) == -1);
+ assert_se(trivial_compare_func(INT_TO_PTR('b'), INT_TO_PTR('a')) == 1);
+}
+
+static void test_string_compare_func(void) {
+ assert_se(string_compare_func("fred", "wilma") != 0);
+ assert_se(string_compare_func("fred", "fred") == 0);
+}
+
+static void compare_cache(Hashmap *map, IteratedCache *cache) {
+ const void **keys = NULL, **values = NULL;
+ unsigned num, idx;
+ void *k, *v;
+
+ assert_se(iterated_cache_get(cache, &keys, &values, &num) == 0);
+ assert_se(num == 0 || keys);
+ assert_se(num == 0 || values);
+
+ idx = 0;
+ HASHMAP_FOREACH_KEY(v, k, map) {
+ assert_se(v == values[idx]);
+ assert_se(k == keys[idx]);
+
+ idx++;
+ }
+
+ assert_se(idx == num);
+}
+
+static void test_iterated_cache(void) {
+ Hashmap *m;
+ IteratedCache *c;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(m = hashmap_new(NULL));
+ assert_se(c = hashmap_iterated_cache_new(m));
+ compare_cache(m, c);
+
+ for (int stage = 0; stage < 100; stage++) {
+
+ for (int i = 0; i < 100; i++) {
+ int foo = stage * 1000 + i;
+
+ assert_se(hashmap_put(m, INT_TO_PTR(foo), INT_TO_PTR(foo + 777)) == 1);
+ }
+
+ compare_cache(m, c);
+
+ if (!(stage % 10)) {
+ for (int i = 0; i < 100; i++) {
+ int foo = stage * 1000 + i;
+
+ assert_se(hashmap_remove(m, INT_TO_PTR(foo)) == INT_TO_PTR(foo + 777));
+ }
+
+ compare_cache(m, c);
+ }
+ }
+
+ hashmap_clear(m);
+ compare_cache(m, c);
+
+ assert_se(hashmap_free(m) == NULL);
+ assert_se(iterated_cache_free(c) == NULL);
+}
+
+static void test_hashmap_put_strdup(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ char *s;
+
+ /* We don't have ordered_hashmap_put_strdup() yet. If it is added,
+ * these tests should be moved to test-hashmap-plain.c. */
+
+ log_info("/* %s */", __func__);
+
+ assert_se(hashmap_put_strdup(&m, "foo", "bar") == 1);
+ assert_se(hashmap_put_strdup(&m, "foo", "bar") == 0);
+ assert_se(hashmap_put_strdup(&m, "foo", "BAR") == -EEXIST);
+ assert_se(hashmap_put_strdup(&m, "foo", "bar") == 0);
+ assert_se(hashmap_contains(m, "foo"));
+
+ s = hashmap_get(m, "foo");
+ assert_se(streq(s, "bar"));
+
+ assert_se(hashmap_put_strdup(&m, "xxx", "bar") == 1);
+ assert_se(hashmap_put_strdup(&m, "xxx", "bar") == 0);
+ assert_se(hashmap_put_strdup(&m, "xxx", "BAR") == -EEXIST);
+ assert_se(hashmap_put_strdup(&m, "xxx", "bar") == 0);
+ assert_se(hashmap_contains(m, "xxx"));
+
+ s = hashmap_get(m, "xxx");
+ assert_se(streq(s, "bar"));
+}
+
+static void test_hashmap_put_strdup_null(void) {
+ _cleanup_hashmap_free_ Hashmap *m = NULL;
+ char *s;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(hashmap_put_strdup(&m, "foo", "bar") == 1);
+ assert_se(hashmap_put_strdup(&m, "foo", "bar") == 0);
+ assert_se(hashmap_put_strdup(&m, "foo", NULL) == -EEXIST);
+ assert_se(hashmap_put_strdup(&m, "foo", "bar") == 0);
+ assert_se(hashmap_contains(m, "foo"));
+
+ s = hashmap_get(m, "foo");
+ assert_se(streq(s, "bar"));
+
+ assert_se(hashmap_put_strdup(&m, "xxx", NULL) == 1);
+ assert_se(hashmap_put_strdup(&m, "xxx", "bar") == -EEXIST);
+ assert_se(hashmap_put_strdup(&m, "xxx", NULL) == 0);
+ assert_se(hashmap_contains(m, "xxx"));
+
+ s = hashmap_get(m, "xxx");
+ assert_se(s == NULL);
+}
+
+int main(int argc, const char *argv[]) {
+ /* This file tests in test-hashmap-plain.c, and tests in test-hashmap-ordered.c, which is generated
+ * from test-hashmap-plain.c. Hashmap tests should be added to test-hashmap-plain.c, and here only if
+ * they don't apply to ordered hashmaps. */
+
+ log_parse_environment();
+ log_open();
+
+ test_hashmap_funcs();
+ test_ordered_hashmap_funcs();
+
+ log_info("/************ non-shared tests ************/");
+
+ test_ordered_hashmap_next();
+ test_uint64_compare_func();
+ test_trivial_compare_func();
+ test_string_compare_func();
+ test_iterated_cache();
+ test_hashmap_put_strdup();
+ test_hashmap_put_strdup_null();
+
+ return 0;
+}
diff --git a/src/test/test-hexdecoct.c b/src/test/test-hexdecoct.c
new file mode 100644
index 0000000..f0f9679
--- /dev/null
+++ b/src/test/test-hexdecoct.c
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "string-util.h"
+
+static void test_hexchar(void) {
+ assert_se(hexchar(0xa) == 'a');
+ assert_se(hexchar(0x0) == '0');
+}
+
+static void test_unhexchar(void) {
+ assert_se(unhexchar('a') == 0xA);
+ assert_se(unhexchar('A') == 0xA);
+ assert_se(unhexchar('0') == 0x0);
+}
+
+static void test_base32hexchar(void) {
+ assert_se(base32hexchar(0) == '0');
+ assert_se(base32hexchar(9) == '9');
+ assert_se(base32hexchar(10) == 'A');
+ assert_se(base32hexchar(31) == 'V');
+}
+
+static void test_unbase32hexchar(void) {
+ assert_se(unbase32hexchar('0') == 0);
+ assert_se(unbase32hexchar('9') == 9);
+ assert_se(unbase32hexchar('A') == 10);
+ assert_se(unbase32hexchar('V') == 31);
+ assert_se(unbase32hexchar('=') == -EINVAL);
+}
+
+static void test_base64char(void) {
+ assert_se(base64char(0) == 'A');
+ assert_se(base64char(26) == 'a');
+ assert_se(base64char(63) == '/');
+}
+
+static void test_unbase64char(void) {
+ assert_se(unbase64char('A') == 0);
+ assert_se(unbase64char('Z') == 25);
+ assert_se(unbase64char('a') == 26);
+ assert_se(unbase64char('z') == 51);
+ assert_se(unbase64char('0') == 52);
+ assert_se(unbase64char('9') == 61);
+ assert_se(unbase64char('+') == 62);
+ assert_se(unbase64char('/') == 63);
+ assert_se(unbase64char('=') == -EINVAL);
+}
+
+static void test_octchar(void) {
+ assert_se(octchar(00) == '0');
+ assert_se(octchar(07) == '7');
+}
+
+static void test_unoctchar(void) {
+ assert_se(unoctchar('0') == 00);
+ assert_se(unoctchar('7') == 07);
+}
+
+static void test_decchar(void) {
+ assert_se(decchar(0) == '0');
+ assert_se(decchar(9) == '9');
+}
+
+static void test_undecchar(void) {
+ assert_se(undecchar('0') == 0);
+ assert_se(undecchar('9') == 9);
+}
+
+static void test_unhexmem_one(const char *s, size_t l, int retval) {
+ _cleanup_free_ char *hex = NULL;
+ _cleanup_free_ void *mem = NULL;
+ size_t len;
+
+ assert_se(unhexmem(s, l, &mem, &len) == retval);
+ if (retval == 0) {
+ char *answer;
+
+ if (l == (size_t) -1)
+ l = strlen(s);
+
+ assert_se(hex = hexmem(mem, len));
+ answer = strndupa(strempty(s), l);
+ assert_se(streq(delete_chars(answer, WHITESPACE), hex));
+ }
+}
+
+static void test_unhexmem(void) {
+ const char *hex = "efa2149213";
+ const char *hex_space = " e f a\n 2\r 14\n\r\t9\t2 \n1\r3 \r\r\t";
+ const char *hex_invalid = "efa214921o";
+
+ test_unhexmem_one(NULL, 0, 0);
+ test_unhexmem_one("", 0, 0);
+ test_unhexmem_one("", (size_t) -1, 0);
+ test_unhexmem_one(" \n \t\r \t\t \n\n\n", (size_t) -1, 0);
+ test_unhexmem_one(hex_invalid, strlen(hex_invalid), -EINVAL);
+ test_unhexmem_one(hex_invalid, (size_t) - 1, -EINVAL);
+ test_unhexmem_one(hex, strlen(hex) - 1, -EPIPE);
+ test_unhexmem_one(hex, strlen(hex), 0);
+ test_unhexmem_one(hex, (size_t) -1, 0);
+ test_unhexmem_one(hex_space, strlen(hex_space), 0);
+ test_unhexmem_one(hex_space, (size_t) -1, 0);
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-10 */
+static void test_base32hexmem(void) {
+ char *b32;
+
+ b32 = base32hexmem("", STRLEN(""), true);
+ assert_se(b32);
+ assert_se(streq(b32, ""));
+ free(b32);
+
+ b32 = base32hexmem("f", STRLEN("f"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CO======"));
+ free(b32);
+
+ b32 = base32hexmem("fo", STRLEN("fo"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNG===="));
+ free(b32);
+
+ b32 = base32hexmem("foo", STRLEN("foo"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMU==="));
+ free(b32);
+
+ b32 = base32hexmem("foob", STRLEN("foob"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOG="));
+ free(b32);
+
+ b32 = base32hexmem("fooba", STRLEN("fooba"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOJ1"));
+ free(b32);
+
+ b32 = base32hexmem("foobar", STRLEN("foobar"), true);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOJ1E8======"));
+ free(b32);
+
+ b32 = base32hexmem("", STRLEN(""), false);
+ assert_se(b32);
+ assert_se(streq(b32, ""));
+ free(b32);
+
+ b32 = base32hexmem("f", STRLEN("f"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CO"));
+ free(b32);
+
+ b32 = base32hexmem("fo", STRLEN("fo"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNG"));
+ free(b32);
+
+ b32 = base32hexmem("foo", STRLEN("foo"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMU"));
+ free(b32);
+
+ b32 = base32hexmem("foob", STRLEN("foob"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOG"));
+ free(b32);
+
+ b32 = base32hexmem("fooba", STRLEN("fooba"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOJ1"));
+ free(b32);
+
+ b32 = base32hexmem("foobar", STRLEN("foobar"), false);
+ assert_se(b32);
+ assert_se(streq(b32, "CPNMUOJ1E8"));
+ free(b32);
+}
+
+static void test_unbase32hexmem_one(const char *hex, bool padding, int retval, const char *ans) {
+ _cleanup_free_ void *mem = NULL;
+ size_t len;
+
+ assert_se(unbase32hexmem(hex, (size_t) -1, padding, &mem, &len) == retval);
+ if (retval == 0) {
+ char *str;
+
+ str = strndupa(mem, len);
+ assert_se(streq(str, ans));
+ }
+}
+
+static void test_unbase32hexmem(void) {
+ test_unbase32hexmem_one("", true, 0, "");
+
+ test_unbase32hexmem_one("CO======", true, 0, "f");
+ test_unbase32hexmem_one("CPNG====", true, 0, "fo");
+ test_unbase32hexmem_one("CPNMU===", true, 0, "foo");
+ test_unbase32hexmem_one("CPNMUOG=", true, 0, "foob");
+ test_unbase32hexmem_one("CPNMUOJ1", true, 0, "fooba");
+ test_unbase32hexmem_one("CPNMUOJ1E8======", true, 0, "foobar");
+
+ test_unbase32hexmem_one("A", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("A=======", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAA=====", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAAA==", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AB======", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAB====", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAB===", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAAAB=", true, -EINVAL, NULL);
+
+ test_unbase32hexmem_one("XPNMUOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CXNMUOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPXMUOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNXUOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMXOJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMUXJ1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMUOX1", true, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMUOJX", true, -EINVAL, NULL);
+
+ test_unbase32hexmem_one("", false, 0, "");
+ test_unbase32hexmem_one("CO", false, 0, "f");
+ test_unbase32hexmem_one("CPNG", false, 0, "fo");
+ test_unbase32hexmem_one("CPNMU", false, 0, "foo");
+ test_unbase32hexmem_one("CPNMUOG", false, 0, "foob");
+ test_unbase32hexmem_one("CPNMUOJ1", false, 0, "fooba");
+ test_unbase32hexmem_one("CPNMUOJ1E8", false, 0, "foobar");
+ test_unbase32hexmem_one("CPNMUOG=", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("CPNMUOJ1E8======", false, -EINVAL, NULL);
+
+ test_unbase32hexmem_one("A", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAA", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAAA", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AB", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAB", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAB", false, -EINVAL, NULL);
+ test_unbase32hexmem_one("AAAAAAB", false, -EINVAL, NULL);
+}
+
+/* https://tools.ietf.org/html/rfc4648#section-10 */
+static void test_base64mem(void) {
+ char *b64;
+
+ assert_se(base64mem("", STRLEN(""), &b64) == 0);
+ assert_se(streq(b64, ""));
+ free(b64);
+
+ assert_se(base64mem("f", STRLEN("f"), &b64) == 4);
+ assert_se(streq(b64, "Zg=="));
+ free(b64);
+
+ assert_se(base64mem("fo", STRLEN("fo"), &b64) == 4);
+ assert_se(streq(b64, "Zm8="));
+ free(b64);
+
+ assert_se(base64mem("foo", STRLEN("foo"), &b64) == 4);
+ assert_se(streq(b64, "Zm9v"));
+ free(b64);
+
+ assert_se(base64mem("foob", STRLEN("foob"), &b64) == 8);
+ assert_se(streq(b64, "Zm9vYg=="));
+ free(b64);
+
+ assert_se(base64mem("fooba", STRLEN("fooba"), &b64) == 8);
+ assert_se(streq(b64, "Zm9vYmE="));
+ free(b64);
+
+ assert_se(base64mem("foobar", STRLEN("foobar"), &b64) == 8);
+ assert_se(streq(b64, "Zm9vYmFy"));
+ free(b64);
+}
+
+static void test_unbase64mem_one(const char *input, const char *output, int ret) {
+ _cleanup_free_ void *buffer = NULL;
+ size_t size = 0;
+
+ assert_se(unbase64mem(input, (size_t) -1, &buffer, &size) == ret);
+
+ if (ret >= 0) {
+ assert_se(size == strlen(output));
+ assert_se(memcmp(buffer, output, size) == 0);
+ assert_se(((char*) buffer)[size] == 0);
+ }
+}
+
+static void test_unbase64mem(void) {
+
+ test_unbase64mem_one("", "", 0);
+ test_unbase64mem_one("Zg==", "f", 0);
+ test_unbase64mem_one("Zm8=", "fo", 0);
+ test_unbase64mem_one("Zm9v", "foo", 0);
+ test_unbase64mem_one("Zm9vYg==", "foob", 0);
+ test_unbase64mem_one("Zm9vYmE=", "fooba", 0);
+ test_unbase64mem_one("Zm9vYmFy", "foobar", 0);
+
+ test_unbase64mem_one(" ", "", 0);
+ test_unbase64mem_one(" \n\r ", "", 0);
+ test_unbase64mem_one(" Zg\n== ", "f", 0);
+ test_unbase64mem_one(" Zm 8=\r", "fo", 0);
+ test_unbase64mem_one(" Zm9\n\r\r\nv ", "foo", 0);
+ test_unbase64mem_one(" Z m9vYg==\n\r", "foob", 0);
+ test_unbase64mem_one(" Zm 9vYmE= ", "fooba", 0);
+ test_unbase64mem_one(" Z m9v YmFy ", "foobar", 0);
+
+ test_unbase64mem_one("A", NULL, -EPIPE);
+ test_unbase64mem_one("A====", NULL, -EINVAL);
+ test_unbase64mem_one("AAB==", NULL, -EINVAL);
+ test_unbase64mem_one(" A A A B = ", NULL, -EINVAL);
+ test_unbase64mem_one(" Z m 8 = q u u x ", NULL, -ENAMETOOLONG);
+}
+
+static void test_hexdump(void) {
+ uint8_t data[146];
+ unsigned i;
+
+ hexdump(stdout, NULL, 0);
+ hexdump(stdout, "", 0);
+ hexdump(stdout, "", 1);
+ hexdump(stdout, "x", 1);
+ hexdump(stdout, "x", 2);
+ hexdump(stdout, "foobar", 7);
+ hexdump(stdout, "f\nobar", 7);
+ hexdump(stdout, "xxxxxxxxxxxxxxxxxxxxyz", 23);
+
+ for (i = 0; i < ELEMENTSOF(data); i++)
+ data[i] = i*2;
+
+ hexdump(stdout, data, sizeof(data));
+}
+
+int main(int argc, char *argv[]) {
+ test_hexchar();
+ test_unhexchar();
+ test_base32hexchar();
+ test_unbase32hexchar();
+ test_base64char();
+ test_unbase64char();
+ test_octchar();
+ test_unoctchar();
+ test_decchar();
+ test_undecchar();
+ test_unhexmem();
+ test_base32hexmem();
+ test_unbase32hexmem();
+ test_base64mem();
+ test_unbase64mem();
+ test_hexdump();
+
+ return 0;
+}
diff --git a/src/test/test-hostname-util.c b/src/test/test-hostname-util.c
new file mode 100644
index 0000000..73839b3
--- /dev/null
+++ b/src/test/test-hostname-util.c
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+static void test_hostname_is_valid(void) {
+ assert_se(hostname_is_valid("foobar", false));
+ assert_se(hostname_is_valid("foobar.com", false));
+ assert_se(!hostname_is_valid("foobar.com.", false));
+ assert_se(hostname_is_valid("fooBAR", false));
+ assert_se(hostname_is_valid("fooBAR.com", false));
+ assert_se(!hostname_is_valid("fooBAR.", false));
+ assert_se(!hostname_is_valid("fooBAR.com.", false));
+ assert_se(!hostname_is_valid("fööbar", false));
+ assert_se(!hostname_is_valid("", false));
+ assert_se(!hostname_is_valid(".", false));
+ assert_se(!hostname_is_valid("..", false));
+ assert_se(!hostname_is_valid("foobar.", false));
+ assert_se(!hostname_is_valid(".foobar", false));
+ assert_se(!hostname_is_valid("foo..bar", false));
+ assert_se(!hostname_is_valid("foo.bar..", false));
+ assert_se(!hostname_is_valid("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", false));
+ assert_se(!hostname_is_valid("au-xph5-rvgrdsb5hcxc-47et3a5vvkrc-server-wyoz4elpdpe3.openstack.local", false));
+
+ assert_se(hostname_is_valid("foobar", true));
+ assert_se(hostname_is_valid("foobar.com", true));
+ assert_se(hostname_is_valid("foobar.com.", true));
+ assert_se(hostname_is_valid("fooBAR", true));
+ assert_se(hostname_is_valid("fooBAR.com", true));
+ assert_se(!hostname_is_valid("fooBAR.", true));
+ assert_se(hostname_is_valid("fooBAR.com.", true));
+ assert_se(!hostname_is_valid("fööbar", true));
+ assert_se(!hostname_is_valid("", true));
+ assert_se(!hostname_is_valid(".", true));
+ assert_se(!hostname_is_valid("..", true));
+ assert_se(!hostname_is_valid("foobar.", true));
+ assert_se(!hostname_is_valid(".foobar", true));
+ assert_se(!hostname_is_valid("foo..bar", true));
+ assert_se(!hostname_is_valid("foo.bar..", true));
+ assert_se(!hostname_is_valid("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true));
+}
+
+static void test_hostname_cleanup(void) {
+ char *s;
+
+ s = strdupa("foobar");
+ assert_se(streq(hostname_cleanup(s), "foobar"));
+ s = strdupa("foobar.com");
+ assert_se(streq(hostname_cleanup(s), "foobar.com"));
+ s = strdupa("foobar.com.");
+ assert_se(streq(hostname_cleanup(s), "foobar.com"));
+ s = strdupa("foo-bar.-com-.");
+ assert_se(streq(hostname_cleanup(s), "foo-bar.com"));
+ s = strdupa("foo-bar-.-com-.");
+ assert_se(streq(hostname_cleanup(s), "foo-bar--com"));
+ s = strdupa("--foo-bar.-com");
+ assert_se(streq(hostname_cleanup(s), "foo-bar.com"));
+ s = strdupa("fooBAR");
+ assert_se(streq(hostname_cleanup(s), "fooBAR"));
+ s = strdupa("fooBAR.com");
+ assert_se(streq(hostname_cleanup(s), "fooBAR.com"));
+ s = strdupa("fooBAR.");
+ assert_se(streq(hostname_cleanup(s), "fooBAR"));
+ s = strdupa("fooBAR.com.");
+ assert_se(streq(hostname_cleanup(s), "fooBAR.com"));
+ s = strdupa("fööbar");
+ assert_se(streq(hostname_cleanup(s), "fbar"));
+ s = strdupa("");
+ assert_se(isempty(hostname_cleanup(s)));
+ s = strdupa(".");
+ assert_se(isempty(hostname_cleanup(s)));
+ s = strdupa("..");
+ assert_se(isempty(hostname_cleanup(s)));
+ s = strdupa("foobar.");
+ assert_se(streq(hostname_cleanup(s), "foobar"));
+ s = strdupa(".foobar");
+ assert_se(streq(hostname_cleanup(s), "foobar"));
+ s = strdupa("foo..bar");
+ assert_se(streq(hostname_cleanup(s), "foo.bar"));
+ s = strdupa("foo.bar..");
+ assert_se(streq(hostname_cleanup(s), "foo.bar"));
+ s = strdupa("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+ assert_se(streq(hostname_cleanup(s), "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"));
+ s = strdupa("xxxx........xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+ assert_se(streq(hostname_cleanup(s), "xxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"));
+}
+
+static void test_read_etc_hostname(void) {
+ char path[] = "/tmp/hostname.XXXXXX";
+ char *hostname;
+ int fd;
+
+ fd = mkostemp_safe(path);
+ assert(fd > 0);
+ close(fd);
+
+ /* simple hostname */
+ assert_se(write_string_file(path, "foo", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == 0);
+ assert_se(streq(hostname, "foo"));
+ hostname = mfree(hostname);
+
+ /* with comment */
+ assert_se(write_string_file(path, "# comment\nfoo", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == 0);
+ assert_se(hostname);
+ assert_se(streq(hostname, "foo"));
+ hostname = mfree(hostname);
+
+ /* with comment and extra whitespace */
+ assert_se(write_string_file(path, "# comment\n\n foo ", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == 0);
+ assert_se(hostname);
+ assert_se(streq(hostname, "foo"));
+ hostname = mfree(hostname);
+
+ /* cleans up name */
+ assert_se(write_string_file(path, "!foo/bar.com", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == 0);
+ assert_se(hostname);
+ assert_se(streq(hostname, "foobar.com"));
+ hostname = mfree(hostname);
+
+ /* no value set */
+ hostname = (char*) 0x1234;
+ assert_se(write_string_file(path, "# nothing here\n", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_etc_hostname(path, &hostname) == -ENOENT);
+ assert_se(hostname == (char*) 0x1234); /* does not touch argument on error */
+
+ /* nonexisting file */
+ assert_se(read_etc_hostname("/non/existing", &hostname) == -ENOENT);
+ assert_se(hostname == (char*) 0x1234); /* does not touch argument on error */
+
+ unlink(path);
+}
+
+static void test_hostname_malloc(void) {
+ _cleanup_free_ char *h = NULL, *l = NULL;
+
+ assert_se(h = gethostname_malloc());
+ log_info("hostname_malloc: \"%s\"", h);
+
+ assert_se(l = gethostname_short_malloc());
+ log_info("hostname_short_malloc: \"%s\"", l);
+}
+
+static void test_fallback_hostname(void) {
+ if (!hostname_is_valid(FALLBACK_HOSTNAME, false)) {
+ log_error("Configured fallback hostname \"%s\" is not valid.", FALLBACK_HOSTNAME);
+ exit(EXIT_FAILURE);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ log_parse_environment();
+ log_open();
+
+ test_hostname_is_valid();
+ test_hostname_cleanup();
+ test_read_etc_hostname();
+ test_hostname_malloc();
+
+ test_fallback_hostname();
+
+ return 0;
+}
diff --git a/src/test/test-hostname.c b/src/test/test-hostname.c
new file mode 100644
index 0000000..1a925f2
--- /dev/null
+++ b/src/test/test-hostname.c
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "hostname-setup.h"
+#include "util.h"
+
+int main(int argc, char* argv[]) {
+ int r;
+
+ r = hostname_setup();
+ if (r < 0)
+ log_error_errno(r, "hostname: %m");
+
+ return 0;
+}
diff --git a/src/test/test-id128.c b/src/test/test-id128.c
new file mode 100644
index 0000000..a0649b9
--- /dev/null
+++ b/src/test/test-id128.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "id128-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define ID128_WALDI SD_ID128_MAKE(01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f, 10)
+#define STR_WALDI "0102030405060708090a0b0c0d0e0f10"
+#define UUID_WALDI "01020304-0506-0708-090a-0b0c0d0e0f10"
+
+int main(int argc, char *argv[]) {
+ sd_id128_t id, id2;
+ char t[SD_ID128_STRING_MAX], q[ID128_UUID_STRING_MAX];
+ _cleanup_free_ char *b = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert_se(sd_id128_randomize(&id) == 0);
+ printf("random: %s\n", sd_id128_to_string(id, t));
+
+ assert_se(sd_id128_from_string(t, &id2) == 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ if (sd_booted() > 0) {
+ assert_se(sd_id128_get_machine(&id) == 0);
+ printf("machine: %s\n", sd_id128_to_string(id, t));
+
+ assert_se(sd_id128_get_boot(&id) == 0);
+ printf("boot: %s\n", sd_id128_to_string(id, t));
+ }
+
+ printf("waldi: %s\n", sd_id128_to_string(ID128_WALDI, t));
+ assert_se(streq(t, STR_WALDI));
+
+ assert_se(asprintf(&b, SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(ID128_WALDI)) == 32);
+ printf("waldi2: %s\n", b);
+ assert_se(streq(t, b));
+
+ printf("waldi3: %s\n", id128_to_uuid_string(ID128_WALDI, q));
+ assert_se(streq(q, UUID_WALDI));
+
+ b = mfree(b);
+ assert_se(asprintf(&b, SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(ID128_WALDI)) == 36);
+ printf("waldi4: %s\n", b);
+ assert_se(streq(q, b));
+
+ assert_se(sd_id128_from_string(STR_WALDI, &id) >= 0);
+ assert_se(sd_id128_equal(id, ID128_WALDI));
+
+ assert_se(sd_id128_from_string(UUID_WALDI, &id) >= 0);
+ assert_se(sd_id128_equal(id, ID128_WALDI));
+
+ assert_se(sd_id128_from_string("", &id) < 0);
+ assert_se(sd_id128_from_string("01020304-0506-0708-090a-0b0c0d0e0f101", &id) < 0);
+ assert_se(sd_id128_from_string("01020304-0506-0708-090a-0b0c0d0e0f10-", &id) < 0);
+ assert_se(sd_id128_from_string("01020304-0506-0708-090a0b0c0d0e0f10", &id) < 0);
+ assert_se(sd_id128_from_string("010203040506-0708-090a-0b0c0d0e0f10", &id) < 0);
+
+ assert_se(id128_is_valid(STR_WALDI));
+ assert_se(id128_is_valid(UUID_WALDI));
+ assert_se(!id128_is_valid(""));
+ assert_se(!id128_is_valid("01020304-0506-0708-090a-0b0c0d0e0f101"));
+ assert_se(!id128_is_valid("01020304-0506-0708-090a-0b0c0d0e0f10-"));
+ assert_se(!id128_is_valid("01020304-0506-0708-090a0b0c0d0e0f10"));
+ assert_se(!id128_is_valid("010203040506-0708-090a-0b0c0d0e0f10"));
+
+ fd = open_tmpfile_unlinkable(NULL, O_RDWR|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ /* First, write as UUID */
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(id128_write_fd(fd, ID128_UUID, id, false) >= 0);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) == -EINVAL);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_UUID, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_ANY, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ /* Second, write as plain */
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(ftruncate(fd, 0) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(id128_write_fd(fd, ID128_PLAIN, id, false) >= 0);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_UUID, &id2) == -EINVAL);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_ANY, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ /* Third, write plain without trailing newline */
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(ftruncate(fd, 0) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(write(fd, sd_id128_to_string(id, t), 32) == 32);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_UUID, &id2) == -EINVAL);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ /* Third, write UUID without trailing newline */
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(ftruncate(fd, 0) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(write(fd, id128_to_uuid_string(id, q), 36) == 36);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) == -EINVAL);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(id128_read_fd(fd, ID128_UUID, &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+
+ r = sd_id128_get_machine_app_specific(SD_ID128_MAKE(f0,3d,aa,eb,1c,33,4b,43,a7,32,17,29,44,bf,77,2e), &id);
+ if (r == -EOPNOTSUPP)
+ log_info("khash not supported on this kernel, skipping sd_id128_get_machine_app_specific() checks");
+ else {
+ assert_se(r >= 0);
+ assert_se(sd_id128_get_machine_app_specific(SD_ID128_MAKE(f0,3d,aa,eb,1c,33,4b,43,a7,32,17,29,44,bf,77,2e), &id2) >= 0);
+ assert_se(sd_id128_equal(id, id2));
+ assert_se(sd_id128_get_machine_app_specific(SD_ID128_MAKE(51,df,0b,4b,c3,b0,4c,97,80,e2,99,b9,8c,a3,73,b8), &id2) >= 0);
+ assert_se(!sd_id128_equal(id, id2));
+ }
+
+ /* Query the invocation ID */
+ r = sd_id128_get_invocation(&id);
+ if (r < 0)
+ log_warning_errno(r, "Failed to get invocation ID, ignoring: %m");
+ else
+ log_info("Invocation ID: " SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id));
+
+ return 0;
+}
diff --git a/src/test/test-in-addr-util.c b/src/test/test-in-addr-util.c
new file mode 100644
index 0000000..2b63645
--- /dev/null
+++ b/src/test/test-in-addr-util.c
@@ -0,0 +1,345 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fnmatch.h>
+#include <netinet/in.h>
+
+#include "log.h"
+#include "strv.h"
+#include "in-addr-util.h"
+
+static void test_in_addr_prefix_from_string_one(
+ const char *p,
+ int family,
+ int ret,
+ const union in_addr_union *u,
+ unsigned char prefixlen,
+ int ret_refuse,
+ unsigned char prefixlen_refuse,
+ int ret_legacy,
+ unsigned char prefixlen_legacy) {
+
+ union in_addr_union q;
+ unsigned char l;
+ int f, r;
+
+ r = in_addr_prefix_from_string(p, family, &q, &l);
+ assert_se(r == ret);
+
+ if (r < 0)
+ return;
+
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen);
+
+ r = in_addr_prefix_from_string_auto(p, &f, &q, &l);
+ assert_se(r >= 0);
+
+ assert_se(f == family);
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen);
+
+ r = in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_REFUSE, &f, &q, &l);
+ assert_se(r == ret_refuse);
+
+ if (r >= 0) {
+ assert_se(f == family);
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen_refuse);
+ }
+
+ r = in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_LEGACY, &f, &q, &l);
+ assert_se(r == ret_legacy);
+
+ if (r >= 0) {
+ assert_se(f == family);
+ assert_se(in_addr_equal(family, &q, u));
+ assert_se(l == prefixlen_legacy);
+ }
+}
+
+static void test_in_addr_prefix_from_string(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_prefix_from_string_one("", AF_INET, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string_one("/", AF_INET, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string_one("/8", AF_INET, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string_one("1.2.3.4", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32, -ENOANO, 0, 0, 8);
+ test_in_addr_prefix_from_string_one("1.2.3.4/0", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 0, 0, 0, 0, 0);
+ test_in_addr_prefix_from_string_one("1.2.3.4/1", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 1, 0, 1, 0, 1);
+ test_in_addr_prefix_from_string_one("1.2.3.4/2", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 2, 0, 2, 0, 2);
+ test_in_addr_prefix_from_string_one("1.2.3.4/32", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32, 0, 32, 0, 32);
+ test_in_addr_prefix_from_string_one("1.2.3.4/33", AF_INET, -ERANGE, NULL, 0, -ERANGE, 0, -ERANGE, 0);
+ test_in_addr_prefix_from_string_one("1.2.3.4/-1", AF_INET, -ERANGE, NULL, 0, -ERANGE, 0, -ERANGE, 0);
+ test_in_addr_prefix_from_string_one("::1", AF_INET, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+
+ test_in_addr_prefix_from_string_one("", AF_INET6, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string_one("/", AF_INET6, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string_one("/8", AF_INET6, -EINVAL, NULL, 0, -EINVAL, 0, -EINVAL, 0);
+ test_in_addr_prefix_from_string_one("::1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128, -ENOANO, 0, 0, 0);
+ test_in_addr_prefix_from_string_one("::1/0", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0, 0, 0, 0, 0);
+ test_in_addr_prefix_from_string_one("::1/1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 1, 0, 1, 0, 1);
+ test_in_addr_prefix_from_string_one("::1/2", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 2, 0, 2, 0, 2);
+ test_in_addr_prefix_from_string_one("::1/32", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 32, 0, 32, 0, 32);
+ test_in_addr_prefix_from_string_one("::1/33", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 33, 0, 33, 0, 33);
+ test_in_addr_prefix_from_string_one("::1/64", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 64, 0, 64, 0, 64);
+ test_in_addr_prefix_from_string_one("::1/128", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128, 0, 128, 0, 128);
+ test_in_addr_prefix_from_string_one("::1/129", AF_INET6, -ERANGE, NULL, 0, -ERANGE, 0, -ERANGE, 0);
+ test_in_addr_prefix_from_string_one("::1/-1", AF_INET6, -ERANGE, NULL, 0, -ERANGE, 0, -ERANGE, 0);
+}
+
+static void test_in_addr_prefix_to_string_valid(int family, const char *p) {
+ _cleanup_free_ char *str = NULL;
+ union in_addr_union u;
+ unsigned char l;
+
+ log_info("%s: %s", __func__, p);
+
+ assert_se(in_addr_prefix_from_string(p, family, &u, &l) >= 0);
+ assert_se(in_addr_prefix_to_string(family, &u, l, &str) >= 0);
+ assert_se(streq(str, p));
+}
+
+static void test_in_addr_prefix_to_string_unoptimized(int family, const char *p) {
+ _cleanup_free_ char *str1 = NULL, *str2 = NULL;
+ union in_addr_union u1, u2;
+ unsigned char len1, len2;
+
+ log_info("%s: %s", __func__, p);
+
+ assert_se(in_addr_prefix_from_string(p, family, &u1, &len1) >= 0);
+ assert_se(in_addr_prefix_to_string(family, &u1, len1, &str1) >= 0);
+ assert_se(in_addr_prefix_from_string(str1, family, &u2, &len2) >= 0);
+ assert_se(in_addr_prefix_to_string(family, &u2, len2, &str2) >= 0);
+
+ assert_se(streq(str1, str2));
+ assert_se(len1 == len2);
+ assert_se(in_addr_equal(family, &u1, &u2) > 0);
+}
+
+static void test_in_addr_prefix_to_string(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_prefix_to_string_valid(AF_INET, "0.0.0.0/32");
+ test_in_addr_prefix_to_string_valid(AF_INET, "1.2.3.4/0");
+ test_in_addr_prefix_to_string_valid(AF_INET, "1.2.3.4/24");
+ test_in_addr_prefix_to_string_valid(AF_INET, "1.2.3.4/32");
+ test_in_addr_prefix_to_string_valid(AF_INET, "255.255.255.255/32");
+
+ test_in_addr_prefix_to_string_valid(AF_INET6, "::1/128");
+ test_in_addr_prefix_to_string_valid(AF_INET6, "fd00:abcd::1/64");
+ test_in_addr_prefix_to_string_valid(AF_INET6, "fd00:abcd::1234:1/64");
+ test_in_addr_prefix_to_string_valid(AF_INET6, "1111:2222:3333:4444:5555:6666:7777:8888/128");
+
+ test_in_addr_prefix_to_string_unoptimized(AF_INET, "0.0.0.0");
+ test_in_addr_prefix_to_string_unoptimized(AF_INET, "192.168.0.1");
+
+ test_in_addr_prefix_to_string_unoptimized(AF_INET6, "fd00:0000:0000:0000:0000:0000:0000:0001/64");
+ test_in_addr_prefix_to_string_unoptimized(AF_INET6, "fd00:1111::0000:2222:3333:4444:0001/64");
+}
+
+static void test_in_addr_random_prefix(void) {
+ _cleanup_free_ char *str = NULL;
+ union in_addr_union a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(in_addr_from_string(AF_INET, "192.168.10.1", &a) >= 0);
+
+ assert_se(in_addr_random_prefix(AF_INET, &a, 31, 32) >= 0);
+ assert_se(in_addr_to_string(AF_INET, &a, &str) >= 0);
+ assert_se(STR_IN_SET(str, "192.168.10.0", "192.168.10.1"));
+ str = mfree(str);
+
+ assert_se(in_addr_random_prefix(AF_INET, &a, 24, 26) >= 0);
+ assert_se(in_addr_to_string(AF_INET, &a, &str) >= 0);
+ assert_se(startswith(str, "192.168.10."));
+ str = mfree(str);
+
+ assert_se(in_addr_random_prefix(AF_INET, &a, 16, 24) >= 0);
+ assert_se(in_addr_to_string(AF_INET, &a, &str) >= 0);
+ assert_se(fnmatch("192.168.[0-9]*.0", str, 0) == 0);
+ str = mfree(str);
+
+ assert_se(in_addr_random_prefix(AF_INET, &a, 8, 24) >= 0);
+ assert_se(in_addr_to_string(AF_INET, &a, &str) >= 0);
+ assert_se(fnmatch("192.[0-9]*.[0-9]*.0", str, 0) == 0);
+ str = mfree(str);
+
+ assert_se(in_addr_random_prefix(AF_INET, &a, 8, 16) >= 0);
+ assert_se(in_addr_to_string(AF_INET, &a, &str) >= 0);
+ assert_se(fnmatch("192.[0-9]*.0.0", str, 0) == 0);
+ str = mfree(str);
+
+ assert_se(in_addr_from_string(AF_INET6, "fd00::1", &a) >= 0);
+
+ assert_se(in_addr_random_prefix(AF_INET6, &a, 16, 64) >= 0);
+ assert_se(in_addr_to_string(AF_INET6, &a, &str) >= 0);
+ assert_se(startswith(str, "fd00:"));
+ str = mfree(str);
+
+ assert_se(in_addr_random_prefix(AF_INET6, &a, 8, 16) >= 0);
+ assert_se(in_addr_to_string(AF_INET6, &a, &str) >= 0);
+ assert_se(fnmatch("fd??::", str, 0) == 0);
+ str = mfree(str);
+}
+
+static void test_in_addr_is_null(void) {
+ union in_addr_union i = {};
+
+ log_info("/* %s */", __func__);
+
+ assert_se(in_addr_is_null(AF_INET, &i) == true);
+ assert_se(in_addr_is_null(AF_INET6, &i) == true);
+
+ i.in.s_addr = 0x1000000;
+ assert_se(in_addr_is_null(AF_INET, &i) == false);
+ assert_se(in_addr_is_null(AF_INET6, &i) == false);
+
+ assert_se(in_addr_is_null(-1, &i) == -EAFNOSUPPORT);
+}
+
+static void test_in_addr_prefix_intersect_one(unsigned f, const char *a, unsigned apl, const char *b, unsigned bpl, int result) {
+ union in_addr_union ua, ub;
+
+ assert_se(in_addr_from_string(f, a, &ua) >= 0);
+ assert_se(in_addr_from_string(f, b, &ub) >= 0);
+
+ assert_se(in_addr_prefix_intersect(f, &ua, apl, &ub, bpl) == result);
+}
+
+static void test_in_addr_prefix_intersect(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_prefix_intersect_one(AF_INET, "255.255.255.255", 32, "255.255.255.254", 32, 0);
+ test_in_addr_prefix_intersect_one(AF_INET, "255.255.255.255", 0, "255.255.255.255", 32, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "0.0.0.0", 0, "47.11.8.15", 32, 1);
+
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 24, "1.1.1.1", 24, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "2.2.2.2", 24, "1.1.1.1", 24, 0);
+
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 24, "1.1.1.127", 25, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 24, "1.1.1.127", 26, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 25, "1.1.1.127", 25, 1);
+ test_in_addr_prefix_intersect_one(AF_INET, "1.1.1.1", 25, "1.1.1.255", 25, 0);
+
+ test_in_addr_prefix_intersect_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 128, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe", 128, 0);
+ test_in_addr_prefix_intersect_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 0, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 128, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "::", 0, "beef:beef:beef:beef:beef:beef:beef:beef", 128, 1);
+
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::2", 64, "1::2", 64, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "2::2", 64, "1::2", 64, 0);
+
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::1", 120, "1::007f", 121, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::1", 120, "1::007f", 122, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::1", 121, "1::007f", 121, 1);
+ test_in_addr_prefix_intersect_one(AF_INET6, "1::1", 121, "1::00ff", 121, 0);
+}
+
+static void test_in_addr_prefix_next_one(unsigned f, const char *before, unsigned pl, const char *after) {
+ union in_addr_union ubefore, uafter, t;
+
+ assert_se(in_addr_from_string(f, before, &ubefore) >= 0);
+
+ t = ubefore;
+ assert_se((in_addr_prefix_next(f, &t, pl) > 0) == !!after);
+
+ if (after) {
+ assert_se(in_addr_from_string(f, after, &uafter) >= 0);
+ assert_se(in_addr_equal(f, &t, &uafter) > 0);
+ }
+}
+
+static void test_in_addr_prefix_next(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_prefix_next_one(AF_INET, "192.168.0.0", 24, "192.168.1.0");
+ test_in_addr_prefix_next_one(AF_INET, "192.168.0.0", 16, "192.169.0.0");
+ test_in_addr_prefix_next_one(AF_INET, "192.168.0.0", 20, "192.168.16.0");
+
+ test_in_addr_prefix_next_one(AF_INET, "0.0.0.0", 32, "0.0.0.1");
+ test_in_addr_prefix_next_one(AF_INET, "255.255.255.255", 32, NULL);
+ test_in_addr_prefix_next_one(AF_INET, "255.255.255.0", 24, NULL);
+
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 128, "4400::0001");
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 120, "4400::0100");
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 127, "4400::0002");
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 8, "4500::");
+ test_in_addr_prefix_next_one(AF_INET6, "4400::", 7, "4600::");
+
+ test_in_addr_prefix_next_one(AF_INET6, "::", 128, "::1");
+
+ test_in_addr_prefix_next_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 128, NULL);
+ test_in_addr_prefix_next_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ff00", 120, NULL);
+}
+
+static void test_in_addr_prefix_nth_one(unsigned f, const char *before, unsigned pl, uint64_t nth, const char *after) {
+ union in_addr_union ubefore, uafter, t;
+
+ assert_se(in_addr_from_string(f, before, &ubefore) >= 0);
+
+ t = ubefore;
+ assert_se((in_addr_prefix_nth(f, &t, pl, nth) > 0) == !!after);
+
+ if (after) {
+ assert_se(in_addr_from_string(f, after, &uafter) >= 0);
+ assert_se(in_addr_equal(f, &t, &uafter) > 0);
+ }
+}
+
+static void test_in_addr_prefix_nth(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_prefix_nth_one(AF_INET, "192.168.0.0", 24, 0, "192.168.0.0");
+ test_in_addr_prefix_nth_one(AF_INET, "192.168.0.0", 24, 1, "192.168.1.0");
+ test_in_addr_prefix_nth_one(AF_INET, "192.168.0.0", 24, 4, "192.168.4.0");
+ test_in_addr_prefix_nth_one(AF_INET, "192.168.0.0", 25, 1, "192.168.0.128");
+ test_in_addr_prefix_nth_one(AF_INET, "192.168.255.0", 25, 1, "192.168.255.128");
+ test_in_addr_prefix_nth_one(AF_INET, "192.168.255.0", 24, 0, "192.168.255.0");
+ test_in_addr_prefix_nth_one(AF_INET, "255.255.255.255", 32, 1, NULL);
+ test_in_addr_prefix_nth_one(AF_INET, "255.255.255.255", 0, 1, NULL);
+
+ test_in_addr_prefix_nth_one(AF_INET6, "4400::", 8, 1, "4500::");
+ test_in_addr_prefix_nth_one(AF_INET6, "4400::", 7, 1, "4600::");
+ test_in_addr_prefix_nth_one(AF_INET6, "4400::", 64, 1, "4400:0:0:1::");
+ test_in_addr_prefix_nth_one(AF_INET6, "4400::", 64, 2, "4400:0:0:2::");
+ test_in_addr_prefix_nth_one(AF_INET6, "4400::", 64, 0xbad, "4400:0:0:0bad::");
+ test_in_addr_prefix_nth_one(AF_INET6, "4400:0:0:ffff::", 64, 1, "4400:0:1::");
+ test_in_addr_prefix_nth_one(AF_INET6, "4400::", 56, ((uint64_t)1<<48) -1, "44ff:ffff:ffff:ff00::");
+ test_in_addr_prefix_nth_one(AF_INET6, "0000::", 8, 255, "ff00::");
+ test_in_addr_prefix_nth_one(AF_INET6, "0000::", 8, 256, NULL);
+ test_in_addr_prefix_nth_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 128, 1, NULL);
+ test_in_addr_prefix_nth_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 0, 1, NULL);
+}
+
+static void test_in_addr_to_string_one(int f, const char *addr) {
+ union in_addr_union ua;
+ _cleanup_free_ char *r = NULL;
+
+ assert_se(in_addr_from_string(f, addr, &ua) >= 0);
+ assert_se(in_addr_to_string(f, &ua, &r) >= 0);
+ printf("test_in_addr_to_string_one: %s == %s\n", addr, r);
+ assert_se(streq(addr, r));
+}
+
+static void test_in_addr_to_string(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_to_string_one(AF_INET, "192.168.0.1");
+ test_in_addr_to_string_one(AF_INET, "10.11.12.13");
+ test_in_addr_to_string_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff");
+ test_in_addr_to_string_one(AF_INET6, "::1");
+ test_in_addr_to_string_one(AF_INET6, "fe80::");
+}
+
+int main(int argc, char *argv[]) {
+ test_in_addr_prefix_from_string();
+ test_in_addr_random_prefix();
+ test_in_addr_prefix_to_string();
+ test_in_addr_is_null();
+ test_in_addr_prefix_intersect();
+ test_in_addr_prefix_next();
+ test_in_addr_prefix_nth();
+ test_in_addr_to_string();
+
+ return 0;
+}
diff --git a/src/test/test-install-root.c b/src/test/test-install-root.c
new file mode 100644
index 0000000..aedec54
--- /dev/null
+++ b/src/test/test-install-root.c
@@ -0,0 +1,1266 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "install.h"
+#include "mkdir.h"
+#include "rm-rf.h"
+#include "special.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_basic_mask_and_enable(const char *root) {
+ const char *p;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "e.service", NULL) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/a.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/b.service");
+ assert_se(symlink("a.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ p = strjoina(root, "/usr/lib/systemd/system/c.service");
+ assert_se(symlink("/usr/lib/systemd/system/a.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ p = strjoina(root, "/usr/lib/systemd/system/d.service");
+ assert_se(symlink("c.service", p) >= 0);
+
+ /* This one is interesting, as d follows a relative, then an absolute symlink */
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ assert_se(unit_file_mask(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/dev/null"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/a.service");
+ assert_se(streq(changes[0].path, p));
+
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+
+ /* Enabling a masked unit should fail! */
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) == -ERFKILL);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_unmask(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/a.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/a.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/a.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ /* Enabling it again should succeed but be a NOP */
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/a.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ /* Disabling a disabled unit must succeed but be a NOP */
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ /* Let's enable this indirectly via a symlink */
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("d.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/a.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/a.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ /* Let's try to reenable */
+
+ assert_se(unit_file_reenable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("b.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/a.service");
+ assert_se(streq(changes[0].path, p));
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/a.service"));
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "a.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "b.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "c.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ /* Test masking with relative symlinks */
+
+ p = strjoina(root, "/usr/lib/systemd/system/e.service");
+ assert_se(symlink("../../../../../../dev/null", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "e.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "e.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+
+ assert_se(unlink(p) == 0);
+ assert_se(symlink("/usr/../dev/null", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "e.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "e.service", &state) >= 0 && state == UNIT_FILE_MASKED);
+
+ assert_se(unlink(p) == 0);
+}
+
+static void test_linked_units(const char *root) {
+ const char *p, *q;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0, i;
+
+ /*
+ * We'll test three cases here:
+ *
+ * a) a unit file in /opt, that we use "systemctl link" and
+ * "systemctl enable" on to make it available to the system
+ *
+ * b) a unit file in /opt, that is statically linked into
+ * /usr/lib/systemd/system, that "enable" should work on
+ * correctly.
+ *
+ * c) a unit file in /opt, that is linked into
+ * /etc/systemd/system, and where "enable" should result in
+ * -ELOOP, since using information from /etc to generate
+ * information in /etc should not be allowed.
+ */
+
+ p = strjoina(root, "/opt/linked.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/opt/linked2.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/opt/linked3.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked2.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked3.service", NULL) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/linked2.service");
+ assert_se(symlink("/opt/linked2.service", p) >= 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/linked3.service");
+ assert_se(symlink("/opt/linked3.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked3.service", &state) >= 0 && state == UNIT_FILE_LINKED);
+
+ /* First, let's link the unit into the search path */
+ assert_se(unit_file_link(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("/opt/linked.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/opt/linked.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/linked.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", &state) >= 0 && state == UNIT_FILE_LINKED);
+
+ /* Let's unlink it from the search path again */
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/linked.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", NULL) == -ENOENT);
+
+ /* Now, let's not just link it, but also enable it */
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("/opt/linked.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/linked.service");
+ q = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/linked.service");
+ for (i = 0 ; i < n_changes; i++) {
+ assert_se(changes[i].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[i].source, "/opt/linked.service"));
+
+ if (p && streq(changes[i].path, p))
+ p = NULL;
+ else if (q && streq(changes[i].path, q))
+ q = NULL;
+ else
+ assert_not_reached("wut?");
+ }
+ assert(!p && !q);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+
+ /* And let's unlink it again */
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/linked.service");
+ q = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/linked.service");
+ for (i = 0; i < n_changes; i++) {
+ assert_se(changes[i].type == UNIT_FILE_UNLINK);
+
+ if (p && streq(changes[i].path, p))
+ p = NULL;
+ else if (q && streq(changes[i].path, q))
+ q = NULL;
+ else
+ assert_not_reached("wut?");
+ }
+ assert(!p && !q);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", NULL) == -ENOENT);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked2.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/linked2.service");
+ q = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/linked2.service");
+ for (i = 0 ; i < n_changes; i++) {
+ assert_se(changes[i].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[i].source, "/opt/linked2.service"));
+
+ if (p && streq(changes[i].path, p))
+ p = NULL;
+ else if (q && streq(changes[i].path, q))
+ q = NULL;
+ else
+ assert_not_reached("wut?");
+ }
+ assert(!p && !q);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked3.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(startswith(changes[0].path, root));
+ assert_se(endswith(changes[0].path, "linked3.service"));
+ assert_se(streq(changes[0].source, "/opt/linked3.service"));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+}
+
+static void test_default(const char *root) {
+ _cleanup_free_ char *def = NULL;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *p;
+
+ p = strjoina(root, "/usr/lib/systemd/system/test-default-real.target");
+ assert_se(write_string_file(p, "# pretty much empty", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/test-default.target");
+ assert_se(symlink("test-default-real.target", p) >= 0);
+
+ assert_se(unit_file_get_default(UNIT_FILE_SYSTEM, root, &def) == -ENOENT);
+
+ assert_se(unit_file_set_default(UNIT_FILE_SYSTEM, 0, root, "idontexist.target", &changes, &n_changes) == -ENOENT);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == -ENOENT);
+ assert_se(streq_ptr(changes[0].path, "idontexist.target"));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_default(UNIT_FILE_SYSTEM, root, &def) == -ENOENT);
+
+ assert_se(unit_file_set_default(UNIT_FILE_SYSTEM, 0, root, "test-default.target", &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/test-default-real.target"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR "/" SPECIAL_DEFAULT_TARGET);
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_default(UNIT_FILE_SYSTEM, root, &def) >= 0);
+ assert_se(streq_ptr(def, "test-default-real.target"));
+}
+
+static void test_add_dependency(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *p;
+
+ p = strjoina(root, "/usr/lib/systemd/system/real-add-dependency-test-target.target");
+ assert_se(write_string_file(p, "# pretty much empty", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/add-dependency-test-target.target");
+ assert_se(symlink("real-add-dependency-test-target.target", p) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/real-add-dependency-test-service.service");
+ assert_se(write_string_file(p, "# pretty much empty", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/add-dependency-test-service.service");
+ assert_se(symlink("real-add-dependency-test-service.service", p) >= 0);
+
+ assert_se(unit_file_add_dependency(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("add-dependency-test-service.service"), "add-dependency-test-target.target", UNIT_WANTS, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/real-add-dependency-test-service.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/real-add-dependency-test-target.target.wants/real-add-dependency-test-service.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+}
+
+static void test_template_enable(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitFileState state;
+ const char *p;
+
+ log_info("== %s ==", __func__);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/template@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "DefaultInstance=def\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/template-symlink@.service");
+ assert_se(symlink("template@.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ log_info("== %s with template@.service enabled ==", __func__);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/template@def.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ log_info("== %s with template@foo.service enabled ==", __func__);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@foo.service"), &changes, &n_changes) >= 0);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/template@foo.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@foo.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@quux.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@quux.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ log_info("== %s with template-symlink@quux.service enabled ==", __func__);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template-symlink@quux.service"), &changes, &n_changes) >= 0);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/template@quux.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template@quux.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@quux.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+}
+
+static void test_indirect(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitFileState state;
+ const char *p;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirecta.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectb.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectc.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/indirecta.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "Also=indirectb.service\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/indirectb.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/indirectc.service");
+ assert_se(symlink("indirecta.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirecta.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectb.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectc.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("indirectc.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/indirectb.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/indirectb.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirecta.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectb.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectc.service", &state) >= 0 && state == UNIT_FILE_ALIAS);
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("indirectc.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/indirectb.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+}
+
+static void test_preset_and_list(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0, i;
+ const char *p, *q;
+ UnitFileState state;
+ bool got_yes = false, got_no = false;
+ UnitFileList *fl;
+ Hashmap *h;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/preset-yes.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/preset-no.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system-preset/test.preset");
+ assert_se(write_string_file(p,
+ "enable *-yes.*\n"
+ "disable *\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-yes.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/preset-yes.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/preset-yes.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-yes.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/preset-yes.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-no.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset_all(UNIT_FILE_SYSTEM, 0, root, UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+
+ assert_se(n_changes > 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/preset-yes.service");
+
+ for (i = 0; i < n_changes; i++) {
+
+ if (changes[i].type == UNIT_FILE_SYMLINK) {
+ assert_se(streq(changes[i].source, "/usr/lib/systemd/system/preset-yes.service"));
+ assert_se(streq(changes[i].path, p));
+ } else
+ assert_se(changes[i].type == UNIT_FILE_UNLINK);
+ }
+
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(h = hashmap_new(&string_hash_ops));
+ assert_se(unit_file_get_list(UNIT_FILE_SYSTEM, root, h, NULL, NULL) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/preset-yes.service");
+ q = strjoina(root, "/usr/lib/systemd/system/preset-no.service");
+
+ HASHMAP_FOREACH(fl, h) {
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, basename(fl->path), &state) >= 0);
+ assert_se(fl->state == state);
+
+ if (streq(fl->path, p)) {
+ got_yes = true;
+ assert_se(fl->state == UNIT_FILE_ENABLED);
+ } else if (streq(fl->path, q)) {
+ got_no = true;
+ assert_se(fl->state == UNIT_FILE_DISABLED);
+ } else
+ assert_se(IN_SET(fl->state, UNIT_FILE_DISABLED, UNIT_FILE_STATIC, UNIT_FILE_INDIRECT, UNIT_FILE_ALIAS));
+ }
+
+ unit_file_list_free(h);
+
+ assert_se(got_yes && got_no);
+}
+
+static void test_revert(const char *root) {
+ const char *p;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+
+ assert(root);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "xx.service", NULL) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "yy.service", NULL) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/xx.service");
+ assert_se(write_string_file(p, "# Empty\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "xx.service", NULL) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "xx.service", &state) >= 0 && state == UNIT_FILE_STATIC);
+
+ /* Initially there's nothing to revert */
+ assert_se(unit_file_revert(UNIT_FILE_SYSTEM, root, STRV_MAKE("xx.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/xx.service");
+ assert_se(write_string_file(p, "# Empty override\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ /* Revert the override file */
+ assert_se(unit_file_revert(UNIT_FILE_SYSTEM, root, STRV_MAKE("xx.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/xx.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p, "# Empty dropin\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ /* Revert the dropin file */
+ assert_se(unit_file_revert(UNIT_FILE_SYSTEM, root, STRV_MAKE("xx.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[0].path, p));
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/xx.service.d");
+ assert_se(changes[1].type == UNIT_FILE_UNLINK);
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+}
+
+static void test_preset_order(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *p;
+ UnitFileState state;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/prefix-1.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/prefix-2.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system-preset/test.preset");
+ assert_se(write_string_file(p,
+ "enable prefix-1.service\n"
+ "disable prefix-*.service\n"
+ "enable prefix-2.service\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("prefix-1.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/prefix-1.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/prefix-1.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("prefix-2.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes == 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+}
+
+static void test_static_instance(const char *root) {
+ UnitFileState state;
+ const char *p;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/static-instance@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/static-instance@foo.service");
+ assert_se(symlink("static-instance@.service", p) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) >= 0 && state == UNIT_FILE_STATIC);
+}
+
+static void test_with_dropin(const char *root) {
+ const char *p;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4a.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4b.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-1.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-1.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/with-dropin-2.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-2.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-3.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/with-dropin-3.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-4a.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/with-dropin-4a.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "Also=with-dropin-4b.service\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4a.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-4b.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4b.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-1.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-1.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-1.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-1.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/graphical.target.wants/with-dropin-1.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-2.service"), &changes, &n_changes) == 1);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, SYSTEM_CONFIG_UNIT_DIR"/with-dropin-2.service"));
+ assert_se(streq(changes[1].source, SYSTEM_CONFIG_UNIT_DIR"/with-dropin-2.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-2.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/graphical.target.wants/with-dropin-2.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-3.service"), &changes, &n_changes) == 1);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-3.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-3.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-3.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/graphical.target.wants/with-dropin-3.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-4a.service"), &changes, &n_changes) == 2);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-4a.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-4b.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-4a.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-4b.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4a.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-4b.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+}
+
+static void test_with_dropin_template(const char *root) {
+ const char *p;
+ UnitFileState state;
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1@.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2@.service", &state) == -ENOENT);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3@.service", &state) == -ENOENT);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-1@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-1@.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-2@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-2@instance-1.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "WantedBy=graphical.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-3@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "DefaultInstance=instance-1\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system/with-dropin-3@.service.d/dropin.conf");
+ assert_se(mkdir_parents(p, 0755) >= 0);
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "DefaultInstance=instance-2\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-1@instance-1.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-1@.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-1@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-1@instance-1.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/graphical.target.wants/with-dropin-1@instance-1.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-2@instance-1.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 2);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(changes[1].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-2@.service"));
+ assert_se(streq(changes[1].source, "/usr/lib/systemd/system/with-dropin-2@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-2@instance-1.service");
+ assert_se(streq(changes[0].path, p));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/graphical.target.wants/with-dropin-2@instance-1.service");
+ assert_se(streq(changes[1].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-2@instance-2.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-2@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-2@instance-2.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("with-dropin-3@.service"), &changes, &n_changes) == 1);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ assert_se(streq(changes[0].source, "/usr/lib/systemd/system/with-dropin-3@.service"));
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/with-dropin-3@instance-2.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-1@instance-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2@instance-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-2@instance-2.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3@instance-1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "with-dropin-3@instance-2.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+}
+
+static void test_preset_multiple_instances(const char *root) {
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ const char *p;
+ UnitFileState state;
+
+ /* Set up template service files and preset file */
+ p = strjoina(root, "/usr/lib/systemd/system/foo@.service");
+ assert_se(write_string_file(p,
+ "[Install]\n"
+ "DefaultInstance=def\n"
+ "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ p = strjoina(root, "/usr/lib/systemd/system-preset/test.preset");
+ assert_se(write_string_file(p,
+ "enable foo@.service bar0 bar1 bartest\n"
+ "enable emptylist@.service\n" /* This line ensures the old functionality for templated unit still works */
+ "disable *\n" , WRITE_STRING_FILE_CREATE) >= 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar0.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ /* Preset a single instantiated unit specified in the list */
+ assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("foo@bar0.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar0.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_SYMLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/foo@bar0.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("foo@bar0.service"), &changes, &n_changes) >= 0);
+ assert_se(n_changes == 1);
+ assert_se(changes[0].type == UNIT_FILE_UNLINK);
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/multi-user.target.wants/foo@bar0.service");
+ assert_se(streq(changes[0].path, p));
+ unit_file_changes_free(changes, n_changes);
+ changes = NULL; n_changes = 0;
+
+ /* Check for preset-all case, only instances on the list should be enabled, not including the default instance */
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bartest.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+
+ assert_se(unit_file_preset_all(UNIT_FILE_SYSTEM, 0, root, UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
+ assert_se(n_changes > 0);
+
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar0.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bar1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+ assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "foo@bartest.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
+
+ unit_file_changes_free(changes, n_changes);
+}
+
+static void verify_one(
+ const UnitFileInstallInfo *i,
+ const char *alias,
+ int expected,
+ const char *updated_name) {
+ int r;
+ static const UnitFileInstallInfo *last_info = NULL;
+ _cleanup_free_ char *alias2 = NULL;
+
+ if (i != last_info)
+ log_info("-- %s --", (last_info = i)->name);
+
+ r = unit_file_verify_alias(i, alias, &alias2);
+ log_info_errno(r, "alias %s ← %s: %d/%m (expected %d)%s%s%s",
+ i->name, alias, r, expected,
+ alias2 ? " [" : "", strempty(alias2),
+ alias2 ? "]" : "");
+ assert(r == expected);
+
+ /* This is is test for "instance propagation". This propagation matters mostly for WantedBy= and
+ * RequiredBy= settings, and less so for Alias=. The only case where it should happen is when we have
+ * an Alias=alias@.service an instantiated template template@instance. In that case the instance name
+ * should be propagated into the alias as alias@instance. */
+ assert(streq_ptr(alias2, updated_name));
+}
+
+static void test_verify_alias(void) {
+ const UnitFileInstallInfo
+ plain_service = { .name = (char*) "plain.service" },
+ bare_template = { .name = (char*) "template1@.service" },
+ di_template = { .name = (char*) "template2@.service",
+ .default_instance = (char*) "di" },
+ inst_template = { .name = (char*) "template3@inst.service" },
+ di_inst_template = { .name = (char*) "template4@inst.service",
+ .default_instance = (char*) "di" };
+
+ verify_one(&plain_service, "alias.service", 0, NULL);
+ verify_one(&plain_service, "alias.socket", -EXDEV, NULL);
+ verify_one(&plain_service, "alias@.service", -EXDEV, NULL);
+ verify_one(&plain_service, "alias@inst.service", -EXDEV, NULL);
+ verify_one(&plain_service, "foo.target.wants/plain.service", 0, NULL);
+ verify_one(&plain_service, "foo.target.wants/plain.socket", -EXDEV, NULL);
+ verify_one(&plain_service, "foo.target.wants/plain@.service", -EXDEV, NULL);
+ verify_one(&plain_service, "foo.target.wants/service", -EXDEV, NULL);
+ verify_one(&plain_service, "foo.target.requires/plain.service", 0, NULL);
+ verify_one(&plain_service, "foo.target.requires/plain.socket", -EXDEV, NULL);
+ verify_one(&plain_service, "foo.target.requires/plain@.service", -EXDEV, NULL);
+ verify_one(&plain_service, "foo.target.requires/service", -EXDEV, NULL);
+ verify_one(&plain_service, "foo.target.conf/plain.service", -EXDEV, NULL);
+ verify_one(&plain_service, "foo.service/plain.service", -EXDEV, NULL); /* missing dir suffix */
+ verify_one(&plain_service, "asdf.requires/plain.service", -EXDEV, NULL); /* invalid unit name component */
+
+ verify_one(&bare_template, "alias.service", -EXDEV, NULL);
+ verify_one(&bare_template, "alias.socket", -EXDEV, NULL);
+ verify_one(&bare_template, "alias@.socket", -EXDEV, NULL);
+ verify_one(&bare_template, "alias@inst.socket", -EXDEV, NULL);
+ /* A general alias alias@.service → template1@.service. */
+ verify_one(&bare_template, "alias@.service", 0, NULL);
+ /* Only a specific instance is aliased, see the discussion in https://github.com/systemd/systemd/pull/13119. */
+ verify_one(&bare_template, "alias@inst.service", 0, NULL);
+ verify_one(&bare_template, "foo.target.wants/plain.service", -EXDEV, NULL);
+ verify_one(&bare_template, "foo.target.wants/plain.socket", -EXDEV, NULL);
+ verify_one(&bare_template, "foo.target.wants/plain@.service", -EXDEV, NULL);
+ /* Name mismatch: we cannot allow this, because plain@foo.service would be pulled in by foo.target,
+ * but would not be resolveable on its own, since systemd doesn't know how to load the fragment. */
+ verify_one(&bare_template, "foo.target.wants/plain@foo.service", -EXDEV, NULL);
+ verify_one(&bare_template, "foo.target.wants/template1@foo.service", 0, NULL);
+ verify_one(&bare_template, "foo.target.wants/service", -EXDEV, NULL);
+ verify_one(&bare_template, "foo.target.requires/plain.service", -EXDEV, NULL);
+ verify_one(&bare_template, "foo.target.requires/plain.socket", -EXDEV, NULL);
+ verify_one(&bare_template, "foo.target.requires/plain@.service", -EXDEV, NULL); /* instance missing */
+ verify_one(&bare_template, "foo.target.requires/template1@inst.service", 0, NULL);
+ verify_one(&bare_template, "foo.target.requires/service", -EXDEV, NULL);
+ verify_one(&bare_template, "foo.target.conf/plain.service", -EXDEV, NULL);
+ verify_one(&bare_template, "FOO@.target.requires/plain@.service", -EXDEV, NULL); /* template name mismatch */
+ verify_one(&bare_template, "FOO@inst.target.requires/plain@.service", -EXDEV, NULL);
+ verify_one(&bare_template, "FOO@inst.target.requires/plain@inst.service", -EXDEV, NULL);
+ verify_one(&bare_template, "FOO@.target.requires/template1@.service", 0, NULL); /* instance propagated */
+ verify_one(&bare_template, "FOO@inst.target.requires/template1@.service", -EXDEV, NULL); /* instance missing */
+ verify_one(&bare_template, "FOO@inst.target.requires/template1@inst.service", 0, NULL); /* instance provided */
+
+ verify_one(&di_template, "alias.service", -EXDEV, NULL);
+ verify_one(&di_template, "alias.socket", -EXDEV, NULL);
+ verify_one(&di_template, "alias@.socket", -EXDEV, NULL);
+ verify_one(&di_template, "alias@inst.socket", -EXDEV, NULL);
+ verify_one(&di_template, "alias@inst.service", 0, NULL);
+ verify_one(&di_template, "alias@.service", 0, NULL);
+ verify_one(&di_template, "alias@di.service", 0, NULL);
+ verify_one(&di_template, "foo.target.wants/plain.service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.wants/plain.socket", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.wants/plain@.service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.wants/plain@di.service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.wants/template2@di.service", 0, NULL);
+ verify_one(&di_template, "foo.target.wants/service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.requires/plain.service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.requires/plain.socket", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.requires/plain@.service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.requires/plain@di.service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.requires/plain@foo.service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.requires/template2@.service", -EXDEV, NULL); /* instance missing */
+ verify_one(&di_template, "foo.target.requires/template2@di.service", 0, NULL);
+ verify_one(&di_template, "foo.target.requires/service", -EXDEV, NULL);
+ verify_one(&di_template, "foo.target.conf/plain.service", -EXDEV, NULL);
+
+ verify_one(&inst_template, "alias.service", -EXDEV, NULL);
+ verify_one(&inst_template, "alias.socket", -EXDEV, NULL);
+ verify_one(&inst_template, "alias@.socket", -EXDEV, NULL);
+ verify_one(&inst_template, "alias@inst.socket", -EXDEV, NULL);
+ verify_one(&inst_template, "alias@inst.service", 0, NULL);
+ verify_one(&inst_template, "alias@.service", 0, "alias@inst.service");
+ verify_one(&inst_template, "alias@di.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.wants/plain.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.wants/plain.socket", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.wants/plain@.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.wants/plain@di.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.wants/plain@inst.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.wants/template3@foo.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.wants/template3@inst.service", 0, NULL);
+ verify_one(&inst_template, "bar.target.wants/service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.requires/plain.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.requires/plain.socket", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.requires/plain@.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.requires/plain@di.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.requires/plain@inst.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.requires/template3@foo.service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.requires/template3@inst.service", 0, NULL);
+ verify_one(&inst_template, "bar.target.requires/service", -EXDEV, NULL);
+ verify_one(&inst_template, "bar.target.conf/plain.service", -EXDEV, NULL);
+ verify_one(&inst_template, "BAR@.target.requires/plain@.service", -EXDEV, NULL); /* template name mismatch */
+ verify_one(&inst_template, "BAR@inst.target.requires/plain@.service", -EXDEV, NULL);
+ verify_one(&inst_template, "BAR@inst.target.requires/plain@inst.service", -EXDEV, NULL);
+ verify_one(&inst_template, "BAR@.target.requires/template3@.service", -EXDEV, NULL); /* instance missing */
+ verify_one(&inst_template, "BAR@inst.target.requires/template3@.service", -EXDEV, NULL); /* instance missing */
+ verify_one(&inst_template, "BAR@inst.target.requires/template3@inst.service", 0, NULL); /* instance provided */
+ verify_one(&inst_template, "BAR@inst.target.requires/template3@ins2.service", -EXDEV, NULL); /* instance mismatch */
+
+ /* explicit alias overrides DefaultInstance */
+ verify_one(&di_inst_template, "alias.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "alias.socket", -EXDEV, NULL);
+ verify_one(&di_inst_template, "alias@.socket", -EXDEV, NULL);
+ verify_one(&di_inst_template, "alias@inst.socket", -EXDEV, NULL);
+ verify_one(&di_inst_template, "alias@inst.service", 0, NULL);
+ verify_one(&di_inst_template, "alias@.service", 0, "alias@inst.service");
+ verify_one(&di_inst_template, "alias@di.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.wants/plain.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.wants/plain.socket", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.wants/plain@.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.wants/plain@di.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.wants/template4@foo.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.wants/template4@inst.service", 0, NULL);
+ verify_one(&di_inst_template, "goo.target.wants/template4@di.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.wants/service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.requires/plain.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.requires/plain.socket", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.requires/plain@.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.requires/plain@di.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.requires/plain@inst.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.requires/template4@foo.service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.requires/template4@inst.service", 0, NULL);
+ verify_one(&di_inst_template, "goo.target.requires/service", -EXDEV, NULL);
+ verify_one(&di_inst_template, "goo.target.conf/plain.service", -EXDEV, NULL);
+}
+
+int main(int argc, char *argv[]) {
+ char root[] = "/tmp/rootXXXXXX";
+ const char *p;
+
+ assert_se(mkdtemp(root));
+
+ p = strjoina(root, "/usr/lib/systemd/system/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ p = strjoina(root, SYSTEM_CONFIG_UNIT_DIR"/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ p = strjoina(root, "/run/systemd/system/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ p = strjoina(root, "/opt/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ p = strjoina(root, "/usr/lib/systemd/system-preset/");
+ assert_se(mkdir_p(p, 0755) >= 0);
+
+ test_basic_mask_and_enable(root);
+ test_linked_units(root);
+ test_default(root);
+ test_add_dependency(root);
+ test_template_enable(root);
+ test_indirect(root);
+ test_preset_and_list(root);
+ test_preset_order(root);
+ test_preset_multiple_instances(root);
+ test_revert(root);
+ test_static_instance(root);
+ test_with_dropin(root);
+ test_with_dropin_template(root);
+
+ assert_se(rm_rf(root, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ test_verify_alias();
+
+ return 0;
+}
diff --git a/src/test/test-install.c b/src/test/test-install.c
new file mode 100644
index 0000000..7cd91ef
--- /dev/null
+++ b/src/test/test-install.c
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "install.h"
+#include "tests.h"
+
+static void dump_changes(UnitFileChange *c, unsigned n) {
+ unsigned i;
+
+ assert_se(n == 0 || c);
+
+ for (i = 0; i < n; i++) {
+ if (c[i].type == UNIT_FILE_UNLINK)
+ printf("rm '%s'\n", c[i].path);
+ else if (c[i].type == UNIT_FILE_SYMLINK)
+ printf("ln -s '%s' '%s'\n", c[i].source, c[i].path);
+ }
+}
+
+int main(int argc, char* argv[]) {
+ Hashmap *h;
+ UnitFileList *p;
+ int r;
+ const char *const files[] = { "avahi-daemon.service", NULL };
+ const char *const files2[] = { "/home/lennart/test.service", NULL };
+ UnitFileChange *changes = NULL;
+ size_t n_changes = 0;
+ UnitFileState state = 0;
+
+ test_setup_logging(LOG_DEBUG);
+
+ h = hashmap_new(&string_hash_ops);
+ r = unit_file_get_list(UNIT_FILE_SYSTEM, NULL, h, NULL, NULL);
+ assert_se(r == 0);
+
+ HASHMAP_FOREACH(p, h) {
+ UnitFileState s = _UNIT_FILE_STATE_INVALID;
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(p->path), &s);
+
+ assert_se((r < 0 && p->state == UNIT_FILE_BAD) ||
+ (p->state == s));
+
+ fprintf(stderr, "%s (%s)\n",
+ p->path,
+ unit_file_state_to_string(p->state));
+ }
+
+ unit_file_list_free(h);
+
+ log_info("/*** enable **/");
+
+ r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ log_info("/*** enable2 **/");
+
+ r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_ENABLED);
+
+ log_info("/*** disable ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_DISABLED);
+
+ log_info("/*** mask ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+ log_info("/*** mask2 ***/");
+ r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_MASKED);
+
+ log_info("/*** unmask ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+ log_info("/*** unmask2 ***/");
+ r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_DISABLED);
+
+ log_info("/*** mask ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_MASKED);
+
+ log_info("/*** disable ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+ log_info("/*** disable2 ***/");
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_MASKED);
+
+ log_info("/*** umask ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, files[0], &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_DISABLED);
+
+ log_info("/*** enable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_ENABLED);
+
+ log_info("/*** disable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r < 0);
+
+ log_info("/*** link files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_link(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_LINKED);
+
+ log_info("/*** disable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r < 0);
+
+ log_info("/*** link files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_link(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_LINKED);
+
+ log_info("/*** reenable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_reenable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_ENABLED);
+
+ log_info("/*** disable files2 ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files2[0]), &state);
+ assert_se(r < 0);
+ log_info("/*** preset files ***/");
+ changes = NULL;
+ n_changes = 0;
+
+ r = unit_file_preset(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, UNIT_FILE_PRESET_FULL, &changes, &n_changes);
+ assert_se(r >= 0);
+
+ dump_changes(changes, n_changes);
+ unit_file_changes_free(changes, n_changes);
+
+ r = unit_file_get_state(UNIT_FILE_SYSTEM, NULL, basename(files[0]), &state);
+ assert_se(r >= 0);
+ assert_se(state == UNIT_FILE_ENABLED);
+
+ return 0;
+}
diff --git a/src/test/test-io-util.c b/src/test/test-io-util.c
new file mode 100644
index 0000000..104c022
--- /dev/null
+++ b/src/test/test-io-util.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "macro.h"
+
+static void test_sparse_write_one(int fd, const char *buffer, size_t n) {
+ char check[n];
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(ftruncate(fd, 0) >= 0);
+ assert_se(sparse_write(fd, buffer, n, 4) == (ssize_t) n);
+
+ assert_se(lseek(fd, 0, SEEK_CUR) == (off_t) n);
+ assert_se(ftruncate(fd, n) >= 0);
+
+ assert_se(lseek(fd, 0, SEEK_SET) == 0);
+ assert_se(read(fd, check, n) == (ssize_t) n);
+
+ assert_se(memcmp(buffer, check, n) == 0);
+}
+
+static void test_sparse_write(void) {
+ const char test_a[] = "test";
+ const char test_b[] = "\0\0\0\0test\0\0\0\0";
+ const char test_c[] = "\0\0test\0\0\0\0";
+ const char test_d[] = "\0\0test\0\0\0test\0\0\0\0test\0\0\0\0\0test\0\0\0test\0\0\0\0test\0\0\0\0\0\0\0\0";
+ const char test_e[] = "test\0\0\0\0test";
+ _cleanup_close_ int fd = -1;
+ char fn[] = "/tmp/sparseXXXXXX";
+
+ fd = mkostemp(fn, O_CLOEXEC);
+ assert_se(fd >= 0);
+ unlink(fn);
+
+ test_sparse_write_one(fd, test_a, sizeof(test_a));
+ test_sparse_write_one(fd, test_b, sizeof(test_b));
+ test_sparse_write_one(fd, test_c, sizeof(test_c));
+ test_sparse_write_one(fd, test_d, sizeof(test_d));
+ test_sparse_write_one(fd, test_e, sizeof(test_e));
+}
+
+int main(void) {
+ test_sparse_write();
+
+ return 0;
+}
diff --git a/src/test/test-ip-protocol-list.c b/src/test/test-ip-protocol-list.c
new file mode 100644
index 0000000..15bbbde
--- /dev/null
+++ b/src/test/test-ip-protocol-list.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+
+#include "macro.h"
+#include "ip-protocol-list.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+static void test_int(int i) {
+ char str[DECIMAL_STR_MAX(int)];
+
+ assert_se(ip_protocol_from_name(ip_protocol_to_name(i)) == i);
+
+ xsprintf(str, "%i", i);
+ assert_se(ip_protocol_from_name(ip_protocol_to_name(parse_ip_protocol(str))) == i);
+}
+
+static void test_int_fail(int i) {
+ char str[DECIMAL_STR_MAX(int)];
+
+ assert_se(!ip_protocol_to_name(i));
+
+ xsprintf(str, "%i", i);
+ assert_se(parse_ip_protocol(str) == -EINVAL);
+}
+
+static void test_str(const char *s) {
+ assert_se(streq(ip_protocol_to_name(ip_protocol_from_name(s)), s));
+ assert_se(streq(ip_protocol_to_name(parse_ip_protocol(s)), s));
+}
+
+static void test_str_fail(const char *s) {
+ assert_se(ip_protocol_from_name(s) == -EINVAL);
+ assert_se(parse_ip_protocol(s) == -EINVAL);
+}
+
+static void test_parse_ip_protocol(const char *s, int expected) {
+ assert_se(parse_ip_protocol(s) == expected);
+}
+
+int main(int argc, const char *argv[]) {
+ test_int(IPPROTO_TCP);
+ test_int(IPPROTO_DCCP);
+ test_int_fail(-1);
+ test_int_fail(1024 * 1024);
+
+ test_str("sctp");
+ test_str("udp");
+ test_str_fail("hoge");
+ test_str_fail("-1");
+ test_str_fail("1000000000");
+
+ test_parse_ip_protocol("sctp", IPPROTO_SCTP);
+ test_parse_ip_protocol("ScTp", IPPROTO_SCTP);
+ test_parse_ip_protocol("ip", IPPROTO_IP);
+ test_parse_ip_protocol("", IPPROTO_IP);
+ test_parse_ip_protocol("1", 1);
+ test_parse_ip_protocol("0", 0);
+ test_parse_ip_protocol("-10", -EINVAL);
+ test_parse_ip_protocol("100000000", -EINVAL);
+
+ return 0;
+}
diff --git a/src/test/test-ipcrm.c b/src/test/test-ipcrm.c
new file mode 100644
index 0000000..238f0bf
--- /dev/null
+++ b/src/test/test-ipcrm.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "clean-ipc.h"
+#include "errno-util.h"
+#include "main-func.h"
+#include "tests.h"
+#include "user-util.h"
+
+static int run(int argc, char *argv[]) {
+ uid_t uid;
+ int r;
+ const char* name = argv[1] ?: NOBODY_USER_NAME;
+
+ test_setup_logging(LOG_INFO);
+
+ r = get_user_creds(&name, &uid, NULL, NULL, NULL, 0);
+ if (r == -ESRCH)
+ return log_tests_skipped("Failed to resolve user");
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve \"%s\": %m", name);
+
+ r = clean_ipc_by_uid(uid);
+ if (ERRNO_IS_PRIVILEGE(r))
+ return log_tests_skipped("No privileges");
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/test/test-job-type.c b/src/test/test-job-type.c
new file mode 100644
index 0000000..024d976
--- /dev/null
+++ b/src/test/test-job-type.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "service.h"
+#include "unit.h"
+
+int main(int argc, char *argv[]) {
+ JobType a, b, c, ab, bc, ab_c, bc_a, a_bc;
+ const ServiceState test_states[] = { SERVICE_DEAD, SERVICE_RUNNING };
+ unsigned i;
+ bool merged_ab;
+
+ /* fake a unit */
+ static Service s = {
+ .meta.load_state = UNIT_LOADED,
+ .type = SERVICE_SIMPLE,
+ };
+ Unit *u = UNIT(&s);
+
+ for (i = 0; i < ELEMENTSOF(test_states); i++) {
+ s.state = test_states[i];
+ printf("\nWith collapsing for service state %s\n"
+ "=========================================\n", service_state_to_string(s.state));
+ for (a = 0; a < _JOB_TYPE_MAX_MERGING; a++) {
+ for (b = 0; b < _JOB_TYPE_MAX_MERGING; b++) {
+
+ ab = a;
+ merged_ab = (job_type_merge_and_collapse(&ab, b, u) >= 0);
+
+ if (!job_type_is_mergeable(a, b)) {
+ assert_se(!merged_ab);
+ printf("Not mergeable: %s + %s\n", job_type_to_string(a), job_type_to_string(b));
+ continue;
+ }
+
+ assert_se(merged_ab);
+ printf("%s + %s = %s\n", job_type_to_string(a), job_type_to_string(b), job_type_to_string(ab));
+
+ for (c = 0; c < _JOB_TYPE_MAX_MERGING; c++) {
+
+ /* Verify transitivity of mergeability of job types */
+ assert_se(!job_type_is_mergeable(a, b) ||
+ !job_type_is_mergeable(b, c) ||
+ job_type_is_mergeable(a, c));
+
+ /* Verify that merged entries can be merged with the same entries
+ * they can be merged with separately */
+ assert_se(!job_type_is_mergeable(a, c) || job_type_is_mergeable(ab, c));
+ assert_se(!job_type_is_mergeable(b, c) || job_type_is_mergeable(ab, c));
+
+ /* Verify that if a merged with b is not mergeable with c, then
+ * either a or b is not mergeable with c either. */
+ assert_se(job_type_is_mergeable(ab, c) || !job_type_is_mergeable(a, c) || !job_type_is_mergeable(b, c));
+
+ bc = b;
+ if (job_type_merge_and_collapse(&bc, c, u) >= 0) {
+
+ /* Verify associativity */
+
+ ab_c = ab;
+ assert_se(job_type_merge_and_collapse(&ab_c, c, u) == 0);
+
+ bc_a = bc;
+ assert_se(job_type_merge_and_collapse(&bc_a, a, u) == 0);
+
+ a_bc = a;
+ assert_se(job_type_merge_and_collapse(&a_bc, bc, u) == 0);
+
+ assert_se(ab_c == bc_a);
+ assert_se(ab_c == a_bc);
+
+ printf("%s + %s + %s = %s\n", job_type_to_string(a), job_type_to_string(b), job_type_to_string(c), job_type_to_string(ab_c));
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/src/test/test-journal-importer.c b/src/test/test-journal-importer.c
new file mode 100644
index 0000000..da266d9
--- /dev/null
+++ b/src/test/test-journal-importer.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "journal-importer.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void assert_iovec_entry(const struct iovec *iovec, const char* content) {
+ assert_se(strlen(content) == iovec->iov_len);
+ assert_se(memcmp(content, iovec->iov_base, iovec->iov_len) == 0);
+}
+
+#define COREDUMP_PROC_GROUP \
+ "COREDUMP_PROC_CGROUP=1:name=systemd:/\n" \
+ "0::/user.slice/user-1002.slice/user@1002.service/gnome-terminal-server.service\n"
+
+static void test_basic_parsing(void) {
+ _cleanup_(journal_importer_cleanup) JournalImporter imp = JOURNAL_IMPORTER_INIT(-1);
+ _cleanup_free_ char *journal_data_path = NULL;
+ int r;
+
+ assert_se(get_testdata_dir("journal-data/journal-1.txt", &journal_data_path) >= 0);
+ imp.fd = open(journal_data_path, O_RDONLY|O_CLOEXEC);
+ assert_se(imp.fd >= 0);
+
+ do
+ r = journal_importer_process_data(&imp);
+ while (r == 0 && !journal_importer_eof(&imp));
+ assert_se(r == 1);
+
+ /* We read one entry, so we should get EOF on next read, but not yet */
+ assert_se(!journal_importer_eof(&imp));
+
+ assert_se(imp.iovw.count == 6);
+ assert_iovec_entry(&imp.iovw.iovec[0], "_BOOT_ID=1531fd22ec84429e85ae888b12fadb91");
+ assert_iovec_entry(&imp.iovw.iovec[1], "_TRANSPORT=journal");
+ assert_iovec_entry(&imp.iovw.iovec[2], COREDUMP_PROC_GROUP);
+ assert_iovec_entry(&imp.iovw.iovec[3], "COREDUMP_RLIMIT=-1");
+ assert_iovec_entry(&imp.iovw.iovec[4], COREDUMP_PROC_GROUP);
+ assert_iovec_entry(&imp.iovw.iovec[5], "_SOURCE_REALTIME_TIMESTAMP=1478389147837945");
+
+ /* Let's check if we get EOF now */
+ r = journal_importer_process_data(&imp);
+ assert_se(r == 0);
+ assert_se(journal_importer_eof(&imp));
+}
+
+static void test_bad_input(void) {
+ _cleanup_(journal_importer_cleanup) JournalImporter imp = JOURNAL_IMPORTER_INIT(-1);
+ _cleanup_free_ char *journal_data_path = NULL;
+ int r;
+
+ assert_se(get_testdata_dir("journal-data/journal-1.txt", &journal_data_path) >= 0);
+ imp.fd = open(journal_data_path, O_RDONLY|O_CLOEXEC);
+ assert_se(imp.fd >= 0);
+
+ do
+ r = journal_importer_process_data(&imp);
+ while (!journal_importer_eof(&imp));
+ assert_se(r == 0); /* If we don't have enough input, 0 is returned */
+
+ assert_se(journal_importer_eof(&imp));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_basic_parsing();
+ test_bad_input();
+
+ return 0;
+}
diff --git a/src/test/test-json.c b/src/test/test-json.c
new file mode 100644
index 0000000..1d4b119
--- /dev/null
+++ b/src/test/test-json.c
@@ -0,0 +1,578 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <math.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "json-internal.h"
+#include "json.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_tokenizer(const char *data, ...) {
+ unsigned line = 0, column = 0;
+ void *state = NULL;
+ va_list ap;
+
+ _cleanup_free_ char *cdata;
+ assert_se(cdata = cescape(data));
+ log_info("/* %s data=\"%s\" */", __func__, cdata);
+
+ va_start(ap, data);
+
+ for (;;) {
+ unsigned token_line, token_column;
+ _cleanup_free_ char *str = NULL;
+ JsonValue v = JSON_VALUE_NULL;
+ int t, tt;
+
+ t = json_tokenize(&data, &str, &v, &token_line, &token_column, &state, &line, &column);
+ tt = va_arg(ap, int);
+
+ assert_se(t == tt);
+
+ if (t == JSON_TOKEN_END || t < 0)
+ break;
+
+ else if (t == JSON_TOKEN_STRING) {
+ const char *nn;
+
+ nn = va_arg(ap, const char *);
+ assert_se(streq_ptr(nn, str));
+
+ } else if (t == JSON_TOKEN_REAL) {
+ long double d;
+
+ d = va_arg(ap, long double);
+
+ /* Valgrind doesn't support long double calculations and automatically downgrades to 80bit:
+ * http://www.valgrind.org/docs/manual/manual-core.html#manual-core.limits.
+ * Some architectures might not support long double either.
+ */
+
+ assert_se(fabsl(d - v.real) < 1e-10 ||
+ fabsl((d - v.real) / v.real) < 1e-10);
+
+ } else if (t == JSON_TOKEN_INTEGER) {
+ intmax_t i;
+
+ i = va_arg(ap, intmax_t);
+ assert_se(i == v.integer);
+
+ } else if (t == JSON_TOKEN_UNSIGNED) {
+ uintmax_t u;
+
+ u = va_arg(ap, uintmax_t);
+ assert_se(u == v.unsig);
+
+ } else if (t == JSON_TOKEN_BOOLEAN) {
+ bool b;
+
+ b = va_arg(ap, int);
+ assert_se(b == v.boolean);
+ }
+ }
+
+ va_end(ap);
+}
+
+typedef void (*Test)(JsonVariant *);
+
+static void test_variant(const char *data, Test test) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ _cleanup_free_ char *cdata;
+ assert_se(cdata = cescape(data));
+ log_info("/* %s data=\"%s\" */", __func__, cdata);
+
+ r = json_parse(data, 0, &v, NULL, NULL);
+ assert_se(r == 0);
+ assert_se(v);
+
+ r = json_variant_format(v, 0, &s);
+ assert_se(r >= 0);
+ assert_se(s);
+ assert_se((size_t) r == strlen(s));
+
+ log_info("formatted normally: %s\n", s);
+
+ r = json_parse(data, JSON_PARSE_SENSITIVE, &w, NULL, NULL);
+ assert_se(r == 0);
+ assert_se(w);
+ assert_se(json_variant_has_type(v, json_variant_type(w)));
+ assert_se(json_variant_has_type(w, json_variant_type(v)));
+ assert_se(json_variant_equal(v, w));
+
+ s = mfree(s);
+ w = json_variant_unref(w);
+
+ r = json_variant_format(v, JSON_FORMAT_PRETTY, &s);
+ assert_se(r >= 0);
+ assert_se(s);
+ assert_se((size_t) r == strlen(s));
+
+ log_info("formatted prettily:\n%s", s);
+
+ r = json_parse(data, 0, &w, NULL, NULL);
+ assert_se(r == 0);
+ assert_se(w);
+
+ assert_se(json_variant_has_type(v, json_variant_type(w)));
+ assert_se(json_variant_has_type(w, json_variant_type(v)));
+ assert_se(json_variant_equal(v, w));
+
+ s = mfree(s);
+ r = json_variant_format(v, JSON_FORMAT_COLOR, &s);
+ assert_se(r >= 0);
+ assert_se(s);
+ assert_se((size_t) r == strlen(s));
+ printf("Normal with color: %s\n", s);
+
+ s = mfree(s);
+ r = json_variant_format(v, JSON_FORMAT_COLOR|JSON_FORMAT_PRETTY, &s);
+ assert_se(r >= 0);
+ assert_se(s);
+ assert_se((size_t) r == strlen(s));
+ printf("Pretty with color:\n%s\n", s);
+
+ if (test)
+ test(v);
+}
+
+static void test_1(JsonVariant *v) {
+ JsonVariant *p, *q;
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ /* 3 keys + 3 values */
+ assert_se(json_variant_elements(v) == 6);
+
+ /* has k */
+ p = json_variant_by_key(v, "k");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_STRING);
+
+ /* k equals v */
+ assert_se(streq(json_variant_string(p), "v"));
+
+ /* has foo */
+ p = json_variant_by_key(v, "foo");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_ARRAY && json_variant_elements(p) == 3);
+
+ /* check foo[0] = 1, foo[1] = 2, foo[2] = 3 */
+ for (i = 0; i < 3; ++i) {
+ q = json_variant_by_index(p, i);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_UNSIGNED && json_variant_unsigned(q) == (i+1));
+ assert_se(q && json_variant_has_type(q, JSON_VARIANT_INTEGER) && json_variant_integer(q) == (i+1));
+ }
+
+ /* has bar */
+ p = json_variant_by_key(v, "bar");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_OBJECT && json_variant_elements(p) == 2);
+
+ /* zap is null */
+ q = json_variant_by_key(p, "zap");
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_NULL);
+}
+
+static void test_2(JsonVariant *v) {
+ JsonVariant *p, *q;
+
+ log_info("/* %s */", __func__);
+
+ /* 2 keys + 2 values */
+ assert_se(json_variant_elements(v) == 4);
+
+ /* has mutant */
+ p = json_variant_by_key(v, "mutant");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_ARRAY && json_variant_elements(p) == 4);
+
+ /* mutant[0] == 1 */
+ q = json_variant_by_index(p, 0);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_UNSIGNED && json_variant_unsigned(q) == 1);
+ assert_se(q && json_variant_has_type(q, JSON_VARIANT_INTEGER) && json_variant_integer(q) == 1);
+
+ /* mutant[1] == null */
+ q = json_variant_by_index(p, 1);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_NULL);
+
+ /* mutant[2] == "1" */
+ q = json_variant_by_index(p, 2);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_STRING && streq(json_variant_string(q), "1"));
+
+ /* mutant[3] == JSON_VARIANT_OBJECT */
+ q = json_variant_by_index(p, 3);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_OBJECT && json_variant_elements(q) == 2);
+
+ /* has 1 */
+ p = json_variant_by_key(q, "1");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_ARRAY && json_variant_elements(p) == 2);
+
+ /* "1"[0] == 1 */
+ q = json_variant_by_index(p, 0);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_UNSIGNED && json_variant_unsigned(q) == 1);
+ assert_se(q && json_variant_has_type(q, JSON_VARIANT_INTEGER) && json_variant_integer(q) == 1);
+
+ /* "1"[1] == "1" */
+ q = json_variant_by_index(p, 1);
+ assert_se(q && json_variant_type(q) == JSON_VARIANT_STRING && streq(json_variant_string(q), "1"));
+
+ /* has thisisaverylongproperty */
+ p = json_variant_by_key(v, "thisisaverylongproperty");
+ assert_se(p && json_variant_type(p) == JSON_VARIANT_REAL && fabsl(json_variant_real(p) - 1.27) < 0.001);
+}
+
+static void test_zeroes(JsonVariant *v) {
+ /* Make sure zero is how we expect it. */
+ log_info("/* %s */", __func__);
+
+ assert_se(json_variant_elements(v) == 13);
+
+ for (size_t i = 0; i < json_variant_elements(v); i++) {
+ JsonVariant *w;
+ size_t j;
+
+ assert_se(w = json_variant_by_index(v, i));
+
+ assert_se(json_variant_integer(w) == 0);
+ assert_se(json_variant_unsigned(w) == 0U);
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+ assert_se(json_variant_real(w) == 0.0L);
+ REENABLE_WARNING;
+
+ assert_se(json_variant_is_integer(w));
+ assert_se(json_variant_is_unsigned(w));
+ assert_se(json_variant_is_real(w));
+ assert_se(json_variant_is_number(w));
+
+ assert_se(!json_variant_is_negative(w));
+
+ assert_se(IN_SET(json_variant_type(w), JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL));
+
+ for (j = 0; j < json_variant_elements(v); j++) {
+ JsonVariant *q;
+
+ assert_se(q = json_variant_by_index(v, j));
+
+ assert_se(json_variant_equal(w, q));
+ }
+ }
+}
+
+static void test_build(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_(json_variant_unrefp) JsonVariant *a = NULL, *b = NULL;
+ _cleanup_free_ char *s = NULL, *t = NULL;
+
+ assert_se(json_build(&a, JSON_BUILD_STRING("hallo")) >= 0);
+ assert_se(json_build(&b, JSON_BUILD_LITERAL(" \"hallo\" ")) >= 0);
+ assert_se(json_variant_equal(a, b));
+
+ b = json_variant_unref(b);
+
+ assert_se(json_build(&b, JSON_BUILD_VARIANT(a)) >= 0);
+ assert_se(json_variant_equal(a, b));
+
+ b = json_variant_unref(b);
+ assert_se(json_build(&b, JSON_BUILD_STRING("pief")) >= 0);
+ assert_se(!json_variant_equal(a, b));
+
+ a = json_variant_unref(a);
+ b = json_variant_unref(b);
+
+ assert_se(json_build(&a, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("one", JSON_BUILD_INTEGER(7)),
+ JSON_BUILD_PAIR("two", JSON_BUILD_REAL(2.0)),
+ JSON_BUILD_PAIR("three", JSON_BUILD_INTEGER(0)))) >= 0);
+
+ assert_se(json_build(&b, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("two", JSON_BUILD_INTEGER(2)),
+ JSON_BUILD_PAIR("three", JSON_BUILD_REAL(0)),
+ JSON_BUILD_PAIR("one", JSON_BUILD_REAL(7)))) >= 0);
+
+ assert_se(json_variant_equal(a, b));
+
+ a = json_variant_unref(a);
+ b = json_variant_unref(b);
+
+ const char* arr_1234[] = {"one", "two", "three", "four", NULL};
+ assert_se(json_build(&a, JSON_BUILD_ARRAY(JSON_BUILD_OBJECT(JSON_BUILD_PAIR("x", JSON_BUILD_BOOLEAN(true)),
+ JSON_BUILD_PAIR("y", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("this", JSON_BUILD_NULL)))),
+ JSON_BUILD_VARIANT(NULL),
+ JSON_BUILD_LITERAL(NULL),
+ JSON_BUILD_STRING(NULL),
+ JSON_BUILD_NULL,
+ JSON_BUILD_INTEGER(77),
+ JSON_BUILD_ARRAY(JSON_BUILD_VARIANT(JSON_VARIANT_STRING_CONST("foobar")),
+ JSON_BUILD_VARIANT(JSON_VARIANT_STRING_CONST("zzz"))),
+ JSON_BUILD_STRV((char**) arr_1234))) >= 0);
+
+ assert_se(json_variant_format(a, 0, &s) >= 0);
+ log_info("GOT: %s\n", s);
+ assert_se(json_parse(s, 0, &b, NULL, NULL) >= 0);
+ assert_se(json_variant_equal(a, b));
+
+ a = json_variant_unref(a);
+ b = json_variant_unref(b);
+
+ assert_se(json_build(&a, JSON_BUILD_REAL(M_PIl)) >= 0);
+
+ s = mfree(s);
+ assert_se(json_variant_format(a, 0, &s) >= 0);
+ log_info("GOT: %s\n", s);
+ assert_se(json_parse(s, 0, &b, NULL, NULL) >= 0);
+ assert_se(json_variant_format(b, 0, &t) >= 0);
+ log_info("GOT: %s\n", t);
+
+ assert_se(streq(s, t));
+
+ a = json_variant_unref(a);
+ b = json_variant_unref(b);
+
+ assert_se(json_build(&a, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("x", JSON_BUILD_STRING("y")),
+ JSON_BUILD_PAIR("z", JSON_BUILD_STRING("a")),
+ JSON_BUILD_PAIR("b", JSON_BUILD_STRING("c"))
+ )) >= 0);
+
+ assert_se(json_build(&b, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("x", JSON_BUILD_STRING("y")),
+ JSON_BUILD_PAIR_CONDITION(false, "p", JSON_BUILD_STRING("q")),
+ JSON_BUILD_PAIR_CONDITION(true, "z", JSON_BUILD_STRING("a")),
+ JSON_BUILD_PAIR_CONDITION(false, "j", JSON_BUILD_ARRAY(JSON_BUILD_STRING("k"), JSON_BUILD_STRING("u"), JSON_BUILD_STRING("i"))),
+ JSON_BUILD_PAIR("b", JSON_BUILD_STRING("c"))
+ )) >= 0);
+
+ assert_se(json_variant_equal(a, b));
+}
+
+static void test_source(void) {
+ static const char data[] =
+ "\n"
+ "\n"
+ "{\n"
+ "\"foo\" : \"bar\", \n"
+ "\"qüüx\" : [ 1, 2, 3,\n"
+ "4,\n"
+ "5 ],\n"
+ "\"miep\" : { \"hallo\" : 1 },\n"
+ "\n"
+ "\"zzzzzz\" \n"
+ ":\n"
+ "[ true, \n"
+ "false, 7.5, {} ]\n"
+ "}\n";
+
+ log_info("/* %s */", __func__);
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ printf("--- original begin ---\n"
+ "%s"
+ "--- original end ---\n", data);
+
+ assert_se(f = fmemopen_unlocked((void*) data, strlen(data), "r"));
+
+ assert_se(json_parse_file(f, "waldo", 0, &v, NULL, NULL) >= 0);
+
+ printf("--- non-pretty begin ---\n");
+ json_variant_dump(v, 0, stdout, NULL);
+ printf("\n--- non-pretty end ---\n");
+
+ printf("--- pretty begin ---\n");
+ json_variant_dump(v, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR|JSON_FORMAT_SOURCE, stdout, NULL);
+ printf("--- pretty end ---\n");
+}
+
+static void test_depth(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ int r;
+
+ v = JSON_VARIANT_STRING_CONST("start");
+
+ /* Let's verify that the maximum depth checks work */
+
+ for (unsigned i = 0;; i++) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+
+ assert_se(i <= UINT16_MAX);
+ if (i & 1)
+ r = json_variant_new_array(&w, &v, 1);
+ else
+ r = json_variant_new_object(&w, (JsonVariant*[]) { JSON_VARIANT_STRING_CONST("key"), v }, 2);
+ if (r == -ELNRNG) {
+ log_info("max depth at %u", i);
+ break;
+ }
+#if HAS_FEATURE_MEMORY_SANITIZER
+ /* msan doesn't like the stack nesting to be too deep. Let's quit early. */
+ if (i >= 128) {
+ log_info("quitting early at depth %u", i);
+ break;
+ }
+#endif
+
+ assert_se(r >= 0);
+
+ json_variant_unref(v);
+ v = TAKE_PTR(w);
+ }
+
+ json_variant_dump(v, 0, stdout, NULL);
+ fputs("\n", stdout);
+}
+
+static void test_normalize(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL;
+ _cleanup_free_ char *t = NULL;
+
+ assert_se(json_build(&v, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("b", JSON_BUILD_STRING("x")),
+ JSON_BUILD_PAIR("c", JSON_BUILD_STRING("y")),
+ JSON_BUILD_PAIR("a", JSON_BUILD_STRING("z")))) >= 0);
+
+ assert_se(!json_variant_is_sorted(v));
+ assert_se(!json_variant_is_normalized(v));
+
+ assert_se(json_variant_format(v, 0, &t) >= 0);
+ assert_se(streq(t, "{\"b\":\"x\",\"c\":\"y\",\"a\":\"z\"}"));
+ t = mfree(t);
+
+ assert_se(json_build(&w, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("bar", JSON_BUILD_STRING("zzz")),
+ JSON_BUILD_PAIR("foo", JSON_BUILD_VARIANT(v)))) >= 0);
+
+ assert_se(json_variant_is_sorted(w));
+ assert_se(!json_variant_is_normalized(w));
+
+ assert_se(json_variant_format(w, 0, &t) >= 0);
+ assert_se(streq(t, "{\"bar\":\"zzz\",\"foo\":{\"b\":\"x\",\"c\":\"y\",\"a\":\"z\"}}"));
+ t = mfree(t);
+
+ assert_se(json_variant_sort(&v) >= 0);
+ assert_se(json_variant_is_sorted(v));
+ assert_se(json_variant_is_normalized(v));
+
+ assert_se(json_variant_format(v, 0, &t) >= 0);
+ assert_se(streq(t, "{\"a\":\"z\",\"b\":\"x\",\"c\":\"y\"}"));
+ t = mfree(t);
+
+ assert_se(json_variant_normalize(&w) >= 0);
+ assert_se(json_variant_is_sorted(w));
+ assert_se(json_variant_is_normalized(w));
+
+ assert_se(json_variant_format(w, 0, &t) >= 0);
+ assert_se(streq(t, "{\"bar\":\"zzz\",\"foo\":{\"a\":\"z\",\"b\":\"x\",\"c\":\"y\"}}"));
+ t = mfree(t);
+}
+
+static void test_bisect(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ /* Tests the bisection logic in json_variant_by_key() */
+
+ for (char c = 'z'; c >= 'a'; c--) {
+
+ if ((c % 3) == 0)
+ continue;
+
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ assert_se(json_variant_new_stringn(&w, (char[4]) { '<', c, c, '>' }, 4) >= 0);
+ assert_se(json_variant_set_field(&v, (char[2]) { c, 0 }, w) >= 0);
+ }
+
+ json_variant_dump(v, JSON_FORMAT_COLOR|JSON_FORMAT_PRETTY, NULL, NULL);
+
+ assert_se(!json_variant_is_sorted(v));
+ assert_se(!json_variant_is_normalized(v));
+ assert_se(json_variant_normalize(&v) >= 0);
+ assert_se(json_variant_is_sorted(v));
+ assert_se(json_variant_is_normalized(v));
+
+ json_variant_dump(v, JSON_FORMAT_COLOR|JSON_FORMAT_PRETTY, NULL, NULL);
+
+ for (char c = 'a'; c <= 'z'; c++) {
+ JsonVariant *k;
+ const char *z;
+
+ k = json_variant_by_key(v, (char[2]) { c, 0 });
+ assert_se(!k == ((c % 3) == 0));
+
+ if (!k)
+ continue;
+
+ assert_se(json_variant_is_string(k));
+
+ z = (char[5]){ '<', c, c, '>', 0};
+ assert_se(streq(json_variant_string(k), z));
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_tokenizer("x", -EINVAL);
+ test_tokenizer("", JSON_TOKEN_END);
+ test_tokenizer(" ", JSON_TOKEN_END);
+ test_tokenizer("0", JSON_TOKEN_UNSIGNED, (uintmax_t) 0, JSON_TOKEN_END);
+ test_tokenizer("-0", JSON_TOKEN_INTEGER, (intmax_t) 0, JSON_TOKEN_END);
+ test_tokenizer("1234", JSON_TOKEN_UNSIGNED, (uintmax_t) 1234, JSON_TOKEN_END);
+ test_tokenizer("-1234", JSON_TOKEN_INTEGER, (intmax_t) -1234, JSON_TOKEN_END);
+ test_tokenizer("18446744073709551615", JSON_TOKEN_UNSIGNED, (uintmax_t) UINT64_MAX, JSON_TOKEN_END);
+ test_tokenizer("-9223372036854775808", JSON_TOKEN_INTEGER, (intmax_t) INT64_MIN, JSON_TOKEN_END);
+ test_tokenizer("18446744073709551616", JSON_TOKEN_REAL, (long double) 18446744073709551616.0L, JSON_TOKEN_END);
+ test_tokenizer("-9223372036854775809", JSON_TOKEN_REAL, (long double) -9223372036854775809.0L, JSON_TOKEN_END);
+ test_tokenizer("-1234", JSON_TOKEN_INTEGER, (intmax_t) -1234, JSON_TOKEN_END);
+ test_tokenizer("3.141", JSON_TOKEN_REAL, (long double) 3.141, JSON_TOKEN_END);
+ test_tokenizer("0.0", JSON_TOKEN_REAL, (long double) 0.0, JSON_TOKEN_END);
+ test_tokenizer("7e3", JSON_TOKEN_REAL, (long double) 7e3, JSON_TOKEN_END);
+ test_tokenizer("-7e-3", JSON_TOKEN_REAL, (long double) -7e-3, JSON_TOKEN_END);
+ test_tokenizer("true", JSON_TOKEN_BOOLEAN, true, JSON_TOKEN_END);
+ test_tokenizer("false", JSON_TOKEN_BOOLEAN, false, JSON_TOKEN_END);
+ test_tokenizer("null", JSON_TOKEN_NULL, JSON_TOKEN_END);
+ test_tokenizer("{}", JSON_TOKEN_OBJECT_OPEN, JSON_TOKEN_OBJECT_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("\t {\n} \n", JSON_TOKEN_OBJECT_OPEN, JSON_TOKEN_OBJECT_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("[]", JSON_TOKEN_ARRAY_OPEN, JSON_TOKEN_ARRAY_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("\t [] \n\n", JSON_TOKEN_ARRAY_OPEN, JSON_TOKEN_ARRAY_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("\"\"", JSON_TOKEN_STRING, "", JSON_TOKEN_END);
+ test_tokenizer("\"foo\"", JSON_TOKEN_STRING, "foo", JSON_TOKEN_END);
+ test_tokenizer("\"foo\\nfoo\"", JSON_TOKEN_STRING, "foo\nfoo", JSON_TOKEN_END);
+ test_tokenizer("{\"foo\" : \"bar\"}", JSON_TOKEN_OBJECT_OPEN, JSON_TOKEN_STRING, "foo", JSON_TOKEN_COLON, JSON_TOKEN_STRING, "bar", JSON_TOKEN_OBJECT_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("{\"foo\" : [true, false]}", JSON_TOKEN_OBJECT_OPEN, JSON_TOKEN_STRING, "foo", JSON_TOKEN_COLON, JSON_TOKEN_ARRAY_OPEN, JSON_TOKEN_BOOLEAN, true, JSON_TOKEN_COMMA, JSON_TOKEN_BOOLEAN, false, JSON_TOKEN_ARRAY_CLOSE, JSON_TOKEN_OBJECT_CLOSE, JSON_TOKEN_END);
+ test_tokenizer("\"\xef\xbf\xbd\"", JSON_TOKEN_STRING, "\xef\xbf\xbd", JSON_TOKEN_END);
+ test_tokenizer("\"\\ufffd\"", JSON_TOKEN_STRING, "\xef\xbf\xbd", JSON_TOKEN_END);
+ test_tokenizer("\"\\uf\"", -EINVAL);
+ test_tokenizer("\"\\ud800a\"", -EINVAL);
+ test_tokenizer("\"\\udc00\\udc00\"", -EINVAL);
+ test_tokenizer("\"\\ud801\\udc37\"", JSON_TOKEN_STRING, "\xf0\x90\x90\xb7", JSON_TOKEN_END);
+
+ test_tokenizer("[1, 2, -3]", JSON_TOKEN_ARRAY_OPEN, JSON_TOKEN_UNSIGNED, (uintmax_t) 1, JSON_TOKEN_COMMA, JSON_TOKEN_UNSIGNED, (uintmax_t) 2, JSON_TOKEN_COMMA, JSON_TOKEN_INTEGER, (intmax_t) -3, JSON_TOKEN_ARRAY_CLOSE, JSON_TOKEN_END);
+
+ test_variant("{\"k\": \"v\", \"foo\": [1, 2, 3], \"bar\": {\"zap\": null}}", test_1);
+ test_variant("{\"mutant\": [1, null, \"1\", {\"1\": [1, \"1\"]}], \"thisisaverylongproperty\": 1.27}", test_2);
+ test_variant("{\"foo\" : \"\\u0935\\u093f\\u0935\\u0947\\u0915\\u0916\\u094d\\u092f\\u093e\\u0924\\u093f\\u0930\\u0935\\u093f\\u092a\\u094d\\u0932\\u0935\\u093e\\u0020\\u0939\\u093e\\u0928\\u094b\\u092a\\u093e\\u092f\\u0903\\u0964\"}", NULL);
+
+ test_variant("[ 0, -0, 0.0, -0.0, 0.000, -0.000, 0e0, -0e0, 0e+0, -0e-0, 0e-0, -0e000, 0e+000 ]", test_zeroes);
+
+ test_build();
+ test_source();
+ test_depth();
+
+ test_normalize();
+ test_bisect();
+
+ return 0;
+}
diff --git a/src/test/test-libcrypt-util.c b/src/test/test-libcrypt-util.c
new file mode 100644
index 0000000..cd296d6
--- /dev/null
+++ b/src/test/test-libcrypt-util.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_CRYPT_H
+# include <crypt.h>
+#else
+# include <unistd.h>
+#endif
+
+#include "strv.h"
+#include "tests.h"
+#include "libcrypt-util.h"
+
+static int test_hash_password(void) {
+ log_info("/* %s */", __func__);
+
+ /* As a warmup exercise, check if we can hash passwords. */
+
+ bool have_sane_hash = false;
+ const char *hash;
+
+ FOREACH_STRING(hash,
+ "ew3bU1.hoKk4o",
+ "$1$gc5rWpTB$wK1aul1PyBn9AX1z93stk1",
+ "$2b$12$BlqcGkB/7BFvNMXKGxDea.5/8D6FTny.cbNcHW/tqcrcyo6ZJd8u2",
+ "$5$lGhDrcrao9zb5oIK$05KlOVG3ocknx/ThreqXE/gk.XzFFBMTksc4t2CPDUD",
+ "$6$c7wB/3GiRk0VHf7e$zXJ7hN0aLZapE.iO4mn/oHu6.prsXTUG/5k1AxpgR85ELolyAcaIGRgzfwJs3isTChMDBjnthZyaMCfCNxo9I.",
+ "$y$j9T$$9cKOWsAm4m97WiYk61lPPibZpy3oaGPIbsL4koRe/XD") {
+ int b;
+
+ b = test_password_one(hash, "ppp");
+ log_info("%s: %s", hash, yes_no(b));
+#if defined(XCRYPT_VERSION_MAJOR)
+ /* xcrypt is supposed to always implement all methods. */
+ assert_se(b);
+#endif
+
+ if (b && IN_SET(hash[1], '6', 'y'))
+ have_sane_hash = true;
+ }
+
+ return have_sane_hash;
+}
+
+static void test_hash_password_full(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_free_ void *cd_data = NULL;
+ const char *i;
+ int cd_size = 0;
+
+ log_info("sizeof(struct crypt_data): %zu bytes", sizeof(struct crypt_data));
+
+ for (unsigned c = 0; c < 2; c++)
+ FOREACH_STRING(i, "abc123", "h⸿sło") {
+ _cleanup_free_ char *hashed;
+
+ if (c == 0)
+ assert_se(hash_password_full(i, &cd_data, &cd_size, &hashed) == 0);
+ else
+ assert_se(hash_password_full(i, NULL, NULL, &hashed) == 0);
+ log_debug("\"%s\" → \"%s\"", i, hashed);
+ log_info("crypt_r[a] buffer size: %i bytes", cd_size);
+
+ assert_se(test_password_one(hashed, i) == true);
+ assert_se(test_password_one(i, hashed) <= 0); /* We get an error for non-utf8 */
+ assert_se(test_password_one(hashed, "foobar") == false);
+ assert_se(test_password_many(STRV_MAKE(hashed), i) == true);
+ assert_se(test_password_many(STRV_MAKE(hashed), "foobar") == false);
+ assert_se(test_password_many(STRV_MAKE(hashed, hashed, hashed), "foobar") == false);
+ assert_se(test_password_many(STRV_MAKE("$y$j9T$dlCXwkX0GC5L6B8Gf.4PN/$VCyEH",
+ hashed,
+ "$y$j9T$SAayASazWZIQeJd9AS02m/$"),
+ i) == true);
+ assert_se(test_password_many(STRV_MAKE("$W$j9T$dlCXwkX0GC5L6B8Gf.4PN/$VCyEH", /* no such method exists... */
+ hashed,
+ "$y$j9T$SAayASazWZIQeJd9AS02m/$"),
+ i) == true);
+ assert_se(test_password_many(STRV_MAKE("$y$j9T$dlCXwkX0GC5L6B8Gf.4PN/$VCyEH",
+ hashed,
+ "$y$j9T$SAayASazWZIQeJd9AS02m/$"),
+ "") == false);
+ assert_se(test_password_many(STRV_MAKE("$W$j9T$dlCXwkX0GC5L6B8Gf.4PN/$VCyEH", /* no such method exists... */
+ hashed,
+ "$y$j9T$SAayASazWZIQeJd9AS02m/$"),
+ "") == false);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+#if defined(__powerpc__) && !defined(XCRYPT_VERSION_MAJOR)
+ return log_tests_skipped("crypt_r() causes a buffer overflow on ppc64el, see https://github.com/systemd/systemd/pull/16981#issuecomment-691203787");
+#endif
+
+ if (!test_hash_password())
+ return log_tests_skipped("crypt doesn't support yescrypt or sha512crypt");
+
+ test_hash_password_full();
+
+ return 0;
+}
diff --git a/src/test/test-libmount.c b/src/test/test-libmount.c
new file mode 100644
index 0000000..bd2381f
--- /dev/null
+++ b/src/test/test-libmount.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "escape.h"
+#include "libmount-util.h"
+#include "tests.h"
+
+static void test_libmount_unescaping_one(
+ const char *title,
+ const char *string,
+ bool may_fail,
+ const char *expected_source,
+ const char *expected_target) {
+ /* A test for libmount really */
+ int r;
+
+ log_info("/* %s %s */", __func__, title);
+
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = fmemopen((char*) string, strlen(string), "re");
+ assert_se(f);
+
+ assert_se(libmount_parse(title, f, &table, &iter) >= 0);
+
+ struct libmnt_fs *fs;
+ const char *source, *target;
+ _cleanup_free_ char *x = NULL, *cs = NULL, *s = NULL, *ct = NULL, *t = NULL;
+
+ /* We allow this call and the checks below to fail in some cases. See the case definitions below. */
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r != 0 && may_fail) {
+ log_error_errno(r, "mnt_table_next_fs failed: %m");
+ return;
+ }
+ assert_se(r == 0);
+
+ assert_se(x = cescape(string));
+
+ assert_se(source = mnt_fs_get_source(fs));
+ assert_se(target = mnt_fs_get_target(fs));
+
+ assert_se(cs = cescape(source));
+ assert_se(ct = cescape(target));
+
+ assert_se(cunescape(source, UNESCAPE_RELAX, &s) >= 0);
+ assert_se(cunescape(target, UNESCAPE_RELAX, &t) >= 0);
+
+ log_info("from '%s'", x);
+ log_info("source: '%s'", source);
+ log_info("source: '%s'", cs);
+ log_info("source: '%s'", s);
+ log_info("expected: '%s'", strna(expected_source));
+ log_info("target: '%s'", target);
+ log_info("target: '%s'", ct);
+ log_info("target: '%s'", t);
+ log_info("expected: '%s'", strna(expected_target));
+
+ assert_se(may_fail || streq(source, expected_source));
+ assert_se(may_fail || streq(target, expected_target));
+
+ assert_se(mnt_table_next_fs(table, iter, &fs) == 1);
+}
+
+static void test_libmount_unescaping(void) {
+ test_libmount_unescaping_one(
+ "escaped space + utf8",
+ "729 38 0:59 / /tmp/„zupa\\040zębowa” rw,relatime shared:395 - tmpfs die\\040Brühe rw,seclabel",
+ false,
+ "die Brühe",
+ "/tmp/„zupa zębowa”"
+ );
+
+ test_libmount_unescaping_one(
+ "escaped newline",
+ "729 38 0:59 / /tmp/x\\012y rw,relatime shared:395 - tmpfs newline rw,seclabel",
+ false,
+ "newline",
+ "/tmp/x\ny"
+ );
+
+ /* The result of "mount -t tmpfs '' /tmp/emptysource".
+ * This will fail with libmount <= v2.33.
+ * See https://github.com/karelzak/util-linux/commit/18a52a5094.
+ */
+ test_libmount_unescaping_one(
+ "empty source",
+ "760 38 0:60 / /tmp/emptysource rw,relatime shared:410 - tmpfs rw,seclabel",
+ true,
+ "",
+ "/tmp/emptysource"
+ );
+
+ /* The kernel leaves \r as is.
+ * Also see https://github.com/karelzak/util-linux/issues/780.
+ */
+ test_libmount_unescaping_one(
+ "foo\\rbar",
+ "790 38 0:61 / /tmp/foo\rbar rw,relatime shared:425 - tmpfs tmpfs rw,seclabel",
+ true,
+ "tmpfs",
+ "/tmp/foo\rbar"
+ );
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_libmount_unescaping();
+ return 0;
+}
diff --git a/src/test/test-libudev.c b/src/test/test-libudev.c
new file mode 100644
index 0000000..d162aba
--- /dev/null
+++ b/src/test/test-libudev.c
@@ -0,0 +1,584 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "build.h"
+#include "fd-util.h"
+#include "libudev-list-internal.h"
+#include "libudev-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+static bool arg_monitor = false;
+
+static void print_device(struct udev_device *device) {
+ const char *str;
+ dev_t devnum;
+ int count;
+ struct udev_list_entry *list_entry;
+
+ log_info("*** device: %p ***", device);
+ str = udev_device_get_action(device);
+ if (str)
+ log_info("action: '%s'", str);
+
+ str = udev_device_get_syspath(device);
+ log_info("syspath: '%s'", str);
+
+ str = udev_device_get_sysname(device);
+ log_info("sysname: '%s'", str);
+
+ str = udev_device_get_sysnum(device);
+ if (str)
+ log_info("sysnum: '%s'", str);
+
+ str = udev_device_get_devpath(device);
+ log_info("devpath: '%s'", str);
+
+ str = udev_device_get_subsystem(device);
+ if (str)
+ log_info("subsystem: '%s'", str);
+
+ str = udev_device_get_devtype(device);
+ if (str)
+ log_info("devtype: '%s'", str);
+
+ str = udev_device_get_driver(device);
+ if (str)
+ log_info("driver: '%s'", str);
+
+ str = udev_device_get_devnode(device);
+ if (str)
+ log_info("devname: '%s'", str);
+
+ devnum = udev_device_get_devnum(device);
+ if (major(devnum) > 0)
+ log_info("devnum: %u:%u", major(devnum), minor(devnum));
+
+ count = 0;
+ udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(device)) {
+ log_info("link: '%s'", udev_list_entry_get_name(list_entry));
+ count++;
+ }
+ if (count > 0)
+ log_info("found %i links", count);
+
+ count = 0;
+ udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(device)) {
+ log_info("property: '%s=%s'",
+ udev_list_entry_get_name(list_entry),
+ udev_list_entry_get_value(list_entry));
+ count++;
+ }
+ if (count > 0)
+ log_info("found %i properties", count);
+
+ str = udev_device_get_property_value(device, "MAJOR");
+ if (str)
+ log_info("MAJOR: '%s'", str);
+
+ str = udev_device_get_sysattr_value(device, "dev");
+ if (str)
+ log_info("attr{dev}: '%s'", str);
+}
+
+static void test_device(struct udev *udev, const char *syspath) {
+ _cleanup_(udev_device_unrefp) struct udev_device *device;
+
+ log_info("/* %s, device %s */", __func__, syspath);
+ device = udev_device_new_from_syspath(udev, syspath);
+ if (device)
+ print_device(device);
+ else
+ log_warning_errno(errno, "udev_device_new_from_syspath: %m");
+}
+
+static void test_device_parents(struct udev *udev, const char *syspath) {
+ _cleanup_(udev_device_unrefp) struct udev_device *device;
+ struct udev_device *device_parent;
+
+ log_info("/* %s, device %s */", __func__, syspath);
+ device = udev_device_new_from_syspath(udev, syspath);
+ if (!device)
+ return;
+
+ log_info("looking at parents");
+ device_parent = device;
+ do {
+ print_device(device_parent);
+ device_parent = udev_device_get_parent(device_parent);
+ } while (device_parent != NULL);
+
+ log_info("looking at parents again");
+ device_parent = device;
+ do {
+ print_device(device_parent);
+ device_parent = udev_device_get_parent(device_parent);
+ } while (device_parent != NULL);
+}
+
+static void test_device_devnum(struct udev *udev) {
+ dev_t devnum = makedev(1, 3);
+ _cleanup_(udev_device_unrefp) struct udev_device *device;
+
+ log_info("/* %s, device %d:%d */", __func__, major(devnum), minor(devnum));
+
+ device = udev_device_new_from_devnum(udev, 'c', devnum);
+ if (device)
+ print_device(device);
+ else
+ log_warning_errno(errno, "udev_device_new_from_devnum: %m");
+}
+
+static void test_device_subsys_name(struct udev *udev, const char *subsys, const char *dev) {
+ _cleanup_(udev_device_unrefp) struct udev_device *device;
+
+ log_info("looking up device: '%s:%s'", subsys, dev);
+ device = udev_device_new_from_subsystem_sysname(udev, subsys, dev);
+ if (!device)
+ log_warning_errno(errno, "udev_device_new_from_subsystem_sysname: %m");
+ else
+ print_device(device);
+}
+
+static int enumerate_print_list(struct udev_enumerate *enumerate) {
+ struct udev_list_entry *list_entry;
+ int count = 0;
+
+ udev_list_entry_foreach(list_entry, udev_enumerate_get_list_entry(enumerate)) {
+ struct udev_device *device;
+
+ device = udev_device_new_from_syspath(udev_enumerate_get_udev(enumerate),
+ udev_list_entry_get_name(list_entry));
+ if (device) {
+ log_info("device: '%s' (%s)",
+ udev_device_get_syspath(device),
+ udev_device_get_subsystem(device));
+ udev_device_unref(device);
+ count++;
+ }
+ }
+ log_info("found %i devices", count);
+ return count;
+}
+
+static void test_monitor(struct udev *udev) {
+ _cleanup_(udev_monitor_unrefp) struct udev_monitor *udev_monitor;
+ _cleanup_close_ int fd_ep;
+ int fd_udev;
+ struct epoll_event ep_udev = {
+ .events = EPOLLIN,
+ }, ep_stdin = {
+ .events = EPOLLIN,
+ .data.fd = STDIN_FILENO,
+ };
+
+ log_info("/* %s */", __func__);
+
+ fd_ep = epoll_create1(EPOLL_CLOEXEC);
+ assert_se(fd_ep >= 0);
+
+ udev_monitor = udev_monitor_new_from_netlink(udev, "udev");
+ assert_se(udev_monitor != NULL);
+
+ fd_udev = udev_monitor_get_fd(udev_monitor);
+ ep_udev.data.fd = fd_udev;
+
+ assert_se(udev_monitor_filter_add_match_subsystem_devtype(udev_monitor, "block", NULL) >= 0);
+ assert_se(udev_monitor_filter_add_match_subsystem_devtype(udev_monitor, "tty", NULL) >= 0);
+ assert_se(udev_monitor_filter_add_match_subsystem_devtype(udev_monitor, "usb", "usb_device") >= 0);
+
+ assert_se(udev_monitor_enable_receiving(udev_monitor) >= 0);
+
+ assert_se(epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_udev, &ep_udev) >= 0);
+ assert_se(epoll_ctl(fd_ep, EPOLL_CTL_ADD, STDIN_FILENO, &ep_stdin) >= 0);
+
+ for (;;) {
+ int fdcount;
+ struct epoll_event ev[4];
+ struct udev_device *device;
+ int i;
+
+ printf("waiting for events from udev, press ENTER to exit\n");
+ fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), -1);
+ printf("epoll fd count: %i\n", fdcount);
+
+ for (i = 0; i < fdcount; i++) {
+ if (ev[i].data.fd == fd_udev && ev[i].events & EPOLLIN) {
+ device = udev_monitor_receive_device(udev_monitor);
+ if (!device) {
+ printf("no device from socket\n");
+ continue;
+ }
+ print_device(device);
+ udev_device_unref(device);
+ } else if (ev[i].data.fd == STDIN_FILENO && ev[i].events & EPOLLIN) {
+ printf("exiting loop\n");
+ return;
+ }
+ }
+ }
+}
+
+static void test_queue(struct udev *udev) {
+ struct udev_queue *udev_queue;
+ bool empty;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(udev_queue = udev_queue_new(udev));
+
+ empty = udev_queue_get_queue_is_empty(udev_queue);
+ log_info("queue is %s", empty ? "empty" : "not empty");
+ udev_queue_unref(udev_queue);
+}
+
+static int test_enumerate(struct udev *udev, const char *subsystem) {
+ struct udev_enumerate *udev_enumerate;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ log_info("enumerate '%s'", subsystem == NULL ? "<all>" : subsystem);
+ udev_enumerate = udev_enumerate_new(udev);
+ if (!udev_enumerate)
+ return -1;
+ udev_enumerate_add_match_subsystem(udev_enumerate, subsystem);
+ udev_enumerate_scan_devices(udev_enumerate);
+ enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'net' + duplicated scan + null + zero");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (!udev_enumerate)
+ return -1;
+ udev_enumerate_add_match_subsystem(udev_enumerate, "net");
+ udev_enumerate_scan_devices(udev_enumerate);
+ udev_enumerate_scan_devices(udev_enumerate);
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/null");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/null");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/null");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/null");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_add_syspath(udev_enumerate, "/sys/class/mem/zero");
+ udev_enumerate_scan_devices(udev_enumerate);
+ enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'block'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (!udev_enumerate)
+ return -1;
+ udev_enumerate_add_match_subsystem(udev_enumerate,"block");
+ r = udev_enumerate_add_match_is_initialized(udev_enumerate);
+ if (r < 0) {
+ udev_enumerate_unref(udev_enumerate);
+ return r;
+ }
+ udev_enumerate_scan_devices(udev_enumerate);
+ enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'not block'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (!udev_enumerate)
+ return -1;
+ udev_enumerate_add_nomatch_subsystem(udev_enumerate, "block");
+ udev_enumerate_scan_devices(udev_enumerate);
+ enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'pci, mem, vc'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (!udev_enumerate)
+ return -1;
+ udev_enumerate_add_match_subsystem(udev_enumerate, "pci");
+ udev_enumerate_add_match_subsystem(udev_enumerate, "mem");
+ udev_enumerate_add_match_subsystem(udev_enumerate, "vc");
+ udev_enumerate_scan_devices(udev_enumerate);
+ enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'subsystem'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (!udev_enumerate)
+ return -1;
+ udev_enumerate_scan_subsystems(udev_enumerate);
+ enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+
+ log_info("enumerate 'property IF_FS_*=filesystem'");
+ udev_enumerate = udev_enumerate_new(udev);
+ if (!udev_enumerate)
+ return -1;
+ udev_enumerate_add_match_property(udev_enumerate, "ID_FS*", "filesystem");
+ udev_enumerate_scan_devices(udev_enumerate);
+ enumerate_print_list(udev_enumerate);
+ udev_enumerate_unref(udev_enumerate);
+ return 0;
+}
+
+static void test_hwdb(struct udev *udev, const char *modalias) {
+ struct udev_hwdb *hwdb;
+ struct udev_list_entry *entry;
+
+ log_info("/* %s */", __func__);
+
+ hwdb = udev_hwdb_new(udev);
+ if (!hwdb)
+ log_warning_errno(errno, "Failed to open hwdb: %m");
+
+ udev_list_entry_foreach(entry, udev_hwdb_get_properties_list_entry(hwdb, modalias, 0))
+ log_info("'%s'='%s'", udev_list_entry_get_name(entry), udev_list_entry_get_value(entry));
+
+ hwdb = udev_hwdb_unref(hwdb);
+ assert_se(hwdb == NULL);
+}
+
+static void test_util_replace_whitespace_one_len(const char *str, size_t len, const char *expected) {
+ _cleanup_free_ char *result = NULL;
+ int r;
+
+ result = new(char, len + 1);
+ assert_se(result);
+ r = util_replace_whitespace(str, result, len);
+ assert_se((size_t) r == strlen(expected));
+ assert_se(streq(result, expected));
+}
+
+static void test_util_replace_whitespace_one(const char *str, const char *expected) {
+ test_util_replace_whitespace_one_len(str, strlen(str), expected);
+}
+
+static void test_util_replace_whitespace(void) {
+ log_info("/* %s */", __func__);
+
+ test_util_replace_whitespace_one("hogehoge", "hogehoge");
+ test_util_replace_whitespace_one("hoge hoge", "hoge_hoge");
+ test_util_replace_whitespace_one(" hoge hoge ", "hoge_hoge");
+ test_util_replace_whitespace_one(" ", "");
+ test_util_replace_whitespace_one("hoge ", "hoge");
+
+ test_util_replace_whitespace_one_len("hoge hoge ", 9, "hoge_hoge");
+ test_util_replace_whitespace_one_len("hoge hoge ", 8, "hoge_hog");
+ test_util_replace_whitespace_one_len("hoge hoge ", 7, "hoge_ho");
+ test_util_replace_whitespace_one_len("hoge hoge ", 6, "hoge_h");
+ test_util_replace_whitespace_one_len("hoge hoge ", 5, "hoge");
+ test_util_replace_whitespace_one_len("hoge hoge ", 4, "hoge");
+ test_util_replace_whitespace_one_len("hoge hoge ", 3, "hog");
+ test_util_replace_whitespace_one_len("hoge hoge ", 2, "ho");
+ test_util_replace_whitespace_one_len("hoge hoge ", 1, "h");
+ test_util_replace_whitespace_one_len("hoge hoge ", 0, "");
+
+ test_util_replace_whitespace_one_len(" hoge hoge ", 16, "hoge_hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 15, "hoge_hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 14, "hoge_hog");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 13, "hoge_ho");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 12, "hoge_h");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 11, "hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 10, "hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 9, "hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 8, "hoge");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 7, "hog");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 6, "ho");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 5, "h");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 4, "");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 3, "");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 2, "");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 1, "");
+ test_util_replace_whitespace_one_len(" hoge hoge ", 0, "");
+}
+
+static void test_util_resolve_subsys_kernel_one(const char *str, bool read_value, int retval, const char *expected) {
+ char result[UTIL_PATH_SIZE] = "";
+ int r;
+
+ r = util_resolve_subsys_kernel(str, result, sizeof(result), read_value);
+ log_info("\"%s\" → expect: \"%s\", %d, actual: \"%s\", %d", str, strnull(expected), retval, result, r);
+ assert_se(r == retval);
+ if (r >= 0)
+ assert_se(streq(result, expected));
+}
+
+static void test_util_resolve_subsys_kernel(void) {
+ log_info("/* %s */", __func__);
+
+ test_util_resolve_subsys_kernel_one("hoge", false, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[hoge", false, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[hoge/foo", false, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[hoge/]", false, -ENODEV, NULL);
+
+ test_util_resolve_subsys_kernel_one("[net/lo]", false, 0, "/sys/devices/virtual/net/lo");
+ test_util_resolve_subsys_kernel_one("[net/lo]/", false, 0, "/sys/devices/virtual/net/lo");
+ test_util_resolve_subsys_kernel_one("[net/lo]hoge", false, 0, "/sys/devices/virtual/net/lo/hoge");
+ test_util_resolve_subsys_kernel_one("[net/lo]/hoge", false, 0, "/sys/devices/virtual/net/lo/hoge");
+
+ test_util_resolve_subsys_kernel_one("[net/lo]", true, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[net/lo]/", true, -EINVAL, NULL);
+ test_util_resolve_subsys_kernel_one("[net/lo]hoge", true, 0, "");
+ test_util_resolve_subsys_kernel_one("[net/lo]/hoge", true, 0, "");
+ test_util_resolve_subsys_kernel_one("[net/lo]address", true, 0, "00:00:00:00:00:00");
+ test_util_resolve_subsys_kernel_one("[net/lo]/address", true, 0, "00:00:00:00:00:00");
+}
+
+static void test_list(void) {
+ _cleanup_(udev_list_freep) struct udev_list *list = NULL;
+ struct udev_list_entry *e;
+
+ /* empty list */
+ assert_se(list = udev_list_new(false));
+ assert_se(!udev_list_get_entry(list));
+ list = udev_list_free(list);
+
+ /* unique == false */
+ assert_se(list = udev_list_new(false));
+ assert_se(udev_list_entry_add(list, "aaa", "hoge"));
+ assert_se(udev_list_entry_add(list, "aaa", "hogehoge"));
+ assert_se(udev_list_entry_add(list, "bbb", "foo"));
+ e = udev_list_get_entry(list);
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "aaa"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "hoge"));
+ e = udev_list_entry_get_next(e);
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "aaa"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "hogehoge"));
+ e = udev_list_entry_get_next(e);
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "bbb"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "foo"));
+ assert_se(!udev_list_entry_get_next(e));
+
+ assert_se(!udev_list_entry_get_by_name(e, "aaa"));
+ assert_se(!udev_list_entry_get_by_name(e, "bbb"));
+ assert_se(!udev_list_entry_get_by_name(e, "ccc"));
+ list = udev_list_free(list);
+
+ /* unique == true */
+ assert_se(list = udev_list_new(true));
+ assert_se(udev_list_entry_add(list, "aaa", "hoge"));
+ assert_se(udev_list_entry_add(list, "aaa", "hogehoge"));
+ assert_se(udev_list_entry_add(list, "bbb", "foo"));
+ e = udev_list_get_entry(list);
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "aaa"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "hogehoge"));
+ e = udev_list_entry_get_next(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "bbb"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "foo"));
+ assert_se(!udev_list_entry_get_next(e));
+
+ e = udev_list_entry_get_by_name(e, "bbb");
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "bbb"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "foo"));
+ e = udev_list_entry_get_by_name(e, "aaa");
+ assert_se(e);
+ assert_se(streq_ptr(udev_list_entry_get_name(e), "aaa"));
+ assert_se(streq_ptr(udev_list_entry_get_value(e), "hogehoge"));
+ assert_se(!udev_list_entry_get_by_name(e, "ccc"));
+}
+
+static int parse_args(int argc, char *argv[], const char **syspath, const char **subsystem) {
+ static const struct option options[] = {
+ { "syspath", required_argument, NULL, 'p' },
+ { "subsystem", required_argument, NULL, 's' },
+ { "debug", no_argument, NULL, 'd' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ { "monitor", no_argument, NULL, 'm' },
+ {}
+ };
+ int c;
+
+ while ((c = getopt_long(argc, argv, "p:s:dhVm", options, NULL)) >= 0)
+ switch (c) {
+ case 'p':
+ *syspath = optarg;
+ break;
+
+ case 's':
+ *subsystem = optarg;
+ break;
+
+ case 'd':
+ log_set_max_level(LOG_DEBUG);
+ break;
+
+ case 'h':
+ printf("--debug --syspath= --subsystem= --help\n");
+ return 0;
+
+ case 'V':
+ printf("%s\n", GIT_VERSION);
+ return 0;
+
+ case 'm':
+ arg_monitor = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option code.");
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(udev_unrefp) struct udev *udev = NULL;
+
+ const char *syspath = "/devices/virtual/mem/null";
+ const char *subsystem = NULL;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = parse_args(argc, argv, &syspath, &subsystem);
+ if (r <= 0)
+ return r;
+
+ assert_se(udev = udev_new());
+
+ /* add sys path if needed */
+ if (!startswith(syspath, "/sys"))
+ syspath = strjoina("/sys/", syspath);
+
+ test_device(udev, syspath);
+ test_device_devnum(udev);
+ test_device_subsys_name(udev, "block", "sda");
+ test_device_subsys_name(udev, "subsystem", "pci");
+ test_device_subsys_name(udev, "drivers", "scsi:sd");
+ test_device_subsys_name(udev, "module", "printk");
+ test_device_parents(udev, syspath);
+
+ test_enumerate(udev, subsystem);
+
+ test_queue(udev);
+
+ test_hwdb(udev, "usb:v0D50p0011*");
+
+ if (arg_monitor)
+ test_monitor(udev);
+
+ test_util_replace_whitespace();
+ test_util_resolve_subsys_kernel();
+
+ test_list();
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/test/test-list.c b/src/test/test-list.c
new file mode 100644
index 0000000..fc6e1c8
--- /dev/null
+++ b/src/test/test-list.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2013 Jan Janssen
+***/
+
+#include "list.h"
+#include "util.h"
+
+int main(int argc, const char *argv[]) {
+ size_t i;
+ typedef struct list_item {
+ LIST_FIELDS(struct list_item, item);
+ } list_item;
+ LIST_HEAD(list_item, head);
+ LIST_HEAD(list_item, head2);
+ list_item items[4];
+ list_item *cursor;
+
+ LIST_HEAD_INIT(head);
+ LIST_HEAD_INIT(head2);
+ assert_se(head == NULL);
+ assert_se(head2 == NULL);
+
+ for (i = 0; i < ELEMENTSOF(items); i++) {
+ LIST_INIT(item, &items[i]);
+ assert_se(LIST_JUST_US(item, &items[i]));
+ LIST_PREPEND(item, head, &items[i]);
+ }
+
+ i = 0;
+ LIST_FOREACH_OTHERS(item, cursor, &items[2]) {
+ i++;
+ assert_se(cursor != &items[2]);
+ }
+ assert_se(i == ELEMENTSOF(items)-1);
+
+ i = 0;
+ LIST_FOREACH_OTHERS(item, cursor, &items[0]) {
+ i++;
+ assert_se(cursor != &items[0]);
+ }
+ assert_se(i == ELEMENTSOF(items)-1);
+
+ i = 0;
+ LIST_FOREACH_OTHERS(item, cursor, &items[3]) {
+ i++;
+ assert_se(cursor != &items[3]);
+ }
+ assert_se(i == ELEMENTSOF(items)-1);
+
+ assert_se(!LIST_JUST_US(item, head));
+
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[1].item_next == &items[0]);
+ assert_se(items[2].item_next == &items[1]);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[0].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_FIND_HEAD(item, &items[0], cursor);
+ assert_se(cursor == &items[3]);
+
+ LIST_FIND_TAIL(item, &items[3], cursor);
+ assert_se(cursor == &items[0]);
+
+ LIST_REMOVE(item, head, &items[1]);
+ assert_se(LIST_JUST_US(item, &items[1]));
+
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_INSERT_AFTER(item, head, &items[3], &items[1]);
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[1]);
+ assert_se(LIST_JUST_US(item, &items[1]));
+
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_INSERT_BEFORE(item, head, &items[2], &items[1]);
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[0]);
+ assert_se(LIST_JUST_US(item, &items[0]));
+
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_INSERT_BEFORE(item, head, &items[3], &items[0]);
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+ assert_se(items[0].item_next == &items[3]);
+
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == &items[0]);
+ assert_se(items[0].item_prev == NULL);
+ assert_se(head == &items[0]);
+
+ LIST_REMOVE(item, head, &items[0]);
+ assert_se(LIST_JUST_US(item, &items[0]));
+
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_INSERT_BEFORE(item, head, NULL, &items[0]);
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[2].item_next == &items[0]);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[0].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[0]);
+ assert_se(LIST_JUST_US(item, &items[0]));
+
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[3].item_next == &items[1]);
+
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[1]);
+ assert_se(LIST_JUST_US(item, &items[1]));
+
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_REMOVE(item, head, &items[2]);
+ assert_se(LIST_JUST_US(item, &items[2]));
+ assert_se(LIST_JUST_US(item, head));
+
+ LIST_REMOVE(item, head, &items[3]);
+ assert_se(LIST_JUST_US(item, &items[3]));
+
+ assert_se(head == NULL);
+
+ for (i = 0; i < ELEMENTSOF(items); i++) {
+ assert_se(LIST_JUST_US(item, &items[i]));
+ LIST_APPEND(item, head, &items[i]);
+ }
+
+ assert_se(!LIST_JUST_US(item, head));
+
+ assert_se(items[0].item_next == &items[1]);
+ assert_se(items[1].item_next == &items[2]);
+ assert_se(items[2].item_next == &items[3]);
+ assert_se(items[3].item_next == NULL);
+
+ assert_se(items[0].item_prev == NULL);
+ assert_se(items[1].item_prev == &items[0]);
+ assert_se(items[2].item_prev == &items[1]);
+ assert_se(items[3].item_prev == &items[2]);
+
+ for (i = 0; i < ELEMENTSOF(items); i++)
+ LIST_REMOVE(item, head, &items[i]);
+
+ assert_se(head == NULL);
+
+ for (i = 0; i < ELEMENTSOF(items) / 2; i++) {
+ LIST_INIT(item, &items[i]);
+ assert_se(LIST_JUST_US(item, &items[i]));
+ LIST_PREPEND(item, head, &items[i]);
+ }
+
+ for (i = ELEMENTSOF(items) / 2; i < ELEMENTSOF(items); i++) {
+ LIST_INIT(item, &items[i]);
+ assert_se(LIST_JUST_US(item, &items[i]));
+ LIST_PREPEND(item, head2, &items[i]);
+ }
+
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[1].item_next == &items[0]);
+ assert_se(items[2].item_next == NULL);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[0].item_prev == &items[1]);
+ assert_se(items[1].item_prev == NULL);
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_JOIN(item, head2, head);
+ assert_se(head == NULL);
+
+ assert_se(items[0].item_next == NULL);
+ assert_se(items[1].item_next == &items[0]);
+ assert_se(items[2].item_next == &items[1]);
+ assert_se(items[3].item_next == &items[2]);
+
+ assert_se(items[0].item_prev == &items[1]);
+ assert_se(items[1].item_prev == &items[2]);
+ assert_se(items[2].item_prev == &items[3]);
+ assert_se(items[3].item_prev == NULL);
+
+ LIST_JOIN(item, head, head2);
+ assert_se(head2 == NULL);
+ assert_se(!LIST_IS_EMPTY(head));
+
+ for (i = 0; i < ELEMENTSOF(items); i++)
+ LIST_REMOVE(item, head, &items[i]);
+
+ assert_se(head == NULL);
+
+ return 0;
+}
diff --git a/src/test/test-load-fragment.c b/src/test/test-load-fragment.c
new file mode 100644
index 0000000..40c1fd0
--- /dev/null
+++ b/src/test/test-load-fragment.c
@@ -0,0 +1,855 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "all-units.h"
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "conf-parser.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "install-printf.h"
+#include "install.h"
+#include "load-fragment.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "rm-rf.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+/* Nontrivial value serves as a placeholder to check that parsing function (didn't) change it */
+#define CGROUP_LIMIT_DUMMY 3
+
+static int test_unit_file_get_set(void) {
+ int r;
+ Hashmap *h;
+ UnitFileList *p;
+
+ h = hashmap_new(&string_hash_ops);
+ assert_se(h);
+
+ r = unit_file_get_list(UNIT_FILE_SYSTEM, NULL, h, NULL, NULL);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return log_tests_skipped_errno(r, "unit_file_get_list");
+
+ log_full_errno(r == 0 ? LOG_INFO : LOG_ERR, r,
+ "unit_file_get_list: %m");
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ HASHMAP_FOREACH(p, h)
+ printf("%s = %s\n", p->path, unit_file_state_to_string(p->state));
+
+ unit_file_list_free(h);
+
+ return 0;
+}
+
+static void check_execcommand(ExecCommand *c,
+ const char* path,
+ const char* argv0,
+ const char* argv1,
+ const char* argv2,
+ bool ignore) {
+ size_t n;
+
+ assert_se(c);
+ log_info("expect: \"%s\" [\"%s\" \"%s\" \"%s\"]",
+ path, argv0 ?: path, argv1, argv2);
+ n = strv_length(c->argv);
+ log_info("actual: \"%s\" [\"%s\" \"%s\" \"%s\"]",
+ c->path, c->argv[0], n > 0 ? c->argv[1] : NULL, n > 1 ? c->argv[2] : NULL);
+ assert_se(streq(c->path, path));
+ assert_se(streq(c->argv[0], argv0 ?: path));
+ if (n > 0)
+ assert_se(streq_ptr(c->argv[1], argv1));
+ if (n > 1)
+ assert_se(streq_ptr(c->argv[2], argv2));
+ assert_se(!!(c->flags & EXEC_COMMAND_IGNORE_FAILURE) == ignore);
+}
+
+static void test_config_parse_exec(void) {
+ /* int config_parse_exec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+ int r;
+
+ ExecCommand *c = NULL, *c1;
+ const char *ccc;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(unit_freep) Unit *u = NULL;
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
+ if (manager_errno_skip_test(r)) {
+ log_notice_errno(r, "Skipping test: manager_new: %m");
+ return;
+ }
+
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ assert_se(u = unit_new(m, sizeof(Service)));
+
+ log_info("/* basic test */");
+ r = config_parse_exec(NULL, "fake", 1, "section", 1,
+ "LValue", 0, "/RValue r1",
+ &c, u);
+ assert_se(r >= 0);
+ check_execcommand(c, "/RValue", "/RValue", "r1", NULL, false);
+
+ r = config_parse_exec(NULL, "fake", 2, "section", 1,
+ "LValue", 0, "/RValue///slashes r1///",
+ &c, u);
+
+ log_info("/* test slashes */");
+ assert_se(r >= 0);
+ c1 = c->command_next;
+ check_execcommand(c1, "/RValue/slashes", "/RValue///slashes", "r1///", NULL, false);
+
+ log_info("/* trailing slash */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/RValue/ argv0 r1",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* honour_argv0 */");
+ r = config_parse_exec(NULL, "fake", 3, "section", 1,
+ "LValue", 0, "@/RValue///slashes2 ///argv0 r1",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue/slashes2", "///argv0", "r1", NULL, false);
+
+ log_info("/* honour_argv0, no args */");
+ r = config_parse_exec(NULL, "fake", 3, "section", 1,
+ "LValue", 0, "@/RValue",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* no command, whitespace only, reset */");
+ r = config_parse_exec(NULL, "fake", 3, "section", 1,
+ "LValue", 0, "",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c == NULL);
+
+ log_info("/* ignore && honour_argv0 */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "-@/RValue///slashes3 argv0a r1",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c;
+ check_execcommand(c1, "/RValue/slashes3", "argv0a", "r1", NULL, true);
+
+ log_info("/* ignore && honour_argv0 */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "@-/RValue///slashes4 argv0b r1",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue/slashes4", "argv0b", "r1", NULL, true);
+
+ log_info("/* ignore && ignore */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "--/RValue argv0 r1",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* ignore && ignore (2) */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "-@-/RValue argv0 r1",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ; "
+ "/goo/goo boo",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", NULL, true);
+
+ c1 = c1->command_next;
+ check_execcommand(c1, "/goo/goo", NULL, "boo", NULL, false);
+
+ log_info("/* two semicolons in a row */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ; ; "
+ "/goo/goo boo",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", NULL, true);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/goo/goo", "/goo/goo", "boo", NULL, false);
+
+ log_info("/* trailing semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ; ",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", NULL, true);
+
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* trailing semicolon, no whitespace */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ;",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", NULL, true);
+
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* trailing semicolon in single quotes */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "-@/RValue argv0 r1 ';'",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/RValue", "argv0", "r1", ";", true);
+
+ log_info("/* escaped semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/bin/find \\;",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/bin/find", NULL, ";", NULL, false);
+
+ log_info("/* escaped semicolon with following arg */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/sbin/find \\; /x",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/sbin/find", NULL, ";", "/x", false);
+
+ log_info("/* escaped semicolon as part of an expression */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/sbin/find \\;x",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/sbin/find", NULL, "\\;x", NULL, false);
+
+ log_info("/* encoded semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/bin/find \\073",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/bin/find", NULL, ";", NULL, false);
+
+ log_info("/* quoted semicolon */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/bin/find \";\"",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/bin/find", NULL, ";", NULL, false);
+
+ log_info("/* quoted semicolon with following arg */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/sbin/find \";\" /x",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/sbin/find", NULL, ";", "/x", false);
+
+ log_info("/* spaces in the filename */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH WITH SPACES/daemon\" -1 -2",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon", NULL, "-1", "-2", false);
+
+ log_info("/* spaces in the filename, no args */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH WITH SPACES/daemon -1 -2\"",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon -1 -2", NULL, NULL, NULL, false);
+
+ log_info("/* spaces in the filename, everything quoted */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH WITH SPACES/daemon\" \"-1\" '-2'",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon", NULL, "-1", "-2", false);
+
+ log_info("/* escaped spaces in the filename */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH\\sWITH\\sSPACES/daemon\" '-1 -2'",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon", NULL, "-1 -2", NULL, false);
+
+ log_info("/* escaped spaces in the filename (2) */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "\"/PATH\\x20WITH\\x20SPACES/daemon\" \"-1 -2\"",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1,
+ "/PATH WITH SPACES/daemon", NULL, "-1 -2", NULL, false);
+
+ for (ccc = "abfnrtv\\\'\"x"; *ccc; ccc++) {
+ /* \\x is an incomplete hexadecimal sequence, invalid because of the slash */
+ char path[] = "/path\\X";
+ path[sizeof(path) - 2] = *ccc;
+
+ log_info("/* invalid character: \\%c */", *ccc);
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, path,
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+ }
+
+ log_info("/* valid character: \\s */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/path\\s",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/path ", NULL, NULL, NULL, false);
+
+ log_info("/* quoted backslashes */");
+ r = config_parse_exec(NULL, "fake", 5, "section", 1,
+ "LValue", 0,
+ "/bin/grep '\\w+\\K'",
+ &c, u);
+ assert_se(r >= 0);
+ c1 = c1->command_next;
+ check_execcommand(c1, "/bin/grep", NULL, "\\w+\\K", NULL, false);
+
+ log_info("/* trailing backslash: \\ */");
+ /* backslash is invalid */
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/path\\",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* missing ending ' */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/path 'foo",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* missing ending ' with trailing backslash */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "/path 'foo\\",
+ &c, u);
+ assert_se(r == -ENOEXEC);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* invalid space between modifiers */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "- /path",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* only modifiers, no path */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "-",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c1->command_next == NULL);
+
+ log_info("/* empty argument, reset */");
+ r = config_parse_exec(NULL, "fake", 4, "section", 1,
+ "LValue", 0, "",
+ &c, u);
+ assert_se(r == 0);
+ assert_se(c == NULL);
+
+ exec_command_free_list(c);
+}
+
+static void test_config_parse_log_extra_fields(void) {
+ /* int config_parse_log_extra_fields(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+
+ int r;
+
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(unit_freep) Unit *u = NULL;
+ ExecContext c = {};
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
+ if (manager_errno_skip_test(r)) {
+ log_notice_errno(r, "Skipping test: manager_new: %m");
+ return;
+ }
+
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ assert_se(u = unit_new(m, sizeof(Service)));
+
+ log_info("/* %s – basic test */", __func__);
+ r = config_parse_log_extra_fields(NULL, "fake", 1, "section", 1,
+ "LValue", 0, "FOO=BAR \"QOOF=quux ' ' \"",
+ &c, u);
+ assert_se(r >= 0);
+ assert_se(c.n_log_extra_fields == 2);
+ assert_se(strneq(c.log_extra_fields[0].iov_base, "FOO=BAR", c.log_extra_fields[0].iov_len));
+ assert_se(strneq(c.log_extra_fields[1].iov_base, "QOOF=quux ' ' ", c.log_extra_fields[1].iov_len));
+
+ log_info("/* %s – add some */", __func__);
+ r = config_parse_log_extra_fields(NULL, "fake", 1, "section", 1,
+ "LValue", 0, "FOO2=BAR2 QOOF2=quux ' '",
+ &c, u);
+ assert_se(r >= 0);
+ assert_se(c.n_log_extra_fields == 4);
+ assert_se(strneq(c.log_extra_fields[0].iov_base, "FOO=BAR", c.log_extra_fields[0].iov_len));
+ assert_se(strneq(c.log_extra_fields[1].iov_base, "QOOF=quux ' ' ", c.log_extra_fields[1].iov_len));
+ assert_se(strneq(c.log_extra_fields[2].iov_base, "FOO2=BAR2", c.log_extra_fields[2].iov_len));
+ assert_se(strneq(c.log_extra_fields[3].iov_base, "QOOF2=quux", c.log_extra_fields[3].iov_len));
+
+ exec_context_dump(&c, stdout, " --> ");
+
+ log_info("/* %s – reset */", __func__);
+ r = config_parse_log_extra_fields(NULL, "fake", 1, "section", 1,
+ "LValue", 0, "",
+ &c, u);
+ assert_se(r >= 0);
+ assert_se(c.n_log_extra_fields == 0);
+
+ exec_context_free_log_extra_fields(&c);
+
+ log_info("/* %s – bye */", __func__);
+}
+
+static void test_install_printf(void) {
+ char name[] = "name.service",
+ path[] = "/run/systemd/system/name.service";
+ UnitFileInstallInfo i = { .name = name, .path = path, };
+ UnitFileInstallInfo i2 = { .name= name, .path = path, };
+ char name3[] = "name@inst.service",
+ path3[] = "/run/systemd/system/name.service";
+ UnitFileInstallInfo i3 = { .name = name3, .path = path3, };
+ UnitFileInstallInfo i4 = { .name = name3, .path = path3, };
+
+ _cleanup_free_ char *mid = NULL, *bid = NULL, *host = NULL, *gid = NULL, *group = NULL, *uid = NULL, *user = NULL;
+
+ assert_se(specifier_machine_id('m', NULL, NULL, &mid) >= 0 && mid);
+ assert_se(specifier_boot_id('b', NULL, NULL, &bid) >= 0 && bid);
+ assert_se(host = gethostname_malloc());
+ assert_se(group = gid_to_name(getgid()));
+ assert_se(asprintf(&gid, UID_FMT, getgid()) >= 0);
+ assert_se(user = uid_to_name(getuid()));
+ assert_se(asprintf(&uid, UID_FMT, getuid()) >= 0);
+
+#define expect(src, pattern, result) \
+ do { \
+ _cleanup_free_ char *t = NULL; \
+ _cleanup_free_ char \
+ *d1 = strdup(i.name), \
+ *d2 = strdup(i.path); \
+ assert_se(install_full_printf(&src, pattern, &t) >= 0 || !result); \
+ memzero(i.name, strlen(i.name)); \
+ memzero(i.path, strlen(i.path)); \
+ assert_se(d1 && d2); \
+ if (result) { \
+ printf("%s\n", t); \
+ assert_se(streq(t, result)); \
+ } else assert_se(t == NULL); \
+ strcpy(i.name, d1); \
+ strcpy(i.path, d2); \
+ } while (false)
+
+ expect(i, "%n", "name.service");
+ expect(i, "%N", "name");
+ expect(i, "%p", "name");
+ expect(i, "%i", "");
+ expect(i, "%j", "name");
+ expect(i, "%g", group);
+ expect(i, "%G", gid);
+ expect(i, "%u", user);
+ expect(i, "%U", uid);
+
+ expect(i, "%m", mid);
+ expect(i, "%b", bid);
+ expect(i, "%H", host);
+
+ expect(i2, "%g", group);
+ expect(i2, "%G", gid);
+ expect(i2, "%u", user);
+ expect(i2, "%U", uid);
+
+ expect(i3, "%n", "name@inst.service");
+ expect(i3, "%N", "name@inst");
+ expect(i3, "%p", "name");
+ expect(i3, "%g", group);
+ expect(i3, "%G", gid);
+ expect(i3, "%u", user);
+ expect(i3, "%U", uid);
+
+ expect(i3, "%m", mid);
+ expect(i3, "%b", bid);
+ expect(i3, "%H", host);
+
+ expect(i4, "%g", group);
+ expect(i4, "%G", gid);
+ expect(i4, "%u", user);
+ expect(i4, "%U", uid);
+}
+
+static uint64_t make_cap(int cap) {
+ return ((uint64_t) 1ULL << (uint64_t) cap);
+}
+
+static void test_config_parse_capability_set(void) {
+ /* int config_parse_capability_set(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+ int r;
+ uint64_t capability_bounding_set = 0;
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "CAP_NET_RAW",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == make_cap(CAP_NET_RAW));
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "CAP_NET_ADMIN",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == (make_cap(CAP_NET_RAW) | make_cap(CAP_NET_ADMIN)));
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "~CAP_NET_ADMIN",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == make_cap(CAP_NET_RAW));
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == UINT64_C(0));
+
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, "~",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(cap_test_all(capability_bounding_set));
+
+ capability_bounding_set = 0;
+ r = config_parse_capability_set(NULL, "fake", 1, "section", 1,
+ "CapabilityBoundingSet", 0, " 'CAP_NET_RAW' WAT_CAP??? CAP_NET_ADMIN CAP'_trailing_garbage",
+ &capability_bounding_set, NULL);
+ assert_se(r >= 0);
+ assert_se(capability_bounding_set == (make_cap(CAP_NET_RAW) | make_cap(CAP_NET_ADMIN)));
+}
+
+static void test_config_parse_rlimit(void) {
+ struct rlimit * rl[_RLIMIT_MAX] = {};
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "55", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 55);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == rl[RLIMIT_NOFILE]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "55:66", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 55);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 66);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == rl[RLIMIT_NOFILE]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "infinity:infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == rl[RLIMIT_NOFILE]->rlim_max);
+
+ rl[RLIMIT_NOFILE]->rlim_cur = 10;
+ rl[RLIMIT_NOFILE]->rlim_max = 20;
+
+ /* Invalid values don't change rl */
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "10:20:30", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 10);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 20);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "wat:wat", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 10);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 20);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "66:wat", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 10);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 20);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitNOFILE", RLIMIT_NOFILE, "200:100", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_NOFILE]);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_cur == 10);
+ assert_se(rl[RLIMIT_NOFILE]->rlim_max == 20);
+
+ rl[RLIMIT_NOFILE] = mfree(rl[RLIMIT_NOFILE]);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "56", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == 56);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == rl[RLIMIT_CPU]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "57s", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == 57);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == rl[RLIMIT_CPU]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "40s:1m", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == 40);
+ assert_se(rl[RLIMIT_CPU]->rlim_max == 60);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == rl[RLIMIT_CPU]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitCPU", RLIMIT_CPU, "1234ms", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_CPU]);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == 2);
+ assert_se(rl[RLIMIT_CPU]->rlim_cur == rl[RLIMIT_CPU]->rlim_max);
+
+ rl[RLIMIT_CPU] = mfree(rl[RLIMIT_CPU]);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "58", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 58);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "58:60", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 58);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_max == 60);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "59s", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 59 * USEC_PER_SEC);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "59s:123s", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 59 * USEC_PER_SEC);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_max == 123 * USEC_PER_SEC);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "infinity:infinity", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == RLIM_INFINITY);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ assert_se(config_parse_rlimit(NULL, "fake", 1, "section", 1, "LimitRTTIME", RLIMIT_RTTIME, "2345ms", rl, NULL) >= 0);
+ assert_se(rl[RLIMIT_RTTIME]);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == 2345 * USEC_PER_MSEC);
+ assert_se(rl[RLIMIT_RTTIME]->rlim_cur == rl[RLIMIT_RTTIME]->rlim_max);
+
+ rl[RLIMIT_RTTIME] = mfree(rl[RLIMIT_RTTIME]);
+}
+
+static void test_config_parse_pass_environ(void) {
+ /* int config_parse_pass_environ(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+ int r;
+ _cleanup_strv_free_ char **passenv = NULL;
+
+ r = config_parse_pass_environ(NULL, "fake", 1, "section", 1,
+ "PassEnvironment", 0, "A B",
+ &passenv, NULL);
+ assert_se(r >= 0);
+ assert_se(strv_length(passenv) == 2);
+ assert_se(streq(passenv[0], "A"));
+ assert_se(streq(passenv[1], "B"));
+
+ r = config_parse_pass_environ(NULL, "fake", 1, "section", 1,
+ "PassEnvironment", 0, "",
+ &passenv, NULL);
+ assert_se(r >= 0);
+ assert_se(strv_isempty(passenv));
+
+ r = config_parse_pass_environ(NULL, "fake", 1, "section", 1,
+ "PassEnvironment", 0, "'invalid name' 'normal_name' A=1 'special_name$$' \\",
+ &passenv, NULL);
+ assert_se(r >= 0);
+ assert_se(strv_length(passenv) == 1);
+ assert_se(streq(passenv[0], "normal_name"));
+}
+
+static void test_unit_dump_config_items(void) {
+ unit_dump_config_items(stdout);
+}
+
+static void test_config_parse_memory_limit(void) {
+ /* int config_parse_memory_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) */
+ CGroupContext c;
+ struct limit_test {
+ const char *limit;
+ const char *value;
+ uint64_t *result;
+ uint64_t expected;
+ } limit_tests[]= {
+ { "MemoryMin", "", &c.memory_min, CGROUP_LIMIT_MIN },
+ { "MemoryMin", "0", &c.memory_min, CGROUP_LIMIT_MIN },
+ { "MemoryMin", "10", &c.memory_min, 10 },
+ { "MemoryMin", "infinity", &c.memory_min, CGROUP_LIMIT_MAX },
+ { "MemoryLow", "", &c.memory_low, CGROUP_LIMIT_MIN },
+ { "MemoryLow", "0", &c.memory_low, CGROUP_LIMIT_MIN },
+ { "MemoryLow", "10", &c.memory_low, 10 },
+ { "MemoryLow", "infinity", &c.memory_low, CGROUP_LIMIT_MAX },
+ { "MemoryHigh", "", &c.memory_high, CGROUP_LIMIT_MAX },
+ { "MemoryHigh", "0", &c.memory_high, CGROUP_LIMIT_DUMMY },
+ { "MemoryHigh", "10", &c.memory_high, 10 },
+ { "MemoryHigh", "infinity", &c.memory_high, CGROUP_LIMIT_MAX },
+ { "MemoryMax", "", &c.memory_max, CGROUP_LIMIT_MAX },
+ { "MemoryMax", "0", &c.memory_max, CGROUP_LIMIT_DUMMY },
+ { "MemoryMax", "10", &c.memory_max, 10 },
+ { "MemoryMax", "infinity", &c.memory_max, CGROUP_LIMIT_MAX },
+ };
+ size_t i;
+ int r;
+
+ for (i = 0; i < ELEMENTSOF(limit_tests); i++) {
+ c.memory_min = CGROUP_LIMIT_DUMMY;
+ c.memory_low = CGROUP_LIMIT_DUMMY;
+ c.memory_high = CGROUP_LIMIT_DUMMY;
+ c.memory_max = CGROUP_LIMIT_DUMMY;
+ r = config_parse_memory_limit(NULL, "fake", 1, "section", 1,
+ limit_tests[i].limit, 1,
+ limit_tests[i].value, &c, NULL);
+ log_info("%s=%s\t%"PRIu64"==%"PRIu64"\n",
+ limit_tests[i].limit, limit_tests[i].value,
+ *limit_tests[i].result, limit_tests[i].expected);
+ assert_se(r >= 0);
+ assert_se(*limit_tests[i].result == limit_tests[i].expected);
+ }
+
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ r = test_unit_file_get_set();
+ test_config_parse_exec();
+ test_config_parse_log_extra_fields();
+ test_config_parse_capability_set();
+ test_config_parse_rlimit();
+ test_config_parse_pass_environ();
+ TEST_REQ_RUNNING_SYSTEMD(test_install_printf());
+ test_unit_dump_config_items();
+ test_config_parse_memory_limit();
+
+ return r;
+}
diff --git a/src/test/test-local-addresses.c b/src/test/test-local-addresses.c
new file mode 100644
index 0000000..7eeddd2
--- /dev/null
+++ b/src/test/test-local-addresses.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "in-addr-util.h"
+#include "local-addresses.h"
+#include "tests.h"
+
+static void print_local_addresses(struct local_address *a, unsigned n) {
+ unsigned i;
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *b = NULL;
+
+ assert_se(in_addr_to_string(a[i].family, &a[i].address, &b) >= 0);
+ printf("%s if%i scope=%i metric=%u address=%s\n", af_to_name(a[i].family), a[i].ifindex, a[i].scope, a[i].metric, b);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ struct local_address *a;
+ int n;
+
+ test_setup_logging(LOG_DEBUG);
+
+ a = NULL;
+ n = local_addresses(NULL, 0, AF_UNSPEC, &a);
+ assert_se(n >= 0);
+
+ printf("Local Addresses:\n");
+ print_local_addresses(a, (unsigned) n);
+ a = mfree(a);
+
+ n = local_gateways(NULL, 0, AF_UNSPEC, &a);
+ assert_se(n >= 0);
+
+ printf("Local Gateways:\n");
+ print_local_addresses(a, (unsigned) n);
+ free(a);
+
+ return 0;
+}
diff --git a/src/test/test-locale-util.c b/src/test/test-locale-util.c
new file mode 100644
index 0000000..62f8220
--- /dev/null
+++ b/src/test/test-locale-util.c
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "kbd-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "strv.h"
+#include "util.h"
+
+static void test_get_locales(void) {
+ _cleanup_strv_free_ char **locales = NULL;
+ char **p;
+ int r;
+
+ r = get_locales(&locales);
+ assert_se(r >= 0);
+ assert_se(locales);
+
+ STRV_FOREACH(p, locales) {
+ puts(*p);
+ assert_se(locale_is_valid(*p));
+ }
+}
+
+static void test_locale_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(locale_is_valid("en_EN.utf8"));
+ assert_se(locale_is_valid("fr_FR.utf8"));
+ assert_se(locale_is_valid("fr_FR@euro"));
+ assert_se(locale_is_valid("fi_FI"));
+ assert_se(locale_is_valid("POSIX"));
+ assert_se(locale_is_valid("C"));
+
+ assert_se(!locale_is_valid(""));
+ assert_se(!locale_is_valid("/usr/bin/foo"));
+ assert_se(!locale_is_valid("\x01gar\x02 bage\x03"));
+}
+
+static void test_locale_is_installed(void) {
+ log_info("/* %s */", __func__);
+
+ /* Always available */
+ assert_se(locale_is_installed("POSIX") > 0);
+ assert_se(locale_is_installed("C") > 0);
+
+ /* Might, or might not be installed. */
+ assert_se(locale_is_installed("en_EN.utf8") >= 0);
+ assert_se(locale_is_installed("fr_FR.utf8") >= 0);
+ assert_se(locale_is_installed("fr_FR@euro") >= 0);
+ assert_se(locale_is_installed("fi_FI") >= 0);
+
+ /* Definitely not valid */
+ assert_se(locale_is_installed("") == 0);
+ assert_se(locale_is_installed("/usr/bin/foo") == 0);
+ assert_se(locale_is_installed("\x01gar\x02 bage\x03") == 0);
+
+ /* Definitely not installed */
+ assert_se(locale_is_installed("zz_ZZ") == 0);
+}
+
+static void test_keymaps(void) {
+ _cleanup_strv_free_ char **kmaps = NULL;
+ char **p;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(!keymap_is_valid(""));
+ assert_se(!keymap_is_valid("/usr/bin/foo"));
+ assert_se(!keymap_is_valid("\x01gar\x02 bage\x03"));
+
+ r = get_keymaps(&kmaps);
+ if (r == -ENOENT)
+ return; /* skip test if no keymaps are installed */
+
+ assert_se(r >= 0);
+ assert_se(kmaps);
+
+ STRV_FOREACH(p, kmaps) {
+ puts(*p);
+ assert_se(keymap_is_valid(*p));
+ }
+
+ assert_se(keymap_is_valid("uk"));
+ assert_se(keymap_is_valid("de-nodeadkeys"));
+ assert_se(keymap_is_valid("ANSI-dvorak"));
+ assert_se(keymap_is_valid("unicode"));
+}
+
+#define dump_glyph(x) log_info(STRINGIFY(x) ": %s", special_glyph(x))
+static void dump_special_glyphs(void) {
+ assert_cc(SPECIAL_GLYPH_TOUCH + 1 == _SPECIAL_GLYPH_MAX);
+
+ log_info("/* %s */", __func__);
+
+ log_info("is_locale_utf8: %s", yes_no(is_locale_utf8()));
+
+ dump_glyph(SPECIAL_GLYPH_TREE_VERTICAL);
+ dump_glyph(SPECIAL_GLYPH_TREE_BRANCH);
+ dump_glyph(SPECIAL_GLYPH_TREE_RIGHT);
+ dump_glyph(SPECIAL_GLYPH_TREE_SPACE);
+ dump_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET);
+ dump_glyph(SPECIAL_GLYPH_BLACK_CIRCLE);
+ dump_glyph(SPECIAL_GLYPH_BULLET);
+ dump_glyph(SPECIAL_GLYPH_ARROW);
+ dump_glyph(SPECIAL_GLYPH_ELLIPSIS);
+ dump_glyph(SPECIAL_GLYPH_MU);
+ dump_glyph(SPECIAL_GLYPH_CHECK_MARK);
+ dump_glyph(SPECIAL_GLYPH_CROSS_MARK);
+ dump_glyph(SPECIAL_GLYPH_EXTERNAL_LINK);
+ dump_glyph(SPECIAL_GLYPH_ECSTATIC_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_HAPPY_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_NEUTRAL_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_UNHAPPY_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_DEPRESSED_SMILEY);
+ dump_glyph(SPECIAL_GLYPH_LOCK_AND_KEY);
+ dump_glyph(SPECIAL_GLYPH_TOUCH);
+}
+
+int main(int argc, char *argv[]) {
+ test_get_locales();
+ test_locale_is_valid();
+ test_locale_is_installed();
+ test_keymaps();
+
+ dump_special_glyphs();
+
+ return 0;
+}
diff --git a/src/test/test-log.c b/src/test/test-log.c
new file mode 100644
index 0000000..a2a5373
--- /dev/null
+++ b/src/test/test-log.c
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+#include <unistd.h>
+
+#include "format-util.h"
+#include "log.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "util.h"
+
+assert_cc(LOG_REALM_REMOVE_LEVEL(LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, LOG_FTP | LOG_DEBUG))
+ == LOG_REALM_SYSTEMD);
+assert_cc(LOG_REALM_REMOVE_LEVEL(LOG_REALM_PLUS_LEVEL(LOG_REALM_UDEV, LOG_LOCAL7 | LOG_DEBUG))
+ == LOG_REALM_UDEV);
+assert_cc((LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, LOG_LOCAL3 | LOG_DEBUG) & LOG_FACMASK)
+ == LOG_LOCAL3);
+assert_cc((LOG_REALM_PLUS_LEVEL(LOG_REALM_UDEV, LOG_USER | LOG_INFO) & LOG_PRIMASK)
+ == LOG_INFO);
+
+assert_cc(IS_SYNTHETIC_ERRNO(SYNTHETIC_ERRNO(EINVAL)));
+assert_cc(!IS_SYNTHETIC_ERRNO(EINVAL));
+assert_cc(IS_SYNTHETIC_ERRNO(SYNTHETIC_ERRNO(0)));
+assert_cc(!IS_SYNTHETIC_ERRNO(0));
+
+#define X10(x) x x x x x x x x x x
+#define X100(x) X10(X10(x))
+#define X1000(x) X100(X10(x))
+
+static void test_file(void) {
+ log_info("__FILE__: %s", __FILE__);
+ log_info("RELATIVE_SOURCE_PATH: %s", RELATIVE_SOURCE_PATH);
+ log_info("PROJECT_FILE: %s", PROJECT_FILE);
+
+ assert(startswith(__FILE__, RELATIVE_SOURCE_PATH "/"));
+}
+
+static void test_log_struct(void) {
+ log_struct(LOG_INFO,
+ "MESSAGE=Waldo PID="PID_FMT" (no errno)", getpid_cached(),
+ "SERVICE=piepapo");
+
+ log_struct_errno(LOG_INFO, EILSEQ,
+ "MESSAGE=Waldo PID="PID_FMT": %m (normal)", getpid_cached(),
+ "SERVICE=piepapo");
+
+ log_struct_errno(LOG_INFO, SYNTHETIC_ERRNO(EILSEQ),
+ "MESSAGE=Waldo PID="PID_FMT": %m (synthetic)", getpid_cached(),
+ "SERVICE=piepapo");
+
+ log_struct(LOG_INFO,
+ "MESSAGE=Foobar PID="PID_FMT, getpid_cached(),
+ "FORMAT_STR_TEST=1=%i A=%c 2=%hi 3=%li 4=%lli 1=%p foo=%s 2.5=%g 3.5=%g 4.5=%Lg",
+ (int) 1, 'A', (short) 2, (long int) 3, (long long int) 4, (void*) 1, "foo", (float) 2.5f, (double) 3.5, (long double) 4.5,
+ "SUFFIX=GOT IT");
+}
+
+static void test_long_lines(void) {
+ log_object_internal(LOG_NOTICE,
+ EUCLEAN,
+ X1000("abcd_") ".txt",
+ 1000000,
+ X1000("fff") "unc",
+ "OBJECT=",
+ X1000("obj_") "ect",
+ "EXTRA=",
+ X1000("ext_") "tra",
+ "asdfasdf %s asdfasdfa", "foobar");
+}
+
+static void test_log_syntax(void) {
+ assert_se(log_syntax("unit", LOG_ERR, "filename", 10, EINVAL, "EINVAL: %s: %m", "hogehoge") == -EINVAL);
+ assert_se(log_syntax("unit", LOG_ERR, "filename", 10, -ENOENT, "ENOENT: %s: %m", "hogehoge") == -ENOENT);
+ assert_se(log_syntax("unit", LOG_ERR, "filename", 10, SYNTHETIC_ERRNO(ENOTTY), "ENOTTY: %s: %m", "hogehoge") == -ENOTTY);
+}
+
+int main(int argc, char* argv[]) {
+ int target;
+
+ test_file();
+
+ for (target = 0; target < _LOG_TARGET_MAX; target++) {
+ log_set_target(target);
+ log_open();
+
+ test_log_struct();
+ test_long_lines();
+ test_log_syntax();
+ }
+
+ assert_se(log_info_errno(SYNTHETIC_ERRNO(EUCLEAN), "foo") == -EUCLEAN);
+
+ return 0;
+}
diff --git a/src/test/test-loop-block.c b/src/test/test-loop-block.c
new file mode 100644
index 0000000..298ded9
--- /dev/null
+++ b/src/test/test-loop-block.c
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <linux/loop.h>
+#include <pthread.h>
+
+#include "alloc-util.h"
+#include "dissect-image.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "gpt.h"
+#include "missing_loop.h"
+#include "mkfs-util.h"
+#include "mount-util.h"
+#include "namespace-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "virt.h"
+
+#define N_THREADS 5
+#define N_ITERATIONS 3
+
+static usec_t end = 0;
+
+static void* thread_func(void *ptr) {
+ int fd = PTR_TO_FD(ptr);
+ int r;
+
+ for (unsigned i = 0; i < N_ITERATIONS; i++) {
+ _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
+ _cleanup_(umount_and_rmdir_and_freep) char *mounted = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected = NULL;
+
+ if (now(CLOCK_MONOTONIC) >= end) {
+ log_notice("Time's up, exiting thread's loop");
+ break;
+ }
+
+ log_notice("> Thread iteration #%u.", i);
+
+ assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
+
+ r = loop_device_make(fd, O_RDONLY, 0, UINT64_MAX, LO_FLAGS_PARTSCAN, &loop);
+ if (r < 0)
+ log_error_errno(r, "Failed to allocate loopback device: %m");
+ assert_se(r >= 0);
+
+ log_notice("Acquired loop device %s, will mount on %s", loop->node, mounted);
+
+ r = dissect_image(loop->fd, NULL, NULL, DISSECT_IMAGE_READ_ONLY, &dissected);
+ if (r < 0)
+ log_error_errno(r, "Failed dissect loopback device %s: %m", loop->node);
+ assert_se(r >= 0);
+
+ log_info("Dissected loop device %s", loop->node);
+
+ for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) {
+ if (!dissected->partitions[d].found)
+ continue;
+
+ log_notice("Found node %s fstype %s designator %s",
+ dissected->partitions[d].node,
+ dissected->partitions[d].fstype,
+ partition_designator_to_string(d));
+ }
+
+ assert_se(dissected->partitions[PARTITION_ESP].found);
+ assert_se(dissected->partitions[PARTITION_ESP].node);
+ assert_se(dissected->partitions[PARTITION_XBOOTLDR].found);
+ assert_se(dissected->partitions[PARTITION_XBOOTLDR].node);
+ assert_se(dissected->partitions[PARTITION_ROOT].found);
+ assert_se(dissected->partitions[PARTITION_ROOT].node);
+ assert_se(dissected->partitions[PARTITION_HOME].found);
+ assert_se(dissected->partitions[PARTITION_HOME].node);
+
+ r = dissected_image_mount(dissected, mounted, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
+ log_notice_errno(r, "Mounted %s → %s: %m", loop->node, mounted);
+ assert_se(r >= 0);
+
+ log_notice("Unmounting %s", mounted);
+ mounted = umount_and_rmdir_and_free(mounted);
+
+ log_notice("Unmounted.");
+
+ dissected = dissected_image_unref(dissected);
+
+ log_notice("Detaching loop device %s", loop->node);
+ loop = loop_device_unref(loop);
+ log_notice("Detached loop device.");
+ }
+
+ log_notice("Leaving thread");
+
+ return NULL;
+}
+
+static bool have_root_gpt_type(void) {
+#ifdef GPT_ROOT_NATIVE
+ return true;
+#else
+ return false;
+#endif
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ char *p = NULL, *cmd = NULL;
+ _cleanup_(pclosep) FILE *sfdisk = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
+ _cleanup_close_ int fd = -1;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected = NULL;
+ _cleanup_(umount_and_rmdir_and_freep) char *mounted = NULL;
+ pthread_t threads[N_THREADS];
+ const char *fs;
+ sd_id128_t id;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+ log_show_tid(true);
+ log_show_time(true);
+
+ if (!have_root_gpt_type()) {
+ log_tests_skipped("No root partition GPT defined for this architecture, exiting.");
+ return EXIT_TEST_SKIP;
+ }
+
+ if (detect_container() > 0) {
+ log_tests_skipped("Test not supported in a container, requires udev/uevent notifications.");
+ return EXIT_TEST_SKIP;
+ }
+
+ if (strstr_ptr(ci_environment(), "autopkgtest")) {
+ // FIXME: we should reenable this one day
+ log_tests_skipped("Skipping test on Ubuntu autopkgtest CI, test too slow and installed udev too flakey.");
+ return EXIT_TEST_SKIP;
+ }
+
+ /* This is a test for the loopback block device setup code and it's use by the image dissection
+ * logic: since the kernel APIs are hard use and prone to races, let's test this in a heavy duty
+ * test: we open a bunch of threads and repeatedly allocate and deallocate loopback block devices in
+ * them in parallel, with an image file with a number of partitions. */
+
+ r = detach_mount_namespace();
+ if (ERRNO_IS_PRIVILEGE(r)) {
+ log_tests_skipped("Lacking privileges");
+ return EXIT_TEST_SKIP;
+ }
+
+ FOREACH_STRING(fs, "vfat", "ext4") {
+ r = mkfs_exists(fs);
+ assert_se(r >= 0);
+ if (!r) {
+ log_tests_skipped("mkfs.{vfat|ext4} not installed");
+ return EXIT_TEST_SKIP;
+ }
+ }
+
+ assert_se(r >= 0);
+
+ assert_se(tempfn_random_child("/var/tmp", "sfdisk", &p) >= 0);
+ fd = open(p, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC|O_NOFOLLOW, 0666);
+ assert_se(fd >= 0);
+ assert_se(ftruncate(fd, 256*1024*1024) >= 0);
+
+ assert_se(cmd = strjoin("sfdisk ", p));
+ assert_se(sfdisk = popen(cmd, "we"));
+
+ /* A reasonably complex partition table that fits on a 64K disk */
+ fputs("label: gpt\n"
+ "size=32M, type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B\n"
+ "size=32M, type=BC13C2FF-59E6-4262-A352-B275FD6F7172\n"
+ "size=32M, type=0657FD6D-A4AB-43C4-84E5-0933C84B4F4F\n"
+ "size=32M, type=", sfdisk);
+
+#ifdef GPT_ROOT_NATIVE
+ fprintf(sfdisk, SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(GPT_ROOT_NATIVE));
+#else
+ fprintf(sfdisk, SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(GPT_ROOT_X86_64));
+#endif
+
+ fputs("\n"
+ "size=32M, type=933AC7E1-2EB4-4F13-B844-0E14E2AEF915\n", sfdisk);
+
+ assert_se(pclose(sfdisk) == 0);
+ sfdisk = NULL;
+
+ assert_se(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, LO_FLAGS_PARTSCAN, &loop) >= 0);
+ assert_se(dissect_image(loop->fd, NULL, NULL, 0, &dissected) >= 0);
+
+ assert_se(dissected->partitions[PARTITION_ESP].found);
+ assert_se(dissected->partitions[PARTITION_ESP].node);
+ assert_se(dissected->partitions[PARTITION_XBOOTLDR].found);
+ assert_se(dissected->partitions[PARTITION_XBOOTLDR].node);
+ assert_se(dissected->partitions[PARTITION_ROOT].found);
+ assert_se(dissected->partitions[PARTITION_ROOT].node);
+ assert_se(dissected->partitions[PARTITION_HOME].found);
+ assert_se(dissected->partitions[PARTITION_HOME].node);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(make_filesystem(dissected->partitions[PARTITION_ESP].node, "vfat", "EFI", id, true) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(make_filesystem(dissected->partitions[PARTITION_XBOOTLDR].node, "vfat", "xbootldr", id, true) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(make_filesystem(dissected->partitions[PARTITION_ROOT].node, "ext4", "root", id, true) >= 0);
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+ assert_se(make_filesystem(dissected->partitions[PARTITION_HOME].node, "ext4", "home", id, true) >= 0);
+
+ dissected = dissected_image_unref(dissected);
+ assert_se(dissect_image(loop->fd, NULL, NULL, 0, &dissected) >= 0);
+
+ assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
+
+ /* This first (writable) mount will initialize the mount point dirs, so that the subsequent read-only ones can work */
+ assert_se(dissected_image_mount(dissected, mounted, UID_INVALID, 0) >= 0);
+
+ assert_se(umount_recursive(mounted, 0) >= 0);
+ loop = loop_device_unref(loop);
+
+ log_notice("Threads are being started now");
+
+ /* Let's make sure we run for 10s on slow systems at max */
+ end = usec_add(now(CLOCK_MONOTONIC),
+ slow_tests_enabled() ? 5 * USEC_PER_SEC :
+ 1 * USEC_PER_SEC);
+
+ for (unsigned i = 0; i < N_THREADS; i++)
+ assert_se(pthread_create(threads + i, NULL, thread_func, FD_TO_PTR(fd)) == 0);
+
+ log_notice("All threads started now.");
+
+ for (unsigned i = 0; i < N_THREADS; i++) {
+ log_notice("Joining thread #%u.", i);
+
+ void *k;
+ assert_se(pthread_join(threads[i], &k) == 0);
+ assert_se(k == NULL);
+
+ log_notice("Joined thread #%u.", i);
+ }
+
+ log_notice("Threads are all terminated now.");
+
+ return 0;
+}
diff --git a/src/test/test-loopback.c b/src/test/test-loopback.c
new file mode 100644
index 0000000..58d8c29
--- /dev/null
+++ b/src/test/test-loopback.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "log.h"
+#include "loopback-setup.h"
+#include "tests.h"
+
+int main(int argc, char* argv[]) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = loopback_setup();
+ if (r < 0)
+ log_error_errno(r, "loopback: %m");
+
+ return r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c
new file mode 100644
index 0000000..41df558
--- /dev/null
+++ b/src/test/test-mount-util.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "mount-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_mount_option_mangle(void) {
+ char *opts = NULL;
+ unsigned long f;
+
+ assert_se(mount_option_mangle(NULL, MS_RDONLY|MS_NOSUID, &f, &opts) == 0);
+ assert_se(f == (MS_RDONLY|MS_NOSUID));
+ assert_se(opts == NULL);
+
+ assert_se(mount_option_mangle("", MS_RDONLY|MS_NOSUID, &f, &opts) == 0);
+ assert_se(f == (MS_RDONLY|MS_NOSUID));
+ assert_se(opts == NULL);
+
+ assert_se(mount_option_mangle("ro,nosuid,nodev,noexec", 0, &f, &opts) == 0);
+ assert_se(f == (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC));
+ assert_se(opts == NULL);
+
+ assert_se(mount_option_mangle("ro,nosuid,nodev,noexec,mode=755", 0, &f, &opts) == 0);
+ assert_se(f == (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC));
+ assert_se(streq(opts, "mode=755"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,nosuid,foo,hogehoge,nodev,mode=755", 0, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV));
+ assert_se(streq(opts, "foo,hogehoge,mode=755"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,nosuid,nodev,noexec,relatime,net_cls,net_prio", MS_RDONLY, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME));
+ assert_se(streq(opts, "net_cls,net_prio"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000", MS_RDONLY, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV|MS_RELATIME));
+ assert_se(streq(opts, "size=1630748k,mode=700,uid=1000,gid=1000"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("size=1630748k,rw,gid=1000,,,nodev,relatime,,mode=700,nosuid,uid=1000", MS_RDONLY, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV|MS_RELATIME));
+ assert_se(streq(opts, "size=1630748k,gid=1000,mode=700,uid=1000"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,exec,size=8143984k,nr_inodes=2035996,mode=755", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, &f, &opts) == 0);
+ assert_se(f == (MS_NOSUID|MS_NODEV));
+ assert_se(streq(opts, "size=8143984k,nr_inodes=2035996,mode=755"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,relatime,fmask=0022,,,dmask=0022", MS_RDONLY, &f, &opts) == 0);
+ assert_se(f == MS_RELATIME);
+ assert_se(streq(opts, "fmask=0022,dmask=0022"));
+ opts = mfree(opts);
+
+ assert_se(mount_option_mangle("rw,relatime,fmask=0022,dmask=0022,\"hogehoge", MS_RDONLY, &f, &opts) < 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_mount_option_mangle();
+
+ return 0;
+}
diff --git a/src/test/test-mountpoint-util.c b/src/test/test-mountpoint-util.c
new file mode 100644
index 0000000..47fde5c
--- /dev/null
+++ b/src/test/test-mountpoint-util.c
@@ -0,0 +1,316 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sched.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "log.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+
+static void test_mount_propagation_flags(const char *name, int ret, unsigned long expected) {
+ long unsigned flags;
+
+ log_info("/* %s(%s) */", __func__, name);
+
+ assert_se(mount_propagation_flags_from_string(name, &flags) == ret);
+
+ if (ret >= 0) {
+ const char *c;
+
+ assert_se(flags == expected);
+
+ c = mount_propagation_flags_to_string(flags);
+ if (isempty(name))
+ assert_se(isempty(c));
+ else
+ assert_se(streq(c, name));
+ }
+}
+
+static void test_mnt_id(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_hashmap_free_free_ Hashmap *h = NULL;
+ char *p;
+ void *k;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(f = fopen("/proc/self/mountinfo", "re"));
+ assert_se(h = hashmap_new(&trivial_hash_ops));
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL, *path = NULL;
+ int mnt_id;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r == 0)
+ break;
+ assert_se(r > 0);
+
+ assert_se(sscanf(line, "%i %*s %*s %*s %ms", &mnt_id, &path) == 2);
+#if HAS_FEATURE_MEMORY_SANITIZER
+ /* We don't know the length of the string, so we need to unpoison it one char at a time */
+ for (const char *c = path; ;c++) {
+ msan_unpoison(c, 1);
+ if (!*c)
+ break;
+ }
+#endif
+ log_debug("mountinfo: %s → %i", path, mnt_id);
+
+ assert_se(hashmap_put(h, INT_TO_PTR(mnt_id), path) >= 0);
+ path = NULL;
+ }
+
+ HASHMAP_FOREACH_KEY(p, k, h) {
+ int mnt_id = PTR_TO_INT(k), mnt_id2;
+
+ r = path_get_mnt_id(p, &mnt_id2);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get the mnt id of %s: %m\n", p);
+ continue;
+ }
+
+ if (mnt_id == mnt_id2) {
+ log_debug("mnt ids of %s is %i\n", p, mnt_id);
+ continue;
+ } else
+ log_debug("mnt ids of %s are %i, %i\n", p, mnt_id, mnt_id2);
+
+ /* The ids don't match? If so, then there are two mounts on the same path, let's check if
+ * that's really the case */
+ char *t = hashmap_get(h, INT_TO_PTR(mnt_id2));
+ log_debug("the other path for mnt id %i is %s\n", mnt_id2, t);
+ assert_se(path_equal(p, t));
+ }
+}
+
+static void test_path_is_mount_point(void) {
+ int fd;
+ char tmp_dir[] = "/tmp/test-path-is-mount-point-XXXXXX";
+ _cleanup_free_ char *file1 = NULL, *file2 = NULL, *link1 = NULL, *link2 = NULL;
+ _cleanup_free_ char *dir1 = NULL, *dir1file = NULL, *dirlink1 = NULL, *dirlink1file = NULL;
+ _cleanup_free_ char *dir2 = NULL, *dir2file = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(path_is_mount_point("/", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/", NULL, 0) > 0);
+ assert_se(path_is_mount_point("//", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("//", NULL, 0) > 0);
+
+ assert_se(path_is_mount_point("/proc", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/proc", NULL, 0) > 0);
+ assert_se(path_is_mount_point("/proc/", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/proc/", NULL, 0) > 0);
+
+ assert_se(path_is_mount_point("/proc/1", NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point("/proc/1", NULL, 0) == 0);
+ assert_se(path_is_mount_point("/proc/1/", NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point("/proc/1/", NULL, 0) == 0);
+
+ assert_se(path_is_mount_point("/sys", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/sys", NULL, 0) > 0);
+ assert_se(path_is_mount_point("/sys/", NULL, AT_SYMLINK_FOLLOW) > 0);
+ assert_se(path_is_mount_point("/sys/", NULL, 0) > 0);
+
+ /* we'll create a hierarchy of different kinds of dir/file/link
+ * layouts:
+ *
+ * <tmp>/file1, <tmp>/file2
+ * <tmp>/link1 -> file1, <tmp>/link2 -> file2
+ * <tmp>/dir1/
+ * <tmp>/dir1/file
+ * <tmp>/dirlink1 -> dir1
+ * <tmp>/dirlink1file -> dirlink1/file
+ * <tmp>/dir2/
+ * <tmp>/dir2/file
+ */
+
+ /* file mountpoints */
+ assert_se(mkdtemp(tmp_dir) != NULL);
+ file1 = path_join(tmp_dir, "file1");
+ assert_se(file1);
+ file2 = path_join(tmp_dir, "file2");
+ assert_se(file2);
+ fd = open(file1, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
+ assert_se(fd > 0);
+ close(fd);
+ fd = open(file2, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
+ assert_se(fd > 0);
+ close(fd);
+ link1 = path_join(tmp_dir, "link1");
+ assert_se(link1);
+ assert_se(symlink("file1", link1) == 0);
+ link2 = path_join(tmp_dir, "link2");
+ assert_se(link1);
+ assert_se(symlink("file2", link2) == 0);
+
+ assert_se(path_is_mount_point(file1, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(file1, NULL, 0) == 0);
+ assert_se(path_is_mount_point(link1, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(link1, NULL, 0) == 0);
+
+ /* directory mountpoints */
+ dir1 = path_join(tmp_dir, "dir1");
+ assert_se(dir1);
+ assert_se(mkdir(dir1, 0755) == 0);
+ dirlink1 = path_join(tmp_dir, "dirlink1");
+ assert_se(dirlink1);
+ assert_se(symlink("dir1", dirlink1) == 0);
+ dirlink1file = path_join(tmp_dir, "dirlink1file");
+ assert_se(dirlink1file);
+ assert_se(symlink("dirlink1/file", dirlink1file) == 0);
+ dir2 = path_join(tmp_dir, "dir2");
+ assert_se(dir2);
+ assert_se(mkdir(dir2, 0755) == 0);
+
+ assert_se(path_is_mount_point(dir1, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(dir1, NULL, 0) == 0);
+ assert_se(path_is_mount_point(dirlink1, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(dirlink1, NULL, 0) == 0);
+
+ /* file in subdirectory mountpoints */
+ dir1file = path_join(dir1, "file");
+ assert_se(dir1file);
+ fd = open(dir1file, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
+ assert_se(fd > 0);
+ close(fd);
+
+ assert_se(path_is_mount_point(dir1file, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(dir1file, NULL, 0) == 0);
+ assert_se(path_is_mount_point(dirlink1file, NULL, AT_SYMLINK_FOLLOW) == 0);
+ assert_se(path_is_mount_point(dirlink1file, NULL, 0) == 0);
+
+ /* these tests will only work as root */
+ if (mount(file1, file2, NULL, MS_BIND, NULL) >= 0) {
+ int rf, rt, rdf, rdt, rlf, rlt, rl1f, rl1t;
+ const char *file2d;
+
+ /* files */
+ /* capture results in vars, to avoid dangling mounts on failure */
+ log_info("%s: %s", __func__, file2);
+ rf = path_is_mount_point(file2, NULL, 0);
+ rt = path_is_mount_point(file2, NULL, AT_SYMLINK_FOLLOW);
+
+ file2d = strjoina(file2, "/");
+ log_info("%s: %s", __func__, file2d);
+ rdf = path_is_mount_point(file2d, NULL, 0);
+ rdt = path_is_mount_point(file2d, NULL, AT_SYMLINK_FOLLOW);
+
+ log_info("%s: %s", __func__, link2);
+ rlf = path_is_mount_point(link2, NULL, 0);
+ rlt = path_is_mount_point(link2, NULL, AT_SYMLINK_FOLLOW);
+
+ assert_se(umount(file2) == 0);
+
+ assert_se(rf == 1);
+ assert_se(rt == 1);
+ assert_se(rdf == -ENOTDIR);
+ assert_se(rdt == -ENOTDIR);
+ assert_se(rlf == 0);
+ assert_se(rlt == 1);
+
+ /* dirs */
+ dir2file = path_join(dir2, "file");
+ assert_se(dir2file);
+ fd = open(dir2file, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
+ assert_se(fd > 0);
+ close(fd);
+
+ assert_se(mount(dir2, dir1, NULL, MS_BIND, NULL) >= 0);
+
+ log_info("%s: %s", __func__, dir1);
+ rf = path_is_mount_point(dir1, NULL, 0);
+ rt = path_is_mount_point(dir1, NULL, AT_SYMLINK_FOLLOW);
+ log_info("%s: %s", __func__, dirlink1);
+ rlf = path_is_mount_point(dirlink1, NULL, 0);
+ rlt = path_is_mount_point(dirlink1, NULL, AT_SYMLINK_FOLLOW);
+ log_info("%s: %s", __func__, dirlink1file);
+ /* its parent is a mount point, but not /file itself */
+ rl1f = path_is_mount_point(dirlink1file, NULL, 0);
+ rl1t = path_is_mount_point(dirlink1file, NULL, AT_SYMLINK_FOLLOW);
+
+ assert_se(umount(dir1) == 0);
+
+ assert_se(rf == 1);
+ assert_se(rt == 1);
+ assert_se(rlf == 0);
+ assert_se(rlt == 1);
+ assert_se(rl1f == 0);
+ assert_se(rl1t == 0);
+
+ } else
+ printf("Skipping bind mount file test: %m\n");
+
+ assert_se(rm_rf(tmp_dir, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+}
+
+static void test_fd_is_mount_point(void) {
+ _cleanup_close_ int fd = -1;
+
+ log_info("/* %s */", __func__);
+
+ fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY);
+ assert_se(fd >= 0);
+
+ /* Not allowed, since "/" is a path, not a plain filename */
+ assert_se(fd_is_mount_point(fd, "/", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, ".", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, "./", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, "..", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, "../", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, "", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, "/proc", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, "/proc/", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, "proc/sys", 0) == -EINVAL);
+ assert_se(fd_is_mount_point(fd, "proc/sys/", 0) == -EINVAL);
+
+ /* This one definitely is a mount point */
+ assert_se(fd_is_mount_point(fd, "proc", 0) > 0);
+ assert_se(fd_is_mount_point(fd, "proc/", 0) > 0);
+
+ /* /root's entire raison d'etre is to be on the root file system (i.e. not in /home/ which might be
+ * split off), so that the user can always log in, so it cannot be a mount point unless the system is
+ * borked. Let's allow for it to be missing though. */
+ assert_se(IN_SET(fd_is_mount_point(fd, "root", 0), -ENOENT, 0));
+ assert_se(IN_SET(fd_is_mount_point(fd, "root/", 0), -ENOENT, 0));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ /* let's move into our own mount namespace with all propagation from the host turned off, so that
+ * /proc/self/mountinfo is static and constant for the whole time our test runs. */
+ if (unshare(CLONE_NEWNS) < 0) {
+ if (!ERRNO_IS_PRIVILEGE(errno))
+ return log_error_errno(errno, "Failed to detach mount namespace: %m");
+
+ log_notice("Lacking privilege to create separate mount namespace, proceeding in originating mount namespace.");
+ } else
+ assert_se(mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) >= 0);
+
+ test_mount_propagation_flags("shared", 0, MS_SHARED);
+ test_mount_propagation_flags("slave", 0, MS_SLAVE);
+ test_mount_propagation_flags("private", 0, MS_PRIVATE);
+ test_mount_propagation_flags(NULL, 0, 0);
+ test_mount_propagation_flags("", 0, 0);
+ test_mount_propagation_flags("xxxx", -EINVAL, 0);
+ test_mount_propagation_flags(" ", -EINVAL, 0);
+
+ test_mnt_id();
+ test_path_is_mount_point();
+ test_fd_is_mount_point();
+
+ return 0;
+}
diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c
new file mode 100644
index 0000000..e234f54
--- /dev/null
+++ b/src/test/test-namespace.c
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "namespace.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+static void test_namespace_cleanup_tmpdir(void) {
+ {
+ _cleanup_(namespace_cleanup_tmpdirp) char *dir;
+ assert_se(dir = strdup(RUN_SYSTEMD_EMPTY));
+ }
+
+ {
+ _cleanup_(namespace_cleanup_tmpdirp) char *dir;
+ assert_se(dir = strdup("/tmp/systemd-test-namespace.XXXXXX"));
+ assert_se(mkdtemp(dir));
+ }
+}
+
+static void test_tmpdir(const char *id, const char *A, const char *B) {
+ _cleanup_free_ char *a, *b;
+ struct stat x, y;
+ char *c, *d;
+
+ assert_se(setup_tmp_dirs(id, &a, &b) == 0);
+
+ assert_se(stat(a, &x) >= 0);
+ assert_se(stat(b, &y) >= 0);
+
+ assert_se(S_ISDIR(x.st_mode));
+ assert_se(S_ISDIR(y.st_mode));
+
+ if (!streq(a, RUN_SYSTEMD_EMPTY)) {
+ assert_se(startswith(a, A));
+ assert_se((x.st_mode & 01777) == 0700);
+ c = strjoina(a, "/tmp");
+ assert_se(stat(c, &x) >= 0);
+ assert_se(S_ISDIR(x.st_mode));
+ assert_se(FLAGS_SET(x.st_mode, 01777));
+ assert_se(rmdir(c) >= 0);
+ assert_se(rmdir(a) >= 0);
+ }
+
+ if (!streq(b, RUN_SYSTEMD_EMPTY)) {
+ assert_se(startswith(b, B));
+ assert_se((y.st_mode & 01777) == 0700);
+ d = strjoina(b, "/tmp");
+ assert_se(stat(d, &y) >= 0);
+ assert_se(S_ISDIR(y.st_mode));
+ assert_se(FLAGS_SET(y.st_mode, 01777));
+ assert_se(rmdir(d) >= 0);
+ assert_se(rmdir(b) >= 0);
+ }
+}
+
+static void test_netns(void) {
+ _cleanup_close_pair_ int s[2] = { -1, -1 };
+ pid_t pid1, pid2, pid3;
+ int r, n = 0;
+ siginfo_t si;
+
+ if (geteuid() > 0) {
+ (void) log_tests_skipped("not root");
+ return;
+ }
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, s) >= 0);
+
+ pid1 = fork();
+ assert_se(pid1 >= 0);
+
+ if (pid1 == 0) {
+ r = setup_netns(s);
+ assert_se(r >= 0);
+ _exit(r);
+ }
+
+ pid2 = fork();
+ assert_se(pid2 >= 0);
+
+ if (pid2 == 0) {
+ r = setup_netns(s);
+ assert_se(r >= 0);
+ exit(r);
+ }
+
+ pid3 = fork();
+ assert_se(pid3 >= 0);
+
+ if (pid3 == 0) {
+ r = setup_netns(s);
+ assert_se(r >= 0);
+ exit(r);
+ }
+
+ r = wait_for_terminate(pid1, &si);
+ assert_se(r >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ n += si.si_status;
+
+ r = wait_for_terminate(pid2, &si);
+ assert_se(r >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ n += si.si_status;
+
+ r = wait_for_terminate(pid3, &si);
+ assert_se(r >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ n += si.si_status;
+
+ assert_se(n == 1);
+}
+
+static void test_protect_kernel_logs(void) {
+ int r;
+ pid_t pid;
+ static const NamespaceInfo ns_info = {
+ .protect_kernel_logs = true,
+ };
+
+ if (geteuid() > 0) {
+ (void) log_tests_skipped("not root");
+ return;
+ }
+
+ /* In a container we likely don't have access to /dev/kmsg */
+ if (detect_container() > 0) {
+ (void) log_tests_skipped("in container");
+ return;
+ }
+
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open("/dev/kmsg", O_RDONLY | O_CLOEXEC);
+ assert_se(fd > 0);
+
+ r = setup_namespace(NULL,
+ NULL,
+ NULL,
+ &ns_info,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL, 0,
+ NULL, 0,
+ NULL, 0,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ 0,
+ NULL);
+ assert_se(r == 0);
+
+ assert_se(setresuid(UID_NOBODY, UID_NOBODY, UID_NOBODY) >= 0);
+ assert_se(open("/dev/kmsg", O_RDONLY | O_CLOEXEC) < 0);
+ assert_se(errno == EACCES);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("ns-kernellogs", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[]) {
+ sd_id128_t bid;
+ char boot_id[SD_ID128_STRING_MAX];
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL, *zz = NULL;
+
+ test_setup_logging(LOG_INFO);
+
+ test_namespace_cleanup_tmpdir();
+
+ if (!have_namespaces()) {
+ log_tests_skipped("Don't have namespace support");
+ return EXIT_TEST_SKIP;
+ }
+
+ assert_se(sd_id128_get_boot(&bid) >= 0);
+ sd_id128_to_string(bid, boot_id);
+
+ x = strjoin("/tmp/systemd-private-", boot_id, "-abcd.service-");
+ y = strjoin("/var/tmp/systemd-private-", boot_id, "-abcd.service-");
+ assert_se(x && y);
+
+ test_tmpdir("abcd.service", x, y);
+
+ z = strjoin("/tmp/systemd-private-", boot_id, "-sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device-");
+ zz = strjoin("/var/tmp/systemd-private-", boot_id, "-sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device-");
+
+ assert_se(z && zz);
+
+ test_tmpdir("sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device", z, zz);
+
+ test_netns();
+ test_protect_kernel_logs();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-netlink-manual.c b/src/test/test-netlink-manual.c
new file mode 100644
index 0000000..49aca68
--- /dev/null
+++ b/src/test/test-netlink-manual.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "macro.h"
+#include "module-util.h"
+#include "tests.h"
+#include "util.h"
+
+static int load_module(const char *mod_name) {
+ _cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
+ _cleanup_(kmod_module_unref_listp) struct kmod_list *list = NULL;
+ struct kmod_list *l;
+ int r;
+
+ ctx = kmod_new(NULL, NULL);
+ if (!ctx)
+ return log_oom();
+
+ r = kmod_module_new_from_lookup(ctx, mod_name, &list);
+ if (r < 0)
+ return r;
+
+ kmod_list_foreach(l, list) {
+ _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL;
+
+ mod = kmod_module_get_module(l);
+
+ r = kmod_module_probe_insert_module(mod, 0, NULL, NULL, NULL, NULL);
+ if (r > 0)
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+static int test_tunnel_configure(sd_netlink *rtnl) {
+ int r;
+ sd_netlink_message *m, *n;
+ struct in_addr local, remote;
+
+ /* skip test if module cannot be loaded */
+ r = load_module("ipip");
+ if (r < 0)
+ return log_tests_skipped_errno(r, "failed to load module 'ipip'");
+
+ r = load_module("sit");
+ if (r < 0)
+ return log_tests_skipped_errno(r, "failed to load module 'sit'");
+
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+
+ /* IPIP tunnel */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0) >= 0);
+ assert_se(m);
+
+ assert_se(sd_netlink_message_append_string(m, IFLA_IFNAME, "ipip-tunnel") >= 0);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_MTU, 1234)>= 0);
+
+ assert_se(sd_netlink_message_open_container(m, IFLA_LINKINFO) >= 0);
+
+ assert_se(sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "ipip") >= 0);
+
+ inet_pton(AF_INET, "192.168.21.1", &local.s_addr);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_IPTUN_LOCAL, local.s_addr) >= 0);
+
+ inet_pton(AF_INET, "192.168.21.2", &remote.s_addr);
+ assert_se(sd_netlink_message_append_u32(m, IFLA_IPTUN_REMOTE, remote.s_addr) >= 0);
+
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, m, -1, 0) == 1);
+
+ assert_se((m = sd_netlink_message_unref(m)) == NULL);
+
+ /* sit */
+ assert_se(sd_rtnl_message_new_link(rtnl, &n, RTM_NEWLINK, 0) >= 0);
+ assert_se(n);
+
+ assert_se(sd_netlink_message_append_string(n, IFLA_IFNAME, "sit-tunnel") >= 0);
+ assert_se(sd_netlink_message_append_u32(n, IFLA_MTU, 1234)>= 0);
+
+ assert_se(sd_netlink_message_open_container(n, IFLA_LINKINFO) >= 0);
+
+ assert_se(sd_netlink_message_open_container_union(n, IFLA_INFO_DATA, "sit") >= 0);
+
+ assert_se(sd_netlink_message_append_u8(n, IFLA_IPTUN_PROTO, IPPROTO_IPIP) >= 0);
+
+ inet_pton(AF_INET, "192.168.21.3", &local.s_addr);
+ assert_se(sd_netlink_message_append_u32(n, IFLA_IPTUN_LOCAL, local.s_addr) >= 0);
+
+ inet_pton(AF_INET, "192.168.21.4", &remote.s_addr);
+ assert_se(sd_netlink_message_append_u32(n, IFLA_IPTUN_REMOTE, remote.s_addr) >= 0);
+
+ assert_se(sd_netlink_message_close_container(n) >= 0);
+ assert_se(sd_netlink_message_close_container(n) >= 0);
+
+ assert_se(sd_netlink_call(rtnl, n, -1, 0) == 1);
+
+ assert_se((n = sd_netlink_message_unref(n)) == NULL);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char *argv[]) {
+ sd_netlink *rtnl;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ assert_se(rtnl);
+
+ r = test_tunnel_configure(rtnl);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+
+ return r;
+}
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
new file mode 100644
index 0000000..6ec1cff
--- /dev/null
+++ b/src/test/test-ns.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "namespace.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ const char * const writable[] = {
+ "/home",
+ "-/home/lennart/projects/foobar", /* this should be masked automatically */
+ NULL
+ };
+
+ const char * const readonly[] = {
+ /* "/", */
+ /* "/usr", */
+ "/boot",
+ "/lib",
+ "/usr/lib",
+ "-/lib64",
+ "-/usr/lib64",
+ NULL
+ };
+
+ const char *inaccessible[] = {
+ "/home/lennart/projects",
+ NULL
+ };
+
+ static const NamespaceInfo ns_info = {
+ .private_dev = true,
+ .protect_control_groups = true,
+ .protect_kernel_tunables = true,
+ .protect_kernel_modules = true,
+ .protect_proc = PROTECT_PROC_NOACCESS,
+ .proc_subset = PROC_SUBSET_PID,
+ };
+
+ char *root_directory;
+ char *projects_directory;
+ int r;
+ char tmp_dir[] = "/tmp/systemd-private-XXXXXX",
+ var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(tmp_dir));
+ assert_se(mkdtemp(var_tmp_dir));
+
+ root_directory = getenv("TEST_NS_CHROOT");
+ projects_directory = getenv("TEST_NS_PROJECTS");
+
+ if (projects_directory)
+ inaccessible[0] = projects_directory;
+
+ log_info("Inaccessible directory: '%s'", inaccessible[0]);
+ if (root_directory)
+ log_info("Chroot: '%s'", root_directory);
+ else
+ log_info("Not chrooted");
+
+ r = setup_namespace(root_directory,
+ NULL,
+ NULL,
+ &ns_info,
+ (char **) writable,
+ (char **) readonly,
+ (char **) inaccessible,
+ NULL,
+ &(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1,
+ &(TemporaryFileSystem) { .path = (char*) "/var", .options = (char*) "ro" }, 1,
+ NULL,
+ 0,
+ tmp_dir,
+ var_tmp_dir,
+ NULL,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ 0,
+ NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set up namespace: %m");
+
+ log_info("Usage:\n"
+ " sudo TEST_NS_PROJECTS=/home/lennart/projects ./test-ns\n"
+ " sudo TEST_NS_CHROOT=/home/alban/debian-tree TEST_NS_PROJECTS=/home/alban/debian-tree/home/alban/Documents ./test-ns");
+
+ return 1;
+ }
+
+ execl("/bin/sh", "/bin/sh", NULL);
+ log_error_errno(errno, "execl(): %m");
+
+ return 1;
+}
diff --git a/src/test/test-nscd-flush.c b/src/test/test-nscd-flush.c
new file mode 100644
index 0000000..1a5a808
--- /dev/null
+++ b/src/test/test-nscd-flush.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "main-func.h"
+#include "nscd-flush.h"
+#include "strv.h"
+#include "tests.h"
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = nscd_flush_cache(STRV_MAKE("group", "passwd", "hosts"));
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush NSCD cache");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/test/test-nss.c b/src/test/test-nss.c
new file mode 100644
index 0000000..2e9414d
--- /dev/null
+++ b/src/test/test-nss.c
@@ -0,0 +1,536 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <dlfcn.h>
+#include <net/if.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "format-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "local-addresses.h"
+#include "log.h"
+#include "main-func.h"
+#include "nss-util.h"
+#include "path-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static const char* nss_status_to_string(enum nss_status status, char *buf, size_t buf_len) {
+ switch (status) {
+ case NSS_STATUS_TRYAGAIN:
+ return "NSS_STATUS_TRYAGAIN";
+ case NSS_STATUS_UNAVAIL:
+ return "NSS_STATUS_UNAVAIL";
+ case NSS_STATUS_NOTFOUND:
+ return "NSS_STATUS_NOTFOUND";
+ case NSS_STATUS_SUCCESS:
+ return "NSS_STATUS_SUCCESS";
+ case NSS_STATUS_RETURN:
+ return "NSS_STATUS_RETURN";
+ default:
+ snprintf(buf, buf_len, "%i", status);
+ return buf;
+ }
+};
+
+static const char* af_to_string(int family, char *buf, size_t buf_len) {
+ const char *name;
+
+ if (family == AF_UNSPEC)
+ return "*";
+
+ name = af_to_name(family);
+ if (name)
+ return name;
+
+ snprintf(buf, buf_len, "%i", family);
+ return buf;
+}
+
+static void* open_handle(const char *dir, const char *module, int flags) {
+ const char *path = NULL;
+ void *handle;
+
+ if (dir)
+ path = strjoina(dir, "/libnss_", module, ".so.2");
+ if (!path || access(path, F_OK) < 0)
+ path = strjoina("libnss_", module, ".so.2");
+
+ log_debug("Using %s", path);
+ handle = dlopen(path, flags);
+ if (!handle)
+ log_error("Failed to load module %s: %s", module, dlerror());
+ return handle;
+}
+
+static int print_gaih_addrtuples(const struct gaih_addrtuple *tuples) {
+ int n = 0;
+
+ for (const struct gaih_addrtuple *it = tuples; it; it = it->next) {
+ _cleanup_free_ char *a = NULL;
+ union in_addr_union u;
+ int r;
+ char family_name[DECIMAL_STR_MAX(int)];
+ char ifname[IF_NAMESIZE + 1];
+
+ memcpy(&u, it->addr, 16);
+ r = in_addr_to_string(it->family, &u, &a);
+ assert_se(IN_SET(r, 0, -EAFNOSUPPORT));
+ if (r == -EAFNOSUPPORT)
+ assert_se(a = hexmem(it->addr, 16));
+
+ if (it->scopeid == 0)
+ goto numerical_index;
+
+ if (!format_ifname(it->scopeid, ifname)) {
+ log_warning_errno(errno, "if_indextoname(%d) failed: %m", it->scopeid);
+ numerical_index:
+ xsprintf(ifname, "%i", it->scopeid);
+ };
+
+ log_info(" \"%s\" %s %s %%%s",
+ it->name,
+ af_to_string(it->family, family_name, sizeof family_name),
+ a,
+ ifname);
+ n ++;
+ }
+ return n;
+}
+
+static void print_struct_hostent(struct hostent *host, const char *canon) {
+ char **s;
+
+ log_info(" \"%s\"", host->h_name);
+ STRV_FOREACH(s, host->h_aliases)
+ log_info(" alias \"%s\"", *s);
+ STRV_FOREACH(s, host->h_addr_list) {
+ union in_addr_union u;
+ _cleanup_free_ char *a = NULL;
+ char family_name[DECIMAL_STR_MAX(int)];
+ int r;
+
+ assert_se((unsigned) host->h_length == FAMILY_ADDRESS_SIZE(host->h_addrtype));
+ memcpy(&u, *s, host->h_length);
+ r = in_addr_to_string(host->h_addrtype, &u, &a);
+ assert_se(r == 0);
+ log_info(" %s %s",
+ af_to_string(host->h_addrtype, family_name, sizeof family_name),
+ a);
+ }
+ if (canon)
+ log_info(" canonical: \"%s\"", canon);
+}
+
+static void test_gethostbyname4_r(void *handle, const char *module, const char *name) {
+ const char *fname;
+ _nss_gethostbyname4_r_t f;
+ char buffer[2000];
+ struct gaih_addrtuple *pat = NULL;
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ int32_t ttl = INT32_MAX; /* nss-dns wants to return the lowest ttl,
+ and will access this variable through *ttlp,
+ so we need to set it to something.
+ I'm not sure if this is a bug in nss-dns
+ or not. */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ int n;
+
+ fname = strjoina("_nss_", module, "_gethostbyname4_r");
+ f = dlsym(handle, fname);
+ log_debug("dlsym(0x%p, %s) → 0x%p", handle, fname, f);
+ if (!f) {
+ log_info("%s not defined", fname);
+ return;
+ }
+
+ status = f(name, &pat, buffer, sizeof buffer, &errno1, &errno2, &ttl);
+ if (status == NSS_STATUS_SUCCESS) {
+ log_info("%s(\"%s\") → status=%s%-20spat=buffer+0x%tx errno=%d/%s h_errno=%d/%s ttl=%"PRIi32,
+ fname, name,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ pat ? (char*) pat - buffer : 0,
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2),
+ ttl);
+ n = print_gaih_addrtuples(pat);
+ } else {
+ log_info("%s(\"%s\") → status=%s%-20spat=0x%p errno=%d/%s h_errno=%d/%s",
+ fname, name,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ pat,
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2));
+ n = 0;
+ }
+
+ if (STR_IN_SET(module, "resolve", "mymachines") && status == NSS_STATUS_UNAVAIL)
+ return;
+
+ if (STR_IN_SET(module, "myhostname", "resolve") && streq(name, "localhost")) {
+ assert_se(status == NSS_STATUS_SUCCESS);
+ assert_se(n == 2);
+ }
+}
+
+static void test_gethostbyname3_r(void *handle, const char *module, const char *name, int af) {
+ const char *fname;
+ _nss_gethostbyname3_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ int32_t ttl = INT32_MAX; /* nss-dns wants to return the lowest ttl,
+ and will access this variable through *ttlp,
+ so we need to set it to something.
+ I'm not sure if this is a bug in nss-dns
+ or not. */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+ char *canon;
+ char family_name[DECIMAL_STR_MAX(int)];
+
+ fname = strjoina("_nss_", module, "_gethostbyname3_r");
+ f = dlsym(handle, fname);
+ log_debug("dlsym(0x%p, %s) → 0x%p", handle, fname, f);
+ if (!f) {
+ log_info("%s not defined", fname);
+ return;
+ }
+
+ status = f(name, af, &host, buffer, sizeof buffer, &errno1, &errno2, &ttl, &canon);
+ log_info("%s(\"%s\", %s) → status=%s%-20serrno=%d/%s h_errno=%d/%s ttl=%"PRIi32,
+ fname, name, af_to_string(af, family_name, sizeof family_name),
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2),
+ ttl);
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, canon);
+}
+
+static void test_gethostbyname2_r(void *handle, const char *module, const char *name, int af) {
+ const char *fname;
+ _nss_gethostbyname2_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+ char family_name[DECIMAL_STR_MAX(int)];
+
+ fname = strjoina("_nss_", module, "_gethostbyname2_r");
+ f = dlsym(handle, fname);
+ log_debug("dlsym(0x%p, %s) → 0x%p", handle, fname, f);
+ if (!f) {
+ log_info("%s not defined", fname);
+ return;
+ }
+
+ status = f(name, af, &host, buffer, sizeof buffer, &errno1, &errno2);
+ log_info("%s(\"%s\", %s) → status=%s%-20serrno=%d/%s h_errno=%d/%s",
+ fname, name, af_to_string(af, family_name, sizeof family_name),
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2));
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, NULL);
+}
+
+static void test_gethostbyname_r(void *handle, const char *module, const char *name) {
+ const char *fname;
+ _nss_gethostbyname_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+
+ fname = strjoina("_nss_", module, "_gethostbyname_r");
+ f = dlsym(handle, fname);
+ log_debug("dlsym(0x%p, %s) → 0x%p", handle, fname, f);
+ if (!f) {
+ log_info("%s not defined", fname);
+ return;
+ }
+
+ status = f(name, &host, buffer, sizeof buffer, &errno1, &errno2);
+ log_info("%s(\"%s\") → status=%s%-20serrno=%d/%s h_errno=%d/%s",
+ fname, name,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2));
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, NULL);
+}
+
+static void test_gethostbyaddr2_r(void *handle,
+ const char *module,
+ const void* addr, socklen_t len,
+ int af) {
+
+ const char *fname;
+ _nss_gethostbyaddr2_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+ int32_t ttl = INT32_MAX;
+ _cleanup_free_ char *addr_pretty = NULL;
+
+ fname = strjoina("_nss_", module, "_gethostbyaddr2_r");
+ f = dlsym(handle, fname);
+
+ log_full_errno(f ? LOG_DEBUG : LOG_INFO, errno,
+ "dlsym(0x%p, %s) → 0x%p: %m", handle, fname, f);
+ if (!f) {
+ log_info("%s not defined", fname);
+ return;
+ }
+
+ assert_se(in_addr_to_string(af, addr, &addr_pretty) >= 0);
+
+ status = f(addr, len, af, &host, buffer, sizeof buffer, &errno1, &errno2, &ttl);
+ log_info("%s(\"%s\") → status=%s%-20serrno=%d/%s h_errno=%d/%s ttl=%"PRIi32,
+ fname, addr_pretty,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2),
+ ttl);
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, NULL);
+}
+
+static void test_gethostbyaddr_r(void *handle,
+ const char *module,
+ const void* addr, socklen_t len,
+ int af) {
+
+ const char *fname;
+ _nss_gethostbyaddr_r_t f;
+ char buffer[2000];
+ int errno1 = 999, errno2 = 999; /* nss-dns doesn't set those */
+ enum nss_status status;
+ char pretty_status[DECIMAL_STR_MAX(enum nss_status)];
+ struct hostent host;
+ _cleanup_free_ char *addr_pretty = NULL;
+
+ fname = strjoina("_nss_", module, "_gethostbyaddr_r");
+ f = dlsym(handle, fname);
+
+ log_full_errno(f ? LOG_DEBUG : LOG_INFO, errno,
+ "dlsym(0x%p, %s) → 0x%p: %m", handle, fname, f);
+ if (!f) {
+ log_info("%s not defined", fname);
+ return;
+ }
+
+ assert_se(in_addr_to_string(af, addr, &addr_pretty) >= 0);
+
+ status = f(addr, len, af, &host, buffer, sizeof buffer, &errno1, &errno2);
+ log_info("%s(\"%s\") → status=%s%-20serrno=%d/%s h_errno=%d/%s",
+ fname, addr_pretty,
+ nss_status_to_string(status, pretty_status, sizeof pretty_status), "\n",
+ errno1, errno_to_name(errno1) ?: "---",
+ errno2, hstrerror(errno2));
+ if (status == NSS_STATUS_SUCCESS)
+ print_struct_hostent(&host, NULL);
+}
+
+static void test_byname(void *handle, const char *module, const char *name) {
+ test_gethostbyname4_r(handle, module, name);
+ puts("");
+
+ test_gethostbyname3_r(handle, module, name, AF_INET);
+ puts("");
+ test_gethostbyname3_r(handle, module, name, AF_INET6);
+ puts("");
+ test_gethostbyname3_r(handle, module, name, AF_UNSPEC);
+ puts("");
+ test_gethostbyname3_r(handle, module, name, AF_LOCAL);
+ puts("");
+
+ test_gethostbyname2_r(handle, module, name, AF_INET);
+ puts("");
+ test_gethostbyname2_r(handle, module, name, AF_INET6);
+ puts("");
+ test_gethostbyname2_r(handle, module, name, AF_UNSPEC);
+ puts("");
+ test_gethostbyname2_r(handle, module, name, AF_LOCAL);
+ puts("");
+
+ test_gethostbyname_r(handle, module, name);
+ puts("");
+}
+
+static void test_byaddr(void *handle,
+ const char *module,
+ const void* addr, socklen_t len,
+ int af) {
+ test_gethostbyaddr2_r(handle, module, addr, len, af);
+ puts("");
+
+ test_gethostbyaddr_r(handle, module, addr, len, af);
+ puts("");
+}
+
+static int make_addresses(struct local_address **addresses) {
+ int n;
+ size_t n_alloc;
+ _cleanup_free_ struct local_address *addrs = NULL;
+
+ n = local_addresses(NULL, 0, AF_UNSPEC, &addrs);
+ if (n < 0)
+ log_info_errno(n, "Failed to query local addresses: %m");
+
+ n_alloc = n; /* we _can_ do that */
+ if (!GREEDY_REALLOC(addrs, n_alloc, n + 3))
+ return log_oom();
+
+ addrs[n++] = (struct local_address) { .family = AF_INET,
+ .address.in = { htobe32(0x7F000001) } };
+ addrs[n++] = (struct local_address) { .family = AF_INET,
+ .address.in = { htobe32(0x7F000002) } };
+ addrs[n++] = (struct local_address) { .family = AF_INET6,
+ .address.in6 = in6addr_loopback };
+ return 0;
+}
+
+static int test_one_module(const char *dir,
+ const char *module,
+ char **names,
+ struct local_address *addresses,
+ int n_addresses) {
+ void *handle;
+ char **name;
+
+ log_info("======== %s ========", module);
+
+ handle = open_handle(dir, module, RTLD_LAZY|RTLD_NODELETE);
+ if (!handle)
+ return -EINVAL;
+
+ STRV_FOREACH(name, names)
+ test_byname(handle, module, *name);
+
+ for (int i = 0; i < n_addresses; i++)
+ test_byaddr(handle, module,
+ &addresses[i].address,
+ FAMILY_ADDRESS_SIZE(addresses[i].family),
+ addresses[i].family);
+
+ log_info(" ");
+ dlclose(handle);
+ return 0;
+}
+
+static int parse_argv(int argc, char **argv,
+ char ***the_modules,
+ char ***the_names,
+ struct local_address **the_addresses, int *n_addresses) {
+
+ int r, n = 0;
+ _cleanup_strv_free_ char **modules = NULL, **names = NULL;
+ _cleanup_free_ struct local_address *addrs = NULL;
+ size_t n_allocated = 0;
+
+ if (argc > 1)
+ modules = strv_new(argv[1]);
+ else
+ modules = strv_new(
+#if ENABLE_NSS_MYHOSTNAME
+ "myhostname",
+#endif
+#if ENABLE_NSS_RESOLVE
+ "resolve",
+#endif
+#if ENABLE_NSS_MYMACHINES
+ "mymachines",
+#endif
+ "dns");
+ if (!modules)
+ return -ENOMEM;
+
+ if (argc > 2) {
+ char **name;
+ int family;
+ union in_addr_union address;
+
+ STRV_FOREACH(name, argv + 2) {
+ r = in_addr_from_string_auto(*name, &family, &address);
+ if (r < 0) {
+ /* assume this is a name */
+ r = strv_extend(&names, *name);
+ if (r < 0)
+ return r;
+ } else {
+ if (!GREEDY_REALLOC0(addrs, n_allocated, n + 1))
+ return -ENOMEM;
+
+ addrs[n++] = (struct local_address) { .family = family,
+ .address = address };
+ }
+ }
+ } else {
+ _cleanup_free_ char *hostname;
+
+ hostname = gethostname_malloc();
+ if (!hostname)
+ return -ENOMEM;
+
+ names = strv_new("localhost", "_gateway", "foo_no_such_host", hostname);
+ if (!names)
+ return -ENOMEM;
+
+ n = make_addresses(&addrs);
+ if (n < 0)
+ return n;
+ }
+
+ *the_modules = modules;
+ *the_names = names;
+ modules = names = NULL;
+ *the_addresses = addrs;
+ *n_addresses = n;
+ addrs = NULL;
+ return 0;
+}
+
+static int run(int argc, char **argv) {
+ _cleanup_free_ char *dir = NULL;
+ _cleanup_strv_free_ char **modules = NULL, **names = NULL;
+ _cleanup_free_ struct local_address *addresses = NULL;
+ int n_addresses = 0;
+ char **module;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = parse_argv(argc, argv, &modules, &names, &addresses, &n_addresses);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse arguments: %m");
+ return EXIT_FAILURE;
+ }
+
+ dir = dirname_malloc(argv[0]);
+ if (!dir)
+ return log_oom();
+
+ STRV_FOREACH(module, modules) {
+ r = test_one_module(dir, *module, names, addresses, n_addresses);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/test/test-offline-passwd.c b/src/test/test-offline-passwd.c
new file mode 100644
index 0000000..1a961d1
--- /dev/null
+++ b/src/test/test-offline-passwd.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "offline-passwd.h"
+#include "user-util.h"
+#include "format-util.h"
+#include "tests.h"
+
+static char *arg_root = NULL;
+
+static void test_resolve_one(const char *name) {
+ bool relaxed = name || arg_root;
+
+ if (!name)
+ name = "root";
+
+ log_info("/* %s(\"%s\") */", __func__, name);
+
+ _cleanup_(hashmap_freep) Hashmap *uid_cache = NULL, *gid_cache = NULL;
+ uid_t uid = UID_INVALID;
+ gid_t gid = GID_INVALID;
+ int r;
+
+ r = name_to_uid_offline(arg_root, name, &uid, &uid_cache);
+ log_info_errno(r, "name_to_uid_offline: %s → "UID_FMT": %m", name, uid);
+ assert_se(relaxed || r == 0);
+
+ r = name_to_uid_offline(arg_root, name, &uid, &uid_cache);
+ log_info_errno(r, "name_to_uid_offline: %s → "UID_FMT": %m", name, uid);
+ assert_se(relaxed || r == 0);
+
+ r = name_to_gid_offline(arg_root, name, &gid, &gid_cache);
+ log_info_errno(r, "name_to_gid_offline: %s → "GID_FMT": %m", name, gid);
+ assert_se(relaxed || r == 0);
+
+ r = name_to_gid_offline(arg_root, name, &gid, &gid_cache);
+ log_info_errno(r, "name_to_gid_offline: %s → "GID_FMT": %m", name, gid);
+ assert_se(relaxed || r == 0);
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "root", required_argument, NULL, 'r' },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "r:", options, NULL)) >= 0)
+ switch(c) {
+ case 'r':
+ arg_root = optarg;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = parse_argv(argc, argv);
+ if (r < 0)
+ return r;
+
+ if (optind >= argc)
+ test_resolve_one(NULL);
+ else
+ while (optind < argc)
+ test_resolve_one(argv[optind++]);
+
+ return 0;
+}
diff --git a/src/test/test-ordered-set.c b/src/test/test-ordered-set.c
new file mode 100644
index 0000000..0fbdd97
--- /dev/null
+++ b/src/test/test-ordered-set.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "ordered-set.h"
+#include "string-util.h"
+#include "strv.h"
+
+static void test_set_steal_first(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_ordered_set_free_ OrderedSet *m = NULL;
+ int seen[3] = {};
+ char *val;
+
+ m = ordered_set_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(ordered_set_put(m, (void*) "1") == 1);
+ assert_se(ordered_set_put(m, (void*) "22") == 1);
+ assert_se(ordered_set_put(m, (void*) "333") == 1);
+
+ ordered_set_print(stdout, "SET=", m);
+
+ while ((val = ordered_set_steal_first(m)))
+ seen[strlen(val) - 1]++;
+
+ assert_se(seen[0] == 1 && seen[1] == 1 && seen[2] == 1);
+
+ assert_se(ordered_set_isempty(m));
+
+ ordered_set_print(stdout, "SET=", m);
+}
+
+typedef struct Item {
+ int seen;
+} Item;
+static void item_seen(Item *item) {
+ item->seen++;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(item_hash_ops, void, trivial_hash_func, trivial_compare_func, Item, item_seen);
+
+static void test_set_free_with_hash_ops(void) {
+ OrderedSet *m;
+ struct Item items[4] = {};
+
+ log_info("/* %s */", __func__);
+
+ assert_se(m = ordered_set_new(&item_hash_ops));
+
+ for (size_t i = 0; i < ELEMENTSOF(items) - 1; i++)
+ assert_se(ordered_set_put(m, items + i) == 1);
+
+ for (size_t i = 0; i < ELEMENTSOF(items) - 1; i++)
+ assert_se(ordered_set_put(m, items + i) == 0); /* We get 0 here, because we use trivial hash
+ * ops. Also see below... */
+
+ m = ordered_set_free(m);
+ assert_se(items[0].seen == 1);
+ assert_se(items[1].seen == 1);
+ assert_se(items[2].seen == 1);
+ assert_se(items[3].seen == 0);
+}
+
+static void test_set_put(void) {
+ _cleanup_ordered_set_free_ OrderedSet *m = NULL;
+ _cleanup_free_ char **t = NULL, *str = NULL;
+
+ log_info("/* %s */", __func__);
+
+ m = ordered_set_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(ordered_set_put(m, (void*) "1") == 1);
+ assert_se(ordered_set_put(m, (void*) "22") == 1);
+ assert_se(ordered_set_put(m, (void*) "333") == 1);
+ assert_se(ordered_set_put(m, (void*) "333") == 0);
+ assert_se(ordered_set_remove(m, (void*) "333"));
+ assert_se(ordered_set_put(m, (void*) "333") == 1);
+ assert_se(ordered_set_put(m, (void*) "333") == 0);
+ assert_se(ordered_set_put(m, (void*) "22") == 0);
+
+ assert_se(str = strdup("333"));
+ assert_se(ordered_set_put(m, str) == -EEXIST); /* ... and we get -EEXIST here, because we use
+ * non-trivial hash ops. */
+
+ assert_se(t = ordered_set_get_strv(m));
+ assert_se(streq(t[0], "1"));
+ assert_se(streq(t[1], "22"));
+ assert_se(streq(t[2], "333"));
+ assert_se(!t[3]);
+
+ ordered_set_print(stdout, "FOO=", m);
+}
+
+static void test_set_put_string_set(void) {
+ _cleanup_ordered_set_free_free_ OrderedSet *m = NULL;
+ _cleanup_ordered_set_free_ OrderedSet *q = NULL;
+ _cleanup_free_ char **final = NULL; /* "just free" because the strings are in the set */
+ void *t;
+
+ log_info("/* %s */", __func__);
+
+ m = ordered_set_new(&string_hash_ops);
+ assert_se(m);
+
+ q = ordered_set_new(&string_hash_ops);
+ assert_se(q);
+
+ assert_se(t = strdup("1"));
+ assert_se(ordered_set_put(m, t) == 1);
+ assert_se(t = strdup("22"));
+ assert_se(ordered_set_put(m, t) == 1);
+ assert_se(t = strdup("333"));
+ assert_se(ordered_set_put(m, t) == 1);
+
+ assert_se(ordered_set_put(q, (void*) "11") == 1);
+ assert_se(ordered_set_put(q, (void*) "22") == 1);
+ assert_se(ordered_set_put(q, (void*) "33") == 1);
+
+ assert_se(ordered_set_put_string_set(m, q) == 2);
+
+ assert_se(final = ordered_set_get_strv(m));
+ assert_se(strv_equal(final, STRV_MAKE("1", "22", "333", "11", "33")));
+
+ ordered_set_print(stdout, "BAR=", m);
+}
+
+int main(int argc, const char *argv[]) {
+ test_set_steal_first();
+ test_set_free_with_hash_ops();
+ test_set_put();
+ test_set_put_string_set();
+
+ return 0;
+}
diff --git a/src/test/test-os-util.c b/src/test/test-os-util.c
new file mode 100644
index 0000000..ef63026
--- /dev/null
+++ b/src/test/test-os-util.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "log.h"
+#include "os-util.h"
+#include "tests.h"
+
+static void test_path_is_os_tree(void) {
+ assert_se(path_is_os_tree("/") > 0);
+ assert_se(path_is_os_tree("/etc") == 0);
+ assert_se(path_is_os_tree("/idontexist") == -ENOENT);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_path_is_os_tree();
+
+ return 0;
+}
diff --git a/src/test/test-parse-util.c b/src/test/test-parse-util.c
new file mode 100644
index 0000000..1c96909
--- /dev/null
+++ b/src/test/test-parse-util.c
@@ -0,0 +1,998 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <linux/loadavg.h>
+#include <locale.h>
+#include <math.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "log.h"
+#include "parse-util.h"
+#include "string-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+
+static void test_parse_boolean(void) {
+ assert_se(parse_boolean("1") == 1);
+ assert_se(parse_boolean("y") == 1);
+ assert_se(parse_boolean("Y") == 1);
+ assert_se(parse_boolean("yes") == 1);
+ assert_se(parse_boolean("YES") == 1);
+ assert_se(parse_boolean("true") == 1);
+ assert_se(parse_boolean("TRUE") == 1);
+ assert_se(parse_boolean("on") == 1);
+ assert_se(parse_boolean("ON") == 1);
+
+ assert_se(parse_boolean("0") == 0);
+ assert_se(parse_boolean("n") == 0);
+ assert_se(parse_boolean("N") == 0);
+ assert_se(parse_boolean("no") == 0);
+ assert_se(parse_boolean("NO") == 0);
+ assert_se(parse_boolean("false") == 0);
+ assert_se(parse_boolean("FALSE") == 0);
+ assert_se(parse_boolean("off") == 0);
+ assert_se(parse_boolean("OFF") == 0);
+
+ assert_se(parse_boolean("garbage") < 0);
+ assert_se(parse_boolean("") < 0);
+ assert_se(parse_boolean("full") < 0);
+}
+
+static void test_parse_pid(void) {
+ int r;
+ pid_t pid;
+
+ r = parse_pid("100", &pid);
+ assert_se(r == 0);
+ assert_se(pid == 100);
+
+ r = parse_pid("0x7FFFFFFF", &pid);
+ assert_se(r == 0);
+ assert_se(pid == 2147483647);
+
+ pid = 65; /* pid is left unchanged on ERANGE. Set to known arbitrary value. */
+ r = parse_pid("0", &pid);
+ assert_se(r == -ERANGE);
+ assert_se(pid == 65);
+
+ pid = 65; /* pid is left unchanged on ERANGE. Set to known arbitrary value. */
+ r = parse_pid("-100", &pid);
+ assert_se(r == -ERANGE);
+ assert_se(pid == 65);
+
+ pid = 65; /* pid is left unchanged on ERANGE. Set to known arbitrary value. */
+ r = parse_pid("0xFFFFFFFFFFFFFFFFF", &pid);
+ assert_se(r == -ERANGE);
+ assert_se(pid == 65);
+
+ r = parse_pid("junk", &pid);
+ assert_se(r == -EINVAL);
+
+ r = parse_pid("", &pid);
+ assert_se(r == -EINVAL);
+}
+
+static void test_parse_mode(void) {
+ mode_t m;
+
+ assert_se(parse_mode("-1", &m) < 0);
+ assert_se(parse_mode("+1", &m) < 0);
+ assert_se(parse_mode("", &m) < 0);
+ assert_se(parse_mode("888", &m) < 0);
+ assert_se(parse_mode("77777", &m) < 0);
+
+ assert_se(parse_mode("544", &m) >= 0 && m == 0544);
+ assert_se(parse_mode("0544", &m) >= 0 && m == 0544);
+ assert_se(parse_mode("00544", &m) >= 0 && m == 0544);
+ assert_se(parse_mode("777", &m) >= 0 && m == 0777);
+ assert_se(parse_mode("0777", &m) >= 0 && m == 0777);
+ assert_se(parse_mode("00777", &m) >= 0 && m == 0777);
+ assert_se(parse_mode("7777", &m) >= 0 && m == 07777);
+ assert_se(parse_mode("07777", &m) >= 0 && m == 07777);
+ assert_se(parse_mode("007777", &m) >= 0 && m == 07777);
+ assert_se(parse_mode("0", &m) >= 0 && m == 0);
+ assert_se(parse_mode(" 1", &m) >= 0 && m == 1);
+}
+
+static void test_parse_size(void) {
+ uint64_t bytes;
+
+ assert_se(parse_size("", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("111", 1024, &bytes) == 0);
+ assert_se(bytes == 111);
+
+ assert_se(parse_size("111.4", 1024, &bytes) == 0);
+ assert_se(bytes == 111);
+
+ assert_se(parse_size(" 112 B", 1024, &bytes) == 0);
+ assert_se(bytes == 112);
+
+ assert_se(parse_size(" 112.6 B", 1024, &bytes) == 0);
+ assert_se(bytes == 112);
+
+ assert_se(parse_size("3.5 K", 1024, &bytes) == 0);
+ assert_se(bytes == 3*1024 + 512);
+
+ assert_se(parse_size("3. K", 1024, &bytes) == 0);
+ assert_se(bytes == 3*1024);
+
+ assert_se(parse_size("3.0 K", 1024, &bytes) == 0);
+ assert_se(bytes == 3*1024);
+
+ assert_se(parse_size("3. 0 K", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size(" 4 M 11.5K", 1024, &bytes) == 0);
+ assert_se(bytes == 4*1024*1024 + 11 * 1024 + 512);
+
+ assert_se(parse_size("3B3.5G", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("3.5G3B", 1024, &bytes) == 0);
+ assert_se(bytes == 3ULL*1024*1024*1024 + 512*1024*1024 + 3);
+
+ assert_se(parse_size("3.5G 4B", 1024, &bytes) == 0);
+ assert_se(bytes == 3ULL*1024*1024*1024 + 512*1024*1024 + 4);
+
+ assert_se(parse_size("3B3G4T", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("4T3G3B", 1024, &bytes) == 0);
+ assert_se(bytes == (4ULL*1024 + 3)*1024*1024*1024 + 3);
+
+ assert_se(parse_size(" 4 T 3 G 3 B", 1024, &bytes) == 0);
+ assert_se(bytes == (4ULL*1024 + 3)*1024*1024*1024 + 3);
+
+ assert_se(parse_size("12P", 1024, &bytes) == 0);
+ assert_se(bytes == 12ULL * 1024*1024*1024*1024*1024);
+
+ assert_se(parse_size("12P12P", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("3E 2P", 1024, &bytes) == 0);
+ assert_se(bytes == (3 * 1024 + 2ULL) * 1024*1024*1024*1024*1024);
+
+ assert_se(parse_size("12X", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("12.5X", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("12.5e3", 1024, &bytes) == -EINVAL);
+
+ assert_se(parse_size("1024E", 1024, &bytes) == -ERANGE);
+ assert_se(parse_size("-1", 1024, &bytes) == -ERANGE);
+ assert_se(parse_size("-1024E", 1024, &bytes) == -ERANGE);
+
+ assert_se(parse_size("-1024P", 1024, &bytes) == -ERANGE);
+
+ assert_se(parse_size("-10B 20K", 1024, &bytes) == -ERANGE);
+}
+
+static void test_parse_range(void) {
+ unsigned lower, upper;
+
+ /* Successful cases */
+ assert_se(parse_range("111", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 111);
+
+ assert_se(parse_range("111-123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ assert_se(parse_range("123-111", &lower, &upper) == 0);
+ assert_se(lower == 123);
+ assert_se(upper == 111);
+
+ assert_se(parse_range("123-123", &lower, &upper) == 0);
+ assert_se(lower == 123);
+ assert_se(upper == 123);
+
+ assert_se(parse_range("0", &lower, &upper) == 0);
+ assert_se(lower == 0);
+ assert_se(upper == 0);
+
+ assert_se(parse_range("0-15", &lower, &upper) == 0);
+ assert_se(lower == 0);
+ assert_se(upper == 15);
+
+ assert_se(parse_range("15-0", &lower, &upper) == 0);
+ assert_se(lower == 15);
+ assert_se(upper == 0);
+
+ assert_se(parse_range("128-65535", &lower, &upper) == 0);
+ assert_se(lower == 128);
+ assert_se(upper == 65535);
+
+ assert_se(parse_range("1024-4294967295", &lower, &upper) == 0);
+ assert_se(lower == 1024);
+ assert_se(upper == 4294967295);
+
+ /* Leading whitespace is acceptable */
+ assert_se(parse_range(" 111", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 111);
+
+ assert_se(parse_range(" 111-123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ assert_se(parse_range("111- 123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ assert_se(parse_range("\t111-\t123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ assert_se(parse_range(" \t 111- \t 123", &lower, &upper) == 0);
+ assert_se(lower == 111);
+ assert_se(upper == 123);
+
+ /* Error cases, make sure they fail as expected */
+ lower = upper = 9999;
+ assert_se(parse_range("111garbage", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("garbage111", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("garbage", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123garbage", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111garbage-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Empty string */
+ lower = upper = 9999;
+ assert_se(parse_range("", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* 111--123 will pass -123 to safe_atou which returns -ERANGE for negative */
+ assert_se(parse_range("111--123", &lower, &upper) == -ERANGE);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("-111-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123-", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111.4-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123.4", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111,4-123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123,4", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Error on trailing dash */
+ assert_se(parse_range("111-", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123-", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111--", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111- ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Whitespace is not a separator */
+ assert_se(parse_range("111 123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111\t123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111 \t 123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Trailing whitespace is invalid (from safe_atou) */
+ assert_se(parse_range("111 ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111-123 ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111 -123", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111 -123 ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111\t-123\t", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ assert_se(parse_range("111 \t -123 \t ", &lower, &upper) == -EINVAL);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+
+ /* Out of the "unsigned" range, this is 1<<64 */
+ assert_se(parse_range("0-18446744073709551616", &lower, &upper) == -ERANGE);
+ assert_se(lower == 9999);
+ assert_se(upper == 9999);
+}
+
+static void test_safe_atolli(void) {
+ int r;
+ long long l;
+
+ r = safe_atolli("12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atolli(" 12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atolli("-12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atolli(" -12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atolli("0x5", &l);
+ assert_se(r == 0);
+ assert_se(l == 5);
+
+ r = safe_atolli("0o6", &l);
+ assert_se(r == 0);
+ assert_se(l == 6);
+
+ r = safe_atolli("0B101", &l);
+ assert_se(r == 0);
+ assert_se(l == 5);
+
+ r = safe_atolli("12345678901234567890", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atolli("-12345678901234567890", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atolli("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atolli("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atolli("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atolli("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atou16(void) {
+ int r;
+ uint16_t l;
+
+ r = safe_atou16("12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atou16(" 12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atou16("123456", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou16("-1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou16(" -1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou16("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou16("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou16("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou16("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atoi16(void) {
+ int r;
+ int16_t l;
+
+ r = safe_atoi16("-12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atoi16(" -12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atoi16("32767", &l);
+ assert_se(r == 0);
+ assert_se(l == 32767);
+
+ r = safe_atoi16(" 32767", &l);
+ assert_se(r == 0);
+ assert_se(l == 32767);
+
+ r = safe_atoi16("0o11", &l);
+ assert_se(r == 0);
+ assert_se(l == 9);
+
+ r = safe_atoi16("0B110", &l);
+ assert_se(r == 0);
+ assert_se(l == 6);
+
+ r = safe_atoi16("36536", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoi16("-32769", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoi16("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi16("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi16("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi16("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atoux16(void) {
+ int r;
+ uint16_t l;
+
+ r = safe_atoux16("1234", &l);
+ assert_se(r == 0);
+ assert_se(l == 0x1234);
+
+ r = safe_atoux16("abcd", &l);
+ assert_se(r == 0);
+ assert_se(l == 0xabcd);
+
+ r = safe_atoux16(" 1234", &l);
+ assert_se(r == 0);
+ assert_se(l == 0x1234);
+
+ r = safe_atoux16("12345", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux16("-1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux16(" -1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux16("0b1", &l);
+ assert_se(r == 0);
+ assert_se(l == 177);
+
+ r = safe_atoux16("0o70", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux16("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux16("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux16("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux16("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atou64(void) {
+ int r;
+ uint64_t l;
+
+ r = safe_atou64("12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atou64(" 12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 12345);
+
+ r = safe_atou64("0o11", &l);
+ assert_se(r == 0);
+ assert_se(l == 9);
+
+ r = safe_atou64("0b11", &l);
+ assert_se(r == 0);
+ assert_se(l == 3);
+
+ r = safe_atou64("18446744073709551617", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou64("-1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou64(" -1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atou64("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou64("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou64("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atou64("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atoi64(void) {
+ int r;
+ int64_t l;
+
+ r = safe_atoi64("-12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atoi64(" -12345", &l);
+ assert_se(r == 0);
+ assert_se(l == -12345);
+
+ r = safe_atoi64("32767", &l);
+ assert_se(r == 0);
+ assert_se(l == 32767);
+
+ r = safe_atoi64(" 32767", &l);
+ assert_se(r == 0);
+ assert_se(l == 32767);
+
+ r = safe_atoi64(" 0o20", &l);
+ assert_se(r == 0);
+ assert_se(l == 16);
+
+ r = safe_atoi64(" 0b01010", &l);
+ assert_se(r == 0);
+ assert_se(l == 10);
+
+ r = safe_atoi64("9223372036854775813", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoi64("-9223372036854775813", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoi64("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi64("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi64("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoi64("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atoux64(void) {
+ int r;
+ uint64_t l;
+
+ r = safe_atoux64("12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 0x12345);
+
+ r = safe_atoux64(" 12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 0x12345);
+
+ r = safe_atoux64("0x12345", &l);
+ assert_se(r == 0);
+ assert_se(l == 0x12345);
+
+ r = safe_atoux64("0b11011", &l);
+ assert_se(r == 0);
+ assert_se(l == 11603985);
+
+ r = safe_atoux64("0o11011", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux64("18446744073709551617", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux64("-1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux64(" -1", &l);
+ assert_se(r == -ERANGE);
+
+ r = safe_atoux64("junk", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux64("123x", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux64("12.3", &l);
+ assert_se(r == -EINVAL);
+
+ r = safe_atoux64("", &l);
+ assert_se(r == -EINVAL);
+}
+
+static void test_safe_atod(void) {
+ int r;
+ double d;
+ char *e;
+
+ r = safe_atod("junk", &d);
+ assert_se(r == -EINVAL);
+
+ r = safe_atod("0.2244", &d);
+ assert_se(r == 0);
+ assert_se(fabs(d - 0.2244) < 0.000001);
+
+ r = safe_atod("0,5", &d);
+ assert_se(r == -EINVAL);
+
+ errno = 0;
+ strtod("0,5", &e);
+ assert_se(*e == ',');
+
+ r = safe_atod("", &d);
+ assert_se(r == -EINVAL);
+
+ /* Check if this really is locale independent */
+ if (setlocale(LC_NUMERIC, "de_DE.utf8")) {
+
+ r = safe_atod("0.2244", &d);
+ assert_se(r == 0);
+ assert_se(fabs(d - 0.2244) < 0.000001);
+
+ r = safe_atod("0,5", &d);
+ assert_se(r == -EINVAL);
+
+ errno = 0;
+ assert_se(fabs(strtod("0,5", &e) - 0.5) < 0.00001);
+
+ r = safe_atod("", &d);
+ assert_se(r == -EINVAL);
+ }
+
+ /* And check again, reset */
+ assert_se(setlocale(LC_NUMERIC, "C"));
+
+ r = safe_atod("0.2244", &d);
+ assert_se(r == 0);
+ assert_se(fabs(d - 0.2244) < 0.000001);
+
+ r = safe_atod("0,5", &d);
+ assert_se(r == -EINVAL);
+
+ errno = 0;
+ strtod("0,5", &e);
+ assert_se(*e == ',');
+
+ r = safe_atod("", &d);
+ assert_se(r == -EINVAL);
+}
+
+static void test_parse_percent(void) {
+ assert_se(parse_percent("") == -EINVAL);
+ assert_se(parse_percent("foo") == -EINVAL);
+ assert_se(parse_percent("0") == -EINVAL);
+ assert_se(parse_percent("50") == -EINVAL);
+ assert_se(parse_percent("100") == -EINVAL);
+ assert_se(parse_percent("-1") == -EINVAL);
+ assert_se(parse_percent("0%") == 0);
+ assert_se(parse_percent("55%") == 55);
+ assert_se(parse_percent("100%") == 100);
+ assert_se(parse_percent("-7%") == -ERANGE);
+ assert_se(parse_percent("107%") == -ERANGE);
+ assert_se(parse_percent("%") == -EINVAL);
+ assert_se(parse_percent("%%") == -EINVAL);
+ assert_se(parse_percent("%1") == -EINVAL);
+ assert_se(parse_percent("1%%") == -EINVAL);
+ assert_se(parse_percent("3.2%") == -EINVAL);
+}
+
+static void test_parse_percent_unbounded(void) {
+ assert_se(parse_percent_unbounded("101%") == 101);
+ assert_se(parse_percent_unbounded("400%") == 400);
+}
+
+static void test_parse_permille(void) {
+ assert_se(parse_permille("") == -EINVAL);
+ assert_se(parse_permille("foo") == -EINVAL);
+ assert_se(parse_permille("0") == -EINVAL);
+ assert_se(parse_permille("50") == -EINVAL);
+ assert_se(parse_permille("100") == -EINVAL);
+ assert_se(parse_permille("-1") == -EINVAL);
+
+ assert_se(parse_permille("0‰") == 0);
+ assert_se(parse_permille("555‰") == 555);
+ assert_se(parse_permille("1000‰") == 1000);
+ assert_se(parse_permille("-7‰") == -ERANGE);
+ assert_se(parse_permille("1007‰") == -ERANGE);
+ assert_se(parse_permille("‰") == -EINVAL);
+ assert_se(parse_permille("‰‰") == -EINVAL);
+ assert_se(parse_permille("‰1") == -EINVAL);
+ assert_se(parse_permille("1‰‰") == -EINVAL);
+ assert_se(parse_permille("3.2‰") == -EINVAL);
+
+ assert_se(parse_permille("0%") == 0);
+ assert_se(parse_permille("55%") == 550);
+ assert_se(parse_permille("55.5%") == 555);
+ assert_se(parse_permille("100%") == 1000);
+ assert_se(parse_permille("-7%") == -ERANGE);
+ assert_se(parse_permille("107%") == -ERANGE);
+ assert_se(parse_permille("%") == -EINVAL);
+ assert_se(parse_permille("%%") == -EINVAL);
+ assert_se(parse_permille("%1") == -EINVAL);
+ assert_se(parse_permille("1%%") == -EINVAL);
+ assert_se(parse_permille("3.21%") == -EINVAL);
+}
+
+static void test_parse_permille_unbounded(void) {
+ assert_se(parse_permille_unbounded("1001‰") == 1001);
+ assert_se(parse_permille_unbounded("4000‰") == 4000);
+ assert_se(parse_permille_unbounded("2147483647‰") == 2147483647);
+ assert_se(parse_permille_unbounded("2147483648‰") == -ERANGE);
+ assert_se(parse_permille_unbounded("4294967295‰") == -ERANGE);
+ assert_se(parse_permille_unbounded("4294967296‰") == -ERANGE);
+
+ assert_se(parse_permille_unbounded("101%") == 1010);
+ assert_se(parse_permille_unbounded("400%") == 4000);
+ assert_se(parse_permille_unbounded("214748364.7%") == 2147483647);
+ assert_se(parse_permille_unbounded("214748364.8%") == -ERANGE);
+ assert_se(parse_permille_unbounded("429496729.5%") == -ERANGE);
+ assert_se(parse_permille_unbounded("429496729.6%") == -ERANGE);
+}
+
+static void test_parse_nice(void) {
+ int n;
+
+ assert_se(parse_nice("0", &n) >= 0 && n == 0);
+ assert_se(parse_nice("+0", &n) >= 0 && n == 0);
+ assert_se(parse_nice("-1", &n) >= 0 && n == -1);
+ assert_se(parse_nice("-2", &n) >= 0 && n == -2);
+ assert_se(parse_nice("1", &n) >= 0 && n == 1);
+ assert_se(parse_nice("2", &n) >= 0 && n == 2);
+ assert_se(parse_nice("+1", &n) >= 0 && n == 1);
+ assert_se(parse_nice("+2", &n) >= 0 && n == 2);
+ assert_se(parse_nice("-20", &n) >= 0 && n == -20);
+ assert_se(parse_nice("19", &n) >= 0 && n == 19);
+ assert_se(parse_nice("+19", &n) >= 0 && n == 19);
+
+ assert_se(parse_nice("", &n) == -EINVAL);
+ assert_se(parse_nice("-", &n) == -EINVAL);
+ assert_se(parse_nice("+", &n) == -EINVAL);
+ assert_se(parse_nice("xx", &n) == -EINVAL);
+ assert_se(parse_nice("-50", &n) == -ERANGE);
+ assert_se(parse_nice("50", &n) == -ERANGE);
+ assert_se(parse_nice("+50", &n) == -ERANGE);
+ assert_se(parse_nice("-21", &n) == -ERANGE);
+ assert_se(parse_nice("20", &n) == -ERANGE);
+ assert_se(parse_nice("+20", &n) == -ERANGE);
+}
+
+static void test_parse_dev(void) {
+ dev_t dev;
+
+ assert_se(parse_dev("", &dev) == -EINVAL);
+ assert_se(parse_dev("junk", &dev) == -EINVAL);
+ assert_se(parse_dev("0", &dev) == -EINVAL);
+ assert_se(parse_dev("5", &dev) == -EINVAL);
+ assert_se(parse_dev("5:", &dev) == -EINVAL);
+ assert_se(parse_dev(":5", &dev) == -EINVAL);
+ assert_se(parse_dev("-1:-1", &dev) == -EINVAL);
+#if SIZEOF_DEV_T < 8
+ assert_se(parse_dev("4294967295:4294967295", &dev) == -EINVAL);
+#endif
+ assert_se(parse_dev("8:11", &dev) >= 0 && major(dev) == 8 && minor(dev) == 11);
+ assert_se(parse_dev("0:0", &dev) >= 0 && major(dev) == 0 && minor(dev) == 0);
+}
+
+static void test_parse_errno(void) {
+ assert_se(parse_errno("EILSEQ") == EILSEQ);
+ assert_se(parse_errno("EINVAL") == EINVAL);
+ assert_se(parse_errno("0") == 0);
+ assert_se(parse_errno("1") == 1);
+ assert_se(parse_errno("4095") == 4095);
+
+ assert_se(parse_errno("-1") == -ERANGE);
+ assert_se(parse_errno("-3") == -ERANGE);
+ assert_se(parse_errno("4096") == -ERANGE);
+
+ assert_se(parse_errno("") == -EINVAL);
+ assert_se(parse_errno("12.3") == -EINVAL);
+ assert_se(parse_errno("123junk") == -EINVAL);
+ assert_se(parse_errno("junk123") == -EINVAL);
+ assert_se(parse_errno("255EILSEQ") == -EINVAL);
+ assert_se(parse_errno("EINVAL12") == -EINVAL);
+ assert_se(parse_errno("-EINVAL") == -EINVAL);
+ assert_se(parse_errno("EINVALaaa") == -EINVAL);
+}
+
+static void test_parse_syscall_and_errno(void) {
+#if HAVE_SECCOMP
+ _cleanup_free_ char *n = NULL;
+ int e;
+
+ assert_se(parse_syscall_and_errno("uname:EILSEQ", &n, &e) >= 0);
+ assert_se(streq(n, "uname"));
+ assert_se(e == errno_from_name("EILSEQ") && e >= 0);
+ n = mfree(n);
+
+ assert_se(parse_syscall_and_errno("uname:EINVAL", &n, &e) >= 0);
+ assert_se(streq(n, "uname"));
+ assert_se(e == errno_from_name("EINVAL") && e >= 0);
+ n = mfree(n);
+
+ assert_se(parse_syscall_and_errno("@sync:4095", &n, &e) >= 0);
+ assert_se(streq(n, "@sync"));
+ assert_se(e == 4095);
+ n = mfree(n);
+
+ /* If errno is omitted, then e is set to -1 */
+ assert_se(parse_syscall_and_errno("mount", &n, &e) >= 0);
+ assert_se(streq(n, "mount"));
+ assert_se(e == -1);
+ n = mfree(n);
+
+ /* parse_syscall_and_errno() does not check the syscall name is valid or not. */
+ assert_se(parse_syscall_and_errno("hoge:255", &n, &e) >= 0);
+ assert_se(streq(n, "hoge"));
+ assert_se(e == 255);
+ n = mfree(n);
+
+ assert_se(parse_syscall_and_errno("hoge:kill", &n, &e) >= 0);
+ assert_se(streq(n, "hoge"));
+ assert_se(e == SECCOMP_ERROR_NUMBER_KILL);
+ n = mfree(n);
+
+ /* The function checks the syscall name is empty or not. */
+ assert_se(parse_syscall_and_errno("", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno(":255", &n, &e) == -EINVAL);
+
+ /* errno must be a valid errno name or number between 0 and ERRNO_MAX == 4095, or "kill" */
+ assert_se(parse_syscall_and_errno("hoge:4096", &n, &e) == -ERANGE);
+ assert_se(parse_syscall_and_errno("hoge:-3", &n, &e) == -ERANGE);
+ assert_se(parse_syscall_and_errno("hoge:12.3", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:123junk", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:junk123", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:255:EILSEQ", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:-EINVAL", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:EINVALaaa", &n, &e) == -EINVAL);
+ assert_se(parse_syscall_and_errno("hoge:", &n, &e) == -EINVAL);
+#endif
+}
+
+static void test_parse_mtu(void) {
+ uint32_t mtu = 0;
+
+ assert_se(parse_mtu(AF_UNSPEC, "1500", &mtu) >= 0 && mtu == 1500);
+ assert_se(parse_mtu(AF_UNSPEC, "1400", &mtu) >= 0 && mtu == 1400);
+ assert_se(parse_mtu(AF_UNSPEC, "65535", &mtu) >= 0 && mtu == 65535);
+ assert_se(parse_mtu(AF_UNSPEC, "65536", &mtu) >= 0 && mtu == 65536);
+ assert_se(parse_mtu(AF_UNSPEC, "4294967295", &mtu) >= 0 && mtu == 4294967295);
+ assert_se(parse_mtu(AF_UNSPEC, "500", &mtu) >= 0 && mtu == 500);
+ assert_se(parse_mtu(AF_UNSPEC, "1280", &mtu) >= 0 && mtu == 1280);
+ assert_se(parse_mtu(AF_INET6, "1280", &mtu) >= 0 && mtu == 1280);
+ assert_se(parse_mtu(AF_INET6, "1279", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_UNSPEC, "4294967296", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_INET6, "4294967296", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_INET6, "68", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_UNSPEC, "68", &mtu) >= 0 && mtu == 68);
+ assert_se(parse_mtu(AF_UNSPEC, "67", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_UNSPEC, "0", &mtu) == -ERANGE);
+ assert_se(parse_mtu(AF_UNSPEC, "", &mtu) == -EINVAL);
+}
+
+static void test_parse_loadavg_fixed_point(void) {
+ loadavg_t fp;
+
+ assert_se(parse_loadavg_fixed_point("1.23", &fp) == 0);
+ assert_se(LOAD_INT(fp) == 1);
+ assert_se(LOAD_FRAC(fp) == 23);
+
+ assert_se(parse_loadavg_fixed_point("1.80", &fp) == 0);
+ assert_se(LOAD_INT(fp) == 1);
+ assert_se(LOAD_FRAC(fp) == 80);
+
+ assert_se(parse_loadavg_fixed_point("0.07", &fp) == 0);
+ assert_se(LOAD_INT(fp) == 0);
+ assert_se(LOAD_FRAC(fp) == 7);
+
+ assert_se(parse_loadavg_fixed_point("0.00", &fp) == 0);
+ assert_se(LOAD_INT(fp) == 0);
+ assert_se(LOAD_FRAC(fp) == 0);
+
+ assert_se(parse_loadavg_fixed_point("4096.57", &fp) == 0);
+ assert_se(LOAD_INT(fp) == 4096);
+ assert_se(LOAD_FRAC(fp) == 57);
+
+ /* Caps out at 2 digit fracs */
+ assert_se(parse_loadavg_fixed_point("1.100", &fp) == -ERANGE);
+
+ assert_se(parse_loadavg_fixed_point("4096.4096", &fp) == -ERANGE);
+ assert_se(parse_loadavg_fixed_point("-4000.5", &fp) == -ERANGE);
+ assert_se(parse_loadavg_fixed_point("18446744073709551615.5", &fp) == -ERANGE);
+ assert_se(parse_loadavg_fixed_point("foobar", &fp) == -EINVAL);
+ assert_se(parse_loadavg_fixed_point("3333", &fp) == -EINVAL);
+ assert_se(parse_loadavg_fixed_point("1.2.3", &fp) == -EINVAL);
+ assert_se(parse_loadavg_fixed_point(".", &fp) == -EINVAL);
+ assert_se(parse_loadavg_fixed_point("", &fp) == -EINVAL);
+}
+
+int main(int argc, char *argv[]) {
+ log_parse_environment();
+ log_open();
+
+ test_parse_boolean();
+ test_parse_pid();
+ test_parse_mode();
+ test_parse_size();
+ test_parse_range();
+ test_safe_atolli();
+ test_safe_atou16();
+ test_safe_atoi16();
+ test_safe_atoux16();
+ test_safe_atou64();
+ test_safe_atoi64();
+ test_safe_atoux64();
+ test_safe_atod();
+ test_parse_percent();
+ test_parse_percent_unbounded();
+ test_parse_permille();
+ test_parse_permille_unbounded();
+ test_parse_nice();
+ test_parse_dev();
+ test_parse_errno();
+ test_parse_syscall_and_errno();
+ test_parse_mtu();
+ test_parse_loadavg_fixed_point();
+
+ return 0;
+}
diff --git a/src/test/test-path-lookup.c b/src/test/test-path-lookup.c
new file mode 100644
index 0000000..da146aa
--- /dev/null
+++ b/src/test/test-path-lookup.c
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "log.h"
+#include "path-lookup.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_paths(UnitFileScope scope) {
+ char template[] = "/tmp/test-path-lookup.XXXXXXX";
+
+ _cleanup_(lookup_paths_free) LookupPaths lp_without_env = {};
+ _cleanup_(lookup_paths_free) LookupPaths lp_with_env = {};
+ char *systemd_unit_path;
+
+ assert_se(mkdtemp(template));
+
+ assert_se(unsetenv("SYSTEMD_UNIT_PATH") == 0);
+ assert_se(lookup_paths_init(&lp_without_env, scope, 0, NULL) >= 0);
+ assert_se(!strv_isempty(lp_without_env.search_path));
+ lookup_paths_log(&lp_without_env);
+
+ systemd_unit_path = strjoina(template, "/systemd-unit-path");
+ assert_se(setenv("SYSTEMD_UNIT_PATH", systemd_unit_path, 1) == 0);
+ assert_se(lookup_paths_init(&lp_with_env, scope, 0, NULL) == 0);
+ assert_se(strv_length(lp_with_env.search_path) == 1);
+ assert_se(streq(lp_with_env.search_path[0], systemd_unit_path));
+ lookup_paths_log(&lp_with_env);
+ assert_se(strv_equal(lp_with_env.search_path, STRV_MAKE(systemd_unit_path)));
+
+ assert_se(rm_rf(template, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+static void test_user_and_global_paths(void) {
+ _cleanup_(lookup_paths_free) LookupPaths lp_global = {}, lp_user = {};
+ char **u, **g, **p;
+ unsigned k = 0;
+
+ assert_se(unsetenv("SYSTEMD_UNIT_PATH") == 0);
+ assert_se(unsetenv("XDG_DATA_DIRS") == 0);
+ assert_se(unsetenv("XDG_CONFIG_DIRS") == 0);
+
+ assert_se(lookup_paths_init(&lp_global, UNIT_FILE_GLOBAL, 0, NULL) == 0);
+ assert_se(lookup_paths_init(&lp_user, UNIT_FILE_USER, 0, NULL) == 0);
+ g = lp_global.search_path;
+ u = lp_user.search_path;
+
+ /* Go over all entries in global search path, and verify
+ * that they also exist in the user search path. Skip any
+ * entries in user search path which don't exist in the global
+ * one, but not vice versa. */
+ log_info("/* %s */", __func__);
+ STRV_FOREACH(p, g) {
+ while (u[k] && !streq(*p, u[k])) {
+ log_info("+ %s", u[k]);
+ k++;
+ }
+ log_info(" %s", *p);
+ assert(u[k]); /* If NULL, we didn't find a matching entry */
+ k++;
+ }
+ STRV_FOREACH(p, u + k)
+ log_info("+ %s", *p);
+}
+
+static void test_generator_binary_paths(UnitFileScope scope) {
+ char template[] = "/tmp/test-path-lookup.XXXXXXX";
+
+ _cleanup_strv_free_ char **gp_without_env = NULL;
+ _cleanup_strv_free_ char **env_gp_without_env = NULL;
+ _cleanup_strv_free_ char **gp_with_env = NULL;
+ _cleanup_strv_free_ char **env_gp_with_env = NULL;
+ char *systemd_generator_path = NULL;
+ char *systemd_env_generator_path = NULL;
+ char **dir;
+
+ assert_se(mkdtemp(template));
+
+ assert_se(unsetenv("SYSTEMD_GENERATOR_PATH") == 0);
+ assert_se(unsetenv("SYSTEMD_ENVIRONMENT_GENERATOR_PATH") == 0);
+
+ gp_without_env = generator_binary_paths(scope);
+ env_gp_without_env = env_generator_binary_paths(scope == UNIT_FILE_SYSTEM ? true : false);
+
+ log_info("Generators dirs (%s):", scope == UNIT_FILE_SYSTEM ? "system" : "user");
+ STRV_FOREACH(dir, gp_without_env)
+ log_info(" %s", *dir);
+
+ log_info("Environment generators dirs (%s):", scope == UNIT_FILE_SYSTEM ? "system" : "user");
+ STRV_FOREACH(dir, env_gp_without_env)
+ log_info(" %s", *dir);
+
+ assert_se(!strv_isempty(gp_without_env));
+ assert_se(!strv_isempty(env_gp_without_env));
+
+ systemd_generator_path = strjoina(template, "/systemd-generator-path");
+ systemd_env_generator_path = strjoina(template, "/systemd-environment-generator-path");
+ assert_se(setenv("SYSTEMD_GENERATOR_PATH", systemd_generator_path, 1) == 0);
+ assert_se(setenv("SYSTEMD_ENVIRONMENT_GENERATOR_PATH", systemd_env_generator_path, 1) == 0);
+
+ gp_with_env = generator_binary_paths(scope);
+ env_gp_with_env = env_generator_binary_paths(scope == UNIT_FILE_SYSTEM ? true : false);
+
+ log_info("Generators dirs (%s):", scope == UNIT_FILE_SYSTEM ? "system" : "user");
+ STRV_FOREACH(dir, gp_with_env)
+ log_info(" %s", *dir);
+
+ log_info("Environment generators dirs (%s):", scope == UNIT_FILE_SYSTEM ? "system" : "user");
+ STRV_FOREACH(dir, env_gp_with_env)
+ log_info(" %s", *dir);
+
+ assert_se(strv_equal(gp_with_env, STRV_MAKE(systemd_generator_path)));
+ assert_se(strv_equal(env_gp_with_env, STRV_MAKE(systemd_env_generator_path)));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_paths(UNIT_FILE_SYSTEM);
+ test_paths(UNIT_FILE_USER);
+ test_paths(UNIT_FILE_GLOBAL);
+
+ test_user_and_global_paths();
+
+ test_generator_binary_paths(UNIT_FILE_SYSTEM);
+ test_generator_binary_paths(UNIT_FILE_USER);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-path-util.c b/src/test/test-path-util.c
new file mode 100644
index 0000000..cb91a1a
--- /dev/null
+++ b/src/test/test-path-util.c
@@ -0,0 +1,733 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_print_paths(void) {
+ log_info("DEFAULT_PATH=%s", DEFAULT_PATH);
+ log_info("DEFAULT_USER_PATH=%s", DEFAULT_USER_PATH);
+}
+
+#define test_path_compare(a, b, result) { \
+ assert_se(path_compare(a, b) == result); \
+ assert_se(path_compare(b, a) == -result); \
+ assert_se(path_equal(a, b) == !result); \
+ assert_se(path_equal(b, a) == !result); \
+ }
+
+static void test_path_simplify(const char *in, const char *out, const char *out_dot) {
+ char *p;
+
+ log_info("/* %s */", __func__);
+
+ p = strdupa(in);
+ assert_se(streq(path_simplify(p, false), out));
+
+ p = strdupa(in);
+ assert_se(streq(path_simplify(p, true), out_dot));
+}
+
+static void test_path(void) {
+ log_info("/* %s */", __func__);
+
+ test_path_compare("/goo", "/goo", 0);
+ test_path_compare("/goo", "/goo", 0);
+ test_path_compare("//goo", "/goo", 0);
+ test_path_compare("//goo/////", "/goo", 0);
+ test_path_compare("goo/////", "goo", 0);
+
+ test_path_compare("/goo/boo", "/goo//boo", 0);
+ test_path_compare("//goo/boo", "/goo/boo//", 0);
+
+ test_path_compare("/", "///", 0);
+
+ test_path_compare("/x", "x/", 1);
+ test_path_compare("x/", "/", -1);
+
+ test_path_compare("/x/./y", "x/y", 1);
+ test_path_compare("x/.y", "x/y", -1);
+
+ test_path_compare("foo", "/foo", -1);
+ test_path_compare("/foo", "/foo/bar", -1);
+ test_path_compare("/foo/aaa", "/foo/b", -1);
+ test_path_compare("/foo/aaa", "/foo/b/a", -1);
+ test_path_compare("/foo/a", "/foo/aaa", -1);
+ test_path_compare("/foo/a/b", "/foo/aaa", -1);
+
+ assert_se(path_is_absolute("/"));
+ assert_se(!path_is_absolute("./"));
+
+ assert_se(is_path("/dir"));
+ assert_se(is_path("a/b"));
+ assert_se(!is_path("."));
+
+ assert_se(streq(basename("./aa/bb/../file.da."), "file.da."));
+ assert_se(streq(basename("/aa///.file"), ".file"));
+ assert_se(streq(basename("/aa///file..."), "file..."));
+ assert_se(streq(basename("file.../"), ""));
+
+ test_path_simplify("aaa/bbb////ccc", "aaa/bbb/ccc", "aaa/bbb/ccc");
+ test_path_simplify("//aaa/.////ccc", "/aaa/./ccc", "/aaa/ccc");
+ test_path_simplify("///", "/", "/");
+ test_path_simplify("///.//", "/.", "/");
+ test_path_simplify("///.//.///", "/./.", "/");
+ test_path_simplify("////.././///../.", "/.././../.", "/../..");
+ test_path_simplify(".", ".", ".");
+ test_path_simplify("./", ".", ".");
+ test_path_simplify(".///.//./.", "./././.", ".");
+ test_path_simplify(".///.//././/", "./././.", ".");
+ test_path_simplify("//./aaa///.//./.bbb/..///c.//d.dd///..eeee/.",
+ "/./aaa/././.bbb/../c./d.dd/..eeee/.",
+ "/aaa/.bbb/../c./d.dd/..eeee");
+ test_path_simplify("//./aaa///.//./.bbb/..///c.//d.dd///..eeee/..",
+ "/./aaa/././.bbb/../c./d.dd/..eeee/..",
+ "/aaa/.bbb/../c./d.dd/..eeee/..");
+ test_path_simplify(".//./aaa///.//./.bbb/..///c.//d.dd///..eeee/..",
+ "././aaa/././.bbb/../c./d.dd/..eeee/..",
+ "aaa/.bbb/../c./d.dd/..eeee/..");
+ test_path_simplify("..//./aaa///.//./.bbb/..///c.//d.dd///..eeee/..",
+ ".././aaa/././.bbb/../c./d.dd/..eeee/..",
+ "../aaa/.bbb/../c./d.dd/..eeee/..");
+
+ assert_se(PATH_IN_SET("/bin", "/", "/bin", "/foo"));
+ assert_se(PATH_IN_SET("/bin", "/bin"));
+ assert_se(PATH_IN_SET("/bin", "/foo/bar", "/bin"));
+ assert_se(PATH_IN_SET("/", "/", "/", "/foo/bar"));
+ assert_se(!PATH_IN_SET("/", "/abc", "/def"));
+
+ assert_se(path_equal_ptr(NULL, NULL));
+ assert_se(path_equal_ptr("/a", "/a"));
+ assert_se(!path_equal_ptr("/a", "/b"));
+ assert_se(!path_equal_ptr("/a", NULL));
+ assert_se(!path_equal_ptr(NULL, "/a"));
+}
+
+static void test_path_equal_root(void) {
+ /* Nail down the details of how path_equal("/", ...) works. */
+
+ log_info("/* %s */", __func__);
+
+ assert_se(path_equal("/", "/"));
+ assert_se(path_equal("/", "//"));
+
+ assert_se(!path_equal("/", "/./"));
+ assert_se(!path_equal("/", "/../"));
+
+ assert_se(!path_equal("/", "/.../"));
+
+ /* Make sure that files_same works as expected. */
+
+ assert_se(files_same("/", "/", 0) > 0);
+ assert_se(files_same("/", "/", AT_SYMLINK_NOFOLLOW) > 0);
+ assert_se(files_same("/", "//", 0) > 0);
+ assert_se(files_same("/", "//", AT_SYMLINK_NOFOLLOW) > 0);
+
+ assert_se(files_same("/", "/./", 0) > 0);
+ assert_se(files_same("/", "/./", AT_SYMLINK_NOFOLLOW) > 0);
+ assert_se(files_same("/", "/../", 0) > 0);
+ assert_se(files_same("/", "/../", AT_SYMLINK_NOFOLLOW) > 0);
+
+ assert_se(files_same("/", "/.../", 0) == -ENOENT);
+ assert_se(files_same("/", "/.../", AT_SYMLINK_NOFOLLOW) == -ENOENT);
+
+ /* The same for path_equal_or_files_same. */
+
+ assert_se(path_equal_or_files_same("/", "/", 0));
+ assert_se(path_equal_or_files_same("/", "/", AT_SYMLINK_NOFOLLOW));
+ assert_se(path_equal_or_files_same("/", "//", 0));
+ assert_se(path_equal_or_files_same("/", "//", AT_SYMLINK_NOFOLLOW));
+
+ assert_se(path_equal_or_files_same("/", "/./", 0));
+ assert_se(path_equal_or_files_same("/", "/./", AT_SYMLINK_NOFOLLOW));
+ assert_se(path_equal_or_files_same("/", "/../", 0));
+ assert_se(path_equal_or_files_same("/", "/../", AT_SYMLINK_NOFOLLOW));
+
+ assert_se(!path_equal_or_files_same("/", "/.../", 0));
+ assert_se(!path_equal_or_files_same("/", "/.../", AT_SYMLINK_NOFOLLOW));
+}
+
+static void test_find_executable_full(void) {
+ char *p;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(find_executable_full("sh", true, &p) == 0);
+ puts(p);
+ assert_se(streq(basename(p), "sh"));
+ free(p);
+
+ assert_se(find_executable_full("sh", false, &p) == 0);
+ puts(p);
+ assert_se(streq(basename(p), "sh"));
+ free(p);
+
+ _cleanup_free_ char *oldpath = NULL;
+ p = getenv("PATH");
+ if (p)
+ assert_se(oldpath = strdup(p));
+
+ assert_se(unsetenv("PATH") == 0);
+
+ assert_se(find_executable_full("sh", true, &p) == 0);
+ puts(p);
+ assert_se(streq(basename(p), "sh"));
+ free(p);
+
+ assert_se(find_executable_full("sh", false, &p) == 0);
+ puts(p);
+ assert_se(streq(basename(p), "sh"));
+ free(p);
+
+ if (oldpath)
+ assert_se(setenv("PATH", oldpath, true) >= 0);
+}
+
+static void test_find_executable(const char *self) {
+ char *p;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(find_executable("/bin/sh", &p) == 0);
+ puts(p);
+ assert_se(path_equal(p, "/bin/sh"));
+ free(p);
+
+ assert_se(find_executable(self, &p) == 0);
+ puts(p);
+ assert_se(endswith(p, "/test-path-util"));
+ assert_se(path_is_absolute(p));
+ free(p);
+
+ assert_se(find_executable("sh", &p) == 0);
+ puts(p);
+ assert_se(endswith(p, "/sh"));
+ assert_se(path_is_absolute(p));
+ free(p);
+
+ assert_se(find_executable("/bin/touch", &p) == 0);
+ assert_se(streq(p, "/bin/touch"));
+ free(p);
+
+ assert_se(find_executable("touch", &p) == 0);
+ assert_se(path_is_absolute(p));
+ assert_se(streq(basename(p), "touch"));
+ free(p);
+
+ assert_se(find_executable("xxxx-xxxx", &p) == -ENOENT);
+ assert_se(find_executable("/some/dir/xxxx-xxxx", &p) == -ENOENT);
+ assert_se(find_executable("/proc/filesystems", &p) == -EACCES);
+}
+
+static void test_prefixes(void) {
+ static const char* const values[] = {
+ "/a/b/c/d",
+ "/a/b/c",
+ "/a/b",
+ "/a",
+ "",
+ NULL
+ };
+ unsigned i;
+ char s[PATH_MAX];
+ bool b;
+
+ log_info("/* %s */", __func__);
+
+ i = 0;
+ PATH_FOREACH_PREFIX_MORE(s, "/a/b/c/d") {
+ log_error("---%s---", s);
+ assert_se(streq(s, values[i++]));
+ }
+ assert_se(values[i] == NULL);
+
+ i = 1;
+ PATH_FOREACH_PREFIX(s, "/a/b/c/d") {
+ log_error("---%s---", s);
+ assert_se(streq(s, values[i++]));
+ }
+ assert_se(values[i] == NULL);
+
+ i = 0;
+ PATH_FOREACH_PREFIX_MORE(s, "////a////b////c///d///////")
+ assert_se(streq(s, values[i++]));
+ assert_se(values[i] == NULL);
+
+ i = 1;
+ PATH_FOREACH_PREFIX(s, "////a////b////c///d///////")
+ assert_se(streq(s, values[i++]));
+ assert_se(values[i] == NULL);
+
+ PATH_FOREACH_PREFIX(s, "////")
+ assert_not_reached("Wut?");
+
+ b = false;
+ PATH_FOREACH_PREFIX_MORE(s, "////") {
+ assert_se(!b);
+ assert_se(streq(s, ""));
+ b = true;
+ }
+ assert_se(b);
+
+ PATH_FOREACH_PREFIX(s, "")
+ assert_not_reached("wut?");
+
+ b = false;
+ PATH_FOREACH_PREFIX_MORE(s, "") {
+ assert_se(!b);
+ assert_se(streq(s, ""));
+ b = true;
+ }
+}
+
+static void test_path_join(void) {
+ log_info("/* %s */", __func__);
+
+#define test_join(expected, ...) { \
+ _cleanup_free_ char *z = NULL; \
+ z = path_join(__VA_ARGS__); \
+ log_debug("got \"%s\", expected \"%s\"", z, expected); \
+ assert_se(streq(z, expected)); \
+ }
+
+ test_join("/root/a/b/c", "/root", "/a/b", "/c");
+ test_join("/root/a/b/c", "/root", "a/b", "c");
+ test_join("/root/a/b/c", "/root", "/a/b", "c");
+ test_join("/root/c", "/root", "/", "c");
+ test_join("/root/", "/root", "/", NULL);
+
+ test_join("/a/b/c", "", "/a/b", "/c");
+ test_join("a/b/c", "", "a/b", "c");
+ test_join("/a/b/c", "", "/a/b", "c");
+ test_join("/c", "", "/", "c");
+ test_join("/", "", "/", NULL);
+
+ test_join("/a/b/c", NULL, "/a/b", "/c");
+ test_join("a/b/c", NULL, "a/b", "c");
+ test_join("/a/b/c", NULL, "/a/b", "c");
+ test_join("/c", NULL, "/", "c");
+ test_join("/", NULL, "/", NULL);
+
+ test_join("", "", NULL);
+ test_join("", NULL, "");
+ test_join("", NULL, NULL);
+
+ test_join("foo/bar", "foo", "bar");
+ test_join("foo/bar", "", "foo", "bar");
+ test_join("foo/bar", NULL, "foo", NULL, "bar");
+ test_join("foo/bar", "", "foo", "", "bar", "");
+ test_join("foo/bar", "", "", "", "", "foo", "", "", "", "bar", "", "", "");
+
+ test_join("//foo///bar//", "", "/", "", "/foo/", "", "/", "", "/bar/", "", "/", "");
+ test_join("/foo/bar/", "/", "foo", "/", "bar", "/");
+ test_join("foo/bar/baz", "foo", "bar", "baz");
+ test_join("foo/bar/baz", "foo/", "bar", "/baz");
+ test_join("foo//bar//baz", "foo/", "/bar/", "/baz");
+ test_join("//foo////bar////baz//", "//foo/", "///bar/", "///baz//");
+}
+
+static void test_fsck_exists(void) {
+ log_info("/* %s */", __func__);
+
+ /* Ensure we use a sane default for PATH. */
+ assert_se(unsetenv("PATH") == 0);
+
+ /* fsck.minix is provided by util-linux and will probably exist. */
+ assert_se(fsck_exists("minix") == 1);
+
+ assert_se(fsck_exists("AbCdE") == 0);
+ assert_se(fsck_exists("/../bin/") == 0);
+}
+
+static void test_make_relative(void) {
+ char *result;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(path_make_relative("some/relative/path", "/some/path", &result) < 0);
+ assert_se(path_make_relative("/some/path", "some/relative/path", &result) < 0);
+ assert_se(path_make_relative("/some/dotdot/../path", "/some/path", &result) < 0);
+
+#define test(from_dir, to_path, expected) { \
+ _cleanup_free_ char *z = NULL; \
+ path_make_relative(from_dir, to_path, &z); \
+ assert_se(streq(z, expected)); \
+ }
+
+ test("/", "/", ".");
+ test("/", "/some/path", "some/path");
+ test("/some/path", "/some/path", ".");
+ test("/some/path", "/some/path/in/subdir", "in/subdir");
+ test("/some/path", "/", "../..");
+ test("/some/path", "/some/other/path", "../other/path");
+ test("/some/path/./dot", "/some/further/path", "../../further/path");
+ test("//extra.//.//./.slashes//./won't////fo.ol///anybody//", "/././/extra././/.slashes////ar.e/.just/././.fine///", "../../../ar.e/.just/.fine");
+}
+
+static void test_strv_resolve(void) {
+ char tmp_dir[] = "/tmp/test-path-util-XXXXXX";
+ _cleanup_strv_free_ char **search_dirs = NULL;
+ _cleanup_strv_free_ char **absolute_dirs = NULL;
+ char **d;
+
+ assert_se(mkdtemp(tmp_dir) != NULL);
+
+ search_dirs = strv_new("/dir1", "/dir2", "/dir3");
+ assert_se(search_dirs);
+ STRV_FOREACH(d, search_dirs) {
+ char *p = path_join(tmp_dir, *d);
+ assert_se(p);
+ assert_se(strv_push(&absolute_dirs, p) == 0);
+ }
+
+ assert_se(mkdir(absolute_dirs[0], 0700) == 0);
+ assert_se(mkdir(absolute_dirs[1], 0700) == 0);
+ assert_se(symlink("dir2", absolute_dirs[2]) == 0);
+
+ path_strv_resolve(search_dirs, tmp_dir);
+ assert_se(streq(search_dirs[0], "/dir1"));
+ assert_se(streq(search_dirs[1], "/dir2"));
+ assert_se(streq(search_dirs[2], "/dir2"));
+
+ assert_se(rm_rf(tmp_dir, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+}
+
+static void test_path_startswith(void) {
+ const char *p;
+
+ log_info("/* %s */", __func__);
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo");
+ assert_se(streq_ptr(p, "bar/barfoo/"));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo/");
+ assert_se(streq_ptr(p, "bar/barfoo/"));
+
+ p = path_startswith("/foo/bar/barfoo/", "/");
+ assert_se(streq_ptr(p, "foo/bar/barfoo/"));
+
+ p = path_startswith("/foo/bar/barfoo/", "////");
+ assert_se(streq_ptr(p, "foo/bar/barfoo/"));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo//bar/////barfoo///");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo/bar/barfoo////");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo/bar///barfoo/");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo////bar/barfoo/");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "////foo/bar/barfoo/");
+ assert_se(streq_ptr(p, ""));
+
+ p = path_startswith("/foo/bar/barfoo/", "/foo/bar/barfoo");
+ assert_se(streq_ptr(p, ""));
+
+ assert_se(!path_startswith("/foo/bar/barfoo/", "/foo/bar/barfooa/"));
+ assert_se(!path_startswith("/foo/bar/barfoo/", "/foo/bar/barfooa"));
+ assert_se(!path_startswith("/foo/bar/barfoo/", ""));
+ assert_se(!path_startswith("/foo/bar/barfoo/", "/bar/foo"));
+ assert_se(!path_startswith("/foo/bar/barfoo/", "/f/b/b/"));
+}
+
+static void test_prefix_root_one(const char *r, const char *p, const char *expected) {
+ _cleanup_free_ char *s = NULL;
+ const char *t;
+
+ assert_se(s = path_join(r, p));
+ assert_se(path_equal_ptr(s, expected));
+
+ t = prefix_roota(r, p);
+ assert_se(t);
+ assert_se(path_equal_ptr(t, expected));
+}
+
+static void test_prefix_root(void) {
+ log_info("/* %s */", __func__);
+
+ test_prefix_root_one("/", "/foo", "/foo");
+ test_prefix_root_one(NULL, "/foo", "/foo");
+ test_prefix_root_one("", "/foo", "/foo");
+ test_prefix_root_one("///", "/foo", "/foo");
+ test_prefix_root_one("/", "////foo", "/foo");
+ test_prefix_root_one(NULL, "////foo", "/foo");
+ test_prefix_root_one("/", "foo", "/foo");
+ test_prefix_root_one("", "foo", "foo");
+ test_prefix_root_one(NULL, "foo", "foo");
+
+ test_prefix_root_one("/foo", "/bar", "/foo/bar");
+ test_prefix_root_one("/foo", "bar", "/foo/bar");
+ test_prefix_root_one("foo", "bar", "foo/bar");
+ test_prefix_root_one("/foo/", "/bar", "/foo/bar");
+ test_prefix_root_one("/foo/", "//bar", "/foo/bar");
+ test_prefix_root_one("/foo///", "//bar", "/foo/bar");
+}
+
+static void test_file_in_same_dir(void) {
+ char *t;
+
+ log_info("/* %s */", __func__);
+
+ t = file_in_same_dir("/", "a");
+ assert_se(streq(t, "/a"));
+ free(t);
+
+ t = file_in_same_dir("/", "/a");
+ assert_se(streq(t, "/a"));
+ free(t);
+
+ t = file_in_same_dir("", "a");
+ assert_se(streq(t, "a"));
+ free(t);
+
+ t = file_in_same_dir("a/", "a");
+ assert_se(streq(t, "a/a"));
+ free(t);
+
+ t = file_in_same_dir("bar/foo", "bar");
+ assert_se(streq(t, "bar/bar"));
+ free(t);
+}
+
+static void test_last_path_component(void) {
+ assert_se(last_path_component(NULL) == NULL);
+ assert_se(streq(last_path_component("a/b/c"), "c"));
+ assert_se(streq(last_path_component("a/b/c/"), "c/"));
+ assert_se(streq(last_path_component("/"), "/"));
+ assert_se(streq(last_path_component("//"), "/"));
+ assert_se(streq(last_path_component("///"), "/"));
+ assert_se(streq(last_path_component("."), "."));
+ assert_se(streq(last_path_component("./."), "."));
+ assert_se(streq(last_path_component("././"), "./"));
+ assert_se(streq(last_path_component("././/"), ".//"));
+ assert_se(streq(last_path_component("/foo/a"), "a"));
+ assert_se(streq(last_path_component("/foo/a/"), "a/"));
+ assert_se(streq(last_path_component(""), ""));
+ assert_se(streq(last_path_component("a"), "a"));
+ assert_se(streq(last_path_component("a/"), "a/"));
+ assert_se(streq(last_path_component("/a"), "a"));
+ assert_se(streq(last_path_component("/a/"), "a/"));
+}
+
+static void test_path_extract_filename_one(const char *input, const char *output, int ret) {
+ _cleanup_free_ char *k = NULL;
+ int r;
+
+ r = path_extract_filename(input, &k);
+ log_info("%s → %s/%s [expected: %s/%s]", strnull(input), strnull(k), strerror_safe(r), strnull(output), strerror_safe(ret));
+ assert_se(streq_ptr(k, output));
+ assert_se(r == ret);
+}
+
+static void test_path_extract_filename(void) {
+ log_info("/* %s */", __func__);
+
+ test_path_extract_filename_one(NULL, NULL, -EINVAL);
+ test_path_extract_filename_one("a/b/c", "c", 0);
+ test_path_extract_filename_one("a/b/c/", "c", 0);
+ test_path_extract_filename_one("/", NULL, -EINVAL);
+ test_path_extract_filename_one("//", NULL, -EINVAL);
+ test_path_extract_filename_one("///", NULL, -EINVAL);
+ test_path_extract_filename_one(".", NULL, -EINVAL);
+ test_path_extract_filename_one("./.", NULL, -EINVAL);
+ test_path_extract_filename_one("././", NULL, -EINVAL);
+ test_path_extract_filename_one("././/", NULL, -EINVAL);
+ test_path_extract_filename_one("/foo/a", "a", 0);
+ test_path_extract_filename_one("/foo/a/", "a", 0);
+ test_path_extract_filename_one("", NULL, -EINVAL);
+ test_path_extract_filename_one("a", "a", 0);
+ test_path_extract_filename_one("a/", "a", 0);
+ test_path_extract_filename_one("/a", "a", 0);
+ test_path_extract_filename_one("/a/", "a", 0);
+ test_path_extract_filename_one("/////////////a/////////////", "a", 0);
+ test_path_extract_filename_one("xx/.", NULL, -EINVAL);
+ test_path_extract_filename_one("xx/..", NULL, -EINVAL);
+ test_path_extract_filename_one("..", NULL, -EINVAL);
+ test_path_extract_filename_one("/..", NULL, -EINVAL);
+ test_path_extract_filename_one("../", NULL, -EINVAL);
+ test_path_extract_filename_one(".", NULL, -EINVAL);
+ test_path_extract_filename_one("/.", NULL, -EINVAL);
+ test_path_extract_filename_one("./", NULL, -EINVAL);
+}
+
+static void test_filename_is_valid(void) {
+ char foo[FILENAME_MAX+2];
+ int i;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(!filename_is_valid(""));
+ assert_se(!filename_is_valid("/bar/foo"));
+ assert_se(!filename_is_valid("/"));
+ assert_se(!filename_is_valid("."));
+ assert_se(!filename_is_valid(".."));
+ assert_se(!filename_is_valid("bar/foo"));
+ assert_se(!filename_is_valid("bar/foo/"));
+ assert_se(!filename_is_valid("bar//"));
+
+ for (i=0; i<FILENAME_MAX+1; i++)
+ foo[i] = 'a';
+ foo[FILENAME_MAX+1] = '\0';
+
+ assert_se(!filename_is_valid(foo));
+
+ assert_se(filename_is_valid("foo_bar-333"));
+ assert_se(filename_is_valid("o.o"));
+}
+
+static void test_hidden_or_backup_file(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(hidden_or_backup_file(".hidden"));
+ assert_se(hidden_or_backup_file("..hidden"));
+ assert_se(!hidden_or_backup_file("hidden."));
+
+ assert_se(hidden_or_backup_file("backup~"));
+ assert_se(hidden_or_backup_file(".backup~"));
+
+ assert_se(hidden_or_backup_file("lost+found"));
+ assert_se(hidden_or_backup_file("aquota.user"));
+ assert_se(hidden_or_backup_file("aquota.group"));
+
+ assert_se(hidden_or_backup_file("test.rpmnew"));
+ assert_se(hidden_or_backup_file("test.dpkg-old"));
+ assert_se(hidden_or_backup_file("test.dpkg-remove"));
+ assert_se(hidden_or_backup_file("test.swp"));
+
+ assert_se(!hidden_or_backup_file("test.rpmnew."));
+ assert_se(!hidden_or_backup_file("test.dpkg-old.foo"));
+}
+
+static void test_systemd_installation_has_version(const char *path) {
+ int r;
+ const unsigned versions[] = {0, 231, PROJECT_VERSION, 999};
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ for (i = 0; i < ELEMENTSOF(versions); i++) {
+ r = systemd_installation_has_version(path, versions[i]);
+ assert_se(r >= 0);
+ log_info("%s has systemd >= %u: %s",
+ path ?: "Current installation", versions[i], yes_no(r));
+ }
+}
+
+static void test_skip_dev_prefix(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(streq(skip_dev_prefix("/"), "/"));
+ assert_se(streq(skip_dev_prefix("/dev"), ""));
+ assert_se(streq(skip_dev_prefix("/dev/"), ""));
+ assert_se(streq(skip_dev_prefix("/dev/foo"), "foo"));
+ assert_se(streq(skip_dev_prefix("/dev/foo/bar"), "foo/bar"));
+ assert_se(streq(skip_dev_prefix("//dev"), ""));
+ assert_se(streq(skip_dev_prefix("//dev//"), ""));
+ assert_se(streq(skip_dev_prefix("/dev///foo"), "foo"));
+ assert_se(streq(skip_dev_prefix("///dev///foo///bar"), "foo///bar"));
+ assert_se(streq(skip_dev_prefix("//foo"), "//foo"));
+ assert_se(streq(skip_dev_prefix("foo"), "foo"));
+}
+
+static void test_empty_or_root(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(empty_or_root(NULL));
+ assert_se(empty_or_root(""));
+ assert_se(empty_or_root("/"));
+ assert_se(empty_or_root("//"));
+ assert_se(empty_or_root("///"));
+ assert_se(empty_or_root("/////////////////"));
+ assert_se(!empty_or_root("xxx"));
+ assert_se(!empty_or_root("/xxx"));
+ assert_se(!empty_or_root("/xxx/"));
+ assert_se(!empty_or_root("//yy//"));
+}
+
+static void test_path_startswith_set(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "/foo/bar", "/zzz"), ""));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "/foo/", "/zzz"), "bar"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "/foo", "/zzz"), "bar"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "/", "/zzz"), "foo/bar"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar", "/foo/quux", "", "/zzz"), NULL));
+
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "/foo/bar", "/zzz"), NULL));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "/foo/", "/zzz"), "bar2"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "/foo", "/zzz"), "bar2"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "/", "/zzz"), "foo/bar2"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo/bar2", "/foo/quux", "", "/zzz"), NULL));
+
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "/foo/bar", "/zzz"), NULL));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "/foo/", "/zzz"), NULL));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "/foo", "/zzz"), NULL));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "/", "/zzz"), "foo2/bar"));
+ assert_se(streq_ptr(PATH_STARTSWITH_SET("/foo2/bar", "/foo/quux", "", "/zzz"), NULL));
+}
+
+static void test_path_startswith_strv(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar", STRV_MAKE("/foo/quux", "/foo/bar", "/zzz")), ""));
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar", STRV_MAKE("/foo/quux", "/foo/", "/zzz")), "bar"));
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar", STRV_MAKE("/foo/quux", "/foo", "/zzz")), "bar"));
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar", STRV_MAKE("/foo/quux", "/", "/zzz")), "foo/bar"));
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar", STRV_MAKE("/foo/quux", "", "/zzz")), NULL));
+
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar2", STRV_MAKE("/foo/quux", "/foo/bar", "/zzz")), NULL));
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar2", STRV_MAKE("/foo/quux", "/foo/", "/zzz")), "bar2"));
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar2", STRV_MAKE("/foo/quux", "/foo", "/zzz")), "bar2"));
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar2", STRV_MAKE("/foo/quux", "/", "/zzz")), "foo/bar2"));
+ assert_se(streq_ptr(path_startswith_strv("/foo/bar2", STRV_MAKE("/foo/quux", "", "/zzz")), NULL));
+
+ assert_se(streq_ptr(path_startswith_strv("/foo2/bar", STRV_MAKE("/foo/quux", "/foo/bar", "/zzz")), NULL));
+ assert_se(streq_ptr(path_startswith_strv("/foo2/bar", STRV_MAKE("/foo/quux", "/foo/", "/zzz")), NULL));
+ assert_se(streq_ptr(path_startswith_strv("/foo2/bar", STRV_MAKE("/foo/quux", "/foo", "/zzz")), NULL));
+ assert_se(streq_ptr(path_startswith_strv("/foo2/bar", STRV_MAKE("/foo/quux", "/", "/zzz")), "foo2/bar"));
+ assert_se(streq_ptr(path_startswith_strv("/foo2/bar", STRV_MAKE("/foo/quux", "", "/zzz")), NULL));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_print_paths();
+ test_path();
+ test_path_equal_root();
+ test_find_executable_full();
+ test_find_executable(argv[0]);
+ test_prefixes();
+ test_path_join();
+ test_fsck_exists();
+ test_make_relative();
+ test_strv_resolve();
+ test_path_startswith();
+ test_prefix_root();
+ test_file_in_same_dir();
+ test_last_path_component();
+ test_path_extract_filename();
+ test_filename_is_valid();
+ test_hidden_or_backup_file();
+ test_skip_dev_prefix();
+ test_empty_or_root();
+ test_path_startswith_set();
+ test_path_startswith_strv();
+
+ test_systemd_installation_has_version(argv[1]); /* NULL is OK */
+
+ return 0;
+}
diff --git a/src/test/test-path.c b/src/test/test-path.c
new file mode 100644
index 0000000..490fb13
--- /dev/null
+++ b/src/test/test-path.c
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "all-units.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "manager.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "unit.h"
+#include "util.h"
+
+typedef void (*test_function_t)(Manager *m);
+
+static int setup_test(Manager **m) {
+ char **tests_path = STRV_MAKE("exists", "existsglobFOOBAR", "changed", "modified", "unit",
+ "directorynotempty", "makedirectory");
+ char **test_path;
+ Manager *tmp = NULL;
+ int r;
+
+ assert_se(m);
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &tmp);
+ if (manager_errno_skip_test(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r >= 0);
+ assert_se(manager_startup(tmp, NULL, NULL) >= 0);
+
+ STRV_FOREACH(test_path, tests_path) {
+ _cleanup_free_ char *p = NULL;
+
+ p = strjoin("/tmp/test-path_", *test_path);
+ assert_se(p);
+
+ (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL);
+ }
+
+ *m = tmp;
+
+ return 0;
+}
+
+static void shutdown_test(Manager *m) {
+ assert_se(m);
+
+ manager_free(m);
+}
+
+static Service *service_for_path(Manager *m, Path *path, const char *service_name) {
+ _cleanup_free_ char *tmp = NULL;
+ Unit *service_unit = NULL;
+
+ assert_se(m);
+ assert_se(path);
+
+ if (!service_name) {
+ assert_se(tmp = strreplace(UNIT(path)->id, ".path", ".service"));
+ service_unit = manager_get_unit(m, tmp);
+ } else
+ service_unit = manager_get_unit(m, service_name);
+ assert_se(service_unit);
+
+ return SERVICE(service_unit);
+}
+
+static int _check_states(unsigned line,
+ Manager *m, Path *path, Service *service, PathState path_state, ServiceState service_state) {
+ assert_se(m);
+ assert_se(service);
+
+ usec_t end = now(CLOCK_MONOTONIC) + 30 * USEC_PER_SEC;
+
+ while (path->state != path_state || service->state != service_state ||
+ path->result != PATH_SUCCESS || service->result != SERVICE_SUCCESS) {
+
+ assert_se(sd_event_run(m->event, 100 * USEC_PER_MSEC) >= 0);
+
+ usec_t n = now(CLOCK_MONOTONIC);
+ log_info("line %u: %s: state = %s; result = %s (left: %" PRIi64 ")",
+ line,
+ UNIT(path)->id,
+ path_state_to_string(path->state),
+ path_result_to_string(path->result),
+ end - n);
+ log_info("line %u: %s: state = %s; result = %s",
+ line,
+ UNIT(service)->id,
+ service_state_to_string(service->state),
+ service_result_to_string(service->result));
+
+ if (service->state == SERVICE_FAILED &&
+ service->main_exec_status.status == EXIT_CGROUP &&
+ !ci_environment())
+ /* On a general purpose system we may fail to start the service for reasons which are
+ * not under our control: permission limits, resource exhaustion, etc. Let's skip the
+ * test in those cases. On developer machines we require proper setup. */
+ return log_notice_errno(SYNTHETIC_ERRNO(ECANCELED),
+ "Failed to start service %s, aborting test: %s/%s",
+ UNIT(service)->id,
+ service_state_to_string(service->state),
+ service_result_to_string(service->result));
+
+ if (n >= end) {
+ log_error("Test timeout when testing %s", UNIT(path)->id);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ return 0;
+}
+#define check_states(...) _check_states(__LINE__, __VA_ARGS__)
+
+static void test_path_exists(Manager *m) {
+ const char *test_path = "/tmp/test-path_exists";
+ Unit *unit = NULL;
+ Path *path = NULL;
+ Service *service = NULL;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-exists.path", NULL, &unit) >= 0);
+
+ path = PATH(unit);
+ service = service_for_path(m, path, NULL);
+
+ assert_se(unit_start(unit) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(touch(test_path) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ /* Service restarts if file still exists */
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ assert_se(rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(unit_stop(unit) >= 0);
+}
+
+static void test_path_existsglob(Manager *m) {
+ const char *test_path = "/tmp/test-path_existsglobFOOBAR";
+ Unit *unit = NULL;
+ Path *path = NULL;
+ Service *service = NULL;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-existsglob.path", NULL, &unit) >= 0);
+
+ path = PATH(unit);
+ service = service_for_path(m, path, NULL);
+
+ assert_se(unit_start(unit) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(touch(test_path) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ /* Service restarts if file still exists */
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ assert_se(rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(unit_stop(unit) >= 0);
+}
+
+static void test_path_changed(Manager *m) {
+ const char *test_path = "/tmp/test-path_changed";
+ FILE *f;
+ Unit *unit = NULL;
+ Path *path = NULL;
+ Service *service = NULL;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-changed.path", NULL, &unit) >= 0);
+
+ path = PATH(unit);
+ service = service_for_path(m, path, NULL);
+
+ assert_se(unit_start(unit) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(touch(test_path) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ /* Service does not restart if file still exists */
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ f = fopen(test_path, "w");
+ assert_se(f);
+ fclose(f);
+
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ (void) rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL);
+ assert_se(unit_stop(unit) >= 0);
+}
+
+static void test_path_modified(Manager *m) {
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *test_path = "/tmp/test-path_modified";
+ Unit *unit = NULL;
+ Path *path = NULL;
+ Service *service = NULL;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-modified.path", NULL, &unit) >= 0);
+
+ path = PATH(unit);
+ service = service_for_path(m, path, NULL);
+
+ assert_se(unit_start(unit) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(touch(test_path) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ /* Service does not restart if file still exists */
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ f = fopen(test_path, "w");
+ assert_se(f);
+ fputs("test", f);
+
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ (void) rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL);
+ assert_se(unit_stop(unit) >= 0);
+}
+
+static void test_path_unit(Manager *m) {
+ const char *test_path = "/tmp/test-path_unit";
+ Unit *unit = NULL;
+ Path *path = NULL;
+ Service *service = NULL;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-unit.path", NULL, &unit) >= 0);
+
+ path = PATH(unit);
+ service = service_for_path(m, path, "path-mycustomunit.service");
+
+ assert_se(unit_start(unit) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(touch(test_path) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ assert_se(rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(unit_stop(unit) >= 0);
+}
+
+static void test_path_directorynotempty(Manager *m) {
+ const char *test_path = "/tmp/test-path_directorynotempty/";
+ Unit *unit = NULL;
+ Path *path = NULL;
+ Service *service = NULL;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-directorynotempty.path", NULL, &unit) >= 0);
+
+ path = PATH(unit);
+ service = service_for_path(m, path, NULL);
+
+ assert_se(access(test_path, F_OK) < 0);
+
+ assert_se(unit_start(unit) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ /* MakeDirectory default to no */
+ assert_se(access(test_path, F_OK) < 0);
+
+ assert_se(mkdir_p(test_path, 0755) >= 0);
+ assert_se(touch(strjoina(test_path, "test_file")) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ /* Service restarts if directory is still not empty */
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_RUNNING, SERVICE_RUNNING) < 0)
+ return;
+
+ assert_se(rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL) == 0);
+ assert_se(unit_stop(UNIT(service)) >= 0);
+ if (check_states(m, path, service, PATH_WAITING, SERVICE_DEAD) < 0)
+ return;
+
+ assert_se(unit_stop(unit) >= 0);
+}
+
+static void test_path_makedirectory_directorymode(Manager *m) {
+ const char *test_path = "/tmp/test-path_makedirectory/";
+ Unit *unit = NULL;
+ struct stat s;
+
+ assert_se(m);
+
+ assert_se(manager_load_startable_unit_or_warn(m, "path-makedirectory.path", NULL, &unit) >= 0);
+
+ assert_se(access(test_path, F_OK) < 0);
+
+ assert_se(unit_start(unit) >= 0);
+
+ /* Check if the directory has been created */
+ assert_se(access(test_path, F_OK) >= 0);
+
+ /* Check the mode we specified with DirectoryMode=0744 */
+ assert_se(stat(test_path, &s) >= 0);
+ assert_se((s.st_mode & S_IRWXU) == 0700);
+ assert_se((s.st_mode & S_IRWXG) == 0040);
+ assert_se((s.st_mode & S_IRWXO) == 0004);
+
+ assert_se(unit_stop(unit) >= 0);
+ (void) rm_rf(test_path, REMOVE_ROOT|REMOVE_PHYSICAL);
+}
+
+int main(int argc, char *argv[]) {
+ static const test_function_t tests[] = {
+ test_path_exists,
+ test_path_existsglob,
+ test_path_changed,
+ test_path_modified,
+ test_path_unit,
+ test_path_directorynotempty,
+ test_path_makedirectory_directorymode,
+ NULL,
+ };
+
+ _cleanup_free_ char *test_path = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+
+ umask(022);
+
+ test_setup_logging(LOG_INFO);
+
+ assert_se(get_testdata_dir("test-path", &test_path) >= 0);
+ assert_se(set_unit_path(test_path) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ for (const test_function_t *test = tests; *test; test++) {
+ Manager *m = NULL;
+ int r;
+
+ /* We create a clean environment for each test */
+ r = setup_test(&m);
+ if (r != 0)
+ return r;
+
+ (*test)(m);
+
+ shutdown_test(m);
+ }
+
+ return 0;
+}
diff --git a/src/test/test-pretty-print.c b/src/test/test-pretty-print.c
new file mode 100644
index 0000000..dbae34e
--- /dev/null
+++ b/src/test/test-pretty-print.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "pretty-print.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_terminal_urlify(void) {
+ _cleanup_free_ char *formatted = NULL;
+
+ assert_se(terminal_urlify("https://www.freedesktop.org/wiki/Software/systemd/", "systemd homepage", &formatted) >= 0);
+ printf("Hey, consider visiting the %s right now! It is very good!\n", formatted);
+
+ formatted = mfree(formatted);
+
+ assert_se(terminal_urlify_path("/etc/fstab", "this link to your /etc/fstab", &formatted) >= 0);
+ printf("Or click on %s to have a look at it!\n", formatted);
+}
+
+static void test_cat_files(void) {
+ assert_se(cat_files("/no/such/file", NULL, 0) == -ENOENT);
+ assert_se(cat_files("/no/such/file", NULL, CAT_FLAGS_MAIN_FILE_OPTIONAL) == 0);
+
+ if (access("/etc/fstab", R_OK) >= 0)
+ assert_se(cat_files("/etc/fstab", STRV_MAKE("/etc/fstab", "/etc/fstab"), 0) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_terminal_urlify();
+ test_cat_files();
+
+ print_separator();
+
+ return 0;
+}
diff --git a/src/test/test-prioq.c b/src/test/test-prioq.c
new file mode 100644
index 0000000..4bfa181
--- /dev/null
+++ b/src/test/test-prioq.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "prioq.h"
+#include "set.h"
+#include "siphash24.h"
+#include "sort-util.h"
+
+#define SET_SIZE 1024*4
+
+static int unsigned_compare(const unsigned *a, const unsigned *b) {
+ return CMP(*a, *b);
+}
+
+static void test_unsigned(void) {
+ _cleanup_(prioq_freep) Prioq *q = NULL;
+ unsigned buffer[SET_SIZE], i, u, n;
+
+ srand(0);
+
+ assert_se(q = prioq_new(trivial_compare_func));
+
+ for (i = 0; i < ELEMENTSOF(buffer); i++) {
+ u = (unsigned) rand();
+ buffer[i] = u;
+ assert_se(prioq_put(q, UINT_TO_PTR(u), NULL) >= 0);
+
+ n = prioq_size(q);
+ assert_se(prioq_remove(q, UINT_TO_PTR(u), &n) == 0);
+ }
+
+ typesafe_qsort(buffer, ELEMENTSOF(buffer), unsigned_compare);
+
+ for (i = 0; i < ELEMENTSOF(buffer); i++) {
+ assert_se(prioq_size(q) == ELEMENTSOF(buffer) - i);
+
+ u = PTR_TO_UINT(prioq_pop(q));
+ assert_se(buffer[i] == u);
+ }
+
+ assert_se(prioq_isempty(q));
+}
+
+struct test {
+ unsigned value;
+ unsigned idx;
+};
+
+static int test_compare(const struct test *x, const struct test *y) {
+ return CMP(x->value, y->value);
+}
+
+static void test_hash(const struct test *x, struct siphash *state) {
+ siphash24_compress(&x->value, sizeof(x->value), state);
+}
+
+DEFINE_PRIVATE_HASH_OPS(test_hash_ops, struct test, test_hash, test_compare);
+
+static void test_struct(void) {
+ _cleanup_(prioq_freep) Prioq *q = NULL;
+ _cleanup_set_free_ Set *s = NULL;
+ unsigned previous = 0, i;
+ struct test *t;
+
+ srand(0);
+
+ assert_se(q = prioq_new((compare_func_t) test_compare));
+ assert_se(s = set_new(&test_hash_ops));
+
+ assert_se(prioq_peek(q) == NULL);
+ assert_se(prioq_peek_by_index(q, 0) == NULL);
+ assert_se(prioq_peek_by_index(q, 1) == NULL);
+ assert_se(prioq_peek_by_index(q, (unsigned) -1) == NULL);
+
+ for (i = 0; i < SET_SIZE; i++) {
+ assert_se(t = new0(struct test, 1));
+ t->value = (unsigned) rand();
+
+ assert_se(prioq_put(q, t, &t->idx) >= 0);
+
+ if (i % 4 == 0)
+ assert_se(set_consume(s, t) >= 0);
+ }
+
+ for (i = 0; i < SET_SIZE; i++)
+ assert_se(prioq_peek_by_index(q, i));
+ assert_se(prioq_peek_by_index(q, SET_SIZE) == NULL);
+
+ unsigned count = 0;
+ PRIOQ_FOREACH_ITEM(q, t) {
+ assert_se(t);
+ count++;
+ }
+ assert_se(count == SET_SIZE);
+
+ while ((t = set_steal_first(s))) {
+ assert_se(prioq_remove(q, t, &t->idx) == 1);
+ assert_se(prioq_remove(q, t, &t->idx) == 0);
+ assert_se(prioq_remove(q, t, NULL) == 0);
+
+ free(t);
+ }
+
+ for (i = 0; i < SET_SIZE * 3 / 4; i++) {
+ assert_se(prioq_size(q) == (SET_SIZE * 3 / 4) - i);
+
+ assert_se(t = prioq_pop(q));
+ assert_se(prioq_remove(q, t, &t->idx) == 0);
+ assert_se(prioq_remove(q, t, NULL) == 0);
+ assert_se(previous <= t->value);
+
+ previous = t->value;
+ free(t);
+ }
+
+ assert_se(prioq_isempty(q));
+ assert_se(set_isempty(s));
+}
+
+int main(int argc, char* argv[]) {
+
+ test_unsigned();
+ test_struct();
+
+ return 0;
+}
diff --git a/src/test/test-proc-cmdline.c b/src/test/test-proc-cmdline.c
new file mode 100644
index 0000000..1f5ee7d
--- /dev/null
+++ b/src/test/test-proc-cmdline.c
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "errno-util.h"
+#include "log.h"
+#include "macro.h"
+#include "proc-cmdline.h"
+#include "special.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+static int obj;
+
+static int parse_item(const char *key, const char *value, void *data) {
+ assert_se(key);
+ assert_se(data == &obj);
+
+ log_info("kernel cmdline option <%s> = <%s>", key, strna(value));
+ return 0;
+}
+
+static void test_proc_cmdline_parse(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(proc_cmdline_parse(parse_item, &obj, PROC_CMDLINE_STRIP_RD_PREFIX) >= 0);
+}
+
+static void test_proc_cmdline_override(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar=quux wuff-piep=tuet zumm some_arg_with_space='foo bar' and_one_more=\"zzz aaa\"") == 0);
+ assert_se(putenv((char*) "SYSTEMD_EFI_OPTIONS=different") == 0);
+
+ /* First test if the overrides for /proc/cmdline still work */
+ _cleanup_free_ char *line = NULL, *value = NULL;
+ assert_se(proc_cmdline(&line) >= 0);
+
+ /* Test if parsing makes uses of the override */
+ assert_se(streq(line, "foo_bar=quux wuff-piep=tuet zumm some_arg_with_space='foo bar' and_one_more=\"zzz aaa\""));
+ assert_se(proc_cmdline_get_key("foo_bar", 0, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("some_arg_with_space", 0, &value) > 0 && streq_ptr(value, "foo bar"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("and_one_more", 0, &value) > 0 && streq_ptr(value, "zzz aaa"));
+ value = mfree(value);
+
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=") == 0);
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar=quux wuff-piep=tuet zumm some_arg_with_space='foo bar' and_one_more=\"zzz aaa\"") == 0);
+
+ assert_se(streq(line, "foo_bar=quux wuff-piep=tuet zumm some_arg_with_space='foo bar' and_one_more=\"zzz aaa\""));
+ assert_se(proc_cmdline_get_key("foo_bar", 0, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("some_arg_with_space", 0, &value) > 0 && streq_ptr(value, "foo bar"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("and_one_more", 0, &value) > 0 && streq_ptr(value, "zzz aaa"));
+ value = mfree(value);
+}
+
+static int parse_item_given(const char *key, const char *value, void *data) {
+ assert_se(key);
+ assert_se(data);
+
+ bool *strip = data;
+
+ log_info("%s: option <%s> = <%s>", __func__, key, strna(value));
+ if (proc_cmdline_key_streq(key, "foo_bar"))
+ assert_se(streq(value, "quux"));
+ else if (proc_cmdline_key_streq(key, "wuff-piep"))
+ assert_se(streq(value, "tuet "));
+ else if (proc_cmdline_key_streq(key, "space"))
+ assert_se(streq(value, "x y z"));
+ else if (proc_cmdline_key_streq(key, "miepf"))
+ assert_se(streq(value, "uuu"));
+ else if (in_initrd() && *strip && proc_cmdline_key_streq(key, "zumm"))
+ assert_se(!value);
+ else if (in_initrd() && !*strip && proc_cmdline_key_streq(key, "rd.zumm"))
+ assert_se(!value);
+ else
+ assert_not_reached("Bad key!");
+
+ return 0;
+}
+
+static void test_proc_cmdline_given(bool flip_initrd) {
+ log_info("/* %s (flip: %s) */", __func__, yes_no(flip_initrd));
+
+ if (flip_initrd)
+ in_initrd_force(!in_initrd());
+
+ bool t = true, f = false;
+ assert_se(proc_cmdline_parse_given("foo_bar=quux wuff-piep=\"tuet \" rd.zumm space='x y z' miepf=\"uuu\"",
+ parse_item_given, &t, PROC_CMDLINE_STRIP_RD_PREFIX) >= 0);
+
+ assert_se(proc_cmdline_parse_given("foo_bar=quux wuff-piep=\"tuet \" rd.zumm space='x y z' miepf=\"uuu\"",
+ parse_item_given, &f, 0) >= 0);
+
+ if (flip_initrd)
+ in_initrd_force(!in_initrd());
+}
+
+static void test_proc_cmdline_get_key(void) {
+ _cleanup_free_ char *value = NULL;
+
+ log_info("/* %s */", __func__);
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar=quux wuff-piep=tuet zumm spaaace='ö ü ß' ticks=\"''\"\n\nkkk=uuu\n\n\n") == 0);
+
+ assert_se(proc_cmdline_get_key("", 0, &value) == -EINVAL);
+ assert_se(proc_cmdline_get_key("abc", 0, NULL) == 0);
+ assert_se(proc_cmdline_get_key("abc", 0, &value) == 0 && value == NULL);
+ assert_se(proc_cmdline_get_key("abc", PROC_CMDLINE_VALUE_OPTIONAL, &value) == 0 && value == NULL);
+
+ assert_se(proc_cmdline_get_key("foo_bar", 0, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("foo_bar", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("foo-bar", 0, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("foo-bar", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && streq_ptr(value, "quux"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("foo-bar", 0, NULL) == 0);
+ assert_se(proc_cmdline_get_key("foo-bar", PROC_CMDLINE_VALUE_OPTIONAL, NULL) == -EINVAL);
+
+ assert_se(proc_cmdline_get_key("wuff-piep", 0, &value) > 0 && streq_ptr(value, "tuet"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("wuff-piep", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && streq_ptr(value, "tuet"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("wuff_piep", 0, &value) > 0 && streq_ptr(value, "tuet"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("wuff_piep", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && streq_ptr(value, "tuet"));
+ value = mfree(value);
+ assert_se(proc_cmdline_get_key("wuff_piep", 0, NULL) == 0);
+ assert_se(proc_cmdline_get_key("wuff_piep", PROC_CMDLINE_VALUE_OPTIONAL, NULL) == -EINVAL);
+
+ assert_se(proc_cmdline_get_key("zumm", 0, &value) == 0 && value == NULL);
+ assert_se(proc_cmdline_get_key("zumm", PROC_CMDLINE_VALUE_OPTIONAL, &value) > 0 && value == NULL);
+ assert_se(proc_cmdline_get_key("zumm", 0, NULL) > 0);
+
+ assert_se(proc_cmdline_get_key("spaaace", 0, &value) > 0 && streq_ptr(value, "ö ü ß"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("ticks", 0, &value) > 0 && streq_ptr(value, "''"));
+ value = mfree(value);
+
+ assert_se(proc_cmdline_get_key("kkk", 0, &value) > 0 && streq_ptr(value, "uuu"));
+}
+
+static void test_proc_cmdline_get_bool(void) {
+ bool value = false;
+
+ log_info("/* %s */", __func__);
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar bar-waldo=1 x_y-z=0 quux=miep\nda=yes\nthe=1") == 0);
+ assert_se(putenv((char*) "SYSTEMD_EFI_OPTIONS=") == 0);
+
+ assert_se(proc_cmdline_get_bool("", &value) == -EINVAL);
+ assert_se(proc_cmdline_get_bool("abc", &value) == 0 && value == false);
+ assert_se(proc_cmdline_get_bool("foo_bar", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("foo-bar", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("bar-waldo", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("bar_waldo", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("x_y-z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x-y-z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x-y_z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x_y_z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("quux", &value) == -EINVAL && value == false);
+ assert_se(proc_cmdline_get_bool("da", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("the", &value) > 0 && value == true);
+
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=") == 0);
+ assert_se(putenv((char*) "SYSTEMD_EFI_OPTIONS=foo_bar bar-waldo=1 x_y-z=0 quux=miep\nda=yes\nthe=1") == 0);
+
+#if ENABLE_EFI
+ assert_se(proc_cmdline_get_bool("", &value) == -EINVAL);
+ assert_se(proc_cmdline_get_bool("abc", &value) == 0 && value == false);
+ assert_se(proc_cmdline_get_bool("foo_bar", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("foo-bar", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("bar-waldo", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("bar_waldo", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("x_y-z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x-y-z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x-y_z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("x_y_z", &value) > 0 && value == false);
+ assert_se(proc_cmdline_get_bool("quux", &value) == -EINVAL && value == false);
+ assert_se(proc_cmdline_get_bool("da", &value) > 0 && value == true);
+ assert_se(proc_cmdline_get_bool("the", &value) > 0 && value == true);
+#endif
+}
+
+static void test_proc_cmdline_get_key_many(void) {
+ _cleanup_free_ char *value1 = NULL, *value2 = NULL, *value3 = NULL, *value4 = NULL, *value5 = NULL, *value6 = NULL, *value7 = NULL;
+
+ log_info("/* %s */", __func__);
+ assert_se(putenv((char*) "SYSTEMD_PROC_CMDLINE=foo_bar=quux wuff-piep=tuet zumm SPACE='one two' doubleticks=\" aaa aaa \"\n\nzummm='\n'\n") == 0);
+
+ assert_se(proc_cmdline_get_key_many(0,
+ "wuff-piep", &value3,
+ "foo_bar", &value1,
+ "idontexist", &value2,
+ "zumm", &value4,
+ "SPACE", &value5,
+ "doubleticks", &value6,
+ "zummm", &value7) == 5);
+
+ assert_se(streq_ptr(value1, "quux"));
+ assert_se(!value2);
+ assert_se(streq_ptr(value3, "tuet"));
+ assert_se(!value4);
+ assert_se(streq_ptr(value5, "one two"));
+ assert_se(streq_ptr(value6, " aaa aaa "));
+ assert_se(streq_ptr(value7, "\n"));
+}
+
+static void test_proc_cmdline_key_streq(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(proc_cmdline_key_streq("", ""));
+ assert_se(proc_cmdline_key_streq("a", "a"));
+ assert_se(!proc_cmdline_key_streq("", "a"));
+ assert_se(!proc_cmdline_key_streq("a", ""));
+ assert_se(proc_cmdline_key_streq("a", "a"));
+ assert_se(!proc_cmdline_key_streq("a", "b"));
+ assert_se(proc_cmdline_key_streq("x-y-z", "x-y-z"));
+ assert_se(proc_cmdline_key_streq("x-y-z", "x_y_z"));
+ assert_se(proc_cmdline_key_streq("x-y-z", "x-y_z"));
+ assert_se(proc_cmdline_key_streq("x-y-z", "x_y-z"));
+ assert_se(proc_cmdline_key_streq("x_y-z", "x-y_z"));
+ assert_se(!proc_cmdline_key_streq("x_y-z", "x-z_z"));
+}
+
+static void test_proc_cmdline_key_startswith(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(proc_cmdline_key_startswith("", ""));
+ assert_se(proc_cmdline_key_startswith("x", ""));
+ assert_se(!proc_cmdline_key_startswith("", "x"));
+ assert_se(proc_cmdline_key_startswith("x", "x"));
+ assert_se(!proc_cmdline_key_startswith("x", "y"));
+ assert_se(!proc_cmdline_key_startswith("foo-bar", "quux"));
+ assert_se(proc_cmdline_key_startswith("foo-bar", "foo"));
+ assert_se(proc_cmdline_key_startswith("foo-bar", "foo-bar"));
+ assert_se(proc_cmdline_key_startswith("foo-bar", "foo_bar"));
+ assert_se(proc_cmdline_key_startswith("foo-bar", "foo_"));
+ assert_se(!proc_cmdline_key_startswith("foo-bar", "foo_xx"));
+}
+
+int main(void) {
+ test_setup_logging(LOG_INFO);
+
+ if (access("/proc/cmdline", R_OK) < 0 && ERRNO_IS_PRIVILEGE(errno))
+ return log_tests_skipped("can't read /proc/cmdline");
+
+ test_proc_cmdline_parse();
+ test_proc_cmdline_override();
+ test_proc_cmdline_given(false);
+ /* Repeat the same thing, but now flip our ininitrdness */
+ test_proc_cmdline_given(true);
+ test_proc_cmdline_key_streq();
+ test_proc_cmdline_key_startswith();
+ test_proc_cmdline_get_key();
+ test_proc_cmdline_get_bool();
+ test_proc_cmdline_get_key_many();
+
+ return 0;
+}
diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c
new file mode 100644
index 0000000..a87cdf8
--- /dev/null
+++ b/src/test/test-process-util.c
@@ -0,0 +1,720 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_sched.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "tests.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+static void test_get_process_comm(pid_t pid) {
+ struct stat st;
+ _cleanup_free_ char *a = NULL, *c = NULL, *d = NULL, *f = NULL, *i = NULL;
+ _cleanup_free_ char *env = NULL;
+ char path[STRLEN("/proc//comm") + DECIMAL_STR_MAX(pid_t)];
+ pid_t e;
+ uid_t u;
+ gid_t g;
+ dev_t h;
+ int r;
+
+ xsprintf(path, "/proc/"PID_FMT"/comm", pid);
+
+ if (stat(path, &st) == 0) {
+ assert_se(get_process_comm(pid, &a) >= 0);
+ log_info("PID"PID_FMT" comm: '%s'", pid, a);
+ } else
+ log_warning("%s not exist.", path);
+
+ assert_se(get_process_cmdline(pid, 0, PROCESS_CMDLINE_COMM_FALLBACK, &c) >= 0);
+ log_info("PID"PID_FMT" cmdline: '%s'", pid, c);
+
+ assert_se(get_process_cmdline(pid, 8, 0, &d) >= 0);
+ log_info("PID"PID_FMT" cmdline truncated to 8: '%s'", pid, d);
+
+ free(d);
+ assert_se(get_process_cmdline(pid, 1, 0, &d) >= 0);
+ log_info("PID"PID_FMT" cmdline truncated to 1: '%s'", pid, d);
+
+ assert_se(get_process_ppid(pid, &e) >= 0);
+ log_info("PID"PID_FMT" PPID: "PID_FMT, pid, e);
+ assert_se(pid == 1 ? e == 0 : e > 0);
+
+ assert_se(is_kernel_thread(pid) == 0 || pid != 1);
+
+ r = get_process_exe(pid, &f);
+ assert_se(r >= 0 || r == -EACCES);
+ log_info("PID"PID_FMT" exe: '%s'", pid, strna(f));
+
+ assert_se(get_process_uid(pid, &u) == 0);
+ log_info("PID"PID_FMT" UID: "UID_FMT, pid, u);
+
+ assert_se(get_process_gid(pid, &g) == 0);
+ log_info("PID"PID_FMT" GID: "GID_FMT, pid, g);
+
+ r = get_process_environ(pid, &env);
+ assert_se(r >= 0 || r == -EACCES);
+ log_info("PID"PID_FMT" strlen(environ): %zi", pid, env ? (ssize_t)strlen(env) : (ssize_t)-errno);
+
+ if (!detect_container())
+ assert_se(get_ctty_devnr(pid, &h) == -ENXIO || pid != 1);
+
+ (void) getenv_for_pid(pid, "PATH", &i);
+ log_info("PID"PID_FMT" $PATH: '%s'", pid, strna(i));
+}
+
+static void test_get_process_comm_escape_one(const char *input, const char *output) {
+ _cleanup_free_ char *n = NULL;
+
+ log_info("input: <%s> — output: <%s>", input, output);
+
+ assert_se(prctl(PR_SET_NAME, input) >= 0);
+ assert_se(get_process_comm(0, &n) >= 0);
+
+ log_info("got: <%s>", n);
+
+ assert_se(streq_ptr(n, output));
+}
+
+static void test_get_process_comm_escape(void) {
+ _cleanup_free_ char *saved = NULL;
+
+ assert_se(get_process_comm(0, &saved) >= 0);
+
+ test_get_process_comm_escape_one("", "");
+ test_get_process_comm_escape_one("foo", "foo");
+ test_get_process_comm_escape_one("012345678901234", "012345678901234");
+ test_get_process_comm_escape_one("0123456789012345", "012345678901234");
+ test_get_process_comm_escape_one("äöüß", "\\303\\244\\303\\266\\303\\274\\303\\237");
+ test_get_process_comm_escape_one("xäöüß", "x\\303\\244\\303\\266\\303\\274\\303\\237");
+ test_get_process_comm_escape_one("xxäöüß", "xx\\303\\244\\303\\266\\303\\274\\303\\237");
+ test_get_process_comm_escape_one("xxxäöüß", "xxx\\303\\244\\303\\266\\303\\274\\303\\237");
+ test_get_process_comm_escape_one("xxxxäöüß", "xxxx\\303\\244\\303\\266\\303\\274\\303\\237");
+ test_get_process_comm_escape_one("xxxxxäöüß", "xxxxx\\303\\244\\303\\266\\303\\274\\303\\237");
+
+ assert_se(prctl(PR_SET_NAME, saved) >= 0);
+}
+
+static void test_pid_is_unwaited(void) {
+ pid_t pid;
+
+ pid = fork();
+ assert_se(pid >= 0);
+ if (pid == 0) {
+ _exit(EXIT_SUCCESS);
+ } else {
+ int status;
+
+ waitpid(pid, &status, 0);
+ assert_se(!pid_is_unwaited(pid));
+ }
+ assert_se(pid_is_unwaited(getpid_cached()));
+ assert_se(!pid_is_unwaited(-1));
+}
+
+static void test_pid_is_alive(void) {
+ pid_t pid;
+
+ pid = fork();
+ assert_se(pid >= 0);
+ if (pid == 0) {
+ _exit(EXIT_SUCCESS);
+ } else {
+ int status;
+
+ waitpid(pid, &status, 0);
+ assert_se(!pid_is_alive(pid));
+ }
+ assert_se(pid_is_alive(getpid_cached()));
+ assert_se(!pid_is_alive(-1));
+}
+
+static void test_personality(void) {
+
+ assert_se(personality_to_string(PER_LINUX));
+ assert_se(!personality_to_string(PERSONALITY_INVALID));
+
+ assert_se(streq(personality_to_string(PER_LINUX), architecture_to_string(native_architecture())));
+
+ assert_se(personality_from_string(personality_to_string(PER_LINUX)) == PER_LINUX);
+ assert_se(personality_from_string(architecture_to_string(native_architecture())) == PER_LINUX);
+
+#ifdef __x86_64__
+ assert_se(streq_ptr(personality_to_string(PER_LINUX), "x86-64"));
+ assert_se(streq_ptr(personality_to_string(PER_LINUX32), "x86"));
+
+ assert_se(personality_from_string("x86-64") == PER_LINUX);
+ assert_se(personality_from_string("x86") == PER_LINUX32);
+ assert_se(personality_from_string("ia64") == PERSONALITY_INVALID);
+ assert_se(personality_from_string(NULL) == PERSONALITY_INVALID);
+
+ assert_se(personality_from_string(personality_to_string(PER_LINUX32)) == PER_LINUX32);
+#endif
+}
+
+static void test_get_process_cmdline_harder(void) {
+ char path[] = "/tmp/test-cmdlineXXXXXX";
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *line = NULL;
+ pid_t pid;
+
+ if (geteuid() != 0) {
+ log_info("Skipping %s: not root", __func__);
+ return;
+ }
+
+ if (!have_namespaces()) {
+ log_notice("Testing without namespaces, skipping %s", __func__);
+ return;
+ }
+
+#if HAVE_VALGRIND_VALGRIND_H
+ /* valgrind patches open(/proc//cmdline)
+ * so, test_get_process_cmdline_harder fails always
+ * See https://github.com/systemd/systemd/pull/3555#issuecomment-226564908 */
+ if (RUNNING_ON_VALGRIND) {
+ log_info("Skipping %s: running on valgrind", __func__);
+ return;
+ }
+#endif
+
+ pid = fork();
+ if (pid > 0) {
+ siginfo_t si;
+
+ (void) wait_for_terminate(pid, &si);
+
+ assert_se(si.si_code == CLD_EXITED);
+ assert_se(si.si_status == 0);
+
+ return;
+ }
+
+ assert_se(pid == 0);
+ assert_se(unshare(CLONE_NEWNS) >= 0);
+
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ log_warning_errno(errno, "mount(..., \"/\", MS_SLAVE|MS_REC, ...) failed: %m");
+ assert_se(IN_SET(errno, EPERM, EACCES));
+ return;
+ }
+
+ fd = mkostemp(path, O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ /* Note that we don't unmount the following bind-mount at the end of the test because the kernel
+ * will clear up its /proc/PID/ hierarchy automatically as soon as the test stops. */
+ if (mount(path, "/proc/self/cmdline", "bind", MS_BIND, NULL) < 0) {
+ /* This happens under selinux… Abort the test in this case. */
+ log_warning_errno(errno, "mount(..., \"/proc/self/cmdline\", \"bind\", ...) failed: %m");
+ assert_se(IN_SET(errno, EPERM, EACCES));
+ return;
+ }
+
+ assert_se(unlink(path) >= 0);
+
+ assert_se(prctl(PR_SET_NAME, "testa") >= 0);
+
+ assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, 0, &line) == -ENOENT);
+
+ assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[testa]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 0, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ log_info("'%s'", line);
+ assert_se(streq(line, ""));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 1, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 2, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 3, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[t…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 4, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[te…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 5, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[tes…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 6, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[test…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 7, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[testa]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 8, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[testa]"));
+ line = mfree(line);
+
+ assert_se(write(fd, "foo\0bar", 8) == 8);
+
+ assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, 0, &line) >= 0);
+ log_info("'%s'", line);
+ assert_se(streq(line, "foo bar"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar"));
+ line = mfree(line);
+
+ assert_se(write(fd, "quux", 4) == 4);
+ assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, 0, &line) >= 0);
+ log_info("'%s'", line);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 1, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 2, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "f…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 3, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "fo…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 4, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 5, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo …"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 6, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo b…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 7, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo ba…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 8, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 9, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar …"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 10, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar q…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 11, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar qu…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 12, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 13, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 14, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 1000, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "foo bar quux"));
+ line = mfree(line);
+
+ assert_se(ftruncate(fd, 0) >= 0);
+ assert_se(prctl(PR_SET_NAME, "aaaa bbbb cccc") >= 0);
+
+ assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, 0, &line) == -ENOENT);
+
+ assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[aaaa bbbb cccc]"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 10, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[aaaa bbb…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 11, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[aaaa bbbb…"));
+ line = mfree(line);
+
+ assert_se(get_process_cmdline(getpid_cached(), 12, PROCESS_CMDLINE_COMM_FALLBACK, &line) >= 0);
+ assert_se(streq(line, "[aaaa bbbb …"));
+ line = mfree(line);
+
+ safe_close(fd);
+ _exit(EXIT_SUCCESS);
+}
+
+static void test_rename_process_now(const char *p, int ret) {
+ _cleanup_free_ char *comm = NULL, *cmdline = NULL;
+ int r;
+
+ r = rename_process(p);
+ assert_se(r == ret ||
+ (ret == 0 && r >= 0) ||
+ (ret > 0 && r > 0));
+
+ if (r < 0)
+ return;
+
+#if HAVE_VALGRIND_VALGRIND_H
+ /* see above, valgrind is weird, we can't verify what we are doing here */
+ if (RUNNING_ON_VALGRIND)
+ return;
+#endif
+
+ assert_se(get_process_comm(0, &comm) >= 0);
+ log_info("comm = <%s>", comm);
+ assert_se(strneq(comm, p, TASK_COMM_LEN-1));
+ /* We expect comm to be at most 16 bytes (TASK_COMM_LEN). The kernel may raise this limit in the
+ * future. We'd only check the initial part, at least until we recompile, but this will still pass. */
+
+ r = get_process_cmdline(0, SIZE_MAX, 0, &cmdline);
+ assert_se(r >= 0);
+ /* we cannot expect cmdline to be renamed properly without privileges */
+ if (geteuid() == 0) {
+ if (r == 0 && detect_container() > 0)
+ log_info("cmdline = <%s> (not verified, Running in unprivileged container?)", cmdline);
+ else {
+ log_info("cmdline = <%s>", cmdline);
+ assert_se(strneq(p, cmdline, STRLEN("test-process-util")));
+ assert_se(startswith(p, cmdline));
+ }
+ } else
+ log_info("cmdline = <%s> (not verified)", cmdline);
+}
+
+static void test_rename_process_one(const char *p, int ret) {
+ siginfo_t si;
+ pid_t pid;
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ /* child */
+ test_rename_process_now(p, ret);
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate(pid, &si) >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ assert_se(si.si_status == EXIT_SUCCESS);
+}
+
+static void test_rename_process_multi(void) {
+ pid_t pid;
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid > 0) {
+ siginfo_t si;
+
+ assert_se(wait_for_terminate(pid, &si) >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+ assert_se(si.si_status == EXIT_SUCCESS);
+
+ return;
+ }
+
+ /* child */
+ test_rename_process_now("one", 1);
+ test_rename_process_now("more", 0); /* longer than "one", hence truncated */
+ (void) setresuid(99, 99, 99); /* change uid when running privileged */
+ test_rename_process_now("time!", 0);
+ test_rename_process_now("0", 1); /* shorter than "one", should fit */
+ test_rename_process_one("", -EINVAL);
+ test_rename_process_one(NULL, -EINVAL);
+ _exit(EXIT_SUCCESS);
+}
+
+static void test_rename_process(void) {
+ test_rename_process_one(NULL, -EINVAL);
+ test_rename_process_one("", -EINVAL);
+ test_rename_process_one("foo", 1); /* should always fit */
+ test_rename_process_one("this is a really really long process name, followed by some more words", 0); /* unlikely to fit */
+ test_rename_process_one("1234567", 1); /* should always fit */
+ test_rename_process_multi(); /* multiple invocations and dropped privileges */
+}
+
+static void test_getpid_cached(void) {
+ siginfo_t si;
+ pid_t a, b, c, d, e, f, child;
+
+ a = raw_getpid();
+ b = getpid_cached();
+ c = getpid();
+
+ assert_se(a == b && a == c);
+
+ child = fork();
+ assert_se(child >= 0);
+
+ if (child == 0) {
+ /* In child */
+ a = raw_getpid();
+ b = getpid_cached();
+ c = getpid();
+
+ assert_se(a == b && a == c);
+ _exit(EXIT_SUCCESS);
+ }
+
+ d = raw_getpid();
+ e = getpid_cached();
+ f = getpid();
+
+ assert_se(a == d && a == e && a == f);
+
+ assert_se(wait_for_terminate(child, &si) >= 0);
+ assert_se(si.si_status == 0);
+ assert_se(si.si_code == CLD_EXITED);
+}
+
+#define MEASURE_ITERATIONS (10000000LLU)
+
+static void test_getpid_measure(void) {
+ unsigned long long i;
+ usec_t t, q;
+
+ t = now(CLOCK_MONOTONIC);
+ for (i = 0; i < MEASURE_ITERATIONS; i++)
+ (void) getpid();
+ q = now(CLOCK_MONOTONIC) - t;
+
+ log_info(" glibc getpid(): %lf µs each\n", (double) q / MEASURE_ITERATIONS);
+
+ t = now(CLOCK_MONOTONIC);
+ for (i = 0; i < MEASURE_ITERATIONS; i++)
+ (void) getpid_cached();
+ q = now(CLOCK_MONOTONIC) - t;
+
+ log_info("getpid_cached(): %lf µs each\n", (double) q / MEASURE_ITERATIONS);
+}
+
+static void test_safe_fork(void) {
+ siginfo_t status;
+ pid_t pid;
+ int r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ r = safe_fork("(test-child)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NULL_STDIO|FORK_REOPEN_LOG, &pid);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* child */
+ usleep(100 * USEC_PER_MSEC);
+
+ _exit(88);
+ }
+
+ assert_se(wait_for_terminate(pid, &status) >= 0);
+ assert_se(status.si_code == CLD_EXITED);
+ assert_se(status.si_status == 88);
+}
+
+static void test_pid_to_ptr(void) {
+
+ assert_se(PTR_TO_PID(NULL) == 0);
+ assert_se(PID_TO_PTR(0) == NULL);
+
+ assert_se(PTR_TO_PID(PID_TO_PTR(1)) == 1);
+ assert_se(PTR_TO_PID(PID_TO_PTR(2)) == 2);
+ assert_se(PTR_TO_PID(PID_TO_PTR(-1)) == -1);
+ assert_se(PTR_TO_PID(PID_TO_PTR(-2)) == -2);
+
+ assert_se(PTR_TO_PID(PID_TO_PTR(INT16_MAX)) == INT16_MAX);
+ assert_se(PTR_TO_PID(PID_TO_PTR(INT16_MIN)) == INT16_MIN);
+
+ assert_se(PTR_TO_PID(PID_TO_PTR(INT32_MAX)) == INT32_MAX);
+ assert_se(PTR_TO_PID(PID_TO_PTR(INT32_MIN)) == INT32_MIN);
+}
+
+static void test_ioprio_class_from_to_string_one(const char *val, int expected) {
+ assert_se(ioprio_class_from_string(val) == expected);
+ if (expected >= 0) {
+ _cleanup_free_ char *s = NULL;
+ unsigned ret;
+
+ assert_se(ioprio_class_to_string_alloc(expected, &s) == 0);
+ /* We sometimes get a class number and sometimes a number back */
+ assert_se(streq(s, val) ||
+ safe_atou(val, &ret) == 0);
+ }
+}
+
+static void test_ioprio_class_from_to_string(void) {
+ test_ioprio_class_from_to_string_one("none", IOPRIO_CLASS_NONE);
+ test_ioprio_class_from_to_string_one("realtime", IOPRIO_CLASS_RT);
+ test_ioprio_class_from_to_string_one("best-effort", IOPRIO_CLASS_BE);
+ test_ioprio_class_from_to_string_one("idle", IOPRIO_CLASS_IDLE);
+ test_ioprio_class_from_to_string_one("0", 0);
+ test_ioprio_class_from_to_string_one("1", 1);
+ test_ioprio_class_from_to_string_one("7", 7);
+ test_ioprio_class_from_to_string_one("8", 8);
+ test_ioprio_class_from_to_string_one("9", -1);
+ test_ioprio_class_from_to_string_one("-1", -1);
+}
+
+static void test_setpriority_closest(void) {
+ int r;
+
+ r = safe_fork("(test-setprio)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT|FORK_LOG, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ bool full_test;
+ int p, q;
+ /* child */
+
+ /* rlimit of 30 equals nice level of -10 */
+ if (setrlimit(RLIMIT_NICE, &RLIMIT_MAKE_CONST(30)) < 0) {
+ /* If this fails we are probably unprivileged or in a userns of some kind, let's skip
+ * the full test */
+ assert_se(ERRNO_IS_PRIVILEGE(errno));
+ full_test = false;
+ } else {
+ assert_se(setresgid(GID_NOBODY, GID_NOBODY, GID_NOBODY) >= 0);
+ assert_se(setresuid(UID_NOBODY, UID_NOBODY, UID_NOBODY) >= 0);
+ full_test = true;
+ }
+
+ errno = 0;
+ p = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0);
+
+ /* It should always be possible to set our nice level to the current one */
+ assert_se(setpriority_closest(p) > 0);
+
+ errno = 0;
+ q = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0 && p == q);
+
+ /* It should also be possible to set the nice level to one higher */
+ if (p < PRIO_MAX-1) {
+ assert_se(setpriority_closest(++p) > 0);
+
+ errno = 0;
+ q = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0 && p == q);
+ }
+
+ /* It should also be possible to set the nice level to two higher */
+ if (p < PRIO_MAX-1) {
+ assert_se(setpriority_closest(++p) > 0);
+
+ errno = 0;
+ q = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0 && p == q);
+ }
+
+ if (full_test) {
+ /* These two should work, given the RLIMIT_NICE we set above */
+ assert_se(setpriority_closest(-10) > 0);
+ errno = 0;
+ q = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0 && q == -10);
+
+ assert_se(setpriority_closest(-9) > 0);
+ errno = 0;
+ q = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0 && q == -9);
+
+ /* This should succeed but should be clamped to the limit */
+ assert_se(setpriority_closest(-11) == 0);
+ errno = 0;
+ q = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0 && q == -10);
+
+ assert_se(setpriority_closest(-8) > 0);
+ errno = 0;
+ q = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0 && q == -8);
+
+ /* This should succeed but should be clamped to the limit */
+ assert_se(setpriority_closest(-12) == 0);
+ errno = 0;
+ q = getpriority(PRIO_PROCESS, 0);
+ assert_se(errno == 0 && q == -10);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ save_argc_argv(argc, argv);
+
+ if (argc > 1) {
+ pid_t pid = 0;
+
+ (void) parse_pid(argv[1], &pid);
+ test_get_process_comm(pid);
+ } else {
+ TEST_REQ_RUNNING_SYSTEMD(test_get_process_comm(1));
+ test_get_process_comm(getpid());
+ }
+
+ test_get_process_comm_escape();
+ test_pid_is_unwaited();
+ test_pid_is_alive();
+ test_personality();
+ test_get_process_cmdline_harder();
+ test_rename_process();
+ test_getpid_cached();
+ test_getpid_measure();
+ test_safe_fork();
+ test_pid_to_ptr();
+ test_ioprio_class_from_to_string();
+ test_setpriority_closest();
+
+ return 0;
+}
diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c
new file mode 100644
index 0000000..b2679e3
--- /dev/null
+++ b/src/test/test-procfs-util.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "errno-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "procfs-util.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)];
+ nsec_t nsec;
+ uint64_t v;
+ int r;
+
+ log_parse_environment();
+ log_open();
+
+ assert_se(procfs_cpu_get_usage(&nsec) >= 0);
+ log_info("Current system CPU time: %s", format_timespan(buf, sizeof(buf), nsec/NSEC_PER_USEC, 1));
+
+ assert_se(procfs_memory_get_used(&v) >= 0);
+ log_info("Current memory usage: %s", format_bytes(buf, sizeof(buf), v));
+
+ assert_se(procfs_tasks_get_current(&v) >= 0);
+ log_info("Current number of tasks: %" PRIu64, v);
+
+ r = procfs_tasks_get_limit(&v);
+ if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r))
+ return log_tests_skipped("can't read /proc/sys/kernel/pid_max");
+
+ assert_se(r >= 0);
+ log_info("Limit of tasks: %" PRIu64, v);
+ assert_se(v > 0);
+ assert_se(procfs_tasks_set_limit(v) >= 0);
+
+ if (v > 100) {
+ uint64_t w;
+ r = procfs_tasks_set_limit(v-1);
+ assert_se(IN_SET(r, 0, -EPERM, -EACCES, -EROFS));
+
+ assert_se(procfs_tasks_get_limit(&w) >= 0);
+ assert_se((r == 0 && w == v - 1) || (r < 0 && w == v));
+
+ assert_se(procfs_tasks_set_limit(v) >= 0);
+
+ assert_se(procfs_tasks_get_limit(&w) >= 0);
+ assert_se(v == w);
+ }
+
+ return 0;
+}
diff --git a/src/test/test-psi-util.c b/src/test/test-psi-util.c
new file mode 100644
index 0000000..0b5a30c
--- /dev/null
+++ b/src/test/test-psi-util.c
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/loadavg.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "psi-util.h"
+#include "tests.h"
+
+static void test_read_mem_pressure(void) {
+ _cleanup_(unlink_tempfilep) char path[] = "/tmp/pressurereadtestXXXXXX";
+ ResourcePressure rp;
+
+ if (geteuid() != 0)
+ return (void) log_tests_skipped("not root");
+
+ assert_se(mkstemp(path));
+
+ assert_se(read_resource_pressure("/verylikelynonexistentpath", PRESSURE_TYPE_SOME, &rp) < 0);
+ assert_se(read_resource_pressure(path, PRESSURE_TYPE_SOME, &rp) < 0);
+
+ assert_se(write_string_file(path, "herpdederp\n", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_resource_pressure(path, PRESSURE_TYPE_SOME, &rp) < 0);
+
+ /* Pressure file with some invalid values*/
+ assert_se(write_string_file(path, "some avg10=0.22=55 avg60=0.17=8 avg300=1.11=00 total=58761459\n"
+ "full avg10=0.23=55 avg60=0.16=8 avg300=1.08=00 total=58464525", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_resource_pressure(path, PRESSURE_TYPE_SOME, &rp) < 0);
+
+ /* Same pressure valid values as below but with duplicate avg60 field */
+ assert_se(write_string_file(path, "some avg10=0.22 avg60=0.17 avg60=0.18 avg300=1.11 total=58761459\n"
+ "full avg10=0.23 avg60=0.16 avg300=1.08 total=58464525", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_resource_pressure(path, PRESSURE_TYPE_SOME, &rp) < 0);
+
+ assert_se(write_string_file(path, "some avg10=0.22 avg60=0.17 avg300=1.11 total=58761459\n"
+ "full avg10=0.23 avg60=0.16 avg300=1.08 total=58464525", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_resource_pressure(path, PRESSURE_TYPE_SOME, &rp) == 0);
+ assert_se(LOAD_INT(rp.avg10) == 0);
+ assert_se(LOAD_FRAC(rp.avg10) == 22);
+ assert_se(LOAD_INT(rp.avg60) == 0);
+ assert_se(LOAD_FRAC(rp.avg60) == 17);
+ assert_se(LOAD_INT(rp.avg300) == 1);
+ assert_se(LOAD_FRAC(rp.avg300) == 11);
+ assert_se(rp.total == 58761459);
+ assert(read_resource_pressure(path, PRESSURE_TYPE_FULL, &rp) == 0);
+ assert_se(LOAD_INT(rp.avg10) == 0);
+ assert_se(LOAD_FRAC(rp.avg10) == 23);
+ assert_se(LOAD_INT(rp.avg60) == 0);
+ assert_se(LOAD_FRAC(rp.avg60) == 16);
+ assert_se(LOAD_INT(rp.avg300) == 1);
+ assert_se(LOAD_FRAC(rp.avg300) == 8);
+ assert_se(rp.total == 58464525);
+
+ /* Pressure file with extra unsupported fields */
+ assert_se(write_string_file(path, "some avg5=0.55 avg10=0.22 avg60=0.17 avg300=1.11 total=58761459\n"
+ "full avg10=0.23 avg60=0.16 avg300=1.08 avg600=2.00 total=58464525", WRITE_STRING_FILE_CREATE) == 0);
+ assert_se(read_resource_pressure(path, PRESSURE_TYPE_SOME, &rp) == 0);
+ assert_se(LOAD_INT(rp.avg10) == 0);
+ assert_se(LOAD_FRAC(rp.avg10) == 22);
+ assert_se(LOAD_INT(rp.avg60) == 0);
+ assert_se(LOAD_FRAC(rp.avg60) == 17);
+ assert_se(LOAD_INT(rp.avg300) == 1);
+ assert_se(LOAD_FRAC(rp.avg300) == 11);
+ assert_se(rp.total == 58761459);
+ assert(read_resource_pressure(path, PRESSURE_TYPE_FULL, &rp) == 0);
+ assert_se(LOAD_INT(rp.avg10) == 0);
+ assert_se(LOAD_FRAC(rp.avg10) == 23);
+ assert_se(LOAD_INT(rp.avg60) == 0);
+ assert_se(LOAD_FRAC(rp.avg60) == 16);
+ assert_se(LOAD_INT(rp.avg300) == 1);
+ assert_se(LOAD_FRAC(rp.avg300) == 8);
+ assert_se(rp.total == 58464525);
+}
+
+int main(void) {
+ test_setup_logging(LOG_DEBUG);
+ test_read_mem_pressure();
+ return 0;
+}
diff --git a/src/test/test-qrcode-util.c b/src/test/test-qrcode-util.c
new file mode 100644
index 0000000..221ad85
--- /dev/null
+++ b/src/test/test-qrcode-util.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "locale-util.h"
+#include "main-func.h"
+#include "qrcode-util.h"
+#include "tests.h"
+
+static int run(int argc, char **argv) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(setenv("SYSTEMD_COLORS", "1", 1) == 0); /* Force the qrcode to be printed */
+
+ r = print_qrcode(stdout, "This should say \"TEST\"", "TEST");
+ if (r == -EOPNOTSUPP)
+ return log_tests_skipped("not supported");
+ if (r < 0)
+ return log_error_errno(r, "Failed to print QR code: %m");
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/test/test-random-util.c b/src/test/test-random-util.c
new file mode 100644
index 0000000..02a73ec
--- /dev/null
+++ b/src/test/test-random-util.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "hexdecoct.h"
+#include "random-util.h"
+#include "log.h"
+#include "tests.h"
+
+static void test_genuine_random_bytes(RandomFlags flags) {
+ uint8_t buf[16] = {};
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ for (i = 1; i < sizeof buf; i++) {
+ assert_se(genuine_random_bytes(buf, i, flags) == 0);
+ if (i + 1 < sizeof buf)
+ assert_se(buf[i] == 0);
+
+ hexdump(stdout, buf, i);
+ }
+}
+
+static void test_pseudo_random_bytes(void) {
+ uint8_t buf[16] = {};
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ for (i = 1; i < sizeof buf; i++) {
+ pseudo_random_bytes(buf, i);
+ if (i + 1 < sizeof buf)
+ assert_se(buf[i] == 0);
+
+ hexdump(stdout, buf, i);
+ }
+}
+
+static void test_rdrand(void) {
+ int r, i;
+
+ for (i = 0; i < 10; i++) {
+ unsigned long x = 0;
+
+ r = rdrand(&x);
+ if (r < 0) {
+ log_error_errno(r, "RDRAND failed: %m");
+ return;
+ }
+
+ printf("%lx\n", x);
+ }
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_genuine_random_bytes(RANDOM_EXTEND_WITH_PSEUDO);
+ test_genuine_random_bytes(0);
+ test_genuine_random_bytes(RANDOM_BLOCK);
+ test_genuine_random_bytes(RANDOM_ALLOW_RDRAND);
+ test_genuine_random_bytes(RANDOM_ALLOW_INSECURE);
+
+ test_pseudo_random_bytes();
+
+ test_rdrand();
+
+ return 0;
+}
diff --git a/src/test/test-ratelimit.c b/src/test/test-ratelimit.c
new file mode 100644
index 0000000..af60572
--- /dev/null
+++ b/src/test/test-ratelimit.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "macro.h"
+#include "ratelimit.h"
+#include "time-util.h"
+
+static void test_ratelimit_below(void) {
+ int i;
+ RateLimit ratelimit = { 1 * USEC_PER_SEC, 10 };
+
+ for (i = 0; i < 10; i++)
+ assert_se(ratelimit_below(&ratelimit));
+ assert_se(!ratelimit_below(&ratelimit));
+ sleep(1);
+ for (i = 0; i < 10; i++)
+ assert_se(ratelimit_below(&ratelimit));
+
+ ratelimit = (RateLimit) { 0, 10 };
+ for (i = 0; i < 10000; i++)
+ assert_se(ratelimit_below(&ratelimit));
+}
+
+int main(int argc, char *argv[]) {
+ test_ratelimit_below();
+
+ return 0;
+}
diff --git a/src/test/test-replace-var.c b/src/test/test-replace-var.c
new file mode 100644
index 0000000..4d699b9
--- /dev/null
+++ b/src/test/test-replace-var.c
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "macro.h"
+#include "replace-var.h"
+#include "string-util.h"
+
+static char *lookup(const char *variable, void *userdata) {
+ return strjoin("<<<", variable, ">>>");
+}
+
+int main(int argc, char *argv[]) {
+ char *r;
+
+ assert_se(r = replace_var("@@@foobar@xyz@HALLO@foobar@test@@testtest@TEST@...@@@", lookup, NULL));
+ puts(r);
+ assert_se(streq(r, "@@@foobar@xyz<<<HALLO>>>foobar@test@@testtest<<<TEST>>>...@@@"));
+ free(r);
+
+ assert_se(r = strreplace("XYZFFFFXYZFFFFXYZ", "XYZ", "ABC"));
+ puts(r);
+ assert_se(streq(r, "ABCFFFFABCFFFFABC"));
+ free(r);
+
+ return 0;
+}
diff --git a/src/test/test-rlimit-util.c b/src/test/test-rlimit-util.c
new file mode 100644
index 0000000..057ae6b
--- /dev/null
+++ b/src/test/test-rlimit-util.c
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/resource.h>
+
+#include "alloc-util.h"
+#include "capability-util.h"
+#include "macro.h"
+#include "missing_resource.h"
+#include "rlimit-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+static void test_rlimit_parse_format(int resource, const char *string, rlim_t soft, rlim_t hard, int ret, const char *formatted) {
+ _cleanup_free_ char *f = NULL;
+ struct rlimit rl = {
+ .rlim_cur = 4711,
+ .rlim_max = 4712,
+ }, rl2 = {
+ .rlim_cur = 4713,
+ .rlim_max = 4714
+ };
+
+ assert_se(rlimit_parse(resource, string, &rl) == ret);
+ if (ret < 0)
+ return;
+
+ assert_se(rl.rlim_cur == soft);
+ assert_se(rl.rlim_max == hard);
+
+ assert_se(rlimit_format(&rl, &f) >= 0);
+ assert_se(streq(formatted, f));
+
+ assert_se(rlimit_parse(resource, formatted, &rl2) >= 0);
+ assert_se(memcmp(&rl, &rl2, sizeof(struct rlimit)) == 0);
+}
+
+int main(int argc, char *argv[]) {
+ struct rlimit old, new, high;
+ struct rlimit err = {
+ .rlim_cur = 10,
+ .rlim_max = 5,
+ };
+ int i;
+
+ log_parse_environment();
+ log_open();
+
+ assert_se(drop_capability(CAP_SYS_RESOURCE) == 0);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &old) == 0);
+ new.rlim_cur = MIN(5U, old.rlim_max);
+ new.rlim_max = old.rlim_max;
+ assert_se(setrlimit(RLIMIT_NOFILE, &new) >= 0);
+
+ assert_se(rlimit_from_string("NOFILE") == RLIMIT_NOFILE);
+ assert_se(rlimit_from_string("LimitNOFILE") == -1);
+ assert_se(rlimit_from_string("RLIMIT_NOFILE") == -1);
+ assert_se(rlimit_from_string("xxxNOFILE") == -1);
+ assert_se(rlimit_from_string("DefaultLimitNOFILE") == -1);
+
+ assert_se(rlimit_from_string_harder("NOFILE") == RLIMIT_NOFILE);
+ assert_se(rlimit_from_string_harder("LimitNOFILE") == RLIMIT_NOFILE);
+ assert_se(rlimit_from_string_harder("RLIMIT_NOFILE") == RLIMIT_NOFILE);
+ assert_se(rlimit_from_string_harder("xxxNOFILE") == -1);
+ assert_se(rlimit_from_string_harder("DefaultLimitNOFILE") == -1);
+
+ for (i = 0; i < _RLIMIT_MAX; i++) {
+ _cleanup_free_ char *prefixed = NULL;
+ const char *p;
+
+ assert_se(p = rlimit_to_string(i));
+ log_info("%i = %s", i, p);
+
+ assert_se(rlimit_from_string(p) == i);
+ assert_se(rlimit_from_string_harder(p) == i);
+
+ assert_se(prefixed = strjoin("Limit", p));
+
+ assert_se(rlimit_from_string(prefixed) < 0);
+ assert_se(rlimit_from_string_harder(prefixed) == i);
+
+ prefixed = mfree(prefixed);
+ assert_se(prefixed = strjoin("RLIMIT_", p));
+
+ assert_se(rlimit_from_string(prefixed) < 0);
+ assert_se(rlimit_from_string_harder(prefixed) == i);
+ }
+
+ assert_se(streq_ptr(rlimit_to_string(RLIMIT_NOFILE), "NOFILE"));
+ assert_se(rlimit_to_string(-1) == NULL);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &old) == 0);
+ assert_se(setrlimit_closest(RLIMIT_NOFILE, &old) == 0);
+ assert_se(getrlimit(RLIMIT_NOFILE, &new) == 0);
+ assert_se(old.rlim_cur == new.rlim_cur);
+ assert_se(old.rlim_max == new.rlim_max);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &old) == 0);
+ high = RLIMIT_MAKE_CONST(old.rlim_max == RLIM_INFINITY ? old.rlim_max : old.rlim_max + 1);
+ assert_se(setrlimit_closest(RLIMIT_NOFILE, &high) == 0);
+ assert_se(getrlimit(RLIMIT_NOFILE, &new) == 0);
+ assert_se(new.rlim_max == old.rlim_max);
+ assert_se(new.rlim_cur == new.rlim_max);
+
+ assert_se(getrlimit(RLIMIT_NOFILE, &old) == 0);
+ assert_se(setrlimit_closest(RLIMIT_NOFILE, &err) == -EINVAL);
+ assert_se(getrlimit(RLIMIT_NOFILE, &new) == 0);
+ assert_se(old.rlim_cur == new.rlim_cur);
+ assert_se(old.rlim_max == new.rlim_max);
+
+ test_rlimit_parse_format(RLIMIT_NOFILE, "4:5", 4, 5, 0, "4:5");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "6", 6, 6, 0, "6");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "infinity", RLIM_INFINITY, RLIM_INFINITY, 0, "infinity");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "infinity:infinity", RLIM_INFINITY, RLIM_INFINITY, 0, "infinity");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "8:infinity", 8, RLIM_INFINITY, 0, "8:infinity");
+ test_rlimit_parse_format(RLIMIT_CPU, "25min:13h", (25*USEC_PER_MINUTE) / USEC_PER_SEC, (13*USEC_PER_HOUR) / USEC_PER_SEC, 0, "1500:46800");
+ test_rlimit_parse_format(RLIMIT_NOFILE, "", 0, 0, -EINVAL, NULL);
+ test_rlimit_parse_format(RLIMIT_NOFILE, "5:4", 0, 0, -EILSEQ, NULL);
+ test_rlimit_parse_format(RLIMIT_NOFILE, "5:4:3", 0, 0, -EINVAL, NULL);
+ test_rlimit_parse_format(RLIMIT_NICE, "20", 20, 20, 0, "20");
+ test_rlimit_parse_format(RLIMIT_NICE, "40", 40, 40, 0, "40");
+ test_rlimit_parse_format(RLIMIT_NICE, "41", 41, 41, -ERANGE, "41");
+ test_rlimit_parse_format(RLIMIT_NICE, "0", 0, 0, 0, "0");
+ test_rlimit_parse_format(RLIMIT_NICE, "-7", 27, 27, 0, "27");
+ test_rlimit_parse_format(RLIMIT_NICE, "-20", 40, 40, 0, "40");
+ test_rlimit_parse_format(RLIMIT_NICE, "-21", 41, 41, -ERANGE, "41");
+ test_rlimit_parse_format(RLIMIT_NICE, "-0", 20, 20, 0, "20");
+ test_rlimit_parse_format(RLIMIT_NICE, "+7", 13, 13, 0, "13");
+ test_rlimit_parse_format(RLIMIT_NICE, "+19", 1, 1, 0, "1");
+ test_rlimit_parse_format(RLIMIT_NICE, "+20", 0, 0, -ERANGE, "0");
+ test_rlimit_parse_format(RLIMIT_NICE, "+0", 20, 20, 0, "20");
+
+ return 0;
+}
diff --git a/src/test/test-rm-rf.c b/src/test/test-rm-rf.c
new file mode 100644
index 0000000..38aa100
--- /dev/null
+++ b/src/test/test-rm-rf.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "process-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static void test_rm_rf_chmod_inner(void) {
+ _cleanup_free_ char *d = NULL;
+ const char *x, *y;
+
+ assert_se(getuid() != 0);
+
+ assert_se(mkdtemp_malloc(NULL, &d) >= 0);
+
+ x = strjoina(d, "/d");
+ assert_se(mkdir(x, 0700) >= 0);
+ y = strjoina(x, "/f");
+ assert_se(mknod(y, S_IFREG | 0600, 0) >= 0);
+
+ assert_se(chmod(y, 0400) >= 0);
+ assert_se(chmod(x, 0500) >= 0);
+ assert_se(chmod(d, 0500) >= 0);
+
+ assert_se(rm_rf(d, REMOVE_PHYSICAL|REMOVE_ROOT) == -EACCES);
+
+ assert_se(access(d, F_OK) >= 0);
+ assert_se(access(x, F_OK) >= 0);
+ assert_se(access(y, F_OK) >= 0);
+
+ assert_se(rm_rf(d, REMOVE_PHYSICAL|REMOVE_ROOT|REMOVE_CHMOD) >= 0);
+
+ errno = 0;
+ assert_se(access(d, F_OK) < 0 && errno == ENOENT);
+}
+
+static void test_rm_rf_chmod(void) {
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ if (getuid() == 0) {
+ /* This test only works unpriv (as only then the access mask for the owning user matters),
+ * hence drop privs here */
+
+ r = safe_fork("(setresuid)", FORK_DEATHSIG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* child */
+
+ assert_se(setresuid(1, 1, 1) >= 0);
+
+ test_rm_rf_chmod_inner();
+ _exit(EXIT_SUCCESS);
+ }
+
+ return;
+ }
+
+ test_rm_rf_chmod_inner();
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_rm_rf_chmod();
+
+ return 0;
+}
diff --git a/src/test/test-sched-prio.c b/src/test/test-sched-prio.c
new file mode 100644
index 0000000..1f125b1
--- /dev/null
+++ b/src/test/test-sched-prio.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2012 Holger Hans Peter Freyther
+***/
+
+#include <sched.h>
+
+#include "all-units.h"
+#include "macro.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *idle_ok, *idle_bad, *rr_ok, *rr_bad, *rr_sched;
+ Service *ser;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ /* prepare the test */
+ _cleanup_free_ char *unit_dir = NULL;
+ assert_se(get_testdata_dir("units", &unit_dir) >= 0);
+ assert_se(set_unit_path(unit_dir) >= 0);
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m);
+ if (manager_errno_skip_test(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ /* load idle ok */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_idle_ok.service", NULL, &idle_ok) >= 0);
+ ser = SERVICE(idle_ok);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_OTHER);
+ assert_se(ser->exec_context.cpu_sched_priority == 0);
+
+ /*
+ * load idle bad. This should print a warning but we have no way to look at it.
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_idle_bad.service", NULL, &idle_bad) >= 0);
+ ser = SERVICE(idle_ok);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_OTHER);
+ assert_se(ser->exec_context.cpu_sched_priority == 0);
+
+ /*
+ * load rr ok.
+ * Test that the default priority is moving from 0 to 1.
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_rr_ok.service", NULL, &rr_ok) >= 0);
+ ser = SERVICE(rr_ok);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_RR);
+ assert_se(ser->exec_context.cpu_sched_priority == 1);
+
+ /*
+ * load rr bad.
+ * Test that the value of 0 and 100 is ignored.
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_rr_bad.service", NULL, &rr_bad) >= 0);
+ ser = SERVICE(rr_bad);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_RR);
+ assert_se(ser->exec_context.cpu_sched_priority == 1);
+
+ /*
+ * load rr change.
+ * Test that anything between 1 and 99 can be set.
+ */
+ assert_se(manager_load_startable_unit_or_warn(m, "sched_rr_change.service", NULL, &rr_sched) >= 0);
+ ser = SERVICE(rr_sched);
+ assert_se(ser->exec_context.cpu_sched_policy == SCHED_RR);
+ assert_se(ser->exec_context.cpu_sched_priority == 99);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-sd-hwdb.c b/src/test/test-sd-hwdb.c
new file mode 100644
index 0000000..7e1512a
--- /dev/null
+++ b/src/test/test-sd-hwdb.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "errno.h"
+#include "tests.h"
+
+static int test_failed_enumerate(void) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char *key, *value;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = sd_hwdb_new(&hwdb);
+ if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r))
+ return r;
+ assert_se(r == 0);
+
+ assert_se(sd_hwdb_seek(hwdb, "no-such-modalias-should-exist") == 0);
+
+ assert_se(sd_hwdb_enumerate(hwdb, &key, &value) == 0);
+ assert_se(sd_hwdb_enumerate(hwdb, &key, NULL) == -EINVAL);
+ assert_se(sd_hwdb_enumerate(hwdb, NULL, &value) == -EINVAL);
+
+ return 0;
+}
+
+#define DELL_MODALIAS \
+ "evdev:atkbd:dmi:bvnXXX:bvrYYY:bdZZZ:svnDellXXX:pnYYY"
+
+static void test_basic_enumerate(void) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char *key, *value;
+ size_t len1 = 0, len2 = 0;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sd_hwdb_new(&hwdb) == 0);
+
+ assert_se(sd_hwdb_seek(hwdb, DELL_MODALIAS) == 0);
+
+ for (;;) {
+ r = sd_hwdb_enumerate(hwdb, &key, &value);
+ assert(IN_SET(r, 0, 1));
+ if (r == 0)
+ break;
+ assert(key);
+ assert(value);
+ log_debug("A: \"%s\" → \"%s\"", key, value);
+ len1 += strlen(key) + strlen(value);
+ }
+
+ SD_HWDB_FOREACH_PROPERTY(hwdb, DELL_MODALIAS, key, value) {
+ log_debug("B: \"%s\" → \"%s\"", key, value);
+ len2 += strlen(key) + strlen(value);
+ }
+
+ assert_se(len1 == len2);
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = test_failed_enumerate();
+ if (r < 0)
+ return log_tests_skipped_errno(r, "cannot open hwdb");
+
+ test_basic_enumerate();
+
+ return 0;
+}
diff --git a/src/test/test-sd-path.c b/src/test/test-sd-path.c
new file mode 100644
index 0000000..75436ab
--- /dev/null
+++ b/src/test/test-sd-path.c
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-path.h"
+
+#include "alloc-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_sd_path_lookup(void) {
+ log_info("/* %s */", __func__);
+
+ for (uint64_t i = 0; i < _SD_PATH_MAX; i++) {
+ _cleanup_free_ char *t = NULL, *s = NULL;
+ int r;
+
+ r = sd_path_lookup(i, NULL, &t);
+ if (i == SD_PATH_USER_RUNTIME && r == -ENXIO)
+ continue;
+ assert_se(r == 0);
+ assert_se(t);
+ log_info("%02"PRIu64": \"%s\"", i, t);
+
+ assert_se(sd_path_lookup(i, "suffix", &s) == 0);
+ assert_se(s);
+ log_info("%02"PRIu64": \"%s\"", i, s);
+ assert_se(endswith(s, "/suffix"));
+ }
+
+ char *tt;
+ assert_se(sd_path_lookup(_SD_PATH_MAX, NULL, &tt) == -EOPNOTSUPP);
+}
+
+static void test_sd_path_lookup_strv(void) {
+ log_info("/* %s */", __func__);
+
+ for (uint64_t i = 0; i < _SD_PATH_MAX; i++) {
+ _cleanup_strv_free_ char **t = NULL, **s = NULL;
+ char **item;
+ int r;
+
+ r = sd_path_lookup_strv(i, NULL, &t);
+ if (i == SD_PATH_USER_RUNTIME && r == -ENXIO)
+ continue;
+ assert_se(r == 0);
+ assert_se(t);
+ log_info("%02"PRIu64":", i);
+ STRV_FOREACH(item, t)
+ log_debug(" %s", *item);
+
+ assert_se(sd_path_lookup_strv(i, "suffix", &s) == 0);
+ assert_se(s);
+ log_info("%02"PRIu64":", i);
+ STRV_FOREACH(item, s) {
+ assert_se(endswith(*item, "/suffix"));
+ log_debug(" %s", *item);
+ }
+ }
+
+ char *tt;
+ assert_se(sd_path_lookup(_SD_PATH_MAX, NULL, &tt) == -EOPNOTSUPP);
+}
+
+int main(void) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_sd_path_lookup();
+ test_sd_path_lookup_strv();
+}
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
new file mode 100644
index 0000000..10393b6
--- /dev/null
+++ b/src/test/test-seccomp.c
@@ -0,0 +1,1096 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/personality.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_sched.h"
+#include "nsflags.h"
+#include "nulstr-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "rm-rf.h"
+#include "seccomp-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "virt.h"
+
+/* __NR_socket may be invalid due to libseccomp */
+#if !defined(__NR_socket) || __NR_socket < 0 || defined(__i386__) || defined(__s390x__) || defined(__s390__) || defined(__powerpc64__) || defined(__powerpc__)
+/* On these archs, socket() is implemented via the socketcall() syscall multiplexer,
+ * and we can't restrict it hence via seccomp. */
+# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 1
+#else
+# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
+#endif
+
+static void test_seccomp_arch_to_string(void) {
+ uint32_t a, b;
+ const char *name;
+
+ log_info("/* %s */", __func__);
+
+ a = seccomp_arch_native();
+ assert_se(a > 0);
+ name = seccomp_arch_to_string(a);
+ assert_se(name);
+ assert_se(seccomp_arch_from_string(name, &b) >= 0);
+ assert_se(a == b);
+}
+
+static void test_architecture_table(void) {
+ const char *n, *n2;
+
+ log_info("/* %s */", __func__);
+
+ NULSTR_FOREACH(n,
+ "native\0"
+ "x86\0"
+ "x86-64\0"
+ "x32\0"
+ "arm\0"
+ "arm64\0"
+ "mips\0"
+ "mips64\0"
+ "mips64-n32\0"
+ "mips-le\0"
+ "mips64-le\0"
+ "mips64-le-n32\0"
+ "ppc\0"
+ "ppc64\0"
+ "ppc64-le\0"
+#ifdef SCMP_ARCH_RISCV64
+ "riscv64\0"
+#endif
+ "s390\0"
+ "s390x\0") {
+ uint32_t c;
+
+ assert_se(seccomp_arch_from_string(n, &c) >= 0);
+ n2 = seccomp_arch_to_string(c);
+ log_info("seccomp-arch: %s → 0x%"PRIx32" → %s", n, c, n2);
+ assert_se(streq_ptr(n, n2));
+ }
+}
+
+static void test_syscall_filter_set_find(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!syscall_filter_set_find(NULL));
+ assert_se(!syscall_filter_set_find(""));
+ assert_se(!syscall_filter_set_find("quux"));
+ assert_se(!syscall_filter_set_find("@quux"));
+
+ assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK);
+ assert_se(syscall_filter_set_find("@default") == syscall_filter_sets + SYSCALL_FILTER_SET_DEFAULT);
+ assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO);
+}
+
+static void test_filter_sets(void) {
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ for (unsigned i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
+ pid_t pid;
+
+#if HAVE_VALGRIND_VALGRIND_H
+ if (RUNNING_ON_VALGRIND && IN_SET(i, SYSCALL_FILTER_SET_DEFAULT, SYSCALL_FILTER_SET_BASIC_IO, SYSCALL_FILTER_SET_SIGNAL)) {
+ /* valgrind at least requires rt_sigprocmask(), read(), write(). */
+ log_info("Running on valgrind, skipping %s", syscall_filter_sets[i].name);
+ continue;
+ }
+#endif
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ if (IN_SET(i, SYSCALL_FILTER_SET_DEFAULT, SYSCALL_FILTER_SET_BASIC_IO, SYSCALL_FILTER_SET_SIGNAL)) {
+ /* ASAN at least requires sigaltstack(), read(), write(). */
+ log_info("Running on address sanitizer, skipping %s", syscall_filter_sets[i].name);
+ continue;
+ }
+#endif
+
+ log_info("Testing %s", syscall_filter_sets[i].name);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) { /* Child? */
+ int fd, r;
+
+ /* If we look at the default set (or one that includes it), allow-list instead of deny-list */
+ if (IN_SET(i, SYSCALL_FILTER_SET_DEFAULT,
+ SYSCALL_FILTER_SET_SYSTEM_SERVICE,
+ SYSCALL_FILTER_SET_KNOWN))
+ r = seccomp_load_syscall_filter_set(SCMP_ACT_ERRNO(EUCLEAN), syscall_filter_sets + i, SCMP_ACT_ALLOW, true);
+ else
+ r = seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EUCLEAN), true);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ /* Test the sycall filter with one random system call */
+ fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
+ if (IN_SET(i, SYSCALL_FILTER_SET_IO_EVENT, SYSCALL_FILTER_SET_DEFAULT))
+ assert_se(fd < 0 && errno == EUCLEAN);
+ else {
+ assert_se(fd >= 0);
+ safe_close(fd);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check(syscall_filter_sets[i].name, pid, WAIT_LOG) == EXIT_SUCCESS);
+ }
+}
+
+static void test_filter_sets_ordered(void) {
+ log_info("/* %s */", __func__);
+
+ /* Ensure "@default" always remains at the beginning of the list */
+ assert_se(SYSCALL_FILTER_SET_DEFAULT == 0);
+ assert_se(streq(syscall_filter_sets[0].name, "@default"));
+
+ /* Ensure "@known" always remains at the end of the list */
+ assert_se(SYSCALL_FILTER_SET_KNOWN == _SYSCALL_FILTER_SET_MAX - 1);
+ assert_se(streq(syscall_filter_sets[SYSCALL_FILTER_SET_KNOWN].name, "@known"));
+
+ for (size_t i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
+ const char *k, *p = NULL;
+
+ /* Make sure each group has a description */
+ assert_se(!isempty(syscall_filter_sets[0].help));
+
+ /* Make sure the groups are ordered alphabetically, except for the first and last entries */
+ assert_se(i < 2 || i == _SYSCALL_FILTER_SET_MAX - 1 ||
+ strcmp(syscall_filter_sets[i-1].name, syscall_filter_sets[i].name) < 0);
+
+ NULSTR_FOREACH(k, syscall_filter_sets[i].value) {
+
+ /* Ensure each syscall list is in itself ordered, but groups before names */
+ assert_se(!p ||
+ (*p == '@' && *k != '@') ||
+ (((*p == '@' && *k == '@') ||
+ (*p != '@' && *k != '@')) &&
+ strcmp(p, k) < 0));
+
+ p = k;
+ }
+ }
+}
+
+static void test_restrict_namespace(void) {
+ char *s = NULL;
+ unsigned long ul;
+ pid_t pid;
+
+ if (!have_namespaces()) {
+ log_notice("Testing without namespaces, skipping %s", __func__);
+ return;
+ }
+
+ log_info("/* %s */", __func__);
+
+ assert_se(namespace_flags_to_string(0, &s) == 0 && isempty(s));
+ s = mfree(s);
+ assert_se(namespace_flags_to_string(CLONE_NEWNS, &s) == 0 && streq(s, "mnt"));
+ s = mfree(s);
+ assert_se(namespace_flags_to_string(CLONE_NEWNS|CLONE_NEWIPC, &s) == 0 && streq(s, "ipc mnt"));
+ s = mfree(s);
+ assert_se(namespace_flags_to_string(CLONE_NEWCGROUP, &s) == 0 && streq(s, "cgroup"));
+ s = mfree(s);
+
+ assert_se(namespace_flags_from_string("mnt", &ul) == 0 && ul == CLONE_NEWNS);
+ assert_se(namespace_flags_from_string(NULL, &ul) == 0 && ul == 0);
+ assert_se(namespace_flags_from_string("", &ul) == 0 && ul == 0);
+ assert_se(namespace_flags_from_string("uts", &ul) == 0 && ul == CLONE_NEWUTS);
+ assert_se(namespace_flags_from_string("mnt uts ipc", &ul) == 0 && ul == (CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC));
+
+ assert_se(namespace_flags_to_string(CLONE_NEWUTS, &s) == 0 && streq(s, "uts"));
+ assert_se(namespace_flags_from_string(s, &ul) == 0 && ul == CLONE_NEWUTS);
+ s = mfree(s);
+ assert_se(namespace_flags_from_string("ipc", &ul) == 0 && ul == CLONE_NEWIPC);
+ assert_se(namespace_flags_to_string(ul, &s) == 0 && streq(s, "ipc"));
+ s = mfree(s);
+
+ assert_se(namespace_flags_to_string(NAMESPACE_FLAGS_ALL, &s) == 0);
+ assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
+ assert_se(namespace_flags_from_string(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
+ s = mfree(s);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping remaining tests in %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping remaining tests in %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+
+ assert_se(seccomp_restrict_namespaces(CLONE_NEWNS|CLONE_NEWNET) >= 0);
+
+ assert_se(unshare(CLONE_NEWNS) == 0);
+ assert_se(unshare(CLONE_NEWNET) == 0);
+ assert_se(unshare(CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(unshare(CLONE_NEWIPC) == -1);
+ assert_se(errno == EPERM);
+ assert_se(unshare(CLONE_NEWNET|CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+
+ /* We use fd 0 (stdin) here, which of course will fail with EINVAL on setns(). Except of course our
+ * seccomp filter worked, and hits first and makes it return EPERM */
+ assert_se(setns(0, CLONE_NEWNS) == -1);
+ assert_se(errno == EINVAL);
+ assert_se(setns(0, CLONE_NEWNET) == -1);
+ assert_se(errno == EINVAL);
+ assert_se(setns(0, CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, CLONE_NEWIPC) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, CLONE_NEWNET|CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, 0) == -1);
+ assert_se(errno == EPERM);
+
+ pid = raw_clone(CLONE_NEWNS);
+ assert_se(pid >= 0);
+ if (pid == 0)
+ _exit(EXIT_SUCCESS);
+ pid = raw_clone(CLONE_NEWNET);
+ assert_se(pid >= 0);
+ if (pid == 0)
+ _exit(EXIT_SUCCESS);
+ pid = raw_clone(CLONE_NEWUTS);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+ pid = raw_clone(CLONE_NEWIPC);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+ pid = raw_clone(CLONE_NEWNET|CLONE_NEWUTS);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("nsseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_protect_sysctl(void) {
+ pid_t pid;
+ _cleanup_free_ char *seccomp = NULL;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ /* in containers _sysctl() is likely missing anyway */
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+
+ assert_se(get_proc_field("/proc/self/status", "Seccomp", WHITESPACE, &seccomp) == 0);
+ if (!streq(seccomp, "0"))
+ log_warning("Warning: seccomp filter detected, results may be unreliable for %s", __func__);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+#if defined __NR__sysctl && __NR__sysctl >= 0
+ assert_se(syscall(__NR__sysctl, NULL) < 0);
+ assert_se(IN_SET(errno, EFAULT, ENOSYS));
+#endif
+
+ assert_se(seccomp_protect_sysctl() >= 0);
+
+#if HAVE_VALGRIND_VALGRIND_H
+ if (RUNNING_ON_VALGRIND) {
+ log_info("Running on valgrind, skipping syscall/EPERM test");
+ _exit(EXIT_SUCCESS);
+ }
+#endif
+
+#if defined __NR__sysctl && __NR__sysctl >= 0
+ assert_se(syscall(__NR__sysctl, 0, 0, 0) < 0);
+ assert_se(errno == EPERM);
+#endif
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("sysctlseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_protect_syslog(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ /* in containers syslog() is likely missing anyway */
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+#if defined __NR_syslog && __NR_syslog >= 0
+ assert_se(syscall(__NR_syslog, -1, NULL, 0) < 0);
+ assert_se(errno == EINVAL);
+#endif
+
+ assert_se(seccomp_protect_syslog() >= 0);
+
+#if defined __NR_syslog && __NR_syslog >= 0
+ assert_se(syscall(__NR_syslog, 0, 0, 0) < 0);
+ assert_se(errno == EPERM);
+#endif
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("syslogseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_restrict_address_families(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ int fd;
+ Set *s;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ assert_se(s = set_new(NULL));
+ assert_se(set_put(s, INT_TO_PTR(AF_UNIX)) >= 0);
+
+ assert_se(seccomp_restrict_address_families(s, false) >= 0);
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
+ assert_se(fd >= 0);
+ safe_close(fd);
+#else
+ assert_se(fd < 0);
+ assert_se(errno == EAFNOSUPPORT);
+#endif
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ set_clear(s);
+
+ assert_se(set_put(s, INT_TO_PTR(AF_INET)) >= 0);
+
+ assert_se(seccomp_restrict_address_families(s, true) >= 0);
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+ safe_close(fd);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
+ assert_se(fd >= 0);
+ safe_close(fd);
+#else
+ assert_se(fd < 0);
+ assert_se(errno == EAFNOSUPPORT);
+#endif
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
+#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
+ assert_se(fd >= 0);
+ safe_close(fd);
+#else
+ assert_se(fd < 0);
+ assert_se(errno == EAFNOSUPPORT);
+#endif
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("socketseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_restrict_realtime(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ /* in containers RT privs are likely missing anyway */
+ if (detect_container() > 0) {
+ log_notice("Testing in container, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ assert_se(sched_setscheduler(0, SCHED_FIFO, &(struct sched_param) { .sched_priority = 1 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_RR, &(struct sched_param) { .sched_priority = 1 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_IDLE, &(struct sched_param) { .sched_priority = 0 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_BATCH, &(struct sched_param) { .sched_priority = 0 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_OTHER, &(struct sched_param) {}) >= 0);
+
+ assert_se(seccomp_restrict_realtime() >= 0);
+
+ assert_se(sched_setscheduler(0, SCHED_IDLE, &(struct sched_param) { .sched_priority = 0 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_BATCH, &(struct sched_param) { .sched_priority = 0 }) >= 0);
+ assert_se(sched_setscheduler(0, SCHED_OTHER, &(struct sched_param) {}) >= 0);
+
+ assert_se(sched_setscheduler(0, SCHED_FIFO, &(struct sched_param) { .sched_priority = 1 }) < 0);
+ assert_se(errno == EPERM);
+ assert_se(sched_setscheduler(0, SCHED_RR, &(struct sched_param) { .sched_priority = 1 }) < 0);
+ assert_se(errno == EPERM);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("realtimeseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_memory_deny_write_execute_mmap(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+#if HAVE_VALGRIND_VALGRIND_H
+ if (RUNNING_ON_VALGRIND) {
+ log_notice("Running on valgrind, skipping %s", __func__);
+ return;
+ }
+#endif
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ log_notice("Running on address sanitizer, skipping %s", __func__);
+ return;
+#endif
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ void *p;
+
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+
+ assert_se(seccomp_memory_deny_write_execute() >= 0);
+
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc64__) || defined(__arm__) || defined(__aarch64__)
+ assert_se(p == MAP_FAILED);
+ assert_se(errno == EPERM);
+#endif
+ /* Depending on kernel, libseccomp, and glibc versions, other architectures
+ * might fail or not. Let's not assert success. */
+ if (p != MAP_FAILED)
+ assert_se(munmap(p, page_size()) == 0);
+
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("memoryseccomp-mmap", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_memory_deny_write_execute_shmat(void) {
+ int shmid;
+ pid_t pid;
+ uint32_t arch;
+
+ log_info("/* %s */", __func__);
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ log_debug("arch %s: SCMP_SYS(mmap) = %d", seccomp_arch_to_string(arch), SCMP_SYS(mmap));
+ log_debug("arch %s: SCMP_SYS(mmap2) = %d", seccomp_arch_to_string(arch), SCMP_SYS(mmap2));
+ log_debug("arch %s: SCMP_SYS(shmget) = %d", seccomp_arch_to_string(arch), SCMP_SYS(shmget));
+ log_debug("arch %s: SCMP_SYS(shmat) = %d", seccomp_arch_to_string(arch), SCMP_SYS(shmat));
+ log_debug("arch %s: SCMP_SYS(shmdt) = %d", seccomp_arch_to_string(arch), SCMP_SYS(shmdt));
+ }
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+#if HAVE_VALGRIND_VALGRIND_H
+ if (RUNNING_ON_VALGRIND) {
+ log_notice("Running on valgrind, skipping %s", __func__);
+ return;
+ }
+#endif
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ log_notice("Running on address sanitizer, skipping %s", __func__);
+ return;
+#endif
+
+ shmid = shmget(IPC_PRIVATE, page_size(), 0);
+ assert_se(shmid >= 0);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ void *p;
+
+ p = shmat(shmid, NULL, 0);
+ assert_se(p != MAP_FAILED);
+ assert_se(shmdt(p) == 0);
+
+ p = shmat(shmid, NULL, SHM_EXEC);
+ assert_se(p != MAP_FAILED);
+ assert_se(shmdt(p) == 0);
+
+ assert_se(seccomp_memory_deny_write_execute() >= 0);
+
+ p = shmat(shmid, NULL, SHM_EXEC);
+ log_debug_errno(p == MAP_FAILED ? errno : 0, "shmat(SHM_EXEC): %m");
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+ assert_se(p == MAP_FAILED);
+ assert_se(errno == EPERM);
+#endif
+ /* Depending on kernel, libseccomp, and glibc versions, other architectures
+ * might fail or not. Let's not assert success. */
+ if (p != MAP_FAILED)
+ assert_se(shmdt(p) == 0);
+
+ p = shmat(shmid, NULL, 0);
+ log_debug_errno(p == MAP_FAILED ? errno : 0, "shmat(0): %m");
+ assert_se(p != MAP_FAILED);
+ assert_se(shmdt(p) == 0);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("memoryseccomp-shmat", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_restrict_archs(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ _cleanup_set_free_ Set *s = NULL;
+
+ assert_se(access("/", F_OK) >= 0);
+
+ assert_se(s = set_new(NULL));
+
+#ifdef __x86_64__
+ assert_se(set_put(s, UINT32_TO_PTR(SCMP_ARCH_X86+1)) >= 0);
+#endif
+ assert_se(seccomp_restrict_archs(s) >= 0);
+
+ assert_se(access("/", F_OK) >= 0);
+ assert_se(seccomp_restrict_archs(NULL) >= 0);
+
+ assert_se(access("/", F_OK) >= 0);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("archseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_load_syscall_filter_set_raw(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ _cleanup_hashmap_free_ Hashmap *s = NULL;
+
+ assert_se(access("/", F_OK) >= 0);
+ assert_se(poll(NULL, 0, 0) == 0);
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, NULL, scmp_act_kill_process(), true) >= 0);
+ assert_se(access("/", F_OK) >= 0);
+ assert_se(poll(NULL, 0, 0) == 0);
+
+ assert_se(s = hashmap_new(NULL));
+#if defined __NR_access && __NR_access >= 0
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_access + 1), INT_TO_PTR(-1)) >= 0);
+#else
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_faccessat + 1), INT_TO_PTR(-1)) >= 0);
+#endif
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUCLEAN), true) >= 0);
+
+ assert_se(access("/", F_OK) < 0);
+ assert_se(errno == EUCLEAN);
+
+ assert_se(poll(NULL, 0, 0) == 0);
+
+ s = hashmap_free(s);
+
+ assert_se(s = hashmap_new(NULL));
+#if defined __NR_access && __NR_access >= 0
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_access + 1), INT_TO_PTR(EILSEQ)) >= 0);
+#else
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_faccessat + 1), INT_TO_PTR(EILSEQ)) >= 0);
+#endif
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUCLEAN), true) >= 0);
+
+ assert_se(access("/", F_OK) < 0);
+ assert_se(errno == EILSEQ);
+
+ assert_se(poll(NULL, 0, 0) == 0);
+
+ s = hashmap_free(s);
+
+ assert_se(s = hashmap_new(NULL));
+#if defined __NR_poll && __NR_poll >= 0
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_poll + 1), INT_TO_PTR(-1)) >= 0);
+#else
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_ppoll + 1), INT_TO_PTR(-1)) >= 0);
+#endif
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUNATCH), true) >= 0);
+
+ assert_se(access("/", F_OK) < 0);
+ assert_se(errno == EILSEQ);
+
+ assert_se(poll(NULL, 0, 0) < 0);
+ assert_se(errno == EUNATCH);
+
+ s = hashmap_free(s);
+
+ assert_se(s = hashmap_new(NULL));
+#if defined __NR_poll && __NR_poll >= 0
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_poll + 1), INT_TO_PTR(EILSEQ)) >= 0);
+#else
+ assert_se(hashmap_put(s, UINT32_TO_PTR(__NR_ppoll + 1), INT_TO_PTR(EILSEQ)) >= 0);
+#endif
+
+ assert_se(seccomp_load_syscall_filter_set_raw(SCMP_ACT_ALLOW, s, SCMP_ACT_ERRNO(EUNATCH), true) >= 0);
+
+ assert_se(access("/", F_OK) < 0);
+ assert_se(errno == EILSEQ);
+
+ assert_se(poll(NULL, 0, 0) < 0);
+ assert_se(errno == EILSEQ);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("syscallrawseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static void test_lock_personality(void) {
+ unsigned long current;
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ assert_se(opinionated_personality(&current) >= 0);
+
+ log_info("current personality=%lu", current);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ assert_se(seccomp_lock_personality(current) >= 0);
+
+ assert_se((unsigned long) safe_personality(current) == current);
+
+ /* Note, we also test that safe_personality() works correctly, by checkig whether errno is properly
+ * set, in addition to the return value */
+ errno = 0;
+ assert_se(safe_personality(PER_LINUX | ADDR_NO_RANDOMIZE) == -EPERM);
+ assert_se(errno == EPERM);
+
+ assert_se(safe_personality(PER_LINUX | MMAP_PAGE_ZERO) == -EPERM);
+ assert_se(safe_personality(PER_LINUX | ADDR_COMPAT_LAYOUT) == -EPERM);
+ assert_se(safe_personality(PER_LINUX | READ_IMPLIES_EXEC) == -EPERM);
+ assert_se(safe_personality(PER_LINUX_32BIT) == -EPERM);
+ assert_se(safe_personality(PER_SVR4) == -EPERM);
+ assert_se(safe_personality(PER_BSD) == -EPERM);
+ assert_se(safe_personality(current == PER_LINUX ? PER_LINUX32 : PER_LINUX) == -EPERM);
+ assert_se(safe_personality(PER_LINUX32_3GB) == -EPERM);
+ assert_se(safe_personality(PER_UW7) == -EPERM);
+ assert_se(safe_personality(0x42) == -EPERM);
+
+ assert_se(safe_personality(PERSONALITY_INVALID) == -EPERM); /* maybe remove this later */
+
+ assert_se((unsigned long) personality(current) == current);
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("lockpersonalityseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+static int real_open(const char *path, int flags, mode_t mode) {
+ /* glibc internally calls openat() when open() is requested. Let's hence define our own wrapper for
+ * testing purposes that calls the real syscall, on architectures where SYS_open is defined. On
+ * other architectures, let's just fall back to the glibc call. */
+
+#if defined __NR_open && __NR_open >= 0
+ return (int) syscall(__NR_open, path, flags, mode);
+#else
+ return open(path, flags, mode);
+#endif
+}
+
+static void test_restrict_suid_sgid(void) {
+ pid_t pid;
+
+ log_info("/* %s */", __func__);
+
+ if (!is_seccomp_available()) {
+ log_notice("Seccomp not available, skipping %s", __func__);
+ return;
+ }
+ if (geteuid() != 0) {
+ log_notice("Not root, skipping %s", __func__);
+ return;
+ }
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ char path[] = "/tmp/suidsgidXXXXXX", dir[] = "/tmp/suidsgiddirXXXXXX";
+ int fd = -1, k = -1;
+ const char *z;
+
+ fd = mkostemp_safe(path);
+ assert_se(fd >= 0);
+
+ assert_se(mkdtemp(dir));
+ z = strjoina(dir, "/test");
+
+ assert_se(chmod(path, 0755 | S_ISUID) >= 0);
+ assert_se(chmod(path, 0755 | S_ISGID) >= 0);
+ assert_se(chmod(path, 0755 | S_ISGID | S_ISUID) >= 0);
+ assert_se(chmod(path, 0755) >= 0);
+
+ assert_se(fchmod(fd, 0755 | S_ISUID) >= 0);
+ assert_se(fchmod(fd, 0755 | S_ISGID) >= 0);
+ assert_se(fchmod(fd, 0755 | S_ISGID | S_ISUID) >= 0);
+ assert_se(fchmod(fd, 0755) >= 0);
+
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISUID, 0) >= 0);
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID, 0) >= 0);
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID | S_ISUID, 0) >= 0);
+ assert_se(fchmodat(AT_FDCWD, path, 0755, 0) >= 0);
+
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = creat(z, 0644 | S_ISUID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = creat(z, 0644 | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = creat(z, 0644 | S_ISUID | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = creat(z, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(mkdir(z, 0755 | S_ISUID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdir(z, 0755 | S_ISGID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdir(z, 0755 | S_ISUID | S_ISGID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdir(z, 0755) >= 0);
+ assert_se(rmdir(z) >= 0);
+
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISGID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID | S_ISGID) >= 0);
+ assert_se(rmdir(z) >= 0);
+ assert_se(mkdirat(AT_FDCWD, z, 0755) >= 0);
+ assert_se(rmdir(z) >= 0);
+
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISUID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISGID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknod(z, S_IFREG | 0755, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISGID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(seccomp_restrict_suid_sgid() >= 0);
+
+ assert_se(chmod(path, 0775 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(chmod(path, 0775 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(chmod(path, 0775 | S_ISGID | S_ISUID) < 0 && errno == EPERM);
+ assert_se(chmod(path, 0775) >= 0);
+
+ assert_se(fchmod(fd, 0775 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(fchmod(fd, 0775 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(fchmod(fd, 0775 | S_ISGID | S_ISUID) < 0 && errno == EPERM);
+ assert_se(fchmod(fd, 0775) >= 0);
+
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISUID, 0) < 0 && errno == EPERM);
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID | S_ISUID, 0) < 0 && errno == EPERM);
+ assert_se(fchmodat(AT_FDCWD, path, 0755, 0) >= 0);
+
+ assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(creat(z, 0644 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(creat(z, 0644 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(creat(z, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ k = creat(z, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644);
+ k = safe_close(k);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(mkdir(z, 0755 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(mkdir(z, 0755 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(mkdir(z, 0755 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ assert_se(mkdir(z, 0755) >= 0);
+ assert_se(rmdir(z) >= 0);
+
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID) < 0 && errno == EPERM);
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISGID) < 0 && errno == EPERM);
+ assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID | S_ISGID) < 0 && errno == EPERM);
+ assert_se(mkdirat(AT_FDCWD, z, 0755) >= 0);
+ assert_se(rmdir(z) >= 0);
+
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISUID, 0) < 0 && errno == EPERM);
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(mknod(z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(mknod(z, S_IFREG | 0755, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID, 0) < 0 && errno == EPERM);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) < 0 && errno == EPERM);
+ assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755, 0) >= 0);
+ assert_se(unlink(z) >= 0);
+
+ assert_se(unlink(path) >= 0);
+ assert_se(rm_rf(dir, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_check("suidsgidseccomp", pid, WAIT_LOG) == EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_seccomp_arch_to_string();
+ test_architecture_table();
+ test_syscall_filter_set_find();
+ test_filter_sets();
+ test_filter_sets_ordered();
+ test_restrict_namespace();
+ test_protect_sysctl();
+ test_protect_syslog();
+ test_restrict_address_families();
+ test_restrict_realtime();
+ test_memory_deny_write_execute_mmap();
+ test_memory_deny_write_execute_shmat();
+ test_restrict_archs();
+ test_load_syscall_filter_set_raw();
+ test_lock_personality();
+ test_restrict_suid_sgid();
+
+ return 0;
+}
diff --git a/src/test/test-selinux.c b/src/test/test-selinux.c
new file mode 100644
index 0000000..3eb7ad3
--- /dev/null
+++ b/src/test/test-selinux.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "selinux-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "time-util.h"
+#include "util.h"
+
+static void test_testing(void) {
+ bool b;
+
+ log_info("============ %s ==========", __func__);
+
+ b = mac_selinux_use();
+ log_info("mac_selinux_use → %s", yes_no(b));
+
+ b = mac_selinux_use();
+ log_info("mac_selinux_use → %s", yes_no(b));
+
+ mac_selinux_retest();
+
+ b = mac_selinux_use();
+ log_info("mac_selinux_use → %s", yes_no(b));
+
+ b = mac_selinux_use();
+ log_info("mac_selinux_use → %s", yes_no(b));
+}
+
+static void test_loading(void) {
+ usec_t n1, n2;
+ int r;
+
+ log_info("============ %s ==========", __func__);
+
+ n1 = now(CLOCK_MONOTONIC);
+ r = mac_selinux_init();
+ n2 = now(CLOCK_MONOTONIC);
+ log_info_errno(r, "mac_selinux_init → %d %.2fs (%m)", r, (n2 - n1)/1e6);
+}
+
+static void test_cleanup(void) {
+ usec_t n1, n2;
+
+ log_info("============ %s ==========", __func__);
+
+ n1 = now(CLOCK_MONOTONIC);
+ mac_selinux_finish();
+ n2 = now(CLOCK_MONOTONIC);
+ log_info("mac_selinux_finish → %.2fs", (n2 - n1)/1e6);
+}
+
+static void test_misc(const char* fname) {
+ _cleanup_(mac_selinux_freep) char *label = NULL, *label2 = NULL, *label3 = NULL;
+ int r;
+ _cleanup_close_ int fd = -1;
+
+ log_info("============ %s ==========", __func__);
+
+ r = mac_selinux_get_our_label(&label);
+ log_info_errno(r, "mac_selinux_get_our_label → %d, \"%s\" (%m)",
+ r, strnull(label));
+
+ r = mac_selinux_get_create_label_from_exe(fname, &label2);
+ log_info_errno(r, "mac_selinux_create_label_from_exe → %d, \"%s\" (%m)",
+ r, strnull(label2));
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ assert_se(fd >= 0);
+
+ r = mac_selinux_get_child_mls_label(fd, fname, label2, &label3);
+ log_info_errno(r, "mac_selinux_get_child_mls_label → %d, \"%s\" (%m)",
+ r, strnull(label3));
+}
+
+static void test_create_file_prepare(const char* fname) {
+ int r;
+
+ log_info("============ %s ==========", __func__);
+
+ r = mac_selinux_create_file_prepare(fname, S_IRWXU);
+ log_info_errno(r, "mac_selinux_create_file_prepare → %d (%m)", r);
+
+ mac_selinux_create_file_clear();
+}
+
+int main(int argc, char **argv) {
+ const char *path = SYSTEMD_BINARY_PATH;
+ if (argc >= 2)
+ path = argv[1];
+
+ test_setup_logging(LOG_DEBUG);
+
+ test_testing();
+ test_loading();
+ test_misc(path);
+ test_create_file_prepare(path);
+ test_cleanup();
+
+ return 0;
+}
diff --git a/src/test/test-serialize.c b/src/test/test-serialize.c
new file mode 100644
index 0000000..7bd53a8
--- /dev/null
+++ b/src/test/test-serialize.c
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "serialize.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+char long_string[LONG_LINE_MAX+1];
+
+static void test_serialize_item(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(serialize_item(f, "a", NULL) == 0);
+ assert_se(serialize_item(f, "a", "bbb") == 1);
+ assert_se(serialize_item(f, "a", "bbb") == 1);
+ assert_se(serialize_item(f, "a", long_string) == -EINVAL);
+ assert_se(serialize_item(f, long_string, "a") == -EINVAL);
+ assert_se(serialize_item(f, long_string, long_string) == -EINVAL);
+
+ rewind(f);
+
+ _cleanup_free_ char *line1 = NULL, *line2 = NULL, *line3 = NULL;
+ assert_se(read_line(f, LONG_LINE_MAX, &line1) > 0);
+ assert_se(streq(line1, "a=bbb"));
+ assert_se(read_line(f, LONG_LINE_MAX, &line2) > 0);
+ assert_se(streq(line2, "a=bbb"));
+ assert_se(read_line(f, LONG_LINE_MAX, &line3) == 0);
+ assert_se(streq(line3, ""));
+}
+
+static void test_serialize_item_escaped(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(serialize_item_escaped(f, "a", NULL) == 0);
+ assert_se(serialize_item_escaped(f, "a", "bbb") == 1);
+ assert_se(serialize_item_escaped(f, "a", "bbb") == 1);
+ assert_se(serialize_item_escaped(f, "a", long_string) == -EINVAL);
+ assert_se(serialize_item_escaped(f, long_string, "a") == -EINVAL);
+ assert_se(serialize_item_escaped(f, long_string, long_string) == -EINVAL);
+
+ rewind(f);
+
+ _cleanup_free_ char *line1 = NULL, *line2 = NULL, *line3 = NULL;
+ assert_se(read_line(f, LONG_LINE_MAX, &line1) > 0);
+ assert_se(streq(line1, "a=bbb"));
+ assert_se(read_line(f, LONG_LINE_MAX, &line2) > 0);
+ assert_se(streq(line2, "a=bbb"));
+ assert_se(read_line(f, LONG_LINE_MAX, &line3) == 0);
+ assert_se(streq(line3, ""));
+}
+
+static void test_serialize_usec(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(serialize_usec(f, "usec1", USEC_INFINITY) == 0);
+ assert_se(serialize_usec(f, "usec2", 0) == 1);
+ assert_se(serialize_usec(f, "usec3", USEC_INFINITY-1) == 1);
+
+ rewind(f);
+
+ _cleanup_free_ char *line1 = NULL, *line2 = NULL;
+ usec_t x;
+
+ assert_se(read_line(f, LONG_LINE_MAX, &line1) > 0);
+ assert_se(streq(line1, "usec2=0"));
+ assert_se(deserialize_usec(line1 + 6, &x) == 0);
+ assert_se(x == 0);
+
+ assert_se(read_line(f, LONG_LINE_MAX, &line2) > 0);
+ assert_se(startswith(line2, "usec3="));
+ assert_se(deserialize_usec(line2 + 6, &x) == 0);
+ assert_se(x == USEC_INFINITY-1);
+}
+
+static void test_serialize_strv(void) {
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-serialize.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ char **strv = STRV_MAKE("a", "b", "foo foo",
+ "nasty1 \"",
+ "\"nasty2 ",
+ "nasty3 '",
+ "\"nasty4 \"",
+ "nasty5\n",
+ "\nnasty5\nfoo=bar",
+ "\nnasty5\nfoo=bar");
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(serialize_strv(f, "strv1", NULL) == 0);
+ assert_se(serialize_strv(f, "strv2", STRV_MAKE_EMPTY) == 0);
+ assert_se(serialize_strv(f, "strv3", strv) == 1);
+ assert_se(serialize_strv(f, "strv4", STRV_MAKE(long_string)) == -EINVAL);
+
+ rewind(f);
+
+ _cleanup_strv_free_ char **strv2 = NULL;
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r == 0)
+ break;
+ assert_se(r > 0);
+
+ const char *t = startswith(line, "strv3=");
+ assert_se(t);
+
+ char *un;
+ assert_se(cunescape(t, 0, &un) >= 0);
+ assert_se(strv_consume(&strv2, un) >= 0);
+ }
+
+ assert_se(strv_equal(strv, strv2));
+}
+
+static void test_deserialize_environment(void) {
+ _cleanup_strv_free_ char **env;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(env = strv_new("A=1"));
+
+ assert_se(deserialize_environment("B=2", &env) >= 0);
+ assert_se(deserialize_environment("FOO%%=a\\177b\\nc\\td e", &env) >= 0);
+
+ assert_se(strv_equal(env, STRV_MAKE("A=1", "B=2", "FOO%%=a\177b\nc\td e")));
+
+ assert_se(deserialize_environment("foo\\", &env) < 0);
+ assert_se(deserialize_environment("bar\\_baz", &env) < 0);
+}
+
+static void test_serialize_environment(void) {
+ _cleanup_strv_free_ char **env = NULL, **env2 = NULL;
+ _cleanup_(unlink_tempfilep) char fn[] = "/tmp/test-env-util.XXXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert_se(fmkostemp_safe(fn, "r+", &f) == 0);
+ log_info("/* %s (%s) */", __func__, fn);
+
+ assert_se(env = strv_new("A=1",
+ "B=2",
+ "C=ąęółń",
+ "D=D=a\\x0Ab",
+ "FOO%%=a\177b\nc\td e"));
+
+ assert_se(serialize_strv(f, "env", env) == 1);
+ assert_se(fflush_and_check(f) == 0);
+
+ rewind(f);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ assert_se(r >= 0);
+
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+
+ assert_se(startswith(l, "env="));
+
+ r = deserialize_environment(l+4, &env2);
+ assert_se(r >= 0);
+ }
+ assert_se(feof(f));
+
+ assert_se(strv_equal(env, env2));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ memset(long_string, 'x', sizeof(long_string)-1);
+ char_array_0(long_string);
+
+ test_serialize_item();
+ test_serialize_item_escaped();
+ test_serialize_usec();
+ test_serialize_strv();
+ test_deserialize_environment();
+ test_serialize_environment();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-set-disable-mempool.c b/src/test/test-set-disable-mempool.c
new file mode 100644
index 0000000..ae36fac
--- /dev/null
+++ b/src/test/test-set-disable-mempool.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+
+#include "process-util.h"
+#include "set.h"
+#include "tests.h"
+
+#define NUM 100
+
+static void* thread(void *p) {
+ Set **s = p;
+
+ assert_se(s);
+ assert_se(*s);
+
+ assert_se(!is_main_thread());
+ assert_se(set_size(*s) == NUM);
+ *s = set_free(*s);
+
+ return NULL;
+}
+
+static void test_one(const char *val) {
+ pthread_t t;
+ int x[NUM] = {};
+ unsigned i;
+ Set *s;
+
+ log_info("Testing with SYSTEMD_MEMPOOL=%s", val);
+ assert_se(setenv("SYSTEMD_MEMPOOL", val, true) == 0);
+ assert_se(is_main_thread());
+
+ assert_se(s = set_new(NULL));
+ for (i = 0; i < NUM; i++)
+ assert_se(set_put(s, &x[i]));
+
+ assert_se(pthread_create(&t, NULL, thread, &s) == 0);
+ assert_se(pthread_join(t, NULL) == 0);
+
+ assert_se(!s);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_one("0");
+ /* The value $SYSTEMD_MEMPOOL= is cached. So the following
+ * test should also succeed. */
+ test_one("1");
+
+ return 0;
+}
diff --git a/src/test/test-set.c b/src/test/test-set.c
new file mode 100644
index 0000000..b4d07b2
--- /dev/null
+++ b/src/test/test-set.c
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "set.h"
+#include "strv.h"
+
+const bool mempool_use_allowed = VALGRIND;
+
+static void test_set_steal_first(void) {
+ _cleanup_set_free_ Set *m = NULL;
+ int seen[3] = {};
+ char *val;
+
+ m = set_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(set_put(m, (void*) "1") == 1);
+ assert_se(set_put(m, (void*) "22") == 1);
+ assert_se(set_put(m, (void*) "333") == 1);
+
+ while ((val = set_steal_first(m)))
+ seen[strlen(val) - 1]++;
+
+ assert_se(seen[0] == 1 && seen[1] == 1 && seen[2] == 1);
+
+ assert_se(set_isempty(m));
+}
+
+typedef struct Item {
+ int seen;
+} Item;
+static void item_seen(Item *item) {
+ item->seen++;
+}
+
+static void test_set_free_with_destructor(void) {
+ Set *m;
+ struct Item items[4] = {};
+ unsigned i;
+
+ assert_se(m = set_new(NULL));
+ for (i = 0; i < ELEMENTSOF(items) - 1; i++)
+ assert_se(set_put(m, items + i) == 1);
+
+ m = set_free_with_destructor(m, item_seen);
+ assert_se(items[0].seen == 1);
+ assert_se(items[1].seen == 1);
+ assert_se(items[2].seen == 1);
+ assert_se(items[3].seen == 0);
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(item_hash_ops, void, trivial_hash_func, trivial_compare_func, Item, item_seen);
+
+static void test_set_free_with_hash_ops(void) {
+ Set *m;
+ struct Item items[4] = {};
+ unsigned i;
+
+ assert_se(m = set_new(&item_hash_ops));
+ for (i = 0; i < ELEMENTSOF(items) - 1; i++)
+ assert_se(set_put(m, items + i) == 1);
+
+ m = set_free(m);
+ assert_se(items[0].seen == 1);
+ assert_se(items[1].seen == 1);
+ assert_se(items[2].seen == 1);
+ assert_se(items[3].seen == 0);
+}
+
+static void test_set_put(void) {
+ _cleanup_set_free_ Set *m = NULL;
+
+ m = set_new(&string_hash_ops);
+ assert_se(m);
+
+ assert_se(set_put(m, (void*) "1") == 1);
+ assert_se(set_put(m, (void*) "22") == 1);
+ assert_se(set_put(m, (void*) "333") == 1);
+ assert_se(set_put(m, (void*) "333") == 0);
+ assert_se(set_remove(m, (void*) "333"));
+ assert_se(set_put(m, (void*) "333") == 1);
+ assert_se(set_put(m, (void*) "333") == 0);
+ assert_se(set_put(m, (void*) "22") == 0);
+
+ _cleanup_free_ char **t = set_get_strv(m);
+ assert_se(strv_contains(t, "1"));
+ assert_se(strv_contains(t, "22"));
+ assert_se(strv_contains(t, "333"));
+ assert_se(strv_length(t) == 3);
+}
+
+static void test_set_put_strdup(void) {
+ _cleanup_set_free_ Set *m = NULL;
+
+ assert_se(set_put_strdup(&m, "aaa") == 1);
+ assert_se(set_put_strdup(&m, "aaa") == 0);
+ assert_se(set_put_strdup(&m, "bbb") == 1);
+ assert_se(set_put_strdup(&m, "bbb") == 0);
+ assert_se(set_put_strdup(&m, "aaa") == 0);
+ assert_se(set_size(m) == 2);
+}
+
+static void test_set_put_strdupv(void) {
+ _cleanup_set_free_ Set *m = NULL;
+
+ assert_se(set_put_strdupv(&m, STRV_MAKE("aaa", "aaa", "bbb", "bbb", "aaa")) == 2);
+ assert_se(set_put_strdupv(&m, STRV_MAKE("aaa", "aaa", "bbb", "bbb", "ccc")) == 1);
+ assert_se(set_size(m) == 3);
+}
+
+static void test_set_ensure_allocated(void) {
+ _cleanup_set_free_ Set *m = NULL;
+
+ assert_se(set_ensure_allocated(&m, &string_hash_ops) == 1);
+ assert_se(set_ensure_allocated(&m, &string_hash_ops) == 0);
+ assert_se(set_ensure_allocated(&m, NULL) == 0);
+ assert_se(set_size(m) == 0);
+}
+
+static void test_set_ensure_put(void) {
+ _cleanup_set_free_ Set *m = NULL;
+
+ assert_se(set_ensure_put(&m, &string_hash_ops, "a") == 1);
+ assert_se(set_ensure_put(&m, &string_hash_ops, "a") == 0);
+ assert_se(set_ensure_put(&m, NULL, "a") == 0);
+ assert_se(set_ensure_put(&m, &string_hash_ops, "b") == 1);
+ assert_se(set_ensure_put(&m, &string_hash_ops, "b") == 0);
+ assert_se(set_ensure_put(&m, &string_hash_ops, "a") == 0);
+ assert_se(set_size(m) == 2);
+}
+
+static void test_set_ensure_consume(void) {
+ _cleanup_set_free_ Set *m = NULL;
+ char *s, *t;
+
+ assert_se(s = strdup("a"));
+ assert_se(set_ensure_consume(&m, &string_hash_ops_free, s) == 1);
+
+ assert_se(t = strdup("a"));
+ assert_se(set_ensure_consume(&m, &string_hash_ops_free, t) == 0);
+
+ assert_se(t = strdup("a"));
+ assert_se(set_ensure_consume(&m, &string_hash_ops_free, t) == 0);
+
+ assert_se(t = strdup("b"));
+ assert_se(set_ensure_consume(&m, &string_hash_ops_free, t) == 1);
+
+ assert_se(t = strdup("b"));
+ assert_se(set_ensure_consume(&m, &string_hash_ops_free, t) == 0);
+
+ assert_se(set_size(m) == 2);
+}
+
+static void test_set_strjoin(void) {
+ _cleanup_set_free_ Set *m = NULL;
+ _cleanup_free_ char *joined = NULL;
+
+ /* Empty set */
+ assert_se(set_strjoin(m, NULL, false, &joined) >= 0);
+ assert_se(!joined);
+ assert_se(set_strjoin(m, "", false, &joined) >= 0);
+ assert_se(!joined);
+ assert_se(set_strjoin(m, " ", false, &joined) >= 0);
+ assert_se(!joined);
+ assert_se(set_strjoin(m, "xxx", false, &joined) >= 0);
+ assert_se(!joined);
+ assert_se(set_strjoin(m, NULL, true, &joined) >= 0);
+ assert_se(!joined);
+ assert_se(set_strjoin(m, "", true, &joined) >= 0);
+ assert_se(!joined);
+ assert_se(set_strjoin(m, " ", true, &joined) >= 0);
+ assert_se(!joined);
+ assert_se(set_strjoin(m, "xxx", true, &joined) >= 0);
+ assert_se(!joined);
+
+ /* Single entry */
+ assert_se(set_put_strdup(&m, "aaa") == 1);
+ assert_se(set_strjoin(m, NULL, false, &joined) >= 0);
+ assert_se(streq(joined, "aaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, "", false, &joined) >= 0);
+ assert_se(streq(joined, "aaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, " ", false, &joined) >= 0);
+ assert_se(streq(joined, "aaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, "xxx", false, &joined) >= 0);
+ assert_se(streq(joined, "aaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, NULL, true, &joined) >= 0);
+ assert_se(streq(joined, "aaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, "", true, &joined) >= 0);
+ assert_se(streq(joined, "aaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, " ", true, &joined) >= 0);
+ assert_se(streq(joined, " aaa "));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, "xxx", true, &joined) >= 0);
+ assert_se(streq(joined, "xxxaaaxxx"));
+
+ /* Two entries */
+ assert_se(set_put_strdup(&m, "bbb") == 1);
+ assert_se(set_put_strdup(&m, "aaa") == 0);
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, NULL, false, &joined) >= 0);
+ assert_se(STR_IN_SET(joined, "aaabbb", "bbbaaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, "", false, &joined) >= 0);
+ assert_se(STR_IN_SET(joined, "aaabbb", "bbbaaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, " ", false, &joined) >= 0);
+ assert_se(STR_IN_SET(joined, "aaa bbb", "bbb aaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, "xxx", false, &joined) >= 0);
+ assert_se(STR_IN_SET(joined, "aaaxxxbbb", "bbbxxxaaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, NULL, true, &joined) >= 0);
+ assert_se(STR_IN_SET(joined, "aaabbb", "bbbaaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, "", true, &joined) >= 0);
+ assert_se(STR_IN_SET(joined, "aaabbb", "bbbaaa"));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, " ", true, &joined) >= 0);
+ assert_se(STR_IN_SET(joined, " aaa bbb ", " bbb aaa "));
+ joined = mfree(joined);
+ assert_se(set_strjoin(m, "xxx", true, &joined) >= 0);
+ assert_se(STR_IN_SET(joined, "xxxaaaxxxbbbxxx", "xxxbbbxxxaaaxxx"));
+}
+
+int main(int argc, const char *argv[]) {
+ test_set_steal_first();
+ test_set_free_with_destructor();
+ test_set_free_with_hash_ops();
+ test_set_put();
+ test_set_put_strdup();
+ test_set_put_strdupv();
+ test_set_ensure_allocated();
+ test_set_ensure_put();
+ test_set_ensure_consume();
+ test_set_strjoin();
+
+ return 0;
+}
diff --git a/src/test/test-sigbus.c b/src/test/test-sigbus.c
new file mode 100644
index 0000000..d141735
--- /dev/null
+++ b/src/test/test-sigbus.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#if HAVE_VALGRIND_VALGRIND_H
+# include <valgrind/valgrind.h>
+#endif
+
+#include "fd-util.h"
+#include "memory-util.h"
+#include "sigbus.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_close_ int fd = -1;
+ char template[] = "/tmp/sigbus-test-XXXXXX";
+ void *addr = NULL;
+ uint8_t *p;
+
+ test_setup_logging(LOG_INFO);
+
+#if HAS_FEATURE_ADDRESS_SANITIZER
+ return log_tests_skipped("address-sanitizer is enabled");
+#endif
+#if HAVE_VALGRIND_VALGRIND_H
+ if (RUNNING_ON_VALGRIND)
+ return log_tests_skipped("This test cannot run on valgrind");
+#endif
+
+ sigbus_install();
+
+ assert_se(sigbus_pop(&addr) == 0);
+
+ assert_se((fd = mkostemp(template, O_RDWR|O_CREAT|O_EXCL)) >= 0);
+ assert_se(unlink(template) >= 0);
+ assert_se(posix_fallocate(fd, 0, page_size() * 8) >= 0);
+
+ p = mmap(NULL, page_size() * 16, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ assert_se(p != MAP_FAILED);
+
+ assert_se(sigbus_pop(&addr) == 0);
+
+ p[0] = 0xFF;
+ assert_se(sigbus_pop(&addr) == 0);
+
+ p[page_size()] = 0xFF;
+ assert_se(sigbus_pop(&addr) == 0);
+
+ p[page_size()*8] = 0xFF;
+ p[page_size()*8+1] = 0xFF;
+ p[page_size()*10] = 0xFF;
+ assert_se(sigbus_pop(&addr) > 0);
+ assert_se(addr == p + page_size() * 8);
+ assert_se(sigbus_pop(&addr) > 0);
+ assert_se(addr == p + page_size() * 10);
+ assert_se(sigbus_pop(&addr) == 0);
+
+ sigbus_reset();
+}
diff --git a/src/test/test-signal-util.c b/src/test/test-signal-util.c
new file mode 100644
index 0000000..e5096a8
--- /dev/null
+++ b/src/test/test-signal-util.c
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "log.h"
+#include "macro.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "process-util.h"
+
+#define info(sig) log_info(#sig " = " STRINGIFY(sig) " = %d", sig)
+
+static void test_rt_signals(void) {
+ info(SIGRTMIN);
+ info(SIGRTMAX);
+
+ /* We use signals SIGRTMIN+0 to SIGRTMIN+24 unconditionally */
+ assert(SIGRTMAX - SIGRTMIN >= 24);
+}
+
+static void test_signal_to_string_one(int val) {
+ const char *p;
+
+ assert_se(p = signal_to_string(val));
+
+ assert_se(signal_from_string(p) == val);
+
+ p = strjoina("SIG", p);
+ assert_se(signal_from_string(p) == val);
+}
+
+static void test_signal_from_string_one(const char *s, int val) {
+ const char *p;
+
+ assert_se(signal_from_string(s) == val);
+
+ p = strjoina("SIG", s);
+ assert_se(signal_from_string(p) == val);
+}
+
+static void test_signal_from_string_number(const char *s, int val) {
+ const char *p;
+
+ assert_se(signal_from_string(s) == val);
+
+ p = strjoina("SIG", s);
+ assert_se(signal_from_string(p) == -EINVAL);
+}
+
+static void test_signal_from_string(void) {
+ char buf[STRLEN("RTMIN+") + DECIMAL_STR_MAX(int) + 1];
+
+ test_signal_to_string_one(SIGHUP);
+ test_signal_to_string_one(SIGTERM);
+ test_signal_to_string_one(SIGRTMIN);
+ test_signal_to_string_one(SIGRTMIN+3);
+ test_signal_to_string_one(SIGRTMAX-4);
+
+ test_signal_from_string_one("RTMIN", SIGRTMIN);
+ test_signal_from_string_one("RTMAX", SIGRTMAX);
+
+ xsprintf(buf, "RTMIN+%d", SIGRTMAX-SIGRTMIN);
+ test_signal_from_string_one(buf, SIGRTMAX);
+
+ xsprintf(buf, "RTMIN+%d", INT_MAX);
+ test_signal_from_string_one(buf, -ERANGE);
+
+ xsprintf(buf, "RTMAX-%d", SIGRTMAX-SIGRTMIN);
+ test_signal_from_string_one(buf, SIGRTMIN);
+
+ xsprintf(buf, "RTMAX-%d", INT_MAX);
+ test_signal_from_string_one(buf, -ERANGE);
+
+ test_signal_from_string_one("", -EINVAL);
+ test_signal_from_string_one("hup", -EINVAL);
+ test_signal_from_string_one("HOGEHOGE", -EINVAL);
+
+ test_signal_from_string_one("RTMIN-5", -EINVAL);
+ test_signal_from_string_one("RTMIN- 5", -EINVAL);
+ test_signal_from_string_one("RTMIN -5", -EINVAL);
+ test_signal_from_string_one("RTMIN+ 5", -EINVAL);
+ test_signal_from_string_one("RTMIN +5", -EINVAL);
+ test_signal_from_string_one("RTMIN+100", -ERANGE);
+ test_signal_from_string_one("RTMIN+-3", -EINVAL);
+ test_signal_from_string_one("RTMIN++3", -EINVAL);
+ test_signal_from_string_one("RTMIN+HUP", -EINVAL);
+ test_signal_from_string_one("RTMIN3", -EINVAL);
+
+ test_signal_from_string_one("RTMAX+5", -EINVAL);
+ test_signal_from_string_one("RTMAX+ 5", -EINVAL);
+ test_signal_from_string_one("RTMAX +5", -EINVAL);
+ test_signal_from_string_one("RTMAX- 5", -EINVAL);
+ test_signal_from_string_one("RTMAX -5", -EINVAL);
+ test_signal_from_string_one("RTMAX-100", -ERANGE);
+ test_signal_from_string_one("RTMAX-+3", -EINVAL);
+ test_signal_from_string_one("RTMAX--3", -EINVAL);
+ test_signal_from_string_one("RTMAX-HUP", -EINVAL);
+
+ test_signal_from_string_number("3", 3);
+ test_signal_from_string_number("+5", 5);
+ test_signal_from_string_number(" +5", 5);
+ test_signal_from_string_number("10000", -ERANGE);
+ test_signal_from_string_number("-2", -ERANGE);
+}
+
+static void test_block_signals(void) {
+ sigset_t ss;
+
+ assert_se(sigprocmask(0, NULL, &ss) >= 0);
+
+ assert_se(sigismember(&ss, SIGUSR1) == 0);
+ assert_se(sigismember(&ss, SIGALRM) == 0);
+ assert_se(sigismember(&ss, SIGVTALRM) == 0);
+
+ {
+ BLOCK_SIGNALS(SIGUSR1, SIGVTALRM);
+
+ assert_se(sigprocmask(0, NULL, &ss) >= 0);
+ assert_se(sigismember(&ss, SIGUSR1) == 1);
+ assert_se(sigismember(&ss, SIGALRM) == 0);
+ assert_se(sigismember(&ss, SIGVTALRM) == 1);
+
+ }
+
+ assert_se(sigprocmask(0, NULL, &ss) >= 0);
+ assert_se(sigismember(&ss, SIGUSR1) == 0);
+ assert_se(sigismember(&ss, SIGALRM) == 0);
+ assert_se(sigismember(&ss, SIGVTALRM) == 0);
+}
+
+static void test_ignore_signals(void) {
+ assert_se(ignore_signals(SIGINT, -1) >= 0);
+ assert_se(kill(getpid_cached(), SIGINT) >= 0);
+ assert_se(ignore_signals(SIGUSR1, SIGUSR2, SIGTERM, SIGPIPE, -1) >= 0);
+ assert_se(kill(getpid_cached(), SIGUSR1) >= 0);
+ assert_se(kill(getpid_cached(), SIGUSR2) >= 0);
+ assert_se(kill(getpid_cached(), SIGTERM) >= 0);
+ assert_se(kill(getpid_cached(), SIGPIPE) >= 0);
+ assert_se(default_signals(SIGINT, SIGUSR1, SIGUSR2, SIGTERM, SIGPIPE, -1) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_rt_signals();
+ test_signal_from_string();
+ test_block_signals();
+ test_ignore_signals();
+
+ return 0;
+}
diff --git a/src/test/test-siphash24.c b/src/test/test-siphash24.c
new file mode 100644
index 0000000..4a1672f
--- /dev/null
+++ b/src/test/test-siphash24.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "memory-util.h"
+#include "siphash24.h"
+
+#define ITERATIONS 10000000ULL
+
+static void do_test(const uint8_t *in, size_t len, const uint8_t *key) {
+ struct siphash state = {};
+ uint64_t out;
+ unsigned i, j;
+
+ out = siphash24(in, len, key);
+ assert_se(out == 0xa129ca6149be45e5);
+
+ /* verify the internal state as given in the above paper */
+ siphash24_init(&state, key);
+ assert_se(state.v0 == 0x7469686173716475);
+ assert_se(state.v1 == 0x6b617f6d656e6665);
+ assert_se(state.v2 == 0x6b7f62616d677361);
+ assert_se(state.v3 == 0x7b6b696e727e6c7b);
+ siphash24_compress(in, len, &state);
+ assert_se(state.v0 == 0x4a017198de0a59e0);
+ assert_se(state.v1 == 0x0d52f6f62a4f59a4);
+ assert_se(state.v2 == 0x634cb3577b01fd3d);
+ assert_se(state.v3 == 0xa5224d6f55c7d9c8);
+ out = siphash24_finalize(&state);
+ assert_se(out == 0xa129ca6149be45e5);
+ assert_se(state.v0 == 0xf6bcd53893fecff1);
+ assert_se(state.v1 == 0x54b9964c7ea0d937);
+ assert_se(state.v2 == 0x1b38329c099bb55a);
+ assert_se(state.v3 == 0x1814bb89ad7be679);
+
+ /* verify that decomposing the input in three chunks gives the
+ same result */
+ for (i = 0; i < len; i++) {
+ for (j = i; j < len; j++) {
+ siphash24_init(&state, key);
+ siphash24_compress(in, i, &state);
+ siphash24_compress(&in[i], j - i, &state);
+ siphash24_compress(&in[j], len - j, &state);
+ out = siphash24_finalize(&state);
+ assert_se(out == 0xa129ca6149be45e5);
+ }
+ }
+}
+
+static void test_short_hashes(void) {
+ const uint8_t one[] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16 };
+ const uint8_t key[16] = { 0x22, 0x24, 0x41, 0x22, 0x55, 0x77, 0x88, 0x07,
+ 0x23, 0x09, 0x23, 0x14, 0x0c, 0x33, 0x0e, 0x0f};
+ uint8_t two[sizeof one] = {};
+
+ struct siphash state1 = {}, state2 = {};
+ unsigned i, j;
+
+ siphash24_init(&state1, key);
+ siphash24_init(&state2, key);
+
+ /* hashing 1, 2, 3, 4, 5, ..., 16 bytes, with the byte after the buffer different */
+ for (i = 1; i <= sizeof one; i++) {
+ siphash24_compress(one, i, &state1);
+
+ two[i-1] = one[i-1];
+ siphash24_compress(two, i, &state2);
+
+ assert_se(memcmp(&state1, &state2, sizeof state1) == 0);
+ }
+
+ /* hashing n and 1, n and 2, n and 3, ..., n-1 and 1, n-2 and 2, ... */
+ for (i = sizeof one; i > 0; i--) {
+ zero(two);
+
+ for (j = 1; j <= sizeof one; j++) {
+ siphash24_compress(one, i, &state1);
+ siphash24_compress(one, j, &state1);
+
+ siphash24_compress(one, i, &state2);
+ two[j-1] = one[j-1];
+ siphash24_compress(two, j, &state2);
+
+ assert_se(memcmp(&state1, &state2, sizeof state1) == 0);
+ }
+ }
+}
+
+/* see https://131002.net/siphash/siphash.pdf, Appendix A */
+int main(int argc, char *argv[]) {
+ const uint8_t in[15] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e };
+ const uint8_t key[16] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
+ uint8_t in_buf[20];
+
+ /* Test with same input but different alignments. */
+ memcpy(in_buf, in, sizeof(in));
+ do_test(in_buf, sizeof(in), key);
+ memcpy(in_buf + 1, in, sizeof(in));
+ do_test(in_buf + 1, sizeof(in), key);
+ memcpy(in_buf + 2, in, sizeof(in));
+ do_test(in_buf + 2, sizeof(in), key);
+ memcpy(in_buf + 4, in, sizeof(in));
+ do_test(in_buf + 4, sizeof(in), key);
+
+ test_short_hashes();
+}
diff --git a/src/test/test-sizeof.c b/src/test/test-sizeof.c
new file mode 100644
index 0000000..3c9dc18
--- /dev/null
+++ b/src/test/test-sizeof.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#define __STDC_WANT_IEC_60559_TYPES_EXT__
+#include <float.h>
+
+#include "time-util.h"
+
+/* Print information about various types. Useful when diagnosing
+ * gcc diagnostics on an unfamiliar architecture. */
+
+DISABLE_WARNING_TYPE_LIMITS;
+
+#define info_no_sign(t) \
+ printf("%s → %zu bits, %zu byte alignment\n", STRINGIFY(t), \
+ sizeof(t)*CHAR_BIT, \
+ __alignof__(t))
+
+#define info(t) \
+ printf("%s → %zu bits%s, %zu byte alignment\n", STRINGIFY(t), \
+ sizeof(t)*CHAR_BIT, \
+ strstr(STRINGIFY(t), "signed") ? "" : \
+ (t)-1 < (t)0 ? ", signed" : ", unsigned", \
+ __alignof__(t))
+
+enum Enum {
+ enum_value,
+};
+
+enum BigEnum {
+ big_enum_value = UINT64_C(1),
+};
+
+enum BigEnum2 {
+ big_enum2_pos = UINT64_C(1),
+ big_enum2_neg = UINT64_C(-1),
+};
+
+int main(void) {
+ int (*function_pointer)(void);
+
+ info_no_sign(function_pointer);
+ info_no_sign(void*);
+ info(char*);
+
+ info(char);
+ info(signed char);
+ info(unsigned char);
+ info(short unsigned);
+ info(unsigned);
+ info(long unsigned);
+ info(long long unsigned);
+ info(__syscall_ulong_t);
+ info(__syscall_slong_t);
+
+ info(float);
+ info(double);
+ info(long double);
+
+#ifdef FLT128_MAX
+ info(_Float128);
+ info(_Float64);
+ info(_Float64x);
+ info(_Float32);
+ info(_Float32x);
+#endif
+
+ info(size_t);
+ info(ssize_t);
+ info(time_t);
+ info(usec_t);
+ info(__time_t);
+ info(pid_t);
+ info(uid_t);
+ info(gid_t);
+ info(socklen_t);
+
+ info(__cpu_mask);
+
+ info(enum Enum);
+ info(enum BigEnum);
+ info(enum BigEnum2);
+ assert_cc(sizeof(enum BigEnum2) == 8);
+ printf("big_enum2_pos → %zu\n", sizeof(big_enum2_pos));
+ printf("big_enum2_neg → %zu\n", sizeof(big_enum2_neg));
+
+ return 0;
+}
diff --git a/src/test/test-sleep.c b/src/test/test-sleep.c
new file mode 100644
index 0000000..d916254
--- /dev/null
+++ b/src/test/test-sleep.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/fiemap.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "efivars.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "memory-util.h"
+#include "sleep-config.h"
+#include "strv.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_parse_sleep_config(void) {
+ _cleanup_(free_sleep_configp) SleepConfig *sleep_config = NULL;
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_sleep_config(&sleep_config) == 0);
+
+ _cleanup_free_ char *sum, *sus, *him, *his, *hym, *hys;
+
+ sum = strv_join(sleep_config->suspend_modes, ", ");
+ sus = strv_join(sleep_config->suspend_states, ", ");
+ him = strv_join(sleep_config->hibernate_modes, ", ");
+ his = strv_join(sleep_config->hibernate_states, ", ");
+ hym = strv_join(sleep_config->hybrid_modes, ", ");
+ hys = strv_join(sleep_config->hybrid_states, ", ");
+ log_debug(" allow_suspend: %u", sleep_config->allow_suspend);
+ log_debug(" allow_hibernate: %u", sleep_config->allow_hibernate);
+ log_debug(" allow_s2h: %u", sleep_config->allow_s2h);
+ log_debug(" allow_hybrid_sleep: %u", sleep_config->allow_hybrid_sleep);
+ log_debug(" suspend modes: %s", sum);
+ log_debug(" states: %s", sus);
+ log_debug(" hibernate modes: %s", him);
+ log_debug(" states: %s", his);
+ log_debug(" hybrid modes: %s", hym);
+ log_debug(" states: %s", hys);
+}
+
+static int test_fiemap(const char *path) {
+ _cleanup_free_ struct fiemap *fiemap = NULL;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ fd = open(path, O_RDONLY | O_CLOEXEC | O_NONBLOCK);
+ if (fd < 0)
+ return log_error_errno(errno, "failed to open %s: %m", path);
+ r = read_fiemap(fd, &fiemap);
+ if (r == -EOPNOTSUPP)
+ exit(log_tests_skipped("Not supported"));
+ if (r < 0)
+ return log_error_errno(r, "Unable to read extent map for '%s': %m", path);
+ log_info("extent map information for %s:", path);
+ log_info("\t start: %" PRIu64, (uint64_t) fiemap->fm_start);
+ log_info("\t length: %" PRIu64, (uint64_t) fiemap->fm_length);
+ log_info("\t flags: %" PRIu32, fiemap->fm_flags);
+ log_info("\t number of mapped extents: %" PRIu32, fiemap->fm_mapped_extents);
+ log_info("\t extent count: %" PRIu32, fiemap->fm_extent_count);
+ if (fiemap->fm_extent_count > 0)
+ log_info("\t first extent location: %" PRIu64,
+ (uint64_t) (fiemap->fm_extents[0].fe_physical / page_size()));
+
+ return 0;
+}
+
+static void test_sleep(void) {
+ _cleanup_strv_free_ char
+ **standby = strv_new("standby"),
+ **mem = strv_new("mem"),
+ **disk = strv_new("disk"),
+ **suspend = strv_new("suspend"),
+ **reboot = strv_new("reboot"),
+ **platform = strv_new("platform"),
+ **shutdown = strv_new("shutdown"),
+ **freeze = strv_new("freeze");
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ printf("Secure boot: %sd\n", enable_disable(is_efi_secure_boot()));
+
+ log_info("/= individual sleep modes =/");
+ log_info("Standby configured: %s", yes_no(can_sleep_state(standby) > 0));
+ log_info("Suspend configured: %s", yes_no(can_sleep_state(mem) > 0));
+ log_info("Hibernate configured: %s", yes_no(can_sleep_state(disk) > 0));
+ log_info("Hibernate+Suspend (Hybrid-Sleep) configured: %s", yes_no(can_sleep_disk(suspend) > 0));
+ log_info("Hibernate+Reboot configured: %s", yes_no(can_sleep_disk(reboot) > 0));
+ log_info("Hibernate+Platform configured: %s", yes_no(can_sleep_disk(platform) > 0));
+ log_info("Hibernate+Shutdown configured: %s", yes_no(can_sleep_disk(shutdown) > 0));
+ log_info("Freeze configured: %s", yes_no(can_sleep_state(freeze) > 0));
+
+ log_info("/= high-level sleep verbs =/");
+ r = can_sleep("suspend");
+ log_info("Suspend configured and possible: %s", r >= 0 ? yes_no(r) : strerror_safe(r));
+ r = can_sleep("hibernate");
+ log_info("Hibernation configured and possible: %s", r >= 0 ? yes_no(r) : strerror_safe(r));
+ r = can_sleep("hybrid-sleep");
+ log_info("Hybrid-sleep configured and possible: %s", r >= 0 ? yes_no(r) : strerror_safe(r));
+ r = can_sleep("suspend-then-hibernate");
+ log_info("Suspend-then-Hibernate configured and possible: %s", r >= 0 ? yes_no(r) : strerror_safe(r));
+}
+
+int main(int argc, char* argv[]) {
+ int i, r = 0, k;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (getuid() != 0)
+ log_warning("This program is unlikely to work for unprivileged users");
+
+ test_parse_sleep_config();
+ test_sleep();
+
+ if (argc <= 1)
+ assert_se(test_fiemap(argv[0]) == 0);
+ else
+ for (i = 1; i < argc; i++) {
+ k = test_fiemap(argv[i]);
+ if (r == 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/test/test-socket-netlink.c b/src/test/test-socket-netlink.c
new file mode 100644
index 0000000..704cc01
--- /dev/null
+++ b/src/test/test-socket-netlink.c
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "missing_network.h"
+#include "tests.h"
+#include "socket-netlink.h"
+#include "string-util.h"
+
+static void test_socket_address_parse_one(const char *in, int ret, int family, const char *expected) {
+ SocketAddress a;
+ _cleanup_free_ char *out = NULL;
+ int r;
+
+ r = socket_address_parse(&a, in);
+ if (r >= 0) {
+ r = socket_address_print(&a, &out);
+ if (r < 0)
+ log_error_errno(r, "Printing failed for \"%s\": %m", in);
+ assert(r >= 0);
+ assert_se(a.type == 0);
+ }
+
+ log_info("\"%s\" → %s %d → \"%s\" (expect %d / \"%s\")",
+ in,
+ r >= 0 ? "✓" : "✗", r,
+ empty_to_dash(out),
+ ret,
+ ret >= 0 ? expected ?: in : "-");
+ assert_se(r == ret);
+ if (r >= 0) {
+ assert_se(a.sockaddr.sa.sa_family == family);
+ assert_se(streq(out, expected ?: in));
+ }
+}
+
+static void test_socket_address_parse(void) {
+ log_info("/* %s */", __func__);
+
+ test_socket_address_parse_one("junk", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("192.168.1.1", -EINVAL, 0, NULL);
+ test_socket_address_parse_one(".168.1.1", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("989.168.1.1", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("192.168.1.1:65536", -ERANGE, 0, NULL);
+ test_socket_address_parse_one("192.168.1.1:0", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("0", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("65536", -ERANGE, 0, NULL);
+
+ const int default_family = socket_ipv6_is_supported() ? AF_INET6 : AF_INET;
+
+ test_socket_address_parse_one("65535", 0, default_family, "[::]:65535");
+
+ /* The checks below will pass even if ipv6 is disabled in
+ * kernel. The underlying glibc's inet_pton() is just a string
+ * parser and doesn't make any syscalls. */
+
+ test_socket_address_parse_one("[::1]", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]8888", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("::1", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]:0", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]:65536", -ERANGE, 0, NULL);
+ test_socket_address_parse_one("[a:b:1]:8888", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]%lo:1234", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]%lo:0", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]%lo", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]%lo%lo:1234", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]% lo:1234", -EINVAL, 0, NULL);
+
+ test_socket_address_parse_one("8888", 0, default_family, "[::]:8888");
+ test_socket_address_parse_one("[2001:0db8:0000:85a3:0000:0000:ac1f:8001]:8888", 0, AF_INET6,
+ "[2001:db8:0:85a3::ac1f:8001]:8888");
+ test_socket_address_parse_one("[::1]:8888", 0, AF_INET6, NULL);
+ test_socket_address_parse_one("[::1]:1234%lo", 0, AF_INET6, NULL);
+ test_socket_address_parse_one("[::1]:0%lo", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]%lo", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("[::1]:1234%lo%lo", -ENODEV, 0, NULL);
+ test_socket_address_parse_one("[::1]:1234%xxxxasdf", -ENODEV, 0, NULL);
+ test_socket_address_parse_one("192.168.1.254:8888", 0, AF_INET, NULL);
+ test_socket_address_parse_one("/foo/bar", 0, AF_UNIX, NULL);
+ test_socket_address_parse_one("/", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("@abstract", 0, AF_UNIX, NULL);
+
+ {
+ char aaa[SUN_PATH_LEN + 1] = "@";
+
+ memset(aaa + 1, 'a', SUN_PATH_LEN - 1);
+ char_array_0(aaa);
+
+ test_socket_address_parse_one(aaa, -EINVAL, 0, NULL);
+
+ aaa[SUN_PATH_LEN - 1] = '\0';
+ test_socket_address_parse_one(aaa, 0, AF_UNIX, NULL);
+ }
+
+ test_socket_address_parse_one("vsock:2:1234", 0, AF_VSOCK, NULL);
+ test_socket_address_parse_one("vsock::1234", 0, AF_VSOCK, NULL);
+ test_socket_address_parse_one("vsock:2:1234x", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("vsock:2x:1234", -EINVAL, 0, NULL);
+ test_socket_address_parse_one("vsock:2", -EINVAL, 0, NULL);
+}
+
+static void test_socket_address_parse_netlink(void) {
+ SocketAddress a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse_netlink(&a, "junk") < 0);
+ assert_se(socket_address_parse_netlink(&a, "") < 0);
+
+ assert_se(socket_address_parse_netlink(&a, "route") >= 0);
+ assert_se(a.sockaddr.nl.nl_family == AF_NETLINK);
+ assert_se(a.sockaddr.nl.nl_groups == 0);
+ assert_se(a.protocol == NETLINK_ROUTE);
+ assert_se(socket_address_parse_netlink(&a, "route") >= 0);
+ assert_se(socket_address_parse_netlink(&a, "route 10") >= 0);
+ assert_se(a.sockaddr.nl.nl_family == AF_NETLINK);
+ assert_se(a.sockaddr.nl.nl_groups == 10);
+ assert_se(a.protocol == NETLINK_ROUTE);
+
+ /* With spaces and tabs */
+ assert_se(socket_address_parse_netlink(&a, " kobject-uevent ") >= 0);
+ assert_se(a.sockaddr.nl.nl_family == AF_NETLINK);
+ assert_se(a.sockaddr.nl.nl_groups == 0);
+ assert_se(a.protocol == NETLINK_KOBJECT_UEVENT);
+ assert_se(socket_address_parse_netlink(&a, " \t kobject-uevent \t 10") >= 0);
+ assert_se(a.sockaddr.nl.nl_family == AF_NETLINK);
+ assert_se(a.sockaddr.nl.nl_groups == 10);
+ assert_se(a.protocol == NETLINK_KOBJECT_UEVENT);
+ assert_se(socket_address_parse_netlink(&a, "kobject-uevent\t10") >= 0);
+ assert_se(a.sockaddr.nl.nl_family == AF_NETLINK);
+ assert_se(a.sockaddr.nl.nl_groups == 10);
+ assert_se(a.protocol == NETLINK_KOBJECT_UEVENT);
+
+ /* trailing space is not supported */
+ assert_se(socket_address_parse_netlink(&a, "kobject-uevent\t10 ") < 0);
+
+ /* Group must be unsigned */
+ assert_se(socket_address_parse_netlink(&a, "kobject-uevent -1") < 0);
+
+ /* oss-fuzz #6884 */
+ assert_se(socket_address_parse_netlink(&a, "\xff") < 0);
+}
+
+static void test_socket_address_equal(void) {
+ SocketAddress a, b;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "192.168.1.1:888") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "192.16.1.1:8888") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "8888") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "/foo/bar/") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_parse(&b, "192.168.1.1:8888") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "/foo/bar") >= 0);
+ assert_se(socket_address_parse(&b, "/foo/bar") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "[::1]:8888") >= 0);
+ assert_se(socket_address_parse(&b, "[::1]:8888") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "@abstract") >= 0);
+ assert_se(socket_address_parse(&b, "@abstract") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse_netlink(&a, "firewall") >= 0);
+ assert_se(socket_address_parse_netlink(&b, "firewall") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+
+ assert_se(socket_address_parse(&a, "vsock:2:1234") >= 0);
+ assert_se(socket_address_parse(&b, "vsock:2:1234") >= 0);
+ assert_se(socket_address_equal(&a, &b));
+ assert_se(socket_address_parse(&b, "vsock:2:1235") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+ assert_se(socket_address_parse(&b, "vsock:3:1234") >= 0);
+ assert_se(!socket_address_equal(&a, &b));
+}
+
+static void test_socket_address_get_path(void) {
+ SocketAddress a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se(!socket_address_get_path(&a));
+
+ assert_se(socket_address_parse(&a, "@abstract") >= 0);
+ assert_se(!socket_address_get_path(&a));
+
+ assert_se(socket_address_parse(&a, "[::1]:8888") >= 0);
+ assert_se(!socket_address_get_path(&a));
+
+ assert_se(socket_address_parse(&a, "/foo/bar") >= 0);
+ assert_se(streq(socket_address_get_path(&a), "/foo/bar"));
+
+ assert_se(socket_address_parse(&a, "vsock:2:1234") >= 0);
+ assert_se(!socket_address_get_path(&a));
+}
+
+static void test_socket_address_is(void) {
+ SocketAddress a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse(&a, "192.168.1.1:8888") >= 0);
+ assert_se( socket_address_is(&a, "192.168.1.1:8888", 0 /* unspecified yet */));
+ assert_se(!socket_address_is(&a, "route", 0));
+ assert_se(!socket_address_is(&a, "route", SOCK_STREAM));
+ assert_se(!socket_address_is(&a, "192.168.1.1:8888", SOCK_RAW));
+ assert_se(!socket_address_is(&a, "192.168.1.1:8888", SOCK_STREAM));
+ a.type = SOCK_STREAM;
+ assert_se( socket_address_is(&a, "192.168.1.1:8888", SOCK_STREAM));
+}
+
+static void test_socket_address_is_netlink(void) {
+ SocketAddress a;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socket_address_parse_netlink(&a, "route 10") >= 0);
+ assert_se( socket_address_is_netlink(&a, "route 10"));
+ assert_se(!socket_address_is_netlink(&a, "192.168.1.1:8888"));
+ assert_se(!socket_address_is_netlink(&a, "route 1"));
+}
+
+static void test_in_addr_ifindex_to_string_one(int f, const char *a, int ifindex, const char *b) {
+ _cleanup_free_ char *r = NULL;
+ union in_addr_union ua, uuaa;
+ int ff, ifindex2;
+
+ assert_se(in_addr_from_string(f, a, &ua) >= 0);
+ assert_se(in_addr_ifindex_to_string(f, &ua, ifindex, &r) >= 0);
+ printf("test_in_addr_ifindex_to_string_one: %s == %s\n", b, r);
+ assert_se(streq(b, r));
+
+ assert_se(in_addr_ifindex_from_string_auto(b, &ff, &uuaa, &ifindex2) >= 0);
+ assert_se(ff == f);
+ assert_se(in_addr_equal(f, &ua, &uuaa));
+ assert_se(ifindex2 == ifindex || ifindex2 == 0);
+}
+
+static void test_in_addr_ifindex_to_string(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_ifindex_to_string_one(AF_INET, "192.168.0.1", 7, "192.168.0.1");
+ test_in_addr_ifindex_to_string_one(AF_INET, "10.11.12.13", 9, "10.11.12.13");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", 10, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "::1", 11, "::1");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::", 12, "fe80::%12");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::", 0, "fe80::");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::14", 12, "fe80::14%12");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::15", -7, "fe80::15");
+ test_in_addr_ifindex_to_string_one(AF_INET6, "fe80::16", LOOPBACK_IFINDEX, "fe80::16%1");
+}
+
+static void test_in_addr_ifindex_from_string_auto(void) {
+ int family, ifindex;
+ union in_addr_union ua;
+
+ log_info("/* %s */", __func__);
+ /* Most in_addr_ifindex_from_string_auto() invocations have already been tested above, but let's test some more */
+
+ assert_se(in_addr_ifindex_from_string_auto("fe80::17", &family, &ua, &ifindex) >= 0);
+ assert_se(family == AF_INET6);
+ assert_se(ifindex == 0);
+
+ assert_se(in_addr_ifindex_from_string_auto("fe80::18%19", &family, &ua, &ifindex) >= 0);
+ assert_se(family == AF_INET6);
+ assert_se(ifindex == 19);
+
+ assert_se(in_addr_ifindex_from_string_auto("fe80::18%lo", &family, &ua, &ifindex) >= 0);
+ assert_se(family == AF_INET6);
+ assert_se(ifindex == LOOPBACK_IFINDEX);
+
+ assert_se(in_addr_ifindex_from_string_auto("fe80::19%thisinterfacecantexist", &family, &ua, &ifindex) == -ENODEV);
+}
+
+static void test_in_addr_ifindex_name_from_string_auto_one(const char *a, const char *expected) {
+ int family, ifindex;
+ union in_addr_union ua;
+ _cleanup_free_ char *server_name = NULL;
+
+ assert_se(in_addr_ifindex_name_from_string_auto(a, &family, &ua, &ifindex, &server_name) >= 0);
+ assert_se(streq_ptr(server_name, expected));
+}
+
+static void test_in_addr_ifindex_name_from_string_auto(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_ifindex_name_from_string_auto_one("192.168.0.1", NULL);
+ test_in_addr_ifindex_name_from_string_auto_one("192.168.0.1#test.com", "test.com");
+ test_in_addr_ifindex_name_from_string_auto_one("fe80::18%19", NULL);
+ test_in_addr_ifindex_name_from_string_auto_one("fe80::18%19#another.test.com", "another.test.com");
+}
+
+static void test_in_addr_port_ifindex_name_from_string_auto_one(const char *str, int family, uint16_t port, int ifindex,
+ const char *server_name, const char *str_repr) {
+ union in_addr_union a;
+ uint16_t p;
+ int f, i;
+ char *fake;
+
+ log_info("%s: %s", __func__, str);
+
+ {
+ _cleanup_free_ char *name = NULL, *x = NULL;
+ assert_se(in_addr_port_ifindex_name_from_string_auto(str, &f, &a, &p, &i, &name) == 0);
+ assert_se(family == f);
+ assert_se(port == p);
+ assert_se(ifindex == i);
+ assert_se(streq_ptr(server_name, name));
+ assert_se(in_addr_port_ifindex_name_to_string(f, &a, p, i, name, &x) >= 0);
+ assert_se(streq(str_repr ?: str, x));
+ }
+
+ if (port > 0)
+ assert_se(in_addr_port_ifindex_name_from_string_auto(str, &f, &a, NULL, &i, &fake) == -EINVAL);
+ else {
+ _cleanup_free_ char *name = NULL, *x = NULL;
+ assert_se(in_addr_port_ifindex_name_from_string_auto(str, &f, &a, NULL, &i, &name) == 0);
+ assert_se(family == f);
+ assert_se(ifindex == i);
+ assert_se(streq_ptr(server_name, name));
+ assert_se(in_addr_port_ifindex_name_to_string(f, &a, 0, i, name, &x) >= 0);
+ assert_se(streq(str_repr ?: str, x));
+ }
+
+ if (ifindex > 0)
+ assert_se(in_addr_port_ifindex_name_from_string_auto(str, &f, &a, &p, NULL, &fake) == -EINVAL);
+ else {
+ _cleanup_free_ char *name = NULL, *x = NULL;
+ assert_se(in_addr_port_ifindex_name_from_string_auto(str, &f, &a, &p, NULL, &name) == 0);
+ assert_se(family == f);
+ assert_se(port == p);
+ assert_se(streq_ptr(server_name, name));
+ assert_se(in_addr_port_ifindex_name_to_string(f, &a, p, 0, name, &x) >= 0);
+ assert_se(streq(str_repr ?: str, x));
+ }
+
+ if (server_name)
+ assert_se(in_addr_port_ifindex_name_from_string_auto(str, &f, &a, &p, &i, NULL) == -EINVAL);
+ else {
+ _cleanup_free_ char *x = NULL;
+ assert_se(in_addr_port_ifindex_name_from_string_auto(str, &f, &a, &p, &i, NULL) == 0);
+ assert_se(family == f);
+ assert_se(port == p);
+ assert_se(ifindex == i);
+ assert_se(in_addr_port_ifindex_name_to_string(f, &a, p, i, NULL, &x) >= 0);
+ assert_se(streq(str_repr ?: str, x));
+ }
+}
+
+static void test_in_addr_port_ifindex_name_from_string_auto(void) {
+ log_info("/* %s */", __func__);
+
+ test_in_addr_port_ifindex_name_from_string_auto_one("192.168.0.1", AF_INET, 0, 0, NULL, NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("192.168.0.1#test.com", AF_INET, 0, 0, "test.com", NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("192.168.0.1:53", AF_INET, 53, 0, NULL, NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("192.168.0.1:53#example.com", AF_INET, 53, 0, "example.com", NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("fe80::18", AF_INET6, 0, 0, NULL, NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("fe80::18#hoge.com", AF_INET6, 0, 0, "hoge.com", NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("fe80::18%19", AF_INET6, 0, 19, NULL, NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("fe80::18%lo", AF_INET6, 0, 1, NULL, "fe80::18%1");
+ test_in_addr_port_ifindex_name_from_string_auto_one("[fe80::18]:53", AF_INET6, 53, 0, NULL, NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("[fe80::18]:53%19", AF_INET6, 53, 19, NULL, NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("[fe80::18]:53%lo", AF_INET6, 53, 1, NULL, "[fe80::18]:53%1");
+ test_in_addr_port_ifindex_name_from_string_auto_one("fe80::18%19#hoge.com", AF_INET6, 0, 19, "hoge.com", NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("[fe80::18]:53#hoge.com", AF_INET6, 53, 0, "hoge.com", NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("[fe80::18]:53%19", AF_INET6, 53, 19, NULL, NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("[fe80::18]:53%19#hoge.com", AF_INET6, 53, 19, "hoge.com", NULL);
+ test_in_addr_port_ifindex_name_from_string_auto_one("[fe80::18]:53%lo", AF_INET6, 53, 1, NULL, "[fe80::18]:53%1");
+ test_in_addr_port_ifindex_name_from_string_auto_one("[fe80::18]:53%lo#hoge.com", AF_INET6, 53, 1, "hoge.com", "[fe80::18]:53%1#hoge.com");
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_socket_address_parse();
+ test_socket_address_parse_netlink();
+ test_socket_address_equal();
+ test_socket_address_get_path();
+ test_socket_address_is();
+ test_socket_address_is_netlink();
+
+ test_in_addr_ifindex_to_string();
+ test_in_addr_ifindex_from_string_auto();
+ test_in_addr_ifindex_name_from_string_auto();
+ test_in_addr_port_ifindex_name_from_string_auto();
+
+ return 0;
+}
diff --git a/src/test/test-socket-util.c b/src/test/test-socket-util.c
new file mode 100644
index 0000000..4ff7d71
--- /dev/null
+++ b/src/test/test-socket-util.c
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <grp.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "async.h"
+#include "escape.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "in-addr-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+assert_cc(SUN_PATH_LEN == 108);
+
+static void test_ifname_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert( ifname_valid("foo"));
+ assert( ifname_valid("eth0"));
+
+ assert(!ifname_valid("0"));
+ assert(!ifname_valid("99"));
+ assert( ifname_valid("a99"));
+ assert( ifname_valid("99a"));
+
+ assert(!ifname_valid(NULL));
+ assert(!ifname_valid(""));
+ assert(!ifname_valid(" "));
+ assert(!ifname_valid(" foo"));
+ assert(!ifname_valid("bar\n"));
+ assert(!ifname_valid("."));
+ assert(!ifname_valid(".."));
+ assert(ifname_valid("foo.bar"));
+ assert(!ifname_valid("x:y"));
+
+ assert( ifname_valid_full("xxxxxxxxxxxxxxx", 0));
+ assert(!ifname_valid_full("xxxxxxxxxxxxxxxx", 0));
+ assert( ifname_valid_full("xxxxxxxxxxxxxxxx", IFNAME_VALID_ALTERNATIVE));
+ assert( ifname_valid_full("xxxxxxxxxxxxxxxx", IFNAME_VALID_ALTERNATIVE));
+ assert(!ifname_valid_full("999", IFNAME_VALID_ALTERNATIVE));
+ assert( ifname_valid_full("999", IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC));
+ assert(!ifname_valid_full("0", IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC));
+}
+
+static void test_socket_print_unix_one(const char *in, size_t len_in, const char *expected) {
+ _cleanup_free_ char *out = NULL, *c = NULL;
+
+ assert(len_in <= SUN_PATH_LEN);
+ SocketAddress a = { .sockaddr = { .un = { .sun_family = AF_UNIX } },
+ .size = offsetof(struct sockaddr_un, sun_path) + len_in,
+ .type = SOCK_STREAM,
+ };
+ memcpy(a.sockaddr.un.sun_path, in, len_in);
+
+ assert_se(socket_address_print(&a, &out) >= 0);
+ assert_se(c = cescape(in));
+ log_info("\"%s\" → \"%s\" (expect \"%s\")", in, out, expected);
+ assert_se(streq(out, expected));
+}
+
+static void test_socket_print_unix(void) {
+ log_info("/* %s */", __func__);
+
+ /* Some additional tests for abstract addresses which we don't parse */
+
+ test_socket_print_unix_one("\0\0\0\0", 4, "@\\000\\000\\000");
+ test_socket_print_unix_one("@abs", 5, "@abs");
+ test_socket_print_unix_one("\n", 2, "\\n");
+ test_socket_print_unix_one("", 1, "<unnamed>");
+ test_socket_print_unix_one("\0", 1, "<unnamed>");
+ test_socket_print_unix_one("\0_________________________there's 108 characters in this string_____________________________________________", 108,
+ "@_________________________there\\'s 108 characters in this string_____________________________________________");
+ test_socket_print_unix_one("////////////////////////////////////////////////////////////////////////////////////////////////////////////", 108,
+ "////////////////////////////////////////////////////////////////////////////////////////////////////////////");
+ test_socket_print_unix_one("\0\a\b\n\255", 6, "@\\a\\b\\n\\255\\000");
+}
+
+static void test_sockaddr_equal(void) {
+ union sockaddr_union a = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = 0,
+ .in.sin_addr.s_addr = htobe32(INADDR_ANY),
+ };
+ union sockaddr_union b = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = 0,
+ .in.sin_addr.s_addr = htobe32(INADDR_ANY),
+ };
+ union sockaddr_union c = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = 0,
+ .in.sin_addr.s_addr = htobe32(1234),
+ };
+ union sockaddr_union d = {
+ .in6.sin6_family = AF_INET6,
+ .in6.sin6_port = 0,
+ .in6.sin6_addr = IN6ADDR_ANY_INIT,
+ };
+ union sockaddr_union e = {
+ .vm.svm_family = AF_VSOCK,
+ .vm.svm_port = 0,
+ .vm.svm_cid = VMADDR_CID_ANY,
+ };
+
+ log_info("/* %s */", __func__);
+
+ assert_se(sockaddr_equal(&a, &a));
+ assert_se(sockaddr_equal(&a, &b));
+ assert_se(sockaddr_equal(&d, &d));
+ assert_se(sockaddr_equal(&e, &e));
+ assert_se(!sockaddr_equal(&a, &c));
+ assert_se(!sockaddr_equal(&b, &c));
+ assert_se(!sockaddr_equal(&a, &e));
+}
+
+static void test_sockaddr_un_len(void) {
+ log_info("/* %s */", __func__);
+
+ static const struct sockaddr_un fs = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/foo/bar/waldo",
+ };
+
+ static const struct sockaddr_un abstract = {
+ .sun_family = AF_UNIX,
+ .sun_path = "\0foobar",
+ };
+
+ assert_se(SOCKADDR_UN_LEN(fs) == offsetof(struct sockaddr_un, sun_path) + strlen(fs.sun_path) + 1);
+ assert_se(SOCKADDR_UN_LEN(abstract) == offsetof(struct sockaddr_un, sun_path) + 1 + strlen(abstract.sun_path + 1));
+}
+
+static void test_in_addr_is_multicast(void) {
+ union in_addr_union a, b;
+ int f;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(in_addr_from_string_auto("192.168.3.11", &f, &a) >= 0);
+ assert_se(in_addr_is_multicast(f, &a) == 0);
+
+ assert_se(in_addr_from_string_auto("224.0.0.1", &f, &a) >= 0);
+ assert_se(in_addr_is_multicast(f, &a) == 1);
+
+ assert_se(in_addr_from_string_auto("FF01:0:0:0:0:0:0:1", &f, &b) >= 0);
+ assert_se(in_addr_is_multicast(f, &b) == 1);
+
+ assert_se(in_addr_from_string_auto("2001:db8::c:69b:aeff:fe53:743e", &f, &b) >= 0);
+ assert_se(in_addr_is_multicast(f, &b) == 0);
+}
+
+static void test_getpeercred_getpeergroups(void) {
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = safe_fork("(getpeercred)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ static const gid_t gids[] = { 3, 4, 5, 6, 7 };
+ gid_t *test_gids;
+ size_t n_test_gids;
+ uid_t test_uid;
+ gid_t test_gid;
+ struct ucred ucred;
+ int pair[2];
+
+ if (geteuid() == 0) {
+ test_uid = 1;
+ test_gid = 2;
+ test_gids = (gid_t*) gids;
+ n_test_gids = ELEMENTSOF(gids);
+
+ assert_se(setgroups(n_test_gids, test_gids) >= 0);
+ assert_se(setresgid(test_gid, test_gid, test_gid) >= 0);
+ assert_se(setresuid(test_uid, test_uid, test_uid) >= 0);
+
+ } else {
+ long ngroups_max;
+
+ test_uid = getuid();
+ test_gid = getgid();
+
+ ngroups_max = sysconf(_SC_NGROUPS_MAX);
+ assert(ngroups_max > 0);
+
+ test_gids = newa(gid_t, ngroups_max);
+
+ r = getgroups(ngroups_max, test_gids);
+ assert_se(r >= 0);
+ n_test_gids = (size_t) r;
+ }
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0);
+
+ assert_se(getpeercred(pair[0], &ucred) >= 0);
+
+ assert_se(ucred.uid == test_uid);
+ assert_se(ucred.gid == test_gid);
+ assert_se(ucred.pid == getpid_cached());
+
+ {
+ _cleanup_free_ gid_t *peer_groups = NULL;
+
+ r = getpeergroups(pair[0], &peer_groups);
+ assert_se(r >= 0 || IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT));
+
+ if (r >= 0) {
+ assert_se((size_t) r == n_test_gids);
+ assert_se(memcmp(peer_groups, test_gids, sizeof(gid_t) * n_test_gids) == 0);
+ }
+ }
+
+ safe_close_pair(pair);
+ _exit(EXIT_SUCCESS);
+ }
+}
+
+static void test_passfd_read(void) {
+ static const char file_contents[] = "test contents for passfd";
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(passfd_read)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ char tmpfile[] = "/tmp/test-socket-util-passfd-read-XXXXXX";
+ _cleanup_close_ int tmpfd = -1;
+
+ pair[0] = safe_close(pair[0]);
+
+ tmpfd = mkostemp_safe(tmpfile);
+ assert_se(tmpfd >= 0);
+ assert_se(write(tmpfd, file_contents, strlen(file_contents)) == (ssize_t) strlen(file_contents));
+ tmpfd = safe_close(tmpfd);
+
+ tmpfd = open(tmpfile, O_RDONLY);
+ assert_se(tmpfd >= 0);
+ assert_se(unlink(tmpfile) == 0);
+
+ assert_se(send_one_fd(pair[1], tmpfd, MSG_DONTWAIT) == 0);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ _cleanup_close_ int fd = -1;
+
+ pair[1] = safe_close(pair[1]);
+
+ assert_se(receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd) == 0);
+
+ assert_se(fd >= 0);
+ r = read(fd, buf, sizeof(buf)-1);
+ assert_se(r >= 0);
+ buf[r] = 0;
+ assert_se(streq(buf, file_contents));
+}
+
+static void test_passfd_contents_read(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ static const char file_contents[] = "test contents in the file";
+ static const char wire_contents[] = "test contents on the wire";
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(passfd_contents_read)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ struct iovec iov = IOVEC_INIT_STRING(wire_contents);
+ char tmpfile[] = "/tmp/test-socket-util-passfd-contents-read-XXXXXX";
+ _cleanup_close_ int tmpfd = -1;
+
+ pair[0] = safe_close(pair[0]);
+
+ tmpfd = mkostemp_safe(tmpfile);
+ assert_se(tmpfd >= 0);
+ assert_se(write(tmpfd, file_contents, strlen(file_contents)) == (ssize_t) strlen(file_contents));
+ tmpfd = safe_close(tmpfd);
+
+ tmpfd = open(tmpfile, O_RDONLY);
+ assert_se(tmpfd >= 0);
+ assert_se(unlink(tmpfile) == 0);
+
+ assert_se(send_one_fd_iov(pair[1], tmpfd, &iov, 1, MSG_DONTWAIT) > 0);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ _cleanup_close_ int fd = -1;
+ ssize_t k;
+
+ pair[1] = safe_close(pair[1]);
+
+ k = receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd);
+ assert_se(k > 0);
+ buf[k] = 0;
+ assert_se(streq(buf, wire_contents));
+
+ assert_se(fd >= 0);
+ r = read(fd, buf, sizeof(buf)-1);
+ assert_se(r >= 0);
+ buf[r] = 0;
+ assert_se(streq(buf, file_contents));
+}
+
+static void test_receive_nopassfd(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ static const char wire_contents[] = "no fd passed here";
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(receive_nopassfd)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ struct iovec iov = IOVEC_INIT_STRING(wire_contents);
+
+ pair[0] = safe_close(pair[0]);
+
+ assert_se(send_one_fd_iov(pair[1], -1, &iov, 1, MSG_DONTWAIT) > 0);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ int fd = -999;
+ ssize_t k;
+
+ pair[1] = safe_close(pair[1]);
+
+ k = receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd);
+ assert_se(k > 0);
+ buf[k] = 0;
+ assert_se(streq(buf, wire_contents));
+
+ /* no fd passed here, confirm it was reset */
+ assert_se(fd == -1);
+}
+
+static void test_send_nodata_nofd(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(send_nodata_nofd)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ pair[0] = safe_close(pair[0]);
+
+ assert_se(send_one_fd_iov(pair[1], -1, NULL, 0, MSG_DONTWAIT) == -EINVAL);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ int fd = -999;
+ ssize_t k;
+
+ pair[1] = safe_close(pair[1]);
+
+ k = receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd);
+ /* recvmsg() will return errno EAGAIN if nothing was sent */
+ assert_se(k == -EAGAIN);
+
+ /* receive_one_fd_iov returned error, so confirm &fd wasn't touched */
+ assert_se(fd == -999);
+}
+
+static void test_send_emptydata(void) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) >= 0);
+
+ r = safe_fork("(send_emptydata)", FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ /* Child */
+ struct iovec iov = IOVEC_INIT_STRING(""); /* zero-length iov */
+ assert_se(iov.iov_len == 0);
+
+ pair[0] = safe_close(pair[0]);
+
+ /* This will succeed, since iov is set. */
+ assert_se(send_one_fd_iov(pair[1], -1, &iov, 1, MSG_DONTWAIT) == 0);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent */
+ char buf[64];
+ struct iovec iov = IOVEC_INIT(buf, sizeof(buf)-1);
+ int fd = -999;
+ ssize_t k;
+
+ pair[1] = safe_close(pair[1]);
+
+ k = receive_one_fd_iov(pair[0], &iov, 1, MSG_DONTWAIT, &fd);
+ /* receive_one_fd_iov() returns -EIO if an fd is not found and no data was returned. */
+ assert_se(k == -EIO);
+
+ /* receive_one_fd_iov returned error, so confirm &fd wasn't touched */
+ assert_se(fd == -999);
+}
+
+static void test_flush_accept(void) {
+ _cleanup_close_ int listen_stream = -1, listen_dgram = -1, listen_seqpacket = 1, connect_stream = -1, connect_dgram = -1, connect_seqpacket = -1;
+ static const union sockaddr_union sa = { .un.sun_family = AF_UNIX };
+ union sockaddr_union lsa;
+ socklen_t l;
+
+ listen_stream = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ assert_se(listen_stream >= 0);
+
+ listen_dgram = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ assert_se(listen_dgram >= 0);
+
+ listen_seqpacket = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ assert_se(listen_seqpacket >= 0);
+
+ assert_se(flush_accept(listen_stream) < 0);
+ assert_se(flush_accept(listen_dgram) < 0);
+ assert_se(flush_accept(listen_seqpacket) < 0);
+
+ assert_se(bind(listen_stream, &sa.sa, sizeof(sa_family_t)) >= 0);
+ assert_se(bind(listen_dgram, &sa.sa, sizeof(sa_family_t)) >= 0);
+ assert_se(bind(listen_seqpacket, &sa.sa, sizeof(sa_family_t)) >= 0);
+
+ assert_se(flush_accept(listen_stream) < 0);
+ assert_se(flush_accept(listen_dgram) < 0);
+ assert_se(flush_accept(listen_seqpacket) < 0);
+
+ assert_se(listen(listen_stream, SOMAXCONN) >= 0);
+ assert_se(listen(listen_dgram, SOMAXCONN) < 0);
+ assert_se(listen(listen_seqpacket, SOMAXCONN) >= 0);
+
+ assert_se(flush_accept(listen_stream) >= 0);
+ assert_se(flush_accept(listen_dgram) < 0);
+ assert_se(flush_accept(listen_seqpacket) >= 0);
+
+ connect_stream = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ assert_se(connect_stream >= 0);
+
+ connect_dgram = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ assert_se(connect_dgram >= 0);
+
+ connect_seqpacket = socket(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ assert_se(connect_seqpacket >= 0);
+
+ l = sizeof(lsa);
+ assert_se(getsockname(listen_stream, &lsa.sa, &l) >= 0);
+ assert_se(connect(connect_stream, &lsa.sa, l) >= 0);
+
+ l = sizeof(lsa);
+ assert_se(getsockname(listen_dgram, &lsa.sa, &l) >= 0);
+ assert_se(connect(connect_dgram, &lsa.sa, l) >= 0);
+
+ l = sizeof(lsa);
+ assert_se(getsockname(listen_seqpacket, &lsa.sa, &l) >= 0);
+ assert_se(connect(connect_seqpacket, &lsa.sa, l) >= 0);
+
+ assert_se(flush_accept(listen_stream) >= 0);
+ assert_se(flush_accept(listen_dgram) < 0);
+ assert_se(flush_accept(listen_seqpacket) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_ifname_valid();
+ test_socket_print_unix();
+ test_sockaddr_equal();
+ test_sockaddr_un_len();
+ test_in_addr_is_multicast();
+ test_getpeercred_getpeergroups();
+ test_passfd_read();
+ test_passfd_contents_read();
+ test_receive_nopassfd();
+ test_send_nodata_nofd();
+ test_send_emptydata();
+ test_flush_accept();
+
+ return 0;
+}
diff --git a/src/test/test-specifier.c b/src/test/test-specifier.c
new file mode 100644
index 0000000..2648c1c
--- /dev/null
+++ b/src/test/test-specifier.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "log.h"
+#include "specifier.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_specifier_escape_one(const char *a, const char *b) {
+ _cleanup_free_ char *x = NULL;
+
+ x = specifier_escape(a);
+ assert_se(streq_ptr(x, b));
+}
+
+static void test_specifier_escape(void) {
+ log_info("/* %s */", __func__);
+
+ test_specifier_escape_one(NULL, NULL);
+ test_specifier_escape_one("", "");
+ test_specifier_escape_one("%", "%%");
+ test_specifier_escape_one("foo bar", "foo bar");
+ test_specifier_escape_one("foo%bar", "foo%%bar");
+ test_specifier_escape_one("%%%%%", "%%%%%%%%%%");
+}
+
+static void test_specifier_escape_strv_one(char **a, char **b) {
+ _cleanup_strv_free_ char **x = NULL;
+
+ assert_se(specifier_escape_strv(a, &x) >= 0);
+ assert_se(strv_equal(x, b));
+}
+
+static void test_specifier_escape_strv(void) {
+ log_info("/* %s */", __func__);
+
+ test_specifier_escape_strv_one(NULL, NULL);
+ test_specifier_escape_strv_one(STRV_MAKE(NULL), STRV_MAKE(NULL));
+ test_specifier_escape_strv_one(STRV_MAKE(""), STRV_MAKE(""));
+ test_specifier_escape_strv_one(STRV_MAKE("foo"), STRV_MAKE("foo"));
+ test_specifier_escape_strv_one(STRV_MAKE("%"), STRV_MAKE("%%"));
+ test_specifier_escape_strv_one(STRV_MAKE("foo", "%", "foo%", "%foo", "foo%foo", "quux", "%%%"),
+ STRV_MAKE("foo", "%%", "foo%%", "%%foo", "foo%%foo", "quux", "%%%%%%"));
+}
+
+/* Any specifier functions which don't need an argument. */
+static const Specifier specifier_table[] = {
+ COMMON_SYSTEM_SPECIFIERS,
+
+ COMMON_CREDS_SPECIFIERS,
+ { 'h', specifier_user_home, NULL },
+
+ COMMON_TMP_SPECIFIERS,
+ {}
+};
+
+static void test_specifiers(void) {
+ log_info("/* %s */", __func__);
+
+ for (const Specifier *s = specifier_table; s->specifier; s++) {
+ char spec[3];
+ _cleanup_free_ char *resolved = NULL;
+
+ xsprintf(spec, "%%%c", s->specifier);
+
+ assert_se(specifier_printf(spec, specifier_table, NULL, &resolved) >= 0);
+
+ log_info("%%%c → %s", s->specifier, resolved);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_specifier_escape();
+ test_specifier_escape_strv();
+ test_specifiers();
+
+ return 0;
+}
diff --git a/src/test/test-stat-util.c b/src/test/test-stat-util.c
new file mode 100644
index 0000000..9aca09c
--- /dev/null
+++ b/src/test/test-stat-util.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <linux/magic.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "namespace-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "tmpfile-util.h"
+
+static void test_files_same(void) {
+ _cleanup_close_ int fd = -1;
+ char name[] = "/tmp/test-files_same.XXXXXX";
+ char name_alias[] = "/tmp/test-files_same.alias";
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(symlink(name, name_alias) >= 0);
+
+ assert_se(files_same(name, name, 0));
+ assert_se(files_same(name, name, AT_SYMLINK_NOFOLLOW));
+ assert_se(files_same(name, name_alias, 0));
+ assert_se(!files_same(name, name_alias, AT_SYMLINK_NOFOLLOW));
+
+ unlink(name);
+ unlink(name_alias);
+}
+
+static void test_is_symlink(void) {
+ char name[] = "/tmp/test-is_symlink.XXXXXX";
+ char name_link[] = "/tmp/test-is_symlink.link";
+ _cleanup_close_ int fd = -1;
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(symlink(name, name_link) >= 0);
+
+ assert_se(is_symlink(name) == 0);
+ assert_se(is_symlink(name_link) == 1);
+ assert_se(is_symlink("/a/file/which/does/not/exist/i/guess") < 0);
+
+ unlink(name);
+ unlink(name_link);
+}
+
+static void test_path_is_fs_type(void) {
+ /* run might not be a mount point in build chroots */
+ if (path_is_mount_point("/run", NULL, AT_SYMLINK_FOLLOW) > 0) {
+ assert_se(path_is_fs_type("/run", TMPFS_MAGIC) > 0);
+ assert_se(path_is_fs_type("/run", BTRFS_SUPER_MAGIC) == 0);
+ }
+ assert_se(path_is_fs_type("/proc", PROC_SUPER_MAGIC) > 0);
+ assert_se(path_is_fs_type("/proc", BTRFS_SUPER_MAGIC) == 0);
+ assert_se(path_is_fs_type("/i-dont-exist", BTRFS_SUPER_MAGIC) == -ENOENT);
+}
+
+static void test_path_is_temporary_fs(void) {
+ /* run might not be a mount point in build chroots */
+ if (path_is_mount_point("/run", NULL, AT_SYMLINK_FOLLOW) > 0)
+ assert_se(path_is_temporary_fs("/run") > 0);
+ assert_se(path_is_temporary_fs("/proc") == 0);
+ assert_se(path_is_temporary_fs("/i-dont-exist") == -ENOENT);
+}
+
+static void test_fd_is_network_ns(void) {
+ _cleanup_close_ int fd = -1;
+ assert_se(fd_is_network_ns(STDIN_FILENO) == 0);
+ assert_se(fd_is_network_ns(STDERR_FILENO) == 0);
+ assert_se(fd_is_network_ns(STDOUT_FILENO) == 0);
+
+ assert_se((fd = open("/proc/self/ns/mnt", O_CLOEXEC|O_RDONLY)) >= 0);
+ assert_se(IN_SET(fd_is_network_ns(fd), 0, -EUCLEAN));
+ fd = safe_close(fd);
+
+ assert_se((fd = open("/proc/self/ns/net", O_CLOEXEC|O_RDONLY)) >= 0);
+ assert_se(IN_SET(fd_is_network_ns(fd), 1, -EUCLEAN));
+}
+
+static void test_device_major_minor_valid(void) {
+ /* on glibc dev_t is 64bit, even though in the kernel it is only 32bit */
+ assert_cc(sizeof(dev_t) == sizeof(uint64_t));
+
+ assert_se(DEVICE_MAJOR_VALID(0U));
+ assert_se(DEVICE_MINOR_VALID(0U));
+
+ assert_se(DEVICE_MAJOR_VALID(1U));
+ assert_se(DEVICE_MINOR_VALID(1U));
+
+ assert_se(!DEVICE_MAJOR_VALID(-1U));
+ assert_se(!DEVICE_MINOR_VALID(-1U));
+
+ assert_se(DEVICE_MAJOR_VALID(1U << 10));
+ assert_se(DEVICE_MINOR_VALID(1U << 10));
+
+ assert_se(DEVICE_MAJOR_VALID((1U << 12) - 1));
+ assert_se(DEVICE_MINOR_VALID((1U << 20) - 1));
+
+ assert_se(!DEVICE_MAJOR_VALID((1U << 12)));
+ assert_se(!DEVICE_MINOR_VALID((1U << 20)));
+
+ assert_se(!DEVICE_MAJOR_VALID(1U << 25));
+ assert_se(!DEVICE_MINOR_VALID(1U << 25));
+
+ assert_se(!DEVICE_MAJOR_VALID(UINT32_MAX));
+ assert_se(!DEVICE_MINOR_VALID(UINT32_MAX));
+
+ assert_se(!DEVICE_MAJOR_VALID(UINT64_MAX));
+ assert_se(!DEVICE_MINOR_VALID(UINT64_MAX));
+
+ assert_se(DEVICE_MAJOR_VALID(major(0)));
+ assert_se(DEVICE_MINOR_VALID(minor(0)));
+}
+
+static void test_device_path_make_canonical_one(const char *path) {
+ _cleanup_free_ char *resolved = NULL, *raw = NULL;
+ struct stat st;
+ dev_t devno;
+ mode_t mode;
+ int r;
+
+ assert_se(stat(path, &st) >= 0);
+ r = device_path_make_canonical(st.st_mode, st.st_rdev, &resolved);
+ if (r == -ENOENT) /* maybe /dev/char/x:y and /dev/block/x:y are missing in this test environment, because we
+ * run in a container or so? */
+ return;
+
+ assert_se(r >= 0);
+ assert_se(path_equal(path, resolved));
+
+ assert_se(device_path_make_major_minor(st.st_mode, st.st_rdev, &raw) >= 0);
+ assert_se(device_path_parse_major_minor(raw, &mode, &devno) >= 0);
+
+ assert_se(st.st_rdev == devno);
+ assert_se((st.st_mode & S_IFMT) == (mode & S_IFMT));
+}
+
+static void test_device_path_make_canonical(void) {
+
+ test_device_path_make_canonical_one("/dev/null");
+ test_device_path_make_canonical_one("/dev/zero");
+ test_device_path_make_canonical_one("/dev/full");
+ test_device_path_make_canonical_one("/dev/random");
+ test_device_path_make_canonical_one("/dev/urandom");
+ test_device_path_make_canonical_one("/dev/tty");
+
+ if (is_device_node("/run/systemd/inaccessible/blk") > 0) {
+ test_device_path_make_canonical_one("/run/systemd/inaccessible/chr");
+ test_device_path_make_canonical_one("/run/systemd/inaccessible/blk");
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_files_same();
+ test_is_symlink();
+ test_path_is_fs_type();
+ test_path_is_temporary_fs();
+ test_fd_is_network_ns();
+ test_device_major_minor_valid();
+ test_device_path_make_canonical();
+
+ return 0;
+}
diff --git a/src/test/test-static-destruct.c b/src/test/test-static-destruct.c
new file mode 100644
index 0000000..904a9bb
--- /dev/null
+++ b/src/test/test-static-destruct.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "static-destruct.h"
+#include "tests.h"
+
+static int foo = 0;
+static int bar = 0;
+static int baz = 0;
+static char* memory = NULL;
+
+static void test_destroy(int *b) {
+ (*b)++;
+}
+
+STATIC_DESTRUCTOR_REGISTER(foo, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(bar, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(bar, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(baz, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(baz, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(baz, test_destroy);
+STATIC_DESTRUCTOR_REGISTER(memory, freep);
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ assert_se(memory = strdup("hallo"));
+
+ assert_se(foo == 0 && bar == 0 && baz == 0);
+ static_destruct();
+ assert_se(foo == 1 && bar == 2 && baz == 3);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-strbuf.c b/src/test/test-strbuf.c
new file mode 100644
index 0000000..867be19
--- /dev/null
+++ b/src/test/test-strbuf.c
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+
+#include "strbuf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+static ssize_t add_string(struct strbuf *sb, const char *s) {
+ return strbuf_add_string(sb, s, strlen(s));
+}
+
+static void test_strbuf(void) {
+ _cleanup_(strbuf_cleanupp) struct strbuf *sb;
+ _cleanup_strv_free_ char **l;
+ ssize_t a, b, c, d, e, f, g, h;
+
+ sb = strbuf_new();
+
+ a = add_string(sb, "waldo");
+ b = add_string(sb, "foo");
+ c = add_string(sb, "bar");
+ d = add_string(sb, "waldo"); /* duplicate */
+ e = add_string(sb, "aldo"); /* duplicate */
+ f = add_string(sb, "do"); /* duplicate */
+ g = add_string(sb, "waldorf"); /* not a duplicate: matches from tail */
+ h = add_string(sb, "");
+
+ /* check the content of the buffer directly */
+ l = strv_parse_nulstr(sb->buf, sb->len);
+
+ assert_se(streq(l[0], "")); /* root */
+ assert_se(streq(l[1], "waldo"));
+ assert_se(streq(l[2], "foo"));
+ assert_se(streq(l[3], "bar"));
+ assert_se(streq(l[4], "waldorf"));
+ assert_se(l[5] == NULL);
+
+ assert_se(sb->nodes_count == 5); /* root + 4 non-duplicates */
+ assert_se(sb->dedup_count == 4);
+ assert_se(sb->in_count == 8);
+
+ assert_se(sb->in_len == 29); /* length of all strings added */
+ assert_se(sb->dedup_len == 11); /* length of all strings duplicated */
+ assert_se(sb->len == 23); /* buffer length: in - dedup + \0 for each node */
+
+ /* check the returned offsets and the respective content in the buffer */
+ assert_se(a == 1);
+ assert_se(b == 7);
+ assert_se(c == 11);
+ assert_se(d == 1);
+ assert_se(e == 2);
+ assert_se(f == 4);
+ assert_se(g == 15);
+ assert_se(h == 0);
+
+ assert_se(streq(sb->buf + a, "waldo"));
+ assert_se(streq(sb->buf + b, "foo"));
+ assert_se(streq(sb->buf + c, "bar"));
+ assert_se(streq(sb->buf + d, "waldo"));
+ assert_se(streq(sb->buf + e, "aldo"));
+ assert_se(streq(sb->buf + f, "do"));
+ assert_se(streq(sb->buf + g, "waldorf"));
+ assert_se(streq(sb->buf + h, ""));
+
+ strbuf_complete(sb);
+ assert_se(sb->root == NULL);
+}
+
+int main(int argc, const char *argv[]) {
+ test_strbuf();
+
+ return 0;
+}
diff --git a/src/test/test-string-util.c b/src/test/test-string-util.c
new file mode 100644
index 0000000..b74eb18
--- /dev/null
+++ b/src/test/test-string-util.c
@@ -0,0 +1,928 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "utf8.h"
+#include "util.h"
+
+static void test_string_erase(void) {
+ log_info("/* %s */", __func__);
+
+ char *x;
+ x = strdupa("");
+ assert_se(streq(string_erase(x), ""));
+
+ x = strdupa("1");
+ assert_se(streq(string_erase(x), ""));
+
+ x = strdupa("123456789");
+ assert_se(streq(string_erase(x), ""));
+
+ assert_se(x[1] == '\0');
+ assert_se(x[2] == '\0');
+ assert_se(x[3] == '\0');
+ assert_se(x[4] == '\0');
+ assert_se(x[5] == '\0');
+ assert_se(x[6] == '\0');
+ assert_se(x[7] == '\0');
+ assert_se(x[8] == '\0');
+ assert_se(x[9] == '\0');
+}
+
+static void test_free_and_strndup_one(char **t, const char *src, size_t l, const char *expected, bool change) {
+ log_debug("%s: \"%s\", \"%s\", %zd (expect \"%s\", %s)",
+ __func__, strnull(*t), strnull(src), l, strnull(expected), yes_no(change));
+
+ int r = free_and_strndup(t, src, l);
+ assert_se(streq_ptr(*t, expected));
+ assert_se(r == change); /* check that change occurs only when necessary */
+}
+
+static void test_free_and_strndup(void) {
+ log_info("/* %s */", __func__);
+
+ static const struct test_case {
+ const char *src;
+ size_t len;
+ const char *expected;
+ } cases[] = {
+ {"abc", 0, ""},
+ {"abc", 0, ""},
+ {"abc", 1, "a"},
+ {"abc", 2, "ab"},
+ {"abc", 3, "abc"},
+ {"abc", 4, "abc"},
+ {"abc", 5, "abc"},
+ {"abc", 5, "abc"},
+ {"abc", 4, "abc"},
+ {"abc", 3, "abc"},
+ {"abc", 2, "ab"},
+ {"abc", 1, "a"},
+ {"abc", 0, ""},
+
+ {"", 0, ""},
+ {"", 1, ""},
+ {"", 2, ""},
+ {"", 0, ""},
+ {"", 1, ""},
+ {"", 2, ""},
+ {"", 2, ""},
+ {"", 1, ""},
+ {"", 0, ""},
+
+ {NULL, 0, NULL},
+
+ {"foo", 3, "foo"},
+ {"foobar", 6, "foobar"},
+ };
+
+ _cleanup_free_ char *t = NULL;
+ const char *prev_expected = t;
+
+ for (unsigned i = 0; i < ELEMENTSOF(cases); i++) {
+ test_free_and_strndup_one(&t,
+ cases[i].src, cases[i].len, cases[i].expected,
+ !streq_ptr(cases[i].expected, prev_expected));
+ prev_expected = t;
+ }
+}
+
+static void test_ascii_strcasecmp_n(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(ascii_strcasecmp_n("", "", 0) == 0);
+ assert_se(ascii_strcasecmp_n("", "", 1) == 0);
+ assert_se(ascii_strcasecmp_n("", "a", 1) < 0);
+ assert_se(ascii_strcasecmp_n("", "a", 2) < 0);
+ assert_se(ascii_strcasecmp_n("a", "", 1) > 0);
+ assert_se(ascii_strcasecmp_n("a", "", 2) > 0);
+ assert_se(ascii_strcasecmp_n("a", "a", 1) == 0);
+ assert_se(ascii_strcasecmp_n("a", "a", 2) == 0);
+ assert_se(ascii_strcasecmp_n("a", "b", 1) < 0);
+ assert_se(ascii_strcasecmp_n("a", "b", 2) < 0);
+ assert_se(ascii_strcasecmp_n("b", "a", 1) > 0);
+ assert_se(ascii_strcasecmp_n("b", "a", 2) > 0);
+ assert_se(ascii_strcasecmp_n("xxxxyxxxx", "xxxxYxxxx", 9) == 0);
+ assert_se(ascii_strcasecmp_n("xxxxxxxxx", "xxxxyxxxx", 9) < 0);
+ assert_se(ascii_strcasecmp_n("xxxxXxxxx", "xxxxyxxxx", 9) < 0);
+ assert_se(ascii_strcasecmp_n("xxxxxxxxx", "xxxxYxxxx", 9) < 0);
+ assert_se(ascii_strcasecmp_n("xxxxXxxxx", "xxxxYxxxx", 9) < 0);
+
+ assert_se(ascii_strcasecmp_n("xxxxYxxxx", "xxxxYxxxx", 9) == 0);
+ assert_se(ascii_strcasecmp_n("xxxxyxxxx", "xxxxxxxxx", 9) > 0);
+ assert_se(ascii_strcasecmp_n("xxxxyxxxx", "xxxxXxxxx", 9) > 0);
+ assert_se(ascii_strcasecmp_n("xxxxYxxxx", "xxxxxxxxx", 9) > 0);
+ assert_se(ascii_strcasecmp_n("xxxxYxxxx", "xxxxXxxxx", 9) > 0);
+}
+
+static void test_ascii_strcasecmp_nn(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(ascii_strcasecmp_nn("", 0, "", 0) == 0);
+ assert_se(ascii_strcasecmp_nn("", 0, "", 1) < 0);
+ assert_se(ascii_strcasecmp_nn("", 1, "", 0) > 0);
+ assert_se(ascii_strcasecmp_nn("", 1, "", 1) == 0);
+
+ assert_se(ascii_strcasecmp_nn("aaaa", 4, "aaAa", 4) == 0);
+ assert_se(ascii_strcasecmp_nn("aaa", 3, "aaAa", 4) < 0);
+ assert_se(ascii_strcasecmp_nn("aaa", 4, "aaAa", 4) < 0);
+ assert_se(ascii_strcasecmp_nn("aaaa", 4, "aaA", 3) > 0);
+ assert_se(ascii_strcasecmp_nn("aaaa", 4, "AAA", 4) > 0);
+
+ assert_se(ascii_strcasecmp_nn("aaaa", 4, "bbbb", 4) < 0);
+ assert_se(ascii_strcasecmp_nn("aaAA", 4, "BBbb", 4) < 0);
+ assert_se(ascii_strcasecmp_nn("BBbb", 4, "aaaa", 4) > 0);
+}
+
+static void test_cellescape(void) {
+ char buf[40];
+
+ log_info("/* %s */", __func__);
+
+ assert_se(streq(cellescape(buf, 1, ""), ""));
+ assert_se(streq(cellescape(buf, 1, "1"), ""));
+ assert_se(streq(cellescape(buf, 1, "12"), ""));
+
+ assert_se(streq(cellescape(buf, 2, ""), ""));
+ assert_se(streq(cellescape(buf, 2, "1"), "1"));
+ assert_se(streq(cellescape(buf, 2, "12"), "."));
+ assert_se(streq(cellescape(buf, 2, "123"), "."));
+
+ assert_se(streq(cellescape(buf, 3, ""), ""));
+ assert_se(streq(cellescape(buf, 3, "1"), "1"));
+ assert_se(streq(cellescape(buf, 3, "12"), "12"));
+ assert_se(streq(cellescape(buf, 3, "123"), ".."));
+ assert_se(streq(cellescape(buf, 3, "1234"), ".."));
+
+ assert_se(streq(cellescape(buf, 4, ""), ""));
+ assert_se(streq(cellescape(buf, 4, "1"), "1"));
+ assert_se(streq(cellescape(buf, 4, "12"), "12"));
+ assert_se(streq(cellescape(buf, 4, "123"), "123"));
+ assert_se(streq(cellescape(buf, 4, "1234"), is_locale_utf8() ? "…" : "..."));
+ assert_se(streq(cellescape(buf, 4, "12345"), is_locale_utf8() ? "…" : "..."));
+
+ assert_se(streq(cellescape(buf, 5, ""), ""));
+ assert_se(streq(cellescape(buf, 5, "1"), "1"));
+ assert_se(streq(cellescape(buf, 5, "12"), "12"));
+ assert_se(streq(cellescape(buf, 5, "123"), "123"));
+ assert_se(streq(cellescape(buf, 5, "1234"), "1234"));
+ assert_se(streq(cellescape(buf, 5, "12345"), is_locale_utf8() ? "1…" : "1..."));
+ assert_se(streq(cellescape(buf, 5, "123456"), is_locale_utf8() ? "1…" : "1..."));
+
+ assert_se(streq(cellescape(buf, 1, "\020"), ""));
+ assert_se(streq(cellescape(buf, 2, "\020"), "."));
+ assert_se(streq(cellescape(buf, 3, "\020"), ".."));
+ assert_se(streq(cellescape(buf, 4, "\020"), "…"));
+ assert_se(streq(cellescape(buf, 5, "\020"), "\\020"));
+
+ assert_se(streq(cellescape(buf, 5, "1234\020"), "1…"));
+ assert_se(streq(cellescape(buf, 6, "1234\020"), "12…"));
+ assert_se(streq(cellescape(buf, 7, "1234\020"), "123…"));
+ assert_se(streq(cellescape(buf, 8, "1234\020"), "1234…"));
+ assert_se(streq(cellescape(buf, 9, "1234\020"), "1234\\020"));
+
+ assert_se(streq(cellescape(buf, 1, "\t\n"), ""));
+ assert_se(streq(cellescape(buf, 2, "\t\n"), "."));
+ assert_se(streq(cellescape(buf, 3, "\t\n"), ".."));
+ assert_se(streq(cellescape(buf, 4, "\t\n"), "…"));
+ assert_se(streq(cellescape(buf, 5, "\t\n"), "\\t\\n"));
+
+ assert_se(streq(cellescape(buf, 5, "1234\t\n"), "1…"));
+ assert_se(streq(cellescape(buf, 6, "1234\t\n"), "12…"));
+ assert_se(streq(cellescape(buf, 7, "1234\t\n"), "123…"));
+ assert_se(streq(cellescape(buf, 8, "1234\t\n"), "1234…"));
+ assert_se(streq(cellescape(buf, 9, "1234\t\n"), "1234\\t\\n"));
+
+ assert_se(streq(cellescape(buf, 4, "x\t\020\n"), "…"));
+ assert_se(streq(cellescape(buf, 5, "x\t\020\n"), "x…"));
+ assert_se(streq(cellescape(buf, 6, "x\t\020\n"), "x…"));
+ assert_se(streq(cellescape(buf, 7, "x\t\020\n"), "x\\t…"));
+ assert_se(streq(cellescape(buf, 8, "x\t\020\n"), "x\\t…"));
+ assert_se(streq(cellescape(buf, 9, "x\t\020\n"), "x\\t…"));
+ assert_se(streq(cellescape(buf, 10, "x\t\020\n"), "x\\t\\020\\n"));
+
+ assert_se(streq(cellescape(buf, 6, "1\011"), "1\\t"));
+ assert_se(streq(cellescape(buf, 6, "1\020"), "1\\020"));
+ assert_se(streq(cellescape(buf, 6, "1\020x"), is_locale_utf8() ? "1…" : "1..."));
+
+ assert_se(streq(cellescape(buf, 40, "1\020"), "1\\020"));
+ assert_se(streq(cellescape(buf, 40, "1\020x"), "1\\020x"));
+
+ assert_se(streq(cellescape(buf, 40, "\a\b\f\n\r\t\v\\\"'"), "\\a\\b\\f\\n\\r\\t\\v\\\\\\\"\\'"));
+ assert_se(streq(cellescape(buf, 6, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a…" : "\\a..."));
+ assert_se(streq(cellescape(buf, 7, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a…" : "\\a..."));
+ assert_se(streq(cellescape(buf, 8, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a\\b…" : "\\a\\b..."));
+
+ assert_se(streq(cellescape(buf, sizeof buf, "1\020"), "1\\020"));
+ assert_se(streq(cellescape(buf, sizeof buf, "1\020x"), "1\\020x"));
+}
+
+static void test_streq_ptr(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(streq_ptr(NULL, NULL));
+ assert_se(!streq_ptr("abc", "cdef"));
+}
+
+static void test_strstrip(void) {
+ log_info("/* %s */", __func__);
+
+ char *ret, input[] = " hello, waldo. ";
+
+ ret = strstrip(input);
+ assert_se(streq(ret, "hello, waldo."));
+}
+
+static void test_strextend(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_free_ char *str = NULL;
+
+ assert_se(strextend(&str, NULL));
+ assert_se(streq_ptr(str, ""));
+ assert_se(strextend(&str, "", "0", "", "", "123", NULL));
+ assert_se(streq_ptr(str, "0123"));
+ assert_se(strextend(&str, "456", "78", "9", NULL));
+ assert_se(streq_ptr(str, "0123456789"));
+}
+
+static void test_strextend_with_separator(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_free_ char *str = NULL;
+
+ assert_se(strextend_with_separator(&str, NULL, NULL));
+ assert_se(streq_ptr(str, ""));
+ str = mfree(str);
+
+ assert_se(strextend_with_separator(&str, "...", NULL));
+ assert_se(streq_ptr(str, ""));
+ assert_se(strextend_with_separator(&str, "...", NULL));
+ assert_se(streq_ptr(str, ""));
+ str = mfree(str);
+
+ assert_se(strextend_with_separator(&str, "xyz", "a", "bb", "ccc", NULL));
+ assert_se(streq_ptr(str, "axyzbbxyzccc"));
+ str = mfree(str);
+
+ assert_se(strextend_with_separator(&str, ",", "start", "", "1", "234", NULL));
+ assert_se(streq_ptr(str, "start,,1,234"));
+ assert_se(strextend_with_separator(&str, ";", "more", "5", "678", NULL));
+ assert_se(streq_ptr(str, "start,,1,234;more;5;678"));
+}
+
+static void test_strrep(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_free_ char *one, *three, *zero;
+ one = strrep("waldo", 1);
+ three = strrep("waldo", 3);
+ zero = strrep("waldo", 0);
+
+ assert_se(streq(one, "waldo"));
+ assert_se(streq(three, "waldowaldowaldo"));
+ assert_se(streq(zero, ""));
+}
+
+static void test_string_has_cc(void) {
+ assert_se(string_has_cc("abc\1", NULL));
+ assert_se(string_has_cc("abc\x7f", NULL));
+ assert_se(string_has_cc("abc\x7f", NULL));
+ assert_se(string_has_cc("abc\t\x7f", "\t"));
+ assert_se(string_has_cc("abc\t\x7f", "\t"));
+ assert_se(string_has_cc("\x7f", "\t"));
+ assert_se(string_has_cc("\x7f", "\t\a"));
+
+ assert_se(!string_has_cc("abc\t\t", "\t"));
+ assert_se(!string_has_cc("abc\t\t\a", "\t\a"));
+ assert_se(!string_has_cc("a\ab\tc", "\t\a"));
+}
+
+static void test_ascii_strlower(void) {
+ log_info("/* %s */", __func__);
+
+ char a[] = "AabBcC Jk Ii Od LKJJJ kkd LK";
+ assert_se(streq(ascii_strlower(a), "aabbcc jk ii od lkjjj kkd lk"));
+}
+
+static void test_strshorten(void) {
+ log_info("/* %s */", __func__);
+
+ char s[] = "foobar";
+
+ assert_se(strlen(strshorten(s, 6)) == 6);
+ assert_se(strlen(strshorten(s, 12)) == 6);
+ assert_se(strlen(strshorten(s, 2)) == 2);
+ assert_se(strlen(strshorten(s, 0)) == 0);
+}
+
+static void test_strjoina(void) {
+ log_info("/* %s */", __func__);
+
+ char *actual;
+
+ actual = strjoina("", "foo", "bar");
+ assert_se(streq(actual, "foobar"));
+
+ actual = strjoina("foo", "bar", "baz");
+ assert_se(streq(actual, "foobarbaz"));
+
+ actual = strjoina("foo", "", "bar", "baz");
+ assert_se(streq(actual, "foobarbaz"));
+
+ actual = strjoina("foo");
+ assert_se(streq(actual, "foo"));
+
+ actual = strjoina(NULL);
+ assert_se(streq(actual, ""));
+
+ actual = strjoina(NULL, "foo");
+ assert_se(streq(actual, ""));
+
+ actual = strjoina("foo", NULL, "bar");
+ assert_se(streq(actual, "foo"));
+}
+
+static void test_strjoin(void) {
+ char *actual;
+
+ actual = strjoin("", "foo", "bar");
+ assert_se(streq(actual, "foobar"));
+ mfree(actual);
+
+ actual = strjoin("foo", "bar", "baz");
+ assert_se(streq(actual, "foobarbaz"));
+ mfree(actual);
+
+ actual = strjoin("foo", "", "bar", "baz");
+ assert_se(streq(actual, "foobarbaz"));
+ mfree(actual);
+
+ actual = strjoin("foo", NULL);
+ assert_se(streq(actual, "foo"));
+ mfree(actual);
+
+ actual = strjoin(NULL, NULL);
+ assert_se(streq(actual, ""));
+ mfree(actual);
+
+ actual = strjoin(NULL, "foo");
+ assert_se(streq(actual, ""));
+ mfree(actual);
+
+ actual = strjoin("foo", NULL, "bar");
+ assert_se(streq(actual, "foo"));
+ mfree(actual);
+}
+
+static void test_strcmp_ptr(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(strcmp_ptr(NULL, NULL) == 0);
+ assert_se(strcmp_ptr("", NULL) > 0);
+ assert_se(strcmp_ptr("foo", NULL) > 0);
+ assert_se(strcmp_ptr(NULL, "") < 0);
+ assert_se(strcmp_ptr(NULL, "bar") < 0);
+ assert_se(strcmp_ptr("foo", "bar") > 0);
+ assert_se(strcmp_ptr("bar", "baz") < 0);
+ assert_se(strcmp_ptr("foo", "foo") == 0);
+ assert_se(strcmp_ptr("", "") == 0);
+}
+
+static void test_foreach_word(void) {
+ log_info("/* %s */", __func__);
+
+ const char *test = "test abc d\te f ";
+ const char * const expected[] = {
+ "test",
+ "abc",
+ "d",
+ "e",
+ "f",
+ };
+
+ size_t i = 0;
+ int r;
+ for (const char *p = test;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0) {
+ assert_se(i == ELEMENTSOF(expected));
+ break;
+ }
+ assert_se(r > 0);
+
+ assert_se(streq(expected[i++], word));
+ }
+}
+
+static void check(const char *test, char** expected, bool trailing) {
+ size_t i = 0;
+ int r;
+
+ printf("<<<%s>>>\n", test);
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&test, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0) {
+ assert_se(!trailing);
+ break;
+ } else if (r < 0) {
+ assert_se(trailing);
+ break;
+ }
+
+ assert_se(streq(word, expected[i++]));
+ printf("<%s>\n", word);
+ }
+ assert_se(expected[i] == NULL);
+}
+
+static void test_foreach_word_quoted(void) {
+ log_info("/* %s */", __func__);
+
+ check("test a b c 'd' e '' '' hhh '' '' \"a b c\"",
+ STRV_MAKE("test",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "",
+ "",
+ "hhh",
+ "",
+ "",
+ "a b c"),
+ false);
+
+ check("test \"xxx",
+ STRV_MAKE("test"),
+ true);
+
+ check("test\\",
+ STRV_MAKE_EMPTY,
+ true);
+}
+
+static void test_endswith(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(endswith("foobar", "bar"));
+ assert_se(endswith("foobar", ""));
+ assert_se(endswith("foobar", "foobar"));
+ assert_se(endswith("", ""));
+
+ assert_se(!endswith("foobar", "foo"));
+ assert_se(!endswith("foobar", "foobarfoofoo"));
+}
+
+static void test_endswith_no_case(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(endswith_no_case("fooBAR", "bar"));
+ assert_se(endswith_no_case("foobar", ""));
+ assert_se(endswith_no_case("foobar", "FOOBAR"));
+ assert_se(endswith_no_case("", ""));
+
+ assert_se(!endswith_no_case("foobar", "FOO"));
+ assert_se(!endswith_no_case("foobar", "FOOBARFOOFOO"));
+}
+
+static void test_delete_chars(void) {
+ log_info("/* %s */", __func__);
+
+ char *s, input[] = " hello, waldo. abc";
+
+ s = delete_chars(input, WHITESPACE);
+ assert_se(streq(s, "hello,waldo.abc"));
+ assert_se(s == input);
+}
+
+static void test_delete_trailing_chars(void) {
+ log_info("/* %s */", __func__);
+
+ char *s,
+ input1[] = " \n \r k \n \r ",
+ input2[] = "kkkkthiskkkiskkkaktestkkk",
+ input3[] = "abcdef";
+
+ s = delete_trailing_chars(input1, WHITESPACE);
+ assert_se(streq(s, " \n \r k"));
+ assert_se(s == input1);
+
+ s = delete_trailing_chars(input2, "kt");
+ assert_se(streq(s, "kkkkthiskkkiskkkaktes"));
+ assert_se(s == input2);
+
+ s = delete_trailing_chars(input3, WHITESPACE);
+ assert_se(streq(s, "abcdef"));
+ assert_se(s == input3);
+
+ s = delete_trailing_chars(input3, "fe");
+ assert_se(streq(s, "abcd"));
+ assert_se(s == input3);
+}
+
+static void test_delete_trailing_slashes(void) {
+ log_info("/* %s */", __func__);
+
+ char s1[] = "foobar//",
+ s2[] = "foobar/",
+ s3[] = "foobar",
+ s4[] = "";
+
+ assert_se(streq(delete_trailing_chars(s1, "_"), "foobar//"));
+ assert_se(streq(delete_trailing_chars(s1, "/"), "foobar"));
+ assert_se(streq(delete_trailing_chars(s2, "/"), "foobar"));
+ assert_se(streq(delete_trailing_chars(s3, "/"), "foobar"));
+ assert_se(streq(delete_trailing_chars(s4, "/"), ""));
+}
+
+static void test_skip_leading_chars(void) {
+ log_info("/* %s */", __func__);
+
+ char input1[] = " \n \r k \n \r ",
+ input2[] = "kkkkthiskkkiskkkaktestkkk",
+ input3[] = "abcdef";
+
+ assert_se(streq(skip_leading_chars(input1, WHITESPACE), "k \n \r "));
+ assert_se(streq(skip_leading_chars(input2, "k"), "thiskkkiskkkaktestkkk"));
+ assert_se(streq(skip_leading_chars(input2, "tk"), "hiskkkiskkkaktestkkk"));
+ assert_se(streq(skip_leading_chars(input3, WHITESPACE), "abcdef"));
+ assert_se(streq(skip_leading_chars(input3, "bcaef"), "def"));
+}
+
+static void test_in_charset(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(in_charset("dddaaabbbcccc", "abcd"));
+ assert_se(!in_charset("dddaaabbbcccc", "abc f"));
+}
+
+static void test_split_pair(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_free_ char *a = NULL, *b = NULL;
+
+ assert_se(split_pair("", "", &a, &b) == -EINVAL);
+ assert_se(split_pair("foo=bar", "", &a, &b) == -EINVAL);
+ assert_se(split_pair("", "=", &a, &b) == -EINVAL);
+ assert_se(split_pair("foo=bar", "=", &a, &b) >= 0);
+ assert_se(streq(a, "foo"));
+ assert_se(streq(b, "bar"));
+ free(a);
+ free(b);
+ assert_se(split_pair("==", "==", &a, &b) >= 0);
+ assert_se(streq(a, ""));
+ assert_se(streq(b, ""));
+ free(a);
+ free(b);
+
+ assert_se(split_pair("===", "==", &a, &b) >= 0);
+ assert_se(streq(a, ""));
+ assert_se(streq(b, "="));
+}
+
+static void test_first_word(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(first_word("Hello", ""));
+ assert_se(first_word("Hello", "Hello"));
+ assert_se(first_word("Hello world", "Hello"));
+ assert_se(first_word("Hello\tworld", "Hello"));
+ assert_se(first_word("Hello\nworld", "Hello"));
+ assert_se(first_word("Hello\rworld", "Hello"));
+ assert_se(first_word("Hello ", "Hello"));
+
+ assert_se(!first_word("Hello", "Hellooo"));
+ assert_se(!first_word("Hello", "xxxxx"));
+ assert_se(!first_word("Hellooo", "Hello"));
+}
+
+static void test_strlen_ptr(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(strlen_ptr("foo") == 3);
+ assert_se(strlen_ptr("") == 0);
+ assert_se(strlen_ptr(NULL) == 0);
+}
+
+static void test_memory_startswith(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(streq(memory_startswith("", 0, ""), ""));
+ assert_se(streq(memory_startswith("", 1, ""), ""));
+ assert_se(streq(memory_startswith("x", 2, ""), "x"));
+ assert_se(!memory_startswith("", 1, "x"));
+ assert_se(!memory_startswith("", 1, "xxxxxxxx"));
+ assert_se(streq(memory_startswith("xxx", 4, "x"), "xx"));
+ assert_se(streq(memory_startswith("xxx", 4, "xx"), "x"));
+ assert_se(streq(memory_startswith("xxx", 4, "xxx"), ""));
+ assert_se(!memory_startswith("xxx", 4, "xxxx"));
+}
+
+static void test_memory_startswith_no_case(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(streq(memory_startswith_no_case("", 0, ""), ""));
+ assert_se(streq(memory_startswith_no_case("", 1, ""), ""));
+ assert_se(streq(memory_startswith_no_case("x", 2, ""), "x"));
+ assert_se(streq(memory_startswith_no_case("X", 2, ""), "X"));
+ assert_se(!memory_startswith_no_case("", 1, "X"));
+ assert_se(!memory_startswith_no_case("", 1, "xxxxXXXX"));
+ assert_se(streq(memory_startswith_no_case("xxx", 4, "X"), "xx"));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "x"), "XX"));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "X"), "XX"));
+ assert_se(streq(memory_startswith_no_case("xxx", 4, "XX"), "x"));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "xx"), "X"));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "XX"), "X"));
+ assert_se(streq(memory_startswith_no_case("xxx", 4, "XXX"), ""));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "xxx"), ""));
+ assert_se(streq(memory_startswith_no_case("XXX", 4, "XXX"), ""));
+
+ assert_se(memory_startswith_no_case((char[2]){'x', 'x'}, 2, "xx"));
+ assert_se(memory_startswith_no_case((char[2]){'x', 'X'}, 2, "xX"));
+ assert_se(memory_startswith_no_case((char[2]){'X', 'x'}, 2, "Xx"));
+ assert_se(memory_startswith_no_case((char[2]){'X', 'X'}, 2, "XX"));
+}
+
+static void test_string_truncate_lines_one(const char *input, size_t n_lines, const char *output, bool truncation) {
+ _cleanup_free_ char *b = NULL;
+ int k;
+
+ assert_se((k = string_truncate_lines(input, n_lines, &b)) >= 0);
+ assert_se(streq(b, output));
+ assert_se(!!k == truncation);
+}
+
+static void test_string_truncate_lines(void) {
+ log_info("/* %s */", __func__);
+
+ test_string_truncate_lines_one("", 0, "", false);
+ test_string_truncate_lines_one("", 1, "", false);
+ test_string_truncate_lines_one("", 2, "", false);
+ test_string_truncate_lines_one("", 3, "", false);
+
+ test_string_truncate_lines_one("x", 0, "", true);
+ test_string_truncate_lines_one("x", 1, "x", false);
+ test_string_truncate_lines_one("x", 2, "x", false);
+ test_string_truncate_lines_one("x", 3, "x", false);
+
+ test_string_truncate_lines_one("x\n", 0, "", true);
+ test_string_truncate_lines_one("x\n", 1, "x", false);
+ test_string_truncate_lines_one("x\n", 2, "x", false);
+ test_string_truncate_lines_one("x\n", 3, "x", false);
+
+ test_string_truncate_lines_one("x\ny", 0, "", true);
+ test_string_truncate_lines_one("x\ny", 1, "x", true);
+ test_string_truncate_lines_one("x\ny", 2, "x\ny", false);
+ test_string_truncate_lines_one("x\ny", 3, "x\ny", false);
+
+ test_string_truncate_lines_one("x\ny\n", 0, "", true);
+ test_string_truncate_lines_one("x\ny\n", 1, "x", true);
+ test_string_truncate_lines_one("x\ny\n", 2, "x\ny", false);
+ test_string_truncate_lines_one("x\ny\n", 3, "x\ny", false);
+
+ test_string_truncate_lines_one("x\ny\nz", 0, "", true);
+ test_string_truncate_lines_one("x\ny\nz", 1, "x", true);
+ test_string_truncate_lines_one("x\ny\nz", 2, "x\ny", true);
+ test_string_truncate_lines_one("x\ny\nz", 3, "x\ny\nz", false);
+
+ test_string_truncate_lines_one("x\ny\nz\n", 0, "", true);
+ test_string_truncate_lines_one("x\ny\nz\n", 1, "x", true);
+ test_string_truncate_lines_one("x\ny\nz\n", 2, "x\ny", true);
+ test_string_truncate_lines_one("x\ny\nz\n", 3, "x\ny\nz", false);
+
+ test_string_truncate_lines_one("\n", 0, "", false);
+ test_string_truncate_lines_one("\n", 1, "", false);
+ test_string_truncate_lines_one("\n", 2, "", false);
+ test_string_truncate_lines_one("\n", 3, "", false);
+
+ test_string_truncate_lines_one("\n\n", 0, "", false);
+ test_string_truncate_lines_one("\n\n", 1, "", false);
+ test_string_truncate_lines_one("\n\n", 2, "", false);
+ test_string_truncate_lines_one("\n\n", 3, "", false);
+
+ test_string_truncate_lines_one("\n\n\n", 0, "", false);
+ test_string_truncate_lines_one("\n\n\n", 1, "", false);
+ test_string_truncate_lines_one("\n\n\n", 2, "", false);
+ test_string_truncate_lines_one("\n\n\n", 3, "", false);
+
+ test_string_truncate_lines_one("\nx\n\n", 0, "", true);
+ test_string_truncate_lines_one("\nx\n\n", 1, "", true);
+ test_string_truncate_lines_one("\nx\n\n", 2, "\nx", false);
+ test_string_truncate_lines_one("\nx\n\n", 3, "\nx", false);
+
+ test_string_truncate_lines_one("\n\nx\n", 0, "", true);
+ test_string_truncate_lines_one("\n\nx\n", 1, "", true);
+ test_string_truncate_lines_one("\n\nx\n", 2, "", true);
+ test_string_truncate_lines_one("\n\nx\n", 3, "\n\nx", false);
+}
+
+static void test_string_extract_lines_one(const char *input, size_t i, const char *output, bool more) {
+ _cleanup_free_ char *b = NULL;
+ int k;
+
+ assert_se((k = string_extract_line(input, i, &b)) >= 0);
+ assert_se(streq(b ?: input, output));
+ assert_se(!!k == more);
+}
+
+static void test_string_extract_line(void) {
+ log_info("/* %s */", __func__);
+
+ test_string_extract_lines_one("", 0, "", false);
+ test_string_extract_lines_one("", 1, "", false);
+ test_string_extract_lines_one("", 2, "", false);
+ test_string_extract_lines_one("", 3, "", false);
+
+ test_string_extract_lines_one("x", 0, "x", false);
+ test_string_extract_lines_one("x", 1, "", false);
+ test_string_extract_lines_one("x", 2, "", false);
+ test_string_extract_lines_one("x", 3, "", false);
+
+ test_string_extract_lines_one("x\n", 0, "x", false);
+ test_string_extract_lines_one("x\n", 1, "", false);
+ test_string_extract_lines_one("x\n", 2, "", false);
+ test_string_extract_lines_one("x\n", 3, "", false);
+
+ test_string_extract_lines_one("x\ny", 0, "x", true);
+ test_string_extract_lines_one("x\ny", 1, "y", false);
+ test_string_extract_lines_one("x\ny", 2, "", false);
+ test_string_extract_lines_one("x\ny", 3, "", false);
+
+ test_string_extract_lines_one("x\ny\n", 0, "x", true);
+ test_string_extract_lines_one("x\ny\n", 1, "y", false);
+ test_string_extract_lines_one("x\ny\n", 2, "", false);
+ test_string_extract_lines_one("x\ny\n", 3, "", false);
+
+ test_string_extract_lines_one("x\ny\nz", 0, "x", true);
+ test_string_extract_lines_one("x\ny\nz", 1, "y", true);
+ test_string_extract_lines_one("x\ny\nz", 2, "z", false);
+ test_string_extract_lines_one("x\ny\nz", 3, "", false);
+
+ test_string_extract_lines_one("\n", 0, "", false);
+ test_string_extract_lines_one("\n", 1, "", false);
+ test_string_extract_lines_one("\n", 2, "", false);
+ test_string_extract_lines_one("\n", 3, "", false);
+
+ test_string_extract_lines_one("\n\n", 0, "", true);
+ test_string_extract_lines_one("\n\n", 1, "", false);
+ test_string_extract_lines_one("\n\n", 2, "", false);
+ test_string_extract_lines_one("\n\n", 3, "", false);
+
+ test_string_extract_lines_one("\n\n\n", 0, "", true);
+ test_string_extract_lines_one("\n\n\n", 1, "", true);
+ test_string_extract_lines_one("\n\n\n", 2, "", false);
+ test_string_extract_lines_one("\n\n\n", 3, "", false);
+
+ test_string_extract_lines_one("\n\n\n\n", 0, "", true);
+ test_string_extract_lines_one("\n\n\n\n", 1, "", true);
+ test_string_extract_lines_one("\n\n\n\n", 2, "", true);
+ test_string_extract_lines_one("\n\n\n\n", 3, "", false);
+
+ test_string_extract_lines_one("\nx\n\n\n", 0, "", true);
+ test_string_extract_lines_one("\nx\n\n\n", 1, "x", true);
+ test_string_extract_lines_one("\nx\n\n\n", 2, "", true);
+ test_string_extract_lines_one("\nx\n\n\n", 3, "", false);
+
+ test_string_extract_lines_one("\n\nx\n\n", 0, "", true);
+ test_string_extract_lines_one("\n\nx\n\n", 1, "", true);
+ test_string_extract_lines_one("\n\nx\n\n", 2, "x", true);
+ test_string_extract_lines_one("\n\nx\n\n", 3, "", false);
+
+ test_string_extract_lines_one("\n\n\nx\n", 0, "", true);
+ test_string_extract_lines_one("\n\n\nx\n", 1, "", true);
+ test_string_extract_lines_one("\n\n\nx\n", 2, "", true);
+ test_string_extract_lines_one("\n\n\nx\n", 3, "x", false);
+}
+
+static void test_string_contains_word_strv(void) {
+ log_info("/* %s */", __func__);
+
+ const char *w;
+
+ assert_se(string_contains_word_strv("a b cc", NULL, STRV_MAKE("a", "b"), NULL));
+
+ assert_se(string_contains_word_strv("a b cc", NULL, STRV_MAKE("a", "b"), &w));
+ assert_se(streq(w, "a"));
+
+ assert_se(!string_contains_word_strv("a b cc", NULL, STRV_MAKE("d"), &w));
+ assert_se(w == NULL);
+
+ assert_se(string_contains_word_strv("a b cc", NULL, STRV_MAKE("b", "a"), &w));
+ assert_se(streq(w, "a"));
+
+ assert_se(string_contains_word_strv("b a b cc", NULL, STRV_MAKE("b", "a", "b"), &w));
+ assert_se(streq(w, "b"));
+
+ assert_se(string_contains_word_strv("a b cc", NULL, STRV_MAKE("b", ""), &w));
+ assert_se(streq(w, "b"));
+
+ assert_se(!string_contains_word_strv("a b cc", NULL, STRV_MAKE(""), &w));
+ assert_se(w == NULL);
+
+ assert_se(string_contains_word_strv("a b cc", " ", STRV_MAKE(""), &w));
+ assert_se(streq(w, ""));
+}
+
+static void test_string_contains_word(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se( string_contains_word("a b cc", NULL, "a"));
+ assert_se( string_contains_word("a b cc", NULL, "b"));
+ assert_se(!string_contains_word("a b cc", NULL, "c"));
+ assert_se( string_contains_word("a b cc", NULL, "cc"));
+ assert_se(!string_contains_word("a b cc", NULL, "d"));
+ assert_se(!string_contains_word("a b cc", NULL, "a b"));
+ assert_se(!string_contains_word("a b cc", NULL, "a b c"));
+ assert_se(!string_contains_word("a b cc", NULL, "b c"));
+ assert_se(!string_contains_word("a b cc", NULL, "b cc"));
+ assert_se(!string_contains_word("a b cc", NULL, "a "));
+ assert_se(!string_contains_word("a b cc", NULL, " b "));
+ assert_se(!string_contains_word("a b cc", NULL, " cc"));
+
+ assert_se( string_contains_word(" a b\t\tcc", NULL, "a"));
+ assert_se( string_contains_word(" a b\t\tcc", NULL, "b"));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, "c"));
+ assert_se( string_contains_word(" a b\t\tcc", NULL, "cc"));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, "d"));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, "a b"));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, "a b\t\tc"));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, "b\t\tc"));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, "b\t\tcc"));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, "a "));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, " b "));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, " cc"));
+
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, ""));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, " "));
+ assert_se(!string_contains_word(" a b\t\tcc", NULL, " "));
+ assert_se( string_contains_word(" a b\t\tcc", " ", ""));
+ assert_se( string_contains_word(" a b\t\tcc", "\t", ""));
+ assert_se( string_contains_word(" a b\t\tcc", WHITESPACE, ""));
+
+ assert_se( string_contains_word("a:b:cc", ":#", "a"));
+ assert_se( string_contains_word("a:b:cc", ":#", "b"));
+ assert_se(!string_contains_word("a:b:cc", ":#", "c"));
+ assert_se( string_contains_word("a:b:cc", ":#", "cc"));
+ assert_se(!string_contains_word("a:b:cc", ":#", "d"));
+ assert_se(!string_contains_word("a:b:cc", ":#", "a:b"));
+ assert_se(!string_contains_word("a:b:cc", ":#", "a:b:c"));
+ assert_se(!string_contains_word("a:b:cc", ":#", "b:c"));
+ assert_se(!string_contains_word("a#b#cc", ":#", "b:cc"));
+ assert_se( string_contains_word("a#b#cc", ":#", "b"));
+ assert_se( string_contains_word("a#b#cc", ":#", "cc"));
+ assert_se(!string_contains_word("a:b:cc", ":#", "a:"));
+ assert_se(!string_contains_word("a:b cc", ":#", "b"));
+ assert_se( string_contains_word("a:b cc", ":#", "b cc"));
+ assert_se(!string_contains_word("a:b:cc", ":#", ":cc"));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_string_erase();
+ test_free_and_strndup();
+ test_ascii_strcasecmp_n();
+ test_ascii_strcasecmp_nn();
+ test_cellescape();
+ test_streq_ptr();
+ test_strstrip();
+ test_strextend();
+ test_strextend_with_separator();
+ test_strrep();
+ test_string_has_cc();
+ test_ascii_strlower();
+ test_strshorten();
+ test_strjoina();
+ test_strjoin();
+ test_strcmp_ptr();
+ test_foreach_word();
+ test_foreach_word_quoted();
+ test_endswith();
+ test_endswith_no_case();
+ test_delete_chars();
+ test_delete_trailing_chars();
+ test_delete_trailing_slashes();
+ test_skip_leading_chars();
+ test_in_charset();
+ test_split_pair();
+ test_first_word();
+ test_strlen_ptr();
+ test_memory_startswith();
+ test_memory_startswith_no_case();
+ test_string_truncate_lines();
+ test_string_extract_line();
+ test_string_contains_word_strv();
+ test_string_contains_word();
+
+ return 0;
+}
diff --git a/src/test/test-strip-tab-ansi.c b/src/test/test-strip-tab-ansi.c
new file mode 100644
index 0000000..be54b0d
--- /dev/null
+++ b/src/test/test-strip-tab-ansi.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ char *urlified = NULL, *q = NULL, *qq = NULL;
+ char *p, *z;
+
+ assert_se(p = strdup("\tFoobar\tbar\twaldo\t"));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ fprintf(stdout, "<%s>\n", p);
+ assert_se(streq(p, " Foobar bar waldo "));
+ free(p);
+
+ assert_se(p = strdup(ANSI_HIGHLIGHT "Hello" ANSI_NORMAL ANSI_HIGHLIGHT_RED " world!" ANSI_NORMAL));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ fprintf(stdout, "<%s>\n", p);
+ assert_se(streq(p, "Hello world!"));
+ free(p);
+
+ assert_se(p = strdup("\x1B[\x1B[\t\x1B[" ANSI_HIGHLIGHT "\x1B[" "Hello" ANSI_NORMAL ANSI_HIGHLIGHT_RED " world!" ANSI_NORMAL));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ assert_se(streq(p, "\x1B[\x1B[ \x1B[\x1B[Hello world!"));
+ free(p);
+
+ assert_se(p = strdup("\x1B[waldo"));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ assert_se(streq(p, "\x1B[waldo"));
+ free(p);
+
+ assert_se(p = strdup("\r\rwaldo"));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ assert_se(streq(p, "\r\rwaldo"));
+ free(p);
+
+ assert_se(p = strdup("waldo\r\r"));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ assert_se(streq(p, "waldo"));
+ free(p);
+
+ assert_se(p = strdup("waldo\r\r\n\r\n"));
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ assert_se(streq(p, "waldo\n\n"));
+ free(p);
+
+ assert_se(terminal_urlify_path("/etc/fstab", "i am a fabulous link", &urlified) >= 0);
+ assert_se(p = strjoin("something ", urlified, " something-else"));
+ assert_se(q = strdup(p));
+ printf("<%s>\n", p);
+ assert_se(strip_tab_ansi(&p, NULL, NULL));
+ printf("<%s>\n", p);
+ assert_se(streq(p, "something i am a fabulous link something-else"));
+ p = mfree(p);
+
+ /* Truncate the formatted string in the middle of an ANSI sequence (in which case we shouldn't touch the
+ * incomplete sequence) */
+ z = strstr(q, "fstab");
+ if (z) {
+ *z = 0;
+ assert_se(qq = strdup(q));
+ assert_se(strip_tab_ansi(&q, NULL, NULL));
+ assert_se(streq(q, qq));
+ }
+
+ return 0;
+}
diff --git a/src/test/test-strv.c b/src/test/test-strv.c
new file mode 100644
index 0000000..6b5005f
--- /dev/null
+++ b/src/test/test-strv.c
@@ -0,0 +1,1059 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "nulstr-util.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+
+static void test_specifier_printf(void) {
+ static const Specifier table[] = {
+ { 'X', specifier_string, (char*) "AAAA" },
+ { 'Y', specifier_string, (char*) "BBBB" },
+ COMMON_SYSTEM_SPECIFIERS,
+ {}
+ };
+
+ _cleanup_free_ char *w = NULL;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = specifier_printf("xxx a=%X b=%Y yyy", table, NULL, &w);
+ assert_se(r >= 0);
+ assert_se(w);
+
+ puts(w);
+ assert_se(streq(w, "xxx a=AAAA b=BBBB yyy"));
+
+ free(w);
+ r = specifier_printf("machine=%m, boot=%b, host=%H, version=%v, arch=%a", table, NULL, &w);
+ assert_se(r >= 0);
+ assert_se(w);
+ puts(w);
+
+ w = mfree(w);
+ specifier_printf("os=%o, os-version=%w, build=%B, variant=%W", table, NULL, &w);
+ if (w)
+ puts(w);
+}
+
+static void test_str_in_set(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(STR_IN_SET("x", "x", "y", "z"));
+ assert_se(!STR_IN_SET("X", "x", "y", "z"));
+ assert_se(!STR_IN_SET("", "x", "y", "z"));
+ assert_se(STR_IN_SET("x", "w", "x"));
+}
+
+static void test_strptr_in_set(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(STRPTR_IN_SET("x", "x", "y", "z"));
+ assert_se(!STRPTR_IN_SET("X", "x", "y", "z"));
+ assert_se(!STRPTR_IN_SET("", "x", "y", "z"));
+ assert_se(STRPTR_IN_SET("x", "w", "x"));
+
+ assert_se(!STRPTR_IN_SET(NULL, "x", "y", "z"));
+ assert_se(!STRPTR_IN_SET(NULL, ""));
+ /* strv cannot contain a null, hence the result below */
+ assert_se(!STRPTR_IN_SET(NULL, NULL));
+}
+
+static void test_startswith_set(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!STARTSWITH_SET("foo", "bar", "baz", "waldo"));
+ assert_se(!STARTSWITH_SET("foo", "bar"));
+
+ assert_se(STARTSWITH_SET("abc", "a", "ab", "abc"));
+ assert_se(STARTSWITH_SET("abc", "ax", "ab", "abc"));
+ assert_se(STARTSWITH_SET("abc", "ax", "abx", "abc"));
+ assert_se(!STARTSWITH_SET("abc", "ax", "abx", "abcx"));
+
+ assert_se(streq_ptr(STARTSWITH_SET("foobar", "hhh", "kkk", "foo", "zzz"), "bar"));
+ assert_se(streq_ptr(STARTSWITH_SET("foobar", "hhh", "kkk", "", "zzz"), "foobar"));
+ assert_se(streq_ptr(STARTSWITH_SET("", "hhh", "kkk", "zzz", ""), ""));
+}
+
+static const char* const input_table_multiple[] = {
+ "one",
+ "two",
+ "three",
+ NULL,
+};
+
+static const char* const input_table_quoted[] = {
+ "one",
+ " two\t three ",
+ " four five",
+ NULL,
+};
+
+static const char* const input_table_quoted_joined[] = {
+ "one",
+ " two\t three " " four five",
+ NULL,
+};
+
+static const char* const input_table_one[] = {
+ "one",
+ NULL,
+};
+
+static const char* const input_table_none[] = {
+ NULL,
+};
+
+static const char* const input_table_two_empties[] = {
+ "",
+ "",
+ NULL,
+};
+
+static const char* const input_table_one_empty[] = {
+ "",
+ NULL,
+};
+
+static void test_strv_find(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(strv_find((char **)input_table_multiple, "three"));
+ assert_se(!strv_find((char **)input_table_multiple, "four"));
+}
+
+static void test_strv_find_prefix(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(strv_find_prefix((char **)input_table_multiple, "o"));
+ assert_se(strv_find_prefix((char **)input_table_multiple, "one"));
+ assert_se(strv_find_prefix((char **)input_table_multiple, ""));
+ assert_se(!strv_find_prefix((char **)input_table_multiple, "xxx"));
+ assert_se(!strv_find_prefix((char **)input_table_multiple, "onee"));
+}
+
+static void test_strv_find_startswith(void) {
+ char *r;
+
+ log_info("/* %s */", __func__);
+
+ r = strv_find_startswith((char **)input_table_multiple, "o");
+ assert_se(r && streq(r, "ne"));
+
+ r = strv_find_startswith((char **)input_table_multiple, "one");
+ assert_se(r && streq(r, ""));
+
+ r = strv_find_startswith((char **)input_table_multiple, "");
+ assert_se(r && streq(r, "one"));
+
+ assert_se(!strv_find_startswith((char **)input_table_multiple, "xxx"));
+ assert_se(!strv_find_startswith((char **)input_table_multiple, "onee"));
+}
+
+static void test_strv_join(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_free_ char *p = strv_join((char **)input_table_multiple, ", ");
+ assert_se(p);
+ assert_se(streq(p, "one, two, three"));
+
+ _cleanup_free_ char *q = strv_join((char **)input_table_multiple, ";");
+ assert_se(q);
+ assert_se(streq(q, "one;two;three"));
+
+ _cleanup_free_ char *r = strv_join((char **)input_table_multiple, NULL);
+ assert_se(r);
+ assert_se(streq(r, "one two three"));
+
+ _cleanup_free_ char *s = strv_join(STRV_MAKE("1", "2", "3,3"), ",");
+ assert_se(s);
+ assert_se(streq(s, "1,2,3,3"));
+
+ _cleanup_free_ char *t = strv_join((char **)input_table_one, ", ");
+ assert_se(t);
+ assert_se(streq(t, "one"));
+
+ _cleanup_free_ char *u = strv_join((char **)input_table_none, ", ");
+ assert_se(u);
+ assert_se(streq(u, ""));
+
+ _cleanup_free_ char *v = strv_join((char **)input_table_two_empties, ", ");
+ assert_se(v);
+ assert_se(streq(v, ", "));
+
+ _cleanup_free_ char *w = strv_join((char **)input_table_one_empty, ", ");
+ assert_se(w);
+ assert_se(streq(w, ""));
+}
+
+static void test_strv_join_full(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_free_ char *p = strv_join_full((char **)input_table_multiple, ", ", "foo", false);
+ assert_se(p);
+ assert_se(streq(p, "fooone, footwo, foothree"));
+
+ _cleanup_free_ char *q = strv_join_full((char **)input_table_multiple, ";", "foo", false);
+ assert_se(q);
+ assert_se(streq(q, "fooone;footwo;foothree"));
+
+ _cleanup_free_ char *r = strv_join_full(STRV_MAKE("a", "a;b", "a:c"), ";", NULL, true);
+ assert_se(r);
+ assert_se(streq(r, "a;a\\;b;a:c"));
+
+ _cleanup_free_ char *s = strv_join_full(STRV_MAKE("a", "a;b", "a;;c", ";", ";x"), ";", NULL, true);
+ assert_se(s);
+ assert_se(streq(s, "a;a\\;b;a\\;\\;c;\\;;\\;x"));
+
+ _cleanup_free_ char *t = strv_join_full(STRV_MAKE("a", "a;b", "a:c", ";"), ";", "=", true);
+ assert_se(t);
+ assert_se(streq(t, "=a;=a\\;b;=a:c;=\\;"));
+ t = mfree(t);
+
+ _cleanup_free_ char *u = strv_join_full((char **)input_table_multiple, NULL, "foo", false);
+ assert_se(u);
+ assert_se(streq(u, "fooone footwo foothree"));
+
+ _cleanup_free_ char *v = strv_join_full((char **)input_table_one, ", ", "foo", false);
+ assert_se(v);
+ assert_se(streq(v, "fooone"));
+
+ _cleanup_free_ char *w = strv_join_full((char **)input_table_none, ", ", "foo", false);
+ assert_se(w);
+ assert_se(streq(w, ""));
+
+ _cleanup_free_ char *x = strv_join_full((char **)input_table_two_empties, ", ", "foo", false);
+ assert_se(x);
+ assert_se(streq(x, "foo, foo"));
+
+ _cleanup_free_ char *y = strv_join_full((char **)input_table_one_empty, ", ", "foo", false);
+ assert_se(y);
+ assert_se(streq(y, "foo"));
+}
+
+static void test_strv_unquote(const char *quoted, char **list) {
+ _cleanup_strv_free_ char **s;
+ _cleanup_free_ char *j;
+ unsigned i = 0;
+ char **t;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = strv_split_full(&s, quoted, WHITESPACE, EXTRACT_UNQUOTE);
+ assert_se(r == (int) strv_length(list));
+ assert_se(s);
+ j = strv_join(s, " | ");
+ assert_se(j);
+ puts(j);
+
+ STRV_FOREACH(t, s)
+ assert_se(streq(list[i++], *t));
+
+ assert_se(list[i] == NULL);
+}
+
+static void test_invalid_unquote(const char *quoted) {
+ char **s = NULL;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = strv_split_full(&s, quoted, WHITESPACE, EXTRACT_UNQUOTE);
+ assert_se(s == NULL);
+ assert_se(r == -EINVAL);
+}
+
+static void test_strv_split(void) {
+ _cleanup_(strv_free_erasep) char **l = NULL;
+ const char str[] = "one,two,three";
+
+ log_info("/* %s */", __func__);
+
+ l = strv_split(str, ",");
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_multiple));
+
+ strv_free_erase(l);
+
+ l = strv_split(" one two\t three", WHITESPACE);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_multiple));
+
+ strv_free_erase(l);
+
+ /* Setting NULL for separator is equivalent to WHITESPACE */
+ l = strv_split(" one two\t three", NULL);
+ assert_se(l);
+ assert_se(strv_equal(l, (char**) input_table_multiple));
+
+ strv_free_erase(l);
+
+ assert_se(strv_split_full(&l, " one two\t three", NULL, 0) == 3);
+ assert_se(strv_equal(l, (char**) input_table_multiple));
+
+ strv_free_erase(l);
+
+ assert_se(strv_split_full(&l, " 'one' \" two\t three \" ' four five'", NULL, EXTRACT_UNQUOTE) == 3);
+ assert_se(strv_equal(l, (char**) input_table_quoted));
+
+ l = strv_free_erase(l);
+
+ /* missing last quote causes extraction to fail. */
+ assert_se(strv_split_full(&l, " 'one' \" two\t three \" ' four five", NULL, EXTRACT_UNQUOTE) == -EINVAL);
+ assert_se(!l);
+
+ /* missing last quote, but the last element is _not_ ignored with EXTRACT_RELAX. */
+ assert_se(strv_split_full(&l, " 'one' \" two\t three \" ' four five", NULL, EXTRACT_UNQUOTE | EXTRACT_RELAX) == 3);
+ assert_se(strv_equal(l, (char**) input_table_quoted));
+
+ l = strv_free_erase(l);
+
+ /* missing separator between items */
+ assert_se(strv_split_full(&l, " 'one' \" two\t three \"' four five'", NULL, EXTRACT_UNQUOTE | EXTRACT_RELAX) == 2);
+ assert_se(strv_equal(l, (char**) input_table_quoted_joined));
+
+ l = strv_free_erase(l);
+
+ assert_se(strv_split_full(&l, " 'one' \" two\t three \"' four five", NULL,
+ EXTRACT_UNQUOTE | EXTRACT_RELAX | EXTRACT_CUNESCAPE_RELAX) == 2);
+ assert_se(strv_equal(l, (char**) input_table_quoted_joined));
+
+ l = strv_free_erase(l);
+
+ assert_se(strv_split_full(&l, "\\", NULL, EXTRACT_UNQUOTE | EXTRACT_RELAX | EXTRACT_CUNESCAPE_RELAX) == 1);
+ assert_se(strv_equal(l, STRV_MAKE("\\")));
+}
+
+static void test_strv_split_empty(void) {
+ _cleanup_strv_free_ char **l = NULL;
+
+ log_info("/* %s */", __func__);
+
+ l = strv_split("", WHITESPACE);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(l = strv_split("", NULL));
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(strv_split_full(&l, "", NULL, 0) == 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(strv_split_full(&l, "", NULL, EXTRACT_UNQUOTE) == 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(strv_split_full(&l, "", WHITESPACE, EXTRACT_UNQUOTE) == 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(strv_split_full(&l, "", WHITESPACE, EXTRACT_UNQUOTE | EXTRACT_RELAX) == 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ strv_free(l);
+
+ l = strv_split(" ", WHITESPACE);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ strv_free(l);
+
+ l = strv_split(" ", NULL);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(strv_split_full(&l, " ", NULL, 0) == 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(strv_split_full(&l, " ", WHITESPACE, EXTRACT_UNQUOTE) == 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(strv_split_full(&l, " ", NULL, EXTRACT_UNQUOTE) == 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+ l = strv_free(l);
+
+ assert_se(strv_split_full(&l, " ", NULL, EXTRACT_UNQUOTE | EXTRACT_RELAX) == 0);
+ assert_se(l);
+ assert_se(strv_isempty(l));
+}
+
+static void test_strv_split_full(void) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *str = ":foo\\:bar::waldo:";
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = strv_split_full(&l, str, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ assert_se(r == (int) strv_length(l));
+ assert_se(streq_ptr(l[0], ""));
+ assert_se(streq_ptr(l[1], "foo:bar"));
+ assert_se(streq_ptr(l[2], ""));
+ assert_se(streq_ptr(l[3], "waldo"));
+ assert_se(streq_ptr(l[4], ""));
+ assert_se(streq_ptr(l[5], NULL));
+}
+
+static void test_strv_split_colon_pairs(void) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *str = "one:two three four:five six seven:eight\\:nine ten\\:eleven\\\\",
+ *str_inval="one:two three:four:five";
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = strv_split_colon_pairs(&l, str);
+ assert_se(r == (int) strv_length(l));
+ assert_se(r == 12);
+ assert_se(streq_ptr(l[0], "one"));
+ assert_se(streq_ptr(l[1], "two"));
+ assert_se(streq_ptr(l[2], "three"));
+ assert_se(streq_ptr(l[3], ""));
+ assert_se(streq_ptr(l[4], "four"));
+ assert_se(streq_ptr(l[5], "five"));
+ assert_se(streq_ptr(l[6], "six"));
+ assert_se(streq_ptr(l[7], ""));
+ assert_se(streq_ptr(l[8], "seven"));
+ assert_se(streq_ptr(l[9], "eight:nine"));
+ assert_se(streq_ptr(l[10], "ten:eleven\\"));
+ assert_se(streq_ptr(l[11], ""));
+ assert_se(streq_ptr(l[12], NULL));
+
+ r = strv_split_colon_pairs(&l, str_inval);
+ assert_se(r == -EINVAL);
+}
+
+static void test_strv_split_newlines(void) {
+ unsigned i = 0;
+ char **s;
+ _cleanup_strv_free_ char **l = NULL;
+ const char str[] = "one\ntwo\nthree";
+
+ log_info("/* %s */", __func__);
+
+ l = strv_split_newlines(str);
+ assert_se(l);
+
+ STRV_FOREACH(s, l)
+ assert_se(streq(*s, input_table_multiple[i++]));
+}
+
+static void test_strv_split_nulstr(void) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char nulstr[] = "str0\0str1\0str2\0str3\0";
+
+ log_info("/* %s */", __func__);
+
+ l = strv_split_nulstr (nulstr);
+ assert_se(l);
+
+ assert_se(streq(l[0], "str0"));
+ assert_se(streq(l[1], "str1"));
+ assert_se(streq(l[2], "str2"));
+ assert_se(streq(l[3], "str3"));
+}
+
+static void test_strv_parse_nulstr(void) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char nulstr[] = "hoge\0hoge2\0hoge3\0\0hoge5\0\0xxx";
+
+ log_info("/* %s */", __func__);
+
+ l = strv_parse_nulstr(nulstr, sizeof(nulstr)-1);
+ assert_se(l);
+ puts("Parse nulstr:");
+ strv_print(l);
+
+ assert_se(streq(l[0], "hoge"));
+ assert_se(streq(l[1], "hoge2"));
+ assert_se(streq(l[2], "hoge3"));
+ assert_se(streq(l[3], ""));
+ assert_se(streq(l[4], "hoge5"));
+ assert_se(streq(l[5], ""));
+ assert_se(streq(l[6], "xxx"));
+}
+
+static void test_strv_overlap(void) {
+ const char * const input_table[] = {
+ "one",
+ "two",
+ "three",
+ NULL
+ };
+ const char * const input_table_overlap[] = {
+ "two",
+ NULL
+ };
+ const char * const input_table_unique[] = {
+ "four",
+ "five",
+ "six",
+ NULL
+ };
+
+ log_info("/* %s */", __func__);
+
+ assert_se(strv_overlap((char **)input_table, (char**)input_table_overlap));
+ assert_se(!strv_overlap((char **)input_table, (char**)input_table_unique));
+}
+
+static void test_strv_sort(void) {
+ const char* input_table[] = {
+ "durian",
+ "apple",
+ "citrus",
+ "CAPITAL LETTERS FIRST",
+ "banana",
+ NULL
+ };
+
+ log_info("/* %s */", __func__);
+
+ strv_sort((char **)input_table);
+
+ assert_se(streq(input_table[0], "CAPITAL LETTERS FIRST"));
+ assert_se(streq(input_table[1], "apple"));
+ assert_se(streq(input_table[2], "banana"));
+ assert_se(streq(input_table[3], "citrus"));
+ assert_se(streq(input_table[4], "durian"));
+}
+
+static void test_strv_extend_strv_concat(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new("without", "suffix");
+ b = strv_new("with", "suffix");
+ assert_se(a);
+ assert_se(b);
+
+ assert_se(strv_extend_strv_concat(&a, b, "_suffix") >= 0);
+
+ assert_se(streq(a[0], "without"));
+ assert_se(streq(a[1], "suffix"));
+ assert_se(streq(a[2], "with_suffix"));
+ assert_se(streq(a[3], "suffix_suffix"));
+}
+
+static void test_strv_extend_strv(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **n = NULL;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new("abc", "def", "ghi");
+ b = strv_new("jkl", "mno", "abc", "pqr");
+ assert_se(a);
+ assert_se(b);
+
+ assert_se(strv_extend_strv(&a, b, true) == 3);
+
+ assert_se(streq(a[0], "abc"));
+ assert_se(streq(a[1], "def"));
+ assert_se(streq(a[2], "ghi"));
+ assert_se(streq(a[3], "jkl"));
+ assert_se(streq(a[4], "mno"));
+ assert_se(streq(a[5], "pqr"));
+ assert_se(strv_length(a) == 6);
+
+ assert_se(strv_extend_strv(&n, b, false) >= 0);
+ assert_se(streq(n[0], "jkl"));
+ assert_se(streq(n[1], "mno"));
+ assert_se(streq(n[2], "abc"));
+ assert_se(streq(n[3], "pqr"));
+ assert_se(strv_length(n) == 4);
+}
+
+static void test_strv_extend(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new("test", "test1");
+ assert_se(a);
+ assert_se(strv_extend(&a, "test2") >= 0);
+ assert_se(strv_extend(&b, "test3") >= 0);
+
+ assert_se(streq(a[0], "test"));
+ assert_se(streq(a[1], "test1"));
+ assert_se(streq(a[2], "test2"));
+ assert_se(streq(b[0], "test3"));
+}
+
+static void test_strv_extendf(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new("test", "test1");
+ assert_se(a);
+ assert_se(strv_extendf(&a, "test2 %s %d %s", "foo", 128, "bar") >= 0);
+ assert_se(strv_extendf(&b, "test3 %s %s %d", "bar", "foo", 128) >= 0);
+
+ assert_se(streq(a[0], "test"));
+ assert_se(streq(a[1], "test1"));
+ assert_se(streq(a[2], "test2 foo 128 bar"));
+ assert_se(streq(b[0], "test3 bar foo 128"));
+}
+
+static void test_strv_foreach(void) {
+ _cleanup_strv_free_ char **a;
+ unsigned i = 0;
+ char **check;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new("one", "two", "three");
+ assert_se(a);
+
+ STRV_FOREACH(check, a)
+ assert_se(streq(*check, input_table_multiple[i++]));
+}
+
+static void test_strv_foreach_backwards(void) {
+ _cleanup_strv_free_ char **a;
+ unsigned i = 2;
+ char **check;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new("one", "two", "three");
+
+ assert_se(a);
+
+ STRV_FOREACH_BACKWARDS(check, a)
+ assert_se(streq_ptr(*check, input_table_multiple[i--]));
+
+ STRV_FOREACH_BACKWARDS(check, (char**) NULL)
+ assert_not_reached("Let's see that we check empty strv right, too.");
+
+ STRV_FOREACH_BACKWARDS(check, (char**) { NULL })
+ assert_not_reached("Let's see that we check empty strv right, too.");
+}
+
+static void test_strv_foreach_pair(void) {
+ _cleanup_strv_free_ char **a = NULL;
+ char **x, **y;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new("pair_one", "pair_one",
+ "pair_two", "pair_two",
+ "pair_three", "pair_three");
+ STRV_FOREACH_PAIR(x, y, a)
+ assert_se(streq(*x, *y));
+}
+
+static void test_strv_from_stdarg_alloca_one(char **l, const char *first, ...) {
+ char **j;
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ j = strv_from_stdarg_alloca(first);
+
+ for (i = 0;; i++) {
+ assert_se(streq_ptr(l[i], j[i]));
+
+ if (!l[i])
+ break;
+ }
+}
+
+static void test_strv_from_stdarg_alloca(void) {
+ log_info("/* %s */", __func__);
+
+ test_strv_from_stdarg_alloca_one(STRV_MAKE("foo", "bar"), "foo", "bar", NULL);
+ test_strv_from_stdarg_alloca_one(STRV_MAKE("foo"), "foo", NULL);
+ test_strv_from_stdarg_alloca_one(STRV_MAKE_EMPTY, NULL);
+}
+
+static void test_strv_insert(void) {
+ _cleanup_strv_free_ char **a = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(strv_insert(&a, 0, strdup("first")) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(!a[1]);
+
+ assert_se(strv_insert(&a, 0, NULL) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(!a[1]);
+
+ assert_se(strv_insert(&a, 1, strdup("two")) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(streq(a[1], "two"));
+ assert_se(!a[2]);
+
+ assert_se(strv_insert(&a, 4, strdup("tri")) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(streq(a[1], "two"));
+ assert_se(streq(a[2], "tri"));
+ assert_se(!a[3]);
+
+ assert_se(strv_insert(&a, 1, strdup("duo")) == 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(streq(a[1], "duo"));
+ assert_se(streq(a[2], "two"));
+ assert_se(streq(a[3], "tri"));
+ assert_se(!a[4]);
+}
+
+static void test_strv_push_prepend(void) {
+ _cleanup_strv_free_ char **a = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(a = strv_new("foo", "bar", "three"));
+
+ assert_se(strv_push_prepend(&a, strdup("first")) >= 0);
+ assert_se(streq(a[0], "first"));
+ assert_se(streq(a[1], "foo"));
+ assert_se(streq(a[2], "bar"));
+ assert_se(streq(a[3], "three"));
+ assert_se(!a[4]);
+
+ assert_se(strv_consume_prepend(&a, strdup("first2")) >= 0);
+ assert_se(streq(a[0], "first2"));
+ assert_se(streq(a[1], "first"));
+ assert_se(streq(a[2], "foo"));
+ assert_se(streq(a[3], "bar"));
+ assert_se(streq(a[4], "three"));
+ assert_se(!a[5]);
+}
+
+static void test_strv_push(void) {
+ _cleanup_strv_free_ char **a = NULL;
+ char *i, *j;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(i = strdup("foo"));
+ assert_se(strv_push(&a, i) >= 0);
+
+ assert_se(i = strdup("a"));
+ assert_se(j = strdup("b"));
+ assert_se(strv_push_pair(&a, i, j) >= 0);
+
+ assert_se(streq_ptr(a[0], "foo"));
+ assert_se(streq_ptr(a[1], "a"));
+ assert_se(streq_ptr(a[2], "b"));
+ assert_se(streq_ptr(a[3], NULL));
+}
+
+static void test_strv_compare(void) {
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_strv_free_ char **b = NULL;
+ _cleanup_strv_free_ char **c = NULL;
+ _cleanup_strv_free_ char **d = NULL;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new("one", "two", "three");
+ assert_se(a);
+ b = strv_new("one", "two", "three");
+ assert_se(b);
+ c = strv_new("one", "two", "three", "four");
+ assert_se(c);
+ d = strv_new(NULL);
+ assert_se(d);
+
+ assert_se(strv_compare(a, a) == 0);
+ assert_se(strv_compare(a, b) == 0);
+ assert_se(strv_compare(d, d) == 0);
+ assert_se(strv_compare(d, NULL) == 0);
+ assert_se(strv_compare(NULL, NULL) == 0);
+
+ assert_se(strv_compare(a, c) < 0);
+ assert_se(strv_compare(b, c) < 0);
+ assert_se(strv_compare(b, d) == 1);
+ assert_se(strv_compare(b, NULL) == 1);
+}
+
+static void test_strv_is_uniq(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **c = NULL, **d = NULL;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new(NULL);
+ assert_se(a);
+ assert_se(strv_is_uniq(a));
+
+ b = strv_new("foo");
+ assert_se(b);
+ assert_se(strv_is_uniq(b));
+
+ c = strv_new("foo", "bar");
+ assert_se(c);
+ assert_se(strv_is_uniq(c));
+
+ d = strv_new("foo", "bar", "waldo", "bar", "piep");
+ assert_se(d);
+ assert_se(!strv_is_uniq(d));
+}
+
+static void test_strv_reverse(void) {
+ _cleanup_strv_free_ char **a = NULL, **b = NULL, **c = NULL, **d = NULL;
+
+ log_info("/* %s */", __func__);
+
+ a = strv_new(NULL);
+ assert_se(a);
+
+ strv_reverse(a);
+ assert_se(strv_isempty(a));
+
+ b = strv_new("foo");
+ assert_se(b);
+ strv_reverse(b);
+ assert_se(streq_ptr(b[0], "foo"));
+ assert_se(streq_ptr(b[1], NULL));
+
+ c = strv_new("foo", "bar");
+ assert_se(c);
+ strv_reverse(c);
+ assert_se(streq_ptr(c[0], "bar"));
+ assert_se(streq_ptr(c[1], "foo"));
+ assert_se(streq_ptr(c[2], NULL));
+
+ d = strv_new("foo", "bar", "waldo");
+ assert_se(d);
+ strv_reverse(d);
+ assert_se(streq_ptr(d[0], "waldo"));
+ assert_se(streq_ptr(d[1], "bar"));
+ assert_se(streq_ptr(d[2], "foo"));
+ assert_se(streq_ptr(d[3], NULL));
+}
+
+static void test_strv_shell_escape(void) {
+ _cleanup_strv_free_ char **v = NULL;
+
+ log_info("/* %s */", __func__);
+
+ v = strv_new("foo:bar", "bar,baz", "wal\\do");
+ assert_se(v);
+ assert_se(strv_shell_escape(v, ",:"));
+ assert_se(streq_ptr(v[0], "foo\\:bar"));
+ assert_se(streq_ptr(v[1], "bar\\,baz"));
+ assert_se(streq_ptr(v[2], "wal\\\\do"));
+ assert_se(streq_ptr(v[3], NULL));
+}
+
+static void test_strv_skip_one(char **a, size_t n, char **b) {
+ a = strv_skip(a, n);
+ assert_se(strv_equal(a, b));
+}
+
+static void test_strv_skip(void) {
+ log_info("/* %s */", __func__);
+
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 0, STRV_MAKE("foo", "bar", "baz"));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 1, STRV_MAKE("bar", "baz"));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 2, STRV_MAKE("baz"));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 3, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 4, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE("foo", "bar", "baz"), 55, STRV_MAKE(NULL));
+
+ test_strv_skip_one(STRV_MAKE("quux"), 0, STRV_MAKE("quux"));
+ test_strv_skip_one(STRV_MAKE("quux"), 1, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE("quux"), 55, STRV_MAKE(NULL));
+
+ test_strv_skip_one(STRV_MAKE(NULL), 0, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE(NULL), 1, STRV_MAKE(NULL));
+ test_strv_skip_one(STRV_MAKE(NULL), 55, STRV_MAKE(NULL));
+}
+
+static void test_strv_extend_n(void) {
+ _cleanup_strv_free_ char **v = NULL;
+
+ log_info("/* %s */", __func__);
+
+ v = strv_new("foo", "bar");
+ assert_se(v);
+
+ assert_se(strv_extend_n(&v, "waldo", 3) >= 0);
+ assert_se(strv_extend_n(&v, "piep", 2) >= 0);
+
+ assert_se(streq(v[0], "foo"));
+ assert_se(streq(v[1], "bar"));
+ assert_se(streq(v[2], "waldo"));
+ assert_se(streq(v[3], "waldo"));
+ assert_se(streq(v[4], "waldo"));
+ assert_se(streq(v[5], "piep"));
+ assert_se(streq(v[6], "piep"));
+ assert_se(v[7] == NULL);
+
+ v = strv_free(v);
+
+ assert_se(strv_extend_n(&v, "foo", 1) >= 0);
+ assert_se(strv_extend_n(&v, "bar", 0) >= 0);
+
+ assert_se(streq(v[0], "foo"));
+ assert_se(v[1] == NULL);
+}
+
+static void test_strv_make_nulstr_one(char **l) {
+ _cleanup_free_ char *b = NULL, *c = NULL;
+ _cleanup_strv_free_ char **q = NULL;
+ const char *s = NULL;
+ size_t n, m;
+ unsigned i = 0;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(strv_make_nulstr(l, &b, &n) >= 0);
+ assert_se(q = strv_parse_nulstr(b, n));
+ assert_se(strv_equal(l, q));
+
+ assert_se(strv_make_nulstr(q, &c, &m) >= 0);
+ assert_se(m == n);
+ assert_se(memcmp(b, c, m) == 0);
+
+ NULSTR_FOREACH(s, b)
+ assert_se(streq(s, l[i++]));
+ assert_se(i == strv_length(l));
+}
+
+static void test_strv_make_nulstr(void) {
+ log_info("/* %s */", __func__);
+
+ test_strv_make_nulstr_one(NULL);
+ test_strv_make_nulstr_one(STRV_MAKE(NULL));
+ test_strv_make_nulstr_one(STRV_MAKE("foo"));
+ test_strv_make_nulstr_one(STRV_MAKE("foo", "bar"));
+ test_strv_make_nulstr_one(STRV_MAKE("foo", "bar", "quuux"));
+}
+
+static void test_strv_free_free(void) {
+ char ***t;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(t = new(char**, 3));
+ assert_se(t[0] = strv_new("a", "b"));
+ assert_se(t[1] = strv_new("c", "d", "e"));
+ t[2] = NULL;
+
+ t = strv_free_free(t);
+}
+
+static void test_foreach_string(void) {
+ const char * const t[] = {
+ "foo",
+ "bar",
+ "waldo",
+ NULL
+ };
+ const char *x;
+ unsigned i = 0;
+
+ log_info("/* %s */", __func__);
+
+ FOREACH_STRING(x, "foo", "bar", "waldo")
+ assert_se(streq_ptr(t[i++], x));
+
+ assert_se(i == 3);
+
+ FOREACH_STRING(x, "zzz")
+ assert_se(streq(x, "zzz"));
+}
+
+static void test_strv_fnmatch(void) {
+ _cleanup_strv_free_ char **v = NULL;
+ size_t pos;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(!strv_fnmatch(STRV_MAKE_EMPTY, "a"));
+
+ v = strv_new("xxx", "*\\*", "yyy");
+ assert_se(!strv_fnmatch_full(v, "\\", 0, NULL));
+ assert_se(strv_fnmatch_full(v, "\\", FNM_NOESCAPE, &pos));
+ assert(pos == 1);
+}
+
+int main(int argc, char *argv[]) {
+ test_specifier_printf();
+ test_str_in_set();
+ test_strptr_in_set();
+ test_startswith_set();
+ test_strv_foreach();
+ test_strv_foreach_backwards();
+ test_strv_foreach_pair();
+ test_strv_find();
+ test_strv_find_prefix();
+ test_strv_find_startswith();
+ test_strv_join();
+ test_strv_join_full();
+
+ test_strv_unquote(" foo=bar \"waldo\" zzz ", STRV_MAKE("foo=bar", "waldo", "zzz"));
+ test_strv_unquote("", STRV_MAKE_EMPTY);
+ test_strv_unquote(" ", STRV_MAKE_EMPTY);
+ test_strv_unquote(" ", STRV_MAKE_EMPTY);
+ test_strv_unquote(" x", STRV_MAKE("x"));
+ test_strv_unquote("x ", STRV_MAKE("x"));
+ test_strv_unquote(" x ", STRV_MAKE("x"));
+ test_strv_unquote(" \"x\" ", STRV_MAKE("x"));
+ test_strv_unquote(" 'x' ", STRV_MAKE("x"));
+ test_strv_unquote(" 'x\"' ", STRV_MAKE("x\""));
+ test_strv_unquote(" \"x'\" ", STRV_MAKE("x'"));
+ test_strv_unquote("a '--b=c \"d e\"'", STRV_MAKE("a", "--b=c \"d e\""));
+
+ /* trailing backslashes */
+ test_strv_unquote(" x\\\\", STRV_MAKE("x\\"));
+ test_invalid_unquote(" x\\");
+
+ test_invalid_unquote("a --b='c \"d e\"''");
+ test_invalid_unquote("a --b='c \"d e\" '\"");
+ test_invalid_unquote("a --b='c \"d e\"garbage");
+ test_invalid_unquote("'");
+ test_invalid_unquote("\"");
+ test_invalid_unquote("'x'y'g");
+
+ test_strv_split();
+ test_strv_split_empty();
+ test_strv_split_full();
+ test_strv_split_colon_pairs();
+ test_strv_split_newlines();
+ test_strv_split_nulstr();
+ test_strv_parse_nulstr();
+ test_strv_overlap();
+ test_strv_sort();
+ test_strv_extend_strv();
+ test_strv_extend_strv_concat();
+ test_strv_extend();
+ test_strv_extendf();
+ test_strv_from_stdarg_alloca();
+ test_strv_insert();
+ test_strv_push_prepend();
+ test_strv_push();
+ test_strv_compare();
+ test_strv_is_uniq();
+ test_strv_reverse();
+ test_strv_shell_escape();
+ test_strv_skip();
+ test_strv_extend_n();
+ test_strv_make_nulstr();
+ test_strv_free_free();
+
+ test_foreach_string();
+ test_strv_fnmatch();
+
+ return 0;
+}
diff --git a/src/test/test-strxcpyx.c b/src/test/test-strxcpyx.c
new file mode 100644
index 0000000..4b6d8eb
--- /dev/null
+++ b/src/test/test-strxcpyx.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "util.h"
+
+static void test_strpcpy(void) {
+ char target[25];
+ char *s = target;
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strpcpy(&s, space_left, "12345");
+ space_left = strpcpy(&s, space_left, "hey hey hey");
+ space_left = strpcpy(&s, space_left, "waldo");
+ space_left = strpcpy(&s, space_left, "ba");
+ space_left = strpcpy(&s, space_left, "r");
+ space_left = strpcpy(&s, space_left, "foo");
+
+ assert_se(streq(target, "12345hey hey heywaldobar"));
+ assert_se(space_left == 0);
+}
+
+static void test_strpcpyf(void) {
+ char target[25];
+ char *s = target;
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strpcpyf(&s, space_left, "space left: %zu. ", space_left);
+ space_left = strpcpyf(&s, space_left, "foo%s", "bar");
+
+ assert_se(streq(target, "space left: 25. foobar"));
+ assert_se(space_left == 3);
+
+ /* test overflow */
+ s = target;
+ space_left = strpcpyf(&s, 12, "00 left: %i. ", 999);
+ assert_se(streq(target, "00 left: 99"));
+ assert_se(space_left == 0);
+ assert_se(target[12] == '2');
+}
+
+static void test_strpcpyl(void) {
+ char target[25];
+ char *s = target;
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strpcpyl(&s, space_left, "waldo", " test", " waldo. ", NULL);
+ space_left = strpcpyl(&s, space_left, "Banana", NULL);
+
+ assert_se(streq(target, "waldo test waldo. Banana"));
+ assert_se(space_left == 1);
+}
+
+static void test_strscpy(void) {
+ char target[25];
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strscpy(target, space_left, "12345");
+
+ assert_se(streq(target, "12345"));
+ assert_se(space_left == 20);
+}
+
+static void test_strscpyl(void) {
+ char target[25];
+ size_t space_left;
+
+ space_left = sizeof(target);
+ space_left = strscpyl(target, space_left, "12345", "waldo", "waldo", NULL);
+
+ assert_se(streq(target, "12345waldowaldo"));
+ assert_se(space_left == 10);
+}
+
+static void test_sd_event_code_migration(void) {
+ char b[100 * DECIMAL_STR_MAX(unsigned) + 1];
+ char c[100 * DECIMAL_STR_MAX(unsigned) + 1], *p;
+ unsigned i;
+ size_t l;
+ int o;
+
+ for (i = o = 0; i < 100; i++)
+ o += snprintf(&b[o], sizeof(b) - o, "%u ", i);
+
+ p = c;
+ l = sizeof(c);
+ for (i = 0; i < 100; i++)
+ l = strpcpyf(&p, l, "%u ", i);
+
+ assert_se(streq(b, c));
+}
+
+int main(int argc, char *argv[]) {
+ test_strpcpy();
+ test_strpcpyf();
+ test_strpcpyl();
+ test_strscpy();
+ test_strscpyl();
+
+ test_sd_event_code_migration();
+
+ return 0;
+}
diff --git a/src/test/test-sysctl-util.c b/src/test/test-sysctl-util.c
new file mode 100644
index 0000000..a6f449b
--- /dev/null
+++ b/src/test/test-sysctl-util.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "strv.h"
+#include "sysctl-util.h"
+#include "tests.h"
+
+static const char* cases[] = {
+ "a.b.c", "a/b/c",
+ "a/b/c", "a/b/c",
+ "a/b.c/d", "a/b.c/d",
+ "a.b/c.d", "a/b.c/d",
+
+ "net.ipv4.conf.enp3s0/200.forwarding", "net/ipv4/conf/enp3s0.200/forwarding",
+ "net/ipv4/conf/enp3s0.200/forwarding", "net/ipv4/conf/enp3s0.200/forwarding",
+
+ "a...b...c", "a/b/c",
+ "a///b///c", "a/b/c",
+ ".a...b...c", "a/b/c",
+ "/a///b///c", "a/b/c",
+ NULL,
+};
+
+static void test_sysctl_normalize(void) {
+ log_info("/* %s */", __func__);
+
+ const char **s, **expected;
+ STRV_FOREACH_PAIR(s, expected, cases) {
+ _cleanup_free_ char *t;
+
+ assert_se(t = strdup(*s));
+ assert_se(sysctl_normalize(t) == t);
+
+ log_info("\"%s\" → \"%s\", expected \"%s\"", *s, t, *expected);
+ assert_se(streq(t, *expected));
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_sysctl_normalize();
+
+ return 0;
+}
diff --git a/src/test/test-systemd-tmpfiles.py b/src/test/test-systemd-tmpfiles.py
new file mode 100755
index 0000000..255922d
--- /dev/null
+++ b/src/test/test-systemd-tmpfiles.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+import os
+import sys
+import socket
+import subprocess
+import tempfile
+import pwd
+import grp
+
+try:
+ from systemd import id128
+except ImportError:
+ id128 = None
+
+EX_DATAERR = 65 # from sysexits.h
+EXIT_TEST_SKIP = 77
+
+try:
+ subprocess.run
+except AttributeError:
+ sys.exit(EXIT_TEST_SKIP)
+
+exe_with_args = sys.argv[1:]
+
+def test_line(line, *, user, returncode=EX_DATAERR, extra={}):
+ args = ['--user'] if user else []
+ print('Running {} on {!r}'.format(' '.join(exe_with_args + args), line))
+ c = subprocess.run(exe_with_args + ['--create', '-'] + args,
+ input=line, stdout=subprocess.PIPE, universal_newlines=True,
+ **extra)
+ assert c.returncode == returncode, c
+
+def test_invalids(*, user):
+ test_line('asdfa', user=user)
+ test_line('f "open quote', user=user)
+ test_line('f closed quote""', user=user)
+ test_line('Y /unknown/letter', user=user)
+ test_line('w non/absolute/path', user=user)
+ test_line('s', user=user) # s is for short
+ test_line('f!! /too/many/bangs', user=user)
+ test_line('f++ /too/many/plusses', user=user)
+ test_line('f+!+ /too/many/plusses', user=user)
+ test_line('f!+! /too/many/bangs', user=user)
+ test_line('w /unresolved/argument - - - - "%Y"', user=user)
+ test_line('w /unresolved/argument/sandwich - - - - "%v%Y%v"', user=user)
+ test_line('w /unresolved/filename/%Y - - - - "whatever"', user=user)
+ test_line('w /unresolved/filename/sandwich/%v%Y%v - - - - "whatever"', user=user)
+ test_line('w - - - - - "no file specified"', user=user)
+ test_line('C - - - - - "no file specified"', user=user)
+ test_line('C non/absolute/path - - - - -', user=user)
+ test_line('b - - - - - -', user=user)
+ test_line('b 1234 - - - - -', user=user)
+ test_line('c - - - - - -', user=user)
+ test_line('c 1234 - - - - -', user=user)
+ test_line('t - - -', user=user)
+ test_line('T - - -', user=user)
+ test_line('a - - -', user=user)
+ test_line('A - - -', user=user)
+ test_line('h - - -', user=user)
+ test_line('H - - -', user=user)
+
+def test_uninitialized_t():
+ if os.getuid() == 0:
+ return
+
+ test_line('w /foo - - - - "specifier for --user %t"',
+ user=True, returncode=0, extra={'env':{}})
+
+def test_content(line, expected, *, user, extra={}):
+ d = tempfile.TemporaryDirectory(prefix='test-systemd-tmpfiles.')
+ arg = d.name + '/arg'
+ spec = line.format(arg)
+ test_line(spec, user=user, returncode=0, extra=extra)
+ content = open(arg).read()
+ print('expect: {!r}\nactual: {!r}'.format(expected, content))
+ assert content == expected
+
+def test_valid_specifiers(*, user):
+ test_content('f {} - - - - two words', 'two words', user=user)
+ if id128:
+ try:
+ test_content('f {} - - - - %m', '{}'.format(id128.get_machine().hex), user=user)
+ except AssertionError as e:
+ print(e)
+ print('/etc/machine-id: {!r}'.format(open('/etc/machine-id').read()))
+ print('/proc/cmdline: {!r}'.format(open('/proc/cmdline').read()))
+ print('skipping')
+ test_content('f {} - - - - %b', '{}'.format(id128.get_boot().hex), user=user)
+ test_content('f {} - - - - %H', '{}'.format(socket.gethostname()), user=user)
+ test_content('f {} - - - - %v', '{}'.format(os.uname().release), user=user)
+ test_content('f {} - - - - %U', '{}'.format(os.getuid()), user=user)
+ test_content('f {} - - - - %G', '{}'.format(os.getgid()), user=user)
+
+ puser = pwd.getpwuid(os.getuid())
+ test_content('f {} - - - - %u', '{}'.format(puser.pw_name), user=user)
+
+ pgroup = grp.getgrgid(os.getgid())
+ test_content('f {} - - - - %g', '{}'.format(pgroup.gr_name), user=user)
+
+ # Note that %h is the only specifier in which we look the environment,
+ # because we check $HOME. Should we even be doing that?
+ home = os.path.expanduser("~")
+ test_content('f {} - - - - %h', '{}'.format(home), user=user)
+
+ xdg_runtime_dir = os.getenv('XDG_RUNTIME_DIR')
+ if xdg_runtime_dir is not None or not user:
+ test_content('f {} - - - - %t',
+ xdg_runtime_dir if user else '/run',
+ user=user)
+
+ xdg_config_home = os.getenv('XDG_CONFIG_HOME')
+ if xdg_config_home is not None or not user:
+ test_content('f {} - - - - %S',
+ xdg_config_home if user else '/var/lib',
+ user=user)
+
+ xdg_cache_home = os.getenv('XDG_CACHE_HOME')
+ if xdg_cache_home is not None or not user:
+ test_content('f {} - - - - %C',
+ xdg_cache_home if user else '/var/cache',
+ user=user)
+
+ if xdg_config_home is not None or not user:
+ test_content('f {} - - - - %L',
+ xdg_config_home + '/log' if user else '/var/log',
+ user=user)
+
+ test_content('f {} - - - - %%', '%', user=user)
+
+if __name__ == '__main__':
+ test_invalids(user=False)
+ test_invalids(user=True)
+ test_uninitialized_t()
+
+ test_valid_specifiers(user=False)
+ test_valid_specifiers(user=True)
diff --git a/src/test/test-tables.c b/src/test/test-tables.c
new file mode 100644
index 0000000..e25cf9e
--- /dev/null
+++ b/src/test/test-tables.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "architecture.h"
+#include "automount.h"
+#include "cgroup.h"
+#include "cgroup-util.h"
+#include "compress.h"
+#include "condition.h"
+#include "device-private.h"
+#include "device.h"
+#include "execute.h"
+#include "import-util.h"
+#include "install.h"
+#include "job.h"
+#include "journald-server.h"
+#include "kill.h"
+#include "link-config.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "machine-image.h"
+#include "mount.h"
+#include "path.h"
+#include "process-util.h"
+#include "resolve-util.h"
+#include "rlimit-util.h"
+#include "scope.h"
+#include "service.h"
+#include "show-status.h"
+#include "slice.h"
+#include "socket-util.h"
+#include "socket.h"
+#include "swap.h"
+#include "target.h"
+#include "test-tables.h"
+#include "timer.h"
+#include "unit-name.h"
+#include "unit.h"
+#include "util.h"
+#include "virt.h"
+
+int main(int argc, char **argv) {
+ test_table(architecture, ARCHITECTURE);
+ test_table(assert_type, CONDITION_TYPE);
+ test_table(automount_result, AUTOMOUNT_RESULT);
+ test_table(automount_state, AUTOMOUNT_STATE);
+ test_table(cgroup_controller, CGROUP_CONTROLLER);
+ test_table(cgroup_device_policy, CGROUP_DEVICE_POLICY);
+ test_table(cgroup_io_limit_type, CGROUP_IO_LIMIT_TYPE);
+ test_table(collect_mode, COLLECT_MODE);
+ test_table(condition_result, CONDITION_RESULT);
+ test_table(condition_type, CONDITION_TYPE);
+ test_table(device_action, DEVICE_ACTION);
+ test_table(device_state, DEVICE_STATE);
+ test_table(dns_over_tls_mode, DNS_OVER_TLS_MODE);
+ test_table(dnssec_mode, DNSSEC_MODE);
+ test_table(emergency_action, EMERGENCY_ACTION);
+ test_table(exec_directory_type, EXEC_DIRECTORY_TYPE);
+ test_table(exec_input, EXEC_INPUT);
+ test_table(exec_keyring_mode, EXEC_KEYRING_MODE);
+ test_table(exec_output, EXEC_OUTPUT);
+ test_table(exec_preserve_mode, EXEC_PRESERVE_MODE);
+ test_table(exec_utmp_mode, EXEC_UTMP_MODE);
+ test_table(image_type, IMAGE_TYPE);
+ test_table(import_verify, IMPORT_VERIFY);
+ test_table(job_mode, JOB_MODE);
+ test_table(job_result, JOB_RESULT);
+ test_table(job_state, JOB_STATE);
+ test_table(job_type, JOB_TYPE);
+ test_table(kill_mode, KILL_MODE);
+ test_table(kill_who, KILL_WHO);
+ test_table(locale_variable, VARIABLE_LC);
+ test_table(log_target, LOG_TARGET);
+ test_table(mac_address_policy, MAC_ADDRESS_POLICY);
+ test_table(managed_oom_mode, MANAGED_OOM_MODE);
+ test_table(manager_state, MANAGER_STATE);
+ test_table(manager_timestamp, MANAGER_TIMESTAMP);
+ test_table(mount_exec_command, MOUNT_EXEC_COMMAND);
+ test_table(mount_result, MOUNT_RESULT);
+ test_table(mount_state, MOUNT_STATE);
+ test_table(name_policy, NAMEPOLICY);
+ test_table(namespace_type, NAMESPACE_TYPE);
+ test_table(notify_access, NOTIFY_ACCESS);
+ test_table(notify_state, NOTIFY_STATE);
+ test_table(output_mode, OUTPUT_MODE);
+ test_table(partition_designator, PARTITION_DESIGNATOR);
+ test_table(path_result, PATH_RESULT);
+ test_table(path_state, PATH_STATE);
+ test_table(path_type, PATH_TYPE);
+ test_table(protect_home, PROTECT_HOME);
+ test_table(protect_system, PROTECT_SYSTEM);
+ test_table(resolve_support, RESOLVE_SUPPORT);
+ test_table(rlimit, RLIMIT);
+ test_table(scope_result, SCOPE_RESULT);
+ test_table(scope_state, SCOPE_STATE);
+ test_table(service_exec_command, SERVICE_EXEC_COMMAND);
+ test_table(service_restart, SERVICE_RESTART);
+ test_table(service_result, SERVICE_RESULT);
+ test_table(service_state, SERVICE_STATE);
+ test_table(service_type, SERVICE_TYPE);
+ test_table(show_status, SHOW_STATUS);
+ test_table(slice_state, SLICE_STATE);
+ test_table(socket_address_bind_ipv6_only, SOCKET_ADDRESS_BIND_IPV6_ONLY);
+ test_table(socket_exec_command, SOCKET_EXEC_COMMAND);
+ test_table(socket_result, SOCKET_RESULT);
+ test_table(socket_state, SOCKET_STATE);
+ test_table(split_mode, SPLIT);
+ test_table(storage, STORAGE);
+ test_table(swap_exec_command, SWAP_EXEC_COMMAND);
+ test_table(swap_result, SWAP_RESULT);
+ test_table(swap_state, SWAP_STATE);
+ test_table(target_state, TARGET_STATE);
+ test_table(timer_base, TIMER_BASE);
+ test_table(timer_result, TIMER_RESULT);
+ test_table(timer_state, TIMER_STATE);
+ test_table(unit_active_state, UNIT_ACTIVE_STATE);
+ test_table(unit_dependency, UNIT_DEPENDENCY);
+ test_table(unit_file_change_type, UNIT_FILE_CHANGE_TYPE);
+ test_table(unit_file_preset_mode, UNIT_FILE_PRESET);
+ test_table(unit_file_state, UNIT_FILE_STATE);
+ test_table(unit_load_state, UNIT_LOAD_STATE);
+ test_table(unit_type, UNIT_TYPE);
+ test_table(virtualization, VIRTUALIZATION);
+
+ test_table_sparse(object_compressed, OBJECT_COMPRESSED);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-terminal-util.c b/src/test/test-terminal-util.c
new file mode 100644
index 0000000..508f0c0
--- /dev/null
+++ b/src/test/test-terminal-util.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+#define LOREM_IPSUM "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor " \
+ "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation " \
+ "ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit " \
+ "in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat " \
+ "non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
+
+static void test_default_term_for_tty(void) {
+ log_info("/* %s */", __func__);
+
+ puts(default_term_for_tty("/dev/tty23"));
+ puts(default_term_for_tty("/dev/ttyS23"));
+ puts(default_term_for_tty("/dev/tty0"));
+ puts(default_term_for_tty("/dev/pty0"));
+ puts(default_term_for_tty("/dev/pts/0"));
+ puts(default_term_for_tty("/dev/console"));
+ puts(default_term_for_tty("tty23"));
+ puts(default_term_for_tty("ttyS23"));
+ puts(default_term_for_tty("tty0"));
+ puts(default_term_for_tty("pty0"));
+ puts(default_term_for_tty("pts/0"));
+ puts(default_term_for_tty("console"));
+}
+
+static void test_read_one_char(void) {
+ _cleanup_fclose_ FILE *file = NULL;
+ char r;
+ bool need_nl;
+ char name[] = "/tmp/test-read_one_char.XXXXXX";
+
+ log_info("/* %s */", __func__);
+
+ assert_se(fmkostemp_safe(name, "r+", &file) == 0);
+
+ assert_se(fputs("c\n", file) >= 0);
+ rewind(file);
+ assert_se(read_one_char(file, &r, 1000000, &need_nl) >= 0);
+ assert_se(!need_nl);
+ assert_se(r == 'c');
+ assert_se(read_one_char(file, &r, 1000000, &need_nl) < 0);
+
+ rewind(file);
+ assert_se(fputs("foobar\n", file) >= 0);
+ rewind(file);
+ assert_se(read_one_char(file, &r, 1000000, &need_nl) < 0);
+
+ rewind(file);
+ assert_se(fputs("\n", file) >= 0);
+ rewind(file);
+ assert_se(read_one_char(file, &r, 1000000, &need_nl) < 0);
+
+ assert_se(unlink(name) >= 0);
+}
+
+static void test_getttyname_malloc(void) {
+ _cleanup_free_ char *ttyname = NULL;
+ _cleanup_close_ int master = -1;
+
+ log_info("/* %s */", __func__);
+
+ assert_se((master = posix_openpt(O_RDWR|O_NOCTTY)) >= 0);
+ assert_se(getttyname_malloc(master, &ttyname) >= 0);
+ log_info("ttyname = %s", ttyname);
+
+ assert_se(PATH_IN_SET(ttyname, "ptmx", "pts/ptmx"));
+}
+
+typedef struct {
+ const char *name;
+ const char* (*func)(void);
+} Color;
+
+static const Color colors[] = {
+ { "normal", ansi_normal },
+ { "highlight", ansi_highlight },
+ { "black", ansi_black },
+ { "red", ansi_red },
+ { "green", ansi_green },
+ { "yellow", ansi_yellow },
+ { "blue", ansi_blue },
+ { "magenta", ansi_magenta },
+ { "cyan", ansi_cyan },
+ { "white", ansi_white },
+ { "grey", ansi_grey },
+
+ { "bright-black", ansi_bright_black },
+ { "bright-red", ansi_bright_red },
+ { "bright-green", ansi_bright_green },
+ { "bright-yellow", ansi_bright_yellow },
+ { "bright-blue", ansi_bright_blue },
+ { "bright-magenta", ansi_bright_magenta },
+ { "bright-cyan", ansi_bright_cyan },
+ { "bright-white", ansi_bright_white },
+
+ { "highlight-black", ansi_highlight_black },
+ { "highlight-red", ansi_highlight_red },
+ { "highlight-green", ansi_highlight_green },
+ { "highlight-yellow (original)", _ansi_highlight_yellow },
+ { "highlight-yellow (replacement)", ansi_highlight_yellow },
+ { "highlight-blue", ansi_highlight_blue },
+ { "highlight-magenta", ansi_highlight_magenta },
+ { "highlight-cyan", ansi_highlight_cyan },
+ { "highlight-white", ansi_highlight_white },
+ { "highlight-grey", ansi_highlight_grey },
+
+ { "underline", ansi_underline },
+ { "highlight-underline", ansi_highlight_underline },
+ { "highlight-red-underline", ansi_highlight_red_underline },
+ { "highlight-green-underline", ansi_highlight_green_underline },
+ { "highlight-yellow-underline", ansi_highlight_yellow_underline },
+ { "highlight-blue-underline", ansi_highlight_blue_underline },
+ { "highlight-magenta-underline", ansi_highlight_magenta_underline },
+ { "highlight-grey-underline", ansi_highlight_grey_underline },
+};
+
+static void test_colors(void) {
+ log_info("/* %s */", __func__);
+
+ for (size_t i = 0; i < ELEMENTSOF(colors); i++)
+ printf("<%s%s%s>\n", colors[i].func(), colors[i].name, ansi_normal());
+}
+
+static void test_text(void) {
+ log_info("/* %s */", __func__);
+
+ for (size_t i = 0; !streq(colors[i].name, "underline"); i++) {
+ bool blwh = strstr(colors[i].name, "black")
+ || strstr(colors[i].name, "white");
+
+ printf("\n"
+ "Testing color %s%s\n%s%s%s\n",
+ colors[i].name,
+ blwh ? "" : ", this text should be readable",
+ colors[i].func(),
+ LOREM_IPSUM,
+ ansi_normal());
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_default_term_for_tty();
+ test_read_one_char();
+ test_getttyname_malloc();
+ test_colors();
+ test_text();
+
+ return 0;
+}
diff --git a/src/test/test-time-util.c b/src/test/test-time-util.c
new file mode 100644
index 0000000..cc391e8
--- /dev/null
+++ b/src/test/test-time-util.c
@@ -0,0 +1,557 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "random-util.h"
+#include "serialize.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "time-util.h"
+
+static void test_parse_sec(void) {
+ usec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_sec("5s", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+ assert_se(parse_sec("5s500ms", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC + 500 * USEC_PER_MSEC);
+ assert_se(parse_sec(" 5s 500ms ", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC + 500 * USEC_PER_MSEC);
+ assert_se(parse_sec(" 5.5s ", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC + 500 * USEC_PER_MSEC);
+ assert_se(parse_sec(" 5.5s 0.5ms ", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC + 500 * USEC_PER_MSEC + 500);
+ assert_se(parse_sec(" .22s ", &u) >= 0);
+ assert_se(u == 220 * USEC_PER_MSEC);
+ assert_se(parse_sec(" .50y ", &u) >= 0);
+ assert_se(u == USEC_PER_YEAR / 2);
+ assert_se(parse_sec("2.5", &u) >= 0);
+ assert_se(u == 2500 * USEC_PER_MSEC);
+ assert_se(parse_sec(".7", &u) >= 0);
+ assert_se(u == 700 * USEC_PER_MSEC);
+ assert_se(parse_sec("23us", &u) >= 0);
+ assert_se(u == 23);
+ assert_se(parse_sec("23µs", &u) >= 0);
+ assert_se(u == 23);
+ assert_se(parse_sec("infinity", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec(" infinity ", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec("+3.1s", &u) >= 0);
+ assert_se(u == 3100 * USEC_PER_MSEC);
+ assert_se(parse_sec("3.1s.2", &u) >= 0);
+ assert_se(u == 3300 * USEC_PER_MSEC);
+ assert_se(parse_sec("3.1 .2", &u) >= 0);
+ assert_se(u == 3300 * USEC_PER_MSEC);
+ assert_se(parse_sec("3.1 sec .2 sec", &u) >= 0);
+ assert_se(u == 3300 * USEC_PER_MSEC);
+ assert_se(parse_sec("3.1 sec 1.2 sec", &u) >= 0);
+ assert_se(u == 4300 * USEC_PER_MSEC);
+
+ assert_se(parse_sec(" xyz ", &u) < 0);
+ assert_se(parse_sec("", &u) < 0);
+ assert_se(parse_sec(" . ", &u) < 0);
+ assert_se(parse_sec(" 5. ", &u) < 0);
+ assert_se(parse_sec(".s ", &u) < 0);
+ assert_se(parse_sec("-5s ", &u) < 0);
+ assert_se(parse_sec("-0.3s ", &u) < 0);
+ assert_se(parse_sec("-0.0s ", &u) < 0);
+ assert_se(parse_sec("-0.-0s ", &u) < 0);
+ assert_se(parse_sec("0.-0s ", &u) < 0);
+ assert_se(parse_sec("3.-0s ", &u) < 0);
+ assert_se(parse_sec(" infinity .7", &u) < 0);
+ assert_se(parse_sec(".3 infinity", &u) < 0);
+ assert_se(parse_sec("3.+1s", &u) < 0);
+ assert_se(parse_sec("3. 1s", &u) < 0);
+ assert_se(parse_sec("3.s", &u) < 0);
+ assert_se(parse_sec("12.34.56", &u) < 0);
+ assert_se(parse_sec("12..34", &u) < 0);
+ assert_se(parse_sec("..1234", &u) < 0);
+ assert_se(parse_sec("1234..", &u) < 0);
+}
+
+static void test_parse_sec_fix_0(void) {
+ usec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_sec_fix_0("5s", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+ assert_se(parse_sec_fix_0("0s", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec_fix_0("0", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec_fix_0(" 0", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+}
+
+static void test_parse_sec_def_infinity(void) {
+ usec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_sec_def_infinity("5s", &u) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+ assert_se(parse_sec_def_infinity("", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec_def_infinity(" ", &u) >= 0);
+ assert_se(u == USEC_INFINITY);
+ assert_se(parse_sec_def_infinity("0s", &u) >= 0);
+ assert_se(u == 0);
+ assert_se(parse_sec_def_infinity("0", &u) >= 0);
+ assert_se(u == 0);
+ assert_se(parse_sec_def_infinity(" 0", &u) >= 0);
+ assert_se(u == 0);
+ assert_se(parse_sec_def_infinity("-5s", &u) < 0);
+}
+
+static void test_parse_time(void) {
+ usec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_time("5", &u, 1) >= 0);
+ assert_se(u == 5);
+
+ assert_se(parse_time("5", &u, USEC_PER_MSEC) >= 0);
+ assert_se(u == 5 * USEC_PER_MSEC);
+
+ assert_se(parse_time("5", &u, USEC_PER_SEC) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+
+ assert_se(parse_time("5s", &u, 1) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+
+ assert_se(parse_time("5s", &u, USEC_PER_SEC) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+
+ assert_se(parse_time("5s", &u, USEC_PER_MSEC) >= 0);
+ assert_se(u == 5 * USEC_PER_SEC);
+
+ assert_se(parse_time("11111111111111y", &u, 1) == -ERANGE);
+ assert_se(parse_time("1.1111111111111y", &u, 1) >= 0);
+}
+
+static void test_parse_nsec(void) {
+ nsec_t u;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_nsec("5s", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC);
+ assert_se(parse_nsec("5s500ms", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC + 500 * NSEC_PER_MSEC);
+ assert_se(parse_nsec(" 5s 500ms ", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC + 500 * NSEC_PER_MSEC);
+ assert_se(parse_nsec(" 5.5s ", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC + 500 * NSEC_PER_MSEC);
+ assert_se(parse_nsec(" 5.5s 0.5ms ", &u) >= 0);
+ assert_se(u == 5 * NSEC_PER_SEC + 500 * NSEC_PER_MSEC + 500 * NSEC_PER_USEC);
+ assert_se(parse_nsec(" .22s ", &u) >= 0);
+ assert_se(u == 220 * NSEC_PER_MSEC);
+ assert_se(parse_nsec(" .50y ", &u) >= 0);
+ assert_se(u == NSEC_PER_YEAR / 2);
+ assert_se(parse_nsec("2.5", &u) >= 0);
+ assert_se(u == 2);
+ assert_se(parse_nsec(".7", &u) >= 0);
+ assert_se(u == 0);
+ assert_se(parse_nsec("infinity", &u) >= 0);
+ assert_se(u == NSEC_INFINITY);
+ assert_se(parse_nsec(" infinity ", &u) >= 0);
+ assert_se(u == NSEC_INFINITY);
+ assert_se(parse_nsec("+3.1s", &u) >= 0);
+ assert_se(u == 3100 * NSEC_PER_MSEC);
+ assert_se(parse_nsec("3.1s.2", &u) >= 0);
+ assert_se(u == 3100 * NSEC_PER_MSEC);
+ assert_se(parse_nsec("3.1 .2s", &u) >= 0);
+ assert_se(u == 200 * NSEC_PER_MSEC + 3);
+ assert_se(parse_nsec("3.1 sec .2 sec", &u) >= 0);
+ assert_se(u == 3300 * NSEC_PER_MSEC);
+ assert_se(parse_nsec("3.1 sec 1.2 sec", &u) >= 0);
+ assert_se(u == 4300 * NSEC_PER_MSEC);
+
+ assert_se(parse_nsec(" xyz ", &u) < 0);
+ assert_se(parse_nsec("", &u) < 0);
+ assert_se(parse_nsec(" . ", &u) < 0);
+ assert_se(parse_nsec(" 5. ", &u) < 0);
+ assert_se(parse_nsec(".s ", &u) < 0);
+ assert_se(parse_nsec(" infinity .7", &u) < 0);
+ assert_se(parse_nsec(".3 infinity", &u) < 0);
+ assert_se(parse_nsec("-5s ", &u) < 0);
+ assert_se(parse_nsec("-0.3s ", &u) < 0);
+ assert_se(parse_nsec("-0.0s ", &u) < 0);
+ assert_se(parse_nsec("-0.-0s ", &u) < 0);
+ assert_se(parse_nsec("0.-0s ", &u) < 0);
+ assert_se(parse_nsec("3.-0s ", &u) < 0);
+ assert_se(parse_nsec(" infinity .7", &u) < 0);
+ assert_se(parse_nsec(".3 infinity", &u) < 0);
+ assert_se(parse_nsec("3.+1s", &u) < 0);
+ assert_se(parse_nsec("3. 1s", &u) < 0);
+ assert_se(parse_nsec("3.s", &u) < 0);
+ assert_se(parse_nsec("12.34.56", &u) < 0);
+ assert_se(parse_nsec("12..34", &u) < 0);
+ assert_se(parse_nsec("..1234", &u) < 0);
+ assert_se(parse_nsec("1234..", &u) < 0);
+ assert_se(parse_nsec("1111111111111y", &u) == -ERANGE);
+ assert_se(parse_nsec("1.111111111111y", &u) >= 0);
+}
+
+static void test_format_timespan_one(usec_t x, usec_t accuracy) {
+ char l[FORMAT_TIMESPAN_MAX];
+ const char *t;
+ usec_t y;
+
+ log_info(USEC_FMT" (at accuracy "USEC_FMT")", x, accuracy);
+
+ assert_se(t = format_timespan(l, sizeof l, x, accuracy));
+ log_info(" = <%s>", t);
+
+ assert_se(parse_sec(t, &y) >= 0);
+ log_info(" = "USEC_FMT, y);
+
+ if (accuracy <= 0)
+ accuracy = 1;
+
+ assert_se(x / accuracy == y / accuracy);
+}
+
+static void test_format_timespan(usec_t accuracy) {
+ log_info("/* %s accuracy="USEC_FMT" */", __func__, accuracy);
+
+ test_format_timespan_one(0, accuracy);
+ test_format_timespan_one(1, accuracy);
+ test_format_timespan_one(1*USEC_PER_SEC, accuracy);
+ test_format_timespan_one(999*USEC_PER_MSEC, accuracy);
+ test_format_timespan_one(1234567, accuracy);
+ test_format_timespan_one(12, accuracy);
+ test_format_timespan_one(123, accuracy);
+ test_format_timespan_one(1234, accuracy);
+ test_format_timespan_one(12345, accuracy);
+ test_format_timespan_one(123456, accuracy);
+ test_format_timespan_one(1234567, accuracy);
+ test_format_timespan_one(12345678, accuracy);
+ test_format_timespan_one(1200000, accuracy);
+ test_format_timespan_one(1230000, accuracy);
+ test_format_timespan_one(1234000, accuracy);
+ test_format_timespan_one(1234500, accuracy);
+ test_format_timespan_one(1234560, accuracy);
+ test_format_timespan_one(1234567, accuracy);
+ test_format_timespan_one(986087, accuracy);
+ test_format_timespan_one(500 * USEC_PER_MSEC, accuracy);
+ test_format_timespan_one(9*USEC_PER_YEAR/5 - 23, accuracy);
+ test_format_timespan_one(USEC_INFINITY, accuracy);
+}
+
+static void test_timezone_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(timezone_is_valid("Europe/Berlin", LOG_ERR));
+ assert_se(timezone_is_valid("Australia/Sydney", LOG_ERR));
+ assert_se(!timezone_is_valid("Europe/Do not exist", LOG_ERR));
+}
+
+static void test_get_timezones(void) {
+ _cleanup_strv_free_ char **zones = NULL;
+ int r;
+ char **zone;
+
+ log_info("/* %s */", __func__);
+
+ r = get_timezones(&zones);
+ assert_se(r == 0);
+
+ STRV_FOREACH(zone, zones) {
+ log_info("zone: %s", *zone);
+ assert_se(timezone_is_valid(*zone, LOG_ERR));
+ }
+}
+
+static void test_usec_add(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(usec_add(0, 0) == 0);
+ assert_se(usec_add(1, 4) == 5);
+ assert_se(usec_add(USEC_INFINITY, 5) == USEC_INFINITY);
+ assert_se(usec_add(5, USEC_INFINITY) == USEC_INFINITY);
+ assert_se(usec_add(USEC_INFINITY-5, 2) == USEC_INFINITY-3);
+ assert_se(usec_add(USEC_INFINITY-2, 2) == USEC_INFINITY);
+ assert_se(usec_add(USEC_INFINITY-1, 2) == USEC_INFINITY);
+ assert_se(usec_add(USEC_INFINITY, 2) == USEC_INFINITY);
+}
+
+static void test_usec_sub_unsigned(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(usec_sub_unsigned(0, 0) == 0);
+ assert_se(usec_sub_unsigned(0, 2) == 0);
+ assert_se(usec_sub_unsigned(0, USEC_INFINITY) == 0);
+ assert_se(usec_sub_unsigned(1, 0) == 1);
+ assert_se(usec_sub_unsigned(1, 1) == 0);
+ assert_se(usec_sub_unsigned(1, 2) == 0);
+ assert_se(usec_sub_unsigned(1, 3) == 0);
+ assert_se(usec_sub_unsigned(1, USEC_INFINITY) == 0);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, 0) == USEC_INFINITY-1);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, 1) == USEC_INFINITY-2);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, 2) == USEC_INFINITY-3);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, USEC_INFINITY-2) == 1);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, USEC_INFINITY-1) == 0);
+ assert_se(usec_sub_unsigned(USEC_INFINITY-1, USEC_INFINITY) == 0);
+ assert_se(usec_sub_unsigned(USEC_INFINITY, 0) == USEC_INFINITY);
+ assert_se(usec_sub_unsigned(USEC_INFINITY, 1) == USEC_INFINITY);
+ assert_se(usec_sub_unsigned(USEC_INFINITY, 2) == USEC_INFINITY);
+ assert_se(usec_sub_unsigned(USEC_INFINITY, USEC_INFINITY) == USEC_INFINITY);
+}
+
+static void test_usec_sub_signed(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(usec_sub_signed(0, 0) == 0);
+ assert_se(usec_sub_signed(4, 1) == 3);
+ assert_se(usec_sub_signed(4, 4) == 0);
+ assert_se(usec_sub_signed(4, 5) == 0);
+ assert_se(usec_sub_signed(USEC_INFINITY-3, -3) == USEC_INFINITY);
+ assert_se(usec_sub_signed(USEC_INFINITY-3, -4) == USEC_INFINITY);
+ assert_se(usec_sub_signed(USEC_INFINITY-3, -5) == USEC_INFINITY);
+ assert_se(usec_sub_signed(USEC_INFINITY, 5) == USEC_INFINITY);
+}
+
+static void test_format_timestamp(void) {
+ unsigned i;
+
+ log_info("/* %s */", __func__);
+
+ for (i = 0; i < 100; i++) {
+ char buf[MAX(FORMAT_TIMESTAMP_MAX, FORMAT_TIMESPAN_MAX)];
+ usec_t x, y;
+
+ random_bytes(&x, sizeof(x));
+ x = x % (2147483600 * USEC_PER_SEC) + 1;
+
+ assert_se(format_timestamp(buf, sizeof(buf), x));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+ assert_se(x / USEC_PER_SEC == y / USEC_PER_SEC);
+
+ assert_se(format_timestamp_style(buf, sizeof(buf), x, TIMESTAMP_UTC));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+ assert_se(x / USEC_PER_SEC == y / USEC_PER_SEC);
+
+ assert_se(format_timestamp_style(buf, sizeof(buf), x, TIMESTAMP_US));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+ assert_se(x == y);
+
+ assert_se(format_timestamp_style(buf, sizeof(buf), x, TIMESTAMP_US_UTC));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+ assert_se(x == y);
+
+ assert_se(format_timestamp_relative(buf, sizeof(buf), x));
+ log_info("%s", buf);
+ assert_se(parse_timestamp(buf, &y) >= 0);
+
+ /* The two calls above will run with a slightly different local time. Make sure we are in the same
+ * range however, but give enough leeway that this is unlikely to explode. And of course,
+ * format_timestamp_relative() scales the accuracy with the distance from the current time up to one
+ * month, cover for that too. */
+ assert_se(y > x ? y - x : x - y <= USEC_PER_MONTH + USEC_PER_DAY);
+ }
+}
+
+static void test_format_timestamp_utc_one(usec_t val, const char *result) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ const char *t;
+
+ t = format_timestamp_style(buf, sizeof(buf), val, TIMESTAMP_UTC);
+ assert_se(streq_ptr(t, result));
+}
+
+static void test_format_timestamp_utc(void) {
+ log_info("/* %s */", __func__);
+
+ test_format_timestamp_utc_one(0, NULL);
+ test_format_timestamp_utc_one(1, "Thu 1970-01-01 00:00:00 UTC");
+ test_format_timestamp_utc_one(USEC_PER_SEC, "Thu 1970-01-01 00:00:01 UTC");
+
+#if SIZEOF_TIME_T == 8
+ test_format_timestamp_utc_one(USEC_TIMESTAMP_FORMATTABLE_MAX, "Thu 9999-12-30 23:59:59 UTC");
+ test_format_timestamp_utc_one(USEC_TIMESTAMP_FORMATTABLE_MAX + 1, "--- XXXX-XX-XX XX:XX:XX");
+#elif SIZEOF_TIME_T == 4
+ test_format_timestamp_utc_one(USEC_TIMESTAMP_FORMATTABLE_MAX, "Tue 2038-01-19 03:14:07 UTC");
+ test_format_timestamp_utc_one(USEC_TIMESTAMP_FORMATTABLE_MAX + 1, "--- XXXX-XX-XX XX:XX:XX");
+#endif
+
+ test_format_timestamp_utc_one(USEC_INFINITY, NULL);
+}
+
+static void test_deserialize_dual_timestamp(void) {
+ int r;
+ dual_timestamp t;
+
+ log_info("/* %s */", __func__);
+
+ r = deserialize_dual_timestamp("1234 5678", &t);
+ assert_se(r == 0);
+ assert_se(t.realtime == 1234);
+ assert_se(t.monotonic == 5678);
+
+ r = deserialize_dual_timestamp("1234x 5678", &t);
+ assert_se(r == -EINVAL);
+
+ r = deserialize_dual_timestamp("1234 5678y", &t);
+ assert_se(r == -EINVAL);
+
+ r = deserialize_dual_timestamp("-1234 5678", &t);
+ assert_se(r == -EINVAL);
+
+ r = deserialize_dual_timestamp("1234 -5678", &t);
+ assert_se(r == -EINVAL);
+
+ /* Check that output wasn't modified. */
+ assert_se(t.realtime == 1234);
+ assert_se(t.monotonic == 5678);
+
+ r = deserialize_dual_timestamp("+123 567", &t);
+ assert_se(r == 0);
+ assert_se(t.realtime == 123);
+ assert_se(t.monotonic == 567);
+
+ /* Check that we get "infinity" on overflow. */
+ r = deserialize_dual_timestamp("18446744073709551617 0", &t);
+ assert_se(r == 0);
+ assert_se(t.realtime == USEC_INFINITY);
+ assert_se(t.monotonic == 0);
+}
+
+static void assert_similar(usec_t a, usec_t b) {
+ usec_t d;
+
+ if (a > b)
+ d = a - b;
+ else
+ d = b - a;
+
+ assert_se(d < 10*USEC_PER_SEC);
+}
+
+static void test_usec_shift_clock(void) {
+ usec_t rt, mn, bt;
+
+ log_info("/* %s */", __func__);
+
+ rt = now(CLOCK_REALTIME);
+ mn = now(CLOCK_MONOTONIC);
+ bt = now(clock_boottime_or_monotonic());
+
+ assert_se(usec_shift_clock(USEC_INFINITY, CLOCK_REALTIME, CLOCK_MONOTONIC) == USEC_INFINITY);
+
+ assert_similar(usec_shift_clock(rt + USEC_PER_HOUR, CLOCK_REALTIME, CLOCK_MONOTONIC), mn + USEC_PER_HOUR);
+ assert_similar(usec_shift_clock(rt + 2*USEC_PER_HOUR, CLOCK_REALTIME, clock_boottime_or_monotonic()), bt + 2*USEC_PER_HOUR);
+ assert_se(usec_shift_clock(rt + 3*USEC_PER_HOUR, CLOCK_REALTIME, CLOCK_REALTIME_ALARM) == rt + 3*USEC_PER_HOUR);
+
+ assert_similar(usec_shift_clock(mn + 4*USEC_PER_HOUR, CLOCK_MONOTONIC, CLOCK_REALTIME_ALARM), rt + 4*USEC_PER_HOUR);
+ assert_similar(usec_shift_clock(mn + 5*USEC_PER_HOUR, CLOCK_MONOTONIC, clock_boottime_or_monotonic()), bt + 5*USEC_PER_HOUR);
+ assert_se(usec_shift_clock(mn + 6*USEC_PER_HOUR, CLOCK_MONOTONIC, CLOCK_MONOTONIC) == mn + 6*USEC_PER_HOUR);
+
+ assert_similar(usec_shift_clock(bt + 7*USEC_PER_HOUR, clock_boottime_or_monotonic(), CLOCK_MONOTONIC), mn + 7*USEC_PER_HOUR);
+ assert_similar(usec_shift_clock(bt + 8*USEC_PER_HOUR, clock_boottime_or_monotonic(), CLOCK_REALTIME_ALARM), rt + 8*USEC_PER_HOUR);
+ assert_se(usec_shift_clock(bt + 9*USEC_PER_HOUR, clock_boottime_or_monotonic(), clock_boottime_or_monotonic()) == bt + 9*USEC_PER_HOUR);
+
+ if (mn > USEC_PER_MINUTE) {
+ assert_similar(usec_shift_clock(rt - 30 * USEC_PER_SEC, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC), mn - 30 * USEC_PER_SEC);
+ assert_similar(usec_shift_clock(rt - 50 * USEC_PER_SEC, CLOCK_REALTIME, clock_boottime_or_monotonic()), bt - 50 * USEC_PER_SEC);
+ }
+}
+
+static void test_in_utc_timezone(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(setenv("TZ", ":UTC", 1) >= 0);
+ assert_se(in_utc_timezone());
+ assert_se(streq(tzname[0], "UTC"));
+ assert_se(streq(tzname[1], "UTC"));
+ assert_se(timezone == 0);
+ assert_se(daylight == 0);
+
+ assert_se(setenv("TZ", ":Europe/Berlin", 1) >= 0);
+ assert_se(!in_utc_timezone());
+ assert_se(streq(tzname[0], "CET"));
+ assert_se(streq(tzname[1], "CEST"));
+
+ assert_se(unsetenv("TZ") == 0);
+}
+
+static void test_map_clock_usec(void) {
+ usec_t nowr, x, y, z;
+
+ log_info("/* %s */", __func__);
+ nowr = now(CLOCK_REALTIME);
+
+ x = nowr; /* right now */
+ y = map_clock_usec(x, CLOCK_REALTIME, CLOCK_MONOTONIC);
+ z = map_clock_usec(y, CLOCK_MONOTONIC, CLOCK_REALTIME);
+ /* Converting forth and back will introduce inaccuracies, since we cannot query both clocks atomically, but it should be small. Even on the slowest CI smaller than 1h */
+
+ assert_se((z > x ? z - x : x - z) < USEC_PER_HOUR);
+
+ assert_se(nowr < USEC_INFINITY - USEC_PER_DAY*7); /* overflow check */
+ x = nowr + USEC_PER_DAY*7; /* 1 week from now */
+ y = map_clock_usec(x, CLOCK_REALTIME, CLOCK_MONOTONIC);
+ assert_se(y > 0 && y < USEC_INFINITY);
+ z = map_clock_usec(y, CLOCK_MONOTONIC, CLOCK_REALTIME);
+ assert_se(z > 0 && z < USEC_INFINITY);
+ assert_se((z > x ? z - x : x - z) < USEC_PER_HOUR);
+
+ assert_se(nowr > USEC_PER_DAY * 7); /* underflow check */
+ x = nowr - USEC_PER_DAY*7; /* 1 week ago */
+ y = map_clock_usec(x, CLOCK_REALTIME, CLOCK_MONOTONIC);
+ if (y != 0) { /* might underflow if machine is not up long enough for the monotonic clock to be beyond 1w */
+ assert_se(y < USEC_INFINITY);
+ z = map_clock_usec(y, CLOCK_MONOTONIC, CLOCK_REALTIME);
+ assert_se(z > 0 && z < USEC_INFINITY);
+ assert_se((z > x ? z - x : x - z) < USEC_PER_HOUR);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ log_info("realtime=" USEC_FMT "\n"
+ "monotonic=" USEC_FMT "\n"
+ "boottime=" USEC_FMT "\n",
+ now(CLOCK_REALTIME),
+ now(CLOCK_MONOTONIC),
+ now(clock_boottime_or_monotonic()));
+
+ test_parse_sec();
+ test_parse_sec_fix_0();
+ test_parse_sec_def_infinity();
+ test_parse_time();
+ test_parse_nsec();
+ test_format_timespan(1);
+ test_format_timespan(USEC_PER_MSEC);
+ test_format_timespan(USEC_PER_SEC);
+ test_timezone_is_valid();
+ test_get_timezones();
+ test_usec_add();
+ test_usec_sub_signed();
+ test_usec_sub_unsigned();
+ test_format_timestamp();
+ test_format_timestamp_utc();
+ test_deserialize_dual_timestamp();
+ test_usec_shift_clock();
+ test_in_utc_timezone();
+ test_map_clock_usec();
+
+ /* Ensure time_t is signed */
+ assert_cc((time_t) -1 < (time_t) 1);
+
+ /* Ensure TIME_T_MAX works correctly */
+ uintmax_t x = TIME_T_MAX;
+ x++;
+ assert((time_t) x < 0);
+
+ return 0;
+}
diff --git a/src/test/test-tmpfiles.c b/src/test/test-tmpfiles.c
new file mode 100644
index 0000000..4c3389a
--- /dev/null
+++ b/src/test/test-tmpfiles.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+int main(int argc, char** argv) {
+ _cleanup_free_ char *cmd = NULL, *cmd2 = NULL, *ans = NULL, *ans2 = NULL, *d = NULL, *tmp = NULL, *line = NULL;
+ _cleanup_close_ int fd = -1, fd2 = -1;
+ const char *p = argv[1] ?: "/tmp";
+ char *pattern;
+
+ test_setup_logging(LOG_DEBUG);
+
+ pattern = strjoina(p, "/systemd-test-XXXXXX");
+
+ fd = open_tmpfile_unlinkable(p, O_RDWR|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ assert_se(asprintf(&cmd, "ls -l /proc/"PID_FMT"/fd/%d", getpid_cached(), fd) > 0);
+ (void) system(cmd);
+ assert_se(readlink_malloc(cmd + 6, &ans) >= 0);
+ log_debug("link1: %s", ans);
+ assert_se(endswith(ans, " (deleted)"));
+
+ fd2 = mkostemp_safe(pattern);
+ assert_se(fd >= 0);
+ assert_se(unlink(pattern) == 0);
+
+ assert_se(asprintf(&cmd2, "ls -l /proc/"PID_FMT"/fd/%d", getpid_cached(), fd2) > 0);
+ (void) system(cmd2);
+ assert_se(readlink_malloc(cmd2 + 6, &ans2) >= 0);
+ log_debug("link2: %s", ans2);
+ assert_se(endswith(ans2, " (deleted)"));
+
+ pattern = strjoina(p, "/tmpfiles-test");
+ assert_se(tempfn_random(pattern, NULL, &d) >= 0);
+
+ fd = open_tmpfile_linkable(d, O_RDWR|O_CLOEXEC, &tmp);
+ assert_se(fd >= 0);
+ assert_se(write(fd, "foobar\n", 7) == 7);
+
+ assert_se(touch(d) >= 0);
+ assert_se(link_tmpfile(fd, tmp, d) == -EEXIST);
+ assert_se(unlink(d) >= 0);
+ assert_se(link_tmpfile(fd, tmp, d) >= 0);
+
+ assert_se(read_one_line_file(d, &line) >= 0);
+ assert_se(streq(line, "foobar"));
+ assert_se(unlink(d) >= 0);
+
+ return 0;
+}
diff --git a/src/test/test-udev-util.c b/src/test/test-udev-util.c
new file mode 100644
index 0000000..b0213f8
--- /dev/null
+++ b/src/test/test-udev-util.c
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "udev-util.h"
+
+static void test_udev_rule_parse_value_one(const char *in, const char *expected_value, int expected_retval) {
+ _cleanup_free_ char *str = NULL;
+ char *value = UINT_TO_PTR(0x12345678U);
+ char *endpos = UINT_TO_PTR(0x87654321U);
+
+ assert_se(str = strdup(in));
+ assert_se(udev_rule_parse_value(str, &value, &endpos) == expected_retval);
+ if (expected_retval < 0) {
+ /* not modified on failure */
+ assert_se(value == UINT_TO_PTR(0x12345678U));
+ assert_se(endpos == UINT_TO_PTR(0x87654321U));
+ } else {
+ assert_se(streq_ptr(value, expected_value));
+ assert_se(endpos == str + strlen(in));
+ }
+}
+
+static void test_parse_value(void) {
+ /* input: "valid operand"
+ * parsed: valid operand
+ * use the following command to help generate textual C strings:
+ * python3 -c 'import json; print(json.dumps(input()))' */
+ test_udev_rule_parse_value_one(
+ "\"valid operand\"",
+ "valid operand",
+ 0
+ );
+}
+
+static void test_parse_value_with_backslashes(void) {
+ /* input: "va'l\'id\"op\"erand"
+ * parsed: va'l\'id"op"erand */
+ test_udev_rule_parse_value_one(
+ "\"va'l\\'id\\\"op\\\"erand\"",
+ "va'l\\'id\"op\"erand",
+ 0
+ );
+}
+
+static void test_parse_value_no_quotes(void) {
+ test_udev_rule_parse_value_one(
+ "no quotes",
+ 0,
+ -EINVAL
+ );
+}
+
+static void test_parse_value_noescape(void) {
+ test_udev_rule_parse_value_one(
+ "\"\\\\a\\b\\x\\y\"",
+ "\\\\a\\b\\x\\y",
+ 0
+ );
+}
+
+static void test_parse_value_nul(void) {
+ test_udev_rule_parse_value_one(
+ "\"reject\0nul\"",
+ 0,
+ -EINVAL
+ );
+}
+
+static void test_parse_value_escape_nothing(void) {
+ /* input: e"" */
+ test_udev_rule_parse_value_one(
+ "e\"\"",
+ "",
+ 0
+ );
+}
+
+static void test_parse_value_escape_nothing2(void) {
+ /* input: e"1234" */
+ test_udev_rule_parse_value_one(
+ "e\"1234\"",
+ "1234",
+ 0
+ );
+}
+
+static void test_parse_value_escape_double_quote(void) {
+ /* input: e"\"" */
+ test_udev_rule_parse_value_one(
+ "e\"\\\"\"",
+ "\"",
+ 0
+ );
+}
+
+static void test_parse_value_escape_backslash(void) {
+ /* input: e"\ */
+ test_udev_rule_parse_value_one(
+ "e\"\\",
+ 0,
+ -EINVAL
+ );
+ /* input: e"\" */
+ test_udev_rule_parse_value_one(
+ "e\"\\\"",
+ 0,
+ -EINVAL
+ );
+ /* input: e"\\" */
+ test_udev_rule_parse_value_one(
+ "e\"\\\\\"",
+ "\\",
+ 0
+ );
+ /* input: e"\\\" */
+ test_udev_rule_parse_value_one(
+ "e\"\\\\\\\"",
+ 0,
+ -EINVAL
+ );
+ /* input: e"\\\"" */
+ test_udev_rule_parse_value_one(
+ "e\"\\\\\\\"\"",
+ "\\\"",
+ 0
+ );
+ /* input: e"\\\\" */
+ test_udev_rule_parse_value_one(
+ "e\"\\\\\\\\\"",
+ "\\\\",
+ 0
+ );
+}
+
+static void test_parse_value_newline(void) {
+ /* input: e"operand with newline\n" */
+ test_udev_rule_parse_value_one(
+ "e\"operand with newline\\n\"",
+ "operand with newline\n",
+ 0
+ );
+}
+
+static void test_parse_value_escaped(void) {
+ /* input: e"single\rcharacter\t\aescape\bsequence" */
+ test_udev_rule_parse_value_one(
+ "e\"single\\rcharacter\\t\\aescape\\bsequence\"",
+ "single\rcharacter\t\aescape\bsequence",
+ 0
+ );
+}
+
+static void test_parse_value_invalid_escape(void) {
+ /* input: e"reject\invalid escape sequence" */
+ test_udev_rule_parse_value_one(
+ "e\"reject\\invalid escape sequence",
+ 0,
+ -EINVAL
+ );
+}
+
+static void test_parse_value_invalid_termination(void) {
+ /* input: e"\ */
+ test_udev_rule_parse_value_one(
+ "e\"\\",
+ 0,
+ -EINVAL
+ );
+}
+
+static void test_parse_value_unicode(void) {
+ /* input: "s\u1d1c\u1d04\u029c \u1d1c\u0274\u026a\u1d04\u1d0f\u1d05\u1d07 \U0001d568\U0001d560\U0001d568" */
+ test_udev_rule_parse_value_one(
+ "e\"s\\u1d1c\\u1d04\\u029c \\u1d1c\\u0274\\u026a\\u1d04\\u1d0f\\u1d05\\u1d07 \\U0001d568\\U0001d560\\U0001d568\"",
+ "s\xe1\xb4\x9c\xe1\xb4\x84\xca\x9c \xe1\xb4\x9c\xc9\xb4\xc9\xaa\xe1\xb4\x84\xe1\xb4\x8f\xe1\xb4\x85\xe1\xb4\x87 \xf0\x9d\x95\xa8\xf0\x9d\x95\xa0\xf0\x9d\x95\xa8",
+ 0
+ );
+}
+
+int main(int argc, char **argv) {
+ test_parse_value();
+ test_parse_value_with_backslashes();
+ test_parse_value_no_quotes();
+ test_parse_value_nul();
+ test_parse_value_noescape();
+
+ test_parse_value_escape_nothing();
+ test_parse_value_escape_nothing2();
+ test_parse_value_escape_double_quote();
+ test_parse_value_escape_backslash();
+ test_parse_value_newline();
+ test_parse_value_escaped();
+ test_parse_value_invalid_escape();
+ test_parse_value_invalid_termination();
+ test_parse_value_unicode();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-udev.c b/src/test/test-udev.c
new file mode 100644
index 0000000..8acf86d
--- /dev/null
+++ b/src/test/test-udev.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2003-2004 Greg Kroah-Hartman <greg@kroah.com>
+***/
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/signalfd.h>
+#include <unistd.h>
+
+#include "build.h"
+#include "device-private.h"
+#include "fs-util.h"
+#include "log.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "namespace-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "udev-event.h"
+
+static int fake_filesystems(void) {
+ static const struct fakefs {
+ const char *src;
+ const char *target;
+ const char *error;
+ bool ignore_mount_error;
+ } fakefss[] = {
+ { "test/tmpfs/sys", "/sys", "Failed to mount test /sys", false },
+ { "test/tmpfs/dev", "/dev", "Failed to mount test /dev", false },
+ { "test/run", "/run", "Failed to mount test /run", false },
+ { "test/run", "/etc/udev/rules.d", "Failed to mount empty /etc/udev/rules.d", true },
+ { "test/run", UDEVLIBEXECDIR "/rules.d", "Failed to mount empty " UDEVLIBEXECDIR "/rules.d", true },
+ };
+ int r;
+
+ r = detach_mount_namespace();
+ if (r < 0)
+ return log_error_errno(r, "Failed to detach mount namespace: %m");
+
+ for (size_t i = 0; i < ELEMENTSOF(fakefss); i++) {
+ r = mount_nofollow_verbose(fakefss[i].ignore_mount_error ? LOG_NOTICE : LOG_ERR,
+ fakefss[i].src, fakefss[i].target, NULL, MS_BIND, NULL);
+ if (r < 0 && !fakefss[i].ignore_mount_error)
+ return r;
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
+ _cleanup_(udev_event_freep) UdevEvent *event = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *devpath, *devname, *action;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ if (!IN_SET(argc, 2, 3))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program needs one or two arguments, %d given", argc - 1);
+
+ r = fake_filesystems();
+ if (r < 0)
+ return r;
+
+ /* Let's make sure the test runs with selinux assumed disabled. */
+#if HAVE_SELINUX
+ fini_selinuxmnt();
+#endif
+ mac_selinux_retest();
+
+ if (argc == 2) {
+ if (!streq(argv[1], "check"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown argument: %s", argv[1]);
+
+ return 0;
+ }
+
+ log_debug("version %s", GIT_VERSION);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ action = argv[1];
+ devpath = argv[2];
+
+ assert_se(udev_rules_load(&rules, RESOLVE_NAME_EARLY) == 0);
+
+ const char *syspath;
+ syspath = strjoina("/sys", devpath);
+ r = device_new_from_synthetic_event(&dev, syspath, action);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open device '%s'", devpath);
+
+ assert_se(event = udev_event_new(dev, 0, NULL));
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGHUP, SIGCHLD, -1) >= 0);
+
+ /* do what devtmpfs usually provides us */
+ if (sd_device_get_devname(dev, &devname) >= 0) {
+ const char *subsystem;
+ mode_t mode = 0600;
+
+ if (sd_device_get_subsystem(dev, &subsystem) >= 0 && streq(subsystem, "block"))
+ mode |= S_IFBLK;
+ else
+ mode |= S_IFCHR;
+
+ if (!streq(action, "remove")) {
+ dev_t devnum = makedev(0, 0);
+
+ (void) mkdir_parents_label(devname, 0755);
+ (void) sd_device_get_devnum(dev, &devnum);
+ if (mknod(devname, mode, devnum) < 0)
+ return log_error_errno(errno, "mknod() failed for '%s': %m", devname);
+ } else {
+ if (unlink(devname) < 0)
+ return log_error_errno(errno, "unlink('%s') failed: %m", devname);
+ (void) rmdir_parents(devname, "/");
+ }
+ }
+
+ udev_event_execute_rules(event, 3 * USEC_PER_SEC, SIGKILL, NULL, rules);
+ udev_event_execute_run(event, 3 * USEC_PER_SEC, SIGKILL);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/test/test-uid-range.c b/src/test/test-uid-range.c
new file mode 100644
index 0000000..16cbab0
--- /dev/null
+++ b/src/test/test-uid-range.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "uid-range.h"
+#include "user-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_free_ UidRange *p = NULL;
+ unsigned n = 0;
+ uid_t search;
+
+ assert_se(uid_range_add_str(&p, &n, "500-999") >= 0);
+ assert_se(n == 1);
+ assert_se(p[0].start == 500);
+ assert_se(p[0].nr == 500);
+
+ assert_se(!uid_range_contains(p, n, 499));
+ assert_se(uid_range_contains(p, n, 500));
+ assert_se(uid_range_contains(p, n, 999));
+ assert_se(!uid_range_contains(p, n, 1000));
+
+ search = UID_INVALID;
+ assert_se(uid_range_next_lower(p, n, &search));
+ assert_se(search == 999);
+ assert_se(uid_range_next_lower(p, n, &search));
+ assert_se(search == 998);
+ search = 501;
+ assert_se(uid_range_next_lower(p, n, &search));
+ assert_se(search == 500);
+ assert_se(uid_range_next_lower(p, n, &search) == -EBUSY);
+
+ assert_se(uid_range_add_str(&p, &n, "1000") >= 0);
+ assert_se(n == 1);
+ assert_se(p[0].start == 500);
+ assert_se(p[0].nr == 501);
+
+ assert_se(uid_range_add_str(&p, &n, "30-40") >= 0);
+ assert_se(n == 2);
+ assert_se(p[0].start == 30);
+ assert_se(p[0].nr == 11);
+ assert_se(p[1].start == 500);
+ assert_se(p[1].nr == 501);
+
+ assert_se(uid_range_add_str(&p, &n, "60-70") >= 0);
+ assert_se(n == 3);
+ assert_se(p[0].start == 30);
+ assert_se(p[0].nr == 11);
+ assert_se(p[1].start == 60);
+ assert_se(p[1].nr == 11);
+ assert_se(p[2].start == 500);
+ assert_se(p[2].nr == 501);
+
+ assert_se(uid_range_add_str(&p, &n, "20-2000") >= 0);
+ assert_se(n == 1);
+ assert_se(p[0].start == 20);
+ assert_se(p[0].nr == 1981);
+
+ assert_se(uid_range_add_str(&p, &n, "2002") >= 0);
+ assert_se(n == 2);
+ assert_se(p[0].start == 20);
+ assert_se(p[0].nr == 1981);
+ assert_se(p[1].start == 2002);
+ assert_se(p[1].nr == 1);
+
+ assert_se(uid_range_add_str(&p, &n, "2001") >= 0);
+ assert_se(n == 1);
+ assert_se(p[0].start == 20);
+ assert_se(p[0].nr == 1983);
+
+ return 0;
+}
diff --git a/src/test/test-umask-util.c b/src/test/test-umask-util.c
new file mode 100644
index 0000000..df3ae98
--- /dev/null
+++ b/src/test/test-umask-util.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "tests.h"
+#include "umask-util.h"
+
+int main(int argc, char *argv[]) {
+ size_t n;
+ mode_t u;
+
+ test_setup_logging(LOG_DEBUG);
+
+ u = umask(0111);
+
+ n = 0;
+ RUN_WITH_UMASK(0123) {
+ assert_se(umask(000) == 0123);
+ n++;
+ }
+
+ assert_se(n == 1);
+ assert_se(umask(u) == 0111);
+
+ RUN_WITH_UMASK(0135) {
+ assert_se(umask(000) == 0135);
+ n++;
+ }
+
+ assert_se(n == 2);
+ assert_se(umask(0111) == u);
+
+ RUN_WITH_UMASK(0315) {
+ assert_se(umask(000) == 0315);
+ n++;
+ break;
+ }
+
+ assert_se(n == 3);
+ assert_se(umask(u) == 0111);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/test/test-umount.c b/src/test/test-umount.c
new file mode 100644
index 0000000..676c6dd
--- /dev/null
+++ b/src/test/test-umount.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "log.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "umount.h"
+#include "util.h"
+
+static void test_mount_points_list(const char *fname) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, mp_list_head);
+ _cleanup_free_ char *testdata_fname = NULL;
+ MountPoint *m;
+
+ log_info("/* %s(\"%s\") */", __func__, fname ?: "/proc/self/mountinfo");
+
+ if (fname) {
+ assert_se(get_testdata_dir(fname, &testdata_fname) >= 0);
+ fname = testdata_fname;
+ }
+
+ LIST_HEAD_INIT(mp_list_head);
+ assert_se(mount_points_list_get(fname, &mp_list_head) >= 0);
+
+ LIST_FOREACH(mount_point, m, mp_list_head)
+ log_debug("path=%s o=%s f=0x%lx try-ro=%s dev=%u:%u",
+ m->path,
+ strempty(m->remount_options),
+ m->remount_flags,
+ yes_no(m->try_remount_ro),
+ major(m->devnum), minor(m->devnum));
+}
+
+static void test_swap_list(const char *fname) {
+ _cleanup_(mount_points_list_free) LIST_HEAD(MountPoint, mp_list_head);
+ _cleanup_free_ char *testdata_fname = NULL;
+ MountPoint *m;
+ int r;
+
+ log_info("/* %s(\"%s\") */", __func__, fname ?: "/proc/swaps");
+
+ if (fname) {
+ assert_se(get_testdata_dir(fname, &testdata_fname) >= 0);
+ fname = testdata_fname;
+ }
+
+ LIST_HEAD_INIT(mp_list_head);
+ r = swap_list_get(fname, &mp_list_head);
+ if (ERRNO_IS_PRIVILEGE(r))
+ return;
+ assert_se(r >= 0);
+
+ LIST_FOREACH(mount_point, m, mp_list_head)
+ log_debug("path=%s o=%s f=0x%lx try-ro=%s dev=%u:%u",
+ m->path,
+ strempty(m->remount_options),
+ m->remount_flags,
+ yes_no(m->try_remount_ro),
+ major(m->devnum), minor(m->devnum));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_mount_points_list(NULL);
+ test_mount_points_list("/test-umount/empty.mountinfo");
+ test_mount_points_list("/test-umount/garbled.mountinfo");
+ test_mount_points_list("/test-umount/rhbug-1554943.mountinfo");
+
+ test_swap_list(NULL);
+ test_swap_list("/test-umount/example.swaps");
+}
diff --git a/src/test/test-unaligned.c b/src/test/test-unaligned.c
new file mode 100644
index 0000000..b4d380b
--- /dev/null
+++ b/src/test/test-unaligned.c
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "memory-util.h"
+#include "sparse-endian.h"
+#include "unaligned.h"
+
+static uint8_t data[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+};
+
+static void test_be(void) {
+ uint8_t scratch[16];
+
+ assert_se(unaligned_read_be16(&data[0]) == 0x0001);
+ assert_se(unaligned_read_be16(&data[1]) == 0x0102);
+
+ assert_se(unaligned_read_be32(&data[0]) == 0x00010203);
+ assert_se(unaligned_read_be32(&data[1]) == 0x01020304);
+ assert_se(unaligned_read_be32(&data[2]) == 0x02030405);
+ assert_se(unaligned_read_be32(&data[3]) == 0x03040506);
+
+ assert_se(unaligned_read_be64(&data[0]) == 0x0001020304050607);
+ assert_se(unaligned_read_be64(&data[1]) == 0x0102030405060708);
+ assert_se(unaligned_read_be64(&data[2]) == 0x0203040506070809);
+ assert_se(unaligned_read_be64(&data[3]) == 0x030405060708090a);
+ assert_se(unaligned_read_be64(&data[4]) == 0x0405060708090a0b);
+ assert_se(unaligned_read_be64(&data[5]) == 0x05060708090a0b0c);
+ assert_se(unaligned_read_be64(&data[6]) == 0x060708090a0b0c0d);
+ assert_se(unaligned_read_be64(&data[7]) == 0x0708090a0b0c0d0e);
+
+ zero(scratch);
+ unaligned_write_be16(&scratch[0], 0x0001);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint16_t)) == 0);
+ zero(scratch);
+ unaligned_write_be16(&scratch[1], 0x0102);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint16_t)) == 0);
+
+ zero(scratch);
+ unaligned_write_be32(&scratch[0], 0x00010203);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_be32(&scratch[1], 0x01020304);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_be32(&scratch[2], 0x02030405);
+ assert_se(memcmp(&scratch[2], &data[2], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_be32(&scratch[3], 0x03040506);
+ assert_se(memcmp(&scratch[3], &data[3], sizeof(uint32_t)) == 0);
+
+ zero(scratch);
+ unaligned_write_be64(&scratch[0], 0x0001020304050607);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[1], 0x0102030405060708);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[2], 0x0203040506070809);
+ assert_se(memcmp(&scratch[2], &data[2], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[3], 0x030405060708090a);
+ assert_se(memcmp(&scratch[3], &data[3], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[4], 0x0405060708090a0b);
+ assert_se(memcmp(&scratch[4], &data[4], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[5], 0x05060708090a0b0c);
+ assert_se(memcmp(&scratch[5], &data[5], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[6], 0x060708090a0b0c0d);
+ assert_se(memcmp(&scratch[6], &data[6], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_be64(&scratch[7], 0x0708090a0b0c0d0e);
+ assert_se(memcmp(&scratch[7], &data[7], sizeof(uint64_t)) == 0);
+}
+
+static void test_le(void) {
+ uint8_t scratch[16];
+
+ assert_se(unaligned_read_le16(&data[0]) == 0x0100);
+ assert_se(unaligned_read_le16(&data[1]) == 0x0201);
+
+ assert_se(unaligned_read_le32(&data[0]) == 0x03020100);
+ assert_se(unaligned_read_le32(&data[1]) == 0x04030201);
+ assert_se(unaligned_read_le32(&data[2]) == 0x05040302);
+ assert_se(unaligned_read_le32(&data[3]) == 0x06050403);
+
+ assert_se(unaligned_read_le64(&data[0]) == 0x0706050403020100);
+ assert_se(unaligned_read_le64(&data[1]) == 0x0807060504030201);
+ assert_se(unaligned_read_le64(&data[2]) == 0x0908070605040302);
+ assert_se(unaligned_read_le64(&data[3]) == 0x0a09080706050403);
+ assert_se(unaligned_read_le64(&data[4]) == 0x0b0a090807060504);
+ assert_se(unaligned_read_le64(&data[5]) == 0x0c0b0a0908070605);
+ assert_se(unaligned_read_le64(&data[6]) == 0x0d0c0b0a09080706);
+ assert_se(unaligned_read_le64(&data[7]) == 0x0e0d0c0b0a090807);
+
+ zero(scratch);
+ unaligned_write_le16(&scratch[0], 0x0100);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint16_t)) == 0);
+ zero(scratch);
+ unaligned_write_le16(&scratch[1], 0x0201);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint16_t)) == 0);
+
+ zero(scratch);
+ unaligned_write_le32(&scratch[0], 0x03020100);
+
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_le32(&scratch[1], 0x04030201);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_le32(&scratch[2], 0x05040302);
+ assert_se(memcmp(&scratch[2], &data[2], sizeof(uint32_t)) == 0);
+ zero(scratch);
+ unaligned_write_le32(&scratch[3], 0x06050403);
+ assert_se(memcmp(&scratch[3], &data[3], sizeof(uint32_t)) == 0);
+
+ zero(scratch);
+ unaligned_write_le64(&scratch[0], 0x0706050403020100);
+ assert_se(memcmp(&scratch[0], &data[0], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[1], 0x0807060504030201);
+ assert_se(memcmp(&scratch[1], &data[1], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[2], 0x0908070605040302);
+ assert_se(memcmp(&scratch[2], &data[2], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[3], 0x0a09080706050403);
+ assert_se(memcmp(&scratch[3], &data[3], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[4], 0x0B0A090807060504);
+ assert_se(memcmp(&scratch[4], &data[4], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[5], 0x0c0b0a0908070605);
+ assert_se(memcmp(&scratch[5], &data[5], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[6], 0x0d0c0b0a09080706);
+ assert_se(memcmp(&scratch[6], &data[6], sizeof(uint64_t)) == 0);
+ zero(scratch);
+ unaligned_write_le64(&scratch[7], 0x0e0d0c0b0a090807);
+ assert_se(memcmp(&scratch[7], &data[7], sizeof(uint64_t)) == 0);
+}
+
+static void test_ne(void) {
+ uint16_t x = 4711;
+ uint32_t y = 123456;
+ uint64_t z = 9876543210;
+
+ /* Note that we don't bother actually testing alignment issues in this function, after all the _ne() functions
+ * are just aliases for the _le() or _be() implementations, which we test extensively above. Hence, in this
+ * function, just ensure that they map to the right version on the local architecture. */
+
+ assert_se(unaligned_read_ne16(&x) == 4711);
+ assert_se(unaligned_read_ne32(&y) == 123456);
+ assert_se(unaligned_read_ne64(&z) == 9876543210);
+
+ unaligned_write_ne16(&x, 1);
+ unaligned_write_ne32(&y, 2);
+ unaligned_write_ne64(&z, 3);
+
+ assert_se(x == 1);
+ assert_se(y == 2);
+ assert_se(z == 3);
+}
+
+int main(int argc, const char *argv[]) {
+ test_be();
+ test_le();
+ test_ne();
+ return 0;
+}
diff --git a/src/test/test-unit-file.c b/src/test/test-unit-file.c
new file mode 100644
index 0000000..8f96790
--- /dev/null
+++ b/src/test/test-unit-file.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "path-lookup.h"
+#include "set.h"
+#include "special.h"
+#include "strv.h"
+#include "tests.h"
+#include "unit-file.h"
+
+static void test_unit_validate_alias_symlink_and_warn(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a.service", "/other/b.service") == 0);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a.service", "/other/b.socket") == -EXDEV);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a.service", "/other/b.foobar") == -EXDEV);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@.service", "/other/b@.service") == 0);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@.service", "/other/b@.socket") == -EXDEV);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@XXX.service", "/other/b@YYY.service") == -EXDEV);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@XXX.service", "/other/b@YYY.socket") == -EXDEV);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@.service", "/other/b@YYY.service") == -EXDEV);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@XXX.service", "/other/b@XXX.service") == 0);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@XXX.service", "/other/b@.service") == 0);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@.service", "/other/b.service") == -EXDEV);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a.service", "/other/b@.service") == -EXDEV);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a@.slice", "/other/b.slice") == -EINVAL);
+ assert_se(unit_validate_alias_symlink_and_warn("/path/a.slice", "/other/b.slice") == -EINVAL);
+}
+
+static void test_unit_file_build_name_map(char **ids) {
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ _cleanup_hashmap_free_ Hashmap *unit_ids = NULL;
+ _cleanup_hashmap_free_ Hashmap *unit_names = NULL;
+ const char *k, *dst;
+ char **v;
+ usec_t mtime = 0;
+ int r;
+
+ assert_se(lookup_paths_init(&lp, UNIT_FILE_SYSTEM, 0, NULL) >= 0);
+
+ assert_se(unit_file_build_name_map(&lp, &mtime, &unit_ids, &unit_names, NULL) == 1);
+
+ HASHMAP_FOREACH_KEY(dst, k, unit_ids)
+ log_info("ids: %s → %s", k, dst);
+
+ HASHMAP_FOREACH_KEY(v, k, unit_names) {
+ _cleanup_free_ char *j = strv_join(v, ", ");
+ log_info("aliases: %s ← %s", k, j);
+ }
+
+ char buf[FORMAT_TIMESTAMP_MAX];
+ log_debug("Last modification time: %s", format_timestamp(buf, sizeof buf, mtime));
+
+ r = unit_file_build_name_map(&lp, &mtime, &unit_ids, &unit_names, NULL);
+ assert_se(IN_SET(r, 0, 1));
+ if (r == 0)
+ log_debug("Cache rebuild skipped based on mtime.");
+
+ char **id;
+ STRV_FOREACH(id, ids) {
+ const char *fragment, *name;
+ _cleanup_set_free_free_ Set *names = NULL;
+ log_info("*** %s ***", *id);
+ r = unit_file_find_fragment(unit_ids,
+ unit_names,
+ *id,
+ &fragment,
+ &names);
+ assert(r == 0);
+ log_info("fragment: %s", fragment);
+ log_info("names:");
+ SET_FOREACH(name, names)
+ log_info(" %s", name);
+ }
+}
+
+static void test_runlevel_to_target(void) {
+ log_info("/* %s */", __func__);
+
+ in_initrd_force(false);
+ assert_se(streq_ptr(runlevel_to_target(NULL), NULL));
+ assert_se(streq_ptr(runlevel_to_target("unknown-runlevel"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("rd.unknown-runlevel"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("3"), SPECIAL_MULTI_USER_TARGET));
+ assert_se(streq_ptr(runlevel_to_target("rd.rescue"), NULL));
+
+ in_initrd_force(true);
+ assert_se(streq_ptr(runlevel_to_target(NULL), NULL));
+ assert_se(streq_ptr(runlevel_to_target("unknown-runlevel"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("rd.unknown-runlevel"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("3"), NULL));
+ assert_se(streq_ptr(runlevel_to_target("rd.rescue"), SPECIAL_RESCUE_TARGET));
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_unit_validate_alias_symlink_and_warn();
+ test_unit_file_build_name_map(strv_skip(argv, 1));
+ test_runlevel_to_target();
+
+ return 0;
+}
diff --git a/src/test/test-unit-name.c b/src/test/test-unit-name.c
new file mode 100644
index 0000000..ece78aa
--- /dev/null
+++ b/src/test/test-unit-name.c
@@ -0,0 +1,907 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "all-units.h"
+#include "glob-util.h"
+#include "format-util.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "manager.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "special.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "tests.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "unit-printf.h"
+#include "unit.h"
+#include "user-util.h"
+#include "util.h"
+
+static void test_unit_name_is_valid_one(const char *name, UnitNameFlags flags, bool expected) {
+ log_info("%s ( %s%s%s ): %s",
+ name,
+ (flags & UNIT_NAME_PLAIN) ? "plain" : "",
+ (flags & UNIT_NAME_INSTANCE) ? " instance" : "",
+ (flags & UNIT_NAME_TEMPLATE) ? " template" : "",
+ yes_no(expected));
+ assert_se(unit_name_is_valid(name, flags) == expected);
+}
+
+static void test_unit_name_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_is_valid_one("foo.service", UNIT_NAME_ANY, true);
+ test_unit_name_is_valid_one("foo.service", UNIT_NAME_PLAIN, true);
+ test_unit_name_is_valid_one("foo.service", UNIT_NAME_INSTANCE, false);
+ test_unit_name_is_valid_one("foo.service", UNIT_NAME_TEMPLATE, false);
+ test_unit_name_is_valid_one("foo.service", UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE, false);
+
+ test_unit_name_is_valid_one("foo@bar.service", UNIT_NAME_ANY, true);
+ test_unit_name_is_valid_one("foo@bar.service", UNIT_NAME_PLAIN, false);
+ test_unit_name_is_valid_one("foo@bar.service", UNIT_NAME_INSTANCE, true);
+ test_unit_name_is_valid_one("foo@bar.service", UNIT_NAME_TEMPLATE, false);
+ test_unit_name_is_valid_one("foo@bar.service", UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE, true);
+
+ test_unit_name_is_valid_one("foo@bar@bar.service", UNIT_NAME_ANY, true);
+ test_unit_name_is_valid_one("foo@bar@bar.service", UNIT_NAME_PLAIN, false);
+ test_unit_name_is_valid_one("foo@bar@bar.service", UNIT_NAME_INSTANCE, true);
+ test_unit_name_is_valid_one("foo@bar@bar.service", UNIT_NAME_TEMPLATE, false);
+ test_unit_name_is_valid_one("foo@bar@bar.service", UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE, true);
+
+ test_unit_name_is_valid_one("foo@.service", UNIT_NAME_ANY, true);
+ test_unit_name_is_valid_one("foo@.service", UNIT_NAME_PLAIN, false);
+ test_unit_name_is_valid_one("foo@.service", UNIT_NAME_INSTANCE, false);
+ test_unit_name_is_valid_one("foo@.service", UNIT_NAME_TEMPLATE, true);
+ test_unit_name_is_valid_one("foo@.service", UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE, true);
+ test_unit_name_is_valid_one(".test.service", UNIT_NAME_PLAIN, true);
+ test_unit_name_is_valid_one(".test@.service", UNIT_NAME_TEMPLATE, true);
+ test_unit_name_is_valid_one("_strange::::.service", UNIT_NAME_ANY, true);
+
+ test_unit_name_is_valid_one(".service", UNIT_NAME_ANY, false);
+ test_unit_name_is_valid_one("", UNIT_NAME_ANY, false);
+ test_unit_name_is_valid_one("foo.waldo", UNIT_NAME_ANY, false);
+ test_unit_name_is_valid_one("@.service", UNIT_NAME_ANY, false);
+ test_unit_name_is_valid_one("@piep.service", UNIT_NAME_ANY, false);
+
+ test_unit_name_is_valid_one("user@1000.slice", UNIT_NAME_ANY, true);
+ test_unit_name_is_valid_one("user@1000.slice", UNIT_NAME_INSTANCE, true);
+ test_unit_name_is_valid_one("user@1000.slice", UNIT_NAME_TEMPLATE, false);
+
+ test_unit_name_is_valid_one("foo@%i.service", UNIT_NAME_ANY, false);
+ test_unit_name_is_valid_one("foo@%i.service", UNIT_NAME_INSTANCE, false);
+ test_unit_name_is_valid_one("foo@%%i.service", UNIT_NAME_INSTANCE, false);
+ test_unit_name_is_valid_one("foo@%%i%f.service", UNIT_NAME_INSTANCE, false);
+ test_unit_name_is_valid_one("foo@%F.service", UNIT_NAME_INSTANCE, false);
+
+ test_unit_name_is_valid_one("foo.target.wants/plain.service", UNIT_NAME_ANY, false);
+ test_unit_name_is_valid_one("foo.target.conf/foo.conf", UNIT_NAME_ANY, false);
+ test_unit_name_is_valid_one("foo.target.requires/plain.socket", UNIT_NAME_ANY, false);
+}
+
+static void test_unit_name_replace_instance_one(const char *pattern, const char *repl, const char *expected, int ret) {
+ _cleanup_free_ char *t = NULL;
+ assert_se(unit_name_replace_instance(pattern, repl, &t) == ret);
+ puts(strna(t));
+ assert_se(streq_ptr(t, expected));
+}
+
+static void test_unit_name_replace_instance(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_replace_instance_one("foo@.service", "waldo", "foo@waldo.service", 0);
+ test_unit_name_replace_instance_one("foo@xyz.service", "waldo", "foo@waldo.service", 0);
+ test_unit_name_replace_instance_one("xyz", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one("", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one("foo.service", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one(".service", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one("foo@", "waldo", NULL, -EINVAL);
+ test_unit_name_replace_instance_one("@bar", "waldo", NULL, -EINVAL);
+}
+
+static void test_unit_name_from_path_one(const char *path, const char *suffix, const char *expected, int ret) {
+ _cleanup_free_ char *t = NULL;
+
+ assert_se(unit_name_from_path(path, suffix, &t) == ret);
+ puts(strna(t));
+ assert_se(streq_ptr(t, expected));
+
+ if (t) {
+ _cleanup_free_ char *k = NULL;
+ assert_se(unit_name_to_path(t, &k) == 0);
+ puts(strna(k));
+ assert_se(path_equal(k, empty_to_root(path)));
+ }
+}
+
+static void test_unit_name_from_path(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_from_path_one("/waldo", ".mount", "waldo.mount", 0);
+ test_unit_name_from_path_one("/waldo/quuix", ".mount", "waldo-quuix.mount", 0);
+ test_unit_name_from_path_one("/waldo/quuix/", ".mount", "waldo-quuix.mount", 0);
+ test_unit_name_from_path_one("", ".mount", "-.mount", 0);
+ test_unit_name_from_path_one("/", ".mount", "-.mount", 0);
+ test_unit_name_from_path_one("///", ".mount", "-.mount", 0);
+ test_unit_name_from_path_one("/foo/../bar", ".mount", NULL, -EINVAL);
+ test_unit_name_from_path_one("/foo/./bar", ".mount", NULL, -EINVAL);
+ test_unit_name_from_path_one("/waldoaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ".mount", NULL, -EINVAL);
+}
+
+static void test_unit_name_from_path_instance_one(const char *pattern, const char *path, const char *suffix, const char *expected, int ret) {
+ _cleanup_free_ char *t = NULL;
+
+ assert_se(unit_name_from_path_instance(pattern, path, suffix, &t) == ret);
+ puts(strna(t));
+ assert_se(streq_ptr(t, expected));
+
+ if (t) {
+ _cleanup_free_ char *k = NULL, *v = NULL;
+
+ assert_se(unit_name_to_instance(t, &k) > 0);
+ assert_se(unit_name_path_unescape(k, &v) == 0);
+ assert_se(path_equal(v, empty_to_root(path)));
+ }
+}
+
+static void test_unit_name_from_path_instance(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_from_path_instance_one("waldo", "/waldo", ".mount", "waldo@waldo.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "/waldo////quuix////", ".mount", "waldo@waldo-quuix.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "/", ".mount", "waldo@-.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "", ".mount", "waldo@-.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "///", ".mount", "waldo@-.mount", 0);
+ test_unit_name_from_path_instance_one("waldo", "..", ".mount", NULL, -EINVAL);
+ test_unit_name_from_path_instance_one("waldo", "/foo", ".waldi", NULL, -EINVAL);
+ test_unit_name_from_path_instance_one("wa--ldo", "/--", ".mount", "wa--ldo@\\x2d\\x2d.mount", 0);
+ test_unit_name_from_path_instance_one("waldoaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "/waldo", ".mount", NULL, -EINVAL);
+}
+
+static void test_unit_name_to_path_one(const char *unit, const char *path, int ret) {
+ _cleanup_free_ char *p = NULL;
+
+ assert_se(unit_name_to_path(unit, &p) == ret);
+ assert_se(streq_ptr(path, p));
+}
+
+static void test_unit_name_to_path(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_to_path_one("home.mount", "/home", 0);
+ test_unit_name_to_path_one("home-lennart.mount", "/home/lennart", 0);
+ test_unit_name_to_path_one("home-lennart-.mount", NULL, -EINVAL);
+ test_unit_name_to_path_one("-home-lennart.mount", NULL, -EINVAL);
+ test_unit_name_to_path_one("-home--lennart.mount", NULL, -EINVAL);
+ test_unit_name_to_path_one("home-..-lennart.mount", NULL, -EINVAL);
+ test_unit_name_to_path_one("", NULL, -EINVAL);
+ test_unit_name_to_path_one("home/foo", NULL, -EINVAL);
+}
+
+static void test_unit_name_mangle_one(bool allow_globs, const char *pattern, const char *expect, int ret) {
+ _cleanup_free_ char *t = NULL;
+
+ assert_se(unit_name_mangle(pattern, (allow_globs * UNIT_NAME_MANGLE_GLOB) | UNIT_NAME_MANGLE_WARN, &t) == ret);
+ puts(strna(t));
+ assert_se(streq_ptr(t, expect));
+
+ if (t) {
+ _cleanup_free_ char *k = NULL;
+
+ assert_se(unit_name_is_valid(t, UNIT_NAME_ANY) ||
+ (allow_globs && string_is_glob(t)));
+
+ assert_se(unit_name_mangle(t, (allow_globs * UNIT_NAME_MANGLE_GLOB) | UNIT_NAME_MANGLE_WARN, &k) == 0);
+ assert_se(streq_ptr(t, k));
+ }
+}
+
+static void test_unit_name_mangle(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_mangle_one(false, "foo.service", "foo.service", 0);
+ test_unit_name_mangle_one(false, "/home", "home.mount", 1);
+ test_unit_name_mangle_one(false, "/dev/sda", "dev-sda.device", 1);
+ test_unit_name_mangle_one(false, "üxknürz.service", "\\xc3\\xbcxkn\\xc3\\xbcrz.service", 1);
+ test_unit_name_mangle_one(false, "foobar-meh...waldi.service", "foobar-meh...waldi.service", 0);
+ test_unit_name_mangle_one(false, "_____####----.....service", "_____\\x23\\x23\\x23\\x23----.....service", 1);
+ test_unit_name_mangle_one(false, "_____##@;;;,,,##----.....service", "_____\\x23\\x23@\\x3b\\x3b\\x3b\\x2c\\x2c\\x2c\\x23\\x23----.....service", 1);
+ test_unit_name_mangle_one(false, "xxx@@@@/////\\\\\\\\\\yyy.service", "xxx@@@@-----\\\\\\\\\\yyy.service", 1);
+ test_unit_name_mangle_one(false, "", NULL, -EINVAL);
+
+ test_unit_name_mangle_one(true, "foo.service", "foo.service", 0);
+ test_unit_name_mangle_one(true, "foo", "foo.service", 1);
+ test_unit_name_mangle_one(true, "foo*", "foo*", 0);
+ test_unit_name_mangle_one(true, "ü*", "\\xc3\\xbc*", 1);
+}
+
+static int test_unit_printf(void) {
+ _cleanup_free_ char *mid = NULL, *bid = NULL, *host = NULL, *gid = NULL, *group = NULL, *uid = NULL, *user = NULL, *shell = NULL, *home = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *u;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(specifier_machine_id('m', NULL, NULL, &mid) >= 0 && mid);
+ assert_se(specifier_boot_id('b', NULL, NULL, &bid) >= 0 && bid);
+ assert_se(host = gethostname_malloc());
+ assert_se(user = uid_to_name(getuid()));
+ assert_se(group = gid_to_name(getgid()));
+ assert_se(asprintf(&uid, UID_FMT, getuid()));
+ assert_se(asprintf(&gid, UID_FMT, getgid()));
+ assert_se(get_home_dir(&home) >= 0);
+ assert_se(get_shell(&shell) >= 0);
+
+ r = manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_MINIMAL, &m);
+ if (manager_errno_skip_test(r))
+ return log_tests_skipped_errno(r, "manager_new");
+ assert_se(r == 0);
+
+#define expect(unit, pattern, expected) \
+ { \
+ char *e; \
+ _cleanup_free_ char *t = NULL; \
+ assert_se(unit_full_printf(unit, pattern, &t) >= 0); \
+ printf("result: %s\nexpect: %s\n", t, expected); \
+ if ((e = endswith(expected, "*"))) \
+ assert_se(strncmp(t, e, e-expected)); \
+ else \
+ assert_se(streq(t, expected)); \
+ }
+
+ assert_se(u = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(u, "blah.service") == 0);
+ assert_se(unit_add_name(u, "blah.service") == 0);
+
+ /* general tests */
+ expect(u, "%%", "%");
+ expect(u, "%%s", "%s");
+ expect(u, "%,", "%,");
+ expect(u, "%", "%");
+
+ /* normal unit */
+ expect(u, "%n", "blah.service");
+ expect(u, "%f", "/blah");
+ expect(u, "%N", "blah");
+ expect(u, "%p", "blah");
+ expect(u, "%P", "blah");
+ expect(u, "%i", "");
+ expect(u, "%I", "");
+ expect(u, "%j", "blah");
+ expect(u, "%J", "blah");
+ expect(u, "%g", group);
+ expect(u, "%G", gid);
+ expect(u, "%u", user);
+ expect(u, "%U", uid);
+ expect(u, "%h", home);
+ expect(u, "%m", mid);
+ expect(u, "%b", bid);
+ expect(u, "%H", host);
+ expect(u, "%t", "/run/user/*");
+
+ /* templated */
+ assert_se(u = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(u, "blah@foo-foo.service") == 0);
+ assert_se(unit_add_name(u, "blah@foo-foo.service") == 0);
+
+ expect(u, "%n", "blah@foo-foo.service");
+ expect(u, "%N", "blah@foo-foo");
+ expect(u, "%f", "/foo/foo");
+ expect(u, "%p", "blah");
+ expect(u, "%P", "blah");
+ expect(u, "%i", "foo-foo");
+ expect(u, "%I", "foo/foo");
+ expect(u, "%j", "blah");
+ expect(u, "%J", "blah");
+ expect(u, "%g", group);
+ expect(u, "%G", gid);
+ expect(u, "%u", user);
+ expect(u, "%U", uid);
+ expect(u, "%h", home);
+ expect(u, "%m", mid);
+ expect(u, "%b", bid);
+ expect(u, "%H", host);
+ expect(u, "%t", "/run/user/*");
+
+ /* templated with components */
+ assert_se(u = unit_new(m, sizeof(Slice)));
+ assert_se(unit_add_name(u, "blah-blah\\x2d.slice") == 0);
+
+ expect(u, "%n", "blah-blah\\x2d.slice");
+ expect(u, "%N", "blah-blah\\x2d");
+ expect(u, "%f", "/blah/blah-");
+ expect(u, "%p", "blah-blah\\x2d");
+ expect(u, "%P", "blah/blah-");
+ expect(u, "%i", "");
+ expect(u, "%I", "");
+ expect(u, "%j", "blah\\x2d");
+ expect(u, "%J", "blah-");
+
+#undef expect
+
+ return 0;
+}
+
+static void test_unit_instance_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(unit_instance_is_valid("fooBar"));
+ assert_se(unit_instance_is_valid("foo-bar"));
+ assert_se(unit_instance_is_valid("foo.stUff"));
+ assert_se(unit_instance_is_valid("fOo123.stuff"));
+ assert_se(unit_instance_is_valid("@f_oo123.Stuff"));
+
+ assert_se(!unit_instance_is_valid("$¢£"));
+ assert_se(!unit_instance_is_valid(""));
+ assert_se(!unit_instance_is_valid("foo bar"));
+ assert_se(!unit_instance_is_valid("foo/bar"));
+}
+
+static void test_unit_prefix_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(unit_prefix_is_valid("fooBar"));
+ assert_se(unit_prefix_is_valid("foo-bar"));
+ assert_se(unit_prefix_is_valid("foo.stUff"));
+ assert_se(unit_prefix_is_valid("fOo123.stuff"));
+ assert_se(unit_prefix_is_valid("foo123.Stuff"));
+
+ assert_se(!unit_prefix_is_valid("$¢£"));
+ assert_se(!unit_prefix_is_valid(""));
+ assert_se(!unit_prefix_is_valid("foo bar"));
+ assert_se(!unit_prefix_is_valid("foo/bar"));
+ assert_se(!unit_prefix_is_valid("@foo-bar"));
+}
+
+static void test_unit_name_change_suffix(void) {
+ char *t;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(unit_name_change_suffix("foo.mount", ".service", &t) == 0);
+ assert_se(streq(t, "foo.service"));
+ free(t);
+
+ assert_se(unit_name_change_suffix("foo@stuff.service", ".socket", &t) == 0);
+ assert_se(streq(t, "foo@stuff.socket"));
+ free(t);
+}
+
+static void test_unit_name_build(void) {
+ char *t;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(unit_name_build("foo", "bar", ".service", &t) == 0);
+ assert_se(streq(t, "foo@bar.service"));
+ free(t);
+
+ assert_se(unit_name_build("fo0-stUff_b", "bar", ".mount", &t) == 0);
+ assert_se(streq(t, "fo0-stUff_b@bar.mount"));
+ free(t);
+
+ assert_se(unit_name_build("foo", NULL, ".service", &t) == 0);
+ assert_se(streq(t, "foo.service"));
+ free(t);
+}
+
+static void test_slice_name_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se( slice_name_is_valid(SPECIAL_ROOT_SLICE));
+ assert_se( slice_name_is_valid("foo.slice"));
+ assert_se( slice_name_is_valid("foo-bar.slice"));
+ assert_se( slice_name_is_valid("foo-bar-baz.slice"));
+ assert_se(!slice_name_is_valid("-foo-bar-baz.slice"));
+ assert_se(!slice_name_is_valid("foo-bar-baz-.slice"));
+ assert_se(!slice_name_is_valid("-foo-bar-baz-.slice"));
+ assert_se(!slice_name_is_valid("foo-bar--baz.slice"));
+ assert_se(!slice_name_is_valid("foo--bar--baz.slice"));
+ assert_se(!slice_name_is_valid(".slice"));
+ assert_se(!slice_name_is_valid(""));
+ assert_se(!slice_name_is_valid("foo.service"));
+
+ assert_se(!slice_name_is_valid("foo@.slice"));
+ assert_se(!slice_name_is_valid("foo@bar.slice"));
+ assert_se(!slice_name_is_valid("foo-bar@baz.slice"));
+ assert_se(!slice_name_is_valid("foo@bar@baz.slice"));
+ assert_se(!slice_name_is_valid("foo@bar-baz.slice"));
+ assert_se(!slice_name_is_valid("-foo-bar-baz@.slice"));
+ assert_se(!slice_name_is_valid("foo-bar-baz@-.slice"));
+ assert_se(!slice_name_is_valid("foo-bar-baz@a--b.slice"));
+ assert_se(!slice_name_is_valid("-foo-bar-baz@-.slice"));
+ assert_se(!slice_name_is_valid("foo-bar--baz@.slice"));
+ assert_se(!slice_name_is_valid("foo--bar--baz@.slice"));
+ assert_se(!slice_name_is_valid("@.slice"));
+ assert_se(!slice_name_is_valid("foo@bar.service"));
+}
+
+static void test_build_subslice(void) {
+ char *a;
+ char *b;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(slice_build_subslice(SPECIAL_ROOT_SLICE, "foo", &a) >= 0);
+ assert_se(slice_build_subslice(a, "bar", &b) >= 0);
+ free(a);
+ assert_se(slice_build_subslice(b, "barfoo", &a) >= 0);
+ free(b);
+ assert_se(slice_build_subslice(a, "foobar", &b) >= 0);
+ free(a);
+ assert_se(streq(b, "foo-bar-barfoo-foobar.slice"));
+ free(b);
+
+ assert_se(slice_build_subslice("foo.service", "bar", &a) < 0);
+ assert_se(slice_build_subslice("foo", "bar", &a) < 0);
+}
+
+static void test_build_parent_slice_one(const char *name, const char *expect, int ret) {
+ _cleanup_free_ char *s = NULL;
+
+ assert_se(slice_build_parent_slice(name, &s) == ret);
+ assert_se(streq_ptr(s, expect));
+}
+
+static void test_build_parent_slice(void) {
+ log_info("/* %s */", __func__);
+
+ test_build_parent_slice_one(SPECIAL_ROOT_SLICE, NULL, 0);
+ test_build_parent_slice_one("foo.slice", SPECIAL_ROOT_SLICE, 1);
+ test_build_parent_slice_one("foo-bar.slice", "foo.slice", 1);
+ test_build_parent_slice_one("foo-bar-baz.slice", "foo-bar.slice", 1);
+ test_build_parent_slice_one("foo-bar--baz.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("-foo-bar.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar-.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar.service", NULL, -EINVAL);
+ test_build_parent_slice_one(".slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo@bar.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar@baz.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar--@baz.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("-foo-bar@bar.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo-bar@-.slice", NULL, -EINVAL);
+ test_build_parent_slice_one("foo@bar.service", NULL, -EINVAL);
+ test_build_parent_slice_one("@.slice", NULL, -EINVAL);
+}
+
+static void test_unit_name_to_instance(void) {
+ char *instance;
+ int r;
+
+ log_info("/* %s */", __func__);
+
+ r = unit_name_to_instance("foo@bar.service", &instance);
+ assert_se(r == UNIT_NAME_INSTANCE);
+ assert_se(streq(instance, "bar"));
+ free(instance);
+
+ r = unit_name_to_instance("foo@.service", &instance);
+ assert_se(r == UNIT_NAME_TEMPLATE);
+ assert_se(streq(instance, ""));
+ free(instance);
+
+ r = unit_name_to_instance("fo0-stUff_b@b.service", &instance);
+ assert_se(r == UNIT_NAME_INSTANCE);
+ assert_se(streq(instance, "b"));
+ free(instance);
+
+ r = unit_name_to_instance("foo.service", &instance);
+ assert_se(r == UNIT_NAME_PLAIN);
+ assert_se(!instance);
+
+ r = unit_name_to_instance("fooj@unk", &instance);
+ assert_se(r < 0);
+ assert_se(!instance);
+
+ r = unit_name_to_instance("foo@", &instance);
+ assert_se(r < 0);
+ assert_se(!instance);
+}
+
+static void test_unit_name_escape(void) {
+ _cleanup_free_ char *r;
+
+ log_info("/* %s */", __func__);
+
+ r = unit_name_escape("ab+-c.a/bc@foo.service");
+ assert_se(r);
+ assert_se(streq(r, "ab\\x2b\\x2dc.a-bc\\x40foo.service"));
+}
+
+static void test_u_n_t_one(const char *name, const char *expected, int ret) {
+ _cleanup_free_ char *f = NULL;
+
+ assert_se(unit_name_template(name, &f) == ret);
+ printf("got: %s, expected: %s\n", strna(f), strna(expected));
+ assert_se(streq_ptr(f, expected));
+}
+
+static void test_unit_name_template(void) {
+ log_info("/* %s */", __func__);
+
+ test_u_n_t_one("foo@bar.service", "foo@.service", 0);
+ test_u_n_t_one("foo.mount", NULL, -EINVAL);
+}
+
+static void test_unit_name_path_unescape_one(const char *name, const char *path, int ret) {
+ _cleanup_free_ char *p = NULL;
+
+ assert_se(unit_name_path_unescape(name, &p) == ret);
+ assert_se(streq_ptr(path, p));
+}
+
+static void test_unit_name_path_unescape(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_path_unescape_one("foo", "/foo", 0);
+ test_unit_name_path_unescape_one("foo-bar", "/foo/bar", 0);
+ test_unit_name_path_unescape_one("foo-.bar", "/foo/.bar", 0);
+ test_unit_name_path_unescape_one("foo-bar-baz", "/foo/bar/baz", 0);
+ test_unit_name_path_unescape_one("-", "/", 0);
+ test_unit_name_path_unescape_one("--", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("-foo-bar", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("foo--bar", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("foo-bar-", NULL, -EINVAL);
+ test_unit_name_path_unescape_one(".-bar", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("foo-..", NULL, -EINVAL);
+ test_unit_name_path_unescape_one("", NULL, -EINVAL);
+}
+
+static void test_unit_name_to_prefix_one(const char *input, int ret, const char *output) {
+ _cleanup_free_ char *k = NULL;
+
+ assert_se(unit_name_to_prefix(input, &k) == ret);
+ assert_se(streq_ptr(k, output));
+}
+
+static void test_unit_name_to_prefix(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_to_prefix_one("foobar.service", 0, "foobar");
+ test_unit_name_to_prefix_one("", -EINVAL, NULL);
+ test_unit_name_to_prefix_one("foobar", -EINVAL, NULL);
+ test_unit_name_to_prefix_one(".service", -EINVAL, NULL);
+ test_unit_name_to_prefix_one("quux.quux", -EINVAL, NULL);
+ test_unit_name_to_prefix_one("quux.mount", 0, "quux");
+ test_unit_name_to_prefix_one("quux-quux.mount", 0, "quux-quux");
+ test_unit_name_to_prefix_one("quux@bar.mount", 0, "quux");
+ test_unit_name_to_prefix_one("quux-@.mount", 0, "quux-");
+ test_unit_name_to_prefix_one("@.mount", -EINVAL, NULL);
+}
+
+static void test_unit_name_from_dbus_path_one(const char *input, int ret, const char *output) {
+ _cleanup_free_ char *k = NULL;
+
+ assert_se(unit_name_from_dbus_path(input, &k) == ret);
+ assert_se(streq_ptr(k, output));
+}
+
+static void test_unit_name_from_dbus_path(void) {
+ log_info("/* %s */", __func__);
+
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dbus_2esocket", 0, "dbus.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/_2d_2emount", 0, "-.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/_2d_2eslice", 0, "-.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/accounts_2ddaemon_2eservice", 0, "accounts-daemon.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/auditd_2eservice", 0, "auditd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/basic_2etarget", 0, "basic.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/bluetooth_2etarget", 0, "bluetooth.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/boot_2eautomount", 0, "boot.automount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/boot_2emount", 0, "boot.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/btrfs_2emount", 0, "btrfs.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/cryptsetup_2dpre_2etarget", 0, "cryptsetup-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/cryptsetup_2etarget", 0, "cryptsetup.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dbus_2eservice", 0, "dbus.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dbus_2esocket", 0, "dbus.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dcdrom_2edevice", 0, "dev-cdrom.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M120G2GC_5fCVPO044405HH120QGN_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M120G2GC_CVPO044405HH120QGN.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M120G2GC_5fCVPO044405HH120QGN_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M120G2GC_CVPO044405HH120QGN\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M160G2GC_5fCVPO951003RY160AGN_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M160G2GC_CVPO951003RY160AGN.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M160G2GC_5fCVPO951003RY160AGN_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M160G2GC_CVPO951003RY160AGN\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M160G2GC_5fCVPO951003RY160AGN_5cx2dpart2_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M160G2GC_CVPO951003RY160AGN\\x2dpart2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dINTEL_5fSSDSA2M160G2GC_5fCVPO951003RY160AGN_5cx2dpart3_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dINTEL_SSDSA2M160G2GC_CVPO951003RY160AGN\\x2dpart3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2data_5cx2dTSSTcorp_5fCDDVDW_5fTS_5cx2dL633C_5fR6176GLZB14646_2edevice", 0, "dev-disk-by\\x2did-ata\\x2dTSSTcorp_CDDVDW_TS\\x2dL633C_R6176GLZB14646.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x50015179591245ae_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x50015179591245ae.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x50015179591245ae_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x50015179591245ae\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x50015179591245ae_5cx2dpart2_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x50015179591245ae\\x2dpart2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x50015179591245ae_5cx2dpart3_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x50015179591245ae\\x2dpart3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x500151795946eab5_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x500151795946eab5.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2did_2dwwn_5cx2d0x500151795946eab5_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2did-wwn\\x2d0x500151795946eab5\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dlabel_2d_5cxe3_5cx82_5cxb7_5cxe3_5cx82_5cxb9_5cxe3_5cx83_5cx86_5cxe3_5cx83_5cxa0_5cxe3_5cx81_5cxa7_5cxe4_5cxba_5cx88_5cxe7_5cxb4_5cx84_5cxe6_5cxb8_5cx88_5cxe3_5cx81_5cxbf_2edevice", 0, "dev-disk-by\\x2dlabel-\\xe3\\x82\\xb7\\xe3\\x82\\xb9\\xe3\\x83\\x86\\xe3\\x83\\xa0\\xe3\\x81\\xa7\\xe4\\xba\\x88\\xe7\\xb4\\x84\\xe6\\xb8\\x88\\xe3\\x81\\xbf.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpartuuid_2d59834e50_5cx2d01_2edevice", 0, "dev-disk-by\\x2dpartuuid-59834e50\\x2d01.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpartuuid_2d63e2a7b3_5cx2d01_2edevice", 0, "dev-disk-by\\x2dpartuuid-63e2a7b3\\x2d01.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpartuuid_2d63e2a7b3_5cx2d02_2edevice", 0, "dev-disk-by\\x2dpartuuid-63e2a7b3\\x2d02.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpartuuid_2d63e2a7b3_5cx2d03_2edevice", 0, "dev-disk-by\\x2dpartuuid-63e2a7b3\\x2d03.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d1_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d1_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d1\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d1_5cx2dpart2_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d1\\x2dpart2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d1_5cx2dpart3_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d1\\x2dpart3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d2_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d6_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d6.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2dpath_2dpci_5cx2d0000_3a00_3a1f_2e2_5cx2data_5cx2d6_5cx2dpart1_2edevice", 0, "dev-disk-by\\x2dpath-pci\\x2d0000:00:1f.2\\x2data\\x2d6\\x2dpart1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2duuid_2d1A34E3F034E3CD37_2edevice", 0, "dev-disk-by\\x2duuid-1A34E3F034E3CD37.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2duuid_2dB670EBFE70EBC2EB_2edevice", 0, "dev-disk-by\\x2duuid-B670EBFE70EBC2EB.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2duuid_2dFCD4F509D4F4C6C4_2edevice", 0, "dev-disk-by\\x2duuid-FCD4F509D4F4C6C4.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2ddisk_2dby_5cx2duuid_2db49ead57_5cx2d907c_5cx2d446c_5cx2db405_5cx2d5ca6cd865f5e_2edevice", 0, "dev-disk-by\\x2duuid-b49ead57\\x2d907c\\x2d446c\\x2db405\\x2d5ca6cd865f5e.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dhugepages_2emount", 0, "dev-hugepages.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dmqueue_2emount", 0, "dev-mqueue.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2drfkill_2edevice", 0, "dev-rfkill.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsda1_2edevice", 0, "dev-sda1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsda2_2edevice", 0, "dev-sda2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsda3_2edevice", 0, "dev-sda3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsda_2edevice", 0, "dev-sda.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsdb1_2edevice", 0, "dev-sdb1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsdb_2edevice", 0, "dev-sdb.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dsr0_2edevice", 0, "dev-sr0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS0_2edevice", 0, "dev-ttyS0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS10_2edevice", 0, "dev-ttyS10.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS11_2edevice", 0, "dev-ttyS11.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS12_2edevice", 0, "dev-ttyS12.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS13_2edevice", 0, "dev-ttyS13.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS14_2edevice", 0, "dev-ttyS14.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS15_2edevice", 0, "dev-ttyS15.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS16_2edevice", 0, "dev-ttyS16.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS17_2edevice", 0, "dev-ttyS17.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS18_2edevice", 0, "dev-ttyS18.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS19_2edevice", 0, "dev-ttyS19.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS1_2edevice", 0, "dev-ttyS1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS20_2edevice", 0, "dev-ttyS20.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS21_2edevice", 0, "dev-ttyS21.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS22_2edevice", 0, "dev-ttyS22.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS23_2edevice", 0, "dev-ttyS23.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS24_2edevice", 0, "dev-ttyS24.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS25_2edevice", 0, "dev-ttyS25.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS26_2edevice", 0, "dev-ttyS26.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS27_2edevice", 0, "dev-ttyS27.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS28_2edevice", 0, "dev-ttyS28.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS29_2edevice", 0, "dev-ttyS29.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS2_2edevice", 0, "dev-ttyS2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS30_2edevice", 0, "dev-ttyS30.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS31_2edevice", 0, "dev-ttyS31.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS3_2edevice", 0, "dev-ttyS3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS4_2edevice", 0, "dev-ttyS4.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS5_2edevice", 0, "dev-ttyS5.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS6_2edevice", 0, "dev-ttyS6.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS7_2edevice", 0, "dev-ttyS7.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS8_2edevice", 0, "dev-ttyS8.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dev_2dttyS9_2edevice", 0, "dev-ttyS9.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dcmdline_2eservice", 0, "dracut-cmdline.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dinitqueue_2eservice", 0, "dracut-initqueue.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dmount_2eservice", 0, "dracut-mount.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dpre_2dmount_2eservice", 0, "dracut-pre-mount.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dpre_2dpivot_2eservice", 0, "dracut-pre-pivot.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dpre_2dtrigger_2eservice", 0, "dracut-pre-trigger.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dpre_2dudev_2eservice", 0, "dracut-pre-udev.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/dracut_2dshutdown_2eservice", 0, "dracut-shutdown.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/ebtables_2eservice", 0, "ebtables.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/emergency_2eservice", 0, "emergency.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/emergency_2etarget", 0, "emergency.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/fedora_2dimport_2dstate_2eservice", 0, "fedora-import-state.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/fedora_2dreadonly_2eservice", 0, "fedora-readonly.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/firewalld_2eservice", 0, "firewalld.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/getty_2dpre_2etarget", 0, "getty-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/getty_2etarget", 0, "getty.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/getty_40tty1_2eservice", 0, "getty@tty1.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/graphical_2etarget", 0, "graphical.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/home_2emount", 0, "home.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/init_2escope", 0, "init.scope");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dcleanup_2eservice", 0, "initrd-cleanup.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dfs_2etarget", 0, "initrd-fs.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dparse_2detc_2eservice", 0, "initrd-parse-etc.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2droot_2ddevice_2etarget", 0, "initrd-root-device.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2droot_2dfs_2etarget", 0, "initrd-root-fs.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dswitch_2droot_2eservice", 0, "initrd-switch-root.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dswitch_2droot_2etarget", 0, "initrd-switch-root.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2dudevadm_2dcleanup_2ddb_2eservice", 0, "initrd-udevadm-cleanup-db.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/initrd_2etarget", 0, "initrd.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/ip6tables_2eservice", 0, "ip6tables.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/ipset_2eservice", 0, "ipset.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/iptables_2eservice", 0, "iptables.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/irqbalance_2eservice", 0, "irqbalance.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/kmod_2dstatic_2dnodes_2eservice", 0, "kmod-static-nodes.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/ldconfig_2eservice", 0, "ldconfig.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/lightdm_2eservice", 0, "lightdm.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/livesys_2dlate_2eservice", 0, "livesys-late.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/lm_5fsensors_2eservice", 0, "lm_sensors.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/local_2dfs_2dpre_2etarget", 0, "local-fs-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/local_2dfs_2etarget", 0, "local-fs.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/machines_2etarget", 0, "machines.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/mcelog_2eservice", 0, "mcelog.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/multi_2duser_2etarget", 0, "multi-user.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/network_2dpre_2etarget", 0, "network-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/network_2etarget", 0, "network.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/nss_2dlookup_2etarget", 0, "nss-lookup.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/nss_2duser_2dlookup_2etarget", 0, "nss-user-lookup.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/paths_2etarget", 0, "paths.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/plymouth_2dquit_2dwait_2eservice", 0, "plymouth-quit-wait.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/plymouth_2dquit_2eservice", 0, "plymouth-quit.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/plymouth_2dstart_2eservice", 0, "plymouth-start.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/polkit_2eservice", 0, "polkit.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/proc_2dsys_2dfs_2dbinfmt_5fmisc_2eautomount", 0, "proc-sys-fs-binfmt_misc.automount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/proc_2dsys_2dfs_2dbinfmt_5fmisc_2emount", 0, "proc-sys-fs-binfmt_misc.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/rc_2dlocal_2eservice", 0, "rc-local.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/remote_2dcryptsetup_2etarget", 0, "remote-cryptsetup.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/remote_2dfs_2dpre_2etarget", 0, "remote-fs-pre.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/remote_2dfs_2etarget", 0, "remote-fs.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/rescue_2eservice", 0, "rescue.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/rescue_2etarget", 0, "rescue.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/run_2duser_2d1000_2emount", 0, "run-user-1000.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/session_2d2_2escope", 0, "session-2.scope");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/shutdown_2etarget", 0, "shutdown.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/slices_2etarget", 0, "slices.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/smartd_2eservice", 0, "smartd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sockets_2etarget", 0, "sockets.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sound_2etarget", 0, "sound.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2dkeygen_2etarget", 0, "sshd-keygen.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2dkeygen_40ecdsa_2eservice", 0, "sshd-keygen@ecdsa.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2dkeygen_40ed25519_2eservice", 0, "sshd-keygen@ed25519.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2dkeygen_40rsa_2eservice", 0, "sshd-keygen@rsa.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sshd_2eservice", 0, "sshd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/swap_2etarget", 0, "swap.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a02_2e0_2dbacklight_2dacpi_5fvideo0_2edevice", 0, "sys-devices-pci0000:00-0000:00:02.0-backlight-acpi_video0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a02_2e0_2ddrm_2dcard0_2dcard0_5cx2dLVDS_5cx2d1_2dintel_5fbacklight_2edevice", 0, "sys-devices-pci0000:00-0000:00:02.0-drm-card0-card0\\x2dLVDS\\x2d1-intel_backlight.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1a_2e0_2dusb1_2d1_5cx2d1_2d1_5cx2d1_2e6_2d1_5cx2d1_2e6_3a1_2e0_2dbluetooth_2dhci0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1a.0-usb1-1\\x2d1-1\\x2d1.6-1\\x2d1.6:1.0-bluetooth-hci0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1b_2e0_2dsound_2dcard0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1b.0-sound-card0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1c_2e0_2d0000_3a02_3a00_2e0_2dnet_2dwlp2s0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1c.0-0000:02:00.0-net-wlp2s0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1c_2e2_2d0000_3a04_3a00_2e0_2dnet_2denp4s0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1c.2-0000:04:00.0-net-enp4s0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data1_2dhost0_2dtarget0_3a0_3a0_2d0_3a0_3a0_3a0_2dblock_2dsda_2dsda1_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata1-host0-target0:0:0-0:0:0:0-block-sda-sda1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data1_2dhost0_2dtarget0_3a0_3a0_2d0_3a0_3a0_3a0_2dblock_2dsda_2dsda2_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata1-host0-target0:0:0-0:0:0:0-block-sda-sda2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data1_2dhost0_2dtarget0_3a0_3a0_2d0_3a0_3a0_3a0_2dblock_2dsda_2dsda3_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata1-host0-target0:0:0-0:0:0:0-block-sda-sda3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data1_2dhost0_2dtarget0_3a0_3a0_2d0_3a0_3a0_3a0_2dblock_2dsda_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata1-host0-target0:0:0-0:0:0:0-block-sda.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data2_2dhost1_2dtarget1_3a0_3a0_2d1_3a0_3a0_3a0_2dblock_2dsr0_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata2-host1-target1:0:0-1:0:0:0-block-sr0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data6_2dhost5_2dtarget5_3a0_3a0_2d5_3a0_3a0_3a0_2dblock_2dsdb_2dsdb1_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata6-host5-target5:0:0-5:0:0:0-block-sdb-sdb1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dpci0000_3a00_2d0000_3a00_3a1f_2e2_2data6_2dhost5_2dtarget5_3a0_3a0_2d5_3a0_3a0_3a0_2dblock_2dsdb_2edevice", 0, "sys-devices-pci0000:00-0000:00:1f.2-ata6-host5-target5:0:0-5:0:0:0-block-sdb.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS0_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS10_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS10.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS11_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS11.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS12_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS12.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS13_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS13.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS14_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS14.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS15_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS15.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS16_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS16.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS17_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS17.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS18_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS18.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS19_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS19.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS1_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS1.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS20_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS20.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS21_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS21.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS22_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS22.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS23_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS23.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS24_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS24.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS25_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS25.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS26_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS26.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS27_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS27.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS28_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS28.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS29_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS29.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS2_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS2.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS30_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS30.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS31_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS31.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS3_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS3.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS4_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS4.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS5_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS5.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS6_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS6.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS7_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS7.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS8_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS8.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dplatform_2dserial8250_2dtty_2dttyS9_2edevice", 0, "sys-devices-platform-serial8250-tty-ttyS9.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2ddevices_2dvirtual_2dmisc_2drfkill_2edevice", 0, "sys-devices-virtual-misc-rfkill.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dfs_2dfuse_2dconnections_2emount", 0, "sys-fs-fuse-connections.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dkernel_2dconfig_2emount", 0, "sys-kernel-config.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dkernel_2ddebug_2emount", 0, "sys-kernel-debug.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dmodule_2dconfigfs_2edevice", 0, "sys-module-configfs.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dsubsystem_2dbluetooth_2ddevices_2dhci0_2edevice", 0, "sys-subsystem-bluetooth-devices-hci0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dsubsystem_2dnet_2ddevices_2denp4s0_2edevice", 0, "sys-subsystem-net-devices-enp4s0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sys_2dsubsystem_2dnet_2ddevices_2dwlp2s0_2edevice", 0, "sys-subsystem-net-devices-wlp2s0.device");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sysinit_2etarget", 0, "sysinit.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/syslog_2eservice", 0, "syslog.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/syslog_2esocket", 0, "syslog.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/syslog_2etarget", 0, "syslog.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/sysroot_2emount", 0, "sysroot.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2dgetty_2eslice", 0, "system-getty.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2dsshd_5cx2dkeygen_2eslice", 0, "system-sshd\\x2dkeygen.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2dsystemd_5cx2dbacklight_2eslice", 0, "system-systemd\\x2dbacklight.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2dsystemd_5cx2dcoredump_2eslice", 0, "system-systemd\\x2dcoredump.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2duser_5cx2druntime_5cx2ddir_2eslice", 0, "system-user\\x2druntime\\x2ddir.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/system_2eslice", 0, "system.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dask_2dpassword_2dconsole_2epath", 0, "systemd-ask-password-console.path");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dask_2dpassword_2dconsole_2eservice", 0, "systemd-ask-password-console.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dask_2dpassword_2dwall_2epath", 0, "systemd-ask-password-wall.path");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dask_2dpassword_2dwall_2eservice", 0, "systemd-ask-password-wall.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dbacklight_40backlight_3aacpi_5fvideo0_2eservice", 0, "systemd-backlight@backlight:acpi_video0.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dbacklight_40backlight_3aintel_5fbacklight_2eservice", 0, "systemd-backlight@backlight:intel_backlight.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dbinfmt_2eservice", 0, "systemd-binfmt.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dcoredump_2esocket", 0, "systemd-coredump.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dcoredump_400_2eservice", 0, "systemd-coredump@0.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dfirstboot_2eservice", 0, "systemd-firstboot.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dfsck_2droot_2eservice", 0, SPECIAL_FSCK_ROOT_SERVICE);
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dhwdb_2dupdate_2eservice", 0, "systemd-hwdb-update.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dinitctl_2eservice", 0, "systemd-initctl.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dinitctl_2esocket", 0, "systemd-initctl.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournal_2dcatalog_2dupdate_2eservice", 0, "systemd-journal-catalog-update.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournal_2dflush_2eservice", 0, "systemd-journal-flush.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournald_2daudit_2esocket", 0, "systemd-journald-audit.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournald_2ddev_2dlog_2esocket", 0, "systemd-journald-dev-log.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournald_2eservice", 0, "systemd-journald.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2djournald_2esocket", 0, "systemd-journald.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dlogind_2eservice", 0, "systemd-logind.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dmachine_2did_2dcommit_2eservice", 0, "systemd-machine-id-commit.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dmodules_2dload_2eservice", 0, "systemd-modules-load.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dnetworkd_2eservice", 0, "systemd-networkd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dnetworkd_2esocket", 0, "systemd-networkd.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2drandom_2dseed_2eservice", 0, "systemd-random-seed.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dremount_2dfs_2eservice", 0, "systemd-remount-fs.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dresolved_2eservice", 0, "systemd-resolved.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2drfkill_2eservice", 0, "systemd-rfkill.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2drfkill_2esocket", 0, "systemd-rfkill.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dsysctl_2eservice", 0, "systemd-sysctl.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dsysusers_2eservice", 0, "systemd-sysusers.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtimesyncd_2eservice", 0, "systemd-timesyncd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtmpfiles_2dclean_2eservice", 0, "systemd-tmpfiles-clean.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtmpfiles_2dclean_2etimer", 0, "systemd-tmpfiles-clean.timer");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtmpfiles_2dsetup_2ddev_2eservice", 0, "systemd-tmpfiles-setup-dev.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dtmpfiles_2dsetup_2eservice", 0, "systemd-tmpfiles-setup.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dudev_2dtrigger_2eservice", 0, "systemd-udev-trigger.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dudevd_2dcontrol_2esocket", 0, "systemd-udevd-control.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dudevd_2dkernel_2esocket", 0, "systemd-udevd-kernel.socket");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dudevd_2eservice", 0, "systemd-udevd.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dupdate_2ddone_2eservice", 0, "systemd-update-done.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dupdate_2dutmp_2drunlevel_2eservice", 0, "systemd-update-utmp-runlevel.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dupdate_2dutmp_2eservice", 0, "systemd-update-utmp.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2duser_2dsessions_2eservice", 0, "systemd-user-sessions.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/systemd_2dvconsole_2dsetup_2eservice", 0, "systemd-vconsole-setup.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/time_2dsync_2etarget", 0, "time-sync.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/timers_2etarget", 0, "timers.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/tmp_2emount", 0, "tmp.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/umount_2etarget", 0, "umount.target");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/unbound_2danchor_2eservice", 0, "unbound-anchor.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/unbound_2danchor_2etimer", 0, "unbound-anchor.timer");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/upower_2eservice", 0, "upower.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/user_2d1000_2eslice", 0, "user-1000.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/user_2druntime_2ddir_401000_2eservice", 0, "user-runtime-dir@1000.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/user_2eslice", 0, "user.slice");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/user_401000_2eservice", 0, "user@1000.service");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/usr_2dlocal_2dtexlive_2emount", 0, "usr-local-texlive.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/var_2dlib_2dmachines_2emount", 0, "var-lib-machines.mount");
+ test_unit_name_from_dbus_path_one("/org/freedesktop/systemd1/unit/wpa_5fsupplicant_2eservice", 0, "wpa_supplicant.service");
+}
+
+int main(int argc, char* argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ int r, rc = 0;
+
+ test_setup_logging(LOG_INFO);
+
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ test_unit_name_is_valid();
+ test_unit_name_replace_instance();
+ test_unit_name_from_path();
+ test_unit_name_from_path_instance();
+ test_unit_name_mangle();
+ test_unit_name_to_path();
+ TEST_REQ_RUNNING_SYSTEMD(rc = test_unit_printf());
+ test_unit_instance_is_valid();
+ test_unit_prefix_is_valid();
+ test_unit_name_change_suffix();
+ test_unit_name_build();
+ test_slice_name_is_valid();
+ test_build_subslice();
+ test_build_parent_slice();
+ test_unit_name_to_instance();
+ test_unit_name_escape();
+ test_unit_name_template();
+ test_unit_name_path_unescape();
+ test_unit_name_to_prefix();
+ test_unit_name_from_dbus_path();
+
+ return rc;
+}
diff --git a/src/test/test-user-record.c b/src/test/test-user-record.c
new file mode 100644
index 0000000..c9182e3
--- /dev/null
+++ b/src/test/test-user-record.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+#include <sys/types.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "tmpfile-util.h"
+#include "tests.h"
+#include "user-record.h"
+
+static void test_read_login_defs(const char *path) {
+ log_info("/* %s(\"%s\") */", __func__, path ?: "<custom>");
+
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-user-record.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ if (!path) {
+ assert_se(fmkostemp_safe(name, "r+", &f) == 0);
+ fprintf(f,
+ "SYS_UID_MIN "UID_FMT"\n"
+ "SYS_UID_MAX "UID_FMT"\n"
+ "SYS_GID_MIN "GID_FMT"\n"
+ "SYS_GID_MAX "GID_FMT"\n",
+ SYSTEM_ALLOC_UID_MIN + 5,
+ SYSTEM_UID_MAX + 5,
+ SYSTEM_ALLOC_GID_MIN + 5,
+ SYSTEM_GID_MAX + 5);
+ assert_se(fflush_and_check(f) >= 0);
+ }
+
+ UGIDAllocationRange defs;
+ assert_se(read_login_defs(&defs, path ?: name, NULL) >= 0);
+
+ log_info("system_alloc_uid_min="UID_FMT, defs.system_alloc_uid_min);
+ log_info("system_uid_max="UID_FMT, defs.system_uid_max);
+ log_info("system_alloc_gid_min="GID_FMT, defs.system_alloc_gid_min);
+ log_info("system_gid_max="GID_FMT, defs.system_gid_max);
+
+ if (!path) {
+ uid_t offset = ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES ? 5 : 0;
+ assert_se(defs.system_alloc_uid_min == SYSTEM_ALLOC_UID_MIN + offset);
+ assert_se(defs.system_uid_max == SYSTEM_UID_MAX + offset);
+ assert_se(defs.system_alloc_gid_min == SYSTEM_ALLOC_GID_MIN + offset);
+ assert_se(defs.system_gid_max == SYSTEM_GID_MAX + offset);
+ } else if (streq(path, "/dev/null")) {
+ assert_se(defs.system_alloc_uid_min == SYSTEM_ALLOC_UID_MIN);
+ assert_se(defs.system_uid_max == SYSTEM_UID_MAX);
+ assert_se(defs.system_alloc_gid_min == SYSTEM_ALLOC_GID_MIN);
+ assert_se(defs.system_gid_max == SYSTEM_GID_MAX);
+ }
+}
+
+static void test_acquire_ugid_allocation_range(void) {
+ log_info("/* %s */", __func__);
+
+ const UGIDAllocationRange *defs;
+ assert_se(defs = acquire_ugid_allocation_range());
+
+ log_info("system_alloc_uid_min="UID_FMT, defs->system_alloc_uid_min);
+ log_info("system_uid_max="UID_FMT, defs->system_uid_max);
+ log_info("system_alloc_gid_min="GID_FMT, defs->system_alloc_gid_min);
+ log_info("system_gid_max="GID_FMT, defs->system_gid_max);
+}
+
+static void test_uid_is_system(void) {
+ log_info("/* %s */", __func__);
+
+ uid_t uid = 0;
+ log_info("uid_is_system("UID_FMT") = %s", uid, yes_no(uid_is_system(uid)));
+
+ uid = 999;
+ log_info("uid_is_system("UID_FMT") = %s", uid, yes_no(uid_is_system(uid)));
+
+ uid = getuid();
+ log_info("uid_is_system("UID_FMT") = %s", uid, yes_no(uid_is_system(uid)));
+}
+
+static void test_gid_is_system(void) {
+ log_info("/* %s */", __func__);
+
+ gid_t gid = 0;
+ log_info("gid_is_system("GID_FMT") = %s", gid, yes_no(gid_is_system(gid)));
+
+ gid = 999;
+ log_info("gid_is_system("GID_FMT") = %s", gid, yes_no(gid_is_system(gid)));
+
+ gid = getgid();
+ log_info("gid_is_system("GID_FMT") = %s", gid, yes_no(gid_is_system(gid)));
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_read_login_defs("/dev/null");
+ test_read_login_defs("/etc/login.defs");
+ test_read_login_defs(NULL);
+ test_acquire_ugid_allocation_range();
+ test_uid_is_system();
+ test_gid_is_system();
+
+ return 0;
+}
diff --git a/src/test/test-user-util.c b/src/test/test-user-util.c
new file mode 100644
index 0000000..8924b5f
--- /dev/null
+++ b/src/test/test-user-util.c
@@ -0,0 +1,515 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "format-util.h"
+#include "libcrypt-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "user-util.h"
+
+static void test_uid_to_name_one(uid_t uid, const char *name) {
+ _cleanup_free_ char *t = NULL;
+
+ log_info("/* %s("UID_FMT", \"%s\") */", __func__, uid, name);
+
+ assert_se(t = uid_to_name(uid));
+ if (!synthesize_nobody() && streq(name, NOBODY_USER_NAME)) {
+ log_info("(skipping detailed tests because nobody is not synthesized)");
+ return;
+ }
+ assert_se(streq_ptr(t, name));
+}
+
+static void test_gid_to_name_one(gid_t gid, const char *name) {
+ _cleanup_free_ char *t = NULL;
+
+ log_info("/* %s("GID_FMT", \"%s\") */", __func__, gid, name);
+
+ assert_se(t = gid_to_name(gid));
+ if (!synthesize_nobody() && streq(name, NOBODY_GROUP_NAME)) {
+ log_info("(skipping detailed tests because nobody is not synthesized)");
+ return;
+ }
+ assert_se(streq_ptr(t, name));
+}
+
+static void test_parse_uid(void) {
+ int r;
+ uid_t uid;
+
+ log_info("/* %s */", __func__);
+
+ r = parse_uid("0", &uid);
+ assert_se(r == 0);
+ assert_se(uid == 0);
+
+ r = parse_uid("1", &uid);
+ assert_se(r == 0);
+ assert_se(uid == 1);
+
+ r = parse_uid("01", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 1);
+
+ r = parse_uid("001", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 1);
+
+ r = parse_uid("100", &uid);
+ assert_se(r == 0);
+ assert_se(uid == 100);
+
+ r = parse_uid("65535", &uid);
+ assert_se(r == -ENXIO);
+ assert_se(uid == 100);
+
+ r = parse_uid("0x1234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("0o1234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("0b1234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("+1234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("-1234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid(" 1234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("01234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("001234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("0001234", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("-0", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("+0", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("00", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("000", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+
+ r = parse_uid("asdsdas", &uid);
+ assert_se(r == -EINVAL);
+ assert_se(uid == 100);
+}
+
+static void test_uid_ptr(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(UID_TO_PTR(0) != NULL);
+ assert_se(UID_TO_PTR(1000) != NULL);
+
+ assert_se(PTR_TO_UID(UID_TO_PTR(0)) == 0);
+ assert_se(PTR_TO_UID(UID_TO_PTR(1000)) == 1000);
+}
+
+static void test_valid_user_group_name_relaxed(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_user_group_name(NULL, VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("", VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("1", VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("65535", VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("-1", VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("foo\nbar", VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("0123456789012345678901234567890123456789", VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("aaa:bbb", VALID_USER_RELAX|VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name(".aaa:bbb", VALID_USER_RELAX|VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name(".", VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("..", VALID_USER_RELAX));
+
+ assert_se(valid_user_group_name("root", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("lennart", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("LENNART", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("_kkk", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("kkk-", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("kk-k", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("eff.eff", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("eff.", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("-kkk", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("rööt", VALID_USER_RELAX));
+ assert_se(valid_user_group_name(".eff", VALID_USER_RELAX));
+ assert_se(valid_user_group_name(".1", VALID_USER_RELAX));
+ assert_se(valid_user_group_name(".65535", VALID_USER_RELAX));
+ assert_se(valid_user_group_name(".-1", VALID_USER_RELAX));
+ assert_se(valid_user_group_name(".-kkk", VALID_USER_RELAX));
+ assert_se(valid_user_group_name(".rööt", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("...", VALID_USER_RELAX));
+
+ assert_se(valid_user_group_name("some5", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("5some", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("INNER5NUMBER", VALID_USER_RELAX));
+
+ assert_se(valid_user_group_name("piff.paff@ad.domain.example", VALID_USER_RELAX));
+ assert_se(valid_user_group_name("Dāvis", VALID_USER_RELAX));
+}
+
+static void test_valid_user_group_name(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_user_group_name(NULL, 0));
+ assert_se(!valid_user_group_name("", 0));
+ assert_se(!valid_user_group_name("1", 0));
+ assert_se(!valid_user_group_name("65535", 0));
+ assert_se(!valid_user_group_name("-1", 0));
+ assert_se(!valid_user_group_name("-kkk", 0));
+ assert_se(!valid_user_group_name("rööt", 0));
+ assert_se(!valid_user_group_name(".", 0));
+ assert_se(!valid_user_group_name(".eff", 0));
+ assert_se(!valid_user_group_name("foo\nbar", 0));
+ assert_se(!valid_user_group_name("0123456789012345678901234567890123456789", 0));
+ assert_se(!valid_user_group_name("aaa:bbb", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name(".", 0));
+ assert_se(!valid_user_group_name("..", 0));
+ assert_se(!valid_user_group_name("...", 0));
+ assert_se(!valid_user_group_name(".1", 0));
+ assert_se(!valid_user_group_name(".65535", 0));
+ assert_se(!valid_user_group_name(".-1", 0));
+ assert_se(!valid_user_group_name(".-kkk", 0));
+ assert_se(!valid_user_group_name(".rööt", 0));
+ assert_se(!valid_user_group_name(".aaa:bbb", VALID_USER_ALLOW_NUMERIC));
+
+ assert_se(valid_user_group_name("root", 0));
+ assert_se(valid_user_group_name("lennart", 0));
+ assert_se(valid_user_group_name("LENNART", 0));
+ assert_se(valid_user_group_name("_kkk", 0));
+ assert_se(valid_user_group_name("kkk-", 0));
+ assert_se(valid_user_group_name("kk-k", 0));
+ assert_se(!valid_user_group_name("eff.eff", 0));
+ assert_se(!valid_user_group_name("eff.", 0));
+
+ assert_se(valid_user_group_name("some5", 0));
+ assert_se(!valid_user_group_name("5some", 0));
+ assert_se(valid_user_group_name("INNER5NUMBER", 0));
+
+ assert_se(!valid_user_group_name("piff.paff@ad.domain.example", 0));
+ assert_se(!valid_user_group_name("Dāvis", 0));
+}
+
+static void test_valid_user_group_name_or_numeric_relaxed(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_user_group_name(NULL, VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("0", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("1", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("65534", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("65535", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("65536", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("-1", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("foo\nbar", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("0123456789012345678901234567890123456789", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("aaa:bbb", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(!valid_user_group_name(".", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(!valid_user_group_name("..", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+
+ assert_se(valid_user_group_name("root", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("lennart", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("LENNART", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("_kkk", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("kkk-", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("kk-k", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("-kkk", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("rööt", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name(".eff", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("eff.eff", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("eff.", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("...", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+
+ assert_se(valid_user_group_name("some5", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("5some", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("INNER5NUMBER", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+
+ assert_se(valid_user_group_name("piff.paff@ad.domain.example", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+ assert_se(valid_user_group_name("Dāvis", VALID_USER_ALLOW_NUMERIC|VALID_USER_RELAX));
+}
+
+static void test_valid_user_group_name_or_numeric(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_user_group_name(NULL, VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("0", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("1", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("65534", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("65535", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("65536", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("-1", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("-kkk", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("rööt", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name(".", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("..", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("...", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name(".eff", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("eff.eff", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("eff.", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("foo\nbar", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("0123456789012345678901234567890123456789", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("aaa:bbb", VALID_USER_ALLOW_NUMERIC));
+
+ assert_se(valid_user_group_name("root", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("lennart", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("LENNART", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("_kkk", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("kkk-", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("kk-k", VALID_USER_ALLOW_NUMERIC));
+
+ assert_se(valid_user_group_name("some5", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("5some", VALID_USER_ALLOW_NUMERIC));
+ assert_se(valid_user_group_name("INNER5NUMBER", VALID_USER_ALLOW_NUMERIC));
+
+ assert_se(!valid_user_group_name("piff.paff@ad.domain.example", VALID_USER_ALLOW_NUMERIC));
+ assert_se(!valid_user_group_name("Dāvis", VALID_USER_ALLOW_NUMERIC));
+}
+
+static void test_valid_gecos(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_gecos(NULL));
+ assert_se(valid_gecos(""));
+ assert_se(valid_gecos("test"));
+ assert_se(valid_gecos("Ümläüt"));
+ assert_se(!valid_gecos("In\nvalid"));
+ assert_se(!valid_gecos("In:valid"));
+}
+
+static void test_valid_home(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(!valid_home(NULL));
+ assert_se(!valid_home(""));
+ assert_se(!valid_home("."));
+ assert_se(!valid_home("/home/.."));
+ assert_se(!valid_home("/home/../"));
+ assert_se(!valid_home("/home\n/foo"));
+ assert_se(!valid_home("./piep"));
+ assert_se(!valid_home("piep"));
+ assert_se(!valid_home("/home/user:lennart"));
+
+ assert_se(valid_home("/"));
+ assert_se(valid_home("/home"));
+ assert_se(valid_home("/home/foo"));
+}
+
+static void test_get_user_creds_one(const char *id, const char *name, uid_t uid, gid_t gid, const char *home, const char *shell) {
+ const char *rhome = NULL;
+ const char *rshell = NULL;
+ uid_t ruid = UID_INVALID;
+ gid_t rgid = GID_INVALID;
+ int r;
+
+ log_info("/* %s(\"%s\", \"%s\", "UID_FMT", "GID_FMT", \"%s\", \"%s\") */",
+ __func__, id, name, uid, gid, home, shell);
+
+ r = get_user_creds(&id, &ruid, &rgid, &rhome, &rshell, 0);
+ log_info_errno(r, "got \"%s\", "UID_FMT", "GID_FMT", \"%s\", \"%s\": %m",
+ id, ruid, rgid, strnull(rhome), strnull(rshell));
+ if (!synthesize_nobody() && streq(name, NOBODY_USER_NAME)) {
+ log_info("(skipping detailed tests because nobody is not synthesized)");
+ return;
+ }
+ assert_se(r == 0);
+ assert_se(streq_ptr(id, name));
+ assert_se(ruid == uid);
+ assert_se(rgid == gid);
+ assert_se(path_equal(rhome, home));
+ assert_se(path_equal(rshell, shell));
+}
+
+static void test_get_group_creds_one(const char *id, const char *name, gid_t gid) {
+ gid_t rgid = GID_INVALID;
+ int r;
+
+ log_info("/* %s(\"%s\", \"%s\", "GID_FMT") */", __func__, id, name, gid);
+
+ r = get_group_creds(&id, &rgid, 0);
+ log_info_errno(r, "got \"%s\", "GID_FMT": %m", id, rgid);
+ if (!synthesize_nobody() && streq(name, NOBODY_GROUP_NAME)) {
+ log_info("(skipping detailed tests because nobody is not synthesized)");
+ return;
+ }
+ assert_se(r == 0);
+ assert_se(streq_ptr(id, name));
+ assert_se(rgid == gid);
+}
+
+static void test_make_salt(void) {
+ log_info("/* %s */", __func__);
+
+ _cleanup_free_ char *s, *t;
+
+ assert_se(make_salt(&s) == 0);
+ log_info("got %s", s);
+
+ assert_se(make_salt(&t) == 0);
+ log_info("got %s", t);
+
+ assert(!streq(s, t));
+}
+
+static void test_in_gid(void) {
+ assert(in_gid(getgid()) >= 0);
+ assert(in_gid(getegid()) >= 0); assert(in_gid(TTY_GID) == 0); /* The TTY gid is for owning ttys, it would be really really weird if we were in it. */
+}
+
+static void test_gid_lists_ops(void) {
+ static const gid_t l1[] = { 5, 10, 15, 20, 25};
+ static const gid_t l2[] = { 1, 2, 3, 15, 20, 25};
+ static const gid_t l3[] = { 5, 10, 15, 20, 25, 26, 27};
+ static const gid_t l4[] = { 25, 26, 20, 15, 5, 27, 10};
+
+ static const gid_t result1[] = {1, 2, 3, 5, 10, 15, 20, 25, 26, 27};
+ static const gid_t result2[] = {5, 10, 15, 20, 25, 26, 27};
+
+ _cleanup_free_ gid_t *gids = NULL;
+ _cleanup_free_ gid_t *res1 = NULL;
+ _cleanup_free_ gid_t *res2 = NULL;
+ _cleanup_free_ gid_t *res3 = NULL;
+ _cleanup_free_ gid_t *res4 = NULL;
+ int nresult;
+
+ nresult = merge_gid_lists(l2, ELEMENTSOF(l2), l3, ELEMENTSOF(l3), &res1);
+ assert_se(nresult >= 0);
+ assert_se(memcmp_nn(res1, nresult, result1, ELEMENTSOF(result1)) == 0);
+
+ nresult = merge_gid_lists(NULL, 0, l2, ELEMENTSOF(l2), &res2);
+ assert_se(nresult >= 0);
+ assert_se(memcmp_nn(res2, nresult, l2, ELEMENTSOF(l2)) == 0);
+
+ nresult = merge_gid_lists(l1, ELEMENTSOF(l1), l1, ELEMENTSOF(l1), &res3);
+ assert_se(nresult >= 0);
+ assert_se(memcmp_nn(l1, ELEMENTSOF(l1), res3, nresult) == 0);
+
+ nresult = merge_gid_lists(l1, ELEMENTSOF(l1), l4, ELEMENTSOF(l4), &res4);
+ assert_se(nresult >= 0);
+ assert_se(memcmp_nn(result2, ELEMENTSOF(result2), res4, nresult) == 0);
+
+ nresult = getgroups_alloc(&gids);
+ assert_se(nresult >= 0 || nresult == -EINVAL || nresult == -ENOMEM);
+ assert_se(gids);
+}
+
+static void test_parse_uid_range(void) {
+ uid_t a = 4711, b = 4711;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(parse_uid_range("", &a, &b) == -EINVAL && a == 4711 && b == 4711);
+ assert_se(parse_uid_range(" ", &a, &b) == -EINVAL && a == 4711 && b == 4711);
+ assert_se(parse_uid_range("x", &a, &b) == -EINVAL && a == 4711 && b == 4711);
+
+ assert_se(parse_uid_range("0", &a, &b) >= 0 && a == 0 && b == 0);
+ assert_se(parse_uid_range("1", &a, &b) >= 0 && a == 1 && b == 1);
+ assert_se(parse_uid_range("2-2", &a, &b) >= 0 && a == 2 && b == 2);
+ assert_se(parse_uid_range("3-3", &a, &b) >= 0 && a == 3 && b == 3);
+ assert_se(parse_uid_range("4-5", &a, &b) >= 0 && a == 4 && b == 5);
+
+ assert_se(parse_uid_range("7-6", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("-1", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("01", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("001", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("+1", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("1--1", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range(" 1", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range(" 1-2", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("1 -2", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("1- 2", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("1-2 ", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("01-2", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("1-02", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("001-2", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range("1-002", &a, &b) == -EINVAL && a == 4 && b == 5);
+ assert_se(parse_uid_range(" 01", &a, &b) == -EINVAL && a == 4 && b == 5);
+}
+
+static void test_mangle_gecos_one(const char *input, const char *expected) {
+ _cleanup_free_ char *p = NULL;
+
+ assert_se(p = mangle_gecos(input));
+ assert_se(streq(p, expected));
+ assert_se(valid_gecos(p));
+}
+
+static void test_mangle_gecos(void) {
+ test_mangle_gecos_one("", "");
+ test_mangle_gecos_one("root", "root");
+ test_mangle_gecos_one("wuff\nwuff", "wuff wuff");
+ test_mangle_gecos_one("wuff:wuff", "wuff wuff");
+ test_mangle_gecos_one("wuff\r\n:wuff", "wuff wuff");
+ test_mangle_gecos_one("\n--wüff-wäff-wöff::", " --wüff-wäff-wöff ");
+ test_mangle_gecos_one("\xc3\x28", " (");
+ test_mangle_gecos_one("\xe2\x28\xa1", " ( ");
+}
+
+int main(int argc, char *argv[]) {
+ test_uid_to_name_one(0, "root");
+ test_uid_to_name_one(UID_NOBODY, NOBODY_USER_NAME);
+ test_uid_to_name_one(0xFFFF, "65535");
+ test_uid_to_name_one(0xFFFFFFFF, "4294967295");
+
+ test_gid_to_name_one(0, "root");
+ test_gid_to_name_one(GID_NOBODY, NOBODY_GROUP_NAME);
+ test_gid_to_name_one(TTY_GID, "tty");
+ test_gid_to_name_one(0xFFFF, "65535");
+ test_gid_to_name_one(0xFFFFFFFF, "4294967295");
+
+ test_get_user_creds_one("root", "root", 0, 0, "/root", "/bin/sh");
+ test_get_user_creds_one("0", "root", 0, 0, "/root", "/bin/sh");
+ test_get_user_creds_one(NOBODY_USER_NAME, NOBODY_USER_NAME, UID_NOBODY, GID_NOBODY, "/", NOLOGIN);
+ test_get_user_creds_one("65534", NOBODY_USER_NAME, UID_NOBODY, GID_NOBODY, "/", NOLOGIN);
+
+ test_get_group_creds_one("root", "root", 0);
+ test_get_group_creds_one("0", "root", 0);
+ test_get_group_creds_one(NOBODY_GROUP_NAME, NOBODY_GROUP_NAME, GID_NOBODY);
+ test_get_group_creds_one("65534", NOBODY_GROUP_NAME, GID_NOBODY);
+
+ test_parse_uid();
+ test_uid_ptr();
+
+ test_valid_user_group_name_relaxed();
+ test_valid_user_group_name();
+ test_valid_user_group_name_or_numeric_relaxed();
+ test_valid_user_group_name_or_numeric();
+ test_valid_gecos();
+ test_mangle_gecos();
+ test_valid_home();
+
+ test_make_salt();
+
+ test_in_gid();
+ test_gid_lists_ops();
+
+ test_parse_uid_range();
+
+ return 0;
+}
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
new file mode 100644
index 0000000..c7b6d8d
--- /dev/null
+++ b/src/test/test-utf8.c
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "util.h"
+
+static void test_utf8_is_printable(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(utf8_is_printable("ascii is valid\tunicode", 22));
+ assert_se(utf8_is_printable("\342\204\242", 3));
+ assert_se(!utf8_is_printable("\341\204", 2));
+ assert_se(utf8_is_printable("ąę", 4));
+ assert_se(!utf8_is_printable("\r", 1));
+ assert_se(utf8_is_printable("\n", 1));
+ assert_se(utf8_is_printable("\t", 1));
+}
+
+static void test_utf8_n_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se( utf8_is_valid_n("ascii is valid unicode", 21));
+ assert_se( utf8_is_valid_n("ascii is valid unicode", 22));
+ assert_se(!utf8_is_valid_n("ascii is valid unicode", 23));
+ assert_se( utf8_is_valid_n("\342\204\242", 0));
+ assert_se(!utf8_is_valid_n("\342\204\242", 1));
+ assert_se(!utf8_is_valid_n("\342\204\242", 2));
+ assert_se( utf8_is_valid_n("\342\204\242", 3));
+ assert_se(!utf8_is_valid_n("\342\204\242", 4));
+ assert_se( utf8_is_valid_n("<ZZ>", 0));
+ assert_se( utf8_is_valid_n("<ZZ>", 1));
+ assert_se( utf8_is_valid_n("<ZZ>", 2));
+ assert_se( utf8_is_valid_n("<ZZ>", 3));
+ assert_se( utf8_is_valid_n("<ZZ>", 4));
+ assert_se(!utf8_is_valid_n("<ZZ>", 5));
+}
+
+static void test_utf8_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(utf8_is_valid("ascii is valid unicode"));
+ assert_se(utf8_is_valid("\342\204\242"));
+ assert_se(!utf8_is_valid("\341\204"));
+}
+
+static void test_ascii_is_valid(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se( ascii_is_valid("alsdjf\t\vbarr\nba z"));
+ assert_se(!ascii_is_valid("\342\204\242"));
+ assert_se(!ascii_is_valid("\341\204"));
+}
+
+static void test_ascii_is_valid_n(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 17));
+ assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 16));
+ assert_se(!ascii_is_valid_n("alsdjf\t\vbarr\nba z", 18));
+ assert_se(!ascii_is_valid_n("\342\204\242", 3));
+ assert_se(!ascii_is_valid_n("\342\204\242", 2));
+ assert_se(!ascii_is_valid_n("\342\204\242", 1));
+ assert_se( ascii_is_valid_n("\342\204\242", 0));
+}
+
+static void test_utf8_encoded_valid_unichar(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(utf8_encoded_valid_unichar("\342\204\242", 1) == -EINVAL); /* truncated */
+ assert_se(utf8_encoded_valid_unichar("\342\204\242", 2) == -EINVAL); /* truncated */
+ assert_se(utf8_encoded_valid_unichar("\342\204\242", 3) == 3);
+ assert_se(utf8_encoded_valid_unichar("\342\204\242", 4) == 3);
+ assert_se(utf8_encoded_valid_unichar("\302\256", 1) == -EINVAL); /* truncated */
+ assert_se(utf8_encoded_valid_unichar("\302\256", 2) == 2);
+ assert_se(utf8_encoded_valid_unichar("\302\256", 3) == 2);
+ assert_se(utf8_encoded_valid_unichar("\302\256", (size_t) -1) == 2);
+ assert_se(utf8_encoded_valid_unichar("a", 1) == 1);
+ assert_se(utf8_encoded_valid_unichar("a", 2) == 1);
+ assert_se(utf8_encoded_valid_unichar("\341\204", 1) == -EINVAL); /* truncated, potentially valid */
+ assert_se(utf8_encoded_valid_unichar("\341\204", 2) == -EINVAL); /* truncated, potentially valid */
+ assert_se(utf8_encoded_valid_unichar("\341\204", 3) == -EINVAL);
+ assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 4) == -EINVAL);
+ assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 5) == -EINVAL);
+}
+
+static void test_utf8_escape_invalid(void) {
+ _cleanup_free_ char *p1, *p2, *p3;
+
+ log_info("/* %s */", __func__);
+
+ p1 = utf8_escape_invalid("goo goo goo");
+ puts(p1);
+ assert_se(utf8_is_valid(p1));
+
+ p2 = utf8_escape_invalid("\341\204\341\204");
+ puts(p2);
+ assert_se(utf8_is_valid(p2));
+
+ p3 = utf8_escape_invalid("\341\204");
+ puts(p3);
+ assert_se(utf8_is_valid(p3));
+}
+
+static void test_utf8_escape_non_printable(void) {
+ _cleanup_free_ char *p1, *p2, *p3, *p4, *p5, *p6;
+
+ log_info("/* %s */", __func__);
+
+ p1 = utf8_escape_non_printable("goo goo goo");
+ puts(p1);
+ assert_se(utf8_is_valid(p1));
+
+ p2 = utf8_escape_non_printable("\341\204\341\204");
+ puts(p2);
+ assert_se(utf8_is_valid(p2));
+
+ p3 = utf8_escape_non_printable("\341\204");
+ puts(p3);
+ assert_se(utf8_is_valid(p3));
+
+ p4 = utf8_escape_non_printable("ąę\n가너도루\n1234\n\341\204\341\204\n\001 \019\20\a");
+ puts(p4);
+ assert_se(utf8_is_valid(p4));
+
+ p5 = utf8_escape_non_printable("\001 \019\20\a");
+ puts(p5);
+ assert_se(utf8_is_valid(p5));
+
+ p6 = utf8_escape_non_printable("\xef\xbf\x30\x13");
+ puts(p6);
+ assert_se(utf8_is_valid(p6));
+}
+
+static void test_utf8_escape_non_printable_full(void) {
+ log_info("/* %s */", __func__);
+
+ for (size_t i = 0; i < 20; i++) {
+ _cleanup_free_ char *p;
+
+ p = utf8_escape_non_printable_full("goo goo goo", i);
+ puts(p);
+ assert_se(utf8_is_valid(p));
+ assert_se(utf8_console_width(p) <= i);
+ }
+
+ for (size_t i = 0; i < 20; i++) {
+ _cleanup_free_ char *p;
+
+ p = utf8_escape_non_printable_full("\001 \019\20\a", i);
+ puts(p);
+ assert_se(utf8_is_valid(p));
+ assert_se(utf8_console_width(p) <= i);
+ }
+
+ for (size_t i = 0; i < 20; i++) {
+ _cleanup_free_ char *p;
+
+ p = utf8_escape_non_printable_full("\xef\xbf\x30\x13", i);
+ puts(p);
+ assert_se(utf8_is_valid(p));
+ assert_se(utf8_console_width(p) <= i);
+ }
+}
+
+static void test_utf16_to_utf8(void) {
+ const char16_t utf16[] = { htole16('a'), htole16(0xd800), htole16('b'), htole16(0xdc00), htole16('c'), htole16(0xd801), htole16(0xdc37) };
+ static const char utf8[] = { 'a', 'b', 'c', 0xf0, 0x90, 0x90, 0xb7 };
+ _cleanup_free_ char16_t *b = NULL;
+ _cleanup_free_ char *a = NULL;
+
+ log_info("/* %s */", __func__);
+
+ /* Convert UTF-16 to UTF-8, filtering embedded bad chars */
+ a = utf16_to_utf8(utf16, sizeof(utf16));
+ assert_se(a);
+ assert_se(memcmp(a, utf8, sizeof(utf8)) == 0);
+
+ /* Convert UTF-8 to UTF-16, and back */
+ b = utf8_to_utf16(utf8, sizeof(utf8));
+ assert_se(b);
+
+ free(a);
+ a = utf16_to_utf8(b, char16_strlen(b) * 2);
+ assert_se(a);
+ assert_se(strlen(a) == sizeof(utf8));
+ assert_se(memcmp(a, utf8, sizeof(utf8)) == 0);
+}
+
+static void test_utf8_n_codepoints(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(utf8_n_codepoints("abc") == 3);
+ assert_se(utf8_n_codepoints("zażółcić gęślą jaźń") == 19);
+ assert_se(utf8_n_codepoints("串") == 1);
+ assert_se(utf8_n_codepoints("") == 0);
+ assert_se(utf8_n_codepoints("…👊🔪💐…") == 5);
+ assert_se(utf8_n_codepoints("\xF1") == (size_t) -1);
+}
+
+static void test_utf8_console_width(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(utf8_console_width("abc") == 3);
+ assert_se(utf8_console_width("zażółcić gęślą jaźń") == 19);
+ assert_se(utf8_console_width("串") == 2);
+ assert_se(utf8_console_width("") == 0);
+ assert_se(utf8_console_width("…👊🔪💐…") == 8);
+ assert_se(utf8_console_width("\xF1") == (size_t) -1);
+}
+
+static void test_utf8_to_utf16(void) {
+ const char *p;
+
+ log_info("/* %s */", __func__);
+
+ FOREACH_STRING(p,
+ "abc",
+ "zażółcić gęślą jaźń",
+ "串",
+ "",
+ "…👊🔪💐…") {
+
+ _cleanup_free_ char16_t *a = NULL;
+ _cleanup_free_ char *b = NULL;
+
+ a = utf8_to_utf16(p, strlen(p));
+ assert_se(a);
+
+ b = utf16_to_utf8(a, char16_strlen(a) * 2);
+ assert_se(b);
+ assert_se(streq(p, b));
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_utf8_n_is_valid();
+ test_utf8_is_valid();
+ test_utf8_is_printable();
+ test_ascii_is_valid();
+ test_ascii_is_valid_n();
+ test_utf8_encoded_valid_unichar();
+ test_utf8_escape_invalid();
+ test_utf8_escape_non_printable();
+ test_utf8_escape_non_printable_full();
+ test_utf16_to_utf8();
+ test_utf8_n_codepoints();
+ test_utf8_console_width();
+ test_utf8_to_utf16();
+
+ return 0;
+}
diff --git a/src/test/test-util.c b/src/test/test-util.c
new file mode 100644
index 0000000..0fe7a38
--- /dev/null
+++ b/src/test/test-util.c
@@ -0,0 +1,527 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "fileio.h"
+#include "fs-util.h"
+#include "limits-util.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+#include "util.h"
+
+static void test_align_power2(void) {
+ unsigned long i, p2;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(ALIGN_POWER2(0) == 0);
+ assert_se(ALIGN_POWER2(1) == 1);
+ assert_se(ALIGN_POWER2(2) == 2);
+ assert_se(ALIGN_POWER2(3) == 4);
+ assert_se(ALIGN_POWER2(4) == 4);
+ assert_se(ALIGN_POWER2(5) == 8);
+ assert_se(ALIGN_POWER2(6) == 8);
+ assert_se(ALIGN_POWER2(7) == 8);
+ assert_se(ALIGN_POWER2(9) == 16);
+ assert_se(ALIGN_POWER2(10) == 16);
+ assert_se(ALIGN_POWER2(11) == 16);
+ assert_se(ALIGN_POWER2(12) == 16);
+ assert_se(ALIGN_POWER2(13) == 16);
+ assert_se(ALIGN_POWER2(14) == 16);
+ assert_se(ALIGN_POWER2(15) == 16);
+ assert_se(ALIGN_POWER2(16) == 16);
+ assert_se(ALIGN_POWER2(17) == 32);
+
+ assert_se(ALIGN_POWER2(ULONG_MAX) == 0);
+ assert_se(ALIGN_POWER2(ULONG_MAX - 1) == 0);
+ assert_se(ALIGN_POWER2(ULONG_MAX - 1024) == 0);
+ assert_se(ALIGN_POWER2(ULONG_MAX / 2) == ULONG_MAX / 2 + 1);
+ assert_se(ALIGN_POWER2(ULONG_MAX + 1) == 0);
+
+ for (i = 1; i < 131071; ++i) {
+ for (p2 = 1; p2 < i; p2 <<= 1)
+ /* empty */ ;
+
+ assert_se(ALIGN_POWER2(i) == p2);
+ }
+
+ for (i = ULONG_MAX - 1024; i < ULONG_MAX; ++i) {
+ for (p2 = 1; p2 && p2 < i; p2 <<= 1)
+ /* empty */ ;
+
+ assert_se(ALIGN_POWER2(i) == p2);
+ }
+}
+
+static void test_max(void) {
+ static const struct {
+ int a;
+ int b[CONST_MAX(10, 100)];
+ } val1 = {
+ .a = CONST_MAX(10, 100),
+ };
+ int d = 0;
+ unsigned long x = 12345;
+ unsigned long y = 54321;
+ const char str[] = "a_string_constant";
+ const unsigned long long arr[] = {9999ULL, 10ULL, 0ULL, 3000ULL, 2000ULL, 1000ULL, 100ULL, 9999999ULL};
+ void *p = (void *)str;
+ void *q = (void *)&str[16];
+
+ log_info("/* %s */", __func__);
+
+ assert_cc(sizeof(val1.b) == sizeof(int) * 100);
+
+ /* CONST_MAX returns (void) instead of a value if the passed arguments
+ * are not of the same type or not constant expressions. */
+ assert_cc(__builtin_types_compatible_p(typeof(CONST_MAX(1, 10)), int));
+ assert_cc(__builtin_types_compatible_p(typeof(CONST_MAX(1, 1U)), void));
+
+ assert_se(val1.a == 100);
+ assert_se(MAX(++d, 0) == 1);
+ assert_se(d == 1);
+
+ assert_cc(MAXSIZE(char[3], uint16_t) == 3);
+ assert_cc(MAXSIZE(char[3], uint32_t) == 4);
+ assert_cc(MAXSIZE(char, long) == sizeof(long));
+
+ assert_se(MAX(-5, 5) == 5);
+ assert_se(MAX(5, 5) == 5);
+ assert_se(MAX(MAX(1, MAX(2, MAX(3, 4))), 5) == 5);
+ assert_se(MAX(MAX(1, MAX(2, MAX(3, 2))), 1) == 3);
+ assert_se(MAX(MIN(1, MIN(2, MIN(3, 4))), 5) == 5);
+ assert_se(MAX(MAX(1, MIN(2, MIN(3, 2))), 1) == 2);
+ assert_se(LESS_BY(8, 4) == 4);
+ assert_se(LESS_BY(8, 8) == 0);
+ assert_se(LESS_BY(4, 8) == 0);
+ assert_se(LESS_BY(16, LESS_BY(8, 4)) == 12);
+ assert_se(LESS_BY(4, LESS_BY(8, 4)) == 0);
+ assert_se(CMP(3, 5) == -1);
+ assert_se(CMP(5, 3) == 1);
+ assert_se(CMP(5, 5) == 0);
+ assert_se(CMP(x, y) == -1);
+ assert_se(CMP(y, x) == 1);
+ assert_se(CMP(x, x) == 0);
+ assert_se(CMP(y, y) == 0);
+ assert_se(CMP(UINT64_MAX, (uint64_t) 0) == 1);
+ assert_se(CMP((uint64_t) 0, UINT64_MAX) == -1);
+ assert_se(CMP(UINT64_MAX, UINT64_MAX) == 0);
+ assert_se(CMP(INT64_MIN, INT64_MAX) == -1);
+ assert_se(CMP(INT64_MAX, INT64_MIN) == 1);
+ assert_se(CMP(INT64_MAX, INT64_MAX) == 0);
+ assert_se(CMP(INT64_MIN, INT64_MIN) == 0);
+ assert_se(CMP(INT64_MAX, (int64_t) 0) == 1);
+ assert_se(CMP((int64_t) 0, INT64_MIN) == 1);
+ assert_se(CMP(INT64_MIN, (int64_t) 0) == -1);
+ assert_se(CMP((int64_t) 0, INT64_MAX) == -1);
+ assert_se(CMP(&str[2], &str[7]) == -1);
+ assert_se(CMP(&str[2], &str[2]) == 0);
+ assert_se(CMP(&str[7], (const char *)str) == 1);
+ assert_se(CMP(str[2], str[7]) == 1);
+ assert_se(CMP(str[7], *str) == 1);
+ assert_se(CMP((const unsigned long long *)arr, &arr[3]) == -1);
+ assert_se(CMP(*arr, arr[3]) == 1);
+ assert_se(CMP(p, q) == -1);
+ assert_se(CMP(q, p) == 1);
+ assert_se(CMP(p, p) == 0);
+ assert_se(CMP(q, q) == 0);
+ assert_se(CLAMP(-5, 0, 1) == 0);
+ assert_se(CLAMP(5, 0, 1) == 1);
+ assert_se(CLAMP(5, -10, 1) == 1);
+ assert_se(CLAMP(5, -10, 10) == 5);
+ assert_se(CLAMP(CLAMP(0, -10, 10), CLAMP(-5, 10, 20), CLAMP(100, -5, 20)) == 10);
+}
+
+#pragma GCC diagnostic push
+#ifdef __clang__
+# pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+#endif
+
+static void test_container_of(void) {
+ struct mytype {
+ uint8_t pad1[3];
+ uint64_t v1;
+ uint8_t pad2[2];
+ uint32_t v2;
+ } myval = { };
+
+ log_info("/* %s */", __func__);
+
+ assert_cc(sizeof(myval) >= 17);
+ assert_se(container_of(&myval.v1, struct mytype, v1) == &myval);
+ assert_se(container_of(&myval.v2, struct mytype, v2) == &myval);
+ assert_se(container_of(&container_of(&myval.v2,
+ struct mytype,
+ v2)->v1,
+ struct mytype,
+ v1) == &myval);
+}
+
+#pragma GCC diagnostic pop
+
+static void test_div_round_up(void) {
+ int div;
+
+ log_info("/* %s */", __func__);
+
+ /* basic tests */
+ assert_se(DIV_ROUND_UP(0, 8) == 0);
+ assert_se(DIV_ROUND_UP(1, 8) == 1);
+ assert_se(DIV_ROUND_UP(8, 8) == 1);
+ assert_se(DIV_ROUND_UP(12, 8) == 2);
+ assert_se(DIV_ROUND_UP(16, 8) == 2);
+
+ /* test multiple evaluation */
+ div = 0;
+ assert_se(DIV_ROUND_UP(div++, 8) == 0 && div == 1);
+ assert_se(DIV_ROUND_UP(++div, 8) == 1 && div == 2);
+ assert_se(DIV_ROUND_UP(8, div++) == 4 && div == 3);
+ assert_se(DIV_ROUND_UP(8, ++div) == 2 && div == 4);
+
+ /* overflow test with exact division */
+ assert_se(sizeof(0U) == 4);
+ assert_se(0xfffffffaU % 10U == 0U);
+ assert_se(0xfffffffaU / 10U == 429496729U);
+ assert_se(DIV_ROUND_UP(0xfffffffaU, 10U) == 429496729U);
+ assert_se((0xfffffffaU + 10U - 1U) / 10U == 0U);
+ assert_se(0xfffffffaU / 10U + !!(0xfffffffaU % 10U) == 429496729U);
+
+ /* overflow test with rounded division */
+ assert_se(0xfffffffdU % 10U == 3U);
+ assert_se(0xfffffffdU / 10U == 429496729U);
+ assert_se(DIV_ROUND_UP(0xfffffffdU, 10U) == 429496730U);
+ assert_se((0xfffffffdU + 10U - 1U) / 10U == 0U);
+ assert_se(0xfffffffdU / 10U + !!(0xfffffffdU % 10U) == 429496730U);
+}
+
+static void test_u64log2(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(u64log2(0) == 0);
+ assert_se(u64log2(8) == 3);
+ assert_se(u64log2(9) == 3);
+ assert_se(u64log2(15) == 3);
+ assert_se(u64log2(16) == 4);
+ assert_se(u64log2(1024*1024) == 20);
+ assert_se(u64log2(1024*1024+5) == 20);
+}
+
+static void test_protect_errno(void) {
+ log_info("/* %s */", __func__);
+
+ errno = 12;
+ {
+ PROTECT_ERRNO;
+ errno = 11;
+ }
+ assert_se(errno == 12);
+}
+
+static void test_unprotect_errno_inner_function(void) {
+ PROTECT_ERRNO;
+
+ errno = 2222;
+}
+
+static void test_unprotect_errno(void) {
+ log_info("/* %s */", __func__);
+
+ errno = 4711;
+
+ PROTECT_ERRNO;
+
+ errno = 815;
+
+ UNPROTECT_ERRNO;
+
+ assert_se(errno == 4711);
+
+ test_unprotect_errno_inner_function();
+
+ assert_se(errno == 4711);
+}
+
+static void test_in_set(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(IN_SET(1, 1));
+ assert_se(IN_SET(1, 1, 2, 3, 4));
+ assert_se(IN_SET(2, 1, 2, 3, 4));
+ assert_se(IN_SET(3, 1, 2, 3, 4));
+ assert_se(IN_SET(4, 1, 2, 3, 4));
+ assert_se(!IN_SET(0, 1));
+ assert_se(!IN_SET(0, 1, 2, 3, 4));
+}
+
+static void test_log2i(void) {
+ log_info("/* %s */", __func__);
+
+ assert_se(log2i(1) == 0);
+ assert_se(log2i(2) == 1);
+ assert_se(log2i(3) == 1);
+ assert_se(log2i(4) == 2);
+ assert_se(log2i(32) == 5);
+ assert_se(log2i(33) == 5);
+ assert_se(log2i(63) == 5);
+ assert_se(log2i(INT_MAX) == sizeof(int)*8-2);
+}
+
+static void test_eqzero(void) {
+ const uint32_t zeros[] = {0, 0, 0};
+ const uint32_t ones[] = {1, 1};
+ const uint32_t mixed[] = {0, 1, 0, 0, 0};
+ const uint8_t longer[] = {[55] = 255};
+
+ log_info("/* %s */", __func__);
+
+ assert_se(eqzero(zeros));
+ assert_se(!eqzero(ones));
+ assert_se(!eqzero(mixed));
+ assert_se(!eqzero(longer));
+}
+
+static void test_raw_clone(void) {
+ pid_t parent, pid, pid2;
+
+ log_info("/* %s */", __func__);
+
+ parent = getpid();
+ log_info("before clone: getpid()→"PID_FMT, parent);
+ assert_se(raw_getpid() == parent);
+
+ pid = raw_clone(0);
+ assert_se(pid >= 0);
+
+ pid2 = raw_getpid();
+ log_info("raw_clone: "PID_FMT" getpid()→"PID_FMT" raw_getpid()→"PID_FMT,
+ pid, getpid(), pid2);
+ if (pid == 0) {
+ assert_se(pid2 != parent);
+ _exit(EXIT_SUCCESS);
+ } else {
+ int status;
+
+ assert_se(pid2 == parent);
+ waitpid(pid, &status, __WCLONE);
+ assert_se(WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SUCCESS);
+ }
+
+ errno = 0;
+ assert_se(raw_clone(CLONE_FS|CLONE_NEWNS) == -1);
+ assert_se(errno == EINVAL);
+}
+
+static void test_physical_memory(void) {
+ uint64_t p;
+ char buf[FORMAT_BYTES_MAX];
+
+ log_info("/* %s */", __func__);
+
+ p = physical_memory();
+ assert_se(p > 0);
+ assert_se(p < UINT64_MAX);
+ assert_se(p % page_size() == 0);
+
+ log_info("Memory: %s (%" PRIu64 ")", format_bytes(buf, sizeof(buf), p), p);
+}
+
+static void test_physical_memory_scale(void) {
+ uint64_t p;
+
+ log_info("/* %s */", __func__);
+
+ p = physical_memory();
+
+ assert_se(physical_memory_scale(0, 100) == 0);
+ assert_se(physical_memory_scale(100, 100) == p);
+
+ log_info("Memory original: %" PRIu64, physical_memory());
+ log_info("Memory scaled by 50%%: %" PRIu64, physical_memory_scale(50, 100));
+ log_info("Memory divided by 2: %" PRIu64, physical_memory() / 2);
+ log_info("Page size: %zu", page_size());
+
+ /* There might be an uneven number of pages, hence permit these calculations to be half a page off... */
+ assert_se(page_size()/2 + physical_memory_scale(50, 100) - p/2 <= page_size());
+ assert_se(physical_memory_scale(200, 100) == p*2);
+
+ assert_se(physical_memory_scale(0, 1) == 0);
+ assert_se(physical_memory_scale(1, 1) == p);
+ assert_se(physical_memory_scale(2, 1) == p*2);
+
+ assert_se(physical_memory_scale(0, 2) == 0);
+
+ assert_se(page_size()/2 + physical_memory_scale(1, 2) - p/2 <= page_size());
+ assert_se(physical_memory_scale(2, 2) == p);
+ assert_se(physical_memory_scale(4, 2) == p*2);
+
+ assert_se(physical_memory_scale(0, UINT32_MAX) == 0);
+ assert_se(physical_memory_scale(UINT32_MAX, UINT32_MAX) == p);
+
+ /* overflow */
+ assert_se(physical_memory_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
+}
+
+static void test_system_tasks_max(void) {
+ uint64_t t;
+
+ log_info("/* %s */", __func__);
+
+ t = system_tasks_max();
+ assert_se(t > 0);
+ assert_se(t < UINT64_MAX);
+
+ log_info("Max tasks: %" PRIu64, t);
+}
+
+static void test_system_tasks_max_scale(void) {
+ uint64_t t;
+
+ log_info("/* %s */", __func__);
+
+ t = system_tasks_max();
+
+ assert_se(system_tasks_max_scale(0, 100) == 0);
+ assert_se(system_tasks_max_scale(100, 100) == t);
+
+ assert_se(system_tasks_max_scale(0, 1) == 0);
+ assert_se(system_tasks_max_scale(1, 1) == t);
+ assert_se(system_tasks_max_scale(2, 1) == 2*t);
+
+ assert_se(system_tasks_max_scale(0, 2) == 0);
+ assert_se(system_tasks_max_scale(1, 2) == t/2);
+ assert_se(system_tasks_max_scale(2, 2) == t);
+ assert_se(system_tasks_max_scale(3, 2) == (3*t)/2);
+ assert_se(system_tasks_max_scale(4, 2) == t*2);
+
+ assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0);
+ assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2);
+ assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t);
+
+ /* overflow */
+
+ assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
+}
+
+static void test_foreach_pointer(void) {
+ int a, b, c, *i;
+ size_t k = 0;
+
+ log_info("/* %s */", __func__);
+
+ FOREACH_POINTER(i, &a, &b, &c) {
+ switch (k) {
+
+ case 0:
+ assert_se(i == &a);
+ break;
+
+ case 1:
+ assert_se(i == &b);
+ break;
+
+ case 2:
+ assert_se(i == &c);
+ break;
+
+ default:
+ assert_not_reached("unexpected index");
+ break;
+ }
+
+ k++;
+ }
+
+ assert(k == 3);
+
+ FOREACH_POINTER(i, &b) {
+ assert(k == 3);
+ assert(i == &b);
+ k = 4;
+ }
+
+ assert(k == 4);
+
+ FOREACH_POINTER(i, NULL, &c, NULL, &b, NULL, &a, NULL) {
+ switch (k) {
+
+ case 4:
+ assert_se(i == NULL);
+ break;
+
+ case 5:
+ assert_se(i == &c);
+ break;
+
+ case 6:
+ assert_se(i == NULL);
+ break;
+
+ case 7:
+ assert_se(i == &b);
+ break;
+
+ case 8:
+ assert_se(i == NULL);
+ break;
+
+ case 9:
+ assert_se(i == &a);
+ break;
+
+ case 10:
+ assert_se(i == NULL);
+ break;
+
+ default:
+ assert_not_reached("unexpected index");
+ break;
+ }
+
+ k++;
+ }
+
+ assert(k == 11);
+}
+
+static void test_ptr_to_int(void) {
+ log_info("/* %s */", __func__);
+
+ /* Primary reason to have this test is to validate that pointers are large enough to hold entire int range */
+ assert_se(PTR_TO_INT(INT_TO_PTR(0)) == 0);
+ assert_se(PTR_TO_INT(INT_TO_PTR(1)) == 1);
+ assert_se(PTR_TO_INT(INT_TO_PTR(-1)) == -1);
+ assert_se(PTR_TO_INT(INT_TO_PTR(INT_MAX)) == INT_MAX);
+ assert_se(PTR_TO_INT(INT_TO_PTR(INT_MIN)) == INT_MIN);
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_INFO);
+
+ test_align_power2();
+ test_max();
+ test_container_of();
+ test_div_round_up();
+ test_u64log2();
+ test_protect_errno();
+ test_unprotect_errno();
+ test_in_set();
+ test_log2i();
+ test_eqzero();
+ test_raw_clone();
+ test_physical_memory();
+ test_physical_memory_scale();
+ test_system_tasks_max();
+ test_system_tasks_max_scale();
+ test_foreach_pointer();
+ test_ptr_to_int();
+
+ return 0;
+}
diff --git a/src/test/test-varlink.c b/src/test/test-varlink.c
new file mode 100644
index 0000000..9a5fbc6
--- /dev/null
+++ b/src/test/test-varlink.c
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+
+#include "sd-event.h"
+
+#include "fd-util.h"
+#include "json.h"
+#include "rm-rf.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "varlink.h"
+
+/* Let's pick some high value, that is higher than the largest listen() backlog, but leaves enough room below
+ the typical RLIMIT_NOFILE value of 1024 so that we can process both sides of each socket in our
+ process. Or in other words: "OVERLOAD_CONNECTIONS * 2 + x < 1024" should hold, for some small x that
+ should cover any auxiliary fds, the listener server fds, stdin/stdout/stderr and whatever else. */
+#define OVERLOAD_CONNECTIONS 333
+
+static int n_done = 0;
+static int block_write_fd = -1;
+
+static int method_something(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+ _cleanup_(json_variant_unrefp) JsonVariant *ret = NULL;
+ JsonVariant *a, *b;
+ intmax_t x, y;
+ int r;
+
+ a = json_variant_by_key(parameters, "a");
+ if (!a)
+ return varlink_error(link, "io.test.BadParameters", NULL);
+
+ x = json_variant_integer(a);
+
+ b = json_variant_by_key(parameters, "b");
+ if (!b)
+ return varlink_error(link, "io.test.BadParameters", NULL);
+
+ y = json_variant_integer(b);
+
+ r = json_build(&ret, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("sum", JSON_BUILD_INTEGER(x + y))));
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, ret);
+}
+
+static int method_done(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ if (++n_done == 2)
+ sd_event_exit(varlink_get_event(link), EXIT_FAILURE);
+
+ return 0;
+}
+
+static int reply(Varlink *link, JsonVariant *parameters, const char *error_id, VarlinkReplyFlags flags, void *userdata) {
+ JsonVariant *sum;
+
+ sum = json_variant_by_key(parameters, "sum");
+
+ assert_se(json_variant_integer(sum) == 7+22);
+
+ if (++n_done == 2)
+ sd_event_exit(varlink_get_event(link), EXIT_FAILURE);
+
+ return 0;
+}
+
+static int on_connect(VarlinkServer *s, Varlink *link, void *userdata) {
+ uid_t uid = UID_INVALID;
+
+ assert(s);
+ assert(link);
+
+ assert_se(varlink_get_peer_uid(link, &uid) >= 0);
+ assert_se(getuid() == uid);
+
+ return 0;
+}
+
+static int overload_reply(Varlink *link, JsonVariant *parameters, const char *error_id, VarlinkReplyFlags flags, void *userdata) {
+
+ /* This method call reply should always be called with a disconnection, since the method call should
+ * be talking to an overloaded server */
+
+ log_debug("Over reply triggered with error: %s", strna(error_id));
+ assert_se(streq(error_id, VARLINK_ERROR_DISCONNECTED));
+ sd_event_exit(varlink_get_event(link), 0);
+
+ return 0;
+}
+
+static void flood_test(const char *address) {
+ _cleanup_(varlink_flush_close_unrefp) Varlink *c = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_free_ Varlink **connections = NULL;
+ size_t k;
+ char x = 'x';
+
+ log_debug("Flooding server...");
+
+ /* Block the main event loop while we flood */
+ assert_se(write(block_write_fd, &x, sizeof(x)) == sizeof(x));
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ /* Flood the server with connections */
+ assert_se(connections = new0(Varlink*, OVERLOAD_CONNECTIONS));
+ for (k = 0; k < OVERLOAD_CONNECTIONS; k++) {
+ _cleanup_free_ char *t = NULL;
+ log_debug("connection %zu", k);
+ assert_se(varlink_connect_address(connections + k, address) >= 0);
+
+ assert_se(asprintf(&t, "flood-%zu", k) >= 0);
+ assert_se(varlink_set_description(connections[k], t) >= 0);
+ assert_se(varlink_attach_event(connections[k], e, k) >= 0);
+ assert_se(varlink_sendb(connections[k], "io.test.Rubbish", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("id", JSON_BUILD_INTEGER(k)))) >= 0);
+ }
+
+ /* Then, create one more, which should fail */
+ log_debug("Creating overload connection...");
+ assert_se(varlink_connect_address(&c, address) >= 0);
+ assert_se(varlink_set_description(c, "overload-client") >= 0);
+ assert_se(varlink_attach_event(c, e, k) >= 0);
+ assert_se(varlink_bind_reply(c, overload_reply) >= 0);
+ assert_se(varlink_invokeb(c, "io.test.Overload", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("foo", JSON_BUILD_STRING("bar")))) >= 0);
+
+ /* Unblock it */
+ log_debug("Unblocking server...");
+ block_write_fd = safe_close(block_write_fd);
+
+ /* This loop will terminate as soon as the overload reply callback is called */
+ assert_se(sd_event_loop(e) >= 0);
+
+ /* And close all connections again */
+ for (k = 0; k < OVERLOAD_CONNECTIONS; k++)
+ connections[k] = varlink_unref(connections[k]);
+}
+
+static void *thread(void *arg) {
+ _cleanup_(varlink_flush_close_unrefp) Varlink *c = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *i = NULL;
+ JsonVariant *o = NULL;
+ const char *e;
+
+ assert_se(json_build(&i, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("a", JSON_BUILD_INTEGER(88)),
+ JSON_BUILD_PAIR("b", JSON_BUILD_INTEGER(99)))) >= 0);
+
+ assert_se(varlink_connect_address(&c, arg) >= 0);
+ assert_se(varlink_set_description(c, "thread-client") >= 0);
+
+ assert_se(varlink_call(c, "io.test.DoSomething", i, &o, &e, NULL) >= 0);
+ assert_se(json_variant_integer(json_variant_by_key(o, "sum")) == 88 + 99);
+ assert_se(!e);
+
+ assert_se(varlink_callb(c, "io.test.IDontExist", &o, &e, NULL, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("x", JSON_BUILD_REAL(5.5)))) >= 0);
+ assert_se(streq_ptr(json_variant_string(json_variant_by_key(o, "method")), "io.test.IDontExist"));
+ assert_se(streq(e, VARLINK_ERROR_METHOD_NOT_FOUND));
+
+ flood_test(arg);
+
+ assert_se(varlink_send(c, "io.test.Done", NULL) >= 0);
+
+ return NULL;
+}
+
+static int block_fd_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ char c;
+
+ assert_se(fd_nonblock(fd, false) >= 0);
+
+ assert_se(read(fd, &c, sizeof(c)) == sizeof(c));
+ /* When a character is written to this pipe we'll block until the pipe is closed. */
+
+ assert_se(read(fd, &c, sizeof(c)) == 0);
+
+ assert_se(fd_nonblock(fd, true) >= 0);
+
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *block_event = NULL;
+ _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
+ _cleanup_(varlink_flush_close_unrefp) Varlink *c = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(close_pairp) int block_fds[2] = { -1, -1 };
+ pthread_t t;
+ const char *sp;
+
+ log_set_max_level(LOG_DEBUG);
+ log_open();
+
+ assert_se(mkdtemp_malloc("/tmp/varlink-test-XXXXXX", &tmpdir) >= 0);
+ sp = strjoina(tmpdir, "/socket");
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(pipe2(block_fds, O_NONBLOCK|O_CLOEXEC) >= 0);
+ assert_se(sd_event_add_io(e, &block_event, block_fds[0], EPOLLIN, block_fd_handler, NULL) >= 0);
+ assert_se(sd_event_source_set_priority(block_event, SD_EVENT_PRIORITY_IMPORTANT) >= 0);
+ block_write_fd = TAKE_FD(block_fds[1]);
+
+ assert_se(varlink_server_new(&s, VARLINK_SERVER_ACCOUNT_UID) >= 0);
+ assert_se(varlink_server_set_description(s, "our-server") >= 0);
+
+ assert_se(varlink_server_bind_method(s, "io.test.DoSomething", method_something) >= 0);
+ assert_se(varlink_server_bind_method(s, "io.test.Done", method_done) >= 0);
+ assert_se(varlink_server_bind_connect(s, on_connect) >= 0);
+ assert_se(varlink_server_listen_address(s, sp, 0600) >= 0);
+ assert_se(varlink_server_attach_event(s, e, 0) >= 0);
+ assert_se(varlink_server_set_connections_max(s, OVERLOAD_CONNECTIONS) >= 0);
+
+ assert_se(varlink_connect_address(&c, sp) >= 0);
+ assert_se(varlink_set_description(c, "main-client") >= 0);
+ assert_se(varlink_bind_reply(c, reply) >= 0);
+
+ assert_se(json_build(&v, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("a", JSON_BUILD_INTEGER(7)),
+ JSON_BUILD_PAIR("b", JSON_BUILD_INTEGER(22)))) >= 0);
+
+ assert_se(varlink_invoke(c, "io.test.DoSomething", v) >= 0);
+
+ assert_se(varlink_attach_event(c, e, 0) >= 0);
+
+ assert_se(pthread_create(&t, NULL, thread, (void*) sp) == 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ assert_se(pthread_join(t, NULL) == 0);
+
+ return 0;
+}
diff --git a/src/test/test-verbs.c b/src/test/test-verbs.c
new file mode 100644
index 0000000..b7a0cbf
--- /dev/null
+++ b/src/test/test-verbs.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "macro.h"
+#include "strv.h"
+#include "verbs.h"
+
+static int noop_dispatcher(int argc, char *argv[], void *userdata) {
+ return 0;
+}
+
+#define test_dispatch_one(argv, verbs, expected) \
+ optind = 0; \
+ assert_se(dispatch_verb(strv_length(argv), argv, verbs, NULL) == expected);
+
+static void test_verbs(void) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, noop_dispatcher },
+ { "list-images", VERB_ANY, 1, 0, noop_dispatcher },
+ { "list", VERB_ANY, 2, VERB_DEFAULT, noop_dispatcher },
+ { "status", 2, VERB_ANY, 0, noop_dispatcher },
+ { "show", VERB_ANY, VERB_ANY, 0, noop_dispatcher },
+ { "terminate", 2, VERB_ANY, 0, noop_dispatcher },
+ { "login", 2, 2, 0, noop_dispatcher },
+ { "copy-to", 3, 4, 0, noop_dispatcher },
+ {}
+ };
+
+ /* not found */
+ test_dispatch_one(STRV_MAKE("command-not-found"), verbs, -EINVAL);
+
+ /* found */
+ test_dispatch_one(STRV_MAKE("show"), verbs, 0);
+
+ /* found, too few args */
+ test_dispatch_one(STRV_MAKE("copy-to", "foo"), verbs, -EINVAL);
+
+ /* found, meets min args */
+ test_dispatch_one(STRV_MAKE("status", "foo", "bar"), verbs, 0);
+
+ /* found, too many args */
+ test_dispatch_one(STRV_MAKE("copy-to", "foo", "bar", "baz", "quux", "qaax"), verbs, -EINVAL);
+
+ /* no verb, but a default is set */
+ test_dispatch_one(STRV_MAKE_EMPTY, verbs, 0);
+}
+
+static void test_verbs_no_default(void) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, noop_dispatcher },
+ {},
+ };
+
+ test_dispatch_one(STRV_MAKE(NULL), verbs, -EINVAL);
+}
+
+int main(int argc, char *argv[]) {
+ test_verbs();
+ test_verbs_no_default();
+
+ return 0;
+}
diff --git a/src/test/test-watch-pid.c b/src/test/test-watch-pid.c
new file mode 100644
index 0000000..4afc46f
--- /dev/null
+++ b/src/test/test-watch-pid.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "log.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "service.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+ _cleanup_(manager_freep) Manager *m = NULL;
+ Unit *a, *b, *c, *u;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+ r = enter_cgroup_subroot(NULL);
+ if (r == -ENOMEDIUM)
+ return log_tests_skipped("cgroupfs not available");
+
+ _cleanup_free_ char *unit_dir = NULL;
+ assert_se(get_testdata_dir("units/", &unit_dir) >= 0);
+ assert_se(set_unit_path(unit_dir) >= 0);
+
+ assert_se(runtime_dir = setup_fake_runtime_dir());
+
+ assert_se(manager_new(UNIT_FILE_USER, MANAGER_TEST_RUN_BASIC, &m) >= 0);
+ assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+ assert_se(a = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(a, "a.service") >= 0);
+ assert_se(set_isempty(a->pids));
+
+ assert_se(b = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(b, "b.service") >= 0);
+ assert_se(set_isempty(b->pids));
+
+ assert_se(c = unit_new(m, sizeof(Service)));
+ assert_se(unit_add_name(c, "c.service") >= 0);
+ assert_se(set_isempty(c->pids));
+
+ assert_se(hashmap_isempty(m->watch_pids));
+ assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
+
+ assert_se(unit_watch_pid(a, 4711, false) >= 0);
+ assert_se(manager_get_unit_by_pid(m, 4711) == a);
+
+ assert_se(unit_watch_pid(a, 4711, false) >= 0);
+ assert_se(manager_get_unit_by_pid(m, 4711) == a);
+
+ assert_se(unit_watch_pid(b, 4711, false) >= 0);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == b);
+
+ assert_se(unit_watch_pid(b, 4711, false) >= 0);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == b);
+
+ assert_se(unit_watch_pid(c, 4711, false) >= 0);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == b || u == c);
+
+ assert_se(unit_watch_pid(c, 4711, false) >= 0);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == b || u == c);
+
+ unit_unwatch_pid(b, 4711);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == c);
+
+ unit_unwatch_pid(b, 4711);
+ u = manager_get_unit_by_pid(m, 4711);
+ assert_se(u == a || u == c);
+
+ unit_unwatch_pid(a, 4711);
+ assert_se(manager_get_unit_by_pid(m, 4711) == c);
+
+ unit_unwatch_pid(a, 4711);
+ assert_se(manager_get_unit_by_pid(m, 4711) == c);
+
+ unit_unwatch_pid(c, 4711);
+ assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
+
+ unit_unwatch_pid(c, 4711);
+ assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
+
+ return 0;
+}
diff --git a/src/test/test-watchdog.c b/src/test/test-watchdog.c
new file mode 100644
index 0000000..cbef75f
--- /dev/null
+++ b/src/test/test-watchdog.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <string.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "tests.h"
+#include "watchdog.h"
+
+int main(int argc, char *argv[]) {
+ usec_t t;
+ unsigned i, count;
+ int r;
+ bool slow;
+
+ test_setup_logging(LOG_DEBUG);
+
+ slow = slow_tests_enabled();
+
+ t = slow ? 10 * USEC_PER_SEC : 1 * USEC_PER_SEC;
+ count = slow ? 5 : 3;
+
+ r = watchdog_set_timeout(&t);
+ if (r < 0)
+ log_warning_errno(r, "Failed to open watchdog: %m");
+ if (r == -EPERM)
+ t = 0;
+
+ for (i = 0; i < count; i++) {
+ log_info("Pinging...");
+ r = watchdog_ping();
+ if (r < 0)
+ log_warning_errno(r, "Failed to ping watchdog: %m");
+
+ usleep(t/2);
+ }
+
+ watchdog_close(true);
+ return 0;
+}
diff --git a/src/test/test-web-util.c b/src/test/test-web-util.c
new file mode 100644
index 0000000..853ea9c
--- /dev/null
+++ b/src/test/test-web-util.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "macro.h"
+#include "web-util.h"
+
+static void test_is_valid_documentation_url(void) {
+ assert_se(documentation_url_is_valid("http://www.freedesktop.org/wiki/Software/systemd"));
+ assert_se(documentation_url_is_valid("https://www.kernel.org/doc/Documentation/binfmt_misc.txt")); /* dead */
+ assert_se(documentation_url_is_valid("https://www.kernel.org/doc/Documentation/admin-guide/binfmt-misc.rst"));
+ assert_se(documentation_url_is_valid("https://www.kernel.org/doc/html/latest/admin-guide/binfmt-misc.html"));
+ assert_se(documentation_url_is_valid("file:/foo/foo"));
+ assert_se(documentation_url_is_valid("man:systemd.special(7)"));
+ assert_se(documentation_url_is_valid("info:bar"));
+
+ assert_se(!documentation_url_is_valid("foo:"));
+ assert_se(!documentation_url_is_valid("info:"));
+ assert_se(!documentation_url_is_valid(""));
+}
+
+int main(int argc, char *argv[]) {
+ test_is_valid_documentation_url();
+
+ return 0;
+}
diff --git a/src/test/test-xattr-util.c b/src/test/test-xattr-util.c
new file mode 100644
index 0000000..6aa55ba
--- /dev/null
+++ b/src/test/test-xattr-util.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "xattr-util.h"
+
+static void test_fgetxattrat_fake(void) {
+ char t[] = "/var/tmp/xattrtestXXXXXX";
+ _cleanup_close_ int fd = -1;
+ const char *x;
+ char v[3];
+ int r;
+ size_t size;
+
+ assert_se(mkdtemp(t));
+ x = strjoina(t, "/test");
+ assert_se(touch(x) >= 0);
+
+ r = setxattr(x, "user.foo", "bar", 3, 0);
+ if (r < 0 && ERRNO_IS_NOT_SUPPORTED(errno)) /* no xattrs supported on /var/tmp... */
+ goto cleanup;
+ assert_se(r >= 0);
+
+ fd = open(t, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY);
+ assert_se(fd >= 0);
+
+ assert_se(fgetxattrat_fake(fd, "test", "user.foo", v, 3, 0, &size) >= 0);
+ assert_se(size == 3);
+ assert_se(memcmp(v, "bar", 3) == 0);
+
+ safe_close(fd);
+ fd = open("/", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY);
+ assert_se(fd >= 0);
+ r = fgetxattrat_fake(fd, "usr", "user.idontexist", v, 3, 0, &size);
+ assert_se(r == -ENODATA || ERRNO_IS_NOT_SUPPORTED(r));
+
+cleanup:
+ assert_se(unlink(x) >= 0);
+ assert_se(rmdir(t) >= 0);
+}
+
+static void test_getcrtime(void) {
+
+ _cleanup_close_ int fd = -1;
+ char ts[FORMAT_TIMESTAMP_MAX];
+ const char *vt;
+ usec_t usec, k;
+ int r;
+
+ assert_se(tmp_dir(&vt) >= 0);
+
+ fd = open_tmpfile_unlinkable(vt, O_RDWR);
+ assert_se(fd >= 0);
+
+ r = fd_getcrtime(fd, &usec);
+ if (r < 0)
+ log_debug_errno(r, "btime: %m");
+ else
+ log_debug("btime: %s", format_timestamp(ts, sizeof(ts), usec));
+
+ k = now(CLOCK_REALTIME);
+
+ r = fd_setcrtime(fd, 1519126446UL * USEC_PER_SEC);
+ if (!IN_SET(r, -EOPNOTSUPP, -ENOTTY)) {
+ assert_se(fd_getcrtime(fd, &usec) >= 0);
+ assert_se(k < 1519126446UL * USEC_PER_SEC ||
+ usec == 1519126446UL * USEC_PER_SEC);
+ }
+}
+
+int main(void) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_fgetxattrat_fake();
+ test_getcrtime();
+
+ return 0;
+}
diff --git a/src/test/test-xdg-autostart.c b/src/test/test-xdg-autostart.c
new file mode 100644
index 0000000..a437e2c
--- /dev/null
+++ b/src/test/test-xdg-autostart.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+#include "xdg-autostart-service.h"
+
+static void test_translate_name(void) {
+ _cleanup_free_ char *t;
+
+ assert_se(t = xdg_autostart_service_translate_name("a-b.blub.desktop"));
+ assert_se(streq(t, "app-a\\x2db.blub-autostart.service"));
+}
+
+static void test_xdg_format_exec_start_one(const char *exec, const char *expected) {
+ _cleanup_free_ char* out = NULL;
+
+ xdg_autostart_format_exec_start(exec, &out);
+ log_info("In: '%s', out: '%s', expected: '%s'", exec, out, expected);
+ assert_se(streq(out, expected));
+}
+
+static void test_xdg_format_exec_start(void) {
+ test_xdg_format_exec_start_one("/bin/sleep 100", "/bin/sleep \"100\"");
+
+ /* All standardised % identifiers are stripped. */
+ test_xdg_format_exec_start_one("/bin/sleep %f \"%F\" %u %U %d %D\t%n %N %i %c %k %v %m", "/bin/sleep");
+
+ /* Unknown % identifier currently remain, but are escaped. */
+ test_xdg_format_exec_start_one("/bin/sleep %X \"%Y\"", "/bin/sleep \"%%X\" \"%%Y\"");
+
+ test_xdg_format_exec_start_one("/bin/sleep \";\\\"\"", "/bin/sleep \";\\\"\"");
+}
+
+static const char* const xdg_desktop_file[] = {
+ "[Desktop Entry]\n"
+ "Exec\t =\t /bin/sleep 100\n" /* Whitespace Before/After = must be ignored */
+ "OnlyShowIn = A;B;\n"
+ "NotShowIn=C;;D\\\\\\;;E\n", /* "C", "", "D\;", "E" */
+
+ "[Desktop Entry]\n"
+ "Exec=a\n"
+ "Exec=b\n",
+
+ "[Desktop Entry]\n"
+ "Hidden=\t true\n",
+};
+
+static void test_xdg_desktop_parse(unsigned i, const char *s) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-xdg-autostart-parser.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(xdg_autostart_service_freep) XdgAutostartService *service = NULL;
+
+ log_info("== %s[%i] ==", __func__, i);
+
+ assert_se(fmkostemp_safe(name, "r+", &f) == 0);
+ assert_se(fwrite(s, strlen(s), 1, f) == 1);
+ rewind(f);
+
+ assert_se(service = xdg_autostart_service_parse_desktop(name));
+
+ switch (i) {
+ case 0:
+ assert_se(streq(service->exec_string, "/bin/sleep 100"));
+ assert_se(strv_equal(service->only_show_in, STRV_MAKE("A", "B")));
+ assert_se(strv_equal(service->not_show_in, STRV_MAKE("C", "D\\;", "E")));
+ assert_se(!service->hidden);
+ break;
+ case 1:
+ /* The second entry is not permissible and will be ignored (and error logged). */
+ assert_se(streq(service->exec_string, "a"));
+ break;
+ case 2:
+ assert_se(service->hidden);
+ break;
+ }
+}
+
+int main(int argc, char *argv[]) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_translate_name();
+ test_xdg_format_exec_start();
+
+ for (size_t i = 0; i < ELEMENTSOF(xdg_desktop_file); i++)
+ test_xdg_desktop_parse(i, xdg_desktop_file[i]);
+
+ return 0;
+}
diff --git a/src/test/test-xml.c b/src/test/test-xml.c
new file mode 100644
index 0000000..e69d6d0
--- /dev/null
+++ b/src/test/test-xml.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdarg.h>
+
+#include "alloc-util.h"
+#include "string-util.h"
+#include "util.h"
+#include "xml.h"
+
+static void test_one(const char *data, ...) {
+ void *state = NULL;
+ va_list ap;
+
+ va_start(ap, data);
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL;
+ int t, tt;
+ const char *nn;
+
+ t = xml_tokenize(&data, &name, &state, NULL);
+ assert_se(t >= 0);
+
+ tt = va_arg(ap, int);
+ assert_se(tt >= 0);
+
+ assert_se(t == tt);
+ if (t == XML_END)
+ break;
+
+ nn = va_arg(ap, const char *);
+ assert_se(streq_ptr(nn, name));
+ }
+
+ va_end(ap);
+}
+
+int main(int argc, char *argv[]) {
+
+ test_one("", XML_END);
+
+ test_one("<foo></foo>",
+ XML_TAG_OPEN, "foo",
+ XML_TAG_CLOSE, "foo",
+ XML_END);
+
+ test_one("<foo waldo=piep meh=\"huhu\"/>",
+ XML_TAG_OPEN, "foo",
+ XML_ATTRIBUTE_NAME, "waldo",
+ XML_ATTRIBUTE_VALUE, "piep",
+ XML_ATTRIBUTE_NAME, "meh",
+ XML_ATTRIBUTE_VALUE, "huhu",
+ XML_TAG_CLOSE_EMPTY, NULL,
+ XML_END);
+
+ test_one("xxxx\n"
+ "<foo><?xml foo?> <!-- zzzz --> </foo>",
+ XML_TEXT, "xxxx\n",
+ XML_TAG_OPEN, "foo",
+ XML_TEXT, " ",
+ XML_TEXT, " ",
+ XML_TAG_CLOSE, "foo",
+ XML_END);
+
+ return 0;
+}
diff --git a/src/time-wait-sync/time-wait-sync.c b/src/time-wait-sync/time-wait-sync.c
new file mode 100644
index 0000000..df34541
--- /dev/null
+++ b/src/time-wait-sync/time-wait-sync.c
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/* systemd service to wait until kernel realtime clock is synchronized */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/timerfd.h>
+#include <sys/timex.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "signal-util.h"
+#include "time-util.h"
+
+typedef struct ClockState {
+ int timerfd_fd; /* non-negative is descriptor from timerfd_create */
+ int adjtime_state; /* return value from last adjtimex(2) call */
+ sd_event_source *timerfd_event_source; /* non-null is the active io event source */
+ int inotify_fd;
+ sd_event_source *inotify_event_source;
+ int run_systemd_wd;
+ int run_systemd_timesync_wd;
+ bool has_watchfile;
+} ClockState;
+
+static void clock_state_release_timerfd(ClockState *sp) {
+ sp->timerfd_event_source = sd_event_source_unref(sp->timerfd_event_source);
+ sp->timerfd_fd = safe_close(sp->timerfd_fd);
+}
+
+static void clock_state_release(ClockState *sp) {
+ clock_state_release_timerfd(sp);
+ sp->inotify_event_source = sd_event_source_unref(sp->inotify_event_source);
+ sp->inotify_fd = safe_close(sp->inotify_fd);
+}
+
+static int clock_state_update(ClockState *sp, sd_event *event);
+
+static int update_notify_run_systemd_timesync(ClockState *sp) {
+ sp->run_systemd_timesync_wd = inotify_add_watch(sp->inotify_fd, "/run/systemd/timesync", IN_CREATE|IN_DELETE_SELF);
+ return sp->run_systemd_timesync_wd;
+}
+
+static int timerfd_handler(sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ ClockState *sp = userdata;
+
+ return clock_state_update(sp, sd_event_source_get_event(s));
+}
+
+static void process_inotify_event(sd_event *event, ClockState *sp, struct inotify_event *e) {
+ if (e->wd == sp->run_systemd_wd) {
+ /* Only thing we care about is seeing if we can start watching /run/systemd/timesync. */
+ if (sp->run_systemd_timesync_wd < 0)
+ update_notify_run_systemd_timesync(sp);
+ } else if (e->wd == sp->run_systemd_timesync_wd) {
+ if (e->mask & IN_DELETE_SELF) {
+ /* Somebody removed /run/systemd/timesync. */
+ (void) inotify_rm_watch(sp->inotify_fd, sp->run_systemd_timesync_wd);
+ sp->run_systemd_timesync_wd = -1;
+ } else
+ /* Somebody might have created /run/systemd/timesync/synchronized. */
+ clock_state_update(sp, event);
+ }
+}
+
+static int inotify_handler(sd_event_source *s,
+ int fd,
+ uint32_t revents,
+ void *userdata) {
+ sd_event *event = sd_event_source_get_event(s);
+ ClockState *sp = userdata;
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0;
+
+ return log_warning_errno(errno, "Lost access to inotify: %m");
+ }
+ FOREACH_INOTIFY_EVENT(e, buffer, l)
+ process_inotify_event(event, sp, e);
+
+ return 0;
+}
+
+static int clock_state_update(
+ ClockState *sp,
+ sd_event *event) {
+
+ char buf[MAX((size_t)FORMAT_TIMESTAMP_MAX, STRLEN("unrepresentable"))];
+ struct timex tx = {};
+ const char * ts;
+ usec_t t;
+ int r;
+
+ clock_state_release_timerfd(sp);
+
+ /* The kernel supports cancelling timers whenever its realtime clock is "set" (which can happen in a variety of
+ * ways, generally adjustments of at least 500 ms). The way this module works is we set up a timerfd that will
+ * wake when the clock is set, and when that happens we read the clock synchronization state from the return
+ * value of adjtimex(2), which supports the NTP time adjustment protocol.
+ *
+ * The kernel determines whether the clock is synchronized using driver-specific tests, based on time
+ * information passed by an application, generally through adjtimex(2). If the application asserts the clock is
+ * synchronized, but does not also do something that "sets the clock", the timer will not be cancelled and
+ * synchronization will not be detected.
+ *
+ * Similarly, this service will never complete if the application sets the time without also providing
+ * information that adjtimex(2) can use to determine that the clock is synchronized. This generally doesn't
+ * happen, but can if the system has a hardware clock that is accurate enough that the adjustment is too small
+ * to be a "set".
+ *
+ * Both these failure-to-detect situations are covered by having the presence/creation of
+ * /run/systemd/timesync/synchronized, which is considered sufficient to indicate a synchronized clock even if
+ * the kernel has not been updated.
+ *
+ * For timesyncd the initial setting of the time uses settimeofday(2), which sets the clock but does not mark
+ * it synchronized. When an NTP source is selected it sets the clock again with clock_adjtime(2) which marks it
+ * synchronized and also touches /run/systemd/timesync/synchronized which covers the case when the clock wasn't
+ * "set". */
+
+ r = time_change_fd();
+ if (r < 0) {
+ log_error_errno(r, "Failed to create timerfd: %m");
+ goto finish;
+ }
+ sp->timerfd_fd = r;
+
+ r = adjtimex(&tx);
+ if (r < 0) {
+ log_error_errno(errno, "Failed to read adjtimex state: %m");
+ goto finish;
+ }
+ sp->adjtime_state = r;
+
+ if (tx.status & STA_NANO)
+ tx.time.tv_usec /= 1000;
+ t = timeval_load(&tx.time);
+ ts = format_timestamp_style(buf, sizeof(buf), t, TIMESTAMP_US_UTC);
+ if (!ts)
+ strcpy(buf, "unrepresentable");
+ log_info("adjtime state %d status %x time %s", sp->adjtime_state, tx.status, ts);
+
+ sp->has_watchfile = access("/run/systemd/timesync/synchronized", F_OK) >= 0;
+ if (sp->has_watchfile)
+ /* Presence of watch file overrides adjtime_state */
+ r = 0;
+ else if (sp->adjtime_state == TIME_ERROR) {
+ /* Not synchronized. Do a one-shot wait on the descriptor and inform the caller we need to keep
+ * running. */
+ r = sd_event_add_io(event, &sp->timerfd_event_source, sp->timerfd_fd,
+ EPOLLIN, timerfd_handler, sp);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create time change monitor source: %m");
+ goto finish;
+ }
+ r = 1;
+ } else
+ /* Synchronized; we can exit. */
+ r = 0;
+
+ finish:
+ if (r <= 0)
+ (void) sd_event_exit(event, r);
+ return r;
+}
+
+static int run(int argc, char * argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *event;
+ _cleanup_(clock_state_release) ClockState state = {
+ .timerfd_fd = -1,
+ .inotify_fd = -1,
+ .run_systemd_wd = -1,
+ .run_systemd_timesync_wd = -1,
+ };
+ int r;
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create sigterm event source: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create sigint event source: %m");
+
+ r = sd_event_set_watchdog(event, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create watchdog event source: %m");
+
+ r = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to create inotify descriptor: %m");
+
+ state.inotify_fd = r;
+
+ r = sd_event_add_io(event, &state.inotify_event_source, state.inotify_fd,
+ EPOLLIN, inotify_handler, &state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create notify event source: %m");
+
+ r = inotify_add_watch_and_warn(state.inotify_fd, "/run/systemd/", IN_CREATE);
+ if (r < 0)
+ return r;
+
+ state.run_systemd_wd = r;
+
+ (void) update_notify_run_systemd_timesync(&state);
+
+ r = clock_state_update(&state, event);
+ if (r > 0) {
+ r = sd_event_loop(event);
+ if (r < 0)
+ log_error_errno(r, "Failed in event loop: %m");
+ }
+
+ if (state.has_watchfile)
+ log_debug("Exit enabled by: /run/systemd/timesync/synchronized");
+
+ if (state.adjtime_state == TIME_ERROR)
+ log_info("Exit without adjtimex synchronized.");
+
+ return r;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/timedate/meson.build b/src/timedate/meson.build
new file mode 100644
index 0000000..6eb9607
--- /dev/null
+++ b/src/timedate/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+if conf.get('ENABLE_TIMEDATED') == 1
+ install_data('org.freedesktop.timedate1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.timedate1.service',
+ install_dir : dbussystemservicedir)
+ install_data('org.freedesktop.timedate1.policy',
+ install_dir : polkitpolicydir)
+endif
diff --git a/src/timedate/org.freedesktop.timedate1.conf b/src/timedate/org.freedesktop.timedate1.conf
new file mode 100644
index 0000000..4567082
--- /dev/null
+++ b/src/timedate/org.freedesktop.timedate1.conf
@@ -0,0 +1,29 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="root">
+ <allow own="org.freedesktop.timedate1"/>
+ <allow send_destination="org.freedesktop.timedate1"/>
+ <allow receive_sender="org.freedesktop.timedate1"/>
+ </policy>
+
+ <policy context="default">
+ <allow send_destination="org.freedesktop.timedate1"/>
+ <allow receive_sender="org.freedesktop.timedate1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/timedate/org.freedesktop.timedate1.policy b/src/timedate/org.freedesktop.timedate1.policy
new file mode 100644
index 0000000..c4e71b0
--- /dev/null
+++ b/src/timedate/org.freedesktop.timedate1.policy
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
+<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
+
+<!--
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<policyconfig>
+
+ <vendor>The systemd Project</vendor>
+ <vendor_url>http://www.freedesktop.org/wiki/Software/systemd</vendor_url>
+
+ <action id="org.freedesktop.timedate1.set-time">
+ <description gettext-domain="systemd">Set system time</description>
+ <message gettext-domain="systemd">Authentication is required to set the system time.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ <annotate key="org.freedesktop.policykit.imply">org.freedesktop.timedate1.set-timezone org.freedesktop.timedate1.set-ntp</annotate>
+ </action>
+
+ <action id="org.freedesktop.timedate1.set-timezone">
+ <description gettext-domain="systemd">Set system timezone</description>
+ <message gettext-domain="systemd">Authentication is required to set the system timezone.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.timedate1.set-local-rtc">
+ <description gettext-domain="systemd">Set RTC to local timezone or UTC</description>
+ <message gettext-domain="systemd">Authentication is required to control whether the RTC stores the local or UTC time.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+ <action id="org.freedesktop.timedate1.set-ntp">
+ <description gettext-domain="systemd">Turn network time synchronization on or off</description>
+ <message gettext-domain="systemd">Authentication is required to control whether network time synchronization shall be enabled.</message>
+ <defaults>
+ <allow_any>auth_admin_keep</allow_any>
+ <allow_inactive>auth_admin_keep</allow_inactive>
+ <allow_active>auth_admin_keep</allow_active>
+ </defaults>
+ </action>
+
+</policyconfig>
diff --git a/src/timedate/org.freedesktop.timedate1.service b/src/timedate/org.freedesktop.timedate1.service
new file mode 100644
index 0000000..6b82d70
--- /dev/null
+++ b/src/timedate/org.freedesktop.timedate1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.timedate1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.timedate1.service
diff --git a/src/timedate/timedatectl.c b/src/timedate/timedatectl.c
new file mode 100644
index 0000000..abc792a
--- /dev/null
+++ b/src/timedate/timedatectl.c
@@ -0,0 +1,1068 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <locale.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-map-properties.h"
+#include "bus-print-properties.h"
+#include "env-util.h"
+#include "format-table.h"
+#include "in-addr-util.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "sparse-endian.h"
+#include "spawn-polkit-agent.h"
+#include "string-table.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "verbs.h"
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_ask_password = true;
+static BusTransport arg_transport = BUS_TRANSPORT_LOCAL;
+static char *arg_host = NULL;
+static bool arg_adjust_system_clock = false;
+static bool arg_monitor = false;
+static char **arg_property = NULL;
+static bool arg_value = false;
+static bool arg_all = false;
+
+typedef struct StatusInfo {
+ usec_t time;
+ const char *timezone;
+
+ usec_t rtc_time;
+ bool rtc_local;
+
+ bool ntp_capable;
+ bool ntp_active;
+ bool ntp_synced;
+} StatusInfo;
+
+static int print_status_info(const StatusInfo *i) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ const char *old_tz = NULL, *tz, *tz_colon;
+ bool have_time = false;
+ char a[LINE_MAX];
+ TableCell *cell;
+ struct tm tm;
+ time_t sec;
+ size_t n;
+ int r;
+
+ assert(i);
+
+ table = table_new("key", "value");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ (void) table_set_ellipsize_percent(table, cell, 100);
+ (void) table_set_align_percent(table, cell, 100);
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ (void) table_set_ellipsize_percent(table, cell, 100);
+
+ /* Save the old $TZ */
+ tz = getenv("TZ");
+ if (tz)
+ old_tz = strdupa(tz);
+
+ /* Set the new $TZ */
+ tz_colon = strjoina(":", isempty(i->timezone) ? "UTC" : i->timezone);
+ if (setenv("TZ", tz_colon, true) < 0)
+ log_warning_errno(errno, "Failed to set TZ environment variable, ignoring: %m");
+ else
+ tzset();
+
+ if (i->time != 0) {
+ sec = (time_t) (i->time / USEC_PER_SEC);
+ have_time = true;
+ } else if (IN_SET(arg_transport, BUS_TRANSPORT_LOCAL, BUS_TRANSPORT_MACHINE)) {
+ sec = time(NULL);
+ have_time = true;
+ } else
+ log_warning("Could not get time from timedated and not operating locally, ignoring.");
+
+ if (have_time)
+ n = strftime(a, sizeof a, "%a %Y-%m-%d %H:%M:%S %Z", localtime_r(&sec, &tm));
+
+ r = table_add_many(table,
+ TABLE_STRING, "Local time:",
+ TABLE_STRING, have_time && n > 0 ? a : "n/a");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (have_time)
+ n = strftime(a, sizeof a, "%a %Y-%m-%d %H:%M:%S UTC", gmtime_r(&sec, &tm));
+
+ r = table_add_many(table,
+ TABLE_STRING, "Universal time:",
+ TABLE_STRING, have_time && n > 0 ? a : "n/a");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (i->rtc_time > 0) {
+ time_t rtc_sec;
+
+ rtc_sec = (time_t) (i->rtc_time / USEC_PER_SEC);
+ n = strftime(a, sizeof a, "%a %Y-%m-%d %H:%M:%S", gmtime_r(&rtc_sec, &tm));
+ }
+
+ r = table_add_many(table,
+ TABLE_STRING, "RTC time:",
+ TABLE_STRING, i->rtc_time > 0 && n > 0 ? a : "n/a");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (have_time)
+ n = strftime(a, sizeof a, "%Z, %z", localtime_r(&sec, &tm));
+
+ r = table_add_cell(table, NULL, TABLE_STRING, "Time zone:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%s (%s)", strna(i->timezone), have_time && n > 0 ? a : "n/a");
+ if (r < 0)
+ return table_log_add_error(r);
+
+
+ /* Restore the $TZ */
+ r = set_unset_env("TZ", old_tz, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set TZ environment variable, ignoring: %m");
+ else
+ tzset();
+
+ r = table_add_many(table,
+ TABLE_STRING, "System clock synchronized:",
+ TABLE_BOOLEAN, i->ntp_synced,
+ TABLE_STRING, "NTP service:",
+ TABLE_STRING, i->ntp_capable ? (i->ntp_active ? "active" : "inactive") : "n/a",
+ TABLE_STRING, "RTC in local TZ:",
+ TABLE_BOOLEAN, i->rtc_local);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ if (i->rtc_local)
+ printf("\n%s"
+ "Warning: The system is configured to read the RTC time in the local time zone.\n"
+ " This mode cannot be fully supported. It will create various problems\n"
+ " with time zone changes and daylight saving time adjustments. The RTC\n"
+ " time is never updated, it relies on external facilities to maintain it.\n"
+ " If at all possible, use RTC in UTC by calling\n"
+ " 'timedatectl set-local-rtc 0'.%s\n", ansi_highlight(), ansi_normal());
+
+ return 0;
+}
+
+static int show_status(int argc, char **argv, void *userdata) {
+ StatusInfo info = {};
+ static const struct bus_properties_map map[] = {
+ { "Timezone", "s", NULL, offsetof(StatusInfo, timezone) },
+ { "LocalRTC", "b", NULL, offsetof(StatusInfo, rtc_local) },
+ { "NTP", "b", NULL, offsetof(StatusInfo, ntp_active) },
+ { "CanNTP", "b", NULL, offsetof(StatusInfo, ntp_capable) },
+ { "NTPSynchronized", "b", NULL, offsetof(StatusInfo, ntp_synced) },
+ { "TimeUSec", "t", NULL, offsetof(StatusInfo, time) },
+ { "RTCTimeUSec", "t", NULL, offsetof(StatusInfo, rtc_time) },
+ {}
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ map,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query server: %s", bus_error_message(&error, r));
+
+ return print_status_info(&info);
+}
+
+static int show_properties(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = bus_print_all_properties(bus,
+ "org.freedesktop.timedate1",
+ "/org/freedesktop/timedate1",
+ NULL,
+ arg_property,
+ arg_value,
+ arg_all,
+ NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int set_time(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool relative = false, interactive = arg_ask_password;
+ sd_bus *bus = userdata;
+ usec_t t;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = parse_timestamp(argv[1], &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse time specification '%s': %m", argv[1]);
+
+ r = bus_call_method(
+ bus,
+ bus_timedate,
+ "SetTime",
+ &error,
+ NULL,
+ "xbb", (int64_t) t, relative, interactive);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set time: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_timezone(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_call_method(bus, bus_timedate, "SetTimezone", &error, NULL, "sb", argv[1], arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set time zone: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_local_rtc(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int r, b;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ b = parse_boolean(argv[1]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse local RTC setting '%s': %m", argv[1]);
+
+ r = bus_call_method(
+ bus,
+ bus_timedate,
+ "SetLocalRTC",
+ &error,
+ NULL,
+ "bbb", b, arg_adjust_system_clock, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set local RTC: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int set_ntp(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int b, r;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ b = parse_boolean(argv[1]);
+ if (b < 0)
+ return log_error_errno(b, "Failed to parse NTP setting '%s': %m", argv[1]);
+
+ r = bus_call_method(bus, bus_timedate, "SetNTP", &error, NULL, "bb", b, arg_ask_password);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set ntp: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int list_timezones(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+ char** zones;
+
+ r = bus_call_method(bus, bus_timedate, "ListTimezones", &error, &reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request list of time zones: %s",
+ bus_error_message(&error, r));
+
+ r = sd_bus_message_read_strv(reply, &zones);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ (void) pager_open(arg_pager_flags);
+ strv_print(zones);
+
+ return 0;
+}
+
+typedef struct NTPStatusInfo {
+ const char *server_name;
+ char *server_address;
+ usec_t poll_interval, poll_max, poll_min;
+ usec_t root_distance_max;
+
+ uint32_t leap, version, mode, stratum;
+ int32_t precision;
+ usec_t root_delay, root_dispersion;
+ union {
+ char str[5];
+ uint32_t val;
+ } reference;
+ usec_t origin, recv, trans, dest;
+
+ bool spike;
+ uint64_t packet_count;
+ usec_t jitter;
+
+ int64_t freq;
+} NTPStatusInfo;
+
+static void ntp_status_info_clear(NTPStatusInfo *p) {
+ p->server_address = mfree(p->server_address);
+}
+
+static const char * const ntp_leap_table[4] = {
+ [0] = "normal",
+ [1] = "last minute of the day has 61 seconds",
+ [2] = "last minute of the day has 59 seconds",
+ [3] = "not synchronized",
+};
+
+DISABLE_WARNING_TYPE_LIMITS;
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(ntp_leap, uint32_t);
+REENABLE_WARNING;
+
+static int print_ntp_status_info(NTPStatusInfo *i) {
+ char ts[FORMAT_TIMESPAN_MAX], jitter[FORMAT_TIMESPAN_MAX],
+ tmin[FORMAT_TIMESPAN_MAX], tmax[FORMAT_TIMESPAN_MAX];
+ usec_t delay, t14, t23, offset, root_distance;
+ _cleanup_(table_unrefp) Table *table = NULL;
+ bool offset_sign;
+ TableCell *cell;
+ int r;
+
+ assert(i);
+
+ table = table_new("key", "value");
+ if (!table)
+ return log_oom();
+
+ table_set_header(table, false);
+
+ assert_se(cell = table_get_cell(table, 0, 0));
+ (void) table_set_ellipsize_percent(table, cell, 100);
+ (void) table_set_align_percent(table, cell, 100);
+
+ assert_se(cell = table_get_cell(table, 0, 1));
+ (void) table_set_ellipsize_percent(table, cell, 100);
+
+ /*
+ * "Timestamp Name ID When Generated
+ * ------------------------------------------------------------
+ * Originate Timestamp T1 time request sent by client
+ * Receive Timestamp T2 time request received by server
+ * Transmit Timestamp T3 time reply sent by server
+ * Destination Timestamp T4 time reply received by client
+ *
+ * The round-trip delay, d, and system clock offset, t, are defined as:
+ * d = (T4 - T1) - (T3 - T2) t = ((T2 - T1) + (T3 - T4)) / 2"
+ */
+
+ r = table_add_cell(table, NULL, TABLE_STRING, "Server:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%s (%s)", strna(i->server_address), strna(i->server_name));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell(table, NULL, TABLE_STRING, "Poll interval:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%s (min: %s; max %s)",
+ format_timespan(ts, sizeof(ts), i->poll_interval, 0),
+ format_timespan(tmin, sizeof(tmin), i->poll_min, 0),
+ format_timespan(tmax, sizeof(tmax), i->poll_max, 0));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (i->packet_count == 0) {
+ r = table_add_many(table,
+ TABLE_STRING, "Packet count:",
+ TABLE_STRING, "0");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return 0;
+ }
+
+ if (i->dest < i->origin || i->trans < i->recv || i->dest - i->origin < i->trans - i->recv) {
+ log_error("Invalid NTP response");
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return 0;
+ }
+
+ delay = (i->dest - i->origin) - (i->trans - i->recv);
+
+ t14 = i->origin + i->dest;
+ t23 = i->recv + i->trans;
+ offset_sign = t14 < t23;
+ offset = (offset_sign ? t23 - t14 : t14 - t23) / 2;
+
+ root_distance = i->root_delay / 2 + i->root_dispersion;
+
+ r = table_add_many(table,
+ TABLE_STRING, "Leap:",
+ TABLE_STRING, ntp_leap_to_string(i->leap),
+ TABLE_STRING, "Version:",
+ TABLE_UINT32, i->version,
+ TABLE_STRING, "Stratum:",
+ TABLE_UINT32, i->stratum,
+ TABLE_STRING, "Reference:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (i->stratum <= 1)
+ r = table_add_cell(table, NULL, TABLE_STRING, i->reference.str);
+ else
+ r = table_add_cell_stringf(table, NULL, "%" PRIX32, be32toh(i->reference.val));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell(table, NULL, TABLE_STRING, "Precision:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%s (%" PRIi32 ")",
+ format_timespan(ts, sizeof(ts), DIV_ROUND_UP((nsec_t) (exp2(i->precision) * NSEC_PER_SEC), NSEC_PER_USEC), 0),
+ i->precision);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell(table, NULL, TABLE_STRING, "Root distance:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%s (max: %s)",
+ format_timespan(ts, sizeof(ts), root_distance, 0),
+ format_timespan(tmax, sizeof(tmax), i->root_distance_max, 0));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell(table, NULL, TABLE_STRING, "Offset:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%s%s",
+ offset_sign ? "+" : "-",
+ format_timespan(ts, sizeof(ts), offset, 0));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_many(table,
+ TABLE_STRING, "Delay:",
+ TABLE_STRING, format_timespan(ts, sizeof(ts), delay, 0),
+ TABLE_STRING, "Jitter:",
+ TABLE_STRING, format_timespan(jitter, sizeof(jitter), i->jitter, 0),
+ TABLE_STRING, "Packet count:",
+ TABLE_UINT64, i->packet_count);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ if (!i->spike) {
+ r = table_add_cell(table, NULL, TABLE_STRING, "Frequency:");
+ if (r < 0)
+ return table_log_add_error(r);
+
+ r = table_add_cell_stringf(table, NULL, "%+.3fppm", (double) i->freq / 0x10000);
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+
+ return 0;
+}
+
+static int map_server_address(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ char **p = (char **) userdata;
+ const void *d;
+ int family, r;
+ size_t sz;
+
+ assert(p);
+
+ r = sd_bus_message_enter_container(m, 'r', "iay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(m, 'y', &d, &sz);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (sz == 0 && family == AF_UNSPEC) {
+ *p = mfree(*p);
+ return 0;
+ }
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown address family %i", family);
+
+ if (sz != FAMILY_ADDRESS_SIZE(family))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid address size");
+
+ r = in_addr_to_string(family, d, p);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int map_ntp_message(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ NTPStatusInfo *p = userdata;
+ const void *d;
+ size_t sz;
+ int32_t b;
+ int r;
+
+ assert(p);
+
+ r = sd_bus_message_enter_container(m, 'r', "uuuuittayttttbtt");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "uuuuitt",
+ &p->leap, &p->version, &p->mode, &p->stratum, &p->precision,
+ &p->root_delay, &p->root_dispersion);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(m, 'y', &d, &sz);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "ttttbtt",
+ &p->origin, &p->recv, &p->trans, &p->dest,
+ &b, &p->packet_count, &p->jitter);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ if (sz != 4)
+ return -EINVAL;
+
+ memcpy(p->reference.str, d, sz);
+
+ p->spike = b;
+
+ return 0;
+}
+
+static int show_timesync_status_once(sd_bus *bus) {
+ static const struct bus_properties_map map_timesync[] = {
+ { "ServerName", "s", NULL, offsetof(NTPStatusInfo, server_name) },
+ { "ServerAddress", "(iay)", map_server_address, offsetof(NTPStatusInfo, server_address) },
+ { "PollIntervalUSec", "t", NULL, offsetof(NTPStatusInfo, poll_interval) },
+ { "PollIntervalMinUSec", "t", NULL, offsetof(NTPStatusInfo, poll_min) },
+ { "PollIntervalMaxUSec", "t", NULL, offsetof(NTPStatusInfo, poll_max) },
+ { "RootDistanceMaxUSec", "t", NULL, offsetof(NTPStatusInfo, root_distance_max) },
+ { "NTPMessage", "(uuuuittayttttbtt)", map_ntp_message, 0 },
+ { "Frequency", "x", NULL, offsetof(NTPStatusInfo, freq) },
+ {}
+ };
+ _cleanup_(ntp_status_info_clear) NTPStatusInfo info = {};
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ r = bus_map_all_properties(bus,
+ "org.freedesktop.timesync1",
+ "/org/freedesktop/timesync1",
+ map_timesync,
+ BUS_MAP_BOOLEAN_AS_BOOL,
+ &error,
+ &m,
+ &info);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query server: %s", bus_error_message(&error, r));
+
+ if (arg_monitor && !terminal_is_dumb())
+ fputs(ANSI_HOME_CLEAR, stdout);
+
+ print_ntp_status_info(&info);
+
+ return 0;
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char *name;
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_read(m, "s", &name);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (!streq_ptr(name, "org.freedesktop.timesync1.Manager"))
+ return 0;
+
+ return show_timesync_status_once(sd_bus_message_get_bus(m));
+}
+
+static int show_timesync_status(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = show_timesync_status_once(bus);
+ if (r < 0)
+ return r;
+
+ if (!arg_monitor)
+ return 0;
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get event loop: %m");
+
+ r = sd_bus_match_signal(bus,
+ NULL,
+ "org.freedesktop.timesync1",
+ "/org/freedesktop/timesync1",
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ on_properties_changed, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match for PropertiesChanged signal: %m");
+
+ r = sd_bus_attach_event(bus, event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+static int print_timesync_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRUCT:
+ if (streq(name, "NTPMessage")) {
+ _cleanup_(ntp_status_info_clear) NTPStatusInfo i = {};
+ char ts[FORMAT_TIMESPAN_MAX], stamp[FORMAT_TIMESTAMP_MAX];
+
+ r = map_ntp_message(NULL, NULL, m, NULL, &i);
+ if (r < 0)
+ return r;
+
+ if (i.packet_count == 0)
+ return 1;
+
+ if (!value) {
+ fputs(name, stdout);
+ fputc('=', stdout);
+ }
+
+ printf("{ Leap=%u, Version=%u, Mode=%u, Stratum=%u, Precision=%i,",
+ i.leap, i.version, i.mode, i.stratum, i.precision);
+ printf(" RootDelay=%s,",
+ format_timespan(ts, sizeof(ts), i.root_delay, 0));
+ printf(" RootDispersion=%s,",
+ format_timespan(ts, sizeof(ts), i.root_dispersion, 0));
+
+ if (i.stratum == 1)
+ printf(" Reference=%s,", i.reference.str);
+ else
+ printf(" Reference=%" PRIX32 ",", be32toh(i.reference.val));
+
+ printf(" OriginateTimestamp=%s,",
+ format_timestamp(stamp, sizeof(stamp), i.origin));
+ printf(" ReceiveTimestamp=%s,",
+ format_timestamp(stamp, sizeof(stamp), i.recv));
+ printf(" TransmitTimestamp=%s,",
+ format_timestamp(stamp, sizeof(stamp), i.trans));
+ printf(" DestinationTimestamp=%s,",
+ format_timestamp(stamp, sizeof(stamp), i.dest));
+ printf(" Ignored=%s PacketCount=%" PRIu64 ",",
+ yes_no(i.spike), i.packet_count);
+ printf(" Jitter=%s }\n",
+ format_timespan(ts, sizeof(ts), i.jitter, 0));
+
+ return 1;
+
+ } else if (streq(name, "ServerAddress")) {
+ _cleanup_free_ char *str = NULL;
+
+ r = map_server_address(NULL, NULL, m, NULL, &str);
+ if (r < 0)
+ return r;
+
+ if (arg_all || !isempty(str))
+ bus_print_property_value(name, expected_value, value, str);
+
+ return 1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int show_timesync(int argc, char **argv, void *userdata) {
+ sd_bus *bus = userdata;
+ int r;
+
+ assert(bus);
+
+ r = bus_print_all_properties(bus,
+ "org.freedesktop.timesync1",
+ "/org/freedesktop/timesync1",
+ print_timesync_property,
+ arg_property,
+ arg_value,
+ arg_all,
+ NULL);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
+static int parse_ifindex_bus(sd_bus *bus, const char *str) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int32_t i;
+ int r;
+
+ assert(bus);
+ assert(str);
+
+ r = parse_ifindex(str);
+ if (r > 0)
+ return r;
+ assert(r < 0);
+
+ r = bus_call_method(bus, bus_network_mgr, "GetLinkByName", &error, &reply, "s", str);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get ifindex of interfaces %s: %s", str, bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "io", &i, NULL);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return i;
+}
+
+static int verb_ntp_servers(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *req = NULL;
+ sd_bus *bus = userdata;
+ int ifindex, r;
+
+ assert(bus);
+
+ ifindex = parse_ifindex_bus(bus, argv[1]);
+ if (ifindex < 0)
+ return ifindex;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_message_new_method_call(bus, &req, bus_network_mgr, "SetLinkNTP");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(req, "i", ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(req, argv + 2);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, req, 0, &error, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set NTP servers: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int verb_revert(int argc, char **argv, void *userdata) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ sd_bus *bus = userdata;
+ int ifindex, r;
+
+ assert(bus);
+
+ ifindex = parse_ifindex_bus(bus, argv[1]);
+ if (ifindex < 0)
+ return ifindex;
+
+ polkit_agent_open_if_enabled(arg_transport, arg_ask_password);
+
+ r = bus_call_method(bus, bus_network_mgr, "RevertLinkNTP", &error, NULL, "i", ifindex);
+ if (r < 0)
+ return log_error_errno(r, "Failed to revert interface configuration: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("timedatectl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n"
+ "\n%sQuery or change system time and date settings.%s\n"
+ "\nCommands:\n"
+ " status Show current time settings\n"
+ " show Show properties of systemd-timedated\n"
+ " set-time TIME Set system time\n"
+ " set-timezone ZONE Set system time zone\n"
+ " list-timezones Show known time zones\n"
+ " set-local-rtc BOOL Control whether RTC is in local time\n"
+ " set-ntp BOOL Enable or disable network time synchronization\n"
+ "\nsystemd-timesyncd Commands:\n"
+ " timesync-status Show status of systemd-timesyncd\n"
+ " show-timesync Show properties of systemd-timesyncd\n"
+ "\nOptions:\n"
+ " -h --help Show this help message\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-ask-password Do not prompt for password\n"
+ " -H --host=[USER@]HOST Operate on remote host\n"
+ " -M --machine=CONTAINER Operate on local container\n"
+ " --adjust-system-clock Adjust system clock when changing local RTC mode\n"
+ " --monitor Monitor status of systemd-timesyncd\n"
+ " -p --property=NAME Show only properties by this name\n"
+ " -a --all Show all properties, including empty ones\n"
+ " --value When showing properties, only print the value\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight()
+ , ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int verb_help(int argc, char **argv, void *userdata) {
+ return help();
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_ADJUST_SYSTEM_CLOCK,
+ ARG_NO_ASK_PASSWORD,
+ ARG_MONITOR,
+ ARG_VALUE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "host", required_argument, NULL, 'H' },
+ { "machine", required_argument, NULL, 'M' },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "adjust-system-clock", no_argument, NULL, ARG_ADJUST_SYSTEM_CLOCK },
+ { "monitor", no_argument, NULL, ARG_MONITOR },
+ { "property", required_argument, NULL, 'p' },
+ { "all", no_argument, NULL, 'a' },
+ { "value", no_argument, NULL, ARG_VALUE },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hH:M:p:a", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case 'H':
+ arg_transport = BUS_TRANSPORT_REMOTE;
+ arg_host = optarg;
+ break;
+
+ case 'M':
+ arg_transport = BUS_TRANSPORT_MACHINE;
+ arg_host = optarg;
+ break;
+
+ case ARG_NO_ASK_PASSWORD:
+ arg_ask_password = false;
+ break;
+
+ case ARG_ADJUST_SYSTEM_CLOCK:
+ arg_adjust_system_clock = true;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_MONITOR:
+ arg_monitor = true;
+ break;
+
+ case 'p': {
+ r = strv_extend(&arg_property, optarg);
+ if (r < 0)
+ return log_oom();
+
+ /* If the user asked for a particular
+ * property, show it to them, even if it is
+ * empty. */
+ arg_all = true;
+ break;
+ }
+
+ case 'a':
+ arg_all = true;
+ break;
+
+ case ARG_VALUE:
+ arg_value = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1;
+}
+
+static int timedatectl_main(sd_bus *bus, int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "status", VERB_ANY, 1, VERB_DEFAULT, show_status },
+ { "show", VERB_ANY, 1, 0, show_properties },
+ { "set-time", 2, 2, 0, set_time },
+ { "set-timezone", 2, 2, 0, set_timezone },
+ { "list-timezones", VERB_ANY, 1, 0, list_timezones },
+ { "set-local-rtc", 2, 2, 0, set_local_rtc },
+ { "set-ntp", 2, 2, 0, set_ntp },
+ { "timesync-status", VERB_ANY, 1, 0, show_timesync_status },
+ { "show-timesync", VERB_ANY, 1, 0, show_timesync },
+ { "ntp-servers", 3, VERB_ANY, 0, verb_ntp_servers },
+ { "revert", 2, 2, 0, verb_revert },
+ { "help", VERB_ANY, VERB_ANY, 0, verb_help }, /* Not documented, but supported since it is created. */
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, bus);
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ setlocale(LC_ALL, "");
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = bus_connect_transport(arg_transport, arg_host, false, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ return timedatectl_main(bus, argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/timedate/timedated.c b/src/timedate/timedated.c
new file mode 100644
index 0000000..8149fac
--- /dev/null
+++ b/src/timedate/timedated.c
@@ -0,0 +1,1161 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "bus-get-properties.h"
+#include "bus-locator.h"
+#include "bus-log-control-api.h"
+#include "bus-map-properties.h"
+#include "bus-polkit.h"
+#include "clock-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "missing_capability.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "service-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-def.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+#define NULL_ADJTIME_UTC "0.0 0 0\n0\nUTC\n"
+#define NULL_ADJTIME_LOCAL "0.0 0 0\n0\nLOCAL\n"
+
+#define UNIT_LIST_DIRS (const char* const*) CONF_PATHS_STRV("systemd/ntp-units.d")
+
+typedef struct UnitStatusInfo {
+ char *name;
+ char *load_state;
+ char *unit_file_state;
+ char *active_state;
+ char *path;
+
+ LIST_FIELDS(struct UnitStatusInfo, units);
+} UnitStatusInfo;
+
+typedef struct Context {
+ char *zone;
+ bool local_rtc;
+ Hashmap *polkit_registry;
+ sd_bus_message *cache;
+
+ sd_bus_slot *slot_job_removed;
+
+ LIST_HEAD(UnitStatusInfo, units);
+} Context;
+
+#define log_unit_full(unit, level, error, ...) \
+ ({ \
+ const UnitStatusInfo *_u = (unit); \
+ log_object_internal(level, error, PROJECT_FILE, __LINE__, __func__, \
+ "UNIT=", _u->name, NULL, NULL, ##__VA_ARGS__); \
+ })
+
+#define log_unit_debug(unit, ...) log_unit_full(unit, LOG_DEBUG, 0, ##__VA_ARGS__)
+#define log_unit_info(unit, ...) log_unit_full(unit, LOG_INFO, 0, ##__VA_ARGS__)
+#define log_unit_notice(unit, ...) log_unit_full(unit, LOG_NOTICE, 0, ##__VA_ARGS__)
+#define log_unit_warning(unit, ...) log_unit_full(unit, LOG_WARNING, 0, ##__VA_ARGS__)
+#define log_unit_error(unit, ...) log_unit_full(unit, LOG_ERR, 0, ##__VA_ARGS__)
+
+#define log_unit_debug_errno(unit, error, ...) log_unit_full(unit, LOG_DEBUG, error, ##__VA_ARGS__)
+#define log_unit_info_errno(unit, error, ...) log_unit_full(unit, LOG_INFO, error, ##__VA_ARGS__)
+#define log_unit_notice_errno(unit, error, ...) log_unit_full(unit, LOG_NOTICE, error, ##__VA_ARGS__)
+#define log_unit_warning_errno(unit, error, ...) log_unit_full(unit, LOG_WARNING, error, ##__VA_ARGS__)
+#define log_unit_error_errno(unit, error, ...) log_unit_full(unit, LOG_ERR, error, ##__VA_ARGS__)
+
+static void unit_status_info_clear(UnitStatusInfo *p) {
+ assert(p);
+
+ p->load_state = mfree(p->load_state);
+ p->unit_file_state = mfree(p->unit_file_state);
+ p->active_state = mfree(p->active_state);
+}
+
+static void unit_status_info_free(UnitStatusInfo *p) {
+ assert(p);
+
+ unit_status_info_clear(p);
+ free(p->name);
+ free(p->path);
+ free(p);
+}
+
+static void context_clear(Context *c) {
+ UnitStatusInfo *p;
+
+ assert(c);
+
+ free(c->zone);
+ bus_verify_polkit_async_registry_free(c->polkit_registry);
+ sd_bus_message_unref(c->cache);
+
+ sd_bus_slot_unref(c->slot_job_removed);
+
+ while ((p = c->units)) {
+ LIST_REMOVE(units, c->units, p);
+ unit_status_info_free(p);
+ }
+}
+
+static int context_add_ntp_service(Context *c, const char *s, const char *source) {
+ UnitStatusInfo *u;
+
+ if (!unit_name_is_valid(s, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ /* Do not add this if it is already listed */
+ LIST_FOREACH(units, u, c->units)
+ if (streq(u->name, s))
+ return 0;
+
+ u = new0(UnitStatusInfo, 1);
+ if (!u)
+ return -ENOMEM;
+
+ u->name = strdup(s);
+ if (!u->name) {
+ free(u);
+ return -ENOMEM;
+ }
+
+ LIST_APPEND(units, c->units, u);
+ log_unit_debug(u, "added from %s.", source);
+
+ return 0;
+}
+
+static int context_parse_ntp_services_from_environment(Context *c) {
+ const char *env, *p;
+ int r;
+
+ assert(c);
+
+ env = getenv("SYSTEMD_TIMEDATED_NTP_SERVICES");
+ if (!env)
+ return 0;
+
+ log_debug("Using list of ntp services from environment variable $SYSTEMD_TIMEDATED_NTP_SERVICES=%s.", env);
+
+ for (p = env;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ":", 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_error("Invalid syntax, ignoring: %s", env);
+ break;
+ }
+
+ r = context_add_ntp_service(c, word, "$SYSTEMD_TIMEDATED_NTP_SERVICES");
+ if (r < 0)
+ log_warning_errno(r, "Failed to add NTP service \"%s\", ignoring: %m", word);
+ }
+
+ return 1;
+}
+
+static int context_parse_ntp_services_from_disk(Context *c) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ r = conf_files_list_strv(&files, ".list", NULL, CONF_FILES_FILTER_MASKED, UNIT_LIST_DIRS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate .list files: %m");
+
+ STRV_FOREACH(f, files) {
+ _cleanup_fclose_ FILE *file = NULL;
+
+ log_debug("Reading file '%s'", *f);
+
+ r = fopen_unlocked(*f, "re", &file);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open %s, ignoring: %m", *f);
+ continue;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *word;
+
+ r = read_line(file, LINE_MAX, &line);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read %s, ignoring: %m", *f);
+ continue;
+ }
+ if (r == 0)
+ break;
+
+ word = strstrip(line);
+ if (isempty(word) || startswith("#", word))
+ continue;
+
+ r = context_add_ntp_service(c, word, *f);
+ if (r < 0)
+ log_warning_errno(r, "Failed to add NTP service \"%s\", ignoring: %m", word);
+ }
+ }
+
+ return 1;
+}
+
+static int context_parse_ntp_services(Context *c) {
+ int r;
+
+ r = context_parse_ntp_services_from_environment(c);
+ if (r != 0)
+ return r;
+
+ return context_parse_ntp_services_from_disk(c);
+}
+
+static int context_ntp_service_is_active(Context *c) {
+ UnitStatusInfo *info;
+ int count = 0;
+
+ assert(c);
+
+ /* Call context_update_ntp_status() to update UnitStatusInfo before calling this. */
+
+ LIST_FOREACH(units, info, c->units)
+ count += !STRPTR_IN_SET(info->active_state, "inactive", "failed");
+
+ return count;
+}
+
+static int context_ntp_service_exists(Context *c) {
+ UnitStatusInfo *info;
+ int count = 0;
+
+ assert(c);
+
+ /* Call context_update_ntp_status() to update UnitStatusInfo before calling this. */
+
+ LIST_FOREACH(units, info, c->units)
+ count += streq_ptr(info->load_state, "loaded");
+
+ return count;
+}
+
+static int context_read_data(Context *c) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(c);
+
+ r = get_timezone(&t);
+ if (r == -EINVAL)
+ log_warning_errno(r, "/etc/localtime should be a symbolic link to a time zone data file in /usr/share/zoneinfo/.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to get target of /etc/localtime: %m");
+
+ free_and_replace(c->zone, t);
+
+ c->local_rtc = clock_is_localtime(NULL) > 0;
+
+ return 0;
+}
+
+static int context_write_data_timezone(Context *c) {
+ _cleanup_free_ char *p = NULL;
+ const char *source;
+
+ assert(c);
+
+ /* No timezone is very similar to UTC. Hence in either of these cases link the UTC file in. Except if
+ * it isn't installed, in which case we remove the symlink altogether. Since glibc defaults to an
+ * internal version of UTC in that case behaviour is mostly equivalent. We still prefer creating the
+ * symlink though, since things are more self explanatory then. */
+
+ if (isempty(c->zone) || streq(c->zone, "UTC")) {
+
+ if (access("/usr/share/zoneinfo/UTC", F_OK) < 0) {
+
+ if (unlink("/etc/localtime") < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+ }
+
+ source = "../usr/share/zoneinfo/UTC";
+ } else {
+ p = path_join("../usr/share/zoneinfo", c->zone);
+ if (!p)
+ return -ENOMEM;
+
+ source = p;
+ }
+
+ return symlink_atomic(source, "/etc/localtime");
+}
+
+static int context_write_data_local_rtc(Context *c) {
+ _cleanup_free_ char *s = NULL, *w = NULL;
+ int r;
+
+ assert(c);
+
+ r = read_full_file("/etc/adjtime", &s, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+
+ if (!c->local_rtc)
+ return 0;
+
+ w = strdup(NULL_ADJTIME_LOCAL);
+ if (!w)
+ return -ENOMEM;
+ } else {
+ char *p;
+ const char *e = "\n"; /* default if there is less than 3 lines */
+ const char *prepend = "";
+ size_t a, b;
+
+ p = strchrnul(s, '\n');
+ if (*p == '\0')
+ /* only one line, no \n terminator */
+ prepend = "\n0\n";
+ else if (p[1] == '\0') {
+ /* only one line, with \n terminator */
+ ++p;
+ prepend = "0\n";
+ } else {
+ p = strchr(p+1, '\n');
+ if (!p) {
+ /* only two lines, no \n terminator */
+ prepend = "\n";
+ p = s + strlen(s);
+ } else {
+ char *end;
+ /* third line might have a \n terminator or not */
+ p++;
+ end = strchr(p, '\n');
+ /* if we actually have a fourth line, use that as suffix "e", otherwise the default \n */
+ if (end)
+ e = end;
+ }
+ }
+
+ a = p - s;
+ b = strlen(e);
+
+ w = new(char, a + (c->local_rtc ? 5 : 3) + strlen(prepend) + b + 1);
+ if (!w)
+ return -ENOMEM;
+
+ *(char*) mempcpy(stpcpy(stpcpy(mempcpy(w, s, a), prepend), c->local_rtc ? "LOCAL" : "UTC"), e, b) = 0;
+
+ if (streq(w, NULL_ADJTIME_UTC)) {
+ if (unlink("/etc/adjtime") < 0)
+ if (errno != ENOENT)
+ return -errno;
+
+ return 0;
+ }
+ }
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ return write_string_file_atomic_label("/etc/adjtime", w);
+}
+
+static int context_update_ntp_status(Context *c, sd_bus *bus, sd_bus_message *m) {
+ static const struct bus_properties_map map[] = {
+ { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) },
+ { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state) },
+ { "UnitFileState", "s", NULL, offsetof(UnitStatusInfo, unit_file_state) },
+ {}
+ };
+ UnitStatusInfo *u;
+ int r;
+
+ assert(c);
+ assert(bus);
+
+ /* Suppress calling context_update_ntp_status() multiple times within single DBus transaction. */
+ if (m) {
+ if (m == c->cache)
+ return 0;
+
+ sd_bus_message_unref(c->cache);
+ c->cache = sd_bus_message_ref(m);
+ }
+
+ LIST_FOREACH(units, u, c->units) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+
+ unit_status_info_clear(u);
+
+ path = unit_dbus_path_from_name(u->name);
+ if (!path)
+ return -ENOMEM;
+
+ r = bus_map_all_properties(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ map,
+ BUS_MAP_STRDUP,
+ &error,
+ NULL,
+ u);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to get properties: %s", bus_error_message(&error, r));
+ }
+
+ return 0;
+}
+
+static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ UnitStatusInfo *u;
+ const char *path;
+ unsigned n = 0;
+ int r;
+
+ assert(c);
+ assert(m);
+
+ r = sd_bus_message_read(m, "uoss", NULL, &path, NULL, NULL);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ LIST_FOREACH(units, u, c->units)
+ if (streq_ptr(path, u->path))
+ u->path = mfree(u->path);
+ else
+ n += !!u->path;
+
+ if (n == 0) {
+ c->slot_job_removed = sd_bus_slot_unref(c->slot_job_removed);
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m),
+ "/org/freedesktop/timedate1", "org.freedesktop.timedate1", "NTP",
+ NULL);
+ }
+
+ return 0;
+}
+
+static int unit_start_or_stop(UnitStatusInfo *u, sd_bus *bus, sd_bus_error *error, bool start) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *path;
+ int r;
+
+ assert(u);
+ assert(bus);
+ assert(error);
+
+ r = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ start ? "StartUnit" : "StopUnit",
+ error,
+ &reply,
+ "ss",
+ u->name,
+ "replace");
+ log_unit_full(u, r < 0 ? LOG_WARNING : LOG_DEBUG, r,
+ "%s unit: %m", start ? "Starting" : "Stopping");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = free_and_strdup(&u->path, path);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int unit_enable_or_disable(UnitStatusInfo *u, sd_bus *bus, sd_bus_error *error, bool enable) {
+ int r;
+
+ assert(u);
+ assert(bus);
+ assert(error);
+
+ /* Call context_update_ntp_status() to update UnitStatusInfo before calling this. */
+
+ if (streq(u->unit_file_state, "enabled") == enable) {
+ log_unit_debug(u, "already %sd.", enable_disable(enable));
+ return 0;
+ }
+
+ log_unit_info(u, "%s unit.", enable ? "Enabling" : "Disabling");
+
+ if (enable)
+ r = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ "EnableUnitFiles",
+ error,
+ NULL,
+ "asbb", 1,
+ u->name,
+ false, true);
+ else
+ r = bus_call_method(
+ bus,
+ bus_systemd_mgr,
+ "DisableUnitFiles",
+ error,
+ NULL,
+ "asb", 1,
+ u->name,
+ false);
+ if (r < 0)
+ return r;
+
+ r = bus_call_method(bus, bus_systemd_mgr, "Reload", error, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_time, "t", now(CLOCK_REALTIME));
+static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_ntp_sync, "b", ntp_synced());
+
+static int property_get_rtc_time(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ struct tm tm = {};
+ usec_t t = 0;
+ int r;
+
+ r = clock_get_hwclock(&tm);
+ if (r == -EBUSY)
+ log_warning("/dev/rtc is busy. Is somebody keeping it open continuously? That's not a good idea... Returning a bogus RTC timestamp.");
+ else if (r == -ENOENT)
+ log_debug("/dev/rtc not found.");
+ else if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read RTC: %m");
+ else
+ t = (usec_t) timegm(&tm) * USEC_PER_SEC;
+
+ return sd_bus_message_append(reply, "t", t);
+}
+
+static int property_get_can_ntp(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ int r;
+
+ assert(c);
+ assert(bus);
+ assert(property);
+ assert(reply);
+ assert(error);
+
+ if (c->slot_job_removed)
+ /* When the previous request is not finished, then assume NTP is enabled. */
+ return sd_bus_message_append(reply, "b", true);
+
+ r = context_update_ntp_status(c, bus, reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "b", context_ntp_service_exists(c) > 0);
+}
+
+static int property_get_ntp(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Context *c = userdata;
+ int r;
+
+ assert(c);
+ assert(bus);
+ assert(property);
+ assert(reply);
+ assert(error);
+
+ if (c->slot_job_removed)
+ /* When the previous request is not finished, then assume NTP is active. */
+ return sd_bus_message_append(reply, "b", true);
+
+ r = context_update_ntp_status(c, bus, reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "b", context_ntp_service_is_active(c) > 0);
+}
+
+static int method_set_timezone(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ Context *c = userdata;
+ int interactive, r;
+ const char *z;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "sb", &z, &interactive);
+ if (r < 0)
+ return r;
+
+ if (!timezone_is_valid(z, LOG_DEBUG))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid or not installed time zone '%s'", z);
+
+ if (streq_ptr(z, c->zone))
+ return sd_bus_reply_method_return(m, NULL);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_TIME,
+ "org.freedesktop.timedate1.set-timezone",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = free_and_strdup(&c->zone, z);
+ if (r < 0)
+ return r;
+
+ /* 1. Write new configuration file */
+ r = context_write_data_timezone(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set time zone: %m");
+ return sd_bus_error_set_errnof(error, r, "Failed to set time zone: %m");
+ }
+
+ /* 2. Make glibc notice the new timezone */
+ tzset();
+
+ /* 3. Tell the kernel our timezone */
+ r = clock_set_timezone(NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to tell kernel about timezone, ignoring: %m");
+
+ if (c->local_rtc) {
+ struct timespec ts;
+ struct tm tm;
+
+ /* 4. Sync RTC from system clock, with the new delta */
+ assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0);
+ assert_se(localtime_r(&ts.tv_sec, &tm));
+
+ r = clock_set_hwclock(&tm);
+ if (r < 0)
+ log_debug_errno(r, "Failed to sync time to hardware clock, ignoring: %m");
+ }
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_TIMEZONE_CHANGE_STR,
+ "TIMEZONE=%s", c->zone,
+ "TIMEZONE_SHORTNAME=%s", tzname[daylight],
+ "DAYLIGHT=%i", daylight,
+ LOG_MESSAGE("Changed time zone to '%s' (%s).", c->zone, tzname[daylight]));
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m),
+ "/org/freedesktop/timedate1", "org.freedesktop.timedate1", "Timezone",
+ NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_local_rtc(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int lrtc, fix_system, interactive;
+ Context *c = userdata;
+ struct timespec ts;
+ int r;
+
+ assert(m);
+ assert(c);
+
+ r = sd_bus_message_read(m, "bbb", &lrtc, &fix_system, &interactive);
+ if (r < 0)
+ return r;
+
+ if (lrtc == c->local_rtc)
+ return sd_bus_reply_method_return(m, NULL);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_TIME,
+ "org.freedesktop.timedate1.set-local-rtc",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1;
+
+ c->local_rtc = lrtc;
+
+ /* 1. Write new configuration file */
+ r = context_write_data_local_rtc(c);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set RTC to %s: %m", lrtc ? "local" : "UTC");
+ return sd_bus_error_set_errnof(error, r, "Failed to set RTC to %s: %m", lrtc ? "local" : "UTC");
+ }
+
+ /* 2. Tell the kernel our timezone */
+ r = clock_set_timezone(NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to tell kernel about timezone, ignoring: %m");
+
+ /* 3. Synchronize clocks */
+ assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0);
+
+ if (fix_system) {
+ struct tm tm;
+
+ /* Sync system clock from RTC; first, initialize the timezone fields of struct tm. */
+ localtime_or_gmtime_r(&ts.tv_sec, &tm, !c->local_rtc);
+
+ /* Override the main fields of struct tm, but not the timezone fields */
+ r = clock_get_hwclock(&tm);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get hardware clock, ignoring: %m");
+ else {
+ /* And set the system clock with this */
+ ts.tv_sec = mktime_or_timegm(&tm, !c->local_rtc);
+
+ if (clock_settime(CLOCK_REALTIME, &ts) < 0)
+ log_debug_errno(errno, "Failed to update system clock, ignoring: %m");
+ }
+
+ } else {
+ struct tm tm;
+
+ /* Sync RTC from system clock */
+ localtime_or_gmtime_r(&ts.tv_sec, &tm, !c->local_rtc);
+
+ r = clock_set_hwclock(&tm);
+ if (r < 0)
+ log_debug_errno(r, "Failed to sync time to hardware clock, ignoring: %m");
+ }
+
+ log_info("RTC configured to %s time.", c->local_rtc ? "local" : "UTC");
+
+ (void) sd_bus_emit_properties_changed(sd_bus_message_get_bus(m),
+ "/org/freedesktop/timedate1", "org.freedesktop.timedate1", "LocalRTC",
+ NULL);
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_time(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ sd_bus *bus = sd_bus_message_get_bus(m);
+ int relative, interactive, r;
+ Context *c = userdata;
+ int64_t utc;
+ struct timespec ts;
+ usec_t start;
+ struct tm tm;
+
+ assert(m);
+ assert(c);
+
+ if (c->slot_job_removed)
+ return sd_bus_error_set(error, BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED, "Previous request is not finished, refusing.");
+
+ r = context_update_ntp_status(c, bus, m);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to update context: %m");
+
+ if (context_ntp_service_is_active(c) > 0)
+ return sd_bus_error_set(error, BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED, "Automatic time synchronization is enabled");
+
+ /* this only gets used if dbus does not provide a timestamp */
+ start = now(CLOCK_MONOTONIC);
+
+ r = sd_bus_message_read(m, "xbb", &utc, &relative, &interactive);
+ if (r < 0)
+ return r;
+
+ if (!relative && utc <= 0)
+ return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid absolute time");
+
+ if (relative && utc == 0)
+ return sd_bus_reply_method_return(m, NULL);
+
+ if (relative) {
+ usec_t n, x;
+
+ n = now(CLOCK_REALTIME);
+ x = n + utc;
+
+ if ((utc > 0 && x < n) ||
+ (utc < 0 && x > n))
+ return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Time value overflow");
+
+ timespec_store(&ts, x);
+ } else
+ timespec_store(&ts, (usec_t) utc);
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_TIME,
+ "org.freedesktop.timedate1.set-time",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1;
+
+ /* adjust ts for time spent in program */
+ r = sd_bus_message_get_monotonic_usec(m, &start);
+ /* when sd_bus_message_get_monotonic_usec() returns -ENODATA it does not modify &start */
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ timespec_store(&ts, timespec_load(&ts) + (now(CLOCK_MONOTONIC) - start));
+
+ /* Set system clock */
+ if (clock_settime(CLOCK_REALTIME, &ts) < 0) {
+ log_error_errno(errno, "Failed to set local time: %m");
+ return sd_bus_error_set_errnof(error, errno, "Failed to set local time: %m");
+ }
+
+ /* Sync down to RTC */
+ localtime_or_gmtime_r(&ts.tv_sec, &tm, !c->local_rtc);
+
+ r = clock_set_hwclock(&tm);
+ if (r < 0)
+ log_debug_errno(r, "Failed to update hardware clock, ignoring: %m");
+
+ log_struct(LOG_INFO,
+ "MESSAGE_ID=" SD_MESSAGE_TIME_CHANGE_STR,
+ "REALTIME="USEC_FMT, timespec_load(&ts),
+ LOG_MESSAGE("Changed local time to %s", ctime(&ts.tv_sec)));
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_set_ntp(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *slot = NULL;
+ sd_bus *bus = sd_bus_message_get_bus(m);
+ Context *c = userdata;
+ UnitStatusInfo *u;
+ const UnitStatusInfo *selected = NULL;
+ int enable, interactive, q, r;
+
+ assert(m);
+ assert(bus);
+ assert(c);
+
+ r = sd_bus_message_read(m, "bb", &enable, &interactive);
+ if (r < 0)
+ return r;
+
+ r = context_update_ntp_status(c, bus, m);
+ if (r < 0)
+ return r;
+
+ if (context_ntp_service_exists(c) <= 0)
+ return sd_bus_error_set(error, BUS_ERROR_NO_NTP_SUPPORT, "NTP not supported");
+
+ r = bus_verify_polkit_async(
+ m,
+ CAP_SYS_TIME,
+ "org.freedesktop.timedate1.set-ntp",
+ NULL,
+ interactive,
+ UID_INVALID,
+ &c->polkit_registry,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1;
+
+ /* This method may be called frequently. Forget the previous job if it has not completed yet. */
+ LIST_FOREACH(units, u, c->units)
+ u->path = mfree(u->path);
+
+ if (!c->slot_job_removed) {
+ r = bus_match_signal_async(
+ bus,
+ &slot,
+ bus_systemd_mgr,
+ "JobRemoved",
+ match_job_removed, NULL, c);
+ if (r < 0)
+ return r;
+ }
+
+ if (enable)
+ LIST_FOREACH(units, u, c->units) {
+ bool enable_this_one = !selected;
+
+ if (!streq(u->load_state, "loaded"))
+ continue;
+
+ r = unit_enable_or_disable(u, bus, error, enable_this_one);
+ if (r < 0)
+ /* If enablement failed, don't start this unit. */
+ enable_this_one = false;
+
+ r = unit_start_or_stop(u, bus, error, enable_this_one);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to %s %sd NTP unit, ignoring: %m",
+ enable_this_one ? "start" : "stop",
+ enable_disable(enable_this_one));
+ if (enable_this_one)
+ selected = u;
+ }
+ else
+ LIST_FOREACH(units, u, c->units) {
+ if (!streq(u->load_state, "loaded"))
+ continue;
+
+ q = unit_enable_or_disable(u, bus, error, false);
+ if (q < 0)
+ r = q;
+
+ q = unit_start_or_stop(u, bus, error, false);
+ if (q < 0)
+ r = q;
+ }
+
+ if (r < 0)
+ return r;
+ if (enable && !selected)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "No NTP service found to enable.");
+
+ if (slot)
+ c->slot_job_removed = TAKE_PTR(slot);
+
+ if (selected)
+ log_info("Set NTP to enabled (%s).", selected->name);
+ else
+ log_info("Set NTP to disabled.");
+
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_list_timezones(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **zones = NULL;
+ int r;
+
+ assert(m);
+
+ r = get_timezones(&zones);
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to read list of time zones: %m");
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(reply, zones);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static const sd_bus_vtable timedate_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("Timezone", "s", NULL, offsetof(Context, zone), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("LocalRTC", "b", bus_property_get_bool, offsetof(Context, local_rtc), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("CanNTP", "b", property_get_can_ntp, 0, 0),
+ SD_BUS_PROPERTY("NTP", "b", property_get_ntp, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("NTPSynchronized", "b", property_get_ntp_sync, 0, 0),
+ SD_BUS_PROPERTY("TimeUSec", "t", property_get_time, 0, 0),
+ SD_BUS_PROPERTY("RTCTimeUSec", "t", property_get_rtc_time, 0, 0),
+
+ SD_BUS_METHOD_WITH_NAMES("SetTime",
+ "xbb",
+ SD_BUS_PARAM(usec_utc)
+ SD_BUS_PARAM(relative)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_time,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetTimezone",
+ "sb",
+ SD_BUS_PARAM(timezone)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_timezone,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetLocalRTC",
+ "bbb",
+ SD_BUS_PARAM(local_rtc)
+ SD_BUS_PARAM(fix_system)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_local_rtc,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("SetNTP",
+ "bb",
+ SD_BUS_PARAM(use_ntp)
+ SD_BUS_PARAM(interactive),
+ NULL,,
+ method_set_ntp,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD_WITH_NAMES("ListTimezones",
+ NULL,,
+ "as",
+ SD_BUS_PARAM(timezones),
+ method_list_timezones,
+ SD_BUS_VTABLE_UNPRIVILEGED),
+
+ SD_BUS_VTABLE_END,
+};
+
+const BusObjectImplementation manager_object = {
+ "/org/freedesktop/timedate1",
+ "org.freedesktop.timedate1",
+ .vtables = BUS_VTABLES(timedate_vtable),
+};
+
+static int connect_bus(Context *c, sd_event *event, sd_bus **_bus) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(c);
+ assert(event);
+ assert(_bus);
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get system bus connection: %m");
+
+ r = bus_add_implementation(bus, &manager_object, c);
+ if (r < 0)
+ return r;
+
+ r = bus_log_control_api_register(bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(bus, NULL, "org.freedesktop.timedate1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(bus, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context context = {};
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ log_setup_service();
+
+ r = service_parse_argv("systemd-timedated.service",
+ "Manage the system clock and timezone and NTP enablement.",
+ BUS_IMPLEMENTATIONS(&manager_object,
+ &log_control_object),
+ argc, argv);
+ if (r <= 0)
+ return r;
+
+ umask(0022);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ (void) sd_event_set_watchdog(event, true);
+
+ r = sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGINT handler: %m");
+
+ r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to install SIGTERM handler: %m");
+
+ r = connect_bus(&context, event, &bus);
+ if (r < 0)
+ return r;
+
+ (void) sd_bus_negotiate_timestamp(bus, true);
+
+ r = context_read_data(&context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read time zone data: %m");
+
+ r = context_parse_ntp_services(&context);
+ if (r < 0)
+ return r;
+
+ r = bus_event_loop_with_idle(event, bus, "org.freedesktop.timedate1", DEFAULT_EXIT_USEC, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/timesync/80-systemd-timesync.list b/src/timesync/80-systemd-timesync.list
new file mode 100644
index 0000000..d5959ad
--- /dev/null
+++ b/src/timesync/80-systemd-timesync.list
@@ -0,0 +1 @@
+systemd-timesyncd.service
diff --git a/src/timesync/meson.build b/src/timesync/meson.build
new file mode 100644
index 0000000..571e3fc
--- /dev/null
+++ b/src/timesync/meson.build
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_timesyncd_sources = files('''
+ timesyncd.c
+ timesyncd-bus.c
+ timesyncd-bus.h
+ timesyncd-conf.c
+ timesyncd-conf.h
+ timesyncd-manager.c
+ timesyncd-manager.h
+ timesyncd-ntp-message.h
+ timesyncd-server.c
+ timesyncd-server.h
+'''.split())
+
+timesyncd_gperf_c = custom_target(
+ 'timesyncd-gperf.c',
+ input : 'timesyncd-gperf.gperf',
+ output : 'timesyncd-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+systemd_timesyncd_sources += [timesyncd_gperf_c]
+
+if conf.get('ENABLE_TIMESYNCD') == 1
+ timesyncd_conf = configure_file(
+ input : 'timesyncd.conf.in',
+ output : 'timesyncd.conf',
+ configuration : substs)
+ if install_sysconfdir
+ install_data(timesyncd_conf,
+ install_dir : pkgsysconfdir)
+ endif
+ install_data('org.freedesktop.timesync1.conf',
+ install_dir : dbuspolicydir)
+ install_data('org.freedesktop.timesync1.service',
+ install_dir : dbussystemservicedir)
+ install_data('80-systemd-timesync.list',
+ install_dir : ntpservicelistdir)
+endif
+
+############################################################
+
+tests += [
+ [['src/timesync/test-timesync.c',
+ 'src/timesync/timesyncd-manager.c',
+ 'src/timesync/timesyncd-manager.h',
+ 'src/timesync/timesyncd-conf.c',
+ 'src/timesync/timesyncd-conf.h',
+ 'src/timesync/timesyncd-server.c',
+ 'src/timesync/timesyncd-server.h',
+ timesyncd_gperf_c],
+ [libshared],
+ [libm],
+ 'ENABLE_TIMESYNCD'],
+]
diff --git a/src/timesync/org.freedesktop.timesync1.conf b/src/timesync/org.freedesktop.timesync1.conf
new file mode 100644
index 0000000..eccdbec
--- /dev/null
+++ b/src/timesync/org.freedesktop.timesync1.conf
@@ -0,0 +1,42 @@
+<?xml version="1.0"?> <!--*-nxml-*-->
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+
+<!--
+ This file is part of systemd.
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+-->
+
+<busconfig>
+
+ <policy user="systemd-timesync">
+ <allow own="org.freedesktop.timesync1"/>
+ <allow send_destination="org.freedesktop.timesync1"/>
+ <allow receive_sender="org.freedesktop.timesync1"/>
+ </policy>
+
+ <policy context="default">
+ <deny send_destination="org.freedesktop.timesync1"/>
+
+ <allow send_destination="org.freedesktop.timesync1"
+ send_interface="org.freedesktop.DBus.Introspectable"/>
+
+ <allow send_destination="org.freedesktop.timesync1"
+ send_interface="org.freedesktop.DBus.Peer"/>
+
+ <allow send_destination="org.freedesktop.timesync1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="Get"/>
+
+ <allow send_destination="org.freedesktop.timesync1"
+ send_interface="org.freedesktop.DBus.Properties"
+ send_member="GetAll"/>
+
+ <allow receive_sender="org.freedesktop.timesync1"/>
+ </policy>
+
+</busconfig>
diff --git a/src/timesync/org.freedesktop.timesync1.service b/src/timesync/org.freedesktop.timesync1.service
new file mode 100644
index 0000000..98878d6
--- /dev/null
+++ b/src/timesync/org.freedesktop.timesync1.service
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+[D-BUS Service]
+Name=org.freedesktop.timesync1
+Exec=/bin/false
+User=root
+SystemdService=dbus-org.freedesktop.timesync1.service
diff --git a/src/timesync/test-timesync.c b/src/timesync/test-timesync.c
new file mode 100644
index 0000000..31e91e7
--- /dev/null
+++ b/src/timesync/test-timesync.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* Some unit tests for the helper functions in timesyncd. */
+
+#include "log.h"
+#include "macro.h"
+#include "timesyncd-conf.h"
+#include "tests.h"
+
+static void test_manager_parse_string(void) {
+ /* Make sure that NTP_SERVERS is configured to something
+ * that we can actually parse successfully. */
+
+ _cleanup_(manager_freep) Manager *m = NULL;
+
+ assert_se(manager_new(&m) == 0);
+
+ assert_se(!m->have_fallbacks);
+ assert_se(manager_parse_server_string(m, SERVER_FALLBACK, NTP_SERVERS) == 0);
+ assert_se(m->have_fallbacks);
+ assert_se(manager_parse_fallback_string(m, NTP_SERVERS) == 0);
+
+ assert_se(manager_parse_server_string(m, SERVER_SYSTEM, "time1.foobar.com time2.foobar.com axrfav.,avf..ra 12345..123") == 0);
+ assert_se(manager_parse_server_string(m, SERVER_FALLBACK, "time1.foobar.com time2.foobar.com axrfav.,avf..ra 12345..123") == 0);
+ assert_se(manager_parse_server_string(m, SERVER_LINK, "time1.foobar.com time2.foobar.com axrfav.,avf..ra 12345..123") == 0);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_DEBUG);
+
+ test_manager_parse_string();
+
+ return 0;
+}
diff --git a/src/timesync/timesyncd-bus.c b/src/timesync/timesyncd-bus.c
new file mode 100644
index 0000000..c5ad5fe
--- /dev/null
+++ b/src/timesync/timesyncd-bus.c
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "bus-internal.h"
+#include "bus-log-control-api.h"
+#include "bus-protocol.h"
+#include "bus-util.h"
+#include "in-addr-util.h"
+#include "log.h"
+#include "macro.h"
+#include "time-util.h"
+#include "timesyncd-bus.h"
+
+static int property_get_servers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ServerName *p, **s = userdata;
+ int r;
+
+ assert(s);
+ assert(bus);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'a', "s");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(names, p, *s) {
+ r = sd_bus_message_append(reply, "s", p->string);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_current_server_name(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ServerName **s = userdata;
+
+ assert(s);
+ assert(bus);
+ assert(reply);
+
+ return sd_bus_message_append(reply, "s", *s ? (*s)->string : NULL);
+}
+
+static int property_get_current_server_address(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ServerAddress *a;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ a = *(ServerAddress **) userdata;
+
+ if (!a)
+ return sd_bus_message_append(reply, "(iay)", AF_UNSPEC, 0);
+
+ assert(IN_SET(a->sockaddr.sa.sa_family, AF_INET, AF_INET6));
+
+ r = sd_bus_message_open_container(reply, 'r', "iay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "i", a->sockaddr.sa.sa_family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y',
+ a->sockaddr.sa.sa_family == AF_INET ? (void*) &a->sockaddr.in.sin_addr : (void*) &a->sockaddr.in6.sin6_addr,
+ FAMILY_ADDRESS_SIZE(a->sockaddr.sa.sa_family));
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static usec_t ntp_ts_short_to_usec(const struct ntp_ts_short *ts) {
+ return be16toh(ts->sec) * USEC_PER_SEC + (be16toh(ts->frac) * USEC_PER_SEC) / (usec_t) 0x10000ULL;
+}
+
+static usec_t ntp_ts_to_usec(const struct ntp_ts *ts) {
+ return (be32toh(ts->sec) - OFFSET_1900_1970) * USEC_PER_SEC + (be32toh(ts->frac) * USEC_PER_SEC) / (usec_t) 0x100000000ULL;
+}
+
+static int property_get_ntp_message(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(m);
+ assert(reply);
+
+ r = sd_bus_message_open_container(reply, 'r', "uuuuittayttttbtt");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "uuuuitt",
+ NTP_FIELD_LEAP(m->ntpmsg.field),
+ NTP_FIELD_VERSION(m->ntpmsg.field),
+ NTP_FIELD_MODE(m->ntpmsg.field),
+ m->ntpmsg.stratum,
+ m->ntpmsg.precision,
+ ntp_ts_short_to_usec(&m->ntpmsg.root_delay),
+ ntp_ts_short_to_usec(&m->ntpmsg.root_dispersion));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(reply, 'y', m->ntpmsg.refid, 4);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "ttttbtt",
+ timespec_load(&m->origin_time),
+ ntp_ts_to_usec(&m->ntpmsg.recv_time),
+ ntp_ts_to_usec(&m->ntpmsg.trans_time),
+ timespec_load(&m->dest_time),
+ m->spike,
+ m->packet_count,
+ (usec_t) (m->samples_jitter * USEC_PER_SEC));
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
+static const sd_bus_vtable manager_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_PROPERTY("LinkNTPServers", "as", property_get_servers, offsetof(Manager, link_servers), 0),
+ SD_BUS_PROPERTY("SystemNTPServers", "as", property_get_servers, offsetof(Manager, system_servers), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FallbackNTPServers", "as", property_get_servers, offsetof(Manager, fallback_servers), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ServerName", "s", property_get_current_server_name, offsetof(Manager, current_server_name), 0),
+ SD_BUS_PROPERTY("ServerAddress", "(iay)", property_get_current_server_address, offsetof(Manager, current_server_address), 0),
+ SD_BUS_PROPERTY("RootDistanceMaxUSec", "t", bus_property_get_usec, offsetof(Manager, max_root_distance_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PollIntervalMinUSec", "t", bus_property_get_usec, offsetof(Manager, poll_interval_min_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PollIntervalMaxUSec", "t", bus_property_get_usec, offsetof(Manager, poll_interval_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PollIntervalUSec", "t", bus_property_get_usec, offsetof(Manager, poll_interval_usec), 0),
+ SD_BUS_PROPERTY("NTPMessage", "(uuuuittayttttbtt)", property_get_ntp_message, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Frequency", "x", NULL, offsetof(Manager, drift_freq), 0),
+
+ SD_BUS_VTABLE_END
+};
+
+int manager_connect_bus(Manager *m) {
+ int r;
+
+ assert(m);
+
+ if (m->bus)
+ return 0;
+
+ r = bus_open_system_watch_bind_with_description(&m->bus, "bus-api-timesync");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to bus: %m");
+
+ r = sd_bus_add_object_vtable(m->bus, NULL, "/org/freedesktop/timesync1", "org.freedesktop.timesync1.Manager", manager_vtable, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add manager object vtable: %m");
+
+ r = bus_log_control_api_register(m->bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_request_name_async(m->bus, NULL, "org.freedesktop.timesync1", 0, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request name: %m");
+
+ r = sd_bus_attach_event(m->bus, m->event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+
+ return 0;
+}
diff --git a/src/timesync/timesyncd-bus.h b/src/timesync/timesyncd-bus.h
new file mode 100644
index 0000000..83db216
--- /dev/null
+++ b/src/timesync/timesyncd-bus.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "timesyncd-manager.h"
+
+int manager_connect_bus(Manager *m);
diff --git a/src/timesync/timesyncd-conf.c b/src/timesync/timesyncd-conf.c
new file mode 100644
index 0000000..2c31146
--- /dev/null
+++ b/src/timesync/timesyncd-conf.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "def.h"
+#include "dns-domain.h"
+#include "extract-word.h"
+#include "string-util.h"
+#include "timesyncd-conf.h"
+#include "timesyncd-manager.h"
+#include "timesyncd-server.h"
+
+int manager_parse_server_string(Manager *m, ServerType type, const char *string) {
+ ServerName *first;
+ int r;
+
+ assert(m);
+ assert(string);
+
+ first = type == SERVER_FALLBACK ? m->fallback_servers : m->system_servers;
+
+ if (type == SERVER_FALLBACK)
+ m->have_fallbacks = true;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ bool found = false;
+ ServerName *n;
+
+ r = extract_first_word(&string, &word, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timesyncd server syntax \"%s\": %m", string);
+ if (r == 0)
+ break;
+
+ r = dns_name_is_valid_or_address(word);
+ if (r < 0)
+ return log_error_errno(r, "Failed to check validity of NTP server name or address '%s': %m", word);
+ if (r == 0) {
+ log_error("Invalid NTP server name or address, ignoring: %s", word);
+ continue;
+ }
+
+ /* Filter out duplicates */
+ LIST_FOREACH(names, n, first)
+ if (streq_ptr(n->string, word)) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ continue;
+
+ r = server_name_new(m, NULL, type, word);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int manager_parse_fallback_string(Manager *m, const char *string) {
+ if (m->have_fallbacks)
+ return 0;
+
+ return manager_parse_server_string(m, SERVER_FALLBACK, string);
+}
+
+int config_parse_servers(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Manager *m = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue))
+ manager_flush_server_names(m, ltype);
+ else {
+ r = manager_parse_server_string(m, ltype, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse NTP server string '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+int manager_parse_config_file(Manager *m) {
+ int r;
+
+ assert(m);
+
+ r = config_parse_many_nulstr(
+ PKGSYSCONFDIR "/timesyncd.conf",
+ CONF_PATHS_NULSTR("systemd/timesyncd.conf.d"),
+ "Time\0",
+ config_item_perf_lookup, timesyncd_gperf_lookup,
+ CONFIG_PARSE_WARN,
+ m,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (m->poll_interval_min_usec < 16 * USEC_PER_SEC) {
+ log_warning("Invalid PollIntervalMinSec=. Using default value.");
+ m->poll_interval_min_usec = NTP_POLL_INTERVAL_MIN_USEC;
+ }
+
+ if (m->poll_interval_max_usec < m->poll_interval_min_usec) {
+ log_warning("PollIntervalMaxSec= is smaller than PollIntervalMinSec=. Using default value.");
+ m->poll_interval_max_usec = MAX(NTP_POLL_INTERVAL_MAX_USEC, m->poll_interval_min_usec * 32);
+ }
+
+ return r;
+}
diff --git a/src/timesync/timesyncd-conf.h b/src/timesync/timesyncd-conf.h
new file mode 100644
index 0000000..d6b9060
--- /dev/null
+++ b/src/timesync/timesyncd-conf.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "timesyncd-manager.h"
+
+const struct ConfigPerfItem* timesyncd_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+int manager_parse_server_string(Manager *m, ServerType type, const char *string);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_servers);
+
+int manager_parse_config_file(Manager *m);
+int manager_parse_fallback_string(Manager *m, const char *string);
diff --git a/src/timesync/timesyncd-gperf.gperf b/src/timesync/timesyncd-gperf.gperf
new file mode 100644
index 0000000..b502027
--- /dev/null
+++ b/src/timesync/timesyncd-gperf.gperf
@@ -0,0 +1,25 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "timesyncd-conf.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name timesyncd_gperf_hash
+%define lookup-function-name timesyncd_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Time.NTP, config_parse_servers, SERVER_SYSTEM, 0
+Time.Servers, config_parse_servers, SERVER_SYSTEM, 0
+Time.FallbackNTP, config_parse_servers, SERVER_FALLBACK, 0
+Time.RootDistanceMaxSec, config_parse_sec, 0, offsetof(Manager, max_root_distance_usec)
+Time.PollIntervalMinSec, config_parse_sec, 0, offsetof(Manager, poll_interval_min_usec)
+Time.PollIntervalMaxSec, config_parse_sec, 0, offsetof(Manager, poll_interval_max_usec)
diff --git a/src/timesync/timesyncd-manager.c b/src/timesync/timesyncd-manager.c
new file mode 100644
index 0000000..93ba4ef
--- /dev/null
+++ b/src/timesync/timesyncd-manager.c
@@ -0,0 +1,1118 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <math.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <resolv.h>
+#include <stdlib.h>
+#include <sys/timerfd.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "list.h"
+#include "log.h"
+#include "network-util.h"
+#include "ratelimit.h"
+#include "resolve-private.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "timesyncd-conf.h"
+#include "timesyncd-manager.h"
+#include "util.h"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */
+#endif
+
+/* expected accuracy of time synchronization; used to adjust the poll interval */
+#define NTP_ACCURACY_SEC 0.2
+
+/*
+ * Maximum delta in seconds which the system clock is gradually adjusted
+ * (slewed) to approach the network time. Deltas larger that this are set by
+ * letting the system time jump. The kernel's limit for adjtime is 0.5s.
+ */
+#define NTP_MAX_ADJUST 0.4
+
+/* Default of maximum acceptable root distance in microseconds. */
+#define NTP_MAX_ROOT_DISTANCE (5 * USEC_PER_SEC)
+
+/* Maximum number of missed replies before selecting another source. */
+#define NTP_MAX_MISSED_REPLIES 2
+
+#define RETRY_USEC (30*USEC_PER_SEC)
+#define RATELIMIT_INTERVAL_USEC (10*USEC_PER_SEC)
+#define RATELIMIT_BURST 10
+
+#define TIMEOUT_USEC (10*USEC_PER_SEC)
+
+static int manager_arm_timer(Manager *m, usec_t next);
+static int manager_clock_watch_setup(Manager *m);
+static int manager_listen_setup(Manager *m);
+static void manager_listen_stop(Manager *m);
+
+static double ntp_ts_short_to_d(const struct ntp_ts_short *ts) {
+ return be16toh(ts->sec) + (be16toh(ts->frac) / 65536.0);
+}
+
+static double ntp_ts_to_d(const struct ntp_ts *ts) {
+ return be32toh(ts->sec) + ((double)be32toh(ts->frac) / UINT_MAX);
+}
+
+static double ts_to_d(const struct timespec *ts) {
+ return ts->tv_sec + (1.0e-9 * ts->tv_nsec);
+}
+
+static int manager_timeout(sd_event_source *source, usec_t usec, void *userdata) {
+ _cleanup_free_ char *pretty = NULL;
+ Manager *m = userdata;
+
+ assert(m);
+ assert(m->current_server_name);
+ assert(m->current_server_address);
+
+ server_address_pretty(m->current_server_address, &pretty);
+ log_info("Timed out waiting for reply from %s (%s).", strna(pretty), m->current_server_name->string);
+
+ return manager_connect(m);
+}
+
+static int manager_send_request(Manager *m) {
+ _cleanup_free_ char *pretty = NULL;
+ struct ntp_msg ntpmsg = {
+ /*
+ * "The client initializes the NTP message header, sends the request
+ * to the server, and strips the time of day from the Transmit
+ * Timestamp field of the reply. For this purpose, all the NTP
+ * header fields are set to 0, except the Mode, VN, and optional
+ * Transmit Timestamp fields."
+ */
+ .field = NTP_FIELD(0, 4, NTP_MODE_CLIENT),
+ };
+ ssize_t len;
+ int r;
+
+ assert(m);
+ assert(m->current_server_name);
+ assert(m->current_server_address);
+
+ m->event_timeout = sd_event_source_unref(m->event_timeout);
+
+ r = manager_listen_setup(m);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to set up connection socket: %m");
+
+ /*
+ * Set transmit timestamp, remember it; the server will send that back
+ * as the origin timestamp and we have an indication that this is the
+ * matching answer to our request.
+ *
+ * The actual value does not matter, We do not care about the correct
+ * NTP UINT_MAX fraction; we just pass the plain nanosecond value.
+ */
+ assert_se(clock_gettime(clock_boottime_or_monotonic(), &m->trans_time_mon) >= 0);
+ assert_se(clock_gettime(CLOCK_REALTIME, &m->trans_time) >= 0);
+ ntpmsg.trans_time.sec = htobe32(m->trans_time.tv_sec + OFFSET_1900_1970);
+ ntpmsg.trans_time.frac = htobe32(m->trans_time.tv_nsec);
+
+ server_address_pretty(m->current_server_address, &pretty);
+
+ len = sendto(m->server_socket, &ntpmsg, sizeof(ntpmsg), MSG_DONTWAIT, &m->current_server_address->sockaddr.sa, m->current_server_address->socklen);
+ if (len == sizeof(ntpmsg)) {
+ m->pending = true;
+ log_debug("Sent NTP request to %s (%s).", strna(pretty), m->current_server_name->string);
+ } else {
+ log_debug_errno(errno, "Sending NTP request to %s (%s) failed: %m", strna(pretty), m->current_server_name->string);
+ return manager_connect(m);
+ }
+
+ /* re-arm timer with increasing timeout, in case the packets never arrive back */
+ if (m->retry_interval == 0)
+ m->retry_interval = NTP_RETRY_INTERVAL_MIN_USEC;
+ else
+ m->retry_interval = MIN(m->retry_interval * 4/3, NTP_RETRY_INTERVAL_MAX_USEC);
+
+ r = manager_arm_timer(m, m->retry_interval);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rearm timer: %m");
+
+ m->missed_replies++;
+ if (m->missed_replies > NTP_MAX_MISSED_REPLIES) {
+ r = sd_event_add_time(
+ m->event,
+ &m->event_timeout,
+ clock_boottime_or_monotonic(),
+ now(clock_boottime_or_monotonic()) + TIMEOUT_USEC, 0,
+ manager_timeout, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to arm timeout timer: %m");
+ }
+
+ return 0;
+}
+
+static int manager_timer(sd_event_source *source, usec_t usec, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ return manager_send_request(m);
+}
+
+static int manager_arm_timer(Manager *m, usec_t next) {
+ int r;
+
+ assert(m);
+
+ if (next == 0) {
+ m->event_timer = sd_event_source_unref(m->event_timer);
+ return 0;
+ }
+
+ if (m->event_timer) {
+ r = sd_event_source_set_time_relative(m->event_timer, next);
+ if (r < 0)
+ return r;
+
+ return sd_event_source_set_enabled(m->event_timer, SD_EVENT_ONESHOT);
+ }
+
+ return sd_event_add_time_relative(
+ m->event,
+ &m->event_timer,
+ clock_boottime_or_monotonic(),
+ next, 0,
+ manager_timer, m);
+}
+
+static int manager_clock_watch(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ /* rearm timer */
+ manager_clock_watch_setup(m);
+
+ /* skip our own jumps */
+ if (m->jumped) {
+ m->jumped = false;
+ return 0;
+ }
+
+ /* resync */
+ log_debug("System time changed. Resyncing.");
+ m->poll_resync = true;
+
+ return manager_send_request(m);
+}
+
+/* wake up when the system time changes underneath us */
+static int manager_clock_watch_setup(Manager *m) {
+ int r;
+
+ assert(m);
+
+ m->event_clock_watch = sd_event_source_unref(m->event_clock_watch);
+ safe_close(m->clock_watch_fd);
+
+ m->clock_watch_fd = time_change_fd();
+ if (m->clock_watch_fd < 0)
+ return log_error_errno(m->clock_watch_fd, "Failed to create timerfd: %m");
+
+ r = sd_event_add_io(m->event, &m->event_clock_watch, m->clock_watch_fd, EPOLLIN, manager_clock_watch, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create clock watch event source: %m");
+
+ return 0;
+}
+
+static int manager_adjust_clock(Manager *m, double offset, int leap_sec) {
+ struct timex tmx = {};
+ int r;
+
+ assert(m);
+
+ /*
+ * For small deltas, tell the kernel to gradually adjust the system
+ * clock to the NTP time, larger deltas are just directly set.
+ */
+ if (fabs(offset) < NTP_MAX_ADJUST) {
+ tmx.modes = ADJ_STATUS | ADJ_NANO | ADJ_OFFSET | ADJ_TIMECONST | ADJ_MAXERROR | ADJ_ESTERROR;
+ tmx.status = STA_PLL;
+ tmx.offset = offset * NSEC_PER_SEC;
+ tmx.constant = log2i(m->poll_interval_usec / USEC_PER_SEC) - 4;
+ tmx.maxerror = 0;
+ tmx.esterror = 0;
+ log_debug(" adjust (slew): %+.3f sec", offset);
+ } else {
+ tmx.modes = ADJ_STATUS | ADJ_NANO | ADJ_SETOFFSET | ADJ_MAXERROR | ADJ_ESTERROR;
+
+ /* ADJ_NANO uses nanoseconds in the microseconds field */
+ tmx.time.tv_sec = (long)offset;
+ tmx.time.tv_usec = (offset - tmx.time.tv_sec) * NSEC_PER_SEC;
+ tmx.maxerror = 0;
+ tmx.esterror = 0;
+
+ /* the kernel expects -0.3s as {-1, 7000.000.000} */
+ if (tmx.time.tv_usec < 0) {
+ tmx.time.tv_sec -= 1;
+ tmx.time.tv_usec += NSEC_PER_SEC;
+ }
+
+ m->jumped = true;
+ log_debug(" adjust (jump): %+.3f sec", offset);
+ }
+
+ /*
+ * An unset STA_UNSYNC will enable the kernel's 11-minute mode,
+ * which syncs the system time periodically to the RTC.
+ *
+ * In case the RTC runs in local time, never touch the RTC,
+ * we have no way to properly handle daylight saving changes and
+ * mobile devices moving between time zones.
+ */
+ if (m->rtc_local_time)
+ tmx.status |= STA_UNSYNC;
+
+ switch (leap_sec) {
+ case 1:
+ tmx.status |= STA_INS;
+ break;
+ case -1:
+ tmx.status |= STA_DEL;
+ break;
+ }
+
+ r = clock_adjtime(CLOCK_REALTIME, &tmx);
+ if (r < 0)
+ return -errno;
+
+ /* If touch fails, there isn't much we can do. Maybe it'll work next time. */
+ (void) touch("/var/lib/systemd/timesync/clock");
+ (void) touch("/run/systemd/timesync/synchronized");
+
+ m->drift_freq = tmx.freq;
+
+ log_debug(" status : %04i %s\n"
+ " time now : %"PRI_TIME".%03"PRI_USEC"\n"
+ " constant : %"PRI_TIMEX"\n"
+ " offset : %+.3f sec\n"
+ " freq offset : %+"PRI_TIMEX" (%+"PRI_TIMEX" ppm)\n",
+ tmx.status, tmx.status & STA_UNSYNC ? "unsync" : "sync",
+ tmx.time.tv_sec, tmx.time.tv_usec / NSEC_PER_MSEC,
+ tmx.constant,
+ (double)tmx.offset / NSEC_PER_SEC,
+ tmx.freq, tmx.freq / 65536);
+
+ return 0;
+}
+
+static bool manager_sample_spike_detection(Manager *m, double offset, double delay) {
+ unsigned i, idx_cur, idx_new, idx_min;
+ double jitter;
+ double j;
+
+ assert(m);
+
+ m->packet_count++;
+
+ /* ignore initial sample */
+ if (m->packet_count == 1)
+ return false;
+
+ /* store the current data in our samples array */
+ idx_cur = m->samples_idx;
+ idx_new = (idx_cur + 1) % ELEMENTSOF(m->samples);
+ m->samples_idx = idx_new;
+ m->samples[idx_new].offset = offset;
+ m->samples[idx_new].delay = delay;
+
+ /* calculate new jitter value from the RMS differences relative to the lowest delay sample */
+ jitter = m->samples_jitter;
+ for (idx_min = idx_cur, i = 0; i < ELEMENTSOF(m->samples); i++)
+ if (m->samples[i].delay > 0 && m->samples[i].delay < m->samples[idx_min].delay)
+ idx_min = i;
+
+ j = 0;
+ for (i = 0; i < ELEMENTSOF(m->samples); i++)
+ j += pow(m->samples[i].offset - m->samples[idx_min].offset, 2);
+ m->samples_jitter = sqrt(j / (ELEMENTSOF(m->samples) - 1));
+
+ /* ignore samples when resyncing */
+ if (m->poll_resync)
+ return false;
+
+ /* always accept offset if we are farther off than the round-trip delay */
+ if (fabs(offset) > delay)
+ return false;
+
+ /* we need a few samples before looking at them */
+ if (m->packet_count < 4)
+ return false;
+
+ /* do not accept anything worse than the maximum possible error of the best sample */
+ if (fabs(offset) > m->samples[idx_min].delay)
+ return true;
+
+ /* compare the difference between the current offset to the previous offset and jitter */
+ return fabs(offset - m->samples[idx_cur].offset) > 3 * jitter;
+}
+
+static void manager_adjust_poll(Manager *m, double offset, bool spike) {
+ assert(m);
+
+ if (m->poll_resync) {
+ m->poll_interval_usec = m->poll_interval_min_usec;
+ m->poll_resync = false;
+ return;
+ }
+
+ /* set to minimal poll interval */
+ if (!spike && fabs(offset) > NTP_ACCURACY_SEC) {
+ m->poll_interval_usec = m->poll_interval_min_usec;
+ return;
+ }
+
+ /* increase polling interval */
+ if (fabs(offset) < NTP_ACCURACY_SEC * 0.25) {
+ if (m->poll_interval_usec < m->poll_interval_max_usec)
+ m->poll_interval_usec *= 2;
+ return;
+ }
+
+ /* decrease polling interval */
+ if (spike || fabs(offset) > NTP_ACCURACY_SEC * 0.75) {
+ if (m->poll_interval_usec > m->poll_interval_min_usec)
+ m->poll_interval_usec /= 2;
+ return;
+ }
+}
+
+static int manager_receive_response(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ struct ntp_msg ntpmsg;
+
+ struct iovec iov = {
+ .iov_base = &ntpmsg,
+ .iov_len = sizeof(ntpmsg),
+ };
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct timeval))) control;
+ union sockaddr_union server_addr;
+ struct msghdr msghdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_name = &server_addr,
+ .msg_namelen = sizeof(server_addr),
+ };
+ struct cmsghdr *cmsg;
+ struct timespec *recv_time = NULL;
+ ssize_t len;
+ double origin, receive, trans, dest;
+ double delay, offset;
+ double root_distance;
+ bool spike;
+ int leap_sec;
+ int r;
+
+ assert(source);
+ assert(m);
+
+ if (revents & (EPOLLHUP|EPOLLERR)) {
+ log_warning("Server connection returned error.");
+ return manager_connect(m);
+ }
+
+ len = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT);
+ if (len == -EAGAIN)
+ return 0;
+ if (len < 0) {
+ log_warning_errno(len, "Error receiving message, disconnecting: %m");
+ return manager_connect(m);
+ }
+
+ /* Too short or too long packet? */
+ if (iov.iov_len < sizeof(struct ntp_msg) || (msghdr.msg_flags & MSG_TRUNC)) {
+ log_warning("Invalid response from server. Disconnecting.");
+ return manager_connect(m);
+ }
+
+ if (!m->current_server_name ||
+ !m->current_server_address ||
+ !sockaddr_equal(&server_addr, &m->current_server_address->sockaddr)) {
+ log_debug("Response from unknown server.");
+ return 0;
+ }
+
+ CMSG_FOREACH(cmsg, &msghdr) {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case SCM_TIMESTAMPNS:
+ recv_time = (struct timespec *) CMSG_DATA(cmsg);
+ break;
+ }
+ }
+ if (!recv_time)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid packet timestamp.");
+
+ if (!m->pending) {
+ log_debug("Unexpected reply. Ignoring.");
+ return 0;
+ }
+
+ m->missed_replies = 0;
+
+ /* check our "time cookie" (we just stored nanoseconds in the fraction field) */
+ if (be32toh(ntpmsg.origin_time.sec) != m->trans_time.tv_sec + OFFSET_1900_1970 ||
+ be32toh(ntpmsg.origin_time.frac) != (unsigned long) m->trans_time.tv_nsec) {
+ log_debug("Invalid reply; not our transmit time. Ignoring.");
+ return 0;
+ }
+
+ m->event_timeout = sd_event_source_unref(m->event_timeout);
+
+ if (be32toh(ntpmsg.recv_time.sec) < TIME_EPOCH + OFFSET_1900_1970 ||
+ be32toh(ntpmsg.trans_time.sec) < TIME_EPOCH + OFFSET_1900_1970) {
+ log_debug("Invalid reply, returned times before epoch. Ignoring.");
+ return manager_connect(m);
+ }
+
+ if (NTP_FIELD_LEAP(ntpmsg.field) == NTP_LEAP_NOTINSYNC ||
+ ntpmsg.stratum == 0 || ntpmsg.stratum >= 16) {
+ log_debug("Server is not synchronized. Disconnecting.");
+ return manager_connect(m);
+ }
+
+ if (!IN_SET(NTP_FIELD_VERSION(ntpmsg.field), 3, 4)) {
+ log_debug("Response NTPv%d. Disconnecting.", NTP_FIELD_VERSION(ntpmsg.field));
+ return manager_connect(m);
+ }
+
+ if (NTP_FIELD_MODE(ntpmsg.field) != NTP_MODE_SERVER) {
+ log_debug("Unsupported mode %d. Disconnecting.", NTP_FIELD_MODE(ntpmsg.field));
+ return manager_connect(m);
+ }
+
+ root_distance = ntp_ts_short_to_d(&ntpmsg.root_delay) / 2 + ntp_ts_short_to_d(&ntpmsg.root_dispersion);
+ if (root_distance > (double) m->max_root_distance_usec / (double) USEC_PER_SEC) {
+ log_info("Server has too large root distance. Disconnecting.");
+ return manager_connect(m);
+ }
+
+ /* valid packet */
+ m->pending = false;
+ m->retry_interval = 0;
+
+ /* Stop listening */
+ manager_listen_stop(m);
+
+ /* announce leap seconds */
+ if (NTP_FIELD_LEAP(ntpmsg.field) & NTP_LEAP_PLUSSEC)
+ leap_sec = 1;
+ else if (NTP_FIELD_LEAP(ntpmsg.field) & NTP_LEAP_MINUSSEC)
+ leap_sec = -1;
+ else
+ leap_sec = 0;
+
+ /*
+ * "Timestamp Name ID When Generated
+ * ------------------------------------------------------------
+ * Originate Timestamp T1 time request sent by client
+ * Receive Timestamp T2 time request received by server
+ * Transmit Timestamp T3 time reply sent by server
+ * Destination Timestamp T4 time reply received by client
+ *
+ * The round-trip delay, d, and system clock offset, t, are defined as:
+ * d = (T4 - T1) - (T3 - T2) t = ((T2 - T1) + (T3 - T4)) / 2"
+ */
+ origin = ts_to_d(&m->trans_time) + OFFSET_1900_1970;
+ receive = ntp_ts_to_d(&ntpmsg.recv_time);
+ trans = ntp_ts_to_d(&ntpmsg.trans_time);
+ dest = ts_to_d(recv_time) + OFFSET_1900_1970;
+
+ offset = ((receive - origin) + (trans - dest)) / 2;
+ delay = (dest - origin) - (trans - receive);
+
+ spike = manager_sample_spike_detection(m, offset, delay);
+
+ manager_adjust_poll(m, offset, spike);
+
+ log_debug("NTP response:\n"
+ " leap : %u\n"
+ " version : %u\n"
+ " mode : %u\n"
+ " stratum : %u\n"
+ " precision : %.6f sec (%d)\n"
+ " root distance: %.6f sec\n"
+ " reference : %.4s\n"
+ " origin : %.3f\n"
+ " receive : %.3f\n"
+ " transmit : %.3f\n"
+ " dest : %.3f\n"
+ " offset : %+.3f sec\n"
+ " delay : %+.3f sec\n"
+ " packet count : %"PRIu64"\n"
+ " jitter : %.3f%s\n"
+ " poll interval: " USEC_FMT "\n",
+ NTP_FIELD_LEAP(ntpmsg.field),
+ NTP_FIELD_VERSION(ntpmsg.field),
+ NTP_FIELD_MODE(ntpmsg.field),
+ ntpmsg.stratum,
+ exp2(ntpmsg.precision), ntpmsg.precision,
+ root_distance,
+ ntpmsg.stratum == 1 ? ntpmsg.refid : "n/a",
+ origin - OFFSET_1900_1970,
+ receive - OFFSET_1900_1970,
+ trans - OFFSET_1900_1970,
+ dest - OFFSET_1900_1970,
+ offset, delay,
+ m->packet_count,
+ m->samples_jitter, spike ? " spike" : "",
+ m->poll_interval_usec / USEC_PER_SEC);
+
+ if (!spike) {
+ m->sync = true;
+ r = manager_adjust_clock(m, offset, leap_sec);
+ if (r < 0)
+ log_error_errno(r, "Failed to call clock_adjtime(): %m");
+ }
+
+ /* Save NTP response */
+ m->ntpmsg = ntpmsg;
+ m->origin_time = m->trans_time;
+ m->dest_time = *recv_time;
+ m->spike = spike;
+
+ log_debug("interval/delta/delay/jitter/drift " USEC_FMT "s/%+.3fs/%.3fs/%.3fs/%+"PRIi64"ppm%s",
+ m->poll_interval_usec / USEC_PER_SEC, offset, delay, m->samples_jitter, m->drift_freq / 65536,
+ spike ? " (ignored)" : "");
+
+ (void) sd_bus_emit_properties_changed(m->bus, "/org/freedesktop/timesync1", "org.freedesktop.timesync1.Manager", "NTPMessage", NULL);
+
+ if (!m->good) {
+ _cleanup_free_ char *pretty = NULL;
+
+ m->good = true;
+
+ server_address_pretty(m->current_server_address, &pretty);
+ /* "Initial", as further successful syncs will not be logged. */
+ log_info("Initial synchronization to time server %s (%s).", strna(pretty), m->current_server_name->string);
+ sd_notifyf(false, "STATUS=Initial synchronization to time server %s (%s).", strna(pretty), m->current_server_name->string);
+ }
+
+ r = manager_arm_timer(m, m->poll_interval_usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to rearm timer: %m");
+
+ return 0;
+}
+
+static int manager_listen_setup(Manager *m) {
+ union sockaddr_union addr = {};
+ int r;
+
+ assert(m);
+
+ if (m->server_socket >= 0)
+ return 0;
+
+ assert(!m->event_receive);
+ assert(m->current_server_address);
+
+ addr.sa.sa_family = m->current_server_address->sockaddr.sa.sa_family;
+
+ m->server_socket = socket(addr.sa.sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+ if (m->server_socket < 0)
+ return -errno;
+
+ r = bind(m->server_socket, &addr.sa, m->current_server_address->socklen);
+ if (r < 0)
+ return -errno;
+
+ r = setsockopt_int(m->server_socket, SOL_SOCKET, SO_TIMESTAMPNS, true);
+ if (r < 0)
+ return r;
+
+ if (addr.sa.sa_family == AF_INET)
+ (void) setsockopt_int(m->server_socket, IPPROTO_IP, IP_TOS, IPTOS_LOWDELAY);
+
+ return sd_event_add_io(m->event, &m->event_receive, m->server_socket, EPOLLIN, manager_receive_response, m);
+}
+
+static void manager_listen_stop(Manager *m) {
+ assert(m);
+
+ m->event_receive = sd_event_source_unref(m->event_receive);
+ m->server_socket = safe_close(m->server_socket);
+}
+
+static int manager_begin(Manager *m) {
+ _cleanup_free_ char *pretty = NULL;
+ int r;
+
+ assert(m);
+ assert_return(m->current_server_name, -EHOSTUNREACH);
+ assert_return(m->current_server_address, -EHOSTUNREACH);
+
+ m->good = false;
+ m->missed_replies = NTP_MAX_MISSED_REPLIES;
+ if (m->poll_interval_usec == 0)
+ m->poll_interval_usec = m->poll_interval_min_usec;
+
+ server_address_pretty(m->current_server_address, &pretty);
+ log_debug("Connecting to time server %s (%s).", strna(pretty), m->current_server_name->string);
+ sd_notifyf(false, "STATUS=Connecting to time server %s (%s).", strna(pretty), m->current_server_name->string);
+
+ r = manager_clock_watch_setup(m);
+ if (r < 0)
+ return r;
+
+ return manager_send_request(m);
+}
+
+void manager_set_server_name(Manager *m, ServerName *n) {
+ assert(m);
+
+ if (m->current_server_name == n)
+ return;
+
+ m->current_server_name = n;
+ m->current_server_address = NULL;
+
+ manager_disconnect(m);
+
+ if (n)
+ log_debug("Selected server %s.", n->string);
+}
+
+void manager_set_server_address(Manager *m, ServerAddress *a) {
+ assert(m);
+
+ if (m->current_server_address == a)
+ return;
+
+ m->current_server_address = a;
+ /* If a is NULL, we are just clearing the address, without
+ * changing the name. Keep the existing name in that case. */
+ if (a)
+ m->current_server_name = a->name;
+
+ manager_disconnect(m);
+
+ if (a) {
+ _cleanup_free_ char *pretty = NULL;
+ server_address_pretty(a, &pretty);
+ log_debug("Selected address %s of server %s.", strna(pretty), a->name->string);
+ }
+}
+
+static int manager_resolve_handler(sd_resolve_query *q, int ret, const struct addrinfo *ai, Manager *m) {
+ int r;
+
+ assert(q);
+ assert(m);
+ assert(m->current_server_name);
+
+ m->resolve_query = sd_resolve_query_unref(m->resolve_query);
+
+ if (ret != 0) {
+ log_debug("Failed to resolve %s: %s", m->current_server_name->string, gai_strerror(ret));
+
+ /* Try next host */
+ return manager_connect(m);
+ }
+
+ for (; ai; ai = ai->ai_next) {
+ _cleanup_free_ char *pretty = NULL;
+ ServerAddress *a;
+
+ assert(ai->ai_addr);
+ assert(ai->ai_addrlen >= offsetof(struct sockaddr, sa_data));
+
+ if (!IN_SET(ai->ai_addr->sa_family, AF_INET, AF_INET6)) {
+ log_warning("Unsuitable address protocol for %s", m->current_server_name->string);
+ continue;
+ }
+
+ r = server_address_new(m->current_server_name, &a, (const union sockaddr_union*) ai->ai_addr, ai->ai_addrlen);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add server address: %m");
+
+ server_address_pretty(a, &pretty);
+ log_debug("Resolved address %s for %s.", pretty, m->current_server_name->string);
+ }
+
+ if (!m->current_server_name->addresses) {
+ log_error("Failed to find suitable address for host %s.", m->current_server_name->string);
+
+ /* Try next host */
+ return manager_connect(m);
+ }
+
+ manager_set_server_address(m, m->current_server_name->addresses);
+
+ return manager_begin(m);
+}
+
+static int manager_retry_connect(sd_event_source *source, usec_t usec, void *userdata) {
+ Manager *m = userdata;
+
+ assert(m);
+
+ return manager_connect(m);
+}
+
+int manager_connect(Manager *m) {
+ int r;
+
+ assert(m);
+
+ manager_disconnect(m);
+
+ m->event_retry = sd_event_source_unref(m->event_retry);
+ if (!ratelimit_below(&m->ratelimit)) {
+ log_debug("Delaying attempts to contact servers.");
+
+ r = sd_event_add_time_relative(m->event, &m->event_retry, clock_boottime_or_monotonic(), RETRY_USEC, 0, manager_retry_connect, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create retry timer: %m");
+
+ return 0;
+ }
+
+ /* If we already are operating on some address, switch to the
+ * next one. */
+ if (m->current_server_address && m->current_server_address->addresses_next)
+ manager_set_server_address(m, m->current_server_address->addresses_next);
+ else {
+ struct addrinfo hints = {
+ .ai_flags = AI_NUMERICSERV|AI_ADDRCONFIG,
+ .ai_socktype = SOCK_DGRAM,
+ };
+
+ /* Hmm, we are through all addresses, let's look for the next host instead */
+ if (m->current_server_name && m->current_server_name->names_next)
+ manager_set_server_name(m, m->current_server_name->names_next);
+ else {
+ ServerName *f;
+ bool restart = true;
+
+ /* Our current server name list is exhausted,
+ * let's find the next one to iterate. First
+ * we try the system list, then the link list.
+ * After having processed the link list we
+ * jump back to the system list. However, if
+ * both lists are empty, we change to the
+ * fallback list. */
+ if (!m->current_server_name || m->current_server_name->type == SERVER_LINK) {
+ f = m->system_servers;
+ if (!f)
+ f = m->link_servers;
+ } else {
+ f = m->link_servers;
+ if (!f)
+ f = m->system_servers;
+ else
+ restart = false;
+ }
+
+ if (!f)
+ f = m->fallback_servers;
+
+ if (!f) {
+ manager_set_server_name(m, NULL);
+ log_debug("No server found.");
+ return 0;
+ }
+
+ if (restart && !m->exhausted_servers && m->poll_interval_usec) {
+ log_debug("Waiting after exhausting servers.");
+ r = sd_event_add_time_relative(m->event, &m->event_retry, clock_boottime_or_monotonic(), m->poll_interval_usec, 0, manager_retry_connect, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create retry timer: %m");
+
+ m->exhausted_servers = true;
+
+ /* Increase the polling interval */
+ if (m->poll_interval_usec < m->poll_interval_max_usec)
+ m->poll_interval_usec *= 2;
+
+ return 0;
+ }
+
+ m->exhausted_servers = false;
+
+ manager_set_server_name(m, f);
+ }
+
+ /* Tell the resolver to reread /etc/resolv.conf, in
+ * case it changed. */
+ res_init();
+
+ /* Flush out any previously resolved addresses */
+ server_name_flush_addresses(m->current_server_name);
+
+ log_debug("Resolving %s...", m->current_server_name->string);
+
+ r = resolve_getaddrinfo(m->resolve, &m->resolve_query, m->current_server_name->string, "123", &hints, manager_resolve_handler, NULL, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create resolver: %m");
+
+ return 1;
+ }
+
+ r = manager_begin(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+void manager_disconnect(Manager *m) {
+ assert(m);
+
+ m->resolve_query = sd_resolve_query_unref(m->resolve_query);
+
+ m->event_timer = sd_event_source_unref(m->event_timer);
+
+ manager_listen_stop(m);
+
+ m->event_clock_watch = sd_event_source_unref(m->event_clock_watch);
+ m->clock_watch_fd = safe_close(m->clock_watch_fd);
+
+ m->event_timeout = sd_event_source_unref(m->event_timeout);
+
+ sd_notifyf(false, "STATUS=Idle.");
+}
+
+void manager_flush_server_names(Manager *m, ServerType t) {
+ assert(m);
+
+ if (t == SERVER_SYSTEM)
+ while (m->system_servers)
+ server_name_free(m->system_servers);
+
+ if (t == SERVER_LINK)
+ while (m->link_servers)
+ server_name_free(m->link_servers);
+
+ if (t == SERVER_FALLBACK)
+ while (m->fallback_servers)
+ server_name_free(m->fallback_servers);
+}
+
+void manager_free(Manager *m) {
+ if (!m)
+ return;
+
+ manager_disconnect(m);
+ manager_flush_server_names(m, SERVER_SYSTEM);
+ manager_flush_server_names(m, SERVER_LINK);
+ manager_flush_server_names(m, SERVER_FALLBACK);
+
+ sd_event_source_unref(m->event_retry);
+
+ sd_event_source_unref(m->network_event_source);
+ sd_network_monitor_unref(m->network_monitor);
+
+ sd_resolve_unref(m->resolve);
+ sd_event_unref(m->event);
+
+ sd_bus_flush_close_unref(m->bus);
+
+ free(m);
+}
+
+static int manager_network_read_link_servers(Manager *m) {
+ _cleanup_strv_free_ char **ntp = NULL;
+ ServerName *n, *nx;
+ char **i;
+ bool changed = false;
+ int r;
+
+ assert(m);
+
+ r = sd_network_get_ntp(&ntp);
+ if (r < 0) {
+ if (r == -ENOMEM)
+ log_oom();
+ else
+ log_debug_errno(r, "Failed to get link NTP servers: %m");
+ goto clear;
+ }
+
+ LIST_FOREACH(names, n, m->link_servers)
+ n->marked = true;
+
+ STRV_FOREACH(i, ntp) {
+ bool found = false;
+
+ r = dns_name_is_valid_or_address(*i);
+ if (r < 0) {
+ log_error_errno(r, "Failed to check validity of NTP server name or address '%s': %m", *i);
+ goto clear;
+ } else if (r == 0) {
+ log_error("Invalid NTP server name or address, ignoring: %s", *i);
+ continue;
+ }
+
+ LIST_FOREACH(names, n, m->link_servers)
+ if (streq(n->string, *i)) {
+ n->marked = false;
+ found = true;
+ break;
+ }
+
+ if (!found) {
+ r = server_name_new(m, NULL, SERVER_LINK, *i);
+ if (r < 0) {
+ log_oom();
+ goto clear;
+ }
+
+ changed = true;
+ }
+ }
+
+ LIST_FOREACH_SAFE(names, n, nx, m->link_servers)
+ if (n->marked) {
+ server_name_free(n);
+ changed = true;
+ }
+
+ return changed;
+
+clear:
+ manager_flush_server_names(m, SERVER_LINK);
+ return r;
+}
+
+static bool manager_is_connected(Manager *m) {
+ /* Return true when the manager is sending a request, resolving a server name, or
+ * in a poll interval. */
+ return m->server_socket >= 0 || m->resolve_query || m->event_timer;
+}
+
+static int manager_network_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ bool changed, connected, online;
+ int r;
+
+ assert(m);
+
+ sd_network_monitor_flush(m->network_monitor);
+
+ /* When manager_network_read_link_servers() failed, we assume that the servers are changed. */
+ changed = manager_network_read_link_servers(m);
+
+ /* check if the machine is online */
+ online = network_is_online();
+
+ /* check if the client is currently connected */
+ connected = manager_is_connected(m);
+
+ if (connected && !online) {
+ log_info("No network connectivity, watching for changes.");
+ manager_disconnect(m);
+
+ } else if ((!connected || changed) && online) {
+ log_info("Network configuration changed, trying to establish connection.");
+
+ if (m->current_server_address)
+ r = manager_begin(m);
+ else
+ r = manager_connect(m);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int manager_network_monitor_listen(Manager *m) {
+ int r, fd, events;
+
+ assert(m);
+
+ r = sd_network_monitor_new(&m->network_monitor, NULL);
+ if (r == -ENOENT) {
+ log_info("systemd does not appear to be running, not listening for systemd-networkd events.");
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ fd = sd_network_monitor_get_fd(m->network_monitor);
+ if (fd < 0)
+ return fd;
+
+ events = sd_network_monitor_get_events(m->network_monitor);
+ if (events < 0)
+ return events;
+
+ r = sd_event_add_io(m->event, &m->network_event_source, fd, events, manager_network_event_handler, m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ assert(ret);
+
+ m = new0(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ m->max_root_distance_usec = NTP_MAX_ROOT_DISTANCE;
+ m->poll_interval_min_usec = NTP_POLL_INTERVAL_MIN_USEC;
+ m->poll_interval_max_usec = NTP_POLL_INTERVAL_MAX_USEC;
+
+ m->server_socket = m->clock_watch_fd = -1;
+
+ m->ratelimit = (RateLimit) { RATELIMIT_INTERVAL_USEC, RATELIMIT_BURST };
+
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ r = sd_resolve_default(&m->resolve);
+ if (r < 0)
+ return r;
+
+ r = sd_resolve_attach_event(m->resolve, m->event, 0);
+ if (r < 0)
+ return r;
+
+ r = manager_network_monitor_listen(m);
+ if (r < 0)
+ return r;
+
+ (void) manager_network_read_link_servers(m);
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
diff --git a/src/timesync/timesyncd-manager.h b/src/timesync/timesyncd-manager.h
new file mode 100644
index 0000000..940a88e
--- /dev/null
+++ b/src/timesync/timesyncd-manager.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/timex.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+#include "sd-network.h"
+#include "sd-resolve.h"
+
+#include "list.h"
+#include "ratelimit.h"
+#include "time-util.h"
+#include "timesyncd-ntp-message.h"
+
+typedef struct Manager Manager;
+
+#include "timesyncd-server.h"
+
+/*
+ * "A client MUST NOT under any conditions use a poll interval less
+ * than 15 seconds."
+ */
+#define NTP_POLL_INTERVAL_MIN_USEC (32 * USEC_PER_SEC)
+#define NTP_POLL_INTERVAL_MAX_USEC (2048 * USEC_PER_SEC)
+
+#define NTP_RETRY_INTERVAL_MIN_USEC (15 * USEC_PER_SEC)
+#define NTP_RETRY_INTERVAL_MAX_USEC (6 * 60 * USEC_PER_SEC) /* 6 minutes */
+
+struct Manager {
+ sd_bus *bus;
+ sd_event *event;
+ sd_resolve *resolve;
+
+ LIST_HEAD(ServerName, system_servers);
+ LIST_HEAD(ServerName, link_servers);
+ LIST_HEAD(ServerName, fallback_servers);
+
+ bool have_fallbacks:1;
+
+ RateLimit ratelimit;
+ bool exhausted_servers;
+
+ /* network */
+ sd_event_source *network_event_source;
+ sd_network_monitor *network_monitor;
+
+ /* peer */
+ sd_resolve_query *resolve_query;
+ sd_event_source *event_receive;
+ ServerName *current_server_name;
+ ServerAddress *current_server_address;
+ int server_socket;
+ int missed_replies;
+ uint64_t packet_count;
+ sd_event_source *event_timeout;
+ bool good;
+
+ /* last sent packet */
+ struct timespec trans_time_mon;
+ struct timespec trans_time;
+ usec_t retry_interval;
+ bool pending;
+
+ /* poll timer */
+ sd_event_source *event_timer;
+ usec_t poll_interval_usec;
+ usec_t poll_interval_min_usec;
+ usec_t poll_interval_max_usec;
+ bool poll_resync;
+
+ /* history data */
+ struct {
+ double offset;
+ double delay;
+ } samples[8];
+ unsigned samples_idx;
+ double samples_jitter;
+ usec_t max_root_distance_usec;
+
+ /* last change */
+ bool jumped;
+ bool sync;
+ int64_t drift_freq;
+
+ /* watch for time changes */
+ sd_event_source *event_clock_watch;
+ int clock_watch_fd;
+
+ /* Retry connections */
+ sd_event_source *event_retry;
+
+ /* RTC runs in local time, leave it alone */
+ bool rtc_local_time;
+
+ /* NTP response */
+ struct ntp_msg ntpmsg;
+ struct timespec origin_time, dest_time;
+ bool spike;
+};
+
+int manager_new(Manager **ret);
+void manager_free(Manager *m);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+void manager_set_server_name(Manager *m, ServerName *n);
+void manager_set_server_address(Manager *m, ServerAddress *a);
+void manager_flush_server_names(Manager *m, ServerType t);
+
+int manager_connect(Manager *m);
+void manager_disconnect(Manager *m);
diff --git a/src/timesync/timesyncd-ntp-message.h b/src/timesync/timesyncd-ntp-message.h
new file mode 100644
index 0000000..76ed9ec
--- /dev/null
+++ b/src/timesync/timesyncd-ntp-message.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sparse-endian.h"
+
+/* NTP protocol, packet header */
+#define NTP_LEAP_PLUSSEC 1
+#define NTP_LEAP_MINUSSEC 2
+#define NTP_LEAP_NOTINSYNC 3
+#define NTP_MODE_CLIENT 3
+#define NTP_MODE_SERVER 4
+#define NTP_FIELD_LEAP(f) (((f) >> 6) & 3)
+#define NTP_FIELD_VERSION(f) (((f) >> 3) & 7)
+#define NTP_FIELD_MODE(f) ((f) & 7)
+#define NTP_FIELD(l, v, m) (((l) << 6) | ((v) << 3) | (m))
+
+/*
+ * "NTP timestamps are represented as a 64-bit unsigned fixed-point number,
+ * in seconds relative to 0h on 1 January 1900."
+ */
+#define OFFSET_1900_1970 UINT64_C(2208988800)
+
+struct ntp_ts {
+ be32_t sec;
+ be32_t frac;
+} _packed_;
+
+struct ntp_ts_short {
+ be16_t sec;
+ be16_t frac;
+} _packed_;
+
+struct ntp_msg {
+ uint8_t field;
+ uint8_t stratum;
+ int8_t poll;
+ int8_t precision;
+ struct ntp_ts_short root_delay;
+ struct ntp_ts_short root_dispersion;
+ char refid[4];
+ struct ntp_ts reference_time;
+ struct ntp_ts origin_time;
+ struct ntp_ts recv_time;
+ struct ntp_ts trans_time;
+} _packed_;
diff --git a/src/timesync/timesyncd-server.c b/src/timesync/timesyncd-server.c
new file mode 100644
index 0000000..f7ec317
--- /dev/null
+++ b/src/timesync/timesyncd-server.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "timesyncd-server.h"
+
+int server_address_new(
+ ServerName *n,
+ ServerAddress **ret,
+ const union sockaddr_union *sockaddr,
+ socklen_t socklen) {
+
+ ServerAddress *a, *tail;
+
+ assert(n);
+ assert(sockaddr);
+ assert(socklen >= offsetof(struct sockaddr, sa_data));
+ assert(socklen <= sizeof(union sockaddr_union));
+
+ a = new0(ServerAddress, 1);
+ if (!a)
+ return -ENOMEM;
+
+ memcpy(&a->sockaddr, sockaddr, socklen);
+ a->socklen = socklen;
+
+ LIST_FIND_TAIL(addresses, n->addresses, tail);
+ LIST_INSERT_AFTER(addresses, n->addresses, tail, a);
+ a->name = n;
+
+ if (ret)
+ *ret = a;
+
+ return 0;
+}
+
+ServerAddress* server_address_free(ServerAddress *a) {
+ if (!a)
+ return NULL;
+
+ if (a->name) {
+ LIST_REMOVE(addresses, a->name->addresses, a);
+
+ if (a->name->manager && a->name->manager->current_server_address == a)
+ manager_set_server_address(a->name->manager, NULL);
+ }
+
+ return mfree(a);
+}
+
+int server_name_new(
+ Manager *m,
+ ServerName **ret,
+ ServerType type,
+ const char *string) {
+
+ ServerName *n, *tail;
+
+ assert(m);
+ assert(string);
+
+ n = new0(ServerName, 1);
+ if (!n)
+ return -ENOMEM;
+
+ n->type = type;
+ n->string = strdup(string);
+ if (!n->string) {
+ free(n);
+ return -ENOMEM;
+ }
+
+ if (type == SERVER_SYSTEM) {
+ LIST_FIND_TAIL(names, m->system_servers, tail);
+ LIST_INSERT_AFTER(names, m->system_servers, tail, n);
+ } else if (type == SERVER_LINK) {
+ LIST_FIND_TAIL(names, m->link_servers, tail);
+ LIST_INSERT_AFTER(names, m->link_servers, tail, n);
+ } else if (type == SERVER_FALLBACK) {
+ LIST_FIND_TAIL(names, m->fallback_servers, tail);
+ LIST_INSERT_AFTER(names, m->fallback_servers, tail, n);
+ } else
+ assert_not_reached("Unknown server type");
+
+ n->manager = m;
+
+ if (type != SERVER_FALLBACK &&
+ m->current_server_name &&
+ m->current_server_name->type == SERVER_FALLBACK)
+ manager_set_server_name(m, NULL);
+
+ log_debug("Added new server %s.", string);
+
+ if (ret)
+ *ret = n;
+
+ return 0;
+}
+
+ServerName *server_name_free(ServerName *n) {
+ if (!n)
+ return NULL;
+
+ server_name_flush_addresses(n);
+
+ if (n->manager) {
+ if (n->type == SERVER_SYSTEM)
+ LIST_REMOVE(names, n->manager->system_servers, n);
+ else if (n->type == SERVER_LINK)
+ LIST_REMOVE(names, n->manager->link_servers, n);
+ else if (n->type == SERVER_FALLBACK)
+ LIST_REMOVE(names, n->manager->fallback_servers, n);
+ else
+ assert_not_reached("Unknown server type");
+
+ if (n->manager->current_server_name == n)
+ manager_set_server_name(n->manager, NULL);
+ }
+
+ log_debug("Removed server %s.", n->string);
+
+ free(n->string);
+ return mfree(n);
+}
+
+void server_name_flush_addresses(ServerName *n) {
+ assert(n);
+
+ while (n->addresses)
+ server_address_free(n->addresses);
+}
diff --git a/src/timesync/timesyncd-server.h b/src/timesync/timesyncd-server.h
new file mode 100644
index 0000000..8e9e408
--- /dev/null
+++ b/src/timesync/timesyncd-server.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "list.h"
+#include "socket-util.h"
+
+typedef struct ServerAddress ServerAddress;
+typedef struct ServerName ServerName;
+
+typedef enum ServerType {
+ SERVER_SYSTEM,
+ SERVER_FALLBACK,
+ SERVER_LINK,
+} ServerType;
+
+#include "timesyncd-manager.h"
+
+struct ServerAddress {
+ ServerName *name;
+
+ union sockaddr_union sockaddr;
+ socklen_t socklen;
+
+ LIST_FIELDS(ServerAddress, addresses);
+};
+
+struct ServerName {
+ Manager *manager;
+
+ ServerType type;
+ char *string;
+
+ bool marked:1;
+
+ LIST_HEAD(ServerAddress, addresses);
+ LIST_FIELDS(ServerName, names);
+};
+
+int server_address_new(ServerName *n, ServerAddress **ret, const union sockaddr_union *sockaddr, socklen_t socklen);
+ServerAddress* server_address_free(ServerAddress *a);
+static inline int server_address_pretty(ServerAddress *a, char **pretty) {
+ return sockaddr_pretty(&a->sockaddr.sa, a->socklen, true, true, pretty);
+}
+
+int server_name_new(Manager *m, ServerName **ret, ServerType type,const char *string);
+ServerName *server_name_free(ServerName *n);
+void server_name_flush_addresses(ServerName *n);
diff --git a/src/timesync/timesyncd.c b/src/timesync/timesyncd.c
new file mode 100644
index 0000000..1f59bf1
--- /dev/null
+++ b/src/timesync/timesyncd.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "capability-util.h"
+#include "clock-util.h"
+#include "daemon-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "network-util.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "timesyncd-bus.h"
+#include "timesyncd-conf.h"
+#include "timesyncd-manager.h"
+#include "user-util.h"
+
+#define STATE_DIR "/var/lib/systemd/timesync"
+#define CLOCK_FILE STATE_DIR "/clock"
+
+static int load_clock_timestamp(uid_t uid, gid_t gid) {
+ _cleanup_close_ int fd = -1;
+ usec_t min = TIME_EPOCH * USEC_PER_SEC;
+ usec_t ct;
+ int r;
+
+ /* Let's try to make sure that the clock is always
+ * monotonically increasing, by saving the clock whenever we
+ * have a new NTP time, or when we shut down, and restoring it
+ * when we start again. This is particularly helpful on
+ * systems lacking a battery backed RTC. We also will adjust
+ * the time to at least the build time of systemd. */
+
+ fd = open(CLOCK_FILE, O_RDWR|O_CLOEXEC, 0644);
+ if (fd >= 0) {
+ struct stat st;
+ usec_t stamp;
+
+ /* check if the recorded time is later than the compiled-in one */
+ r = fstat(fd, &st);
+ if (r >= 0) {
+ stamp = timespec_load(&st.st_mtim);
+ if (stamp > min)
+ min = stamp;
+ }
+
+ if (geteuid() == 0) {
+ /* Try to fix the access mode, so that we can still
+ touch the file after dropping privileges */
+ r = fchmod_and_chown(fd, 0644, uid, gid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to chmod or chown %s, ignoring: %m", CLOCK_FILE);
+ }
+
+ } else {
+ r = mkdir_safe_label(STATE_DIR, 0755, uid, gid,
+ MKDIR_FOLLOW_SYMLINK | MKDIR_WARN_MODE);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to create state directory, ignoring: %m");
+ goto settime;
+ }
+
+ /* create stamp file with the compiled-in date */
+ r = touch_file(CLOCK_FILE, false, min, uid, gid, 0644);
+ if (r < 0)
+ log_debug_errno(r, "Failed to create %s, ignoring: %m", CLOCK_FILE);
+ }
+
+settime:
+ ct = now(CLOCK_REALTIME);
+ if (ct < min) {
+ struct timespec ts;
+ char date[FORMAT_TIMESTAMP_MAX];
+
+ log_info("System clock time unset or jumped backwards, restoring from recorded timestamp: %s",
+ format_timestamp(date, sizeof(date), min));
+
+ if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, min)) < 0)
+ log_error_errno(errno, "Failed to restore system clock, ignoring: %m");
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(notify_on_cleanup) const char *notify_message = NULL;
+ const char *user = "systemd-timesync";
+ uid_t uid, uid_current;
+ gid_t gid;
+ int r;
+
+ log_set_facility(LOG_CRON);
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program does not take arguments.");
+
+ uid = uid_current = geteuid();
+ gid = getegid();
+
+ if (uid_current == 0) {
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Cannot resolve user name %s: %m", user);
+ }
+
+ r = load_clock_timestamp(uid, gid);
+ if (r < 0)
+ return r;
+
+ /* Drop privileges, but only if we have been started as root. If we are not running as root we assume all
+ * privileges are already dropped. */
+ if (uid_current == 0) {
+ r = drop_privileges(uid, gid, (1ULL << CAP_SYS_TIME));
+ if (r < 0)
+ return log_error_errno(r, "Failed to drop privileges: %m");
+ }
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate manager: %m");
+
+ r = manager_connect_bus(m);
+ if (r < 0)
+ return log_error_errno(r, "Could not connect to bus: %m");
+
+ if (clock_is_localtime(NULL) > 0) {
+ log_info("The system is configured to read the RTC time in the local time zone. "
+ "This mode cannot be fully supported. All system time to RTC updates are disabled.");
+ m->rtc_local_time = true;
+ }
+
+ r = manager_parse_config_file(m);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse configuration file: %m");
+
+ r = manager_parse_fallback_string(m, NTP_SERVERS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse fallback server strings: %m");
+
+ log_debug("systemd-timesyncd running as pid " PID_FMT, getpid_cached());
+
+ notify_message = notify_start("READY=1\n"
+ "STATUS=Daemon is running",
+ NOTIFY_STOPPING);
+
+ if (network_is_online()) {
+ r = manager_connect(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to run event loop: %m");
+
+ /* if we got an authoritative time, store it in the file system */
+ if (m->sync) {
+ r = touch(CLOCK_FILE);
+ if (r < 0)
+ log_debug_errno(r, "Failed to touch %s, ignoring: %m", CLOCK_FILE);
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/timesync/timesyncd.conf.in b/src/timesync/timesyncd.conf.in
new file mode 100644
index 0000000..f91c034
--- /dev/null
+++ b/src/timesync/timesyncd.conf.in
@@ -0,0 +1,19 @@
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# Entries in this file show the compile time defaults.
+# You can change settings by editing this file.
+# Defaults can be restored by simply deleting this file.
+#
+# See timesyncd.conf(5) for details.
+
+[Time]
+#NTP=
+#FallbackNTP=@NTP_SERVERS@
+#RootDistanceMaxSec=5
+#PollIntervalMinSec=32
+#PollIntervalMaxSec=2048
diff --git a/src/tmpfiles/meson.build b/src/tmpfiles/meson.build
new file mode 100644
index 0000000..2d61568
--- /dev/null
+++ b/src/tmpfiles/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_tmpfiles_sources = [
+ 'src/tmpfiles/tmpfiles.c',
+ 'src/shared/offline-passwd.c',
+ 'src/shared/offline-passwd.h',
+]
diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c
new file mode 100644
index 0000000..9906c70
--- /dev/null
+++ b/src/tmpfiles/tmpfiles.c
@@ -0,0 +1,3495 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/fs.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/file.h>
+#include <sys/xattr.h>
+#include <sysexits.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-path.h"
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "capability-util.h"
+#include "chattr-util.h"
+#include "conf-files.h"
+#include "copy.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "io-util.h"
+#include "label.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "missing_stat.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "offline-passwd.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "rlimit-util.h"
+#include "rm-rf.h"
+#include "selinux-util.h"
+#include "set.h"
+#include "sort-util.h"
+#include "specifier.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "umask-util.h"
+#include "user-util.h"
+
+/* This reads all files listed in /etc/tmpfiles.d/?*.conf and creates
+ * them in the file system. This is intended to be used to create
+ * properly owned directories beneath /tmp, /var/tmp, /run, which are
+ * volatile and hence need to be recreated on bootup. */
+
+typedef enum OperationMask {
+ OPERATION_CREATE = 1 << 0,
+ OPERATION_REMOVE = 1 << 1,
+ OPERATION_CLEAN = 1 << 2,
+} OperationMask;
+
+typedef enum ItemType {
+ /* These ones take file names */
+ CREATE_FILE = 'f',
+ TRUNCATE_FILE = 'F', /* deprecated: use f+ */
+ CREATE_DIRECTORY = 'd',
+ TRUNCATE_DIRECTORY = 'D',
+ CREATE_SUBVOLUME = 'v',
+ CREATE_SUBVOLUME_INHERIT_QUOTA = 'q',
+ CREATE_SUBVOLUME_NEW_QUOTA = 'Q',
+ CREATE_FIFO = 'p',
+ CREATE_SYMLINK = 'L',
+ CREATE_CHAR_DEVICE = 'c',
+ CREATE_BLOCK_DEVICE = 'b',
+ COPY_FILES = 'C',
+
+ /* These ones take globs */
+ WRITE_FILE = 'w',
+ EMPTY_DIRECTORY = 'e',
+ SET_XATTR = 't',
+ RECURSIVE_SET_XATTR = 'T',
+ SET_ACL = 'a',
+ RECURSIVE_SET_ACL = 'A',
+ SET_ATTRIBUTE = 'h',
+ RECURSIVE_SET_ATTRIBUTE = 'H',
+ IGNORE_PATH = 'x',
+ IGNORE_DIRECTORY_PATH = 'X',
+ REMOVE_PATH = 'r',
+ RECURSIVE_REMOVE_PATH = 'R',
+ RELABEL_PATH = 'z',
+ RECURSIVE_RELABEL_PATH = 'Z',
+ ADJUST_MODE = 'm', /* legacy, 'z' is identical to this */
+} ItemType;
+
+typedef struct Item {
+ ItemType type;
+
+ char *path;
+ char *argument;
+ char **xattrs;
+#if HAVE_ACL
+ acl_t acl_access;
+ acl_t acl_default;
+#endif
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+ usec_t age;
+
+ dev_t major_minor;
+ unsigned attribute_value;
+ unsigned attribute_mask;
+
+ bool uid_set:1;
+ bool gid_set:1;
+ bool mode_set:1;
+ bool age_set:1;
+ bool mask_perms:1;
+ bool attribute_set:1;
+
+ bool keep_first_level:1;
+
+ bool append_or_force:1;
+
+ bool allow_failure:1;
+
+ OperationMask done;
+} Item;
+
+typedef struct ItemArray {
+ Item *items;
+ size_t n_items;
+ size_t allocated;
+
+ struct ItemArray *parent;
+ Set *children;
+} ItemArray;
+
+typedef enum DirectoryType {
+ DIRECTORY_RUNTIME,
+ DIRECTORY_STATE,
+ DIRECTORY_CACHE,
+ DIRECTORY_LOGS,
+ _DIRECTORY_TYPE_MAX,
+} DirectoryType;
+
+static bool arg_cat_config = false;
+static bool arg_user = false;
+static OperationMask arg_operation = 0;
+static bool arg_boot = false;
+static PagerFlags arg_pager_flags = 0;
+
+static char **arg_include_prefixes = NULL;
+static char **arg_exclude_prefixes = NULL;
+static char *arg_root = NULL;
+static char *arg_image = NULL;
+static char *arg_replace = NULL;
+
+#define MAX_DEPTH 256
+
+static OrderedHashmap *items = NULL, *globs = NULL;
+static Set *unix_sockets = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(items, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(globs, ordered_hashmap_freep);
+STATIC_DESTRUCTOR_REGISTER(unix_sockets, set_free_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_include_prefixes, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_exclude_prefixes, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
+
+static int specifier_machine_id_safe(char specifier, const void *data, const void *userdata, char **ret);
+static int specifier_directory(char specifier, const void *data, const void *userdata, char **ret);
+
+static const Specifier specifier_table[] = {
+ { 'a', specifier_architecture, NULL },
+ { 'b', specifier_boot_id, NULL },
+ { 'B', specifier_os_build_id, NULL },
+ { 'H', specifier_host_name, NULL },
+ { 'l', specifier_short_host_name, NULL },
+ { 'm', specifier_machine_id_safe, NULL },
+ { 'o', specifier_os_id, NULL },
+ { 'v', specifier_kernel_release, NULL },
+ { 'w', specifier_os_version_id, NULL },
+ { 'W', specifier_os_variant_id, NULL },
+
+ { 'h', specifier_user_home, NULL },
+
+ { 'C', specifier_directory, UINT_TO_PTR(DIRECTORY_CACHE) },
+ { 'L', specifier_directory, UINT_TO_PTR(DIRECTORY_LOGS) },
+ { 'S', specifier_directory, UINT_TO_PTR(DIRECTORY_STATE) },
+ { 't', specifier_directory, UINT_TO_PTR(DIRECTORY_RUNTIME) },
+
+ COMMON_CREDS_SPECIFIERS,
+
+ COMMON_TMP_SPECIFIERS,
+ {}
+};
+
+static int specifier_machine_id_safe(char specifier, const void *data, const void *userdata, char **ret) {
+ int r;
+
+ /* If /etc/machine_id is missing or empty (e.g. in a chroot environment)
+ * return a recognizable error so that the caller can skip the rule
+ * gracefully. */
+
+ r = specifier_machine_id(specifier, data, userdata, ret);
+ if (IN_SET(r, -ENOENT, -ENOMEDIUM))
+ return -ENXIO;
+
+ return r;
+}
+
+static int specifier_directory(char specifier, const void *data, const void *userdata, char **ret) {
+ struct table_entry {
+ uint64_t type;
+ const char *suffix;
+ };
+
+ static const struct table_entry paths_system[] = {
+ [DIRECTORY_RUNTIME] = { SD_PATH_SYSTEM_RUNTIME },
+ [DIRECTORY_STATE] = { SD_PATH_SYSTEM_STATE_PRIVATE },
+ [DIRECTORY_CACHE] = { SD_PATH_SYSTEM_STATE_CACHE },
+ [DIRECTORY_LOGS] = { SD_PATH_SYSTEM_STATE_LOGS },
+ };
+
+ static const struct table_entry paths_user[] = {
+ [DIRECTORY_RUNTIME] = { SD_PATH_USER_RUNTIME },
+ [DIRECTORY_STATE] = { SD_PATH_USER_CONFIGURATION },
+ [DIRECTORY_CACHE] = { SD_PATH_USER_STATE_CACHE },
+ [DIRECTORY_LOGS] = { SD_PATH_USER_CONFIGURATION, "log" },
+ };
+
+ unsigned i;
+ const struct table_entry *paths;
+
+ assert_cc(ELEMENTSOF(paths_system) == ELEMENTSOF(paths_user));
+ paths = arg_user ? paths_user : paths_system;
+
+ i = PTR_TO_UINT(data);
+ assert(i < ELEMENTSOF(paths_system));
+
+ return sd_path_lookup(paths[i].type, paths[i].suffix, ret);
+}
+
+static int log_unresolvable_specifier(const char *filename, unsigned line) {
+ static bool notified = false;
+
+ /* In system mode, this is called when /etc is not fully initialized (e.g.
+ * in a chroot environment) where some specifiers are unresolvable. In user
+ * mode, this is called when some variables are not defined. These cases are
+ * not considered as an error so log at LOG_NOTICE only for the first time
+ * and then downgrade this to LOG_DEBUG for the rest. */
+
+ log_syntax(NULL,
+ notified ? LOG_DEBUG : LOG_NOTICE,
+ filename, line, 0,
+ "Failed to resolve specifier: %s, skipping",
+ arg_user ? "Required $XDG_... variable not defined" : "uninitialized /etc detected");
+
+ if (!notified)
+ log_notice("All rules containing unresolvable specifiers will be skipped.");
+
+ notified = true;
+ return 0;
+}
+
+static int user_config_paths(char*** ret) {
+ _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL;
+ _cleanup_free_ char *persistent_config = NULL, *runtime_config = NULL, *data_home = NULL;
+ _cleanup_strv_free_ char **res = NULL;
+ int r;
+
+ r = xdg_user_dirs(&config_dirs, &data_dirs);
+ if (r < 0)
+ return r;
+
+ r = xdg_user_config_dir(&persistent_config, "/user-tmpfiles.d");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_runtime_dir(&runtime_config, "/user-tmpfiles.d");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_data_dir(&data_home, "/user-tmpfiles.d");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = strv_extend_strv_concat(&res, config_dirs, "/user-tmpfiles.d");
+ if (r < 0)
+ return r;
+
+ r = strv_extend(&res, persistent_config);
+ if (r < 0)
+ return r;
+
+ r = strv_extend(&res, runtime_config);
+ if (r < 0)
+ return r;
+
+ r = strv_extend(&res, data_home);
+ if (r < 0)
+ return r;
+
+ r = strv_extend_strv_concat(&res, data_dirs, "/user-tmpfiles.d");
+ if (r < 0)
+ return r;
+
+ r = path_strv_make_absolute_cwd(res);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(res);
+ return 0;
+}
+
+static bool needs_glob(ItemType t) {
+ return IN_SET(t,
+ WRITE_FILE,
+ IGNORE_PATH,
+ IGNORE_DIRECTORY_PATH,
+ REMOVE_PATH,
+ RECURSIVE_REMOVE_PATH,
+ EMPTY_DIRECTORY,
+ ADJUST_MODE,
+ RELABEL_PATH,
+ RECURSIVE_RELABEL_PATH,
+ SET_XATTR,
+ RECURSIVE_SET_XATTR,
+ SET_ACL,
+ RECURSIVE_SET_ACL,
+ SET_ATTRIBUTE,
+ RECURSIVE_SET_ATTRIBUTE);
+}
+
+static bool takes_ownership(ItemType t) {
+ return IN_SET(t,
+ CREATE_FILE,
+ TRUNCATE_FILE,
+ CREATE_DIRECTORY,
+ EMPTY_DIRECTORY,
+ TRUNCATE_DIRECTORY,
+ CREATE_SUBVOLUME,
+ CREATE_SUBVOLUME_INHERIT_QUOTA,
+ CREATE_SUBVOLUME_NEW_QUOTA,
+ CREATE_FIFO,
+ CREATE_SYMLINK,
+ CREATE_CHAR_DEVICE,
+ CREATE_BLOCK_DEVICE,
+ COPY_FILES,
+ WRITE_FILE,
+ IGNORE_PATH,
+ IGNORE_DIRECTORY_PATH,
+ REMOVE_PATH,
+ RECURSIVE_REMOVE_PATH);
+}
+
+static struct Item* find_glob(OrderedHashmap *h, const char *match) {
+ ItemArray *j;
+
+ ORDERED_HASHMAP_FOREACH(j, h) {
+ size_t n;
+
+ for (n = 0; n < j->n_items; n++) {
+ Item *item = j->items + n;
+
+ if (fnmatch(item->path, match, FNM_PATHNAME|FNM_PERIOD) == 0)
+ return item;
+ }
+ }
+
+ return NULL;
+}
+
+static int load_unix_sockets(void) {
+ _cleanup_set_free_free_ Set *sockets = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (unix_sockets)
+ return 0;
+
+ /* We maintain a cache of the sockets we found in /proc/net/unix to speed things up a little. */
+
+ sockets = set_new(&path_hash_ops);
+ if (!sockets)
+ return log_oom();
+
+ f = fopen("/proc/net/unix", "re");
+ if (!f)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to open /proc/net/unix, ignoring: %m");
+
+ /* Skip header */
+ r = read_line(f, LONG_LINE_MAX, NULL);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to skip /proc/net/unix header line: %m");
+ if (r == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EIO), "Premature end of file reading /proc/net/unix.");
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL, *s = NULL;
+ char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/net/unix line, ignoring: %m");
+ if (r == 0) /* EOF */
+ break;
+
+ p = strchr(line, ':');
+ if (!p)
+ continue;
+
+ if (strlen(p) < 37)
+ continue;
+
+ p += 37;
+ p += strspn(p, WHITESPACE);
+ p += strcspn(p, WHITESPACE); /* skip one more word */
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '/')
+ continue;
+
+ s = strdup(p);
+ if (!s)
+ return log_oom();
+
+ path_simplify(s, false);
+
+ r = set_consume(sockets, s);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return log_warning_errno(r, "Failed to add AF_UNIX socket to set, ignoring: %m");
+
+ TAKE_PTR(s);
+ }
+
+ unix_sockets = TAKE_PTR(sockets);
+ return 1;
+}
+
+static bool unix_socket_alive(const char *fn) {
+ assert(fn);
+
+ if (load_unix_sockets() < 0)
+ return true; /* We don't know, so assume yes */
+
+ return !!set_get(unix_sockets, (char*) fn);
+}
+
+static DIR* xopendirat_nomod(int dirfd, const char *path) {
+ DIR *dir;
+
+ dir = xopendirat(dirfd, path, O_NOFOLLOW|O_NOATIME);
+ if (dir)
+ return dir;
+
+ log_debug_errno(errno, "Cannot open %sdirectory \"%s\": %m", dirfd == AT_FDCWD ? "" : "sub", path);
+ if (errno != EPERM)
+ return NULL;
+
+ dir = xopendirat(dirfd, path, O_NOFOLLOW);
+ if (!dir)
+ log_debug_errno(errno, "Cannot open %sdirectory \"%s\": %m", dirfd == AT_FDCWD ? "" : "sub", path);
+
+ return dir;
+}
+
+static DIR* opendir_nomod(const char *path) {
+ return xopendirat_nomod(AT_FDCWD, path);
+}
+
+static inline nsec_t load_statx_timestamp_nsec(const struct statx_timestamp *ts) {
+ assert(ts);
+
+ if (ts->tv_sec < 0)
+ return NSEC_INFINITY;
+
+ if ((nsec_t) ts->tv_sec >= (UINT64_MAX - ts->tv_nsec) / NSEC_PER_SEC)
+ return NSEC_INFINITY;
+
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+static int dir_cleanup(
+ Item *i,
+ const char *p,
+ DIR *d,
+ nsec_t self_atime_nsec,
+ nsec_t self_mtime_nsec,
+ nsec_t cutoff_nsec,
+ dev_t rootdev_major,
+ dev_t rootdev_minor,
+ bool mountpoint,
+ int maxdepth,
+ bool keep_this_level) {
+
+ bool deleted = false;
+ struct dirent *dent;
+ int r = 0;
+
+ FOREACH_DIRENT_ALL(dent, d, break) {
+ _cleanup_free_ char *sub_path = NULL;
+ nsec_t atime_nsec, mtime_nsec, ctime_nsec, btime_nsec;
+
+ if (dot_or_dot_dot(dent->d_name))
+ continue;
+
+ /* If statx() is supported, use it. It's preferable over fstatat() since it tells us
+ * explicitly where we are looking at a mount point, for free as side information. Determining
+ * the same information without statx() is hard, see the complexity of path_is_mount_point(),
+ * and also much slower as it requires a number of syscalls instead of just one. Hence, when
+ * we have modern statx() we use it instead of fstat() and do proper mount point checks,
+ * while on older kernels's well do traditional st_dev based detection of mount points.
+ *
+ * Using statx() for detecting mount points also has the benfit that we handle weird file
+ * systems such as overlayfs better where each file is originating from a different
+ * st_dev. */
+
+ STRUCT_STATX_DEFINE(sx);
+
+ r = statx_fallback(
+ dirfd(d), dent->d_name,
+ AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT,
+ STATX_TYPE|STATX_MODE|STATX_UID|STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_BTIME,
+ &sx);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0) {
+ /* FUSE, NFS mounts, SELinux might return EACCES */
+ r = log_full_errno(errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+ "statx(%s/%s) failed: %m", p, dent->d_name);
+ continue;
+ }
+
+ if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) {
+ /* Yay, we have the mount point API, use it */
+ if (FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT)) {
+ log_debug("Ignoring \"%s/%s\": different mount points.", p, dent->d_name);
+ continue;
+ }
+ } else {
+ /* So we might have statx() but the STATX_ATTR_MOUNT_ROOT flag is not supported, fall
+ * back to traditional stx_dev checking. */
+ if (sx.stx_dev_major != rootdev_major ||
+ sx.stx_dev_minor != rootdev_minor) {
+ log_debug("Ignoring \"%s/%s\": different filesystem.", p, dent->d_name);
+ continue;
+ }
+
+ /* Try to detect bind mounts of the same filesystem instance; they do not differ in device
+ * major/minors. This type of query is not supported on all kernels or filesystem types
+ * though. */
+ if (S_ISDIR(sx.stx_mode)) {
+ int q;
+
+ q = fd_is_mount_point(dirfd(d), dent->d_name, 0);
+ if (q < 0)
+ log_debug_errno(q, "Failed to determine whether \"%s/%s\" is a mount point, ignoring: %m", p, dent->d_name);
+ else if (q > 0) {
+ log_debug("Ignoring \"%s/%s\": different mount of the same filesystem.", p, dent->d_name);
+ continue;
+ }
+ }
+ }
+
+ atime_nsec = FLAGS_SET(sx.stx_mask, STATX_ATIME) ? load_statx_timestamp_nsec(&sx.stx_atime) : 0;
+ mtime_nsec = FLAGS_SET(sx.stx_mask, STATX_MTIME) ? load_statx_timestamp_nsec(&sx.stx_mtime) : 0;
+ ctime_nsec = FLAGS_SET(sx.stx_mask, STATX_CTIME) ? load_statx_timestamp_nsec(&sx.stx_ctime) : 0;
+ btime_nsec = FLAGS_SET(sx.stx_mask, STATX_BTIME) ? load_statx_timestamp_nsec(&sx.stx_btime) : 0;
+
+ sub_path = path_join(p, dent->d_name);
+ if (!sub_path) {
+ r = log_oom();
+ goto finish;
+ }
+
+ /* Is there an item configured for this path? */
+ if (ordered_hashmap_get(items, sub_path)) {
+ log_debug("Ignoring \"%s\": a separate entry exists.", sub_path);
+ continue;
+ }
+
+ if (find_glob(globs, sub_path)) {
+ log_debug("Ignoring \"%s\": a separate glob exists.", sub_path);
+ continue;
+ }
+
+ if (S_ISDIR(sx.stx_mode)) {
+ _cleanup_closedir_ DIR *sub_dir = NULL;
+
+ if (mountpoint &&
+ streq(dent->d_name, "lost+found") &&
+ sx.stx_uid == 0) {
+ log_debug("Ignoring directory \"%s\".", sub_path);
+ continue;
+ }
+
+ if (maxdepth <= 0)
+ log_warning("Reached max depth on \"%s\".", sub_path);
+ else {
+ int q;
+
+ sub_dir = xopendirat_nomod(dirfd(d), dent->d_name);
+ if (!sub_dir) {
+ if (errno != ENOENT)
+ r = log_warning_errno(errno, "Opening directory \"%s\" failed, ignoring: %m", sub_path);
+
+ continue;
+ }
+
+ if (flock(dirfd(sub_dir), LOCK_EX|LOCK_NB) < 0) {
+ log_debug_errno(errno, "Couldn't acquire shared BSD lock on directory \"%s\", skipping: %m", p);
+ continue;
+ }
+
+ q = dir_cleanup(i,
+ sub_path, sub_dir,
+ atime_nsec, mtime_nsec, cutoff_nsec,
+ rootdev_major, rootdev_minor,
+ false, maxdepth-1, false);
+ if (q < 0)
+ r = q;
+ }
+
+ /* Note: if you are wondering why we don't support the sticky bit for excluding
+ * directories from cleaning like we do it for other file system objects: well, the
+ * sticky bit already has a meaning for directories, so we don't want to overload
+ * that. */
+
+ if (keep_this_level) {
+ log_debug("Keeping directory \"%s\".", sub_path);
+ continue;
+ }
+
+ /* Ignore ctime, we change it when deleting */
+ if (mtime_nsec != NSEC_INFINITY && mtime_nsec >= cutoff_nsec) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ /* Follows spelling in stat(1). */
+ log_debug("Directory \"%s\": modify time %s is too new.",
+ sub_path,
+ format_timestamp_style(a, sizeof(a), mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+ continue;
+ }
+
+ if (atime_nsec != NSEC_INFINITY && atime_nsec >= cutoff_nsec) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ log_debug("Directory \"%s\": access time %s is too new.",
+ sub_path,
+ format_timestamp_style(a, sizeof(a), atime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+ continue;
+ }
+
+ if (btime_nsec != NSEC_INFINITY && btime_nsec >= cutoff_nsec) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ log_debug("Directory \"%s\": birth time %s is too new.",
+ sub_path,
+ format_timestamp_style(a, sizeof(a), btime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+ continue;
+ }
+
+ log_debug("Removing directory \"%s\".", sub_path);
+ if (unlinkat(dirfd(d), dent->d_name, AT_REMOVEDIR) < 0)
+ if (!IN_SET(errno, ENOENT, ENOTEMPTY))
+ r = log_warning_errno(errno, "Failed to remove directory \"%s\", ignoring: %m", sub_path);
+
+ } else {
+ /* Skip files for which the sticky bit is set. These are semantics we define, and are
+ * unknown elsewhere. See XDG_RUNTIME_DIR specification for details. */
+ if (sx.stx_mode & S_ISVTX) {
+ log_debug("Skipping \"%s\": sticky bit set.", sub_path);
+ continue;
+ }
+
+ if (mountpoint &&
+ S_ISREG(sx.stx_mode) &&
+ sx.stx_uid == 0 &&
+ STR_IN_SET(dent->d_name,
+ ".journal",
+ "aquota.user",
+ "aquota.group")) {
+ log_debug("Skipping \"%s\".", sub_path);
+ continue;
+ }
+
+ /* Ignore sockets that are listed in /proc/net/unix */
+ if (S_ISSOCK(sx.stx_mode) && unix_socket_alive(sub_path)) {
+ log_debug("Skipping \"%s\": live socket.", sub_path);
+ continue;
+ }
+
+ /* Ignore device nodes */
+ if (S_ISCHR(sx.stx_mode) || S_ISBLK(sx.stx_mode)) {
+ log_debug("Skipping \"%s\": a device.", sub_path);
+ continue;
+ }
+
+ /* Keep files on this level around if this is requested */
+ if (keep_this_level) {
+ log_debug("Keeping \"%s\".", sub_path);
+ continue;
+ }
+
+ if (mtime_nsec != NSEC_INFINITY && mtime_nsec >= cutoff_nsec) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ /* Follows spelling in stat(1). */
+ log_debug("File \"%s\": modify time %s is too new.",
+ sub_path,
+ format_timestamp_style(a, sizeof(a), mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+ continue;
+ }
+
+ if (atime_nsec != NSEC_INFINITY && atime_nsec >= cutoff_nsec) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ log_debug("File \"%s\": access time %s is too new.",
+ sub_path,
+ format_timestamp_style(a, sizeof(a), atime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+ continue;
+ }
+
+ if (ctime_nsec != NSEC_INFINITY && ctime_nsec >= cutoff_nsec) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ log_debug("File \"%s\": change time %s is too new.",
+ sub_path,
+ format_timestamp_style(a, sizeof(a), ctime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+ continue;
+ }
+
+ if (btime_nsec != NSEC_INFINITY && btime_nsec >= cutoff_nsec) {
+ char a[FORMAT_TIMESTAMP_MAX];
+ log_debug("File \"%s\": birth time %s is too new.",
+ sub_path,
+ format_timestamp_style(a, sizeof(a), btime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+ continue;
+ }
+
+ log_debug("Removing \"%s\".", sub_path);
+ if (unlinkat(dirfd(d), dent->d_name, 0) < 0)
+ if (errno != ENOENT)
+ r = log_warning_errno(errno, "Failed to remove \"%s\", ignoring: %m", sub_path);
+
+ deleted = true;
+ }
+ }
+
+finish:
+ if (deleted) {
+ char a[FORMAT_TIMESTAMP_MAX], m[FORMAT_TIMESTAMP_MAX];
+ struct timespec ts[2];
+
+ log_debug("Restoring access and modification time on \"%s\": %s, %s",
+ p,
+ format_timestamp_style(a, sizeof(a), self_atime_nsec / NSEC_PER_USEC, TIMESTAMP_US),
+ format_timestamp_style(m, sizeof(m), self_mtime_nsec / NSEC_PER_USEC, TIMESTAMP_US));
+
+ timespec_store_nsec(ts + 0, self_atime_nsec);
+ timespec_store_nsec(ts + 1, self_mtime_nsec);
+
+ /* Restore original directory timestamps */
+ if (futimens(dirfd(d), ts) < 0)
+ log_warning_errno(errno, "Failed to revert timestamps of '%s', ignoring: %m", p);
+ }
+
+ return r;
+}
+
+static bool dangerous_hardlinks(void) {
+ _cleanup_free_ char *value = NULL;
+ static int cached = -1;
+ int r;
+
+ /* Check whether the fs.protected_hardlinks sysctl is on. If we can't determine it we assume its off, as that's
+ * what the upstream default is. */
+
+ if (cached >= 0)
+ return cached;
+
+ r = read_one_line_file("/proc/sys/fs/protected_hardlinks", &value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read fs.protected_hardlinks sysctl: %m");
+ return true;
+ }
+
+ r = parse_boolean(value);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse fs.protected_hardlinks sysctl: %m");
+ return true;
+ }
+
+ cached = r == 0;
+ return cached;
+}
+
+static bool hardlink_vulnerable(const struct stat *st) {
+ assert(st);
+
+ return !S_ISDIR(st->st_mode) && st->st_nlink > 1 && dangerous_hardlinks();
+}
+
+static mode_t process_mask_perms(mode_t mode, mode_t current) {
+
+ if ((current & 0111) == 0)
+ mode &= ~0111;
+ if ((current & 0222) == 0)
+ mode &= ~0222;
+ if ((current & 0444) == 0)
+ mode &= ~0444;
+ if (!S_ISDIR(current))
+ mode &= ~07000; /* remove sticky/sgid/suid bit, unless directory */
+
+ return mode;
+}
+
+static int fd_set_perms(Item *i, int fd, const char *path, const struct stat *st) {
+ struct stat stbuf;
+ mode_t new_mode;
+ bool do_chown;
+ int r;
+
+ assert(i);
+ assert(fd);
+ assert(path);
+
+ if (!i->mode_set && !i->uid_set && !i->gid_set)
+ goto shortcut;
+
+ if (!st) {
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "fstat(%s) failed: %m", path);
+ st = &stbuf;
+ }
+
+ if (hardlink_vulnerable(st))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Refusing to set permissions on hardlinked file %s while the fs.protected_hardlinks sysctl is turned off.",
+ path);
+
+ /* Do we need a chown()? */
+ do_chown =
+ (i->uid_set && i->uid != st->st_uid) ||
+ (i->gid_set && i->gid != st->st_gid);
+
+ /* Calculate the mode to apply */
+ new_mode = i->mode_set ? (i->mask_perms ?
+ process_mask_perms(i->mode, st->st_mode) :
+ i->mode) :
+ (st->st_mode & 07777);
+
+ if (i->mode_set && do_chown) {
+ /* Before we issue the chmod() let's reduce the access mode to the common bits of the old and
+ * the new mode. That way there's no time window where the file exists under the old owner
+ * with more than the old access modes — and not under the new owner with more than the new
+ * access modes either. */
+
+ if (S_ISLNK(st->st_mode))
+ log_debug("Skipping temporary mode fix for symlink %s.", path);
+ else {
+ mode_t m = new_mode & st->st_mode; /* Mask new mode by old mode */
+
+ if (((m ^ st->st_mode) & 07777) == 0)
+ log_debug("\"%s\" matches temporary mode %o already.", path, m);
+ else {
+ log_debug("Temporarily changing \"%s\" to mode %o.", path, m);
+ r = fchmod_opath(fd, m);
+ if (r < 0)
+ return log_error_errno(r, "fchmod() of %s failed: %m", path);
+ }
+ }
+ }
+
+ if (do_chown) {
+ log_debug("Changing \"%s\" to owner "UID_FMT":"GID_FMT,
+ path,
+ i->uid_set ? i->uid : UID_INVALID,
+ i->gid_set ? i->gid : GID_INVALID);
+
+ if (fchownat(fd,
+ "",
+ i->uid_set ? i->uid : UID_INVALID,
+ i->gid_set ? i->gid : GID_INVALID,
+ AT_EMPTY_PATH) < 0)
+ return log_error_errno(errno, "fchownat() of %s failed: %m", path);
+ }
+
+ /* Now, apply the final mode. We do this in two cases: when the user set a mode explicitly, or after a
+ * chown(), since chown()'s mangle the access mode in regards to sgid/suid in some conditions. */
+ if (i->mode_set || do_chown) {
+ if (S_ISLNK(st->st_mode))
+ log_debug("Skipping mode fix for symlink %s.", path);
+ else {
+ /* Check if the chmod() is unnecessary. Note that if we did a chown() before we always
+ * chmod() here again, since it might have mangled the bits. */
+ if (!do_chown && ((new_mode ^ st->st_mode) & 07777) == 0)
+ log_debug("\"%s\" matches mode %o already.", path, new_mode);
+ else {
+ log_debug("Changing \"%s\" to mode %o.", path, new_mode);
+ r = fchmod_opath(fd, new_mode);
+ if (r < 0)
+ return log_error_errno(r, "fchmod() of %s failed: %m", path);
+ }
+ }
+ }
+
+shortcut:
+ return label_fix(path, 0);
+}
+
+static int path_open_parent_safe(const char *path) {
+ _cleanup_free_ char *dn = NULL;
+ int r, fd;
+
+ if (path_equal(path, "/") || !path_is_normalized(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to open parent of '%s': invalid path.",
+ path);
+
+ dn = dirname_malloc(path);
+ if (!dn)
+ return log_oom();
+
+ r = chase_symlinks(dn, arg_root, CHASE_SAFE|CHASE_WARN, NULL, &fd);
+ if (r < 0 && r != -ENOLINK)
+ return log_error_errno(r, "Failed to validate path %s: %m", path);
+
+ return r < 0 ? r : fd;
+}
+
+static int path_open_safe(const char *path) {
+ int r, fd;
+
+ /* path_open_safe() returns a file descriptor opened with O_PATH after
+ * verifying that the path doesn't contain unsafe transitions, except
+ * for its final component as the function does not follow symlink. */
+
+ assert(path);
+
+ if (!path_is_normalized(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to open invalid path '%s'.",
+ path);
+
+ r = chase_symlinks(path, arg_root, CHASE_SAFE|CHASE_WARN|CHASE_NOFOLLOW, NULL, &fd);
+ if (r < 0 && r != -ENOLINK)
+ return log_error_errno(r, "Failed to validate path %s: %m", path);
+
+ return r < 0 ? r : fd;
+}
+
+static int path_set_perms(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(i);
+ assert(path);
+
+ fd = path_open_safe(path);
+ if (fd < 0)
+ return fd;
+
+ return fd_set_perms(i, fd, path, NULL);
+}
+
+static int parse_xattrs_from_arg(Item *i) {
+ const char *p;
+ int r;
+
+ assert(i);
+ assert(i->argument);
+
+ p = i->argument;
+
+ for (;;) {
+ _cleanup_free_ char *name = NULL, *value = NULL, *xattr = NULL;
+
+ r = extract_first_word(&p, &xattr, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse extended attribute '%s', ignoring: %m", p);
+ if (r <= 0)
+ break;
+
+ r = split_pair(xattr, "=", &name, &value);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse extended attribute, ignoring: %s", xattr);
+ continue;
+ }
+
+ if (isempty(name) || isempty(value)) {
+ log_warning("Malformed extended attribute found, ignoring: %s", xattr);
+ continue;
+ }
+
+ if (strv_push_pair(&i->xattrs, name, value) < 0)
+ return log_oom();
+
+ name = value = NULL;
+ }
+
+ return 0;
+}
+
+static int fd_set_xattrs(Item *i, int fd, const char *path, const struct stat *st) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ char **name, **value;
+
+ assert(i);
+ assert(fd);
+ assert(path);
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ STRV_FOREACH_PAIR(name, value, i->xattrs) {
+ log_debug("Setting extended attribute '%s=%s' on %s.", *name, *value, path);
+ if (setxattr(procfs_path, *name, *value, strlen(*value), 0) < 0)
+ return log_error_errno(errno, "Setting extended attribute %s=%s on %s failed: %m",
+ *name, *value, path);
+ }
+ return 0;
+}
+
+static int path_set_xattrs(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(i);
+ assert(path);
+
+ fd = path_open_safe(path);
+ if (fd < 0)
+ return fd;
+
+ return fd_set_xattrs(i, fd, path, NULL);
+}
+
+static int parse_acls_from_arg(Item *item) {
+#if HAVE_ACL
+ int r;
+
+ assert(item);
+
+ /* If append_or_force (= modify) is set, we will not modify the acl
+ * afterwards, so the mask can be added now if necessary. */
+
+ r = parse_acl(item->argument, &item->acl_access, &item->acl_default, !item->append_or_force);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse ACL \"%s\": %m. Ignoring", item->argument);
+#else
+ log_warning("ACLs are not supported. Ignoring.");
+#endif
+
+ return 0;
+}
+
+#if HAVE_ACL
+static int path_set_acl(const char *path, const char *pretty, acl_type_t type, acl_t acl, bool modify) {
+ _cleanup_(acl_free_charpp) char *t = NULL;
+ _cleanup_(acl_freep) acl_t dup = NULL;
+ int r;
+
+ /* Returns 0 for success, positive error if already warned,
+ * negative error otherwise. */
+
+ if (modify) {
+ r = acls_for_file(path, type, acl, &dup);
+ if (r < 0)
+ return r;
+
+ r = calc_acl_mask_if_needed(&dup);
+ if (r < 0)
+ return r;
+ } else {
+ dup = acl_dup(acl);
+ if (!dup)
+ return -errno;
+
+ /* the mask was already added earlier if needed */
+ }
+
+ r = add_base_acls_if_needed(&dup, path);
+ if (r < 0)
+ return r;
+
+ t = acl_to_any_text(dup, NULL, ',', TEXT_ABBREVIATE);
+ log_debug("Setting %s ACL %s on %s.",
+ type == ACL_TYPE_ACCESS ? "access" : "default",
+ strna(t), pretty);
+
+ r = acl_set_file(path, type, dup);
+ if (r < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(errno))
+ /* No error if filesystem doesn't support ACLs. Return negative. */
+ return -errno;
+ else
+ /* Return positive to indicate we already warned */
+ return -log_error_errno(errno,
+ "Setting %s ACL \"%s\" on %s failed: %m",
+ type == ACL_TYPE_ACCESS ? "access" : "default",
+ strna(t), pretty);
+ }
+ return 0;
+}
+#endif
+
+static int fd_set_acls(Item *item, int fd, const char *path, const struct stat *st) {
+ int r = 0;
+#if HAVE_ACL
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ struct stat stbuf;
+
+ assert(item);
+ assert(fd);
+ assert(path);
+
+ if (!st) {
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "fstat(%s) failed: %m", path);
+ st = &stbuf;
+ }
+
+ if (hardlink_vulnerable(st))
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Refusing to set ACLs on hardlinked file %s while the fs.protected_hardlinks sysctl is turned off.",
+ path);
+
+ if (S_ISLNK(st->st_mode)) {
+ log_debug("Skipping ACL fix for symlink %s.", path);
+ return 0;
+ }
+
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ if (item->acl_access)
+ r = path_set_acl(procfs_path, path, ACL_TYPE_ACCESS, item->acl_access, item->append_or_force);
+
+ /* set only default acls to folders */
+ if (r == 0 && item->acl_default && S_ISDIR(st->st_mode))
+ r = path_set_acl(procfs_path, path, ACL_TYPE_DEFAULT, item->acl_default, item->append_or_force);
+
+ if (ERRNO_IS_NOT_SUPPORTED(r)) {
+ log_debug_errno(r, "ACLs not supported by file system at %s", path);
+ return 0;
+ }
+
+ if (r > 0)
+ return -r; /* already warned */
+
+ /* The above procfs paths don't work if /proc is not mounted. */
+ if (r == -ENOENT && proc_mounted() == 0)
+ r = -ENOSYS;
+
+ if (r < 0)
+ return log_error_errno(r, "ACL operation on \"%s\" failed: %m", path);
+#endif
+ return r;
+}
+
+static int path_set_acls(Item *item, const char *path) {
+ int r = 0;
+#if HAVE_ACL
+ _cleanup_close_ int fd = -1;
+
+ assert(item);
+ assert(path);
+
+ fd = path_open_safe(path);
+ if (fd < 0)
+ return fd;
+
+ r = fd_set_acls(item, fd, path, NULL);
+#endif
+ return r;
+}
+
+static int parse_attribute_from_arg(Item *item) {
+
+ static const struct {
+ char character;
+ unsigned value;
+ } attributes[] = {
+ { 'A', FS_NOATIME_FL }, /* do not update atime */
+ { 'S', FS_SYNC_FL }, /* Synchronous updates */
+ { 'D', FS_DIRSYNC_FL }, /* dirsync behaviour (directories only) */
+ { 'a', FS_APPEND_FL }, /* writes to file may only append */
+ { 'c', FS_COMPR_FL }, /* Compress file */
+ { 'd', FS_NODUMP_FL }, /* do not dump file */
+ { 'e', FS_EXTENT_FL }, /* Extents */
+ { 'i', FS_IMMUTABLE_FL }, /* Immutable file */
+ { 'j', FS_JOURNAL_DATA_FL }, /* Reserved for ext3 */
+ { 's', FS_SECRM_FL }, /* Secure deletion */
+ { 'u', FS_UNRM_FL }, /* Undelete */
+ { 't', FS_NOTAIL_FL }, /* file tail should not be merged */
+ { 'T', FS_TOPDIR_FL }, /* Top of directory hierarchies */
+ { 'C', FS_NOCOW_FL }, /* Do not cow file */
+ { 'P', FS_PROJINHERIT_FL }, /* Inherit the quota project ID */
+ };
+
+ enum {
+ MODE_ADD,
+ MODE_DEL,
+ MODE_SET
+ } mode = MODE_ADD;
+
+ unsigned value = 0, mask = 0;
+ const char *p;
+
+ assert(item);
+
+ p = item->argument;
+ if (p) {
+ if (*p == '+') {
+ mode = MODE_ADD;
+ p++;
+ } else if (*p == '-') {
+ mode = MODE_DEL;
+ p++;
+ } else if (*p == '=') {
+ mode = MODE_SET;
+ p++;
+ }
+ }
+
+ if (isempty(p) && mode != MODE_SET)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Setting file attribute on '%s' needs an attribute specification.",
+ item->path);
+
+ for (; p && *p ; p++) {
+ unsigned i, v;
+
+ for (i = 0; i < ELEMENTSOF(attributes); i++)
+ if (*p == attributes[i].character)
+ break;
+
+ if (i >= ELEMENTSOF(attributes))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown file attribute '%c' on '%s'.",
+ *p, item->path);
+
+ v = attributes[i].value;
+
+ SET_FLAG(value, v, IN_SET(mode, MODE_ADD, MODE_SET));
+
+ mask |= v;
+ }
+
+ if (mode == MODE_SET)
+ mask |= CHATTR_ALL_FL;
+
+ assert(mask != 0);
+
+ item->attribute_mask = mask;
+ item->attribute_value = value;
+ item->attribute_set = true;
+
+ return 0;
+}
+
+static int fd_set_attribute(Item *item, int fd, const char *path, const struct stat *st) {
+ _cleanup_close_ int procfs_fd = -1;
+ struct stat stbuf;
+ unsigned f;
+ int r;
+
+ assert(item);
+ assert(fd);
+ assert(path);
+
+ if (!item->attribute_set || item->attribute_mask == 0)
+ return 0;
+
+ if (!st) {
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "fstat(%s) failed: %m", path);
+ st = &stbuf;
+ }
+
+ /* Issuing the file attribute ioctls on device nodes is not
+ * safe, as that will be delivered to the drivers, not the
+ * file system containing the device node. */
+ if (!S_ISREG(st->st_mode) && !S_ISDIR(st->st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Setting file flags is only supported on regular files and directories, cannot set on '%s'.",
+ path);
+
+ f = item->attribute_value & item->attribute_mask;
+
+ /* Mask away directory-specific flags */
+ if (!S_ISDIR(st->st_mode))
+ f &= ~FS_DIRSYNC_FL;
+
+ procfs_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NOATIME);
+ if (procfs_fd < 0)
+ return log_error_errno(procfs_fd, "Failed to re-open '%s': %m", path);
+
+ r = chattr_fd(procfs_fd, f, item->attribute_mask, NULL);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOTTY, -EOPNOTSUPP) ? LOG_DEBUG : LOG_WARNING,
+ r,
+ "Cannot set file attribute for '%s', value=0x%08x, mask=0x%08x, ignoring: %m",
+ path, item->attribute_value, item->attribute_mask);
+
+ return 0;
+}
+
+static int path_set_attribute(Item *item, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ if (!item->attribute_set || item->attribute_mask == 0)
+ return 0;
+
+ fd = path_open_safe(path);
+ if (fd < 0)
+ return fd;
+
+ return fd_set_attribute(item, fd, path, NULL);
+}
+
+static int write_one_file(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1, dir_fd = -1;
+ char *bn;
+ int r;
+
+ assert(i);
+ assert(path);
+ assert(i->argument);
+ assert(i->type == WRITE_FILE);
+
+ /* Validate the path and keep the fd on the directory for opening the
+ * file so we're sure that it can't be changed behind our back. */
+ dir_fd = path_open_parent_safe(path);
+ if (dir_fd < 0)
+ return dir_fd;
+
+ bn = basename(path);
+
+ /* Follows symlinks */
+ fd = openat(dir_fd, bn,
+ O_NONBLOCK|O_CLOEXEC|O_WRONLY|O_NOCTTY|(i->append_or_force ? O_APPEND : 0),
+ i->mode);
+ if (fd < 0) {
+ if (errno == ENOENT) {
+ log_debug_errno(errno, "Not writing missing file \"%s\": %m", path);
+ return 0;
+ }
+
+ if (i->allow_failure)
+ return log_debug_errno(errno, "Failed to open file \"%s\", ignoring: %m", path);
+
+ return log_error_errno(errno, "Failed to open file \"%s\": %m", path);
+ }
+
+ /* 'w' is allowed to write into any kind of files. */
+ log_debug("Writing to \"%s\".", path);
+
+ r = loop_write(fd, i->argument, strlen(i->argument), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write file \"%s\": %m", path);
+
+ return fd_set_perms(i, fd, path, NULL);
+}
+
+static int create_file(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1, dir_fd = -1;
+ struct stat stbuf, *st = NULL;
+ int r = 0;
+ char *bn;
+
+ assert(i);
+ assert(path);
+ assert(i->type == CREATE_FILE);
+
+ /* 'f' operates on regular files exclusively. */
+
+ /* Validate the path and keep the fd on the directory for opening the
+ * file so we're sure that it can't be changed behind our back. */
+ dir_fd = path_open_parent_safe(path);
+ if (dir_fd < 0)
+ return dir_fd;
+
+ bn = basename(path);
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(path, S_IFREG);
+ fd = openat(dir_fd, bn, O_CREAT|O_EXCL|O_NOFOLLOW|O_NONBLOCK|O_CLOEXEC|O_WRONLY|O_NOCTTY, i->mode);
+ mac_selinux_create_file_clear();
+ }
+
+ if (fd < 0) {
+ /* Even on a read-only filesystem, open(2) returns EEXIST if the
+ * file already exists. It returns EROFS only if it needs to
+ * create the file. */
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create file %s: %m", path);
+
+ /* Re-open the file. At that point it must exist since open(2)
+ * failed with EEXIST. We still need to check if the perms/mode
+ * need to be changed. For read-only filesystems, we let
+ * fd_set_perms() report the error if the perms need to be
+ * modified. */
+ fd = openat(dir_fd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH, i->mode);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to re-open file %s: %m", path);
+
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", path);
+
+ if (!S_ISREG(stbuf.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s exists and is not a regular file.",
+ path);
+
+ st = &stbuf;
+ } else {
+
+ log_debug("\"%s\" has been created.", path);
+
+ if (i->argument) {
+ log_debug("Writing to \"%s\".", path);
+
+ r = loop_write(fd, i->argument, strlen(i->argument), false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write file \"%s\": %m", path);
+ }
+ }
+
+ return fd_set_perms(i, fd, path, st);
+}
+
+static int truncate_file(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1, dir_fd = -1;
+ struct stat stbuf, *st = NULL;
+ bool erofs = false;
+ int r = 0;
+ char *bn;
+
+ assert(i);
+ assert(path);
+ assert(i->type == TRUNCATE_FILE || (i->type == CREATE_FILE && i->append_or_force));
+
+ /* We want to operate on regular file exclusively especially since
+ * O_TRUNC is unspecified if the file is neither a regular file nor a
+ * fifo nor a terminal device. Therefore we first open the file and make
+ * sure it's a regular one before truncating it. */
+
+ /* Validate the path and keep the fd on the directory for opening the
+ * file so we're sure that it can't be changed behind our back. */
+ dir_fd = path_open_parent_safe(path);
+ if (dir_fd < 0)
+ return dir_fd;
+
+ bn = basename(path);
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(path, S_IFREG);
+ fd = openat(dir_fd, bn, O_CREAT|O_NOFOLLOW|O_NONBLOCK|O_CLOEXEC|O_WRONLY|O_NOCTTY, i->mode);
+ mac_selinux_create_file_clear();
+ }
+
+ if (fd < 0) {
+ if (errno != EROFS)
+ return log_error_errno(errno, "Failed to open/create file %s: %m", path);
+
+ /* On a read-only filesystem, we don't want to fail if the
+ * target is already empty and the perms are set. So we still
+ * proceed with the sanity checks and let the remaining
+ * operations fail with EROFS if they try to modify the target
+ * file. */
+
+ fd = openat(dir_fd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH, i->mode);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ return log_error_errno(SYNTHETIC_ERRNO(EROFS),
+ "Cannot create file %s on a read-only file system.",
+ path);
+
+ return log_error_errno(errno, "Failed to re-open file %s: %m", path);
+ }
+
+ erofs = true;
+ }
+
+ if (fstat(fd, &stbuf) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", path);
+
+ if (!S_ISREG(stbuf.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "%s exists and is not a regular file.",
+ path);
+
+ if (stbuf.st_size > 0) {
+ if (ftruncate(fd, 0) < 0) {
+ r = erofs ? -EROFS : -errno;
+ return log_error_errno(r, "Failed to truncate file %s: %m", path);
+ }
+ } else
+ st = &stbuf;
+
+ log_debug("\"%s\" has been created.", path);
+
+ if (i->argument) {
+ log_debug("Writing to \"%s\".", path);
+
+ r = loop_write(fd, i->argument, strlen(i->argument), false);
+ if (r < 0) {
+ r = erofs ? -EROFS : r;
+ return log_error_errno(r, "Failed to write file %s: %m", path);
+ }
+ }
+
+ return fd_set_perms(i, fd, path, st);
+}
+
+static int copy_files(Item *i) {
+ _cleanup_close_ int dfd = -1, fd = -1;
+ char *bn;
+ int r;
+
+ log_debug("Copying tree \"%s\" to \"%s\".", i->argument, i->path);
+
+ bn = basename(i->path);
+
+ /* Validate the path and use the returned directory fd for copying the
+ * target so we're sure that the path can't be changed behind our
+ * back. */
+ dfd = path_open_parent_safe(i->path);
+ if (dfd < 0)
+ return dfd;
+
+ r = copy_tree_at(AT_FDCWD, i->argument,
+ dfd, bn,
+ i->uid_set ? i->uid : UID_INVALID,
+ i->gid_set ? i->gid : GID_INVALID,
+ COPY_REFLINK | COPY_MERGE_EMPTY | COPY_MAC_CREATE | COPY_HARDLINKS);
+ if (r < 0) {
+ struct stat a, b;
+
+ /* If the target already exists on read-only filesystems, trying
+ * to create the target will not fail with EEXIST but with
+ * EROFS. */
+ if (r == -EROFS && faccessat(dfd, bn, F_OK, AT_SYMLINK_NOFOLLOW) == 0)
+ r = -EEXIST;
+
+ if (r != -EEXIST)
+ return log_error_errno(r, "Failed to copy files to %s: %m", i->path);
+
+ if (stat(i->argument, &a) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", i->argument);
+
+ if (fstatat(dfd, bn, &b, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", i->path);
+
+ if ((a.st_mode ^ b.st_mode) & S_IFMT) {
+ log_debug("Can't copy to %s, file exists already and is of different type", i->path);
+ return 0;
+ }
+ }
+
+ fd = openat(dfd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to openat(%s): %m", i->path);
+
+ return fd_set_perms(i, fd, i->path, NULL);
+}
+
+typedef enum {
+ CREATION_NORMAL,
+ CREATION_EXISTING,
+ CREATION_FORCE,
+ _CREATION_MODE_MAX,
+ _CREATION_MODE_INVALID = -1
+} CreationMode;
+
+static const char *const creation_mode_verb_table[_CREATION_MODE_MAX] = {
+ [CREATION_NORMAL] = "Created",
+ [CREATION_EXISTING] = "Found existing",
+ [CREATION_FORCE] = "Created replacement",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(creation_mode_verb, CreationMode);
+
+static int create_directory_or_subvolume(const char *path, mode_t mode, bool subvol, CreationMode *creation) {
+ _cleanup_close_ int pfd = -1;
+ CreationMode c;
+ int r;
+
+ assert(path);
+
+ if (!creation)
+ creation = &c;
+
+ pfd = path_open_parent_safe(path);
+ if (pfd < 0)
+ return pfd;
+
+ if (subvol) {
+ if (btrfs_is_subvol(empty_to_root(arg_root)) <= 0)
+
+ /* Don't create a subvolume unless the root directory is
+ * one, too. We do this under the assumption that if the
+ * root directory is just a plain directory (i.e. very
+ * light-weight), we shouldn't try to split it up into
+ * subvolumes (i.e. more heavy-weight). Thus, chroot()
+ * environments and suchlike will get a full brtfs
+ * subvolume set up below their tree only if they
+ * specifically set up a btrfs subvolume for the root
+ * dir too. */
+
+ subvol = false;
+ else {
+ RUN_WITH_UMASK((~mode) & 0777)
+ r = btrfs_subvol_make_fd(pfd, basename(path));
+ }
+ } else
+ r = 0;
+
+ if (!subvol || r == -ENOTTY)
+ RUN_WITH_UMASK(0000)
+ r = mkdirat_label(pfd, basename(path), mode);
+
+ if (r < 0) {
+ int k;
+
+ if (!IN_SET(r, -EEXIST, -EROFS))
+ return log_error_errno(r, "Failed to create directory or subvolume \"%s\": %m", path);
+
+ k = is_dir_fd(pfd);
+ if (k == -ENOENT && r == -EROFS)
+ return log_error_errno(r, "%s does not exist and cannot be created as the file system is read-only.", path);
+ if (k < 0)
+ return log_error_errno(k, "Failed to check if %s exists: %m", path);
+ if (!k) {
+ log_warning("\"%s\" already exists and is not a directory.", path);
+ return -EEXIST;
+ }
+
+ *creation = CREATION_EXISTING;
+ } else
+ *creation = CREATION_NORMAL;
+
+ log_debug("%s directory \"%s\".", creation_mode_verb_to_string(*creation), path);
+
+ r = openat(pfd, basename(path), O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to open directory '%s': %m", basename(path));
+
+ return r;
+}
+
+static int create_directory(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1;
+
+ assert(i);
+ assert(IN_SET(i->type, CREATE_DIRECTORY, TRUNCATE_DIRECTORY));
+
+ fd = create_directory_or_subvolume(path, i->mode, false, NULL);
+ if (fd == -EEXIST)
+ return 0;
+ if (fd < 0)
+ return fd;
+
+ return fd_set_perms(i, fd, path, NULL);
+}
+
+static int create_subvolume(Item *i, const char *path) {
+ _cleanup_close_ int fd = -1;
+ CreationMode creation;
+ int r, q = 0;
+
+ assert(i);
+ assert(IN_SET(i->type, CREATE_SUBVOLUME, CREATE_SUBVOLUME_NEW_QUOTA, CREATE_SUBVOLUME_INHERIT_QUOTA));
+
+ fd = create_directory_or_subvolume(path, i->mode, true, &creation);
+ if (fd == -EEXIST)
+ return 0;
+ if (fd < 0)
+ return fd;
+
+ if (creation == CREATION_NORMAL &&
+ IN_SET(i->type, CREATE_SUBVOLUME_NEW_QUOTA, CREATE_SUBVOLUME_INHERIT_QUOTA)) {
+ r = btrfs_subvol_auto_qgroup_fd(fd, 0, i->type == CREATE_SUBVOLUME_NEW_QUOTA);
+ if (r == -ENOTTY)
+ log_debug_errno(r, "Couldn't adjust quota for subvolume \"%s\" (unsupported fs or dir not a subvolume): %m", i->path);
+ else if (r == -EROFS)
+ log_debug_errno(r, "Couldn't adjust quota for subvolume \"%s\" (fs is read-only).", i->path);
+ else if (r == -ENOTCONN)
+ log_debug_errno(r, "Couldn't adjust quota for subvolume \"%s\" (quota support is disabled).", i->path);
+ else if (r < 0)
+ q = log_error_errno(r, "Failed to adjust quota for subvolume \"%s\": %m", i->path);
+ else if (r > 0)
+ log_debug("Adjusted quota for subvolume \"%s\".", i->path);
+ else if (r == 0)
+ log_debug("Quota for subvolume \"%s\" already in place, no change made.", i->path);
+ }
+
+ r = fd_set_perms(i, fd, path, NULL);
+ if (q < 0) /* prefer the quota change error from above */
+ return q;
+
+ return r;
+}
+
+static int empty_directory(Item *i, const char *path) {
+ int r;
+
+ assert(i);
+ assert(i->type == EMPTY_DIRECTORY);
+
+ r = is_dir(path, false);
+ if (r == -ENOENT) {
+ /* Option "e" operates only on existing objects. Do not
+ * print errors about non-existent files or directories */
+ log_debug("Skipping missing directory: %s", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "is_dir() failed on path %s: %m", path);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
+ "'%s' already exists and is not a directory.",
+ path);
+
+ return path_set_perms(i, path);
+}
+
+static int create_device(Item *i, mode_t file_type) {
+ _cleanup_close_ int dfd = -1, fd = -1;
+ CreationMode creation;
+ char *bn;
+ int r;
+
+ assert(i);
+ assert(IN_SET(file_type, S_IFBLK, S_IFCHR));
+
+ bn = basename(i->path);
+
+ /* Validate the path and use the returned directory fd for copying the
+ * target so we're sure that the path can't be changed behind our
+ * back. */
+ dfd = path_open_parent_safe(i->path);
+ if (dfd < 0)
+ return dfd;
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(i->path, file_type);
+ r = mknodat(dfd, bn, i->mode | file_type, i->major_minor);
+ mac_selinux_create_file_clear();
+ }
+
+ if (r < 0) {
+ struct stat st;
+
+ if (errno == EPERM) {
+ log_debug("We lack permissions, possibly because of cgroup configuration; "
+ "skipping creation of device node %s.", i->path);
+ return 0;
+ }
+
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create device node %s: %m", i->path);
+
+ if (fstatat(dfd, bn, &st, 0) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", i->path);
+
+ if ((st.st_mode & S_IFMT) != file_type) {
+
+ if (i->append_or_force) {
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(i->path, file_type);
+ /* FIXME: need to introduce mknodat_atomic() */
+ r = mknod_atomic(i->path, i->mode | file_type, i->major_minor);
+ mac_selinux_create_file_clear();
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to create device node \"%s\": %m", i->path);
+ creation = CREATION_FORCE;
+ } else {
+ log_debug("%s is not a device node.", i->path);
+ return 0;
+ }
+ } else
+ creation = CREATION_EXISTING;
+ } else
+ creation = CREATION_NORMAL;
+
+ log_debug("%s %s device node \"%s\" %u:%u.",
+ creation_mode_verb_to_string(creation),
+ i->type == CREATE_BLOCK_DEVICE ? "block" : "char",
+ i->path, major(i->mode), minor(i->mode));
+
+ fd = openat(dfd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to openat(%s): %m", i->path);
+
+ return fd_set_perms(i, fd, i->path, NULL);
+}
+
+static int create_fifo(Item *i, const char *path) {
+ _cleanup_close_ int pfd = -1, fd = -1;
+ CreationMode creation;
+ struct stat st;
+ char *bn;
+ int r;
+
+ pfd = path_open_parent_safe(path);
+ if (pfd < 0)
+ return pfd;
+
+ bn = basename(path);
+
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(path, S_IFIFO);
+ r = mkfifoat(pfd, bn, i->mode);
+ mac_selinux_create_file_clear();
+ }
+
+ if (r < 0) {
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create fifo %s: %m", path);
+
+ if (fstatat(pfd, bn, &st, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "stat(%s) failed: %m", path);
+
+ if (!S_ISFIFO(st.st_mode)) {
+
+ if (i->append_or_force) {
+ RUN_WITH_UMASK(0000) {
+ mac_selinux_create_file_prepare(path, S_IFIFO);
+ r = mkfifoat_atomic(pfd, bn, i->mode);
+ mac_selinux_create_file_clear();
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to create fifo %s: %m", path);
+ creation = CREATION_FORCE;
+ } else {
+ log_warning("\"%s\" already exists and is not a fifo.", path);
+ return 0;
+ }
+ } else
+ creation = CREATION_EXISTING;
+ } else
+ creation = CREATION_NORMAL;
+
+ log_debug("%s fifo \"%s\".", creation_mode_verb_to_string(creation), path);
+
+ fd = openat(pfd, bn, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to openat(%s): %m", path);
+
+ return fd_set_perms(i, fd, i->path, NULL);
+}
+
+typedef int (*action_t)(Item *i, const char *path);
+typedef int (*fdaction_t)(Item *i, int fd, const char *path, const struct stat *st);
+
+static int item_do(Item *i, int fd, const char *path, fdaction_t action) {
+ struct stat st;
+ int r = 0, q;
+
+ assert(i);
+ assert(path);
+ assert(fd >= 0);
+
+ if (fstat(fd, &st) < 0) {
+ r = log_error_errno(errno, "fstat() on file failed: %m");
+ goto finish;
+ }
+
+ /* This returns the first error we run into, but nevertheless
+ * tries to go on */
+ r = action(i, fd, path, &st);
+
+ if (S_ISDIR(st.st_mode)) {
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ /* The passed 'fd' was opened with O_PATH. We need to convert
+ * it into a 'regular' fd before reading the directory content. */
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ d = opendir(procfs_path);
+ if (!d) {
+ log_error_errno(errno, "Failed to opendir() '%s': %m", procfs_path);
+ if (r == 0)
+ r = -errno;
+ goto finish;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, q = -errno; goto finish) {
+ int de_fd;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ de_fd = openat(fd, de->d_name, O_NOFOLLOW|O_CLOEXEC|O_PATH);
+ if (de_fd < 0)
+ q = log_error_errno(errno, "Failed to open() file '%s': %m", de->d_name);
+ else {
+ _cleanup_free_ char *de_path = NULL;
+
+ de_path = path_join(path, de->d_name);
+ if (!de_path)
+ q = log_oom();
+ else
+ /* Pass ownership of dirent fd over */
+ q = item_do(i, de_fd, de_path, action);
+ }
+
+ if (q < 0 && r == 0)
+ r = q;
+ }
+ }
+finish:
+ safe_close(fd);
+ return r;
+}
+
+static int glob_item(Item *i, action_t action) {
+ _cleanup_globfree_ glob_t g = {
+ .gl_opendir = (void *(*)(const char *)) opendir_nomod,
+ };
+ int r = 0, k;
+ char **fn;
+
+ k = safe_glob(i->path, GLOB_NOSORT|GLOB_BRACE, &g);
+ if (k < 0 && k != -ENOENT)
+ return log_error_errno(k, "glob(%s) failed: %m", i->path);
+
+ STRV_FOREACH(fn, g.gl_pathv) {
+ k = action(i, *fn);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int glob_item_recursively(Item *i, fdaction_t action) {
+ _cleanup_globfree_ glob_t g = {
+ .gl_opendir = (void *(*)(const char *)) opendir_nomod,
+ };
+ int r = 0, k;
+ char **fn;
+
+ k = safe_glob(i->path, GLOB_NOSORT|GLOB_BRACE, &g);
+ if (k < 0 && k != -ENOENT)
+ return log_error_errno(k, "glob(%s) failed: %m", i->path);
+
+ STRV_FOREACH(fn, g.gl_pathv) {
+ _cleanup_close_ int fd = -1;
+
+ /* Make sure we won't trigger/follow file object (such as
+ * device nodes, automounts, ...) pointed out by 'fn' with
+ * O_PATH. Note, when O_PATH is used, flags other than
+ * O_CLOEXEC, O_DIRECTORY, and O_NOFOLLOW are ignored. */
+
+ fd = open(*fn, O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0) {
+ log_error_errno(errno, "Opening '%s' failed: %m", *fn);
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ k = item_do(i, fd, *fn, action);
+ if (k < 0 && r == 0)
+ r = k;
+
+ /* we passed fd ownership to the previous call */
+ fd = -1;
+ }
+
+ return r;
+}
+
+static int create_item(Item *i) {
+ CreationMode creation;
+ int r = 0;
+
+ assert(i);
+
+ log_debug("Running create action for entry %c %s", (char) i->type, i->path);
+
+ switch (i->type) {
+
+ case IGNORE_PATH:
+ case IGNORE_DIRECTORY_PATH:
+ case REMOVE_PATH:
+ case RECURSIVE_REMOVE_PATH:
+ return 0;
+
+ case TRUNCATE_FILE:
+ case CREATE_FILE:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ if ((i->type == CREATE_FILE && i->append_or_force) || i->type == TRUNCATE_FILE)
+ r = truncate_file(i, i->path);
+ else
+ r = create_file(i, i->path);
+
+ if (r < 0)
+ return r;
+ break;
+
+ case COPY_FILES:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = copy_files(i);
+ if (r < 0)
+ return r;
+ break;
+
+ case WRITE_FILE:
+ r = glob_item(i, write_one_file);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case CREATE_DIRECTORY:
+ case TRUNCATE_DIRECTORY:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_directory(i, i->path);
+ if (r < 0)
+ return r;
+ break;
+
+ case CREATE_SUBVOLUME:
+ case CREATE_SUBVOLUME_INHERIT_QUOTA:
+ case CREATE_SUBVOLUME_NEW_QUOTA:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_subvolume(i, i->path);
+ if (r < 0)
+ return r;
+ break;
+
+ case EMPTY_DIRECTORY:
+ r = glob_item(i, empty_directory);
+ if (r < 0)
+ return r;
+ break;
+
+ case CREATE_FIFO:
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_fifo(i, i->path);
+ if (r < 0)
+ return r;
+ break;
+
+ case CREATE_SYMLINK: {
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ mac_selinux_create_file_prepare(i->path, S_IFLNK);
+ r = symlink(i->argument, i->path);
+ mac_selinux_create_file_clear();
+
+ if (r < 0) {
+ _cleanup_free_ char *x = NULL;
+
+ if (errno != EEXIST)
+ return log_error_errno(errno, "symlink(%s, %s) failed: %m", i->argument, i->path);
+
+ r = readlink_malloc(i->path, &x);
+ if (r < 0 || !streq(i->argument, x)) {
+
+ if (i->append_or_force) {
+ mac_selinux_create_file_prepare(i->path, S_IFLNK);
+ r = symlink_atomic(i->argument, i->path);
+ mac_selinux_create_file_clear();
+
+ if (IN_SET(r, -EISDIR, -EEXIST, -ENOTEMPTY)) {
+ r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (r < 0)
+ return log_error_errno(r, "rm -fr %s failed: %m", i->path);
+
+ mac_selinux_create_file_prepare(i->path, S_IFLNK);
+ r = symlink(i->argument, i->path) < 0 ? -errno : 0;
+ mac_selinux_create_file_clear();
+ }
+ if (r < 0)
+ return log_error_errno(r, "symlink(%s, %s) failed: %m", i->argument, i->path);
+
+ creation = CREATION_FORCE;
+ } else {
+ log_debug("\"%s\" is not a symlink or does not point to the correct path.", i->path);
+ return 0;
+ }
+ } else
+ creation = CREATION_EXISTING;
+ } else
+
+ creation = CREATION_NORMAL;
+ log_debug("%s symlink \"%s\".", creation_mode_verb_to_string(creation), i->path);
+ break;
+ }
+
+ case CREATE_BLOCK_DEVICE:
+ case CREATE_CHAR_DEVICE:
+ if (have_effective_cap(CAP_MKNOD) == 0) {
+ /* In a container we lack CAP_MKNOD. We shouldn't attempt to create the device node in that
+ * case to avoid noise, and we don't support virtualized devices in containers anyway. */
+
+ log_debug("We lack CAP_MKNOD, skipping creation of device node %s.", i->path);
+ return 0;
+ }
+
+ RUN_WITH_UMASK(0000)
+ (void) mkdir_parents_label(i->path, 0755);
+
+ r = create_device(i, i->type == CREATE_BLOCK_DEVICE ? S_IFBLK : S_IFCHR);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case ADJUST_MODE:
+ case RELABEL_PATH:
+ r = glob_item(i, path_set_perms);
+ if (r < 0)
+ return r;
+ break;
+
+ case RECURSIVE_RELABEL_PATH:
+ r = glob_item_recursively(i, fd_set_perms);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_XATTR:
+ r = glob_item(i, path_set_xattrs);
+ if (r < 0)
+ return r;
+ break;
+
+ case RECURSIVE_SET_XATTR:
+ r = glob_item_recursively(i, fd_set_xattrs);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_ACL:
+ r = glob_item(i, path_set_acls);
+ if (r < 0)
+ return r;
+ break;
+
+ case RECURSIVE_SET_ACL:
+ r = glob_item_recursively(i, fd_set_acls);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_ATTRIBUTE:
+ r = glob_item(i, path_set_attribute);
+ if (r < 0)
+ return r;
+ break;
+
+ case RECURSIVE_SET_ATTRIBUTE:
+ r = glob_item_recursively(i, fd_set_attribute);
+ if (r < 0)
+ return r;
+ break;
+ }
+
+ return 0;
+}
+
+static int remove_item_instance(Item *i, const char *instance) {
+ int r;
+
+ assert(i);
+
+ switch (i->type) {
+
+ case REMOVE_PATH:
+ if (remove(instance) < 0 && errno != ENOENT)
+ return log_error_errno(errno, "rm(%s): %m", instance);
+
+ break;
+
+ case RECURSIVE_REMOVE_PATH:
+ /* FIXME: we probably should use dir_cleanup() here instead of rm_rf() so that 'x' is honoured. */
+ log_debug("rm -rf \"%s\"", instance);
+ r = rm_rf(instance, REMOVE_ROOT|REMOVE_SUBVOLUME|REMOVE_PHYSICAL);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "rm_rf(%s): %m", instance);
+
+ break;
+
+ default:
+ assert_not_reached("wut?");
+ }
+
+ return 0;
+}
+
+static int remove_item(Item *i) {
+ int r;
+
+ assert(i);
+
+ log_debug("Running remove action for entry %c %s", (char) i->type, i->path);
+
+ switch (i->type) {
+
+ case TRUNCATE_DIRECTORY:
+ /* FIXME: we probably should use dir_cleanup() here instead of rm_rf() so that 'x' is honoured. */
+ log_debug("rm -rf \"%s\"", i->path);
+ r = rm_rf(i->path, REMOVE_PHYSICAL);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "rm_rf(%s): %m", i->path);
+
+ return 0;
+
+ case REMOVE_PATH:
+ case RECURSIVE_REMOVE_PATH:
+ return glob_item(i, remove_item_instance);
+
+ default:
+ return 0;
+ }
+}
+
+static int clean_item_instance(Item *i, const char* instance) {
+ char timestamp[FORMAT_TIMESTAMP_MAX];
+ _cleanup_closedir_ DIR *d = NULL;
+ STRUCT_STATX_DEFINE(sx);
+ int mountpoint, r;
+ usec_t cutoff, n;
+
+ assert(i);
+
+ if (!i->age_set)
+ return 0;
+
+ n = now(CLOCK_REALTIME);
+ if (n < i->age)
+ return 0;
+
+ cutoff = n - i->age;
+
+ d = opendir_nomod(instance);
+ if (!d) {
+ if (IN_SET(errno, ENOENT, ENOTDIR)) {
+ log_debug_errno(errno, "Directory \"%s\": %m", instance);
+ return 0;
+ }
+
+ return log_error_errno(errno, "Failed to open directory %s: %m", instance);
+ }
+
+ r = statx_fallback(dirfd(d), "", AT_EMPTY_PATH, STATX_MODE|STATX_INO|STATX_ATIME|STATX_MTIME, &sx);
+ if (r < 0)
+ return log_error_errno(r, "statx(%s) failed: %m", instance);
+
+ if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT))
+ mountpoint = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
+ else {
+ struct stat ps;
+
+ if (fstatat(dirfd(d), "..", &ps, AT_SYMLINK_NOFOLLOW) != 0)
+ return log_error_errno(errno, "stat(%s/..) failed: %m", i->path);
+
+ mountpoint =
+ sx.stx_dev_major != major(ps.st_dev) ||
+ sx.stx_dev_minor != minor(ps.st_dev) ||
+ sx.stx_ino != ps.st_ino;
+ }
+
+ log_debug("Cleanup threshold for %s \"%s\" is %s",
+ mountpoint ? "mount point" : "directory",
+ instance,
+ format_timestamp_style(timestamp, sizeof(timestamp), cutoff, TIMESTAMP_US));
+
+ return dir_cleanup(i, instance, d,
+ load_statx_timestamp_nsec(&sx.stx_atime),
+ load_statx_timestamp_nsec(&sx.stx_mtime),
+ cutoff * NSEC_PER_USEC,
+ sx.stx_dev_major, sx.stx_dev_minor, mountpoint,
+ MAX_DEPTH, i->keep_first_level);
+}
+
+static int clean_item(Item *i) {
+ assert(i);
+
+ log_debug("Running clean action for entry %c %s", (char) i->type, i->path);
+
+ switch (i->type) {
+ case CREATE_DIRECTORY:
+ case CREATE_SUBVOLUME:
+ case CREATE_SUBVOLUME_INHERIT_QUOTA:
+ case CREATE_SUBVOLUME_NEW_QUOTA:
+ case TRUNCATE_DIRECTORY:
+ case IGNORE_PATH:
+ case COPY_FILES:
+ clean_item_instance(i, i->path);
+ return 0;
+ case EMPTY_DIRECTORY:
+ case IGNORE_DIRECTORY_PATH:
+ return glob_item(i, clean_item_instance);
+ default:
+ return 0;
+ }
+}
+
+static int process_item(Item *i, OperationMask operation) {
+ OperationMask todo;
+ int r, q, p;
+
+ assert(i);
+
+ todo = operation & ~i->done;
+ if (todo == 0) /* Everything already done? */
+ return 0;
+
+ i->done |= operation;
+
+ r = chase_symlinks(i->path, arg_root, CHASE_NO_AUTOFS|CHASE_WARN, NULL, NULL);
+ if (r == -EREMOTE) {
+ log_notice_errno(r, "Skipping %s", i->path);
+ return 0;
+ }
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine whether '%s' is below autofs, ignoring: %m", i->path);
+
+ r = FLAGS_SET(operation, OPERATION_CREATE) ? create_item(i) : 0;
+ /* Failure can only be tolerated for create */
+ if (i->allow_failure)
+ r = 0;
+
+ q = FLAGS_SET(operation, OPERATION_REMOVE) ? remove_item(i) : 0;
+ p = FLAGS_SET(operation, OPERATION_CLEAN) ? clean_item(i) : 0;
+
+ return r < 0 ? r :
+ q < 0 ? q :
+ p;
+}
+
+static int process_item_array(ItemArray *array, OperationMask operation) {
+ int r = 0;
+ size_t n;
+
+ assert(array);
+
+ /* Create any parent first. */
+ if (FLAGS_SET(operation, OPERATION_CREATE) && array->parent)
+ r = process_item_array(array->parent, operation & OPERATION_CREATE);
+
+ /* Clean up all children first */
+ if ((operation & (OPERATION_REMOVE|OPERATION_CLEAN)) && !set_isempty(array->children)) {
+ ItemArray *c;
+
+ SET_FOREACH(c, array->children) {
+ int k;
+
+ k = process_item_array(c, operation & (OPERATION_REMOVE|OPERATION_CLEAN));
+ if (k < 0 && r == 0)
+ r = k;
+ }
+ }
+
+ for (n = 0; n < array->n_items; n++) {
+ int k;
+
+ k = process_item(array->items + n, operation);
+ if (k < 0 && r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static void item_free_contents(Item *i) {
+ assert(i);
+ free(i->path);
+ free(i->argument);
+ strv_free(i->xattrs);
+
+#if HAVE_ACL
+ acl_free(i->acl_access);
+ acl_free(i->acl_default);
+#endif
+}
+
+static ItemArray* item_array_free(ItemArray *a) {
+ size_t n;
+
+ if (!a)
+ return NULL;
+
+ for (n = 0; n < a->n_items; n++)
+ item_free_contents(a->items + n);
+
+ set_free(a->children);
+ free(a->items);
+ return mfree(a);
+}
+
+static int item_compare(const Item *a, const Item *b) {
+ /* Make sure that the ownership taking item is put first, so
+ * that we first create the node, and then can adjust it */
+
+ if (takes_ownership(a->type) && !takes_ownership(b->type))
+ return -1;
+ if (!takes_ownership(a->type) && takes_ownership(b->type))
+ return 1;
+
+ return CMP(a->type, b->type);
+}
+
+static bool item_compatible(Item *a, Item *b) {
+ assert(a);
+ assert(b);
+ assert(streq(a->path, b->path));
+
+ if (takes_ownership(a->type) && takes_ownership(b->type))
+ /* check if the items are the same */
+ return streq_ptr(a->argument, b->argument) &&
+
+ a->uid_set == b->uid_set &&
+ a->uid == b->uid &&
+
+ a->gid_set == b->gid_set &&
+ a->gid == b->gid &&
+
+ a->mode_set == b->mode_set &&
+ a->mode == b->mode &&
+
+ a->age_set == b->age_set &&
+ a->age == b->age &&
+
+ a->mask_perms == b->mask_perms &&
+
+ a->keep_first_level == b->keep_first_level &&
+
+ a->major_minor == b->major_minor;
+
+ return true;
+}
+
+static bool should_include_path(const char *path) {
+ char **prefix;
+
+ STRV_FOREACH(prefix, arg_exclude_prefixes)
+ if (path_startswith(path, *prefix)) {
+ log_debug("Entry \"%s\" matches exclude prefix \"%s\", skipping.",
+ path, *prefix);
+ return false;
+ }
+
+ STRV_FOREACH(prefix, arg_include_prefixes)
+ if (path_startswith(path, *prefix)) {
+ log_debug("Entry \"%s\" matches include prefix \"%s\".", path, *prefix);
+ return true;
+ }
+
+ /* no matches, so we should include this path only if we have no allow list at all */
+ if (strv_isempty(arg_include_prefixes))
+ return true;
+
+ log_debug("Entry \"%s\" does not match any include prefix, skipping.", path);
+ return false;
+}
+
+static int specifier_expansion_from_arg(Item *i) {
+ _cleanup_free_ char *unescaped = NULL, *resolved = NULL;
+ char **xattr;
+ int r;
+
+ assert(i);
+
+ if (!i->argument)
+ return 0;
+
+ switch (i->type) {
+ case COPY_FILES:
+ case CREATE_SYMLINK:
+ case CREATE_FILE:
+ case TRUNCATE_FILE:
+ case WRITE_FILE:
+ r = cunescape(i->argument, 0, &unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape parameter to write: %s", i->argument);
+
+ r = specifier_printf(unescaped, specifier_table, NULL, &resolved);
+ if (r < 0)
+ return r;
+
+ free_and_replace(i->argument, resolved);
+ break;
+
+ case SET_XATTR:
+ case RECURSIVE_SET_XATTR:
+ STRV_FOREACH(xattr, i->xattrs) {
+ r = specifier_printf(*xattr, specifier_table, NULL, &resolved);
+ if (r < 0)
+ return r;
+
+ free_and_replace(*xattr, resolved);
+ }
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int patch_var_run(const char *fname, unsigned line, char **path) {
+ const char *k;
+ char *n;
+
+ assert(path);
+ assert(*path);
+
+ /* Optionally rewrites lines referencing /var/run/, to use /run/ instead. Why bother? tmpfiles merges lines in
+ * some cases and detects conflicts in others. If files/directories are specified through two equivalent lines
+ * this is problematic as neither case will be detected. Ideally we'd detect these cases by resolving symlinks
+ * early, but that's precisely not what we can do here as this code very likely is running very early on, at a
+ * time where the paths in question are not available yet, or even more importantly, our own tmpfiles rules
+ * might create the paths that are intermediary to the listed paths. We can't really cover the generic case,
+ * but the least we can do is cover the specific case of /var/run vs. /run, as /var/run is a legacy name for
+ * /run only, and we explicitly document that and require that on systemd systems the former is a symlink to
+ * the latter. Moreover files below this path are by far the primary usecase for tmpfiles.d/. */
+
+ k = path_startswith(*path, "/var/run/");
+ if (isempty(k)) /* Don't complain about other paths than /var/run, and not about /var/run itself either. */
+ return 0;
+
+ n = path_join("/run", k);
+ if (!n)
+ return log_oom();
+
+ /* Also log about this briefly. We do so at LOG_NOTICE level, as we fixed up the situation automatically, hence
+ * there's no immediate need for action by the user. However, in the interest of making things less confusing
+ * to the user, let's still inform the user that these snippets should really be updated. */
+ log_syntax(NULL, LOG_NOTICE, fname, line, 0, "Line references path below legacy directory /var/run/, updating %s → %s; please update the tmpfiles.d/ drop-in file accordingly.", *path, n);
+
+ free_and_replace(*path, n);
+
+ return 0;
+}
+
+static int find_uid(const char *user, uid_t *ret_uid, Hashmap **cache) {
+ int r;
+
+ assert(user);
+ assert(ret_uid);
+
+ /* First: parse as numeric UID string */
+ r = parse_uid(user, ret_uid);
+ if (r >= 0)
+ return r;
+
+ /* Second: pass to NSS if we are running "online" */
+ if (!arg_root)
+ return get_user_creds(&user, ret_uid, NULL, NULL, NULL, 0);
+
+ /* Third, synthesize "root" unconditionally */
+ if (streq(user, "root")) {
+ *ret_uid = 0;
+ return 0;
+ }
+
+ /* Fourth: use fgetpwent() to read /etc/passwd directly, if we are "offline" */
+ return name_to_uid_offline(arg_root, user, ret_uid, cache);
+}
+
+static int find_gid(const char *group, gid_t *ret_gid, Hashmap **cache) {
+ int r;
+
+ assert(group);
+ assert(ret_gid);
+
+ /* First: parse as numeric GID string */
+ r = parse_gid(group, ret_gid);
+ if (r >= 0)
+ return r;
+
+ /* Second: pass to NSS if we are running "online" */
+ if (!arg_root)
+ return get_group_creds(&group, ret_gid, 0);
+
+ /* Third, synthesize "root" unconditionally */
+ if (streq(group, "root")) {
+ *ret_gid = 0;
+ return 0;
+ }
+
+ /* Fourth: use fgetgrent() to read /etc/group directly, if we are "offline" */
+ return name_to_gid_offline(arg_root, group, ret_gid, cache);
+}
+
+static int parse_line(
+ const char *fname,
+ unsigned line,
+ const char *buffer,
+ bool *invalid_config,
+ Hashmap **uid_cache,
+ Hashmap **gid_cache) {
+
+ _cleanup_free_ char *action = NULL, *mode = NULL, *user = NULL, *group = NULL, *age = NULL, *path = NULL;
+ _cleanup_(item_free_contents) Item i = {};
+ ItemArray *existing;
+ OrderedHashmap *h;
+ int r, pos;
+ bool append_or_force = false, boot = false, allow_failure = false;
+
+ assert(fname);
+ assert(line >= 1);
+ assert(buffer);
+
+ r = extract_many_words(
+ &buffer,
+ NULL,
+ EXTRACT_UNQUOTE,
+ &action,
+ &path,
+ &mode,
+ &user,
+ &group,
+ &age,
+ NULL);
+ if (r < 0) {
+ if (IN_SET(r, -EINVAL, -EBADSLT))
+ /* invalid quoting and such or an unknown specifier */
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, r, "Failed to parse line: %m");
+ } else if (r < 2) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG), "Syntax error.");
+ }
+
+ if (!empty_or_dash(buffer)) {
+ i.argument = strdup(buffer);
+ if (!i.argument)
+ return log_oom();
+ }
+
+ if (isempty(action)) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG), "Command too short '%s'.", action);
+ }
+
+ for (pos = 1; action[pos]; pos++) {
+ if (action[pos] == '!' && !boot)
+ boot = true;
+ else if (action[pos] == '+' && !append_or_force)
+ append_or_force = true;
+ else if (action[pos] == '-' && !allow_failure)
+ allow_failure = true;
+ else {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG), "Unknown modifiers in command '%s'", action);
+ }
+ }
+
+ if (boot && !arg_boot) {
+ log_syntax(NULL, LOG_DEBUG, fname, line, 0, "Ignoring entry %s \"%s\" because --boot is not specified.", action, path);
+ return 0;
+ }
+
+ i.type = action[0];
+ i.append_or_force = append_or_force;
+ i.allow_failure = allow_failure;
+
+ r = specifier_printf(path, specifier_table, NULL, &i.path);
+ if (r == -ENXIO)
+ return log_unresolvable_specifier(fname, line);
+ if (r < 0) {
+ if (IN_SET(r, -EINVAL, -EBADSLT))
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, r, "Failed to replace specifiers in '%s': %m", path);
+ }
+
+ r = patch_var_run(fname, line, &i.path);
+ if (r < 0)
+ return r;
+
+ switch (i.type) {
+
+ case CREATE_DIRECTORY:
+ case CREATE_SUBVOLUME:
+ case CREATE_SUBVOLUME_INHERIT_QUOTA:
+ case CREATE_SUBVOLUME_NEW_QUOTA:
+ case EMPTY_DIRECTORY:
+ case TRUNCATE_DIRECTORY:
+ case CREATE_FIFO:
+ case IGNORE_PATH:
+ case IGNORE_DIRECTORY_PATH:
+ case REMOVE_PATH:
+ case RECURSIVE_REMOVE_PATH:
+ case ADJUST_MODE:
+ case RELABEL_PATH:
+ case RECURSIVE_RELABEL_PATH:
+ if (i.argument)
+ log_syntax(NULL, LOG_WARNING, fname, line, 0, "%c lines don't take argument fields, ignoring.", i.type);
+
+ break;
+
+ case CREATE_FILE:
+ case TRUNCATE_FILE:
+ break;
+
+ case CREATE_SYMLINK:
+ if (!i.argument) {
+ i.argument = path_join("/usr/share/factory", i.path);
+ if (!i.argument)
+ return log_oom();
+ }
+ break;
+
+ case WRITE_FILE:
+ if (!i.argument) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG), "Write file requires argument.");
+ }
+ break;
+
+ case COPY_FILES:
+ if (!i.argument) {
+ i.argument = path_join("/usr/share/factory", i.path);
+ if (!i.argument)
+ return log_oom();
+
+ } else if (!path_is_absolute(i.argument)) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG), "Source path '%s' is not absolute.", i.argument);
+
+ }
+
+ if (!empty_or_root(arg_root)) {
+ char *p;
+
+ p = path_join(arg_root, i.argument);
+ if (!p)
+ return log_oom();
+ free_and_replace(i.argument, p);
+ }
+
+ path_simplify(i.argument, false);
+ break;
+
+ case CREATE_CHAR_DEVICE:
+ case CREATE_BLOCK_DEVICE:
+ if (!i.argument) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG), "Device file requires argument.");
+ }
+
+ r = parse_dev(i.argument, &i.major_minor);
+ if (r < 0) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, r, "Can't parse device file major/minor '%s'.", i.argument);
+ }
+
+ break;
+
+ case SET_XATTR:
+ case RECURSIVE_SET_XATTR:
+ if (!i.argument) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG),
+ "Set extended attribute requires argument.");
+ }
+ r = parse_xattrs_from_arg(&i);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_ACL:
+ case RECURSIVE_SET_ACL:
+ if (!i.argument) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG),
+ "Set ACLs requires argument.");
+ }
+ r = parse_acls_from_arg(&i);
+ if (r < 0)
+ return r;
+ break;
+
+ case SET_ATTRIBUTE:
+ case RECURSIVE_SET_ATTRIBUTE:
+ if (!i.argument) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG),
+ "Set file attribute requires argument.");
+ }
+ r = parse_attribute_from_arg(&i);
+ if (IN_SET(r, -EINVAL, -EBADSLT))
+ *invalid_config = true;
+ if (r < 0)
+ return r;
+ break;
+
+ default:
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG),
+ "Unknown command type '%c'.", (char) i.type);
+ }
+
+ if (!path_is_absolute(i.path)) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, SYNTHETIC_ERRNO(EBADMSG),
+ "Path '%s' not absolute.", i.path);
+ }
+
+ path_simplify(i.path, false);
+
+ if (!should_include_path(i.path))
+ return 0;
+
+ r = specifier_expansion_from_arg(&i);
+ if (r == -ENXIO)
+ return log_unresolvable_specifier(fname, line);
+ if (r < 0) {
+ if (IN_SET(r, -EINVAL, -EBADSLT))
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, r, "Failed to substitute specifiers in argument: %m");
+ }
+
+ if (!empty_or_root(arg_root)) {
+ char *p;
+
+ p = path_join(arg_root, i.path);
+ if (!p)
+ return log_oom();
+ free_and_replace(i.path, p);
+ }
+
+ if (!empty_or_dash(user)) {
+ r = find_uid(user, &i.uid, uid_cache);
+ if (r < 0) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, r, "Failed to resolve user '%s': %m", user);
+ }
+
+ i.uid_set = true;
+ }
+
+ if (!empty_or_dash(group)) {
+ r = find_gid(group, &i.gid, gid_cache);
+ if (r < 0) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, r, "Failed to resolve group '%s'.", group);
+ }
+
+ i.gid_set = true;
+ }
+
+ if (!empty_or_dash(mode)) {
+ const char *mm = mode;
+ unsigned m;
+
+ if (*mm == '~') {
+ i.mask_perms = true;
+ mm++;
+ }
+
+ r = parse_mode(mm, &m);
+ if (r < 0) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, r, "Invalid mode '%s'.", mode);
+ }
+
+ i.mode = m;
+ i.mode_set = true;
+ } else
+ i.mode = IN_SET(i.type, CREATE_DIRECTORY, TRUNCATE_DIRECTORY, CREATE_SUBVOLUME, CREATE_SUBVOLUME_INHERIT_QUOTA, CREATE_SUBVOLUME_NEW_QUOTA) ? 0755 : 0644;
+
+ if (!empty_or_dash(age)) {
+ const char *a = age;
+
+ if (*a == '~') {
+ i.keep_first_level = true;
+ a++;
+ }
+
+ r = parse_sec(a, &i.age);
+ if (r < 0) {
+ *invalid_config = true;
+ return log_syntax(NULL, LOG_ERR, fname, line, r, "Invalid age '%s'.", age);
+ }
+
+ i.age_set = true;
+ }
+
+ h = needs_glob(i.type) ? globs : items;
+
+ existing = ordered_hashmap_get(h, i.path);
+ if (existing) {
+ size_t n;
+
+ for (n = 0; n < existing->n_items; n++) {
+ if (!item_compatible(existing->items + n, &i) && !i.append_or_force) {
+ log_syntax(NULL, LOG_NOTICE, fname, line, 0, "Duplicate line for path \"%s\", ignoring.", i.path);
+ return 0;
+ }
+ }
+ } else {
+ existing = new0(ItemArray, 1);
+ if (!existing)
+ return log_oom();
+
+ r = ordered_hashmap_put(h, i.path, existing);
+ if (r < 0) {
+ free(existing);
+ return log_oom();
+ }
+ }
+
+ if (!GREEDY_REALLOC(existing->items, existing->allocated, existing->n_items + 1))
+ return log_oom();
+
+ existing->items[existing->n_items++] = i;
+ i = (struct Item) {};
+
+ /* Sort item array, to enforce stable ordering of application */
+ typesafe_qsort(existing->items, existing->n_items, item_compare);
+
+ return 0;
+}
+
+static int cat_config(char **config_dirs, char **args) {
+ _cleanup_strv_free_ char **files = NULL;
+ int r;
+
+ r = conf_files_list_with_replacement(arg_root, config_dirs, arg_replace, &files, NULL);
+ if (r < 0)
+ return r;
+
+ return cat_files(NULL, files, 0);
+}
+
+static int exclude_default_prefixes(void) {
+ int r;
+
+ /* Provide an easy way to exclude virtual/memory file systems from what we do here. Useful in
+ * combination with --root= where we probably don't want to apply stuff to these dirs as they are
+ * likely over-mounted if the root directory is actually used, and it wouldbe less than ideal to have
+ * all kinds of files created/adjusted underneath these mount points. */
+
+ r = strv_extend_strv(
+ &arg_exclude_prefixes,
+ STRV_MAKE("/dev",
+ "/proc",
+ "/run",
+ "/sys"),
+ true);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-tmpfiles", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] [CONFIGURATION FILE...]\n\n"
+ "Creates, deletes and cleans up volatile and temporary files and directories.\n\n"
+ " -h --help Show this help\n"
+ " --user Execute user configuration\n"
+ " --version Show package version\n"
+ " --cat-config Show configuration files\n"
+ " --create Create marked files/directories\n"
+ " --clean Clean up marked directories\n"
+ " --remove Remove marked files/directories\n"
+ " --boot Execute actions only safe at boot\n"
+ " --prefix=PATH Only apply rules with the specified prefix\n"
+ " --exclude-prefix=PATH Ignore rules with the specified prefix\n"
+ " -E Ignore rules prefixed with /dev, /proc, /run, /sys\n"
+ " --root=PATH Operate on an alternate filesystem root\n"
+ " --image=PATH Operate on disk image as filesystem root\n"
+ " --replace=PATH Treat arguments as replacement for PATH\n"
+ " --no-pager Do not pipe output into a pager\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_CAT_CONFIG,
+ ARG_USER,
+ ARG_CREATE,
+ ARG_CLEAN,
+ ARG_REMOVE,
+ ARG_BOOT,
+ ARG_PREFIX,
+ ARG_EXCLUDE_PREFIX,
+ ARG_ROOT,
+ ARG_IMAGE,
+ ARG_REPLACE,
+ ARG_NO_PAGER,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "user", no_argument, NULL, ARG_USER },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "cat-config", no_argument, NULL, ARG_CAT_CONFIG },
+ { "create", no_argument, NULL, ARG_CREATE },
+ { "clean", no_argument, NULL, ARG_CLEAN },
+ { "remove", no_argument, NULL, ARG_REMOVE },
+ { "boot", no_argument, NULL, ARG_BOOT },
+ { "prefix", required_argument, NULL, ARG_PREFIX },
+ { "exclude-prefix", required_argument, NULL, ARG_EXCLUDE_PREFIX },
+ { "root", required_argument, NULL, ARG_ROOT },
+ { "image", required_argument, NULL, ARG_IMAGE },
+ { "replace", required_argument, NULL, ARG_REPLACE },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "hE", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_CAT_CONFIG:
+ arg_cat_config = true;
+ break;
+
+ case ARG_USER:
+ arg_user = true;
+ break;
+
+ case ARG_CREATE:
+ arg_operation |= OPERATION_CREATE;
+ break;
+
+ case ARG_CLEAN:
+ arg_operation |= OPERATION_CLEAN;
+ break;
+
+ case ARG_REMOVE:
+ arg_operation |= OPERATION_REMOVE;
+ break;
+
+ case ARG_BOOT:
+ arg_boot = true;
+ break;
+
+ case ARG_PREFIX:
+ if (strv_push(&arg_include_prefixes, optarg) < 0)
+ return log_oom();
+ break;
+
+ case ARG_EXCLUDE_PREFIX:
+ if (strv_push(&arg_exclude_prefixes, optarg) < 0)
+ return log_oom();
+ break;
+
+ case ARG_ROOT:
+ r = parse_path_argument_and_warn(optarg, /* suppress_root= */ false, &arg_root);
+ if (r < 0)
+ return r;
+ break;
+
+ case ARG_IMAGE:
+#ifdef STANDALONE
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "This systemd-tmpfiles version is compiled without support for --image=.");
+#else
+ r = parse_path_argument_and_warn(optarg, /* suppress_root= */ false, &arg_image);
+ if (r < 0)
+ return r;
+#endif
+ /* Imply -E here since it makes little sense to create files persistently in the /run mountpoint of a disk image */
+ _fallthrough_;
+
+ case 'E':
+ r = exclude_default_prefixes();
+ if (r < 0)
+ return r;
+
+ break;
+
+ case ARG_REPLACE:
+ if (!path_is_absolute(optarg) ||
+ !endswith(optarg, ".conf"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "The argument to --replace= must an absolute path to a config file");
+
+ arg_replace = optarg;
+ break;
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (arg_operation == 0 && !arg_cat_config)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "You need to specify at least one of --clean, --create or --remove.");
+
+ if (arg_replace && arg_cat_config)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option --replace= is not supported with --cat-config");
+
+ if (arg_replace && optind >= argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "When --replace= is given, some configuration items must be specified");
+
+ if (arg_root && arg_user)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Combination of --user and --root= is not supported.");
+
+ if (arg_image && arg_root)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Please specify either --root= or --image=, the combination of both is not supported.");
+
+ return 1;
+}
+
+static int read_config_file(char **config_dirs, const char *fn, bool ignore_enoent, bool *invalid_config) {
+ _cleanup_(hashmap_freep) Hashmap *uid_cache = NULL, *gid_cache = NULL;
+ _cleanup_fclose_ FILE *_f = NULL;
+ unsigned v = 0;
+ FILE *f;
+ Item *i;
+ int r = 0;
+
+ assert(fn);
+
+ if (streq(fn, "-")) {
+ log_debug("Reading config from stdin…");
+ fn = "<stdin>";
+ f = stdin;
+ } else {
+ r = search_and_fopen(fn, "re", arg_root, (const char**) config_dirs, &_f);
+ if (r < 0) {
+ if (ignore_enoent && r == -ENOENT) {
+ log_debug_errno(r, "Failed to open \"%s\", ignoring: %m", fn);
+ return 0;
+ }
+
+ return log_error_errno(r, "Failed to open '%s': %m", fn);
+ }
+ log_debug("Reading config file \"%s\"…", fn);
+ f = _f;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ bool invalid_line = false;
+ char *l;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return log_error_errno(k, "Failed to read '%s': %m", fn);
+ if (k == 0)
+ break;
+
+ v++;
+
+ l = strstrip(line);
+ if (IN_SET(*l, 0, '#'))
+ continue;
+
+ k = parse_line(fn, v, l, &invalid_line, &uid_cache, &gid_cache);
+ if (k < 0) {
+ if (invalid_line)
+ /* Allow reporting with a special code if the caller requested this */
+ *invalid_config = true;
+ else if (r == 0)
+ /* The first error becomes our return value */
+ r = k;
+ }
+ }
+
+ /* we have to determine age parameter for each entry of type X */
+ ORDERED_HASHMAP_FOREACH(i, globs) {
+ Item *j, *candidate_item = NULL;
+
+ if (i->type != IGNORE_DIRECTORY_PATH)
+ continue;
+
+ ORDERED_HASHMAP_FOREACH(j, items) {
+ if (!IN_SET(j->type, CREATE_DIRECTORY, TRUNCATE_DIRECTORY, CREATE_SUBVOLUME, CREATE_SUBVOLUME_INHERIT_QUOTA, CREATE_SUBVOLUME_NEW_QUOTA))
+ continue;
+
+ if (path_equal(j->path, i->path)) {
+ candidate_item = j;
+ break;
+ }
+
+ if ((!candidate_item && path_startswith(i->path, j->path)) ||
+ (candidate_item && path_startswith(j->path, candidate_item->path) && (fnmatch(i->path, j->path, FNM_PATHNAME | FNM_PERIOD) == 0)))
+ candidate_item = j;
+ }
+
+ if (candidate_item && candidate_item->age_set) {
+ i->age = candidate_item->age;
+ i->age_set = true;
+ }
+ }
+
+ if (ferror(f)) {
+ log_error_errno(errno, "Failed to read from file %s: %m", fn);
+ if (r == 0)
+ r = -EIO;
+ }
+
+ return r;
+}
+
+static int parse_arguments(char **config_dirs, char **args, bool *invalid_config) {
+ char **arg;
+ int r;
+
+ STRV_FOREACH(arg, args) {
+ r = read_config_file(config_dirs, *arg, false, invalid_config);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int read_config_files(char **config_dirs, char **args, bool *invalid_config) {
+ _cleanup_strv_free_ char **files = NULL;
+ _cleanup_free_ char *p = NULL;
+ char **f;
+ int r;
+
+ r = conf_files_list_with_replacement(arg_root, config_dirs, arg_replace, &files, &p);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(f, files)
+ if (p && path_equal(*f, p)) {
+ log_debug("Parsing arguments at position \"%s\"…", *f);
+
+ r = parse_arguments(config_dirs, args, invalid_config);
+ if (r < 0)
+ return r;
+ } else
+ /* Just warn, ignore result otherwise.
+ * read_config_file() has some debug output, so no need to print anything. */
+ (void) read_config_file(config_dirs, *f, true, invalid_config);
+
+ return 0;
+}
+
+static int link_parent(ItemArray *a) {
+ const char *path;
+ char *prefix;
+ int r;
+
+ assert(a);
+
+ /* Finds the closest "parent" item array for the specified item array. Then registers the specified item array
+ * as child of it, and fills the parent in, linking them both ways. This allows us to later create parents
+ * before their children, and clean up/remove children before their parents. */
+
+ if (a->n_items <= 0)
+ return 0;
+
+ path = a->items[0].path;
+ prefix = newa(char, strlen(path) + 1);
+ PATH_FOREACH_PREFIX(prefix, path) {
+ ItemArray *j;
+
+ j = ordered_hashmap_get(items, prefix);
+ if (!j)
+ j = ordered_hashmap_get(globs, prefix);
+ if (j) {
+ r = set_ensure_put(&j->children, NULL, a);
+ if (r < 0)
+ return log_oom();
+
+ a->parent = j;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(item_array_hash_ops, char, string_hash_func, string_compare_func,
+ ItemArray, item_array_free);
+
+static int run(int argc, char *argv[]) {
+#ifndef STANDALONE
+ _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(umount_and_rmdir_and_freep) char *unlink_dir = NULL;
+#endif
+ _cleanup_strv_free_ char **config_dirs = NULL;
+ bool invalid_config = false;
+ ItemArray *a;
+ enum {
+ PHASE_REMOVE_AND_CLEAN,
+ PHASE_CREATE,
+ _PHASE_MAX
+ } phase;
+ int r, k;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_setup_service();
+
+ /* Descending down file system trees might take a lot of fds */
+ (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE);
+
+ if (arg_user) {
+ r = user_config_paths(&config_dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize configuration directory list: %m");
+ } else {
+ config_dirs = strv_split_nulstr(CONF_PATHS_NULSTR("tmpfiles.d"));
+ if (!config_dirs)
+ return log_oom();
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t = NULL;
+ char **i;
+
+ STRV_FOREACH(i, config_dirs) {
+ _cleanup_free_ char *j = NULL;
+
+ j = path_join(arg_root, *i);
+ if (!j)
+ return log_oom();
+
+ if (!strextend(&t, "\n\t", j, NULL))
+ return log_oom();
+ }
+
+ log_debug("Looking for configuration files in (higher priority first):%s", t);
+ }
+
+ if (arg_cat_config) {
+ (void) pager_open(arg_pager_flags);
+
+ return cat_config(config_dirs, argv + optind);
+ }
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+#ifndef STANDALONE
+ if (arg_image) {
+ assert(!arg_root);
+
+ r = mount_image_privately_interactively(
+ arg_image,
+ DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
+ &unlink_dir,
+ &loop_device,
+ &decrypted_image);
+ if (r < 0)
+ return r;
+
+ arg_root = strdup(unlink_dir);
+ if (!arg_root)
+ return log_oom();
+ }
+#else
+ assert(!arg_image);
+#endif
+
+ items = ordered_hashmap_new(&item_array_hash_ops);
+ globs = ordered_hashmap_new(&item_array_hash_ops);
+ if (!items || !globs)
+ return log_oom();
+
+ /* If command line arguments are specified along with --replace, read all
+ * configuration files and insert the positional arguments at the specified
+ * place. Otherwise, if command line arguments are specified, execute just
+ * them, and finally, without --replace= or any positional arguments, just
+ * read configuration and execute it.
+ */
+ if (arg_replace || optind >= argc)
+ r = read_config_files(config_dirs, argv + optind, &invalid_config);
+ else
+ r = parse_arguments(config_dirs, argv + optind, &invalid_config);
+ if (r < 0)
+ return r;
+
+ /* Let's now link up all child/parent relationships */
+ ORDERED_HASHMAP_FOREACH(a, items) {
+ r = link_parent(a);
+ if (r < 0)
+ return r;
+ }
+ ORDERED_HASHMAP_FOREACH(a, globs) {
+ r = link_parent(a);
+ if (r < 0)
+ return r;
+ }
+
+ /* If multiple operations are requested, let's first run the remove/clean operations, and only then the create
+ * operations. i.e. that we first clean out the platform we then build on. */
+ for (phase = 0; phase < _PHASE_MAX; phase++) {
+ OperationMask op;
+
+ if (phase == PHASE_REMOVE_AND_CLEAN)
+ op = arg_operation & (OPERATION_REMOVE|OPERATION_CLEAN);
+ else if (phase == PHASE_CREATE)
+ op = arg_operation & OPERATION_CREATE;
+ else
+ assert_not_reached("unexpected phase");
+
+ if (op == 0) /* Nothing requested in this phase */
+ continue;
+
+ /* The non-globbing ones usually create things, hence we apply them first */
+ ORDERED_HASHMAP_FOREACH(a, items) {
+ k = process_item_array(a, op);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ /* The globbing ones usually alter things, hence we apply them second. */
+ ORDERED_HASHMAP_FOREACH(a, globs) {
+ k = process_item_array(a, op);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+ }
+
+ if (ERRNO_IS_RESOURCE(r))
+ return r;
+ if (invalid_config)
+ return EX_DATAERR;
+ if (r < 0)
+ return EX_CANTCREAT;
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/tty-ask-password-agent/tty-ask-password-agent.c b/src/tty-ask-password-agent/tty-ask-password-agent.c
new file mode 100644
index 0000000..1940792
--- /dev/null
+++ b/src/tty-ask-password-agent/tty-ask-password-agent.c
@@ -0,0 +1,715 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2015 Werner Fink
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/prctl.h>
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "exit-status.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "macro.h"
+#include "main-func.h"
+#include "memory-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "utmp-wtmp.h"
+
+static enum {
+ ACTION_LIST,
+ ACTION_QUERY,
+ ACTION_WATCH,
+ ACTION_WALL,
+} arg_action = ACTION_QUERY;
+
+static bool arg_plymouth = false;
+static bool arg_console = false;
+static const char *arg_device = NULL;
+
+static int send_passwords(const char *socket_name, char **passwords) {
+ _cleanup_(erase_and_freep) char *packet = NULL;
+ _cleanup_close_ int socket_fd = -1;
+ union sockaddr_union sa;
+ socklen_t sa_len;
+ size_t packet_length = 1;
+ char **p, *d;
+ ssize_t n;
+ int r;
+
+ assert(socket_name);
+
+ r = sockaddr_un_set_path(&sa.un, socket_name);
+ if (r < 0)
+ return r;
+ sa_len = r;
+
+ STRV_FOREACH(p, passwords)
+ packet_length += strlen(*p) + 1;
+
+ packet = new(char, packet_length);
+ if (!packet)
+ return -ENOMEM;
+
+ packet[0] = '+';
+
+ d = packet + 1;
+ STRV_FOREACH(p, passwords)
+ d = stpcpy(d, *p) + 1;
+
+ socket_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (socket_fd < 0)
+ return log_debug_errno(errno, "socket(): %m");
+
+ n = sendto(socket_fd, packet, packet_length, MSG_NOSIGNAL, &sa.sa, sa_len);
+ if (n < 0)
+ return log_debug_errno(errno, "sendto(): %m");
+
+ return (int) n;
+}
+
+static bool wall_tty_match(const char *path, void *userdata) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ if (!path_is_absolute(path))
+ path = strjoina("/dev/", path);
+
+ if (lstat(path, &st) < 0) {
+ log_debug_errno(errno, "Failed to stat %s: %m", path);
+ return true;
+ }
+
+ if (!S_ISCHR(st.st_mode)) {
+ log_debug("%s is not a character device.", path);
+ return true;
+ }
+
+ /* We use named pipes to ensure that wall messages suggesting
+ * password entry are not printed over password prompts
+ * already shown. We use the fact here that opening a pipe in
+ * non-blocking mode for write-only will succeed only if
+ * there's some writer behind it. Using pipes has the
+ * advantage that the block will automatically go away if the
+ * process dies. */
+
+ if (asprintf(&p, "/run/systemd/ask-password-block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0) {
+ log_oom();
+ return true;
+ }
+
+ fd = open(p, O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0) {
+ log_debug_errno(errno, "Failed to open the wall pipe: %m");
+ return 1;
+ }
+
+ /* What, we managed to open the pipe? Then this tty is filtered. */
+ return 0;
+}
+
+static int agent_ask_password_tty(
+ const char *message,
+ usec_t until,
+ AskPasswordFlags flags,
+ const char *flag_file,
+ char ***ret) {
+
+ int tty_fd = -1, r;
+
+ if (arg_console) {
+ const char *con = arg_device ?: "/dev/console";
+
+ tty_fd = acquire_terminal(con, ACQUIRE_TERMINAL_WAIT, USEC_INFINITY);
+ if (tty_fd < 0)
+ return log_error_errno(tty_fd, "Failed to acquire %s: %m", con);
+
+ r = reset_terminal_fd(tty_fd, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to reset terminal, ignoring: %m");
+
+ }
+
+ r = ask_password_tty(tty_fd, message, NULL, until, flags, flag_file, ret);
+
+ if (arg_console) {
+ tty_fd = safe_close(tty_fd);
+ release_terminal();
+ }
+
+ return r;
+}
+
+static int process_one_password_file(const char *filename) {
+ _cleanup_free_ char *socket_name = NULL, *message = NULL;
+ bool accept_cached = false, echo = false;
+ uint64_t not_after = 0;
+ unsigned pid = 0;
+
+ const ConfigTableItem items[] = {
+ { "Ask", "Socket", config_parse_string, 0, &socket_name },
+ { "Ask", "NotAfter", config_parse_uint64, 0, &not_after },
+ { "Ask", "Message", config_parse_string, 0, &message },
+ { "Ask", "PID", config_parse_unsigned, 0, &pid },
+ { "Ask", "AcceptCached", config_parse_bool, 0, &accept_cached },
+ { "Ask", "Echo", config_parse_bool, 0, &echo },
+ {}
+ };
+
+ int r;
+
+ assert(filename);
+
+ r = config_parse(NULL, filename, NULL,
+ NULL,
+ config_item_table_lookup, items,
+ CONFIG_PARSE_RELAXED|CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (!socket_name)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid password file %s", filename);
+
+ if (not_after > 0 && now(CLOCK_MONOTONIC) > not_after)
+ return 0;
+
+ if (pid > 0 && !pid_is_alive(pid))
+ return 0;
+
+ switch (arg_action) {
+ case ACTION_LIST:
+ printf("'%s' (PID %u)\n", strna(message), pid);
+ return 0;
+
+ case ACTION_WALL: {
+ _cleanup_free_ char *wall = NULL;
+
+ if (asprintf(&wall,
+ "Password entry required for \'%s\' (PID %u).\r\n"
+ "Please enter password with the systemd-tty-ask-password-agent tool.",
+ strna(message),
+ pid) < 0)
+ return log_oom();
+
+ (void) utmp_wall(wall, NULL, NULL, wall_tty_match, NULL);
+ return 0;
+ }
+ case ACTION_QUERY:
+ case ACTION_WATCH: {
+ _cleanup_strv_free_erase_ char **passwords = NULL;
+ AskPasswordFlags flags = 0;
+
+ if (access(socket_name, W_OK) < 0) {
+ if (arg_action == ACTION_QUERY)
+ log_info("Not querying '%s' (PID %u), lacking privileges.", strna(message), pid);
+
+ return 0;
+ }
+
+ SET_FLAG(flags, ASK_PASSWORD_ACCEPT_CACHED, accept_cached);
+ SET_FLAG(flags, ASK_PASSWORD_CONSOLE_COLOR, arg_console);
+ SET_FLAG(flags, ASK_PASSWORD_ECHO, echo);
+
+ if (arg_plymouth)
+ r = ask_password_plymouth(message, not_after, flags, filename, &passwords);
+ else
+ r = agent_ask_password_tty(message, not_after, flags, filename, &passwords);
+ if (r < 0) {
+ /* If the query went away, that's OK */
+ if (IN_SET(r, -ETIME, -ENOENT))
+ return 0;
+
+ return log_error_errno(r, "Failed to query password: %m");
+ }
+
+ if (strv_isempty(passwords))
+ return -ECANCELED;
+
+ r = send_passwords(socket_name, passwords);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send: %m");
+ break;
+ }}
+
+ return 0;
+}
+
+static int wall_tty_block(void) {
+ _cleanup_free_ char *p = NULL;
+ dev_t devnr;
+ int fd, r;
+
+ r = get_ctty_devnr(0, &devnr);
+ if (r == -ENXIO) /* We have no controlling tty */
+ return -ENOTTY;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get controlling TTY: %m");
+
+ if (asprintf(&p, "/run/systemd/ask-password-block/%u:%u", major(devnr), minor(devnr)) < 0)
+ return log_oom();
+
+ (void) mkdir_parents_label(p, 0700);
+ (void) mkfifo(p, 0600);
+
+ fd = open(p, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to open %s: %m", p);
+
+ return fd;
+}
+
+static int process_password_files(void) {
+ _cleanup_closedir_ DIR *d;
+ struct dirent *de;
+ int r = 0;
+
+ d = opendir("/run/systemd/ask-password");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open /run/systemd/ask-password: %m");
+ }
+
+ FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read directory: %m")) {
+ _cleanup_free_ char *p = NULL;
+ int q;
+
+ /* We only support /run on tmpfs, hence we can rely on
+ * d_type to be reliable */
+
+ if (de->d_type != DT_REG)
+ continue;
+
+ if (!startswith(de->d_name, "ask."))
+ continue;
+
+ p = path_join("/run/systemd/ask-password", de->d_name);
+ if (!p)
+ return log_oom();
+
+ q = process_one_password_file(p);
+ if (q < 0 && r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int process_and_watch_password_files(bool watch) {
+ enum {
+ FD_SIGNAL,
+ FD_INOTIFY,
+ _FD_MAX
+ };
+
+ _cleanup_close_ int notify = -1, signal_fd = -1, tty_block_fd = -1;
+ struct pollfd pollfd[_FD_MAX];
+ sigset_t mask;
+ int r;
+
+ tty_block_fd = wall_tty_block();
+
+ (void) mkdir_p_label("/run/systemd/ask-password", 0755);
+
+ assert_se(sigemptyset(&mask) >= 0);
+ assert_se(sigset_add_many(&mask, SIGTERM, -1) >= 0);
+ assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) >= 0);
+
+ if (watch) {
+ signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (signal_fd < 0)
+ return log_error_errno(errno, "Failed to allocate signal file descriptor: %m");
+
+ pollfd[FD_SIGNAL] = (struct pollfd) { .fd = signal_fd, .events = POLLIN };
+
+ notify = inotify_init1(IN_CLOEXEC);
+ if (notify < 0)
+ return log_error_errno(errno, "Failed to allocate directory watch: %m");
+
+ r = inotify_add_watch_and_warn(notify, "/run/systemd/ask-password", IN_CLOSE_WRITE|IN_MOVED_TO);
+ if (r < 0)
+ return r;
+
+ pollfd[FD_INOTIFY] = (struct pollfd) { .fd = notify, .events = POLLIN };
+ }
+
+ for (;;) {
+ int timeout = -1;
+
+ r = process_password_files();
+ if (r < 0) {
+ if (r == -ECANCELED)
+ /* Disable poll() timeout since at least one password has
+ * been skipped and therefore one file remains and is
+ * unlikely to trigger any events. */
+ timeout = 0;
+ else
+ /* FIXME: we should do something here since otherwise the service
+ * requesting the password won't notice the error and will wait
+ * indefinitely. */
+ log_error_errno(r, "Failed to process password: %m");
+ }
+
+ if (!watch)
+ break;
+
+ if (poll(pollfd, _FD_MAX, timeout) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return -errno;
+ }
+
+ if (pollfd[FD_SIGNAL].revents & POLLNVAL ||
+ pollfd[FD_INOTIFY].revents & POLLNVAL)
+ return -EBADF;
+
+ if (pollfd[FD_INOTIFY].revents != 0)
+ (void) flush_fd(notify);
+
+ if (pollfd[FD_SIGNAL].revents != 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-tty-ask-password-agent", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Process system password requests.\n\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --list Show pending password requests\n"
+ " --query Process pending password requests\n"
+ " --watch Continuously process password requests\n"
+ " --wall Continuously forward password requests to wall\n"
+ " --plymouth Ask question with Plymouth instead of on TTY\n"
+ " --console Ask question on /dev/console instead of current TTY\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_LIST = 0x100,
+ ARG_QUERY,
+ ARG_WATCH,
+ ARG_WALL,
+ ARG_PLYMOUTH,
+ ARG_CONSOLE,
+ ARG_VERSION
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "list", no_argument, NULL, ARG_LIST },
+ { "query", no_argument, NULL, ARG_QUERY },
+ { "watch", no_argument, NULL, ARG_WATCH },
+ { "wall", no_argument, NULL, ARG_WALL },
+ { "plymouth", no_argument, NULL, ARG_PLYMOUTH },
+ { "console", optional_argument, NULL, ARG_CONSOLE },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+
+ case 'h':
+ return help();
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_LIST:
+ arg_action = ACTION_LIST;
+ break;
+
+ case ARG_QUERY:
+ arg_action = ACTION_QUERY;
+ break;
+
+ case ARG_WATCH:
+ arg_action = ACTION_WATCH;
+ break;
+
+ case ARG_WALL:
+ arg_action = ACTION_WALL;
+ break;
+
+ case ARG_PLYMOUTH:
+ arg_plymouth = true;
+ break;
+
+ case ARG_CONSOLE:
+ arg_console = true;
+ if (optarg) {
+
+ if (isempty(optarg))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Empty console device path is not allowed.");
+
+ arg_device = optarg;
+ }
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ if (optind != argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s takes no arguments.", program_invocation_short_name);
+
+ if (arg_plymouth || arg_console) {
+
+ if (!IN_SET(arg_action, ACTION_QUERY, ACTION_WATCH))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Options --query and --watch conflict.");
+
+ if (arg_plymouth && arg_console)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Options --plymouth and --console conflict.");
+ }
+
+ return 1;
+}
+
+/*
+ * To be able to ask on all terminal devices of /dev/console the devices are collected. If more than one
+ * device is found, then on each of the terminals a inquiring task is forked. Every task has its own session
+ * and its own controlling terminal. If one of the tasks does handle a password, the remaining tasks will be
+ * terminated.
+ */
+static int ask_on_this_console(const char *tty, pid_t *ret_pid, char **arguments) {
+ static const struct sigaction sigchld = {
+ .sa_handler = nop_signal_handler,
+ .sa_flags = SA_NOCLDSTOP | SA_RESTART,
+ };
+ static const struct sigaction sighup = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_RESTART,
+ };
+ int r;
+
+ assert_se(sigaction(SIGCHLD, &sigchld, NULL) >= 0);
+ assert_se(sigaction(SIGHUP, &sighup, NULL) >= 0);
+ assert_se(sigprocmask_many(SIG_UNBLOCK, NULL, SIGHUP, SIGCHLD, -1) >= 0);
+
+ r = safe_fork("(sd-passwd)", FORK_RESET_SIGNALS|FORK_LOG, ret_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char **i;
+
+ assert_se(prctl(PR_SET_PDEATHSIG, SIGHUP) >= 0);
+
+ STRV_FOREACH(i, arguments) {
+ char *k;
+
+ if (!streq(*i, "--console"))
+ continue;
+
+ k = strjoin("--console=", tty);
+ if (!k) {
+ log_oom();
+ _exit(EXIT_FAILURE);
+ }
+
+ free_and_replace(*i, k);
+ }
+
+ execv(SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, arguments);
+ _exit(EXIT_FAILURE);
+ }
+
+ return 0;
+}
+
+static void terminate_agents(Set *pids) {
+ struct timespec ts;
+ siginfo_t status = {};
+ sigset_t set;
+ void *p;
+ int r, signum;
+
+ /*
+ * Request termination of the remaining processes as those
+ * are not required anymore.
+ */
+ SET_FOREACH(p, pids)
+ (void) kill(PTR_TO_PID(p), SIGTERM);
+
+ /*
+ * Collect the processes which have go away.
+ */
+ assert_se(sigemptyset(&set) >= 0);
+ assert_se(sigaddset(&set, SIGCHLD) >= 0);
+ timespec_store(&ts, 50 * USEC_PER_MSEC);
+
+ while (!set_isempty(pids)) {
+
+ zero(status);
+ r = waitid(P_ALL, 0, &status, WEXITED|WNOHANG);
+ if (r < 0 && errno == EINTR)
+ continue;
+
+ if (r == 0 && status.si_pid > 0) {
+ set_remove(pids, PID_TO_PTR(status.si_pid));
+ continue;
+ }
+
+ signum = sigtimedwait(&set, NULL, &ts);
+ if (signum < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "sigtimedwait() failed: %m");
+ break;
+ }
+ assert(signum == SIGCHLD);
+ }
+
+ /*
+ * Kill hanging processes.
+ */
+ SET_FOREACH(p, pids) {
+ log_warning("Failed to terminate child %d, killing it", PTR_TO_PID(p));
+ (void) kill(PTR_TO_PID(p), SIGKILL);
+ }
+}
+
+static int ask_on_consoles(char *argv[]) {
+ _cleanup_set_free_ Set *pids = NULL;
+ _cleanup_strv_free_ char **consoles = NULL, **arguments = NULL;
+ siginfo_t status = {};
+ char **tty;
+ pid_t pid;
+ int r;
+
+ r = get_kernel_consoles(&consoles);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine devices of /dev/console: %m");
+
+ pids = set_new(NULL);
+ if (!pids)
+ return log_oom();
+
+ arguments = strv_copy(argv);
+ if (!arguments)
+ return log_oom();
+
+ /* Start an agent on each console. */
+ STRV_FOREACH(tty, consoles) {
+ r = ask_on_this_console(*tty, &pid, arguments);
+ if (r < 0)
+ return r;
+
+ if (set_put(pids, PID_TO_PTR(pid)) < 0)
+ return log_oom();
+ }
+
+ /* Wait for an agent to exit. */
+ for (;;) {
+ zero(status);
+
+ if (waitid(P_ALL, 0, &status, WEXITED) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ return log_error_errno(errno, "waitid() failed: %m");
+ }
+
+ set_remove(pids, PID_TO_PTR(status.si_pid));
+ break;
+ }
+
+ if (!is_clean_exit(status.si_code, status.si_status, EXIT_CLEAN_DAEMON, NULL))
+ log_error("Password agent failed with: %d", status.si_status);
+
+ terminate_agents(pids);
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_console && !arg_device)
+ /*
+ * Spawn a separate process for each console device.
+ */
+ return ask_on_consoles(argv);
+
+ if (arg_device) {
+ /*
+ * Later on, a controlling terminal will be acquired,
+ * therefore the current process has to become a session
+ * leader and should not have a controlling terminal already.
+ */
+ (void) setsid();
+ (void) release_terminal();
+ }
+
+ return process_and_watch_password_files(!IN_SET(arg_action, ACTION_QUERY, ACTION_LIST));
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/udev/.vimrc b/src/udev/.vimrc
new file mode 100644
index 0000000..366fbdc
--- /dev/null
+++ b/src/udev/.vimrc
@@ -0,0 +1,4 @@
+" 'set exrc' in ~/.vimrc will read .vimrc from the current directory
+set tabstop=8
+set shiftwidth=8
+set expandtab
diff --git a/src/udev/ata_id/ata_id.c b/src/udev/ata_id/ata_id.c
new file mode 100644
index 0000000..ce0bf5d
--- /dev/null
+++ b/src/udev/ata_id/ata_id.c
@@ -0,0 +1,651 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * ata_id - reads product/serial number from ATA drives
+ *
+ * Copyright © 2009-2010 David Zeuthen <zeuthen@gmail.com>
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/bsg.h>
+#include <linux/hdreg.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/sg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "libudev-util.h"
+#include "log.h"
+#include "memory-util.h"
+#include "udev-util.h"
+
+#define COMMAND_TIMEOUT_MSEC (30 * 1000)
+
+static int disk_scsi_inquiry_command(
+ int fd,
+ void *buf,
+ size_t buf_len) {
+
+ uint8_t cdb[6] = {
+ /* INQUIRY, see SPC-4 section 6.4 */
+ [0] = 0x12, /* OPERATION CODE: INQUIRY */
+ [3] = (buf_len >> 8), /* ALLOCATION LENGTH */
+ [4] = (buf_len & 0xff),
+ };
+ uint8_t sense[32] = {};
+ struct sg_io_v4 io_v4 = {
+ .guard = 'Q',
+ .protocol = BSG_PROTOCOL_SCSI,
+ .subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD,
+ .request_len = sizeof(cdb),
+ .request = (uintptr_t) cdb,
+ .max_response_len = sizeof(sense),
+ .response = (uintptr_t) sense,
+ .din_xfer_len = buf_len,
+ .din_xferp = (uintptr_t) buf,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+ int ret;
+
+ ret = ioctl(fd, SG_IO, &io_v4);
+ if (ret != 0) {
+ /* could be that the driver doesn't do version 4, try version 3 */
+ if (errno == EINVAL) {
+ struct sg_io_hdr io_hdr = {
+ .interface_id = 'S',
+ .cmdp = (unsigned char*) cdb,
+ .cmd_len = sizeof (cdb),
+ .dxferp = buf,
+ .dxfer_len = buf_len,
+ .sbp = sense,
+ .mx_sb_len = sizeof(sense),
+ .dxfer_direction = SG_DXFER_FROM_DEV,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+
+ ret = ioctl(fd, SG_IO, &io_hdr);
+ if (ret != 0)
+ return ret;
+
+ /* even if the ioctl succeeds, we need to check the return value */
+ if (!(io_hdr.status == 0 &&
+ io_hdr.host_status == 0 &&
+ io_hdr.driver_status == 0)) {
+ errno = EIO;
+ return -1;
+ }
+ } else
+ return ret;
+ }
+
+ /* even if the ioctl succeeds, we need to check the return value */
+ if (!(io_v4.device_status == 0 &&
+ io_v4.transport_status == 0 &&
+ io_v4.driver_status == 0)) {
+ errno = EIO;
+ return -1;
+ }
+
+ return 0;
+}
+
+static int disk_identify_command(
+ int fd,
+ void *buf,
+ size_t buf_len) {
+
+ uint8_t cdb[12] = {
+ /*
+ * ATA Pass-Through 12 byte command, as described in
+ *
+ * T10 04-262r8 ATA Command Pass-Through
+ *
+ * from http://www.t10.org/ftp/t10/document.04/04-262r8.pdf
+ */
+ [0] = 0xa1, /* OPERATION CODE: 12 byte pass through */
+ [1] = 4 << 1, /* PROTOCOL: PIO Data-in */
+ [2] = 0x2e, /* OFF_LINE=0, CK_COND=1, T_DIR=1, BYT_BLOK=1, T_LENGTH=2 */
+ [3] = 0, /* FEATURES */
+ [4] = 1, /* SECTORS */
+ [5] = 0, /* LBA LOW */
+ [6] = 0, /* LBA MID */
+ [7] = 0, /* LBA HIGH */
+ [8] = 0 & 0x4F, /* SELECT */
+ [9] = 0xEC, /* Command: ATA IDENTIFY DEVICE */
+ };
+ uint8_t sense[32] = {};
+ uint8_t *desc = sense + 8;
+ struct sg_io_v4 io_v4 = {
+ .guard = 'Q',
+ .protocol = BSG_PROTOCOL_SCSI,
+ .subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD,
+ .request_len = sizeof(cdb),
+ .request = (uintptr_t) cdb,
+ .max_response_len = sizeof(sense),
+ .response = (uintptr_t) sense,
+ .din_xfer_len = buf_len,
+ .din_xferp = (uintptr_t) buf,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+ int ret;
+
+ ret = ioctl(fd, SG_IO, &io_v4);
+ if (ret != 0) {
+ /* could be that the driver doesn't do version 4, try version 3 */
+ if (errno == EINVAL) {
+ struct sg_io_hdr io_hdr = {
+ .interface_id = 'S',
+ .cmdp = (unsigned char*) cdb,
+ .cmd_len = sizeof (cdb),
+ .dxferp = buf,
+ .dxfer_len = buf_len,
+ .sbp = sense,
+ .mx_sb_len = sizeof (sense),
+ .dxfer_direction = SG_DXFER_FROM_DEV,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+
+ ret = ioctl(fd, SG_IO, &io_hdr);
+ if (ret != 0)
+ return ret;
+ } else
+ return ret;
+ }
+
+ if (!(sense[0] == 0x72 && desc[0] == 0x9 && desc[1] == 0x0c) &&
+ !(sense[0] == 0x70 && sense[12] == 0x00 && sense[13] == 0x1d)) {
+ errno = EIO;
+ return -1;
+ }
+
+ return 0;
+}
+
+static int disk_identify_packet_device_command(
+ int fd,
+ void *buf,
+ size_t buf_len) {
+
+ uint8_t cdb[16] = {
+ /*
+ * ATA Pass-Through 16 byte command, as described in
+ *
+ * T10 04-262r8 ATA Command Pass-Through
+ *
+ * from http://www.t10.org/ftp/t10/document.04/04-262r8.pdf
+ */
+ [0] = 0x85, /* OPERATION CODE: 16 byte pass through */
+ [1] = 4 << 1, /* PROTOCOL: PIO Data-in */
+ [2] = 0x2e, /* OFF_LINE=0, CK_COND=1, T_DIR=1, BYT_BLOK=1, T_LENGTH=2 */
+ [3] = 0, /* FEATURES */
+ [4] = 0, /* FEATURES */
+ [5] = 0, /* SECTORS */
+ [6] = 1, /* SECTORS */
+ [7] = 0, /* LBA LOW */
+ [8] = 0, /* LBA LOW */
+ [9] = 0, /* LBA MID */
+ [10] = 0, /* LBA MID */
+ [11] = 0, /* LBA HIGH */
+ [12] = 0, /* LBA HIGH */
+ [13] = 0, /* DEVICE */
+ [14] = 0xA1, /* Command: ATA IDENTIFY PACKET DEVICE */
+ [15] = 0, /* CONTROL */
+ };
+ uint8_t sense[32] = {};
+ uint8_t *desc = sense + 8;
+ struct sg_io_v4 io_v4 = {
+ .guard = 'Q',
+ .protocol = BSG_PROTOCOL_SCSI,
+ .subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD,
+ .request_len = sizeof (cdb),
+ .request = (uintptr_t) cdb,
+ .max_response_len = sizeof (sense),
+ .response = (uintptr_t) sense,
+ .din_xfer_len = buf_len,
+ .din_xferp = (uintptr_t) buf,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+ int ret;
+
+ ret = ioctl(fd, SG_IO, &io_v4);
+ if (ret != 0) {
+ /* could be that the driver doesn't do version 4, try version 3 */
+ if (errno == EINVAL) {
+ struct sg_io_hdr io_hdr = {
+ .interface_id = 'S',
+ .cmdp = (unsigned char*) cdb,
+ .cmd_len = sizeof (cdb),
+ .dxferp = buf,
+ .dxfer_len = buf_len,
+ .sbp = sense,
+ .mx_sb_len = sizeof (sense),
+ .dxfer_direction = SG_DXFER_FROM_DEV,
+ .timeout = COMMAND_TIMEOUT_MSEC,
+ };
+
+ ret = ioctl(fd, SG_IO, &io_hdr);
+ if (ret != 0)
+ return ret;
+ } else
+ return ret;
+ }
+
+ if (!(sense[0] == 0x72 && desc[0] == 0x9 && desc[1] == 0x0c)) {
+ errno = EIO;
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * disk_identify_get_string:
+ * @identify: A block of IDENTIFY data
+ * @offset_words: Offset of the string to get, in words.
+ * @dest: Destination buffer for the string.
+ * @dest_len: Length of destination buffer, in bytes.
+ *
+ * Copies the ATA string from @identify located at @offset_words into @dest.
+ */
+static void disk_identify_get_string(
+ uint8_t identify[512],
+ unsigned offset_words,
+ char *dest,
+ size_t dest_len) {
+
+ unsigned c1;
+ unsigned c2;
+
+ while (dest_len > 0) {
+ c1 = identify[offset_words * 2 + 1];
+ c2 = identify[offset_words * 2];
+ *dest = c1;
+ dest++;
+ *dest = c2;
+ dest++;
+ offset_words++;
+ dest_len -= 2;
+ }
+}
+
+static void disk_identify_fixup_string(
+ uint8_t identify[512],
+ unsigned offset_words,
+ size_t len) {
+ disk_identify_get_string(identify, offset_words,
+ (char *) identify + offset_words * 2, len);
+}
+
+static void disk_identify_fixup_uint16 (uint8_t identify[512], unsigned offset_words) {
+ uint16_t *p;
+
+ p = (uint16_t *) identify;
+ p[offset_words] = le16toh (p[offset_words]);
+}
+
+/**
+ * disk_identify:
+ * @fd: File descriptor for the block device.
+ * @out_identify: Return location for IDENTIFY data.
+ * @out_is_packet_device: Return location for whether returned data is from a IDENTIFY PACKET DEVICE.
+ *
+ * Sends the IDENTIFY DEVICE or IDENTIFY PACKET DEVICE command to the
+ * device represented by @fd. If successful, then the result will be
+ * copied into @out_identify and @out_is_packet_device.
+ *
+ * This routine is based on code from libatasmart, LGPL v2.1.
+ *
+ * Returns: 0 if the data was successfully obtained, otherwise
+ * non-zero with errno set.
+ */
+static int disk_identify(int fd,
+ uint8_t out_identify[512],
+ int *out_is_packet_device) {
+ int ret;
+ uint8_t inquiry_buf[36];
+ int peripheral_device_type;
+ int all_nul_bytes;
+ int n;
+ int is_packet_device = 0;
+
+ /* init results */
+ memzero(out_identify, 512);
+
+ /* If we were to use ATA PASS_THROUGH (12) on an ATAPI device
+ * we could accidentally blank media. This is because MMC's BLANK
+ * command has the same op-code (0x61).
+ *
+ * To prevent this from happening we bail out if the device
+ * isn't a Direct Access Block Device, e.g. SCSI type 0x00
+ * (CD/DVD devices are type 0x05). So we send a SCSI INQUIRY
+ * command first... libata is handling this via its SCSI
+ * emulation layer.
+ *
+ * This also ensures that we're actually dealing with a device
+ * that understands SCSI commands.
+ *
+ * (Yes, it is a bit perverse that we're tunneling the ATA
+ * command through SCSI and relying on the ATA driver
+ * emulating SCSI well-enough...)
+ *
+ * (See commit 160b069c25690bfb0c785994c7c3710289179107 for
+ * the original bug-fix and see http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=556635
+ * for the original bug-report.)
+ */
+ ret = disk_scsi_inquiry_command (fd, inquiry_buf, sizeof (inquiry_buf));
+ if (ret != 0)
+ goto out;
+
+ /* SPC-4, section 6.4.2: Standard INQUIRY data */
+ peripheral_device_type = inquiry_buf[0] & 0x1f;
+ if (peripheral_device_type == 0x05)
+ {
+ is_packet_device = 1;
+ ret = disk_identify_packet_device_command(fd, out_identify, 512);
+ goto check_nul_bytes;
+ }
+ if (!IN_SET(peripheral_device_type, 0x00, 0x14)) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* OK, now issue the IDENTIFY DEVICE command */
+ ret = disk_identify_command(fd, out_identify, 512);
+ if (ret != 0)
+ goto out;
+
+ check_nul_bytes:
+ /* Check if IDENTIFY data is all NUL bytes - if so, bail */
+ all_nul_bytes = 1;
+ for (n = 0; n < 512; n++) {
+ if (out_identify[n] != '\0') {
+ all_nul_bytes = 0;
+ break;
+ }
+ }
+
+ if (all_nul_bytes) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+out:
+ if (out_is_packet_device)
+ *out_is_packet_device = is_packet_device;
+ return ret;
+}
+
+int main(int argc, char *argv[]) {
+ struct hd_driveid id;
+ union {
+ uint8_t byte[512];
+ uint16_t wyde[256];
+ } identify;
+ char model[41];
+ char model_enc[256];
+ char serial[21];
+ char revision[9];
+ const char *node = NULL;
+ int export = 0;
+ _cleanup_close_ int fd = -1;
+ uint16_t word;
+ int is_packet_device = 0;
+ static const struct option options[] = {
+ { "export", no_argument, NULL, 'x' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ log_set_target(LOG_TARGET_AUTO);
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ for (;;) {
+ int option;
+
+ option = getopt_long(argc, argv, "xh", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'x':
+ export = 1;
+ break;
+ case 'h':
+ printf("Usage: %s [--export] [--help] <device>\n"
+ " -x,--export print values as environment keys\n"
+ " -h,--help print this help text\n\n",
+ program_invocation_short_name);
+ return 0;
+ }
+ }
+
+ node = argv[optind];
+ if (!node) {
+ log_error("no node specified");
+ return 1;
+ }
+
+ fd = open(node, O_RDONLY|O_NONBLOCK|O_CLOEXEC);
+ if (fd < 0) {
+ log_error("unable to open '%s'", node);
+ return 1;
+ }
+
+ if (disk_identify(fd, identify.byte, &is_packet_device) == 0) {
+ /*
+ * fix up only the fields from the IDENTIFY data that we are going to
+ * use and copy it into the hd_driveid struct for convenience
+ */
+ disk_identify_fixup_string(identify.byte, 10, 20); /* serial */
+ disk_identify_fixup_string(identify.byte, 23, 8); /* fwrev */
+ disk_identify_fixup_string(identify.byte, 27, 40); /* model */
+ disk_identify_fixup_uint16(identify.byte, 0); /* configuration */
+ disk_identify_fixup_uint16(identify.byte, 75); /* queue depth */
+ disk_identify_fixup_uint16(identify.byte, 76); /* SATA capabilities */
+ disk_identify_fixup_uint16(identify.byte, 82); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 83); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 84); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 85); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 86); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 87); /* command set supported */
+ disk_identify_fixup_uint16(identify.byte, 89); /* time required for SECURITY ERASE UNIT */
+ disk_identify_fixup_uint16(identify.byte, 90); /* time required for enhanced SECURITY ERASE UNIT */
+ disk_identify_fixup_uint16(identify.byte, 91); /* current APM values */
+ disk_identify_fixup_uint16(identify.byte, 94); /* current AAM value */
+ disk_identify_fixup_uint16(identify.byte, 108); /* WWN */
+ disk_identify_fixup_uint16(identify.byte, 109); /* WWN */
+ disk_identify_fixup_uint16(identify.byte, 110); /* WWN */
+ disk_identify_fixup_uint16(identify.byte, 111); /* WWN */
+ disk_identify_fixup_uint16(identify.byte, 128); /* device lock function */
+ disk_identify_fixup_uint16(identify.byte, 217); /* nominal media rotation rate */
+ memcpy(&id, identify.byte, sizeof id);
+ } else {
+ /* If this fails, then try HDIO_GET_IDENTITY */
+ if (ioctl(fd, HDIO_GET_IDENTITY, &id) != 0) {
+ log_debug_errno(errno, "HDIO_GET_IDENTITY failed for '%s': %m", node);
+ return 2;
+ }
+ }
+
+ memcpy(model, id.model, 40);
+ model[40] = '\0';
+ udev_util_encode_string(model, model_enc, sizeof(model_enc));
+ util_replace_whitespace((char *) id.model, model, 40);
+ util_replace_chars(model, NULL);
+ util_replace_whitespace((char *) id.serial_no, serial, 20);
+ util_replace_chars(serial, NULL);
+ util_replace_whitespace((char *) id.fw_rev, revision, 8);
+ util_replace_chars(revision, NULL);
+
+ if (export) {
+ /* Set this to convey the disk speaks the ATA protocol */
+ printf("ID_ATA=1\n");
+
+ if ((id.config >> 8) & 0x80) {
+ /* This is an ATAPI device */
+ switch ((id.config >> 8) & 0x1f) {
+ case 0:
+ printf("ID_TYPE=cd\n");
+ break;
+ case 1:
+ printf("ID_TYPE=tape\n");
+ break;
+ case 5:
+ printf("ID_TYPE=cd\n");
+ break;
+ case 7:
+ printf("ID_TYPE=optical\n");
+ break;
+ default:
+ printf("ID_TYPE=generic\n");
+ break;
+ }
+ } else
+ printf("ID_TYPE=disk\n");
+ printf("ID_BUS=ata\n");
+ printf("ID_MODEL=%s\n", model);
+ printf("ID_MODEL_ENC=%s\n", model_enc);
+ printf("ID_REVISION=%s\n", revision);
+ if (serial[0] != '\0') {
+ printf("ID_SERIAL=%s_%s\n", model, serial);
+ printf("ID_SERIAL_SHORT=%s\n", serial);
+ } else
+ printf("ID_SERIAL=%s\n", model);
+
+ if (id.command_set_1 & (1<<5)) {
+ printf("ID_ATA_WRITE_CACHE=1\n");
+ printf("ID_ATA_WRITE_CACHE_ENABLED=%d\n", (id.cfs_enable_1 & (1<<5)) ? 1 : 0);
+ }
+ if (id.command_set_1 & (1<<10)) {
+ printf("ID_ATA_FEATURE_SET_HPA=1\n");
+ printf("ID_ATA_FEATURE_SET_HPA_ENABLED=%d\n", (id.cfs_enable_1 & (1<<10)) ? 1 : 0);
+
+ /*
+ * TODO: use the READ NATIVE MAX ADDRESS command to get the native max address
+ * so it is easy to check whether the protected area is in use.
+ */
+ }
+ if (id.command_set_1 & (1<<3)) {
+ printf("ID_ATA_FEATURE_SET_PM=1\n");
+ printf("ID_ATA_FEATURE_SET_PM_ENABLED=%d\n", (id.cfs_enable_1 & (1<<3)) ? 1 : 0);
+ }
+ if (id.command_set_1 & (1<<1)) {
+ printf("ID_ATA_FEATURE_SET_SECURITY=1\n");
+ printf("ID_ATA_FEATURE_SET_SECURITY_ENABLED=%d\n", (id.cfs_enable_1 & (1<<1)) ? 1 : 0);
+ printf("ID_ATA_FEATURE_SET_SECURITY_ERASE_UNIT_MIN=%d\n", id.trseuc * 2);
+ if ((id.cfs_enable_1 & (1<<1))) /* enabled */ {
+ if (id.dlf & (1<<8))
+ printf("ID_ATA_FEATURE_SET_SECURITY_LEVEL=maximum\n");
+ else
+ printf("ID_ATA_FEATURE_SET_SECURITY_LEVEL=high\n");
+ }
+ if (id.dlf & (1<<5))
+ printf("ID_ATA_FEATURE_SET_SECURITY_ENHANCED_ERASE_UNIT_MIN=%d\n", id.trsEuc * 2);
+ if (id.dlf & (1<<4))
+ printf("ID_ATA_FEATURE_SET_SECURITY_EXPIRE=1\n");
+ if (id.dlf & (1<<3))
+ printf("ID_ATA_FEATURE_SET_SECURITY_FROZEN=1\n");
+ if (id.dlf & (1<<2))
+ printf("ID_ATA_FEATURE_SET_SECURITY_LOCKED=1\n");
+ }
+ if (id.command_set_1 & (1<<0)) {
+ printf("ID_ATA_FEATURE_SET_SMART=1\n");
+ printf("ID_ATA_FEATURE_SET_SMART_ENABLED=%d\n", (id.cfs_enable_1 & (1<<0)) ? 1 : 0);
+ }
+ if (id.command_set_2 & (1<<9)) {
+ printf("ID_ATA_FEATURE_SET_AAM=1\n");
+ printf("ID_ATA_FEATURE_SET_AAM_ENABLED=%d\n", (id.cfs_enable_2 & (1<<9)) ? 1 : 0);
+ printf("ID_ATA_FEATURE_SET_AAM_VENDOR_RECOMMENDED_VALUE=%d\n", id.acoustic >> 8);
+ printf("ID_ATA_FEATURE_SET_AAM_CURRENT_VALUE=%d\n", id.acoustic & 0xff);
+ }
+ if (id.command_set_2 & (1<<5)) {
+ printf("ID_ATA_FEATURE_SET_PUIS=1\n");
+ printf("ID_ATA_FEATURE_SET_PUIS_ENABLED=%d\n", (id.cfs_enable_2 & (1<<5)) ? 1 : 0);
+ }
+ if (id.command_set_2 & (1<<3)) {
+ printf("ID_ATA_FEATURE_SET_APM=1\n");
+ printf("ID_ATA_FEATURE_SET_APM_ENABLED=%d\n", (id.cfs_enable_2 & (1<<3)) ? 1 : 0);
+ if ((id.cfs_enable_2 & (1<<3)))
+ printf("ID_ATA_FEATURE_SET_APM_CURRENT_VALUE=%d\n", id.CurAPMvalues & 0xff);
+ }
+ if (id.command_set_2 & (1<<0))
+ printf("ID_ATA_DOWNLOAD_MICROCODE=1\n");
+
+ /*
+ * Word 76 indicates the capabilities of a SATA device. A PATA device shall set
+ * word 76 to 0000h or FFFFh. If word 76 is set to 0000h or FFFFh, then
+ * the device does not claim compliance with the Serial ATA specification and words
+ * 76 through 79 are not valid and shall be ignored.
+ */
+
+ word = identify.wyde[76];
+ if (!IN_SET(word, 0x0000, 0xffff)) {
+ printf("ID_ATA_SATA=1\n");
+ /*
+ * If bit 2 of word 76 is set to one, then the device supports the Gen2
+ * signaling rate of 3.0 Gb/s (see SATA 2.6).
+ *
+ * If bit 1 of word 76 is set to one, then the device supports the Gen1
+ * signaling rate of 1.5 Gb/s (see SATA 2.6).
+ */
+ if (word & (1<<2))
+ printf("ID_ATA_SATA_SIGNAL_RATE_GEN2=1\n");
+ if (word & (1<<1))
+ printf("ID_ATA_SATA_SIGNAL_RATE_GEN1=1\n");
+ }
+
+ /* Word 217 indicates the nominal media rotation rate of the device */
+ word = identify.wyde[217];
+ if (word == 0x0001)
+ printf ("ID_ATA_ROTATION_RATE_RPM=0\n"); /* non-rotating e.g. SSD */
+ else if (word >= 0x0401 && word <= 0xfffe)
+ printf ("ID_ATA_ROTATION_RATE_RPM=%d\n", word);
+
+ /*
+ * Words 108-111 contain a mandatory World Wide Name (WWN) in the NAA IEEE Registered identifier
+ * format. Word 108 bits (15:12) shall contain 5h, indicating that the naming authority is IEEE.
+ * All other values are reserved.
+ */
+ word = identify.wyde[108];
+ if ((word & 0xf000) == 0x5000) {
+ uint64_t wwwn;
+
+ wwwn = identify.wyde[108];
+ wwwn <<= 16;
+ wwwn |= identify.wyde[109];
+ wwwn <<= 16;
+ wwwn |= identify.wyde[110];
+ wwwn <<= 16;
+ wwwn |= identify.wyde[111];
+ printf("ID_WWN=0x%1$" PRIx64 "\n"
+ "ID_WWN_WITH_EXTENSION=0x%1$" PRIx64 "\n",
+ wwwn);
+ }
+
+ /* from Linux's include/linux/ata.h */
+ if (IN_SET(identify.wyde[0], 0x848a, 0x844a) ||
+ (identify.wyde[83] & 0xc004) == 0x4004)
+ printf("ID_ATA_CFA=1\n");
+ } else {
+ if (serial[0] != '\0')
+ printf("%s_%s\n", model, serial);
+ else
+ printf("%s\n", model);
+ }
+
+ return 0;
+}
diff --git a/src/udev/cdrom_id/cdrom_id.c b/src/udev/cdrom_id/cdrom_id.c
new file mode 100644
index 0000000..804cc7c
--- /dev/null
+++ b/src/udev/cdrom_id/cdrom_id.c
@@ -0,0 +1,1018 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * cdrom_id - optical drive and media information prober
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/cdrom.h>
+#include <scsi/sg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "memory-util.h"
+#include "random-util.h"
+#include "udev-util.h"
+
+/* device info */
+static unsigned cd_cd_rom;
+static unsigned cd_cd_r;
+static unsigned cd_cd_rw;
+static unsigned cd_dvd_rom;
+static unsigned cd_dvd_r;
+static unsigned cd_dvd_rw;
+static unsigned cd_dvd_ram;
+static unsigned cd_dvd_plus_r;
+static unsigned cd_dvd_plus_rw;
+static unsigned cd_dvd_plus_r_dl;
+static unsigned cd_dvd_plus_rw_dl;
+static unsigned cd_bd;
+static unsigned cd_bd_r;
+static unsigned cd_bd_re;
+static unsigned cd_hddvd;
+static unsigned cd_hddvd_r;
+static unsigned cd_hddvd_rw;
+static unsigned cd_mo;
+static unsigned cd_mrw;
+static unsigned cd_mrw_w;
+
+/* media info */
+static unsigned cd_media;
+static unsigned cd_media_cd_rom;
+static unsigned cd_media_cd_r;
+static unsigned cd_media_cd_rw;
+static unsigned cd_media_dvd_rom;
+static unsigned cd_media_dvd_r;
+static unsigned cd_media_dvd_rw;
+static unsigned cd_media_dvd_rw_ro; /* restricted overwrite mode */
+static unsigned cd_media_dvd_rw_seq; /* sequential mode */
+static unsigned cd_media_dvd_ram;
+static unsigned cd_media_dvd_plus_r;
+static unsigned cd_media_dvd_plus_rw;
+static unsigned cd_media_dvd_plus_r_dl;
+static unsigned cd_media_dvd_plus_rw_dl;
+static unsigned cd_media_bd;
+static unsigned cd_media_bd_r;
+static unsigned cd_media_bd_re;
+static unsigned cd_media_hddvd;
+static unsigned cd_media_hddvd_r;
+static unsigned cd_media_hddvd_rw;
+static unsigned cd_media_mo;
+static unsigned cd_media_mrw;
+static unsigned cd_media_mrw_w;
+
+static const char *cd_media_state = NULL;
+static unsigned cd_media_session_next;
+static unsigned cd_media_session_count;
+static unsigned cd_media_track_count;
+static unsigned cd_media_track_count_data;
+static unsigned cd_media_track_count_audio;
+static unsigned long long int cd_media_session_last_offset;
+
+#define ERRCODE(s) ((((s)[2] & 0x0F) << 16) | ((s)[12] << 8) | ((s)[13]))
+#define SK(errcode) (((errcode) >> 16) & 0xF)
+#define ASC(errcode) (((errcode) >> 8) & 0xFF)
+#define ASCQ(errcode) ((errcode) & 0xFF)
+
+static void info_scsi_cmd_err(const char *cmd, int err) {
+ if (err == -1)
+ log_debug("%s failed", cmd);
+ else
+ log_debug("%s failed with SK=%Xh/ASC=%02Xh/ACQ=%02Xh", cmd, SK(err), ASC(err), ASCQ(err));
+}
+
+struct scsi_cmd {
+ struct cdrom_generic_command cgc;
+ union {
+ struct request_sense s;
+ unsigned char u[18];
+ } _sense;
+ struct sg_io_hdr sg_io;
+};
+
+static void scsi_cmd_init(struct scsi_cmd *cmd) {
+ memzero(cmd, sizeof(struct scsi_cmd));
+ cmd->cgc.quiet = 1;
+ cmd->cgc.sense = &cmd->_sense.s;
+ cmd->sg_io.interface_id = 'S';
+ cmd->sg_io.mx_sb_len = sizeof(cmd->_sense);
+ cmd->sg_io.cmdp = cmd->cgc.cmd;
+ cmd->sg_io.sbp = cmd->_sense.u;
+ cmd->sg_io.flags = SG_FLAG_LUN_INHIBIT | SG_FLAG_DIRECT_IO;
+}
+
+static void scsi_cmd_set(struct scsi_cmd *cmd, size_t i, unsigned char arg) {
+ cmd->sg_io.cmd_len = i + 1;
+ cmd->cgc.cmd[i] = arg;
+}
+
+#define CHECK_CONDITION 0x01
+
+static int scsi_cmd_run(struct scsi_cmd *cmd, int fd, unsigned char *buf, size_t bufsize) {
+ int ret = 0;
+
+ if (bufsize > 0) {
+ cmd->sg_io.dxferp = buf;
+ cmd->sg_io.dxfer_len = bufsize;
+ cmd->sg_io.dxfer_direction = SG_DXFER_FROM_DEV;
+ } else
+ cmd->sg_io.dxfer_direction = SG_DXFER_NONE;
+
+ if (ioctl(fd, SG_IO, &cmd->sg_io))
+ return -1;
+
+ if ((cmd->sg_io.info & SG_INFO_OK_MASK) != SG_INFO_OK) {
+ errno = EIO;
+ ret = -1;
+ if (cmd->sg_io.masked_status & CHECK_CONDITION) {
+ ret = ERRCODE(cmd->_sense.u);
+ if (ret == 0)
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
+static int media_lock(int fd, bool lock) {
+ int err;
+
+ /* disable the kernel's lock logic */
+ err = ioctl(fd, CDROM_CLEAR_OPTIONS, CDO_LOCK);
+ if (err < 0)
+ log_debug("CDROM_CLEAR_OPTIONS, CDO_LOCK failed");
+
+ err = ioctl(fd, CDROM_LOCKDOOR, lock ? 1 : 0);
+ if (err < 0)
+ log_debug("CDROM_LOCKDOOR failed");
+
+ return err;
+}
+
+static int media_eject(int fd) {
+ struct scsi_cmd sc;
+ int err;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x1b);
+ scsi_cmd_set(&sc, 4, 0x02);
+ scsi_cmd_set(&sc, 5, 0);
+ err = scsi_cmd_run(&sc, fd, NULL, 0);
+ if (err != 0) {
+ info_scsi_cmd_err("START_STOP_UNIT", err);
+ return -1;
+ }
+ return 0;
+}
+
+static int cd_capability_compat(int fd) {
+ int capability;
+
+ capability = ioctl(fd, CDROM_GET_CAPABILITY, NULL);
+ if (capability < 0)
+ return log_debug_errno(errno, "CDROM_GET_CAPABILITY failed");
+
+ if (capability & CDC_CD_R)
+ cd_cd_r = 1;
+ if (capability & CDC_CD_RW)
+ cd_cd_rw = 1;
+ if (capability & CDC_DVD)
+ cd_dvd_rom = 1;
+ if (capability & CDC_DVD_R)
+ cd_dvd_r = 1;
+ if (capability & CDC_DVD_RAM)
+ cd_dvd_ram = 1;
+ if (capability & CDC_MRW)
+ cd_mrw = 1;
+ if (capability & CDC_MRW_W)
+ cd_mrw_w = 1;
+ return 0;
+}
+
+static int cd_media_compat(int fd) {
+ if (ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT) != CDS_DISC_OK)
+ return log_debug_errno(errno, "CDROM_DRIVE_STATUS != CDS_DISC_OK");
+
+ cd_media = 1;
+ return 0;
+}
+
+static int cd_inquiry(int fd) {
+ struct scsi_cmd sc;
+ unsigned char inq[128];
+ int err;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x12);
+ scsi_cmd_set(&sc, 4, 36);
+ scsi_cmd_set(&sc, 5, 0);
+ err = scsi_cmd_run(&sc, fd, inq, 36);
+ if (err != 0) {
+ info_scsi_cmd_err("INQUIRY", err);
+ return -1;
+ }
+
+ if ((inq[0] & 0x1F) != 5)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "not an MMC unit");
+
+ log_debug("INQUIRY: [%.8s][%.16s][%.4s]", inq + 8, inq + 16, inq + 32);
+ return 0;
+}
+
+static void feature_profile_media(int cur_profile) {
+ switch (cur_profile) {
+ case 0x03:
+ case 0x04:
+ case 0x05:
+ log_debug("profile 0x%02x ", cur_profile);
+ cd_media = 1;
+ cd_media_mo = 1;
+ break;
+ case 0x08:
+ log_debug("profile 0x%02x media_cd_rom", cur_profile);
+ cd_media = 1;
+ cd_media_cd_rom = 1;
+ break;
+ case 0x09:
+ log_debug("profile 0x%02x media_cd_r", cur_profile);
+ cd_media = 1;
+ cd_media_cd_r = 1;
+ break;
+ case 0x0a:
+ log_debug("profile 0x%02x media_cd_rw", cur_profile);
+ cd_media = 1;
+ cd_media_cd_rw = 1;
+ break;
+ case 0x10:
+ log_debug("profile 0x%02x media_dvd_ro", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_rom = 1;
+ break;
+ case 0x11:
+ log_debug("profile 0x%02x media_dvd_r", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_r = 1;
+ break;
+ case 0x12:
+ log_debug("profile 0x%02x media_dvd_ram", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_ram = 1;
+ break;
+ case 0x13:
+ log_debug("profile 0x%02x media_dvd_rw_ro", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_rw = 1;
+ cd_media_dvd_rw_ro = 1;
+ break;
+ case 0x14:
+ log_debug("profile 0x%02x media_dvd_rw_seq", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_rw = 1;
+ cd_media_dvd_rw_seq = 1;
+ break;
+ case 0x1B:
+ log_debug("profile 0x%02x media_dvd_plus_r", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_plus_r = 1;
+ break;
+ case 0x1A:
+ log_debug("profile 0x%02x media_dvd_plus_rw", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_plus_rw = 1;
+ break;
+ case 0x2A:
+ log_debug("profile 0x%02x media_dvd_plus_rw_dl", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_plus_rw_dl = 1;
+ break;
+ case 0x2B:
+ log_debug("profile 0x%02x media_dvd_plus_r_dl", cur_profile);
+ cd_media = 1;
+ cd_media_dvd_plus_r_dl = 1;
+ break;
+ case 0x40:
+ log_debug("profile 0x%02x media_bd", cur_profile);
+ cd_media = 1;
+ cd_media_bd = 1;
+ break;
+ case 0x41:
+ case 0x42:
+ log_debug("profile 0x%02x media_bd_r", cur_profile);
+ cd_media = 1;
+ cd_media_bd_r = 1;
+ break;
+ case 0x43:
+ log_debug("profile 0x%02x media_bd_re", cur_profile);
+ cd_media = 1;
+ cd_media_bd_re = 1;
+ break;
+ case 0x50:
+ log_debug("profile 0x%02x media_hddvd", cur_profile);
+ cd_media = 1;
+ cd_media_hddvd = 1;
+ break;
+ case 0x51:
+ log_debug("profile 0x%02x media_hddvd_r", cur_profile);
+ cd_media = 1;
+ cd_media_hddvd_r = 1;
+ break;
+ case 0x52:
+ log_debug("profile 0x%02x media_hddvd_rw", cur_profile);
+ cd_media = 1;
+ cd_media_hddvd_rw = 1;
+ break;
+ default:
+ log_debug("profile 0x%02x <ignored>", cur_profile);
+ break;
+ }
+}
+
+static int feature_profiles(const unsigned char *profiles, size_t size) {
+ unsigned i;
+
+ for (i = 0; i+4 <= size; i += 4) {
+ int profile;
+
+ profile = profiles[i] << 8 | profiles[i+1];
+ switch (profile) {
+ case 0x03:
+ case 0x04:
+ case 0x05:
+ log_debug("profile 0x%02x mo", profile);
+ cd_mo = 1;
+ break;
+ case 0x08:
+ log_debug("profile 0x%02x cd_rom", profile);
+ cd_cd_rom = 1;
+ break;
+ case 0x09:
+ log_debug("profile 0x%02x cd_r", profile);
+ cd_cd_r = 1;
+ break;
+ case 0x0A:
+ log_debug("profile 0x%02x cd_rw", profile);
+ cd_cd_rw = 1;
+ break;
+ case 0x10:
+ log_debug("profile 0x%02x dvd_rom", profile);
+ cd_dvd_rom = 1;
+ break;
+ case 0x12:
+ log_debug("profile 0x%02x dvd_ram", profile);
+ cd_dvd_ram = 1;
+ break;
+ case 0x13:
+ case 0x14:
+ log_debug("profile 0x%02x dvd_rw", profile);
+ cd_dvd_rw = 1;
+ break;
+ case 0x1B:
+ log_debug("profile 0x%02x dvd_plus_r", profile);
+ cd_dvd_plus_r = 1;
+ break;
+ case 0x1A:
+ log_debug("profile 0x%02x dvd_plus_rw", profile);
+ cd_dvd_plus_rw = 1;
+ break;
+ case 0x2A:
+ log_debug("profile 0x%02x dvd_plus_rw_dl", profile);
+ cd_dvd_plus_rw_dl = 1;
+ break;
+ case 0x2B:
+ log_debug("profile 0x%02x dvd_plus_r_dl", profile);
+ cd_dvd_plus_r_dl = 1;
+ break;
+ case 0x40:
+ cd_bd = 1;
+ log_debug("profile 0x%02x bd", profile);
+ break;
+ case 0x41:
+ case 0x42:
+ cd_bd_r = 1;
+ log_debug("profile 0x%02x bd_r", profile);
+ break;
+ case 0x43:
+ cd_bd_re = 1;
+ log_debug("profile 0x%02x bd_re", profile);
+ break;
+ case 0x50:
+ cd_hddvd = 1;
+ log_debug("profile 0x%02x hddvd", profile);
+ break;
+ case 0x51:
+ cd_hddvd_r = 1;
+ log_debug("profile 0x%02x hddvd_r", profile);
+ break;
+ case 0x52:
+ cd_hddvd_rw = 1;
+ log_debug("profile 0x%02x hddvd_rw", profile);
+ break;
+ default:
+ log_debug("profile 0x%02x <ignored>", profile);
+ break;
+ }
+ }
+ return 0;
+}
+
+/* returns 0 if media was detected */
+static int cd_profiles_old_mmc(int fd) {
+ struct scsi_cmd sc;
+ int err;
+
+ unsigned char header[32];
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x51);
+ scsi_cmd_set(&sc, 8, sizeof(header));
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, header, sizeof(header));
+ if (err != 0) {
+ info_scsi_cmd_err("READ DISC INFORMATION", err);
+ if (cd_media == 1) {
+ log_debug("no current profile, but disc is present; assuming CD-ROM");
+ cd_media_cd_rom = 1;
+ cd_media_track_count = 1;
+ cd_media_track_count_data = 1;
+ return 0;
+ } else
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+ "no current profile, assuming no media");
+ };
+
+ cd_media = 1;
+
+ if (header[2] & 16) {
+ cd_media_cd_rw = 1;
+ log_debug("profile 0x0a media_cd_rw");
+ } else if ((header[2] & 3) < 2 && cd_cd_r) {
+ cd_media_cd_r = 1;
+ log_debug("profile 0x09 media_cd_r");
+ } else {
+ cd_media_cd_rom = 1;
+ log_debug("profile 0x08 media_cd_rom");
+ }
+ return 0;
+}
+
+/* returns 0 if media was detected */
+static int cd_profiles(int fd) {
+ struct scsi_cmd sc;
+ unsigned char features[65530];
+ unsigned cur_profile = 0;
+ unsigned len;
+ unsigned i;
+ int err;
+ int ret;
+
+ ret = -1;
+
+ /* First query the current profile */
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x46);
+ scsi_cmd_set(&sc, 8, 8);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, features, 8);
+ if (err != 0) {
+ info_scsi_cmd_err("GET CONFIGURATION", err);
+ /* handle pre-MMC2 drives which do not support GET CONFIGURATION */
+ if (SK(err) == 0x5 && IN_SET(ASC(err), 0x20, 0x24)) {
+ log_debug("drive is pre-MMC2 and does not support 46h get configuration command");
+ log_debug("trying to work around the problem");
+ ret = cd_profiles_old_mmc(fd);
+ }
+ goto out;
+ }
+
+ cur_profile = features[6] << 8 | features[7];
+ if (cur_profile > 0) {
+ log_debug("current profile 0x%02x", cur_profile);
+ feature_profile_media(cur_profile);
+ ret = 0; /* we have media */
+ } else
+ log_debug("no current profile, assuming no media");
+
+ len = features[0] << 24 | features[1] << 16 | features[2] << 8 | features[3];
+ log_debug("GET CONFIGURATION: size of features buffer 0x%04x", len);
+
+ if (len > sizeof(features)) {
+ log_debug("cannot get features in a single query, truncating");
+ len = sizeof(features);
+ } else if (len <= 8)
+ len = sizeof(features);
+
+ /* Now get the full feature buffer */
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x46);
+ scsi_cmd_set(&sc, 7, ( len >> 8 ) & 0xff);
+ scsi_cmd_set(&sc, 8, len & 0xff);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, features, len);
+ if (err != 0) {
+ info_scsi_cmd_err("GET CONFIGURATION", err);
+ return -1;
+ }
+
+ /* parse the length once more, in case the drive decided to have other features suddenly :) */
+ len = features[0] << 24 | features[1] << 16 | features[2] << 8 | features[3];
+ log_debug("GET CONFIGURATION: size of features buffer 0x%04x", len);
+
+ if (len > sizeof(features)) {
+ log_debug("cannot get features in a single query, truncating");
+ len = sizeof(features);
+ }
+
+ /* device features */
+ for (i = 8; i+4 < len; i += (4 + features[i+3])) {
+ unsigned feature;
+
+ feature = features[i] << 8 | features[i+1];
+
+ switch (feature) {
+ case 0x00:
+ log_debug("GET CONFIGURATION: feature 'profiles', with %i entries", features[i+3] / 4);
+ feature_profiles(&features[i]+4, MIN(features[i+3], len - i - 4));
+ break;
+ default:
+ log_debug("GET CONFIGURATION: feature 0x%04x <ignored>, with 0x%02x bytes", feature, features[i+3]);
+ break;
+ }
+ }
+out:
+ return ret;
+}
+
+static int cd_media_info(int fd) {
+ struct scsi_cmd sc;
+ unsigned char header[32];
+ static const char *const media_status[] = {
+ "blank",
+ "appendable",
+ "complete",
+ "other"
+ };
+ int err;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x51);
+ scsi_cmd_set(&sc, 8, sizeof(header) & 0xff);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, header, sizeof(header));
+ if (err != 0) {
+ info_scsi_cmd_err("READ DISC INFORMATION", err);
+ return -1;
+ };
+
+ cd_media = 1;
+ log_debug("disk type %02x", header[8]);
+ log_debug("hardware reported media status: %s", media_status[header[2] & 3]);
+
+ /* exclude plain CDROM, some fake cdroms return 0 for "blank" media here */
+ if (!cd_media_cd_rom)
+ cd_media_state = media_status[header[2] & 3];
+
+ /* fresh DVD-RW in restricted overwrite mode reports itself as
+ * "appendable"; change it to "blank" to make it consistent with what
+ * gets reported after blanking, and what userspace expects */
+ if (cd_media_dvd_rw_ro && (header[2] & 3) == 1)
+ cd_media_state = media_status[0];
+
+ /* DVD+RW discs (and DVD-RW in restricted mode) once formatted are
+ * always "complete", DVD-RAM are "other" or "complete" if the disc is
+ * write protected; we need to check the contents if it is blank */
+ if ((cd_media_dvd_rw_ro || cd_media_dvd_plus_rw || cd_media_dvd_plus_rw_dl || cd_media_dvd_ram) && (header[2] & 3) > 1) {
+ unsigned char buffer[32 * 2048];
+ unsigned char len;
+ int offset;
+
+ if (cd_media_dvd_ram) {
+ /* a write protected dvd-ram may report "complete" status */
+
+ unsigned char dvdstruct[8];
+ unsigned char format[12];
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0xAD);
+ scsi_cmd_set(&sc, 7, 0xC0);
+ scsi_cmd_set(&sc, 9, sizeof(dvdstruct));
+ scsi_cmd_set(&sc, 11, 0);
+ err = scsi_cmd_run(&sc, fd, dvdstruct, sizeof(dvdstruct));
+ if (err != 0) {
+ info_scsi_cmd_err("READ DVD STRUCTURE", err);
+ return -1;
+ }
+ if (dvdstruct[4] & 0x02) {
+ cd_media_state = media_status[2];
+ log_debug("write-protected DVD-RAM media inserted");
+ goto determined;
+ }
+
+ /* let's make sure we don't try to read unformatted media */
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x23);
+ scsi_cmd_set(&sc, 8, sizeof(format));
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, format, sizeof(format));
+ if (err != 0) {
+ info_scsi_cmd_err("READ DVD FORMAT CAPACITIES", err);
+ return -1;
+ }
+
+ len = format[3];
+ if (len & 7 || len < 16)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "invalid format capacities length");
+
+ switch(format[8] & 3) {
+ case 1:
+ log_debug("unformatted DVD-RAM media inserted");
+ /* This means that last format was interrupted
+ * or failed, blank dvd-ram discs are factory
+ * formatted. Take no action here as it takes
+ * quite a while to reformat a dvd-ram and it's
+ * not automatically started */
+ goto determined;
+
+ case 2:
+ log_debug("formatted DVD-RAM media inserted");
+ break;
+
+ case 3:
+ cd_media = 0; //return no media
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+ "format capacities returned no media");
+ }
+ }
+
+ /* Take a closer look at formatted media (unformatted DVD+RW
+ * has "blank" status", DVD-RAM was examined earlier) and check
+ * for ISO and UDF PVDs or a fs superblock presence and do it
+ * in one ioctl (we need just sectors 0 and 16) */
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x28);
+ scsi_cmd_set(&sc, 5, 0);
+ scsi_cmd_set(&sc, 8, 32);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, buffer, sizeof(buffer));
+ if (err != 0) {
+ cd_media = 0;
+ info_scsi_cmd_err("READ FIRST 32 BLOCKS", err);
+ return -1;
+ }
+
+ /* if any non-zero data is found in sector 16 (iso and udf) or
+ * eventually 0 (fat32 boot sector, ext2 superblock, etc), disc
+ * is assumed non-blank */
+
+ for (offset = 32768; offset < (32768 + 2048); offset++) {
+ if (buffer [offset]) {
+ log_debug("data in block 16, assuming complete");
+ goto determined;
+ }
+ }
+
+ for (offset = 0; offset < 2048; offset++) {
+ if (buffer [offset]) {
+ log_debug("data in block 0, assuming complete");
+ goto determined;
+ }
+ }
+
+ cd_media_state = media_status[0];
+ log_debug("no data in blocks 0 or 16, assuming blank");
+ }
+
+determined:
+ /* "other" is e. g. DVD-RAM, can't append sessions there; DVDs in
+ * restricted overwrite mode can never append, only in sequential mode */
+ if ((header[2] & 3) < 2 && !cd_media_dvd_rw_ro)
+ cd_media_session_next = header[10] << 8 | header[5];
+ cd_media_session_count = header[9] << 8 | header[4];
+ cd_media_track_count = header[11] << 8 | header[6];
+
+ return 0;
+}
+
+static int cd_media_toc(int fd) {
+ struct scsi_cmd sc;
+ unsigned char header[12];
+ unsigned char toc[65536];
+ unsigned len, i, num_tracks;
+ unsigned char *p;
+ int err;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x43);
+ scsi_cmd_set(&sc, 6, 1);
+ scsi_cmd_set(&sc, 8, sizeof(header) & 0xff);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, header, sizeof(header));
+ if (err != 0) {
+ info_scsi_cmd_err("READ TOC", err);
+ return -1;
+ }
+
+ len = (header[0] << 8 | header[1]) + 2;
+ log_debug("READ TOC: len: %d, start track: %d, end track: %d", len, header[2], header[3]);
+ if (len > sizeof(toc))
+ return -1;
+ if (len < 2)
+ return -1;
+ /* 2: first track, 3: last track */
+ num_tracks = header[3] - header[2] + 1;
+
+ /* empty media has no tracks */
+ if (len < 8)
+ return 0;
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x43);
+ scsi_cmd_set(&sc, 6, header[2]); /* First Track/Session Number */
+ scsi_cmd_set(&sc, 7, (len >> 8) & 0xff);
+ scsi_cmd_set(&sc, 8, len & 0xff);
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, toc, len);
+ if (err != 0) {
+ info_scsi_cmd_err("READ TOC (tracks)", err);
+ return -1;
+ }
+
+ /* Take care to not iterate beyond the last valid track as specified in
+ * the TOC, but also avoid going beyond the TOC length, just in case
+ * the last track number is invalidly large */
+ for (p = toc+4, i = 4; i < len-8 && num_tracks > 0; i += 8, p += 8, --num_tracks) {
+ unsigned block;
+ unsigned is_data_track;
+
+ is_data_track = (p[1] & 0x04) != 0;
+
+ block = p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7];
+ log_debug("track=%u info=0x%x(%s) start_block=%u",
+ p[2], p[1] & 0x0f, is_data_track ? "data":"audio", block);
+
+ if (is_data_track)
+ cd_media_track_count_data++;
+ else
+ cd_media_track_count_audio++;
+ }
+
+ scsi_cmd_init(&sc);
+ scsi_cmd_set(&sc, 0, 0x43);
+ scsi_cmd_set(&sc, 2, 1); /* Session Info */
+ scsi_cmd_set(&sc, 8, sizeof(header));
+ scsi_cmd_set(&sc, 9, 0);
+ err = scsi_cmd_run(&sc, fd, header, sizeof(header));
+ if (err != 0) {
+ info_scsi_cmd_err("READ TOC (multi session)", err);
+ return -1;
+ }
+ len = header[4+4] << 24 | header[4+5] << 16 | header[4+6] << 8 | header[4+7];
+ log_debug("last track %u starts at block %u", header[4+2], len);
+ cd_media_session_last_offset = (unsigned long long int)len * 2048;
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "lock-media", no_argument, NULL, 'l' },
+ { "unlock-media", no_argument, NULL, 'u' },
+ { "eject-media", no_argument, NULL, 'e' },
+ { "debug", no_argument, NULL, 'd' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+ bool eject = false;
+ bool lock = false;
+ bool unlock = false;
+ const char *node = NULL;
+ int fd = -1;
+ int cnt;
+ int rc = 0;
+
+ log_set_target(LOG_TARGET_AUTO);
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ for (;;) {
+ int option;
+
+ option = getopt_long(argc, argv, "deluh", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'l':
+ lock = true;
+ break;
+ case 'u':
+ unlock = true;
+ break;
+ case 'e':
+ eject = true;
+ break;
+ case 'd':
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_set_max_level(LOG_DEBUG);
+ log_open();
+ break;
+ case 'h':
+ printf("Usage: %s [options] <device>\n"
+ " -l,--lock-media lock the media (to enable eject request events)\n"
+ " -u,--unlock-media unlock the media\n"
+ " -e,--eject-media eject the media\n"
+ " -d,--debug debug to stderr\n"
+ " -h,--help print this help text\n\n",
+ program_invocation_short_name);
+ goto exit;
+ default:
+ rc = 1;
+ goto exit;
+ }
+ }
+
+ node = argv[optind];
+ if (!node) {
+ log_error("no device");
+ rc = 1;
+ goto exit;
+ }
+
+ initialize_srand();
+ for (cnt = 20; cnt > 0; cnt--) {
+ struct timespec duration;
+
+ fd = open(node, O_RDONLY|O_NONBLOCK|O_CLOEXEC);
+ if (fd >= 0 || errno != EBUSY)
+ break;
+ duration.tv_sec = 0;
+ duration.tv_nsec = (100 * 1000 * 1000) + (rand() % 100 * 1000 * 1000);
+ nanosleep(&duration, NULL);
+ }
+ if (fd < 0) {
+ log_debug("unable to open '%s'", node);
+ rc = 1;
+ goto exit;
+ }
+ log_debug("probing: '%s'", node);
+
+ /* same data as original cdrom_id */
+ if (cd_capability_compat(fd) < 0) {
+ rc = 1;
+ goto exit;
+ }
+
+ /* check for media - don't bail if there's no media as we still need to
+ * to read profiles */
+ cd_media_compat(fd);
+
+ /* check if drive talks MMC */
+ if (cd_inquiry(fd) < 0)
+ goto work;
+
+ /* read drive and possibly current profile */
+ if (cd_profiles(fd) != 0)
+ goto work;
+
+ /* at this point we are guaranteed to have media in the drive - find out more about it */
+
+ /* get session/track info */
+ cd_media_toc(fd);
+
+ /* get writable media state */
+ cd_media_info(fd);
+
+work:
+ /* lock the media, so we enable eject button events */
+ if (lock && cd_media) {
+ log_debug("PREVENT_ALLOW_MEDIUM_REMOVAL (lock)");
+ media_lock(fd, true);
+ }
+
+ if (unlock && cd_media) {
+ log_debug("PREVENT_ALLOW_MEDIUM_REMOVAL (unlock)");
+ media_lock(fd, false);
+ }
+
+ if (eject) {
+ log_debug("PREVENT_ALLOW_MEDIUM_REMOVAL (unlock)");
+ media_lock(fd, false);
+ log_debug("START_STOP_UNIT (eject)");
+ media_eject(fd);
+ }
+
+ printf("ID_CDROM=1\n");
+ if (cd_cd_rom)
+ printf("ID_CDROM_CD=1\n");
+ if (cd_cd_r)
+ printf("ID_CDROM_CD_R=1\n");
+ if (cd_cd_rw)
+ printf("ID_CDROM_CD_RW=1\n");
+ if (cd_dvd_rom)
+ printf("ID_CDROM_DVD=1\n");
+ if (cd_dvd_r)
+ printf("ID_CDROM_DVD_R=1\n");
+ if (cd_dvd_rw)
+ printf("ID_CDROM_DVD_RW=1\n");
+ if (cd_dvd_ram)
+ printf("ID_CDROM_DVD_RAM=1\n");
+ if (cd_dvd_plus_r)
+ printf("ID_CDROM_DVD_PLUS_R=1\n");
+ if (cd_dvd_plus_rw)
+ printf("ID_CDROM_DVD_PLUS_RW=1\n");
+ if (cd_dvd_plus_r_dl)
+ printf("ID_CDROM_DVD_PLUS_R_DL=1\n");
+ if (cd_dvd_plus_rw_dl)
+ printf("ID_CDROM_DVD_PLUS_RW_DL=1\n");
+ if (cd_bd)
+ printf("ID_CDROM_BD=1\n");
+ if (cd_bd_r)
+ printf("ID_CDROM_BD_R=1\n");
+ if (cd_bd_re)
+ printf("ID_CDROM_BD_RE=1\n");
+ if (cd_hddvd)
+ printf("ID_CDROM_HDDVD=1\n");
+ if (cd_hddvd_r)
+ printf("ID_CDROM_HDDVD_R=1\n");
+ if (cd_hddvd_rw)
+ printf("ID_CDROM_HDDVD_RW=1\n");
+ if (cd_mo)
+ printf("ID_CDROM_MO=1\n");
+ if (cd_mrw)
+ printf("ID_CDROM_MRW=1\n");
+ if (cd_mrw_w)
+ printf("ID_CDROM_MRW_W=1\n");
+
+ if (cd_media)
+ printf("ID_CDROM_MEDIA=1\n");
+ if (cd_media_mo)
+ printf("ID_CDROM_MEDIA_MO=1\n");
+ if (cd_media_mrw)
+ printf("ID_CDROM_MEDIA_MRW=1\n");
+ if (cd_media_mrw_w)
+ printf("ID_CDROM_MEDIA_MRW_W=1\n");
+ if (cd_media_cd_rom)
+ printf("ID_CDROM_MEDIA_CD=1\n");
+ if (cd_media_cd_r)
+ printf("ID_CDROM_MEDIA_CD_R=1\n");
+ if (cd_media_cd_rw)
+ printf("ID_CDROM_MEDIA_CD_RW=1\n");
+ if (cd_media_dvd_rom)
+ printf("ID_CDROM_MEDIA_DVD=1\n");
+ if (cd_media_dvd_r)
+ printf("ID_CDROM_MEDIA_DVD_R=1\n");
+ if (cd_media_dvd_ram)
+ printf("ID_CDROM_MEDIA_DVD_RAM=1\n");
+ if (cd_media_dvd_rw)
+ printf("ID_CDROM_MEDIA_DVD_RW=1\n");
+ if (cd_media_dvd_plus_r)
+ printf("ID_CDROM_MEDIA_DVD_PLUS_R=1\n");
+ if (cd_media_dvd_plus_rw)
+ printf("ID_CDROM_MEDIA_DVD_PLUS_RW=1\n");
+ if (cd_media_dvd_plus_rw_dl)
+ printf("ID_CDROM_MEDIA_DVD_PLUS_RW_DL=1\n");
+ if (cd_media_dvd_plus_r_dl)
+ printf("ID_CDROM_MEDIA_DVD_PLUS_R_DL=1\n");
+ if (cd_media_bd)
+ printf("ID_CDROM_MEDIA_BD=1\n");
+ if (cd_media_bd_r)
+ printf("ID_CDROM_MEDIA_BD_R=1\n");
+ if (cd_media_bd_re)
+ printf("ID_CDROM_MEDIA_BD_RE=1\n");
+ if (cd_media_hddvd)
+ printf("ID_CDROM_MEDIA_HDDVD=1\n");
+ if (cd_media_hddvd_r)
+ printf("ID_CDROM_MEDIA_HDDVD_R=1\n");
+ if (cd_media_hddvd_rw)
+ printf("ID_CDROM_MEDIA_HDDVD_RW=1\n");
+
+ if (cd_media_state)
+ printf("ID_CDROM_MEDIA_STATE=%s\n", cd_media_state);
+ if (cd_media_session_next > 0)
+ printf("ID_CDROM_MEDIA_SESSION_NEXT=%u\n", cd_media_session_next);
+ if (cd_media_session_count > 0)
+ printf("ID_CDROM_MEDIA_SESSION_COUNT=%u\n", cd_media_session_count);
+ if (cd_media_session_count > 1 && cd_media_session_last_offset > 0)
+ printf("ID_CDROM_MEDIA_SESSION_LAST_OFFSET=%llu\n", cd_media_session_last_offset);
+ if (cd_media_track_count > 0)
+ printf("ID_CDROM_MEDIA_TRACK_COUNT=%u\n", cd_media_track_count);
+ if (cd_media_track_count_audio > 0)
+ printf("ID_CDROM_MEDIA_TRACK_COUNT_AUDIO=%u\n", cd_media_track_count_audio);
+ if (cd_media_track_count_data > 0)
+ printf("ID_CDROM_MEDIA_TRACK_COUNT_DATA=%u\n", cd_media_track_count_data);
+exit:
+ if (fd >= 0)
+ close(fd);
+ log_close();
+ return rc;
+}
diff --git a/src/udev/fido_id/fido_id.c b/src/udev/fido_id/fido_id.c
new file mode 100644
index 0000000..f14b81d
--- /dev/null
+++ b/src/udev/fido_id/fido_id.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Identifies FIDO CTAP1 ("U2F")/CTAP2 security tokens based on the usage declared in their report
+ * descriptor and outputs suitable environment variables.
+ *
+ * Inspired by Andrew Lutomirski's 'u2f-hidraw-policy.c'
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/hid.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "device-internal.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "fido_id_desc.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "udev-util.h"
+
+static int run(int argc, char **argv) {
+ _cleanup_(sd_device_unrefp) struct sd_device *device = NULL;
+ _cleanup_free_ char *desc_path = NULL;
+ _cleanup_close_ int fd = -1;
+
+ struct sd_device *hid_device;
+ const char *sys_path;
+ uint8_t desc[HID_MAX_DESCRIPTOR_SIZE];
+ ssize_t desc_len;
+
+ int r;
+
+ log_set_target(LOG_TARGET_AUTO);
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ if (argc > 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Usage: %s [SYSFS_PATH]", program_invocation_short_name);
+
+ if (argc == 1) {
+ r = device_new_from_strv(&device, environ);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get current device from environment: %m");
+ } else {
+ r = sd_device_new_from_syspath(&device, argv[1]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from syspath: %m");
+ }
+
+ r = sd_device_get_parent(device, &hid_device);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to get parent HID device: %m");
+
+ r = sd_device_get_syspath(hid_device, &sys_path);
+ if (r < 0)
+ return log_device_error_errno(hid_device, r, "Failed to get syspath for HID device: %m");
+
+ desc_path = path_join(sys_path, "report_descriptor");
+ if (!desc_path)
+ return log_oom();
+
+ fd = open(desc_path, O_RDONLY | O_NOFOLLOW | O_CLOEXEC);
+ if (fd < 0)
+ return log_device_error_errno(hid_device, errno,
+ "Failed to open report descriptor at '%s': %m", desc_path);
+
+ desc_len = read(fd, desc, sizeof(desc));
+ if (desc_len < 0)
+ return log_device_error_errno(hid_device, errno,
+ "Failed to read report descriptor at '%s': %m", desc_path);
+ if (desc_len == 0)
+ return log_device_debug_errno(hid_device, SYNTHETIC_ERRNO(EINVAL),
+ "Empty report descriptor at '%s'.", desc_path);
+
+ r = is_fido_security_token_desc(desc, desc_len);
+ if (r < 0)
+ return log_device_debug_errno(hid_device, r,
+ "Failed to parse report descriptor at '%s'.", desc_path);
+ if (r > 0) {
+ printf("ID_FIDO_TOKEN=1\n");
+ printf("ID_SECURITY_TOKEN=1\n");
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/udev/fido_id/fido_id_desc.c b/src/udev/fido_id/fido_id_desc.c
new file mode 100644
index 0000000..2dfa759
--- /dev/null
+++ b/src/udev/fido_id/fido_id_desc.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/* Inspired by Andrew Lutomirski's 'u2f-hidraw-policy.c' */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "fido_id_desc.h"
+
+#define HID_RPTDESC_FIRST_BYTE_LONG_ITEM 0xfeu
+#define HID_RPTDESC_TYPE_GLOBAL 0x1u
+#define HID_RPTDESC_TYPE_LOCAL 0x2u
+#define HID_RPTDESC_TAG_USAGE_PAGE 0x0u
+#define HID_RPTDESC_TAG_USAGE 0x0u
+
+/*
+ * HID usage for FIDO CTAP1 ("U2F") and CTAP2 security tokens.
+ * https://fidoalliance.org/specs/fido-u2f-v1.0-ps-20141009/fido-u2f-u2f_hid.h-v1.0-ps-20141009.txt
+ * https://fidoalliance.org/specs/fido-v2.0-ps-20190130/fido-client-to-authenticator-protocol-v2.0-ps-20190130.html#usb-discovery
+ * https://www.usb.org/sites/default/files/hutrr48.pdf
+ */
+#define FIDO_FULL_USAGE_CTAPHID 0xf1d00001u
+
+/*
+ * Parses a HID report descriptor and identifies FIDO CTAP1 ("U2F")/CTAP2 security tokens based on their
+ * declared usage.
+ * A positive return value indicates that the report descriptor belongs to a FIDO security token.
+ * https://www.usb.org/sites/default/files/documents/hid1_11.pdf (Section 6.2.2)
+ */
+int is_fido_security_token_desc(const uint8_t *desc, size_t desc_len) {
+ uint32_t usage = 0;
+
+ for (size_t pos = 0; pos < desc_len; ) {
+ uint8_t tag, type, size_code;
+ size_t size;
+ uint32_t value;
+
+ /* Report descriptors consists of short items (1-5 bytes) and long items (3-258 bytes). */
+ if (desc[pos] == HID_RPTDESC_FIRST_BYTE_LONG_ITEM) {
+ /* No long items are defined in the spec; skip them.
+ * The length of the data in a long item is contained in the byte after the long
+ * item tag. The header consists of three bytes: special long item tag, length,
+ * actual tag. */
+ if (pos + 1 >= desc_len)
+ return -EINVAL;
+ pos += desc[pos + 1] + 3;
+ continue;
+ }
+
+ /* The first byte of a short item encodes tag, type and size. */
+ tag = desc[pos] >> 4; /* Bits 7 to 4 */
+ type = (desc[pos] >> 2) & 0x3; /* Bits 3 and 2 */
+ size_code = desc[pos] & 0x3; /* Bits 1 and 0 */
+ /* Size is coded as follows:
+ * 0 -> 0 bytes, 1 -> 1 byte, 2 -> 2 bytes, 3 -> 4 bytes
+ */
+ size = size_code < 3 ? size_code : 4;
+ /* Consume header byte. */
+ pos++;
+
+ /* Extract the item value coded on size bytes. */
+ if (pos + size > desc_len)
+ return -EINVAL;
+ value = 0;
+ for (size_t i = 0; i < size; i++)
+ value |= (uint32_t) desc[pos + i] << (8 * i);
+ /* Consume value bytes. */
+ pos += size;
+
+ if (type == HID_RPTDESC_TYPE_GLOBAL && tag == HID_RPTDESC_TAG_USAGE_PAGE) {
+ /* A usage page is a 16 bit value coded on at most 16 bits. */
+ if (size > 2)
+ return -EINVAL;
+ /* A usage page sets the upper 16 bits of a following usage. */
+ usage = (value & 0x0000ffffu) << 16;
+ }
+
+ if (type == HID_RPTDESC_TYPE_LOCAL && tag == HID_RPTDESC_TAG_USAGE) {
+ /* A usage is a 32 bit value, but is prepended with the current usage page if
+ * coded on less than 4 bytes (that is, at most 2 bytes). */
+ if (size == 4)
+ usage = value;
+ else
+ usage = (usage & 0xffff0000u) | (value & 0x0000ffffu);
+ if (usage == FIDO_FULL_USAGE_CTAPHID)
+ return 1;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/udev/fido_id/fido_id_desc.h b/src/udev/fido_id/fido_id_desc.h
new file mode 100644
index 0000000..57af57e
--- /dev/null
+++ b/src/udev/fido_id/fido_id_desc.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+int is_fido_security_token_desc(const uint8_t *desc, size_t desc_len);
diff --git a/src/udev/fido_id/fuzz-fido-id-desc.c b/src/udev/fido_id/fuzz-fido-id-desc.c
new file mode 100644
index 0000000..44d66df
--- /dev/null
+++ b/src/udev/fido_id/fuzz-fido-id-desc.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/hid.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "fido_id_desc.h"
+#include "fuzz.h"
+#include "log.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ /* We don't want to fill the logs with messages about parse errors.
+ * Disable most logging if not running standalone */
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ if (size > HID_MAX_DESCRIPTOR_SIZE)
+ return 0;
+ (void) is_fido_security_token_desc(data, size);
+
+ return 0;
+}
diff --git a/src/udev/fido_id/test-fido-id-desc.c b/src/udev/fido_id/test-fido-id-desc.c
new file mode 100644
index 0000000..6836bca
--- /dev/null
+++ b/src/udev/fido_id/test-fido-id-desc.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "fido_id_desc.h"
+#include "macro.h"
+
+static void test_is_fido_security_token_desc__fido(void) {
+ static const uint8_t FIDO_HID_DESC_1[] = {
+ 0x06, 0xd0, 0xf1, 0x09, 0x01, 0xa1, 0x01, 0x09, 0x20, 0x15, 0x00, 0x26, 0xff, 0x00, 0x75,
+ 0x08, 0x95, 0x40, 0x81, 0x02, 0x09, 0x21, 0x15, 0x00, 0x26, 0xff, 0x00, 0x75, 0x08, 0x95,
+ 0x40, 0x91, 0x02, 0xc0,
+ };
+ assert_se(is_fido_security_token_desc(FIDO_HID_DESC_1, sizeof(FIDO_HID_DESC_1)) > 0);
+
+ static const uint8_t FIDO_HID_DESC_2[] = {
+ 0x05, 0x01, 0x09, 0x06, 0xa1, 0x01, 0x05, 0x07, 0x19, 0xe0, 0x29, 0xe7, 0x15, 0x00, 0x25,
+ 0x01, 0x75, 0x01, 0x95, 0x08, 0x81, 0x02, 0x95, 0x01, 0x75, 0x08, 0x81, 0x01, 0x95, 0x05,
+ 0x75, 0x01, 0x05, 0x08, 0x19, 0x01, 0x29, 0x05, 0x91, 0x02, 0x95, 0x01, 0x75, 0x03, 0x91,
+ 0x01, 0x95, 0x06, 0x75, 0x08, 0x15, 0x00, 0x25, 0x65, 0x05, 0x07, 0x19, 0x00, 0x29, 0x65,
+ 0x81, 0x00, 0x09, 0x03, 0x75, 0x08, 0x95, 0x08, 0xb1, 0x02, 0xc0,
+ 0x06, 0xd0, 0xf1, 0x09, 0x01, 0xa1, 0x01, 0x09, 0x20, 0x15, 0x00, 0x26, 0xff, 0x00, 0x75,
+ 0x08, 0x95, 0x40, 0x81, 0x02, 0x09, 0x21, 0x15, 0x00, 0x26, 0xff, 0x00, 0x75, 0x08, 0x95,
+ 0x40, 0x91, 0x02, 0xc0,
+ };
+ assert_se(is_fido_security_token_desc(FIDO_HID_DESC_2, sizeof(FIDO_HID_DESC_2)) > 0);
+}
+
+static void test_is_fido_security_token_desc__non_fido(void) {
+ /* Wrong usage page */
+ static const uint8_t NON_FIDO_HID_DESC_1[] = {
+ 0x06, 0xd0, 0xf0, 0x09, 0x01, 0xa1, 0x01, 0x09, 0x20, 0x15, 0x00, 0x26, 0xff, 0x00, 0x75,
+ 0x08, 0x95, 0x40, 0x81, 0x02, 0x09, 0x21, 0x15, 0x00, 0x26, 0xff, 0x00, 0x75, 0x08, 0x95,
+ 0x40, 0x91, 0x02, 0xc0,
+ };
+ assert_se(is_fido_security_token_desc(NON_FIDO_HID_DESC_1, sizeof(NON_FIDO_HID_DESC_1)) == 0);
+
+ /* Wrong usage */
+ static const uint8_t NON_FIDO_HID_DESC_2[] = {
+ 0x06, 0xd0, 0xf1, 0x09, 0x02, 0xa1, 0x01, 0x09, 0x20, 0x15, 0x00, 0x26, 0xff, 0x00, 0x75,
+ 0x08, 0x95, 0x40, 0x81, 0x02, 0x09, 0x21, 0x15, 0x00, 0x26, 0xff, 0x00, 0x75, 0x08, 0x95,
+ 0x40, 0x91, 0x02, 0xc0,
+ };
+ assert_se(is_fido_security_token_desc(NON_FIDO_HID_DESC_2, sizeof(NON_FIDO_HID_DESC_2)) == 0);
+
+ static const uint8_t NON_FIDO_HID_DESC_3[] = {
+ 0x05, 0x01, 0x09, 0x06, 0xa1, 0x01, 0x05, 0x07, 0x19, 0xe0, 0x29, 0xe7, 0x15, 0x00, 0x25,
+ 0x01, 0x75, 0x01, 0x95, 0x08, 0x81, 0x02, 0x95, 0x01, 0x75, 0x08, 0x81, 0x01, 0x95, 0x05,
+ 0x75, 0x01, 0x05, 0x08, 0x19, 0x01, 0x29, 0x05, 0x91, 0x02, 0x95, 0x01, 0x75, 0x03, 0x91,
+ 0x01, 0x95, 0x06, 0x75, 0x08, 0x15, 0x00, 0x25, 0x65, 0x05, 0x07, 0x19, 0x00, 0x29, 0x65,
+ 0x81, 0x00, 0x09, 0x03, 0x75, 0x08, 0x95, 0x08, 0xb1, 0x02, 0xc0,
+ };
+ assert_se(is_fido_security_token_desc(NON_FIDO_HID_DESC_3, sizeof(NON_FIDO_HID_DESC_3)) == 0);
+}
+
+static void test_is_fido_security_token_desc__invalid(void) {
+ /* Size coded on 1 byte, but no byte given */
+ static const uint8_t INVALID_HID_DESC_1[] = { 0x01 };
+ assert_se(is_fido_security_token_desc(INVALID_HID_DESC_1, sizeof(INVALID_HID_DESC_1)) < 0);
+
+ /* Size coded on 2 bytes, but only 1 byte given */
+ static const uint8_t INVALID_HID_DESC_2[] = { 0x02, 0x01 };
+ assert_se(is_fido_security_token_desc(INVALID_HID_DESC_2, sizeof(INVALID_HID_DESC_2)) < 0);
+
+ /* Size coded on 4 bytes, but only 3 bytes given */
+ static const uint8_t INVALID_HID_DESC_3[] = { 0x03, 0x01, 0x02, 0x03 };
+ assert_se(is_fido_security_token_desc(INVALID_HID_DESC_3, sizeof(INVALID_HID_DESC_3)) < 0);
+
+ /* Long item without a size byte */
+ static const uint8_t INVALID_HID_DESC_4[] = { 0xfe };
+ assert_se(is_fido_security_token_desc(INVALID_HID_DESC_4, sizeof(INVALID_HID_DESC_4)) < 0);
+
+ /* Usage pages are coded on at most 2 bytes */
+ static const uint8_t INVALID_HID_DESC_5[] = { 0x07, 0x01, 0x02, 0x03, 0x04 };
+ assert_se(is_fido_security_token_desc(INVALID_HID_DESC_5, sizeof(INVALID_HID_DESC_5)) < 0);
+}
+
+int main(int argc, char *argv[]) {
+ test_is_fido_security_token_desc__fido();
+ test_is_fido_security_token_desc__non_fido();
+ test_is_fido_security_token_desc__invalid();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/udev/generate-keyboard-keys-gperf.sh b/src/udev/generate-keyboard-keys-gperf.sh
new file mode 100755
index 0000000..c78652a
--- /dev/null
+++ b/src/udev/generate-keyboard-keys-gperf.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+set -eu
+
+awk '
+ BEGIN {
+ print "%{\n\
+#if __GNUC__ >= 7\n\
+_Pragma(\"GCC diagnostic ignored \\\"-Wimplicit-fallthrough\\\"\")\n\
+#endif\n\
+%}"
+ print "struct key_name { const char* name; unsigned short id; };"
+ print "%null-strings"
+ print "%%"
+ }
+
+ /^KEY_/ { print tolower(substr($1 ,5)) ", " $1 }
+ { print tolower($1) ", " $1 }
+' < "$1"
diff --git a/src/udev/generate-keyboard-keys-list.sh b/src/udev/generate-keyboard-keys-list.sh
new file mode 100755
index 0000000..aa00c15
--- /dev/null
+++ b/src/udev/generate-keyboard-keys-list.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include linux/input.h - </dev/null | awk '
+ /\<(KEY_(MAX|MIN_INTERESTING))|(BTN_(MISC|MOUSE|JOYSTICK|GAMEPAD|DIGI|WHEEL|TRIGGER_HAPPY))\>/ { next }
+ /^#define[ \t]+(KEY|BTN)_[^ ]+[ \t]+[0-9BK]/ { print $2 }
+'
diff --git a/src/udev/meson.build b/src/udev/meson.build
new file mode 100644
index 0000000..5eb0f99
--- /dev/null
+++ b/src/udev/meson.build
@@ -0,0 +1,225 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+udevadm_sources = files('''
+ udevadm.c
+ udevadm.h
+ udevadm-control.c
+ udevadm-hwdb.c
+ udevadm-info.c
+ udevadm-monitor.c
+ udevadm-settle.c
+ udevadm-test.c
+ udevadm-test-builtin.c
+ udevadm-trigger.c
+ udevadm-util.c
+ udevadm-util.h
+ udevd.c
+'''.split())
+
+libudev_core_sources = '''
+ udev-ctrl.c
+ udev-ctrl.h
+ udev-event.c
+ udev-event.h
+ udev-node.c
+ udev-node.h
+ udev-rules.c
+ udev-rules.h
+ udev-watch.c
+ udev-watch.h
+ udev-builtin.c
+ udev-builtin.h
+ udev-builtin-btrfs.c
+ udev-builtin-hwdb.c
+ udev-builtin-input_id.c
+ udev-builtin-keyboard.c
+ udev-builtin-net_id.c
+ udev-builtin-net_setup_link.c
+ udev-builtin-path_id.c
+ udev-builtin-usb_id.c
+ net/link-config.c
+ net/link-config.h
+'''.split()
+
+if conf.get('HAVE_KMOD') == 1
+ libudev_core_sources += ['udev-builtin-kmod.c']
+endif
+
+if conf.get('HAVE_BLKID') == 1
+ libudev_core_sources += ['udev-builtin-blkid.c']
+endif
+
+if conf.get('HAVE_ACL') == 1
+ libudev_core_sources += ['udev-builtin-uaccess.c',
+ logind_acl_c,
+ sd_login_sources]
+endif
+
+############################################################
+
+generate_keyboard_keys_list = find_program('generate-keyboard-keys-list.sh')
+keyboard_keys_list_txt = custom_target(
+ 'keyboard-keys-list.txt',
+ output : 'keyboard-keys-list.txt',
+ command : [generate_keyboard_keys_list, cpp],
+ capture : true)
+
+generate_keyboard_keys_gperf = find_program('generate-keyboard-keys-gperf.sh')
+fname = 'keyboard-keys-from-name.gperf'
+gperf_file = custom_target(
+ fname,
+ input : keyboard_keys_list_txt,
+ output : fname,
+ command : [generate_keyboard_keys_gperf, '@INPUT@'],
+ capture : true)
+
+fname = 'keyboard-keys-from-name.h'
+keyboard_keys_from_name_h = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t',
+ '-N', 'keyboard_lookup_key',
+ '-H', 'hash_key_name',
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+############################################################
+
+link_config_gperf_c = custom_target(
+ 'link-config-gperf.c',
+ input : 'net/link-config-gperf.gperf',
+ output : 'link-config-gperf.c',
+ command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@'])
+
+############################################################
+
+if get_option('link-udev-shared')
+ udev_link_with = [libshared]
+ udev_rpath = rootlibexecdir
+else
+ udev_link_with = [libshared_static,
+ libsystemd_static]
+ udev_rpath = ''
+endif
+
+libudev_basic = static_library(
+ 'udev-basic',
+ libudev_sources,
+ include_directories : includes,
+ c_args : ['-fvisibility=default'])
+
+libudev_static = static_library(
+ 'udev_static',
+ include_directories : includes,
+ link_with : udev_link_with,
+ link_whole : libudev_basic)
+
+static_libudev = get_option('static-libudev')
+static_libudev_pic = static_libudev == 'true' or static_libudev == 'pic'
+install_libudev_static = static_library(
+ 'udev',
+ basic_sources,
+ shared_sources,
+ libsystemd_sources,
+ libudev_sources,
+ disable_mempool_c,
+ include_directories : includes,
+ build_by_default : static_libudev != 'false',
+ install : static_libudev != 'false',
+ install_dir : rootlibdir,
+ link_depends : libudev_sym,
+ dependencies : libshared_deps + [libmount],
+ c_args : static_libudev_pic ? [] : ['-fno-PIC'],
+ pic : static_libudev_pic)
+
+libudev = shared_library(
+ 'udev',
+ disable_mempool_c,
+ version : libudev_version,
+ include_directories : includes,
+ link_args : ['-shared',
+ '-Wl,--version-script=' + libudev_sym_path],
+ link_with : [libsystemd_static, libshared_static],
+ link_whole : libudev_basic,
+ dependencies : [threads],
+ link_depends : libudev_sym,
+ install : true,
+ install_dir : rootlibdir)
+
+libudev_core_includes = [includes, include_directories('net')]
+libudev_core = static_library(
+ 'udev-core',
+ libudev_core_sources,
+ link_config_gperf_c,
+ keyboard_keys_from_name_h,
+ include_directories : libudev_core_includes,
+ c_args : ['-DLOG_REALM=LOG_REALM_UDEV'],
+ link_with : udev_link_with,
+ dependencies : [libblkid, libkmod])
+
+foreach prog : [['ata_id/ata_id.c'],
+ ['cdrom_id/cdrom_id.c'],
+ ['fido_id/fido_id.c',
+ 'fido_id/fido_id_desc.c',
+ 'fido_id/fido_id_desc.h'],
+ ['scsi_id/scsi_id.c',
+ 'scsi_id/scsi_id.h',
+ 'scsi_id/scsi_serial.c',
+ 'scsi_id/scsi.h'],
+ ['v4l_id/v4l_id.c'],
+ ['mtd_probe/mtd_probe.c',
+ 'mtd_probe/mtd_probe.h',
+ 'mtd_probe/probe_smartmedia.c']]
+
+ executable(prog[0].split('/')[0],
+ prog,
+ include_directories : includes,
+ c_args : ['-DLOG_REALM=LOG_REALM_UDEV'],
+ dependencies : [versiondep],
+ link_with : [libudev_static],
+ install_rpath : udev_rpath,
+ install : true,
+ install_dir : udevlibexecdir)
+endforeach
+
+if install_sysconfdir
+ install_data('udev.conf',
+ install_dir : join_paths(sysconfdir, 'udev'))
+endif
+
+configure_file(
+ input : 'udev.pc.in',
+ output : 'udev.pc',
+ configuration : substs,
+ install_dir : pkgconfigdatadir == 'no' ? '' : pkgconfigdatadir)
+
+if install_sysconfdir
+ meson.add_install_script('sh', '-c',
+ mkdir_p.format(join_paths(sysconfdir, 'udev/rules.d')))
+endif
+
+fuzzers += [
+ [['src/udev/net/fuzz-link-parser.c',
+ 'src/fuzz/fuzz.h'],
+ [libudev_core,
+ libudev_static,
+ libsystemd_network,
+ libshared],
+ [threads,
+ libacl]],
+
+ [['src/udev/fido_id/fuzz-fido-id-desc.c',
+ 'src/udev/fido_id/fido_id_desc.c'],
+ [],
+ []],
+ ]
+
+tests += [
+ [['src/udev/fido_id/test-fido-id-desc.c',
+ 'src/udev/fido_id/fido_id_desc.c'],
+ [],
+ []],
+ ]
diff --git a/src/udev/mtd_probe/mtd_probe.c b/src/udev/mtd_probe/mtd_probe.c
new file mode 100644
index 0000000..df1f1c1
--- /dev/null
+++ b/src/udev/mtd_probe/mtd_probe.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © 2010 - Maxim Levitsky
+ *
+ * mtd_probe is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mtd_probe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with mtd_probe; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <mtd/mtd-user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "mtd_probe.h"
+
+int main(int argc, char** argv) {
+ _cleanup_close_ int mtd_fd = -1;
+ mtd_info_t mtd_info;
+
+ if (argc != 2) {
+ printf("usage: mtd_probe /dev/mtd[n]\n");
+ return EXIT_FAILURE;
+ }
+
+ mtd_fd = open(argv[1], O_RDONLY|O_CLOEXEC);
+ if (mtd_fd < 0) {
+ log_error_errno(errno, "Failed to open: %m");
+ return EXIT_FAILURE;
+ }
+
+ if (ioctl(mtd_fd, MEMGETINFO, &mtd_info) < 0) {
+ log_error_errno(errno, "Failed to issue MEMGETINFO ioctl: %m");
+ return EXIT_FAILURE;
+ }
+
+ if (probe_smart_media(mtd_fd, &mtd_info) < 0)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/udev/mtd_probe/mtd_probe.h b/src/udev/mtd_probe/mtd_probe.h
new file mode 100644
index 0000000..ae03a7d
--- /dev/null
+++ b/src/udev/mtd_probe/mtd_probe.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+/*
+ * Copyright © 2010 - Maxim Levitsky
+ *
+ * mtd_probe is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mtd_probe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with mtd_probe; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+#include <mtd/mtd-user.h>
+
+#include "macro.h"
+
+/* Full oob structure as written on the flash */
+struct sm_oob {
+ uint32_t reserved;
+ uint8_t data_status;
+ uint8_t block_status;
+ uint8_t lba_copy1[2];
+ uint8_t ecc2[3];
+ uint8_t lba_copy2[2];
+ uint8_t ecc1[3];
+} _packed_;
+
+/* one sector is always 512 bytes, but it can consist of two nand pages */
+#define SM_SECTOR_SIZE 512
+
+/* oob area is also 16 bytes, but might be from two pages */
+#define SM_OOB_SIZE 16
+
+/* This is maximum zone size, and all devices that have more that one zone
+ have this size */
+#define SM_MAX_ZONE_SIZE 1024
+
+/* support for small page nand */
+#define SM_SMALL_PAGE 256
+#define SM_SMALL_OOB_SIZE 8
+
+int probe_smart_media(int mtd_fd, mtd_info_t *info);
diff --git a/src/udev/mtd_probe/probe_smartmedia.c b/src/udev/mtd_probe/probe_smartmedia.c
new file mode 100644
index 0000000..f4612ba
--- /dev/null
+++ b/src/udev/mtd_probe/probe_smartmedia.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © 2010 - Maxim Levitsky
+ *
+ * mtd_probe is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mtd_probe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with mtd_probe; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <mtd/mtd-user.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "mtd_probe.h"
+
+static const uint8_t cis_signature[] = {
+ 0x01, 0x03, 0xD9, 0x01, 0xFF, 0x18, 0x02, 0xDF, 0x01, 0x20
+};
+
+int probe_smart_media(int mtd_fd, mtd_info_t* info) {
+ int sector_size;
+ int block_size;
+ int size_in_megs;
+ int spare_count;
+ _cleanup_free_ uint8_t *cis_buffer = NULL;
+ int offset;
+ int cis_found = 0;
+
+ cis_buffer = malloc(SM_SECTOR_SIZE);
+ if (!cis_buffer)
+ return log_oom();
+
+ if (info->type != MTD_NANDFLASH)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not marked MTD_NANDFLASH.");
+
+ sector_size = info->writesize;
+ block_size = info->erasesize;
+ size_in_megs = info->size / (1024 * 1024);
+
+ if (!IN_SET(sector_size, SM_SECTOR_SIZE, SM_SMALL_PAGE))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unexpected sector size: %i", sector_size);
+
+ switch(size_in_megs) {
+ case 1:
+ case 2:
+ spare_count = 6;
+ break;
+ case 4:
+ spare_count = 12;
+ break;
+ default:
+ spare_count = 24;
+ break;
+ }
+
+ for (offset = 0; offset < block_size * spare_count; offset += sector_size) {
+ (void) lseek(mtd_fd, SEEK_SET, offset);
+
+ if (read(mtd_fd, cis_buffer, SM_SECTOR_SIZE) == SM_SECTOR_SIZE) {
+ cis_found = 1;
+ break;
+ }
+ }
+
+ if (!cis_found)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "CIS not found");
+
+ if (memcmp(cis_buffer, cis_signature, sizeof(cis_signature)) != 0 &&
+ memcmp(cis_buffer + SM_SMALL_PAGE, cis_signature, sizeof(cis_signature)) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "CIS signature didn't match");
+
+ printf("MTD_FTL=smartmedia\n");
+ return 0;
+}
diff --git a/src/udev/net/fuzz-link-parser.c b/src/udev/net/fuzz-link-parser.c
new file mode 100644
index 0000000..6f3469c
--- /dev/null
+++ b/src/udev/net/fuzz-link-parser.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "fuzz.h"
+#include "link-config.h"
+#include "tmpfile-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(link_config_ctx_freep) link_config_ctx *ctx = NULL;
+ _cleanup_(unlink_tempfilep) char filename[] = "/tmp/fuzz-link-config.XXXXXX";
+ _cleanup_fclose_ FILE *f = NULL;
+
+ if (size > 65535)
+ return 0;
+
+ if (!getenv("SYSTEMD_LOG_LEVEL"))
+ log_set_max_level(LOG_CRIT);
+
+ assert_se(fmkostemp_safe(filename, "r+", &f) == 0);
+ if (size != 0)
+ assert_se(fwrite(data, size, 1, f) == 1);
+
+ fflush(f);
+ assert_se(link_config_ctx_new(&ctx) >= 0);
+ (void) link_load_one(ctx, filename);
+ return 0;
+}
diff --git a/src/udev/net/fuzz-link-parser.options b/src/udev/net/fuzz-link-parser.options
new file mode 100644
index 0000000..0824b19
--- /dev/null
+++ b/src/udev/net/fuzz-link-parser.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65535
diff --git a/src/udev/net/link-config-gperf.gperf b/src/udev/net/link-config-gperf.gperf
new file mode 100644
index 0000000..20f5d7e
--- /dev/null
+++ b/src/udev/net/link-config-gperf.gperf
@@ -0,0 +1,68 @@
+%{
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+#include <stddef.h>
+#include "conf-parser.h"
+#include "ethtool-util.h"
+#include "link-config.h"
+#include "network-internal.h"
+#include "socket-util.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name link_config_gperf_hash
+%define lookup-function-name link_config_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Match.MACAddress, config_parse_hwaddrs, 0, offsetof(link_config, match_mac)
+Match.PermanentMACAddress, config_parse_hwaddrs, 0, offsetof(link_config, match_permanent_mac)
+Match.OriginalName, config_parse_match_ifnames, 0, offsetof(link_config, match_name)
+Match.Path, config_parse_match_strv, 0, offsetof(link_config, match_path)
+Match.Driver, config_parse_match_strv, 0, offsetof(link_config, match_driver)
+Match.Type, config_parse_match_strv, 0, offsetof(link_config, match_type)
+Match.Property, config_parse_match_property, 0, offsetof(link_config, match_property)
+Match.Host, config_parse_net_condition, CONDITION_HOST, offsetof(link_config, conditions)
+Match.Virtualization, config_parse_net_condition, CONDITION_VIRTUALIZATION, offsetof(link_config, conditions)
+Match.KernelCommandLine, config_parse_net_condition, CONDITION_KERNEL_COMMAND_LINE, offsetof(link_config, conditions)
+Match.KernelVersion, config_parse_net_condition, CONDITION_KERNEL_VERSION, offsetof(link_config, conditions)
+Match.Architecture, config_parse_net_condition, CONDITION_ARCHITECTURE, offsetof(link_config, conditions)
+Link.Description, config_parse_string, 0, offsetof(link_config, description)
+Link.MACAddressPolicy, config_parse_mac_address_policy, 0, offsetof(link_config, mac_address_policy)
+Link.MACAddress, config_parse_hwaddr, 0, offsetof(link_config, mac)
+Link.NamePolicy, config_parse_name_policy, 0, offsetof(link_config, name_policy)
+Link.Name, config_parse_ifname, 0, offsetof(link_config, name)
+Link.AlternativeName, config_parse_ifnames, IFNAME_VALID_ALTERNATIVE, offsetof(link_config, alternative_names)
+Link.AlternativeNamesPolicy, config_parse_alternative_names_policy, 0, offsetof(link_config, alternative_names_policy)
+Link.Alias, config_parse_ifalias, 0, offsetof(link_config, alias)
+Link.MTUBytes, config_parse_mtu, AF_UNSPEC, offsetof(link_config, mtu)
+Link.BitsPerSecond, config_parse_si_uint64, 0, offsetof(link_config, speed)
+Link.Duplex, config_parse_duplex, 0, offsetof(link_config, duplex)
+Link.AutoNegotiation, config_parse_tristate, 0, offsetof(link_config, autonegotiation)
+Link.WakeOnLan, config_parse_wol, 0, offsetof(link_config, wol)
+Link.Port, config_parse_port, 0, offsetof(link_config, port)
+Link.ReceiveChecksumOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_RX])
+Link.TransmitChecksumOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_TX])
+Link.GenericSegmentationOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_GSO])
+Link.TCPSegmentationOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_TSO])
+Link.TCP6SegmentationOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_TSO6])
+Link.UDPSegmentationOffload, config_parse_warn_compat, DISABLED_LEGACY, 0
+Link.GenericReceiveOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_GRO])
+Link.LargeReceiveOffload, config_parse_tristate, 0, offsetof(link_config, features[NET_DEV_FEAT_LRO])
+Link.RxChannels, config_parse_channel, 0, offsetof(link_config, channels)
+Link.TxChannels, config_parse_channel, 0, offsetof(link_config, channels)
+Link.OtherChannels, config_parse_channel, 0, offsetof(link_config, channels)
+Link.CombinedChannels, config_parse_channel, 0, offsetof(link_config, channels)
+Link.Advertise, config_parse_advertise, 0, offsetof(link_config, advertise)
+Link.RxBufferSize, config_parse_nic_buffer_size, 0, offsetof(link_config, ring)
+Link.RxMiniBufferSize, config_parse_nic_buffer_size, 0, offsetof(link_config, ring)
+Link.RxJumboBufferSize, config_parse_nic_buffer_size, 0, offsetof(link_config, ring)
+Link.TxBufferSize, config_parse_nic_buffer_size, 0, offsetof(link_config, ring)
+Link.RxFlowControl, config_parse_tristate, 0, offsetof(link_config, rx_flow_control)
+Link.TxFlowControl, config_parse_tristate, 0, offsetof(link_config, tx_flow_control)
+Link.AutoNegotiationFlowControl, config_parse_tristate, 0, offsetof(link_config, autoneg_flow_control)
diff --git a/src/udev/net/link-config.c b/src/udev/net/link-config.c
new file mode 100644
index 0000000..d12fd0e
--- /dev/null
+++ b/src/udev/net/link-config.c
@@ -0,0 +1,711 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/netdevice.h>
+#include <netinet/ether.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "ethtool-util.h"
+#include "fd-util.h"
+#include "link-config.h"
+#include "log.h"
+#include "memory-util.h"
+#include "netif-naming-scheme.h"
+#include "netlink-util.h"
+#include "network-internal.h"
+#include "parse-util.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "random-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+
+struct link_config_ctx {
+ LIST_HEAD(link_config, links);
+
+ int ethtool_fd;
+
+ bool enable_name_policy;
+
+ sd_netlink *rtnl;
+
+ usec_t network_dirs_ts_usec;
+};
+
+static void link_config_free(link_config *link) {
+ if (!link)
+ return;
+
+ free(link->filename);
+
+ set_free_free(link->match_mac);
+ set_free_free(link->match_permanent_mac);
+ strv_free(link->match_path);
+ strv_free(link->match_driver);
+ strv_free(link->match_type);
+ strv_free(link->match_name);
+ strv_free(link->match_property);
+ condition_free_list(link->conditions);
+
+ free(link->description);
+ free(link->mac);
+ free(link->name_policy);
+ free(link->name);
+ strv_free(link->alternative_names);
+ free(link->alternative_names_policy);
+ free(link->alias);
+
+ free(link);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(link_config*, link_config_free);
+
+static void link_configs_free(link_config_ctx *ctx) {
+ link_config *link, *link_next;
+
+ if (!ctx)
+ return;
+
+ LIST_FOREACH_SAFE(links, link, link_next, ctx->links)
+ link_config_free(link);
+}
+
+void link_config_ctx_free(link_config_ctx *ctx) {
+ if (!ctx)
+ return;
+
+ safe_close(ctx->ethtool_fd);
+
+ sd_netlink_unref(ctx->rtnl);
+
+ link_configs_free(ctx);
+
+ free(ctx);
+
+ return;
+}
+
+int link_config_ctx_new(link_config_ctx **ret) {
+ _cleanup_(link_config_ctx_freep) link_config_ctx *ctx = NULL;
+
+ if (!ret)
+ return -EINVAL;
+
+ ctx = new0(link_config_ctx, 1);
+ if (!ctx)
+ return -ENOMEM;
+
+ LIST_HEAD_INIT(ctx->links);
+
+ ctx->ethtool_fd = -1;
+
+ ctx->enable_name_policy = true;
+
+ *ret = TAKE_PTR(ctx);
+
+ return 0;
+}
+
+int link_load_one(link_config_ctx *ctx, const char *filename) {
+ _cleanup_(link_config_freep) link_config *link = NULL;
+ _cleanup_fclose_ FILE *file = NULL;
+ _cleanup_free_ char *name = NULL;
+ size_t i;
+ int r;
+
+ assert(ctx);
+ assert(filename);
+
+ file = fopen(filename, "re");
+ if (!file)
+ return errno == ENOENT ? 0 : -errno;
+
+ if (null_or_empty_fd(fileno(file))) {
+ log_debug("Skipping empty file: %s", filename);
+ return 0;
+ }
+
+ name = strdup(filename);
+ if (!name)
+ return -ENOMEM;
+
+ link = new(link_config, 1);
+ if (!link)
+ return -ENOMEM;
+
+ *link = (link_config) {
+ .filename = TAKE_PTR(name),
+ .mac_address_policy = _MAC_ADDRESS_POLICY_INVALID,
+ .wol = _WOL_INVALID,
+ .duplex = _DUP_INVALID,
+ .port = _NET_DEV_PORT_INVALID,
+ .autonegotiation = -1,
+ .rx_flow_control = -1,
+ .tx_flow_control = -1,
+ .autoneg_flow_control = -1,
+ };
+
+ for (i = 0; i < ELEMENTSOF(link->features); i++)
+ link->features[i] = -1;
+
+ r = config_parse(NULL, filename, file,
+ "Match\0Link\0",
+ config_item_perf_lookup, link_config_gperf_lookup,
+ CONFIG_PARSE_WARN, link,
+ NULL);
+ if (r < 0)
+ return r;
+
+ if (set_isempty(link->match_mac) && set_isempty(link->match_permanent_mac) &&
+ strv_isempty(link->match_path) && strv_isempty(link->match_driver) && strv_isempty(link->match_type) &&
+ strv_isempty(link->match_name) && strv_isempty(link->match_property) && !link->conditions) {
+ log_warning("%s: No valid settings found in the [Match] section, ignoring file. "
+ "To match all interfaces, add OriginalName=* in the [Match] section.",
+ filename);
+ return 0;
+ }
+
+ if (!condition_test_list(link->conditions, environ, NULL, NULL, NULL)) {
+ log_debug("%s: Conditions do not match the system environment, skipping.", filename);
+ return 0;
+ }
+
+ if (IN_SET(link->mac_address_policy, MAC_ADDRESS_POLICY_PERSISTENT, MAC_ADDRESS_POLICY_RANDOM) && link->mac) {
+ log_warning("%s: MACAddress= in [Link] section will be ignored when MACAddressPolicy= "
+ "is set to \"persistent\" or \"random\".",
+ filename);
+ link->mac = mfree(link->mac);
+ }
+
+ log_debug("Parsed configuration file %s", filename);
+
+ LIST_PREPEND(links, ctx->links, TAKE_PTR(link));
+ return 0;
+}
+
+static bool enable_name_policy(void) {
+ bool b;
+
+ return proc_cmdline_get_bool("net.ifnames", &b) <= 0 || b;
+}
+
+static int link_unsigned_attribute(sd_device *device, const char *attr, unsigned *type) {
+ const char *s;
+ int r;
+
+ r = sd_device_get_sysattr_value(device, attr, &s);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Failed to query %s: %m", attr);
+
+ r = safe_atou(s, type);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Failed to parse %s \"%s\": %m", attr, s);
+
+ log_device_debug(device, "Device has %s=%u", attr, *type);
+ return 0;
+}
+
+int link_config_load(link_config_ctx *ctx) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ link_configs_free(ctx);
+
+ if (!enable_name_policy()) {
+ ctx->enable_name_policy = false;
+ log_info("Network interface NamePolicy= disabled on kernel command line, ignoring.");
+ }
+
+ /* update timestamp */
+ paths_check_timestamp(NETWORK_DIRS, &ctx->network_dirs_ts_usec, true);
+
+ r = conf_files_list_strv(&files, ".link", NULL, 0, NETWORK_DIRS);
+ if (r < 0)
+ return log_error_errno(r, "failed to enumerate link files: %m");
+
+ STRV_FOREACH_BACKWARDS(f, files) {
+ r = link_load_one(ctx, *f);
+ if (r < 0)
+ log_error_errno(r, "Failed to load %s, ignoring: %m", *f);
+ }
+
+ return 0;
+}
+
+bool link_config_should_reload(link_config_ctx *ctx) {
+ return paths_check_timestamp(NETWORK_DIRS, &ctx->network_dirs_ts_usec, false);
+}
+
+int link_config_get(link_config_ctx *ctx, sd_device *device, link_config **ret) {
+ unsigned name_assign_type = NET_NAME_UNKNOWN;
+ struct ether_addr permanent_mac = {};
+ unsigned short iftype = 0;
+ link_config *link;
+ const char *name;
+ int ifindex, r;
+
+ assert(ctx);
+ assert(device);
+ assert(ret);
+
+ r = sd_device_get_sysname(device, &name);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0)
+ return r;
+
+ r = rtnl_get_link_iftype(&ctx->rtnl, ifindex, &iftype);
+ if (r < 0)
+ return r;
+
+ r = ethtool_get_permanent_macaddr(&ctx->ethtool_fd, name, &permanent_mac);
+ if (r < 0)
+ log_device_debug_errno(device, r, "Failed to get permanent MAC address, ignoring: %m");
+
+ (void) link_unsigned_attribute(device, "name_assign_type", &name_assign_type);
+
+ LIST_FOREACH(links, link, ctx->links) {
+ if (net_match_config(link->match_mac, link->match_permanent_mac, link->match_path, link->match_driver,
+ link->match_type, link->match_name, link->match_property, NULL, NULL, NULL,
+ device, NULL, &permanent_mac, NULL, iftype, NULL, NULL, 0, NULL, NULL)) {
+
+ if (link->match_name && !strv_contains(link->match_name, "*") && name_assign_type == NET_NAME_ENUM)
+ log_device_warning(device, "Config file %s is applied to device based on potentially unpredictable interface name.",
+ link->filename);
+ else
+ log_device_debug(device, "Config file %s is applied", link->filename);
+
+ *ret = link;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int link_config_apply_ethtool_settings(int *ethtool_fd, const link_config *config, sd_device *device) {
+ const char *name;
+ int r;
+
+ assert(ethtool_fd);
+ assert(config);
+ assert(device);
+
+ r = sd_device_get_sysname(device, &name);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to get sysname: %m");
+
+ r = ethtool_set_glinksettings(ethtool_fd, name,
+ config->autonegotiation, config->advertise,
+ config->speed, config->duplex, config->port);
+ if (r < 0) {
+ if (config->port != _NET_DEV_PORT_INVALID)
+ log_device_warning_errno(device, r, "Could not set port '%s', ignoring: %m", port_to_string(config->port));
+
+ if (!eqzero(config->advertise))
+ log_device_warning_errno(device, r, "Could not set advertise mode, ignoring: %m"); /* TODO: include modes in the log message. */
+
+ if (config->speed) {
+ unsigned speed = DIV_ROUND_UP(config->speed, 1000000);
+ if (r == -EOPNOTSUPP) {
+ r = ethtool_set_speed(ethtool_fd, name, speed, config->duplex);
+ if (r < 0)
+ log_device_warning_errno(device, r, "Could not set speed to %uMbps, ignoring: %m", speed);
+ }
+ }
+
+ if (config->duplex != _DUP_INVALID)
+ log_device_warning_errno(device, r, "Could not set duplex to %s, ignoring: %m", duplex_to_string(config->duplex));
+ }
+
+ r = ethtool_set_wol(ethtool_fd, name, config->wol);
+ if (r < 0)
+ log_device_warning_errno(device, r, "Could not set WakeOnLan to %s, ignoring: %m", wol_to_string(config->wol));
+
+ r = ethtool_set_features(ethtool_fd, name, config->features);
+ if (r < 0)
+ log_device_warning_errno(device, r, "Could not set offload features, ignoring: %m");
+
+ if (config->channels.rx_count_set || config->channels.tx_count_set || config->channels.other_count_set || config->channels.combined_count_set) {
+ r = ethtool_set_channels(ethtool_fd, name, &config->channels);
+ if (r < 0)
+ log_device_warning_errno(device, r, "Could not set channels, ignoring: %m");
+ }
+
+ if (config->ring.rx_pending_set || config->ring.rx_mini_pending_set || config->ring.rx_jumbo_pending_set || config->ring.tx_pending_set) {
+ r = ethtool_set_nic_buffer_size(ethtool_fd, name, &config->ring);
+ if (r < 0)
+ log_device_warning_errno(device, r, "Could not set ring buffer, ignoring: %m");
+ }
+
+ if (config->rx_flow_control >= 0 || config->tx_flow_control >= 0 || config->autoneg_flow_control >= 0) {
+ r = ethtool_set_flow_control(ethtool_fd, name, config->rx_flow_control, config->tx_flow_control, config->autoneg_flow_control);
+ if (r < 0)
+ log_device_warning_errno(device, r, "Could not set flow control, ignoring: %m");
+ }
+
+ return 0;
+}
+
+static int get_mac(sd_device *device, MACAddressPolicy policy, struct ether_addr *mac) {
+ unsigned addr_type;
+ bool want_random = policy == MAC_ADDRESS_POLICY_RANDOM;
+ int r;
+
+ assert(IN_SET(policy, MAC_ADDRESS_POLICY_RANDOM, MAC_ADDRESS_POLICY_PERSISTENT));
+
+ r = link_unsigned_attribute(device, "addr_assign_type", &addr_type);
+ if (r < 0)
+ return r;
+ switch (addr_type) {
+ case NET_ADDR_SET:
+ return log_device_debug(device, "MAC on the device already set by userspace");
+ case NET_ADDR_STOLEN:
+ return log_device_debug(device, "MAC on the device already set based on another device");
+ case NET_ADDR_RANDOM:
+ case NET_ADDR_PERM:
+ break;
+ default:
+ log_device_warning(device, "Unknown addr_assign_type %u, ignoring", addr_type);
+ return 0;
+ }
+
+ if (want_random == (addr_type == NET_ADDR_RANDOM))
+ return log_device_debug(device, "MAC on the device already matches policy *%s*",
+ mac_address_policy_to_string(policy));
+
+ if (want_random) {
+ log_device_debug(device, "Using random bytes to generate MAC");
+
+ /* We require genuine randomness here, since we want to make sure we won't collide with other
+ * systems booting up at the very same time. We do allow RDRAND however, since this is not
+ * cryptographic key material. */
+ r = genuine_random_bytes(mac->ether_addr_octet, ETH_ALEN, RANDOM_ALLOW_RDRAND);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to acquire random data to generate MAC: %m");
+ } else {
+ uint64_t result;
+
+ r = net_get_unique_predictable_data(device,
+ naming_scheme_has(NAMING_STABLE_VIRTUAL_MACS),
+ &result);
+ if (r < 0)
+ return log_device_warning_errno(device, r, "Could not generate persistent MAC: %m");
+
+ log_device_debug(device, "Using generated persistent MAC address");
+ assert_cc(ETH_ALEN <= sizeof(result));
+ memcpy(mac->ether_addr_octet, &result, ETH_ALEN);
+ }
+
+ /* see eth_random_addr in the kernel */
+ mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
+ mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
+ return 1;
+}
+
+static int link_config_apply_rtnl_settings(sd_netlink **rtnl, const link_config *config, sd_device *device) {
+ struct ether_addr generated_mac, *mac = NULL;
+ int ifindex, r;
+
+ assert(rtnl);
+ assert(config);
+ assert(device);
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Could not find ifindex: %m");
+
+ if (IN_SET(config->mac_address_policy, MAC_ADDRESS_POLICY_PERSISTENT, MAC_ADDRESS_POLICY_RANDOM)) {
+ if (get_mac(device, config->mac_address_policy, &generated_mac) > 0)
+ mac = &generated_mac;
+ } else
+ mac = config->mac;
+
+ r = rtnl_set_link_properties(rtnl, ifindex, config->alias, mac, config->mtu);
+ if (r < 0)
+ log_device_warning_errno(device, r, "Could not set Alias=, MACAddress= or MTU=, ignoring: %m");
+
+ return 0;
+}
+
+static int link_config_generate_new_name(const link_config_ctx *ctx, const link_config *config, sd_device *device, const char **ret_name) {
+ unsigned name_type = NET_NAME_UNKNOWN;
+ const char *new_name = NULL;
+ NamePolicy policy;
+ int r;
+
+ assert(ctx);
+ assert(config);
+ assert(device);
+ assert(ret_name);
+
+ (void) link_unsigned_attribute(device, "name_assign_type", &name_type);
+
+ if (IN_SET(name_type, NET_NAME_USER, NET_NAME_RENAMED)
+ && !naming_scheme_has(NAMING_ALLOW_RERENAMES)) {
+ log_device_debug(device, "Device already has a name given by userspace, not renaming.");
+ goto no_rename;
+ }
+
+ if (ctx->enable_name_policy && config->name_policy)
+ for (NamePolicy *p = config->name_policy; *p != _NAMEPOLICY_INVALID; p++) {
+ policy = *p;
+
+ switch (policy) {
+ case NAMEPOLICY_KERNEL:
+ if (name_type != NET_NAME_PREDICTABLE)
+ continue;
+
+ /* The kernel claims to have given a predictable name, keep it. */
+ log_device_debug(device, "Policy *%s*: keeping predictable kernel name",
+ name_policy_to_string(policy));
+ goto no_rename;
+ case NAMEPOLICY_KEEP:
+ if (!IN_SET(name_type, NET_NAME_USER, NET_NAME_RENAMED))
+ continue;
+
+ log_device_debug(device, "Policy *%s*: keeping existing userspace name",
+ name_policy_to_string(policy));
+ goto no_rename;
+ case NAMEPOLICY_DATABASE:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_FROM_DATABASE", &new_name);
+ break;
+ case NAMEPOLICY_ONBOARD:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_ONBOARD", &new_name);
+ break;
+ case NAMEPOLICY_SLOT:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_SLOT", &new_name);
+ break;
+ case NAMEPOLICY_PATH:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_PATH", &new_name);
+ break;
+ case NAMEPOLICY_MAC:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_MAC", &new_name);
+ break;
+ default:
+ assert_not_reached("invalid policy");
+ }
+ if (ifname_valid(new_name))
+ break;
+ }
+
+ if (new_name) {
+ log_device_debug(device, "Policy *%s* yields \"%s\".", name_policy_to_string(policy), new_name);
+ *ret_name = new_name;
+ return 0;
+ }
+
+ if (config->name) {
+ log_device_debug(device, "Policies didn't yield a name, using specified Name=%s.", config->name);
+ *ret_name = config->name;
+ return 0;
+ }
+
+ log_device_debug(device, "Policies didn't yield a name and Name= is not given, not renaming.");
+no_rename:
+ r = sd_device_get_sysname(device, ret_name);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to get sysname: %m");
+
+ return 0;
+}
+
+static int link_config_apply_alternative_names(sd_netlink **rtnl, const link_config *config, sd_device *device, const char *new_name) {
+ _cleanup_strv_free_ char **altnames = NULL, **current_altnames = NULL;
+ const char *current_name;
+ int ifindex, r;
+
+ assert(rtnl);
+ assert(config);
+ assert(device);
+
+ r = sd_device_get_sysname(device, &current_name);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to get sysname: %m");
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Could not find ifindex: %m");
+
+ if (config->alternative_names) {
+ altnames = strv_copy(config->alternative_names);
+ if (!altnames)
+ return log_oom();
+ }
+
+ if (config->alternative_names_policy)
+ for (NamePolicy *p = config->alternative_names_policy; *p != _NAMEPOLICY_INVALID; p++) {
+ const char *n = NULL;
+
+ switch (*p) {
+ case NAMEPOLICY_DATABASE:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_FROM_DATABASE", &n);
+ break;
+ case NAMEPOLICY_ONBOARD:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_ONBOARD", &n);
+ break;
+ case NAMEPOLICY_SLOT:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_SLOT", &n);
+ break;
+ case NAMEPOLICY_PATH:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_PATH", &n);
+ break;
+ case NAMEPOLICY_MAC:
+ (void) sd_device_get_property_value(device, "ID_NET_NAME_MAC", &n);
+ break;
+ default:
+ assert_not_reached("invalid policy");
+ }
+ if (!isempty(n)) {
+ r = strv_extend(&altnames, n);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ if (new_name)
+ strv_remove(altnames, new_name);
+ strv_remove(altnames, current_name);
+
+ r = rtnl_get_link_alternative_names(rtnl, ifindex, &current_altnames);
+ if (r < 0)
+ log_device_debug_errno(device, r, "Failed to get alternative names, ignoring: %m");
+
+ char **p;
+ STRV_FOREACH(p, current_altnames)
+ strv_remove(altnames, *p);
+
+ strv_uniq(altnames);
+ strv_sort(altnames);
+ r = rtnl_set_link_alternative_names(rtnl, ifindex, altnames);
+ if (r < 0)
+ log_device_full_errno(device, r == -EOPNOTSUPP ? LOG_DEBUG : LOG_WARNING, r,
+ "Could not set AlternativeName= or apply AlternativeNamesPolicy=, ignoring: %m");
+
+ return 0;
+}
+
+int link_config_apply(link_config_ctx *ctx, const link_config *config, sd_device *device, const char **ret_name) {
+ const char *new_name;
+ DeviceAction a;
+ int r;
+
+ assert(ctx);
+ assert(config);
+ assert(device);
+ assert(ret_name);
+
+ r = device_get_action(device, &a);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to get ACTION= property: %m");
+
+ if (!IN_SET(a, DEVICE_ACTION_ADD, DEVICE_ACTION_BIND, DEVICE_ACTION_MOVE)) {
+ log_device_debug(device, "Skipping to apply .link settings on '%s' uevent.", device_action_to_string(a));
+
+ r = sd_device_get_sysname(device, ret_name);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to get sysname: %m");
+
+ return 0;
+ }
+
+ r = link_config_apply_ethtool_settings(&ctx->ethtool_fd, config, device);
+ if (r < 0)
+ return r;
+
+ r = link_config_apply_rtnl_settings(&ctx->rtnl, config, device);
+ if (r < 0)
+ return r;
+
+ if (a == DEVICE_ACTION_MOVE) {
+ log_device_debug(device, "Skipping to apply Name= and NamePolicy= on '%s' uevent.", device_action_to_string(a));
+
+ r = sd_device_get_sysname(device, &new_name);
+ if (r < 0)
+ return log_device_error_errno(device, r, "Failed to get sysname: %m");
+ } else {
+ r = link_config_generate_new_name(ctx, config, device, &new_name);
+ if (r < 0)
+ return r;
+ }
+
+ r = link_config_apply_alternative_names(&ctx->rtnl, config, device, new_name);
+ if (r < 0)
+ return r;
+
+ *ret_name = new_name;
+ return 0;
+}
+
+int link_get_driver(link_config_ctx *ctx, sd_device *device, char **ret) {
+ const char *name;
+ char *driver = NULL;
+ int r;
+
+ r = sd_device_get_sysname(device, &name);
+ if (r < 0)
+ return r;
+
+ r = ethtool_get_driver(&ctx->ethtool_fd, name, &driver);
+ if (r < 0)
+ return r;
+
+ *ret = driver;
+ return 0;
+}
+
+static const char* const mac_address_policy_table[_MAC_ADDRESS_POLICY_MAX] = {
+ [MAC_ADDRESS_POLICY_PERSISTENT] = "persistent",
+ [MAC_ADDRESS_POLICY_RANDOM] = "random",
+ [MAC_ADDRESS_POLICY_NONE] = "none",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mac_address_policy, MACAddressPolicy);
+DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(
+ config_parse_mac_address_policy,
+ mac_address_policy,
+ MACAddressPolicy,
+ MAC_ADDRESS_POLICY_NONE,
+ "Failed to parse MAC address policy");
+
+static const char* const name_policy_table[_NAMEPOLICY_MAX] = {
+ [NAMEPOLICY_KERNEL] = "kernel",
+ [NAMEPOLICY_KEEP] = "keep",
+ [NAMEPOLICY_DATABASE] = "database",
+ [NAMEPOLICY_ONBOARD] = "onboard",
+ [NAMEPOLICY_SLOT] = "slot",
+ [NAMEPOLICY_PATH] = "path",
+ [NAMEPOLICY_MAC] = "mac",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(name_policy, NamePolicy);
+DEFINE_CONFIG_PARSE_ENUMV(config_parse_name_policy, name_policy, NamePolicy,
+ _NAMEPOLICY_INVALID,
+ "Failed to parse interface name policy");
+
+static const char* const alternative_names_policy_table[_NAMEPOLICY_MAX] = {
+ [NAMEPOLICY_DATABASE] = "database",
+ [NAMEPOLICY_ONBOARD] = "onboard",
+ [NAMEPOLICY_SLOT] = "slot",
+ [NAMEPOLICY_PATH] = "path",
+ [NAMEPOLICY_MAC] = "mac",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(alternative_names_policy, NamePolicy);
+DEFINE_CONFIG_PARSE_ENUMV(config_parse_alternative_names_policy, alternative_names_policy, NamePolicy,
+ _NAMEPOLICY_INVALID,
+ "Failed to parse alternative names policy");
diff --git a/src/udev/net/link-config.h b/src/udev/net/link-config.h
new file mode 100644
index 0000000..eab1849
--- /dev/null
+++ b/src/udev/net/link-config.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+#include "condition.h"
+#include "conf-parser.h"
+#include "ethtool-util.h"
+#include "list.h"
+#include "set.h"
+
+typedef struct link_config_ctx link_config_ctx;
+typedef struct link_config link_config;
+
+typedef enum MACAddressPolicy {
+ MAC_ADDRESS_POLICY_PERSISTENT,
+ MAC_ADDRESS_POLICY_RANDOM,
+ MAC_ADDRESS_POLICY_NONE,
+ _MAC_ADDRESS_POLICY_MAX,
+ _MAC_ADDRESS_POLICY_INVALID = -1
+} MACAddressPolicy;
+
+typedef enum NamePolicy {
+ NAMEPOLICY_KERNEL,
+ NAMEPOLICY_KEEP,
+ NAMEPOLICY_DATABASE,
+ NAMEPOLICY_ONBOARD,
+ NAMEPOLICY_SLOT,
+ NAMEPOLICY_PATH,
+ NAMEPOLICY_MAC,
+ _NAMEPOLICY_MAX,
+ _NAMEPOLICY_INVALID = -1
+} NamePolicy;
+
+struct link_config {
+ char *filename;
+
+ Set *match_mac;
+ Set *match_permanent_mac;
+ char **match_path;
+ char **match_driver;
+ char **match_type;
+ char **match_name;
+ char **match_property;
+ LIST_HEAD(Condition, conditions);
+
+ char *description;
+ struct ether_addr *mac;
+ MACAddressPolicy mac_address_policy;
+ NamePolicy *name_policy;
+ NamePolicy *alternative_names_policy;
+ char *name;
+ char **alternative_names;
+ char *alias;
+ uint32_t mtu;
+ uint64_t speed;
+ Duplex duplex;
+ int autonegotiation;
+ uint32_t advertise[N_ADVERTISE];
+ WakeOnLan wol;
+ NetDevPort port;
+ int features[_NET_DEV_FEAT_MAX];
+ netdev_channels channels;
+ netdev_ring_param ring;
+ int rx_flow_control;
+ int tx_flow_control;
+ int autoneg_flow_control;
+
+ LIST_FIELDS(link_config, links);
+};
+
+int link_config_ctx_new(link_config_ctx **ret);
+void link_config_ctx_free(link_config_ctx *ctx);
+DEFINE_TRIVIAL_CLEANUP_FUNC(link_config_ctx*, link_config_ctx_free);
+
+int link_load_one(link_config_ctx *ctx, const char *filename);
+int link_config_load(link_config_ctx *ctx);
+bool link_config_should_reload(link_config_ctx *ctx);
+
+int link_config_get(link_config_ctx *ctx, sd_device *device, link_config **ret);
+int link_config_apply(link_config_ctx *ctx, const link_config *config, sd_device *device, const char **ret_name);
+int link_get_driver(link_config_ctx *ctx, sd_device *device, char **ret);
+
+const char *name_policy_to_string(NamePolicy p) _const_;
+NamePolicy name_policy_from_string(const char *p) _pure_;
+
+const char *alternative_names_policy_to_string(NamePolicy p) _const_;
+NamePolicy alternative_names_policy_from_string(const char *p) _pure_;
+
+const char *mac_address_policy_to_string(MACAddressPolicy p) _const_;
+MACAddressPolicy mac_address_policy_from_string(const char *p) _pure_;
+
+/* gperf lookup function */
+const struct ConfigPerfItem* link_config_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_mac_address_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_name_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_alternative_names_policy);
diff --git a/src/udev/scsi_id/README b/src/udev/scsi_id/README
new file mode 100644
index 0000000..9cfe739
--- /dev/null
+++ b/src/udev/scsi_id/README
@@ -0,0 +1,4 @@
+scsi_id - generate a SCSI unique identifier for a given SCSI device
+
+Please send questions, comments or patches to <patmans@us.ibm.com> or
+<linux-hotplug-devel@lists.sourceforge.net>.
diff --git a/src/udev/scsi_id/scsi.h b/src/udev/scsi_id/scsi.h
new file mode 100644
index 0000000..ee3e401
--- /dev/null
+++ b/src/udev/scsi_id/scsi.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+/*
+ * scsi.h
+ *
+ * General scsi and linux scsi specific defines and structs.
+ *
+ * Copyright (C) IBM Corp. 2003
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 of the License.
+ */
+
+#include <scsi/scsi.h>
+
+struct scsi_ioctl_command {
+ unsigned inlen; /* excluding scsi command length */
+ unsigned outlen;
+ unsigned char data[1];
+ /* on input, scsi command starts here then opt. data */
+};
+
+/*
+ * Default 5 second timeout
+ */
+#define DEF_TIMEOUT 5000
+
+#define SENSE_BUFF_LEN 32
+
+/*
+ * The request buffer size passed to the SCSI INQUIRY commands, use 254,
+ * as this is a nice value for some devices, especially some of the usb
+ * mass storage devices.
+ */
+#define SCSI_INQ_BUFF_LEN 254
+
+/*
+ * SCSI INQUIRY vendor and model (really product) lengths.
+ */
+#define VENDOR_LENGTH 8
+#define MODEL_LENGTH 16
+
+#define INQUIRY_CMD 0x12
+#define INQUIRY_CMDLEN 6
+
+/*
+ * INQUIRY VPD page 0x83 identifier descriptor related values. Reference the
+ * SCSI Primary Commands specification for details.
+ */
+
+/*
+ * id type values of id descriptors. These are assumed to fit in 4 bits.
+ */
+#define SCSI_ID_VENDOR_SPECIFIC 0
+#define SCSI_ID_T10_VENDOR 1
+#define SCSI_ID_EUI_64 2
+#define SCSI_ID_NAA 3
+#define SCSI_ID_RELPORT 4
+#define SCSI_ID_TGTGROUP 5
+#define SCSI_ID_LUNGROUP 6
+#define SCSI_ID_MD5 7
+#define SCSI_ID_NAME 8
+
+/*
+ * Supported NAA values. These fit in 4 bits, so the "don't care" value
+ * cannot conflict with real values.
+ */
+#define SCSI_ID_NAA_DONT_CARE 0xff
+#define SCSI_ID_NAA_IEEE_REG 0x05
+#define SCSI_ID_NAA_IEEE_REG_EXTENDED 0x06
+
+/*
+ * Supported Code Set values.
+ */
+#define SCSI_ID_BINARY 1
+#define SCSI_ID_ASCII 2
+
+struct scsi_id_search_values {
+ u_char id_type;
+ u_char naa_type;
+ u_char code_set;
+};
+
+/*
+ * Following are the "true" SCSI status codes. Linux has traditionally
+ * used a 1 bit right and masked version of these. So now CHECK_CONDITION
+ * and friends (in <scsi/scsi.h>) are deprecated.
+ */
+#define SCSI_CHECK_CONDITION 0x02
+#define SCSI_CONDITION_MET 0x04
+#define SCSI_BUSY 0x08
+#define SCSI_IMMEDIATE 0x10
+#define SCSI_IMMEDIATE_CONDITION_MET 0x14
+#define SCSI_RESERVATION_CONFLICT 0x18
+#define SCSI_COMMAND_TERMINATED 0x22
+#define SCSI_TASK_SET_FULL 0x28
+#define SCSI_ACA_ACTIVE 0x30
+#define SCSI_TASK_ABORTED 0x40
diff --git a/src/udev/scsi_id/scsi_id.c b/src/udev/scsi_id/scsi_id.c
new file mode 100644
index 0000000..5720256
--- /dev/null
+++ b/src/udev/scsi_id/scsi_id.c
@@ -0,0 +1,595 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © IBM Corp. 2003
+ * Copyright © SUSE Linux Products GmbH, 2006
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "build.h"
+#include "fd-util.h"
+#include "libudev-util.h"
+#include "scsi_id.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-util.h"
+
+static const struct option options[] = {
+ { "device", required_argument, NULL, 'd' },
+ { "config", required_argument, NULL, 'f' },
+ { "page", required_argument, NULL, 'p' },
+ { "blacklisted", no_argument, NULL, 'b' },
+ { "whitelisted", no_argument, NULL, 'g' },
+ { "replace-whitespace", no_argument, NULL, 'u' },
+ { "sg-version", required_argument, NULL, 's' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "version", no_argument, NULL, 'V' }, /* don't advertise -V */
+ { "export", no_argument, NULL, 'x' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+};
+
+static bool all_good = false;
+static bool dev_specified = false;
+static char config_file[MAX_PATH_LEN] = "/etc/scsi_id.config";
+static enum page_code default_page_code = PAGE_UNSPECIFIED;
+static int sg_version = 4;
+static bool reformat_serial = false;
+static bool export = false;
+static char vendor_str[64];
+static char model_str[64];
+static char vendor_enc_str[256];
+static char model_enc_str[256];
+static char revision_str[16];
+static char type_str[16];
+
+static void set_type(const char *from, char *to, size_t len) {
+ int type_num;
+ char *eptr;
+ const char *type = "generic";
+
+ type_num = strtoul(from, &eptr, 0);
+ if (eptr != from) {
+ switch (type_num) {
+ case 0:
+ type = "disk";
+ break;
+ case 1:
+ type = "tape";
+ break;
+ case 4:
+ type = "optical";
+ break;
+ case 5:
+ type = "cd";
+ break;
+ case 7:
+ type = "optical";
+ break;
+ case 0xe:
+ type = "disk";
+ break;
+ case 0xf:
+ type = "optical";
+ break;
+ default:
+ break;
+ }
+ }
+ strscpy(to, len, type);
+}
+
+/*
+ * get_value:
+ *
+ * buf points to an '=' followed by a quoted string ("foo") or a string ending
+ * with a space or ','.
+ *
+ * Return a pointer to the NUL terminated string, returns NULL if no
+ * matches.
+ */
+static char *get_value(char **buffer) {
+ static const char *quote_string = "\"\n";
+ static const char *comma_string = ",\n";
+ char *val;
+ const char *end;
+
+ if (**buffer == '"') {
+ /*
+ * skip leading quote, terminate when quote seen
+ */
+ (*buffer)++;
+ end = quote_string;
+ } else
+ end = comma_string;
+ val = strsep(buffer, end);
+ if (val && end == quote_string)
+ /*
+ * skip trailing quote
+ */
+ (*buffer)++;
+
+ while (isspace(**buffer))
+ (*buffer)++;
+
+ return val;
+}
+
+static int argc_count(char *opts) {
+ int i = 0;
+ while (*opts != '\0')
+ if (*opts++ == ' ')
+ i++;
+ return i;
+}
+
+/*
+ * get_file_options:
+ *
+ * If vendor == NULL, find a line in the config file with only "OPTIONS=";
+ * if vendor and model are set find the first OPTIONS line in the config
+ * file that matches. Set argc and argv to match the OPTIONS string.
+ *
+ * vendor and model can end in '\n'.
+ */
+static int get_file_options(const char *vendor, const char *model,
+ int *argc, char ***newargv) {
+ _cleanup_free_ char *buffer = NULL;
+ _cleanup_fclose_ FILE *f;
+ char *buf;
+ char *str1;
+ char *vendor_in, *model_in, *options_in; /* read in from file */
+ int lineno;
+ int c;
+ int retval = 0;
+
+ f = fopen(config_file, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 1;
+ else {
+ log_error_errno(errno, "can't open %s: %m", config_file);
+ return -1;
+ }
+ }
+
+ /*
+ * Allocate a buffer rather than put it on the stack so we can
+ * keep it around to parse any options (any allocated newargv
+ * points into this buffer for its strings).
+ */
+ buffer = malloc(MAX_BUFFER_LEN);
+ if (!buffer)
+ return log_oom();
+
+ *newargv = NULL;
+ lineno = 0;
+ for (;;) {
+ vendor_in = model_in = options_in = NULL;
+
+ buf = fgets(buffer, MAX_BUFFER_LEN, f);
+ if (!buf)
+ break;
+ lineno++;
+ if (buf[strlen(buffer) - 1] != '\n') {
+ log_error("Config file line %d too long", lineno);
+ break;
+ }
+
+ while (isspace(*buf))
+ buf++;
+
+ /* blank or all whitespace line */
+ if (*buf == '\0')
+ continue;
+
+ /* comment line */
+ if (*buf == '#')
+ continue;
+
+ str1 = strsep(&buf, "=");
+ if (str1 && strcaseeq(str1, "VENDOR")) {
+ str1 = get_value(&buf);
+ if (!str1) {
+ retval = log_oom();
+ break;
+ }
+ vendor_in = str1;
+
+ str1 = strsep(&buf, "=");
+ if (str1 && strcaseeq(str1, "MODEL")) {
+ str1 = get_value(&buf);
+ if (!str1) {
+ retval = log_oom();
+ break;
+ }
+ model_in = str1;
+ str1 = strsep(&buf, "=");
+ }
+ }
+
+ if (str1 && strcaseeq(str1, "OPTIONS")) {
+ str1 = get_value(&buf);
+ if (!str1) {
+ retval = log_oom();
+ break;
+ }
+ options_in = str1;
+ }
+
+ /*
+ * Only allow: [vendor=foo[,model=bar]]options=stuff
+ */
+ if (!options_in || (!vendor_in && model_in)) {
+ log_error("Error parsing config file line %d '%s'", lineno, buffer);
+ retval = -1;
+ break;
+ }
+ if (!vendor) {
+ if (!vendor_in)
+ break;
+ } else if (vendor_in &&
+ startswith(vendor, vendor_in) &&
+ (!model_in || startswith(model, model_in))) {
+ /*
+ * Matched vendor and optionally model.
+ *
+ * Note: a short vendor_in or model_in can
+ * give a partial match (that is FOO
+ * matches FOOBAR).
+ */
+ break;
+ }
+ }
+
+ if (retval == 0) {
+ if (vendor_in != NULL || model_in != NULL ||
+ options_in != NULL) {
+ /*
+ * Something matched. Allocate newargv, and store
+ * values found in options_in.
+ */
+ strcpy(buffer, options_in);
+ c = argc_count(buffer) + 2;
+ *newargv = calloc(c, sizeof(**newargv));
+ if (!*newargv)
+ retval = log_oom();
+ else {
+ *argc = c;
+ c = 0;
+ /*
+ * argv[0] at 0 is skipped by getopt, but
+ * store the buffer address there for
+ * later freeing
+ */
+ (*newargv)[c] = buffer;
+ for (c = 1; c < *argc; c++)
+ (*newargv)[c] = strsep(&buffer, " \t");
+ buffer = NULL;
+ }
+ } else {
+ /* No matches */
+ retval = 1;
+ }
+ }
+ return retval;
+}
+
+static void help(void) {
+ printf("Usage: %s [OPTION...] DEVICE\n\n"
+ "SCSI device identification.\n\n"
+ " -h --help Print this message\n"
+ " --version Print version of the program\n\n"
+ " -d --device= Device node for SG_IO commands\n"
+ " -f --config= Location of config file\n"
+ " -p --page=0x80|0x83|pre-spc3-83 SCSI page (0x80, 0x83, pre-spc3-83)\n"
+ " -s --sg-version=3|4 Use SGv3 or SGv4\n"
+ " -b --blacklisted Treat device as blacklisted\n"
+ " -g --whitelisted Treat device as whitelisted\n"
+ " -u --replace-whitespace Replace all whitespace by underscores\n"
+ " -v --verbose Verbose logging\n"
+ " -x --export Print values as environment keys\n"
+ , program_invocation_short_name);
+
+}
+
+static int set_options(int argc, char **argv,
+ char *maj_min_dev) {
+ int option;
+
+ /*
+ * optind is a global extern used by getopt. Since we can call
+ * set_options twice (once for command line, and once for config
+ * file) we have to reset this back to 1.
+ */
+ optind = 1;
+ while ((option = getopt_long(argc, argv, "d:f:gp:uvVxhbs:", options, NULL)) >= 0)
+ switch (option) {
+ case 'b':
+ all_good = false;
+ break;
+
+ case 'd':
+ dev_specified = true;
+ strscpy(maj_min_dev, MAX_PATH_LEN, optarg);
+ break;
+
+ case 'f':
+ strscpy(config_file, MAX_PATH_LEN, optarg);
+ break;
+
+ case 'g':
+ all_good = true;
+ break;
+
+ case 'h':
+ help();
+ exit(EXIT_SUCCESS);
+
+ case 'p':
+ if (streq(optarg, "0x80"))
+ default_page_code = PAGE_80;
+ else if (streq(optarg, "0x83"))
+ default_page_code = PAGE_83;
+ else if (streq(optarg, "pre-spc3-83"))
+ default_page_code = PAGE_83_PRE_SPC3;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown page code '%s'",
+ optarg);
+ break;
+
+ case 's':
+ sg_version = atoi(optarg);
+ if (sg_version < 3 || sg_version > 4)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown SG version '%s'",
+ optarg);
+ break;
+
+ case 'u':
+ reformat_serial = true;
+ break;
+
+ case 'v':
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_set_max_level(LOG_DEBUG);
+ log_open();
+ break;
+
+ case 'V':
+ printf("%s\n", GIT_VERSION);
+ exit(EXIT_SUCCESS);
+
+ case 'x':
+ export = true;
+ break;
+
+ case '?':
+ return -1;
+
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ if (optind < argc && !dev_specified) {
+ dev_specified = true;
+ strscpy(maj_min_dev, MAX_PATH_LEN, argv[optind]);
+ }
+
+ return 0;
+}
+
+static int per_dev_options(struct scsi_id_device *dev_scsi, int *good_bad, int *page_code) {
+ int retval;
+ int newargc;
+ char **newargv = NULL;
+ int option;
+
+ *good_bad = all_good;
+ *page_code = default_page_code;
+
+ retval = get_file_options(vendor_str, model_str, &newargc, &newargv);
+
+ optind = 1; /* reset this global extern */
+ while (retval == 0) {
+ option = getopt_long(newargc, newargv, "bgp:", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'b':
+ *good_bad = 0;
+ break;
+
+ case 'g':
+ *good_bad = 1;
+ break;
+
+ case 'p':
+ if (streq(optarg, "0x80")) {
+ *page_code = PAGE_80;
+ } else if (streq(optarg, "0x83")) {
+ *page_code = PAGE_83;
+ } else if (streq(optarg, "pre-spc3-83")) {
+ *page_code = PAGE_83_PRE_SPC3;
+ } else {
+ log_error("Unknown page code '%s'", optarg);
+ retval = -1;
+ }
+ break;
+
+ default:
+ log_error("Unknown or bad option '%c' (0x%x)", option, option);
+ retval = -1;
+ break;
+ }
+ }
+
+ if (newargv) {
+ free(newargv[0]);
+ free(newargv);
+ }
+ return retval;
+}
+
+static int set_inq_values(struct scsi_id_device *dev_scsi, const char *path) {
+ int retval;
+
+ dev_scsi->use_sg = sg_version;
+
+ retval = scsi_std_inquiry(dev_scsi, path);
+ if (retval)
+ return retval;
+
+ udev_util_encode_string(dev_scsi->vendor, vendor_enc_str, sizeof(vendor_enc_str));
+ udev_util_encode_string(dev_scsi->model, model_enc_str, sizeof(model_enc_str));
+
+ util_replace_whitespace(dev_scsi->vendor, vendor_str, sizeof(vendor_str)-1);
+ util_replace_chars(vendor_str, NULL);
+ util_replace_whitespace(dev_scsi->model, model_str, sizeof(model_str)-1);
+ util_replace_chars(model_str, NULL);
+ set_type(dev_scsi->type, type_str, sizeof(type_str));
+ util_replace_whitespace(dev_scsi->revision, revision_str, sizeof(revision_str)-1);
+ util_replace_chars(revision_str, NULL);
+ return 0;
+}
+
+/*
+ * scsi_id: try to get an id, if one is found, printf it to stdout.
+ * returns a value passed to exit() - 0 if printed an id, else 1.
+ */
+static int scsi_id(char *maj_min_dev) {
+ struct scsi_id_device dev_scsi = {};
+ int good_dev;
+ int page_code;
+ int retval = 0;
+
+ if (set_inq_values(&dev_scsi, maj_min_dev) < 0) {
+ retval = 1;
+ goto out;
+ }
+
+ /* get per device (vendor + model) options from the config file */
+ per_dev_options(&dev_scsi, &good_dev, &page_code);
+ if (!good_dev) {
+ retval = 1;
+ goto out;
+ }
+
+ /* read serial number from mode pages (no values for optical drives) */
+ scsi_get_serial(&dev_scsi, maj_min_dev, page_code, MAX_SERIAL_LEN);
+
+ if (export) {
+ char serial_str[MAX_SERIAL_LEN];
+
+ printf("ID_SCSI=1\n");
+ printf("ID_VENDOR=%s\n", vendor_str);
+ printf("ID_VENDOR_ENC=%s\n", vendor_enc_str);
+ printf("ID_MODEL=%s\n", model_str);
+ printf("ID_MODEL_ENC=%s\n", model_enc_str);
+ printf("ID_REVISION=%s\n", revision_str);
+ printf("ID_TYPE=%s\n", type_str);
+ if (dev_scsi.serial[0] != '\0') {
+ util_replace_whitespace(dev_scsi.serial, serial_str, sizeof(serial_str)-1);
+ util_replace_chars(serial_str, NULL);
+ printf("ID_SERIAL=%s\n", serial_str);
+ util_replace_whitespace(dev_scsi.serial_short, serial_str, sizeof(serial_str)-1);
+ util_replace_chars(serial_str, NULL);
+ printf("ID_SERIAL_SHORT=%s\n", serial_str);
+ }
+ if (dev_scsi.wwn[0] != '\0') {
+ printf("ID_WWN=0x%s\n", dev_scsi.wwn);
+ if (dev_scsi.wwn_vendor_extension[0] != '\0') {
+ printf("ID_WWN_VENDOR_EXTENSION=0x%s\n", dev_scsi.wwn_vendor_extension);
+ printf("ID_WWN_WITH_EXTENSION=0x%s%s\n", dev_scsi.wwn, dev_scsi.wwn_vendor_extension);
+ } else
+ printf("ID_WWN_WITH_EXTENSION=0x%s\n", dev_scsi.wwn);
+ }
+ if (dev_scsi.tgpt_group[0] != '\0')
+ printf("ID_TARGET_PORT=%s\n", dev_scsi.tgpt_group);
+ if (dev_scsi.unit_serial_number[0] != '\0')
+ printf("ID_SCSI_SERIAL=%s\n", dev_scsi.unit_serial_number);
+ goto out;
+ }
+
+ if (dev_scsi.serial[0] == '\0') {
+ retval = 1;
+ goto out;
+ }
+
+ if (reformat_serial) {
+ char serial_str[MAX_SERIAL_LEN];
+
+ util_replace_whitespace(dev_scsi.serial, serial_str, sizeof(serial_str)-1);
+ util_replace_chars(serial_str, NULL);
+ printf("%s\n", serial_str);
+ goto out;
+ }
+
+ printf("%s\n", dev_scsi.serial);
+out:
+ return retval;
+}
+
+int main(int argc, char **argv) {
+ int retval = 0;
+ char maj_min_dev[MAX_PATH_LEN];
+ int newargc;
+ char **newargv = NULL;
+
+ log_set_target(LOG_TARGET_AUTO);
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ /*
+ * Get config file options.
+ */
+ retval = get_file_options(NULL, NULL, &newargc, &newargv);
+ if (retval < 0) {
+ retval = 1;
+ goto exit;
+ }
+ if (retval == 0) {
+ assert(newargv);
+
+ if (set_options(newargc, newargv, maj_min_dev) < 0) {
+ retval = 2;
+ goto exit;
+ }
+ }
+
+ /*
+ * Get command line options (overriding any config file settings).
+ */
+ if (set_options(argc, argv, maj_min_dev) < 0)
+ exit(EXIT_FAILURE);
+
+ if (!dev_specified) {
+ log_error("No device specified.");
+ retval = 1;
+ goto exit;
+ }
+
+ retval = scsi_id(maj_min_dev);
+
+exit:
+ if (newargv) {
+ free(newargv[0]);
+ free(newargv);
+ }
+ log_close();
+ return retval;
+}
diff --git a/src/udev/scsi_id/scsi_id.h b/src/udev/scsi_id/scsi_id.h
new file mode 100644
index 0000000..2fe64f4
--- /dev/null
+++ b/src/udev/scsi_id/scsi_id.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+/*
+ * Copyright © IBM Corp. 2003
+ */
+
+#define MAX_PATH_LEN 512
+
+/*
+ * MAX_ATTR_LEN: maximum length of the result of reading a sysfs
+ * attribute.
+ */
+#define MAX_ATTR_LEN 256
+
+/*
+ * MAX_SERIAL_LEN: the maximum length of the serial number, including
+ * added prefixes such as vendor and product (model) strings.
+ */
+#define MAX_SERIAL_LEN 256
+
+/*
+ * MAX_BUFFER_LEN: maximum buffer size and line length used while reading
+ * the config file.
+ */
+#define MAX_BUFFER_LEN 256
+
+struct scsi_id_device {
+ char vendor[9];
+ char model[17];
+ char revision[5];
+ char type[33];
+ char kernel[64];
+ char serial[MAX_SERIAL_LEN];
+ char serial_short[MAX_SERIAL_LEN];
+ int use_sg;
+
+ /* Always from page 0x80 e.g. 'B3G1P8500RWT' - may not be unique */
+ char unit_serial_number[MAX_SERIAL_LEN];
+
+ /* NULs if not set - otherwise hex encoding using lower-case e.g. '50014ee0016eb572' */
+ char wwn[17];
+
+ /* NULs if not set - otherwise hex encoding using lower-case e.g. '0xe00000d80000' */
+ char wwn_vendor_extension[17];
+
+ /* NULs if not set - otherwise decimal number */
+ char tgpt_group[8];
+};
+
+int scsi_std_inquiry(struct scsi_id_device *dev_scsi, const char *devname);
+int scsi_get_serial(struct scsi_id_device *dev_scsi, const char *devname,
+ int page_code, int len);
+
+/*
+ * Page code values.
+ */
+enum page_code {
+ PAGE_83_PRE_SPC3 = -0x83,
+ PAGE_UNSPECIFIED = 0x00,
+ PAGE_80 = 0x80,
+ PAGE_83 = 0x83,
+};
diff --git a/src/udev/scsi_id/scsi_serial.c b/src/udev/scsi_id/scsi_serial.c
new file mode 100644
index 0000000..4fe7254
--- /dev/null
+++ b/src/udev/scsi_id/scsi_serial.c
@@ -0,0 +1,893 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © IBM Corp. 2003
+ *
+ * Author: Patrick Mansfield<patmans@us.ibm.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/bsg.h>
+#include <linux/types.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "memory-util.h"
+#include "random-util.h"
+#include "scsi.h"
+#include "scsi_id.h"
+#include "string-util.h"
+
+/*
+ * A priority based list of id, naa, and binary/ascii for the identifier
+ * descriptor in VPD page 0x83.
+ *
+ * Brute force search for a match starting with the first value in the
+ * following id_search_list. This is not a performance issue, since there
+ * is normally one or some small number of descriptors.
+ */
+static const struct scsi_id_search_values id_search_list[] = {
+ { SCSI_ID_TGTGROUP, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_NAA, SCSI_ID_NAA_IEEE_REG_EXTENDED, SCSI_ID_BINARY },
+ { SCSI_ID_NAA, SCSI_ID_NAA_IEEE_REG_EXTENDED, SCSI_ID_ASCII },
+ { SCSI_ID_NAA, SCSI_ID_NAA_IEEE_REG, SCSI_ID_BINARY },
+ { SCSI_ID_NAA, SCSI_ID_NAA_IEEE_REG, SCSI_ID_ASCII },
+ /*
+ * Devices already exist using NAA values that are now marked
+ * reserved. These should not conflict with other values, or it is
+ * a bug in the device. As long as we find the IEEE extended one
+ * first, we really don't care what other ones are used. Using
+ * don't care here means that a device that returns multiple
+ * non-IEEE descriptors in a random order will get different
+ * names.
+ */
+ { SCSI_ID_NAA, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_NAA, SCSI_ID_NAA_DONT_CARE, SCSI_ID_ASCII },
+ { SCSI_ID_EUI_64, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_EUI_64, SCSI_ID_NAA_DONT_CARE, SCSI_ID_ASCII },
+ { SCSI_ID_T10_VENDOR, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_T10_VENDOR, SCSI_ID_NAA_DONT_CARE, SCSI_ID_ASCII },
+ { SCSI_ID_VENDOR_SPECIFIC, SCSI_ID_NAA_DONT_CARE, SCSI_ID_BINARY },
+ { SCSI_ID_VENDOR_SPECIFIC, SCSI_ID_NAA_DONT_CARE, SCSI_ID_ASCII },
+};
+
+static const char hex_str[]="0123456789abcdef";
+
+/*
+ * Values returned in the result/status, only the ones used by the code
+ * are used here.
+ */
+
+#define DID_NO_CONNECT 0x01 /* Unable to connect before timeout */
+#define DID_BUS_BUSY 0x02 /* Bus remain busy until timeout */
+#define DID_TIME_OUT 0x03 /* Timed out for some other reason */
+#define DRIVER_TIMEOUT 0x06
+#define DRIVER_SENSE 0x08 /* Sense_buffer has been set */
+
+/* The following "category" function returns one of the following */
+#define SG_ERR_CAT_CLEAN 0 /* No errors or other information */
+#define SG_ERR_CAT_MEDIA_CHANGED 1 /* interpreted from sense buffer */
+#define SG_ERR_CAT_RESET 2 /* interpreted from sense buffer */
+#define SG_ERR_CAT_TIMEOUT 3
+#define SG_ERR_CAT_RECOVERED 4 /* Successful command after recovered err */
+#define SG_ERR_CAT_NOTSUPPORTED 5 /* Illegal / unsupported command */
+#define SG_ERR_CAT_SENSE 98 /* Something else in the sense buffer */
+#define SG_ERR_CAT_OTHER 99 /* Some other error/warning */
+
+static int do_scsi_page80_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ char *serial, char *serial_short, int max_len);
+
+static int sg_err_category_new(int scsi_status, int msg_status, int
+ host_status, int driver_status, const
+ unsigned char *sense_buffer, int sb_len) {
+ scsi_status &= 0x7e;
+
+ /*
+ * XXX change to return only two values - failed or OK.
+ */
+
+ if (!scsi_status && !host_status && !driver_status)
+ return SG_ERR_CAT_CLEAN;
+
+ if (IN_SET(scsi_status, SCSI_CHECK_CONDITION, SCSI_COMMAND_TERMINATED) ||
+ (driver_status & 0xf) == DRIVER_SENSE) {
+ if (sense_buffer && (sb_len > 2)) {
+ int sense_key;
+ unsigned char asc;
+
+ if (sense_buffer[0] & 0x2) {
+ sense_key = sense_buffer[1] & 0xf;
+ asc = sense_buffer[2];
+ } else {
+ sense_key = sense_buffer[2] & 0xf;
+ asc = (sb_len > 12) ? sense_buffer[12] : 0;
+ }
+
+ if (sense_key == RECOVERED_ERROR)
+ return SG_ERR_CAT_RECOVERED;
+ else if (sense_key == UNIT_ATTENTION) {
+ if (0x28 == asc)
+ return SG_ERR_CAT_MEDIA_CHANGED;
+ if (0x29 == asc)
+ return SG_ERR_CAT_RESET;
+ } else if (sense_key == ILLEGAL_REQUEST)
+ return SG_ERR_CAT_NOTSUPPORTED;
+ }
+ return SG_ERR_CAT_SENSE;
+ }
+ if (host_status) {
+ if (IN_SET(host_status, DID_NO_CONNECT, DID_BUS_BUSY, DID_TIME_OUT))
+ return SG_ERR_CAT_TIMEOUT;
+ }
+ if (driver_status) {
+ if (driver_status == DRIVER_TIMEOUT)
+ return SG_ERR_CAT_TIMEOUT;
+ }
+ return SG_ERR_CAT_OTHER;
+}
+
+static int sg_err_category3(struct sg_io_hdr *hp) {
+ return sg_err_category_new(hp->status, hp->msg_status,
+ hp->host_status, hp->driver_status,
+ hp->sbp, hp->sb_len_wr);
+}
+
+static int sg_err_category4(struct sg_io_v4 *hp) {
+ return sg_err_category_new(hp->device_status, 0,
+ hp->transport_status, hp->driver_status,
+ (unsigned char *)(uintptr_t)hp->response,
+ hp->response_len);
+}
+
+static int scsi_dump_sense(struct scsi_id_device *dev_scsi,
+ unsigned char *sense_buffer, int sb_len) {
+ int s;
+ int code;
+ int sense_class;
+ int sense_key;
+ int asc, ascq;
+
+ /*
+ * Figure out and print the sense key, asc and ascq.
+ *
+ * If you want to suppress these for a particular drive model, add
+ * a black list entry in the scsi_id config file.
+ *
+ * XXX We probably need to: lookup the sense/asc/ascq in a retry
+ * table, and if found return 1 (after dumping the sense, asc, and
+ * ascq). So, if/when we get something like a power on/reset,
+ * we'll retry the command.
+ */
+
+ if (sb_len < 1)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: sense buffer empty",
+ dev_scsi->kernel);
+
+ sense_class = (sense_buffer[0] >> 4) & 0x07;
+ code = sense_buffer[0] & 0xf;
+
+ if (sense_class == 7) {
+ /*
+ * extended sense data.
+ */
+ s = sense_buffer[7] + 8;
+ if (sb_len < s)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: sense buffer too small %d bytes, %d bytes too short",
+ dev_scsi->kernel, sb_len,
+ s - sb_len);
+
+ if (IN_SET(code, 0x0, 0x1)) {
+ sense_key = sense_buffer[2] & 0xf;
+ if (s < 14)
+ /*
+ * Possible?
+ */
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: sense result too small %d bytes",
+ dev_scsi->kernel, s);
+
+ asc = sense_buffer[12];
+ ascq = sense_buffer[13];
+ } else if (IN_SET(code, 0x2, 0x3)) {
+ sense_key = sense_buffer[1] & 0xf;
+ asc = sense_buffer[2];
+ ascq = sense_buffer[3];
+ } else
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: invalid sense code 0x%x",
+ dev_scsi->kernel, code);
+
+ log_debug("%s: sense key 0x%x ASC 0x%x ASCQ 0x%x",
+ dev_scsi->kernel, sense_key, asc, ascq);
+ } else {
+ if (sb_len < 4)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: sense buffer too small %d bytes, %d bytes too short",
+ dev_scsi->kernel, sb_len,
+ 4 - sb_len);
+
+ if (sense_buffer[0] < 15)
+ log_debug("%s: old sense key: 0x%x", dev_scsi->kernel, sense_buffer[0] & 0x0f);
+ else
+ log_debug("%s: sense = %2x %2x",
+ dev_scsi->kernel, sense_buffer[0], sense_buffer[2]);
+ log_debug("%s: non-extended sense class %d code 0x%0x",
+ dev_scsi->kernel, sense_class, code);
+
+ }
+
+ return -1;
+}
+
+static int scsi_dump(struct scsi_id_device *dev_scsi, struct sg_io_hdr *io) {
+ if (!io->status && !io->host_status && !io->msg_status &&
+ !io->driver_status)
+ /*
+ * Impossible, should not be called.
+ */
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: called with no error",
+ __FUNCTION__);
+
+ log_debug("%s: sg_io failed status 0x%x 0x%x 0x%x 0x%x",
+ dev_scsi->kernel, io->driver_status, io->host_status, io->msg_status, io->status);
+ if (io->status == SCSI_CHECK_CONDITION)
+ return scsi_dump_sense(dev_scsi, io->sbp, io->sb_len_wr);
+ else
+ return -1;
+}
+
+static int scsi_dump_v4(struct scsi_id_device *dev_scsi, struct sg_io_v4 *io) {
+ if (!io->device_status && !io->transport_status &&
+ !io->driver_status)
+ /*
+ * Impossible, should not be called.
+ */
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: called with no error",
+ __FUNCTION__);
+
+ log_debug("%s: sg_io failed status 0x%x 0x%x 0x%x",
+ dev_scsi->kernel, io->driver_status, io->transport_status, io->device_status);
+ if (io->device_status == SCSI_CHECK_CONDITION)
+ return scsi_dump_sense(dev_scsi, (unsigned char *)(uintptr_t)io->response,
+ io->response_len);
+ else
+ return -1;
+}
+
+static int scsi_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ unsigned char evpd, unsigned char page,
+ unsigned char *buf, unsigned buflen) {
+ unsigned char inq_cmd[INQUIRY_CMDLEN] =
+ { INQUIRY_CMD, evpd, page, 0, buflen, 0 };
+ unsigned char sense[SENSE_BUFF_LEN];
+ void *io_buf;
+ struct sg_io_v4 io_v4;
+ struct sg_io_hdr io_hdr;
+ int retry = 3; /* rather random */
+ int retval;
+
+ if (buflen > SCSI_INQ_BUFF_LEN)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "buflen %d too long", buflen);
+
+resend:
+ if (dev_scsi->use_sg == 4) {
+ memzero(&io_v4, sizeof(struct sg_io_v4));
+ io_v4.guard = 'Q';
+ io_v4.protocol = BSG_PROTOCOL_SCSI;
+ io_v4.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
+ io_v4.request_len = sizeof(inq_cmd);
+ io_v4.request = (uintptr_t)inq_cmd;
+ io_v4.max_response_len = sizeof(sense);
+ io_v4.response = (uintptr_t)sense;
+ io_v4.din_xfer_len = buflen;
+ io_v4.din_xferp = (uintptr_t)buf;
+ io_buf = (void *)&io_v4;
+ } else {
+ memzero(&io_hdr, sizeof(struct sg_io_hdr));
+ io_hdr.interface_id = 'S';
+ io_hdr.cmd_len = sizeof(inq_cmd);
+ io_hdr.mx_sb_len = sizeof(sense);
+ io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+ io_hdr.dxfer_len = buflen;
+ io_hdr.dxferp = buf;
+ io_hdr.cmdp = inq_cmd;
+ io_hdr.sbp = sense;
+ io_hdr.timeout = DEF_TIMEOUT;
+ io_buf = (void *)&io_hdr;
+ }
+
+ retval = ioctl(fd, SG_IO, io_buf);
+ if (retval < 0) {
+ if (IN_SET(errno, EINVAL, ENOSYS) && dev_scsi->use_sg == 4) {
+ dev_scsi->use_sg = 3;
+ goto resend;
+ }
+ log_debug_errno(errno, "%s: ioctl failed: %m", dev_scsi->kernel);
+ goto error;
+ }
+
+ if (dev_scsi->use_sg == 4)
+ retval = sg_err_category4(io_buf);
+ else
+ retval = sg_err_category3(io_buf);
+
+ switch (retval) {
+ case SG_ERR_CAT_NOTSUPPORTED:
+ buf[1] = 0;
+ _fallthrough_;
+ case SG_ERR_CAT_CLEAN:
+ case SG_ERR_CAT_RECOVERED:
+ retval = 0;
+ break;
+
+ default:
+ if (dev_scsi->use_sg == 4)
+ retval = scsi_dump_v4(dev_scsi, io_buf);
+ else
+ retval = scsi_dump(dev_scsi, io_buf);
+ }
+
+ if (!retval) {
+ retval = buflen;
+ } else if (retval > 0) {
+ if (--retry > 0)
+ goto resend;
+ retval = -1;
+ }
+
+error:
+ if (retval < 0)
+ log_debug("%s: Unable to get INQUIRY vpd %d page 0x%x.",
+ dev_scsi->kernel, evpd, page);
+
+ return retval;
+}
+
+/* Get list of supported EVPD pages */
+static int do_scsi_page0_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ unsigned char *buffer, unsigned len) {
+ int retval;
+
+ memzero(buffer, len);
+ retval = scsi_inquiry(dev_scsi, fd, 1, 0x0, buffer, len);
+ if (retval < 0)
+ return 1;
+
+ if (buffer[1] != 0) {
+ log_debug("%s: page 0 not available.", dev_scsi->kernel);
+ return 1;
+ }
+ if (buffer[3] > len) {
+ log_debug("%s: page 0 buffer too long %d", dev_scsi->kernel, buffer[3]);
+ return 1;
+ }
+
+ /*
+ * Following check is based on code once included in the 2.5.x
+ * kernel.
+ *
+ * Some ill behaved devices return the standard inquiry here
+ * rather than the evpd data, snoop the data to verify.
+ */
+ if (buffer[3] > MODEL_LENGTH) {
+ /*
+ * If the vendor id appears in the page assume the page is
+ * invalid.
+ */
+ if (strneq((char*) buffer + VENDOR_LENGTH, dev_scsi->vendor, VENDOR_LENGTH)) {
+ log_debug("%s: invalid page0 data", dev_scsi->kernel);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int append_vendor_model(
+ const struct scsi_id_device *dev_scsi,
+ char buf[static VENDOR_LENGTH + MODEL_LENGTH]) {
+
+ assert(dev_scsi);
+ assert(buf);
+
+ if (strnlen(dev_scsi->vendor, VENDOR_LENGTH) != VENDOR_LENGTH)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: bad vendor string \"%s\"",
+ dev_scsi->kernel, dev_scsi->vendor);
+ if (strnlen(dev_scsi->model, MODEL_LENGTH) != MODEL_LENGTH)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: bad model string \"%s\"",
+ dev_scsi->kernel, dev_scsi->model);
+ memcpy(buf, dev_scsi->vendor, VENDOR_LENGTH);
+ memcpy(buf + VENDOR_LENGTH, dev_scsi->model, MODEL_LENGTH);
+ return VENDOR_LENGTH + MODEL_LENGTH;
+}
+
+/*
+ * check_fill_0x83_id - check the page 0x83 id, if OK allocate and fill
+ * serial number.
+ */
+static int check_fill_0x83_id(struct scsi_id_device *dev_scsi,
+ unsigned char *page_83,
+ const struct scsi_id_search_values
+ *id_search, char *serial, char *serial_short,
+ int max_len, char *wwn,
+ char *wwn_vendor_extension, char *tgpt_group) {
+ int i, j, s, len;
+
+ /*
+ * ASSOCIATION must be with the device (value 0)
+ * or with the target port for SCSI_ID_TGTPORT
+ */
+ if ((page_83[1] & 0x30) == 0x10) {
+ if (id_search->id_type != SCSI_ID_TGTGROUP)
+ return 1;
+ } else if ((page_83[1] & 0x30) != 0)
+ return 1;
+
+ if ((page_83[1] & 0x0f) != id_search->id_type)
+ return 1;
+
+ /*
+ * Possibly check NAA sub-type.
+ */
+ if ((id_search->naa_type != SCSI_ID_NAA_DONT_CARE) &&
+ (id_search->naa_type != (page_83[4] & 0xf0) >> 4))
+ return 1;
+
+ /*
+ * Check for matching code set - ASCII or BINARY.
+ */
+ if ((page_83[0] & 0x0f) != id_search->code_set)
+ return 1;
+
+ /*
+ * page_83[3]: identifier length
+ */
+ len = page_83[3];
+ if ((page_83[0] & 0x0f) != SCSI_ID_ASCII)
+ /*
+ * If not ASCII, use two bytes for each binary value.
+ */
+ len *= 2;
+
+ /*
+ * Add one byte for the NUL termination, and one for the id_type.
+ */
+ len += 2;
+ if (id_search->id_type == SCSI_ID_VENDOR_SPECIFIC)
+ len += VENDOR_LENGTH + MODEL_LENGTH;
+
+ if (max_len < len) {
+ log_debug("%s: length %d too short - need %d",
+ dev_scsi->kernel, max_len, len);
+ return 1;
+ }
+
+ if (id_search->id_type == SCSI_ID_TGTGROUP && tgpt_group != NULL) {
+ unsigned group;
+
+ group = ((unsigned)page_83[6] << 8) | page_83[7];
+ sprintf(tgpt_group,"%x", group);
+ return 1;
+ }
+
+ serial[0] = hex_str[id_search->id_type];
+
+ /*
+ * For SCSI_ID_VENDOR_SPECIFIC prepend the vendor and model before
+ * the id since it is not unique across all vendors and models,
+ * this differs from SCSI_ID_T10_VENDOR, where the vendor is
+ * included in the identifier.
+ */
+ if (id_search->id_type == SCSI_ID_VENDOR_SPECIFIC)
+ if (append_vendor_model(dev_scsi, serial + 1) < 0)
+ return 1;
+
+ i = 4; /* offset to the start of the identifier */
+ s = j = strlen(serial);
+ if ((page_83[0] & 0x0f) == SCSI_ID_ASCII) {
+ /*
+ * ASCII descriptor.
+ */
+ while (i < (4 + page_83[3]))
+ serial[j++] = page_83[i++];
+ } else {
+ /*
+ * Binary descriptor, convert to ASCII, using two bytes of
+ * ASCII for each byte in the page_83.
+ */
+ while (i < (4 + page_83[3])) {
+ serial[j++] = hex_str[(page_83[i] & 0xf0) >> 4];
+ serial[j++] = hex_str[page_83[i] & 0x0f];
+ i++;
+ }
+ }
+
+ strcpy(serial_short, serial + s);
+
+ if (id_search->id_type == SCSI_ID_NAA && wwn != NULL) {
+ strncpy(wwn, serial + s, 16);
+ if (wwn_vendor_extension)
+ strncpy(wwn_vendor_extension, serial + s + 16, 16);
+ }
+
+ return 0;
+}
+
+/* Extract the raw binary from VPD 0x83 pre-SPC devices */
+static int check_fill_0x83_prespc3(struct scsi_id_device *dev_scsi,
+ unsigned char *page_83,
+ const struct scsi_id_search_values
+ *id_search, char *serial, char *serial_short, int max_len) {
+ int i, j;
+
+ serial[0] = hex_str[SCSI_ID_NAA];
+ /* serial has been memset to zero before */
+ j = strlen(serial); /* j = 1; */
+
+ for (i = 0; (i < page_83[3]) && (j < max_len-3); ++i) {
+ serial[j++] = hex_str[(page_83[4+i] & 0xf0) >> 4];
+ serial[j++] = hex_str[ page_83[4+i] & 0x0f];
+ }
+ serial[max_len-1] = 0;
+ strncpy(serial_short, serial, max_len-1);
+ return 0;
+}
+
+/* Get device identification VPD page */
+static int do_scsi_page83_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ char *serial, char *serial_short, int len,
+ char *unit_serial_number, char *wwn,
+ char *wwn_vendor_extension, char *tgpt_group) {
+ int retval;
+ unsigned id_ind, j;
+ unsigned char page_83[SCSI_INQ_BUFF_LEN];
+
+ /* also pick up the page 80 serial number */
+ do_scsi_page80_inquiry(dev_scsi, fd, NULL, unit_serial_number, MAX_SERIAL_LEN);
+
+ memzero(page_83, SCSI_INQ_BUFF_LEN);
+ retval = scsi_inquiry(dev_scsi, fd, 1, PAGE_83, page_83,
+ SCSI_INQ_BUFF_LEN);
+ if (retval < 0)
+ return 1;
+
+ if (page_83[1] != PAGE_83) {
+ log_debug("%s: Invalid page 0x83", dev_scsi->kernel);
+ return 1;
+ }
+
+ /*
+ * XXX Some devices (IBM 3542) return all spaces for an identifier if
+ * the LUN is not actually configured. This leads to identifiers of
+ * the form: "1 ".
+ */
+
+ /*
+ * Model 4, 5, and (some) model 6 EMC Symmetrix devices return
+ * a page 83 reply according to SCSI-2 format instead of SPC-2/3.
+ *
+ * The SCSI-2 page 83 format returns an IEEE WWN in binary
+ * encoded hexi-decimal in the 16 bytes following the initial
+ * 4-byte page 83 reply header.
+ *
+ * Both the SPC-2 and SPC-3 formats return an IEEE WWN as part
+ * of an Identification descriptor. The 3rd byte of the first
+ * Identification descriptor is a reserved (BSZ) byte field.
+ *
+ * Reference the 7th byte of the page 83 reply to determine
+ * whether the reply is compliant with SCSI-2 or SPC-2/3
+ * specifications. A zero value in the 7th byte indicates
+ * an SPC-2/3 conformant reply, (i.e., the reserved field of the
+ * first Identification descriptor). This byte will be non-zero
+ * for a SCSI-2 conformant page 83 reply from these EMC
+ * Symmetrix models since the 7th byte of the reply corresponds
+ * to the 4th and 5th nibbles of the 6-byte OUI for EMC, that is,
+ * 0x006048.
+ */
+
+ if (page_83[6] != 0)
+ return check_fill_0x83_prespc3(dev_scsi, page_83, id_search_list,
+ serial, serial_short, len);
+
+ /*
+ * Search for a match in the prioritized id_search_list - since WWN ids
+ * come first we can pick up the WWN in check_fill_0x83_id().
+ */
+ for (id_ind = 0;
+ id_ind < sizeof(id_search_list)/sizeof(id_search_list[0]);
+ id_ind++) {
+ /*
+ * Examine each descriptor returned. There is normally only
+ * one or a small number of descriptors.
+ */
+ for (j = 4; j <= ((unsigned)page_83[2] << 8) + (unsigned)page_83[3] + 3; j += page_83[j + 3] + 4) {
+ retval = check_fill_0x83_id(dev_scsi, page_83 + j,
+ id_search_list + id_ind,
+ serial, serial_short, len,
+ wwn, wwn_vendor_extension,
+ tgpt_group);
+ if (!retval)
+ return retval;
+ else if (retval < 0)
+ return retval;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Get device identification VPD page for older SCSI-2 device which is not
+ * compliant with either SPC-2 or SPC-3 format.
+ *
+ * Return the hard coded error code value 2 if the page 83 reply is not
+ * conformant to the SCSI-2 format.
+ */
+static int do_scsi_page83_prespc3_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ char *serial, char *serial_short, int len) {
+ int retval;
+ int i, j;
+ unsigned char page_83[SCSI_INQ_BUFF_LEN];
+
+ memzero(page_83, SCSI_INQ_BUFF_LEN);
+ retval = scsi_inquiry(dev_scsi, fd, 1, PAGE_83, page_83, SCSI_INQ_BUFF_LEN);
+ if (retval < 0)
+ return 1;
+
+ if (page_83[1] != PAGE_83) {
+ log_debug("%s: Invalid page 0x83", dev_scsi->kernel);
+ return 1;
+ }
+ /*
+ * Model 4, 5, and (some) model 6 EMC Symmetrix devices return
+ * a page 83 reply according to SCSI-2 format instead of SPC-2/3.
+ *
+ * The SCSI-2 page 83 format returns an IEEE WWN in binary
+ * encoded hexi-decimal in the 16 bytes following the initial
+ * 4-byte page 83 reply header.
+ *
+ * Both the SPC-2 and SPC-3 formats return an IEEE WWN as part
+ * of an Identification descriptor. The 3rd byte of the first
+ * Identification descriptor is a reserved (BSZ) byte field.
+ *
+ * Reference the 7th byte of the page 83 reply to determine
+ * whether the reply is compliant with SCSI-2 or SPC-2/3
+ * specifications. A zero value in the 7th byte indicates
+ * an SPC-2/3 conformant reply, (i.e., the reserved field of the
+ * first Identification descriptor). This byte will be non-zero
+ * for a SCSI-2 conformant page 83 reply from these EMC
+ * Symmetrix models since the 7th byte of the reply corresponds
+ * to the 4th and 5th nibbles of the 6-byte OUI for EMC, that is,
+ * 0x006048.
+ */
+ if (page_83[6] == 0)
+ return 2;
+
+ serial[0] = hex_str[SCSI_ID_NAA];
+ /*
+ * The first four bytes contain data, not a descriptor.
+ */
+ i = 4;
+ j = strlen(serial);
+ /*
+ * Binary descriptor, convert to ASCII,
+ * using two bytes of ASCII for each byte
+ * in the page_83.
+ */
+ while (i < (page_83[3]+4)) {
+ serial[j++] = hex_str[(page_83[i] & 0xf0) >> 4];
+ serial[j++] = hex_str[page_83[i] & 0x0f];
+ i++;
+ }
+ return 0;
+}
+
+/* Get unit serial number VPD page */
+static int do_scsi_page80_inquiry(struct scsi_id_device *dev_scsi, int fd,
+ char *serial, char *serial_short, int max_len) {
+ int retval;
+ int ser_ind;
+ int i;
+ int len;
+ unsigned char buf[SCSI_INQ_BUFF_LEN];
+
+ memzero(buf, SCSI_INQ_BUFF_LEN);
+ retval = scsi_inquiry(dev_scsi, fd, 1, PAGE_80, buf, SCSI_INQ_BUFF_LEN);
+ if (retval < 0)
+ return retval;
+
+ if (buf[1] != PAGE_80) {
+ log_debug("%s: Invalid page 0x80", dev_scsi->kernel);
+ return 1;
+ }
+
+ len = 1 + VENDOR_LENGTH + MODEL_LENGTH + buf[3];
+ if (max_len < len) {
+ log_debug("%s: length %d too short - need %d",
+ dev_scsi->kernel, max_len, len);
+ return 1;
+ }
+ /*
+ * Prepend 'S' to avoid unlikely collision with page 0x83 vendor
+ * specific type where we prepend '0' + vendor + model.
+ */
+ len = buf[3];
+ if (serial) {
+ serial[0] = 'S';
+ ser_ind = append_vendor_model(dev_scsi, serial + 1);
+ if (ser_ind < 0)
+ return 1;
+ ser_ind++; /* for the leading 'S' */
+ for (i = 4; i < len + 4; i++, ser_ind++)
+ serial[ser_ind] = buf[i];
+ }
+ if (serial_short) {
+ memcpy(serial_short, buf + 4, len);
+ serial_short[len] = '\0';
+ }
+ return 0;
+}
+
+int scsi_std_inquiry(struct scsi_id_device *dev_scsi, const char *devname) {
+ int fd;
+ unsigned char buf[SCSI_INQ_BUFF_LEN];
+ struct stat statbuf;
+ int err = 0;
+
+ fd = open(devname, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
+ if (fd < 0) {
+ log_debug_errno(errno, "scsi_id: cannot open %s: %m", devname);
+ return 1;
+ }
+
+ if (fstat(fd, &statbuf) < 0) {
+ log_debug_errno(errno, "scsi_id: cannot stat %s: %m", devname);
+ err = 2;
+ goto out;
+ }
+ sprintf(dev_scsi->kernel,"%d:%d", major(statbuf.st_rdev),
+ minor(statbuf.st_rdev));
+
+ memzero(buf, SCSI_INQ_BUFF_LEN);
+ err = scsi_inquiry(dev_scsi, fd, 0, 0, buf, SCSI_INQ_BUFF_LEN);
+ if (err < 0)
+ goto out;
+
+ err = 0;
+ memcpy(dev_scsi->vendor, buf + 8, 8);
+ dev_scsi->vendor[8] = '\0';
+ memcpy(dev_scsi->model, buf + 16, 16);
+ dev_scsi->model[16] = '\0';
+ memcpy(dev_scsi->revision, buf + 32, 4);
+ dev_scsi->revision[4] = '\0';
+ sprintf(dev_scsi->type,"%x", buf[0] & 0x1f);
+
+out:
+ close(fd);
+ return err;
+}
+
+int scsi_get_serial(struct scsi_id_device *dev_scsi, const char *devname,
+ int page_code, int len) {
+ unsigned char page0[SCSI_INQ_BUFF_LEN];
+ int fd = -1;
+ int cnt;
+ int ind;
+ int retval;
+
+ memzero(dev_scsi->serial, len);
+ initialize_srand();
+ for (cnt = 20; cnt > 0; cnt--) {
+ struct timespec duration;
+
+ fd = open(devname, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
+ if (fd >= 0 || errno != EBUSY)
+ break;
+ duration.tv_sec = 0;
+ duration.tv_nsec = (200 * 1000 * 1000) + (rand() % 100 * 1000 * 1000);
+ nanosleep(&duration, NULL);
+ }
+ if (fd < 0)
+ return 1;
+
+ if (page_code == PAGE_80) {
+ if (do_scsi_page80_inquiry(dev_scsi, fd, dev_scsi->serial, dev_scsi->serial_short, len)) {
+ retval = 1;
+ goto completed;
+ } else {
+ retval = 0;
+ goto completed;
+ }
+ } else if (page_code == PAGE_83) {
+ if (do_scsi_page83_inquiry(dev_scsi, fd, dev_scsi->serial, dev_scsi->serial_short, len, dev_scsi->unit_serial_number, dev_scsi->wwn, dev_scsi->wwn_vendor_extension, dev_scsi->tgpt_group)) {
+ retval = 1;
+ goto completed;
+ } else {
+ retval = 0;
+ goto completed;
+ }
+ } else if (page_code == PAGE_83_PRE_SPC3) {
+ retval = do_scsi_page83_prespc3_inquiry(dev_scsi, fd, dev_scsi->serial, dev_scsi->serial_short, len);
+ if (retval) {
+ /*
+ * Fallback to servicing a SPC-2/3 compliant page 83
+ * inquiry if the page 83 reply format does not
+ * conform to pre-SPC3 expectations.
+ */
+ if (retval == 2) {
+ if (do_scsi_page83_inquiry(dev_scsi, fd, dev_scsi->serial, dev_scsi->serial_short, len, dev_scsi->unit_serial_number, dev_scsi->wwn, dev_scsi->wwn_vendor_extension, dev_scsi->tgpt_group)) {
+ retval = 1;
+ goto completed;
+ } else {
+ retval = 0;
+ goto completed;
+ }
+ }
+ else {
+ retval = 1;
+ goto completed;
+ }
+ } else {
+ retval = 0;
+ goto completed;
+ }
+ } else if (page_code != 0x00) {
+ log_debug("%s: unsupported page code 0x%d", dev_scsi->kernel, page_code);
+ retval = 1;
+ goto completed;
+ }
+
+ /*
+ * Get page 0, the page of the pages. By default, try from best to
+ * worst of supported pages: 0x83 then 0x80.
+ */
+ if (do_scsi_page0_inquiry(dev_scsi, fd, page0, SCSI_INQ_BUFF_LEN)) {
+ /*
+ * Don't try anything else. Black list if a specific page
+ * should be used for this vendor+model, or maybe have an
+ * optional fall-back to page 0x80 or page 0x83.
+ */
+ retval = 1;
+ goto completed;
+ }
+
+ for (ind = 4; ind <= page0[3] + 3; ind++)
+ if (page0[ind] == PAGE_83)
+ if (!do_scsi_page83_inquiry(dev_scsi, fd,
+ dev_scsi->serial, dev_scsi->serial_short, len, dev_scsi->unit_serial_number, dev_scsi->wwn, dev_scsi->wwn_vendor_extension, dev_scsi->tgpt_group)) {
+ /*
+ * Success
+ */
+ retval = 0;
+ goto completed;
+ }
+
+ for (ind = 4; ind <= page0[3] + 3; ind++)
+ if (page0[ind] == PAGE_80)
+ if (!do_scsi_page80_inquiry(dev_scsi, fd,
+ dev_scsi->serial, dev_scsi->serial_short, len)) {
+ /*
+ * Success
+ */
+ retval = 0;
+ goto completed;
+ }
+ retval = 1;
+
+completed:
+ close(fd);
+ return retval;
+}
diff --git a/src/udev/udev-builtin-blkid.c b/src/udev/udev-builtin-blkid.c
new file mode 100644
index 0000000..3f64548
--- /dev/null
+++ b/src/udev/udev-builtin-blkid.c
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * probe disks for filesystems and partitions
+ *
+ * Copyright © 2011 Karel Zak <kzak@redhat.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "device-util.h"
+#include "efi-loader.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "gpt.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+
+static void print_property(sd_device *dev, bool test, const char *name, const char *value) {
+ char s[256];
+
+ s[0] = '\0';
+
+ if (streq(name, "TYPE")) {
+ udev_builtin_add_property(dev, test, "ID_FS_TYPE", value);
+
+ } else if (streq(name, "USAGE")) {
+ udev_builtin_add_property(dev, test, "ID_FS_USAGE", value);
+
+ } else if (streq(name, "VERSION")) {
+ udev_builtin_add_property(dev, test, "ID_FS_VERSION", value);
+
+ } else if (streq(name, "UUID")) {
+ blkid_safe_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_UUID", s);
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_UUID_ENC", s);
+
+ } else if (streq(name, "UUID_SUB")) {
+ blkid_safe_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_UUID_SUB", s);
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_UUID_SUB_ENC", s);
+
+ } else if (streq(name, "LABEL")) {
+ blkid_safe_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_LABEL", s);
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_LABEL_ENC", s);
+
+ } else if (streq(name, "PTTYPE")) {
+ udev_builtin_add_property(dev, test, "ID_PART_TABLE_TYPE", value);
+
+ } else if (streq(name, "PTUUID")) {
+ udev_builtin_add_property(dev, test, "ID_PART_TABLE_UUID", value);
+
+ } else if (streq(name, "PART_ENTRY_NAME")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_PART_ENTRY_NAME", s);
+
+ } else if (streq(name, "PART_ENTRY_TYPE")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_PART_ENTRY_TYPE", s);
+
+ } else if (startswith(name, "PART_ENTRY_")) {
+ strscpyl(s, sizeof(s), "ID_", name, NULL);
+ udev_builtin_add_property(dev, test, s, value);
+
+ } else if (streq(name, "SYSTEM_ID")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_SYSTEM_ID", s);
+
+ } else if (streq(name, "PUBLISHER_ID")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_PUBLISHER_ID", s);
+
+ } else if (streq(name, "APPLICATION_ID")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_APPLICATION_ID", s);
+
+ } else if (streq(name, "BOOT_SYSTEM_ID")) {
+ blkid_encode_string(value, s, sizeof(s));
+ udev_builtin_add_property(dev, test, "ID_FS_BOOT_SYSTEM_ID", s);
+ }
+}
+
+static int find_gpt_root(sd_device *dev, blkid_probe pr, bool test) {
+
+#if defined(GPT_ROOT_NATIVE) && ENABLE_EFI
+
+ _cleanup_free_ char *root_id = NULL;
+ bool found_esp = false;
+ blkid_partlist pl;
+ int i, nvals, r;
+
+ assert(pr);
+
+ /* Iterate through the partitions on this disk, and see if the
+ * EFI ESP we booted from is on it. If so, find the first root
+ * disk, and add a property indicating its partition UUID. */
+
+ errno = 0;
+ pl = blkid_probe_get_partitions(pr);
+ if (!pl)
+ return errno_or_else(ENOMEM);
+
+ nvals = blkid_partlist_numof_partitions(pl);
+ for (i = 0; i < nvals; i++) {
+ blkid_partition pp;
+ const char *stype, *sid;
+ sd_id128_t type;
+
+ pp = blkid_partlist_get_partition(pl, i);
+ if (!pp)
+ continue;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (!sid)
+ continue;
+
+ stype = blkid_partition_get_type_string(pp);
+ if (!stype)
+ continue;
+
+ if (sd_id128_from_string(stype, &type) < 0)
+ continue;
+
+ if (sd_id128_equal(type, GPT_ESP)) {
+ sd_id128_t id, esp;
+
+ /* We found an ESP, let's see if it matches
+ * the ESP we booted from. */
+
+ if (sd_id128_from_string(sid, &id) < 0)
+ continue;
+
+ r = efi_loader_get_device_part_uuid(&esp);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_equal(id, esp))
+ found_esp = true;
+
+ } else if (sd_id128_equal(type, GPT_ROOT_NATIVE)) {
+ unsigned long long flags;
+
+ flags = blkid_partition_get_flags(pp);
+ if (flags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* We found a suitable root partition, let's
+ * remember the first one. */
+
+ if (!root_id) {
+ root_id = strdup(sid);
+ if (!root_id)
+ return -ENOMEM;
+ }
+ }
+ }
+
+ /* We found the ESP on this disk, and also found a root
+ * partition, nice! Let's export its UUID */
+ if (found_esp && root_id)
+ udev_builtin_add_property(dev, test, "ID_PART_GPT_AUTO_ROOT_UUID", root_id);
+#endif
+
+ return 0;
+}
+
+static int probe_superblocks(blkid_probe pr) {
+ struct stat st;
+ int rc;
+
+ /* TODO: Return negative errno. */
+
+ if (fstat(blkid_probe_get_fd(pr), &st))
+ return -errno;
+
+ blkid_probe_enable_partitions(pr, 1);
+
+ if (!S_ISCHR(st.st_mode) &&
+ blkid_probe_get_size(pr) <= 1024 * 1440 &&
+ blkid_probe_is_wholedisk(pr)) {
+ /*
+ * check if the small disk is partitioned, if yes then
+ * don't probe for filesystems.
+ */
+ blkid_probe_enable_superblocks(pr, 0);
+
+ rc = blkid_do_fullprobe(pr);
+ if (rc < 0)
+ return rc; /* -1 = error, 1 = nothing, 0 = success */
+
+ if (blkid_probe_lookup_value(pr, "PTTYPE", NULL, NULL) == 0)
+ return 0; /* partition table detected */
+ }
+
+ blkid_probe_set_partitions_flags(pr, BLKID_PARTS_ENTRY_DETAILS);
+ blkid_probe_enable_superblocks(pr, 1);
+
+ return blkid_do_safeprobe(pr);
+}
+
+static int builtin_blkid(sd_device *dev, int argc, char *argv[], bool test) {
+ const char *devnode, *root_partition = NULL, *data, *name;
+ _cleanup_(blkid_free_probep) blkid_probe pr = NULL;
+ bool noraid = false, is_gpt = false;
+ _cleanup_close_ int fd = -1;
+ int64_t offset = 0;
+ int nvals, i, r;
+
+ static const struct option options[] = {
+ { "offset", required_argument, NULL, 'o' },
+ { "noraid", no_argument, NULL, 'R' },
+ {}
+ };
+
+ for (;;) {
+ int option;
+
+ option = getopt_long(argc, argv, "o:R", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'o':
+ r = safe_atoi64(optarg, &offset);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to parse '%s' as an integer: %m", optarg);
+ if (offset < 0)
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(ERANGE), "Invalid offset %"PRIi64": %m", offset);
+ break;
+ case 'R':
+ noraid = true;
+ break;
+ }
+ }
+
+ errno = 0;
+ pr = blkid_new_probe();
+ if (!pr)
+ return log_device_debug_errno(dev, errno > 0 ? errno : ENOMEM, "Failed to create blkid prober: %m");
+
+ blkid_probe_set_superblocks_flags(pr,
+ BLKID_SUBLKS_LABEL | BLKID_SUBLKS_UUID |
+ BLKID_SUBLKS_TYPE | BLKID_SUBLKS_SECTYPE |
+ BLKID_SUBLKS_USAGE | BLKID_SUBLKS_VERSION);
+
+ if (noraid)
+ blkid_probe_filter_superblocks_usage(pr, BLKID_FLTR_NOTIN, BLKID_USAGE_RAID);
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get device name: %m");
+
+ fd = open(devnode, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0)
+ return log_device_debug_errno(dev, errno, "Failed to open block device %s: %m", devnode);
+
+ errno = 0;
+ r = blkid_probe_set_device(pr, fd, offset, 0);
+ if (r < 0)
+ return log_device_debug_errno(dev, errno > 0 ? errno : ENOMEM, "Failed to set device to blkid prober: %m");
+
+ log_device_debug(dev, "Probe %s with %sraid and offset=%"PRIi64, devnode, noraid ? "no" : "", offset);
+
+ r = probe_superblocks(pr);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to probe superblocks: %m");
+
+ /* If the device is a partition then its parent passed the root partition UUID to the device */
+ (void) sd_device_get_property_value(dev, "ID_PART_GPT_AUTO_ROOT_UUID", &root_partition);
+
+ errno = 0;
+ nvals = blkid_probe_numof_values(pr);
+ if (nvals < 0)
+ return log_device_debug_errno(dev, errno > 0 ? errno : ENOMEM, "Failed to get number of probed values: %m");
+
+ for (i = 0; i < nvals; i++) {
+ if (blkid_probe_get_value(pr, i, &name, &data, NULL) < 0)
+ continue;
+
+ print_property(dev, test, name, data);
+
+ /* Is this a disk with GPT partition table? */
+ if (streq(name, "PTTYPE") && streq(data, "gpt"))
+ is_gpt = true;
+
+ /* Is this a partition that matches the root partition
+ * property inherited from the parent? */
+ if (root_partition && streq(name, "PART_ENTRY_UUID") && streq(data, root_partition))
+ udev_builtin_add_property(dev, test, "ID_PART_GPT_AUTO_ROOT", "1");
+ }
+
+ if (is_gpt)
+ find_gpt_root(dev, pr, test);
+
+ return 0;
+}
+
+const UdevBuiltin udev_builtin_blkid = {
+ .name = "blkid",
+ .cmd = builtin_blkid,
+ .help = "Filesystem and partition probing",
+ .run_once = true,
+};
diff --git a/src/udev/udev-builtin-btrfs.c b/src/udev/udev-builtin-btrfs.c
new file mode 100644
index 0000000..9079d1b
--- /dev/null
+++ b/src/udev/udev-builtin-btrfs.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <linux/btrfs.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+
+#include "device-util.h"
+#include "fd-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+#include "util.h"
+
+static int builtin_btrfs(sd_device *dev, int argc, char *argv[], bool test) {
+ struct btrfs_ioctl_vol_args args = {};
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ if (argc != 3 || !streq(argv[1], "ready"))
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(EINVAL), "Invalid arguments");
+
+ fd = open("/dev/btrfs-control", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return log_device_debug_errno(dev, errno, "Failed to open /dev/btrfs-control: %m");
+
+ strscpy(args.name, sizeof(args.name), argv[2]);
+ r = ioctl(fd, BTRFS_IOC_DEVICES_READY, &args);
+ if (r < 0)
+ return log_device_debug_errno(dev, errno, "Failed to call BTRFS_IOC_DEVICES_READY: %m");
+
+ udev_builtin_add_property(dev, test, "ID_BTRFS_READY", one_zero(r == 0));
+ return 0;
+}
+
+const UdevBuiltin udev_builtin_btrfs = {
+ .name = "btrfs",
+ .cmd = builtin_btrfs,
+ .help = "btrfs volume management",
+};
diff --git a/src/udev/udev-builtin-hwdb.c b/src/udev/udev-builtin-hwdb.c
new file mode 100644
index 0000000..7883518
--- /dev/null
+++ b/src/udev/udev-builtin-hwdb.c
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "hwdb-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "udev-builtin.h"
+
+static sd_hwdb *hwdb;
+
+int udev_builtin_hwdb_lookup(sd_device *dev,
+ const char *prefix, const char *modalias,
+ const char *filter, bool test) {
+ _cleanup_free_ char *lookup = NULL;
+ const char *key, *value;
+ int n = 0, r;
+
+ if (!hwdb)
+ return -ENOENT;
+
+ if (prefix) {
+ lookup = strjoin(prefix, modalias);
+ if (!lookup)
+ return -ENOMEM;
+ modalias = lookup;
+ }
+
+ SD_HWDB_FOREACH_PROPERTY(hwdb, modalias, key, value) {
+ if (filter && fnmatch(filter, key, FNM_NOESCAPE) != 0)
+ continue;
+
+ r = udev_builtin_add_property(dev, test, key, value);
+ if (r < 0)
+ return r;
+ n++;
+ }
+ return n;
+}
+
+static const char *modalias_usb(sd_device *dev, char *s, size_t size) {
+ const char *v, *p, *n = NULL;
+ uint16_t vn, pn;
+
+ if (sd_device_get_sysattr_value(dev, "idVendor", &v) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(dev, "idProduct", &p) < 0)
+ return NULL;
+ if (safe_atoux16(v, &vn) < 0)
+ return NULL;
+ if (safe_atoux16(p, &pn) < 0)
+ return NULL;
+ (void) sd_device_get_sysattr_value(dev, "product", &n);
+
+ snprintf(s, size, "usb:v%04Xp%04X:%s", vn, pn, strempty(n));
+ return s;
+}
+
+static int udev_builtin_hwdb_search(sd_device *dev, sd_device *srcdev,
+ const char *subsystem, const char *prefix,
+ const char *filter, bool test) {
+ char s[LINE_MAX];
+ bool last = false;
+ int r = 0;
+
+ assert(dev);
+
+ if (!srcdev)
+ srcdev = dev;
+
+ for (sd_device *d = srcdev; d; ) {
+ const char *dsubsys, *devtype, *modalias = NULL;
+
+ if (sd_device_get_subsystem(d, &dsubsys) < 0)
+ goto next;
+
+ /* look only at devices of a specific subsystem */
+ if (subsystem && !streq(dsubsys, subsystem))
+ goto next;
+
+ (void) sd_device_get_property_value(d, "MODALIAS", &modalias);
+
+ if (streq(dsubsys, "usb") &&
+ sd_device_get_devtype(d, &devtype) >= 0 &&
+ streq(devtype, "usb_device")) {
+ /* if the usb_device does not have a modalias, compose one */
+ if (!modalias)
+ modalias = modalias_usb(d, s, sizeof(s));
+
+ /* avoid looking at any parent device, they are usually just a USB hub */
+ last = true;
+ }
+
+ if (!modalias)
+ goto next;
+
+ log_device_debug(dev, "hwdb modalias key: \"%s\"", modalias);
+
+ r = udev_builtin_hwdb_lookup(dev, prefix, modalias, filter, test);
+ if (r > 0)
+ break;
+
+ if (last)
+ break;
+next:
+ if (sd_device_get_parent(d, &d) < 0)
+ break;
+ }
+
+ return r;
+}
+
+static int builtin_hwdb(sd_device *dev, int argc, char *argv[], bool test) {
+ static const struct option options[] = {
+ { "filter", required_argument, NULL, 'f' },
+ { "device", required_argument, NULL, 'd' },
+ { "subsystem", required_argument, NULL, 's' },
+ { "lookup-prefix", required_argument, NULL, 'p' },
+ {}
+ };
+ const char *filter = NULL;
+ const char *device = NULL;
+ const char *subsystem = NULL;
+ const char *prefix = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *srcdev = NULL;
+ int r;
+
+ if (!hwdb)
+ return -EINVAL;
+
+ for (;;) {
+ int option;
+
+ option = getopt_long(argc, argv, "f:d:s:p:", options, NULL);
+ if (option == -1)
+ break;
+
+ switch (option) {
+ case 'f':
+ filter = optarg;
+ break;
+
+ case 'd':
+ device = optarg;
+ break;
+
+ case 's':
+ subsystem = optarg;
+ break;
+
+ case 'p':
+ prefix = optarg;
+ break;
+ }
+ }
+
+ /* query a specific key given as argument */
+ if (argv[optind]) {
+ r = udev_builtin_hwdb_lookup(dev, prefix, argv[optind], filter, test);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to look up hwdb: %m");
+ if (r == 0)
+ return log_device_debug_errno(dev, SYNTHETIC_ERRNO(ENODATA), "No entry found from hwdb.");
+ return r;
+ }
+
+ /* read data from another device than the device we will store the data */
+ if (device) {
+ r = sd_device_new_from_device_id(&srcdev, device);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to create sd_device object '%s': %m", device);
+ }
+
+ r = udev_builtin_hwdb_search(dev, srcdev, subsystem, prefix, filter, test);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to look up hwdb: %m");
+ if (r == 0)
+ return log_device_debug_errno(dev, SYNTHETIC_ERRNO(ENODATA), "No entry found from hwdb.");
+ return r;
+}
+
+/* called at udev startup and reload */
+static int builtin_hwdb_init(void) {
+ int r;
+
+ if (hwdb)
+ return 0;
+
+ r = sd_hwdb_new(&hwdb);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/* called on udev shutdown and reload request */
+static void builtin_hwdb_exit(void) {
+ hwdb = sd_hwdb_unref(hwdb);
+}
+
+/* called every couple of seconds during event activity; 'true' if config has changed */
+static bool builtin_hwdb_validate(void) {
+ return hwdb_validate(hwdb);
+}
+
+const UdevBuiltin udev_builtin_hwdb = {
+ .name = "hwdb",
+ .cmd = builtin_hwdb,
+ .init = builtin_hwdb_init,
+ .exit = builtin_hwdb_exit,
+ .validate = builtin_hwdb_validate,
+ .help = "Hardware database",
+};
diff --git a/src/udev/udev-builtin-input_id.c b/src/udev/udev-builtin-input_id.c
new file mode 100644
index 0000000..6a4911c
--- /dev/null
+++ b/src/udev/udev-builtin-input_id.c
@@ -0,0 +1,395 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * expose input properties via udev
+ *
+ * Portions Copyright © 2004 David Zeuthen, <david@fubar.dk>
+ * Copyright © 2014 Carlos Garnacho <carlosg@gnome.org>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/limits.h>
+
+#include "device-util.h"
+#include "fd-util.h"
+#include "missing_input.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "udev-builtin.h"
+#include "util.h"
+
+/* we must use this kernel-compatible implementation */
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+#define NBITS(x) ((((x)-1)/BITS_PER_LONG)+1)
+#define OFF(x) ((x)%BITS_PER_LONG)
+#define BIT(x) (1UL<<OFF(x))
+#define LONG(x) ((x)/BITS_PER_LONG)
+#define test_bit(bit, array) ((array[LONG(bit)] >> OFF(bit)) & 1)
+
+struct range {
+ unsigned start;
+ unsigned end;
+};
+
+/* key code ranges above BTN_MISC (start is inclusive, stop is exclusive)*/
+static const struct range high_key_blocks[] = {
+ { KEY_OK, BTN_DPAD_UP },
+ { KEY_ALS_TOGGLE, BTN_TRIGGER_HAPPY }
+};
+
+static int abs_size_mm(const struct input_absinfo *absinfo) {
+ /* Resolution is defined to be in units/mm for ABS_X/Y */
+ return (absinfo->maximum - absinfo->minimum) / absinfo->resolution;
+}
+
+static void extract_info(sd_device *dev, const char *devpath, bool test) {
+ char width[DECIMAL_STR_MAX(int)], height[DECIMAL_STR_MAX(int)];
+ struct input_absinfo xabsinfo = {}, yabsinfo = {};
+ _cleanup_close_ int fd = -1;
+
+ fd = open(devpath, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return;
+
+ if (ioctl(fd, EVIOCGABS(ABS_X), &xabsinfo) < 0 ||
+ ioctl(fd, EVIOCGABS(ABS_Y), &yabsinfo) < 0)
+ return;
+
+ if (xabsinfo.resolution <= 0 || yabsinfo.resolution <= 0)
+ return;
+
+ xsprintf(width, "%d", abs_size_mm(&xabsinfo));
+ xsprintf(height, "%d", abs_size_mm(&yabsinfo));
+
+ udev_builtin_add_property(dev, test, "ID_INPUT_WIDTH_MM", width);
+ udev_builtin_add_property(dev, test, "ID_INPUT_HEIGHT_MM", height);
+}
+
+/*
+ * Read a capability attribute and return bitmask.
+ * @param dev sd_device
+ * @param attr sysfs attribute name (e. g. "capabilities/key")
+ * @param bitmask: Output array which has a sizeof of bitmask_size
+ */
+static void get_cap_mask(sd_device *pdev, const char* attr,
+ unsigned long *bitmask, size_t bitmask_size,
+ bool test) {
+ const char *v;
+ char text[4096];
+ unsigned i;
+ char* word;
+ unsigned long val;
+
+ if (sd_device_get_sysattr_value(pdev, attr, &v) < 0)
+ v = "";
+
+ xsprintf(text, "%s", v);
+ log_device_debug(pdev, "%s raw kernel attribute: %s", attr, text);
+
+ memzero(bitmask, bitmask_size);
+ i = 0;
+ while ((word = strrchr(text, ' ')) != NULL) {
+ val = strtoul(word+1, NULL, 16);
+ if (i < bitmask_size / sizeof(unsigned long))
+ bitmask[i] = val;
+ else
+ log_device_debug(pdev, "Ignoring %s block %lX which is larger than maximum size", attr, val);
+ *word = '\0';
+ ++i;
+ }
+ val = strtoul (text, NULL, 16);
+ if (i < bitmask_size / sizeof(unsigned long))
+ bitmask[i] = val;
+ else
+ log_device_debug(pdev, "Ignoring %s block %lX which is larger than maximum size", attr, val);
+
+ if (test) {
+ /* printf pattern with the right unsigned long number of hex chars */
+ xsprintf(text, " bit %%4u: %%0%zulX\n",
+ 2 * sizeof(unsigned long));
+ log_device_debug(pdev, "%s decoded bit map:", attr);
+ val = bitmask_size / sizeof (unsigned long);
+ /* skip over leading zeros */
+ while (bitmask[val-1] == 0 && val > 0)
+ --val;
+ for (i = 0; i < val; ++i) {
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ log_device_debug(pdev, text, i * BITS_PER_LONG, bitmask[i]);
+ REENABLE_WARNING;
+ }
+ }
+}
+
+static struct input_id get_input_id(sd_device *dev) {
+ const char *v;
+ struct input_id id = {};
+
+ if (sd_device_get_sysattr_value(dev, "id/bustype", &v) >= 0)
+ (void) safe_atoux16(v, &id.bustype);
+ if (sd_device_get_sysattr_value(dev, "id/vendor", &v) >= 0)
+ (void) safe_atoux16(v, &id.vendor);
+ if (sd_device_get_sysattr_value(dev, "id/product", &v) >= 0)
+ (void) safe_atoux16(v, &id.product);
+ if (sd_device_get_sysattr_value(dev, "id/version", &v) >= 0)
+ (void) safe_atoux16(v, &id.version);
+
+ return id;
+}
+
+/* pointer devices */
+static bool test_pointers(sd_device *dev,
+ const struct input_id *id,
+ const unsigned long* bitmask_ev,
+ const unsigned long* bitmask_abs,
+ const unsigned long* bitmask_key,
+ const unsigned long* bitmask_rel,
+ const unsigned long* bitmask_props,
+ bool test) {
+ int button, axis;
+ bool has_abs_coordinates = false;
+ bool has_rel_coordinates = false;
+ bool has_mt_coordinates = false;
+ bool has_joystick_axes_or_buttons = false;
+ bool has_pad_buttons = false;
+ bool is_direct = false;
+ bool has_touch = false;
+ bool has_3d_coordinates = false;
+ bool has_keys = false;
+ bool has_stylus = false;
+ bool has_pen = false;
+ bool finger_but_no_pen = false;
+ bool has_mouse_button = false;
+ bool is_mouse = false;
+ bool is_touchpad = false;
+ bool is_touchscreen = false;
+ bool is_tablet = false;
+ bool is_tablet_pad = false;
+ bool is_joystick = false;
+ bool is_accelerometer = false;
+ bool is_pointing_stick = false;
+
+ has_keys = test_bit(EV_KEY, bitmask_ev);
+ has_abs_coordinates = test_bit(ABS_X, bitmask_abs) && test_bit(ABS_Y, bitmask_abs);
+ has_3d_coordinates = has_abs_coordinates && test_bit(ABS_Z, bitmask_abs);
+ is_accelerometer = test_bit(INPUT_PROP_ACCELEROMETER, bitmask_props);
+
+ if (!has_keys && has_3d_coordinates)
+ is_accelerometer = true;
+
+ if (is_accelerometer) {
+ udev_builtin_add_property(dev, test, "ID_INPUT_ACCELEROMETER", "1");
+ return true;
+ }
+
+ is_pointing_stick = test_bit(INPUT_PROP_POINTING_STICK, bitmask_props);
+ has_stylus = test_bit(BTN_STYLUS, bitmask_key);
+ has_pen = test_bit(BTN_TOOL_PEN, bitmask_key);
+ finger_but_no_pen = test_bit(BTN_TOOL_FINGER, bitmask_key) && !test_bit(BTN_TOOL_PEN, bitmask_key);
+ for (button = BTN_MOUSE; button < BTN_JOYSTICK && !has_mouse_button; button++)
+ has_mouse_button = test_bit(button, bitmask_key);
+ has_rel_coordinates = test_bit(EV_REL, bitmask_ev) && test_bit(REL_X, bitmask_rel) && test_bit(REL_Y, bitmask_rel);
+ has_mt_coordinates = test_bit(ABS_MT_POSITION_X, bitmask_abs) && test_bit(ABS_MT_POSITION_Y, bitmask_abs);
+
+ /* unset has_mt_coordinates if devices claims to have all abs axis */
+ if (has_mt_coordinates && test_bit(ABS_MT_SLOT, bitmask_abs) && test_bit(ABS_MT_SLOT - 1, bitmask_abs))
+ has_mt_coordinates = false;
+ is_direct = test_bit(INPUT_PROP_DIRECT, bitmask_props);
+ has_touch = test_bit(BTN_TOUCH, bitmask_key);
+ has_pad_buttons = test_bit(BTN_0, bitmask_key) && has_stylus && !has_pen;
+
+ /* joysticks don't necessarily have buttons; e. g.
+ * rudders/pedals are joystick-like, but buttonless; they have
+ * other fancy axes. Others have buttons only but no axes.
+ *
+ * The BTN_JOYSTICK range starts after the mouse range, so a mouse
+ * with more than 16 buttons runs into the joystick range (e.g. Mad
+ * Catz Mad Catz M.M.O.TE). Skip those.
+ */
+ if (!test_bit(BTN_JOYSTICK - 1, bitmask_key)) {
+ for (button = BTN_JOYSTICK; button < BTN_DIGI && !has_joystick_axes_or_buttons; button++)
+ has_joystick_axes_or_buttons = test_bit(button, bitmask_key);
+ for (button = BTN_TRIGGER_HAPPY1; button <= BTN_TRIGGER_HAPPY40 && !has_joystick_axes_or_buttons; button++)
+ has_joystick_axes_or_buttons = test_bit(button, bitmask_key);
+ for (button = BTN_DPAD_UP; button <= BTN_DPAD_RIGHT && !has_joystick_axes_or_buttons; button++)
+ has_joystick_axes_or_buttons = test_bit(button, bitmask_key);
+ }
+ for (axis = ABS_RX; axis < ABS_PRESSURE && !has_joystick_axes_or_buttons; axis++)
+ has_joystick_axes_or_buttons = test_bit(axis, bitmask_abs);
+
+ if (has_abs_coordinates) {
+ if (has_stylus || has_pen)
+ is_tablet = true;
+ else if (finger_but_no_pen && !is_direct)
+ is_touchpad = true;
+ else if (has_mouse_button)
+ /* This path is taken by VMware's USB mouse, which has
+ * absolute axes, but no touch/pressure button. */
+ is_mouse = true;
+ else if (has_touch || is_direct)
+ is_touchscreen = true;
+ else if (has_joystick_axes_or_buttons)
+ is_joystick = true;
+ } else if (has_joystick_axes_or_buttons)
+ is_joystick = true;
+
+ if (has_mt_coordinates) {
+ if (has_stylus || has_pen)
+ is_tablet = true;
+ else if (finger_but_no_pen && !is_direct)
+ is_touchpad = true;
+ else if (has_touch || is_direct)
+ is_touchscreen = true;
+ }
+
+ if (is_tablet && has_pad_buttons)
+ is_tablet_pad = true;
+
+ if (!is_tablet && !is_touchpad && !is_joystick &&
+ has_mouse_button &&
+ (has_rel_coordinates ||
+ !has_abs_coordinates)) /* mouse buttons and no axis */
+ is_mouse = true;
+
+ /* There is no such thing as an i2c mouse */
+ if (is_mouse && id->bustype == BUS_I2C)
+ is_pointing_stick = true;
+
+ if (is_pointing_stick)
+ udev_builtin_add_property(dev, test, "ID_INPUT_POINTINGSTICK", "1");
+ if (is_mouse)
+ udev_builtin_add_property(dev, test, "ID_INPUT_MOUSE", "1");
+ if (is_touchpad)
+ udev_builtin_add_property(dev, test, "ID_INPUT_TOUCHPAD", "1");
+ if (is_touchscreen)
+ udev_builtin_add_property(dev, test, "ID_INPUT_TOUCHSCREEN", "1");
+ if (is_joystick)
+ udev_builtin_add_property(dev, test, "ID_INPUT_JOYSTICK", "1");
+ if (is_tablet)
+ udev_builtin_add_property(dev, test, "ID_INPUT_TABLET", "1");
+ if (is_tablet_pad)
+ udev_builtin_add_property(dev, test, "ID_INPUT_TABLET_PAD", "1");
+
+ return is_tablet || is_mouse || is_touchpad || is_touchscreen || is_joystick || is_pointing_stick;
+}
+
+/* key like devices */
+static bool test_key(sd_device *dev,
+ const unsigned long* bitmask_ev,
+ const unsigned long* bitmask_key,
+ bool test) {
+ unsigned i;
+ unsigned long found;
+ unsigned long mask;
+ bool ret = false;
+
+ /* do we have any KEY_* capability? */
+ if (!test_bit(EV_KEY, bitmask_ev)) {
+ log_device_debug(dev, "test_key: no EV_KEY capability");
+ return false;
+ }
+
+ /* only consider KEY_* here, not BTN_* */
+ found = 0;
+ for (i = 0; i < BTN_MISC/BITS_PER_LONG; ++i) {
+ found |= bitmask_key[i];
+ log_device_debug(dev, "test_key: checking bit block %lu for any keys; found=%i", (unsigned long)i*BITS_PER_LONG, found > 0);
+ }
+ /* If there are no keys in the lower block, check the higher blocks */
+ if (!found) {
+ unsigned block;
+ for (block = 0; block < (sizeof(high_key_blocks) / sizeof(struct range)); ++block) {
+ for (i = high_key_blocks[block].start; i < high_key_blocks[block].end; ++i) {
+ if (test_bit(i, bitmask_key)) {
+ log_device_debug(dev, "test_key: Found key %x in high block", i);
+ found = 1;
+ break;
+ }
+ }
+ }
+ }
+
+ if (found > 0) {
+ udev_builtin_add_property(dev, test, "ID_INPUT_KEY", "1");
+ ret = true;
+ }
+
+ /* the first 32 bits are ESC, numbers, and Q to D; if we have all of
+ * those, consider it a full keyboard; do not test KEY_RESERVED, though */
+ mask = 0xFFFFFFFE;
+ if (FLAGS_SET(bitmask_key[0], mask)) {
+ udev_builtin_add_property(dev, test, "ID_INPUT_KEYBOARD", "1");
+ ret = true;
+ }
+
+ return ret;
+}
+
+static int builtin_input_id(sd_device *dev, int argc, char *argv[], bool test) {
+ sd_device *pdev;
+ unsigned long bitmask_ev[NBITS(EV_MAX)];
+ unsigned long bitmask_abs[NBITS(ABS_MAX)];
+ unsigned long bitmask_key[NBITS(KEY_MAX)];
+ unsigned long bitmask_rel[NBITS(REL_MAX)];
+ unsigned long bitmask_props[NBITS(INPUT_PROP_MAX)];
+ const char *sysname, *devnode;
+ bool is_pointer;
+ bool is_key;
+
+ assert(dev);
+
+ /* walk up the parental chain until we find the real input device; the
+ * argument is very likely a subdevice of this, like eventN */
+ for (pdev = dev; pdev; ) {
+ const char *s;
+
+ if (sd_device_get_sysattr_value(pdev, "capabilities/ev", &s) >= 0)
+ break;
+
+ if (sd_device_get_parent_with_subsystem_devtype(pdev, "input", NULL, &pdev) >= 0)
+ continue;
+
+ pdev = NULL;
+ break;
+ }
+
+ if (pdev) {
+ struct input_id id = get_input_id(pdev);
+
+ /* Use this as a flag that input devices were detected, so that this
+ * program doesn't need to be called more than once per device */
+ udev_builtin_add_property(dev, test, "ID_INPUT", "1");
+ get_cap_mask(pdev, "capabilities/ev", bitmask_ev, sizeof(bitmask_ev), test);
+ get_cap_mask(pdev, "capabilities/abs", bitmask_abs, sizeof(bitmask_abs), test);
+ get_cap_mask(pdev, "capabilities/rel", bitmask_rel, sizeof(bitmask_rel), test);
+ get_cap_mask(pdev, "capabilities/key", bitmask_key, sizeof(bitmask_key), test);
+ get_cap_mask(pdev, "properties", bitmask_props, sizeof(bitmask_props), test);
+ is_pointer = test_pointers(dev, &id, bitmask_ev, bitmask_abs,
+ bitmask_key, bitmask_rel,
+ bitmask_props, test);
+ is_key = test_key(dev, bitmask_ev, bitmask_key, test);
+ /* Some evdev nodes have only a scrollwheel */
+ if (!is_pointer && !is_key && test_bit(EV_REL, bitmask_ev) &&
+ (test_bit(REL_WHEEL, bitmask_rel) || test_bit(REL_HWHEEL, bitmask_rel)))
+ udev_builtin_add_property(dev, test, "ID_INPUT_KEY", "1");
+ if (test_bit(EV_SW, bitmask_ev))
+ udev_builtin_add_property(dev, test, "ID_INPUT_SWITCH", "1");
+
+ }
+
+ if (sd_device_get_devname(dev, &devnode) >= 0 &&
+ sd_device_get_sysname(dev, &sysname) >= 0 &&
+ startswith(sysname, "event"))
+ extract_info(dev, devnode, test);
+
+ return 0;
+}
+
+const UdevBuiltin udev_builtin_input_id = {
+ .name = "input_id",
+ .cmd = builtin_input_id,
+ .help = "Input device properties",
+};
diff --git a/src/udev/udev-builtin-keyboard.c b/src/udev/udev-builtin-keyboard.c
new file mode 100644
index 0000000..cd766a8
--- /dev/null
+++ b/src/udev/udev-builtin-keyboard.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/input.h>
+
+#include "device-util.h"
+#include "fd-util.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+
+static const struct key_name *keyboard_lookup_key(const char *str, GPERF_LEN_TYPE len);
+#include "keyboard-keys-from-name.h"
+
+static int install_force_release(sd_device *dev, const unsigned *release, unsigned release_count) {
+ sd_device *atkbd;
+ const char *cur;
+ char codes[4096];
+ char *s;
+ size_t l;
+ unsigned i;
+ int r;
+
+ assert(dev);
+ assert(release);
+
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "serio", NULL, &atkbd);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get serio parent: %m");
+
+ r = sd_device_get_sysattr_value(atkbd, "force_release", &cur);
+ if (r < 0)
+ return log_device_error_errno(atkbd, r, "Failed to get force-release attribute: %m");
+
+ s = codes;
+ l = sizeof(codes);
+
+ /* copy current content */
+ l = strpcpy(&s, l, cur);
+
+ /* append new codes */
+ for (i = 0; i < release_count; i++)
+ l = strpcpyf(&s, l, ",%u", release[i]);
+
+ log_device_debug(atkbd, "keyboard: updating force-release list with '%s'", codes);
+ r = sd_device_set_sysattr_value(atkbd, "force_release", codes);
+ if (r < 0)
+ return log_device_error_errno(atkbd, r, "Failed to set force-release attribute: %m");
+
+ return 0;
+}
+
+static int map_keycode(sd_device *dev, int fd, int scancode, const char *keycode) {
+ struct {
+ unsigned scan;
+ unsigned key;
+ } map;
+ char *endptr;
+ const struct key_name *k;
+ unsigned keycode_num;
+
+ /* translate identifier to key code */
+ k = keyboard_lookup_key(keycode, strlen(keycode));
+ if (k) {
+ keycode_num = k->id;
+ } else {
+ /* check if it's a numeric code already */
+ keycode_num = strtoul(keycode, &endptr, 0);
+ if (endptr[0] !='\0')
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(EINVAL), "Failed to parse key identifier '%s'", keycode);
+ }
+
+ map.scan = scancode;
+ map.key = keycode_num;
+
+ log_device_debug(dev, "keyboard: mapping scan code %d (0x%x) to key code %d (0x%x)",
+ map.scan, map.scan, map.key, map.key);
+
+ if (ioctl(fd, EVIOCSKEYCODE, &map) < 0)
+ return log_device_error_errno(dev, errno, "Failed to call EVIOCSKEYCODE with scan code 0x%x, and key code %d: %m", map.scan, map.key);
+
+ return 0;
+}
+
+static const char* parse_token(const char *current, int32_t *val_out) {
+ char *next;
+ int32_t val;
+
+ if (!current)
+ return NULL;
+
+ val = strtol(current, &next, 0);
+ if (*next && *next != ':')
+ return NULL;
+
+ if (next != current)
+ *val_out = val;
+
+ if (*next)
+ next++;
+
+ return next;
+}
+
+static int override_abs(sd_device *dev, int fd, unsigned evcode, const char *value) {
+ struct input_absinfo absinfo;
+ const char *next;
+
+ if (ioctl(fd, EVIOCGABS(evcode), &absinfo) < 0)
+ return log_device_error_errno(dev, errno, "Failed to call EVIOCGABS");
+
+ next = parse_token(value, &absinfo.minimum);
+ next = parse_token(next, &absinfo.maximum);
+ next = parse_token(next, &absinfo.resolution);
+ next = parse_token(next, &absinfo.fuzz);
+ next = parse_token(next, &absinfo.flat);
+ if (!next)
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse EV_ABS override '%s'", value);
+
+ log_device_debug(dev, "keyboard: %x overridden with %"PRIi32"/%"PRIi32"/%"PRIi32"/%"PRIi32"/%"PRIi32,
+ evcode, absinfo.minimum, absinfo.maximum, absinfo.resolution, absinfo.fuzz, absinfo.flat);
+ if (ioctl(fd, EVIOCSABS(evcode), &absinfo) < 0)
+ return log_device_error_errno(dev, errno, "Failed to call EVIOCSABS");
+
+ return 0;
+}
+
+static int set_trackpoint_sensitivity(sd_device *dev, const char *value) {
+ sd_device *pdev;
+ char val_s[DECIMAL_STR_MAX(int)];
+ int r, val_i;
+
+ assert(dev);
+ assert(value);
+
+ /* The sensitivity sysfs attr belongs to the serio parent device */
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "serio", NULL, &pdev);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get serio parent: %m");
+
+ r = safe_atoi(value, &val_i);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to parse POINTINGSTICK_SENSITIVITY '%s': %m", value);
+ else if (val_i < 0 || val_i > 255)
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(ERANGE), "POINTINGSTICK_SENSITIVITY %d outside range [0..255]", val_i);
+
+ xsprintf(val_s, "%d", val_i);
+
+ r = sd_device_set_sysattr_value(pdev, "sensitivity", val_s);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to write 'sensitivity' attribute: %m");
+
+ return 0;
+}
+
+static int builtin_keyboard(sd_device *dev, int argc, char *argv[], bool test) {
+ unsigned release[1024];
+ unsigned release_count = 0;
+ _cleanup_close_ int fd = -1;
+ const char *node, *key, *value;
+ int has_abs = -1, r;
+
+ r = sd_device_get_devname(dev, &node);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get device name: %m");
+
+ FOREACH_DEVICE_PROPERTY(dev, key, value) {
+ char *endptr;
+
+ if (startswith(key, "KEYBOARD_KEY_")) {
+ const char *keycode = value;
+ unsigned scancode;
+
+ /* KEYBOARD_KEY_<hex scan code>=<key identifier string> */
+ scancode = strtoul(key + 13, &endptr, 16);
+ if (endptr[0] != '\0') {
+ log_device_warning(dev, "Failed to parse scan code from \"%s\", ignoring", key);
+ continue;
+ }
+
+ /* a leading '!' needs a force-release entry */
+ if (keycode[0] == '!') {
+ keycode++;
+
+ release[release_count] = scancode;
+ if (release_count < ELEMENTSOF(release)-1)
+ release_count++;
+
+ if (keycode[0] == '\0')
+ continue;
+ }
+
+ if (fd < 0) {
+ fd = open(node, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return log_device_error_errno(dev, errno, "Failed to open device '%s': %m", node);
+ }
+
+ (void) map_keycode(dev, fd, scancode, keycode);
+ } else if (startswith(key, "EVDEV_ABS_")) {
+ unsigned evcode;
+
+ /* EVDEV_ABS_<EV_ABS code>=<min>:<max>:<res>:<fuzz>:<flat> */
+ evcode = strtoul(key + 10, &endptr, 16);
+ if (endptr[0] != '\0') {
+ log_device_warning(dev, "Failed to parse EV_ABS code from \"%s\", ignoring", key);
+ continue;
+ }
+
+ if (fd < 0) {
+ fd = open(node, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return log_device_error_errno(dev, errno, "Failed to open device '%s': %m", node);
+ }
+
+ if (has_abs == -1) {
+ unsigned long bits;
+ int rc;
+
+ rc = ioctl(fd, EVIOCGBIT(0, sizeof(bits)), &bits);
+ if (rc < 0)
+ return log_device_error_errno(dev, errno, "Failed to set EVIOCGBIT");
+
+ has_abs = !!(bits & (1 << EV_ABS));
+ if (!has_abs)
+ log_device_warning(dev, "EVDEV_ABS override set but no EV_ABS present on device");
+ }
+
+ if (!has_abs)
+ continue;
+
+ (void) override_abs(dev, fd, evcode, value);
+ } else if (streq(key, "POINTINGSTICK_SENSITIVITY"))
+ (void) set_trackpoint_sensitivity(dev, value);
+ }
+
+ /* install list of force-release codes */
+ if (release_count > 0)
+ (void) install_force_release(dev, release, release_count);
+
+ return 0;
+}
+
+const UdevBuiltin udev_builtin_keyboard = {
+ .name = "keyboard",
+ .cmd = builtin_keyboard,
+ .help = "Keyboard scan code to key mapping",
+};
diff --git a/src/udev/udev-builtin-kmod.c b/src/udev/udev-builtin-kmod.c
new file mode 100644
index 0000000..3be8bd5
--- /dev/null
+++ b/src/udev/udev-builtin-kmod.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * load kernel modules
+ *
+ * Copyright © 2011 ProFUSION embedded systems
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "module-util.h"
+#include "string-util.h"
+#include "udev-builtin.h"
+
+static struct kmod_ctx *ctx = NULL;
+
+_printf_(6,0) static void udev_kmod_log(void *data, int priority, const char *file, int line, const char *fn, const char *format, va_list args) {
+ log_internalv(priority, 0, file, line, fn, format, args);
+}
+
+static int builtin_kmod(sd_device *dev, int argc, char *argv[], bool test) {
+ int i;
+
+ if (!ctx)
+ return 0;
+
+ if (argc < 3 || !streq(argv[1], "load"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: expected: load <module>", argv[0]);
+
+ for (i = 2; argv[i]; i++)
+ (void) module_load_and_warn(ctx, argv[i], false);
+
+ return 0;
+}
+
+/* called at udev startup and reload */
+static int builtin_kmod_init(void) {
+ if (ctx)
+ return 0;
+
+ ctx = kmod_new(NULL, NULL);
+ if (!ctx)
+ return -ENOMEM;
+
+ log_debug("Load module index");
+ kmod_set_log_fn(ctx, udev_kmod_log, NULL);
+ kmod_load_resources(ctx);
+ return 0;
+}
+
+/* called on udev shutdown and reload request */
+static void builtin_kmod_exit(void) {
+ log_debug("Unload module index");
+ ctx = kmod_unref(ctx);
+}
+
+/* called every couple of seconds during event activity; 'true' if config has changed */
+static bool builtin_kmod_validate(void) {
+ log_debug("Validate module index");
+ if (!ctx)
+ return false;
+ return (kmod_validate_resources(ctx) != KMOD_RESOURCES_OK);
+}
+
+const UdevBuiltin udev_builtin_kmod = {
+ .name = "kmod",
+ .cmd = builtin_kmod,
+ .init = builtin_kmod_init,
+ .exit = builtin_kmod_exit,
+ .validate = builtin_kmod_validate,
+ .help = "Kernel module loader",
+ .run_once = false,
+};
diff --git a/src/udev/udev-builtin-net_id.c b/src/udev/udev-builtin-net_id.c
new file mode 100644
index 0000000..d06a8c7
--- /dev/null
+++ b/src/udev/udev-builtin-net_id.c
@@ -0,0 +1,961 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/*
+ * Predictable network interface device names based on:
+ * - firmware/bios-provided index numbers for on-board devices
+ * - firmware-provided pci-express hotplug slot index number
+ * - physical/geographical location of the hardware
+ * - the interface's MAC address
+ *
+ * https://systemd.io/PREDICTABLE_INTERFACE_NAMES
+ *
+ * When the code here is changed, man/systemd.net-naming-scheme.xml must be updated too.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/pci_regs.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "netif-naming-scheme.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+
+#define ONBOARD_INDEX_MAX (16*1024-1)
+
+enum netname_type{
+ NET_UNDEF,
+ NET_PCI,
+ NET_USB,
+ NET_BCMA,
+ NET_VIRTIO,
+ NET_CCW,
+ NET_VIO,
+ NET_PLATFORM,
+ NET_NETDEVSIM,
+};
+
+struct netnames {
+ enum netname_type type;
+
+ uint8_t mac[6];
+ bool mac_valid;
+
+ sd_device *pcidev;
+ char pci_slot[ALTIFNAMSIZ];
+ char pci_path[ALTIFNAMSIZ];
+ char pci_onboard[ALTIFNAMSIZ];
+ const char *pci_onboard_label;
+
+ char usb_ports[ALTIFNAMSIZ];
+ char bcma_core[ALTIFNAMSIZ];
+ char ccw_busid[ALTIFNAMSIZ];
+ char vio_slot[ALTIFNAMSIZ];
+ char platform_path[ALTIFNAMSIZ];
+ char netdevsim_path[ALTIFNAMSIZ];
+};
+
+struct virtfn_info {
+ sd_device *physfn_pcidev;
+ char suffix[ALTIFNAMSIZ];
+};
+
+/* skip intermediate virtio devices */
+static sd_device *skip_virtio(sd_device *dev) {
+ sd_device *parent;
+
+ /* there can only ever be one virtio bus per parent device, so we can
+ * safely ignore any virtio buses. see
+ * http://lists.linuxfoundation.org/pipermail/virtualization/2015-August/030331.html */
+ for (parent = dev; parent; ) {
+ const char *subsystem;
+
+ if (sd_device_get_subsystem(parent, &subsystem) < 0)
+ break;
+
+ if (!streq(subsystem, "virtio"))
+ break;
+
+ if (sd_device_get_parent(parent, &parent) < 0)
+ return NULL;
+ }
+
+ return parent;
+}
+
+static int get_virtfn_info(sd_device *dev, struct netnames *names, struct virtfn_info *ret) {
+ _cleanup_(sd_device_unrefp) sd_device *physfn_pcidev = NULL;
+ const char *physfn_link_file, *syspath;
+ _cleanup_free_ char *physfn_pci_syspath = NULL;
+ _cleanup_free_ char *virtfn_pci_syspath = NULL;
+ struct dirent *dent;
+ _cleanup_closedir_ DIR *dir = NULL;
+ char suffix[ALTIFNAMSIZ];
+ int r;
+
+ assert(dev);
+ assert(names);
+ assert(ret);
+
+ r = sd_device_get_syspath(names->pcidev, &syspath);
+ if (r < 0)
+ return r;
+
+ /* Check if this is a virtual function. */
+ physfn_link_file = strjoina(syspath, "/physfn");
+ r = chase_symlinks(physfn_link_file, NULL, 0, &physfn_pci_syspath, NULL);
+ if (r < 0)
+ return r;
+
+ /* Get physical function's pci device. */
+ r = sd_device_new_from_syspath(&physfn_pcidev, physfn_pci_syspath);
+ if (r < 0)
+ return r;
+
+ /* Find the virtual function number by finding the right virtfn link. */
+ dir = opendir(physfn_pci_syspath);
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ _cleanup_free_ char *virtfn_link_file = NULL;
+
+ if (!startswith(dent->d_name, "virtfn"))
+ continue;
+
+ virtfn_link_file = path_join(physfn_pci_syspath, dent->d_name);
+ if (!virtfn_link_file)
+ return -ENOMEM;
+
+ if (chase_symlinks(virtfn_link_file, NULL, 0, &virtfn_pci_syspath, NULL) < 0)
+ continue;
+
+ if (streq(syspath, virtfn_pci_syspath)) {
+ if (!snprintf_ok(suffix, sizeof(suffix), "v%s", &dent->d_name[6]))
+ return -ENOENT;
+
+ break;
+ }
+ }
+ if (isempty(suffix))
+ return -ENOENT;
+
+ ret->physfn_pcidev = TAKE_PTR(physfn_pcidev);
+ strncpy(ret->suffix, suffix, sizeof(ret->suffix));
+
+ return 0;
+}
+
+/* retrieve on-board index number and label from firmware */
+static int dev_pci_onboard(sd_device *dev, struct netnames *names) {
+ unsigned long idx, dev_port = 0;
+ const char *attr, *port_name = NULL;
+ size_t l;
+ char *s;
+ int r;
+
+ /* ACPI _DSM — device specific method for naming a PCI or PCI Express device */
+ if (sd_device_get_sysattr_value(names->pcidev, "acpi_index", &attr) < 0) {
+ /* SMBIOS type 41 — Onboard Devices Extended Information */
+ r = sd_device_get_sysattr_value(names->pcidev, "index", &attr);
+ if (r < 0)
+ return r;
+ }
+
+ r = safe_atolu(attr, &idx);
+ if (r < 0)
+ return r;
+ if (idx == 0 && !naming_scheme_has(NAMING_ZERO_ACPI_INDEX))
+ return -EINVAL;
+
+ /* Some BIOSes report rubbish indexes that are excessively high (2^24-1 is an index VMware likes to
+ * report for example). Let's define a cut-off where we don't consider the index reliable anymore. We
+ * pick some arbitrary cut-off, which is somewhere beyond the realistic number of physical network
+ * interface a system might have. Ideally the kernel would already filter this crap for us, but it
+ * doesn't currently. */
+ if (idx > ONBOARD_INDEX_MAX)
+ return -ENOENT;
+
+ /* kernel provided port index for multiple ports on a single PCI function */
+ if (sd_device_get_sysattr_value(dev, "dev_port", &attr) >= 0)
+ dev_port = strtoul(attr, NULL, 10);
+
+ /* kernel provided front panel port name for multiple port PCI device */
+ (void) sd_device_get_sysattr_value(dev, "phys_port_name", &port_name);
+
+ s = names->pci_onboard;
+ l = sizeof(names->pci_onboard);
+ l = strpcpyf(&s, l, "o%lu", idx);
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
+ l = strpcpyf(&s, l, "d%lu", dev_port);
+ if (l == 0)
+ names->pci_onboard[0] = '\0';
+
+ if (sd_device_get_sysattr_value(names->pcidev, "label", &names->pci_onboard_label) < 0)
+ names->pci_onboard_label = NULL;
+
+ return 0;
+}
+
+/* read the 256 bytes PCI configuration space to check the multi-function bit */
+static bool is_pci_multifunction(sd_device *dev) {
+ _cleanup_close_ int fd = -1;
+ const char *filename, *syspath;
+ uint8_t config[64];
+
+ if (sd_device_get_syspath(dev, &syspath) < 0)
+ return false;
+
+ filename = strjoina(syspath, "/config");
+ fd = open(filename, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+ if (read(fd, &config, sizeof(config)) != sizeof(config))
+ return false;
+
+ /* bit 0-6 header type, bit 7 multi/single function device */
+ return config[PCI_HEADER_TYPE] & 0x80;
+}
+
+static bool is_pci_ari_enabled(sd_device *dev) {
+ const char *a;
+
+ if (sd_device_get_sysattr_value(dev, "ari_enabled", &a) < 0)
+ return false;
+
+ return streq(a, "1");
+}
+
+static bool is_pci_bridge(sd_device *dev) {
+ const char *v, *p;
+
+ if (sd_device_get_sysattr_value(dev, "modalias", &v) < 0)
+ return false;
+
+ if (!startswith(v, "pci:"))
+ return false;
+
+ p = strrchr(v, 's');
+ if (!p)
+ return false;
+ if (p[1] != 'c')
+ return false;
+
+ /* PCI device subclass 04 corresponds to PCI bridge */
+ return strneq(p + 2, "04", 2);
+}
+
+static int dev_pci_slot(sd_device *dev, struct netnames *names) {
+ unsigned long dev_port = 0;
+ unsigned domain, bus, slot, func;
+ int hotplug_slot = -1;
+ size_t l;
+ char *s;
+ const char *sysname, *attr, *port_name = NULL, *syspath;
+ _cleanup_(sd_device_unrefp) sd_device *pci = NULL;
+ sd_device *hotplug_slot_dev;
+ char slots[PATH_MAX];
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ int r;
+
+ r = sd_device_get_sysname(names->pcidev, &sysname);
+ if (r < 0)
+ return r;
+
+ if (sscanf(sysname, "%x:%x:%x.%u", &domain, &bus, &slot, &func) != 4)
+ return -ENOENT;
+
+ if (naming_scheme_has(NAMING_NPAR_ARI) &&
+ is_pci_ari_enabled(names->pcidev))
+ /* ARI devices support up to 256 functions on a single device ("slot"), and interpret the
+ * traditional 5-bit slot and 3-bit function number as a single 8-bit function number,
+ * where the slot makes up the upper 5 bits. */
+ func += slot * 8;
+
+ /* kernel provided port index for multiple ports on a single PCI function */
+ if (sd_device_get_sysattr_value(dev, "dev_port", &attr) >= 0) {
+ dev_port = strtoul(attr, NULL, 10);
+ /* With older kernels IP-over-InfiniBand network interfaces sometimes erroneously
+ * provide the port number in the 'dev_id' sysfs attribute instead of 'dev_port',
+ * which thus stays initialized as 0. */
+ if (dev_port == 0 &&
+ sd_device_get_sysattr_value(dev, "type", &attr) >= 0) {
+ unsigned long type;
+
+ type = strtoul(attr, NULL, 10);
+ if (type == ARPHRD_INFINIBAND &&
+ sd_device_get_sysattr_value(dev, "dev_id", &attr) >= 0)
+ dev_port = strtoul(attr, NULL, 16);
+ }
+ }
+
+ /* kernel provided front panel port name for multi-port PCI device */
+ (void) sd_device_get_sysattr_value(dev, "phys_port_name", &port_name);
+
+ /* compose a name based on the raw kernel's PCI bus, slot numbers */
+ s = names->pci_path;
+ l = sizeof(names->pci_path);
+ if (domain > 0)
+ l = strpcpyf(&s, l, "P%u", domain);
+ l = strpcpyf(&s, l, "p%us%u", bus, slot);
+ if (func > 0 || is_pci_multifunction(names->pcidev))
+ l = strpcpyf(&s, l, "f%u", func);
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
+ l = strpcpyf(&s, l, "d%lu", dev_port);
+ if (l == 0)
+ names->pci_path[0] = '\0';
+
+ /* ACPI _SUN — slot user number */
+ r = sd_device_new_from_subsystem_sysname(&pci, "subsystem", "pci");
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_syspath(pci, &syspath);
+ if (r < 0)
+ return r;
+ if (!snprintf_ok(slots, sizeof slots, "%s/slots", syspath))
+ return -ENAMETOOLONG;
+
+ dir = opendir(slots);
+ if (!dir)
+ return -errno;
+
+ hotplug_slot_dev = names->pcidev;
+ while (hotplug_slot_dev) {
+ if (sd_device_get_sysname(hotplug_slot_dev, &sysname) < 0)
+ continue;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ int i;
+ char str[PATH_MAX];
+ _cleanup_free_ char *address = NULL;
+
+ if (dot_or_dot_dot(dent->d_name))
+ continue;
+
+ r = safe_atoi(dent->d_name, &i);
+ if (r < 0 || i <= 0)
+ continue;
+
+ /* match slot address with device by stripping the function */
+ if (snprintf_ok(str, sizeof str, "%s/%s/address", slots, dent->d_name) &&
+ read_one_line_file(str, &address) >= 0 &&
+ startswith(sysname, address)) {
+ hotplug_slot = i;
+
+ /* We found the match between PCI device and slot. However, we won't use the
+ * slot index if the device is a PCI bridge, because it can have other child
+ * devices that will try to claim the same index and that would create name
+ * collision. */
+ if (naming_scheme_has(NAMING_BRIDGE_NO_SLOT) && is_pci_bridge(hotplug_slot_dev))
+ hotplug_slot = 0;
+
+ break;
+ }
+ }
+ if (hotplug_slot >= 0)
+ break;
+ if (sd_device_get_parent_with_subsystem_devtype(hotplug_slot_dev, "pci", NULL, &hotplug_slot_dev) < 0)
+ break;
+ rewinddir(dir);
+ }
+
+ if (hotplug_slot > 0) {
+ s = names->pci_slot;
+ l = sizeof(names->pci_slot);
+ if (domain > 0)
+ l = strpcpyf(&s, l, "P%d", domain);
+ l = strpcpyf(&s, l, "s%d", hotplug_slot);
+ if (func > 0 || is_pci_multifunction(names->pcidev))
+ l = strpcpyf(&s, l, "f%d", func);
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
+ l = strpcpyf(&s, l, "d%lu", dev_port);
+ if (l == 0)
+ names->pci_slot[0] = '\0';
+ }
+
+ return 0;
+}
+
+static int names_vio(sd_device *dev, struct netnames *names) {
+ sd_device *parent;
+ unsigned busid, slotid, ethid;
+ const char *syspath, *subsystem;
+ int r;
+
+ /* check if our direct parent is a VIO device with no other bus in-between */
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(parent, &subsystem);
+ if (r < 0)
+ return r;
+ if (!streq("vio", subsystem))
+ return -ENOENT;
+
+ /* The devices' $DEVPATH number is tied to (virtual) hardware (slot id
+ * selected in the HMC), thus this provides a reliable naming (e.g.
+ * "/devices/vio/30000002/net/eth1"); we ignore the bus number, as
+ * there should only ever be one bus, and then remove leading zeros. */
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ if (sscanf(syspath, "/sys/devices/vio/%4x%4x/net/eth%u", &busid, &slotid, &ethid) != 3)
+ return -EINVAL;
+
+ xsprintf(names->vio_slot, "v%u", slotid);
+ names->type = NET_VIO;
+ return 0;
+}
+
+#define _PLATFORM_TEST "/sys/devices/platform/vvvvPPPP"
+#define _PLATFORM_PATTERN4 "/sys/devices/platform/%4s%4x:%2x/net/eth%u"
+#define _PLATFORM_PATTERN3 "/sys/devices/platform/%3s%4x:%2x/net/eth%u"
+
+static int names_platform(sd_device *dev, struct netnames *names, bool test) {
+ sd_device *parent;
+ char vendor[5];
+ unsigned model, instance, ethid;
+ const char *syspath, *pattern, *validchars, *subsystem;
+ int r;
+
+ /* check if our direct parent is a platform device with no other bus in-between */
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(parent, &subsystem);
+ if (r < 0)
+ return r;
+
+ if (!streq("platform", subsystem))
+ return -ENOENT;
+
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ /* syspath is too short, to have a valid ACPI instance */
+ if (strlen(syspath) < sizeof _PLATFORM_TEST)
+ return -EINVAL;
+
+ /* Vendor ID can be either PNP ID (3 chars A-Z) or ACPI ID (4 chars A-Z and numerals) */
+ if (syspath[sizeof _PLATFORM_TEST - 1] == ':') {
+ pattern = _PLATFORM_PATTERN4;
+ validchars = UPPERCASE_LETTERS DIGITS;
+ } else {
+ pattern = _PLATFORM_PATTERN3;
+ validchars = UPPERCASE_LETTERS;
+ }
+
+ /* Platform devices are named after ACPI table match, and instance id
+ * eg. "/sys/devices/platform/HISI00C2:00");
+ * The Vendor (3 or 4 char), followed by hexdecimal model number : instance id.
+ */
+
+ DISABLE_WARNING_FORMAT_NONLITERAL;
+ if (sscanf(syspath, pattern, vendor, &model, &instance, &ethid) != 4)
+ return -EINVAL;
+ REENABLE_WARNING;
+
+ if (!in_charset(vendor, validchars))
+ return -ENOENT;
+
+ ascii_strlower(vendor);
+
+ xsprintf(names->platform_path, "a%s%xi%u", vendor, model, instance);
+ names->type = NET_PLATFORM;
+ return 0;
+}
+
+static int names_pci(sd_device *dev, struct netnames *names) {
+ sd_device *parent;
+ struct netnames vf_names = {};
+ struct virtfn_info vf_info = {};
+ const char *subsystem;
+ int r;
+
+ assert(dev);
+ assert(names);
+
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0)
+ return r;
+ /* skip virtio subsystem if present */
+ parent = skip_virtio(parent);
+
+ if (!parent)
+ return -ENOENT;
+
+ /* check if our direct parent is a PCI device with no other bus in-between */
+ if (sd_device_get_subsystem(parent, &subsystem) >= 0 &&
+ streq("pci", subsystem)) {
+ names->type = NET_PCI;
+ names->pcidev = parent;
+ } else {
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "pci", NULL, &names->pcidev);
+ if (r < 0)
+ return r;
+ }
+
+ if (naming_scheme_has(NAMING_SR_IOV_V) &&
+ get_virtfn_info(dev, names, &vf_info) >= 0) {
+ /* If this is an SR-IOV virtual device, get base name using physical device and add virtfn suffix. */
+ vf_names.pcidev = vf_info.physfn_pcidev;
+ dev_pci_onboard(dev, &vf_names);
+ dev_pci_slot(dev, &vf_names);
+ if (vf_names.pci_onboard[0])
+ if (strlen(vf_names.pci_onboard) + strlen(vf_info.suffix) < sizeof(names->pci_onboard))
+ strscpyl(names->pci_onboard, sizeof(names->pci_onboard),
+ vf_names.pci_onboard, vf_info.suffix, NULL);
+ if (vf_names.pci_slot[0])
+ if (strlen(vf_names.pci_slot) + strlen(vf_info.suffix) < sizeof(names->pci_slot))
+ strscpyl(names->pci_slot, sizeof(names->pci_slot),
+ vf_names.pci_slot, vf_info.suffix, NULL);
+ if (vf_names.pci_path[0])
+ if (strlen(vf_names.pci_path) + strlen(vf_info.suffix) < sizeof(names->pci_path))
+ strscpyl(names->pci_path, sizeof(names->pci_path),
+ vf_names.pci_path, vf_info.suffix, NULL);
+ sd_device_unref(vf_info.physfn_pcidev);
+ } else {
+ dev_pci_onboard(dev, names);
+ dev_pci_slot(dev, names);
+ }
+
+ return 0;
+}
+
+static int names_usb(sd_device *dev, struct netnames *names) {
+ sd_device *usbdev;
+ char name[256], *ports, *config, *interf, *s;
+ const char *sysname;
+ size_t l;
+ int r;
+
+ assert(dev);
+ assert(names);
+
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "usb", "usb_interface", &usbdev);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(usbdev, &sysname);
+ if (r < 0)
+ return r;
+
+ /* get USB port number chain, configuration, interface */
+ strscpy(name, sizeof(name), sysname);
+ s = strchr(name, '-');
+ if (!s)
+ return -EINVAL;
+ ports = s+1;
+
+ s = strchr(ports, ':');
+ if (!s)
+ return -EINVAL;
+ s[0] = '\0';
+ config = s+1;
+
+ s = strchr(config, '.');
+ if (!s)
+ return -EINVAL;
+ s[0] = '\0';
+ interf = s+1;
+
+ /* prefix every port number in the chain with "u" */
+ s = ports;
+ while ((s = strchr(s, '.')))
+ s[0] = 'u';
+ s = names->usb_ports;
+ l = strpcpyl(&s, sizeof(names->usb_ports), "u", ports, NULL);
+
+ /* append USB config number, suppress the common config == 1 */
+ if (!streq(config, "1"))
+ l = strpcpyl(&s, sizeof(names->usb_ports), "c", config, NULL);
+
+ /* append USB interface number, suppress the interface == 0 */
+ if (!streq(interf, "0"))
+ l = strpcpyl(&s, sizeof(names->usb_ports), "i", interf, NULL);
+ if (l == 0)
+ return -ENAMETOOLONG;
+
+ names->type = NET_USB;
+ return 0;
+}
+
+static int names_bcma(sd_device *dev, struct netnames *names) {
+ sd_device *bcmadev;
+ unsigned core;
+ const char *sysname;
+ int r;
+
+ assert(dev);
+ assert(names);
+
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "bcma", NULL, &bcmadev);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(bcmadev, &sysname);
+ if (r < 0)
+ return r;
+
+ /* bus num:core num */
+ if (sscanf(sysname, "bcma%*u:%u", &core) != 1)
+ return -EINVAL;
+ /* suppress the common core == 0 */
+ if (core > 0)
+ xsprintf(names->bcma_core, "b%u", core);
+
+ names->type = NET_BCMA;
+ return 0;
+}
+
+static int names_ccw(sd_device *dev, struct netnames *names) {
+ sd_device *cdev;
+ const char *bus_id, *subsys;
+ size_t bus_id_len;
+ size_t bus_id_start;
+ int r;
+
+ assert(dev);
+ assert(names);
+
+ /* Retrieve the associated CCW device */
+ r = sd_device_get_parent(dev, &cdev);
+ if (r < 0)
+ return r;
+
+ /* skip virtio subsystem if present */
+ cdev = skip_virtio(cdev);
+ if (!cdev)
+ return -ENOENT;
+
+ r = sd_device_get_subsystem(cdev, &subsys);
+ if (r < 0)
+ return r;
+
+ /* Network devices are either single or grouped CCW devices */
+ if (!STR_IN_SET(subsys, "ccwgroup", "ccw"))
+ return -ENOENT;
+
+ /* Retrieve bus-ID of the CCW device. The bus-ID uniquely
+ * identifies the network device on the Linux on System z channel
+ * subsystem. Note that the bus-ID contains lowercase characters.
+ */
+ r = sd_device_get_sysname(cdev, &bus_id);
+ if (r < 0)
+ return r;
+
+ /* Check the length of the bus-ID. Rely on the fact that the kernel provides a correct bus-ID;
+ * alternatively, improve this check and parse and verify each bus-ID part...
+ */
+ bus_id_len = strlen(bus_id);
+ if (!IN_SET(bus_id_len, 8, 9))
+ return -EINVAL;
+
+ /* Strip leading zeros from the bus id for aesthetic purposes. This
+ * keeps the ccw names stable, yet much shorter in general case of
+ * bus_id 0.0.0600 -> 600. This is similar to e.g. how PCI domain is
+ * not prepended when it is zero. Preserve the last 0 for 0.0.0000.
+ */
+ bus_id_start = strspn(bus_id, ".0");
+ bus_id += bus_id_start < bus_id_len ? bus_id_start : bus_id_len - 1;
+
+ /* Store the CCW bus-ID for use as network device name */
+ if (snprintf_ok(names->ccw_busid, sizeof(names->ccw_busid), "c%s", bus_id))
+ names->type = NET_CCW;
+
+ return 0;
+}
+
+static int names_mac(sd_device *dev, struct netnames *names) {
+ const char *s;
+ unsigned long i;
+ unsigned a1, a2, a3, a4, a5, a6;
+ int r;
+
+ /* Some kinds of devices tend to have hardware addresses
+ * that are impossible to use in an iface name.
+ */
+ r = sd_device_get_sysattr_value(dev, "type", &s);
+ if (r < 0)
+ return r;
+
+ i = strtoul(s, NULL, 0);
+ switch (i) {
+ /* The persistent part of a hardware address of an InfiniBand NIC
+ * is 8 bytes long. We cannot fit this much in an iface name.
+ */
+ case ARPHRD_INFINIBAND:
+ return -EINVAL;
+ default:
+ break;
+ }
+
+ /* check for NET_ADDR_PERM, skip random MAC addresses */
+ r = sd_device_get_sysattr_value(dev, "addr_assign_type", &s);
+ if (r < 0)
+ return r;
+ i = strtoul(s, NULL, 0);
+ if (i != 0)
+ return 0;
+
+ r = sd_device_get_sysattr_value(dev, "address", &s);
+ if (r < 0)
+ return r;
+ if (sscanf(s, "%x:%x:%x:%x:%x:%x", &a1, &a2, &a3, &a4, &a5, &a6) != 6)
+ return -EINVAL;
+
+ /* skip empty MAC addresses */
+ if (a1 + a2 + a3 + a4 + a5 + a6 == 0)
+ return -EINVAL;
+
+ names->mac[0] = a1;
+ names->mac[1] = a2;
+ names->mac[2] = a3;
+ names->mac[3] = a4;
+ names->mac[4] = a5;
+ names->mac[5] = a6;
+ names->mac_valid = true;
+ return 0;
+}
+
+static int names_netdevsim(sd_device *dev, struct netnames *names) {
+ sd_device *netdevsimdev;
+ const char *sysname;
+ unsigned addr;
+ const char *port_name = NULL;
+ int r;
+ bool ok;
+
+ if (!naming_scheme_has(NAMING_NETDEVSIM))
+ return 0;
+
+ assert(dev);
+ assert(names);
+
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "netdevsim", NULL, &netdevsimdev);
+ if (r < 0)
+ return r;
+ r = sd_device_get_sysname(netdevsimdev, &sysname);
+ if (r < 0)
+ return r;
+
+ if (sscanf(sysname, "netdevsim%u", &addr) != 1)
+ return -EINVAL;
+
+ r = sd_device_get_sysattr_value(dev, "phys_port_name", &port_name);
+ if (r < 0)
+ return r;
+
+ ok = snprintf_ok(names->netdevsim_path, sizeof(names->netdevsim_path), "i%un%s", addr, port_name);
+ if (!ok)
+ return -ENOBUFS;
+
+ names->type = NET_NETDEVSIM;
+
+ return 0;
+}
+
+/* IEEE Organizationally Unique Identifier vendor string */
+static int ieee_oui(sd_device *dev, struct netnames *names, bool test) {
+ char str[32];
+
+ if (!names->mac_valid)
+ return -ENOENT;
+ /* skip commonly misused 00:00:00 (Xerox) prefix */
+ if (memcmp(names->mac, "\0\0\0", 3) == 0)
+ return -EINVAL;
+ xsprintf(str, "OUI:%02X%02X%02X%02X%02X%02X", names->mac[0],
+ names->mac[1], names->mac[2], names->mac[3], names->mac[4],
+ names->mac[5]);
+ udev_builtin_hwdb_lookup(dev, NULL, str, NULL, test);
+ return 0;
+}
+
+static int builtin_net_id(sd_device *dev, int argc, char *argv[], bool test) {
+ const char *s, *p, *devtype, *prefix = "en";
+ struct netnames names = {};
+ unsigned long i;
+ int r;
+
+ /* handle only ARPHRD_ETHER, ARPHRD_SLIP and ARPHRD_INFINIBAND devices */
+ r = sd_device_get_sysattr_value(dev, "type", &s);
+ if (r < 0)
+ return r;
+
+ i = strtoul(s, NULL, 0);
+ switch (i) {
+ case ARPHRD_ETHER:
+ prefix = "en";
+ break;
+ case ARPHRD_INFINIBAND:
+ if (naming_scheme_has(NAMING_INFINIBAND))
+ prefix = "ib";
+ else
+ return 0;
+ break;
+ case ARPHRD_SLIP:
+ prefix = "sl";
+ break;
+ default:
+ return 0;
+ }
+
+ /* skip stacked devices, like VLANs, ... */
+ r = sd_device_get_sysattr_value(dev, "ifindex", &s);
+ if (r < 0)
+ return r;
+ r = sd_device_get_sysattr_value(dev, "iflink", &p);
+ if (r < 0)
+ return r;
+ if (!streq(s, p))
+ return 0;
+
+ if (sd_device_get_devtype(dev, &devtype) >= 0) {
+ if (streq("wlan", devtype))
+ prefix = "wl";
+ else if (streq("wwan", devtype))
+ prefix = "ww";
+ }
+
+ udev_builtin_add_property(dev, test, "ID_NET_NAMING_SCHEME", naming_scheme()->name);
+
+ r = names_mac(dev, &names);
+ if (r >= 0 && names.mac_valid) {
+ char str[ALTIFNAMSIZ];
+
+ xsprintf(str, "%sx%02x%02x%02x%02x%02x%02x", prefix,
+ names.mac[0], names.mac[1], names.mac[2],
+ names.mac[3], names.mac[4], names.mac[5]);
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_MAC", str);
+
+ ieee_oui(dev, &names, test);
+ }
+
+ /* get path names for Linux on System z network devices */
+ if (names_ccw(dev, &names) >= 0 && names.type == NET_CCW) {
+ char str[ALTIFNAMSIZ];
+
+ if (snprintf_ok(str, sizeof str, "%s%s", prefix, names.ccw_busid))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+ return 0;
+ }
+
+ /* get ibmveth/ibmvnic slot-based names. */
+ if (names_vio(dev, &names) >= 0 && names.type == NET_VIO) {
+ char str[ALTIFNAMSIZ];
+
+ if (snprintf_ok(str, sizeof str, "%s%s", prefix, names.vio_slot))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_SLOT", str);
+ return 0;
+ }
+
+ /* get ACPI path names for ARM64 platform devices */
+ if (names_platform(dev, &names, test) >= 0 && names.type == NET_PLATFORM) {
+ char str[ALTIFNAMSIZ];
+
+ if (snprintf_ok(str, sizeof str, "%s%s", prefix, names.platform_path))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+ return 0;
+ }
+
+ /* get netdevsim path names */
+ if (names_netdevsim(dev, &names) >= 0 && names.type == NET_NETDEVSIM) {
+ char str[ALTIFNAMSIZ];
+
+ if (snprintf_ok(str, sizeof str, "%s%s", prefix, names.netdevsim_path))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+
+ return 0;
+ }
+
+ /* get PCI based path names, we compose only PCI based paths */
+ if (names_pci(dev, &names) < 0)
+ return 0;
+
+ /* plain PCI device */
+ if (names.type == NET_PCI) {
+ char str[ALTIFNAMSIZ];
+
+ if (names.pci_onboard[0] &&
+ snprintf_ok(str, sizeof str, "%s%s", prefix, names.pci_onboard))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_ONBOARD", str);
+
+ if (names.pci_onboard_label &&
+ snprintf_ok(str, sizeof str, "%s%s",
+ naming_scheme_has(NAMING_LABEL_NOPREFIX) ? "" : prefix,
+ names.pci_onboard_label))
+ udev_builtin_add_property(dev, test, "ID_NET_LABEL_ONBOARD", str);
+
+ if (names.pci_path[0] &&
+ snprintf_ok(str, sizeof str, "%s%s", prefix, names.pci_path))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+
+ if (names.pci_slot[0] &&
+ snprintf_ok(str, sizeof str, "%s%s", prefix, names.pci_slot))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_SLOT", str);
+ return 0;
+ }
+
+ /* USB device */
+ if (names_usb(dev, &names) >= 0 && names.type == NET_USB) {
+ char str[ALTIFNAMSIZ];
+
+ if (names.pci_path[0] &&
+ snprintf_ok(str, sizeof str, "%s%s%s", prefix, names.pci_path, names.usb_ports))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+
+ if (names.pci_slot[0] &&
+ snprintf_ok(str, sizeof str, "%s%s%s", prefix, names.pci_slot, names.usb_ports))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_SLOT", str);
+ return 0;
+ }
+
+ /* Broadcom bus */
+ if (names_bcma(dev, &names) >= 0 && names.type == NET_BCMA) {
+ char str[ALTIFNAMSIZ];
+
+ if (names.pci_path[0] &&
+ snprintf_ok(str, sizeof str, "%s%s%s", prefix, names.pci_path, names.bcma_core))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_PATH", str);
+
+ if (names.pci_slot[0] &&
+ snprintf(str, sizeof str, "%s%s%s", prefix, names.pci_slot, names.bcma_core))
+ udev_builtin_add_property(dev, test, "ID_NET_NAME_SLOT", str);
+ return 0;
+ }
+
+ return 0;
+}
+
+const UdevBuiltin udev_builtin_net_id = {
+ .name = "net_id",
+ .cmd = builtin_net_id,
+ .help = "Network device properties",
+};
diff --git a/src/udev/udev-builtin-net_setup_link.c b/src/udev/udev-builtin-net_setup_link.c
new file mode 100644
index 0000000..cb12b94
--- /dev/null
+++ b/src/udev/udev-builtin-net_setup_link.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "device-util.h"
+#include "alloc-util.h"
+#include "link-config.h"
+#include "log.h"
+#include "string-util.h"
+#include "udev-builtin.h"
+
+static link_config_ctx *ctx = NULL;
+
+static int builtin_net_setup_link(sd_device *dev, int argc, char **argv, bool test) {
+ _cleanup_free_ char *driver = NULL;
+ const char *name = NULL;
+ link_config *link;
+ int r;
+
+ if (argc > 1)
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ r = link_get_driver(ctx, dev, &driver);
+ if (r < 0)
+ log_device_full_errno(dev, r == -EOPNOTSUPP ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to query device driver: %m");
+ else
+ udev_builtin_add_property(dev, test, "ID_NET_DRIVER", driver);
+
+ r = link_config_get(ctx, dev, &link);
+ if (r < 0) {
+ if (r == -ENOENT)
+ return log_device_debug_errno(dev, r, "No matching link configuration found.");
+
+ return log_device_error_errno(dev, r, "Failed to get link config: %m");
+ }
+
+ r = link_config_apply(ctx, link, dev, &name);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Could not apply link config, ignoring: %m");
+
+ udev_builtin_add_property(dev, test, "ID_NET_LINK_FILE", link->filename);
+
+ if (name)
+ udev_builtin_add_property(dev, test, "ID_NET_NAME", name);
+
+ return 0;
+}
+
+static int builtin_net_setup_link_init(void) {
+ int r;
+
+ if (ctx)
+ return 0;
+
+ r = link_config_ctx_new(&ctx);
+ if (r < 0)
+ return r;
+
+ r = link_config_load(ctx);
+ if (r < 0)
+ return r;
+
+ log_debug("Created link configuration context.");
+ return 0;
+}
+
+static void builtin_net_setup_link_exit(void) {
+ link_config_ctx_free(ctx);
+ ctx = NULL;
+ log_debug("Unloaded link configuration context.");
+}
+
+static bool builtin_net_setup_link_validate(void) {
+ log_debug("Check if link configuration needs reloading.");
+ if (!ctx)
+ return false;
+
+ return link_config_should_reload(ctx);
+}
+
+const UdevBuiltin udev_builtin_net_setup_link = {
+ .name = "net_setup_link",
+ .cmd = builtin_net_setup_link,
+ .init = builtin_net_setup_link_init,
+ .exit = builtin_net_setup_link_exit,
+ .validate = builtin_net_setup_link_validate,
+ .help = "Configure network link",
+ .run_once = false,
+};
diff --git a/src/udev/udev-builtin-path_id.c b/src/udev/udev-builtin-path_id.c
new file mode 100644
index 0000000..0da59e2
--- /dev/null
+++ b/src/udev/udev-builtin-path_id.c
@@ -0,0 +1,732 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * compose persistent device path
+ *
+ * Logic based on Hannes Reinecke's shell script.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "libudev-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "sysexits.h"
+#include "udev-builtin.h"
+
+_printf_(2,3)
+static void path_prepend(char **path, const char *fmt, ...) {
+ va_list va;
+ _cleanup_free_ char *pre = NULL;
+ int r;
+
+ va_start(va, fmt);
+ r = vasprintf(&pre, fmt, va);
+ va_end(va);
+ if (r < 0) {
+ log_oom();
+ exit(EX_OSERR);
+ }
+
+ if (*path) {
+ char *new;
+
+ new = strjoin(pre, "-", *path);
+ if (!new) {
+ log_oom();
+ exit(EX_OSERR);
+ }
+
+ free_and_replace(*path, new);
+ } else
+ *path = TAKE_PTR(pre);
+}
+
+/*
+** Linux only supports 32 bit luns.
+** See drivers/scsi/scsi_scan.c::scsilun_to_int() for more details.
+*/
+static int format_lun_number(sd_device *dev, char **path) {
+ const char *sysnum;
+ unsigned long lun;
+ int r;
+
+ r = sd_device_get_sysnum(dev, &sysnum);
+ if (r < 0)
+ return r;
+ if (!sysnum)
+ return -ENOENT;
+
+ lun = strtoul(sysnum, NULL, 10);
+ if (lun < 256)
+ /* address method 0, peripheral device addressing with bus id of zero */
+ path_prepend(path, "lun-%lu", lun);
+ else
+ /* handle all other lun addressing methods by using a variant of the original lun format */
+ path_prepend(path, "lun-0x%04lx%04lx00000000", lun & 0xffff, (lun >> 16) & 0xffff);
+
+ return 0;
+}
+
+static sd_device *skip_subsystem(sd_device *dev, const char *subsys) {
+ sd_device *parent;
+
+ assert(dev);
+ assert(subsys);
+
+ for (parent = dev; ; ) {
+ const char *subsystem;
+
+ if (sd_device_get_subsystem(parent, &subsystem) < 0)
+ break;
+
+ if (!streq(subsystem, subsys))
+ break;
+
+ dev = parent;
+ if (sd_device_get_parent(dev, &parent) < 0)
+ break;
+ }
+
+ return dev;
+}
+
+static sd_device *handle_scsi_fibre_channel(sd_device *parent, char **path) {
+ sd_device *targetdev;
+ _cleanup_(sd_device_unrefp) sd_device *fcdev = NULL;
+ const char *port, *sysname;
+ _cleanup_free_ char *lun = NULL;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_target", &targetdev) < 0)
+ return NULL;
+ if (sd_device_get_sysname(targetdev, &sysname) < 0)
+ return NULL;
+ if (sd_device_new_from_subsystem_sysname(&fcdev, "fc_transport", sysname) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(fcdev, "port_name", &port) < 0)
+ return NULL;
+
+ format_lun_number(parent, &lun);
+ path_prepend(path, "fc-%s-%s", port, lun);
+ return parent;
+}
+
+static sd_device *handle_scsi_sas_wide_port(sd_device *parent, char **path) {
+ sd_device *targetdev, *target_parent;
+ _cleanup_(sd_device_unrefp) sd_device *sasdev = NULL;
+ const char *sas_address, *sysname;
+ _cleanup_free_ char *lun = NULL;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_target", &targetdev) < 0)
+ return NULL;
+ if (sd_device_get_parent(targetdev, &target_parent) < 0)
+ return NULL;
+ if (sd_device_get_sysname(target_parent, &sysname) < 0)
+ return NULL;
+ if (sd_device_new_from_subsystem_sysname(&sasdev, "sas_device", sysname) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(sasdev, "sas_address", &sas_address) < 0)
+ return NULL;
+
+ format_lun_number(parent, &lun);
+ path_prepend(path, "sas-%s-%s", sas_address, lun);
+ return parent;
+}
+
+static sd_device *handle_scsi_sas(sd_device *parent, char **path) {
+ sd_device *targetdev, *target_parent, *port, *expander;
+ _cleanup_(sd_device_unrefp) sd_device *target_sasdev = NULL, *expander_sasdev = NULL, *port_sasdev = NULL;
+ const char *sas_address = NULL;
+ const char *phy_id;
+ const char *phy_count, *sysname;
+ _cleanup_free_ char *lun = NULL;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_target", &targetdev) < 0)
+ return NULL;
+ if (sd_device_get_parent(targetdev, &target_parent) < 0)
+ return NULL;
+ if (sd_device_get_sysname(target_parent, &sysname) < 0)
+ return NULL;
+ /* Get sas device */
+ if (sd_device_new_from_subsystem_sysname(&target_sasdev, "sas_device", sysname) < 0)
+ return NULL;
+ /* The next parent is sas port */
+ if (sd_device_get_parent(target_parent, &port) < 0)
+ return NULL;
+ if (sd_device_get_sysname(port, &sysname) < 0)
+ return NULL;
+ /* Get port device */
+ if (sd_device_new_from_subsystem_sysname(&port_sasdev, "sas_port", sysname) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(port_sasdev, "num_phys", &phy_count) < 0)
+ return NULL;
+
+ /* Check if we are simple disk */
+ if (strncmp(phy_count, "1", 2) != 0)
+ return handle_scsi_sas_wide_port(parent, path);
+
+ /* Get connected phy */
+ if (sd_device_get_sysattr_value(target_sasdev, "phy_identifier", &phy_id) < 0)
+ return NULL;
+
+ /* The port's parent is either hba or expander */
+ if (sd_device_get_parent(port, &expander) < 0)
+ return NULL;
+
+ if (sd_device_get_sysname(expander, &sysname) < 0)
+ return NULL;
+ /* Get expander device */
+ if (sd_device_new_from_subsystem_sysname(&expander_sasdev, "sas_device", sysname) >= 0) {
+ /* Get expander's address */
+ if (sd_device_get_sysattr_value(expander_sasdev, "sas_address", &sas_address) < 0)
+ return NULL;
+ }
+
+ format_lun_number(parent, &lun);
+ if (sas_address)
+ path_prepend(path, "sas-exp%s-phy%s-%s", sas_address, phy_id, lun);
+ else
+ path_prepend(path, "sas-phy%s-%s", phy_id, lun);
+
+ return parent;
+}
+
+static sd_device *handle_scsi_iscsi(sd_device *parent, char **path) {
+ sd_device *transportdev;
+ _cleanup_(sd_device_unrefp) sd_device *sessiondev = NULL, *conndev = NULL;
+ const char *target, *connname, *addr, *port;
+ _cleanup_free_ char *lun = NULL;
+ const char *sysname, *sysnum;
+
+ assert(parent);
+ assert(path);
+
+ /* find iscsi session */
+ for (transportdev = parent; ; ) {
+
+ if (sd_device_get_parent(transportdev, &transportdev) < 0)
+ return NULL;
+ if (sd_device_get_sysname(transportdev, &sysname) < 0)
+ return NULL;
+ if (startswith(sysname, "session"))
+ break;
+ }
+
+ /* find iscsi session device */
+ if (sd_device_new_from_subsystem_sysname(&sessiondev, "iscsi_session", sysname) < 0)
+ return NULL;
+
+ if (sd_device_get_sysattr_value(sessiondev, "targetname", &target) < 0)
+ return NULL;
+
+ if (sd_device_get_sysnum(transportdev, &sysnum) < 0 || !sysnum)
+ return NULL;
+ connname = strjoina("connection", sysnum, ":0");
+ if (sd_device_new_from_subsystem_sysname(&conndev, "iscsi_connection", connname) < 0)
+ return NULL;
+
+ if (sd_device_get_sysattr_value(conndev, "persistent_address", &addr) < 0)
+ return NULL;
+ if (sd_device_get_sysattr_value(conndev, "persistent_port", &port) < 0)
+ return NULL;
+
+ format_lun_number(parent, &lun);
+ path_prepend(path, "ip-%s:%s-iscsi-%s-%s", addr, port, target, lun);
+ return parent;
+}
+
+static sd_device *handle_scsi_ata(sd_device *parent, char **path, char **compat_path) {
+ sd_device *targetdev, *target_parent;
+ _cleanup_(sd_device_unrefp) sd_device *atadev = NULL;
+ const char *port_no, *sysname, *name;
+ unsigned host, bus, target, lun;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_sysname(parent, &name) < 0)
+ return NULL;
+ if (sscanf(name, "%u:%u:%u:%u", &host, &bus, &target, &lun) != 4)
+ return NULL;
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_host", &targetdev) < 0)
+ return NULL;
+
+ if (sd_device_get_parent(targetdev, &target_parent) < 0)
+ return NULL;
+
+ if (sd_device_get_sysname(target_parent, &sysname) < 0)
+ return NULL;
+ if (sd_device_new_from_subsystem_sysname(&atadev, "ata_port", sysname) < 0)
+ return NULL;
+
+ if (sd_device_get_sysattr_value(atadev, "port_no", &port_no) < 0)
+ return NULL;
+
+ if (bus != 0)
+ /* Devices behind port multiplier have a bus != 0*/
+ path_prepend(path, "ata-%s.%u.0", port_no, bus);
+ else
+ /* Master/slave are distinguished by target id */
+ path_prepend(path, "ata-%s.%u", port_no, target);
+
+ /* old compatible persistent link for ATA devices */
+ if (compat_path)
+ path_prepend(compat_path, "ata-%s", port_no);
+
+ return parent;
+}
+
+static sd_device *handle_scsi_default(sd_device *parent, char **path) {
+ sd_device *hostdev;
+ int host, bus, target, lun;
+ const char *name, *base, *pos;
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ int basenum = -1;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_host", &hostdev) < 0)
+ return NULL;
+
+ if (sd_device_get_sysname(parent, &name) < 0)
+ return NULL;
+ if (sscanf(name, "%d:%d:%d:%d", &host, &bus, &target, &lun) != 4)
+ return NULL;
+
+ /*
+ * Rebase host offset to get the local relative number
+ *
+ * Note: This is by definition racy, unreliable and too simple.
+ * Please do not copy this model anywhere. It's just a left-over
+ * from the time we had no idea how things should look like in
+ * the end.
+ *
+ * Making assumptions about a global in-kernel counter and use
+ * that to calculate a local offset is a very broken concept. It
+ * can only work as long as things are in strict order.
+ *
+ * The kernel needs to export the instance/port number of a
+ * controller directly, without the need for rebase magic like
+ * this. Manual driver unbind/bind, parallel hotplug/unplug will
+ * get into the way of this "I hope it works" logic.
+ */
+
+ if (sd_device_get_syspath(hostdev, &base) < 0)
+ return NULL;
+ pos = strrchr(base, '/');
+ if (!pos)
+ return NULL;
+
+ base = strndupa(base, pos - base);
+ dir = opendir(base);
+ if (!dir)
+ return NULL;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ char *rest;
+ int i;
+
+ if (dent->d_name[0] == '.')
+ continue;
+ if (!IN_SET(dent->d_type, DT_DIR, DT_LNK))
+ continue;
+ if (!startswith(dent->d_name, "host"))
+ continue;
+ i = strtoul(&dent->d_name[4], &rest, 10);
+ if (rest[0] != '\0')
+ continue;
+ /*
+ * find the smallest number; the host really needs to export its
+ * own instance number per parent device; relying on the global host
+ * enumeration and plainly rebasing the numbers sounds unreliable
+ */
+ if (basenum == -1 || i < basenum)
+ basenum = i;
+ }
+ if (basenum == -1)
+ return hostdev;
+ host -= basenum;
+
+ path_prepend(path, "scsi-%u:%u:%u:%u", host, bus, target, lun);
+ return hostdev;
+}
+
+static sd_device *handle_scsi_hyperv(sd_device *parent, char **path, size_t guid_str_len) {
+ sd_device *hostdev;
+ sd_device *vmbusdev;
+ const char *guid_str;
+ _cleanup_free_ char *lun = NULL;
+ char guid[39];
+ size_t i, k;
+
+ assert(parent);
+ assert(path);
+ assert(guid_str_len < sizeof(guid));
+
+ if (sd_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_host", &hostdev) < 0)
+ return NULL;
+
+ if (sd_device_get_parent(hostdev, &vmbusdev) < 0)
+ return NULL;
+
+ if (sd_device_get_sysattr_value(vmbusdev, "device_id", &guid_str) < 0)
+ return NULL;
+
+ if (strlen(guid_str) < guid_str_len || guid_str[0] != '{' || guid_str[guid_str_len-1] != '}')
+ return NULL;
+
+ for (i = 1, k = 0; i < guid_str_len-1; i++) {
+ if (guid_str[i] == '-')
+ continue;
+ guid[k++] = guid_str[i];
+ }
+ guid[k] = '\0';
+
+ format_lun_number(parent, &lun);
+ path_prepend(path, "vmbus-%s-%s", guid, lun);
+ return parent;
+}
+
+static sd_device *handle_scsi(sd_device *parent, char **path, char **compat_path, bool *supported_parent) {
+ const char *devtype, *id, *name;
+
+ if (sd_device_get_devtype(parent, &devtype) < 0 ||
+ !streq(devtype, "scsi_device"))
+ return parent;
+
+ /* firewire */
+ if (sd_device_get_sysattr_value(parent, "ieee1394_id", &id) >= 0) {
+ path_prepend(path, "ieee1394-0x%s", id);
+ *supported_parent = true;
+ return skip_subsystem(parent, "scsi");
+ }
+
+ /* scsi sysfs does not have a "subsystem" for the transport */
+ if (sd_device_get_syspath(parent, &name) < 0)
+ return NULL;
+
+ if (strstr(name, "/rport-")) {
+ *supported_parent = true;
+ return handle_scsi_fibre_channel(parent, path);
+ }
+
+ if (strstr(name, "/end_device-")) {
+ *supported_parent = true;
+ return handle_scsi_sas(parent, path);
+ }
+
+ if (strstr(name, "/session")) {
+ *supported_parent = true;
+ return handle_scsi_iscsi(parent, path);
+ }
+
+ if (strstr(name, "/ata"))
+ return handle_scsi_ata(parent, path, compat_path);
+
+ if (strstr(name, "/vmbus_"))
+ return handle_scsi_hyperv(parent, path, 37);
+ else if (strstr(name, "/VMBUS"))
+ return handle_scsi_hyperv(parent, path, 38);
+
+ return handle_scsi_default(parent, path);
+}
+
+static sd_device *handle_cciss(sd_device *parent, char **path) {
+ const char *str;
+ unsigned controller, disk;
+
+ if (sd_device_get_sysname(parent, &str) < 0)
+ return NULL;
+ if (sscanf(str, "c%ud%u%*s", &controller, &disk) != 2)
+ return NULL;
+
+ path_prepend(path, "cciss-disk%u", disk);
+ return skip_subsystem(parent, "cciss");
+}
+
+static void handle_scsi_tape(sd_device *dev, char **path) {
+ const char *name;
+
+ /* must be the last device in the syspath */
+ if (*path)
+ return;
+
+ if (sd_device_get_sysname(dev, &name) < 0)
+ return;
+
+ if (startswith(name, "nst") && strchr("lma", name[3]))
+ path_prepend(path, "nst%c", name[3]);
+ else if (startswith(name, "st") && strchr("lma", name[2]))
+ path_prepend(path, "st%c", name[2]);
+}
+
+static sd_device *handle_usb(sd_device *parent, char **path) {
+ const char *devtype, *str, *port;
+
+ if (sd_device_get_devtype(parent, &devtype) < 0)
+ return parent;
+ if (!STR_IN_SET(devtype, "usb_interface", "usb_device"))
+ return parent;
+
+ if (sd_device_get_sysname(parent, &str) < 0)
+ return parent;
+ port = strchr(str, '-');
+ if (!port)
+ return parent;
+ port++;
+
+ path_prepend(path, "usb-0:%s", port);
+ return skip_subsystem(parent, "usb");
+}
+
+static sd_device *handle_bcma(sd_device *parent, char **path) {
+ const char *sysname;
+ unsigned core;
+
+ if (sd_device_get_sysname(parent, &sysname) < 0)
+ return NULL;
+ if (sscanf(sysname, "bcma%*u:%u", &core) != 1)
+ return NULL;
+
+ path_prepend(path, "bcma-%u", core);
+ return parent;
+}
+
+/* Handle devices of AP bus in System z platform. */
+static sd_device *handle_ap(sd_device *parent, char **path) {
+ const char *type, *func;
+
+ assert(parent);
+ assert(path);
+
+ if (sd_device_get_sysattr_value(parent, "type", &type) >= 0 &&
+ sd_device_get_sysattr_value(parent, "ap_functions", &func) >= 0)
+ path_prepend(path, "ap-%s-%s", type, func);
+ else {
+ const char *sysname;
+
+ if (sd_device_get_sysname(parent, &sysname) >= 0)
+ path_prepend(path, "ap-%s", sysname);
+ }
+
+ return skip_subsystem(parent, "ap");
+}
+
+static int builtin_path_id(sd_device *dev, int argc, char *argv[], bool test) {
+ sd_device *parent;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_free_ char *compat_path = NULL;
+ bool supported_transport = false;
+ bool supported_parent = false;
+ const char *subsystem;
+
+ assert(dev);
+
+ /* walk up the chain of devices and compose path */
+ parent = dev;
+ while (parent) {
+ const char *subsys, *sysname;
+
+ if (sd_device_get_subsystem(parent, &subsys) < 0 ||
+ sd_device_get_sysname(parent, &sysname) < 0) {
+ ;
+ } else if (streq(subsys, "scsi_tape")) {
+ handle_scsi_tape(parent, &path);
+ } else if (streq(subsys, "scsi")) {
+ parent = handle_scsi(parent, &path, &compat_path, &supported_parent);
+ supported_transport = true;
+ } else if (streq(subsys, "cciss")) {
+ parent = handle_cciss(parent, &path);
+ supported_transport = true;
+ } else if (streq(subsys, "usb")) {
+ parent = handle_usb(parent, &path);
+ supported_transport = true;
+ } else if (streq(subsys, "bcma")) {
+ parent = handle_bcma(parent, &path);
+ supported_transport = true;
+ } else if (streq(subsys, "serio")) {
+ const char *sysnum;
+
+ if (sd_device_get_sysnum(parent, &sysnum) >= 0 && sysnum) {
+ path_prepend(&path, "serio-%s", sysnum);
+ parent = skip_subsystem(parent, "serio");
+ }
+ } else if (streq(subsys, "pci")) {
+ path_prepend(&path, "pci-%s", sysname);
+ if (compat_path)
+ path_prepend(&compat_path, "pci-%s", sysname);
+ parent = skip_subsystem(parent, "pci");
+ supported_parent = true;
+ } else if (streq(subsys, "platform")) {
+ path_prepend(&path, "platform-%s", sysname);
+ if (compat_path)
+ path_prepend(&compat_path, "platform-%s", sysname);
+ parent = skip_subsystem(parent, "platform");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "acpi")) {
+ path_prepend(&path, "acpi-%s", sysname);
+ if (compat_path)
+ path_prepend(&compat_path, "acpi-%s", sysname);
+ parent = skip_subsystem(parent, "acpi");
+ supported_parent = true;
+ } else if (streq(subsys, "xen")) {
+ path_prepend(&path, "xen-%s", sysname);
+ if (compat_path)
+ path_prepend(&compat_path, "xen-%s", sysname);
+ parent = skip_subsystem(parent, "xen");
+ supported_parent = true;
+ } else if (streq(subsys, "virtio")) {
+ parent = skip_subsystem(parent, "virtio");
+ supported_transport = true;
+ } else if (streq(subsys, "scm")) {
+ path_prepend(&path, "scm-%s", sysname);
+ if (compat_path)
+ path_prepend(&compat_path, "scm-%s", sysname);
+ parent = skip_subsystem(parent, "scm");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "ccw")) {
+ path_prepend(&path, "ccw-%s", sysname);
+ if (compat_path)
+ path_prepend(&compat_path, "ccw-%s", sysname);
+ parent = skip_subsystem(parent, "ccw");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "ccwgroup")) {
+ path_prepend(&path, "ccwgroup-%s", sysname);
+ if (compat_path)
+ path_prepend(&compat_path, "ccwgroup-%s", sysname);
+ parent = skip_subsystem(parent, "ccwgroup");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "ap")) {
+ parent = handle_ap(parent, &path);
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "iucv")) {
+ path_prepend(&path, "iucv-%s", sysname);
+ if (compat_path)
+ path_prepend(&compat_path, "iucv-%s", sysname);
+ parent = skip_subsystem(parent, "iucv");
+ supported_transport = true;
+ supported_parent = true;
+ } else if (streq(subsys, "nvme")) {
+ const char *nsid;
+
+ if (sd_device_get_sysattr_value(dev, "nsid", &nsid) >= 0) {
+ path_prepend(&path, "nvme-%s", nsid);
+ if (compat_path)
+ path_prepend(&compat_path, "nvme-%s", nsid);
+ parent = skip_subsystem(parent, "nvme");
+ supported_parent = true;
+ supported_transport = true;
+ }
+ } else if (streq(subsys, "spi")) {
+ const char *sysnum;
+
+ if (sd_device_get_sysnum(parent, &sysnum) >= 0 && sysnum) {
+ path_prepend(&path, "cs-%s", sysnum);
+ parent = skip_subsystem(parent, "spi");
+ }
+ }
+
+ if (!parent)
+ break;
+ if (sd_device_get_parent(parent, &parent) < 0)
+ break;
+ }
+
+ if (!path)
+ return -ENOENT;
+
+ /*
+ * Do not return devices with an unknown parent device type. They
+ * might produce conflicting IDs if the parent does not provide a
+ * unique and predictable name.
+ */
+ if (!supported_parent)
+ return -ENOENT;
+
+ /*
+ * Do not return block devices without a well-known transport. Some
+ * devices do not expose their buses and do not provide a unique
+ * and predictable name that way.
+ */
+ if (sd_device_get_subsystem(dev, &subsystem) >= 0 &&
+ streq(subsystem, "block") &&
+ !supported_transport)
+ return -ENOENT;
+
+ {
+ char tag[UTIL_NAME_SIZE];
+ size_t i;
+ const char *p;
+
+ /* compose valid udev tag name */
+ for (p = path, i = 0; *p; p++) {
+ if ((*p >= '0' && *p <= '9') ||
+ (*p >= 'A' && *p <= 'Z') ||
+ (*p >= 'a' && *p <= 'z') ||
+ *p == '-') {
+ tag[i++] = *p;
+ continue;
+ }
+
+ /* skip all leading '_' */
+ if (i == 0)
+ continue;
+
+ /* avoid second '_' */
+ if (tag[i-1] == '_')
+ continue;
+
+ tag[i++] = '_';
+ }
+ /* strip trailing '_' */
+ while (i > 0 && tag[i-1] == '_')
+ i--;
+ tag[i] = '\0';
+
+ udev_builtin_add_property(dev, test, "ID_PATH", path);
+ udev_builtin_add_property(dev, test, "ID_PATH_TAG", tag);
+ }
+
+ /*
+ * Compatible link generation for ATA devices
+ * we assign compat_link to the env variable
+ * ID_PATH_ATA_COMPAT
+ */
+ if (compat_path)
+ udev_builtin_add_property(dev, test, "ID_PATH_ATA_COMPAT", compat_path);
+
+ return 0;
+}
+
+const UdevBuiltin udev_builtin_path_id = {
+ .name = "path_id",
+ .cmd = builtin_path_id,
+ .help = "Compose persistent device path",
+ .run_once = true,
+};
diff --git a/src/udev/udev-builtin-uaccess.c b/src/udev/udev-builtin-uaccess.c
new file mode 100644
index 0000000..cfdf130
--- /dev/null
+++ b/src/udev/udev-builtin-uaccess.c
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * manage device node user ACL
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "sd-login.h"
+
+#include "device-util.h"
+#include "login-util.h"
+#include "logind-acl.h"
+#include "log.h"
+#include "udev-builtin.h"
+
+static int builtin_uaccess(sd_device *dev, int argc, char *argv[], bool test) {
+ const char *path = NULL, *seat;
+ bool changed_acl = false;
+ uid_t uid;
+ int r;
+
+ umask(0022);
+
+ /* don't muck around with ACLs when the system is not running systemd */
+ if (!logind_running())
+ return 0;
+
+ r = sd_device_get_devname(dev, &path);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to get device name: %m");
+ goto finish;
+ }
+
+ if (sd_device_get_property_value(dev, "ID_SEAT", &seat) < 0)
+ seat = "seat0";
+
+ r = sd_seat_get_active(seat, NULL, &uid);
+ if (r < 0) {
+ if (IN_SET(r, -ENXIO, -ENODATA))
+ /* No active session on this seat */
+ r = 0;
+ else
+ log_device_error_errno(dev, r, "Failed to determine active user on seat %s: %m", seat);
+
+ goto finish;
+ }
+
+ r = devnode_acl(path, true, false, 0, true, uid);
+ if (r < 0) {
+ log_device_full_errno(dev, r == -ENOENT ? LOG_DEBUG : LOG_ERR, r, "Failed to apply ACL: %m");
+ goto finish;
+ }
+
+ changed_acl = true;
+ r = 0;
+
+finish:
+ if (path && !changed_acl) {
+ int k;
+
+ /* Better be safe than sorry and reset ACL */
+ k = devnode_acl(path, true, false, 0, false, 0);
+ if (k < 0) {
+ log_device_full_errno(dev, k == -ENOENT ? LOG_DEBUG : LOG_ERR, k, "Failed to apply ACL: %m");
+ if (r >= 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+const UdevBuiltin udev_builtin_uaccess = {
+ .name = "uaccess",
+ .cmd = builtin_uaccess,
+ .help = "Manage device node user ACL",
+};
diff --git a/src/udev/udev-builtin-usb_id.c b/src/udev/udev-builtin-usb_id.c
new file mode 100644
index 0000000..fa554e7
--- /dev/null
+++ b/src/udev/udev-builtin-usb_id.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * USB device properties and persistent device path
+ *
+ * Copyright (c) 2005 SUSE Linux Products GmbH, Germany
+ * Author: Hannes Reinecke <hare@suse.de>
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "libudev-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+
+static void set_usb_iftype(char *to, int if_class_num, size_t len) {
+ const char *type = "generic";
+
+ switch (if_class_num) {
+ case 1:
+ type = "audio";
+ break;
+ case 2: /* CDC-Control */
+ break;
+ case 3:
+ type = "hid";
+ break;
+ case 5: /* Physical */
+ break;
+ case 6:
+ type = "media";
+ break;
+ case 7:
+ type = "printer";
+ break;
+ case 8:
+ type = "storage";
+ break;
+ case 9:
+ type = "hub";
+ break;
+ case 0x0a: /* CDC-Data */
+ break;
+ case 0x0b: /* Chip/Smart Card */
+ break;
+ case 0x0d: /* Content Security */
+ break;
+ case 0x0e:
+ type = "video";
+ break;
+ case 0xdc: /* Diagnostic Device */
+ break;
+ case 0xe0: /* Wireless Controller */
+ break;
+ case 0xfe: /* Application-specific */
+ break;
+ case 0xff: /* Vendor-specific */
+ break;
+ default:
+ break;
+ }
+ strncpy(to, type, len);
+ to[len-1] = '\0';
+}
+
+static int set_usb_mass_storage_ifsubtype(char *to, const char *from, size_t len) {
+ int type_num = 0;
+ char *eptr;
+ const char *type = "generic";
+
+ type_num = strtoul(from, &eptr, 0);
+ if (eptr != from) {
+ switch (type_num) {
+ case 1: /* RBC devices */
+ type = "rbc";
+ break;
+ case 2:
+ type = "atapi";
+ break;
+ case 3:
+ type = "tape";
+ break;
+ case 4: /* UFI */
+ type = "floppy";
+ break;
+ case 6: /* Transparent SPC-2 devices */
+ type = "scsi";
+ break;
+ default:
+ break;
+ }
+ }
+ strscpy(to, len, type);
+ return type_num;
+}
+
+static void set_scsi_type(char *to, const char *from, size_t len) {
+ int type_num;
+ char *eptr;
+ const char *type = "generic";
+
+ type_num = strtoul(from, &eptr, 0);
+ if (eptr != from) {
+ switch (type_num) {
+ case 0:
+ case 0xe:
+ type = "disk";
+ break;
+ case 1:
+ type = "tape";
+ break;
+ case 4:
+ case 7:
+ case 0xf:
+ type = "optical";
+ break;
+ case 5:
+ type = "cd";
+ break;
+ default:
+ break;
+ }
+ }
+ strscpy(to, len, type);
+}
+
+#define USB_DT_DEVICE 0x01
+#define USB_DT_INTERFACE 0x04
+
+static int dev_if_packed_info(sd_device *dev, char *ifs_str, size_t len) {
+ _cleanup_close_ int fd = -1;
+ ssize_t size;
+ unsigned char buf[18 + 65535];
+ size_t pos = 0;
+ unsigned strpos = 0;
+ const char *filename, *syspath;
+ int r;
+ struct usb_interface_descriptor {
+ uint8_t bLength;
+ uint8_t bDescriptorType;
+ uint8_t bInterfaceNumber;
+ uint8_t bAlternateSetting;
+ uint8_t bNumEndpoints;
+ uint8_t bInterfaceClass;
+ uint8_t bInterfaceSubClass;
+ uint8_t bInterfaceProtocol;
+ uint8_t iInterface;
+ } _packed_;
+
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ filename = strjoina(syspath, "/descriptors");
+ fd = open(filename, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_device_debug_errno(dev, errno, "Failed to open \"%s\": %m", filename);
+
+ size = read(fd, buf, sizeof(buf));
+ if (size < 18)
+ return log_device_warning_errno(dev, SYNTHETIC_ERRNO(EIO),
+ "Short read from \"%s\"", filename);
+ assert((size_t) size <= sizeof buf);
+
+ ifs_str[0] = '\0';
+ while (pos + sizeof(struct usb_interface_descriptor) < (size_t) size &&
+ strpos + 7 < len - 2) {
+
+ struct usb_interface_descriptor *desc;
+ char if_str[8];
+
+ desc = (struct usb_interface_descriptor *) (buf + pos);
+ if (desc->bLength < 3)
+ break;
+ if (desc->bLength > size - sizeof(struct usb_interface_descriptor))
+ return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EIO),
+ "Corrupt data read from \"%s\"", filename);
+ pos += desc->bLength;
+
+ if (desc->bDescriptorType != USB_DT_INTERFACE)
+ continue;
+
+ if (snprintf(if_str, 8, ":%02x%02x%02x",
+ desc->bInterfaceClass,
+ desc->bInterfaceSubClass,
+ desc->bInterfaceProtocol) != 7)
+ continue;
+
+ if (strstr(ifs_str, if_str))
+ continue;
+
+ memcpy(&ifs_str[strpos], if_str, 8),
+ strpos += 7;
+ }
+
+ if (strpos > 0) {
+ ifs_str[strpos++] = ':';
+ ifs_str[strpos++] = '\0';
+ }
+
+ return 0;
+}
+
+/*
+ * A unique USB identification is generated like this:
+ *
+ * 1.) Get the USB device type from InterfaceClass and InterfaceSubClass
+ * 2.) If the device type is 'Mass-Storage/SPC-2' or 'Mass-Storage/RBC',
+ * use the SCSI vendor and model as USB-Vendor and USB-model.
+ * 3.) Otherwise, use the USB manufacturer and product as
+ * USB-Vendor and USB-model. Any non-printable characters
+ * in those strings will be skipped; a slash '/' will be converted
+ * into a full stop '.'.
+ * 4.) If that fails, too, we will use idVendor and idProduct
+ * as USB-Vendor and USB-model.
+ * 5.) The USB identification is the USB-vendor and USB-model
+ * string concatenated with an underscore '_'.
+ * 6.) If the device supplies a serial number, this number
+ * is concatenated with the identification with an underscore '_'.
+ */
+static int builtin_usb_id(sd_device *dev, int argc, char *argv[], bool test) {
+ char vendor_str[64] = "";
+ char vendor_str_enc[256];
+ const char *vendor_id;
+ char model_str[64] = "";
+ char model_str_enc[256];
+ const char *product_id;
+ char serial_str[UTIL_NAME_SIZE] = "";
+ char packed_if_str[UTIL_NAME_SIZE] = "";
+ char revision_str[64] = "";
+ char type_str[64] = "";
+ char instance_str[64] = "";
+ const char *ifnum = NULL;
+ const char *driver = NULL;
+ char serial[256];
+
+ sd_device *dev_interface, *dev_usb;
+ const char *if_class, *if_subclass;
+ int if_class_num;
+ int protocol = 0;
+ size_t l;
+ char *s;
+
+ const char *syspath, *sysname, *devtype, *interface_syspath;
+ int r;
+
+ assert(dev);
+
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(dev, &sysname);
+ if (r < 0)
+ return r;
+
+ /* shortcut, if we are called directly for a "usb_device" type */
+ if (sd_device_get_devtype(dev, &devtype) >= 0 && streq(devtype, "usb_device")) {
+ dev_if_packed_info(dev, packed_if_str, sizeof(packed_if_str));
+ dev_usb = dev;
+ goto fallback;
+ }
+
+ /* usb interface directory */
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "usb", "usb_interface", &dev_interface);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to access usb_interface: %m");
+
+ r = sd_device_get_syspath(dev_interface, &interface_syspath);
+ if (r < 0)
+ return log_device_debug_errno(dev_interface, r, "Failed to get syspath: %m");
+ (void) sd_device_get_sysattr_value(dev_interface, "bInterfaceNumber", &ifnum);
+ (void) sd_device_get_sysattr_value(dev_interface, "driver", &driver);
+
+ r = sd_device_get_sysattr_value(dev_interface, "bInterfaceClass", &if_class);
+ if (r < 0)
+ return log_device_debug_errno(dev_interface, r, "Failed to get bInterfaceClass attribute: %m");
+
+ if_class_num = strtoul(if_class, NULL, 16);
+ if (if_class_num == 8) {
+ /* mass storage */
+ if (sd_device_get_sysattr_value(dev_interface, "bInterfaceSubClass", &if_subclass) >= 0)
+ protocol = set_usb_mass_storage_ifsubtype(type_str, if_subclass, sizeof(type_str)-1);
+ } else
+ set_usb_iftype(type_str, if_class_num, sizeof(type_str)-1);
+
+ log_device_debug(dev_interface, "if_class:%d protocol:%d", if_class_num, protocol);
+
+ /* usb device directory */
+ r = sd_device_get_parent_with_subsystem_devtype(dev_interface, "usb", "usb_device", &dev_usb);
+ if (r < 0)
+ return log_device_debug_errno(dev_interface, r, "Failed to find parent 'usb' device");
+
+ /* all interfaces of the device in a single string */
+ dev_if_packed_info(dev_usb, packed_if_str, sizeof(packed_if_str));
+
+ /* mass storage : SCSI or ATAPI */
+ if (IN_SET(protocol, 6, 2)) {
+ sd_device *dev_scsi;
+ const char *scsi_sysname, *scsi_model, *scsi_vendor, *scsi_type, *scsi_rev;
+ int host, bus, target, lun;
+
+ /* get scsi device */
+ r = sd_device_get_parent_with_subsystem_devtype(dev, "scsi", "scsi_device", &dev_scsi);
+ if (r < 0) {
+ log_device_debug_errno(dev, r, "Unable to find parent SCSI device");
+ goto fallback;
+ }
+ if (sd_device_get_sysname(dev_scsi, &scsi_sysname) < 0)
+ goto fallback;
+ if (sscanf(scsi_sysname, "%d:%d:%d:%d", &host, &bus, &target, &lun) != 4) {
+ log_device_debug(dev_scsi, "Invalid SCSI device");
+ goto fallback;
+ }
+
+ /* Generic SPC-2 device */
+ r = sd_device_get_sysattr_value(dev_scsi, "vendor", &scsi_vendor);
+ if (r < 0) {
+ log_device_debug_errno(dev_scsi, r, "Failed to get SCSI vendor attribute: %m");
+ goto fallback;
+ }
+ udev_util_encode_string(scsi_vendor, vendor_str_enc, sizeof(vendor_str_enc));
+ util_replace_whitespace(scsi_vendor, vendor_str, sizeof(vendor_str)-1);
+ util_replace_chars(vendor_str, NULL);
+
+ r = sd_device_get_sysattr_value(dev_scsi, "model", &scsi_model);
+ if (r < 0) {
+ log_device_debug_errno(dev_scsi, r, "Failed to get SCSI model attribute: %m");
+ goto fallback;
+ }
+ udev_util_encode_string(scsi_model, model_str_enc, sizeof(model_str_enc));
+ util_replace_whitespace(scsi_model, model_str, sizeof(model_str)-1);
+ util_replace_chars(model_str, NULL);
+
+ r = sd_device_get_sysattr_value(dev_scsi, "type", &scsi_type);
+ if (r < 0) {
+ log_device_debug_errno(dev_scsi, r, "Failed to get SCSI type attribute: %m");
+ goto fallback;
+ }
+ set_scsi_type(type_str, scsi_type, sizeof(type_str)-1);
+
+ r = sd_device_get_sysattr_value(dev_scsi, "rev", &scsi_rev);
+ if (r < 0) {
+ log_device_debug_errno(dev_scsi, r, "Failed to get SCSI revision attribute: %m");
+ goto fallback;
+ }
+ util_replace_whitespace(scsi_rev, revision_str, sizeof(revision_str)-1);
+ util_replace_chars(revision_str, NULL);
+
+ /*
+ * some broken devices have the same identifiers
+ * for all luns, export the target:lun number
+ */
+ sprintf(instance_str, "%d:%d", target, lun);
+ }
+
+fallback:
+ r = sd_device_get_sysattr_value(dev_usb, "idVendor", &vendor_id);
+ if (r < 0)
+ return log_device_debug_errno(dev_usb, r, "Failed to get idVendor attribute: %m");
+
+ r = sd_device_get_sysattr_value(dev_usb, "idProduct", &product_id);
+ if (r < 0)
+ return log_device_debug_errno(dev_usb, r, "Failed to get idProduct attribute: %m");
+
+ /* fall back to USB vendor & device */
+ if (vendor_str[0] == '\0') {
+ const char *usb_vendor;
+
+ if (sd_device_get_sysattr_value(dev_usb, "manufacturer", &usb_vendor) < 0)
+ usb_vendor = vendor_id;
+ udev_util_encode_string(usb_vendor, vendor_str_enc, sizeof(vendor_str_enc));
+ util_replace_whitespace(usb_vendor, vendor_str, sizeof(vendor_str)-1);
+ util_replace_chars(vendor_str, NULL);
+ }
+
+ if (model_str[0] == '\0') {
+ const char *usb_model;
+
+ if (sd_device_get_sysattr_value(dev_usb, "product", &usb_model) < 0)
+ usb_model = product_id;
+ udev_util_encode_string(usb_model, model_str_enc, sizeof(model_str_enc));
+ util_replace_whitespace(usb_model, model_str, sizeof(model_str)-1);
+ util_replace_chars(model_str, NULL);
+ }
+
+ if (revision_str[0] == '\0') {
+ const char *usb_rev;
+
+ if (sd_device_get_sysattr_value(dev_usb, "bcdDevice", &usb_rev) >= 0) {
+ util_replace_whitespace(usb_rev, revision_str, sizeof(revision_str)-1);
+ util_replace_chars(revision_str, NULL);
+ }
+ }
+
+ if (serial_str[0] == '\0') {
+ const char *usb_serial;
+
+ if (sd_device_get_sysattr_value(dev_usb, "serial", &usb_serial) >= 0) {
+ const unsigned char *p;
+
+ /* http://msdn.microsoft.com/en-us/library/windows/hardware/gg487321.aspx */
+ for (p = (unsigned char *) usb_serial; *p != '\0'; p++)
+ if (*p < 0x20 || *p > 0x7f || *p == ',') {
+ usb_serial = NULL;
+ break;
+ }
+
+ if (usb_serial) {
+ util_replace_whitespace(usb_serial, serial_str, sizeof(serial_str)-1);
+ util_replace_chars(serial_str, NULL);
+ }
+ }
+ }
+
+ s = serial;
+ l = strpcpyl(&s, sizeof(serial), vendor_str, "_", model_str, NULL);
+ if (!isempty(serial_str))
+ l = strpcpyl(&s, l, "_", serial_str, NULL);
+
+ if (!isempty(instance_str))
+ strpcpyl(&s, l, "-", instance_str, NULL);
+
+ udev_builtin_add_property(dev, test, "ID_VENDOR", vendor_str);
+ udev_builtin_add_property(dev, test, "ID_VENDOR_ENC", vendor_str_enc);
+ udev_builtin_add_property(dev, test, "ID_VENDOR_ID", vendor_id);
+ udev_builtin_add_property(dev, test, "ID_MODEL", model_str);
+ udev_builtin_add_property(dev, test, "ID_MODEL_ENC", model_str_enc);
+ udev_builtin_add_property(dev, test, "ID_MODEL_ID", product_id);
+ udev_builtin_add_property(dev, test, "ID_REVISION", revision_str);
+ udev_builtin_add_property(dev, test, "ID_SERIAL", serial);
+ if (!isempty(serial_str))
+ udev_builtin_add_property(dev, test, "ID_SERIAL_SHORT", serial_str);
+ if (!isempty(type_str))
+ udev_builtin_add_property(dev, test, "ID_TYPE", type_str);
+ if (!isempty(instance_str))
+ udev_builtin_add_property(dev, test, "ID_INSTANCE", instance_str);
+ udev_builtin_add_property(dev, test, "ID_BUS", "usb");
+ if (!isempty(packed_if_str))
+ udev_builtin_add_property(dev, test, "ID_USB_INTERFACES", packed_if_str);
+ if (ifnum)
+ udev_builtin_add_property(dev, test, "ID_USB_INTERFACE_NUM", ifnum);
+ if (driver)
+ udev_builtin_add_property(dev, test, "ID_USB_DRIVER", driver);
+ return 0;
+}
+
+const UdevBuiltin udev_builtin_usb_id = {
+ .name = "usb_id",
+ .cmd = builtin_usb_id,
+ .help = "USB device properties",
+ .run_once = true,
+};
diff --git a/src/udev/udev-builtin.c b/src/udev/udev-builtin.c
new file mode 100644
index 0000000..80d1766
--- /dev/null
+++ b/src/udev/udev-builtin.c
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <stdio.h>
+
+#include "device-private.h"
+#include "device-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "udev-builtin.h"
+
+static bool initialized;
+
+static const UdevBuiltin *const builtins[_UDEV_BUILTIN_MAX] = {
+#if HAVE_BLKID
+ [UDEV_BUILTIN_BLKID] = &udev_builtin_blkid,
+#endif
+ [UDEV_BUILTIN_BTRFS] = &udev_builtin_btrfs,
+ [UDEV_BUILTIN_HWDB] = &udev_builtin_hwdb,
+ [UDEV_BUILTIN_INPUT_ID] = &udev_builtin_input_id,
+ [UDEV_BUILTIN_KEYBOARD] = &udev_builtin_keyboard,
+#if HAVE_KMOD
+ [UDEV_BUILTIN_KMOD] = &udev_builtin_kmod,
+#endif
+ [UDEV_BUILTIN_NET_ID] = &udev_builtin_net_id,
+ [UDEV_BUILTIN_NET_LINK] = &udev_builtin_net_setup_link,
+ [UDEV_BUILTIN_PATH_ID] = &udev_builtin_path_id,
+ [UDEV_BUILTIN_USB_ID] = &udev_builtin_usb_id,
+#if HAVE_ACL
+ [UDEV_BUILTIN_UACCESS] = &udev_builtin_uaccess,
+#endif
+};
+
+void udev_builtin_init(void) {
+ unsigned i;
+
+ if (initialized)
+ return;
+
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i] && builtins[i]->init)
+ builtins[i]->init();
+
+ initialized = true;
+}
+
+void udev_builtin_exit(void) {
+ unsigned i;
+
+ if (!initialized)
+ return;
+
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i] && builtins[i]->exit)
+ builtins[i]->exit();
+
+ initialized = false;
+}
+
+bool udev_builtin_validate(void) {
+ unsigned i;
+
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i] && builtins[i]->validate && builtins[i]->validate())
+ return true;
+ return false;
+}
+
+void udev_builtin_list(void) {
+ unsigned i;
+
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i])
+ fprintf(stderr, " %-14s %s\n", builtins[i]->name, builtins[i]->help);
+}
+
+const char *udev_builtin_name(UdevBuiltinCommand cmd) {
+ assert(cmd >= 0 && cmd < _UDEV_BUILTIN_MAX);
+
+ if (!builtins[cmd])
+ return NULL;
+
+ return builtins[cmd]->name;
+}
+
+bool udev_builtin_run_once(UdevBuiltinCommand cmd) {
+ assert(cmd >= 0 && cmd < _UDEV_BUILTIN_MAX);
+
+ if (!builtins[cmd])
+ return false;
+
+ return builtins[cmd]->run_once;
+}
+
+UdevBuiltinCommand udev_builtin_lookup(const char *command) {
+ UdevBuiltinCommand i;
+ size_t n;
+
+ assert(command);
+
+ command += strspn(command, WHITESPACE);
+ n = strcspn(command, WHITESPACE);
+ for (i = 0; i < _UDEV_BUILTIN_MAX; i++)
+ if (builtins[i] && strneq(builtins[i]->name, command, n))
+ return i;
+
+ return _UDEV_BUILTIN_INVALID;
+}
+
+int udev_builtin_run(sd_device *dev, UdevBuiltinCommand cmd, const char *command, bool test) {
+ _cleanup_strv_free_ char **argv = NULL;
+ int r;
+
+ assert(dev);
+ assert(cmd >= 0 && cmd < _UDEV_BUILTIN_MAX);
+ assert(command);
+
+ if (!builtins[cmd])
+ return -EOPNOTSUPP;
+
+ r = strv_split_full(&argv, command, NULL, EXTRACT_UNQUOTE | EXTRACT_RELAX | EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+
+ /* we need '0' here to reset the internal state */
+ optind = 0;
+ return builtins[cmd]->cmd(dev, strv_length(argv), argv, test);
+}
+
+int udev_builtin_add_property(sd_device *dev, bool test, const char *key, const char *val) {
+ int r;
+
+ assert(dev);
+ assert(key);
+
+ r = device_add_property(dev, key, val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to add property '%s%s%s'",
+ key, val ? "=" : "", strempty(val));
+
+ if (test)
+ printf("%s=%s\n", key, strempty(val));
+
+ return 0;
+}
diff --git a/src/udev/udev-builtin.h b/src/udev/udev-builtin.h
new file mode 100644
index 0000000..14d6406
--- /dev/null
+++ b/src/udev/udev-builtin.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-device.h"
+
+typedef enum {
+#if HAVE_BLKID
+ UDEV_BUILTIN_BLKID,
+#endif
+ UDEV_BUILTIN_BTRFS,
+ UDEV_BUILTIN_HWDB,
+ UDEV_BUILTIN_INPUT_ID,
+ UDEV_BUILTIN_KEYBOARD,
+#if HAVE_KMOD
+ UDEV_BUILTIN_KMOD,
+#endif
+ UDEV_BUILTIN_NET_ID,
+ UDEV_BUILTIN_NET_LINK,
+ UDEV_BUILTIN_PATH_ID,
+ UDEV_BUILTIN_USB_ID,
+#if HAVE_ACL
+ UDEV_BUILTIN_UACCESS,
+#endif
+ _UDEV_BUILTIN_MAX,
+ _UDEV_BUILTIN_INVALID = -1,
+} UdevBuiltinCommand;
+
+typedef struct UdevBuiltin {
+ const char *name;
+ int (*cmd)(sd_device *dev, int argc, char *argv[], bool test);
+ const char *help;
+ int (*init)(void);
+ void (*exit)(void);
+ bool (*validate)(void);
+ bool run_once;
+} UdevBuiltin;
+
+#define PTR_TO_UDEV_BUILTIN_CMD(p) ((UdevBuiltinCommand) ((intptr_t) (p)-1))
+#define UDEV_BUILTIN_CMD_TO_PTR(u) ((void *) ((intptr_t) (u)+1))
+
+#if HAVE_BLKID
+extern const UdevBuiltin udev_builtin_blkid;
+#endif
+extern const UdevBuiltin udev_builtin_btrfs;
+extern const UdevBuiltin udev_builtin_hwdb;
+extern const UdevBuiltin udev_builtin_input_id;
+extern const UdevBuiltin udev_builtin_keyboard;
+#if HAVE_KMOD
+extern const UdevBuiltin udev_builtin_kmod;
+#endif
+extern const UdevBuiltin udev_builtin_net_id;
+extern const UdevBuiltin udev_builtin_net_setup_link;
+extern const UdevBuiltin udev_builtin_path_id;
+extern const UdevBuiltin udev_builtin_usb_id;
+#if HAVE_ACL
+extern const UdevBuiltin udev_builtin_uaccess;
+#endif
+
+void udev_builtin_init(void);
+void udev_builtin_exit(void);
+UdevBuiltinCommand udev_builtin_lookup(const char *command);
+const char *udev_builtin_name(UdevBuiltinCommand cmd);
+bool udev_builtin_run_once(UdevBuiltinCommand cmd);
+int udev_builtin_run(sd_device *dev, UdevBuiltinCommand cmd, const char *command, bool test);
+void udev_builtin_list(void);
+bool udev_builtin_validate(void);
+int udev_builtin_add_property(sd_device *dev, bool test, const char *key, const char *val);
+int udev_builtin_hwdb_lookup(sd_device *dev, const char *prefix, const char *modalias,
+ const char *filter, bool test);
diff --git a/src/udev/udev-ctrl.c b/src/udev/udev-ctrl.c
new file mode 100644
index 0000000..48355aa
--- /dev/null
+++ b/src/udev/udev-ctrl.c
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * libudev - interface to udev device information
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "io-util.h"
+#include "socket-util.h"
+#include "strxcpyx.h"
+#include "udev-ctrl.h"
+#include "util.h"
+
+/* wire protocol magic must match */
+#define UDEV_CTRL_MAGIC 0xdead1dea
+
+struct udev_ctrl_msg_wire {
+ char version[16];
+ unsigned magic;
+ enum udev_ctrl_msg_type type;
+ union udev_ctrl_msg_value value;
+};
+
+struct udev_ctrl {
+ unsigned n_ref;
+ int sock;
+ int sock_connect;
+ union sockaddr_union saddr;
+ socklen_t addrlen;
+ bool bound:1;
+ bool cleanup_socket:1;
+ bool connected:1;
+ bool maybe_disconnected:1;
+ sd_event *event;
+ sd_event_source *event_source;
+ sd_event_source *event_source_connect;
+ udev_ctrl_handler_t callback;
+ void *userdata;
+};
+
+int udev_ctrl_new_from_fd(struct udev_ctrl **ret, int fd) {
+ _cleanup_close_ int sock = -1;
+ struct udev_ctrl *uctrl;
+
+ assert(ret);
+
+ if (fd < 0) {
+ sock = socket(AF_LOCAL, SOCK_SEQPACKET|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
+ if (sock < 0)
+ return log_error_errno(errno, "Failed to create socket: %m");
+ }
+
+ uctrl = new(struct udev_ctrl, 1);
+ if (!uctrl)
+ return -ENOMEM;
+
+ *uctrl = (struct udev_ctrl) {
+ .n_ref = 1,
+ .sock = fd >= 0 ? fd : TAKE_FD(sock),
+ .sock_connect = -1,
+ .bound = fd >= 0,
+ };
+
+ uctrl->saddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ .sun_path = "/run/udev/control",
+ };
+
+ uctrl->addrlen = SOCKADDR_UN_LEN(uctrl->saddr.un);
+
+ *ret = TAKE_PTR(uctrl);
+ return 0;
+}
+
+int udev_ctrl_enable_receiving(struct udev_ctrl *uctrl) {
+ int r;
+
+ assert(uctrl);
+
+ if (uctrl->bound)
+ return 0;
+
+ r = bind(uctrl->sock, &uctrl->saddr.sa, uctrl->addrlen);
+ if (r < 0 && errno == EADDRINUSE) {
+ (void) sockaddr_un_unlink(&uctrl->saddr.un);
+ r = bind(uctrl->sock, &uctrl->saddr.sa, uctrl->addrlen);
+ }
+
+ if (r < 0)
+ return log_error_errno(errno, "Failed to bind udev control socket: %m");
+
+ if (listen(uctrl->sock, 0) < 0)
+ return log_error_errno(errno, "Failed to listen udev control socket: %m");
+
+ uctrl->bound = true;
+ uctrl->cleanup_socket = true;
+
+ return 0;
+}
+
+static void udev_ctrl_disconnect(struct udev_ctrl *uctrl) {
+ if (!uctrl)
+ return;
+
+ uctrl->event_source_connect = sd_event_source_unref(uctrl->event_source_connect);
+ uctrl->sock_connect = safe_close(uctrl->sock_connect);
+}
+
+static struct udev_ctrl *udev_ctrl_free(struct udev_ctrl *uctrl) {
+ assert(uctrl);
+
+ udev_ctrl_disconnect(uctrl);
+
+ sd_event_source_unref(uctrl->event_source);
+ safe_close(uctrl->sock);
+
+ sd_event_unref(uctrl->event);
+ return mfree(uctrl);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(struct udev_ctrl, udev_ctrl, udev_ctrl_free);
+
+int udev_ctrl_cleanup(struct udev_ctrl *uctrl) {
+ if (!uctrl)
+ return 0;
+ if (uctrl->cleanup_socket)
+ sockaddr_un_unlink(&uctrl->saddr.un);
+ return 0;
+}
+
+int udev_ctrl_attach_event(struct udev_ctrl *uctrl, sd_event *event) {
+ int r;
+
+ assert_return(uctrl, -EINVAL);
+ assert_return(!uctrl->event, -EBUSY);
+
+ if (event)
+ uctrl->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&uctrl->event);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+sd_event_source *udev_ctrl_get_event_source(struct udev_ctrl *uctrl) {
+ assert(uctrl);
+
+ return uctrl->event_source;
+}
+
+static void udev_ctrl_disconnect_and_listen_again(struct udev_ctrl *uctrl) {
+ udev_ctrl_disconnect(uctrl);
+ udev_ctrl_unref(uctrl);
+ (void) sd_event_source_set_enabled(uctrl->event_source, SD_EVENT_ON);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_ctrl *, udev_ctrl_disconnect_and_listen_again);
+
+static int udev_ctrl_connection_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(udev_ctrl_disconnect_and_listen_againp) struct udev_ctrl *uctrl = NULL;
+ struct udev_ctrl_msg_wire msg_wire;
+ struct iovec iov = IOVEC_MAKE(&msg_wire, sizeof(struct udev_ctrl_msg_wire));
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ struct msghdr smsg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ struct ucred *cred;
+ ssize_t size;
+
+ assert(userdata);
+
+ /* When UDEV_CTRL_EXIT is received, manager unref udev_ctrl object.
+ * To avoid the object freed, let's increment the refcount. */
+ uctrl = udev_ctrl_ref(userdata);
+
+ size = next_datagram_size_fd(fd);
+ if (size < 0)
+ return log_error_errno(size, "Failed to get size of message: %m");
+ if (size == 0)
+ return 0; /* Client disconnects? */
+
+ size = recvmsg_safe(fd, &smsg, 0);
+ if (size == -EINTR)
+ return 0;
+ if (size < 0)
+ return log_error_errno(size, "Failed to receive ctrl message: %m");
+
+ cmsg_close_all(&smsg);
+
+ cmsg = CMSG_FIRSTHDR(&smsg);
+
+ if (!cmsg || cmsg->cmsg_type != SCM_CREDENTIALS) {
+ log_error("No sender credentials received, ignoring message");
+ return 0;
+ }
+
+ cred = (struct ucred *) CMSG_DATA(cmsg);
+
+ if (cred->uid != 0) {
+ log_error("Invalid sender uid "UID_FMT", ignoring message", cred->uid);
+ return 0;
+ }
+
+ if (msg_wire.magic != UDEV_CTRL_MAGIC) {
+ log_error("Message magic 0x%08x doesn't match, ignoring message", msg_wire.magic);
+ return 0;
+ }
+
+ if (msg_wire.type == _UDEV_CTRL_END_MESSAGES)
+ return 0;
+
+ if (uctrl->callback)
+ (void) uctrl->callback(uctrl, msg_wire.type, &msg_wire.value, uctrl->userdata);
+
+ /* Do not disconnect and wait for next message. */
+ uctrl = udev_ctrl_unref(uctrl);
+ return 0;
+}
+
+static int udev_ctrl_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ struct udev_ctrl *uctrl = userdata;
+ _cleanup_close_ int sock = -1;
+ struct ucred ucred;
+ int r;
+
+ assert(uctrl);
+
+ sock = accept4(fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK);
+ if (sock < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return 0;
+
+ return log_error_errno(errno, "Failed to accept ctrl connection: %m");
+ }
+
+ /* check peer credential of connection */
+ r = getpeercred(sock, &ucred);
+ if (r < 0) {
+ log_error_errno(r, "Failed to receive credentials of ctrl connection: %m");
+ return 0;
+ }
+
+ if (ucred.uid > 0) {
+ log_error("Invalid sender uid "UID_FMT", closing connection", ucred.uid);
+ return 0;
+ }
+
+ /* enable receiving of the sender credentials in the messages */
+ r = setsockopt_int(sock, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set SO_PASSCRED, ignoring: %m");
+
+ r = sd_event_add_io(uctrl->event, &uctrl->event_source_connect, sock, EPOLLIN, udev_ctrl_connection_event_handler, uctrl);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create event source for udev control connection: %m");
+ return 0;
+ }
+
+ (void) sd_event_source_set_description(uctrl->event_source_connect, "udev-ctrl-connection");
+
+ /* Do not accept multiple connection. */
+ (void) sd_event_source_set_enabled(uctrl->event_source, SD_EVENT_OFF);
+
+ uctrl->sock_connect = TAKE_FD(sock);
+ return 0;
+}
+
+int udev_ctrl_start(struct udev_ctrl *uctrl, udev_ctrl_handler_t callback, void *userdata) {
+ int r;
+
+ assert(uctrl);
+
+ if (!uctrl->event) {
+ r = udev_ctrl_attach_event(uctrl, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = udev_ctrl_enable_receiving(uctrl);
+ if (r < 0)
+ return r;
+
+ uctrl->callback = callback;
+ uctrl->userdata = userdata;
+
+ r = sd_event_add_io(uctrl->event, &uctrl->event_source, uctrl->sock, EPOLLIN, udev_ctrl_event_handler, uctrl);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(uctrl->event_source, "udev-ctrl");
+
+ return 0;
+}
+
+int udev_ctrl_send(struct udev_ctrl *uctrl, enum udev_ctrl_msg_type type, int intval, const char *buf) {
+ struct udev_ctrl_msg_wire ctrl_msg_wire = {
+ .version = "udev-" STRINGIFY(PROJECT_VERSION),
+ .magic = UDEV_CTRL_MAGIC,
+ .type = type,
+ };
+
+ if (uctrl->maybe_disconnected)
+ return -ENOANO; /* to distinguish this from other errors. */
+
+ if (buf)
+ strscpy(ctrl_msg_wire.value.buf, sizeof(ctrl_msg_wire.value.buf), buf);
+ else
+ ctrl_msg_wire.value.intval = intval;
+
+ if (!uctrl->connected) {
+ if (connect(uctrl->sock, &uctrl->saddr.sa, uctrl->addrlen) < 0)
+ return -errno;
+ uctrl->connected = true;
+ }
+
+ if (send(uctrl->sock, &ctrl_msg_wire, sizeof(ctrl_msg_wire), 0) < 0)
+ return -errno;
+
+ if (type == UDEV_CTRL_EXIT)
+ uctrl->maybe_disconnected = true;
+
+ return 0;
+}
+
+int udev_ctrl_wait(struct udev_ctrl *uctrl, usec_t timeout) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *source_io = NULL, *source_timeout = NULL;
+ int r;
+
+ assert(uctrl);
+
+ if (uctrl->sock < 0)
+ return 0;
+ if (!uctrl->connected)
+ return 0;
+
+ if (!uctrl->maybe_disconnected) {
+ r = udev_ctrl_send(uctrl, _UDEV_CTRL_END_MESSAGES, 0, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ if (timeout == 0)
+ return 0;
+
+ if (!uctrl->event) {
+ r = udev_ctrl_attach_event(uctrl, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_io(uctrl->event, &source_io, uctrl->sock, EPOLLIN, NULL, INT_TO_PTR(0));
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(source_io, "udev-ctrl-wait-io");
+
+ if (timeout != USEC_INFINITY) {
+ r = sd_event_add_time_relative(
+ uctrl->event, &source_timeout, clock_boottime_or_monotonic(),
+ timeout,
+ 0, NULL, INT_TO_PTR(-ETIMEDOUT));
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(source_timeout, "udev-ctrl-wait-timeout");
+ }
+
+ return sd_event_loop(uctrl->event);
+}
diff --git a/src/udev/udev-ctrl.h b/src/udev/udev-ctrl.h
new file mode 100644
index 0000000..680fbf7
--- /dev/null
+++ b/src/udev/udev-ctrl.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "macro.h"
+#include "time-util.h"
+
+struct udev_ctrl;
+
+enum udev_ctrl_msg_type {
+ _UDEV_CTRL_END_MESSAGES,
+ UDEV_CTRL_SET_LOG_LEVEL,
+ UDEV_CTRL_STOP_EXEC_QUEUE,
+ UDEV_CTRL_START_EXEC_QUEUE,
+ UDEV_CTRL_RELOAD,
+ UDEV_CTRL_SET_ENV,
+ UDEV_CTRL_SET_CHILDREN_MAX,
+ UDEV_CTRL_PING,
+ UDEV_CTRL_EXIT,
+};
+
+union udev_ctrl_msg_value {
+ int intval;
+ char buf[256];
+};
+
+typedef int (*udev_ctrl_handler_t)(struct udev_ctrl *udev_ctrl, enum udev_ctrl_msg_type type,
+ const union udev_ctrl_msg_value *value, void *userdata);
+
+int udev_ctrl_new_from_fd(struct udev_ctrl **ret, int fd);
+static inline int udev_ctrl_new(struct udev_ctrl **ret) {
+ return udev_ctrl_new_from_fd(ret, -1);
+}
+
+int udev_ctrl_enable_receiving(struct udev_ctrl *uctrl);
+struct udev_ctrl *udev_ctrl_ref(struct udev_ctrl *uctrl);
+struct udev_ctrl *udev_ctrl_unref(struct udev_ctrl *uctrl);
+int udev_ctrl_cleanup(struct udev_ctrl *uctrl);
+int udev_ctrl_attach_event(struct udev_ctrl *uctrl, sd_event *event);
+int udev_ctrl_start(struct udev_ctrl *uctrl, udev_ctrl_handler_t callback, void *userdata);
+sd_event_source *udev_ctrl_get_event_source(struct udev_ctrl *uctrl);
+
+int udev_ctrl_wait(struct udev_ctrl *uctrl, usec_t timeout);
+
+int udev_ctrl_send(struct udev_ctrl *uctrl, enum udev_ctrl_msg_type type, int intval, const char *buf);
+static inline int udev_ctrl_send_set_log_level(struct udev_ctrl *uctrl, int priority) {
+ return udev_ctrl_send(uctrl, UDEV_CTRL_SET_LOG_LEVEL, priority, NULL);
+}
+
+static inline int udev_ctrl_send_stop_exec_queue(struct udev_ctrl *uctrl) {
+ return udev_ctrl_send(uctrl, UDEV_CTRL_STOP_EXEC_QUEUE, 0, NULL);
+}
+
+static inline int udev_ctrl_send_start_exec_queue(struct udev_ctrl *uctrl) {
+ return udev_ctrl_send(uctrl, UDEV_CTRL_START_EXEC_QUEUE, 0, NULL);
+}
+
+static inline int udev_ctrl_send_reload(struct udev_ctrl *uctrl) {
+ return udev_ctrl_send(uctrl, UDEV_CTRL_RELOAD, 0, NULL);
+}
+
+static inline int udev_ctrl_send_set_env(struct udev_ctrl *uctrl, const char *key) {
+ return udev_ctrl_send(uctrl, UDEV_CTRL_SET_ENV, 0, key);
+}
+
+static inline int udev_ctrl_send_set_children_max(struct udev_ctrl *uctrl, int count) {
+ return udev_ctrl_send(uctrl, UDEV_CTRL_SET_CHILDREN_MAX, count, NULL);
+}
+
+static inline int udev_ctrl_send_ping(struct udev_ctrl *uctrl) {
+ return udev_ctrl_send(uctrl, UDEV_CTRL_PING, 0, NULL);
+}
+
+static inline int udev_ctrl_send_exit(struct udev_ctrl *uctrl) {
+ return udev_ctrl_send(uctrl, UDEV_CTRL_EXIT, 0, NULL);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct udev_ctrl*, udev_ctrl_unref);
diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
new file mode 100644
index 0000000..5159d19
--- /dev/null
+++ b/src/udev/udev-event.c
@@ -0,0 +1,1087 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "format-util.h"
+#include "libudev-util.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+#include "udev-event.h"
+#include "udev-node.h"
+#include "udev-util.h"
+#include "udev-watch.h"
+#include "user-util.h"
+
+typedef struct Spawn {
+ sd_device *device;
+ const char *cmd;
+ pid_t pid;
+ usec_t timeout_warn_usec;
+ usec_t timeout_usec;
+ int timeout_signal;
+ usec_t event_birth_usec;
+ bool accept_failure;
+ int fd_stdout;
+ int fd_stderr;
+ char *result;
+ size_t result_size;
+ size_t result_len;
+} Spawn;
+
+UdevEvent *udev_event_new(sd_device *dev, usec_t exec_delay_usec, sd_netlink *rtnl) {
+ UdevEvent *event;
+
+ assert(dev);
+
+ event = new(UdevEvent, 1);
+ if (!event)
+ return NULL;
+
+ *event = (UdevEvent) {
+ .dev = sd_device_ref(dev),
+ .birth_usec = now(CLOCK_MONOTONIC),
+ .exec_delay_usec = exec_delay_usec,
+ .rtnl = sd_netlink_ref(rtnl),
+ .uid = UID_INVALID,
+ .gid = GID_INVALID,
+ .mode = MODE_INVALID,
+ };
+
+ return event;
+}
+
+UdevEvent *udev_event_free(UdevEvent *event) {
+ if (!event)
+ return NULL;
+
+ sd_device_unref(event->dev);
+ sd_device_unref(event->dev_db_clone);
+ sd_netlink_unref(event->rtnl);
+ ordered_hashmap_free_free_key(event->run_list);
+ ordered_hashmap_free_free_free(event->seclabel_list);
+ free(event->program_result);
+ free(event->name);
+
+ return mfree(event);
+}
+
+typedef enum {
+ FORMAT_SUBST_DEVNODE,
+ FORMAT_SUBST_ATTR,
+ FORMAT_SUBST_ENV,
+ FORMAT_SUBST_KERNEL,
+ FORMAT_SUBST_KERNEL_NUMBER,
+ FORMAT_SUBST_DRIVER,
+ FORMAT_SUBST_DEVPATH,
+ FORMAT_SUBST_ID,
+ FORMAT_SUBST_MAJOR,
+ FORMAT_SUBST_MINOR,
+ FORMAT_SUBST_RESULT,
+ FORMAT_SUBST_PARENT,
+ FORMAT_SUBST_NAME,
+ FORMAT_SUBST_LINKS,
+ FORMAT_SUBST_ROOT,
+ FORMAT_SUBST_SYS,
+ _FORMAT_SUBST_TYPE_MAX,
+ _FORMAT_SUBST_TYPE_INVALID = -1
+} FormatSubstitutionType;
+
+struct subst_map_entry {
+ const char *name;
+ const char fmt;
+ FormatSubstitutionType type;
+};
+
+static const struct subst_map_entry map[] = {
+ { .name = "devnode", .fmt = 'N', .type = FORMAT_SUBST_DEVNODE },
+ { .name = "tempnode", .fmt = 'N', .type = FORMAT_SUBST_DEVNODE }, /* deprecated */
+ { .name = "attr", .fmt = 's', .type = FORMAT_SUBST_ATTR },
+ { .name = "sysfs", .fmt = 's', .type = FORMAT_SUBST_ATTR }, /* deprecated */
+ { .name = "env", .fmt = 'E', .type = FORMAT_SUBST_ENV },
+ { .name = "kernel", .fmt = 'k', .type = FORMAT_SUBST_KERNEL },
+ { .name = "number", .fmt = 'n', .type = FORMAT_SUBST_KERNEL_NUMBER },
+ { .name = "driver", .fmt = 'd', .type = FORMAT_SUBST_DRIVER },
+ { .name = "devpath", .fmt = 'p', .type = FORMAT_SUBST_DEVPATH },
+ { .name = "id", .fmt = 'b', .type = FORMAT_SUBST_ID },
+ { .name = "major", .fmt = 'M', .type = FORMAT_SUBST_MAJOR },
+ { .name = "minor", .fmt = 'm', .type = FORMAT_SUBST_MINOR },
+ { .name = "result", .fmt = 'c', .type = FORMAT_SUBST_RESULT },
+ { .name = "parent", .fmt = 'P', .type = FORMAT_SUBST_PARENT },
+ { .name = "name", .fmt = 'D', .type = FORMAT_SUBST_NAME },
+ { .name = "links", .fmt = 'L', .type = FORMAT_SUBST_LINKS },
+ { .name = "root", .fmt = 'r', .type = FORMAT_SUBST_ROOT },
+ { .name = "sys", .fmt = 'S', .type = FORMAT_SUBST_SYS },
+};
+
+static const char *format_type_to_string(FormatSubstitutionType t) {
+ for (size_t i = 0; i < ELEMENTSOF(map); i++)
+ if (map[i].type == t)
+ return map[i].name;
+ return NULL;
+}
+
+static char format_type_to_char(FormatSubstitutionType t) {
+ for (size_t i = 0; i < ELEMENTSOF(map); i++)
+ if (map[i].type == t)
+ return map[i].fmt;
+ return '\0';
+}
+
+static int get_subst_type(const char **str, bool strict, FormatSubstitutionType *ret_type, char ret_attr[static UTIL_PATH_SIZE]) {
+ const char *p = *str, *q = NULL;
+ size_t i;
+
+ assert(str);
+ assert(*str);
+ assert(ret_type);
+ assert(ret_attr);
+
+ if (*p == '$') {
+ p++;
+ if (*p == '$') {
+ *str = p;
+ return 0;
+ }
+ for (i = 0; i < ELEMENTSOF(map); i++)
+ if ((q = startswith(p, map[i].name)))
+ break;
+ } else if (*p == '%') {
+ p++;
+ if (*p == '%') {
+ *str = p;
+ return 0;
+ }
+
+ for (i = 0; i < ELEMENTSOF(map); i++)
+ if (*p == map[i].fmt) {
+ q = p + 1;
+ break;
+ }
+ } else
+ return 0;
+ if (!q)
+ /* When 'strict' flag is set, then '$' and '%' must be escaped. */
+ return strict ? -EINVAL : 0;
+
+ if (*q == '{') {
+ const char *start, *end;
+ size_t len;
+
+ start = q + 1;
+ end = strchr(start, '}');
+ if (!end)
+ return -EINVAL;
+
+ len = end - start;
+ if (len == 0 || len >= UTIL_PATH_SIZE)
+ return -EINVAL;
+
+ strnscpy(ret_attr, UTIL_PATH_SIZE, start, len);
+ q = end + 1;
+ } else
+ *ret_attr = '\0';
+
+ *str = q;
+ *ret_type = map[i].type;
+ return 1;
+}
+
+static int safe_atou_optional_plus(const char *s, unsigned *ret) {
+ const char *p;
+ int r;
+
+ assert(s);
+ assert(ret);
+
+ /* Returns 1 if plus, 0 if no plus, negative on error */
+
+ p = endswith(s, "+");
+ if (p)
+ s = strndupa(s, p - s);
+
+ r = safe_atou(s, ret);
+ if (r < 0)
+ return r;
+
+ return !!p;
+}
+
+static ssize_t udev_event_subst_format(
+ UdevEvent *event,
+ FormatSubstitutionType type,
+ const char *attr,
+ char *dest,
+ size_t l) {
+ sd_device *parent, *dev = event->dev;
+ const char *val = NULL;
+ char *s = dest;
+ int r;
+
+ switch (type) {
+ case FORMAT_SUBST_DEVPATH:
+ r = sd_device_get_devpath(dev, &val);
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case FORMAT_SUBST_KERNEL:
+ r = sd_device_get_sysname(dev, &val);
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case FORMAT_SUBST_KERNEL_NUMBER:
+ r = sd_device_get_sysnum(dev, &val);
+ if (r == -ENOENT)
+ goto null_terminate;
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case FORMAT_SUBST_ID:
+ if (!event->dev_parent)
+ goto null_terminate;
+ r = sd_device_get_sysname(event->dev_parent, &val);
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case FORMAT_SUBST_DRIVER:
+ if (!event->dev_parent)
+ goto null_terminate;
+ r = sd_device_get_driver(event->dev_parent, &val);
+ if (r == -ENOENT)
+ goto null_terminate;
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case FORMAT_SUBST_MAJOR:
+ case FORMAT_SUBST_MINOR: {
+ dev_t devnum;
+
+ r = sd_device_get_devnum(dev, &devnum);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ l = strpcpyf(&s, l, "%u", r < 0 ? 0 : type == FORMAT_SUBST_MAJOR ? major(devnum) : minor(devnum));
+ break;
+ }
+ case FORMAT_SUBST_RESULT: {
+ unsigned index = 0; /* 0 means whole string */
+ bool has_plus;
+
+ if (!event->program_result)
+ goto null_terminate;
+
+ if (!isempty(attr)) {
+ r = safe_atou_optional_plus(attr, &index);
+ if (r < 0)
+ return r;
+
+ has_plus = r;
+ }
+
+ if (index == 0)
+ l = strpcpy(&s, l, event->program_result);
+ else {
+ const char *start, *p;
+ unsigned i;
+
+ p = skip_leading_chars(event->program_result, NULL);
+
+ for (i = 1; i < index; i++) {
+ while (*p && !strchr(WHITESPACE, *p))
+ p++;
+ p = skip_leading_chars(p, NULL);
+ if (*p == '\0')
+ break;
+ }
+ if (i != index) {
+ log_device_debug(dev, "requested part of result string not found");
+ goto null_terminate;
+ }
+
+ start = p;
+ /* %c{2+} copies the whole string from the second part on */
+ if (has_plus)
+ l = strpcpy(&s, l, start);
+ else {
+ while (*p && !strchr(WHITESPACE, *p))
+ p++;
+ l = strnpcpy(&s, l, start, p - start);
+ }
+ }
+ break;
+ }
+ case FORMAT_SUBST_ATTR: {
+ char vbuf[UTIL_NAME_SIZE];
+ int count;
+
+ if (isempty(attr))
+ return -EINVAL;
+
+ /* try to read the value specified by "[dmi/id]product_name" */
+ if (util_resolve_subsys_kernel(attr, vbuf, sizeof(vbuf), true) == 0)
+ val = vbuf;
+
+ /* try to read the attribute the device */
+ if (!val)
+ (void) sd_device_get_sysattr_value(dev, attr, &val);
+
+ /* try to read the attribute of the parent device, other matches have selected */
+ if (!val && event->dev_parent && event->dev_parent != dev)
+ (void) sd_device_get_sysattr_value(event->dev_parent, attr, &val);
+
+ if (!val)
+ goto null_terminate;
+
+ /* strip trailing whitespace, and replace unwanted characters */
+ if (val != vbuf)
+ strscpy(vbuf, sizeof(vbuf), val);
+ delete_trailing_chars(vbuf, NULL);
+ count = util_replace_chars(vbuf, UDEV_ALLOWED_CHARS_INPUT);
+ if (count > 0)
+ log_device_debug(dev, "%i character(s) replaced", count);
+ l = strpcpy(&s, l, vbuf);
+ break;
+ }
+ case FORMAT_SUBST_PARENT:
+ r = sd_device_get_parent(dev, &parent);
+ if (r == -ENOENT)
+ goto null_terminate;
+ if (r < 0)
+ return r;
+ r = sd_device_get_devname(parent, &val);
+ if (r == -ENOENT)
+ goto null_terminate;
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val + STRLEN("/dev/"));
+ break;
+ case FORMAT_SUBST_DEVNODE:
+ r = sd_device_get_devname(dev, &val);
+ if (r == -ENOENT)
+ goto null_terminate;
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ case FORMAT_SUBST_NAME:
+ if (event->name)
+ l = strpcpy(&s, l, event->name);
+ else if (sd_device_get_devname(dev, &val) >= 0)
+ l = strpcpy(&s, l, val + STRLEN("/dev/"));
+ else {
+ r = sd_device_get_sysname(dev, &val);
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ }
+ break;
+ case FORMAT_SUBST_LINKS:
+ FOREACH_DEVICE_DEVLINK(dev, val)
+ if (s == dest)
+ l = strpcpy(&s, l, val + STRLEN("/dev/"));
+ else
+ l = strpcpyl(&s, l, " ", val + STRLEN("/dev/"), NULL);
+ if (s == dest)
+ goto null_terminate;
+ break;
+ case FORMAT_SUBST_ROOT:
+ l = strpcpy(&s, l, "/dev");
+ break;
+ case FORMAT_SUBST_SYS:
+ l = strpcpy(&s, l, "/sys");
+ break;
+ case FORMAT_SUBST_ENV:
+ if (isempty(attr))
+ return -EINVAL;
+ r = sd_device_get_property_value(dev, attr, &val);
+ if (r == -ENOENT)
+ goto null_terminate;
+ if (r < 0)
+ return r;
+ l = strpcpy(&s, l, val);
+ break;
+ default:
+ assert_not_reached("Unknown format substitution type");
+ }
+
+ return s - dest;
+
+null_terminate:
+ *s = '\0';
+ return 0;
+}
+
+size_t udev_event_apply_format(UdevEvent *event,
+ const char *src, char *dest, size_t size,
+ bool replace_whitespace) {
+ const char *s = src;
+ int r;
+
+ assert(event);
+ assert(event->dev);
+ assert(src);
+ assert(dest);
+ assert(size > 0);
+
+ while (*s) {
+ FormatSubstitutionType type;
+ char attr[UTIL_PATH_SIZE];
+ ssize_t subst_len;
+
+ r = get_subst_type(&s, false, &type, attr);
+ if (r < 0) {
+ log_device_warning_errno(event->dev, r, "Invalid format string, ignoring: %s", src);
+ break;
+ } else if (r == 0) {
+ if (size < 2) /* need space for this char and the terminating NUL */
+ break;
+ *dest++ = *s++;
+ size--;
+ continue;
+ }
+
+ subst_len = udev_event_subst_format(event, type, attr, dest, size);
+ if (subst_len < 0) {
+ log_device_warning_errno(event->dev, subst_len,
+ "Failed to substitute variable '$%s' or apply format '%%%c', ignoring: %m",
+ format_type_to_string(type), format_type_to_char(type));
+ break;
+ }
+
+ /* FORMAT_SUBST_RESULT handles spaces itself */
+ if (replace_whitespace && type != FORMAT_SUBST_RESULT)
+ /* util_replace_whitespace can replace in-place,
+ * and does nothing if subst_len == 0 */
+ subst_len = util_replace_whitespace(dest, dest, subst_len);
+
+ dest += subst_len;
+ size -= subst_len;
+ }
+
+ assert(size >= 1);
+ *dest = '\0';
+ return size;
+}
+
+int udev_check_format(const char *value, size_t *offset, const char **hint) {
+ FormatSubstitutionType type;
+ const char *s = value;
+ char attr[UTIL_PATH_SIZE];
+ int r;
+
+ while (*s) {
+ r = get_subst_type(&s, true, &type, attr);
+ if (r < 0) {
+ if (offset)
+ *offset = s - value;
+ if (hint)
+ *hint = "invalid substitution type";
+ return r;
+ } else if (r == 0) {
+ s++;
+ continue;
+ }
+
+ if (IN_SET(type, FORMAT_SUBST_ATTR, FORMAT_SUBST_ENV) && isempty(attr)) {
+ if (offset)
+ *offset = s - value;
+ if (hint)
+ *hint = "attribute value missing";
+ return -EINVAL;
+ }
+
+ if (type == FORMAT_SUBST_RESULT && !isempty(attr)) {
+ unsigned i;
+
+ r = safe_atou_optional_plus(attr, &i);
+ if (r < 0) {
+ if (offset)
+ *offset = s - value;
+ if (hint)
+ *hint = "attribute value not a valid number";
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int on_spawn_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Spawn *spawn = userdata;
+ char buf[4096], *p;
+ size_t size;
+ ssize_t l;
+ int r;
+
+ assert(spawn);
+ assert(fd == spawn->fd_stdout || fd == spawn->fd_stderr);
+ assert(!spawn->result || spawn->result_len < spawn->result_size);
+
+ if (fd == spawn->fd_stdout && spawn->result) {
+ p = spawn->result + spawn->result_len;
+ size = spawn->result_size - spawn->result_len;
+ } else {
+ p = buf;
+ size = sizeof(buf);
+ }
+
+ l = read(fd, p, size - 1);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ goto reenable;
+
+ log_device_error_errno(spawn->device, errno,
+ "Failed to read stdout of '%s': %m", spawn->cmd);
+
+ return 0;
+ }
+
+ p[l] = '\0';
+ if (fd == spawn->fd_stdout && spawn->result)
+ spawn->result_len += l;
+
+ /* Log output only if we watch stderr. */
+ if (l > 0 && spawn->fd_stderr >= 0) {
+ _cleanup_strv_free_ char **v = NULL;
+ char **q;
+
+ v = strv_split_newlines(p);
+ if (!v)
+ return 0;
+
+ STRV_FOREACH(q, v)
+ log_device_debug(spawn->device, "'%s'(%s) '%s'", spawn->cmd,
+ fd == spawn->fd_stdout ? "out" : "err", *q);
+ }
+
+
+ if (l == 0)
+ return 0;
+
+ /* Re-enable the event source if we did not encounter EOF */
+reenable:
+ r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ log_device_error_errno(spawn->device, r,
+ "Failed to reactivate IO source of '%s'", spawn->cmd);
+ return 0;
+}
+
+static int on_spawn_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ Spawn *spawn = userdata;
+ char timeout[FORMAT_TIMESPAN_MAX];
+
+ assert(spawn);
+
+ kill_and_sigcont(spawn->pid, spawn->timeout_signal);
+
+ log_device_error(spawn->device, "Spawned process '%s' ["PID_FMT"] timed out after %s, killing",
+ spawn->cmd, spawn->pid,
+ format_timespan(timeout, sizeof(timeout), spawn->timeout_usec, USEC_PER_SEC));
+
+ return 1;
+}
+
+static int on_spawn_timeout_warning(sd_event_source *s, uint64_t usec, void *userdata) {
+ Spawn *spawn = userdata;
+ char timeout[FORMAT_TIMESPAN_MAX];
+
+ assert(spawn);
+
+ log_device_warning(spawn->device, "Spawned process '%s' ["PID_FMT"] is taking longer than %s to complete",
+ spawn->cmd, spawn->pid,
+ format_timespan(timeout, sizeof(timeout), spawn->timeout_warn_usec, USEC_PER_SEC));
+
+ return 1;
+}
+
+static int on_spawn_sigchld(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ Spawn *spawn = userdata;
+ int ret = -EIO;
+
+ assert(spawn);
+
+ switch (si->si_code) {
+ case CLD_EXITED:
+ if (si->si_status == 0)
+ log_device_debug(spawn->device, "Process '%s' succeeded.", spawn->cmd);
+ else
+ log_device_full(spawn->device, spawn->accept_failure ? LOG_DEBUG : LOG_WARNING,
+ "Process '%s' failed with exit code %i.", spawn->cmd, si->si_status);
+ ret = si->si_status;
+ break;
+ case CLD_KILLED:
+ case CLD_DUMPED:
+ log_device_error(spawn->device, "Process '%s' terminated by signal %s.", spawn->cmd, signal_to_string(si->si_status));
+ break;
+ default:
+ log_device_error(spawn->device, "Process '%s' failed due to unknown reason.", spawn->cmd);
+ }
+
+ sd_event_exit(sd_event_source_get_event(s), ret);
+ return 1;
+}
+
+static int spawn_wait(Spawn *spawn) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *sigchld_source = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *stdout_source = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *stderr_source = NULL;
+ int r;
+
+ assert(spawn);
+
+ r = sd_event_new(&e);
+ if (r < 0)
+ return r;
+
+ if (spawn->timeout_usec > 0) {
+ usec_t usec, age_usec;
+
+ usec = now(CLOCK_MONOTONIC);
+ age_usec = usec - spawn->event_birth_usec;
+ if (age_usec < spawn->timeout_usec) {
+ if (spawn->timeout_warn_usec > 0 &&
+ spawn->timeout_warn_usec < spawn->timeout_usec &&
+ spawn->timeout_warn_usec > age_usec) {
+ spawn->timeout_warn_usec -= age_usec;
+
+ r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
+ usec + spawn->timeout_warn_usec, USEC_PER_SEC,
+ on_spawn_timeout_warning, spawn);
+ if (r < 0)
+ return r;
+ }
+
+ spawn->timeout_usec -= age_usec;
+
+ r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
+ usec + spawn->timeout_usec, USEC_PER_SEC, on_spawn_timeout, spawn);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (spawn->fd_stdout >= 0) {
+ r = sd_event_add_io(e, &stdout_source, spawn->fd_stdout, EPOLLIN, on_spawn_io, spawn);
+ if (r < 0)
+ return r;
+ r = sd_event_source_set_enabled(stdout_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return r;
+ }
+
+ if (spawn->fd_stderr >= 0) {
+ r = sd_event_add_io(e, &stderr_source, spawn->fd_stderr, EPOLLIN, on_spawn_io, spawn);
+ if (r < 0)
+ return r;
+ r = sd_event_source_set_enabled(stderr_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_child(e, &sigchld_source, spawn->pid, WEXITED, on_spawn_sigchld, spawn);
+ if (r < 0)
+ return r;
+ /* SIGCHLD should be processed after IO is complete */
+ r = sd_event_source_set_priority(sigchld_source, SD_EVENT_PRIORITY_NORMAL + 1);
+ if (r < 0)
+ return r;
+
+
+ return sd_event_loop(e);
+}
+
+int udev_event_spawn(UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ bool accept_failure,
+ const char *cmd,
+ char *result, size_t ressize) {
+ _cleanup_close_pair_ int outpipe[2] = {-1, -1}, errpipe[2] = {-1, -1};
+ _cleanup_strv_free_ char **argv = NULL;
+ char **envp = NULL;
+ Spawn spawn;
+ pid_t pid;
+ int r;
+
+ assert(event);
+ assert(event->dev);
+ assert(result || ressize == 0);
+
+ /* pipes from child to parent */
+ if (result || log_get_max_level() >= LOG_INFO)
+ if (pipe2(outpipe, O_NONBLOCK|O_CLOEXEC) != 0)
+ return log_device_error_errno(event->dev, errno,
+ "Failed to create pipe for command '%s': %m", cmd);
+
+ if (log_get_max_level() >= LOG_INFO)
+ if (pipe2(errpipe, O_NONBLOCK|O_CLOEXEC) != 0)
+ return log_device_error_errno(event->dev, errno,
+ "Failed to create pipe for command '%s': %m", cmd);
+
+ r = strv_split_full(&argv, cmd, NULL, EXTRACT_UNQUOTE | EXTRACT_RELAX | EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return log_device_error_errno(event->dev, r, "Failed to split command: %m");
+
+ if (isempty(argv[0]))
+ return log_device_error_errno(event->dev, SYNTHETIC_ERRNO(EINVAL),
+ "Invalid command '%s'", cmd);
+
+ /* allow programs in /usr/lib/udev/ to be called without the path */
+ if (!path_is_absolute(argv[0])) {
+ char *program;
+
+ program = path_join(UDEVLIBEXECDIR, argv[0]);
+ if (!program)
+ return log_oom();
+
+ free_and_replace(argv[0], program);
+ }
+
+ r = device_get_properties_strv(event->dev, &envp);
+ if (r < 0)
+ return log_device_error_errno(event->dev, r, "Failed to get device properties");
+
+ log_device_debug(event->dev, "Starting '%s'", cmd);
+
+ r = safe_fork("(spawn)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return log_device_error_errno(event->dev, r,
+ "Failed to fork() to execute command '%s': %m", cmd);
+ if (r == 0) {
+ if (rearrange_stdio(-1, outpipe[WRITE_END], errpipe[WRITE_END]) < 0)
+ _exit(EXIT_FAILURE);
+
+ (void) close_all_fds(NULL, 0);
+ (void) rlimit_nofile_safe();
+
+ execve(argv[0], argv, envp);
+ _exit(EXIT_FAILURE);
+ }
+
+ /* parent closed child's ends of pipes */
+ outpipe[WRITE_END] = safe_close(outpipe[WRITE_END]);
+ errpipe[WRITE_END] = safe_close(errpipe[WRITE_END]);
+
+ spawn = (Spawn) {
+ .device = event->dev,
+ .cmd = cmd,
+ .pid = pid,
+ .accept_failure = accept_failure,
+ .timeout_warn_usec = udev_warn_timeout(timeout_usec),
+ .timeout_usec = timeout_usec,
+ .timeout_signal = timeout_signal,
+ .event_birth_usec = event->birth_usec,
+ .fd_stdout = outpipe[READ_END],
+ .fd_stderr = errpipe[READ_END],
+ .result = result,
+ .result_size = ressize,
+ };
+ r = spawn_wait(&spawn);
+ if (r < 0)
+ return log_device_error_errno(event->dev, r,
+ "Failed to wait for spawned command '%s': %m", cmd);
+
+ if (result)
+ result[spawn.result_len] = '\0';
+
+ return r; /* 0 for success, and positive if the program failed */
+}
+
+static int rename_netif(UdevEvent *event) {
+ sd_device *dev = event->dev;
+ const char *oldname;
+ int ifindex, r;
+
+ if (!event->name)
+ return 0; /* No new name is requested. */
+
+ r = sd_device_get_sysname(dev, &oldname);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get sysname: %m");
+
+ if (streq(event->name, oldname))
+ return 0; /* The interface name is already requested name. */
+
+ if (!device_for_action(dev, DEVICE_ACTION_ADD))
+ return 0; /* Rename the interface only when it is added. */
+
+ r = sd_device_get_ifindex(dev, &ifindex);
+ if (r == -ENOENT)
+ return 0; /* Device is not a network interface. */
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get ifindex: %m");
+
+ /* Set ID_RENAMING boolean property here, and drop it in the corresponding move uevent later. */
+ r = device_add_property(dev, "ID_RENAMING", "1");
+ if (r < 0)
+ return log_device_warning_errno(dev, r, "Failed to add 'ID_RENAMING' property: %m");
+
+ r = device_rename(dev, event->name);
+ if (r < 0)
+ return log_device_warning_errno(dev, r, "Failed to update properties with new name '%s': %m", event->name);
+
+ /* Also set ID_RENAMING boolean property to cloned sd_device object and save it to database
+ * before calling rtnl_set_link_name(). Otherwise, clients (e.g., systemd-networkd) may receive
+ * RTM_NEWLINK netlink message before the database is updated. */
+ r = device_add_property(event->dev_db_clone, "ID_RENAMING", "1");
+ if (r < 0)
+ return log_device_warning_errno(event->dev_db_clone, r, "Failed to add 'ID_RENAMING' property: %m");
+
+ r = device_update_db(event->dev_db_clone);
+ if (r < 0)
+ return log_device_debug_errno(event->dev_db_clone, r, "Failed to update database under /run/udev/data/: %m");
+
+ r = rtnl_set_link_name(&event->rtnl, ifindex, event->name);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to rename network interface %i from '%s' to '%s': %m",
+ ifindex, oldname, event->name);
+
+ log_device_debug(dev, "Network interface %i is renamed from '%s' to '%s'", ifindex, oldname, event->name);
+
+ return 1;
+}
+
+static int update_devnode(UdevEvent *event) {
+ sd_device *dev = event->dev;
+ int r;
+
+ r = sd_device_get_devnum(dev, NULL);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get devnum: %m");
+
+ /* remove/update possible left-over symlinks from old database entry */
+ (void) udev_node_update_old_links(dev, event->dev_db_clone);
+
+ if (!uid_is_valid(event->uid)) {
+ r = device_get_devnode_uid(dev, &event->uid);
+ if (r < 0 && r != -ENOENT)
+ return log_device_error_errno(dev, r, "Failed to get devnode UID: %m");
+ }
+
+ if (!gid_is_valid(event->gid)) {
+ r = device_get_devnode_gid(dev, &event->gid);
+ if (r < 0 && r != -ENOENT)
+ return log_device_error_errno(dev, r, "Failed to get devnode GID: %m");
+ }
+
+ if (event->mode == MODE_INVALID) {
+ r = device_get_devnode_mode(dev, &event->mode);
+ if (r < 0 && r != -ENOENT)
+ return log_device_error_errno(dev, r, "Failed to get devnode mode: %m");
+ }
+ if (event->mode == MODE_INVALID && gid_is_valid(event->gid) && event->gid > 0)
+ /* If group is set, but mode is not set, "upgrade" mode for the group. */
+ event->mode = 0660;
+
+ bool apply_mac = device_for_action(dev, DEVICE_ACTION_ADD);
+
+ return udev_node_add(dev, apply_mac, event->mode, event->uid, event->gid, event->seclabel_list);
+}
+
+static void event_execute_rules_on_remove(
+ UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ Hashmap *properties_list,
+ UdevRules *rules) {
+
+ sd_device *dev = event->dev;
+ int r;
+
+ r = device_read_db_internal(dev, true);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to read database under /run/udev/data/: %m");
+
+ r = device_tag_index(dev, NULL, false);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to remove corresponding tag files under /run/udev/tag/, ignoring: %m");
+
+ r = device_delete_db(dev);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to delete database under /run/udev/data/, ignoring: %m");
+
+ if (sd_device_get_devnum(dev, NULL) >= 0)
+ (void) udev_watch_end(dev);
+
+ (void) udev_rules_apply_to_event(rules, event, timeout_usec, timeout_signal, properties_list);
+
+ if (sd_device_get_devnum(dev, NULL) >= 0)
+ (void) udev_node_remove(dev);
+}
+
+static int udev_event_on_move(sd_device *dev) {
+ int r;
+
+ /* Drop previously added property */
+ r = device_add_property(dev, "ID_RENAMING", NULL);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to remove 'ID_RENAMING' property: %m");
+
+ return 0;
+}
+
+static int copy_all_tags(sd_device *d, sd_device *s) {
+ const char *tag;
+ int r;
+
+ assert(d);
+
+ if (!s)
+ return 0;
+
+ FOREACH_DEVICE_TAG(s, tag) {
+ r = device_add_tag(d, tag, false);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int udev_event_execute_rules(UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ Hashmap *properties_list,
+ UdevRules *rules) {
+ const char *subsystem;
+ DeviceAction action;
+ sd_device *dev;
+ int r;
+
+ assert(event);
+ assert(rules);
+
+ dev = event->dev;
+
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get subsystem: %m");
+
+ r = device_get_action(dev, &action);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get ACTION: %m");
+
+ if (action == DEVICE_ACTION_REMOVE) {
+ event_execute_rules_on_remove(event, timeout_usec, timeout_signal, properties_list, rules);
+ return 0;
+ }
+
+ r = device_clone_with_db(dev, &event->dev_db_clone);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to clone sd_device object: %m");
+
+ r = copy_all_tags(dev, event->dev_db_clone);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to copy all tags from old database entry, ignoring: %m");
+
+ if (sd_device_get_devnum(dev, NULL) >= 0)
+ /* Disable watch during event processing. */
+ (void) udev_watch_end(event->dev_db_clone);
+
+ if (action == DEVICE_ACTION_MOVE) {
+ r = udev_event_on_move(event->dev);
+ if (r < 0)
+ return r;
+ }
+
+ r = udev_rules_apply_to_event(rules, event, timeout_usec, timeout_signal, properties_list);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to apply udev rules: %m");
+
+ r = rename_netif(event);
+ if (r < 0)
+ return r;
+
+ r = update_devnode(event);
+ if (r < 0)
+ return r;
+
+ /* preserve old, or get new initialization timestamp */
+ r = device_ensure_usec_initialized(dev, event->dev_db_clone);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to set initialization timestamp: %m");
+
+ /* (re)write database file */
+ r = device_tag_index(dev, event->dev_db_clone, true);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to update tags under /run/udev/tag/: %m");
+
+ r = device_update_db(dev);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to update database under /run/udev/data/: %m");
+
+ /* Yes, we run update_devnode() twice, because in the first invocation, that is before update of udev database,
+ * it could happen that two contenders are replacing each other's symlink. Hence we run it again to make sure
+ * symlinks point to devices that claim them with the highest priority. */
+ r = update_devnode(event);
+ if (r < 0)
+ return r;
+
+ device_set_is_initialized(dev);
+
+ return 0;
+}
+
+void udev_event_execute_run(UdevEvent *event, usec_t timeout_usec, int timeout_signal) {
+ const char *command;
+ void *val;
+ int r;
+
+ ORDERED_HASHMAP_FOREACH_KEY(val, command, event->run_list) {
+ UdevBuiltinCommand builtin_cmd = PTR_TO_UDEV_BUILTIN_CMD(val);
+
+ if (builtin_cmd != _UDEV_BUILTIN_INVALID) {
+ log_device_debug(event->dev, "Running built-in command \"%s\"", command);
+ r = udev_builtin_run(event->dev, builtin_cmd, command, false);
+ if (r < 0)
+ log_device_debug_errno(event->dev, r, "Failed to run built-in command \"%s\", ignoring: %m", command);
+ } else {
+ if (event->exec_delay_usec > 0) {
+ char buf[FORMAT_TIMESPAN_MAX];
+
+ log_device_debug(event->dev, "Delaying execution of \"%s\" for %s.",
+ command, format_timespan(buf, sizeof(buf), event->exec_delay_usec, USEC_PER_SEC));
+ (void) usleep(event->exec_delay_usec);
+ }
+
+ log_device_debug(event->dev, "Running command \"%s\"", command);
+
+ r = udev_event_spawn(event, timeout_usec, timeout_signal, false, command, NULL, 0);
+ if (r < 0)
+ log_device_warning_errno(event->dev, r, "Failed to execute '%s', ignoring: %m", command);
+ else if (r > 0) /* returned value is positive when program fails */
+ log_device_debug(event->dev, "Command \"%s\" returned %d (error), ignoring.", command, r);
+ }
+ }
+}
diff --git a/src/udev/udev-event.h b/src/udev/udev-event.h
new file mode 100644
index 0000000..8647828
--- /dev/null
+++ b/src/udev/udev-event.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+/*
+ * Copyright © 2003 Greg Kroah-Hartman <greg@kroah.com>
+ */
+
+#include "sd-device.h"
+#include "sd-netlink.h"
+
+#include "hashmap.h"
+#include "macro.h"
+#include "udev-rules.h"
+#include "udev-util.h"
+#include "util.h"
+
+#define READ_END 0
+#define WRITE_END 1
+
+typedef struct UdevEvent {
+ sd_device *dev;
+ sd_device *dev_parent;
+ sd_device *dev_db_clone;
+ char *name;
+ char *program_result;
+ mode_t mode;
+ uid_t uid;
+ gid_t gid;
+ OrderedHashmap *seclabel_list;
+ OrderedHashmap *run_list;
+ usec_t exec_delay_usec;
+ usec_t birth_usec;
+ sd_netlink *rtnl;
+ unsigned builtin_run;
+ unsigned builtin_ret;
+ UdevRuleEscapeType esc:8;
+ bool inotify_watch:1;
+ bool inotify_watch_final:1;
+ bool group_final:1;
+ bool owner_final:1;
+ bool mode_final:1;
+ bool name_final:1;
+ bool devlink_final:1;
+ bool run_final:1;
+} UdevEvent;
+
+UdevEvent *udev_event_new(sd_device *dev, usec_t exec_delay_usec, sd_netlink *rtnl);
+UdevEvent *udev_event_free(UdevEvent *event);
+DEFINE_TRIVIAL_CLEANUP_FUNC(UdevEvent*, udev_event_free);
+
+size_t udev_event_apply_format(UdevEvent *event,
+ const char *src, char *dest, size_t size,
+ bool replace_whitespace);
+int udev_check_format(const char *value, size_t *offset, const char **hint);
+int udev_event_spawn(UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ bool accept_failure,
+ const char *cmd, char *result, size_t ressize);
+int udev_event_execute_rules(UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ Hashmap *properties_list,
+ UdevRules *rules);
+void udev_event_execute_run(UdevEvent *event, usec_t timeout_usec, int timeout_signal);
+
+static inline usec_t udev_warn_timeout(usec_t timeout_usec) {
+ return DIV_ROUND_UP(timeout_usec, 3);
+}
diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c
new file mode 100644
index 0000000..b8b93ee
--- /dev/null
+++ b/src/udev/udev-node.c
@@ -0,0 +1,528 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "device-nodes.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "libudev-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-node.h"
+#include "user-util.h"
+
+#define LINK_UPDATE_MAX_RETRIES 128
+
+static int node_symlink(sd_device *dev, const char *node, const char *slink) {
+ _cleanup_free_ char *slink_dirname = NULL, *target = NULL;
+ const char *id_filename, *slink_tmp;
+ struct stat stats;
+ int r;
+
+ assert(dev);
+ assert(node);
+ assert(slink);
+
+ slink_dirname = dirname_malloc(slink);
+ if (!slink_dirname)
+ return log_oom();
+
+ /* use relative link */
+ r = path_make_relative(slink_dirname, node, &target);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get relative path from '%s' to '%s': %m", slink, node);
+
+ /* preserve link with correct target, do not replace node of other device */
+ if (lstat(slink, &stats) == 0) {
+ if (S_ISBLK(stats.st_mode) || S_ISCHR(stats.st_mode))
+ return log_device_error_errno(dev, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Conflicting device node '%s' found, link to '%s' will not be created.", slink, node);
+ else if (S_ISLNK(stats.st_mode)) {
+ _cleanup_free_ char *buf = NULL;
+
+ if (readlink_malloc(slink, &buf) >= 0 &&
+ streq(target, buf)) {
+ log_device_debug(dev, "Preserve already existing symlink '%s' to '%s'", slink, target);
+ (void) label_fix(slink, LABEL_IGNORE_ENOENT);
+ (void) utimensat(AT_FDCWD, slink, NULL, AT_SYMLINK_NOFOLLOW);
+ return 0;
+ }
+ }
+ } else {
+ log_device_debug(dev, "Creating symlink '%s' to '%s'", slink, target);
+ do {
+ r = mkdir_parents_label(slink, 0755);
+ if (!IN_SET(r, 0, -ENOENT))
+ break;
+ mac_selinux_create_file_prepare(slink, S_IFLNK);
+ if (symlink(target, slink) < 0)
+ r = -errno;
+ mac_selinux_create_file_clear();
+ } while (r == -ENOENT);
+ if (r == 0)
+ return 0;
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to create symlink '%s' to '%s', trying to replace '%s': %m", slink, target, slink);
+ }
+
+ log_device_debug(dev, "Atomically replace '%s'", slink);
+ r = device_get_id_filename(dev, &id_filename);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get id_filename: %m");
+ slink_tmp = strjoina(slink, ".tmp-", id_filename);
+ (void) unlink(slink_tmp);
+ do {
+ r = mkdir_parents_label(slink_tmp, 0755);
+ if (!IN_SET(r, 0, -ENOENT))
+ break;
+ mac_selinux_create_file_prepare(slink_tmp, S_IFLNK);
+ if (symlink(target, slink_tmp) < 0)
+ r = -errno;
+ mac_selinux_create_file_clear();
+ } while (r == -ENOENT);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to create symlink '%s' to '%s': %m", slink_tmp, target);
+
+ if (rename(slink_tmp, slink) < 0) {
+ r = log_device_error_errno(dev, errno, "Failed to rename '%s' to '%s': %m", slink_tmp, slink);
+ (void) unlink(slink_tmp);
+ } else
+ /* Tell caller that we replaced already existing symlink. */
+ r = 1;
+
+ return r;
+}
+
+/* find device node of device with highest priority */
+static int link_find_prioritized(sd_device *dev, bool add, const char *stackdir, char **ret) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ _cleanup_free_ char *target = NULL;
+ struct dirent *dent;
+ int r, priority = 0;
+
+ assert(!add || dev);
+ assert(stackdir);
+ assert(ret);
+
+ if (add) {
+ const char *devnode;
+
+ r = device_get_devlink_priority(dev, &priority);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return r;
+
+ target = strdup(devnode);
+ if (!target)
+ return -ENOMEM;
+ }
+
+ dir = opendir(stackdir);
+ if (!dir) {
+ if (target) {
+ *ret = TAKE_PTR(target);
+ return 0;
+ }
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ _cleanup_(sd_device_unrefp) sd_device *dev_db = NULL;
+ const char *devnode, *id_filename;
+ int db_prio = 0;
+
+ if (dent->d_name[0] == '\0')
+ break;
+ if (dent->d_name[0] == '.')
+ continue;
+
+ log_device_debug(dev, "Found '%s' claiming '%s'", dent->d_name, stackdir);
+
+ if (device_get_id_filename(dev, &id_filename) < 0)
+ continue;
+
+ /* did we find ourself? */
+ if (streq(dent->d_name, id_filename))
+ continue;
+
+ if (sd_device_new_from_device_id(&dev_db, dent->d_name) < 0)
+ continue;
+
+ if (sd_device_get_devname(dev_db, &devnode) < 0)
+ continue;
+
+ if (device_get_devlink_priority(dev_db, &db_prio) < 0)
+ continue;
+
+ if (target && db_prio <= priority)
+ continue;
+
+ log_device_debug(dev_db, "Device claims priority %i for '%s'", db_prio, stackdir);
+
+ r = free_and_strdup(&target, devnode);
+ if (r < 0)
+ return r;
+ priority = db_prio;
+ }
+
+ if (!target)
+ return -ENOENT;
+
+ *ret = TAKE_PTR(target);
+ return 0;
+}
+
+/* manage "stack of names" with possibly specified device priorities */
+static int link_update(sd_device *dev, const char *slink, bool add) {
+ _cleanup_free_ char *filename = NULL, *dirname = NULL;
+ char name_enc[PATH_MAX];
+ const char *id_filename;
+ int i, r, retries;
+
+ assert(dev);
+ assert(slink);
+
+ r = device_get_id_filename(dev, &id_filename);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get id_filename: %m");
+
+ util_path_encode(slink + STRLEN("/dev"), name_enc, sizeof(name_enc));
+ dirname = path_join("/run/udev/links/", name_enc);
+ if (!dirname)
+ return log_oom();
+ filename = path_join(dirname, id_filename);
+ if (!filename)
+ return log_oom();
+
+ if (!add) {
+ if (unlink(filename) == 0)
+ (void) rmdir(dirname);
+ } else
+ for (;;) {
+ _cleanup_close_ int fd = -1;
+
+ r = mkdir_parents(filename, 0755);
+ if (!IN_SET(r, 0, -ENOENT))
+ return r;
+
+ fd = open(filename, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC|O_NOFOLLOW, 0444);
+ if (fd >= 0)
+ break;
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ /* If the database entry is not written yet we will just do one iteration and possibly wrong symlink
+ * will be fixed in the second invocation. */
+ retries = sd_device_get_is_initialized(dev) > 0 ? LINK_UPDATE_MAX_RETRIES : 1;
+
+ for (i = 0; i < retries; i++) {
+ _cleanup_free_ char *target = NULL;
+ struct stat st1 = {}, st2 = {};
+
+ r = stat(dirname, &st1);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ r = link_find_prioritized(dev, add, dirname, &target);
+ if (r == -ENOENT) {
+ log_device_debug(dev, "No reference left, removing '%s'", slink);
+ if (unlink(slink) == 0)
+ (void) rmdir_parents(slink, "/");
+
+ break;
+ } else if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to determine highest priority symlink: %m");
+
+ r = node_symlink(dev, target, slink);
+ if (r < 0) {
+ (void) unlink(filename);
+ break;
+ } else if (r == 1)
+ /* We have replaced already existing symlink, possibly there is some other device trying
+ * to claim the same symlink. Let's do one more iteration to give us a chance to fix
+ * the error if other device actually claims the symlink with higher priority. */
+ continue;
+
+ /* Skip the second stat() if the first failed, stat_inode_unmodified() would return false regardless. */
+ if ((st1.st_mode & S_IFMT) != 0) {
+ r = stat(dirname, &st2);
+ if (r < 0 && errno != ENOENT)
+ return -errno;
+
+ if (stat_inode_unmodified(&st1, &st2))
+ break;
+ }
+ }
+
+ return i < LINK_UPDATE_MAX_RETRIES ? 0 : -ELOOP;
+}
+
+int udev_node_update_old_links(sd_device *dev, sd_device *dev_old) {
+ const char *name, *devpath;
+ int r;
+
+ assert(dev);
+ assert(dev_old);
+
+ r = sd_device_get_devpath(dev, &devpath);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devpath: %m");
+
+ /* update possible left-over symlinks */
+ FOREACH_DEVICE_DEVLINK(dev_old, name) {
+ const char *name_current;
+ bool found = false;
+
+ /* check if old link name still belongs to this device */
+ FOREACH_DEVICE_DEVLINK(dev, name_current)
+ if (streq(name, name_current)) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ continue;
+
+ log_device_debug(dev, "Updating old name, '%s' no longer belonging to '%s'",
+ name, devpath);
+ link_update(dev, name, false);
+ }
+
+ return 0;
+}
+
+static int node_permissions_apply(sd_device *dev, bool apply_mac,
+ mode_t mode, uid_t uid, gid_t gid,
+ OrderedHashmap *seclabel_list) {
+ const char *devnode, *subsystem, *id_filename = NULL;
+ bool apply_mode, apply_uid, apply_gid;
+ _cleanup_close_ int node_fd = -1;
+ struct stat stats;
+ dev_t devnum;
+ int r;
+
+ assert(dev);
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devname: %m");
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get subsystem: %m");
+ r = sd_device_get_devnum(dev, &devnum);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devnum: %m");
+ (void) device_get_id_filename(dev, &id_filename);
+
+ if (streq(subsystem, "block"))
+ mode |= S_IFBLK;
+ else
+ mode |= S_IFCHR;
+
+ node_fd = open(devnode, O_PATH|O_NOFOLLOW|O_CLOEXEC);
+ if (node_fd < 0) {
+ if (errno == ENOENT) {
+ log_device_debug_errno(dev, errno, "Device node %s is missing, skipping handling.", devnode);
+ return 0; /* This is necessarily racey, so ignore missing the device */
+ }
+
+ return log_device_debug_errno(dev, errno, "Cannot open node %s: %m", devnode);
+ }
+
+ if (fstat(node_fd, &stats) < 0)
+ return log_device_debug_errno(dev, errno, "cannot stat() node %s: %m", devnode);
+
+ if ((mode != MODE_INVALID && (stats.st_mode & S_IFMT) != (mode & S_IFMT)) || stats.st_rdev != devnum) {
+ log_device_debug(dev, "Found node '%s' with non-matching devnum %s, skipping handling.",
+ devnode, id_filename);
+ return 0; /* We might process a device that already got replaced by the time we have a look
+ * at it, handle this gracefully and step away. */
+ }
+
+ apply_mode = mode != MODE_INVALID && (stats.st_mode & 0777) != (mode & 0777);
+ apply_uid = uid_is_valid(uid) && stats.st_uid != uid;
+ apply_gid = gid_is_valid(gid) && stats.st_gid != gid;
+
+ if (apply_mode || apply_uid || apply_gid || apply_mac) {
+ bool selinux = false, smack = false;
+ const char *name, *label;
+
+ if (apply_mode || apply_uid || apply_gid) {
+ log_device_debug(dev, "Setting permissions %s, uid=" UID_FMT ", gid=" GID_FMT ", mode=%#o",
+ devnode,
+ uid_is_valid(uid) ? uid : stats.st_uid,
+ gid_is_valid(gid) ? gid : stats.st_gid,
+ mode != MODE_INVALID ? mode & 0777 : stats.st_mode & 0777);
+
+ r = fchmod_and_chown(node_fd, mode, uid, gid);
+ if (r < 0)
+ log_device_full_errno(dev, r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to set owner/mode of %s to uid=" UID_FMT
+ ", gid=" GID_FMT ", mode=%#o: %m",
+ devnode,
+ uid_is_valid(uid) ? uid : stats.st_uid,
+ gid_is_valid(gid) ? gid : stats.st_gid,
+ mode != MODE_INVALID ? mode & 0777 : stats.st_mode & 0777);
+ } else
+ log_device_debug(dev, "Preserve permissions of %s, uid=" UID_FMT ", gid=" GID_FMT ", mode=%#o",
+ devnode,
+ uid_is_valid(uid) ? uid : stats.st_uid,
+ gid_is_valid(gid) ? gid : stats.st_gid,
+ mode != MODE_INVALID ? mode & 0777 : stats.st_mode & 0777);
+
+ /* apply SECLABEL{$module}=$label */
+ ORDERED_HASHMAP_FOREACH_KEY(label, name, seclabel_list) {
+ int q;
+
+ if (streq(name, "selinux")) {
+ selinux = true;
+
+ q = mac_selinux_apply_fd(node_fd, devnode, label);
+ if (q < 0)
+ log_device_full_errno(dev, q == -ENOENT ? LOG_DEBUG : LOG_ERR, q,
+ "SECLABEL: failed to set SELinux label '%s': %m", label);
+ else
+ log_device_debug(dev, "SECLABEL: set SELinux label '%s'", label);
+
+ } else if (streq(name, "smack")) {
+ smack = true;
+
+ q = mac_smack_apply_fd(node_fd, SMACK_ATTR_ACCESS, label);
+ if (q < 0)
+ log_device_full_errno(dev, q == -ENOENT ? LOG_DEBUG : LOG_ERR, q,
+ "SECLABEL: failed to set SMACK label '%s': %m", label);
+ else
+ log_device_debug(dev, "SECLABEL: set SMACK label '%s'", label);
+
+ } else
+ log_device_error(dev, "SECLABEL: unknown subsystem, ignoring '%s'='%s'", name, label);
+ }
+
+ /* set the defaults */
+ if (!selinux)
+ (void) mac_selinux_fix_fd(node_fd, devnode, LABEL_IGNORE_ENOENT);
+ if (!smack)
+ (void) mac_smack_apply_fd(node_fd, SMACK_ATTR_ACCESS, NULL);
+ }
+
+ /* always update timestamp when we re-use the node, like on media change events */
+ r = futimens_opath(node_fd, NULL);
+ if (r < 0)
+ log_device_debug_errno(dev, r, "Failed to adjust timestamp of node %s: %m", devnode);
+
+ return r;
+}
+
+static int xsprintf_dev_num_path_from_sd_device(sd_device *dev, char **ret) {
+ char filename[DEV_NUM_PATH_MAX], *s;
+ const char *subsystem;
+ dev_t devnum;
+ int r;
+
+ assert(ret);
+
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devnum(dev, &devnum);
+ if (r < 0)
+ return r;
+
+ xsprintf_dev_num_path(filename,
+ streq(subsystem, "block") ? "block" : "char",
+ devnum);
+
+ s = strdup(filename);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int udev_node_add(sd_device *dev, bool apply,
+ mode_t mode, uid_t uid, gid_t gid,
+ OrderedHashmap *seclabel_list) {
+ const char *devnode, *devlink;
+ _cleanup_free_ char *filename = NULL;
+ int r;
+
+ assert(dev);
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devnode: %m");
+
+ if (DEBUG_LOGGING) {
+ const char *id_filename = NULL;
+
+ (void) device_get_id_filename(dev, &id_filename);
+ log_device_debug(dev, "Handling device node '%s', devnum=%s", devnode, strnull(id_filename));
+ }
+
+ r = node_permissions_apply(dev, apply, mode, uid, gid, seclabel_list);
+ if (r < 0)
+ return r;
+
+ r = xsprintf_dev_num_path_from_sd_device(dev, &filename);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get device path: %m");
+
+ /* always add /dev/{block,char}/$major:$minor */
+ (void) node_symlink(dev, devnode, filename);
+
+ /* create/update symlinks, add symlinks to name index */
+ FOREACH_DEVICE_DEVLINK(dev, devlink) {
+ r = link_update(dev, devlink, true);
+ if (r < 0)
+ log_device_info_errno(dev, r, "Failed to update device symlinks: %m");
+ }
+
+ return 0;
+}
+
+int udev_node_remove(sd_device *dev) {
+ _cleanup_free_ char *filename = NULL;
+ const char *devlink;
+ int r;
+
+ assert(dev);
+
+ /* remove/update symlinks, remove symlinks from name index */
+ FOREACH_DEVICE_DEVLINK(dev, devlink) {
+ r = link_update(dev, devlink, false);
+ if (r < 0)
+ log_device_info_errno(dev, r, "Failed to update device symlinks: %m");
+ }
+
+ r = xsprintf_dev_num_path_from_sd_device(dev, &filename);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get device path: %m");
+
+ /* remove /dev/{block,char}/$major:$minor */
+ (void) unlink(filename);
+
+ return 0;
+}
diff --git a/src/udev/udev-node.h b/src/udev/udev-node.h
new file mode 100644
index 0000000..84c7e45
--- /dev/null
+++ b/src/udev/udev-node.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "hashmap.h"
+
+int udev_node_add(sd_device *dev, bool apply,
+ mode_t mode, uid_t uid, gid_t gid,
+ OrderedHashmap *seclabel_list);
+int udev_node_remove(sd_device *dev);
+int udev_node_update_old_links(sd_device *dev, sd_device *dev_old);
diff --git a/src/udev/udev-rules.c b/src/udev/udev-rules.c
new file mode 100644
index 0000000..ef6a0c1
--- /dev/null
+++ b/src/udev/udev-rules.c
@@ -0,0 +1,2393 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <ctype.h>
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "conf-files.h"
+#include "def.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "libudev-util.h"
+#include "list.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "sysctl-util.h"
+#include "udev-builtin.h"
+#include "udev-event.h"
+#include "udev-rules.h"
+#include "user-util.h"
+#include "virt.h"
+
+#define RULES_DIRS (const char* const*) CONF_PATHS_STRV("udev/rules.d")
+
+typedef enum {
+ OP_MATCH, /* == */
+ OP_NOMATCH, /* != */
+ OP_ADD, /* += */
+ OP_REMOVE, /* -= */
+ OP_ASSIGN, /* = */
+ OP_ASSIGN_FINAL, /* := */
+ _OP_TYPE_MAX,
+ _OP_TYPE_INVALID = -1
+} UdevRuleOperatorType;
+
+typedef enum {
+ MATCH_TYPE_EMPTY, /* empty string */
+ MATCH_TYPE_PLAIN, /* no special characters */
+ MATCH_TYPE_PLAIN_WITH_EMPTY, /* no special characters with empty string, e.g., "|foo" */
+ MATCH_TYPE_GLOB, /* shell globs ?,*,[] */
+ MATCH_TYPE_GLOB_WITH_EMPTY, /* shell globs ?,*,[] with empty string, e.g., "|foo*" */
+ MATCH_TYPE_SUBSYSTEM, /* "subsystem", "bus", or "class" */
+ _MATCH_TYPE_MAX,
+ _MATCH_TYPE_INVALID = -1
+} UdevRuleMatchType;
+
+typedef enum {
+ SUBST_TYPE_PLAIN, /* no substitution */
+ SUBST_TYPE_FORMAT, /* % or $ */
+ SUBST_TYPE_SUBSYS, /* "[<SUBSYSTEM>/<KERNEL>]<attribute>" format */
+ _SUBST_TYPE_MAX,
+ _SUBST_TYPE_INVALID = -1
+} UdevRuleSubstituteType;
+
+typedef enum {
+ /* lvalues which take match or nomatch operator */
+ TK_M_ACTION, /* string, device_get_action() */
+ TK_M_DEVPATH, /* path, sd_device_get_devpath() */
+ TK_M_KERNEL, /* string, sd_device_get_sysname() */
+ TK_M_DEVLINK, /* strv, sd_device_get_devlink_first(), sd_device_get_devlink_next() */
+ TK_M_NAME, /* string, name of network interface */
+ TK_M_ENV, /* string, device property, takes key through attribute */
+ TK_M_CONST, /* string, system-specific hard-coded constant */
+ TK_M_TAG, /* strv, sd_device_get_tag_first(), sd_device_get_tag_next() */
+ TK_M_SUBSYSTEM, /* string, sd_device_get_subsystem() */
+ TK_M_DRIVER, /* string, sd_device_get_driver() */
+ TK_M_ATTR, /* string, takes filename through attribute, sd_device_get_sysattr_value(), util_resolve_subsys_kernel(), etc. */
+ TK_M_SYSCTL, /* string, takes kernel parameter through attribute */
+
+ /* matches parent parameters */
+ TK_M_PARENTS_KERNEL, /* string */
+ TK_M_PARENTS_SUBSYSTEM, /* string */
+ TK_M_PARENTS_DRIVER, /* string */
+ TK_M_PARENTS_ATTR, /* string */
+ TK_M_PARENTS_TAG, /* strv */
+
+ TK_M_TEST, /* path, optionally mode_t can be specified by attribute, test the existence of a file */
+ TK_M_PROGRAM, /* string, execute a program */
+ TK_M_IMPORT_FILE, /* path */
+ TK_M_IMPORT_PROGRAM, /* string, import properties from the result of program */
+ TK_M_IMPORT_BUILTIN, /* string, import properties from the result of built-in command */
+ TK_M_IMPORT_DB, /* string, import properties from database */
+ TK_M_IMPORT_CMDLINE, /* string, kernel command line */
+ TK_M_IMPORT_PARENT, /* string, parent property */
+ TK_M_RESULT, /* string, result of TK_M_PROGRAM */
+
+#define _TK_M_MAX (TK_M_RESULT + 1)
+#define _TK_A_MIN _TK_M_MAX
+
+ /* lvalues which take one of assign operators */
+ TK_A_OPTIONS_STRING_ESCAPE_NONE, /* no argument */
+ TK_A_OPTIONS_STRING_ESCAPE_REPLACE, /* no argument */
+ TK_A_OPTIONS_DB_PERSIST, /* no argument */
+ TK_A_OPTIONS_INOTIFY_WATCH, /* boolean */
+ TK_A_OPTIONS_DEVLINK_PRIORITY, /* int */
+ TK_A_OWNER, /* user name */
+ TK_A_GROUP, /* group name */
+ TK_A_MODE, /* mode string */
+ TK_A_OWNER_ID, /* uid_t */
+ TK_A_GROUP_ID, /* gid_t */
+ TK_A_MODE_ID, /* mode_t */
+ TK_A_TAG, /* string */
+ TK_A_OPTIONS_STATIC_NODE, /* device path, /dev/... */
+ TK_A_SECLABEL, /* string with attribute */
+ TK_A_ENV, /* string with attribute */
+ TK_A_NAME, /* ifname */
+ TK_A_DEVLINK, /* string */
+ TK_A_ATTR, /* string with attribute */
+ TK_A_SYSCTL, /* string with attribute */
+ TK_A_RUN_BUILTIN, /* string */
+ TK_A_RUN_PROGRAM, /* string */
+
+ _TK_TYPE_MAX,
+ _TK_TYPE_INVALID = -1,
+} UdevRuleTokenType;
+
+typedef enum {
+ LINE_HAS_NAME = 1 << 0, /* has NAME= */
+ LINE_HAS_DEVLINK = 1 << 1, /* has SYMLINK=, OWNER=, GROUP= or MODE= */
+ LINE_HAS_STATIC_NODE = 1 << 2, /* has OPTIONS=static_node */
+ LINE_HAS_GOTO = 1 << 3, /* has GOTO= */
+ LINE_HAS_LABEL = 1 << 4, /* has LABEL= */
+ LINE_UPDATE_SOMETHING = 1 << 5, /* has other TK_A_* or TK_M_IMPORT tokens */
+} UdevRuleLineType;
+
+typedef struct UdevRuleFile UdevRuleFile;
+typedef struct UdevRuleLine UdevRuleLine;
+typedef struct UdevRuleToken UdevRuleToken;
+
+struct UdevRuleToken {
+ UdevRuleTokenType type:8;
+ UdevRuleOperatorType op:8;
+ UdevRuleMatchType match_type:8;
+ UdevRuleSubstituteType attr_subst_type:7;
+ bool attr_match_remove_trailing_whitespace:1;
+ const char *value;
+ void *data;
+ LIST_FIELDS(UdevRuleToken, tokens);
+};
+
+struct UdevRuleLine {
+ char *line;
+ unsigned line_number;
+ UdevRuleLineType type;
+
+ const char *label;
+ const char *goto_label;
+ UdevRuleLine *goto_line;
+
+ UdevRuleFile *rule_file;
+ UdevRuleToken *current_token;
+ LIST_HEAD(UdevRuleToken, tokens);
+ LIST_FIELDS(UdevRuleLine, rule_lines);
+};
+
+struct UdevRuleFile {
+ char *filename;
+ UdevRuleLine *current_line;
+ LIST_HEAD(UdevRuleLine, rule_lines);
+ LIST_FIELDS(UdevRuleFile, rule_files);
+};
+
+struct UdevRules {
+ usec_t dirs_ts_usec;
+ ResolveNameTiming resolve_name_timing;
+ Hashmap *known_users;
+ Hashmap *known_groups;
+ UdevRuleFile *current_file;
+ LIST_HEAD(UdevRuleFile, rule_files);
+};
+
+/*** Logging helpers ***/
+
+#define log_rule_full_errno(device, rules, level, error, fmt, ...) \
+ ({ \
+ UdevRules *_r = (rules); \
+ UdevRuleFile *_f = _r ? _r->current_file : NULL; \
+ UdevRuleLine *_l = _f ? _f->current_line : NULL; \
+ const char *_n = _f ? _f->filename : NULL; \
+ \
+ log_device_full_errno(device, level, error, "%s:%u " fmt, \
+ strna(_n), _l ? _l->line_number : 0, \
+ ##__VA_ARGS__); \
+ })
+
+#define log_rule_full(device, rules, level, ...) (void) log_rule_full_errno(device, rules, level, 0, __VA_ARGS__)
+
+#define log_rule_debug(device, rules, ...) log_rule_full_errno(device, rules, LOG_DEBUG, 0, __VA_ARGS__)
+#define log_rule_info(device, rules, ...) log_rule_full(device, rules, LOG_INFO, __VA_ARGS__)
+#define log_rule_notice(device, rules, ...) log_rule_full(device, rules, LOG_NOTICE, __VA_ARGS__)
+#define log_rule_warning(device, rules, ...) log_rule_full(device, rules, LOG_WARNING, __VA_ARGS__)
+#define log_rule_error(device, rules, ...) log_rule_full(device, rules, LOG_ERR, __VA_ARGS__)
+
+#define log_rule_debug_errno(device, rules, error, ...) log_rule_full_errno(device, rules, LOG_DEBUG, error, __VA_ARGS__)
+#define log_rule_info_errno(device, rules, error, ...) log_rule_full_errno(device, rules, LOG_INFO, error, __VA_ARGS__)
+#define log_rule_notice_errno(device, rules, error, ...) log_rule_full_errno(device, rules, LOG_NOTICE, error, __VA_ARGS__)
+#define log_rule_warning_errno(device, rules, error, ...) log_rule_full_errno(device, rules, LOG_WARNING, error, __VA_ARGS__)
+#define log_rule_error_errno(device, rules, error, ...) log_rule_full_errno(device, rules, LOG_ERR, error, __VA_ARGS__)
+
+#define log_token_full_errno(rules, level, error, ...) log_rule_full_errno(NULL, rules, level, error, __VA_ARGS__)
+#define log_token_full(rules, level, ...) (void) log_token_full_errno(rules, level, 0, __VA_ARGS__)
+
+#define log_token_debug(rules, ...) log_token_full_errno(rules, LOG_DEBUG, 0, __VA_ARGS__)
+#define log_token_info(rules, ...) log_token_full(rules, LOG_INFO, __VA_ARGS__)
+#define log_token_notice(rules, ...) log_token_full(rules, LOG_NOTICE, __VA_ARGS__)
+#define log_token_warning(rules, ...) log_token_full(rules, LOG_WARNING, __VA_ARGS__)
+#define log_token_error(rules, ...) log_token_full(rules, LOG_ERR, __VA_ARGS__)
+
+#define log_token_debug_errno(rules, error, ...) log_token_full_errno(rules, LOG_DEBUG, error, __VA_ARGS__)
+#define log_token_info_errno(rules, error, ...) log_token_full_errno(rules, LOG_INFO, error, __VA_ARGS__)
+#define log_token_notice_errno(rules, error, ...) log_token_full_errno(rules, LOG_NOTICE, error, __VA_ARGS__)
+#define log_token_warning_errno(rules, error, ...) log_token_full_errno(rules, LOG_WARNING, error, __VA_ARGS__)
+#define log_token_error_errno(rules, error, ...) log_token_full_errno(rules, LOG_ERR, error, __VA_ARGS__)
+
+#define _log_token_invalid(rules, key, type) \
+ log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL), \
+ "Invalid %s for %s.", type, key)
+
+#define log_token_invalid_op(rules, key) _log_token_invalid(rules, key, "operator")
+#define log_token_invalid_attr(rules, key) _log_token_invalid(rules, key, "attribute")
+
+#define log_token_invalid_attr_format(rules, key, attr, offset, hint) \
+ log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL), \
+ "Invalid attribute \"%s\" for %s (char %zu: %s), ignoring, but please fix it.", \
+ attr, key, offset, hint)
+#define log_token_invalid_value(rules, key, value, offset, hint) \
+ log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL), \
+ "Invalid value \"%s\" for %s (char %zu: %s), ignoring, but please fix it.", \
+ value, key, offset, hint)
+
+static void log_unknown_owner(sd_device *dev, UdevRules *rules, int error, const char *entity, const char *name) {
+ if (IN_SET(abs(error), ENOENT, ESRCH))
+ log_rule_error(dev, rules, "Unknown %s '%s', ignoring", entity, name);
+ else
+ log_rule_error_errno(dev, rules, error, "Failed to resolve %s '%s', ignoring: %m", entity, name);
+}
+
+/*** Other functions ***/
+
+static void udev_rule_token_free(UdevRuleToken *token) {
+ free(token);
+}
+
+static void udev_rule_line_clear_tokens(UdevRuleLine *rule_line) {
+ UdevRuleToken *i, *next;
+
+ assert(rule_line);
+
+ LIST_FOREACH_SAFE(tokens, i, next, rule_line->tokens)
+ udev_rule_token_free(i);
+
+ rule_line->tokens = NULL;
+}
+
+static void udev_rule_line_free(UdevRuleLine *rule_line) {
+ if (!rule_line)
+ return;
+
+ udev_rule_line_clear_tokens(rule_line);
+
+ if (rule_line->rule_file) {
+ if (rule_line->rule_file->current_line == rule_line)
+ rule_line->rule_file->current_line = rule_line->rule_lines_prev;
+
+ LIST_REMOVE(rule_lines, rule_line->rule_file->rule_lines, rule_line);
+ }
+
+ free(rule_line->line);
+ free(rule_line);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UdevRuleLine*, udev_rule_line_free);
+
+static void udev_rule_file_free(UdevRuleFile *rule_file) {
+ UdevRuleLine *i, *next;
+
+ if (!rule_file)
+ return;
+
+ LIST_FOREACH_SAFE(rule_lines, i, next, rule_file->rule_lines)
+ udev_rule_line_free(i);
+
+ free(rule_file->filename);
+ free(rule_file);
+}
+
+UdevRules *udev_rules_free(UdevRules *rules) {
+ UdevRuleFile *i, *next;
+
+ if (!rules)
+ return NULL;
+
+ LIST_FOREACH_SAFE(rule_files, i, next, rules->rule_files)
+ udev_rule_file_free(i);
+
+ hashmap_free_free_key(rules->known_users);
+ hashmap_free_free_key(rules->known_groups);
+ return mfree(rules);
+}
+
+static int rule_resolve_user(UdevRules *rules, const char *name, uid_t *ret) {
+ _cleanup_free_ char *n = NULL;
+ uid_t uid;
+ void *val;
+ int r;
+
+ assert(rules);
+ assert(name);
+
+ val = hashmap_get(rules->known_users, name);
+ if (val) {
+ *ret = PTR_TO_UID(val);
+ return 0;
+ }
+
+ r = get_user_creds(&name, &uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0) {
+ log_unknown_owner(NULL, rules, r, "user", name);
+ *ret = UID_INVALID;
+ return 0;
+ }
+
+ n = strdup(name);
+ if (!n)
+ return -ENOMEM;
+
+ r = hashmap_ensure_allocated(&rules->known_users, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(rules->known_users, n, UID_TO_PTR(uid));
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(n);
+ *ret = uid;
+ return 0;
+}
+
+static int rule_resolve_group(UdevRules *rules, const char *name, gid_t *ret) {
+ _cleanup_free_ char *n = NULL;
+ gid_t gid;
+ void *val;
+ int r;
+
+ assert(rules);
+ assert(name);
+
+ val = hashmap_get(rules->known_groups, name);
+ if (val) {
+ *ret = PTR_TO_GID(val);
+ return 0;
+ }
+
+ r = get_group_creds(&name, &gid, USER_CREDS_ALLOW_MISSING);
+ if (r < 0) {
+ log_unknown_owner(NULL, rules, r, "group", name);
+ *ret = GID_INVALID;
+ return 0;
+ }
+
+ n = strdup(name);
+ if (!n)
+ return -ENOMEM;
+
+ r = hashmap_ensure_allocated(&rules->known_groups, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(rules->known_groups, n, GID_TO_PTR(gid));
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(n);
+ *ret = gid;
+ return 0;
+}
+
+static UdevRuleSubstituteType rule_get_substitution_type(const char *str) {
+ assert(str);
+
+ if (str[0] == '[')
+ return SUBST_TYPE_SUBSYS;
+ if (strchr(str, '%') || strchr(str, '$'))
+ return SUBST_TYPE_FORMAT;
+ return SUBST_TYPE_PLAIN;
+}
+
+static void rule_line_append_token(UdevRuleLine *rule_line, UdevRuleToken *token) {
+ assert(rule_line);
+ assert(token);
+
+ if (rule_line->current_token)
+ LIST_APPEND(tokens, rule_line->current_token, token);
+ else
+ LIST_APPEND(tokens, rule_line->tokens, token);
+
+ rule_line->current_token = token;
+}
+
+static int rule_line_add_token(UdevRuleLine *rule_line, UdevRuleTokenType type, UdevRuleOperatorType op, char *value, void *data) {
+ UdevRuleToken *token;
+ UdevRuleMatchType match_type = _MATCH_TYPE_INVALID;
+ UdevRuleSubstituteType subst_type = _SUBST_TYPE_INVALID;
+ bool remove_trailing_whitespace = false;
+ size_t len;
+
+ assert(rule_line);
+ assert(type >= 0 && type < _TK_TYPE_MAX);
+ assert(op >= 0 && op < _OP_TYPE_MAX);
+
+ if (type < _TK_M_MAX) {
+ assert(value);
+ assert(IN_SET(op, OP_MATCH, OP_NOMATCH));
+
+ if (type == TK_M_SUBSYSTEM && STR_IN_SET(value, "subsystem", "bus", "class"))
+ match_type = MATCH_TYPE_SUBSYSTEM;
+ else if (isempty(value))
+ match_type = MATCH_TYPE_EMPTY;
+ else if (streq(value, "?*")) {
+ /* Convert KEY=="?*" -> KEY!="" */
+ match_type = MATCH_TYPE_EMPTY;
+ op = op == OP_MATCH ? OP_NOMATCH : OP_MATCH;
+ } else if (string_is_glob(value))
+ match_type = MATCH_TYPE_GLOB;
+ else
+ match_type = MATCH_TYPE_PLAIN;
+
+ if (type < TK_M_TEST || type == TK_M_RESULT) {
+ /* Convert value string to nulstr. */
+ bool bar = true, empty = false;
+ char *a, *b;
+
+ for (a = b = value; *a != '\0'; a++) {
+ if (*a != '|') {
+ *b++ = *a;
+ bar = false;
+ } else {
+ if (bar)
+ empty = true;
+ else
+ *b++ = '\0';
+ bar = true;
+ }
+ }
+ *b = '\0';
+
+ /* Make sure the value is end, so NULSTR_FOREACH can read correct match */
+ if (b < a)
+ b[1] = '\0';
+
+ if (bar)
+ empty = true;
+
+ if (empty) {
+ if (match_type == MATCH_TYPE_GLOB)
+ match_type = MATCH_TYPE_GLOB_WITH_EMPTY;
+ if (match_type == MATCH_TYPE_PLAIN)
+ match_type = MATCH_TYPE_PLAIN_WITH_EMPTY;
+ }
+ }
+ }
+
+ if (IN_SET(type, TK_M_ATTR, TK_M_PARENTS_ATTR)) {
+ assert(value);
+ assert(data);
+
+ len = strlen(value);
+ if (len > 0 && !isspace(value[len - 1]))
+ remove_trailing_whitespace = true;
+
+ subst_type = rule_get_substitution_type(data);
+ }
+
+ token = new(UdevRuleToken, 1);
+ if (!token)
+ return -ENOMEM;
+
+ *token = (UdevRuleToken) {
+ .type = type,
+ .op = op,
+ .value = value,
+ .data = data,
+ .match_type = match_type,
+ .attr_subst_type = subst_type,
+ .attr_match_remove_trailing_whitespace = remove_trailing_whitespace,
+ };
+
+ rule_line_append_token(rule_line, token);
+
+ if (token->type == TK_A_NAME)
+ SET_FLAG(rule_line->type, LINE_HAS_NAME, true);
+
+ else if (IN_SET(token->type, TK_A_DEVLINK,
+ TK_A_OWNER, TK_A_GROUP, TK_A_MODE,
+ TK_A_OWNER_ID, TK_A_GROUP_ID, TK_A_MODE_ID))
+ SET_FLAG(rule_line->type, LINE_HAS_DEVLINK, true);
+
+ else if (token->type == TK_A_OPTIONS_STATIC_NODE)
+ SET_FLAG(rule_line->type, LINE_HAS_STATIC_NODE, true);
+
+ else if (token->type >= _TK_A_MIN ||
+ IN_SET(token->type, TK_M_PROGRAM,
+ TK_M_IMPORT_FILE, TK_M_IMPORT_PROGRAM, TK_M_IMPORT_BUILTIN,
+ TK_M_IMPORT_DB, TK_M_IMPORT_CMDLINE, TK_M_IMPORT_PARENT))
+ SET_FLAG(rule_line->type, LINE_UPDATE_SOMETHING, true);
+
+ return 0;
+}
+
+static void check_value_format_and_warn(UdevRules *rules, const char *key, const char *value, bool nonempty) {
+ size_t offset;
+ const char *hint;
+
+ if (nonempty && isempty(value))
+ log_token_invalid_value(rules, key, value, (size_t) 0, "empty value");
+ else if (udev_check_format(value, &offset, &hint) < 0)
+ log_token_invalid_value(rules, key, value, offset + 1, hint);
+}
+
+static int check_attr_format_and_warn(UdevRules *rules, const char *key, const char *value) {
+ size_t offset;
+ const char *hint;
+
+ if (isempty(value))
+ return log_token_invalid_attr(rules, key);
+ if (udev_check_format(value, &offset, &hint) < 0)
+ log_token_invalid_attr_format(rules, key, value, offset + 1, hint);
+ return 0;
+}
+
+static int parse_token(UdevRules *rules, const char *key, char *attr, UdevRuleOperatorType op, char *value) {
+ bool is_match = IN_SET(op, OP_MATCH, OP_NOMATCH);
+ UdevRuleLine *rule_line;
+ int r;
+
+ assert(rules);
+ assert(rules->current_file);
+ assert(rules->current_file->current_line);
+ assert(key);
+ assert(value);
+
+ rule_line = rules->current_file->current_line;
+
+ if (streq(key, "ACTION")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_ACTION, op, value, NULL);
+ } else if (streq(key, "DEVPATH")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_DEVPATH, op, value, NULL);
+ } else if (streq(key, "KERNEL")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_KERNEL, op, value, NULL);
+ } else if (streq(key, "SYMLINK")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+
+ if (!is_match) {
+ check_value_format_and_warn(rules, key, value, false);
+ r = rule_line_add_token(rule_line, TK_A_DEVLINK, op, value, NULL);
+ } else
+ r = rule_line_add_token(rule_line, TK_M_DEVLINK, op, value, NULL);
+ } else if (streq(key, "NAME")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (op == OP_ADD) {
+ log_token_warning(rules, "%s key takes '==', '!=', '=', or ':=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ if (!is_match) {
+ if (streq(value, "%k"))
+ return log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL),
+ "NAME=\"%%k\" is ignored, as it breaks kernel supplied names.");
+ if (isempty(value))
+ return log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL),
+ "Ignoring NAME=\"\", as udev will not delete any device nodes.");
+ check_value_format_and_warn(rules, key, value, false);
+
+ r = rule_line_add_token(rule_line, TK_A_NAME, op, value, NULL);
+ } else
+ r = rule_line_add_token(rule_line, TK_M_NAME, op, value, NULL);
+ } else if (streq(key, "ENV")) {
+ if (isempty(attr))
+ return log_token_invalid_attr(rules, key);
+ if (op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (op == OP_ASSIGN_FINAL) {
+ log_token_warning(rules, "%s key takes '==', '!=', '=', or '+=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ if (!is_match) {
+ if (STR_IN_SET(attr,
+ "ACTION", "DEVLINKS", "DEVNAME", "DEVPATH", "DEVTYPE", "DRIVER",
+ "IFINDEX", "MAJOR", "MINOR", "SEQNUM", "SUBSYSTEM", "TAGS"))
+ return log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL),
+ "Invalid ENV attribute. '%s' cannot be set.", attr);
+
+ check_value_format_and_warn(rules, key, value, false);
+
+ r = rule_line_add_token(rule_line, TK_A_ENV, op, value, attr);
+ } else
+ r = rule_line_add_token(rule_line, TK_M_ENV, op, value, attr);
+ } else if (streq(key, "CONST")) {
+ if (isempty(attr) || !STR_IN_SET(attr, "arch", "virt"))
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+ r = rule_line_add_token(rule_line, TK_M_CONST, op, value, attr);
+ } else if (streq(key, "TAG")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (op == OP_ASSIGN_FINAL) {
+ log_token_warning(rules, "%s key takes '==', '!=', '=', or '+=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ if (!is_match) {
+ check_value_format_and_warn(rules, key, value, true);
+
+ r = rule_line_add_token(rule_line, TK_A_TAG, op, value, NULL);
+ } else
+ r = rule_line_add_token(rule_line, TK_M_TAG, op, value, NULL);
+ } else if (streq(key, "SUBSYSTEM")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ if (STR_IN_SET(value, "bus", "class"))
+ log_token_warning(rules, "'%s' must be specified as 'subsystem'; please fix it", value);
+
+ r = rule_line_add_token(rule_line, TK_M_SUBSYSTEM, op, value, NULL);
+ } else if (streq(key, "DRIVER")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_DRIVER, op, value, NULL);
+ } else if (streq(key, "ATTR")) {
+ r = check_attr_format_and_warn(rules, key, attr);
+ if (r < 0)
+ return r;
+ if (op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (IN_SET(op, OP_ADD, OP_ASSIGN_FINAL)) {
+ log_token_warning(rules, "%s key takes '==', '!=', or '=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ if (!is_match) {
+ check_value_format_and_warn(rules, key, value, false);
+ r = rule_line_add_token(rule_line, TK_A_ATTR, op, value, attr);
+ } else
+ r = rule_line_add_token(rule_line, TK_M_ATTR, op, value, attr);
+ } else if (streq(key, "SYSCTL")) {
+ r = check_attr_format_and_warn(rules, key, attr);
+ if (r < 0)
+ return r;
+ if (op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (IN_SET(op, OP_ADD, OP_ASSIGN_FINAL)) {
+ log_token_warning(rules, "%s key takes '==', '!=', or '=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ if (!is_match) {
+ check_value_format_and_warn(rules, key, value, false);
+ r = rule_line_add_token(rule_line, TK_A_SYSCTL, op, value, attr);
+ } else
+ r = rule_line_add_token(rule_line, TK_M_SYSCTL, op, value, attr);
+ } else if (streq(key, "KERNELS")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_PARENTS_KERNEL, op, value, NULL);
+ } else if (streq(key, "SUBSYSTEMS")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_PARENTS_SUBSYSTEM, op, value, NULL);
+ } else if (streq(key, "DRIVERS")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_PARENTS_DRIVER, op, value, NULL);
+ } else if (streq(key, "ATTRS")) {
+ r = check_attr_format_and_warn(rules, key, attr);
+ if (r < 0)
+ return r;
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ if (startswith(attr, "device/"))
+ log_token_warning(rules, "'device' link may not be available in future kernels; please fix it.");
+ if (strstr(attr, "../"))
+ log_token_warning(rules, "Direct reference to parent sysfs directory, may break in future kernels; please fix it.");
+
+ r = rule_line_add_token(rule_line, TK_M_PARENTS_ATTR, op, value, attr);
+ } else if (streq(key, "TAGS")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_PARENTS_TAG, op, value, NULL);
+ } else if (streq(key, "TEST")) {
+ mode_t mode = MODE_INVALID;
+
+ if (!isempty(attr)) {
+ r = parse_mode(attr, &mode);
+ if (r < 0)
+ return log_token_error_errno(rules, r, "Failed to parse mode '%s': %m", attr);
+ }
+ check_value_format_and_warn(rules, key, value, true);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_TEST, op, value, MODE_TO_PTR(mode));
+ } else if (streq(key, "PROGRAM")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ check_value_format_and_warn(rules, key, value, true);
+ if (op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (!is_match)
+ op = OP_MATCH;
+
+ r = rule_line_add_token(rule_line, TK_M_PROGRAM, op, value, NULL);
+ } else if (streq(key, "IMPORT")) {
+ if (isempty(attr))
+ return log_token_invalid_attr(rules, key);
+ check_value_format_and_warn(rules, key, value, true);
+ if (op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (!is_match)
+ op = OP_MATCH;
+
+ if (streq(attr, "file"))
+ r = rule_line_add_token(rule_line, TK_M_IMPORT_FILE, op, value, NULL);
+ else if (streq(attr, "program")) {
+ UdevBuiltinCommand cmd;
+
+ cmd = udev_builtin_lookup(value);
+ if (cmd >= 0) {
+ log_token_debug(rules,"Found builtin command '%s' for %s, replacing attribute", value, key);
+ r = rule_line_add_token(rule_line, TK_M_IMPORT_BUILTIN, op, value, UDEV_BUILTIN_CMD_TO_PTR(cmd));
+ } else
+ r = rule_line_add_token(rule_line, TK_M_IMPORT_PROGRAM, op, value, NULL);
+ } else if (streq(attr, "builtin")) {
+ UdevBuiltinCommand cmd;
+
+ cmd = udev_builtin_lookup(value);
+ if (cmd < 0)
+ return log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL),
+ "Unknown builtin command: %s", value);
+ r = rule_line_add_token(rule_line, TK_M_IMPORT_BUILTIN, op, value, UDEV_BUILTIN_CMD_TO_PTR(cmd));
+ } else if (streq(attr, "db"))
+ r = rule_line_add_token(rule_line, TK_M_IMPORT_DB, op, value, NULL);
+ else if (streq(attr, "cmdline"))
+ r = rule_line_add_token(rule_line, TK_M_IMPORT_CMDLINE, op, value, NULL);
+ else if (streq(attr, "parent"))
+ r = rule_line_add_token(rule_line, TK_M_IMPORT_PARENT, op, value, NULL);
+ else
+ return log_token_invalid_attr(rules, key);
+ } else if (streq(key, "RESULT")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (!is_match)
+ return log_token_invalid_op(rules, key);
+
+ r = rule_line_add_token(rule_line, TK_M_RESULT, op, value, NULL);
+ } else if (streq(key, "OPTIONS")) {
+ char *tmp;
+
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (is_match || op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (op == OP_ADD)
+ op = OP_ASSIGN;
+
+ if (streq(value, "string_escape=none"))
+ r = rule_line_add_token(rule_line, TK_A_OPTIONS_STRING_ESCAPE_NONE, op, NULL, NULL);
+ else if (streq(value, "string_escape=replace"))
+ r = rule_line_add_token(rule_line, TK_A_OPTIONS_STRING_ESCAPE_REPLACE, op, NULL, NULL);
+ else if (streq(value, "db_persist"))
+ r = rule_line_add_token(rule_line, TK_A_OPTIONS_DB_PERSIST, op, NULL, NULL);
+ else if (streq(value, "watch"))
+ r = rule_line_add_token(rule_line, TK_A_OPTIONS_INOTIFY_WATCH, op, NULL, INT_TO_PTR(1));
+ else if (streq(value, "nowatch"))
+ r = rule_line_add_token(rule_line, TK_A_OPTIONS_INOTIFY_WATCH, op, NULL, INT_TO_PTR(0));
+ else if ((tmp = startswith(value, "static_node=")))
+ r = rule_line_add_token(rule_line, TK_A_OPTIONS_STATIC_NODE, op, tmp, NULL);
+ else if ((tmp = startswith(value, "link_priority="))) {
+ int prio;
+
+ r = safe_atoi(tmp, &prio);
+ if (r < 0)
+ return log_token_error_errno(rules, r, "Failed to parse link priority '%s': %m", tmp);
+ r = rule_line_add_token(rule_line, TK_A_OPTIONS_DEVLINK_PRIORITY, op, NULL, INT_TO_PTR(prio));
+ } else {
+ log_token_warning(rules, "Invalid value for OPTIONS key, ignoring: '%s'", value);
+ return 0;
+ }
+ } else if (streq(key, "OWNER")) {
+ uid_t uid;
+
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (is_match || op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (op == OP_ADD) {
+ log_token_warning(rules, "%s key takes '=' or ':=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ if (parse_uid(value, &uid) >= 0)
+ r = rule_line_add_token(rule_line, TK_A_OWNER_ID, op, NULL, UID_TO_PTR(uid));
+ else if (rules->resolve_name_timing == RESOLVE_NAME_EARLY &&
+ rule_get_substitution_type(value) == SUBST_TYPE_PLAIN) {
+ r = rule_resolve_user(rules, value, &uid);
+ if (r < 0)
+ return log_token_error_errno(rules, r, "Failed to resolve user name '%s': %m", value);
+
+ r = rule_line_add_token(rule_line, TK_A_OWNER_ID, op, NULL, UID_TO_PTR(uid));
+ } else if (rules->resolve_name_timing != RESOLVE_NAME_NEVER) {
+ check_value_format_and_warn(rules, key, value, true);
+ r = rule_line_add_token(rule_line, TK_A_OWNER, op, value, NULL);
+ } else {
+ log_token_debug(rules, "User name resolution is disabled, ignoring %s=%s", key, value);
+ return 0;
+ }
+ } else if (streq(key, "GROUP")) {
+ gid_t gid;
+
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (is_match || op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (op == OP_ADD) {
+ log_token_warning(rules, "%s key takes '=' or ':=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ if (parse_gid(value, &gid) >= 0)
+ r = rule_line_add_token(rule_line, TK_A_GROUP_ID, op, NULL, GID_TO_PTR(gid));
+ else if (rules->resolve_name_timing == RESOLVE_NAME_EARLY &&
+ rule_get_substitution_type(value) == SUBST_TYPE_PLAIN) {
+ r = rule_resolve_group(rules, value, &gid);
+ if (r < 0)
+ return log_token_error_errno(rules, r, "Failed to resolve group name '%s': %m", value);
+
+ r = rule_line_add_token(rule_line, TK_A_GROUP_ID, op, NULL, GID_TO_PTR(gid));
+ } else if (rules->resolve_name_timing != RESOLVE_NAME_NEVER) {
+ check_value_format_and_warn(rules, key, value, true);
+ r = rule_line_add_token(rule_line, TK_A_GROUP, op, value, NULL);
+ } else {
+ log_token_debug(rules, "Resolving group name is disabled, ignoring %s=%s", key, value);
+ return 0;
+ }
+ } else if (streq(key, "MODE")) {
+ mode_t mode;
+
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (is_match || op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (op == OP_ADD) {
+ log_token_warning(rules, "%s key takes '=' or ':=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ if (parse_mode(value, &mode) >= 0)
+ r = rule_line_add_token(rule_line, TK_A_MODE_ID, op, NULL, MODE_TO_PTR(mode));
+ else {
+ check_value_format_and_warn(rules, key, value, true);
+ r = rule_line_add_token(rule_line, TK_A_MODE, op, value, NULL);
+ }
+ } else if (streq(key, "SECLABEL")) {
+ if (isempty(attr))
+ return log_token_invalid_attr(rules, key);
+ check_value_format_and_warn(rules, key, value, true);
+ if (is_match || op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ if (op == OP_ASSIGN_FINAL) {
+ log_token_warning(rules, "%s key takes '=' or '+=' operator, assuming '=', but please fix it.", key);
+ op = OP_ASSIGN;
+ }
+
+ r = rule_line_add_token(rule_line, TK_A_SECLABEL, op, value, attr);
+ } else if (streq(key, "RUN")) {
+ if (is_match || op == OP_REMOVE)
+ return log_token_invalid_op(rules, key);
+ check_value_format_and_warn(rules, key, value, true);
+ if (!attr || streq(attr, "program"))
+ r = rule_line_add_token(rule_line, TK_A_RUN_PROGRAM, op, value, NULL);
+ else if (streq(attr, "builtin")) {
+ UdevBuiltinCommand cmd;
+
+ cmd = udev_builtin_lookup(value);
+ if (cmd < 0)
+ return log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL),
+ "Unknown builtin command '%s', ignoring", value);
+ r = rule_line_add_token(rule_line, TK_A_RUN_BUILTIN, op, value, UDEV_BUILTIN_CMD_TO_PTR(cmd));
+ } else
+ return log_token_invalid_attr(rules, key);
+ } else if (streq(key, "GOTO")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (op != OP_ASSIGN)
+ return log_token_invalid_op(rules, key);
+ if (FLAGS_SET(rule_line->type, LINE_HAS_GOTO)) {
+ log_token_warning(rules, "Contains multiple GOTO keys, ignoring GOTO=\"%s\".", value);
+ return 0;
+ }
+
+ rule_line->goto_label = value;
+ SET_FLAG(rule_line->type, LINE_HAS_GOTO, true);
+ return 1;
+ } else if (streq(key, "LABEL")) {
+ if (attr)
+ return log_token_invalid_attr(rules, key);
+ if (op != OP_ASSIGN)
+ return log_token_invalid_op(rules, key);
+
+ rule_line->label = value;
+ SET_FLAG(rule_line->type, LINE_HAS_LABEL, true);
+ return 1;
+ } else
+ return log_token_error_errno(rules, SYNTHETIC_ERRNO(EINVAL), "Invalid key '%s'", key);
+ if (r < 0)
+ return log_oom();
+
+ return 1;
+}
+
+static UdevRuleOperatorType parse_operator(const char *op) {
+ assert(op);
+
+ if (startswith(op, "=="))
+ return OP_MATCH;
+ if (startswith(op, "!="))
+ return OP_NOMATCH;
+ if (startswith(op, "+="))
+ return OP_ADD;
+ if (startswith(op, "-="))
+ return OP_REMOVE;
+ if (startswith(op, "="))
+ return OP_ASSIGN;
+ if (startswith(op, ":="))
+ return OP_ASSIGN_FINAL;
+
+ return _OP_TYPE_INVALID;
+}
+
+static int parse_line(char **line, char **ret_key, char **ret_attr, UdevRuleOperatorType *ret_op, char **ret_value) {
+ char *key_begin, *key_end, *attr, *tmp;
+ UdevRuleOperatorType op;
+ int r;
+
+ assert(line);
+ assert(*line);
+ assert(ret_key);
+ assert(ret_op);
+ assert(ret_value);
+
+ key_begin = skip_leading_chars(*line, WHITESPACE ",");
+
+ if (isempty(key_begin))
+ return 0;
+
+ for (key_end = key_begin; ; key_end++) {
+ if (key_end[0] == '\0')
+ return -EINVAL;
+ if (strchr(WHITESPACE "={", key_end[0]))
+ break;
+ if (strchr("+-!:", key_end[0]) && key_end[1] == '=')
+ break;
+ }
+ if (key_end[0] == '{') {
+ attr = key_end + 1;
+ tmp = strchr(attr, '}');
+ if (!tmp)
+ return -EINVAL;
+ *tmp++ = '\0';
+ } else {
+ attr = NULL;
+ tmp = key_end;
+ }
+
+ tmp = skip_leading_chars(tmp, NULL);
+ op = parse_operator(tmp);
+ if (op < 0)
+ return -EINVAL;
+
+ key_end[0] = '\0';
+
+ tmp += op == OP_ASSIGN ? 1 : 2;
+ tmp = skip_leading_chars(tmp, NULL);
+ r = udev_rule_parse_value(tmp, ret_value, line);
+ if (r < 0)
+ return r;
+
+ *ret_key = key_begin;
+ *ret_attr = attr;
+ *ret_op = op;
+ return 1;
+}
+
+static void sort_tokens(UdevRuleLine *rule_line) {
+ UdevRuleToken *head_old;
+
+ assert(rule_line);
+
+ head_old = TAKE_PTR(rule_line->tokens);
+ rule_line->current_token = NULL;
+
+ while (!LIST_IS_EMPTY(head_old)) {
+ UdevRuleToken *t, *min_token = NULL;
+
+ LIST_FOREACH(tokens, t, head_old)
+ if (!min_token || min_token->type > t->type)
+ min_token = t;
+
+ LIST_REMOVE(tokens, head_old, min_token);
+ rule_line_append_token(rule_line, min_token);
+ }
+}
+
+static int rule_add_line(UdevRules *rules, const char *line_str, unsigned line_nr) {
+ _cleanup_(udev_rule_line_freep) UdevRuleLine *rule_line = NULL;
+ _cleanup_free_ char *line = NULL;
+ UdevRuleFile *rule_file;
+ char *p;
+ int r;
+
+ assert(rules);
+ assert(rules->current_file);
+ assert(line_str);
+
+ rule_file = rules->current_file;
+
+ if (isempty(line_str))
+ return 0;
+
+ /* We use memdup_suffix0() here, since we want to add a second NUL byte to the end, since possibly
+ * some parsers might turn this into a "nulstr", which requires an extra NUL at the end. */
+ line = memdup_suffix0(line_str, strlen(line_str) + 1);
+ if (!line)
+ return log_oom();
+
+ rule_line = new(UdevRuleLine, 1);
+ if (!rule_line)
+ return log_oom();
+
+ *rule_line = (UdevRuleLine) {
+ .line = TAKE_PTR(line),
+ .line_number = line_nr,
+ .rule_file = rule_file,
+ };
+
+ if (rule_file->current_line)
+ LIST_APPEND(rule_lines, rule_file->current_line, rule_line);
+ else
+ LIST_APPEND(rule_lines, rule_file->rule_lines, rule_line);
+
+ rule_file->current_line = rule_line;
+
+ for (p = rule_line->line; !isempty(p); ) {
+ char *key, *attr, *value;
+ UdevRuleOperatorType op;
+
+ r = parse_line(&p, &key, &attr, &op, &value);
+ if (r < 0)
+ return log_token_error_errno(rules, r, "Invalid key/value pair, ignoring.");
+ if (r == 0)
+ break;
+
+ r = parse_token(rules, key, attr, op, value);
+ if (r < 0)
+ return r;
+ }
+
+ if (rule_line->type == 0) {
+ log_token_warning(rules, "The line takes no effect, ignoring.");
+ return 0;
+ }
+
+ sort_tokens(rule_line);
+ TAKE_PTR(rule_line);
+ return 0;
+}
+
+static void rule_resolve_goto(UdevRuleFile *rule_file) {
+ UdevRuleLine *line, *line_next, *i;
+
+ assert(rule_file);
+
+ /* link GOTOs to LABEL rules in this file to be able to fast-forward */
+ LIST_FOREACH_SAFE(rule_lines, line, line_next, rule_file->rule_lines) {
+ if (!FLAGS_SET(line->type, LINE_HAS_GOTO))
+ continue;
+
+ LIST_FOREACH_AFTER(rule_lines, i, line)
+ if (streq_ptr(i->label, line->goto_label)) {
+ line->goto_line = i;
+ break;
+ }
+
+ if (!line->goto_line) {
+ log_error("%s:%u: GOTO=\"%s\" has no matching label, ignoring",
+ rule_file->filename, line->line_number, line->goto_label);
+
+ SET_FLAG(line->type, LINE_HAS_GOTO, false);
+ line->goto_label = NULL;
+
+ if ((line->type & ~LINE_HAS_LABEL) == 0) {
+ log_notice("%s:%u: The line takes no effect any more, dropping",
+ rule_file->filename, line->line_number);
+ if (line->type == LINE_HAS_LABEL)
+ udev_rule_line_clear_tokens(line);
+ else
+ udev_rule_line_free(line);
+ }
+ }
+ }
+}
+
+int udev_rules_parse_file(UdevRules *rules, const char *filename) {
+ _cleanup_free_ char *continuation = NULL, *name = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ UdevRuleFile *rule_file;
+ bool ignore_line = false;
+ unsigned line_nr = 0;
+ int r;
+
+ f = fopen(filename, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return -errno;
+ }
+
+ (void) fd_warn_permissions(filename, fileno(f));
+
+ if (null_or_empty_fd(fileno(f))) {
+ log_debug("Skipping empty file: %s", filename);
+ return 0;
+ }
+
+ log_debug("Reading rules file: %s", filename);
+
+ name = strdup(filename);
+ if (!name)
+ return log_oom();
+
+ rule_file = new(UdevRuleFile, 1);
+ if (!rule_file)
+ return log_oom();
+
+ *rule_file = (UdevRuleFile) {
+ .filename = TAKE_PTR(name),
+ };
+
+ if (rules->current_file)
+ LIST_APPEND(rule_files, rules->current_file, rule_file);
+ else
+ LIST_APPEND(rule_files, rules->rule_files, rule_file);
+
+ rules->current_file = rule_file;
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ size_t len;
+ char *line;
+
+ r = read_line(f, UTIL_LINE_SIZE, &buf);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ line_nr++;
+ line = skip_leading_chars(buf, NULL);
+
+ if (line[0] == '#')
+ continue;
+
+ len = strlen(line);
+
+ if (continuation && !ignore_line) {
+ if (strlen(continuation) + len >= UTIL_LINE_SIZE)
+ ignore_line = true;
+
+ if (!strextend(&continuation, line, NULL))
+ return log_oom();
+
+ if (!ignore_line) {
+ line = continuation;
+ len = strlen(line);
+ }
+ }
+
+ if (len > 0 && line[len - 1] == '\\') {
+ if (ignore_line)
+ continue;
+
+ line[len - 1] = '\0';
+ if (!continuation) {
+ continuation = strdup(line);
+ if (!continuation)
+ return log_oom();
+ }
+
+ continue;
+ }
+
+ if (ignore_line)
+ log_error("%s:%u: Line is too long, ignored", filename, line_nr);
+ else if (len > 0)
+ (void) rule_add_line(rules, line, line_nr);
+
+ continuation = mfree(continuation);
+ ignore_line = false;
+ }
+
+ rule_resolve_goto(rule_file);
+ return 0;
+}
+
+UdevRules* udev_rules_new(ResolveNameTiming resolve_name_timing) {
+ assert(resolve_name_timing >= 0 && resolve_name_timing < _RESOLVE_NAME_TIMING_MAX);
+
+ UdevRules *rules = new(UdevRules, 1);
+ if (!rules)
+ return NULL;
+
+ *rules = (UdevRules) {
+ .resolve_name_timing = resolve_name_timing,
+ };
+
+ return rules;
+}
+
+int udev_rules_load(UdevRules **ret_rules, ResolveNameTiming resolve_name_timing) {
+ _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+
+ rules = udev_rules_new(resolve_name_timing);
+ if (!rules)
+ return -ENOMEM;
+
+ (void) udev_rules_check_timestamp(rules);
+
+ r = conf_files_list_strv(&files, ".rules", NULL, 0, RULES_DIRS);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to enumerate rules files: %m");
+
+ STRV_FOREACH(f, files) {
+ r = udev_rules_parse_file(rules, *f);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read rules file %s, ignoring: %m", *f);
+ }
+
+ *ret_rules = TAKE_PTR(rules);
+ return 0;
+}
+
+bool udev_rules_check_timestamp(UdevRules *rules) {
+ if (!rules)
+ return false;
+
+ return paths_check_timestamp(RULES_DIRS, &rules->dirs_ts_usec, true);
+}
+
+static bool token_match_string(UdevRuleToken *token, const char *str) {
+ const char *i, *value;
+ bool match = false;
+
+ assert(token);
+ assert(token->value);
+ assert(token->type < _TK_M_MAX);
+
+ str = strempty(str);
+ value = token->value;
+
+ switch (token->match_type) {
+ case MATCH_TYPE_EMPTY:
+ match = isempty(str);
+ break;
+ case MATCH_TYPE_SUBSYSTEM:
+ match = STR_IN_SET(str, "subsystem", "class", "bus");
+ break;
+ case MATCH_TYPE_PLAIN_WITH_EMPTY:
+ if (isempty(str)) {
+ match = true;
+ break;
+ }
+ _fallthrough_;
+ case MATCH_TYPE_PLAIN:
+ NULSTR_FOREACH(i, value)
+ if (streq(i, str)) {
+ match = true;
+ break;
+ }
+ break;
+ case MATCH_TYPE_GLOB_WITH_EMPTY:
+ if (isempty(str)) {
+ match = true;
+ break;
+ }
+ _fallthrough_;
+ case MATCH_TYPE_GLOB:
+ NULSTR_FOREACH(i, value)
+ if ((fnmatch(i, str, 0) == 0)) {
+ match = true;
+ break;
+ }
+ break;
+ default:
+ assert_not_reached("Invalid match type");
+ }
+
+ return token->op == (match ? OP_MATCH : OP_NOMATCH);
+}
+
+static bool token_match_attr(UdevRuleToken *token, sd_device *dev, UdevEvent *event) {
+ char nbuf[UTIL_NAME_SIZE], vbuf[UTIL_NAME_SIZE];
+ const char *name, *value;
+
+ assert(token);
+ assert(dev);
+ assert(event);
+
+ name = token->data;
+
+ switch (token->attr_subst_type) {
+ case SUBST_TYPE_FORMAT:
+ (void) udev_event_apply_format(event, name, nbuf, sizeof(nbuf), false);
+ name = nbuf;
+ _fallthrough_;
+ case SUBST_TYPE_PLAIN:
+ if (sd_device_get_sysattr_value(dev, name, &value) < 0)
+ return false;
+ break;
+ case SUBST_TYPE_SUBSYS:
+ if (util_resolve_subsys_kernel(name, vbuf, sizeof(vbuf), true) < 0)
+ return false;
+ value = vbuf;
+ break;
+ default:
+ assert_not_reached("Invalid attribute substitution type");
+ }
+
+ /* remove trailing whitespace, if not asked to match for it */
+ if (token->attr_match_remove_trailing_whitespace) {
+ if (value != vbuf) {
+ strscpy(vbuf, sizeof(vbuf), value);
+ value = vbuf;
+ }
+
+ delete_trailing_chars(vbuf, NULL);
+ }
+
+ return token_match_string(token, value);
+}
+
+static int get_property_from_string(char *line, char **ret_key, char **ret_value) {
+ char *key, *val;
+ size_t len;
+
+ assert(line);
+ assert(ret_key);
+ assert(ret_value);
+
+ /* find key */
+ key = skip_leading_chars(line, NULL);
+
+ /* comment or empty line */
+ if (IN_SET(key[0], '#', '\0')) {
+ *ret_key = *ret_value = NULL;
+ return 0;
+ }
+
+ /* split key/value */
+ val = strchr(key, '=');
+ if (!val)
+ return -EINVAL;
+ *val++ = '\0';
+
+ key = strstrip(key);
+ if (isempty(key))
+ return -EINVAL;
+
+ val = strstrip(val);
+ if (isempty(val))
+ return -EINVAL;
+
+ /* unquote */
+ if (IN_SET(val[0], '"', '\'')) {
+ len = strlen(val);
+ if (len == 1 || val[len-1] != val[0])
+ return -EINVAL;
+ val[len-1] = '\0';
+ val++;
+ }
+
+ *ret_key = key;
+ *ret_value = val;
+ return 1;
+}
+
+static int import_parent_into_properties(sd_device *dev, const char *filter) {
+ const char *key, *val;
+ sd_device *parent;
+ int r;
+
+ assert(dev);
+ assert(filter);
+
+ r = sd_device_get_parent(dev, &parent);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE_PROPERTY(parent, key, val) {
+ if (fnmatch(filter, key, 0) != 0)
+ continue;
+ r = device_add_property(dev, key, val);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static int attr_subst_subdir(char attr[static UTIL_PATH_SIZE]) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *dent;
+ char buf[UTIL_PATH_SIZE], *p;
+ const char *tail;
+ size_t len, size;
+
+ assert(attr);
+
+ tail = strstr(attr, "/*/");
+ if (!tail)
+ return 0;
+
+ len = tail - attr + 1; /* include slash at the end */
+ tail += 2; /* include slash at the beginning */
+
+ p = buf;
+ size = sizeof(buf);
+ size -= strnpcpy(&p, size, attr, len);
+
+ dir = opendir(buf);
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ if (dent->d_name[0] == '.')
+ continue;
+
+ strscpyl(p, size, dent->d_name, tail, NULL);
+ if (faccessat(dirfd(dir), p, F_OK, 0) < 0)
+ continue;
+
+ strcpy(attr, buf);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int udev_rule_apply_token_to_event(
+ UdevRules *rules,
+ sd_device *dev,
+ UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ Hashmap *properties_list) {
+
+ UdevRuleToken *token;
+ char buf[UTIL_PATH_SIZE];
+ const char *val;
+ size_t count;
+ bool match;
+ int r;
+
+ assert(rules);
+ assert(dev);
+ assert(event);
+
+ /* This returns the following values:
+ * 0 on the current token does not match the event,
+ * 1 on the current token matches the event, and
+ * negative errno on some critical errors. */
+
+ token = rules->current_file->current_line->current_token;
+
+ switch (token->type) {
+ case TK_M_ACTION: {
+ DeviceAction a;
+
+ r = device_get_action(dev, &a);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to get uevent action type: %m");
+
+ return token_match_string(token, device_action_to_string(a));
+ }
+ case TK_M_DEVPATH:
+ r = sd_device_get_devpath(dev, &val);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to get devpath: %m");
+
+ return token_match_string(token, val);
+ case TK_M_KERNEL:
+ case TK_M_PARENTS_KERNEL:
+ r = sd_device_get_sysname(dev, &val);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to get sysname: %m");
+
+ return token_match_string(token, val);
+ case TK_M_DEVLINK:
+ FOREACH_DEVICE_DEVLINK(dev, val)
+ if (token_match_string(token, strempty(startswith(val, "/dev/"))))
+ return token->op == OP_MATCH;
+ return token->op == OP_NOMATCH;
+ case TK_M_NAME:
+ return token_match_string(token, event->name);
+ case TK_M_ENV:
+ if (sd_device_get_property_value(dev, token->data, &val) < 0)
+ val = hashmap_get(properties_list, token->data);
+
+ return token_match_string(token, val);
+ case TK_M_CONST: {
+ const char *k = token->data;
+
+ if (streq(k, "arch"))
+ val = architecture_to_string(uname_architecture());
+ else if (streq(k, "virt"))
+ val = virtualization_to_string(detect_virtualization());
+ else
+ assert_not_reached("Invalid CONST key");
+ return token_match_string(token, val);
+ }
+ case TK_M_TAG:
+ case TK_M_PARENTS_TAG:
+ FOREACH_DEVICE_TAG(dev, val)
+ if (token_match_string(token, val))
+ return token->op == OP_MATCH;
+ return token->op == OP_NOMATCH;
+ case TK_M_SUBSYSTEM:
+ case TK_M_PARENTS_SUBSYSTEM:
+ r = sd_device_get_subsystem(dev, &val);
+ if (r == -ENOENT)
+ val = NULL;
+ else if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to get subsystem: %m");
+
+ return token_match_string(token, val);
+ case TK_M_DRIVER:
+ case TK_M_PARENTS_DRIVER:
+ r = sd_device_get_driver(dev, &val);
+ if (r == -ENOENT)
+ val = NULL;
+ else if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to get driver: %m");
+
+ return token_match_string(token, val);
+ case TK_M_ATTR:
+ case TK_M_PARENTS_ATTR:
+ return token_match_attr(token, dev, event);
+ case TK_M_SYSCTL: {
+ _cleanup_free_ char *value = NULL;
+
+ (void) udev_event_apply_format(event, token->data, buf, sizeof(buf), false);
+ r = sysctl_read(sysctl_normalize(buf), &value);
+ if (r < 0 && r != -ENOENT)
+ return log_rule_error_errno(dev, rules, r, "Failed to read sysctl '%s': %m", buf);
+
+ return token_match_string(token, strstrip(value));
+ }
+ case TK_M_TEST: {
+ mode_t mode = PTR_TO_MODE(token->data);
+ struct stat statbuf;
+
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+ if (!path_is_absolute(buf) &&
+ util_resolve_subsys_kernel(buf, buf, sizeof(buf), false) < 0) {
+ char tmp[UTIL_PATH_SIZE];
+
+ r = sd_device_get_syspath(dev, &val);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to get syspath: %m");
+
+ strscpy(tmp, sizeof(tmp), buf);
+ strscpyl(buf, sizeof(buf), val, "/", tmp, NULL);
+ }
+
+ r = attr_subst_subdir(buf);
+ if (r == -ENOENT)
+ return token->op == OP_NOMATCH;
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to test for the existence of '%s': %m", buf);
+
+ if (stat(buf, &statbuf) < 0)
+ return token->op == OP_NOMATCH;
+
+ if (mode == MODE_INVALID)
+ return token->op == OP_MATCH;
+
+ match = (statbuf.st_mode & mode) > 0;
+ return token->op == (match ? OP_MATCH : OP_NOMATCH);
+ }
+ case TK_M_PROGRAM: {
+ char result[UTIL_LINE_SIZE];
+
+ event->program_result = mfree(event->program_result);
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+ log_rule_debug(dev, rules, "Running PROGRAM '%s'", buf);
+
+ r = udev_event_spawn(event, timeout_usec, timeout_signal, true, buf, result, sizeof(result));
+ if (r != 0) {
+ if (r < 0)
+ log_rule_warning_errno(dev, rules, r, "Failed to execute \"%s\": %m", buf);
+ else /* returned value is positive when program fails */
+ log_rule_debug(dev, rules, "Command \"%s\" returned %d (error)", buf, r);
+ return token->op == OP_NOMATCH;
+ }
+
+ delete_trailing_chars(result, "\n");
+ count = util_replace_chars(result, UDEV_ALLOWED_CHARS_INPUT);
+ if (count > 0)
+ log_rule_debug(dev, rules, "Replaced %zu character(s) in result of \"%s\"",
+ count, buf);
+
+ event->program_result = strdup(result);
+ return token->op == OP_MATCH;
+ }
+ case TK_M_IMPORT_FILE: {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+ log_rule_debug(dev, rules, "Importing properties from '%s'", buf);
+
+ f = fopen(buf, "re");
+ if (!f) {
+ if (errno != ENOENT)
+ return log_rule_error_errno(dev, rules, errno,
+ "Failed to open '%s': %m", buf);
+ return token->op == OP_NOMATCH;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *key, *value;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0) {
+ log_rule_debug_errno(dev, rules, r,
+ "Failed to read '%s', ignoring: %m", buf);
+ return token->op == OP_NOMATCH;
+ }
+ if (r == 0)
+ break;
+
+ r = get_property_from_string(line, &key, &value);
+ if (r < 0) {
+ log_rule_debug_errno(dev, rules, r,
+ "Failed to parse key and value from '%s', ignoring: %m",
+ line);
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ r = device_add_property(dev, key, value);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r,
+ "Failed to add property %s=%s: %m",
+ key, value);
+ }
+
+ return token->op == OP_MATCH;
+ }
+ case TK_M_IMPORT_PROGRAM: {
+ char result[UTIL_LINE_SIZE], *line, *pos;
+
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+ log_rule_debug(dev, rules, "Importing properties from results of '%s'", buf);
+
+ r = udev_event_spawn(event, timeout_usec, timeout_signal, true, buf, result, sizeof result);
+ if (r != 0) {
+ if (r < 0)
+ log_rule_warning_errno(dev, rules, r, "Failed to execute '%s', ignoring: %m", buf);
+ else /* returned value is positive when program fails */
+ log_rule_debug(dev, rules, "Command \"%s\" returned %d (error), ignoring", buf, r);
+ return token->op == OP_NOMATCH;
+ }
+
+ for (line = result; !isempty(line); line = pos) {
+ char *key, *value;
+
+ pos = strchr(line, '\n');
+ if (pos)
+ *pos++ = '\0';
+
+ r = get_property_from_string(line, &key, &value);
+ if (r < 0) {
+ log_rule_debug_errno(dev, rules, r,
+ "Failed to parse key and value from '%s', ignoring: %m",
+ line);
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ r = device_add_property(dev, key, value);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r,
+ "Failed to add property %s=%s: %m",
+ key, value);
+ }
+
+ return token->op == OP_MATCH;
+ }
+ case TK_M_IMPORT_BUILTIN: {
+ UdevBuiltinCommand cmd = PTR_TO_UDEV_BUILTIN_CMD(token->data);
+ unsigned mask = 1U << (int) cmd;
+
+ if (udev_builtin_run_once(cmd)) {
+ /* check if we ran already */
+ if (event->builtin_run & mask) {
+ log_rule_debug(dev, rules, "Skipping builtin '%s' in IMPORT key",
+ udev_builtin_name(cmd));
+ /* return the result from earlier run */
+ return token->op == (event->builtin_ret & mask ? OP_NOMATCH : OP_MATCH);
+ }
+ /* mark as ran */
+ event->builtin_run |= mask;
+ }
+
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+ log_rule_debug(dev, rules, "Importing properties from results of builtin command '%s'", buf);
+
+ r = udev_builtin_run(dev, cmd, buf, false);
+ if (r < 0) {
+ /* remember failure */
+ log_rule_debug_errno(dev, rules, r, "Failed to run builtin '%s': %m", buf);
+ event->builtin_ret |= mask;
+ }
+ return token->op == (r >= 0 ? OP_MATCH : OP_NOMATCH);
+ }
+ case TK_M_IMPORT_DB: {
+ if (!event->dev_db_clone)
+ return token->op == OP_NOMATCH;
+ r = sd_device_get_property_value(event->dev_db_clone, token->value, &val);
+ if (r == -ENOENT)
+ return token->op == OP_NOMATCH;
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r,
+ "Failed to get property '%s' from database: %m",
+ token->value);
+
+ r = device_add_property(dev, token->value, val);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to add property '%s=%s': %m",
+ token->value, val);
+ return token->op == OP_MATCH;
+ }
+ case TK_M_IMPORT_CMDLINE: {
+ _cleanup_free_ char *value = NULL;
+
+ r = proc_cmdline_get_key(token->value, PROC_CMDLINE_VALUE_OPTIONAL|PROC_CMDLINE_IGNORE_EFI_OPTIONS, &value);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r,
+ "Failed to read '%s' option from /proc/cmdline: %m",
+ token->value);
+ if (r == 0)
+ return token->op == OP_NOMATCH;
+
+ r = device_add_property(dev, token->value, value ?: "1");
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to add property '%s=%s': %m",
+ token->value, value ?: "1");
+ return token->op == OP_MATCH;
+ }
+ case TK_M_IMPORT_PARENT: {
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+ r = import_parent_into_properties(dev, buf);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r,
+ "Failed to import properties '%s' from parent: %m",
+ buf);
+ return token->op == (r > 0 ? OP_MATCH : OP_NOMATCH);
+ }
+ case TK_M_RESULT:
+ return token_match_string(token, event->program_result);
+ case TK_A_OPTIONS_STRING_ESCAPE_NONE:
+ event->esc = ESCAPE_NONE;
+ break;
+ case TK_A_OPTIONS_STRING_ESCAPE_REPLACE:
+ event->esc = ESCAPE_REPLACE;
+ break;
+ case TK_A_OPTIONS_DB_PERSIST:
+ device_set_db_persist(dev);
+ break;
+ case TK_A_OPTIONS_INOTIFY_WATCH:
+ if (event->inotify_watch_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->inotify_watch_final = true;
+
+ event->inotify_watch = token->data;
+ break;
+ case TK_A_OPTIONS_DEVLINK_PRIORITY:
+ device_set_devlink_priority(dev, PTR_TO_INT(token->data));
+ break;
+ case TK_A_OWNER: {
+ char owner[UTIL_NAME_SIZE];
+ const char *ow = owner;
+
+ if (event->owner_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->owner_final = true;
+
+ (void) udev_event_apply_format(event, token->value, owner, sizeof(owner), false);
+ r = get_user_creds(&ow, &event->uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ log_unknown_owner(dev, rules, r, "user", owner);
+ else
+ log_rule_debug(dev, rules, "OWNER %s(%u)", owner, event->uid);
+ break;
+ }
+ case TK_A_GROUP: {
+ char group[UTIL_NAME_SIZE];
+ const char *gr = group;
+
+ if (event->group_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->group_final = true;
+
+ (void) udev_event_apply_format(event, token->value, group, sizeof(group), false);
+ r = get_group_creds(&gr, &event->gid, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ log_unknown_owner(dev, rules, r, "group", group);
+ else
+ log_rule_debug(dev, rules, "GROUP %s(%u)", group, event->gid);
+ break;
+ }
+ case TK_A_MODE: {
+ char mode_str[UTIL_NAME_SIZE];
+
+ if (event->mode_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->mode_final = true;
+
+ (void) udev_event_apply_format(event, token->value, mode_str, sizeof(mode_str), false);
+ r = parse_mode(mode_str, &event->mode);
+ if (r < 0)
+ log_rule_error_errno(dev, rules, r, "Failed to parse mode '%s', ignoring: %m", mode_str);
+ else
+ log_rule_debug(dev, rules, "MODE %#o", event->mode);
+ break;
+ }
+ case TK_A_OWNER_ID:
+ if (event->owner_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->owner_final = true;
+ if (!token->data)
+ break;
+ event->uid = PTR_TO_UID(token->data);
+ log_rule_debug(dev, rules, "OWNER %u", event->uid);
+ break;
+ case TK_A_GROUP_ID:
+ if (event->group_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->group_final = true;
+ if (!token->data)
+ break;
+ event->gid = PTR_TO_GID(token->data);
+ log_rule_debug(dev, rules, "GROUP %u", event->gid);
+ break;
+ case TK_A_MODE_ID:
+ if (event->mode_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->mode_final = true;
+ if (!token->data)
+ break;
+ event->mode = PTR_TO_MODE(token->data);
+ log_rule_debug(dev, rules, "MODE %#o", event->mode);
+ break;
+ case TK_A_SECLABEL: {
+ _cleanup_free_ char *name = NULL, *label = NULL;
+ char label_str[UTIL_LINE_SIZE] = {};
+
+ name = strdup(token->data);
+ if (!name)
+ return log_oom();
+
+ (void) udev_event_apply_format(event, token->value, label_str, sizeof(label_str), false);
+ if (!isempty(label_str))
+ label = strdup(label_str);
+ else
+ label = strdup(token->value);
+ if (!label)
+ return log_oom();
+
+ if (token->op == OP_ASSIGN)
+ ordered_hashmap_clear_free_free(event->seclabel_list);
+
+ r = ordered_hashmap_ensure_allocated(&event->seclabel_list, NULL);
+ if (r < 0)
+ return log_oom();
+
+ r = ordered_hashmap_put(event->seclabel_list, name, label);
+ if (r < 0)
+ return log_oom();
+ log_rule_debug(dev, rules, "SECLABEL{%s}='%s'", name, label);
+ name = label = NULL;
+ break;
+ }
+ case TK_A_ENV: {
+ const char *name = token->data;
+ char value_new[UTIL_NAME_SIZE], *p = value_new;
+ size_t l = sizeof(value_new);
+
+ if (isempty(token->value)) {
+ if (token->op == OP_ADD)
+ break;
+ r = device_add_property(dev, name, NULL);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to remove property '%s': %m", name);
+ break;
+ }
+
+ if (token->op == OP_ADD &&
+ sd_device_get_property_value(dev, name, &val) >= 0)
+ l = strpcpyl(&p, l, val, " ", NULL);
+
+ (void) udev_event_apply_format(event, token->value, p, l, false);
+
+ r = device_add_property(dev, name, value_new);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to add property '%s=%s': %m", name, value_new);
+ break;
+ }
+ case TK_A_TAG: {
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+ if (token->op == OP_ASSIGN)
+ device_cleanup_tags(dev);
+
+ if (buf[strspn(buf, ALPHANUMERICAL "-_")] != '\0') {
+ log_rule_error(dev, rules, "Invalid tag name '%s', ignoring", buf);
+ break;
+ }
+ if (token->op == OP_REMOVE)
+ device_remove_tag(dev, buf);
+ else {
+ r = device_add_tag(dev, buf, true);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to add tag '%s': %m", buf);
+ }
+ break;
+ }
+ case TK_A_NAME: {
+ if (event->name_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->name_final = true;
+
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+ if (IN_SET(event->esc, ESCAPE_UNSET, ESCAPE_REPLACE)) {
+ count = util_replace_chars(buf, "/");
+ if (count > 0)
+ log_rule_debug(dev, rules, "Replaced %zu character(s) from result of NAME=\"%s\"",
+ count, token->value);
+ }
+ if (sd_device_get_devnum(dev, NULL) >= 0 &&
+ (sd_device_get_devname(dev, &val) < 0 ||
+ !streq_ptr(buf, path_startswith(val, "/dev/")))) {
+ log_rule_error(dev, rules,
+ "Kernel device nodes cannot be renamed, ignoring NAME=\"%s\"; please fix it.",
+ token->value);
+ break;
+ }
+ if (free_and_strdup(&event->name, buf) < 0)
+ return log_oom();
+
+ log_rule_debug(dev, rules, "NAME '%s'", event->name);
+ break;
+ }
+ case TK_A_DEVLINK: {
+ char *p;
+
+ if (event->devlink_final)
+ break;
+ if (sd_device_get_devnum(dev, NULL) < 0)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->devlink_final = true;
+ if (IN_SET(token->op, OP_ASSIGN, OP_ASSIGN_FINAL))
+ device_cleanup_devlinks(dev);
+
+ /* allow multiple symlinks separated by spaces */
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), event->esc != ESCAPE_NONE);
+ if (event->esc == ESCAPE_UNSET)
+ count = util_replace_chars(buf, "/ ");
+ else if (event->esc == ESCAPE_REPLACE)
+ count = util_replace_chars(buf, "/");
+ else
+ count = 0;
+ if (count > 0)
+ log_rule_debug(dev, rules, "Replaced %zu character(s) from result of LINK", count);
+
+ p = skip_leading_chars(buf, NULL);
+ while (!isempty(p)) {
+ char filename[UTIL_PATH_SIZE], *next;
+
+ next = strchr(p, ' ');
+ if (next) {
+ *next++ = '\0';
+ next = skip_leading_chars(next, NULL);
+ }
+
+ strscpyl(filename, sizeof(filename), "/dev/", p, NULL);
+ r = device_add_devlink(dev, filename);
+ if (r < 0)
+ return log_rule_error_errno(dev, rules, r, "Failed to add devlink '%s': %m", filename);
+
+ log_rule_debug(dev, rules, "LINK '%s'", p);
+ p = next;
+ }
+ break;
+ }
+ case TK_A_ATTR: {
+ const char *key_name = token->data;
+ char value[UTIL_NAME_SIZE];
+
+ if (util_resolve_subsys_kernel(key_name, buf, sizeof(buf), false) < 0 &&
+ sd_device_get_syspath(dev, &val) >= 0)
+ strscpyl(buf, sizeof(buf), val, "/", key_name, NULL);
+
+ r = attr_subst_subdir(buf);
+ if (r < 0) {
+ log_rule_error_errno(dev, rules, r, "Could not find file matches '%s', ignoring: %m", buf);
+ break;
+ }
+ (void) udev_event_apply_format(event, token->value, value, sizeof(value), false);
+
+ log_rule_debug(dev, rules, "ATTR '%s' writing '%s'", buf, value);
+ r = write_string_file(buf, value, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_AVOID_NEWLINE);
+ if (r < 0)
+ log_rule_error_errno(dev, rules, r, "Failed to write ATTR{%s}, ignoring: %m", buf);
+ break;
+ }
+ case TK_A_SYSCTL: {
+ char value[UTIL_NAME_SIZE];
+
+ (void) udev_event_apply_format(event, token->data, buf, sizeof(buf), false);
+ (void) udev_event_apply_format(event, token->value, value, sizeof(value), false);
+ sysctl_normalize(buf);
+ log_rule_debug(dev, rules, "SYSCTL '%s' writing '%s'", buf, value);
+ r = sysctl_write(buf, value);
+ if (r < 0)
+ log_rule_error_errno(dev, rules, r, "Failed to write SYSCTL{%s}='%s', ignoring: %m", buf, value);
+ break;
+ }
+ case TK_A_RUN_BUILTIN:
+ case TK_A_RUN_PROGRAM: {
+ _cleanup_free_ char *cmd = NULL;
+
+ if (event->run_final)
+ break;
+ if (token->op == OP_ASSIGN_FINAL)
+ event->run_final = true;
+
+ if (IN_SET(token->op, OP_ASSIGN, OP_ASSIGN_FINAL))
+ ordered_hashmap_clear_free_key(event->run_list);
+
+ r = ordered_hashmap_ensure_allocated(&event->run_list, NULL);
+ if (r < 0)
+ return log_oom();
+
+ (void) udev_event_apply_format(event, token->value, buf, sizeof(buf), false);
+
+ cmd = strdup(buf);
+ if (!cmd)
+ return log_oom();
+
+ r = ordered_hashmap_put(event->run_list, cmd, token->data);
+ if (r < 0)
+ return log_oom();
+
+ TAKE_PTR(cmd);
+
+ log_rule_debug(dev, rules, "RUN '%s'", token->value);
+ break;
+ }
+ case TK_A_OPTIONS_STATIC_NODE:
+ /* do nothing for events. */
+ break;
+ default:
+ assert_not_reached("Invalid token type");
+ }
+
+ return true;
+}
+
+static bool token_is_for_parents(UdevRuleToken *token) {
+ return token->type >= TK_M_PARENTS_KERNEL && token->type <= TK_M_PARENTS_TAG;
+}
+
+static int udev_rule_apply_parent_token_to_event(
+ UdevRules *rules,
+ UdevEvent *event,
+ int timeout_signal) {
+
+ UdevRuleLine *line;
+ UdevRuleToken *head;
+ int r;
+
+ line = rules->current_file->current_line;
+ head = rules->current_file->current_line->current_token;
+ event->dev_parent = event->dev;
+ for (;;) {
+ LIST_FOREACH(tokens, line->current_token, head) {
+ if (!token_is_for_parents(line->current_token))
+ return true; /* All parent tokens match. */
+ r = udev_rule_apply_token_to_event(rules, event->dev_parent, event, 0, timeout_signal, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+ if (!line->current_token)
+ /* All parent tokens match. But no assign tokens in the line. Hmm... */
+ return true;
+
+ if (sd_device_get_parent(event->dev_parent, &event->dev_parent) < 0) {
+ event->dev_parent = NULL;
+ return false;
+ }
+ }
+}
+
+static int udev_rule_apply_line_to_event(
+ UdevRules *rules,
+ UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ Hashmap *properties_list,
+ UdevRuleLine **next_line) {
+
+ UdevRuleLine *line = rules->current_file->current_line;
+ UdevRuleLineType mask = LINE_HAS_GOTO | LINE_UPDATE_SOMETHING;
+ UdevRuleToken *token, *next_token;
+ bool parents_done = false;
+ DeviceAction action;
+ int r;
+
+ r = device_get_action(event->dev, &action);
+ if (r < 0)
+ return r;
+
+ if (action != DEVICE_ACTION_REMOVE) {
+ if (sd_device_get_devnum(event->dev, NULL) >= 0)
+ mask |= LINE_HAS_DEVLINK;
+
+ if (sd_device_get_ifindex(event->dev, NULL) >= 0)
+ mask |= LINE_HAS_NAME;
+ }
+
+ if ((line->type & mask) == 0)
+ return 0;
+
+ event->esc = ESCAPE_UNSET;
+ LIST_FOREACH_SAFE(tokens, token, next_token, line->tokens) {
+ line->current_token = token;
+
+ if (token_is_for_parents(token)) {
+ if (parents_done)
+ continue;
+
+ r = udev_rule_apply_parent_token_to_event(rules, event, timeout_signal);
+ if (r <= 0)
+ return r;
+
+ parents_done = true;
+ continue;
+ }
+
+ r = udev_rule_apply_token_to_event(rules, event->dev, event, timeout_usec, timeout_signal, properties_list);
+ if (r <= 0)
+ return r;
+ }
+
+ if (line->goto_line)
+ *next_line = line->goto_line;
+
+ return 0;
+}
+
+int udev_rules_apply_to_event(
+ UdevRules *rules,
+ UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ Hashmap *properties_list) {
+
+ UdevRuleFile *file;
+ UdevRuleLine *next_line;
+ int r;
+
+ assert(rules);
+ assert(event);
+
+ LIST_FOREACH(rule_files, file, rules->rule_files) {
+ rules->current_file = file;
+ LIST_FOREACH_SAFE(rule_lines, file->current_line, next_line, file->rule_lines) {
+ r = udev_rule_apply_line_to_event(rules, event, timeout_usec, timeout_signal, properties_list, &next_line);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int apply_static_dev_perms(const char *devnode, uid_t uid, gid_t gid, mode_t mode, char **tags) {
+ char device_node[UTIL_PATH_SIZE], tags_dir[UTIL_PATH_SIZE], tag_symlink[UTIL_PATH_SIZE];
+ _cleanup_free_ char *unescaped_filename = NULL;
+ struct stat stats;
+ char **t;
+ int r;
+
+ assert(devnode);
+
+ if (uid == UID_INVALID && gid == GID_INVALID && mode == MODE_INVALID && !tags)
+ return 0;
+
+ strscpyl(device_node, sizeof(device_node), "/dev/", devnode, NULL);
+ if (stat(device_node, &stats) < 0) {
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to stat %s: %m", device_node);
+ return 0;
+ }
+
+ if (!S_ISBLK(stats.st_mode) && !S_ISCHR(stats.st_mode)) {
+ log_warning("%s is neither block nor character device, ignoring.", device_node);
+ return 0;
+ }
+
+ if (!strv_isempty(tags)) {
+ unescaped_filename = xescape(devnode, "/.");
+ if (!unescaped_filename)
+ return log_oom();
+ }
+
+ /* export the tags to a directory as symlinks, allowing otherwise dead nodes to be tagged */
+ STRV_FOREACH(t, tags) {
+ strscpyl(tags_dir, sizeof(tags_dir), "/run/udev/static_node-tags/", *t, "/", NULL);
+ r = mkdir_p(tags_dir, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s: %m", tags_dir);
+
+ strscpyl(tag_symlink, sizeof(tag_symlink), tags_dir, unescaped_filename, NULL);
+ r = symlink(device_node, tag_symlink);
+ if (r < 0 && errno != EEXIST)
+ return log_error_errno(errno, "Failed to create symlink %s -> %s: %m",
+ tag_symlink, device_node);
+ }
+
+ /* don't touch the permissions if only the tags were set */
+ if (uid == UID_INVALID && gid == GID_INVALID && mode == MODE_INVALID)
+ return 0;
+
+ if (mode == MODE_INVALID)
+ mode = gid_is_valid(gid) ? 0660 : 0600;
+ if (!uid_is_valid(uid))
+ uid = 0;
+ if (!gid_is_valid(gid))
+ gid = 0;
+
+ r = chmod_and_chown(device_node, mode, uid, gid);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown '%s' %u %u: %m", device_node, uid, gid);
+ else
+ log_debug("chown '%s' %u:%u with mode %#o", device_node, uid, gid, mode);
+
+ (void) utimensat(AT_FDCWD, device_node, NULL, 0);
+ return 0;
+}
+
+static int udev_rule_line_apply_static_dev_perms(UdevRuleLine *rule_line) {
+ UdevRuleToken *token;
+ _cleanup_strv_free_ char **tags = NULL;
+ uid_t uid = UID_INVALID;
+ gid_t gid = GID_INVALID;
+ mode_t mode = MODE_INVALID;
+ int r;
+
+ assert(rule_line);
+
+ if (!FLAGS_SET(rule_line->type, LINE_HAS_STATIC_NODE))
+ return 0;
+
+ LIST_FOREACH(tokens, token, rule_line->tokens)
+ if (token->type == TK_A_OWNER_ID)
+ uid = PTR_TO_UID(token->data);
+ else if (token->type == TK_A_GROUP_ID)
+ gid = PTR_TO_GID(token->data);
+ else if (token->type == TK_A_MODE_ID)
+ mode = PTR_TO_MODE(token->data);
+ else if (token->type == TK_A_TAG) {
+ r = strv_extend(&tags, token->value);
+ if (r < 0)
+ return log_oom();
+ } else if (token->type == TK_A_OPTIONS_STATIC_NODE) {
+ r = apply_static_dev_perms(token->value, uid, gid, mode, tags);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int udev_rules_apply_static_dev_perms(UdevRules *rules) {
+ UdevRuleFile *file;
+ UdevRuleLine *line;
+ int r;
+
+ assert(rules);
+
+ LIST_FOREACH(rule_files, file, rules->rule_files)
+ LIST_FOREACH(rule_lines, line, file->rule_lines) {
+ r = udev_rule_line_apply_static_dev_perms(line);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/udev/udev-rules.h b/src/udev/udev-rules.h
new file mode 100644
index 0000000..3f40a53
--- /dev/null
+++ b/src/udev/udev-rules.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+#include "hashmap.h"
+#include "time-util.h"
+#include "udev-util.h"
+
+typedef struct UdevRules UdevRules;
+typedef struct UdevEvent UdevEvent;
+
+typedef enum {
+ ESCAPE_UNSET,
+ ESCAPE_NONE, /* OPTIONS="string_escape=none" */
+ ESCAPE_REPLACE, /* OPTIONS="string_escape=replace" */
+ _ESCAPE_TYPE_MAX,
+ _ESCAPE_TYPE_INVALID = -1
+} UdevRuleEscapeType;
+
+int udev_rules_parse_file(UdevRules *rules, const char *filename);
+UdevRules* udev_rules_new(ResolveNameTiming resolve_name_timing);
+int udev_rules_load(UdevRules **ret_rules, ResolveNameTiming resolve_name_timing);
+UdevRules *udev_rules_free(UdevRules *rules);
+DEFINE_TRIVIAL_CLEANUP_FUNC(UdevRules*, udev_rules_free);
+
+bool udev_rules_check_timestamp(UdevRules *rules);
+int udev_rules_apply_to_event(UdevRules *rules, UdevEvent *event,
+ usec_t timeout_usec,
+ int timeout_signal,
+ Hashmap *properties_list);
+int udev_rules_apply_static_dev_perms(UdevRules *rules);
diff --git a/src/udev/udev-watch.c b/src/udev/udev-watch.c
new file mode 100644
index 0000000..8656fb0
--- /dev/null
+++ b/src/udev/udev-watch.c
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © 2009 Canonical Ltd.
+ * Copyright © 2009 Scott James Remnant <scott@netsplit.com>
+ */
+
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "stdio-util.h"
+#include "udev-watch.h"
+
+static int inotify_fd = -1;
+
+/* inotify descriptor, will be shared with rules directory;
+ * set to cloexec since we need our children to be able to add
+ * watches for us. */
+int udev_watch_init(void) {
+ inotify_fd = inotify_init1(IN_CLOEXEC);
+ if (inotify_fd < 0)
+ return -errno;
+
+ return inotify_fd;
+}
+
+/* Move any old watches directory out of the way, and then restore the watches. */
+int udev_watch_restore(void) {
+ struct dirent *ent;
+ DIR *dir;
+ int r;
+
+ if (inotify_fd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid inotify descriptor.");
+
+ if (rename("/run/udev/watch", "/run/udev/watch.old") < 0) {
+ if (errno != ENOENT)
+ return log_warning_errno(errno, "Failed to move watches directory /run/udev/watch. Old watches will not be restored: %m");
+
+ return 0;
+ }
+
+ dir = opendir("/run/udev/watch.old");
+ if (!dir)
+ return log_warning_errno(errno, "Failed to open old watches directory /run/udev/watch.old. Old watches will not be restored: %m");
+
+ FOREACH_DIRENT_ALL(ent, dir, break) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ _cleanup_free_ char *device = NULL;
+
+ if (ent->d_name[0] == '.')
+ continue;
+
+ r = readlinkat_malloc(dirfd(dir), ent->d_name, &device);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read link '/run/udev/watch.old/%s', ignoring: %m", ent->d_name);
+ goto unlink;
+ }
+
+ r = sd_device_new_from_device_id(&dev, device);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to create sd_device object for '%s', ignoring: %m", device);
+ goto unlink;
+ }
+
+ log_device_debug(dev, "Restoring old watch");
+ (void) udev_watch_begin(dev);
+unlink:
+ (void) unlinkat(dirfd(dir), ent->d_name, 0);
+ }
+
+ (void) closedir(dir);
+ (void) rmdir("/run/udev/watch.old");
+
+ return 0;
+}
+
+int udev_watch_begin(sd_device *dev) {
+ char filename[STRLEN("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
+ const char *devnode, *id_filename;
+ int wd, r;
+
+ if (inotify_fd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid inotify descriptor.");
+
+ r = sd_device_get_devname(dev, &devnode);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get device name: %m");
+
+ log_device_debug(dev, "Adding watch on '%s'", devnode);
+ wd = inotify_add_watch(inotify_fd, devnode, IN_CLOSE_WRITE);
+ if (wd < 0)
+ return log_device_full_errno(dev, errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to add device '%s' to watch: %m", devnode);
+
+ device_set_watch_handle(dev, wd);
+
+ xsprintf(filename, "/run/udev/watch/%d", wd);
+ r = mkdir_parents(filename, 0755);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to create parent directory of '%s': %m", filename);
+ (void) unlink(filename);
+
+ r = device_get_id_filename(dev, &id_filename);
+ if (r < 0)
+ return log_device_error_errno(dev, r, "Failed to get device id-filename: %m");
+
+ if (symlink(id_filename, filename) < 0)
+ return log_device_error_errno(dev, errno, "Failed to create symlink %s: %m", filename);
+
+ return 0;
+}
+
+int udev_watch_end(sd_device *dev) {
+ char filename[STRLEN("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
+ int wd, r;
+
+ if (inotify_fd < 0)
+ return 0; /* Nothing to do. */
+
+ r = device_get_watch_handle(dev, &wd);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get watch handle, ignoring: %m");
+
+ log_device_debug(dev, "Removing watch");
+ (void) inotify_rm_watch(inotify_fd, wd);
+
+ xsprintf(filename, "/run/udev/watch/%d", wd);
+ (void) unlink(filename);
+
+ device_set_watch_handle(dev, -1);
+
+ return 0;
+}
+
+int udev_watch_lookup(int wd, sd_device **ret) {
+ char filename[STRLEN("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
+ _cleanup_free_ char *device = NULL;
+ int r;
+
+ assert(ret);
+
+ if (inotify_fd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid inotify descriptor.");
+
+ if (wd < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid watch handle.");
+
+ xsprintf(filename, "/run/udev/watch/%d", wd);
+ r = readlink_malloc(filename, &device);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read link '%s': %m", filename);
+
+ r = sd_device_new_from_device_id(ret, device);
+ if (r == -ENODEV)
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create sd_device object for '%s': %m", device);
+
+ return 1;
+}
diff --git a/src/udev/udev-watch.h b/src/udev/udev-watch.h
new file mode 100644
index 0000000..a15fa27
--- /dev/null
+++ b/src/udev/udev-watch.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+int udev_watch_init(void);
+int udev_watch_restore(void);
+int udev_watch_begin(sd_device *dev);
+int udev_watch_end(sd_device *dev);
+int udev_watch_lookup(int wd, sd_device **ret);
diff --git a/src/udev/udev.conf b/src/udev/udev.conf
new file mode 100644
index 0000000..07d7f0c
--- /dev/null
+++ b/src/udev/udev.conf
@@ -0,0 +1,11 @@
+# see udev.conf(5) for details
+#
+# udevd is also started in the initrd. When this file is modified you might
+# also want to rebuild the initrd, so that it will include the modified configuration.
+
+#udev_log=info
+#children_max=
+#exec_delay=
+#event_timeout=180
+#timeout_signal=SIGKILL
+#resolve_names=early
diff --git a/src/udev/udev.pc.in b/src/udev/udev.pc.in
new file mode 100644
index 0000000..7b4f400
--- /dev/null
+++ b/src/udev/udev.pc.in
@@ -0,0 +1,6 @@
+Name: udev
+Description: udev
+Version: @PROJECT_VERSION@
+
+udev_dir=@udevlibexecdir@
+udevdir=${udev_dir}
diff --git a/src/udev/udevadm-control.c b/src/udev/udevadm-control.c
new file mode 100644
index 0000000..ef23a6c
--- /dev/null
+++ b/src/udev/udevadm-control.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "parse-util.h"
+#include "process-util.h"
+#include "syslog-util.h"
+#include "time-util.h"
+#include "udevadm.h"
+#include "udev-ctrl.h"
+#include "util.h"
+#include "virt.h"
+
+static int help(void) {
+ printf("%s control OPTION\n\n"
+ "Control the udev daemon.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -e --exit Instruct the daemon to cleanup and exit\n"
+ " -l --log-level=LEVEL Set the udev log level for the daemon\n"
+ " -s --stop-exec-queue Do not execute events, queue only\n"
+ " -S --start-exec-queue Execute events, flush queue\n"
+ " -R --reload Reload rules and databases\n"
+ " -p --property=KEY=VALUE Set a global property for all events\n"
+ " -m --children-max=N Maximum number of children\n"
+ " --ping Wait for udev to respond to a ping message\n"
+ " -t --timeout=SECONDS Maximum time to block for a reply\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+int control_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(udev_ctrl_unrefp) struct udev_ctrl *uctrl = NULL;
+ usec_t timeout = 60 * USEC_PER_SEC;
+ int c, r;
+
+ enum {
+ ARG_PING = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "exit", no_argument, NULL, 'e' },
+ { "log-level", required_argument, NULL, 'l' },
+ { "log-priority", required_argument, NULL, 'l' }, /* for backward compatibility */
+ { "stop-exec-queue", no_argument, NULL, 's' },
+ { "start-exec-queue", no_argument, NULL, 'S' },
+ { "reload", no_argument, NULL, 'R' },
+ { "reload-rules", no_argument, NULL, 'R' }, /* alias for -R */
+ { "property", required_argument, NULL, 'p' },
+ { "env", required_argument, NULL, 'p' }, /* alias for -p */
+ { "children-max", required_argument, NULL, 'm' },
+ { "ping", no_argument, NULL, ARG_PING },
+ { "timeout", required_argument, NULL, 't' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ if (running_in_chroot() > 0) {
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ if (argc <= 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This command expects one or more options.");
+
+ r = udev_ctrl_new(&uctrl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize udev control: %m");
+
+ while ((c = getopt_long(argc, argv, "el:sSRp:m:t:Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'e':
+ r = udev_ctrl_send_exit(uctrl);
+ if (r == -ENOANO)
+ log_warning("Cannot specify --exit after --exit, ignoring.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to send exit request: %m");
+ break;
+ case 'l':
+ r = log_level_from_string(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log level '%s': %m", optarg);
+
+ r = udev_ctrl_send_set_log_level(uctrl, r);
+ if (r == -ENOANO)
+ log_warning("Cannot specify --log-level after --exit, ignoring.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to send request to set log level: %m");
+ break;
+ case 's':
+ r = udev_ctrl_send_stop_exec_queue(uctrl);
+ if (r == -ENOANO)
+ log_warning("Cannot specify --stop-exec-queue after --exit, ignoring.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to send request to stop exec queue: %m");
+ break;
+ case 'S':
+ r = udev_ctrl_send_start_exec_queue(uctrl);
+ if (r == -ENOANO)
+ log_warning("Cannot specify --start-exec-queue after --exit, ignoring.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to send request to start exec queue: %m");
+ break;
+ case 'R':
+ r = udev_ctrl_send_reload(uctrl);
+ if (r == -ENOANO)
+ log_warning("Cannot specify --reload after --exit, ignoring.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to send reload request: %m");
+ break;
+ case 'p':
+ if (!strchr(optarg, '='))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "expect <KEY>=<value> instead of '%s'", optarg);
+
+ r = udev_ctrl_send_set_env(uctrl, optarg);
+ if (r == -ENOANO)
+ log_warning("Cannot specify --property after --exit, ignoring.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to send request to update environment: %m");
+ break;
+ case 'm': {
+ unsigned i;
+
+ r = safe_atou(optarg, &i);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse maximum number of events '%s': %m", optarg);
+
+ r = udev_ctrl_send_set_children_max(uctrl, i);
+ if (r == -ENOANO)
+ log_warning("Cannot specify --children-max after --exit, ignoring.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to send request to set number of children: %m");
+ break;
+ }
+ case ARG_PING:
+ r = udev_ctrl_send_ping(uctrl);
+ if (r == -ENOANO)
+ log_error("Cannot specify --ping after --exit, ignoring.");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to send a ping message: %m");
+ break;
+ case 't':
+ r = parse_sec(optarg, &timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timeout value '%s': %m", optarg);
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option.");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Extraneous argument: %s", argv[optind]);
+
+ r = udev_ctrl_wait(uctrl, timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for daemon to reply: %m");
+
+ return 0;
+}
diff --git a/src/udev/udevadm-hwdb.c b/src/udev/udevadm-hwdb.c
new file mode 100644
index 0000000..3d21922
--- /dev/null
+++ b/src/udev/udevadm-hwdb.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+
+#include "hwdb-util.h"
+#include "udevadm.h"
+#include "util.h"
+
+static const char *arg_test = NULL;
+static const char *arg_root = NULL;
+static const char *arg_hwdb_bin_dir = NULL;
+static bool arg_update = false;
+static bool arg_strict = false;
+
+static int help(void) {
+ printf("%s hwdb [OPTIONS]\n\n"
+ " -h --help Print this message\n"
+ " -V --version Print version of the program\n"
+ " -u --update Update the hardware database\n"
+ " -s --strict When updating, return non-zero exit value on any parsing error\n"
+ " --usr Generate in " UDEVLIBEXECDIR " instead of /etc/udev\n"
+ " -t --test=MODALIAS Query database and print result\n"
+ " -r --root=PATH Alternative root path in the filesystem\n\n"
+ "NOTE:\n"
+ "The sub-command 'hwdb' is deprecated, and is left for backwards compatibility.\n"
+ "Please use systemd-hwdb instead.\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_USR = 0x100,
+ };
+
+ static const struct option options[] = {
+ { "update", no_argument, NULL, 'u' },
+ { "usr", no_argument, NULL, ARG_USR },
+ { "strict", no_argument, NULL, 's' },
+ { "test", required_argument, NULL, 't' },
+ { "root", required_argument, NULL, 'r' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ int c;
+
+ while ((c = getopt_long(argc, argv, "ust:r:Vh", options, NULL)) >= 0)
+ switch(c) {
+ case 'u':
+ arg_update = true;
+ break;
+ case ARG_USR:
+ arg_hwdb_bin_dir = UDEVLIBEXECDIR;
+ break;
+ case 's':
+ arg_strict = true;
+ break;
+ case 't':
+ arg_test = optarg;
+ break;
+ case 'r':
+ arg_root = optarg;
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ return 1;
+}
+
+int hwdb_main(int argc, char *argv[], void *userdata) {
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (!arg_update && !arg_test)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Either --update or --test must be used.");
+
+ if (arg_update) {
+ r = hwdb_update(arg_root, arg_hwdb_bin_dir, arg_strict, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (arg_test)
+ return hwdb_query(arg_test);
+
+ return 0;
+}
diff --git a/src/udev/udevadm-info.c b/src/udev/udevadm-info.c
new file mode 100644
index 0000000..5ff6256
--- /dev/null
+++ b/src/udev/udevadm-info.c
@@ -0,0 +1,519 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "sort-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+#include "udevadm-util.h"
+#include "udevadm.h"
+
+typedef enum ActionType {
+ ACTION_QUERY,
+ ACTION_ATTRIBUTE_WALK,
+ ACTION_DEVICE_ID_FILE,
+} ActionType;
+
+typedef enum QueryType {
+ QUERY_NAME,
+ QUERY_PATH,
+ QUERY_SYMLINK,
+ QUERY_PROPERTY,
+ QUERY_ALL,
+} QueryType;
+
+static bool arg_root = false;
+static bool arg_export = false;
+static const char *arg_export_prefix = NULL;
+static usec_t arg_wait_for_initialization_timeout = 0;
+
+static bool skip_attribute(const char *name) {
+ /* Those are either displayed separately or should not be shown at all. */
+ return STR_IN_SET(name,
+ "uevent",
+ "dev",
+ "modalias",
+ "resource",
+ "driver",
+ "subsystem",
+ "module");
+}
+
+typedef struct SysAttr {
+ const char *name;
+ const char *value;
+} SysAttr;
+
+static int sysattr_compare(const SysAttr *a, const SysAttr *b) {
+ return strcmp(a->name, b->name);
+}
+
+static int print_all_attributes(sd_device *device, bool is_parent) {
+ _cleanup_free_ SysAttr *sysattrs = NULL;
+ size_t n_items = 0, n_allocated = 0;
+ const char *name, *value;
+
+ value = NULL;
+ (void) sd_device_get_devpath(device, &value);
+ printf(" looking at %sdevice '%s':\n", is_parent ? "parent " : "", strempty(value));
+
+ value = NULL;
+ (void) sd_device_get_sysname(device, &value);
+ printf(" %s==\"%s\"\n", is_parent ? "KERNELS" : "KERNEL", strempty(value));
+
+ value = NULL;
+ (void) sd_device_get_subsystem(device, &value);
+ printf(" %s==\"%s\"\n", is_parent ? "SUBSYSTEMS" : "SUBSYSTEM", strempty(value));
+
+ value = NULL;
+ (void) sd_device_get_driver(device, &value);
+ printf(" %s==\"%s\"\n", is_parent ? "DRIVERS" : "DRIVER", strempty(value));
+
+ FOREACH_DEVICE_SYSATTR(device, name) {
+ size_t len;
+
+ if (skip_attribute(name))
+ continue;
+
+ if (sd_device_get_sysattr_value(device, name, &value) < 0)
+ continue;
+
+ /* skip any values that look like a path */
+ if (value[0] == '/')
+ continue;
+
+ /* skip nonprintable attributes */
+ len = strlen(value);
+ while (len > 0 && isprint((unsigned char) value[len-1]))
+ len--;
+ if (len > 0)
+ continue;
+
+ if (!GREEDY_REALLOC(sysattrs, n_allocated, n_items + 1))
+ return log_oom();
+
+ sysattrs[n_items] = (SysAttr) {
+ .name = name,
+ .value = value,
+ };
+ n_items++;
+ }
+
+ typesafe_qsort(sysattrs, n_items, sysattr_compare);
+
+ for (size_t i = 0; i < n_items; i++)
+ printf(" %s{%s}==\"%s\"\n", is_parent ? "ATTRS" : "ATTR", sysattrs[i].name, sysattrs[i].value);
+
+ puts("");
+
+ return 0;
+}
+
+static int print_device_chain(sd_device *device) {
+ sd_device *child, *parent;
+ int r;
+
+ printf("\n"
+ "Udevadm info starts with the device specified by the devpath and then\n"
+ "walks up the chain of parent devices. It prints for every device\n"
+ "found, all possible attributes in the udev rules key format.\n"
+ "A rule to match, can be composed by the attributes of the device\n"
+ "and the attributes from one single parent device.\n"
+ "\n");
+
+ r = print_all_attributes(device, false);
+ if (r < 0)
+ return r;
+
+ for (child = device; sd_device_get_parent(child, &parent) >= 0; child = parent) {
+ r = print_all_attributes(parent, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int print_record(sd_device *device) {
+ const char *str, *val;
+ int i;
+
+ (void) sd_device_get_devpath(device, &str);
+ printf("P: %s\n", str);
+
+ if (sd_device_get_devname(device, &str) >= 0) {
+ assert_se(val = path_startswith(str, "/dev/"));
+ printf("N: %s\n", val);
+ }
+
+ if (device_get_devlink_priority(device, &i) >= 0)
+ printf("L: %i\n", i);
+
+ FOREACH_DEVICE_DEVLINK(device, str) {
+ assert_se(val = path_startswith(str, "/dev/"));
+ printf("S: %s\n", val);
+ }
+
+ FOREACH_DEVICE_PROPERTY(device, str, val)
+ printf("E: %s=%s\n", str, val);
+
+ puts("");
+ return 0;
+}
+
+static int stat_device(const char *name, bool export, const char *prefix) {
+ struct stat statbuf;
+
+ if (stat(name, &statbuf) != 0)
+ return -errno;
+
+ if (export) {
+ if (!prefix)
+ prefix = "INFO_";
+ printf("%sMAJOR=%u\n"
+ "%sMINOR=%u\n",
+ prefix, major(statbuf.st_dev),
+ prefix, minor(statbuf.st_dev));
+ } else
+ printf("%u:%u\n", major(statbuf.st_dev), minor(statbuf.st_dev));
+ return 0;
+}
+
+static int export_devices(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *d;
+ int r;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return log_oom();
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set allowing uninitialized flag: %m");
+
+ r = device_enumerator_scan_devices(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to scan devices: %m");
+
+ FOREACH_DEVICE_AND_SUBSYSTEM(e, d)
+ (void) print_record(d);
+
+ return 0;
+}
+
+static void cleanup_dir(DIR *dir, mode_t mask, int depth) {
+ struct dirent *dent;
+
+ if (depth <= 0)
+ return;
+
+ FOREACH_DIRENT_ALL(dent, dir, break) {
+ struct stat stats;
+
+ if (dent->d_name[0] == '.')
+ continue;
+ if (fstatat(dirfd(dir), dent->d_name, &stats, AT_SYMLINK_NOFOLLOW) != 0)
+ continue;
+ if ((stats.st_mode & mask) != 0)
+ continue;
+ if (S_ISDIR(stats.st_mode)) {
+ _cleanup_closedir_ DIR *dir2 = NULL;
+
+ dir2 = fdopendir(openat(dirfd(dir), dent->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC));
+ if (dir2)
+ cleanup_dir(dir2, mask, depth-1);
+
+ (void) unlinkat(dirfd(dir), dent->d_name, AT_REMOVEDIR);
+ } else
+ (void) unlinkat(dirfd(dir), dent->d_name, 0);
+ }
+}
+
+static void cleanup_db(void) {
+ _cleanup_closedir_ DIR *dir1 = NULL, *dir2 = NULL, *dir3 = NULL, *dir4 = NULL, *dir5 = NULL;
+
+ (void) unlink("/run/udev/queue.bin");
+
+ dir1 = opendir("/run/udev/data");
+ if (dir1)
+ cleanup_dir(dir1, S_ISVTX, 1);
+
+ dir2 = opendir("/run/udev/links");
+ if (dir2)
+ cleanup_dir(dir2, 0, 2);
+
+ dir3 = opendir("/run/udev/tags");
+ if (dir3)
+ cleanup_dir(dir3, 0, 2);
+
+ dir4 = opendir("/run/udev/static_node-tags");
+ if (dir4)
+ cleanup_dir(dir4, 0, 2);
+
+ dir5 = opendir("/run/udev/watch");
+ if (dir5)
+ cleanup_dir(dir5, 0, 1);
+}
+
+static int query_device(QueryType query, sd_device* device) {
+ int r;
+
+ assert(device);
+
+ switch(query) {
+ case QUERY_NAME: {
+ const char *node;
+
+ r = sd_device_get_devname(device, &node);
+ if (r < 0)
+ return log_error_errno(r, "No device node found: %m");
+
+ if (!arg_root)
+ assert_se(node = path_startswith(node, "/dev/"));
+ printf("%s\n", node);
+ return 0;
+ }
+
+ case QUERY_SYMLINK: {
+ const char *devlink, *prefix = "";
+
+ FOREACH_DEVICE_DEVLINK(device, devlink) {
+ if (!arg_root)
+ assert_se(devlink = path_startswith(devlink, "/dev/"));
+ printf("%s%s", prefix, devlink);
+ prefix = " ";
+ }
+ puts("");
+ return 0;
+ }
+
+ case QUERY_PATH: {
+ const char *devpath;
+
+ r = sd_device_get_devpath(device, &devpath);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device path: %m");
+
+ printf("%s\n", devpath);
+ return 0;
+ }
+
+ case QUERY_PROPERTY: {
+ const char *key, *value;
+
+ FOREACH_DEVICE_PROPERTY(device, key, value)
+ if (arg_export)
+ printf("%s%s='%s'\n", strempty(arg_export_prefix), key, value);
+ else
+ printf("%s=%s\n", key, value);
+ return 0;
+ }
+
+ case QUERY_ALL:
+ return print_record(device);
+ }
+
+ assert_not_reached("unknown query type");
+ return 0;
+}
+
+static int help(void) {
+ printf("%s info [OPTIONS] [DEVPATH|FILE]\n\n"
+ "Query sysfs or the udev database.\n\n"
+ " -h --help Print this message\n"
+ " -V --version Print version of the program\n"
+ " -q --query=TYPE Query device information:\n"
+ " name Name of device node\n"
+ " symlink Pointing to node\n"
+ " path sysfs device path\n"
+ " property The device properties\n"
+ " all All values\n"
+ " -p --path=SYSPATH sysfs device path used for query or attribute walk\n"
+ " -n --name=NAME Node or symlink name used for query or attribute walk\n"
+ " -r --root Prepend dev directory to path names\n"
+ " -a --attribute-walk Print all key matches walking along the chain\n"
+ " of parent devices\n"
+ " -d --device-id-of-file=FILE Print major:minor of device containing this file\n"
+ " -x --export Export key/value pairs\n"
+ " -P --export-prefix Export the key name with a prefix\n"
+ " -e --export-db Export the content of the udev database\n"
+ " -c --cleanup-db Clean up the udev database\n"
+ " -w --wait-for-initialization[=SECONDS]\n"
+ " Wait for device to be initialized\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+int info_main(int argc, char *argv[], void *userdata) {
+ _cleanup_strv_free_ char **devices = NULL;
+ _cleanup_free_ char *name = NULL;
+ int c, r;
+
+ static const struct option options[] = {
+ { "name", required_argument, NULL, 'n' },
+ { "path", required_argument, NULL, 'p' },
+ { "query", required_argument, NULL, 'q' },
+ { "attribute-walk", no_argument, NULL, 'a' },
+ { "cleanup-db", no_argument, NULL, 'c' },
+ { "export-db", no_argument, NULL, 'e' },
+ { "root", no_argument, NULL, 'r' },
+ { "device-id-of-file", required_argument, NULL, 'd' },
+ { "export", no_argument, NULL, 'x' },
+ { "export-prefix", required_argument, NULL, 'P' },
+ { "wait-for-initialization", optional_argument, NULL, 'w' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ ActionType action = ACTION_QUERY;
+ QueryType query = QUERY_ALL;
+
+ while ((c = getopt_long(argc, argv, "aced:n:p:q:rxP:w::Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'n':
+ case 'p': {
+ const char *prefix = c == 'n' ? "/dev/" : "/sys/";
+ char *path;
+
+ path = path_join(path_startswith(optarg, prefix) ? NULL : prefix, optarg);
+ if (!path)
+ return log_oom();
+
+ r = strv_consume(&devices, path);
+ if (r < 0)
+ return log_oom();
+ break;
+ }
+
+ case 'q':
+ action = ACTION_QUERY;
+ if (streq(optarg, "property") || streq(optarg, "env"))
+ query = QUERY_PROPERTY;
+ else if (streq(optarg, "name"))
+ query = QUERY_NAME;
+ else if (streq(optarg, "symlink"))
+ query = QUERY_SYMLINK;
+ else if (streq(optarg, "path"))
+ query = QUERY_PATH;
+ else if (streq(optarg, "all"))
+ query = QUERY_ALL;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "unknown query type");
+ break;
+ case 'r':
+ arg_root = true;
+ break;
+ case 'd':
+ action = ACTION_DEVICE_ID_FILE;
+ r = free_and_strdup(&name, optarg);
+ if (r < 0)
+ return log_oom();
+ break;
+ case 'a':
+ action = ACTION_ATTRIBUTE_WALK;
+ break;
+ case 'e':
+ return export_devices();
+ case 'c':
+ cleanup_db();
+ return 0;
+ case 'x':
+ arg_export = true;
+ break;
+ case 'P':
+ arg_export = true;
+ arg_export_prefix = optarg;
+ break;
+ case 'w':
+ if (optarg) {
+ r = parse_sec(optarg, &arg_wait_for_initialization_timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timeout value: %m");
+ } else
+ arg_wait_for_initialization_timeout = USEC_INFINITY;
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ if (action == ACTION_DEVICE_ID_FILE) {
+ if (argv[optind])
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Positional arguments are not allowed with -d/--device-id-of-file.");
+ assert(name);
+ return stat_device(name, arg_export, arg_export_prefix);
+ }
+
+ r = strv_extend_strv(&devices, argv + optind, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build argument list: %m");
+
+ if (strv_isempty(devices))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "A device name or path is required");
+ if (action == ACTION_ATTRIBUTE_WALK && strv_length(devices) > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Only one device may be specified with -a/--attribute-walk");
+
+ char **p;
+ STRV_FOREACH(p, devices) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+
+ r = find_device(*p, NULL, &device);
+ if (r == -EINVAL)
+ return log_error_errno(r, "Bad argument \"%s\", expected an absolute path in /dev/ or /sys or a unit name: %m", *p);
+ if (r < 0)
+ return log_error_errno(r, "Unknown device \"%s\": %m", *p);
+
+ if (arg_wait_for_initialization_timeout > 0) {
+ sd_device *d;
+
+ r = device_wait_for_initialization(
+ device,
+ NULL,
+ usec_add(now(CLOCK_MONOTONIC), arg_wait_for_initialization_timeout),
+ &d);
+ if (r < 0)
+ return r;
+
+ sd_device_unref(device);
+ device = d;
+ }
+
+ if (action == ACTION_QUERY)
+ r = query_device(query, device);
+ else if (action == ACTION_ATTRIBUTE_WALK)
+ r = print_device_chain(device);
+ else
+ assert_not_reached("Unknown action");
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/udev/udevadm-monitor.c b/src/udev/udevadm-monitor.c
new file mode 100644
index 0000000..cae7f1b
--- /dev/null
+++ b/src/udev/udevadm-monitor.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "set.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "udevadm.h"
+#include "virt.h"
+#include "time-util.h"
+
+static bool arg_show_property = false;
+static bool arg_print_kernel = false;
+static bool arg_print_udev = false;
+static Set *arg_tag_filter = NULL;
+static Hashmap *arg_subsystem_filter = NULL;
+
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ DeviceAction action = _DEVICE_ACTION_INVALID;
+ const char *devpath = NULL, *subsystem = NULL;
+ MonitorNetlinkGroup group = PTR_TO_INT(userdata);
+ struct timespec ts;
+
+ assert(device);
+ assert(IN_SET(group, MONITOR_GROUP_UDEV, MONITOR_GROUP_KERNEL));
+
+ (void) device_get_action(device, &action);
+ (void) sd_device_get_devpath(device, &devpath);
+ (void) sd_device_get_subsystem(device, &subsystem);
+
+ assert_se(clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
+
+ printf("%-6s[%"PRI_TIME".%06"PRI_NSEC"] %-8s %s (%s)\n",
+ group == MONITOR_GROUP_UDEV ? "UDEV" : "KERNEL",
+ ts.tv_sec, (nsec_t)ts.tv_nsec/1000,
+ strna(device_action_to_string(action)),
+ devpath, subsystem);
+
+ if (arg_show_property) {
+ const char *key, *value;
+
+ FOREACH_DEVICE_PROPERTY(device, key, value)
+ printf("%s=%s\n", key, value);
+
+ printf("\n");
+ }
+
+ return 0;
+}
+
+static int setup_monitor(MonitorNetlinkGroup sender, sd_event *event, sd_device_monitor **ret) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+ const char *subsystem, *devtype, *tag;
+ int r;
+
+ r = device_monitor_new_full(&monitor, sender, -1);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create netlink socket: %m");
+
+ (void) sd_device_monitor_set_receive_buffer_size(monitor, 128*1024*1024);
+
+ r = sd_device_monitor_attach_event(monitor, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event: %m");
+
+ HASHMAP_FOREACH_KEY(devtype, subsystem, arg_subsystem_filter) {
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, subsystem, devtype);
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply subsystem filter '%s%s%s': %m",
+ subsystem, devtype ? "/" : "", strempty(devtype));
+ }
+
+ SET_FOREACH(tag, arg_tag_filter) {
+ r = sd_device_monitor_filter_add_match_tag(monitor, tag);
+ if (r < 0)
+ return log_error_errno(r, "Failed to apply tag filter '%s': %m", tag);
+ }
+
+ r = sd_device_monitor_start(monitor, device_monitor_handler, INT_TO_PTR(sender));
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(monitor),
+ sender == MONITOR_GROUP_UDEV ? "device-monitor-udev" : "device-monitor-kernel");
+
+ *ret = TAKE_PTR(monitor);
+ return 0;
+}
+
+static int help(void) {
+ printf("%s monitor [OPTIONS]\n\n"
+ "Listen to kernel and udev events.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -p --property Print the event properties\n"
+ " -k --kernel Print kernel uevents\n"
+ " -u --udev Print udev events\n"
+ " -s --subsystem-match=SUBSYSTEM[/DEVTYPE] Filter events by subsystem\n"
+ " -t --tag-match=TAG Filter events by tag\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "property", no_argument, NULL, 'p' },
+ { "environment", no_argument, NULL, 'e' }, /* alias for -p */
+ { "kernel", no_argument, NULL, 'k' },
+ { "udev", no_argument, NULL, 'u' },
+ { "subsystem-match", required_argument, NULL, 's' },
+ { "tag-match", required_argument, NULL, 't' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ int r, c;
+
+ while ((c = getopt_long(argc, argv, "pekus:t:Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'p':
+ case 'e':
+ arg_show_property = true;
+ break;
+ case 'k':
+ arg_print_kernel = true;
+ break;
+ case 'u':
+ arg_print_udev = true;
+ break;
+ case 's': {
+ _cleanup_free_ char *subsystem = NULL, *devtype = NULL;
+ const char *slash;
+
+ slash = strchr(optarg, '/');
+ if (slash) {
+ devtype = strdup(slash + 1);
+ if (!devtype)
+ return -ENOMEM;
+
+ subsystem = strndup(optarg, slash - optarg);
+ } else
+ subsystem = strdup(optarg);
+
+ if (!subsystem)
+ return -ENOMEM;
+
+ r = hashmap_ensure_allocated(&arg_subsystem_filter, NULL);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(arg_subsystem_filter, subsystem, devtype);
+ if (r < 0)
+ return r;
+
+ subsystem = devtype = NULL;
+ break;
+ }
+ case 't':
+ /* optarg is stored in argv[], so we don't need to copy it */
+ r = set_ensure_put(&arg_tag_filter, &string_hash_ops, optarg);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option.");
+ }
+
+ if (!arg_print_kernel && !arg_print_udev) {
+ arg_print_kernel = true;
+ arg_print_udev = true;
+ }
+
+ return 1;
+}
+
+int monitor_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *kernel_monitor = NULL, *udev_monitor = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ int r;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ goto finalize;
+
+ if (running_in_chroot() > 0) {
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ /* Callers are expecting to see events as they happen: Line buffering */
+ setlinebuf(stdout);
+
+ r = sd_event_default(&event);
+ if (r < 0) {
+ log_error_errno(r, "Failed to initialize event: %m");
+ goto finalize;
+ }
+
+ assert_se(sigprocmask_many(SIG_UNBLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+
+ printf("monitor will print the received events for:\n");
+ if (arg_print_udev) {
+ r = setup_monitor(MONITOR_GROUP_UDEV, event, &udev_monitor);
+ if (r < 0)
+ goto finalize;
+
+ printf("UDEV - the event which udev sends out after rule processing\n");
+ }
+
+ if (arg_print_kernel) {
+ r = setup_monitor(MONITOR_GROUP_KERNEL, event, &kernel_monitor);
+ if (r < 0)
+ goto finalize;
+
+ printf("KERNEL - the kernel uevent\n");
+ }
+ printf("\n");
+
+ r = sd_event_loop(event);
+ if (r < 0) {
+ log_error_errno(r, "Failed to run event loop: %m");
+ goto finalize;
+ }
+
+ r = 0;
+
+finalize:
+ hashmap_free_free_free(arg_subsystem_filter);
+ set_free(arg_tag_filter);
+
+ return r;
+}
diff --git a/src/udev/udevadm-settle.c b/src/udev/udevadm-settle.c
new file mode 100644
index 0000000..2bd5853
--- /dev/null
+++ b/src/udev/udevadm-settle.c
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © 2009 Canonical Ltd.
+ * Copyright © 2009 Scott James Remnant <scott@netsplit.com>
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-login.h"
+#include "sd-messages.h"
+
+#include "bus-util.h"
+#include "io-util.h"
+#include "libudev-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "udev-ctrl.h"
+#include "udevadm.h"
+#include "unit-def.h"
+#include "util.h"
+#include "virt.h"
+
+static usec_t arg_timeout = 120 * USEC_PER_SEC;
+static const char *arg_exists = NULL;
+
+static int help(void) {
+ printf("%s settle [OPTIONS]\n\n"
+ "Wait for pending udev events.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -t --timeout=SEC Maximum time to wait for events\n"
+ " -E --exit-if-exists=FILE Stop waiting if file exists\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "timeout", required_argument, NULL, 't' },
+ { "exit-if-exists", required_argument, NULL, 'E' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ { "seq-start", required_argument, NULL, 's' }, /* removed */
+ { "seq-end", required_argument, NULL, 'e' }, /* removed */
+ { "quiet", no_argument, NULL, 'q' }, /* removed */
+ {}
+ };
+
+ int c, r;
+
+ while ((c = getopt_long(argc, argv, "t:E:Vhs:e:q", options, NULL)) >= 0) {
+ switch (c) {
+ case 't':
+ r = parse_sec(optarg, &arg_timeout);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse timeout value '%s': %m", optarg);
+ break;
+ case 'E':
+ arg_exists = optarg;
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case 's':
+ case 'e':
+ case 'q':
+ return log_info_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Option -%c no longer supported.",
+ c);
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option.");
+ }
+ }
+
+ return 1;
+}
+
+static int emit_deprecation_warning(void) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = sd_pid_get_unit(0, &unit);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to determine unit we run in, ignoring: %m");
+ return 0;
+ }
+
+ if (!streq(unit, "systemd-udev-settle.service"))
+ return 0;
+
+ r = bus_connect_system_systemd(&bus);
+ if (r < 0)
+ log_debug_errno(r, "Failed to open connection to systemd, skipping dependency queries: %m");
+ else {
+ _cleanup_strv_free_ char **b = NULL;
+ _cleanup_free_ char *unit_path = NULL;
+
+ unit_path = unit_dbus_path_from_name("systemd-udev-settle.service");
+ if (!unit_path)
+ return -ENOMEM;
+
+ (void) sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ unit_path,
+ "org.freedesktop.systemd1.Unit",
+ "WantedBy",
+ NULL,
+ &a);
+
+ (void) sd_bus_get_property_strv(
+ bus,
+ "org.freedesktop.systemd1",
+ unit_path,
+ "org.freedesktop.systemd1.Unit",
+ "RequiredBy",
+ NULL,
+ &b);
+
+ r = strv_extend_strv(&a, b, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (strv_isempty(a))
+ /* Print a simple message if we cannot determine the dependencies */
+ log_notice("systemd-udev-settle.service is deprecated.");
+ else {
+ /* Print a longer, structured message if we can acquire the dependencies (this should be the
+ * common case). This is hooked up with a catalog entry and everything. */
+ _cleanup_free_ char *t = NULL;
+
+ t = strv_join(a, ", ");
+ if (!t)
+ return -ENOMEM;
+
+ log_struct(LOG_NOTICE,
+ "MESSAGE=systemd-udev-settle.service is deprecated. Please fix %s not to pull it in.", t,
+ "OFFENDING_UNITS=%s", t,
+ "MESSAGE_ID=" SD_MESSAGE_SYSTEMD_UDEV_SETTLE_DEPRECATED_STR);
+ }
+
+ return 0;
+}
+
+int settle_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(udev_queue_unrefp) struct udev_queue *queue = NULL;
+ usec_t deadline;
+ int r, fd;
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (running_in_chroot() > 0) {
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ deadline = now(CLOCK_MONOTONIC) + arg_timeout;
+
+ /* guarantee that the udev daemon isn't pre-processing */
+ if (getuid() == 0) {
+ _cleanup_(udev_ctrl_unrefp) struct udev_ctrl *uctrl = NULL;
+
+ if (udev_ctrl_new(&uctrl) >= 0) {
+ r = udev_ctrl_send_ping(uctrl);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to connect to udev daemon: %m");
+ return 0;
+ }
+
+ r = udev_ctrl_wait(uctrl, MAX(5 * USEC_PER_SEC, arg_timeout));
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for daemon to reply: %m");
+ }
+ }
+
+ queue = udev_queue_new(NULL);
+ if (!queue)
+ return log_error_errno(errno, "Failed to get udev queue: %m");
+
+ fd = udev_queue_get_fd(queue);
+ if (fd < 0) {
+ log_debug_errno(fd, "Queue is empty, nothing to watch: %m");
+ return 0;
+ }
+
+ (void) emit_deprecation_warning();
+
+ for (;;) {
+ if (arg_exists && access(arg_exists, F_OK) >= 0)
+ return 0;
+
+ /* exit if queue is empty */
+ if (udev_queue_get_queue_is_empty(queue))
+ return 0;
+
+ if (now(CLOCK_MONOTONIC) >= deadline)
+ return -ETIMEDOUT;
+
+ /* wake up when queue becomes empty */
+ r = fd_wait_for_event(fd, POLLIN, MSEC_PER_SEC);
+ if (r < 0)
+ return r;
+ if (r & POLLIN) {
+ r = udev_queue_flush(queue);
+ if (r < 0)
+ return log_error_errno(r, "Failed to flush queue: %m");
+ }
+ }
+}
diff --git a/src/udev/udevadm-test-builtin.c b/src/udev/udevadm-test-builtin.c
new file mode 100644
index 0000000..8995e5c
--- /dev/null
+++ b/src/udev/udevadm-test-builtin.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "log.h"
+#include "udev-builtin.h"
+#include "udevadm.h"
+#include "udevadm-util.h"
+
+static const char *arg_command = NULL;
+static const char *arg_syspath = NULL;
+
+static int help(void) {
+ printf("%s test-builtin [OPTIONS] COMMAND DEVPATH\n\n"
+ "Test a built-in command.\n\n"
+ " -h --help Print this message\n"
+ " -V --version Print version of the program\n\n"
+ "Commands:\n"
+ , program_invocation_short_name);
+
+ udev_builtin_list();
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ int c;
+
+ while ((c = getopt_long(argc, argv, "Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ arg_command = argv[optind++];
+ if (!arg_command)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Command missing.");
+
+ arg_syspath = argv[optind++];
+ if (!arg_syspath)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "syspath missing.");
+
+ return 1;
+}
+
+int builtin_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ UdevBuiltinCommand cmd;
+ int r;
+
+ log_set_max_level(LOG_DEBUG);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ udev_builtin_init();
+
+ cmd = udev_builtin_lookup(arg_command);
+ if (cmd < 0) {
+ log_error("Unknown command '%s'", arg_command);
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = find_device(arg_syspath, "/sys", &dev);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open device '%s': %m", arg_syspath);
+ goto finish;
+ }
+
+ r = udev_builtin_run(dev, cmd, arg_command, true);
+ if (r < 0)
+ log_debug_errno(r, "Builtin command '%s' fails: %m", arg_command);
+
+finish:
+ udev_builtin_exit();
+ return r;
+}
diff --git a/src/udev/udevadm-test.c b/src/udev/udevadm-test.c
new file mode 100644
index 0000000..a029622
--- /dev/null
+++ b/src/udev/udevadm-test.c
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © 2003-2004 Greg Kroah-Hartman <greg@kroah.com>
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/signalfd.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "device-private.h"
+#include "device-util.h"
+#include "libudev-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "udev-builtin.h"
+#include "udev-event.h"
+#include "udevadm.h"
+
+static const char *arg_action = "add";
+static ResolveNameTiming arg_resolve_name_timing = RESOLVE_NAME_EARLY;
+static char arg_syspath[UTIL_PATH_SIZE] = {};
+
+static int help(void) {
+
+ printf("%s test [OPTIONS] DEVPATH\n\n"
+ "Test an event run.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -a --action=ACTION|help Set action string\n"
+ " -N --resolve-names=early|late|never When to resolve names\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "action", required_argument, NULL, 'a' },
+ { "resolve-names", required_argument, NULL, 'N' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+
+ int c;
+
+ while ((c = getopt_long(argc, argv, "a:N:Vh", options, NULL)) >= 0)
+ switch (c) {
+ case 'a': {
+ DeviceAction a;
+
+ if (streq(optarg, "help")) {
+ dump_device_action_table();
+ return 0;
+ }
+
+ a = device_action_from_string(optarg);
+ if (a < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid action '%s'", optarg);
+
+ arg_action = optarg;
+ break;
+ }
+ case 'N':
+ arg_resolve_name_timing = resolve_name_timing_from_string(optarg);
+ if (arg_resolve_name_timing < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--resolve-names= must be early, late or never");
+ break;
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+
+ if (!argv[optind])
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "syspath parameter missing.");
+
+ /* add /sys if needed */
+ if (!path_startswith(argv[optind], "/sys"))
+ strscpyl(arg_syspath, sizeof(arg_syspath), "/sys", argv[optind], NULL);
+ else
+ strscpy(arg_syspath, sizeof(arg_syspath), argv[optind]);
+
+ return 1;
+}
+
+int test_main(int argc, char *argv[], void *userdata) {
+ _cleanup_(udev_rules_freep) UdevRules *rules = NULL;
+ _cleanup_(udev_event_freep) UdevEvent *event = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *cmd, *key, *value;
+ sigset_t mask, sigmask_orig;
+ void *val;
+ int r;
+
+ log_set_max_level(LOG_DEBUG);
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ printf("This program is for debugging only, it does not run any program\n"
+ "specified by a RUN key. It may show incorrect results, because\n"
+ "some values may be different, or not available at a simulation run.\n"
+ "\n");
+
+ assert_se(sigprocmask(SIG_SETMASK, NULL, &sigmask_orig) >= 0);
+
+ udev_builtin_init();
+
+ r = udev_rules_load(&rules, arg_resolve_name_timing);
+ if (r < 0) {
+ log_error_errno(r, "Failed to read udev rules: %m");
+ goto out;
+ }
+
+ r = device_new_from_synthetic_event(&dev, arg_syspath, arg_action);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open device '%s': %m", arg_syspath);
+ goto out;
+ }
+
+ /* don't read info from the db */
+ device_seal(dev);
+
+ event = udev_event_new(dev, 0, NULL);
+
+ assert_se(sigfillset(&mask) >= 0);
+ assert_se(sigprocmask(SIG_SETMASK, &mask, &sigmask_orig) >= 0);
+
+ udev_event_execute_rules(event, 60 * USEC_PER_SEC, SIGKILL, NULL, rules);
+
+ FOREACH_DEVICE_PROPERTY(dev, key, value)
+ printf("%s=%s\n", key, value);
+
+ ORDERED_HASHMAP_FOREACH_KEY(val, cmd, event->run_list) {
+ char program[UTIL_PATH_SIZE];
+
+ (void) udev_event_apply_format(event, cmd, program, sizeof(program), false);
+ printf("run: '%s'\n", program);
+ }
+
+ r = 0;
+out:
+ udev_builtin_exit();
+ return r;
+}
diff --git a/src/udev/udevadm-trigger.c b/src/udev/udevadm-trigger.c
new file mode 100644
index 0000000..5c74184
--- /dev/null
+++ b/src/udev/udevadm-trigger.c
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "device-enumerator-private.h"
+#include "device-private.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "udevadm.h"
+#include "udevadm-util.h"
+#include "udev-ctrl.h"
+#include "virt.h"
+
+static bool arg_verbose = false;
+static bool arg_dry_run = false;
+
+static int exec_list(sd_device_enumerator *e, const char *action, Set **settle_set) {
+ sd_device *d;
+ int r, ret = 0;
+
+ FOREACH_DEVICE_AND_SUBSYSTEM(e, d) {
+ _cleanup_free_ char *filename = NULL;
+ const char *syspath;
+
+ if (sd_device_get_syspath(d, &syspath) < 0)
+ continue;
+
+ if (arg_verbose)
+ printf("%s\n", syspath);
+ if (arg_dry_run)
+ continue;
+
+ filename = path_join(syspath, "uevent");
+ if (!filename)
+ return log_oom();
+
+ r = write_string_file(filename, action, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ bool ignore = IN_SET(r, -ENOENT, -ENODEV);
+
+ log_full_errno(ignore ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to write '%s' to '%s'%s: %m",
+ action, filename, ignore ? ", ignoring" : "");
+ if (IN_SET(r, -EACCES, -EROFS))
+ /* Inovoked by unpriviledged user, or read only filesystem. Return earlier. */
+ return r;
+ if (ret == 0 && !ignore)
+ ret = r;
+ continue;
+ }
+
+ if (settle_set) {
+ r = set_put_strdup(settle_set, syspath);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ return ret;
+}
+
+static int device_monitor_handler(sd_device_monitor *m, sd_device *dev, void *userdata) {
+ _cleanup_free_ char *val = NULL;
+ Set *settle_set = userdata;
+ const char *syspath;
+
+ assert(dev);
+ assert(settle_set);
+
+ if (sd_device_get_syspath(dev, &syspath) < 0)
+ return 0;
+
+ if (arg_verbose)
+ printf("settle %s\n", syspath);
+
+ val = set_remove(settle_set, syspath);
+ if (!val)
+ log_debug("Got epoll event on syspath %s not present in syspath set", syspath);
+
+ if (set_isempty(settle_set))
+ return sd_event_exit(sd_device_monitor_get_event(m), 0);
+
+ return 0;
+}
+
+static char* keyval(const char *str, const char **key, const char **val) {
+ char *buf, *pos;
+
+ buf = strdup(str);
+ if (!buf)
+ return NULL;
+
+ pos = strchr(buf, '=');
+ if (pos) {
+ pos[0] = 0;
+ pos++;
+ }
+
+ *key = buf;
+ *val = pos;
+
+ return buf;
+}
+
+static int help(void) {
+ printf("%s trigger [OPTIONS] DEVPATH\n\n"
+ "Request events from the kernel.\n\n"
+ " -h --help Show this help\n"
+ " -V --version Show package version\n"
+ " -v --verbose Print the list of devices while running\n"
+ " -n --dry-run Do not actually trigger the events\n"
+ " -t --type= Type of events to trigger\n"
+ " devices sysfs devices (default)\n"
+ " subsystems sysfs subsystems and drivers\n"
+ " -c --action=ACTION|help Event action value, default is \"change\"\n"
+ " -s --subsystem-match=SUBSYSTEM Trigger devices from a matching subsystem\n"
+ " -S --subsystem-nomatch=SUBSYSTEM Exclude devices from a matching subsystem\n"
+ " -a --attr-match=FILE[=VALUE] Trigger devices with a matching attribute\n"
+ " -A --attr-nomatch=FILE[=VALUE] Exclude devices with a matching attribute\n"
+ " -p --property-match=KEY=VALUE Trigger devices with a matching property\n"
+ " -g --tag-match=KEY=VALUE Trigger devices with a matching property\n"
+ " -y --sysname-match=NAME Trigger devices with this /sys path\n"
+ " --name-match=NAME Trigger devices with this /dev name\n"
+ " -b --parent-match=NAME Trigger devices with that parent device\n"
+ " -w --settle Wait for the triggered events to complete\n"
+ " --wait-daemon[=SECONDS] Wait for udevd daemon to be initialized\n"
+ " before triggering uevents\n"
+ , program_invocation_short_name);
+
+ return 0;
+}
+
+int trigger_main(int argc, char *argv[], void *userdata) {
+ enum {
+ ARG_NAME = 0x100,
+ ARG_PING,
+ };
+
+ static const struct option options[] = {
+ { "verbose", no_argument, NULL, 'v' },
+ { "dry-run", no_argument, NULL, 'n' },
+ { "type", required_argument, NULL, 't' },
+ { "action", required_argument, NULL, 'c' },
+ { "subsystem-match", required_argument, NULL, 's' },
+ { "subsystem-nomatch", required_argument, NULL, 'S' },
+ { "attr-match", required_argument, NULL, 'a' },
+ { "attr-nomatch", required_argument, NULL, 'A' },
+ { "property-match", required_argument, NULL, 'p' },
+ { "tag-match", required_argument, NULL, 'g' },
+ { "sysname-match", required_argument, NULL, 'y' },
+ { "name-match", required_argument, NULL, ARG_NAME },
+ { "parent-match", required_argument, NULL, 'b' },
+ { "settle", no_argument, NULL, 'w' },
+ { "wait-daemon", optional_argument, NULL, ARG_PING },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+ enum {
+ TYPE_DEVICES,
+ TYPE_SUBSYSTEMS,
+ } device_type = TYPE_DEVICES;
+ const char *action = "change";
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_set_free_ Set *settle_set = NULL;
+ usec_t ping_timeout_usec = 5 * USEC_PER_SEC;
+ bool settle = false, ping = false;
+ int c, r;
+
+ if (running_in_chroot() > 0) {
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ while ((c = getopt_long(argc, argv, "vnt:c:s:S:a:A:p:g:y:b:wVh", options, NULL)) >= 0) {
+ _cleanup_free_ char *buf = NULL;
+ const char *key, *val;
+
+ switch (c) {
+ case 'v':
+ arg_verbose = true;
+ break;
+ case 'n':
+ arg_dry_run = true;
+ break;
+ case 't':
+ if (streq(optarg, "devices"))
+ device_type = TYPE_DEVICES;
+ else if (streq(optarg, "subsystems"))
+ device_type = TYPE_SUBSYSTEMS;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown type --type=%s", optarg);
+ break;
+ case 'c':
+ if (streq(optarg, "help")) {
+ dump_device_action_table();
+ return 0;
+ }
+ if (device_action_from_string(optarg) < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown action '%s'", optarg);
+
+ action = optarg;
+ break;
+ case 's':
+ r = sd_device_enumerator_add_match_subsystem(e, optarg, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add subsystem match '%s': %m", optarg);
+ break;
+ case 'S':
+ r = sd_device_enumerator_add_match_subsystem(e, optarg, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add negative subsystem match '%s': %m", optarg);
+ break;
+ case 'a':
+ buf = keyval(optarg, &key, &val);
+ if (!buf)
+ return log_oom();
+ r = sd_device_enumerator_add_match_sysattr(e, key, val, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add sysattr match '%s=%s': %m", key, val);
+ break;
+ case 'A':
+ buf = keyval(optarg, &key, &val);
+ if (!buf)
+ return log_oom();
+ r = sd_device_enumerator_add_match_sysattr(e, key, val, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add negative sysattr match '%s=%s': %m", key, val);
+ break;
+ case 'p':
+ buf = keyval(optarg, &key, &val);
+ if (!buf)
+ return log_oom();
+ r = sd_device_enumerator_add_match_property(e, key, val);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add property match '%s=%s': %m", key, val);
+ break;
+ case 'g':
+ r = sd_device_enumerator_add_match_tag(e, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add tag match '%s': %m", optarg);
+ break;
+ case 'y':
+ r = sd_device_enumerator_add_match_sysname(e, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add sysname match '%s': %m", optarg);
+ break;
+ case 'b': {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ r = find_device(optarg, "/sys", &dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open the device '%s': %m", optarg);
+
+ r = device_enumerator_add_match_parent_incremental(e, dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add parent match '%s': %m", optarg);
+ break;
+ }
+ case 'w':
+ settle = true;
+ break;
+
+ case ARG_NAME: {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ r = find_device(optarg, "/dev/", &dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open the device '%s': %m", optarg);
+
+ r = device_enumerator_add_match_parent_incremental(e, dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add parent match '%s': %m", optarg);
+ break;
+ }
+
+ case ARG_PING: {
+ ping = true;
+ if (optarg) {
+ r = parse_sec(optarg, &ping_timeout_usec);
+ if (r < 0)
+ log_error_errno(r, "Failed to parse timeout value '%s', ignoring: %m", optarg);
+ }
+ break;
+ }
+
+ case 'V':
+ return print_version();
+ case 'h':
+ return help();
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unknown option");
+ }
+ }
+
+ if (ping) {
+ _cleanup_(udev_ctrl_unrefp) struct udev_ctrl *uctrl = NULL;
+
+ r = udev_ctrl_new(&uctrl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize udev control: %m");
+
+ r = udev_ctrl_send_ping(uctrl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to udev daemon: %m");
+
+ r = udev_ctrl_wait(uctrl, ping_timeout_usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for daemon to reply: %m");
+ }
+
+ for (; optind < argc; optind++) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+
+ r = find_device(argv[optind], NULL, &dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open the device '%s': %m", argv[optind]);
+
+ r = device_enumerator_add_match_parent_incremental(e, dev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add parent match '%s': %m", argv[optind]);
+ }
+
+ if (settle) {
+ settle_set = set_new(&string_hash_ops_free);
+ if (!settle_set)
+ return log_oom();
+
+ r = sd_event_default(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default event: %m");
+
+ r = sd_device_monitor_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create device monitor object: %m");
+
+ r = sd_device_monitor_attach_event(m, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(m, device_monitor_handler, settle_set);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+ }
+
+ switch (device_type) {
+ case TYPE_SUBSYSTEMS:
+ r = device_enumerator_scan_subsystems(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to scan subsystems: %m");
+ break;
+ case TYPE_DEVICES:
+ r = device_enumerator_scan_devices(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to scan devices: %m");
+ break;
+ default:
+ assert_not_reached("Unknown device type");
+ }
+
+ r = exec_list(e, action, settle ? &settle_set : NULL);
+ if (r < 0)
+ return r;
+
+ if (event && !set_isempty(settle_set)) {
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+ }
+
+ return 0;
+}
diff --git a/src/udev/udevadm-util.c b/src/udev/udevadm-util.c
new file mode 100644
index 0000000..39d0c7e
--- /dev/null
+++ b/src/udev/udevadm-util.c
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "device-private.h"
+#include "path-util.h"
+#include "udevadm-util.h"
+#include "unit-name.h"
+
+static int find_device_from_path(const char *path, sd_device **ret) {
+ if (path_startswith(path, "/sys/"))
+ return sd_device_new_from_syspath(ret, path);
+
+ if (path_startswith(path, "/dev/")) {
+ struct stat st;
+
+ if (stat(path, &st) < 0)
+ return -errno;
+
+ return device_new_from_stat_rdev(ret, &st);
+ }
+
+ return -EINVAL;
+}
+
+static int find_device_from_unit(const char *unit_name, sd_device **ret) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *unit_path = NULL, *syspath = NULL;
+ int r;
+
+ if (!unit_name_is_valid(unit_name, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ if (unit_name_to_type(unit_name) != UNIT_DEVICE)
+ return -EINVAL;
+
+ r = bus_connect_system_systemd(&bus);
+ if (r < 0) {
+ _cleanup_free_ char *path = NULL;
+
+ log_debug_errno(r, "Failed to open connection to systemd, using unit name as syspath: %m");
+
+ r = unit_name_to_path(unit_name, &path);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to convert \"%s\" to a device path: %m", unit_name);
+
+ return find_device_from_path(path, ret);
+ }
+
+ unit_path = unit_dbus_path_from_name(unit_name);
+ if (!unit_path)
+ return -ENOMEM;
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ unit_path,
+ "org.freedesktop.systemd1.Device",
+ "SysFSPath",
+ &error,
+ &syspath);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get SysFSPath= dbus property for %s: %s",
+ unit_name, bus_error_message(&error, r));
+
+ return sd_device_new_from_syspath(ret, syspath);
+}
+
+int find_device(const char *id, const char *prefix, sd_device **ret) {
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(id);
+ assert(ret);
+
+ if (prefix) {
+ if (!path_startswith(id, prefix)) {
+ id = path = path_join(prefix, id);
+ if (!path)
+ return -ENOMEM;
+ }
+ } else {
+ /* In cases where the argument is generic (no prefix specified),
+ * check if the argument looks like a device unit name. */
+ r = find_device_from_unit(id, ret);
+ if (r >= 0)
+ return r;
+ }
+
+ return find_device_from_path(id, ret);
+}
diff --git a/src/udev/udevadm-util.h b/src/udev/udevadm-util.h
new file mode 100644
index 0000000..91587c5
--- /dev/null
+++ b/src/udev/udevadm-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+int find_device(const char *id, const char *prefix, sd_device **ret);
diff --git a/src/udev/udevadm.c b/src/udev/udevadm.c
new file mode 100644
index 0000000..408e4a3
--- /dev/null
+++ b/src/udev/udevadm.c
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "main-func.h"
+#include "pretty-print.h"
+#include "selinux-util.h"
+#include "string-util.h"
+#include "udevadm.h"
+#include "udevd.h"
+#include "udev-util.h"
+#include "verbs.h"
+#include "util.h"
+
+static int help(void) {
+ static const char *const short_descriptions[][2] = {
+ { "info", "Query sysfs or the udev database" },
+ { "trigger", "Request events from the kernel" },
+ { "settle", "Wait for pending udev events" },
+ { "control", "Control the udev daemon" },
+ { "monitor", "Listen to kernel and udev events" },
+ { "test", "Test an event run" },
+ { "test-builtin", "Test a built-in command" },
+ };
+
+ _cleanup_free_ char *link = NULL;
+ size_t i;
+ int r;
+
+ r = terminal_urlify_man("udevadm", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [--help] [--version] [--debug] COMMAND [COMMAND OPTIONS]\n\n"
+ "Send control commands or test the device manager.\n\n"
+ "Commands:\n"
+ , program_invocation_short_name);
+
+ for (i = 0; i < ELEMENTSOF(short_descriptions); i++)
+ printf(" %-12s %s\n", short_descriptions[i][0], short_descriptions[i][1]);
+
+ printf("\nSee the %s for details.\n", link);
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "debug", no_argument, NULL, 'd' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ {}
+ };
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "+dhV", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'd':
+ log_set_max_level(LOG_DEBUG);
+ break;
+
+ case 'h':
+ return help();
+
+ case 'V':
+ return print_version();
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ return 1; /* work to do */
+}
+
+static int version_main(int argc, char *argv[], void *userdata) {
+ return print_version();
+}
+
+static int help_main(int argc, char *argv[], void *userdata) {
+ return help();
+}
+
+static int udevadm_main(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "info", VERB_ANY, VERB_ANY, 0, info_main },
+ { "trigger", VERB_ANY, VERB_ANY, 0, trigger_main },
+ { "settle", VERB_ANY, VERB_ANY, 0, settle_main },
+ { "control", VERB_ANY, VERB_ANY, 0, control_main },
+ { "monitor", VERB_ANY, VERB_ANY, 0, monitor_main },
+ { "hwdb", VERB_ANY, VERB_ANY, 0, hwdb_main },
+ { "test", VERB_ANY, VERB_ANY, 0, test_main },
+ { "test-builtin", VERB_ANY, VERB_ANY, 0, builtin_main },
+ { "version", VERB_ANY, VERB_ANY, 0, version_main },
+ { "help", VERB_ANY, VERB_ANY, 0, help_main },
+ {}
+ };
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ if (strstr(program_invocation_short_name, "udevd"))
+ return run_udevd(argc, argv);
+
+ udev_parse_config();
+ log_parse_environment();
+ log_open();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ log_set_max_level_realm(LOG_REALM_SYSTEMD, log_get_max_level());
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ return udevadm_main(argc, argv);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/udev/udevadm.h b/src/udev/udevadm.h
new file mode 100644
index 0000000..162bbb9
--- /dev/null
+++ b/src/udev/udevadm.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "build.h"
+#include "macro.h"
+
+int info_main(int argc, char *argv[], void *userdata);
+int trigger_main(int argc, char *argv[], void *userdata);
+int settle_main(int argc, char *argv[], void *userdata);
+int control_main(int argc, char *argv[], void *userdata);
+int monitor_main(int argc, char *argv[], void *userdata);
+int hwdb_main(int argc, char *argv[], void *userdata);
+int test_main(int argc, char *argv[], void *userdata);
+int builtin_main(int argc, char *argv[], void *userdata);
+
+static inline int print_version(void) {
+ /* Dracut relies on the version being a single integer */
+ puts(STRINGIFY(PROJECT_VERSION));
+ return 0;
+}
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
new file mode 100644
index 0000000..d24b8d4
--- /dev/null
+++ b/src/udev/udevd.c
@@ -0,0 +1,1960 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © 2004 Chris Friesen <chris_friesen@sympatico.ca>
+ * Copyright © 2009 Canonical Ltd.
+ * Copyright © 2009 Scott James Remnant <scott@netsplit.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/epoll.h>
+#include <sys/file.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "build.h"
+#include "cgroup-util.h"
+#include "cpu-set-util.h"
+#include "dev-setup.h"
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "libudev-device-internal.h"
+#include "limits-util.h"
+#include "list.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "syslog-util.h"
+#include "udevd.h"
+#include "udev-builtin.h"
+#include "udev-ctrl.h"
+#include "udev-event.h"
+#include "udev-util.h"
+#include "udev-watch.h"
+#include "user-util.h"
+
+#define WORKER_NUM_MAX 2048U
+
+static bool arg_debug = false;
+static int arg_daemonize = false;
+static ResolveNameTiming arg_resolve_name_timing = RESOLVE_NAME_EARLY;
+static unsigned arg_children_max = 0;
+static usec_t arg_exec_delay_usec = 0;
+static usec_t arg_event_timeout_usec = 180 * USEC_PER_SEC;
+static int arg_timeout_signal = SIGKILL;
+static bool arg_blockdev_read_only = false;
+
+typedef struct Manager {
+ sd_event *event;
+ Hashmap *workers;
+ LIST_HEAD(struct event, events);
+ const char *cgroup;
+ pid_t pid; /* the process that originally allocated the manager object */
+
+ UdevRules *rules;
+ Hashmap *properties;
+
+ sd_netlink *rtnl;
+
+ sd_device_monitor *monitor;
+ struct udev_ctrl *ctrl;
+ int fd_inotify;
+ int worker_watch[2];
+
+ sd_event_source *inotify_event;
+ sd_event_source *kill_workers_event;
+
+ usec_t last_usec;
+
+ bool stop_exec_queue:1;
+ bool exit:1;
+} Manager;
+
+enum event_state {
+ EVENT_UNDEF,
+ EVENT_QUEUED,
+ EVENT_RUNNING,
+};
+
+struct event {
+ Manager *manager;
+ struct worker *worker;
+ enum event_state state;
+
+ sd_device *dev;
+ sd_device *dev_kernel; /* clone of originally received device */
+
+ uint64_t seqnum;
+ uint64_t delaying_seqnum;
+
+ sd_event_source *timeout_warning_event;
+ sd_event_source *timeout_event;
+
+ LIST_FIELDS(struct event, event);
+};
+
+static void event_queue_cleanup(Manager *manager, enum event_state type);
+
+enum worker_state {
+ WORKER_UNDEF,
+ WORKER_RUNNING,
+ WORKER_IDLE,
+ WORKER_KILLED,
+};
+
+struct worker {
+ Manager *manager;
+ pid_t pid;
+ sd_device_monitor *monitor;
+ enum worker_state state;
+ struct event *event;
+};
+
+/* passed from worker to main process */
+struct worker_message {
+};
+
+static void event_free(struct event *event) {
+ if (!event)
+ return;
+
+ assert(event->manager);
+
+ LIST_REMOVE(event, event->manager->events, event);
+ sd_device_unref(event->dev);
+ sd_device_unref(event->dev_kernel);
+
+ sd_event_source_unref(event->timeout_warning_event);
+ sd_event_source_unref(event->timeout_event);
+
+ if (event->worker)
+ event->worker->event = NULL;
+
+ /* only clean up the queue from the process that created it */
+ if (LIST_IS_EMPTY(event->manager->events) &&
+ event->manager->pid == getpid_cached())
+ if (unlink("/run/udev/queue") < 0)
+ log_warning_errno(errno, "Failed to unlink /run/udev/queue: %m");
+
+ free(event);
+}
+
+static void worker_free(struct worker *worker) {
+ if (!worker)
+ return;
+
+ assert(worker->manager);
+
+ hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
+ sd_device_monitor_unref(worker->monitor);
+ event_free(worker->event);
+
+ free(worker);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct worker *, worker_free);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(worker_hash_op, void, trivial_hash_func, trivial_compare_func, struct worker, worker_free);
+
+static int worker_new(struct worker **ret, Manager *manager, sd_device_monitor *worker_monitor, pid_t pid) {
+ _cleanup_(worker_freep) struct worker *worker = NULL;
+ int r;
+
+ assert(ret);
+ assert(manager);
+ assert(worker_monitor);
+ assert(pid > 1);
+
+ /* close monitor, but keep address around */
+ device_monitor_disconnect(worker_monitor);
+
+ worker = new(struct worker, 1);
+ if (!worker)
+ return -ENOMEM;
+
+ *worker = (struct worker) {
+ .manager = manager,
+ .monitor = sd_device_monitor_ref(worker_monitor),
+ .pid = pid,
+ };
+
+ r = hashmap_ensure_allocated(&manager->workers, &worker_hash_op);
+ if (r < 0)
+ return r;
+
+ r = hashmap_put(manager->workers, PID_TO_PTR(pid), worker);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(worker);
+
+ return 0;
+}
+
+static int on_event_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
+ struct event *event = userdata;
+
+ assert(event);
+ assert(event->worker);
+
+ kill_and_sigcont(event->worker->pid, arg_timeout_signal);
+ event->worker->state = WORKER_KILLED;
+
+ log_device_error(event->dev, "Worker ["PID_FMT"] processing SEQNUM=%"PRIu64" killed", event->worker->pid, event->seqnum);
+
+ return 1;
+}
+
+static int on_event_timeout_warning(sd_event_source *s, uint64_t usec, void *userdata) {
+ struct event *event = userdata;
+
+ assert(event);
+ assert(event->worker);
+
+ log_device_warning(event->dev, "Worker ["PID_FMT"] processing SEQNUM=%"PRIu64" is taking a long time", event->worker->pid, event->seqnum);
+
+ return 1;
+}
+
+static void worker_attach_event(struct worker *worker, struct event *event) {
+ sd_event *e;
+
+ assert(worker);
+ assert(worker->manager);
+ assert(event);
+ assert(!event->worker);
+ assert(!worker->event);
+
+ worker->state = WORKER_RUNNING;
+ worker->event = event;
+ event->state = EVENT_RUNNING;
+ event->worker = worker;
+
+ e = worker->manager->event;
+
+ (void) sd_event_add_time_relative(e, &event->timeout_warning_event, CLOCK_MONOTONIC,
+ udev_warn_timeout(arg_event_timeout_usec), USEC_PER_SEC,
+ on_event_timeout_warning, event);
+
+ (void) sd_event_add_time_relative(e, &event->timeout_event, CLOCK_MONOTONIC,
+ arg_event_timeout_usec, USEC_PER_SEC,
+ on_event_timeout, event);
+}
+
+static void manager_clear_for_worker(Manager *manager) {
+ assert(manager);
+
+ manager->inotify_event = sd_event_source_unref(manager->inotify_event);
+ manager->kill_workers_event = sd_event_source_unref(manager->kill_workers_event);
+
+ manager->event = sd_event_unref(manager->event);
+
+ manager->workers = hashmap_free(manager->workers);
+ event_queue_cleanup(manager, EVENT_UNDEF);
+
+ manager->monitor = sd_device_monitor_unref(manager->monitor);
+ manager->ctrl = udev_ctrl_unref(manager->ctrl);
+
+ manager->worker_watch[READ_END] = safe_close(manager->worker_watch[READ_END]);
+}
+
+static void manager_free(Manager *manager) {
+ if (!manager)
+ return;
+
+ udev_builtin_exit();
+
+ if (manager->pid == getpid_cached())
+ udev_ctrl_cleanup(manager->ctrl);
+
+ manager_clear_for_worker(manager);
+
+ sd_netlink_unref(manager->rtnl);
+
+ hashmap_free_free_free(manager->properties);
+ udev_rules_free(manager->rules);
+
+ safe_close(manager->fd_inotify);
+ safe_close_pair(manager->worker_watch);
+
+ free(manager);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+static int worker_send_message(int fd) {
+ struct worker_message message = {};
+
+ return loop_write(fd, &message, sizeof(message), false);
+}
+
+static int worker_lock_block_device(sd_device *dev, int *ret_fd) {
+ _cleanup_close_ int fd = -1;
+ const char *val;
+ int r;
+
+ assert(dev);
+ assert(ret_fd);
+
+ /*
+ * Take a shared lock on the device node; this establishes
+ * a concept of device "ownership" to serialize device
+ * access. External processes holding an exclusive lock will
+ * cause udev to skip the event handling; in the case udev
+ * acquired the lock, the external process can block until
+ * udev has finished its event handling.
+ */
+
+ if (device_for_action(dev, DEVICE_ACTION_REMOVE))
+ return 0;
+
+ r = sd_device_get_subsystem(dev, &val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get subsystem: %m");
+
+ if (!streq(val, "block"))
+ return 0;
+
+ r = sd_device_get_sysname(dev, &val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get sysname: %m");
+
+ if (STARTSWITH_SET(val, "dm-", "md", "drbd"))
+ return 0;
+
+ r = sd_device_get_devtype(dev, &val);
+ if (r < 0 && r != -ENOENT)
+ return log_device_debug_errno(dev, r, "Failed to get devtype: %m");
+ if (r >= 0 && streq(val, "partition")) {
+ r = sd_device_get_parent(dev, &dev);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get parent device: %m");
+ }
+
+ r = sd_device_get_devname(dev, &val);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devname: %m");
+
+ fd = open(val, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0) {
+ log_device_debug_errno(dev, errno, "Failed to open '%s', ignoring: %m", val);
+ return 0;
+ }
+
+ if (flock(fd, LOCK_SH|LOCK_NB) < 0)
+ return log_device_debug_errno(dev, errno, "Failed to flock(%s): %m", val);
+
+ *ret_fd = TAKE_FD(fd);
+ return 1;
+}
+
+static int worker_mark_block_device_read_only(sd_device *dev) {
+ _cleanup_close_ int fd = -1;
+ const char *val;
+ int state = 1, r;
+
+ assert(dev);
+
+ if (!arg_blockdev_read_only)
+ return 0;
+
+ /* Do this only once, when the block device is new. If the device is later retriggered let's not
+ * toggle the bit again, so that people can boot up with full read-only mode and then unset the bit
+ * for specific devices only. */
+ if (!device_for_action(dev, DEVICE_ACTION_ADD))
+ return 0;
+
+ r = sd_device_get_subsystem(dev, &val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get subsystem: %m");
+
+ if (!streq(val, "block"))
+ return 0;
+
+ r = sd_device_get_sysname(dev, &val);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get sysname: %m");
+
+ /* Exclude synthetic devices for now, this is supposed to be a safety feature to avoid modification
+ * of physical devices, and what sits on top of those doesn't really matter if we don't allow the
+ * underlying block devices to receive changes. */
+ if (STARTSWITH_SET(val, "dm-", "md", "drbd", "loop", "nbd", "zram"))
+ return 0;
+
+ r = sd_device_get_devname(dev, &val);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get devname: %m");
+
+ fd = open(val, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0)
+ return log_device_debug_errno(dev, errno, "Failed to open '%s', ignoring: %m", val);
+
+ if (ioctl(fd, BLKROSET, &state) < 0)
+ return log_device_warning_errno(dev, errno, "Failed to mark block device '%s' read-only: %m", val);
+
+ log_device_info(dev, "Successfully marked block device '%s' read-only.", val);
+ return 0;
+}
+
+static int worker_process_device(Manager *manager, sd_device *dev) {
+ _cleanup_(udev_event_freep) UdevEvent *udev_event = NULL;
+ _cleanup_close_ int fd_lock = -1;
+ DeviceAction action;
+ uint64_t seqnum;
+ int r;
+
+ assert(manager);
+ assert(dev);
+
+ r = device_get_seqnum(dev, &seqnum);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get SEQNUM: %m");
+
+ r = device_get_action(dev, &action);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to get ACTION: %m");
+
+ log_device_debug(dev, "Processing device (SEQNUM=%"PRIu64", ACTION=%s)",
+ seqnum, device_action_to_string(action));
+
+ udev_event = udev_event_new(dev, arg_exec_delay_usec, manager->rtnl);
+ if (!udev_event)
+ return -ENOMEM;
+
+ r = worker_lock_block_device(dev, &fd_lock);
+ if (r == -EAGAIN) {
+ /* So this is a block device and the device is locked currently via the BSD advisory locks —
+ * someone else is exclusively using it. This means we don't run our udev rules now, to not
+ * interfere. However we want to know when the device is unlocked again, and retrigger the
+ * device again then, so that the rules are run eventually. For that we use IN_CLOSE_WRITE
+ * inotify watches (which isn't exactly the same as waiting for the BSD locks to release, but
+ * not totally off, as long as unlock+close() is done together, as it usually is).
+ *
+ * (The user-facing side of this: https://systemd.io/BLOCK_DEVICE_LOCKING)
+ *
+ * There's a bit of a chicken and egg problem here for this however: inotify watching is
+ * supposed to be enabled via an option set via udev rules (OPTIONS+="watch"). If we skip the
+ * udev rules here however (as we just said we do), we would thus never see that specific
+ * udev rule, and thus never turn on inotify watching. But in order to catch up eventually
+ * and run them we we need the inotify watching: hence a classic chicken and egg problem.
+ *
+ * Our way out here: if we see the block device locked, unconditionally watch the device via
+ * inotify, regardless of any explicit request via OPTIONS+="watch". Thus, a device that is
+ * currently locked via the BSD file locks will be treated as if we ran a single udev rule
+ * only for it: the one that turns on inotify watching for it. If we eventually see the
+ * inotify IN_CLOSE_WRITE event, and then run the rules after all and we then realize that
+ * this wasn't actually requested (i.e. no OPTIONS+="watch" set) we'll simply turn off the
+ * watching again (see below). Effectively this means: inotify watching is now enabled either
+ * a) when the udev rules say so, or b) while the device is locked.
+ *
+ * Worst case scenario hence: in the (unlikely) case someone locked the device and we clash
+ * with that we might do inotify watching for a brief moment for a device where we actually
+ * weren't supposed to. But that shouldn't be too bad, in particular as BSD locks being taken
+ * on a block device is kinda an indication that the inotify logic is desired too, to some
+ * degree — they go hand-in-hand after all. */
+
+ log_device_debug(dev, "Block device is currently locked, installing watch to wait until the lock is released.");
+ (void) udev_watch_begin(dev);
+
+ /* Now the watch is installed, let's lock the device again, maybe in the meantime things changed */
+ r = worker_lock_block_device(dev, &fd_lock);
+ }
+ if (r < 0)
+ return r;
+
+ (void) worker_mark_block_device_read_only(dev);
+
+ /* apply rules, create node, symlinks */
+ r = udev_event_execute_rules(udev_event, arg_event_timeout_usec, arg_timeout_signal, manager->properties, manager->rules);
+ if (r < 0)
+ return r;
+
+ udev_event_execute_run(udev_event, arg_event_timeout_usec, arg_timeout_signal);
+
+ if (!manager->rtnl)
+ /* in case rtnl was initialized */
+ manager->rtnl = sd_netlink_ref(udev_event->rtnl);
+
+ /* apply/restore/end inotify watch */
+ if (udev_event->inotify_watch) {
+ (void) udev_watch_begin(dev);
+ r = device_update_db(dev);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to update database under /run/udev/data/: %m");
+ } else
+ (void) udev_watch_end(dev);
+
+ log_device_debug(dev, "Device (SEQNUM=%"PRIu64", ACTION=%s) processed",
+ seqnum, device_action_to_string(action));
+
+ return 0;
+}
+
+static int worker_device_monitor_handler(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
+ Manager *manager = userdata;
+ int r;
+
+ assert(dev);
+ assert(manager);
+
+ r = worker_process_device(manager, dev);
+ if (r == -EAGAIN)
+ /* if we couldn't acquire the flock(), then proceed quietly */
+ log_device_debug_errno(dev, r, "Device currently locked, not processing.");
+ else {
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to process device, ignoring: %m");
+
+ /* send processed event back to libudev listeners */
+ r = device_monitor_send_device(monitor, NULL, dev);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to send device, ignoring: %m");
+ }
+
+ /* send udevd the result of the event execution */
+ r = worker_send_message(manager->worker_watch[WRITE_END]);
+ if (r < 0)
+ log_device_warning_errno(dev, r, "Failed to send signal to main daemon, ignoring: %m");
+
+ return 1;
+}
+
+static int worker_main(Manager *_manager, sd_device_monitor *monitor, sd_device *first_device) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = first_device;
+ _cleanup_(manager_freep) Manager *manager = _manager;
+ int r;
+
+ assert(manager);
+ assert(monitor);
+ assert(dev);
+
+ assert_se(unsetenv("NOTIFY_SOCKET") == 0);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
+
+ /* Reset OOM score, we only protect the main daemon. */
+ r = set_oom_score_adjust(0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to reset OOM score, ignoring: %m");
+
+ /* Clear unnecessary data in Manager object.*/
+ manager_clear_for_worker(manager);
+
+ r = sd_event_new(&manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set SIGTERM event: %m");
+
+ r = sd_device_monitor_attach_event(monitor, manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event loop to device monitor: %m");
+
+ r = sd_device_monitor_start(monitor, worker_device_monitor_handler, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(monitor), "worker-device-monitor");
+
+ /* Process first device */
+ (void) worker_device_monitor_handler(monitor, dev, manager);
+
+ r = sd_event_loop(manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+static int worker_spawn(Manager *manager, struct event *event) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *worker_monitor = NULL;
+ struct worker *worker;
+ pid_t pid;
+ int r;
+
+ /* listen for new events */
+ r = device_monitor_new_full(&worker_monitor, MONITOR_GROUP_NONE, -1);
+ if (r < 0)
+ return r;
+
+ /* allow the main daemon netlink address to send devices to the worker */
+ r = device_monitor_allow_unicast_sender(worker_monitor, manager->monitor);
+ if (r < 0)
+ return log_error_errno(r, "Worker: Failed to set unicast sender: %m");
+
+ r = device_monitor_enable_receiving(worker_monitor);
+ if (r < 0)
+ return log_error_errno(r, "Worker: Failed to enable receiving of device: %m");
+
+ r = safe_fork(NULL, FORK_DEATHSIG, &pid);
+ if (r < 0) {
+ event->state = EVENT_QUEUED;
+ return log_error_errno(r, "Failed to fork() worker: %m");
+ }
+ if (r == 0) {
+ /* Worker process */
+ r = worker_main(manager, worker_monitor, sd_device_ref(event->dev));
+ log_close();
+ _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ r = worker_new(&worker, manager, worker_monitor, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create worker object: %m");
+
+ worker_attach_event(worker, event);
+
+ log_device_debug(event->dev, "Worker ["PID_FMT"] is forked for processing SEQNUM=%"PRIu64".", pid, event->seqnum);
+ return 0;
+}
+
+static void event_run(Manager *manager, struct event *event) {
+ static bool log_children_max_reached = true;
+ struct worker *worker;
+ int r;
+
+ assert(manager);
+ assert(event);
+
+ if (DEBUG_LOGGING) {
+ DeviceAction action;
+
+ r = device_get_action(event->dev, &action);
+ log_device_debug(event->dev, "Device (SEQNUM=%"PRIu64", ACTION=%s) ready for processing",
+ event->seqnum, r >= 0 ? device_action_to_string(action) : "<unknown>");
+ }
+
+ HASHMAP_FOREACH(worker, manager->workers) {
+ if (worker->state != WORKER_IDLE)
+ continue;
+
+ r = device_monitor_send_device(manager->monitor, worker->monitor, event->dev);
+ if (r < 0) {
+ log_device_error_errno(event->dev, r, "Worker ["PID_FMT"] did not accept message, killing the worker: %m",
+ worker->pid);
+ (void) kill(worker->pid, SIGKILL);
+ worker->state = WORKER_KILLED;
+ continue;
+ }
+ worker_attach_event(worker, event);
+ return;
+ }
+
+ if (hashmap_size(manager->workers) >= arg_children_max) {
+
+ /* Avoid spamming the debug logs if the limit is already reached and
+ * many events still need to be processed */
+ if (log_children_max_reached && arg_children_max > 1) {
+ log_debug("Maximum number (%u) of children reached.", hashmap_size(manager->workers));
+ log_children_max_reached = false;
+ }
+ return;
+ }
+
+ /* Re-enable the debug message for the next batch of events */
+ log_children_max_reached = true;
+
+ /* fork with up-to-date SELinux label database, so the child inherits the up-to-date db
+ and, until the next SELinux policy changes, we safe further reloads in future children */
+ mac_selinux_maybe_reload();
+
+ /* start new worker and pass initial device */
+ worker_spawn(manager, event);
+}
+
+static int event_queue_insert(Manager *manager, sd_device *dev) {
+ _cleanup_(sd_device_unrefp) sd_device *clone = NULL;
+ struct event *event;
+ DeviceAction action;
+ uint64_t seqnum;
+ int r;
+
+ assert(manager);
+ assert(dev);
+
+ /* only one process can add events to the queue */
+ assert(manager->pid == getpid_cached());
+
+ /* We only accepts devices received by device monitor. */
+ r = device_get_seqnum(dev, &seqnum);
+ if (r < 0)
+ return r;
+
+ /* Refuse devices do not have ACTION property. */
+ r = device_get_action(dev, &action);
+ if (r < 0)
+ return r;
+
+ /* Save original device to restore the state on failures. */
+ r = device_shallow_clone(dev, &clone);
+ if (r < 0)
+ return r;
+
+ r = device_copy_properties(clone, dev);
+ if (r < 0)
+ return r;
+
+ event = new(struct event, 1);
+ if (!event)
+ return -ENOMEM;
+
+ *event = (struct event) {
+ .manager = manager,
+ .dev = sd_device_ref(dev),
+ .dev_kernel = TAKE_PTR(clone),
+ .seqnum = seqnum,
+ .state = EVENT_QUEUED,
+ };
+
+ if (LIST_IS_EMPTY(manager->events)) {
+ r = touch("/run/udev/queue");
+ if (r < 0)
+ log_warning_errno(r, "Failed to touch /run/udev/queue: %m");
+ }
+
+ LIST_APPEND(event, manager->events, event);
+
+ log_device_debug(dev, "Device (SEQNUM=%"PRIu64", ACTION=%s) is queued",
+ seqnum, device_action_to_string(action));
+
+ return 0;
+}
+
+static void manager_kill_workers(Manager *manager) {
+ struct worker *worker;
+
+ assert(manager);
+
+ HASHMAP_FOREACH(worker, manager->workers) {
+ if (worker->state == WORKER_KILLED)
+ continue;
+
+ worker->state = WORKER_KILLED;
+ (void) kill(worker->pid, SIGTERM);
+ }
+}
+
+/* lookup event for identical, parent, child device */
+static int is_device_busy(Manager *manager, struct event *event) {
+ const char *subsystem, *devpath, *devpath_old = NULL;
+ dev_t devnum = makedev(0, 0);
+ struct event *loop_event;
+ size_t devpath_len;
+ int r, ifindex = 0;
+ bool is_block;
+
+ r = sd_device_get_subsystem(event->dev, &subsystem);
+ if (r < 0)
+ return r;
+
+ is_block = streq(subsystem, "block");
+
+ r = sd_device_get_devpath(event->dev, &devpath);
+ if (r < 0)
+ return r;
+
+ devpath_len = strlen(devpath);
+
+ r = sd_device_get_property_value(event->dev, "DEVPATH_OLD", &devpath_old);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = sd_device_get_devnum(event->dev, &devnum);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ r = sd_device_get_ifindex(event->dev, &ifindex);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ /* check if queue contains events we depend on */
+ LIST_FOREACH(event, loop_event, manager->events) {
+ size_t loop_devpath_len, common;
+ const char *loop_devpath;
+
+ /* we already found a later event, earlier cannot block us, no need to check again */
+ if (loop_event->seqnum < event->delaying_seqnum)
+ continue;
+
+ /* event we checked earlier still exists, no need to check again */
+ if (loop_event->seqnum == event->delaying_seqnum)
+ return true;
+
+ /* found ourself, no later event can block us */
+ if (loop_event->seqnum >= event->seqnum)
+ break;
+
+ /* check major/minor */
+ if (major(devnum) != 0) {
+ const char *s;
+ dev_t d;
+
+ if (sd_device_get_subsystem(loop_event->dev, &s) < 0)
+ continue;
+
+ if (sd_device_get_devnum(loop_event->dev, &d) >= 0 &&
+ devnum == d && is_block == streq(s, "block"))
+ goto set_delaying_seqnum;
+ }
+
+ /* check network device ifindex */
+ if (ifindex > 0) {
+ int i;
+
+ if (sd_device_get_ifindex(loop_event->dev, &i) >= 0 &&
+ ifindex == i)
+ goto set_delaying_seqnum;
+ }
+
+ if (sd_device_get_devpath(loop_event->dev, &loop_devpath) < 0)
+ continue;
+
+ /* check our old name */
+ if (devpath_old && streq(devpath_old, loop_devpath))
+ goto set_delaying_seqnum;
+
+ loop_devpath_len = strlen(loop_devpath);
+
+ /* compare devpath */
+ common = MIN(devpath_len, loop_devpath_len);
+
+ /* one devpath is contained in the other? */
+ if (!strneq(devpath, loop_devpath, common))
+ continue;
+
+ /* identical device event found */
+ if (devpath_len == loop_devpath_len)
+ goto set_delaying_seqnum;
+
+ /* parent device event found */
+ if (devpath[common] == '/')
+ goto set_delaying_seqnum;
+
+ /* child device event found */
+ if (loop_devpath[common] == '/')
+ goto set_delaying_seqnum;
+ }
+
+ return false;
+
+set_delaying_seqnum:
+ log_device_debug(event->dev, "SEQNUM=%" PRIu64 " blocked by SEQNUM=%" PRIu64,
+ event->seqnum, loop_event->seqnum);
+
+ event->delaying_seqnum = loop_event->seqnum;
+ return true;
+}
+
+static void manager_exit(Manager *manager) {
+ assert(manager);
+
+ manager->exit = true;
+
+ sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Starting shutdown...");
+
+ /* close sources of new events and discard buffered events */
+ manager->ctrl = udev_ctrl_unref(manager->ctrl);
+
+ manager->inotify_event = sd_event_source_unref(manager->inotify_event);
+ manager->fd_inotify = safe_close(manager->fd_inotify);
+
+ manager->monitor = sd_device_monitor_unref(manager->monitor);
+
+ /* discard queued events and kill workers */
+ event_queue_cleanup(manager, EVENT_QUEUED);
+ manager_kill_workers(manager);
+}
+
+/* reload requested, HUP signal received, rules changed, builtin changed */
+static void manager_reload(Manager *manager) {
+
+ assert(manager);
+
+ sd_notify(false,
+ "RELOADING=1\n"
+ "STATUS=Flushing configuration...");
+
+ manager_kill_workers(manager);
+ manager->rules = udev_rules_free(manager->rules);
+ udev_builtin_exit();
+
+ sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Processing with %u children at max", arg_children_max);
+}
+
+static int on_kill_workers_event(sd_event_source *s, uint64_t usec, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ log_debug("Cleanup idle workers");
+ manager_kill_workers(manager);
+
+ return 1;
+}
+
+static void event_queue_start(Manager *manager) {
+ struct event *event;
+ usec_t usec;
+ int r;
+
+ assert(manager);
+
+ if (LIST_IS_EMPTY(manager->events) ||
+ manager->exit || manager->stop_exec_queue)
+ return;
+
+ assert_se(sd_event_now(manager->event, CLOCK_MONOTONIC, &usec) >= 0);
+ /* check for changed config, every 3 seconds at most */
+ if (manager->last_usec == 0 ||
+ usec - manager->last_usec > 3 * USEC_PER_SEC) {
+ if (udev_rules_check_timestamp(manager->rules) ||
+ udev_builtin_validate())
+ manager_reload(manager);
+
+ manager->last_usec = usec;
+ }
+
+ r = event_source_disable(manager->kill_workers_event);
+ if (r < 0)
+ log_warning_errno(r, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
+
+ udev_builtin_init();
+
+ if (!manager->rules) {
+ r = udev_rules_load(&manager->rules, arg_resolve_name_timing);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read udev rules: %m");
+ return;
+ }
+ }
+
+ LIST_FOREACH(event, event, manager->events) {
+ if (event->state != EVENT_QUEUED)
+ continue;
+
+ /* do not start event if parent or child event is still running */
+ if (is_device_busy(manager, event) != 0)
+ continue;
+
+ event_run(manager, event);
+ }
+}
+
+static void event_queue_cleanup(Manager *manager, enum event_state match_type) {
+ struct event *event, *tmp;
+
+ LIST_FOREACH_SAFE(event, event, tmp, manager->events) {
+ if (match_type != EVENT_UNDEF && match_type != event->state)
+ continue;
+
+ event_free(event);
+ }
+}
+
+static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ for (;;) {
+ struct worker_message msg;
+ struct iovec iovec = {
+ .iov_base = &msg,
+ .iov_len = sizeof(msg),
+ };
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ ssize_t size;
+ struct ucred *ucred;
+ struct worker *worker;
+
+ size = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT);
+ if (size == -EINTR)
+ continue;
+ if (size == -EAGAIN)
+ /* nothing more to read */
+ break;
+ if (size < 0)
+ return log_error_errno(size, "Failed to receive message: %m");
+
+ cmsg_close_all(&msghdr);
+
+ if (size != sizeof(struct worker_message)) {
+ log_warning("Ignoring worker message with invalid size %zi bytes", size);
+ continue;
+ }
+
+ ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
+ if (!ucred || ucred->pid <= 0) {
+ log_warning("Ignoring worker message without valid PID");
+ continue;
+ }
+
+ /* lookup worker who sent the signal */
+ worker = hashmap_get(manager->workers, PID_TO_PTR(ucred->pid));
+ if (!worker) {
+ log_debug("Worker ["PID_FMT"] returned, but is no longer tracked", ucred->pid);
+ continue;
+ }
+
+ if (worker->state != WORKER_KILLED)
+ worker->state = WORKER_IDLE;
+
+ /* worker returned */
+ event_free(worker->event);
+ }
+
+ /* we have free workers, try to schedule events */
+ event_queue_start(manager);
+
+ return 1;
+}
+
+static int on_uevent(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
+ Manager *manager = userdata;
+ int r;
+
+ assert(manager);
+
+ device_ensure_usec_initialized(dev, NULL);
+
+ r = event_queue_insert(manager, dev);
+ if (r < 0) {
+ log_device_error_errno(dev, r, "Failed to insert device into event queue: %m");
+ return 1;
+ }
+
+ /* we have fresh events, try to schedule them */
+ event_queue_start(manager);
+
+ return 1;
+}
+
+/* receive the udevd message from userspace */
+static int on_ctrl_msg(struct udev_ctrl *uctrl, enum udev_ctrl_msg_type type, const union udev_ctrl_msg_value *value, void *userdata) {
+ Manager *manager = userdata;
+ int r;
+
+ assert(value);
+ assert(manager);
+
+ switch (type) {
+ case UDEV_CTRL_SET_LOG_LEVEL:
+ log_debug("Received udev control message (SET_LOG_LEVEL), setting log_level=%i", value->intval);
+ log_set_max_level_realm(LOG_REALM_UDEV, value->intval);
+ log_set_max_level_realm(LOG_REALM_SYSTEMD, value->intval);
+ manager_kill_workers(manager);
+ break;
+ case UDEV_CTRL_STOP_EXEC_QUEUE:
+ log_debug("Received udev control message (STOP_EXEC_QUEUE)");
+ manager->stop_exec_queue = true;
+ break;
+ case UDEV_CTRL_START_EXEC_QUEUE:
+ log_debug("Received udev control message (START_EXEC_QUEUE)");
+ manager->stop_exec_queue = false;
+ event_queue_start(manager);
+ break;
+ case UDEV_CTRL_RELOAD:
+ log_debug("Received udev control message (RELOAD)");
+ manager_reload(manager);
+ break;
+ case UDEV_CTRL_SET_ENV: {
+ _cleanup_free_ char *key = NULL, *val = NULL, *old_key = NULL, *old_val = NULL;
+ const char *eq;
+
+ eq = strchr(value->buf, '=');
+ if (!eq) {
+ log_error("Invalid key format '%s'", value->buf);
+ return 1;
+ }
+
+ key = strndup(value->buf, eq - value->buf);
+ if (!key) {
+ log_oom();
+ return 1;
+ }
+
+ old_val = hashmap_remove2(manager->properties, key, (void **) &old_key);
+
+ r = hashmap_ensure_allocated(&manager->properties, &string_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return 1;
+ }
+
+ eq++;
+ if (isempty(eq)) {
+ log_debug("Received udev control message (ENV), unsetting '%s'", key);
+
+ r = hashmap_put(manager->properties, key, NULL);
+ if (r < 0) {
+ log_oom();
+ return 1;
+ }
+ } else {
+ val = strdup(eq);
+ if (!val) {
+ log_oom();
+ return 1;
+ }
+
+ log_debug("Received udev control message (ENV), setting '%s=%s'", key, val);
+
+ r = hashmap_put(manager->properties, key, val);
+ if (r < 0) {
+ log_oom();
+ return 1;
+ }
+ }
+
+ key = val = NULL;
+ manager_kill_workers(manager);
+ break;
+ }
+ case UDEV_CTRL_SET_CHILDREN_MAX:
+ if (value->intval <= 0) {
+ log_debug("Received invalid udev control message (SET_MAX_CHILDREN, %i), ignoring.", value->intval);
+ return 0;
+ }
+
+ log_debug("Received udev control message (SET_MAX_CHILDREN), setting children_max=%i", value->intval);
+ arg_children_max = value->intval;
+
+ (void) sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Processing with %u children at max", arg_children_max);
+ break;
+ case UDEV_CTRL_PING:
+ log_debug("Received udev control message (PING)");
+ break;
+ case UDEV_CTRL_EXIT:
+ log_debug("Received udev control message (EXIT)");
+ manager_exit(manager);
+ break;
+ default:
+ log_debug("Received unknown udev control message, ignoring");
+ }
+
+ return 1;
+}
+
+static int synthesize_change_one(sd_device *dev, const char *syspath) {
+ const char *filename;
+ int r;
+
+ filename = strjoina(syspath, "/uevent");
+ log_device_debug(dev, "device is closed, synthesising 'change' on %s", syspath);
+ r = write_string_file(filename, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_device_debug_errno(dev, r, "Failed to write 'change' to %s: %m", filename);
+ return 0;
+}
+
+static int synthesize_change(sd_device *dev) {
+ const char *subsystem, *sysname, *devname, *syspath, *devtype;
+ int r;
+
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_sysname(dev, &sysname);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devname(dev, &devname);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_syspath(dev, &syspath);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devtype(dev, &devtype);
+ if (r < 0)
+ return r;
+
+ if (streq_ptr("block", subsystem) &&
+ streq_ptr("disk", devtype) &&
+ !startswith(sysname, "dm-")) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ bool part_table_read = false, has_partitions = false;
+ sd_device *d;
+ int fd;
+
+ /*
+ * Try to re-read the partition table. This only succeeds if
+ * none of the devices is busy. The kernel returns 0 if no
+ * partition table is found, and we will not get an event for
+ * the disk.
+ */
+ fd = open(devname, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
+ if (fd >= 0) {
+ r = flock(fd, LOCK_EX|LOCK_NB);
+ if (r >= 0)
+ r = ioctl(fd, BLKRRPART, 0);
+
+ close(fd);
+ if (r >= 0)
+ part_table_read = true;
+ }
+
+ /* search for partitions */
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_parent(e, dev);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_subsystem(e, "block", true);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, d) {
+ const char *t;
+
+ if (sd_device_get_devtype(d, &t) < 0 ||
+ !streq("partition", t))
+ continue;
+
+ has_partitions = true;
+ break;
+ }
+
+ /*
+ * We have partitions and re-read the table, the kernel already sent
+ * out a "change" event for the disk, and "remove/add" for all
+ * partitions.
+ */
+ if (part_table_read && has_partitions)
+ return 0;
+
+ /*
+ * We have partitions but re-reading the partition table did not
+ * work, synthesize "change" for the disk and all partitions.
+ */
+ (void) synthesize_change_one(dev, syspath);
+
+ FOREACH_DEVICE(e, d) {
+ const char *t, *n, *s;
+
+ if (sd_device_get_devtype(d, &t) < 0 ||
+ !streq("partition", t))
+ continue;
+
+ if (sd_device_get_devname(d, &n) < 0 ||
+ sd_device_get_syspath(d, &s) < 0)
+ continue;
+
+ (void) synthesize_change_one(dev, s);
+ }
+
+ } else
+ (void) synthesize_change_one(dev, syspath);
+
+ return 0;
+}
+
+static int on_inotify(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Manager *manager = userdata;
+ union inotify_event_buffer buffer;
+ struct inotify_event *e;
+ ssize_t l;
+ int r;
+
+ assert(manager);
+
+ r = event_source_disable(manager->kill_workers_event);
+ if (r < 0)
+ log_warning_errno(r, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 1;
+
+ return log_error_errno(errno, "Failed to read inotify fd: %m");
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *devnode;
+
+ if (udev_watch_lookup(e->wd, &dev) <= 0)
+ continue;
+
+ if (sd_device_get_devname(dev, &devnode) < 0)
+ continue;
+
+ log_device_debug(dev, "Inotify event: %x for %s", e->mask, devnode);
+ if (e->mask & IN_CLOSE_WRITE)
+ synthesize_change(dev);
+ else if (e->mask & IN_IGNORED)
+ udev_watch_end(dev);
+ }
+
+ return 1;
+}
+
+static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ manager_exit(manager);
+
+ return 1;
+}
+
+static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ manager_reload(manager);
+
+ return 1;
+}
+
+static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *manager = userdata;
+ int r;
+
+ assert(manager);
+
+ for (;;) {
+ pid_t pid;
+ int status;
+ struct worker *worker;
+
+ pid = waitpid(-1, &status, WNOHANG);
+ if (pid <= 0)
+ break;
+
+ worker = hashmap_get(manager->workers, PID_TO_PTR(pid));
+ if (!worker) {
+ log_warning("Worker ["PID_FMT"] is unknown, ignoring", pid);
+ continue;
+ }
+
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status) == 0)
+ log_debug("Worker ["PID_FMT"] exited", pid);
+ else
+ log_warning("Worker ["PID_FMT"] exited with return code %i", pid, WEXITSTATUS(status));
+ } else if (WIFSIGNALED(status))
+ log_warning("Worker ["PID_FMT"] terminated by signal %i (%s)", pid, WTERMSIG(status), signal_to_string(WTERMSIG(status)));
+ else if (WIFSTOPPED(status)) {
+ log_info("Worker ["PID_FMT"] stopped", pid);
+ continue;
+ } else if (WIFCONTINUED(status)) {
+ log_info("Worker ["PID_FMT"] continued", pid);
+ continue;
+ } else
+ log_warning("Worker ["PID_FMT"] exit with status 0x%04x", pid, status);
+
+ if ((!WIFEXITED(status) || WEXITSTATUS(status) != 0) && worker->event) {
+ log_device_error(worker->event->dev, "Worker ["PID_FMT"] failed", pid);
+
+ /* delete state from disk */
+ device_delete_db(worker->event->dev);
+ device_tag_index(worker->event->dev, NULL, false);
+
+ if (manager->monitor) {
+ /* forward kernel event without amending it */
+ r = device_monitor_send_device(manager->monitor, NULL, worker->event->dev_kernel);
+ if (r < 0)
+ log_device_error_errno(worker->event->dev_kernel, r, "Failed to send back device to kernel: %m");
+ }
+ }
+
+ worker_free(worker);
+ }
+
+ /* we can start new workers, try to schedule events */
+ event_queue_start(manager);
+
+ /* Disable unnecessary cleanup event */
+ if (hashmap_isempty(manager->workers)) {
+ r = event_source_disable(manager->kill_workers_event);
+ if (r < 0)
+ log_warning_errno(r, "Failed to disable event source for cleaning up idle workers, ignoring: %m");
+ }
+
+ return 1;
+}
+
+static int on_post(sd_event_source *s, void *userdata) {
+ Manager *manager = userdata;
+
+ assert(manager);
+
+ if (!LIST_IS_EMPTY(manager->events))
+ return 1;
+
+ /* There are no pending events. Let's cleanup idle process. */
+
+ if (!hashmap_isempty(manager->workers)) {
+ /* There are idle workers */
+ (void) event_reset_time(manager->event, &manager->kill_workers_event, CLOCK_MONOTONIC,
+ now(CLOCK_MONOTONIC) + 3 * USEC_PER_SEC, USEC_PER_SEC,
+ on_kill_workers_event, manager, 0, "kill-workers-event", false);
+ return 1;
+ }
+
+ /* There are no idle workers. */
+
+ if (manager->exit)
+ return sd_event_exit(manager->event, 0);
+
+ if (manager->cgroup)
+ /* cleanup possible left-over processes in our cgroup */
+ (void) cg_kill(SYSTEMD_CGROUP_CONTROLLER, manager->cgroup, SIGKILL, CGROUP_IGNORE_SELF, NULL, NULL, NULL);
+
+ return 1;
+}
+
+static int listen_fds(int *ret_ctrl, int *ret_netlink) {
+ int ctrl_fd = -1, netlink_fd = -1;
+ int fd, n;
+
+ assert(ret_ctrl);
+ assert(ret_netlink);
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return n;
+
+ for (fd = SD_LISTEN_FDS_START; fd < n + SD_LISTEN_FDS_START; fd++) {
+ if (sd_is_socket(fd, AF_LOCAL, SOCK_SEQPACKET, -1) > 0) {
+ if (ctrl_fd >= 0)
+ return -EINVAL;
+ ctrl_fd = fd;
+ continue;
+ }
+
+ if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
+ if (netlink_fd >= 0)
+ return -EINVAL;
+ netlink_fd = fd;
+ continue;
+ }
+
+ return -EINVAL;
+ }
+
+ *ret_ctrl = ctrl_fd;
+ *ret_netlink = netlink_fd;
+
+ return 0;
+}
+
+/*
+ * read the kernel command line, in case we need to get into debug mode
+ * udev.log_level=<level> syslog priority
+ * udev.children_max=<number of workers> events are fully serialized if set to 1
+ * udev.exec_delay=<number of seconds> delay execution of every executed program
+ * udev.event_timeout=<number of seconds> seconds to wait before terminating an event
+ * udev.blockdev_read_only<=bool> mark all block devices read-only when they appear
+ */
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ assert(key);
+
+ if (proc_cmdline_key_streq(key, "udev.log_level") ||
+ proc_cmdline_key_streq(key, "udev.log_priority")) { /* kept for backward compatibility */
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = log_level_from_string(value);
+ if (r >= 0)
+ log_set_max_level(r);
+
+ } else if (proc_cmdline_key_streq(key, "udev.event_timeout")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = parse_sec(value, &arg_event_timeout_usec);
+
+ } else if (proc_cmdline_key_streq(key, "udev.children_max")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = safe_atou(value, &arg_children_max);
+
+ } else if (proc_cmdline_key_streq(key, "udev.exec_delay")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = parse_sec(value, &arg_exec_delay_usec);
+
+ } else if (proc_cmdline_key_streq(key, "udev.timeout_signal")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = signal_from_string(value);
+ if (r > 0)
+ arg_timeout_signal = r;
+
+ } else if (proc_cmdline_key_streq(key, "udev.blockdev_read_only")) {
+
+ if (!value)
+ arg_blockdev_read_only = true;
+ else {
+ r = parse_boolean(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse udev.blockdev-read-only argument, ignoring: %s", value);
+ else
+ arg_blockdev_read_only = r;
+ }
+
+ if (arg_blockdev_read_only)
+ log_notice("All physical block devices will be marked read-only.");
+
+ return 0;
+
+ } else {
+ if (startswith(key, "udev."))
+ log_warning("Unknown udev kernel command line option \"%s\", ignoring.", key);
+
+ return 0;
+ }
+
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse \"%s=%s\", ignoring: %m", key, value);
+
+ return 0;
+}
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-udevd.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "Rule-based manager for device events and files.\n\n"
+ " -h --help Print this message\n"
+ " -V --version Print version of the program\n"
+ " -d --daemon Detach and run in the background\n"
+ " -D --debug Enable debug output\n"
+ " -c --children-max=INT Set maximum number of workers\n"
+ " -e --exec-delay=SECONDS Seconds to wait before executing RUN=\n"
+ " -t --event-timeout=SECONDS Seconds to wait before terminating an event\n"
+ " -N --resolve-names=early|late|never\n"
+ " When to resolve users and groups\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+ enum {
+ ARG_TIMEOUT_SIGNAL,
+ };
+
+ static const struct option options[] = {
+ { "daemon", no_argument, NULL, 'd' },
+ { "debug", no_argument, NULL, 'D' },
+ { "children-max", required_argument, NULL, 'c' },
+ { "exec-delay", required_argument, NULL, 'e' },
+ { "event-timeout", required_argument, NULL, 't' },
+ { "resolve-names", required_argument, NULL, 'N' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ { "timeout-signal", required_argument, NULL, ARG_TIMEOUT_SIGNAL },
+ {}
+ };
+
+ int c, r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "c:de:Dt:N:hV", options, NULL)) >= 0) {
+ switch (c) {
+
+ case 'd':
+ arg_daemonize = true;
+ break;
+ case 'c':
+ r = safe_atou(optarg, &arg_children_max);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse --children-max= value '%s', ignoring: %m", optarg);
+ break;
+ case 'e':
+ r = parse_sec(optarg, &arg_exec_delay_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse --exec-delay= value '%s', ignoring: %m", optarg);
+ break;
+ case ARG_TIMEOUT_SIGNAL:
+ r = signal_from_string(optarg);
+ if (r <= 0)
+ log_warning_errno(r, "Failed to parse --timeout-signal= value '%s', ignoring: %m", optarg);
+ else
+ arg_timeout_signal = r;
+
+ break;
+ case 't':
+ r = parse_sec(optarg, &arg_event_timeout_usec);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse --event-timeout= value '%s', ignoring: %m", optarg);
+ break;
+ case 'D':
+ arg_debug = true;
+ break;
+ case 'N': {
+ ResolveNameTiming t;
+
+ t = resolve_name_timing_from_string(optarg);
+ if (t < 0)
+ log_warning("Invalid --resolve-names= value '%s', ignoring.", optarg);
+ else
+ arg_resolve_name_timing = t;
+ break;
+ }
+ case 'h':
+ return help();
+ case 'V':
+ printf("%s\n", GIT_VERSION);
+ return 0;
+ case '?':
+ return -EINVAL;
+ default:
+ assert_not_reached("Unhandled option");
+
+ }
+ }
+
+ return 1;
+}
+
+static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ int r;
+
+ assert(ret);
+
+ manager = new(Manager, 1);
+ if (!manager)
+ return log_oom();
+
+ *manager = (Manager) {
+ .fd_inotify = -1,
+ .worker_watch = { -1, -1 },
+ .cgroup = cgroup,
+ };
+
+ r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize udev control socket: %m");
+
+ r = udev_ctrl_enable_receiving(manager->ctrl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind udev control socket: %m");
+
+ r = device_monitor_new_full(&manager->monitor, MONITOR_GROUP_KERNEL, fd_uevent);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize device monitor: %m");
+
+ /* Bump receiver buffer, but only if we are not called via socket activation, as in that
+ * case systemd sets the receive buffer size for us, and the value in the .socket unit
+ * should take full effect. */
+ if (fd_uevent < 0) {
+ r = sd_device_monitor_set_receive_buffer_size(manager->monitor, 128 * 1024 * 1024);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set receive buffer size for device monitor, ignoring: %m");
+ }
+
+ r = device_monitor_enable_receiving(manager->monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind netlink socket: %m");
+
+ *ret = TAKE_PTR(manager);
+
+ return 0;
+}
+
+static int main_loop(Manager *manager) {
+ int fd_worker, r;
+
+ manager->pid = getpid_cached();
+
+ /* unnamed socket from workers to the main daemon */
+ r = socketpair(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0, manager->worker_watch);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to create socketpair for communicating with workers: %m");
+
+ fd_worker = manager->worker_watch[READ_END];
+
+ r = setsockopt_int(fd_worker, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable SO_PASSCRED: %m");
+
+ r = udev_watch_init();
+ if (r < 0)
+ return log_error_errno(r, "Failed to create inotify descriptor: %m");
+ manager->fd_inotify = r;
+
+ udev_watch_restore();
+
+ /* block and listen to all signals on signalfd */
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGHUP, SIGCHLD, -1) >= 0);
+
+ r = sd_event_default(&manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGINT, on_sigterm, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create SIGINT event source: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGTERM, on_sigterm, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create SIGTERM event source: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGHUP, on_sighup, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create SIGHUP event source: %m");
+
+ r = sd_event_add_signal(manager->event, NULL, SIGCHLD, on_sigchld, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create SIGCHLD event source: %m");
+
+ r = sd_event_set_watchdog(manager->event, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create watchdog event source: %m");
+
+ r = udev_ctrl_attach_event(manager->ctrl, manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to udev control: %m");
+
+ r = udev_ctrl_start(manager->ctrl, on_ctrl_msg, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ /* This needs to be after the inotify and uevent handling, to make sure
+ * that the ping is send back after fully processing the pending uevents
+ * (including the synthetic ones we may create due to inotify events).
+ */
+ r = sd_event_source_set_priority(udev_ctrl_get_event_source(manager->ctrl), SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set IDLE event priority for udev control event source: %m");
+
+ r = sd_event_add_io(manager->event, &manager->inotify_event, manager->fd_inotify, EPOLLIN, on_inotify, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create inotify event source: %m");
+
+ r = sd_device_monitor_attach_event(manager->monitor, manager->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(manager->monitor, on_uevent, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ (void) sd_event_source_set_description(sd_device_monitor_get_event_source(manager->monitor), "device-monitor");
+
+ r = sd_event_add_io(manager->event, NULL, fd_worker, EPOLLIN, on_worker, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create worker event source: %m");
+
+ r = sd_event_add_post(manager->event, NULL, on_post, manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create post event source: %m");
+
+ udev_builtin_init();
+
+ r = udev_rules_load(&manager->rules, arg_resolve_name_timing);
+ if (!manager->rules)
+ return log_error_errno(r, "Failed to read udev rules: %m");
+
+ r = udev_rules_apply_static_dev_perms(manager->rules);
+ if (r < 0)
+ log_error_errno(r, "Failed to apply permissions on static device nodes: %m");
+
+ (void) sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Processing with %u children at max", arg_children_max);
+
+ r = sd_event_loop(manager->event);
+ if (r < 0)
+ log_error_errno(r, "Event loop failed: %m");
+
+ sd_notify(false,
+ "STOPPING=1\n"
+ "STATUS=Shutting down...");
+ return r;
+}
+
+int run_udevd(int argc, char *argv[]) {
+ _cleanup_free_ char *cgroup = NULL;
+ _cleanup_(manager_freep) Manager *manager = NULL;
+ int fd_ctrl = -1, fd_uevent = -1;
+ int r;
+
+ log_set_target(LOG_TARGET_AUTO);
+ log_open();
+ udev_parse_config_full(&arg_children_max, &arg_exec_delay_usec, &arg_event_timeout_usec, &arg_resolve_name_timing, &arg_timeout_signal);
+ log_parse_environment();
+ log_open(); /* Done again to update after reading configuration. */
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
+
+ if (arg_debug) {
+ log_set_target(LOG_TARGET_CONSOLE);
+ log_set_max_level(LOG_DEBUG);
+ }
+
+ log_set_max_level_realm(LOG_REALM_SYSTEMD, log_get_max_level());
+
+ r = must_be_root();
+ if (r < 0)
+ return r;
+
+ if (arg_children_max == 0) {
+ unsigned long cpu_limit, mem_limit, cpu_count = 1;
+
+ r = cpus_in_affinity_mask();
+ if (r < 0)
+ log_warning_errno(r, "Failed to determine number of local CPUs, ignoring: %m");
+ else
+ cpu_count = r;
+
+ cpu_limit = cpu_count * 2 + 16;
+ mem_limit = MAX(physical_memory() / (128UL*1024*1024), 10U);
+
+ arg_children_max = MIN(cpu_limit, mem_limit);
+ arg_children_max = MIN(WORKER_NUM_MAX, arg_children_max);
+
+ log_debug("Set children_max to %u", arg_children_max);
+ }
+
+ /* set umask before creating any file/directory */
+ umask(022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ r = mkdir_errno_wrapper("/run/udev", 0755);
+ if (r < 0 && r != -EEXIST)
+ return log_error_errno(r, "Failed to create /run/udev: %m");
+
+ if (getppid() == 1 && sd_booted() > 0) {
+ /* Get our own cgroup, we regularly kill everything udev has left behind.
+ * We only do this on systemd systems, and only if we are directly spawned
+ * by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+ if (r < 0) {
+ if (IN_SET(r, -ENOENT, -ENOMEDIUM))
+ log_debug_errno(r, "Dedicated cgroup not found: %m");
+ else
+ log_warning_errno(r, "Failed to get cgroup: %m");
+ }
+ }
+
+ r = listen_fds(&fd_ctrl, &fd_uevent);
+ if (r < 0)
+ return log_error_errno(r, "Failed to listen on fds: %m");
+
+ r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create manager: %m");
+
+ if (arg_daemonize) {
+ pid_t pid;
+
+ log_info("Starting version " GIT_VERSION);
+
+ /* connect /dev/null to stdin, stdout, stderr */
+ if (log_get_max_level() < LOG_DEBUG) {
+ r = make_null_stdio();
+ if (r < 0)
+ log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
+ }
+
+ pid = fork();
+ if (pid < 0)
+ return log_error_errno(errno, "Failed to fork daemon: %m");
+ if (pid > 0)
+ /* parent */
+ return 0;
+
+ /* child */
+ (void) setsid();
+ }
+
+ return main_loop(manager);
+}
diff --git a/src/udev/udevd.h b/src/udev/udevd.h
new file mode 100644
index 0000000..583e895
--- /dev/null
+++ b/src/udev/udevd.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+int run_udevd(int argc, char *argv[]);
diff --git a/src/udev/v4l_id/v4l_id.c b/src/udev/v4l_id/v4l_id.c
new file mode 100644
index 0000000..932446b
--- /dev/null
+++ b/src/udev/v4l_id/v4l_id.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2009 Filippo Argiolas <filippo.argiolas@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details:
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/videodev2.h>
+
+#include "fd-util.h"
+#include "util.h"
+
+int main(int argc, char *argv[]) {
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ {}
+ };
+ _cleanup_close_ int fd = -1;
+ char *device;
+ struct v4l2_capability v2cap;
+ int c;
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+
+ switch (c) {
+ case 'h':
+ printf("%s [-h,--help] <device file>\n\n"
+ "Video4Linux device identification.\n\n"
+ " -h Print this message\n"
+ , program_invocation_short_name);
+ return 0;
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+
+ device = argv[optind];
+ if (!device)
+ return 2;
+
+ fd = open(device, O_RDONLY);
+ if (fd < 0)
+ return 3;
+
+ if (ioctl(fd, VIDIOC_QUERYCAP, &v2cap) == 0) {
+ int capabilities;
+ printf("ID_V4L_VERSION=2\n");
+ printf("ID_V4L_PRODUCT=%s\n", v2cap.card);
+ printf("ID_V4L_CAPABILITIES=:");
+ if (v2cap.capabilities & V4L2_CAP_DEVICE_CAPS)
+ capabilities = v2cap.device_caps;
+ else
+ capabilities = v2cap.capabilities;
+ if ((capabilities & V4L2_CAP_VIDEO_CAPTURE) > 0 ||
+ (capabilities & V4L2_CAP_VIDEO_CAPTURE_MPLANE) > 0)
+ printf("capture:");
+ if ((capabilities & V4L2_CAP_VIDEO_OUTPUT) > 0 ||
+ (capabilities & V4L2_CAP_VIDEO_OUTPUT_MPLANE) > 0)
+ printf("video_output:");
+ if ((capabilities & V4L2_CAP_VIDEO_OVERLAY) > 0)
+ printf("video_overlay:");
+ if ((capabilities & V4L2_CAP_AUDIO) > 0)
+ printf("audio:");
+ if ((capabilities & V4L2_CAP_TUNER) > 0)
+ printf("tuner:");
+ if ((capabilities & V4L2_CAP_RADIO) > 0)
+ printf("radio:");
+ printf("\n");
+ }
+
+ return 0;
+}
diff --git a/src/update-done/update-done.c b/src/update-done/update-done.c
new file mode 100644
index 0000000..2128925
--- /dev/null
+++ b/src/update-done/update-done.c
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fileio-label.h"
+#include "selinux-util.h"
+#include "time-util.h"
+
+#define MESSAGE \
+ "# This file was created by systemd-update-done. Its only \n" \
+ "# purpose is to hold a timestamp of the time this directory\n" \
+ "# was updated. See man:systemd-update-done.service(8).\n"
+
+static int apply_timestamp(const char *path, struct timespec *ts) {
+ _cleanup_free_ char *message = NULL;
+ int r;
+
+ /*
+ * We store the timestamp both as mtime of the file and in the file itself,
+ * to support filesystems which cannot store nanosecond-precision timestamps.
+ */
+
+ if (asprintf(&message,
+ MESSAGE
+ "TIMESTAMP_NSEC=" NSEC_FMT "\n",
+ timespec_load_nsec(ts)) < 0)
+ return log_oom();
+
+ r = write_string_file_atomic_label_ts(path, message, ts);
+ if (r == -EROFS)
+ log_debug_errno(r, "Cannot create \"%s\", file system is read-only.", path);
+ else if (r < 0)
+ return log_error_errno(r, "Failed to write \"%s\": %m", path);
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ struct stat st;
+ int r, q = 0;
+
+ log_setup_service();
+
+ if (stat("/usr", &st) < 0) {
+ log_error_errno(errno, "Failed to stat /usr: %m");
+ return EXIT_FAILURE;
+ }
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ r = apply_timestamp("/etc/.updated", &st.st_mtim);
+ q = apply_timestamp("/var/.updated", &st.st_mtim);
+
+ return r < 0 || q < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/update-utmp/update-utmp.c b/src/update-utmp/update-utmp.c
new file mode 100644
index 0000000..59c49f2
--- /dev/null
+++ b/src/update-utmp/update-utmp.c
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if HAVE_AUDIT
+#include <libaudit.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "main-func.h"
+#include "process-util.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "unit-name.h"
+#include "util.h"
+#include "utmp-wtmp.h"
+
+typedef struct Context {
+ sd_bus *bus;
+#if HAVE_AUDIT
+ int audit_fd;
+#endif
+} Context;
+
+static void context_clear(Context *c) {
+ assert(c);
+
+ c->bus = sd_bus_flush_close_unref(c->bus);
+#if HAVE_AUDIT
+ if (c->audit_fd >= 0)
+ audit_close(c->audit_fd);
+ c->audit_fd = -1;
+#endif
+}
+
+static usec_t get_startup_time(Context *c) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ usec_t t = 0;
+ int r;
+
+ assert(c);
+
+ r = sd_bus_get_property_trivial(
+ c->bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "UserspaceTimestamp",
+ &error,
+ 't', &t);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get timestamp: %s", bus_error_message(&error, r));
+ return 0;
+ }
+
+ return t;
+}
+
+static int get_current_runlevel(Context *c) {
+ static const struct {
+ const int runlevel;
+ const char *special;
+ } table[] = {
+ /* The first target of this list that is active or has
+ * a job scheduled wins. We prefer runlevels 5 and 3
+ * here over the others, since these are the main
+ * runlevels used on Fedora. It might make sense to
+ * change the order on some distributions. */
+ { '5', SPECIAL_GRAPHICAL_TARGET },
+ { '3', SPECIAL_MULTI_USER_TARGET },
+ { '1', SPECIAL_RESCUE_TARGET },
+ };
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(c);
+
+ for (size_t i = 0; i < ELEMENTSOF(table); i++) {
+ _cleanup_free_ char *state = NULL, *path = NULL;
+
+ path = unit_dbus_path_from_name(table[i].special);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ c->bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "ActiveState",
+ &error,
+ &state);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to get state: %s", bus_error_message(&error, r));
+
+ if (STR_IN_SET(state, "active", "reloading"))
+ return table[i].runlevel;
+ }
+
+ return 0;
+}
+
+static int on_reboot(Context *c) {
+ int r = 0, q;
+ usec_t t;
+
+ assert(c);
+
+ /* We finished start-up, so let's write the utmp
+ * record and send the audit msg */
+
+#if HAVE_AUDIT
+ if (c->audit_fd >= 0)
+ if (audit_log_user_comm_message(c->audit_fd, AUDIT_SYSTEM_BOOT, "", "systemd-update-utmp", NULL, NULL, NULL, 1) < 0 &&
+ errno != EPERM)
+ r = log_error_errno(errno, "Failed to send audit message: %m");
+#endif
+
+ /* If this call fails it will return 0, which
+ * utmp_put_reboot() will then fix to the current time */
+ t = get_startup_time(c);
+
+ q = utmp_put_reboot(t);
+ if (q < 0)
+ r = log_error_errno(q, "Failed to write utmp record: %m");
+
+ return r;
+}
+
+static int on_shutdown(Context *c) {
+ int r = 0, q;
+
+ assert(c);
+
+ /* We started shut-down, so let's write the utmp
+ * record and send the audit msg */
+
+#if HAVE_AUDIT
+ if (c->audit_fd >= 0)
+ if (audit_log_user_comm_message(c->audit_fd, AUDIT_SYSTEM_SHUTDOWN, "", "systemd-update-utmp", NULL, NULL, NULL, 1) < 0 &&
+ errno != EPERM)
+ r = log_error_errno(errno, "Failed to send audit message: %m");
+#endif
+
+ q = utmp_put_shutdown();
+ if (q < 0)
+ r = log_error_errno(q, "Failed to write utmp record: %m");
+
+ return r;
+}
+
+static int on_runlevel(Context *c) {
+ int r = 0, q, previous, runlevel;
+
+ assert(c);
+
+ /* We finished changing runlevel, so let's write the
+ * utmp record and send the audit msg */
+
+ /* First, get last runlevel */
+ q = utmp_get_runlevel(&previous, NULL);
+
+ if (q < 0) {
+ if (!IN_SET(q, -ESRCH, -ENOENT))
+ return log_error_errno(q, "Failed to get current runlevel: %m");
+
+ previous = 0;
+ }
+
+ /* Secondly, get new runlevel */
+ runlevel = get_current_runlevel(c);
+ if (runlevel < 0)
+ return runlevel;
+ if (runlevel == 0) {
+ log_warning("Failed to get new runlevel, utmp update skipped.");
+ return 0;
+ }
+
+ if (previous == runlevel)
+ return 0;
+
+#if HAVE_AUDIT
+ if (c->audit_fd >= 0) {
+ char s[STRLEN("old-level=_ new-level=_") + 1];
+
+ xsprintf(s, "old-level=%c new-level=%c",
+ previous > 0 ? previous : 'N',
+ runlevel);
+
+ if (audit_log_user_comm_message(c->audit_fd, AUDIT_SYSTEM_RUNLEVEL, s,
+ "systemd-update-utmp", NULL, NULL, NULL, 1) < 0 && errno != EPERM)
+ r = log_error_errno(errno, "Failed to send audit message: %m");
+ }
+#endif
+
+ q = utmp_put_runlevel(runlevel, previous);
+ if (q < 0 && !IN_SET(q, -ESRCH, -ENOENT))
+ return log_error_errno(q, "Failed to write utmp record: %m");
+
+ return r;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(context_clear) Context c = {
+#if HAVE_AUDIT
+ .audit_fd = -1
+#endif
+ };
+ int r;
+
+ if (argc != 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program requires one argument.");
+
+ log_setup_service();
+
+ umask(0022);
+
+#if HAVE_AUDIT
+ /* If the kernel lacks netlink or audit support, don't worry about it. */
+ c.audit_fd = audit_open();
+ if (c.audit_fd < 0)
+ log_full_errno(IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT) ? LOG_DEBUG : LOG_ERR,
+ errno, "Failed to connect to audit log: %m");
+#endif
+ r = bus_connect_system_systemd(&c.bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get D-Bus connection: %m");
+
+ if (streq(argv[1], "reboot"))
+ return on_reboot(&c);
+ if (streq(argv[1], "shutdown"))
+ return on_shutdown(&c);
+ if (streq(argv[1], "runlevel"))
+ return on_runlevel(&c);
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown command %s", argv[1]);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/user-sessions/user-sessions.c b/src/user-sessions/user-sessions.c
new file mode 100644
index 0000000..d1b8176
--- /dev/null
+++ b/src/user-sessions/user-sessions.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "fileio.h"
+#include "fileio-label.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "log.h"
+#include "selinux-util.h"
+#include "string-util.h"
+
+static int run(int argc, char *argv[]) {
+ int r, k;
+
+ if (argc != 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program requires one argument.");
+
+ log_setup_service();
+
+ umask(0022);
+
+ r = mac_selinux_init();
+ if (r < 0)
+ return r;
+
+ if (streq(argv[1], "start")) {
+ r = unlink_or_warn("/run/nologin");
+ k = unlink_or_warn("/etc/nologin");
+ if (r < 0)
+ return r;
+ return k;
+
+ } else if (streq(argv[1], "stop"))
+ return create_shutdown_run_nologin_or_warn();
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown verb '%s'.", argv[1]);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/userdb/meson.build b/src/userdb/meson.build
new file mode 100644
index 0000000..3a6225e
--- /dev/null
+++ b/src/userdb/meson.build
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+systemd_userwork_sources = files('''
+ userwork.c
+'''.split())
+
+systemd_userdbd_sources = files('''
+ userdbd-manager.c
+ userdbd-manager.h
+ userdbd.c
+'''.split())
+
+userdbctl_sources = files('''
+ userdbctl.c
+'''.split())
diff --git a/src/userdb/userdbctl.c b/src/userdb/userdbctl.c
new file mode 100644
index 0000000..a0e22df
--- /dev/null
+++ b/src/userdb/userdbctl.c
@@ -0,0 +1,789 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <utmp.h>
+
+#include "dirent-util.h"
+#include "errno-list.h"
+#include "fd-util.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "main-func.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-record-show.h"
+#include "user-util.h"
+#include "userdb.h"
+#include "verbs.h"
+
+static enum {
+ OUTPUT_CLASSIC,
+ OUTPUT_TABLE,
+ OUTPUT_FRIENDLY,
+ OUTPUT_JSON,
+ _OUTPUT_INVALID = -1
+} arg_output = _OUTPUT_INVALID;
+
+static PagerFlags arg_pager_flags = 0;
+static bool arg_legend = true;
+static char** arg_services = NULL;
+static UserDBFlags arg_userdb_flags = 0;
+
+STATIC_DESTRUCTOR_REGISTER(arg_services, strv_freep);
+
+static int show_user(UserRecord *ur, Table *table) {
+ int r;
+
+ assert(ur);
+
+ switch (arg_output) {
+
+ case OUTPUT_CLASSIC:
+ if (!uid_is_valid(ur->uid))
+ break;
+
+ printf("%s:x:" UID_FMT ":" GID_FMT ":%s:%s:%s\n",
+ ur->user_name,
+ ur->uid,
+ user_record_gid(ur),
+ strempty(user_record_real_name(ur)),
+ user_record_home_directory(ur),
+ user_record_shell(ur));
+
+ break;
+
+ case OUTPUT_JSON:
+ json_variant_dump(ur->json, JSON_FORMAT_COLOR_AUTO|JSON_FORMAT_PRETTY, NULL, 0);
+ break;
+
+ case OUTPUT_FRIENDLY:
+ user_record_show(ur, true);
+
+ if (ur->incomplete) {
+ fflush(stdout);
+ log_warning("Warning: lacking rights to acquire privileged fields of user record of '%s', output incomplete.", ur->user_name);
+ }
+
+ break;
+
+ case OUTPUT_TABLE:
+ assert(table);
+
+ r = table_add_many(
+ table,
+ TABLE_STRING, ur->user_name,
+ TABLE_STRING, user_disposition_to_string(user_record_disposition(ur)),
+ TABLE_UID, ur->uid,
+ TABLE_GID, user_record_gid(ur),
+ TABLE_STRING, empty_to_null(ur->real_name),
+ TABLE_STRING, user_record_home_directory(ur),
+ TABLE_STRING, user_record_shell(ur),
+ TABLE_INT, (int) user_record_disposition(ur));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected output mode");
+ }
+
+ return 0;
+}
+
+static int display_user(int argc, char *argv[], void *userdata) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ bool draw_separator = false;
+ int ret = 0, r;
+
+ if (arg_output < 0)
+ arg_output = argc > 1 ? OUTPUT_FRIENDLY : OUTPUT_TABLE;
+
+ if (arg_output == OUTPUT_TABLE) {
+ table = table_new("name", "disposition", "uid", "gid", "realname", "home", "shell", "disposition-numeric");
+ if (!table)
+ return log_oom();
+
+ (void) table_set_align_percent(table, table_get_cell(table, 0, 2), 100);
+ (void) table_set_align_percent(table, table_get_cell(table, 0, 3), 100);
+ (void) table_set_empty_string(table, "-");
+ (void) table_set_sort(table, (size_t) 7, (size_t) 2, (size_t) -1);
+ (void) table_set_display(table, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) 4, (size_t) 5, (size_t) 6, (size_t) -1);
+ }
+
+ if (argc > 1) {
+ char **i;
+
+ STRV_FOREACH(i, argv + 1) {
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ uid_t uid;
+
+ if (parse_uid(*i, &uid) >= 0)
+ r = userdb_by_uid(uid, arg_userdb_flags, &ur);
+ else
+ r = userdb_by_name(*i, arg_userdb_flags, &ur);
+ if (r < 0) {
+ if (r == -ESRCH)
+ log_error_errno(r, "User %s does not exist.", *i);
+ else if (r == -EHOSTDOWN)
+ log_error_errno(r, "Selected user database service is not available for this request.");
+ else
+ log_error_errno(r, "Failed to find user %s: %m", *i);
+
+ if (ret >= 0)
+ ret = r;
+ } else {
+ if (draw_separator && arg_output == OUTPUT_FRIENDLY)
+ putchar('\n');
+
+ r = show_user(ur, table);
+ if (r < 0)
+ return r;
+
+ draw_separator = true;
+ }
+ }
+ } else {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ r = userdb_all(arg_userdb_flags, &iterator);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate users: %m");
+
+ for (;;) {
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+
+ r = userdb_iterator_get(iterator, &ur);
+ if (r == -ESRCH)
+ break;
+ if (r == -EHOSTDOWN)
+ return log_error_errno(r, "Selected user database service is not available for this request.");
+ if (r < 0)
+ return log_error_errno(r, "Failed acquire next user: %m");
+
+ if (draw_separator && arg_output == OUTPUT_FRIENDLY)
+ putchar('\n');
+
+ r = show_user(ur, table);
+ if (r < 0)
+ return r;
+
+ draw_separator = true;
+ }
+ }
+
+ if (table) {
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ return ret;
+}
+
+static int show_group(GroupRecord *gr, Table *table) {
+ int r;
+
+ assert(gr);
+
+ switch (arg_output) {
+
+ case OUTPUT_CLASSIC: {
+ _cleanup_free_ char *m = NULL;
+
+ if (!gid_is_valid(gr->gid))
+ break;
+
+ m = strv_join(gr->members, ",");
+ if (!m)
+ return log_oom();
+
+ printf("%s:x:" GID_FMT ":%s\n",
+ gr->group_name,
+ gr->gid,
+ m);
+ break;
+ }
+
+ case OUTPUT_JSON:
+ json_variant_dump(gr->json, JSON_FORMAT_COLOR_AUTO|JSON_FORMAT_PRETTY, NULL, 0);
+ break;
+
+ case OUTPUT_FRIENDLY:
+ group_record_show(gr, true);
+
+ if (gr->incomplete) {
+ fflush(stdout);
+ log_warning("Warning: lacking rights to acquire privileged fields of group record of '%s', output incomplete.", gr->group_name);
+ }
+
+ break;
+
+ case OUTPUT_TABLE:
+ assert(table);
+
+ r = table_add_many(
+ table,
+ TABLE_STRING, gr->group_name,
+ TABLE_STRING, user_disposition_to_string(group_record_disposition(gr)),
+ TABLE_GID, gr->gid,
+ TABLE_STRING, gr->description,
+ TABLE_INT, (int) group_record_disposition(gr));
+ if (r < 0)
+ return table_log_add_error(r);
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected display mode");
+ }
+
+ return 0;
+}
+
+
+static int display_group(int argc, char *argv[], void *userdata) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ bool draw_separator = false;
+ int ret = 0, r;
+
+ if (arg_output < 0)
+ arg_output = argc > 1 ? OUTPUT_FRIENDLY : OUTPUT_TABLE;
+
+ if (arg_output == OUTPUT_TABLE) {
+ table = table_new("name", "disposition", "gid", "description", "disposition-numeric");
+ if (!table)
+ return log_oom();
+
+ (void) table_set_align_percent(table, table_get_cell(table, 0, 2), 100);
+ (void) table_set_empty_string(table, "-");
+ (void) table_set_sort(table, (size_t) 3, (size_t) 2, (size_t) -1);
+ (void) table_set_display(table, (size_t) 0, (size_t) 1, (size_t) 2, (size_t) 3, (size_t) -1);
+ }
+
+ if (argc > 1) {
+ char **i;
+
+ STRV_FOREACH(i, argv + 1) {
+ _cleanup_(group_record_unrefp) GroupRecord *gr = NULL;
+ gid_t gid;
+
+ if (parse_gid(*i, &gid) >= 0)
+ r = groupdb_by_gid(gid, arg_userdb_flags, &gr);
+ else
+ r = groupdb_by_name(*i, arg_userdb_flags, &gr);
+ if (r < 0) {
+ if (r == -ESRCH)
+ log_error_errno(r, "Group %s does not exist.", *i);
+ else if (r == -EHOSTDOWN)
+ log_error_errno(r, "Selected group database service is not available for this request.");
+ else
+ log_error_errno(r, "Failed to find group %s: %m", *i);
+
+ if (ret >= 0)
+ ret = r;
+ } else {
+ if (draw_separator && arg_output == OUTPUT_FRIENDLY)
+ putchar('\n');
+
+ r = show_group(gr, table);
+ if (r < 0)
+ return r;
+
+ draw_separator = true;
+ }
+ }
+
+ } else {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ r = groupdb_all(arg_userdb_flags, &iterator);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate groups: %m");
+
+ for (;;) {
+ _cleanup_(group_record_unrefp) GroupRecord *gr = NULL;
+
+ r = groupdb_iterator_get(iterator, &gr);
+ if (r == -ESRCH)
+ break;
+ if (r == -EHOSTDOWN)
+ return log_error_errno(r, "Selected group database service is not available for this request.");
+ if (r < 0)
+ return log_error_errno(r, "Failed acquire next group: %m");
+
+ if (draw_separator && arg_output == OUTPUT_FRIENDLY)
+ putchar('\n');
+
+ r = show_group(gr, table);
+ if (r < 0)
+ return r;
+
+ draw_separator = true;
+ }
+
+ }
+
+ if (table) {
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ return ret;
+}
+
+static int show_membership(const char *user, const char *group, Table *table) {
+ int r;
+
+ assert(user);
+ assert(group);
+
+ switch (arg_output) {
+
+ case OUTPUT_CLASSIC:
+ /* Strictly speaking there's no 'classic' output for this concept, but let's output it in
+ * similar style to the classic output for user/group info */
+
+ printf("%s:%s\n", user, group);
+ break;
+
+ case OUTPUT_JSON: {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ r = json_build(&v, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("user", JSON_BUILD_STRING(user)),
+ JSON_BUILD_PAIR("group", JSON_BUILD_STRING(group))));
+ if (r < 0)
+ return log_error_errno(r, "Failed to build JSON object: %m");
+
+ json_variant_dump(v, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR_AUTO, NULL, NULL);
+ break;
+ }
+
+ case OUTPUT_FRIENDLY:
+ /* Hmm, this is not particularly friendly, but not sure how we could do this better */
+ printf("%s: %s\n", group, user);
+ break;
+
+ case OUTPUT_TABLE:
+ assert(table);
+
+ r = table_add_many(
+ table,
+ TABLE_STRING, user,
+ TABLE_STRING, group);
+ if (r < 0)
+ return table_log_add_error(r);
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected output mode");
+ }
+
+ return 0;
+}
+
+static int display_memberships(int argc, char *argv[], void *userdata) {
+ _cleanup_(table_unrefp) Table *table = NULL;
+ int ret = 0, r;
+
+ if (arg_output < 0)
+ arg_output = OUTPUT_TABLE;
+
+ if (arg_output == OUTPUT_TABLE) {
+ table = table_new("user", "group");
+ if (!table)
+ return log_oom();
+
+ (void) table_set_sort(table, (size_t) 0, (size_t) 1, (size_t) -1);
+ }
+
+ if (argc > 1) {
+ char **i;
+
+ STRV_FOREACH(i, argv + 1) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ if (streq(argv[0], "users-in-group")) {
+ r = membershipdb_by_group(*i, arg_userdb_flags, &iterator);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate users in group: %m");
+ } else if (streq(argv[0], "groups-of-user")) {
+ r = membershipdb_by_user(*i, arg_userdb_flags, &iterator);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate groups of user: %m");
+ } else
+ assert_not_reached("Unexpected verb");
+
+ for (;;) {
+ _cleanup_free_ char *user = NULL, *group = NULL;
+
+ r = membershipdb_iterator_get(iterator, &user, &group);
+ if (r == -ESRCH)
+ break;
+ if (r == -EHOSTDOWN)
+ return log_error_errno(r, "Selected membership database service is not available for this request.");
+ if (r < 0)
+ return log_error_errno(r, "Failed acquire next membership: %m");
+
+ r = show_membership(user, group, table);
+ if (r < 0)
+ return r;
+ }
+ }
+ } else {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ r = membershipdb_all(arg_userdb_flags, &iterator);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate memberships: %m");
+
+ for (;;) {
+ _cleanup_free_ char *user = NULL, *group = NULL;
+
+ r = membershipdb_iterator_get(iterator, &user, &group);
+ if (r == -ESRCH)
+ break;
+ if (r == -EHOSTDOWN)
+ return log_error_errno(r, "Selected membership database service is not available for this request.");
+ if (r < 0)
+ return log_error_errno(r, "Failed acquire next membership: %m");
+
+ r = show_membership(user, group, table);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (table) {
+ r = table_print(table, NULL);
+ if (r < 0)
+ return table_log_print_error(r);
+ }
+
+ return ret;
+}
+
+static int display_services(int argc, char *argv[], void *userdata) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+ _cleanup_(closedirp) DIR *d = NULL;
+ struct dirent *de;
+ int r;
+
+ d = opendir("/run/systemd/userdb/");
+ if (!d) {
+ if (errno == ENOENT) {
+ log_info("No services.");
+ return 0;
+ }
+
+ return log_error_errno(errno, "Failed to open /run/systemd/userdb/: %m");
+ }
+
+ t = table_new("service", "listening");
+ if (!t)
+ return log_oom();
+
+ (void) table_set_sort(t, (size_t) 0, (size_t) -1);
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_free_ char *j = NULL, *no = NULL;
+ union sockaddr_union sockaddr;
+ socklen_t sockaddr_len;
+ _cleanup_close_ int fd = -1;
+
+ j = path_join("/run/systemd/userdb/", de->d_name);
+ if (!j)
+ return log_oom();
+
+ r = sockaddr_un_set_path(&sockaddr.un, j);
+ if (r < 0)
+ return log_error_errno(r, "Path %s does not fit in AF_UNIX socket address: %m", j);
+ sockaddr_len = r;
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(r, "Failed to allocate AF_UNIX/SOCK_STREAM socket: %m");
+
+ if (connect(fd, &sockaddr.un, sockaddr_len) < 0) {
+ no = strjoin("No (", errno_to_name(errno), ")");
+ if (!no)
+ return log_oom();
+ }
+
+ r = table_add_many(t,
+ TABLE_STRING, de->d_name,
+ TABLE_STRING, no ?: "yes",
+ TABLE_SET_COLOR, no ? ansi_highlight_red() : ansi_highlight_green());
+ if (r < 0)
+ return table_log_add_error(r);
+ }
+
+ if (table_get_rows(t) <= 0) {
+ log_info("No services.");
+ return 0;
+ }
+
+ if (arg_output == OUTPUT_JSON)
+ table_print_json(t, NULL, JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR_AUTO);
+ else
+ table_print(t, NULL);
+
+ return 0;
+}
+
+static int ssh_authorized_keys(int argc, char *argv[], void *userdata) {
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ int r;
+
+ r = userdb_by_name(argv[1], arg_userdb_flags, &ur);
+ if (r == -ESRCH)
+ return log_error_errno(r, "User %s does not exist.", argv[1]);
+ else if (r == -EHOSTDOWN)
+ return log_error_errno(r, "Selected user database service is not available for this request.");
+ else if (r == -EINVAL)
+ return log_error_errno(r, "Failed to find user %s: %m (Invalid user name?)", argv[1]);
+ else if (r < 0)
+ return log_error_errno(r, "Failed to find user %s: %m", argv[1]);
+
+ if (strv_isempty(ur->ssh_authorized_keys))
+ log_debug("User record for %s has no public SSH keys.", argv[1]);
+ else {
+ char **i;
+
+ STRV_FOREACH(i, ur->ssh_authorized_keys)
+ printf("%s\n", *i);
+ }
+
+ if (ur->incomplete) {
+ fflush(stdout);
+ log_warning("Warning: lacking rights to acquire privileged fields of user record of '%s', output incomplete.", ur->user_name);
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int help(int argc, char *argv[], void *userdata) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ (void) pager_open(arg_pager_flags);
+
+ r = terminal_urlify_man("userdbctl", "1", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...] COMMAND ...\n\n"
+ "%sShow user and group information.%s\n"
+ "\nCommands:\n"
+ " user [USER…] Inspect user\n"
+ " group [GROUP…] Inspect group\n"
+ " users-in-group [GROUP…] Show users that are members of specified group(s)\n"
+ " groups-of-user [USER…] Show groups the specified user(s) is a member of\n"
+ " services Show enabled database services\n"
+ "\nOptions:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --no-pager Do not pipe output into a pager\n"
+ " --no-legend Do not show the headers and footers\n"
+ " --output=MODE Select output mode (classic, friendly, table, json)\n"
+ " -j Equivalent to --output=json\n"
+ " -s --service=SERVICE[:SERVICE…]\n"
+ " Query the specified service\n"
+ " --with-nss=BOOL Control whether to include glibc NSS data\n"
+ " -N Do not synthesize or include glibc NSS data\n"
+ " (Same as --synthesize=no --with-nss=no)\n"
+ " --synthesize=BOOL Synthesize root/nobody user\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , ansi_highlight(), ansi_normal()
+ , link
+ );
+
+ return 0;
+}
+
+static int parse_argv(int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_NO_PAGER,
+ ARG_NO_LEGEND,
+ ARG_OUTPUT,
+ ARG_WITH_NSS,
+ ARG_SYNTHESIZE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "no-pager", no_argument, NULL, ARG_NO_PAGER },
+ { "no-legend", no_argument, NULL, ARG_NO_LEGEND },
+ { "output", required_argument, NULL, ARG_OUTPUT },
+ { "service", required_argument, NULL, 's' },
+ { "with-nss", required_argument, NULL, ARG_WITH_NSS },
+ { "synthesize", required_argument, NULL, ARG_SYNTHESIZE },
+ {}
+ };
+
+ const char *e;
+ int r;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ /* We are going to update this environment variable with our own, hence let's first read what is already set */
+ e = getenv("SYSTEMD_ONLY_USERDB");
+ if (e) {
+ char **l;
+
+ l = strv_split(e, ":");
+ if (!l)
+ return log_oom();
+
+ strv_free(arg_services);
+ arg_services = l;
+ }
+
+ for (;;) {
+ int c;
+
+ c = getopt_long(argc, argv, "hjs:N", options, NULL);
+ if (c < 0)
+ break;
+
+ switch (c) {
+
+ case 'h':
+ return help(0, NULL, NULL);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_NO_PAGER:
+ arg_pager_flags |= PAGER_DISABLE;
+ break;
+
+ case ARG_NO_LEGEND:
+ arg_legend = false;
+ break;
+
+ case ARG_OUTPUT:
+ if (streq(optarg, "classic"))
+ arg_output = OUTPUT_CLASSIC;
+ else if (streq(optarg, "friendly"))
+ arg_output = OUTPUT_FRIENDLY;
+ else if (streq(optarg, "json"))
+ arg_output = OUTPUT_JSON;
+ else if (streq(optarg, "table"))
+ arg_output = OUTPUT_TABLE;
+ else if (streq(optarg, "help")) {
+ puts("classic\n"
+ "friendly\n"
+ "json\n"
+ "table");
+ return 0;
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid --output= mode: %s", optarg);
+
+ break;
+
+ case 'j':
+ arg_output = OUTPUT_JSON;
+ break;
+
+ case 's':
+ if (isempty(optarg))
+ arg_services = strv_free(arg_services);
+ else {
+ _cleanup_strv_free_ char **l = NULL;
+
+ l = strv_split(optarg, ":");
+ if (!l)
+ return log_oom();
+
+ r = strv_extend_strv(&arg_services, l, true);
+ if (r < 0)
+ return log_oom();
+ }
+
+ break;
+
+ case 'N':
+ arg_userdb_flags |= USERDB_AVOID_NSS|USERDB_DONT_SYNTHESIZE;
+ break;
+
+ case ARG_WITH_NSS:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --with-nss= parameter: %s", optarg);
+
+ SET_FLAG(arg_userdb_flags, USERDB_AVOID_NSS, !r);
+ break;
+
+ case ARG_SYNTHESIZE:
+ r = parse_boolean(optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --synthesize= parameter: %s", optarg);
+
+ SET_FLAG(arg_userdb_flags, USERDB_DONT_SYNTHESIZE, !r);
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unhandled option");
+ }
+ }
+
+ return 1;
+}
+
+static int run(int argc, char *argv[]) {
+ static const Verb verbs[] = {
+ { "help", VERB_ANY, VERB_ANY, 0, help },
+ { "user", VERB_ANY, VERB_ANY, VERB_DEFAULT, display_user },
+ { "group", VERB_ANY, VERB_ANY, 0, display_group },
+ { "users-in-group", VERB_ANY, VERB_ANY, 0, display_memberships },
+ { "groups-of-user", VERB_ANY, VERB_ANY, 0, display_memberships },
+ { "services", VERB_ANY, 1, 0, display_services },
+
+ /* This one is a helper for sshd_config's AuthorizedKeysCommand= setting, it's not a
+ * user-facing verb and thus should not appear in man pages or --help texts. */
+ { "ssh-authorized-keys", 2, 2, 0, ssh_authorized_keys },
+ {}
+ };
+
+ int r;
+
+ log_setup_cli();
+
+ r = parse_argv(argc, argv);
+ if (r <= 0)
+ return r;
+
+ if (arg_services) {
+ _cleanup_free_ char *e = NULL;
+
+ e = strv_join(arg_services, ":");
+ if (!e)
+ return log_oom();
+
+ if (setenv("SYSTEMD_ONLY_USERDB", e, true) < 0)
+ return log_error_errno(r, "Failed to set $SYSTEMD_ONLY_USERDB: %m");
+
+ log_info("Enabled services: %s", e);
+ } else
+ assert_se(unsetenv("SYSTEMD_ONLY_USERDB") == 0);
+
+ return dispatch_verb(argc, argv, verbs, NULL);
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/userdb/userdbd-manager.c b/src/userdb/userdbd-manager.c
new file mode 100644
index 0000000..f8d315c
--- /dev/null
+++ b/src/userdb/userdbd-manager.c
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/wait.h>
+
+#include "sd-daemon.h"
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "umask-util.h"
+#include "userdbd-manager.h"
+
+#define LISTEN_TIMEOUT_USEC (25 * USEC_PER_SEC)
+
+static int start_workers(Manager *m, bool explicit_request);
+
+static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(m);
+
+ for (;;) {
+ siginfo_t siginfo = {};
+ bool removed = false;
+
+ if (waitid(P_ALL, 0, &siginfo, WNOHANG|WEXITED) < 0) {
+ if (errno == ECHILD)
+ break;
+
+ log_warning_errno(errno, "Failed to invoke waitid(): %m");
+ break;
+ }
+ if (siginfo.si_pid == 0)
+ break;
+
+ if (set_remove(m->workers_dynamic, PID_TO_PTR(siginfo.si_pid)))
+ removed = true;
+ if (set_remove(m->workers_fixed, PID_TO_PTR(siginfo.si_pid)))
+ removed = true;
+
+ if (!removed) {
+ log_warning("Weird, got SIGCHLD for unknown child " PID_FMT ", ignoring.", siginfo.si_pid);
+ continue;
+ }
+
+ if (siginfo.si_code == CLD_EXITED) {
+ if (siginfo.si_status == EXIT_SUCCESS)
+ log_debug("Worker " PID_FMT " exited successfully.", siginfo.si_pid);
+ else
+ log_warning("Worker " PID_FMT " died with a failure exit status %i, ignoring.", siginfo.si_pid, siginfo.si_status);
+ } else if (siginfo.si_code == CLD_KILLED)
+ log_warning("Worker " PID_FMT " was killed by signal %s, ignoring.", siginfo.si_pid, signal_to_string(siginfo.si_status));
+ else if (siginfo.si_code == CLD_DUMPED)
+ log_warning("Worker " PID_FMT " dumped core by signal %s, ignoring.", siginfo.si_pid, signal_to_string(siginfo.si_status));
+ else
+ log_warning("Can't handle SIGCHLD of this type");
+ }
+
+ (void) start_workers(m, false); /* Fill up workers again if we fell below the low watermark */
+ return 0;
+}
+
+static int on_sigusr2(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ Manager *m = userdata;
+
+ assert(s);
+ assert(m);
+
+ (void) start_workers(m, true); /* Workers told us there's more work, let's add one more worker as long as we are below the high watermark */
+ return 0;
+}
+
+int manager_new(Manager **ret) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ int r;
+
+ m = new(Manager, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (Manager) {
+ .listen_fd = -1,
+ .worker_ratelimit = {
+ .interval = 5 * USEC_PER_SEC,
+ .burst = 50,
+ },
+ };
+
+ r = sd_event_new(&m->event);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGINT, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, NULL, SIGTERM, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_set_watchdog(m->event, true);
+
+ m->workers_fixed = set_new(NULL);
+ m->workers_dynamic = set_new(NULL);
+
+ if (!m->workers_fixed || !m->workers_dynamic)
+ return -ENOMEM;
+
+ r = sd_event_add_signal(m->event, &m->sigusr2_event_source, SIGUSR2, on_sigusr2, m);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(m->event, &m->sigchld_event_source, SIGCHLD, on_sigchld, m);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+Manager* manager_free(Manager *m) {
+ if (!m)
+ return NULL;
+
+ set_free(m->workers_fixed);
+ set_free(m->workers_dynamic);
+
+ sd_event_source_disable_unref(m->sigusr2_event_source);
+ sd_event_source_disable_unref(m->sigchld_event_source);
+
+ sd_event_unref(m->event);
+
+ return mfree(m);
+}
+
+static size_t manager_current_workers(Manager *m) {
+ assert(m);
+
+ return set_size(m->workers_fixed) + set_size(m->workers_dynamic);
+}
+
+static int start_one_worker(Manager *m) {
+ bool fixed;
+ pid_t pid;
+ int r;
+
+ assert(m);
+
+ fixed = set_size(m->workers_fixed) < USERDB_WORKERS_MIN;
+
+ r = safe_fork("(sd-worker)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fork new worker child: %m");
+ if (r == 0) {
+ char pids[DECIMAL_STR_MAX(pid_t)];
+ /* Child */
+
+ log_close();
+
+ r = close_all_fds(&m->listen_fd, 1);
+ if (r < 0) {
+ log_error_errno(r, "Failed to close fds in child: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ log_open();
+
+ if (m->listen_fd == 3) {
+ r = fd_cloexec(3, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to turn off O_CLOEXEC for fd 3: %m");
+ _exit(EXIT_FAILURE);
+ }
+ } else {
+ if (dup2(m->listen_fd, 3) < 0) { /* dup2() creates with O_CLOEXEC off */
+ log_error_errno(errno, "Failed to move listen fd to 3: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close(m->listen_fd);
+ }
+
+ xsprintf(pids, PID_FMT, pid);
+ if (setenv("LISTEN_PID", pids, 1) < 0) {
+ log_error_errno(errno, "Failed to set $LISTEN_PID: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (setenv("LISTEN_FDS", "1", 1) < 0) {
+ log_error_errno(errno, "Failed to set $LISTEN_FDS: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+
+ if (setenv("USERDB_FIXED_WORKER", one_zero(fixed), 1) < 0) {
+ log_error_errno(errno, "Failed to set $USERDB_FIXED_WORKER: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* execl("/home/lennart/projects/systemd/build/systemd-userwork", "systemd-userwork", "xxxxxxxxxxxxxxxx", NULL); /\* With some extra space rename_process() can make use of *\/ */
+ /* execl("/usr/bin/valgrind", "valgrind", "/home/lennart/projects/systemd/build/systemd-userwork", "systemd-userwork", "xxxxxxxxxxxxxxxx", NULL); /\* With some extra space rename_process() can make use of *\/ */
+
+ execl(SYSTEMD_USERWORK_PATH, "systemd-userwork", "xxxxxxxxxxxxxxxx", NULL); /* With some extra space rename_process() can make use of */
+ log_error_errno(errno, "Failed start worker process: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (fixed)
+ r = set_put(m->workers_fixed, PID_TO_PTR(pid));
+ else
+ r = set_put(m->workers_dynamic, PID_TO_PTR(pid));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add child process to set: %m");
+
+ return 0;
+}
+
+static int start_workers(Manager *m, bool explicit_request) {
+ int r;
+
+ assert(m);
+
+ for (;;) {
+ size_t n;
+
+ n = manager_current_workers(m);
+ if (n >= USERDB_WORKERS_MIN && (!explicit_request || n >= USERDB_WORKERS_MAX))
+ break;
+
+ if (!ratelimit_below(&m->worker_ratelimit)) {
+ /* If we keep starting workers too often, let's fail the whole daemon, something is wrong */
+ sd_event_exit(m->event, EXIT_FAILURE);
+
+ return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN), "Worker threads requested too frequently, something is wrong.");
+ }
+
+ r = start_one_worker(m);
+ if (r < 0)
+ return r;
+
+ explicit_request = false;
+ }
+
+ return 0;
+}
+
+int manager_startup(Manager *m) {
+ struct timeval ts;
+ int n, r;
+
+ assert(m);
+ assert(m->listen_fd < 0);
+
+ n = sd_listen_fds(false);
+ if (n < 0)
+ return log_error_errno(n, "Failed to determine number of passed file descriptors: %m");
+ if (n > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected one listening fd, got %i.", n);
+ if (n == 1)
+ m->listen_fd = SD_LISTEN_FDS_START;
+ else {
+ union sockaddr_union sockaddr = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/userdb/io.systemd.NameServiceSwitch",
+ };
+
+ r = mkdir_p("/run/systemd/userdb", 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/systemd/userdb: %m");
+
+ m->listen_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ if (m->listen_fd < 0)
+ return log_error_errno(errno, "Failed to bind on socket: %m");
+
+ (void) sockaddr_un_unlink(&sockaddr.un);
+
+ RUN_WITH_UMASK(0000)
+ if (bind(m->listen_fd, &sockaddr.sa, SOCKADDR_UN_LEN(sockaddr.un)) < 0)
+ return log_error_errno(errno, "Failed to bind socket: %m");
+
+ r = symlink_idempotent("io.systemd.NameServiceSwitch", "/run/systemd/userdb/io.systemd.Multiplexer", false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind io.systemd.Multiplexer: %m");
+
+ if (listen(m->listen_fd, SOMAXCONN) < 0)
+ return log_error_errno(errno, "Failed to listen on socket: %m");
+ }
+
+ /* Let's make sure every accept() call on this socket times out after 25s. This allows workers to be
+ * GC'ed on idle */
+ if (setsockopt(m->listen_fd, SOL_SOCKET, SO_RCVTIMEO, timeval_store(&ts, LISTEN_TIMEOUT_USEC), sizeof(ts)) < 0)
+ return log_error_errno(errno, "Failed to se SO_RCVTIMEO: %m");
+
+ return start_workers(m, false);
+}
diff --git a/src/userdb/userdbd-manager.h b/src/userdb/userdbd-manager.h
new file mode 100644
index 0000000..b81615a
--- /dev/null
+++ b/src/userdb/userdbd-manager.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+typedef struct Manager Manager;
+
+#include "hashmap.h"
+#include "varlink.h"
+#include "ratelimit.h"
+
+#define USERDB_WORKERS_MIN 3
+#define USERDB_WORKERS_MAX 4096
+
+struct Manager {
+ sd_event *event;
+
+ Set *workers_fixed; /* Workers 0…USERDB_WORKERS_MIN */
+ Set *workers_dynamic; /* Workers USERD_WORKERS_MIN+1…USERDB_WORKERS_MAX */
+
+ sd_event_source *sigusr2_event_source;
+ sd_event_source *sigchld_event_source;
+
+ int listen_fd;
+
+ RateLimit worker_ratelimit;
+};
+
+int manager_new(Manager **ret);
+Manager* manager_free(Manager *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
+
+int manager_startup(Manager *m);
diff --git a/src/userdb/userdbd.c b/src/userdb/userdbd.c
new file mode 100644
index 0000000..6f2c807
--- /dev/null
+++ b/src/userdb/userdbd.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "daemon-util.h"
+#include "userdbd-manager.h"
+#include "log.h"
+#include "main-func.h"
+#include "signal-util.h"
+
+/* This service offers two Varlink services, both implementing io.systemd.UserDatabase:
+ *
+ * → io.systemd.NameServiceSwitch: this is a compatibility interface for glibc NSS: it responds to
+ * name lookups by checking the classic NSS interfaces and responding that.
+ *
+ * → io.systemd.Multiplexer: this multiplexes lookup requests to all Varlink services that have a
+ * socket in /run/systemd/userdb/. It's supposed to simplify clients that don't want to implement
+ * the full iterative logic on their own.
+ */
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(manager_freep) Manager *m = NULL;
+ _cleanup_(notify_on_cleanup) const char *notify_stop = NULL;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argc != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
+
+ if (setenv("SYSTEMD_BYPASS_USERDB", "io.systemd.NameServiceSwitch:io.systemd.Multiplexer", 1) < 0)
+ return log_error_errno(errno, "Failed to se $SYSTEMD_BYPASS_USERDB: %m");
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGTERM, SIGINT, SIGUSR2, -1) >= 0);
+
+ r = manager_new(&m);
+ if (r < 0)
+ return log_error_errno(r, "Could not create manager: %m");
+
+ r = manager_startup(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start up daemon: %m");
+
+ notify_stop = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
+
+ r = sd_event_loop(m->event);
+ if (r < 0)
+ return log_error_errno(r, "Event loop failed: %m");
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/userdb/userwork.c b/src/userdb/userwork.c
new file mode 100644
index 0000000..d525a6e
--- /dev/null
+++ b/src/userdb/userwork.c
@@ -0,0 +1,775 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <poll.h>
+#include <sys/wait.h>
+
+#include "sd-daemon.h"
+
+#include "env-util.h"
+#include "fd-util.h"
+#include "group-record.h"
+#include "io-util.h"
+#include "main-func.h"
+#include "process-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "user-record-nss.h"
+#include "user-record.h"
+#include "user-util.h"
+#include "userdb.h"
+#include "varlink.h"
+
+#define ITERATIONS_MAX 64U
+#define RUNTIME_MAX_USEC (5 * USEC_PER_MINUTE)
+#define PRESSURE_SLEEP_TIME_USEC (50 * USEC_PER_MSEC)
+#define CONNECTION_IDLE_USEC (15 * USEC_PER_SEC)
+#define LISTEN_IDLE_USEC (90 * USEC_PER_SEC)
+
+typedef struct LookupParameters {
+ const char *user_name;
+ const char *group_name;
+ union {
+ uid_t uid;
+ gid_t gid;
+ };
+ const char *service;
+} LookupParameters;
+
+static int add_nss_service(JsonVariant **v) {
+ _cleanup_(json_variant_unrefp) JsonVariant *status = NULL, *z = NULL;
+ char buf[SD_ID128_STRING_MAX];
+ sd_id128_t mid;
+ int r;
+
+ assert(v);
+
+ /* Patch in service field if it's missing. The assumption here is that this field is unset only for
+ * NSS records */
+
+ if (json_variant_by_key(*v, "service"))
+ return 0;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return r;
+
+ status = json_variant_ref(json_variant_by_key(*v, "status"));
+ z = json_variant_ref(json_variant_by_key(status, sd_id128_to_string(mid, buf)));
+
+ if (json_variant_by_key(z, "service"))
+ return 0;
+
+ r = json_variant_set_field_string(&z, "service", "io.systemd.NameServiceSwitch");
+ if (r < 0)
+ return r;
+
+ r = json_variant_set_field(&status, buf, z);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, "status", status);
+}
+
+static int build_user_json(Varlink *link, UserRecord *ur, JsonVariant **ret) {
+ _cleanup_(user_record_unrefp) UserRecord *stripped = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ UserRecordLoadFlags flags;
+ uid_t peer_uid;
+ bool trusted;
+ int r;
+
+ assert(ur);
+ assert(ret);
+
+ r = varlink_get_peer_uid(link, &peer_uid);
+ if (r < 0) {
+ log_debug_errno(r, "Unable to query peer UID, ignoring: %m");
+ trusted = false;
+ } else
+ trusted = peer_uid == 0 || peer_uid == ur->uid;
+
+ flags = USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_BINDING|USER_RECORD_STRIP_SECRET|USER_RECORD_ALLOW_STATUS|USER_RECORD_ALLOW_SIGNATURE;
+ if (trusted)
+ flags |= USER_RECORD_ALLOW_PRIVILEGED;
+ else
+ flags |= USER_RECORD_STRIP_PRIVILEGED;
+
+ r = user_record_clone(ur, flags, &stripped);
+ if (r < 0)
+ return r;
+
+ stripped->incomplete =
+ ur->incomplete ||
+ (FLAGS_SET(ur->mask, USER_RECORD_PRIVILEGED) &&
+ !FLAGS_SET(stripped->mask, USER_RECORD_PRIVILEGED));
+
+ v = json_variant_ref(stripped->json);
+ r = add_nss_service(&v);
+ if (r < 0)
+ return r;
+
+ return json_build(ret, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("record", JSON_BUILD_VARIANT(v)),
+ JSON_BUILD_PAIR("incomplete", JSON_BUILD_BOOLEAN(stripped->incomplete))));
+}
+
+static int vl_method_get_user_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(LookupParameters, uid), 0 },
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, user_name), 0 },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ LookupParameters p = {
+ .uid = UID_INVALID,
+ };
+ int r;
+
+ assert(parameters);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (streq_ptr(p.service, "io.systemd.NameServiceSwitch")) {
+ if (uid_is_valid(p.uid))
+ r = nss_user_record_by_uid(p.uid, true, &hr);
+ else if (p.user_name)
+ r = nss_user_record_by_name(p.user_name, true, &hr);
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *last = NULL;
+
+ setpwent();
+
+ for (;;) {
+ _cleanup_(user_record_unrefp) UserRecord *z = NULL;
+ _cleanup_free_ char *sbuf = NULL;
+ struct passwd *pw;
+ struct spwd spwd;
+
+ errno = 0;
+ pw = getpwent();
+ if (!pw) {
+ if (errno != 0)
+ log_debug_errno(errno, "Failure while iterating through NSS user database, ignoring: %m");
+
+ break;
+ }
+
+ r = nss_spwd_for_passwd(pw, &spwd, &sbuf);
+ if (r < 0)
+ log_debug_errno(r, "Failed to acquire shadow entry for user %s, ignoring: %m", pw->pw_name);
+
+ r = nss_passwd_to_user_record(pw, NULL, &z);
+ if (r < 0) {
+ endpwent();
+ return r;
+ }
+
+ if (last) {
+ r = varlink_notify(link, last);
+ if (r < 0) {
+ endpwent();
+ return r;
+ }
+
+ last = json_variant_unref(last);
+ }
+
+ r = build_user_json(link, z, &last);
+ if (r < 0) {
+ endpwent();
+ return r;
+ }
+ }
+
+ endpwent();
+
+ if (!last)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_reply(link, last);
+ }
+
+ } else if (streq_ptr(p.service, "io.systemd.Multiplexer")) {
+
+ if (uid_is_valid(p.uid))
+ r = userdb_by_uid(p.uid, USERDB_AVOID_MULTIPLEXER, &hr);
+ else if (p.user_name)
+ r = userdb_by_name(p.user_name, USERDB_AVOID_MULTIPLEXER, &hr);
+ else {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *last = NULL;
+
+ r = userdb_all(USERDB_AVOID_MULTIPLEXER, &iterator);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_(user_record_unrefp) UserRecord *z = NULL;
+
+ r = userdb_iterator_get(iterator, &z);
+ if (r == -ESRCH)
+ break;
+ if (r < 0)
+ return r;
+
+ if (last) {
+ r = varlink_notify(link, last);
+ if (r < 0)
+ return r;
+
+ last = json_variant_unref(last);
+ }
+
+ r = build_user_json(link, z, &last);
+ if (r < 0)
+ return r;
+ }
+
+ if (!last)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_reply(link, last);
+ }
+ } else
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+ if (r == -ESRCH)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ if (r < 0) {
+ log_debug_errno(r, "User lookup failed abnormally: %m");
+ return varlink_error(link, "io.systemd.UserDatabase.ServiceNotAvailable", NULL);
+ }
+
+ if ((uid_is_valid(p.uid) && hr->uid != p.uid) ||
+ (p.user_name && !streq(hr->user_name, p.user_name)))
+ return varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
+
+ r = build_user_json(link, hr, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, v);
+}
+
+static int build_group_json(Varlink *link, GroupRecord *gr, JsonVariant **ret) {
+ _cleanup_(group_record_unrefp) GroupRecord *stripped = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ UserRecordLoadFlags flags;
+ uid_t peer_uid;
+ bool trusted;
+ int r;
+
+ assert(gr);
+ assert(ret);
+
+ r = varlink_get_peer_uid(link, &peer_uid);
+ if (r < 0) {
+ log_debug_errno(r, "Unable to query peer UID, ignoring: %m");
+ trusted = false;
+ } else
+ trusted = peer_uid == 0;
+
+ flags = USER_RECORD_REQUIRE_REGULAR|USER_RECORD_ALLOW_PER_MACHINE|USER_RECORD_ALLOW_BINDING|USER_RECORD_STRIP_SECRET|USER_RECORD_ALLOW_STATUS|USER_RECORD_ALLOW_SIGNATURE;
+ if (trusted)
+ flags |= USER_RECORD_ALLOW_PRIVILEGED;
+ else
+ flags |= USER_RECORD_STRIP_PRIVILEGED;
+
+ r = group_record_clone(gr, flags, &stripped);
+ if (r < 0)
+ return r;
+
+ stripped->incomplete =
+ gr->incomplete ||
+ (FLAGS_SET(gr->mask, USER_RECORD_PRIVILEGED) &&
+ !FLAGS_SET(stripped->mask, USER_RECORD_PRIVILEGED));
+
+ v = json_variant_ref(gr->json);
+ r = add_nss_service(&v);
+ if (r < 0)
+ return r;
+
+ return json_build(ret, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("record", JSON_BUILD_VARIANT(v)),
+ JSON_BUILD_PAIR("incomplete", JSON_BUILD_BOOLEAN(stripped->incomplete))));
+}
+
+static int vl_method_get_group_record(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+
+ static const JsonDispatch dispatch_table[] = {
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(LookupParameters, gid), 0 },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, group_name), 0 },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ LookupParameters p = {
+ .gid = GID_INVALID,
+ };
+ int r;
+
+ assert(parameters);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (streq_ptr(p.service, "io.systemd.NameServiceSwitch")) {
+
+ if (gid_is_valid(p.gid))
+ r = nss_group_record_by_gid(p.gid, true, &g);
+ else if (p.group_name)
+ r = nss_group_record_by_name(p.group_name, true, &g);
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *last = NULL;
+
+ setgrent();
+
+ for (;;) {
+ _cleanup_(group_record_unrefp) GroupRecord *z = NULL;
+ _cleanup_free_ char *sbuf = NULL;
+ struct group *grp;
+ struct sgrp sgrp;
+
+ errno = 0;
+ grp = getgrent();
+ if (!grp) {
+ if (errno != 0)
+ log_debug_errno(errno, "Failure while iterating through NSS group database, ignoring: %m");
+
+ break;
+ }
+
+ r = nss_sgrp_for_group(grp, &sgrp, &sbuf);
+ if (r < 0)
+ log_debug_errno(r, "Failed to acquire shadow entry for group %s, ignoring: %m", grp->gr_name);
+
+ r = nss_group_to_group_record(grp, r >= 0 ? &sgrp : NULL, &z);
+ if (r < 0) {
+ endgrent();
+ return r;
+ }
+
+ if (last) {
+ r = varlink_notify(link, last);
+ if (r < 0) {
+ endgrent();
+ return r;
+ }
+
+ last = json_variant_unref(last);
+ }
+
+ r = build_group_json(link, z, &last);
+ if (r < 0) {
+ endgrent();
+ return r;
+ }
+ }
+
+ endgrent();
+
+ if (!last)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_reply(link, last);
+ }
+
+ } else if (streq_ptr(p.service, "io.systemd.Multiplexer")) {
+
+ if (gid_is_valid(p.gid))
+ r = groupdb_by_gid(p.gid, USERDB_AVOID_MULTIPLEXER, &g);
+ else if (p.group_name)
+ r = groupdb_by_name(p.group_name, USERDB_AVOID_MULTIPLEXER, &g);
+ else {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *last = NULL;
+
+ r = groupdb_all(USERDB_AVOID_MULTIPLEXER, &iterator);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_(group_record_unrefp) GroupRecord *z = NULL;
+
+ r = groupdb_iterator_get(iterator, &z);
+ if (r == -ESRCH)
+ break;
+ if (r < 0)
+ return r;
+
+ if (last) {
+ r = varlink_notify(link, last);
+ if (r < 0)
+ return r;
+
+ last = json_variant_unref(last);
+ }
+
+ r = build_group_json(link, z, &last);
+ if (r < 0)
+ return r;
+ }
+
+ if (!last)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_reply(link, last);
+ }
+ } else
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+ if (r == -ESRCH)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Group lookup failed abnormally: %m");
+ return varlink_error(link, "io.systemd.UserDatabase.ServiceNotAvailable", NULL);
+ }
+
+ if ((uid_is_valid(p.gid) && g->gid != p.gid) ||
+ (p.group_name && !streq(g->group_name, p.group_name)))
+ return varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
+
+ r = build_group_json(link, g, &v);
+ if (r < 0)
+ return r;
+
+ return varlink_reply(link, v);
+}
+
+static int vl_method_get_memberships(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
+ static const JsonDispatch dispatch_table[] = {
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, user_name), 0 },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, group_name), 0 },
+ { "service", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(LookupParameters, service), 0 },
+ {}
+ };
+
+ LookupParameters p = {};
+ int r;
+
+ assert(parameters);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &p);
+ if (r < 0)
+ return r;
+
+ if (streq_ptr(p.service, "io.systemd.NameServiceSwitch")) {
+
+ if (p.group_name) {
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ const char *last = NULL;
+ char **i;
+
+ r = nss_group_record_by_name(p.group_name, true, &g);
+ if (r == -ESRCH)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, g->members) {
+
+ if (p.user_name && !streq_ptr(p.user_name, *i))
+ continue;
+
+ if (last) {
+ r = varlink_notifyb(link, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(g->group_name))));
+ if (r < 0)
+ return r;
+ }
+
+ last = *i;
+ }
+
+ if (!last)
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+
+ return varlink_replyb(link, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(g->group_name))));
+ } else {
+ _cleanup_free_ char *last_user_name = NULL, *last_group_name = NULL;
+
+ setgrent();
+
+ for (;;) {
+ struct group *grp;
+ const char* two[2], **users, **i;
+
+ errno = 0;
+ grp = getgrent();
+ if (!grp) {
+ if (errno != 0)
+ log_debug_errno(errno, "Failure while iterating through NSS group database, ignoring: %m");
+
+ break;
+ }
+
+ if (p.user_name) {
+ if (!strv_contains(grp->gr_mem, p.user_name))
+ continue;
+
+ two[0] = p.user_name;
+ two[1] = NULL;
+
+ users = two;
+ } else
+ users = (const char**) grp->gr_mem;
+
+ STRV_FOREACH(i, users) {
+
+ if (last_user_name) {
+ assert(last_group_name);
+
+ r = varlink_notifyb(link, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last_user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(last_group_name))));
+ if (r < 0) {
+ endgrent();
+ return r;
+ }
+
+ free(last_user_name);
+ free(last_group_name);
+ }
+
+ last_user_name = strdup(*i);
+ last_group_name = strdup(grp->gr_name);
+ if (!last_user_name || !last_group_name) {
+ endgrent();
+ return -ENOMEM;
+ }
+ }
+ }
+
+ endgrent();
+
+ if (!last_user_name) {
+ assert(!last_group_name);
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ }
+
+ assert(last_group_name);
+
+ return varlink_replyb(link, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last_user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(last_group_name))));
+ }
+
+ } else if (streq_ptr(p.service, "io.systemd.Multiplexer")) {
+
+ _cleanup_free_ char *last_user_name = NULL, *last_group_name = NULL;
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ if (p.group_name)
+ r = membershipdb_by_group(p.group_name, USERDB_AVOID_MULTIPLEXER, &iterator);
+ else if (p.user_name)
+ r = membershipdb_by_user(p.user_name, USERDB_AVOID_MULTIPLEXER, &iterator);
+ else
+ r = membershipdb_all(USERDB_AVOID_MULTIPLEXER, &iterator);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *user_name = NULL, *group_name = NULL;
+
+ r = membershipdb_iterator_get(iterator, &user_name, &group_name);
+ if (r == -ESRCH)
+ break;
+ if (r < 0)
+ return r;
+
+ /* If both group + user are specified do a-posteriori filtering */
+ if (p.group_name && p.user_name && !streq(group_name, p.group_name))
+ continue;
+
+ if (last_user_name) {
+ assert(last_group_name);
+
+ r = varlink_notifyb(link, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last_user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(last_group_name))));
+ if (r < 0)
+ return r;
+
+ free(last_user_name);
+ free(last_group_name);
+ }
+
+ last_user_name = TAKE_PTR(user_name);
+ last_group_name = TAKE_PTR(group_name);
+ }
+
+ if (!last_user_name) {
+ assert(!last_group_name);
+ return varlink_error(link, "io.systemd.UserDatabase.NoRecordFound", NULL);
+ }
+
+ assert(last_group_name);
+
+ return varlink_replyb(link, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(last_user_name)),
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(last_group_name))));
+ }
+
+ return varlink_error(link, "io.systemd.UserDatabase.BadService", NULL);
+}
+
+static int process_connection(VarlinkServer *server, int fd) {
+ _cleanup_(varlink_close_unrefp) Varlink *vl = NULL;
+ int r;
+
+ r = varlink_server_add_connection(server, fd, &vl);
+ if (r < 0) {
+ fd = safe_close(fd);
+ return log_error_errno(r, "Failed to add connection: %m");
+ }
+
+ vl = varlink_ref(vl);
+
+ for (;;) {
+ r = varlink_process(vl);
+ if (r == -ENOTCONN) {
+ log_debug("Connection terminated.");
+ break;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to process connection: %m");
+ if (r > 0)
+ continue;
+
+ r = varlink_wait(vl, CONNECTION_IDLE_USEC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for connection events: %m");
+ if (r == 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ usec_t start_time, listen_idle_usec, last_busy_usec = USEC_INFINITY;
+ _cleanup_(varlink_server_unrefp) VarlinkServer *server = NULL;
+ unsigned n_iterations = 0;
+ int m, listen_fd, r;
+
+ log_setup_service();
+
+ m = sd_listen_fds(false);
+ if (m < 0)
+ return log_error_errno(m, "Failed to determine number of listening fds: %m");
+ if (m == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No socket to listen on received.");
+ if (m > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Worker can only listen on a single socket at a time.");
+
+ listen_fd = SD_LISTEN_FDS_START;
+
+ r = fd_nonblock(listen_fd, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to turn off non-blocking mode for listening socket: %m");
+
+ r = varlink_server_new(&server, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate server: %m");
+
+ r = varlink_server_bind_method_many(
+ server,
+ "io.systemd.UserDatabase.GetUserRecord", vl_method_get_user_record,
+ "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
+ "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships);
+ if (r < 0)
+ return log_error_errno(r, "Failed to bind methods: %m");
+
+ r = getenv_bool("USERDB_FIXED_WORKER");
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse USERDB_FIXED_WORKER: %m");
+ listen_idle_usec = r ? USEC_INFINITY : LISTEN_IDLE_USEC;
+
+ r = userdb_block_nss_systemd(true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to disable userdb NSS compatibility: %m");
+
+ start_time = now(CLOCK_MONOTONIC);
+
+ for (;;) {
+ _cleanup_close_ int fd = -1;
+ usec_t n;
+
+ /* Exit the worker in regular intervals, to flush out all memory use */
+ if (n_iterations++ > ITERATIONS_MAX) {
+ log_debug("Exiting worker, processed %u iterations, that's enough.", n_iterations);
+ break;
+ }
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= usec_add(start_time, RUNTIME_MAX_USEC)) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ log_debug("Exiting worker, ran for %s, that's enough.",
+ format_timespan(buf, sizeof(buf), usec_sub_unsigned(n, start_time), 0));
+ break;
+ }
+
+ if (last_busy_usec == USEC_INFINITY)
+ last_busy_usec = n;
+ else if (listen_idle_usec != USEC_INFINITY && n >= usec_add(last_busy_usec, listen_idle_usec)) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ log_debug("Exiting worker, been idle for %s.",
+ format_timespan(buf, sizeof(buf), usec_sub_unsigned(n, last_busy_usec), 0));
+ break;
+ }
+
+ (void) rename_process("systemd-userwork: waiting...");
+
+ fd = accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (fd < 0)
+ fd = -errno;
+
+ (void) rename_process("systemd-userwork: processing...");
+
+ if (fd == -EAGAIN)
+ continue; /* The listening socket has SO_RECVTIMEO set, hence a timeout is expected
+ * after a while, let's check if it's time to exit though. */
+ if (fd == -EINTR)
+ continue; /* Might be that somebody attached via strace, let's just continue in that
+ * case */
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to accept() from listening socket: %m");
+
+ if (now(CLOCK_MONOTONIC) <= usec_add(n, PRESSURE_SLEEP_TIME_USEC)) {
+ /* We only slept a very short time? If so, let's see if there are more sockets
+ * pending, and if so, let's ask our parent for more workers */
+
+ r = fd_wait_for_event(listen_fd, POLLIN, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to test for POLLIN on listening socket: %m");
+
+ if (FLAGS_SET(r, POLLIN)) {
+ pid_t parent;
+
+ parent = getppid();
+ if (parent <= 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Parent already died?");
+
+ if (kill(parent, SIGUSR2) < 0)
+ return log_error_errno(errno, "Failed to kill our own parent.");
+ }
+ }
+
+ (void) process_connection(server, TAKE_FD(fd));
+ last_busy_usec = USEC_INFINITY;
+ }
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/vconsole/90-vconsole.rules.in b/src/vconsole/90-vconsole.rules.in
new file mode 100644
index 0000000..a16988c
--- /dev/null
+++ b/src/vconsole/90-vconsole.rules.in
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+# Each vtcon keeps its own state of fonts.
+#
+ACTION=="add", SUBSYSTEM=="vtconsole", KERNEL=="vtcon*", RUN+="@rootlibexecdir@/systemd-vconsole-setup"
diff --git a/src/vconsole/meson.build b/src/vconsole/meson.build
new file mode 100644
index 0000000..695ef02
--- /dev/null
+++ b/src/vconsole/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+if conf.get('ENABLE_VCONSOLE') == 1
+ vconsole_rules = configure_file(
+ input : '90-vconsole.rules.in',
+ output : '90-vconsole.rules',
+ configuration : substs)
+ install_data(vconsole_rules,
+ install_dir : udevrulesdir)
+endif
diff --git a/src/vconsole/vconsole-setup.c b/src/vconsole/vconsole-setup.c
new file mode 100644
index 0000000..b28e285
--- /dev/null
+++ b/src/vconsole/vconsole-setup.c
@@ -0,0 +1,489 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2016 Michal Soltys <soltys@ziu.info>
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/kd.h>
+#include <linux/tiocl.h>
+#include <linux/vt.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sysexits.h>
+#include <termios.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+#include "virt.h"
+
+static int verify_vc_device(int fd) {
+ unsigned char data[] = {
+ TIOCL_GETFGCONSOLE,
+ };
+
+ int r;
+
+ r = ioctl(fd, TIOCLINUX, data);
+ if (r < 0)
+ return -errno;
+
+ return r;
+}
+
+static int verify_vc_allocation(unsigned idx) {
+ char vcname[sizeof("/dev/vcs") + DECIMAL_STR_MAX(unsigned) - 2];
+
+ xsprintf(vcname, "/dev/vcs%u", idx);
+
+ if (access(vcname, F_OK) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int verify_vc_allocation_byfd(int fd) {
+ struct vt_stat vcs = {};
+
+ if (ioctl(fd, VT_GETSTATE, &vcs) < 0)
+ return -errno;
+
+ return verify_vc_allocation(vcs.v_active);
+}
+
+static int verify_vc_kbmode(int fd) {
+ int curr_mode;
+
+ /*
+ * Make sure we only adjust consoles in K_XLATE or K_UNICODE mode.
+ * Otherwise we would (likely) interfere with X11's processing of the
+ * key events.
+ *
+ * http://lists.freedesktop.org/archives/systemd-devel/2013-February/008573.html
+ */
+
+ if (ioctl(fd, KDGKBMODE, &curr_mode) < 0)
+ return -errno;
+
+ return IN_SET(curr_mode, K_XLATE, K_UNICODE) ? 0 : -EBUSY;
+}
+
+static int toggle_utf8_vc(const char *name, int fd, bool utf8) {
+ int r;
+ struct termios tc = {};
+
+ assert(name);
+ assert(fd >= 0);
+
+ r = ioctl(fd, KDSKBMODE, utf8 ? K_UNICODE : K_XLATE);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to %s UTF-8 kbdmode on %s: %m", enable_disable(utf8), name);
+
+ r = loop_write(fd, utf8 ? "\033%G" : "\033%@", 3, false);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to %s UTF-8 term processing on %s: %m", enable_disable(utf8), name);
+
+ r = tcgetattr(fd, &tc);
+ if (r >= 0) {
+ SET_FLAG(tc.c_iflag, IUTF8, utf8);
+ r = tcsetattr(fd, TCSANOW, &tc);
+ }
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to %s iutf8 flag on %s: %m", enable_disable(utf8), name);
+
+ log_debug("UTF-8 kbdmode %sd on %s", enable_disable(utf8), name);
+ return 0;
+}
+
+static int toggle_utf8_sysfs(bool utf8) {
+ int r;
+
+ r = write_string_file("/sys/module/vt/parameters/default_utf8", one_zero(utf8), WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to %s sysfs UTF-8 flag: %m", enable_disable(utf8));
+
+ log_debug("Sysfs UTF-8 flag %sd", enable_disable(utf8));
+ return 0;
+}
+
+static int keyboard_load_and_wait(const char *vc, const char *map, const char *map_toggle, bool utf8) {
+ const char *args[8];
+ unsigned i = 0;
+ pid_t pid;
+ int r;
+
+ /* An empty map means kernel map */
+ if (isempty(map))
+ return 0;
+
+ args[i++] = KBD_LOADKEYS;
+ args[i++] = "-q";
+ args[i++] = "-C";
+ args[i++] = vc;
+ if (utf8)
+ args[i++] = "-u";
+ args[i++] = map;
+ if (map_toggle)
+ args[i++] = map_toggle;
+ args[i++] = NULL;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *cmd;
+
+ cmd = strv_join((char**) args, " ");
+ log_debug("Executing \"%s\"...", strnull(cmd));
+ }
+
+ r = safe_fork("(loadkeys)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ execv(args[0], (char **) args);
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(KBD_LOADKEYS, pid, WAIT_LOG);
+}
+
+static int font_load_and_wait(const char *vc, const char *font, const char *map, const char *unimap) {
+ const char *args[9];
+ unsigned i = 0;
+ pid_t pid;
+ int r;
+
+ /* Any part can be set independently */
+ if (isempty(font) && isempty(map) && isempty(unimap))
+ return 0;
+
+ args[i++] = KBD_SETFONT;
+ args[i++] = "-C";
+ args[i++] = vc;
+ if (!isempty(map)) {
+ args[i++] = "-m";
+ args[i++] = map;
+ }
+ if (!isempty(unimap)) {
+ args[i++] = "-u";
+ args[i++] = unimap;
+ }
+ if (!isempty(font))
+ args[i++] = font;
+ args[i++] = NULL;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *cmd;
+
+ cmd = strv_join((char**) args, " ");
+ log_debug("Executing \"%s\"...", strnull(cmd));
+ }
+
+ r = safe_fork("(setfont)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ execv(args[0], (char **) args);
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(KBD_SETFONT, pid, WAIT_LOG);
+}
+
+/*
+ * A newly allocated VT uses the font from the source VT. Here
+ * we update all possibly already allocated VTs with the configured
+ * font. It also allows to restart systemd-vconsole-setup.service,
+ * to apply a new font to all VTs.
+ *
+ * We also setup per-console utf8 related stuff: kbdmode, term
+ * processing, stty iutf8.
+ */
+static void setup_remaining_vcs(int src_fd, unsigned src_idx, bool utf8) {
+ struct console_font_op cfo = {
+ .op = KD_FONT_OP_GET,
+ .width = UINT_MAX, .height = UINT_MAX,
+ .charcount = UINT_MAX,
+ };
+ struct unimapinit adv = {};
+ struct unimapdesc unimapd;
+ _cleanup_free_ struct unipair* unipairs = NULL;
+ _cleanup_free_ void *fontbuf = NULL;
+ unsigned i;
+ int log_level;
+ int r;
+
+ unipairs = new(struct unipair, USHRT_MAX);
+ if (!unipairs) {
+ log_oom();
+ return;
+ }
+
+ log_level = LOG_WARNING;
+
+ /* get metadata of the current font (width, height, count) */
+ r = ioctl(src_fd, KDFONTOP, &cfo);
+ if (r < 0) {
+ /* We might be called to operate on the dummy console (to setup keymap
+ * mainly) when fbcon deferred takeover is used for example. In such case,
+ * setting font is not supported and is expected to fail. */
+ if (errno == ENOSYS)
+ log_level = LOG_DEBUG;
+
+ log_full_errno(log_level, errno,
+ "KD_FONT_OP_GET failed while trying to get the font metadata: %m");
+ } else {
+ /* verify parameter sanity first */
+ if (cfo.width > 32 || cfo.height > 32 || cfo.charcount > 512)
+ log_warning("Invalid font metadata - width: %u (max 32), height: %u (max 32), count: %u (max 512)",
+ cfo.width, cfo.height, cfo.charcount);
+ else {
+ /*
+ * Console fonts supported by the kernel are limited in size to 32 x 32 and maximum 512
+ * characters. Thus with 1 bit per pixel it requires up to 65536 bytes. The height always
+ * requires 32 per glyph, regardless of the actual height - see the comment above #define
+ * max_font_size 65536 in drivers/tty/vt/vt.c for more details.
+ */
+ fontbuf = malloc_multiply((cfo.width + 7) / 8 * 32, cfo.charcount);
+ if (!fontbuf) {
+ log_oom();
+ return;
+ }
+ /* get fonts from the source console */
+ cfo.data = fontbuf;
+ r = ioctl(src_fd, KDFONTOP, &cfo);
+ if (r < 0)
+ log_warning_errno(errno, "KD_FONT_OP_GET failed while trying to read the font data: %m");
+ else {
+ unimapd.entries = unipairs;
+ unimapd.entry_ct = USHRT_MAX;
+ r = ioctl(src_fd, GIO_UNIMAP, &unimapd);
+ if (r < 0)
+ log_warning_errno(errno, "GIO_UNIMAP failed while trying to read unicode mappings: %m");
+ else
+ cfo.op = KD_FONT_OP_SET;
+ }
+ }
+ }
+
+ if (cfo.op != KD_FONT_OP_SET)
+ log_full(log_level, "Fonts will not be copied to remaining consoles");
+
+ for (i = 1; i <= 63; i++) {
+ char ttyname[sizeof("/dev/tty63")];
+ _cleanup_close_ int fd_d = -1;
+
+ if (i == src_idx || verify_vc_allocation(i) < 0)
+ continue;
+
+ /* try to open terminal */
+ xsprintf(ttyname, "/dev/tty%u", i);
+ fd_d = open_terminal(ttyname, O_RDWR|O_CLOEXEC|O_NOCTTY);
+ if (fd_d < 0) {
+ log_warning_errno(fd_d, "Unable to open tty%u, fonts will not be copied: %m", i);
+ continue;
+ }
+
+ if (verify_vc_kbmode(fd_d) < 0)
+ continue;
+
+ (void) toggle_utf8_vc(ttyname, fd_d, utf8);
+
+ if (cfo.op != KD_FONT_OP_SET)
+ continue;
+
+ r = ioctl(fd_d, KDFONTOP, &cfo);
+ if (r < 0) {
+ int last_errno, mode;
+
+ /* The fonts couldn't have been copied. It might be due to the
+ * terminal being in graphical mode. In this case the kernel
+ * returns -EINVAL which is too generic for distinguishing this
+ * specific case. So we need to retrieve the terminal mode and if
+ * the graphical mode is in used, let's assume that something else
+ * is using the terminal and the failure was expected as we
+ * shouldn't have tried to copy the fonts. */
+
+ last_errno = errno;
+ if (ioctl(fd_d, KDGETMODE, &mode) >= 0 && mode != KD_TEXT)
+ log_debug("KD_FONT_OP_SET skipped: tty%u is not in text mode", i);
+ else
+ log_warning_errno(last_errno, "KD_FONT_OP_SET failed, fonts will not be copied to tty%u: %m", i);
+
+ continue;
+ }
+
+ /*
+ * copy unicode translation table unimapd is a ushort count and a pointer
+ * to an array of struct unipair { ushort, ushort }
+ */
+ r = ioctl(fd_d, PIO_UNIMAPCLR, &adv);
+ if (r < 0) {
+ log_warning_errno(errno, "PIO_UNIMAPCLR failed, unimaps might be incorrect for tty%u: %m", i);
+ continue;
+ }
+
+ r = ioctl(fd_d, PIO_UNIMAP, &unimapd);
+ if (r < 0) {
+ log_warning_errno(errno, "PIO_UNIMAP failed, unimaps might be incorrect for tty%u: %m", i);
+ continue;
+ }
+
+ log_debug("Font and unimap successfully copied to %s", ttyname);
+ }
+}
+
+static int find_source_vc(char **ret_path, unsigned *ret_idx) {
+ _cleanup_free_ char *path = NULL;
+ int r, err = 0;
+ unsigned i;
+
+ path = new(char, sizeof("/dev/tty63"));
+ if (!path)
+ return log_oom();
+
+ for (i = 1; i <= 63; i++) {
+ _cleanup_close_ int fd = -1;
+
+ r = verify_vc_allocation(i);
+ if (r < 0) {
+ if (!err)
+ err = -r;
+ continue;
+ }
+
+ sprintf(path, "/dev/tty%u", i);
+ fd = open_terminal(path, O_RDWR|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0) {
+ if (!err)
+ err = -fd;
+ continue;
+ }
+ r = verify_vc_kbmode(fd);
+ if (r < 0) {
+ if (!err)
+ err = -r;
+ continue;
+ }
+
+ /* all checks passed, return this one as a source console */
+ *ret_idx = i;
+ *ret_path = TAKE_PTR(path);
+ return TAKE_FD(fd);
+ }
+
+ return log_error_errno(err, "No usable source console found: %m");
+}
+
+static int verify_source_vc(char **ret_path, const char *src_vc) {
+ _cleanup_close_ int fd = -1;
+ char *path;
+ int r;
+
+ fd = open_terminal(src_vc, O_RDWR|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open %s: %m", src_vc);
+
+ r = verify_vc_device(fd);
+ if (r < 0)
+ return log_error_errno(r, "Device %s is not a virtual console: %m", src_vc);
+
+ r = verify_vc_allocation_byfd(fd);
+ if (r < 0)
+ return log_error_errno(r, "Virtual console %s is not allocated: %m", src_vc);
+
+ r = verify_vc_kbmode(fd);
+ if (r < 0)
+ return log_error_errno(r, "Virtual console %s is not in K_XLATE or K_UNICODE: %m", src_vc);
+
+ path = strdup(src_vc);
+ if (!path)
+ return log_oom();
+
+ *ret_path = path;
+ return TAKE_FD(fd);
+}
+
+int main(int argc, char **argv) {
+ _cleanup_free_ char
+ *vc = NULL,
+ *vc_keymap = NULL, *vc_keymap_toggle = NULL,
+ *vc_font = NULL, *vc_font_map = NULL, *vc_font_unimap = NULL;
+ _cleanup_close_ int fd = -1;
+ bool utf8, keyboard_ok;
+ unsigned idx = 0;
+ int r;
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (argv[1])
+ fd = verify_source_vc(&vc, argv[1]);
+ else
+ fd = find_source_vc(&vc, &idx);
+ if (fd < 0)
+ return EXIT_FAILURE;
+
+ utf8 = is_locale_utf8();
+
+ r = parse_env_file(NULL, "/etc/vconsole.conf",
+ "KEYMAP", &vc_keymap,
+ "KEYMAP_TOGGLE", &vc_keymap_toggle,
+ "FONT", &vc_font,
+ "FONT_MAP", &vc_font_map,
+ "FONT_UNIMAP", &vc_font_unimap);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /etc/vconsole.conf: %m");
+
+ /* Let the kernel command line override /etc/vconsole.conf */
+ r = proc_cmdline_get_key_many(
+ PROC_CMDLINE_STRIP_RD_PREFIX,
+ "vconsole.keymap", &vc_keymap,
+ "vconsole.keymap_toggle", &vc_keymap_toggle,
+ "vconsole.font", &vc_font,
+ "vconsole.font_map", &vc_font_map,
+ "vconsole.font_unimap", &vc_font_unimap,
+ /* compatibility with obsolete multiple-dot scheme */
+ "vconsole.keymap.toggle", &vc_keymap_toggle,
+ "vconsole.font.map", &vc_font_map,
+ "vconsole.font.unimap", &vc_font_unimap);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to read /proc/cmdline: %m");
+
+ (void) toggle_utf8_sysfs(utf8);
+ (void) toggle_utf8_vc(vc, fd, utf8);
+
+ r = font_load_and_wait(vc, vc_font, vc_font_map, vc_font_unimap);
+ keyboard_ok = keyboard_load_and_wait(vc, vc_keymap, vc_keymap_toggle, utf8) == 0;
+
+ if (idx > 0) {
+ if (r == 0)
+ setup_remaining_vcs(fd, idx, utf8);
+ else if (r == EX_OSERR)
+ /* setfont returns EX_OSERR when ioctl(KDFONTOP/PIO_FONTX/PIO_FONTX) fails.
+ * This might mean various things, but in particular lack of a graphical
+ * console. Let's be generous and not treat this as an error. */
+ log_notice("Setting fonts failed with a \"system error\", ignoring.");
+ else
+ log_warning("Setting source virtual console failed, ignoring remaining ones");
+ }
+
+ return IN_SET(r, 0, EX_OSERR) && keyboard_ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/src/veritysetup/veritysetup-generator.c b/src/veritysetup/veritysetup-generator.c
new file mode 100644
index 0000000..7c807c8
--- /dev/null
+++ b/src/veritysetup/veritysetup-generator.c
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "hexdecoct.h"
+#include "id128-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "unit-name.h"
+
+#define SYSTEMD_VERITYSETUP_SERVICE "systemd-veritysetup@root.service"
+
+static const char *arg_dest = NULL;
+static bool arg_enabled = true;
+static char *arg_root_hash = NULL;
+static char *arg_data_what = NULL;
+static char *arg_hash_what = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root_hash, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_data_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hash_what, freep);
+
+static int create_device(void) {
+ _cleanup_free_ char *u = NULL, *v = NULL, *d = NULL, *e = NULL, *u_escaped = NULL, *v_escaped = NULL, *root_hash_escaped = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *to;
+ int r;
+
+ /* If all three pieces of information are missing, then verity is turned off */
+ if (!arg_root_hash && !arg_data_what && !arg_hash_what)
+ return 0;
+
+ /* if one of them is missing however, the data is simply incomplete and this is an error */
+ if (!arg_root_hash)
+ log_error("Verity information incomplete, root hash unspecified.");
+ if (!arg_data_what)
+ log_error("Verity information incomplete, root data device unspecified.");
+ if (!arg_hash_what)
+ log_error("Verity information incomplete, root hash device unspecified.");
+
+ if (!arg_root_hash || !arg_data_what || !arg_hash_what)
+ return -EINVAL;
+
+ log_debug("Using root verity data device %s,\n"
+ " hash device %s,\n"
+ " and root hash %s.", arg_data_what, arg_hash_what, arg_root_hash);
+
+ u = fstab_node_to_udev_node(arg_data_what);
+ if (!u)
+ return log_oom();
+ v = fstab_node_to_udev_node(arg_hash_what);
+ if (!v)
+ return log_oom();
+
+ u_escaped = specifier_escape(u);
+ if (!u_escaped)
+ return log_oom();
+ v_escaped = specifier_escape(v);
+ if (!v_escaped)
+ return log_oom();
+
+ r = unit_name_from_path(u, ".device", &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+ r = unit_name_from_path(v, ".device", &e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate unit name: %m");
+
+ root_hash_escaped = specifier_escape(arg_root_hash);
+ if (!root_hash_escaped)
+ return log_oom();
+
+ r = generator_open_unit_file(arg_dest, NULL, SYSTEMD_VERITYSETUP_SERVICE, &f);
+ if (r < 0)
+ return r;
+
+ fprintf(f,
+ "[Unit]\n"
+ "Description=Integrity Protection Setup for %%I\n"
+ "Documentation=man:systemd-veritysetup-generator(8) man:systemd-veritysetup@.service(8)\n"
+ "SourcePath=/proc/cmdline\n"
+ "DefaultDependencies=no\n"
+ "Conflicts=umount.target\n"
+ "BindsTo=%s %s\n"
+ "IgnoreOnIsolate=true\n"
+ "After=cryptsetup-pre.target systemd-udevd-kernel.socket %s %s\n"
+ "Before=cryptsetup.target umount.target\n"
+ "\n[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart=" ROOTLIBEXECDIR "/systemd-veritysetup attach root '%s' '%s' '%s'\n"
+ "ExecStop=" ROOTLIBEXECDIR "/systemd-veritysetup detach root\n",
+ d, e,
+ d, e,
+ u_escaped, v_escaped, root_hash_escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write file unit "SYSTEMD_VERITYSETUP_SERVICE": %m");
+
+ to = strjoina(arg_dest, "/cryptsetup.target.requires/" SYSTEMD_VERITYSETUP_SERVICE);
+
+ (void) mkdir_parents(to, 0755);
+ if (symlink("../" SYSTEMD_VERITYSETUP_SERVICE, to) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", to);
+
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ int r;
+
+ if (proc_cmdline_key_streq(key, "systemd.verity")) {
+
+ r = value ? parse_boolean(value) : 1;
+ if (r < 0)
+ log_warning("Failed to parse verity= kernel command line switch %s. Ignoring.", value);
+ else
+ arg_enabled = r;
+
+ } else if (proc_cmdline_key_streq(key, "roothash")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = free_and_strdup(&arg_root_hash, value);
+ if (r < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.verity_root_data")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = free_and_strdup(&arg_data_what, value);
+ if (r < 0)
+ return log_oom();
+
+ } else if (proc_cmdline_key_streq(key, "systemd.verity_root_hash")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ r = free_and_strdup(&arg_hash_what, value);
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+static int determine_devices(void) {
+ _cleanup_free_ void *m = NULL;
+ sd_id128_t root_uuid, verity_uuid;
+ char ids[ID128_UUID_STRING_MAX];
+ size_t l;
+ int r;
+
+ /* Try to automatically derive the root data and hash device paths from the root hash */
+
+ if (!arg_root_hash)
+ return 0;
+
+ if (arg_data_what && arg_hash_what)
+ return 0;
+
+ r = unhexmem(arg_root_hash, strlen(arg_root_hash), &m, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash: %s", arg_root_hash);
+ if (l < sizeof(sd_id128_t)) {
+ log_debug("Root hash is shorter than 128 bits (32 characters), ignoring for discovering verity partition.");
+ return 0;
+ }
+
+ if (!arg_data_what) {
+ memcpy(&root_uuid, m, sizeof(root_uuid));
+
+ arg_data_what = path_join("/dev/disk/by-partuuid", id128_to_uuid_string(root_uuid, ids));
+ if (!arg_data_what)
+ return log_oom();
+ }
+
+ if (!arg_hash_what) {
+ memcpy(&verity_uuid, (uint8_t*) m + l - sizeof(verity_uuid), sizeof(verity_uuid));
+
+ arg_hash_what = path_join("/dev/disk/by-partuuid", id128_to_uuid_string(verity_uuid, ids));
+ if (!arg_hash_what)
+ return log_oom();
+ }
+
+ return 1;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ int r;
+
+ assert_se(arg_dest = dest);
+
+ r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse kernel command line: %m");
+
+ /* For now we only support the root device on verity. Later on we might want to add support for /etc/veritytab
+ * or similar to define additional mappings */
+
+ if (!arg_enabled)
+ return 0;
+
+ r = determine_devices();
+ if (r < 0)
+ return r;
+
+ return create_device();
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/veritysetup/veritysetup.c b/src/veritysetup/veritysetup.c
new file mode 100644
index 0000000..558e951
--- /dev/null
+++ b/src/veritysetup/veritysetup.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "cryptsetup-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "log.h"
+#include "main-func.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "terminal-util.h"
+
+static char *arg_root_hash = NULL;
+static char *arg_data_what = NULL;
+static char *arg_hash_what = NULL;
+
+STATIC_DESTRUCTOR_REGISTER(arg_root_hash, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_data_what, freep);
+STATIC_DESTRUCTOR_REGISTER(arg_hash_what, freep);
+
+static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd-veritysetup@.service", "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s attach VOLUME DATADEVICE HASHDEVICE ROOTHASH [ROOTHASHSIG]\n"
+ "%s detach VOLUME\n\n"
+ "Attaches or detaches an integrity protected block device.\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , program_invocation_short_name
+ , link
+ );
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+ int r;
+
+ if (argc <= 1)
+ return help();
+
+ if (argc < 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program requires at least two arguments.");
+
+ log_setup_service();
+
+ umask(0022);
+
+ if (streq(argv[1], "attach")) {
+ _cleanup_free_ void *m = NULL;
+ crypt_status_info status;
+ size_t l;
+
+ if (argc < 6)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "attach requires at least two arguments.");
+
+ r = unhexmem(argv[5], strlen(argv[5]), &m, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash: %m");
+
+ r = crypt_init(&cd, argv[4]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open verity device %s: %m", argv[4]);
+
+ cryptsetup_enable_logging(cd);
+
+ status = crypt_status(cd, argv[2]);
+ if (IN_SET(status, CRYPT_ACTIVE, CRYPT_BUSY)) {
+ log_info("Volume %s already active.", argv[2]);
+ return 0;
+ }
+
+ r = crypt_load(cd, CRYPT_VERITY, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load verity superblock: %m");
+
+ r = crypt_set_data_device(cd, argv[3]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to configure data device: %m");
+
+ if (argc > 6) {
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+ _cleanup_free_ char *hash_sig = NULL;
+ size_t hash_sig_size;
+ char *value;
+
+ if ((value = startswith(argv[6], "base64:"))) {
+ r = unbase64mem(value, strlen(value), (void *)&hash_sig, &hash_sig_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse root hash signature '%s': %m", argv[6]);
+ } else {
+ r = read_full_file_full(AT_FDCWD, argv[6], READ_FULL_FILE_CONNECT_SOCKET, NULL, &hash_sig, &hash_sig_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read root hash signature: %m");
+ }
+
+ r = crypt_activate_by_signed_key(cd, argv[2], m, l, hash_sig, hash_sig_size, CRYPT_ACTIVATE_READONLY);
+#else
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "activation of verity device with signature %s requested, but not supported by cryptsetup due to missing crypt_activate_by_signed_key()", argv[6]);
+#endif
+ } else
+ r = crypt_activate_by_volume_key(cd, argv[2], m, l, CRYPT_ACTIVATE_READONLY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up verity device: %m");
+
+ } else if (streq(argv[1], "detach")) {
+
+ r = crypt_init_by_name(&cd, argv[2]);
+ if (r == -ENODEV) {
+ log_info("Volume %s already inactive.", argv[2]);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "crypt_init_by_name() failed: %m");
+
+ cryptsetup_enable_logging(cd);
+
+ r = crypt_deactivate(cd, argv[2]);
+ if (r < 0)
+ return log_error_errno(r, "Failed to deactivate: %m");
+
+ } else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unknown verb %s.", argv[1]);
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/version/version.h.in b/src/version/version.h.in
new file mode 100644
index 0000000..7b0bf8e
--- /dev/null
+++ b/src/version/version.h.in
@@ -0,0 +1,8 @@
+/* Detailed project version that includes git commit when not built from a release.
+ * Use this in preference to PROJECT_VERSION, with the following exceptions:
+ * - where a simplified form is expected for compatibility, for example
+ * 'udevadm version',
+ * - where a simplified machine-parsable form is more useful, for example
+ * pkgconfig files and version information written to binary files.
+ */
+#define GIT_VERSION "@VCS_TAG@"
diff --git a/src/volatile-root/volatile-root.c b/src/volatile-root/volatile-root.c
new file mode 100644
index 0000000..ee3532c
--- /dev/null
+++ b/src/volatile-root/volatile-root.c
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "escape.h"
+#include "fs-util.h"
+#include "main-func.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "volatile-util.h"
+
+static int make_volatile(const char *path) {
+ _cleanup_free_ char *old_usr = NULL;
+ int r;
+
+ assert(path);
+
+ r = chase_symlinks("/usr", path, CHASE_PREFIX_ROOT, &old_usr, NULL);
+ if (r < 0)
+ return log_error_errno(r, "/usr not available in old root: %m");
+
+ r = mkdir_p("/run/systemd/volatile-sysroot", 0700);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't generate volatile sysroot directory: %m");
+
+ r = mount_nofollow_verbose(LOG_ERR, "tmpfs", "/run/systemd/volatile-sysroot", "tmpfs", MS_STRICTATIME, "mode=755" TMPFS_LIMITS_ROOTFS);
+ if (r < 0)
+ goto finish_rmdir;
+
+ if (mkdir("/run/systemd/volatile-sysroot/usr", 0755) < 0) {
+ r = log_error_errno(errno, "Failed to create /usr directory: %m");
+ goto finish_umount;
+ }
+
+ r = mount_nofollow_verbose(LOG_ERR, old_usr, "/run/systemd/volatile-sysroot/usr", NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ goto finish_umount;
+
+ r = bind_remount_recursive("/run/systemd/volatile-sysroot/usr", MS_RDONLY, MS_RDONLY, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to remount /usr read-only: %m");
+ goto finish_umount;
+ }
+
+ r = umount_recursive(path, 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to unmount %s: %m", path);
+ goto finish_umount;
+ }
+
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
+ log_warning_errno(errno, "Failed to remount %s MS_SLAVE|MS_REC, ignoring: %m", path);
+
+ r = mount_nofollow_verbose(LOG_ERR, "/run/systemd/volatile-sysroot", path, NULL, MS_MOVE, NULL);
+
+finish_umount:
+ (void) umount_recursive("/run/systemd/volatile-sysroot", 0);
+
+finish_rmdir:
+ (void) rmdir("/run/systemd/volatile-sysroot");
+
+ return r;
+}
+
+static int make_overlay(const char *path) {
+ _cleanup_free_ char *escaped_path = NULL;
+ bool tmpfs_mounted = false;
+ const char *options = NULL;
+ int r;
+
+ assert(path);
+
+ r = mkdir_p("/run/systemd/overlay-sysroot", 0700);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't create overlay sysroot directory: %m");
+
+ r = mount_nofollow_verbose(LOG_ERR, "tmpfs", "/run/systemd/overlay-sysroot", "tmpfs", MS_STRICTATIME, "mode=755" TMPFS_LIMITS_ROOTFS);
+ if (r < 0)
+ goto finish;
+
+ tmpfs_mounted = true;
+
+ if (mkdir("/run/systemd/overlay-sysroot/upper", 0755) < 0) {
+ r = log_error_errno(errno, "Failed to create /run/systemd/overlay-sysroot/upper: %m");
+ goto finish;
+ }
+
+ if (mkdir("/run/systemd/overlay-sysroot/work", 0755) < 0) {
+ r = log_error_errno(errno, "Failed to create /run/systemd/overlay-sysroot/work: %m");
+ goto finish;
+ }
+
+ escaped_path = shell_escape(path, ",:");
+ if (!escaped_path) {
+ r = log_oom();
+ goto finish;
+ }
+
+ options = strjoina("lowerdir=", escaped_path, ",upperdir=/run/systemd/overlay-sysroot/upper,workdir=/run/systemd/overlay-sysroot/work");
+ r = mount_nofollow_verbose(LOG_ERR, "overlay", path, "overlay", 0, options);
+
+finish:
+ if (tmpfs_mounted)
+ (void) umount_verbose(LOG_ERR, "/run/systemd/overlay-sysroot", UMOUNT_NOFOLLOW);
+
+ (void) rmdir("/run/systemd/overlay-sysroot");
+ return r;
+}
+
+static int run(int argc, char *argv[]) {
+ VolatileMode m = _VOLATILE_MODE_INVALID;
+ const char *path;
+ dev_t devt;
+ int r;
+
+ log_setup_service();
+
+ if (argc > 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments. Expected directory and mode.");
+
+ r = query_volatile_mode(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine volatile mode from kernel command line.");
+ if (r == 0 && argc >= 2) {
+ /* The kernel command line always wins. However if nothing was set there, the argument passed here wins instead. */
+ m = volatile_mode_from_string(argv[1]);
+ if (m < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Couldn't parse volatile mode: %s", argv[1]);
+ }
+
+ if (argc < 3)
+ path = "/sysroot";
+ else {
+ path = argv[2];
+
+ if (isempty(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Directory name cannot be empty.");
+ if (!path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Directory must be specified as absolute path.");
+ if (path_equal(path, "/"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Directory cannot be the root directory.");
+ }
+
+ if (!IN_SET(m, VOLATILE_YES, VOLATILE_OVERLAY))
+ return 0;
+
+ r = path_is_mount_point(path, NULL, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't determine whether %s is a mount point: %m", path);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "%s is not a mount point.", path);
+
+ r = path_is_temporary_fs(path);
+ if (r < 0)
+ return log_error_errno(r, "Couldn't determine whether %s is a temporary file system: %m", path);
+ if (r > 0) {
+ log_info("%s already is a temporary file system.", path);
+ return 0;
+ }
+
+ /* We are about to replace the root directory with something else. Later code might want to know what we
+ * replaced here, hence let's save that information as a symlink we can later use. (This is particularly
+ * relevant for the overlayfs case where we'll fully obstruct the view onto the underlying device, hence
+ * querying the backing device node from the file system directly is no longer possible. */
+ r = get_block_device_harder(path, &devt);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine device major/minor of %s: %m", path);
+ else if (r > 0) { /* backed by block device */
+ _cleanup_free_ char *dn = NULL;
+
+ r = device_path_make_major_minor(S_IFBLK, devt, &dn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format device node path: %m");
+
+ if (symlink(dn, "/run/systemd/volatile-root") < 0)
+ log_warning_errno(errno, "Failed to create symlink /run/systemd/volatile-root: %m");
+ }
+
+ if (m == VOLATILE_YES)
+ return make_volatile(path);
+ else {
+ assert(m == VOLATILE_OVERLAY);
+ return make_overlay(path);
+ }
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/xdg-autostart-generator/xdg-autostart-condition.c b/src/xdg-autostart-generator/xdg-autostart-condition.c
new file mode 100644
index 0000000..c4485cf
--- /dev/null
+++ b/src/xdg-autostart-generator/xdg-autostart-condition.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "main-func.h"
+#include "strv.h"
+
+/*
+ * This binary is intended to be run as an ExecCondition= in units generated
+ * by the xdg-autostart-generator. It does the appropriate checks against
+ * XDG_CURRENT_DESKTOP that are too advanced for simple ConditionEnvironment=
+ * matches.
+ */
+
+static int run(int argc, char *argv[]) {
+ _cleanup_strv_free_ char **only_show_in = NULL, **not_show_in = NULL, **desktops = NULL;
+ const char *xdg_current_desktop;
+ char **d;
+
+ if (argc != 3)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Wrong argument count. Expected the OnlyShowIn= and NotShowIn= sets, each colon separated.");
+
+ xdg_current_desktop = getenv("XDG_CURRENT_DESKTOP");
+ if (xdg_current_desktop) {
+ desktops = strv_split(xdg_current_desktop, ":");
+ if (!desktops)
+ return log_oom();
+ }
+
+ only_show_in = strv_split(argv[1], ":");
+ not_show_in = strv_split(argv[2], ":");
+ if (!only_show_in || !not_show_in)
+ return log_oom();
+
+ /* Each desktop in XDG_CURRENT_DESKTOP needs to be matched in order. */
+ STRV_FOREACH(d, desktops) {
+ if (strv_contains(only_show_in, *d))
+ return 0;
+ if (strv_contains(not_show_in, *d))
+ return 1;
+ }
+
+ /* non-zero exit code when only_show_in has a proper value */
+ return !strv_isempty(only_show_in);
+}
+
+DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
diff --git a/src/xdg-autostart-generator/xdg-autostart-generator.c b/src/xdg-autostart-generator/xdg-autostart-generator.c
new file mode 100644
index 0000000..7b441a9
--- /dev/null
+++ b/src/xdg-autostart-generator/xdg-autostart-generator.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "generator.h"
+#include "hashmap.h"
+#include "log.h"
+#include "main-func.h"
+#include "nulstr-util.h"
+#include "path-lookup.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "xdg-autostart-service.h"
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(xdgautostartservice_hash_ops, char, string_hash_func, string_compare_func, XdgAutostartService, xdg_autostart_service_free);
+
+static int enumerate_xdg_autostart(Hashmap *all_services) {
+ _cleanup_strv_free_ char **autostart_dirs = NULL;
+ _cleanup_strv_free_ char **config_dirs = NULL;
+ _unused_ _cleanup_strv_free_ char **data_dirs = NULL;
+ _cleanup_free_ char *user_config_autostart_dir = NULL;
+ char **path;
+ int r;
+
+ r = xdg_user_config_dir(&user_config_autostart_dir, "/autostart");
+ if (r < 0)
+ return r;
+ r = strv_extend(&autostart_dirs, user_config_autostart_dir);
+ if (r < 0)
+ return r;
+
+ r = xdg_user_dirs(&config_dirs, &data_dirs);
+ if (r < 0)
+ return r;
+ r = strv_extend_strv_concat(&autostart_dirs, config_dirs, "/autostart");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(path, autostart_dirs) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*path);
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Opening %s failed, ignoring: %m", *path);
+ continue;
+ }
+
+ FOREACH_DIRENT(de, d, log_warning_errno(errno, "Failed to enumerate directory %s, ignoring: %m", *path)) {
+ _cleanup_free_ char *fpath = NULL, *name = NULL;
+ _cleanup_(xdg_autostart_service_freep) XdgAutostartService *service = NULL;
+ struct stat st;
+
+ if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) {
+ log_warning_errno(errno, "stat() failed on %s/%s, ignoring: %m", *path, de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ name = xdg_autostart_service_translate_name(de->d_name);
+ if (!name)
+ return log_oom();
+
+ if (hashmap_contains(all_services, name))
+ continue;
+
+ fpath = path_join(*path, de->d_name);
+ if (!fpath)
+ return log_oom();
+
+ service = xdg_autostart_service_parse_desktop(fpath);
+ if (!service)
+ return log_oom();
+ service->name = TAKE_PTR(name);
+
+ r = hashmap_put(all_services, service->name, service);
+ if (r < 0)
+ return log_oom();
+ TAKE_PTR(service);
+ }
+ }
+
+ return 0;
+}
+
+static int run(const char *dest, const char *dest_early, const char *dest_late) {
+ _cleanup_(hashmap_freep) Hashmap *all_services = NULL;
+ XdgAutostartService *service;
+ int r;
+
+ assert_se(dest_late);
+
+ all_services = hashmap_new(&xdgautostartservice_hash_ops);
+ if (!all_services)
+ return log_oom();
+
+ r = enumerate_xdg_autostart(all_services);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH(service, all_services)
+ (void) xdg_autostart_service_generate_unit(service, dest_late);
+
+ return 0;
+}
+
+DEFINE_MAIN_GENERATOR_FUNCTION(run);
diff --git a/src/xdg-autostart-generator/xdg-autostart-service.c b/src/xdg-autostart-generator/xdg-autostart-service.c
new file mode 100644
index 0000000..671d16d
--- /dev/null
+++ b/src/xdg-autostart-generator/xdg-autostart-service.c
@@ -0,0 +1,660 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "xdg-autostart-service.h"
+
+#include "conf-parser.h"
+#include "escape.h"
+#include "unit-name.h"
+#include "path-util.h"
+#include "fd-util.h"
+#include "generator.h"
+#include "log.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "nulstr-util.h"
+#include "strv.h"
+
+XdgAutostartService* xdg_autostart_service_free(XdgAutostartService *s) {
+ if (!s)
+ return NULL;
+
+ free(s->name);
+ free(s->path);
+ free(s->description);
+
+ free(s->type);
+ free(s->exec_string);
+ free(s->working_directory);
+
+ strv_free(s->only_show_in);
+ strv_free(s->not_show_in);
+
+ free(s->try_exec);
+ free(s->autostart_condition);
+ free(s->kde_autostart_condition);
+
+ free(s->gnome_autostart_phase);
+
+ return mfree(s);
+}
+
+char *xdg_autostart_service_translate_name(const char *name) {
+ _cleanup_free_ char *c = NULL, *escaped = NULL;
+ char *res;
+
+ c = strdup(name);
+ if (!c)
+ return NULL;
+
+ res = endswith(c, ".desktop");
+ if (res)
+ *res = '\0';
+
+ escaped = unit_name_escape(c);
+ if (!escaped)
+ return NULL;
+
+ return strjoin("app-", escaped, "-autostart.service");
+}
+
+static int xdg_config_parse_bool(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ bool *b = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "true"))
+ *b = true;
+ else if (streq(rvalue, "false"))
+ *b = false;
+ else
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL), "Invalid value for boolean: %s", rvalue);
+
+ return 0;
+}
+
+/* Unescapes the string in-place, returns non-zero status on error. */
+static int xdg_unescape_string(
+ const char *unit,
+ const char *filename,
+ int line,
+ char *str) {
+
+ char *in;
+ char *out;
+
+ assert(str);
+
+ in = out = str;
+
+ for (; *in; in++, out++) {
+ if (*in == '\\') {
+ /* Move forward, and ensure it is a valid escape. */
+ in++;
+
+ switch (*in) {
+ case 's':
+ *out = ' ';
+ break;
+ case 'n':
+ *out = '\n';
+ break;
+ case 't':
+ *out = '\t';
+ break;
+ case 'r':
+ *out = '\r';
+ break;
+ case '\\':
+ *out = '\\';
+ break;
+ case ';':
+ /* Technically only permitted for strv. */
+ *out = ';';
+ break;
+ default:
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EINVAL), "Undefined escape sequence \\%c.", *in);
+ }
+
+ continue;
+ }
+
+ *out = *in;
+ }
+ *out = '\0';
+
+ return 0;
+}
+
+/* Note: We do not bother with unescaping the strings, hence the _raw postfix. */
+static int xdg_config_parse_string(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *res = NULL;
+ char **out = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* XDG does not allow duplicate definitions. */
+ if (*out) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Key %s was defined multiple times, ignoring.", lvalue);
+ return 0;
+ }
+
+ res = strdup(rvalue);
+ if (!res)
+ return log_oom();
+
+ r = xdg_unescape_string(unit, filename, line, res);
+ if (r < 0)
+ return r;
+
+ *out = TAKE_PTR(res);
+ return 0;
+}
+
+static int strv_strndup_unescape_and_push(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ char ***sv,
+ size_t *n_allocated,
+ size_t *n,
+ const char *start,
+ const char *end) {
+
+ if (end == start)
+ return 0;
+
+ _cleanup_free_ char *copy = NULL;
+ int r;
+
+ copy = strndup(start, end - start);
+ if (!copy)
+ return log_oom();
+
+ r = xdg_unescape_string(unit, filename, line, copy);
+ if (r < 0)
+ return r;
+
+ if (!greedy_realloc((void**) sv, n_allocated, *n + 2, sizeof(char*))) /* One extra for NULL */
+ return log_oom();
+
+ (*sv)[*n] = TAKE_PTR(copy);
+ (*sv)[*n + 1] = NULL;
+ (*n)++;
+
+ return 0;
+}
+
+static int xdg_config_parse_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***ret_sv = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* XDG does not allow duplicate definitions. */
+ if (*ret_sv) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Key %s was already defined, ignoring.", lvalue);
+ return 0;
+ }
+
+ size_t n = 0, n_allocated = 0;
+ _cleanup_strv_free_ char **sv = NULL;
+
+ if (!GREEDY_REALLOC0(sv, n_allocated, 1))
+ return log_oom();
+
+ /* We cannot use strv_split because it does not handle escaping correctly. */
+ const char *start = rvalue, *end;
+
+ for (end = start; *end; end++) {
+ if (*end == '\\') {
+ /* Move forward, and ensure it is a valid escape. */
+ end++;
+ if (!strchr("sntr\\;", *end)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Undefined escape sequence \\%c.", *end);
+ return 0;
+ }
+ continue;
+ }
+
+ if (*end == ';') {
+ r = strv_strndup_unescape_and_push(unit, filename, line,
+ &sv, &n_allocated, &n,
+ start, end);
+ if (r < 0)
+ return r;
+
+ start = end + 1;
+ }
+ }
+
+ /* Handle the trailing entry after the last separator */
+ r = strv_strndup_unescape_and_push(unit, filename, line,
+ &sv, &n_allocated, &n,
+ start, end);
+ if (r < 0)
+ return r;
+
+ *ret_sv = TAKE_PTR(sv);
+ return 0;
+}
+
+static int xdg_config_item_table_lookup(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata) {
+
+ assert(lvalue);
+
+ /* Ignore any keys with [] as those are translations. */
+ if (strchr(lvalue, '[')) {
+ *func = NULL;
+ *ltype = 0;
+ *data = NULL;
+ return 1;
+ }
+
+ return config_item_table_lookup(table, section, lvalue, func, ltype, data, userdata);
+}
+
+XdgAutostartService *xdg_autostart_service_parse_desktop(const char *path) {
+ _cleanup_(xdg_autostart_service_freep) XdgAutostartService *service = NULL;
+ int r;
+
+ service = new0(XdgAutostartService, 1);
+ if (!service)
+ return NULL;
+
+ service->path = strdup(path);
+ if (!service->path)
+ return NULL;
+
+ const ConfigTableItem items[] = {
+ { "Desktop Entry", "Name", xdg_config_parse_string, 0, &service->description},
+ { "Desktop Entry", "Exec", xdg_config_parse_string, 0, &service->exec_string},
+ { "Desktop Entry", "Path", xdg_config_parse_string, 0, &service->working_directory},
+ { "Desktop Entry", "TryExec", xdg_config_parse_string, 0, &service->try_exec},
+ { "Desktop Entry", "Type", xdg_config_parse_string, 0, &service->type},
+ { "Desktop Entry", "OnlyShowIn", xdg_config_parse_strv, 0, &service->only_show_in},
+ { "Desktop Entry", "NotShowIn", xdg_config_parse_strv, 0, &service->not_show_in},
+ { "Desktop Entry", "Hidden", xdg_config_parse_bool, 0, &service->hidden},
+ { "Desktop Entry", "AutostartCondition", xdg_config_parse_string, 0, &service->autostart_condition},
+ { "Desktop Entry", "X-KDE-autostart-condition", xdg_config_parse_string, 0, &service->kde_autostart_condition},
+ { "Desktop Entry", "X-GNOME-Autostart-Phase", xdg_config_parse_string, 0, &service->gnome_autostart_phase},
+ { "Desktop Entry", "X-systemd-skip", xdg_config_parse_bool, 0, &service->systemd_skip},
+
+ /* Common entries that we do not use currently. */
+ { "Desktop Entry", "Categories", NULL, 0, NULL},
+ { "Desktop Entry", "Comment", NULL, 0, NULL},
+ { "Desktop Entry", "Encoding", NULL, 0, NULL},
+ { "Desktop Entry", "GenericName", NULL, 0, NULL},
+ { "Desktop Entry", "Icon", NULL, 0, NULL},
+ { "Desktop Entry", "Keywords", NULL, 0, NULL},
+ { "Desktop Entry", "MimeType", NULL, 0, NULL},
+ { "Desktop Entry", "NoDisplay", NULL, 0, NULL},
+ { "Desktop Entry", "StartupNotify", NULL, 0, NULL},
+ { "Desktop Entry", "StartupWMClass", NULL, 0, NULL},
+ { "Desktop Entry", "Terminal", NULL, 0, NULL},
+ { "Desktop Entry", "URL", NULL, 0, NULL},
+ { "Desktop Entry", "Version", NULL, 0, NULL},
+ {}
+ };
+
+ r = config_parse(NULL, service->path, NULL,
+ "Desktop Entry\0",
+ xdg_config_item_table_lookup, items,
+ CONFIG_PARSE_WARN, service,
+ NULL);
+ /* If parsing failed, only hide the file so it will still mask others. */
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse %s, ignoring it", service->path);
+ service->hidden = true;
+ }
+
+ return TAKE_PTR(service);
+}
+
+int xdg_autostart_format_exec_start(
+ const char *exec,
+ char **ret_exec_start) {
+
+ _cleanup_strv_free_ char **exec_split = NULL;
+ char *res;
+ size_t n, i;
+ bool first_arg;
+ int r;
+
+ /*
+ * Unfortunately, there is a mismatch between systemd's idea of $PATH
+ * and XDGs. i.e. we need to ensure that we have an absolute path to
+ * support cases where $PATH has been modified from the default set.
+ *
+ * Note that this is only needed for development environments though;
+ * so while it is important, this should have no effect in production
+ * environments.
+ *
+ * To be compliant with the XDG specification, we also need to strip
+ * certain parameters and such. Doing so properly makes parsing the
+ * command line unavoidable.
+ *
+ * NOTE: Technically, XDG only specifies " as quotes, while this also
+ * accepts '.
+ */
+ r = strv_split_full(&exec_split, exec, NULL, EXTRACT_UNQUOTE | EXTRACT_RELAX);
+ if (r < 0)
+ return r;
+
+ if (strv_isempty(exec_split))
+ return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), "Exec line is empty");
+
+ first_arg = true;
+ for (i = n = 0; exec_split[i]; i++) {
+ _cleanup_free_ char *c = NULL, *raw = NULL, *p = NULL, *escaped = NULL, *quoted = NULL;
+
+ r = cunescape(exec_split[i], 0, &c);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to unescape '%s': %m", exec_split[i]);
+
+ if (first_arg) {
+ _cleanup_free_ char *executable = NULL;
+
+ /* This is the executable, find it in $PATH */
+ first_arg = false;
+ r = find_executable(c, &executable);
+ if (r < 0)
+ return log_info_errno(r, "Exec binary '%s' does not exist: %m", c);
+
+ escaped = cescape(executable);
+ if (!escaped)
+ return log_oom();
+
+ free(exec_split[n]);
+ exec_split[n++] = TAKE_PTR(escaped);
+ continue;
+ }
+
+ /*
+ * Remove any standardised XDG fields; we assume they never appear as
+ * part of another argument as that just does not make any sense as
+ * they can be empty (GLib will e.g. turn "%f" into an empty argument).
+ * Other implementations may handle this differently.
+ */
+ if (STR_IN_SET(c,
+ "%f", "%F",
+ "%u", "%U",
+ "%d", "%D",
+ "%n", "%N",
+ "%i", /* Location of icon, could be implemented. */
+ "%c", /* Translated application name, could be implemented. */
+ "%k", /* Location of desktop file, could be implemented. */
+ "%v",
+ "%m"
+ ))
+ continue;
+
+ /*
+ * %% -> % and then % -> %% means that we correctly quote any %
+ * and also quote any left over (and invalid) % specifier from
+ * the desktop file.
+ */
+ raw = strreplace(c, "%%", "%");
+ if (!raw)
+ return log_oom();
+ p = strreplace(raw, "%", "%%");
+ if (!p)
+ return log_oom();
+ escaped = cescape(p);
+ if (!escaped)
+ return log_oom();
+
+ quoted = strjoin("\"", escaped, "\"");
+ if (!quoted)
+ return log_oom();
+
+ free(exec_split[n]);
+ exec_split[n++] = TAKE_PTR(quoted);
+ }
+ for (; exec_split[n]; n++)
+ exec_split[n] = mfree(exec_split[n]);
+
+ res = strv_join(exec_split, " ");
+ if (!res)
+ return log_oom();
+
+ *ret_exec_start = res;
+ return 0;
+}
+
+static int xdg_autostart_generate_desktop_condition(
+ FILE *f,
+ const char *test_binary,
+ const char *condition) {
+
+ int r;
+
+ /* Generate an ExecCondition for GNOME autostart condition */
+ if (!isempty(condition)) {
+ _cleanup_free_ char *gnome_autostart_condition_path = NULL, *e_autostart_condition = NULL;
+
+ r = find_executable(test_binary, &gnome_autostart_condition_path);
+ if (r < 0) {
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "%s not found: %m", test_binary);
+ fprintf(f, "# ExecCondition using %s skipped due to missing binary.\n", test_binary);
+ return r;
+ }
+
+ e_autostart_condition = cescape(condition);
+ if (!e_autostart_condition)
+ return log_oom();
+
+ fprintf(f,
+ "ExecCondition=%s --condition \"%s\"\n",
+ gnome_autostart_condition_path,
+ e_autostart_condition);
+ }
+
+ return 0;
+}
+
+int xdg_autostart_service_generate_unit(
+ XdgAutostartService *service,
+ const char *dest) {
+
+ _cleanup_free_ char *path_escaped = NULL, *exec_start = NULL, *unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(service);
+
+ /* Nothing to do for hidden services. */
+ if (service->hidden) {
+ log_debug("Not generating service for XDG autostart %s, it is hidden.", service->name);
+ return 0;
+ }
+
+ if (service->systemd_skip) {
+ log_debug("Not generating service for XDG autostart %s, should be skipped by generator.", service->name);
+ return 0;
+ }
+
+ /* Nothing to do if type is not Application. */
+ if (!streq_ptr(service->type, "Application")) {
+ log_debug("Not generating service for XDG autostart %s, only Type=Application is supported.", service->name);
+ return 0;
+ }
+
+ if (!service->exec_string) {
+ log_warning("Not generating service for XDG autostart %s, it is has no Exec= line.", service->name);
+ return 0;
+ }
+
+ /*
+ * The TryExec key cannot be checked properly from the systemd unit,
+ * it is trivial to check using find_executable though.
+ */
+ if (service->try_exec) {
+ r = find_executable(service->try_exec, NULL);
+ if (r < 0) {
+ log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
+ "Not generating service for XDG autostart %s, could not find TryExec= binary %s: %m",
+ service->name, service->try_exec);
+ return 0;
+ }
+ }
+
+ r = xdg_autostart_format_exec_start(service->exec_string, &exec_start);
+ if (r < 0) {
+ log_warning_errno(r,
+ "Not generating service for XDG autostart %s, error parsing Exec= line: %m",
+ service->name);
+ return 0;
+ }
+
+ if (service->gnome_autostart_phase) {
+ /* There is no explicit value for the "Application" phase. */
+ log_debug("Not generating service for XDG autostart %s, startup phases are not supported.",
+ service->name);
+ return 0;
+ }
+
+ path_escaped = specifier_escape(service->path);
+ if (!path_escaped)
+ return log_oom();
+
+ unit = path_join(dest, service->name);
+ if (!unit)
+ return log_oom();
+
+ f = fopen(unit, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by systemd-xdg-autostart-generator\n\n"
+ "[Unit]\n"
+ "Documentation=man:systemd-xdg-autostart-generator(8)\n"
+ "SourcePath=%s\n"
+ "PartOf=graphical-session.target\n\n",
+ path_escaped);
+
+ if (service->description) {
+ _cleanup_free_ char *t = NULL;
+
+ t = specifier_escape(service->description);
+ if (!t)
+ return log_oom();
+
+ fprintf(f, "Description=%s\n", t);
+ }
+
+ /* Only start after the session is ready. */
+ fprintf(f,
+ "After=graphical-session.target\n");
+
+ fprintf(f,
+ "\n[Service]\n"
+ "Type=exec\n"
+ "ExecStart=:%s\n"
+ "Restart=no\n"
+ "TimeoutSec=5s\n"
+ "Slice=app.slice\n",
+ exec_start);
+
+ if (service->working_directory) {
+ _cleanup_free_ char *e_working_directory = NULL;
+
+ e_working_directory = cescape(service->working_directory);
+ if (!e_working_directory)
+ return log_oom();
+
+ fprintf(f, "WorkingDirectory=-%s\n", e_working_directory);
+ }
+
+ /* Generate an ExecCondition to check $XDG_CURRENT_DESKTOP */
+ if (!strv_isempty(service->only_show_in) || !strv_isempty(service->not_show_in)) {
+ _cleanup_free_ char *only_show_in = NULL, *not_show_in = NULL, *e_only_show_in = NULL, *e_not_show_in = NULL;
+
+ only_show_in = strv_join(service->only_show_in, ":");
+ not_show_in = strv_join(service->not_show_in, ":");
+ if (!only_show_in || !not_show_in)
+ return log_oom();
+
+ e_only_show_in = cescape(only_show_in);
+ e_not_show_in = cescape(not_show_in);
+ if (!e_only_show_in || !e_not_show_in)
+ return log_oom();
+
+ /* Just assume the values are reasonably sane */
+ fprintf(f,
+ "ExecCondition=" ROOTLIBEXECDIR "/systemd-xdg-autostart-condition \"%s\" \"%s\"\n",
+ e_only_show_in,
+ e_not_show_in);
+ }
+
+ r = xdg_autostart_generate_desktop_condition(f,
+ "gnome-systemd-autostart-condition",
+ service->autostart_condition);
+ if (r < 0)
+ return r;
+
+ r = xdg_autostart_generate_desktop_condition(f,
+ "kde-systemd-start-condition",
+ service->kde_autostart_condition);
+ if (r < 0)
+ return r;
+
+ (void) generator_add_symlink(dest, "xdg-desktop-autostart.target", "wants", service->name);
+
+ return 0;
+}
diff --git a/src/xdg-autostart-generator/xdg-autostart-service.h b/src/xdg-autostart-generator/xdg-autostart-service.h
new file mode 100644
index 0000000..2641718
--- /dev/null
+++ b/src/xdg-autostart-generator/xdg-autostart-service.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+typedef struct XdgAutostartService {
+ char *name;
+ char *path;
+ char *description; /* Name in XDG desktop file */
+
+ char *type; /* Purely as an assertion check */
+ char *exec_string;
+ char *working_directory;
+
+ char **only_show_in;
+ char **not_show_in;
+
+ char *try_exec;
+ char *autostart_condition; /* This is mostly GNOME specific */
+ char *kde_autostart_condition;
+
+ char *gnome_autostart_phase;
+
+ bool hidden;
+ bool systemd_skip;
+
+} XdgAutostartService;
+
+
+XdgAutostartService * xdg_autostart_service_free(XdgAutostartService *s);
+DEFINE_TRIVIAL_CLEANUP_FUNC(XdgAutostartService*, xdg_autostart_service_free);
+
+char *xdg_autostart_service_translate_name(const char *name);
+int xdg_autostart_format_exec_start(const char *exec, char **ret_exec_start);
+
+XdgAutostartService *xdg_autostart_service_parse_desktop(const char *path);
+int xdg_autostart_service_generate_unit(XdgAutostartService *service, const char *dest);